From 954092869f4c4224c999193ea1a37c2817063d97 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Wed, 1 Apr 2015 12:06:24 -0400 Subject: update (passing) reference shaders w/ @const for passing shaders, where reference shader did not contain @const, copy the output shader over. $ir3compiler $args $f >& $out ./ir3test $ref $out res=$? if [ $res = 0 ]; then pass=$((pass + 1)) + grep @const $out > /dev/null + outconst=$? + grep @const $ref > /dev/null + refconst=$? + if [ $outconst = 0 ] && [ $refconst != 0 ]; then + cp $out $ref + fi else --- reference/0ad-alpine-valley/0ad-100.asm | 282 +- reference/0ad-alpine-valley/0ad-101.asm | 154 +- reference/0ad-alpine-valley/0ad-102.asm | 333 +- reference/0ad-alpine-valley/0ad-103.asm | 282 +- reference/0ad-alpine-valley/0ad-104.asm | 333 +- reference/0ad-alpine-valley/0ad-105.asm | 83 +- reference/0ad-alpine-valley/0ad-106.asm | 57 +- reference/0ad-alpine-valley/0ad-107.asm | 83 +- reference/0ad-alpine-valley/0ad-115.asm | 48 +- reference/0ad-alpine-valley/0ad-118.asm | 48 +- reference/0ad-alpine-valley/0ad-121.asm | 48 +- reference/0ad-alpine-valley/0ad-124.asm | 48 +- reference/0ad-alpine-valley/0ad-127.asm | 48 +- reference/0ad-alpine-valley/0ad-130.asm | 48 +- reference/0ad-alpine-valley/0ad-133.asm | 48 +- reference/0ad-alpine-valley/0ad-136.asm | 19 +- reference/0ad-alpine-valley/0ad-139.asm | 19 +- reference/0ad-alpine-valley/0ad-142.asm | 31 +- reference/0ad-alpine-valley/0ad-145.asm | 19 +- reference/0ad-alpine-valley/0ad-148.asm | 29 +- reference/0ad-alpine-valley/0ad-151.asm | 21 +- reference/0ad-alpine-valley/0ad-34.asm | 31 +- reference/0ad-alpine-valley/0ad-36.asm | 231 +- reference/0ad-alpine-valley/0ad-37.asm | 31 +- reference/0ad-alpine-valley/0ad-38.asm | 231 +- reference/0ad-alpine-valley/0ad-40.asm | 71 +- reference/0ad-alpine-valley/0ad-46.asm | 276 +- reference/0ad-alpine-valley/0ad-49.asm | 401 +-- reference/0ad-alpine-valley/0ad-51.asm | 256 +- reference/0ad-alpine-valley/0ad-52.asm | 825 ++--- reference/0ad-alpine-valley/0ad-53.asm | 256 +- reference/0ad-alpine-valley/0ad-54.asm | 262 +- reference/0ad-alpine-valley/0ad-55.asm | 813 ++--- reference/0ad-alpine-valley/0ad-56.asm | 262 +- reference/0ad-alpine-valley/0ad-58.asm | 431 +-- reference/0ad-alpine-valley/0ad-61.asm | 296 +- reference/0ad-alpine-valley/0ad-63.asm | 248 +- reference/0ad-alpine-valley/0ad-64.asm | 490 ++- reference/0ad-alpine-valley/0ad-65.asm | 248 +- reference/0ad-alpine-valley/0ad-66.asm | 124 +- reference/0ad-alpine-valley/0ad-67.asm | 322 +- reference/0ad-alpine-valley/0ad-68.asm | 124 +- reference/0ad-alpine-valley/0ad-69.asm | 124 +- reference/0ad-alpine-valley/0ad-70.asm | 278 +- reference/0ad-alpine-valley/0ad-71.asm | 124 +- reference/0ad-alpine-valley/0ad-72.asm | 124 +- reference/0ad-alpine-valley/0ad-73.asm | 322 +- reference/0ad-alpine-valley/0ad-74.asm | 124 +- reference/0ad-alpine-valley/0ad-75.asm | 222 +- reference/0ad-alpine-valley/0ad-76.asm | 326 +- reference/0ad-alpine-valley/0ad-77.asm | 222 +- reference/0ad-alpine-valley/0ad-78.asm | 222 +- reference/0ad-alpine-valley/0ad-79.asm | 384 +-- reference/0ad-alpine-valley/0ad-80.asm | 222 +- reference/0ad-alpine-valley/0ad-81.asm | 222 +- reference/0ad-alpine-valley/0ad-82.asm | 356 +- reference/0ad-alpine-valley/0ad-83.asm | 222 +- reference/0ad-alpine-valley/0ad-84.asm | 154 +- reference/0ad-alpine-valley/0ad-85.asm | 322 +- reference/0ad-alpine-valley/0ad-86.asm | 154 +- reference/0ad-alpine-valley/0ad-87.asm | 154 +- reference/0ad-alpine-valley/0ad-88.asm | 278 +- reference/0ad-alpine-valley/0ad-89.asm | 154 +- reference/0ad-alpine-valley/0ad-90.asm | 154 +- reference/0ad-alpine-valley/0ad-91.asm | 322 +- reference/0ad-alpine-valley/0ad-92.asm | 154 +- reference/0ad-alpine-valley/0ad-93.asm | 154 +- reference/0ad-alpine-valley/0ad-94.asm | 282 +- reference/0ad-alpine-valley/0ad-95.asm | 154 +- reference/0ad-alpine-valley/0ad-96.asm | 333 +- reference/0ad-alpine-valley/0ad-97.asm | 282 +- reference/0ad-alpine-valley/0ad-98.asm | 333 +- reference/0ad-alpine-valley/0ad-99.asm | 154 +- .../0ad-cycladic-archipelago-00.asm | 61 +- .../0ad-cycladic-archipelago-01.asm | 46 +- .../0ad-cycladic-archipelago-02.asm | 41 +- .../0ad-cycladic-archipelago-101.asm | 63 +- .../0ad-cycladic-archipelago-103.asm | 63 +- .../0ad-cycladic-archipelago-105.asm | 63 +- .../0ad-cycladic-archipelago-107.asm | 63 +- .../0ad-cycladic-archipelago-111.asm | 31 +- .../0ad-cycladic-archipelago-114.asm | 31 +- .../0ad-cycladic-archipelago-116.asm | 43 +- .../0ad-cycladic-archipelago-118.asm | 31 +- .../0ad-cycladic-archipelago-120.asm | 31 +- .../0ad-cycladic-archipelago-122.asm | 23 +- .../0ad-cycladic-archipelago-126.asm | 49 +- .../0ad-cycladic-archipelago-127.asm | 79 +- .../0ad-cycladic-archipelago-128.asm | 122 +- .../0ad-cycladic-archipelago-130.asm | 90 +- .../0ad-cycladic-archipelago-131.asm | 50 +- .../0ad-cycladic-archipelago-133.asm | 43 +- .../0ad-cycladic-archipelago-135.asm | 63 +- .../0ad-cycladic-archipelago-139.asm | 369 +- .../0ad-cycladic-archipelago-140.asm | 259 +- .../0ad-cycladic-archipelago-21.asm | 43 +- .../0ad-cycladic-archipelago-22.asm | 250 +- .../0ad-cycladic-archipelago-23.asm | 43 +- .../0ad-cycladic-archipelago-24.asm | 250 +- .../0ad-cycladic-archipelago-25.asm | 43 +- .../0ad-cycladic-archipelago-27.asm | 43 +- .../0ad-cycladic-archipelago-28.asm | 250 +- .../0ad-cycladic-archipelago-29.asm | 37 +- .../0ad-cycladic-archipelago-30.asm | 285 +- .../0ad-cycladic-archipelago-32.asm | 838 ++--- .../0ad-cycladic-archipelago-33.asm | 333 +- .../0ad-cycladic-archipelago-34.asm | 493 ++- .../0ad-cycladic-archipelago-35.asm | 335 +- .../0ad-cycladic-archipelago-36.asm | 826 ++--- .../0ad-cycladic-archipelago-37.asm | 339 +- .../0ad-cycladic-archipelago-38.asm | 311 +- .../0ad-cycladic-archipelago-40.asm | 501 ++- .../0ad-cycladic-archipelago-41.asm | 353 +- .../0ad-cycladic-archipelago-42.asm | 503 ++- .../0ad-cycladic-archipelago-43.asm | 323 +- .../0ad-cycladic-archipelago-44.asm | 307 +- .../0ad-cycladic-archipelago-46.asm | 335 +- .../0ad-cycladic-archipelago-47.asm | 175 +- .../0ad-cycladic-archipelago-48.asm | 335 +- .../0ad-cycladic-archipelago-49.asm | 175 +- .../0ad-cycladic-archipelago-50.asm | 335 +- .../0ad-cycladic-archipelago-51.asm | 207 +- .../0ad-cycladic-archipelago-52.asm | 369 +- .../0ad-cycladic-archipelago-53.asm | 285 +- .../0ad-cycladic-archipelago-54.asm | 339 +- .../0ad-cycladic-archipelago-55.asm | 285 +- .../0ad-cycladic-archipelago-56.asm | 335 +- .../0ad-cycladic-archipelago-57.asm | 207 +- .../0ad-cycladic-archipelago-58.asm | 397 +-- .../0ad-cycladic-archipelago-59.asm | 285 +- .../0ad-cycladic-archipelago-60.asm | 285 +- .../0ad-cycladic-archipelago-61.asm | 207 +- .../0ad-cycladic-archipelago-62.asm | 1041 +++--- .../0ad-cycladic-archipelago-63.asm | 566 ++-- .../0ad-cycladic-archipelago-64.asm | 1437 ++++---- .../0ad-cycladic-archipelago-65.asm | 566 ++-- .../0ad-cycladic-archipelago-66.asm | 303 +- .../0ad-cycladic-archipelago-67.asm | 394 +-- .../0ad-cycladic-archipelago-68.asm | 303 +- .../0ad-cycladic-archipelago-69.asm | 207 +- .../0ad-cycladic-archipelago-70.asm | 1041 +++--- .../0ad-cycladic-archipelago-71.asm | 566 ++-- .../0ad-cycladic-archipelago-72.asm | 1437 ++++---- .../0ad-cycladic-archipelago-73.asm | 566 ++-- .../0ad-cycladic-archipelago-74.asm | 303 +- .../0ad-cycladic-archipelago-75.asm | 394 +-- .../0ad-cycladic-archipelago-76.asm | 303 +- .../0ad-cycladic-archipelago-77.asm | 207 +- .../0ad-cycladic-archipelago-81.asm | 83 +- .../0ad-cycladic-archipelago-85.asm | 1649 ++++----- .../0ad-cycladic-archipelago-86.asm | 274 +- .../0ad-cycladic-archipelago-87.asm | 69 +- .../0ad-cycladic-archipelago-88.asm | 122 +- .../0ad-cycladic-archipelago-95.asm | 43 +- .../0ad-cycladic-archipelago-96.asm | 250 +- .../0ad-cycladic-archipelago-97.asm | 63 +- .../0ad-cycladic-archipelago-99.asm | 63 +- reference/0ad-frag-1.asm | 282 +- reference/0ad-frag-2.asm | 278 +- reference/0ad-frag.asm | 163 +- reference/2color-after.asm | 23 +- reference/ChameleonMan-vert.asm | 1261 +++---- reference/builtin2.asm | 15 +- reference/bump/bump-12.asm | 258 +- reference/bump/bump-13.asm | 191 +- reference/bump1.asm | 191 +- reference/bump2.asm | 258 +- reference/chrome/bad.asm | 155 +- reference/crazy-frag-conflict.asm | 47 +- reference/crazy-frag.asm | 47 +- reference/dd.asm | 409 +-- reference/es2gears-vert.asm | 112 +- reference/face.asm | 16 +- reference/ffox-otmc/ffox-otmc-03.asm | 1 + reference/ffox-otmc/ffox-otmc-04.asm | 1 + reference/ffox-otmc/ffox-otmc-05.asm | 1 + reference/ffox-otmc/ffox-otmc-06.asm | 1 + reference/ffox-otmc/ffox-otmc-08.asm | 1 + reference/ffox-otmc/ffox-otmc-13.asm | 1 + reference/ffox-otmc/ffox-otmc-14.asm | 1 + reference/ffox-otmc/ffox-otmc-15.asm | 1 + reference/ffox-otmc/ffox-otmc-16.asm | 1 + reference/ffox-otmc/ffox-otmc-17.asm | 1 + reference/ffox-otmc/ffox-otmc-18.asm | 1 + reference/ffox-otmc/ffox-otmc-19.asm | 1 + reference/ffox-otmc/ffox-otmc-24.asm | 1 + reference/ffox-otmc/ffox-otmc-26.asm | 2 + reference/ffox-otmc/ffox-otmc-27.asm | 1 + reference/ffox-otmc/ffox-otmc-28.asm | 2 + reference/ffox-otmc/ffox-otmc-29.asm | 1 + reference/ffox-otmc/ffox-otmc-30.asm | 41 +- reference/ffox-otmc/ffox-otmc-31.asm | 1 + reference/ffox-otmc/ffox-otmc-32.asm | 1 + reference/ffox-otmc/ffox-otmc-33.asm | 1 + reference/ffox-otmc/ffox-otmc-34.asm | 1 + reference/ffox-otmc/ffox-otmc-36.asm | 2 + reference/ffox-otmc/ffox-otmc-39.asm | 2 + reference/ffox-otmc/ffox-otmc-41.asm | 2 + reference/ffox-otmc/ffox-otmc-42.asm | 2 + reference/ffox-otmc/ffox-otmc-43.asm | 2 + reference/ffox-otmc/ffox-otmc-44.asm | 2 + reference/ffox-otmc/ffox-otmc-45.asm | 2 + reference/ffox-otmc/ffox-otmc-46.asm | 2 + reference/ffox-otmc/ffox-otmc-48.asm | 2 + reference/ffox-otmc/ffox-otmc-50.asm | 3 + reference/ffox-otmc/ffox-otmc-51.asm | 2 + reference/ffox-otmc/ffox-otmc-52.asm | 3 + reference/ffox-otmc/ffox-otmc-54.asm | 2 + reference/ffox-otmc/ffox-otmc-57.asm | 1 + reference/ffox-otmc/ffox-otmc-59.asm | 1 + reference/ffox-vert.asm | 127 +- reference/flow.asm | 315 +- reference/foo.asm | 19 +- reference/fragProg1/fragProg1-08.asm | 1 + reference/fragProg1/fragProg1-09.asm | 1 + reference/fragProg1/fragProg1-10.asm | 1 + reference/fragProg1/fragProg1-11.asm | 1 + reference/fragProg1/fragProg1-12.asm | 23 +- reference/fragProg1/fragProg1-13.asm | 1 + reference/fragProg1/fragProg1-14.asm | 1 + reference/fragProg1/fragProg1-15.asm | 1 + reference/fragProg1/fragProg1-16.asm | 13 +- reference/fragProg1/fragProg1-17.asm | 8 +- reference/fragProg1/fragProg1-18.asm | 15 +- reference/fragProg1/fragProg1-19.asm | 28 +- reference/fragProg1/fragProg1-20.asm | 2 + reference/fragProg1/fragProg1-21.asm | 2 + reference/fragProg1/fragProg1-22.asm | 2 + reference/fragProg1/fragProg1-23.asm | 2 + reference/fragProg1/fragProg1-24.asm | 2 + reference/fragProg1/fragProg1-25.asm | 30 +- reference/fragProg1/fragProg1-26.asm | 30 +- reference/fragProg1/fragProg1-27.asm | 30 +- reference/fragProg1/fragProg1-28.asm | 2 + reference/fragProg1/fragProg1-35.asm | 12 +- reference/fragProg1/fragProg1-36.asm | 1 + reference/fragProg1/fragProg1-37.asm | 1 + reference/fragProg1/fragProg1-38.asm | 1 + reference/fragProg1/fragProg1-39.asm | 1 + reference/fragProg1/fragProg1-40.asm | 2 + reference/fragProg1/fragProg1-42.asm | 1 + reference/fragProg1/fragProg1-43.asm | 1 + reference/fragProg1/fragProg1-46.asm | 2 + reference/fragProg1/fragProg1-47.asm | 1 + reference/fragProg1/fragProg1-50.asm | 1 + reference/fragProg1/fragProg1-54.asm | 45 +- reference/fragProg1/fragProg1-55.asm | 1 + reference/fragProg1/fragProg1-56.asm | 35 +- reference/fragProg1/fragProg1-57.asm | 57 +- reference/fragProg1/fragProg1-58.asm | 35 +- reference/fragProg1/fragProg1-59.asm | 35 +- reference/fragProg1/fragProg1-60.asm | 49 +- reference/glmark1.asm | 22 +- reference/glmark2.asm | 111 +- reference/glmark3.asm | 492 ++- reference/glsl-fs-raytrace-bug27060.asm | 2206 +++++------- reference/gmaps-frag.asm | 100 +- reference/idiv-vert.asm | 154 +- reference/jellyfish-frag.asm | 593 ++-- reference/maniadrive/maniadrive-01.asm | 73 +- reference/maniadrive/maniadrive-02.asm | 45 +- reference/maniadrive/maniadrive-03.asm | 444 ++- reference/maniadrive/maniadrive-04.asm | 31 +- reference/maniadrive/maniadrive-06.asm | 37 +- reference/maniadrive/maniadrive-07.asm | 41 +- reference/maniadrive/maniadrive-08.asm | 85 +- reference/maniadrive/maniadrive-09.asm | 45 +- reference/maniadrive/maniadrive-10.asm | 360 +- reference/maniadrive/maniadrive-13.asm | 43 +- reference/maniadrive/maniadrive-14.asm | 344 +- reference/maniadrive/maniadrive-15.asm | 374 +-- reference/maniadrive/maniadrive-17.asm | 346 +- reference/maniadrive/maniadrive-18.asm | 422 ++- reference/maniadrive/maniadrive-19.asm | 35 +- reference/multi-kill.asm | 73 +- .../piglit-arb_framebuffer_srgb-blit-frag1.asm | 1 + .../piglit-arb_framebuffer_srgb-blit-frag2.asm | 1 + .../piglit-fs-uniform-array-mat2-index-rd.asm | 30 +- reference/piglit-glsl-fs-varying-array.asm | 3 - .../piglit-tex-miplevel-selection-1d-shadow.asm | 1 + reference/piglit-vs-temp-mat3-row-rd.asm | 57 +- .../piglit-vs-varying-array-mat2-index-rd.asm | 3 - .../piglit-vs-varying-array-mat4-index-rd.asm | 3 - reference/problem/0ad-frag.asm | 68 +- reference/problem/frag-conflict-1.asm | 17 +- reference/problem/frag-conflict-2.asm | 31 +- reference/relative-lowered.asm | 33 +- reference/relative-med.asm | 17 +- reference/relative-piglit-bad.asm | 82 +- .../relative-temp/fs-temp-mat3-col-row-wr.asm | 3 - reference/sad-frag.asm | 1 + reference/simple-frag.asm | 39 +- reference/simple-if-else.asm | 37 +- reference/simple-if.asm | 25 +- reference/simple-vert.asm | 8 +- reference/simple.asm | 25 +- reference/simpletest.asm | 17 +- reference/stk-mines/stk-mines-00.asm | 35 +- reference/stk-mines/stk-mines-01.asm | 81 +- reference/stk-mines/stk-mines-02.asm | 85 +- reference/stk-mines/stk-mines-03.asm | 43 +- reference/stk-mines/stk-mines-05.asm | 73 +- reference/stk-mines/stk-mines-06.asm | 73 +- reference/stk-mines/stk-mines-07.asm | 85 +- reference/stk-mines/stk-mines-08.asm | 235 +- reference/stk-mines/stk-mines-09.asm | 235 +- reference/stk-mines/stk-mines-10.asm | 57 +- reference/stk-mines/stk-mines-11.asm | 248 +- reference/stk-mines/stk-mines-12.asm | 37 +- reference/stk-mines/stk-mines-13.asm | 236 +- reference/stk-mines/stk-mines-14.asm | 276 +- reference/stk-mines/stk-mines-15.asm | 248 +- reference/stk-mines/stk-mines-16.asm | 79 +- reference/stk-mines/stk-mines-17.asm | 254 +- reference/stk-mines/stk-mines-18.asm | 312 +- reference/stk-mines/stk-mines-19.asm | 235 +- reference/stk-mines/stk-mines-20.asm | 248 +- reference/stk-mines/stk-mines-21.asm | 43 +- reference/stk-mines/stk-mines-22.asm | 412 +-- reference/stk-mines/stk-mines-23.asm | 307 +- reference/stk-mines/stk-mines-24.asm | 57 +- reference/stk-mines/stk-mines-25.asm | 43 +- reference/stk-mines/stk-mines-26.asm | 239 +- reference/stk-mines/stk-mines-27.asm | 412 +-- reference/stk-mines/stk-mines-28.asm | 307 +- reference/stk-mines/stk-mines-29.asm | 412 +-- reference/stk-mines/stk-mines-30.asm | 307 +- reference/stk-mines/stk-mines-31.asm | 412 +-- reference/stk-mines/stk-mines-32.asm | 307 +- reference/stk-mines/stk-mines-33.asm | 412 +-- reference/stk-mines/stk-mines-34.asm | 307 +- reference/stk-mines/stk-mines-35.asm | 235 +- reference/stk-mines/stk-mines-36.asm | 412 +-- reference/stk-mines/stk-mines-37.asm | 307 +- reference/stk-mines/stk-mines-38.asm | 43 +- reference/stk/stk0100.asm | 35 +- reference/stk/stk0101.asm | 81 +- reference/stk/stk0102.asm | 85 +- reference/stk/stk0200.asm | 43 +- reference/stk/stk0301.asm | 73 +- reference/stk/stk0302.asm | 73 +- reference/stk/stk0303.asm | 85 +- reference/stk/stk0304.asm | 235 +- reference/stk/stk0305.asm | 235 +- reference/stk/stk0306.asm | 255 +- reference/stk/stk0307.asm | 312 +- reference/stk/stk0400.asm | 43 +- reference/stk/stk0500.asm | 315 +- reference/stk/stk0501.asm | 303 +- reference/stk/stk0600.asm | 43 +- reference/stk/stk0601.asm | 239 +- reference/stk/stk0700.asm | 43 +- reference/test.asm | 28 +- reference/test0.asm | 43 +- reference/test1.asm | 235 +- reference/test2.asm | 120 +- reference/test3.asm | 36 +- reference/testN.asm | 91 +- reference/tex-clamp0.asm | 43 +- reference/tex-clamp1.asm | 37 +- reference/twoside-frag.asm | 90 +- reference/twoside-vert.asm | 109 +- reference/vs-op-neg-int.asm | 53 +- reference/webgl-blob-frag.asm | 31 +- reference/webgl-water/webgl-water-13.asm | 73 +- reference/webgl-water/webgl-water-14.asm | 35 +- reference/webgl-water/webgl-water-18.asm | 31 +- reference/webgl-water/webgl-water-20.asm | 27 +- reference/webgl-water/webgl-water-27.asm | 73 +- reference/webgl-water/webgl-water-33.asm | 73 +- reference/webgl-water/webgl-water-34.asm | 13 +- reference/webgl-water/webgl-water-36.asm | 70 +- reference/webgl-water/webgl-water-37.asm | 21 +- reference/webgl-water/webgl-water-38.asm | 484 +-- reference/webgl-water/webgl-water-39.asm | 63 +- reference/webgl-water/webgl-water-40.asm | 3525 +++++++------------- reference/xa-composite-fs.asm | 41 +- reference/xon1.asm | 319 +- reference/xon2.asm | 150 +- reference/xon3.asm | 343 +- reference/xon4.asm | 150 +- reference/xon5.asm | 201 +- reference/xon6.asm | 117 +- reference/xon7.asm | 327 +- reference/xon8.asm | 150 +- reference/xon9.asm | 124 +- reference/xonotic-gl2/xonotic-glx-gl2-01.asm | 73 +- reference/xonotic-gl2/xonotic-glx-gl2-02.asm | 25 +- reference/xonotic-gl2/xonotic-glx-gl2-04.asm | 25 +- reference/xonotic-gl2/xonotic-glx-gl2-06.asm | 25 +- reference/xonotic-gl2/xonotic-glx-gl2-10.asm | 1 + reference/xonotic-gl2/xonotic-glx-gl2-12.asm | 319 +- reference/xonotic-gl2/xonotic-glx-gl2-13.asm | 150 +- reference/xonotic-gl2/xonotic-glx-gl2-14.asm | 343 +- reference/xonotic-gl2/xonotic-glx-gl2-15.asm | 150 +- reference/xonotic-gl2/xonotic-glx-gl2-16.asm | 201 +- reference/xonotic-gl2/xonotic-glx-gl2-17.asm | 117 +- reference/xonotic-gl2/xonotic-glx-gl2-18.asm | 327 +- reference/xonotic-gl2/xonotic-glx-gl2-19.asm | 150 +- reference/xonotic-gl2/xonotic-glx-gl2-20.asm | 87 +- reference/xonotic-gl2/xonotic-glx-gl2-21.asm | 55 +- reference/xonotic-gl2/xonotic-glx-gl2-22.asm | 420 +-- reference/xonotic-gl2/xonotic-glx-gl2-23.asm | 204 +- reference/xonotic-gl2/xonotic-glx-gl2-24.asm | 170 +- reference/xonotic-gl2/xonotic-glx-gl2-25.asm | 146 +- reference/xonotic-gl2/xonotic-glx-gl2-26.asm | 382 +-- reference/xonotic-gl2/xonotic-glx-gl2-27.asm | 204 +- reference/xonotic-gl2/xonotic-glx-gl2-28.asm | 246 +- reference/xonotic-gl2/xonotic-glx-gl2-29.asm | 180 +- reference/xonotic-gl2/xonotic-glx-gl2-30.asm | 47 +- reference/xonotic-gl2/xonotic-glx-gl2-31.asm | 55 +- reference/xonotic-gl2/xonotic-glx-gl2-32.asm | 29 +- reference/xonotic-gl2/xonotic-glx-gl2-33.asm | 55 +- reference/xonotic-gl2/xonotic-glx-gl2-34.asm | 31 +- reference/xonotic-gl2/xonotic-glx-gl2-35.asm | 81 +- reference/xonotic-gl2/xonotic-glx-gl2-36.asm | 49 +- reference/xonotic-gl2/xonotic-glx-gl2-37.asm | 81 +- reference/xonotic-gl2/xonotic-glx-gl2-38.asm | 222 +- reference/xonotic-gl2/xonotic-glx-gl2-39.asm | 180 +- reference/xonotic-gl2/xonotic-glx-gl2-40.asm | 282 +- reference/xonotic-gl2/xonotic-glx-gl2-41.asm | 180 +- reference/xonotic/xonotic05.asm | 73 +- reference/xonotic/xonotic06.asm | 31 +- reference/xonotic/xonotic08.asm | 73 +- reference/xonotic/xonotic09.asm | 35 +- reference/xonotic/xonotic10.asm | 85 +- reference/xonotic/xonotic11.asm | 73 +- reference/xonotic/xonotic14.asm | 81 +- reference/xonotic/xonotic16.asm | 45 +- reference/xonotic/xonotic17.asm | 89 +- reference/xonotic/xonotic18.asm | 85 +- reference/xonotic/xonotic20.asm | 47 +- reference/xonotic/xonotic21.asm | 83 +- reference/xonotic/xonotic22.asm | 97 +- 434 files changed, 30858 insertions(+), 45588 deletions(-) diff --git a/reference/0ad-alpine-valley/0ad-100.asm b/reference/0ad-alpine-valley/0ad-100.asm index 12fbb01..be30c1a 100644 --- a/reference/0ad-alpine-valley/0ad-100.asm +++ b/reference/0ad-alpine-valley/0ad-100.asm @@ -8,203 +8,139 @@ @out(r2.y) out1 @out(r2.z) out2 @out(r2.w) out3 +@const(c9.x) 0x3f000000, 0x00000000, 0x3d4ccccd, 0xbb449ba6 +@const(c10.x) 0xbf000000, 0x3f800000, 0x40000000, 0x3de38866 +@const(c11.x) 0x3fb8aa65, 0x3cf5c28f, 0x00000000, 0x00000000 +@const(c12.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.x, 4, r1.x add.f r0.y, r0.w, c9.y bary.f r0.w, 5, r1.x bary.f r1.z, 10, r1.x -add.f r1.w, r0.x, c10.x -bary.f r2.x, 8, r1.x +add.f r2.x, r0.x, c10.x +bary.f r1.w, 11, r1.x add.f r2.y, r0.w, c10.x -mov.f32f32 r1.z, r1.z -floor.f r2.z, r1.w +bary.f r3.y, 8, r1.x +floor.f r2.z, r2.x rcp r0.y, r0.y add.f r0.z, r0.z, c9.y floor.f r2.w, r2.y -mov.f32f32 r3.y, r1.z -add.f r1.z, r1.w, (neg)r2.z +bary.f r3.z, 9, r1.x +add.f r3.w, r2.x, (neg)r2.z (ss)mul.f r0.y, r0.z, r0.y -add.f r0.z, r2.y, (neg)r2.w -bary.f r1.w, 11, r1.x -mov.f32f32 r1.z, r1.z +absneg.f r0.z, (neg)c7.x +add.f r4.x, r2.y, (neg)r2.w +mov.f32f32 r4.y, r3.w +sam (f32)(xyzw)r2.x, r1.z, s#0, t#0 +(ss)add.f r1.z, r3.w, c10.y +mul.f r0.z, r0.z, c7.x +mov.f32f32 r1.w, r4.x +mul.f r3.w, c9.x, r4.y +add.f r4.y, c10.z, (neg)r4.y +mul.f r0.z, r0.z, r0.y mov.f32f32 r0.y, r0.y -absneg.f r2.y, (neg)c7.x -mov.f32f32 r0.z, r0.z -mul.f r2.z, c9.x, r1.z -mov.f32f32 r1.w, r1.w -add.f r2.w, c10.z, (neg)r1.z -add.f r3.w, c10.z, (neg)r0.z -mov.f32f32 r2.z, r2.z -mul.f r2.y, r2.y, c7.x -mul.f r4.x, c9.x, r0.z -mov.f32f32 r3.z, r1.w -add.f r0.x, r0.x, (neg)r2.z -mul.f r1.w, r2.y, r0.y -mov.f32f32 r2.y, r4.x -mov.f32f32 r2.z, r2.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r1.w, r1.w -add.f r0.w, r0.w, (neg)r2.y -sam (f32)(xyzw)r4.x, r3.y, s#0, t#0 -(sy)cmps.f.lt r2.y, r4.w, c9.z -add.f r2.w, c10.x, r0.x -mul.f r0.y, r1.w, r0.y +add.f r0.x, r0.x, (neg)r3.w +mul.f r3.w, c9.x, r1.w +mov.f32f32 r4.z, r4.y +mul.f r0.y, r0.z, r0.y +mov.f32f32 r0.z, r0.x +add.f r0.w, r0.w, (neg)r3.w add.f r0.x, c10.y, r0.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.w, r2.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x -add.f r2.w, c10.y, r0.w -mul.f r1.w, r1.w, c3.z mul.f r0.y, r0.y, c11.x -mul.f r0.x, r0.x, c3.z -mov.f32f32 r2.w, r2.w -(ss)mov.f32f32 r3.y, r1.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r3.z, r0.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r5.x, r3.y -add.f r0.w, c10.x, r0.w -mov.f32f32 r3.y, r3.z -mov.f32f32 r5.w, r0.x -mov.f32f32 r0.x, r1.w -mov.f32f32 r0.w, r0.w -exp2 r0.y, r0.y -(ss)mov.f32f32 r0.y, r0.y -mov.f32f32 r6.z, r3.y -mul.f r1.w, r2.w, c3.w -mul.f r0.w, r0.w, c3.w -add.f r2.w, c12.y, (neg)r0.y -mov.f32f32 r7.y, r0.x -mov.f32f32 r0.x, r1.w -mov.f32f32 r3.y, r0.w -mul.f r2.w, r2.w, c7.y -mul.f r0.y, r0.y, c10.y -mov.f32f32 r1.w, r1.w -mov.f32f32 r5.y, r3.y -bary.f r3.y, 6, r1.x -add.f r0.y, r0.y, r2.w -mov.f32f32 r1.w, r1.w -mov.f32f32 r0.w, r0.w -add.f r2.w, r3.y, c9.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r6.w, r1.w -mov.f32f32 r6.x, r0.w -mov.f32f32 r0.w, r2.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.w, r2.w -mov.f32f32 r3.y, r2.w -mov.f32f32 r5.z, r0.w -max.f r0.y, r0.y, c9.y -mov.f32f32 r7.x, r1.w -mov.f32f32 r6.y, r3.y -mov.f32f32 r7.z, r0.x -mov.f32f32 r0.x, r2.w -cov.u32f32 r0.w, r2.y -sam.s (f32)(x)r7.w, r5.x, s#2, t#2 -(sy)mov.f32f32 r1.w, r7.w -min.f r0.y, r0.y, c10.y -sam.s (f32)(x)r7.w, r6.z, s#2, t#2 -(sy)mov.f32f32 r2.y, r7.w -(ss)nop -sam.s (f32)(x)r5.x, r5.w, s#2, t#2 -(sy)mov.f32f32 r2.w, r5.x -mov.f32f32 r1.w, r1.w -mov.f32f32 r3.y, r3.w -add.f r3.z, c12.y, (neg)r0.y -add.f r3.w, c12.y, (neg)r0.y -add.f r5.x, c12.y, (neg)r0.y -mul.f r5.y, r2.z, r3.y -mul.f r3.z, r3.z, c6.z -mul.f r3.w, r3.w, c6.y -mul.f r5.x, r5.x, c6.x -mul.f r1.w, r5.y, r1.w -add.f r1.z, r1.z, c10.y -mov.f32f32 r7.w, r0.x -mov.f32f32 r0.x, (0.000000) -add.f r0.z, r0.z, c10.y -mul.f r3.y, r1.z, r3.y -mov.f32f32 r2.x, r2.x -cmps.f.ne p0.x, r0.w, r0.x -mul.f r0.x, r2.z, r0.z -mad.f32 r0.w, r3.y, r2.w, r1.w -sam.s (f32)(x)r5.y, r7.y, s#2, t#2 +add.f r0.z, c10.x, r0.z +mov.f32f32 r3.w, r0.w +mul.f r4.w, r0.x, c3.z +add.f r0.x, c10.y, r0.w +mul.f r5.z, r0.z, c3.z +add.f r0.z, c10.x, r3.w +mov.f32f32 r6.y, r4.w +mul.f r5.x, r0.x, c3.w +mov.f32f32 r7.x, r5.z +mul.f r6.z, r0.z, c3.w +exp2 r0.x, r0.y +(ss)mov.f32f32 r0.y, r0.x +bary.f r0.z, 6, r1.x +mov.f32f32 r5.w, r5.x +mov.f32f32 r7.y, r6.z +add.f r0.y, c12.y, (neg)r0.y +add.f r5.y, r0.z, c9.w +add.f r0.z, c10.z, (neg)r1.w +mul.f r0.x, r0.x, c10.y +add.f r0.w, r4.x, c10.y +mov.f32f32 r7.z, r5.y +mul.f r0.y, r0.y, c7.y +mov.f32f32 r6.w, r5.y +mov.f32f32 r6.x, r5.y (rpt1)nop -(sy)mov.f32f32 r1.w, r5.y -mov.f32f32 r0.w, r0.w -bary.f r2.z, 0, r1.x -mov.f32f32 r5.y, r2.x -mad.f32 r0.x, r0.x, r1.w, r0.w -bary.f r0.w, 1, r1.x -bary.f r1.w, 9, r1.x +add.f r0.x, r0.x, r0.y +sam.s (f32)(x)r7.x, r7.x, s#2, t#2 +mov.f32f32 r0.y, r0.z +nop +(ss)nop +sam.s (f32)(x)r7.y, r6.y, s#2, t#2 +sam.s (f32)(x)r7.z, r4.w, s#2, t#2 mul.f r0.z, r1.z, r0.z -mov.f32f32 r0.x, r0.x -bary.f (ei)r1.x, 2, r1.x -mov.f32f32 r1.y, r1.w -mad.f32 r0.x, r0.z, r2.y, r0.x +mul.f r1.w, r4.y, r0.w +mul.f r0.y, r4.z, r0.y +max.f r0.x, r0.x, c9.y +sam.s (f32)(x)r3.w, r5.z, s#2, t#2 +(sy)cmps.f.lt r4.x, r2.w, c9.z +mul.f r0.w, r1.z, r0.w +mul.f r0.y, r0.y, r7.x +min.f r0.x, r0.x, c10.y +mad.f32 r0.y, r0.z, r7.y, r0.y +cov.u32f32 r0.z, r4.x +mad.f32 r0.y, r1.w, r3.w, r0.y +add.f r1.z, c12.y, (neg)r0.x +mad.f32 r0.y, r0.w, r7.z, r0.y +add.f r0.w, c12.y, (neg)r0.x +add.f r1.w, c12.y, (neg)r0.x +mov.f32f32 r3.w, (0.000000) +mul.f r0.y, c10.w, r0.y +bary.f r4.x, 0, r1.x +mul.f r1.z, r1.z, c6.z +mul.f r0.w, r0.w, c6.y +mov.f32f32 r4.y, r0.y +bary.f r4.z, 2, r1.x +bary.f (ei)r1.x, 1, r1.x +mul.f r1.y, r2.x, r4.x +mul.f r1.w, r1.w, c6.x +mul.f r4.x, r2.z, r4.z +mul.f r1.x, r2.y, r1.x +mul.f r0.y, r1.y, r0.y +cmps.f.ne p0.x, r0.z, r3.w +mul.f r0.z, r4.x, r4.y +mul.f r1.x, r1.x, r4.y +mad.f32 r0.z, c5.z, r2.z, r0.z +mad.f32 r1.x, c5.y, r2.y, r1.x +mad.f32 r0.y, c5.x, r2.x, r0.y +sam (f32)(w)r3.y, r3.y, s#1, t#1 +(sy)cmps.f.lt r1.y, r4.x, c11.y +mul.f r0.z, r0.x, r0.z +mul.f r1.x, r0.x, r1.x +mul.f r0.x, r0.x, r0.y kill p0.x -mov.f32f32 r0.z, r4.w -mov.f32f32 r5.z, r1.y -mov.f32f32 r0.x, r0.x -mul.f r1.x, r4.z, r1.x -mul.f r0.w, r4.y, r0.w -mov.f32f32 r2.w, r0.z -mul.f r0.x, c10.w, r0.x -mul.f r0.z, r4.x, r2.z -sam (f32)(w)r1.y, r5.y, s#1, t#1 -nop -(sy)cmps.f.lt r1.y, r2.x, c11.y -mov.f32f32 r0.x, r0.x -mov.f32f32 r1.z, r2.x -mov.f32f32 r1.w, c9.y -nop -mul.f r1.x, r1.x, r0.x -mul.f r0.w, r0.w, r0.x -mul.f r0.x, r0.z, r0.x +add.f r0.y, r0.z, r1.z cov.u32f32 r0.z, r1.y -mov.f32f32 r1.x, r1.x -mov.f32f32 r0.w, r0.w -mad.f32 r1.x, c5.z, r4.z, r1.x -mad.f32 r0.w, c5.y, r4.y, r0.w -mov.f32f32 r0.x, r0.x +add.f r0.w, r1.x, r0.w +add.f r0.x, r0.x, r1.w +mov.f32f32 r1.x, c9.y cmps.f.ne r0.z, r0.z, c9.y -mov.f32f32 r1.x, r1.x -mov.f32f32 r0.w, r0.w -mad.f32 r0.x, c5.x, r4.x, r0.x -mov.f32f32 r1.y, r1.z -mul.f r1.x, r0.y, r1.x -mul.f r0.w, r0.y, r0.w -mov.f32f32 r0.x, r0.x -sel.b32 r0.z, r1.w, r0.z, r1.y -add.f r1.x, r1.x, r3.z -add.f r0.w, r0.w, r3.w -mul.f r0.x, r0.y, r0.x -nop -mul.f r0.y, r1.x, r0.z +(rpt2)nop +sel.b32 r0.z, r1.x, r0.z, r4.x +(rpt2)nop +mul.f r0.y, r0.y, r0.z mul.f r0.w, r0.w, r0.z -add.f r0.x, r0.x, r5.x -nop -mul.f r0.y, r0.y, c4.z -mul.f r0.w, r0.w, c4.y +(rpt1)nop +mul.f r2.z, r0.y, c4.z +mul.f r2.y, r0.w, c4.y mul.f r0.x, r0.x, r0.z -nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.w -mul.f r0.x, r0.x, c4.x -nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.x, r0.x -nop -mov.f32f32 r2.z, r0.y -mov.f32f32 r2.y, r0.z -mov.f32f32 r0.x, r0.x (rpt2)nop -mov.f32f32 r2.x, r0.x +mul.f r2.x, r0.x, c4.x end nop nop nop ; FRAG: outputs: r2.x (1:0) ; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r2.x (5:10,cm=f,il=12,b=1) r1.x (5:11,cm=f,il=16,b=1) -; FRAG: 196 instructions, 0 half, 8 full +; FRAG: 133 instructions, 0 half, 8 full diff --git a/reference/0ad-alpine-valley/0ad-101.asm b/reference/0ad-alpine-valley/0ad-101.asm index c6a3a6f..9c8ac11 100644 --- a/reference/0ad-alpine-valley/0ad-101.asm +++ b/reference/0ad-alpine-valley/0ad-101.asm @@ -24,115 +24,81 @@ @out(r3.y) out13 @out(r3.z) out14 @out(r3.w) out15 +@const(c16.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 +@const(c17.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)mul.f r1.z, c11.x, r0.w mul.f r1.w, c11.x, r0.x mad.f32 r1.z, c12.x, r1.x, r1.z mad.f32 r1.w, c12.x, r0.y, r1.w -mul.f r2.x, c11.z, r0.x -mad.f32 r1.w, c13.x, r0.z, r1.w -mov.f32f32 r1.z, r1.z -mad.f32 r2.x, c12.z, r0.y, r2.x mad.f32 r1.z, c13.x, r1.y, r1.z -add.f r1.w, r1.w, c14.x -mad.f32 r2.x, c13.z, r0.z, r2.x -mul.f r2.y, c11.y, r0.w -mov.f32f32 r1.z, r1.z -mul.f r2.z, c7.w, r1.w -mul.f r2.w, c7.z, r1.w -mul.f r3.x, c7.y, r1.w +mad.f32 r1.w, c13.x, r0.z, r1.w +mul.f r2.x, c11.z, r0.x +mul.f r2.y, c11.y, r0.x mul.f r1.z, r1.z, (neg)c4.x -mad.f32 r2.y, c12.y, r1.x, r2.y -mul.f r3.y, c11.y, r0.x -mul.f r4.x, c7.x, r1.w -mad.f32 r3.y, c12.y, r0.y, r3.y -mov.f32f32 r2.y, r2.y -mad.f32 r3.y, c13.y, r0.z, r3.y -mad.f32 r2.y, c13.y, r1.y, r2.y -mul.f r4.y, c0.w, r1.w -mul.f r4.z, c0.z, r1.w -mul.f r4.w, c0.y, r1.w -mov.f32f32 r2.y, r2.y -add.f r3.y, r3.y, c14.y -mul.f r5.x, c0.x, r1.w -add.f r5.y, r2.x, c14.z -mad.f32 r1.z, (neg)c4.y, r2.y, r1.z -mad.f32 r2.x, c8.w, r3.y, r2.z -mad.f32 r2.y, c8.z, r3.y, r2.w -mad.f32 r2.z, c8.y, r3.y, r3.x -mov.f32f32 r1.z, r1.z +mul.f r2.z, c11.y, r0.w +add.f r1.w, r1.w, c14.x +mad.f32 r2.z, c12.y, r1.x, r2.z +mad.f32 r2.x, c12.z, r0.y, r2.x +mad.f32 r2.z, c13.y, r1.y, r2.z +mul.f r2.w, c7.y, r1.w +mul.f r3.x, c7.x, r1.w +mul.f r3.y, c7.w, r1.w +mad.f32 r1.z, (neg)c4.y, r2.z, r1.z mul.f r0.w, c11.z, r0.w -mad.f32 r2.x, c9.w, r5.y, r2.x +mad.f32 r2.y, c12.y, r0.y, r2.y mad.f32 r0.w, c12.z, r1.x, r0.w +mad.f32 r1.x, c13.y, r0.z, r2.y +mad.f32 r0.w, c13.z, r1.y, r0.w +mul.f r1.y, c7.z, r1.w +mul.f r2.y, c0.w, r1.w +mul.f r2.z, c0.z, r1.w +mad.f32 r0.w, (neg)c4.z, r0.w, r1.z +add.f r4.x, r1.x, c14.y +mul.f r4.y, c0.y, r1.w +mul.f r4.z, c0.x, r1.w +max.f r0.w, c16.x, r0.w +mad.f32 r2.w, c8.y, r4.x, r2.w +mad.f32 r3.x, c8.x, r4.x, r3.x +mad.f32 r3.y, c8.w, r4.x, r3.y +mov.f32f32 r4.w, r0.w +mad.f32 r2.x, c13.z, r0.z, r2.x +mul.f r1.x, r0.w, c5.x +mad.f32 r0.w, c8.z, r4.x, r1.y +mul.f r1.z, r4.w, c5.z +mul.f r1.y, r4.w, c5.y +add.f r4.w, r2.x, c14.z +mad.f32 r2.x, c1.w, r4.x, r2.y +mad.f32 r2.y, c1.z, r4.x, r2.z +mad.f32 r2.z, c1.y, r4.x, r4.y +mad.f32 r2.w, c9.y, r4.w, r2.w mul.f r0.x, c11.w, r0.x -mad.f32 r1.x, c9.z, r5.y, r2.y -mad.f32 r2.y, c9.y, r5.y, r2.z -mov.f32f32 r0.w, r0.w +mad.f32 r3.x, c9.x, r4.w, r3.x mad.f32 r0.x, c12.w, r0.y, r0.x -mad.f32 r0.y, c13.z, r1.y, r0.w +mad.f32 r0.y, c9.w, r4.w, r3.y mad.f32 r0.x, c13.w, r0.z, r0.x -mad.f32 r0.z, c8.x, r3.y, r4.x -mad.f32 r0.w, c1.w, r3.y, r4.y -mov.f32f32 r0.y, r0.y +mad.f32 r0.z, c9.z, r4.w, r0.w +mad.f32 r0.w, c2.w, r4.w, r2.x +mad.f32 r4.y, c2.z, r4.w, r2.y add.f r0.x, r0.x, c14.w -mad.f32 r0.z, c9.x, r5.y, r0.z -mad.f32 r0.w, c2.w, r5.y, r0.w -mad.f32 r0.y, (neg)c4.z, r0.y, r1.z -mad.f32 r1.y, c10.w, r0.x, r2.x -mad.f32 r1.x, c10.z, r0.x, r1.x -mad.f32 r1.z, c10.y, r0.x, r2.y -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.x, r1.x -mul.f r1.z, r1.z, c15.y -max.f r0.y, c16.x, r0.y -mov.f32f32 r2.w, r1.y -mov.f32f32 r2.z, r1.x -mov.f32f32 r2.y, r1.z -mov.f32f32 r0.y, r0.y -mad.f32 r0.z, c10.x, r0.x, r0.z +mad.f32 r5.x, c2.y, r4.w, r2.z +mad.f32 r4.x, c1.x, r4.x, r4.z +mad.f32 r3.y, c6.x, r4.w, c6.y +mad.f32 r2.x, c10.y, r0.x, r2.w +mad.f32 r3.x, c10.x, r0.x, r3.x +mad.f32 r2.w, c10.w, r0.x, r0.y +mad.f32 r2.z, c10.z, r0.x, r0.z +mul.f r2.y, r2.x, c15.y +mul.f r2.x, r3.x, c15.x mad.f32 r0.w, c3.w, r0.x, r0.w -mad.f32 r3.x, c1.z, r3.y, r4.z -mul.f r1.x, r0.y, c5.z -mul.f r1.y, r0.y, c5.y -mul.f r0.y, r0.y, c5.x -mul.f r0.z, r0.z, c15.x -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.y, r1.y -mov.f32f32 r0.y, r0.y -nop -mov.f32f32 r1.z, r1.x -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.x, r0.y -mov.f32f32 r2.x, r0.z -mov.f32f32 r0.w, r0.w -mad.f32 r0.y, c2.z, r5.y, r3.x -mad.f32 r0.z, c1.y, r3.y, r4.w -mad.f32 r0.y, c3.z, r0.x, r0.y -mad.f32 r3.x, c2.y, r5.y, r0.z -mad.f32 r3.y, c1.x, r3.y, r5.x -mad.f32 r4.x, c6.x, r5.y, c6.y -mov.f32f32 r0.z, r0.y -mad.f32 r0.y, c3.y, r0.x, r3.x -mad.f32 r3.x, c2.x, r5.y, r3.y -mov.f32f32 r3.y, r4.x -mad.f32 r1.w, c6.x, r1.w, c6.y -mov.f32f32 r0.y, r0.y -mad.f32 r0.x, c3.x, r0.x, r3.x -mov.f32f32 r3.y, r3.y -mov.f32f32 r1.w, r1.w -nop -mov.f32f32 r0.x, r0.x -mov.f32f32 r3.w, r3.w -mov.f32f32 r3.x, r1.w -mov.f32f32 r1.w, r3.z -mov.f32f32 r4.x, (0.000000) -mov.f32f32 r3.w, r3.w -nop -mov.f32f32 r3.z, r1.w -mov.f32f32 r1.w, r4.x +mad.f32 r0.z, c3.z, r0.x, r4.y +mad.f32 r0.y, c3.y, r0.x, r5.x +mad.f32 r4.x, c2.x, r4.w, r4.x +mad.f32 r3.x, c6.x, r1.w, c6.y +mad.f32 r0.x, c3.x, r0.x, r4.x +mov.f32f32 r1.w, (0.000000) end nop ; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11) ; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r3.z (0:0,cm=3,il=16,b=0) -; VERT: 107 instructions, 0 half, 6 full +; VERT: 71 instructions, 0 half, 6 full diff --git a/reference/0ad-alpine-valley/0ad-102.asm b/reference/0ad-alpine-valley/0ad-102.asm index 36d09e4..63ee7bc 100644 --- a/reference/0ad-alpine-valley/0ad-102.asm +++ b/reference/0ad-alpine-valley/0ad-102.asm @@ -6,8 +6,8 @@ @in(r0.w) in4 @in(r1.x) in5 @in(r1.y) in6 -@in(r4.x) in8 -@in(r4.y) in9 +@in(r3.z) in8 +@in(r3.w) in9 @out(r0.x) out0 @out(r0.y) out1 @out(r0.z) out2 @@ -24,223 +24,164 @@ @out(r3.y) out13 @out(r3.z) out14 @out(r3.w) out15 +@const(c18.x) 0x3ecccccd, 0x00000000, 0x3eaaa64c, 0x3e800000 +@const(c19.x) 0x3f000000, 0x40000000, 0xbf800000, 0x40400000 +@const(c20.x) 0x399d4952, 0x3e4ccccd, 0x3f99999a, 0x3fd9999a +@const(c21.x) 0x3c88ce70, 0x00000000, 0x00000000, 0x00000000 +@const(c22.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)floor.f r1.z, c14.z floor.f r1.w, c14.x absneg.f r2.x, (abs)c17.x absneg.f r2.y, (abs)c17.y add.f r1.z, c14.z, (neg)r1.z add.f r1.w, c14.x, (neg)r1.w -mul.f r2.z, c11.x, r0.w -add.f r2.x, r2.x, r2.y -mov.f32f32 r1.z, r1.z -mov.f32f32 r1.w, r1.w -mad.f32 r2.y, c12.x, r1.x, r2.z -mov.f32f32 r2.x, r2.x +mov.f32f32 r2.z, c18.y +mul.f r2.w, c11.x, r0.w max.f r1.z, r1.z, c18.y max.f r1.w, r1.w, c18.y -mov.f32f32 r2.y, r2.y -mul.f r2.z, c16.x, r2.x +add.f r2.x, r2.x, r2.y +add.f r2.y, r2.z, c19.x min.f r1.z, r1.z, c22.y min.f r1.w, r1.w, c22.y -mul.f r2.w, c11.z, r0.x -mad.f32 r2.y, c13.x, r1.y, r2.y +mul.f r2.z, c16.x, r2.x +mul.f r3.x, c11.z, r0.x max.f r1.z, r1.z, c18.x max.f r1.w, r1.w, c18.x -mad.f32 r2.w, c12.z, r0.y, r2.w -mov.f32f32 r2.y, r2.y -mov.f32f32 r1.z, r1.z -mov.f32f32 r1.w, r1.w -mad.f32 r2.w, c13.z, r0.z, r2.w -mul.f r3.x, c11.x, r0.x +mul.f r3.y, c11.x, r0.x +mad.f32 r3.x, c12.z, r0.y, r3.x mul.f r1.z, c16.x, r1.z -mad.f32 r3.x, c12.x, r0.y, r3.x -add.f r2.w, r2.w, c14.z -mad.f32 r3.x, c13.x, r0.z, r3.x -mov.f32f32 r1.z, r1.z -mad.f32 r2.z, c18.w, r2.z, r2.w -mul.f r2.y, r2.y, (neg)c4.x -mul.f r3.y, c11.y, r0.w +mad.f32 r3.y, c12.x, r0.y, r3.y +mad.f32 r3.x, c13.z, r0.z, r3.x +mad.f32 r3.y, c13.x, r0.z, r3.y mad.f32 r1.z, c18.z, r1.z, c14.x -add.f r3.x, r3.x, c14.x -mov.f32f32 r2.z, r2.z -mad.f32 r1.w, c16.x, r1.w, r3.x -mov.f32f32 r1.z, r1.z -mad.f32 r3.y, c12.y, r1.x, r3.y -mov.f32f32 r2.z, r2.z +floor.f r4.x, r2.y +mad.f32 r2.w, c12.x, r1.x, r2.w +add.f r3.y, r3.y, c14.x +add.f r1.z, r1.z, c19.x +mad.f32 r1.w, c16.x, r1.w, r3.y +add.f r3.x, r3.x, c14.z +add.f r2.y, r2.y, (neg)r4.x +floor.f r4.x, r1.z add.f r1.w, r1.w, c19.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r3.y, r3.y +mad.f32 r2.z, c18.w, r2.z, r3.x +mad.f32 r2.y, c19.y, r2.y, c19.z +add.f r1.z, r1.z, (neg)r4.x +floor.f r4.x, r1.w add.f r2.z, r2.z, c19.x -mad.f32 r3.y, c13.y, r1.y, r3.y -add.f r1.z, r1.z, c19.x -floor.f r3.z, r1.w -floor.f r3.w, r2.z -mov.f32f32 r3.y, r3.y -floor.f r4.z, r1.z -add.f r1.w, r1.w, (neg)r3.z -add.f r2.z, r2.z, (neg)r3.w -mad.f32 r2.y, (neg)c4.y, r3.y, r2.y -add.f r1.z, r1.z, (neg)r4.z -mad.f32 r1.w, c19.y, r1.w, c19.z -mad.f32 r2.z, c19.y, r2.z, c19.z -mov.f32f32 r2.y, r2.y +absneg.f r2.y, (abs)r2.y mad.f32 r1.z, c19.y, r1.z, c19.z -absneg.f r1.w, (abs)r1.w -absneg.f r2.z, (abs)r2.z -mul.f r0.w, c11.z, r0.w +add.f r4.x, r1.w, (neg)r4.x +floor.f r4.y, r2.z +mul.f r1.w, r2.y, r2.y absneg.f r1.z, (abs)r1.z -mul.f r3.y, c19.y, r1.w -mul.f r3.z, c19.y, r2.z -mul.f r1.w, r1.w, r1.w -mul.f r3.w, c19.y, r1.z -add.f r3.y, c19.w, (neg)r3.y -add.f r3.z, c19.w, (neg)r3.z -mul.f r2.z, r2.z, r2.z -add.f r3.w, c19.w, (neg)r3.w +mad.f32 r2.y, c19.y, r4.x, c19.z +add.f r2.z, r2.z, (neg)r4.y +mad.f32 r2.w, c13.x, r1.y, r2.w +mul.f r4.x, c19.y, r1.z +absneg.f r2.y, (abs)r2.y +mad.f32 r2.z, c19.y, r2.z, c19.z mul.f r1.z, r1.z, r1.z -mul.f r1.w, r1.w, r3.y -mul.f r2.z, r2.z, r3.z +add.f r4.x, c19.w, (neg)r4.x +mul.f r4.y, c19.y, r2.y +absneg.f r2.z, (abs)r2.z +mul.f r2.y, r2.y, r2.y +mul.f r1.z, r1.z, r4.x +mul.f r4.x, r0.y, c21.x +add.f r4.y, c19.w, (neg)r4.y +mul.f r4.z, c19.y, r2.z +mul.f r2.z, r2.z, r2.z +max.f r4.x, r4.x, c18.y +mul.f r2.y, r2.y, r4.y +mul.f r4.y, r0.x, r0.z +add.f r4.z, c19.w, (neg)r4.z +min.f r4.x, r4.x, c22.y +mul.f r4.w, r0.y, c20.x +mul.f r2.w, r2.w, (neg)c4.x +mul.f r5.x, c11.y, r0.w +min.f r4.x, r4.x, c18.w +mul.f r4.y, r4.y, r4.w +mul.f r2.z, r2.z, r4.z +mad.f32 r4.z, c12.y, r1.x, r5.x +mul.f r1.z, r1.z, r4.x +max.f r4.x, r4.y, c18.y +mad.f32 r4.y, c13.y, r1.y, r4.z +mov.f32f32 r2.x, r2.x +mov.f32f32 r4.z, r1.z +min.f r4.x, r4.x, c22.y +mad.f32 r2.w, (neg)c4.y, r4.y, r2.w +mul.f r0.w, c11.z, r0.w +max.f r2.x, r2.x, c20.z +min.f r4.x, r4.x, c20.y mad.f32 r0.w, c12.z, r1.x, r0.w -mul.f r1.x, r1.z, r3.w -mov.f32f32 r1.z, r1.w -mov.f32f32 r1.w, r2.z -mul.f r2.z, r0.x, r0.z -mov.f32f32 r1.x, r1.x -mul.f r3.y, r0.y, c21.x -mul.f r3.z, r0.y, c20.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r3.w, c18.y -mov.f32f32 r3.y, r3.y -mul.f r2.z, r2.z, r3.z -mad.f32 r0.w, c13.z, r1.y, r0.w -add.f r1.y, r3.w, c19.x -mov.f32f32 r3.y, r3.y -mov.f32f32 r2.z, r2.z -mov.f32f32 r0.w, r0.w -floor.f r3.z, r1.y -max.f r3.y, r3.y, c18.y -mov.f32f32 r2.z, r2.z -mad.f32 r0.w, (neg)c4.z, r0.w, r2.y -add.f r1.y, r1.y, (neg)r3.z -min.f r2.y, r3.y, c22.y -mov.f32f32 r2.z, r2.z -mov.f32f32 r0.w, r0.w -mad.f32 r1.y, c19.y, r1.y, c19.z -min.f r2.y, r2.y, c18.w -max.f r2.z, r2.z, c18.y -max.f r0.w, c18.y, r0.w -absneg.f r1.y, (abs)r1.y -mov.f32f32 r2.y, r2.y -min.f r2.z, r2.z, c22.y -mov.f32f32 r0.w, r0.w -mul.f r3.y, r1.y, r1.y -mul.f r1.x, r1.x, r2.y -min.f r1.y, r2.z, c20.y -mul.f r2.y, r0.w, c5.z -mul.f r2.z, r0.w, c5.y -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.y, r1.y -mov.f32f32 r2.y, r2.y -mov.f32f32 r2.z, r2.z -mul.f r0.w, r0.w, c5.x -mul.f r3.z, r1.z, r1.y -mul.f r3.w, r1.w, r1.y -max.f r1.w, r2.x, c20.z -mov.f32f32 r1.z, r2.y -mov.f32f32 r2.x, r3.z -mov.f32f32 r1.y, r2.z -mad.f32 r2.y, c17.x, r1.x, r2.x -mad.f32 r1.x, c17.y, r1.x, r2.x -mov.f32f32 r1.w, r1.w -mov.f32f32 r0.w, r0.w -mov.f32f32 r2.x, r2.y -mov.f32f32 r2.y, r1.x -min.f r2.z, r1.w, c20.w -mov.f32f32 r1.x, r0.w -mov.f32f32 r1.w, r3.y -mul.f r0.w, c11.y, r0.x -mov.f32f32 r2.z, r2.z -mad.f32 r0.w, c12.y, r0.y, r0.w +mul.f r1.x, c11.y, r0.x mul.f r0.x, c11.w, r0.x -mov.f32f32 r3.z, r4.y -mad.f32 r3.x, r3.w, r2.z, r3.x -mad.f32 r2.w, r3.w, r2.z, r2.w -mad.f32 r0.w, c13.y, r0.z, r0.w +mov.f32f32 r4.y, r4.x +mul.f r2.z, r2.z, r4.x +min.f r2.x, r2.x, c20.w +mad.f32 r0.w, c13.z, r1.y, r0.w +mul.f r1.y, r2.y, r4.y +mad.f32 r1.x, c12.y, r0.y, r1.x mad.f32 r0.x, c12.w, r0.y, r0.x -add.f r0.y, r3.x, r2.x -add.f r2.x, r2.w, r2.y -add.f r0.w, r0.w, c14.y -nop -mov.f32f32 r3.x, r0.y -mov.f32f32 r3.y, r2.x -mad.f32 r0.y, r3.w, r2.z, r0.w +mad.f32 r0.y, c13.y, r0.z, r1.x +mov.f32f32 r1.x, r1.y +mad.f32 r1.y, c17.y, r1.z, r1.y +mad.f32 r1.x, c17.x, r4.z, r1.x +mov.f32f32 r1.z, r2.x +mad.f32 r2.x, r2.z, r2.x, r3.x +add.f r0.y, r0.y, c14.y +mad.f32 r0.w, (neg)c4.z, r0.w, r2.w +mad.f32 r2.y, r2.z, r1.z, r3.y +add.f r1.y, r2.x, r1.y +mad.f32 r0.y, r2.z, r1.z, r0.y +max.f r4.x, c18.y, r0.w +add.f r0.w, r2.y, r1.x +mov.f32f32 r1.x, r1.y mad.f32 r0.x, c13.w, r0.z, r0.x -mul.f r0.z, c7.w, r3.x -mul.f r0.w, c7.z, r3.x -mad.f32 r0.z, c8.w, r0.y, r0.z -mad.f32 r0.w, c8.z, r0.y, r0.w -mul.f r2.x, c7.y, r3.x -mul.f r2.y, c7.x, r3.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -mad.f32 r0.z, c9.w, r3.y, r0.z -add.f r0.x, r0.x, c14.w -mad.f32 r0.w, c9.z, r3.y, r0.w -mad.f32 r2.x, c8.y, r0.y, r2.x -mad.f32 r2.y, c8.x, r0.y, r2.y -mad.f32 r0.z, c10.w, r0.x, r0.z -mad.f32 r0.w, c10.z, r0.x, r0.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r2.y, r2.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -mad.f32 r2.x, c9.y, r3.y, r2.x -mad.f32 r2.y, c9.x, r3.y, r2.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -mad.f32 r2.x, c10.y, r0.x, r2.x -mad.f32 r2.y, c10.x, r0.x, r2.y -mov.f32f32 r2.w, r0.z -mov.f32f32 r2.z, r0.w -mov.f32f32 r0.z, r2.x -mov.f32f32 r0.w, r2.y -mul.f r3.w, c0.w, r3.x -mul.f r4.y, c0.z, r3.x -mul.f r0.z, r0.z, c15.y -mul.f r0.w, r0.w, c15.x -(rpt1)nop -mov.f32f32 r2.y, r0.z -mov.f32f32 r2.x, r0.w -mad.f32 r0.z, c1.w, r0.y, r3.w -mad.f32 r0.w, c1.z, r0.y, r4.y -mad.f32 r0.z, c2.w, r3.y, r0.z -mad.f32 r0.w, c2.z, r3.y, r0.w -mad.f32 r0.z, c3.w, r0.x, r0.z -mad.f32 r3.w, c3.z, r0.x, r0.w -mul.f r4.y, c0.y, r3.x -mul.f r4.z, c0.x, r3.x -mov.f32f32 r0.w, r0.z -mov.f32f32 r0.z, r3.w -mad.f32 r3.w, c1.y, r0.y, r4.y -mad.f32 r0.y, c1.x, r0.y, r4.z -mad.f32 r3.w, c2.y, r3.y, r3.w -mad.f32 r0.y, c2.x, r3.y, r0.y -mad.f32 r3.w, c3.y, r0.x, r3.w -mad.f32 r0.x, c3.x, r0.x, r0.y -(rpt1)nop -mov.f32f32 r0.y, r3.w -mov.f32f32 r0.x, r0.x -mad.f32 r3.y, c6.x, r3.y, c6.y -mad.f32 r3.x, c6.x, r3.x, c6.y -(rpt1)nop -mov.f32f32 r3.y, r3.y -mov.f32f32 r3.x, r3.x -mov.f32f32 r3.w, r3.z -mov.f32f32 r3.z, r4.x -(rpt2)nop -mov.f32f32 r3.z, r3.z +nop +mov.f32f32 r1.z, r0.w +mul.f r3.x, c0.x, r0.w +mad.f32 r3.y, c6.x, r1.x, c6.y +mov.f32f32 r4.y, r4.x +mul.f r0.z, c7.y, r1.z +mul.f r0.w, c7.x, r1.z +mad.f32 r0.z, c8.y, r0.y, r0.z +mad.f32 r0.w, c8.x, r0.y, r0.w +mad.f32 r0.z, c9.y, r1.x, r0.z +add.f r4.z, r0.x, c14.w +mad.f32 r0.x, c9.x, r1.x, r0.w +mul.f r0.w, c7.w, r1.z +mul.f r2.x, c7.z, r1.z +mad.f32 r0.z, c10.y, r4.z, r0.z +mad.f32 r0.x, c10.x, r4.z, r0.x +mad.f32 r0.w, c8.w, r0.y, r0.w +mad.f32 r2.z, c8.z, r0.y, r2.x +mul.f r2.y, r0.z, c15.y +mul.f r2.x, r0.x, c15.x +mad.f32 r0.x, c9.w, r1.x, r0.w +mad.f32 r0.z, c9.z, r1.x, r2.z +mad.f32 r2.w, c10.w, r4.z, r0.x +mad.f32 r2.z, c10.z, r4.z, r0.z +mul.f r0.x, c0.w, r1.z +mul.f r0.z, c0.z, r1.z +mad.f32 r0.x, c1.w, r0.y, r0.x +mad.f32 r0.z, c1.z, r0.y, r0.z +mad.f32 r0.x, c2.w, r1.x, r0.x +mad.f32 r0.z, c2.z, r1.x, r0.z +mad.f32 r0.w, c3.w, r4.z, r0.x +mad.f32 r0.z, c3.z, r4.z, r0.z +mul.f r0.x, c0.y, r1.z +mad.f32 r3.x, c1.x, r0.y, r3.x +mad.f32 r0.x, c1.y, r0.y, r0.x +mad.f32 r0.y, c2.x, r1.y, r3.x +mad.f32 r1.x, c2.y, r1.x, r0.x +mad.f32 r0.x, c3.x, r4.z, r0.y +mad.f32 r0.y, c3.y, r4.z, r1.x +mad.f32 r3.x, c6.x, r1.z, c6.y +mul.f r1.z, r4.y, c5.z +mul.f r1.y, r4.y, c5.y +mul.f r1.x, r4.x, c5.x end ; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11) -; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r4.x (0:0,cm=3,il=16,b=0) -; VERT: 221 instructions, 0 half, 5 full +; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r3.z (0:0,cm=3,il=16,b=0) +; VERT: 152 instructions, 0 half, 6 full diff --git a/reference/0ad-alpine-valley/0ad-103.asm b/reference/0ad-alpine-valley/0ad-103.asm index 12fbb01..be30c1a 100644 --- a/reference/0ad-alpine-valley/0ad-103.asm +++ b/reference/0ad-alpine-valley/0ad-103.asm @@ -8,203 +8,139 @@ @out(r2.y) out1 @out(r2.z) out2 @out(r2.w) out3 +@const(c9.x) 0x3f000000, 0x00000000, 0x3d4ccccd, 0xbb449ba6 +@const(c10.x) 0xbf000000, 0x3f800000, 0x40000000, 0x3de38866 +@const(c11.x) 0x3fb8aa65, 0x3cf5c28f, 0x00000000, 0x00000000 +@const(c12.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.x, 4, r1.x add.f r0.y, r0.w, c9.y bary.f r0.w, 5, r1.x bary.f r1.z, 10, r1.x -add.f r1.w, r0.x, c10.x -bary.f r2.x, 8, r1.x +add.f r2.x, r0.x, c10.x +bary.f r1.w, 11, r1.x add.f r2.y, r0.w, c10.x -mov.f32f32 r1.z, r1.z -floor.f r2.z, r1.w +bary.f r3.y, 8, r1.x +floor.f r2.z, r2.x rcp r0.y, r0.y add.f r0.z, r0.z, c9.y floor.f r2.w, r2.y -mov.f32f32 r3.y, r1.z -add.f r1.z, r1.w, (neg)r2.z +bary.f r3.z, 9, r1.x +add.f r3.w, r2.x, (neg)r2.z (ss)mul.f r0.y, r0.z, r0.y -add.f r0.z, r2.y, (neg)r2.w -bary.f r1.w, 11, r1.x -mov.f32f32 r1.z, r1.z +absneg.f r0.z, (neg)c7.x +add.f r4.x, r2.y, (neg)r2.w +mov.f32f32 r4.y, r3.w +sam (f32)(xyzw)r2.x, r1.z, s#0, t#0 +(ss)add.f r1.z, r3.w, c10.y +mul.f r0.z, r0.z, c7.x +mov.f32f32 r1.w, r4.x +mul.f r3.w, c9.x, r4.y +add.f r4.y, c10.z, (neg)r4.y +mul.f r0.z, r0.z, r0.y mov.f32f32 r0.y, r0.y -absneg.f r2.y, (neg)c7.x -mov.f32f32 r0.z, r0.z -mul.f r2.z, c9.x, r1.z -mov.f32f32 r1.w, r1.w -add.f r2.w, c10.z, (neg)r1.z -add.f r3.w, c10.z, (neg)r0.z -mov.f32f32 r2.z, r2.z -mul.f r2.y, r2.y, c7.x -mul.f r4.x, c9.x, r0.z -mov.f32f32 r3.z, r1.w -add.f r0.x, r0.x, (neg)r2.z -mul.f r1.w, r2.y, r0.y -mov.f32f32 r2.y, r4.x -mov.f32f32 r2.z, r2.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r1.w, r1.w -add.f r0.w, r0.w, (neg)r2.y -sam (f32)(xyzw)r4.x, r3.y, s#0, t#0 -(sy)cmps.f.lt r2.y, r4.w, c9.z -add.f r2.w, c10.x, r0.x -mul.f r0.y, r1.w, r0.y +add.f r0.x, r0.x, (neg)r3.w +mul.f r3.w, c9.x, r1.w +mov.f32f32 r4.z, r4.y +mul.f r0.y, r0.z, r0.y +mov.f32f32 r0.z, r0.x +add.f r0.w, r0.w, (neg)r3.w add.f r0.x, c10.y, r0.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.w, r2.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x -add.f r2.w, c10.y, r0.w -mul.f r1.w, r1.w, c3.z mul.f r0.y, r0.y, c11.x -mul.f r0.x, r0.x, c3.z -mov.f32f32 r2.w, r2.w -(ss)mov.f32f32 r3.y, r1.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r3.z, r0.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r5.x, r3.y -add.f r0.w, c10.x, r0.w -mov.f32f32 r3.y, r3.z -mov.f32f32 r5.w, r0.x -mov.f32f32 r0.x, r1.w -mov.f32f32 r0.w, r0.w -exp2 r0.y, r0.y -(ss)mov.f32f32 r0.y, r0.y -mov.f32f32 r6.z, r3.y -mul.f r1.w, r2.w, c3.w -mul.f r0.w, r0.w, c3.w -add.f r2.w, c12.y, (neg)r0.y -mov.f32f32 r7.y, r0.x -mov.f32f32 r0.x, r1.w -mov.f32f32 r3.y, r0.w -mul.f r2.w, r2.w, c7.y -mul.f r0.y, r0.y, c10.y -mov.f32f32 r1.w, r1.w -mov.f32f32 r5.y, r3.y -bary.f r3.y, 6, r1.x -add.f r0.y, r0.y, r2.w -mov.f32f32 r1.w, r1.w -mov.f32f32 r0.w, r0.w -add.f r2.w, r3.y, c9.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r6.w, r1.w -mov.f32f32 r6.x, r0.w -mov.f32f32 r0.w, r2.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.w, r2.w -mov.f32f32 r3.y, r2.w -mov.f32f32 r5.z, r0.w -max.f r0.y, r0.y, c9.y -mov.f32f32 r7.x, r1.w -mov.f32f32 r6.y, r3.y -mov.f32f32 r7.z, r0.x -mov.f32f32 r0.x, r2.w -cov.u32f32 r0.w, r2.y -sam.s (f32)(x)r7.w, r5.x, s#2, t#2 -(sy)mov.f32f32 r1.w, r7.w -min.f r0.y, r0.y, c10.y -sam.s (f32)(x)r7.w, r6.z, s#2, t#2 -(sy)mov.f32f32 r2.y, r7.w -(ss)nop -sam.s (f32)(x)r5.x, r5.w, s#2, t#2 -(sy)mov.f32f32 r2.w, r5.x -mov.f32f32 r1.w, r1.w -mov.f32f32 r3.y, r3.w -add.f r3.z, c12.y, (neg)r0.y -add.f r3.w, c12.y, (neg)r0.y -add.f r5.x, c12.y, (neg)r0.y -mul.f r5.y, r2.z, r3.y -mul.f r3.z, r3.z, c6.z -mul.f r3.w, r3.w, c6.y -mul.f r5.x, r5.x, c6.x -mul.f r1.w, r5.y, r1.w -add.f r1.z, r1.z, c10.y -mov.f32f32 r7.w, r0.x -mov.f32f32 r0.x, (0.000000) -add.f r0.z, r0.z, c10.y -mul.f r3.y, r1.z, r3.y -mov.f32f32 r2.x, r2.x -cmps.f.ne p0.x, r0.w, r0.x -mul.f r0.x, r2.z, r0.z -mad.f32 r0.w, r3.y, r2.w, r1.w -sam.s (f32)(x)r5.y, r7.y, s#2, t#2 +add.f r0.z, c10.x, r0.z +mov.f32f32 r3.w, r0.w +mul.f r4.w, r0.x, c3.z +add.f r0.x, c10.y, r0.w +mul.f r5.z, r0.z, c3.z +add.f r0.z, c10.x, r3.w +mov.f32f32 r6.y, r4.w +mul.f r5.x, r0.x, c3.w +mov.f32f32 r7.x, r5.z +mul.f r6.z, r0.z, c3.w +exp2 r0.x, r0.y +(ss)mov.f32f32 r0.y, r0.x +bary.f r0.z, 6, r1.x +mov.f32f32 r5.w, r5.x +mov.f32f32 r7.y, r6.z +add.f r0.y, c12.y, (neg)r0.y +add.f r5.y, r0.z, c9.w +add.f r0.z, c10.z, (neg)r1.w +mul.f r0.x, r0.x, c10.y +add.f r0.w, r4.x, c10.y +mov.f32f32 r7.z, r5.y +mul.f r0.y, r0.y, c7.y +mov.f32f32 r6.w, r5.y +mov.f32f32 r6.x, r5.y (rpt1)nop -(sy)mov.f32f32 r1.w, r5.y -mov.f32f32 r0.w, r0.w -bary.f r2.z, 0, r1.x -mov.f32f32 r5.y, r2.x -mad.f32 r0.x, r0.x, r1.w, r0.w -bary.f r0.w, 1, r1.x -bary.f r1.w, 9, r1.x +add.f r0.x, r0.x, r0.y +sam.s (f32)(x)r7.x, r7.x, s#2, t#2 +mov.f32f32 r0.y, r0.z +nop +(ss)nop +sam.s (f32)(x)r7.y, r6.y, s#2, t#2 +sam.s (f32)(x)r7.z, r4.w, s#2, t#2 mul.f r0.z, r1.z, r0.z -mov.f32f32 r0.x, r0.x -bary.f (ei)r1.x, 2, r1.x -mov.f32f32 r1.y, r1.w -mad.f32 r0.x, r0.z, r2.y, r0.x +mul.f r1.w, r4.y, r0.w +mul.f r0.y, r4.z, r0.y +max.f r0.x, r0.x, c9.y +sam.s (f32)(x)r3.w, r5.z, s#2, t#2 +(sy)cmps.f.lt r4.x, r2.w, c9.z +mul.f r0.w, r1.z, r0.w +mul.f r0.y, r0.y, r7.x +min.f r0.x, r0.x, c10.y +mad.f32 r0.y, r0.z, r7.y, r0.y +cov.u32f32 r0.z, r4.x +mad.f32 r0.y, r1.w, r3.w, r0.y +add.f r1.z, c12.y, (neg)r0.x +mad.f32 r0.y, r0.w, r7.z, r0.y +add.f r0.w, c12.y, (neg)r0.x +add.f r1.w, c12.y, (neg)r0.x +mov.f32f32 r3.w, (0.000000) +mul.f r0.y, c10.w, r0.y +bary.f r4.x, 0, r1.x +mul.f r1.z, r1.z, c6.z +mul.f r0.w, r0.w, c6.y +mov.f32f32 r4.y, r0.y +bary.f r4.z, 2, r1.x +bary.f (ei)r1.x, 1, r1.x +mul.f r1.y, r2.x, r4.x +mul.f r1.w, r1.w, c6.x +mul.f r4.x, r2.z, r4.z +mul.f r1.x, r2.y, r1.x +mul.f r0.y, r1.y, r0.y +cmps.f.ne p0.x, r0.z, r3.w +mul.f r0.z, r4.x, r4.y +mul.f r1.x, r1.x, r4.y +mad.f32 r0.z, c5.z, r2.z, r0.z +mad.f32 r1.x, c5.y, r2.y, r1.x +mad.f32 r0.y, c5.x, r2.x, r0.y +sam (f32)(w)r3.y, r3.y, s#1, t#1 +(sy)cmps.f.lt r1.y, r4.x, c11.y +mul.f r0.z, r0.x, r0.z +mul.f r1.x, r0.x, r1.x +mul.f r0.x, r0.x, r0.y kill p0.x -mov.f32f32 r0.z, r4.w -mov.f32f32 r5.z, r1.y -mov.f32f32 r0.x, r0.x -mul.f r1.x, r4.z, r1.x -mul.f r0.w, r4.y, r0.w -mov.f32f32 r2.w, r0.z -mul.f r0.x, c10.w, r0.x -mul.f r0.z, r4.x, r2.z -sam (f32)(w)r1.y, r5.y, s#1, t#1 -nop -(sy)cmps.f.lt r1.y, r2.x, c11.y -mov.f32f32 r0.x, r0.x -mov.f32f32 r1.z, r2.x -mov.f32f32 r1.w, c9.y -nop -mul.f r1.x, r1.x, r0.x -mul.f r0.w, r0.w, r0.x -mul.f r0.x, r0.z, r0.x +add.f r0.y, r0.z, r1.z cov.u32f32 r0.z, r1.y -mov.f32f32 r1.x, r1.x -mov.f32f32 r0.w, r0.w -mad.f32 r1.x, c5.z, r4.z, r1.x -mad.f32 r0.w, c5.y, r4.y, r0.w -mov.f32f32 r0.x, r0.x +add.f r0.w, r1.x, r0.w +add.f r0.x, r0.x, r1.w +mov.f32f32 r1.x, c9.y cmps.f.ne r0.z, r0.z, c9.y -mov.f32f32 r1.x, r1.x -mov.f32f32 r0.w, r0.w -mad.f32 r0.x, c5.x, r4.x, r0.x -mov.f32f32 r1.y, r1.z -mul.f r1.x, r0.y, r1.x -mul.f r0.w, r0.y, r0.w -mov.f32f32 r0.x, r0.x -sel.b32 r0.z, r1.w, r0.z, r1.y -add.f r1.x, r1.x, r3.z -add.f r0.w, r0.w, r3.w -mul.f r0.x, r0.y, r0.x -nop -mul.f r0.y, r1.x, r0.z +(rpt2)nop +sel.b32 r0.z, r1.x, r0.z, r4.x +(rpt2)nop +mul.f r0.y, r0.y, r0.z mul.f r0.w, r0.w, r0.z -add.f r0.x, r0.x, r5.x -nop -mul.f r0.y, r0.y, c4.z -mul.f r0.w, r0.w, c4.y +(rpt1)nop +mul.f r2.z, r0.y, c4.z +mul.f r2.y, r0.w, c4.y mul.f r0.x, r0.x, r0.z -nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.w -mul.f r0.x, r0.x, c4.x -nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.x, r0.x -nop -mov.f32f32 r2.z, r0.y -mov.f32f32 r2.y, r0.z -mov.f32f32 r0.x, r0.x (rpt2)nop -mov.f32f32 r2.x, r0.x +mul.f r2.x, r0.x, c4.x end nop nop nop ; FRAG: outputs: r2.x (1:0) ; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r2.x (5:10,cm=f,il=12,b=1) r1.x (5:11,cm=f,il=16,b=1) -; FRAG: 196 instructions, 0 half, 8 full +; FRAG: 133 instructions, 0 half, 8 full diff --git a/reference/0ad-alpine-valley/0ad-104.asm b/reference/0ad-alpine-valley/0ad-104.asm index 36d09e4..63ee7bc 100644 --- a/reference/0ad-alpine-valley/0ad-104.asm +++ b/reference/0ad-alpine-valley/0ad-104.asm @@ -6,8 +6,8 @@ @in(r0.w) in4 @in(r1.x) in5 @in(r1.y) in6 -@in(r4.x) in8 -@in(r4.y) in9 +@in(r3.z) in8 +@in(r3.w) in9 @out(r0.x) out0 @out(r0.y) out1 @out(r0.z) out2 @@ -24,223 +24,164 @@ @out(r3.y) out13 @out(r3.z) out14 @out(r3.w) out15 +@const(c18.x) 0x3ecccccd, 0x00000000, 0x3eaaa64c, 0x3e800000 +@const(c19.x) 0x3f000000, 0x40000000, 0xbf800000, 0x40400000 +@const(c20.x) 0x399d4952, 0x3e4ccccd, 0x3f99999a, 0x3fd9999a +@const(c21.x) 0x3c88ce70, 0x00000000, 0x00000000, 0x00000000 +@const(c22.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)floor.f r1.z, c14.z floor.f r1.w, c14.x absneg.f r2.x, (abs)c17.x absneg.f r2.y, (abs)c17.y add.f r1.z, c14.z, (neg)r1.z add.f r1.w, c14.x, (neg)r1.w -mul.f r2.z, c11.x, r0.w -add.f r2.x, r2.x, r2.y -mov.f32f32 r1.z, r1.z -mov.f32f32 r1.w, r1.w -mad.f32 r2.y, c12.x, r1.x, r2.z -mov.f32f32 r2.x, r2.x +mov.f32f32 r2.z, c18.y +mul.f r2.w, c11.x, r0.w max.f r1.z, r1.z, c18.y max.f r1.w, r1.w, c18.y -mov.f32f32 r2.y, r2.y -mul.f r2.z, c16.x, r2.x +add.f r2.x, r2.x, r2.y +add.f r2.y, r2.z, c19.x min.f r1.z, r1.z, c22.y min.f r1.w, r1.w, c22.y -mul.f r2.w, c11.z, r0.x -mad.f32 r2.y, c13.x, r1.y, r2.y +mul.f r2.z, c16.x, r2.x +mul.f r3.x, c11.z, r0.x max.f r1.z, r1.z, c18.x max.f r1.w, r1.w, c18.x -mad.f32 r2.w, c12.z, r0.y, r2.w -mov.f32f32 r2.y, r2.y -mov.f32f32 r1.z, r1.z -mov.f32f32 r1.w, r1.w -mad.f32 r2.w, c13.z, r0.z, r2.w -mul.f r3.x, c11.x, r0.x +mul.f r3.y, c11.x, r0.x +mad.f32 r3.x, c12.z, r0.y, r3.x mul.f r1.z, c16.x, r1.z -mad.f32 r3.x, c12.x, r0.y, r3.x -add.f r2.w, r2.w, c14.z -mad.f32 r3.x, c13.x, r0.z, r3.x -mov.f32f32 r1.z, r1.z -mad.f32 r2.z, c18.w, r2.z, r2.w -mul.f r2.y, r2.y, (neg)c4.x -mul.f r3.y, c11.y, r0.w +mad.f32 r3.y, c12.x, r0.y, r3.y +mad.f32 r3.x, c13.z, r0.z, r3.x +mad.f32 r3.y, c13.x, r0.z, r3.y mad.f32 r1.z, c18.z, r1.z, c14.x -add.f r3.x, r3.x, c14.x -mov.f32f32 r2.z, r2.z -mad.f32 r1.w, c16.x, r1.w, r3.x -mov.f32f32 r1.z, r1.z -mad.f32 r3.y, c12.y, r1.x, r3.y -mov.f32f32 r2.z, r2.z +floor.f r4.x, r2.y +mad.f32 r2.w, c12.x, r1.x, r2.w +add.f r3.y, r3.y, c14.x +add.f r1.z, r1.z, c19.x +mad.f32 r1.w, c16.x, r1.w, r3.y +add.f r3.x, r3.x, c14.z +add.f r2.y, r2.y, (neg)r4.x +floor.f r4.x, r1.z add.f r1.w, r1.w, c19.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r3.y, r3.y +mad.f32 r2.z, c18.w, r2.z, r3.x +mad.f32 r2.y, c19.y, r2.y, c19.z +add.f r1.z, r1.z, (neg)r4.x +floor.f r4.x, r1.w add.f r2.z, r2.z, c19.x -mad.f32 r3.y, c13.y, r1.y, r3.y -add.f r1.z, r1.z, c19.x -floor.f r3.z, r1.w -floor.f r3.w, r2.z -mov.f32f32 r3.y, r3.y -floor.f r4.z, r1.z -add.f r1.w, r1.w, (neg)r3.z -add.f r2.z, r2.z, (neg)r3.w -mad.f32 r2.y, (neg)c4.y, r3.y, r2.y -add.f r1.z, r1.z, (neg)r4.z -mad.f32 r1.w, c19.y, r1.w, c19.z -mad.f32 r2.z, c19.y, r2.z, c19.z -mov.f32f32 r2.y, r2.y +absneg.f r2.y, (abs)r2.y mad.f32 r1.z, c19.y, r1.z, c19.z -absneg.f r1.w, (abs)r1.w -absneg.f r2.z, (abs)r2.z -mul.f r0.w, c11.z, r0.w +add.f r4.x, r1.w, (neg)r4.x +floor.f r4.y, r2.z +mul.f r1.w, r2.y, r2.y absneg.f r1.z, (abs)r1.z -mul.f r3.y, c19.y, r1.w -mul.f r3.z, c19.y, r2.z -mul.f r1.w, r1.w, r1.w -mul.f r3.w, c19.y, r1.z -add.f r3.y, c19.w, (neg)r3.y -add.f r3.z, c19.w, (neg)r3.z -mul.f r2.z, r2.z, r2.z -add.f r3.w, c19.w, (neg)r3.w +mad.f32 r2.y, c19.y, r4.x, c19.z +add.f r2.z, r2.z, (neg)r4.y +mad.f32 r2.w, c13.x, r1.y, r2.w +mul.f r4.x, c19.y, r1.z +absneg.f r2.y, (abs)r2.y +mad.f32 r2.z, c19.y, r2.z, c19.z mul.f r1.z, r1.z, r1.z -mul.f r1.w, r1.w, r3.y -mul.f r2.z, r2.z, r3.z +add.f r4.x, c19.w, (neg)r4.x +mul.f r4.y, c19.y, r2.y +absneg.f r2.z, (abs)r2.z +mul.f r2.y, r2.y, r2.y +mul.f r1.z, r1.z, r4.x +mul.f r4.x, r0.y, c21.x +add.f r4.y, c19.w, (neg)r4.y +mul.f r4.z, c19.y, r2.z +mul.f r2.z, r2.z, r2.z +max.f r4.x, r4.x, c18.y +mul.f r2.y, r2.y, r4.y +mul.f r4.y, r0.x, r0.z +add.f r4.z, c19.w, (neg)r4.z +min.f r4.x, r4.x, c22.y +mul.f r4.w, r0.y, c20.x +mul.f r2.w, r2.w, (neg)c4.x +mul.f r5.x, c11.y, r0.w +min.f r4.x, r4.x, c18.w +mul.f r4.y, r4.y, r4.w +mul.f r2.z, r2.z, r4.z +mad.f32 r4.z, c12.y, r1.x, r5.x +mul.f r1.z, r1.z, r4.x +max.f r4.x, r4.y, c18.y +mad.f32 r4.y, c13.y, r1.y, r4.z +mov.f32f32 r2.x, r2.x +mov.f32f32 r4.z, r1.z +min.f r4.x, r4.x, c22.y +mad.f32 r2.w, (neg)c4.y, r4.y, r2.w +mul.f r0.w, c11.z, r0.w +max.f r2.x, r2.x, c20.z +min.f r4.x, r4.x, c20.y mad.f32 r0.w, c12.z, r1.x, r0.w -mul.f r1.x, r1.z, r3.w -mov.f32f32 r1.z, r1.w -mov.f32f32 r1.w, r2.z -mul.f r2.z, r0.x, r0.z -mov.f32f32 r1.x, r1.x -mul.f r3.y, r0.y, c21.x -mul.f r3.z, r0.y, c20.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r3.w, c18.y -mov.f32f32 r3.y, r3.y -mul.f r2.z, r2.z, r3.z -mad.f32 r0.w, c13.z, r1.y, r0.w -add.f r1.y, r3.w, c19.x -mov.f32f32 r3.y, r3.y -mov.f32f32 r2.z, r2.z -mov.f32f32 r0.w, r0.w -floor.f r3.z, r1.y -max.f r3.y, r3.y, c18.y -mov.f32f32 r2.z, r2.z -mad.f32 r0.w, (neg)c4.z, r0.w, r2.y -add.f r1.y, r1.y, (neg)r3.z -min.f r2.y, r3.y, c22.y -mov.f32f32 r2.z, r2.z -mov.f32f32 r0.w, r0.w -mad.f32 r1.y, c19.y, r1.y, c19.z -min.f r2.y, r2.y, c18.w -max.f r2.z, r2.z, c18.y -max.f r0.w, c18.y, r0.w -absneg.f r1.y, (abs)r1.y -mov.f32f32 r2.y, r2.y -min.f r2.z, r2.z, c22.y -mov.f32f32 r0.w, r0.w -mul.f r3.y, r1.y, r1.y -mul.f r1.x, r1.x, r2.y -min.f r1.y, r2.z, c20.y -mul.f r2.y, r0.w, c5.z -mul.f r2.z, r0.w, c5.y -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.y, r1.y -mov.f32f32 r2.y, r2.y -mov.f32f32 r2.z, r2.z -mul.f r0.w, r0.w, c5.x -mul.f r3.z, r1.z, r1.y -mul.f r3.w, r1.w, r1.y -max.f r1.w, r2.x, c20.z -mov.f32f32 r1.z, r2.y -mov.f32f32 r2.x, r3.z -mov.f32f32 r1.y, r2.z -mad.f32 r2.y, c17.x, r1.x, r2.x -mad.f32 r1.x, c17.y, r1.x, r2.x -mov.f32f32 r1.w, r1.w -mov.f32f32 r0.w, r0.w -mov.f32f32 r2.x, r2.y -mov.f32f32 r2.y, r1.x -min.f r2.z, r1.w, c20.w -mov.f32f32 r1.x, r0.w -mov.f32f32 r1.w, r3.y -mul.f r0.w, c11.y, r0.x -mov.f32f32 r2.z, r2.z -mad.f32 r0.w, c12.y, r0.y, r0.w +mul.f r1.x, c11.y, r0.x mul.f r0.x, c11.w, r0.x -mov.f32f32 r3.z, r4.y -mad.f32 r3.x, r3.w, r2.z, r3.x -mad.f32 r2.w, r3.w, r2.z, r2.w -mad.f32 r0.w, c13.y, r0.z, r0.w +mov.f32f32 r4.y, r4.x +mul.f r2.z, r2.z, r4.x +min.f r2.x, r2.x, c20.w +mad.f32 r0.w, c13.z, r1.y, r0.w +mul.f r1.y, r2.y, r4.y +mad.f32 r1.x, c12.y, r0.y, r1.x mad.f32 r0.x, c12.w, r0.y, r0.x -add.f r0.y, r3.x, r2.x -add.f r2.x, r2.w, r2.y -add.f r0.w, r0.w, c14.y -nop -mov.f32f32 r3.x, r0.y -mov.f32f32 r3.y, r2.x -mad.f32 r0.y, r3.w, r2.z, r0.w +mad.f32 r0.y, c13.y, r0.z, r1.x +mov.f32f32 r1.x, r1.y +mad.f32 r1.y, c17.y, r1.z, r1.y +mad.f32 r1.x, c17.x, r4.z, r1.x +mov.f32f32 r1.z, r2.x +mad.f32 r2.x, r2.z, r2.x, r3.x +add.f r0.y, r0.y, c14.y +mad.f32 r0.w, (neg)c4.z, r0.w, r2.w +mad.f32 r2.y, r2.z, r1.z, r3.y +add.f r1.y, r2.x, r1.y +mad.f32 r0.y, r2.z, r1.z, r0.y +max.f r4.x, c18.y, r0.w +add.f r0.w, r2.y, r1.x +mov.f32f32 r1.x, r1.y mad.f32 r0.x, c13.w, r0.z, r0.x -mul.f r0.z, c7.w, r3.x -mul.f r0.w, c7.z, r3.x -mad.f32 r0.z, c8.w, r0.y, r0.z -mad.f32 r0.w, c8.z, r0.y, r0.w -mul.f r2.x, c7.y, r3.x -mul.f r2.y, c7.x, r3.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -mad.f32 r0.z, c9.w, r3.y, r0.z -add.f r0.x, r0.x, c14.w -mad.f32 r0.w, c9.z, r3.y, r0.w -mad.f32 r2.x, c8.y, r0.y, r2.x -mad.f32 r2.y, c8.x, r0.y, r2.y -mad.f32 r0.z, c10.w, r0.x, r0.z -mad.f32 r0.w, c10.z, r0.x, r0.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r2.y, r2.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -mad.f32 r2.x, c9.y, r3.y, r2.x -mad.f32 r2.y, c9.x, r3.y, r2.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -mad.f32 r2.x, c10.y, r0.x, r2.x -mad.f32 r2.y, c10.x, r0.x, r2.y -mov.f32f32 r2.w, r0.z -mov.f32f32 r2.z, r0.w -mov.f32f32 r0.z, r2.x -mov.f32f32 r0.w, r2.y -mul.f r3.w, c0.w, r3.x -mul.f r4.y, c0.z, r3.x -mul.f r0.z, r0.z, c15.y -mul.f r0.w, r0.w, c15.x -(rpt1)nop -mov.f32f32 r2.y, r0.z -mov.f32f32 r2.x, r0.w -mad.f32 r0.z, c1.w, r0.y, r3.w -mad.f32 r0.w, c1.z, r0.y, r4.y -mad.f32 r0.z, c2.w, r3.y, r0.z -mad.f32 r0.w, c2.z, r3.y, r0.w -mad.f32 r0.z, c3.w, r0.x, r0.z -mad.f32 r3.w, c3.z, r0.x, r0.w -mul.f r4.y, c0.y, r3.x -mul.f r4.z, c0.x, r3.x -mov.f32f32 r0.w, r0.z -mov.f32f32 r0.z, r3.w -mad.f32 r3.w, c1.y, r0.y, r4.y -mad.f32 r0.y, c1.x, r0.y, r4.z -mad.f32 r3.w, c2.y, r3.y, r3.w -mad.f32 r0.y, c2.x, r3.y, r0.y -mad.f32 r3.w, c3.y, r0.x, r3.w -mad.f32 r0.x, c3.x, r0.x, r0.y -(rpt1)nop -mov.f32f32 r0.y, r3.w -mov.f32f32 r0.x, r0.x -mad.f32 r3.y, c6.x, r3.y, c6.y -mad.f32 r3.x, c6.x, r3.x, c6.y -(rpt1)nop -mov.f32f32 r3.y, r3.y -mov.f32f32 r3.x, r3.x -mov.f32f32 r3.w, r3.z -mov.f32f32 r3.z, r4.x -(rpt2)nop -mov.f32f32 r3.z, r3.z +nop +mov.f32f32 r1.z, r0.w +mul.f r3.x, c0.x, r0.w +mad.f32 r3.y, c6.x, r1.x, c6.y +mov.f32f32 r4.y, r4.x +mul.f r0.z, c7.y, r1.z +mul.f r0.w, c7.x, r1.z +mad.f32 r0.z, c8.y, r0.y, r0.z +mad.f32 r0.w, c8.x, r0.y, r0.w +mad.f32 r0.z, c9.y, r1.x, r0.z +add.f r4.z, r0.x, c14.w +mad.f32 r0.x, c9.x, r1.x, r0.w +mul.f r0.w, c7.w, r1.z +mul.f r2.x, c7.z, r1.z +mad.f32 r0.z, c10.y, r4.z, r0.z +mad.f32 r0.x, c10.x, r4.z, r0.x +mad.f32 r0.w, c8.w, r0.y, r0.w +mad.f32 r2.z, c8.z, r0.y, r2.x +mul.f r2.y, r0.z, c15.y +mul.f r2.x, r0.x, c15.x +mad.f32 r0.x, c9.w, r1.x, r0.w +mad.f32 r0.z, c9.z, r1.x, r2.z +mad.f32 r2.w, c10.w, r4.z, r0.x +mad.f32 r2.z, c10.z, r4.z, r0.z +mul.f r0.x, c0.w, r1.z +mul.f r0.z, c0.z, r1.z +mad.f32 r0.x, c1.w, r0.y, r0.x +mad.f32 r0.z, c1.z, r0.y, r0.z +mad.f32 r0.x, c2.w, r1.x, r0.x +mad.f32 r0.z, c2.z, r1.x, r0.z +mad.f32 r0.w, c3.w, r4.z, r0.x +mad.f32 r0.z, c3.z, r4.z, r0.z +mul.f r0.x, c0.y, r1.z +mad.f32 r3.x, c1.x, r0.y, r3.x +mad.f32 r0.x, c1.y, r0.y, r0.x +mad.f32 r0.y, c2.x, r1.y, r3.x +mad.f32 r1.x, c2.y, r1.x, r0.x +mad.f32 r0.x, c3.x, r4.z, r0.y +mad.f32 r0.y, c3.y, r4.z, r1.x +mad.f32 r3.x, c6.x, r1.z, c6.y +mul.f r1.z, r4.y, c5.z +mul.f r1.y, r4.y, c5.y +mul.f r1.x, r4.x, c5.x end ; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11) -; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r4.x (0:0,cm=3,il=16,b=0) -; VERT: 221 instructions, 0 half, 5 full +; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r3.z (0:0,cm=3,il=16,b=0) +; VERT: 152 instructions, 0 half, 6 full diff --git a/reference/0ad-alpine-valley/0ad-105.asm b/reference/0ad-alpine-valley/0ad-105.asm index 8e79f89..270bdcf 100644 --- a/reference/0ad-alpine-valley/0ad-105.asm +++ b/reference/0ad-alpine-valley/0ad-105.asm @@ -1,17 +1,17 @@ ; options: ; VERT: new compiler -@in(r2.x) in0 -@in(r2.y) in1 -@in(r2.z) in2 -@in(r2.w) in3 +@in(r4.x) in0 +@in(r4.y) in1 +@in(r4.z) in2 +@in(r4.w) in3 @in(r1.x) in4 @in(r1.y) in5 @in(r1.z) in6 @in(r1.w) in7 -@in(r4.x) in8 -@in(r4.y) in9 -@in(r4.z) in10 -@in(r4.w) in11 +@in(r2.x) in8 +@in(r2.y) in9 +@in(r2.z) in10 +@in(r2.w) in11 @out(r0.x) out0 @out(r0.y) out1 @out(r0.z) out2 @@ -28,55 +28,40 @@ @out(r3.y) out13 @out(r3.z) out14 @out(r3.w) out15 -(sy)(ss)mul.f r0.x, c3.x, r2.x -mul.f r0.y, c2.x, r2.x -mad.f32 r0.x, c3.y, r2.y, r0.x -mad.f32 r0.y, c2.y, r2.y, r0.y -mul.f r0.z, c1.x, r2.x -mul.f r0.w, c0.x, r2.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y -mad.f32 r0.x, c3.z, r2.z, r0.x -mad.f32 r0.y, c2.z, r2.z, r0.y -mad.f32 r3.x, c1.y, r2.y, r0.z -mad.f32 r2.y, c0.y, r2.y, r0.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y -mad.f32 r0.w, c3.w, r2.w, r0.x -mad.f32 r0.z, c2.w, r2.w, r0.y -mov.f32f32 r0.x, r3.x -mov.f32f32 r0.y, r2.y -mad.f32 r0.x, c1.z, r2.z, r0.x -mad.f32 r0.y, c0.z, r2.z, r0.y -mov.f32f32 r1.w, r1.w -mov.f32f32 r1.z, r1.z -mov.f32f32 r0.x, r0.x -mov.f32f32 r2.y, r0.y -mad.f32 r0.y, c1.w, r2.w, r0.x -mad.f32 r0.x, c0.w, r2.w, r2.y +@const(c5.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 +(sy)(ss)mul.f r0.x, c3.x, r4.x +mul.f r0.y, c2.x, r4.x +mad.f32 r0.x, c3.y, r4.y, r0.x +mad.f32 r0.y, c2.y, r4.y, r0.y +mad.f32 r0.x, c3.z, r4.z, r0.x +mad.f32 r0.y, c2.z, r4.z, r0.y +mad.f32 r0.w, c3.w, r4.w, r0.x +mad.f32 r0.z, c2.w, r4.w, r0.y +mul.f r0.x, c1.x, r4.x +mul.f r0.y, c0.x, r4.x +mad.f32 r0.x, c1.y, r4.y, r0.x +mad.f32 r0.y, c0.y, r4.y, r0.y +mad.f32 r0.x, c1.z, r4.z, r0.x +mad.f32 r3.x, c0.z, r4.z, r0.y +mad.f32 r0.y, c1.w, r4.w, r0.x +mad.f32 r0.x, c0.w, r4.w, r3.x max.f r1.w, r1.w, c5.x max.f r1.z, r1.z, c5.x -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.x, r1.x -min.f r1.w, r1.w, c5.y -min.f r1.z, r1.z, c5.y max.f r1.y, r1.y, c5.x max.f r1.x, r1.x, c5.x -(rpt1)nop +min.f r1.w, r1.w, c5.y +min.f r1.z, r1.z, c5.y min.f r1.y, r1.y, c5.y min.f r1.x, r1.x, c5.y -mad.f32 r3.w, c4.x, r2.z, c4.y -mad.f32 r3.z, c4.x, r2.z, c4.y -mad.f32 r3.y, c4.x, r2.z, c4.y -mad.f32 r3.x, c4.x, r2.x, c4.y -mov.f32f32 r2.w, r4.w -mov.f32f32 r2.z, r4.z -mov.f32f32 r2.y, r4.y -mov.f32f32 r2.x, r4.x +mad.f32 r3.w, c4.x, r4.z, c4.y +mad.f32 r3.z, c4.x, r4.z, c4.y +mad.f32 r3.y, c4.x, r4.z, c4.y +mad.f32 r3.x, c4.x, r4.x, c4.y end nop nop +nop ; VERT: outputs: r0.x (0:0) r1.x (1:0) r2.x (5:0) r3.x (5:1) -; VERT: inputs: r2.x (0:0,cm=f,il=8,b=0) r1.x (0:0,cm=f,il=12,b=0) r4.x (0:0,cm=f,il=16,b=0) -; VERT: 47 instructions, 0 half, 5 full +; VERT: inputs: r4.x (0:0,cm=f,il=8,b=0) r1.x (0:0,cm=f,il=12,b=0) r2.x (0:0,cm=f,il=16,b=0) +; VERT: 29 instructions, 0 half, 5 full diff --git a/reference/0ad-alpine-valley/0ad-106.asm b/reference/0ad-alpine-valley/0ad-106.asm index f6c10a2..e8e697e 100644 --- a/reference/0ad-alpine-valley/0ad-106.asm +++ b/reference/0ad-alpine-valley/0ad-106.asm @@ -6,43 +6,40 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c1.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.z, 0, r0.x -bary.f r0.w, 4, r0.x -bary.f r1.x, 5, r0.x -bary.f (ei)r0.x, 1, r0.x -mov.f32f32 r1.y, r0.z -mov.f32f32 r0.y, r0.z -mov.f32f32 r2.x, r0.w -mov.f32f32 r1.z, r0.x -mov.f32f32 r0.z, r0.x -mov.f32f32 r2.y, r1.x -(rpt3)nop -sam (f32)(xyz)r0.w, r1.y, s#1, t#1 -(sy)add.f r0.x, c1.y, (neg)r1.y -sam (f32)(xyzw)r2.z, r0.y, s#0, t#0 -(ss)add.f r0.y, c1.y, (neg)r1.x -add.f r0.z, c1.y, (neg)r0.w -(sy)mul.f r1.w, c0.w, r3.y -mul.f r0.x, r0.x, r3.x -mul.f r1.y, r1.y, c0.z -mul.f r0.y, r0.y, r2.w -mul.f r0.z, r0.z, r2.z -mul.f r1.x, r1.x, c0.y -add.f r0.x, r1.y, r0.x -sam (f32)(w)r2.x, r2.x, s#2, t#2 -mul.f r0.w, r0.w, c0.x +bary.f r0.w, 1, r0.x +bary.f r1.x, 4, r0.x +bary.f (ei)r1.y, 5, r0.x +mov.f32f32 r0.x, r0.z +mov.f32f32 r0.y, r0.w (rpt1)nop -(sy)mul.f r1.z, r0.x, r2.w -add.f r0.x, r1.x, r0.y -add.f r0.y, r0.w, r0.z +sam (f32)(xyz)r1.z, r0.z, s#1, t#1 +(sy)(ss)add.f r0.z, c1.y, (neg)r2.x +mul.f r0.w, r2.x, c0.z +mul.f r2.x, r1.w, c0.y +add.f r1.w, c1.y, (neg)r1.w +sam (f32)(xyzw)r2.y, r0.x, s#0, t#0 +(sy)(ss)mul.f r0.x, r0.z, r2.w +add.f r0.y, c1.y, (neg)r1.z +mul.f r0.z, r1.z, c0.x +mul.f r2.z, r1.w, r2.z +add.f r0.x, r0.w, r0.x +sam (f32)(w)r3.y, r1.x, s#2, t#2 +mul.f r0.y, r0.y, r2.y +mul.f r1.w, c0.w, r3.x +nop +(sy)mul.f r1.z, r0.x, r4.x +add.f r0.x, r2.x, r2.z +add.f r0.y, r0.z, r0.y (rpt1)nop -mul.f r1.y, r0.x, r2.w -mul.f r1.x, r0.y, r2.w +(ss)mul.f r1.y, r0.x, r4.x +mul.f r1.x, r0.y, r4.x end nop nop nop ; FRAG: outputs: r1.x (1:0) ; FRAG: inputs: r0.x (5:0,cm=f,il=8,b=1) r1.x (5:1,cm=f,il=12,b=1) -; FRAG: 38 instructions, 0 half, 4 full +; FRAG: 31 instructions, 0 half, 5 full diff --git a/reference/0ad-alpine-valley/0ad-107.asm b/reference/0ad-alpine-valley/0ad-107.asm index 8e79f89..270bdcf 100644 --- a/reference/0ad-alpine-valley/0ad-107.asm +++ b/reference/0ad-alpine-valley/0ad-107.asm @@ -1,17 +1,17 @@ ; options: ; VERT: new compiler -@in(r2.x) in0 -@in(r2.y) in1 -@in(r2.z) in2 -@in(r2.w) in3 +@in(r4.x) in0 +@in(r4.y) in1 +@in(r4.z) in2 +@in(r4.w) in3 @in(r1.x) in4 @in(r1.y) in5 @in(r1.z) in6 @in(r1.w) in7 -@in(r4.x) in8 -@in(r4.y) in9 -@in(r4.z) in10 -@in(r4.w) in11 +@in(r2.x) in8 +@in(r2.y) in9 +@in(r2.z) in10 +@in(r2.w) in11 @out(r0.x) out0 @out(r0.y) out1 @out(r0.z) out2 @@ -28,55 +28,40 @@ @out(r3.y) out13 @out(r3.z) out14 @out(r3.w) out15 -(sy)(ss)mul.f r0.x, c3.x, r2.x -mul.f r0.y, c2.x, r2.x -mad.f32 r0.x, c3.y, r2.y, r0.x -mad.f32 r0.y, c2.y, r2.y, r0.y -mul.f r0.z, c1.x, r2.x -mul.f r0.w, c0.x, r2.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y -mad.f32 r0.x, c3.z, r2.z, r0.x -mad.f32 r0.y, c2.z, r2.z, r0.y -mad.f32 r3.x, c1.y, r2.y, r0.z -mad.f32 r2.y, c0.y, r2.y, r0.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y -mad.f32 r0.w, c3.w, r2.w, r0.x -mad.f32 r0.z, c2.w, r2.w, r0.y -mov.f32f32 r0.x, r3.x -mov.f32f32 r0.y, r2.y -mad.f32 r0.x, c1.z, r2.z, r0.x -mad.f32 r0.y, c0.z, r2.z, r0.y -mov.f32f32 r1.w, r1.w -mov.f32f32 r1.z, r1.z -mov.f32f32 r0.x, r0.x -mov.f32f32 r2.y, r0.y -mad.f32 r0.y, c1.w, r2.w, r0.x -mad.f32 r0.x, c0.w, r2.w, r2.y +@const(c5.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 +(sy)(ss)mul.f r0.x, c3.x, r4.x +mul.f r0.y, c2.x, r4.x +mad.f32 r0.x, c3.y, r4.y, r0.x +mad.f32 r0.y, c2.y, r4.y, r0.y +mad.f32 r0.x, c3.z, r4.z, r0.x +mad.f32 r0.y, c2.z, r4.z, r0.y +mad.f32 r0.w, c3.w, r4.w, r0.x +mad.f32 r0.z, c2.w, r4.w, r0.y +mul.f r0.x, c1.x, r4.x +mul.f r0.y, c0.x, r4.x +mad.f32 r0.x, c1.y, r4.y, r0.x +mad.f32 r0.y, c0.y, r4.y, r0.y +mad.f32 r0.x, c1.z, r4.z, r0.x +mad.f32 r3.x, c0.z, r4.z, r0.y +mad.f32 r0.y, c1.w, r4.w, r0.x +mad.f32 r0.x, c0.w, r4.w, r3.x max.f r1.w, r1.w, c5.x max.f r1.z, r1.z, c5.x -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.x, r1.x -min.f r1.w, r1.w, c5.y -min.f r1.z, r1.z, c5.y max.f r1.y, r1.y, c5.x max.f r1.x, r1.x, c5.x -(rpt1)nop +min.f r1.w, r1.w, c5.y +min.f r1.z, r1.z, c5.y min.f r1.y, r1.y, c5.y min.f r1.x, r1.x, c5.y -mad.f32 r3.w, c4.x, r2.z, c4.y -mad.f32 r3.z, c4.x, r2.z, c4.y -mad.f32 r3.y, c4.x, r2.z, c4.y -mad.f32 r3.x, c4.x, r2.x, c4.y -mov.f32f32 r2.w, r4.w -mov.f32f32 r2.z, r4.z -mov.f32f32 r2.y, r4.y -mov.f32f32 r2.x, r4.x +mad.f32 r3.w, c4.x, r4.z, c4.y +mad.f32 r3.z, c4.x, r4.z, c4.y +mad.f32 r3.y, c4.x, r4.z, c4.y +mad.f32 r3.x, c4.x, r4.x, c4.y end nop nop +nop ; VERT: outputs: r0.x (0:0) r1.x (1:0) r2.x (5:0) r3.x (5:1) -; VERT: inputs: r2.x (0:0,cm=f,il=8,b=0) r1.x (0:0,cm=f,il=12,b=0) r4.x (0:0,cm=f,il=16,b=0) -; VERT: 47 instructions, 0 half, 5 full +; VERT: inputs: r4.x (0:0,cm=f,il=8,b=0) r1.x (0:0,cm=f,il=12,b=0) r2.x (0:0,cm=f,il=16,b=0) +; VERT: 29 instructions, 0 half, 5 full diff --git a/reference/0ad-alpine-valley/0ad-115.asm b/reference/0ad-alpine-valley/0ad-115.asm index 3be9d09..85a697c 100644 --- a/reference/0ad-alpine-valley/0ad-115.asm +++ b/reference/0ad-alpine-valley/0ad-115.asm @@ -6,44 +6,34 @@ @out(r2.y) out1 @out(r2.z) out2 @out(r2.w) out3 +@const(c4.x) 0x3f000000, 0x00000000, 0x3f800000, 0x3fb8aa65 +@const(c5.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)add.f r0.x, r0.w, c4.y -mov.f32f32 r0.y, c0.w -absneg.f r0.w, (neg)c2.x +absneg.f r0.y, (neg)c2.x +mov.f32f32 r2.w, c0.w add.f r0.z, r0.z, c4.y (rpt2)nop rcp r0.x, r0.x (ss)mul.f r0.x, r0.z, r0.x -mov.f32f32 r2.w, r0.y -mul.f r0.y, r0.w, c2.x -nop -mov.f32f32 r0.x, r0.x +mul.f r0.y, r0.y, c2.x (rpt2)nop mul.f r0.y, r0.y, r0.x -(rpt2)nop -mov.f32f32 r0.y, r0.y +mov.f32f32 r0.x, r0.x (rpt2)nop mul.f r0.x, r0.y, r0.x (rpt2)nop -mov.f32f32 r0.x, r0.x -(rpt2)nop mul.f r0.x, r0.x, c4.w -(rpt2)nop -mov.f32f32 r0.x, r0.x (rpt5)nop exp2 r0.x, r0.x -(ss)mov.f32f32 r0.x, r0.x -(rpt2)nop -add.f r0.y, c5.y, (neg)r0.x -mul.f r0.x, r0.x, c4.z +(ss)mov.f32f32 r0.y, r0.x +(ss)mul.f r0.x, r0.x, c4.z (rpt1)nop +add.f r0.y, c5.y, (neg)r0.y +(rpt2)nop mul.f r0.y, r0.y, c2.y (rpt2)nop add.f r0.x, r0.x, r0.y (rpt2)nop -mov.f32f32 r0.x, r0.x -(rpt2)nop -mov.f32f32 r0.x, r0.x -(rpt2)nop max.f r0.x, r0.x, c4.y (rpt2)nop min.f r0.x, r0.x, c4.z @@ -55,18 +45,18 @@ mul.f r1.x, r0.x, c0.z mul.f r0.y, r0.y, c1.z mul.f r0.z, r0.z, c1.y mul.f r0.w, r0.w, c1.x -mul.f r1.y, r0.x, c0.y -add.f r0.y, r1.x, r0.y +nop +add.f r2.z, r1.x, r0.y +mul.f r0.y, r0.x, c0.y mul.f r0.x, r0.x, c0.x (rpt1)nop -mov.f32f32 r2.z, r0.y -add.f r0.y, r1.y, r0.z -add.f r0.x, r0.x, r0.w -(rpt1)nop -mov.f32f32 r2.y, r0.y -mov.f32f32 r2.x, r0.x +add.f r2.y, r0.y, r0.z +add.f r2.x, r0.x, r0.w end +nop +nop +nop ; FRAG: outputs: r2.x (1:0) ; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) -; FRAG: 96 instructions, 0 half, 3 full +; FRAG: 70 instructions, 0 half, 3 full diff --git a/reference/0ad-alpine-valley/0ad-118.asm b/reference/0ad-alpine-valley/0ad-118.asm index 3be9d09..85a697c 100644 --- a/reference/0ad-alpine-valley/0ad-118.asm +++ b/reference/0ad-alpine-valley/0ad-118.asm @@ -6,44 +6,34 @@ @out(r2.y) out1 @out(r2.z) out2 @out(r2.w) out3 +@const(c4.x) 0x3f000000, 0x00000000, 0x3f800000, 0x3fb8aa65 +@const(c5.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)add.f r0.x, r0.w, c4.y -mov.f32f32 r0.y, c0.w -absneg.f r0.w, (neg)c2.x +absneg.f r0.y, (neg)c2.x +mov.f32f32 r2.w, c0.w add.f r0.z, r0.z, c4.y (rpt2)nop rcp r0.x, r0.x (ss)mul.f r0.x, r0.z, r0.x -mov.f32f32 r2.w, r0.y -mul.f r0.y, r0.w, c2.x -nop -mov.f32f32 r0.x, r0.x +mul.f r0.y, r0.y, c2.x (rpt2)nop mul.f r0.y, r0.y, r0.x -(rpt2)nop -mov.f32f32 r0.y, r0.y +mov.f32f32 r0.x, r0.x (rpt2)nop mul.f r0.x, r0.y, r0.x (rpt2)nop -mov.f32f32 r0.x, r0.x -(rpt2)nop mul.f r0.x, r0.x, c4.w -(rpt2)nop -mov.f32f32 r0.x, r0.x (rpt5)nop exp2 r0.x, r0.x -(ss)mov.f32f32 r0.x, r0.x -(rpt2)nop -add.f r0.y, c5.y, (neg)r0.x -mul.f r0.x, r0.x, c4.z +(ss)mov.f32f32 r0.y, r0.x +(ss)mul.f r0.x, r0.x, c4.z (rpt1)nop +add.f r0.y, c5.y, (neg)r0.y +(rpt2)nop mul.f r0.y, r0.y, c2.y (rpt2)nop add.f r0.x, r0.x, r0.y (rpt2)nop -mov.f32f32 r0.x, r0.x -(rpt2)nop -mov.f32f32 r0.x, r0.x -(rpt2)nop max.f r0.x, r0.x, c4.y (rpt2)nop min.f r0.x, r0.x, c4.z @@ -55,18 +45,18 @@ mul.f r1.x, r0.x, c0.z mul.f r0.y, r0.y, c1.z mul.f r0.z, r0.z, c1.y mul.f r0.w, r0.w, c1.x -mul.f r1.y, r0.x, c0.y -add.f r0.y, r1.x, r0.y +nop +add.f r2.z, r1.x, r0.y +mul.f r0.y, r0.x, c0.y mul.f r0.x, r0.x, c0.x (rpt1)nop -mov.f32f32 r2.z, r0.y -add.f r0.y, r1.y, r0.z -add.f r0.x, r0.x, r0.w -(rpt1)nop -mov.f32f32 r2.y, r0.y -mov.f32f32 r2.x, r0.x +add.f r2.y, r0.y, r0.z +add.f r2.x, r0.x, r0.w end +nop +nop +nop ; FRAG: outputs: r2.x (1:0) ; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) -; FRAG: 96 instructions, 0 half, 3 full +; FRAG: 70 instructions, 0 half, 3 full diff --git a/reference/0ad-alpine-valley/0ad-121.asm b/reference/0ad-alpine-valley/0ad-121.asm index 3be9d09..85a697c 100644 --- a/reference/0ad-alpine-valley/0ad-121.asm +++ b/reference/0ad-alpine-valley/0ad-121.asm @@ -6,44 +6,34 @@ @out(r2.y) out1 @out(r2.z) out2 @out(r2.w) out3 +@const(c4.x) 0x3f000000, 0x00000000, 0x3f800000, 0x3fb8aa65 +@const(c5.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)add.f r0.x, r0.w, c4.y -mov.f32f32 r0.y, c0.w -absneg.f r0.w, (neg)c2.x +absneg.f r0.y, (neg)c2.x +mov.f32f32 r2.w, c0.w add.f r0.z, r0.z, c4.y (rpt2)nop rcp r0.x, r0.x (ss)mul.f r0.x, r0.z, r0.x -mov.f32f32 r2.w, r0.y -mul.f r0.y, r0.w, c2.x -nop -mov.f32f32 r0.x, r0.x +mul.f r0.y, r0.y, c2.x (rpt2)nop mul.f r0.y, r0.y, r0.x -(rpt2)nop -mov.f32f32 r0.y, r0.y +mov.f32f32 r0.x, r0.x (rpt2)nop mul.f r0.x, r0.y, r0.x (rpt2)nop -mov.f32f32 r0.x, r0.x -(rpt2)nop mul.f r0.x, r0.x, c4.w -(rpt2)nop -mov.f32f32 r0.x, r0.x (rpt5)nop exp2 r0.x, r0.x -(ss)mov.f32f32 r0.x, r0.x -(rpt2)nop -add.f r0.y, c5.y, (neg)r0.x -mul.f r0.x, r0.x, c4.z +(ss)mov.f32f32 r0.y, r0.x +(ss)mul.f r0.x, r0.x, c4.z (rpt1)nop +add.f r0.y, c5.y, (neg)r0.y +(rpt2)nop mul.f r0.y, r0.y, c2.y (rpt2)nop add.f r0.x, r0.x, r0.y (rpt2)nop -mov.f32f32 r0.x, r0.x -(rpt2)nop -mov.f32f32 r0.x, r0.x -(rpt2)nop max.f r0.x, r0.x, c4.y (rpt2)nop min.f r0.x, r0.x, c4.z @@ -55,18 +45,18 @@ mul.f r1.x, r0.x, c0.z mul.f r0.y, r0.y, c1.z mul.f r0.z, r0.z, c1.y mul.f r0.w, r0.w, c1.x -mul.f r1.y, r0.x, c0.y -add.f r0.y, r1.x, r0.y +nop +add.f r2.z, r1.x, r0.y +mul.f r0.y, r0.x, c0.y mul.f r0.x, r0.x, c0.x (rpt1)nop -mov.f32f32 r2.z, r0.y -add.f r0.y, r1.y, r0.z -add.f r0.x, r0.x, r0.w -(rpt1)nop -mov.f32f32 r2.y, r0.y -mov.f32f32 r2.x, r0.x +add.f r2.y, r0.y, r0.z +add.f r2.x, r0.x, r0.w end +nop +nop +nop ; FRAG: outputs: r2.x (1:0) ; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) -; FRAG: 96 instructions, 0 half, 3 full +; FRAG: 70 instructions, 0 half, 3 full diff --git a/reference/0ad-alpine-valley/0ad-124.asm b/reference/0ad-alpine-valley/0ad-124.asm index 3be9d09..85a697c 100644 --- a/reference/0ad-alpine-valley/0ad-124.asm +++ b/reference/0ad-alpine-valley/0ad-124.asm @@ -6,44 +6,34 @@ @out(r2.y) out1 @out(r2.z) out2 @out(r2.w) out3 +@const(c4.x) 0x3f000000, 0x00000000, 0x3f800000, 0x3fb8aa65 +@const(c5.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)add.f r0.x, r0.w, c4.y -mov.f32f32 r0.y, c0.w -absneg.f r0.w, (neg)c2.x +absneg.f r0.y, (neg)c2.x +mov.f32f32 r2.w, c0.w add.f r0.z, r0.z, c4.y (rpt2)nop rcp r0.x, r0.x (ss)mul.f r0.x, r0.z, r0.x -mov.f32f32 r2.w, r0.y -mul.f r0.y, r0.w, c2.x -nop -mov.f32f32 r0.x, r0.x +mul.f r0.y, r0.y, c2.x (rpt2)nop mul.f r0.y, r0.y, r0.x -(rpt2)nop -mov.f32f32 r0.y, r0.y +mov.f32f32 r0.x, r0.x (rpt2)nop mul.f r0.x, r0.y, r0.x (rpt2)nop -mov.f32f32 r0.x, r0.x -(rpt2)nop mul.f r0.x, r0.x, c4.w -(rpt2)nop -mov.f32f32 r0.x, r0.x (rpt5)nop exp2 r0.x, r0.x -(ss)mov.f32f32 r0.x, r0.x -(rpt2)nop -add.f r0.y, c5.y, (neg)r0.x -mul.f r0.x, r0.x, c4.z +(ss)mov.f32f32 r0.y, r0.x +(ss)mul.f r0.x, r0.x, c4.z (rpt1)nop +add.f r0.y, c5.y, (neg)r0.y +(rpt2)nop mul.f r0.y, r0.y, c2.y (rpt2)nop add.f r0.x, r0.x, r0.y (rpt2)nop -mov.f32f32 r0.x, r0.x -(rpt2)nop -mov.f32f32 r0.x, r0.x -(rpt2)nop max.f r0.x, r0.x, c4.y (rpt2)nop min.f r0.x, r0.x, c4.z @@ -55,18 +45,18 @@ mul.f r1.x, r0.x, c0.z mul.f r0.y, r0.y, c1.z mul.f r0.z, r0.z, c1.y mul.f r0.w, r0.w, c1.x -mul.f r1.y, r0.x, c0.y -add.f r0.y, r1.x, r0.y +nop +add.f r2.z, r1.x, r0.y +mul.f r0.y, r0.x, c0.y mul.f r0.x, r0.x, c0.x (rpt1)nop -mov.f32f32 r2.z, r0.y -add.f r0.y, r1.y, r0.z -add.f r0.x, r0.x, r0.w -(rpt1)nop -mov.f32f32 r2.y, r0.y -mov.f32f32 r2.x, r0.x +add.f r2.y, r0.y, r0.z +add.f r2.x, r0.x, r0.w end +nop +nop +nop ; FRAG: outputs: r2.x (1:0) ; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) -; FRAG: 96 instructions, 0 half, 3 full +; FRAG: 70 instructions, 0 half, 3 full diff --git a/reference/0ad-alpine-valley/0ad-127.asm b/reference/0ad-alpine-valley/0ad-127.asm index 3be9d09..85a697c 100644 --- a/reference/0ad-alpine-valley/0ad-127.asm +++ b/reference/0ad-alpine-valley/0ad-127.asm @@ -6,44 +6,34 @@ @out(r2.y) out1 @out(r2.z) out2 @out(r2.w) out3 +@const(c4.x) 0x3f000000, 0x00000000, 0x3f800000, 0x3fb8aa65 +@const(c5.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)add.f r0.x, r0.w, c4.y -mov.f32f32 r0.y, c0.w -absneg.f r0.w, (neg)c2.x +absneg.f r0.y, (neg)c2.x +mov.f32f32 r2.w, c0.w add.f r0.z, r0.z, c4.y (rpt2)nop rcp r0.x, r0.x (ss)mul.f r0.x, r0.z, r0.x -mov.f32f32 r2.w, r0.y -mul.f r0.y, r0.w, c2.x -nop -mov.f32f32 r0.x, r0.x +mul.f r0.y, r0.y, c2.x (rpt2)nop mul.f r0.y, r0.y, r0.x -(rpt2)nop -mov.f32f32 r0.y, r0.y +mov.f32f32 r0.x, r0.x (rpt2)nop mul.f r0.x, r0.y, r0.x (rpt2)nop -mov.f32f32 r0.x, r0.x -(rpt2)nop mul.f r0.x, r0.x, c4.w -(rpt2)nop -mov.f32f32 r0.x, r0.x (rpt5)nop exp2 r0.x, r0.x -(ss)mov.f32f32 r0.x, r0.x -(rpt2)nop -add.f r0.y, c5.y, (neg)r0.x -mul.f r0.x, r0.x, c4.z +(ss)mov.f32f32 r0.y, r0.x +(ss)mul.f r0.x, r0.x, c4.z (rpt1)nop +add.f r0.y, c5.y, (neg)r0.y +(rpt2)nop mul.f r0.y, r0.y, c2.y (rpt2)nop add.f r0.x, r0.x, r0.y (rpt2)nop -mov.f32f32 r0.x, r0.x -(rpt2)nop -mov.f32f32 r0.x, r0.x -(rpt2)nop max.f r0.x, r0.x, c4.y (rpt2)nop min.f r0.x, r0.x, c4.z @@ -55,18 +45,18 @@ mul.f r1.x, r0.x, c0.z mul.f r0.y, r0.y, c1.z mul.f r0.z, r0.z, c1.y mul.f r0.w, r0.w, c1.x -mul.f r1.y, r0.x, c0.y -add.f r0.y, r1.x, r0.y +nop +add.f r2.z, r1.x, r0.y +mul.f r0.y, r0.x, c0.y mul.f r0.x, r0.x, c0.x (rpt1)nop -mov.f32f32 r2.z, r0.y -add.f r0.y, r1.y, r0.z -add.f r0.x, r0.x, r0.w -(rpt1)nop -mov.f32f32 r2.y, r0.y -mov.f32f32 r2.x, r0.x +add.f r2.y, r0.y, r0.z +add.f r2.x, r0.x, r0.w end +nop +nop +nop ; FRAG: outputs: r2.x (1:0) ; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) -; FRAG: 96 instructions, 0 half, 3 full +; FRAG: 70 instructions, 0 half, 3 full diff --git a/reference/0ad-alpine-valley/0ad-130.asm b/reference/0ad-alpine-valley/0ad-130.asm index 3be9d09..85a697c 100644 --- a/reference/0ad-alpine-valley/0ad-130.asm +++ b/reference/0ad-alpine-valley/0ad-130.asm @@ -6,44 +6,34 @@ @out(r2.y) out1 @out(r2.z) out2 @out(r2.w) out3 +@const(c4.x) 0x3f000000, 0x00000000, 0x3f800000, 0x3fb8aa65 +@const(c5.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)add.f r0.x, r0.w, c4.y -mov.f32f32 r0.y, c0.w -absneg.f r0.w, (neg)c2.x +absneg.f r0.y, (neg)c2.x +mov.f32f32 r2.w, c0.w add.f r0.z, r0.z, c4.y (rpt2)nop rcp r0.x, r0.x (ss)mul.f r0.x, r0.z, r0.x -mov.f32f32 r2.w, r0.y -mul.f r0.y, r0.w, c2.x -nop -mov.f32f32 r0.x, r0.x +mul.f r0.y, r0.y, c2.x (rpt2)nop mul.f r0.y, r0.y, r0.x -(rpt2)nop -mov.f32f32 r0.y, r0.y +mov.f32f32 r0.x, r0.x (rpt2)nop mul.f r0.x, r0.y, r0.x (rpt2)nop -mov.f32f32 r0.x, r0.x -(rpt2)nop mul.f r0.x, r0.x, c4.w -(rpt2)nop -mov.f32f32 r0.x, r0.x (rpt5)nop exp2 r0.x, r0.x -(ss)mov.f32f32 r0.x, r0.x -(rpt2)nop -add.f r0.y, c5.y, (neg)r0.x -mul.f r0.x, r0.x, c4.z +(ss)mov.f32f32 r0.y, r0.x +(ss)mul.f r0.x, r0.x, c4.z (rpt1)nop +add.f r0.y, c5.y, (neg)r0.y +(rpt2)nop mul.f r0.y, r0.y, c2.y (rpt2)nop add.f r0.x, r0.x, r0.y (rpt2)nop -mov.f32f32 r0.x, r0.x -(rpt2)nop -mov.f32f32 r0.x, r0.x -(rpt2)nop max.f r0.x, r0.x, c4.y (rpt2)nop min.f r0.x, r0.x, c4.z @@ -55,18 +45,18 @@ mul.f r1.x, r0.x, c0.z mul.f r0.y, r0.y, c1.z mul.f r0.z, r0.z, c1.y mul.f r0.w, r0.w, c1.x -mul.f r1.y, r0.x, c0.y -add.f r0.y, r1.x, r0.y +nop +add.f r2.z, r1.x, r0.y +mul.f r0.y, r0.x, c0.y mul.f r0.x, r0.x, c0.x (rpt1)nop -mov.f32f32 r2.z, r0.y -add.f r0.y, r1.y, r0.z -add.f r0.x, r0.x, r0.w -(rpt1)nop -mov.f32f32 r2.y, r0.y -mov.f32f32 r2.x, r0.x +add.f r2.y, r0.y, r0.z +add.f r2.x, r0.x, r0.w end +nop +nop +nop ; FRAG: outputs: r2.x (1:0) ; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) -; FRAG: 96 instructions, 0 half, 3 full +; FRAG: 70 instructions, 0 half, 3 full diff --git a/reference/0ad-alpine-valley/0ad-133.asm b/reference/0ad-alpine-valley/0ad-133.asm index 3be9d09..85a697c 100644 --- a/reference/0ad-alpine-valley/0ad-133.asm +++ b/reference/0ad-alpine-valley/0ad-133.asm @@ -6,44 +6,34 @@ @out(r2.y) out1 @out(r2.z) out2 @out(r2.w) out3 +@const(c4.x) 0x3f000000, 0x00000000, 0x3f800000, 0x3fb8aa65 +@const(c5.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)add.f r0.x, r0.w, c4.y -mov.f32f32 r0.y, c0.w -absneg.f r0.w, (neg)c2.x +absneg.f r0.y, (neg)c2.x +mov.f32f32 r2.w, c0.w add.f r0.z, r0.z, c4.y (rpt2)nop rcp r0.x, r0.x (ss)mul.f r0.x, r0.z, r0.x -mov.f32f32 r2.w, r0.y -mul.f r0.y, r0.w, c2.x -nop -mov.f32f32 r0.x, r0.x +mul.f r0.y, r0.y, c2.x (rpt2)nop mul.f r0.y, r0.y, r0.x -(rpt2)nop -mov.f32f32 r0.y, r0.y +mov.f32f32 r0.x, r0.x (rpt2)nop mul.f r0.x, r0.y, r0.x (rpt2)nop -mov.f32f32 r0.x, r0.x -(rpt2)nop mul.f r0.x, r0.x, c4.w -(rpt2)nop -mov.f32f32 r0.x, r0.x (rpt5)nop exp2 r0.x, r0.x -(ss)mov.f32f32 r0.x, r0.x -(rpt2)nop -add.f r0.y, c5.y, (neg)r0.x -mul.f r0.x, r0.x, c4.z +(ss)mov.f32f32 r0.y, r0.x +(ss)mul.f r0.x, r0.x, c4.z (rpt1)nop +add.f r0.y, c5.y, (neg)r0.y +(rpt2)nop mul.f r0.y, r0.y, c2.y (rpt2)nop add.f r0.x, r0.x, r0.y (rpt2)nop -mov.f32f32 r0.x, r0.x -(rpt2)nop -mov.f32f32 r0.x, r0.x -(rpt2)nop max.f r0.x, r0.x, c4.y (rpt2)nop min.f r0.x, r0.x, c4.z @@ -55,18 +45,18 @@ mul.f r1.x, r0.x, c0.z mul.f r0.y, r0.y, c1.z mul.f r0.z, r0.z, c1.y mul.f r0.w, r0.w, c1.x -mul.f r1.y, r0.x, c0.y -add.f r0.y, r1.x, r0.y +nop +add.f r2.z, r1.x, r0.y +mul.f r0.y, r0.x, c0.y mul.f r0.x, r0.x, c0.x (rpt1)nop -mov.f32f32 r2.z, r0.y -add.f r0.y, r1.y, r0.z -add.f r0.x, r0.x, r0.w -(rpt1)nop -mov.f32f32 r2.y, r0.y -mov.f32f32 r2.x, r0.x +add.f r2.y, r0.y, r0.z +add.f r2.x, r0.x, r0.w end +nop +nop +nop ; FRAG: outputs: r2.x (1:0) ; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) -; FRAG: 96 instructions, 0 half, 3 full +; FRAG: 70 instructions, 0 half, 3 full diff --git a/reference/0ad-alpine-valley/0ad-136.asm b/reference/0ad-alpine-valley/0ad-136.asm index 9114513..13ea129 100644 --- a/reference/0ad-alpine-valley/0ad-136.asm +++ b/reference/0ad-alpine-valley/0ad-136.asm @@ -6,23 +6,16 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)bary.f r0.z, 0, r0.x -bary.f (ei)r0.x, 1, r0.x -(rpt1)nop -mov.f32f32 r0.y, r0.z -mov.f32f32 r0.x, r0.x -(rpt1)nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.x +bary.f (ei)r0.w, 1, r0.x (rpt5)nop -sam (f32)(xyzw)r0.x, r0.y, s#0, t#0 -(sy)mov.f32f32 r1.w, r0.w -mov.f32f32 r1.z, r0.z -mov.f32f32 r1.y, r0.y -mov.f32f32 r1.x, r0.x +sam (f32)(xyzw)r1.x, r0.z, s#0, t#0 end nop +nop +nop ; FRAG: outputs: r1.x (1:0) ; FRAG: inputs: r0.x (5:9,cm=f,il=8,b=1) -; FRAG: 22 instructions, 0 half, 2 full +; FRAG: 10 instructions, 0 half, 2 full diff --git a/reference/0ad-alpine-valley/0ad-139.asm b/reference/0ad-alpine-valley/0ad-139.asm index 9114513..13ea129 100644 --- a/reference/0ad-alpine-valley/0ad-139.asm +++ b/reference/0ad-alpine-valley/0ad-139.asm @@ -6,23 +6,16 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)bary.f r0.z, 0, r0.x -bary.f (ei)r0.x, 1, r0.x -(rpt1)nop -mov.f32f32 r0.y, r0.z -mov.f32f32 r0.x, r0.x -(rpt1)nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.x +bary.f (ei)r0.w, 1, r0.x (rpt5)nop -sam (f32)(xyzw)r0.x, r0.y, s#0, t#0 -(sy)mov.f32f32 r1.w, r0.w -mov.f32f32 r1.z, r0.z -mov.f32f32 r1.y, r0.y -mov.f32f32 r1.x, r0.x +sam (f32)(xyzw)r1.x, r0.z, s#0, t#0 end nop +nop +nop ; FRAG: outputs: r1.x (1:0) ; FRAG: inputs: r0.x (5:9,cm=f,il=8,b=1) -; FRAG: 22 instructions, 0 half, 2 full +; FRAG: 10 instructions, 0 half, 2 full diff --git a/reference/0ad-alpine-valley/0ad-142.asm b/reference/0ad-alpine-valley/0ad-142.asm index c46fb79..7495bc9 100644 --- a/reference/0ad-alpine-valley/0ad-142.asm +++ b/reference/0ad-alpine-valley/0ad-142.asm @@ -6,31 +6,24 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c2.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)bary.f r0.z, 0, r0.x -bary.f (ei)r0.x, 1, r0.x -(rpt1)nop -mov.f32f32 r0.y, r0.z -mov.f32f32 r0.x, r0.x -(rpt1)nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.x +bary.f (ei)r0.w, 1, r0.x (rpt5)nop -sam (f32)(xyzw)r0.x, r0.y, s#0, t#0 -(sy)add.f r0.w, r0.w, c0.w -(ss)add.f r0.z, r0.z, c0.z +sam (f32)(xyzw)r0.x, r0.z, s#0, t#0 +(sy)(ss)add.f r0.w, r0.w, c0.w +add.f r0.z, r0.z, c0.z add.f r0.y, r0.y, c0.y add.f r0.x, r0.x, c0.x -mul.f r0.w, r0.w, c1.w -mul.f r0.z, r0.z, c1.z -mul.f r0.y, r0.y, c1.y -mul.f r0.x, r0.x, c1.x -mov.f32f32 r1.w, r0.w -mov.f32f32 r1.z, r0.z -mov.f32f32 r1.y, r0.y -mov.f32f32 r1.x, r0.x +mul.f r1.w, r0.w, c1.w +mul.f r1.z, r0.z, c1.z +mul.f r1.y, r0.y, c1.y +mul.f r1.x, r0.x, c1.x end nop +nop +nop ; FRAG: outputs: r1.x (1:0) ; FRAG: inputs: r0.x (5:9,cm=f,il=8,b=1) -; FRAG: 30 instructions, 0 half, 2 full +; FRAG: 18 instructions, 0 half, 2 full diff --git a/reference/0ad-alpine-valley/0ad-145.asm b/reference/0ad-alpine-valley/0ad-145.asm index 9114513..13ea129 100644 --- a/reference/0ad-alpine-valley/0ad-145.asm +++ b/reference/0ad-alpine-valley/0ad-145.asm @@ -6,23 +6,16 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)bary.f r0.z, 0, r0.x -bary.f (ei)r0.x, 1, r0.x -(rpt1)nop -mov.f32f32 r0.y, r0.z -mov.f32f32 r0.x, r0.x -(rpt1)nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.x +bary.f (ei)r0.w, 1, r0.x (rpt5)nop -sam (f32)(xyzw)r0.x, r0.y, s#0, t#0 -(sy)mov.f32f32 r1.w, r0.w -mov.f32f32 r1.z, r0.z -mov.f32f32 r1.y, r0.y -mov.f32f32 r1.x, r0.x +sam (f32)(xyzw)r1.x, r0.z, s#0, t#0 end nop +nop +nop ; FRAG: outputs: r1.x (1:0) ; FRAG: inputs: r0.x (5:9,cm=f,il=8,b=1) -; FRAG: 22 instructions, 0 half, 2 full +; FRAG: 10 instructions, 0 half, 2 full diff --git a/reference/0ad-alpine-valley/0ad-148.asm b/reference/0ad-alpine-valley/0ad-148.asm index 600d62a..a965927 100644 --- a/reference/0ad-alpine-valley/0ad-148.asm +++ b/reference/0ad-alpine-valley/0ad-148.asm @@ -6,31 +6,20 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c0.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000 (sy)(ss)bary.f r0.z, 0, r0.x -bary.f (ei)r0.x, 1, r0.x -mov.f32f32 r0.y, c0.x -mov.f32f32 r0.w, c0.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.x, r0.x -mov.f32f32 r1.z, r0.y -mov.f32f32 r1.y, r0.w -mov.f32f32 r0.y, r0.z -mov.f32f32 r0.z, r0.x -mov.f32f32 r0.x, c0.x -(rpt4)nop -sam (f32)(w)r0.y, r0.y, s#0, t#0 -(sy)(ss)add.f r0.y, c0.y, (neg)r1.x -mov.f32f32 r1.x, r0.x -(rpt1)nop -mov.f32f32 r0.x, r0.y +bary.f (ei)r0.w, 1, r0.x +mov.f32f32 r1.z, c0.x +mov.f32f32 r1.y, c0.x +mov.f32f32 r1.x, c0.x (rpt2)nop -mov.f32f32 r0.x, r0.x -(rpt2)nop -mov.f32f32 r1.w, r0.x +sam (f32)(w)r0.x, r0.z, s#0, t#0 +(sy)add.f r1.w, c0.y, (neg)r0.w end nop nop +nop ; FRAG: outputs: r1.x (1:0) ; FRAG: inputs: r0.x (5:9,cm=f,il=8,b=1) -; FRAG: 31 instructions, 0 half, 2 full +; FRAG: 11 instructions, 0 half, 2 full diff --git a/reference/0ad-alpine-valley/0ad-151.asm b/reference/0ad-alpine-valley/0ad-151.asm index 9f44609..1171e2e 100644 --- a/reference/0ad-alpine-valley/0ad-151.asm +++ b/reference/0ad-alpine-valley/0ad-151.asm @@ -6,19 +6,16 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 -(sy)(ss)bary.f r0.z, 2, r0.x -bary.f r0.w, 1, r0.x -bary.f (ei)r0.x, 0, r0.x -mov.f32f32 r0.y, c0.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r1.w, r0.y -mov.f32f32 r1.z, r0.z -mov.f32f32 r1.y, r0.w -mov.f32f32 r1.x, r0.x +@const(c0.x) 0x3f800000, 0x00000000, 0x00000000, 0x00000000 +(sy)(ss)mov.f32f32 r1.w, c0.x +bary.f r1.z, 2, r0.x +bary.f r1.y, 1, r0.x +bary.f (ei)r1.x, 0, r0.x end +nop +nop +nop ; FRAG: outputs: r1.x (1:0) ; FRAG: inputs: r0.x (5:9,cm=f,il=8,b=1) -; FRAG: 12 instructions, 0 half, 2 full +; FRAG: 5 instructions, 0 half, 2 full diff --git a/reference/0ad-alpine-valley/0ad-34.asm b/reference/0ad-alpine-valley/0ad-34.asm index 63e7be5..22ca830 100644 --- a/reference/0ad-alpine-valley/0ad-34.asm +++ b/reference/0ad-alpine-valley/0ad-34.asm @@ -6,31 +6,24 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c0.x) 0x3ecccccd, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)bary.f r0.z, 0, r0.x -bary.f (ei)r0.x, 1, r0.x -mov.f32f32 r0.y, (0.000000) -nop -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.x, r0.x -(rpt1)nop -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.x -(rpt5)nop -sam (f32)(xyzw)r2.y, r0.z, s#0, t#0 -(sy)cmps.f.lt r0.x, r3.x, c0.x -mov.f32f32 r1.w, r3.x -mov.f32f32 r1.z, r2.w -mov.f32f32 r1.y, r2.z -cov.u32f32 r0.x, r0.x -(rpt1)nop -mov.f32f32 r1.x, r2.y -cmps.f.ne p0.x, r0.x, r0.y +bary.f (ei)r0.w, 1, r0.x +mov.f32f32 r0.x, (0.000000) +(rpt4)nop +sam (f32)(xyzw)r1.x, r0.z, s#0, t#0 +(sy)cmps.f.lt r0.y, r1.w, c0.x +(rpt2)nop +cov.u32f32 r0.y, r0.y +(rpt2)nop +cmps.f.ne p0.x, r0.y, r0.x (rpt5)nop kill p0.x end nop nop +nop ; FRAG: outputs: r1.x (1:0) ; FRAG: inputs: r0.x (5:9,cm=f,il=8,b=1) -; FRAG: 34 instructions, 0 half, 4 full +; FRAG: 26 instructions, 0 half, 2 full diff --git a/reference/0ad-alpine-valley/0ad-36.asm b/reference/0ad-alpine-valley/0ad-36.asm index 42c6c0a..f362ccd 100644 --- a/reference/0ad-alpine-valley/0ad-36.asm +++ b/reference/0ad-alpine-valley/0ad-36.asm @@ -3,8 +3,8 @@ @in(r0.x) in0 @in(r0.y) in1 @in(r0.z) in2 -@in(r0.w) in4 -@in(r1.x) in5 +@in(r1.x) in4 +@in(r1.y) in5 @out(r0.x) out0 @out(r0.y) out1 @out(r0.z) out2 @@ -13,159 +13,124 @@ @out(r1.y) out5 @out(r1.z) out6 @out(r1.w) out7 -(sy)(ss)floor.f r1.y, c11.z +@const(c15.x) 0x3ecccccd, 0x00000000, 0x3eaaa64c, 0x3e800000 +@const(c16.x) 0x3f000000, 0x40000000, 0xbf800000, 0x40400000 +@const(c17.x) 0x399d4952, 0x3e4ccccd, 0x3f99999a, 0x3fd9999a +@const(c18.x) 0x3c88ce70, 0x00000000, 0x00000000, 0x00000000 +@const(c19.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 +(sy)(ss)floor.f r0.w, c11.z floor.f r1.z, c11.x absneg.f r1.w, (abs)c14.x absneg.f r2.x, (abs)c14.y -add.f r1.y, c11.z, (neg)r1.y +add.f r0.w, c11.z, (neg)r0.w add.f r1.z, c11.x, (neg)r1.z mul.f r2.y, r0.x, r0.z add.f r1.w, r1.w, r2.x -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.z, r1.z -mul.f r2.x, r0.y, c17.x -mov.f32f32 r1.w, r1.w -max.f r1.y, r1.y, c15.y +max.f r0.w, r0.w, c15.y max.f r1.z, r1.z, c15.y -mul.f r2.x, r2.y, r2.x -mul.f r2.y, c13.x, r1.w -min.f r1.y, r1.y, c19.y +mul.f r2.x, r0.y, c17.x +mul.f r2.z, c13.x, r1.w +min.f r0.w, r0.w, c19.y min.f r1.z, r1.z, c19.y -mul.f r2.z, c8.z, r0.x -mov.f32f32 r2.x, r2.x -max.f r1.y, r1.y, c15.x +mul.f r2.w, c8.z, r0.x +mov.f32f32 r1.w, r1.w +max.f r0.w, r0.w, c15.x max.f r1.z, r1.z, c15.x -mad.f32 r2.z, c9.z, r0.y, r2.z -mov.f32f32 r2.x, r2.x -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.z, r1.z -mad.f32 r2.z, c10.z, r0.z, r2.z -mul.f r2.w, c8.x, r0.x -mul.f r1.y, c13.x, r1.y -mad.f32 r2.w, c9.x, r0.y, r2.w -add.f r2.z, r2.z, c11.z -mad.f32 r2.w, c10.x, r0.z, r2.w -mov.f32f32 r1.y, r1.y -mad.f32 r2.y, c15.w, r2.y, r2.z -mov.f32f32 r2.x, r2.x -add.f r2.w, r2.w, c11.x -mad.f32 r1.y, c15.z, r1.y, c11.x -mad.f32 r1.z, c13.x, r1.z, r2.w -mov.f32f32 r2.y, r2.y -max.f r2.x, r2.x, c15.y -mov.f32f32 r1.y, r1.y +mul.f r3.x, c8.x, r0.x +mad.f32 r2.w, c9.z, r0.y, r2.w +mul.f r0.w, c13.x, r0.w +mad.f32 r3.x, c9.x, r0.y, r3.x +mad.f32 r2.w, c10.z, r0.z, r2.w +mad.f32 r3.x, c10.x, r0.z, r3.x +mad.f32 r0.w, c15.z, r0.w, c11.x +max.f r1.w, r1.w, c17.z +mul.f r2.x, r2.y, r2.x +add.f r2.y, r3.x, c11.x +add.f r0.w, r0.w, c16.x +mad.f32 r1.z, c13.x, r1.z, r2.y +add.f r2.w, r2.w, c11.z +min.f r1.w, r1.w, c17.w +floor.f r3.x, r0.w add.f r1.z, r1.z, c16.x -mov.f32f32 r2.y, r2.y -min.f r2.x, r2.x, c19.y -mov.f32f32 r1.y, r1.y +mad.f32 r2.z, c15.w, r2.z, r2.w +mov.f32f32 r3.y, r1.w +add.f r0.w, r0.w, (neg)r3.x floor.f r3.x, r1.z -add.f r2.y, r2.y, c16.x -min.f r2.x, r2.x, c17.y -add.f r1.y, r1.y, c16.x +add.f r2.z, r2.z, c16.x +max.f r2.x, r2.x, c15.y +mad.f32 r0.w, c16.y, r0.w, c16.z add.f r1.z, r1.z, (neg)r3.x -floor.f r3.x, r2.y -mov.f32f32 r2.x, r2.x -floor.f r3.y, r1.y +floor.f r3.x, r2.z +min.f r2.x, r2.x, c19.y +absneg.f r0.w, (abs)r0.w mad.f32 r1.z, c16.y, r1.z, c16.z -add.f r2.y, r2.y, (neg)r3.x -mul.f r3.x, r0.y, c18.x -add.f r1.y, r1.y, (neg)r3.y +add.f r2.z, r2.z, (neg)r3.x +min.f r2.x, r2.x, c17.y +mul.f r3.x, c16.y, r0.w absneg.f r1.z, (abs)r1.z -mad.f32 r2.y, c16.y, r2.y, c16.z -mov.f32f32 r3.x, r3.x -mad.f32 r1.y, c16.y, r1.y, c16.z -mul.f r3.y, c16.y, r1.z -absneg.f r2.y, (abs)r2.y +mad.f32 r2.z, c16.y, r2.z, c16.z +mul.f r0.w, r0.w, r0.w +add.f r3.x, c16.w, (neg)r3.x +mul.f r3.z, c16.y, r1.z +absneg.f r2.z, (abs)r2.z mul.f r1.z, r1.z, r1.z -absneg.f r1.y, (abs)r1.y -add.f r3.y, c16.w, (neg)r3.y -mul.f r3.z, c16.y, r2.y -mul.f r2.y, r2.y, r2.y -mul.f r3.w, c16.y, r1.y -mul.f r1.z, r1.z, r3.y -add.f r3.y, c16.w, (neg)r3.z -mul.f r1.y, r1.y, r1.y -add.f r3.z, c16.w, (neg)r3.w -mov.f32f32 r1.z, r1.z -mul.f r2.y, r2.y, r3.y -mov.f32f32 r3.x, r3.x -mul.f r1.y, r1.y, r3.z -mul.f r1.z, r1.z, r2.x -mov.f32f32 r2.y, r2.y +mul.f r0.w, r0.w, r3.x +mul.f r3.x, r0.y, c18.x +add.f r3.z, c16.w, (neg)r3.z +mul.f r3.w, c16.y, r2.z +mul.f r2.z, r2.z, r2.z max.f r3.x, r3.x, c15.y -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.z, r1.z -mul.f r2.x, r2.y, r2.x -min.f r2.y, r3.x, c19.y -max.f r1.w, r1.w, c17.z -mul.f r3.x, c8.y, r0.x +mul.f r1.z, r1.z, r3.z +mov.f32f32 r3.z, r2.x +add.f r3.w, c16.w, (neg)r3.w +min.f r3.x, r3.x, c19.y +mul.f r4.x, c8.y, r0.x mul.f r0.x, c8.w, r0.x -min.f r2.y, r2.y, c15.w -mov.f32f32 r1.w, r1.w -mad.f32 r3.x, c9.y, r0.y, r3.x +mad.f32 r4.x, c9.y, r0.y, r4.x +min.f r3.x, r3.x, c15.w +mul.f r1.z, r1.z, r3.z +mul.f r2.z, r2.z, r3.w +mad.f32 r3.z, c10.y, r0.z, r4.x +mul.f r0.w, r0.w, r3.x +mov.f32f32 r3.x, r1.z +mul.f r2.x, r2.z, r2.x +add.f r2.z, r3.z, c11.y +mov.f32f32 r3.z, r0.w +mad.f32 r0.w, c14.y, r0.w, r1.z +mad.f32 r1.z, r2.x, r1.w, r2.w +mad.f32 r1.w, r2.x, r3.y, r2.y +mad.f32 r2.y, c14.x, r3.z, r3.x +mad.f32 r2.x, r2.x, r3.y, r2.z mad.f32 r0.x, c9.w, r0.y, r0.x -mov.f32f32 r0.y, r2.y -min.f r1.w, r1.w, c17.w -mad.f32 r2.y, c10.y, r0.z, r3.x -mad.f32 r0.x, c10.w, r0.z, r0.x -mul.f r0.y, r1.y, r0.y -mov.f32f32 r0.z, r1.w -add.f r1.y, r2.y, c11.y -add.f r0.x, r0.x, c11.w -mov.f32f32 r0.y, r0.y -mad.f32 r1.w, r2.x, r0.z, r2.w -mad.f32 r2.y, r2.x, r0.z, r1.y -mad.f32 r0.z, r2.x, r0.z, r2.z -mad.f32 r1.y, c14.x, r0.y, r1.z -mad.f32 r0.y, c14.y, r0.y, r1.z -mov.f32f32 r1.x, r1.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.z, r1.y -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.y, r1.x -mov.f32f32 r1.x, r0.w -add.f r0.w, r1.w, r1.z -add.f r0.y, r0.z, r0.y -(rpt1)nop -mov.f32f32 r0.z, r0.w -mov.f32f32 r0.y, r0.y -(rpt1)nop -mul.f r0.w, c0.w, r0.z -mul.f r1.z, c0.z, r0.z -mad.f32 r0.w, c1.w, r2.y, r0.w -mad.f32 r1.z, c1.z, r2.y, r1.z -mul.f r1.w, c0.y, r0.z -mul.f r0.z, c0.x, r0.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.z, r1.z -mad.f32 r2.x, c2.w, r0.y, r0.w -mad.f32 r1.z, c2.z, r0.y, r1.z -mad.f32 r0.w, c3.w, r0.x, r2.x -mad.f32 r2.z, c3.z, r0.x, r1.z -mad.f32 r1.w, c1.y, r2.y, r1.w -mad.f32 r0.z, c1.x, r2.y, r0.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r2.y, r2.z -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.z, r0.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.z, r2.y -mad.f32 r1.w, c2.y, r0.y, r1.w -mad.f32 r0.y, c2.x, r0.y, r2.z -mad.f32 r2.y, c3.y, r0.x, r1.w -mad.f32 r0.x, c3.x, r0.x, r0.y -mov.f32f32 r1.w, r2.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r0.y, r2.y -mov.f32f32 r0.x, r0.x +nop +add.f r0.y, r1.w, r2.y +add.f r2.y, r1.z, r0.w (rpt1)nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x +mov.f32f32 r0.w, r0.y +mul.f r0.y, c0.x, r0.y +mov.f32f32 r2.z, r2.y +mad.f32 r0.x, c10.w, r0.z, r0.x +mul.f r0.z, c0.w, r0.w +mul.f r1.z, c0.z, r0.w +mad.f32 r0.z, c1.w, r2.x, r0.z +mad.f32 r1.z, c1.z, r2.x, r1.z +mad.f32 r1.w, c2.w, r2.z, r0.z +add.f r2.w, r0.x, c11.w +mad.f32 r1.z, c2.z, r2.z, r1.z +mul.f r0.x, c0.y, r0.w +mad.f32 r0.y, c1.x, r2.x, r0.y +mad.f32 r0.w, c3.w, r2.w, r1.w +mad.f32 r0.z, c3.z, r2.w, r1.z +mad.f32 r0.x, c1.y, r2.x, r0.x +mad.f32 r0.y, c2.x, r2.y, r0.y +mad.f32 r2.x, c2.y, r2.z, r0.x +mad.f32 r0.x, c3.x, r2.w, r0.y +mad.f32 r0.y, c3.y, r2.w, r2.x end nop nop nop ; VERT: outputs: r0.x (0:0) r1.x (5:9) -; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=3,il=12,b=0) -; VERT: 152 instructions, 0 half, 4 full +; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r1.x (0:0,cm=3,il=12,b=0) +; VERT: 110 instructions, 0 half, 5 full diff --git a/reference/0ad-alpine-valley/0ad-37.asm b/reference/0ad-alpine-valley/0ad-37.asm index 63e7be5..22ca830 100644 --- a/reference/0ad-alpine-valley/0ad-37.asm +++ b/reference/0ad-alpine-valley/0ad-37.asm @@ -6,31 +6,24 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c0.x) 0x3ecccccd, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)bary.f r0.z, 0, r0.x -bary.f (ei)r0.x, 1, r0.x -mov.f32f32 r0.y, (0.000000) -nop -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.x, r0.x -(rpt1)nop -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.x -(rpt5)nop -sam (f32)(xyzw)r2.y, r0.z, s#0, t#0 -(sy)cmps.f.lt r0.x, r3.x, c0.x -mov.f32f32 r1.w, r3.x -mov.f32f32 r1.z, r2.w -mov.f32f32 r1.y, r2.z -cov.u32f32 r0.x, r0.x -(rpt1)nop -mov.f32f32 r1.x, r2.y -cmps.f.ne p0.x, r0.x, r0.y +bary.f (ei)r0.w, 1, r0.x +mov.f32f32 r0.x, (0.000000) +(rpt4)nop +sam (f32)(xyzw)r1.x, r0.z, s#0, t#0 +(sy)cmps.f.lt r0.y, r1.w, c0.x +(rpt2)nop +cov.u32f32 r0.y, r0.y +(rpt2)nop +cmps.f.ne p0.x, r0.y, r0.x (rpt5)nop kill p0.x end nop nop +nop ; FRAG: outputs: r1.x (1:0) ; FRAG: inputs: r0.x (5:9,cm=f,il=8,b=1) -; FRAG: 34 instructions, 0 half, 4 full +; FRAG: 26 instructions, 0 half, 2 full diff --git a/reference/0ad-alpine-valley/0ad-38.asm b/reference/0ad-alpine-valley/0ad-38.asm index 42c6c0a..f362ccd 100644 --- a/reference/0ad-alpine-valley/0ad-38.asm +++ b/reference/0ad-alpine-valley/0ad-38.asm @@ -3,8 +3,8 @@ @in(r0.x) in0 @in(r0.y) in1 @in(r0.z) in2 -@in(r0.w) in4 -@in(r1.x) in5 +@in(r1.x) in4 +@in(r1.y) in5 @out(r0.x) out0 @out(r0.y) out1 @out(r0.z) out2 @@ -13,159 +13,124 @@ @out(r1.y) out5 @out(r1.z) out6 @out(r1.w) out7 -(sy)(ss)floor.f r1.y, c11.z +@const(c15.x) 0x3ecccccd, 0x00000000, 0x3eaaa64c, 0x3e800000 +@const(c16.x) 0x3f000000, 0x40000000, 0xbf800000, 0x40400000 +@const(c17.x) 0x399d4952, 0x3e4ccccd, 0x3f99999a, 0x3fd9999a +@const(c18.x) 0x3c88ce70, 0x00000000, 0x00000000, 0x00000000 +@const(c19.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 +(sy)(ss)floor.f r0.w, c11.z floor.f r1.z, c11.x absneg.f r1.w, (abs)c14.x absneg.f r2.x, (abs)c14.y -add.f r1.y, c11.z, (neg)r1.y +add.f r0.w, c11.z, (neg)r0.w add.f r1.z, c11.x, (neg)r1.z mul.f r2.y, r0.x, r0.z add.f r1.w, r1.w, r2.x -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.z, r1.z -mul.f r2.x, r0.y, c17.x -mov.f32f32 r1.w, r1.w -max.f r1.y, r1.y, c15.y +max.f r0.w, r0.w, c15.y max.f r1.z, r1.z, c15.y -mul.f r2.x, r2.y, r2.x -mul.f r2.y, c13.x, r1.w -min.f r1.y, r1.y, c19.y +mul.f r2.x, r0.y, c17.x +mul.f r2.z, c13.x, r1.w +min.f r0.w, r0.w, c19.y min.f r1.z, r1.z, c19.y -mul.f r2.z, c8.z, r0.x -mov.f32f32 r2.x, r2.x -max.f r1.y, r1.y, c15.x +mul.f r2.w, c8.z, r0.x +mov.f32f32 r1.w, r1.w +max.f r0.w, r0.w, c15.x max.f r1.z, r1.z, c15.x -mad.f32 r2.z, c9.z, r0.y, r2.z -mov.f32f32 r2.x, r2.x -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.z, r1.z -mad.f32 r2.z, c10.z, r0.z, r2.z -mul.f r2.w, c8.x, r0.x -mul.f r1.y, c13.x, r1.y -mad.f32 r2.w, c9.x, r0.y, r2.w -add.f r2.z, r2.z, c11.z -mad.f32 r2.w, c10.x, r0.z, r2.w -mov.f32f32 r1.y, r1.y -mad.f32 r2.y, c15.w, r2.y, r2.z -mov.f32f32 r2.x, r2.x -add.f r2.w, r2.w, c11.x -mad.f32 r1.y, c15.z, r1.y, c11.x -mad.f32 r1.z, c13.x, r1.z, r2.w -mov.f32f32 r2.y, r2.y -max.f r2.x, r2.x, c15.y -mov.f32f32 r1.y, r1.y +mul.f r3.x, c8.x, r0.x +mad.f32 r2.w, c9.z, r0.y, r2.w +mul.f r0.w, c13.x, r0.w +mad.f32 r3.x, c9.x, r0.y, r3.x +mad.f32 r2.w, c10.z, r0.z, r2.w +mad.f32 r3.x, c10.x, r0.z, r3.x +mad.f32 r0.w, c15.z, r0.w, c11.x +max.f r1.w, r1.w, c17.z +mul.f r2.x, r2.y, r2.x +add.f r2.y, r3.x, c11.x +add.f r0.w, r0.w, c16.x +mad.f32 r1.z, c13.x, r1.z, r2.y +add.f r2.w, r2.w, c11.z +min.f r1.w, r1.w, c17.w +floor.f r3.x, r0.w add.f r1.z, r1.z, c16.x -mov.f32f32 r2.y, r2.y -min.f r2.x, r2.x, c19.y -mov.f32f32 r1.y, r1.y +mad.f32 r2.z, c15.w, r2.z, r2.w +mov.f32f32 r3.y, r1.w +add.f r0.w, r0.w, (neg)r3.x floor.f r3.x, r1.z -add.f r2.y, r2.y, c16.x -min.f r2.x, r2.x, c17.y -add.f r1.y, r1.y, c16.x +add.f r2.z, r2.z, c16.x +max.f r2.x, r2.x, c15.y +mad.f32 r0.w, c16.y, r0.w, c16.z add.f r1.z, r1.z, (neg)r3.x -floor.f r3.x, r2.y -mov.f32f32 r2.x, r2.x -floor.f r3.y, r1.y +floor.f r3.x, r2.z +min.f r2.x, r2.x, c19.y +absneg.f r0.w, (abs)r0.w mad.f32 r1.z, c16.y, r1.z, c16.z -add.f r2.y, r2.y, (neg)r3.x -mul.f r3.x, r0.y, c18.x -add.f r1.y, r1.y, (neg)r3.y +add.f r2.z, r2.z, (neg)r3.x +min.f r2.x, r2.x, c17.y +mul.f r3.x, c16.y, r0.w absneg.f r1.z, (abs)r1.z -mad.f32 r2.y, c16.y, r2.y, c16.z -mov.f32f32 r3.x, r3.x -mad.f32 r1.y, c16.y, r1.y, c16.z -mul.f r3.y, c16.y, r1.z -absneg.f r2.y, (abs)r2.y +mad.f32 r2.z, c16.y, r2.z, c16.z +mul.f r0.w, r0.w, r0.w +add.f r3.x, c16.w, (neg)r3.x +mul.f r3.z, c16.y, r1.z +absneg.f r2.z, (abs)r2.z mul.f r1.z, r1.z, r1.z -absneg.f r1.y, (abs)r1.y -add.f r3.y, c16.w, (neg)r3.y -mul.f r3.z, c16.y, r2.y -mul.f r2.y, r2.y, r2.y -mul.f r3.w, c16.y, r1.y -mul.f r1.z, r1.z, r3.y -add.f r3.y, c16.w, (neg)r3.z -mul.f r1.y, r1.y, r1.y -add.f r3.z, c16.w, (neg)r3.w -mov.f32f32 r1.z, r1.z -mul.f r2.y, r2.y, r3.y -mov.f32f32 r3.x, r3.x -mul.f r1.y, r1.y, r3.z -mul.f r1.z, r1.z, r2.x -mov.f32f32 r2.y, r2.y +mul.f r0.w, r0.w, r3.x +mul.f r3.x, r0.y, c18.x +add.f r3.z, c16.w, (neg)r3.z +mul.f r3.w, c16.y, r2.z +mul.f r2.z, r2.z, r2.z max.f r3.x, r3.x, c15.y -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.z, r1.z -mul.f r2.x, r2.y, r2.x -min.f r2.y, r3.x, c19.y -max.f r1.w, r1.w, c17.z -mul.f r3.x, c8.y, r0.x +mul.f r1.z, r1.z, r3.z +mov.f32f32 r3.z, r2.x +add.f r3.w, c16.w, (neg)r3.w +min.f r3.x, r3.x, c19.y +mul.f r4.x, c8.y, r0.x mul.f r0.x, c8.w, r0.x -min.f r2.y, r2.y, c15.w -mov.f32f32 r1.w, r1.w -mad.f32 r3.x, c9.y, r0.y, r3.x +mad.f32 r4.x, c9.y, r0.y, r4.x +min.f r3.x, r3.x, c15.w +mul.f r1.z, r1.z, r3.z +mul.f r2.z, r2.z, r3.w +mad.f32 r3.z, c10.y, r0.z, r4.x +mul.f r0.w, r0.w, r3.x +mov.f32f32 r3.x, r1.z +mul.f r2.x, r2.z, r2.x +add.f r2.z, r3.z, c11.y +mov.f32f32 r3.z, r0.w +mad.f32 r0.w, c14.y, r0.w, r1.z +mad.f32 r1.z, r2.x, r1.w, r2.w +mad.f32 r1.w, r2.x, r3.y, r2.y +mad.f32 r2.y, c14.x, r3.z, r3.x +mad.f32 r2.x, r2.x, r3.y, r2.z mad.f32 r0.x, c9.w, r0.y, r0.x -mov.f32f32 r0.y, r2.y -min.f r1.w, r1.w, c17.w -mad.f32 r2.y, c10.y, r0.z, r3.x -mad.f32 r0.x, c10.w, r0.z, r0.x -mul.f r0.y, r1.y, r0.y -mov.f32f32 r0.z, r1.w -add.f r1.y, r2.y, c11.y -add.f r0.x, r0.x, c11.w -mov.f32f32 r0.y, r0.y -mad.f32 r1.w, r2.x, r0.z, r2.w -mad.f32 r2.y, r2.x, r0.z, r1.y -mad.f32 r0.z, r2.x, r0.z, r2.z -mad.f32 r1.y, c14.x, r0.y, r1.z -mad.f32 r0.y, c14.y, r0.y, r1.z -mov.f32f32 r1.x, r1.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.z, r1.y -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.y, r1.x -mov.f32f32 r1.x, r0.w -add.f r0.w, r1.w, r1.z -add.f r0.y, r0.z, r0.y -(rpt1)nop -mov.f32f32 r0.z, r0.w -mov.f32f32 r0.y, r0.y -(rpt1)nop -mul.f r0.w, c0.w, r0.z -mul.f r1.z, c0.z, r0.z -mad.f32 r0.w, c1.w, r2.y, r0.w -mad.f32 r1.z, c1.z, r2.y, r1.z -mul.f r1.w, c0.y, r0.z -mul.f r0.z, c0.x, r0.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.z, r1.z -mad.f32 r2.x, c2.w, r0.y, r0.w -mad.f32 r1.z, c2.z, r0.y, r1.z -mad.f32 r0.w, c3.w, r0.x, r2.x -mad.f32 r2.z, c3.z, r0.x, r1.z -mad.f32 r1.w, c1.y, r2.y, r1.w -mad.f32 r0.z, c1.x, r2.y, r0.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r2.y, r2.z -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.z, r0.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.z, r2.y -mad.f32 r1.w, c2.y, r0.y, r1.w -mad.f32 r0.y, c2.x, r0.y, r2.z -mad.f32 r2.y, c3.y, r0.x, r1.w -mad.f32 r0.x, c3.x, r0.x, r0.y -mov.f32f32 r1.w, r2.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r0.y, r2.y -mov.f32f32 r0.x, r0.x +nop +add.f r0.y, r1.w, r2.y +add.f r2.y, r1.z, r0.w (rpt1)nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x +mov.f32f32 r0.w, r0.y +mul.f r0.y, c0.x, r0.y +mov.f32f32 r2.z, r2.y +mad.f32 r0.x, c10.w, r0.z, r0.x +mul.f r0.z, c0.w, r0.w +mul.f r1.z, c0.z, r0.w +mad.f32 r0.z, c1.w, r2.x, r0.z +mad.f32 r1.z, c1.z, r2.x, r1.z +mad.f32 r1.w, c2.w, r2.z, r0.z +add.f r2.w, r0.x, c11.w +mad.f32 r1.z, c2.z, r2.z, r1.z +mul.f r0.x, c0.y, r0.w +mad.f32 r0.y, c1.x, r2.x, r0.y +mad.f32 r0.w, c3.w, r2.w, r1.w +mad.f32 r0.z, c3.z, r2.w, r1.z +mad.f32 r0.x, c1.y, r2.x, r0.x +mad.f32 r0.y, c2.x, r2.y, r0.y +mad.f32 r2.x, c2.y, r2.z, r0.x +mad.f32 r0.x, c3.x, r2.w, r0.y +mad.f32 r0.y, c3.y, r2.w, r2.x end nop nop nop ; VERT: outputs: r0.x (0:0) r1.x (5:9) -; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=3,il=12,b=0) -; VERT: 152 instructions, 0 half, 4 full +; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r1.x (0:0,cm=3,il=12,b=0) +; VERT: 110 instructions, 0 half, 5 full diff --git a/reference/0ad-alpine-valley/0ad-40.asm b/reference/0ad-alpine-valley/0ad-40.asm index c9ec6ed..df4bfc0 100644 --- a/reference/0ad-alpine-valley/0ad-40.asm +++ b/reference/0ad-alpine-valley/0ad-40.asm @@ -6,59 +6,32 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 -(sy)(ss)bary.f r0.z, 1, r0.x -bary.f r0.w, 0, r0.x -bary.f (ei)r0.x, 2, r0.x +@const(c0.x) 0x3e800000, 0x40800000, 0x00000000, 0x00000000 +(sy)(ss)bary.f r0.w, 1, r0.x +bary.f r0.z, 0, r0.x +bary.f (ei)r1.x, 2, r0.x nop -add.f r0.y, c0.x, (neg)r0.z -mov.f32f32 r0.w, r0.w -cmps.f.lt r1.x, c0.z, r0.z -mov.f32f32 r0.x, r0.x -mul.f r0.y, r0.y, c0.y -mov.f32f32 r1.y, r0.w -mov.f32f32 r0.z, r0.z -cov.u32f32 r0.w, r1.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.w, r0.x -mov.f32f32 r1.z, r0.z +add.f r0.x, c0.x, (neg)r0.w +cmps.f.lt r0.y, c0.z, r0.w +(rpt1)nop +mul.f r0.x, r0.x, c0.y +sam.3d (f32)(xyzw)r2.x, r0.z, s#0, t#0 +cov.u32f32 r0.y, r0.y +(rpt1)nop +(ss)mov.f32f32 r0.z, r0.x +(sy)mul.f r0.x, r2.x, r0.x +cmps.f.ne r0.y, r0.y, c0.z nop -mov.f32f32 r0.x, r0.y -mov.f32f32 r0.z, r0.y -mov.f32f32 r1.x, r0.y -mov.f32f32 r0.y, r0.y -cmps.f.ne r0.w, r0.w, c0.z -sam.3d (f32)(xyzw)r1.y, r1.y, s#0, t#0 -(sy)mov.f32f32 r2.y, r2.x -mov.f32f32 r2.z, r1.w -mov.f32f32 r2.w, r1.z -mov.f32f32 r3.x, r1.y -mul.f r0.x, r2.y, r0.x -mul.f r0.z, r2.z, r0.z -mul.f r1.x, r2.w, r1.x -mul.f r0.y, r3.x, r0.y -mov.f32f32 r0.x, r0.x -mov.f32f32 r2.x, r2.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r1.x, r1.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r2.x, r2.x -(ss)mov.f32f32 r1.w, r1.w -mov.f32f32 r1.z, r1.z -mov.f32f32 r1.y, r1.y -sel.b32 r0.x, r0.x, r0.w, r2.x -mov.f32f32 r2.x, r1.w -mov.f32f32 r1.z, r1.z -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.w, r0.x -sel.b32 r0.x, r0.z, r0.w, r2.x -sel.b32 r0.z, r1.x, r0.w, r1.z -sel.b32 r0.y, r0.y, r0.w, r1.y +mul.f r0.w, r2.w, r0.z +mul.f r1.x, r2.z, r0.z +mul.f r0.z, r2.y, r0.z nop -mov.f32f32 r1.z, r0.x -mov.f32f32 r1.y, r0.z -mov.f32f32 r1.x, r0.y +sel.b32 r1.w, r0.w, r0.y, r2.w +sel.b32 r1.z, r1.x, r0.y, r2.z +sel.b32 r1.y, r0.z, r0.y, r2.y +sel.b32 r1.x, r0.x, r0.y, r2.x end ; FRAG: outputs: r1.x (1:0) ; FRAG: inputs: r0.x (5:9,cm=f,il=8,b=1) -; FRAG: 52 instructions, 0 half, 4 full +; FRAG: 26 instructions, 0 half, 3 full diff --git a/reference/0ad-alpine-valley/0ad-46.asm b/reference/0ad-alpine-valley/0ad-46.asm index b06167e..eb8f852 100644 --- a/reference/0ad-alpine-valley/0ad-46.asm +++ b/reference/0ad-alpine-valley/0ad-46.asm @@ -8,199 +8,131 @@ @out(r2.y) out1 @out(r2.z) out2 @out(r2.w) out3 +@const(c8.x) 0x3f000000, 0x00000000, 0x3f800000, 0xba03126f +@const(c9.x) 0xbf000000, 0x40000000, 0x3f800000, 0x3fb8aa65 +@const(c10.x) 0x3de38866, 0x3cf5c28f, 0x00000000, 0x00000000 +@const(c11.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.x, 0, r1.x add.f r0.y, r0.w, c8.y bary.f r0.w, 1, r1.x bary.f r1.z, 4, r1.x -add.f r1.w, r0.x, c9.x -bary.f r2.x, 6, r1.x -bary.f r2.y, 2, r1.x -add.f r2.z, r0.w, c9.x -floor.f r2.w, r1.w +add.f r2.x, r0.x, c9.x +bary.f r1.w, 5, r1.x +add.f r2.y, r0.w, c9.x +bary.f r2.z, 2, r1.x +floor.f r2.w, r2.x rcp r0.y, r0.y add.f r0.z, r0.z, c8.y -mov.f32f32 r1.z, r1.z -floor.f r3.x, r2.z -add.f r1.w, r1.w, (neg)r2.w +floor.f r3.x, r2.y +add.f r3.w, r2.z, c8.w +add.f r2.x, r2.x, (neg)r2.w (ss)mul.f r0.y, r0.z, r0.y -mov.f32f32 r3.y, r1.z -add.f r0.z, r2.z, (neg)r3.x -mov.f32f32 r1.z, r1.w +absneg.f r0.z, (neg)c6.x +add.f r2.y, r2.y, (neg)r3.x +mov.f32f32 r2.z, r2.x +add.f r2.x, r2.x, c8.z +mul.f r0.z, r0.z, c6.x +sam (f32)(w)r4.x, r1.z, s#1, t#1 +(ss)mov.f32f32 r1.z, r2.y +mul.f r1.w, c8.x, r2.z +add.f r2.z, c9.y, (neg)r2.z +mul.f r0.z, r0.z, r0.y mov.f32f32 r0.y, r0.y -absneg.f r1.w, (neg)c6.x -mov.f32f32 r0.z, r0.z -mul.f r2.z, c8.x, r1.z -add.f r2.w, c9.y, (neg)r1.z -mul.f r1.w, r1.w, c6.x -add.f r3.x, c9.y, (neg)r0.z -mov.f32f32 r2.z, r2.z -mul.f r3.z, c8.x, r0.z -mul.f r1.w, r1.w, r0.y -mov.f32f32 r2.w, r2.w -add.f r0.x, r0.x, (neg)r2.z -mov.f32f32 r2.z, r3.z -mov.f32f32 r1.w, r1.w -mov.f32f32 r3.x, r3.x -mov.f32f32 r0.x, r0.x -add.f r0.w, r0.w, (neg)r2.z -mul.f r0.y, r1.w, r0.y -mul.f r1.w, r2.w, r3.x -add.f r2.z, c9.x, r0.x +add.f r0.x, r0.x, (neg)r1.w +mul.f r1.w, c8.x, r1.z +mov.f32f32 r2.w, r2.z +mul.f r0.y, r0.z, r0.y +mov.f32f32 r0.z, r0.x +add.f r0.w, r0.w, (neg)r1.w add.f r0.x, c9.z, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r2.z, r2.z -mov.f32f32 r0.x, r0.x mul.f r0.y, r0.y, c9.w -add.f r3.z, c9.z, r0.w -mul.f r2.z, r2.z, c3.z -mul.f r0.x, r0.x, c3.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r3.z, r3.z -mov.f32f32 r3.w, r2.z -mov.f32f32 r4.x, r0.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r2.z, r2.z -mov.f32f32 r4.y, r3.w -add.f r0.w, c9.x, r0.w -exp2 r0.y, r0.y -(ss)mov.f32f32 r0.y, r0.y -mov.f32f32 r3.w, r4.x -mov.f32f32 r5.x, r0.x -mov.f32f32 r0.x, r0.w -add.f r0.w, c11.y, (neg)r0.y -mov.f32f32 r5.w, r3.w -mul.f r3.z, r3.z, c3.w -mul.f r0.x, r0.x, c3.w -mul.f r0.w, r0.w, c6.y -mul.f r0.y, r0.y, c8.z -mov.f32f32 r3.w, r3.z -mov.f32f32 r4.x, r0.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r6.z, r2.z -mov.f32f32 r2.z, r3.z -mov.f32f32 r4.z, r4.x -add.f r2.y, r2.y, c8.w -add.f r0.y, r0.y, r0.w -mov.f32f32 r0.w, r3.w -mov.f32f32 r5.y, r0.x -mov.f32f32 r0.x, r2.y -mov.f32f32 r0.y, r0.y -mov.f32f32 r6.x, r0.w -mov.f32f32 r0.w, r2.y -mov.f32f32 r4.w, r0.x -mov.f32f32 r0.x, r0.y -mov.f32f32 r0.y, r2.y -mov.f32f32 r6.w, r2.z -mov.f32f32 r6.y, r0.w -mov.f32f32 r0.w, r2.y -mov.f32f32 r5.z, r0.y -sam.s (f32)(x)r3.z, r4.y, s#2, t#2 -(sy)mov.f32f32 r0.y, r3.z +add.f r0.z, c9.x, r0.z +mov.f32f32 r1.w, r0.w +mul.f r3.y, r0.x, c3.z +add.f r0.x, c9.z, r0.w +mul.f r4.x, r0.z, c3.z +add.f r0.z, c9.x, r1.w +mov.f32f32 r5.x, r3.y +mul.f r3.z, r0.x, c3.w +mov.f32f32 r5.w, r4.x +mul.f r5.y, r0.z, c3.w +exp2 r0.x, r0.y +(ss)mov.f32f32 r0.y, r0.x +mov.f32f32 r5.z, r3.w +mov.f32f32 r4.y, r3.z +mov.f32f32 r6.x, r5.y +mov.f32f32 r6.y, r3.w +add.f r0.y, c11.y, (neg)r0.y +mov.f32f32 r4.z, r3.w +sam.s (f32)(x)r3.x, r3.y, s#2, t#2 +add.f r0.z, c9.y, (neg)r1.z +sam.s (f32)(x)r6.z, r5.x, s#2, t#2 +mul.f r0.x, r0.x, c8.z +add.f r0.w, r2.y, c8.z +mul.f r0.y, r0.y, c6.y +(ss)nop +sam.s (f32)(x)r5.x, r5.w, s#2, t#2 +mov.f32f32 r1.z, r0.z +mul.f r0.z, r2.x, r0.z +mul.f r1.w, r2.z, r0.w +add.f r0.x, r0.x, r0.y +mul.f r0.y, r2.w, r1.z +sam.s (f32)(x)r3.y, r4.x, s#2, t#2 +mul.f r0.w, r2.x, r0.w +(sy)cmps.f.lt r1.z, r4.w, c10.y +bary.f r2.x, 6, r1.x +mul.f r0.y, r0.y, r5.x max.f r0.x, r0.x, c8.y -mov.f32f32 r7.x, r0.w -mov.f32f32 r0.w, r1.w -mov.f32f32 r0.y, r0.y +mad.f32 r0.y, r0.z, r6.z, r0.y +cov.u32f32 r0.z, r1.z +mad.f32 r0.y, r1.w, r3.y, r0.y min.f r0.x, r0.x, c8.z -sam.s (f32)(x)r3.z, r5.w, s#2, t#2 -nop -(sy)mov.f32f32 r1.w, r3.z -mul.f r0.y, r0.w, r0.y -sam.s (f32)(x)r3.z, r5.x, s#2, t#2 -(sy)mov.f32f32 r0.w, r3.z -add.f r1.z, r1.z, c8.z -add.f r2.y, c11.y, (neg)r0.x -add.f r2.z, c11.y, (neg)r0.x -add.f r3.z, c11.y, (neg)r0.x -mul.f r3.x, r1.z, r3.x -mul.f r2.y, r2.y, c5.z -mul.f r2.z, r2.z, c5.y -mul.f r3.w, r3.z, c5.x -mov.f32f32 r3.x, r3.x -sam.s (f32)(x)r4.x, r6.z, s#2, t#2 -add.f r0.z, r0.z, c8.z -(sy)mov.f32f32 r3.z, r4.x -bary.f r4.x, 5, r1.x -mad.f32 r0.y, r3.x, r0.w, r0.y -mul.f r0.w, r2.w, r0.z +mad.f32 r0.y, r0.w, r3.x, r0.y +cmps.f.ne r0.z, r0.z, c8.y (rpt1)nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.w, r0.w -mul.f r0.z, r1.z, r0.z -mov.f32f32 r1.z, r4.x -mov.f32f32 r2.x, r2.x -mad.f32 r0.y, r0.w, r3.z, r0.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r3.z, r1.z -mov.f32f32 r4.x, r2.x -mov.f32f32 r0.y, r0.y -bary.f r0.w, 7, r1.x -mad.f32 r0.y, r0.z, r1.w, r0.y -mov.f32f32 r0.z, c8.z -bary.f r1.z, 10, r1.x -sam (f32)(w)r2.w, r3.y, s#1, t#1 -(sy)cmps.f.lt r1.w, r3.z, c10.y -mov.f32f32 r0.y, r0.y -mov.f32f32 r2.x, r3.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r2.w, r0.z mul.f r0.y, c10.x, r0.y -cov.u32f32 r0.z, r1.w -mov.f32f32 r1.w, r2.x -(ss)mov.f32f32 r4.y, r0.w -mov.f32f32 r0.y, r0.y -cmps.f.ne r0.z, r0.z, c8.y -nop -mov.f32f32 r0.w, c8.y -bary.f r2.x, 9, r1.x +bary.f r2.y, 7, r1.x +add.f r0.w, c11.y, (neg)r0.x +add.f r1.z, c11.y, (neg)r0.x +mov.f32f32 r1.w, r0.y +add.f r2.z, c11.y, (neg)r0.x +(rpt1)nop +sam (f32)(xyz)r2.w, r2.x, s#0, t#0 +(ss)bary.f r2.x, 10, r1.x +bary.f r2.y, 9, r1.x bary.f (ei)r1.x, 8, r1.x -sam (f32)(xyz)r4.x, r4.x, s#0, t#0 -(sy)mul.f r1.y, r4.z, r1.z -sel.b32 r0.z, r0.w, r0.z, r1.w -mul.f r0.w, r4.y, r2.x -mul.f r1.x, r4.x, r1.x -mul.f r1.y, r1.y, r0.y -(rpt2)nop -mov.f32f32 r1.y, r1.y -mul.f r0.w, r0.w, r0.y -mad.f32 r1.y, c4.z, r4.z, r1.y +mul.f r0.w, r0.w, c5.z +(sy)mul.f r1.y, r3.y, r2.x +mul.f r2.x, r3.x, r2.y +mul.f r1.x, r2.w, r1.x +mul.f r1.z, r1.z, c5.y +mul.f r1.y, r1.y, r1.w +mul.f r1.w, r2.x, r1.w +mad.f32 r1.y, c4.z, r3.y, r1.y +mad.f32 r1.w, c4.y, r3.x, r1.w mul.f r0.y, r1.x, r0.y -(rpt1)nop -mov.f32f32 r1.x, r1.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.y, r0.y -mad.f32 r0.w, c4.y, r4.y, r0.w -mul.f r1.x, r0.x, r1.x -mad.f32 r0.y, c4.x, r4.x, r0.y -(rpt1)nop -add.f r1.x, r1.x, r2.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.y, r0.y +mul.f r1.x, r2.z, c5.x +mul.f r1.y, r0.x, r1.y +mul.f r1.w, r0.x, r1.w +mad.f32 r0.y, c4.x, r2.w, r0.y +mov.f32f32 r2.x, c8.y +add.f r0.w, r1.y, r0.w +add.f r1.y, r1.w, r1.z nop -mul.f r1.x, r1.x, r0.z -mul.f r0.w, r0.x, r0.w +sel.b32 r0.z, r2.x, r0.z, r4.w mul.f r0.x, r0.x, r0.y +mov.f32f32 r2.w, c8.z nop -mov.f32f32 r0.y, r1.x -add.f r0.w, r0.w, r2.z -add.f r0.x, r0.x, r3.w +mul.f r2.z, r0.w, r0.z +mul.f r2.y, r1.y, r0.z +add.f r0.x, r0.x, r1.x +(rpt2)nop +mul.f r2.x, r0.x, r0.z +end nop -mov.f32f32 r0.y, r0.y -mul.f r0.w, r0.w, r0.z -mul.f r0.x, r0.x, r0.z nop -mov.f32f32 r2.z, r0.y -mov.f32f32 r0.y, r0.w -mov.f32f32 r0.x, r0.x -(rpt1)nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x -(rpt1)nop -mov.f32f32 r2.y, r0.y -mov.f32f32 r2.x, r0.x -end nop ; FRAG: outputs: r2.x (1:0) -; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r0.x (5:10,cm=f,il=12,b=1) r3.x (5:11,cm=f,il=16,b=1) -; FRAG: 198 instructions, 0 half, 8 full +; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r1.z (5:10,cm=f,il=12,b=1) r3.x (5:11,cm=f,il=16,b=1) +; FRAG: 121 instructions, 0 half, 7 full diff --git a/reference/0ad-alpine-valley/0ad-49.asm b/reference/0ad-alpine-valley/0ad-49.asm index bdc5151..2dd080b 100644 --- a/reference/0ad-alpine-valley/0ad-49.asm +++ b/reference/0ad-alpine-valley/0ad-49.asm @@ -8,6 +8,11 @@ @out(r2.y) out1 @out(r2.z) out2 @out(r2.w) out3 +@const(c8.x) 0x3f000000, 0x00000000, 0x3f800000, 0xbeef9097 +@const(c9.x) 0x3cff9724, 0xba03126f, 0xbf000000, 0x40000000 +@const(c10.x) 0x3f800000, 0xbf000000, 0x3fb8aa65, 0x3de38866 +@const(c11.x) 0x3cf5c28f, 0x00000000, 0x00000000, 0x00000000 +@const(c12.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.x, 9, r1.x bary.f r0.y, 0, r1.x add.f r0.w, r0.w, c8.y @@ -16,275 +21,171 @@ mul.f r1.w, r0.x, r0.x bary.f r2.x, 10, r1.x add.f r2.y, r0.y, c9.z add.f r2.z, r1.z, c9.z -bary.f r2.w, 4, r1.x +bary.f r3.x, 4, r1.x mad.f32 r1.w, r2.x, r2.x, r1.w -floor.f r3.x, r2.y +bary.f r2.w, 11, r1.x +floor.f r3.y, r2.y rcp r0.w, r0.w add.f r0.z, r0.z, c8.y -floor.f r3.y, r2.z -mov.f32f32 r1.w, r1.w -bary.f r3.z, 11, r1.x -add.f r2.y, r2.y, (neg)r3.x +floor.f r3.z, r2.z +mad.f32 r1.w, r2.w, r2.w, r1.w +add.f r2.y, r2.y, (neg)r3.y (ss)mul.f r0.z, r0.z, r0.w -(ss)add.f r0.w, r2.z, (neg)r3.y -mad.f32 r1.w, r3.z, r3.z, r1.w -mov.f32f32 r2.y, r2.y -mov.f32f32 r0.z, r0.z -absneg.f r2.z, (neg)c6.x -mov.f32f32 r0.w, r0.w -mul.f r3.x, c8.x, r2.y -add.f r3.y, c9.w, (neg)r2.y +(ss)absneg.f r0.w, (neg)c6.x +add.f r2.z, r2.z, (neg)r3.z +mov.f32f32 r3.y, r2.y +add.f r2.y, r2.y, c8.z rsq r1.w, r1.w -(ss)mov.f32f32 r1.w, r1.w -mul.f r2.z, r2.z, c6.x -mov.f32f32 r3.x, r3.x -mul.f r3.w, c8.x, r0.w -mul.f r0.x, r0.x, r1.w -mul.f r2.z, r2.z, r0.z -add.f r0.y, r0.y, (neg)r3.x -mov.f32f32 r3.x, r3.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r2.z, r2.z -mov.f32f32 r0.y, r0.y -add.f r1.z, r1.z, (neg)r3.x -absneg.f r0.x, (abs)r0.x -mul.f r0.z, r2.z, r0.z -add.f r2.z, c10.y, r0.y -add.f r0.y, c10.x, r0.y -mov.f32f32 r0.x, r0.x +(ss)mov.f32f32 r3.z, r1.w +mul.f r0.w, r0.w, c6.x +mul.f r3.w, c8.x, r3.y +mov.f32f32 r4.x, r2.z +mul.f r0.x, r0.x, r3.z +mul.f r0.w, r0.w, r0.z mov.f32f32 r0.z, r0.z -mov.f32f32 r2.z, r2.z -mov.f32f32 r0.y, r0.y +add.f r0.y, r0.y, (neg)r3.w +absneg.f r0.x, (abs)r0.x +mul.f r3.w, c8.x, r4.x +mul.f r0.z, r0.w, r0.z +add.f r0.w, c10.x, r0.y add.f r0.x, r0.x, c8.w +mov.f32f32 r0.y, r0.y mul.f r0.z, r0.z, c10.z -mul.f r2.z, r2.z, c3.z -mul.f r0.y, r0.y, c3.z -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r3.x, r2.z -mov.f32f32 r3.w, r0.y +add.f r1.z, r1.z, (neg)r3.w max.f r0.x, r0.x, c8.y -mov.f32f32 r0.y, r0.y -mov.f32f32 r2.z, r2.z -mov.f32f32 r4.x, r3.x -mov.f32f32 r0.x, r0.x -mul.f r2.x, r2.x, r1.w -mov.f32f32 r1.z, r1.z -exp2 r0.z, r0.z -(ss)mov.f32f32 r0.z, r0.z -mov.f32f32 r3.x, r3.w -mov.f32f32 r2.x, r2.x -add.f r3.w, c10.y, r1.z -add.f r4.y, c12.y, (neg)r0.z -mov.f32f32 r4.w, r3.x -absneg.f r2.x, (abs)r2.x -mov.f32f32 r3.x, r3.w -mul.f r3.w, r4.y, c6.y -mul.f r0.z, r0.z, c8.z -mov.f32f32 r2.x, r2.x -mul.f r3.x, r3.x, c3.w +mul.f r2.x, r2.x, r3.z +add.f r0.y, c10.y, r0.y +mov.f32f32 r3.z, r1.z +mul.f r4.y, r0.w, c3.z +absneg.f r0.w, (abs)r2.x +mul.f r5.x, r0.y, c3.z +exp2 r0.y, r0.z +(ss)mov.f32f32 r0.z, r0.y +add.f r2.x, c10.y, r3.z +add.f r0.w, r0.w, c8.w +mov.f32f32 r5.w, r5.x +add.f r0.z, c12.y, (neg)r0.z +mul.f r6.w, r2.x, c3.w +max.f r0.w, r0.w, c8.y +mov.f32f32 r6.z, r4.y add.f r1.z, c10.x, r1.z -add.f r0.z, r0.z, r3.w -add.f r2.x, r2.x, c8.w -mov.f32f32 r3.w, r3.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r0.z, r0.z -mov.f32f32 r2.x, r2.x -mov.f32f32 r4.y, r3.w -bary.f r3.w, 2, r1.x -mov.f32f32 r0.z, r0.z -max.f r2.x, r2.x, c8.y -mul.f r1.z, r1.z, c3.w -add.f r3.w, r3.w, c9.y -mov.f32f32 r5.z, r0.y -mov.f32f32 r0.y, r2.x -max.f r0.z, r0.z, c8.y -mov.f32f32 r2.x, r3.w -mov.f32f32 r5.x, r1.z -add.f r6.x, r0.x, r0.y -mul.f r1.w, r3.z, r1.w -mov.f32f32 r4.z, r2.x -min.f r0.z, r0.z, c8.z -mov.f32f32 r2.x, r5.x -mov.f32f32 r1.w, r1.w -mov.f32f32 r3.x, r3.x -mov.f32f32 r6.y, r2.z -add.f r2.z, c12.y, (neg)r0.z +mul.f r0.z, r0.z, c6.y +add.f r2.x, r0.x, r0.w +mul.f r1.w, r2.w, r1.w +mov.f32f32 r6.x, r6.w +mul.f r0.y, r0.y, c8.z +bary.f r2.w, 2, r1.x absneg.f r1.w, (abs)r1.w -sam.s (f32)(x)r6.z, r4.x, s#2, t#2 -(sy)mov.f32f32 r3.z, r6.z -(ss)add.f r4.x, c12.y, (neg)r0.z -mul.f r2.z, r2.z, c5.z -mov.f32f32 r1.w, r1.w -mov.f32f32 r3.z, r3.z -mov.f32f32 r3.y, r3.y -add.f r4.y, c9.w, (neg)r0.w -add.f r1.w, r1.w, c8.w -mul.f r4.x, r4.x, c5.y -add.f r4.z, c12.y, (neg)r0.z -mov.f32f32 r4.y, r4.y -mov.f32f32 r1.w, r1.w -mov.f32f32 r5.x, r2.x -mov.f32f32 r2.x, r3.w -mov.f32f32 r5.w, r3.x -max.f r1.w, r1.w, c8.y -mul.f r3.x, r3.y, r4.y -mul.f r4.z, r4.z, c5.x -mov.f32f32 r5.y, r2.x -mov.f32f32 r1.w, r1.w -mul.f r2.x, r3.x, r3.z -mov.f32f32 r3.x, r3.w -mov.f32f32 r1.z, r1.z -add.f r3.z, r6.x, r1.w -add.f r2.y, r2.y, c8.z -add.f r0.w, r0.w, c8.z -mov.f32f32 r2.w, r2.w -mov.f32f32 r3.z, r3.z -mov.f32f32 r6.x, r3.x -sam.s (f32)(x)r6.z, r4.w, s#2, t#2 -(rpt3)nop -(sy)mov.f32f32 r3.x, r6.z -(ss)rcp r4.w, r3.z -(ss)mov.f32f32 r4.w, r4.w -sam.s (f32)(x)r5.x, r5.z, s#2, t#2 -(sy)mov.f32f32 r5.x, r5.x -mul.f r4.y, r2.y, r4.y -rcp r5.y, r3.z -(ss)mov.f32f32 r5.y, r5.y -mul.f r0.x, r0.x, r4.w -(ss)rcp r3.z, r3.z -(ss)mov.f32f32 r3.z, r3.z -mad.f32 r2.x, r4.y, r5.x, r2.x -mul.f r0.y, r0.y, r5.y +mul.f r4.z, r1.z, c3.w +add.f r0.y, r0.y, r0.z +add.f r4.w, r2.w, c9.y +add.f r0.z, r1.w, c8.w +mov.f32f32 r5.y, r4.z mov.f32f32 r0.x, r0.x -bary.f r4.y, 13, r1.x -mov.f32f32 r2.x, r2.x -mov.f32f32 r6.z, r1.z -mov.f32f32 r1.z, r3.w -mul.f r3.w, r4.y, c9.x -mov.f32f32 r0.y, r0.y -mul.f r1.w, r1.w, r3.z -mov.f32f32 r6.w, r1.z -mov.f32f32 r1.z, r3.w -mul.f r3.y, r3.y, r0.w -mov.f32f32 r4.w, r2.w -mul.f r0.w, r2.y, r0.w -mov.f32f32 r1.z, r1.z -mov.f32f32 r1.w, r1.w -sam.s (f32)(x)r5.x, r6.y, s#2, t#2 -(sy)mov.f32f32 r2.y, r5.x -bary.f r2.w, 5, r1.x -mov.f32f32 r5.y, r1.z -bary.f r1.z, 14, r1.x -mad.f32 r2.x, r3.y, r2.y, r2.x -mov.f32f32 r2.y, r2.w -mov.f32f32 r2.w, r3.w -mul.f r1.z, r1.z, c9.x -mov.f32f32 r2.x, r2.x -mov.f32f32 r5.x, r2.y -mov.f32f32 r3.y, r2.w -mov.f32f32 r2.y, r1.z -mad.f32 r0.w, r0.w, r3.x, r2.x -mov.f32f32 r1.z, r1.z -bary.f r2.x, 12, r1.x -mov.f32f32 r2.y, r2.y mov.f32f32 r0.w, r0.w +max.f r0.z, r0.z, c8.y +mov.f32f32 r6.y, r4.w +max.f r0.y, r0.y, c8.y +mov.f32f32 r7.x, r4.w +mov.f32f32 r1.z, r0.z +mov.f32f32 r5.z, r4.w +min.f r0.y, r0.y, c8.z +sam.s (f32)(x)r7.y, r4.y, s#2, t#2 +add.f r1.w, c9.w, (neg)r3.y +add.f r1.z, r2.x, r1.z +sam.s (f32)(x)r7.z, r5.w, s#2, t#2 +nop +add.f r2.x, c12.y, (neg)r0.y +add.f r2.w, c12.y, (neg)r0.y +mov.f32f32 r3.y, r1.z +mov.f32f32 r3.z, r1.w +mul.f r2.x, r2.x, c5.z +mul.f r3.w, r2.w, c5.y +add.f r2.w, c12.y, (neg)r0.y +add.f r4.x, c9.w, (neg)r4.x +rcp r1.z, r1.z +(ss)mul.f r0.z, r0.z, r1.z +(ss)rcp r1.z, r3.y +(ss)mul.f r0.x, r0.x, r1.z +bary.f r1.z, 13, r1.x +mov.f32f32 r4.y, r4.x +(ss)rcp r3.y, r3.y +(ss)mul.f r0.w, r0.w, r3.y +mov.f32f32 r4.z, r0.x +mul.f r5.w, r1.z, c9.x +mul.f r1.z, r3.z, r4.y +mov.f32f32 r3.z, r0.w +mul.f r4.y, r2.w, c5.x +mov.f32f32 r6.x, r5.w +bary.f r2.w, 14, r1.x +(sy)mul.f r1.z, r1.z, r7.z +sam.s (f32)(x)r7.z, r6.z, s#2, t#2 +mov.f32f32 r4.w, r0.z +mul.f r4.x, r2.y, r4.x +mul.f r6.y, r2.w, c9.x +sam.s (f32)(x)r7.w, r5.x, s#2, t#2 +add.f r2.z, r2.z, c8.z +(ss)bary.f r3.y, 5, r1.x +(sy)mad.f32 r1.z, r4.x, r7.z, r1.z +bary.f r4.x, 12, r1.x +mov.f32f32 r2.w, c8.z +bary.f r5.x, 8, r1.x +sam (f32)(xyzw)r8.x, r6.x, s#0, t#0 +(sy)mul.f r5.y, r8.z, r4.z +(ss)mul.f r6.x, r4.x, c9.x +mul.f r4.x, r8.y, r4.z +mul.f r0.x, r8.x, r0.x +mul.f r1.w, r1.w, r2.z +mov.f32f32 r6.z, r6.x +mul.f r2.y, r2.y, r2.z +sam (f32)(w)r8.x, r3.x, s#1, t#1 +(sy)cmps.f.lt r2.z, r8.w, c11.x +mad.f32 r1.z, r1.w, r7.w, r1.z +sam (f32)(xyzw)r7.z, r5.w, s#0, t#0 +bary.f r1.w, 7, r1.x +mad.f32 r1.z, r2.y, r7.y, r1.z +cov.u32f32 r2.y, r2.z (ss)nop -sam (f32)(w)r5.z, r4.w, s#1, t#1 -(sy)cmps.f.lt r2.w, r6.y, c11.x -mov.f32f32 r3.x, r6.y -mov.f32f32 r5.z, r2.y -mul.f r0.w, c10.w, r0.w -cov.u32f32 r2.y, r2.w -mov.f32f32 r3.x, r3.x -mov.f32f32 r3.z, r1.z -mul.f r1.z, r2.x, c9.x -mov.f32f32 r2.x, c8.z -(ss)nop -sam (f32)(xyzw)r4.w, r5.y, s#0, t#0 -(sy)mul.f r4.y, r5.y, r0.x -mul.f r5.x, r5.x, r0.x -mov.f32f32 r2.w, r1.z -mul.f r0.x, r4.w, r0.x -mov.f32f32 r0.w, r0.w -cmps.f.ne r2.y, r2.y, c8.y -mov.f32f32 r3.w, r2.w -mov.f32f32 r4.w, c8.y -mov.f32f32 r1.z, r1.z -mov.f32f32 r2.w, r2.x -bary.f r2.x, 8, r1.x -(ss)bary.f r5.y, 7, r1.x +sam (f32)(xyzw)r5.z, r6.y, s#0, t#0 +(sy)mad.f32 r2.z, r6.x, r3.z, r5.y +mad.f32 r3.x, r5.w, r3.z, r4.x +mad.f32 r2.z, r8.x, r4.w, r2.z +mad.f32 r3.x, r7.w, r4.w, r3.x +mad.f32 r0.x, r5.z, r0.w, r0.x +mul.f r0.w, c10.w, r1.z +mov.f32f32 r1.z, r2.z +mov.f32f32 r3.y, r3.x +mad.f32 r0.x, r7.z, r0.z, r0.x +mov.f32f32 r0.z, r0.w +mul.f r1.z, r1.z, r5.x +mul.f r1.w, r3.y, r1.w +mov.f32f32 r3.y, r0.x bary.f (ei)r1.x, 6, r1.x -sam (f32)(xyzw)r5.z, r3.z, s#0, t#0 -(sy)mad.f32 r1.y, r6.x, r0.y, r4.y -(ss)mov.f32f32 r3.z, r1.z -mad.f32 r1.z, r5.w, r0.y, r5.x -mad.f32 r0.x, r5.z, r0.y, r0.x -sel.b32 r0.y, r4.w, r2.y, r3.x -mov.f32f32 r2.x, r2.x -mov.f32f32 r2.y, r5.y -mov.f32f32 r1.x, r1.x -sam (f32)(xyzw)r3.x, r3.y, s#0, t#0 -(sy)mad.f32 r1.y, r3.z, r1.w, r1.y -mad.f32 r1.z, r3.y, r1.w, r1.z -mad.f32 r0.x, r3.x, r1.w, r0.x -nop -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.z, r1.z -mov.f32f32 r0.x, r0.x -nop -mul.f r1.w, r1.y, r2.x -mul.f r2.x, r1.z, r2.y -mul.f r1.x, r0.x, r1.x -nop -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r1.x, r1.x -nop -mul.f r1.w, r1.w, r0.w -mul.f r2.x, r2.x, r0.w +mul.f r1.y, r1.z, r0.z +mul.f r0.z, r1.w, r0.z +mad.f32 r1.y, c4.z, r2.z, r1.y +mad.f32 r0.z, c4.y, r3.x, r0.z +mul.f r1.x, r3.y, r1.x +cmps.f.ne r1.z, r2.y, c8.y +mul.f r1.y, r0.y, r1.y +mul.f r0.z, r0.y, r0.z mul.f r0.w, r1.x, r0.w +mov.f32f32 r1.x, c8.y +add.f r1.y, r1.y, r2.x +add.f r0.z, r0.z, r3.w nop -mov.f32f32 r1.x, r1.w -mov.f32f32 r1.w, r2.x -mad.f32 r1.x, c4.z, r1.y, r1.x -mad.f32 r1.y, c4.y, r1.z, r1.w -mov.f32f32 r0.w, r0.w -nop -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.y, r1.y +sel.b32 r1.x, r1.x, r1.z, r8.w mad.f32 r0.x, c4.x, r0.x, r0.w -nop -mul.f r0.w, r0.z, r1.x -mul.f r1.x, r0.z, r1.y -mov.f32f32 r0.x, r0.x -nop -add.f r0.w, r0.w, r2.z -add.f r1.x, r1.x, r4.x -mul.f r0.x, r0.z, r0.x -nop -mul.f r0.z, r0.w, r0.y -mul.f r0.w, r1.x, r0.y -add.f r0.x, r0.x, r4.z -nop -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -mul.f r0.x, r0.x, r0.y -nop -mov.f32f32 r0.y, r0.z -mov.f32f32 r0.z, r0.w -mov.f32f32 r0.x, r0.x -nop -mov.f32f32 r2.z, r0.y -mov.f32f32 r2.y, r0.z -mov.f32f32 r0.x, r0.x +(rpt1)nop +mul.f r2.z, r1.y, r1.x +mul.f r2.y, r0.z, r1.x +mul.f r0.x, r0.y, r0.x +(rpt2)nop +add.f r0.x, r0.x, r4.y (rpt2)nop -mov.f32f32 r2.x, r0.x +mul.f r2.x, r0.x, r1.x end -nop -nop -nop ; FRAG: outputs: r2.x (1:0) -; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r4.z (5:10,cm=f,il=12,b=1) r2.w (5:11,cm=f,il=16,b=1) r4.x (5:12,cm=f,il=20,b=1) -; FRAG: 278 instructions, 0 half, 7 full +; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r1.x (5:10,cm=f,il=12,b=1) r2.x (5:11,cm=f,il=16,b=1) r4.x (5:12,cm=f,il=20,b=1) +; FRAG: 177 instructions, 0 half, 9 full diff --git a/reference/0ad-alpine-valley/0ad-51.asm b/reference/0ad-alpine-valley/0ad-51.asm index 890976a..4dd0acb 100644 --- a/reference/0ad-alpine-valley/0ad-51.asm +++ b/reference/0ad-alpine-valley/0ad-51.asm @@ -1,14 +1,14 @@ ; options: ; VERT: new compiler -@in(r2.w) in0 -@in(r3.x) in1 -@in(r3.y) in2 -@in(r5.x) in4 -@in(r5.y) in5 -@in(r5.z) in6 -@in(r8.x) in8 -@in(r8.y) in9 -@in(r8.z) in10 +@in(r6.z) in0 +@in(r6.w) in1 +@in(r7.x) in2 +@in(r5.w) in4 +@in(r6.x) in5 +@in(r6.y) in6 +@in(r2.w) in8 +@in(r3.x) in9 +@in(r3.y) in10 @out(r0.x) out0 @out(r0.y) out1 @out(r0.z) out2 @@ -41,163 +41,105 @@ @out(r7.y) out29 @out(r7.z) out30 @out(r7.w) out31 -(sy)(ss)add.f r0.x, c4.x, (neg)r2.w -mul.f r0.y, r5.x, r5.x -mul.f r0.z, c8.w, r2.w -mul.f r0.w, c8.z, r2.w +@const(c13.x) 0x3f800000, 0x00000000, 0xbf800000, 0x00000000 +@const(c14.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 +(sy)(ss)add.f r0.x, c4.x, (neg)r6.z +mul.f r0.y, r5.w, r5.w +mul.f r0.z, c8.y, r6.z +mul.f r0.w, c8.x, r6.z mul.f r1.x, r0.x, r0.x -add.f r1.y, c4.y, (neg)r3.x +add.f r1.z, c4.y, (neg)r6.w add.f r0.y, c13.x, (neg)r0.y -mad.f32 r0.z, c9.w, r3.x, r0.z -mad.f32 r0.w, c9.z, r3.x, r0.w -mad.f32 r1.x, r1.y, r1.y, r1.x -mov.f32f32 r0.y, r0.y -mad.f32 r0.z, c10.w, r3.y, r0.z -mad.f32 r0.w, c10.z, r3.y, r0.w -mov.f32f32 r1.x, r1.x -add.f r2.x, c4.z, (neg)r3.y -mul.f r2.y, r0.y, r0.y -mul.f r1.z, r5.y, r5.x -add.f r0.z, r0.z, c11.w -mad.f32 r1.x, r2.x, r2.x, r1.x -add.f r0.w, r0.w, c11.z -mul.f r2.z, c8.y, r2.w -mul.f r3.z, c8.x, r2.w -add.f r1.z, c13.y, (neg)r1.z -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w +mad.f32 r0.z, c9.y, r6.w, r0.z +mad.f32 r0.w, c9.x, r6.w, r0.w +mad.f32 r1.x, r1.z, r1.z, r1.x +add.f r1.w, c4.z, (neg)r7.x +mov.f32f32 r1.y, r0.y +mad.f32 r0.z, c10.y, r7.x, r0.z +mad.f32 r0.w, c10.x, r7.x, r0.w +mad.f32 r1.x, r1.w, r1.w, r1.x +mul.f r2.x, r1.y, r1.y +mul.f r1.y, r6.x, r5.w +add.f r0.z, r0.z, c11.y +add.f r0.w, r0.w, c11.x +mul.f r2.y, r6.y, r5.w +mul.f r2.z, c8.w, r6.z rsq r1.x, r1.x -(ss)mov.f32f32 r1.x, r1.x -mov.f32f32 r3.w, r1.z -mov.f32f32 r1.w, r0.z -mov.f32f32 r1.z, r0.w +(ss)mov.f32f32 r3.z, r1.x +add.f r3.w, c13.y, (neg)r1.y mad.f32 r0.x, r0.x, r1.x, (neg)c5.x -mad.f32 r0.z, r3.w, r3.w, r2.y -mad.f32 r0.w, r1.y, r1.x, (neg)c5.y -mad.f32 r1.x, r2.x, r1.x, (neg)c5.z -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.z, r0.z -mul.f r1.y, r5.z, r5.x -mov.f32f32 r0.w, r0.w -mul.f r2.x, r0.x, r0.x -mov.f32f32 r1.x, r1.x -add.f r1.y, c13.y, (neg)r1.y -mad.f32 r2.x, r0.w, r0.w, r2.x -mad.f32 r2.y, c9.y, r3.x, r2.z -mad.f32 r2.z, c9.x, r3.x, r3.z -mad.f32 r2.y, c10.y, r3.y, r2.y -mov.f32f32 r2.x, r2.x -mov.f32f32 r3.z, r1.y -mad.f32 r1.y, r1.x, r1.x, r2.x -add.f r2.x, r2.y, c11.y -mad.f32 r2.y, c10.x, r3.y, r2.z -mul.f r5.w, c0.w, r2.w -mul.f r6.x, c0.z, r2.w -mul.f r6.y, c0.y, r2.w -mul.f r6.z, c0.x, r2.w -rsq r1.y, r1.y -(ss)mov.f32f32 r1.y, r1.y -mad.f32 r0.z, r3.z, r3.z, r0.z -mul.f r2.x, r2.x, c12.y -add.f r2.y, r2.y, c11.x -mul.f r1.x, r1.x, r1.y -mul.f r0.w, r0.w, r1.y -mul.f r0.x, r0.x, r1.y -mov.f32f32 r1.y, r2.x -mov.f32f32 r1.x, r1.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.x, r0.x +mul.f r1.y, r0.z, c12.y +mad.f32 r0.z, r1.z, r3.z, (neg)c5.y +mov.f32f32 r1.z, r3.w +(ss)mov.f32f32 r1.x, r0.x +mad.f32 r1.w, r1.w, r3.z, (neg)c5.z +mov.f32f32 r3.z, r0.z +mad.f32 r2.x, r3.w, r1.z, r2.x +mul.f r1.x, r1.x, r1.x +add.f r2.y, c13.y, (neg)r2.y +mad.f32 r0.z, r0.z, r3.z, r1.x +mov.f32f32 r3.w, r1.w +mul.f r1.x, r0.w, c12.x +mov.f32f32 r0.w, r2.y +mad.f32 r2.z, c9.w, r6.w, r2.z +mad.f32 r0.z, r1.w, r3.w, r0.z +mad.f32 r1.w, c10.w, r7.x, r2.z +mul.f r2.z, c8.z, r6.z +mul.f r4.x, c0.w, r6.z +mul.f r5.x, c0.z, r6.z +mul.f r5.y, c0.y, r6.z +mul.f r5.z, c0.x, r6.z rsq r0.z, r0.z -(ss)mov.f32f32 r0.z, r0.z -mov.f32f32 r1.x, r1.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.x, r0.x +(ss)mov.f32f32 r4.z, r0.z +mul.f r4.y, r0.x, r0.z +mad.f32 r0.x, r2.y, r0.w, r2.x +add.f r1.w, r1.w, c11.w +mul.f r4.w, r3.w, r4.z +mul.f r4.z, r3.z, r4.z +(ss)mad.f32 r0.z, c9.z, r6.w, r2.z +mad.f32 r2.y, c1.w, r6.w, r4.x +mad.f32 r5.x, c1.z, r6.w, r5.x +rsq r0.x, r0.x +(ss)mov.f32f32 r3.z, r0.x +mul.f r2.x, r0.y, r0.x +(ss)mad.f32 r0.x, c10.z, r7.x, r0.z +mad.f32 r0.y, c2.w, r7.x, r2.y +mul.f r2.z, r0.w, r3.z +mul.f r2.y, r1.z, r3.z +mov.f32f32 r0.z, r2.x +add.f r1.z, r0.x, c11.z +mov.f32f32 r0.x, r2.z +mov.f32f32 r4.x, r2.y +mul.f r7.y, r6.x, r0.z +add.f r0.w, r0.y, c3.w +mul.f r0.y, r5.w, r0.x +mul.f r3.z, r6.y, r4.x +mad.f32 r3.w, r6.y, r0.z, (neg)r0.y +mad.f32 r3.z, r6.x, r0.x, (neg)r3.z +mad.f32 r4.x, r5.w, r4.x, (neg)r7.y +mad.f32 r0.x, c2.z, r7.x, r5.x +mad.f32 r0.y, c1.y, r6.w, r5.y +mad.f32 r5.x, c1.x, r6.w, r5.z +mad.f32 r0.y, c2.y, r7.x, r0.y +add.f r0.z, r0.x, c3.z +mad.f32 r0.x, c2.x, r7.x, r5.x nop -mov.f32f32 r4.w, r1.x -mov.f32f32 r4.z, r0.w -mov.f32f32 r4.y, r0.x -mul.f r0.x, r0.y, r0.z -mul.f r0.y, r3.z, r0.z -mul.f r0.z, r3.w, r0.z -mul.f r0.w, r2.y, c12.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r1.x, r0.w -mul.f r0.w, r5.y, r0.x -mul.f r2.x, r5.x, r0.y -mad.f32 r0.w, r5.x, r0.z, (neg)r0.w -mad.f32 r2.x, r5.z, r0.x, (neg)r2.x -mul.f r2.y, r5.z, r0.z -mov.f32f32 r2.z, r0.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r2.x, r2.x -mad.f32 r0.y, r5.y, r0.y, (neg)r2.y -mov.f32f32 r2.z, r2.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r3.w, r2.x -mov.f32f32 r0.y, r0.y -nop -mov.f32f32 r4.x, r0.w -mov.f32f32 r2.y, r0.z -mov.f32f32 r3.z, r0.y -mov.f32f32 r0.x, r0.x -mad.f32 r0.y, c1.w, r3.x, r5.w -mad.f32 r0.z, c1.z, r3.x, r6.x -mad.f32 r0.w, c1.y, r3.x, r6.y -mov.f32f32 r2.x, r0.x -mad.f32 r0.x, c2.w, r3.y, r0.y -mad.f32 r0.y, c2.z, r3.y, r0.z -mad.f32 r0.z, c2.y, r3.y, r0.w -mad.f32 r0.w, c1.x, r3.x, r6.z -add.f r0.x, r0.x, c3.w -add.f r0.y, r0.y, c3.z -add.f r5.w, r0.z, c3.y -mad.f32 r6.x, c2.x, r3.y, r0.w -mov.f32f32 r0.w, r0.x -mov.f32f32 r0.z, r0.y -mov.f32f32 r0.y, r5.w -add.f r0.x, r6.x, c3.x -mov.f32f32 r5.w, (0.000000) -mov.f32f32 r6.x, (0.000000) -mov.f32f32 r6.y, (0.000000) -mov.f32f32 r0.x, r0.x -mov.f32f32 r7.w, r5.w -mov.f32f32 r7.z, r6.x -mov.f32f32 r7.y, r6.y -mov.f32f32 r5.w, r3.y -mov.f32f32 r3.x, r3.x -mov.f32f32 r6.x, r2.w -mov.f32f32 r5.z, r5.z -mov.f32f32 r7.x, r5.w -mov.f32f32 r6.w, r3.x -mov.f32f32 r6.z, r6.x -mov.f32f32 r6.y, r5.z -mov.f32f32 r3.x, r5.y -mov.f32f32 r5.x, r5.x -mul.f r5.y, r8.z, c6.z -mul.f r8.y, r8.y, c6.y -mov.f32f32 r6.x, r3.x -mov.f32f32 r5.w, r5.x -mov.f32f32 r5.z, r5.y -mov.f32f32 r5.y, r8.y -mul.f r3.x, r8.x, c6.x -mad.f32 r3.y, c7.x, r3.y, c7.y -mad.f32 r2.w, c7.x, r2.w, c7.y -mov.f32f32 r8.x, c13.z -mov.f32f32 r5.x, r3.x -mov.f32f32 r3.y, r3.y -mov.f32f32 r3.x, r2.w -mov.f32f32 r2.w, r8.x +add.f r0.y, r0.y, c3.y +mov.f32f32 r7.w, (0.000000) +add.f r0.x, r0.x, c3.x +mov.f32f32 r7.z, (0.000000) +mov.f32f32 r7.y, (0.000000) +mul.f r5.z, r3.y, c6.z +mul.f r5.y, r3.x, c6.y +mul.f r5.x, r2.w, c6.x +mad.f32 r3.y, c7.x, r7.x, c7.y +mad.f32 r3.x, c7.x, r6.z, c7.y +mov.f32f32 r2.w, c13.z end nop nop nop ; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11) r4.x (5:12) r5.x (5:13) r6.x (5:14) r7.x (5:15) -; VERT: inputs: r2.w (0:0,cm=7,il=8,b=0) r5.x (0:0,cm=7,il=12,b=0) r8.x (0:0,cm=7,il=16,b=0) -; VERT: 153 instructions, 0 half, 9 full +; VERT: inputs: r6.z (0:0,cm=7,il=8,b=0) r5.w (0:0,cm=7,il=12,b=0) r2.w (0:0,cm=7,il=16,b=0) +; VERT: 93 instructions, 0 half, 8 full diff --git a/reference/0ad-alpine-valley/0ad-52.asm b/reference/0ad-alpine-valley/0ad-52.asm index 538f644..dc38031 100644 --- a/reference/0ad-alpine-valley/0ad-52.asm +++ b/reference/0ad-alpine-valley/0ad-52.asm @@ -8,539 +8,364 @@ @out(r2.y) out1 @out(r2.z) out2 @out(r2.w) out3 +@const(c13.x) 0x3f000000, 0x00000000, 0x3f800000, 0xbeef9097 +@const(c14.x) 0x3cff9724, 0x40000000, 0xbf800000, 0xba03126f +@const(c15.x) 0xbf000000, 0x3f800000, 0x3fb8aa65, 0x3de38866 +@const(c16.x) 0x3cf5c28f, 0x00000000, 0x00000000, 0x00000000 +@const(c17.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.x, 19, r1.x bary.f r0.y, 0, r1.x add.f r0.w, r0.w, c13.y bary.f r1.z, 1, r1.x -mov.f32f32 r0.x, r0.x -add.f r1.w, r0.y, c15.x -bary.f r2.x, 23, r1.x -bary.f r2.y, 24, r1.x -mul.f r2.z, r0.x, r0.x +mov.f32f32 r1.w, r0.x +add.f r2.x, r0.y, c15.x +bary.f r2.y, 8, r1.x +bary.f r2.z, 23, r1.x +mul.f r0.x, r0.x, r1.w bary.f r2.w, 20, r1.x -floor.f r3.x, r1.w +floor.f r3.x, r2.x rcp r0.w, r0.w add.f r0.z, r0.z, c13.y add.f r3.y, r1.z, c15.x -mov.f32f32 r2.w, r2.w -add.f r1.w, r1.w, (neg)r3.x +mov.f32f32 r3.z, r2.w +add.f r2.x, r2.x, (neg)r3.x (ss)mul.f r0.z, r0.z, r0.w -(ss)floor.f r0.w, r3.y -mad.f32 r2.z, r2.w, r2.w, r2.z -mov.f32f32 r1.w, r1.w +(ss)absneg.f r0.w, (neg)c10.x +mad.f32 r0.x, r2.w, r3.z, r0.x +bary.f r2.w, 21, r1.x +mov.f32f32 r3.x, r2.x +mul.f r0.w, r0.w, c10.x +floor.f r3.w, r3.y +mov.f32f32 r4.x, r2.w +mul.f r4.y, c13.x, r3.x +mul.f r0.w, r0.w, r0.z mov.f32f32 r0.z, r0.z -absneg.f r3.x, (neg)c10.x -mov.f32f32 r2.z, r2.z -bary.f r3.z, 21, r1.x -mul.f r3.w, c13.x, r1.w -mul.f r3.x, r3.x, c10.x -add.f r0.w, r3.y, (neg)r0.w -mov.f32f32 r3.y, r3.z -mov.f32f32 r3.z, r3.w -mul.f r3.x, r3.x, r0.z -mov.f32f32 r0.w, r0.w -mad.f32 r2.z, r3.y, r3.y, r2.z -add.f r0.y, r0.y, (neg)r3.z -mov.f32f32 r3.x, r3.x -mul.f r3.z, c13.x, r0.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r2.y, r2.y -mov.f32f32 r0.y, r0.y -rsq r3.w, r2.z -(ss)mul.f r4.x, r0.x, r3.w -rsq r4.y, r2.z -(ss)mov.f32f32 r4.y, r4.y -(ss)rsq r2.z, r2.z -(ss)mul.f r4.z, r0.x, r2.z -add.f r4.w, c15.x, r0.y -mov.f32f32 r4.x, r4.x -mul.f r5.x, r0.x, r4.y -mov.f32f32 r4.z, r4.z -mov.f32f32 r4.w, r4.w -absneg.f r4.x, (abs)r4.x -mov.f32f32 r5.x, r5.x +mad.f32 r0.x, r4.x, r4.x, r0.x +add.f r0.y, r0.y, (neg)r4.y +add.f r3.y, r3.y, (neg)r3.w +mul.f r0.z, r0.w, r0.z +add.f r0.w, c14.y, (neg)r3.x +mov.f32f32 r3.x, r0.y +mov.f32f32 r3.w, r3.y +rsq r4.y, r0.x +(ss)mul.f r4.z, r1.w, r4.y +rsq r4.w, r0.x +(ss)mov.f32f32 r5.x, r4.w +(ss)rsq r0.x, r0.x +(ss)mul.f r5.y, r1.w, r0.x +add.f r3.x, c15.x, r3.x absneg.f r4.z, (abs)r4.z -mul.f r4.w, r4.w, c5.z -mov.f32f32 r4.x, r4.x -absneg.f r5.x, (abs)r5.x -mov.f32f32 r4.z, r4.z -mov.f32f32 r5.y, r4.w -add.f r4.x, r4.x, c13.w -mov.f32f32 r5.x, r5.x +mul.f r5.z, r1.w, r5.x +absneg.f r5.y, (abs)r5.y +mul.f r5.w, r3.x, c5.z +add.f r3.x, r4.z, c13.w +absneg.f r4.z, (abs)r5.z +add.f r5.y, r5.y, c13.w +mov.f32f32 r6.x, r5.w +max.f r3.x, r3.x, c13.y +mul.f r5.z, r3.z, r4.y add.f r4.z, r4.z, c13.w -mov.f32f32 r5.y, r5.y -mov.f32f32 r4.x, r4.x -add.f r5.x, r5.x, c13.w -mov.f32f32 r4.z, r4.z -mov.f32f32 r3.z, r3.z -max.f r4.x, r4.x, c13.y -mov.f32f32 r5.x, r5.x -max.f r4.z, r4.z, c13.y -add.f r1.z, r1.z, (neg)r3.z -mov.f32f32 r3.z, r4.x -mul.f r4.x, r2.w, r3.w -max.f r5.x, r5.x, c13.y -mov.f32f32 r4.z, r4.z -mul.f r5.z, r2.w, r2.z -mov.f32f32 r4.x, r4.x -mov.f32f32 r5.x, r5.x -mul.f r5.w, r2.w, r4.y -mov.f32f32 r5.z, r5.z -absneg.f r4.x, (abs)r4.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r5.w, r5.w +max.f r5.y, r5.y, c13.y +mul.f r6.y, r3.z, r0.x absneg.f r5.z, (abs)r5.z -mov.f32f32 r4.x, r4.x -add.f r6.x, c15.x, r1.z -absneg.f r5.w, (abs)r5.w -mov.f32f32 r5.z, r5.z -add.f r4.x, r4.x, c13.w -mov.f32f32 r6.x, r6.x -mov.f32f32 r5.w, r5.w +max.f r4.z, r4.z, c13.y +mul.f r5.x, r3.z, r5.x +absneg.f r6.y, (abs)r6.y add.f r5.z, r5.z, c13.w -mov.f32f32 r4.x, r4.x -mul.f r6.x, r6.x, c5.w -add.f r5.w, r5.w, c13.w -mov.f32f32 r5.z, r5.z -max.f r4.x, r4.x, c13.y -mov.f32f32 r6.y, r6.x -mov.f32f32 r5.w, r5.w -max.f r6.z, r5.z, c13.y -mov.f32f32 r4.x, r4.x -mov.f32f32 r5.z, r6.y -max.f r5.w, r5.w, c13.y -mov.f32f32 r6.y, r6.z -add.f r6.z, r3.z, r4.x -mul.f r3.w, r3.y, r3.w -mov.f32f32 r6.w, r5.w -add.f r7.x, r4.z, r6.y -(ss)mul.f r2.z, r3.y, r2.z -mov.f32f32 r3.w, r3.w -add.f r7.y, r5.x, r6.w -mul.f r4.y, r3.y, r4.y -mov.f32f32 r2.z, r2.z -absneg.f r3.w, (abs)r3.w -bary.f r5.w, 2, r1.x -mov.f32f32 r4.y, r4.y -absneg.f r2.z, (abs)r2.z -mov.f32f32 r3.w, r3.w -add.f r7.z, r5.w, c14.w +mul.f r6.z, c13.x, r3.w +mul.f r0.z, r0.z, c15.z +absneg.f r5.x, (abs)r5.x +max.f r5.z, r5.z, c13.y +add.f r6.y, r6.y, c13.w +add.f r1.z, r1.z, (neg)r6.z +add.f r5.x, r5.x, c13.w +add.f r6.z, r3.x, r5.z +mul.f r4.y, r4.x, r4.y +max.f r6.w, r6.y, c13.y +max.f r5.x, r5.x, c13.y +mov.f32f32 r6.y, r1.z +absneg.f r4.y, (abs)r4.y +add.f r7.x, r5.y, r6.w +add.f r7.y, r4.z, r5.x +(ss)mul.f r0.x, r2.w, r0.x +add.f r2.w, r4.y, c13.w +mul.f r4.y, r4.x, r4.w +add.f r4.w, c15.x, r6.y +absneg.f r0.x, (abs)r0.x +max.f r2.w, r2.w, c13.y absneg.f r4.y, (abs)r4.y -mov.f32f32 r2.z, r2.z -add.f r3.w, r3.w, c13.w -mov.f32f32 r5.w, r7.z -mov.f32f32 r4.y, r4.y -add.f r2.z, r2.z, c13.w -mov.f32f32 r3.w, r3.w -mov.f32f32 r5.w, r5.w +mul.f r7.w, r4.w, c5.w +add.f r0.x, r0.x, c13.w +mov.f32f32 r4.w, r2.w add.f r4.y, r4.y, c13.w -mov.f32f32 r2.z, r2.z -max.f r3.w, r3.w, c13.y -mul.f r0.z, r3.x, r0.z -mov.f32f32 r3.x, r4.y -max.f r2.z, r2.z, c13.y -mov.f32f32 r3.w, r3.w -sam.s (f32)(x)r7.w, r5.y, s#4, t#4 -(sy)mov.f32f32 r4.y, r7.w -max.f r3.x, r3.x, c13.y -mov.f32f32 r2.z, r2.z -(ss)add.f r5.y, r6.z, r3.w -mov.f32f32 r4.y, r4.y +mov.f32f32 r6.y, r7.w +max.f r0.x, r0.x, c13.y +add.f r4.w, r6.z, r4.w +max.f r4.y, r4.y, c13.y +bary.f r6.z, 2, r1.x +mov.f32f32 r7.z, r0.x +mov.f32f32 r8.x, r4.w +mov.f32f32 r8.y, r4.y +add.f r9.x, r6.z, c14.w +add.f r7.x, r7.x, r7.z +exp2 r0.z, r0.z +(ss)mov.f32f32 r7.z, r0.z +add.f r7.y, r7.y, r8.y +mov.f32f32 r6.z, r9.x +rcp r8.y, r8.x mov.f32f32 r3.x, r3.x -add.f r5.z, r7.x, r2.z -mov.f32f32 r5.y, r5.y -add.f r5.w, c14.y, (neg)r1.w -add.f r6.z, r7.y, r3.x -mov.f32f32 r5.z, r5.z -mov.f32f32 r0.z, r0.z -mov.f32f32 r5.w, r5.w -add.f r7.x, c14.y, (neg)r0.w -rcp r7.y, r5.y -(ss)mov.f32f32 r7.y, r7.y -mov.f32f32 r6.z, r6.z -mul.f r0.z, r0.z, c15.z -rcp r7.w, r5.y -nop -rcp r8.x, r5.z -mul.f r3.z, r3.z, r7.y -(ss)mov.f32f32 r7.y, r8.x -mov.f32f32 r7.x, r7.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r3.z, r3.z -mul.f r2.x, r2.x, c14.x -rcp r8.x, r6.z -(ss)mov.f32f32 r8.x, r8.x -mul.f r4.z, r4.z, r7.y -mul.f r7.y, r5.w, r7.x -mov.f32f32 r2.x, r2.x -mul.f r5.x, r5.x, r8.x +mov.f32f32 r8.z, r7.x +mov.f32f32 r8.w, r7.y +add.f r7.z, c17.y, (neg)r7.z +(ss)mul.f r3.x, r3.x, r8.y +mul.f r9.y, r2.z, c14.x +sam.s (f32)(x)r9.w, r6.x, s#4, t#4 +rcp r2.z, r7.y +(ss)mov.f32f32 r6.x, r0.w +mul.f r6.y, r7.z, c10.y +mov.f32f32 r6.z, r3.x +mov.f32f32 r7.y, r9.y +rcp r7.z, r8.w mov.f32f32 r4.z, r4.z -mul.f r4.y, r7.y, r4.y -mov.f32f32 r7.y, r2.x -mov.f32f32 r5.x, r5.x -mov.f32f32 r8.x, r2.x -mov.f32f32 r8.y, r2.x -mov.f32f32 r8.z, r7.y -mul.f r2.y, r2.y, c14.x -mov.f32f32 r7.y, r8.x -mov.f32f32 r8.x, r8.y -add.f r0.y, c15.y, r0.y -mov.f32f32 r2.y, r2.y -mov.f32f32 r9.x, r7.y -rcp r7.y, r6.z -nop -(ss)rcp r6.z, r6.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r8.y, r2.y -mov.f32f32 r9.y, r2.y -mov.f32f32 r9.z, r2.y -mul.f r0.y, r0.y, c5.z -mov.f32f32 r8.w, r8.y -mov.f32f32 r9.y, r9.y -mov.f32f32 r8.y, r9.z -mov.f32f32 r9.z, r0.y -(ss)mov.f32f32 r7.y, r7.y -(ss)mov.f32f32 r6.z, r6.z -mov.f32f32 r9.y, r9.y -sam (f32)(xyzw)r9.w, r8.z, s#2, t#2 -(sy)(ss)mov.f32f32 r8.z, r9.w -add.f r8.w, c13.z, (neg)r10.x -mov.f32f32 r9.w, r10.y -sam (f32)(xyzw)r10.x, r8.x, s#0, t#0 -(sy)(ss)mul.f r8.x, r10.z, r4.z -mul.f r8.y, r8.z, r3.z -mov.f32f32 r7.w, r7.w -sam (f32)(xyzw)r10.z, r9.x, s#3, t#3 -(sy)mul.f r8.z, r10.w, r5.x -(ss)mul.f r9.x, r10.z, r5.x -mul.f r5.x, r11.x, r5.x -mul.f r4.x, r4.x, r7.w -mul.f r6.w, r6.w, r7.y -mov.f32f32 r7.y, r8.w -mul.f r7.w, r9.w, r3.z -mov.f32f32 r4.x, r4.x -mov.f32f32 r8.w, r2.y -mov.f32f32 r6.w, r6.w -mov.f32f32 r9.y, r2.y -mov.f32f32 r7.y, r7.y -mov.f32f32 r10.z, r8.w -bary.f r8.w, 22, r1.x -mov.f32f32 r11.x, r9.y -mul.f r3.z, r7.y, r3.z -rcp r7.y, r5.z -(ss)mov.f32f32 r7.y, r7.y -mov.f32f32 r8.w, r8.w -mov.f32f32 r9.y, r9.z -mul.f r10.y, r10.y, r4.z -mul.f r4.z, r10.x, r4.z -mul.f r8.w, r8.w, c14.x -mul.f r6.y, r6.y, r7.y -mov.f32f32 r6.x, r6.x -mul.f r3.x, r3.x, r6.z -mov.f32f32 r6.z, r8.w -mov.f32f32 r6.y, r6.y -mov.f32f32 r2.y, r2.y -mov.f32f32 r9.z, r6.x -mov.f32f32 r6.x, r6.z -mov.f32f32 r7.y, r6.z -mov.f32f32 r11.z, r2.y -mov.f32f32 r2.y, r6.z -mov.f32f32 r10.w, r6.x -mov.f32f32 r11.y, r7.y -mov.f32f32 r6.x, r7.z -mov.f32f32 r11.w, r2.y -mov.f32f32 r2.y, r3.x -exp2 r0.z, r0.z -(ss)mov.f32f32 r0.z, r0.z -mov.f32f32 r9.w, r6.x -sam (f32)(xyzw)r12.x, r10.z, s#2, t#2 -(sy)mov.f32f32 r3.x, r12.y -(ss)nop -sam (f32)(xyzw)r10.z, r11.x, s#3, t#3 -(sy)mad.f32 r6.x, r10.w, r6.w, r8.z -mad.f32 r7.y, r10.z, r6.w, r9.x -mad.f32 r5.x, r11.x, r6.w, r5.x -mad.f32 r3.x, r3.x, r4.x, r8.y -rcp r5.y, r5.y -(ss)mov.f32f32 r5.y, r5.y -mov.f32f32 r6.w, r2.x -add.f r8.y, c13.z, (neg)r12.x -mov.f32f32 r8.z, r12.z -mul.f r3.w, r3.w, r5.y -mov.f32f32 r8.w, r6.w -mov.f32f32 r5.y, r6.z -mov.f32f32 r6.w, r8.y -mov.f32f32 r3.w, r3.w -mov.f32f32 r8.y, r2.x -mov.f32f32 r9.x, r5.y -mov.f32f32 r5.y, r6.w -mad.f32 r6.w, r8.z, r4.x, r7.w -mov.f32f32 r8.y, r8.y -mov.f32f32 r7.w, r6.z -mad.f32 r3.z, r5.y, r4.x, r3.z -sam (f32)(xyzw)r10.z, r11.z, s#0, t#0 -(sy)mad.f32 r4.x, r10.w, r6.y, r10.y -(ss)nop -sam (f32)(xyzw)r11.y, r8.w, s#3, t#3 -(sy)mad.f32 r5.x, r11.w, r2.y, r5.x -mov.f32f32 r8.z, r7.w -mad.f32 r5.y, r11.z, r2.y, r6.x -mad.f32 r2.y, r11.y, r2.y, r7.y -mov.f32f32 r5.x, r5.x -mad.f32 r6.x, r11.x, r6.y, r8.x -rcp r5.z, r5.z +rcp r7.w, r8.z mov.f32f32 r5.y, r5.y -mov.f32f32 r2.y, r2.y -sam (f32)(xyzw)r7.w, r8.y, s#2, t#2 -(sy)mov.f32f32 r7.y, r8.x -mul.f r5.x, c7.z, r5.x -mul.f r5.y, c7.y, r5.y -mul.f r2.y, c7.x, r2.y -mad.f32 r3.x, r7.y, r3.w, r3.x -add.f r7.y, c13.z, (neg)r7.w -mov.f32f32 r7.w, r8.y -(ss)mov.f32f32 r5.z, r5.z -mov.f32f32 r3.x, r3.x -mov.f32f32 r7.y, r7.y -mad.f32 r6.w, r7.w, r3.w, r6.w -mul.f r2.z, r2.z, r5.z -mad.f32 r3.x, c14.y, r3.x, c14.z -mov.f32f32 r5.z, r7.y -mov.f32f32 r6.w, r6.w -mov.f32f32 r2.z, r2.z -mov.f32f32 r3.x, r3.x -bary.f r7.y, 4, r1.x -bary.f r7.w, 5, r1.x -bary.f r8.x, 6, r1.x -mad.f32 r3.z, r5.z, r3.w, r3.z -mul.f r3.w, r7.y, r3.x -mul.f r5.z, r7.w, r3.x -mul.f r3.x, r8.x, r3.x -mov.f32f32 r3.z, r3.z -mad.f32 r6.w, c14.y, r6.w, c14.z -mov.f32f32 r2.x, r2.x -mad.f32 r4.z, r10.z, r6.y, r4.z -mad.f32 r3.z, c14.y, r3.z, c14.z -mov.f32f32 r6.y, r6.w -mov.f32f32 r7.w, r2.x -mov.f32f32 r2.x, r6.z -mov.f32f32 r3.z, r3.z -bary.f r6.z, 10, r1.x -bary.f r6.w, 11, r1.x -bary.f r7.y, 12, r1.x -mov.f32f32 r8.x, r2.x -mov.f32f32 r2.x, r6.z -bary.f r6.z, 7, r1.x +rcp r8.y, r8.w +mov.f32f32 r5.x, r5.x +mov.f32f32 r10.x, r7.y +(ss)bary.f r8.w, 24, r1.x +(ss)mul.f r4.z, r4.z, r7.z +mul.f r5.y, r5.y, r7.w +mov.f32f32 r10.z, r7.y +mul.f r11.x, r8.w, c14.x +mov.f32f32 r7.w, r4.z +mov.f32f32 r8.w, r5.y +mul.f r5.x, r5.x, r8.y +mov.f32f32 r7.z, r11.x +add.f r3.w, c14.y, (neg)r3.w +mul.f r0.z, r0.z, c13.z +mul.f r2.z, r4.y, r2.z +mov.f32f32 r10.y, r7.z +mov.f32f32 r10.w, r7.z +mov.f32f32 r4.y, r5.x +sam (f32)(xyzw)r11.y, r7.y, s#0, t#0 +(sy)mul.f r8.y, r11.w, r8.w +rcp r8.z, r8.z mov.f32f32 r6.w, r6.w -mov.f32f32 r7.y, r7.y -sam.s (f32)(x)r8.y, r9.y, s#4, t#4 -(sy)mov.f32f32 r8.y, r8.y -mul.f r2.x, r2.x, (neg)r6.z -mul.f r6.w, r6.w, (neg)r6.z -mul.f r6.z, r7.y, (neg)r6.z +mul.f r8.w, r11.z, r8.w +mul.f r5.y, r11.y, r5.y +sam (f32)(xyzw)r11.y, r10.x, s#2, t#2 +(sy)(ss)mul.f r10.x, r11.y, r6.z +rcp r8.x, r8.x +mov.f32f32 r5.z, r5.z +sam (f32)(xyzw)r12.x, r10.z, s#3, t#3 +(sy)mul.f r10.y, r12.y, r7.w +add.f r9.z, c13.z, (neg)r11.z +(ss)mul.f r10.z, r12.z, r7.w +(ss)mul.f r5.z, r5.z, r8.x +mov.f32f32 r12.y, r7.z +bary.f r7.w, 22, r1.x +mul.f r6.z, r9.z, r6.z +mov.f32f32 r8.x, r5.z +mul.f r6.w, r6.w, r8.z +mul.f r9.z, r7.w, c14.x +mul.f r4.z, r12.x, r4.z +mul.f r3.x, r11.w, r3.x +mov.f32f32 r8.z, r6.w +mov.f32f32 r11.y, r9.z +mov.f32f32 r10.w, r3.w +add.f r0.z, r0.z, r6.y +mov.f32f32 r6.y, r2.z +mov.f32f32 r7.w, r11.y +mov.f32f32 r12.z, r11.y +mul.f r6.x, r6.x, r10.w +sam (f32)(xyzw)r12.w, r11.x, s#0, t#0 +(sy)mad.f32 r8.y, r13.y, r8.z, r8.y +rcp r7.x, r7.x +(ss)mul.f r0.x, r0.x, r7.x +(ss)mad.f32 r7.x, r13.x, r8.z, r8.w +mad.f32 r5.y, r12.w, r6.w, r5.y +sam (f32)(xyzw)r12.w, r7.z, s#2, t#2 +(sy)mad.f32 r6.w, r13.x, r8.x, r10.x +rcp r4.w, r4.w +(ss)mul.f r2.w, r2.w, r4.w +sam (f32)(xyzw)r11.z, r12.y, s#3, t#3 +(sy)(ss)mad.f32 r4.w, r11.w, r4.y, r10.y +add.f r7.z, c13.z, (neg)r12.w +mad.f32 r4.y, r12.x, r4.y, r10.z +mov.f32f32 r8.z, r2.w +mov.f32f32 r10.x, r7.y +mov.f32f32 r10.y, r11.y +mov.f32f32 r10.z, r7.y +mov.f32f32 r10.w, r11.y +mad.f32 r6.z, r7.z, r8.x, r6.z +mov.f32f32 r7.y, r0.x +sam (f32)(xyzw)r11.w, r9.y, s#0, t#0 +mad.f32 r4.z, r11.z, r5.x, r4.z +mad.f32 r3.x, r13.y, r5.z, r3.x +sam (f32)(xyzw)r12.z, r10.x, s#2, t#2 +(sy)mad.f32 r5.x, r12.w, r8.z, r6.w +mad.f32 r5.z, r12.y, r7.y, r8.y (ss)nop -sam (f32)(xyzw)r8.z, r7.w, s#0, t#0 -(sy)mad.f32 r6.x, r9.x, r2.z, r6.x -mov.f32f32 r2.x, r2.x -mov.f32f32 r6.w, r6.w -mov.f32f32 r6.z, r6.z -mov.f32f32 r6.x, r6.x -mad.f32 r2.x, r2.x, r3.z, r3.w -mad.f32 r3.w, r6.w, r3.z, r5.z -mad.f32 r3.x, r6.z, r3.z, r3.x -mad.f32 r3.z, r8.w, r2.z, r4.x -mov.f32f32 r2.x, r2.x -mov.f32f32 r3.w, r3.w -mad.f32 r0.x, r0.x, r6.y, r2.x -mad.f32 r2.x, r2.w, r6.y, r3.w -mov.f32f32 r2.w, r3.x -mov.f32f32 r3.x, r3.z -mov.f32f32 r0.x, r0.x -mov.f32f32 r2.x, r2.x -mad.f32 r2.w, r3.y, r6.y, r2.w -nop -mul.f r3.y, r0.x, r0.x -mad.f32 r2.z, r8.z, r2.z, r4.z -mad.f32 r3.y, r2.x, r2.x, r3.y -mov.f32f32 r2.w, r2.w -add.f r1.w, r1.w, c13.z -add.f r3.z, c17.y, (neg)r0.z -mov.f32f32 r3.y, r3.y -mov.f32f32 r2.z, r2.z -mad.f32 r3.y, r2.w, r2.w, r3.y -mul.f r3.w, r1.w, r7.x -mul.f r3.z, r3.z, c10.y -mul.f r0.z, r0.z, c13.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r4.x, r4.w -add.f r1.z, c15.y, r1.z -rsq r3.y, r3.y -(ss)mov.f32f32 r3.y, r3.y -mad.f32 r3.w, r3.w, r8.y, r4.y -add.f r0.z, r0.z, r3.z -mov.f32f32 r0.y, r0.y -mul.f r0.x, r0.x, r3.y -mul.f r2.x, r2.x, r3.y -mul.f r2.w, r2.w, r3.y -nop -mov.f32f32 r0.x, r0.x -mov.f32f32 r2.x, r2.x -mov.f32f32 r2.w, r2.w -mov.f32f32 r3.y, r3.w -mul.f r3.z, r0.x, r0.x -mul.f r3.w, (neg)c8.x, r0.x -mad.f32 r3.z, r2.x, r2.x, r3.z -mad.f32 r3.w, (neg)c8.y, r2.x, r3.w -mov.f32f32 r4.x, r4.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r3.z, r3.z -mov.f32f32 r3.w, r3.w -mad.f32 r3.z, r2.w, r2.w, r3.z -mad.f32 r3.w, (neg)c8.z, r2.w, r3.w -mul.f r1.z, r1.z, c5.w -mov.f32f32 r0.z, r0.z -mov.f32f32 r6.y, r0.y -add.f r0.y, r0.w, c13.z -bary.f r0.w, 8, r1.x -rsq r3.z, r3.z -(ss)mov.f32f32 r3.z, r3.z -mov.f32f32 r3.w, r3.w -mov.f32f32 r4.y, r1.z -mov.f32f32 r0.z, r0.z -mul.f r0.x, r0.x, r3.z -max.f r3.w, r3.w, c13.y -mul.f r2.x, r2.x, r3.z -mul.f r2.w, r2.w, r3.z -mov.f32f32 r0.x, r0.x -bary.f r3.z, 13, r1.x -mov.f32f32 r3.w, r3.w -bary.f r4.z, 17, r1.x -bary.f r4.w, 16, r1.x -mul.f r0.x, r0.x, r3.z -mov.f32f32 r2.x, r2.x -bary.f r3.z, 14, r1.x -bary.f r5.z, 18, r1.x -mad.f32 r6.z, c7.y, r3.w, (neg)r4.z -mad.f32 r6.w, c7.x, r3.w, (neg)r4.w -mad.f32 r0.x, r2.x, r3.z, r0.x -mad.f32 r2.x, c7.z, r3.w, (neg)r5.z -mov.f32f32 r3.z, r6.z -mov.f32f32 r3.w, r6.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r2.w, r2.w -bary.f r6.z, 15, r1.x -mov.f32f32 r2.x, r2.x -mad.f32 r3.z, c11.x, r3.z, r4.z -mad.f32 r3.w, c11.x, r3.w, r4.w -mad.f32 r0.x, r2.w, r6.z, r0.x -mad.f32 r2.x, c11.x, r2.x, r5.z -mov.f32f32 r2.w, r3.z -mov.f32f32 r3.z, r3.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r2.x, r2.x -mov.f32f32 r4.y, r4.y -mov.f32f32 r3.w, r7.z -max.f r0.x, c13.y, r0.x +sam (f32)(xyzw)r10.x, r10.z, s#3, t#3 +(sy)mad.f32 r4.y, r10.z, r6.y, r4.y +mad.f32 r4.w, r10.y, r6.y, r4.w +mad.f32 r5.x, c14.y, r5.x, c14.z +bary.f r6.y, 6, r1.x +mul.f r4.y, c7.z, r4.y +mul.f r4.w, c7.y, r4.w +mov.f32f32 r6.w, r5.x +bary.f r7.z, 4, r1.x +bary.f r8.x, 5, r1.x +mul.f r5.x, r6.y, r5.x +add.f r6.y, c13.z, (neg)r12.z +mul.f r7.z, r7.z, r6.w +mul.f r6.w, r8.x, r6.w +mov.f32f32 r8.y, r5.z +mad.f32 r6.y, r6.y, r8.z, r6.z +mad.f32 r6.z, r12.x, r7.y, r7.x +mad.f32 r2.z, r10.x, r2.z, r4.z +mad.f32 r2.w, r13.x, r2.w, r3.x +mad.f32 r3.x, c14.y, r6.y, c14.z +bary.f r4.z, 12, r1.x +bary.f r6.y, 7, r1.x +mov.f32f32 r7.x, r6.z +mov.f32f32 r7.y, r3.x +bary.f r8.x, 10, r1.x +bary.f r8.z, 11, r1.x +mul.f r4.z, r4.z, (neg)r6.y +mul.f r9.y, c7.x, r2.z +mul.f r2.z, r8.x, (neg)r6.y +mul.f r6.y, r8.z, (neg)r6.y +mad.f32 r3.x, r4.z, r3.x, r5.x +mad.f32 r2.w, c14.y, r2.w, c14.z +mad.f32 r2.z, r2.z, r7.y, r7.z +mad.f32 r4.z, r6.y, r7.y, r6.w +mad.f32 r0.x, r11.w, r0.x, r5.y +mov.f32f32 r5.x, r2.w +mad.f32 r2.w, r4.x, r2.w, r3.x +(rpt1)nop +mad.f32 r1.w, r1.w, r5.x, r2.z +mad.f32 r2.z, r3.z, r5.x, r4.z +(rpt1)nop +mov.f32f32 r3.x, r1.w +mov.f32f32 r3.z, r2.z +mov.f32f32 r4.x, r2.w +mov.f32f32 r4.z, r0.x +mul.f r1.w, r1.w, r3.x +mul.f r5.x, r6.x, r9.w +mad.f32 r1.w, r2.z, r3.z, r1.w +add.f r0.y, c15.y, r0.y +mad.f32 r1.w, r4.x, r4.x, r1.w max.f r0.z, r0.z, c13.y -mov.f32f32 r1.z, r1.z -mov.f32f32 r4.z, r3.w -mov.f32f32 r0.x, r0.x +add.f r1.z, c15.y, r1.z +mul.f r8.z, r0.y, c5.z +add.f r0.y, r2.x, c13.z +add.f r2.x, r3.y, c13.z +bary.f r2.z, 9, r1.x +rsq r1.w, r1.w +(ss)mov.f32f32 r3.y, r1.w +(ss)mul.f r1.w, r2.w, r1.w +mov.f32f32 r7.z, r8.z min.f r0.z, r0.z, c13.z -mov.f32f32 r1.z, r1.z -mul.f r3.w, r5.w, r0.y -mov.f32f32 r0.w, r0.w -mul.f r0.y, r1.w, r0.y -sam.s (f32)(x)r4.x, r4.x, s#4, t#4 -(sy)mov.f32f32 r1.w, r4.x -log2 r0.x, r0.x -(ss)mul.f r0.x, c11.y, r0.x -add.f r4.x, c17.y, (neg)r0.z -add.f r4.y, c17.y, (neg)r0.z -add.f r4.z, c17.y, (neg)r0.z -mov.f32f32 r0.x, r0.x -mad.f32 r1.w, r3.w, r1.w, r3.y -mul.f r3.y, r4.x, c9.z -mul.f r3.w, r4.y, c9.y -mul.f r4.x, r4.z, c9.x -mov.f32f32 r1.w, r1.w +mul.f r2.w, r3.x, r3.y +mul.f r3.x, r3.z, r3.y +mov.f32f32 r3.y, r1.w +mov.f32f32 r8.x, r9.x +mov.f32f32 r3.z, r2.w +mul.f r2.w, (neg)c8.x, r2.w +mov.f32f32 r4.x, r3.x +mad.f32 r2.w, (neg)c8.y, r3.x, r2.w +mul.f r3.x, r3.z, r3.z +mad.f32 r1.w, (neg)c8.z, r1.w, r2.w +mad.f32 r2.w, r4.x, r4.x, r3.x +sam.s (f32)(x)r7.y, r7.z, s#4, t#4 +mul.f r3.x, r0.y, r3.w +mad.f32 r2.w, r3.y, r3.y, r2.w +max.f r1.w, r1.w, c13.y +bary.f r3.w, 16, r1.x +(sy)mad.f32 r3.x, r3.x, r7.y, r5.x +add.f r5.x, c17.y, (neg)r0.z +add.f r5.y, c17.y, (neg)r0.z +add.f r6.x, c17.y, (neg)r0.z +rsq r2.w, r2.w +(ss)mov.f32f32 r6.y, r2.w +mov.f32f32 r6.w, r1.w +bary.f r7.y, 17, r1.x +(ss)bary.f r7.z, 18, r1.x +mul.f r3.z, r3.z, r6.y +bary.f r7.w, 13, r1.x +mad.f32 r8.x, c7.z, r6.w, (neg)r7.z +mad.f32 r6.w, c7.y, r6.w, (neg)r7.y +mul.f r4.x, r4.x, r6.y +mul.f r3.z, r3.z, r7.w +bary.f r6.y, 14, r1.x +mad.f32 r7.z, c11.x, r8.x, r7.z +mad.f32 r6.w, c11.x, r6.w, r7.y +mul.f r2.w, r3.y, r2.w +mad.f32 r3.y, r4.x, r6.y, r3.z +bary.f (ei)r1.x, 15, r1.x +mad.f32 r1.y, c7.x, r1.w, (neg)r3.w +(rpt1)nop +mad.f32 r1.x, r2.w, r1.x, r3.y +mad.f32 r1.y, c11.x, r1.y, r3.w +mul.f r8.w, r1.z, c5.w +mul.f r1.z, r5.x, c9.z +max.f r1.x, c13.y, r1.x +mul.f r1.w, r5.y, c9.y +mul.f r3.y, r6.x, c9.x +mov.f32f32 r6.x, r8.w +mov.f32f32 r6.y, r9.x nop -exp2 r0.x, r0.x -(ss)mul.f r4.y, r5.x, r0.x -mul.f r4.z, r5.y, r0.x -mad.f32 r2.x, r6.x, r2.x, r4.y -mad.f32 r2.w, r3.x, r2.w, r4.z -(ss)mul.f r0.x, r2.y, r0.x -mov.f32f32 r6.z, r1.z -mov.f32f32 r1.z, r2.x -mov.f32f32 r2.x, r7.z -mov.f32f32 r2.y, r2.w -mad.f32 r0.x, r2.z, r3.z, r0.x -mov.f32f32 r4.y, r0.w -mov.f32f32 r6.w, r2.x -bary.f (ei)r0.w, 9, r1.x -mov.f32f32 r1.x, c13.z -mov.f32f32 r1.y, c13.y -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r2.w, r1.x -sam.s (f32)(x)r4.z, r6.y, s#4, t#4 -(sy)mov.f32f32 r1.x, r4.z -(rpt2)nop -mad.f32 r0.y, r0.y, r1.x, r1.w -mov.f32f32 r4.z, r0.w +sam.s (f32)(x)r9.z, r8.z, s#4, t#4 +mul.f r0.w, r0.w, r2.x +log2 r1.x, r1.x +(ss)mul.f r1.x, c11.y, r1.x +mul.f r0.y, r0.y, r2.x +sam (f32)(w)r9.w, r2.y, s#1, t#1 +(sy)cmps.f.lt r2.x, r10.z, c16.x +mov.f32f32 r2.w, c13.z +(ss)mov.f32f32 r2.y, c13.y (rpt1)nop -mov.f32f32 r0.y, r0.y -(rpt2)nop -mul.f r0.y, c15.w, r0.y -sam (f32)(w)r4.y, r4.y, s#1, t#1 -(sy)mov.f32f32 r0.w, r5.x -cmps.f.lt r1.x, r5.x, c16.x +exp2 r1.x, r1.x +(ss)mul.f r2.z, r4.y, r1.x +mul.f r3.z, r4.w, r1.x +mad.f32 r2.z, r8.y, r7.z, r2.z +sam.s (f32)(x)r7.y, r5.w, s#4, t#4 +(sy)mad.f32 r0.w, r0.w, r7.y, r3.x +mad.f32 r3.x, r7.x, r6.w, r3.z +mad.f32 r0.y, r0.y, r9.z, r0.w +mul.f r0.w, r9.y, r1.x +(ss)cov.u32f32 r1.x, r2.x nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.w, r0.w -cov.u32f32 r1.x, r1.x +mul.f r0.y, c15.w, r0.y +mad.f32 r0.w, r4.z, r1.y, r0.w +cmps.f.ne r1.x, r1.x, c13.y nop -mul.f r1.z, r1.z, r0.y -mul.f r1.w, r2.y, r0.y -mul.f r0.x, r0.x, r0.y -cmps.f.ne r0.y, r1.x, c13.y -mov.f32f32 r1.x, r1.z -mov.f32f32 r1.z, r1.w -mad.f32 r1.x, c6.z, r6.x, r1.x -mad.f32 r1.z, c6.y, r3.x, r1.z -mov.f32f32 r0.x, r0.x -sel.b32 r0.y, r1.y, r0.y, r0.w -mov.f32f32 r0.w, r1.x -mov.f32f32 r1.x, r1.z -mad.f32 r0.x, c6.x, r2.z, r0.x +mov.f32f32 r1.y, r0.y +mul.f r0.y, r0.w, r0.y +sel.b32 r0.w, r2.y, r1.x, r10.z nop -mul.f r0.w, r0.z, r0.w -mul.f r1.x, r0.z, r1.x -mov.f32f32 r0.x, r0.x +mul.f r1.x, r2.z, r1.y +mul.f r1.y, r3.x, r1.y +mad.f32 r1.x, c6.z, r5.z, r1.x +mad.f32 r1.y, c6.y, r6.z, r1.y +mad.f32 r0.x, c6.x, r0.x, r0.y nop -add.f r0.w, r0.w, r3.y -add.f r1.x, r1.x, r3.w +mul.f r0.y, r0.z, r1.x +mul.f r1.x, r0.z, r1.y mul.f r0.x, r0.z, r0.x nop -mul.f r0.z, r0.w, r0.y -mul.f r0.w, r1.x, r0.y -add.f r0.x, r0.x, r4.x +add.f r0.y, r0.y, r1.z +add.f r0.z, r1.x, r1.w +add.f r0.x, r0.x, r3.y nop -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -mul.f r0.x, r0.x, r0.y +mul.f r2.z, r0.y, r0.w +mul.f r2.y, r0.z, r0.w +mul.f r2.x, r0.x, r0.w +end nop -mov.f32f32 r0.y, r0.z -mov.f32f32 r0.z, r0.w -mov.f32f32 r0.x, r0.x nop -mov.f32f32 r2.z, r0.y -mov.f32f32 r2.y, r0.z -mov.f32f32 r0.x, r0.x -(rpt2)nop -mov.f32f32 r2.x, r0.x -end ; FRAG: outputs: r2.x (1:0) -; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r5.w (5:10,cm=f,il=12,b=1) r6.x (5:11,cm=f,il=16,b=1) r5.w (5:12,cm=f,il=20,b=1) r63.y (5:13,cm=f,il=24,b=1) r1.y (5:14,cm=f,il=28,b=1) r7.x (5:15,cm=f,il=32,b=1) -; FRAG: 539 instructions, 0 half, 13 full +; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r5.z (5:10,cm=f,il=12,b=1) r7.w (5:11,cm=f,il=16,b=1) r0.y (5:12,cm=f,il=20,b=1) r63.y (5:13,cm=f,il=24,b=1) r1.w (5:14,cm=f,il=28,b=1) r7.x (5:15,cm=f,il=32,b=1) +; FRAG: 354 instructions, 0 half, 14 full diff --git a/reference/0ad-alpine-valley/0ad-53.asm b/reference/0ad-alpine-valley/0ad-53.asm index 890976a..4dd0acb 100644 --- a/reference/0ad-alpine-valley/0ad-53.asm +++ b/reference/0ad-alpine-valley/0ad-53.asm @@ -1,14 +1,14 @@ ; options: ; VERT: new compiler -@in(r2.w) in0 -@in(r3.x) in1 -@in(r3.y) in2 -@in(r5.x) in4 -@in(r5.y) in5 -@in(r5.z) in6 -@in(r8.x) in8 -@in(r8.y) in9 -@in(r8.z) in10 +@in(r6.z) in0 +@in(r6.w) in1 +@in(r7.x) in2 +@in(r5.w) in4 +@in(r6.x) in5 +@in(r6.y) in6 +@in(r2.w) in8 +@in(r3.x) in9 +@in(r3.y) in10 @out(r0.x) out0 @out(r0.y) out1 @out(r0.z) out2 @@ -41,163 +41,105 @@ @out(r7.y) out29 @out(r7.z) out30 @out(r7.w) out31 -(sy)(ss)add.f r0.x, c4.x, (neg)r2.w -mul.f r0.y, r5.x, r5.x -mul.f r0.z, c8.w, r2.w -mul.f r0.w, c8.z, r2.w +@const(c13.x) 0x3f800000, 0x00000000, 0xbf800000, 0x00000000 +@const(c14.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 +(sy)(ss)add.f r0.x, c4.x, (neg)r6.z +mul.f r0.y, r5.w, r5.w +mul.f r0.z, c8.y, r6.z +mul.f r0.w, c8.x, r6.z mul.f r1.x, r0.x, r0.x -add.f r1.y, c4.y, (neg)r3.x +add.f r1.z, c4.y, (neg)r6.w add.f r0.y, c13.x, (neg)r0.y -mad.f32 r0.z, c9.w, r3.x, r0.z -mad.f32 r0.w, c9.z, r3.x, r0.w -mad.f32 r1.x, r1.y, r1.y, r1.x -mov.f32f32 r0.y, r0.y -mad.f32 r0.z, c10.w, r3.y, r0.z -mad.f32 r0.w, c10.z, r3.y, r0.w -mov.f32f32 r1.x, r1.x -add.f r2.x, c4.z, (neg)r3.y -mul.f r2.y, r0.y, r0.y -mul.f r1.z, r5.y, r5.x -add.f r0.z, r0.z, c11.w -mad.f32 r1.x, r2.x, r2.x, r1.x -add.f r0.w, r0.w, c11.z -mul.f r2.z, c8.y, r2.w -mul.f r3.z, c8.x, r2.w -add.f r1.z, c13.y, (neg)r1.z -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w +mad.f32 r0.z, c9.y, r6.w, r0.z +mad.f32 r0.w, c9.x, r6.w, r0.w +mad.f32 r1.x, r1.z, r1.z, r1.x +add.f r1.w, c4.z, (neg)r7.x +mov.f32f32 r1.y, r0.y +mad.f32 r0.z, c10.y, r7.x, r0.z +mad.f32 r0.w, c10.x, r7.x, r0.w +mad.f32 r1.x, r1.w, r1.w, r1.x +mul.f r2.x, r1.y, r1.y +mul.f r1.y, r6.x, r5.w +add.f r0.z, r0.z, c11.y +add.f r0.w, r0.w, c11.x +mul.f r2.y, r6.y, r5.w +mul.f r2.z, c8.w, r6.z rsq r1.x, r1.x -(ss)mov.f32f32 r1.x, r1.x -mov.f32f32 r3.w, r1.z -mov.f32f32 r1.w, r0.z -mov.f32f32 r1.z, r0.w +(ss)mov.f32f32 r3.z, r1.x +add.f r3.w, c13.y, (neg)r1.y mad.f32 r0.x, r0.x, r1.x, (neg)c5.x -mad.f32 r0.z, r3.w, r3.w, r2.y -mad.f32 r0.w, r1.y, r1.x, (neg)c5.y -mad.f32 r1.x, r2.x, r1.x, (neg)c5.z -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.z, r0.z -mul.f r1.y, r5.z, r5.x -mov.f32f32 r0.w, r0.w -mul.f r2.x, r0.x, r0.x -mov.f32f32 r1.x, r1.x -add.f r1.y, c13.y, (neg)r1.y -mad.f32 r2.x, r0.w, r0.w, r2.x -mad.f32 r2.y, c9.y, r3.x, r2.z -mad.f32 r2.z, c9.x, r3.x, r3.z -mad.f32 r2.y, c10.y, r3.y, r2.y -mov.f32f32 r2.x, r2.x -mov.f32f32 r3.z, r1.y -mad.f32 r1.y, r1.x, r1.x, r2.x -add.f r2.x, r2.y, c11.y -mad.f32 r2.y, c10.x, r3.y, r2.z -mul.f r5.w, c0.w, r2.w -mul.f r6.x, c0.z, r2.w -mul.f r6.y, c0.y, r2.w -mul.f r6.z, c0.x, r2.w -rsq r1.y, r1.y -(ss)mov.f32f32 r1.y, r1.y -mad.f32 r0.z, r3.z, r3.z, r0.z -mul.f r2.x, r2.x, c12.y -add.f r2.y, r2.y, c11.x -mul.f r1.x, r1.x, r1.y -mul.f r0.w, r0.w, r1.y -mul.f r0.x, r0.x, r1.y -mov.f32f32 r1.y, r2.x -mov.f32f32 r1.x, r1.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.x, r0.x +mul.f r1.y, r0.z, c12.y +mad.f32 r0.z, r1.z, r3.z, (neg)c5.y +mov.f32f32 r1.z, r3.w +(ss)mov.f32f32 r1.x, r0.x +mad.f32 r1.w, r1.w, r3.z, (neg)c5.z +mov.f32f32 r3.z, r0.z +mad.f32 r2.x, r3.w, r1.z, r2.x +mul.f r1.x, r1.x, r1.x +add.f r2.y, c13.y, (neg)r2.y +mad.f32 r0.z, r0.z, r3.z, r1.x +mov.f32f32 r3.w, r1.w +mul.f r1.x, r0.w, c12.x +mov.f32f32 r0.w, r2.y +mad.f32 r2.z, c9.w, r6.w, r2.z +mad.f32 r0.z, r1.w, r3.w, r0.z +mad.f32 r1.w, c10.w, r7.x, r2.z +mul.f r2.z, c8.z, r6.z +mul.f r4.x, c0.w, r6.z +mul.f r5.x, c0.z, r6.z +mul.f r5.y, c0.y, r6.z +mul.f r5.z, c0.x, r6.z rsq r0.z, r0.z -(ss)mov.f32f32 r0.z, r0.z -mov.f32f32 r1.x, r1.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.x, r0.x +(ss)mov.f32f32 r4.z, r0.z +mul.f r4.y, r0.x, r0.z +mad.f32 r0.x, r2.y, r0.w, r2.x +add.f r1.w, r1.w, c11.w +mul.f r4.w, r3.w, r4.z +mul.f r4.z, r3.z, r4.z +(ss)mad.f32 r0.z, c9.z, r6.w, r2.z +mad.f32 r2.y, c1.w, r6.w, r4.x +mad.f32 r5.x, c1.z, r6.w, r5.x +rsq r0.x, r0.x +(ss)mov.f32f32 r3.z, r0.x +mul.f r2.x, r0.y, r0.x +(ss)mad.f32 r0.x, c10.z, r7.x, r0.z +mad.f32 r0.y, c2.w, r7.x, r2.y +mul.f r2.z, r0.w, r3.z +mul.f r2.y, r1.z, r3.z +mov.f32f32 r0.z, r2.x +add.f r1.z, r0.x, c11.z +mov.f32f32 r0.x, r2.z +mov.f32f32 r4.x, r2.y +mul.f r7.y, r6.x, r0.z +add.f r0.w, r0.y, c3.w +mul.f r0.y, r5.w, r0.x +mul.f r3.z, r6.y, r4.x +mad.f32 r3.w, r6.y, r0.z, (neg)r0.y +mad.f32 r3.z, r6.x, r0.x, (neg)r3.z +mad.f32 r4.x, r5.w, r4.x, (neg)r7.y +mad.f32 r0.x, c2.z, r7.x, r5.x +mad.f32 r0.y, c1.y, r6.w, r5.y +mad.f32 r5.x, c1.x, r6.w, r5.z +mad.f32 r0.y, c2.y, r7.x, r0.y +add.f r0.z, r0.x, c3.z +mad.f32 r0.x, c2.x, r7.x, r5.x nop -mov.f32f32 r4.w, r1.x -mov.f32f32 r4.z, r0.w -mov.f32f32 r4.y, r0.x -mul.f r0.x, r0.y, r0.z -mul.f r0.y, r3.z, r0.z -mul.f r0.z, r3.w, r0.z -mul.f r0.w, r2.y, c12.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r1.x, r0.w -mul.f r0.w, r5.y, r0.x -mul.f r2.x, r5.x, r0.y -mad.f32 r0.w, r5.x, r0.z, (neg)r0.w -mad.f32 r2.x, r5.z, r0.x, (neg)r2.x -mul.f r2.y, r5.z, r0.z -mov.f32f32 r2.z, r0.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r2.x, r2.x -mad.f32 r0.y, r5.y, r0.y, (neg)r2.y -mov.f32f32 r2.z, r2.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r3.w, r2.x -mov.f32f32 r0.y, r0.y -nop -mov.f32f32 r4.x, r0.w -mov.f32f32 r2.y, r0.z -mov.f32f32 r3.z, r0.y -mov.f32f32 r0.x, r0.x -mad.f32 r0.y, c1.w, r3.x, r5.w -mad.f32 r0.z, c1.z, r3.x, r6.x -mad.f32 r0.w, c1.y, r3.x, r6.y -mov.f32f32 r2.x, r0.x -mad.f32 r0.x, c2.w, r3.y, r0.y -mad.f32 r0.y, c2.z, r3.y, r0.z -mad.f32 r0.z, c2.y, r3.y, r0.w -mad.f32 r0.w, c1.x, r3.x, r6.z -add.f r0.x, r0.x, c3.w -add.f r0.y, r0.y, c3.z -add.f r5.w, r0.z, c3.y -mad.f32 r6.x, c2.x, r3.y, r0.w -mov.f32f32 r0.w, r0.x -mov.f32f32 r0.z, r0.y -mov.f32f32 r0.y, r5.w -add.f r0.x, r6.x, c3.x -mov.f32f32 r5.w, (0.000000) -mov.f32f32 r6.x, (0.000000) -mov.f32f32 r6.y, (0.000000) -mov.f32f32 r0.x, r0.x -mov.f32f32 r7.w, r5.w -mov.f32f32 r7.z, r6.x -mov.f32f32 r7.y, r6.y -mov.f32f32 r5.w, r3.y -mov.f32f32 r3.x, r3.x -mov.f32f32 r6.x, r2.w -mov.f32f32 r5.z, r5.z -mov.f32f32 r7.x, r5.w -mov.f32f32 r6.w, r3.x -mov.f32f32 r6.z, r6.x -mov.f32f32 r6.y, r5.z -mov.f32f32 r3.x, r5.y -mov.f32f32 r5.x, r5.x -mul.f r5.y, r8.z, c6.z -mul.f r8.y, r8.y, c6.y -mov.f32f32 r6.x, r3.x -mov.f32f32 r5.w, r5.x -mov.f32f32 r5.z, r5.y -mov.f32f32 r5.y, r8.y -mul.f r3.x, r8.x, c6.x -mad.f32 r3.y, c7.x, r3.y, c7.y -mad.f32 r2.w, c7.x, r2.w, c7.y -mov.f32f32 r8.x, c13.z -mov.f32f32 r5.x, r3.x -mov.f32f32 r3.y, r3.y -mov.f32f32 r3.x, r2.w -mov.f32f32 r2.w, r8.x +add.f r0.y, r0.y, c3.y +mov.f32f32 r7.w, (0.000000) +add.f r0.x, r0.x, c3.x +mov.f32f32 r7.z, (0.000000) +mov.f32f32 r7.y, (0.000000) +mul.f r5.z, r3.y, c6.z +mul.f r5.y, r3.x, c6.y +mul.f r5.x, r2.w, c6.x +mad.f32 r3.y, c7.x, r7.x, c7.y +mad.f32 r3.x, c7.x, r6.z, c7.y +mov.f32f32 r2.w, c13.z end nop nop nop ; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11) r4.x (5:12) r5.x (5:13) r6.x (5:14) r7.x (5:15) -; VERT: inputs: r2.w (0:0,cm=7,il=8,b=0) r5.x (0:0,cm=7,il=12,b=0) r8.x (0:0,cm=7,il=16,b=0) -; VERT: 153 instructions, 0 half, 9 full +; VERT: inputs: r6.z (0:0,cm=7,il=8,b=0) r5.w (0:0,cm=7,il=12,b=0) r2.w (0:0,cm=7,il=16,b=0) +; VERT: 93 instructions, 0 half, 8 full diff --git a/reference/0ad-alpine-valley/0ad-54.asm b/reference/0ad-alpine-valley/0ad-54.asm index d4490e5..494f814 100644 --- a/reference/0ad-alpine-valley/0ad-54.asm +++ b/reference/0ad-alpine-valley/0ad-54.asm @@ -1,16 +1,16 @@ ; options: ; VERT: new compiler -@in(r2.w) in0 -@in(r3.x) in1 -@in(r3.y) in2 -@in(r8.x) in4 -@in(r8.y) in5 -@in(r8.z) in6 -@in(r6.y) in8 -@in(r6.z) in9 -@in(r6.w) in10 -@in(r8.w) in12 -@in(r9.x) in13 +@in(r7.x) in0 +@in(r7.y) in1 +@in(r7.z) in2 +@in(r6.y) in4 +@in(r6.z) in5 +@in(r6.w) in6 +@in(r8.x) in8 +@in(r8.y) in9 +@in(r8.z) in10 +@in(r3.x) in12 +@in(r3.y) in13 @out(r0.x) out0 @out(r0.y) out1 @out(r0.z) out2 @@ -43,167 +43,101 @@ @out(r7.y) out29 @out(r7.z) out30 @out(r7.w) out31 -(sy)(ss)add.f r0.x, c4.x, (neg)r2.w -mul.f r0.y, r8.x, r8.x -mul.f r0.z, c8.w, r2.w -mul.f r0.w, c8.z, r2.w +@const(c13.x) 0x3f800000, 0x00000000, 0xbf800000, 0x00000000 +@const(c14.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 +(sy)(ss)add.f r0.x, c4.x, (neg)r7.x +mul.f r0.y, r6.y, r6.y +mul.f r0.z, c8.y, r7.x +mul.f r0.w, c8.x, r7.x mul.f r1.x, r0.x, r0.x -add.f r1.y, c4.y, (neg)r3.x +add.f r1.z, c4.y, (neg)r7.y add.f r0.y, c13.x, (neg)r0.y -mad.f32 r0.z, c9.w, r3.x, r0.z -mad.f32 r0.w, c9.z, r3.x, r0.w -mad.f32 r1.x, r1.y, r1.y, r1.x -mov.f32f32 r0.y, r0.y -mad.f32 r0.z, c10.w, r3.y, r0.z -mad.f32 r0.w, c10.z, r3.y, r0.w -mov.f32f32 r1.x, r1.x -add.f r1.z, c4.z, (neg)r3.y -mul.f r1.w, r0.y, r0.y -mul.f r2.x, r8.y, r8.x -add.f r0.z, r0.z, c11.w +mad.f32 r0.z, c9.y, r7.y, r0.z +mad.f32 r0.w, c9.x, r7.y, r0.w mad.f32 r1.x, r1.z, r1.z, r1.x -add.f r0.w, r0.w, c11.z -mul.f r2.y, c8.y, r2.w -mul.f r2.z, c8.x, r2.w -add.f r2.x, c13.y, (neg)r2.x -mul.f r3.z, r6.w, c6.z -mov.f32f32 r0.z, r0.z +add.f r1.w, c4.z, (neg)r7.z +mov.f32f32 r1.y, r0.y +mad.f32 r0.z, c10.y, r7.z, r0.z +mad.f32 r0.w, c10.x, r7.z, r0.w +mad.f32 r1.x, r1.w, r1.w, r1.x +mul.f r2.x, r1.y, r1.y +mul.f r1.y, r6.z, r6.y +add.f r0.z, r0.z, c11.y +add.f r0.w, r0.w, c11.x +mul.f r2.y, r6.w, r6.y +mul.f r2.z, c8.w, r7.x rsq r1.x, r1.x -(ss)mov.f32f32 r1.x, r1.x -mov.f32f32 r2.x, r2.x -mov.f32f32 r0.w, r0.w -mad.f32 r2.y, c9.y, r3.x, r2.y +(ss)mov.f32f32 r2.w, r1.x +add.f r3.z, c13.y, (neg)r1.y mad.f32 r0.x, r0.x, r1.x, (neg)c5.x -mad.f32 r1.w, r2.x, r2.x, r1.w -mad.f32 r1.y, r1.y, r1.x, (neg)c5.y -mad.f32 r1.x, r1.z, r1.x, (neg)c5.z -mov.f32f32 r0.x, r0.x -mov.f32f32 r3.w, r1.w -mul.f r1.z, r8.z, r8.x -mov.f32f32 r1.y, r1.y -mul.f r1.w, r0.x, r0.x -mov.f32f32 r1.x, r1.x -add.f r4.x, c13.y, (neg)r1.z -mad.f32 r4.y, r1.y, r1.y, r1.w -mov.f32f32 r1.w, r0.z -mov.f32f32 r1.z, r0.w -mad.f32 r0.z, c10.y, r3.y, r2.y -mov.f32f32 r0.w, r4.y -mov.f32f32 r2.y, r4.x -mad.f32 r0.w, r1.x, r1.x, r0.w -add.f r0.z, r0.z, c11.y -mad.f32 r2.z, c9.x, r3.x, r2.z -mov.f32f32 r3.z, r3.z -mad.f32 r2.z, c10.x, r3.y, r2.z -mul.f r5.z, c0.w, r2.w -mul.f r5.w, c0.z, r2.w -rsq r0.w, r0.w -(ss)mov.f32f32 r0.w, r0.w -mad.f32 r3.w, r2.y, r2.y, r3.w -mul.f r0.z, r0.z, c12.y -add.f r2.z, r2.z, c11.x -mul.f r1.x, r1.x, r0.w -mul.f r4.x, r1.y, r0.w -mul.f r0.x, r0.x, r0.w -mov.f32f32 r1.y, r0.z -mov.f32f32 r0.z, r1.x -mov.f32f32 r0.w, r4.x -mov.f32f32 r0.x, r0.x -rsq r1.x, r3.w -(ss)mov.f32f32 r1.x, r1.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.x, r0.x +mul.f r1.y, r0.z, c12.y +mad.f32 r0.z, r1.z, r2.w, (neg)c5.y +mov.f32f32 r1.z, r3.z +(ss)mov.f32f32 r1.x, r0.x +mad.f32 r1.w, r1.w, r2.w, (neg)c5.z +mov.f32f32 r2.w, r0.z +mad.f32 r2.x, r3.z, r1.z, r2.x +mul.f r1.x, r1.x, r1.x +add.f r2.y, c13.y, (neg)r2.y +mad.f32 r0.z, r0.z, r2.w, r1.x +mov.f32f32 r3.z, r1.w +mul.f r1.x, r0.w, c12.x +mov.f32f32 r0.w, r2.y +mad.f32 r2.z, c9.w, r7.y, r2.z +mad.f32 r0.z, r1.w, r3.z, r0.z +mad.f32 r1.w, c10.w, r7.z, r2.z +mul.f r2.z, c8.z, r7.x +mul.f r3.w, c0.w, r7.x +mul.f r4.x, c0.z, r7.x +mul.f r5.z, c0.y, r7.x +mul.f r5.w, c0.x, r7.x +rsq r0.z, r0.z +(ss)mov.f32f32 r4.y, r0.z +mul.f r4.w, r0.x, r0.z +mad.f32 r0.x, r2.y, r0.w, r2.x +add.f r1.w, r1.w, c11.w +mul.f r5.y, r3.z, r4.y +mul.f r5.x, r2.w, r4.y +(ss)mad.f32 r0.z, c9.z, r7.y, r2.z +mad.f32 r2.y, c1.w, r7.y, r3.w +mad.f32 r2.w, c1.z, r7.y, r4.x +rsq r0.x, r0.x +(ss)mov.f32f32 r3.z, r0.x +mul.f r2.x, r0.y, r0.x +(ss)mad.f32 r0.x, c10.z, r7.z, r0.z +mad.f32 r0.y, c2.w, r7.z, r2.y +mul.f r2.z, r0.w, r3.z +mul.f r2.y, r1.z, r3.z +mov.f32f32 r0.z, r2.x +add.f r1.z, r0.x, c11.z +mov.f32f32 r0.x, r2.z +mov.f32f32 r3.z, r2.y +mul.f r3.w, r6.z, r0.z +add.f r0.w, r0.y, c3.w +mul.f r0.y, r6.y, r0.x +mul.f r4.x, r6.w, r3.z +mad.f32 r4.y, r6.w, r0.z, (neg)r0.y +mad.f32 r4.x, r6.z, r0.x, (neg)r4.x +mad.f32 r4.z, r6.y, r3.z, (neg)r3.w +mad.f32 r0.x, c2.z, r7.z, r2.w +mad.f32 r0.y, c1.y, r7.y, r5.z +mad.f32 r2.w, c1.x, r7.y, r5.w +mad.f32 r0.y, c2.y, r7.z, r0.y +add.f r0.z, r0.x, c3.z +mad.f32 r0.x, c2.x, r7.z, r2.w nop -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -(rpt1)nop -mov.f32f32 r5.y, r0.z -mov.f32f32 r5.x, r0.w -mov.f32f32 r4.w, r0.x -mul.f r0.x, r0.y, r1.x -mul.f r0.y, r2.y, r1.x -mul.f r0.z, r2.x, r1.x -mul.f r0.w, r2.z, c12.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r1.x, r0.w -mul.f r0.w, r8.y, r0.x -mul.f r2.x, r8.x, r0.y -mad.f32 r0.w, r8.x, r0.z, (neg)r0.w -mad.f32 r2.x, r8.z, r0.x, (neg)r2.x -mul.f r2.y, r8.z, r0.z -mov.f32f32 r2.z, r0.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r2.x, r2.x -mad.f32 r0.y, r8.y, r0.y, (neg)r2.y -mov.f32f32 r2.z, r2.z -mov.f32f32 r4.z, r0.w -mov.f32f32 r4.y, r2.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.x, r0.x -nop -mov.f32f32 r4.x, r0.y -mov.f32f32 r2.y, r0.z -mov.f32f32 r2.x, r0.x -mov.f32f32 r0.x, r3.z -mad.f32 r0.y, c1.w, r3.x, r5.z -mad.f32 r0.z, c1.z, r3.x, r5.w -mul.f r0.w, c0.y, r2.w -mov.f32f32 r6.x, r0.x -mad.f32 r0.x, c2.w, r3.y, r0.y -mad.f32 r0.y, c2.z, r3.y, r0.z -mad.f32 r0.z, c1.y, r3.x, r0.w -mul.f r0.w, c0.x, r2.w -add.f r0.x, r0.x, c3.w -add.f r0.y, r0.y, c3.z -mad.f32 r3.z, c2.y, r3.y, r0.z -(ss)mad.f32 r3.w, c1.x, r3.x, r0.w -mov.f32f32 r0.w, r0.x -mov.f32f32 r0.z, r0.y -add.f r0.x, r3.z, c3.y -mad.f32 r3.z, c2.x, r3.y, r3.w -mul.f r3.w, r6.z, c6.y -mul.f r5.z, r6.y, c6.x -mov.f32f32 r0.y, r0.x -add.f r0.x, r3.z, c3.x -mov.f32f32 r3.z, r3.w -mov.f32f32 r3.w, r5.z -mad.f32 r6.y, c7.x, r3.y, c7.y -mov.f32f32 r0.x, r0.x -mov.f32f32 r5.w, r3.z -mov.f32f32 r5.z, r3.w -mov.f32f32 r3.z, r6.y -mad.f32 r6.y, c7.x, r2.w, c7.y -mov.f32f32 r6.z, (0.000000) -mov.f32f32 r3.y, r3.y -mov.f32f32 r3.w, r3.z -mov.f32f32 r3.z, r6.y -mov.f32f32 r7.w, r6.z -mov.f32f32 r7.z, r3.y -mov.f32f32 r3.x, r3.x -mov.f32f32 r3.z, r3.z -mov.f32f32 r2.w, r2.w -mov.f32f32 r3.y, r8.z -mov.f32f32 r7.y, r3.x -mov.f32f32 r3.x, r8.y -mov.f32f32 r7.x, r2.w -mov.f32f32 r6.w, r3.y -mov.f32f32 r2.w, r8.x -mov.f32f32 r6.z, r3.x -mov.f32f32 r3.x, r9.x -mov.f32f32 r8.x, r8.w -mov.f32f32 r6.y, r2.w +add.f r0.y, r0.y, c3.y +mov.f32f32 r7.w, (0.000000) +add.f r0.x, r0.x, c3.x +mul.f r6.x, r8.z, c6.z +mul.f r5.w, r8.y, c6.y +mul.f r5.z, r8.x, c6.x +mad.f32 r3.w, c7.x, r7.z, c7.y +mad.f32 r3.z, c7.x, r7.x, c7.y mov.f32f32 r2.w, c13.z -mov.f32f32 r3.y, r3.x -mov.f32f32 r3.x, r8.x -nop -mov.f32f32 r2.w, r2.w end nop ; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11) r4.x (5:12) r5.x (5:13) r6.x (5:14) r7.x (5:15) -; VERT: inputs: r2.w (0:0,cm=7,il=8,b=0) r8.x (0:0,cm=7,il=12,b=0) r6.y (0:0,cm=7,il=16,b=0) r8.w (0:0,cm=3,il=20,b=0) -; VERT: 160 instructions, 0 half, 10 full +; VERT: inputs: r7.x (0:0,cm=7,il=8,b=0) r6.y (0:0,cm=7,il=12,b=0) r8.x (0:0,cm=7,il=16,b=0) r3.x (0:0,cm=3,il=20,b=0) +; VERT: 91 instructions, 0 half, 9 full diff --git a/reference/0ad-alpine-valley/0ad-55.asm b/reference/0ad-alpine-valley/0ad-55.asm index ece3ada..4760742 100644 --- a/reference/0ad-alpine-valley/0ad-55.asm +++ b/reference/0ad-alpine-valley/0ad-55.asm @@ -8,6 +8,11 @@ @out(r2.y) out1 @out(r2.z) out2 @out(r2.w) out3 +@const(c14.x) 0x3f000000, 0x00000000, 0x3f800000, 0xbeef9097 +@const(c15.x) 0x3cff9724, 0x40000000, 0xbf800000, 0xba03126f +@const(c16.x) 0xbf000000, 0x3f800000, 0x3fb8aa65, 0x3de38866 +@const(c17.x) 0x3cf5c28f, 0x00000000, 0x00000000, 0x00000000 +@const(c18.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.x, 21, r1.x bary.f r0.y, 0, r1.x add.f r0.w, r0.w, c14.y @@ -16,531 +21,347 @@ mul.f r1.w, r0.x, r0.x bary.f r2.x, 22, r1.x add.f r2.y, r0.y, c16.x add.f r2.z, r1.z, c16.x -bary.f r2.w, 8, r1.x +bary.f r2.w, 10, r1.x mad.f32 r1.w, r2.x, r2.x, r1.w -floor.f r3.x, r2.y +bary.f r3.x, 23, r1.x +floor.f r3.y, r2.y rcp r0.w, r0.w add.f r0.z, r0.z, c14.y -floor.f r3.y, r2.z -mov.f32f32 r1.w, r1.w -bary.f r3.z, 23, r1.x -add.f r2.y, r2.y, (neg)r3.x +floor.f r3.z, r2.z +mad.f32 r1.w, r3.x, r3.x, r1.w +add.f r2.y, r2.y, (neg)r3.y (ss)mul.f r0.z, r0.z, r0.w -(ss)add.f r0.w, r2.z, (neg)r3.y -mad.f32 r1.w, r3.z, r3.z, r1.w -mov.f32f32 r2.y, r2.y -mov.f32f32 r0.z, r0.z -absneg.f r2.z, (neg)c11.x -mov.f32f32 r0.w, r0.w -mul.f r3.x, c14.x, r2.y -mov.f32f32 r2.w, r2.w -rsq r3.y, r1.w -(ss)mul.f r3.w, r0.x, r3.y +(ss)absneg.f r0.w, (neg)c11.x +add.f r2.z, r2.z, (neg)r3.z +mov.f32f32 r3.y, r2.y +add.f r2.y, r2.y, c14.z +rsq r3.z, r1.w +(ss)mul.f r3.w, r0.x, r3.z rsq r4.x, r1.w -(ss)mov.f32f32 r4.x, r4.x -mov.f32f32 r3.x, r3.x +(ss)mov.f32f32 r4.y, r4.x (ss)rsq r1.w, r1.w -(ss)mul.f r4.y, r0.x, r1.w -mov.f32f32 r3.w, r3.w -mul.f r4.z, r0.x, r4.x -add.f r0.y, r0.y, (neg)r3.x -mov.f32f32 r3.x, r4.y +(ss)mul.f r4.z, r0.x, r1.w +mul.f r4.w, c14.x, r3.y absneg.f r3.w, (abs)r3.w -mov.f32f32 r4.y, r4.z -mov.f32f32 r0.y, r0.y -absneg.f r3.x, (abs)r3.x -mov.f32f32 r3.w, r3.w -absneg.f r4.y, (abs)r4.y -add.f r4.z, c16.x, r0.y -mov.f32f32 r3.x, r3.x +mul.f r5.x, r0.x, r4.y +absneg.f r4.z, (abs)r4.z +add.f r0.y, r0.y, (neg)r4.w add.f r3.w, r3.w, c14.w -mov.f32f32 r4.y, r4.y -mov.f32f32 r4.z, r4.z -add.f r3.x, r3.x, c14.w -mov.f32f32 r3.w, r3.w -add.f r4.y, r4.y, c14.w -mul.f r4.z, r4.z, c6.z -mov.f32f32 r3.x, r3.x +absneg.f r4.w, (abs)r5.x +add.f r4.z, r4.z, c14.w +mov.f32f32 r5.x, r0.y max.f r3.w, r3.w, c14.y -mov.f32f32 r4.y, r4.y -mov.f32f32 r4.w, r4.z -max.f r3.x, r3.x, c14.y -mov.f32f32 r3.w, r3.w -mul.f r5.x, r2.x, r3.y -max.f r4.y, r4.y, c14.y -mov.f32f32 r5.y, r4.w -mov.f32f32 r3.x, r3.x -mov.f32f32 r4.w, r5.x -mov.f32f32 r4.y, r4.y -mul.f r5.x, r2.x, r4.x -mul.f r5.z, c14.x, r0.w -absneg.f r4.w, (abs)r4.w -mul.f r5.w, r2.x, r1.w -mov.f32f32 r5.x, r5.x -mov.f32f32 r5.z, r5.z -mov.f32f32 r4.w, r4.w -mov.f32f32 r5.w, r5.w -absneg.f r5.x, (abs)r5.x -add.f r1.z, r1.z, (neg)r5.z +mul.f r5.y, r2.x, r3.z add.f r4.w, r4.w, c14.w -absneg.f r5.z, (abs)r5.w -mov.f32f32 r5.x, r5.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r4.w, r4.w -mov.f32f32 r5.z, r5.z -add.f r5.x, r5.x, c14.w -add.f r5.w, c16.x, r1.z +max.f r4.z, r4.z, c14.y +mul.f r5.z, r2.x, r1.w +absneg.f r5.y, (abs)r5.y max.f r4.w, r4.w, c14.y +mul.f r4.y, r2.x, r4.y +absneg.f r5.z, (abs)r5.z +add.f r5.y, r5.y, c14.w +add.f r5.x, c16.x, r5.x +absneg.f r4.y, (abs)r4.y +mul.f r0.w, r0.w, c11.x +max.f r5.y, r5.y, c14.y add.f r5.z, r5.z, c14.w -mov.f32f32 r5.x, r5.x -mov.f32f32 r5.w, r5.w -mov.f32f32 r4.w, r4.w -mov.f32f32 r5.z, r5.z -max.f r5.x, r5.x, c14.y -mul.f r6.x, r5.w, c6.w -add.f r6.y, r3.w, r4.w -mul.f r3.y, r3.z, r3.y -mov.f32f32 r5.x, r5.x -mov.f32f32 r5.w, r6.x -max.f r6.z, r5.z, c14.y -mov.f32f32 r3.y, r3.y -add.f r6.w, r4.y, r5.x -mul.f r4.x, r3.z, r4.x -mov.f32f32 r5.z, r5.w -absneg.f r3.y, (abs)r3.y -bary.f r5.w, 2, r1.x -mov.f32f32 r4.x, r4.x -mov.f32f32 r6.z, r6.z -mov.f32f32 r3.y, r3.y -add.f r7.x, r5.w, c15.w +add.f r4.y, r4.y, c14.w +mul.f r5.w, r5.x, c6.z +add.f r5.x, r3.w, r5.y +mul.f r3.z, r3.x, r3.z +max.f r4.y, r4.y, c14.y +max.f r5.z, r5.z, c14.y +mov.f32f32 r6.x, r5.w +absneg.f r3.z, (abs)r3.z +add.f r6.y, r4.w, r4.y +mul.f r4.x, r3.x, r4.x +add.f r6.z, r4.z, r5.z +add.f r3.z, r3.z, c14.w +(ss)mul.f r1.w, r3.x, r1.w absneg.f r4.x, (abs)r4.x -add.f r7.y, r3.x, r6.z -add.f r3.y, r3.y, c14.w -mov.f32f32 r5.w, r7.x -mov.f32f32 r4.x, r4.x -(ss)mul.f r1.w, r3.z, r1.w -mov.f32f32 r3.y, r3.y -mov.f32f32 r5.w, r5.w -add.f r4.x, r4.x, c14.w -mov.f32f32 r1.w, r1.w -max.f r3.y, r3.y, c14.y -mul.f r2.z, r2.z, c11.x -mov.f32f32 r4.x, r4.x +mov.f32f32 r6.w, r2.z +max.f r3.z, r3.z, c14.y absneg.f r1.w, (abs)r1.w -mov.f32f32 r3.y, r3.y -sam.s (f32)(x)r7.z, r5.y, s#5, t#5 -(sy)(ss)mov.f32f32 r5.y, r7.z -max.f r4.x, r4.x, c14.y -mov.f32f32 r1.w, r1.w -add.f r5.z, r6.y, r3.y -mov.f32f32 r5.y, r5.y -mov.f32f32 r4.x, r4.x +add.f r4.x, r4.x, c14.w +mul.f r7.x, c14.x, r6.w +mov.f32f32 r7.y, r3.z add.f r1.w, r1.w, c14.w -mov.f32f32 r5.z, r5.z -add.f r5.w, c15.y, (neg)r2.y -add.f r6.y, r6.w, r4.x -mov.f32f32 r1.w, r1.w -mul.f r2.z, r2.z, r0.z -mov.f32f32 r5.w, r5.w -add.f r6.w, c15.y, (neg)r0.w -rcp r7.z, r5.z -(ss)mov.f32f32 r7.z, r7.z -mov.f32f32 r6.y, r6.y +max.f r4.x, r4.x, c14.y +add.f r1.z, r1.z, (neg)r7.x +add.f r5.x, r5.x, r7.y max.f r1.w, r1.w, c14.y -mov.f32f32 r6.w, r6.w -mul.f r3.w, r3.w, r7.z -mov.f32f32 r2.z, r2.z -mov.f32f32 r1.w, r1.w -mul.f r7.z, r5.w, r6.w -mov.f32f32 r3.w, r3.w -bary.f r7.w, 25, r1.x -rcp r8.x, r6.y -(ss)mov.f32f32 r8.x, r8.x -mul.f r5.y, r7.z, r5.y -add.f r0.y, c16.y, r0.y -mul.f r7.z, r7.w, c15.x -mul.f r4.y, r4.y, r8.x -add.f r7.y, r7.y, r1.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r7.w, r7.z -mov.f32f32 r4.y, r4.y -mov.f32f32 r8.x, r7.z -mul.f r0.y, r0.y, c6.z -mov.f32f32 r8.y, r7.w -bary.f r7.w, 26, r1.x -mov.f32f32 r8.x, r8.x -mov.f32f32 r8.z, r0.y -mov.f32f32 r7.y, r7.y -mul.f r7.w, r7.w, c15.x -mov.f32f32 r8.w, r8.x -mov.f32f32 r9.y, r8.z -mov.f32f32 r6.x, r6.x -mov.f32f32 r8.x, r7.w -mov.f32f32 r9.x, r7.w -rcp r8.z, r7.y -(ss)mov.f32f32 r9.w, r8.z -mov.f32f32 r9.z, r6.x -mov.f32f32 r8.z, r8.x -mov.f32f32 r6.x, r9.x -mov.f32f32 r8.x, r7.x -mul.f r3.x, r3.x, r9.w -mul.f r0.z, r2.z, r0.z -rcp r2.z, r6.y -nop -(ss)rcp r6.y, r6.y -mov.f32f32 r9.x, r6.x -sam (f32)(xyzw)r9.w, r8.y, s#3, t#3 -(sy)mov.f32f32 r6.x, r9.w -(ss)add.f r8.y, c14.z, (neg)r10.x -mov.f32f32 r8.z, r10.y -mov.f32f32 r9.w, r8.x -mul.f r6.x, r6.x, r3.w -rcp r8.x, r5.z -(ss)mov.f32f32 r8.x, r8.x -sam (f32)(xyzw)r10.x, r8.w, s#4, t#4 -(sy)(ss)mul.f r8.w, r10.y, r4.y -mul.f r9.x, r10.x, r4.y -mul.f r4.y, r10.z, r4.y -mul.f r4.w, r4.w, r8.x -mov.f32f32 r2.z, r2.z -mov.f32f32 r8.x, r8.y -mul.f r8.y, r8.z, r3.w -mov.f32f32 r4.w, r4.w -mov.f32f32 r8.z, r7.w -mul.f r2.z, r5.x, r2.z -mov.f32f32 r5.x, r8.x -sam.s (f32)(x)r9.y, r9.y, s#5, t#5 -(sy)mov.f32f32 r8.x, r9.y -(ss)mov.f32f32 r9.y, r8.z -bary.f r8.z, 24, r1.x -mov.f32f32 r2.z, r2.z -mov.f32f32 r9.z, r7.w -mul.f r3.w, r5.x, r3.w -mul.f r5.x, r8.z, c15.x -add.f r2.y, r2.y, c14.z -mov.f32f32 r9.w, r9.z -mov.f32f32 r3.x, r3.x -mov.f32f32 r8.z, r5.x -mov.f32f32 r10.x, r5.x -mul.f r6.w, r2.y, r6.w -mov.f32f32 r10.y, r7.z -mov.f32f32 r9.z, r8.z -mov.f32f32 r10.x, r10.x -mad.f32 r5.y, r6.w, r8.x, r5.y -mov.f32f32 r10.y, r10.y -mov.f32f32 r6.w, r7.w +mov.f32f32 r7.x, r4.x +mov.f32f32 r7.y, r1.z +mov.f32f32 r7.z, r5.x +mov.f32f32 r7.w, r1.w +add.f r7.x, r6.y, r7.x +add.f r6.y, c16.x, r7.y +mul.f r0.w, r0.w, r0.z +add.f r7.y, r6.z, r7.w mov.f32f32 r0.z, r0.z -mov.f32f32 r5.y, r5.y -sam (f32)(xyzw)r10.z, r9.y, s#3, t#3 -(sy)mov.f32f32 r8.x, r10.w -(ss)nop -sam (f32)(xyzw)r9.y, r9.w, s#4, t#4 -(sy)mad.f32 r8.z, r9.z, r2.z, r8.w -mad.f32 r8.w, r9.y, r2.z, r9.x -mad.f32 r2.z, r9.w, r2.z, r4.y -mad.f32 r4.y, r8.x, r4.w, r6.x -rcp r5.z, r5.z -(ss)mov.f32f32 r5.z, r5.z -mov.f32f32 r6.x, r6.y -add.f r6.y, c14.z, (neg)r10.z -mov.f32f32 r8.x, r11.x -mul.f r3.y, r3.y, r5.z -mul.f r4.x, r4.x, r6.x -mov.f32f32 r5.z, r6.y -mad.f32 r6.x, r8.x, r4.w, r8.y -mov.f32f32 r3.y, r3.y -mov.f32f32 r6.y, r7.z -mov.f32f32 r4.x, r4.x -mov.f32f32 r8.x, r7.z -mov.f32f32 r5.z, r5.z -mov.f32f32 r9.x, r6.y -mov.f32f32 r6.y, r5.x -mov.f32f32 r8.x, r8.x -mov.f32f32 r8.y, r5.x -mad.f32 r3.w, r5.z, r4.w, r3.w -mov.f32f32 r9.y, r6.y -mov.f32f32 r4.z, r4.z -mov.f32f32 r8.y, r8.y -mov.f32f32 r10.z, r6.w -mul.f r0.z, r0.z, c16.z -mov.f32f32 r9.z, r4.z -add.f r1.z, c16.y, r1.z -sam (f32)(xyzw)r10.w, r9.x, s#3, t#3 -(sy)mov.f32f32 r4.z, r11.x -add.f r4.w, c14.z, (neg)r10.w -mov.f32f32 r5.z, r11.y -sam (f32)(xyzw)r10.w, r8.x, s#4, t#4 -(sy)mad.f32 r2.z, r11.y, r4.x, r2.z -mad.f32 r4.y, r4.z, r3.y, r4.y -mad.f32 r4.z, r11.x, r4.x, r8.z -mad.f32 r4.x, r10.w, r4.x, r8.w -mov.f32f32 r2.z, r2.z -mov.f32f32 r4.y, r4.y -mov.f32f32 r4.z, r4.z -mov.f32f32 r4.x, r4.x -mul.f r2.z, c8.z, r2.z -mad.f32 r4.y, c15.y, r4.y, c15.z -mul.f r4.z, c8.y, r4.z -mul.f r4.x, c8.x, r4.x -mov.f32f32 r4.w, r4.w -mov.f32f32 r4.y, r4.y -bary.f r6.y, 4, r1.x -bary.f r6.w, 5, r1.x -(ss)bary.f r8.x, 6, r1.x -mov.f32f32 r2.z, r2.z -mul.f r6.y, r6.y, r4.y +rcp r6.z, r7.z +mov.f32f32 r3.w, r3.w +mov.f32f32 r7.w, r7.x +mov.f32f32 r8.x, r7.y +mul.f r8.z, r6.y, c6.w +(ss)mul.f r3.w, r3.w, r6.z +bary.f r6.z, 25, r1.x +mul.f r0.z, r0.w, r0.z +mov.f32f32 r6.y, r8.z +mov.f32f32 r0.w, r3.w +mul.f r8.w, r6.z, c15.x +rcp r6.z, r7.w mov.f32f32 r4.w, r4.w -mul.f r6.w, r6.w, r4.y -mul.f r4.y, r8.x, r4.y +rcp r8.y, r8.x mov.f32f32 r4.z, r4.z -mad.f32 r3.w, r4.w, r3.y, r3.w -mov.f32f32 r4.x, r4.x -mad.f32 r3.y, r5.z, r3.y, r6.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r3.w, r3.w -sam (f32)(xyzw)r8.x, r10.y, s#0, t#0 -(sy)mul.f r4.w, r8.x, r3.x -mov.f32f32 r3.y, r3.y -mul.f r1.z, r1.z, c6.w -mad.f32 r3.w, c15.y, r3.w, c15.z -mul.f r5.z, r8.z, r3.x -mul.f r3.x, r8.y, r3.x -rcp r6.x, r7.y -mad.f32 r3.y, c15.y, r3.y, c15.z -mov.f32f32 r3.w, r3.w -bary.f r8.x, 12, r1.x -bary.f r8.y, 7, r1.x -bary.f r8.z, 13, r1.x -bary.f r8.w, 14, r1.x -mov.f32f32 r3.y, r3.y -mul.f r8.x, r8.x, (neg)r8.y -mul.f r8.z, r8.z, (neg)r8.y -mul.f r8.y, r8.w, (neg)r8.y -mov.f32f32 r8.w, r1.z -mad.f32 r6.y, r8.x, r3.w, r6.y -mad.f32 r6.w, r8.z, r3.w, r6.w -mad.f32 r3.w, r8.y, r3.w, r4.y +bary.f r9.x, 2, r1.x +mov.f32f32 r9.y, r8.w +bary.f r9.z, 26, r1.x +(ss)mul.f r4.w, r4.w, r6.z +mul.f r4.z, r4.z, r8.y mov.f32f32 r9.w, r8.w -mov.f32f32 r4.y, r6.y -mov.f32f32 r6.y, r6.w -mad.f32 r0.x, r0.x, r3.y, r4.y -mad.f32 r2.x, r2.x, r3.y, r6.y -mov.f32f32 r3.w, r3.w -mov.f32f32 r4.y, r7.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r2.x, r2.x -mad.f32 r3.y, r3.z, r3.y, r3.w -nop -mul.f r3.z, r0.x, r0.x -mov.f32f32 r10.x, r4.y -mad.f32 r3.z, r2.x, r2.x, r3.z -mov.f32f32 r3.y, r3.y -(ss)mov.f32f32 r3.w, r6.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r3.z, r3.z -rcp r4.y, r7.y -(ss)mov.f32f32 r4.y, r4.y -mad.f32 r3.z, r3.y, r3.y, r3.z -sam.s (f32)(x)r8.x, r9.z, s#5, t#5 -mul.f r3.w, r6.z, r3.w -(sy)mov.f32f32 r6.x, r8.x -add.f r0.w, r0.w, c14.z -exp2 r0.z, r0.z -(ss)mov.f32f32 r0.z, r0.z -mov.f32f32 r3.w, r3.w -mov.f32f32 r6.y, r7.w -rsq r3.z, r3.z -(ss)mov.f32f32 r3.z, r3.z -mul.f r5.w, r5.w, r0.w -add.f r6.z, c18.y, (neg)r0.z -mul.f r1.w, r1.w, r4.y -mul.f r0.x, r0.x, r3.z -mul.f r2.x, r2.x, r3.z -mul.f r3.y, r3.y, r3.z +mul.f r10.z, r9.z, c15.x +mov.f32f32 r6.z, r4.w +mov.f32f32 r8.y, r4.z +mov.f32f32 r10.y, r8.w +mov.f32f32 r9.z, r10.z +mov.f32f32 r10.x, r10.z +add.f r11.z, r9.x, c15.w +mul.f r0.z, r0.z, c16.z +(ss)rcp r7.w, r7.w nop -mov.f32f32 r0.x, r0.x -mov.f32f32 r2.x, r2.x -mov.f32f32 r3.y, r3.y -mad.f32 r3.z, r5.w, r6.x, r5.y -mul.f r4.y, r0.x, r0.x -mul.f r5.y, (neg)c9.x, r0.x -mad.f32 r4.y, r2.x, r2.x, r4.y -mad.f32 r5.y, (neg)c9.y, r2.x, r5.y -mov.f32f32 r3.z, r3.z -mov.f32f32 r5.w, r6.y +rcp r7.x, r7.x mov.f32f32 r4.y, r4.y +sam (f32)(xyzw)r11.w, r10.y, s#0, t#0 +(sy)(ss)mul.f r10.y, r12.y, r8.y +sam (f32)(xyzw)r12.y, r9.y, s#3, t#3 +(sy)(ss)mul.f r9.y, r12.y, r0.w +rcp r7.z, r7.z mov.f32f32 r5.y, r5.y -mad.f32 r4.y, r3.y, r3.y, r4.y -mad.f32 r5.y, (neg)c9.z, r3.y, r5.y -mov.f32f32 r0.y, r0.y -mov.f32f32 r6.x, r5.x -mul.f r6.y, r6.z, c11.y +sam (f32)(xyzw)r13.x, r9.w, s#4, t#4 +(sy)mul.f r9.z, r13.y, r6.z +add.f r9.x, c14.z, (neg)r12.z +(ss)mul.f r9.w, r13.z, r6.z +(ss)mul.f r5.y, r5.y, r7.z +mul.f r4.y, r4.y, r7.w +mul.f r0.w, r9.x, r0.w +rcp r6.z, r8.x +mov.f32f32 r5.z, r5.z +mov.f32f32 r7.z, r5.y +mov.f32f32 r7.w, r10.z +(ss)bary.f r8.x, 24, r1.x +mov.f32f32 r10.x, r4.y +mov.f32f32 r11.x, r10.z +(ss)mul.f r5.z, r5.z, r6.z +mul.f r9.x, r8.x, c15.x +mul.f r8.y, r12.x, r8.y +mul.f r4.w, r13.x, r4.w +mul.f r3.w, r12.w, r3.w +mov.f32f32 r8.x, r9.x +mov.f32f32 r11.y, r9.x +mov.f32f32 r12.x, r5.z +mov.f32f32 r10.w, r9.x +mov.f32f32 r6.z, r11.z +mul.f r4.z, r11.w, r4.z +exp2 r0.z, r0.z +(ss)mov.f32f32 r11.w, r0.z +sam (f32)(xyzw)r12.y, r7.w, s#3, t#3 +(sy)(ss)mad.f32 r7.w, r12.z, r7.z, r9.y +rcp r5.x, r5.x +(ss)mul.f r3.z, r3.z, r5.x +sam (f32)(xyzw)r13.x, r11.x, s#4, t#4 +(sy)(ss)mad.f32 r5.x, r13.y, r10.x, r9.z +add.f r8.x, c14.z, (neg)r12.y +mad.f32 r9.y, r13.z, r10.x, r9.w +mov.f32f32 r9.z, r3.z +mov.f32f32 r9.w, r8.w +mov.f32f32 r10.x, r9.x +mul.f r4.x, r4.x, r7.x +mad.f32 r0.w, r8.x, r7.z, r0.w +sam (f32)(xyzw)r10.z, r10.z, s#0, t#0 +(sy)mad.f32 r7.x, r11.x, r12.x, r10.y +rcp r7.y, r7.y +(ss)mul.f r1.w, r1.w, r7.y +(ss)mov.f32f32 r7.y, r4.x +mad.f32 r7.z, r10.w, r12.x, r8.y +sam (f32)(xyzw)r13.y, r9.w, s#3, t#3 +(sy)mad.f32 r7.w, r13.z, r9.z, r7.w +mov.f32f32 r8.x, r8.w +mov.f32f32 r8.y, r9.x +(ss)add.f r9.w, c14.z, (neg)r13.y +mad.f32 r7.w, c15.y, r7.w, c15.z +bary.f r10.x, 6, r1.x +mov.f32f32 r10.y, r1.w +mad.f32 r0.w, r9.w, r9.z, r0.w +mov.f32f32 r9.z, r7.w +bary.f r9.w, 4, r1.x +bary.f r10.w, 5, r1.x +mul.f r7.w, r10.x, r7.w +mad.f32 r0.w, c15.y, r0.w, c15.z +mul.f r9.w, r9.w, r9.z +mul.f r9.z, r10.w, r9.z +bary.f r10.x, 14, r1.x +mov.f32f32 r10.w, r0.w +bary.f r11.x, 12, r1.x +bary.f r11.y, 7, r1.x +bary.f r12.x, 13, r1.x +sam (f32)(xyzw)r14.x, r8.x, s#4, t#4 +(sy)mad.f32 r5.x, r14.y, r7.y, r5.x +mad.f32 r7.y, r14.z, r7.y, r9.y +(ss)mul.f r8.x, r11.x, (neg)r11.y +mul.f r8.y, r12.x, (neg)r11.y +mul.f r9.y, r10.x, (neg)r11.y +mul.f r7.y, c8.z, r7.y +mad.f32 r8.x, r8.x, r10.w, r9.w +mad.f32 r3.w, r12.w, r5.y, r3.w +mad.f32 r5.y, r8.y, r10.w, r9.z +mad.f32 r3.z, r13.w, r3.z, r3.w +mad.f32 r0.w, r9.y, r0.w, r7.w +mul.f r3.w, c8.y, r5.x +sam (f32)(xyzw)r8.w, r8.w, s#0, t#0 +(sy)mad.f32 r5.x, r9.y, r10.y, r7.x +mad.f32 r3.z, c15.y, r3.z, c15.z +mad.f32 r4.y, r13.x, r4.y, r4.w +mad.f32 r4.w, r9.x, r10.y, r7.z +mov.f32f32 r7.x, r5.x +mov.f32f32 r7.z, r3.z +mad.f32 r0.w, r3.x, r3.z, r0.w +(rpt1)nop +mad.f32 r0.x, r0.x, r7.z, r8.x +mad.f32 r2.x, r2.x, r7.z, r5.y +(rpt1)nop +mov.f32f32 r3.x, r0.x +mov.f32f32 r3.z, r2.x +mov.f32f32 r5.y, r0.w +mov.f32f32 r7.z, r4.w +mul.f r0.x, r0.x, r3.x +mad.f32 r4.x, r14.x, r4.x, r4.y +mad.f32 r0.x, r2.x, r3.z, r0.x +mad.f32 r2.x, r10.z, r5.z, r4.z +mad.f32 r0.x, r5.y, r5.y, r0.x +mul.f r4.x, c8.x, r4.x +add.f r4.y, c18.y, (neg)r11.w +mad.f32 r1.w, r8.w, r1.w, r2.x +(ss)nop +sam.s (f32)(x)r8.w, r6.x, s#5, t#5 +add.f r2.x, c15.y, (neg)r3.y +add.f r0.y, c16.y, r0.y +add.f r1.z, c16.y, r1.z +rsq r0.x, r0.x +(ss)mov.f32f32 r3.y, r0.x +(ss)mul.f r0.x, r0.w, r0.x +mov.f32f32 r0.w, r1.w +mov.f32f32 r4.z, r2.x +mul.f r3.x, r3.x, r3.y +mul.f r3.y, r3.z, r3.y +mov.f32f32 r3.z, r0.x +add.f r5.y, c15.y, (neg)r6.w +mov.f32f32 r5.z, r3.x +mul.f r3.x, (neg)c9.x, r3.x +mov.f32f32 r6.x, r3.y +mov.f32f32 r6.y, r5.y +mul.f r6.z, r5.z, r5.z +mad.f32 r3.x, (neg)c9.y, r3.y, r3.x +mad.f32 r3.y, r6.x, r6.x, r6.z +mad.f32 r0.x, (neg)c9.z, r0.x, r3.x +mad.f32 r3.x, r3.z, r3.z, r3.y +mul.f r3.y, r4.z, r6.y +mul.f r4.y, r4.y, c11.y mul.f r0.z, r0.z, c14.z -mov.f32f32 r0.y, r0.y -rsq r4.y, r4.y -(ss)mov.f32f32 r4.y, r4.y -max.f r5.y, r5.y, c14.y -mov.f32f32 r6.x, r6.x -mov.f32f32 r7.w, r0.y -mul.f r0.x, r0.x, r4.y -mov.f32f32 r0.y, r5.y -bary.f r5.y, 19, r1.x -bary.f r6.z, 18, r1.x -mov.f32f32 r0.x, r0.x -bary.f r6.w, 15, r1.x -bary.f r7.y, 20, r1.x -mov.f32f32 r5.y, r5.y -mov.f32f32 r6.z, r6.z -mov.f32f32 r6.w, r6.w -mov.f32f32 r7.y, r7.y -mad.f32 r8.x, c8.y, r0.y, (neg)r5.y -mad.f32 r8.y, c8.x, r0.y, (neg)r6.z -mul.f r0.x, r0.x, r6.w -mul.f r2.x, r2.x, r4.y -mad.f32 r0.y, c8.z, r0.y, (neg)r7.y -mov.f32f32 r6.w, r8.x -mov.f32f32 r8.x, r8.y -mov.f32f32 r2.x, r2.x -bary.f r8.y, 16, r1.x -mov.f32f32 r0.y, r0.y -mad.f32 r5.y, c12.x, r6.w, r5.y -mad.f32 r6.z, c12.x, r8.x, r6.z -mov.f32f32 r6.w, r8.y -mad.f32 r0.y, c12.x, r0.y, r7.y -mov.f32f32 r5.y, r5.y -mov.f32f32 r6.z, r6.z -mad.f32 r0.x, r2.x, r6.w, r0.x -mov.f32f32 r0.y, r0.y -mul.f r2.x, r3.y, r4.y -nop -mov.f32f32 r0.x, r0.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r2.x, r2.x -bary.f r3.y, 17, r1.x -sam (f32)(xyzw)r8.x, r5.w, s#0, t#0 -(sy)mad.f32 r4.y, r8.x, r3.w, r4.w -mov.f32f32 r1.z, r1.z -mad.f32 r4.w, r8.z, r3.w, r5.z -mov.f32f32 r3.y, r3.y -mov.f32f32 r1.w, r1.w -mad.f32 r3.x, r8.y, r3.w, r3.x -mov.f32f32 r3.w, r7.z -mad.f32 r0.x, r2.x, r3.y, r0.x -mov.f32f32 r8.x, r1.z -mov.f32f32 r1.z, r7.x -mov.f32f32 r5.z, r3.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r2.x, r5.x -mov.f32f32 r8.y, r1.z -add.f r0.z, r0.z, r6.y -max.f r0.x, c14.y, r0.x -(ss)mov.f32f32 r5.w, r2.x -mov.f32f32 r6.x, r2.w -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.x, r0.x -sam.s (f32)(x)r6.w, r7.w, s#5, t#5 -(sy)mov.f32f32 r1.z, r6.w -mul.f r0.w, r2.y, r0.w -mov.f32f32 r0.z, r0.z -bary.f r2.x, 9, r1.x -sam (f32)(xyzw)r6.w, r5.z, s#0, t#0 -(sy)mad.f32 r2.y, r7.x, r1.w, r3.x -mad.f32 r0.w, r0.w, r1.z, r3.z -log2 r0.x, r0.x -(ss)mul.f r0.x, c12.y, r0.x -mad.f32 r1.z, r7.y, r1.w, r4.w -mov.f32f32 r2.y, r2.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r1.z, r1.z -mad.f32 r1.w, r6.w, r1.w, r4.y -mul.f r0.w, c16.w, r0.w +mul.f r11.x, r0.y, c6.z +mul.f r11.y, r1.z, c6.w +mul.f r0.y, r2.y, r5.y +rsq r1.z, r3.x +(ss)mov.f32f32 r3.x, r1.z +max.f r0.x, r0.x, c14.y +mul.f r1.z, r3.z, r1.z +bary.f r3.z, 18, r1.x +mul.f r4.z, r5.z, r3.x +bary.f r5.y, 15, r1.x +mov.f32f32 r5.z, r0.x +bary.f r6.y, 19, r1.x +mul.f r3.x, r6.x, r3.x +mul.f r4.z, r4.z, r5.y +bary.f r5.y, 16, r1.x +bary.f r6.x, 20, r1.x +mad.f32 r6.z, c8.y, r5.z, (neg)r6.y +mad.f32 r5.z, c8.z, r5.z, (neg)r6.x +mad.f32 r3.x, r3.x, r5.y, r4.z +bary.f r4.z, 17, r1.x +mov.f32f32 r5.y, r6.x +mov.f32f32 r6.x, r6.y +mad.f32 r0.x, c8.x, r0.x, (neg)r3.z +mad.f32 r1.z, r1.z, r4.z, r3.x +mad.f32 r3.x, c12.x, r5.z, r5.y +mad.f32 r4.z, c12.x, r6.z, r6.x +mov.f32f32 r3.z, r3.z +max.f r1.z, c14.y, r1.z +mad.f32 r0.x, c12.x, r0.x, r3.z +(sy)mul.f r3.y, r3.y, r8.w +add.f r0.z, r0.z, r4.y +mov.f32f32 r8.y, r11.x +mov.f32f32 r8.w, r11.z +mov.f32f32 r6.x, r11.y +log2 r1.z, r1.z +(ss)mul.f r1.z, c12.y, r1.z max.f r0.z, r0.z, c14.y -mov.f32f32 r2.x, r2.x -bary.f r2.w, 10, r1.x -exp2 r0.x, r0.x -(ss)mul.f r2.z, r2.z, r0.x -mul.f r3.x, r4.z, r0.x -mad.f32 r0.y, r1.z, r0.y, r2.z -mad.f32 r2.z, r2.y, r5.y, r3.x -(ss)mul.f r0.x, r4.x, r0.x -mov.f32f32 r1.w, r1.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r2.z, r2.z -mad.f32 r0.x, r1.w, r6.z, r0.x +mov.f32f32 r6.y, r11.z +(rpt1)nop +sam.s (f32)(x)r7.w, r8.y, s#5, t#5 min.f r0.z, r0.z, c14.z -mul.f r0.y, r0.y, r0.w -mul.f r2.z, r2.z, r0.w -mov.f32f32 r0.x, r0.x +(sy)mad.f32 r0.y, r0.y, r7.w, r3.y +exp2 r1.z, r1.z +(ss)mul.f r3.y, r7.y, r1.z +mul.f r3.z, r3.w, r1.z +mad.f32 r3.y, r7.x, r3.x, r3.y +sam.s (f32)(x)r5.y, r5.w, s#5, t#5 +add.f r2.z, r2.z, c14.z +mad.f32 r3.z, r7.z, r4.z, r3.z +(ss)mul.f r1.z, r4.x, r1.z add.f r3.x, c18.y, (neg)r0.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r2.z, r2.z -mad.f32 r0.y, c7.z, r1.z, r0.y -mad.f32 r1.z, c7.y, r2.y, r2.z -mul.f r0.x, r0.x, r0.w -mul.f r0.w, r3.x, c10.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.z, r1.z -mov.f32f32 r0.x, r0.x -add.f r2.y, c18.y, (neg)r0.z -mul.f r0.y, r0.z, r0.y -mul.f r1.z, r0.z, r1.z -mad.f32 r0.x, c7.x, r1.w, r0.x -mul.f r1.w, r2.y, c10.y -add.f r0.y, r0.y, r0.w -mov.f32f32 r0.w, r2.w -mov.f32f32 r0.x, r0.x -add.f r1.z, r1.z, r1.w -add.f r1.w, c18.y, (neg)r0.z -mov.f32f32 r2.y, r0.w -bary.f (ei)r0.w, 11, r1.x -mul.f r0.x, r0.z, r0.x -mul.f r0.z, r1.w, c10.x -mov.f32f32 r6.y, r2.x -mov.f32f32 r0.w, r0.w +mul.f r2.x, r2.x, r2.z +mad.f32 r0.x, r0.w, r0.x, r1.z +add.f r0.w, c18.y, (neg)r0.z +add.f r1.z, c18.y, (neg)r0.z +(sy)mad.f32 r0.y, r2.x, r5.y, r0.y +sam.s (f32)(x)r3.w, r11.x, s#5, t#5 +mul.f r2.x, r2.y, r2.z +mul.f r2.y, r3.x, c10.z +mul.f r0.w, r0.w, c10.y +mul.f r1.z, r1.z, c10.x +(sy)mad.f32 r0.y, r2.x, r3.w, r0.y +bary.f r3.x, 11, r1.x +bary.f r3.w, 8, r1.x +bary.f (ei)r4.x, 9, r1.x +mul.f r0.y, c16.w, r0.y mov.f32f32 r1.x, c14.y (rpt1)nop -mov.f32f32 r2.z, r0.w -add.f r0.x, r0.x, r0.z -sam (f32)(w)r2.w, r6.x, s#1, t#1 -(sy)add.f r0.z, c14.z, (neg)r3.z -(rpt3)nop -sam (f32)(w)r1.w, r2.y, s#2, t#2 -(sy)cmps.f.lt r0.w, r2.z, c17.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r1.y, r2.z -nop -cov.u32f32 r0.w, r0.w -mov.f32f32 r0.z, r0.z -mov.f32f32 r1.y, r1.y -nop -cmps.f.ne r0.w, r0.w, c14.y -mov.f32f32 r0.z, r0.z -(rpt1)nop -sel.b32 r0.w, r1.x, r0.w, r1.y -mov.f32f32 r2.w, r0.z -(rpt1)nop -mul.f r0.y, r0.y, r0.w -mul.f r0.z, r1.z, r0.w -mul.f r0.x, r0.x, r0.w -nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.x, r0.x -nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.x, r0.x -nop -(ss)mov.f32f32 r2.z, r0.y -mov.f32f32 r2.y, r0.z -mov.f32f32 r2.x, r0.x +mov.f32f32 r1.y, r0.y +mul.f r0.x, r0.x, r0.y +sam (f32)(w)r5.y, r2.w, s#2, t#2 +(sy)cmps.f.lt r0.y, r6.x, c17.x +sam (f32)(w)r3.w, r3.w, s#1, t#1 +mad.f32 r0.x, c7.x, r1.w, r0.x +mul.f r1.w, r3.y, r1.y +mul.f r1.y, r3.z, r1.y +mad.f32 r1.w, c7.z, r5.x, r1.w +mad.f32 r1.y, c7.y, r4.w, r1.y +mul.f r0.x, r0.z, r0.x +cov.u32f32 r0.y, r0.y +mul.f r1.w, r0.z, r1.w +mul.f r0.z, r0.z, r1.y +add.f r0.x, r0.x, r1.z +cmps.f.ne r0.y, r0.y, c14.y +add.f r1.y, r1.w, r2.y +add.f r0.z, r0.z, r0.w +(sy)(ss)add.f r2.w, c14.z, (neg)r4.z +sel.b32 r0.y, r1.x, r0.y, r6.x +(rpt2)nop +mul.f r2.z, r1.y, r0.y +mul.f r2.y, r0.z, r0.y +mul.f r2.x, r0.x, r0.y end -nop -nop -nop ; FRAG: outputs: r2.x (1:0) -; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r7.z (5:10,cm=f,il=12,b=1) r0.x (5:11,cm=f,il=16,b=1) r6.x (5:12,cm=f,il=20,b=1) r4.z (5:13,cm=f,il=24,b=1) r2.w (5:14,cm=f,il=28,b=1) r7.x (5:15,cm=f,il=32,b=1) -; FRAG: 535 instructions, 0 half, 12 full +; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r10.z (5:10,cm=f,il=12,b=1) r2.y (5:11,cm=f,il=16,b=1) r4.z (5:12,cm=f,il=20,b=1) r5.z (5:13,cm=f,il=24,b=1) r2.y (5:14,cm=f,il=28,b=1) r7.x (5:15,cm=f,il=32,b=1) +; FRAG: 354 instructions, 0 half, 15 full diff --git a/reference/0ad-alpine-valley/0ad-56.asm b/reference/0ad-alpine-valley/0ad-56.asm index d4490e5..494f814 100644 --- a/reference/0ad-alpine-valley/0ad-56.asm +++ b/reference/0ad-alpine-valley/0ad-56.asm @@ -1,16 +1,16 @@ ; options: ; VERT: new compiler -@in(r2.w) in0 -@in(r3.x) in1 -@in(r3.y) in2 -@in(r8.x) in4 -@in(r8.y) in5 -@in(r8.z) in6 -@in(r6.y) in8 -@in(r6.z) in9 -@in(r6.w) in10 -@in(r8.w) in12 -@in(r9.x) in13 +@in(r7.x) in0 +@in(r7.y) in1 +@in(r7.z) in2 +@in(r6.y) in4 +@in(r6.z) in5 +@in(r6.w) in6 +@in(r8.x) in8 +@in(r8.y) in9 +@in(r8.z) in10 +@in(r3.x) in12 +@in(r3.y) in13 @out(r0.x) out0 @out(r0.y) out1 @out(r0.z) out2 @@ -43,167 +43,101 @@ @out(r7.y) out29 @out(r7.z) out30 @out(r7.w) out31 -(sy)(ss)add.f r0.x, c4.x, (neg)r2.w -mul.f r0.y, r8.x, r8.x -mul.f r0.z, c8.w, r2.w -mul.f r0.w, c8.z, r2.w +@const(c13.x) 0x3f800000, 0x00000000, 0xbf800000, 0x00000000 +@const(c14.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 +(sy)(ss)add.f r0.x, c4.x, (neg)r7.x +mul.f r0.y, r6.y, r6.y +mul.f r0.z, c8.y, r7.x +mul.f r0.w, c8.x, r7.x mul.f r1.x, r0.x, r0.x -add.f r1.y, c4.y, (neg)r3.x +add.f r1.z, c4.y, (neg)r7.y add.f r0.y, c13.x, (neg)r0.y -mad.f32 r0.z, c9.w, r3.x, r0.z -mad.f32 r0.w, c9.z, r3.x, r0.w -mad.f32 r1.x, r1.y, r1.y, r1.x -mov.f32f32 r0.y, r0.y -mad.f32 r0.z, c10.w, r3.y, r0.z -mad.f32 r0.w, c10.z, r3.y, r0.w -mov.f32f32 r1.x, r1.x -add.f r1.z, c4.z, (neg)r3.y -mul.f r1.w, r0.y, r0.y -mul.f r2.x, r8.y, r8.x -add.f r0.z, r0.z, c11.w +mad.f32 r0.z, c9.y, r7.y, r0.z +mad.f32 r0.w, c9.x, r7.y, r0.w mad.f32 r1.x, r1.z, r1.z, r1.x -add.f r0.w, r0.w, c11.z -mul.f r2.y, c8.y, r2.w -mul.f r2.z, c8.x, r2.w -add.f r2.x, c13.y, (neg)r2.x -mul.f r3.z, r6.w, c6.z -mov.f32f32 r0.z, r0.z +add.f r1.w, c4.z, (neg)r7.z +mov.f32f32 r1.y, r0.y +mad.f32 r0.z, c10.y, r7.z, r0.z +mad.f32 r0.w, c10.x, r7.z, r0.w +mad.f32 r1.x, r1.w, r1.w, r1.x +mul.f r2.x, r1.y, r1.y +mul.f r1.y, r6.z, r6.y +add.f r0.z, r0.z, c11.y +add.f r0.w, r0.w, c11.x +mul.f r2.y, r6.w, r6.y +mul.f r2.z, c8.w, r7.x rsq r1.x, r1.x -(ss)mov.f32f32 r1.x, r1.x -mov.f32f32 r2.x, r2.x -mov.f32f32 r0.w, r0.w -mad.f32 r2.y, c9.y, r3.x, r2.y +(ss)mov.f32f32 r2.w, r1.x +add.f r3.z, c13.y, (neg)r1.y mad.f32 r0.x, r0.x, r1.x, (neg)c5.x -mad.f32 r1.w, r2.x, r2.x, r1.w -mad.f32 r1.y, r1.y, r1.x, (neg)c5.y -mad.f32 r1.x, r1.z, r1.x, (neg)c5.z -mov.f32f32 r0.x, r0.x -mov.f32f32 r3.w, r1.w -mul.f r1.z, r8.z, r8.x -mov.f32f32 r1.y, r1.y -mul.f r1.w, r0.x, r0.x -mov.f32f32 r1.x, r1.x -add.f r4.x, c13.y, (neg)r1.z -mad.f32 r4.y, r1.y, r1.y, r1.w -mov.f32f32 r1.w, r0.z -mov.f32f32 r1.z, r0.w -mad.f32 r0.z, c10.y, r3.y, r2.y -mov.f32f32 r0.w, r4.y -mov.f32f32 r2.y, r4.x -mad.f32 r0.w, r1.x, r1.x, r0.w -add.f r0.z, r0.z, c11.y -mad.f32 r2.z, c9.x, r3.x, r2.z -mov.f32f32 r3.z, r3.z -mad.f32 r2.z, c10.x, r3.y, r2.z -mul.f r5.z, c0.w, r2.w -mul.f r5.w, c0.z, r2.w -rsq r0.w, r0.w -(ss)mov.f32f32 r0.w, r0.w -mad.f32 r3.w, r2.y, r2.y, r3.w -mul.f r0.z, r0.z, c12.y -add.f r2.z, r2.z, c11.x -mul.f r1.x, r1.x, r0.w -mul.f r4.x, r1.y, r0.w -mul.f r0.x, r0.x, r0.w -mov.f32f32 r1.y, r0.z -mov.f32f32 r0.z, r1.x -mov.f32f32 r0.w, r4.x -mov.f32f32 r0.x, r0.x -rsq r1.x, r3.w -(ss)mov.f32f32 r1.x, r1.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.x, r0.x +mul.f r1.y, r0.z, c12.y +mad.f32 r0.z, r1.z, r2.w, (neg)c5.y +mov.f32f32 r1.z, r3.z +(ss)mov.f32f32 r1.x, r0.x +mad.f32 r1.w, r1.w, r2.w, (neg)c5.z +mov.f32f32 r2.w, r0.z +mad.f32 r2.x, r3.z, r1.z, r2.x +mul.f r1.x, r1.x, r1.x +add.f r2.y, c13.y, (neg)r2.y +mad.f32 r0.z, r0.z, r2.w, r1.x +mov.f32f32 r3.z, r1.w +mul.f r1.x, r0.w, c12.x +mov.f32f32 r0.w, r2.y +mad.f32 r2.z, c9.w, r7.y, r2.z +mad.f32 r0.z, r1.w, r3.z, r0.z +mad.f32 r1.w, c10.w, r7.z, r2.z +mul.f r2.z, c8.z, r7.x +mul.f r3.w, c0.w, r7.x +mul.f r4.x, c0.z, r7.x +mul.f r5.z, c0.y, r7.x +mul.f r5.w, c0.x, r7.x +rsq r0.z, r0.z +(ss)mov.f32f32 r4.y, r0.z +mul.f r4.w, r0.x, r0.z +mad.f32 r0.x, r2.y, r0.w, r2.x +add.f r1.w, r1.w, c11.w +mul.f r5.y, r3.z, r4.y +mul.f r5.x, r2.w, r4.y +(ss)mad.f32 r0.z, c9.z, r7.y, r2.z +mad.f32 r2.y, c1.w, r7.y, r3.w +mad.f32 r2.w, c1.z, r7.y, r4.x +rsq r0.x, r0.x +(ss)mov.f32f32 r3.z, r0.x +mul.f r2.x, r0.y, r0.x +(ss)mad.f32 r0.x, c10.z, r7.z, r0.z +mad.f32 r0.y, c2.w, r7.z, r2.y +mul.f r2.z, r0.w, r3.z +mul.f r2.y, r1.z, r3.z +mov.f32f32 r0.z, r2.x +add.f r1.z, r0.x, c11.z +mov.f32f32 r0.x, r2.z +mov.f32f32 r3.z, r2.y +mul.f r3.w, r6.z, r0.z +add.f r0.w, r0.y, c3.w +mul.f r0.y, r6.y, r0.x +mul.f r4.x, r6.w, r3.z +mad.f32 r4.y, r6.w, r0.z, (neg)r0.y +mad.f32 r4.x, r6.z, r0.x, (neg)r4.x +mad.f32 r4.z, r6.y, r3.z, (neg)r3.w +mad.f32 r0.x, c2.z, r7.z, r2.w +mad.f32 r0.y, c1.y, r7.y, r5.z +mad.f32 r2.w, c1.x, r7.y, r5.w +mad.f32 r0.y, c2.y, r7.z, r0.y +add.f r0.z, r0.x, c3.z +mad.f32 r0.x, c2.x, r7.z, r2.w nop -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -(rpt1)nop -mov.f32f32 r5.y, r0.z -mov.f32f32 r5.x, r0.w -mov.f32f32 r4.w, r0.x -mul.f r0.x, r0.y, r1.x -mul.f r0.y, r2.y, r1.x -mul.f r0.z, r2.x, r1.x -mul.f r0.w, r2.z, c12.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r1.x, r0.w -mul.f r0.w, r8.y, r0.x -mul.f r2.x, r8.x, r0.y -mad.f32 r0.w, r8.x, r0.z, (neg)r0.w -mad.f32 r2.x, r8.z, r0.x, (neg)r2.x -mul.f r2.y, r8.z, r0.z -mov.f32f32 r2.z, r0.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r2.x, r2.x -mad.f32 r0.y, r8.y, r0.y, (neg)r2.y -mov.f32f32 r2.z, r2.z -mov.f32f32 r4.z, r0.w -mov.f32f32 r4.y, r2.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.x, r0.x -nop -mov.f32f32 r4.x, r0.y -mov.f32f32 r2.y, r0.z -mov.f32f32 r2.x, r0.x -mov.f32f32 r0.x, r3.z -mad.f32 r0.y, c1.w, r3.x, r5.z -mad.f32 r0.z, c1.z, r3.x, r5.w -mul.f r0.w, c0.y, r2.w -mov.f32f32 r6.x, r0.x -mad.f32 r0.x, c2.w, r3.y, r0.y -mad.f32 r0.y, c2.z, r3.y, r0.z -mad.f32 r0.z, c1.y, r3.x, r0.w -mul.f r0.w, c0.x, r2.w -add.f r0.x, r0.x, c3.w -add.f r0.y, r0.y, c3.z -mad.f32 r3.z, c2.y, r3.y, r0.z -(ss)mad.f32 r3.w, c1.x, r3.x, r0.w -mov.f32f32 r0.w, r0.x -mov.f32f32 r0.z, r0.y -add.f r0.x, r3.z, c3.y -mad.f32 r3.z, c2.x, r3.y, r3.w -mul.f r3.w, r6.z, c6.y -mul.f r5.z, r6.y, c6.x -mov.f32f32 r0.y, r0.x -add.f r0.x, r3.z, c3.x -mov.f32f32 r3.z, r3.w -mov.f32f32 r3.w, r5.z -mad.f32 r6.y, c7.x, r3.y, c7.y -mov.f32f32 r0.x, r0.x -mov.f32f32 r5.w, r3.z -mov.f32f32 r5.z, r3.w -mov.f32f32 r3.z, r6.y -mad.f32 r6.y, c7.x, r2.w, c7.y -mov.f32f32 r6.z, (0.000000) -mov.f32f32 r3.y, r3.y -mov.f32f32 r3.w, r3.z -mov.f32f32 r3.z, r6.y -mov.f32f32 r7.w, r6.z -mov.f32f32 r7.z, r3.y -mov.f32f32 r3.x, r3.x -mov.f32f32 r3.z, r3.z -mov.f32f32 r2.w, r2.w -mov.f32f32 r3.y, r8.z -mov.f32f32 r7.y, r3.x -mov.f32f32 r3.x, r8.y -mov.f32f32 r7.x, r2.w -mov.f32f32 r6.w, r3.y -mov.f32f32 r2.w, r8.x -mov.f32f32 r6.z, r3.x -mov.f32f32 r3.x, r9.x -mov.f32f32 r8.x, r8.w -mov.f32f32 r6.y, r2.w +add.f r0.y, r0.y, c3.y +mov.f32f32 r7.w, (0.000000) +add.f r0.x, r0.x, c3.x +mul.f r6.x, r8.z, c6.z +mul.f r5.w, r8.y, c6.y +mul.f r5.z, r8.x, c6.x +mad.f32 r3.w, c7.x, r7.z, c7.y +mad.f32 r3.z, c7.x, r7.x, c7.y mov.f32f32 r2.w, c13.z -mov.f32f32 r3.y, r3.x -mov.f32f32 r3.x, r8.x -nop -mov.f32f32 r2.w, r2.w end nop ; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11) r4.x (5:12) r5.x (5:13) r6.x (5:14) r7.x (5:15) -; VERT: inputs: r2.w (0:0,cm=7,il=8,b=0) r8.x (0:0,cm=7,il=12,b=0) r6.y (0:0,cm=7,il=16,b=0) r8.w (0:0,cm=3,il=20,b=0) -; VERT: 160 instructions, 0 half, 10 full +; VERT: inputs: r7.x (0:0,cm=7,il=8,b=0) r6.y (0:0,cm=7,il=12,b=0) r8.x (0:0,cm=7,il=16,b=0) r3.x (0:0,cm=3,il=20,b=0) +; VERT: 91 instructions, 0 half, 9 full diff --git a/reference/0ad-alpine-valley/0ad-58.asm b/reference/0ad-alpine-valley/0ad-58.asm index 0f377f1..9adec78 100644 --- a/reference/0ad-alpine-valley/0ad-58.asm +++ b/reference/0ad-alpine-valley/0ad-58.asm @@ -8,299 +8,192 @@ @out(r2.y) out1 @out(r2.z) out2 @out(r2.w) out3 +@const(c9.x) 0x3f000000, 0x00000000, 0x3f800000, 0xbeef9097 +@const(c10.x) 0x3cff9724, 0xba03126f, 0xbf000000, 0x40000000 +@const(c11.x) 0x3f800000, 0xbf000000, 0x3fb8aa65, 0x3de38866 +@const(c12.x) 0x3cf5c28f, 0x00000000, 0x00000000, 0x00000000 +@const(c13.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.x, 11, r1.x bary.f r0.y, 0, r1.x add.f r0.w, r0.w, c9.y bary.f r1.z, 1, r1.x -mov.f32f32 r0.x, r0.x -add.f r1.w, r0.y, c10.z -bary.f r2.x, 4, r1.x -bary.f r2.y, 15, r1.x -mul.f r2.z, r0.x, r0.x +mov.f32f32 r1.w, r0.x +add.f r2.x, r0.y, c10.z +bary.f r2.y, 6, r1.x +add.f r2.z, r1.z, c10.z +mul.f r0.x, r0.x, r1.w bary.f r2.w, 12, r1.x -floor.f r3.x, r1.w +floor.f r3.x, r2.x rcp r0.w, r0.w add.f r0.z, r0.z, c9.y -add.f r3.y, r1.z, c10.z -mov.f32f32 r2.w, r2.w -add.f r1.w, r1.w, (neg)r3.x +floor.f r3.y, r2.z +mov.f32f32 r3.z, r2.w +add.f r2.x, r2.x, (neg)r3.x (ss)mul.f r0.z, r0.z, r0.w -(ss)floor.f r0.w, r3.y -mad.f32 r2.z, r2.w, r2.w, r2.z -mov.f32f32 r1.w, r1.w +(ss)absneg.f r0.w, (neg)c7.x +mad.f32 r0.x, r2.w, r3.z, r0.x +bary.f r2.w, 13, r1.x +mov.f32f32 r3.x, r2.x +mul.f r0.w, r0.w, c7.x +add.f r2.z, r2.z, (neg)r3.y +mov.f32f32 r3.y, r2.w +mul.f r3.w, c9.x, r3.x +mul.f r0.w, r0.w, r0.z mov.f32f32 r0.z, r0.z -absneg.f r3.x, (neg)c7.x -mov.f32f32 r2.z, r2.z -bary.f r3.z, 13, r1.x -mul.f r3.w, c9.x, r1.w -mul.f r3.x, r3.x, c7.x -add.f r0.w, r3.y, (neg)r0.w -mov.f32f32 r3.y, r3.z -mov.f32f32 r3.z, r3.w -mul.f r3.x, r3.x, r0.z -mov.f32f32 r0.w, r0.w -mad.f32 r2.z, r3.y, r3.y, r2.z -add.f r0.y, r0.y, (neg)r3.z -mov.f32f32 r3.x, r3.x -mul.f r3.z, c9.x, r0.w -add.f r3.w, c10.w, (neg)r1.w -add.f r4.x, c10.w, (neg)r0.w -mov.f32f32 r0.y, r0.y -rsq r2.z, r2.z -(ss)mov.f32f32 r2.z, r2.z -mul.f r0.z, r3.x, r0.z -mov.f32f32 r3.x, r3.z -add.f r3.z, c11.x, r0.y -mul.f r0.x, r0.x, r2.z -add.f r0.y, c11.y, r0.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r3.z, r3.z -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y +mad.f32 r0.x, r3.y, r3.y, r0.x +add.f r0.y, r0.y, (neg)r3.w +mov.f32f32 r3.y, r2.z +mul.f r0.z, r0.w, r0.z +add.f r0.w, c10.w, (neg)r3.x +mov.f32f32 r3.x, r0.y +mul.f r3.w, c9.x, r3.y +rsq r0.x, r0.x +(ss)mov.f32f32 r4.x, r0.x mul.f r0.z, r0.z, c11.z -mul.f r3.z, r3.z, c4.z -absneg.f r0.x, (abs)r0.x -mul.f r0.y, r0.y, c4.z -mov.f32f32 r0.z, r0.z -mov.f32f32 r4.y, r3.z -mov.f32f32 r0.x, r0.x -mov.f32f32 r4.z, r0.y -mov.f32f32 r3.z, r3.z -mov.f32f32 r0.y, r0.y -add.f r0.x, r0.x, c9.w -mov.f32f32 r4.z, r4.z -add.f r1.z, r1.z, (neg)r3.x +add.f r3.x, c11.y, r3.x +add.f r1.z, r1.z, (neg)r3.w +mul.f r1.w, r1.w, r4.x +add.f r0.y, c11.x, r0.y +mul.f r4.y, r3.x, c4.z +mul.f r3.x, r3.z, r4.x +absneg.f r1.w, (abs)r1.w exp2 r0.z, r0.z -(ss)mov.f32f32 r0.z, r0.z -mov.f32f32 r0.x, r0.x -mov.f32f32 r3.x, r4.y -mov.f32f32 r1.z, r1.z -mov.f32f32 r5.y, r3.z -max.f r0.x, r0.x, c9.y -add.f r3.z, c13.y, (neg)r0.z -add.f r4.y, c11.y, r1.z -mov.f32f32 r6.x, r3.x -mov.f32f32 r0.x, r0.x -mul.f r2.w, r2.w, r2.z -mov.f32f32 r3.x, r4.y +(ss)mov.f32f32 r3.z, r0.z +mov.f32f32 r4.w, r4.y +mov.f32f32 r3.w, r1.z +add.f r1.w, r1.w, c9.w +add.f r3.z, c13.y, (neg)r3.z +mul.f r5.z, r0.y, c4.z +add.f r0.y, c11.x, r1.z +max.f r1.z, r1.w, c9.y +absneg.f r1.w, (abs)r3.x +add.f r3.x, c11.y, r3.w mul.f r3.z, r3.z, c7.y -mul.f r0.z, r0.z, c9.z -mov.f32f32 r2.w, r2.w -mul.f r3.x, r3.x, c4.w -add.f r1.z, c11.x, r1.z +(ss)mul.f r0.z, r0.z, c9.z +add.f r1.w, r1.w, c9.w +mul.f r6.z, r3.x, c4.w +mov.f32f32 r6.y, r5.z +mul.f r5.w, r0.y, c4.w +max.f r0.y, r1.w, c9.y +mov.f32f32 r5.x, r6.z add.f r0.z, r0.z, r3.z -absneg.f r2.w, (abs)r2.w -mov.f32f32 r3.z, r3.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r0.z, r0.z -mov.f32f32 r2.w, r2.w -mov.f32f32 r4.w, r3.z -bary.f r3.z, 2, r1.x -mov.f32f32 r0.z, r0.z -add.f r2.w, r2.w, c9.w -mul.f r1.z, r1.z, c4.w -add.f r3.z, r3.z, c10.y -mov.f32f32 r3.x, r3.x -mov.f32f32 r2.w, r2.w +bary.f r1.w, 2, r1.x +add.f r3.x, r1.z, r0.y +mul.f r0.x, r2.w, r0.x max.f r0.z, r0.z, c9.y -mov.f32f32 r4.y, r3.z -mov.f32f32 r5.z, r1.z -max.f r2.w, r2.w, c9.y +add.f r6.x, r1.w, c10.y +mov.f32f32 r4.z, r5.w +absneg.f r0.x, (abs)r0.x min.f r0.z, r0.z, c9.z -mov.f32f32 r5.x, r4.y -mov.f32f32 r4.y, r5.z -mov.f32f32 r2.w, r2.w -add.f r5.z, c13.y, (neg)r0.z -add.f r5.w, c13.y, (neg)r0.z -add.f r6.y, c13.y, (neg)r0.z -add.f r6.w, r0.x, r2.w -mul.f r2.z, r3.y, r2.z -sam.s (f32)(x)r7.x, r4.z, s#3, t#3 -(sy)mov.f32f32 r3.y, r7.x -(ss)mul.f r4.z, r5.z, c6.z -mul.f r4.w, r5.w, c6.y -mov.f32f32 r2.z, r2.z -mov.f32f32 r3.y, r3.y -mov.f32f32 r3.w, r3.w -mov.f32f32 r4.x, r4.x -absneg.f r2.z, (abs)r2.z -mul.f r5.x, r6.y, c6.x -mov.f32f32 r6.y, r4.y -mov.f32f32 r4.y, r3.z -mov.f32f32 r2.z, r2.z -mul.f r5.w, r3.w, r4.x -mov.f32f32 r5.z, r3.x -mov.f32f32 r7.x, r0.y -add.f r0.y, r2.z, c9.w -mov.f32f32 r2.z, r5.w -mov.f32f32 r6.z, r4.y -mov.f32f32 r3.x, r3.z -mov.f32f32 r0.y, r0.y -mul.f r2.z, r2.z, r3.y -mov.f32f32 r1.z, r1.z -mov.f32f32 r5.w, r3.x -max.f r0.y, r0.y, c9.y -sam.s (f32)(x)r7.y, r6.x, s#3, t#3 -(rpt1)nop -(sy)mov.f32f32 r3.x, r7.y -mov.f32f32 r0.y, r0.y -mov.f32f32 r7.y, r1.z -sam.s (f32)(x)r5.y, r5.y, s#3, t#3 -mov.f32f32 r1.z, r3.z -(sy)mov.f32f32 r3.y, r5.y -add.f r3.z, r6.w, r0.y -add.f r1.w, r1.w, c9.z -mov.f32f32 r7.z, r1.z -add.f r0.w, r0.w, c9.z -mov.f32f32 r1.z, r3.z -mul.f r3.z, r1.w, r4.x -mov.f32f32 r2.x, r2.x -mov.f32f32 r2.y, r2.y -mul.f r3.w, r3.w, r0.w -mul.f r0.w, r1.w, r0.w -mov.f32f32 r1.w, r3.z -rcp r3.z, r1.z -(ss)mov.f32f32 r3.z, r3.z -rcp r4.x, r1.z -nop -(ss)rcp r1.z, r1.z -(ss)mov.f32f32 r4.x, r4.x -(ss)mov.f32f32 r1.z, r1.z -mul.f r0.x, r0.x, r3.z -mad.f32 r1.w, r1.w, r3.y, r2.z -mul.f r2.z, r2.w, r4.x -mul.f r0.y, r0.y, r1.z -mov.f32f32 r0.x, r0.x -mul.f r1.z, r2.y, c10.x -mov.f32f32 r1.w, r1.w -sam.s (f32)(x)r5.y, r7.x, s#3, t#3 -mov.f32f32 r2.y, r2.z -(sy)mov.f32f32 r2.z, r5.y +mov.f32f32 r5.y, r6.x +mov.f32f32 r6.w, r6.x +add.f r0.x, r0.x, c9.w +add.f r1.w, c13.y, (neg)r0.z +add.f r2.w, c13.y, (neg)r0.z +add.f r3.z, c13.y, (neg)r0.z +max.f r0.x, r0.x, c9.y +mul.f r1.w, r1.w, c6.z +mul.f r3.w, r2.w, c6.y +sam.s (f32)(x)r7.x, r4.w, s#3, t#3 +mov.f32f32 r2.w, r0.w +mov.f32f32 r4.x, r0.x +add.f r3.y, c10.w, (neg)r3.y +mul.f r3.z, r3.z, c6.x +sam.s (f32)(x)r7.y, r6.y, s#3, t#3 +(ss)mov.f32f32 r4.w, r6.x +add.f r3.x, r3.x, r4.x +mov.f32f32 r4.x, r3.y mov.f32f32 r1.z, r1.z -mov.f32f32 r2.w, r3.w mov.f32f32 r0.y, r0.y -mov.f32f32 r3.y, r2.x -mov.f32f32 r2.x, r1.z -mad.f32 r1.w, r2.w, r2.z, r1.w -bary.f r2.z, 5, r1.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.z, r2.z -bary.f r2.w, 6, r1.x -mov.f32f32 r3.w, r2.x -bary.f r2.x, 16, r1.x -mad.f32 r0.w, r0.w, r3.x, r1.w -mov.f32f32 r3.z, r2.z -mov.f32f32 r1.w, r2.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.z, r1.z -mov.f32f32 r5.y, r1.w -mul.f r1.w, r2.x, c10.x -mul.f r0.w, c11.w, r0.w -sam (f32)(w)r2.z, r3.y, s#1, t#1 -(sy)add.f r2.x, c9.z, (neg)r3.y +mov.f32f32 r5.x, r3.x +mul.f r2.w, r2.w, r4.x +(rpt3)nop +(sy)mul.f r2.w, r2.w, r7.x +rcp r4.x, r5.x +(ss)mul.f r1.z, r1.z, r4.x +bary.f r4.x, 15, r1.x +(ss)rcp r5.x, r5.x +add.f r2.x, r2.x, c9.z +(ss)mul.f r0.y, r0.y, r5.x +(ss)mov.f32f32 r5.x, r1.z +mul.f r6.y, r4.x, c10.x +mul.f r3.y, r2.x, r3.y +rcp r3.x, r3.x +(ss)mul.f r0.x, r0.x, r3.x +(ss)mov.f32f32 r3.x, r0.y +mov.f32f32 r6.z, r6.y +bary.f r4.x, 16, r1.x +mad.f32 r2.w, r3.y, r7.y, r2.w +mov.f32f32 r3.y, r0.x +sam.s (f32)(x)r7.x, r4.y, s#3, t#3 +add.f r2.z, r2.z, c9.z +mul.f r4.x, r4.x, c10.x +(rpt1)nop +mul.f r0.w, r0.w, r2.z +mov.f32f32 r6.w, r4.x +sam.s (f32)(x)r5.y, r5.z, s#3, t#3 +mul.f r2.x, r2.x, r2.z bary.f r2.z, 7, r1.x -mov.f32f32 r1.w, r1.w -mov.f32f32 r0.w, r0.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r2.z, r2.z -mov.f32f32 r2.w, r1.w -mov.f32f32 r1.w, r1.w -mov.f32f32 r3.x, r1.z -bary.f r1.z, 14, r1.x -mov.f32f32 r2.w, r2.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r5.z, r2.z -mov.f32f32 r5.w, r1.w -mov.f32f32 r4.x, r2.w -mov.f32f32 r1.w, r2.x -mov.f32f32 r1.z, r1.z -bary.f r2.x, 10, r1.x -bary.f r2.z, 9, r1.x -(ss)mov.f32f32 r3.z, c9.y +(sy)mad.f32 r0.w, r0.w, r7.x, r2.w +bary.f r2.w, 14, r1.x +(ss)bary.f r4.z, 4, r1.x +mad.f32 r0.w, r2.x, r5.y, r0.w +sam (f32)(xyzw)r5.y, r6.z, s#0, t#0 +(sy)mul.f r2.x, r5.w, r5.x +(ss)mul.f r6.z, r2.w, c10.x +mul.f r2.w, r5.z, r5.x +mul.f r1.z, r5.y, r1.z +mul.f r0.w, c11.w, r0.w +mov.f32f32 r4.y, r6.z +sam (f32)(w)r5.x, r2.y, s#2, t#2 +(sy)(ss)cmps.f.lt r2.y, r5.w, c12.x +bary.f r4.w, 5, r1.x +mov.f32f32 r2.z, r0.w +sam (f32)(xyzw)r6.x, r6.y, s#0, t#0 +bary.f r5.x, 10, r1.x +bary.f r5.y, 9, r1.x +cov.u32f32 r2.y, r2.y +sam (f32)(xyzw)r6.w, r4.x, s#0, t#0 +(sy)mad.f32 r2.x, r7.y, r3.x, r2.x +mad.f32 r2.w, r7.x, r3.x, r2.w +mad.f32 r2.x, r6.z, r3.y, r2.x +mad.f32 r2.w, r6.y, r3.y, r2.w +mad.f32 r0.y, r6.w, r0.y, r1.z +cmps.f.ne r1.z, r2.y, c9.y +mov.f32f32 r2.y, r2.x +mov.f32f32 r3.x, r2.w +mad.f32 r0.x, r6.x, r0.x, r0.y +mov.f32f32 r0.y, c9.y +mul.f r2.y, r2.y, r5.x +mul.f r3.x, r3.x, r5.y +mov.f32f32 r3.y, r0.x bary.f (ei)r1.x, 8, r1.x -sam (f32)(xyzw)r6.x, r3.w, s#0, t#0 -(sy)mul.f r1.y, r6.z, r0.x -mul.f r1.z, r1.z, c10.x -(ss)mul.f r3.w, r6.y, r0.x -mul.f r0.x, r6.x, r0.x -mov.f32f32 r2.w, r1.w -mov.f32f32 r1.z, r1.z -sam (f32)(w)r6.x, r5.y, s#2, t#2 -(sy)mov.f32f32 r1.w, r6.w -cmps.f.lt r3.y, r6.w, c12.x -nop -mov.f32f32 r4.x, r1.z -mov.f32f32 r1.z, r1.z -cov.u32f32 r4.y, r3.y -mov.f32f32 r1.w, r1.w -mov.f32f32 r6.x, r4.x -mov.f32f32 r3.y, r1.z -cmps.f.ne r1.z, r4.y, c9.y -(rpt3)nop -(ss)nop -sam (f32)(xyzw)r5.y, r5.w, s#0, t#0 -(sy)mad.f32 r1.y, r5.w, r2.y, r1.y -(ss)nop -sam (f32)(xyzw)r5.w, r3.x, s#0, t#0 -(ss)mad.f32 r3.x, r5.z, r2.y, r3.w -(sy)mad.f32 r1.y, r6.y, r0.y, r1.y -mad.f32 r3.x, r6.x, r0.y, r3.x -mad.f32 r0.x, r5.y, r2.y, r0.x -sel.b32 r1.z, r3.z, r1.z, r1.w -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.w, r3.x -mad.f32 r0.x, r5.w, r0.y, r0.x -nop -mul.f r0.y, r1.y, r2.x -mul.f r2.x, r1.w, r2.z -mov.f32f32 r0.x, r0.x -nop -mul.f r0.y, r0.y, r0.w -mul.f r2.x, r2.x, r0.w -mul.f r1.x, r0.x, r1.x -nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r2.x, r2.x -mad.f32 r0.y, c5.z, r1.y, r0.y -mad.f32 r1.y, c5.y, r1.w, r2.x +mul.f r1.y, r2.y, r2.z +mul.f r2.y, r3.x, r2.z +mad.f32 r1.y, c5.z, r2.x, r1.y +mad.f32 r2.x, c5.y, r2.w, r2.y +mul.f r1.x, r3.y, r1.x +sel.b32 r0.y, r0.y, r1.z, r5.w +mul.f r1.y, r0.z, r1.y +mul.f r1.z, r0.z, r2.x mul.f r0.w, r1.x, r0.w -nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.x, r1.y -mov.f32f32 r0.w, r0.w -nop -mul.f r0.y, r0.z, r0.y -mul.f r1.x, r0.z, r1.x +sam (f32)(w)r2.x, r4.z, s#1, t#1 +(sy)add.f r2.w, c9.z, (neg)r2.w +add.f r1.x, r1.y, r1.w +add.f r1.y, r1.z, r3.w +(rpt1)nop +mul.f r2.z, r1.x, r0.y +mul.f r2.y, r1.y, r0.y mad.f32 r0.x, c5.x, r0.x, r0.w -nop -add.f r0.y, r0.y, r4.z -add.f r0.w, r1.x, r4.w -mov.f32f32 r0.x, r0.x -nop -mul.f r0.y, r0.y, r1.z -mul.f r0.w, r0.w, r1.z +(rpt2)nop mul.f r0.x, r0.z, r0.x -nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.w -add.f r0.x, r0.x, r5.x -nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -mul.f r0.x, r0.x, r1.z -nop -mov.f32f32 r2.z, r0.y -mov.f32f32 r2.y, r0.z -mov.f32f32 r0.x, r0.x (rpt2)nop -mov.f32f32 r0.x, r0.x +add.f r0.x, r0.x, r3.z (rpt2)nop -mov.f32f32 r2.x, r0.x +mul.f r2.x, r0.x, r0.y end +nop ; FRAG: outputs: r2.x (1:0) -; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r1.w (5:10,cm=f,il=12,b=1) r63.y (5:11,cm=f,il=16,b=1) r1.z (5:12,cm=f,il=20,b=1) r5.x (5:13,cm=f,il=24,b=1) -; FRAG: 300 instructions, 0 half, 8 full +; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r1.w (5:10,cm=f,il=12,b=1) r63.y (5:11,cm=f,il=16,b=1) r3.y (5:12,cm=f,il=20,b=1) r5.x (5:13,cm=f,il=24,b=1) +; FRAG: 190 instructions, 0 half, 8 full diff --git a/reference/0ad-alpine-valley/0ad-61.asm b/reference/0ad-alpine-valley/0ad-61.asm index e890c72..95d5c75 100644 --- a/reference/0ad-alpine-valley/0ad-61.asm +++ b/reference/0ad-alpine-valley/0ad-61.asm @@ -8,211 +8,135 @@ @out(r2.y) out1 @out(r2.z) out2 @out(r2.w) out3 +@const(c9.x) 0x3f000000, 0x00000000, 0x3f800000, 0xba03126f +@const(c10.x) 0xbf000000, 0x40000000, 0x3f800000, 0x3fb8aa65 +@const(c11.x) 0x3de38866, 0x3cf5c28f, 0x00000000, 0x00000000 +@const(c12.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.x, 0, r1.x add.f r0.y, r0.w, c9.y bary.f r0.w, 1, r1.x -bary.f r1.z, 4, r1.x -add.f r1.w, r0.x, c10.x -bary.f r2.x, 6, r1.x +bary.f r1.z, 6, r1.x +add.f r2.x, r0.x, c10.x +bary.f r1.w, 7, r1.x add.f r2.y, r0.w, c10.x -mov.f32f32 r1.z, r1.z -floor.f r2.z, r1.w +bary.f r2.z, 2, r1.x +floor.f r2.w, r2.x rcp r0.y, r0.y add.f r0.z, r0.z, c9.y -floor.f r2.w, r2.y -mov.f32f32 r3.x, r1.z -add.f r1.z, r1.w, (neg)r2.z +floor.f r3.x, r2.y +add.f r3.w, r2.z, c9.w +add.f r2.x, r2.x, (neg)r2.w (ss)mul.f r0.y, r0.z, r0.y -add.f r0.z, r2.y, (neg)r2.w -bary.f r1.w, 5, r1.x -mov.f32f32 r1.z, r1.z +absneg.f r0.z, (neg)c7.x +add.f r2.y, r2.y, (neg)r3.x +mov.f32f32 r2.z, r2.x +add.f r2.x, r2.x, c9.z +mul.f r0.z, r0.z, c7.x +sam (f32)(w)r4.x, r1.z, s#2, t#2 +(ss)mov.f32f32 r1.z, r2.y +mul.f r1.w, c9.x, r2.z +add.f r2.z, c10.y, (neg)r2.z +mul.f r0.z, r0.z, r0.y mov.f32f32 r0.y, r0.y -absneg.f r2.y, (neg)c7.x -mov.f32f32 r0.z, r0.z -mul.f r2.z, c9.x, r1.z -mov.f32f32 r1.w, r1.w -add.f r2.w, c10.y, (neg)r1.z -add.f r3.z, c10.y, (neg)r0.z -mov.f32f32 r2.z, r2.z -mul.f r2.y, r2.y, c7.x -mul.f r3.w, c9.x, r0.z -mov.f32f32 r3.y, r1.w -add.f r0.x, r0.x, (neg)r2.z -mul.f r1.w, r2.y, r0.y -mov.f32f32 r2.y, r3.w -mov.f32f32 r2.z, r2.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r1.w, r1.w -add.f r0.w, r0.w, (neg)r2.y -sam (f32)(w)r3.w, r3.x, s#1, t#1 -(sy)add.f r2.y, c9.z, (neg)r4.z -add.f r2.w, c10.x, r0.x -mul.f r0.y, r1.w, r0.y +add.f r0.x, r0.x, (neg)r1.w +mul.f r1.w, c9.x, r1.z +mov.f32f32 r2.w, r2.z +mul.f r0.y, r0.z, r0.y +mov.f32f32 r0.z, r0.x +add.f r0.w, r0.w, (neg)r1.w add.f r0.x, c10.z, r0.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.w, r2.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x -add.f r2.w, c10.z, r0.w -mul.f r1.w, r1.w, c4.z mul.f r0.y, r0.y, c10.w -mul.f r0.x, r0.x, c4.z -mov.f32f32 r2.w, r2.w -(ss)mov.f32f32 r3.x, r1.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r3.y, r0.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r3.w, r3.x -add.f r0.w, c10.x, r0.w -mov.f32f32 r3.x, r3.y -mov.f32f32 r4.z, r0.x -mov.f32f32 r0.x, r1.w -mov.f32f32 r0.w, r0.w -exp2 r0.y, r0.y -(ss)mov.f32f32 r0.y, r0.y -mov.f32f32 r5.y, r3.x -mul.f r1.w, r2.w, c4.w -mul.f r0.w, r0.w, c4.w -add.f r2.w, c12.y, (neg)r0.y -mov.f32f32 r6.x, r0.x -mov.f32f32 r0.x, r1.w -mov.f32f32 r3.x, r0.w -mul.f r2.w, r2.w, c7.y -mul.f r0.y, r0.y, c9.z -mov.f32f32 r1.w, r1.w -mov.f32f32 r4.x, r3.x -bary.f r3.x, 2, r1.x -add.f r0.y, r0.y, r2.w -mov.f32f32 r1.w, r1.w -mov.f32f32 r0.w, r0.w -add.f r2.w, r3.x, c9.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r5.z, r1.w -mov.f32f32 r4.w, r0.w -mov.f32f32 r0.w, r2.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.w, r2.w -mov.f32f32 r3.x, r2.w -mov.f32f32 r4.y, r0.w -max.f r0.y, r0.y, c9.y -mov.f32f32 r5.w, r1.w -mov.f32f32 r5.x, r3.x -mov.f32f32 r6.y, r0.x -mov.f32f32 r0.x, r2.w -mov.f32f32 r0.w, r2.y -sam.s (f32)(x)r6.z, r3.w, s#3, t#3 -(sy)mov.f32f32 r1.w, r6.z -min.f r0.y, r0.y, c9.z -sam.s (f32)(x)r6.z, r5.y, s#3, t#3 -(sy)mov.f32f32 r2.y, r6.z +add.f r0.z, c10.x, r0.z +mov.f32f32 r1.w, r0.w +mul.f r3.y, r0.x, c4.z +add.f r0.x, c10.z, r0.w +mul.f r4.x, r0.z, c4.z +add.f r0.z, c10.x, r1.w +mov.f32f32 r5.x, r3.y +mul.f r3.z, r0.x, c4.w +mov.f32f32 r5.w, r4.x +mul.f r5.y, r0.z, c4.w +exp2 r0.x, r0.y +(ss)mov.f32f32 r0.y, r0.x +mov.f32f32 r5.z, r3.w +mov.f32f32 r4.y, r3.z +mov.f32f32 r6.x, r5.y +mov.f32f32 r6.y, r3.w +add.f r0.y, c12.y, (neg)r0.y +mov.f32f32 r4.z, r3.w +sam.s (f32)(x)r3.x, r3.y, s#3, t#3 +add.f r0.z, c10.y, (neg)r1.z +sam.s (f32)(x)r6.z, r5.x, s#3, t#3 +mul.f r0.x, r0.x, c9.z +add.f r0.w, r2.y, c9.z +mul.f r0.y, r0.y, c7.y (ss)nop -sam.s (f32)(x)r3.w, r4.z, s#3, t#3 -(sy)mov.f32f32 r2.w, r3.w -mov.f32f32 r1.w, r1.w -mov.f32f32 r3.x, r3.z -add.f r3.y, c12.y, (neg)r0.y -add.f r3.z, c12.y, (neg)r0.y -add.f r3.w, c12.y, (neg)r0.y -mul.f r4.x, r2.z, r3.x -mul.f r3.y, r3.y, c6.z -mul.f r3.z, r3.z, c6.y -mul.f r3.w, r3.w, c6.x -mul.f r1.w, r4.x, r1.w -add.f r1.z, r1.z, c9.z -mov.f32f32 r6.z, r0.x -mov.f32f32 r0.x, r0.w -add.f r0.z, r0.z, c9.z -mul.f r0.w, r1.z, r3.x -mov.f32f32 r2.x, r2.x -bary.f r3.x, 8, r1.x -mov.f32f32 r0.x, r0.x -mad.f32 r0.w, r0.w, r2.w, r1.w -sam.s (f32)(x)r4.x, r6.x, s#3, t#3 +sam.s (f32)(x)r5.x, r5.w, s#3, t#3 +mov.f32f32 r1.z, r0.z +mul.f r0.z, r2.x, r0.z +mul.f r1.w, r2.z, r0.w +add.f r0.x, r0.x, r0.y +mul.f r0.y, r2.w, r1.z +sam.s (f32)(x)r3.y, r4.x, s#3, t#3 +mul.f r0.w, r2.x, r0.w +(sy)cmps.f.lt r1.z, r4.w, c11.y +bary.f r3.z, 4, r1.x +mul.f r0.y, r0.y, r5.x +max.f r0.x, r0.x, c9.y +mad.f32 r0.y, r0.z, r6.z, r0.y +cov.u32f32 r0.z, r1.z +mad.f32 r0.y, r1.w, r3.y, r0.y +min.f r0.x, r0.x, c9.z +mad.f32 r0.y, r0.w, r3.x, r0.y +cmps.f.ne r0.z, r0.z, c9.y (rpt1)nop -(sy)mov.f32f32 r1.w, r4.x -mov.f32f32 r0.w, r0.w -mul.f r2.z, r2.z, r0.z -mov.f32f32 r2.w, r0.x -mov.f32f32 r4.x, r2.x -mul.f r0.x, r1.z, r0.z -mad.f32 r0.z, r2.z, r1.w, r0.w -bary.f r0.w, 7, r1.x -mov.f32f32 r1.z, r3.x +mul.f r0.y, c11.x, r0.y +bary.f r1.z, 8, r1.x bary.f r1.w, 9, r1.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -mad.f32 r0.x, r0.x, r2.y, r0.z -mov.f32f32 r2.x, r1.z -mov.f32f32 r0.z, r1.w -mov.f32f32 r4.y, r0.w -mov.f32f32 r0.x, r0.x -bary.f r0.w, 12, r1.x -bary.f r1.z, 11, r1.x -bary.f (ei)r1.x, 10, r1.x -mul.f r0.x, c11.x, r0.x -mov.f32f32 r2.y, r0.z +add.f r0.w, c12.y, (neg)r0.x +mov.f32f32 r2.x, r0.y +add.f r2.y, c12.y, (neg)r0.x +add.f r2.z, c12.y, (neg)r0.x +(rpt1)nop (ss)nop -sam (f32)(w)r4.x, r4.x, s#2, t#2 -(sy)cmps.f.lt r0.z, r4.w, c11.y -mov.f32f32 r1.y, r4.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.z, r1.z -mov.f32f32 r1.x, r1.x -sam (f32)(xyz)r1.w, r2.x, s#0, t#0 -cov.u32f32 r0.z, r0.z -(sy)mul.f r0.w, r2.y, r0.w -mul.f r1.z, r2.x, r1.z -mul.f r1.x, r1.w, r1.x -cmps.f.ne r0.z, r0.z, c9.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.z, r1.z -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.y, r1.y -mul.f r0.w, r0.w, r0.x -mul.f r1.z, r1.z, r0.x -mul.f r0.x, r1.x, r0.x -mov.f32f32 r1.x, c9.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.z, r1.z -mad.f32 r0.w, c5.z, r2.y, r0.w -mad.f32 r1.z, c5.y, r2.x, r1.z -mov.f32f32 r0.x, r0.x -sel.b32 r0.z, r1.x, r0.z, r1.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.x, r1.z -mad.f32 r0.x, c5.x, r1.w, r0.x -nop -mul.f r0.w, r0.y, r0.w -mul.f r1.x, r0.y, r1.x -mov.f32f32 r0.x, r0.x -nop -add.f r0.w, r0.w, r3.y -add.f r1.x, r1.x, r3.z -mul.f r0.x, r0.y, r0.x -nop -mul.f r0.y, r0.w, r0.z -mul.f r0.w, r1.x, r0.z -add.f r0.x, r0.x, r3.w -nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.w, r0.w -mul.f r0.x, r0.x, r0.z +sam (f32)(xyz)r3.w, r1.z, s#0, t#0 +(ss)bary.f r1.z, 12, r1.x +bary.f r1.w, 11, r1.x +bary.f r2.w, 10, r1.x +mul.f r0.w, r0.w, c6.z +(sy)mul.f r1.z, r4.y, r1.z +mul.f r1.w, r4.x, r1.w +mul.f r2.w, r3.w, r2.w +mul.f r2.y, r2.y, c6.y +mul.f r1.z, r1.z, r2.x +mul.f r1.w, r1.w, r2.x +mad.f32 r1.z, c5.z, r4.y, r1.z +mad.f32 r1.w, c5.y, r4.x, r1.w +mul.f r0.y, r2.w, r0.y +mul.f r2.x, r2.z, c6.x +mul.f r1.z, r0.x, r1.z +mul.f r1.w, r0.x, r1.w +mad.f32 r0.y, c5.x, r3.w, r0.y +mov.f32f32 r2.z, c9.y +add.f r0.w, r1.z, r0.w +add.f r1.z, r1.w, r2.y nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.w -mov.f32f32 r0.x, r0.x +sel.b32 r0.z, r2.z, r0.z, r4.w +mul.f r0.x, r0.x, r0.y +bary.f (ei)r3.w, 5, r1.x nop -mov.f32f32 r2.z, r0.y -(ss)mov.f32f32 r2.y, r0.z -mov.f32f32 r0.x, r0.x -(rpt2)nop -mov.f32f32 r2.x, r0.x +mul.f r2.z, r0.w, r0.z +mul.f r2.y, r1.z, r0.z +add.f r0.x, r0.x, r2.x +(rpt1)nop +sam (f32)(w)r0.w, r3.z, s#1, t#1 +(sy)add.f r2.w, c9.z, (neg)r1.z +mul.f r2.x, r0.x, r0.z end nop nop nop ; FRAG: outputs: r2.x (1:0) -; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r0.x (5:10,cm=f,il=12,b=1) r0.w (5:11,cm=f,il=16,b=1) r4.x (5:12,cm=f,il=20,b=1) -; FRAG: 204 instructions, 0 half, 7 full +; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r1.x (5:10,cm=f,il=12,b=1) r1.x (5:11,cm=f,il=16,b=1) r4.x (5:12,cm=f,il=20,b=1) +; FRAG: 124 instructions, 0 half, 7 full diff --git a/reference/0ad-alpine-valley/0ad-63.asm b/reference/0ad-alpine-valley/0ad-63.asm index 3ac80f9..b4c4f88 100644 --- a/reference/0ad-alpine-valley/0ad-63.asm +++ b/reference/0ad-alpine-valley/0ad-63.asm @@ -1,14 +1,14 @@ ; options: ; VERT: new compiler -@in(r2.w) in0 -@in(r3.x) in1 -@in(r3.y) in2 -@in(r7.x) in4 -@in(r7.y) in5 -@in(r7.z) in6 -@in(r6.y) in8 -@in(r6.z) in9 -@in(r6.w) in10 +@in(r7.x) in0 +@in(r7.y) in1 +@in(r7.z) in2 +@in(r6.y) in4 +@in(r6.z) in5 +@in(r6.w) in6 +@in(r2.w) in8 +@in(r3.x) in9 +@in(r3.y) in10 @in(r3.z) in12 @in(r3.w) in13 @out(r0.x) out0 @@ -39,159 +39,101 @@ @out(r6.y) out25 @out(r6.z) out26 @out(r6.w) out27 -(sy)(ss)add.f r0.x, c4.x, (neg)r2.w -mul.f r0.y, r7.x, r7.x -mul.f r0.z, c8.w, r2.w -mul.f r0.w, c8.z, r2.w +@const(c13.x) 0x3f800000, 0x00000000, 0xbf800000, 0x00000000 +@const(c14.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 +(sy)(ss)add.f r0.x, c4.x, (neg)r7.x +mul.f r0.y, r6.y, r6.y +mul.f r0.z, c8.y, r7.x +mul.f r0.w, c8.x, r7.x mul.f r1.x, r0.x, r0.x -add.f r1.y, c4.y, (neg)r3.x +add.f r1.z, c4.y, (neg)r7.y add.f r0.y, c13.x, (neg)r0.y -mad.f32 r0.z, c9.w, r3.x, r0.z -mad.f32 r0.w, c9.z, r3.x, r0.w -mad.f32 r1.x, r1.y, r1.y, r1.x -mov.f32f32 r0.y, r0.y -mad.f32 r0.z, c10.w, r3.y, r0.z -mad.f32 r0.w, c10.z, r3.y, r0.w -mov.f32f32 r1.x, r1.x -add.f r1.z, c4.z, (neg)r3.y -mul.f r1.w, r0.y, r0.y -mul.f r2.x, r7.y, r7.x -add.f r0.z, r0.z, c11.w +mad.f32 r0.z, c9.y, r7.y, r0.z +mad.f32 r0.w, c9.x, r7.y, r0.w mad.f32 r1.x, r1.z, r1.z, r1.x -add.f r0.w, r0.w, c11.z -mul.f r2.y, c8.y, r2.w -mul.f r2.z, c8.x, r2.w -add.f r2.x, c13.y, (neg)r2.x -mul.f r4.x, r6.w, c6.z -mov.f32f32 r0.z, r0.z +add.f r1.w, c4.z, (neg)r7.z +mov.f32f32 r1.y, r0.y +mad.f32 r0.z, c10.y, r7.z, r0.z +mad.f32 r0.w, c10.x, r7.z, r0.w +mad.f32 r1.x, r1.w, r1.w, r1.x +mul.f r2.x, r1.y, r1.y +mul.f r1.y, r6.z, r6.y +add.f r0.z, r0.z, c11.y +add.f r0.w, r0.w, c11.x +mul.f r2.y, r6.w, r6.y +mul.f r2.z, c8.w, r7.x rsq r1.x, r1.x -(ss)mov.f32f32 r1.x, r1.x -mov.f32f32 r2.x, r2.x -mov.f32f32 r0.w, r0.w -mad.f32 r2.y, c9.y, r3.x, r2.y +(ss)mov.f32f32 r4.x, r1.x +add.f r4.y, c13.y, (neg)r1.y mad.f32 r0.x, r0.x, r1.x, (neg)c5.x -mad.f32 r1.w, r2.x, r2.x, r1.w -mad.f32 r1.y, r1.y, r1.x, (neg)c5.y -mad.f32 r1.x, r1.z, r1.x, (neg)c5.z -mov.f32f32 r0.x, r0.x +mul.f r1.y, r0.z, c12.y +mad.f32 r0.z, r1.z, r4.x, (neg)c5.y +mov.f32f32 r1.z, r4.y +(ss)mov.f32f32 r1.x, r0.x +mad.f32 r1.w, r1.w, r4.x, (neg)c5.z +mov.f32f32 r4.x, r0.z +mad.f32 r2.x, r4.y, r1.z, r2.x +mul.f r1.x, r1.x, r1.x +add.f r2.y, c13.y, (neg)r2.y +mad.f32 r0.z, r0.z, r4.x, r1.x mov.f32f32 r4.y, r1.w -mul.f r1.z, r7.z, r7.x -mov.f32f32 r1.y, r1.y -mul.f r1.w, r0.x, r0.x -mov.f32f32 r1.x, r1.x -add.f r4.z, c13.y, (neg)r1.z -mad.f32 r4.w, r1.y, r1.y, r1.w -mov.f32f32 r1.w, r0.z -mov.f32f32 r1.z, r0.w -mad.f32 r0.z, c10.y, r3.y, r2.y -mov.f32f32 r0.w, r4.w -mov.f32f32 r2.y, r4.z -mad.f32 r0.w, r1.x, r1.x, r0.w -add.f r0.z, r0.z, c11.y -mad.f32 r2.z, c9.x, r3.x, r2.z -mov.f32f32 r5.z, r4.x -mad.f32 r2.z, c10.x, r3.y, r2.z -mul.f r5.w, c0.w, r2.w -mul.f r6.x, c0.z, r2.w -rsq r0.w, r0.w -(ss)mov.f32f32 r0.w, r0.w -mad.f32 r4.x, r2.y, r2.y, r4.y -mul.f r0.z, r0.z, c12.y -add.f r2.z, r2.z, c11.x -mul.f r1.x, r1.x, r0.w -mul.f r4.y, r1.y, r0.w -mul.f r0.x, r0.x, r0.w -mov.f32f32 r1.y, r0.z -mov.f32f32 r0.z, r1.x -mov.f32f32 r0.w, r4.y -mov.f32f32 r0.x, r0.x -rsq r1.x, r4.x -(ss)mov.f32f32 r1.x, r1.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.x, r0.x -nop -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -(rpt1)nop -mov.f32f32 r5.y, r0.z -mov.f32f32 r5.x, r0.w -mov.f32f32 r4.w, r0.x -mul.f r0.x, r0.y, r1.x -mul.f r0.y, r2.y, r1.x -mul.f r0.z, r2.x, r1.x -mul.f r0.w, r2.z, c12.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r1.x, r0.w -mul.f r0.w, r7.y, r0.x -mul.f r2.x, r7.x, r0.y -mad.f32 r0.w, r7.x, r0.z, (neg)r0.w -mad.f32 r2.x, r7.z, r0.x, (neg)r2.x -mul.f r2.y, r7.z, r0.z -mov.f32f32 r2.z, r0.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r2.x, r2.x -mad.f32 r0.y, r7.y, r0.y, (neg)r2.y -mov.f32f32 r2.z, r2.z -mov.f32f32 r4.z, r0.w -mov.f32f32 r4.y, r2.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.x, r0.x +mul.f r1.x, r0.w, c12.x +mov.f32f32 r0.w, r2.y +mad.f32 r2.z, c9.w, r7.y, r2.z +mad.f32 r0.z, r1.w, r4.y, r0.z +mad.f32 r1.w, c10.w, r7.z, r2.z +mul.f r2.z, c8.z, r7.x +mul.f r4.z, c0.w, r7.x +mul.f r5.z, c0.z, r7.x +mul.f r5.w, c0.y, r7.x +mul.f r6.x, c0.x, r7.x +rsq r0.z, r0.z +(ss)mov.f32f32 r5.x, r0.z +mul.f r4.w, r0.x, r0.z +mad.f32 r0.x, r2.y, r0.w, r2.x +add.f r1.w, r1.w, c11.w +mul.f r5.y, r4.y, r5.x +mul.f r5.x, r4.x, r5.x +(ss)mad.f32 r0.z, c9.z, r7.y, r2.z +mad.f32 r2.y, c1.w, r7.y, r4.z +mad.f32 r5.z, c1.z, r7.y, r5.z +rsq r0.x, r0.x +(ss)mov.f32f32 r4.x, r0.x +mul.f r2.x, r0.y, r0.x +(ss)mad.f32 r0.x, c10.z, r7.z, r0.z +mad.f32 r0.y, c2.w, r7.z, r2.y +mul.f r2.z, r0.w, r4.x +mul.f r2.y, r1.z, r4.x +mov.f32f32 r0.z, r2.x +add.f r1.z, r0.x, c11.z +mov.f32f32 r0.x, r2.z +mov.f32f32 r4.z, r2.y +mul.f r7.w, r6.z, r0.z +add.f r0.w, r0.y, c3.w +mul.f r0.y, r6.y, r0.x +mul.f r4.x, r6.w, r4.z +mad.f32 r4.y, r6.w, r0.z, (neg)r0.y +mad.f32 r4.x, r6.z, r0.x, (neg)r4.x +mad.f32 r4.z, r6.y, r4.z, (neg)r7.w +mad.f32 r0.x, c2.z, r7.z, r5.z +mad.f32 r0.y, c1.y, r7.y, r5.w +mad.f32 r5.z, c1.x, r7.y, r6.x +mad.f32 r0.y, c2.y, r7.z, r0.y +add.f r0.z, r0.x, c3.z +mad.f32 r0.x, c2.x, r7.z, r5.z nop -(ss)mov.f32f32 r4.x, r0.y -mov.f32f32 r2.y, r0.z -mov.f32f32 r2.x, r0.x -mov.f32f32 r0.x, r5.z -mad.f32 r0.y, c1.w, r3.x, r5.w -mad.f32 r0.z, c1.z, r3.x, r6.x -mul.f r0.w, c0.y, r2.w -mov.f32f32 r6.x, r0.x -mad.f32 r0.x, c2.w, r3.y, r0.y -mad.f32 r0.y, c2.z, r3.y, r0.z -mad.f32 r0.z, c1.y, r3.x, r0.w -mul.f r0.w, c0.x, r2.w -add.f r0.x, r0.x, c3.w -add.f r0.y, r0.y, c3.z -mad.f32 r5.z, c2.y, r3.y, r0.z -mad.f32 r3.x, c1.x, r3.x, r0.w -mov.f32f32 r0.w, r0.x -mov.f32f32 r0.z, r0.y -add.f r0.x, r5.z, c3.y -mad.f32 r3.x, c2.x, r3.y, r3.x -mul.f r5.z, r6.z, c6.y -mul.f r5.w, r6.y, c6.x -mov.f32f32 r0.y, r0.x -add.f r0.x, r3.x, c3.x -mov.f32f32 r3.x, r5.z -mov.f32f32 r5.z, r5.w -mov.f32f32 r6.y, r7.z -mov.f32f32 r0.x, r0.x -mov.f32f32 r5.w, r3.x -mov.f32f32 r5.z, r5.z -mov.f32f32 r6.w, r6.y -mov.f32f32 r3.x, r7.y -mov.f32f32 r6.y, r7.x -mov.f32f32 r3.w, r3.w -mov.f32f32 r3.z, r3.z -mov.f32f32 r6.z, r3.x -mov.f32f32 r6.y, r6.y -mov.f32f32 r3.w, r3.w -mov.f32f32 r3.z, r3.z -mad.f32 r3.x, c7.x, r3.y, c7.y -mad.f32 r2.w, c7.x, r2.w, c7.y -mov.f32f32 r7.x, c13.z -nop -mov.f32f32 r3.y, r3.x -mov.f32f32 r3.x, r2.w -mov.f32f32 r2.w, r7.x +add.f r0.y, r0.y, c3.y +mul.f r6.x, r3.y, c6.z +add.f r0.x, r0.x, c3.x +mul.f r5.w, r3.x, c6.y +mul.f r5.z, r2.w, c6.x +mad.f32 r3.y, c7.x, r7.z, c7.y +mad.f32 r3.x, c7.x, r7.x, c7.y +mov.f32f32 r2.w, c13.z end nop nop -nop ; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11) r4.x (5:12) r5.x (5:13) r6.x (5:14) -; VERT: inputs: r2.w (0:0,cm=7,il=8,b=0) r7.x (0:0,cm=7,il=12,b=0) r6.y (0:0,cm=7,il=16,b=0) r3.z (0:0,cm=3,il=20,b=0) -; VERT: 150 instructions, 0 half, 8 full +; VERT: inputs: r7.x (0:0,cm=7,il=8,b=0) r6.y (0:0,cm=7,il=12,b=0) r2.w (0:0,cm=7,il=16,b=0) r3.z (0:0,cm=3,il=20,b=0) +; VERT: 90 instructions, 0 half, 8 full diff --git a/reference/0ad-alpine-valley/0ad-64.asm b/reference/0ad-alpine-valley/0ad-64.asm index 9fd1082..78c7452 100644 --- a/reference/0ad-alpine-valley/0ad-64.asm +++ b/reference/0ad-alpine-valley/0ad-64.asm @@ -8,327 +8,223 @@ @out(r2.y) out1 @out(r2.z) out2 @out(r2.w) out3 +@const(c14.x) 0x3f000000, 0x00000000, 0x40000000, 0xbf800000 +@const(c15.x) 0xba03126f, 0xbf000000, 0x3f800000, 0x3fb8aa65 +@const(c16.x) 0x3de38866, 0x3cf5c28f, 0x00000000, 0x00000000 +@const(c17.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.x, 10, r1.x -bary.f r0.y, 0, r1.x +bary.f r1.z, 0, r1.x add.f r0.w, r0.w, c14.y -bary.f r1.z, 1, r1.x -mov.f32f32 r1.w, r0.x -add.f r2.x, r0.y, c15.y -bary.f r2.y, 8, r1.x +bary.f r1.w, 1, r1.x +mov.f32f32 r2.x, r0.x +bary.f r0.y, 11, r1.x add.f r2.z, r1.z, c15.y -mov.f32f32 r2.w, r1.w -bary.f r1.w, 11, r1.x -floor.f r3.x, r2.x +add.f r2.w, r1.w, c15.y +bary.f r3.x, 8, r1.x +mov.f32f32 r2.y, r0.y +floor.f r3.z, r2.z rcp r0.w, r0.w add.f r0.z, r0.z, c14.y -floor.f r3.y, r2.z -mov.f32f32 r3.z, r1.w -add.f r2.x, r2.x, (neg)r3.x +floor.f r3.w, r2.w +bary.f r3.y, 9, r1.x +add.f r2.z, r2.z, (neg)r3.z (ss)mul.f r0.z, r0.z, r0.w -(ss)add.f r0.w, r2.z, (neg)r3.y -mov.f32f32 r3.x, r3.z -mov.f32f32 r2.x, r2.x -mov.f32f32 r0.z, r0.z -absneg.f r2.z, (neg)c11.x -mov.f32f32 r0.w, r0.w -mul.f r3.y, c14.x, r2.x -add.f r3.z, c14.z, (neg)r2.x -sam (f32)(xyz)r3.w, r2.w, s#2, t#2 -(sy)(ss)mad.f32 r2.w, c14.z, r3.w, c14.w -mul.f r2.z, r2.z, c11.x -mov.f32f32 r3.x, r3.y -mul.f r3.y, c14.x, r0.w -mov.f32f32 r2.w, r2.w +sam (f32)(xyz)r4.x, r2.x, s#2, t#2 +(sy)(ss)mad.f32 r0.w, c14.z, r4.x, c14.w +absneg.f r2.x, (neg)c11.x +mov.f32f32 r2.y, r2.z +add.f r2.w, r2.w, (neg)r3.w +mov.f32f32 r3.z, r0.w bary.f r3.w, 4, r1.x -add.f r0.y, r0.y, (neg)r3.x -mul.f r2.z, r2.z, r0.z -mov.f32f32 r3.x, r3.y -mul.f r3.y, r3.w, r2.w -mad.f32 r3.w, c14.z, r4.x, c14.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r2.z, r2.z -add.f r1.z, r1.z, (neg)r3.x -mov.f32f32 r3.x, r3.w -bary.f r3.w, 12, r1.x -bary.f r4.x, 7, r1.x -add.f r4.z, c15.y, r0.y -mul.f r0.z, r2.z, r0.z -add.f r0.y, c15.z, r0.y -mul.f r2.z, r3.w, (neg)r4.x -mov.f32f32 r3.w, r4.z +mul.f r4.x, c14.x, r2.y +mul.f r2.x, r2.x, c11.x +mov.f32f32 r4.w, r2.w +mul.f r3.w, r3.w, r3.z +mad.f32 r4.y, c14.z, r4.y, c14.w +add.f r1.z, r1.z, (neg)r4.x +mul.f r2.x, r2.x, r0.z mov.f32f32 r0.z, r0.z -mov.f32f32 r0.y, r0.y -mad.f32 r2.z, r2.z, r3.x, r3.y -mul.f r3.y, r3.w, c5.z +mov.f32f32 r4.x, r4.y +bary.f r5.x, 12, r1.x +bary.f r5.y, 7, r1.x +mov.f32f32 r5.z, r1.z +mul.f r0.z, r2.x, r0.z +mul.f r2.x, c14.x, r4.w +mul.f r5.x, r5.x, (neg)r5.y +add.f r5.z, c15.y, r5.z mul.f r0.z, r0.z, c15.w -mul.f r0.y, r0.y, c5.z -mov.f32f32 r2.z, r2.z -mad.f32 r3.w, c14.z, r4.y, c14.w -mov.f32f32 r4.y, r3.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r4.z, r0.y -mov.f32f32 r3.w, r3.w -bary.f r4.w, 21, r1.x -mov.f32f32 r5.x, r4.y -mov.f32f32 r1.z, r1.z -mov.f32f32 r4.y, r4.z -mad.f32 r2.z, r4.w, r3.w, r2.z -exp2 r0.z, r0.z -(ss)mov.f32f32 r0.z, r0.z -add.f r4.z, c15.y, r1.z -mov.f32f32 r5.w, r4.y -mov.f32f32 r2.z, r2.z -add.f r4.y, c17.y, (neg)r0.z -mov.f32f32 r4.z, r4.z +add.f r1.w, r1.w, (neg)r2.x +mad.f32 r2.x, r5.x, r4.x, r3.w +mad.f32 r3.w, c14.z, r4.z, c14.w +mul.f r5.z, r5.z, c5.z +mov.f32f32 r4.z, r1.w add.f r1.z, c15.z, r1.z -mul.f r4.w, r2.z, r2.z -bary.f r5.y, 5, r1.x -mul.f r4.z, r4.z, c5.w -mul.f r4.y, r4.y, c11.y -mul.f r0.z, r0.z, c15.z -mul.f r5.z, r5.y, r2.w -bary.f r5.y, 13, r1.x -mov.f32f32 r6.x, r4.z -add.f r0.z, r0.z, r4.y -mov.f32f32 r1.z, r1.z -mul.f r4.y, r5.y, (neg)r4.x -mov.f32f32 r5.y, r6.x -bary.f r6.x, 2, r1.x -mov.f32f32 r0.z, r0.z -mad.f32 r4.y, r4.y, r3.x, r5.z -mul.f r1.z, r1.z, c5.w -add.f r6.z, r6.x, c15.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r4.y, r4.y -bary.f r5.z, 22, r1.x -mov.f32f32 r6.x, r6.z -mov.f32f32 r0.z, r0.z -mov.f32f32 r6.y, r1.z -mad.f32 r4.y, r5.z, r3.w, r4.y -mov.f32f32 r5.z, r6.x +mov.f32f32 r5.x, r3.w +bary.f r5.w, 21, r1.x +mov.f32f32 r6.y, r5.z +exp2 r0.z, r0.z +(ss)mov.f32f32 r6.x, r0.z +add.f r4.z, c15.y, r4.z +mad.f32 r2.x, r5.w, r5.x, r2.x +mul.f r7.x, r1.z, c5.z +add.f r1.z, c15.z, r1.w +add.f r1.w, c17.y, (neg)r6.x +mov.f32f32 r7.w, r2.x +mul.f r8.y, r4.z, c5.w +mov.f32f32 r8.x, r7.x +mul.f r7.y, r1.z, c5.w +mul.f r1.z, r2.x, r7.w +bary.f r2.x, 5, r1.x +mov.f32f32 r6.z, r8.y +mul.f r1.w, r1.w, c11.y +(ss)mul.f r0.z, r0.z, c15.z +mul.f r2.x, r2.x, r3.z +bary.f r3.z, 13, r1.x +bary.f r4.z, 2, r1.x +add.f r0.z, r0.z, r1.w +mov.f32f32 r5.w, r7.y +mul.f r1.w, r3.z, (neg)r5.y +add.f r7.z, r4.z, c15.x max.f r0.z, r0.z, c14.y -mov.f32f32 r6.x, r6.y -mov.f32f32 r4.y, r4.y -mov.f32f32 r6.w, r0.y -mov.f32f32 r0.y, r3.y +add.f r2.y, c14.z, (neg)r2.y +mad.f32 r1.w, r1.w, r4.x, r2.x +bary.f r2.x, 22, r1.x +mov.f32f32 r6.w, r7.z min.f r0.z, r0.z, c15.z -mad.f32 r3.y, r4.y, r4.y, r4.w -sam.s (f32)(x)r4.w, r5.x, s#4, t#4 -(sy)mov.f32f32 r4.w, r4.w -mov.f32f32 r6.x, r6.x -(ss)add.f r5.x, c17.y, (neg)r0.z -mov.f32f32 r3.y, r3.y -bary.f r5.y, 6, r1.x -mov.f32f32 r4.w, r4.w -mov.f32f32 r3.z, r3.z -add.f r5.z, c14.z, (neg)r0.w -mul.f r2.w, r5.y, r2.w -bary.f r5.y, 14, r1.x -mul.f r5.x, r5.x, c10.z -add.f r6.y, c17.y, (neg)r0.z -add.f r7.x, c17.y, (neg)r0.z -mul.f r4.x, r5.y, (neg)r4.x -mov.f32f32 r5.y, r5.z -mul.f r5.z, r6.y, c10.y -mul.f r7.z, r7.x, c10.x -mad.f32 r2.w, r4.x, r3.x, r2.w -mul.f r3.x, r3.z, r5.y -mov.f32f32 r4.x, r6.z -mov.f32f32 r4.z, r4.z -mov.f32f32 r2.w, r2.w -bary.f r6.y, 23, r1.x -mul.f r3.x, r3.x, r4.w -mov.f32f32 r7.x, r4.z -mov.f32f32 r4.z, r6.z -mad.f32 r2.w, r6.y, r3.w, r2.w -mov.f32f32 r6.y, r4.x -mov.f32f32 r7.w, r0.y -mov.f32f32 r7.y, r4.z -mov.f32f32 r0.y, r2.w -mov.f32f32 r1.z, r1.z -add.f r2.x, r2.x, c15.z -add.f r0.w, r0.w, c15.z -mad.f32 r2.w, r0.y, r0.y, r3.y -sam.s (f32)(x)r8.y, r5.w, s#4, t#4 -mov.f32f32 r8.x, r1.z -sam.s (f32)(x)r8.z, r6.w, s#4, t#4 -mov.f32f32 r1.z, r6.z -mul.f r3.y, r2.x, r5.y -mul.f r3.z, r3.z, r0.w -(sy)mov.f32f32 r3.w, r8.z -mov.f32f32 r4.x, r8.y -rsq r2.w, r2.w -(ss)mov.f32f32 r2.w, r2.w -mov.f32f32 r8.y, r1.z -mov.f32f32 r1.z, r2.y -mul.f r0.w, r2.x, r0.w -mul.f r2.x, r2.z, r2.w -mad.f32 r2.y, r3.y, r3.w, r3.x -mul.f r2.z, r4.y, r2.w -mul.f r0.y, r0.y, r2.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r2.y, r2.y -sam.s (f32)(x)r5.w, r7.w, s#4, t#4 -(sy)mov.f32f32 r2.w, r5.w -mov.f32f32 r2.z, r2.z -mul.f r3.x, r2.x, r2.x -mul.f r3.y, (neg)c9.x, r2.x -mad.f32 r2.y, r3.z, r2.w, r2.y -mad.f32 r2.w, r2.z, r2.z, r3.x -mad.f32 r3.x, (neg)c9.y, r2.z, r3.y -mov.f32f32 r0.y, r0.y -mov.f32f32 r3.y, r1.z -mov.f32f32 r1.z, r2.w -mov.f32f32 r2.w, r3.x -mad.f32 r1.z, r0.y, r0.y, r1.z -mad.f32 r2.w, (neg)c9.z, r0.y, r2.w -mov.f32f32 r2.y, r2.y -bary.f r3.x, 9, r1.x -mov.f32f32 r3.w, r0.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r4.y, r1.w +mov.f32f32 r8.z, r7.z +mad.f32 r1.w, r2.x, r5.x, r1.w +mov.f32f32 r6.x, r7.z +add.f r2.x, c17.y, (neg)r0.z +add.f r3.z, c17.y, (neg)r0.z +mov.f32f32 r4.x, r1.w +sam.s (f32)(x)r8.w, r6.y, s#4, t#4 +add.f r4.z, c17.y, (neg)r0.z +mov.f32f32 r5.x, r2.y +add.f r4.w, c14.z, (neg)r4.w +mad.f32 r1.z, r1.w, r4.x, r1.z +bary.f r1.w, 6, r1.x +(ss)mul.f r6.y, r2.x, c10.z +mul.f r3.z, r3.z, c10.y +mul.f r4.z, r4.z, c10.x +mul.f r0.w, r1.w, r0.w +bary.f r1.w, 14, r1.x +mov.f32f32 r2.x, r4.w +sam.s (f32)(x)r9.x, r8.x, s#4, t#4 +sam.s (f32)(x)r9.y, r5.z, s#4, t#4 +sam.s (f32)(x)r6.z, r7.x, s#4, t#4 +add.f r2.z, r2.z, c15.z +add.f r2.w, r2.w, c15.z +mul.f r1.w, r1.w, (neg)r5.y +mul.f r2.x, r5.x, r2.x +mul.f r4.w, r2.z, r4.w +mul.f r2.y, r2.y, r2.w +mad.f32 r0.w, r1.w, r4.y, r0.w +bary.f r1.w, 23, r1.x +(sy)mul.f r2.x, r2.x, r8.w +mul.f r2.z, r2.z, r2.w +mad.f32 r2.x, r4.w, r9.x, r2.x +mad.f32 r0.w, r1.w, r3.w, r0.w +mad.f32 r1.w, r2.y, r9.y, r2.x +(ss)nop +sam (f32)(w)r4.w, r3.x, s#1, t#1 +(sy)cmps.f.lt r2.x, r5.z, c16.y +mov.f32f32 r2.w, r0.x +mov.f32f32 r2.y, r0.w +mad.f32 r1.w, r2.z, r6.z, r1.w +cov.u32f32 r2.x, r2.x +(ss)mov.f32f32 r3.x, r0.y +mad.f32 r1.z, r2.y, r2.y, r1.z +mul.f r1.w, c16.x, r1.w +cmps.f.ne r2.x, r2.x, c14.y +(rpt3)nop rsq r1.z, r1.z -(ss)mov.f32f32 r1.z, r1.z -mov.f32f32 r2.w, r2.w -mad.f32 r0.w, r0.w, r4.x, r2.y -mov.f32f32 r2.y, r3.x -mul.f r2.x, r2.x, r1.z -max.f r2.w, r2.w, c14.y -mov.f32f32 r0.w, r0.w -mul.f r2.z, r2.z, r1.z -mov.f32f32 r2.x, r2.x -bary.f r3.x, 15, r1.x -mov.f32f32 r2.w, r2.w -bary.f r3.z, 19, r1.x -bary.f r4.x, 18, r1.x -mov.f32f32 r3.x, r3.x -bary.f r4.z, 20, r1.x -mov.f32f32 r3.z, r3.z -mov.f32f32 r4.x, r4.x -mul.f r2.x, r2.x, r3.x -mov.f32f32 r2.z, r2.z -bary.f r3.x, 16, r1.x -mov.f32f32 r4.z, r4.z -mad.f32 r4.w, c8.y, r2.w, (neg)r3.z -mad.f32 r5.y, c8.x, r2.w, (neg)r4.x -mov.f32f32 r3.x, r3.x -mad.f32 r2.w, c8.z, r2.w, (neg)r4.z -mov.f32f32 r4.w, r4.w -mov.f32f32 r5.y, r5.y -mad.f32 r2.x, r2.z, r3.x, r2.x -mov.f32f32 r2.z, r2.w -mad.f32 r2.w, c12.x, r4.w, r3.z -mad.f32 r3.x, c12.x, r5.y, r4.x -mov.f32f32 r2.x, r2.x -mul.f r0.y, r0.y, r1.z -mad.f32 r1.z, c12.x, r2.z, r4.z -mov.f32f32 r2.z, r2.w -mov.f32f32 r3.x, r3.x -mov.f32f32 r0.y, r0.y +(ss)mov.f32f32 r2.y, r1.z +mov.f32f32 r3.y, r1.w +mul.f r0.w, r0.w, r1.z +(ss)mov.f32f32 r1.z, c14.y +mul.f r2.z, r7.w, r2.y +mul.f r2.y, r4.x, r2.y +mov.f32f32 r3.w, r0.w +sel.b32 r1.z, r1.z, r2.x, r5.z +mov.f32f32 r2.x, r2.z +mul.f r2.z, (neg)c9.x, r2.z +mov.f32f32 r4.x, r2.y +mad.f32 r2.y, (neg)c9.y, r2.y, r2.z +mul.f r2.z, r2.x, r2.x +mad.f32 r0.w, (neg)c9.z, r0.w, r2.y +mad.f32 r2.y, r4.x, r4.x, r2.z +sam (f32)(xyz)r4.w, r2.w, s#3, t#3 +(sy)(ss)mul.f r3.x, c8.z, r5.y +mad.f32 r2.y, r3.w, r3.w, r2.y +max.f r0.w, r0.w, c14.y +bary.f r2.z, 18, r1.x +(rpt1)nop +mov.f32f32 r2.w, r0.w +bary.f r4.y, 19, r1.x +rsq r2.y, r2.y +(ss)mov.f32f32 r5.y, r2.y +bary.f r5.z, 20, r1.x +mad.f32 r5.w, c8.y, r2.w, (neg)r4.y +mad.f32 r2.w, c8.z, r2.w, (neg)r5.z +mul.f r2.x, r2.x, r5.y +bary.f r6.x, 15, r1.x +mov.f32f32 r5.z, r5.z +mov.f32f32 r4.y, r4.y +mul.f r4.x, r4.x, r5.y +mul.f r2.x, r2.x, r6.x +bary.f r5.y, 16, r1.x +mad.f32 r5.z, c12.x, r2.w, r5.z +mad.f32 r4.y, c12.x, r5.w, r4.y +(ss)mul.f r2.y, r3.w, r2.y +mad.f32 r2.x, r4.x, r5.y, r2.x bary.f (ei)r1.x, 17, r1.x -mov.f32f32 r1.y, r1.z -mul.f r0.w, c16.x, r0.w -nop -mov.f32f32 r1.x, r1.x -mov.f32f32 r3.z, r2.y -mov.f32f32 r3.w, r3.w -mov.f32f32 r4.z, r0.x -mad.f32 r0.x, r0.y, r1.x, r2.x -mov.f32f32 r0.y, r0.w -mov.f32f32 r0.w, r1.w -mov.f32f32 r4.w, r4.y -mov.f32f32 r0.x, r0.x -sam (f32)(w)r1.z, r3.y, s#1, t#1 -(sy)mov.f32f32 r1.x, r2.y -cmps.f.lt r1.z, r2.y, c16.y -mov.f32f32 r4.x, r0.w -max.f r0.x, c14.y, r0.x -mov.f32f32 r0.w, r1.x -cov.u32f32 r1.x, r1.z -sam (f32)(xyz)r1.z, r4.z, s#3, t#3 -(sy)mul.f r2.x, c8.z, r2.x -mov.f32f32 r0.x, r0.x -mul.f r1.w, c8.y, r1.w -mul.f r1.z, c8.x, r1.z -(ss)nop -sam (f32)(xyzw)r3.y, r3.w, s#0, t#0 -cmps.f.ne r1.x, r1.x, c14.y -(rpt2)nop -log2 r0.x, r0.x +mad.f32 r0.w, c8.x, r0.w, (neg)r2.z +(rpt1)nop +mad.f32 r1.x, r2.y, r1.x, r2.x +mov.f32f32 r1.y, r2.z +mul.f r3.w, c8.y, r5.x +mul.f r4.x, c8.x, r4.w +max.f r1.x, c14.y, r1.x +mad.f32 r0.w, c12.x, r0.w, r1.y +sam (f32)(xyzw)r2.x, r0.x, s#0, t#0 +(rpt4)nop +(ss)log2 r0.x, r1.x (ss)mul.f r0.x, c12.y, r0.x -mov.f32f32 r2.y, c14.y -mov.f32f32 r2.x, r2.x -(sy)mov.f32f32 r2.w, r4.x -mov.f32f32 r0.x, r0.x -sel.b32 r0.w, r2.y, r1.x, r0.w -mov.f32f32 r1.x, r1.w -mov.f32f32 r1.z, r1.z -nop -mov.f32f32 r2.w, r2.w -nop +(rpt5)nop exp2 r0.x, r0.x -(ss)mul.f r1.w, r2.x, r0.x -mul.f r1.x, r1.x, r0.x -mad.f32 r1.y, r3.w, r1.y, r1.w -mad.f32 r1.x, r3.z, r2.z, r1.x -(ss)mul.f r0.x, r1.z, r0.x -nop -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.x, r1.x -mad.f32 r0.x, r3.y, r3.x, r0.x -nop -mul.f r1.y, r1.y, r0.y -mul.f r1.x, r1.x, r0.y -mov.f32f32 r0.x, r0.x -nop -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.x, r1.x -mad.f32 r1.y, c7.z, r3.w, r1.y -mad.f32 r1.x, c7.y, r3.z, r1.x -mul.f r0.x, r0.x, r0.y -nop -mov.f32f32 r0.y, r1.y -mov.f32f32 r1.x, r1.x -mov.f32f32 r0.x, r0.x +(ss)mul.f r0.y, r3.x, r0.x +(ss)mul.f r1.x, r3.w, r0.x +(sy)mad.f32 r0.y, r2.z, r5.z, r0.y +mad.f32 r1.x, r2.y, r4.y, r1.x +mul.f r0.x, r4.x, r0.x +nop +mul.f r0.y, r0.y, r3.y +mul.f r1.x, r1.x, r3.y +mad.f32 r0.y, c7.z, r2.z, r0.y +mad.f32 r1.x, c7.y, r2.y, r1.x +mad.f32 r0.x, r2.x, r0.w, r0.x nop mul.f r0.y, r0.z, r0.y -mul.f r1.x, r0.z, r1.x -mad.f32 r0.x, c7.x, r3.y, r0.x +mul.f r0.w, r0.z, r1.x +mul.f r0.x, r0.x, r1.w nop -add.f r0.y, r0.y, r5.x -add.f r1.x, r1.x, r5.z -mov.f32f32 r0.x, r0.x +add.f r0.y, r0.y, r6.y +add.f r0.w, r0.w, r3.z +mad.f32 r0.x, c7.x, r2.x, r0.x nop -mul.f r0.y, r0.y, r0.w -mul.f r1.x, r1.x, r0.w +mul.f r0.y, r0.y, r1.z +mul.f r0.w, r0.w, r1.z mul.f r0.x, r0.z, r0.x nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r1.x -add.f r0.x, r0.x, r7.z -nop -mul.f r0.y, r0.y, c6.z -mul.f r0.z, r0.z, c6.y -mul.f r0.x, r0.x, r0.w -nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.x, r0.x -nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -mul.f r0.x, r0.x, c6.x -nop -mov.f32f32 r2.z, r0.y -mov.f32f32 r2.y, r0.z -mov.f32f32 r0.x, r0.x +mul.f r2.z, r0.y, c6.z +mul.f r2.y, r0.w, c6.y +add.f r0.x, r0.x, r4.z (rpt2)nop -mov.f32f32 r0.x, r0.x +mul.f r0.x, r0.x, r1.z (rpt2)nop -mov.f32f32 r2.x, r0.x +mul.f r2.x, r0.x, c6.x end -nop ; FRAG: outputs: r2.x (1:0) -; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r3.y (5:10,cm=f,il=12,b=1) r1.x (5:11,cm=f,il=16,b=1) r2.y (5:12,cm=f,il=20,b=1) r2.w (5:13,cm=f,il=24,b=1) r5.z (5:14,cm=f,il=28,b=1) -; FRAG: 325 instructions, 0 half, 9 full +; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r4.z (5:10,cm=f,il=12,b=1) r63.z (5:11,cm=f,il=16,b=1) r5.y (5:12,cm=f,il=20,b=1) r3.z (5:13,cm=f,il=24,b=1) r1.x (5:14,cm=f,il=28,b=1) +; FRAG: 230 instructions, 0 half, 10 full diff --git a/reference/0ad-alpine-valley/0ad-65.asm b/reference/0ad-alpine-valley/0ad-65.asm index 3ac80f9..b4c4f88 100644 --- a/reference/0ad-alpine-valley/0ad-65.asm +++ b/reference/0ad-alpine-valley/0ad-65.asm @@ -1,14 +1,14 @@ ; options: ; VERT: new compiler -@in(r2.w) in0 -@in(r3.x) in1 -@in(r3.y) in2 -@in(r7.x) in4 -@in(r7.y) in5 -@in(r7.z) in6 -@in(r6.y) in8 -@in(r6.z) in9 -@in(r6.w) in10 +@in(r7.x) in0 +@in(r7.y) in1 +@in(r7.z) in2 +@in(r6.y) in4 +@in(r6.z) in5 +@in(r6.w) in6 +@in(r2.w) in8 +@in(r3.x) in9 +@in(r3.y) in10 @in(r3.z) in12 @in(r3.w) in13 @out(r0.x) out0 @@ -39,159 +39,101 @@ @out(r6.y) out25 @out(r6.z) out26 @out(r6.w) out27 -(sy)(ss)add.f r0.x, c4.x, (neg)r2.w -mul.f r0.y, r7.x, r7.x -mul.f r0.z, c8.w, r2.w -mul.f r0.w, c8.z, r2.w +@const(c13.x) 0x3f800000, 0x00000000, 0xbf800000, 0x00000000 +@const(c14.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 +(sy)(ss)add.f r0.x, c4.x, (neg)r7.x +mul.f r0.y, r6.y, r6.y +mul.f r0.z, c8.y, r7.x +mul.f r0.w, c8.x, r7.x mul.f r1.x, r0.x, r0.x -add.f r1.y, c4.y, (neg)r3.x +add.f r1.z, c4.y, (neg)r7.y add.f r0.y, c13.x, (neg)r0.y -mad.f32 r0.z, c9.w, r3.x, r0.z -mad.f32 r0.w, c9.z, r3.x, r0.w -mad.f32 r1.x, r1.y, r1.y, r1.x -mov.f32f32 r0.y, r0.y -mad.f32 r0.z, c10.w, r3.y, r0.z -mad.f32 r0.w, c10.z, r3.y, r0.w -mov.f32f32 r1.x, r1.x -add.f r1.z, c4.z, (neg)r3.y -mul.f r1.w, r0.y, r0.y -mul.f r2.x, r7.y, r7.x -add.f r0.z, r0.z, c11.w +mad.f32 r0.z, c9.y, r7.y, r0.z +mad.f32 r0.w, c9.x, r7.y, r0.w mad.f32 r1.x, r1.z, r1.z, r1.x -add.f r0.w, r0.w, c11.z -mul.f r2.y, c8.y, r2.w -mul.f r2.z, c8.x, r2.w -add.f r2.x, c13.y, (neg)r2.x -mul.f r4.x, r6.w, c6.z -mov.f32f32 r0.z, r0.z +add.f r1.w, c4.z, (neg)r7.z +mov.f32f32 r1.y, r0.y +mad.f32 r0.z, c10.y, r7.z, r0.z +mad.f32 r0.w, c10.x, r7.z, r0.w +mad.f32 r1.x, r1.w, r1.w, r1.x +mul.f r2.x, r1.y, r1.y +mul.f r1.y, r6.z, r6.y +add.f r0.z, r0.z, c11.y +add.f r0.w, r0.w, c11.x +mul.f r2.y, r6.w, r6.y +mul.f r2.z, c8.w, r7.x rsq r1.x, r1.x -(ss)mov.f32f32 r1.x, r1.x -mov.f32f32 r2.x, r2.x -mov.f32f32 r0.w, r0.w -mad.f32 r2.y, c9.y, r3.x, r2.y +(ss)mov.f32f32 r4.x, r1.x +add.f r4.y, c13.y, (neg)r1.y mad.f32 r0.x, r0.x, r1.x, (neg)c5.x -mad.f32 r1.w, r2.x, r2.x, r1.w -mad.f32 r1.y, r1.y, r1.x, (neg)c5.y -mad.f32 r1.x, r1.z, r1.x, (neg)c5.z -mov.f32f32 r0.x, r0.x +mul.f r1.y, r0.z, c12.y +mad.f32 r0.z, r1.z, r4.x, (neg)c5.y +mov.f32f32 r1.z, r4.y +(ss)mov.f32f32 r1.x, r0.x +mad.f32 r1.w, r1.w, r4.x, (neg)c5.z +mov.f32f32 r4.x, r0.z +mad.f32 r2.x, r4.y, r1.z, r2.x +mul.f r1.x, r1.x, r1.x +add.f r2.y, c13.y, (neg)r2.y +mad.f32 r0.z, r0.z, r4.x, r1.x mov.f32f32 r4.y, r1.w -mul.f r1.z, r7.z, r7.x -mov.f32f32 r1.y, r1.y -mul.f r1.w, r0.x, r0.x -mov.f32f32 r1.x, r1.x -add.f r4.z, c13.y, (neg)r1.z -mad.f32 r4.w, r1.y, r1.y, r1.w -mov.f32f32 r1.w, r0.z -mov.f32f32 r1.z, r0.w -mad.f32 r0.z, c10.y, r3.y, r2.y -mov.f32f32 r0.w, r4.w -mov.f32f32 r2.y, r4.z -mad.f32 r0.w, r1.x, r1.x, r0.w -add.f r0.z, r0.z, c11.y -mad.f32 r2.z, c9.x, r3.x, r2.z -mov.f32f32 r5.z, r4.x -mad.f32 r2.z, c10.x, r3.y, r2.z -mul.f r5.w, c0.w, r2.w -mul.f r6.x, c0.z, r2.w -rsq r0.w, r0.w -(ss)mov.f32f32 r0.w, r0.w -mad.f32 r4.x, r2.y, r2.y, r4.y -mul.f r0.z, r0.z, c12.y -add.f r2.z, r2.z, c11.x -mul.f r1.x, r1.x, r0.w -mul.f r4.y, r1.y, r0.w -mul.f r0.x, r0.x, r0.w -mov.f32f32 r1.y, r0.z -mov.f32f32 r0.z, r1.x -mov.f32f32 r0.w, r4.y -mov.f32f32 r0.x, r0.x -rsq r1.x, r4.x -(ss)mov.f32f32 r1.x, r1.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.x, r0.x -nop -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -(rpt1)nop -mov.f32f32 r5.y, r0.z -mov.f32f32 r5.x, r0.w -mov.f32f32 r4.w, r0.x -mul.f r0.x, r0.y, r1.x -mul.f r0.y, r2.y, r1.x -mul.f r0.z, r2.x, r1.x -mul.f r0.w, r2.z, c12.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r1.x, r0.w -mul.f r0.w, r7.y, r0.x -mul.f r2.x, r7.x, r0.y -mad.f32 r0.w, r7.x, r0.z, (neg)r0.w -mad.f32 r2.x, r7.z, r0.x, (neg)r2.x -mul.f r2.y, r7.z, r0.z -mov.f32f32 r2.z, r0.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r2.x, r2.x -mad.f32 r0.y, r7.y, r0.y, (neg)r2.y -mov.f32f32 r2.z, r2.z -mov.f32f32 r4.z, r0.w -mov.f32f32 r4.y, r2.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.x, r0.x +mul.f r1.x, r0.w, c12.x +mov.f32f32 r0.w, r2.y +mad.f32 r2.z, c9.w, r7.y, r2.z +mad.f32 r0.z, r1.w, r4.y, r0.z +mad.f32 r1.w, c10.w, r7.z, r2.z +mul.f r2.z, c8.z, r7.x +mul.f r4.z, c0.w, r7.x +mul.f r5.z, c0.z, r7.x +mul.f r5.w, c0.y, r7.x +mul.f r6.x, c0.x, r7.x +rsq r0.z, r0.z +(ss)mov.f32f32 r5.x, r0.z +mul.f r4.w, r0.x, r0.z +mad.f32 r0.x, r2.y, r0.w, r2.x +add.f r1.w, r1.w, c11.w +mul.f r5.y, r4.y, r5.x +mul.f r5.x, r4.x, r5.x +(ss)mad.f32 r0.z, c9.z, r7.y, r2.z +mad.f32 r2.y, c1.w, r7.y, r4.z +mad.f32 r5.z, c1.z, r7.y, r5.z +rsq r0.x, r0.x +(ss)mov.f32f32 r4.x, r0.x +mul.f r2.x, r0.y, r0.x +(ss)mad.f32 r0.x, c10.z, r7.z, r0.z +mad.f32 r0.y, c2.w, r7.z, r2.y +mul.f r2.z, r0.w, r4.x +mul.f r2.y, r1.z, r4.x +mov.f32f32 r0.z, r2.x +add.f r1.z, r0.x, c11.z +mov.f32f32 r0.x, r2.z +mov.f32f32 r4.z, r2.y +mul.f r7.w, r6.z, r0.z +add.f r0.w, r0.y, c3.w +mul.f r0.y, r6.y, r0.x +mul.f r4.x, r6.w, r4.z +mad.f32 r4.y, r6.w, r0.z, (neg)r0.y +mad.f32 r4.x, r6.z, r0.x, (neg)r4.x +mad.f32 r4.z, r6.y, r4.z, (neg)r7.w +mad.f32 r0.x, c2.z, r7.z, r5.z +mad.f32 r0.y, c1.y, r7.y, r5.w +mad.f32 r5.z, c1.x, r7.y, r6.x +mad.f32 r0.y, c2.y, r7.z, r0.y +add.f r0.z, r0.x, c3.z +mad.f32 r0.x, c2.x, r7.z, r5.z nop -(ss)mov.f32f32 r4.x, r0.y -mov.f32f32 r2.y, r0.z -mov.f32f32 r2.x, r0.x -mov.f32f32 r0.x, r5.z -mad.f32 r0.y, c1.w, r3.x, r5.w -mad.f32 r0.z, c1.z, r3.x, r6.x -mul.f r0.w, c0.y, r2.w -mov.f32f32 r6.x, r0.x -mad.f32 r0.x, c2.w, r3.y, r0.y -mad.f32 r0.y, c2.z, r3.y, r0.z -mad.f32 r0.z, c1.y, r3.x, r0.w -mul.f r0.w, c0.x, r2.w -add.f r0.x, r0.x, c3.w -add.f r0.y, r0.y, c3.z -mad.f32 r5.z, c2.y, r3.y, r0.z -mad.f32 r3.x, c1.x, r3.x, r0.w -mov.f32f32 r0.w, r0.x -mov.f32f32 r0.z, r0.y -add.f r0.x, r5.z, c3.y -mad.f32 r3.x, c2.x, r3.y, r3.x -mul.f r5.z, r6.z, c6.y -mul.f r5.w, r6.y, c6.x -mov.f32f32 r0.y, r0.x -add.f r0.x, r3.x, c3.x -mov.f32f32 r3.x, r5.z -mov.f32f32 r5.z, r5.w -mov.f32f32 r6.y, r7.z -mov.f32f32 r0.x, r0.x -mov.f32f32 r5.w, r3.x -mov.f32f32 r5.z, r5.z -mov.f32f32 r6.w, r6.y -mov.f32f32 r3.x, r7.y -mov.f32f32 r6.y, r7.x -mov.f32f32 r3.w, r3.w -mov.f32f32 r3.z, r3.z -mov.f32f32 r6.z, r3.x -mov.f32f32 r6.y, r6.y -mov.f32f32 r3.w, r3.w -mov.f32f32 r3.z, r3.z -mad.f32 r3.x, c7.x, r3.y, c7.y -mad.f32 r2.w, c7.x, r2.w, c7.y -mov.f32f32 r7.x, c13.z -nop -mov.f32f32 r3.y, r3.x -mov.f32f32 r3.x, r2.w -mov.f32f32 r2.w, r7.x +add.f r0.y, r0.y, c3.y +mul.f r6.x, r3.y, c6.z +add.f r0.x, r0.x, c3.x +mul.f r5.w, r3.x, c6.y +mul.f r5.z, r2.w, c6.x +mad.f32 r3.y, c7.x, r7.z, c7.y +mad.f32 r3.x, c7.x, r7.x, c7.y +mov.f32f32 r2.w, c13.z end nop nop -nop ; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11) r4.x (5:12) r5.x (5:13) r6.x (5:14) -; VERT: inputs: r2.w (0:0,cm=7,il=8,b=0) r7.x (0:0,cm=7,il=12,b=0) r6.y (0:0,cm=7,il=16,b=0) r3.z (0:0,cm=3,il=20,b=0) -; VERT: 150 instructions, 0 half, 8 full +; VERT: inputs: r7.x (0:0,cm=7,il=8,b=0) r6.y (0:0,cm=7,il=12,b=0) r2.w (0:0,cm=7,il=16,b=0) r3.z (0:0,cm=3,il=20,b=0) +; VERT: 90 instructions, 0 half, 8 full diff --git a/reference/0ad-alpine-valley/0ad-66.asm b/reference/0ad-alpine-valley/0ad-66.asm index 035c379..5b14079 100644 --- a/reference/0ad-alpine-valley/0ad-66.asm +++ b/reference/0ad-alpine-valley/0ad-66.asm @@ -1,13 +1,13 @@ ; options: ; VERT: new compiler -@in(r3.x) in0 -@in(r3.y) in1 -@in(r3.z) in2 +@in(r4.x) in0 +@in(r4.y) in1 +@in(r4.z) in2 @in(r0.x) in4 @in(r0.y) in5 @in(r0.z) in6 -@in(r4.x) in8 -@in(r4.y) in9 +@in(r3.z) in8 +@in(r3.w) in9 @out(r0.x) out0 @out(r0.y) out1 @out(r0.z) out2 @@ -24,79 +24,57 @@ @out(r3.y) out13 @out(r3.z) out14 @out(r3.w) out15 +@const(c12.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 +@const(c13.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)mul.f r0.x, r0.x, (neg)c4.x -mul.f r0.w, c7.w, r3.x +mul.f r0.w, c7.y, r4.x mad.f32 r0.x, (neg)c4.y, r0.y, r0.x -mad.f32 r0.y, c8.w, r3.y, r0.w -mul.f r0.w, c7.z, r3.x -mul.f r1.x, c7.y, r3.x -mov.f32f32 r0.x, r0.x -mad.f32 r0.y, c9.w, r3.z, r0.y +mad.f32 r0.y, c8.y, r4.y, r0.w mad.f32 r0.x, (neg)c4.z, r0.z, r0.x -mad.f32 r0.z, c8.z, r3.y, r0.w -mad.f32 r0.w, c8.y, r3.y, r1.x -mul.f r1.w, c7.x, r3.x +mad.f32 r0.y, c9.y, r4.z, r0.y +mul.f r0.z, c7.x, r4.x +mul.f r0.w, c7.w, r4.x max.f r0.x, c12.x, r0.x -add.f r0.y, r0.y, c10.w -mad.f32 r0.z, c9.z, r3.z, r0.z -mad.f32 r0.w, c9.y, r3.z, r0.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y -add.f r0.z, r0.z, c10.z -add.f r0.w, r0.w, c10.y -mul.f r1.x, r0.x, c5.z -mul.f r1.y, r0.x, c5.y -mul.f r0.x, r0.x, c5.x -mov.f32f32 r2.w, r0.y -mov.f32f32 r0.y, r1.x -mov.f32f32 r1.x, r1.y -mov.f32f32 r0.x, r0.x -nop -mov.f32f32 r1.z, r0.y -mov.f32f32 r1.y, r1.x -mov.f32f32 r1.x, r0.x -mov.f32f32 r0.x, r0.z -mul.f r0.y, r0.w, c11.y -mad.f32 r0.z, c8.x, r3.y, r1.w -mul.f r0.w, c0.w, r3.x -mov.f32f32 r2.z, r0.x -mov.f32f32 r2.y, r0.y -mad.f32 r0.x, c9.x, r3.z, r0.z -mad.f32 r0.y, c1.w, r3.y, r0.w -mul.f r0.z, c0.z, r3.x -mul.f r0.w, c0.y, r3.x -add.f r0.x, r0.x, c10.x -mad.f32 r0.y, c2.w, r3.z, r0.y -mad.f32 r0.z, c1.z, r3.y, r0.z -mad.f32 r0.w, c1.y, r3.y, r0.w -mul.f r0.x, r0.x, c11.x -add.f r0.y, r0.y, c3.w -mad.f32 r0.z, c2.z, r3.z, r0.z -mad.f32 r1.w, c2.y, r3.z, r0.w -mov.f32f32 r2.x, r0.x -mov.f32f32 r0.w, r0.y -add.f r0.x, r0.z, c3.z -add.f r0.y, r1.w, c3.y -mul.f r1.w, c0.x, r3.x -mov.f32f32 r3.w, r4.y -mov.f32f32 r0.z, r0.x -mov.f32f32 r0.y, r0.y -mad.f32 r0.x, c1.x, r3.y, r1.w -mov.f32f32 r3.w, r3.w -mad.f32 r0.x, c2.x, r3.z, r0.x -mov.f32f32 r1.w, r4.x -mad.f32 r3.y, c6.x, r3.z, c6.y -mad.f32 r3.x, c6.x, r3.x, c6.y -add.f r0.x, r0.x, c3.x -mov.f32f32 r3.z, r1.w -mov.f32f32 r3.y, r3.y -mov.f32f32 r3.x, r3.x -mov.f32f32 r0.x, r0.x +add.f r0.y, r0.y, c10.y +mad.f32 r0.z, c8.x, r4.y, r0.z +mad.f32 r0.w, c8.w, r4.y, r0.w +mov.f32f32 r1.y, r0.x +mul.f r2.y, r0.y, c11.y +mad.f32 r0.y, c9.x, r4.z, r0.z +mul.f r1.x, r0.x, c5.x +mul.f r1.z, r1.y, c5.z +mul.f r1.y, r1.y, c5.y +add.f r0.x, r0.y, c10.x +mad.f32 r0.y, c9.w, r4.z, r0.w +mul.f r0.z, c7.z, r4.x +mul.f r0.w, c0.w, r4.x +mul.f r2.x, r0.x, c11.x +add.f r2.w, r0.y, c10.w +mad.f32 r0.x, c8.z, r4.y, r0.z +mad.f32 r0.y, c1.w, r4.y, r0.w +mad.f32 r0.x, c9.z, r4.z, r0.x +mad.f32 r0.y, c2.w, r4.z, r0.y +mul.f r0.z, c0.z, r4.x +mul.f r1.w, c0.y, r4.x +add.f r2.z, r0.x, c10.z +add.f r0.w, r0.y, c3.w +mad.f32 r0.x, c1.z, r4.y, r0.z +mad.f32 r0.y, c1.y, r4.y, r1.w +mad.f32 r0.x, c2.z, r4.z, r0.x +mad.f32 r0.y, c2.y, r4.z, r0.y +mul.f r1.w, c0.x, r4.x +mad.f32 r3.y, c6.x, r4.z, c6.y +add.f r0.z, r0.x, c3.z +add.f r0.y, r0.y, c3.y +mad.f32 r0.x, c1.x, r4.y, r1.w +mad.f32 r3.x, c6.x, r4.x, c6.y +mad.f32 r0.x, c2.x, r4.z, r0.x mov.f32f32 r1.w, (0.000000) -(rpt2)nop -mov.f32f32 r1.w, r1.w +(rpt1)nop +add.f r0.x, r0.x, c3.x end +nop ; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11) -; VERT: inputs: r3.x (0:0,cm=7,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r4.x (0:0,cm=3,il=16,b=0) -; VERT: 74 instructions, 0 half, 5 full +; VERT: inputs: r4.x (0:0,cm=7,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r3.z (0:0,cm=3,il=16,b=0) +; VERT: 48 instructions, 0 half, 5 full diff --git a/reference/0ad-alpine-valley/0ad-67.asm b/reference/0ad-alpine-valley/0ad-67.asm index 1d73264..f1c05da 100644 --- a/reference/0ad-alpine-valley/0ad-67.asm +++ b/reference/0ad-alpine-valley/0ad-67.asm @@ -8,215 +8,155 @@ @out(r2.y) out1 @out(r2.z) out2 @out(r2.w) out3 +@const(c10.x) 0x3f000000, 0x00000000, 0x3f800000, 0xbb449ba6 +@const(c11.x) 0xbf000000, 0x40000000, 0x3f800000, 0x3de38866 +@const(c12.x) 0x3fb8aa65, 0x3cf5c28f, 0x00000000, 0x00000000 +@const(c13.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.x, 4, r1.x add.f r0.y, r0.w, c10.y bary.f r0.w, 5, r1.x bary.f r1.z, 10, r1.x -add.f r1.w, r0.x, c11.x -bary.f r2.x, 8, r1.x +add.f r2.x, r0.x, c11.x +bary.f r1.w, 11, r1.x add.f r2.y, r0.w, c11.x -mov.f32f32 r1.z, r1.z -floor.f r2.z, r1.w +bary.f r2.z, 8, r1.x +floor.f r3.x, r2.x rcp r0.y, r0.y add.f r0.z, r0.z, c10.y -floor.f r2.w, r2.y -mov.f32f32 r3.x, r1.z -add.f r1.z, r1.w, (neg)r2.z +floor.f r3.y, r2.y +bary.f r2.w, 9, r1.x +add.f r2.x, r2.x, (neg)r3.x (ss)mul.f r0.y, r0.z, r0.y -add.f r0.z, r2.y, (neg)r2.w -bary.f r1.w, 11, r1.x -mov.f32f32 r1.z, r1.z +absneg.f r0.z, (neg)c8.x +add.f r2.y, r2.y, (neg)r3.y +mov.f32f32 r3.x, r2.x +sam (f32)(xyzw)r3.y, r1.z, s#0, t#0 +(sy)(ss)add.f r1.z, c13.y, (neg)r4.x +mul.f r0.z, r0.z, c8.x +mov.f32f32 r1.w, r2.y +mul.f r4.y, c10.x, r3.x +add.f r3.x, c11.y, (neg)r3.x +mul.f r0.z, r0.z, r0.y mov.f32f32 r0.y, r0.y -absneg.f r2.y, (neg)c8.x -mov.f32f32 r0.z, r0.z -mul.f r2.z, c10.x, r1.z -mov.f32f32 r1.w, r1.w -mul.f r2.y, r2.y, c8.x -add.f r2.w, c11.y, (neg)r1.z -mov.f32f32 r2.z, r2.z -mul.f r3.z, c10.x, r0.z -mul.f r2.y, r2.y, r0.y -mov.f32f32 r3.y, r1.w -add.f r0.x, r0.x, (neg)r2.z -mov.f32f32 r1.w, r3.z -mov.f32f32 r2.y, r2.y -mov.f32f32 r2.z, r2.w -mov.f32f32 r0.x, r0.x -add.f r0.w, r0.w, (neg)r1.w -mul.f r0.y, r2.y, r0.y -sam (f32)(xyzw)r2.w, r3.x, s#0, t#0 -(sy)add.f r1.w, c13.y, (neg)r3.z -add.f r2.y, c11.x, r0.x +add.f r0.x, r0.x, (neg)r4.y +mul.f r4.y, c10.x, r1.w +mov.f32f32 r4.z, r3.x +mul.f r0.y, r0.z, r0.y +mov.f32f32 r0.z, r0.x +add.f r0.w, r0.w, (neg)r4.y add.f r0.x, c11.z, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r2.y, r2.y -mov.f32f32 r0.x, r0.x mul.f r0.y, r0.y, c12.x -add.f r3.w, c11.z, r0.w -mul.f r2.y, r2.y, c3.z -mul.f r0.x, r0.x, c3.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r3.w, r3.w -mov.f32f32 r4.x, r2.y -mov.f32f32 r4.y, r0.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r2.y, r2.y -mov.f32f32 r4.z, r4.x -add.f r0.w, c11.x, r0.w -exp2 r0.y, r0.y -(ss)mov.f32f32 r0.y, r0.y -mov.f32f32 r4.x, r4.y -mov.f32f32 r5.y, r0.x -mov.f32f32 r0.x, r0.w -add.f r0.w, c13.y, (neg)r0.y -mov.f32f32 r6.x, r4.x -mul.f r3.w, r3.w, c3.w -mul.f r0.x, r0.x, c3.w -mul.f r0.w, r0.w, c8.y -mul.f r0.y, r0.y, c10.z -mov.f32f32 r4.x, r3.w -mov.f32f32 r4.y, r0.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r6.w, r2.y -mov.f32f32 r2.y, r3.w -mov.f32f32 r4.w, r4.y -bary.f r3.w, 6, r1.x -add.f r0.y, r0.y, r0.w -mov.f32f32 r0.w, r4.x -mov.f32f32 r5.z, r0.x -add.f r0.x, r3.w, c10.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r6.y, r0.w -mov.f32f32 r7.x, r2.y -mov.f32f32 r0.w, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r2.y, r0.x -mov.f32f32 r3.w, r0.x -mov.f32f32 r5.x, r0.w -max.f r0.y, r0.y, c10.y -mov.f32f32 r6.z, r2.y -mov.f32f32 r5.w, r3.w -mov.f32f32 r0.x, r0.x -mul.f r0.w, r1.w, c4.z -add.f r1.w, c13.y, (neg)r3.z -sam.s (f32)(x)r3.w, r4.z, s#2, t#2 -(sy)mov.f32f32 r2.y, r3.w -min.f r0.y, r0.y, c10.z -sam.s (f32)(x)r3.w, r6.x, s#2, t#2 -(sy)mov.f32f32 r3.w, r3.w -sam.s (f32)(x)r4.x, r5.y, s#2, t#2 -(sy)mov.f32f32 r4.x, r4.x -mov.f32f32 r2.y, r2.y -add.f r4.y, c11.y, (neg)r0.z -(ss)add.f r4.z, c13.y, (neg)r0.y -add.f r4.w, c13.y, (neg)r0.y -add.f r5.x, c13.y, (neg)r0.y -mov.f32f32 r4.y, r4.y -mul.f r4.z, r4.z, c7.z -mul.f r4.w, r4.w, c7.y -mul.f r5.x, r5.x, c7.x -mul.f r5.y, r2.z, r4.y -mov.f32f32 r7.y, r0.x -mul.f r0.x, r3.z, c10.z -mul.f r1.w, r1.w, c4.y -mul.f r2.y, r5.y, r2.y -add.f r1.z, r1.z, c10.z -add.f r0.x, r0.x, r0.w -mul.f r0.w, r3.z, c10.z -sam.s (f32)(x)r5.y, r6.w, s#2, t#2 -(sy)mov.f32f32 r5.y, r5.y -mul.f r4.y, r1.z, r4.y -add.f r5.z, c13.y, (neg)r3.z -mul.f r0.x, r3.y, r0.x -add.f r0.w, r0.w, r1.w -mad.f32 r1.w, r4.y, r4.x, r2.y -mul.f r2.y, r5.z, c4.x -mov.f32f32 r0.x, r0.x -bary.f r3.y, 2, r1.x -mov.f32f32 r1.w, r1.w -add.f r0.z, r0.z, c10.z -mul.f r0.w, r3.x, r0.w -mul.f r3.x, r3.z, c10.z -mov.f32f32 r2.x, r2.x -mul.f r2.z, r2.z, r0.z -mul.f r3.y, r0.x, r3.y -mov.f32f32 r0.w, r0.w -add.f r2.y, r3.x, r2.y -mad.f32 r1.w, r2.z, r5.y, r1.w -bary.f r2.z, 1, r1.x -mov.f32f32 r4.x, r2.x -mul.f r2.x, r2.w, r2.y -mov.f32f32 r1.w, r1.w -mul.f r0.z, r1.z, r0.z -mul.f r1.z, r0.w, r2.z -mov.f32f32 r2.x, r2.x -nop -mad.f32 r0.z, r0.z, r3.w, r1.w -bary.f r1.w, 0, r1.x -bary.f (ei)r1.x, 9, r1.x -mov.f32f32 r1.y, c10.z -mov.f32f32 r0.z, r0.z -mul.f r1.w, r2.x, r1.w -mov.f32f32 r1.x, r1.x -mov.f32f32 r2.w, r1.y -mul.f r0.z, c11.w, r0.z -mov.f32f32 r1.y, c10.y -(rpt1)nop -mov.f32f32 r0.z, r0.z -mov.f32f32 r4.y, r1.x +add.f r0.z, c11.x, r0.z +mov.f32f32 r4.y, r0.w +mul.f r4.w, r0.x, c3.z +add.f r0.x, c11.z, r0.w +mul.f r5.z, r0.z, c3.z +add.f r0.z, c11.x, r4.y +mov.f32f32 r6.y, r4.w +mul.f r5.x, r0.x, c3.w +mov.f32f32 r7.x, r5.z +mul.f r6.z, r0.z, c3.w +exp2 r0.x, r0.y +(ss)mov.f32f32 r0.y, r0.x +bary.f r0.z, 6, r1.x +mov.f32f32 r5.w, r5.x +mov.f32f32 r7.y, r6.z +add.f r0.y, c13.y, (neg)r0.y +add.f r5.y, r0.z, c10.w +add.f r0.z, c11.y, (neg)r1.w +mul.f r0.x, r0.x, c10.z +add.f r0.w, c13.y, (neg)r4.x +mov.f32f32 r7.z, r5.y +mul.f r0.y, r0.y, c8.y +mov.f32f32 r6.w, r5.y +mov.f32f32 r6.x, r5.y (rpt1)nop -mul.f r1.x, r3.y, r0.z -mul.f r1.z, r1.z, r0.z -mul.f r0.z, r1.w, r0.z +add.f r0.x, r0.x, r0.y +sam.s (f32)(x)r7.x, r7.x, s#2, t#2 +mov.f32f32 r0.y, r0.z nop -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.z, r1.z -mad.f32 r0.x, c6.z, r0.x, r1.x -mad.f32 r0.w, c6.y, r0.w, r1.z -mov.f32f32 r0.z, r0.z -sam (f32)(w)r3.x, r4.x, s#1, t#1 -(sy)cmps.f.lt r1.x, r3.w, c12.y -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.w, r0.w -mad.f32 r0.z, c6.x, r2.x, r0.z -cov.u32f32 r1.x, r1.x -mul.f r0.x, r0.y, r0.x -mul.f r0.w, r0.y, r0.w -mov.f32f32 r0.z, r0.z -cmps.f.ne r1.x, r1.x, c10.y -add.f r0.x, r0.x, r4.z -mov.f32f32 r1.z, r3.w -add.f r0.w, r0.w, r4.w -mul.f r0.y, r0.y, r0.z -nop -mov.f32f32 r0.z, r1.z -(rpt2)nop -sel.b32 r0.z, r1.y, r1.x, r0.z -add.f r0.y, r0.y, r5.x -(rpt1)nop -mul.f r0.x, r0.x, r0.z -mul.f r0.w, r0.w, r0.z -mul.f r0.y, r0.y, r0.z +(ss)nop +sam.s (f32)(x)r7.y, r6.y, s#2, t#2 +sam.s (f32)(x)r7.z, r4.w, s#2, t#2 +mul.f r0.w, r0.w, c4.z +add.f r1.w, c13.y, (neg)r4.x +mul.f r0.y, r4.z, r0.y +max.f r0.x, r0.x, c10.y +sam.s (f32)(x)r4.y, r5.z, s#2, t#2 +mul.f r4.z, r4.x, c10.z +mul.f r1.w, r1.w, c4.y +(sy)mul.f r0.y, r0.y, r7.x +add.f r2.x, r2.x, c10.z +min.f r0.x, r0.x, c10.z +add.f r0.w, r4.z, r0.w +mul.f r4.z, r4.x, c10.z +mul.f r0.z, r2.x, r0.z +(ss)add.f r4.w, c13.y, (neg)r0.x +add.f r5.x, c13.y, (neg)r0.x +add.f r5.y, c13.y, (neg)r0.x +mad.f32 r0.y, r0.z, r7.y, r0.y +add.f r0.z, r2.y, c10.z +mul.f r2.y, r4.w, c7.z +mul.f r4.w, r5.x, c7.y +mul.f r5.x, r5.y, c7.x +mul.f r3.x, r3.x, r0.z +add.f r1.w, r4.z, r1.w +mul.f r1.z, r1.z, c4.x +mul.f r0.w, r3.w, r0.w +mad.f32 r0.y, r3.x, r4.y, r0.y +mul.f r0.z, r2.x, r0.z +mul.f r2.x, r4.x, c10.z +mov.f32f32 r3.x, r0.w +bary.f r3.w, 2, r1.x +mad.f32 r0.y, r0.z, r7.z, r0.y +mul.f r0.z, r3.z, r1.w +add.f r1.z, r2.x, r1.z +mul.f r1.w, r3.x, r3.w +mul.f r0.y, c11.w, r0.y +mov.f32f32 r2.x, r0.z +mul.f r1.z, r3.y, r1.z +bary.f r3.x, 1, r1.x +mov.f32f32 r3.y, r0.y +sam (f32)(w)r3.z, r2.z, s#1, t#1 +(sy)(ss)cmps.f.lt r2.z, r4.y, c12.y +mov.f32f32 r3.z, r1.z +mul.f r2.x, r2.x, r3.x +mul.f r1.w, r1.w, r3.y +bary.f (ei)r1.x, 0, r1.x +mad.f32 r0.w, c6.z, r0.w, r1.w +mul.f r1.y, r2.x, r3.y +cov.u32f32 r1.w, r2.z +mov.f32f32 r2.w, c10.z +mul.f r0.w, r0.x, r0.w +mad.f32 r0.z, c6.y, r0.z, r1.y +mul.f r1.x, r3.z, r1.x +cmps.f.ne r1.y, r1.w, c10.y +add.f r0.w, r0.w, r2.y +mov.f32f32 r1.w, c10.y +mul.f r0.z, r0.x, r0.z +mul.f r0.y, r1.x, r0.y nop -mul.f r0.x, r0.x, c5.z -mul.f r0.z, r0.w, c5.y -mul.f r0.y, r0.y, c5.x +sel.b32 r1.x, r1.w, r1.y, r4.y +add.f r0.z, r0.z, r4.w +mad.f32 r0.y, c6.x, r1.z, r0.y nop -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.y, r0.y +mul.f r0.w, r0.w, r1.x +mul.f r0.z, r0.z, r1.x +(rpt1)nop +mul.f r2.z, r0.w, c5.z +mul.f r2.y, r0.z, c5.y +mul.f r0.x, r0.x, r0.y +(rpt2)nop +add.f r0.x, r0.x, r5.x +(rpt2)nop +mul.f r0.x, r0.x, r1.x +(rpt2)nop +mul.f r2.x, r0.x, c5.x +end nop -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.y, r0.y nop -mov.f32f32 r2.z, r0.x -mov.f32f32 r2.y, r0.z -mov.f32f32 r2.x, r0.y -end nop ; FRAG: outputs: r2.x (1:0) ; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r2.x (5:10,cm=f,il=12,b=1) r1.x (5:11,cm=f,il=16,b=1) -; FRAG: 212 instructions, 0 half, 8 full +; FRAG: 149 instructions, 0 half, 8 full diff --git a/reference/0ad-alpine-valley/0ad-68.asm b/reference/0ad-alpine-valley/0ad-68.asm index 035c379..5b14079 100644 --- a/reference/0ad-alpine-valley/0ad-68.asm +++ b/reference/0ad-alpine-valley/0ad-68.asm @@ -1,13 +1,13 @@ ; options: ; VERT: new compiler -@in(r3.x) in0 -@in(r3.y) in1 -@in(r3.z) in2 +@in(r4.x) in0 +@in(r4.y) in1 +@in(r4.z) in2 @in(r0.x) in4 @in(r0.y) in5 @in(r0.z) in6 -@in(r4.x) in8 -@in(r4.y) in9 +@in(r3.z) in8 +@in(r3.w) in9 @out(r0.x) out0 @out(r0.y) out1 @out(r0.z) out2 @@ -24,79 +24,57 @@ @out(r3.y) out13 @out(r3.z) out14 @out(r3.w) out15 +@const(c12.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 +@const(c13.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)mul.f r0.x, r0.x, (neg)c4.x -mul.f r0.w, c7.w, r3.x +mul.f r0.w, c7.y, r4.x mad.f32 r0.x, (neg)c4.y, r0.y, r0.x -mad.f32 r0.y, c8.w, r3.y, r0.w -mul.f r0.w, c7.z, r3.x -mul.f r1.x, c7.y, r3.x -mov.f32f32 r0.x, r0.x -mad.f32 r0.y, c9.w, r3.z, r0.y +mad.f32 r0.y, c8.y, r4.y, r0.w mad.f32 r0.x, (neg)c4.z, r0.z, r0.x -mad.f32 r0.z, c8.z, r3.y, r0.w -mad.f32 r0.w, c8.y, r3.y, r1.x -mul.f r1.w, c7.x, r3.x +mad.f32 r0.y, c9.y, r4.z, r0.y +mul.f r0.z, c7.x, r4.x +mul.f r0.w, c7.w, r4.x max.f r0.x, c12.x, r0.x -add.f r0.y, r0.y, c10.w -mad.f32 r0.z, c9.z, r3.z, r0.z -mad.f32 r0.w, c9.y, r3.z, r0.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y -add.f r0.z, r0.z, c10.z -add.f r0.w, r0.w, c10.y -mul.f r1.x, r0.x, c5.z -mul.f r1.y, r0.x, c5.y -mul.f r0.x, r0.x, c5.x -mov.f32f32 r2.w, r0.y -mov.f32f32 r0.y, r1.x -mov.f32f32 r1.x, r1.y -mov.f32f32 r0.x, r0.x -nop -mov.f32f32 r1.z, r0.y -mov.f32f32 r1.y, r1.x -mov.f32f32 r1.x, r0.x -mov.f32f32 r0.x, r0.z -mul.f r0.y, r0.w, c11.y -mad.f32 r0.z, c8.x, r3.y, r1.w -mul.f r0.w, c0.w, r3.x -mov.f32f32 r2.z, r0.x -mov.f32f32 r2.y, r0.y -mad.f32 r0.x, c9.x, r3.z, r0.z -mad.f32 r0.y, c1.w, r3.y, r0.w -mul.f r0.z, c0.z, r3.x -mul.f r0.w, c0.y, r3.x -add.f r0.x, r0.x, c10.x -mad.f32 r0.y, c2.w, r3.z, r0.y -mad.f32 r0.z, c1.z, r3.y, r0.z -mad.f32 r0.w, c1.y, r3.y, r0.w -mul.f r0.x, r0.x, c11.x -add.f r0.y, r0.y, c3.w -mad.f32 r0.z, c2.z, r3.z, r0.z -mad.f32 r1.w, c2.y, r3.z, r0.w -mov.f32f32 r2.x, r0.x -mov.f32f32 r0.w, r0.y -add.f r0.x, r0.z, c3.z -add.f r0.y, r1.w, c3.y -mul.f r1.w, c0.x, r3.x -mov.f32f32 r3.w, r4.y -mov.f32f32 r0.z, r0.x -mov.f32f32 r0.y, r0.y -mad.f32 r0.x, c1.x, r3.y, r1.w -mov.f32f32 r3.w, r3.w -mad.f32 r0.x, c2.x, r3.z, r0.x -mov.f32f32 r1.w, r4.x -mad.f32 r3.y, c6.x, r3.z, c6.y -mad.f32 r3.x, c6.x, r3.x, c6.y -add.f r0.x, r0.x, c3.x -mov.f32f32 r3.z, r1.w -mov.f32f32 r3.y, r3.y -mov.f32f32 r3.x, r3.x -mov.f32f32 r0.x, r0.x +add.f r0.y, r0.y, c10.y +mad.f32 r0.z, c8.x, r4.y, r0.z +mad.f32 r0.w, c8.w, r4.y, r0.w +mov.f32f32 r1.y, r0.x +mul.f r2.y, r0.y, c11.y +mad.f32 r0.y, c9.x, r4.z, r0.z +mul.f r1.x, r0.x, c5.x +mul.f r1.z, r1.y, c5.z +mul.f r1.y, r1.y, c5.y +add.f r0.x, r0.y, c10.x +mad.f32 r0.y, c9.w, r4.z, r0.w +mul.f r0.z, c7.z, r4.x +mul.f r0.w, c0.w, r4.x +mul.f r2.x, r0.x, c11.x +add.f r2.w, r0.y, c10.w +mad.f32 r0.x, c8.z, r4.y, r0.z +mad.f32 r0.y, c1.w, r4.y, r0.w +mad.f32 r0.x, c9.z, r4.z, r0.x +mad.f32 r0.y, c2.w, r4.z, r0.y +mul.f r0.z, c0.z, r4.x +mul.f r1.w, c0.y, r4.x +add.f r2.z, r0.x, c10.z +add.f r0.w, r0.y, c3.w +mad.f32 r0.x, c1.z, r4.y, r0.z +mad.f32 r0.y, c1.y, r4.y, r1.w +mad.f32 r0.x, c2.z, r4.z, r0.x +mad.f32 r0.y, c2.y, r4.z, r0.y +mul.f r1.w, c0.x, r4.x +mad.f32 r3.y, c6.x, r4.z, c6.y +add.f r0.z, r0.x, c3.z +add.f r0.y, r0.y, c3.y +mad.f32 r0.x, c1.x, r4.y, r1.w +mad.f32 r3.x, c6.x, r4.x, c6.y +mad.f32 r0.x, c2.x, r4.z, r0.x mov.f32f32 r1.w, (0.000000) -(rpt2)nop -mov.f32f32 r1.w, r1.w +(rpt1)nop +add.f r0.x, r0.x, c3.x end +nop ; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11) -; VERT: inputs: r3.x (0:0,cm=7,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r4.x (0:0,cm=3,il=16,b=0) -; VERT: 74 instructions, 0 half, 5 full +; VERT: inputs: r4.x (0:0,cm=7,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r3.z (0:0,cm=3,il=16,b=0) +; VERT: 48 instructions, 0 half, 5 full diff --git a/reference/0ad-alpine-valley/0ad-69.asm b/reference/0ad-alpine-valley/0ad-69.asm index 035c379..5b14079 100644 --- a/reference/0ad-alpine-valley/0ad-69.asm +++ b/reference/0ad-alpine-valley/0ad-69.asm @@ -1,13 +1,13 @@ ; options: ; VERT: new compiler -@in(r3.x) in0 -@in(r3.y) in1 -@in(r3.z) in2 +@in(r4.x) in0 +@in(r4.y) in1 +@in(r4.z) in2 @in(r0.x) in4 @in(r0.y) in5 @in(r0.z) in6 -@in(r4.x) in8 -@in(r4.y) in9 +@in(r3.z) in8 +@in(r3.w) in9 @out(r0.x) out0 @out(r0.y) out1 @out(r0.z) out2 @@ -24,79 +24,57 @@ @out(r3.y) out13 @out(r3.z) out14 @out(r3.w) out15 +@const(c12.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 +@const(c13.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)mul.f r0.x, r0.x, (neg)c4.x -mul.f r0.w, c7.w, r3.x +mul.f r0.w, c7.y, r4.x mad.f32 r0.x, (neg)c4.y, r0.y, r0.x -mad.f32 r0.y, c8.w, r3.y, r0.w -mul.f r0.w, c7.z, r3.x -mul.f r1.x, c7.y, r3.x -mov.f32f32 r0.x, r0.x -mad.f32 r0.y, c9.w, r3.z, r0.y +mad.f32 r0.y, c8.y, r4.y, r0.w mad.f32 r0.x, (neg)c4.z, r0.z, r0.x -mad.f32 r0.z, c8.z, r3.y, r0.w -mad.f32 r0.w, c8.y, r3.y, r1.x -mul.f r1.w, c7.x, r3.x +mad.f32 r0.y, c9.y, r4.z, r0.y +mul.f r0.z, c7.x, r4.x +mul.f r0.w, c7.w, r4.x max.f r0.x, c12.x, r0.x -add.f r0.y, r0.y, c10.w -mad.f32 r0.z, c9.z, r3.z, r0.z -mad.f32 r0.w, c9.y, r3.z, r0.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y -add.f r0.z, r0.z, c10.z -add.f r0.w, r0.w, c10.y -mul.f r1.x, r0.x, c5.z -mul.f r1.y, r0.x, c5.y -mul.f r0.x, r0.x, c5.x -mov.f32f32 r2.w, r0.y -mov.f32f32 r0.y, r1.x -mov.f32f32 r1.x, r1.y -mov.f32f32 r0.x, r0.x -nop -mov.f32f32 r1.z, r0.y -mov.f32f32 r1.y, r1.x -mov.f32f32 r1.x, r0.x -mov.f32f32 r0.x, r0.z -mul.f r0.y, r0.w, c11.y -mad.f32 r0.z, c8.x, r3.y, r1.w -mul.f r0.w, c0.w, r3.x -mov.f32f32 r2.z, r0.x -mov.f32f32 r2.y, r0.y -mad.f32 r0.x, c9.x, r3.z, r0.z -mad.f32 r0.y, c1.w, r3.y, r0.w -mul.f r0.z, c0.z, r3.x -mul.f r0.w, c0.y, r3.x -add.f r0.x, r0.x, c10.x -mad.f32 r0.y, c2.w, r3.z, r0.y -mad.f32 r0.z, c1.z, r3.y, r0.z -mad.f32 r0.w, c1.y, r3.y, r0.w -mul.f r0.x, r0.x, c11.x -add.f r0.y, r0.y, c3.w -mad.f32 r0.z, c2.z, r3.z, r0.z -mad.f32 r1.w, c2.y, r3.z, r0.w -mov.f32f32 r2.x, r0.x -mov.f32f32 r0.w, r0.y -add.f r0.x, r0.z, c3.z -add.f r0.y, r1.w, c3.y -mul.f r1.w, c0.x, r3.x -mov.f32f32 r3.w, r4.y -mov.f32f32 r0.z, r0.x -mov.f32f32 r0.y, r0.y -mad.f32 r0.x, c1.x, r3.y, r1.w -mov.f32f32 r3.w, r3.w -mad.f32 r0.x, c2.x, r3.z, r0.x -mov.f32f32 r1.w, r4.x -mad.f32 r3.y, c6.x, r3.z, c6.y -mad.f32 r3.x, c6.x, r3.x, c6.y -add.f r0.x, r0.x, c3.x -mov.f32f32 r3.z, r1.w -mov.f32f32 r3.y, r3.y -mov.f32f32 r3.x, r3.x -mov.f32f32 r0.x, r0.x +add.f r0.y, r0.y, c10.y +mad.f32 r0.z, c8.x, r4.y, r0.z +mad.f32 r0.w, c8.w, r4.y, r0.w +mov.f32f32 r1.y, r0.x +mul.f r2.y, r0.y, c11.y +mad.f32 r0.y, c9.x, r4.z, r0.z +mul.f r1.x, r0.x, c5.x +mul.f r1.z, r1.y, c5.z +mul.f r1.y, r1.y, c5.y +add.f r0.x, r0.y, c10.x +mad.f32 r0.y, c9.w, r4.z, r0.w +mul.f r0.z, c7.z, r4.x +mul.f r0.w, c0.w, r4.x +mul.f r2.x, r0.x, c11.x +add.f r2.w, r0.y, c10.w +mad.f32 r0.x, c8.z, r4.y, r0.z +mad.f32 r0.y, c1.w, r4.y, r0.w +mad.f32 r0.x, c9.z, r4.z, r0.x +mad.f32 r0.y, c2.w, r4.z, r0.y +mul.f r0.z, c0.z, r4.x +mul.f r1.w, c0.y, r4.x +add.f r2.z, r0.x, c10.z +add.f r0.w, r0.y, c3.w +mad.f32 r0.x, c1.z, r4.y, r0.z +mad.f32 r0.y, c1.y, r4.y, r1.w +mad.f32 r0.x, c2.z, r4.z, r0.x +mad.f32 r0.y, c2.y, r4.z, r0.y +mul.f r1.w, c0.x, r4.x +mad.f32 r3.y, c6.x, r4.z, c6.y +add.f r0.z, r0.x, c3.z +add.f r0.y, r0.y, c3.y +mad.f32 r0.x, c1.x, r4.y, r1.w +mad.f32 r3.x, c6.x, r4.x, c6.y +mad.f32 r0.x, c2.x, r4.z, r0.x mov.f32f32 r1.w, (0.000000) -(rpt2)nop -mov.f32f32 r1.w, r1.w +(rpt1)nop +add.f r0.x, r0.x, c3.x end +nop ; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11) -; VERT: inputs: r3.x (0:0,cm=7,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r4.x (0:0,cm=3,il=16,b=0) -; VERT: 74 instructions, 0 half, 5 full +; VERT: inputs: r4.x (0:0,cm=7,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r3.z (0:0,cm=3,il=16,b=0) +; VERT: 48 instructions, 0 half, 5 full diff --git a/reference/0ad-alpine-valley/0ad-70.asm b/reference/0ad-alpine-valley/0ad-70.asm index 4377ecf..227a081 100644 --- a/reference/0ad-alpine-valley/0ad-70.asm +++ b/reference/0ad-alpine-valley/0ad-70.asm @@ -8,199 +8,135 @@ @out(r2.y) out1 @out(r2.z) out2 @out(r2.w) out3 +@const(c9.x) 0x3f000000, 0x00000000, 0x3f800000, 0xbb449ba6 +@const(c10.x) 0xbf000000, 0x40000000, 0x3f800000, 0x3de38866 +@const(c11.x) 0x3fb8aa65, 0x3cf5c28f, 0x00000000, 0x00000000 +@const(c12.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.x, 4, r1.x add.f r0.y, r0.w, c9.y bary.f r0.w, 5, r1.x bary.f r1.z, 8, r1.x -add.f r1.w, r0.x, c10.x -bary.f r2.x, 10, r1.x -bary.f r2.y, 6, r1.x -add.f r2.z, r0.w, c10.x -floor.f r2.w, r1.w +add.f r2.x, r0.x, c10.x +bary.f r1.w, 9, r1.x +add.f r2.y, r0.w, c10.x +bary.f r2.z, 6, r1.x +floor.f r2.w, r2.x rcp r0.y, r0.y add.f r0.z, r0.z, c9.y -mov.f32f32 r1.z, r1.z -floor.f r3.x, r2.z -add.f r1.w, r1.w, (neg)r2.w +floor.f r3.x, r2.y +add.f r3.w, r2.z, c9.w +add.f r2.x, r2.x, (neg)r2.w (ss)mul.f r0.y, r0.z, r0.y -mov.f32f32 r3.y, r1.z -add.f r0.z, r2.z, (neg)r3.x -mov.f32f32 r1.z, r1.w +absneg.f r0.z, (neg)c7.x +add.f r2.y, r2.y, (neg)r3.x +mov.f32f32 r2.z, r2.x +add.f r2.x, r2.x, c9.z +mul.f r0.z, r0.z, c7.x +sam (f32)(w)r4.x, r1.z, s#1, t#1 +(ss)mov.f32f32 r1.z, r2.y +mul.f r1.w, c9.x, r2.z +add.f r2.z, c10.y, (neg)r2.z +mul.f r0.z, r0.z, r0.y mov.f32f32 r0.y, r0.y -absneg.f r1.w, (neg)c7.x -mov.f32f32 r0.z, r0.z -mul.f r2.z, c9.x, r1.z -add.f r2.w, c10.y, (neg)r1.z -mul.f r1.w, r1.w, c7.x -bary.f r3.x, 9, r1.x -mov.f32f32 r2.z, r2.z -mul.f r3.z, c9.x, r0.z -mul.f r1.w, r1.w, r0.y -mov.f32f32 r2.w, r2.w -add.f r0.x, r0.x, (neg)r2.z -mov.f32f32 r2.z, r3.z -mov.f32f32 r1.w, r1.w -add.f r3.z, c10.y, (neg)r0.z -mov.f32f32 r0.x, r0.x -add.f r0.w, r0.w, (neg)r2.z -mul.f r0.y, r1.w, r0.y -mov.f32f32 r1.w, r3.z -add.f r2.z, c10.x, r0.x +add.f r0.x, r0.x, (neg)r1.w +mul.f r1.w, c9.x, r1.z +mov.f32f32 r2.w, r2.z +mul.f r0.y, r0.z, r0.y +mov.f32f32 r0.z, r0.x +add.f r0.w, r0.w, (neg)r1.w add.f r0.x, c10.z, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r2.z, r2.z -mov.f32f32 r0.x, r0.x mul.f r0.y, r0.y, c11.x -add.f r3.z, c10.z, r0.w -mul.f r2.z, r2.z, c3.z -mul.f r0.x, r0.x, c3.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r3.z, r3.z -mov.f32f32 r3.w, r2.z -mov.f32f32 r4.x, r0.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r2.z, r2.z -mov.f32f32 r4.y, r3.w -add.f r0.w, c10.x, r0.w -exp2 r0.y, r0.y -(ss)mov.f32f32 r0.y, r0.y -mov.f32f32 r3.w, r4.x -mov.f32f32 r5.x, r0.x -mov.f32f32 r0.x, r0.w -add.f r0.w, c12.y, (neg)r0.y -mov.f32f32 r5.w, r3.w -mul.f r3.z, r3.z, c3.w -mul.f r0.x, r0.x, c3.w -mul.f r0.w, r0.w, c7.y -mul.f r0.y, r0.y, c9.z -mov.f32f32 r3.w, r3.z -mov.f32f32 r4.x, r0.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r6.z, r2.z -mov.f32f32 r2.z, r3.z -mov.f32f32 r4.z, r4.x -add.f r2.y, r2.y, c9.w -add.f r0.y, r0.y, r0.w -mov.f32f32 r0.w, r3.w -mov.f32f32 r5.y, r0.x -mov.f32f32 r0.x, r2.y -mov.f32f32 r0.y, r0.y -mov.f32f32 r6.x, r0.w -mov.f32f32 r0.w, r2.y -mov.f32f32 r4.w, r0.x -mov.f32f32 r0.x, r0.y -mov.f32f32 r0.y, r2.y -mov.f32f32 r6.w, r2.z -mov.f32f32 r6.y, r0.w -mov.f32f32 r0.w, r2.y -mov.f32f32 r5.z, r0.y -sam.s (f32)(x)r3.z, r4.y, s#2, t#2 -(sy)mov.f32f32 r0.y, r3.z +add.f r0.z, c10.x, r0.z +mov.f32f32 r1.w, r0.w +mul.f r3.y, r0.x, c3.z +add.f r0.x, c10.z, r0.w +mul.f r4.x, r0.z, c3.z +add.f r0.z, c10.x, r1.w +mov.f32f32 r5.x, r3.y +mul.f r3.z, r0.x, c3.w +mov.f32f32 r5.w, r4.x +mul.f r5.y, r0.z, c3.w +exp2 r0.x, r0.y +(ss)mov.f32f32 r0.y, r0.x +mov.f32f32 r5.z, r3.w +mov.f32f32 r4.y, r3.z +mov.f32f32 r6.x, r5.y +mov.f32f32 r6.y, r3.w +add.f r0.y, c12.y, (neg)r0.y +mov.f32f32 r4.z, r3.w +sam.s (f32)(x)r3.x, r3.y, s#2, t#2 +add.f r0.z, c10.y, (neg)r1.z +sam.s (f32)(x)r6.z, r5.x, s#2, t#2 +mul.f r0.x, r0.x, c9.z +add.f r0.w, r2.y, c9.z +mul.f r0.y, r0.y, c7.y +(ss)nop +sam.s (f32)(x)r5.x, r5.w, s#2, t#2 +mov.f32f32 r1.z, r0.z +mul.f r0.z, r2.x, r0.z +mul.f r1.w, r2.z, r0.w +add.f r0.x, r0.x, r0.y +mul.f r0.y, r2.w, r1.z +sam.s (f32)(x)r3.y, r4.x, s#2, t#2 +mul.f r0.w, r2.x, r0.w +(sy)cmps.f.lt r1.z, r4.w, c11.y +bary.f r2.x, 10, r1.x +mul.f r0.y, r0.y, r5.x max.f r0.x, r0.x, c9.y -mov.f32f32 r7.x, r0.w -mul.f r0.w, r2.w, r1.w -mov.f32f32 r0.y, r0.y +mad.f32 r0.y, r0.z, r6.z, r0.y +cov.u32f32 r0.z, r1.z +mad.f32 r0.y, r1.w, r3.y, r0.y min.f r0.x, r0.x, c9.z -sam.s (f32)(x)r3.z, r5.w, s#2, t#2 -nop -(sy)mov.f32f32 r2.y, r3.z -mul.f r0.y, r0.w, r0.y -sam.s (f32)(x)r3.z, r5.x, s#2, t#2 -(sy)mov.f32f32 r0.w, r3.z -add.f r1.z, r1.z, c9.z -add.f r2.z, c12.y, (neg)r0.x -add.f r3.z, c12.y, (neg)r0.x -add.f r3.w, c12.y, (neg)r0.x -mul.f r1.w, r1.z, r1.w -mul.f r2.z, r2.z, c6.z -mul.f r4.x, r3.z, c6.y -mul.f r3.w, r3.w, c6.x -mad.f32 r0.y, r1.w, r0.w, r0.y -(ss)nop -sam.s (f32)(x)r4.y, r6.z, s#2, t#2 -add.f r0.z, r0.z, c9.z -(sy)mov.f32f32 r0.w, r4.y -mov.f32f32 r1.w, r3.x -mov.f32f32 r0.y, r0.y -mul.f r2.w, r2.w, r0.z -mul.f r0.z, r1.z, r0.z -mov.f32f32 r3.z, r1.w -mov.f32f32 r1.z, r2.x -mad.f32 r0.y, r2.w, r0.w, r0.y -bary.f r0.w, 11, r1.x -mov.f32f32 r1.w, c9.z -bary.f r2.x, 2, r1.x -mov.f32f32 r0.y, r0.y -sam (f32)(w)r2.w, r3.y, s#1, t#1 -(sy)cmps.f.lt r2.w, r3.z, c11.y -mad.f32 r0.y, r0.z, r2.y, r0.y -mov.f32f32 r0.z, r3.z -mov.f32f32 r3.x, r1.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.y, r0.y -cov.u32f32 r1.z, r2.w -mov.f32f32 r0.z, r0.z -(ss)mov.f32f32 r3.y, r0.w +mad.f32 r0.y, r0.w, r3.x, r0.y +cmps.f.ne r0.z, r0.z, c9.y +(rpt1)nop mul.f r0.y, c10.w, r0.y -cmps.f.ne r0.w, r1.z, c9.y +bary.f r2.y, 11, r1.x +add.f r0.w, c12.y, (neg)r0.x +add.f r1.z, c12.y, (neg)r0.x +mov.f32f32 r1.w, r0.y +add.f r2.z, c12.y, (neg)r0.x (rpt1)nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.z, c9.y -sam (f32)(xyz)r4.y, r3.x, s#0, t#0 -(sy)mul.f r2.x, r4.w, r2.x +sam (f32)(xyz)r2.w, r2.x, s#0, t#0 +(ss)bary.f r2.x, 2, r1.x bary.f r2.y, 1, r1.x bary.f (ei)r1.x, 0, r1.x -sel.b32 r0.z, r1.z, r0.w, r0.z -mul.f r0.w, r2.x, r0.y -mul.f r1.y, r4.z, r2.y -mul.f r1.x, r4.y, r1.x -mov.f32f32 r2.w, r1.w -mov.f32f32 r0.w, r0.w -mul.f r1.y, r1.y, r0.y -mad.f32 r0.w, c5.z, r4.w, r0.w +mul.f r0.w, r0.w, c6.z +(sy)mul.f r1.y, r3.y, r2.x +mul.f r2.x, r3.x, r2.y +mul.f r1.x, r2.w, r1.x +mul.f r1.z, r1.z, c6.y +mul.f r1.y, r1.y, r1.w +mul.f r1.w, r2.x, r1.w +mad.f32 r1.y, c5.z, r3.y, r1.y +mad.f32 r1.w, c5.y, r3.x, r1.w mul.f r0.y, r1.x, r0.y -(rpt1)nop -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.x, r1.y -mov.f32f32 r0.y, r0.y -mad.f32 r1.x, c5.y, r4.z, r1.x -mul.f r0.w, r0.x, r0.w -mad.f32 r0.y, c5.x, r4.y, r0.y -(rpt1)nop -add.f r0.w, r0.w, r2.z -mov.f32f32 r1.x, r1.x -mov.f32f32 r0.y, r0.y -nop -mul.f r0.w, r0.w, r0.z -mul.f r1.x, r0.x, r1.x +mul.f r1.x, r2.z, c6.x +mul.f r1.y, r0.x, r1.y +mul.f r1.w, r0.x, r1.w +mad.f32 r0.y, c5.x, r2.w, r0.y +mov.f32f32 r2.x, c9.y +add.f r0.w, r1.y, r0.w +add.f r1.y, r1.w, r1.z mul.f r0.x, r0.x, r0.y -nop -mul.f r0.y, r0.w, c4.z -add.f r0.w, r1.x, r4.x -add.f r0.x, r0.x, r3.w -nop -mov.f32f32 r0.y, r0.y -mul.f r0.w, r0.w, r0.z -mul.f r0.x, r0.x, r0.z -nop -mov.f32f32 r0.y, r0.y -mul.f r0.z, r0.w, c4.y -mul.f r0.x, r0.x, c4.x -nop -mov.f32f32 r2.z, r0.y -mov.f32f32 r0.y, r0.z -mov.f32f32 r0.x, r0.x +sel.b32 r0.y, r2.x, r0.z, r4.w +mov.f32f32 r2.w, c9.z (rpt1)nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x +mul.f r0.z, r0.w, r0.y +mul.f r0.w, r1.y, r0.y (rpt1)nop -mov.f32f32 r2.y, r0.y -mov.f32f32 r2.x, r0.x +mul.f r2.z, r0.z, c4.z +mul.f r2.y, r0.w, c4.y +add.f r0.x, r0.x, r1.x +(rpt2)nop +mul.f r0.x, r0.x, r0.y +(rpt2)nop +mul.f r2.x, r0.x, c4.x end nop nop +nop ; FRAG: outputs: r2.x (1:0) -; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r2.x (5:10,cm=f,il=12,b=1) r0.x (5:11,cm=f,il=16,b=1) -; FRAG: 195 instructions, 0 half, 8 full +; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r2.x (5:10,cm=f,il=12,b=1) r1.z (5:11,cm=f,il=16,b=1) +; FRAG: 129 instructions, 0 half, 7 full diff --git a/reference/0ad-alpine-valley/0ad-71.asm b/reference/0ad-alpine-valley/0ad-71.asm index 035c379..5b14079 100644 --- a/reference/0ad-alpine-valley/0ad-71.asm +++ b/reference/0ad-alpine-valley/0ad-71.asm @@ -1,13 +1,13 @@ ; options: ; VERT: new compiler -@in(r3.x) in0 -@in(r3.y) in1 -@in(r3.z) in2 +@in(r4.x) in0 +@in(r4.y) in1 +@in(r4.z) in2 @in(r0.x) in4 @in(r0.y) in5 @in(r0.z) in6 -@in(r4.x) in8 -@in(r4.y) in9 +@in(r3.z) in8 +@in(r3.w) in9 @out(r0.x) out0 @out(r0.y) out1 @out(r0.z) out2 @@ -24,79 +24,57 @@ @out(r3.y) out13 @out(r3.z) out14 @out(r3.w) out15 +@const(c12.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 +@const(c13.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)mul.f r0.x, r0.x, (neg)c4.x -mul.f r0.w, c7.w, r3.x +mul.f r0.w, c7.y, r4.x mad.f32 r0.x, (neg)c4.y, r0.y, r0.x -mad.f32 r0.y, c8.w, r3.y, r0.w -mul.f r0.w, c7.z, r3.x -mul.f r1.x, c7.y, r3.x -mov.f32f32 r0.x, r0.x -mad.f32 r0.y, c9.w, r3.z, r0.y +mad.f32 r0.y, c8.y, r4.y, r0.w mad.f32 r0.x, (neg)c4.z, r0.z, r0.x -mad.f32 r0.z, c8.z, r3.y, r0.w -mad.f32 r0.w, c8.y, r3.y, r1.x -mul.f r1.w, c7.x, r3.x +mad.f32 r0.y, c9.y, r4.z, r0.y +mul.f r0.z, c7.x, r4.x +mul.f r0.w, c7.w, r4.x max.f r0.x, c12.x, r0.x -add.f r0.y, r0.y, c10.w -mad.f32 r0.z, c9.z, r3.z, r0.z -mad.f32 r0.w, c9.y, r3.z, r0.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y -add.f r0.z, r0.z, c10.z -add.f r0.w, r0.w, c10.y -mul.f r1.x, r0.x, c5.z -mul.f r1.y, r0.x, c5.y -mul.f r0.x, r0.x, c5.x -mov.f32f32 r2.w, r0.y -mov.f32f32 r0.y, r1.x -mov.f32f32 r1.x, r1.y -mov.f32f32 r0.x, r0.x -nop -mov.f32f32 r1.z, r0.y -mov.f32f32 r1.y, r1.x -mov.f32f32 r1.x, r0.x -mov.f32f32 r0.x, r0.z -mul.f r0.y, r0.w, c11.y -mad.f32 r0.z, c8.x, r3.y, r1.w -mul.f r0.w, c0.w, r3.x -mov.f32f32 r2.z, r0.x -mov.f32f32 r2.y, r0.y -mad.f32 r0.x, c9.x, r3.z, r0.z -mad.f32 r0.y, c1.w, r3.y, r0.w -mul.f r0.z, c0.z, r3.x -mul.f r0.w, c0.y, r3.x -add.f r0.x, r0.x, c10.x -mad.f32 r0.y, c2.w, r3.z, r0.y -mad.f32 r0.z, c1.z, r3.y, r0.z -mad.f32 r0.w, c1.y, r3.y, r0.w -mul.f r0.x, r0.x, c11.x -add.f r0.y, r0.y, c3.w -mad.f32 r0.z, c2.z, r3.z, r0.z -mad.f32 r1.w, c2.y, r3.z, r0.w -mov.f32f32 r2.x, r0.x -mov.f32f32 r0.w, r0.y -add.f r0.x, r0.z, c3.z -add.f r0.y, r1.w, c3.y -mul.f r1.w, c0.x, r3.x -mov.f32f32 r3.w, r4.y -mov.f32f32 r0.z, r0.x -mov.f32f32 r0.y, r0.y -mad.f32 r0.x, c1.x, r3.y, r1.w -mov.f32f32 r3.w, r3.w -mad.f32 r0.x, c2.x, r3.z, r0.x -mov.f32f32 r1.w, r4.x -mad.f32 r3.y, c6.x, r3.z, c6.y -mad.f32 r3.x, c6.x, r3.x, c6.y -add.f r0.x, r0.x, c3.x -mov.f32f32 r3.z, r1.w -mov.f32f32 r3.y, r3.y -mov.f32f32 r3.x, r3.x -mov.f32f32 r0.x, r0.x +add.f r0.y, r0.y, c10.y +mad.f32 r0.z, c8.x, r4.y, r0.z +mad.f32 r0.w, c8.w, r4.y, r0.w +mov.f32f32 r1.y, r0.x +mul.f r2.y, r0.y, c11.y +mad.f32 r0.y, c9.x, r4.z, r0.z +mul.f r1.x, r0.x, c5.x +mul.f r1.z, r1.y, c5.z +mul.f r1.y, r1.y, c5.y +add.f r0.x, r0.y, c10.x +mad.f32 r0.y, c9.w, r4.z, r0.w +mul.f r0.z, c7.z, r4.x +mul.f r0.w, c0.w, r4.x +mul.f r2.x, r0.x, c11.x +add.f r2.w, r0.y, c10.w +mad.f32 r0.x, c8.z, r4.y, r0.z +mad.f32 r0.y, c1.w, r4.y, r0.w +mad.f32 r0.x, c9.z, r4.z, r0.x +mad.f32 r0.y, c2.w, r4.z, r0.y +mul.f r0.z, c0.z, r4.x +mul.f r1.w, c0.y, r4.x +add.f r2.z, r0.x, c10.z +add.f r0.w, r0.y, c3.w +mad.f32 r0.x, c1.z, r4.y, r0.z +mad.f32 r0.y, c1.y, r4.y, r1.w +mad.f32 r0.x, c2.z, r4.z, r0.x +mad.f32 r0.y, c2.y, r4.z, r0.y +mul.f r1.w, c0.x, r4.x +mad.f32 r3.y, c6.x, r4.z, c6.y +add.f r0.z, r0.x, c3.z +add.f r0.y, r0.y, c3.y +mad.f32 r0.x, c1.x, r4.y, r1.w +mad.f32 r3.x, c6.x, r4.x, c6.y +mad.f32 r0.x, c2.x, r4.z, r0.x mov.f32f32 r1.w, (0.000000) -(rpt2)nop -mov.f32f32 r1.w, r1.w +(rpt1)nop +add.f r0.x, r0.x, c3.x end +nop ; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11) -; VERT: inputs: r3.x (0:0,cm=7,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r4.x (0:0,cm=3,il=16,b=0) -; VERT: 74 instructions, 0 half, 5 full +; VERT: inputs: r4.x (0:0,cm=7,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r3.z (0:0,cm=3,il=16,b=0) +; VERT: 48 instructions, 0 half, 5 full diff --git a/reference/0ad-alpine-valley/0ad-72.asm b/reference/0ad-alpine-valley/0ad-72.asm index 035c379..5b14079 100644 --- a/reference/0ad-alpine-valley/0ad-72.asm +++ b/reference/0ad-alpine-valley/0ad-72.asm @@ -1,13 +1,13 @@ ; options: ; VERT: new compiler -@in(r3.x) in0 -@in(r3.y) in1 -@in(r3.z) in2 +@in(r4.x) in0 +@in(r4.y) in1 +@in(r4.z) in2 @in(r0.x) in4 @in(r0.y) in5 @in(r0.z) in6 -@in(r4.x) in8 -@in(r4.y) in9 +@in(r3.z) in8 +@in(r3.w) in9 @out(r0.x) out0 @out(r0.y) out1 @out(r0.z) out2 @@ -24,79 +24,57 @@ @out(r3.y) out13 @out(r3.z) out14 @out(r3.w) out15 +@const(c12.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 +@const(c13.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)mul.f r0.x, r0.x, (neg)c4.x -mul.f r0.w, c7.w, r3.x +mul.f r0.w, c7.y, r4.x mad.f32 r0.x, (neg)c4.y, r0.y, r0.x -mad.f32 r0.y, c8.w, r3.y, r0.w -mul.f r0.w, c7.z, r3.x -mul.f r1.x, c7.y, r3.x -mov.f32f32 r0.x, r0.x -mad.f32 r0.y, c9.w, r3.z, r0.y +mad.f32 r0.y, c8.y, r4.y, r0.w mad.f32 r0.x, (neg)c4.z, r0.z, r0.x -mad.f32 r0.z, c8.z, r3.y, r0.w -mad.f32 r0.w, c8.y, r3.y, r1.x -mul.f r1.w, c7.x, r3.x +mad.f32 r0.y, c9.y, r4.z, r0.y +mul.f r0.z, c7.x, r4.x +mul.f r0.w, c7.w, r4.x max.f r0.x, c12.x, r0.x -add.f r0.y, r0.y, c10.w -mad.f32 r0.z, c9.z, r3.z, r0.z -mad.f32 r0.w, c9.y, r3.z, r0.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y -add.f r0.z, r0.z, c10.z -add.f r0.w, r0.w, c10.y -mul.f r1.x, r0.x, c5.z -mul.f r1.y, r0.x, c5.y -mul.f r0.x, r0.x, c5.x -mov.f32f32 r2.w, r0.y -mov.f32f32 r0.y, r1.x -mov.f32f32 r1.x, r1.y -mov.f32f32 r0.x, r0.x -nop -mov.f32f32 r1.z, r0.y -mov.f32f32 r1.y, r1.x -mov.f32f32 r1.x, r0.x -mov.f32f32 r0.x, r0.z -mul.f r0.y, r0.w, c11.y -mad.f32 r0.z, c8.x, r3.y, r1.w -mul.f r0.w, c0.w, r3.x -mov.f32f32 r2.z, r0.x -mov.f32f32 r2.y, r0.y -mad.f32 r0.x, c9.x, r3.z, r0.z -mad.f32 r0.y, c1.w, r3.y, r0.w -mul.f r0.z, c0.z, r3.x -mul.f r0.w, c0.y, r3.x -add.f r0.x, r0.x, c10.x -mad.f32 r0.y, c2.w, r3.z, r0.y -mad.f32 r0.z, c1.z, r3.y, r0.z -mad.f32 r0.w, c1.y, r3.y, r0.w -mul.f r0.x, r0.x, c11.x -add.f r0.y, r0.y, c3.w -mad.f32 r0.z, c2.z, r3.z, r0.z -mad.f32 r1.w, c2.y, r3.z, r0.w -mov.f32f32 r2.x, r0.x -mov.f32f32 r0.w, r0.y -add.f r0.x, r0.z, c3.z -add.f r0.y, r1.w, c3.y -mul.f r1.w, c0.x, r3.x -mov.f32f32 r3.w, r4.y -mov.f32f32 r0.z, r0.x -mov.f32f32 r0.y, r0.y -mad.f32 r0.x, c1.x, r3.y, r1.w -mov.f32f32 r3.w, r3.w -mad.f32 r0.x, c2.x, r3.z, r0.x -mov.f32f32 r1.w, r4.x -mad.f32 r3.y, c6.x, r3.z, c6.y -mad.f32 r3.x, c6.x, r3.x, c6.y -add.f r0.x, r0.x, c3.x -mov.f32f32 r3.z, r1.w -mov.f32f32 r3.y, r3.y -mov.f32f32 r3.x, r3.x -mov.f32f32 r0.x, r0.x +add.f r0.y, r0.y, c10.y +mad.f32 r0.z, c8.x, r4.y, r0.z +mad.f32 r0.w, c8.w, r4.y, r0.w +mov.f32f32 r1.y, r0.x +mul.f r2.y, r0.y, c11.y +mad.f32 r0.y, c9.x, r4.z, r0.z +mul.f r1.x, r0.x, c5.x +mul.f r1.z, r1.y, c5.z +mul.f r1.y, r1.y, c5.y +add.f r0.x, r0.y, c10.x +mad.f32 r0.y, c9.w, r4.z, r0.w +mul.f r0.z, c7.z, r4.x +mul.f r0.w, c0.w, r4.x +mul.f r2.x, r0.x, c11.x +add.f r2.w, r0.y, c10.w +mad.f32 r0.x, c8.z, r4.y, r0.z +mad.f32 r0.y, c1.w, r4.y, r0.w +mad.f32 r0.x, c9.z, r4.z, r0.x +mad.f32 r0.y, c2.w, r4.z, r0.y +mul.f r0.z, c0.z, r4.x +mul.f r1.w, c0.y, r4.x +add.f r2.z, r0.x, c10.z +add.f r0.w, r0.y, c3.w +mad.f32 r0.x, c1.z, r4.y, r0.z +mad.f32 r0.y, c1.y, r4.y, r1.w +mad.f32 r0.x, c2.z, r4.z, r0.x +mad.f32 r0.y, c2.y, r4.z, r0.y +mul.f r1.w, c0.x, r4.x +mad.f32 r3.y, c6.x, r4.z, c6.y +add.f r0.z, r0.x, c3.z +add.f r0.y, r0.y, c3.y +mad.f32 r0.x, c1.x, r4.y, r1.w +mad.f32 r3.x, c6.x, r4.x, c6.y +mad.f32 r0.x, c2.x, r4.z, r0.x mov.f32f32 r1.w, (0.000000) -(rpt2)nop -mov.f32f32 r1.w, r1.w +(rpt1)nop +add.f r0.x, r0.x, c3.x end +nop ; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11) -; VERT: inputs: r3.x (0:0,cm=7,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r4.x (0:0,cm=3,il=16,b=0) -; VERT: 74 instructions, 0 half, 5 full +; VERT: inputs: r4.x (0:0,cm=7,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r3.z (0:0,cm=3,il=16,b=0) +; VERT: 48 instructions, 0 half, 5 full diff --git a/reference/0ad-alpine-valley/0ad-73.asm b/reference/0ad-alpine-valley/0ad-73.asm index 1d73264..f1c05da 100644 --- a/reference/0ad-alpine-valley/0ad-73.asm +++ b/reference/0ad-alpine-valley/0ad-73.asm @@ -8,215 +8,155 @@ @out(r2.y) out1 @out(r2.z) out2 @out(r2.w) out3 +@const(c10.x) 0x3f000000, 0x00000000, 0x3f800000, 0xbb449ba6 +@const(c11.x) 0xbf000000, 0x40000000, 0x3f800000, 0x3de38866 +@const(c12.x) 0x3fb8aa65, 0x3cf5c28f, 0x00000000, 0x00000000 +@const(c13.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.x, 4, r1.x add.f r0.y, r0.w, c10.y bary.f r0.w, 5, r1.x bary.f r1.z, 10, r1.x -add.f r1.w, r0.x, c11.x -bary.f r2.x, 8, r1.x +add.f r2.x, r0.x, c11.x +bary.f r1.w, 11, r1.x add.f r2.y, r0.w, c11.x -mov.f32f32 r1.z, r1.z -floor.f r2.z, r1.w +bary.f r2.z, 8, r1.x +floor.f r3.x, r2.x rcp r0.y, r0.y add.f r0.z, r0.z, c10.y -floor.f r2.w, r2.y -mov.f32f32 r3.x, r1.z -add.f r1.z, r1.w, (neg)r2.z +floor.f r3.y, r2.y +bary.f r2.w, 9, r1.x +add.f r2.x, r2.x, (neg)r3.x (ss)mul.f r0.y, r0.z, r0.y -add.f r0.z, r2.y, (neg)r2.w -bary.f r1.w, 11, r1.x -mov.f32f32 r1.z, r1.z +absneg.f r0.z, (neg)c8.x +add.f r2.y, r2.y, (neg)r3.y +mov.f32f32 r3.x, r2.x +sam (f32)(xyzw)r3.y, r1.z, s#0, t#0 +(sy)(ss)add.f r1.z, c13.y, (neg)r4.x +mul.f r0.z, r0.z, c8.x +mov.f32f32 r1.w, r2.y +mul.f r4.y, c10.x, r3.x +add.f r3.x, c11.y, (neg)r3.x +mul.f r0.z, r0.z, r0.y mov.f32f32 r0.y, r0.y -absneg.f r2.y, (neg)c8.x -mov.f32f32 r0.z, r0.z -mul.f r2.z, c10.x, r1.z -mov.f32f32 r1.w, r1.w -mul.f r2.y, r2.y, c8.x -add.f r2.w, c11.y, (neg)r1.z -mov.f32f32 r2.z, r2.z -mul.f r3.z, c10.x, r0.z -mul.f r2.y, r2.y, r0.y -mov.f32f32 r3.y, r1.w -add.f r0.x, r0.x, (neg)r2.z -mov.f32f32 r1.w, r3.z -mov.f32f32 r2.y, r2.y -mov.f32f32 r2.z, r2.w -mov.f32f32 r0.x, r0.x -add.f r0.w, r0.w, (neg)r1.w -mul.f r0.y, r2.y, r0.y -sam (f32)(xyzw)r2.w, r3.x, s#0, t#0 -(sy)add.f r1.w, c13.y, (neg)r3.z -add.f r2.y, c11.x, r0.x +add.f r0.x, r0.x, (neg)r4.y +mul.f r4.y, c10.x, r1.w +mov.f32f32 r4.z, r3.x +mul.f r0.y, r0.z, r0.y +mov.f32f32 r0.z, r0.x +add.f r0.w, r0.w, (neg)r4.y add.f r0.x, c11.z, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r2.y, r2.y -mov.f32f32 r0.x, r0.x mul.f r0.y, r0.y, c12.x -add.f r3.w, c11.z, r0.w -mul.f r2.y, r2.y, c3.z -mul.f r0.x, r0.x, c3.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r3.w, r3.w -mov.f32f32 r4.x, r2.y -mov.f32f32 r4.y, r0.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r2.y, r2.y -mov.f32f32 r4.z, r4.x -add.f r0.w, c11.x, r0.w -exp2 r0.y, r0.y -(ss)mov.f32f32 r0.y, r0.y -mov.f32f32 r4.x, r4.y -mov.f32f32 r5.y, r0.x -mov.f32f32 r0.x, r0.w -add.f r0.w, c13.y, (neg)r0.y -mov.f32f32 r6.x, r4.x -mul.f r3.w, r3.w, c3.w -mul.f r0.x, r0.x, c3.w -mul.f r0.w, r0.w, c8.y -mul.f r0.y, r0.y, c10.z -mov.f32f32 r4.x, r3.w -mov.f32f32 r4.y, r0.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r6.w, r2.y -mov.f32f32 r2.y, r3.w -mov.f32f32 r4.w, r4.y -bary.f r3.w, 6, r1.x -add.f r0.y, r0.y, r0.w -mov.f32f32 r0.w, r4.x -mov.f32f32 r5.z, r0.x -add.f r0.x, r3.w, c10.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r6.y, r0.w -mov.f32f32 r7.x, r2.y -mov.f32f32 r0.w, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r2.y, r0.x -mov.f32f32 r3.w, r0.x -mov.f32f32 r5.x, r0.w -max.f r0.y, r0.y, c10.y -mov.f32f32 r6.z, r2.y -mov.f32f32 r5.w, r3.w -mov.f32f32 r0.x, r0.x -mul.f r0.w, r1.w, c4.z -add.f r1.w, c13.y, (neg)r3.z -sam.s (f32)(x)r3.w, r4.z, s#2, t#2 -(sy)mov.f32f32 r2.y, r3.w -min.f r0.y, r0.y, c10.z -sam.s (f32)(x)r3.w, r6.x, s#2, t#2 -(sy)mov.f32f32 r3.w, r3.w -sam.s (f32)(x)r4.x, r5.y, s#2, t#2 -(sy)mov.f32f32 r4.x, r4.x -mov.f32f32 r2.y, r2.y -add.f r4.y, c11.y, (neg)r0.z -(ss)add.f r4.z, c13.y, (neg)r0.y -add.f r4.w, c13.y, (neg)r0.y -add.f r5.x, c13.y, (neg)r0.y -mov.f32f32 r4.y, r4.y -mul.f r4.z, r4.z, c7.z -mul.f r4.w, r4.w, c7.y -mul.f r5.x, r5.x, c7.x -mul.f r5.y, r2.z, r4.y -mov.f32f32 r7.y, r0.x -mul.f r0.x, r3.z, c10.z -mul.f r1.w, r1.w, c4.y -mul.f r2.y, r5.y, r2.y -add.f r1.z, r1.z, c10.z -add.f r0.x, r0.x, r0.w -mul.f r0.w, r3.z, c10.z -sam.s (f32)(x)r5.y, r6.w, s#2, t#2 -(sy)mov.f32f32 r5.y, r5.y -mul.f r4.y, r1.z, r4.y -add.f r5.z, c13.y, (neg)r3.z -mul.f r0.x, r3.y, r0.x -add.f r0.w, r0.w, r1.w -mad.f32 r1.w, r4.y, r4.x, r2.y -mul.f r2.y, r5.z, c4.x -mov.f32f32 r0.x, r0.x -bary.f r3.y, 2, r1.x -mov.f32f32 r1.w, r1.w -add.f r0.z, r0.z, c10.z -mul.f r0.w, r3.x, r0.w -mul.f r3.x, r3.z, c10.z -mov.f32f32 r2.x, r2.x -mul.f r2.z, r2.z, r0.z -mul.f r3.y, r0.x, r3.y -mov.f32f32 r0.w, r0.w -add.f r2.y, r3.x, r2.y -mad.f32 r1.w, r2.z, r5.y, r1.w -bary.f r2.z, 1, r1.x -mov.f32f32 r4.x, r2.x -mul.f r2.x, r2.w, r2.y -mov.f32f32 r1.w, r1.w -mul.f r0.z, r1.z, r0.z -mul.f r1.z, r0.w, r2.z -mov.f32f32 r2.x, r2.x -nop -mad.f32 r0.z, r0.z, r3.w, r1.w -bary.f r1.w, 0, r1.x -bary.f (ei)r1.x, 9, r1.x -mov.f32f32 r1.y, c10.z -mov.f32f32 r0.z, r0.z -mul.f r1.w, r2.x, r1.w -mov.f32f32 r1.x, r1.x -mov.f32f32 r2.w, r1.y -mul.f r0.z, c11.w, r0.z -mov.f32f32 r1.y, c10.y -(rpt1)nop -mov.f32f32 r0.z, r0.z -mov.f32f32 r4.y, r1.x +add.f r0.z, c11.x, r0.z +mov.f32f32 r4.y, r0.w +mul.f r4.w, r0.x, c3.z +add.f r0.x, c11.z, r0.w +mul.f r5.z, r0.z, c3.z +add.f r0.z, c11.x, r4.y +mov.f32f32 r6.y, r4.w +mul.f r5.x, r0.x, c3.w +mov.f32f32 r7.x, r5.z +mul.f r6.z, r0.z, c3.w +exp2 r0.x, r0.y +(ss)mov.f32f32 r0.y, r0.x +bary.f r0.z, 6, r1.x +mov.f32f32 r5.w, r5.x +mov.f32f32 r7.y, r6.z +add.f r0.y, c13.y, (neg)r0.y +add.f r5.y, r0.z, c10.w +add.f r0.z, c11.y, (neg)r1.w +mul.f r0.x, r0.x, c10.z +add.f r0.w, c13.y, (neg)r4.x +mov.f32f32 r7.z, r5.y +mul.f r0.y, r0.y, c8.y +mov.f32f32 r6.w, r5.y +mov.f32f32 r6.x, r5.y (rpt1)nop -mul.f r1.x, r3.y, r0.z -mul.f r1.z, r1.z, r0.z -mul.f r0.z, r1.w, r0.z +add.f r0.x, r0.x, r0.y +sam.s (f32)(x)r7.x, r7.x, s#2, t#2 +mov.f32f32 r0.y, r0.z nop -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.z, r1.z -mad.f32 r0.x, c6.z, r0.x, r1.x -mad.f32 r0.w, c6.y, r0.w, r1.z -mov.f32f32 r0.z, r0.z -sam (f32)(w)r3.x, r4.x, s#1, t#1 -(sy)cmps.f.lt r1.x, r3.w, c12.y -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.w, r0.w -mad.f32 r0.z, c6.x, r2.x, r0.z -cov.u32f32 r1.x, r1.x -mul.f r0.x, r0.y, r0.x -mul.f r0.w, r0.y, r0.w -mov.f32f32 r0.z, r0.z -cmps.f.ne r1.x, r1.x, c10.y -add.f r0.x, r0.x, r4.z -mov.f32f32 r1.z, r3.w -add.f r0.w, r0.w, r4.w -mul.f r0.y, r0.y, r0.z -nop -mov.f32f32 r0.z, r1.z -(rpt2)nop -sel.b32 r0.z, r1.y, r1.x, r0.z -add.f r0.y, r0.y, r5.x -(rpt1)nop -mul.f r0.x, r0.x, r0.z -mul.f r0.w, r0.w, r0.z -mul.f r0.y, r0.y, r0.z +(ss)nop +sam.s (f32)(x)r7.y, r6.y, s#2, t#2 +sam.s (f32)(x)r7.z, r4.w, s#2, t#2 +mul.f r0.w, r0.w, c4.z +add.f r1.w, c13.y, (neg)r4.x +mul.f r0.y, r4.z, r0.y +max.f r0.x, r0.x, c10.y +sam.s (f32)(x)r4.y, r5.z, s#2, t#2 +mul.f r4.z, r4.x, c10.z +mul.f r1.w, r1.w, c4.y +(sy)mul.f r0.y, r0.y, r7.x +add.f r2.x, r2.x, c10.z +min.f r0.x, r0.x, c10.z +add.f r0.w, r4.z, r0.w +mul.f r4.z, r4.x, c10.z +mul.f r0.z, r2.x, r0.z +(ss)add.f r4.w, c13.y, (neg)r0.x +add.f r5.x, c13.y, (neg)r0.x +add.f r5.y, c13.y, (neg)r0.x +mad.f32 r0.y, r0.z, r7.y, r0.y +add.f r0.z, r2.y, c10.z +mul.f r2.y, r4.w, c7.z +mul.f r4.w, r5.x, c7.y +mul.f r5.x, r5.y, c7.x +mul.f r3.x, r3.x, r0.z +add.f r1.w, r4.z, r1.w +mul.f r1.z, r1.z, c4.x +mul.f r0.w, r3.w, r0.w +mad.f32 r0.y, r3.x, r4.y, r0.y +mul.f r0.z, r2.x, r0.z +mul.f r2.x, r4.x, c10.z +mov.f32f32 r3.x, r0.w +bary.f r3.w, 2, r1.x +mad.f32 r0.y, r0.z, r7.z, r0.y +mul.f r0.z, r3.z, r1.w +add.f r1.z, r2.x, r1.z +mul.f r1.w, r3.x, r3.w +mul.f r0.y, c11.w, r0.y +mov.f32f32 r2.x, r0.z +mul.f r1.z, r3.y, r1.z +bary.f r3.x, 1, r1.x +mov.f32f32 r3.y, r0.y +sam (f32)(w)r3.z, r2.z, s#1, t#1 +(sy)(ss)cmps.f.lt r2.z, r4.y, c12.y +mov.f32f32 r3.z, r1.z +mul.f r2.x, r2.x, r3.x +mul.f r1.w, r1.w, r3.y +bary.f (ei)r1.x, 0, r1.x +mad.f32 r0.w, c6.z, r0.w, r1.w +mul.f r1.y, r2.x, r3.y +cov.u32f32 r1.w, r2.z +mov.f32f32 r2.w, c10.z +mul.f r0.w, r0.x, r0.w +mad.f32 r0.z, c6.y, r0.z, r1.y +mul.f r1.x, r3.z, r1.x +cmps.f.ne r1.y, r1.w, c10.y +add.f r0.w, r0.w, r2.y +mov.f32f32 r1.w, c10.y +mul.f r0.z, r0.x, r0.z +mul.f r0.y, r1.x, r0.y nop -mul.f r0.x, r0.x, c5.z -mul.f r0.z, r0.w, c5.y -mul.f r0.y, r0.y, c5.x +sel.b32 r1.x, r1.w, r1.y, r4.y +add.f r0.z, r0.z, r4.w +mad.f32 r0.y, c6.x, r1.z, r0.y nop -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.y, r0.y +mul.f r0.w, r0.w, r1.x +mul.f r0.z, r0.z, r1.x +(rpt1)nop +mul.f r2.z, r0.w, c5.z +mul.f r2.y, r0.z, c5.y +mul.f r0.x, r0.x, r0.y +(rpt2)nop +add.f r0.x, r0.x, r5.x +(rpt2)nop +mul.f r0.x, r0.x, r1.x +(rpt2)nop +mul.f r2.x, r0.x, c5.x +end nop -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.y, r0.y nop -mov.f32f32 r2.z, r0.x -mov.f32f32 r2.y, r0.z -mov.f32f32 r2.x, r0.y -end nop ; FRAG: outputs: r2.x (1:0) ; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r2.x (5:10,cm=f,il=12,b=1) r1.x (5:11,cm=f,il=16,b=1) -; FRAG: 212 instructions, 0 half, 8 full +; FRAG: 149 instructions, 0 half, 8 full diff --git a/reference/0ad-alpine-valley/0ad-74.asm b/reference/0ad-alpine-valley/0ad-74.asm index 035c379..5b14079 100644 --- a/reference/0ad-alpine-valley/0ad-74.asm +++ b/reference/0ad-alpine-valley/0ad-74.asm @@ -1,13 +1,13 @@ ; options: ; VERT: new compiler -@in(r3.x) in0 -@in(r3.y) in1 -@in(r3.z) in2 +@in(r4.x) in0 +@in(r4.y) in1 +@in(r4.z) in2 @in(r0.x) in4 @in(r0.y) in5 @in(r0.z) in6 -@in(r4.x) in8 -@in(r4.y) in9 +@in(r3.z) in8 +@in(r3.w) in9 @out(r0.x) out0 @out(r0.y) out1 @out(r0.z) out2 @@ -24,79 +24,57 @@ @out(r3.y) out13 @out(r3.z) out14 @out(r3.w) out15 +@const(c12.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 +@const(c13.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)mul.f r0.x, r0.x, (neg)c4.x -mul.f r0.w, c7.w, r3.x +mul.f r0.w, c7.y, r4.x mad.f32 r0.x, (neg)c4.y, r0.y, r0.x -mad.f32 r0.y, c8.w, r3.y, r0.w -mul.f r0.w, c7.z, r3.x -mul.f r1.x, c7.y, r3.x -mov.f32f32 r0.x, r0.x -mad.f32 r0.y, c9.w, r3.z, r0.y +mad.f32 r0.y, c8.y, r4.y, r0.w mad.f32 r0.x, (neg)c4.z, r0.z, r0.x -mad.f32 r0.z, c8.z, r3.y, r0.w -mad.f32 r0.w, c8.y, r3.y, r1.x -mul.f r1.w, c7.x, r3.x +mad.f32 r0.y, c9.y, r4.z, r0.y +mul.f r0.z, c7.x, r4.x +mul.f r0.w, c7.w, r4.x max.f r0.x, c12.x, r0.x -add.f r0.y, r0.y, c10.w -mad.f32 r0.z, c9.z, r3.z, r0.z -mad.f32 r0.w, c9.y, r3.z, r0.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y -add.f r0.z, r0.z, c10.z -add.f r0.w, r0.w, c10.y -mul.f r1.x, r0.x, c5.z -mul.f r1.y, r0.x, c5.y -mul.f r0.x, r0.x, c5.x -mov.f32f32 r2.w, r0.y -mov.f32f32 r0.y, r1.x -mov.f32f32 r1.x, r1.y -mov.f32f32 r0.x, r0.x -nop -mov.f32f32 r1.z, r0.y -mov.f32f32 r1.y, r1.x -mov.f32f32 r1.x, r0.x -mov.f32f32 r0.x, r0.z -mul.f r0.y, r0.w, c11.y -mad.f32 r0.z, c8.x, r3.y, r1.w -mul.f r0.w, c0.w, r3.x -mov.f32f32 r2.z, r0.x -mov.f32f32 r2.y, r0.y -mad.f32 r0.x, c9.x, r3.z, r0.z -mad.f32 r0.y, c1.w, r3.y, r0.w -mul.f r0.z, c0.z, r3.x -mul.f r0.w, c0.y, r3.x -add.f r0.x, r0.x, c10.x -mad.f32 r0.y, c2.w, r3.z, r0.y -mad.f32 r0.z, c1.z, r3.y, r0.z -mad.f32 r0.w, c1.y, r3.y, r0.w -mul.f r0.x, r0.x, c11.x -add.f r0.y, r0.y, c3.w -mad.f32 r0.z, c2.z, r3.z, r0.z -mad.f32 r1.w, c2.y, r3.z, r0.w -mov.f32f32 r2.x, r0.x -mov.f32f32 r0.w, r0.y -add.f r0.x, r0.z, c3.z -add.f r0.y, r1.w, c3.y -mul.f r1.w, c0.x, r3.x -mov.f32f32 r3.w, r4.y -mov.f32f32 r0.z, r0.x -mov.f32f32 r0.y, r0.y -mad.f32 r0.x, c1.x, r3.y, r1.w -mov.f32f32 r3.w, r3.w -mad.f32 r0.x, c2.x, r3.z, r0.x -mov.f32f32 r1.w, r4.x -mad.f32 r3.y, c6.x, r3.z, c6.y -mad.f32 r3.x, c6.x, r3.x, c6.y -add.f r0.x, r0.x, c3.x -mov.f32f32 r3.z, r1.w -mov.f32f32 r3.y, r3.y -mov.f32f32 r3.x, r3.x -mov.f32f32 r0.x, r0.x +add.f r0.y, r0.y, c10.y +mad.f32 r0.z, c8.x, r4.y, r0.z +mad.f32 r0.w, c8.w, r4.y, r0.w +mov.f32f32 r1.y, r0.x +mul.f r2.y, r0.y, c11.y +mad.f32 r0.y, c9.x, r4.z, r0.z +mul.f r1.x, r0.x, c5.x +mul.f r1.z, r1.y, c5.z +mul.f r1.y, r1.y, c5.y +add.f r0.x, r0.y, c10.x +mad.f32 r0.y, c9.w, r4.z, r0.w +mul.f r0.z, c7.z, r4.x +mul.f r0.w, c0.w, r4.x +mul.f r2.x, r0.x, c11.x +add.f r2.w, r0.y, c10.w +mad.f32 r0.x, c8.z, r4.y, r0.z +mad.f32 r0.y, c1.w, r4.y, r0.w +mad.f32 r0.x, c9.z, r4.z, r0.x +mad.f32 r0.y, c2.w, r4.z, r0.y +mul.f r0.z, c0.z, r4.x +mul.f r1.w, c0.y, r4.x +add.f r2.z, r0.x, c10.z +add.f r0.w, r0.y, c3.w +mad.f32 r0.x, c1.z, r4.y, r0.z +mad.f32 r0.y, c1.y, r4.y, r1.w +mad.f32 r0.x, c2.z, r4.z, r0.x +mad.f32 r0.y, c2.y, r4.z, r0.y +mul.f r1.w, c0.x, r4.x +mad.f32 r3.y, c6.x, r4.z, c6.y +add.f r0.z, r0.x, c3.z +add.f r0.y, r0.y, c3.y +mad.f32 r0.x, c1.x, r4.y, r1.w +mad.f32 r3.x, c6.x, r4.x, c6.y +mad.f32 r0.x, c2.x, r4.z, r0.x mov.f32f32 r1.w, (0.000000) -(rpt2)nop -mov.f32f32 r1.w, r1.w +(rpt1)nop +add.f r0.x, r0.x, c3.x end +nop ; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11) -; VERT: inputs: r3.x (0:0,cm=7,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r4.x (0:0,cm=3,il=16,b=0) -; VERT: 74 instructions, 0 half, 5 full +; VERT: inputs: r4.x (0:0,cm=7,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r3.z (0:0,cm=3,il=16,b=0) +; VERT: 48 instructions, 0 half, 5 full diff --git a/reference/0ad-alpine-valley/0ad-75.asm b/reference/0ad-alpine-valley/0ad-75.asm index 67a2ad5..e7bcae3 100644 --- a/reference/0ad-alpine-valley/0ad-75.asm +++ b/reference/0ad-alpine-valley/0ad-75.asm @@ -32,159 +32,113 @@ @out(r5.y) out21 @out(r5.z) out22 @out(r5.w) out23 +@const(c17.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 +@const(c18.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)mul.f r1.z, c12.x, r0.x mul.f r1.w, c12.x, r0.w mad.f32 r1.z, c13.x, r0.y, r1.z mad.f32 r1.w, c13.x, r1.x, r1.w mad.f32 r1.z, c14.x, r0.z, r1.z -mul.f r2.x, c12.z, r0.x -mul.f r2.y, c12.z, r0.w -mul.f r0.w, c12.y, r0.w -add.f r1.z, r1.z, c15.x -mov.f32f32 r1.w, r1.w -mad.f32 r2.x, c13.z, r0.y, r2.x -mad.f32 r2.y, c13.z, r1.x, r2.y -add.f r2.z, c4.x, (neg)r1.z mad.f32 r1.w, c14.x, r1.y, r1.w -mul.f r2.w, c8.w, r1.z -mul.f r3.x, c8.z, r1.z -mul.f r3.y, r2.z, r2.z -mul.f r3.z, c12.y, r0.x -mov.f32f32 r1.w, r1.w -mad.f32 r3.z, c13.y, r0.y, r3.z -absneg.f r3.w, (neg)c5.x -mad.f32 r3.z, c14.y, r0.z, r3.z -mul.f r4.x, c8.y, r1.z -mul.f r4.y, c8.x, r1.z -mul.f r5.x, r1.w, r3.w -add.f r5.w, r3.z, c15.y -mad.f32 r0.w, c13.y, r1.x, r0.w -mul.f r1.x, c0.w, r1.z -mul.f r3.z, c0.z, r1.z +absneg.f r2.x, (neg)c5.x +mul.f r2.y, c12.y, r0.x +add.f r2.z, r1.z, c15.x +mad.f32 r1.z, c13.y, r0.y, r2.y +mul.f r2.y, c12.z, r0.x +mul.f r2.w, r1.w, r2.x +add.f r3.x, c4.x, (neg)r2.z +mul.f r3.y, c12.y, r0.w +mul.f r3.z, c8.y, r2.z +mul.f r3.w, c8.x, r2.z +mul.f r4.x, r3.x, r3.x +mad.f32 r1.z, c14.y, r0.z, r1.z +mad.f32 r3.y, c13.y, r1.x, r3.y +mul.f r4.y, c8.w, r2.z +mul.f r5.x, c8.z, r2.z +add.f r5.w, r1.z, c15.y +mad.f32 r6.x, c14.y, r1.y, r3.y +absneg.f r1.z, (neg)c5.y +mul.f r3.y, c0.w, r2.z add.f r5.y, c4.y, (neg)r5.w -mov.f32f32 r0.w, r0.w -mad.f32 r2.w, c9.w, r5.w, r2.w -mad.f32 r3.x, c9.z, r5.w, r3.x -mad.f32 r3.y, r5.y, r5.y, r3.y -mad.f32 r0.w, c14.y, r1.y, r0.w -mad.f32 r2.x, c14.z, r0.z, r2.x -mad.f32 r4.x, c9.y, r5.w, r4.x -mov.f32f32 r3.y, r3.y -mov.f32f32 r6.x, r0.w -add.f r2.x, r2.x, c15.z -absneg.f r0.w, (neg)c5.y -mad.f32 r4.y, c9.x, r5.w, r4.y -mad.f32 r1.x, c1.w, r5.w, r1.x -add.f r5.z, c4.z, (neg)r2.x -mad.f32 r5.x, r6.x, r0.w, r5.x -mad.f32 r2.w, c10.w, r2.x, r2.w -mad.f32 r3.x, c10.z, r2.x, r3.x -mad.f32 r3.y, r5.z, r5.z, r3.y -mov.f32f32 r5.x, r5.x -mov.f32f32 r2.y, r2.y +mad.f32 r3.z, c9.y, r5.w, r3.z +mad.f32 r3.w, c9.x, r5.w, r3.w +mad.f32 r2.w, r6.x, r1.z, r2.w +mad.f32 r4.x, r5.y, r5.y, r4.x +mad.f32 r2.y, c13.z, r0.y, r2.y +mul.f r0.w, c12.z, r0.w +mad.f32 r2.y, c14.z, r0.z, r2.y +mad.f32 r0.w, c13.z, r1.x, r0.w +mad.f32 r1.x, c9.w, r5.w, r4.y +mad.f32 r4.y, c9.z, r5.w, r5.x +add.f r2.y, r2.y, c15.z +mad.f32 r6.y, c14.z, r1.y, r0.w +absneg.f r0.w, (neg)c5.z +mad.f32 r1.y, c1.w, r5.w, r3.y +add.f r3.y, c4.z, (neg)r2.y +mad.f32 r3.z, c10.y, r2.y, r3.z +mad.f32 r3.w, c10.x, r2.y, r3.w +mad.f32 r2.w, r6.y, r0.w, r2.w +mad.f32 r4.x, r3.y, r3.y, r4.x mul.f r0.x, c12.w, r0.x -mad.f32 r4.x, c10.y, r2.x, r4.x -mad.f32 r4.y, c10.x, r2.x, r4.y -mad.f32 r1.y, c14.z, r1.y, r2.y -rsq r2.y, r3.y -(ss)mov.f32f32 r2.y, r2.y -(ss)absneg.f r3.y, (neg)c5.z +mad.f32 r5.x, c10.w, r2.y, r1.x +mad.f32 r4.y, c10.z, r2.y, r4.y mad.f32 r0.x, c13.w, r0.y, r0.x -mov.f32f32 r6.y, r1.y -mad.f32 r0.y, r2.z, r2.y, r3.w -mad.f32 r0.w, r5.y, r2.y, r0.w -mad.f32 r1.y, r5.z, r2.y, r3.y -mad.f32 r2.y, r6.y, r3.y, r5.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.y, r1.y -mov.f32f32 r2.y, r2.y -mul.f r2.z, r0.y, r0.y +mad.f32 r0.y, c2.w, r2.y, r1.y +mul.f r6.z, c0.z, r2.z +rsq r1.x, r4.x +(ss)mov.f32f32 r1.y, r1.x +mad.f32 r2.x, r3.x, r1.x, r2.x +max.f r1.x, c17.x, r2.w mad.f32 r0.x, c14.w, r0.z, r0.x -mad.f32 r0.z, r0.w, r0.w, r2.z -max.f r2.y, c17.x, r2.y -mad.f32 r2.z, c2.w, r2.x, r1.x -mad.f32 r1.x, c1.z, r5.w, r3.z -mov.f32f32 r0.z, r0.z -mov.f32f32 r2.y, r2.y -mad.f32 r0.z, r1.y, r1.y, r0.z +mad.f32 r0.z, r5.y, r1.y, r1.z +mov.f32f32 r1.z, r2.x +mad.f32 r0.w, r3.y, r1.y, r0.w +nop +mov.f32f32 r2.w, r0.z +mul.f r1.y, r1.z, r1.z +(ss)mov.f32f32 r4.x, r0.w +mov.f32f32 r3.x, r1.x +mad.f32 r0.z, r0.z, r2.w, r1.y add.f r0.x, r0.x, c15.w -mad.f32 r6.z, c2.z, r2.x, r1.x -mul.f r6.w, c0.y, r1.z -mul.f r7.x, c0.x, r1.z -mad.f32 r7.y, c7.x, r2.x, c7.y -mad.f32 r7.z, c7.x, r1.z, c7.y +mad.f32 r0.z, r0.w, r4.x, r0.z +mul.f r1.z, r3.x, c6.z +mul.f r1.y, r3.x, c6.y +mad.f32 r0.w, c11.y, r0.x, r3.z +mad.f32 r3.x, c11.x, r0.x, r3.w +mul.f r1.x, r1.x, c6.x +mad.f32 r3.w, c11.w, r0.x, r5.x rsq r0.z, r0.z -(ss)mov.f32f32 r0.z, r0.z -mul.f r1.x, r2.y, c6.z -mul.f r3.y, r2.y, c6.y -mul.f r2.y, r2.y, c6.x -mul.f r1.y, r1.y, r0.z -mul.f r0.w, r0.w, r0.z -mul.f r0.y, r0.y, r0.z -mov.f32f32 r0.z, r1.x -mov.f32f32 r1.x, r1.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.y, r0.y -nop -mov.f32f32 r5.z, r1.x -mov.f32f32 r5.y, r0.w -mov.f32f32 r5.x, r0.y -mov.f32f32 r1.z, r0.z -mov.f32f32 r0.y, r3.y -mov.f32f32 r0.z, r2.y -(rpt1)nop -mov.f32f32 r1.y, r0.y -mov.f32f32 r1.x, r0.z -mad.f32 r0.y, c11.w, r0.x, r2.w -mad.f32 r0.z, c11.z, r0.x, r3.x -mad.f32 r0.w, c11.y, r0.x, r4.x -mad.f32 r2.y, c11.x, r0.x, r4.y -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -mul.f r0.w, r0.w, c16.y -mul.f r2.y, r2.y, c16.x -mov.f32f32 r3.w, r0.y -mov.f32f32 r3.z, r0.z -mov.f32f32 r3.y, r0.w -mov.f32f32 r3.x, r2.y -mad.f32 r0.y, c3.w, r0.x, r2.z -mad.f32 r0.z, c3.z, r0.x, r6.z -mad.f32 r2.y, c1.y, r5.w, r6.w -mad.f32 r2.z, c1.x, r5.w, r7.x -mov.f32f32 r0.w, r0.y -mov.f32f32 r0.z, r0.z -mad.f32 r0.y, c2.y, r2.x, r2.y -mad.f32 r2.x, c2.x, r2.x, r2.z +(ss)mov.f32f32 r3.z, r0.z +mul.f r5.x, r2.x, r0.z +mul.f r3.y, r0.w, c16.y +mul.f r3.x, r3.x, c16.x +mul.f r5.z, r4.x, r3.z +mul.f r5.y, r2.w, r3.z +mad.f32 r3.z, c11.z, r0.x, r4.y +mad.f32 r0.w, c3.w, r0.x, r0.y +mad.f32 r0.y, c1.z, r5.w, r6.z +(ss)mul.f r0.z, c0.y, r2.z +mad.f32 r0.y, c2.z, r2.y, r0.y +mad.f32 r2.x, c1.y, r5.w, r0.z +mad.f32 r0.z, c3.z, r0.x, r0.y +mad.f32 r0.y, c2.y, r2.y, r2.x +mul.f r2.x, c0.x, r2.z mad.f32 r0.y, c3.y, r0.x, r0.y +mad.f32 r2.x, c1.x, r5.w, r2.x +mad.f32 r4.y, c7.x, r2.y, c7.y +mad.f32 r2.x, c2.x, r2.y, r2.x +mad.f32 r4.x, c7.x, r2.z, c7.y mad.f32 r0.x, c3.x, r0.x, r2.x -mov.f32f32 r2.x, r7.y -mov.f32f32 r2.y, r7.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x -mov.f32f32 r4.y, r2.x -mov.f32f32 r4.x, r2.y -mov.f32f32 r2.x, r6.y +mov.f32f32 r2.z, r6.y mov.f32f32 r2.y, r6.x -mov.f32f32 r1.w, r1.w -nop -mov.f32f32 r2.z, r2.x -mov.f32f32 r2.y, r2.y mov.f32f32 r2.x, r1.w +mov.f32f32 r5.w, (0.000000) +mov.f32f32 r2.w, (0.000000) mov.f32f32 r1.w, (0.000000) -mov.f32f32 r2.w, r4.w -mov.f32f32 r4.z, r4.z -mov.f32f32 r6.x, (0.000000) -mov.f32f32 r5.w, r1.w -mov.f32f32 r4.w, r2.w -mov.f32f32 r4.z, r4.z -mov.f32f32 r2.w, r6.x -mov.f32f32 r1.w, (0.000000) -(rpt2)nop -mov.f32f32 r1.w, r1.w end nop +nop ; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11) r4.x (5:12) r5.x (5:13) ; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r4.z (0:0,cm=3,il=16,b=0) -; VERT: 154 instructions, 0 half, 8 full +; VERT: 102 instructions, 0 half, 7 full diff --git a/reference/0ad-alpine-valley/0ad-76.asm b/reference/0ad-alpine-valley/0ad-76.asm index b853b0d..d0ec086 100644 --- a/reference/0ad-alpine-valley/0ad-76.asm +++ b/reference/0ad-alpine-valley/0ad-76.asm @@ -8,6 +8,10 @@ @out(r2.y) out1 @out(r2.z) out2 @out(r2.w) out3 +@const(c12.x) 0x3f000000, 0x00000000, 0x3f800000, 0xbb449ba6 +@const(c13.x) 0xbf000000, 0x40000000, 0x3f800000, 0x3de38866 +@const(c14.x) 0x3fb8aa65, 0x3cf5c28f, 0x00000000, 0x00000000 +@const(c15.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.x, 8, r1.x add.f r0.y, r0.w, c12.y bary.f r0.w, 4, r1.x @@ -23,224 +27,144 @@ add.f r3.x, r1.z, c13.x mad.f32 r2.y, r2.z, r2.z, r2.y add.f r1.w, r1.w, (neg)r2.w (ss)mul.f r0.y, r0.z, r0.y -floor.f r0.z, r3.x -mov.f32f32 r2.y, r2.y -mov.f32f32 r1.w, r1.w +absneg.f r0.z, (neg)c9.x +bary.f r2.w, 6, r1.x +mov.f32f32 r3.y, r1.w +floor.f r3.z, r3.x +mul.f r0.z, r0.z, c9.x +mad.f32 r2.y, r2.w, r2.w, r2.y +mul.f r3.w, c12.x, r3.y +add.f r3.x, r3.x, (neg)r3.z +mul.f r0.z, r0.z, r0.y mov.f32f32 r0.y, r0.y -absneg.f r2.w, (neg)c9.x -bary.f r3.y, 6, r1.x -mul.f r3.z, c12.x, r1.w -add.f r0.z, r3.x, (neg)r0.z -mul.f r2.w, r2.w, c9.x -mad.f32 r2.y, r3.y, r3.y, r2.y -mov.f32f32 r3.x, r3.z -mov.f32f32 r0.z, r0.z -mul.f r2.w, r2.w, r0.y -add.f r3.z, c13.y, (neg)r1.w -add.f r0.x, r0.x, (neg)r3.x -mul.f r3.x, c12.x, r0.z -mov.f32f32 r2.w, r2.w +add.f r0.x, r0.x, (neg)r3.w +mov.f32f32 r3.z, r3.x rsq r2.y, r2.y -(ss)mov.f32f32 r2.y, r2.y -mov.f32f32 r0.x, r0.x -mov.f32f32 r3.x, r3.x -mul.f r0.y, r2.w, r0.y -mul.f r0.w, r0.w, r2.y -add.f r2.w, c13.x, r0.x +(ss)mov.f32f32 r3.w, r2.y +mul.f r0.y, r0.z, r0.y +mov.f32f32 r0.z, r0.x +mul.f r4.x, c12.x, r3.z add.f r0.x, c13.z, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r2.w, r2.w -bary.f r3.w, 16, r1.x -mul.f r0.y, r0.y, c14.x -mov.f32f32 r0.x, r0.x -mul.f r2.w, r2.w, c4.z mul.f r0.w, r0.w, r3.w -mul.f r2.z, r2.z, r2.y -mov.f32f32 r0.y, r0.y -mov.f32f32 r3.w, r2.w -mul.f r0.x, r0.x, c4.z -mov.f32f32 r2.z, r2.z -bary.f r4.x, 17, r1.x -mov.f32f32 r4.y, r3.w -add.f r1.z, r1.z, (neg)r3.x +add.f r0.z, c13.x, r0.z +mul.f r0.y, r0.y, c14.x +bary.f r4.y, 16, r1.x +add.f r1.z, r1.z, (neg)r4.x +mul.f r4.z, r0.z, c4.z +mul.f r5.y, r0.x, c4.z +mul.f r0.x, r0.w, r4.y +add.f r0.z, c13.z, r1.z +mov.f32f32 r6.x, r4.z +mov.f32f32 r0.w, r1.z exp2 r0.y, r0.y -(ss)mov.f32f32 r0.y, r0.y -mad.f32 r0.w, r2.z, r4.x, r0.w -mov.f32f32 r2.z, r0.x -mov.f32f32 r1.z, r1.z -add.f r3.x, c15.y, (neg)r0.y -mov.f32f32 r0.w, r0.w -mul.f r2.y, r3.y, r2.y -add.f r3.y, c13.x, r1.z -mul.f r3.x, r3.x, c9.y -mul.f r0.y, r0.y, c12.z -mov.f32f32 r2.y, r2.y -mov.f32f32 r3.y, r3.y -bary.f r3.w, 18, r1.x -add.f r0.y, r0.y, r3.x -mov.f32f32 r2.z, r2.z -mul.f r3.x, r3.y, c4.w -mad.f32 r0.w, r2.y, r3.w, r0.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r5.x, r2.z -mov.f32f32 r2.y, r3.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.y, r0.y -add.f r1.z, c13.z, r1.z -mov.f32f32 r4.z, r2.y -bary.f r2.y, 10, r1.x +(ss)mov.f32f32 r1.z, r0.y +mul.f r2.z, r2.z, r3.w +bary.f r3.w, 17, r1.x +add.f r0.w, c13.x, r0.w +add.f r1.z, c15.y, (neg)r1.z +mov.f32f32 r6.w, r5.y +mul.f r5.z, r0.z, c4.w +mul.f r7.x, r0.w, c4.w +mul.f r0.z, r1.z, c9.y +(ss)mul.f r0.y, r0.y, c12.z +mad.f32 r0.x, r2.z, r3.w, r0.x +mov.f32f32 r6.y, r7.x +bary.f r0.w, 10, r1.x +add.f r0.y, r0.y, r0.z +mul.f r0.z, r2.w, r2.y +bary.f r1.z, 18, r1.x +add.f r5.w, r0.w, c12.w max.f r0.y, r0.y, c12.y -max.f r0.w, c12.y, r0.w -mov.f32f32 r1.z, r1.z -add.f r2.y, r2.y, c12.w +mov.f32f32 r4.w, r5.z +mad.f32 r0.x, r0.z, r1.z, r0.x +mov.f32f32 r6.z, r5.w min.f r0.y, r0.y, c12.z -mov.f32f32 r0.w, r0.w -mul.f r1.z, r1.z, c4.w -mov.f32f32 r2.z, r2.y -add.f r3.y, c15.y, (neg)r0.y -add.f r3.w, c15.y, (neg)r0.y -add.f r4.x, c15.y, (neg)r0.y -mov.f32f32 r4.w, r2.z -mul.f r2.z, r3.y, c8.z -mul.f r3.y, r3.w, c8.y -mul.f r3.w, r4.x, c8.x -log2 r0.w, r0.w -(ss)mul.f r0.w, c10.y, r0.w -mov.f32f32 r4.x, r1.z -mov.f32f32 r0.x, r0.x -sam.s (f32)(x)r5.y, r4.y, s#3, t#3 -(sy)(ss)mov.f32f32 r4.y, r5.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r4.x, r4.x -mov.f32f32 r5.w, r0.x -mov.f32f32 r0.x, r4.y -mov.f32f32 r3.z, r3.z -add.f r4.y, c13.y, (neg)r0.z -mov.f32f32 r5.y, r4.x -mov.f32f32 r4.x, r2.y -mov.f32f32 r3.x, r3.x -mov.f32f32 r4.y, r4.y -exp2 r0.w, r0.w -bary.f r4.z, 14, r1.x -mov.f32f32 r5.z, r4.x -mov.f32f32 r6.x, r3.x -mul.f r3.x, r3.z, r4.y -mov.f32f32 r4.x, r4.z -mov.f32f32 r4.w, r2.y -mov.f32f32 r2.w, r2.w -mul.f r0.x, r3.x, r0.x -mov.f32f32 r6.z, r4.x -mov.f32f32 r6.y, r4.w -bary.f r3.x, 15, r1.x -sam.s (f32)(x)r4.w, r5.x, s#3, t#3 +mov.f32f32 r7.y, r5.w +mov.f32f32 r5.x, r5.w +max.f r0.x, c12.y, r0.x +nop +add.f r0.z, c15.y, (neg)r0.y +sam.s (f32)(x)r7.z, r6.x, s#3, t#3 +add.f r0.w, c13.y, (neg)r3.y +add.f r1.z, c15.y, (neg)r0.y +add.f r2.y, c15.y, (neg)r0.y +mul.f r0.z, r0.z, c8.z +mov.f32f32 r2.z, r0.w +add.f r2.w, c13.y, (neg)r3.z +mul.f r1.z, r1.z, c8.y +mul.f r3.y, r2.y, c8.x +log2 r0.x, r0.x +(ss)mul.f r0.x, c10.y, r0.x +mov.f32f32 r2.y, r2.w +sam.s (f32)(x)r3.z, r6.w, s#3, t#3 nop -(sy)mov.f32f32 r4.x, r4.w -mov.f32f32 r4.w, r2.w -mov.f32f32 r1.z, r1.z +sam.s (f32)(x)r3.w, r4.z, s#3, t#3 +sam.s (f32)(x)r4.x, r5.y, s#3, t#3 add.f r1.w, r1.w, c12.z -(ss)nop -sam.s (f32)(x)r5.x, r5.w, s#3, t#3 -(sy)mov.f32f32 r2.w, r5.x -mov.f32f32 r5.y, r3.x -mov.f32f32 r5.x, r1.z -mul.f r1.z, r1.w, r4.y -mov.f32f32 r2.y, r2.y -add.f r0.z, r0.z, c12.z -mov.f32f32 r2.x, r2.x -mad.f32 r0.x, r1.z, r2.w, r0.x -mov.f32f32 r6.w, r5.y -mov.f32f32 r5.y, r2.y -mul.f r1.z, r3.z, r0.z -mov.f32f32 r0.x, r0.x -mov.f32f32 r2.x, r2.x +add.f r3.x, r3.x, c12.z +mul.f r2.z, r2.z, r2.y bary.f r2.y, 13, r1.x -mul.f r0.z, r1.w, r0.z -(ss)nop -sam (f32)(xyz)r5.z, r6.z, s#2, t#2 -(sy)mul.f r1.w, c7.z, r6.x -sam.s (f32)(x)r6.x, r4.w, s#3, t#3 -(sy)mov.f32f32 r2.w, r6.x -mul.f r3.z, c7.y, r5.w -mul.f r4.y, c7.x, r5.z -(ss)mul.f r1.w, r1.w, r0.w -mad.f32 r0.x, r1.z, r2.w, r0.x -mov.f32f32 r1.z, r4.z -mul.f r3.z, r3.z, r0.w -mul.f r0.w, r4.y, r0.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r4.y, r1.z -mad.f32 r0.x, r0.z, r4.x, r0.x -mov.f32f32 r0.z, r3.x -mov.f32f32 r1.z, r2.y +bary.f r4.y, 14, r1.x +(ss)bary.f r4.z, 15, r1.x +(sy)mul.f r2.z, r2.z, r7.z +mul.f r2.w, r1.w, r2.w +exp2 r0.x, r0.x +mov.f32f32 r4.w, r4.y +mov.f32f32 r5.x, r4.z +mul.f r0.w, r0.w, r3.x +mad.f32 r2.z, r2.w, r3.z, r2.z +mul.f r1.w, r1.w, r3.x +sam (f32)(w)r5.y, r2.x, s#1, t#1 +(sy)(ss)cmps.f.lt r2.x, r6.x, c14.y +mad.f32 r0.w, r0.w, r3.w, r2.z +sam (f32)(xyz)r6.y, r4.y, s#0, t#0 mov.f32f32 r2.w, c12.z -mov.f32f32 r0.x, r0.x -mov.f32f32 r4.z, r0.z -mov.f32f32 r2.y, r1.z -mov.f32f32 r2.w, r2.w -mul.f r0.x, c13.w, r0.x -bary.f r0.z, 2, r1.x -bary.f r1.z, 1, r1.x -mov.f32f32 r3.x, c12.y -mov.f32f32 r0.x, r0.x -sam (f32)(xyz)r4.x, r4.y, s#0, t#0 -(sy)mad.f32 r0.z, r4.z, r0.z, r1.w -mad.f32 r1.z, r4.y, r1.z, r3.z +mad.f32 r0.w, r1.w, r4.x, r0.w +sam (f32)(xyz)r3.z, r4.w, s#2, t#2 +(sy)mul.f r1.w, c7.y, r3.w +mul.f r2.y, c7.x, r3.z +mul.f r2.z, c7.z, r4.x +mul.f r0.w, c13.w, r0.w +(ss)mul.f r1.w, r1.w, r0.x +mul.f r2.y, r2.y, r0.x +mul.f r0.x, r2.z, r0.x +mov.f32f32 r2.z, r0.w +bary.f r3.x, 2, r1.x +bary.f r3.z, 1, r1.x bary.f (ei)r1.x, 0, r1.x -sam (f32)(w)r4.w, r2.x, s#1, t#1 -(sy)cmps.f.lt r1.y, r5.z, c14.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r1.z, r1.z -mad.f32 r0.w, r4.x, r1.x, r0.w -cov.u32f32 r1.x, r1.y -mul.f r0.z, r0.z, r0.x -mul.f r1.y, r1.z, r0.x -mov.f32f32 r0.w, r0.w -cmps.f.ne r1.x, r1.x, c12.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r1.y, r1.y -mad.f32 r0.z, c6.z, r4.z, r0.z -mad.f32 r1.y, c6.y, r4.y, r1.y -mul.f r0.x, r0.w, r0.x -mov.f32f32 r0.w, r5.z -mov.f32f32 r0.z, r0.z -mov.f32f32 r1.y, r1.y -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.w, r0.w -mul.f r0.z, r0.y, r0.z -mul.f r1.y, r0.y, r1.y -mad.f32 r0.x, c6.x, r4.x, r0.x -sel.b32 r0.w, r3.x, r1.x, r0.w -add.f r0.z, r0.z, r2.z -add.f r1.x, r1.y, r3.y -mov.f32f32 r0.x, r0.x -nop -mul.f r0.z, r0.z, r0.w -mul.f r1.x, r1.x, r0.w +cov.u32f32 r1.y, r2.x +mad.f32 r0.x, r6.w, r3.x, r0.x +mad.f32 r1.w, r6.z, r3.z, r1.w +mad.f32 r1.x, r6.y, r1.x, r2.y +cmps.f.ne r1.y, r1.y, c12.y +mul.f r0.x, r0.x, r2.z +mul.f r1.w, r1.w, r2.z +mad.f32 r0.x, c6.z, r6.w, r0.x +mad.f32 r1.w, c6.y, r6.z, r1.w +mul.f r0.w, r1.x, r0.w +mov.f32f32 r1.x, c12.y mul.f r0.x, r0.y, r0.x +mul.f r1.w, r0.y, r1.w +mad.f32 r0.w, c6.x, r6.y, r0.w +sel.b32 r1.x, r1.x, r1.y, r6.x +add.f r0.x, r0.x, r0.z +add.f r0.z, r1.w, r1.z +mul.f r0.y, r0.y, r0.w nop -mul.f r0.y, r0.z, c5.z -mul.f r0.z, r1.x, c5.y -add.f r0.x, r0.x, r3.w -nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -mul.f r0.x, r0.x, r0.w -nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -mul.f r0.x, r0.x, c5.x -nop -mov.f32f32 r2.z, r0.y -(ss)mov.f32f32 r2.y, r0.z -mov.f32f32 r0.x, r0.x +mul.f r0.x, r0.x, r1.x +mul.f r0.z, r0.z, r1.x +(rpt1)nop +mul.f r2.z, r0.x, c5.z +mul.f r2.y, r0.z, c5.y +add.f r0.x, r0.y, r3.y (rpt2)nop -mov.f32f32 r0.x, r0.x +mul.f r0.x, r0.x, r1.x (rpt2)nop -mov.f32f32 r2.x, r0.x +mul.f r2.x, r0.x, c5.x end nop +nop ; FRAG: outputs: r2.x (1:0) -; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r2.x (5:10,cm=f,il=12,b=1) r3.x (5:11,cm=f,il=16,b=1) r2.y (5:12,cm=f,il=20,b=1) r5.x (5:13,cm=f,il=24,b=1) -; FRAG: 235 instructions, 0 half, 7 full +; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r2.x (5:10,cm=f,il=12,b=1) r3.x (5:11,cm=f,il=16,b=1) r3.w (5:12,cm=f,il=20,b=1) r5.x (5:13,cm=f,il=24,b=1) +; FRAG: 155 instructions, 0 half, 8 full diff --git a/reference/0ad-alpine-valley/0ad-77.asm b/reference/0ad-alpine-valley/0ad-77.asm index 67a2ad5..e7bcae3 100644 --- a/reference/0ad-alpine-valley/0ad-77.asm +++ b/reference/0ad-alpine-valley/0ad-77.asm @@ -32,159 +32,113 @@ @out(r5.y) out21 @out(r5.z) out22 @out(r5.w) out23 +@const(c17.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 +@const(c18.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)mul.f r1.z, c12.x, r0.x mul.f r1.w, c12.x, r0.w mad.f32 r1.z, c13.x, r0.y, r1.z mad.f32 r1.w, c13.x, r1.x, r1.w mad.f32 r1.z, c14.x, r0.z, r1.z -mul.f r2.x, c12.z, r0.x -mul.f r2.y, c12.z, r0.w -mul.f r0.w, c12.y, r0.w -add.f r1.z, r1.z, c15.x -mov.f32f32 r1.w, r1.w -mad.f32 r2.x, c13.z, r0.y, r2.x -mad.f32 r2.y, c13.z, r1.x, r2.y -add.f r2.z, c4.x, (neg)r1.z mad.f32 r1.w, c14.x, r1.y, r1.w -mul.f r2.w, c8.w, r1.z -mul.f r3.x, c8.z, r1.z -mul.f r3.y, r2.z, r2.z -mul.f r3.z, c12.y, r0.x -mov.f32f32 r1.w, r1.w -mad.f32 r3.z, c13.y, r0.y, r3.z -absneg.f r3.w, (neg)c5.x -mad.f32 r3.z, c14.y, r0.z, r3.z -mul.f r4.x, c8.y, r1.z -mul.f r4.y, c8.x, r1.z -mul.f r5.x, r1.w, r3.w -add.f r5.w, r3.z, c15.y -mad.f32 r0.w, c13.y, r1.x, r0.w -mul.f r1.x, c0.w, r1.z -mul.f r3.z, c0.z, r1.z +absneg.f r2.x, (neg)c5.x +mul.f r2.y, c12.y, r0.x +add.f r2.z, r1.z, c15.x +mad.f32 r1.z, c13.y, r0.y, r2.y +mul.f r2.y, c12.z, r0.x +mul.f r2.w, r1.w, r2.x +add.f r3.x, c4.x, (neg)r2.z +mul.f r3.y, c12.y, r0.w +mul.f r3.z, c8.y, r2.z +mul.f r3.w, c8.x, r2.z +mul.f r4.x, r3.x, r3.x +mad.f32 r1.z, c14.y, r0.z, r1.z +mad.f32 r3.y, c13.y, r1.x, r3.y +mul.f r4.y, c8.w, r2.z +mul.f r5.x, c8.z, r2.z +add.f r5.w, r1.z, c15.y +mad.f32 r6.x, c14.y, r1.y, r3.y +absneg.f r1.z, (neg)c5.y +mul.f r3.y, c0.w, r2.z add.f r5.y, c4.y, (neg)r5.w -mov.f32f32 r0.w, r0.w -mad.f32 r2.w, c9.w, r5.w, r2.w -mad.f32 r3.x, c9.z, r5.w, r3.x -mad.f32 r3.y, r5.y, r5.y, r3.y -mad.f32 r0.w, c14.y, r1.y, r0.w -mad.f32 r2.x, c14.z, r0.z, r2.x -mad.f32 r4.x, c9.y, r5.w, r4.x -mov.f32f32 r3.y, r3.y -mov.f32f32 r6.x, r0.w -add.f r2.x, r2.x, c15.z -absneg.f r0.w, (neg)c5.y -mad.f32 r4.y, c9.x, r5.w, r4.y -mad.f32 r1.x, c1.w, r5.w, r1.x -add.f r5.z, c4.z, (neg)r2.x -mad.f32 r5.x, r6.x, r0.w, r5.x -mad.f32 r2.w, c10.w, r2.x, r2.w -mad.f32 r3.x, c10.z, r2.x, r3.x -mad.f32 r3.y, r5.z, r5.z, r3.y -mov.f32f32 r5.x, r5.x -mov.f32f32 r2.y, r2.y +mad.f32 r3.z, c9.y, r5.w, r3.z +mad.f32 r3.w, c9.x, r5.w, r3.w +mad.f32 r2.w, r6.x, r1.z, r2.w +mad.f32 r4.x, r5.y, r5.y, r4.x +mad.f32 r2.y, c13.z, r0.y, r2.y +mul.f r0.w, c12.z, r0.w +mad.f32 r2.y, c14.z, r0.z, r2.y +mad.f32 r0.w, c13.z, r1.x, r0.w +mad.f32 r1.x, c9.w, r5.w, r4.y +mad.f32 r4.y, c9.z, r5.w, r5.x +add.f r2.y, r2.y, c15.z +mad.f32 r6.y, c14.z, r1.y, r0.w +absneg.f r0.w, (neg)c5.z +mad.f32 r1.y, c1.w, r5.w, r3.y +add.f r3.y, c4.z, (neg)r2.y +mad.f32 r3.z, c10.y, r2.y, r3.z +mad.f32 r3.w, c10.x, r2.y, r3.w +mad.f32 r2.w, r6.y, r0.w, r2.w +mad.f32 r4.x, r3.y, r3.y, r4.x mul.f r0.x, c12.w, r0.x -mad.f32 r4.x, c10.y, r2.x, r4.x -mad.f32 r4.y, c10.x, r2.x, r4.y -mad.f32 r1.y, c14.z, r1.y, r2.y -rsq r2.y, r3.y -(ss)mov.f32f32 r2.y, r2.y -(ss)absneg.f r3.y, (neg)c5.z +mad.f32 r5.x, c10.w, r2.y, r1.x +mad.f32 r4.y, c10.z, r2.y, r4.y mad.f32 r0.x, c13.w, r0.y, r0.x -mov.f32f32 r6.y, r1.y -mad.f32 r0.y, r2.z, r2.y, r3.w -mad.f32 r0.w, r5.y, r2.y, r0.w -mad.f32 r1.y, r5.z, r2.y, r3.y -mad.f32 r2.y, r6.y, r3.y, r5.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.y, r1.y -mov.f32f32 r2.y, r2.y -mul.f r2.z, r0.y, r0.y +mad.f32 r0.y, c2.w, r2.y, r1.y +mul.f r6.z, c0.z, r2.z +rsq r1.x, r4.x +(ss)mov.f32f32 r1.y, r1.x +mad.f32 r2.x, r3.x, r1.x, r2.x +max.f r1.x, c17.x, r2.w mad.f32 r0.x, c14.w, r0.z, r0.x -mad.f32 r0.z, r0.w, r0.w, r2.z -max.f r2.y, c17.x, r2.y -mad.f32 r2.z, c2.w, r2.x, r1.x -mad.f32 r1.x, c1.z, r5.w, r3.z -mov.f32f32 r0.z, r0.z -mov.f32f32 r2.y, r2.y -mad.f32 r0.z, r1.y, r1.y, r0.z +mad.f32 r0.z, r5.y, r1.y, r1.z +mov.f32f32 r1.z, r2.x +mad.f32 r0.w, r3.y, r1.y, r0.w +nop +mov.f32f32 r2.w, r0.z +mul.f r1.y, r1.z, r1.z +(ss)mov.f32f32 r4.x, r0.w +mov.f32f32 r3.x, r1.x +mad.f32 r0.z, r0.z, r2.w, r1.y add.f r0.x, r0.x, c15.w -mad.f32 r6.z, c2.z, r2.x, r1.x -mul.f r6.w, c0.y, r1.z -mul.f r7.x, c0.x, r1.z -mad.f32 r7.y, c7.x, r2.x, c7.y -mad.f32 r7.z, c7.x, r1.z, c7.y +mad.f32 r0.z, r0.w, r4.x, r0.z +mul.f r1.z, r3.x, c6.z +mul.f r1.y, r3.x, c6.y +mad.f32 r0.w, c11.y, r0.x, r3.z +mad.f32 r3.x, c11.x, r0.x, r3.w +mul.f r1.x, r1.x, c6.x +mad.f32 r3.w, c11.w, r0.x, r5.x rsq r0.z, r0.z -(ss)mov.f32f32 r0.z, r0.z -mul.f r1.x, r2.y, c6.z -mul.f r3.y, r2.y, c6.y -mul.f r2.y, r2.y, c6.x -mul.f r1.y, r1.y, r0.z -mul.f r0.w, r0.w, r0.z -mul.f r0.y, r0.y, r0.z -mov.f32f32 r0.z, r1.x -mov.f32f32 r1.x, r1.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.y, r0.y -nop -mov.f32f32 r5.z, r1.x -mov.f32f32 r5.y, r0.w -mov.f32f32 r5.x, r0.y -mov.f32f32 r1.z, r0.z -mov.f32f32 r0.y, r3.y -mov.f32f32 r0.z, r2.y -(rpt1)nop -mov.f32f32 r1.y, r0.y -mov.f32f32 r1.x, r0.z -mad.f32 r0.y, c11.w, r0.x, r2.w -mad.f32 r0.z, c11.z, r0.x, r3.x -mad.f32 r0.w, c11.y, r0.x, r4.x -mad.f32 r2.y, c11.x, r0.x, r4.y -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -mul.f r0.w, r0.w, c16.y -mul.f r2.y, r2.y, c16.x -mov.f32f32 r3.w, r0.y -mov.f32f32 r3.z, r0.z -mov.f32f32 r3.y, r0.w -mov.f32f32 r3.x, r2.y -mad.f32 r0.y, c3.w, r0.x, r2.z -mad.f32 r0.z, c3.z, r0.x, r6.z -mad.f32 r2.y, c1.y, r5.w, r6.w -mad.f32 r2.z, c1.x, r5.w, r7.x -mov.f32f32 r0.w, r0.y -mov.f32f32 r0.z, r0.z -mad.f32 r0.y, c2.y, r2.x, r2.y -mad.f32 r2.x, c2.x, r2.x, r2.z +(ss)mov.f32f32 r3.z, r0.z +mul.f r5.x, r2.x, r0.z +mul.f r3.y, r0.w, c16.y +mul.f r3.x, r3.x, c16.x +mul.f r5.z, r4.x, r3.z +mul.f r5.y, r2.w, r3.z +mad.f32 r3.z, c11.z, r0.x, r4.y +mad.f32 r0.w, c3.w, r0.x, r0.y +mad.f32 r0.y, c1.z, r5.w, r6.z +(ss)mul.f r0.z, c0.y, r2.z +mad.f32 r0.y, c2.z, r2.y, r0.y +mad.f32 r2.x, c1.y, r5.w, r0.z +mad.f32 r0.z, c3.z, r0.x, r0.y +mad.f32 r0.y, c2.y, r2.y, r2.x +mul.f r2.x, c0.x, r2.z mad.f32 r0.y, c3.y, r0.x, r0.y +mad.f32 r2.x, c1.x, r5.w, r2.x +mad.f32 r4.y, c7.x, r2.y, c7.y +mad.f32 r2.x, c2.x, r2.y, r2.x +mad.f32 r4.x, c7.x, r2.z, c7.y mad.f32 r0.x, c3.x, r0.x, r2.x -mov.f32f32 r2.x, r7.y -mov.f32f32 r2.y, r7.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x -mov.f32f32 r4.y, r2.x -mov.f32f32 r4.x, r2.y -mov.f32f32 r2.x, r6.y +mov.f32f32 r2.z, r6.y mov.f32f32 r2.y, r6.x -mov.f32f32 r1.w, r1.w -nop -mov.f32f32 r2.z, r2.x -mov.f32f32 r2.y, r2.y mov.f32f32 r2.x, r1.w +mov.f32f32 r5.w, (0.000000) +mov.f32f32 r2.w, (0.000000) mov.f32f32 r1.w, (0.000000) -mov.f32f32 r2.w, r4.w -mov.f32f32 r4.z, r4.z -mov.f32f32 r6.x, (0.000000) -mov.f32f32 r5.w, r1.w -mov.f32f32 r4.w, r2.w -mov.f32f32 r4.z, r4.z -mov.f32f32 r2.w, r6.x -mov.f32f32 r1.w, (0.000000) -(rpt2)nop -mov.f32f32 r1.w, r1.w end nop +nop ; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11) r4.x (5:12) r5.x (5:13) ; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r4.z (0:0,cm=3,il=16,b=0) -; VERT: 154 instructions, 0 half, 8 full +; VERT: 102 instructions, 0 half, 7 full diff --git a/reference/0ad-alpine-valley/0ad-78.asm b/reference/0ad-alpine-valley/0ad-78.asm index 67a2ad5..e7bcae3 100644 --- a/reference/0ad-alpine-valley/0ad-78.asm +++ b/reference/0ad-alpine-valley/0ad-78.asm @@ -32,159 +32,113 @@ @out(r5.y) out21 @out(r5.z) out22 @out(r5.w) out23 +@const(c17.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 +@const(c18.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)mul.f r1.z, c12.x, r0.x mul.f r1.w, c12.x, r0.w mad.f32 r1.z, c13.x, r0.y, r1.z mad.f32 r1.w, c13.x, r1.x, r1.w mad.f32 r1.z, c14.x, r0.z, r1.z -mul.f r2.x, c12.z, r0.x -mul.f r2.y, c12.z, r0.w -mul.f r0.w, c12.y, r0.w -add.f r1.z, r1.z, c15.x -mov.f32f32 r1.w, r1.w -mad.f32 r2.x, c13.z, r0.y, r2.x -mad.f32 r2.y, c13.z, r1.x, r2.y -add.f r2.z, c4.x, (neg)r1.z mad.f32 r1.w, c14.x, r1.y, r1.w -mul.f r2.w, c8.w, r1.z -mul.f r3.x, c8.z, r1.z -mul.f r3.y, r2.z, r2.z -mul.f r3.z, c12.y, r0.x -mov.f32f32 r1.w, r1.w -mad.f32 r3.z, c13.y, r0.y, r3.z -absneg.f r3.w, (neg)c5.x -mad.f32 r3.z, c14.y, r0.z, r3.z -mul.f r4.x, c8.y, r1.z -mul.f r4.y, c8.x, r1.z -mul.f r5.x, r1.w, r3.w -add.f r5.w, r3.z, c15.y -mad.f32 r0.w, c13.y, r1.x, r0.w -mul.f r1.x, c0.w, r1.z -mul.f r3.z, c0.z, r1.z +absneg.f r2.x, (neg)c5.x +mul.f r2.y, c12.y, r0.x +add.f r2.z, r1.z, c15.x +mad.f32 r1.z, c13.y, r0.y, r2.y +mul.f r2.y, c12.z, r0.x +mul.f r2.w, r1.w, r2.x +add.f r3.x, c4.x, (neg)r2.z +mul.f r3.y, c12.y, r0.w +mul.f r3.z, c8.y, r2.z +mul.f r3.w, c8.x, r2.z +mul.f r4.x, r3.x, r3.x +mad.f32 r1.z, c14.y, r0.z, r1.z +mad.f32 r3.y, c13.y, r1.x, r3.y +mul.f r4.y, c8.w, r2.z +mul.f r5.x, c8.z, r2.z +add.f r5.w, r1.z, c15.y +mad.f32 r6.x, c14.y, r1.y, r3.y +absneg.f r1.z, (neg)c5.y +mul.f r3.y, c0.w, r2.z add.f r5.y, c4.y, (neg)r5.w -mov.f32f32 r0.w, r0.w -mad.f32 r2.w, c9.w, r5.w, r2.w -mad.f32 r3.x, c9.z, r5.w, r3.x -mad.f32 r3.y, r5.y, r5.y, r3.y -mad.f32 r0.w, c14.y, r1.y, r0.w -mad.f32 r2.x, c14.z, r0.z, r2.x -mad.f32 r4.x, c9.y, r5.w, r4.x -mov.f32f32 r3.y, r3.y -mov.f32f32 r6.x, r0.w -add.f r2.x, r2.x, c15.z -absneg.f r0.w, (neg)c5.y -mad.f32 r4.y, c9.x, r5.w, r4.y -mad.f32 r1.x, c1.w, r5.w, r1.x -add.f r5.z, c4.z, (neg)r2.x -mad.f32 r5.x, r6.x, r0.w, r5.x -mad.f32 r2.w, c10.w, r2.x, r2.w -mad.f32 r3.x, c10.z, r2.x, r3.x -mad.f32 r3.y, r5.z, r5.z, r3.y -mov.f32f32 r5.x, r5.x -mov.f32f32 r2.y, r2.y +mad.f32 r3.z, c9.y, r5.w, r3.z +mad.f32 r3.w, c9.x, r5.w, r3.w +mad.f32 r2.w, r6.x, r1.z, r2.w +mad.f32 r4.x, r5.y, r5.y, r4.x +mad.f32 r2.y, c13.z, r0.y, r2.y +mul.f r0.w, c12.z, r0.w +mad.f32 r2.y, c14.z, r0.z, r2.y +mad.f32 r0.w, c13.z, r1.x, r0.w +mad.f32 r1.x, c9.w, r5.w, r4.y +mad.f32 r4.y, c9.z, r5.w, r5.x +add.f r2.y, r2.y, c15.z +mad.f32 r6.y, c14.z, r1.y, r0.w +absneg.f r0.w, (neg)c5.z +mad.f32 r1.y, c1.w, r5.w, r3.y +add.f r3.y, c4.z, (neg)r2.y +mad.f32 r3.z, c10.y, r2.y, r3.z +mad.f32 r3.w, c10.x, r2.y, r3.w +mad.f32 r2.w, r6.y, r0.w, r2.w +mad.f32 r4.x, r3.y, r3.y, r4.x mul.f r0.x, c12.w, r0.x -mad.f32 r4.x, c10.y, r2.x, r4.x -mad.f32 r4.y, c10.x, r2.x, r4.y -mad.f32 r1.y, c14.z, r1.y, r2.y -rsq r2.y, r3.y -(ss)mov.f32f32 r2.y, r2.y -(ss)absneg.f r3.y, (neg)c5.z +mad.f32 r5.x, c10.w, r2.y, r1.x +mad.f32 r4.y, c10.z, r2.y, r4.y mad.f32 r0.x, c13.w, r0.y, r0.x -mov.f32f32 r6.y, r1.y -mad.f32 r0.y, r2.z, r2.y, r3.w -mad.f32 r0.w, r5.y, r2.y, r0.w -mad.f32 r1.y, r5.z, r2.y, r3.y -mad.f32 r2.y, r6.y, r3.y, r5.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.y, r1.y -mov.f32f32 r2.y, r2.y -mul.f r2.z, r0.y, r0.y +mad.f32 r0.y, c2.w, r2.y, r1.y +mul.f r6.z, c0.z, r2.z +rsq r1.x, r4.x +(ss)mov.f32f32 r1.y, r1.x +mad.f32 r2.x, r3.x, r1.x, r2.x +max.f r1.x, c17.x, r2.w mad.f32 r0.x, c14.w, r0.z, r0.x -mad.f32 r0.z, r0.w, r0.w, r2.z -max.f r2.y, c17.x, r2.y -mad.f32 r2.z, c2.w, r2.x, r1.x -mad.f32 r1.x, c1.z, r5.w, r3.z -mov.f32f32 r0.z, r0.z -mov.f32f32 r2.y, r2.y -mad.f32 r0.z, r1.y, r1.y, r0.z +mad.f32 r0.z, r5.y, r1.y, r1.z +mov.f32f32 r1.z, r2.x +mad.f32 r0.w, r3.y, r1.y, r0.w +nop +mov.f32f32 r2.w, r0.z +mul.f r1.y, r1.z, r1.z +(ss)mov.f32f32 r4.x, r0.w +mov.f32f32 r3.x, r1.x +mad.f32 r0.z, r0.z, r2.w, r1.y add.f r0.x, r0.x, c15.w -mad.f32 r6.z, c2.z, r2.x, r1.x -mul.f r6.w, c0.y, r1.z -mul.f r7.x, c0.x, r1.z -mad.f32 r7.y, c7.x, r2.x, c7.y -mad.f32 r7.z, c7.x, r1.z, c7.y +mad.f32 r0.z, r0.w, r4.x, r0.z +mul.f r1.z, r3.x, c6.z +mul.f r1.y, r3.x, c6.y +mad.f32 r0.w, c11.y, r0.x, r3.z +mad.f32 r3.x, c11.x, r0.x, r3.w +mul.f r1.x, r1.x, c6.x +mad.f32 r3.w, c11.w, r0.x, r5.x rsq r0.z, r0.z -(ss)mov.f32f32 r0.z, r0.z -mul.f r1.x, r2.y, c6.z -mul.f r3.y, r2.y, c6.y -mul.f r2.y, r2.y, c6.x -mul.f r1.y, r1.y, r0.z -mul.f r0.w, r0.w, r0.z -mul.f r0.y, r0.y, r0.z -mov.f32f32 r0.z, r1.x -mov.f32f32 r1.x, r1.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.y, r0.y -nop -mov.f32f32 r5.z, r1.x -mov.f32f32 r5.y, r0.w -mov.f32f32 r5.x, r0.y -mov.f32f32 r1.z, r0.z -mov.f32f32 r0.y, r3.y -mov.f32f32 r0.z, r2.y -(rpt1)nop -mov.f32f32 r1.y, r0.y -mov.f32f32 r1.x, r0.z -mad.f32 r0.y, c11.w, r0.x, r2.w -mad.f32 r0.z, c11.z, r0.x, r3.x -mad.f32 r0.w, c11.y, r0.x, r4.x -mad.f32 r2.y, c11.x, r0.x, r4.y -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -mul.f r0.w, r0.w, c16.y -mul.f r2.y, r2.y, c16.x -mov.f32f32 r3.w, r0.y -mov.f32f32 r3.z, r0.z -mov.f32f32 r3.y, r0.w -mov.f32f32 r3.x, r2.y -mad.f32 r0.y, c3.w, r0.x, r2.z -mad.f32 r0.z, c3.z, r0.x, r6.z -mad.f32 r2.y, c1.y, r5.w, r6.w -mad.f32 r2.z, c1.x, r5.w, r7.x -mov.f32f32 r0.w, r0.y -mov.f32f32 r0.z, r0.z -mad.f32 r0.y, c2.y, r2.x, r2.y -mad.f32 r2.x, c2.x, r2.x, r2.z +(ss)mov.f32f32 r3.z, r0.z +mul.f r5.x, r2.x, r0.z +mul.f r3.y, r0.w, c16.y +mul.f r3.x, r3.x, c16.x +mul.f r5.z, r4.x, r3.z +mul.f r5.y, r2.w, r3.z +mad.f32 r3.z, c11.z, r0.x, r4.y +mad.f32 r0.w, c3.w, r0.x, r0.y +mad.f32 r0.y, c1.z, r5.w, r6.z +(ss)mul.f r0.z, c0.y, r2.z +mad.f32 r0.y, c2.z, r2.y, r0.y +mad.f32 r2.x, c1.y, r5.w, r0.z +mad.f32 r0.z, c3.z, r0.x, r0.y +mad.f32 r0.y, c2.y, r2.y, r2.x +mul.f r2.x, c0.x, r2.z mad.f32 r0.y, c3.y, r0.x, r0.y +mad.f32 r2.x, c1.x, r5.w, r2.x +mad.f32 r4.y, c7.x, r2.y, c7.y +mad.f32 r2.x, c2.x, r2.y, r2.x +mad.f32 r4.x, c7.x, r2.z, c7.y mad.f32 r0.x, c3.x, r0.x, r2.x -mov.f32f32 r2.x, r7.y -mov.f32f32 r2.y, r7.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x -mov.f32f32 r4.y, r2.x -mov.f32f32 r4.x, r2.y -mov.f32f32 r2.x, r6.y +mov.f32f32 r2.z, r6.y mov.f32f32 r2.y, r6.x -mov.f32f32 r1.w, r1.w -nop -mov.f32f32 r2.z, r2.x -mov.f32f32 r2.y, r2.y mov.f32f32 r2.x, r1.w +mov.f32f32 r5.w, (0.000000) +mov.f32f32 r2.w, (0.000000) mov.f32f32 r1.w, (0.000000) -mov.f32f32 r2.w, r4.w -mov.f32f32 r4.z, r4.z -mov.f32f32 r6.x, (0.000000) -mov.f32f32 r5.w, r1.w -mov.f32f32 r4.w, r2.w -mov.f32f32 r4.z, r4.z -mov.f32f32 r2.w, r6.x -mov.f32f32 r1.w, (0.000000) -(rpt2)nop -mov.f32f32 r1.w, r1.w end nop +nop ; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11) r4.x (5:12) r5.x (5:13) ; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r4.z (0:0,cm=3,il=16,b=0) -; VERT: 154 instructions, 0 half, 8 full +; VERT: 102 instructions, 0 half, 7 full diff --git a/reference/0ad-alpine-valley/0ad-79.asm b/reference/0ad-alpine-valley/0ad-79.asm index f56593b..57bb137 100644 --- a/reference/0ad-alpine-valley/0ad-79.asm +++ b/reference/0ad-alpine-valley/0ad-79.asm @@ -8,251 +8,183 @@ @out(r2.y) out1 @out(r2.z) out2 @out(r2.w) out3 +@const(c13.x) 0x3f000000, 0x00000000, 0x3f800000, 0xbb449ba6 +@const(c14.x) 0xbf000000, 0x40000000, 0x3f800000, 0x3de38866 +@const(c15.x) 0x3fb8aa65, 0x3cf5c28f, 0x00000000, 0x00000000 +@const(c16.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.x, 8, r1.x add.f r0.y, r0.w, c13.y bary.f r0.w, 4, r1.x bary.f r1.z, 9, r1.x add.f r1.w, r0.x, c14.x bary.f r2.x, 14, r1.x -mul.f r2.y, r0.w, r0.w -bary.f r2.z, 5, r1.x +bary.f r2.y, 15, r1.x +add.f r2.z, r1.z, c14.x floor.f r2.w, r1.w rcp r0.y, r0.y add.f r0.z, r0.z, c13.y -add.f r3.x, r1.z, c14.x -mad.f32 r2.y, r2.z, r2.z, r2.y +mul.f r3.x, r0.w, r0.w +bary.f r3.y, 5, r1.x add.f r1.w, r1.w, (neg)r2.w (ss)mul.f r0.y, r0.z, r0.y -floor.f r0.z, r3.x -mov.f32f32 r2.y, r2.y -mov.f32f32 r1.w, r1.w +absneg.f r0.z, (neg)c9.x +mad.f32 r2.w, r3.y, r3.y, r3.x +mov.f32f32 r3.x, r1.w +bary.f r3.z, 6, r1.x +mul.f r0.z, r0.z, c9.x +floor.f r3.w, r2.z +mul.f r4.x, c13.x, r3.x +mad.f32 r2.w, r3.z, r3.z, r2.w +mul.f r0.z, r0.z, r0.y mov.f32f32 r0.y, r0.y -absneg.f r2.w, (neg)c9.x -bary.f r3.y, 6, r1.x -mul.f r3.z, c13.x, r1.w -add.f r0.z, r3.x, (neg)r0.z -mul.f r2.w, r2.w, c9.x -mad.f32 r2.y, r3.y, r3.y, r2.y -mov.f32f32 r3.x, r3.z -mov.f32f32 r0.z, r0.z -mul.f r2.w, r2.w, r0.y -mov.f32f32 r2.x, r2.x -add.f r0.x, r0.x, (neg)r3.x -mul.f r3.x, c13.x, r0.z -mov.f32f32 r2.w, r2.w -rsq r2.y, r2.y -(ss)mov.f32f32 r2.y, r2.y -mov.f32f32 r0.x, r0.x -mov.f32f32 r3.x, r3.x -mul.f r0.y, r2.w, r0.y -mul.f r0.w, r0.w, r2.y -add.f r2.w, c14.x, r0.x -add.f r0.x, c14.z, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r2.w, r2.w -bary.f r3.z, 16, r1.x +add.f r0.x, r0.x, (neg)r4.x +add.f r2.z, r2.z, (neg)r3.w +add.f r3.x, c14.y, (neg)r3.x +mul.f r0.y, r0.z, r0.y +mov.f32f32 r0.z, r0.x +rsq r2.w, r2.w +(ss)mov.f32f32 r3.w, r2.w +mov.f32f32 r4.x, r2.z mul.f r0.y, r0.y, c15.x -mov.f32f32 r0.x, r0.x -mul.f r2.w, r2.w, c3.z -mul.f r0.w, r0.w, r3.z -mul.f r2.z, r2.z, r2.y -mov.f32f32 r0.y, r0.y -mov.f32f32 r3.z, r2.w -mul.f r0.x, r0.x, c3.z -mov.f32f32 r2.z, r2.z -bary.f r3.w, 17, r1.x -mov.f32f32 r4.x, r3.z -add.f r1.z, r1.z, (neg)r3.x +add.f r0.z, c14.x, r0.z +mul.f r0.w, r0.w, r3.w +bary.f r4.y, 16, r1.x +mul.f r4.z, c13.x, r4.x +mul.f r4.w, r0.z, c3.z +add.f r0.x, c14.z, r0.x +mul.f r0.z, r0.w, r4.y exp2 r0.y, r0.y -(ss)mov.f32f32 r0.y, r0.y -mad.f32 r0.w, r2.z, r3.w, r0.w -mov.f32f32 r2.z, r0.x -mov.f32f32 r1.z, r1.z -add.f r3.x, c16.y, (neg)r0.y -mov.f32f32 r0.w, r0.w -mul.f r2.y, r3.y, r2.y -add.f r3.y, c14.x, r1.z -mul.f r3.x, r3.x, c9.y -mul.f r0.y, r0.y, c13.z -mov.f32f32 r2.y, r2.y -mov.f32f32 r3.y, r3.y -bary.f r3.z, 18, r1.x -add.f r0.y, r0.y, r3.x -mov.f32f32 r2.z, r2.z -mul.f r3.x, r3.y, c3.w -mad.f32 r0.w, r2.y, r3.z, r0.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r3.y, r2.z -mov.f32f32 r2.y, r3.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.y, r0.y -add.f r1.z, c14.z, r1.z -mov.f32f32 r4.y, r2.y -bary.f r2.y, 10, r1.x -max.f r0.y, r0.y, c13.y -max.f r0.w, c13.y, r0.w -mov.f32f32 r1.z, r1.z -add.f r2.y, r2.y, c13.w -min.f r0.y, r0.y, c13.z -mov.f32f32 r0.w, r0.w -mul.f r1.z, r1.z, c3.w -mov.f32f32 r2.z, r2.y -add.f r3.z, c16.y, (neg)r0.y -add.f r3.w, c16.y, (neg)r0.y -add.f r4.w, c16.y, (neg)r0.y -mov.f32f32 r4.z, r2.z -mul.f r2.z, r3.z, c8.z -mul.f r5.x, r3.w, c8.y -mul.f r4.w, r4.w, c8.x -log2 r0.w, r0.w -(ss)mul.f r0.w, c10.x, r0.w -mov.f32f32 r3.z, r1.z -mov.f32f32 r0.x, r0.x -sam.s (f32)(x)r3.w, r4.x, s#2, t#2 -(sy)mov.f32f32 r3.w, r3.w -mov.f32f32 r0.w, r0.w -mov.f32f32 r3.z, r3.z -(ss)mov.f32f32 r4.x, r0.x -mov.f32f32 r0.x, r3.w -add.f r3.w, c14.y, (neg)r1.w -mov.f32f32 r3.z, r3.z -mov.f32f32 r4.z, r2.y -mov.f32f32 r3.x, r3.x -mov.f32f32 r5.y, r3.w -add.f r3.w, c14.y, (neg)r0.z -exp2 r0.w, r0.w -mov.f32f32 r4.y, c7.y -mov.f32f32 r5.z, c7.x -mov.f32f32 r5.w, c7.z -mov.f32f32 r6.x, r3.w -mul.f r3.w, r4.y, c11.y -mul.f r4.y, r5.z, c11.x -mul.f r5.z, r5.w, c11.z -mul.f r5.w, r5.y, r6.x -(ss)mul.f r6.y, r3.w, r0.w -mul.f r6.z, r4.y, r0.w -(ss)mul.f r0.w, r5.z, r0.w -mul.f r0.x, r5.w, r0.x -mov.f32f32 r4.y, r3.x -mov.f32f32 r3.x, r2.y -mov.f32f32 r5.z, r2.x -bary.f r2.x, 15, r1.x -mov.f32f32 r3.w, r4.z -mov.f32f32 r4.z, r3.x -mov.f32f32 r2.w, r2.w -mov.f32f32 r1.z, r1.z -mov.f32f32 r2.x, r2.x -add.f r1.w, r1.w, c13.z -add.f r0.z, r0.z, c13.z -sam.s (f32)(x)r3.x, r3.y, s#2, t#2 -(sy)mov.f32f32 r3.x, r3.x -(ss)nop -sam.s (f32)(x)r3.y, r4.x, s#2, t#2 -(sy)mov.f32f32 r3.y, r3.y -mul.f r3.z, r1.w, r6.x -mov.f32f32 r5.w, r2.x -mov.f32f32 r3.w, r2.w -(ss)mov.f32f32 r4.x, r1.z -mad.f32 r0.x, r3.z, r3.y, r0.x -mov.f32f32 r1.z, r2.y -mul.f r2.x, r5.y, r0.z -bary.f r2.y, 12, r1.x -mov.f32f32 r0.x, r0.x +(ss)mov.f32f32 r0.w, r0.y +mov.f32f32 r5.z, r4.w +add.f r1.z, r1.z, (neg)r4.z +mul.f r3.y, r3.y, r3.w +bary.f r3.w, 17, r1.x +add.f r0.w, c16.y, (neg)r0.w mov.f32f32 r4.y, r1.z -sam (f32)(xyzw)r5.y, r5.z, s#0, t#0 -(sy)add.f r1.z, c16.y, (neg)r6.x -add.f r2.w, c16.y, (neg)r6.x -add.f r3.y, c16.y, (neg)r6.x -mov.f32f32 r2.y, r2.y -mul.f r1.z, r1.z, c4.x -mul.f r0.z, r1.w, r0.z -sam.s (f32)(x)r3.z, r3.w, s#2, t#2 -(sy)mov.f32f32 r1.w, r3.z -mul.f r2.w, r2.w, c4.z -mul.f r3.y, r3.y, c4.y -mul.f r3.z, r6.x, c13.z -mad.f32 r0.x, r2.x, r1.w, r0.x -mul.f r1.w, r6.x, c13.z -mul.f r2.x, r6.x, c13.z -add.f r1.z, r3.z, r1.z -mov.f32f32 r0.x, r0.x -add.f r1.w, r1.w, r2.w -mad.f32 r0.x, r0.z, r3.x, r0.x -add.f r0.z, r2.x, r3.y -mul.f r1.z, r5.y, r1.z -mul.f r1.w, r5.w, r1.w -mov.f32f32 r0.x, r0.x -mul.f r0.z, r5.z, r0.z -mov.f32f32 r1.z, r1.z -mov.f32f32 r1.w, r1.w -mul.f r0.x, c14.w, r0.x -bary.f r2.x, 2, r1.x -mov.f32f32 r0.z, r0.z -bary.f r2.w, 0, r1.x -mov.f32f32 r0.x, r0.x -mad.f32 r0.w, r1.w, r2.x, r0.w -bary.f r2.x, 1, r1.x -mad.f32 r2.w, r1.z, r2.w, r6.z -mov.f32f32 r3.x, r2.y -mov.f32f32 r0.w, r0.w -mad.f32 r2.x, r0.z, r2.x, r6.y -mov.f32f32 r2.y, r2.w -bary.f (ei)r1.x, 13, r1.x -mul.f r0.w, r0.w, r0.x -mov.f32f32 r1.y, r2.x -mul.f r2.x, r2.y, r0.x -mov.f32f32 r1.x, r1.x -mov.f32f32 r0.w, r0.w -mul.f r0.x, r1.y, r0.x -mad.f32 r0.w, c6.z, r1.w, r0.w -mov.f32f32 r1.y, r2.x -mov.f32f32 r3.y, r1.x -mov.f32f32 r1.x, c13.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.x, r0.x -mad.f32 r1.y, c6.x, r1.z, r1.y -mad.f32 r0.x, c6.y, r0.z, r0.x -mul.f r0.z, r0.y, r0.w -sam (f32)(w)r1.z, r3.x, s#1, t#1 -(sy)cmps.f.lt r0.w, r2.y, c15.y -mov.f32f32 r1.z, r2.y -mov.f32f32 r2.w, r1.x -add.f r0.z, r0.z, r2.z -cov.u32f32 r0.w, r0.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r1.x, r1.y -mov.f32f32 r1.y, r1.z -cmps.f.ne r0.w, r0.w, c13.y -mov.f32f32 r1.z, c13.y -mul.f r0.x, r0.y, r0.x -mul.f r0.y, r0.y, r1.x -nop -sel.b32 r0.w, r1.z, r0.w, r1.y -add.f r0.x, r0.x, r5.x -add.f r0.y, r0.y, r4.w +mul.f r6.y, r0.x, c3.z +add.f r0.x, c14.z, r1.z +mad.f32 r0.z, r3.y, r3.w, r0.z +add.f r1.z, c14.x, r4.y +mul.f r0.w, r0.w, c9.y +(ss)mul.f r0.y, r0.y, c13.z +mul.f r2.w, r3.z, r2.w +mul.f r3.z, r1.z, c3.w +bary.f r1.z, 18, r1.x +add.f r0.y, r0.y, r0.w +mov.f32f32 r3.y, r6.y +mov.f32f32 r5.w, r3.z +bary.f r0.w, 10, r1.x +max.f r0.y, r0.y, c13.y +mad.f32 r0.z, r2.w, r1.z, r0.z +mul.f r6.z, r0.x, c3.w +add.f r6.w, r0.w, c13.w +min.f r0.x, r0.y, c13.z +max.f r0.y, c13.y, r0.z +mov.f32f32 r5.x, r6.z +mov.f32f32 r6.x, r6.w +add.f r0.z, c16.y, (neg)r0.x +add.f r0.w, c16.y, (neg)r0.x +add.f r1.z, c16.y, (neg)r0.x +mov.f32f32 r3.w, r6.w +mov.f32f32 r5.y, r6.w +log2 r0.y, r0.y +mul.f r0.z, r0.z, c8.z +sam.s (f32)(x)r7.x, r5.z, s#2, t#2 +mov.f32f32 r2.w, r3.x +add.f r4.x, c14.y, (neg)r4.x +mul.f r0.w, r0.w, c8.y +mul.f r1.z, r1.z, c8.x +(ss)mul.f r0.y, c10.x, r0.y +mov.f32f32 r4.y, r4.x nop -mul.f r0.z, r0.z, r0.w -mul.f r0.x, r0.x, r0.w -mul.f r0.y, r0.y, r0.w +sam.s (f32)(x)r7.y, r3.y, s#2, t#2 +sam.s (f32)(x)r4.z, r4.w, s#2, t#2 nop -mul.f r0.z, r0.z, c5.z -mul.f r0.x, r0.x, c5.y -mul.f r0.y, r0.y, c5.x +(ss)nop +sam.s (f32)(x)r4.w, r6.y, s#2, t#2 +add.f r1.w, r1.w, c13.z +mul.f r2.w, r2.w, r4.y +add.f r2.z, r2.z, c13.z +sam (f32)(xyzw)r5.x, r2.x, s#0, t#0 +exp2 r0.y, r0.y +(ss)mov.f32f32 r2.x, c7.y +mov.f32f32 r2.y, c7.z +(sy)mul.f r2.w, r2.w, r7.x +mul.f r3.y, r1.w, r4.x +mul.f r2.x, r2.x, c11.y +mov.f32f32 r3.z, c7.x +mul.f r2.y, r2.y, c11.z +mad.f32 r2.w, r3.y, r7.y, r2.w +mul.f r3.x, r3.x, r2.z +(ss)mul.f r2.x, r2.x, r0.y +mul.f r3.y, r3.z, c11.x +mul.f r2.y, r2.y, r0.y +mad.f32 r2.w, r3.x, r4.z, r2.w +mul.f r1.w, r1.w, r2.z +add.f r2.z, c16.y, (neg)r5.w +mul.f r0.y, r3.y, r0.y +add.f r3.x, c16.y, (neg)r5.w +mad.f32 r1.w, r1.w, r4.w, r2.w +mul.f r2.z, r2.z, c4.y +add.f r2.w, c16.y, (neg)r5.w +mul.f r3.x, r3.x, c4.z +mul.f r1.w, c14.w, r1.w +mul.f r3.y, r5.w, c13.z +mul.f r3.z, r5.w, c13.z +mul.f r2.w, r2.w, c4.x +mov.f32f32 r3.w, r1.w +add.f r3.x, r3.y, r3.x +add.f r2.z, r3.z, r2.z +mul.f r3.y, r5.w, c13.z +bary.f r4.x, 12, r1.x +mul.f r3.x, r5.z, r3.x +mul.f r2.z, r5.y, r2.z +add.f r2.w, r3.y, r2.w +bary.f r4.y, 13, r1.x +mov.f32f32 r3.y, r3.x +bary.f r3.z, 2, r1.x +mov.f32f32 r4.z, r2.z +bary.f r4.w, 1, r1.x +mul.f r5.x, r5.x, r2.w +mad.f32 r2.y, r3.y, r3.z, r2.y +sam (f32)(w)r5.y, r4.x, s#1, t#1 +(sy)cmps.f.lt r3.y, r6.x, c15.y +mov.f32f32 r2.w, c13.z +mov.f32f32 r3.z, c13.y +mul.f r2.y, r2.y, r3.w +mad.f32 r2.x, r4.z, r4.w, r2.x +mad.f32 r2.y, c6.z, r3.x, r2.y +mov.f32f32 r3.x, r5.x +bary.f (ei)r1.x, 0, r1.x +mul.f r1.y, r2.x, r3.w +mul.f r2.x, r0.x, r2.y +mad.f32 r1.y, c6.y, r2.z, r1.y +mad.f32 r0.y, r3.x, r1.x, r0.y +cov.u32f32 r1.x, r3.y +add.f r0.z, r2.x, r0.z +mul.f r1.y, r0.x, r1.y +mul.f r0.y, r0.y, r1.w +cmps.f.ne r1.x, r1.x, c13.y +mad.f32 r0.y, c6.x, r5.x, r0.y +add.f r0.w, r1.y, r0.w nop -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y +sel.b32 r1.x, r3.z, r1.x, r6.x +mul.f r0.x, r0.x, r0.y +(rpt1)nop +mul.f r0.y, r0.z, r1.x +mul.f r0.z, r0.w, r1.x +(rpt1)nop +mul.f r2.z, r0.y, c5.z +mul.f r2.y, r0.z, c5.y +add.f r0.x, r0.x, r1.z +(rpt2)nop +mul.f r0.x, r0.x, r1.x +(rpt2)nop +mul.f r2.x, r0.x, c5.x +end nop -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y nop -mov.f32f32 r2.z, r0.z -mov.f32f32 r2.y, r0.x -mov.f32f32 r2.x, r0.y -end ; FRAG: outputs: r2.x (1:0) -; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r2.x (5:10,cm=f,il=12,b=1) r3.x (5:11,cm=f,il=16,b=1) r1.y (5:12,cm=f,il=20,b=1) r5.x (5:13,cm=f,il=24,b=1) -; FRAG: 244 instructions, 0 half, 7 full +; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r2.x (5:10,cm=f,il=12,b=1) r3.x (5:11,cm=f,il=16,b=1) r1.z (5:12,cm=f,il=20,b=1) r5.x (5:13,cm=f,il=24,b=1) +; FRAG: 176 instructions, 0 half, 8 full diff --git a/reference/0ad-alpine-valley/0ad-80.asm b/reference/0ad-alpine-valley/0ad-80.asm index 67a2ad5..e7bcae3 100644 --- a/reference/0ad-alpine-valley/0ad-80.asm +++ b/reference/0ad-alpine-valley/0ad-80.asm @@ -32,159 +32,113 @@ @out(r5.y) out21 @out(r5.z) out22 @out(r5.w) out23 +@const(c17.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 +@const(c18.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)mul.f r1.z, c12.x, r0.x mul.f r1.w, c12.x, r0.w mad.f32 r1.z, c13.x, r0.y, r1.z mad.f32 r1.w, c13.x, r1.x, r1.w mad.f32 r1.z, c14.x, r0.z, r1.z -mul.f r2.x, c12.z, r0.x -mul.f r2.y, c12.z, r0.w -mul.f r0.w, c12.y, r0.w -add.f r1.z, r1.z, c15.x -mov.f32f32 r1.w, r1.w -mad.f32 r2.x, c13.z, r0.y, r2.x -mad.f32 r2.y, c13.z, r1.x, r2.y -add.f r2.z, c4.x, (neg)r1.z mad.f32 r1.w, c14.x, r1.y, r1.w -mul.f r2.w, c8.w, r1.z -mul.f r3.x, c8.z, r1.z -mul.f r3.y, r2.z, r2.z -mul.f r3.z, c12.y, r0.x -mov.f32f32 r1.w, r1.w -mad.f32 r3.z, c13.y, r0.y, r3.z -absneg.f r3.w, (neg)c5.x -mad.f32 r3.z, c14.y, r0.z, r3.z -mul.f r4.x, c8.y, r1.z -mul.f r4.y, c8.x, r1.z -mul.f r5.x, r1.w, r3.w -add.f r5.w, r3.z, c15.y -mad.f32 r0.w, c13.y, r1.x, r0.w -mul.f r1.x, c0.w, r1.z -mul.f r3.z, c0.z, r1.z +absneg.f r2.x, (neg)c5.x +mul.f r2.y, c12.y, r0.x +add.f r2.z, r1.z, c15.x +mad.f32 r1.z, c13.y, r0.y, r2.y +mul.f r2.y, c12.z, r0.x +mul.f r2.w, r1.w, r2.x +add.f r3.x, c4.x, (neg)r2.z +mul.f r3.y, c12.y, r0.w +mul.f r3.z, c8.y, r2.z +mul.f r3.w, c8.x, r2.z +mul.f r4.x, r3.x, r3.x +mad.f32 r1.z, c14.y, r0.z, r1.z +mad.f32 r3.y, c13.y, r1.x, r3.y +mul.f r4.y, c8.w, r2.z +mul.f r5.x, c8.z, r2.z +add.f r5.w, r1.z, c15.y +mad.f32 r6.x, c14.y, r1.y, r3.y +absneg.f r1.z, (neg)c5.y +mul.f r3.y, c0.w, r2.z add.f r5.y, c4.y, (neg)r5.w -mov.f32f32 r0.w, r0.w -mad.f32 r2.w, c9.w, r5.w, r2.w -mad.f32 r3.x, c9.z, r5.w, r3.x -mad.f32 r3.y, r5.y, r5.y, r3.y -mad.f32 r0.w, c14.y, r1.y, r0.w -mad.f32 r2.x, c14.z, r0.z, r2.x -mad.f32 r4.x, c9.y, r5.w, r4.x -mov.f32f32 r3.y, r3.y -mov.f32f32 r6.x, r0.w -add.f r2.x, r2.x, c15.z -absneg.f r0.w, (neg)c5.y -mad.f32 r4.y, c9.x, r5.w, r4.y -mad.f32 r1.x, c1.w, r5.w, r1.x -add.f r5.z, c4.z, (neg)r2.x -mad.f32 r5.x, r6.x, r0.w, r5.x -mad.f32 r2.w, c10.w, r2.x, r2.w -mad.f32 r3.x, c10.z, r2.x, r3.x -mad.f32 r3.y, r5.z, r5.z, r3.y -mov.f32f32 r5.x, r5.x -mov.f32f32 r2.y, r2.y +mad.f32 r3.z, c9.y, r5.w, r3.z +mad.f32 r3.w, c9.x, r5.w, r3.w +mad.f32 r2.w, r6.x, r1.z, r2.w +mad.f32 r4.x, r5.y, r5.y, r4.x +mad.f32 r2.y, c13.z, r0.y, r2.y +mul.f r0.w, c12.z, r0.w +mad.f32 r2.y, c14.z, r0.z, r2.y +mad.f32 r0.w, c13.z, r1.x, r0.w +mad.f32 r1.x, c9.w, r5.w, r4.y +mad.f32 r4.y, c9.z, r5.w, r5.x +add.f r2.y, r2.y, c15.z +mad.f32 r6.y, c14.z, r1.y, r0.w +absneg.f r0.w, (neg)c5.z +mad.f32 r1.y, c1.w, r5.w, r3.y +add.f r3.y, c4.z, (neg)r2.y +mad.f32 r3.z, c10.y, r2.y, r3.z +mad.f32 r3.w, c10.x, r2.y, r3.w +mad.f32 r2.w, r6.y, r0.w, r2.w +mad.f32 r4.x, r3.y, r3.y, r4.x mul.f r0.x, c12.w, r0.x -mad.f32 r4.x, c10.y, r2.x, r4.x -mad.f32 r4.y, c10.x, r2.x, r4.y -mad.f32 r1.y, c14.z, r1.y, r2.y -rsq r2.y, r3.y -(ss)mov.f32f32 r2.y, r2.y -(ss)absneg.f r3.y, (neg)c5.z +mad.f32 r5.x, c10.w, r2.y, r1.x +mad.f32 r4.y, c10.z, r2.y, r4.y mad.f32 r0.x, c13.w, r0.y, r0.x -mov.f32f32 r6.y, r1.y -mad.f32 r0.y, r2.z, r2.y, r3.w -mad.f32 r0.w, r5.y, r2.y, r0.w -mad.f32 r1.y, r5.z, r2.y, r3.y -mad.f32 r2.y, r6.y, r3.y, r5.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.y, r1.y -mov.f32f32 r2.y, r2.y -mul.f r2.z, r0.y, r0.y +mad.f32 r0.y, c2.w, r2.y, r1.y +mul.f r6.z, c0.z, r2.z +rsq r1.x, r4.x +(ss)mov.f32f32 r1.y, r1.x +mad.f32 r2.x, r3.x, r1.x, r2.x +max.f r1.x, c17.x, r2.w mad.f32 r0.x, c14.w, r0.z, r0.x -mad.f32 r0.z, r0.w, r0.w, r2.z -max.f r2.y, c17.x, r2.y -mad.f32 r2.z, c2.w, r2.x, r1.x -mad.f32 r1.x, c1.z, r5.w, r3.z -mov.f32f32 r0.z, r0.z -mov.f32f32 r2.y, r2.y -mad.f32 r0.z, r1.y, r1.y, r0.z +mad.f32 r0.z, r5.y, r1.y, r1.z +mov.f32f32 r1.z, r2.x +mad.f32 r0.w, r3.y, r1.y, r0.w +nop +mov.f32f32 r2.w, r0.z +mul.f r1.y, r1.z, r1.z +(ss)mov.f32f32 r4.x, r0.w +mov.f32f32 r3.x, r1.x +mad.f32 r0.z, r0.z, r2.w, r1.y add.f r0.x, r0.x, c15.w -mad.f32 r6.z, c2.z, r2.x, r1.x -mul.f r6.w, c0.y, r1.z -mul.f r7.x, c0.x, r1.z -mad.f32 r7.y, c7.x, r2.x, c7.y -mad.f32 r7.z, c7.x, r1.z, c7.y +mad.f32 r0.z, r0.w, r4.x, r0.z +mul.f r1.z, r3.x, c6.z +mul.f r1.y, r3.x, c6.y +mad.f32 r0.w, c11.y, r0.x, r3.z +mad.f32 r3.x, c11.x, r0.x, r3.w +mul.f r1.x, r1.x, c6.x +mad.f32 r3.w, c11.w, r0.x, r5.x rsq r0.z, r0.z -(ss)mov.f32f32 r0.z, r0.z -mul.f r1.x, r2.y, c6.z -mul.f r3.y, r2.y, c6.y -mul.f r2.y, r2.y, c6.x -mul.f r1.y, r1.y, r0.z -mul.f r0.w, r0.w, r0.z -mul.f r0.y, r0.y, r0.z -mov.f32f32 r0.z, r1.x -mov.f32f32 r1.x, r1.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.y, r0.y -nop -mov.f32f32 r5.z, r1.x -mov.f32f32 r5.y, r0.w -mov.f32f32 r5.x, r0.y -mov.f32f32 r1.z, r0.z -mov.f32f32 r0.y, r3.y -mov.f32f32 r0.z, r2.y -(rpt1)nop -mov.f32f32 r1.y, r0.y -mov.f32f32 r1.x, r0.z -mad.f32 r0.y, c11.w, r0.x, r2.w -mad.f32 r0.z, c11.z, r0.x, r3.x -mad.f32 r0.w, c11.y, r0.x, r4.x -mad.f32 r2.y, c11.x, r0.x, r4.y -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -mul.f r0.w, r0.w, c16.y -mul.f r2.y, r2.y, c16.x -mov.f32f32 r3.w, r0.y -mov.f32f32 r3.z, r0.z -mov.f32f32 r3.y, r0.w -mov.f32f32 r3.x, r2.y -mad.f32 r0.y, c3.w, r0.x, r2.z -mad.f32 r0.z, c3.z, r0.x, r6.z -mad.f32 r2.y, c1.y, r5.w, r6.w -mad.f32 r2.z, c1.x, r5.w, r7.x -mov.f32f32 r0.w, r0.y -mov.f32f32 r0.z, r0.z -mad.f32 r0.y, c2.y, r2.x, r2.y -mad.f32 r2.x, c2.x, r2.x, r2.z +(ss)mov.f32f32 r3.z, r0.z +mul.f r5.x, r2.x, r0.z +mul.f r3.y, r0.w, c16.y +mul.f r3.x, r3.x, c16.x +mul.f r5.z, r4.x, r3.z +mul.f r5.y, r2.w, r3.z +mad.f32 r3.z, c11.z, r0.x, r4.y +mad.f32 r0.w, c3.w, r0.x, r0.y +mad.f32 r0.y, c1.z, r5.w, r6.z +(ss)mul.f r0.z, c0.y, r2.z +mad.f32 r0.y, c2.z, r2.y, r0.y +mad.f32 r2.x, c1.y, r5.w, r0.z +mad.f32 r0.z, c3.z, r0.x, r0.y +mad.f32 r0.y, c2.y, r2.y, r2.x +mul.f r2.x, c0.x, r2.z mad.f32 r0.y, c3.y, r0.x, r0.y +mad.f32 r2.x, c1.x, r5.w, r2.x +mad.f32 r4.y, c7.x, r2.y, c7.y +mad.f32 r2.x, c2.x, r2.y, r2.x +mad.f32 r4.x, c7.x, r2.z, c7.y mad.f32 r0.x, c3.x, r0.x, r2.x -mov.f32f32 r2.x, r7.y -mov.f32f32 r2.y, r7.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x -mov.f32f32 r4.y, r2.x -mov.f32f32 r4.x, r2.y -mov.f32f32 r2.x, r6.y +mov.f32f32 r2.z, r6.y mov.f32f32 r2.y, r6.x -mov.f32f32 r1.w, r1.w -nop -mov.f32f32 r2.z, r2.x -mov.f32f32 r2.y, r2.y mov.f32f32 r2.x, r1.w +mov.f32f32 r5.w, (0.000000) +mov.f32f32 r2.w, (0.000000) mov.f32f32 r1.w, (0.000000) -mov.f32f32 r2.w, r4.w -mov.f32f32 r4.z, r4.z -mov.f32f32 r6.x, (0.000000) -mov.f32f32 r5.w, r1.w -mov.f32f32 r4.w, r2.w -mov.f32f32 r4.z, r4.z -mov.f32f32 r2.w, r6.x -mov.f32f32 r1.w, (0.000000) -(rpt2)nop -mov.f32f32 r1.w, r1.w end nop +nop ; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11) r4.x (5:12) r5.x (5:13) ; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r4.z (0:0,cm=3,il=16,b=0) -; VERT: 154 instructions, 0 half, 8 full +; VERT: 102 instructions, 0 half, 7 full diff --git a/reference/0ad-alpine-valley/0ad-81.asm b/reference/0ad-alpine-valley/0ad-81.asm index 67a2ad5..e7bcae3 100644 --- a/reference/0ad-alpine-valley/0ad-81.asm +++ b/reference/0ad-alpine-valley/0ad-81.asm @@ -32,159 +32,113 @@ @out(r5.y) out21 @out(r5.z) out22 @out(r5.w) out23 +@const(c17.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 +@const(c18.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)mul.f r1.z, c12.x, r0.x mul.f r1.w, c12.x, r0.w mad.f32 r1.z, c13.x, r0.y, r1.z mad.f32 r1.w, c13.x, r1.x, r1.w mad.f32 r1.z, c14.x, r0.z, r1.z -mul.f r2.x, c12.z, r0.x -mul.f r2.y, c12.z, r0.w -mul.f r0.w, c12.y, r0.w -add.f r1.z, r1.z, c15.x -mov.f32f32 r1.w, r1.w -mad.f32 r2.x, c13.z, r0.y, r2.x -mad.f32 r2.y, c13.z, r1.x, r2.y -add.f r2.z, c4.x, (neg)r1.z mad.f32 r1.w, c14.x, r1.y, r1.w -mul.f r2.w, c8.w, r1.z -mul.f r3.x, c8.z, r1.z -mul.f r3.y, r2.z, r2.z -mul.f r3.z, c12.y, r0.x -mov.f32f32 r1.w, r1.w -mad.f32 r3.z, c13.y, r0.y, r3.z -absneg.f r3.w, (neg)c5.x -mad.f32 r3.z, c14.y, r0.z, r3.z -mul.f r4.x, c8.y, r1.z -mul.f r4.y, c8.x, r1.z -mul.f r5.x, r1.w, r3.w -add.f r5.w, r3.z, c15.y -mad.f32 r0.w, c13.y, r1.x, r0.w -mul.f r1.x, c0.w, r1.z -mul.f r3.z, c0.z, r1.z +absneg.f r2.x, (neg)c5.x +mul.f r2.y, c12.y, r0.x +add.f r2.z, r1.z, c15.x +mad.f32 r1.z, c13.y, r0.y, r2.y +mul.f r2.y, c12.z, r0.x +mul.f r2.w, r1.w, r2.x +add.f r3.x, c4.x, (neg)r2.z +mul.f r3.y, c12.y, r0.w +mul.f r3.z, c8.y, r2.z +mul.f r3.w, c8.x, r2.z +mul.f r4.x, r3.x, r3.x +mad.f32 r1.z, c14.y, r0.z, r1.z +mad.f32 r3.y, c13.y, r1.x, r3.y +mul.f r4.y, c8.w, r2.z +mul.f r5.x, c8.z, r2.z +add.f r5.w, r1.z, c15.y +mad.f32 r6.x, c14.y, r1.y, r3.y +absneg.f r1.z, (neg)c5.y +mul.f r3.y, c0.w, r2.z add.f r5.y, c4.y, (neg)r5.w -mov.f32f32 r0.w, r0.w -mad.f32 r2.w, c9.w, r5.w, r2.w -mad.f32 r3.x, c9.z, r5.w, r3.x -mad.f32 r3.y, r5.y, r5.y, r3.y -mad.f32 r0.w, c14.y, r1.y, r0.w -mad.f32 r2.x, c14.z, r0.z, r2.x -mad.f32 r4.x, c9.y, r5.w, r4.x -mov.f32f32 r3.y, r3.y -mov.f32f32 r6.x, r0.w -add.f r2.x, r2.x, c15.z -absneg.f r0.w, (neg)c5.y -mad.f32 r4.y, c9.x, r5.w, r4.y -mad.f32 r1.x, c1.w, r5.w, r1.x -add.f r5.z, c4.z, (neg)r2.x -mad.f32 r5.x, r6.x, r0.w, r5.x -mad.f32 r2.w, c10.w, r2.x, r2.w -mad.f32 r3.x, c10.z, r2.x, r3.x -mad.f32 r3.y, r5.z, r5.z, r3.y -mov.f32f32 r5.x, r5.x -mov.f32f32 r2.y, r2.y +mad.f32 r3.z, c9.y, r5.w, r3.z +mad.f32 r3.w, c9.x, r5.w, r3.w +mad.f32 r2.w, r6.x, r1.z, r2.w +mad.f32 r4.x, r5.y, r5.y, r4.x +mad.f32 r2.y, c13.z, r0.y, r2.y +mul.f r0.w, c12.z, r0.w +mad.f32 r2.y, c14.z, r0.z, r2.y +mad.f32 r0.w, c13.z, r1.x, r0.w +mad.f32 r1.x, c9.w, r5.w, r4.y +mad.f32 r4.y, c9.z, r5.w, r5.x +add.f r2.y, r2.y, c15.z +mad.f32 r6.y, c14.z, r1.y, r0.w +absneg.f r0.w, (neg)c5.z +mad.f32 r1.y, c1.w, r5.w, r3.y +add.f r3.y, c4.z, (neg)r2.y +mad.f32 r3.z, c10.y, r2.y, r3.z +mad.f32 r3.w, c10.x, r2.y, r3.w +mad.f32 r2.w, r6.y, r0.w, r2.w +mad.f32 r4.x, r3.y, r3.y, r4.x mul.f r0.x, c12.w, r0.x -mad.f32 r4.x, c10.y, r2.x, r4.x -mad.f32 r4.y, c10.x, r2.x, r4.y -mad.f32 r1.y, c14.z, r1.y, r2.y -rsq r2.y, r3.y -(ss)mov.f32f32 r2.y, r2.y -(ss)absneg.f r3.y, (neg)c5.z +mad.f32 r5.x, c10.w, r2.y, r1.x +mad.f32 r4.y, c10.z, r2.y, r4.y mad.f32 r0.x, c13.w, r0.y, r0.x -mov.f32f32 r6.y, r1.y -mad.f32 r0.y, r2.z, r2.y, r3.w -mad.f32 r0.w, r5.y, r2.y, r0.w -mad.f32 r1.y, r5.z, r2.y, r3.y -mad.f32 r2.y, r6.y, r3.y, r5.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.y, r1.y -mov.f32f32 r2.y, r2.y -mul.f r2.z, r0.y, r0.y +mad.f32 r0.y, c2.w, r2.y, r1.y +mul.f r6.z, c0.z, r2.z +rsq r1.x, r4.x +(ss)mov.f32f32 r1.y, r1.x +mad.f32 r2.x, r3.x, r1.x, r2.x +max.f r1.x, c17.x, r2.w mad.f32 r0.x, c14.w, r0.z, r0.x -mad.f32 r0.z, r0.w, r0.w, r2.z -max.f r2.y, c17.x, r2.y -mad.f32 r2.z, c2.w, r2.x, r1.x -mad.f32 r1.x, c1.z, r5.w, r3.z -mov.f32f32 r0.z, r0.z -mov.f32f32 r2.y, r2.y -mad.f32 r0.z, r1.y, r1.y, r0.z +mad.f32 r0.z, r5.y, r1.y, r1.z +mov.f32f32 r1.z, r2.x +mad.f32 r0.w, r3.y, r1.y, r0.w +nop +mov.f32f32 r2.w, r0.z +mul.f r1.y, r1.z, r1.z +(ss)mov.f32f32 r4.x, r0.w +mov.f32f32 r3.x, r1.x +mad.f32 r0.z, r0.z, r2.w, r1.y add.f r0.x, r0.x, c15.w -mad.f32 r6.z, c2.z, r2.x, r1.x -mul.f r6.w, c0.y, r1.z -mul.f r7.x, c0.x, r1.z -mad.f32 r7.y, c7.x, r2.x, c7.y -mad.f32 r7.z, c7.x, r1.z, c7.y +mad.f32 r0.z, r0.w, r4.x, r0.z +mul.f r1.z, r3.x, c6.z +mul.f r1.y, r3.x, c6.y +mad.f32 r0.w, c11.y, r0.x, r3.z +mad.f32 r3.x, c11.x, r0.x, r3.w +mul.f r1.x, r1.x, c6.x +mad.f32 r3.w, c11.w, r0.x, r5.x rsq r0.z, r0.z -(ss)mov.f32f32 r0.z, r0.z -mul.f r1.x, r2.y, c6.z -mul.f r3.y, r2.y, c6.y -mul.f r2.y, r2.y, c6.x -mul.f r1.y, r1.y, r0.z -mul.f r0.w, r0.w, r0.z -mul.f r0.y, r0.y, r0.z -mov.f32f32 r0.z, r1.x -mov.f32f32 r1.x, r1.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.y, r0.y -nop -mov.f32f32 r5.z, r1.x -mov.f32f32 r5.y, r0.w -mov.f32f32 r5.x, r0.y -mov.f32f32 r1.z, r0.z -mov.f32f32 r0.y, r3.y -mov.f32f32 r0.z, r2.y -(rpt1)nop -mov.f32f32 r1.y, r0.y -mov.f32f32 r1.x, r0.z -mad.f32 r0.y, c11.w, r0.x, r2.w -mad.f32 r0.z, c11.z, r0.x, r3.x -mad.f32 r0.w, c11.y, r0.x, r4.x -mad.f32 r2.y, c11.x, r0.x, r4.y -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -mul.f r0.w, r0.w, c16.y -mul.f r2.y, r2.y, c16.x -mov.f32f32 r3.w, r0.y -mov.f32f32 r3.z, r0.z -mov.f32f32 r3.y, r0.w -mov.f32f32 r3.x, r2.y -mad.f32 r0.y, c3.w, r0.x, r2.z -mad.f32 r0.z, c3.z, r0.x, r6.z -mad.f32 r2.y, c1.y, r5.w, r6.w -mad.f32 r2.z, c1.x, r5.w, r7.x -mov.f32f32 r0.w, r0.y -mov.f32f32 r0.z, r0.z -mad.f32 r0.y, c2.y, r2.x, r2.y -mad.f32 r2.x, c2.x, r2.x, r2.z +(ss)mov.f32f32 r3.z, r0.z +mul.f r5.x, r2.x, r0.z +mul.f r3.y, r0.w, c16.y +mul.f r3.x, r3.x, c16.x +mul.f r5.z, r4.x, r3.z +mul.f r5.y, r2.w, r3.z +mad.f32 r3.z, c11.z, r0.x, r4.y +mad.f32 r0.w, c3.w, r0.x, r0.y +mad.f32 r0.y, c1.z, r5.w, r6.z +(ss)mul.f r0.z, c0.y, r2.z +mad.f32 r0.y, c2.z, r2.y, r0.y +mad.f32 r2.x, c1.y, r5.w, r0.z +mad.f32 r0.z, c3.z, r0.x, r0.y +mad.f32 r0.y, c2.y, r2.y, r2.x +mul.f r2.x, c0.x, r2.z mad.f32 r0.y, c3.y, r0.x, r0.y +mad.f32 r2.x, c1.x, r5.w, r2.x +mad.f32 r4.y, c7.x, r2.y, c7.y +mad.f32 r2.x, c2.x, r2.y, r2.x +mad.f32 r4.x, c7.x, r2.z, c7.y mad.f32 r0.x, c3.x, r0.x, r2.x -mov.f32f32 r2.x, r7.y -mov.f32f32 r2.y, r7.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x -mov.f32f32 r4.y, r2.x -mov.f32f32 r4.x, r2.y -mov.f32f32 r2.x, r6.y +mov.f32f32 r2.z, r6.y mov.f32f32 r2.y, r6.x -mov.f32f32 r1.w, r1.w -nop -mov.f32f32 r2.z, r2.x -mov.f32f32 r2.y, r2.y mov.f32f32 r2.x, r1.w +mov.f32f32 r5.w, (0.000000) +mov.f32f32 r2.w, (0.000000) mov.f32f32 r1.w, (0.000000) -mov.f32f32 r2.w, r4.w -mov.f32f32 r4.z, r4.z -mov.f32f32 r6.x, (0.000000) -mov.f32f32 r5.w, r1.w -mov.f32f32 r4.w, r2.w -mov.f32f32 r4.z, r4.z -mov.f32f32 r2.w, r6.x -mov.f32f32 r1.w, (0.000000) -(rpt2)nop -mov.f32f32 r1.w, r1.w end nop +nop ; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11) r4.x (5:12) r5.x (5:13) ; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r4.z (0:0,cm=3,il=16,b=0) -; VERT: 154 instructions, 0 half, 8 full +; VERT: 102 instructions, 0 half, 7 full diff --git a/reference/0ad-alpine-valley/0ad-82.asm b/reference/0ad-alpine-valley/0ad-82.asm index 52f5266..593f290 100644 --- a/reference/0ad-alpine-valley/0ad-82.asm +++ b/reference/0ad-alpine-valley/0ad-82.asm @@ -8,239 +8,163 @@ @out(r2.y) out1 @out(r2.z) out2 @out(r2.w) out3 +@const(c12.x) 0x3f000000, 0x00000000, 0x3f800000, 0xbb449ba6 +@const(c13.x) 0xbf000000, 0x40000000, 0x3f800000, 0x3de38866 +@const(c14.x) 0x3fb8aa65, 0x3cf5c28f, 0x00000000, 0x00000000 +@const(c15.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.x, 8, r1.x -bary.f r0.y, 4, r1.x -add.f r0.w, r0.w, c12.y +add.f r0.y, r0.w, c12.y +bary.f r0.w, 4, r1.x bary.f r1.z, 9, r1.x add.f r1.w, r0.x, c13.x -mul.f r2.x, r0.y, r0.y -bary.f r2.y, 5, r1.x -add.f r2.z, r1.z, c13.x +bary.f r2.x, 12, r1.x +mul.f r2.y, r0.w, r0.w +bary.f r2.z, 5, r1.x floor.f r2.w, r1.w -rcp r0.w, r0.w +rcp r0.y, r0.y add.f r0.z, r0.z, c12.y -mad.f32 r2.x, r2.y, r2.y, r2.x -floor.f r3.x, r2.z +add.f r3.x, r1.z, c13.x +mad.f32 r2.y, r2.z, r2.z, r2.y add.f r1.w, r1.w, (neg)r2.w -(ss)mul.f r0.z, r0.z, r0.w -(ss)mov.f32f32 r0.w, r2.x -bary.f r2.x, 6, r1.x -mov.f32f32 r1.w, r1.w -mov.f32f32 r0.z, r0.z -absneg.f r2.w, (neg)c8.x -mad.f32 r0.w, r2.x, r2.x, r0.w -mul.f r3.y, c12.x, r1.w -add.f r2.z, r2.z, (neg)r3.x -mul.f r2.w, r2.w, c8.x -add.f r3.x, c13.y, (neg)r1.w -mov.f32f32 r3.y, r3.y -mov.f32f32 r2.z, r2.z -mul.f r2.w, r2.w, r0.z -rsq r0.w, r0.w -(ss)mov.f32f32 r0.w, r0.w -add.f r0.x, r0.x, (neg)r3.y -mul.f r3.y, c12.x, r2.z -mov.f32f32 r2.w, r2.w -mul.f r0.y, r0.y, r0.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r3.y, r3.y -mul.f r0.z, r2.w, r0.z +(ss)mul.f r0.y, r0.z, r0.y +absneg.f r0.z, (neg)c8.x +bary.f r2.w, 6, r1.x +mov.f32f32 r3.y, r1.w +floor.f r3.z, r3.x +mul.f r0.z, r0.z, c8.x +mad.f32 r2.y, r2.w, r2.w, r2.y +mul.f r3.w, c12.x, r3.y +add.f r3.x, r3.x, (neg)r3.z +mul.f r0.z, r0.z, r0.y mov.f32f32 r0.y, r0.y -add.f r2.w, c13.x, r0.x -bary.f r3.z, 16, r1.x -mov.f32f32 r0.z, r0.z +add.f r0.x, r0.x, (neg)r3.w +mov.f32f32 r3.z, r3.x +rsq r2.y, r2.y +(ss)mov.f32f32 r3.w, r2.y +mul.f r0.y, r0.z, r0.y +mov.f32f32 r0.z, r0.x +mul.f r4.x, c12.x, r3.z add.f r0.x, c13.z, r0.x -mov.f32f32 r2.w, r2.w -mul.f r0.y, r0.y, r3.z -mul.f r2.y, r2.y, r0.w -mul.f r0.z, r0.z, c14.x -mul.f r2.w, r2.w, c3.z -mov.f32f32 r0.x, r0.x -mov.f32f32 r2.y, r2.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r3.z, r2.w -bary.f r3.w, 17, r1.x -mul.f r0.x, r0.x, c3.z -mov.f32f32 r2.w, r2.w -mov.f32f32 r4.x, r3.z -add.f r1.z, r1.z, (neg)r3.y -mad.f32 r0.y, r2.y, r3.w, r0.y -exp2 r0.z, r0.z -(ss)mov.f32f32 r0.z, r0.z -mov.f32f32 r2.y, r0.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r0.y, r0.y -mul.f r0.w, r2.x, r0.w -add.f r2.x, c15.y, (neg)r0.z -add.f r3.y, c13.x, r1.z -mov.f32f32 r2.y, r2.y -mov.f32f32 r0.w, r0.w -mul.f r2.x, r2.x, c8.y -mov.f32f32 r3.y, r3.y -bary.f r3.z, 18, r1.x -mul.f r0.z, r0.z, c12.z -mov.f32f32 r4.w, r2.y -mul.f r2.y, r3.y, c3.w -mad.f32 r0.y, r0.w, r3.z, r0.y -add.f r0.z, r0.z, r2.x -add.f r0.w, c13.z, r1.z -mov.f32f32 r1.z, r2.y -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r4.y, r1.z -bary.f r1.z, 10, r1.x -max.f r0.y, c12.y, r0.y -mov.f32f32 r0.z, r0.z -mul.f r0.w, r0.w, c3.w -add.f r1.z, r1.z, c12.w -mov.f32f32 r0.y, r0.y -max.f r0.z, r0.z, c12.y -mov.f32f32 r2.x, r0.w -mov.f32f32 r3.y, r1.z -mov.f32f32 r0.x, r0.x -mov.f32f32 r5.z, r2.w -mov.f32f32 r0.w, r0.w -mov.f32f32 r4.z, r3.y -log2 r0.y, r0.y -(ss)mul.f r0.y, c9.x, r0.y -min.f r0.z, r0.z, c12.z -mov.f32f32 r2.x, r2.x -mov.f32f32 r3.y, r0.x -mov.f32f32 r5.w, r0.w -mov.f32f32 r0.x, r2.y -sam.s (f32)(x)r3.z, r4.x, s#2, t#2 -(sy)mov.f32f32 r0.w, r3.z -mov.f32f32 r0.y, r0.y -add.f r2.y, c15.y, (neg)r0.z -add.f r2.w, c15.y, (neg)r0.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r3.x, r3.x -add.f r3.z, c13.y, (neg)r2.z -mul.f r2.y, r2.y, c7.z -(ss)mul.f r4.x, r2.w, c7.y -add.f r2.w, c15.y, (neg)r0.z -mov.f32f32 r4.y, r3.z +mul.f r0.w, r0.w, r3.w +add.f r0.z, c13.x, r0.z +mul.f r0.y, r0.y, c14.x +bary.f r4.y, 16, r1.x +add.f r1.z, r1.z, (neg)r4.x +mul.f r4.z, r0.z, c3.z +mul.f r5.y, r0.x, c3.z +mul.f r0.x, r0.w, r4.y +add.f r0.z, c13.z, r1.z +mov.f32f32 r6.x, r4.z +mov.f32f32 r0.w, r1.z exp2 r0.y, r0.y -mov.f32f32 r3.z, c6.y -mov.f32f32 r3.w, c6.x -mov.f32f32 r4.z, c6.z -mul.f r5.x, r3.x, r4.y -mul.f r5.y, r3.z, c10.y -mul.f r3.w, r3.w, c10.x -mul.f r6.y, r2.w, c7.x -mul.f r0.w, r5.x, r0.w -mov.f32f32 r3.z, r0.x -mov.f32f32 r0.x, r1.z -mul.f r2.w, r4.z, c10.z -(ss)mul.f r4.z, r5.y, r0.y -mul.f r5.x, r3.w, r0.y -mov.f32f32 r3.w, r0.x -mul.f r0.x, r2.w, r0.y -(ss)mov.f32f32 r0.y, r4.z -mov.f32f32 r2.w, r5.x -bary.f r4.z, 14, r1.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r5.x, r2.x -sam.s (f32)(x)r6.z, r3.y, s#2, t#2 -(sy)mov.f32f32 r2.x, r6.z -add.f r1.w, r1.w, c12.z -(ss)mov.f32f32 r3.y, r4.z -mov.f32f32 r3.z, r1.z -mov.f32f32 r1.z, r1.z -mul.f r3.w, r1.w, r4.y -mov.f32f32 r4.y, r3.y -bary.f r3.y, 15, r1.x -mov.f32f32 r5.y, r3.z -mad.f32 r0.w, r3.w, r2.x, r0.w -mov.f32f32 r6.x, r1.z -add.f r1.z, r2.z, c12.z -bary.f r2.x, 12, r1.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r2.z, r3.y -sam.s (f32)(x)r4.z, r4.w, s#2, t#2 -(sy)mov.f32f32 r3.y, r4.z -mul.f r3.x, r3.x, r1.z -sam.s (f32)(x)r4.z, r5.z, s#2, t#2 -(sy)mov.f32f32 r3.z, r4.z -mov.f32f32 r4.z, r2.z -mov.f32f32 r2.x, r2.x -mul.f r1.z, r1.w, r1.z -mad.f32 r0.w, r3.x, r3.z, r0.w -bary.f r1.w, 13, r1.x -mov.f32f32 r3.z, r2.x -mov.f32f32 r2.x, c12.z -mov.f32f32 r0.w, r0.w -(ss)nop -sam (f32)(xyz)r4.y, r4.y, s#0, t#0 -bary.f r2.z, 2, r1.x -mad.f32 r0.w, r1.z, r3.y, r0.w -bary.f r1.z, 1, r1.x -bary.f (ei)r1.x, 0, r1.x -(sy)mad.f32 r0.x, r4.w, r2.z, r0.x -mov.f32f32 r0.w, r0.w -mad.f32 r0.y, r4.z, r1.z, r0.y -mad.f32 r1.x, r4.y, r1.x, r2.w +(ss)mov.f32f32 r1.z, r0.y +mul.f r2.z, r2.z, r3.w +bary.f r3.w, 17, r1.x +add.f r0.w, c13.x, r0.w +add.f r1.z, c15.y, (neg)r1.z +mov.f32f32 r6.w, r5.y +mul.f r5.z, r0.z, c3.w +mul.f r7.x, r0.w, c3.w +mul.f r0.z, r1.z, c8.y +(ss)mul.f r0.y, r0.y, c12.z +mad.f32 r0.x, r2.z, r3.w, r0.x +mov.f32f32 r6.y, r7.x +bary.f r0.w, 10, r1.x +add.f r0.y, r0.y, r0.z +mul.f r0.z, r2.w, r2.y +bary.f r1.z, 18, r1.x +add.f r5.w, r0.w, c12.w +max.f r0.y, r0.y, c12.y +mov.f32f32 r4.w, r5.z +mad.f32 r0.x, r0.z, r1.z, r0.x +mov.f32f32 r6.z, r5.w +min.f r0.y, r0.y, c12.z +mov.f32f32 r7.y, r5.w +mov.f32f32 r5.x, r5.w +max.f r0.x, c12.y, r0.x +nop +add.f r0.z, c15.y, (neg)r0.y +sam.s (f32)(x)r7.z, r6.x, s#2, t#2 +add.f r0.w, c13.y, (neg)r3.y +add.f r1.z, c15.y, (neg)r0.y +add.f r2.y, c15.y, (neg)r0.y +mul.f r0.z, r0.z, c7.z +mov.f32f32 r2.z, r0.w +add.f r2.w, c13.y, (neg)r3.z +mul.f r1.z, r1.z, c7.y +mul.f r3.y, r2.y, c7.x +log2 r0.x, r0.x +(ss)mul.f r0.x, c9.x, r0.x +mov.f32f32 r2.y, r2.w +sam.s (f32)(x)r3.z, r6.w, s#2, t#2 nop +sam.s (f32)(x)r3.w, r4.z, s#2, t#2 +sam.s (f32)(x)r4.x, r5.y, s#2, t#2 +add.f r1.w, r1.w, c12.z +add.f r3.x, r3.x, c12.z +mul.f r2.z, r2.z, r2.y +bary.f r2.y, 13, r1.x +mov.f32f32 r4.y, c6.z +(ss)bary.f r4.z, 14, r1.x +(sy)mul.f r2.z, r2.z, r7.z +mul.f r2.w, r1.w, r2.w +exp2 r0.x, r0.x +mov.f32f32 r4.w, c6.y +mov.f32f32 r5.x, c6.x +mul.f r4.y, r4.y, c10.z +mad.f32 r2.z, r2.w, r3.z, r2.z +mul.f r0.w, r0.w, r3.x +mul.f r2.w, r4.w, c10.y +mul.f r3.z, r5.x, c10.x +(ss)mul.f r4.y, r4.y, r0.x +mad.f32 r0.w, r0.w, r3.w, r2.z +mul.f r1.w, r1.w, r3.x +bary.f r4.w, 15, r1.x +mul.f r2.z, r2.w, r0.x +(ss)mul.f r0.x, r3.z, r0.x +mad.f32 r0.w, r1.w, r4.x, r0.w +sam (f32)(w)r5.x, r2.x, s#1, t#1 +(sy)cmps.f.lt r1.w, r5.w, c14.y +mov.f32f32 r2.w, c12.z +(ss)bary.f r2.x, 2, r1.x mul.f r0.w, c13.w, r0.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.x, r1.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.y, r1.w -mov.f32f32 r2.w, r2.x -mov.f32f32 r1.z, c12.y +sam (f32)(xyz)r4.z, r4.z, s#0, t#0 +bary.f r2.y, 0, r1.x +bary.f (ei)r1.x, 1, r1.x +cov.u32f32 r1.y, r1.w +mov.f32f32 r1.w, r0.w +(sy)mad.f32 r2.x, r5.x, r2.x, r4.y +mad.f32 r1.x, r4.w, r1.x, r2.z +mad.f32 r0.x, r4.z, r2.y, r0.x +cmps.f.ne r1.y, r1.y, c12.y +mul.f r2.x, r2.x, r1.w +mul.f r1.x, r1.x, r1.w +mad.f32 r1.w, c5.z, r5.x, r2.x +mad.f32 r1.x, c5.y, r4.w, r1.x mul.f r0.x, r0.x, r0.w -mul.f r0.y, r0.y, r0.w -mul.f r0.w, r1.x, r0.w -mov.f32f32 r3.w, r1.y -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y -mad.f32 r0.x, c5.z, r4.w, r0.x -mad.f32 r0.y, c5.y, r4.z, r0.y -mov.f32f32 r0.w, r0.w +mov.f32f32 r0.w, c12.y +mul.f r1.w, r0.y, r1.w +mul.f r1.x, r0.y, r1.x +mad.f32 r0.x, c5.x, r4.z, r0.x +sel.b32 r0.w, r0.w, r1.y, r5.w +add.f r0.z, r1.w, r0.z +add.f r1.x, r1.x, r1.z +mul.f r0.x, r0.y, r0.x nop -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y -mad.f32 r0.w, c5.x, r4.y, r0.w -sam (f32)(w)r3.x, r3.z, s#1, t#1 -(sy)cmps.f.lt r1.x, r3.w, c14.y -mul.f r0.x, r0.z, r0.x -mul.f r0.y, r0.z, r0.y -mov.f32f32 r0.w, r0.w -cov.u32f32 r1.x, r1.x -add.f r0.x, r0.x, r2.y -add.f r0.y, r0.y, r4.x -mul.f r0.z, r0.z, r0.w -cmps.f.ne r0.w, r1.x, c12.y -mov.f32f32 r1.x, r3.w -(rpt2)nop -mov.f32f32 r1.x, r1.x -add.f r0.z, r0.z, r6.y +mul.f r0.y, r0.z, r0.w +mul.f r0.z, r1.x, r0.w (rpt1)nop -sel.b32 r0.w, r1.z, r0.w, r1.x +mul.f r2.z, r0.y, c4.z +mul.f r2.y, r0.z, c4.y +add.f r0.x, r0.x, r3.y (rpt2)nop mul.f r0.x, r0.x, r0.w -mul.f r0.y, r0.y, r0.w -mul.f r0.z, r0.z, r0.w -nop -mul.f r0.x, r0.x, c4.z -mul.f r0.y, r0.y, c4.y -mul.f r0.z, r0.z, c4.x -nop -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z +(rpt2)nop +mul.f r2.x, r0.x, c4.x +end nop -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z nop -mov.f32f32 r2.z, r0.x -mov.f32f32 r2.y, r0.y -mov.f32f32 r2.x, r0.z -end ; FRAG: outputs: r2.x (1:0) -; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r2.x (5:10,cm=f,il=12,b=1) r3.x (5:11,cm=f,il=16,b=1) r2.z (5:12,cm=f,il=20,b=1) r5.x (5:13,cm=f,il=24,b=1) -; FRAG: 237 instructions, 0 half, 7 full +; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r2.x (5:10,cm=f,il=12,b=1) r3.x (5:11,cm=f,il=16,b=1) r4.x (5:12,cm=f,il=20,b=1) r5.x (5:13,cm=f,il=24,b=1) +; FRAG: 155 instructions, 0 half, 8 full diff --git a/reference/0ad-alpine-valley/0ad-83.asm b/reference/0ad-alpine-valley/0ad-83.asm index 67a2ad5..e7bcae3 100644 --- a/reference/0ad-alpine-valley/0ad-83.asm +++ b/reference/0ad-alpine-valley/0ad-83.asm @@ -32,159 +32,113 @@ @out(r5.y) out21 @out(r5.z) out22 @out(r5.w) out23 +@const(c17.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 +@const(c18.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)mul.f r1.z, c12.x, r0.x mul.f r1.w, c12.x, r0.w mad.f32 r1.z, c13.x, r0.y, r1.z mad.f32 r1.w, c13.x, r1.x, r1.w mad.f32 r1.z, c14.x, r0.z, r1.z -mul.f r2.x, c12.z, r0.x -mul.f r2.y, c12.z, r0.w -mul.f r0.w, c12.y, r0.w -add.f r1.z, r1.z, c15.x -mov.f32f32 r1.w, r1.w -mad.f32 r2.x, c13.z, r0.y, r2.x -mad.f32 r2.y, c13.z, r1.x, r2.y -add.f r2.z, c4.x, (neg)r1.z mad.f32 r1.w, c14.x, r1.y, r1.w -mul.f r2.w, c8.w, r1.z -mul.f r3.x, c8.z, r1.z -mul.f r3.y, r2.z, r2.z -mul.f r3.z, c12.y, r0.x -mov.f32f32 r1.w, r1.w -mad.f32 r3.z, c13.y, r0.y, r3.z -absneg.f r3.w, (neg)c5.x -mad.f32 r3.z, c14.y, r0.z, r3.z -mul.f r4.x, c8.y, r1.z -mul.f r4.y, c8.x, r1.z -mul.f r5.x, r1.w, r3.w -add.f r5.w, r3.z, c15.y -mad.f32 r0.w, c13.y, r1.x, r0.w -mul.f r1.x, c0.w, r1.z -mul.f r3.z, c0.z, r1.z +absneg.f r2.x, (neg)c5.x +mul.f r2.y, c12.y, r0.x +add.f r2.z, r1.z, c15.x +mad.f32 r1.z, c13.y, r0.y, r2.y +mul.f r2.y, c12.z, r0.x +mul.f r2.w, r1.w, r2.x +add.f r3.x, c4.x, (neg)r2.z +mul.f r3.y, c12.y, r0.w +mul.f r3.z, c8.y, r2.z +mul.f r3.w, c8.x, r2.z +mul.f r4.x, r3.x, r3.x +mad.f32 r1.z, c14.y, r0.z, r1.z +mad.f32 r3.y, c13.y, r1.x, r3.y +mul.f r4.y, c8.w, r2.z +mul.f r5.x, c8.z, r2.z +add.f r5.w, r1.z, c15.y +mad.f32 r6.x, c14.y, r1.y, r3.y +absneg.f r1.z, (neg)c5.y +mul.f r3.y, c0.w, r2.z add.f r5.y, c4.y, (neg)r5.w -mov.f32f32 r0.w, r0.w -mad.f32 r2.w, c9.w, r5.w, r2.w -mad.f32 r3.x, c9.z, r5.w, r3.x -mad.f32 r3.y, r5.y, r5.y, r3.y -mad.f32 r0.w, c14.y, r1.y, r0.w -mad.f32 r2.x, c14.z, r0.z, r2.x -mad.f32 r4.x, c9.y, r5.w, r4.x -mov.f32f32 r3.y, r3.y -mov.f32f32 r6.x, r0.w -add.f r2.x, r2.x, c15.z -absneg.f r0.w, (neg)c5.y -mad.f32 r4.y, c9.x, r5.w, r4.y -mad.f32 r1.x, c1.w, r5.w, r1.x -add.f r5.z, c4.z, (neg)r2.x -mad.f32 r5.x, r6.x, r0.w, r5.x -mad.f32 r2.w, c10.w, r2.x, r2.w -mad.f32 r3.x, c10.z, r2.x, r3.x -mad.f32 r3.y, r5.z, r5.z, r3.y -mov.f32f32 r5.x, r5.x -mov.f32f32 r2.y, r2.y +mad.f32 r3.z, c9.y, r5.w, r3.z +mad.f32 r3.w, c9.x, r5.w, r3.w +mad.f32 r2.w, r6.x, r1.z, r2.w +mad.f32 r4.x, r5.y, r5.y, r4.x +mad.f32 r2.y, c13.z, r0.y, r2.y +mul.f r0.w, c12.z, r0.w +mad.f32 r2.y, c14.z, r0.z, r2.y +mad.f32 r0.w, c13.z, r1.x, r0.w +mad.f32 r1.x, c9.w, r5.w, r4.y +mad.f32 r4.y, c9.z, r5.w, r5.x +add.f r2.y, r2.y, c15.z +mad.f32 r6.y, c14.z, r1.y, r0.w +absneg.f r0.w, (neg)c5.z +mad.f32 r1.y, c1.w, r5.w, r3.y +add.f r3.y, c4.z, (neg)r2.y +mad.f32 r3.z, c10.y, r2.y, r3.z +mad.f32 r3.w, c10.x, r2.y, r3.w +mad.f32 r2.w, r6.y, r0.w, r2.w +mad.f32 r4.x, r3.y, r3.y, r4.x mul.f r0.x, c12.w, r0.x -mad.f32 r4.x, c10.y, r2.x, r4.x -mad.f32 r4.y, c10.x, r2.x, r4.y -mad.f32 r1.y, c14.z, r1.y, r2.y -rsq r2.y, r3.y -(ss)mov.f32f32 r2.y, r2.y -(ss)absneg.f r3.y, (neg)c5.z +mad.f32 r5.x, c10.w, r2.y, r1.x +mad.f32 r4.y, c10.z, r2.y, r4.y mad.f32 r0.x, c13.w, r0.y, r0.x -mov.f32f32 r6.y, r1.y -mad.f32 r0.y, r2.z, r2.y, r3.w -mad.f32 r0.w, r5.y, r2.y, r0.w -mad.f32 r1.y, r5.z, r2.y, r3.y -mad.f32 r2.y, r6.y, r3.y, r5.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.y, r1.y -mov.f32f32 r2.y, r2.y -mul.f r2.z, r0.y, r0.y +mad.f32 r0.y, c2.w, r2.y, r1.y +mul.f r6.z, c0.z, r2.z +rsq r1.x, r4.x +(ss)mov.f32f32 r1.y, r1.x +mad.f32 r2.x, r3.x, r1.x, r2.x +max.f r1.x, c17.x, r2.w mad.f32 r0.x, c14.w, r0.z, r0.x -mad.f32 r0.z, r0.w, r0.w, r2.z -max.f r2.y, c17.x, r2.y -mad.f32 r2.z, c2.w, r2.x, r1.x -mad.f32 r1.x, c1.z, r5.w, r3.z -mov.f32f32 r0.z, r0.z -mov.f32f32 r2.y, r2.y -mad.f32 r0.z, r1.y, r1.y, r0.z +mad.f32 r0.z, r5.y, r1.y, r1.z +mov.f32f32 r1.z, r2.x +mad.f32 r0.w, r3.y, r1.y, r0.w +nop +mov.f32f32 r2.w, r0.z +mul.f r1.y, r1.z, r1.z +(ss)mov.f32f32 r4.x, r0.w +mov.f32f32 r3.x, r1.x +mad.f32 r0.z, r0.z, r2.w, r1.y add.f r0.x, r0.x, c15.w -mad.f32 r6.z, c2.z, r2.x, r1.x -mul.f r6.w, c0.y, r1.z -mul.f r7.x, c0.x, r1.z -mad.f32 r7.y, c7.x, r2.x, c7.y -mad.f32 r7.z, c7.x, r1.z, c7.y +mad.f32 r0.z, r0.w, r4.x, r0.z +mul.f r1.z, r3.x, c6.z +mul.f r1.y, r3.x, c6.y +mad.f32 r0.w, c11.y, r0.x, r3.z +mad.f32 r3.x, c11.x, r0.x, r3.w +mul.f r1.x, r1.x, c6.x +mad.f32 r3.w, c11.w, r0.x, r5.x rsq r0.z, r0.z -(ss)mov.f32f32 r0.z, r0.z -mul.f r1.x, r2.y, c6.z -mul.f r3.y, r2.y, c6.y -mul.f r2.y, r2.y, c6.x -mul.f r1.y, r1.y, r0.z -mul.f r0.w, r0.w, r0.z -mul.f r0.y, r0.y, r0.z -mov.f32f32 r0.z, r1.x -mov.f32f32 r1.x, r1.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.y, r0.y -nop -mov.f32f32 r5.z, r1.x -mov.f32f32 r5.y, r0.w -mov.f32f32 r5.x, r0.y -mov.f32f32 r1.z, r0.z -mov.f32f32 r0.y, r3.y -mov.f32f32 r0.z, r2.y -(rpt1)nop -mov.f32f32 r1.y, r0.y -mov.f32f32 r1.x, r0.z -mad.f32 r0.y, c11.w, r0.x, r2.w -mad.f32 r0.z, c11.z, r0.x, r3.x -mad.f32 r0.w, c11.y, r0.x, r4.x -mad.f32 r2.y, c11.x, r0.x, r4.y -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -mul.f r0.w, r0.w, c16.y -mul.f r2.y, r2.y, c16.x -mov.f32f32 r3.w, r0.y -mov.f32f32 r3.z, r0.z -mov.f32f32 r3.y, r0.w -mov.f32f32 r3.x, r2.y -mad.f32 r0.y, c3.w, r0.x, r2.z -mad.f32 r0.z, c3.z, r0.x, r6.z -mad.f32 r2.y, c1.y, r5.w, r6.w -mad.f32 r2.z, c1.x, r5.w, r7.x -mov.f32f32 r0.w, r0.y -mov.f32f32 r0.z, r0.z -mad.f32 r0.y, c2.y, r2.x, r2.y -mad.f32 r2.x, c2.x, r2.x, r2.z +(ss)mov.f32f32 r3.z, r0.z +mul.f r5.x, r2.x, r0.z +mul.f r3.y, r0.w, c16.y +mul.f r3.x, r3.x, c16.x +mul.f r5.z, r4.x, r3.z +mul.f r5.y, r2.w, r3.z +mad.f32 r3.z, c11.z, r0.x, r4.y +mad.f32 r0.w, c3.w, r0.x, r0.y +mad.f32 r0.y, c1.z, r5.w, r6.z +(ss)mul.f r0.z, c0.y, r2.z +mad.f32 r0.y, c2.z, r2.y, r0.y +mad.f32 r2.x, c1.y, r5.w, r0.z +mad.f32 r0.z, c3.z, r0.x, r0.y +mad.f32 r0.y, c2.y, r2.y, r2.x +mul.f r2.x, c0.x, r2.z mad.f32 r0.y, c3.y, r0.x, r0.y +mad.f32 r2.x, c1.x, r5.w, r2.x +mad.f32 r4.y, c7.x, r2.y, c7.y +mad.f32 r2.x, c2.x, r2.y, r2.x +mad.f32 r4.x, c7.x, r2.z, c7.y mad.f32 r0.x, c3.x, r0.x, r2.x -mov.f32f32 r2.x, r7.y -mov.f32f32 r2.y, r7.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x -mov.f32f32 r4.y, r2.x -mov.f32f32 r4.x, r2.y -mov.f32f32 r2.x, r6.y +mov.f32f32 r2.z, r6.y mov.f32f32 r2.y, r6.x -mov.f32f32 r1.w, r1.w -nop -mov.f32f32 r2.z, r2.x -mov.f32f32 r2.y, r2.y mov.f32f32 r2.x, r1.w +mov.f32f32 r5.w, (0.000000) +mov.f32f32 r2.w, (0.000000) mov.f32f32 r1.w, (0.000000) -mov.f32f32 r2.w, r4.w -mov.f32f32 r4.z, r4.z -mov.f32f32 r6.x, (0.000000) -mov.f32f32 r5.w, r1.w -mov.f32f32 r4.w, r2.w -mov.f32f32 r4.z, r4.z -mov.f32f32 r2.w, r6.x -mov.f32f32 r1.w, (0.000000) -(rpt2)nop -mov.f32f32 r1.w, r1.w end nop +nop ; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11) r4.x (5:12) r5.x (5:13) ; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r4.z (0:0,cm=3,il=16,b=0) -; VERT: 154 instructions, 0 half, 8 full +; VERT: 102 instructions, 0 half, 7 full diff --git a/reference/0ad-alpine-valley/0ad-84.asm b/reference/0ad-alpine-valley/0ad-84.asm index c6a3a6f..9c8ac11 100644 --- a/reference/0ad-alpine-valley/0ad-84.asm +++ b/reference/0ad-alpine-valley/0ad-84.asm @@ -24,115 +24,81 @@ @out(r3.y) out13 @out(r3.z) out14 @out(r3.w) out15 +@const(c16.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 +@const(c17.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)mul.f r1.z, c11.x, r0.w mul.f r1.w, c11.x, r0.x mad.f32 r1.z, c12.x, r1.x, r1.z mad.f32 r1.w, c12.x, r0.y, r1.w -mul.f r2.x, c11.z, r0.x -mad.f32 r1.w, c13.x, r0.z, r1.w -mov.f32f32 r1.z, r1.z -mad.f32 r2.x, c12.z, r0.y, r2.x mad.f32 r1.z, c13.x, r1.y, r1.z -add.f r1.w, r1.w, c14.x -mad.f32 r2.x, c13.z, r0.z, r2.x -mul.f r2.y, c11.y, r0.w -mov.f32f32 r1.z, r1.z -mul.f r2.z, c7.w, r1.w -mul.f r2.w, c7.z, r1.w -mul.f r3.x, c7.y, r1.w +mad.f32 r1.w, c13.x, r0.z, r1.w +mul.f r2.x, c11.z, r0.x +mul.f r2.y, c11.y, r0.x mul.f r1.z, r1.z, (neg)c4.x -mad.f32 r2.y, c12.y, r1.x, r2.y -mul.f r3.y, c11.y, r0.x -mul.f r4.x, c7.x, r1.w -mad.f32 r3.y, c12.y, r0.y, r3.y -mov.f32f32 r2.y, r2.y -mad.f32 r3.y, c13.y, r0.z, r3.y -mad.f32 r2.y, c13.y, r1.y, r2.y -mul.f r4.y, c0.w, r1.w -mul.f r4.z, c0.z, r1.w -mul.f r4.w, c0.y, r1.w -mov.f32f32 r2.y, r2.y -add.f r3.y, r3.y, c14.y -mul.f r5.x, c0.x, r1.w -add.f r5.y, r2.x, c14.z -mad.f32 r1.z, (neg)c4.y, r2.y, r1.z -mad.f32 r2.x, c8.w, r3.y, r2.z -mad.f32 r2.y, c8.z, r3.y, r2.w -mad.f32 r2.z, c8.y, r3.y, r3.x -mov.f32f32 r1.z, r1.z +mul.f r2.z, c11.y, r0.w +add.f r1.w, r1.w, c14.x +mad.f32 r2.z, c12.y, r1.x, r2.z +mad.f32 r2.x, c12.z, r0.y, r2.x +mad.f32 r2.z, c13.y, r1.y, r2.z +mul.f r2.w, c7.y, r1.w +mul.f r3.x, c7.x, r1.w +mul.f r3.y, c7.w, r1.w +mad.f32 r1.z, (neg)c4.y, r2.z, r1.z mul.f r0.w, c11.z, r0.w -mad.f32 r2.x, c9.w, r5.y, r2.x +mad.f32 r2.y, c12.y, r0.y, r2.y mad.f32 r0.w, c12.z, r1.x, r0.w +mad.f32 r1.x, c13.y, r0.z, r2.y +mad.f32 r0.w, c13.z, r1.y, r0.w +mul.f r1.y, c7.z, r1.w +mul.f r2.y, c0.w, r1.w +mul.f r2.z, c0.z, r1.w +mad.f32 r0.w, (neg)c4.z, r0.w, r1.z +add.f r4.x, r1.x, c14.y +mul.f r4.y, c0.y, r1.w +mul.f r4.z, c0.x, r1.w +max.f r0.w, c16.x, r0.w +mad.f32 r2.w, c8.y, r4.x, r2.w +mad.f32 r3.x, c8.x, r4.x, r3.x +mad.f32 r3.y, c8.w, r4.x, r3.y +mov.f32f32 r4.w, r0.w +mad.f32 r2.x, c13.z, r0.z, r2.x +mul.f r1.x, r0.w, c5.x +mad.f32 r0.w, c8.z, r4.x, r1.y +mul.f r1.z, r4.w, c5.z +mul.f r1.y, r4.w, c5.y +add.f r4.w, r2.x, c14.z +mad.f32 r2.x, c1.w, r4.x, r2.y +mad.f32 r2.y, c1.z, r4.x, r2.z +mad.f32 r2.z, c1.y, r4.x, r4.y +mad.f32 r2.w, c9.y, r4.w, r2.w mul.f r0.x, c11.w, r0.x -mad.f32 r1.x, c9.z, r5.y, r2.y -mad.f32 r2.y, c9.y, r5.y, r2.z -mov.f32f32 r0.w, r0.w +mad.f32 r3.x, c9.x, r4.w, r3.x mad.f32 r0.x, c12.w, r0.y, r0.x -mad.f32 r0.y, c13.z, r1.y, r0.w +mad.f32 r0.y, c9.w, r4.w, r3.y mad.f32 r0.x, c13.w, r0.z, r0.x -mad.f32 r0.z, c8.x, r3.y, r4.x -mad.f32 r0.w, c1.w, r3.y, r4.y -mov.f32f32 r0.y, r0.y +mad.f32 r0.z, c9.z, r4.w, r0.w +mad.f32 r0.w, c2.w, r4.w, r2.x +mad.f32 r4.y, c2.z, r4.w, r2.y add.f r0.x, r0.x, c14.w -mad.f32 r0.z, c9.x, r5.y, r0.z -mad.f32 r0.w, c2.w, r5.y, r0.w -mad.f32 r0.y, (neg)c4.z, r0.y, r1.z -mad.f32 r1.y, c10.w, r0.x, r2.x -mad.f32 r1.x, c10.z, r0.x, r1.x -mad.f32 r1.z, c10.y, r0.x, r2.y -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.x, r1.x -mul.f r1.z, r1.z, c15.y -max.f r0.y, c16.x, r0.y -mov.f32f32 r2.w, r1.y -mov.f32f32 r2.z, r1.x -mov.f32f32 r2.y, r1.z -mov.f32f32 r0.y, r0.y -mad.f32 r0.z, c10.x, r0.x, r0.z +mad.f32 r5.x, c2.y, r4.w, r2.z +mad.f32 r4.x, c1.x, r4.x, r4.z +mad.f32 r3.y, c6.x, r4.w, c6.y +mad.f32 r2.x, c10.y, r0.x, r2.w +mad.f32 r3.x, c10.x, r0.x, r3.x +mad.f32 r2.w, c10.w, r0.x, r0.y +mad.f32 r2.z, c10.z, r0.x, r0.z +mul.f r2.y, r2.x, c15.y +mul.f r2.x, r3.x, c15.x mad.f32 r0.w, c3.w, r0.x, r0.w -mad.f32 r3.x, c1.z, r3.y, r4.z -mul.f r1.x, r0.y, c5.z -mul.f r1.y, r0.y, c5.y -mul.f r0.y, r0.y, c5.x -mul.f r0.z, r0.z, c15.x -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.y, r1.y -mov.f32f32 r0.y, r0.y -nop -mov.f32f32 r1.z, r1.x -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.x, r0.y -mov.f32f32 r2.x, r0.z -mov.f32f32 r0.w, r0.w -mad.f32 r0.y, c2.z, r5.y, r3.x -mad.f32 r0.z, c1.y, r3.y, r4.w -mad.f32 r0.y, c3.z, r0.x, r0.y -mad.f32 r3.x, c2.y, r5.y, r0.z -mad.f32 r3.y, c1.x, r3.y, r5.x -mad.f32 r4.x, c6.x, r5.y, c6.y -mov.f32f32 r0.z, r0.y -mad.f32 r0.y, c3.y, r0.x, r3.x -mad.f32 r3.x, c2.x, r5.y, r3.y -mov.f32f32 r3.y, r4.x -mad.f32 r1.w, c6.x, r1.w, c6.y -mov.f32f32 r0.y, r0.y -mad.f32 r0.x, c3.x, r0.x, r3.x -mov.f32f32 r3.y, r3.y -mov.f32f32 r1.w, r1.w -nop -mov.f32f32 r0.x, r0.x -mov.f32f32 r3.w, r3.w -mov.f32f32 r3.x, r1.w -mov.f32f32 r1.w, r3.z -mov.f32f32 r4.x, (0.000000) -mov.f32f32 r3.w, r3.w -nop -mov.f32f32 r3.z, r1.w -mov.f32f32 r1.w, r4.x +mad.f32 r0.z, c3.z, r0.x, r4.y +mad.f32 r0.y, c3.y, r0.x, r5.x +mad.f32 r4.x, c2.x, r4.w, r4.x +mad.f32 r3.x, c6.x, r1.w, c6.y +mad.f32 r0.x, c3.x, r0.x, r4.x +mov.f32f32 r1.w, (0.000000) end nop ; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11) ; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r3.z (0:0,cm=3,il=16,b=0) -; VERT: 107 instructions, 0 half, 6 full +; VERT: 71 instructions, 0 half, 6 full diff --git a/reference/0ad-alpine-valley/0ad-85.asm b/reference/0ad-alpine-valley/0ad-85.asm index 1d73264..f1c05da 100644 --- a/reference/0ad-alpine-valley/0ad-85.asm +++ b/reference/0ad-alpine-valley/0ad-85.asm @@ -8,215 +8,155 @@ @out(r2.y) out1 @out(r2.z) out2 @out(r2.w) out3 +@const(c10.x) 0x3f000000, 0x00000000, 0x3f800000, 0xbb449ba6 +@const(c11.x) 0xbf000000, 0x40000000, 0x3f800000, 0x3de38866 +@const(c12.x) 0x3fb8aa65, 0x3cf5c28f, 0x00000000, 0x00000000 +@const(c13.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.x, 4, r1.x add.f r0.y, r0.w, c10.y bary.f r0.w, 5, r1.x bary.f r1.z, 10, r1.x -add.f r1.w, r0.x, c11.x -bary.f r2.x, 8, r1.x +add.f r2.x, r0.x, c11.x +bary.f r1.w, 11, r1.x add.f r2.y, r0.w, c11.x -mov.f32f32 r1.z, r1.z -floor.f r2.z, r1.w +bary.f r2.z, 8, r1.x +floor.f r3.x, r2.x rcp r0.y, r0.y add.f r0.z, r0.z, c10.y -floor.f r2.w, r2.y -mov.f32f32 r3.x, r1.z -add.f r1.z, r1.w, (neg)r2.z +floor.f r3.y, r2.y +bary.f r2.w, 9, r1.x +add.f r2.x, r2.x, (neg)r3.x (ss)mul.f r0.y, r0.z, r0.y -add.f r0.z, r2.y, (neg)r2.w -bary.f r1.w, 11, r1.x -mov.f32f32 r1.z, r1.z +absneg.f r0.z, (neg)c8.x +add.f r2.y, r2.y, (neg)r3.y +mov.f32f32 r3.x, r2.x +sam (f32)(xyzw)r3.y, r1.z, s#0, t#0 +(sy)(ss)add.f r1.z, c13.y, (neg)r4.x +mul.f r0.z, r0.z, c8.x +mov.f32f32 r1.w, r2.y +mul.f r4.y, c10.x, r3.x +add.f r3.x, c11.y, (neg)r3.x +mul.f r0.z, r0.z, r0.y mov.f32f32 r0.y, r0.y -absneg.f r2.y, (neg)c8.x -mov.f32f32 r0.z, r0.z -mul.f r2.z, c10.x, r1.z -mov.f32f32 r1.w, r1.w -mul.f r2.y, r2.y, c8.x -add.f r2.w, c11.y, (neg)r1.z -mov.f32f32 r2.z, r2.z -mul.f r3.z, c10.x, r0.z -mul.f r2.y, r2.y, r0.y -mov.f32f32 r3.y, r1.w -add.f r0.x, r0.x, (neg)r2.z -mov.f32f32 r1.w, r3.z -mov.f32f32 r2.y, r2.y -mov.f32f32 r2.z, r2.w -mov.f32f32 r0.x, r0.x -add.f r0.w, r0.w, (neg)r1.w -mul.f r0.y, r2.y, r0.y -sam (f32)(xyzw)r2.w, r3.x, s#0, t#0 -(sy)add.f r1.w, c13.y, (neg)r3.z -add.f r2.y, c11.x, r0.x +add.f r0.x, r0.x, (neg)r4.y +mul.f r4.y, c10.x, r1.w +mov.f32f32 r4.z, r3.x +mul.f r0.y, r0.z, r0.y +mov.f32f32 r0.z, r0.x +add.f r0.w, r0.w, (neg)r4.y add.f r0.x, c11.z, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r2.y, r2.y -mov.f32f32 r0.x, r0.x mul.f r0.y, r0.y, c12.x -add.f r3.w, c11.z, r0.w -mul.f r2.y, r2.y, c3.z -mul.f r0.x, r0.x, c3.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r3.w, r3.w -mov.f32f32 r4.x, r2.y -mov.f32f32 r4.y, r0.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r2.y, r2.y -mov.f32f32 r4.z, r4.x -add.f r0.w, c11.x, r0.w -exp2 r0.y, r0.y -(ss)mov.f32f32 r0.y, r0.y -mov.f32f32 r4.x, r4.y -mov.f32f32 r5.y, r0.x -mov.f32f32 r0.x, r0.w -add.f r0.w, c13.y, (neg)r0.y -mov.f32f32 r6.x, r4.x -mul.f r3.w, r3.w, c3.w -mul.f r0.x, r0.x, c3.w -mul.f r0.w, r0.w, c8.y -mul.f r0.y, r0.y, c10.z -mov.f32f32 r4.x, r3.w -mov.f32f32 r4.y, r0.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r6.w, r2.y -mov.f32f32 r2.y, r3.w -mov.f32f32 r4.w, r4.y -bary.f r3.w, 6, r1.x -add.f r0.y, r0.y, r0.w -mov.f32f32 r0.w, r4.x -mov.f32f32 r5.z, r0.x -add.f r0.x, r3.w, c10.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r6.y, r0.w -mov.f32f32 r7.x, r2.y -mov.f32f32 r0.w, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r2.y, r0.x -mov.f32f32 r3.w, r0.x -mov.f32f32 r5.x, r0.w -max.f r0.y, r0.y, c10.y -mov.f32f32 r6.z, r2.y -mov.f32f32 r5.w, r3.w -mov.f32f32 r0.x, r0.x -mul.f r0.w, r1.w, c4.z -add.f r1.w, c13.y, (neg)r3.z -sam.s (f32)(x)r3.w, r4.z, s#2, t#2 -(sy)mov.f32f32 r2.y, r3.w -min.f r0.y, r0.y, c10.z -sam.s (f32)(x)r3.w, r6.x, s#2, t#2 -(sy)mov.f32f32 r3.w, r3.w -sam.s (f32)(x)r4.x, r5.y, s#2, t#2 -(sy)mov.f32f32 r4.x, r4.x -mov.f32f32 r2.y, r2.y -add.f r4.y, c11.y, (neg)r0.z -(ss)add.f r4.z, c13.y, (neg)r0.y -add.f r4.w, c13.y, (neg)r0.y -add.f r5.x, c13.y, (neg)r0.y -mov.f32f32 r4.y, r4.y -mul.f r4.z, r4.z, c7.z -mul.f r4.w, r4.w, c7.y -mul.f r5.x, r5.x, c7.x -mul.f r5.y, r2.z, r4.y -mov.f32f32 r7.y, r0.x -mul.f r0.x, r3.z, c10.z -mul.f r1.w, r1.w, c4.y -mul.f r2.y, r5.y, r2.y -add.f r1.z, r1.z, c10.z -add.f r0.x, r0.x, r0.w -mul.f r0.w, r3.z, c10.z -sam.s (f32)(x)r5.y, r6.w, s#2, t#2 -(sy)mov.f32f32 r5.y, r5.y -mul.f r4.y, r1.z, r4.y -add.f r5.z, c13.y, (neg)r3.z -mul.f r0.x, r3.y, r0.x -add.f r0.w, r0.w, r1.w -mad.f32 r1.w, r4.y, r4.x, r2.y -mul.f r2.y, r5.z, c4.x -mov.f32f32 r0.x, r0.x -bary.f r3.y, 2, r1.x -mov.f32f32 r1.w, r1.w -add.f r0.z, r0.z, c10.z -mul.f r0.w, r3.x, r0.w -mul.f r3.x, r3.z, c10.z -mov.f32f32 r2.x, r2.x -mul.f r2.z, r2.z, r0.z -mul.f r3.y, r0.x, r3.y -mov.f32f32 r0.w, r0.w -add.f r2.y, r3.x, r2.y -mad.f32 r1.w, r2.z, r5.y, r1.w -bary.f r2.z, 1, r1.x -mov.f32f32 r4.x, r2.x -mul.f r2.x, r2.w, r2.y -mov.f32f32 r1.w, r1.w -mul.f r0.z, r1.z, r0.z -mul.f r1.z, r0.w, r2.z -mov.f32f32 r2.x, r2.x -nop -mad.f32 r0.z, r0.z, r3.w, r1.w -bary.f r1.w, 0, r1.x -bary.f (ei)r1.x, 9, r1.x -mov.f32f32 r1.y, c10.z -mov.f32f32 r0.z, r0.z -mul.f r1.w, r2.x, r1.w -mov.f32f32 r1.x, r1.x -mov.f32f32 r2.w, r1.y -mul.f r0.z, c11.w, r0.z -mov.f32f32 r1.y, c10.y -(rpt1)nop -mov.f32f32 r0.z, r0.z -mov.f32f32 r4.y, r1.x +add.f r0.z, c11.x, r0.z +mov.f32f32 r4.y, r0.w +mul.f r4.w, r0.x, c3.z +add.f r0.x, c11.z, r0.w +mul.f r5.z, r0.z, c3.z +add.f r0.z, c11.x, r4.y +mov.f32f32 r6.y, r4.w +mul.f r5.x, r0.x, c3.w +mov.f32f32 r7.x, r5.z +mul.f r6.z, r0.z, c3.w +exp2 r0.x, r0.y +(ss)mov.f32f32 r0.y, r0.x +bary.f r0.z, 6, r1.x +mov.f32f32 r5.w, r5.x +mov.f32f32 r7.y, r6.z +add.f r0.y, c13.y, (neg)r0.y +add.f r5.y, r0.z, c10.w +add.f r0.z, c11.y, (neg)r1.w +mul.f r0.x, r0.x, c10.z +add.f r0.w, c13.y, (neg)r4.x +mov.f32f32 r7.z, r5.y +mul.f r0.y, r0.y, c8.y +mov.f32f32 r6.w, r5.y +mov.f32f32 r6.x, r5.y (rpt1)nop -mul.f r1.x, r3.y, r0.z -mul.f r1.z, r1.z, r0.z -mul.f r0.z, r1.w, r0.z +add.f r0.x, r0.x, r0.y +sam.s (f32)(x)r7.x, r7.x, s#2, t#2 +mov.f32f32 r0.y, r0.z nop -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.z, r1.z -mad.f32 r0.x, c6.z, r0.x, r1.x -mad.f32 r0.w, c6.y, r0.w, r1.z -mov.f32f32 r0.z, r0.z -sam (f32)(w)r3.x, r4.x, s#1, t#1 -(sy)cmps.f.lt r1.x, r3.w, c12.y -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.w, r0.w -mad.f32 r0.z, c6.x, r2.x, r0.z -cov.u32f32 r1.x, r1.x -mul.f r0.x, r0.y, r0.x -mul.f r0.w, r0.y, r0.w -mov.f32f32 r0.z, r0.z -cmps.f.ne r1.x, r1.x, c10.y -add.f r0.x, r0.x, r4.z -mov.f32f32 r1.z, r3.w -add.f r0.w, r0.w, r4.w -mul.f r0.y, r0.y, r0.z -nop -mov.f32f32 r0.z, r1.z -(rpt2)nop -sel.b32 r0.z, r1.y, r1.x, r0.z -add.f r0.y, r0.y, r5.x -(rpt1)nop -mul.f r0.x, r0.x, r0.z -mul.f r0.w, r0.w, r0.z -mul.f r0.y, r0.y, r0.z +(ss)nop +sam.s (f32)(x)r7.y, r6.y, s#2, t#2 +sam.s (f32)(x)r7.z, r4.w, s#2, t#2 +mul.f r0.w, r0.w, c4.z +add.f r1.w, c13.y, (neg)r4.x +mul.f r0.y, r4.z, r0.y +max.f r0.x, r0.x, c10.y +sam.s (f32)(x)r4.y, r5.z, s#2, t#2 +mul.f r4.z, r4.x, c10.z +mul.f r1.w, r1.w, c4.y +(sy)mul.f r0.y, r0.y, r7.x +add.f r2.x, r2.x, c10.z +min.f r0.x, r0.x, c10.z +add.f r0.w, r4.z, r0.w +mul.f r4.z, r4.x, c10.z +mul.f r0.z, r2.x, r0.z +(ss)add.f r4.w, c13.y, (neg)r0.x +add.f r5.x, c13.y, (neg)r0.x +add.f r5.y, c13.y, (neg)r0.x +mad.f32 r0.y, r0.z, r7.y, r0.y +add.f r0.z, r2.y, c10.z +mul.f r2.y, r4.w, c7.z +mul.f r4.w, r5.x, c7.y +mul.f r5.x, r5.y, c7.x +mul.f r3.x, r3.x, r0.z +add.f r1.w, r4.z, r1.w +mul.f r1.z, r1.z, c4.x +mul.f r0.w, r3.w, r0.w +mad.f32 r0.y, r3.x, r4.y, r0.y +mul.f r0.z, r2.x, r0.z +mul.f r2.x, r4.x, c10.z +mov.f32f32 r3.x, r0.w +bary.f r3.w, 2, r1.x +mad.f32 r0.y, r0.z, r7.z, r0.y +mul.f r0.z, r3.z, r1.w +add.f r1.z, r2.x, r1.z +mul.f r1.w, r3.x, r3.w +mul.f r0.y, c11.w, r0.y +mov.f32f32 r2.x, r0.z +mul.f r1.z, r3.y, r1.z +bary.f r3.x, 1, r1.x +mov.f32f32 r3.y, r0.y +sam (f32)(w)r3.z, r2.z, s#1, t#1 +(sy)(ss)cmps.f.lt r2.z, r4.y, c12.y +mov.f32f32 r3.z, r1.z +mul.f r2.x, r2.x, r3.x +mul.f r1.w, r1.w, r3.y +bary.f (ei)r1.x, 0, r1.x +mad.f32 r0.w, c6.z, r0.w, r1.w +mul.f r1.y, r2.x, r3.y +cov.u32f32 r1.w, r2.z +mov.f32f32 r2.w, c10.z +mul.f r0.w, r0.x, r0.w +mad.f32 r0.z, c6.y, r0.z, r1.y +mul.f r1.x, r3.z, r1.x +cmps.f.ne r1.y, r1.w, c10.y +add.f r0.w, r0.w, r2.y +mov.f32f32 r1.w, c10.y +mul.f r0.z, r0.x, r0.z +mul.f r0.y, r1.x, r0.y nop -mul.f r0.x, r0.x, c5.z -mul.f r0.z, r0.w, c5.y -mul.f r0.y, r0.y, c5.x +sel.b32 r1.x, r1.w, r1.y, r4.y +add.f r0.z, r0.z, r4.w +mad.f32 r0.y, c6.x, r1.z, r0.y nop -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.y, r0.y +mul.f r0.w, r0.w, r1.x +mul.f r0.z, r0.z, r1.x +(rpt1)nop +mul.f r2.z, r0.w, c5.z +mul.f r2.y, r0.z, c5.y +mul.f r0.x, r0.x, r0.y +(rpt2)nop +add.f r0.x, r0.x, r5.x +(rpt2)nop +mul.f r0.x, r0.x, r1.x +(rpt2)nop +mul.f r2.x, r0.x, c5.x +end nop -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.y, r0.y nop -mov.f32f32 r2.z, r0.x -mov.f32f32 r2.y, r0.z -mov.f32f32 r2.x, r0.y -end nop ; FRAG: outputs: r2.x (1:0) ; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r2.x (5:10,cm=f,il=12,b=1) r1.x (5:11,cm=f,il=16,b=1) -; FRAG: 212 instructions, 0 half, 8 full +; FRAG: 149 instructions, 0 half, 8 full diff --git a/reference/0ad-alpine-valley/0ad-86.asm b/reference/0ad-alpine-valley/0ad-86.asm index c6a3a6f..9c8ac11 100644 --- a/reference/0ad-alpine-valley/0ad-86.asm +++ b/reference/0ad-alpine-valley/0ad-86.asm @@ -24,115 +24,81 @@ @out(r3.y) out13 @out(r3.z) out14 @out(r3.w) out15 +@const(c16.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 +@const(c17.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)mul.f r1.z, c11.x, r0.w mul.f r1.w, c11.x, r0.x mad.f32 r1.z, c12.x, r1.x, r1.z mad.f32 r1.w, c12.x, r0.y, r1.w -mul.f r2.x, c11.z, r0.x -mad.f32 r1.w, c13.x, r0.z, r1.w -mov.f32f32 r1.z, r1.z -mad.f32 r2.x, c12.z, r0.y, r2.x mad.f32 r1.z, c13.x, r1.y, r1.z -add.f r1.w, r1.w, c14.x -mad.f32 r2.x, c13.z, r0.z, r2.x -mul.f r2.y, c11.y, r0.w -mov.f32f32 r1.z, r1.z -mul.f r2.z, c7.w, r1.w -mul.f r2.w, c7.z, r1.w -mul.f r3.x, c7.y, r1.w +mad.f32 r1.w, c13.x, r0.z, r1.w +mul.f r2.x, c11.z, r0.x +mul.f r2.y, c11.y, r0.x mul.f r1.z, r1.z, (neg)c4.x -mad.f32 r2.y, c12.y, r1.x, r2.y -mul.f r3.y, c11.y, r0.x -mul.f r4.x, c7.x, r1.w -mad.f32 r3.y, c12.y, r0.y, r3.y -mov.f32f32 r2.y, r2.y -mad.f32 r3.y, c13.y, r0.z, r3.y -mad.f32 r2.y, c13.y, r1.y, r2.y -mul.f r4.y, c0.w, r1.w -mul.f r4.z, c0.z, r1.w -mul.f r4.w, c0.y, r1.w -mov.f32f32 r2.y, r2.y -add.f r3.y, r3.y, c14.y -mul.f r5.x, c0.x, r1.w -add.f r5.y, r2.x, c14.z -mad.f32 r1.z, (neg)c4.y, r2.y, r1.z -mad.f32 r2.x, c8.w, r3.y, r2.z -mad.f32 r2.y, c8.z, r3.y, r2.w -mad.f32 r2.z, c8.y, r3.y, r3.x -mov.f32f32 r1.z, r1.z +mul.f r2.z, c11.y, r0.w +add.f r1.w, r1.w, c14.x +mad.f32 r2.z, c12.y, r1.x, r2.z +mad.f32 r2.x, c12.z, r0.y, r2.x +mad.f32 r2.z, c13.y, r1.y, r2.z +mul.f r2.w, c7.y, r1.w +mul.f r3.x, c7.x, r1.w +mul.f r3.y, c7.w, r1.w +mad.f32 r1.z, (neg)c4.y, r2.z, r1.z mul.f r0.w, c11.z, r0.w -mad.f32 r2.x, c9.w, r5.y, r2.x +mad.f32 r2.y, c12.y, r0.y, r2.y mad.f32 r0.w, c12.z, r1.x, r0.w +mad.f32 r1.x, c13.y, r0.z, r2.y +mad.f32 r0.w, c13.z, r1.y, r0.w +mul.f r1.y, c7.z, r1.w +mul.f r2.y, c0.w, r1.w +mul.f r2.z, c0.z, r1.w +mad.f32 r0.w, (neg)c4.z, r0.w, r1.z +add.f r4.x, r1.x, c14.y +mul.f r4.y, c0.y, r1.w +mul.f r4.z, c0.x, r1.w +max.f r0.w, c16.x, r0.w +mad.f32 r2.w, c8.y, r4.x, r2.w +mad.f32 r3.x, c8.x, r4.x, r3.x +mad.f32 r3.y, c8.w, r4.x, r3.y +mov.f32f32 r4.w, r0.w +mad.f32 r2.x, c13.z, r0.z, r2.x +mul.f r1.x, r0.w, c5.x +mad.f32 r0.w, c8.z, r4.x, r1.y +mul.f r1.z, r4.w, c5.z +mul.f r1.y, r4.w, c5.y +add.f r4.w, r2.x, c14.z +mad.f32 r2.x, c1.w, r4.x, r2.y +mad.f32 r2.y, c1.z, r4.x, r2.z +mad.f32 r2.z, c1.y, r4.x, r4.y +mad.f32 r2.w, c9.y, r4.w, r2.w mul.f r0.x, c11.w, r0.x -mad.f32 r1.x, c9.z, r5.y, r2.y -mad.f32 r2.y, c9.y, r5.y, r2.z -mov.f32f32 r0.w, r0.w +mad.f32 r3.x, c9.x, r4.w, r3.x mad.f32 r0.x, c12.w, r0.y, r0.x -mad.f32 r0.y, c13.z, r1.y, r0.w +mad.f32 r0.y, c9.w, r4.w, r3.y mad.f32 r0.x, c13.w, r0.z, r0.x -mad.f32 r0.z, c8.x, r3.y, r4.x -mad.f32 r0.w, c1.w, r3.y, r4.y -mov.f32f32 r0.y, r0.y +mad.f32 r0.z, c9.z, r4.w, r0.w +mad.f32 r0.w, c2.w, r4.w, r2.x +mad.f32 r4.y, c2.z, r4.w, r2.y add.f r0.x, r0.x, c14.w -mad.f32 r0.z, c9.x, r5.y, r0.z -mad.f32 r0.w, c2.w, r5.y, r0.w -mad.f32 r0.y, (neg)c4.z, r0.y, r1.z -mad.f32 r1.y, c10.w, r0.x, r2.x -mad.f32 r1.x, c10.z, r0.x, r1.x -mad.f32 r1.z, c10.y, r0.x, r2.y -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.x, r1.x -mul.f r1.z, r1.z, c15.y -max.f r0.y, c16.x, r0.y -mov.f32f32 r2.w, r1.y -mov.f32f32 r2.z, r1.x -mov.f32f32 r2.y, r1.z -mov.f32f32 r0.y, r0.y -mad.f32 r0.z, c10.x, r0.x, r0.z +mad.f32 r5.x, c2.y, r4.w, r2.z +mad.f32 r4.x, c1.x, r4.x, r4.z +mad.f32 r3.y, c6.x, r4.w, c6.y +mad.f32 r2.x, c10.y, r0.x, r2.w +mad.f32 r3.x, c10.x, r0.x, r3.x +mad.f32 r2.w, c10.w, r0.x, r0.y +mad.f32 r2.z, c10.z, r0.x, r0.z +mul.f r2.y, r2.x, c15.y +mul.f r2.x, r3.x, c15.x mad.f32 r0.w, c3.w, r0.x, r0.w -mad.f32 r3.x, c1.z, r3.y, r4.z -mul.f r1.x, r0.y, c5.z -mul.f r1.y, r0.y, c5.y -mul.f r0.y, r0.y, c5.x -mul.f r0.z, r0.z, c15.x -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.y, r1.y -mov.f32f32 r0.y, r0.y -nop -mov.f32f32 r1.z, r1.x -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.x, r0.y -mov.f32f32 r2.x, r0.z -mov.f32f32 r0.w, r0.w -mad.f32 r0.y, c2.z, r5.y, r3.x -mad.f32 r0.z, c1.y, r3.y, r4.w -mad.f32 r0.y, c3.z, r0.x, r0.y -mad.f32 r3.x, c2.y, r5.y, r0.z -mad.f32 r3.y, c1.x, r3.y, r5.x -mad.f32 r4.x, c6.x, r5.y, c6.y -mov.f32f32 r0.z, r0.y -mad.f32 r0.y, c3.y, r0.x, r3.x -mad.f32 r3.x, c2.x, r5.y, r3.y -mov.f32f32 r3.y, r4.x -mad.f32 r1.w, c6.x, r1.w, c6.y -mov.f32f32 r0.y, r0.y -mad.f32 r0.x, c3.x, r0.x, r3.x -mov.f32f32 r3.y, r3.y -mov.f32f32 r1.w, r1.w -nop -mov.f32f32 r0.x, r0.x -mov.f32f32 r3.w, r3.w -mov.f32f32 r3.x, r1.w -mov.f32f32 r1.w, r3.z -mov.f32f32 r4.x, (0.000000) -mov.f32f32 r3.w, r3.w -nop -mov.f32f32 r3.z, r1.w -mov.f32f32 r1.w, r4.x +mad.f32 r0.z, c3.z, r0.x, r4.y +mad.f32 r0.y, c3.y, r0.x, r5.x +mad.f32 r4.x, c2.x, r4.w, r4.x +mad.f32 r3.x, c6.x, r1.w, c6.y +mad.f32 r0.x, c3.x, r0.x, r4.x +mov.f32f32 r1.w, (0.000000) end nop ; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11) ; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r3.z (0:0,cm=3,il=16,b=0) -; VERT: 107 instructions, 0 half, 6 full +; VERT: 71 instructions, 0 half, 6 full diff --git a/reference/0ad-alpine-valley/0ad-87.asm b/reference/0ad-alpine-valley/0ad-87.asm index c6a3a6f..9c8ac11 100644 --- a/reference/0ad-alpine-valley/0ad-87.asm +++ b/reference/0ad-alpine-valley/0ad-87.asm @@ -24,115 +24,81 @@ @out(r3.y) out13 @out(r3.z) out14 @out(r3.w) out15 +@const(c16.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 +@const(c17.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)mul.f r1.z, c11.x, r0.w mul.f r1.w, c11.x, r0.x mad.f32 r1.z, c12.x, r1.x, r1.z mad.f32 r1.w, c12.x, r0.y, r1.w -mul.f r2.x, c11.z, r0.x -mad.f32 r1.w, c13.x, r0.z, r1.w -mov.f32f32 r1.z, r1.z -mad.f32 r2.x, c12.z, r0.y, r2.x mad.f32 r1.z, c13.x, r1.y, r1.z -add.f r1.w, r1.w, c14.x -mad.f32 r2.x, c13.z, r0.z, r2.x -mul.f r2.y, c11.y, r0.w -mov.f32f32 r1.z, r1.z -mul.f r2.z, c7.w, r1.w -mul.f r2.w, c7.z, r1.w -mul.f r3.x, c7.y, r1.w +mad.f32 r1.w, c13.x, r0.z, r1.w +mul.f r2.x, c11.z, r0.x +mul.f r2.y, c11.y, r0.x mul.f r1.z, r1.z, (neg)c4.x -mad.f32 r2.y, c12.y, r1.x, r2.y -mul.f r3.y, c11.y, r0.x -mul.f r4.x, c7.x, r1.w -mad.f32 r3.y, c12.y, r0.y, r3.y -mov.f32f32 r2.y, r2.y -mad.f32 r3.y, c13.y, r0.z, r3.y -mad.f32 r2.y, c13.y, r1.y, r2.y -mul.f r4.y, c0.w, r1.w -mul.f r4.z, c0.z, r1.w -mul.f r4.w, c0.y, r1.w -mov.f32f32 r2.y, r2.y -add.f r3.y, r3.y, c14.y -mul.f r5.x, c0.x, r1.w -add.f r5.y, r2.x, c14.z -mad.f32 r1.z, (neg)c4.y, r2.y, r1.z -mad.f32 r2.x, c8.w, r3.y, r2.z -mad.f32 r2.y, c8.z, r3.y, r2.w -mad.f32 r2.z, c8.y, r3.y, r3.x -mov.f32f32 r1.z, r1.z +mul.f r2.z, c11.y, r0.w +add.f r1.w, r1.w, c14.x +mad.f32 r2.z, c12.y, r1.x, r2.z +mad.f32 r2.x, c12.z, r0.y, r2.x +mad.f32 r2.z, c13.y, r1.y, r2.z +mul.f r2.w, c7.y, r1.w +mul.f r3.x, c7.x, r1.w +mul.f r3.y, c7.w, r1.w +mad.f32 r1.z, (neg)c4.y, r2.z, r1.z mul.f r0.w, c11.z, r0.w -mad.f32 r2.x, c9.w, r5.y, r2.x +mad.f32 r2.y, c12.y, r0.y, r2.y mad.f32 r0.w, c12.z, r1.x, r0.w +mad.f32 r1.x, c13.y, r0.z, r2.y +mad.f32 r0.w, c13.z, r1.y, r0.w +mul.f r1.y, c7.z, r1.w +mul.f r2.y, c0.w, r1.w +mul.f r2.z, c0.z, r1.w +mad.f32 r0.w, (neg)c4.z, r0.w, r1.z +add.f r4.x, r1.x, c14.y +mul.f r4.y, c0.y, r1.w +mul.f r4.z, c0.x, r1.w +max.f r0.w, c16.x, r0.w +mad.f32 r2.w, c8.y, r4.x, r2.w +mad.f32 r3.x, c8.x, r4.x, r3.x +mad.f32 r3.y, c8.w, r4.x, r3.y +mov.f32f32 r4.w, r0.w +mad.f32 r2.x, c13.z, r0.z, r2.x +mul.f r1.x, r0.w, c5.x +mad.f32 r0.w, c8.z, r4.x, r1.y +mul.f r1.z, r4.w, c5.z +mul.f r1.y, r4.w, c5.y +add.f r4.w, r2.x, c14.z +mad.f32 r2.x, c1.w, r4.x, r2.y +mad.f32 r2.y, c1.z, r4.x, r2.z +mad.f32 r2.z, c1.y, r4.x, r4.y +mad.f32 r2.w, c9.y, r4.w, r2.w mul.f r0.x, c11.w, r0.x -mad.f32 r1.x, c9.z, r5.y, r2.y -mad.f32 r2.y, c9.y, r5.y, r2.z -mov.f32f32 r0.w, r0.w +mad.f32 r3.x, c9.x, r4.w, r3.x mad.f32 r0.x, c12.w, r0.y, r0.x -mad.f32 r0.y, c13.z, r1.y, r0.w +mad.f32 r0.y, c9.w, r4.w, r3.y mad.f32 r0.x, c13.w, r0.z, r0.x -mad.f32 r0.z, c8.x, r3.y, r4.x -mad.f32 r0.w, c1.w, r3.y, r4.y -mov.f32f32 r0.y, r0.y +mad.f32 r0.z, c9.z, r4.w, r0.w +mad.f32 r0.w, c2.w, r4.w, r2.x +mad.f32 r4.y, c2.z, r4.w, r2.y add.f r0.x, r0.x, c14.w -mad.f32 r0.z, c9.x, r5.y, r0.z -mad.f32 r0.w, c2.w, r5.y, r0.w -mad.f32 r0.y, (neg)c4.z, r0.y, r1.z -mad.f32 r1.y, c10.w, r0.x, r2.x -mad.f32 r1.x, c10.z, r0.x, r1.x -mad.f32 r1.z, c10.y, r0.x, r2.y -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.x, r1.x -mul.f r1.z, r1.z, c15.y -max.f r0.y, c16.x, r0.y -mov.f32f32 r2.w, r1.y -mov.f32f32 r2.z, r1.x -mov.f32f32 r2.y, r1.z -mov.f32f32 r0.y, r0.y -mad.f32 r0.z, c10.x, r0.x, r0.z +mad.f32 r5.x, c2.y, r4.w, r2.z +mad.f32 r4.x, c1.x, r4.x, r4.z +mad.f32 r3.y, c6.x, r4.w, c6.y +mad.f32 r2.x, c10.y, r0.x, r2.w +mad.f32 r3.x, c10.x, r0.x, r3.x +mad.f32 r2.w, c10.w, r0.x, r0.y +mad.f32 r2.z, c10.z, r0.x, r0.z +mul.f r2.y, r2.x, c15.y +mul.f r2.x, r3.x, c15.x mad.f32 r0.w, c3.w, r0.x, r0.w -mad.f32 r3.x, c1.z, r3.y, r4.z -mul.f r1.x, r0.y, c5.z -mul.f r1.y, r0.y, c5.y -mul.f r0.y, r0.y, c5.x -mul.f r0.z, r0.z, c15.x -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.y, r1.y -mov.f32f32 r0.y, r0.y -nop -mov.f32f32 r1.z, r1.x -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.x, r0.y -mov.f32f32 r2.x, r0.z -mov.f32f32 r0.w, r0.w -mad.f32 r0.y, c2.z, r5.y, r3.x -mad.f32 r0.z, c1.y, r3.y, r4.w -mad.f32 r0.y, c3.z, r0.x, r0.y -mad.f32 r3.x, c2.y, r5.y, r0.z -mad.f32 r3.y, c1.x, r3.y, r5.x -mad.f32 r4.x, c6.x, r5.y, c6.y -mov.f32f32 r0.z, r0.y -mad.f32 r0.y, c3.y, r0.x, r3.x -mad.f32 r3.x, c2.x, r5.y, r3.y -mov.f32f32 r3.y, r4.x -mad.f32 r1.w, c6.x, r1.w, c6.y -mov.f32f32 r0.y, r0.y -mad.f32 r0.x, c3.x, r0.x, r3.x -mov.f32f32 r3.y, r3.y -mov.f32f32 r1.w, r1.w -nop -mov.f32f32 r0.x, r0.x -mov.f32f32 r3.w, r3.w -mov.f32f32 r3.x, r1.w -mov.f32f32 r1.w, r3.z -mov.f32f32 r4.x, (0.000000) -mov.f32f32 r3.w, r3.w -nop -mov.f32f32 r3.z, r1.w -mov.f32f32 r1.w, r4.x +mad.f32 r0.z, c3.z, r0.x, r4.y +mad.f32 r0.y, c3.y, r0.x, r5.x +mad.f32 r4.x, c2.x, r4.w, r4.x +mad.f32 r3.x, c6.x, r1.w, c6.y +mad.f32 r0.x, c3.x, r0.x, r4.x +mov.f32f32 r1.w, (0.000000) end nop ; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11) ; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r3.z (0:0,cm=3,il=16,b=0) -; VERT: 107 instructions, 0 half, 6 full +; VERT: 71 instructions, 0 half, 6 full diff --git a/reference/0ad-alpine-valley/0ad-88.asm b/reference/0ad-alpine-valley/0ad-88.asm index 4377ecf..227a081 100644 --- a/reference/0ad-alpine-valley/0ad-88.asm +++ b/reference/0ad-alpine-valley/0ad-88.asm @@ -8,199 +8,135 @@ @out(r2.y) out1 @out(r2.z) out2 @out(r2.w) out3 +@const(c9.x) 0x3f000000, 0x00000000, 0x3f800000, 0xbb449ba6 +@const(c10.x) 0xbf000000, 0x40000000, 0x3f800000, 0x3de38866 +@const(c11.x) 0x3fb8aa65, 0x3cf5c28f, 0x00000000, 0x00000000 +@const(c12.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.x, 4, r1.x add.f r0.y, r0.w, c9.y bary.f r0.w, 5, r1.x bary.f r1.z, 8, r1.x -add.f r1.w, r0.x, c10.x -bary.f r2.x, 10, r1.x -bary.f r2.y, 6, r1.x -add.f r2.z, r0.w, c10.x -floor.f r2.w, r1.w +add.f r2.x, r0.x, c10.x +bary.f r1.w, 9, r1.x +add.f r2.y, r0.w, c10.x +bary.f r2.z, 6, r1.x +floor.f r2.w, r2.x rcp r0.y, r0.y add.f r0.z, r0.z, c9.y -mov.f32f32 r1.z, r1.z -floor.f r3.x, r2.z -add.f r1.w, r1.w, (neg)r2.w +floor.f r3.x, r2.y +add.f r3.w, r2.z, c9.w +add.f r2.x, r2.x, (neg)r2.w (ss)mul.f r0.y, r0.z, r0.y -mov.f32f32 r3.y, r1.z -add.f r0.z, r2.z, (neg)r3.x -mov.f32f32 r1.z, r1.w +absneg.f r0.z, (neg)c7.x +add.f r2.y, r2.y, (neg)r3.x +mov.f32f32 r2.z, r2.x +add.f r2.x, r2.x, c9.z +mul.f r0.z, r0.z, c7.x +sam (f32)(w)r4.x, r1.z, s#1, t#1 +(ss)mov.f32f32 r1.z, r2.y +mul.f r1.w, c9.x, r2.z +add.f r2.z, c10.y, (neg)r2.z +mul.f r0.z, r0.z, r0.y mov.f32f32 r0.y, r0.y -absneg.f r1.w, (neg)c7.x -mov.f32f32 r0.z, r0.z -mul.f r2.z, c9.x, r1.z -add.f r2.w, c10.y, (neg)r1.z -mul.f r1.w, r1.w, c7.x -bary.f r3.x, 9, r1.x -mov.f32f32 r2.z, r2.z -mul.f r3.z, c9.x, r0.z -mul.f r1.w, r1.w, r0.y -mov.f32f32 r2.w, r2.w -add.f r0.x, r0.x, (neg)r2.z -mov.f32f32 r2.z, r3.z -mov.f32f32 r1.w, r1.w -add.f r3.z, c10.y, (neg)r0.z -mov.f32f32 r0.x, r0.x -add.f r0.w, r0.w, (neg)r2.z -mul.f r0.y, r1.w, r0.y -mov.f32f32 r1.w, r3.z -add.f r2.z, c10.x, r0.x +add.f r0.x, r0.x, (neg)r1.w +mul.f r1.w, c9.x, r1.z +mov.f32f32 r2.w, r2.z +mul.f r0.y, r0.z, r0.y +mov.f32f32 r0.z, r0.x +add.f r0.w, r0.w, (neg)r1.w add.f r0.x, c10.z, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r2.z, r2.z -mov.f32f32 r0.x, r0.x mul.f r0.y, r0.y, c11.x -add.f r3.z, c10.z, r0.w -mul.f r2.z, r2.z, c3.z -mul.f r0.x, r0.x, c3.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r3.z, r3.z -mov.f32f32 r3.w, r2.z -mov.f32f32 r4.x, r0.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r2.z, r2.z -mov.f32f32 r4.y, r3.w -add.f r0.w, c10.x, r0.w -exp2 r0.y, r0.y -(ss)mov.f32f32 r0.y, r0.y -mov.f32f32 r3.w, r4.x -mov.f32f32 r5.x, r0.x -mov.f32f32 r0.x, r0.w -add.f r0.w, c12.y, (neg)r0.y -mov.f32f32 r5.w, r3.w -mul.f r3.z, r3.z, c3.w -mul.f r0.x, r0.x, c3.w -mul.f r0.w, r0.w, c7.y -mul.f r0.y, r0.y, c9.z -mov.f32f32 r3.w, r3.z -mov.f32f32 r4.x, r0.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r6.z, r2.z -mov.f32f32 r2.z, r3.z -mov.f32f32 r4.z, r4.x -add.f r2.y, r2.y, c9.w -add.f r0.y, r0.y, r0.w -mov.f32f32 r0.w, r3.w -mov.f32f32 r5.y, r0.x -mov.f32f32 r0.x, r2.y -mov.f32f32 r0.y, r0.y -mov.f32f32 r6.x, r0.w -mov.f32f32 r0.w, r2.y -mov.f32f32 r4.w, r0.x -mov.f32f32 r0.x, r0.y -mov.f32f32 r0.y, r2.y -mov.f32f32 r6.w, r2.z -mov.f32f32 r6.y, r0.w -mov.f32f32 r0.w, r2.y -mov.f32f32 r5.z, r0.y -sam.s (f32)(x)r3.z, r4.y, s#2, t#2 -(sy)mov.f32f32 r0.y, r3.z +add.f r0.z, c10.x, r0.z +mov.f32f32 r1.w, r0.w +mul.f r3.y, r0.x, c3.z +add.f r0.x, c10.z, r0.w +mul.f r4.x, r0.z, c3.z +add.f r0.z, c10.x, r1.w +mov.f32f32 r5.x, r3.y +mul.f r3.z, r0.x, c3.w +mov.f32f32 r5.w, r4.x +mul.f r5.y, r0.z, c3.w +exp2 r0.x, r0.y +(ss)mov.f32f32 r0.y, r0.x +mov.f32f32 r5.z, r3.w +mov.f32f32 r4.y, r3.z +mov.f32f32 r6.x, r5.y +mov.f32f32 r6.y, r3.w +add.f r0.y, c12.y, (neg)r0.y +mov.f32f32 r4.z, r3.w +sam.s (f32)(x)r3.x, r3.y, s#2, t#2 +add.f r0.z, c10.y, (neg)r1.z +sam.s (f32)(x)r6.z, r5.x, s#2, t#2 +mul.f r0.x, r0.x, c9.z +add.f r0.w, r2.y, c9.z +mul.f r0.y, r0.y, c7.y +(ss)nop +sam.s (f32)(x)r5.x, r5.w, s#2, t#2 +mov.f32f32 r1.z, r0.z +mul.f r0.z, r2.x, r0.z +mul.f r1.w, r2.z, r0.w +add.f r0.x, r0.x, r0.y +mul.f r0.y, r2.w, r1.z +sam.s (f32)(x)r3.y, r4.x, s#2, t#2 +mul.f r0.w, r2.x, r0.w +(sy)cmps.f.lt r1.z, r4.w, c11.y +bary.f r2.x, 10, r1.x +mul.f r0.y, r0.y, r5.x max.f r0.x, r0.x, c9.y -mov.f32f32 r7.x, r0.w -mul.f r0.w, r2.w, r1.w -mov.f32f32 r0.y, r0.y +mad.f32 r0.y, r0.z, r6.z, r0.y +cov.u32f32 r0.z, r1.z +mad.f32 r0.y, r1.w, r3.y, r0.y min.f r0.x, r0.x, c9.z -sam.s (f32)(x)r3.z, r5.w, s#2, t#2 -nop -(sy)mov.f32f32 r2.y, r3.z -mul.f r0.y, r0.w, r0.y -sam.s (f32)(x)r3.z, r5.x, s#2, t#2 -(sy)mov.f32f32 r0.w, r3.z -add.f r1.z, r1.z, c9.z -add.f r2.z, c12.y, (neg)r0.x -add.f r3.z, c12.y, (neg)r0.x -add.f r3.w, c12.y, (neg)r0.x -mul.f r1.w, r1.z, r1.w -mul.f r2.z, r2.z, c6.z -mul.f r4.x, r3.z, c6.y -mul.f r3.w, r3.w, c6.x -mad.f32 r0.y, r1.w, r0.w, r0.y -(ss)nop -sam.s (f32)(x)r4.y, r6.z, s#2, t#2 -add.f r0.z, r0.z, c9.z -(sy)mov.f32f32 r0.w, r4.y -mov.f32f32 r1.w, r3.x -mov.f32f32 r0.y, r0.y -mul.f r2.w, r2.w, r0.z -mul.f r0.z, r1.z, r0.z -mov.f32f32 r3.z, r1.w -mov.f32f32 r1.z, r2.x -mad.f32 r0.y, r2.w, r0.w, r0.y -bary.f r0.w, 11, r1.x -mov.f32f32 r1.w, c9.z -bary.f r2.x, 2, r1.x -mov.f32f32 r0.y, r0.y -sam (f32)(w)r2.w, r3.y, s#1, t#1 -(sy)cmps.f.lt r2.w, r3.z, c11.y -mad.f32 r0.y, r0.z, r2.y, r0.y -mov.f32f32 r0.z, r3.z -mov.f32f32 r3.x, r1.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.y, r0.y -cov.u32f32 r1.z, r2.w -mov.f32f32 r0.z, r0.z -(ss)mov.f32f32 r3.y, r0.w +mad.f32 r0.y, r0.w, r3.x, r0.y +cmps.f.ne r0.z, r0.z, c9.y +(rpt1)nop mul.f r0.y, c10.w, r0.y -cmps.f.ne r0.w, r1.z, c9.y +bary.f r2.y, 11, r1.x +add.f r0.w, c12.y, (neg)r0.x +add.f r1.z, c12.y, (neg)r0.x +mov.f32f32 r1.w, r0.y +add.f r2.z, c12.y, (neg)r0.x (rpt1)nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.z, c9.y -sam (f32)(xyz)r4.y, r3.x, s#0, t#0 -(sy)mul.f r2.x, r4.w, r2.x +sam (f32)(xyz)r2.w, r2.x, s#0, t#0 +(ss)bary.f r2.x, 2, r1.x bary.f r2.y, 1, r1.x bary.f (ei)r1.x, 0, r1.x -sel.b32 r0.z, r1.z, r0.w, r0.z -mul.f r0.w, r2.x, r0.y -mul.f r1.y, r4.z, r2.y -mul.f r1.x, r4.y, r1.x -mov.f32f32 r2.w, r1.w -mov.f32f32 r0.w, r0.w -mul.f r1.y, r1.y, r0.y -mad.f32 r0.w, c5.z, r4.w, r0.w +mul.f r0.w, r0.w, c6.z +(sy)mul.f r1.y, r3.y, r2.x +mul.f r2.x, r3.x, r2.y +mul.f r1.x, r2.w, r1.x +mul.f r1.z, r1.z, c6.y +mul.f r1.y, r1.y, r1.w +mul.f r1.w, r2.x, r1.w +mad.f32 r1.y, c5.z, r3.y, r1.y +mad.f32 r1.w, c5.y, r3.x, r1.w mul.f r0.y, r1.x, r0.y -(rpt1)nop -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.x, r1.y -mov.f32f32 r0.y, r0.y -mad.f32 r1.x, c5.y, r4.z, r1.x -mul.f r0.w, r0.x, r0.w -mad.f32 r0.y, c5.x, r4.y, r0.y -(rpt1)nop -add.f r0.w, r0.w, r2.z -mov.f32f32 r1.x, r1.x -mov.f32f32 r0.y, r0.y -nop -mul.f r0.w, r0.w, r0.z -mul.f r1.x, r0.x, r1.x +mul.f r1.x, r2.z, c6.x +mul.f r1.y, r0.x, r1.y +mul.f r1.w, r0.x, r1.w +mad.f32 r0.y, c5.x, r2.w, r0.y +mov.f32f32 r2.x, c9.y +add.f r0.w, r1.y, r0.w +add.f r1.y, r1.w, r1.z mul.f r0.x, r0.x, r0.y -nop -mul.f r0.y, r0.w, c4.z -add.f r0.w, r1.x, r4.x -add.f r0.x, r0.x, r3.w -nop -mov.f32f32 r0.y, r0.y -mul.f r0.w, r0.w, r0.z -mul.f r0.x, r0.x, r0.z -nop -mov.f32f32 r0.y, r0.y -mul.f r0.z, r0.w, c4.y -mul.f r0.x, r0.x, c4.x -nop -mov.f32f32 r2.z, r0.y -mov.f32f32 r0.y, r0.z -mov.f32f32 r0.x, r0.x +sel.b32 r0.y, r2.x, r0.z, r4.w +mov.f32f32 r2.w, c9.z (rpt1)nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x +mul.f r0.z, r0.w, r0.y +mul.f r0.w, r1.y, r0.y (rpt1)nop -mov.f32f32 r2.y, r0.y -mov.f32f32 r2.x, r0.x +mul.f r2.z, r0.z, c4.z +mul.f r2.y, r0.w, c4.y +add.f r0.x, r0.x, r1.x +(rpt2)nop +mul.f r0.x, r0.x, r0.y +(rpt2)nop +mul.f r2.x, r0.x, c4.x end nop nop +nop ; FRAG: outputs: r2.x (1:0) -; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r2.x (5:10,cm=f,il=12,b=1) r0.x (5:11,cm=f,il=16,b=1) -; FRAG: 195 instructions, 0 half, 8 full +; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r2.x (5:10,cm=f,il=12,b=1) r1.z (5:11,cm=f,il=16,b=1) +; FRAG: 129 instructions, 0 half, 7 full diff --git a/reference/0ad-alpine-valley/0ad-89.asm b/reference/0ad-alpine-valley/0ad-89.asm index c6a3a6f..9c8ac11 100644 --- a/reference/0ad-alpine-valley/0ad-89.asm +++ b/reference/0ad-alpine-valley/0ad-89.asm @@ -24,115 +24,81 @@ @out(r3.y) out13 @out(r3.z) out14 @out(r3.w) out15 +@const(c16.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 +@const(c17.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)mul.f r1.z, c11.x, r0.w mul.f r1.w, c11.x, r0.x mad.f32 r1.z, c12.x, r1.x, r1.z mad.f32 r1.w, c12.x, r0.y, r1.w -mul.f r2.x, c11.z, r0.x -mad.f32 r1.w, c13.x, r0.z, r1.w -mov.f32f32 r1.z, r1.z -mad.f32 r2.x, c12.z, r0.y, r2.x mad.f32 r1.z, c13.x, r1.y, r1.z -add.f r1.w, r1.w, c14.x -mad.f32 r2.x, c13.z, r0.z, r2.x -mul.f r2.y, c11.y, r0.w -mov.f32f32 r1.z, r1.z -mul.f r2.z, c7.w, r1.w -mul.f r2.w, c7.z, r1.w -mul.f r3.x, c7.y, r1.w +mad.f32 r1.w, c13.x, r0.z, r1.w +mul.f r2.x, c11.z, r0.x +mul.f r2.y, c11.y, r0.x mul.f r1.z, r1.z, (neg)c4.x -mad.f32 r2.y, c12.y, r1.x, r2.y -mul.f r3.y, c11.y, r0.x -mul.f r4.x, c7.x, r1.w -mad.f32 r3.y, c12.y, r0.y, r3.y -mov.f32f32 r2.y, r2.y -mad.f32 r3.y, c13.y, r0.z, r3.y -mad.f32 r2.y, c13.y, r1.y, r2.y -mul.f r4.y, c0.w, r1.w -mul.f r4.z, c0.z, r1.w -mul.f r4.w, c0.y, r1.w -mov.f32f32 r2.y, r2.y -add.f r3.y, r3.y, c14.y -mul.f r5.x, c0.x, r1.w -add.f r5.y, r2.x, c14.z -mad.f32 r1.z, (neg)c4.y, r2.y, r1.z -mad.f32 r2.x, c8.w, r3.y, r2.z -mad.f32 r2.y, c8.z, r3.y, r2.w -mad.f32 r2.z, c8.y, r3.y, r3.x -mov.f32f32 r1.z, r1.z +mul.f r2.z, c11.y, r0.w +add.f r1.w, r1.w, c14.x +mad.f32 r2.z, c12.y, r1.x, r2.z +mad.f32 r2.x, c12.z, r0.y, r2.x +mad.f32 r2.z, c13.y, r1.y, r2.z +mul.f r2.w, c7.y, r1.w +mul.f r3.x, c7.x, r1.w +mul.f r3.y, c7.w, r1.w +mad.f32 r1.z, (neg)c4.y, r2.z, r1.z mul.f r0.w, c11.z, r0.w -mad.f32 r2.x, c9.w, r5.y, r2.x +mad.f32 r2.y, c12.y, r0.y, r2.y mad.f32 r0.w, c12.z, r1.x, r0.w +mad.f32 r1.x, c13.y, r0.z, r2.y +mad.f32 r0.w, c13.z, r1.y, r0.w +mul.f r1.y, c7.z, r1.w +mul.f r2.y, c0.w, r1.w +mul.f r2.z, c0.z, r1.w +mad.f32 r0.w, (neg)c4.z, r0.w, r1.z +add.f r4.x, r1.x, c14.y +mul.f r4.y, c0.y, r1.w +mul.f r4.z, c0.x, r1.w +max.f r0.w, c16.x, r0.w +mad.f32 r2.w, c8.y, r4.x, r2.w +mad.f32 r3.x, c8.x, r4.x, r3.x +mad.f32 r3.y, c8.w, r4.x, r3.y +mov.f32f32 r4.w, r0.w +mad.f32 r2.x, c13.z, r0.z, r2.x +mul.f r1.x, r0.w, c5.x +mad.f32 r0.w, c8.z, r4.x, r1.y +mul.f r1.z, r4.w, c5.z +mul.f r1.y, r4.w, c5.y +add.f r4.w, r2.x, c14.z +mad.f32 r2.x, c1.w, r4.x, r2.y +mad.f32 r2.y, c1.z, r4.x, r2.z +mad.f32 r2.z, c1.y, r4.x, r4.y +mad.f32 r2.w, c9.y, r4.w, r2.w mul.f r0.x, c11.w, r0.x -mad.f32 r1.x, c9.z, r5.y, r2.y -mad.f32 r2.y, c9.y, r5.y, r2.z -mov.f32f32 r0.w, r0.w +mad.f32 r3.x, c9.x, r4.w, r3.x mad.f32 r0.x, c12.w, r0.y, r0.x -mad.f32 r0.y, c13.z, r1.y, r0.w +mad.f32 r0.y, c9.w, r4.w, r3.y mad.f32 r0.x, c13.w, r0.z, r0.x -mad.f32 r0.z, c8.x, r3.y, r4.x -mad.f32 r0.w, c1.w, r3.y, r4.y -mov.f32f32 r0.y, r0.y +mad.f32 r0.z, c9.z, r4.w, r0.w +mad.f32 r0.w, c2.w, r4.w, r2.x +mad.f32 r4.y, c2.z, r4.w, r2.y add.f r0.x, r0.x, c14.w -mad.f32 r0.z, c9.x, r5.y, r0.z -mad.f32 r0.w, c2.w, r5.y, r0.w -mad.f32 r0.y, (neg)c4.z, r0.y, r1.z -mad.f32 r1.y, c10.w, r0.x, r2.x -mad.f32 r1.x, c10.z, r0.x, r1.x -mad.f32 r1.z, c10.y, r0.x, r2.y -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.x, r1.x -mul.f r1.z, r1.z, c15.y -max.f r0.y, c16.x, r0.y -mov.f32f32 r2.w, r1.y -mov.f32f32 r2.z, r1.x -mov.f32f32 r2.y, r1.z -mov.f32f32 r0.y, r0.y -mad.f32 r0.z, c10.x, r0.x, r0.z +mad.f32 r5.x, c2.y, r4.w, r2.z +mad.f32 r4.x, c1.x, r4.x, r4.z +mad.f32 r3.y, c6.x, r4.w, c6.y +mad.f32 r2.x, c10.y, r0.x, r2.w +mad.f32 r3.x, c10.x, r0.x, r3.x +mad.f32 r2.w, c10.w, r0.x, r0.y +mad.f32 r2.z, c10.z, r0.x, r0.z +mul.f r2.y, r2.x, c15.y +mul.f r2.x, r3.x, c15.x mad.f32 r0.w, c3.w, r0.x, r0.w -mad.f32 r3.x, c1.z, r3.y, r4.z -mul.f r1.x, r0.y, c5.z -mul.f r1.y, r0.y, c5.y -mul.f r0.y, r0.y, c5.x -mul.f r0.z, r0.z, c15.x -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.y, r1.y -mov.f32f32 r0.y, r0.y -nop -mov.f32f32 r1.z, r1.x -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.x, r0.y -mov.f32f32 r2.x, r0.z -mov.f32f32 r0.w, r0.w -mad.f32 r0.y, c2.z, r5.y, r3.x -mad.f32 r0.z, c1.y, r3.y, r4.w -mad.f32 r0.y, c3.z, r0.x, r0.y -mad.f32 r3.x, c2.y, r5.y, r0.z -mad.f32 r3.y, c1.x, r3.y, r5.x -mad.f32 r4.x, c6.x, r5.y, c6.y -mov.f32f32 r0.z, r0.y -mad.f32 r0.y, c3.y, r0.x, r3.x -mad.f32 r3.x, c2.x, r5.y, r3.y -mov.f32f32 r3.y, r4.x -mad.f32 r1.w, c6.x, r1.w, c6.y -mov.f32f32 r0.y, r0.y -mad.f32 r0.x, c3.x, r0.x, r3.x -mov.f32f32 r3.y, r3.y -mov.f32f32 r1.w, r1.w -nop -mov.f32f32 r0.x, r0.x -mov.f32f32 r3.w, r3.w -mov.f32f32 r3.x, r1.w -mov.f32f32 r1.w, r3.z -mov.f32f32 r4.x, (0.000000) -mov.f32f32 r3.w, r3.w -nop -mov.f32f32 r3.z, r1.w -mov.f32f32 r1.w, r4.x +mad.f32 r0.z, c3.z, r0.x, r4.y +mad.f32 r0.y, c3.y, r0.x, r5.x +mad.f32 r4.x, c2.x, r4.w, r4.x +mad.f32 r3.x, c6.x, r1.w, c6.y +mad.f32 r0.x, c3.x, r0.x, r4.x +mov.f32f32 r1.w, (0.000000) end nop ; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11) ; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r3.z (0:0,cm=3,il=16,b=0) -; VERT: 107 instructions, 0 half, 6 full +; VERT: 71 instructions, 0 half, 6 full diff --git a/reference/0ad-alpine-valley/0ad-90.asm b/reference/0ad-alpine-valley/0ad-90.asm index c6a3a6f..9c8ac11 100644 --- a/reference/0ad-alpine-valley/0ad-90.asm +++ b/reference/0ad-alpine-valley/0ad-90.asm @@ -24,115 +24,81 @@ @out(r3.y) out13 @out(r3.z) out14 @out(r3.w) out15 +@const(c16.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 +@const(c17.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)mul.f r1.z, c11.x, r0.w mul.f r1.w, c11.x, r0.x mad.f32 r1.z, c12.x, r1.x, r1.z mad.f32 r1.w, c12.x, r0.y, r1.w -mul.f r2.x, c11.z, r0.x -mad.f32 r1.w, c13.x, r0.z, r1.w -mov.f32f32 r1.z, r1.z -mad.f32 r2.x, c12.z, r0.y, r2.x mad.f32 r1.z, c13.x, r1.y, r1.z -add.f r1.w, r1.w, c14.x -mad.f32 r2.x, c13.z, r0.z, r2.x -mul.f r2.y, c11.y, r0.w -mov.f32f32 r1.z, r1.z -mul.f r2.z, c7.w, r1.w -mul.f r2.w, c7.z, r1.w -mul.f r3.x, c7.y, r1.w +mad.f32 r1.w, c13.x, r0.z, r1.w +mul.f r2.x, c11.z, r0.x +mul.f r2.y, c11.y, r0.x mul.f r1.z, r1.z, (neg)c4.x -mad.f32 r2.y, c12.y, r1.x, r2.y -mul.f r3.y, c11.y, r0.x -mul.f r4.x, c7.x, r1.w -mad.f32 r3.y, c12.y, r0.y, r3.y -mov.f32f32 r2.y, r2.y -mad.f32 r3.y, c13.y, r0.z, r3.y -mad.f32 r2.y, c13.y, r1.y, r2.y -mul.f r4.y, c0.w, r1.w -mul.f r4.z, c0.z, r1.w -mul.f r4.w, c0.y, r1.w -mov.f32f32 r2.y, r2.y -add.f r3.y, r3.y, c14.y -mul.f r5.x, c0.x, r1.w -add.f r5.y, r2.x, c14.z -mad.f32 r1.z, (neg)c4.y, r2.y, r1.z -mad.f32 r2.x, c8.w, r3.y, r2.z -mad.f32 r2.y, c8.z, r3.y, r2.w -mad.f32 r2.z, c8.y, r3.y, r3.x -mov.f32f32 r1.z, r1.z +mul.f r2.z, c11.y, r0.w +add.f r1.w, r1.w, c14.x +mad.f32 r2.z, c12.y, r1.x, r2.z +mad.f32 r2.x, c12.z, r0.y, r2.x +mad.f32 r2.z, c13.y, r1.y, r2.z +mul.f r2.w, c7.y, r1.w +mul.f r3.x, c7.x, r1.w +mul.f r3.y, c7.w, r1.w +mad.f32 r1.z, (neg)c4.y, r2.z, r1.z mul.f r0.w, c11.z, r0.w -mad.f32 r2.x, c9.w, r5.y, r2.x +mad.f32 r2.y, c12.y, r0.y, r2.y mad.f32 r0.w, c12.z, r1.x, r0.w +mad.f32 r1.x, c13.y, r0.z, r2.y +mad.f32 r0.w, c13.z, r1.y, r0.w +mul.f r1.y, c7.z, r1.w +mul.f r2.y, c0.w, r1.w +mul.f r2.z, c0.z, r1.w +mad.f32 r0.w, (neg)c4.z, r0.w, r1.z +add.f r4.x, r1.x, c14.y +mul.f r4.y, c0.y, r1.w +mul.f r4.z, c0.x, r1.w +max.f r0.w, c16.x, r0.w +mad.f32 r2.w, c8.y, r4.x, r2.w +mad.f32 r3.x, c8.x, r4.x, r3.x +mad.f32 r3.y, c8.w, r4.x, r3.y +mov.f32f32 r4.w, r0.w +mad.f32 r2.x, c13.z, r0.z, r2.x +mul.f r1.x, r0.w, c5.x +mad.f32 r0.w, c8.z, r4.x, r1.y +mul.f r1.z, r4.w, c5.z +mul.f r1.y, r4.w, c5.y +add.f r4.w, r2.x, c14.z +mad.f32 r2.x, c1.w, r4.x, r2.y +mad.f32 r2.y, c1.z, r4.x, r2.z +mad.f32 r2.z, c1.y, r4.x, r4.y +mad.f32 r2.w, c9.y, r4.w, r2.w mul.f r0.x, c11.w, r0.x -mad.f32 r1.x, c9.z, r5.y, r2.y -mad.f32 r2.y, c9.y, r5.y, r2.z -mov.f32f32 r0.w, r0.w +mad.f32 r3.x, c9.x, r4.w, r3.x mad.f32 r0.x, c12.w, r0.y, r0.x -mad.f32 r0.y, c13.z, r1.y, r0.w +mad.f32 r0.y, c9.w, r4.w, r3.y mad.f32 r0.x, c13.w, r0.z, r0.x -mad.f32 r0.z, c8.x, r3.y, r4.x -mad.f32 r0.w, c1.w, r3.y, r4.y -mov.f32f32 r0.y, r0.y +mad.f32 r0.z, c9.z, r4.w, r0.w +mad.f32 r0.w, c2.w, r4.w, r2.x +mad.f32 r4.y, c2.z, r4.w, r2.y add.f r0.x, r0.x, c14.w -mad.f32 r0.z, c9.x, r5.y, r0.z -mad.f32 r0.w, c2.w, r5.y, r0.w -mad.f32 r0.y, (neg)c4.z, r0.y, r1.z -mad.f32 r1.y, c10.w, r0.x, r2.x -mad.f32 r1.x, c10.z, r0.x, r1.x -mad.f32 r1.z, c10.y, r0.x, r2.y -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.x, r1.x -mul.f r1.z, r1.z, c15.y -max.f r0.y, c16.x, r0.y -mov.f32f32 r2.w, r1.y -mov.f32f32 r2.z, r1.x -mov.f32f32 r2.y, r1.z -mov.f32f32 r0.y, r0.y -mad.f32 r0.z, c10.x, r0.x, r0.z +mad.f32 r5.x, c2.y, r4.w, r2.z +mad.f32 r4.x, c1.x, r4.x, r4.z +mad.f32 r3.y, c6.x, r4.w, c6.y +mad.f32 r2.x, c10.y, r0.x, r2.w +mad.f32 r3.x, c10.x, r0.x, r3.x +mad.f32 r2.w, c10.w, r0.x, r0.y +mad.f32 r2.z, c10.z, r0.x, r0.z +mul.f r2.y, r2.x, c15.y +mul.f r2.x, r3.x, c15.x mad.f32 r0.w, c3.w, r0.x, r0.w -mad.f32 r3.x, c1.z, r3.y, r4.z -mul.f r1.x, r0.y, c5.z -mul.f r1.y, r0.y, c5.y -mul.f r0.y, r0.y, c5.x -mul.f r0.z, r0.z, c15.x -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.y, r1.y -mov.f32f32 r0.y, r0.y -nop -mov.f32f32 r1.z, r1.x -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.x, r0.y -mov.f32f32 r2.x, r0.z -mov.f32f32 r0.w, r0.w -mad.f32 r0.y, c2.z, r5.y, r3.x -mad.f32 r0.z, c1.y, r3.y, r4.w -mad.f32 r0.y, c3.z, r0.x, r0.y -mad.f32 r3.x, c2.y, r5.y, r0.z -mad.f32 r3.y, c1.x, r3.y, r5.x -mad.f32 r4.x, c6.x, r5.y, c6.y -mov.f32f32 r0.z, r0.y -mad.f32 r0.y, c3.y, r0.x, r3.x -mad.f32 r3.x, c2.x, r5.y, r3.y -mov.f32f32 r3.y, r4.x -mad.f32 r1.w, c6.x, r1.w, c6.y -mov.f32f32 r0.y, r0.y -mad.f32 r0.x, c3.x, r0.x, r3.x -mov.f32f32 r3.y, r3.y -mov.f32f32 r1.w, r1.w -nop -mov.f32f32 r0.x, r0.x -mov.f32f32 r3.w, r3.w -mov.f32f32 r3.x, r1.w -mov.f32f32 r1.w, r3.z -mov.f32f32 r4.x, (0.000000) -mov.f32f32 r3.w, r3.w -nop -mov.f32f32 r3.z, r1.w -mov.f32f32 r1.w, r4.x +mad.f32 r0.z, c3.z, r0.x, r4.y +mad.f32 r0.y, c3.y, r0.x, r5.x +mad.f32 r4.x, c2.x, r4.w, r4.x +mad.f32 r3.x, c6.x, r1.w, c6.y +mad.f32 r0.x, c3.x, r0.x, r4.x +mov.f32f32 r1.w, (0.000000) end nop ; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11) ; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r3.z (0:0,cm=3,il=16,b=0) -; VERT: 107 instructions, 0 half, 6 full +; VERT: 71 instructions, 0 half, 6 full diff --git a/reference/0ad-alpine-valley/0ad-91.asm b/reference/0ad-alpine-valley/0ad-91.asm index 1d73264..f1c05da 100644 --- a/reference/0ad-alpine-valley/0ad-91.asm +++ b/reference/0ad-alpine-valley/0ad-91.asm @@ -8,215 +8,155 @@ @out(r2.y) out1 @out(r2.z) out2 @out(r2.w) out3 +@const(c10.x) 0x3f000000, 0x00000000, 0x3f800000, 0xbb449ba6 +@const(c11.x) 0xbf000000, 0x40000000, 0x3f800000, 0x3de38866 +@const(c12.x) 0x3fb8aa65, 0x3cf5c28f, 0x00000000, 0x00000000 +@const(c13.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.x, 4, r1.x add.f r0.y, r0.w, c10.y bary.f r0.w, 5, r1.x bary.f r1.z, 10, r1.x -add.f r1.w, r0.x, c11.x -bary.f r2.x, 8, r1.x +add.f r2.x, r0.x, c11.x +bary.f r1.w, 11, r1.x add.f r2.y, r0.w, c11.x -mov.f32f32 r1.z, r1.z -floor.f r2.z, r1.w +bary.f r2.z, 8, r1.x +floor.f r3.x, r2.x rcp r0.y, r0.y add.f r0.z, r0.z, c10.y -floor.f r2.w, r2.y -mov.f32f32 r3.x, r1.z -add.f r1.z, r1.w, (neg)r2.z +floor.f r3.y, r2.y +bary.f r2.w, 9, r1.x +add.f r2.x, r2.x, (neg)r3.x (ss)mul.f r0.y, r0.z, r0.y -add.f r0.z, r2.y, (neg)r2.w -bary.f r1.w, 11, r1.x -mov.f32f32 r1.z, r1.z +absneg.f r0.z, (neg)c8.x +add.f r2.y, r2.y, (neg)r3.y +mov.f32f32 r3.x, r2.x +sam (f32)(xyzw)r3.y, r1.z, s#0, t#0 +(sy)(ss)add.f r1.z, c13.y, (neg)r4.x +mul.f r0.z, r0.z, c8.x +mov.f32f32 r1.w, r2.y +mul.f r4.y, c10.x, r3.x +add.f r3.x, c11.y, (neg)r3.x +mul.f r0.z, r0.z, r0.y mov.f32f32 r0.y, r0.y -absneg.f r2.y, (neg)c8.x -mov.f32f32 r0.z, r0.z -mul.f r2.z, c10.x, r1.z -mov.f32f32 r1.w, r1.w -mul.f r2.y, r2.y, c8.x -add.f r2.w, c11.y, (neg)r1.z -mov.f32f32 r2.z, r2.z -mul.f r3.z, c10.x, r0.z -mul.f r2.y, r2.y, r0.y -mov.f32f32 r3.y, r1.w -add.f r0.x, r0.x, (neg)r2.z -mov.f32f32 r1.w, r3.z -mov.f32f32 r2.y, r2.y -mov.f32f32 r2.z, r2.w -mov.f32f32 r0.x, r0.x -add.f r0.w, r0.w, (neg)r1.w -mul.f r0.y, r2.y, r0.y -sam (f32)(xyzw)r2.w, r3.x, s#0, t#0 -(sy)add.f r1.w, c13.y, (neg)r3.z -add.f r2.y, c11.x, r0.x +add.f r0.x, r0.x, (neg)r4.y +mul.f r4.y, c10.x, r1.w +mov.f32f32 r4.z, r3.x +mul.f r0.y, r0.z, r0.y +mov.f32f32 r0.z, r0.x +add.f r0.w, r0.w, (neg)r4.y add.f r0.x, c11.z, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r2.y, r2.y -mov.f32f32 r0.x, r0.x mul.f r0.y, r0.y, c12.x -add.f r3.w, c11.z, r0.w -mul.f r2.y, r2.y, c3.z -mul.f r0.x, r0.x, c3.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r3.w, r3.w -mov.f32f32 r4.x, r2.y -mov.f32f32 r4.y, r0.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r2.y, r2.y -mov.f32f32 r4.z, r4.x -add.f r0.w, c11.x, r0.w -exp2 r0.y, r0.y -(ss)mov.f32f32 r0.y, r0.y -mov.f32f32 r4.x, r4.y -mov.f32f32 r5.y, r0.x -mov.f32f32 r0.x, r0.w -add.f r0.w, c13.y, (neg)r0.y -mov.f32f32 r6.x, r4.x -mul.f r3.w, r3.w, c3.w -mul.f r0.x, r0.x, c3.w -mul.f r0.w, r0.w, c8.y -mul.f r0.y, r0.y, c10.z -mov.f32f32 r4.x, r3.w -mov.f32f32 r4.y, r0.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r6.w, r2.y -mov.f32f32 r2.y, r3.w -mov.f32f32 r4.w, r4.y -bary.f r3.w, 6, r1.x -add.f r0.y, r0.y, r0.w -mov.f32f32 r0.w, r4.x -mov.f32f32 r5.z, r0.x -add.f r0.x, r3.w, c10.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r6.y, r0.w -mov.f32f32 r7.x, r2.y -mov.f32f32 r0.w, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r2.y, r0.x -mov.f32f32 r3.w, r0.x -mov.f32f32 r5.x, r0.w -max.f r0.y, r0.y, c10.y -mov.f32f32 r6.z, r2.y -mov.f32f32 r5.w, r3.w -mov.f32f32 r0.x, r0.x -mul.f r0.w, r1.w, c4.z -add.f r1.w, c13.y, (neg)r3.z -sam.s (f32)(x)r3.w, r4.z, s#2, t#2 -(sy)mov.f32f32 r2.y, r3.w -min.f r0.y, r0.y, c10.z -sam.s (f32)(x)r3.w, r6.x, s#2, t#2 -(sy)mov.f32f32 r3.w, r3.w -sam.s (f32)(x)r4.x, r5.y, s#2, t#2 -(sy)mov.f32f32 r4.x, r4.x -mov.f32f32 r2.y, r2.y -add.f r4.y, c11.y, (neg)r0.z -(ss)add.f r4.z, c13.y, (neg)r0.y -add.f r4.w, c13.y, (neg)r0.y -add.f r5.x, c13.y, (neg)r0.y -mov.f32f32 r4.y, r4.y -mul.f r4.z, r4.z, c7.z -mul.f r4.w, r4.w, c7.y -mul.f r5.x, r5.x, c7.x -mul.f r5.y, r2.z, r4.y -mov.f32f32 r7.y, r0.x -mul.f r0.x, r3.z, c10.z -mul.f r1.w, r1.w, c4.y -mul.f r2.y, r5.y, r2.y -add.f r1.z, r1.z, c10.z -add.f r0.x, r0.x, r0.w -mul.f r0.w, r3.z, c10.z -sam.s (f32)(x)r5.y, r6.w, s#2, t#2 -(sy)mov.f32f32 r5.y, r5.y -mul.f r4.y, r1.z, r4.y -add.f r5.z, c13.y, (neg)r3.z -mul.f r0.x, r3.y, r0.x -add.f r0.w, r0.w, r1.w -mad.f32 r1.w, r4.y, r4.x, r2.y -mul.f r2.y, r5.z, c4.x -mov.f32f32 r0.x, r0.x -bary.f r3.y, 2, r1.x -mov.f32f32 r1.w, r1.w -add.f r0.z, r0.z, c10.z -mul.f r0.w, r3.x, r0.w -mul.f r3.x, r3.z, c10.z -mov.f32f32 r2.x, r2.x -mul.f r2.z, r2.z, r0.z -mul.f r3.y, r0.x, r3.y -mov.f32f32 r0.w, r0.w -add.f r2.y, r3.x, r2.y -mad.f32 r1.w, r2.z, r5.y, r1.w -bary.f r2.z, 1, r1.x -mov.f32f32 r4.x, r2.x -mul.f r2.x, r2.w, r2.y -mov.f32f32 r1.w, r1.w -mul.f r0.z, r1.z, r0.z -mul.f r1.z, r0.w, r2.z -mov.f32f32 r2.x, r2.x -nop -mad.f32 r0.z, r0.z, r3.w, r1.w -bary.f r1.w, 0, r1.x -bary.f (ei)r1.x, 9, r1.x -mov.f32f32 r1.y, c10.z -mov.f32f32 r0.z, r0.z -mul.f r1.w, r2.x, r1.w -mov.f32f32 r1.x, r1.x -mov.f32f32 r2.w, r1.y -mul.f r0.z, c11.w, r0.z -mov.f32f32 r1.y, c10.y -(rpt1)nop -mov.f32f32 r0.z, r0.z -mov.f32f32 r4.y, r1.x +add.f r0.z, c11.x, r0.z +mov.f32f32 r4.y, r0.w +mul.f r4.w, r0.x, c3.z +add.f r0.x, c11.z, r0.w +mul.f r5.z, r0.z, c3.z +add.f r0.z, c11.x, r4.y +mov.f32f32 r6.y, r4.w +mul.f r5.x, r0.x, c3.w +mov.f32f32 r7.x, r5.z +mul.f r6.z, r0.z, c3.w +exp2 r0.x, r0.y +(ss)mov.f32f32 r0.y, r0.x +bary.f r0.z, 6, r1.x +mov.f32f32 r5.w, r5.x +mov.f32f32 r7.y, r6.z +add.f r0.y, c13.y, (neg)r0.y +add.f r5.y, r0.z, c10.w +add.f r0.z, c11.y, (neg)r1.w +mul.f r0.x, r0.x, c10.z +add.f r0.w, c13.y, (neg)r4.x +mov.f32f32 r7.z, r5.y +mul.f r0.y, r0.y, c8.y +mov.f32f32 r6.w, r5.y +mov.f32f32 r6.x, r5.y (rpt1)nop -mul.f r1.x, r3.y, r0.z -mul.f r1.z, r1.z, r0.z -mul.f r0.z, r1.w, r0.z +add.f r0.x, r0.x, r0.y +sam.s (f32)(x)r7.x, r7.x, s#2, t#2 +mov.f32f32 r0.y, r0.z nop -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.z, r1.z -mad.f32 r0.x, c6.z, r0.x, r1.x -mad.f32 r0.w, c6.y, r0.w, r1.z -mov.f32f32 r0.z, r0.z -sam (f32)(w)r3.x, r4.x, s#1, t#1 -(sy)cmps.f.lt r1.x, r3.w, c12.y -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.w, r0.w -mad.f32 r0.z, c6.x, r2.x, r0.z -cov.u32f32 r1.x, r1.x -mul.f r0.x, r0.y, r0.x -mul.f r0.w, r0.y, r0.w -mov.f32f32 r0.z, r0.z -cmps.f.ne r1.x, r1.x, c10.y -add.f r0.x, r0.x, r4.z -mov.f32f32 r1.z, r3.w -add.f r0.w, r0.w, r4.w -mul.f r0.y, r0.y, r0.z -nop -mov.f32f32 r0.z, r1.z -(rpt2)nop -sel.b32 r0.z, r1.y, r1.x, r0.z -add.f r0.y, r0.y, r5.x -(rpt1)nop -mul.f r0.x, r0.x, r0.z -mul.f r0.w, r0.w, r0.z -mul.f r0.y, r0.y, r0.z +(ss)nop +sam.s (f32)(x)r7.y, r6.y, s#2, t#2 +sam.s (f32)(x)r7.z, r4.w, s#2, t#2 +mul.f r0.w, r0.w, c4.z +add.f r1.w, c13.y, (neg)r4.x +mul.f r0.y, r4.z, r0.y +max.f r0.x, r0.x, c10.y +sam.s (f32)(x)r4.y, r5.z, s#2, t#2 +mul.f r4.z, r4.x, c10.z +mul.f r1.w, r1.w, c4.y +(sy)mul.f r0.y, r0.y, r7.x +add.f r2.x, r2.x, c10.z +min.f r0.x, r0.x, c10.z +add.f r0.w, r4.z, r0.w +mul.f r4.z, r4.x, c10.z +mul.f r0.z, r2.x, r0.z +(ss)add.f r4.w, c13.y, (neg)r0.x +add.f r5.x, c13.y, (neg)r0.x +add.f r5.y, c13.y, (neg)r0.x +mad.f32 r0.y, r0.z, r7.y, r0.y +add.f r0.z, r2.y, c10.z +mul.f r2.y, r4.w, c7.z +mul.f r4.w, r5.x, c7.y +mul.f r5.x, r5.y, c7.x +mul.f r3.x, r3.x, r0.z +add.f r1.w, r4.z, r1.w +mul.f r1.z, r1.z, c4.x +mul.f r0.w, r3.w, r0.w +mad.f32 r0.y, r3.x, r4.y, r0.y +mul.f r0.z, r2.x, r0.z +mul.f r2.x, r4.x, c10.z +mov.f32f32 r3.x, r0.w +bary.f r3.w, 2, r1.x +mad.f32 r0.y, r0.z, r7.z, r0.y +mul.f r0.z, r3.z, r1.w +add.f r1.z, r2.x, r1.z +mul.f r1.w, r3.x, r3.w +mul.f r0.y, c11.w, r0.y +mov.f32f32 r2.x, r0.z +mul.f r1.z, r3.y, r1.z +bary.f r3.x, 1, r1.x +mov.f32f32 r3.y, r0.y +sam (f32)(w)r3.z, r2.z, s#1, t#1 +(sy)(ss)cmps.f.lt r2.z, r4.y, c12.y +mov.f32f32 r3.z, r1.z +mul.f r2.x, r2.x, r3.x +mul.f r1.w, r1.w, r3.y +bary.f (ei)r1.x, 0, r1.x +mad.f32 r0.w, c6.z, r0.w, r1.w +mul.f r1.y, r2.x, r3.y +cov.u32f32 r1.w, r2.z +mov.f32f32 r2.w, c10.z +mul.f r0.w, r0.x, r0.w +mad.f32 r0.z, c6.y, r0.z, r1.y +mul.f r1.x, r3.z, r1.x +cmps.f.ne r1.y, r1.w, c10.y +add.f r0.w, r0.w, r2.y +mov.f32f32 r1.w, c10.y +mul.f r0.z, r0.x, r0.z +mul.f r0.y, r1.x, r0.y nop -mul.f r0.x, r0.x, c5.z -mul.f r0.z, r0.w, c5.y -mul.f r0.y, r0.y, c5.x +sel.b32 r1.x, r1.w, r1.y, r4.y +add.f r0.z, r0.z, r4.w +mad.f32 r0.y, c6.x, r1.z, r0.y nop -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.y, r0.y +mul.f r0.w, r0.w, r1.x +mul.f r0.z, r0.z, r1.x +(rpt1)nop +mul.f r2.z, r0.w, c5.z +mul.f r2.y, r0.z, c5.y +mul.f r0.x, r0.x, r0.y +(rpt2)nop +add.f r0.x, r0.x, r5.x +(rpt2)nop +mul.f r0.x, r0.x, r1.x +(rpt2)nop +mul.f r2.x, r0.x, c5.x +end nop -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.y, r0.y nop -mov.f32f32 r2.z, r0.x -mov.f32f32 r2.y, r0.z -mov.f32f32 r2.x, r0.y -end nop ; FRAG: outputs: r2.x (1:0) ; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r2.x (5:10,cm=f,il=12,b=1) r1.x (5:11,cm=f,il=16,b=1) -; FRAG: 212 instructions, 0 half, 8 full +; FRAG: 149 instructions, 0 half, 8 full diff --git a/reference/0ad-alpine-valley/0ad-92.asm b/reference/0ad-alpine-valley/0ad-92.asm index c6a3a6f..9c8ac11 100644 --- a/reference/0ad-alpine-valley/0ad-92.asm +++ b/reference/0ad-alpine-valley/0ad-92.asm @@ -24,115 +24,81 @@ @out(r3.y) out13 @out(r3.z) out14 @out(r3.w) out15 +@const(c16.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 +@const(c17.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)mul.f r1.z, c11.x, r0.w mul.f r1.w, c11.x, r0.x mad.f32 r1.z, c12.x, r1.x, r1.z mad.f32 r1.w, c12.x, r0.y, r1.w -mul.f r2.x, c11.z, r0.x -mad.f32 r1.w, c13.x, r0.z, r1.w -mov.f32f32 r1.z, r1.z -mad.f32 r2.x, c12.z, r0.y, r2.x mad.f32 r1.z, c13.x, r1.y, r1.z -add.f r1.w, r1.w, c14.x -mad.f32 r2.x, c13.z, r0.z, r2.x -mul.f r2.y, c11.y, r0.w -mov.f32f32 r1.z, r1.z -mul.f r2.z, c7.w, r1.w -mul.f r2.w, c7.z, r1.w -mul.f r3.x, c7.y, r1.w +mad.f32 r1.w, c13.x, r0.z, r1.w +mul.f r2.x, c11.z, r0.x +mul.f r2.y, c11.y, r0.x mul.f r1.z, r1.z, (neg)c4.x -mad.f32 r2.y, c12.y, r1.x, r2.y -mul.f r3.y, c11.y, r0.x -mul.f r4.x, c7.x, r1.w -mad.f32 r3.y, c12.y, r0.y, r3.y -mov.f32f32 r2.y, r2.y -mad.f32 r3.y, c13.y, r0.z, r3.y -mad.f32 r2.y, c13.y, r1.y, r2.y -mul.f r4.y, c0.w, r1.w -mul.f r4.z, c0.z, r1.w -mul.f r4.w, c0.y, r1.w -mov.f32f32 r2.y, r2.y -add.f r3.y, r3.y, c14.y -mul.f r5.x, c0.x, r1.w -add.f r5.y, r2.x, c14.z -mad.f32 r1.z, (neg)c4.y, r2.y, r1.z -mad.f32 r2.x, c8.w, r3.y, r2.z -mad.f32 r2.y, c8.z, r3.y, r2.w -mad.f32 r2.z, c8.y, r3.y, r3.x -mov.f32f32 r1.z, r1.z +mul.f r2.z, c11.y, r0.w +add.f r1.w, r1.w, c14.x +mad.f32 r2.z, c12.y, r1.x, r2.z +mad.f32 r2.x, c12.z, r0.y, r2.x +mad.f32 r2.z, c13.y, r1.y, r2.z +mul.f r2.w, c7.y, r1.w +mul.f r3.x, c7.x, r1.w +mul.f r3.y, c7.w, r1.w +mad.f32 r1.z, (neg)c4.y, r2.z, r1.z mul.f r0.w, c11.z, r0.w -mad.f32 r2.x, c9.w, r5.y, r2.x +mad.f32 r2.y, c12.y, r0.y, r2.y mad.f32 r0.w, c12.z, r1.x, r0.w +mad.f32 r1.x, c13.y, r0.z, r2.y +mad.f32 r0.w, c13.z, r1.y, r0.w +mul.f r1.y, c7.z, r1.w +mul.f r2.y, c0.w, r1.w +mul.f r2.z, c0.z, r1.w +mad.f32 r0.w, (neg)c4.z, r0.w, r1.z +add.f r4.x, r1.x, c14.y +mul.f r4.y, c0.y, r1.w +mul.f r4.z, c0.x, r1.w +max.f r0.w, c16.x, r0.w +mad.f32 r2.w, c8.y, r4.x, r2.w +mad.f32 r3.x, c8.x, r4.x, r3.x +mad.f32 r3.y, c8.w, r4.x, r3.y +mov.f32f32 r4.w, r0.w +mad.f32 r2.x, c13.z, r0.z, r2.x +mul.f r1.x, r0.w, c5.x +mad.f32 r0.w, c8.z, r4.x, r1.y +mul.f r1.z, r4.w, c5.z +mul.f r1.y, r4.w, c5.y +add.f r4.w, r2.x, c14.z +mad.f32 r2.x, c1.w, r4.x, r2.y +mad.f32 r2.y, c1.z, r4.x, r2.z +mad.f32 r2.z, c1.y, r4.x, r4.y +mad.f32 r2.w, c9.y, r4.w, r2.w mul.f r0.x, c11.w, r0.x -mad.f32 r1.x, c9.z, r5.y, r2.y -mad.f32 r2.y, c9.y, r5.y, r2.z -mov.f32f32 r0.w, r0.w +mad.f32 r3.x, c9.x, r4.w, r3.x mad.f32 r0.x, c12.w, r0.y, r0.x -mad.f32 r0.y, c13.z, r1.y, r0.w +mad.f32 r0.y, c9.w, r4.w, r3.y mad.f32 r0.x, c13.w, r0.z, r0.x -mad.f32 r0.z, c8.x, r3.y, r4.x -mad.f32 r0.w, c1.w, r3.y, r4.y -mov.f32f32 r0.y, r0.y +mad.f32 r0.z, c9.z, r4.w, r0.w +mad.f32 r0.w, c2.w, r4.w, r2.x +mad.f32 r4.y, c2.z, r4.w, r2.y add.f r0.x, r0.x, c14.w -mad.f32 r0.z, c9.x, r5.y, r0.z -mad.f32 r0.w, c2.w, r5.y, r0.w -mad.f32 r0.y, (neg)c4.z, r0.y, r1.z -mad.f32 r1.y, c10.w, r0.x, r2.x -mad.f32 r1.x, c10.z, r0.x, r1.x -mad.f32 r1.z, c10.y, r0.x, r2.y -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.x, r1.x -mul.f r1.z, r1.z, c15.y -max.f r0.y, c16.x, r0.y -mov.f32f32 r2.w, r1.y -mov.f32f32 r2.z, r1.x -mov.f32f32 r2.y, r1.z -mov.f32f32 r0.y, r0.y -mad.f32 r0.z, c10.x, r0.x, r0.z +mad.f32 r5.x, c2.y, r4.w, r2.z +mad.f32 r4.x, c1.x, r4.x, r4.z +mad.f32 r3.y, c6.x, r4.w, c6.y +mad.f32 r2.x, c10.y, r0.x, r2.w +mad.f32 r3.x, c10.x, r0.x, r3.x +mad.f32 r2.w, c10.w, r0.x, r0.y +mad.f32 r2.z, c10.z, r0.x, r0.z +mul.f r2.y, r2.x, c15.y +mul.f r2.x, r3.x, c15.x mad.f32 r0.w, c3.w, r0.x, r0.w -mad.f32 r3.x, c1.z, r3.y, r4.z -mul.f r1.x, r0.y, c5.z -mul.f r1.y, r0.y, c5.y -mul.f r0.y, r0.y, c5.x -mul.f r0.z, r0.z, c15.x -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.y, r1.y -mov.f32f32 r0.y, r0.y -nop -mov.f32f32 r1.z, r1.x -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.x, r0.y -mov.f32f32 r2.x, r0.z -mov.f32f32 r0.w, r0.w -mad.f32 r0.y, c2.z, r5.y, r3.x -mad.f32 r0.z, c1.y, r3.y, r4.w -mad.f32 r0.y, c3.z, r0.x, r0.y -mad.f32 r3.x, c2.y, r5.y, r0.z -mad.f32 r3.y, c1.x, r3.y, r5.x -mad.f32 r4.x, c6.x, r5.y, c6.y -mov.f32f32 r0.z, r0.y -mad.f32 r0.y, c3.y, r0.x, r3.x -mad.f32 r3.x, c2.x, r5.y, r3.y -mov.f32f32 r3.y, r4.x -mad.f32 r1.w, c6.x, r1.w, c6.y -mov.f32f32 r0.y, r0.y -mad.f32 r0.x, c3.x, r0.x, r3.x -mov.f32f32 r3.y, r3.y -mov.f32f32 r1.w, r1.w -nop -mov.f32f32 r0.x, r0.x -mov.f32f32 r3.w, r3.w -mov.f32f32 r3.x, r1.w -mov.f32f32 r1.w, r3.z -mov.f32f32 r4.x, (0.000000) -mov.f32f32 r3.w, r3.w -nop -mov.f32f32 r3.z, r1.w -mov.f32f32 r1.w, r4.x +mad.f32 r0.z, c3.z, r0.x, r4.y +mad.f32 r0.y, c3.y, r0.x, r5.x +mad.f32 r4.x, c2.x, r4.w, r4.x +mad.f32 r3.x, c6.x, r1.w, c6.y +mad.f32 r0.x, c3.x, r0.x, r4.x +mov.f32f32 r1.w, (0.000000) end nop ; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11) ; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r3.z (0:0,cm=3,il=16,b=0) -; VERT: 107 instructions, 0 half, 6 full +; VERT: 71 instructions, 0 half, 6 full diff --git a/reference/0ad-alpine-valley/0ad-93.asm b/reference/0ad-alpine-valley/0ad-93.asm index c6a3a6f..9c8ac11 100644 --- a/reference/0ad-alpine-valley/0ad-93.asm +++ b/reference/0ad-alpine-valley/0ad-93.asm @@ -24,115 +24,81 @@ @out(r3.y) out13 @out(r3.z) out14 @out(r3.w) out15 +@const(c16.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 +@const(c17.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)mul.f r1.z, c11.x, r0.w mul.f r1.w, c11.x, r0.x mad.f32 r1.z, c12.x, r1.x, r1.z mad.f32 r1.w, c12.x, r0.y, r1.w -mul.f r2.x, c11.z, r0.x -mad.f32 r1.w, c13.x, r0.z, r1.w -mov.f32f32 r1.z, r1.z -mad.f32 r2.x, c12.z, r0.y, r2.x mad.f32 r1.z, c13.x, r1.y, r1.z -add.f r1.w, r1.w, c14.x -mad.f32 r2.x, c13.z, r0.z, r2.x -mul.f r2.y, c11.y, r0.w -mov.f32f32 r1.z, r1.z -mul.f r2.z, c7.w, r1.w -mul.f r2.w, c7.z, r1.w -mul.f r3.x, c7.y, r1.w +mad.f32 r1.w, c13.x, r0.z, r1.w +mul.f r2.x, c11.z, r0.x +mul.f r2.y, c11.y, r0.x mul.f r1.z, r1.z, (neg)c4.x -mad.f32 r2.y, c12.y, r1.x, r2.y -mul.f r3.y, c11.y, r0.x -mul.f r4.x, c7.x, r1.w -mad.f32 r3.y, c12.y, r0.y, r3.y -mov.f32f32 r2.y, r2.y -mad.f32 r3.y, c13.y, r0.z, r3.y -mad.f32 r2.y, c13.y, r1.y, r2.y -mul.f r4.y, c0.w, r1.w -mul.f r4.z, c0.z, r1.w -mul.f r4.w, c0.y, r1.w -mov.f32f32 r2.y, r2.y -add.f r3.y, r3.y, c14.y -mul.f r5.x, c0.x, r1.w -add.f r5.y, r2.x, c14.z -mad.f32 r1.z, (neg)c4.y, r2.y, r1.z -mad.f32 r2.x, c8.w, r3.y, r2.z -mad.f32 r2.y, c8.z, r3.y, r2.w -mad.f32 r2.z, c8.y, r3.y, r3.x -mov.f32f32 r1.z, r1.z +mul.f r2.z, c11.y, r0.w +add.f r1.w, r1.w, c14.x +mad.f32 r2.z, c12.y, r1.x, r2.z +mad.f32 r2.x, c12.z, r0.y, r2.x +mad.f32 r2.z, c13.y, r1.y, r2.z +mul.f r2.w, c7.y, r1.w +mul.f r3.x, c7.x, r1.w +mul.f r3.y, c7.w, r1.w +mad.f32 r1.z, (neg)c4.y, r2.z, r1.z mul.f r0.w, c11.z, r0.w -mad.f32 r2.x, c9.w, r5.y, r2.x +mad.f32 r2.y, c12.y, r0.y, r2.y mad.f32 r0.w, c12.z, r1.x, r0.w +mad.f32 r1.x, c13.y, r0.z, r2.y +mad.f32 r0.w, c13.z, r1.y, r0.w +mul.f r1.y, c7.z, r1.w +mul.f r2.y, c0.w, r1.w +mul.f r2.z, c0.z, r1.w +mad.f32 r0.w, (neg)c4.z, r0.w, r1.z +add.f r4.x, r1.x, c14.y +mul.f r4.y, c0.y, r1.w +mul.f r4.z, c0.x, r1.w +max.f r0.w, c16.x, r0.w +mad.f32 r2.w, c8.y, r4.x, r2.w +mad.f32 r3.x, c8.x, r4.x, r3.x +mad.f32 r3.y, c8.w, r4.x, r3.y +mov.f32f32 r4.w, r0.w +mad.f32 r2.x, c13.z, r0.z, r2.x +mul.f r1.x, r0.w, c5.x +mad.f32 r0.w, c8.z, r4.x, r1.y +mul.f r1.z, r4.w, c5.z +mul.f r1.y, r4.w, c5.y +add.f r4.w, r2.x, c14.z +mad.f32 r2.x, c1.w, r4.x, r2.y +mad.f32 r2.y, c1.z, r4.x, r2.z +mad.f32 r2.z, c1.y, r4.x, r4.y +mad.f32 r2.w, c9.y, r4.w, r2.w mul.f r0.x, c11.w, r0.x -mad.f32 r1.x, c9.z, r5.y, r2.y -mad.f32 r2.y, c9.y, r5.y, r2.z -mov.f32f32 r0.w, r0.w +mad.f32 r3.x, c9.x, r4.w, r3.x mad.f32 r0.x, c12.w, r0.y, r0.x -mad.f32 r0.y, c13.z, r1.y, r0.w +mad.f32 r0.y, c9.w, r4.w, r3.y mad.f32 r0.x, c13.w, r0.z, r0.x -mad.f32 r0.z, c8.x, r3.y, r4.x -mad.f32 r0.w, c1.w, r3.y, r4.y -mov.f32f32 r0.y, r0.y +mad.f32 r0.z, c9.z, r4.w, r0.w +mad.f32 r0.w, c2.w, r4.w, r2.x +mad.f32 r4.y, c2.z, r4.w, r2.y add.f r0.x, r0.x, c14.w -mad.f32 r0.z, c9.x, r5.y, r0.z -mad.f32 r0.w, c2.w, r5.y, r0.w -mad.f32 r0.y, (neg)c4.z, r0.y, r1.z -mad.f32 r1.y, c10.w, r0.x, r2.x -mad.f32 r1.x, c10.z, r0.x, r1.x -mad.f32 r1.z, c10.y, r0.x, r2.y -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.x, r1.x -mul.f r1.z, r1.z, c15.y -max.f r0.y, c16.x, r0.y -mov.f32f32 r2.w, r1.y -mov.f32f32 r2.z, r1.x -mov.f32f32 r2.y, r1.z -mov.f32f32 r0.y, r0.y -mad.f32 r0.z, c10.x, r0.x, r0.z +mad.f32 r5.x, c2.y, r4.w, r2.z +mad.f32 r4.x, c1.x, r4.x, r4.z +mad.f32 r3.y, c6.x, r4.w, c6.y +mad.f32 r2.x, c10.y, r0.x, r2.w +mad.f32 r3.x, c10.x, r0.x, r3.x +mad.f32 r2.w, c10.w, r0.x, r0.y +mad.f32 r2.z, c10.z, r0.x, r0.z +mul.f r2.y, r2.x, c15.y +mul.f r2.x, r3.x, c15.x mad.f32 r0.w, c3.w, r0.x, r0.w -mad.f32 r3.x, c1.z, r3.y, r4.z -mul.f r1.x, r0.y, c5.z -mul.f r1.y, r0.y, c5.y -mul.f r0.y, r0.y, c5.x -mul.f r0.z, r0.z, c15.x -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.y, r1.y -mov.f32f32 r0.y, r0.y -nop -mov.f32f32 r1.z, r1.x -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.x, r0.y -mov.f32f32 r2.x, r0.z -mov.f32f32 r0.w, r0.w -mad.f32 r0.y, c2.z, r5.y, r3.x -mad.f32 r0.z, c1.y, r3.y, r4.w -mad.f32 r0.y, c3.z, r0.x, r0.y -mad.f32 r3.x, c2.y, r5.y, r0.z -mad.f32 r3.y, c1.x, r3.y, r5.x -mad.f32 r4.x, c6.x, r5.y, c6.y -mov.f32f32 r0.z, r0.y -mad.f32 r0.y, c3.y, r0.x, r3.x -mad.f32 r3.x, c2.x, r5.y, r3.y -mov.f32f32 r3.y, r4.x -mad.f32 r1.w, c6.x, r1.w, c6.y -mov.f32f32 r0.y, r0.y -mad.f32 r0.x, c3.x, r0.x, r3.x -mov.f32f32 r3.y, r3.y -mov.f32f32 r1.w, r1.w -nop -mov.f32f32 r0.x, r0.x -mov.f32f32 r3.w, r3.w -mov.f32f32 r3.x, r1.w -mov.f32f32 r1.w, r3.z -mov.f32f32 r4.x, (0.000000) -mov.f32f32 r3.w, r3.w -nop -mov.f32f32 r3.z, r1.w -mov.f32f32 r1.w, r4.x +mad.f32 r0.z, c3.z, r0.x, r4.y +mad.f32 r0.y, c3.y, r0.x, r5.x +mad.f32 r4.x, c2.x, r4.w, r4.x +mad.f32 r3.x, c6.x, r1.w, c6.y +mad.f32 r0.x, c3.x, r0.x, r4.x +mov.f32f32 r1.w, (0.000000) end nop ; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11) ; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r3.z (0:0,cm=3,il=16,b=0) -; VERT: 107 instructions, 0 half, 6 full +; VERT: 71 instructions, 0 half, 6 full diff --git a/reference/0ad-alpine-valley/0ad-94.asm b/reference/0ad-alpine-valley/0ad-94.asm index 12fbb01..5630fc2 100644 --- a/reference/0ad-alpine-valley/0ad-94.asm +++ b/reference/0ad-alpine-valley/0ad-94.asm @@ -8,203 +8,139 @@ @out(r2.y) out1 @out(r2.z) out2 @out(r2.w) out3 +@const(c9.x) 0x3f000000, 0x00000000, 0x3f233333, 0xbb449ba6 +@const(c10.x) 0xbf000000, 0x3f800000, 0x40000000, 0x3de38866 +@const(c11.x) 0x3fb8aa65, 0x3cf5c28f, 0x00000000, 0x00000000 +@const(c12.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.x, 4, r1.x add.f r0.y, r0.w, c9.y bary.f r0.w, 5, r1.x bary.f r1.z, 10, r1.x -add.f r1.w, r0.x, c10.x -bary.f r2.x, 8, r1.x +add.f r2.x, r0.x, c10.x +bary.f r1.w, 11, r1.x add.f r2.y, r0.w, c10.x -mov.f32f32 r1.z, r1.z -floor.f r2.z, r1.w +bary.f r3.y, 8, r1.x +floor.f r2.z, r2.x rcp r0.y, r0.y add.f r0.z, r0.z, c9.y floor.f r2.w, r2.y -mov.f32f32 r3.y, r1.z -add.f r1.z, r1.w, (neg)r2.z +bary.f r3.z, 9, r1.x +add.f r3.w, r2.x, (neg)r2.z (ss)mul.f r0.y, r0.z, r0.y -add.f r0.z, r2.y, (neg)r2.w -bary.f r1.w, 11, r1.x -mov.f32f32 r1.z, r1.z +absneg.f r0.z, (neg)c7.x +add.f r4.x, r2.y, (neg)r2.w +mov.f32f32 r4.y, r3.w +sam (f32)(xyzw)r2.x, r1.z, s#0, t#0 +(ss)add.f r1.z, r3.w, c10.y +mul.f r0.z, r0.z, c7.x +mov.f32f32 r1.w, r4.x +mul.f r3.w, c9.x, r4.y +add.f r4.y, c10.z, (neg)r4.y +mul.f r0.z, r0.z, r0.y mov.f32f32 r0.y, r0.y -absneg.f r2.y, (neg)c7.x -mov.f32f32 r0.z, r0.z -mul.f r2.z, c9.x, r1.z -mov.f32f32 r1.w, r1.w -add.f r2.w, c10.z, (neg)r1.z -add.f r3.w, c10.z, (neg)r0.z -mov.f32f32 r2.z, r2.z -mul.f r2.y, r2.y, c7.x -mul.f r4.x, c9.x, r0.z -mov.f32f32 r3.z, r1.w -add.f r0.x, r0.x, (neg)r2.z -mul.f r1.w, r2.y, r0.y -mov.f32f32 r2.y, r4.x -mov.f32f32 r2.z, r2.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r1.w, r1.w -add.f r0.w, r0.w, (neg)r2.y -sam (f32)(xyzw)r4.x, r3.y, s#0, t#0 -(sy)cmps.f.lt r2.y, r4.w, c9.z -add.f r2.w, c10.x, r0.x -mul.f r0.y, r1.w, r0.y +add.f r0.x, r0.x, (neg)r3.w +mul.f r3.w, c9.x, r1.w +mov.f32f32 r4.z, r4.y +mul.f r0.y, r0.z, r0.y +mov.f32f32 r0.z, r0.x +add.f r0.w, r0.w, (neg)r3.w add.f r0.x, c10.y, r0.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.w, r2.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x -add.f r2.w, c10.y, r0.w -mul.f r1.w, r1.w, c3.z mul.f r0.y, r0.y, c11.x -mul.f r0.x, r0.x, c3.z -mov.f32f32 r2.w, r2.w -(ss)mov.f32f32 r3.y, r1.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r3.z, r0.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r5.x, r3.y -add.f r0.w, c10.x, r0.w -mov.f32f32 r3.y, r3.z -mov.f32f32 r5.w, r0.x -mov.f32f32 r0.x, r1.w -mov.f32f32 r0.w, r0.w -exp2 r0.y, r0.y -(ss)mov.f32f32 r0.y, r0.y -mov.f32f32 r6.z, r3.y -mul.f r1.w, r2.w, c3.w -mul.f r0.w, r0.w, c3.w -add.f r2.w, c12.y, (neg)r0.y -mov.f32f32 r7.y, r0.x -mov.f32f32 r0.x, r1.w -mov.f32f32 r3.y, r0.w -mul.f r2.w, r2.w, c7.y -mul.f r0.y, r0.y, c10.y -mov.f32f32 r1.w, r1.w -mov.f32f32 r5.y, r3.y -bary.f r3.y, 6, r1.x -add.f r0.y, r0.y, r2.w -mov.f32f32 r1.w, r1.w -mov.f32f32 r0.w, r0.w -add.f r2.w, r3.y, c9.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r6.w, r1.w -mov.f32f32 r6.x, r0.w -mov.f32f32 r0.w, r2.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.w, r2.w -mov.f32f32 r3.y, r2.w -mov.f32f32 r5.z, r0.w -max.f r0.y, r0.y, c9.y -mov.f32f32 r7.x, r1.w -mov.f32f32 r6.y, r3.y -mov.f32f32 r7.z, r0.x -mov.f32f32 r0.x, r2.w -cov.u32f32 r0.w, r2.y -sam.s (f32)(x)r7.w, r5.x, s#2, t#2 -(sy)mov.f32f32 r1.w, r7.w -min.f r0.y, r0.y, c10.y -sam.s (f32)(x)r7.w, r6.z, s#2, t#2 -(sy)mov.f32f32 r2.y, r7.w -(ss)nop -sam.s (f32)(x)r5.x, r5.w, s#2, t#2 -(sy)mov.f32f32 r2.w, r5.x -mov.f32f32 r1.w, r1.w -mov.f32f32 r3.y, r3.w -add.f r3.z, c12.y, (neg)r0.y -add.f r3.w, c12.y, (neg)r0.y -add.f r5.x, c12.y, (neg)r0.y -mul.f r5.y, r2.z, r3.y -mul.f r3.z, r3.z, c6.z -mul.f r3.w, r3.w, c6.y -mul.f r5.x, r5.x, c6.x -mul.f r1.w, r5.y, r1.w -add.f r1.z, r1.z, c10.y -mov.f32f32 r7.w, r0.x -mov.f32f32 r0.x, (0.000000) -add.f r0.z, r0.z, c10.y -mul.f r3.y, r1.z, r3.y -mov.f32f32 r2.x, r2.x -cmps.f.ne p0.x, r0.w, r0.x -mul.f r0.x, r2.z, r0.z -mad.f32 r0.w, r3.y, r2.w, r1.w -sam.s (f32)(x)r5.y, r7.y, s#2, t#2 +add.f r0.z, c10.x, r0.z +mov.f32f32 r3.w, r0.w +mul.f r4.w, r0.x, c3.z +add.f r0.x, c10.y, r0.w +mul.f r5.z, r0.z, c3.z +add.f r0.z, c10.x, r3.w +mov.f32f32 r6.y, r4.w +mul.f r5.x, r0.x, c3.w +mov.f32f32 r7.x, r5.z +mul.f r6.z, r0.z, c3.w +exp2 r0.x, r0.y +(ss)mov.f32f32 r0.y, r0.x +bary.f r0.z, 6, r1.x +mov.f32f32 r5.w, r5.x +mov.f32f32 r7.y, r6.z +add.f r0.y, c12.y, (neg)r0.y +add.f r5.y, r0.z, c9.w +add.f r0.z, c10.z, (neg)r1.w +mul.f r0.x, r0.x, c10.y +add.f r0.w, r4.x, c10.y +mov.f32f32 r7.z, r5.y +mul.f r0.y, r0.y, c7.y +mov.f32f32 r6.w, r5.y +mov.f32f32 r6.x, r5.y (rpt1)nop -(sy)mov.f32f32 r1.w, r5.y -mov.f32f32 r0.w, r0.w -bary.f r2.z, 0, r1.x -mov.f32f32 r5.y, r2.x -mad.f32 r0.x, r0.x, r1.w, r0.w -bary.f r0.w, 1, r1.x -bary.f r1.w, 9, r1.x +add.f r0.x, r0.x, r0.y +sam.s (f32)(x)r7.x, r7.x, s#2, t#2 +mov.f32f32 r0.y, r0.z +nop +(ss)nop +sam.s (f32)(x)r7.y, r6.y, s#2, t#2 +sam.s (f32)(x)r7.z, r4.w, s#2, t#2 mul.f r0.z, r1.z, r0.z -mov.f32f32 r0.x, r0.x -bary.f (ei)r1.x, 2, r1.x -mov.f32f32 r1.y, r1.w -mad.f32 r0.x, r0.z, r2.y, r0.x +mul.f r1.w, r4.y, r0.w +mul.f r0.y, r4.z, r0.y +max.f r0.x, r0.x, c9.y +sam.s (f32)(x)r3.w, r5.z, s#2, t#2 +(sy)cmps.f.lt r4.x, r2.w, c9.z +mul.f r0.w, r1.z, r0.w +mul.f r0.y, r0.y, r7.x +min.f r0.x, r0.x, c10.y +mad.f32 r0.y, r0.z, r7.y, r0.y +cov.u32f32 r0.z, r4.x +mad.f32 r0.y, r1.w, r3.w, r0.y +add.f r1.z, c12.y, (neg)r0.x +mad.f32 r0.y, r0.w, r7.z, r0.y +add.f r0.w, c12.y, (neg)r0.x +add.f r1.w, c12.y, (neg)r0.x +mov.f32f32 r3.w, (0.000000) +mul.f r0.y, c10.w, r0.y +bary.f r4.x, 0, r1.x +mul.f r1.z, r1.z, c6.z +mul.f r0.w, r0.w, c6.y +mov.f32f32 r4.y, r0.y +bary.f r4.z, 2, r1.x +bary.f (ei)r1.x, 1, r1.x +mul.f r1.y, r2.x, r4.x +mul.f r1.w, r1.w, c6.x +mul.f r4.x, r2.z, r4.z +mul.f r1.x, r2.y, r1.x +mul.f r0.y, r1.y, r0.y +cmps.f.ne p0.x, r0.z, r3.w +mul.f r0.z, r4.x, r4.y +mul.f r1.x, r1.x, r4.y +mad.f32 r0.z, c5.z, r2.z, r0.z +mad.f32 r1.x, c5.y, r2.y, r1.x +mad.f32 r0.y, c5.x, r2.x, r0.y +sam (f32)(w)r3.y, r3.y, s#1, t#1 +(sy)cmps.f.lt r1.y, r4.x, c11.y +mul.f r0.z, r0.x, r0.z +mul.f r1.x, r0.x, r1.x +mul.f r0.x, r0.x, r0.y kill p0.x -mov.f32f32 r0.z, r4.w -mov.f32f32 r5.z, r1.y -mov.f32f32 r0.x, r0.x -mul.f r1.x, r4.z, r1.x -mul.f r0.w, r4.y, r0.w -mov.f32f32 r2.w, r0.z -mul.f r0.x, c10.w, r0.x -mul.f r0.z, r4.x, r2.z -sam (f32)(w)r1.y, r5.y, s#1, t#1 -nop -(sy)cmps.f.lt r1.y, r2.x, c11.y -mov.f32f32 r0.x, r0.x -mov.f32f32 r1.z, r2.x -mov.f32f32 r1.w, c9.y -nop -mul.f r1.x, r1.x, r0.x -mul.f r0.w, r0.w, r0.x -mul.f r0.x, r0.z, r0.x +add.f r0.y, r0.z, r1.z cov.u32f32 r0.z, r1.y -mov.f32f32 r1.x, r1.x -mov.f32f32 r0.w, r0.w -mad.f32 r1.x, c5.z, r4.z, r1.x -mad.f32 r0.w, c5.y, r4.y, r0.w -mov.f32f32 r0.x, r0.x +add.f r0.w, r1.x, r0.w +add.f r0.x, r0.x, r1.w +mov.f32f32 r1.x, c9.y cmps.f.ne r0.z, r0.z, c9.y -mov.f32f32 r1.x, r1.x -mov.f32f32 r0.w, r0.w -mad.f32 r0.x, c5.x, r4.x, r0.x -mov.f32f32 r1.y, r1.z -mul.f r1.x, r0.y, r1.x -mul.f r0.w, r0.y, r0.w -mov.f32f32 r0.x, r0.x -sel.b32 r0.z, r1.w, r0.z, r1.y -add.f r1.x, r1.x, r3.z -add.f r0.w, r0.w, r3.w -mul.f r0.x, r0.y, r0.x -nop -mul.f r0.y, r1.x, r0.z +(rpt2)nop +sel.b32 r0.z, r1.x, r0.z, r4.x +(rpt2)nop +mul.f r0.y, r0.y, r0.z mul.f r0.w, r0.w, r0.z -add.f r0.x, r0.x, r5.x -nop -mul.f r0.y, r0.y, c4.z -mul.f r0.w, r0.w, c4.y +(rpt1)nop +mul.f r2.z, r0.y, c4.z +mul.f r2.y, r0.w, c4.y mul.f r0.x, r0.x, r0.z -nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.w -mul.f r0.x, r0.x, c4.x -nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.x, r0.x -nop -mov.f32f32 r2.z, r0.y -mov.f32f32 r2.y, r0.z -mov.f32f32 r0.x, r0.x (rpt2)nop -mov.f32f32 r2.x, r0.x +mul.f r2.x, r0.x, c4.x end nop nop nop ; FRAG: outputs: r2.x (1:0) ; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r2.x (5:10,cm=f,il=12,b=1) r1.x (5:11,cm=f,il=16,b=1) -; FRAG: 196 instructions, 0 half, 8 full +; FRAG: 133 instructions, 0 half, 8 full diff --git a/reference/0ad-alpine-valley/0ad-95.asm b/reference/0ad-alpine-valley/0ad-95.asm index c6a3a6f..9c8ac11 100644 --- a/reference/0ad-alpine-valley/0ad-95.asm +++ b/reference/0ad-alpine-valley/0ad-95.asm @@ -24,115 +24,81 @@ @out(r3.y) out13 @out(r3.z) out14 @out(r3.w) out15 +@const(c16.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 +@const(c17.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)mul.f r1.z, c11.x, r0.w mul.f r1.w, c11.x, r0.x mad.f32 r1.z, c12.x, r1.x, r1.z mad.f32 r1.w, c12.x, r0.y, r1.w -mul.f r2.x, c11.z, r0.x -mad.f32 r1.w, c13.x, r0.z, r1.w -mov.f32f32 r1.z, r1.z -mad.f32 r2.x, c12.z, r0.y, r2.x mad.f32 r1.z, c13.x, r1.y, r1.z -add.f r1.w, r1.w, c14.x -mad.f32 r2.x, c13.z, r0.z, r2.x -mul.f r2.y, c11.y, r0.w -mov.f32f32 r1.z, r1.z -mul.f r2.z, c7.w, r1.w -mul.f r2.w, c7.z, r1.w -mul.f r3.x, c7.y, r1.w +mad.f32 r1.w, c13.x, r0.z, r1.w +mul.f r2.x, c11.z, r0.x +mul.f r2.y, c11.y, r0.x mul.f r1.z, r1.z, (neg)c4.x -mad.f32 r2.y, c12.y, r1.x, r2.y -mul.f r3.y, c11.y, r0.x -mul.f r4.x, c7.x, r1.w -mad.f32 r3.y, c12.y, r0.y, r3.y -mov.f32f32 r2.y, r2.y -mad.f32 r3.y, c13.y, r0.z, r3.y -mad.f32 r2.y, c13.y, r1.y, r2.y -mul.f r4.y, c0.w, r1.w -mul.f r4.z, c0.z, r1.w -mul.f r4.w, c0.y, r1.w -mov.f32f32 r2.y, r2.y -add.f r3.y, r3.y, c14.y -mul.f r5.x, c0.x, r1.w -add.f r5.y, r2.x, c14.z -mad.f32 r1.z, (neg)c4.y, r2.y, r1.z -mad.f32 r2.x, c8.w, r3.y, r2.z -mad.f32 r2.y, c8.z, r3.y, r2.w -mad.f32 r2.z, c8.y, r3.y, r3.x -mov.f32f32 r1.z, r1.z +mul.f r2.z, c11.y, r0.w +add.f r1.w, r1.w, c14.x +mad.f32 r2.z, c12.y, r1.x, r2.z +mad.f32 r2.x, c12.z, r0.y, r2.x +mad.f32 r2.z, c13.y, r1.y, r2.z +mul.f r2.w, c7.y, r1.w +mul.f r3.x, c7.x, r1.w +mul.f r3.y, c7.w, r1.w +mad.f32 r1.z, (neg)c4.y, r2.z, r1.z mul.f r0.w, c11.z, r0.w -mad.f32 r2.x, c9.w, r5.y, r2.x +mad.f32 r2.y, c12.y, r0.y, r2.y mad.f32 r0.w, c12.z, r1.x, r0.w +mad.f32 r1.x, c13.y, r0.z, r2.y +mad.f32 r0.w, c13.z, r1.y, r0.w +mul.f r1.y, c7.z, r1.w +mul.f r2.y, c0.w, r1.w +mul.f r2.z, c0.z, r1.w +mad.f32 r0.w, (neg)c4.z, r0.w, r1.z +add.f r4.x, r1.x, c14.y +mul.f r4.y, c0.y, r1.w +mul.f r4.z, c0.x, r1.w +max.f r0.w, c16.x, r0.w +mad.f32 r2.w, c8.y, r4.x, r2.w +mad.f32 r3.x, c8.x, r4.x, r3.x +mad.f32 r3.y, c8.w, r4.x, r3.y +mov.f32f32 r4.w, r0.w +mad.f32 r2.x, c13.z, r0.z, r2.x +mul.f r1.x, r0.w, c5.x +mad.f32 r0.w, c8.z, r4.x, r1.y +mul.f r1.z, r4.w, c5.z +mul.f r1.y, r4.w, c5.y +add.f r4.w, r2.x, c14.z +mad.f32 r2.x, c1.w, r4.x, r2.y +mad.f32 r2.y, c1.z, r4.x, r2.z +mad.f32 r2.z, c1.y, r4.x, r4.y +mad.f32 r2.w, c9.y, r4.w, r2.w mul.f r0.x, c11.w, r0.x -mad.f32 r1.x, c9.z, r5.y, r2.y -mad.f32 r2.y, c9.y, r5.y, r2.z -mov.f32f32 r0.w, r0.w +mad.f32 r3.x, c9.x, r4.w, r3.x mad.f32 r0.x, c12.w, r0.y, r0.x -mad.f32 r0.y, c13.z, r1.y, r0.w +mad.f32 r0.y, c9.w, r4.w, r3.y mad.f32 r0.x, c13.w, r0.z, r0.x -mad.f32 r0.z, c8.x, r3.y, r4.x -mad.f32 r0.w, c1.w, r3.y, r4.y -mov.f32f32 r0.y, r0.y +mad.f32 r0.z, c9.z, r4.w, r0.w +mad.f32 r0.w, c2.w, r4.w, r2.x +mad.f32 r4.y, c2.z, r4.w, r2.y add.f r0.x, r0.x, c14.w -mad.f32 r0.z, c9.x, r5.y, r0.z -mad.f32 r0.w, c2.w, r5.y, r0.w -mad.f32 r0.y, (neg)c4.z, r0.y, r1.z -mad.f32 r1.y, c10.w, r0.x, r2.x -mad.f32 r1.x, c10.z, r0.x, r1.x -mad.f32 r1.z, c10.y, r0.x, r2.y -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.x, r1.x -mul.f r1.z, r1.z, c15.y -max.f r0.y, c16.x, r0.y -mov.f32f32 r2.w, r1.y -mov.f32f32 r2.z, r1.x -mov.f32f32 r2.y, r1.z -mov.f32f32 r0.y, r0.y -mad.f32 r0.z, c10.x, r0.x, r0.z +mad.f32 r5.x, c2.y, r4.w, r2.z +mad.f32 r4.x, c1.x, r4.x, r4.z +mad.f32 r3.y, c6.x, r4.w, c6.y +mad.f32 r2.x, c10.y, r0.x, r2.w +mad.f32 r3.x, c10.x, r0.x, r3.x +mad.f32 r2.w, c10.w, r0.x, r0.y +mad.f32 r2.z, c10.z, r0.x, r0.z +mul.f r2.y, r2.x, c15.y +mul.f r2.x, r3.x, c15.x mad.f32 r0.w, c3.w, r0.x, r0.w -mad.f32 r3.x, c1.z, r3.y, r4.z -mul.f r1.x, r0.y, c5.z -mul.f r1.y, r0.y, c5.y -mul.f r0.y, r0.y, c5.x -mul.f r0.z, r0.z, c15.x -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.y, r1.y -mov.f32f32 r0.y, r0.y -nop -mov.f32f32 r1.z, r1.x -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.x, r0.y -mov.f32f32 r2.x, r0.z -mov.f32f32 r0.w, r0.w -mad.f32 r0.y, c2.z, r5.y, r3.x -mad.f32 r0.z, c1.y, r3.y, r4.w -mad.f32 r0.y, c3.z, r0.x, r0.y -mad.f32 r3.x, c2.y, r5.y, r0.z -mad.f32 r3.y, c1.x, r3.y, r5.x -mad.f32 r4.x, c6.x, r5.y, c6.y -mov.f32f32 r0.z, r0.y -mad.f32 r0.y, c3.y, r0.x, r3.x -mad.f32 r3.x, c2.x, r5.y, r3.y -mov.f32f32 r3.y, r4.x -mad.f32 r1.w, c6.x, r1.w, c6.y -mov.f32f32 r0.y, r0.y -mad.f32 r0.x, c3.x, r0.x, r3.x -mov.f32f32 r3.y, r3.y -mov.f32f32 r1.w, r1.w -nop -mov.f32f32 r0.x, r0.x -mov.f32f32 r3.w, r3.w -mov.f32f32 r3.x, r1.w -mov.f32f32 r1.w, r3.z -mov.f32f32 r4.x, (0.000000) -mov.f32f32 r3.w, r3.w -nop -mov.f32f32 r3.z, r1.w -mov.f32f32 r1.w, r4.x +mad.f32 r0.z, c3.z, r0.x, r4.y +mad.f32 r0.y, c3.y, r0.x, r5.x +mad.f32 r4.x, c2.x, r4.w, r4.x +mad.f32 r3.x, c6.x, r1.w, c6.y +mad.f32 r0.x, c3.x, r0.x, r4.x +mov.f32f32 r1.w, (0.000000) end nop ; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11) ; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r3.z (0:0,cm=3,il=16,b=0) -; VERT: 107 instructions, 0 half, 6 full +; VERT: 71 instructions, 0 half, 6 full diff --git a/reference/0ad-alpine-valley/0ad-96.asm b/reference/0ad-alpine-valley/0ad-96.asm index 36d09e4..63ee7bc 100644 --- a/reference/0ad-alpine-valley/0ad-96.asm +++ b/reference/0ad-alpine-valley/0ad-96.asm @@ -6,8 +6,8 @@ @in(r0.w) in4 @in(r1.x) in5 @in(r1.y) in6 -@in(r4.x) in8 -@in(r4.y) in9 +@in(r3.z) in8 +@in(r3.w) in9 @out(r0.x) out0 @out(r0.y) out1 @out(r0.z) out2 @@ -24,223 +24,164 @@ @out(r3.y) out13 @out(r3.z) out14 @out(r3.w) out15 +@const(c18.x) 0x3ecccccd, 0x00000000, 0x3eaaa64c, 0x3e800000 +@const(c19.x) 0x3f000000, 0x40000000, 0xbf800000, 0x40400000 +@const(c20.x) 0x399d4952, 0x3e4ccccd, 0x3f99999a, 0x3fd9999a +@const(c21.x) 0x3c88ce70, 0x00000000, 0x00000000, 0x00000000 +@const(c22.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)floor.f r1.z, c14.z floor.f r1.w, c14.x absneg.f r2.x, (abs)c17.x absneg.f r2.y, (abs)c17.y add.f r1.z, c14.z, (neg)r1.z add.f r1.w, c14.x, (neg)r1.w -mul.f r2.z, c11.x, r0.w -add.f r2.x, r2.x, r2.y -mov.f32f32 r1.z, r1.z -mov.f32f32 r1.w, r1.w -mad.f32 r2.y, c12.x, r1.x, r2.z -mov.f32f32 r2.x, r2.x +mov.f32f32 r2.z, c18.y +mul.f r2.w, c11.x, r0.w max.f r1.z, r1.z, c18.y max.f r1.w, r1.w, c18.y -mov.f32f32 r2.y, r2.y -mul.f r2.z, c16.x, r2.x +add.f r2.x, r2.x, r2.y +add.f r2.y, r2.z, c19.x min.f r1.z, r1.z, c22.y min.f r1.w, r1.w, c22.y -mul.f r2.w, c11.z, r0.x -mad.f32 r2.y, c13.x, r1.y, r2.y +mul.f r2.z, c16.x, r2.x +mul.f r3.x, c11.z, r0.x max.f r1.z, r1.z, c18.x max.f r1.w, r1.w, c18.x -mad.f32 r2.w, c12.z, r0.y, r2.w -mov.f32f32 r2.y, r2.y -mov.f32f32 r1.z, r1.z -mov.f32f32 r1.w, r1.w -mad.f32 r2.w, c13.z, r0.z, r2.w -mul.f r3.x, c11.x, r0.x +mul.f r3.y, c11.x, r0.x +mad.f32 r3.x, c12.z, r0.y, r3.x mul.f r1.z, c16.x, r1.z -mad.f32 r3.x, c12.x, r0.y, r3.x -add.f r2.w, r2.w, c14.z -mad.f32 r3.x, c13.x, r0.z, r3.x -mov.f32f32 r1.z, r1.z -mad.f32 r2.z, c18.w, r2.z, r2.w -mul.f r2.y, r2.y, (neg)c4.x -mul.f r3.y, c11.y, r0.w +mad.f32 r3.y, c12.x, r0.y, r3.y +mad.f32 r3.x, c13.z, r0.z, r3.x +mad.f32 r3.y, c13.x, r0.z, r3.y mad.f32 r1.z, c18.z, r1.z, c14.x -add.f r3.x, r3.x, c14.x -mov.f32f32 r2.z, r2.z -mad.f32 r1.w, c16.x, r1.w, r3.x -mov.f32f32 r1.z, r1.z -mad.f32 r3.y, c12.y, r1.x, r3.y -mov.f32f32 r2.z, r2.z +floor.f r4.x, r2.y +mad.f32 r2.w, c12.x, r1.x, r2.w +add.f r3.y, r3.y, c14.x +add.f r1.z, r1.z, c19.x +mad.f32 r1.w, c16.x, r1.w, r3.y +add.f r3.x, r3.x, c14.z +add.f r2.y, r2.y, (neg)r4.x +floor.f r4.x, r1.z add.f r1.w, r1.w, c19.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r3.y, r3.y +mad.f32 r2.z, c18.w, r2.z, r3.x +mad.f32 r2.y, c19.y, r2.y, c19.z +add.f r1.z, r1.z, (neg)r4.x +floor.f r4.x, r1.w add.f r2.z, r2.z, c19.x -mad.f32 r3.y, c13.y, r1.y, r3.y -add.f r1.z, r1.z, c19.x -floor.f r3.z, r1.w -floor.f r3.w, r2.z -mov.f32f32 r3.y, r3.y -floor.f r4.z, r1.z -add.f r1.w, r1.w, (neg)r3.z -add.f r2.z, r2.z, (neg)r3.w -mad.f32 r2.y, (neg)c4.y, r3.y, r2.y -add.f r1.z, r1.z, (neg)r4.z -mad.f32 r1.w, c19.y, r1.w, c19.z -mad.f32 r2.z, c19.y, r2.z, c19.z -mov.f32f32 r2.y, r2.y +absneg.f r2.y, (abs)r2.y mad.f32 r1.z, c19.y, r1.z, c19.z -absneg.f r1.w, (abs)r1.w -absneg.f r2.z, (abs)r2.z -mul.f r0.w, c11.z, r0.w +add.f r4.x, r1.w, (neg)r4.x +floor.f r4.y, r2.z +mul.f r1.w, r2.y, r2.y absneg.f r1.z, (abs)r1.z -mul.f r3.y, c19.y, r1.w -mul.f r3.z, c19.y, r2.z -mul.f r1.w, r1.w, r1.w -mul.f r3.w, c19.y, r1.z -add.f r3.y, c19.w, (neg)r3.y -add.f r3.z, c19.w, (neg)r3.z -mul.f r2.z, r2.z, r2.z -add.f r3.w, c19.w, (neg)r3.w +mad.f32 r2.y, c19.y, r4.x, c19.z +add.f r2.z, r2.z, (neg)r4.y +mad.f32 r2.w, c13.x, r1.y, r2.w +mul.f r4.x, c19.y, r1.z +absneg.f r2.y, (abs)r2.y +mad.f32 r2.z, c19.y, r2.z, c19.z mul.f r1.z, r1.z, r1.z -mul.f r1.w, r1.w, r3.y -mul.f r2.z, r2.z, r3.z +add.f r4.x, c19.w, (neg)r4.x +mul.f r4.y, c19.y, r2.y +absneg.f r2.z, (abs)r2.z +mul.f r2.y, r2.y, r2.y +mul.f r1.z, r1.z, r4.x +mul.f r4.x, r0.y, c21.x +add.f r4.y, c19.w, (neg)r4.y +mul.f r4.z, c19.y, r2.z +mul.f r2.z, r2.z, r2.z +max.f r4.x, r4.x, c18.y +mul.f r2.y, r2.y, r4.y +mul.f r4.y, r0.x, r0.z +add.f r4.z, c19.w, (neg)r4.z +min.f r4.x, r4.x, c22.y +mul.f r4.w, r0.y, c20.x +mul.f r2.w, r2.w, (neg)c4.x +mul.f r5.x, c11.y, r0.w +min.f r4.x, r4.x, c18.w +mul.f r4.y, r4.y, r4.w +mul.f r2.z, r2.z, r4.z +mad.f32 r4.z, c12.y, r1.x, r5.x +mul.f r1.z, r1.z, r4.x +max.f r4.x, r4.y, c18.y +mad.f32 r4.y, c13.y, r1.y, r4.z +mov.f32f32 r2.x, r2.x +mov.f32f32 r4.z, r1.z +min.f r4.x, r4.x, c22.y +mad.f32 r2.w, (neg)c4.y, r4.y, r2.w +mul.f r0.w, c11.z, r0.w +max.f r2.x, r2.x, c20.z +min.f r4.x, r4.x, c20.y mad.f32 r0.w, c12.z, r1.x, r0.w -mul.f r1.x, r1.z, r3.w -mov.f32f32 r1.z, r1.w -mov.f32f32 r1.w, r2.z -mul.f r2.z, r0.x, r0.z -mov.f32f32 r1.x, r1.x -mul.f r3.y, r0.y, c21.x -mul.f r3.z, r0.y, c20.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r3.w, c18.y -mov.f32f32 r3.y, r3.y -mul.f r2.z, r2.z, r3.z -mad.f32 r0.w, c13.z, r1.y, r0.w -add.f r1.y, r3.w, c19.x -mov.f32f32 r3.y, r3.y -mov.f32f32 r2.z, r2.z -mov.f32f32 r0.w, r0.w -floor.f r3.z, r1.y -max.f r3.y, r3.y, c18.y -mov.f32f32 r2.z, r2.z -mad.f32 r0.w, (neg)c4.z, r0.w, r2.y -add.f r1.y, r1.y, (neg)r3.z -min.f r2.y, r3.y, c22.y -mov.f32f32 r2.z, r2.z -mov.f32f32 r0.w, r0.w -mad.f32 r1.y, c19.y, r1.y, c19.z -min.f r2.y, r2.y, c18.w -max.f r2.z, r2.z, c18.y -max.f r0.w, c18.y, r0.w -absneg.f r1.y, (abs)r1.y -mov.f32f32 r2.y, r2.y -min.f r2.z, r2.z, c22.y -mov.f32f32 r0.w, r0.w -mul.f r3.y, r1.y, r1.y -mul.f r1.x, r1.x, r2.y -min.f r1.y, r2.z, c20.y -mul.f r2.y, r0.w, c5.z -mul.f r2.z, r0.w, c5.y -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.y, r1.y -mov.f32f32 r2.y, r2.y -mov.f32f32 r2.z, r2.z -mul.f r0.w, r0.w, c5.x -mul.f r3.z, r1.z, r1.y -mul.f r3.w, r1.w, r1.y -max.f r1.w, r2.x, c20.z -mov.f32f32 r1.z, r2.y -mov.f32f32 r2.x, r3.z -mov.f32f32 r1.y, r2.z -mad.f32 r2.y, c17.x, r1.x, r2.x -mad.f32 r1.x, c17.y, r1.x, r2.x -mov.f32f32 r1.w, r1.w -mov.f32f32 r0.w, r0.w -mov.f32f32 r2.x, r2.y -mov.f32f32 r2.y, r1.x -min.f r2.z, r1.w, c20.w -mov.f32f32 r1.x, r0.w -mov.f32f32 r1.w, r3.y -mul.f r0.w, c11.y, r0.x -mov.f32f32 r2.z, r2.z -mad.f32 r0.w, c12.y, r0.y, r0.w +mul.f r1.x, c11.y, r0.x mul.f r0.x, c11.w, r0.x -mov.f32f32 r3.z, r4.y -mad.f32 r3.x, r3.w, r2.z, r3.x -mad.f32 r2.w, r3.w, r2.z, r2.w -mad.f32 r0.w, c13.y, r0.z, r0.w +mov.f32f32 r4.y, r4.x +mul.f r2.z, r2.z, r4.x +min.f r2.x, r2.x, c20.w +mad.f32 r0.w, c13.z, r1.y, r0.w +mul.f r1.y, r2.y, r4.y +mad.f32 r1.x, c12.y, r0.y, r1.x mad.f32 r0.x, c12.w, r0.y, r0.x -add.f r0.y, r3.x, r2.x -add.f r2.x, r2.w, r2.y -add.f r0.w, r0.w, c14.y -nop -mov.f32f32 r3.x, r0.y -mov.f32f32 r3.y, r2.x -mad.f32 r0.y, r3.w, r2.z, r0.w +mad.f32 r0.y, c13.y, r0.z, r1.x +mov.f32f32 r1.x, r1.y +mad.f32 r1.y, c17.y, r1.z, r1.y +mad.f32 r1.x, c17.x, r4.z, r1.x +mov.f32f32 r1.z, r2.x +mad.f32 r2.x, r2.z, r2.x, r3.x +add.f r0.y, r0.y, c14.y +mad.f32 r0.w, (neg)c4.z, r0.w, r2.w +mad.f32 r2.y, r2.z, r1.z, r3.y +add.f r1.y, r2.x, r1.y +mad.f32 r0.y, r2.z, r1.z, r0.y +max.f r4.x, c18.y, r0.w +add.f r0.w, r2.y, r1.x +mov.f32f32 r1.x, r1.y mad.f32 r0.x, c13.w, r0.z, r0.x -mul.f r0.z, c7.w, r3.x -mul.f r0.w, c7.z, r3.x -mad.f32 r0.z, c8.w, r0.y, r0.z -mad.f32 r0.w, c8.z, r0.y, r0.w -mul.f r2.x, c7.y, r3.x -mul.f r2.y, c7.x, r3.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -mad.f32 r0.z, c9.w, r3.y, r0.z -add.f r0.x, r0.x, c14.w -mad.f32 r0.w, c9.z, r3.y, r0.w -mad.f32 r2.x, c8.y, r0.y, r2.x -mad.f32 r2.y, c8.x, r0.y, r2.y -mad.f32 r0.z, c10.w, r0.x, r0.z -mad.f32 r0.w, c10.z, r0.x, r0.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r2.y, r2.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -mad.f32 r2.x, c9.y, r3.y, r2.x -mad.f32 r2.y, c9.x, r3.y, r2.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -mad.f32 r2.x, c10.y, r0.x, r2.x -mad.f32 r2.y, c10.x, r0.x, r2.y -mov.f32f32 r2.w, r0.z -mov.f32f32 r2.z, r0.w -mov.f32f32 r0.z, r2.x -mov.f32f32 r0.w, r2.y -mul.f r3.w, c0.w, r3.x -mul.f r4.y, c0.z, r3.x -mul.f r0.z, r0.z, c15.y -mul.f r0.w, r0.w, c15.x -(rpt1)nop -mov.f32f32 r2.y, r0.z -mov.f32f32 r2.x, r0.w -mad.f32 r0.z, c1.w, r0.y, r3.w -mad.f32 r0.w, c1.z, r0.y, r4.y -mad.f32 r0.z, c2.w, r3.y, r0.z -mad.f32 r0.w, c2.z, r3.y, r0.w -mad.f32 r0.z, c3.w, r0.x, r0.z -mad.f32 r3.w, c3.z, r0.x, r0.w -mul.f r4.y, c0.y, r3.x -mul.f r4.z, c0.x, r3.x -mov.f32f32 r0.w, r0.z -mov.f32f32 r0.z, r3.w -mad.f32 r3.w, c1.y, r0.y, r4.y -mad.f32 r0.y, c1.x, r0.y, r4.z -mad.f32 r3.w, c2.y, r3.y, r3.w -mad.f32 r0.y, c2.x, r3.y, r0.y -mad.f32 r3.w, c3.y, r0.x, r3.w -mad.f32 r0.x, c3.x, r0.x, r0.y -(rpt1)nop -mov.f32f32 r0.y, r3.w -mov.f32f32 r0.x, r0.x -mad.f32 r3.y, c6.x, r3.y, c6.y -mad.f32 r3.x, c6.x, r3.x, c6.y -(rpt1)nop -mov.f32f32 r3.y, r3.y -mov.f32f32 r3.x, r3.x -mov.f32f32 r3.w, r3.z -mov.f32f32 r3.z, r4.x -(rpt2)nop -mov.f32f32 r3.z, r3.z +nop +mov.f32f32 r1.z, r0.w +mul.f r3.x, c0.x, r0.w +mad.f32 r3.y, c6.x, r1.x, c6.y +mov.f32f32 r4.y, r4.x +mul.f r0.z, c7.y, r1.z +mul.f r0.w, c7.x, r1.z +mad.f32 r0.z, c8.y, r0.y, r0.z +mad.f32 r0.w, c8.x, r0.y, r0.w +mad.f32 r0.z, c9.y, r1.x, r0.z +add.f r4.z, r0.x, c14.w +mad.f32 r0.x, c9.x, r1.x, r0.w +mul.f r0.w, c7.w, r1.z +mul.f r2.x, c7.z, r1.z +mad.f32 r0.z, c10.y, r4.z, r0.z +mad.f32 r0.x, c10.x, r4.z, r0.x +mad.f32 r0.w, c8.w, r0.y, r0.w +mad.f32 r2.z, c8.z, r0.y, r2.x +mul.f r2.y, r0.z, c15.y +mul.f r2.x, r0.x, c15.x +mad.f32 r0.x, c9.w, r1.x, r0.w +mad.f32 r0.z, c9.z, r1.x, r2.z +mad.f32 r2.w, c10.w, r4.z, r0.x +mad.f32 r2.z, c10.z, r4.z, r0.z +mul.f r0.x, c0.w, r1.z +mul.f r0.z, c0.z, r1.z +mad.f32 r0.x, c1.w, r0.y, r0.x +mad.f32 r0.z, c1.z, r0.y, r0.z +mad.f32 r0.x, c2.w, r1.x, r0.x +mad.f32 r0.z, c2.z, r1.x, r0.z +mad.f32 r0.w, c3.w, r4.z, r0.x +mad.f32 r0.z, c3.z, r4.z, r0.z +mul.f r0.x, c0.y, r1.z +mad.f32 r3.x, c1.x, r0.y, r3.x +mad.f32 r0.x, c1.y, r0.y, r0.x +mad.f32 r0.y, c2.x, r1.y, r3.x +mad.f32 r1.x, c2.y, r1.x, r0.x +mad.f32 r0.x, c3.x, r4.z, r0.y +mad.f32 r0.y, c3.y, r4.z, r1.x +mad.f32 r3.x, c6.x, r1.z, c6.y +mul.f r1.z, r4.y, c5.z +mul.f r1.y, r4.y, c5.y +mul.f r1.x, r4.x, c5.x end ; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11) -; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r4.x (0:0,cm=3,il=16,b=0) -; VERT: 221 instructions, 0 half, 5 full +; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r3.z (0:0,cm=3,il=16,b=0) +; VERT: 152 instructions, 0 half, 6 full diff --git a/reference/0ad-alpine-valley/0ad-97.asm b/reference/0ad-alpine-valley/0ad-97.asm index 12fbb01..5630fc2 100644 --- a/reference/0ad-alpine-valley/0ad-97.asm +++ b/reference/0ad-alpine-valley/0ad-97.asm @@ -8,203 +8,139 @@ @out(r2.y) out1 @out(r2.z) out2 @out(r2.w) out3 +@const(c9.x) 0x3f000000, 0x00000000, 0x3f233333, 0xbb449ba6 +@const(c10.x) 0xbf000000, 0x3f800000, 0x40000000, 0x3de38866 +@const(c11.x) 0x3fb8aa65, 0x3cf5c28f, 0x00000000, 0x00000000 +@const(c12.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.x, 4, r1.x add.f r0.y, r0.w, c9.y bary.f r0.w, 5, r1.x bary.f r1.z, 10, r1.x -add.f r1.w, r0.x, c10.x -bary.f r2.x, 8, r1.x +add.f r2.x, r0.x, c10.x +bary.f r1.w, 11, r1.x add.f r2.y, r0.w, c10.x -mov.f32f32 r1.z, r1.z -floor.f r2.z, r1.w +bary.f r3.y, 8, r1.x +floor.f r2.z, r2.x rcp r0.y, r0.y add.f r0.z, r0.z, c9.y floor.f r2.w, r2.y -mov.f32f32 r3.y, r1.z -add.f r1.z, r1.w, (neg)r2.z +bary.f r3.z, 9, r1.x +add.f r3.w, r2.x, (neg)r2.z (ss)mul.f r0.y, r0.z, r0.y -add.f r0.z, r2.y, (neg)r2.w -bary.f r1.w, 11, r1.x -mov.f32f32 r1.z, r1.z +absneg.f r0.z, (neg)c7.x +add.f r4.x, r2.y, (neg)r2.w +mov.f32f32 r4.y, r3.w +sam (f32)(xyzw)r2.x, r1.z, s#0, t#0 +(ss)add.f r1.z, r3.w, c10.y +mul.f r0.z, r0.z, c7.x +mov.f32f32 r1.w, r4.x +mul.f r3.w, c9.x, r4.y +add.f r4.y, c10.z, (neg)r4.y +mul.f r0.z, r0.z, r0.y mov.f32f32 r0.y, r0.y -absneg.f r2.y, (neg)c7.x -mov.f32f32 r0.z, r0.z -mul.f r2.z, c9.x, r1.z -mov.f32f32 r1.w, r1.w -add.f r2.w, c10.z, (neg)r1.z -add.f r3.w, c10.z, (neg)r0.z -mov.f32f32 r2.z, r2.z -mul.f r2.y, r2.y, c7.x -mul.f r4.x, c9.x, r0.z -mov.f32f32 r3.z, r1.w -add.f r0.x, r0.x, (neg)r2.z -mul.f r1.w, r2.y, r0.y -mov.f32f32 r2.y, r4.x -mov.f32f32 r2.z, r2.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r1.w, r1.w -add.f r0.w, r0.w, (neg)r2.y -sam (f32)(xyzw)r4.x, r3.y, s#0, t#0 -(sy)cmps.f.lt r2.y, r4.w, c9.z -add.f r2.w, c10.x, r0.x -mul.f r0.y, r1.w, r0.y +add.f r0.x, r0.x, (neg)r3.w +mul.f r3.w, c9.x, r1.w +mov.f32f32 r4.z, r4.y +mul.f r0.y, r0.z, r0.y +mov.f32f32 r0.z, r0.x +add.f r0.w, r0.w, (neg)r3.w add.f r0.x, c10.y, r0.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.w, r2.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x -add.f r2.w, c10.y, r0.w -mul.f r1.w, r1.w, c3.z mul.f r0.y, r0.y, c11.x -mul.f r0.x, r0.x, c3.z -mov.f32f32 r2.w, r2.w -(ss)mov.f32f32 r3.y, r1.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r3.z, r0.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r5.x, r3.y -add.f r0.w, c10.x, r0.w -mov.f32f32 r3.y, r3.z -mov.f32f32 r5.w, r0.x -mov.f32f32 r0.x, r1.w -mov.f32f32 r0.w, r0.w -exp2 r0.y, r0.y -(ss)mov.f32f32 r0.y, r0.y -mov.f32f32 r6.z, r3.y -mul.f r1.w, r2.w, c3.w -mul.f r0.w, r0.w, c3.w -add.f r2.w, c12.y, (neg)r0.y -mov.f32f32 r7.y, r0.x -mov.f32f32 r0.x, r1.w -mov.f32f32 r3.y, r0.w -mul.f r2.w, r2.w, c7.y -mul.f r0.y, r0.y, c10.y -mov.f32f32 r1.w, r1.w -mov.f32f32 r5.y, r3.y -bary.f r3.y, 6, r1.x -add.f r0.y, r0.y, r2.w -mov.f32f32 r1.w, r1.w -mov.f32f32 r0.w, r0.w -add.f r2.w, r3.y, c9.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r6.w, r1.w -mov.f32f32 r6.x, r0.w -mov.f32f32 r0.w, r2.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.w, r2.w -mov.f32f32 r3.y, r2.w -mov.f32f32 r5.z, r0.w -max.f r0.y, r0.y, c9.y -mov.f32f32 r7.x, r1.w -mov.f32f32 r6.y, r3.y -mov.f32f32 r7.z, r0.x -mov.f32f32 r0.x, r2.w -cov.u32f32 r0.w, r2.y -sam.s (f32)(x)r7.w, r5.x, s#2, t#2 -(sy)mov.f32f32 r1.w, r7.w -min.f r0.y, r0.y, c10.y -sam.s (f32)(x)r7.w, r6.z, s#2, t#2 -(sy)mov.f32f32 r2.y, r7.w -(ss)nop -sam.s (f32)(x)r5.x, r5.w, s#2, t#2 -(sy)mov.f32f32 r2.w, r5.x -mov.f32f32 r1.w, r1.w -mov.f32f32 r3.y, r3.w -add.f r3.z, c12.y, (neg)r0.y -add.f r3.w, c12.y, (neg)r0.y -add.f r5.x, c12.y, (neg)r0.y -mul.f r5.y, r2.z, r3.y -mul.f r3.z, r3.z, c6.z -mul.f r3.w, r3.w, c6.y -mul.f r5.x, r5.x, c6.x -mul.f r1.w, r5.y, r1.w -add.f r1.z, r1.z, c10.y -mov.f32f32 r7.w, r0.x -mov.f32f32 r0.x, (0.000000) -add.f r0.z, r0.z, c10.y -mul.f r3.y, r1.z, r3.y -mov.f32f32 r2.x, r2.x -cmps.f.ne p0.x, r0.w, r0.x -mul.f r0.x, r2.z, r0.z -mad.f32 r0.w, r3.y, r2.w, r1.w -sam.s (f32)(x)r5.y, r7.y, s#2, t#2 +add.f r0.z, c10.x, r0.z +mov.f32f32 r3.w, r0.w +mul.f r4.w, r0.x, c3.z +add.f r0.x, c10.y, r0.w +mul.f r5.z, r0.z, c3.z +add.f r0.z, c10.x, r3.w +mov.f32f32 r6.y, r4.w +mul.f r5.x, r0.x, c3.w +mov.f32f32 r7.x, r5.z +mul.f r6.z, r0.z, c3.w +exp2 r0.x, r0.y +(ss)mov.f32f32 r0.y, r0.x +bary.f r0.z, 6, r1.x +mov.f32f32 r5.w, r5.x +mov.f32f32 r7.y, r6.z +add.f r0.y, c12.y, (neg)r0.y +add.f r5.y, r0.z, c9.w +add.f r0.z, c10.z, (neg)r1.w +mul.f r0.x, r0.x, c10.y +add.f r0.w, r4.x, c10.y +mov.f32f32 r7.z, r5.y +mul.f r0.y, r0.y, c7.y +mov.f32f32 r6.w, r5.y +mov.f32f32 r6.x, r5.y (rpt1)nop -(sy)mov.f32f32 r1.w, r5.y -mov.f32f32 r0.w, r0.w -bary.f r2.z, 0, r1.x -mov.f32f32 r5.y, r2.x -mad.f32 r0.x, r0.x, r1.w, r0.w -bary.f r0.w, 1, r1.x -bary.f r1.w, 9, r1.x +add.f r0.x, r0.x, r0.y +sam.s (f32)(x)r7.x, r7.x, s#2, t#2 +mov.f32f32 r0.y, r0.z +nop +(ss)nop +sam.s (f32)(x)r7.y, r6.y, s#2, t#2 +sam.s (f32)(x)r7.z, r4.w, s#2, t#2 mul.f r0.z, r1.z, r0.z -mov.f32f32 r0.x, r0.x -bary.f (ei)r1.x, 2, r1.x -mov.f32f32 r1.y, r1.w -mad.f32 r0.x, r0.z, r2.y, r0.x +mul.f r1.w, r4.y, r0.w +mul.f r0.y, r4.z, r0.y +max.f r0.x, r0.x, c9.y +sam.s (f32)(x)r3.w, r5.z, s#2, t#2 +(sy)cmps.f.lt r4.x, r2.w, c9.z +mul.f r0.w, r1.z, r0.w +mul.f r0.y, r0.y, r7.x +min.f r0.x, r0.x, c10.y +mad.f32 r0.y, r0.z, r7.y, r0.y +cov.u32f32 r0.z, r4.x +mad.f32 r0.y, r1.w, r3.w, r0.y +add.f r1.z, c12.y, (neg)r0.x +mad.f32 r0.y, r0.w, r7.z, r0.y +add.f r0.w, c12.y, (neg)r0.x +add.f r1.w, c12.y, (neg)r0.x +mov.f32f32 r3.w, (0.000000) +mul.f r0.y, c10.w, r0.y +bary.f r4.x, 0, r1.x +mul.f r1.z, r1.z, c6.z +mul.f r0.w, r0.w, c6.y +mov.f32f32 r4.y, r0.y +bary.f r4.z, 2, r1.x +bary.f (ei)r1.x, 1, r1.x +mul.f r1.y, r2.x, r4.x +mul.f r1.w, r1.w, c6.x +mul.f r4.x, r2.z, r4.z +mul.f r1.x, r2.y, r1.x +mul.f r0.y, r1.y, r0.y +cmps.f.ne p0.x, r0.z, r3.w +mul.f r0.z, r4.x, r4.y +mul.f r1.x, r1.x, r4.y +mad.f32 r0.z, c5.z, r2.z, r0.z +mad.f32 r1.x, c5.y, r2.y, r1.x +mad.f32 r0.y, c5.x, r2.x, r0.y +sam (f32)(w)r3.y, r3.y, s#1, t#1 +(sy)cmps.f.lt r1.y, r4.x, c11.y +mul.f r0.z, r0.x, r0.z +mul.f r1.x, r0.x, r1.x +mul.f r0.x, r0.x, r0.y kill p0.x -mov.f32f32 r0.z, r4.w -mov.f32f32 r5.z, r1.y -mov.f32f32 r0.x, r0.x -mul.f r1.x, r4.z, r1.x -mul.f r0.w, r4.y, r0.w -mov.f32f32 r2.w, r0.z -mul.f r0.x, c10.w, r0.x -mul.f r0.z, r4.x, r2.z -sam (f32)(w)r1.y, r5.y, s#1, t#1 -nop -(sy)cmps.f.lt r1.y, r2.x, c11.y -mov.f32f32 r0.x, r0.x -mov.f32f32 r1.z, r2.x -mov.f32f32 r1.w, c9.y -nop -mul.f r1.x, r1.x, r0.x -mul.f r0.w, r0.w, r0.x -mul.f r0.x, r0.z, r0.x +add.f r0.y, r0.z, r1.z cov.u32f32 r0.z, r1.y -mov.f32f32 r1.x, r1.x -mov.f32f32 r0.w, r0.w -mad.f32 r1.x, c5.z, r4.z, r1.x -mad.f32 r0.w, c5.y, r4.y, r0.w -mov.f32f32 r0.x, r0.x +add.f r0.w, r1.x, r0.w +add.f r0.x, r0.x, r1.w +mov.f32f32 r1.x, c9.y cmps.f.ne r0.z, r0.z, c9.y -mov.f32f32 r1.x, r1.x -mov.f32f32 r0.w, r0.w -mad.f32 r0.x, c5.x, r4.x, r0.x -mov.f32f32 r1.y, r1.z -mul.f r1.x, r0.y, r1.x -mul.f r0.w, r0.y, r0.w -mov.f32f32 r0.x, r0.x -sel.b32 r0.z, r1.w, r0.z, r1.y -add.f r1.x, r1.x, r3.z -add.f r0.w, r0.w, r3.w -mul.f r0.x, r0.y, r0.x -nop -mul.f r0.y, r1.x, r0.z +(rpt2)nop +sel.b32 r0.z, r1.x, r0.z, r4.x +(rpt2)nop +mul.f r0.y, r0.y, r0.z mul.f r0.w, r0.w, r0.z -add.f r0.x, r0.x, r5.x -nop -mul.f r0.y, r0.y, c4.z -mul.f r0.w, r0.w, c4.y +(rpt1)nop +mul.f r2.z, r0.y, c4.z +mul.f r2.y, r0.w, c4.y mul.f r0.x, r0.x, r0.z -nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.w -mul.f r0.x, r0.x, c4.x -nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.x, r0.x -nop -mov.f32f32 r2.z, r0.y -mov.f32f32 r2.y, r0.z -mov.f32f32 r0.x, r0.x (rpt2)nop -mov.f32f32 r2.x, r0.x +mul.f r2.x, r0.x, c4.x end nop nop nop ; FRAG: outputs: r2.x (1:0) ; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r2.x (5:10,cm=f,il=12,b=1) r1.x (5:11,cm=f,il=16,b=1) -; FRAG: 196 instructions, 0 half, 8 full +; FRAG: 133 instructions, 0 half, 8 full diff --git a/reference/0ad-alpine-valley/0ad-98.asm b/reference/0ad-alpine-valley/0ad-98.asm index 36d09e4..63ee7bc 100644 --- a/reference/0ad-alpine-valley/0ad-98.asm +++ b/reference/0ad-alpine-valley/0ad-98.asm @@ -6,8 +6,8 @@ @in(r0.w) in4 @in(r1.x) in5 @in(r1.y) in6 -@in(r4.x) in8 -@in(r4.y) in9 +@in(r3.z) in8 +@in(r3.w) in9 @out(r0.x) out0 @out(r0.y) out1 @out(r0.z) out2 @@ -24,223 +24,164 @@ @out(r3.y) out13 @out(r3.z) out14 @out(r3.w) out15 +@const(c18.x) 0x3ecccccd, 0x00000000, 0x3eaaa64c, 0x3e800000 +@const(c19.x) 0x3f000000, 0x40000000, 0xbf800000, 0x40400000 +@const(c20.x) 0x399d4952, 0x3e4ccccd, 0x3f99999a, 0x3fd9999a +@const(c21.x) 0x3c88ce70, 0x00000000, 0x00000000, 0x00000000 +@const(c22.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)floor.f r1.z, c14.z floor.f r1.w, c14.x absneg.f r2.x, (abs)c17.x absneg.f r2.y, (abs)c17.y add.f r1.z, c14.z, (neg)r1.z add.f r1.w, c14.x, (neg)r1.w -mul.f r2.z, c11.x, r0.w -add.f r2.x, r2.x, r2.y -mov.f32f32 r1.z, r1.z -mov.f32f32 r1.w, r1.w -mad.f32 r2.y, c12.x, r1.x, r2.z -mov.f32f32 r2.x, r2.x +mov.f32f32 r2.z, c18.y +mul.f r2.w, c11.x, r0.w max.f r1.z, r1.z, c18.y max.f r1.w, r1.w, c18.y -mov.f32f32 r2.y, r2.y -mul.f r2.z, c16.x, r2.x +add.f r2.x, r2.x, r2.y +add.f r2.y, r2.z, c19.x min.f r1.z, r1.z, c22.y min.f r1.w, r1.w, c22.y -mul.f r2.w, c11.z, r0.x -mad.f32 r2.y, c13.x, r1.y, r2.y +mul.f r2.z, c16.x, r2.x +mul.f r3.x, c11.z, r0.x max.f r1.z, r1.z, c18.x max.f r1.w, r1.w, c18.x -mad.f32 r2.w, c12.z, r0.y, r2.w -mov.f32f32 r2.y, r2.y -mov.f32f32 r1.z, r1.z -mov.f32f32 r1.w, r1.w -mad.f32 r2.w, c13.z, r0.z, r2.w -mul.f r3.x, c11.x, r0.x +mul.f r3.y, c11.x, r0.x +mad.f32 r3.x, c12.z, r0.y, r3.x mul.f r1.z, c16.x, r1.z -mad.f32 r3.x, c12.x, r0.y, r3.x -add.f r2.w, r2.w, c14.z -mad.f32 r3.x, c13.x, r0.z, r3.x -mov.f32f32 r1.z, r1.z -mad.f32 r2.z, c18.w, r2.z, r2.w -mul.f r2.y, r2.y, (neg)c4.x -mul.f r3.y, c11.y, r0.w +mad.f32 r3.y, c12.x, r0.y, r3.y +mad.f32 r3.x, c13.z, r0.z, r3.x +mad.f32 r3.y, c13.x, r0.z, r3.y mad.f32 r1.z, c18.z, r1.z, c14.x -add.f r3.x, r3.x, c14.x -mov.f32f32 r2.z, r2.z -mad.f32 r1.w, c16.x, r1.w, r3.x -mov.f32f32 r1.z, r1.z -mad.f32 r3.y, c12.y, r1.x, r3.y -mov.f32f32 r2.z, r2.z +floor.f r4.x, r2.y +mad.f32 r2.w, c12.x, r1.x, r2.w +add.f r3.y, r3.y, c14.x +add.f r1.z, r1.z, c19.x +mad.f32 r1.w, c16.x, r1.w, r3.y +add.f r3.x, r3.x, c14.z +add.f r2.y, r2.y, (neg)r4.x +floor.f r4.x, r1.z add.f r1.w, r1.w, c19.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r3.y, r3.y +mad.f32 r2.z, c18.w, r2.z, r3.x +mad.f32 r2.y, c19.y, r2.y, c19.z +add.f r1.z, r1.z, (neg)r4.x +floor.f r4.x, r1.w add.f r2.z, r2.z, c19.x -mad.f32 r3.y, c13.y, r1.y, r3.y -add.f r1.z, r1.z, c19.x -floor.f r3.z, r1.w -floor.f r3.w, r2.z -mov.f32f32 r3.y, r3.y -floor.f r4.z, r1.z -add.f r1.w, r1.w, (neg)r3.z -add.f r2.z, r2.z, (neg)r3.w -mad.f32 r2.y, (neg)c4.y, r3.y, r2.y -add.f r1.z, r1.z, (neg)r4.z -mad.f32 r1.w, c19.y, r1.w, c19.z -mad.f32 r2.z, c19.y, r2.z, c19.z -mov.f32f32 r2.y, r2.y +absneg.f r2.y, (abs)r2.y mad.f32 r1.z, c19.y, r1.z, c19.z -absneg.f r1.w, (abs)r1.w -absneg.f r2.z, (abs)r2.z -mul.f r0.w, c11.z, r0.w +add.f r4.x, r1.w, (neg)r4.x +floor.f r4.y, r2.z +mul.f r1.w, r2.y, r2.y absneg.f r1.z, (abs)r1.z -mul.f r3.y, c19.y, r1.w -mul.f r3.z, c19.y, r2.z -mul.f r1.w, r1.w, r1.w -mul.f r3.w, c19.y, r1.z -add.f r3.y, c19.w, (neg)r3.y -add.f r3.z, c19.w, (neg)r3.z -mul.f r2.z, r2.z, r2.z -add.f r3.w, c19.w, (neg)r3.w +mad.f32 r2.y, c19.y, r4.x, c19.z +add.f r2.z, r2.z, (neg)r4.y +mad.f32 r2.w, c13.x, r1.y, r2.w +mul.f r4.x, c19.y, r1.z +absneg.f r2.y, (abs)r2.y +mad.f32 r2.z, c19.y, r2.z, c19.z mul.f r1.z, r1.z, r1.z -mul.f r1.w, r1.w, r3.y -mul.f r2.z, r2.z, r3.z +add.f r4.x, c19.w, (neg)r4.x +mul.f r4.y, c19.y, r2.y +absneg.f r2.z, (abs)r2.z +mul.f r2.y, r2.y, r2.y +mul.f r1.z, r1.z, r4.x +mul.f r4.x, r0.y, c21.x +add.f r4.y, c19.w, (neg)r4.y +mul.f r4.z, c19.y, r2.z +mul.f r2.z, r2.z, r2.z +max.f r4.x, r4.x, c18.y +mul.f r2.y, r2.y, r4.y +mul.f r4.y, r0.x, r0.z +add.f r4.z, c19.w, (neg)r4.z +min.f r4.x, r4.x, c22.y +mul.f r4.w, r0.y, c20.x +mul.f r2.w, r2.w, (neg)c4.x +mul.f r5.x, c11.y, r0.w +min.f r4.x, r4.x, c18.w +mul.f r4.y, r4.y, r4.w +mul.f r2.z, r2.z, r4.z +mad.f32 r4.z, c12.y, r1.x, r5.x +mul.f r1.z, r1.z, r4.x +max.f r4.x, r4.y, c18.y +mad.f32 r4.y, c13.y, r1.y, r4.z +mov.f32f32 r2.x, r2.x +mov.f32f32 r4.z, r1.z +min.f r4.x, r4.x, c22.y +mad.f32 r2.w, (neg)c4.y, r4.y, r2.w +mul.f r0.w, c11.z, r0.w +max.f r2.x, r2.x, c20.z +min.f r4.x, r4.x, c20.y mad.f32 r0.w, c12.z, r1.x, r0.w -mul.f r1.x, r1.z, r3.w -mov.f32f32 r1.z, r1.w -mov.f32f32 r1.w, r2.z -mul.f r2.z, r0.x, r0.z -mov.f32f32 r1.x, r1.x -mul.f r3.y, r0.y, c21.x -mul.f r3.z, r0.y, c20.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r3.w, c18.y -mov.f32f32 r3.y, r3.y -mul.f r2.z, r2.z, r3.z -mad.f32 r0.w, c13.z, r1.y, r0.w -add.f r1.y, r3.w, c19.x -mov.f32f32 r3.y, r3.y -mov.f32f32 r2.z, r2.z -mov.f32f32 r0.w, r0.w -floor.f r3.z, r1.y -max.f r3.y, r3.y, c18.y -mov.f32f32 r2.z, r2.z -mad.f32 r0.w, (neg)c4.z, r0.w, r2.y -add.f r1.y, r1.y, (neg)r3.z -min.f r2.y, r3.y, c22.y -mov.f32f32 r2.z, r2.z -mov.f32f32 r0.w, r0.w -mad.f32 r1.y, c19.y, r1.y, c19.z -min.f r2.y, r2.y, c18.w -max.f r2.z, r2.z, c18.y -max.f r0.w, c18.y, r0.w -absneg.f r1.y, (abs)r1.y -mov.f32f32 r2.y, r2.y -min.f r2.z, r2.z, c22.y -mov.f32f32 r0.w, r0.w -mul.f r3.y, r1.y, r1.y -mul.f r1.x, r1.x, r2.y -min.f r1.y, r2.z, c20.y -mul.f r2.y, r0.w, c5.z -mul.f r2.z, r0.w, c5.y -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.y, r1.y -mov.f32f32 r2.y, r2.y -mov.f32f32 r2.z, r2.z -mul.f r0.w, r0.w, c5.x -mul.f r3.z, r1.z, r1.y -mul.f r3.w, r1.w, r1.y -max.f r1.w, r2.x, c20.z -mov.f32f32 r1.z, r2.y -mov.f32f32 r2.x, r3.z -mov.f32f32 r1.y, r2.z -mad.f32 r2.y, c17.x, r1.x, r2.x -mad.f32 r1.x, c17.y, r1.x, r2.x -mov.f32f32 r1.w, r1.w -mov.f32f32 r0.w, r0.w -mov.f32f32 r2.x, r2.y -mov.f32f32 r2.y, r1.x -min.f r2.z, r1.w, c20.w -mov.f32f32 r1.x, r0.w -mov.f32f32 r1.w, r3.y -mul.f r0.w, c11.y, r0.x -mov.f32f32 r2.z, r2.z -mad.f32 r0.w, c12.y, r0.y, r0.w +mul.f r1.x, c11.y, r0.x mul.f r0.x, c11.w, r0.x -mov.f32f32 r3.z, r4.y -mad.f32 r3.x, r3.w, r2.z, r3.x -mad.f32 r2.w, r3.w, r2.z, r2.w -mad.f32 r0.w, c13.y, r0.z, r0.w +mov.f32f32 r4.y, r4.x +mul.f r2.z, r2.z, r4.x +min.f r2.x, r2.x, c20.w +mad.f32 r0.w, c13.z, r1.y, r0.w +mul.f r1.y, r2.y, r4.y +mad.f32 r1.x, c12.y, r0.y, r1.x mad.f32 r0.x, c12.w, r0.y, r0.x -add.f r0.y, r3.x, r2.x -add.f r2.x, r2.w, r2.y -add.f r0.w, r0.w, c14.y -nop -mov.f32f32 r3.x, r0.y -mov.f32f32 r3.y, r2.x -mad.f32 r0.y, r3.w, r2.z, r0.w +mad.f32 r0.y, c13.y, r0.z, r1.x +mov.f32f32 r1.x, r1.y +mad.f32 r1.y, c17.y, r1.z, r1.y +mad.f32 r1.x, c17.x, r4.z, r1.x +mov.f32f32 r1.z, r2.x +mad.f32 r2.x, r2.z, r2.x, r3.x +add.f r0.y, r0.y, c14.y +mad.f32 r0.w, (neg)c4.z, r0.w, r2.w +mad.f32 r2.y, r2.z, r1.z, r3.y +add.f r1.y, r2.x, r1.y +mad.f32 r0.y, r2.z, r1.z, r0.y +max.f r4.x, c18.y, r0.w +add.f r0.w, r2.y, r1.x +mov.f32f32 r1.x, r1.y mad.f32 r0.x, c13.w, r0.z, r0.x -mul.f r0.z, c7.w, r3.x -mul.f r0.w, c7.z, r3.x -mad.f32 r0.z, c8.w, r0.y, r0.z -mad.f32 r0.w, c8.z, r0.y, r0.w -mul.f r2.x, c7.y, r3.x -mul.f r2.y, c7.x, r3.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -mad.f32 r0.z, c9.w, r3.y, r0.z -add.f r0.x, r0.x, c14.w -mad.f32 r0.w, c9.z, r3.y, r0.w -mad.f32 r2.x, c8.y, r0.y, r2.x -mad.f32 r2.y, c8.x, r0.y, r2.y -mad.f32 r0.z, c10.w, r0.x, r0.z -mad.f32 r0.w, c10.z, r0.x, r0.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r2.y, r2.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -mad.f32 r2.x, c9.y, r3.y, r2.x -mad.f32 r2.y, c9.x, r3.y, r2.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -mad.f32 r2.x, c10.y, r0.x, r2.x -mad.f32 r2.y, c10.x, r0.x, r2.y -mov.f32f32 r2.w, r0.z -mov.f32f32 r2.z, r0.w -mov.f32f32 r0.z, r2.x -mov.f32f32 r0.w, r2.y -mul.f r3.w, c0.w, r3.x -mul.f r4.y, c0.z, r3.x -mul.f r0.z, r0.z, c15.y -mul.f r0.w, r0.w, c15.x -(rpt1)nop -mov.f32f32 r2.y, r0.z -mov.f32f32 r2.x, r0.w -mad.f32 r0.z, c1.w, r0.y, r3.w -mad.f32 r0.w, c1.z, r0.y, r4.y -mad.f32 r0.z, c2.w, r3.y, r0.z -mad.f32 r0.w, c2.z, r3.y, r0.w -mad.f32 r0.z, c3.w, r0.x, r0.z -mad.f32 r3.w, c3.z, r0.x, r0.w -mul.f r4.y, c0.y, r3.x -mul.f r4.z, c0.x, r3.x -mov.f32f32 r0.w, r0.z -mov.f32f32 r0.z, r3.w -mad.f32 r3.w, c1.y, r0.y, r4.y -mad.f32 r0.y, c1.x, r0.y, r4.z -mad.f32 r3.w, c2.y, r3.y, r3.w -mad.f32 r0.y, c2.x, r3.y, r0.y -mad.f32 r3.w, c3.y, r0.x, r3.w -mad.f32 r0.x, c3.x, r0.x, r0.y -(rpt1)nop -mov.f32f32 r0.y, r3.w -mov.f32f32 r0.x, r0.x -mad.f32 r3.y, c6.x, r3.y, c6.y -mad.f32 r3.x, c6.x, r3.x, c6.y -(rpt1)nop -mov.f32f32 r3.y, r3.y -mov.f32f32 r3.x, r3.x -mov.f32f32 r3.w, r3.z -mov.f32f32 r3.z, r4.x -(rpt2)nop -mov.f32f32 r3.z, r3.z +nop +mov.f32f32 r1.z, r0.w +mul.f r3.x, c0.x, r0.w +mad.f32 r3.y, c6.x, r1.x, c6.y +mov.f32f32 r4.y, r4.x +mul.f r0.z, c7.y, r1.z +mul.f r0.w, c7.x, r1.z +mad.f32 r0.z, c8.y, r0.y, r0.z +mad.f32 r0.w, c8.x, r0.y, r0.w +mad.f32 r0.z, c9.y, r1.x, r0.z +add.f r4.z, r0.x, c14.w +mad.f32 r0.x, c9.x, r1.x, r0.w +mul.f r0.w, c7.w, r1.z +mul.f r2.x, c7.z, r1.z +mad.f32 r0.z, c10.y, r4.z, r0.z +mad.f32 r0.x, c10.x, r4.z, r0.x +mad.f32 r0.w, c8.w, r0.y, r0.w +mad.f32 r2.z, c8.z, r0.y, r2.x +mul.f r2.y, r0.z, c15.y +mul.f r2.x, r0.x, c15.x +mad.f32 r0.x, c9.w, r1.x, r0.w +mad.f32 r0.z, c9.z, r1.x, r2.z +mad.f32 r2.w, c10.w, r4.z, r0.x +mad.f32 r2.z, c10.z, r4.z, r0.z +mul.f r0.x, c0.w, r1.z +mul.f r0.z, c0.z, r1.z +mad.f32 r0.x, c1.w, r0.y, r0.x +mad.f32 r0.z, c1.z, r0.y, r0.z +mad.f32 r0.x, c2.w, r1.x, r0.x +mad.f32 r0.z, c2.z, r1.x, r0.z +mad.f32 r0.w, c3.w, r4.z, r0.x +mad.f32 r0.z, c3.z, r4.z, r0.z +mul.f r0.x, c0.y, r1.z +mad.f32 r3.x, c1.x, r0.y, r3.x +mad.f32 r0.x, c1.y, r0.y, r0.x +mad.f32 r0.y, c2.x, r1.y, r3.x +mad.f32 r1.x, c2.y, r1.x, r0.x +mad.f32 r0.x, c3.x, r4.z, r0.y +mad.f32 r0.y, c3.y, r4.z, r1.x +mad.f32 r3.x, c6.x, r1.z, c6.y +mul.f r1.z, r4.y, c5.z +mul.f r1.y, r4.y, c5.y +mul.f r1.x, r4.x, c5.x end ; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11) -; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r4.x (0:0,cm=3,il=16,b=0) -; VERT: 221 instructions, 0 half, 5 full +; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r3.z (0:0,cm=3,il=16,b=0) +; VERT: 152 instructions, 0 half, 6 full diff --git a/reference/0ad-alpine-valley/0ad-99.asm b/reference/0ad-alpine-valley/0ad-99.asm index c6a3a6f..9c8ac11 100644 --- a/reference/0ad-alpine-valley/0ad-99.asm +++ b/reference/0ad-alpine-valley/0ad-99.asm @@ -24,115 +24,81 @@ @out(r3.y) out13 @out(r3.z) out14 @out(r3.w) out15 +@const(c16.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 +@const(c17.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)mul.f r1.z, c11.x, r0.w mul.f r1.w, c11.x, r0.x mad.f32 r1.z, c12.x, r1.x, r1.z mad.f32 r1.w, c12.x, r0.y, r1.w -mul.f r2.x, c11.z, r0.x -mad.f32 r1.w, c13.x, r0.z, r1.w -mov.f32f32 r1.z, r1.z -mad.f32 r2.x, c12.z, r0.y, r2.x mad.f32 r1.z, c13.x, r1.y, r1.z -add.f r1.w, r1.w, c14.x -mad.f32 r2.x, c13.z, r0.z, r2.x -mul.f r2.y, c11.y, r0.w -mov.f32f32 r1.z, r1.z -mul.f r2.z, c7.w, r1.w -mul.f r2.w, c7.z, r1.w -mul.f r3.x, c7.y, r1.w +mad.f32 r1.w, c13.x, r0.z, r1.w +mul.f r2.x, c11.z, r0.x +mul.f r2.y, c11.y, r0.x mul.f r1.z, r1.z, (neg)c4.x -mad.f32 r2.y, c12.y, r1.x, r2.y -mul.f r3.y, c11.y, r0.x -mul.f r4.x, c7.x, r1.w -mad.f32 r3.y, c12.y, r0.y, r3.y -mov.f32f32 r2.y, r2.y -mad.f32 r3.y, c13.y, r0.z, r3.y -mad.f32 r2.y, c13.y, r1.y, r2.y -mul.f r4.y, c0.w, r1.w -mul.f r4.z, c0.z, r1.w -mul.f r4.w, c0.y, r1.w -mov.f32f32 r2.y, r2.y -add.f r3.y, r3.y, c14.y -mul.f r5.x, c0.x, r1.w -add.f r5.y, r2.x, c14.z -mad.f32 r1.z, (neg)c4.y, r2.y, r1.z -mad.f32 r2.x, c8.w, r3.y, r2.z -mad.f32 r2.y, c8.z, r3.y, r2.w -mad.f32 r2.z, c8.y, r3.y, r3.x -mov.f32f32 r1.z, r1.z +mul.f r2.z, c11.y, r0.w +add.f r1.w, r1.w, c14.x +mad.f32 r2.z, c12.y, r1.x, r2.z +mad.f32 r2.x, c12.z, r0.y, r2.x +mad.f32 r2.z, c13.y, r1.y, r2.z +mul.f r2.w, c7.y, r1.w +mul.f r3.x, c7.x, r1.w +mul.f r3.y, c7.w, r1.w +mad.f32 r1.z, (neg)c4.y, r2.z, r1.z mul.f r0.w, c11.z, r0.w -mad.f32 r2.x, c9.w, r5.y, r2.x +mad.f32 r2.y, c12.y, r0.y, r2.y mad.f32 r0.w, c12.z, r1.x, r0.w +mad.f32 r1.x, c13.y, r0.z, r2.y +mad.f32 r0.w, c13.z, r1.y, r0.w +mul.f r1.y, c7.z, r1.w +mul.f r2.y, c0.w, r1.w +mul.f r2.z, c0.z, r1.w +mad.f32 r0.w, (neg)c4.z, r0.w, r1.z +add.f r4.x, r1.x, c14.y +mul.f r4.y, c0.y, r1.w +mul.f r4.z, c0.x, r1.w +max.f r0.w, c16.x, r0.w +mad.f32 r2.w, c8.y, r4.x, r2.w +mad.f32 r3.x, c8.x, r4.x, r3.x +mad.f32 r3.y, c8.w, r4.x, r3.y +mov.f32f32 r4.w, r0.w +mad.f32 r2.x, c13.z, r0.z, r2.x +mul.f r1.x, r0.w, c5.x +mad.f32 r0.w, c8.z, r4.x, r1.y +mul.f r1.z, r4.w, c5.z +mul.f r1.y, r4.w, c5.y +add.f r4.w, r2.x, c14.z +mad.f32 r2.x, c1.w, r4.x, r2.y +mad.f32 r2.y, c1.z, r4.x, r2.z +mad.f32 r2.z, c1.y, r4.x, r4.y +mad.f32 r2.w, c9.y, r4.w, r2.w mul.f r0.x, c11.w, r0.x -mad.f32 r1.x, c9.z, r5.y, r2.y -mad.f32 r2.y, c9.y, r5.y, r2.z -mov.f32f32 r0.w, r0.w +mad.f32 r3.x, c9.x, r4.w, r3.x mad.f32 r0.x, c12.w, r0.y, r0.x -mad.f32 r0.y, c13.z, r1.y, r0.w +mad.f32 r0.y, c9.w, r4.w, r3.y mad.f32 r0.x, c13.w, r0.z, r0.x -mad.f32 r0.z, c8.x, r3.y, r4.x -mad.f32 r0.w, c1.w, r3.y, r4.y -mov.f32f32 r0.y, r0.y +mad.f32 r0.z, c9.z, r4.w, r0.w +mad.f32 r0.w, c2.w, r4.w, r2.x +mad.f32 r4.y, c2.z, r4.w, r2.y add.f r0.x, r0.x, c14.w -mad.f32 r0.z, c9.x, r5.y, r0.z -mad.f32 r0.w, c2.w, r5.y, r0.w -mad.f32 r0.y, (neg)c4.z, r0.y, r1.z -mad.f32 r1.y, c10.w, r0.x, r2.x -mad.f32 r1.x, c10.z, r0.x, r1.x -mad.f32 r1.z, c10.y, r0.x, r2.y -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.x, r1.x -mul.f r1.z, r1.z, c15.y -max.f r0.y, c16.x, r0.y -mov.f32f32 r2.w, r1.y -mov.f32f32 r2.z, r1.x -mov.f32f32 r2.y, r1.z -mov.f32f32 r0.y, r0.y -mad.f32 r0.z, c10.x, r0.x, r0.z +mad.f32 r5.x, c2.y, r4.w, r2.z +mad.f32 r4.x, c1.x, r4.x, r4.z +mad.f32 r3.y, c6.x, r4.w, c6.y +mad.f32 r2.x, c10.y, r0.x, r2.w +mad.f32 r3.x, c10.x, r0.x, r3.x +mad.f32 r2.w, c10.w, r0.x, r0.y +mad.f32 r2.z, c10.z, r0.x, r0.z +mul.f r2.y, r2.x, c15.y +mul.f r2.x, r3.x, c15.x mad.f32 r0.w, c3.w, r0.x, r0.w -mad.f32 r3.x, c1.z, r3.y, r4.z -mul.f r1.x, r0.y, c5.z -mul.f r1.y, r0.y, c5.y -mul.f r0.y, r0.y, c5.x -mul.f r0.z, r0.z, c15.x -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.y, r1.y -mov.f32f32 r0.y, r0.y -nop -mov.f32f32 r1.z, r1.x -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.x, r0.y -mov.f32f32 r2.x, r0.z -mov.f32f32 r0.w, r0.w -mad.f32 r0.y, c2.z, r5.y, r3.x -mad.f32 r0.z, c1.y, r3.y, r4.w -mad.f32 r0.y, c3.z, r0.x, r0.y -mad.f32 r3.x, c2.y, r5.y, r0.z -mad.f32 r3.y, c1.x, r3.y, r5.x -mad.f32 r4.x, c6.x, r5.y, c6.y -mov.f32f32 r0.z, r0.y -mad.f32 r0.y, c3.y, r0.x, r3.x -mad.f32 r3.x, c2.x, r5.y, r3.y -mov.f32f32 r3.y, r4.x -mad.f32 r1.w, c6.x, r1.w, c6.y -mov.f32f32 r0.y, r0.y -mad.f32 r0.x, c3.x, r0.x, r3.x -mov.f32f32 r3.y, r3.y -mov.f32f32 r1.w, r1.w -nop -mov.f32f32 r0.x, r0.x -mov.f32f32 r3.w, r3.w -mov.f32f32 r3.x, r1.w -mov.f32f32 r1.w, r3.z -mov.f32f32 r4.x, (0.000000) -mov.f32f32 r3.w, r3.w -nop -mov.f32f32 r3.z, r1.w -mov.f32f32 r1.w, r4.x +mad.f32 r0.z, c3.z, r0.x, r4.y +mad.f32 r0.y, c3.y, r0.x, r5.x +mad.f32 r4.x, c2.x, r4.w, r4.x +mad.f32 r3.x, c6.x, r1.w, c6.y +mad.f32 r0.x, c3.x, r0.x, r4.x +mov.f32f32 r1.w, (0.000000) end nop ; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11) ; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r3.z (0:0,cm=3,il=16,b=0) -; VERT: 107 instructions, 0 half, 6 full +; VERT: 71 instructions, 0 half, 6 full diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-00.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-00.asm index 4829aaa..ca5a064 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-00.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-00.asm @@ -2,49 +2,40 @@ ; FRAG: new compiler @in(r0.x) in0 @in(r0.y) in1 -@out(r2.x) out0 -@out(r2.y) out1 -@out(r2.z) out2 -@out(r2.w) out3 +@out(r1.x) out0 +@out(r1.y) out1 +@out(r1.z) out2 +@out(r1.w) out3 +@const(c3.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)mov.f32f32 r0.z, c2.x -bary.f r0.w, 0, r0.x -bary.f (ei)r0.x, 1, r0.x -mov.f32f32 r0.y, (0.000000) -max.f r0.z, r0.z, c3.x -mov.f32f32 r1.x, r0.w -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.y, r0.x -min.f r0.z, r0.z, c3.y -mov.f32f32 r1.z, r1.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.x, r1.y -add.f r1.w, c3.y, (neg)r0.z -mov.f32f32 r0.x, r0.x -mov.f32f32 r2.y, r1.y -mov.f32f32 r2.z, r0.y +bary.f r1.w, 0, r0.x +bary.f (ei)r2.x, 1, r0.x +mov.f32f32 r1.z, (0.000000) +max.f r0.x, r0.z, c3.x +mov.f32f32 r1.x, r1.w +mov.f32f32 r1.y, r2.x +nop +min.f r0.x, r0.x, c3.y +sam (f32)(w)r1.w, r1.w, s#0, t#0 +(rpt2)nop +add.f r0.y, c3.y, (neg)r0.x +sam (f32)(w)r2.w, r1.x, s#1, t#1 +(sy)mul.f r0.x, r0.x, r2.z (rpt1)nop -sam (f32)(w)r3.x, r0.w, s#1, t#1 -(sy)mul.f r0.y, r1.w, r3.w -mov.f32f32 r1.w, r0.x -(rpt5)nop -sam (f32)(w)r0.w, r1.z, s#0, t#0 -(sy)mul.f r0.x, r0.z, r1.z +mul.f r0.y, r0.y, r3.z (rpt2)nop -add.f r0.x, r0.x, r0.y +(ss)add.f r1.x, r0.x, r0.y (rpt2)nop -mov.f32f32 r0.y, r0.x -mov.f32f32 r2.x, r0.x -(rpt1)nop -mov.f32f32 r0.x, r0.y +mov.f32f32 r0.x, r1.x +(rpt2)nop +mov.f32f32 r0.x, r0.x (rpt2)nop -mov.f32f32 r2.w, r0.x +mov.f32f32 r1.w, r0.x end nop nop nop -; FRAG: outputs: r2.x (1:0) +; FRAG: outputs: r1.x (1:0) ; FRAG: inputs: r0.x (5:9,cm=f,il=8,b=1) -; FRAG: 46 instructions, 0 half, 4 full -; pos (bary): r0.x -; color: r2.x +; FRAG: 36 instructions, 0 half, 4 full diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-01.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-01.asm index f2dc1b6..5ad7d4d 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-01.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-01.asm @@ -3,38 +3,22 @@ @in(r0.x) in0 @in(r0.y) in1 @in(r0.z) in2 -@in(r0.w) in4 -@in(r1.x) in5 -@out(r0.w) out0 -@out(r1.x) out1 -@out(r1.y) out2 -@out(r1.z) out3 -@out(r1.w) out4 -@out(r2.x) out5 -@out(r2.y) out6 -@out(r2.z) out7 -(sy)(ss)mov.f32f32 r1.y, (0.000000) +@in(r1.x) in4 +@in(r1.y) in5 +@out(r0.x) out0 +@out(r0.y) out1 +@out(r0.z) out2 +@out(r0.w) out3 +@out(r1.x) out4 +@out(r1.y) out5 +@out(r1.z) out6 +@out(r1.w) out7 +@const(c0.x) 0x3f800000, 0x00000000, 0x00000000, 0x00000000 +(sy)(ss)mov.f32f32 r1.w, (0.000000) mov.f32f32 r1.z, (0.000000) -mov.f32f32 r1.x, r1.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r2.z, r1.y -mov.f32f32 r2.y, r1.z -mov.f32f32 r2.x, r1.x -mov.f32f32 r1.w, r0.w mov.f32f32 r0.w, c0.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x -mov.f32f32 r1.z, r0.w -mov.f32f32 r1.y, r0.z -mov.f32f32 r1.x, r0.y -mov.f32f32 r0.w, r0.x end -nop -nop -nop -; VERT: outputs: r0.w (0:0) r1.w (5:9) -; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=3,il=12,b=0) -; VERT: 17 instructions, 0 half, 3 full -; pos: r0.w +; VERT: outputs: r0.x (0:0) r1.x (5:9) +; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r1.x (0:0,cm=3,il=12,b=0) +; VERT: 4 instructions, 0 half, 2 full diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-02.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-02.asm index ab7ad75..809d0f3 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-02.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-02.asm @@ -1,36 +1,23 @@ ; options: ; FRAG: new compiler -@out(r0.x) out0 -@out(r0.y) out1 -@out(r0.z) out2 -@out(r0.w) out3 +@out(r1.x) out0 +@out(r1.y) out1 +@out(r1.z) out2 +@out(r1.w) out3 +@const(c10.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)mov.f32f32 r0.x, c9.x -mov.f32f32 r0.y, c9.w -mov.f32f32 r0.z, c9.y -mov.f32f32 r0.w, c4.z -mov.f32f32 r1.x, r0.x -mov.f32f32 r1.z, r0.y -mov.f32f32 r1.y, r0.z -mov.f32f32 r0.z, r0.w -mov.f32f32 r1.w, c4.y -mov.f32f32 r2.x, c4.x +mov.f32f32 r0.y, c9.y +mov.f32f32 r0.z, c9.w +mov.f32f32 r1.z, c4.z +mov.f32f32 r1.y, c4.y +mov.f32f32 r1.x, c4.x (rpt2)nop -sam.p (f32)(w)r2.y, r1.x, s#0, t#0 -(sy)(ss)mul.f r1.x, r3.x, c4.w -mov.f32f32 r0.y, r1.w -mov.f32f32 r0.x, r2.x -nop -mov.f32f32 r1.x, r1.x -(rpt2)nop -mov.f32f32 r1.x, r1.x -(rpt2)nop -mov.f32f32 r0.w, r1.x +sam.p (f32)(w)r0.x, r0.x, s#0, t#0 +(sy)mul.f r1.w, r0.w, c4.w end nop nop -; FRAG: outputs: r0.x (1:0) +; FRAG: outputs: r1.x (1:0) ; FRAG: inputs: -; FRAG: 28 instructions, 0 half, 4 full -; pos (bary): r0.x -; color: r0.x +; FRAG: 12 instructions, 0 half, 2 full diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-101.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-101.asm index 7fd0d8e..85a697c 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-101.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-101.asm @@ -2,48 +2,38 @@ ; FRAG: new compiler @in(r0.z) in2 @in(r0.w) in3 -@out(r1.x) out0 -@out(r1.y) out1 -@out(r1.z) out2 -@out(r1.w) out3 +@out(r2.x) out0 +@out(r2.y) out1 +@out(r2.z) out2 +@out(r2.w) out3 +@const(c4.x) 0x3f000000, 0x00000000, 0x3f800000, 0x3fb8aa65 +@const(c5.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)add.f r0.x, r0.w, c4.y -mov.f32f32 r0.y, c0.w -absneg.f r0.w, (neg)c2.x +absneg.f r0.y, (neg)c2.x +mov.f32f32 r2.w, c0.w add.f r0.z, r0.z, c4.y (rpt2)nop rcp r0.x, r0.x (ss)mul.f r0.x, r0.z, r0.x -mov.f32f32 r1.w, r0.y -mul.f r0.y, r0.w, c2.x -nop -mov.f32f32 r0.x, r0.x +mul.f r0.y, r0.y, c2.x (rpt2)nop mul.f r0.y, r0.y, r0.x -(rpt2)nop -mov.f32f32 r0.y, r0.y +mov.f32f32 r0.x, r0.x (rpt2)nop mul.f r0.x, r0.y, r0.x (rpt2)nop -mov.f32f32 r0.x, r0.x -(rpt2)nop mul.f r0.x, r0.x, c4.w -(rpt2)nop -mov.f32f32 r0.x, r0.x (rpt5)nop exp2 r0.x, r0.x -(ss)mov.f32f32 r0.x, r0.x -(rpt2)nop -add.f r0.y, c5.y, (neg)r0.x -mul.f r0.x, r0.x, c4.z +(ss)mov.f32f32 r0.y, r0.x +(ss)mul.f r0.x, r0.x, c4.z (rpt1)nop +add.f r0.y, c5.y, (neg)r0.y +(rpt2)nop mul.f r0.y, r0.y, c2.y (rpt2)nop add.f r0.x, r0.x, r0.y (rpt2)nop -mov.f32f32 r0.x, r0.x -(rpt2)nop -mov.f32f32 r0.x, r0.x -(rpt2)nop max.f r0.x, r0.x, c4.y (rpt2)nop min.f r0.x, r0.x, c4.z @@ -51,25 +41,22 @@ min.f r0.x, r0.x, c4.z add.f r0.y, c5.y, (neg)r0.x add.f r0.z, c5.y, (neg)r0.x add.f r0.w, c5.y, (neg)r0.x -mul.f r2.x, r0.x, c0.z +mul.f r1.x, r0.x, c0.z mul.f r0.y, r0.y, c1.z mul.f r0.z, r0.z, c1.y mul.f r0.w, r0.w, c1.x -mul.f r2.y, r0.x, c0.y -add.f r0.y, r2.x, r0.y +nop +add.f r2.z, r1.x, r0.y +mul.f r0.y, r0.x, c0.y mul.f r0.x, r0.x, c0.x (rpt1)nop -mov.f32f32 r1.z, r0.y -add.f r0.y, r2.y, r0.z -add.f r0.x, r0.x, r0.w -(rpt1)nop -mov.f32f32 r1.y, r0.y -mov.f32f32 r1.x, r0.x +add.f r2.y, r0.y, r0.z +add.f r2.x, r0.x, r0.w end -; FRAG: outputs: r1.x (1:0) +nop +nop +nop +; FRAG: outputs: r2.x (1:0) ; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) -; FRAG: 96 instructions, 0 half, 3 full -; pos (bary): r1.x -; color: r1.x -; fragcoord: r0.x +; FRAG: 70 instructions, 0 half, 3 full diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-103.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-103.asm index 7fd0d8e..85a697c 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-103.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-103.asm @@ -2,48 +2,38 @@ ; FRAG: new compiler @in(r0.z) in2 @in(r0.w) in3 -@out(r1.x) out0 -@out(r1.y) out1 -@out(r1.z) out2 -@out(r1.w) out3 +@out(r2.x) out0 +@out(r2.y) out1 +@out(r2.z) out2 +@out(r2.w) out3 +@const(c4.x) 0x3f000000, 0x00000000, 0x3f800000, 0x3fb8aa65 +@const(c5.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)add.f r0.x, r0.w, c4.y -mov.f32f32 r0.y, c0.w -absneg.f r0.w, (neg)c2.x +absneg.f r0.y, (neg)c2.x +mov.f32f32 r2.w, c0.w add.f r0.z, r0.z, c4.y (rpt2)nop rcp r0.x, r0.x (ss)mul.f r0.x, r0.z, r0.x -mov.f32f32 r1.w, r0.y -mul.f r0.y, r0.w, c2.x -nop -mov.f32f32 r0.x, r0.x +mul.f r0.y, r0.y, c2.x (rpt2)nop mul.f r0.y, r0.y, r0.x -(rpt2)nop -mov.f32f32 r0.y, r0.y +mov.f32f32 r0.x, r0.x (rpt2)nop mul.f r0.x, r0.y, r0.x (rpt2)nop -mov.f32f32 r0.x, r0.x -(rpt2)nop mul.f r0.x, r0.x, c4.w -(rpt2)nop -mov.f32f32 r0.x, r0.x (rpt5)nop exp2 r0.x, r0.x -(ss)mov.f32f32 r0.x, r0.x -(rpt2)nop -add.f r0.y, c5.y, (neg)r0.x -mul.f r0.x, r0.x, c4.z +(ss)mov.f32f32 r0.y, r0.x +(ss)mul.f r0.x, r0.x, c4.z (rpt1)nop +add.f r0.y, c5.y, (neg)r0.y +(rpt2)nop mul.f r0.y, r0.y, c2.y (rpt2)nop add.f r0.x, r0.x, r0.y (rpt2)nop -mov.f32f32 r0.x, r0.x -(rpt2)nop -mov.f32f32 r0.x, r0.x -(rpt2)nop max.f r0.x, r0.x, c4.y (rpt2)nop min.f r0.x, r0.x, c4.z @@ -51,25 +41,22 @@ min.f r0.x, r0.x, c4.z add.f r0.y, c5.y, (neg)r0.x add.f r0.z, c5.y, (neg)r0.x add.f r0.w, c5.y, (neg)r0.x -mul.f r2.x, r0.x, c0.z +mul.f r1.x, r0.x, c0.z mul.f r0.y, r0.y, c1.z mul.f r0.z, r0.z, c1.y mul.f r0.w, r0.w, c1.x -mul.f r2.y, r0.x, c0.y -add.f r0.y, r2.x, r0.y +nop +add.f r2.z, r1.x, r0.y +mul.f r0.y, r0.x, c0.y mul.f r0.x, r0.x, c0.x (rpt1)nop -mov.f32f32 r1.z, r0.y -add.f r0.y, r2.y, r0.z -add.f r0.x, r0.x, r0.w -(rpt1)nop -mov.f32f32 r1.y, r0.y -mov.f32f32 r1.x, r0.x +add.f r2.y, r0.y, r0.z +add.f r2.x, r0.x, r0.w end -; FRAG: outputs: r1.x (1:0) +nop +nop +nop +; FRAG: outputs: r2.x (1:0) ; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) -; FRAG: 96 instructions, 0 half, 3 full -; pos (bary): r1.x -; color: r1.x -; fragcoord: r0.x +; FRAG: 70 instructions, 0 half, 3 full diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-105.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-105.asm index 7fd0d8e..85a697c 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-105.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-105.asm @@ -2,48 +2,38 @@ ; FRAG: new compiler @in(r0.z) in2 @in(r0.w) in3 -@out(r1.x) out0 -@out(r1.y) out1 -@out(r1.z) out2 -@out(r1.w) out3 +@out(r2.x) out0 +@out(r2.y) out1 +@out(r2.z) out2 +@out(r2.w) out3 +@const(c4.x) 0x3f000000, 0x00000000, 0x3f800000, 0x3fb8aa65 +@const(c5.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)add.f r0.x, r0.w, c4.y -mov.f32f32 r0.y, c0.w -absneg.f r0.w, (neg)c2.x +absneg.f r0.y, (neg)c2.x +mov.f32f32 r2.w, c0.w add.f r0.z, r0.z, c4.y (rpt2)nop rcp r0.x, r0.x (ss)mul.f r0.x, r0.z, r0.x -mov.f32f32 r1.w, r0.y -mul.f r0.y, r0.w, c2.x -nop -mov.f32f32 r0.x, r0.x +mul.f r0.y, r0.y, c2.x (rpt2)nop mul.f r0.y, r0.y, r0.x -(rpt2)nop -mov.f32f32 r0.y, r0.y +mov.f32f32 r0.x, r0.x (rpt2)nop mul.f r0.x, r0.y, r0.x (rpt2)nop -mov.f32f32 r0.x, r0.x -(rpt2)nop mul.f r0.x, r0.x, c4.w -(rpt2)nop -mov.f32f32 r0.x, r0.x (rpt5)nop exp2 r0.x, r0.x -(ss)mov.f32f32 r0.x, r0.x -(rpt2)nop -add.f r0.y, c5.y, (neg)r0.x -mul.f r0.x, r0.x, c4.z +(ss)mov.f32f32 r0.y, r0.x +(ss)mul.f r0.x, r0.x, c4.z (rpt1)nop +add.f r0.y, c5.y, (neg)r0.y +(rpt2)nop mul.f r0.y, r0.y, c2.y (rpt2)nop add.f r0.x, r0.x, r0.y (rpt2)nop -mov.f32f32 r0.x, r0.x -(rpt2)nop -mov.f32f32 r0.x, r0.x -(rpt2)nop max.f r0.x, r0.x, c4.y (rpt2)nop min.f r0.x, r0.x, c4.z @@ -51,25 +41,22 @@ min.f r0.x, r0.x, c4.z add.f r0.y, c5.y, (neg)r0.x add.f r0.z, c5.y, (neg)r0.x add.f r0.w, c5.y, (neg)r0.x -mul.f r2.x, r0.x, c0.z +mul.f r1.x, r0.x, c0.z mul.f r0.y, r0.y, c1.z mul.f r0.z, r0.z, c1.y mul.f r0.w, r0.w, c1.x -mul.f r2.y, r0.x, c0.y -add.f r0.y, r2.x, r0.y +nop +add.f r2.z, r1.x, r0.y +mul.f r0.y, r0.x, c0.y mul.f r0.x, r0.x, c0.x (rpt1)nop -mov.f32f32 r1.z, r0.y -add.f r0.y, r2.y, r0.z -add.f r0.x, r0.x, r0.w -(rpt1)nop -mov.f32f32 r1.y, r0.y -mov.f32f32 r1.x, r0.x +add.f r2.y, r0.y, r0.z +add.f r2.x, r0.x, r0.w end -; FRAG: outputs: r1.x (1:0) +nop +nop +nop +; FRAG: outputs: r2.x (1:0) ; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) -; FRAG: 96 instructions, 0 half, 3 full -; pos (bary): r1.x -; color: r1.x -; fragcoord: r0.x +; FRAG: 70 instructions, 0 half, 3 full diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-107.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-107.asm index 7fd0d8e..85a697c 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-107.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-107.asm @@ -2,48 +2,38 @@ ; FRAG: new compiler @in(r0.z) in2 @in(r0.w) in3 -@out(r1.x) out0 -@out(r1.y) out1 -@out(r1.z) out2 -@out(r1.w) out3 +@out(r2.x) out0 +@out(r2.y) out1 +@out(r2.z) out2 +@out(r2.w) out3 +@const(c4.x) 0x3f000000, 0x00000000, 0x3f800000, 0x3fb8aa65 +@const(c5.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)add.f r0.x, r0.w, c4.y -mov.f32f32 r0.y, c0.w -absneg.f r0.w, (neg)c2.x +absneg.f r0.y, (neg)c2.x +mov.f32f32 r2.w, c0.w add.f r0.z, r0.z, c4.y (rpt2)nop rcp r0.x, r0.x (ss)mul.f r0.x, r0.z, r0.x -mov.f32f32 r1.w, r0.y -mul.f r0.y, r0.w, c2.x -nop -mov.f32f32 r0.x, r0.x +mul.f r0.y, r0.y, c2.x (rpt2)nop mul.f r0.y, r0.y, r0.x -(rpt2)nop -mov.f32f32 r0.y, r0.y +mov.f32f32 r0.x, r0.x (rpt2)nop mul.f r0.x, r0.y, r0.x (rpt2)nop -mov.f32f32 r0.x, r0.x -(rpt2)nop mul.f r0.x, r0.x, c4.w -(rpt2)nop -mov.f32f32 r0.x, r0.x (rpt5)nop exp2 r0.x, r0.x -(ss)mov.f32f32 r0.x, r0.x -(rpt2)nop -add.f r0.y, c5.y, (neg)r0.x -mul.f r0.x, r0.x, c4.z +(ss)mov.f32f32 r0.y, r0.x +(ss)mul.f r0.x, r0.x, c4.z (rpt1)nop +add.f r0.y, c5.y, (neg)r0.y +(rpt2)nop mul.f r0.y, r0.y, c2.y (rpt2)nop add.f r0.x, r0.x, r0.y (rpt2)nop -mov.f32f32 r0.x, r0.x -(rpt2)nop -mov.f32f32 r0.x, r0.x -(rpt2)nop max.f r0.x, r0.x, c4.y (rpt2)nop min.f r0.x, r0.x, c4.z @@ -51,25 +41,22 @@ min.f r0.x, r0.x, c4.z add.f r0.y, c5.y, (neg)r0.x add.f r0.z, c5.y, (neg)r0.x add.f r0.w, c5.y, (neg)r0.x -mul.f r2.x, r0.x, c0.z +mul.f r1.x, r0.x, c0.z mul.f r0.y, r0.y, c1.z mul.f r0.z, r0.z, c1.y mul.f r0.w, r0.w, c1.x -mul.f r2.y, r0.x, c0.y -add.f r0.y, r2.x, r0.y +nop +add.f r2.z, r1.x, r0.y +mul.f r0.y, r0.x, c0.y mul.f r0.x, r0.x, c0.x (rpt1)nop -mov.f32f32 r1.z, r0.y -add.f r0.y, r2.y, r0.z -add.f r0.x, r0.x, r0.w -(rpt1)nop -mov.f32f32 r1.y, r0.y -mov.f32f32 r1.x, r0.x +add.f r2.y, r0.y, r0.z +add.f r2.x, r0.x, r0.w end -; FRAG: outputs: r1.x (1:0) +nop +nop +nop +; FRAG: outputs: r2.x (1:0) ; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) -; FRAG: 96 instructions, 0 half, 3 full -; pos (bary): r1.x -; color: r1.x -; fragcoord: r0.x +; FRAG: 70 instructions, 0 half, 3 full diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-111.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-111.asm index 7eb689b..13ea129 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-111.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-111.asm @@ -2,29 +2,20 @@ ; FRAG: new compiler @in(r0.x) in0 @in(r0.y) in1 -@out(r0.w) out0 -@out(r1.x) out1 -@out(r1.y) out2 -@out(r1.z) out3 +@out(r1.x) out0 +@out(r1.y) out1 +@out(r1.z) out2 +@out(r1.w) out3 +@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)bary.f r0.z, 0, r0.x -bary.f (ei)r0.x, 1, r0.x -(rpt1)nop -mov.f32f32 r0.y, r0.z -mov.f32f32 r0.x, r0.x -(rpt1)nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.x +bary.f (ei)r0.w, 1, r0.x (rpt5)nop -sam (f32)(xyzw)r0.x, r0.y, s#0, t#0 -(sy)mov.f32f32 r1.z, r0.w -mov.f32f32 r1.y, r0.z -mov.f32f32 r1.x, r0.y -mov.f32f32 r0.w, r0.x +sam (f32)(xyzw)r1.x, r0.z, s#0, t#0 end nop -; FRAG: outputs: r0.w (1:0) +nop +nop +; FRAG: outputs: r1.x (1:0) ; FRAG: inputs: r0.x (5:9,cm=f,il=8,b=1) -; FRAG: 22 instructions, 0 half, 2 full -; pos (bary): r0.x -; color: r0.w +; FRAG: 10 instructions, 0 half, 2 full diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-114.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-114.asm index 7eb689b..13ea129 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-114.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-114.asm @@ -2,29 +2,20 @@ ; FRAG: new compiler @in(r0.x) in0 @in(r0.y) in1 -@out(r0.w) out0 -@out(r1.x) out1 -@out(r1.y) out2 -@out(r1.z) out3 +@out(r1.x) out0 +@out(r1.y) out1 +@out(r1.z) out2 +@out(r1.w) out3 +@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)bary.f r0.z, 0, r0.x -bary.f (ei)r0.x, 1, r0.x -(rpt1)nop -mov.f32f32 r0.y, r0.z -mov.f32f32 r0.x, r0.x -(rpt1)nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.x +bary.f (ei)r0.w, 1, r0.x (rpt5)nop -sam (f32)(xyzw)r0.x, r0.y, s#0, t#0 -(sy)mov.f32f32 r1.z, r0.w -mov.f32f32 r1.y, r0.z -mov.f32f32 r1.x, r0.y -mov.f32f32 r0.w, r0.x +sam (f32)(xyzw)r1.x, r0.z, s#0, t#0 end nop -; FRAG: outputs: r0.w (1:0) +nop +nop +; FRAG: outputs: r1.x (1:0) ; FRAG: inputs: r0.x (5:9,cm=f,il=8,b=1) -; FRAG: 22 instructions, 0 half, 2 full -; pos (bary): r0.x -; color: r0.w +; FRAG: 10 instructions, 0 half, 2 full diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-116.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-116.asm index 9e5b9a0..7495bc9 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-116.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-116.asm @@ -2,37 +2,28 @@ ; FRAG: new compiler @in(r0.x) in0 @in(r0.y) in1 -@out(r0.w) out0 -@out(r1.x) out1 -@out(r1.y) out2 -@out(r1.z) out3 +@out(r1.x) out0 +@out(r1.y) out1 +@out(r1.z) out2 +@out(r1.w) out3 +@const(c2.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)bary.f r0.z, 0, r0.x -bary.f (ei)r0.x, 1, r0.x -(rpt1)nop -mov.f32f32 r0.y, r0.z -mov.f32f32 r0.x, r0.x -(rpt1)nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.x +bary.f (ei)r0.w, 1, r0.x (rpt5)nop -sam (f32)(xyzw)r0.x, r0.y, s#0, t#0 -(sy)add.f r0.w, r0.w, c0.w -(ss)add.f r0.z, r0.z, c0.z +sam (f32)(xyzw)r0.x, r0.z, s#0, t#0 +(sy)(ss)add.f r0.w, r0.w, c0.w +add.f r0.z, r0.z, c0.z add.f r0.y, r0.y, c0.y add.f r0.x, r0.x, c0.x -mul.f r0.w, r0.w, c1.w -mul.f r0.z, r0.z, c1.z -mul.f r0.y, r0.y, c1.y -mul.f r0.x, r0.x, c1.x -mov.f32f32 r1.z, r0.w -mov.f32f32 r1.y, r0.z -mov.f32f32 r1.x, r0.y -mov.f32f32 r0.w, r0.x +mul.f r1.w, r0.w, c1.w +mul.f r1.z, r0.z, c1.z +mul.f r1.y, r0.y, c1.y +mul.f r1.x, r0.x, c1.x end nop -; FRAG: outputs: r0.w (1:0) +nop +nop +; FRAG: outputs: r1.x (1:0) ; FRAG: inputs: r0.x (5:9,cm=f,il=8,b=1) -; FRAG: 30 instructions, 0 half, 2 full -; pos (bary): r0.x -; color: r0.w +; FRAG: 18 instructions, 0 half, 2 full diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-118.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-118.asm index 7eb689b..13ea129 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-118.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-118.asm @@ -2,29 +2,20 @@ ; FRAG: new compiler @in(r0.x) in0 @in(r0.y) in1 -@out(r0.w) out0 -@out(r1.x) out1 -@out(r1.y) out2 -@out(r1.z) out3 +@out(r1.x) out0 +@out(r1.y) out1 +@out(r1.z) out2 +@out(r1.w) out3 +@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)bary.f r0.z, 0, r0.x -bary.f (ei)r0.x, 1, r0.x -(rpt1)nop -mov.f32f32 r0.y, r0.z -mov.f32f32 r0.x, r0.x -(rpt1)nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.x +bary.f (ei)r0.w, 1, r0.x (rpt5)nop -sam (f32)(xyzw)r0.x, r0.y, s#0, t#0 -(sy)mov.f32f32 r1.z, r0.w -mov.f32f32 r1.y, r0.z -mov.f32f32 r1.x, r0.y -mov.f32f32 r0.w, r0.x +sam (f32)(xyzw)r1.x, r0.z, s#0, t#0 end nop -; FRAG: outputs: r0.w (1:0) +nop +nop +; FRAG: outputs: r1.x (1:0) ; FRAG: inputs: r0.x (5:9,cm=f,il=8,b=1) -; FRAG: 22 instructions, 0 half, 2 full -; pos (bary): r0.x -; color: r0.w +; FRAG: 10 instructions, 0 half, 2 full diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-120.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-120.asm index 51a801a..a965927 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-120.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-120.asm @@ -6,33 +6,20 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c0.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000 (sy)(ss)bary.f r0.z, 0, r0.x -bary.f (ei)r0.x, 1, r0.x -mov.f32f32 r0.y, c0.x -mov.f32f32 r0.w, c0.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.x, r0.x -mov.f32f32 r1.z, r0.y -mov.f32f32 r1.y, r0.w -mov.f32f32 r0.y, r0.z -mov.f32f32 r0.z, r0.x -mov.f32f32 r0.x, c0.x -(rpt4)nop -sam (f32)(w)r2.x, r0.y, s#0, t#0 -(sy)(ss)add.f r0.y, c0.y, (neg)r2.w -mov.f32f32 r1.x, r0.x -(rpt1)nop -mov.f32f32 r0.x, r0.y +bary.f (ei)r0.w, 1, r0.x +mov.f32f32 r1.z, c0.x +mov.f32f32 r1.y, c0.x +mov.f32f32 r1.x, c0.x (rpt2)nop -mov.f32f32 r0.x, r0.x -(rpt2)nop -mov.f32f32 r1.w, r0.x +sam (f32)(w)r0.x, r0.z, s#0, t#0 +(sy)add.f r1.w, c0.y, (neg)r0.w end nop nop +nop ; FRAG: outputs: r1.x (1:0) ; FRAG: inputs: r0.x (5:9,cm=f,il=8,b=1) -; FRAG: 31 instructions, 0 half, 3 full -; pos (bary): r0.x -; color: r1.x +; FRAG: 11 instructions, 0 half, 2 full diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-122.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-122.asm index 4c2f276..1171e2e 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-122.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-122.asm @@ -6,21 +6,16 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 -(sy)(ss)bary.f r0.z, 2, r0.x -bary.f r0.w, 1, r0.x -bary.f (ei)r0.x, 0, r0.x -mov.f32f32 r0.y, c0.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r1.w, r0.y -mov.f32f32 r1.z, r0.z -mov.f32f32 r1.y, r0.w -mov.f32f32 r1.x, r0.x +@const(c0.x) 0x3f800000, 0x00000000, 0x00000000, 0x00000000 +(sy)(ss)mov.f32f32 r1.w, c0.x +bary.f r1.z, 2, r0.x +bary.f r1.y, 1, r0.x +bary.f (ei)r1.x, 0, r0.x end +nop +nop +nop ; FRAG: outputs: r1.x (1:0) ; FRAG: inputs: r0.x (5:9,cm=f,il=8,b=1) -; FRAG: 12 instructions, 0 half, 2 full -; pos (bary): r0.x -; color: r1.x +; FRAG: 5 instructions, 0 half, 2 full diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-126.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-126.asm index 6d04ab5..159d8e0 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-126.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-126.asm @@ -1,44 +1,23 @@ ; options: ; FRAG: new compiler -@out(r1.w) out0 -@out(r2.x) out1 -@out(r2.y) out2 -@out(r2.z) out3 +@out(r1.x) out0 +@out(r1.y) out1 +@out(r1.z) out2 +@out(r1.w) out3 +@const(c10.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)mov.f32f32 r0.x, c9.x -mov.f32f32 r0.y, c4.w +mov.f32f32 r0.y, c9.y mov.f32f32 r0.z, c9.w -mov.f32f32 r0.w, c9.y -mov.f32f32 r1.x, r0.x -mov.f32f32 r2.z, r0.y -mov.f32f32 r1.z, r0.z -mov.f32f32 r1.y, r0.w -(rpt5)nop -sam.p (f32)(xyzw)r0.x, r1.x, s#0, t#0 -(sy)mul.f r0.z, r0.z, c4.z -mul.f r0.y, r0.y, c4.y -mul.f r0.x, r0.x, c4.x -nop -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x -nop -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x -nop -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x -nop -mov.f32f32 r2.y, r0.z -mov.f32f32 r2.x, r0.y -mov.f32f32 r1.w, r0.x +mov.f32f32 r1.w, c4.w +(rpt4)nop +sam.p (f32)(xyzw)r0.x, r0.x, s#0, t#0 +(sy)mul.f r1.z, r0.z, c4.z +mul.f r1.y, r0.y, c4.y +mul.f r1.x, r0.x, c4.x end nop nop -; FRAG: outputs: r1.w (1:0) +; FRAG: outputs: r1.x (1:0) ; FRAG: inputs: -; FRAG: 35 instructions, 0 half, 3 full -; pos (bary): r0.x -; color: r1.w +; FRAG: 14 instructions, 0 half, 2 full diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-127.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-127.asm index 5de51e6..72bfca4 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-127.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-127.asm @@ -2,53 +2,44 @@ ; FRAG: new compiler @in(r0.x) in0 @in(r0.y) in1 -@out(r4.x) out0 -@out(r4.y) out1 -@out(r4.z) out2 -@out(r4.w) out3 +@out(r1.x) out0 +@out(r1.y) out1 +@out(r1.z) out2 +@out(r1.w) out3 +@const(c0.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.z, 4, r0.x -bary.f r0.w, 8, r0.x -bary.f r1.x, 9, r0.x -bary.f r1.y, 5, r0.x +bary.f r0.w, 5, r0.x +bary.f r1.x, 8, r0.x +bary.f r1.y, 9, r0.x mov.f32f32 r1.z, r0.z -mov.f32f32 r2.x, r0.z -mov.f32f32 r0.z, r0.w -mov.f32f32 r1.w, r1.y -mov.f32f32 r2.y, r1.y -mov.f32f32 r0.w, r1.x -bary.f r1.x, 3, r0.x -bary.f r1.y, 2, r0.x -bary.f r2.z, 1, r0.x +mov.f32f32 r1.w, r0.w +bary.f r2.x, 3, r0.x +bary.f r2.y, 2, r0.x +sam (f32)(xyz)r2.z, r0.z, s#1, t#1 +(sy)(ss)add.f r0.z, c0.y, (neg)r3.x +bary.f r0.w, 1, r0.x +add.f r3.y, c0.y, (neg)r2.w +add.f r3.z, c0.y, (neg)r2.z +sam (f32)(xyzw)r3.w, r1.z, s#0, t#0 +(sy)mul.f r0.z, r0.z, r4.y +(ss)mul.f r1.z, r3.x, r2.y +mul.f r1.w, r3.y, r4.x +mul.f r0.w, r2.w, r0.w +mul.f r2.y, r3.z, r3.w +add.f r0.z, r1.z, r0.z +sam (f32)(w)r2.w, r1.x, s#2, t#2 bary.f (ei)r0.x, 0, r0.x -sam (f32)(xyz)r2.w, r1.z, s#1, t#1 -(sy)add.f r0.y, c0.y, (neg)r3.y -(ss)nop -sam (f32)(xyzw)r1.z, r2.x, s#0, t#0 -add.f r3.z, c0.y, (neg)r3.x -add.f r3.w, c0.y, (neg)r2.w -(sy)mul.f r4.w, r1.x, r2.y -mul.f r0.y, r0.y, r2.x -mul.f r1.x, r3.y, r1.y -mul.f r1.y, r3.z, r1.w -mul.f r1.z, r3.w, r1.z -mul.f r1.w, r3.x, r2.z -add.f r0.y, r1.x, r0.y -sam (f32)(w)r3.x, r0.z, s#2, t#2 -mul.f r0.x, r2.w, r0.x +add.f r0.y, r0.w, r1.w +mul.f r1.w, r2.x, r4.z +(sy)mul.f r1.z, r0.z, r3.z +mul.f r0.x, r2.z, r0.x +(ss)mul.f r1.y, r0.y, r3.z (rpt1)nop -(sy)mul.f r4.z, r0.y, r3.w -add.f r0.y, r1.w, r1.y -add.f r0.x, r0.x, r1.z -(rpt1)nop -mul.f r4.y, r0.y, r3.w -mul.f r4.x, r0.x, r3.w +add.f r0.x, r0.x, r2.y +(rpt2)nop +mul.f r1.x, r0.x, r3.z end -nop -nop -nop -; FRAG: outputs: r4.x (1:0) -; FRAG: inputs: r0.y (1:0,cm=f,il=8,b=1) r1.x (5:0,cm=f,il=12,b=1) r2.x (5:1,cm=f,il=16,b=1) -; FRAG: 39 instructions, 0 half, 5 full -; pos (bary): r0.x -; color: r4.x +; FRAG: outputs: r1.x (1:0) +; FRAG: inputs: r1.y (1:0,cm=f,il=8,b=1) r1.x (5:0,cm=f,il=12,b=1) r2.x (5:1,cm=f,il=16,b=1) +; FRAG: 35 instructions, 0 half, 5 full diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-128.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-128.asm index 948d48e..270bdcf 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-128.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-128.asm @@ -1,9 +1,9 @@ ; options: ; VERT: new compiler -@in(r0.x) in0 -@in(r0.y) in1 -@in(r0.z) in2 -@in(r0.w) in3 +@in(r4.x) in0 +@in(r4.y) in1 +@in(r4.z) in2 +@in(r4.w) in3 @in(r1.x) in4 @in(r1.y) in5 @in(r1.z) in6 @@ -12,72 +12,56 @@ @in(r2.y) in9 @in(r2.z) in10 @in(r2.w) in11 -@out(r3.w) out0 -@out(r4.x) out1 -@out(r4.y) out2 -@out(r4.z) out3 -@out(r4.w) out4 -@out(r5.x) out5 -@out(r5.y) out6 -@out(r5.z) out7 -@out(r2.w) out8 -@out(r3.x) out9 -@out(r3.y) out10 -@out(r3.z) out11 -@out(r0.w) out12 -@out(r1.x) out13 -@out(r1.y) out14 -@out(r1.z) out15 -(sy)(ss)mul.f r3.x, c3.x, r0.x -mul.f r3.y, c2.x, r0.x -mad.f32 r3.x, c3.y, r0.y, r3.x -mad.f32 r3.y, c2.y, r0.y, r3.y -mul.f r3.z, c1.x, r0.x -mul.f r3.w, c0.x, r0.x -mov.f32f32 r3.x, r3.x -mov.f32f32 r3.y, r3.y -mad.f32 r3.x, c3.z, r0.z, r3.x -mad.f32 r3.y, c2.z, r0.z, r3.y -mad.f32 r3.z, c1.y, r0.y, r3.z -mad.f32 r0.y, c0.y, r0.y, r3.w -mov.f32f32 r3.x, r3.x -mov.f32f32 r3.y, r3.y -mad.f32 r4.z, c3.w, r0.w, r3.x -mad.f32 r4.y, c2.w, r0.w, r3.y -mov.f32f32 r3.x, r3.z -mov.f32f32 r0.y, r0.y -mad.f32 r3.x, c1.z, r0.z, r3.x -mad.f32 r0.y, c0.z, r0.z, r0.y -mov.f32f32 r1.w, r1.w -mov.f32f32 r1.z, r1.z -mov.f32f32 r3.x, r3.x -mov.f32f32 r0.y, r0.y -mad.f32 r4.x, c1.w, r0.w, r3.x -mad.f32 r3.w, c0.w, r0.w, r0.y -max.f r0.y, r1.w, c5.x -max.f r0.w, r1.z, c5.x -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.x, r1.x -min.f r5.z, r0.y, c5.y -min.f r5.y, r0.w, c5.y -max.f r0.y, r1.y, c5.x -max.f r0.w, r1.x, c5.x -(rpt1)nop -min.f r5.x, r0.y, c5.y -min.f r4.w, r0.w, c5.y -mad.f32 r1.z, c4.x, r0.z, c4.y -mad.f32 r1.y, c4.x, r0.z, c4.y -mad.f32 r1.x, c4.x, r0.z, c4.y -mad.f32 r0.w, c4.x, r0.x, c4.y -mov.f32f32 r3.z, r2.w -mov.f32f32 r3.y, r2.z -mov.f32f32 r3.x, r2.y -mov.f32f32 r2.w, r2.x +@out(r0.x) out0 +@out(r0.y) out1 +@out(r0.z) out2 +@out(r0.w) out3 +@out(r1.x) out4 +@out(r1.y) out5 +@out(r1.z) out6 +@out(r1.w) out7 +@out(r2.x) out8 +@out(r2.y) out9 +@out(r2.z) out10 +@out(r2.w) out11 +@out(r3.x) out12 +@out(r3.y) out13 +@out(r3.z) out14 +@out(r3.w) out15 +@const(c5.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 +(sy)(ss)mul.f r0.x, c3.x, r4.x +mul.f r0.y, c2.x, r4.x +mad.f32 r0.x, c3.y, r4.y, r0.x +mad.f32 r0.y, c2.y, r4.y, r0.y +mad.f32 r0.x, c3.z, r4.z, r0.x +mad.f32 r0.y, c2.z, r4.z, r0.y +mad.f32 r0.w, c3.w, r4.w, r0.x +mad.f32 r0.z, c2.w, r4.w, r0.y +mul.f r0.x, c1.x, r4.x +mul.f r0.y, c0.x, r4.x +mad.f32 r0.x, c1.y, r4.y, r0.x +mad.f32 r0.y, c0.y, r4.y, r0.y +mad.f32 r0.x, c1.z, r4.z, r0.x +mad.f32 r3.x, c0.z, r4.z, r0.y +mad.f32 r0.y, c1.w, r4.w, r0.x +mad.f32 r0.x, c0.w, r4.w, r3.x +max.f r1.w, r1.w, c5.x +max.f r1.z, r1.z, c5.x +max.f r1.y, r1.y, c5.x +max.f r1.x, r1.x, c5.x +min.f r1.w, r1.w, c5.y +min.f r1.z, r1.z, c5.y +min.f r1.y, r1.y, c5.y +min.f r1.x, r1.x, c5.y +mad.f32 r3.w, c4.x, r4.z, c4.y +mad.f32 r3.z, c4.x, r4.z, c4.y +mad.f32 r3.y, c4.x, r4.z, c4.y +mad.f32 r3.x, c4.x, r4.x, c4.y end nop nop -; VERT: outputs: r3.w (0:0) r4.w (1:0) r2.w (5:0) r0.w (5:1) -; VERT: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (0:0,cm=f,il=12,b=0) r2.x (0:0,cm=f,il=16,b=0) -; VERT: 47 instructions, 0 half, 6 full -; pos: r3.w +nop +; VERT: outputs: r0.x (0:0) r1.x (1:0) r2.x (5:0) r3.x (5:1) +; VERT: inputs: r4.x (0:0,cm=f,il=8,b=0) r1.x (0:0,cm=f,il=12,b=0) r2.x (0:0,cm=f,il=16,b=0) +; VERT: 29 instructions, 0 half, 5 full diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-130.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-130.asm index eedb778..284e180 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-130.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-130.asm @@ -1,55 +1,51 @@ ; options: ; VERT: new compiler -@in(r0.x) in0 -@in(r0.y) in1 -@in(r0.z) in2 -@in(r0.w) in3 -@in(r1.x) in4 -@in(r1.y) in5 -@in(r1.z) in6 -@in(r1.w) in7 -@out(r2.w) out0 -@out(r3.x) out1 -@out(r3.y) out2 -@out(r3.z) out3 -@out(r1.w) out4 -@out(r2.x) out5 -@out(r2.y) out6 -@out(r2.z) out7 -(sy)(ss)mov.f32f32 r1.w, r1.w -mov.f32f32 r1.z, r1.z -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.x, r1.x -max.f r1.w, r1.w, c4.x -max.f r1.z, r1.z, c4.x -max.f r1.y, r1.y, c4.x -max.f r1.x, r1.x, c4.x -min.f r2.z, r1.w, c4.y -min.f r2.y, r1.z, c4.y -min.f r2.x, r1.y, c4.y +@in(r1.x) in0 +@in(r1.y) in1 +@in(r1.z) in2 +@in(r1.w) in3 +@in(r2.x) in4 +@in(r2.y) in5 +@in(r2.z) in6 +@in(r2.w) in7 +@out(r0.x) out0 +@out(r0.y) out1 +@out(r0.z) out2 +@out(r0.w) out3 +@out(r1.x) out4 +@out(r1.y) out5 +@out(r1.z) out6 +@out(r1.w) out7 +@const(c4.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000 +(sy)(ss)mul.f r0.x, r1.x, c0.w +mul.f r0.y, r1.x, c0.z +mad.f32 r0.x, c1.w, r1.y, r0.x +mad.f32 r0.y, c1.z, r1.y, r0.y +mad.f32 r0.x, c2.w, r1.z, r0.x +mad.f32 r0.y, c2.z, r1.z, r0.y +mad.f32 r0.w, c3.w, r1.w, r0.x +mad.f32 r0.z, c3.z, r1.w, r0.y +mul.f r0.x, r1.x, c0.y +mul.f r0.y, r1.x, c0.x +mad.f32 r0.x, c1.y, r1.y, r0.x +mad.f32 r0.y, c1.x, r1.y, r0.y +mad.f32 r0.x, c2.y, r1.z, r0.x +mad.f32 r1.x, c2.x, r1.z, r0.y +mad.f32 r0.y, c3.y, r1.w, r0.x +mad.f32 r0.x, c3.x, r1.w, r1.x +max.f r1.x, r2.w, c4.x +max.f r1.y, r2.z, c4.x +max.f r2.y, r2.y, c4.x +max.f r2.x, r2.x, c4.x min.f r1.w, r1.x, c4.y -mul.f r1.x, r0.x, c0.w -mul.f r1.y, r0.x, c0.z -mad.f32 r1.x, c1.w, r0.y, r1.x -mad.f32 r1.y, c1.z, r0.y, r1.y -mad.f32 r1.x, c2.w, r0.z, r1.x -mad.f32 r1.y, c2.z, r0.z, r1.y -mad.f32 r3.z, c3.w, r0.w, r1.x -mad.f32 r3.y, c3.z, r0.w, r1.y -mul.f r1.x, r0.x, c0.y -mul.f r0.x, r0.x, c0.x -mad.f32 r1.x, c1.y, r0.y, r1.x -mad.f32 r0.x, c1.x, r0.y, r0.x -mad.f32 r0.y, c2.y, r0.z, r1.x -mad.f32 r0.x, c2.x, r0.z, r0.x -mad.f32 r3.x, c3.y, r0.w, r0.y -mad.f32 r2.w, c3.x, r0.w, r0.x +min.f r1.z, r1.y, c4.y +min.f r1.y, r2.y, c4.y +min.f r1.x, r2.x, c4.y end nop nop nop -; VERT: outputs: r2.w (0:0) r1.w (1:0) -; VERT: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (0:0,cm=f,il=12,b=0) -; VERT: 29 instructions, 0 half, 4 full -; pos: r2.w +; VERT: outputs: r0.x (0:0) r1.x (1:0) +; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) r2.x (0:0,cm=f,il=12,b=0) +; VERT: 25 instructions, 0 half, 3 full diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-131.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-131.asm index 2639ca3..3db9988 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-131.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-131.asm @@ -2,45 +2,33 @@ ; FRAG: new compiler @in(r0.x) in0 @in(r0.y) in1 -@out(r0.w) out0 -@out(r1.x) out1 -@out(r1.y) out2 -@out(r1.z) out3 +@out(r1.x) out0 +@out(r1.y) out1 +@out(r1.z) out2 +@out(r1.w) out3 +@const(c0.x) 0x3e99999a, 0x3f170a3d, 0x3de147ae, 0x00000000 +@const(c1.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.z, 0, r0.x -bary.f (ei)r0.x, 1, r0.x -(rpt1)nop -mov.f32f32 r0.y, r0.z -mov.f32f32 r0.x, r0.x -(rpt1)nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.x +bary.f (ei)r0.w, 1, r0.x (rpt5)nop -sam (f32)(xyzw)r0.x, r0.y, s#0, t#0 -(sy)mul.f r0.x, r0.x, c0.x -mov.f32f32 r0.w, r0.w -mad.f32 r0.x, c0.y, r0.y, r0.x -(rpt1)nop -mov.f32f32 r1.z, r0.w -mov.f32f32 r0.x, r0.x +sam (f32)(xyzw)r1.x, r0.z, s#0, t#0 +(sy)mul.f r0.x, r1.x, c0.x nop -mad.f32 r0.x, c0.z, r0.z, r0.x +mad.f32 r0.x, c0.y, r1.y, r0.x +nop +mad.f32 r1.x, c0.z, r1.z, r0.x (rpt2)nop -(ss)mov.f32f32 r0.y, r0.x -mov.f32f32 r0.z, r0.x +mov.f32f32 r0.x, r1.x +mov.f32f32 r0.y, r1.x +(rpt1)nop mov.f32f32 r0.x, r0.x -nop mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.x, r0.x -nop +(rpt1)nop +mov.f32f32 r1.z, r0.x mov.f32f32 r1.y, r0.y -mov.f32f32 r1.x, r0.z -mov.f32f32 r0.w, r0.x end nop -; FRAG: outputs: r0.w (1:0) +; FRAG: outputs: r1.x (1:0) ; FRAG: inputs: r0.x (5:9,cm=f,il=8,b=1) -; FRAG: 41 instructions, 0 half, 2 full -; pos (bary): r0.x -; color: r0.w +; FRAG: 28 instructions, 0 half, 2 full diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-133.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-133.asm index a7cfae4..22ca830 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-133.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-133.asm @@ -2,37 +2,28 @@ ; FRAG: new compiler @in(r0.x) in0 @in(r0.y) in1 -@out(r1.y) out0 -@out(r1.z) out1 -@out(r1.w) out2 -@out(r2.x) out3 +@out(r1.x) out0 +@out(r1.y) out1 +@out(r1.z) out2 +@out(r1.w) out3 +@const(c0.x) 0x3ecccccd, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)bary.f r0.z, 0, r0.x -bary.f (ei)r0.x, 1, r0.x -mov.f32f32 r0.y, (0.000000) -nop -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.x, r0.x -(rpt1)nop -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.x -(rpt5)nop -sam (f32)(xyzw)r0.z, r0.z, s#0, t#0 -(sy)cmps.f.lt r0.x, r1.y, c0.x -mov.f32f32 r2.x, r1.y -mov.f32f32 r1.w, r1.x -mov.f32f32 r1.z, r0.w -cov.u32f32 r0.x, r0.x -(rpt1)nop -mov.f32f32 r1.y, r0.z -cmps.f.ne p0.x, r0.x, r0.y +bary.f (ei)r0.w, 1, r0.x +mov.f32f32 r0.x, (0.000000) +(rpt4)nop +sam (f32)(xyzw)r1.x, r0.z, s#0, t#0 +(sy)cmps.f.lt r0.y, r1.w, c0.x +(rpt2)nop +cov.u32f32 r0.y, r0.y +(rpt2)nop +cmps.f.ne p0.x, r0.y, r0.x (rpt5)nop kill p0.x end nop nop -; FRAG: outputs: r1.y (1:0) +nop +; FRAG: outputs: r1.x (1:0) ; FRAG: inputs: r0.x (5:9,cm=f,il=8,b=1) -; FRAG: 34 instructions, 0 half, 3 full -; pos (bary): r0.x -; color: r1.y +; FRAG: 26 instructions, 0 half, 2 full diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-135.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-135.asm index 7fd0d8e..85a697c 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-135.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-135.asm @@ -2,48 +2,38 @@ ; FRAG: new compiler @in(r0.z) in2 @in(r0.w) in3 -@out(r1.x) out0 -@out(r1.y) out1 -@out(r1.z) out2 -@out(r1.w) out3 +@out(r2.x) out0 +@out(r2.y) out1 +@out(r2.z) out2 +@out(r2.w) out3 +@const(c4.x) 0x3f000000, 0x00000000, 0x3f800000, 0x3fb8aa65 +@const(c5.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)add.f r0.x, r0.w, c4.y -mov.f32f32 r0.y, c0.w -absneg.f r0.w, (neg)c2.x +absneg.f r0.y, (neg)c2.x +mov.f32f32 r2.w, c0.w add.f r0.z, r0.z, c4.y (rpt2)nop rcp r0.x, r0.x (ss)mul.f r0.x, r0.z, r0.x -mov.f32f32 r1.w, r0.y -mul.f r0.y, r0.w, c2.x -nop -mov.f32f32 r0.x, r0.x +mul.f r0.y, r0.y, c2.x (rpt2)nop mul.f r0.y, r0.y, r0.x -(rpt2)nop -mov.f32f32 r0.y, r0.y +mov.f32f32 r0.x, r0.x (rpt2)nop mul.f r0.x, r0.y, r0.x (rpt2)nop -mov.f32f32 r0.x, r0.x -(rpt2)nop mul.f r0.x, r0.x, c4.w -(rpt2)nop -mov.f32f32 r0.x, r0.x (rpt5)nop exp2 r0.x, r0.x -(ss)mov.f32f32 r0.x, r0.x -(rpt2)nop -add.f r0.y, c5.y, (neg)r0.x -mul.f r0.x, r0.x, c4.z +(ss)mov.f32f32 r0.y, r0.x +(ss)mul.f r0.x, r0.x, c4.z (rpt1)nop +add.f r0.y, c5.y, (neg)r0.y +(rpt2)nop mul.f r0.y, r0.y, c2.y (rpt2)nop add.f r0.x, r0.x, r0.y (rpt2)nop -mov.f32f32 r0.x, r0.x -(rpt2)nop -mov.f32f32 r0.x, r0.x -(rpt2)nop max.f r0.x, r0.x, c4.y (rpt2)nop min.f r0.x, r0.x, c4.z @@ -51,25 +41,22 @@ min.f r0.x, r0.x, c4.z add.f r0.y, c5.y, (neg)r0.x add.f r0.z, c5.y, (neg)r0.x add.f r0.w, c5.y, (neg)r0.x -mul.f r2.x, r0.x, c0.z +mul.f r1.x, r0.x, c0.z mul.f r0.y, r0.y, c1.z mul.f r0.z, r0.z, c1.y mul.f r0.w, r0.w, c1.x -mul.f r2.y, r0.x, c0.y -add.f r0.y, r2.x, r0.y +nop +add.f r2.z, r1.x, r0.y +mul.f r0.y, r0.x, c0.y mul.f r0.x, r0.x, c0.x (rpt1)nop -mov.f32f32 r1.z, r0.y -add.f r0.y, r2.y, r0.z -add.f r0.x, r0.x, r0.w -(rpt1)nop -mov.f32f32 r1.y, r0.y -mov.f32f32 r1.x, r0.x +add.f r2.y, r0.y, r0.z +add.f r2.x, r0.x, r0.w end -; FRAG: outputs: r1.x (1:0) +nop +nop +nop +; FRAG: outputs: r2.x (1:0) ; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) -; FRAG: 96 instructions, 0 half, 3 full -; pos (bary): r1.x -; color: r1.x -; fragcoord: r0.x +; FRAG: 70 instructions, 0 half, 3 full diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-139.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-139.asm index c6e09ad..593f290 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-139.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-139.asm @@ -4,246 +4,167 @@ @in(r0.w) in3 @in(r1.x) in4 @in(r1.y) in5 -@out(r5.y) out0 -@out(r5.z) out1 -@out(r5.w) out2 -@out(r6.x) out3 +@out(r2.x) out0 +@out(r2.y) out1 +@out(r2.z) out2 +@out(r2.w) out3 +@const(c12.x) 0x3f000000, 0x00000000, 0x3f800000, 0xbb449ba6 +@const(c13.x) 0xbf000000, 0x40000000, 0x3f800000, 0x3de38866 +@const(c14.x) 0x3fb8aa65, 0x3cf5c28f, 0x00000000, 0x00000000 +@const(c15.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.x, 8, r1.x -bary.f r0.y, 4, r1.x -add.f r0.w, r0.w, c12.y +add.f r0.y, r0.w, c12.y +bary.f r0.w, 4, r1.x bary.f r1.z, 9, r1.x add.f r1.w, r0.x, c13.x -mul.f r2.x, r0.y, r0.y -bary.f r2.y, 5, r1.x -add.f r2.z, r1.z, c13.x +bary.f r2.x, 12, r1.x +mul.f r2.y, r0.w, r0.w +bary.f r2.z, 5, r1.x floor.f r2.w, r1.w -rcp r0.w, r0.w +rcp r0.y, r0.y add.f r0.z, r0.z, c12.y -mad.f32 r2.x, r2.y, r2.y, r2.x -floor.f r3.x, r2.z +add.f r3.x, r1.z, c13.x +mad.f32 r2.y, r2.z, r2.z, r2.y add.f r1.w, r1.w, (neg)r2.w -(ss)mul.f r0.z, r0.z, r0.w -(ss)mov.f32f32 r0.w, r2.x -bary.f r2.x, 6, r1.x -mov.f32f32 r1.w, r1.w -mov.f32f32 r0.z, r0.z -absneg.f r2.w, (neg)c8.x -mad.f32 r0.w, r2.x, r2.x, r0.w -mul.f r3.y, c12.x, r1.w -add.f r2.z, r2.z, (neg)r3.x -mul.f r2.w, r2.w, c8.x -add.f r3.x, c13.y, (neg)r1.w -mov.f32f32 r3.y, r3.y -mov.f32f32 r2.z, r2.z -mul.f r2.w, r2.w, r0.z -rsq r0.w, r0.w -(ss)mov.f32f32 r0.w, r0.w -add.f r0.x, r0.x, (neg)r3.y -mul.f r3.y, c12.x, r2.z -mov.f32f32 r2.w, r2.w -mul.f r0.y, r0.y, r0.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r3.y, r3.y -mul.f r0.z, r2.w, r0.z +(ss)mul.f r0.y, r0.z, r0.y +absneg.f r0.z, (neg)c8.x +bary.f r2.w, 6, r1.x +mov.f32f32 r3.y, r1.w +floor.f r3.z, r3.x +mul.f r0.z, r0.z, c8.x +mad.f32 r2.y, r2.w, r2.w, r2.y +mul.f r3.w, c12.x, r3.y +add.f r3.x, r3.x, (neg)r3.z +mul.f r0.z, r0.z, r0.y mov.f32f32 r0.y, r0.y -add.f r2.w, c13.x, r0.x -bary.f r3.z, 16, r1.x -mov.f32f32 r0.z, r0.z +add.f r0.x, r0.x, (neg)r3.w +mov.f32f32 r3.z, r3.x +rsq r2.y, r2.y +(ss)mov.f32f32 r3.w, r2.y +mul.f r0.y, r0.z, r0.y +mov.f32f32 r0.z, r0.x +mul.f r4.x, c12.x, r3.z add.f r0.x, c13.z, r0.x -mov.f32f32 r2.w, r2.w -mul.f r0.y, r0.y, r3.z -mul.f r2.y, r2.y, r0.w -mul.f r0.z, r0.z, c14.x -mul.f r2.w, r2.w, c3.z -mov.f32f32 r0.x, r0.x -mov.f32f32 r2.y, r2.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r3.z, r2.w -bary.f r3.w, 17, r1.x -mul.f r0.x, r0.x, c3.z -mov.f32f32 r2.w, r2.w -mov.f32f32 r4.x, r3.z -add.f r1.z, r1.z, (neg)r3.y -mad.f32 r0.y, r2.y, r3.w, r0.y -exp2 r0.z, r0.z -(ss)mov.f32f32 r0.z, r0.z -mov.f32f32 r2.y, r0.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r0.y, r0.y -mul.f r0.w, r2.x, r0.w -add.f r2.x, c15.y, (neg)r0.z -add.f r3.y, c13.x, r1.z -mov.f32f32 r2.y, r2.y -mov.f32f32 r0.w, r0.w -mul.f r2.x, r2.x, c8.y -mov.f32f32 r3.y, r3.y -bary.f r3.z, 18, r1.x -mul.f r0.z, r0.z, c12.z -mov.f32f32 r4.w, r2.y -mul.f r2.y, r3.y, c3.w -mad.f32 r0.y, r0.w, r3.z, r0.y -add.f r0.z, r0.z, r2.x -add.f r0.w, c13.z, r1.z -mov.f32f32 r1.z, r2.y -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r4.y, r1.z -bary.f r1.z, 10, r1.x -max.f r0.y, c12.y, r0.y -mov.f32f32 r0.z, r0.z -mul.f r0.w, r0.w, c3.w -add.f r1.z, r1.z, c12.w -mov.f32f32 r0.y, r0.y -max.f r0.z, r0.z, c12.y -mov.f32f32 r2.x, r0.w -mov.f32f32 r3.y, r1.z -mov.f32f32 r0.x, r0.x -mov.f32f32 r5.z, r2.w -mov.f32f32 r0.w, r0.w -mov.f32f32 r4.z, r3.y -log2 r0.y, r0.y -(ss)mul.f r0.y, c9.x, r0.y -min.f r0.z, r0.z, c12.z -mov.f32f32 r2.x, r2.x -mov.f32f32 r3.y, r0.x -mov.f32f32 r5.w, r0.w -mov.f32f32 r0.x, r2.y -sam.s (f32)(x)r0.w, r4.x, s#2, t#2 -(sy)mov.f32f32 r0.w, r0.w -mov.f32f32 r0.y, r0.y -add.f r2.y, c15.y, (neg)r0.z -add.f r2.w, c15.y, (neg)r0.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r3.x, r3.x -add.f r3.z, c13.y, (neg)r2.z -mul.f r2.y, r2.y, c7.z -mul.f r2.w, r2.w, c7.y -add.f r3.w, c15.y, (neg)r0.z -(ss)mov.f32f32 r4.x, r3.z +mul.f r0.w, r0.w, r3.w +add.f r0.z, c13.x, r0.z +mul.f r0.y, r0.y, c14.x +bary.f r4.y, 16, r1.x +add.f r1.z, r1.z, (neg)r4.x +mul.f r4.z, r0.z, c3.z +mul.f r5.y, r0.x, c3.z +mul.f r0.x, r0.w, r4.y +add.f r0.z, c13.z, r1.z +mov.f32f32 r6.x, r4.z +mov.f32f32 r0.w, r1.z exp2 r0.y, r0.y -mov.f32f32 r3.z, c6.y -mov.f32f32 r4.y, c6.x -mov.f32f32 r4.z, c6.z -mul.f r5.x, r3.x, r4.x -mul.f r5.y, r3.z, c10.y -mul.f r4.y, r4.y, c10.x -mul.f r6.y, r3.w, c7.x -mul.f r0.w, r5.x, r0.w -mov.f32f32 r3.z, r0.x -mov.f32f32 r0.x, r1.z -mul.f r4.z, r4.z, c10.z -(ss)mul.f r5.x, r5.y, r0.y -mul.f r4.y, r4.y, r0.y -mov.f32f32 r3.w, r0.x -mul.f r0.x, r4.z, r0.y -(ss)mov.f32f32 r0.y, r5.x -mov.f32f32 r4.y, r4.y -bary.f r4.z, 14, r1.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r5.x, r2.x -sam.s (f32)(x)r2.x, r3.y, s#2, t#2 -(sy)mov.f32f32 r2.x, r2.x -add.f r1.w, r1.w, c12.z -(ss)mov.f32f32 r3.y, r4.z -mov.f32f32 r3.z, r1.z -mov.f32f32 r1.z, r1.z -mul.f r3.w, r1.w, r4.x -mov.f32f32 r6.z, r3.y -bary.f r3.y, 15, r1.x -mov.f32f32 r5.y, r3.z -mad.f32 r0.w, r3.w, r2.x, r0.w -mov.f32f32 r6.x, r1.z -add.f r1.z, r2.z, c12.z -bary.f r2.x, 12, r1.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r2.z, r3.y -sam.s (f32)(x)r3.y, r4.w, s#2, t#2 -(sy)mov.f32f32 r3.y, r3.y -mul.f r3.x, r3.x, r1.z -sam.s (f32)(x)r3.z, r5.z, s#2, t#2 -(sy)mov.f32f32 r3.z, r3.z -mov.f32f32 r6.w, r2.z -mov.f32f32 r2.x, r2.x -mul.f r1.z, r1.w, r1.z -mad.f32 r0.w, r3.x, r3.z, r0.w -bary.f r1.w, 13, r1.x -mov.f32f32 r3.z, r2.x -mov.f32f32 r2.x, c12.z -mov.f32f32 r0.w, r0.w -(ss)nop -sam (f32)(xyz)r4.z, r6.z, s#0, t#0 -bary.f r2.z, 2, r1.x -mad.f32 r0.w, r1.z, r3.y, r0.w -bary.f r1.z, 1, r1.x -bary.f (ei)r1.x, 0, r1.x -(sy)mad.f32 r0.x, r5.x, r2.z, r0.x -mov.f32f32 r0.w, r0.w -mad.f32 r0.y, r4.w, r1.z, r0.y -mad.f32 r1.x, r4.z, r1.x, r4.y +(ss)mov.f32f32 r1.z, r0.y +mul.f r2.z, r2.z, r3.w +bary.f r3.w, 17, r1.x +add.f r0.w, c13.x, r0.w +add.f r1.z, c15.y, (neg)r1.z +mov.f32f32 r6.w, r5.y +mul.f r5.z, r0.z, c3.w +mul.f r7.x, r0.w, c3.w +mul.f r0.z, r1.z, c8.y +(ss)mul.f r0.y, r0.y, c12.z +mad.f32 r0.x, r2.z, r3.w, r0.x +mov.f32f32 r6.y, r7.x +bary.f r0.w, 10, r1.x +add.f r0.y, r0.y, r0.z +mul.f r0.z, r2.w, r2.y +bary.f r1.z, 18, r1.x +add.f r5.w, r0.w, c12.w +max.f r0.y, r0.y, c12.y +mov.f32f32 r4.w, r5.z +mad.f32 r0.x, r0.z, r1.z, r0.x +mov.f32f32 r6.z, r5.w +min.f r0.y, r0.y, c12.z +mov.f32f32 r7.y, r5.w +mov.f32f32 r5.x, r5.w +max.f r0.x, c12.y, r0.x +nop +add.f r0.z, c15.y, (neg)r0.y +sam.s (f32)(x)r7.z, r6.x, s#2, t#2 +add.f r0.w, c13.y, (neg)r3.y +add.f r1.z, c15.y, (neg)r0.y +add.f r2.y, c15.y, (neg)r0.y +mul.f r0.z, r0.z, c7.z +mov.f32f32 r2.z, r0.w +add.f r2.w, c13.y, (neg)r3.z +mul.f r1.z, r1.z, c7.y +mul.f r3.y, r2.y, c7.x +log2 r0.x, r0.x +(ss)mul.f r0.x, c9.x, r0.x +mov.f32f32 r2.y, r2.w +sam.s (f32)(x)r3.z, r6.w, s#2, t#2 nop +sam.s (f32)(x)r3.w, r4.z, s#2, t#2 +sam.s (f32)(x)r4.x, r5.y, s#2, t#2 +add.f r1.w, r1.w, c12.z +add.f r3.x, r3.x, c12.z +mul.f r2.z, r2.z, r2.y +bary.f r2.y, 13, r1.x +mov.f32f32 r4.y, c6.z +(ss)bary.f r4.z, 14, r1.x +(sy)mul.f r2.z, r2.z, r7.z +mul.f r2.w, r1.w, r2.w +exp2 r0.x, r0.x +mov.f32f32 r4.w, c6.y +mov.f32f32 r5.x, c6.x +mul.f r4.y, r4.y, c10.z +mad.f32 r2.z, r2.w, r3.z, r2.z +mul.f r0.w, r0.w, r3.x +mul.f r2.w, r4.w, c10.y +mul.f r3.z, r5.x, c10.x +(ss)mul.f r4.y, r4.y, r0.x +mad.f32 r0.w, r0.w, r3.w, r2.z +mul.f r1.w, r1.w, r3.x +bary.f r4.w, 15, r1.x +mul.f r2.z, r2.w, r0.x +(ss)mul.f r0.x, r3.z, r0.x +mad.f32 r0.w, r1.w, r4.x, r0.w +sam (f32)(w)r5.x, r2.x, s#1, t#1 +(sy)cmps.f.lt r1.w, r5.w, c14.y +mov.f32f32 r2.w, c12.z +(ss)bary.f r2.x, 2, r1.x mul.f r0.w, c13.w, r0.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.x, r1.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.y, r1.w -mov.f32f32 r6.x, r2.x -mov.f32f32 r1.z, c12.y +sam (f32)(xyz)r4.z, r4.z, s#0, t#0 +bary.f r2.y, 0, r1.x +bary.f (ei)r1.x, 1, r1.x +cov.u32f32 r1.y, r1.w +mov.f32f32 r1.w, r0.w +(sy)mad.f32 r2.x, r5.x, r2.x, r4.y +mad.f32 r1.x, r4.w, r1.x, r2.z +mad.f32 r0.x, r4.z, r2.y, r0.x +cmps.f.ne r1.y, r1.y, c12.y +mul.f r2.x, r2.x, r1.w +mul.f r1.x, r1.x, r1.w +mad.f32 r1.w, c5.z, r5.x, r2.x +mad.f32 r1.x, c5.y, r4.w, r1.x mul.f r0.x, r0.x, r0.w -mul.f r0.y, r0.y, r0.w -mul.f r0.w, r1.x, r0.w -mov.f32f32 r3.w, r1.y -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y -mad.f32 r0.x, c5.z, r5.x, r0.x -mad.f32 r0.y, c5.y, r4.w, r0.y -mov.f32f32 r0.w, r0.w +mov.f32f32 r0.w, c12.y +mul.f r1.w, r0.y, r1.w +mul.f r1.x, r0.y, r1.x +mad.f32 r0.x, c5.x, r4.z, r0.x +sel.b32 r0.w, r0.w, r1.y, r5.w +add.f r0.z, r1.w, r0.z +add.f r1.x, r1.x, r1.z +mul.f r0.x, r0.y, r0.x nop -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y -mad.f32 r0.w, c5.x, r4.z, r0.w -sam (f32)(w)r3.x, r3.z, s#1, t#1 -(sy)cmps.f.lt r1.x, r3.w, c14.y -mul.f r0.x, r0.z, r0.x -mul.f r0.y, r0.z, r0.y -mov.f32f32 r0.w, r0.w -cov.u32f32 r1.x, r1.x -add.f r0.x, r0.x, r2.y -add.f r0.y, r0.y, r2.w -mul.f r0.z, r0.z, r0.w -cmps.f.ne r0.w, r1.x, c12.y -mov.f32f32 r1.x, r3.w -(rpt2)nop -mov.f32f32 r1.x, r1.x -add.f r0.z, r0.z, r6.y +mul.f r0.y, r0.z, r0.w +mul.f r0.z, r1.x, r0.w (rpt1)nop -sel.b32 r0.w, r1.z, r0.w, r1.x +mul.f r2.z, r0.y, c4.z +mul.f r2.y, r0.z, c4.y +add.f r0.x, r0.x, r3.y (rpt2)nop mul.f r0.x, r0.x, r0.w -mul.f r0.y, r0.y, r0.w -mul.f r0.z, r0.z, r0.w -nop -mul.f r0.x, r0.x, c4.z -mul.f r0.y, r0.y, c4.y -mul.f r0.z, r0.z, c4.x -nop -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z +(rpt2)nop +mul.f r2.x, r0.x, c4.x +end nop -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z nop -mov.f32f32 r5.w, r0.x -mov.f32f32 r5.z, r0.y -mov.f32f32 r5.y, r0.z -end -; FRAG: outputs: r5.y (1:0) -; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r2.x (5:10,cm=f,il=12,b=1) r3.x (5:11,cm=f,il=16,b=1) r2.z (5:12,cm=f,il=20,b=1) r5.x (5:13,cm=f,il=24,b=1) -; FRAG: 237 instructions, 0 half, 7 full -; pos (bary): r1.x -; color: r5.y -; fragcoord: r0.x +; FRAG: outputs: r2.x (1:0) +; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r2.x (5:10,cm=f,il=12,b=1) r3.x (5:11,cm=f,il=16,b=1) r4.x (5:12,cm=f,il=20,b=1) r5.x (5:13,cm=f,il=24,b=1) +; FRAG: 155 instructions, 0 half, 8 full diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-140.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-140.asm index 8e2237c..042d36d 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-140.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-140.asm @@ -1,155 +1,116 @@ ; options: ; VERT: new compiler -@in(r0.x) in0 -@in(r0.y) in1 -@in(r0.z) in2 -@in(r0.w) in4 -@in(r1.x) in5 -@in(r1.y) in6 -@in(r1.z) in8 -@in(r1.w) in9 -@out(r3.x) out0 -@out(r3.y) out1 -@out(r3.z) out2 -@out(r3.w) out3 -@out(r6.x) out4 -@out(r6.y) out5 -@out(r6.z) out6 -@out(r6.w) out7 -@out(r4.x) out8 -@out(r4.y) out9 -@out(r4.z) out10 -@out(r4.w) out11 -@out(r5.x) out12 -@out(r5.y) out13 -@out(r5.z) out14 -@out(r5.w) out15 -@out(r1.w) out16 -@out(r2.x) out17 -@out(r2.y) out18 -@out(r2.z) out19 -@out(r7.x) out20 -@out(r7.y) out21 -@out(r7.z) out22 -@out(r7.w) out23 -(sy)(ss)add.f r2.x, c4.x, (neg)r0.x -absneg.f r2.y, (neg)c5.x -mul.f r2.z, c8.w, r0.x -mul.f r2.w, c8.z, r0.x -mul.f r3.x, r2.x, r2.x -add.f r3.y, c4.y, (neg)r0.y -mul.f r3.z, r0.w, r2.y -absneg.f r3.w, (neg)c5.y -mad.f32 r2.z, c9.w, r0.y, r2.z -mad.f32 r3.x, r3.y, r3.y, r3.x -mad.f32 r2.z, c10.w, r0.z, r2.z -mad.f32 r2.w, c9.z, r0.y, r2.w -mul.f r4.x, c8.y, r0.x -mov.f32f32 r3.x, r3.x -add.f r4.y, c4.z, (neg)r0.z -mad.f32 r3.z, r1.x, r3.w, r3.z -add.f r2.z, r2.z, c11.w -mad.f32 r2.w, c10.z, r0.z, r2.w -mad.f32 r3.x, r4.y, r4.y, r3.x -mov.f32f32 r3.z, r3.z -absneg.f r4.z, (neg)c5.z -mov.f32f32 r2.z, r2.z -add.f r2.w, r2.w, c11.z -mad.f32 r4.x, c9.y, r0.y, r4.x -mul.f r4.w, c8.x, r0.x -rsq r3.x, r3.x -(ss)mov.f32f32 r3.x, r3.x -mad.f32 r3.z, r1.y, r4.z, r3.z -mov.f32f32 r5.w, r2.z -mov.f32f32 r2.z, r2.w -mad.f32 r2.x, r2.x, r3.x, r2.y -mov.f32f32 r2.y, r3.z -mad.f32 r2.w, r3.y, r3.x, r3.w -mad.f32 r3.x, r4.y, r3.x, r4.z -mov.f32f32 r2.x, r2.x -max.f r2.y, c13.x, r2.y -mov.f32f32 r2.w, r2.w -mov.f32f32 r3.x, r3.x -mul.f r3.y, r2.x, r2.x -mov.f32f32 r2.y, r2.y -mad.f32 r3.y, r2.w, r2.w, r3.y -mov.f32f32 r5.z, r2.z -mad.f32 r2.z, c10.y, r0.z, r4.x -mad.f32 r3.z, c9.x, r0.y, r4.w -mov.f32f32 r3.y, r3.y -mul.f r3.w, r2.y, c6.z -mad.f32 r3.y, r3.x, r3.x, r3.y -mul.f r4.x, r2.y, c6.y -mul.f r2.y, r2.y, c6.x -mov.f32f32 r3.w, r3.w -add.f r2.z, r2.z, c11.y -mad.f32 r3.z, c10.x, r0.z, r3.z -mul.f r4.y, c0.w, r0.x -rsq r3.y, r3.y -(ss)mov.f32f32 r3.y, r3.y -mov.f32f32 r6.z, r3.w -mov.f32f32 r3.w, r4.x -mov.f32f32 r2.y, r2.y -mul.f r3.x, r3.x, r3.y -mul.f r2.w, r2.w, r3.y -mul.f r2.x, r2.x, r3.y -mov.f32f32 r6.y, r3.w -mov.f32f32 r3.x, r3.x -mov.f32f32 r2.w, r2.w -mov.f32f32 r2.x, r2.x +@in(r6.x) in0 +@in(r6.y) in1 +@in(r6.z) in2 +@in(r2.x) in4 +@in(r2.y) in5 +@in(r2.z) in6 +@in(r4.z) in8 +@in(r4.w) in9 +@out(r0.x) out0 +@out(r0.y) out1 +@out(r0.z) out2 +@out(r0.w) out3 +@out(r1.x) out4 +@out(r1.y) out5 +@out(r1.z) out6 +@out(r1.w) out7 +@out(r2.x) out8 +@out(r2.y) out9 +@out(r2.z) out10 +@out(r2.w) out11 +@out(r3.x) out12 +@out(r3.y) out13 +@out(r3.z) out14 +@out(r3.w) out15 +@out(r4.x) out16 +@out(r4.y) out17 +@out(r4.z) out18 +@out(r4.w) out19 +@out(r5.x) out20 +@out(r5.y) out21 +@out(r5.z) out22 +@out(r5.w) out23 +@const(c13.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 +@const(c14.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 +(sy)(ss)add.f r0.x, c4.x, (neg)r6.x +absneg.f r0.y, (neg)c5.x +mul.f r0.z, c8.y, r6.x +mul.f r0.w, c8.x, r6.x +mul.f r1.x, r0.x, r0.x +add.f r1.y, c4.y, (neg)r6.y +mul.f r1.z, r2.x, r0.y +absneg.f r1.w, (neg)c5.y +mad.f32 r0.z, c9.y, r6.y, r0.z +mad.f32 r1.x, r1.y, r1.y, r1.x +add.f r2.w, c4.z, (neg)r6.z +mad.f32 r1.z, r2.y, r1.w, r1.z +absneg.f r3.x, (neg)c5.z +mad.f32 r0.z, c10.y, r6.z, r0.z +mad.f32 r1.x, r2.w, r2.w, r1.x +mad.f32 r0.w, c9.x, r6.y, r0.w +mad.f32 r1.z, r2.z, r3.x, r1.z +mul.f r3.z, c8.w, r6.x +mul.f r3.w, c8.z, r6.x +add.f r0.z, r0.z, c11.y +mad.f32 r0.w, c10.x, r6.z, r0.w +rsq r1.x, r1.x +(ss)mov.f32f32 r4.x, r1.x +mad.f32 r0.x, r0.x, r1.x, r0.y +max.f r0.y, c13.x, r1.z +mul.f r3.y, r0.z, c12.y +mad.f32 r0.z, r1.y, r4.x, r1.w +(ss)mov.f32f32 r1.x, r0.x +mad.f32 r1.y, r2.w, r4.x, r3.x nop -mov.f32f32 r7.z, r3.x -mov.f32f32 r7.y, r2.w -mov.f32f32 r7.x, r2.x -mov.f32f32 r6.x, r2.y -mul.f r2.x, r2.z, c12.y -add.f r2.y, r3.z, c11.x -mad.f32 r2.z, c1.w, r0.y, r4.y -mul.f r2.w, c0.z, r0.x -mov.f32f32 r5.y, r2.x -mul.f r2.x, r2.y, c12.x -mad.f32 r2.y, c2.w, r0.z, r2.z -mad.f32 r2.z, c1.z, r0.y, r2.w -mul.f r2.w, c0.y, r0.x -mov.f32f32 r5.x, r2.x -add.f r2.x, r2.y, c3.w -mad.f32 r2.y, c2.z, r0.z, r2.z -mad.f32 r2.z, c1.y, r0.y, r2.w -mul.f r2.w, c0.x, r0.x -mov.f32f32 r3.w, r2.x -add.f r2.x, r2.y, c3.z -mad.f32 r2.y, c2.y, r0.z, r2.z -mad.f32 r0.y, c1.x, r0.y, r2.w -mov.f32f32 r2.z, (0.000000) -mov.f32f32 r3.z, r2.x -add.f r2.x, r2.y, c3.y -mad.f32 r0.y, c2.x, r0.z, r0.y -mov.f32f32 r7.w, r2.z -mov.f32f32 r1.w, r1.w -mov.f32f32 r3.y, r2.x -add.f r0.y, r0.y, c3.x -mov.f32f32 r1.z, r1.z -mad.f32 r0.z, c7.x, r0.z, c7.y -mad.f32 r0.x, c7.x, r0.x, c7.y +mov.f32f32 r1.w, r0.z +mul.f r1.x, r1.x, r1.x +mov.f32f32 r2.w, r1.y mov.f32f32 r3.x, r0.y -mov.f32f32 r2.z, r1.w -mov.f32f32 r2.y, r1.z -mov.f32f32 r2.x, r0.z -mov.f32f32 r1.w, r0.x -mov.f32f32 r0.x, (0.000000) -mov.f32f32 r0.y, r1.y -mov.f32f32 r0.z, r1.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r4.w, r0.x -mov.f32f32 r4.z, r0.y -mov.f32f32 r4.y, r0.z -mov.f32f32 r4.x, r0.w -mov.f32f32 r0.x, (0.000000) -(rpt2)nop -mov.f32f32 r6.w, r0.x +mad.f32 r0.z, r0.z, r1.w, r1.x +mul.f r1.x, r0.y, c6.x +mad.f32 r0.y, r1.y, r2.w, r0.z +mul.f r1.z, r3.x, c6.z +mul.f r1.y, r3.x, c6.y +add.f r0.z, r0.w, c11.x +mad.f32 r0.w, c9.w, r6.y, r3.z +mad.f32 r3.z, c9.z, r6.y, r3.w +mul.f r4.x, c0.w, r6.x +rsq r0.y, r0.y +(ss)mov.f32f32 r3.w, r0.y +mul.f r5.x, r0.x, r0.y +mul.f r3.x, r0.z, c12.x +mad.f32 r0.x, c10.w, r6.z, r0.w +mul.f r5.z, r2.w, r3.w +mul.f r5.y, r1.w, r3.w +(ss)mad.f32 r0.y, c10.z, r6.z, r3.z +add.f r3.w, r0.x, c11.w +mad.f32 r0.x, c1.w, r6.y, r4.x +mul.f r0.z, c0.z, r6.x +add.f r3.z, r0.y, c11.z +mad.f32 r0.x, c2.w, r6.z, r0.x +mad.f32 r0.y, c1.z, r6.y, r0.z +mul.f r0.z, c0.y, r6.x +mul.f r1.w, c0.x, r6.x +add.f r0.w, r0.x, c3.w +mad.f32 r0.x, c2.z, r6.z, r0.y +mad.f32 r0.y, c1.y, r6.y, r0.z +mad.f32 r1.w, c1.x, r6.y, r1.w +mad.f32 r0.y, c2.y, r6.z, r0.y +add.f r0.z, r0.x, c3.z +mad.f32 r0.x, c2.x, r6.z, r1.w +nop +add.f r0.y, r0.y, c3.y +mov.f32f32 r5.w, (0.000000) +add.f r0.x, r0.x, c3.x +mad.f32 r4.y, c7.x, r6.z, c7.y +mad.f32 r4.x, c7.x, r6.x, c7.y +mov.f32f32 r2.w, (0.000000) +mov.f32f32 r1.w, (0.000000) end -; VERT: outputs: r3.x (0:0) r6.x (5:9) r4.x (5:10) r5.x (5:11) r1.w (5:12) r7.x (5:13) -; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r1.z (0:0,cm=3,il=16,b=0) -; VERT: 118 instructions, 0 half, 8 full -; pos: r3.x +nop +; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11) r4.x (5:12) r5.x (5:13) +; VERT: inputs: r6.x (0:0,cm=7,il=8,b=0) r2.x (0:0,cm=7,il=12,b=0) r4.z (0:0,cm=3,il=16,b=0) +; VERT: 75 instructions, 0 half, 7 full diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-21.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-21.asm index a7cfae4..22ca830 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-21.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-21.asm @@ -2,37 +2,28 @@ ; FRAG: new compiler @in(r0.x) in0 @in(r0.y) in1 -@out(r1.y) out0 -@out(r1.z) out1 -@out(r1.w) out2 -@out(r2.x) out3 +@out(r1.x) out0 +@out(r1.y) out1 +@out(r1.z) out2 +@out(r1.w) out3 +@const(c0.x) 0x3ecccccd, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)bary.f r0.z, 0, r0.x -bary.f (ei)r0.x, 1, r0.x -mov.f32f32 r0.y, (0.000000) -nop -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.x, r0.x -(rpt1)nop -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.x -(rpt5)nop -sam (f32)(xyzw)r0.z, r0.z, s#0, t#0 -(sy)cmps.f.lt r0.x, r1.y, c0.x -mov.f32f32 r2.x, r1.y -mov.f32f32 r1.w, r1.x -mov.f32f32 r1.z, r0.w -cov.u32f32 r0.x, r0.x -(rpt1)nop -mov.f32f32 r1.y, r0.z -cmps.f.ne p0.x, r0.x, r0.y +bary.f (ei)r0.w, 1, r0.x +mov.f32f32 r0.x, (0.000000) +(rpt4)nop +sam (f32)(xyzw)r1.x, r0.z, s#0, t#0 +(sy)cmps.f.lt r0.y, r1.w, c0.x +(rpt2)nop +cov.u32f32 r0.y, r0.y +(rpt2)nop +cmps.f.ne p0.x, r0.y, r0.x (rpt5)nop kill p0.x end nop nop -; FRAG: outputs: r1.y (1:0) +nop +; FRAG: outputs: r1.x (1:0) ; FRAG: inputs: r0.x (5:9,cm=f,il=8,b=1) -; FRAG: 34 instructions, 0 half, 3 full -; pos (bary): r0.x -; color: r1.y +; FRAG: 26 instructions, 0 half, 2 full diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-22.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-22.asm index 08421de..f362ccd 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-22.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-22.asm @@ -3,170 +3,134 @@ @in(r0.x) in0 @in(r0.y) in1 @in(r0.z) in2 -@in(r0.w) in4 -@in(r1.x) in5 -@out(r3.x) out0 -@out(r3.y) out1 -@out(r3.z) out2 -@out(r3.w) out3 -@out(r2.x) out4 -@out(r2.y) out5 -@out(r2.z) out6 -@out(r2.w) out7 -(sy)(ss)floor.f r1.y, c11.z +@in(r1.x) in4 +@in(r1.y) in5 +@out(r0.x) out0 +@out(r0.y) out1 +@out(r0.z) out2 +@out(r0.w) out3 +@out(r1.x) out4 +@out(r1.y) out5 +@out(r1.z) out6 +@out(r1.w) out7 +@const(c15.x) 0x3ecccccd, 0x00000000, 0x3eaaa64c, 0x3e800000 +@const(c16.x) 0x3f000000, 0x40000000, 0xbf800000, 0x40400000 +@const(c17.x) 0x399d4952, 0x3e4ccccd, 0x3f99999a, 0x3fd9999a +@const(c18.x) 0x3c88ce70, 0x00000000, 0x00000000, 0x00000000 +@const(c19.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 +(sy)(ss)floor.f r0.w, c11.z floor.f r1.z, c11.x absneg.f r1.w, (abs)c14.x absneg.f r2.x, (abs)c14.y -add.f r1.y, c11.z, (neg)r1.y +add.f r0.w, c11.z, (neg)r0.w add.f r1.z, c11.x, (neg)r1.z mul.f r2.y, r0.x, r0.z add.f r1.w, r1.w, r2.x -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.z, r1.z -mul.f r2.x, r0.y, c17.x -mov.f32f32 r1.w, r1.w -max.f r1.y, r1.y, c15.y +max.f r0.w, r0.w, c15.y max.f r1.z, r1.z, c15.y -mul.f r2.x, r2.y, r2.x -mul.f r2.y, c13.x, r1.w -min.f r1.y, r1.y, c19.y +mul.f r2.x, r0.y, c17.x +mul.f r2.z, c13.x, r1.w +min.f r0.w, r0.w, c19.y min.f r1.z, r1.z, c19.y -mul.f r2.z, c8.z, r0.x -mov.f32f32 r2.x, r2.x -max.f r1.y, r1.y, c15.x +mul.f r2.w, c8.z, r0.x +mov.f32f32 r1.w, r1.w +max.f r0.w, r0.w, c15.x max.f r1.z, r1.z, c15.x -mad.f32 r2.z, c9.z, r0.y, r2.z -mov.f32f32 r2.x, r2.x -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.z, r1.z -mad.f32 r2.z, c10.z, r0.z, r2.z -mul.f r2.w, c8.x, r0.x -mul.f r1.y, c13.x, r1.y -mad.f32 r2.w, c9.x, r0.y, r2.w -add.f r2.z, r2.z, c11.z -mad.f32 r2.w, c10.x, r0.z, r2.w -mov.f32f32 r1.y, r1.y -mad.f32 r2.y, c15.w, r2.y, r2.z -mov.f32f32 r2.x, r2.x -add.f r2.w, r2.w, c11.x -mad.f32 r1.y, c15.z, r1.y, c11.x -mad.f32 r1.z, c13.x, r1.z, r2.w -mov.f32f32 r2.y, r2.y -max.f r2.x, r2.x, c15.y -mov.f32f32 r1.y, r1.y +mul.f r3.x, c8.x, r0.x +mad.f32 r2.w, c9.z, r0.y, r2.w +mul.f r0.w, c13.x, r0.w +mad.f32 r3.x, c9.x, r0.y, r3.x +mad.f32 r2.w, c10.z, r0.z, r2.w +mad.f32 r3.x, c10.x, r0.z, r3.x +mad.f32 r0.w, c15.z, r0.w, c11.x +max.f r1.w, r1.w, c17.z +mul.f r2.x, r2.y, r2.x +add.f r2.y, r3.x, c11.x +add.f r0.w, r0.w, c16.x +mad.f32 r1.z, c13.x, r1.z, r2.y +add.f r2.w, r2.w, c11.z +min.f r1.w, r1.w, c17.w +floor.f r3.x, r0.w add.f r1.z, r1.z, c16.x -mov.f32f32 r2.y, r2.y -min.f r2.x, r2.x, c19.y -mov.f32f32 r1.y, r1.y +mad.f32 r2.z, c15.w, r2.z, r2.w +mov.f32f32 r3.y, r1.w +add.f r0.w, r0.w, (neg)r3.x floor.f r3.x, r1.z -add.f r2.y, r2.y, c16.x -min.f r2.x, r2.x, c17.y -add.f r1.y, r1.y, c16.x +add.f r2.z, r2.z, c16.x +max.f r2.x, r2.x, c15.y +mad.f32 r0.w, c16.y, r0.w, c16.z add.f r1.z, r1.z, (neg)r3.x -floor.f r3.x, r2.y -mov.f32f32 r2.x, r2.x -floor.f r3.y, r1.y +floor.f r3.x, r2.z +min.f r2.x, r2.x, c19.y +absneg.f r0.w, (abs)r0.w mad.f32 r1.z, c16.y, r1.z, c16.z -add.f r2.y, r2.y, (neg)r3.x -mul.f r3.x, r0.y, c18.x -add.f r1.y, r1.y, (neg)r3.y +add.f r2.z, r2.z, (neg)r3.x +min.f r2.x, r2.x, c17.y +mul.f r3.x, c16.y, r0.w absneg.f r1.z, (abs)r1.z -mad.f32 r2.y, c16.y, r2.y, c16.z -mov.f32f32 r3.x, r3.x -mad.f32 r1.y, c16.y, r1.y, c16.z -mul.f r3.y, c16.y, r1.z -absneg.f r2.y, (abs)r2.y +mad.f32 r2.z, c16.y, r2.z, c16.z +mul.f r0.w, r0.w, r0.w +add.f r3.x, c16.w, (neg)r3.x +mul.f r3.z, c16.y, r1.z +absneg.f r2.z, (abs)r2.z mul.f r1.z, r1.z, r1.z -absneg.f r1.y, (abs)r1.y -add.f r3.y, c16.w, (neg)r3.y -mul.f r3.z, c16.y, r2.y -mul.f r2.y, r2.y, r2.y -mul.f r3.w, c16.y, r1.y -mul.f r1.z, r1.z, r3.y -add.f r3.y, c16.w, (neg)r3.z -mul.f r1.y, r1.y, r1.y -add.f r3.z, c16.w, (neg)r3.w -mov.f32f32 r1.z, r1.z -mul.f r2.y, r2.y, r3.y -mov.f32f32 r3.x, r3.x -mul.f r1.y, r1.y, r3.z -mul.f r1.z, r1.z, r2.x -mov.f32f32 r2.y, r2.y +mul.f r0.w, r0.w, r3.x +mul.f r3.x, r0.y, c18.x +add.f r3.z, c16.w, (neg)r3.z +mul.f r3.w, c16.y, r2.z +mul.f r2.z, r2.z, r2.z max.f r3.x, r3.x, c15.y -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.z, r1.z -mul.f r2.x, r2.y, r2.x -min.f r2.y, r3.x, c19.y -max.f r1.w, r1.w, c17.z -mul.f r3.x, c8.y, r0.x +mul.f r1.z, r1.z, r3.z +mov.f32f32 r3.z, r2.x +add.f r3.w, c16.w, (neg)r3.w +min.f r3.x, r3.x, c19.y +mul.f r4.x, c8.y, r0.x mul.f r0.x, c8.w, r0.x -min.f r2.y, r2.y, c15.w -mov.f32f32 r1.w, r1.w -mad.f32 r3.x, c9.y, r0.y, r3.x +mad.f32 r4.x, c9.y, r0.y, r4.x +min.f r3.x, r3.x, c15.w +mul.f r1.z, r1.z, r3.z +mul.f r2.z, r2.z, r3.w +mad.f32 r3.z, c10.y, r0.z, r4.x +mul.f r0.w, r0.w, r3.x +mov.f32f32 r3.x, r1.z +mul.f r2.x, r2.z, r2.x +add.f r2.z, r3.z, c11.y +mov.f32f32 r3.z, r0.w +mad.f32 r0.w, c14.y, r0.w, r1.z +mad.f32 r1.z, r2.x, r1.w, r2.w +mad.f32 r1.w, r2.x, r3.y, r2.y +mad.f32 r2.y, c14.x, r3.z, r3.x +mad.f32 r2.x, r2.x, r3.y, r2.z mad.f32 r0.x, c9.w, r0.y, r0.x -mov.f32f32 r0.y, r2.y -min.f r1.w, r1.w, c17.w -mad.f32 r2.y, c10.y, r0.z, r3.x -mad.f32 r0.x, c10.w, r0.z, r0.x -mul.f r0.y, r1.y, r0.y -mov.f32f32 r0.z, r1.w -add.f r1.y, r2.y, c11.y -add.f r0.x, r0.x, c11.w -mov.f32f32 r0.y, r0.y -mad.f32 r1.w, r2.x, r0.z, r2.w -mad.f32 r1.y, r2.x, r0.z, r1.y -mad.f32 r0.z, r2.x, r0.z, r2.z -mad.f32 r2.x, c14.x, r0.y, r1.z -mad.f32 r0.y, c14.y, r0.y, r1.z -mov.f32f32 r1.x, r1.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.z, r2.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r2.y, r1.x -mov.f32f32 r2.x, r0.w -add.f r0.w, r1.w, r1.z -add.f r0.y, r0.z, r0.y -(rpt1)nop -mov.f32f32 r0.z, r0.w -mov.f32f32 r0.y, r0.y -(rpt1)nop -mul.f r0.w, c0.w, r0.z -mul.f r1.x, c0.z, r0.z -mad.f32 r0.w, c1.w, r1.y, r0.w -mad.f32 r1.x, c1.z, r1.y, r1.x -mul.f r1.z, c0.y, r0.z -mul.f r0.z, c0.x, r0.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.x, r1.x -mad.f32 r0.w, c2.w, r0.y, r0.w -mad.f32 r1.x, c2.z, r0.y, r1.x -mad.f32 r1.w, c3.w, r0.x, r0.w -mad.f32 r3.x, c3.z, r0.x, r1.x -mad.f32 r1.z, c1.y, r1.y, r1.z -mad.f32 r0.z, c1.x, r1.y, r0.z -mov.f32f32 r1.y, r1.w -mov.f32f32 r1.w, r3.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r0.z, r0.z -mov.f32f32 r3.w, r1.y -mov.f32f32 r3.z, r1.w -mad.f32 r1.y, c2.y, r0.y, r1.z -mad.f32 r0.y, c2.x, r0.y, r0.z -mad.f32 r0.z, c3.y, r0.x, r1.y -mad.f32 r0.x, c3.x, r0.x, r0.y -mov.f32f32 r2.w, r0.w -mov.f32f32 r2.z, r1.x -mov.f32f32 r0.y, r0.z -mov.f32f32 r0.x, r0.x +nop +add.f r0.y, r1.w, r2.y +add.f r2.y, r1.z, r0.w (rpt1)nop -mov.f32f32 r3.y, r0.y -mov.f32f32 r3.x, r0.x +mov.f32f32 r0.w, r0.y +mul.f r0.y, c0.x, r0.y +mov.f32f32 r2.z, r2.y +mad.f32 r0.x, c10.w, r0.z, r0.x +mul.f r0.z, c0.w, r0.w +mul.f r1.z, c0.z, r0.w +mad.f32 r0.z, c1.w, r2.x, r0.z +mad.f32 r1.z, c1.z, r2.x, r1.z +mad.f32 r1.w, c2.w, r2.z, r0.z +add.f r2.w, r0.x, c11.w +mad.f32 r1.z, c2.z, r2.z, r1.z +mul.f r0.x, c0.y, r0.w +mad.f32 r0.y, c1.x, r2.x, r0.y +mad.f32 r0.w, c3.w, r2.w, r1.w +mad.f32 r0.z, c3.z, r2.w, r1.z +mad.f32 r0.x, c1.y, r2.x, r0.x +mad.f32 r0.y, c2.x, r2.y, r0.y +mad.f32 r2.x, c2.y, r2.z, r0.x +mad.f32 r0.x, c3.x, r2.w, r0.y +mad.f32 r0.y, c3.y, r2.w, r2.x end nop nop nop -; VERT: outputs: r3.x (0:0) r2.x (5:9) -; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=3,il=12,b=0) -; VERT: 152 instructions, 0 half, 4 full -; pos: r3.x +; VERT: outputs: r0.x (0:0) r1.x (5:9) +; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r1.x (0:0,cm=3,il=12,b=0) +; VERT: 110 instructions, 0 half, 5 full diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-23.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-23.asm index a7cfae4..22ca830 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-23.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-23.asm @@ -2,37 +2,28 @@ ; FRAG: new compiler @in(r0.x) in0 @in(r0.y) in1 -@out(r1.y) out0 -@out(r1.z) out1 -@out(r1.w) out2 -@out(r2.x) out3 +@out(r1.x) out0 +@out(r1.y) out1 +@out(r1.z) out2 +@out(r1.w) out3 +@const(c0.x) 0x3ecccccd, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)bary.f r0.z, 0, r0.x -bary.f (ei)r0.x, 1, r0.x -mov.f32f32 r0.y, (0.000000) -nop -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.x, r0.x -(rpt1)nop -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.x -(rpt5)nop -sam (f32)(xyzw)r0.z, r0.z, s#0, t#0 -(sy)cmps.f.lt r0.x, r1.y, c0.x -mov.f32f32 r2.x, r1.y -mov.f32f32 r1.w, r1.x -mov.f32f32 r1.z, r0.w -cov.u32f32 r0.x, r0.x -(rpt1)nop -mov.f32f32 r1.y, r0.z -cmps.f.ne p0.x, r0.x, r0.y +bary.f (ei)r0.w, 1, r0.x +mov.f32f32 r0.x, (0.000000) +(rpt4)nop +sam (f32)(xyzw)r1.x, r0.z, s#0, t#0 +(sy)cmps.f.lt r0.y, r1.w, c0.x +(rpt2)nop +cov.u32f32 r0.y, r0.y +(rpt2)nop +cmps.f.ne p0.x, r0.y, r0.x (rpt5)nop kill p0.x end nop nop -; FRAG: outputs: r1.y (1:0) +nop +; FRAG: outputs: r1.x (1:0) ; FRAG: inputs: r0.x (5:9,cm=f,il=8,b=1) -; FRAG: 34 instructions, 0 half, 3 full -; pos (bary): r0.x -; color: r1.y +; FRAG: 26 instructions, 0 half, 2 full diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-24.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-24.asm index 08421de..f362ccd 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-24.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-24.asm @@ -3,170 +3,134 @@ @in(r0.x) in0 @in(r0.y) in1 @in(r0.z) in2 -@in(r0.w) in4 -@in(r1.x) in5 -@out(r3.x) out0 -@out(r3.y) out1 -@out(r3.z) out2 -@out(r3.w) out3 -@out(r2.x) out4 -@out(r2.y) out5 -@out(r2.z) out6 -@out(r2.w) out7 -(sy)(ss)floor.f r1.y, c11.z +@in(r1.x) in4 +@in(r1.y) in5 +@out(r0.x) out0 +@out(r0.y) out1 +@out(r0.z) out2 +@out(r0.w) out3 +@out(r1.x) out4 +@out(r1.y) out5 +@out(r1.z) out6 +@out(r1.w) out7 +@const(c15.x) 0x3ecccccd, 0x00000000, 0x3eaaa64c, 0x3e800000 +@const(c16.x) 0x3f000000, 0x40000000, 0xbf800000, 0x40400000 +@const(c17.x) 0x399d4952, 0x3e4ccccd, 0x3f99999a, 0x3fd9999a +@const(c18.x) 0x3c88ce70, 0x00000000, 0x00000000, 0x00000000 +@const(c19.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 +(sy)(ss)floor.f r0.w, c11.z floor.f r1.z, c11.x absneg.f r1.w, (abs)c14.x absneg.f r2.x, (abs)c14.y -add.f r1.y, c11.z, (neg)r1.y +add.f r0.w, c11.z, (neg)r0.w add.f r1.z, c11.x, (neg)r1.z mul.f r2.y, r0.x, r0.z add.f r1.w, r1.w, r2.x -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.z, r1.z -mul.f r2.x, r0.y, c17.x -mov.f32f32 r1.w, r1.w -max.f r1.y, r1.y, c15.y +max.f r0.w, r0.w, c15.y max.f r1.z, r1.z, c15.y -mul.f r2.x, r2.y, r2.x -mul.f r2.y, c13.x, r1.w -min.f r1.y, r1.y, c19.y +mul.f r2.x, r0.y, c17.x +mul.f r2.z, c13.x, r1.w +min.f r0.w, r0.w, c19.y min.f r1.z, r1.z, c19.y -mul.f r2.z, c8.z, r0.x -mov.f32f32 r2.x, r2.x -max.f r1.y, r1.y, c15.x +mul.f r2.w, c8.z, r0.x +mov.f32f32 r1.w, r1.w +max.f r0.w, r0.w, c15.x max.f r1.z, r1.z, c15.x -mad.f32 r2.z, c9.z, r0.y, r2.z -mov.f32f32 r2.x, r2.x -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.z, r1.z -mad.f32 r2.z, c10.z, r0.z, r2.z -mul.f r2.w, c8.x, r0.x -mul.f r1.y, c13.x, r1.y -mad.f32 r2.w, c9.x, r0.y, r2.w -add.f r2.z, r2.z, c11.z -mad.f32 r2.w, c10.x, r0.z, r2.w -mov.f32f32 r1.y, r1.y -mad.f32 r2.y, c15.w, r2.y, r2.z -mov.f32f32 r2.x, r2.x -add.f r2.w, r2.w, c11.x -mad.f32 r1.y, c15.z, r1.y, c11.x -mad.f32 r1.z, c13.x, r1.z, r2.w -mov.f32f32 r2.y, r2.y -max.f r2.x, r2.x, c15.y -mov.f32f32 r1.y, r1.y +mul.f r3.x, c8.x, r0.x +mad.f32 r2.w, c9.z, r0.y, r2.w +mul.f r0.w, c13.x, r0.w +mad.f32 r3.x, c9.x, r0.y, r3.x +mad.f32 r2.w, c10.z, r0.z, r2.w +mad.f32 r3.x, c10.x, r0.z, r3.x +mad.f32 r0.w, c15.z, r0.w, c11.x +max.f r1.w, r1.w, c17.z +mul.f r2.x, r2.y, r2.x +add.f r2.y, r3.x, c11.x +add.f r0.w, r0.w, c16.x +mad.f32 r1.z, c13.x, r1.z, r2.y +add.f r2.w, r2.w, c11.z +min.f r1.w, r1.w, c17.w +floor.f r3.x, r0.w add.f r1.z, r1.z, c16.x -mov.f32f32 r2.y, r2.y -min.f r2.x, r2.x, c19.y -mov.f32f32 r1.y, r1.y +mad.f32 r2.z, c15.w, r2.z, r2.w +mov.f32f32 r3.y, r1.w +add.f r0.w, r0.w, (neg)r3.x floor.f r3.x, r1.z -add.f r2.y, r2.y, c16.x -min.f r2.x, r2.x, c17.y -add.f r1.y, r1.y, c16.x +add.f r2.z, r2.z, c16.x +max.f r2.x, r2.x, c15.y +mad.f32 r0.w, c16.y, r0.w, c16.z add.f r1.z, r1.z, (neg)r3.x -floor.f r3.x, r2.y -mov.f32f32 r2.x, r2.x -floor.f r3.y, r1.y +floor.f r3.x, r2.z +min.f r2.x, r2.x, c19.y +absneg.f r0.w, (abs)r0.w mad.f32 r1.z, c16.y, r1.z, c16.z -add.f r2.y, r2.y, (neg)r3.x -mul.f r3.x, r0.y, c18.x -add.f r1.y, r1.y, (neg)r3.y +add.f r2.z, r2.z, (neg)r3.x +min.f r2.x, r2.x, c17.y +mul.f r3.x, c16.y, r0.w absneg.f r1.z, (abs)r1.z -mad.f32 r2.y, c16.y, r2.y, c16.z -mov.f32f32 r3.x, r3.x -mad.f32 r1.y, c16.y, r1.y, c16.z -mul.f r3.y, c16.y, r1.z -absneg.f r2.y, (abs)r2.y +mad.f32 r2.z, c16.y, r2.z, c16.z +mul.f r0.w, r0.w, r0.w +add.f r3.x, c16.w, (neg)r3.x +mul.f r3.z, c16.y, r1.z +absneg.f r2.z, (abs)r2.z mul.f r1.z, r1.z, r1.z -absneg.f r1.y, (abs)r1.y -add.f r3.y, c16.w, (neg)r3.y -mul.f r3.z, c16.y, r2.y -mul.f r2.y, r2.y, r2.y -mul.f r3.w, c16.y, r1.y -mul.f r1.z, r1.z, r3.y -add.f r3.y, c16.w, (neg)r3.z -mul.f r1.y, r1.y, r1.y -add.f r3.z, c16.w, (neg)r3.w -mov.f32f32 r1.z, r1.z -mul.f r2.y, r2.y, r3.y -mov.f32f32 r3.x, r3.x -mul.f r1.y, r1.y, r3.z -mul.f r1.z, r1.z, r2.x -mov.f32f32 r2.y, r2.y +mul.f r0.w, r0.w, r3.x +mul.f r3.x, r0.y, c18.x +add.f r3.z, c16.w, (neg)r3.z +mul.f r3.w, c16.y, r2.z +mul.f r2.z, r2.z, r2.z max.f r3.x, r3.x, c15.y -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.z, r1.z -mul.f r2.x, r2.y, r2.x -min.f r2.y, r3.x, c19.y -max.f r1.w, r1.w, c17.z -mul.f r3.x, c8.y, r0.x +mul.f r1.z, r1.z, r3.z +mov.f32f32 r3.z, r2.x +add.f r3.w, c16.w, (neg)r3.w +min.f r3.x, r3.x, c19.y +mul.f r4.x, c8.y, r0.x mul.f r0.x, c8.w, r0.x -min.f r2.y, r2.y, c15.w -mov.f32f32 r1.w, r1.w -mad.f32 r3.x, c9.y, r0.y, r3.x +mad.f32 r4.x, c9.y, r0.y, r4.x +min.f r3.x, r3.x, c15.w +mul.f r1.z, r1.z, r3.z +mul.f r2.z, r2.z, r3.w +mad.f32 r3.z, c10.y, r0.z, r4.x +mul.f r0.w, r0.w, r3.x +mov.f32f32 r3.x, r1.z +mul.f r2.x, r2.z, r2.x +add.f r2.z, r3.z, c11.y +mov.f32f32 r3.z, r0.w +mad.f32 r0.w, c14.y, r0.w, r1.z +mad.f32 r1.z, r2.x, r1.w, r2.w +mad.f32 r1.w, r2.x, r3.y, r2.y +mad.f32 r2.y, c14.x, r3.z, r3.x +mad.f32 r2.x, r2.x, r3.y, r2.z mad.f32 r0.x, c9.w, r0.y, r0.x -mov.f32f32 r0.y, r2.y -min.f r1.w, r1.w, c17.w -mad.f32 r2.y, c10.y, r0.z, r3.x -mad.f32 r0.x, c10.w, r0.z, r0.x -mul.f r0.y, r1.y, r0.y -mov.f32f32 r0.z, r1.w -add.f r1.y, r2.y, c11.y -add.f r0.x, r0.x, c11.w -mov.f32f32 r0.y, r0.y -mad.f32 r1.w, r2.x, r0.z, r2.w -mad.f32 r1.y, r2.x, r0.z, r1.y -mad.f32 r0.z, r2.x, r0.z, r2.z -mad.f32 r2.x, c14.x, r0.y, r1.z -mad.f32 r0.y, c14.y, r0.y, r1.z -mov.f32f32 r1.x, r1.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.z, r2.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r2.y, r1.x -mov.f32f32 r2.x, r0.w -add.f r0.w, r1.w, r1.z -add.f r0.y, r0.z, r0.y -(rpt1)nop -mov.f32f32 r0.z, r0.w -mov.f32f32 r0.y, r0.y -(rpt1)nop -mul.f r0.w, c0.w, r0.z -mul.f r1.x, c0.z, r0.z -mad.f32 r0.w, c1.w, r1.y, r0.w -mad.f32 r1.x, c1.z, r1.y, r1.x -mul.f r1.z, c0.y, r0.z -mul.f r0.z, c0.x, r0.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.x, r1.x -mad.f32 r0.w, c2.w, r0.y, r0.w -mad.f32 r1.x, c2.z, r0.y, r1.x -mad.f32 r1.w, c3.w, r0.x, r0.w -mad.f32 r3.x, c3.z, r0.x, r1.x -mad.f32 r1.z, c1.y, r1.y, r1.z -mad.f32 r0.z, c1.x, r1.y, r0.z -mov.f32f32 r1.y, r1.w -mov.f32f32 r1.w, r3.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r0.z, r0.z -mov.f32f32 r3.w, r1.y -mov.f32f32 r3.z, r1.w -mad.f32 r1.y, c2.y, r0.y, r1.z -mad.f32 r0.y, c2.x, r0.y, r0.z -mad.f32 r0.z, c3.y, r0.x, r1.y -mad.f32 r0.x, c3.x, r0.x, r0.y -mov.f32f32 r2.w, r0.w -mov.f32f32 r2.z, r1.x -mov.f32f32 r0.y, r0.z -mov.f32f32 r0.x, r0.x +nop +add.f r0.y, r1.w, r2.y +add.f r2.y, r1.z, r0.w (rpt1)nop -mov.f32f32 r3.y, r0.y -mov.f32f32 r3.x, r0.x +mov.f32f32 r0.w, r0.y +mul.f r0.y, c0.x, r0.y +mov.f32f32 r2.z, r2.y +mad.f32 r0.x, c10.w, r0.z, r0.x +mul.f r0.z, c0.w, r0.w +mul.f r1.z, c0.z, r0.w +mad.f32 r0.z, c1.w, r2.x, r0.z +mad.f32 r1.z, c1.z, r2.x, r1.z +mad.f32 r1.w, c2.w, r2.z, r0.z +add.f r2.w, r0.x, c11.w +mad.f32 r1.z, c2.z, r2.z, r1.z +mul.f r0.x, c0.y, r0.w +mad.f32 r0.y, c1.x, r2.x, r0.y +mad.f32 r0.w, c3.w, r2.w, r1.w +mad.f32 r0.z, c3.z, r2.w, r1.z +mad.f32 r0.x, c1.y, r2.x, r0.x +mad.f32 r0.y, c2.x, r2.y, r0.y +mad.f32 r2.x, c2.y, r2.z, r0.x +mad.f32 r0.x, c3.x, r2.w, r0.y +mad.f32 r0.y, c3.y, r2.w, r2.x end nop nop nop -; VERT: outputs: r3.x (0:0) r2.x (5:9) -; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=3,il=12,b=0) -; VERT: 152 instructions, 0 half, 4 full -; pos: r3.x +; VERT: outputs: r0.x (0:0) r1.x (5:9) +; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r1.x (0:0,cm=3,il=12,b=0) +; VERT: 110 instructions, 0 half, 5 full diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-25.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-25.asm index a7cfae4..22ca830 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-25.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-25.asm @@ -2,37 +2,28 @@ ; FRAG: new compiler @in(r0.x) in0 @in(r0.y) in1 -@out(r1.y) out0 -@out(r1.z) out1 -@out(r1.w) out2 -@out(r2.x) out3 +@out(r1.x) out0 +@out(r1.y) out1 +@out(r1.z) out2 +@out(r1.w) out3 +@const(c0.x) 0x3ecccccd, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)bary.f r0.z, 0, r0.x -bary.f (ei)r0.x, 1, r0.x -mov.f32f32 r0.y, (0.000000) -nop -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.x, r0.x -(rpt1)nop -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.x -(rpt5)nop -sam (f32)(xyzw)r0.z, r0.z, s#0, t#0 -(sy)cmps.f.lt r0.x, r1.y, c0.x -mov.f32f32 r2.x, r1.y -mov.f32f32 r1.w, r1.x -mov.f32f32 r1.z, r0.w -cov.u32f32 r0.x, r0.x -(rpt1)nop -mov.f32f32 r1.y, r0.z -cmps.f.ne p0.x, r0.x, r0.y +bary.f (ei)r0.w, 1, r0.x +mov.f32f32 r0.x, (0.000000) +(rpt4)nop +sam (f32)(xyzw)r1.x, r0.z, s#0, t#0 +(sy)cmps.f.lt r0.y, r1.w, c0.x +(rpt2)nop +cov.u32f32 r0.y, r0.y +(rpt2)nop +cmps.f.ne p0.x, r0.y, r0.x (rpt5)nop kill p0.x end nop nop -; FRAG: outputs: r1.y (1:0) +nop +; FRAG: outputs: r1.x (1:0) ; FRAG: inputs: r0.x (5:9,cm=f,il=8,b=1) -; FRAG: 34 instructions, 0 half, 3 full -; pos (bary): r0.x -; color: r1.y +; FRAG: 26 instructions, 0 half, 2 full diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-27.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-27.asm index a7cfae4..22ca830 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-27.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-27.asm @@ -2,37 +2,28 @@ ; FRAG: new compiler @in(r0.x) in0 @in(r0.y) in1 -@out(r1.y) out0 -@out(r1.z) out1 -@out(r1.w) out2 -@out(r2.x) out3 +@out(r1.x) out0 +@out(r1.y) out1 +@out(r1.z) out2 +@out(r1.w) out3 +@const(c0.x) 0x3ecccccd, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)bary.f r0.z, 0, r0.x -bary.f (ei)r0.x, 1, r0.x -mov.f32f32 r0.y, (0.000000) -nop -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.x, r0.x -(rpt1)nop -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.x -(rpt5)nop -sam (f32)(xyzw)r0.z, r0.z, s#0, t#0 -(sy)cmps.f.lt r0.x, r1.y, c0.x -mov.f32f32 r2.x, r1.y -mov.f32f32 r1.w, r1.x -mov.f32f32 r1.z, r0.w -cov.u32f32 r0.x, r0.x -(rpt1)nop -mov.f32f32 r1.y, r0.z -cmps.f.ne p0.x, r0.x, r0.y +bary.f (ei)r0.w, 1, r0.x +mov.f32f32 r0.x, (0.000000) +(rpt4)nop +sam (f32)(xyzw)r1.x, r0.z, s#0, t#0 +(sy)cmps.f.lt r0.y, r1.w, c0.x +(rpt2)nop +cov.u32f32 r0.y, r0.y +(rpt2)nop +cmps.f.ne p0.x, r0.y, r0.x (rpt5)nop kill p0.x end nop nop -; FRAG: outputs: r1.y (1:0) +nop +; FRAG: outputs: r1.x (1:0) ; FRAG: inputs: r0.x (5:9,cm=f,il=8,b=1) -; FRAG: 34 instructions, 0 half, 3 full -; pos (bary): r0.x -; color: r1.y +; FRAG: 26 instructions, 0 half, 2 full diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-28.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-28.asm index 08421de..f362ccd 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-28.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-28.asm @@ -3,170 +3,134 @@ @in(r0.x) in0 @in(r0.y) in1 @in(r0.z) in2 -@in(r0.w) in4 -@in(r1.x) in5 -@out(r3.x) out0 -@out(r3.y) out1 -@out(r3.z) out2 -@out(r3.w) out3 -@out(r2.x) out4 -@out(r2.y) out5 -@out(r2.z) out6 -@out(r2.w) out7 -(sy)(ss)floor.f r1.y, c11.z +@in(r1.x) in4 +@in(r1.y) in5 +@out(r0.x) out0 +@out(r0.y) out1 +@out(r0.z) out2 +@out(r0.w) out3 +@out(r1.x) out4 +@out(r1.y) out5 +@out(r1.z) out6 +@out(r1.w) out7 +@const(c15.x) 0x3ecccccd, 0x00000000, 0x3eaaa64c, 0x3e800000 +@const(c16.x) 0x3f000000, 0x40000000, 0xbf800000, 0x40400000 +@const(c17.x) 0x399d4952, 0x3e4ccccd, 0x3f99999a, 0x3fd9999a +@const(c18.x) 0x3c88ce70, 0x00000000, 0x00000000, 0x00000000 +@const(c19.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 +(sy)(ss)floor.f r0.w, c11.z floor.f r1.z, c11.x absneg.f r1.w, (abs)c14.x absneg.f r2.x, (abs)c14.y -add.f r1.y, c11.z, (neg)r1.y +add.f r0.w, c11.z, (neg)r0.w add.f r1.z, c11.x, (neg)r1.z mul.f r2.y, r0.x, r0.z add.f r1.w, r1.w, r2.x -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.z, r1.z -mul.f r2.x, r0.y, c17.x -mov.f32f32 r1.w, r1.w -max.f r1.y, r1.y, c15.y +max.f r0.w, r0.w, c15.y max.f r1.z, r1.z, c15.y -mul.f r2.x, r2.y, r2.x -mul.f r2.y, c13.x, r1.w -min.f r1.y, r1.y, c19.y +mul.f r2.x, r0.y, c17.x +mul.f r2.z, c13.x, r1.w +min.f r0.w, r0.w, c19.y min.f r1.z, r1.z, c19.y -mul.f r2.z, c8.z, r0.x -mov.f32f32 r2.x, r2.x -max.f r1.y, r1.y, c15.x +mul.f r2.w, c8.z, r0.x +mov.f32f32 r1.w, r1.w +max.f r0.w, r0.w, c15.x max.f r1.z, r1.z, c15.x -mad.f32 r2.z, c9.z, r0.y, r2.z -mov.f32f32 r2.x, r2.x -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.z, r1.z -mad.f32 r2.z, c10.z, r0.z, r2.z -mul.f r2.w, c8.x, r0.x -mul.f r1.y, c13.x, r1.y -mad.f32 r2.w, c9.x, r0.y, r2.w -add.f r2.z, r2.z, c11.z -mad.f32 r2.w, c10.x, r0.z, r2.w -mov.f32f32 r1.y, r1.y -mad.f32 r2.y, c15.w, r2.y, r2.z -mov.f32f32 r2.x, r2.x -add.f r2.w, r2.w, c11.x -mad.f32 r1.y, c15.z, r1.y, c11.x -mad.f32 r1.z, c13.x, r1.z, r2.w -mov.f32f32 r2.y, r2.y -max.f r2.x, r2.x, c15.y -mov.f32f32 r1.y, r1.y +mul.f r3.x, c8.x, r0.x +mad.f32 r2.w, c9.z, r0.y, r2.w +mul.f r0.w, c13.x, r0.w +mad.f32 r3.x, c9.x, r0.y, r3.x +mad.f32 r2.w, c10.z, r0.z, r2.w +mad.f32 r3.x, c10.x, r0.z, r3.x +mad.f32 r0.w, c15.z, r0.w, c11.x +max.f r1.w, r1.w, c17.z +mul.f r2.x, r2.y, r2.x +add.f r2.y, r3.x, c11.x +add.f r0.w, r0.w, c16.x +mad.f32 r1.z, c13.x, r1.z, r2.y +add.f r2.w, r2.w, c11.z +min.f r1.w, r1.w, c17.w +floor.f r3.x, r0.w add.f r1.z, r1.z, c16.x -mov.f32f32 r2.y, r2.y -min.f r2.x, r2.x, c19.y -mov.f32f32 r1.y, r1.y +mad.f32 r2.z, c15.w, r2.z, r2.w +mov.f32f32 r3.y, r1.w +add.f r0.w, r0.w, (neg)r3.x floor.f r3.x, r1.z -add.f r2.y, r2.y, c16.x -min.f r2.x, r2.x, c17.y -add.f r1.y, r1.y, c16.x +add.f r2.z, r2.z, c16.x +max.f r2.x, r2.x, c15.y +mad.f32 r0.w, c16.y, r0.w, c16.z add.f r1.z, r1.z, (neg)r3.x -floor.f r3.x, r2.y -mov.f32f32 r2.x, r2.x -floor.f r3.y, r1.y +floor.f r3.x, r2.z +min.f r2.x, r2.x, c19.y +absneg.f r0.w, (abs)r0.w mad.f32 r1.z, c16.y, r1.z, c16.z -add.f r2.y, r2.y, (neg)r3.x -mul.f r3.x, r0.y, c18.x -add.f r1.y, r1.y, (neg)r3.y +add.f r2.z, r2.z, (neg)r3.x +min.f r2.x, r2.x, c17.y +mul.f r3.x, c16.y, r0.w absneg.f r1.z, (abs)r1.z -mad.f32 r2.y, c16.y, r2.y, c16.z -mov.f32f32 r3.x, r3.x -mad.f32 r1.y, c16.y, r1.y, c16.z -mul.f r3.y, c16.y, r1.z -absneg.f r2.y, (abs)r2.y +mad.f32 r2.z, c16.y, r2.z, c16.z +mul.f r0.w, r0.w, r0.w +add.f r3.x, c16.w, (neg)r3.x +mul.f r3.z, c16.y, r1.z +absneg.f r2.z, (abs)r2.z mul.f r1.z, r1.z, r1.z -absneg.f r1.y, (abs)r1.y -add.f r3.y, c16.w, (neg)r3.y -mul.f r3.z, c16.y, r2.y -mul.f r2.y, r2.y, r2.y -mul.f r3.w, c16.y, r1.y -mul.f r1.z, r1.z, r3.y -add.f r3.y, c16.w, (neg)r3.z -mul.f r1.y, r1.y, r1.y -add.f r3.z, c16.w, (neg)r3.w -mov.f32f32 r1.z, r1.z -mul.f r2.y, r2.y, r3.y -mov.f32f32 r3.x, r3.x -mul.f r1.y, r1.y, r3.z -mul.f r1.z, r1.z, r2.x -mov.f32f32 r2.y, r2.y +mul.f r0.w, r0.w, r3.x +mul.f r3.x, r0.y, c18.x +add.f r3.z, c16.w, (neg)r3.z +mul.f r3.w, c16.y, r2.z +mul.f r2.z, r2.z, r2.z max.f r3.x, r3.x, c15.y -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.z, r1.z -mul.f r2.x, r2.y, r2.x -min.f r2.y, r3.x, c19.y -max.f r1.w, r1.w, c17.z -mul.f r3.x, c8.y, r0.x +mul.f r1.z, r1.z, r3.z +mov.f32f32 r3.z, r2.x +add.f r3.w, c16.w, (neg)r3.w +min.f r3.x, r3.x, c19.y +mul.f r4.x, c8.y, r0.x mul.f r0.x, c8.w, r0.x -min.f r2.y, r2.y, c15.w -mov.f32f32 r1.w, r1.w -mad.f32 r3.x, c9.y, r0.y, r3.x +mad.f32 r4.x, c9.y, r0.y, r4.x +min.f r3.x, r3.x, c15.w +mul.f r1.z, r1.z, r3.z +mul.f r2.z, r2.z, r3.w +mad.f32 r3.z, c10.y, r0.z, r4.x +mul.f r0.w, r0.w, r3.x +mov.f32f32 r3.x, r1.z +mul.f r2.x, r2.z, r2.x +add.f r2.z, r3.z, c11.y +mov.f32f32 r3.z, r0.w +mad.f32 r0.w, c14.y, r0.w, r1.z +mad.f32 r1.z, r2.x, r1.w, r2.w +mad.f32 r1.w, r2.x, r3.y, r2.y +mad.f32 r2.y, c14.x, r3.z, r3.x +mad.f32 r2.x, r2.x, r3.y, r2.z mad.f32 r0.x, c9.w, r0.y, r0.x -mov.f32f32 r0.y, r2.y -min.f r1.w, r1.w, c17.w -mad.f32 r2.y, c10.y, r0.z, r3.x -mad.f32 r0.x, c10.w, r0.z, r0.x -mul.f r0.y, r1.y, r0.y -mov.f32f32 r0.z, r1.w -add.f r1.y, r2.y, c11.y -add.f r0.x, r0.x, c11.w -mov.f32f32 r0.y, r0.y -mad.f32 r1.w, r2.x, r0.z, r2.w -mad.f32 r1.y, r2.x, r0.z, r1.y -mad.f32 r0.z, r2.x, r0.z, r2.z -mad.f32 r2.x, c14.x, r0.y, r1.z -mad.f32 r0.y, c14.y, r0.y, r1.z -mov.f32f32 r1.x, r1.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.z, r2.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r2.y, r1.x -mov.f32f32 r2.x, r0.w -add.f r0.w, r1.w, r1.z -add.f r0.y, r0.z, r0.y -(rpt1)nop -mov.f32f32 r0.z, r0.w -mov.f32f32 r0.y, r0.y -(rpt1)nop -mul.f r0.w, c0.w, r0.z -mul.f r1.x, c0.z, r0.z -mad.f32 r0.w, c1.w, r1.y, r0.w -mad.f32 r1.x, c1.z, r1.y, r1.x -mul.f r1.z, c0.y, r0.z -mul.f r0.z, c0.x, r0.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.x, r1.x -mad.f32 r0.w, c2.w, r0.y, r0.w -mad.f32 r1.x, c2.z, r0.y, r1.x -mad.f32 r1.w, c3.w, r0.x, r0.w -mad.f32 r3.x, c3.z, r0.x, r1.x -mad.f32 r1.z, c1.y, r1.y, r1.z -mad.f32 r0.z, c1.x, r1.y, r0.z -mov.f32f32 r1.y, r1.w -mov.f32f32 r1.w, r3.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r0.z, r0.z -mov.f32f32 r3.w, r1.y -mov.f32f32 r3.z, r1.w -mad.f32 r1.y, c2.y, r0.y, r1.z -mad.f32 r0.y, c2.x, r0.y, r0.z -mad.f32 r0.z, c3.y, r0.x, r1.y -mad.f32 r0.x, c3.x, r0.x, r0.y -mov.f32f32 r2.w, r0.w -mov.f32f32 r2.z, r1.x -mov.f32f32 r0.y, r0.z -mov.f32f32 r0.x, r0.x +nop +add.f r0.y, r1.w, r2.y +add.f r2.y, r1.z, r0.w (rpt1)nop -mov.f32f32 r3.y, r0.y -mov.f32f32 r3.x, r0.x +mov.f32f32 r0.w, r0.y +mul.f r0.y, c0.x, r0.y +mov.f32f32 r2.z, r2.y +mad.f32 r0.x, c10.w, r0.z, r0.x +mul.f r0.z, c0.w, r0.w +mul.f r1.z, c0.z, r0.w +mad.f32 r0.z, c1.w, r2.x, r0.z +mad.f32 r1.z, c1.z, r2.x, r1.z +mad.f32 r1.w, c2.w, r2.z, r0.z +add.f r2.w, r0.x, c11.w +mad.f32 r1.z, c2.z, r2.z, r1.z +mul.f r0.x, c0.y, r0.w +mad.f32 r0.y, c1.x, r2.x, r0.y +mad.f32 r0.w, c3.w, r2.w, r1.w +mad.f32 r0.z, c3.z, r2.w, r1.z +mad.f32 r0.x, c1.y, r2.x, r0.x +mad.f32 r0.y, c2.x, r2.y, r0.y +mad.f32 r2.x, c2.y, r2.z, r0.x +mad.f32 r0.x, c3.x, r2.w, r0.y +mad.f32 r0.y, c3.y, r2.w, r2.x end nop nop nop -; VERT: outputs: r3.x (0:0) r2.x (5:9) -; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=3,il=12,b=0) -; VERT: 152 instructions, 0 half, 4 full -; pos: r3.x +; VERT: outputs: r0.x (0:0) r1.x (5:9) +; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r1.x (0:0,cm=3,il=12,b=0) +; VERT: 110 instructions, 0 half, 5 full diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-29.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-29.asm index 6eeb0e0..e298c69 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-29.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-29.asm @@ -1,32 +1,23 @@ ; options: ; FRAG: new compiler -@out(r0.w) out0 -@out(r1.x) out1 -@out(r1.y) out2 -@out(r1.z) out3 +@out(r1.x) out0 +@out(r1.y) out1 +@out(r1.z) out2 +@out(r1.w) out3 +@const(c10.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)mov.f32f32 r0.x, c9.x -mov.f32f32 r0.y, c9.w -mov.f32f32 r0.z, c9.y -nop -mov.f32f32 r0.w, r0.x -mov.f32f32 r1.y, r0.y -mov.f32f32 r1.x, r0.z +mov.f32f32 r0.y, c9.y +mov.f32f32 r0.z, c9.w (rpt5)nop -sam.p (f32)(xyzw)r0.x, r0.w, s#0, t#0 -(sy)(ss)mul.f r0.w, r0.w, c4.w -mul.f r0.z, r0.z, c4.z -mul.f r0.y, r0.y, c4.y -mul.f r0.x, r0.x, c4.x -mov.f32f32 r1.z, r0.w -mov.f32f32 r1.y, r0.z -mov.f32f32 r1.x, r0.y -mov.f32f32 r0.w, r0.x +sam.p (f32)(xyzw)r0.x, r0.x, s#0, t#0 +(sy)mul.f r1.w, r0.w, c4.w +mul.f r1.z, r0.z, c4.z +mul.f r1.y, r0.y, c4.y +mul.f r1.x, r0.x, c4.x end nop nop -; FRAG: outputs: r0.w (1:0) +; FRAG: outputs: r1.x (1:0) ; FRAG: inputs: -; FRAG: 23 instructions, 0 half, 2 full -; pos (bary): r0.x -; color: r0.w +; FRAG: 15 instructions, 0 half, 2 full diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-30.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-30.asm index 1b081fa..eb8f852 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-30.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-30.asm @@ -4,210 +4,135 @@ @in(r0.w) in3 @in(r1.x) in4 @in(r1.y) in5 -@out(r4.x) out0 -@out(r4.y) out1 -@out(r4.z) out2 -@out(r4.w) out3 +@out(r2.x) out0 +@out(r2.y) out1 +@out(r2.z) out2 +@out(r2.w) out3 +@const(c8.x) 0x3f000000, 0x00000000, 0x3f800000, 0xba03126f +@const(c9.x) 0xbf000000, 0x40000000, 0x3f800000, 0x3fb8aa65 +@const(c10.x) 0x3de38866, 0x3cf5c28f, 0x00000000, 0x00000000 +@const(c11.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.x, 0, r1.x add.f r0.y, r0.w, c8.y bary.f r0.w, 1, r1.x bary.f r1.z, 4, r1.x -add.f r1.w, r0.x, c9.x -bary.f r2.x, 6, r1.x -bary.f r2.y, 2, r1.x -add.f r2.z, r0.w, c9.x -floor.f r2.w, r1.w +add.f r2.x, r0.x, c9.x +bary.f r1.w, 5, r1.x +add.f r2.y, r0.w, c9.x +bary.f r2.z, 2, r1.x +floor.f r2.w, r2.x rcp r0.y, r0.y add.f r0.z, r0.z, c8.y -mov.f32f32 r1.z, r1.z -floor.f r3.x, r2.z -add.f r1.w, r1.w, (neg)r2.w +floor.f r3.x, r2.y +add.f r3.w, r2.z, c8.w +add.f r2.x, r2.x, (neg)r2.w (ss)mul.f r0.y, r0.z, r0.y -mov.f32f32 r3.y, r1.z -add.f r0.z, r2.z, (neg)r3.x -mov.f32f32 r1.z, r1.w +absneg.f r0.z, (neg)c6.x +add.f r2.y, r2.y, (neg)r3.x +mov.f32f32 r2.z, r2.x +add.f r2.x, r2.x, c8.z +mul.f r0.z, r0.z, c6.x +sam (f32)(w)r4.x, r1.z, s#1, t#1 +(ss)mov.f32f32 r1.z, r2.y +mul.f r1.w, c8.x, r2.z +add.f r2.z, c9.y, (neg)r2.z +mul.f r0.z, r0.z, r0.y mov.f32f32 r0.y, r0.y -absneg.f r1.w, (neg)c6.x -mov.f32f32 r0.z, r0.z -mul.f r2.z, c8.x, r1.z -add.f r2.w, c9.y, (neg)r1.z -mul.f r1.w, r1.w, c6.x -add.f r3.x, c9.y, (neg)r0.z -mov.f32f32 r2.z, r2.z -mul.f r3.z, c8.x, r0.z -mul.f r1.w, r1.w, r0.y -mov.f32f32 r2.w, r2.w -add.f r0.x, r0.x, (neg)r2.z -mov.f32f32 r2.z, r3.z -mov.f32f32 r1.w, r1.w -mov.f32f32 r3.x, r3.x -mov.f32f32 r0.x, r0.x -add.f r0.w, r0.w, (neg)r2.z -mul.f r0.y, r1.w, r0.y -mul.f r1.w, r2.w, r3.x -add.f r2.z, c9.x, r0.x +add.f r0.x, r0.x, (neg)r1.w +mul.f r1.w, c8.x, r1.z +mov.f32f32 r2.w, r2.z +mul.f r0.y, r0.z, r0.y +mov.f32f32 r0.z, r0.x +add.f r0.w, r0.w, (neg)r1.w add.f r0.x, c9.z, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r2.z, r2.z -mov.f32f32 r0.x, r0.x mul.f r0.y, r0.y, c9.w -add.f r3.z, c9.z, r0.w -mul.f r2.z, r2.z, c3.z -mul.f r0.x, r0.x, c3.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r3.z, r3.z -mov.f32f32 r3.w, r2.z -mov.f32f32 r4.x, r0.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r2.z, r2.z -mov.f32f32 r4.y, r3.w -add.f r0.w, c9.x, r0.w -exp2 r0.y, r0.y -(ss)mov.f32f32 r0.y, r0.y -mov.f32f32 r3.w, r4.x -mov.f32f32 r5.x, r0.x -mov.f32f32 r0.x, r0.w -add.f r0.w, c11.y, (neg)r0.y -mov.f32f32 r5.w, r3.w -mul.f r3.z, r3.z, c3.w -mul.f r0.x, r0.x, c3.w -mul.f r0.w, r0.w, c6.y -mul.f r0.y, r0.y, c8.z -mov.f32f32 r3.w, r3.z -mov.f32f32 r4.x, r0.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r6.z, r2.z -mov.f32f32 r2.z, r3.z -mov.f32f32 r4.z, r4.x -add.f r2.y, r2.y, c8.w -add.f r0.y, r0.y, r0.w -mov.f32f32 r0.w, r3.w -mov.f32f32 r5.y, r0.x -mov.f32f32 r0.x, r2.y -mov.f32f32 r0.y, r0.y -mov.f32f32 r6.x, r0.w -mov.f32f32 r0.w, r2.y -mov.f32f32 r4.w, r0.x -mov.f32f32 r0.x, r0.y -mov.f32f32 r0.y, r2.y -mov.f32f32 r6.w, r2.z -mov.f32f32 r6.y, r0.w -mov.f32f32 r0.w, r2.y -mov.f32f32 r5.z, r0.y -sam.s (f32)(x)r0.y, r4.y, s#2, t#2 -(sy)mov.f32f32 r0.y, r0.y +add.f r0.z, c9.x, r0.z +mov.f32f32 r1.w, r0.w +mul.f r3.y, r0.x, c3.z +add.f r0.x, c9.z, r0.w +mul.f r4.x, r0.z, c3.z +add.f r0.z, c9.x, r1.w +mov.f32f32 r5.x, r3.y +mul.f r3.z, r0.x, c3.w +mov.f32f32 r5.w, r4.x +mul.f r5.y, r0.z, c3.w +exp2 r0.x, r0.y +(ss)mov.f32f32 r0.y, r0.x +mov.f32f32 r5.z, r3.w +mov.f32f32 r4.y, r3.z +mov.f32f32 r6.x, r5.y +mov.f32f32 r6.y, r3.w +add.f r0.y, c11.y, (neg)r0.y +mov.f32f32 r4.z, r3.w +sam.s (f32)(x)r3.x, r3.y, s#2, t#2 +add.f r0.z, c9.y, (neg)r1.z +sam.s (f32)(x)r6.z, r5.x, s#2, t#2 +mul.f r0.x, r0.x, c8.z +add.f r0.w, r2.y, c8.z +mul.f r0.y, r0.y, c6.y +(ss)nop +sam.s (f32)(x)r5.x, r5.w, s#2, t#2 +mov.f32f32 r1.z, r0.z +mul.f r0.z, r2.x, r0.z +mul.f r1.w, r2.z, r0.w +add.f r0.x, r0.x, r0.y +mul.f r0.y, r2.w, r1.z +sam.s (f32)(x)r3.y, r4.x, s#2, t#2 +mul.f r0.w, r2.x, r0.w +(sy)cmps.f.lt r1.z, r4.w, c10.y +bary.f r2.x, 6, r1.x +mul.f r0.y, r0.y, r5.x max.f r0.x, r0.x, c8.y -mov.f32f32 r7.x, r0.w -mov.f32f32 r0.w, r1.w -mov.f32f32 r0.y, r0.y +mad.f32 r0.y, r0.z, r6.z, r0.y +cov.u32f32 r0.z, r1.z +mad.f32 r0.y, r1.w, r3.y, r0.y min.f r0.x, r0.x, c8.z -sam.s (f32)(x)r1.w, r5.w, s#2, t#2 -nop -(sy)mov.f32f32 r1.w, r1.w -mul.f r0.y, r0.w, r0.y -sam.s (f32)(x)r0.w, r5.x, s#2, t#2 -(sy)mov.f32f32 r0.w, r0.w -add.f r1.z, r1.z, c8.z -add.f r2.y, c11.y, (neg)r0.x -add.f r2.z, c11.y, (neg)r0.x -add.f r3.z, c11.y, (neg)r0.x -mul.f r3.x, r1.z, r3.x -mul.f r2.y, r2.y, c5.z -mul.f r2.z, r2.z, c5.y -mul.f r3.w, r3.z, c5.x -mov.f32f32 r3.x, r3.x -sam.s (f32)(x)r3.z, r6.z, s#2, t#2 -add.f r0.z, r0.z, c8.z -(sy)mov.f32f32 r3.z, r3.z -bary.f r4.x, 5, r1.x -mad.f32 r0.y, r3.x, r0.w, r0.y -mul.f r0.w, r2.w, r0.z +mad.f32 r0.y, r0.w, r3.x, r0.y +cmps.f.ne r0.z, r0.z, c8.y (rpt1)nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.w, r0.w -mul.f r0.z, r1.z, r0.z -mov.f32f32 r1.z, r4.x -mov.f32f32 r2.x, r2.x -mad.f32 r0.y, r0.w, r3.z, r0.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r3.z, r1.z -mov.f32f32 r2.w, r2.x -mov.f32f32 r0.y, r0.y -bary.f r0.w, 7, r1.x -mad.f32 r0.y, r0.z, r1.w, r0.y -mov.f32f32 r0.z, c8.z -bary.f r1.z, 10, r1.x -(ss)nop -sam (f32)(w)r4.x, r3.y, s#1, t#1 -(sy)cmps.f.lt r1.w, r4.w, c10.y -mov.f32f32 r0.y, r0.y -mov.f32f32 r2.x, r4.w -mov.f32f32 r0.w, r0.w -mov.f32f32 r4.w, r0.z mul.f r0.y, c10.x, r0.y -cov.u32f32 r0.z, r1.w -mov.f32f32 r1.w, r2.x -mov.f32f32 r3.x, r0.w -mov.f32f32 r0.y, r0.y -cmps.f.ne r0.z, r0.z, c8.y -nop -mov.f32f32 r0.w, c8.y -bary.f r2.x, 9, r1.x +bary.f r2.y, 7, r1.x +add.f r0.w, c11.y, (neg)r0.x +add.f r1.z, c11.y, (neg)r0.x +mov.f32f32 r1.w, r0.y +add.f r2.z, c11.y, (neg)r0.x +(rpt1)nop +sam (f32)(xyz)r2.w, r2.x, s#0, t#0 +(ss)bary.f r2.x, 10, r1.x +bary.f r2.y, 9, r1.x bary.f (ei)r1.x, 8, r1.x -(ss)nop -sam (f32)(xyz)r2.w, r2.w, s#0, t#0 -(sy)mul.f r1.y, r3.y, r1.z -sel.b32 r0.z, r0.w, r0.z, r1.w -mul.f r0.w, r3.x, r2.x +mul.f r0.w, r0.w, c5.z +(sy)mul.f r1.y, r3.y, r2.x +mul.f r2.x, r3.x, r2.y mul.f r1.x, r2.w, r1.x -mul.f r1.y, r1.y, r0.y -(rpt2)nop -mov.f32f32 r1.y, r1.y -mul.f r0.w, r0.w, r0.y +mul.f r1.z, r1.z, c5.y +mul.f r1.y, r1.y, r1.w +mul.f r1.w, r2.x, r1.w mad.f32 r1.y, c4.z, r3.y, r1.y +mad.f32 r1.w, c4.y, r3.x, r1.w mul.f r0.y, r1.x, r0.y -(rpt1)nop -mov.f32f32 r1.x, r1.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.y, r0.y -mad.f32 r0.w, c4.y, r3.x, r0.w -mul.f r1.x, r0.x, r1.x +mul.f r1.x, r2.z, c5.x +mul.f r1.y, r0.x, r1.y +mul.f r1.w, r0.x, r1.w mad.f32 r0.y, c4.x, r2.w, r0.y -(rpt1)nop -add.f r1.x, r1.x, r2.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.y, r0.y +mov.f32f32 r2.x, c8.y +add.f r0.w, r1.y, r0.w +add.f r1.y, r1.w, r1.z nop -mul.f r1.x, r1.x, r0.z -mul.f r0.w, r0.x, r0.w +sel.b32 r0.z, r2.x, r0.z, r4.w mul.f r0.x, r0.x, r0.y +mov.f32f32 r2.w, c8.z nop -mov.f32f32 r0.y, r1.x -add.f r0.w, r0.w, r2.z -add.f r0.x, r0.x, r3.w -nop -mov.f32f32 r0.y, r0.y -mul.f r0.w, r0.w, r0.z -mul.f r0.x, r0.x, r0.z -nop -mov.f32f32 r4.z, r0.y -mov.f32f32 r0.y, r0.w -mov.f32f32 r0.x, r0.x -(rpt1)nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x -(rpt1)nop -mov.f32f32 r4.y, r0.y -mov.f32f32 r4.x, r0.x +mul.f r2.z, r0.w, r0.z +mul.f r2.y, r1.y, r0.z +add.f r0.x, r0.x, r1.x +(rpt2)nop +mul.f r2.x, r0.x, r0.z end nop nop nop -; FRAG: outputs: r4.x (1:0) -; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r0.x (5:10,cm=f,il=12,b=1) r3.x (5:11,cm=f,il=16,b=1) -; FRAG: 200 instructions, 0 half, 8 full -; pos (bary): r1.x -; color: r4.x -; fragcoord: r0.x +; FRAG: outputs: r2.x (1:0) +; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r1.z (5:10,cm=f,il=12,b=1) r3.x (5:11,cm=f,il=16,b=1) +; FRAG: 121 instructions, 0 half, 7 full diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-32.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-32.asm index a1623b7..dc38031 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-32.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-32.asm @@ -4,546 +4,368 @@ @in(r0.w) in3 @in(r1.x) in4 @in(r1.y) in5 -@out(r4.w) out0 -@out(r5.x) out1 -@out(r5.y) out2 -@out(r5.z) out3 +@out(r2.x) out0 +@out(r2.y) out1 +@out(r2.z) out2 +@out(r2.w) out3 +@const(c13.x) 0x3f000000, 0x00000000, 0x3f800000, 0xbeef9097 +@const(c14.x) 0x3cff9724, 0x40000000, 0xbf800000, 0xba03126f +@const(c15.x) 0xbf000000, 0x3f800000, 0x3fb8aa65, 0x3de38866 +@const(c16.x) 0x3cf5c28f, 0x00000000, 0x00000000, 0x00000000 +@const(c17.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.x, 19, r1.x bary.f r0.y, 0, r1.x add.f r0.w, r0.w, c13.y bary.f r1.z, 1, r1.x -mov.f32f32 r0.x, r0.x -add.f r1.w, r0.y, c15.x -bary.f r2.x, 23, r1.x -bary.f r2.y, 24, r1.x -mul.f r2.z, r0.x, r0.x +mov.f32f32 r1.w, r0.x +add.f r2.x, r0.y, c15.x +bary.f r2.y, 8, r1.x +bary.f r2.z, 23, r1.x +mul.f r0.x, r0.x, r1.w bary.f r2.w, 20, r1.x -floor.f r3.x, r1.w +floor.f r3.x, r2.x rcp r0.w, r0.w add.f r0.z, r0.z, c13.y add.f r3.y, r1.z, c15.x -mov.f32f32 r2.w, r2.w -add.f r1.w, r1.w, (neg)r3.x +mov.f32f32 r3.z, r2.w +add.f r2.x, r2.x, (neg)r3.x (ss)mul.f r0.z, r0.z, r0.w -(ss)floor.f r0.w, r3.y -mad.f32 r2.z, r2.w, r2.w, r2.z -mov.f32f32 r1.w, r1.w +(ss)absneg.f r0.w, (neg)c10.x +mad.f32 r0.x, r2.w, r3.z, r0.x +bary.f r2.w, 21, r1.x +mov.f32f32 r3.x, r2.x +mul.f r0.w, r0.w, c10.x +floor.f r3.w, r3.y +mov.f32f32 r4.x, r2.w +mul.f r4.y, c13.x, r3.x +mul.f r0.w, r0.w, r0.z mov.f32f32 r0.z, r0.z -absneg.f r3.x, (neg)c10.x -mov.f32f32 r2.z, r2.z -bary.f r3.z, 21, r1.x -mul.f r3.w, c13.x, r1.w -mul.f r3.x, r3.x, c10.x -add.f r0.w, r3.y, (neg)r0.w -mov.f32f32 r3.y, r3.z -mov.f32f32 r3.z, r3.w -mul.f r3.x, r3.x, r0.z -mov.f32f32 r0.w, r0.w -mad.f32 r2.z, r3.y, r3.y, r2.z -add.f r0.y, r0.y, (neg)r3.z -mov.f32f32 r3.x, r3.x -mul.f r3.z, c13.x, r0.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r2.y, r2.y -mov.f32f32 r0.y, r0.y -rsq r3.w, r2.z -(ss)mul.f r4.x, r0.x, r3.w -rsq r4.y, r2.z -(ss)mov.f32f32 r4.y, r4.y -(ss)rsq r2.z, r2.z -(ss)mul.f r4.z, r0.x, r2.z -add.f r4.w, c15.x, r0.y -mov.f32f32 r4.x, r4.x -mul.f r5.x, r0.x, r4.y -mov.f32f32 r4.z, r4.z -mov.f32f32 r4.w, r4.w -absneg.f r4.x, (abs)r4.x -mov.f32f32 r5.x, r5.x +mad.f32 r0.x, r4.x, r4.x, r0.x +add.f r0.y, r0.y, (neg)r4.y +add.f r3.y, r3.y, (neg)r3.w +mul.f r0.z, r0.w, r0.z +add.f r0.w, c14.y, (neg)r3.x +mov.f32f32 r3.x, r0.y +mov.f32f32 r3.w, r3.y +rsq r4.y, r0.x +(ss)mul.f r4.z, r1.w, r4.y +rsq r4.w, r0.x +(ss)mov.f32f32 r5.x, r4.w +(ss)rsq r0.x, r0.x +(ss)mul.f r5.y, r1.w, r0.x +add.f r3.x, c15.x, r3.x absneg.f r4.z, (abs)r4.z -mul.f r4.w, r4.w, c5.z -mov.f32f32 r4.x, r4.x -absneg.f r5.x, (abs)r5.x -mov.f32f32 r4.z, r4.z -mov.f32f32 r5.y, r4.w -add.f r4.x, r4.x, c13.w -mov.f32f32 r5.x, r5.x +mul.f r5.z, r1.w, r5.x +absneg.f r5.y, (abs)r5.y +mul.f r5.w, r3.x, c5.z +add.f r3.x, r4.z, c13.w +absneg.f r4.z, (abs)r5.z +add.f r5.y, r5.y, c13.w +mov.f32f32 r6.x, r5.w +max.f r3.x, r3.x, c13.y +mul.f r5.z, r3.z, r4.y add.f r4.z, r4.z, c13.w -mov.f32f32 r5.y, r5.y -mov.f32f32 r4.x, r4.x -add.f r5.x, r5.x, c13.w -mov.f32f32 r4.z, r4.z -mov.f32f32 r3.z, r3.z -max.f r4.x, r4.x, c13.y -mov.f32f32 r5.x, r5.x -max.f r4.z, r4.z, c13.y -add.f r1.z, r1.z, (neg)r3.z -mov.f32f32 r3.z, r4.x -mul.f r4.x, r2.w, r3.w -max.f r5.x, r5.x, c13.y -mov.f32f32 r4.z, r4.z -mul.f r5.z, r2.w, r2.z -mov.f32f32 r4.x, r4.x -mov.f32f32 r5.x, r5.x -mul.f r5.w, r2.w, r4.y -mov.f32f32 r5.z, r5.z -absneg.f r4.x, (abs)r4.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r5.w, r5.w +max.f r5.y, r5.y, c13.y +mul.f r6.y, r3.z, r0.x absneg.f r5.z, (abs)r5.z -mov.f32f32 r4.x, r4.x -add.f r6.x, c15.x, r1.z -absneg.f r5.w, (abs)r5.w -mov.f32f32 r5.z, r5.z -add.f r4.x, r4.x, c13.w -mov.f32f32 r6.x, r6.x -mov.f32f32 r5.w, r5.w +max.f r4.z, r4.z, c13.y +mul.f r5.x, r3.z, r5.x +absneg.f r6.y, (abs)r6.y add.f r5.z, r5.z, c13.w -mov.f32f32 r4.x, r4.x -mul.f r6.x, r6.x, c5.w -add.f r5.w, r5.w, c13.w -mov.f32f32 r5.z, r5.z -max.f r4.x, r4.x, c13.y -mov.f32f32 r6.y, r6.x -mov.f32f32 r5.w, r5.w -max.f r6.z, r5.z, c13.y -mov.f32f32 r4.x, r4.x -mov.f32f32 r5.z, r6.y -max.f r5.w, r5.w, c13.y -mov.f32f32 r6.y, r6.z -add.f r6.z, r3.z, r4.x -mul.f r3.w, r3.y, r3.w -mov.f32f32 r6.w, r5.w -add.f r7.x, r4.z, r6.y -(ss)mul.f r2.z, r3.y, r2.z -mov.f32f32 r3.w, r3.w -add.f r7.y, r5.x, r6.w -mul.f r4.y, r3.y, r4.y -mov.f32f32 r2.z, r2.z -absneg.f r3.w, (abs)r3.w -bary.f r5.w, 2, r1.x -mov.f32f32 r4.y, r4.y -absneg.f r2.z, (abs)r2.z -mov.f32f32 r3.w, r3.w -add.f r7.z, r5.w, c14.w +mul.f r6.z, c13.x, r3.w +mul.f r0.z, r0.z, c15.z +absneg.f r5.x, (abs)r5.x +max.f r5.z, r5.z, c13.y +add.f r6.y, r6.y, c13.w +add.f r1.z, r1.z, (neg)r6.z +add.f r5.x, r5.x, c13.w +add.f r6.z, r3.x, r5.z +mul.f r4.y, r4.x, r4.y +max.f r6.w, r6.y, c13.y +max.f r5.x, r5.x, c13.y +mov.f32f32 r6.y, r1.z +absneg.f r4.y, (abs)r4.y +add.f r7.x, r5.y, r6.w +add.f r7.y, r4.z, r5.x +(ss)mul.f r0.x, r2.w, r0.x +add.f r2.w, r4.y, c13.w +mul.f r4.y, r4.x, r4.w +add.f r4.w, c15.x, r6.y +absneg.f r0.x, (abs)r0.x +max.f r2.w, r2.w, c13.y absneg.f r4.y, (abs)r4.y -mov.f32f32 r2.z, r2.z -add.f r3.w, r3.w, c13.w -mov.f32f32 r5.w, r7.z -mov.f32f32 r4.y, r4.y -add.f r2.z, r2.z, c13.w -mov.f32f32 r3.w, r3.w -mov.f32f32 r5.w, r5.w +mul.f r7.w, r4.w, c5.w +add.f r0.x, r0.x, c13.w +mov.f32f32 r4.w, r2.w add.f r4.y, r4.y, c13.w -mov.f32f32 r2.z, r2.z -max.f r3.w, r3.w, c13.y -mul.f r0.z, r3.x, r0.z -mov.f32f32 r3.x, r4.y -max.f r2.z, r2.z, c13.y -mov.f32f32 r3.w, r3.w -sam.s (f32)(x)r4.y, r5.y, s#4, t#4 -(sy)mov.f32f32 r4.y, r4.y -max.f r3.x, r3.x, c13.y -mov.f32f32 r2.z, r2.z -(ss)add.f r5.y, r6.z, r3.w -mov.f32f32 r4.y, r4.y +mov.f32f32 r6.y, r7.w +max.f r0.x, r0.x, c13.y +add.f r4.w, r6.z, r4.w +max.f r4.y, r4.y, c13.y +bary.f r6.z, 2, r1.x +mov.f32f32 r7.z, r0.x +mov.f32f32 r8.x, r4.w +mov.f32f32 r8.y, r4.y +add.f r9.x, r6.z, c14.w +add.f r7.x, r7.x, r7.z +exp2 r0.z, r0.z +(ss)mov.f32f32 r7.z, r0.z +add.f r7.y, r7.y, r8.y +mov.f32f32 r6.z, r9.x +rcp r8.y, r8.x mov.f32f32 r3.x, r3.x -add.f r5.z, r7.x, r2.z -mov.f32f32 r5.y, r5.y -add.f r5.w, c14.y, (neg)r1.w -add.f r6.z, r7.y, r3.x -mov.f32f32 r5.z, r5.z -mov.f32f32 r0.z, r0.z -mov.f32f32 r5.w, r5.w -add.f r7.x, c14.y, (neg)r0.w -rcp r7.y, r5.y -(ss)mov.f32f32 r7.y, r7.y -mov.f32f32 r6.z, r6.z -mul.f r0.z, r0.z, c15.z -rcp r7.w, r5.y -nop -rcp r8.x, r5.z -mul.f r3.z, r3.z, r7.y -(ss)mov.f32f32 r7.y, r8.x -mov.f32f32 r7.x, r7.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r3.z, r3.z -mul.f r2.x, r2.x, c14.x -rcp r8.x, r6.z -(ss)mov.f32f32 r8.x, r8.x -mul.f r4.z, r4.z, r7.y -mul.f r7.y, r5.w, r7.x -mov.f32f32 r2.x, r2.x -mul.f r5.x, r5.x, r8.x +mov.f32f32 r8.z, r7.x +mov.f32f32 r8.w, r7.y +add.f r7.z, c17.y, (neg)r7.z +(ss)mul.f r3.x, r3.x, r8.y +mul.f r9.y, r2.z, c14.x +sam.s (f32)(x)r9.w, r6.x, s#4, t#4 +rcp r2.z, r7.y +(ss)mov.f32f32 r6.x, r0.w +mul.f r6.y, r7.z, c10.y +mov.f32f32 r6.z, r3.x +mov.f32f32 r7.y, r9.y +rcp r7.z, r8.w mov.f32f32 r4.z, r4.z -mul.f r4.y, r7.y, r4.y -mov.f32f32 r7.y, r2.x -mov.f32f32 r5.x, r5.x -mov.f32f32 r8.x, r2.x -mov.f32f32 r8.y, r2.x -mov.f32f32 r8.z, r7.y -mul.f r2.y, r2.y, c14.x -mov.f32f32 r7.y, r8.x -mov.f32f32 r8.x, r8.y -add.f r0.y, c15.y, r0.y -mov.f32f32 r2.y, r2.y -mov.f32f32 r9.x, r7.y -rcp r7.y, r6.z -nop -(ss)rcp r6.z, r6.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r8.y, r2.y -mov.f32f32 r9.y, r2.y -mov.f32f32 r9.z, r2.y -mul.f r0.y, r0.y, c5.z -mov.f32f32 r8.w, r8.y -mov.f32f32 r9.y, r9.y -mov.f32f32 r8.y, r9.z -mov.f32f32 r9.z, r0.y -(ss)mov.f32f32 r7.y, r7.y -(ss)mov.f32f32 r6.z, r6.z -mov.f32f32 r9.y, r9.y -sam (f32)(xyzw)r9.w, r8.z, s#2, t#2 -(sy)(ss)mov.f32f32 r8.z, r9.w -add.f r8.w, c13.z, (neg)r10.x -mov.f32f32 r9.w, r10.y -sam (f32)(xyzw)r10.x, r8.x, s#0, t#0 -(sy)(ss)mul.f r8.x, r10.z, r4.z -mul.f r8.y, r8.z, r3.z -mov.f32f32 r7.w, r7.w -sam (f32)(xyzw)r10.z, r9.x, s#3, t#3 -(sy)mul.f r8.z, r10.w, r5.x -(ss)mul.f r9.x, r10.z, r5.x -mul.f r5.x, r11.x, r5.x -mul.f r4.x, r4.x, r7.w -mul.f r6.w, r6.w, r7.y -mov.f32f32 r7.y, r8.w -mul.f r7.w, r9.w, r3.z -mov.f32f32 r4.x, r4.x -mov.f32f32 r8.w, r2.y -mov.f32f32 r6.w, r6.w -mov.f32f32 r9.y, r2.y -mov.f32f32 r7.y, r7.y -mov.f32f32 r10.z, r8.w -bary.f r8.w, 22, r1.x -mov.f32f32 r11.x, r9.y -mul.f r3.z, r7.y, r3.z -rcp r7.y, r5.z -(ss)mov.f32f32 r7.y, r7.y -mov.f32f32 r8.w, r8.w -mov.f32f32 r9.y, r9.z -mul.f r10.y, r10.y, r4.z -mul.f r4.z, r10.x, r4.z -mul.f r8.w, r8.w, c14.x -mul.f r6.y, r6.y, r7.y -mov.f32f32 r6.x, r6.x -mul.f r3.x, r3.x, r6.z -mov.f32f32 r6.z, r8.w -mov.f32f32 r6.y, r6.y -mov.f32f32 r2.y, r2.y -mov.f32f32 r9.z, r6.x -mov.f32f32 r6.x, r6.z -mov.f32f32 r7.y, r6.z -mov.f32f32 r11.z, r2.y -mov.f32f32 r2.y, r6.z -mov.f32f32 r10.w, r6.x -mov.f32f32 r11.y, r7.y -mov.f32f32 r6.x, r7.z -mov.f32f32 r11.w, r2.y -mov.f32f32 r2.y, r3.x -exp2 r0.z, r0.z -(ss)mov.f32f32 r0.z, r0.z -mov.f32f32 r9.w, r6.x -sam (f32)(xyzw)r12.x, r10.z, s#2, t#2 -(sy)mov.f32f32 r3.x, r12.y -(ss)nop -sam (f32)(xyzw)r10.z, r11.x, s#3, t#3 -(sy)mad.f32 r6.x, r10.w, r6.w, r8.z -mad.f32 r7.y, r10.z, r6.w, r9.x -mad.f32 r5.x, r11.x, r6.w, r5.x -mad.f32 r3.x, r3.x, r4.x, r8.y -rcp r5.y, r5.y -(ss)mov.f32f32 r5.y, r5.y -mov.f32f32 r6.w, r2.x -add.f r8.y, c13.z, (neg)r12.x -mov.f32f32 r8.z, r12.z -mul.f r3.w, r3.w, r5.y -mov.f32f32 r8.w, r6.w -mov.f32f32 r5.y, r6.z -mov.f32f32 r6.w, r8.y -mov.f32f32 r3.w, r3.w -mov.f32f32 r8.y, r2.x -mov.f32f32 r9.x, r5.y -mov.f32f32 r5.y, r6.w -mad.f32 r6.w, r8.z, r4.x, r7.w -mov.f32f32 r8.y, r8.y -mov.f32f32 r7.w, r6.z -mad.f32 r3.z, r5.y, r4.x, r3.z -sam (f32)(xyzw)r10.z, r11.z, s#0, t#0 -(sy)mad.f32 r4.x, r10.w, r6.y, r10.y -(ss)nop -sam (f32)(xyzw)r11.y, r8.w, s#3, t#3 -(sy)mad.f32 r5.x, r11.w, r2.y, r5.x -mov.f32f32 r8.z, r7.w -mad.f32 r5.y, r11.z, r2.y, r6.x -mad.f32 r2.y, r11.y, r2.y, r7.y -mov.f32f32 r5.x, r5.x -mad.f32 r6.x, r11.x, r6.y, r8.x -rcp r5.z, r5.z +rcp r7.w, r8.z mov.f32f32 r5.y, r5.y -mov.f32f32 r2.y, r2.y -sam (f32)(xyzw)r7.w, r8.y, s#2, t#2 -(sy)mov.f32f32 r7.y, r8.x -mul.f r5.x, c7.z, r5.x -mul.f r5.y, c7.y, r5.y -mul.f r2.y, c7.x, r2.y -mad.f32 r3.x, r7.y, r3.w, r3.x -add.f r7.y, c13.z, (neg)r7.w -mov.f32f32 r7.w, r8.y -(ss)mov.f32f32 r5.z, r5.z -mov.f32f32 r3.x, r3.x -mov.f32f32 r7.y, r7.y -mad.f32 r6.w, r7.w, r3.w, r6.w -mul.f r2.z, r2.z, r5.z -mad.f32 r3.x, c14.y, r3.x, c14.z -mov.f32f32 r5.z, r7.y -mov.f32f32 r6.w, r6.w -mov.f32f32 r2.z, r2.z -mov.f32f32 r3.x, r3.x -bary.f r7.y, 4, r1.x -bary.f r7.w, 5, r1.x -bary.f r8.x, 6, r1.x -mad.f32 r3.z, r5.z, r3.w, r3.z -mul.f r3.w, r7.y, r3.x -mul.f r5.z, r7.w, r3.x -mul.f r3.x, r8.x, r3.x -mov.f32f32 r3.z, r3.z -mad.f32 r6.w, c14.y, r6.w, c14.z -mov.f32f32 r2.x, r2.x -mad.f32 r4.z, r10.z, r6.y, r4.z -mad.f32 r3.z, c14.y, r3.z, c14.z -mov.f32f32 r6.y, r6.w -mov.f32f32 r7.w, r2.x -mov.f32f32 r2.x, r6.z -mov.f32f32 r3.z, r3.z -bary.f r6.z, 10, r1.x -bary.f r6.w, 11, r1.x -bary.f r7.y, 12, r1.x -mov.f32f32 r8.x, r2.x -mov.f32f32 r2.x, r6.z -bary.f r6.z, 7, r1.x +rcp r8.y, r8.w +mov.f32f32 r5.x, r5.x +mov.f32f32 r10.x, r7.y +(ss)bary.f r8.w, 24, r1.x +(ss)mul.f r4.z, r4.z, r7.z +mul.f r5.y, r5.y, r7.w +mov.f32f32 r10.z, r7.y +mul.f r11.x, r8.w, c14.x +mov.f32f32 r7.w, r4.z +mov.f32f32 r8.w, r5.y +mul.f r5.x, r5.x, r8.y +mov.f32f32 r7.z, r11.x +add.f r3.w, c14.y, (neg)r3.w +mul.f r0.z, r0.z, c13.z +mul.f r2.z, r4.y, r2.z +mov.f32f32 r10.y, r7.z +mov.f32f32 r10.w, r7.z +mov.f32f32 r4.y, r5.x +sam (f32)(xyzw)r11.y, r7.y, s#0, t#0 +(sy)mul.f r8.y, r11.w, r8.w +rcp r8.z, r8.z mov.f32f32 r6.w, r6.w -mov.f32f32 r7.y, r7.y -sam.s (f32)(x)r8.y, r9.y, s#4, t#4 -(sy)mov.f32f32 r8.y, r8.y -mul.f r2.x, r2.x, (neg)r6.z -mul.f r6.w, r6.w, (neg)r6.z -mul.f r6.z, r7.y, (neg)r6.z +mul.f r8.w, r11.z, r8.w +mul.f r5.y, r11.y, r5.y +sam (f32)(xyzw)r11.y, r10.x, s#2, t#2 +(sy)(ss)mul.f r10.x, r11.y, r6.z +rcp r8.x, r8.x +mov.f32f32 r5.z, r5.z +sam (f32)(xyzw)r12.x, r10.z, s#3, t#3 +(sy)mul.f r10.y, r12.y, r7.w +add.f r9.z, c13.z, (neg)r11.z +(ss)mul.f r10.z, r12.z, r7.w +(ss)mul.f r5.z, r5.z, r8.x +mov.f32f32 r12.y, r7.z +bary.f r7.w, 22, r1.x +mul.f r6.z, r9.z, r6.z +mov.f32f32 r8.x, r5.z +mul.f r6.w, r6.w, r8.z +mul.f r9.z, r7.w, c14.x +mul.f r4.z, r12.x, r4.z +mul.f r3.x, r11.w, r3.x +mov.f32f32 r8.z, r6.w +mov.f32f32 r11.y, r9.z +mov.f32f32 r10.w, r3.w +add.f r0.z, r0.z, r6.y +mov.f32f32 r6.y, r2.z +mov.f32f32 r7.w, r11.y +mov.f32f32 r12.z, r11.y +mul.f r6.x, r6.x, r10.w +sam (f32)(xyzw)r12.w, r11.x, s#0, t#0 +(sy)mad.f32 r8.y, r13.y, r8.z, r8.y +rcp r7.x, r7.x +(ss)mul.f r0.x, r0.x, r7.x +(ss)mad.f32 r7.x, r13.x, r8.z, r8.w +mad.f32 r5.y, r12.w, r6.w, r5.y +sam (f32)(xyzw)r12.w, r7.z, s#2, t#2 +(sy)mad.f32 r6.w, r13.x, r8.x, r10.x +rcp r4.w, r4.w +(ss)mul.f r2.w, r2.w, r4.w +sam (f32)(xyzw)r11.z, r12.y, s#3, t#3 +(sy)(ss)mad.f32 r4.w, r11.w, r4.y, r10.y +add.f r7.z, c13.z, (neg)r12.w +mad.f32 r4.y, r12.x, r4.y, r10.z +mov.f32f32 r8.z, r2.w +mov.f32f32 r10.x, r7.y +mov.f32f32 r10.y, r11.y +mov.f32f32 r10.z, r7.y +mov.f32f32 r10.w, r11.y +mad.f32 r6.z, r7.z, r8.x, r6.z +mov.f32f32 r7.y, r0.x +sam (f32)(xyzw)r11.w, r9.y, s#0, t#0 +mad.f32 r4.z, r11.z, r5.x, r4.z +mad.f32 r3.x, r13.y, r5.z, r3.x +sam (f32)(xyzw)r12.z, r10.x, s#2, t#2 +(sy)mad.f32 r5.x, r12.w, r8.z, r6.w +mad.f32 r5.z, r12.y, r7.y, r8.y (ss)nop -sam (f32)(xyzw)r8.z, r7.w, s#0, t#0 -(sy)mad.f32 r6.x, r9.x, r2.z, r6.x -mov.f32f32 r2.x, r2.x -mov.f32f32 r6.w, r6.w -mov.f32f32 r6.z, r6.z -mov.f32f32 r6.x, r6.x -mad.f32 r2.x, r2.x, r3.z, r3.w -mad.f32 r3.w, r6.w, r3.z, r5.z -mad.f32 r3.x, r6.z, r3.z, r3.x -mad.f32 r3.z, r8.w, r2.z, r4.x -mov.f32f32 r2.x, r2.x -mov.f32f32 r3.w, r3.w -mad.f32 r0.x, r0.x, r6.y, r2.x -mad.f32 r2.x, r2.w, r6.y, r3.w -mov.f32f32 r2.w, r3.x -mov.f32f32 r3.x, r3.z -mov.f32f32 r0.x, r0.x -mov.f32f32 r2.x, r2.x -mad.f32 r2.w, r3.y, r6.y, r2.w -nop -mul.f r3.y, r0.x, r0.x -mad.f32 r2.z, r8.z, r2.z, r4.z -mad.f32 r3.y, r2.x, r2.x, r3.y -mov.f32f32 r2.w, r2.w -add.f r1.w, r1.w, c13.z -add.f r3.z, c17.y, (neg)r0.z -mov.f32f32 r3.y, r3.y -mov.f32f32 r2.z, r2.z -mad.f32 r3.y, r2.w, r2.w, r3.y -mul.f r3.w, r1.w, r7.x -mul.f r3.z, r3.z, c10.y -mul.f r0.z, r0.z, c13.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r4.x, r4.w -add.f r1.z, c15.y, r1.z -rsq r3.y, r3.y -(ss)mov.f32f32 r3.y, r3.y -mad.f32 r3.w, r3.w, r8.y, r4.y -add.f r0.z, r0.z, r3.z -mov.f32f32 r0.y, r0.y -mul.f r0.x, r0.x, r3.y -mul.f r2.x, r2.x, r3.y -mul.f r2.w, r2.w, r3.y -nop -mov.f32f32 r0.x, r0.x -mov.f32f32 r2.x, r2.x -mov.f32f32 r2.w, r2.w -mov.f32f32 r3.y, r3.w -mul.f r3.z, r0.x, r0.x -mul.f r3.w, (neg)c8.x, r0.x -mad.f32 r3.z, r2.x, r2.x, r3.z -mad.f32 r3.w, (neg)c8.y, r2.x, r3.w -mov.f32f32 r4.x, r4.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r3.z, r3.z -mov.f32f32 r3.w, r3.w -mad.f32 r3.z, r2.w, r2.w, r3.z -mad.f32 r3.w, (neg)c8.z, r2.w, r3.w -mul.f r1.z, r1.z, c5.w -mov.f32f32 r0.z, r0.z -mov.f32f32 r6.y, r0.y -add.f r0.y, r0.w, c13.z -bary.f r0.w, 8, r1.x -rsq r3.z, r3.z -(ss)mov.f32f32 r3.z, r3.z -mov.f32f32 r3.w, r3.w -mov.f32f32 r4.y, r1.z -mov.f32f32 r0.z, r0.z -mul.f r0.x, r0.x, r3.z -max.f r3.w, r3.w, c13.y -mul.f r2.x, r2.x, r3.z -mul.f r2.w, r2.w, r3.z -mov.f32f32 r0.x, r0.x -bary.f r3.z, 13, r1.x -mov.f32f32 r3.w, r3.w -bary.f r4.z, 17, r1.x -bary.f r4.w, 16, r1.x -mul.f r0.x, r0.x, r3.z -mov.f32f32 r2.x, r2.x -bary.f r3.z, 14, r1.x -bary.f r5.z, 18, r1.x -mad.f32 r6.z, c7.y, r3.w, (neg)r4.z -mad.f32 r6.w, c7.x, r3.w, (neg)r4.w -mad.f32 r0.x, r2.x, r3.z, r0.x -mad.f32 r2.x, c7.z, r3.w, (neg)r5.z -mov.f32f32 r3.z, r6.z -mov.f32f32 r3.w, r6.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r2.w, r2.w -bary.f r6.z, 15, r1.x -mov.f32f32 r2.x, r2.x -mad.f32 r3.z, c11.x, r3.z, r4.z -mad.f32 r3.w, c11.x, r3.w, r4.w -mad.f32 r0.x, r2.w, r6.z, r0.x -mad.f32 r2.x, c11.x, r2.x, r5.z -mov.f32f32 r2.w, r3.z -mov.f32f32 r3.z, r3.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r2.x, r2.x -mov.f32f32 r4.y, r4.y -mov.f32f32 r3.w, r7.z -max.f r0.x, c13.y, r0.x +sam (f32)(xyzw)r10.x, r10.z, s#3, t#3 +(sy)mad.f32 r4.y, r10.z, r6.y, r4.y +mad.f32 r4.w, r10.y, r6.y, r4.w +mad.f32 r5.x, c14.y, r5.x, c14.z +bary.f r6.y, 6, r1.x +mul.f r4.y, c7.z, r4.y +mul.f r4.w, c7.y, r4.w +mov.f32f32 r6.w, r5.x +bary.f r7.z, 4, r1.x +bary.f r8.x, 5, r1.x +mul.f r5.x, r6.y, r5.x +add.f r6.y, c13.z, (neg)r12.z +mul.f r7.z, r7.z, r6.w +mul.f r6.w, r8.x, r6.w +mov.f32f32 r8.y, r5.z +mad.f32 r6.y, r6.y, r8.z, r6.z +mad.f32 r6.z, r12.x, r7.y, r7.x +mad.f32 r2.z, r10.x, r2.z, r4.z +mad.f32 r2.w, r13.x, r2.w, r3.x +mad.f32 r3.x, c14.y, r6.y, c14.z +bary.f r4.z, 12, r1.x +bary.f r6.y, 7, r1.x +mov.f32f32 r7.x, r6.z +mov.f32f32 r7.y, r3.x +bary.f r8.x, 10, r1.x +bary.f r8.z, 11, r1.x +mul.f r4.z, r4.z, (neg)r6.y +mul.f r9.y, c7.x, r2.z +mul.f r2.z, r8.x, (neg)r6.y +mul.f r6.y, r8.z, (neg)r6.y +mad.f32 r3.x, r4.z, r3.x, r5.x +mad.f32 r2.w, c14.y, r2.w, c14.z +mad.f32 r2.z, r2.z, r7.y, r7.z +mad.f32 r4.z, r6.y, r7.y, r6.w +mad.f32 r0.x, r11.w, r0.x, r5.y +mov.f32f32 r5.x, r2.w +mad.f32 r2.w, r4.x, r2.w, r3.x +(rpt1)nop +mad.f32 r1.w, r1.w, r5.x, r2.z +mad.f32 r2.z, r3.z, r5.x, r4.z +(rpt1)nop +mov.f32f32 r3.x, r1.w +mov.f32f32 r3.z, r2.z +mov.f32f32 r4.x, r2.w +mov.f32f32 r4.z, r0.x +mul.f r1.w, r1.w, r3.x +mul.f r5.x, r6.x, r9.w +mad.f32 r1.w, r2.z, r3.z, r1.w +add.f r0.y, c15.y, r0.y +mad.f32 r1.w, r4.x, r4.x, r1.w max.f r0.z, r0.z, c13.y -mov.f32f32 r1.z, r1.z -mov.f32f32 r4.z, r3.w -mov.f32f32 r0.x, r0.x +add.f r1.z, c15.y, r1.z +mul.f r8.z, r0.y, c5.z +add.f r0.y, r2.x, c13.z +add.f r2.x, r3.y, c13.z +bary.f r2.z, 9, r1.x +rsq r1.w, r1.w +(ss)mov.f32f32 r3.y, r1.w +(ss)mul.f r1.w, r2.w, r1.w +mov.f32f32 r7.z, r8.z min.f r0.z, r0.z, c13.z -mov.f32f32 r1.z, r1.z -mul.f r3.w, r5.w, r0.y -mov.f32f32 r0.w, r0.w -mul.f r0.y, r1.w, r0.y -sam.s (f32)(x)r1.w, r4.x, s#4, t#4 -(sy)mov.f32f32 r1.w, r1.w -log2 r0.x, r0.x -(ss)mul.f r0.x, c11.y, r0.x -add.f r4.x, c17.y, (neg)r0.z -add.f r4.y, c17.y, (neg)r0.z -add.f r4.z, c17.y, (neg)r0.z -mov.f32f32 r0.x, r0.x -mad.f32 r1.w, r3.w, r1.w, r3.y -mul.f r3.y, r4.x, c9.z -mul.f r3.w, r4.y, c9.y -mul.f r4.x, r4.z, c9.x -mov.f32f32 r1.w, r1.w +mul.f r2.w, r3.x, r3.y +mul.f r3.x, r3.z, r3.y +mov.f32f32 r3.y, r1.w +mov.f32f32 r8.x, r9.x +mov.f32f32 r3.z, r2.w +mul.f r2.w, (neg)c8.x, r2.w +mov.f32f32 r4.x, r3.x +mad.f32 r2.w, (neg)c8.y, r3.x, r2.w +mul.f r3.x, r3.z, r3.z +mad.f32 r1.w, (neg)c8.z, r1.w, r2.w +mad.f32 r2.w, r4.x, r4.x, r3.x +sam.s (f32)(x)r7.y, r7.z, s#4, t#4 +mul.f r3.x, r0.y, r3.w +mad.f32 r2.w, r3.y, r3.y, r2.w +max.f r1.w, r1.w, c13.y +bary.f r3.w, 16, r1.x +(sy)mad.f32 r3.x, r3.x, r7.y, r5.x +add.f r5.x, c17.y, (neg)r0.z +add.f r5.y, c17.y, (neg)r0.z +add.f r6.x, c17.y, (neg)r0.z +rsq r2.w, r2.w +(ss)mov.f32f32 r6.y, r2.w +mov.f32f32 r6.w, r1.w +bary.f r7.y, 17, r1.x +(ss)bary.f r7.z, 18, r1.x +mul.f r3.z, r3.z, r6.y +bary.f r7.w, 13, r1.x +mad.f32 r8.x, c7.z, r6.w, (neg)r7.z +mad.f32 r6.w, c7.y, r6.w, (neg)r7.y +mul.f r4.x, r4.x, r6.y +mul.f r3.z, r3.z, r7.w +bary.f r6.y, 14, r1.x +mad.f32 r7.z, c11.x, r8.x, r7.z +mad.f32 r6.w, c11.x, r6.w, r7.y +mul.f r2.w, r3.y, r2.w +mad.f32 r3.y, r4.x, r6.y, r3.z +bary.f (ei)r1.x, 15, r1.x +mad.f32 r1.y, c7.x, r1.w, (neg)r3.w +(rpt1)nop +mad.f32 r1.x, r2.w, r1.x, r3.y +mad.f32 r1.y, c11.x, r1.y, r3.w +mul.f r8.w, r1.z, c5.w +mul.f r1.z, r5.x, c9.z +max.f r1.x, c13.y, r1.x +mul.f r1.w, r5.y, c9.y +mul.f r3.y, r6.x, c9.x +mov.f32f32 r6.x, r8.w +mov.f32f32 r6.y, r9.x nop -exp2 r0.x, r0.x -(ss)mul.f r4.y, r5.x, r0.x -mul.f r4.z, r5.y, r0.x -mad.f32 r2.x, r6.x, r2.x, r4.y -mad.f32 r2.w, r3.x, r2.w, r4.z -(ss)mul.f r0.x, r2.y, r0.x -mov.f32f32 r6.z, r1.z -mov.f32f32 r1.z, r2.x -mov.f32f32 r2.x, r7.z -mov.f32f32 r2.y, r2.w -mad.f32 r0.x, r2.z, r3.z, r0.x -mov.f32f32 r4.y, r0.w -mov.f32f32 r6.w, r2.x -bary.f (ei)r0.w, 9, r1.x -mov.f32f32 r1.x, c13.z -mov.f32f32 r1.y, c13.y -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r5.z, r1.x -sam.s (f32)(x)r1.x, r6.y, s#4, t#4 -(sy)mov.f32f32 r1.x, r1.x -(rpt2)nop -mad.f32 r0.y, r0.y, r1.x, r1.w -mov.f32f32 r4.z, r0.w +sam.s (f32)(x)r9.z, r8.z, s#4, t#4 +mul.f r0.w, r0.w, r2.x +log2 r1.x, r1.x +(ss)mul.f r1.x, c11.y, r1.x +mul.f r0.y, r0.y, r2.x +sam (f32)(w)r9.w, r2.y, s#1, t#1 +(sy)cmps.f.lt r2.x, r10.z, c16.x +mov.f32f32 r2.w, c13.z +(ss)mov.f32f32 r2.y, c13.y (rpt1)nop -mov.f32f32 r0.y, r0.y -(rpt2)nop -mul.f r0.y, c15.w, r0.y -sam (f32)(w)r6.y, r4.y, s#1, t#1 -(sy)mov.f32f32 r0.w, r7.x -cmps.f.lt r1.x, r7.x, c16.x +exp2 r1.x, r1.x +(ss)mul.f r2.z, r4.y, r1.x +mul.f r3.z, r4.w, r1.x +mad.f32 r2.z, r8.y, r7.z, r2.z +sam.s (f32)(x)r7.y, r5.w, s#4, t#4 +(sy)mad.f32 r0.w, r0.w, r7.y, r3.x +mad.f32 r3.x, r7.x, r6.w, r3.z +mad.f32 r0.y, r0.y, r9.z, r0.w +mul.f r0.w, r9.y, r1.x +(ss)cov.u32f32 r1.x, r2.x nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.w, r0.w -cov.u32f32 r1.x, r1.x +mul.f r0.y, c15.w, r0.y +mad.f32 r0.w, r4.z, r1.y, r0.w +cmps.f.ne r1.x, r1.x, c13.y nop -mul.f r1.z, r1.z, r0.y -mul.f r1.w, r2.y, r0.y -mul.f r0.x, r0.x, r0.y -cmps.f.ne r0.y, r1.x, c13.y -mov.f32f32 r1.x, r1.z -mov.f32f32 r1.z, r1.w -mad.f32 r1.x, c6.z, r6.x, r1.x -mad.f32 r1.z, c6.y, r3.x, r1.z -mov.f32f32 r0.x, r0.x -sel.b32 r0.y, r1.y, r0.y, r0.w -mov.f32f32 r0.w, r1.x -mov.f32f32 r1.x, r1.z -mad.f32 r0.x, c6.x, r2.z, r0.x +mov.f32f32 r1.y, r0.y +mul.f r0.y, r0.w, r0.y +sel.b32 r0.w, r2.y, r1.x, r10.z nop -mul.f r0.w, r0.z, r0.w -mul.f r1.x, r0.z, r1.x -mov.f32f32 r0.x, r0.x +mul.f r1.x, r2.z, r1.y +mul.f r1.y, r3.x, r1.y +mad.f32 r1.x, c6.z, r5.z, r1.x +mad.f32 r1.y, c6.y, r6.z, r1.y +mad.f32 r0.x, c6.x, r0.x, r0.y nop -add.f r0.w, r0.w, r3.y -add.f r1.x, r1.x, r3.w +mul.f r0.y, r0.z, r1.x +mul.f r1.x, r0.z, r1.y mul.f r0.x, r0.z, r0.x nop -mul.f r0.z, r0.w, r0.y -mul.f r0.w, r1.x, r0.y -add.f r0.x, r0.x, r4.x +add.f r0.y, r0.y, r1.z +add.f r0.z, r1.x, r1.w +add.f r0.x, r0.x, r3.y nop -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -mul.f r0.x, r0.x, r0.y +mul.f r2.z, r0.y, r0.w +mul.f r2.y, r0.z, r0.w +mul.f r2.x, r0.x, r0.w +end nop -mov.f32f32 r0.y, r0.z -mov.f32f32 r0.z, r0.w -mov.f32f32 r0.x, r0.x nop -mov.f32f32 r5.y, r0.y -mov.f32f32 r5.x, r0.z -mov.f32f32 r0.x, r0.x -(rpt2)nop -mov.f32f32 r4.w, r0.x -end -; FRAG: outputs: r4.w (1:0) -; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r5.w (5:10,cm=f,il=12,b=1) r6.x (5:11,cm=f,il=16,b=1) r5.w (5:12,cm=f,il=20,b=1) r63.y (5:13,cm=f,il=24,b=1) r1.y (5:14,cm=f,il=28,b=1) r7.x (5:15,cm=f,il=32,b=1) -; FRAG: 539 instructions, 0 half, 65 full -; pos (bary): r1.x -; color: r4.w -; fragcoord: r0.x +; FRAG: outputs: r2.x (1:0) +; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r5.z (5:10,cm=f,il=12,b=1) r7.w (5:11,cm=f,il=16,b=1) r0.y (5:12,cm=f,il=20,b=1) r63.y (5:13,cm=f,il=24,b=1) r1.w (5:14,cm=f,il=28,b=1) r7.x (5:15,cm=f,il=32,b=1) +; FRAG: 354 instructions, 0 half, 14 full diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-33.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-33.asm index 54e5b92..4dd0acb 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-33.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-33.asm @@ -1,204 +1,145 @@ ; options: ; VERT: new compiler -@in(r0.x) in0 -@in(r0.y) in1 -@in(r0.z) in2 -@in(r0.w) in4 -@in(r1.x) in5 -@in(r1.y) in6 -@in(r1.z) in8 -@in(r1.w) in9 -@in(r2.x) in10 -@out(r8.w) out0 -@out(r9.x) out1 -@out(r9.y) out2 -@out(r9.z) out3 -@out(r5.x) out4 -@out(r5.y) out5 -@out(r5.z) out6 -@out(r5.w) out7 -@out(r3.z) out8 -@out(r3.w) out9 -@out(r4.x) out10 -@out(r4.y) out11 -@out(r7.w) out12 -@out(r8.x) out13 -@out(r8.y) out14 -@out(r8.z) out15 -@out(r6.w) out16 -@out(r7.x) out17 -@out(r7.y) out18 -@out(r7.z) out19 -@out(r1.w) out20 -@out(r2.x) out21 -@out(r2.y) out22 -@out(r2.z) out23 -@out(r10.w) out24 -@out(r11.x) out25 -@out(r11.y) out26 -@out(r11.z) out27 -@out(r9.w) out28 -@out(r10.x) out29 -@out(r10.y) out30 -@out(r10.z) out31 -(sy)(ss)add.f r2.y, c4.x, (neg)r0.x -mul.f r2.z, r0.w, r0.w -mul.f r2.w, c8.w, r0.x -mul.f r3.x, c8.z, r0.x -mul.f r3.y, r2.y, r2.y -add.f r3.z, c4.y, (neg)r0.y -add.f r2.z, c13.x, (neg)r2.z -mad.f32 r2.w, c9.w, r0.y, r2.w -mad.f32 r3.x, c9.z, r0.y, r3.x -mad.f32 r3.y, r3.z, r3.z, r3.y -mov.f32f32 r2.z, r2.z -mad.f32 r2.w, c10.w, r0.z, r2.w -mad.f32 r3.x, c10.z, r0.z, r3.x -mov.f32f32 r3.y, r3.y -add.f r3.w, c4.z, (neg)r0.z -mul.f r4.x, r2.z, r2.z -mul.f r4.y, r1.x, r0.w -add.f r2.w, r2.w, c11.w -mad.f32 r3.y, r3.w, r3.w, r3.y -add.f r3.x, r3.x, c11.z -mul.f r4.z, c8.y, r0.x -mul.f r4.w, c8.x, r0.x -add.f r4.y, c13.y, (neg)r4.y -mov.f32f32 r2.w, r2.w -mov.f32f32 r3.x, r3.x -rsq r3.y, r3.y -(ss)mov.f32f32 r3.y, r3.y -mov.f32f32 r4.y, r4.y -mov.f32f32 r5.w, r2.w -mov.f32f32 r5.z, r3.x -mad.f32 r2.y, r2.y, r3.y, (neg)c5.x -mad.f32 r2.w, r4.y, r4.y, r4.x -mad.f32 r3.x, r3.z, r3.y, (neg)c5.y -mad.f32 r3.y, r3.w, r3.y, (neg)c5.z -mov.f32f32 r2.y, r2.y -mov.f32f32 r2.w, r2.w -mul.f r3.z, r1.y, r0.w -mov.f32f32 r3.x, r3.x -mul.f r3.w, r2.y, r2.y -mov.f32f32 r3.y, r3.y -add.f r3.z, c13.y, (neg)r3.z -mad.f32 r3.w, r3.x, r3.x, r3.w -mad.f32 r4.x, c9.y, r0.y, r4.z -mad.f32 r4.z, c9.x, r0.y, r4.w -mad.f32 r4.x, c10.y, r0.z, r4.x -mov.f32f32 r3.w, r3.w -mov.f32f32 r3.z, r3.z -mad.f32 r3.w, r3.y, r3.y, r3.w -add.f r4.x, r4.x, c11.y -mad.f32 r4.z, c10.x, r0.z, r4.z -mul.f r4.w, c0.w, r0.x -mul.f r6.x, c0.z, r0.x -mul.f r6.y, c0.y, r0.x -mul.f r6.z, c0.x, r0.x -rsq r3.w, r3.w -(ss)mov.f32f32 r3.w, r3.w -mad.f32 r2.w, r3.z, r3.z, r2.w -mul.f r4.x, r4.x, c12.y -add.f r4.z, r4.z, c11.x -mul.f r3.y, r3.y, r3.w -mul.f r3.x, r3.x, r3.w -mul.f r2.y, r2.y, r3.w -mov.f32f32 r5.y, r4.x -mov.f32f32 r3.y, r3.y -mov.f32f32 r3.x, r3.x -mov.f32f32 r2.y, r2.y -rsq r2.w, r2.w -(ss)mov.f32f32 r2.w, r2.w -mov.f32f32 r3.y, r3.y -mov.f32f32 r3.x, r3.x -mov.f32f32 r2.y, r2.y +@in(r6.z) in0 +@in(r6.w) in1 +@in(r7.x) in2 +@in(r5.w) in4 +@in(r6.x) in5 +@in(r6.y) in6 +@in(r2.w) in8 +@in(r3.x) in9 +@in(r3.y) in10 +@out(r0.x) out0 +@out(r0.y) out1 +@out(r0.z) out2 +@out(r0.w) out3 +@out(r1.x) out4 +@out(r1.y) out5 +@out(r1.z) out6 +@out(r1.w) out7 +@out(r2.x) out8 +@out(r2.y) out9 +@out(r2.z) out10 +@out(r2.w) out11 +@out(r3.x) out12 +@out(r3.y) out13 +@out(r3.z) out14 +@out(r3.w) out15 +@out(r4.x) out16 +@out(r4.y) out17 +@out(r4.z) out18 +@out(r4.w) out19 +@out(r5.x) out20 +@out(r5.y) out21 +@out(r5.z) out22 +@out(r5.w) out23 +@out(r6.x) out24 +@out(r6.y) out25 +@out(r6.z) out26 +@out(r6.w) out27 +@out(r7.x) out28 +@out(r7.y) out29 +@out(r7.z) out30 +@out(r7.w) out31 +@const(c13.x) 0x3f800000, 0x00000000, 0xbf800000, 0x00000000 +@const(c14.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 +(sy)(ss)add.f r0.x, c4.x, (neg)r6.z +mul.f r0.y, r5.w, r5.w +mul.f r0.z, c8.y, r6.z +mul.f r0.w, c8.x, r6.z +mul.f r1.x, r0.x, r0.x +add.f r1.z, c4.y, (neg)r6.w +add.f r0.y, c13.x, (neg)r0.y +mad.f32 r0.z, c9.y, r6.w, r0.z +mad.f32 r0.w, c9.x, r6.w, r0.w +mad.f32 r1.x, r1.z, r1.z, r1.x +add.f r1.w, c4.z, (neg)r7.x +mov.f32f32 r1.y, r0.y +mad.f32 r0.z, c10.y, r7.x, r0.z +mad.f32 r0.w, c10.x, r7.x, r0.w +mad.f32 r1.x, r1.w, r1.w, r1.x +mul.f r2.x, r1.y, r1.y +mul.f r1.y, r6.x, r5.w +add.f r0.z, r0.z, c11.y +add.f r0.w, r0.w, c11.x +mul.f r2.y, r6.y, r5.w +mul.f r2.z, c8.w, r6.z +rsq r1.x, r1.x +(ss)mov.f32f32 r3.z, r1.x +add.f r3.w, c13.y, (neg)r1.y +mad.f32 r0.x, r0.x, r1.x, (neg)c5.x +mul.f r1.y, r0.z, c12.y +mad.f32 r0.z, r1.z, r3.z, (neg)c5.y +mov.f32f32 r1.z, r3.w +(ss)mov.f32f32 r1.x, r0.x +mad.f32 r1.w, r1.w, r3.z, (neg)c5.z +mov.f32f32 r3.z, r0.z +mad.f32 r2.x, r3.w, r1.z, r2.x +mul.f r1.x, r1.x, r1.x +add.f r2.y, c13.y, (neg)r2.y +mad.f32 r0.z, r0.z, r3.z, r1.x +mov.f32f32 r3.w, r1.w +mul.f r1.x, r0.w, c12.x +mov.f32f32 r0.w, r2.y +mad.f32 r2.z, c9.w, r6.w, r2.z +mad.f32 r0.z, r1.w, r3.w, r0.z +mad.f32 r1.w, c10.w, r7.x, r2.z +mul.f r2.z, c8.z, r6.z +mul.f r4.x, c0.w, r6.z +mul.f r5.x, c0.z, r6.z +mul.f r5.y, c0.y, r6.z +mul.f r5.z, c0.x, r6.z +rsq r0.z, r0.z +(ss)mov.f32f32 r4.z, r0.z +mul.f r4.y, r0.x, r0.z +mad.f32 r0.x, r2.y, r0.w, r2.x +add.f r1.w, r1.w, c11.w +mul.f r4.w, r3.w, r4.z +mul.f r4.z, r3.z, r4.z +(ss)mad.f32 r0.z, c9.z, r6.w, r2.z +mad.f32 r2.y, c1.w, r6.w, r4.x +mad.f32 r5.x, c1.z, r6.w, r5.x +rsq r0.x, r0.x +(ss)mov.f32f32 r3.z, r0.x +mul.f r2.x, r0.y, r0.x +(ss)mad.f32 r0.x, c10.z, r7.x, r0.z +mad.f32 r0.y, c2.w, r7.x, r2.y +mul.f r2.z, r0.w, r3.z +mul.f r2.y, r1.z, r3.z +mov.f32f32 r0.z, r2.x +add.f r1.z, r0.x, c11.z +mov.f32f32 r0.x, r2.z +mov.f32f32 r4.x, r2.y +mul.f r7.y, r6.x, r0.z +add.f r0.w, r0.y, c3.w +mul.f r0.y, r5.w, r0.x +mul.f r3.z, r6.y, r4.x +mad.f32 r3.w, r6.y, r0.z, (neg)r0.y +mad.f32 r3.z, r6.x, r0.x, (neg)r3.z +mad.f32 r4.x, r5.w, r4.x, (neg)r7.y +mad.f32 r0.x, c2.z, r7.x, r5.x +mad.f32 r0.y, c1.y, r6.w, r5.y +mad.f32 r5.x, c1.x, r6.w, r5.z +mad.f32 r0.y, c2.y, r7.x, r0.y +add.f r0.z, r0.x, c3.z +mad.f32 r0.x, c2.x, r7.x, r5.x nop -mov.f32f32 r7.z, r3.y -mov.f32f32 r7.y, r3.x -mov.f32f32 r7.x, r2.y -mul.f r2.y, r2.z, r2.w -mul.f r2.z, r3.z, r2.w -mul.f r2.w, r4.y, r2.w -mul.f r3.x, r4.z, c12.x -mov.f32f32 r2.y, r2.y -mov.f32f32 r2.z, r2.z -mov.f32f32 r2.w, r2.w -mov.f32f32 r5.x, r3.x -mul.f r3.x, r1.x, r2.y -mul.f r3.y, r0.w, r2.z -mad.f32 r3.x, r0.w, r2.w, (neg)r3.x -mad.f32 r3.y, r1.y, r2.y, (neg)r3.y -mul.f r3.z, r1.y, r2.w -mov.f32f32 r3.w, r2.z -mov.f32f32 r3.x, r3.x -mov.f32f32 r3.y, r3.y -mad.f32 r2.z, r1.x, r2.z, (neg)r3.z -mov.f32f32 r4.x, r3.w -mov.f32f32 r3.x, r3.x -mov.f32f32 r3.y, r3.y -mov.f32f32 r2.z, r2.z -mov.f32f32 r2.w, r2.w -mov.f32f32 r3.x, r3.x -mov.f32f32 r8.z, r3.y -mov.f32f32 r2.z, r2.z -nop -mov.f32f32 r6.w, r3.x -mov.f32f32 r3.w, r2.w -mov.f32f32 r8.y, r2.z -mov.f32f32 r2.y, r2.y -mad.f32 r2.z, c1.w, r0.y, r4.w -mad.f32 r2.w, c1.z, r0.y, r6.x -mad.f32 r3.x, c1.y, r0.y, r6.y -mov.f32f32 r3.z, r2.y -mad.f32 r2.y, c2.w, r0.z, r2.z -mad.f32 r2.z, c2.z, r0.z, r2.w -mad.f32 r2.w, c2.y, r0.z, r3.x -mad.f32 r3.x, c1.x, r0.y, r6.z -add.f r2.y, r2.y, c3.w -add.f r2.z, r2.z, c3.z -add.f r2.w, r2.w, c3.y -mad.f32 r3.x, c2.x, r0.z, r3.x -mov.f32f32 r9.z, r2.y -mov.f32f32 r9.y, r2.z -mov.f32f32 r9.x, r2.w -add.f r2.y, r3.x, c3.x -mov.f32f32 r2.z, (0.000000) -mov.f32f32 r2.w, (0.000000) -mov.f32f32 r3.x, (0.000000) -mov.f32f32 r8.w, r2.y -mov.f32f32 r10.z, r2.z -mov.f32f32 r10.y, r2.w -mov.f32f32 r10.x, r3.x -mov.f32f32 r2.y, r0.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r2.z, r0.x -mov.f32f32 r1.y, r1.y -mov.f32f32 r9.w, r2.y -mov.f32f32 r11.z, r0.y -mov.f32f32 r11.y, r2.z -mov.f32f32 r11.x, r1.y -mov.f32f32 r0.y, r1.x -mov.f32f32 r0.w, r0.w -mul.f r1.x, r2.x, c6.z -mul.f r1.y, r1.w, c6.y -mov.f32f32 r10.w, r0.y -mov.f32f32 r2.z, r0.w -mov.f32f32 r2.y, r1.x -mov.f32f32 r2.x, r1.y -mul.f r0.y, r1.z, c6.x -mad.f32 r0.z, c7.x, r0.z, c7.y -mad.f32 r0.x, c7.x, r0.x, c7.y -mov.f32f32 r0.w, c13.z -mov.f32f32 r1.w, r0.y -mov.f32f32 r8.x, r0.z -mov.f32f32 r7.w, r0.x -mov.f32f32 r4.y, r0.w +add.f r0.y, r0.y, c3.y +mov.f32f32 r7.w, (0.000000) +add.f r0.x, r0.x, c3.x +mov.f32f32 r7.z, (0.000000) +mov.f32f32 r7.y, (0.000000) +mul.f r5.z, r3.y, c6.z +mul.f r5.y, r3.x, c6.y +mul.f r5.x, r2.w, c6.x +mad.f32 r3.y, c7.x, r7.x, c7.y +mad.f32 r3.x, c7.x, r6.z, c7.y +mov.f32f32 r2.w, c13.z end nop nop nop -; VERT: outputs: r8.w (0:0) r5.x (5:9) r3.z (5:10) r7.w (5:11) r6.w (5:12) r1.w (5:13) r10.w (5:14) r9.w (5:15) -; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r1.z (0:0,cm=7,il=16,b=0) -; VERT: 153 instructions, 0 half, 12 full -; pos: r8.w +; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11) r4.x (5:12) r5.x (5:13) r6.x (5:14) r7.x (5:15) +; VERT: inputs: r6.z (0:0,cm=7,il=8,b=0) r5.w (0:0,cm=7,il=12,b=0) r2.w (0:0,cm=7,il=16,b=0) +; VERT: 93 instructions, 0 half, 8 full diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-34.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-34.asm index 1058343..18b73b5 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-34.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-34.asm @@ -4,326 +4,227 @@ @in(r0.w) in3 @in(r1.x) in4 @in(r1.y) in5 -@out(r5.w) out0 -@out(r6.x) out1 -@out(r6.y) out2 -@out(r6.z) out3 +@out(r2.x) out0 +@out(r2.y) out1 +@out(r2.z) out2 +@out(r2.w) out3 +@const(c13.x) 0x3f000000, 0x00000000, 0x3f800000, 0x40000000 +@const(c14.x) 0xbf800000, 0xba03126f, 0xbf000000, 0x3f800000 +@const(c15.x) 0x3fb8aa65, 0x3de38866, 0x3cf5c28f, 0x00000000 +@const(c16.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.x, 10, r1.x -bary.f r0.y, 0, r1.x +bary.f r1.z, 0, r1.x add.f r0.w, r0.w, c13.y -bary.f r1.z, 1, r1.x -mov.f32f32 r1.w, r0.x -add.f r2.x, r0.y, c14.z -bary.f r2.y, 8, r1.x +bary.f r1.w, 1, r1.x +mov.f32f32 r2.x, r0.x +bary.f r0.y, 11, r1.x add.f r2.z, r1.z, c14.z -mov.f32f32 r2.w, r1.w -bary.f r1.w, 11, r1.x -floor.f r3.x, r2.x +add.f r2.w, r1.w, c14.z +bary.f r3.x, 8, r1.x +mov.f32f32 r2.y, r0.y +floor.f r3.z, r2.z rcp r0.w, r0.w add.f r0.z, r0.z, c13.y -floor.f r3.y, r2.z -mov.f32f32 r3.z, r1.w -add.f r2.x, r2.x, (neg)r3.x +floor.f r3.w, r2.w +bary.f r3.y, 9, r1.x +add.f r2.z, r2.z, (neg)r3.z (ss)mul.f r0.z, r0.z, r0.w -(ss)add.f r0.w, r2.z, (neg)r3.y -mov.f32f32 r3.x, r3.z -mov.f32f32 r2.x, r2.x -mov.f32f32 r0.z, r0.z -absneg.f r2.z, (neg)c10.x -mov.f32f32 r0.w, r0.w -mul.f r3.y, c13.x, r2.x -add.f r3.z, c13.w, (neg)r2.x -sam (f32)(xyz)r3.w, r2.w, s#2, t#2 -(sy)(ss)mad.f32 r2.w, c13.w, r3.w, c14.x -mul.f r2.z, r2.z, c10.x -mov.f32f32 r3.x, r3.y -mul.f r3.y, c13.x, r0.w -mov.f32f32 r2.w, r2.w +sam (f32)(xyz)r4.x, r2.x, s#2, t#2 +(sy)(ss)mad.f32 r0.w, c13.w, r4.x, c14.x +absneg.f r2.x, (neg)c10.x +mov.f32f32 r2.y, r2.z +add.f r2.w, r2.w, (neg)r3.w +mov.f32f32 r3.z, r0.w bary.f r3.w, 4, r1.x -add.f r0.y, r0.y, (neg)r3.x -mul.f r2.z, r2.z, r0.z -mov.f32f32 r3.x, r3.y -mul.f r3.y, r3.w, r2.w -mad.f32 r3.w, c13.w, r4.x, c14.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r2.z, r2.z -add.f r1.z, r1.z, (neg)r3.x -mov.f32f32 r3.x, r3.w -bary.f r3.w, 12, r1.x -bary.f r4.x, 7, r1.x -add.f r4.z, c14.z, r0.y -mul.f r0.z, r2.z, r0.z -add.f r0.y, c14.w, r0.y -mul.f r2.z, r3.w, (neg)r4.x -mov.f32f32 r3.w, r4.z +mul.f r4.x, c13.x, r2.y +mul.f r2.x, r2.x, c10.x +mov.f32f32 r4.w, r2.w +mul.f r3.w, r3.w, r3.z +mad.f32 r4.y, c13.w, r4.y, c14.x +add.f r1.z, r1.z, (neg)r4.x +mul.f r2.x, r2.x, r0.z mov.f32f32 r0.z, r0.z -mov.f32f32 r0.y, r0.y -mad.f32 r2.z, r2.z, r3.x, r3.y -mul.f r3.y, r3.w, c5.z +mov.f32f32 r4.x, r4.y +bary.f r5.x, 12, r1.x +bary.f r5.y, 7, r1.x +mov.f32f32 r5.z, r1.z +mul.f r0.z, r2.x, r0.z +mul.f r2.x, c13.x, r4.w +mul.f r5.x, r5.x, (neg)r5.y +add.f r5.z, c14.z, r5.z mul.f r0.z, r0.z, c15.x -mul.f r0.y, r0.y, c5.z -mov.f32f32 r2.z, r2.z -mad.f32 r3.w, c13.w, r4.y, c14.x -mov.f32f32 r4.y, r3.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r4.z, r0.y -mov.f32f32 r3.w, r3.w -bary.f r4.w, 21, r1.x -mov.f32f32 r5.x, r4.y -mov.f32f32 r1.z, r1.z -mov.f32f32 r4.y, r4.z -mad.f32 r2.z, r4.w, r3.w, r2.z -exp2 r0.z, r0.z -(ss)mov.f32f32 r0.z, r0.z -add.f r4.z, c14.z, r1.z -mov.f32f32 r5.w, r4.y -mov.f32f32 r2.z, r2.z -add.f r4.y, c16.y, (neg)r0.z -mov.f32f32 r4.z, r4.z +add.f r1.w, r1.w, (neg)r2.x +mad.f32 r2.x, r5.x, r4.x, r3.w +mad.f32 r3.w, c13.w, r4.z, c14.x +mul.f r5.z, r5.z, c5.z +mov.f32f32 r4.z, r1.w add.f r1.z, c14.w, r1.z -mul.f r4.w, r2.z, r2.z -bary.f r5.y, 5, r1.x -mul.f r4.z, r4.z, c5.w -mul.f r4.y, r4.y, c10.y -mul.f r0.z, r0.z, c13.z -mul.f r5.z, r5.y, r2.w -bary.f r5.y, 13, r1.x -mov.f32f32 r6.x, r4.z -add.f r0.z, r0.z, r4.y -mov.f32f32 r1.z, r1.z -mul.f r4.y, r5.y, (neg)r4.x -mov.f32f32 r5.y, r6.x -bary.f r6.x, 2, r1.x -mov.f32f32 r0.z, r0.z -mad.f32 r4.y, r4.y, r3.x, r5.z -mul.f r1.z, r1.z, c5.w -add.f r6.z, r6.x, c14.y -mov.f32f32 r0.y, r0.y -mov.f32f32 r4.y, r4.y -bary.f r5.z, 22, r1.x -mov.f32f32 r6.x, r6.z -mov.f32f32 r0.z, r0.z -mov.f32f32 r6.y, r1.z -mad.f32 r4.y, r5.z, r3.w, r4.y -mov.f32f32 r5.z, r6.x +mov.f32f32 r5.x, r3.w +bary.f r5.w, 21, r1.x +mov.f32f32 r6.y, r5.z +exp2 r0.z, r0.z +(ss)mov.f32f32 r6.x, r0.z +add.f r4.z, c14.z, r4.z +mad.f32 r2.x, r5.w, r5.x, r2.x +mul.f r7.x, r1.z, c5.z +add.f r1.z, c14.w, r1.w +add.f r1.w, c16.y, (neg)r6.x +mov.f32f32 r7.w, r2.x +mul.f r8.y, r4.z, c5.w +mov.f32f32 r8.x, r7.x +mul.f r7.y, r1.z, c5.w +mul.f r1.z, r2.x, r7.w +bary.f r2.x, 5, r1.x +mov.f32f32 r6.z, r8.y +mul.f r1.w, r1.w, c10.y +(ss)mul.f r0.z, r0.z, c13.z +mul.f r2.x, r2.x, r3.z +bary.f r3.z, 13, r1.x +bary.f r4.z, 2, r1.x +add.f r0.z, r0.z, r1.w +mov.f32f32 r5.w, r7.y +mul.f r1.w, r3.z, (neg)r5.y +add.f r7.z, r4.z, c14.y max.f r0.z, r0.z, c13.y -mov.f32f32 r6.x, r6.y -mov.f32f32 r4.y, r4.y -mov.f32f32 r6.w, r0.y -mov.f32f32 r0.y, r3.y +add.f r2.y, c13.w, (neg)r2.y +mad.f32 r1.w, r1.w, r4.x, r2.x +bary.f r2.x, 22, r1.x +mov.f32f32 r6.w, r7.z min.f r0.z, r0.z, c13.z -mad.f32 r3.y, r4.y, r4.y, r4.w -sam.s (f32)(x)r4.w, r5.x, s#4, t#4 -(sy)mov.f32f32 r4.w, r4.w -mov.f32f32 r6.x, r6.x -(ss)add.f r5.x, c16.y, (neg)r0.z -mov.f32f32 r3.y, r3.y -bary.f r5.y, 6, r1.x -mov.f32f32 r4.w, r4.w -mov.f32f32 r3.z, r3.z -add.f r5.z, c13.w, (neg)r0.w -mul.f r2.w, r5.y, r2.w -bary.f r5.y, 14, r1.x -mul.f r5.x, r5.x, c9.z -add.f r6.y, c16.y, (neg)r0.z -add.f r7.x, c16.y, (neg)r0.z -mul.f r4.x, r5.y, (neg)r4.x -mov.f32f32 r5.y, r5.z -mul.f r5.z, r6.y, c9.y -mul.f r7.z, r7.x, c9.x -mad.f32 r2.w, r4.x, r3.x, r2.w -mul.f r3.x, r3.z, r5.y -mov.f32f32 r4.x, r6.z -mov.f32f32 r4.z, r4.z -mov.f32f32 r2.w, r2.w -bary.f r6.y, 23, r1.x -mul.f r3.x, r3.x, r4.w -mov.f32f32 r7.x, r4.z -mov.f32f32 r4.z, r6.z -mad.f32 r2.w, r6.y, r3.w, r2.w -mov.f32f32 r6.y, r4.x -mov.f32f32 r7.w, r0.y -mov.f32f32 r7.y, r4.z -mov.f32f32 r0.y, r2.w -mov.f32f32 r1.z, r1.z -add.f r2.x, r2.x, c13.z -add.f r0.w, r0.w, c13.z -mad.f32 r2.w, r0.y, r0.y, r3.y -sam.s (f32)(x)r3.y, r5.w, s#4, t#4 -mov.f32f32 r8.x, r1.z -sam.s (f32)(x)r1.z, r6.w, s#4, t#4 -mov.f32f32 r3.w, r6.z -mul.f r4.x, r2.x, r5.y -mul.f r3.z, r3.z, r0.w -(sy)mov.f32f32 r1.z, r1.z -mov.f32f32 r3.y, r3.y -rsq r2.w, r2.w -(ss)mov.f32f32 r2.w, r2.w -mov.f32f32 r8.y, r3.w -mov.f32f32 r2.y, r2.y -mul.f r0.w, r2.x, r0.w -mul.f r2.x, r2.z, r2.w -mad.f32 r1.z, r4.x, r1.z, r3.x -mul.f r2.z, r4.y, r2.w -mul.f r0.y, r0.y, r2.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r1.z, r1.z -sam.s (f32)(x)r2.w, r7.w, s#4, t#4 -(sy)mov.f32f32 r2.w, r2.w -mov.f32f32 r2.z, r2.z -mul.f r3.x, r2.x, r2.x -mul.f r3.w, (neg)c8.x, r2.x -mad.f32 r1.z, r3.z, r2.w, r1.z -mad.f32 r2.w, r2.z, r2.z, r3.x -mad.f32 r3.x, (neg)c8.y, r2.z, r3.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r3.z, r2.y -mov.f32f32 r2.y, r2.w -mov.f32f32 r2.w, r3.x -mad.f32 r2.y, r0.y, r0.y, r2.y -mad.f32 r2.w, (neg)c8.z, r0.y, r2.w -mov.f32f32 r1.z, r1.z -bary.f r3.x, 9, r1.x -mov.f32f32 r4.x, r0.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r4.y, r1.w -rsq r2.y, r2.y -(ss)mov.f32f32 r2.y, r2.y -mov.f32f32 r2.w, r2.w -mad.f32 r0.w, r0.w, r3.y, r1.z -mov.f32f32 r1.z, r3.x -mul.f r2.x, r2.x, r2.y -max.f r2.w, r2.w, c13.y -mov.f32f32 r0.w, r0.w -mul.f r2.z, r2.z, r2.y -mov.f32f32 r2.x, r2.x -bary.f r3.x, 15, r1.x -mov.f32f32 r2.w, r2.w -bary.f r3.y, 19, r1.x -bary.f r3.w, 18, r1.x -mov.f32f32 r3.x, r3.x -bary.f r4.z, 20, r1.x -mov.f32f32 r3.y, r3.y -mov.f32f32 r3.w, r3.w -mul.f r2.x, r2.x, r3.x -mov.f32f32 r2.z, r2.z -bary.f r3.x, 16, r1.x -mov.f32f32 r4.z, r4.z -mad.f32 r4.w, c7.y, r2.w, (neg)r3.y -mad.f32 r5.y, c7.x, r2.w, (neg)r3.w -mov.f32f32 r3.x, r3.x -mad.f32 r2.w, c7.z, r2.w, (neg)r4.z -mov.f32f32 r4.w, r4.w +mov.f32f32 r8.z, r7.z +mad.f32 r1.w, r2.x, r5.x, r1.w +mov.f32f32 r6.x, r7.z +add.f r2.x, c16.y, (neg)r0.z +add.f r3.z, c16.y, (neg)r0.z +mov.f32f32 r4.x, r1.w +sam.s (f32)(x)r8.w, r6.y, s#4, t#4 +add.f r4.z, c16.y, (neg)r0.z +mov.f32f32 r5.x, r2.y +add.f r4.w, c13.w, (neg)r4.w +mad.f32 r1.z, r1.w, r4.x, r1.z +bary.f r1.w, 6, r1.x +mul.f r2.x, r2.x, c9.z +mul.f r3.z, r3.z, c9.y +mul.f r4.z, r4.z, c9.x +mul.f r0.w, r1.w, r0.w +bary.f r1.w, 14, r1.x +(ss)mov.f32f32 r6.y, r4.w +sam.s (f32)(x)r9.x, r8.x, s#4, t#4 +sam.s (f32)(x)r9.y, r5.z, s#4, t#4 +sam.s (f32)(x)r6.z, r7.x, s#4, t#4 +add.f r2.z, r2.z, c13.z +add.f r2.w, r2.w, c13.z +mul.f r1.w, r1.w, (neg)r5.y +mul.f r5.x, r5.x, r6.y +mul.f r4.w, r2.z, r4.w +mul.f r2.y, r2.y, r2.w +mad.f32 r0.w, r1.w, r4.y, r0.w +bary.f r1.w, 23, r1.x +(sy)mul.f r4.y, r5.x, r8.w +mul.f r2.z, r2.z, r2.w +mad.f32 r2.w, r4.w, r9.x, r4.y +mad.f32 r0.w, r1.w, r3.w, r0.w +mad.f32 r1.w, r2.y, r9.y, r2.w +(ss)nop +sam (f32)(w)r4.w, r3.x, s#1, t#1 +(sy)cmps.f.lt r2.y, r5.z, c15.z +mov.f32f32 r2.w, r0.x +(ss)mov.f32f32 r3.y, r0.w +mad.f32 r1.w, r2.z, r6.z, r1.w +cov.u32f32 r2.y, r2.y +mov.f32f32 r3.x, r0.y +mad.f32 r1.z, r3.y, r3.y, r1.z +mul.f r1.w, c15.y, r1.w +cmps.f.ne r2.y, r2.y, c13.y +(rpt3)nop +rsq r1.z, r1.z +(ss)mov.f32f32 r2.z, r1.z +mov.f32f32 r3.y, r1.w +mul.f r0.w, r0.w, r1.z +(ss)mov.f32f32 r1.z, c13.y +mul.f r3.w, r7.w, r2.z +mul.f r2.z, r4.x, r2.z +mov.f32f32 r4.x, r0.w +sel.b32 r1.z, r1.z, r2.y, r5.z +mov.f32f32 r2.y, r3.w +mul.f r3.w, (neg)c8.x, r3.w +mov.f32f32 r4.y, r2.z +mad.f32 r2.z, (neg)c8.y, r2.z, r3.w +mul.f r3.w, r2.y, r2.y +mad.f32 r0.w, (neg)c8.z, r0.w, r2.z +mad.f32 r2.z, r4.y, r4.y, r3.w +sam (f32)(xyz)r4.w, r2.w, s#3, t#3 +(sy)(ss)mul.f r3.x, c7.z, r5.y +mad.f32 r2.z, r4.x, r4.x, r2.z +max.f r0.w, r0.w, c13.y +bary.f r2.w, 18, r1.x +(rpt1)nop +mov.f32f32 r3.w, r0.w +bary.f r5.y, 19, r1.x +rsq r2.z, r2.z +(ss)mov.f32f32 r5.z, r2.z +bary.f r5.w, 20, r1.x +mad.f32 r6.x, c7.y, r3.w, (neg)r5.y +mad.f32 r3.w, c7.z, r3.w, (neg)r5.w +mul.f r2.y, r2.y, r5.z +bary.f r6.y, 15, r1.x +mov.f32f32 r5.w, r5.w mov.f32f32 r5.y, r5.y -mad.f32 r2.x, r2.z, r3.x, r2.x -mov.f32f32 r2.z, r2.w -mad.f32 r2.w, c11.x, r4.w, r3.y -mad.f32 r3.x, c11.x, r5.y, r3.w -mov.f32f32 r2.x, r2.x -mul.f r0.y, r0.y, r2.y -mad.f32 r2.y, c11.x, r2.z, r4.z -mov.f32f32 r2.z, r2.w -mov.f32f32 r2.w, r3.x -mov.f32f32 r0.y, r0.y +mul.f r4.y, r4.y, r5.z +mul.f r2.y, r2.y, r6.y +bary.f r5.z, 16, r1.x +mad.f32 r3.w, c11.x, r3.w, r5.w +mad.f32 r5.y, c11.x, r6.x, r5.y +(ss)mul.f r2.z, r4.x, r2.z +mad.f32 r2.y, r4.y, r5.z, r2.y bary.f (ei)r1.x, 17, r1.x -mov.f32f32 r1.y, r2.y -mul.f r0.w, c15.y, r0.w -nop -mov.f32f32 r1.x, r1.x -mov.f32f32 r3.w, r1.z -mov.f32f32 r3.x, r4.x -mov.f32f32 r3.y, r4.y -mad.f32 r0.y, r0.y, r1.x, r2.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r2.x, r0.x -mov.f32f32 r0.x, r1.w -mov.f32f32 r0.y, r0.y -sam (f32)(w)r3.z, r3.z, s#1, t#1 -(sy)mov.f32f32 r1.x, r4.y -cmps.f.lt r1.z, r4.y, c15.z -(ss)nop -sam (f32)(xyz)r3.x, r3.x, s#3, t#3 -(sy)mul.f r1.w, c7.z, r3.z -max.f r0.y, c13.y, r0.y -mov.f32f32 r1.x, r1.x -cov.u32f32 r1.z, r1.z -mov.f32f32 r1.w, r1.w -mov.f32f32 r0.y, r0.y -(ss)mul.f r3.y, c7.y, r3.y -mul.f r3.x, c7.x, r3.x -mov.f32f32 r2.y, r0.x -mov.f32f32 r0.x, c13.z -nop -cmps.f.ne r1.z, r1.z, c13.y -log2 r0.y, r0.y -(ss)mul.f r0.y, c11.y, r0.y -mov.f32f32 r3.z, c13.y -mov.f32f32 r3.y, r3.y -mov.f32f32 r3.x, r3.x -mov.f32f32 r0.y, r0.y -sel.b32 r1.x, r3.z, r1.z, r1.x -sam (f32)(xyz)r3.z, r2.x, s#0, t#0 -mov.f32f32 r6.z, r0.x +mad.f32 r0.w, c7.x, r0.w, (neg)r2.w +(rpt1)nop +mad.f32 r1.x, r2.z, r1.x, r2.y +mov.f32f32 r1.y, r2.w +mul.f r2.y, c7.y, r5.x +mul.f r2.z, c7.x, r4.w +max.f r1.x, c13.y, r1.x +mad.f32 r0.w, c11.x, r0.w, r1.y +sam (f32)(xyz)r5.z, r0.x, s#0, t#0 +mov.f32f32 r2.w, c13.z (rpt3)nop -exp2 r0.x, r0.y -(ss)mul.f r0.y, r1.w, r0.x -mul.f r1.z, r3.y, r0.x -(sy)mad.f32 r0.y, r4.x, r1.y, r0.y -mad.f32 r1.y, r3.w, r2.z, r1.z -mul.f r0.x, r3.x, r0.x -nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.y, r1.y -mad.f32 r0.x, r3.z, r2.w, r0.x -nop -mul.f r0.y, r0.y, r0.w -mul.f r1.y, r1.y, r0.w -mov.f32f32 r0.x, r0.x -nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.y, r1.y -mad.f32 r0.y, c6.z, r4.x, r0.y -mad.f32 r1.y, c6.y, r3.w, r1.y -mul.f r0.x, r0.x, r0.w +(ss)log2 r0.x, r1.x +(ss)mul.f r0.x, c11.y, r0.x +(rpt5)nop +exp2 r0.x, r0.x +(ss)mul.f r0.y, r3.x, r0.x +(ss)mul.f r1.x, r2.y, r0.x +(sy)mad.f32 r0.y, r6.x, r3.w, r0.y +mad.f32 r1.x, r5.w, r5.y, r1.x +mul.f r0.x, r2.z, r0.x nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.w, r1.y -mov.f32f32 r0.x, r0.x +mul.f r0.y, r0.y, r3.y +mul.f r1.x, r1.x, r3.y +mad.f32 r0.y, c6.z, r6.x, r0.y +mad.f32 r1.x, c6.y, r5.w, r1.x +mad.f32 r0.x, r5.z, r0.w, r0.x nop mul.f r0.y, r0.z, r0.y -mul.f r0.w, r0.z, r0.w -mad.f32 r0.x, c6.x, r3.z, r0.x +mul.f r0.w, r0.z, r1.x +mul.f r0.x, r0.x, r1.w nop -add.f r0.y, r0.y, r5.x -add.f r0.w, r0.w, r5.z -mov.f32f32 r0.x, r0.x +add.f r0.y, r0.y, r2.x +add.f r0.w, r0.w, r3.z +mad.f32 r0.x, c6.x, r5.z, r0.x nop -mul.f r0.y, r0.y, r1.x -mul.f r0.w, r0.w, r1.x +mul.f r2.z, r0.y, r1.z +mul.f r2.y, r0.w, r1.z mul.f r0.x, r0.z, r0.x -nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.w -add.f r0.x, r0.x, r7.z -nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -mul.f r0.x, r0.x, r1.x -nop -mov.f32f32 r6.y, r0.y -mov.f32f32 r6.x, r0.z -mov.f32f32 r0.x, r0.x (rpt2)nop -mov.f32f32 r0.x, r0.x +add.f r0.x, r0.x, r4.z (rpt2)nop -mov.f32f32 r5.w, r0.x +mul.f r2.x, r0.x, r1.z end nop nop -; FRAG: outputs: r5.w (1:0) -; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r3.y (5:10,cm=f,il=12,b=1) r1.x (5:11,cm=f,il=16,b=1) r2.y (5:12,cm=f,il=20,b=1) r2.z (5:13,cm=f,il=24,b=1) r5.z (5:14,cm=f,il=28,b=1) -; FRAG: 317 instructions, 0 half, 9 full -; pos (bary): r1.x -; color: r5.w -; fragcoord: r0.x +nop +; FRAG: outputs: r2.x (1:0) +; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r4.z (5:10,cm=f,il=12,b=1) r63.z (5:11,cm=f,il=16,b=1) r5.z (5:12,cm=f,il=20,b=1) r4.z (5:13,cm=f,il=24,b=1) r1.x (5:14,cm=f,il=28,b=1) +; FRAG: 226 instructions, 0 half, 10 full diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-35.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-35.asm index 497186f..e6a0565 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-35.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-35.asm @@ -1,204 +1,141 @@ ; options: ; VERT: new compiler -@in(r0.x) in0 -@in(r0.y) in1 -@in(r0.z) in2 -@in(r0.w) in4 -@in(r1.x) in5 -@in(r1.y) in6 -@in(r1.z) in8 -@in(r1.w) in9 -@in(r2.x) in10 -@out(r9.z) out0 -@out(r9.w) out1 -@out(r10.x) out2 -@out(r10.y) out3 -@out(r2.w) out4 -@out(r3.x) out5 -@out(r3.y) out6 -@out(r3.z) out7 -@out(r8.z) out8 -@out(r8.w) out9 -@out(r9.x) out10 -@out(r9.y) out11 -@out(r5.z) out12 -@out(r5.w) out13 -@out(r6.x) out14 -@out(r6.y) out15 -@out(r7.z) out16 -@out(r7.w) out17 -@out(r8.x) out18 -@out(r8.y) out19 -@out(r6.z) out20 -@out(r6.w) out21 -@out(r7.x) out22 -@out(r7.y) out23 -@out(r4.x) out24 -@out(r4.y) out25 -@out(r4.z) out26 -@out(r4.w) out27 -(sy)(ss)add.f r2.y, c4.x, (neg)r0.x -mul.f r2.z, r0.w, r0.w -absneg.f r2.w, (neg)c7.y -mul.f r3.x, r0.x, c7.x -mul.f r3.y, r2.y, r2.y -add.f r3.z, c4.y, (neg)r0.y -add.f r2.z, c14.x, (neg)r2.z -mul.f r3.w, r0.x, (neg)r2.w -mad.f32 r2.w, r0.z, (neg)r2.w, r3.x -mad.f32 r3.x, r3.z, r3.z, r3.y -mov.f32f32 r2.z, r2.z -mov.f32f32 r3.y, r3.w -mov.f32f32 r2.w, r2.w -mov.f32f32 r3.x, r3.x -add.f r3.w, c4.z, (neg)r0.z -mul.f r4.x, r2.z, r2.z -mul.f r4.y, r1.x, r0.w -mad.f32 r3.y, (neg)c7.x, r0.z, r3.y -mad.f32 r3.x, r3.w, r3.w, r3.x -mov.f32f32 r2.w, r2.w -mul.f r4.z, c9.w, r0.x -mul.f r4.w, c9.z, r0.x -mul.f r5.x, c9.y, r0.x -mul.f r5.y, c9.x, r0.x -mov.f32f32 r3.y, r3.y -rsq r3.x, r3.x -(ss)mov.f32f32 r3.x, r3.x -add.f r4.y, c14.y, (neg)r4.y -mov.f32f32 r6.x, r2.w -mad.f32 r2.w, c10.w, r0.y, r4.z -mad.f32 r2.y, r2.y, r3.x, (neg)c5.x -mov.f32f32 r4.y, r4.y -mad.f32 r3.z, r3.z, r3.x, (neg)c5.y -mad.f32 r3.x, r3.w, r3.x, (neg)c5.z -mov.f32f32 r2.y, r2.y -mad.f32 r3.w, r4.y, r4.y, r4.x -mov.f32f32 r3.z, r3.z -mov.f32f32 r3.x, r3.x -mul.f r4.x, r2.y, r2.y -mov.f32f32 r3.w, r3.w -mad.f32 r4.x, r3.z, r3.z, r4.x -mul.f r4.z, r1.y, r0.w -mov.f32f32 r3.y, r3.y -mad.f32 r2.w, c11.w, r0.z, r2.w -mov.f32f32 r4.x, r4.x -add.f r4.z, c14.y, (neg)r4.z -mad.f32 r4.x, r3.x, r3.x, r4.x -mov.f32f32 r3.y, r3.y -(rpt4)nop -rsq r4.x, r4.x -(ss)mov.f32f32 r4.x, r4.x -mov.f32f32 r4.z, r4.z -mov.f32f32 r6.y, r3.y -add.f r2.w, r2.w, c12.w -mul.f r3.x, r3.x, r4.x -mul.f r3.y, r3.z, r4.x -mul.f r2.y, r2.y, r4.x -mad.f32 r3.z, r4.z, r4.z, r3.w -mov.f32f32 r3.x, r3.x -mov.f32f32 r3.y, r3.y -mov.f32f32 r2.y, r2.y -mov.f32f32 r2.w, r2.w -mov.f32f32 r3.x, r3.x -mov.f32f32 r3.y, r3.y -mov.f32f32 r2.y, r2.y +@in(r7.x) in0 +@in(r7.y) in1 +@in(r7.z) in2 +@in(r6.y) in4 +@in(r6.z) in5 +@in(r6.w) in6 +@in(r2.w) in8 +@in(r3.x) in9 +@in(r3.y) in10 +@out(r0.x) out0 +@out(r0.y) out1 +@out(r0.z) out2 +@out(r0.w) out3 +@out(r1.x) out4 +@out(r1.y) out5 +@out(r1.z) out6 +@out(r1.w) out7 +@out(r2.x) out8 +@out(r2.y) out9 +@out(r2.z) out10 +@out(r2.w) out11 +@out(r3.x) out12 +@out(r3.y) out13 +@out(r3.z) out14 +@out(r3.w) out15 +@out(r4.x) out16 +@out(r4.y) out17 +@out(r4.z) out18 +@out(r4.w) out19 +@out(r5.x) out20 +@out(r5.y) out21 +@out(r5.z) out22 +@out(r5.w) out23 +@out(r6.x) out24 +@out(r6.y) out25 +@out(r6.z) out26 +@out(r6.w) out27 +@const(c14.x) 0x3f800000, 0x00000000, 0xbf800000, 0x00000000 +@const(c15.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 +(sy)(ss)add.f r0.x, c4.x, (neg)r7.x +mul.f r0.y, r6.y, r6.y +mul.f r0.z, c9.y, r7.x +mul.f r0.w, c9.x, r7.x +mul.f r1.x, r0.x, r0.x +add.f r1.z, c4.y, (neg)r7.y +add.f r0.y, c14.x, (neg)r0.y +mad.f32 r0.z, c10.y, r7.y, r0.z +mad.f32 r0.w, c10.x, r7.y, r0.w +mad.f32 r1.x, r1.z, r1.z, r1.x +add.f r1.w, c4.z, (neg)r7.z +mov.f32f32 r1.y, r0.y +mad.f32 r0.z, c11.y, r7.z, r0.z +mad.f32 r0.w, c11.x, r7.z, r0.w +mad.f32 r1.x, r1.w, r1.w, r1.x +mul.f r2.x, r1.y, r1.y +mul.f r1.y, r6.z, r6.y +add.f r0.z, r0.z, c12.y +add.f r0.w, r0.w, c12.x +mul.f r2.y, r6.w, r6.y +mul.f r2.z, c9.w, r7.x +rsq r1.x, r1.x +(ss)mov.f32f32 r3.z, r1.x +add.f r3.w, c14.y, (neg)r1.y +mad.f32 r0.x, r0.x, r1.x, (neg)c5.x +mul.f r1.y, r0.z, c13.y +mad.f32 r0.z, r1.z, r3.z, (neg)c5.y +mov.f32f32 r1.z, r3.w +(ss)mov.f32f32 r1.x, r0.x +mad.f32 r1.w, r1.w, r3.z, (neg)c5.z +mov.f32f32 r3.z, r0.z +mad.f32 r2.x, r3.w, r1.z, r2.x +mul.f r1.x, r1.x, r1.x +add.f r2.y, c14.y, (neg)r2.y +mad.f32 r0.z, r0.z, r3.z, r1.x +mov.f32f32 r3.w, r1.w +mul.f r1.x, r0.w, c13.x +mov.f32f32 r0.w, r2.y +mad.f32 r2.z, c10.w, r7.y, r2.z +mad.f32 r0.z, r1.w, r3.w, r0.z +mad.f32 r1.w, c11.w, r7.z, r2.z +mul.f r2.z, c9.z, r7.x +mul.f r4.x, c0.w, r7.x +mul.f r4.y, c0.z, r7.x +mul.f r5.z, c0.y, r7.x +mul.f r5.w, c0.x, r7.x +rsq r0.z, r0.z +(ss)mov.f32f32 r4.z, r0.z +mul.f r4.w, r0.x, r0.z +mad.f32 r0.x, r2.y, r0.w, r2.x +add.f r1.w, r1.w, c12.w +mul.f r5.y, r3.w, r4.z +mul.f r5.x, r3.z, r4.z +(ss)mad.f32 r0.z, c10.z, r7.y, r2.z +mad.f32 r2.y, c1.w, r7.y, r4.x +mad.f32 r3.z, c1.z, r7.y, r4.y +rsq r0.x, r0.x +(ss)mov.f32f32 r3.w, r0.x +mul.f r2.x, r0.y, r0.x +(ss)mad.f32 r0.x, c11.z, r7.z, r0.z +mad.f32 r0.y, c2.w, r7.z, r2.y +mul.f r2.z, r0.w, r3.w +mul.f r2.y, r1.z, r3.w +mov.f32f32 r0.z, r2.x +add.f r1.z, r0.x, c12.z +mov.f32f32 r0.x, r2.z +mov.f32f32 r3.w, r2.y +mul.f r4.z, r6.z, r0.z +add.f r0.w, r0.y, c3.w +mul.f r0.y, r6.y, r0.x +mul.f r4.x, r6.w, r3.w +mad.f32 r4.y, r6.w, r0.z, (neg)r0.y +mad.f32 r4.x, r6.z, r0.x, (neg)r4.x +mad.f32 r4.z, r6.y, r3.w, (neg)r4.z +mad.f32 r0.x, c2.z, r7.z, r3.z +mad.f32 r0.y, c1.y, r7.y, r5.z +mad.f32 r3.z, c1.x, r7.y, r5.w +absneg.f r5.z, (neg)c7.y +add.f r0.z, r0.x, c3.z +mad.f32 r0.x, c2.y, r7.z, r0.y +mad.f32 r3.z, c2.x, r7.z, r3.z +mul.f r3.w, r7.x, (neg)r5.z nop -mov.f32f32 r3.x, r3.x -mov.f32f32 r3.y, r3.y -(rpt1)nop -mov.f32f32 r6.w, r3.x -mov.f32f32 r6.z, r3.y -mov.f32f32 r8.y, r2.y -rsq r2.y, r3.z -(ss)mov.f32f32 r2.y, r2.y -(ss)mov.f32f32 r3.z, r2.w -mad.f32 r3.w, c10.z, r0.y, r4.w -mad.f32 r4.x, c10.y, r0.y, r5.x -mul.f r2.z, r2.z, r2.y -mul.f r4.z, r4.z, r2.y -mul.f r2.y, r4.y, r2.y -mad.f32 r3.w, c11.z, r0.z, r3.w -mov.f32f32 r2.z, r2.z -mov.f32f32 r4.y, r4.z -mov.f32f32 r2.y, r2.y -add.f r3.w, r3.w, c12.z -mul.f r4.z, r1.x, r2.z -mul.f r4.w, r0.w, r4.y -mad.f32 r4.z, r0.w, r2.y, (neg)r4.z -mad.f32 r4.w, r1.y, r2.z, (neg)r4.w -mul.f r5.x, r1.y, r2.y -mov.f32f32 r8.z, r4.y -mov.f32f32 r4.z, r4.z -mov.f32f32 r4.w, r4.w -mad.f32 r4.y, r1.x, r4.y, (neg)r5.x -mov.f32f32 r9.x, r8.z -mov.f32f32 r8.x, r4.z -mov.f32f32 r7.w, r4.w -mov.f32f32 r4.y, r4.y -mov.f32f32 r2.y, r2.y -mov.f32f32 r2.z, r2.z -nop -mov.f32f32 r7.z, r4.y -mov.f32f32 r8.w, r2.y -mov.f32f32 r8.z, r2.z -mov.f32f32 r2.y, r3.w -mad.f32 r2.z, c11.y, r0.z, r4.x -mad.f32 r3.w, c10.x, r0.y, r5.y -mul.f r2.x, r2.x, c6.z -mov.f32f32 r3.y, r2.y -add.f r2.y, r2.z, c12.y -mad.f32 r2.z, c11.x, r0.z, r3.w -mov.f32f32 r2.x, r2.x -mul.f r3.w, c0.w, r0.x -mul.f r2.y, r2.y, c13.y -add.f r2.z, r2.z, c12.x -mov.f32f32 r2.x, r2.x -mad.f32 r3.w, c1.w, r0.y, r3.w -mov.f32f32 r3.x, r2.y -mul.f r2.y, r2.z, c13.x -mov.f32f32 r4.x, r2.x -mad.f32 r2.x, c2.w, r0.z, r3.w -mul.f r2.z, c0.z, r0.x -mov.f32f32 r2.w, r2.y -mad.f32 r2.y, c1.z, r0.y, r2.z -add.f r2.x, r2.x, c3.w -mad.f32 r2.y, c2.z, r0.z, r2.y -mul.f r2.z, c0.y, r0.x -mul.f r3.w, c0.x, r0.x -mov.f32f32 r10.y, r2.x -add.f r2.x, r2.y, c3.z -mad.f32 r2.y, c1.y, r0.y, r2.z -mad.f32 r0.y, c1.x, r0.y, r3.w -mul.f r1.w, r1.w, c6.y -mov.f32f32 r10.x, r2.x -mad.f32 r2.x, c2.y, r0.z, r2.y -mad.f32 r0.y, c2.x, r0.z, r0.y -mov.f32f32 r1.w, r1.w -mul.f r1.z, r1.z, c6.x -add.f r2.x, r2.x, c3.y -add.f r0.y, r0.y, c3.x -mov.f32f32 r7.y, r1.w -mov.f32f32 r1.z, r1.z -mov.f32f32 r9.w, r2.x -mov.f32f32 r9.z, r0.y -mov.f32f32 r0.y, r1.y -mov.f32f32 r7.x, r1.z -mov.f32f32 r1.x, r1.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r4.w, r0.y -mad.f32 r0.y, c8.x, r0.z, c8.y -mov.f32f32 r4.z, r1.x -mov.f32f32 r4.y, r0.w -mad.f32 r0.x, c8.x, r0.x, c8.y -mov.f32f32 r5.w, r0.y -mov.f32f32 r0.y, c14.z -nop -mov.f32f32 r5.z, r0.x -nop -mov.f32f32 r9.y, r0.y +add.f r0.y, r0.x, c3.y +add.f r0.x, r3.z, c3.x +mad.f32 r3.w, (neg)c7.x, r7.z, r3.w +mul.f r3.z, r7.x, c7.x +mul.f r6.x, r3.y, c6.z +mad.f32 r3.z, r7.z, (neg)r5.z, r3.z +mul.f r5.w, r3.x, c6.y +mul.f r5.z, r2.w, c6.x +mad.f32 r3.y, c8.x, r7.z, c8.y +mad.f32 r3.x, c8.x, r7.x, c8.y +mov.f32f32 r2.w, c14.z end -; VERT: outputs: r9.z (0:0) r2.w (5:9) r8.z (5:10) r5.z (5:11) r7.z (5:12) r6.z (5:13) r4.x (5:14) -; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r1.z (0:0,cm=7,il=16,b=0) -; VERT: 165 instructions, 0 half, 11 full -; pos: r9.z +nop +; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11) r4.x (5:12) r5.x (5:13) r6.x (5:14) +; VERT: inputs: r7.x (0:0,cm=7,il=8,b=0) r6.y (0:0,cm=7,il=12,b=0) r2.w (0:0,cm=7,il=16,b=0) +; VERT: 95 instructions, 0 half, 8 full diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-36.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-36.asm index a72e66e..4760742 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-36.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-36.asm @@ -4,10 +4,15 @@ @in(r0.w) in3 @in(r1.x) in4 @in(r1.y) in5 -@out(r1.w) out0 -@out(r2.x) out1 -@out(r2.y) out2 -@out(r2.z) out3 +@out(r2.x) out0 +@out(r2.y) out1 +@out(r2.z) out2 +@out(r2.w) out3 +@const(c14.x) 0x3f000000, 0x00000000, 0x3f800000, 0xbeef9097 +@const(c15.x) 0x3cff9724, 0x40000000, 0xbf800000, 0xba03126f +@const(c16.x) 0xbf000000, 0x3f800000, 0x3fb8aa65, 0x3de38866 +@const(c17.x) 0x3cf5c28f, 0x00000000, 0x00000000, 0x00000000 +@const(c18.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.x, 21, r1.x bary.f r0.y, 0, r1.x add.f r0.w, r0.w, c14.y @@ -16,534 +21,347 @@ mul.f r1.w, r0.x, r0.x bary.f r2.x, 22, r1.x add.f r2.y, r0.y, c16.x add.f r2.z, r1.z, c16.x -bary.f r2.w, 8, r1.x +bary.f r2.w, 10, r1.x mad.f32 r1.w, r2.x, r2.x, r1.w -floor.f r3.x, r2.y +bary.f r3.x, 23, r1.x +floor.f r3.y, r2.y rcp r0.w, r0.w add.f r0.z, r0.z, c14.y -floor.f r3.y, r2.z -mov.f32f32 r1.w, r1.w -bary.f r3.z, 23, r1.x -add.f r2.y, r2.y, (neg)r3.x +floor.f r3.z, r2.z +mad.f32 r1.w, r3.x, r3.x, r1.w +add.f r2.y, r2.y, (neg)r3.y (ss)mul.f r0.z, r0.z, r0.w -(ss)add.f r0.w, r2.z, (neg)r3.y -mad.f32 r1.w, r3.z, r3.z, r1.w -mov.f32f32 r2.y, r2.y -mov.f32f32 r0.z, r0.z -absneg.f r2.z, (neg)c11.x -mov.f32f32 r0.w, r0.w -mul.f r3.x, c14.x, r2.y -mov.f32f32 r2.w, r2.w -rsq r3.y, r1.w -(ss)mul.f r3.w, r0.x, r3.y +(ss)absneg.f r0.w, (neg)c11.x +add.f r2.z, r2.z, (neg)r3.z +mov.f32f32 r3.y, r2.y +add.f r2.y, r2.y, c14.z +rsq r3.z, r1.w +(ss)mul.f r3.w, r0.x, r3.z rsq r4.x, r1.w -(ss)mov.f32f32 r4.x, r4.x -mov.f32f32 r3.x, r3.x +(ss)mov.f32f32 r4.y, r4.x (ss)rsq r1.w, r1.w -(ss)mul.f r4.y, r0.x, r1.w -mov.f32f32 r3.w, r3.w -mul.f r4.z, r0.x, r4.x -add.f r0.y, r0.y, (neg)r3.x -mov.f32f32 r3.x, r4.y +(ss)mul.f r4.z, r0.x, r1.w +mul.f r4.w, c14.x, r3.y absneg.f r3.w, (abs)r3.w -mov.f32f32 r4.y, r4.z -mov.f32f32 r0.y, r0.y -absneg.f r3.x, (abs)r3.x -mov.f32f32 r3.w, r3.w -absneg.f r4.y, (abs)r4.y -add.f r4.z, c16.x, r0.y -mov.f32f32 r3.x, r3.x +mul.f r5.x, r0.x, r4.y +absneg.f r4.z, (abs)r4.z +add.f r0.y, r0.y, (neg)r4.w add.f r3.w, r3.w, c14.w -mov.f32f32 r4.y, r4.y -mov.f32f32 r4.z, r4.z -add.f r3.x, r3.x, c14.w -mov.f32f32 r3.w, r3.w -add.f r4.y, r4.y, c14.w -mul.f r4.z, r4.z, c6.z -mov.f32f32 r3.x, r3.x +absneg.f r4.w, (abs)r5.x +add.f r4.z, r4.z, c14.w +mov.f32f32 r5.x, r0.y max.f r3.w, r3.w, c14.y -mov.f32f32 r4.y, r4.y -mov.f32f32 r4.w, r4.z -max.f r3.x, r3.x, c14.y -mov.f32f32 r3.w, r3.w -mul.f r5.x, r2.x, r3.y -max.f r4.y, r4.y, c14.y -mov.f32f32 r5.y, r4.w -mov.f32f32 r3.x, r3.x -mov.f32f32 r4.w, r5.x -mov.f32f32 r4.y, r4.y -mul.f r5.x, r2.x, r4.x -mul.f r5.z, c14.x, r0.w -absneg.f r4.w, (abs)r4.w -mul.f r5.w, r2.x, r1.w -mov.f32f32 r5.x, r5.x -mov.f32f32 r5.z, r5.z -mov.f32f32 r4.w, r4.w -mov.f32f32 r5.w, r5.w -absneg.f r5.x, (abs)r5.x -add.f r1.z, r1.z, (neg)r5.z +mul.f r5.y, r2.x, r3.z add.f r4.w, r4.w, c14.w -absneg.f r5.z, (abs)r5.w -mov.f32f32 r5.x, r5.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r4.w, r4.w -mov.f32f32 r5.z, r5.z -add.f r5.x, r5.x, c14.w -add.f r5.w, c16.x, r1.z +max.f r4.z, r4.z, c14.y +mul.f r5.z, r2.x, r1.w +absneg.f r5.y, (abs)r5.y max.f r4.w, r4.w, c14.y +mul.f r4.y, r2.x, r4.y +absneg.f r5.z, (abs)r5.z +add.f r5.y, r5.y, c14.w +add.f r5.x, c16.x, r5.x +absneg.f r4.y, (abs)r4.y +mul.f r0.w, r0.w, c11.x +max.f r5.y, r5.y, c14.y add.f r5.z, r5.z, c14.w -mov.f32f32 r5.x, r5.x -mov.f32f32 r5.w, r5.w -mov.f32f32 r4.w, r4.w -mov.f32f32 r5.z, r5.z -max.f r5.x, r5.x, c14.y -mul.f r6.x, r5.w, c6.w -add.f r6.y, r3.w, r4.w -mul.f r3.y, r3.z, r3.y -mov.f32f32 r5.x, r5.x -mov.f32f32 r5.w, r6.x -max.f r6.z, r5.z, c14.y -mov.f32f32 r3.y, r3.y -add.f r6.w, r4.y, r5.x -mul.f r4.x, r3.z, r4.x -mov.f32f32 r5.z, r5.w -absneg.f r3.y, (abs)r3.y -bary.f r5.w, 2, r1.x -mov.f32f32 r4.x, r4.x -mov.f32f32 r6.z, r6.z -mov.f32f32 r3.y, r3.y -add.f r7.x, r5.w, c15.w +add.f r4.y, r4.y, c14.w +mul.f r5.w, r5.x, c6.z +add.f r5.x, r3.w, r5.y +mul.f r3.z, r3.x, r3.z +max.f r4.y, r4.y, c14.y +max.f r5.z, r5.z, c14.y +mov.f32f32 r6.x, r5.w +absneg.f r3.z, (abs)r3.z +add.f r6.y, r4.w, r4.y +mul.f r4.x, r3.x, r4.x +add.f r6.z, r4.z, r5.z +add.f r3.z, r3.z, c14.w +(ss)mul.f r1.w, r3.x, r1.w absneg.f r4.x, (abs)r4.x -add.f r7.y, r3.x, r6.z -add.f r3.y, r3.y, c14.w -mov.f32f32 r5.w, r7.x -mov.f32f32 r4.x, r4.x -(ss)mul.f r1.w, r3.z, r1.w -mov.f32f32 r3.y, r3.y -mov.f32f32 r5.w, r5.w -add.f r4.x, r4.x, c14.w -mov.f32f32 r1.w, r1.w -max.f r3.y, r3.y, c14.y -mul.f r2.z, r2.z, c11.x -mov.f32f32 r4.x, r4.x +mov.f32f32 r6.w, r2.z +max.f r3.z, r3.z, c14.y absneg.f r1.w, (abs)r1.w -mov.f32f32 r3.y, r3.y -sam.s (f32)(x)r5.y, r5.y, s#5, t#5 -(sy)(ss)mov.f32f32 r5.y, r5.y -max.f r4.x, r4.x, c14.y -mov.f32f32 r1.w, r1.w -add.f r5.z, r6.y, r3.y -mov.f32f32 r5.y, r5.y -mov.f32f32 r4.x, r4.x +add.f r4.x, r4.x, c14.w +mul.f r7.x, c14.x, r6.w +mov.f32f32 r7.y, r3.z add.f r1.w, r1.w, c14.w -mov.f32f32 r5.z, r5.z -add.f r5.w, c15.y, (neg)r2.y -add.f r6.y, r6.w, r4.x -mov.f32f32 r1.w, r1.w -mul.f r2.z, r2.z, r0.z -mov.f32f32 r5.w, r5.w -add.f r6.w, c15.y, (neg)r0.w -rcp r7.z, r5.z -(ss)mov.f32f32 r7.z, r7.z -mov.f32f32 r6.y, r6.y +max.f r4.x, r4.x, c14.y +add.f r1.z, r1.z, (neg)r7.x +add.f r5.x, r5.x, r7.y max.f r1.w, r1.w, c14.y -mov.f32f32 r6.w, r6.w -mul.f r3.w, r3.w, r7.z -mov.f32f32 r2.z, r2.z -mov.f32f32 r1.w, r1.w -mul.f r7.z, r5.w, r6.w -mov.f32f32 r3.w, r3.w -bary.f r7.w, 25, r1.x -rcp r8.x, r6.y -(ss)mov.f32f32 r8.x, r8.x -mul.f r5.y, r7.z, r5.y -add.f r0.y, c16.y, r0.y -mul.f r7.z, r7.w, c15.x -mul.f r4.y, r4.y, r8.x -add.f r7.y, r7.y, r1.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r7.w, r7.z -mov.f32f32 r4.y, r4.y -mov.f32f32 r8.x, r7.z -mul.f r0.y, r0.y, c6.z -mov.f32f32 r8.y, r7.w -bary.f r7.w, 26, r1.x -mov.f32f32 r8.x, r8.x -mov.f32f32 r8.z, r0.y -mov.f32f32 r7.y, r7.y -mul.f r7.w, r7.w, c15.x -mov.f32f32 r8.w, r8.x -mov.f32f32 r9.y, r8.z -mov.f32f32 r6.x, r6.x -mov.f32f32 r8.x, r7.w -mov.f32f32 r9.x, r7.w -rcp r8.z, r7.y -(ss)mov.f32f32 r9.w, r8.z -mov.f32f32 r9.z, r6.x -mov.f32f32 r8.z, r8.x -mov.f32f32 r6.x, r9.x -mov.f32f32 r8.x, r7.x -mul.f r3.x, r3.x, r9.w -mul.f r0.z, r2.z, r0.z -rcp r2.z, r6.y -nop -(ss)rcp r6.y, r6.y -mov.f32f32 r9.x, r6.x -sam (f32)(xyzw)r9.w, r8.y, s#3, t#3 -(sy)mov.f32f32 r6.x, r9.w -(ss)add.f r8.y, c14.z, (neg)r10.x -mov.f32f32 r8.z, r10.y -mov.f32f32 r9.w, r8.x -mul.f r6.x, r6.x, r3.w -rcp r8.x, r5.z -(ss)mov.f32f32 r8.x, r8.x -sam (f32)(xyzw)r10.x, r8.w, s#4, t#4 -(sy)(ss)mul.f r8.w, r10.y, r4.y -mul.f r9.x, r10.x, r4.y -mul.f r4.y, r10.z, r4.y -mul.f r4.w, r4.w, r8.x -mov.f32f32 r2.z, r2.z -mov.f32f32 r8.x, r8.y -mul.f r8.y, r8.z, r3.w -mov.f32f32 r4.w, r4.w -mov.f32f32 r8.z, r7.w -mul.f r2.z, r5.x, r2.z -mov.f32f32 r5.x, r8.x -sam.s (f32)(x)r8.x, r9.y, s#5, t#5 -(sy)mov.f32f32 r8.x, r8.x -(ss)mov.f32f32 r9.y, r8.z -bary.f r8.z, 24, r1.x -mov.f32f32 r2.z, r2.z -mov.f32f32 r9.z, r7.w -mul.f r3.w, r5.x, r3.w -mul.f r5.x, r8.z, c15.x -add.f r2.y, r2.y, c14.z -mov.f32f32 r9.w, r9.z -mov.f32f32 r3.x, r3.x -mov.f32f32 r8.z, r5.x -mov.f32f32 r10.x, r5.x -mul.f r6.w, r2.y, r6.w -mov.f32f32 r10.y, r7.z -mov.f32f32 r9.z, r8.z -mov.f32f32 r10.x, r10.x -mad.f32 r5.y, r6.w, r8.x, r5.y -mov.f32f32 r10.y, r10.y -mov.f32f32 r6.w, r7.w +mov.f32f32 r7.x, r4.x +mov.f32f32 r7.y, r1.z +mov.f32f32 r7.z, r5.x +mov.f32f32 r7.w, r1.w +add.f r7.x, r6.y, r7.x +add.f r6.y, c16.x, r7.y +mul.f r0.w, r0.w, r0.z +add.f r7.y, r6.z, r7.w mov.f32f32 r0.z, r0.z -mov.f32f32 r5.y, r5.y -sam (f32)(xyzw)r10.z, r9.y, s#3, t#3 -(sy)mov.f32f32 r8.x, r10.w -(ss)nop -sam (f32)(xyzw)r9.y, r9.w, s#4, t#4 -(sy)mad.f32 r8.z, r9.z, r2.z, r8.w -mad.f32 r8.w, r9.y, r2.z, r9.x -mad.f32 r2.z, r9.w, r2.z, r4.y -mad.f32 r4.y, r8.x, r4.w, r6.x -rcp r5.z, r5.z -(ss)mov.f32f32 r5.z, r5.z -mov.f32f32 r6.x, r6.y -add.f r6.y, c14.z, (neg)r10.z -mov.f32f32 r8.x, r11.x -mul.f r3.y, r3.y, r5.z -mul.f r4.x, r4.x, r6.x -mov.f32f32 r5.z, r6.y -mad.f32 r6.x, r8.x, r4.w, r8.y -mov.f32f32 r3.y, r3.y -mov.f32f32 r6.y, r7.z -mov.f32f32 r4.x, r4.x -mov.f32f32 r8.x, r7.z -mov.f32f32 r5.z, r5.z -mov.f32f32 r9.x, r6.y -mov.f32f32 r6.y, r5.x -mov.f32f32 r8.x, r8.x -mov.f32f32 r8.y, r5.x -mad.f32 r3.w, r5.z, r4.w, r3.w -mov.f32f32 r9.y, r6.y -mov.f32f32 r4.z, r4.z -mov.f32f32 r8.y, r8.y -mov.f32f32 r10.z, r6.w -mul.f r0.z, r0.z, c16.z -mov.f32f32 r9.z, r4.z -add.f r1.z, c16.y, r1.z -sam (f32)(xyzw)r10.w, r9.x, s#3, t#3 -(sy)mov.f32f32 r4.z, r11.x -add.f r4.w, c14.z, (neg)r10.w -mov.f32f32 r5.z, r11.y -sam (f32)(xyzw)r10.w, r8.x, s#4, t#4 -(sy)mad.f32 r2.z, r11.y, r4.x, r2.z -mad.f32 r4.y, r4.z, r3.y, r4.y -mad.f32 r4.z, r11.x, r4.x, r8.z -mad.f32 r4.x, r10.w, r4.x, r8.w -mov.f32f32 r2.z, r2.z -mov.f32f32 r4.y, r4.y -mov.f32f32 r4.z, r4.z -mov.f32f32 r4.x, r4.x -mul.f r2.z, c8.z, r2.z -mad.f32 r4.y, c15.y, r4.y, c15.z -mul.f r4.z, c8.y, r4.z -mul.f r4.x, c8.x, r4.x -mov.f32f32 r4.w, r4.w -mov.f32f32 r4.y, r4.y -bary.f r6.y, 4, r1.x -bary.f r6.w, 5, r1.x -(ss)bary.f r8.x, 6, r1.x -mov.f32f32 r2.z, r2.z -mul.f r6.y, r6.y, r4.y +rcp r6.z, r7.z +mov.f32f32 r3.w, r3.w +mov.f32f32 r7.w, r7.x +mov.f32f32 r8.x, r7.y +mul.f r8.z, r6.y, c6.w +(ss)mul.f r3.w, r3.w, r6.z +bary.f r6.z, 25, r1.x +mul.f r0.z, r0.w, r0.z +mov.f32f32 r6.y, r8.z +mov.f32f32 r0.w, r3.w +mul.f r8.w, r6.z, c15.x +rcp r6.z, r7.w mov.f32f32 r4.w, r4.w -mul.f r6.w, r6.w, r4.y -mul.f r4.y, r8.x, r4.y +rcp r8.y, r8.x mov.f32f32 r4.z, r4.z -mad.f32 r3.w, r4.w, r3.y, r3.w -mov.f32f32 r4.x, r4.x -mad.f32 r3.y, r5.z, r3.y, r6.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r3.w, r3.w -sam (f32)(xyzw)r8.x, r10.y, s#0, t#0 -(sy)mul.f r4.w, r8.x, r3.x -mov.f32f32 r3.y, r3.y -mul.f r1.z, r1.z, c6.w -mad.f32 r3.w, c15.y, r3.w, c15.z -mul.f r5.z, r8.z, r3.x -mul.f r3.x, r8.y, r3.x -rcp r6.x, r7.y -mad.f32 r3.y, c15.y, r3.y, c15.z -mov.f32f32 r3.w, r3.w -bary.f r8.x, 12, r1.x -bary.f r8.y, 7, r1.x -bary.f r8.z, 13, r1.x -bary.f r8.w, 14, r1.x -mov.f32f32 r3.y, r3.y -mul.f r8.x, r8.x, (neg)r8.y -mul.f r8.z, r8.z, (neg)r8.y -mul.f r8.y, r8.w, (neg)r8.y -mov.f32f32 r8.w, r1.z -mad.f32 r6.y, r8.x, r3.w, r6.y -mad.f32 r6.w, r8.z, r3.w, r6.w -mad.f32 r3.w, r8.y, r3.w, r4.y +bary.f r9.x, 2, r1.x +mov.f32f32 r9.y, r8.w +bary.f r9.z, 26, r1.x +(ss)mul.f r4.w, r4.w, r6.z +mul.f r4.z, r4.z, r8.y mov.f32f32 r9.w, r8.w -mov.f32f32 r4.y, r6.y -mov.f32f32 r6.y, r6.w -mad.f32 r0.x, r0.x, r3.y, r4.y -mad.f32 r2.x, r2.x, r3.y, r6.y -mov.f32f32 r3.w, r3.w -mov.f32f32 r4.y, r7.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r2.x, r2.x -mad.f32 r3.y, r3.z, r3.y, r3.w -nop -mul.f r3.z, r0.x, r0.x -mov.f32f32 r10.x, r4.y -mad.f32 r3.z, r2.x, r2.x, r3.z -mov.f32f32 r3.y, r3.y -(ss)mov.f32f32 r3.w, r6.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r3.z, r3.z -rcp r4.y, r7.y -(ss)mov.f32f32 r4.y, r4.y -mad.f32 r3.z, r3.y, r3.y, r3.z -sam.s (f32)(x)r6.x, r9.z, s#5, t#5 -mul.f r3.w, r6.z, r3.w -(sy)mov.f32f32 r6.x, r6.x -add.f r0.w, r0.w, c14.z -exp2 r0.z, r0.z -(ss)mov.f32f32 r0.z, r0.z -mov.f32f32 r3.w, r3.w -mov.f32f32 r6.y, r7.w -rsq r3.z, r3.z -(ss)mov.f32f32 r3.z, r3.z -mul.f r5.w, r5.w, r0.w -add.f r6.z, c18.y, (neg)r0.z -mul.f r1.w, r1.w, r4.y -mul.f r0.x, r0.x, r3.z -mul.f r2.x, r2.x, r3.z -mul.f r3.y, r3.y, r3.z +mul.f r10.z, r9.z, c15.x +mov.f32f32 r6.z, r4.w +mov.f32f32 r8.y, r4.z +mov.f32f32 r10.y, r8.w +mov.f32f32 r9.z, r10.z +mov.f32f32 r10.x, r10.z +add.f r11.z, r9.x, c15.w +mul.f r0.z, r0.z, c16.z +(ss)rcp r7.w, r7.w nop -mov.f32f32 r0.x, r0.x -mov.f32f32 r2.x, r2.x -mov.f32f32 r3.y, r3.y -mad.f32 r3.z, r5.w, r6.x, r5.y -mul.f r4.y, r0.x, r0.x -mul.f r5.y, (neg)c9.x, r0.x -mad.f32 r4.y, r2.x, r2.x, r4.y -mad.f32 r5.y, (neg)c9.y, r2.x, r5.y -mov.f32f32 r3.z, r3.z -mov.f32f32 r5.w, r6.y +rcp r7.x, r7.x mov.f32f32 r4.y, r4.y +sam (f32)(xyzw)r11.w, r10.y, s#0, t#0 +(sy)(ss)mul.f r10.y, r12.y, r8.y +sam (f32)(xyzw)r12.y, r9.y, s#3, t#3 +(sy)(ss)mul.f r9.y, r12.y, r0.w +rcp r7.z, r7.z mov.f32f32 r5.y, r5.y -mad.f32 r4.y, r3.y, r3.y, r4.y -mad.f32 r5.y, (neg)c9.z, r3.y, r5.y -mov.f32f32 r0.y, r0.y -mov.f32f32 r6.x, r5.x -mul.f r6.y, r6.z, c11.y +sam (f32)(xyzw)r13.x, r9.w, s#4, t#4 +(sy)mul.f r9.z, r13.y, r6.z +add.f r9.x, c14.z, (neg)r12.z +(ss)mul.f r9.w, r13.z, r6.z +(ss)mul.f r5.y, r5.y, r7.z +mul.f r4.y, r4.y, r7.w +mul.f r0.w, r9.x, r0.w +rcp r6.z, r8.x +mov.f32f32 r5.z, r5.z +mov.f32f32 r7.z, r5.y +mov.f32f32 r7.w, r10.z +(ss)bary.f r8.x, 24, r1.x +mov.f32f32 r10.x, r4.y +mov.f32f32 r11.x, r10.z +(ss)mul.f r5.z, r5.z, r6.z +mul.f r9.x, r8.x, c15.x +mul.f r8.y, r12.x, r8.y +mul.f r4.w, r13.x, r4.w +mul.f r3.w, r12.w, r3.w +mov.f32f32 r8.x, r9.x +mov.f32f32 r11.y, r9.x +mov.f32f32 r12.x, r5.z +mov.f32f32 r10.w, r9.x +mov.f32f32 r6.z, r11.z +mul.f r4.z, r11.w, r4.z +exp2 r0.z, r0.z +(ss)mov.f32f32 r11.w, r0.z +sam (f32)(xyzw)r12.y, r7.w, s#3, t#3 +(sy)(ss)mad.f32 r7.w, r12.z, r7.z, r9.y +rcp r5.x, r5.x +(ss)mul.f r3.z, r3.z, r5.x +sam (f32)(xyzw)r13.x, r11.x, s#4, t#4 +(sy)(ss)mad.f32 r5.x, r13.y, r10.x, r9.z +add.f r8.x, c14.z, (neg)r12.y +mad.f32 r9.y, r13.z, r10.x, r9.w +mov.f32f32 r9.z, r3.z +mov.f32f32 r9.w, r8.w +mov.f32f32 r10.x, r9.x +mul.f r4.x, r4.x, r7.x +mad.f32 r0.w, r8.x, r7.z, r0.w +sam (f32)(xyzw)r10.z, r10.z, s#0, t#0 +(sy)mad.f32 r7.x, r11.x, r12.x, r10.y +rcp r7.y, r7.y +(ss)mul.f r1.w, r1.w, r7.y +(ss)mov.f32f32 r7.y, r4.x +mad.f32 r7.z, r10.w, r12.x, r8.y +sam (f32)(xyzw)r13.y, r9.w, s#3, t#3 +(sy)mad.f32 r7.w, r13.z, r9.z, r7.w +mov.f32f32 r8.x, r8.w +mov.f32f32 r8.y, r9.x +(ss)add.f r9.w, c14.z, (neg)r13.y +mad.f32 r7.w, c15.y, r7.w, c15.z +bary.f r10.x, 6, r1.x +mov.f32f32 r10.y, r1.w +mad.f32 r0.w, r9.w, r9.z, r0.w +mov.f32f32 r9.z, r7.w +bary.f r9.w, 4, r1.x +bary.f r10.w, 5, r1.x +mul.f r7.w, r10.x, r7.w +mad.f32 r0.w, c15.y, r0.w, c15.z +mul.f r9.w, r9.w, r9.z +mul.f r9.z, r10.w, r9.z +bary.f r10.x, 14, r1.x +mov.f32f32 r10.w, r0.w +bary.f r11.x, 12, r1.x +bary.f r11.y, 7, r1.x +bary.f r12.x, 13, r1.x +sam (f32)(xyzw)r14.x, r8.x, s#4, t#4 +(sy)mad.f32 r5.x, r14.y, r7.y, r5.x +mad.f32 r7.y, r14.z, r7.y, r9.y +(ss)mul.f r8.x, r11.x, (neg)r11.y +mul.f r8.y, r12.x, (neg)r11.y +mul.f r9.y, r10.x, (neg)r11.y +mul.f r7.y, c8.z, r7.y +mad.f32 r8.x, r8.x, r10.w, r9.w +mad.f32 r3.w, r12.w, r5.y, r3.w +mad.f32 r5.y, r8.y, r10.w, r9.z +mad.f32 r3.z, r13.w, r3.z, r3.w +mad.f32 r0.w, r9.y, r0.w, r7.w +mul.f r3.w, c8.y, r5.x +sam (f32)(xyzw)r8.w, r8.w, s#0, t#0 +(sy)mad.f32 r5.x, r9.y, r10.y, r7.x +mad.f32 r3.z, c15.y, r3.z, c15.z +mad.f32 r4.y, r13.x, r4.y, r4.w +mad.f32 r4.w, r9.x, r10.y, r7.z +mov.f32f32 r7.x, r5.x +mov.f32f32 r7.z, r3.z +mad.f32 r0.w, r3.x, r3.z, r0.w +(rpt1)nop +mad.f32 r0.x, r0.x, r7.z, r8.x +mad.f32 r2.x, r2.x, r7.z, r5.y +(rpt1)nop +mov.f32f32 r3.x, r0.x +mov.f32f32 r3.z, r2.x +mov.f32f32 r5.y, r0.w +mov.f32f32 r7.z, r4.w +mul.f r0.x, r0.x, r3.x +mad.f32 r4.x, r14.x, r4.x, r4.y +mad.f32 r0.x, r2.x, r3.z, r0.x +mad.f32 r2.x, r10.z, r5.z, r4.z +mad.f32 r0.x, r5.y, r5.y, r0.x +mul.f r4.x, c8.x, r4.x +add.f r4.y, c18.y, (neg)r11.w +mad.f32 r1.w, r8.w, r1.w, r2.x +(ss)nop +sam.s (f32)(x)r8.w, r6.x, s#5, t#5 +add.f r2.x, c15.y, (neg)r3.y +add.f r0.y, c16.y, r0.y +add.f r1.z, c16.y, r1.z +rsq r0.x, r0.x +(ss)mov.f32f32 r3.y, r0.x +(ss)mul.f r0.x, r0.w, r0.x +mov.f32f32 r0.w, r1.w +mov.f32f32 r4.z, r2.x +mul.f r3.x, r3.x, r3.y +mul.f r3.y, r3.z, r3.y +mov.f32f32 r3.z, r0.x +add.f r5.y, c15.y, (neg)r6.w +mov.f32f32 r5.z, r3.x +mul.f r3.x, (neg)c9.x, r3.x +mov.f32f32 r6.x, r3.y +mov.f32f32 r6.y, r5.y +mul.f r6.z, r5.z, r5.z +mad.f32 r3.x, (neg)c9.y, r3.y, r3.x +mad.f32 r3.y, r6.x, r6.x, r6.z +mad.f32 r0.x, (neg)c9.z, r0.x, r3.x +mad.f32 r3.x, r3.z, r3.z, r3.y +mul.f r3.y, r4.z, r6.y +mul.f r4.y, r4.y, c11.y mul.f r0.z, r0.z, c14.z -mov.f32f32 r0.y, r0.y -rsq r4.y, r4.y -(ss)mov.f32f32 r4.y, r4.y -max.f r5.y, r5.y, c14.y -mov.f32f32 r6.x, r6.x -mov.f32f32 r7.w, r0.y -mul.f r0.x, r0.x, r4.y -mov.f32f32 r0.y, r5.y -bary.f r5.y, 19, r1.x -bary.f r6.z, 18, r1.x -mov.f32f32 r0.x, r0.x -bary.f r6.w, 15, r1.x -bary.f r7.y, 20, r1.x -mov.f32f32 r5.y, r5.y -mov.f32f32 r6.z, r6.z -mov.f32f32 r6.w, r6.w -mov.f32f32 r7.y, r7.y -mad.f32 r8.x, c8.y, r0.y, (neg)r5.y -mad.f32 r8.y, c8.x, r0.y, (neg)r6.z -mul.f r0.x, r0.x, r6.w -mul.f r2.x, r2.x, r4.y -mad.f32 r0.y, c8.z, r0.y, (neg)r7.y -mov.f32f32 r6.w, r8.x -mov.f32f32 r8.x, r8.y -mov.f32f32 r2.x, r2.x -bary.f r8.y, 16, r1.x -mov.f32f32 r0.y, r0.y -mad.f32 r5.y, c12.x, r6.w, r5.y -mad.f32 r6.z, c12.x, r8.x, r6.z -mov.f32f32 r6.w, r8.y -mad.f32 r0.y, c12.x, r0.y, r7.y -mov.f32f32 r5.y, r5.y -mov.f32f32 r6.z, r6.z -mad.f32 r0.x, r2.x, r6.w, r0.x -mov.f32f32 r0.y, r0.y -mul.f r2.x, r3.y, r4.y -nop -mov.f32f32 r0.x, r0.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r2.x, r2.x -bary.f r3.y, 17, r1.x -sam (f32)(xyzw)r8.x, r5.w, s#0, t#0 -(sy)mad.f32 r4.y, r8.x, r3.w, r4.w -mov.f32f32 r1.z, r1.z -mad.f32 r4.w, r8.z, r3.w, r5.z -mov.f32f32 r3.y, r3.y -mov.f32f32 r1.w, r1.w -mad.f32 r3.x, r8.y, r3.w, r3.x -mov.f32f32 r3.w, r7.z -mad.f32 r0.x, r2.x, r3.y, r0.x -mov.f32f32 r8.x, r1.z -mov.f32f32 r1.z, r7.x -mov.f32f32 r5.z, r3.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r2.x, r5.x -mov.f32f32 r8.y, r1.z -add.f r0.z, r0.z, r6.y -max.f r0.x, c14.y, r0.x -(ss)mov.f32f32 r5.w, r2.x -mov.f32f32 r6.x, r2.w -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.x, r0.x -sam.s (f32)(x)r1.z, r7.w, s#5, t#5 -(sy)mov.f32f32 r1.z, r1.z -mul.f r0.w, r2.y, r0.w -mov.f32f32 r0.z, r0.z -bary.f r2.x, 9, r1.x -sam (f32)(xyzw)r6.w, r5.z, s#0, t#0 -(sy)mad.f32 r2.y, r7.x, r1.w, r3.x -mad.f32 r0.w, r0.w, r1.z, r3.z -log2 r0.x, r0.x -(ss)mul.f r0.x, c12.y, r0.x -mad.f32 r1.z, r7.y, r1.w, r4.w -mov.f32f32 r2.y, r2.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r1.z, r1.z -mad.f32 r1.w, r6.w, r1.w, r4.y -mul.f r0.w, c16.w, r0.w +mul.f r11.x, r0.y, c6.z +mul.f r11.y, r1.z, c6.w +mul.f r0.y, r2.y, r5.y +rsq r1.z, r3.x +(ss)mov.f32f32 r3.x, r1.z +max.f r0.x, r0.x, c14.y +mul.f r1.z, r3.z, r1.z +bary.f r3.z, 18, r1.x +mul.f r4.z, r5.z, r3.x +bary.f r5.y, 15, r1.x +mov.f32f32 r5.z, r0.x +bary.f r6.y, 19, r1.x +mul.f r3.x, r6.x, r3.x +mul.f r4.z, r4.z, r5.y +bary.f r5.y, 16, r1.x +bary.f r6.x, 20, r1.x +mad.f32 r6.z, c8.y, r5.z, (neg)r6.y +mad.f32 r5.z, c8.z, r5.z, (neg)r6.x +mad.f32 r3.x, r3.x, r5.y, r4.z +bary.f r4.z, 17, r1.x +mov.f32f32 r5.y, r6.x +mov.f32f32 r6.x, r6.y +mad.f32 r0.x, c8.x, r0.x, (neg)r3.z +mad.f32 r1.z, r1.z, r4.z, r3.x +mad.f32 r3.x, c12.x, r5.z, r5.y +mad.f32 r4.z, c12.x, r6.z, r6.x +mov.f32f32 r3.z, r3.z +max.f r1.z, c14.y, r1.z +mad.f32 r0.x, c12.x, r0.x, r3.z +(sy)mul.f r3.y, r3.y, r8.w +add.f r0.z, r0.z, r4.y +mov.f32f32 r8.y, r11.x +mov.f32f32 r8.w, r11.z +mov.f32f32 r6.x, r11.y +log2 r1.z, r1.z +(ss)mul.f r1.z, c12.y, r1.z max.f r0.z, r0.z, c14.y -mov.f32f32 r2.x, r2.x -bary.f r2.w, 10, r1.x -exp2 r0.x, r0.x -(ss)mul.f r2.z, r2.z, r0.x -mul.f r3.x, r4.z, r0.x -mad.f32 r0.y, r1.z, r0.y, r2.z -mad.f32 r2.z, r2.y, r5.y, r3.x -(ss)mul.f r0.x, r4.x, r0.x -mov.f32f32 r1.w, r1.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r2.z, r2.z -mad.f32 r0.x, r1.w, r6.z, r0.x +mov.f32f32 r6.y, r11.z +(rpt1)nop +sam.s (f32)(x)r7.w, r8.y, s#5, t#5 min.f r0.z, r0.z, c14.z -mul.f r0.y, r0.y, r0.w -mul.f r2.z, r2.z, r0.w -mov.f32f32 r0.x, r0.x +(sy)mad.f32 r0.y, r0.y, r7.w, r3.y +exp2 r1.z, r1.z +(ss)mul.f r3.y, r7.y, r1.z +mul.f r3.z, r3.w, r1.z +mad.f32 r3.y, r7.x, r3.x, r3.y +sam.s (f32)(x)r5.y, r5.w, s#5, t#5 +add.f r2.z, r2.z, c14.z +mad.f32 r3.z, r7.z, r4.z, r3.z +(ss)mul.f r1.z, r4.x, r1.z add.f r3.x, c18.y, (neg)r0.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r2.z, r2.z -mad.f32 r0.y, c7.z, r1.z, r0.y -mad.f32 r1.z, c7.y, r2.y, r2.z -mul.f r0.x, r0.x, r0.w -mul.f r0.w, r3.x, c10.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.z, r1.z -mov.f32f32 r0.x, r0.x -add.f r2.y, c18.y, (neg)r0.z -mul.f r0.y, r0.z, r0.y -mul.f r1.z, r0.z, r1.z -mad.f32 r0.x, c7.x, r1.w, r0.x -mul.f r1.w, r2.y, c10.y -add.f r0.y, r0.y, r0.w -mov.f32f32 r0.w, r2.w -mov.f32f32 r0.x, r0.x -add.f r1.z, r1.z, r1.w -add.f r1.w, c18.y, (neg)r0.z -mov.f32f32 r2.y, r0.w -bary.f (ei)r0.w, 11, r1.x -mul.f r0.x, r0.z, r0.x -mul.f r0.z, r1.w, c10.x -mov.f32f32 r6.y, r2.x -mov.f32f32 r0.w, r0.w +mul.f r2.x, r2.x, r2.z +mad.f32 r0.x, r0.w, r0.x, r1.z +add.f r0.w, c18.y, (neg)r0.z +add.f r1.z, c18.y, (neg)r0.z +(sy)mad.f32 r0.y, r2.x, r5.y, r0.y +sam.s (f32)(x)r3.w, r11.x, s#5, t#5 +mul.f r2.x, r2.y, r2.z +mul.f r2.y, r3.x, c10.z +mul.f r0.w, r0.w, c10.y +mul.f r1.z, r1.z, c10.x +(sy)mad.f32 r0.y, r2.x, r3.w, r0.y +bary.f r3.x, 11, r1.x +bary.f r3.w, 8, r1.x +bary.f (ei)r4.x, 9, r1.x +mul.f r0.y, c16.w, r0.y mov.f32f32 r1.x, c14.y (rpt1)nop -mov.f32f32 r2.z, r0.w -add.f r0.x, r0.x, r0.z -sam (f32)(w)r2.w, r6.x, s#1, t#1 -(sy)add.f r0.z, c14.z, (neg)r3.z -(rpt3)nop -sam (f32)(w)r1.w, r2.y, s#2, t#2 -(sy)cmps.f.lt r0.w, r2.z, c17.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r1.y, r2.z -nop -cov.u32f32 r0.w, r0.w -mov.f32f32 r0.z, r0.z -mov.f32f32 r1.y, r1.y -nop -cmps.f.ne r0.w, r0.w, c14.y -mov.f32f32 r0.z, r0.z -(rpt1)nop -sel.b32 r0.w, r1.x, r0.w, r1.y -(ss)mov.f32f32 r2.z, r0.z -(rpt1)nop -mul.f r0.y, r0.y, r0.w -mul.f r0.z, r1.z, r0.w -mul.f r0.x, r0.x, r0.w -nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.x, r0.x -nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.x, r0.x -nop -mov.f32f32 r2.y, r0.y -mov.f32f32 r2.x, r0.z -mov.f32f32 r1.w, r0.x +mov.f32f32 r1.y, r0.y +mul.f r0.x, r0.x, r0.y +sam (f32)(w)r5.y, r2.w, s#2, t#2 +(sy)cmps.f.lt r0.y, r6.x, c17.x +sam (f32)(w)r3.w, r3.w, s#1, t#1 +mad.f32 r0.x, c7.x, r1.w, r0.x +mul.f r1.w, r3.y, r1.y +mul.f r1.y, r3.z, r1.y +mad.f32 r1.w, c7.z, r5.x, r1.w +mad.f32 r1.y, c7.y, r4.w, r1.y +mul.f r0.x, r0.z, r0.x +cov.u32f32 r0.y, r0.y +mul.f r1.w, r0.z, r1.w +mul.f r0.z, r0.z, r1.y +add.f r0.x, r0.x, r1.z +cmps.f.ne r0.y, r0.y, c14.y +add.f r1.y, r1.w, r2.y +add.f r0.z, r0.z, r0.w +(sy)(ss)add.f r2.w, c14.z, (neg)r4.z +sel.b32 r0.y, r1.x, r0.y, r6.x +(rpt2)nop +mul.f r2.z, r1.y, r0.y +mul.f r2.y, r0.z, r0.y +mul.f r2.x, r0.x, r0.y end -nop -nop -nop -; FRAG: outputs: r1.w (1:0) -; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r7.z (5:10,cm=f,il=12,b=1) r0.x (5:11,cm=f,il=16,b=1) r6.x (5:12,cm=f,il=20,b=1) r4.z (5:13,cm=f,il=24,b=1) r2.w (5:14,cm=f,il=28,b=1) r7.x (5:15,cm=f,il=32,b=1) -; FRAG: 535 instructions, 0 half, 12 full -; pos (bary): r1.x -; color: r1.w -; fragcoord: r0.x +; FRAG: outputs: r2.x (1:0) +; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r10.z (5:10,cm=f,il=12,b=1) r2.y (5:11,cm=f,il=16,b=1) r4.z (5:12,cm=f,il=20,b=1) r5.z (5:13,cm=f,il=24,b=1) r2.y (5:14,cm=f,il=28,b=1) r7.x (5:15,cm=f,il=32,b=1) +; FRAG: 354 instructions, 0 half, 15 full diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-37.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-37.asm index 83ef8e1..494f814 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-37.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-37.asm @@ -1,210 +1,143 @@ ; options: ; VERT: new compiler -@in(r0.x) in0 -@in(r0.y) in1 -@in(r0.z) in2 -@in(r0.w) in4 -@in(r1.x) in5 -@in(r1.y) in6 -@in(r1.z) in8 -@in(r1.w) in9 -@in(r2.x) in10 -@in(r2.y) in12 -@in(r2.z) in13 -@out(r9.w) out0 -@out(r10.x) out1 -@out(r10.y) out2 -@out(r10.z) out3 -@out(r5.w) out4 -@out(r6.x) out5 -@out(r6.y) out6 -@out(r6.z) out7 -@out(r4.x) out8 -@out(r4.y) out9 -@out(r4.z) out10 -@out(r4.w) out11 -@out(r2.w) out12 -@out(r3.x) out13 -@out(r3.y) out14 -@out(r3.z) out15 -@out(r7.w) out16 -@out(r8.x) out17 -@out(r8.y) out18 -@out(r8.z) out19 -@out(r6.w) out20 -@out(r7.x) out21 -@out(r7.y) out22 -@out(r7.z) out23 -@out(r8.w) out24 -@out(r9.x) out25 -@out(r9.y) out26 -@out(r9.z) out27 -@out(r10.w) out28 -@out(r11.x) out29 -@out(r11.y) out30 -@out(r11.z) out31 -(sy)(ss)add.f r2.w, c4.x, (neg)r0.x -mul.f r3.x, r0.w, r0.w -mul.f r3.y, c8.w, r0.x -mul.f r3.z, c8.z, r0.x -mul.f r3.w, r2.w, r2.w -add.f r4.x, c4.y, (neg)r0.y -add.f r3.x, c13.x, (neg)r3.x -mad.f32 r3.y, c9.w, r0.y, r3.y -mad.f32 r3.z, c9.z, r0.y, r3.z -mad.f32 r3.w, r4.x, r4.x, r3.w -mov.f32f32 r3.x, r3.x -mad.f32 r3.y, c10.w, r0.z, r3.y -mad.f32 r3.z, c10.z, r0.z, r3.z -mov.f32f32 r3.w, r3.w -add.f r4.y, c4.z, (neg)r0.z -mul.f r4.z, r3.x, r3.x -mul.f r4.w, r1.x, r0.w -add.f r3.y, r3.y, c11.w -mad.f32 r3.w, r4.y, r4.y, r3.w -add.f r3.z, r3.z, c11.z -mul.f r5.x, c8.y, r0.x -mul.f r5.y, c8.x, r0.x -add.f r4.w, c13.y, (neg)r4.w -mul.f r2.x, r2.x, c6.z -mov.f32f32 r3.y, r3.y -rsq r3.w, r3.w -(ss)mov.f32f32 r3.w, r3.w -mov.f32f32 r4.w, r4.w -mov.f32f32 r3.z, r3.z -mad.f32 r5.x, c9.y, r0.y, r5.x -mad.f32 r2.w, r2.w, r3.w, (neg)c5.x -mad.f32 r4.z, r4.w, r4.w, r4.z -mad.f32 r4.x, r4.x, r3.w, (neg)c5.y -mad.f32 r3.w, r4.y, r3.w, (neg)c5.z -mov.f32f32 r2.w, r2.w -mov.f32f32 r4.y, r4.z -mul.f r4.z, r1.y, r0.w -mov.f32f32 r4.x, r4.x -mul.f r5.z, r2.w, r2.w -mov.f32f32 r3.w, r3.w -add.f r4.z, c13.y, (neg)r4.z -mad.f32 r5.z, r4.x, r4.x, r5.z -mov.f32f32 r6.z, r3.y -mov.f32f32 r6.y, r3.z -mad.f32 r3.y, c10.y, r0.z, r5.x -mov.f32f32 r3.z, r5.z -mov.f32f32 r4.z, r4.z -mad.f32 r3.z, r3.w, r3.w, r3.z -add.f r3.y, r3.y, c11.y -mad.f32 r5.x, c9.x, r0.y, r5.y -mov.f32f32 r2.x, r2.x -mad.f32 r5.x, c10.x, r0.z, r5.x -mul.f r5.y, c0.w, r0.x -mul.f r5.z, c0.z, r0.x -rsq r3.z, r3.z -(ss)mov.f32f32 r3.z, r3.z -mad.f32 r4.y, r4.z, r4.z, r4.y -mul.f r3.y, r3.y, c12.y -add.f r5.x, r5.x, c11.x -mul.f r3.w, r3.w, r3.z -mul.f r4.x, r4.x, r3.z -mul.f r2.w, r2.w, r3.z -mov.f32f32 r6.x, r3.y -mov.f32f32 r3.y, r3.w -mov.f32f32 r3.z, r4.x -mov.f32f32 r2.w, r2.w -rsq r3.w, r4.y -(ss)mov.f32f32 r3.w, r3.w -mov.f32f32 r3.y, r3.y -mov.f32f32 r3.z, r3.z -mov.f32f32 r2.w, r2.w -nop -mov.f32f32 r3.y, r3.y -mov.f32f32 r3.z, r3.z -(rpt1)nop -mov.f32f32 r7.x, r3.y -mov.f32f32 r6.w, r3.z -mov.f32f32 r8.z, r2.w -mul.f r2.w, r3.x, r3.w -mul.f r3.x, r4.z, r3.w -mul.f r3.y, r4.w, r3.w -mul.f r3.z, r5.x, c12.x -mov.f32f32 r2.w, r2.w -mov.f32f32 r3.x, r3.x -mov.f32f32 r3.y, r3.y -mov.f32f32 r5.w, r3.z -mul.f r3.z, r1.x, r2.w -mul.f r3.w, r0.w, r3.x -mad.f32 r3.z, r0.w, r3.y, (neg)r3.z -mad.f32 r3.w, r1.y, r2.w, (neg)r3.w -mul.f r4.x, r1.y, r3.y -(ss)mov.f32f32 r4.y, r3.x -mov.f32f32 r3.z, r3.z -mov.f32f32 r3.w, r3.w -mad.f32 r3.x, r1.x, r3.x, (neg)r4.x -mov.f32f32 r4.z, r4.y -mov.f32f32 r8.y, r3.z -mov.f32f32 r8.x, r3.w -mov.f32f32 r3.x, r3.x -mov.f32f32 r3.y, r3.y -mov.f32f32 r2.w, r2.w -nop -mov.f32f32 r7.w, r3.x -mov.f32f32 r4.y, r3.y -mov.f32f32 r4.x, r2.w -mov.f32f32 r2.x, r2.x -mad.f32 r2.w, c1.w, r0.y, r5.y -mad.f32 r3.x, c1.z, r0.y, r5.z -mul.f r3.y, c0.y, r0.x -mov.f32f32 r8.w, r2.x -mad.f32 r2.x, c2.w, r0.z, r2.w -mad.f32 r2.w, c2.z, r0.z, r3.x -mad.f32 r3.x, c1.y, r0.y, r3.y -mul.f r3.y, c0.x, r0.x -add.f r2.x, r2.x, c3.w -add.f r2.w, r2.w, c3.z -mad.f32 r3.x, c2.y, r0.z, r3.x -mad.f32 r3.y, c1.x, r0.y, r3.y -mov.f32f32 r10.z, r2.x -mov.f32f32 r10.y, r2.w -add.f r2.x, r3.x, c3.y -mad.f32 r2.w, c2.x, r0.z, r3.y -mul.f r1.w, r1.w, c6.y -mul.f r1.z, r1.z, c6.x -mov.f32f32 r10.x, r2.x -add.f r2.x, r2.w, c3.x -mov.f32f32 r1.w, r1.w -mov.f32f32 r1.z, r1.z -mad.f32 r2.w, c7.x, r0.z, c7.y -mov.f32f32 r9.w, r2.x -mov.f32f32 r7.z, r1.w -mov.f32f32 r7.y, r1.z -mov.f32f32 r1.z, r2.w -mad.f32 r1.w, c7.x, r0.x, c7.y -mov.f32f32 r2.x, (0.000000) -mov.f32f32 r0.z, r0.z -mov.f32f32 r3.z, r1.z -mov.f32f32 r1.z, r1.w -mov.f32f32 r11.z, r2.x -mov.f32f32 r11.y, r0.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r3.y, r1.z -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.z, r1.y -mov.f32f32 r11.x, r0.y -mov.f32f32 r0.y, r1.x -mov.f32f32 r10.w, r0.x -mov.f32f32 r9.z, r0.z -mov.f32f32 r0.x, r0.w -mov.f32f32 r9.y, r0.y -mov.f32f32 r0.y, r2.z -mov.f32f32 r0.z, r2.y -mov.f32f32 r9.x, r0.x -mov.f32f32 r0.x, c13.z -mov.f32f32 r3.x, r0.y +@in(r7.x) in0 +@in(r7.y) in1 +@in(r7.z) in2 +@in(r6.y) in4 +@in(r6.z) in5 +@in(r6.w) in6 +@in(r8.x) in8 +@in(r8.y) in9 +@in(r8.z) in10 +@in(r3.x) in12 +@in(r3.y) in13 +@out(r0.x) out0 +@out(r0.y) out1 +@out(r0.z) out2 +@out(r0.w) out3 +@out(r1.x) out4 +@out(r1.y) out5 +@out(r1.z) out6 +@out(r1.w) out7 +@out(r2.x) out8 +@out(r2.y) out9 +@out(r2.z) out10 +@out(r2.w) out11 +@out(r3.x) out12 +@out(r3.y) out13 +@out(r3.z) out14 +@out(r3.w) out15 +@out(r4.x) out16 +@out(r4.y) out17 +@out(r4.z) out18 +@out(r4.w) out19 +@out(r5.x) out20 +@out(r5.y) out21 +@out(r5.z) out22 +@out(r5.w) out23 +@out(r6.x) out24 +@out(r6.y) out25 +@out(r6.z) out26 +@out(r6.w) out27 +@out(r7.x) out28 +@out(r7.y) out29 +@out(r7.z) out30 +@out(r7.w) out31 +@const(c13.x) 0x3f800000, 0x00000000, 0xbf800000, 0x00000000 +@const(c14.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 +(sy)(ss)add.f r0.x, c4.x, (neg)r7.x +mul.f r0.y, r6.y, r6.y +mul.f r0.z, c8.y, r7.x +mul.f r0.w, c8.x, r7.x +mul.f r1.x, r0.x, r0.x +add.f r1.z, c4.y, (neg)r7.y +add.f r0.y, c13.x, (neg)r0.y +mad.f32 r0.z, c9.y, r7.y, r0.z +mad.f32 r0.w, c9.x, r7.y, r0.w +mad.f32 r1.x, r1.z, r1.z, r1.x +add.f r1.w, c4.z, (neg)r7.z +mov.f32f32 r1.y, r0.y +mad.f32 r0.z, c10.y, r7.z, r0.z +mad.f32 r0.w, c10.x, r7.z, r0.w +mad.f32 r1.x, r1.w, r1.w, r1.x +mul.f r2.x, r1.y, r1.y +mul.f r1.y, r6.z, r6.y +add.f r0.z, r0.z, c11.y +add.f r0.w, r0.w, c11.x +mul.f r2.y, r6.w, r6.y +mul.f r2.z, c8.w, r7.x +rsq r1.x, r1.x +(ss)mov.f32f32 r2.w, r1.x +add.f r3.z, c13.y, (neg)r1.y +mad.f32 r0.x, r0.x, r1.x, (neg)c5.x +mul.f r1.y, r0.z, c12.y +mad.f32 r0.z, r1.z, r2.w, (neg)c5.y +mov.f32f32 r1.z, r3.z +(ss)mov.f32f32 r1.x, r0.x +mad.f32 r1.w, r1.w, r2.w, (neg)c5.z mov.f32f32 r2.w, r0.z +mad.f32 r2.x, r3.z, r1.z, r2.x +mul.f r1.x, r1.x, r1.x +add.f r2.y, c13.y, (neg)r2.y +mad.f32 r0.z, r0.z, r2.w, r1.x +mov.f32f32 r3.z, r1.w +mul.f r1.x, r0.w, c12.x +mov.f32f32 r0.w, r2.y +mad.f32 r2.z, c9.w, r7.y, r2.z +mad.f32 r0.z, r1.w, r3.z, r0.z +mad.f32 r1.w, c10.w, r7.z, r2.z +mul.f r2.z, c8.z, r7.x +mul.f r3.w, c0.w, r7.x +mul.f r4.x, c0.z, r7.x +mul.f r5.z, c0.y, r7.x +mul.f r5.w, c0.x, r7.x +rsq r0.z, r0.z +(ss)mov.f32f32 r4.y, r0.z +mul.f r4.w, r0.x, r0.z +mad.f32 r0.x, r2.y, r0.w, r2.x +add.f r1.w, r1.w, c11.w +mul.f r5.y, r3.z, r4.y +mul.f r5.x, r2.w, r4.y +(ss)mad.f32 r0.z, c9.z, r7.y, r2.z +mad.f32 r2.y, c1.w, r7.y, r3.w +mad.f32 r2.w, c1.z, r7.y, r4.x +rsq r0.x, r0.x +(ss)mov.f32f32 r3.z, r0.x +mul.f r2.x, r0.y, r0.x +(ss)mad.f32 r0.x, c10.z, r7.z, r0.z +mad.f32 r0.y, c2.w, r7.z, r2.y +mul.f r2.z, r0.w, r3.z +mul.f r2.y, r1.z, r3.z +mov.f32f32 r0.z, r2.x +add.f r1.z, r0.x, c11.z +mov.f32f32 r0.x, r2.z +mov.f32f32 r3.z, r2.y +mul.f r3.w, r6.z, r0.z +add.f r0.w, r0.y, c3.w +mul.f r0.y, r6.y, r0.x +mul.f r4.x, r6.w, r3.z +mad.f32 r4.y, r6.w, r0.z, (neg)r0.y +mad.f32 r4.x, r6.z, r0.x, (neg)r4.x +mad.f32 r4.z, r6.y, r3.z, (neg)r3.w +mad.f32 r0.x, c2.z, r7.z, r2.w +mad.f32 r0.y, c1.y, r7.y, r5.z +mad.f32 r2.w, c1.x, r7.y, r5.w +mad.f32 r0.y, c2.y, r7.z, r0.y +add.f r0.z, r0.x, c3.z +mad.f32 r0.x, c2.x, r7.z, r2.w nop -mov.f32f32 r4.w, r0.x +add.f r0.y, r0.y, c3.y +mov.f32f32 r7.w, (0.000000) +add.f r0.x, r0.x, c3.x +mul.f r6.x, r8.z, c6.z +mul.f r5.w, r8.y, c6.y +mul.f r5.z, r8.x, c6.x +mad.f32 r3.w, c7.x, r7.z, c7.y +mad.f32 r3.z, c7.x, r7.x, c7.y +mov.f32f32 r2.w, c13.z end nop -; VERT: outputs: r9.w (0:0) r5.w (5:9) r4.x (5:10) r2.w (5:11) r7.w (5:12) r6.w (5:13) r8.w (5:14) r10.w (5:15) -; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r1.z (0:0,cm=7,il=16,b=0) r2.y (0:0,cm=3,il=20,b=0) -; VERT: 160 instructions, 0 half, 12 full -; pos: r9.w +; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11) r4.x (5:12) r5.x (5:13) r6.x (5:14) r7.x (5:15) +; VERT: inputs: r7.x (0:0,cm=7,il=8,b=0) r6.y (0:0,cm=7,il=12,b=0) r8.x (0:0,cm=7,il=16,b=0) r3.x (0:0,cm=3,il=20,b=0) +; VERT: 91 instructions, 0 half, 9 full diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-38.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-38.asm index bb354e6..95d5c75 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-38.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-38.asm @@ -4,214 +4,139 @@ @in(r0.w) in3 @in(r1.x) in4 @in(r1.y) in5 -@out(r4.x) out0 -@out(r4.y) out1 -@out(r4.z) out2 -@out(r4.w) out3 +@out(r2.x) out0 +@out(r2.y) out1 +@out(r2.z) out2 +@out(r2.w) out3 +@const(c9.x) 0x3f000000, 0x00000000, 0x3f800000, 0xba03126f +@const(c10.x) 0xbf000000, 0x40000000, 0x3f800000, 0x3fb8aa65 +@const(c11.x) 0x3de38866, 0x3cf5c28f, 0x00000000, 0x00000000 +@const(c12.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.x, 0, r1.x add.f r0.y, r0.w, c9.y bary.f r0.w, 1, r1.x -bary.f r1.z, 4, r1.x -add.f r1.w, r0.x, c10.x -bary.f r2.x, 6, r1.x +bary.f r1.z, 6, r1.x +add.f r2.x, r0.x, c10.x +bary.f r1.w, 7, r1.x add.f r2.y, r0.w, c10.x -mov.f32f32 r1.z, r1.z -floor.f r2.z, r1.w +bary.f r2.z, 2, r1.x +floor.f r2.w, r2.x rcp r0.y, r0.y add.f r0.z, r0.z, c9.y -floor.f r2.w, r2.y -mov.f32f32 r3.x, r1.z -add.f r1.z, r1.w, (neg)r2.z +floor.f r3.x, r2.y +add.f r3.w, r2.z, c9.w +add.f r2.x, r2.x, (neg)r2.w (ss)mul.f r0.y, r0.z, r0.y -add.f r0.z, r2.y, (neg)r2.w -bary.f r1.w, 5, r1.x -mov.f32f32 r1.z, r1.z +absneg.f r0.z, (neg)c7.x +add.f r2.y, r2.y, (neg)r3.x +mov.f32f32 r2.z, r2.x +add.f r2.x, r2.x, c9.z +mul.f r0.z, r0.z, c7.x +sam (f32)(w)r4.x, r1.z, s#2, t#2 +(ss)mov.f32f32 r1.z, r2.y +mul.f r1.w, c9.x, r2.z +add.f r2.z, c10.y, (neg)r2.z +mul.f r0.z, r0.z, r0.y mov.f32f32 r0.y, r0.y -absneg.f r2.y, (neg)c7.x -mov.f32f32 r0.z, r0.z -mul.f r2.z, c9.x, r1.z -mov.f32f32 r1.w, r1.w -add.f r2.w, c10.y, (neg)r1.z -add.f r3.z, c10.y, (neg)r0.z -mov.f32f32 r2.z, r2.z -mul.f r2.y, r2.y, c7.x -mul.f r3.w, c9.x, r0.z -mov.f32f32 r3.y, r1.w -add.f r0.x, r0.x, (neg)r2.z -mul.f r1.w, r2.y, r0.y -mov.f32f32 r2.y, r3.w -mov.f32f32 r2.z, r2.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r1.w, r1.w -add.f r0.w, r0.w, (neg)r2.y -sam (f32)(w)r3.w, r3.x, s#1, t#1 -(sy)add.f r2.y, c9.z, (neg)r4.z -add.f r2.w, c10.x, r0.x -mul.f r0.y, r1.w, r0.y +add.f r0.x, r0.x, (neg)r1.w +mul.f r1.w, c9.x, r1.z +mov.f32f32 r2.w, r2.z +mul.f r0.y, r0.z, r0.y +mov.f32f32 r0.z, r0.x +add.f r0.w, r0.w, (neg)r1.w add.f r0.x, c10.z, r0.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.w, r2.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x -add.f r2.w, c10.z, r0.w -mul.f r1.w, r1.w, c4.z mul.f r0.y, r0.y, c10.w -mul.f r0.x, r0.x, c4.z -mov.f32f32 r2.w, r2.w -(ss)mov.f32f32 r3.x, r1.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r3.y, r0.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r3.w, r3.x -add.f r0.w, c10.x, r0.w -mov.f32f32 r3.x, r3.y -mov.f32f32 r4.z, r0.x -mov.f32f32 r0.x, r1.w -mov.f32f32 r0.w, r0.w -exp2 r0.y, r0.y -(ss)mov.f32f32 r0.y, r0.y -mov.f32f32 r5.y, r3.x -mul.f r1.w, r2.w, c4.w -mul.f r0.w, r0.w, c4.w -add.f r2.w, c12.y, (neg)r0.y -mov.f32f32 r6.x, r0.x -mov.f32f32 r0.x, r1.w -mov.f32f32 r3.x, r0.w -mul.f r2.w, r2.w, c7.y -mul.f r0.y, r0.y, c9.z -mov.f32f32 r1.w, r1.w -mov.f32f32 r4.x, r3.x -bary.f r3.x, 2, r1.x -add.f r0.y, r0.y, r2.w -mov.f32f32 r1.w, r1.w -mov.f32f32 r0.w, r0.w -add.f r2.w, r3.x, c9.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r5.z, r1.w -mov.f32f32 r4.w, r0.w -mov.f32f32 r0.w, r2.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.w, r2.w -mov.f32f32 r3.x, r2.w -mov.f32f32 r4.y, r0.w -max.f r0.y, r0.y, c9.y -mov.f32f32 r5.w, r1.w -mov.f32f32 r5.x, r3.x -mov.f32f32 r6.y, r0.x -mov.f32f32 r0.x, r2.w -mov.f32f32 r0.w, r2.y -sam.s (f32)(x)r1.w, r3.w, s#3, t#3 -(sy)mov.f32f32 r1.w, r1.w -min.f r0.y, r0.y, c9.z -sam.s (f32)(x)r2.y, r5.y, s#3, t#3 -(sy)mov.f32f32 r2.y, r2.y -sam.s (f32)(x)r2.w, r4.z, s#3, t#3 -(sy)mov.f32f32 r2.w, r2.w -mov.f32f32 r1.w, r1.w -mov.f32f32 r3.x, r3.z -add.f r3.y, c12.y, (neg)r0.y -add.f r3.z, c12.y, (neg)r0.y -(ss)add.f r3.w, c12.y, (neg)r0.y -mul.f r4.x, r2.z, r3.x -mul.f r3.y, r3.y, c6.z -mul.f r3.z, r3.z, c6.y -mul.f r3.w, r3.w, c6.x -mul.f r1.w, r4.x, r1.w -add.f r1.z, r1.z, c9.z -mov.f32f32 r6.z, r0.x -mov.f32f32 r0.x, r0.w -add.f r0.z, r0.z, c9.z -mul.f r0.w, r1.z, r3.x -mov.f32f32 r2.x, r2.x -bary.f r3.x, 8, r1.x -mov.f32f32 r0.x, r0.x -mad.f32 r0.w, r0.w, r2.w, r1.w -sam.s (f32)(x)r1.w, r6.x, s#3, t#3 +add.f r0.z, c10.x, r0.z +mov.f32f32 r1.w, r0.w +mul.f r3.y, r0.x, c4.z +add.f r0.x, c10.z, r0.w +mul.f r4.x, r0.z, c4.z +add.f r0.z, c10.x, r1.w +mov.f32f32 r5.x, r3.y +mul.f r3.z, r0.x, c4.w +mov.f32f32 r5.w, r4.x +mul.f r5.y, r0.z, c4.w +exp2 r0.x, r0.y +(ss)mov.f32f32 r0.y, r0.x +mov.f32f32 r5.z, r3.w +mov.f32f32 r4.y, r3.z +mov.f32f32 r6.x, r5.y +mov.f32f32 r6.y, r3.w +add.f r0.y, c12.y, (neg)r0.y +mov.f32f32 r4.z, r3.w +sam.s (f32)(x)r3.x, r3.y, s#3, t#3 +add.f r0.z, c10.y, (neg)r1.z +sam.s (f32)(x)r6.z, r5.x, s#3, t#3 +mul.f r0.x, r0.x, c9.z +add.f r0.w, r2.y, c9.z +mul.f r0.y, r0.y, c7.y +(ss)nop +sam.s (f32)(x)r5.x, r5.w, s#3, t#3 +mov.f32f32 r1.z, r0.z +mul.f r0.z, r2.x, r0.z +mul.f r1.w, r2.z, r0.w +add.f r0.x, r0.x, r0.y +mul.f r0.y, r2.w, r1.z +sam.s (f32)(x)r3.y, r4.x, s#3, t#3 +mul.f r0.w, r2.x, r0.w +(sy)cmps.f.lt r1.z, r4.w, c11.y +bary.f r3.z, 4, r1.x +mul.f r0.y, r0.y, r5.x +max.f r0.x, r0.x, c9.y +mad.f32 r0.y, r0.z, r6.z, r0.y +cov.u32f32 r0.z, r1.z +mad.f32 r0.y, r1.w, r3.y, r0.y +min.f r0.x, r0.x, c9.z +mad.f32 r0.y, r0.w, r3.x, r0.y +cmps.f.ne r0.z, r0.z, c9.y (rpt1)nop -(sy)mov.f32f32 r1.w, r1.w -mov.f32f32 r0.w, r0.w -mul.f r2.z, r2.z, r0.z -mov.f32f32 r4.w, r0.x -mov.f32f32 r5.x, r2.x -mul.f r0.x, r1.z, r0.z -mad.f32 r0.z, r2.z, r1.w, r0.w -bary.f r0.w, 7, r1.x -mov.f32f32 r1.z, r3.x +mul.f r0.y, c11.x, r0.y +bary.f r1.z, 8, r1.x bary.f r1.w, 9, r1.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -mad.f32 r0.x, r0.x, r2.y, r0.z -mov.f32f32 r2.x, r1.z -mov.f32f32 r0.z, r1.w -mov.f32f32 r5.y, r0.w -mov.f32f32 r0.x, r0.x -bary.f r0.w, 12, r1.x -bary.f r1.z, 11, r1.x -bary.f (ei)r1.x, 10, r1.x -mul.f r0.x, c11.x, r0.x -mov.f32f32 r2.y, r0.z -sam (f32)(w)r5.x, r5.x, s#2, t#2 -(sy)cmps.f.lt r0.z, r5.w, c11.y -mov.f32f32 r1.y, r5.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.z, r1.z -mov.f32f32 r1.x, r1.x -sam (f32)(xyz)r1.w, r2.x, s#0, t#0 -cov.u32f32 r0.z, r0.z -(sy)mul.f r0.w, r2.y, r0.w -mul.f r1.z, r2.x, r1.z -mul.f r1.x, r1.w, r1.x -cmps.f.ne r0.z, r0.z, c9.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.z, r1.z -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.y, r1.y -mul.f r0.w, r0.w, r0.x -mul.f r1.z, r1.z, r0.x -mul.f r0.x, r1.x, r0.x -mov.f32f32 r1.x, c9.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.z, r1.z -mad.f32 r0.w, c5.z, r2.y, r0.w -mad.f32 r1.z, c5.y, r2.x, r1.z -mov.f32f32 r0.x, r0.x -sel.b32 r0.z, r1.x, r0.z, r1.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.x, r1.z -mad.f32 r0.x, c5.x, r1.w, r0.x -nop -mul.f r0.w, r0.y, r0.w -mul.f r1.x, r0.y, r1.x -mov.f32f32 r0.x, r0.x -nop -add.f r0.w, r0.w, r3.y -add.f r1.x, r1.x, r3.z -mul.f r0.x, r0.y, r0.x +add.f r0.w, c12.y, (neg)r0.x +mov.f32f32 r2.x, r0.y +add.f r2.y, c12.y, (neg)r0.x +add.f r2.z, c12.y, (neg)r0.x +(rpt1)nop +(ss)nop +sam (f32)(xyz)r3.w, r1.z, s#0, t#0 +(ss)bary.f r1.z, 12, r1.x +bary.f r1.w, 11, r1.x +bary.f r2.w, 10, r1.x +mul.f r0.w, r0.w, c6.z +(sy)mul.f r1.z, r4.y, r1.z +mul.f r1.w, r4.x, r1.w +mul.f r2.w, r3.w, r2.w +mul.f r2.y, r2.y, c6.y +mul.f r1.z, r1.z, r2.x +mul.f r1.w, r1.w, r2.x +mad.f32 r1.z, c5.z, r4.y, r1.z +mad.f32 r1.w, c5.y, r4.x, r1.w +mul.f r0.y, r2.w, r0.y +mul.f r2.x, r2.z, c6.x +mul.f r1.z, r0.x, r1.z +mul.f r1.w, r0.x, r1.w +mad.f32 r0.y, c5.x, r3.w, r0.y +mov.f32f32 r2.z, c9.y +add.f r0.w, r1.z, r0.w +add.f r1.z, r1.w, r2.y nop -mul.f r0.y, r0.w, r0.z -mul.f r0.w, r1.x, r0.z -add.f r0.x, r0.x, r3.w +sel.b32 r0.z, r2.z, r0.z, r4.w +mul.f r0.x, r0.x, r0.y +bary.f (ei)r3.w, 5, r1.x nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.w, r0.w -mul.f r0.x, r0.x, r0.z +mul.f r2.z, r0.w, r0.z +mul.f r2.y, r1.z, r0.z +add.f r0.x, r0.x, r2.x +(rpt1)nop +sam (f32)(w)r0.w, r3.z, s#1, t#1 +(sy)add.f r2.w, c9.z, (neg)r1.z +mul.f r2.x, r0.x, r0.z +end nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.w -mov.f32f32 r0.x, r0.x nop -mov.f32f32 r4.z, r0.y -mov.f32f32 r4.y, r0.z -mov.f32f32 r0.x, r0.x -(rpt2)nop -mov.f32f32 r4.x, r0.x -end nop -; FRAG: outputs: r4.x (1:0) -; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r0.x (5:10,cm=f,il=12,b=1) r0.w (5:11,cm=f,il=16,b=1) r4.x (5:12,cm=f,il=20,b=1) -; FRAG: 202 instructions, 0 half, 7 full -; pos (bary): r1.x -; color: r4.x -; fragcoord: r0.x +; FRAG: outputs: r2.x (1:0) +; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r1.x (5:10,cm=f,il=12,b=1) r1.x (5:11,cm=f,il=16,b=1) r4.x (5:12,cm=f,il=20,b=1) +; FRAG: 124 instructions, 0 half, 7 full diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-40.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-40.asm index e895f8d..15beb9d 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-40.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-40.asm @@ -4,334 +4,227 @@ @in(r0.w) in3 @in(r1.x) in4 @in(r1.y) in5 -@out(r3.z) out0 -@out(r3.w) out1 -@out(r4.x) out2 -@out(r4.y) out3 +@out(r2.x) out0 +@out(r2.y) out1 +@out(r2.z) out2 +@out(r2.w) out3 +@const(c14.x) 0x3f000000, 0x00000000, 0x3f800000, 0x40000000 +@const(c15.x) 0xbf800000, 0xba03126f, 0xbf000000, 0x3f800000 +@const(c16.x) 0x3fb8aa65, 0x3de38866, 0x3cf5c28f, 0x00000000 +@const(c17.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.x, 12, r1.x -bary.f r0.y, 0, r1.x +bary.f r1.z, 0, r1.x add.f r0.w, r0.w, c14.y -bary.f r1.z, 1, r1.x -mov.f32f32 r1.w, r0.x -add.f r2.x, r0.y, c15.z -bary.f r2.y, 8, r1.x +bary.f r1.w, 1, r1.x +mov.f32f32 r2.x, r0.x +bary.f r0.y, 13, r1.x add.f r2.z, r1.z, c15.z -mov.f32f32 r2.w, r1.w -bary.f r1.w, 13, r1.x -floor.f r3.x, r2.x +add.f r2.w, r1.w, c15.z +bary.f r3.x, 10, r1.x +mov.f32f32 r2.y, r0.y +floor.f r3.z, r2.z rcp r0.w, r0.w add.f r0.z, r0.z, c14.y -floor.f r3.y, r2.z -mov.f32f32 r3.z, r1.w -add.f r2.x, r2.x, (neg)r3.x +floor.f r3.w, r2.w +bary.f r3.y, 11, r1.x +add.f r2.z, r2.z, (neg)r3.z (ss)mul.f r0.z, r0.z, r0.w -(ss)add.f r0.w, r2.z, (neg)r3.y -mov.f32f32 r3.x, r3.z -mov.f32f32 r2.x, r2.x +sam (f32)(xyz)r4.x, r2.x, s#3, t#3 +(sy)(ss)mad.f32 r0.w, c14.w, r4.x, c15.x +absneg.f r2.x, (neg)c11.x +mov.f32f32 r2.y, r2.z +add.f r2.w, r2.w, (neg)r3.w +mov.f32f32 r3.z, r0.w +bary.f r3.w, 4, r1.x +mul.f r4.x, c14.x, r2.y +mul.f r2.x, r2.x, c11.x +mov.f32f32 r4.w, r2.w +mul.f r3.w, r3.w, r3.z +mad.f32 r4.y, c14.w, r4.y, c15.x +add.f r1.z, r1.z, (neg)r4.x +mul.f r2.x, r2.x, r0.z mov.f32f32 r0.z, r0.z -absneg.f r2.z, (neg)c11.x -mov.f32f32 r0.w, r0.w -mul.f r3.y, c14.x, r2.x -mov.f32f32 r2.y, r2.y -sam (f32)(xyz)r3.z, r2.w, s#3, t#3 -(sy)(ss)mad.f32 r2.w, c14.w, r3.z, c15.x -mul.f r2.z, r2.z, c11.x -mov.f32f32 r3.x, r3.y -mul.f r3.y, c14.x, r0.w -mov.f32f32 r2.w, r2.w -bary.f r3.z, 4, r1.x -add.f r0.y, r0.y, (neg)r3.x -mul.f r2.z, r2.z, r0.z -mov.f32f32 r3.x, r3.y -mul.f r3.y, r3.z, r2.w -mad.f32 r3.z, c14.w, r3.w, c15.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r2.z, r2.z -add.f r1.z, r1.z, (neg)r3.x -mov.f32f32 r3.x, r3.z -bary.f r3.z, 14, r1.x -add.f r3.w, c15.z, r0.y -mul.f r0.z, r2.z, r0.z -add.f r0.y, c15.w, r0.y -mov.f32f32 r2.z, r3.z -bary.f r3.z, 7, r1.x -mov.f32f32 r3.w, r3.w -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.y, r0.y -mul.f r2.z, r2.z, (neg)r3.z -mul.f r3.w, r3.w, c6.z +mov.f32f32 r4.x, r4.y +bary.f r5.x, 14, r1.x +bary.f r5.y, 7, r1.x +mov.f32f32 r5.z, r1.z +mul.f r0.z, r2.x, r0.z +mul.f r2.x, c14.x, r4.w +mul.f r5.x, r5.x, (neg)r5.y +add.f r5.z, c15.z, r5.z mul.f r0.z, r0.z, c16.x -mul.f r0.y, r0.y, c6.z -mov.f32f32 r2.z, r2.z -mov.f32f32 r4.y, r3.w -mov.f32f32 r0.z, r0.z -mov.f32f32 r4.z, r0.y -mad.f32 r2.z, r2.z, r3.x, r3.y -mov.f32f32 r4.w, r4.y -mov.f32f32 r1.z, r1.z -mov.f32f32 r3.y, r4.z -mov.f32f32 r2.z, r2.z -mad.f32 r4.x, c14.w, r4.x, c15.x -add.f r4.y, c15.z, r1.z -exp2 r0.z, r0.z -(ss)mov.f32f32 r0.z, r0.z -mov.f32f32 r5.z, r3.y -mov.f32f32 r3.y, r4.x -bary.f r4.x, 23, r1.x -mov.f32f32 r4.y, r4.y -add.f r4.z, c17.y, (neg)r0.z +add.f r1.w, r1.w, (neg)r2.x +mad.f32 r2.x, r5.x, r4.x, r3.w +mad.f32 r3.w, c14.w, r4.z, c15.x +mul.f r5.z, r5.z, c6.z +mov.f32f32 r4.z, r1.w add.f r1.z, c15.w, r1.z -mov.f32f32 r4.x, r4.x -mul.f r4.y, r4.y, c6.w -mul.f r4.z, r4.z, c11.y -mul.f r0.z, r0.z, c14.z -mad.f32 r2.z, r4.x, r3.y, r2.z -mov.f32f32 r4.x, r4.y -mov.f32f32 r1.z, r1.z -add.f r0.z, r0.z, r4.z -mov.f32f32 r2.z, r2.z -mov.f32f32 r5.x, r4.x -bary.f r4.x, 2, r1.x -mov.f32f32 r0.z, r0.z -mul.f r4.z, r2.z, r2.z -bary.f r5.y, 5, r1.x -add.f r4.x, r4.x, c15.y -mov.f32f32 r0.z, r0.z -mul.f r1.z, r1.z, c6.w -mul.f r5.w, r5.y, r2.w -bary.f r5.y, 15, r1.x -mov.f32f32 r6.x, r4.x +mov.f32f32 r5.x, r3.w +bary.f r5.w, 23, r1.x +mov.f32f32 r6.y, r5.z +exp2 r0.z, r0.z +(ss)mov.f32f32 r6.x, r0.z +add.f r4.z, c15.z, r4.z +mad.f32 r2.x, r5.w, r5.x, r2.x +mul.f r7.x, r1.z, c6.z +add.f r1.z, c15.w, r1.w +add.f r1.w, c17.y, (neg)r6.x +mov.f32f32 r7.w, r2.x +mul.f r8.y, r4.z, c6.w +mov.f32f32 r8.x, r7.x +mul.f r7.y, r1.z, c6.w +mul.f r1.z, r2.x, r7.w +bary.f r2.x, 5, r1.x +mov.f32f32 r6.z, r8.y +mul.f r1.w, r1.w, c11.y +(ss)mul.f r0.z, r0.z, c14.z +mul.f r2.x, r2.x, r3.z +bary.f r3.z, 15, r1.x +bary.f r4.z, 2, r1.x +add.f r0.z, r0.z, r1.w +mov.f32f32 r5.w, r7.y +mul.f r1.w, r3.z, (neg)r5.y +add.f r7.z, r4.z, c15.y max.f r0.z, r0.z, c14.y -mov.f32f32 r6.y, r1.z -mov.f32f32 r6.z, r5.y -mov.f32f32 r5.y, r6.x +add.f r2.y, c14.w, (neg)r2.y +mad.f32 r1.w, r1.w, r4.x, r2.x +bary.f r2.x, 24, r1.x +mov.f32f32 r6.w, r7.z min.f r0.z, r0.z, c14.z -mov.f32f32 r6.x, r6.y -mul.f r6.y, r6.z, (neg)r3.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r3.w, r3.w -add.f r6.z, c17.y, (neg)r0.z -mov.f32f32 r6.y, r6.y -sam.s (f32)(x)r4.w, r4.w, s#5, t#5 -(sy)(ss)mov.f32f32 r4.w, r4.w -add.f r5.x, c17.y, (neg)r0.z -mul.f r5.y, r6.z, c10.z -mad.f32 r5.w, r6.y, r3.x, r5.w -mov.f32f32 r4.w, r4.w -add.f r6.y, c14.w, (neg)r2.x -mul.f r5.x, r5.x, c10.y -mov.f32f32 r6.z, r5.w -bary.f r5.w, 24, r1.x -mov.f32f32 r6.y, r6.y -add.f r6.w, c14.w, (neg)r0.w -add.f r7.x, c17.y, (neg)r0.z -mov.f32f32 r7.y, r5.w -mov.f32f32 r5.w, r6.x -mov.f32f32 r6.x, r4.x -mov.f32f32 r7.z, r0.y -mad.f32 r0.y, r7.y, r3.y, r6.z -mov.f32f32 r6.z, r6.w -mul.f r6.w, r7.x, c10.x -mov.f32f32 r6.x, r6.x -mov.f32f32 r0.y, r0.y -mul.f r7.x, r6.y, r6.z -mov.f32f32 r4.y, r4.y -mov.f32f32 r8.y, r3.w -mad.f32 r3.w, r0.y, r0.y, r4.z -mul.f r4.z, r7.x, r4.w -mov.f32f32 r7.w, r4.y -mov.f32f32 r4.y, r4.x -mov.f32f32 r3.w, r3.w -bary.f r4.w, 6, r1.x -sam.s (f32)(x)r5.z, r5.z, s#5, t#5 -mov.f32f32 r1.z, r1.z -mov.f32f32 r8.x, r4.y -(sy)mov.f32f32 r4.y, r5.z -mul.f r2.w, r4.w, r2.w -bary.f r4.w, 16, r1.x -mov.f32f32 r8.z, r1.z -mov.f32f32 r1.z, r4.x -(ss)mov.f32f32 r5.z, r2.y -mov.f32f32 r2.y, r4.w -sam.s (f32)(x)r4.x, r7.z, s#5, t#5 -(sy)mov.f32f32 r4.x, r4.x -add.f r2.x, r2.x, c14.z -mov.f32f32 r8.w, r1.z -mul.f r1.z, r2.y, (neg)r3.z -bary.f r2.y, 9, r1.x -mul.f r3.z, r2.x, r6.z -add.f r0.w, r0.w, c14.z -mov.f32f32 r1.z, r1.z -mov.f32f32 r2.y, r2.y -bary.f r4.w, 10, r1.x -mad.f32 r3.z, r3.z, r4.x, r4.z -mad.f32 r1.z, r1.z, r3.x, r2.w -sam.s (f32)(x)r2.w, r8.y, s#5, t#5 -mov.f32f32 r5.w, r2.y -mul.f r2.y, r6.y, r0.w -mov.f32f32 r3.x, r3.z -mov.f32f32 r1.z, r1.z -bary.f r3.z, 25, r1.x -(sy)mov.f32f32 r2.w, r2.w -mov.f32f32 r4.x, r4.w -sam (f32)(w)r5.z, r5.z, s#1, t#1 -(sy)add.f r4.z, c14.z, (neg)r6.y -mov.f32f32 r3.z, r3.z -mad.f32 r2.y, r2.y, r2.w, r3.x -mov.f32f32 r2.w, r4.x -mul.f r0.w, r2.x, r0.w -mad.f32 r1.z, r3.z, r3.y, r1.z -mov.f32f32 r2.x, r2.y -mov.f32f32 r2.y, r4.z -bary.f r3.x, 11, r1.x -mov.f32f32 r1.z, r1.z -mad.f32 r0.w, r0.w, r4.y, r2.x -mov.f32f32 r2.x, r2.y -mov.f32f32 r2.y, r3.x -mad.f32 r3.x, r1.z, r1.z, r3.w -mov.f32f32 r0.w, r0.w -mov.f32f32 r2.x, r2.x +mov.f32f32 r8.z, r7.z +mad.f32 r1.w, r2.x, r5.x, r1.w +mov.f32f32 r6.x, r7.z +add.f r2.x, c17.y, (neg)r0.z +add.f r3.z, c17.y, (neg)r0.z +mov.f32f32 r4.x, r1.w +sam.s (f32)(x)r8.w, r6.y, s#5, t#5 +add.f r4.z, c17.y, (neg)r0.z +mov.f32f32 r5.x, r2.y +add.f r4.w, c14.w, (neg)r4.w +mad.f32 r1.z, r1.w, r4.x, r1.z +bary.f r1.w, 6, r1.x +mul.f r2.x, r2.x, c10.z +mul.f r3.z, r3.z, c10.y +mul.f r4.z, r4.z, c10.x +mul.f r0.w, r1.w, r0.w +bary.f r1.w, 16, r1.x +(ss)mov.f32f32 r6.y, r4.w +sam.s (f32)(x)r9.x, r8.x, s#5, t#5 +sam.s (f32)(x)r9.y, r5.z, s#5, t#5 +sam.s (f32)(x)r6.z, r7.x, s#5, t#5 +add.f r2.z, r2.z, c14.z +add.f r2.w, r2.w, c14.z +mul.f r1.w, r1.w, (neg)r5.y +mul.f r5.x, r5.x, r6.y +mul.f r4.w, r2.z, r4.w +mul.f r2.y, r2.y, r2.w +mad.f32 r0.w, r1.w, r4.y, r0.w +bary.f r1.w, 25, r1.x +(sy)mul.f r4.y, r5.x, r8.w +mul.f r2.z, r2.z, r2.w +mad.f32 r2.w, r4.w, r9.x, r4.y +mad.f32 r0.w, r1.w, r3.w, r0.w +mad.f32 r1.w, r2.y, r9.y, r2.w +(ss)nop +sam (f32)(w)r4.w, r3.x, s#2, t#2 +(sy)cmps.f.lt r2.y, r5.z, c16.z +mov.f32f32 r2.w, r0.x +(ss)mov.f32f32 r3.y, r0.w +mad.f32 r1.w, r2.z, r6.z, r1.w +cov.u32f32 r2.y, r2.y +mov.f32f32 r3.x, r0.y +mad.f32 r1.z, r3.y, r3.y, r1.z +mul.f r1.w, c16.y, r1.w +cmps.f.ne r2.y, r2.y, c14.y (rpt3)nop -rsq r3.x, r3.x -(ss)mov.f32f32 r3.y, r3.x -mul.f r0.w, c16.y, r0.w -mov.f32f32 r4.y, r2.x -(ss)mov.f32f32 r3.x, r2.y -mul.f r2.x, r2.z, r3.y -mov.f32f32 r0.w, r0.w -mul.f r0.y, r0.y, r3.y -mul.f r1.z, r1.z, r3.y -mov.f32f32 r2.x, r2.x -mov.f32f32 r2.y, r0.x -sam (f32)(w)r5.z, r2.w, s#2, t#2 -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.z, r1.z -mul.f r2.z, r2.x, r2.x -(ss)mul.f r2.w, (neg)c9.x, r2.x -mad.f32 r2.z, r0.y, r0.y, r2.z -mad.f32 r2.w, (neg)c9.y, r0.y, r2.w -(sy)cmps.f.lt r3.x, r6.y, c16.z -mov.f32f32 r3.y, r6.y -mov.f32f32 r2.z, r2.z -mov.f32f32 r2.w, r2.w -mad.f32 r2.z, r1.z, r1.z, r2.z -mad.f32 r2.w, (neg)c9.z, r1.z, r2.w -cov.u32f32 r3.x, r3.x -mov.f32f32 r3.y, r3.y -mov.f32f32 r4.z, r2.y -mov.f32f32 r2.y, r1.w -mov.f32f32 r0.x, r0.x +rsq r1.z, r1.z +(ss)mov.f32f32 r2.z, r1.z +mov.f32f32 r3.y, r1.w +mul.f r0.w, r0.w, r1.z +(ss)mov.f32f32 r1.z, c14.y +mul.f r3.w, r7.w, r2.z +mul.f r2.z, r4.x, r2.z +mov.f32f32 r4.x, r0.w +sel.b32 r1.z, r1.z, r2.y, r5.z +mov.f32f32 r2.y, r3.w +mul.f r3.w, (neg)c9.x, r3.w +mov.f32f32 r4.y, r2.z +mad.f32 r2.z, (neg)c9.y, r2.z, r3.w +mul.f r3.w, r2.y, r2.y +mad.f32 r0.w, (neg)c9.z, r0.w, r2.z +mad.f32 r2.z, r4.y, r4.y, r3.w +sam (f32)(xyz)r4.w, r2.w, s#4, t#4 +(sy)(ss)mul.f r3.x, c8.z, r5.y +mad.f32 r2.z, r4.x, r4.x, r2.z +max.f r0.w, r0.w, c14.y +bary.f r2.w, 20, r1.x +(rpt1)nop +mov.f32f32 r3.w, r0.w +bary.f r5.y, 21, r1.x rsq r2.z, r2.z -(ss)mov.f32f32 r2.z, r2.z -mov.f32f32 r2.w, r2.w -cmps.f.ne r3.x, r3.x, c14.y -nop -mul.f r2.x, r2.x, r2.z -max.f r2.w, r2.w, c14.y -mul.f r0.y, r0.y, r2.z -mul.f r1.z, r1.z, r2.z -mov.f32f32 r2.x, r2.x -bary.f r2.z, 17, r1.x -mov.f32f32 r2.w, r2.w -bary.f r4.w, 21, r1.x -bary.f r5.z, 20, r1.x -mul.f r2.x, r2.x, r2.z -mov.f32f32 r0.y, r0.y -bary.f r2.z, 18, r1.x +(ss)mov.f32f32 r5.z, r2.z bary.f r5.w, 22, r1.x -mad.f32 r6.x, c8.y, r2.w, (neg)r4.w -mad.f32 r6.y, c8.x, r2.w, (neg)r5.z -mad.f32 r0.y, r0.y, r2.z, r2.x -mad.f32 r2.x, c8.z, r2.w, (neg)r5.w -mov.f32f32 r2.z, r6.x -mov.f32f32 r2.w, r6.y -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.z, r1.z -bary.f (ei)r1.x, 19, r1.x -mov.f32f32 r1.y, r2.x -mad.f32 r2.x, c12.x, r2.z, r4.w -mad.f32 r2.z, c12.x, r2.w, r5.z -mad.f32 r0.y, r1.z, r1.x, r0.y -mad.f32 r1.x, c12.x, r1.y, r5.w -mov.f32f32 r1.y, r2.x -mov.f32f32 r1.z, r2.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.x, r1.x -mov.f32f32 r2.x, c14.y -mov.f32f32 r4.w, r2.y -max.f r0.y, c14.y, r0.y -mov.f32f32 r2.y, r0.x -mov.f32f32 r0.x, r1.w -sel.b32 r1.w, r2.x, r3.x, r3.y -mov.f32f32 r0.y, r0.y +mad.f32 r6.x, c8.y, r3.w, (neg)r5.y +mad.f32 r3.w, c8.z, r3.w, (neg)r5.w +mul.f r2.y, r2.y, r5.z +bary.f r6.y, 17, r1.x +mad.f32 r5.y, c12.x, r6.x, r5.y +mul.f r4.y, r4.y, r5.z +(ss)mul.f r2.z, r4.x, r2.z +mul.f r2.y, r2.y, r6.y +bary.f r4.x, 18, r1.x +mad.f32 r3.w, c12.x, r3.w, r5.w +mad.f32 r0.w, c8.x, r0.w, (neg)r2.w +nop +mad.f32 r2.y, r4.y, r4.x, r2.y +bary.f r4.x, 19, r1.x +mad.f32 r0.w, c12.x, r0.w, r2.w +mul.f r4.y, c8.y, r5.x +mul.f r4.w, c8.x, r4.w +mad.f32 r2.y, r2.z, r4.x, r2.y +bary.f r2.z, 8, r1.x +bary.f (ei)r2.w, 9, r1.x +sam (f32)(xyz)r5.z, r0.x, s#0, t#0 +nop +(ss)max.f r0.x, c14.y, r2.y (rpt5)nop -log2 r0.y, r0.y -(ss)mul.f r0.y, c12.y, r0.y -sam (f32)(xyz)r2.z, r4.z, s#4, t#4 -(sy)mul.f r2.x, c8.y, r2.w -mul.f r2.w, c8.x, r2.z -mul.f r3.x, c8.z, r3.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r2.z, r0.x +log2 r0.x, r0.x +(ss)mul.f r0.x, c12.y, r0.x +sam (f32)(w)r6.y, r2.z, s#1, t#1 (rpt4)nop -exp2 r0.x, r0.y +(sy)(ss)add.f r2.w, c14.z, (neg)r7.x +exp2 r0.x, r0.x (ss)mul.f r0.y, r3.x, r0.x -sam (f32)(xyz)r5.z, r2.y, s#0, t#0 -mul.f r2.x, r2.x, r0.x -(sy)mad.f32 r0.y, r6.x, r1.x, r0.y -mul.f r0.x, r2.w, r0.x -mad.f32 r1.x, r5.w, r1.y, r2.x -mad.f32 r0.x, r5.z, r1.z, r0.x -mov.f32f32 r0.y, r0.y -(rpt2)nop -mul.f r0.y, r0.y, r0.w -mov.f32f32 r1.x, r1.x -mov.f32f32 r0.x, r0.x +mul.f r1.x, r4.y, r0.x +mad.f32 r0.y, r6.x, r3.w, r0.y +mad.f32 r1.x, r5.w, r5.y, r1.x +(ss)mul.f r0.x, r4.w, r0.x nop -mov.f32f32 r0.y, r0.y -mul.f r1.x, r1.x, r0.w +mul.f r0.y, r0.y, r3.y +mul.f r1.x, r1.x, r3.y mad.f32 r0.y, c7.z, r6.x, r0.y -mul.f r0.x, r0.x, r0.w -(rpt1)nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.w, r1.x -mov.f32f32 r0.x, r0.x -mad.f32 r0.w, c7.y, r5.w, r0.w +mad.f32 r1.x, c7.y, r5.w, r1.x +mad.f32 r0.x, r5.z, r0.w, r0.x +nop mul.f r0.y, r0.z, r0.y +mul.f r0.w, r0.z, r1.x +mul.f r0.x, r0.x, r1.w +nop +add.f r0.y, r0.y, r2.x +add.f r0.w, r0.w, r3.z mad.f32 r0.x, c7.x, r5.z, r0.x -(rpt1)nop -add.f r0.y, r0.y, r5.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.x, r0.x nop -mul.f r0.y, r0.y, r1.w -mul.f r0.w, r0.z, r0.w +mul.f r2.z, r0.y, r1.z +mul.f r2.y, r0.w, r1.z mul.f r0.x, r0.z, r0.x -nop -mov.f32f32 r0.y, r0.y -add.f r0.z, r0.w, r5.x -add.f r0.x, r0.x, r6.w -nop -mov.f32f32 r0.y, r0.y -mul.f r0.z, r0.z, r1.w -mul.f r0.x, r0.x, r1.w -nop -mov.f32f32 r4.x, r0.y -mov.f32f32 r0.y, r0.z -mov.f32f32 r0.x, r0.x -(rpt1)nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x -(rpt1)nop -mov.f32f32 r3.w, r0.y -mov.f32f32 r3.z, r0.x +(rpt2)nop +add.f r0.x, r0.x, r4.z +(rpt2)nop +mul.f r2.x, r0.x, r1.z end nop nop -; FRAG: outputs: r3.z (1:0) -; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r2.w (5:10,cm=f,il=12,b=1) r2.y (5:11,cm=f,il=16,b=1) r4.z (5:12,cm=f,il=20,b=1) r0.y (5:13,cm=f,il=24,b=1) r3.y (5:14,cm=f,il=28,b=1) r7.x (5:15,cm=f,il=32,b=1) -; FRAG: 336 instructions, 0 half, 9 full -; pos (bary): r1.x -; color: r3.z -; fragcoord: r0.x +; FRAG: outputs: r2.x (1:0) +; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r4.z (5:10,cm=f,il=12,b=1) r2.z (5:11,cm=f,il=16,b=1) r2.w (5:12,cm=f,il=20,b=1) r3.y (5:13,cm=f,il=24,b=1) r5.x (5:14,cm=f,il=28,b=1) r7.x (5:15,cm=f,il=32,b=1) +; FRAG: 227 instructions, 0 half, 10 full diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-41.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-41.asm index d73d764..5a712c6 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-41.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-41.asm @@ -1,218 +1,151 @@ ; options: ; VERT: new compiler -@in(r0.x) in0 -@in(r0.y) in1 -@in(r0.z) in2 -@in(r0.w) in4 -@in(r1.x) in5 -@in(r1.y) in6 -@in(r1.z) in8 -@in(r1.w) in9 -@in(r2.x) in10 -@in(r2.y) in12 -@in(r2.z) in13 -@out(r4.w) out0 -@out(r5.x) out1 -@out(r5.y) out2 -@out(r5.z) out3 -@out(r3.y) out4 -@out(r3.z) out5 -@out(r3.w) out6 -@out(r4.x) out7 -@out(r8.x) out8 -@out(r8.y) out9 -@out(r8.z) out10 -@out(r8.w) out11 -@out(r9.x) out12 -@out(r9.y) out13 -@out(r9.z) out14 -@out(r9.w) out15 -@out(r6.x) out16 -@out(r6.y) out17 -@out(r6.z) out18 -@out(r6.w) out19 -@out(r7.x) out20 -@out(r7.y) out21 -@out(r7.z) out22 -@out(r7.w) out23 -@out(r11.x) out24 -@out(r11.y) out25 -@out(r11.z) out26 -@out(r11.w) out27 -@out(r10.x) out28 -@out(r10.y) out29 -@out(r10.z) out30 -@out(r10.w) out31 -(sy)(ss)add.f r2.w, c4.x, (neg)r0.x -mul.f r3.x, r0.w, r0.w -absneg.f r3.y, (neg)c7.y -mul.f r3.z, r0.x, c7.x -mul.f r3.w, r2.w, r2.w -add.f r4.x, c4.y, (neg)r0.y -add.f r3.x, c14.x, (neg)r3.x -mul.f r4.y, r0.x, (neg)r3.y -mad.f32 r3.y, r0.z, (neg)r3.y, r3.z -mad.f32 r3.z, r4.x, r4.x, r3.w -mov.f32f32 r3.x, r3.x -mov.f32f32 r3.w, r4.y -mov.f32f32 r3.y, r3.y -mov.f32f32 r3.z, r3.z -add.f r4.y, c4.z, (neg)r0.z -mul.f r4.z, r3.x, r3.x -mul.f r4.w, r1.x, r0.w -mad.f32 r3.w, (neg)c7.x, r0.z, r3.w -mad.f32 r3.z, r4.y, r4.y, r3.z -mov.f32f32 r3.y, r3.y -mul.f r5.x, c9.w, r0.x -mul.f r5.y, c9.z, r0.x -mul.f r5.z, c9.y, r0.x -mul.f r5.w, c9.x, r0.x -mov.f32f32 r3.w, r3.w -rsq r3.z, r3.z -(ss)mov.f32f32 r3.z, r3.z -add.f r4.w, c14.y, (neg)r4.w -mov.f32f32 r6.x, r3.y -mad.f32 r3.y, c10.w, r0.y, r5.x -mad.f32 r2.w, r2.w, r3.z, (neg)c5.x -mov.f32f32 r4.w, r4.w -mad.f32 r4.x, r4.x, r3.z, (neg)c5.y -mad.f32 r3.z, r4.y, r3.z, (neg)c5.z -mov.f32f32 r2.w, r2.w -mad.f32 r4.y, r4.w, r4.w, r4.z -mov.f32f32 r4.x, r4.x -mov.f32f32 r3.z, r3.z -mul.f r4.z, r2.w, r2.w -mov.f32f32 r4.y, r4.y -mad.f32 r4.z, r4.x, r4.x, r4.z -mul.f r5.x, r1.y, r0.w -mov.f32f32 r3.w, r3.w -mad.f32 r3.y, c11.w, r0.z, r3.y -mov.f32f32 r4.z, r4.z -add.f r5.x, c14.y, (neg)r5.x -mad.f32 r4.z, r3.z, r3.z, r4.z -mov.f32f32 r3.w, r3.w -(rpt4)nop -rsq r4.z, r4.z -(ss)mov.f32f32 r4.z, r4.z -mov.f32f32 r5.x, r5.x -mov.f32f32 r6.y, r3.w -add.f r3.y, r3.y, c12.w -mul.f r3.z, r3.z, r4.z -mul.f r3.w, r4.x, r4.z -mul.f r2.w, r2.w, r4.z -mad.f32 r4.x, r5.x, r5.x, r4.y -mov.f32f32 r3.z, r3.z -mov.f32f32 r3.w, r3.w -mov.f32f32 r2.w, r2.w -mov.f32f32 r3.y, r3.y -mov.f32f32 r3.z, r3.z -mov.f32f32 r3.w, r3.w -mov.f32f32 r2.w, r2.w -nop -mov.f32f32 r7.w, r3.z -mov.f32f32 r7.z, r3.w -mov.f32f32 r7.y, r2.w -rsq r2.w, r4.x -(ss)mov.f32f32 r2.w, r2.w -(ss)mov.f32f32 r4.x, r3.y -mad.f32 r4.y, c10.z, r0.y, r5.y -mad.f32 r4.z, c10.y, r0.y, r5.z -mul.f r3.x, r3.x, r2.w -mul.f r5.x, r5.x, r2.w -mul.f r2.w, r4.w, r2.w -mad.f32 r4.y, c11.z, r0.z, r4.y -mov.f32f32 r3.x, r3.x -mov.f32f32 r4.w, r5.x -mov.f32f32 r2.w, r2.w -add.f r4.y, r4.y, c12.z -mul.f r5.x, r1.x, r3.x -mul.f r5.y, r0.w, r4.w -mad.f32 r5.x, r0.w, r2.w, (neg)r5.x -mad.f32 r5.y, r1.y, r3.x, (neg)r5.y -mul.f r5.z, r1.y, r2.w -mov.f32f32 r8.x, r4.w -mov.f32f32 r5.x, r5.x -mov.f32f32 r5.y, r5.y -mad.f32 r4.w, r1.x, r4.w, (neg)r5.z -mov.f32f32 r8.z, r8.x -mov.f32f32 r5.x, r5.x -mov.f32f32 r5.y, r5.y -mov.f32f32 r4.w, r4.w -mov.f32f32 r2.w, r2.w -mov.f32f32 r5.x, r5.x -mov.f32f32 r6.w, r5.y -mov.f32f32 r4.w, r4.w -nop -mov.f32f32 r7.x, r5.x -mov.f32f32 r8.y, r2.w -mov.f32f32 r6.z, r4.w -mov.f32f32 r2.w, r3.x -mov.f32f32 r3.x, r4.y -mad.f32 r4.y, c11.y, r0.z, r4.z -mad.f32 r4.z, c10.x, r0.y, r5.w -mov.f32f32 r8.x, r2.w -mov.f32f32 r3.w, r3.x -add.f r2.w, r4.y, c12.y -mad.f32 r3.x, c11.x, r0.z, r4.z -mul.f r4.y, c0.w, r0.x -mul.f r4.z, c0.z, r0.x -mul.f r2.w, r2.w, c13.y -add.f r3.x, r3.x, c12.x -mad.f32 r4.y, c1.w, r0.y, r4.y -mad.f32 r4.z, c1.z, r0.y, r4.z -mov.f32f32 r3.z, r2.w -mul.f r2.w, r3.x, c13.x -mad.f32 r3.x, c2.w, r0.z, r4.y -mad.f32 r4.y, c2.z, r0.z, r4.z -mul.f r4.z, c0.y, r0.x -mov.f32f32 r3.y, r2.w -add.f r2.w, r3.x, c3.w -add.f r3.x, r4.y, c3.z -mad.f32 r4.y, c1.y, r0.y, r4.z -mul.f r4.z, c0.x, r0.x -mov.f32f32 r5.z, r2.w -mov.f32f32 r5.y, r3.x -mad.f32 r2.w, c2.y, r0.z, r4.y -mad.f32 r0.y, c1.x, r0.y, r4.z -mad.f32 r3.x, c8.x, r0.z, c8.y -mad.f32 r0.x, c8.x, r0.x, c8.y -add.f r2.w, r2.w, c3.y -mad.f32 r0.y, c2.x, r0.z, r0.y -mov.f32f32 r0.z, r3.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r5.x, r2.w -add.f r0.y, r0.y, c3.x -mov.f32f32 r9.w, r0.z -mov.f32f32 r9.z, r0.x -mov.f32f32 r0.x, (0.000000) -mov.f32f32 r4.w, r0.y -mov.f32f32 r0.y, (0.000000) -mov.f32f32 r0.z, r1.y -mov.f32f32 r10.w, r0.x -mov.f32f32 r0.x, r1.x -mov.f32f32 r10.z, r0.y -mov.f32f32 r10.y, r0.z -mov.f32f32 r0.y, r0.w -mov.f32f32 r10.x, r0.x -mul.f r0.x, r2.x, c6.z -mul.f r0.z, r1.w, c6.y -mov.f32f32 r11.w, r0.y -mul.f r0.y, r1.z, c6.x -mov.f32f32 r11.z, r0.x -mov.f32f32 r11.y, r0.z +@in(r8.x) in0 +@in(r8.y) in1 +@in(r8.z) in2 +@in(r6.w) in4 +@in(r7.x) in5 +@in(r7.y) in6 +@in(r8.w) in8 +@in(r9.x) in9 +@in(r9.y) in10 +@in(r3.x) in12 +@in(r3.y) in13 +@out(r0.x) out0 +@out(r0.y) out1 +@out(r0.z) out2 +@out(r0.w) out3 +@out(r1.x) out4 +@out(r1.y) out5 +@out(r1.z) out6 +@out(r1.w) out7 +@out(r2.x) out8 +@out(r2.y) out9 +@out(r2.z) out10 +@out(r2.w) out11 +@out(r3.x) out12 +@out(r3.y) out13 +@out(r3.z) out14 +@out(r3.w) out15 +@out(r4.x) out16 +@out(r4.y) out17 +@out(r4.z) out18 +@out(r4.w) out19 +@out(r5.x) out20 +@out(r5.y) out21 +@out(r5.z) out22 +@out(r5.w) out23 +@out(r6.x) out24 +@out(r6.y) out25 +@out(r6.z) out26 +@out(r6.w) out27 +@out(r7.x) out28 +@out(r7.y) out29 +@out(r7.z) out30 +@out(r7.w) out31 +@const(c14.x) 0x3f800000, 0x00000000, 0xbf800000, 0x00000000 +@const(c15.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 +(sy)(ss)add.f r0.x, c4.x, (neg)r8.x +mul.f r0.y, r6.w, r6.w +mul.f r0.z, c9.y, r8.x +mul.f r0.w, c9.x, r8.x +mul.f r1.x, r0.x, r0.x +add.f r1.z, c4.y, (neg)r8.y +add.f r0.y, c14.x, (neg)r0.y +mad.f32 r0.z, c10.y, r8.y, r0.z +mad.f32 r0.w, c10.x, r8.y, r0.w +mad.f32 r1.x, r1.z, r1.z, r1.x +add.f r1.w, c4.z, (neg)r8.z +mov.f32f32 r1.y, r0.y +mad.f32 r0.z, c11.y, r8.z, r0.z +mad.f32 r0.w, c11.x, r8.z, r0.w +mad.f32 r1.x, r1.w, r1.w, r1.x +mul.f r2.x, r1.y, r1.y +mul.f r1.y, r7.x, r6.w +add.f r0.z, r0.z, c12.y +add.f r0.w, r0.w, c12.x +mul.f r2.y, r7.y, r6.w +mul.f r2.z, c9.w, r8.x +rsq r1.x, r1.x +(ss)mov.f32f32 r2.w, r1.x +add.f r3.z, c14.y, (neg)r1.y +mad.f32 r0.x, r0.x, r1.x, (neg)c5.x +mul.f r1.y, r0.z, c13.y +mad.f32 r0.z, r1.z, r2.w, (neg)c5.y +mov.f32f32 r1.z, r3.z +(ss)mov.f32f32 r1.x, r0.x +mad.f32 r1.w, r1.w, r2.w, (neg)c5.z +mov.f32f32 r2.w, r0.z +mad.f32 r2.x, r3.z, r1.z, r2.x +mul.f r1.x, r1.x, r1.x +add.f r2.y, c14.y, (neg)r2.y +mad.f32 r0.z, r0.z, r2.w, r1.x +mov.f32f32 r3.z, r1.w +mul.f r1.x, r0.w, c13.x +mov.f32f32 r0.w, r2.y +mad.f32 r2.z, c10.w, r8.y, r2.z +mad.f32 r0.z, r1.w, r3.z, r0.z +mad.f32 r1.w, c11.w, r8.z, r2.z +mul.f r2.z, c9.z, r8.x +mul.f r3.w, c0.w, r8.x +mul.f r4.x, c0.z, r8.x +mul.f r4.y, c0.y, r8.x +mul.f r6.x, c0.x, r8.x +rsq r0.z, r0.z +(ss)mov.f32f32 r4.z, r0.z +mul.f r5.y, r0.x, r0.z +mad.f32 r0.x, r2.y, r0.w, r2.x +add.f r1.w, r1.w, c12.w +mul.f r5.w, r3.z, r4.z +mul.f r5.z, r2.w, r4.z +(ss)mad.f32 r0.z, c10.z, r8.y, r2.z +mad.f32 r2.y, c1.w, r8.y, r3.w +mad.f32 r2.w, c1.z, r8.y, r4.x +rsq r0.x, r0.x +(ss)mov.f32f32 r3.z, r0.x +mul.f r2.x, r0.y, r0.x +(ss)mad.f32 r0.x, c11.z, r8.z, r0.z +mad.f32 r0.y, c2.w, r8.z, r2.y +mul.f r2.z, r0.w, r3.z +mul.f r2.y, r1.z, r3.z +mov.f32f32 r0.z, r2.x +add.f r1.z, r0.x, c12.z mov.f32f32 r0.x, r2.z -mov.f32f32 r11.x, r0.y -mov.f32f32 r0.y, r2.y -mov.f32f32 r0.z, c14.z -mov.f32f32 r9.y, r0.x +mov.f32f32 r3.z, r2.y +mul.f r3.w, r7.x, r0.z +add.f r0.w, r0.y, c3.w +mul.f r0.y, r6.w, r0.x +mul.f r4.x, r7.y, r3.z +mad.f32 r4.w, r7.y, r0.z, (neg)r0.y +mad.f32 r4.z, r7.x, r0.x, (neg)r4.x +mad.f32 r5.x, r6.w, r3.z, (neg)r3.w +mad.f32 r0.x, c2.z, r8.z, r2.w +mad.f32 r0.y, c1.y, r8.y, r4.y +mad.f32 r2.w, c1.x, r8.y, r6.x +absneg.f r3.z, (neg)c7.y +add.f r0.z, r0.x, c3.z +mad.f32 r0.x, c2.y, r8.z, r0.y +mad.f32 r2.w, c2.x, r8.z, r2.w +mul.f r3.w, r8.x, (neg)r3.z nop -mov.f32f32 r9.x, r0.y -mov.f32f32 r8.w, r0.z +add.f r0.y, r0.x, c3.y +add.f r0.x, r2.w, c3.x +mad.f32 r4.y, (neg)c7.x, r8.z, r3.w +mul.f r2.w, r8.x, c7.x +mov.f32f32 r7.w, (0.000000) +mad.f32 r4.x, r8.z, (neg)r3.z, r2.w +mov.f32f32 r7.z, (0.000000) +mul.f r6.z, r9.y, c6.z +mul.f r6.y, r9.x, c6.y +mul.f r6.x, r8.w, c6.x +mad.f32 r3.w, c8.x, r8.z, c8.y +mad.f32 r3.z, c8.x, r8.x, c8.y +mov.f32f32 r2.w, c14.z end nop nop -; VERT: outputs: r4.w (0:0) r3.y (5:9) r8.x (5:10) r9.x (5:11) r6.x (5:12) r7.x (5:13) r11.x (5:14) r10.x (5:15) -; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r1.z (0:0,cm=7,il=16,b=0) r2.y (0:0,cm=3,il=20,b=0) -; VERT: 170 instructions, 0 half, 12 full -; pos: r4.w +nop +; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11) r4.x (5:12) r5.x (5:13) r6.x (5:14) r7.x (5:15) +; VERT: inputs: r8.x (0:0,cm=7,il=8,b=0) r6.w (0:0,cm=7,il=12,b=0) r8.w (0:0,cm=7,il=16,b=0) r3.x (0:0,cm=3,il=20,b=0) +; VERT: 97 instructions, 0 half, 10 full diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-42.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-42.asm index 2f17090..78c7452 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-42.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-42.asm @@ -4,334 +4,227 @@ @in(r0.w) in3 @in(r1.x) in4 @in(r1.y) in5 -@out(r3.w) out0 -@out(r4.x) out1 -@out(r4.y) out2 -@out(r4.z) out3 +@out(r2.x) out0 +@out(r2.y) out1 +@out(r2.z) out2 +@out(r2.w) out3 +@const(c14.x) 0x3f000000, 0x00000000, 0x40000000, 0xbf800000 +@const(c15.x) 0xba03126f, 0xbf000000, 0x3f800000, 0x3fb8aa65 +@const(c16.x) 0x3de38866, 0x3cf5c28f, 0x00000000, 0x00000000 +@const(c17.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.x, 10, r1.x -bary.f r0.y, 0, r1.x +bary.f r1.z, 0, r1.x add.f r0.w, r0.w, c14.y -bary.f r1.z, 1, r1.x -mov.f32f32 r1.w, r0.x -add.f r2.x, r0.y, c15.y -bary.f r2.y, 8, r1.x +bary.f r1.w, 1, r1.x +mov.f32f32 r2.x, r0.x +bary.f r0.y, 11, r1.x add.f r2.z, r1.z, c15.y -mov.f32f32 r2.w, r1.w -bary.f r1.w, 11, r1.x -floor.f r3.x, r2.x +add.f r2.w, r1.w, c15.y +bary.f r3.x, 8, r1.x +mov.f32f32 r2.y, r0.y +floor.f r3.z, r2.z rcp r0.w, r0.w add.f r0.z, r0.z, c14.y -floor.f r3.y, r2.z -mov.f32f32 r3.z, r1.w -add.f r2.x, r2.x, (neg)r3.x +floor.f r3.w, r2.w +bary.f r3.y, 9, r1.x +add.f r2.z, r2.z, (neg)r3.z (ss)mul.f r0.z, r0.z, r0.w -(ss)add.f r0.w, r2.z, (neg)r3.y -mov.f32f32 r3.x, r3.z -mov.f32f32 r2.x, r2.x -mov.f32f32 r0.z, r0.z -absneg.f r2.z, (neg)c11.x -mov.f32f32 r0.w, r0.w -mul.f r3.y, c14.x, r2.x -add.f r3.z, c14.z, (neg)r2.x -sam (f32)(xyz)r3.w, r2.w, s#2, t#2 -(sy)(ss)mad.f32 r2.w, c14.z, r3.w, c14.w -mul.f r2.z, r2.z, c11.x -mov.f32f32 r3.x, r3.y -mul.f r3.y, c14.x, r0.w -mov.f32f32 r2.w, r2.w +sam (f32)(xyz)r4.x, r2.x, s#2, t#2 +(sy)(ss)mad.f32 r0.w, c14.z, r4.x, c14.w +absneg.f r2.x, (neg)c11.x +mov.f32f32 r2.y, r2.z +add.f r2.w, r2.w, (neg)r3.w +mov.f32f32 r3.z, r0.w bary.f r3.w, 4, r1.x -add.f r0.y, r0.y, (neg)r3.x -mul.f r2.z, r2.z, r0.z -mov.f32f32 r3.x, r3.y -mul.f r3.y, r3.w, r2.w -mad.f32 r3.w, c14.z, r4.x, c14.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r2.z, r2.z -add.f r1.z, r1.z, (neg)r3.x -mov.f32f32 r3.x, r3.w -bary.f r3.w, 12, r1.x -bary.f r4.x, 7, r1.x -add.f r4.z, c15.y, r0.y -mul.f r0.z, r2.z, r0.z -add.f r0.y, c15.z, r0.y -mul.f r2.z, r3.w, (neg)r4.x -mov.f32f32 r3.w, r4.z +mul.f r4.x, c14.x, r2.y +mul.f r2.x, r2.x, c11.x +mov.f32f32 r4.w, r2.w +mul.f r3.w, r3.w, r3.z +mad.f32 r4.y, c14.z, r4.y, c14.w +add.f r1.z, r1.z, (neg)r4.x +mul.f r2.x, r2.x, r0.z mov.f32f32 r0.z, r0.z -mov.f32f32 r0.y, r0.y -mad.f32 r2.z, r2.z, r3.x, r3.y -mul.f r3.y, r3.w, c5.z +mov.f32f32 r4.x, r4.y +bary.f r5.x, 12, r1.x +bary.f r5.y, 7, r1.x +mov.f32f32 r5.z, r1.z +mul.f r0.z, r2.x, r0.z +mul.f r2.x, c14.x, r4.w +mul.f r5.x, r5.x, (neg)r5.y +add.f r5.z, c15.y, r5.z mul.f r0.z, r0.z, c15.w -mul.f r0.y, r0.y, c5.z -mov.f32f32 r2.z, r2.z -mad.f32 r3.w, c14.z, r4.y, c14.w -mov.f32f32 r4.y, r3.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r4.z, r0.y -mov.f32f32 r3.w, r3.w -bary.f r4.w, 21, r1.x -mov.f32f32 r5.x, r4.y -mov.f32f32 r1.z, r1.z -mov.f32f32 r4.y, r4.z -mad.f32 r2.z, r4.w, r3.w, r2.z -exp2 r0.z, r0.z -(ss)mov.f32f32 r0.z, r0.z -add.f r4.z, c15.y, r1.z -mov.f32f32 r5.w, r4.y -mov.f32f32 r2.z, r2.z -add.f r4.y, c17.y, (neg)r0.z -mov.f32f32 r4.z, r4.z +add.f r1.w, r1.w, (neg)r2.x +mad.f32 r2.x, r5.x, r4.x, r3.w +mad.f32 r3.w, c14.z, r4.z, c14.w +mul.f r5.z, r5.z, c5.z +mov.f32f32 r4.z, r1.w add.f r1.z, c15.z, r1.z -mul.f r4.w, r2.z, r2.z -bary.f r5.y, 5, r1.x -mul.f r4.z, r4.z, c5.w -mul.f r4.y, r4.y, c11.y -mul.f r0.z, r0.z, c15.z -mul.f r5.z, r5.y, r2.w -bary.f r5.y, 13, r1.x -mov.f32f32 r6.x, r4.z -add.f r0.z, r0.z, r4.y -mov.f32f32 r1.z, r1.z -mul.f r4.y, r5.y, (neg)r4.x -mov.f32f32 r5.y, r6.x -bary.f r6.x, 2, r1.x -mov.f32f32 r0.z, r0.z -mad.f32 r4.y, r4.y, r3.x, r5.z -mul.f r1.z, r1.z, c5.w -add.f r6.z, r6.x, c15.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r4.y, r4.y -bary.f r5.z, 22, r1.x -mov.f32f32 r6.x, r6.z -mov.f32f32 r0.z, r0.z -mov.f32f32 r6.y, r1.z -mad.f32 r4.y, r5.z, r3.w, r4.y -mov.f32f32 r5.z, r6.x +mov.f32f32 r5.x, r3.w +bary.f r5.w, 21, r1.x +mov.f32f32 r6.y, r5.z +exp2 r0.z, r0.z +(ss)mov.f32f32 r6.x, r0.z +add.f r4.z, c15.y, r4.z +mad.f32 r2.x, r5.w, r5.x, r2.x +mul.f r7.x, r1.z, c5.z +add.f r1.z, c15.z, r1.w +add.f r1.w, c17.y, (neg)r6.x +mov.f32f32 r7.w, r2.x +mul.f r8.y, r4.z, c5.w +mov.f32f32 r8.x, r7.x +mul.f r7.y, r1.z, c5.w +mul.f r1.z, r2.x, r7.w +bary.f r2.x, 5, r1.x +mov.f32f32 r6.z, r8.y +mul.f r1.w, r1.w, c11.y +(ss)mul.f r0.z, r0.z, c15.z +mul.f r2.x, r2.x, r3.z +bary.f r3.z, 13, r1.x +bary.f r4.z, 2, r1.x +add.f r0.z, r0.z, r1.w +mov.f32f32 r5.w, r7.y +mul.f r1.w, r3.z, (neg)r5.y +add.f r7.z, r4.z, c15.x max.f r0.z, r0.z, c14.y -mov.f32f32 r6.x, r6.y -mov.f32f32 r4.y, r4.y -mov.f32f32 r6.w, r0.y -mov.f32f32 r0.y, r3.y +add.f r2.y, c14.z, (neg)r2.y +mad.f32 r1.w, r1.w, r4.x, r2.x +bary.f r2.x, 22, r1.x +mov.f32f32 r6.w, r7.z min.f r0.z, r0.z, c15.z -mad.f32 r3.y, r4.y, r4.y, r4.w -sam.s (f32)(x)r4.w, r5.x, s#4, t#4 -(sy)mov.f32f32 r4.w, r4.w -mov.f32f32 r6.x, r6.x -(ss)add.f r5.x, c17.y, (neg)r0.z -mov.f32f32 r3.y, r3.y -bary.f r5.y, 6, r1.x -mov.f32f32 r4.w, r4.w -mov.f32f32 r3.z, r3.z -add.f r5.z, c14.z, (neg)r0.w -mul.f r2.w, r5.y, r2.w -bary.f r5.y, 14, r1.x -mul.f r5.x, r5.x, c10.z -add.f r6.y, c17.y, (neg)r0.z -add.f r7.x, c17.y, (neg)r0.z -mul.f r4.x, r5.y, (neg)r4.x -mov.f32f32 r5.y, r5.z -mul.f r5.z, r6.y, c10.y -mul.f r7.z, r7.x, c10.x -mad.f32 r2.w, r4.x, r3.x, r2.w -mul.f r3.x, r3.z, r5.y -mov.f32f32 r4.x, r6.z -mov.f32f32 r4.z, r4.z -mov.f32f32 r2.w, r2.w -bary.f r6.y, 23, r1.x -mul.f r3.x, r3.x, r4.w -mov.f32f32 r7.x, r4.z -mov.f32f32 r4.z, r6.z -mad.f32 r2.w, r6.y, r3.w, r2.w -mov.f32f32 r6.y, r4.x -mov.f32f32 r7.w, r0.y -mov.f32f32 r7.y, r4.z -mov.f32f32 r0.y, r2.w -mov.f32f32 r1.z, r1.z -add.f r2.x, r2.x, c15.z -add.f r0.w, r0.w, c15.z -mad.f32 r2.w, r0.y, r0.y, r3.y -sam.s (f32)(x)r3.y, r5.w, s#4, t#4 -mov.f32f32 r8.x, r1.z -sam.s (f32)(x)r1.z, r6.w, s#4, t#4 -mov.f32f32 r3.w, r6.z -mul.f r4.x, r2.x, r5.y -mul.f r3.z, r3.z, r0.w -(sy)mov.f32f32 r1.z, r1.z -mov.f32f32 r3.y, r3.y -rsq r2.w, r2.w -(ss)mov.f32f32 r2.w, r2.w -mov.f32f32 r8.y, r3.w -mov.f32f32 r2.y, r2.y -mul.f r0.w, r2.x, r0.w -mul.f r2.x, r2.z, r2.w -mad.f32 r1.z, r4.x, r1.z, r3.x -mul.f r2.z, r4.y, r2.w -mul.f r0.y, r0.y, r2.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r1.z, r1.z -sam.s (f32)(x)r2.w, r7.w, s#4, t#4 -(sy)mov.f32f32 r2.w, r2.w -mov.f32f32 r2.z, r2.z -mul.f r3.x, r2.x, r2.x -mul.f r3.w, (neg)c9.x, r2.x -mad.f32 r1.z, r3.z, r2.w, r1.z -mad.f32 r2.w, r2.z, r2.z, r3.x -mad.f32 r3.x, (neg)c9.y, r2.z, r3.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r3.z, r2.y -mov.f32f32 r2.y, r2.w -mov.f32f32 r2.w, r3.x -mad.f32 r2.y, r0.y, r0.y, r2.y -mad.f32 r2.w, (neg)c9.z, r0.y, r2.w -mov.f32f32 r1.z, r1.z -bary.f r3.x, 9, r1.x -mov.f32f32 r4.x, r0.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r4.y, r1.w +mov.f32f32 r8.z, r7.z +mad.f32 r1.w, r2.x, r5.x, r1.w +mov.f32f32 r6.x, r7.z +add.f r2.x, c17.y, (neg)r0.z +add.f r3.z, c17.y, (neg)r0.z +mov.f32f32 r4.x, r1.w +sam.s (f32)(x)r8.w, r6.y, s#4, t#4 +add.f r4.z, c17.y, (neg)r0.z +mov.f32f32 r5.x, r2.y +add.f r4.w, c14.z, (neg)r4.w +mad.f32 r1.z, r1.w, r4.x, r1.z +bary.f r1.w, 6, r1.x +(ss)mul.f r6.y, r2.x, c10.z +mul.f r3.z, r3.z, c10.y +mul.f r4.z, r4.z, c10.x +mul.f r0.w, r1.w, r0.w +bary.f r1.w, 14, r1.x +mov.f32f32 r2.x, r4.w +sam.s (f32)(x)r9.x, r8.x, s#4, t#4 +sam.s (f32)(x)r9.y, r5.z, s#4, t#4 +sam.s (f32)(x)r6.z, r7.x, s#4, t#4 +add.f r2.z, r2.z, c15.z +add.f r2.w, r2.w, c15.z +mul.f r1.w, r1.w, (neg)r5.y +mul.f r2.x, r5.x, r2.x +mul.f r4.w, r2.z, r4.w +mul.f r2.y, r2.y, r2.w +mad.f32 r0.w, r1.w, r4.y, r0.w +bary.f r1.w, 23, r1.x +(sy)mul.f r2.x, r2.x, r8.w +mul.f r2.z, r2.z, r2.w +mad.f32 r2.x, r4.w, r9.x, r2.x +mad.f32 r0.w, r1.w, r3.w, r0.w +mad.f32 r1.w, r2.y, r9.y, r2.x +(ss)nop +sam (f32)(w)r4.w, r3.x, s#1, t#1 +(sy)cmps.f.lt r2.x, r5.z, c16.y +mov.f32f32 r2.w, r0.x +mov.f32f32 r2.y, r0.w +mad.f32 r1.w, r2.z, r6.z, r1.w +cov.u32f32 r2.x, r2.x +(ss)mov.f32f32 r3.x, r0.y +mad.f32 r1.z, r2.y, r2.y, r1.z +mul.f r1.w, c16.x, r1.w +cmps.f.ne r2.x, r2.x, c14.y +(rpt3)nop +rsq r1.z, r1.z +(ss)mov.f32f32 r2.y, r1.z +mov.f32f32 r3.y, r1.w +mul.f r0.w, r0.w, r1.z +(ss)mov.f32f32 r1.z, c14.y +mul.f r2.z, r7.w, r2.y +mul.f r2.y, r4.x, r2.y +mov.f32f32 r3.w, r0.w +sel.b32 r1.z, r1.z, r2.x, r5.z +mov.f32f32 r2.x, r2.z +mul.f r2.z, (neg)c9.x, r2.z +mov.f32f32 r4.x, r2.y +mad.f32 r2.y, (neg)c9.y, r2.y, r2.z +mul.f r2.z, r2.x, r2.x +mad.f32 r0.w, (neg)c9.z, r0.w, r2.y +mad.f32 r2.y, r4.x, r4.x, r2.z +sam (f32)(xyz)r4.w, r2.w, s#3, t#3 +(sy)(ss)mul.f r3.x, c8.z, r5.y +mad.f32 r2.y, r3.w, r3.w, r2.y +max.f r0.w, r0.w, c14.y +bary.f r2.z, 18, r1.x +(rpt1)nop +mov.f32f32 r2.w, r0.w +bary.f r4.y, 19, r1.x rsq r2.y, r2.y -(ss)mov.f32f32 r2.y, r2.y -mov.f32f32 r2.w, r2.w -mad.f32 r0.w, r0.w, r3.y, r1.z -mov.f32f32 r1.z, r3.x -mul.f r2.x, r2.x, r2.y -max.f r2.w, r2.w, c14.y -mov.f32f32 r0.w, r0.w -mul.f r2.z, r2.z, r2.y -mov.f32f32 r2.x, r2.x -bary.f r3.x, 15, r1.x -mov.f32f32 r2.w, r2.w -bary.f r3.y, 19, r1.x -bary.f r3.w, 18, r1.x -mov.f32f32 r3.x, r3.x -bary.f r4.z, 20, r1.x -mov.f32f32 r3.y, r3.y -mov.f32f32 r3.w, r3.w -mul.f r2.x, r2.x, r3.x -mov.f32f32 r2.z, r2.z -bary.f r3.x, 16, r1.x -mov.f32f32 r4.z, r4.z -mad.f32 r4.w, c8.y, r2.w, (neg)r3.y -mad.f32 r5.y, c8.x, r2.w, (neg)r3.w -mov.f32f32 r3.x, r3.x -mad.f32 r2.w, c8.z, r2.w, (neg)r4.z -mov.f32f32 r4.w, r4.w -mov.f32f32 r5.y, r5.y -mad.f32 r2.x, r2.z, r3.x, r2.x -mov.f32f32 r2.z, r2.w -mad.f32 r2.w, c12.x, r4.w, r3.y -mad.f32 r3.x, c12.x, r5.y, r3.w -mov.f32f32 r2.x, r2.x -mul.f r0.y, r0.y, r2.y -mad.f32 r2.y, c12.x, r2.z, r4.z -mov.f32f32 r2.z, r2.w -mov.f32f32 r2.w, r3.x -mov.f32f32 r0.y, r0.y +(ss)mov.f32f32 r5.y, r2.y +bary.f r5.z, 20, r1.x +mad.f32 r5.w, c8.y, r2.w, (neg)r4.y +mad.f32 r2.w, c8.z, r2.w, (neg)r5.z +mul.f r2.x, r2.x, r5.y +bary.f r6.x, 15, r1.x +mov.f32f32 r5.z, r5.z +mov.f32f32 r4.y, r4.y +mul.f r4.x, r4.x, r5.y +mul.f r2.x, r2.x, r6.x +bary.f r5.y, 16, r1.x +mad.f32 r5.z, c12.x, r2.w, r5.z +mad.f32 r4.y, c12.x, r5.w, r4.y +(ss)mul.f r2.y, r3.w, r2.y +mad.f32 r2.x, r4.x, r5.y, r2.x bary.f (ei)r1.x, 17, r1.x -mov.f32f32 r1.y, r2.y -mul.f r0.w, c16.x, r0.w -nop -mov.f32f32 r1.x, r1.x -mov.f32f32 r3.w, r1.z -mov.f32f32 r3.x, r4.x -mov.f32f32 r4.z, r0.x -mad.f32 r0.x, r0.y, r1.x, r2.x -mov.f32f32 r0.y, r0.w -mov.f32f32 r0.w, r1.w -mov.f32f32 r4.w, r4.y -mov.f32f32 r0.x, r0.x -sam (f32)(w)r1.z, r3.z, s#1, t#1 -(sy)mov.f32f32 r1.x, r2.y -cmps.f.lt r1.z, r2.y, c16.y -mov.f32f32 r3.y, r0.w -max.f r0.x, c14.y, r0.x -mov.f32f32 r0.w, r1.x -cov.u32f32 r1.x, r1.z -sam (f32)(xyz)r1.z, r4.z, s#3, t#3 -(sy)mul.f r2.x, c8.z, r2.x -mov.f32f32 r0.x, r0.x -mul.f r1.w, c8.y, r1.w -mul.f r1.z, c8.x, r1.z -(ss)nop -sam (f32)(xyzw)r3.x, r3.x, s#0, t#0 -cmps.f.ne r1.x, r1.x, c14.y -(rpt2)nop -log2 r0.x, r0.x +mad.f32 r0.w, c8.x, r0.w, (neg)r2.z +(rpt1)nop +mad.f32 r1.x, r2.y, r1.x, r2.x +mov.f32f32 r1.y, r2.z +mul.f r3.w, c8.y, r5.x +mul.f r4.x, c8.x, r4.w +max.f r1.x, c14.y, r1.x +mad.f32 r0.w, c12.x, r0.w, r1.y +sam (f32)(xyzw)r2.x, r0.x, s#0, t#0 +(rpt4)nop +(ss)log2 r0.x, r1.x (ss)mul.f r0.x, c12.y, r0.x -mov.f32f32 r2.y, c14.y -mov.f32f32 r2.x, r2.x -(sy)mov.f32f32 r3.w, r3.w -mov.f32f32 r0.x, r0.x -sel.b32 r0.w, r2.y, r1.x, r0.w -mov.f32f32 r1.x, r1.w -mov.f32f32 r1.z, r1.z -nop -mov.f32f32 r4.z, r3.w -nop +(rpt5)nop exp2 r0.x, r0.x -(ss)mul.f r1.w, r2.x, r0.x -mul.f r1.x, r1.x, r0.x -mad.f32 r1.y, r3.z, r1.y, r1.w -mad.f32 r1.x, r3.y, r2.z, r1.x -(ss)mul.f r0.x, r1.z, r0.x -nop -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.x, r1.x -mad.f32 r0.x, r3.x, r2.w, r0.x -nop -mul.f r1.y, r1.y, r0.y -mul.f r1.x, r1.x, r0.y -mov.f32f32 r0.x, r0.x -nop -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.x, r1.x -mad.f32 r1.y, c7.z, r3.z, r1.y -mad.f32 r1.x, c7.y, r3.y, r1.x -mul.f r0.x, r0.x, r0.y -nop -mov.f32f32 r0.y, r1.y -mov.f32f32 r1.x, r1.x -mov.f32f32 r0.x, r0.x +(ss)mul.f r0.y, r3.x, r0.x +(ss)mul.f r1.x, r3.w, r0.x +(sy)mad.f32 r0.y, r2.z, r5.z, r0.y +mad.f32 r1.x, r2.y, r4.y, r1.x +mul.f r0.x, r4.x, r0.x +nop +mul.f r0.y, r0.y, r3.y +mul.f r1.x, r1.x, r3.y +mad.f32 r0.y, c7.z, r2.z, r0.y +mad.f32 r1.x, c7.y, r2.y, r1.x +mad.f32 r0.x, r2.x, r0.w, r0.x nop mul.f r0.y, r0.z, r0.y -mul.f r1.x, r0.z, r1.x -mad.f32 r0.x, c7.x, r3.x, r0.x +mul.f r0.w, r0.z, r1.x +mul.f r0.x, r0.x, r1.w nop -add.f r0.y, r0.y, r5.x -add.f r1.x, r1.x, r5.z -mov.f32f32 r0.x, r0.x +add.f r0.y, r0.y, r6.y +add.f r0.w, r0.w, r3.z +mad.f32 r0.x, c7.x, r2.x, r0.x nop -mul.f r0.y, r0.y, r0.w -mul.f r1.x, r1.x, r0.w +mul.f r0.y, r0.y, r1.z +mul.f r0.w, r0.w, r1.z mul.f r0.x, r0.z, r0.x nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r1.x -add.f r0.x, r0.x, r7.z -nop -mul.f r0.y, r0.y, c6.z -mul.f r0.z, r0.z, c6.y -mul.f r0.x, r0.x, r0.w -nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.x, r0.x -nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -mul.f r0.x, r0.x, c6.x -nop -mov.f32f32 r4.y, r0.y -mov.f32f32 r4.x, r0.z -mov.f32f32 r0.x, r0.x +mul.f r2.z, r0.y, c6.z +mul.f r2.y, r0.w, c6.y +add.f r0.x, r0.x, r4.z (rpt2)nop -mov.f32f32 r0.x, r0.x +mul.f r0.x, r0.x, r1.z (rpt2)nop -mov.f32f32 r3.w, r0.x +mul.f r2.x, r0.x, c6.x end -nop -; FRAG: outputs: r3.w (1:0) -; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r3.y (5:10,cm=f,il=12,b=1) r1.x (5:11,cm=f,il=16,b=1) r2.y (5:12,cm=f,il=20,b=1) r2.z (5:13,cm=f,il=24,b=1) r5.z (5:14,cm=f,il=28,b=1) -; FRAG: 325 instructions, 0 half, 9 full -; pos (bary): r1.x -; color: r3.w -; fragcoord: r0.x +; FRAG: outputs: r2.x (1:0) +; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r4.z (5:10,cm=f,il=12,b=1) r63.z (5:11,cm=f,il=16,b=1) r5.y (5:12,cm=f,il=20,b=1) r3.z (5:13,cm=f,il=24,b=1) r1.x (5:14,cm=f,il=28,b=1) +; FRAG: 230 instructions, 0 half, 10 full diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-43.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-43.asm index a5989e6..b4c4f88 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-43.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-43.asm @@ -1,198 +1,139 @@ ; options: ; VERT: new compiler -@in(r0.x) in0 -@in(r0.y) in1 -@in(r0.z) in2 -@in(r0.w) in4 -@in(r1.x) in5 -@in(r1.y) in6 -@in(r1.z) in8 -@in(r1.w) in9 -@in(r2.x) in10 -@in(r2.y) in12 -@in(r2.z) in13 -@out(r9.w) out0 -@out(r10.x) out1 -@out(r10.y) out2 -@out(r10.z) out3 -@out(r5.w) out4 -@out(r6.x) out5 -@out(r6.y) out6 -@out(r6.z) out7 -@out(r4.x) out8 -@out(r4.y) out9 -@out(r4.z) out10 -@out(r4.w) out11 -@out(r1.z) out12 -@out(r1.w) out13 -@out(r2.x) out14 -@out(r2.y) out15 -@out(r7.w) out16 -@out(r8.x) out17 -@out(r8.y) out18 -@out(r8.z) out19 -@out(r6.w) out20 -@out(r7.x) out21 -@out(r7.y) out22 -@out(r7.z) out23 -@out(r8.w) out24 -@out(r9.x) out25 -@out(r9.y) out26 -@out(r9.z) out27 -(sy)(ss)add.f r2.w, c4.x, (neg)r0.x -mul.f r3.x, r0.w, r0.w -mul.f r3.y, c8.w, r0.x -mul.f r3.z, c8.z, r0.x -mul.f r3.w, r2.w, r2.w -add.f r4.x, c4.y, (neg)r0.y -add.f r3.x, c13.x, (neg)r3.x -mad.f32 r3.y, c9.w, r0.y, r3.y -mad.f32 r3.z, c9.z, r0.y, r3.z -mad.f32 r3.w, r4.x, r4.x, r3.w -mov.f32f32 r3.x, r3.x -mad.f32 r3.y, c10.w, r0.z, r3.y -mad.f32 r3.z, c10.z, r0.z, r3.z -mov.f32f32 r3.w, r3.w -add.f r4.y, c4.z, (neg)r0.z -mul.f r4.z, r3.x, r3.x -mul.f r4.w, r1.x, r0.w -add.f r3.y, r3.y, c11.w -mad.f32 r3.w, r4.y, r4.y, r3.w -add.f r3.z, r3.z, c11.z -mul.f r5.x, c8.y, r0.x -mul.f r5.y, c8.x, r0.x -add.f r4.w, c13.y, (neg)r4.w -mul.f r2.x, r2.x, c6.z -mov.f32f32 r3.y, r3.y -rsq r3.w, r3.w -(ss)mov.f32f32 r3.w, r3.w -mov.f32f32 r4.w, r4.w -mov.f32f32 r3.z, r3.z -mad.f32 r5.x, c9.y, r0.y, r5.x -mad.f32 r2.w, r2.w, r3.w, (neg)c5.x -mad.f32 r4.z, r4.w, r4.w, r4.z -mad.f32 r4.x, r4.x, r3.w, (neg)c5.y -mad.f32 r3.w, r4.y, r3.w, (neg)c5.z -mov.f32f32 r2.w, r2.w -mov.f32f32 r4.y, r4.z -mul.f r4.z, r1.y, r0.w -mov.f32f32 r4.x, r4.x -mul.f r5.z, r2.w, r2.w -mov.f32f32 r3.w, r3.w -add.f r4.z, c13.y, (neg)r4.z -mad.f32 r5.z, r4.x, r4.x, r5.z -mov.f32f32 r6.z, r3.y -mov.f32f32 r6.y, r3.z -mad.f32 r3.y, c10.y, r0.z, r5.x -mov.f32f32 r3.z, r5.z -mov.f32f32 r4.z, r4.z -mad.f32 r3.z, r3.w, r3.w, r3.z -add.f r3.y, r3.y, c11.y -mad.f32 r5.x, c9.x, r0.y, r5.y -mov.f32f32 r2.x, r2.x -mad.f32 r5.x, c10.x, r0.z, r5.x -mul.f r5.y, c0.w, r0.x -mul.f r5.z, c0.z, r0.x -rsq r3.z, r3.z -(ss)mov.f32f32 r3.z, r3.z -mad.f32 r4.y, r4.z, r4.z, r4.y -mul.f r3.y, r3.y, c12.y -add.f r5.x, r5.x, c11.x -mul.f r3.w, r3.w, r3.z -mul.f r4.x, r4.x, r3.z -mul.f r2.w, r2.w, r3.z -mov.f32f32 r6.x, r3.y -mov.f32f32 r3.y, r3.w -mov.f32f32 r3.z, r4.x -mov.f32f32 r2.w, r2.w -rsq r3.w, r4.y -(ss)mov.f32f32 r3.w, r3.w -mov.f32f32 r3.y, r3.y -mov.f32f32 r3.z, r3.z -mov.f32f32 r2.w, r2.w +@in(r7.x) in0 +@in(r7.y) in1 +@in(r7.z) in2 +@in(r6.y) in4 +@in(r6.z) in5 +@in(r6.w) in6 +@in(r2.w) in8 +@in(r3.x) in9 +@in(r3.y) in10 +@in(r3.z) in12 +@in(r3.w) in13 +@out(r0.x) out0 +@out(r0.y) out1 +@out(r0.z) out2 +@out(r0.w) out3 +@out(r1.x) out4 +@out(r1.y) out5 +@out(r1.z) out6 +@out(r1.w) out7 +@out(r2.x) out8 +@out(r2.y) out9 +@out(r2.z) out10 +@out(r2.w) out11 +@out(r3.x) out12 +@out(r3.y) out13 +@out(r3.z) out14 +@out(r3.w) out15 +@out(r4.x) out16 +@out(r4.y) out17 +@out(r4.z) out18 +@out(r4.w) out19 +@out(r5.x) out20 +@out(r5.y) out21 +@out(r5.z) out22 +@out(r5.w) out23 +@out(r6.x) out24 +@out(r6.y) out25 +@out(r6.z) out26 +@out(r6.w) out27 +@const(c13.x) 0x3f800000, 0x00000000, 0xbf800000, 0x00000000 +@const(c14.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 +(sy)(ss)add.f r0.x, c4.x, (neg)r7.x +mul.f r0.y, r6.y, r6.y +mul.f r0.z, c8.y, r7.x +mul.f r0.w, c8.x, r7.x +mul.f r1.x, r0.x, r0.x +add.f r1.z, c4.y, (neg)r7.y +add.f r0.y, c13.x, (neg)r0.y +mad.f32 r0.z, c9.y, r7.y, r0.z +mad.f32 r0.w, c9.x, r7.y, r0.w +mad.f32 r1.x, r1.z, r1.z, r1.x +add.f r1.w, c4.z, (neg)r7.z +mov.f32f32 r1.y, r0.y +mad.f32 r0.z, c10.y, r7.z, r0.z +mad.f32 r0.w, c10.x, r7.z, r0.w +mad.f32 r1.x, r1.w, r1.w, r1.x +mul.f r2.x, r1.y, r1.y +mul.f r1.y, r6.z, r6.y +add.f r0.z, r0.z, c11.y +add.f r0.w, r0.w, c11.x +mul.f r2.y, r6.w, r6.y +mul.f r2.z, c8.w, r7.x +rsq r1.x, r1.x +(ss)mov.f32f32 r4.x, r1.x +add.f r4.y, c13.y, (neg)r1.y +mad.f32 r0.x, r0.x, r1.x, (neg)c5.x +mul.f r1.y, r0.z, c12.y +mad.f32 r0.z, r1.z, r4.x, (neg)c5.y +mov.f32f32 r1.z, r4.y +(ss)mov.f32f32 r1.x, r0.x +mad.f32 r1.w, r1.w, r4.x, (neg)c5.z +mov.f32f32 r4.x, r0.z +mad.f32 r2.x, r4.y, r1.z, r2.x +mul.f r1.x, r1.x, r1.x +add.f r2.y, c13.y, (neg)r2.y +mad.f32 r0.z, r0.z, r4.x, r1.x +mov.f32f32 r4.y, r1.w +mul.f r1.x, r0.w, c12.x +mov.f32f32 r0.w, r2.y +mad.f32 r2.z, c9.w, r7.y, r2.z +mad.f32 r0.z, r1.w, r4.y, r0.z +mad.f32 r1.w, c10.w, r7.z, r2.z +mul.f r2.z, c8.z, r7.x +mul.f r4.z, c0.w, r7.x +mul.f r5.z, c0.z, r7.x +mul.f r5.w, c0.y, r7.x +mul.f r6.x, c0.x, r7.x +rsq r0.z, r0.z +(ss)mov.f32f32 r5.x, r0.z +mul.f r4.w, r0.x, r0.z +mad.f32 r0.x, r2.y, r0.w, r2.x +add.f r1.w, r1.w, c11.w +mul.f r5.y, r4.y, r5.x +mul.f r5.x, r4.x, r5.x +(ss)mad.f32 r0.z, c9.z, r7.y, r2.z +mad.f32 r2.y, c1.w, r7.y, r4.z +mad.f32 r5.z, c1.z, r7.y, r5.z +rsq r0.x, r0.x +(ss)mov.f32f32 r4.x, r0.x +mul.f r2.x, r0.y, r0.x +(ss)mad.f32 r0.x, c10.z, r7.z, r0.z +mad.f32 r0.y, c2.w, r7.z, r2.y +mul.f r2.z, r0.w, r4.x +mul.f r2.y, r1.z, r4.x +mov.f32f32 r0.z, r2.x +add.f r1.z, r0.x, c11.z +mov.f32f32 r0.x, r2.z +mov.f32f32 r4.z, r2.y +mul.f r7.w, r6.z, r0.z +add.f r0.w, r0.y, c3.w +mul.f r0.y, r6.y, r0.x +mul.f r4.x, r6.w, r4.z +mad.f32 r4.y, r6.w, r0.z, (neg)r0.y +mad.f32 r4.x, r6.z, r0.x, (neg)r4.x +mad.f32 r4.z, r6.y, r4.z, (neg)r7.w +mad.f32 r0.x, c2.z, r7.z, r5.z +mad.f32 r0.y, c1.y, r7.y, r5.w +mad.f32 r5.z, c1.x, r7.y, r6.x +mad.f32 r0.y, c2.y, r7.z, r0.y +add.f r0.z, r0.x, c3.z +mad.f32 r0.x, c2.x, r7.z, r5.z nop -mov.f32f32 r3.y, r3.y -mov.f32f32 r3.z, r3.z -(rpt1)nop -mov.f32f32 r7.x, r3.y -mov.f32f32 r6.w, r3.z -mov.f32f32 r8.z, r2.w -mul.f r2.w, r3.x, r3.w -mul.f r3.x, r4.z, r3.w -mul.f r3.y, r4.w, r3.w -mul.f r3.z, r5.x, c12.x -mov.f32f32 r2.w, r2.w -mov.f32f32 r3.x, r3.x -mov.f32f32 r3.y, r3.y -mov.f32f32 r5.w, r3.z -mul.f r3.z, r1.x, r2.w -mul.f r3.w, r0.w, r3.x -mad.f32 r3.z, r0.w, r3.y, (neg)r3.z -mad.f32 r3.w, r1.y, r2.w, (neg)r3.w -mul.f r4.x, r1.y, r3.y -(ss)mov.f32f32 r4.y, r3.x -mov.f32f32 r3.z, r3.z -mov.f32f32 r3.w, r3.w -mad.f32 r3.x, r1.x, r3.x, (neg)r4.x -mov.f32f32 r4.z, r4.y -mov.f32f32 r8.y, r3.z -mov.f32f32 r8.x, r3.w -mov.f32f32 r3.x, r3.x -mov.f32f32 r3.y, r3.y -mov.f32f32 r2.w, r2.w -nop -mov.f32f32 r7.w, r3.x -mov.f32f32 r4.y, r3.y -mov.f32f32 r4.x, r2.w -mov.f32f32 r2.x, r2.x -mad.f32 r2.w, c1.w, r0.y, r5.y -mad.f32 r3.x, c1.z, r0.y, r5.z -mul.f r3.y, c0.y, r0.x -mov.f32f32 r8.w, r2.x -mad.f32 r2.x, c2.w, r0.z, r2.w -mad.f32 r2.w, c2.z, r0.z, r3.x -mad.f32 r3.x, c1.y, r0.y, r3.y -mul.f r3.y, c0.x, r0.x -add.f r2.x, r2.x, c3.w -add.f r2.w, r2.w, c3.z -mad.f32 r3.x, c2.y, r0.z, r3.x -mad.f32 r0.y, c1.x, r0.y, r3.y -mov.f32f32 r10.z, r2.x -mov.f32f32 r10.y, r2.w -add.f r2.x, r3.x, c3.y -mad.f32 r0.y, c2.x, r0.z, r0.y -mul.f r1.w, r1.w, c6.y -mul.f r1.z, r1.z, c6.x -mov.f32f32 r10.x, r2.x -add.f r0.y, r0.y, c3.x -mov.f32f32 r1.w, r1.w -mov.f32f32 r1.z, r1.z -mov.f32f32 r1.y, r1.y -mov.f32f32 r9.w, r0.y -mov.f32f32 r7.z, r1.w -mov.f32f32 r7.y, r1.z -mov.f32f32 r9.z, r1.y -mov.f32f32 r0.y, r1.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.x, r2.z -mov.f32f32 r1.y, r2.y -mov.f32f32 r9.y, r0.y -mov.f32f32 r9.x, r0.w -mov.f32f32 r2.y, r1.x -mov.f32f32 r2.x, r1.y -mad.f32 r0.y, c7.x, r0.z, c7.y -mad.f32 r0.x, c7.x, r0.x, c7.y -mov.f32f32 r0.z, c13.z -nop -mov.f32f32 r1.w, r0.y -mov.f32f32 r1.z, r0.x -mov.f32f32 r4.w, r0.z +add.f r0.y, r0.y, c3.y +mul.f r6.x, r3.y, c6.z +add.f r0.x, r0.x, c3.x +mul.f r5.w, r3.x, c6.y +mul.f r5.z, r2.w, c6.x +mad.f32 r3.y, c7.x, r7.z, c7.y +mad.f32 r3.x, c7.x, r7.x, c7.y +mov.f32f32 r2.w, c13.z end nop nop -nop -; VERT: outputs: r9.w (0:0) r5.w (5:9) r4.x (5:10) r1.z (5:11) r7.w (5:12) r6.w (5:13) r8.w (5:14) -; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r1.z (0:0,cm=7,il=16,b=0) r2.y (0:0,cm=3,il=20,b=0) -; VERT: 150 instructions, 0 half, 11 full -; pos: r9.w +; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11) r4.x (5:12) r5.x (5:13) r6.x (5:14) +; VERT: inputs: r7.x (0:0,cm=7,il=8,b=0) r6.y (0:0,cm=7,il=12,b=0) r2.w (0:0,cm=7,il=16,b=0) r3.z (0:0,cm=3,il=20,b=0) +; VERT: 90 instructions, 0 half, 8 full diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-44.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-44.asm index f9f5af9..7a7273d 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-44.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-44.asm @@ -4,214 +4,135 @@ @in(r0.w) in3 @in(r1.x) in4 @in(r1.y) in5 -@out(r2.y) out0 -@out(r2.z) out1 -@out(r2.w) out2 -@out(r3.x) out3 +@out(r2.x) out0 +@out(r2.y) out1 +@out(r2.z) out2 +@out(r2.w) out3 +@const(c9.x) 0x3f000000, 0x00000000, 0xba03126f, 0xbf000000 +@const(c10.x) 0x3f800000, 0x40000000, 0xbf000000, 0x3fb8aa65 +@const(c11.x) 0x3de38866, 0x3cf5c28f, 0x00000000, 0x00000000 +@const(c12.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.x, 0, r1.x add.f r0.y, r0.w, c9.y bary.f r0.w, 1, r1.x bary.f r1.z, 4, r1.x -add.f r1.w, r0.x, c9.w -bary.f r2.x, 6, r1.x +add.f r2.x, r0.x, c9.w +bary.f r1.w, 5, r1.x add.f r2.y, r0.w, c9.w -mov.f32f32 r1.z, r1.z -floor.f r2.z, r1.w +bary.f r2.z, 2, r1.x +floor.f r2.w, r2.x rcp r0.y, r0.y add.f r0.z, r0.z, c9.y -floor.f r2.w, r2.y -mov.f32f32 r3.x, r1.z -add.f r1.z, r1.w, (neg)r2.z +floor.f r3.x, r2.y +add.f r3.w, r2.z, c9.z +add.f r2.x, r2.x, (neg)r2.w (ss)mul.f r0.y, r0.z, r0.y -add.f r0.z, r2.y, (neg)r2.w -bary.f r1.w, 5, r1.x -mov.f32f32 r1.z, r1.z +absneg.f r0.z, (neg)c7.x +add.f r2.y, r2.y, (neg)r3.x +mov.f32f32 r2.z, r2.x +add.f r2.x, r2.x, c10.x +mul.f r0.z, r0.z, c7.x +sam (f32)(w)r4.x, r1.z, s#1, t#1 +(ss)mov.f32f32 r1.z, r2.y +mul.f r1.w, c9.x, r2.z +add.f r2.z, c10.y, (neg)r2.z +mul.f r0.z, r0.z, r0.y mov.f32f32 r0.y, r0.y -absneg.f r2.y, (neg)c7.x -mov.f32f32 r0.z, r0.z -mul.f r2.z, c9.x, r1.z -add.f r2.w, c10.y, (neg)r1.z -mul.f r2.y, r2.y, c7.x -add.f r3.y, c10.y, (neg)r0.z -mov.f32f32 r2.z, r2.z -mul.f r3.z, c9.x, r0.z -mul.f r2.y, r2.y, r0.y -mov.f32f32 r2.w, r2.w -add.f r0.x, r0.x, (neg)r2.z -mov.f32f32 r2.z, r3.z -mov.f32f32 r2.y, r2.y -mov.f32f32 r3.y, r3.y -mov.f32f32 r0.x, r0.x -add.f r0.w, r0.w, (neg)r2.z -mul.f r0.y, r2.y, r0.y -mul.f r2.y, r2.w, r3.y -add.f r2.z, c10.z, r0.x +add.f r0.x, r0.x, (neg)r1.w +mul.f r1.w, c9.x, r1.z +mov.f32f32 r2.w, r2.z +mul.f r0.y, r0.z, r0.y +mov.f32f32 r0.z, r0.x +add.f r0.w, r0.w, (neg)r1.w add.f r0.x, c10.x, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r2.z, r2.z -mov.f32f32 r0.x, r0.x mul.f r0.y, r0.y, c10.w -add.f r3.z, c10.x, r0.w -mul.f r2.z, r2.z, c3.z -mul.f r0.x, r0.x, c3.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r3.z, r3.z -mov.f32f32 r3.w, r2.z -mov.f32f32 r4.x, r0.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r2.z, r2.z -mov.f32f32 r4.y, r3.w -add.f r0.w, c10.z, r0.w -exp2 r0.y, r0.y -(ss)mov.f32f32 r0.y, r0.y -mov.f32f32 r3.w, r4.x -mov.f32f32 r5.x, r0.x -mov.f32f32 r0.x, r0.w -add.f r0.w, c12.y, (neg)r0.y -mov.f32f32 r5.w, r3.w -mul.f r3.z, r3.z, c3.w -mul.f r0.x, r0.x, c3.w -mul.f r0.w, r0.w, c7.y -mul.f r0.y, r0.y, c10.x -mov.f32f32 r3.w, r3.z -mov.f32f32 r4.x, r0.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r6.z, r2.z -mov.f32f32 r2.z, r3.z -mov.f32f32 r4.z, r4.x -bary.f r3.z, 2, r1.x -add.f r0.y, r0.y, r0.w -mov.f32f32 r0.w, r3.w -mov.f32f32 r5.y, r0.x -add.f r0.x, r3.z, c9.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r6.x, r0.w -mov.f32f32 r6.w, r2.z -mov.f32f32 r0.w, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r2.z, r0.x -mov.f32f32 r3.z, r0.x -mov.f32f32 r4.w, r0.w -max.f r0.y, r0.y, c9.y -mov.f32f32 r6.y, r2.z -mov.f32f32 r5.z, r3.z -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.w, r2.y -add.f r1.z, r1.z, c10.x -sam.s (f32)(x)r2.y, r4.y, s#2, t#2 -(sy)mov.f32f32 r2.y, r2.y -min.f r0.y, r0.y, c10.x -sam.s (f32)(x)r2.z, r5.w, s#2, t#2 -(sy)mov.f32f32 r2.z, r2.z -sam.s (f32)(x)r3.z, r5.x, s#2, t#2 -(sy)mov.f32f32 r3.z, r3.z -mov.f32f32 r2.y, r2.y -add.f r3.w, c12.y, (neg)r0.y -add.f r4.x, c12.y, (neg)r0.y -(ss)add.f r4.y, c12.y, (neg)r0.y -mul.f r0.w, r0.w, r2.y -mul.f r2.y, r1.z, r3.y -mul.f r3.w, r3.w, c6.z -mul.f r4.x, r4.x, c6.y -mul.f r4.y, r4.y, c6.x -mov.f32f32 r2.y, r2.y -mov.f32f32 r7.x, r0.x -add.f r0.x, r0.z, c10.x -mov.f32f32 r0.z, r1.w -mad.f32 r0.w, r2.y, r3.z, r0.w -mov.f32f32 r1.w, r2.x -bary.f r2.x, 7, r1.x -mul.f r2.y, r2.w, r0.x -mov.f32f32 r0.w, r0.w -sam.s (f32)(x)r2.w, r6.z, s#2, t#2 -(sy)mov.f32f32 r2.w, r2.w -mul.f r0.x, r1.z, r0.x -mov.f32f32 r1.z, r2.y -mov.f32f32 r3.y, r0.z -mov.f32f32 r4.z, r1.w -mov.f32f32 r0.z, r2.x -mad.f32 r0.w, r1.z, r2.w, r0.w -mov.f32f32 r0.x, r0.x -bary.f r1.z, 10, r1.x -mov.f32f32 r4.w, r0.z -mov.f32f32 r0.z, r0.w -sam (f32)(w)r2.w, r3.x, s#1, t#1 -(sy)cmps.f.lt r0.w, r3.z, c11.y -mad.f32 r0.x, r0.x, r2.z, r0.z -mov.f32f32 r0.z, r3.z -bary.f r1.w, 9, r1.x -cov.u32f32 r0.w, r0.w -mov.f32f32 r0.x, r0.x -sam (f32)(xyzw)r2.x, r4.z, s#0, t#0 -mov.f32f32 r0.z, r0.z -(sy)mov.f32f32 r2.w, r2.w -nop -mul.f r0.x, c11.x, r0.x -cmps.f.ne r0.w, r0.w, c9.y +add.f r0.z, c10.z, r0.z +mov.f32f32 r1.w, r0.w +mul.f r3.y, r0.x, c3.z +add.f r0.x, c10.x, r0.w +mul.f r4.x, r0.z, c3.z +add.f r0.z, c10.z, r1.w +mov.f32f32 r5.x, r3.y +mul.f r3.z, r0.x, c3.w +mov.f32f32 r5.w, r4.x +mul.f r5.y, r0.z, c3.w +exp2 r0.x, r0.y +(ss)mov.f32f32 r0.y, r0.x +mov.f32f32 r5.z, r3.w +mov.f32f32 r4.y, r3.z +mov.f32f32 r6.x, r5.y +mov.f32f32 r6.y, r3.w +add.f r0.y, c12.y, (neg)r0.y +mov.f32f32 r4.z, r3.w +sam.s (f32)(x)r3.x, r3.y, s#2, t#2 +add.f r0.z, c10.y, (neg)r1.z +sam.s (f32)(x)r6.z, r5.x, s#2, t#2 +mul.f r0.x, r0.x, c10.x +add.f r0.w, r2.y, c10.x +mul.f r0.y, r0.y, c7.y +(ss)nop +sam.s (f32)(x)r5.x, r5.w, s#2, t#2 +mov.f32f32 r1.z, r0.z +mul.f r0.z, r2.x, r0.z +mul.f r1.w, r2.z, r0.w +add.f r0.x, r0.x, r0.y +mul.f r0.y, r2.w, r1.z +sam.s (f32)(x)r3.y, r4.x, s#2, t#2 +mul.f r0.w, r2.x, r0.w +(sy)cmps.f.lt r1.z, r4.w, c11.y +bary.f r2.x, 6, r1.x +mul.f r0.y, r0.y, r5.x +max.f r0.x, r0.x, c9.y +mad.f32 r0.y, r0.z, r6.z, r0.y +cov.u32f32 r0.z, r1.z +mad.f32 r0.y, r1.w, r3.y, r0.y +min.f r0.x, r0.x, c10.x +mad.f32 r0.y, r0.w, r3.x, r0.y +cmps.f.ne r0.z, r0.z, c9.y +(rpt1)nop +mul.f r0.y, c11.x, r0.y +bary.f r2.y, 7, r1.x +add.f r0.w, c12.y, (neg)r0.x +add.f r1.z, c12.y, (neg)r0.x +mov.f32f32 r1.w, r0.y +add.f r3.x, c12.y, (neg)r0.x (rpt1)nop -mov.f32f32 r0.x, r0.x -mul.f r1.z, r2.z, r1.z -mul.f r1.w, r2.y, r1.w +sam (f32)(xyzw)r2.x, r2.x, s#0, t#0 +bary.f r3.y, 10, r1.x +bary.f r3.z, 9, r1.x bary.f (ei)r1.x, 8, r1.x -mov.f32f32 r1.y, c9.y -mul.f r1.z, r1.z, r0.x -mul.f r1.w, r1.w, r0.x +mul.f r0.w, r0.w, c6.z +(sy)mul.f r1.y, r2.z, r3.y +mul.f r3.y, r2.y, r3.z mul.f r1.x, r2.x, r1.x -sel.b32 r0.z, r1.y, r0.w, r0.z -mov.f32f32 r0.w, r1.z -mov.f32f32 r1.y, r1.w -mad.f32 r0.w, c5.z, r2.z, r0.w -mad.f32 r1.y, c5.y, r2.y, r1.y -mul.f r0.x, r1.x, r0.x -(ss)mov.f32f32 r3.x, r2.w -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.x, r1.y -mov.f32f32 r0.x, r0.x -nop -mul.f r0.w, r0.y, r0.w -mul.f r1.x, r0.y, r1.x -mad.f32 r0.x, c5.x, r2.x, r0.x -nop -add.f r0.w, r0.w, r3.w -add.f r1.x, r1.x, r4.x -mov.f32f32 r0.x, r0.x -nop -mul.f r0.w, r0.w, r0.z -mul.f r1.x, r1.x, r0.z -mul.f r0.x, r0.y, r0.x -nop -mov.f32f32 r0.y, r0.w -mov.f32f32 r0.w, r1.x -add.f r0.x, r0.x, r4.y -nop -mul.f r0.y, r0.y, c4.z -mul.f r0.w, r0.w, c4.y -mul.f r0.x, r0.x, r0.z -nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.w -mov.f32f32 r0.x, r0.x -nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -mul.f r0.x, r0.x, c4.x -nop -mov.f32f32 r2.w, r0.y -mov.f32f32 r2.z, r0.z -mov.f32f32 r0.x, r0.x -(rpt2)nop -mov.f32f32 r0.x, r0.x +mul.f r1.z, r1.z, c6.y +mul.f r1.y, r1.y, r1.w +mul.f r1.w, r3.y, r1.w +mad.f32 r1.y, c5.z, r2.z, r1.y +mad.f32 r1.w, c5.y, r2.y, r1.w +mul.f r0.y, r1.x, r0.y +mul.f r1.x, r3.x, c6.x +mul.f r1.y, r0.x, r1.y +mul.f r1.w, r0.x, r1.w +mad.f32 r0.y, c5.x, r2.x, r0.y +(ss)mov.f32f32 r2.x, c9.y +add.f r0.w, r1.y, r0.w +add.f r1.y, r1.w, r1.z +mul.f r0.x, r0.x, r0.y +sel.b32 r0.y, r2.x, r0.z, r4.w +(rpt1)nop +add.f r0.x, r0.x, r1.x +mul.f r0.z, r0.w, r0.y +mul.f r0.w, r1.y, r0.y +(rpt1)nop +mul.f r2.z, r0.z, c4.z +mul.f r2.y, r0.w, c4.y +mul.f r0.x, r0.x, r0.y (rpt2)nop -mov.f32f32 r2.y, r0.x +mul.f r2.x, r0.x, c4.x end nop -nop -nop -; FRAG: outputs: r2.y (1:0) -; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r1.y (5:10,cm=f,il=12,b=1) r3.x (5:11,cm=f,il=16,b=1) -; FRAG: 202 instructions, 0 half, 8 full -; pos (bary): r1.x -; color: r2.y -; fragcoord: r0.x +; FRAG: outputs: r2.x (1:0) +; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r1.z (5:10,cm=f,il=12,b=1) r3.x (5:11,cm=f,il=16,b=1) +; FRAG: 125 instructions, 0 half, 7 full diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-46.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-46.asm index 6e58225..f1c05da 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-46.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-46.asm @@ -4,222 +4,159 @@ @in(r0.w) in3 @in(r1.x) in4 @in(r1.y) in5 -@out(r2.y) out0 -@out(r2.z) out1 -@out(r2.w) out2 -@out(r3.x) out3 +@out(r2.x) out0 +@out(r2.y) out1 +@out(r2.z) out2 +@out(r2.w) out3 +@const(c10.x) 0x3f000000, 0x00000000, 0x3f800000, 0xbb449ba6 +@const(c11.x) 0xbf000000, 0x40000000, 0x3f800000, 0x3de38866 +@const(c12.x) 0x3fb8aa65, 0x3cf5c28f, 0x00000000, 0x00000000 +@const(c13.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.x, 4, r1.x add.f r0.y, r0.w, c10.y bary.f r0.w, 5, r1.x bary.f r1.z, 10, r1.x -add.f r1.w, r0.x, c11.x -bary.f r2.x, 8, r1.x +add.f r2.x, r0.x, c11.x +bary.f r1.w, 11, r1.x add.f r2.y, r0.w, c11.x -mov.f32f32 r1.z, r1.z -floor.f r2.z, r1.w +bary.f r2.z, 8, r1.x +floor.f r3.x, r2.x rcp r0.y, r0.y add.f r0.z, r0.z, c10.y -floor.f r2.w, r2.y -mov.f32f32 r3.x, r1.z -add.f r1.z, r1.w, (neg)r2.z +floor.f r3.y, r2.y +bary.f r2.w, 9, r1.x +add.f r2.x, r2.x, (neg)r3.x (ss)mul.f r0.y, r0.z, r0.y -add.f r0.z, r2.y, (neg)r2.w -bary.f r1.w, 11, r1.x -mov.f32f32 r1.z, r1.z +absneg.f r0.z, (neg)c8.x +add.f r2.y, r2.y, (neg)r3.y +mov.f32f32 r3.x, r2.x +sam (f32)(xyzw)r3.y, r1.z, s#0, t#0 +(sy)(ss)add.f r1.z, c13.y, (neg)r4.x +mul.f r0.z, r0.z, c8.x +mov.f32f32 r1.w, r2.y +mul.f r4.y, c10.x, r3.x +add.f r3.x, c11.y, (neg)r3.x +mul.f r0.z, r0.z, r0.y mov.f32f32 r0.y, r0.y -absneg.f r2.y, (neg)c8.x -mov.f32f32 r0.z, r0.z -mul.f r2.z, c10.x, r1.z -mov.f32f32 r1.w, r1.w -mul.f r2.y, r2.y, c8.x -add.f r2.w, c11.y, (neg)r1.z -mov.f32f32 r2.z, r2.z -mul.f r3.z, c10.x, r0.z -mul.f r2.y, r2.y, r0.y -mov.f32f32 r3.y, r1.w -add.f r0.x, r0.x, (neg)r2.z -mov.f32f32 r1.w, r3.z -mov.f32f32 r2.y, r2.y -mov.f32f32 r2.z, r2.w -mov.f32f32 r0.x, r0.x -add.f r0.w, r0.w, (neg)r1.w -mul.f r0.y, r2.y, r0.y -sam (f32)(xyzw)r2.w, r3.x, s#0, t#0 -(sy)add.f r1.w, c13.y, (neg)r3.z -add.f r2.y, c11.x, r0.x +add.f r0.x, r0.x, (neg)r4.y +mul.f r4.y, c10.x, r1.w +mov.f32f32 r4.z, r3.x +mul.f r0.y, r0.z, r0.y +mov.f32f32 r0.z, r0.x +add.f r0.w, r0.w, (neg)r4.y add.f r0.x, c11.z, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r2.y, r2.y -mov.f32f32 r0.x, r0.x mul.f r0.y, r0.y, c12.x -add.f r3.w, c11.z, r0.w -mul.f r2.y, r2.y, c3.z -mul.f r0.x, r0.x, c3.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r3.w, r3.w -mov.f32f32 r4.x, r2.y -mov.f32f32 r4.y, r0.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r2.y, r2.y -mov.f32f32 r4.z, r4.x -add.f r0.w, c11.x, r0.w -exp2 r0.y, r0.y -(ss)mov.f32f32 r0.y, r0.y -mov.f32f32 r4.x, r4.y -mov.f32f32 r5.y, r0.x -mov.f32f32 r0.x, r0.w -add.f r0.w, c13.y, (neg)r0.y -mov.f32f32 r6.x, r4.x -mul.f r3.w, r3.w, c3.w -mul.f r0.x, r0.x, c3.w -mul.f r0.w, r0.w, c8.y -mul.f r0.y, r0.y, c10.z -mov.f32f32 r4.x, r3.w -mov.f32f32 r4.y, r0.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r6.w, r2.y -mov.f32f32 r2.y, r3.w -mov.f32f32 r4.w, r4.y -bary.f r3.w, 6, r1.x -add.f r0.y, r0.y, r0.w -mov.f32f32 r0.w, r4.x -mov.f32f32 r5.z, r0.x -add.f r0.x, r3.w, c10.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r6.y, r0.w -mov.f32f32 r7.x, r2.y -mov.f32f32 r0.w, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r2.y, r0.x -mov.f32f32 r3.w, r0.x -mov.f32f32 r5.x, r0.w -max.f r0.y, r0.y, c10.y -mov.f32f32 r6.z, r2.y -mov.f32f32 r5.w, r3.w -mov.f32f32 r0.x, r0.x -mul.f r0.w, r1.w, c4.z -add.f r1.w, c13.y, (neg)r3.z -sam.s (f32)(x)r2.y, r4.z, s#2, t#2 -(sy)mov.f32f32 r2.y, r2.y -min.f r0.y, r0.y, c10.z -sam.s (f32)(x)r3.w, r6.x, s#2, t#2 -(sy)mov.f32f32 r3.w, r3.w -sam.s (f32)(x)r4.x, r5.y, s#2, t#2 -(sy)mov.f32f32 r4.x, r4.x -mov.f32f32 r2.y, r2.y -add.f r4.y, c11.y, (neg)r0.z -(ss)add.f r4.z, c13.y, (neg)r0.y -add.f r4.w, c13.y, (neg)r0.y -add.f r5.x, c13.y, (neg)r0.y -mov.f32f32 r4.y, r4.y -mul.f r4.z, r4.z, c7.z -mul.f r4.w, r4.w, c7.y -mul.f r5.x, r5.x, c7.x -mul.f r5.y, r2.z, r4.y -mov.f32f32 r7.y, r0.x -mul.f r0.x, r3.z, c10.z -mul.f r1.w, r1.w, c4.y -mul.f r2.y, r5.y, r2.y -add.f r1.z, r1.z, c10.z -add.f r0.x, r0.x, r0.w -mul.f r0.w, r3.z, c10.z -sam.s (f32)(x)r5.y, r6.w, s#2, t#2 -(sy)mov.f32f32 r5.y, r5.y -mul.f r4.y, r1.z, r4.y -add.f r5.z, c13.y, (neg)r3.z -mul.f r0.x, r3.y, r0.x -add.f r0.w, r0.w, r1.w -mad.f32 r1.w, r4.y, r4.x, r2.y -mul.f r2.y, r5.z, c4.x -mov.f32f32 r0.x, r0.x -bary.f r3.y, 2, r1.x -mov.f32f32 r1.w, r1.w -add.f r0.z, r0.z, c10.z -mul.f r0.w, r3.x, r0.w -mul.f r3.x, r3.z, c10.z -mov.f32f32 r2.x, r2.x -mul.f r2.z, r2.z, r0.z -mul.f r3.y, r0.x, r3.y -mov.f32f32 r0.w, r0.w -add.f r2.y, r3.x, r2.y -mad.f32 r1.w, r2.z, r5.y, r1.w -bary.f r2.z, 1, r1.x -mov.f32f32 r4.x, r2.x -mul.f r2.x, r2.w, r2.y -mov.f32f32 r1.w, r1.w -mul.f r0.z, r1.z, r0.z -mul.f r1.z, r0.w, r2.z -mov.f32f32 r2.x, r2.x -nop -mad.f32 r0.z, r0.z, r3.w, r1.w -bary.f r1.w, 0, r1.x -bary.f (ei)r1.x, 9, r1.x -mov.f32f32 r1.y, c10.z -mov.f32f32 r0.z, r0.z -mul.f r1.w, r2.x, r1.w -mov.f32f32 r1.x, r1.x -mov.f32f32 r3.x, r1.y -mul.f r0.z, c11.w, r0.z -mov.f32f32 r1.y, c10.y -(rpt1)nop -mov.f32f32 r0.z, r0.z -mov.f32f32 r4.y, r1.x +add.f r0.z, c11.x, r0.z +mov.f32f32 r4.y, r0.w +mul.f r4.w, r0.x, c3.z +add.f r0.x, c11.z, r0.w +mul.f r5.z, r0.z, c3.z +add.f r0.z, c11.x, r4.y +mov.f32f32 r6.y, r4.w +mul.f r5.x, r0.x, c3.w +mov.f32f32 r7.x, r5.z +mul.f r6.z, r0.z, c3.w +exp2 r0.x, r0.y +(ss)mov.f32f32 r0.y, r0.x +bary.f r0.z, 6, r1.x +mov.f32f32 r5.w, r5.x +mov.f32f32 r7.y, r6.z +add.f r0.y, c13.y, (neg)r0.y +add.f r5.y, r0.z, c10.w +add.f r0.z, c11.y, (neg)r1.w +mul.f r0.x, r0.x, c10.z +add.f r0.w, c13.y, (neg)r4.x +mov.f32f32 r7.z, r5.y +mul.f r0.y, r0.y, c8.y +mov.f32f32 r6.w, r5.y +mov.f32f32 r6.x, r5.y (rpt1)nop -mul.f r1.x, r3.y, r0.z -mul.f r1.z, r1.z, r0.z -mul.f r0.z, r1.w, r0.z +add.f r0.x, r0.x, r0.y +sam.s (f32)(x)r7.x, r7.x, s#2, t#2 +mov.f32f32 r0.y, r0.z nop -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.z, r1.z -mad.f32 r0.x, c6.z, r0.x, r1.x -mad.f32 r0.w, c6.y, r0.w, r1.z -mov.f32f32 r0.z, r0.z -sam (f32)(w)r3.y, r4.x, s#1, t#1 -(sy)cmps.f.lt r1.x, r4.x, c12.y -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.w, r0.w -mad.f32 r0.z, c6.x, r2.x, r0.z -cov.u32f32 r1.x, r1.x -mul.f r0.x, r0.y, r0.x -mul.f r0.w, r0.y, r0.w -mov.f32f32 r0.z, r0.z -cmps.f.ne r1.x, r1.x, c10.y -add.f r0.x, r0.x, r4.z -mov.f32f32 r1.z, r4.x -add.f r0.w, r0.w, r4.w -mul.f r0.y, r0.y, r0.z -nop -mov.f32f32 r0.z, r1.z -(rpt2)nop -sel.b32 r0.z, r1.y, r1.x, r0.z -add.f r0.y, r0.y, r5.x -(rpt1)nop -mul.f r0.x, r0.x, r0.z -mul.f r0.w, r0.w, r0.z -mul.f r0.y, r0.y, r0.z +(ss)nop +sam.s (f32)(x)r7.y, r6.y, s#2, t#2 +sam.s (f32)(x)r7.z, r4.w, s#2, t#2 +mul.f r0.w, r0.w, c4.z +add.f r1.w, c13.y, (neg)r4.x +mul.f r0.y, r4.z, r0.y +max.f r0.x, r0.x, c10.y +sam.s (f32)(x)r4.y, r5.z, s#2, t#2 +mul.f r4.z, r4.x, c10.z +mul.f r1.w, r1.w, c4.y +(sy)mul.f r0.y, r0.y, r7.x +add.f r2.x, r2.x, c10.z +min.f r0.x, r0.x, c10.z +add.f r0.w, r4.z, r0.w +mul.f r4.z, r4.x, c10.z +mul.f r0.z, r2.x, r0.z +(ss)add.f r4.w, c13.y, (neg)r0.x +add.f r5.x, c13.y, (neg)r0.x +add.f r5.y, c13.y, (neg)r0.x +mad.f32 r0.y, r0.z, r7.y, r0.y +add.f r0.z, r2.y, c10.z +mul.f r2.y, r4.w, c7.z +mul.f r4.w, r5.x, c7.y +mul.f r5.x, r5.y, c7.x +mul.f r3.x, r3.x, r0.z +add.f r1.w, r4.z, r1.w +mul.f r1.z, r1.z, c4.x +mul.f r0.w, r3.w, r0.w +mad.f32 r0.y, r3.x, r4.y, r0.y +mul.f r0.z, r2.x, r0.z +mul.f r2.x, r4.x, c10.z +mov.f32f32 r3.x, r0.w +bary.f r3.w, 2, r1.x +mad.f32 r0.y, r0.z, r7.z, r0.y +mul.f r0.z, r3.z, r1.w +add.f r1.z, r2.x, r1.z +mul.f r1.w, r3.x, r3.w +mul.f r0.y, c11.w, r0.y +mov.f32f32 r2.x, r0.z +mul.f r1.z, r3.y, r1.z +bary.f r3.x, 1, r1.x +mov.f32f32 r3.y, r0.y +sam (f32)(w)r3.z, r2.z, s#1, t#1 +(sy)(ss)cmps.f.lt r2.z, r4.y, c12.y +mov.f32f32 r3.z, r1.z +mul.f r2.x, r2.x, r3.x +mul.f r1.w, r1.w, r3.y +bary.f (ei)r1.x, 0, r1.x +mad.f32 r0.w, c6.z, r0.w, r1.w +mul.f r1.y, r2.x, r3.y +cov.u32f32 r1.w, r2.z +mov.f32f32 r2.w, c10.z +mul.f r0.w, r0.x, r0.w +mad.f32 r0.z, c6.y, r0.z, r1.y +mul.f r1.x, r3.z, r1.x +cmps.f.ne r1.y, r1.w, c10.y +add.f r0.w, r0.w, r2.y +mov.f32f32 r1.w, c10.y +mul.f r0.z, r0.x, r0.z +mul.f r0.y, r1.x, r0.y nop -mul.f r0.x, r0.x, c5.z -mul.f r0.z, r0.w, c5.y -mul.f r0.y, r0.y, c5.x +sel.b32 r1.x, r1.w, r1.y, r4.y +add.f r0.z, r0.z, r4.w +mad.f32 r0.y, c6.x, r1.z, r0.y nop -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.y, r0.y +mul.f r0.w, r0.w, r1.x +mul.f r0.z, r0.z, r1.x +(rpt1)nop +mul.f r2.z, r0.w, c5.z +mul.f r2.y, r0.z, c5.y +mul.f r0.x, r0.x, r0.y +(rpt2)nop +add.f r0.x, r0.x, r5.x +(rpt2)nop +mul.f r0.x, r0.x, r1.x +(rpt2)nop +mul.f r2.x, r0.x, c5.x +end nop -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.y, r0.y nop -mov.f32f32 r2.w, r0.x -mov.f32f32 r2.z, r0.z -mov.f32f32 r2.y, r0.y -end nop -; FRAG: outputs: r2.y (1:0) +; FRAG: outputs: r2.x (1:0) ; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r2.x (5:10,cm=f,il=12,b=1) r1.x (5:11,cm=f,il=16,b=1) -; FRAG: 212 instructions, 0 half, 8 full -; pos (bary): r1.x -; color: r2.y -; fragcoord: r0.x +; FRAG: 149 instructions, 0 half, 8 full diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-47.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-47.asm index 78028ad..5b14079 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-47.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-47.asm @@ -1,103 +1,80 @@ ; options: ; VERT: new compiler -@in(r0.x) in0 -@in(r0.y) in1 -@in(r0.z) in2 -@in(r0.w) in4 -@in(r1.x) in5 -@in(r1.y) in6 -@in(r1.z) in8 -@in(r1.w) in9 -@out(r5.x) out0 -@out(r5.y) out1 -@out(r5.z) out2 -@out(r5.w) out3 -@out(r4.x) out4 -@out(r4.y) out5 -@out(r4.z) out6 -@out(r4.w) out7 -@out(r3.x) out8 -@out(r3.y) out9 -@out(r3.z) out10 -@out(r3.w) out11 -@out(r1.w) out12 -@out(r2.x) out13 -@out(r2.y) out14 -@out(r2.z) out15 -(sy)(ss)mul.f r0.w, r0.w, (neg)c4.x -mul.f r2.x, c7.w, r0.x -mad.f32 r0.w, (neg)c4.y, r1.x, r0.w -mad.f32 r1.x, c8.w, r0.y, r2.x -mul.f r2.x, c7.z, r0.x -mul.f r2.y, c7.y, r0.x -mov.f32f32 r0.w, r0.w -mad.f32 r1.x, c9.w, r0.z, r1.x -mad.f32 r0.w, (neg)c4.z, r1.y, r0.w -mad.f32 r1.y, c8.z, r0.y, r2.x -mad.f32 r2.x, c8.y, r0.y, r2.y -mul.f r2.y, c7.x, r0.x -max.f r0.w, c12.x, r0.w -add.f r1.x, r1.x, c10.w -mad.f32 r1.y, c9.z, r0.z, r1.y -mad.f32 r2.x, c9.y, r0.z, r2.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.x, r1.x -add.f r1.y, r1.y, c10.z -add.f r2.x, r2.x, c10.y -mul.f r2.z, r0.w, c5.z -mul.f r2.w, r0.w, c5.y -mul.f r0.w, r0.w, c5.x -mov.f32f32 r3.w, r1.x -mov.f32f32 r1.x, r2.z -mov.f32f32 r2.z, r2.w -mov.f32f32 r0.w, r0.w -nop -mov.f32f32 r4.z, r1.x -mov.f32f32 r4.y, r2.z -mov.f32f32 r4.x, r0.w -mov.f32f32 r0.w, r1.y -mul.f r1.x, r2.x, c11.y -mad.f32 r1.y, c8.x, r0.y, r2.y -mul.f r2.x, c0.w, r0.x -mov.f32f32 r3.z, r0.w -mov.f32f32 r3.y, r1.x -mad.f32 r0.w, c9.x, r0.z, r1.y -mad.f32 r1.x, c1.w, r0.y, r2.x -mul.f r1.y, c0.z, r0.x -mul.f r2.x, c0.y, r0.x -add.f r0.w, r0.w, c10.x -mad.f32 r1.x, c2.w, r0.z, r1.x -mad.f32 r1.y, c1.z, r0.y, r1.y -mad.f32 r2.x, c1.y, r0.y, r2.x -mul.f r0.w, r0.w, c11.x -add.f r1.x, r1.x, c3.w -mad.f32 r1.y, c2.z, r0.z, r1.y -mad.f32 r2.x, c2.y, r0.z, r2.x -mov.f32f32 r3.x, r0.w -mov.f32f32 r5.w, r1.x -add.f r0.w, r1.y, c3.z -add.f r1.x, r2.x, c3.y -mul.f r1.y, c0.x, r0.x -mov.f32f32 r1.w, r1.w -mov.f32f32 r5.z, r0.w -mov.f32f32 r5.y, r1.x -mad.f32 r0.y, c1.x, r0.y, r1.y -mov.f32f32 r2.z, r1.w -mad.f32 r0.y, c2.x, r0.z, r0.y -mov.f32f32 r0.w, r1.z -mad.f32 r0.z, c6.x, r0.z, c6.y -mad.f32 r0.x, c6.x, r0.x, c6.y -add.f r0.y, r0.y, c3.x -mov.f32f32 r2.y, r0.w -mov.f32f32 r2.x, r0.z -mov.f32f32 r1.w, r0.x -mov.f32f32 r5.x, r0.y -mov.f32f32 r0.x, (0.000000) -(rpt2)nop -mov.f32f32 r4.w, r0.x +@in(r4.x) in0 +@in(r4.y) in1 +@in(r4.z) in2 +@in(r0.x) in4 +@in(r0.y) in5 +@in(r0.z) in6 +@in(r3.z) in8 +@in(r3.w) in9 +@out(r0.x) out0 +@out(r0.y) out1 +@out(r0.z) out2 +@out(r0.w) out3 +@out(r1.x) out4 +@out(r1.y) out5 +@out(r1.z) out6 +@out(r1.w) out7 +@out(r2.x) out8 +@out(r2.y) out9 +@out(r2.z) out10 +@out(r2.w) out11 +@out(r3.x) out12 +@out(r3.y) out13 +@out(r3.z) out14 +@out(r3.w) out15 +@const(c12.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 +@const(c13.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 +(sy)(ss)mul.f r0.x, r0.x, (neg)c4.x +mul.f r0.w, c7.y, r4.x +mad.f32 r0.x, (neg)c4.y, r0.y, r0.x +mad.f32 r0.y, c8.y, r4.y, r0.w +mad.f32 r0.x, (neg)c4.z, r0.z, r0.x +mad.f32 r0.y, c9.y, r4.z, r0.y +mul.f r0.z, c7.x, r4.x +mul.f r0.w, c7.w, r4.x +max.f r0.x, c12.x, r0.x +add.f r0.y, r0.y, c10.y +mad.f32 r0.z, c8.x, r4.y, r0.z +mad.f32 r0.w, c8.w, r4.y, r0.w +mov.f32f32 r1.y, r0.x +mul.f r2.y, r0.y, c11.y +mad.f32 r0.y, c9.x, r4.z, r0.z +mul.f r1.x, r0.x, c5.x +mul.f r1.z, r1.y, c5.z +mul.f r1.y, r1.y, c5.y +add.f r0.x, r0.y, c10.x +mad.f32 r0.y, c9.w, r4.z, r0.w +mul.f r0.z, c7.z, r4.x +mul.f r0.w, c0.w, r4.x +mul.f r2.x, r0.x, c11.x +add.f r2.w, r0.y, c10.w +mad.f32 r0.x, c8.z, r4.y, r0.z +mad.f32 r0.y, c1.w, r4.y, r0.w +mad.f32 r0.x, c9.z, r4.z, r0.x +mad.f32 r0.y, c2.w, r4.z, r0.y +mul.f r0.z, c0.z, r4.x +mul.f r1.w, c0.y, r4.x +add.f r2.z, r0.x, c10.z +add.f r0.w, r0.y, c3.w +mad.f32 r0.x, c1.z, r4.y, r0.z +mad.f32 r0.y, c1.y, r4.y, r1.w +mad.f32 r0.x, c2.z, r4.z, r0.x +mad.f32 r0.y, c2.y, r4.z, r0.y +mul.f r1.w, c0.x, r4.x +mad.f32 r3.y, c6.x, r4.z, c6.y +add.f r0.z, r0.x, c3.z +add.f r0.y, r0.y, c3.y +mad.f32 r0.x, c1.x, r4.y, r1.w +mad.f32 r3.x, c6.x, r4.x, c6.y +mad.f32 r0.x, c2.x, r4.z, r0.x +mov.f32f32 r1.w, (0.000000) +(rpt1)nop +add.f r0.x, r0.x, c3.x end -; VERT: outputs: r5.x (0:0) r4.x (5:9) r3.x (5:10) r1.w (5:11) -; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r1.z (0:0,cm=3,il=16,b=0) -; VERT: 74 instructions, 0 half, 6 full -; pos: r5.x +nop +; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11) +; VERT: inputs: r4.x (0:0,cm=7,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r3.z (0:0,cm=3,il=16,b=0) +; VERT: 48 instructions, 0 half, 5 full diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-48.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-48.asm index 6e58225..f1c05da 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-48.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-48.asm @@ -4,222 +4,159 @@ @in(r0.w) in3 @in(r1.x) in4 @in(r1.y) in5 -@out(r2.y) out0 -@out(r2.z) out1 -@out(r2.w) out2 -@out(r3.x) out3 +@out(r2.x) out0 +@out(r2.y) out1 +@out(r2.z) out2 +@out(r2.w) out3 +@const(c10.x) 0x3f000000, 0x00000000, 0x3f800000, 0xbb449ba6 +@const(c11.x) 0xbf000000, 0x40000000, 0x3f800000, 0x3de38866 +@const(c12.x) 0x3fb8aa65, 0x3cf5c28f, 0x00000000, 0x00000000 +@const(c13.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.x, 4, r1.x add.f r0.y, r0.w, c10.y bary.f r0.w, 5, r1.x bary.f r1.z, 10, r1.x -add.f r1.w, r0.x, c11.x -bary.f r2.x, 8, r1.x +add.f r2.x, r0.x, c11.x +bary.f r1.w, 11, r1.x add.f r2.y, r0.w, c11.x -mov.f32f32 r1.z, r1.z -floor.f r2.z, r1.w +bary.f r2.z, 8, r1.x +floor.f r3.x, r2.x rcp r0.y, r0.y add.f r0.z, r0.z, c10.y -floor.f r2.w, r2.y -mov.f32f32 r3.x, r1.z -add.f r1.z, r1.w, (neg)r2.z +floor.f r3.y, r2.y +bary.f r2.w, 9, r1.x +add.f r2.x, r2.x, (neg)r3.x (ss)mul.f r0.y, r0.z, r0.y -add.f r0.z, r2.y, (neg)r2.w -bary.f r1.w, 11, r1.x -mov.f32f32 r1.z, r1.z +absneg.f r0.z, (neg)c8.x +add.f r2.y, r2.y, (neg)r3.y +mov.f32f32 r3.x, r2.x +sam (f32)(xyzw)r3.y, r1.z, s#0, t#0 +(sy)(ss)add.f r1.z, c13.y, (neg)r4.x +mul.f r0.z, r0.z, c8.x +mov.f32f32 r1.w, r2.y +mul.f r4.y, c10.x, r3.x +add.f r3.x, c11.y, (neg)r3.x +mul.f r0.z, r0.z, r0.y mov.f32f32 r0.y, r0.y -absneg.f r2.y, (neg)c8.x -mov.f32f32 r0.z, r0.z -mul.f r2.z, c10.x, r1.z -mov.f32f32 r1.w, r1.w -mul.f r2.y, r2.y, c8.x -add.f r2.w, c11.y, (neg)r1.z -mov.f32f32 r2.z, r2.z -mul.f r3.z, c10.x, r0.z -mul.f r2.y, r2.y, r0.y -mov.f32f32 r3.y, r1.w -add.f r0.x, r0.x, (neg)r2.z -mov.f32f32 r1.w, r3.z -mov.f32f32 r2.y, r2.y -mov.f32f32 r2.z, r2.w -mov.f32f32 r0.x, r0.x -add.f r0.w, r0.w, (neg)r1.w -mul.f r0.y, r2.y, r0.y -sam (f32)(xyzw)r2.w, r3.x, s#0, t#0 -(sy)add.f r1.w, c13.y, (neg)r3.z -add.f r2.y, c11.x, r0.x +add.f r0.x, r0.x, (neg)r4.y +mul.f r4.y, c10.x, r1.w +mov.f32f32 r4.z, r3.x +mul.f r0.y, r0.z, r0.y +mov.f32f32 r0.z, r0.x +add.f r0.w, r0.w, (neg)r4.y add.f r0.x, c11.z, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r2.y, r2.y -mov.f32f32 r0.x, r0.x mul.f r0.y, r0.y, c12.x -add.f r3.w, c11.z, r0.w -mul.f r2.y, r2.y, c3.z -mul.f r0.x, r0.x, c3.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r3.w, r3.w -mov.f32f32 r4.x, r2.y -mov.f32f32 r4.y, r0.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r2.y, r2.y -mov.f32f32 r4.z, r4.x -add.f r0.w, c11.x, r0.w -exp2 r0.y, r0.y -(ss)mov.f32f32 r0.y, r0.y -mov.f32f32 r4.x, r4.y -mov.f32f32 r5.y, r0.x -mov.f32f32 r0.x, r0.w -add.f r0.w, c13.y, (neg)r0.y -mov.f32f32 r6.x, r4.x -mul.f r3.w, r3.w, c3.w -mul.f r0.x, r0.x, c3.w -mul.f r0.w, r0.w, c8.y -mul.f r0.y, r0.y, c10.z -mov.f32f32 r4.x, r3.w -mov.f32f32 r4.y, r0.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r6.w, r2.y -mov.f32f32 r2.y, r3.w -mov.f32f32 r4.w, r4.y -bary.f r3.w, 6, r1.x -add.f r0.y, r0.y, r0.w -mov.f32f32 r0.w, r4.x -mov.f32f32 r5.z, r0.x -add.f r0.x, r3.w, c10.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r6.y, r0.w -mov.f32f32 r7.x, r2.y -mov.f32f32 r0.w, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r2.y, r0.x -mov.f32f32 r3.w, r0.x -mov.f32f32 r5.x, r0.w -max.f r0.y, r0.y, c10.y -mov.f32f32 r6.z, r2.y -mov.f32f32 r5.w, r3.w -mov.f32f32 r0.x, r0.x -mul.f r0.w, r1.w, c4.z -add.f r1.w, c13.y, (neg)r3.z -sam.s (f32)(x)r2.y, r4.z, s#2, t#2 -(sy)mov.f32f32 r2.y, r2.y -min.f r0.y, r0.y, c10.z -sam.s (f32)(x)r3.w, r6.x, s#2, t#2 -(sy)mov.f32f32 r3.w, r3.w -sam.s (f32)(x)r4.x, r5.y, s#2, t#2 -(sy)mov.f32f32 r4.x, r4.x -mov.f32f32 r2.y, r2.y -add.f r4.y, c11.y, (neg)r0.z -(ss)add.f r4.z, c13.y, (neg)r0.y -add.f r4.w, c13.y, (neg)r0.y -add.f r5.x, c13.y, (neg)r0.y -mov.f32f32 r4.y, r4.y -mul.f r4.z, r4.z, c7.z -mul.f r4.w, r4.w, c7.y -mul.f r5.x, r5.x, c7.x -mul.f r5.y, r2.z, r4.y -mov.f32f32 r7.y, r0.x -mul.f r0.x, r3.z, c10.z -mul.f r1.w, r1.w, c4.y -mul.f r2.y, r5.y, r2.y -add.f r1.z, r1.z, c10.z -add.f r0.x, r0.x, r0.w -mul.f r0.w, r3.z, c10.z -sam.s (f32)(x)r5.y, r6.w, s#2, t#2 -(sy)mov.f32f32 r5.y, r5.y -mul.f r4.y, r1.z, r4.y -add.f r5.z, c13.y, (neg)r3.z -mul.f r0.x, r3.y, r0.x -add.f r0.w, r0.w, r1.w -mad.f32 r1.w, r4.y, r4.x, r2.y -mul.f r2.y, r5.z, c4.x -mov.f32f32 r0.x, r0.x -bary.f r3.y, 2, r1.x -mov.f32f32 r1.w, r1.w -add.f r0.z, r0.z, c10.z -mul.f r0.w, r3.x, r0.w -mul.f r3.x, r3.z, c10.z -mov.f32f32 r2.x, r2.x -mul.f r2.z, r2.z, r0.z -mul.f r3.y, r0.x, r3.y -mov.f32f32 r0.w, r0.w -add.f r2.y, r3.x, r2.y -mad.f32 r1.w, r2.z, r5.y, r1.w -bary.f r2.z, 1, r1.x -mov.f32f32 r4.x, r2.x -mul.f r2.x, r2.w, r2.y -mov.f32f32 r1.w, r1.w -mul.f r0.z, r1.z, r0.z -mul.f r1.z, r0.w, r2.z -mov.f32f32 r2.x, r2.x -nop -mad.f32 r0.z, r0.z, r3.w, r1.w -bary.f r1.w, 0, r1.x -bary.f (ei)r1.x, 9, r1.x -mov.f32f32 r1.y, c10.z -mov.f32f32 r0.z, r0.z -mul.f r1.w, r2.x, r1.w -mov.f32f32 r1.x, r1.x -mov.f32f32 r3.x, r1.y -mul.f r0.z, c11.w, r0.z -mov.f32f32 r1.y, c10.y -(rpt1)nop -mov.f32f32 r0.z, r0.z -mov.f32f32 r4.y, r1.x +add.f r0.z, c11.x, r0.z +mov.f32f32 r4.y, r0.w +mul.f r4.w, r0.x, c3.z +add.f r0.x, c11.z, r0.w +mul.f r5.z, r0.z, c3.z +add.f r0.z, c11.x, r4.y +mov.f32f32 r6.y, r4.w +mul.f r5.x, r0.x, c3.w +mov.f32f32 r7.x, r5.z +mul.f r6.z, r0.z, c3.w +exp2 r0.x, r0.y +(ss)mov.f32f32 r0.y, r0.x +bary.f r0.z, 6, r1.x +mov.f32f32 r5.w, r5.x +mov.f32f32 r7.y, r6.z +add.f r0.y, c13.y, (neg)r0.y +add.f r5.y, r0.z, c10.w +add.f r0.z, c11.y, (neg)r1.w +mul.f r0.x, r0.x, c10.z +add.f r0.w, c13.y, (neg)r4.x +mov.f32f32 r7.z, r5.y +mul.f r0.y, r0.y, c8.y +mov.f32f32 r6.w, r5.y +mov.f32f32 r6.x, r5.y (rpt1)nop -mul.f r1.x, r3.y, r0.z -mul.f r1.z, r1.z, r0.z -mul.f r0.z, r1.w, r0.z +add.f r0.x, r0.x, r0.y +sam.s (f32)(x)r7.x, r7.x, s#2, t#2 +mov.f32f32 r0.y, r0.z nop -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.z, r1.z -mad.f32 r0.x, c6.z, r0.x, r1.x -mad.f32 r0.w, c6.y, r0.w, r1.z -mov.f32f32 r0.z, r0.z -sam (f32)(w)r3.y, r4.x, s#1, t#1 -(sy)cmps.f.lt r1.x, r4.x, c12.y -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.w, r0.w -mad.f32 r0.z, c6.x, r2.x, r0.z -cov.u32f32 r1.x, r1.x -mul.f r0.x, r0.y, r0.x -mul.f r0.w, r0.y, r0.w -mov.f32f32 r0.z, r0.z -cmps.f.ne r1.x, r1.x, c10.y -add.f r0.x, r0.x, r4.z -mov.f32f32 r1.z, r4.x -add.f r0.w, r0.w, r4.w -mul.f r0.y, r0.y, r0.z -nop -mov.f32f32 r0.z, r1.z -(rpt2)nop -sel.b32 r0.z, r1.y, r1.x, r0.z -add.f r0.y, r0.y, r5.x -(rpt1)nop -mul.f r0.x, r0.x, r0.z -mul.f r0.w, r0.w, r0.z -mul.f r0.y, r0.y, r0.z +(ss)nop +sam.s (f32)(x)r7.y, r6.y, s#2, t#2 +sam.s (f32)(x)r7.z, r4.w, s#2, t#2 +mul.f r0.w, r0.w, c4.z +add.f r1.w, c13.y, (neg)r4.x +mul.f r0.y, r4.z, r0.y +max.f r0.x, r0.x, c10.y +sam.s (f32)(x)r4.y, r5.z, s#2, t#2 +mul.f r4.z, r4.x, c10.z +mul.f r1.w, r1.w, c4.y +(sy)mul.f r0.y, r0.y, r7.x +add.f r2.x, r2.x, c10.z +min.f r0.x, r0.x, c10.z +add.f r0.w, r4.z, r0.w +mul.f r4.z, r4.x, c10.z +mul.f r0.z, r2.x, r0.z +(ss)add.f r4.w, c13.y, (neg)r0.x +add.f r5.x, c13.y, (neg)r0.x +add.f r5.y, c13.y, (neg)r0.x +mad.f32 r0.y, r0.z, r7.y, r0.y +add.f r0.z, r2.y, c10.z +mul.f r2.y, r4.w, c7.z +mul.f r4.w, r5.x, c7.y +mul.f r5.x, r5.y, c7.x +mul.f r3.x, r3.x, r0.z +add.f r1.w, r4.z, r1.w +mul.f r1.z, r1.z, c4.x +mul.f r0.w, r3.w, r0.w +mad.f32 r0.y, r3.x, r4.y, r0.y +mul.f r0.z, r2.x, r0.z +mul.f r2.x, r4.x, c10.z +mov.f32f32 r3.x, r0.w +bary.f r3.w, 2, r1.x +mad.f32 r0.y, r0.z, r7.z, r0.y +mul.f r0.z, r3.z, r1.w +add.f r1.z, r2.x, r1.z +mul.f r1.w, r3.x, r3.w +mul.f r0.y, c11.w, r0.y +mov.f32f32 r2.x, r0.z +mul.f r1.z, r3.y, r1.z +bary.f r3.x, 1, r1.x +mov.f32f32 r3.y, r0.y +sam (f32)(w)r3.z, r2.z, s#1, t#1 +(sy)(ss)cmps.f.lt r2.z, r4.y, c12.y +mov.f32f32 r3.z, r1.z +mul.f r2.x, r2.x, r3.x +mul.f r1.w, r1.w, r3.y +bary.f (ei)r1.x, 0, r1.x +mad.f32 r0.w, c6.z, r0.w, r1.w +mul.f r1.y, r2.x, r3.y +cov.u32f32 r1.w, r2.z +mov.f32f32 r2.w, c10.z +mul.f r0.w, r0.x, r0.w +mad.f32 r0.z, c6.y, r0.z, r1.y +mul.f r1.x, r3.z, r1.x +cmps.f.ne r1.y, r1.w, c10.y +add.f r0.w, r0.w, r2.y +mov.f32f32 r1.w, c10.y +mul.f r0.z, r0.x, r0.z +mul.f r0.y, r1.x, r0.y nop -mul.f r0.x, r0.x, c5.z -mul.f r0.z, r0.w, c5.y -mul.f r0.y, r0.y, c5.x +sel.b32 r1.x, r1.w, r1.y, r4.y +add.f r0.z, r0.z, r4.w +mad.f32 r0.y, c6.x, r1.z, r0.y nop -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.y, r0.y +mul.f r0.w, r0.w, r1.x +mul.f r0.z, r0.z, r1.x +(rpt1)nop +mul.f r2.z, r0.w, c5.z +mul.f r2.y, r0.z, c5.y +mul.f r0.x, r0.x, r0.y +(rpt2)nop +add.f r0.x, r0.x, r5.x +(rpt2)nop +mul.f r0.x, r0.x, r1.x +(rpt2)nop +mul.f r2.x, r0.x, c5.x +end nop -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.y, r0.y nop -mov.f32f32 r2.w, r0.x -mov.f32f32 r2.z, r0.z -mov.f32f32 r2.y, r0.y -end nop -; FRAG: outputs: r2.y (1:0) +; FRAG: outputs: r2.x (1:0) ; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r2.x (5:10,cm=f,il=12,b=1) r1.x (5:11,cm=f,il=16,b=1) -; FRAG: 212 instructions, 0 half, 8 full -; pos (bary): r1.x -; color: r2.y -; fragcoord: r0.x +; FRAG: 149 instructions, 0 half, 8 full diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-49.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-49.asm index 78028ad..5b14079 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-49.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-49.asm @@ -1,103 +1,80 @@ ; options: ; VERT: new compiler -@in(r0.x) in0 -@in(r0.y) in1 -@in(r0.z) in2 -@in(r0.w) in4 -@in(r1.x) in5 -@in(r1.y) in6 -@in(r1.z) in8 -@in(r1.w) in9 -@out(r5.x) out0 -@out(r5.y) out1 -@out(r5.z) out2 -@out(r5.w) out3 -@out(r4.x) out4 -@out(r4.y) out5 -@out(r4.z) out6 -@out(r4.w) out7 -@out(r3.x) out8 -@out(r3.y) out9 -@out(r3.z) out10 -@out(r3.w) out11 -@out(r1.w) out12 -@out(r2.x) out13 -@out(r2.y) out14 -@out(r2.z) out15 -(sy)(ss)mul.f r0.w, r0.w, (neg)c4.x -mul.f r2.x, c7.w, r0.x -mad.f32 r0.w, (neg)c4.y, r1.x, r0.w -mad.f32 r1.x, c8.w, r0.y, r2.x -mul.f r2.x, c7.z, r0.x -mul.f r2.y, c7.y, r0.x -mov.f32f32 r0.w, r0.w -mad.f32 r1.x, c9.w, r0.z, r1.x -mad.f32 r0.w, (neg)c4.z, r1.y, r0.w -mad.f32 r1.y, c8.z, r0.y, r2.x -mad.f32 r2.x, c8.y, r0.y, r2.y -mul.f r2.y, c7.x, r0.x -max.f r0.w, c12.x, r0.w -add.f r1.x, r1.x, c10.w -mad.f32 r1.y, c9.z, r0.z, r1.y -mad.f32 r2.x, c9.y, r0.z, r2.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.x, r1.x -add.f r1.y, r1.y, c10.z -add.f r2.x, r2.x, c10.y -mul.f r2.z, r0.w, c5.z -mul.f r2.w, r0.w, c5.y -mul.f r0.w, r0.w, c5.x -mov.f32f32 r3.w, r1.x -mov.f32f32 r1.x, r2.z -mov.f32f32 r2.z, r2.w -mov.f32f32 r0.w, r0.w -nop -mov.f32f32 r4.z, r1.x -mov.f32f32 r4.y, r2.z -mov.f32f32 r4.x, r0.w -mov.f32f32 r0.w, r1.y -mul.f r1.x, r2.x, c11.y -mad.f32 r1.y, c8.x, r0.y, r2.y -mul.f r2.x, c0.w, r0.x -mov.f32f32 r3.z, r0.w -mov.f32f32 r3.y, r1.x -mad.f32 r0.w, c9.x, r0.z, r1.y -mad.f32 r1.x, c1.w, r0.y, r2.x -mul.f r1.y, c0.z, r0.x -mul.f r2.x, c0.y, r0.x -add.f r0.w, r0.w, c10.x -mad.f32 r1.x, c2.w, r0.z, r1.x -mad.f32 r1.y, c1.z, r0.y, r1.y -mad.f32 r2.x, c1.y, r0.y, r2.x -mul.f r0.w, r0.w, c11.x -add.f r1.x, r1.x, c3.w -mad.f32 r1.y, c2.z, r0.z, r1.y -mad.f32 r2.x, c2.y, r0.z, r2.x -mov.f32f32 r3.x, r0.w -mov.f32f32 r5.w, r1.x -add.f r0.w, r1.y, c3.z -add.f r1.x, r2.x, c3.y -mul.f r1.y, c0.x, r0.x -mov.f32f32 r1.w, r1.w -mov.f32f32 r5.z, r0.w -mov.f32f32 r5.y, r1.x -mad.f32 r0.y, c1.x, r0.y, r1.y -mov.f32f32 r2.z, r1.w -mad.f32 r0.y, c2.x, r0.z, r0.y -mov.f32f32 r0.w, r1.z -mad.f32 r0.z, c6.x, r0.z, c6.y -mad.f32 r0.x, c6.x, r0.x, c6.y -add.f r0.y, r0.y, c3.x -mov.f32f32 r2.y, r0.w -mov.f32f32 r2.x, r0.z -mov.f32f32 r1.w, r0.x -mov.f32f32 r5.x, r0.y -mov.f32f32 r0.x, (0.000000) -(rpt2)nop -mov.f32f32 r4.w, r0.x +@in(r4.x) in0 +@in(r4.y) in1 +@in(r4.z) in2 +@in(r0.x) in4 +@in(r0.y) in5 +@in(r0.z) in6 +@in(r3.z) in8 +@in(r3.w) in9 +@out(r0.x) out0 +@out(r0.y) out1 +@out(r0.z) out2 +@out(r0.w) out3 +@out(r1.x) out4 +@out(r1.y) out5 +@out(r1.z) out6 +@out(r1.w) out7 +@out(r2.x) out8 +@out(r2.y) out9 +@out(r2.z) out10 +@out(r2.w) out11 +@out(r3.x) out12 +@out(r3.y) out13 +@out(r3.z) out14 +@out(r3.w) out15 +@const(c12.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 +@const(c13.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 +(sy)(ss)mul.f r0.x, r0.x, (neg)c4.x +mul.f r0.w, c7.y, r4.x +mad.f32 r0.x, (neg)c4.y, r0.y, r0.x +mad.f32 r0.y, c8.y, r4.y, r0.w +mad.f32 r0.x, (neg)c4.z, r0.z, r0.x +mad.f32 r0.y, c9.y, r4.z, r0.y +mul.f r0.z, c7.x, r4.x +mul.f r0.w, c7.w, r4.x +max.f r0.x, c12.x, r0.x +add.f r0.y, r0.y, c10.y +mad.f32 r0.z, c8.x, r4.y, r0.z +mad.f32 r0.w, c8.w, r4.y, r0.w +mov.f32f32 r1.y, r0.x +mul.f r2.y, r0.y, c11.y +mad.f32 r0.y, c9.x, r4.z, r0.z +mul.f r1.x, r0.x, c5.x +mul.f r1.z, r1.y, c5.z +mul.f r1.y, r1.y, c5.y +add.f r0.x, r0.y, c10.x +mad.f32 r0.y, c9.w, r4.z, r0.w +mul.f r0.z, c7.z, r4.x +mul.f r0.w, c0.w, r4.x +mul.f r2.x, r0.x, c11.x +add.f r2.w, r0.y, c10.w +mad.f32 r0.x, c8.z, r4.y, r0.z +mad.f32 r0.y, c1.w, r4.y, r0.w +mad.f32 r0.x, c9.z, r4.z, r0.x +mad.f32 r0.y, c2.w, r4.z, r0.y +mul.f r0.z, c0.z, r4.x +mul.f r1.w, c0.y, r4.x +add.f r2.z, r0.x, c10.z +add.f r0.w, r0.y, c3.w +mad.f32 r0.x, c1.z, r4.y, r0.z +mad.f32 r0.y, c1.y, r4.y, r1.w +mad.f32 r0.x, c2.z, r4.z, r0.x +mad.f32 r0.y, c2.y, r4.z, r0.y +mul.f r1.w, c0.x, r4.x +mad.f32 r3.y, c6.x, r4.z, c6.y +add.f r0.z, r0.x, c3.z +add.f r0.y, r0.y, c3.y +mad.f32 r0.x, c1.x, r4.y, r1.w +mad.f32 r3.x, c6.x, r4.x, c6.y +mad.f32 r0.x, c2.x, r4.z, r0.x +mov.f32f32 r1.w, (0.000000) +(rpt1)nop +add.f r0.x, r0.x, c3.x end -; VERT: outputs: r5.x (0:0) r4.x (5:9) r3.x (5:10) r1.w (5:11) -; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r1.z (0:0,cm=3,il=16,b=0) -; VERT: 74 instructions, 0 half, 6 full -; pos: r5.x +nop +; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11) +; VERT: inputs: r4.x (0:0,cm=7,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r3.z (0:0,cm=3,il=16,b=0) +; VERT: 48 instructions, 0 half, 5 full diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-50.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-50.asm index 6e58225..f1c05da 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-50.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-50.asm @@ -4,222 +4,159 @@ @in(r0.w) in3 @in(r1.x) in4 @in(r1.y) in5 -@out(r2.y) out0 -@out(r2.z) out1 -@out(r2.w) out2 -@out(r3.x) out3 +@out(r2.x) out0 +@out(r2.y) out1 +@out(r2.z) out2 +@out(r2.w) out3 +@const(c10.x) 0x3f000000, 0x00000000, 0x3f800000, 0xbb449ba6 +@const(c11.x) 0xbf000000, 0x40000000, 0x3f800000, 0x3de38866 +@const(c12.x) 0x3fb8aa65, 0x3cf5c28f, 0x00000000, 0x00000000 +@const(c13.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.x, 4, r1.x add.f r0.y, r0.w, c10.y bary.f r0.w, 5, r1.x bary.f r1.z, 10, r1.x -add.f r1.w, r0.x, c11.x -bary.f r2.x, 8, r1.x +add.f r2.x, r0.x, c11.x +bary.f r1.w, 11, r1.x add.f r2.y, r0.w, c11.x -mov.f32f32 r1.z, r1.z -floor.f r2.z, r1.w +bary.f r2.z, 8, r1.x +floor.f r3.x, r2.x rcp r0.y, r0.y add.f r0.z, r0.z, c10.y -floor.f r2.w, r2.y -mov.f32f32 r3.x, r1.z -add.f r1.z, r1.w, (neg)r2.z +floor.f r3.y, r2.y +bary.f r2.w, 9, r1.x +add.f r2.x, r2.x, (neg)r3.x (ss)mul.f r0.y, r0.z, r0.y -add.f r0.z, r2.y, (neg)r2.w -bary.f r1.w, 11, r1.x -mov.f32f32 r1.z, r1.z +absneg.f r0.z, (neg)c8.x +add.f r2.y, r2.y, (neg)r3.y +mov.f32f32 r3.x, r2.x +sam (f32)(xyzw)r3.y, r1.z, s#0, t#0 +(sy)(ss)add.f r1.z, c13.y, (neg)r4.x +mul.f r0.z, r0.z, c8.x +mov.f32f32 r1.w, r2.y +mul.f r4.y, c10.x, r3.x +add.f r3.x, c11.y, (neg)r3.x +mul.f r0.z, r0.z, r0.y mov.f32f32 r0.y, r0.y -absneg.f r2.y, (neg)c8.x -mov.f32f32 r0.z, r0.z -mul.f r2.z, c10.x, r1.z -mov.f32f32 r1.w, r1.w -mul.f r2.y, r2.y, c8.x -add.f r2.w, c11.y, (neg)r1.z -mov.f32f32 r2.z, r2.z -mul.f r3.z, c10.x, r0.z -mul.f r2.y, r2.y, r0.y -mov.f32f32 r3.y, r1.w -add.f r0.x, r0.x, (neg)r2.z -mov.f32f32 r1.w, r3.z -mov.f32f32 r2.y, r2.y -mov.f32f32 r2.z, r2.w -mov.f32f32 r0.x, r0.x -add.f r0.w, r0.w, (neg)r1.w -mul.f r0.y, r2.y, r0.y -sam (f32)(xyzw)r2.w, r3.x, s#0, t#0 -(sy)add.f r1.w, c13.y, (neg)r3.z -add.f r2.y, c11.x, r0.x +add.f r0.x, r0.x, (neg)r4.y +mul.f r4.y, c10.x, r1.w +mov.f32f32 r4.z, r3.x +mul.f r0.y, r0.z, r0.y +mov.f32f32 r0.z, r0.x +add.f r0.w, r0.w, (neg)r4.y add.f r0.x, c11.z, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r2.y, r2.y -mov.f32f32 r0.x, r0.x mul.f r0.y, r0.y, c12.x -add.f r3.w, c11.z, r0.w -mul.f r2.y, r2.y, c3.z -mul.f r0.x, r0.x, c3.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r3.w, r3.w -mov.f32f32 r4.x, r2.y -mov.f32f32 r4.y, r0.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r2.y, r2.y -mov.f32f32 r4.z, r4.x -add.f r0.w, c11.x, r0.w -exp2 r0.y, r0.y -(ss)mov.f32f32 r0.y, r0.y -mov.f32f32 r4.x, r4.y -mov.f32f32 r5.y, r0.x -mov.f32f32 r0.x, r0.w -add.f r0.w, c13.y, (neg)r0.y -mov.f32f32 r6.x, r4.x -mul.f r3.w, r3.w, c3.w -mul.f r0.x, r0.x, c3.w -mul.f r0.w, r0.w, c8.y -mul.f r0.y, r0.y, c10.z -mov.f32f32 r4.x, r3.w -mov.f32f32 r4.y, r0.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r6.w, r2.y -mov.f32f32 r2.y, r3.w -mov.f32f32 r4.w, r4.y -bary.f r3.w, 6, r1.x -add.f r0.y, r0.y, r0.w -mov.f32f32 r0.w, r4.x -mov.f32f32 r5.z, r0.x -add.f r0.x, r3.w, c10.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r6.y, r0.w -mov.f32f32 r7.x, r2.y -mov.f32f32 r0.w, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r2.y, r0.x -mov.f32f32 r3.w, r0.x -mov.f32f32 r5.x, r0.w -max.f r0.y, r0.y, c10.y -mov.f32f32 r6.z, r2.y -mov.f32f32 r5.w, r3.w -mov.f32f32 r0.x, r0.x -mul.f r0.w, r1.w, c4.z -add.f r1.w, c13.y, (neg)r3.z -sam.s (f32)(x)r2.y, r4.z, s#2, t#2 -(sy)mov.f32f32 r2.y, r2.y -min.f r0.y, r0.y, c10.z -sam.s (f32)(x)r3.w, r6.x, s#2, t#2 -(sy)mov.f32f32 r3.w, r3.w -sam.s (f32)(x)r4.x, r5.y, s#2, t#2 -(sy)mov.f32f32 r4.x, r4.x -mov.f32f32 r2.y, r2.y -add.f r4.y, c11.y, (neg)r0.z -(ss)add.f r4.z, c13.y, (neg)r0.y -add.f r4.w, c13.y, (neg)r0.y -add.f r5.x, c13.y, (neg)r0.y -mov.f32f32 r4.y, r4.y -mul.f r4.z, r4.z, c7.z -mul.f r4.w, r4.w, c7.y -mul.f r5.x, r5.x, c7.x -mul.f r5.y, r2.z, r4.y -mov.f32f32 r7.y, r0.x -mul.f r0.x, r3.z, c10.z -mul.f r1.w, r1.w, c4.y -mul.f r2.y, r5.y, r2.y -add.f r1.z, r1.z, c10.z -add.f r0.x, r0.x, r0.w -mul.f r0.w, r3.z, c10.z -sam.s (f32)(x)r5.y, r6.w, s#2, t#2 -(sy)mov.f32f32 r5.y, r5.y -mul.f r4.y, r1.z, r4.y -add.f r5.z, c13.y, (neg)r3.z -mul.f r0.x, r3.y, r0.x -add.f r0.w, r0.w, r1.w -mad.f32 r1.w, r4.y, r4.x, r2.y -mul.f r2.y, r5.z, c4.x -mov.f32f32 r0.x, r0.x -bary.f r3.y, 2, r1.x -mov.f32f32 r1.w, r1.w -add.f r0.z, r0.z, c10.z -mul.f r0.w, r3.x, r0.w -mul.f r3.x, r3.z, c10.z -mov.f32f32 r2.x, r2.x -mul.f r2.z, r2.z, r0.z -mul.f r3.y, r0.x, r3.y -mov.f32f32 r0.w, r0.w -add.f r2.y, r3.x, r2.y -mad.f32 r1.w, r2.z, r5.y, r1.w -bary.f r2.z, 1, r1.x -mov.f32f32 r4.x, r2.x -mul.f r2.x, r2.w, r2.y -mov.f32f32 r1.w, r1.w -mul.f r0.z, r1.z, r0.z -mul.f r1.z, r0.w, r2.z -mov.f32f32 r2.x, r2.x -nop -mad.f32 r0.z, r0.z, r3.w, r1.w -bary.f r1.w, 0, r1.x -bary.f (ei)r1.x, 9, r1.x -mov.f32f32 r1.y, c10.z -mov.f32f32 r0.z, r0.z -mul.f r1.w, r2.x, r1.w -mov.f32f32 r1.x, r1.x -mov.f32f32 r3.x, r1.y -mul.f r0.z, c11.w, r0.z -mov.f32f32 r1.y, c10.y -(rpt1)nop -mov.f32f32 r0.z, r0.z -mov.f32f32 r4.y, r1.x +add.f r0.z, c11.x, r0.z +mov.f32f32 r4.y, r0.w +mul.f r4.w, r0.x, c3.z +add.f r0.x, c11.z, r0.w +mul.f r5.z, r0.z, c3.z +add.f r0.z, c11.x, r4.y +mov.f32f32 r6.y, r4.w +mul.f r5.x, r0.x, c3.w +mov.f32f32 r7.x, r5.z +mul.f r6.z, r0.z, c3.w +exp2 r0.x, r0.y +(ss)mov.f32f32 r0.y, r0.x +bary.f r0.z, 6, r1.x +mov.f32f32 r5.w, r5.x +mov.f32f32 r7.y, r6.z +add.f r0.y, c13.y, (neg)r0.y +add.f r5.y, r0.z, c10.w +add.f r0.z, c11.y, (neg)r1.w +mul.f r0.x, r0.x, c10.z +add.f r0.w, c13.y, (neg)r4.x +mov.f32f32 r7.z, r5.y +mul.f r0.y, r0.y, c8.y +mov.f32f32 r6.w, r5.y +mov.f32f32 r6.x, r5.y (rpt1)nop -mul.f r1.x, r3.y, r0.z -mul.f r1.z, r1.z, r0.z -mul.f r0.z, r1.w, r0.z +add.f r0.x, r0.x, r0.y +sam.s (f32)(x)r7.x, r7.x, s#2, t#2 +mov.f32f32 r0.y, r0.z nop -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.z, r1.z -mad.f32 r0.x, c6.z, r0.x, r1.x -mad.f32 r0.w, c6.y, r0.w, r1.z -mov.f32f32 r0.z, r0.z -sam (f32)(w)r3.y, r4.x, s#1, t#1 -(sy)cmps.f.lt r1.x, r4.x, c12.y -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.w, r0.w -mad.f32 r0.z, c6.x, r2.x, r0.z -cov.u32f32 r1.x, r1.x -mul.f r0.x, r0.y, r0.x -mul.f r0.w, r0.y, r0.w -mov.f32f32 r0.z, r0.z -cmps.f.ne r1.x, r1.x, c10.y -add.f r0.x, r0.x, r4.z -mov.f32f32 r1.z, r4.x -add.f r0.w, r0.w, r4.w -mul.f r0.y, r0.y, r0.z -nop -mov.f32f32 r0.z, r1.z -(rpt2)nop -sel.b32 r0.z, r1.y, r1.x, r0.z -add.f r0.y, r0.y, r5.x -(rpt1)nop -mul.f r0.x, r0.x, r0.z -mul.f r0.w, r0.w, r0.z -mul.f r0.y, r0.y, r0.z +(ss)nop +sam.s (f32)(x)r7.y, r6.y, s#2, t#2 +sam.s (f32)(x)r7.z, r4.w, s#2, t#2 +mul.f r0.w, r0.w, c4.z +add.f r1.w, c13.y, (neg)r4.x +mul.f r0.y, r4.z, r0.y +max.f r0.x, r0.x, c10.y +sam.s (f32)(x)r4.y, r5.z, s#2, t#2 +mul.f r4.z, r4.x, c10.z +mul.f r1.w, r1.w, c4.y +(sy)mul.f r0.y, r0.y, r7.x +add.f r2.x, r2.x, c10.z +min.f r0.x, r0.x, c10.z +add.f r0.w, r4.z, r0.w +mul.f r4.z, r4.x, c10.z +mul.f r0.z, r2.x, r0.z +(ss)add.f r4.w, c13.y, (neg)r0.x +add.f r5.x, c13.y, (neg)r0.x +add.f r5.y, c13.y, (neg)r0.x +mad.f32 r0.y, r0.z, r7.y, r0.y +add.f r0.z, r2.y, c10.z +mul.f r2.y, r4.w, c7.z +mul.f r4.w, r5.x, c7.y +mul.f r5.x, r5.y, c7.x +mul.f r3.x, r3.x, r0.z +add.f r1.w, r4.z, r1.w +mul.f r1.z, r1.z, c4.x +mul.f r0.w, r3.w, r0.w +mad.f32 r0.y, r3.x, r4.y, r0.y +mul.f r0.z, r2.x, r0.z +mul.f r2.x, r4.x, c10.z +mov.f32f32 r3.x, r0.w +bary.f r3.w, 2, r1.x +mad.f32 r0.y, r0.z, r7.z, r0.y +mul.f r0.z, r3.z, r1.w +add.f r1.z, r2.x, r1.z +mul.f r1.w, r3.x, r3.w +mul.f r0.y, c11.w, r0.y +mov.f32f32 r2.x, r0.z +mul.f r1.z, r3.y, r1.z +bary.f r3.x, 1, r1.x +mov.f32f32 r3.y, r0.y +sam (f32)(w)r3.z, r2.z, s#1, t#1 +(sy)(ss)cmps.f.lt r2.z, r4.y, c12.y +mov.f32f32 r3.z, r1.z +mul.f r2.x, r2.x, r3.x +mul.f r1.w, r1.w, r3.y +bary.f (ei)r1.x, 0, r1.x +mad.f32 r0.w, c6.z, r0.w, r1.w +mul.f r1.y, r2.x, r3.y +cov.u32f32 r1.w, r2.z +mov.f32f32 r2.w, c10.z +mul.f r0.w, r0.x, r0.w +mad.f32 r0.z, c6.y, r0.z, r1.y +mul.f r1.x, r3.z, r1.x +cmps.f.ne r1.y, r1.w, c10.y +add.f r0.w, r0.w, r2.y +mov.f32f32 r1.w, c10.y +mul.f r0.z, r0.x, r0.z +mul.f r0.y, r1.x, r0.y nop -mul.f r0.x, r0.x, c5.z -mul.f r0.z, r0.w, c5.y -mul.f r0.y, r0.y, c5.x +sel.b32 r1.x, r1.w, r1.y, r4.y +add.f r0.z, r0.z, r4.w +mad.f32 r0.y, c6.x, r1.z, r0.y nop -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.y, r0.y +mul.f r0.w, r0.w, r1.x +mul.f r0.z, r0.z, r1.x +(rpt1)nop +mul.f r2.z, r0.w, c5.z +mul.f r2.y, r0.z, c5.y +mul.f r0.x, r0.x, r0.y +(rpt2)nop +add.f r0.x, r0.x, r5.x +(rpt2)nop +mul.f r0.x, r0.x, r1.x +(rpt2)nop +mul.f r2.x, r0.x, c5.x +end nop -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.y, r0.y nop -mov.f32f32 r2.w, r0.x -mov.f32f32 r2.z, r0.z -mov.f32f32 r2.y, r0.y -end nop -; FRAG: outputs: r2.y (1:0) +; FRAG: outputs: r2.x (1:0) ; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r2.x (5:10,cm=f,il=12,b=1) r1.x (5:11,cm=f,il=16,b=1) -; FRAG: 212 instructions, 0 half, 8 full -; pos (bary): r1.x -; color: r2.y -; fragcoord: r0.x +; FRAG: 149 instructions, 0 half, 8 full diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-51.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-51.asm index 0e4d5ee..9c8ac11 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-51.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-51.asm @@ -6,134 +6,99 @@ @in(r0.w) in4 @in(r1.x) in5 @in(r1.y) in6 -@in(r1.z) in8 -@in(r1.w) in9 -@out(r6.y) out0 -@out(r6.z) out1 -@out(r6.w) out2 -@out(r7.x) out3 -@out(r5.y) out4 -@out(r5.z) out5 -@out(r5.w) out6 -@out(r6.x) out7 -@out(r2.w) out8 -@out(r3.x) out9 -@out(r3.y) out10 -@out(r3.z) out11 -@out(r3.w) out12 -@out(r4.x) out13 -@out(r4.y) out14 -@out(r4.z) out15 -(sy)(ss)mul.f r2.x, c11.x, r0.w -mul.f r2.y, c11.x, r0.x -mad.f32 r2.x, c12.x, r1.x, r2.x -mad.f32 r2.y, c12.x, r0.y, r2.y -mul.f r2.z, c11.z, r0.x -mad.f32 r2.y, c13.x, r0.z, r2.y -mov.f32f32 r2.x, r2.x -mad.f32 r2.z, c12.z, r0.y, r2.z -mad.f32 r2.x, c13.x, r1.y, r2.x -add.f r2.y, r2.y, c14.x -mad.f32 r2.z, c13.z, r0.z, r2.z -mul.f r2.w, c11.y, r0.w -mov.f32f32 r2.x, r2.x -mul.f r3.x, c7.w, r2.y -mul.f r3.y, c7.z, r2.y -mul.f r3.z, c7.y, r2.y -mul.f r2.x, r2.x, (neg)c4.x -mad.f32 r2.w, c12.y, r1.x, r2.w -mul.f r3.w, c11.y, r0.x -mul.f r4.x, c7.x, r2.y -mad.f32 r3.w, c12.y, r0.y, r3.w -mov.f32f32 r2.w, r2.w -mad.f32 r3.w, c13.y, r0.z, r3.w -mad.f32 r2.w, c13.y, r1.y, r2.w -mul.f r4.y, c0.w, r2.y -mul.f r4.z, c0.z, r2.y -mul.f r4.w, c0.y, r2.y -mov.f32f32 r2.w, r2.w -add.f r3.w, r3.w, c14.y -mul.f r5.x, c0.x, r2.y -add.f r2.z, r2.z, c14.z -mad.f32 r2.x, (neg)c4.y, r2.w, r2.x -mad.f32 r2.w, c8.w, r3.w, r3.x -mad.f32 r3.x, c8.z, r3.w, r3.y -mad.f32 r3.y, c8.y, r3.w, r3.z -mov.f32f32 r2.x, r2.x +@in(r3.z) in8 +@in(r3.w) in9 +@out(r0.x) out0 +@out(r0.y) out1 +@out(r0.z) out2 +@out(r0.w) out3 +@out(r1.x) out4 +@out(r1.y) out5 +@out(r1.z) out6 +@out(r1.w) out7 +@out(r2.x) out8 +@out(r2.y) out9 +@out(r2.z) out10 +@out(r2.w) out11 +@out(r3.x) out12 +@out(r3.y) out13 +@out(r3.z) out14 +@out(r3.w) out15 +@const(c16.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 +@const(c17.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 +(sy)(ss)mul.f r1.z, c11.x, r0.w +mul.f r1.w, c11.x, r0.x +mad.f32 r1.z, c12.x, r1.x, r1.z +mad.f32 r1.w, c12.x, r0.y, r1.w +mad.f32 r1.z, c13.x, r1.y, r1.z +mad.f32 r1.w, c13.x, r0.z, r1.w +mul.f r2.x, c11.z, r0.x +mul.f r2.y, c11.y, r0.x +mul.f r1.z, r1.z, (neg)c4.x +mul.f r2.z, c11.y, r0.w +add.f r1.w, r1.w, c14.x +mad.f32 r2.z, c12.y, r1.x, r2.z +mad.f32 r2.x, c12.z, r0.y, r2.x +mad.f32 r2.z, c13.y, r1.y, r2.z +mul.f r2.w, c7.y, r1.w +mul.f r3.x, c7.x, r1.w +mul.f r3.y, c7.w, r1.w +mad.f32 r1.z, (neg)c4.y, r2.z, r1.z mul.f r0.w, c11.z, r0.w -mad.f32 r2.w, c9.w, r2.z, r2.w +mad.f32 r2.y, c12.y, r0.y, r2.y mad.f32 r0.w, c12.z, r1.x, r0.w +mad.f32 r1.x, c13.y, r0.z, r2.y +mad.f32 r0.w, c13.z, r1.y, r0.w +mul.f r1.y, c7.z, r1.w +mul.f r2.y, c0.w, r1.w +mul.f r2.z, c0.z, r1.w +mad.f32 r0.w, (neg)c4.z, r0.w, r1.z +add.f r4.x, r1.x, c14.y +mul.f r4.y, c0.y, r1.w +mul.f r4.z, c0.x, r1.w +max.f r0.w, c16.x, r0.w +mad.f32 r2.w, c8.y, r4.x, r2.w +mad.f32 r3.x, c8.x, r4.x, r3.x +mad.f32 r3.y, c8.w, r4.x, r3.y +mov.f32f32 r4.w, r0.w +mad.f32 r2.x, c13.z, r0.z, r2.x +mul.f r1.x, r0.w, c5.x +mad.f32 r0.w, c8.z, r4.x, r1.y +mul.f r1.z, r4.w, c5.z +mul.f r1.y, r4.w, c5.y +add.f r4.w, r2.x, c14.z +mad.f32 r2.x, c1.w, r4.x, r2.y +mad.f32 r2.y, c1.z, r4.x, r2.z +mad.f32 r2.z, c1.y, r4.x, r4.y +mad.f32 r2.w, c9.y, r4.w, r2.w mul.f r0.x, c11.w, r0.x -mad.f32 r1.x, c9.z, r2.z, r3.x -mad.f32 r3.x, c9.y, r2.z, r3.y -mov.f32f32 r0.w, r0.w +mad.f32 r3.x, c9.x, r4.w, r3.x mad.f32 r0.x, c12.w, r0.y, r0.x -mad.f32 r0.y, c13.z, r1.y, r0.w +mad.f32 r0.y, c9.w, r4.w, r3.y mad.f32 r0.x, c13.w, r0.z, r0.x -mad.f32 r0.z, c8.x, r3.w, r4.x -mad.f32 r0.w, c1.w, r3.w, r4.y -mov.f32f32 r0.y, r0.y +mad.f32 r0.z, c9.z, r4.w, r0.w +mad.f32 r0.w, c2.w, r4.w, r2.x +mad.f32 r4.y, c2.z, r4.w, r2.y add.f r0.x, r0.x, c14.w -mad.f32 r0.z, c9.x, r2.z, r0.z -mad.f32 r0.w, c2.w, r2.z, r0.w -mad.f32 r0.y, (neg)c4.z, r0.y, r2.x -mad.f32 r1.y, c10.w, r0.x, r2.w -mad.f32 r1.x, c10.z, r0.x, r1.x -mad.f32 r2.x, c10.y, r0.x, r3.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.x, r1.x -mul.f r2.x, r2.x, c15.y -max.f r0.y, c16.x, r0.y -mov.f32f32 r3.z, r1.y -mov.f32f32 r3.y, r1.x -mov.f32f32 r3.x, r2.x -mov.f32f32 r0.y, r0.y -mad.f32 r0.z, c10.x, r0.x, r0.z +mad.f32 r5.x, c2.y, r4.w, r2.z +mad.f32 r4.x, c1.x, r4.x, r4.z +mad.f32 r3.y, c6.x, r4.w, c6.y +mad.f32 r2.x, c10.y, r0.x, r2.w +mad.f32 r3.x, c10.x, r0.x, r3.x +mad.f32 r2.w, c10.w, r0.x, r0.y +mad.f32 r2.z, c10.z, r0.x, r0.z +mul.f r2.y, r2.x, c15.y +mul.f r2.x, r3.x, c15.x mad.f32 r0.w, c3.w, r0.x, r0.w -mad.f32 r1.x, c1.z, r3.w, r4.z -mul.f r1.y, r0.y, c5.z -mul.f r2.x, r0.y, c5.y -mul.f r0.y, r0.y, c5.x -mul.f r0.z, r0.z, c15.x -mov.f32f32 r1.y, r1.y -mov.f32f32 r2.x, r2.x -mov.f32f32 r0.y, r0.y -nop -mov.f32f32 r5.w, r1.y -mov.f32f32 r5.z, r2.x -mov.f32f32 r5.y, r0.y -mov.f32f32 r2.w, r0.z -mov.f32f32 r7.x, r0.w -mad.f32 r0.y, c2.z, r2.z, r1.x -mad.f32 r0.z, c1.y, r3.w, r4.w -mad.f32 r0.y, c3.z, r0.x, r0.y -mad.f32 r0.z, c2.y, r2.z, r0.z -mad.f32 r0.w, c1.x, r3.w, r5.x -mad.f32 r1.x, c6.x, r2.z, c6.y -mov.f32f32 r6.w, r0.y -mad.f32 r0.y, c3.y, r0.x, r0.z -mad.f32 r0.z, c2.x, r2.z, r0.w -mov.f32f32 r0.w, r1.x -mad.f32 r1.x, c6.x, r2.y, c6.y -mov.f32f32 r6.z, r0.y -mad.f32 r0.x, c3.x, r0.x, r0.z -mov.f32f32 r4.x, r0.w -mov.f32f32 r0.y, r1.x -nop -mov.f32f32 r6.y, r0.x -mov.f32f32 r0.x, r1.w -mov.f32f32 r3.w, r0.y -mov.f32f32 r0.y, r1.z -mov.f32f32 r0.z, (0.000000) -mov.f32f32 r4.z, r0.x -nop -mov.f32f32 r4.y, r0.y -mov.f32f32 r6.x, r0.z +mad.f32 r0.z, c3.z, r0.x, r4.y +mad.f32 r0.y, c3.y, r0.x, r5.x +mad.f32 r4.x, c2.x, r4.w, r4.x +mad.f32 r3.x, c6.x, r1.w, c6.y +mad.f32 r0.x, c3.x, r0.x, r4.x +mov.f32f32 r1.w, (0.000000) end nop -; VERT: outputs: r6.y (0:0) r5.y (5:9) r2.w (5:10) r3.w (5:11) -; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r1.z (0:0,cm=3,il=16,b=0) -; VERT: 107 instructions, 0 half, 8 full -; pos: r6.y +; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11) +; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r3.z (0:0,cm=3,il=16,b=0) +; VERT: 71 instructions, 0 half, 6 full diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-52.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-52.asm index c6e09ad..593f290 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-52.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-52.asm @@ -4,246 +4,167 @@ @in(r0.w) in3 @in(r1.x) in4 @in(r1.y) in5 -@out(r5.y) out0 -@out(r5.z) out1 -@out(r5.w) out2 -@out(r6.x) out3 +@out(r2.x) out0 +@out(r2.y) out1 +@out(r2.z) out2 +@out(r2.w) out3 +@const(c12.x) 0x3f000000, 0x00000000, 0x3f800000, 0xbb449ba6 +@const(c13.x) 0xbf000000, 0x40000000, 0x3f800000, 0x3de38866 +@const(c14.x) 0x3fb8aa65, 0x3cf5c28f, 0x00000000, 0x00000000 +@const(c15.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.x, 8, r1.x -bary.f r0.y, 4, r1.x -add.f r0.w, r0.w, c12.y +add.f r0.y, r0.w, c12.y +bary.f r0.w, 4, r1.x bary.f r1.z, 9, r1.x add.f r1.w, r0.x, c13.x -mul.f r2.x, r0.y, r0.y -bary.f r2.y, 5, r1.x -add.f r2.z, r1.z, c13.x +bary.f r2.x, 12, r1.x +mul.f r2.y, r0.w, r0.w +bary.f r2.z, 5, r1.x floor.f r2.w, r1.w -rcp r0.w, r0.w +rcp r0.y, r0.y add.f r0.z, r0.z, c12.y -mad.f32 r2.x, r2.y, r2.y, r2.x -floor.f r3.x, r2.z +add.f r3.x, r1.z, c13.x +mad.f32 r2.y, r2.z, r2.z, r2.y add.f r1.w, r1.w, (neg)r2.w -(ss)mul.f r0.z, r0.z, r0.w -(ss)mov.f32f32 r0.w, r2.x -bary.f r2.x, 6, r1.x -mov.f32f32 r1.w, r1.w -mov.f32f32 r0.z, r0.z -absneg.f r2.w, (neg)c8.x -mad.f32 r0.w, r2.x, r2.x, r0.w -mul.f r3.y, c12.x, r1.w -add.f r2.z, r2.z, (neg)r3.x -mul.f r2.w, r2.w, c8.x -add.f r3.x, c13.y, (neg)r1.w -mov.f32f32 r3.y, r3.y -mov.f32f32 r2.z, r2.z -mul.f r2.w, r2.w, r0.z -rsq r0.w, r0.w -(ss)mov.f32f32 r0.w, r0.w -add.f r0.x, r0.x, (neg)r3.y -mul.f r3.y, c12.x, r2.z -mov.f32f32 r2.w, r2.w -mul.f r0.y, r0.y, r0.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r3.y, r3.y -mul.f r0.z, r2.w, r0.z +(ss)mul.f r0.y, r0.z, r0.y +absneg.f r0.z, (neg)c8.x +bary.f r2.w, 6, r1.x +mov.f32f32 r3.y, r1.w +floor.f r3.z, r3.x +mul.f r0.z, r0.z, c8.x +mad.f32 r2.y, r2.w, r2.w, r2.y +mul.f r3.w, c12.x, r3.y +add.f r3.x, r3.x, (neg)r3.z +mul.f r0.z, r0.z, r0.y mov.f32f32 r0.y, r0.y -add.f r2.w, c13.x, r0.x -bary.f r3.z, 16, r1.x -mov.f32f32 r0.z, r0.z +add.f r0.x, r0.x, (neg)r3.w +mov.f32f32 r3.z, r3.x +rsq r2.y, r2.y +(ss)mov.f32f32 r3.w, r2.y +mul.f r0.y, r0.z, r0.y +mov.f32f32 r0.z, r0.x +mul.f r4.x, c12.x, r3.z add.f r0.x, c13.z, r0.x -mov.f32f32 r2.w, r2.w -mul.f r0.y, r0.y, r3.z -mul.f r2.y, r2.y, r0.w -mul.f r0.z, r0.z, c14.x -mul.f r2.w, r2.w, c3.z -mov.f32f32 r0.x, r0.x -mov.f32f32 r2.y, r2.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r3.z, r2.w -bary.f r3.w, 17, r1.x -mul.f r0.x, r0.x, c3.z -mov.f32f32 r2.w, r2.w -mov.f32f32 r4.x, r3.z -add.f r1.z, r1.z, (neg)r3.y -mad.f32 r0.y, r2.y, r3.w, r0.y -exp2 r0.z, r0.z -(ss)mov.f32f32 r0.z, r0.z -mov.f32f32 r2.y, r0.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r0.y, r0.y -mul.f r0.w, r2.x, r0.w -add.f r2.x, c15.y, (neg)r0.z -add.f r3.y, c13.x, r1.z -mov.f32f32 r2.y, r2.y -mov.f32f32 r0.w, r0.w -mul.f r2.x, r2.x, c8.y -mov.f32f32 r3.y, r3.y -bary.f r3.z, 18, r1.x -mul.f r0.z, r0.z, c12.z -mov.f32f32 r4.w, r2.y -mul.f r2.y, r3.y, c3.w -mad.f32 r0.y, r0.w, r3.z, r0.y -add.f r0.z, r0.z, r2.x -add.f r0.w, c13.z, r1.z -mov.f32f32 r1.z, r2.y -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r4.y, r1.z -bary.f r1.z, 10, r1.x -max.f r0.y, c12.y, r0.y -mov.f32f32 r0.z, r0.z -mul.f r0.w, r0.w, c3.w -add.f r1.z, r1.z, c12.w -mov.f32f32 r0.y, r0.y -max.f r0.z, r0.z, c12.y -mov.f32f32 r2.x, r0.w -mov.f32f32 r3.y, r1.z -mov.f32f32 r0.x, r0.x -mov.f32f32 r5.z, r2.w -mov.f32f32 r0.w, r0.w -mov.f32f32 r4.z, r3.y -log2 r0.y, r0.y -(ss)mul.f r0.y, c9.x, r0.y -min.f r0.z, r0.z, c12.z -mov.f32f32 r2.x, r2.x -mov.f32f32 r3.y, r0.x -mov.f32f32 r5.w, r0.w -mov.f32f32 r0.x, r2.y -sam.s (f32)(x)r0.w, r4.x, s#2, t#2 -(sy)mov.f32f32 r0.w, r0.w -mov.f32f32 r0.y, r0.y -add.f r2.y, c15.y, (neg)r0.z -add.f r2.w, c15.y, (neg)r0.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r3.x, r3.x -add.f r3.z, c13.y, (neg)r2.z -mul.f r2.y, r2.y, c7.z -mul.f r2.w, r2.w, c7.y -add.f r3.w, c15.y, (neg)r0.z -(ss)mov.f32f32 r4.x, r3.z +mul.f r0.w, r0.w, r3.w +add.f r0.z, c13.x, r0.z +mul.f r0.y, r0.y, c14.x +bary.f r4.y, 16, r1.x +add.f r1.z, r1.z, (neg)r4.x +mul.f r4.z, r0.z, c3.z +mul.f r5.y, r0.x, c3.z +mul.f r0.x, r0.w, r4.y +add.f r0.z, c13.z, r1.z +mov.f32f32 r6.x, r4.z +mov.f32f32 r0.w, r1.z exp2 r0.y, r0.y -mov.f32f32 r3.z, c6.y -mov.f32f32 r4.y, c6.x -mov.f32f32 r4.z, c6.z -mul.f r5.x, r3.x, r4.x -mul.f r5.y, r3.z, c10.y -mul.f r4.y, r4.y, c10.x -mul.f r6.y, r3.w, c7.x -mul.f r0.w, r5.x, r0.w -mov.f32f32 r3.z, r0.x -mov.f32f32 r0.x, r1.z -mul.f r4.z, r4.z, c10.z -(ss)mul.f r5.x, r5.y, r0.y -mul.f r4.y, r4.y, r0.y -mov.f32f32 r3.w, r0.x -mul.f r0.x, r4.z, r0.y -(ss)mov.f32f32 r0.y, r5.x -mov.f32f32 r4.y, r4.y -bary.f r4.z, 14, r1.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r5.x, r2.x -sam.s (f32)(x)r2.x, r3.y, s#2, t#2 -(sy)mov.f32f32 r2.x, r2.x -add.f r1.w, r1.w, c12.z -(ss)mov.f32f32 r3.y, r4.z -mov.f32f32 r3.z, r1.z -mov.f32f32 r1.z, r1.z -mul.f r3.w, r1.w, r4.x -mov.f32f32 r6.z, r3.y -bary.f r3.y, 15, r1.x -mov.f32f32 r5.y, r3.z -mad.f32 r0.w, r3.w, r2.x, r0.w -mov.f32f32 r6.x, r1.z -add.f r1.z, r2.z, c12.z -bary.f r2.x, 12, r1.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r2.z, r3.y -sam.s (f32)(x)r3.y, r4.w, s#2, t#2 -(sy)mov.f32f32 r3.y, r3.y -mul.f r3.x, r3.x, r1.z -sam.s (f32)(x)r3.z, r5.z, s#2, t#2 -(sy)mov.f32f32 r3.z, r3.z -mov.f32f32 r6.w, r2.z -mov.f32f32 r2.x, r2.x -mul.f r1.z, r1.w, r1.z -mad.f32 r0.w, r3.x, r3.z, r0.w -bary.f r1.w, 13, r1.x -mov.f32f32 r3.z, r2.x -mov.f32f32 r2.x, c12.z -mov.f32f32 r0.w, r0.w -(ss)nop -sam (f32)(xyz)r4.z, r6.z, s#0, t#0 -bary.f r2.z, 2, r1.x -mad.f32 r0.w, r1.z, r3.y, r0.w -bary.f r1.z, 1, r1.x -bary.f (ei)r1.x, 0, r1.x -(sy)mad.f32 r0.x, r5.x, r2.z, r0.x -mov.f32f32 r0.w, r0.w -mad.f32 r0.y, r4.w, r1.z, r0.y -mad.f32 r1.x, r4.z, r1.x, r4.y +(ss)mov.f32f32 r1.z, r0.y +mul.f r2.z, r2.z, r3.w +bary.f r3.w, 17, r1.x +add.f r0.w, c13.x, r0.w +add.f r1.z, c15.y, (neg)r1.z +mov.f32f32 r6.w, r5.y +mul.f r5.z, r0.z, c3.w +mul.f r7.x, r0.w, c3.w +mul.f r0.z, r1.z, c8.y +(ss)mul.f r0.y, r0.y, c12.z +mad.f32 r0.x, r2.z, r3.w, r0.x +mov.f32f32 r6.y, r7.x +bary.f r0.w, 10, r1.x +add.f r0.y, r0.y, r0.z +mul.f r0.z, r2.w, r2.y +bary.f r1.z, 18, r1.x +add.f r5.w, r0.w, c12.w +max.f r0.y, r0.y, c12.y +mov.f32f32 r4.w, r5.z +mad.f32 r0.x, r0.z, r1.z, r0.x +mov.f32f32 r6.z, r5.w +min.f r0.y, r0.y, c12.z +mov.f32f32 r7.y, r5.w +mov.f32f32 r5.x, r5.w +max.f r0.x, c12.y, r0.x +nop +add.f r0.z, c15.y, (neg)r0.y +sam.s (f32)(x)r7.z, r6.x, s#2, t#2 +add.f r0.w, c13.y, (neg)r3.y +add.f r1.z, c15.y, (neg)r0.y +add.f r2.y, c15.y, (neg)r0.y +mul.f r0.z, r0.z, c7.z +mov.f32f32 r2.z, r0.w +add.f r2.w, c13.y, (neg)r3.z +mul.f r1.z, r1.z, c7.y +mul.f r3.y, r2.y, c7.x +log2 r0.x, r0.x +(ss)mul.f r0.x, c9.x, r0.x +mov.f32f32 r2.y, r2.w +sam.s (f32)(x)r3.z, r6.w, s#2, t#2 nop +sam.s (f32)(x)r3.w, r4.z, s#2, t#2 +sam.s (f32)(x)r4.x, r5.y, s#2, t#2 +add.f r1.w, r1.w, c12.z +add.f r3.x, r3.x, c12.z +mul.f r2.z, r2.z, r2.y +bary.f r2.y, 13, r1.x +mov.f32f32 r4.y, c6.z +(ss)bary.f r4.z, 14, r1.x +(sy)mul.f r2.z, r2.z, r7.z +mul.f r2.w, r1.w, r2.w +exp2 r0.x, r0.x +mov.f32f32 r4.w, c6.y +mov.f32f32 r5.x, c6.x +mul.f r4.y, r4.y, c10.z +mad.f32 r2.z, r2.w, r3.z, r2.z +mul.f r0.w, r0.w, r3.x +mul.f r2.w, r4.w, c10.y +mul.f r3.z, r5.x, c10.x +(ss)mul.f r4.y, r4.y, r0.x +mad.f32 r0.w, r0.w, r3.w, r2.z +mul.f r1.w, r1.w, r3.x +bary.f r4.w, 15, r1.x +mul.f r2.z, r2.w, r0.x +(ss)mul.f r0.x, r3.z, r0.x +mad.f32 r0.w, r1.w, r4.x, r0.w +sam (f32)(w)r5.x, r2.x, s#1, t#1 +(sy)cmps.f.lt r1.w, r5.w, c14.y +mov.f32f32 r2.w, c12.z +(ss)bary.f r2.x, 2, r1.x mul.f r0.w, c13.w, r0.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.x, r1.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.y, r1.w -mov.f32f32 r6.x, r2.x -mov.f32f32 r1.z, c12.y +sam (f32)(xyz)r4.z, r4.z, s#0, t#0 +bary.f r2.y, 0, r1.x +bary.f (ei)r1.x, 1, r1.x +cov.u32f32 r1.y, r1.w +mov.f32f32 r1.w, r0.w +(sy)mad.f32 r2.x, r5.x, r2.x, r4.y +mad.f32 r1.x, r4.w, r1.x, r2.z +mad.f32 r0.x, r4.z, r2.y, r0.x +cmps.f.ne r1.y, r1.y, c12.y +mul.f r2.x, r2.x, r1.w +mul.f r1.x, r1.x, r1.w +mad.f32 r1.w, c5.z, r5.x, r2.x +mad.f32 r1.x, c5.y, r4.w, r1.x mul.f r0.x, r0.x, r0.w -mul.f r0.y, r0.y, r0.w -mul.f r0.w, r1.x, r0.w -mov.f32f32 r3.w, r1.y -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y -mad.f32 r0.x, c5.z, r5.x, r0.x -mad.f32 r0.y, c5.y, r4.w, r0.y -mov.f32f32 r0.w, r0.w +mov.f32f32 r0.w, c12.y +mul.f r1.w, r0.y, r1.w +mul.f r1.x, r0.y, r1.x +mad.f32 r0.x, c5.x, r4.z, r0.x +sel.b32 r0.w, r0.w, r1.y, r5.w +add.f r0.z, r1.w, r0.z +add.f r1.x, r1.x, r1.z +mul.f r0.x, r0.y, r0.x nop -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y -mad.f32 r0.w, c5.x, r4.z, r0.w -sam (f32)(w)r3.x, r3.z, s#1, t#1 -(sy)cmps.f.lt r1.x, r3.w, c14.y -mul.f r0.x, r0.z, r0.x -mul.f r0.y, r0.z, r0.y -mov.f32f32 r0.w, r0.w -cov.u32f32 r1.x, r1.x -add.f r0.x, r0.x, r2.y -add.f r0.y, r0.y, r2.w -mul.f r0.z, r0.z, r0.w -cmps.f.ne r0.w, r1.x, c12.y -mov.f32f32 r1.x, r3.w -(rpt2)nop -mov.f32f32 r1.x, r1.x -add.f r0.z, r0.z, r6.y +mul.f r0.y, r0.z, r0.w +mul.f r0.z, r1.x, r0.w (rpt1)nop -sel.b32 r0.w, r1.z, r0.w, r1.x +mul.f r2.z, r0.y, c4.z +mul.f r2.y, r0.z, c4.y +add.f r0.x, r0.x, r3.y (rpt2)nop mul.f r0.x, r0.x, r0.w -mul.f r0.y, r0.y, r0.w -mul.f r0.z, r0.z, r0.w -nop -mul.f r0.x, r0.x, c4.z -mul.f r0.y, r0.y, c4.y -mul.f r0.z, r0.z, c4.x -nop -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z +(rpt2)nop +mul.f r2.x, r0.x, c4.x +end nop -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z nop -mov.f32f32 r5.w, r0.x -mov.f32f32 r5.z, r0.y -mov.f32f32 r5.y, r0.z -end -; FRAG: outputs: r5.y (1:0) -; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r2.x (5:10,cm=f,il=12,b=1) r3.x (5:11,cm=f,il=16,b=1) r2.z (5:12,cm=f,il=20,b=1) r5.x (5:13,cm=f,il=24,b=1) -; FRAG: 237 instructions, 0 half, 7 full -; pos (bary): r1.x -; color: r5.y -; fragcoord: r0.x +; FRAG: outputs: r2.x (1:0) +; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r2.x (5:10,cm=f,il=12,b=1) r3.x (5:11,cm=f,il=16,b=1) r4.x (5:12,cm=f,il=20,b=1) r5.x (5:13,cm=f,il=24,b=1) +; FRAG: 155 instructions, 0 half, 8 full diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-53.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-53.asm index 9f1027e..e7bcae3 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-53.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-53.asm @@ -6,186 +6,139 @@ @in(r0.w) in4 @in(r1.x) in5 @in(r1.y) in6 -@in(r1.z) in8 -@in(r1.w) in9 -@out(r3.x) out0 -@out(r3.y) out1 -@out(r3.z) out2 -@out(r3.w) out3 -@out(r7.y) out4 -@out(r7.z) out5 -@out(r7.w) out6 -@out(r8.x) out7 +@in(r4.z) in8 +@in(r4.w) in9 +@out(r0.x) out0 +@out(r0.y) out1 +@out(r0.z) out2 +@out(r0.w) out3 +@out(r1.x) out4 +@out(r1.y) out5 +@out(r1.z) out6 +@out(r1.w) out7 @out(r2.x) out8 @out(r2.y) out9 @out(r2.z) out10 @out(r2.w) out11 -@out(r8.y) out12 -@out(r8.z) out13 -@out(r8.w) out14 -@out(r9.x) out15 +@out(r3.x) out12 +@out(r3.y) out13 +@out(r3.z) out14 +@out(r3.w) out15 @out(r4.x) out16 @out(r4.y) out17 @out(r4.z) out18 @out(r4.w) out19 -@out(r6.y) out20 -@out(r6.z) out21 -@out(r6.w) out22 -@out(r7.x) out23 -(sy)(ss)mul.f r2.x, c12.x, r0.x -mul.f r2.y, c12.x, r0.w -mad.f32 r2.x, c13.x, r0.y, r2.x -mad.f32 r2.y, c13.x, r1.x, r2.y -mad.f32 r2.x, c14.x, r0.z, r2.x -mul.f r2.z, c12.z, r0.x -mul.f r2.w, c12.z, r0.w -mul.f r0.w, c12.y, r0.w -add.f r2.x, r2.x, c15.x -mov.f32f32 r2.y, r2.y -mad.f32 r2.z, c13.z, r0.y, r2.z -mad.f32 r2.w, c13.z, r1.x, r2.w -add.f r3.x, c4.x, (neg)r2.x -mad.f32 r2.y, c14.x, r1.y, r2.y -mul.f r3.y, c8.w, r2.x -mul.f r3.z, c8.z, r2.x -mul.f r3.w, r3.x, r3.x -mul.f r4.x, c12.y, r0.x -mov.f32f32 r2.y, r2.y -mad.f32 r4.x, c13.y, r0.y, r4.x -absneg.f r4.y, (neg)c5.x -mad.f32 r4.x, c14.y, r0.z, r4.x -mul.f r4.z, c8.y, r2.x -mul.f r4.w, c8.x, r2.x -mul.f r5.x, r2.y, r4.y -add.f r4.x, r4.x, c15.y -mad.f32 r0.w, c13.y, r1.x, r0.w -mul.f r1.x, c0.w, r2.x -mul.f r5.y, c0.z, r2.x -add.f r5.z, c4.y, (neg)r4.x -mov.f32f32 r0.w, r0.w -mad.f32 r3.y, c9.w, r4.x, r3.y -mad.f32 r3.z, c9.z, r4.x, r3.z -mad.f32 r3.w, r5.z, r5.z, r3.w -mad.f32 r0.w, c14.y, r1.y, r0.w -mad.f32 r2.z, c14.z, r0.z, r2.z -mad.f32 r4.z, c9.y, r4.x, r4.z -mov.f32f32 r3.w, r3.w -mov.f32f32 r0.w, r0.w -add.f r2.z, r2.z, c15.z -absneg.f r5.w, (neg)c5.y -mad.f32 r4.w, c9.x, r4.x, r4.w -mad.f32 r1.x, c1.w, r4.x, r1.x -add.f r6.x, c4.z, (neg)r2.z -mad.f32 r5.x, r0.w, r5.w, r5.x -mad.f32 r3.y, c10.w, r2.z, r3.y -mad.f32 r3.z, c10.z, r2.z, r3.z -mad.f32 r3.w, r6.x, r6.x, r3.w -mov.f32f32 r5.x, r5.x -mov.f32f32 r2.w, r2.w +@out(r5.x) out20 +@out(r5.y) out21 +@out(r5.z) out22 +@out(r5.w) out23 +@const(c17.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 +@const(c18.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 +(sy)(ss)mul.f r1.z, c12.x, r0.x +mul.f r1.w, c12.x, r0.w +mad.f32 r1.z, c13.x, r0.y, r1.z +mad.f32 r1.w, c13.x, r1.x, r1.w +mad.f32 r1.z, c14.x, r0.z, r1.z +mad.f32 r1.w, c14.x, r1.y, r1.w +absneg.f r2.x, (neg)c5.x +mul.f r2.y, c12.y, r0.x +add.f r2.z, r1.z, c15.x +mad.f32 r1.z, c13.y, r0.y, r2.y +mul.f r2.y, c12.z, r0.x +mul.f r2.w, r1.w, r2.x +add.f r3.x, c4.x, (neg)r2.z +mul.f r3.y, c12.y, r0.w +mul.f r3.z, c8.y, r2.z +mul.f r3.w, c8.x, r2.z +mul.f r4.x, r3.x, r3.x +mad.f32 r1.z, c14.y, r0.z, r1.z +mad.f32 r3.y, c13.y, r1.x, r3.y +mul.f r4.y, c8.w, r2.z +mul.f r5.x, c8.z, r2.z +add.f r5.w, r1.z, c15.y +mad.f32 r6.x, c14.y, r1.y, r3.y +absneg.f r1.z, (neg)c5.y +mul.f r3.y, c0.w, r2.z +add.f r5.y, c4.y, (neg)r5.w +mad.f32 r3.z, c9.y, r5.w, r3.z +mad.f32 r3.w, c9.x, r5.w, r3.w +mad.f32 r2.w, r6.x, r1.z, r2.w +mad.f32 r4.x, r5.y, r5.y, r4.x +mad.f32 r2.y, c13.z, r0.y, r2.y +mul.f r0.w, c12.z, r0.w +mad.f32 r2.y, c14.z, r0.z, r2.y +mad.f32 r0.w, c13.z, r1.x, r0.w +mad.f32 r1.x, c9.w, r5.w, r4.y +mad.f32 r4.y, c9.z, r5.w, r5.x +add.f r2.y, r2.y, c15.z +mad.f32 r6.y, c14.z, r1.y, r0.w +absneg.f r0.w, (neg)c5.z +mad.f32 r1.y, c1.w, r5.w, r3.y +add.f r3.y, c4.z, (neg)r2.y +mad.f32 r3.z, c10.y, r2.y, r3.z +mad.f32 r3.w, c10.x, r2.y, r3.w +mad.f32 r2.w, r6.y, r0.w, r2.w +mad.f32 r4.x, r3.y, r3.y, r4.x mul.f r0.x, c12.w, r0.x -mad.f32 r4.z, c10.y, r2.z, r4.z -mad.f32 r4.w, c10.x, r2.z, r4.w -mad.f32 r1.y, c14.z, r1.y, r2.w -rsq r2.w, r3.w -(ss)mov.f32f32 r2.w, r2.w -(ss)absneg.f r3.w, (neg)c5.z +mad.f32 r5.x, c10.w, r2.y, r1.x +mad.f32 r4.y, c10.z, r2.y, r4.y mad.f32 r0.x, c13.w, r0.y, r0.x -mov.f32f32 r0.y, r1.y -mad.f32 r1.y, r3.x, r2.w, r4.y -mad.f32 r3.x, r5.z, r2.w, r5.w -mad.f32 r2.w, r6.x, r2.w, r3.w -mad.f32 r3.w, r0.y, r3.w, r5.x -mov.f32f32 r1.y, r1.y -mov.f32f32 r3.x, r3.x -mov.f32f32 r2.w, r2.w -mov.f32f32 r3.w, r3.w -mul.f r4.y, r1.y, r1.y +mad.f32 r0.y, c2.w, r2.y, r1.y +mul.f r6.z, c0.z, r2.z +rsq r1.x, r4.x +(ss)mov.f32f32 r1.y, r1.x +mad.f32 r2.x, r3.x, r1.x, r2.x +max.f r1.x, c17.x, r2.w mad.f32 r0.x, c14.w, r0.z, r0.x -mad.f32 r0.z, r3.x, r3.x, r4.y -max.f r3.w, c17.x, r3.w -mad.f32 r1.x, c2.w, r2.z, r1.x -mad.f32 r4.y, c1.z, r4.x, r5.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r3.w, r3.w -mad.f32 r0.z, r2.w, r2.w, r0.z +mad.f32 r0.z, r5.y, r1.y, r1.z +mov.f32f32 r1.z, r2.x +mad.f32 r0.w, r3.y, r1.y, r0.w +nop +mov.f32f32 r2.w, r0.z +mul.f r1.y, r1.z, r1.z +(ss)mov.f32f32 r4.x, r0.w +mov.f32f32 r3.x, r1.x +mad.f32 r0.z, r0.z, r2.w, r1.y add.f r0.x, r0.x, c15.w -mad.f32 r4.y, c2.z, r2.z, r4.y -mul.f r5.x, c0.y, r2.x -mul.f r5.y, c0.x, r2.x -mad.f32 r5.z, c7.x, r2.z, c7.y -mad.f32 r2.x, c7.x, r2.x, c7.y +mad.f32 r0.z, r0.w, r4.x, r0.z +mul.f r1.z, r3.x, c6.z +mul.f r1.y, r3.x, c6.y +mad.f32 r0.w, c11.y, r0.x, r3.z +mad.f32 r3.x, c11.x, r0.x, r3.w +mul.f r1.x, r1.x, c6.x +mad.f32 r3.w, c11.w, r0.x, r5.x rsq r0.z, r0.z -(ss)mov.f32f32 r0.z, r0.z -mul.f r5.w, r3.w, c6.z -mul.f r6.x, r3.w, c6.y -mul.f r3.w, r3.w, c6.x -mul.f r2.w, r2.w, r0.z -mul.f r3.x, r3.x, r0.z -mul.f r0.z, r1.y, r0.z -mov.f32f32 r1.y, r5.w -mov.f32f32 r2.w, r2.w -mov.f32f32 r3.x, r3.x -mov.f32f32 r0.z, r0.z -nop -mov.f32f32 r6.w, r2.w -mov.f32f32 r6.z, r3.x -mov.f32f32 r6.y, r0.z -mov.f32f32 r7.w, r1.y -mov.f32f32 r0.z, r6.x -mov.f32f32 r1.y, r3.w -(rpt1)nop -mov.f32f32 r7.z, r0.z -mov.f32f32 r7.y, r1.y -mad.f32 r0.z, c11.w, r0.x, r3.y -mad.f32 r1.y, c11.z, r0.x, r3.z -mad.f32 r2.w, c11.y, r0.x, r4.z -mad.f32 r3.x, c11.x, r0.x, r4.w -mov.f32f32 r0.z, r0.z -mov.f32f32 r1.y, r1.y -mul.f r2.w, r2.w, c16.y +(ss)mov.f32f32 r3.z, r0.z +mul.f r5.x, r2.x, r0.z +mul.f r3.y, r0.w, c16.y mul.f r3.x, r3.x, c16.x -mov.f32f32 r9.x, r0.z -mov.f32f32 r8.w, r1.y -mov.f32f32 r8.z, r2.w -mov.f32f32 r8.y, r3.x -mad.f32 r0.z, c3.w, r0.x, r1.x -mad.f32 r1.x, c3.z, r0.x, r4.y -mad.f32 r1.y, c1.y, r4.x, r5.x -mad.f32 r2.w, c1.x, r4.x, r5.y -mov.f32f32 r3.w, r0.z -mov.f32f32 r3.z, r1.x -mad.f32 r0.z, c2.y, r2.z, r1.y -mad.f32 r1.x, c2.x, r2.z, r2.w -mad.f32 r0.z, c3.y, r0.x, r0.z -mad.f32 r0.x, c3.x, r0.x, r1.x -mov.f32f32 r1.x, r5.z -mov.f32f32 r1.y, r2.x -mov.f32f32 r3.y, r0.z -mov.f32f32 r3.x, r0.x -mov.f32f32 r4.y, r1.x -mov.f32f32 r4.x, r1.y -mov.f32f32 r0.x, r0.y -mov.f32f32 r0.y, r0.w -mov.f32f32 r0.z, r2.y -nop -mov.f32f32 r2.z, r0.x -mov.f32f32 r2.y, r0.y -mov.f32f32 r2.x, r0.z -mov.f32f32 r0.x, (0.000000) -mov.f32f32 r0.y, r1.w -mov.f32f32 r0.z, r1.z -mov.f32f32 r0.w, (0.000000) -mov.f32f32 r7.x, r0.x -mov.f32f32 r4.w, r0.y -mov.f32f32 r4.z, r0.z -mov.f32f32 r2.w, r0.w -mov.f32f32 r0.x, (0.000000) -(rpt2)nop -mov.f32f32 r8.x, r0.x +mul.f r5.z, r4.x, r3.z +mul.f r5.y, r2.w, r3.z +mad.f32 r3.z, c11.z, r0.x, r4.y +mad.f32 r0.w, c3.w, r0.x, r0.y +mad.f32 r0.y, c1.z, r5.w, r6.z +(ss)mul.f r0.z, c0.y, r2.z +mad.f32 r0.y, c2.z, r2.y, r0.y +mad.f32 r2.x, c1.y, r5.w, r0.z +mad.f32 r0.z, c3.z, r0.x, r0.y +mad.f32 r0.y, c2.y, r2.y, r2.x +mul.f r2.x, c0.x, r2.z +mad.f32 r0.y, c3.y, r0.x, r0.y +mad.f32 r2.x, c1.x, r5.w, r2.x +mad.f32 r4.y, c7.x, r2.y, c7.y +mad.f32 r2.x, c2.x, r2.y, r2.x +mad.f32 r4.x, c7.x, r2.z, c7.y +mad.f32 r0.x, c3.x, r0.x, r2.x +mov.f32f32 r2.z, r6.y +mov.f32f32 r2.y, r6.x +mov.f32f32 r2.x, r1.w +mov.f32f32 r5.w, (0.000000) +mov.f32f32 r2.w, (0.000000) +mov.f32f32 r1.w, (0.000000) end nop -; VERT: outputs: r3.x (0:0) r7.y (5:9) r2.x (5:10) r8.y (5:11) r4.x (5:12) r6.y (5:13) -; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r1.z (0:0,cm=3,il=16,b=0) -; VERT: 154 instructions, 0 half, 10 full -; pos: r3.x +nop +; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11) r4.x (5:12) r5.x (5:13) +; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r4.z (0:0,cm=3,il=16,b=0) +; VERT: 102 instructions, 0 half, 7 full diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-54.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-54.asm index 4bf6e1b..d0ec086 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-54.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-54.asm @@ -4,10 +4,14 @@ @in(r0.w) in3 @in(r1.x) in4 @in(r1.y) in5 -@out(r4.w) out0 -@out(r5.x) out1 -@out(r5.y) out2 -@out(r5.z) out3 +@out(r2.x) out0 +@out(r2.y) out1 +@out(r2.z) out2 +@out(r2.w) out3 +@const(c12.x) 0x3f000000, 0x00000000, 0x3f800000, 0xbb449ba6 +@const(c13.x) 0xbf000000, 0x40000000, 0x3f800000, 0x3de38866 +@const(c14.x) 0x3fb8aa65, 0x3cf5c28f, 0x00000000, 0x00000000 +@const(c15.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.x, 8, r1.x add.f r0.y, r0.w, c12.y bary.f r0.w, 4, r1.x @@ -23,227 +27,144 @@ add.f r3.x, r1.z, c13.x mad.f32 r2.y, r2.z, r2.z, r2.y add.f r1.w, r1.w, (neg)r2.w (ss)mul.f r0.y, r0.z, r0.y -floor.f r0.z, r3.x -mov.f32f32 r2.y, r2.y -mov.f32f32 r1.w, r1.w +absneg.f r0.z, (neg)c9.x +bary.f r2.w, 6, r1.x +mov.f32f32 r3.y, r1.w +floor.f r3.z, r3.x +mul.f r0.z, r0.z, c9.x +mad.f32 r2.y, r2.w, r2.w, r2.y +mul.f r3.w, c12.x, r3.y +add.f r3.x, r3.x, (neg)r3.z +mul.f r0.z, r0.z, r0.y mov.f32f32 r0.y, r0.y -absneg.f r2.w, (neg)c9.x -bary.f r3.y, 6, r1.x -mul.f r3.z, c12.x, r1.w -add.f r0.z, r3.x, (neg)r0.z -mul.f r2.w, r2.w, c9.x -mad.f32 r2.y, r3.y, r3.y, r2.y -mov.f32f32 r3.x, r3.z -mov.f32f32 r0.z, r0.z -mul.f r2.w, r2.w, r0.y -add.f r3.z, c13.y, (neg)r1.w -add.f r0.x, r0.x, (neg)r3.x -mul.f r3.x, c12.x, r0.z -mov.f32f32 r2.w, r2.w +add.f r0.x, r0.x, (neg)r3.w +mov.f32f32 r3.z, r3.x rsq r2.y, r2.y -(ss)mov.f32f32 r2.y, r2.y -mov.f32f32 r0.x, r0.x -mov.f32f32 r3.x, r3.x -mul.f r0.y, r2.w, r0.y -mul.f r0.w, r0.w, r2.y -add.f r2.w, c13.x, r0.x +(ss)mov.f32f32 r3.w, r2.y +mul.f r0.y, r0.z, r0.y +mov.f32f32 r0.z, r0.x +mul.f r4.x, c12.x, r3.z add.f r0.x, c13.z, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r2.w, r2.w -bary.f r3.w, 16, r1.x -mul.f r0.y, r0.y, c14.x -mov.f32f32 r0.x, r0.x -mul.f r2.w, r2.w, c4.z mul.f r0.w, r0.w, r3.w -mul.f r2.z, r2.z, r2.y -mov.f32f32 r0.y, r0.y -mov.f32f32 r3.w, r2.w -mul.f r0.x, r0.x, c4.z -mov.f32f32 r2.z, r2.z -bary.f r4.x, 17, r1.x -mov.f32f32 r4.y, r3.w -add.f r1.z, r1.z, (neg)r3.x +add.f r0.z, c13.x, r0.z +mul.f r0.y, r0.y, c14.x +bary.f r4.y, 16, r1.x +add.f r1.z, r1.z, (neg)r4.x +mul.f r4.z, r0.z, c4.z +mul.f r5.y, r0.x, c4.z +mul.f r0.x, r0.w, r4.y +add.f r0.z, c13.z, r1.z +mov.f32f32 r6.x, r4.z +mov.f32f32 r0.w, r1.z exp2 r0.y, r0.y -(ss)mov.f32f32 r0.y, r0.y -mad.f32 r0.w, r2.z, r4.x, r0.w -mov.f32f32 r2.z, r0.x -mov.f32f32 r1.z, r1.z -add.f r3.x, c15.y, (neg)r0.y -mov.f32f32 r0.w, r0.w -mul.f r2.y, r3.y, r2.y -add.f r3.y, c13.x, r1.z -mul.f r3.x, r3.x, c9.y -mul.f r0.y, r0.y, c12.z -mov.f32f32 r2.y, r2.y -mov.f32f32 r3.y, r3.y -bary.f r3.w, 18, r1.x -add.f r0.y, r0.y, r3.x -mov.f32f32 r2.z, r2.z -mul.f r3.x, r3.y, c4.w -mad.f32 r0.w, r2.y, r3.w, r0.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r5.x, r2.z -mov.f32f32 r2.y, r3.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.y, r0.y -add.f r1.z, c13.z, r1.z -mov.f32f32 r4.z, r2.y -bary.f r2.y, 10, r1.x +(ss)mov.f32f32 r1.z, r0.y +mul.f r2.z, r2.z, r3.w +bary.f r3.w, 17, r1.x +add.f r0.w, c13.x, r0.w +add.f r1.z, c15.y, (neg)r1.z +mov.f32f32 r6.w, r5.y +mul.f r5.z, r0.z, c4.w +mul.f r7.x, r0.w, c4.w +mul.f r0.z, r1.z, c9.y +(ss)mul.f r0.y, r0.y, c12.z +mad.f32 r0.x, r2.z, r3.w, r0.x +mov.f32f32 r6.y, r7.x +bary.f r0.w, 10, r1.x +add.f r0.y, r0.y, r0.z +mul.f r0.z, r2.w, r2.y +bary.f r1.z, 18, r1.x +add.f r5.w, r0.w, c12.w max.f r0.y, r0.y, c12.y -max.f r0.w, c12.y, r0.w -mov.f32f32 r1.z, r1.z -add.f r2.y, r2.y, c12.w +mov.f32f32 r4.w, r5.z +mad.f32 r0.x, r0.z, r1.z, r0.x +mov.f32f32 r6.z, r5.w min.f r0.y, r0.y, c12.z -mov.f32f32 r0.w, r0.w -mul.f r1.z, r1.z, c4.w -mov.f32f32 r2.z, r2.y -add.f r3.y, c15.y, (neg)r0.y -add.f r3.w, c15.y, (neg)r0.y -add.f r4.x, c15.y, (neg)r0.y -mov.f32f32 r4.w, r2.z -mul.f r2.z, r3.y, c8.z -mul.f r3.y, r3.w, c8.y -mul.f r3.w, r4.x, c8.x -log2 r0.w, r0.w -(ss)mul.f r0.w, c10.y, r0.w -mov.f32f32 r4.x, r1.z -mov.f32f32 r0.x, r0.x -sam.s (f32)(x)r4.y, r4.y, s#3, t#3 -(sy)(ss)mov.f32f32 r4.y, r4.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r4.x, r4.x -mov.f32f32 r5.w, r0.x -mov.f32f32 r0.x, r4.y -mov.f32f32 r3.z, r3.z -add.f r4.y, c13.y, (neg)r0.z -mov.f32f32 r5.y, r4.x -mov.f32f32 r4.x, r2.y -mov.f32f32 r3.x, r3.x -mov.f32f32 r4.y, r4.y -exp2 r0.w, r0.w -bary.f r4.z, 14, r1.x -mov.f32f32 r5.z, r4.x -mov.f32f32 r6.x, r3.x -mul.f r3.x, r3.z, r4.y -mov.f32f32 r4.x, r4.z -mov.f32f32 r4.w, r2.y -mov.f32f32 r2.w, r2.w -mul.f r0.x, r3.x, r0.x -mov.f32f32 r6.z, r4.x -mov.f32f32 r6.y, r4.w -bary.f r3.x, 15, r1.x -sam.s (f32)(x)r4.x, r5.x, s#3, t#3 +mov.f32f32 r7.y, r5.w +mov.f32f32 r5.x, r5.w +max.f r0.x, c12.y, r0.x +nop +add.f r0.z, c15.y, (neg)r0.y +sam.s (f32)(x)r7.z, r6.x, s#3, t#3 +add.f r0.w, c13.y, (neg)r3.y +add.f r1.z, c15.y, (neg)r0.y +add.f r2.y, c15.y, (neg)r0.y +mul.f r0.z, r0.z, c8.z +mov.f32f32 r2.z, r0.w +add.f r2.w, c13.y, (neg)r3.z +mul.f r1.z, r1.z, c8.y +mul.f r3.y, r2.y, c8.x +log2 r0.x, r0.x +(ss)mul.f r0.x, c10.y, r0.x +mov.f32f32 r2.y, r2.w +sam.s (f32)(x)r3.z, r6.w, s#3, t#3 nop -(sy)mov.f32f32 r4.x, r4.x -mov.f32f32 r4.w, r2.w -mov.f32f32 r1.z, r1.z +sam.s (f32)(x)r3.w, r4.z, s#3, t#3 +sam.s (f32)(x)r4.x, r5.y, s#3, t#3 add.f r1.w, r1.w, c12.z -sam.s (f32)(x)r2.w, r5.w, s#3, t#3 -(sy)mov.f32f32 r2.w, r2.w -(ss)mov.f32f32 r5.y, r3.x -mov.f32f32 r5.x, r1.z -mul.f r1.z, r1.w, r4.y -mov.f32f32 r2.y, r2.y -add.f r0.z, r0.z, c12.z -mov.f32f32 r2.x, r2.x -mad.f32 r0.x, r1.z, r2.w, r0.x -mov.f32f32 r6.w, r5.y -mov.f32f32 r5.y, r2.y -mul.f r1.z, r3.z, r0.z -mov.f32f32 r0.x, r0.x -mov.f32f32 r2.x, r2.x +add.f r3.x, r3.x, c12.z +mul.f r2.z, r2.z, r2.y bary.f r2.y, 13, r1.x -mul.f r0.z, r1.w, r0.z -sam (f32)(xyz)r5.z, r6.z, s#2, t#2 -(sy)mul.f r1.w, c7.z, r6.x -sam.s (f32)(x)r2.w, r4.w, s#3, t#3 -(sy)mov.f32f32 r2.w, r2.w -mul.f r3.z, c7.y, r5.w -mul.f r4.y, c7.x, r5.z -(ss)mul.f r1.w, r1.w, r0.w -mad.f32 r0.x, r1.z, r2.w, r0.x -mov.f32f32 r1.z, r4.z -mul.f r2.w, r3.z, r0.w -mul.f r0.w, r4.y, r0.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r4.y, r1.z -mad.f32 r0.x, r0.z, r4.x, r0.x -mov.f32f32 r0.z, r3.x -mov.f32f32 r1.z, r2.y -mov.f32f32 r3.x, c12.z -mov.f32f32 r0.x, r0.x -mov.f32f32 r4.z, r0.z -mov.f32f32 r2.y, r1.z -mov.f32f32 r5.z, r3.x -mul.f r0.x, c13.w, r0.x -bary.f r0.z, 2, r1.x -bary.f r1.z, 1, r1.x -mov.f32f32 r3.x, c12.y -mov.f32f32 r0.x, r0.x -sam (f32)(xyz)r5.w, r4.y, s#0, t#0 -(sy)mad.f32 r0.z, r6.y, r0.z, r1.w -mad.f32 r1.z, r6.x, r1.z, r2.w +bary.f r4.y, 14, r1.x +(ss)bary.f r4.z, 15, r1.x +(sy)mul.f r2.z, r2.z, r7.z +mul.f r2.w, r1.w, r2.w +exp2 r0.x, r0.x +mov.f32f32 r4.w, r4.y +mov.f32f32 r5.x, r4.z +mul.f r0.w, r0.w, r3.x +mad.f32 r2.z, r2.w, r3.z, r2.z +mul.f r1.w, r1.w, r3.x +sam (f32)(w)r5.y, r2.x, s#1, t#1 +(sy)(ss)cmps.f.lt r2.x, r6.x, c14.y +mad.f32 r0.w, r0.w, r3.w, r2.z +sam (f32)(xyz)r6.y, r4.y, s#0, t#0 +mov.f32f32 r2.w, c12.z +mad.f32 r0.w, r1.w, r4.x, r0.w +sam (f32)(xyz)r3.z, r4.w, s#2, t#2 +(sy)mul.f r1.w, c7.y, r3.w +mul.f r2.y, c7.x, r3.z +mul.f r2.z, c7.z, r4.x +mul.f r0.w, c13.w, r0.w +(ss)mul.f r1.w, r1.w, r0.x +mul.f r2.y, r2.y, r0.x +mul.f r0.x, r2.z, r0.x +mov.f32f32 r2.z, r0.w +bary.f r3.x, 2, r1.x +bary.f r3.z, 1, r1.x bary.f (ei)r1.x, 0, r1.x -sam (f32)(w)r6.z, r2.x, s#1, t#1 -(sy)cmps.f.lt r1.y, r7.y, c14.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r1.z, r1.z -mad.f32 r0.w, r5.w, r1.x, r0.w -cov.u32f32 r1.x, r1.y -mul.f r0.z, r0.z, r0.x -mul.f r1.y, r1.z, r0.x -mov.f32f32 r0.w, r0.w -cmps.f.ne r1.x, r1.x, c12.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r1.y, r1.y -mad.f32 r0.z, c6.z, r6.y, r0.z -mad.f32 r1.y, c6.y, r6.x, r1.y -mul.f r0.x, r0.w, r0.x -mov.f32f32 r0.w, r7.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r1.y, r1.y -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.w, r0.w -mul.f r0.z, r0.y, r0.z -mul.f r1.y, r0.y, r1.y -mad.f32 r0.x, c6.x, r5.w, r0.x -sel.b32 r0.w, r3.x, r1.x, r0.w -add.f r0.z, r0.z, r2.z -add.f r1.x, r1.y, r3.y -mov.f32f32 r0.x, r0.x -nop -mul.f r0.z, r0.z, r0.w -mul.f r1.x, r1.x, r0.w +cov.u32f32 r1.y, r2.x +mad.f32 r0.x, r6.w, r3.x, r0.x +mad.f32 r1.w, r6.z, r3.z, r1.w +mad.f32 r1.x, r6.y, r1.x, r2.y +cmps.f.ne r1.y, r1.y, c12.y +mul.f r0.x, r0.x, r2.z +mul.f r1.w, r1.w, r2.z +mad.f32 r0.x, c6.z, r6.w, r0.x +mad.f32 r1.w, c6.y, r6.z, r1.w +mul.f r0.w, r1.x, r0.w +mov.f32f32 r1.x, c12.y mul.f r0.x, r0.y, r0.x +mul.f r1.w, r0.y, r1.w +mad.f32 r0.w, c6.x, r6.y, r0.w +sel.b32 r1.x, r1.x, r1.y, r6.x +add.f r0.x, r0.x, r0.z +add.f r0.z, r1.w, r1.z +mul.f r0.y, r0.y, r0.w nop -mul.f r0.y, r0.z, c5.z -mul.f r0.z, r1.x, c5.y -add.f r0.x, r0.x, r3.w -nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -mul.f r0.x, r0.x, r0.w -nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -mul.f r0.x, r0.x, c5.x -nop -(ss)mov.f32f32 r5.y, r0.y -mov.f32f32 r5.x, r0.z -mov.f32f32 r0.x, r0.x +mul.f r0.x, r0.x, r1.x +mul.f r0.z, r0.z, r1.x +(rpt1)nop +mul.f r2.z, r0.x, c5.z +mul.f r2.y, r0.z, c5.y +add.f r0.x, r0.y, r3.y (rpt2)nop -mov.f32f32 r0.x, r0.x +mul.f r0.x, r0.x, r1.x (rpt2)nop -mov.f32f32 r4.w, r0.x +mul.f r2.x, r0.x, c5.x end nop nop -nop -; FRAG: outputs: r4.w (1:0) -; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r2.x (5:10,cm=f,il=12,b=1) r3.x (5:11,cm=f,il=16,b=1) r2.y (5:12,cm=f,il=20,b=1) r5.x (5:13,cm=f,il=24,b=1) -; FRAG: 233 instructions, 0 half, 8 full -; pos (bary): r1.x -; color: r4.w -; fragcoord: r0.x +; FRAG: outputs: r2.x (1:0) +; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r2.x (5:10,cm=f,il=12,b=1) r3.x (5:11,cm=f,il=16,b=1) r3.w (5:12,cm=f,il=20,b=1) r5.x (5:13,cm=f,il=24,b=1) +; FRAG: 155 instructions, 0 half, 8 full diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-55.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-55.asm index 9f1027e..e7bcae3 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-55.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-55.asm @@ -6,186 +6,139 @@ @in(r0.w) in4 @in(r1.x) in5 @in(r1.y) in6 -@in(r1.z) in8 -@in(r1.w) in9 -@out(r3.x) out0 -@out(r3.y) out1 -@out(r3.z) out2 -@out(r3.w) out3 -@out(r7.y) out4 -@out(r7.z) out5 -@out(r7.w) out6 -@out(r8.x) out7 +@in(r4.z) in8 +@in(r4.w) in9 +@out(r0.x) out0 +@out(r0.y) out1 +@out(r0.z) out2 +@out(r0.w) out3 +@out(r1.x) out4 +@out(r1.y) out5 +@out(r1.z) out6 +@out(r1.w) out7 @out(r2.x) out8 @out(r2.y) out9 @out(r2.z) out10 @out(r2.w) out11 -@out(r8.y) out12 -@out(r8.z) out13 -@out(r8.w) out14 -@out(r9.x) out15 +@out(r3.x) out12 +@out(r3.y) out13 +@out(r3.z) out14 +@out(r3.w) out15 @out(r4.x) out16 @out(r4.y) out17 @out(r4.z) out18 @out(r4.w) out19 -@out(r6.y) out20 -@out(r6.z) out21 -@out(r6.w) out22 -@out(r7.x) out23 -(sy)(ss)mul.f r2.x, c12.x, r0.x -mul.f r2.y, c12.x, r0.w -mad.f32 r2.x, c13.x, r0.y, r2.x -mad.f32 r2.y, c13.x, r1.x, r2.y -mad.f32 r2.x, c14.x, r0.z, r2.x -mul.f r2.z, c12.z, r0.x -mul.f r2.w, c12.z, r0.w -mul.f r0.w, c12.y, r0.w -add.f r2.x, r2.x, c15.x -mov.f32f32 r2.y, r2.y -mad.f32 r2.z, c13.z, r0.y, r2.z -mad.f32 r2.w, c13.z, r1.x, r2.w -add.f r3.x, c4.x, (neg)r2.x -mad.f32 r2.y, c14.x, r1.y, r2.y -mul.f r3.y, c8.w, r2.x -mul.f r3.z, c8.z, r2.x -mul.f r3.w, r3.x, r3.x -mul.f r4.x, c12.y, r0.x -mov.f32f32 r2.y, r2.y -mad.f32 r4.x, c13.y, r0.y, r4.x -absneg.f r4.y, (neg)c5.x -mad.f32 r4.x, c14.y, r0.z, r4.x -mul.f r4.z, c8.y, r2.x -mul.f r4.w, c8.x, r2.x -mul.f r5.x, r2.y, r4.y -add.f r4.x, r4.x, c15.y -mad.f32 r0.w, c13.y, r1.x, r0.w -mul.f r1.x, c0.w, r2.x -mul.f r5.y, c0.z, r2.x -add.f r5.z, c4.y, (neg)r4.x -mov.f32f32 r0.w, r0.w -mad.f32 r3.y, c9.w, r4.x, r3.y -mad.f32 r3.z, c9.z, r4.x, r3.z -mad.f32 r3.w, r5.z, r5.z, r3.w -mad.f32 r0.w, c14.y, r1.y, r0.w -mad.f32 r2.z, c14.z, r0.z, r2.z -mad.f32 r4.z, c9.y, r4.x, r4.z -mov.f32f32 r3.w, r3.w -mov.f32f32 r0.w, r0.w -add.f r2.z, r2.z, c15.z -absneg.f r5.w, (neg)c5.y -mad.f32 r4.w, c9.x, r4.x, r4.w -mad.f32 r1.x, c1.w, r4.x, r1.x -add.f r6.x, c4.z, (neg)r2.z -mad.f32 r5.x, r0.w, r5.w, r5.x -mad.f32 r3.y, c10.w, r2.z, r3.y -mad.f32 r3.z, c10.z, r2.z, r3.z -mad.f32 r3.w, r6.x, r6.x, r3.w -mov.f32f32 r5.x, r5.x -mov.f32f32 r2.w, r2.w +@out(r5.x) out20 +@out(r5.y) out21 +@out(r5.z) out22 +@out(r5.w) out23 +@const(c17.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 +@const(c18.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 +(sy)(ss)mul.f r1.z, c12.x, r0.x +mul.f r1.w, c12.x, r0.w +mad.f32 r1.z, c13.x, r0.y, r1.z +mad.f32 r1.w, c13.x, r1.x, r1.w +mad.f32 r1.z, c14.x, r0.z, r1.z +mad.f32 r1.w, c14.x, r1.y, r1.w +absneg.f r2.x, (neg)c5.x +mul.f r2.y, c12.y, r0.x +add.f r2.z, r1.z, c15.x +mad.f32 r1.z, c13.y, r0.y, r2.y +mul.f r2.y, c12.z, r0.x +mul.f r2.w, r1.w, r2.x +add.f r3.x, c4.x, (neg)r2.z +mul.f r3.y, c12.y, r0.w +mul.f r3.z, c8.y, r2.z +mul.f r3.w, c8.x, r2.z +mul.f r4.x, r3.x, r3.x +mad.f32 r1.z, c14.y, r0.z, r1.z +mad.f32 r3.y, c13.y, r1.x, r3.y +mul.f r4.y, c8.w, r2.z +mul.f r5.x, c8.z, r2.z +add.f r5.w, r1.z, c15.y +mad.f32 r6.x, c14.y, r1.y, r3.y +absneg.f r1.z, (neg)c5.y +mul.f r3.y, c0.w, r2.z +add.f r5.y, c4.y, (neg)r5.w +mad.f32 r3.z, c9.y, r5.w, r3.z +mad.f32 r3.w, c9.x, r5.w, r3.w +mad.f32 r2.w, r6.x, r1.z, r2.w +mad.f32 r4.x, r5.y, r5.y, r4.x +mad.f32 r2.y, c13.z, r0.y, r2.y +mul.f r0.w, c12.z, r0.w +mad.f32 r2.y, c14.z, r0.z, r2.y +mad.f32 r0.w, c13.z, r1.x, r0.w +mad.f32 r1.x, c9.w, r5.w, r4.y +mad.f32 r4.y, c9.z, r5.w, r5.x +add.f r2.y, r2.y, c15.z +mad.f32 r6.y, c14.z, r1.y, r0.w +absneg.f r0.w, (neg)c5.z +mad.f32 r1.y, c1.w, r5.w, r3.y +add.f r3.y, c4.z, (neg)r2.y +mad.f32 r3.z, c10.y, r2.y, r3.z +mad.f32 r3.w, c10.x, r2.y, r3.w +mad.f32 r2.w, r6.y, r0.w, r2.w +mad.f32 r4.x, r3.y, r3.y, r4.x mul.f r0.x, c12.w, r0.x -mad.f32 r4.z, c10.y, r2.z, r4.z -mad.f32 r4.w, c10.x, r2.z, r4.w -mad.f32 r1.y, c14.z, r1.y, r2.w -rsq r2.w, r3.w -(ss)mov.f32f32 r2.w, r2.w -(ss)absneg.f r3.w, (neg)c5.z +mad.f32 r5.x, c10.w, r2.y, r1.x +mad.f32 r4.y, c10.z, r2.y, r4.y mad.f32 r0.x, c13.w, r0.y, r0.x -mov.f32f32 r0.y, r1.y -mad.f32 r1.y, r3.x, r2.w, r4.y -mad.f32 r3.x, r5.z, r2.w, r5.w -mad.f32 r2.w, r6.x, r2.w, r3.w -mad.f32 r3.w, r0.y, r3.w, r5.x -mov.f32f32 r1.y, r1.y -mov.f32f32 r3.x, r3.x -mov.f32f32 r2.w, r2.w -mov.f32f32 r3.w, r3.w -mul.f r4.y, r1.y, r1.y +mad.f32 r0.y, c2.w, r2.y, r1.y +mul.f r6.z, c0.z, r2.z +rsq r1.x, r4.x +(ss)mov.f32f32 r1.y, r1.x +mad.f32 r2.x, r3.x, r1.x, r2.x +max.f r1.x, c17.x, r2.w mad.f32 r0.x, c14.w, r0.z, r0.x -mad.f32 r0.z, r3.x, r3.x, r4.y -max.f r3.w, c17.x, r3.w -mad.f32 r1.x, c2.w, r2.z, r1.x -mad.f32 r4.y, c1.z, r4.x, r5.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r3.w, r3.w -mad.f32 r0.z, r2.w, r2.w, r0.z +mad.f32 r0.z, r5.y, r1.y, r1.z +mov.f32f32 r1.z, r2.x +mad.f32 r0.w, r3.y, r1.y, r0.w +nop +mov.f32f32 r2.w, r0.z +mul.f r1.y, r1.z, r1.z +(ss)mov.f32f32 r4.x, r0.w +mov.f32f32 r3.x, r1.x +mad.f32 r0.z, r0.z, r2.w, r1.y add.f r0.x, r0.x, c15.w -mad.f32 r4.y, c2.z, r2.z, r4.y -mul.f r5.x, c0.y, r2.x -mul.f r5.y, c0.x, r2.x -mad.f32 r5.z, c7.x, r2.z, c7.y -mad.f32 r2.x, c7.x, r2.x, c7.y +mad.f32 r0.z, r0.w, r4.x, r0.z +mul.f r1.z, r3.x, c6.z +mul.f r1.y, r3.x, c6.y +mad.f32 r0.w, c11.y, r0.x, r3.z +mad.f32 r3.x, c11.x, r0.x, r3.w +mul.f r1.x, r1.x, c6.x +mad.f32 r3.w, c11.w, r0.x, r5.x rsq r0.z, r0.z -(ss)mov.f32f32 r0.z, r0.z -mul.f r5.w, r3.w, c6.z -mul.f r6.x, r3.w, c6.y -mul.f r3.w, r3.w, c6.x -mul.f r2.w, r2.w, r0.z -mul.f r3.x, r3.x, r0.z -mul.f r0.z, r1.y, r0.z -mov.f32f32 r1.y, r5.w -mov.f32f32 r2.w, r2.w -mov.f32f32 r3.x, r3.x -mov.f32f32 r0.z, r0.z -nop -mov.f32f32 r6.w, r2.w -mov.f32f32 r6.z, r3.x -mov.f32f32 r6.y, r0.z -mov.f32f32 r7.w, r1.y -mov.f32f32 r0.z, r6.x -mov.f32f32 r1.y, r3.w -(rpt1)nop -mov.f32f32 r7.z, r0.z -mov.f32f32 r7.y, r1.y -mad.f32 r0.z, c11.w, r0.x, r3.y -mad.f32 r1.y, c11.z, r0.x, r3.z -mad.f32 r2.w, c11.y, r0.x, r4.z -mad.f32 r3.x, c11.x, r0.x, r4.w -mov.f32f32 r0.z, r0.z -mov.f32f32 r1.y, r1.y -mul.f r2.w, r2.w, c16.y +(ss)mov.f32f32 r3.z, r0.z +mul.f r5.x, r2.x, r0.z +mul.f r3.y, r0.w, c16.y mul.f r3.x, r3.x, c16.x -mov.f32f32 r9.x, r0.z -mov.f32f32 r8.w, r1.y -mov.f32f32 r8.z, r2.w -mov.f32f32 r8.y, r3.x -mad.f32 r0.z, c3.w, r0.x, r1.x -mad.f32 r1.x, c3.z, r0.x, r4.y -mad.f32 r1.y, c1.y, r4.x, r5.x -mad.f32 r2.w, c1.x, r4.x, r5.y -mov.f32f32 r3.w, r0.z -mov.f32f32 r3.z, r1.x -mad.f32 r0.z, c2.y, r2.z, r1.y -mad.f32 r1.x, c2.x, r2.z, r2.w -mad.f32 r0.z, c3.y, r0.x, r0.z -mad.f32 r0.x, c3.x, r0.x, r1.x -mov.f32f32 r1.x, r5.z -mov.f32f32 r1.y, r2.x -mov.f32f32 r3.y, r0.z -mov.f32f32 r3.x, r0.x -mov.f32f32 r4.y, r1.x -mov.f32f32 r4.x, r1.y -mov.f32f32 r0.x, r0.y -mov.f32f32 r0.y, r0.w -mov.f32f32 r0.z, r2.y -nop -mov.f32f32 r2.z, r0.x -mov.f32f32 r2.y, r0.y -mov.f32f32 r2.x, r0.z -mov.f32f32 r0.x, (0.000000) -mov.f32f32 r0.y, r1.w -mov.f32f32 r0.z, r1.z -mov.f32f32 r0.w, (0.000000) -mov.f32f32 r7.x, r0.x -mov.f32f32 r4.w, r0.y -mov.f32f32 r4.z, r0.z -mov.f32f32 r2.w, r0.w -mov.f32f32 r0.x, (0.000000) -(rpt2)nop -mov.f32f32 r8.x, r0.x +mul.f r5.z, r4.x, r3.z +mul.f r5.y, r2.w, r3.z +mad.f32 r3.z, c11.z, r0.x, r4.y +mad.f32 r0.w, c3.w, r0.x, r0.y +mad.f32 r0.y, c1.z, r5.w, r6.z +(ss)mul.f r0.z, c0.y, r2.z +mad.f32 r0.y, c2.z, r2.y, r0.y +mad.f32 r2.x, c1.y, r5.w, r0.z +mad.f32 r0.z, c3.z, r0.x, r0.y +mad.f32 r0.y, c2.y, r2.y, r2.x +mul.f r2.x, c0.x, r2.z +mad.f32 r0.y, c3.y, r0.x, r0.y +mad.f32 r2.x, c1.x, r5.w, r2.x +mad.f32 r4.y, c7.x, r2.y, c7.y +mad.f32 r2.x, c2.x, r2.y, r2.x +mad.f32 r4.x, c7.x, r2.z, c7.y +mad.f32 r0.x, c3.x, r0.x, r2.x +mov.f32f32 r2.z, r6.y +mov.f32f32 r2.y, r6.x +mov.f32f32 r2.x, r1.w +mov.f32f32 r5.w, (0.000000) +mov.f32f32 r2.w, (0.000000) +mov.f32f32 r1.w, (0.000000) end nop -; VERT: outputs: r3.x (0:0) r7.y (5:9) r2.x (5:10) r8.y (5:11) r4.x (5:12) r6.y (5:13) -; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r1.z (0:0,cm=3,il=16,b=0) -; VERT: 154 instructions, 0 half, 10 full -; pos: r3.x +nop +; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11) r4.x (5:12) r5.x (5:13) +; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r4.z (0:0,cm=3,il=16,b=0) +; VERT: 102 instructions, 0 half, 7 full diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-56.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-56.asm index 6e58225..f1c05da 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-56.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-56.asm @@ -4,222 +4,159 @@ @in(r0.w) in3 @in(r1.x) in4 @in(r1.y) in5 -@out(r2.y) out0 -@out(r2.z) out1 -@out(r2.w) out2 -@out(r3.x) out3 +@out(r2.x) out0 +@out(r2.y) out1 +@out(r2.z) out2 +@out(r2.w) out3 +@const(c10.x) 0x3f000000, 0x00000000, 0x3f800000, 0xbb449ba6 +@const(c11.x) 0xbf000000, 0x40000000, 0x3f800000, 0x3de38866 +@const(c12.x) 0x3fb8aa65, 0x3cf5c28f, 0x00000000, 0x00000000 +@const(c13.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.x, 4, r1.x add.f r0.y, r0.w, c10.y bary.f r0.w, 5, r1.x bary.f r1.z, 10, r1.x -add.f r1.w, r0.x, c11.x -bary.f r2.x, 8, r1.x +add.f r2.x, r0.x, c11.x +bary.f r1.w, 11, r1.x add.f r2.y, r0.w, c11.x -mov.f32f32 r1.z, r1.z -floor.f r2.z, r1.w +bary.f r2.z, 8, r1.x +floor.f r3.x, r2.x rcp r0.y, r0.y add.f r0.z, r0.z, c10.y -floor.f r2.w, r2.y -mov.f32f32 r3.x, r1.z -add.f r1.z, r1.w, (neg)r2.z +floor.f r3.y, r2.y +bary.f r2.w, 9, r1.x +add.f r2.x, r2.x, (neg)r3.x (ss)mul.f r0.y, r0.z, r0.y -add.f r0.z, r2.y, (neg)r2.w -bary.f r1.w, 11, r1.x -mov.f32f32 r1.z, r1.z +absneg.f r0.z, (neg)c8.x +add.f r2.y, r2.y, (neg)r3.y +mov.f32f32 r3.x, r2.x +sam (f32)(xyzw)r3.y, r1.z, s#0, t#0 +(sy)(ss)add.f r1.z, c13.y, (neg)r4.x +mul.f r0.z, r0.z, c8.x +mov.f32f32 r1.w, r2.y +mul.f r4.y, c10.x, r3.x +add.f r3.x, c11.y, (neg)r3.x +mul.f r0.z, r0.z, r0.y mov.f32f32 r0.y, r0.y -absneg.f r2.y, (neg)c8.x -mov.f32f32 r0.z, r0.z -mul.f r2.z, c10.x, r1.z -mov.f32f32 r1.w, r1.w -mul.f r2.y, r2.y, c8.x -add.f r2.w, c11.y, (neg)r1.z -mov.f32f32 r2.z, r2.z -mul.f r3.z, c10.x, r0.z -mul.f r2.y, r2.y, r0.y -mov.f32f32 r3.y, r1.w -add.f r0.x, r0.x, (neg)r2.z -mov.f32f32 r1.w, r3.z -mov.f32f32 r2.y, r2.y -mov.f32f32 r2.z, r2.w -mov.f32f32 r0.x, r0.x -add.f r0.w, r0.w, (neg)r1.w -mul.f r0.y, r2.y, r0.y -sam (f32)(xyzw)r2.w, r3.x, s#0, t#0 -(sy)add.f r1.w, c13.y, (neg)r3.z -add.f r2.y, c11.x, r0.x +add.f r0.x, r0.x, (neg)r4.y +mul.f r4.y, c10.x, r1.w +mov.f32f32 r4.z, r3.x +mul.f r0.y, r0.z, r0.y +mov.f32f32 r0.z, r0.x +add.f r0.w, r0.w, (neg)r4.y add.f r0.x, c11.z, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r2.y, r2.y -mov.f32f32 r0.x, r0.x mul.f r0.y, r0.y, c12.x -add.f r3.w, c11.z, r0.w -mul.f r2.y, r2.y, c3.z -mul.f r0.x, r0.x, c3.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r3.w, r3.w -mov.f32f32 r4.x, r2.y -mov.f32f32 r4.y, r0.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r2.y, r2.y -mov.f32f32 r4.z, r4.x -add.f r0.w, c11.x, r0.w -exp2 r0.y, r0.y -(ss)mov.f32f32 r0.y, r0.y -mov.f32f32 r4.x, r4.y -mov.f32f32 r5.y, r0.x -mov.f32f32 r0.x, r0.w -add.f r0.w, c13.y, (neg)r0.y -mov.f32f32 r6.x, r4.x -mul.f r3.w, r3.w, c3.w -mul.f r0.x, r0.x, c3.w -mul.f r0.w, r0.w, c8.y -mul.f r0.y, r0.y, c10.z -mov.f32f32 r4.x, r3.w -mov.f32f32 r4.y, r0.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r6.w, r2.y -mov.f32f32 r2.y, r3.w -mov.f32f32 r4.w, r4.y -bary.f r3.w, 6, r1.x -add.f r0.y, r0.y, r0.w -mov.f32f32 r0.w, r4.x -mov.f32f32 r5.z, r0.x -add.f r0.x, r3.w, c10.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r6.y, r0.w -mov.f32f32 r7.x, r2.y -mov.f32f32 r0.w, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r2.y, r0.x -mov.f32f32 r3.w, r0.x -mov.f32f32 r5.x, r0.w -max.f r0.y, r0.y, c10.y -mov.f32f32 r6.z, r2.y -mov.f32f32 r5.w, r3.w -mov.f32f32 r0.x, r0.x -mul.f r0.w, r1.w, c4.z -add.f r1.w, c13.y, (neg)r3.z -sam.s (f32)(x)r2.y, r4.z, s#2, t#2 -(sy)mov.f32f32 r2.y, r2.y -min.f r0.y, r0.y, c10.z -sam.s (f32)(x)r3.w, r6.x, s#2, t#2 -(sy)mov.f32f32 r3.w, r3.w -sam.s (f32)(x)r4.x, r5.y, s#2, t#2 -(sy)mov.f32f32 r4.x, r4.x -mov.f32f32 r2.y, r2.y -add.f r4.y, c11.y, (neg)r0.z -(ss)add.f r4.z, c13.y, (neg)r0.y -add.f r4.w, c13.y, (neg)r0.y -add.f r5.x, c13.y, (neg)r0.y -mov.f32f32 r4.y, r4.y -mul.f r4.z, r4.z, c7.z -mul.f r4.w, r4.w, c7.y -mul.f r5.x, r5.x, c7.x -mul.f r5.y, r2.z, r4.y -mov.f32f32 r7.y, r0.x -mul.f r0.x, r3.z, c10.z -mul.f r1.w, r1.w, c4.y -mul.f r2.y, r5.y, r2.y -add.f r1.z, r1.z, c10.z -add.f r0.x, r0.x, r0.w -mul.f r0.w, r3.z, c10.z -sam.s (f32)(x)r5.y, r6.w, s#2, t#2 -(sy)mov.f32f32 r5.y, r5.y -mul.f r4.y, r1.z, r4.y -add.f r5.z, c13.y, (neg)r3.z -mul.f r0.x, r3.y, r0.x -add.f r0.w, r0.w, r1.w -mad.f32 r1.w, r4.y, r4.x, r2.y -mul.f r2.y, r5.z, c4.x -mov.f32f32 r0.x, r0.x -bary.f r3.y, 2, r1.x -mov.f32f32 r1.w, r1.w -add.f r0.z, r0.z, c10.z -mul.f r0.w, r3.x, r0.w -mul.f r3.x, r3.z, c10.z -mov.f32f32 r2.x, r2.x -mul.f r2.z, r2.z, r0.z -mul.f r3.y, r0.x, r3.y -mov.f32f32 r0.w, r0.w -add.f r2.y, r3.x, r2.y -mad.f32 r1.w, r2.z, r5.y, r1.w -bary.f r2.z, 1, r1.x -mov.f32f32 r4.x, r2.x -mul.f r2.x, r2.w, r2.y -mov.f32f32 r1.w, r1.w -mul.f r0.z, r1.z, r0.z -mul.f r1.z, r0.w, r2.z -mov.f32f32 r2.x, r2.x -nop -mad.f32 r0.z, r0.z, r3.w, r1.w -bary.f r1.w, 0, r1.x -bary.f (ei)r1.x, 9, r1.x -mov.f32f32 r1.y, c10.z -mov.f32f32 r0.z, r0.z -mul.f r1.w, r2.x, r1.w -mov.f32f32 r1.x, r1.x -mov.f32f32 r3.x, r1.y -mul.f r0.z, c11.w, r0.z -mov.f32f32 r1.y, c10.y -(rpt1)nop -mov.f32f32 r0.z, r0.z -mov.f32f32 r4.y, r1.x +add.f r0.z, c11.x, r0.z +mov.f32f32 r4.y, r0.w +mul.f r4.w, r0.x, c3.z +add.f r0.x, c11.z, r0.w +mul.f r5.z, r0.z, c3.z +add.f r0.z, c11.x, r4.y +mov.f32f32 r6.y, r4.w +mul.f r5.x, r0.x, c3.w +mov.f32f32 r7.x, r5.z +mul.f r6.z, r0.z, c3.w +exp2 r0.x, r0.y +(ss)mov.f32f32 r0.y, r0.x +bary.f r0.z, 6, r1.x +mov.f32f32 r5.w, r5.x +mov.f32f32 r7.y, r6.z +add.f r0.y, c13.y, (neg)r0.y +add.f r5.y, r0.z, c10.w +add.f r0.z, c11.y, (neg)r1.w +mul.f r0.x, r0.x, c10.z +add.f r0.w, c13.y, (neg)r4.x +mov.f32f32 r7.z, r5.y +mul.f r0.y, r0.y, c8.y +mov.f32f32 r6.w, r5.y +mov.f32f32 r6.x, r5.y (rpt1)nop -mul.f r1.x, r3.y, r0.z -mul.f r1.z, r1.z, r0.z -mul.f r0.z, r1.w, r0.z +add.f r0.x, r0.x, r0.y +sam.s (f32)(x)r7.x, r7.x, s#2, t#2 +mov.f32f32 r0.y, r0.z nop -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.z, r1.z -mad.f32 r0.x, c6.z, r0.x, r1.x -mad.f32 r0.w, c6.y, r0.w, r1.z -mov.f32f32 r0.z, r0.z -sam (f32)(w)r3.y, r4.x, s#1, t#1 -(sy)cmps.f.lt r1.x, r4.x, c12.y -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.w, r0.w -mad.f32 r0.z, c6.x, r2.x, r0.z -cov.u32f32 r1.x, r1.x -mul.f r0.x, r0.y, r0.x -mul.f r0.w, r0.y, r0.w -mov.f32f32 r0.z, r0.z -cmps.f.ne r1.x, r1.x, c10.y -add.f r0.x, r0.x, r4.z -mov.f32f32 r1.z, r4.x -add.f r0.w, r0.w, r4.w -mul.f r0.y, r0.y, r0.z -nop -mov.f32f32 r0.z, r1.z -(rpt2)nop -sel.b32 r0.z, r1.y, r1.x, r0.z -add.f r0.y, r0.y, r5.x -(rpt1)nop -mul.f r0.x, r0.x, r0.z -mul.f r0.w, r0.w, r0.z -mul.f r0.y, r0.y, r0.z +(ss)nop +sam.s (f32)(x)r7.y, r6.y, s#2, t#2 +sam.s (f32)(x)r7.z, r4.w, s#2, t#2 +mul.f r0.w, r0.w, c4.z +add.f r1.w, c13.y, (neg)r4.x +mul.f r0.y, r4.z, r0.y +max.f r0.x, r0.x, c10.y +sam.s (f32)(x)r4.y, r5.z, s#2, t#2 +mul.f r4.z, r4.x, c10.z +mul.f r1.w, r1.w, c4.y +(sy)mul.f r0.y, r0.y, r7.x +add.f r2.x, r2.x, c10.z +min.f r0.x, r0.x, c10.z +add.f r0.w, r4.z, r0.w +mul.f r4.z, r4.x, c10.z +mul.f r0.z, r2.x, r0.z +(ss)add.f r4.w, c13.y, (neg)r0.x +add.f r5.x, c13.y, (neg)r0.x +add.f r5.y, c13.y, (neg)r0.x +mad.f32 r0.y, r0.z, r7.y, r0.y +add.f r0.z, r2.y, c10.z +mul.f r2.y, r4.w, c7.z +mul.f r4.w, r5.x, c7.y +mul.f r5.x, r5.y, c7.x +mul.f r3.x, r3.x, r0.z +add.f r1.w, r4.z, r1.w +mul.f r1.z, r1.z, c4.x +mul.f r0.w, r3.w, r0.w +mad.f32 r0.y, r3.x, r4.y, r0.y +mul.f r0.z, r2.x, r0.z +mul.f r2.x, r4.x, c10.z +mov.f32f32 r3.x, r0.w +bary.f r3.w, 2, r1.x +mad.f32 r0.y, r0.z, r7.z, r0.y +mul.f r0.z, r3.z, r1.w +add.f r1.z, r2.x, r1.z +mul.f r1.w, r3.x, r3.w +mul.f r0.y, c11.w, r0.y +mov.f32f32 r2.x, r0.z +mul.f r1.z, r3.y, r1.z +bary.f r3.x, 1, r1.x +mov.f32f32 r3.y, r0.y +sam (f32)(w)r3.z, r2.z, s#1, t#1 +(sy)(ss)cmps.f.lt r2.z, r4.y, c12.y +mov.f32f32 r3.z, r1.z +mul.f r2.x, r2.x, r3.x +mul.f r1.w, r1.w, r3.y +bary.f (ei)r1.x, 0, r1.x +mad.f32 r0.w, c6.z, r0.w, r1.w +mul.f r1.y, r2.x, r3.y +cov.u32f32 r1.w, r2.z +mov.f32f32 r2.w, c10.z +mul.f r0.w, r0.x, r0.w +mad.f32 r0.z, c6.y, r0.z, r1.y +mul.f r1.x, r3.z, r1.x +cmps.f.ne r1.y, r1.w, c10.y +add.f r0.w, r0.w, r2.y +mov.f32f32 r1.w, c10.y +mul.f r0.z, r0.x, r0.z +mul.f r0.y, r1.x, r0.y nop -mul.f r0.x, r0.x, c5.z -mul.f r0.z, r0.w, c5.y -mul.f r0.y, r0.y, c5.x +sel.b32 r1.x, r1.w, r1.y, r4.y +add.f r0.z, r0.z, r4.w +mad.f32 r0.y, c6.x, r1.z, r0.y nop -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.y, r0.y +mul.f r0.w, r0.w, r1.x +mul.f r0.z, r0.z, r1.x +(rpt1)nop +mul.f r2.z, r0.w, c5.z +mul.f r2.y, r0.z, c5.y +mul.f r0.x, r0.x, r0.y +(rpt2)nop +add.f r0.x, r0.x, r5.x +(rpt2)nop +mul.f r0.x, r0.x, r1.x +(rpt2)nop +mul.f r2.x, r0.x, c5.x +end nop -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.y, r0.y nop -mov.f32f32 r2.w, r0.x -mov.f32f32 r2.z, r0.z -mov.f32f32 r2.y, r0.y -end nop -; FRAG: outputs: r2.y (1:0) +; FRAG: outputs: r2.x (1:0) ; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r2.x (5:10,cm=f,il=12,b=1) r1.x (5:11,cm=f,il=16,b=1) -; FRAG: 212 instructions, 0 half, 8 full -; pos (bary): r1.x -; color: r2.y -; fragcoord: r0.x +; FRAG: 149 instructions, 0 half, 8 full diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-57.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-57.asm index 0e4d5ee..9c8ac11 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-57.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-57.asm @@ -6,134 +6,99 @@ @in(r0.w) in4 @in(r1.x) in5 @in(r1.y) in6 -@in(r1.z) in8 -@in(r1.w) in9 -@out(r6.y) out0 -@out(r6.z) out1 -@out(r6.w) out2 -@out(r7.x) out3 -@out(r5.y) out4 -@out(r5.z) out5 -@out(r5.w) out6 -@out(r6.x) out7 -@out(r2.w) out8 -@out(r3.x) out9 -@out(r3.y) out10 -@out(r3.z) out11 -@out(r3.w) out12 -@out(r4.x) out13 -@out(r4.y) out14 -@out(r4.z) out15 -(sy)(ss)mul.f r2.x, c11.x, r0.w -mul.f r2.y, c11.x, r0.x -mad.f32 r2.x, c12.x, r1.x, r2.x -mad.f32 r2.y, c12.x, r0.y, r2.y -mul.f r2.z, c11.z, r0.x -mad.f32 r2.y, c13.x, r0.z, r2.y -mov.f32f32 r2.x, r2.x -mad.f32 r2.z, c12.z, r0.y, r2.z -mad.f32 r2.x, c13.x, r1.y, r2.x -add.f r2.y, r2.y, c14.x -mad.f32 r2.z, c13.z, r0.z, r2.z -mul.f r2.w, c11.y, r0.w -mov.f32f32 r2.x, r2.x -mul.f r3.x, c7.w, r2.y -mul.f r3.y, c7.z, r2.y -mul.f r3.z, c7.y, r2.y -mul.f r2.x, r2.x, (neg)c4.x -mad.f32 r2.w, c12.y, r1.x, r2.w -mul.f r3.w, c11.y, r0.x -mul.f r4.x, c7.x, r2.y -mad.f32 r3.w, c12.y, r0.y, r3.w -mov.f32f32 r2.w, r2.w -mad.f32 r3.w, c13.y, r0.z, r3.w -mad.f32 r2.w, c13.y, r1.y, r2.w -mul.f r4.y, c0.w, r2.y -mul.f r4.z, c0.z, r2.y -mul.f r4.w, c0.y, r2.y -mov.f32f32 r2.w, r2.w -add.f r3.w, r3.w, c14.y -mul.f r5.x, c0.x, r2.y -add.f r2.z, r2.z, c14.z -mad.f32 r2.x, (neg)c4.y, r2.w, r2.x -mad.f32 r2.w, c8.w, r3.w, r3.x -mad.f32 r3.x, c8.z, r3.w, r3.y -mad.f32 r3.y, c8.y, r3.w, r3.z -mov.f32f32 r2.x, r2.x +@in(r3.z) in8 +@in(r3.w) in9 +@out(r0.x) out0 +@out(r0.y) out1 +@out(r0.z) out2 +@out(r0.w) out3 +@out(r1.x) out4 +@out(r1.y) out5 +@out(r1.z) out6 +@out(r1.w) out7 +@out(r2.x) out8 +@out(r2.y) out9 +@out(r2.z) out10 +@out(r2.w) out11 +@out(r3.x) out12 +@out(r3.y) out13 +@out(r3.z) out14 +@out(r3.w) out15 +@const(c16.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 +@const(c17.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 +(sy)(ss)mul.f r1.z, c11.x, r0.w +mul.f r1.w, c11.x, r0.x +mad.f32 r1.z, c12.x, r1.x, r1.z +mad.f32 r1.w, c12.x, r0.y, r1.w +mad.f32 r1.z, c13.x, r1.y, r1.z +mad.f32 r1.w, c13.x, r0.z, r1.w +mul.f r2.x, c11.z, r0.x +mul.f r2.y, c11.y, r0.x +mul.f r1.z, r1.z, (neg)c4.x +mul.f r2.z, c11.y, r0.w +add.f r1.w, r1.w, c14.x +mad.f32 r2.z, c12.y, r1.x, r2.z +mad.f32 r2.x, c12.z, r0.y, r2.x +mad.f32 r2.z, c13.y, r1.y, r2.z +mul.f r2.w, c7.y, r1.w +mul.f r3.x, c7.x, r1.w +mul.f r3.y, c7.w, r1.w +mad.f32 r1.z, (neg)c4.y, r2.z, r1.z mul.f r0.w, c11.z, r0.w -mad.f32 r2.w, c9.w, r2.z, r2.w +mad.f32 r2.y, c12.y, r0.y, r2.y mad.f32 r0.w, c12.z, r1.x, r0.w +mad.f32 r1.x, c13.y, r0.z, r2.y +mad.f32 r0.w, c13.z, r1.y, r0.w +mul.f r1.y, c7.z, r1.w +mul.f r2.y, c0.w, r1.w +mul.f r2.z, c0.z, r1.w +mad.f32 r0.w, (neg)c4.z, r0.w, r1.z +add.f r4.x, r1.x, c14.y +mul.f r4.y, c0.y, r1.w +mul.f r4.z, c0.x, r1.w +max.f r0.w, c16.x, r0.w +mad.f32 r2.w, c8.y, r4.x, r2.w +mad.f32 r3.x, c8.x, r4.x, r3.x +mad.f32 r3.y, c8.w, r4.x, r3.y +mov.f32f32 r4.w, r0.w +mad.f32 r2.x, c13.z, r0.z, r2.x +mul.f r1.x, r0.w, c5.x +mad.f32 r0.w, c8.z, r4.x, r1.y +mul.f r1.z, r4.w, c5.z +mul.f r1.y, r4.w, c5.y +add.f r4.w, r2.x, c14.z +mad.f32 r2.x, c1.w, r4.x, r2.y +mad.f32 r2.y, c1.z, r4.x, r2.z +mad.f32 r2.z, c1.y, r4.x, r4.y +mad.f32 r2.w, c9.y, r4.w, r2.w mul.f r0.x, c11.w, r0.x -mad.f32 r1.x, c9.z, r2.z, r3.x -mad.f32 r3.x, c9.y, r2.z, r3.y -mov.f32f32 r0.w, r0.w +mad.f32 r3.x, c9.x, r4.w, r3.x mad.f32 r0.x, c12.w, r0.y, r0.x -mad.f32 r0.y, c13.z, r1.y, r0.w +mad.f32 r0.y, c9.w, r4.w, r3.y mad.f32 r0.x, c13.w, r0.z, r0.x -mad.f32 r0.z, c8.x, r3.w, r4.x -mad.f32 r0.w, c1.w, r3.w, r4.y -mov.f32f32 r0.y, r0.y +mad.f32 r0.z, c9.z, r4.w, r0.w +mad.f32 r0.w, c2.w, r4.w, r2.x +mad.f32 r4.y, c2.z, r4.w, r2.y add.f r0.x, r0.x, c14.w -mad.f32 r0.z, c9.x, r2.z, r0.z -mad.f32 r0.w, c2.w, r2.z, r0.w -mad.f32 r0.y, (neg)c4.z, r0.y, r2.x -mad.f32 r1.y, c10.w, r0.x, r2.w -mad.f32 r1.x, c10.z, r0.x, r1.x -mad.f32 r2.x, c10.y, r0.x, r3.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.x, r1.x -mul.f r2.x, r2.x, c15.y -max.f r0.y, c16.x, r0.y -mov.f32f32 r3.z, r1.y -mov.f32f32 r3.y, r1.x -mov.f32f32 r3.x, r2.x -mov.f32f32 r0.y, r0.y -mad.f32 r0.z, c10.x, r0.x, r0.z +mad.f32 r5.x, c2.y, r4.w, r2.z +mad.f32 r4.x, c1.x, r4.x, r4.z +mad.f32 r3.y, c6.x, r4.w, c6.y +mad.f32 r2.x, c10.y, r0.x, r2.w +mad.f32 r3.x, c10.x, r0.x, r3.x +mad.f32 r2.w, c10.w, r0.x, r0.y +mad.f32 r2.z, c10.z, r0.x, r0.z +mul.f r2.y, r2.x, c15.y +mul.f r2.x, r3.x, c15.x mad.f32 r0.w, c3.w, r0.x, r0.w -mad.f32 r1.x, c1.z, r3.w, r4.z -mul.f r1.y, r0.y, c5.z -mul.f r2.x, r0.y, c5.y -mul.f r0.y, r0.y, c5.x -mul.f r0.z, r0.z, c15.x -mov.f32f32 r1.y, r1.y -mov.f32f32 r2.x, r2.x -mov.f32f32 r0.y, r0.y -nop -mov.f32f32 r5.w, r1.y -mov.f32f32 r5.z, r2.x -mov.f32f32 r5.y, r0.y -mov.f32f32 r2.w, r0.z -mov.f32f32 r7.x, r0.w -mad.f32 r0.y, c2.z, r2.z, r1.x -mad.f32 r0.z, c1.y, r3.w, r4.w -mad.f32 r0.y, c3.z, r0.x, r0.y -mad.f32 r0.z, c2.y, r2.z, r0.z -mad.f32 r0.w, c1.x, r3.w, r5.x -mad.f32 r1.x, c6.x, r2.z, c6.y -mov.f32f32 r6.w, r0.y -mad.f32 r0.y, c3.y, r0.x, r0.z -mad.f32 r0.z, c2.x, r2.z, r0.w -mov.f32f32 r0.w, r1.x -mad.f32 r1.x, c6.x, r2.y, c6.y -mov.f32f32 r6.z, r0.y -mad.f32 r0.x, c3.x, r0.x, r0.z -mov.f32f32 r4.x, r0.w -mov.f32f32 r0.y, r1.x -nop -mov.f32f32 r6.y, r0.x -mov.f32f32 r0.x, r1.w -mov.f32f32 r3.w, r0.y -mov.f32f32 r0.y, r1.z -mov.f32f32 r0.z, (0.000000) -mov.f32f32 r4.z, r0.x -nop -mov.f32f32 r4.y, r0.y -mov.f32f32 r6.x, r0.z +mad.f32 r0.z, c3.z, r0.x, r4.y +mad.f32 r0.y, c3.y, r0.x, r5.x +mad.f32 r4.x, c2.x, r4.w, r4.x +mad.f32 r3.x, c6.x, r1.w, c6.y +mad.f32 r0.x, c3.x, r0.x, r4.x +mov.f32f32 r1.w, (0.000000) end nop -; VERT: outputs: r6.y (0:0) r5.y (5:9) r2.w (5:10) r3.w (5:11) -; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r1.z (0:0,cm=3,il=16,b=0) -; VERT: 107 instructions, 0 half, 8 full -; pos: r6.y +; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11) +; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r3.z (0:0,cm=3,il=16,b=0) +; VERT: 71 instructions, 0 half, 6 full diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-58.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-58.asm index 2316052..57bb137 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-58.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-58.asm @@ -4,258 +4,187 @@ @in(r0.w) in3 @in(r1.x) in4 @in(r1.y) in5 -@out(r2.w) out0 -@out(r3.x) out1 -@out(r3.y) out2 -@out(r3.z) out3 +@out(r2.x) out0 +@out(r2.y) out1 +@out(r2.z) out2 +@out(r2.w) out3 +@const(c13.x) 0x3f000000, 0x00000000, 0x3f800000, 0xbb449ba6 +@const(c14.x) 0xbf000000, 0x40000000, 0x3f800000, 0x3de38866 +@const(c15.x) 0x3fb8aa65, 0x3cf5c28f, 0x00000000, 0x00000000 +@const(c16.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.x, 8, r1.x add.f r0.y, r0.w, c13.y bary.f r0.w, 4, r1.x bary.f r1.z, 9, r1.x add.f r1.w, r0.x, c14.x bary.f r2.x, 14, r1.x -mul.f r2.y, r0.w, r0.w -bary.f r2.z, 5, r1.x +bary.f r2.y, 15, r1.x +add.f r2.z, r1.z, c14.x floor.f r2.w, r1.w rcp r0.y, r0.y add.f r0.z, r0.z, c13.y -add.f r3.x, r1.z, c14.x -mad.f32 r2.y, r2.z, r2.z, r2.y +mul.f r3.x, r0.w, r0.w +bary.f r3.y, 5, r1.x add.f r1.w, r1.w, (neg)r2.w (ss)mul.f r0.y, r0.z, r0.y -floor.f r0.z, r3.x -mov.f32f32 r2.y, r2.y -mov.f32f32 r1.w, r1.w +absneg.f r0.z, (neg)c9.x +mad.f32 r2.w, r3.y, r3.y, r3.x +mov.f32f32 r3.x, r1.w +bary.f r3.z, 6, r1.x +mul.f r0.z, r0.z, c9.x +floor.f r3.w, r2.z +mul.f r4.x, c13.x, r3.x +mad.f32 r2.w, r3.z, r3.z, r2.w +mul.f r0.z, r0.z, r0.y mov.f32f32 r0.y, r0.y -absneg.f r2.w, (neg)c9.x -bary.f r3.y, 6, r1.x -mul.f r3.z, c13.x, r1.w -add.f r0.z, r3.x, (neg)r0.z -mul.f r2.w, r2.w, c9.x -mad.f32 r2.y, r3.y, r3.y, r2.y -mov.f32f32 r3.x, r3.z -mov.f32f32 r0.z, r0.z -mul.f r2.w, r2.w, r0.y -mov.f32f32 r2.x, r2.x -add.f r0.x, r0.x, (neg)r3.x -mul.f r3.x, c13.x, r0.z -mov.f32f32 r2.w, r2.w -rsq r2.y, r2.y -(ss)mov.f32f32 r2.y, r2.y -mov.f32f32 r0.x, r0.x -mov.f32f32 r3.x, r3.x -mul.f r0.y, r2.w, r0.y -mul.f r0.w, r0.w, r2.y -add.f r2.w, c14.x, r0.x -add.f r0.x, c14.z, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r2.w, r2.w -bary.f r3.z, 16, r1.x +add.f r0.x, r0.x, (neg)r4.x +add.f r2.z, r2.z, (neg)r3.w +add.f r3.x, c14.y, (neg)r3.x +mul.f r0.y, r0.z, r0.y +mov.f32f32 r0.z, r0.x +rsq r2.w, r2.w +(ss)mov.f32f32 r3.w, r2.w +mov.f32f32 r4.x, r2.z mul.f r0.y, r0.y, c15.x -mov.f32f32 r0.x, r0.x -mul.f r2.w, r2.w, c3.z -mul.f r0.w, r0.w, r3.z -mul.f r2.z, r2.z, r2.y -mov.f32f32 r0.y, r0.y -mov.f32f32 r3.z, r2.w -mul.f r0.x, r0.x, c3.z -mov.f32f32 r2.z, r2.z -bary.f r3.w, 17, r1.x -mov.f32f32 r4.x, r3.z -add.f r1.z, r1.z, (neg)r3.x +add.f r0.z, c14.x, r0.z +mul.f r0.w, r0.w, r3.w +bary.f r4.y, 16, r1.x +mul.f r4.z, c13.x, r4.x +mul.f r4.w, r0.z, c3.z +add.f r0.x, c14.z, r0.x +mul.f r0.z, r0.w, r4.y exp2 r0.y, r0.y -(ss)mov.f32f32 r0.y, r0.y -mad.f32 r0.w, r2.z, r3.w, r0.w -mov.f32f32 r2.z, r0.x -mov.f32f32 r1.z, r1.z -add.f r3.x, c16.y, (neg)r0.y -mov.f32f32 r0.w, r0.w -mul.f r2.y, r3.y, r2.y -add.f r3.y, c14.x, r1.z -mul.f r3.x, r3.x, c9.y -mul.f r0.y, r0.y, c13.z -mov.f32f32 r2.y, r2.y -mov.f32f32 r3.y, r3.y -bary.f r3.z, 18, r1.x -add.f r0.y, r0.y, r3.x -mov.f32f32 r2.z, r2.z -mul.f r3.x, r3.y, c3.w -mad.f32 r0.w, r2.y, r3.z, r0.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r3.y, r2.z -mov.f32f32 r2.y, r3.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.y, r0.y -add.f r1.z, c14.z, r1.z -mov.f32f32 r4.y, r2.y -bary.f r2.y, 10, r1.x -max.f r0.y, r0.y, c13.y -max.f r0.w, c13.y, r0.w -mov.f32f32 r1.z, r1.z -add.f r2.y, r2.y, c13.w -min.f r0.y, r0.y, c13.z -mov.f32f32 r0.w, r0.w -mul.f r1.z, r1.z, c3.w -mov.f32f32 r2.z, r2.y -add.f r3.z, c16.y, (neg)r0.y -add.f r3.w, c16.y, (neg)r0.y -add.f r4.w, c16.y, (neg)r0.y -mov.f32f32 r4.z, r2.z -mul.f r2.z, r3.z, c8.z -mul.f r5.x, r3.w, c8.y -mul.f r4.w, r4.w, c8.x -log2 r0.w, r0.w -(ss)mul.f r0.w, c10.x, r0.w -mov.f32f32 r3.z, r1.z -mov.f32f32 r0.x, r0.x -sam.s (f32)(x)r3.w, r4.x, s#2, t#2 -(sy)mov.f32f32 r3.w, r3.w -mov.f32f32 r0.w, r0.w -mov.f32f32 r3.z, r3.z -(ss)mov.f32f32 r4.x, r0.x -mov.f32f32 r0.x, r3.w -add.f r3.w, c14.y, (neg)r1.w -mov.f32f32 r3.z, r3.z -mov.f32f32 r4.z, r2.y -mov.f32f32 r3.x, r3.x -mov.f32f32 r5.y, r3.w -add.f r3.w, c14.y, (neg)r0.z -exp2 r0.w, r0.w -mov.f32f32 r4.y, c7.y -mov.f32f32 r5.z, c7.x -mov.f32f32 r5.w, c7.z -mov.f32f32 r6.x, r3.w -mul.f r3.w, r4.y, c11.y -mul.f r4.y, r5.z, c11.x -mul.f r5.z, r5.w, c11.z -mul.f r5.w, r5.y, r6.x -(ss)mul.f r6.y, r3.w, r0.w -mul.f r6.z, r4.y, r0.w -(ss)mul.f r0.w, r5.z, r0.w -mul.f r0.x, r5.w, r0.x -mov.f32f32 r4.y, r3.x -mov.f32f32 r3.x, r2.y -mov.f32f32 r5.z, r2.x -bary.f r2.x, 15, r1.x -mov.f32f32 r3.w, r4.z -mov.f32f32 r4.z, r3.x -mov.f32f32 r2.w, r2.w -mov.f32f32 r1.z, r1.z -mov.f32f32 r2.x, r2.x -add.f r1.w, r1.w, c13.z -add.f r0.z, r0.z, c13.z -sam.s (f32)(x)r3.x, r3.y, s#2, t#2 -(sy)mov.f32f32 r3.x, r3.x -(ss)nop -sam.s (f32)(x)r3.y, r4.x, s#2, t#2 -(sy)mov.f32f32 r3.y, r3.y -mul.f r3.z, r1.w, r6.x -mov.f32f32 r5.w, r2.x -mov.f32f32 r3.w, r2.w -(ss)mov.f32f32 r4.x, r1.z -mad.f32 r0.x, r3.z, r3.y, r0.x -mov.f32f32 r1.z, r2.y -mul.f r2.x, r5.y, r0.z -bary.f r2.y, 12, r1.x -mov.f32f32 r0.x, r0.x +(ss)mov.f32f32 r0.w, r0.y +mov.f32f32 r5.z, r4.w +add.f r1.z, r1.z, (neg)r4.z +mul.f r3.y, r3.y, r3.w +bary.f r3.w, 17, r1.x +add.f r0.w, c16.y, (neg)r0.w mov.f32f32 r4.y, r1.z -sam (f32)(xyzw)r5.y, r5.z, s#0, t#0 -(sy)add.f r1.z, c16.y, (neg)r6.x -add.f r2.w, c16.y, (neg)r6.x -add.f r3.y, c16.y, (neg)r6.x -mov.f32f32 r2.y, r2.y -mul.f r1.z, r1.z, c4.x -mul.f r0.z, r1.w, r0.z -sam.s (f32)(x)r1.w, r3.w, s#2, t#2 -(sy)mov.f32f32 r1.w, r1.w -mul.f r2.w, r2.w, c4.z -mul.f r3.y, r3.y, c4.y -mul.f r3.z, r6.x, c13.z -mad.f32 r0.x, r2.x, r1.w, r0.x -mul.f r1.w, r6.x, c13.z -mul.f r2.x, r6.x, c13.z -add.f r1.z, r3.z, r1.z -mov.f32f32 r0.x, r0.x -add.f r1.w, r1.w, r2.w -mad.f32 r0.x, r0.z, r3.x, r0.x -add.f r0.z, r2.x, r3.y -mul.f r1.z, r5.y, r1.z -mul.f r1.w, r5.w, r1.w -mov.f32f32 r0.x, r0.x -mul.f r0.z, r5.z, r0.z -mov.f32f32 r1.z, r1.z -mov.f32f32 r1.w, r1.w -mul.f r0.x, c14.w, r0.x -bary.f r2.x, 2, r1.x -mov.f32f32 r0.z, r0.z -bary.f r2.w, 0, r1.x -mov.f32f32 r0.x, r0.x -mad.f32 r0.w, r1.w, r2.x, r0.w -bary.f r2.x, 1, r1.x -mad.f32 r2.w, r1.z, r2.w, r6.z -mov.f32f32 r3.x, r2.y -mov.f32f32 r0.w, r0.w -mad.f32 r2.x, r0.z, r2.x, r6.y -mov.f32f32 r2.y, r2.w -bary.f (ei)r1.x, 13, r1.x -mul.f r0.w, r0.w, r0.x -mov.f32f32 r1.y, r2.x -mul.f r2.x, r2.y, r0.x -mov.f32f32 r1.x, r1.x -mov.f32f32 r0.w, r0.w -mul.f r0.x, r1.y, r0.x -mad.f32 r0.w, c6.z, r1.w, r0.w -mov.f32f32 r1.y, r2.x -mov.f32f32 r3.y, r1.x -mov.f32f32 r1.x, c13.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.x, r0.x -mad.f32 r1.y, c6.x, r1.z, r1.y -mad.f32 r0.x, c6.y, r0.z, r0.x -mul.f r0.z, r0.y, r0.w -sam (f32)(w)r1.z, r3.x, s#1, t#1 -(sy)cmps.f.lt r0.w, r2.y, c15.y -mov.f32f32 r1.z, r2.y -mov.f32f32 r3.z, r1.x -add.f r0.z, r0.z, r2.z -cov.u32f32 r0.w, r0.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r1.x, r1.y -mov.f32f32 r1.y, r1.z -cmps.f.ne r0.w, r0.w, c13.y -mov.f32f32 r1.z, c13.y -mul.f r0.x, r0.y, r0.x -mul.f r0.y, r0.y, r1.x -nop -sel.b32 r0.w, r1.z, r0.w, r1.y -add.f r0.x, r0.x, r5.x -add.f r0.y, r0.y, r4.w +mul.f r6.y, r0.x, c3.z +add.f r0.x, c14.z, r1.z +mad.f32 r0.z, r3.y, r3.w, r0.z +add.f r1.z, c14.x, r4.y +mul.f r0.w, r0.w, c9.y +(ss)mul.f r0.y, r0.y, c13.z +mul.f r2.w, r3.z, r2.w +mul.f r3.z, r1.z, c3.w +bary.f r1.z, 18, r1.x +add.f r0.y, r0.y, r0.w +mov.f32f32 r3.y, r6.y +mov.f32f32 r5.w, r3.z +bary.f r0.w, 10, r1.x +max.f r0.y, r0.y, c13.y +mad.f32 r0.z, r2.w, r1.z, r0.z +mul.f r6.z, r0.x, c3.w +add.f r6.w, r0.w, c13.w +min.f r0.x, r0.y, c13.z +max.f r0.y, c13.y, r0.z +mov.f32f32 r5.x, r6.z +mov.f32f32 r6.x, r6.w +add.f r0.z, c16.y, (neg)r0.x +add.f r0.w, c16.y, (neg)r0.x +add.f r1.z, c16.y, (neg)r0.x +mov.f32f32 r3.w, r6.w +mov.f32f32 r5.y, r6.w +log2 r0.y, r0.y +mul.f r0.z, r0.z, c8.z +sam.s (f32)(x)r7.x, r5.z, s#2, t#2 +mov.f32f32 r2.w, r3.x +add.f r4.x, c14.y, (neg)r4.x +mul.f r0.w, r0.w, c8.y +mul.f r1.z, r1.z, c8.x +(ss)mul.f r0.y, c10.x, r0.y +mov.f32f32 r4.y, r4.x nop -mul.f r0.z, r0.z, r0.w -mul.f r0.x, r0.x, r0.w -mul.f r0.y, r0.y, r0.w +sam.s (f32)(x)r7.y, r3.y, s#2, t#2 +sam.s (f32)(x)r4.z, r4.w, s#2, t#2 nop -mul.f r0.z, r0.z, c5.z -mul.f r0.x, r0.x, c5.y -mul.f r0.y, r0.y, c5.x +(ss)nop +sam.s (f32)(x)r4.w, r6.y, s#2, t#2 +add.f r1.w, r1.w, c13.z +mul.f r2.w, r2.w, r4.y +add.f r2.z, r2.z, c13.z +sam (f32)(xyzw)r5.x, r2.x, s#0, t#0 +exp2 r0.y, r0.y +(ss)mov.f32f32 r2.x, c7.y +mov.f32f32 r2.y, c7.z +(sy)mul.f r2.w, r2.w, r7.x +mul.f r3.y, r1.w, r4.x +mul.f r2.x, r2.x, c11.y +mov.f32f32 r3.z, c7.x +mul.f r2.y, r2.y, c11.z +mad.f32 r2.w, r3.y, r7.y, r2.w +mul.f r3.x, r3.x, r2.z +(ss)mul.f r2.x, r2.x, r0.y +mul.f r3.y, r3.z, c11.x +mul.f r2.y, r2.y, r0.y +mad.f32 r2.w, r3.x, r4.z, r2.w +mul.f r1.w, r1.w, r2.z +add.f r2.z, c16.y, (neg)r5.w +mul.f r0.y, r3.y, r0.y +add.f r3.x, c16.y, (neg)r5.w +mad.f32 r1.w, r1.w, r4.w, r2.w +mul.f r2.z, r2.z, c4.y +add.f r2.w, c16.y, (neg)r5.w +mul.f r3.x, r3.x, c4.z +mul.f r1.w, c14.w, r1.w +mul.f r3.y, r5.w, c13.z +mul.f r3.z, r5.w, c13.z +mul.f r2.w, r2.w, c4.x +mov.f32f32 r3.w, r1.w +add.f r3.x, r3.y, r3.x +add.f r2.z, r3.z, r2.z +mul.f r3.y, r5.w, c13.z +bary.f r4.x, 12, r1.x +mul.f r3.x, r5.z, r3.x +mul.f r2.z, r5.y, r2.z +add.f r2.w, r3.y, r2.w +bary.f r4.y, 13, r1.x +mov.f32f32 r3.y, r3.x +bary.f r3.z, 2, r1.x +mov.f32f32 r4.z, r2.z +bary.f r4.w, 1, r1.x +mul.f r5.x, r5.x, r2.w +mad.f32 r2.y, r3.y, r3.z, r2.y +sam (f32)(w)r5.y, r4.x, s#1, t#1 +(sy)cmps.f.lt r3.y, r6.x, c15.y +mov.f32f32 r2.w, c13.z +mov.f32f32 r3.z, c13.y +mul.f r2.y, r2.y, r3.w +mad.f32 r2.x, r4.z, r4.w, r2.x +mad.f32 r2.y, c6.z, r3.x, r2.y +mov.f32f32 r3.x, r5.x +bary.f (ei)r1.x, 0, r1.x +mul.f r1.y, r2.x, r3.w +mul.f r2.x, r0.x, r2.y +mad.f32 r1.y, c6.y, r2.z, r1.y +mad.f32 r0.y, r3.x, r1.x, r0.y +cov.u32f32 r1.x, r3.y +add.f r0.z, r2.x, r0.z +mul.f r1.y, r0.x, r1.y +mul.f r0.y, r0.y, r1.w +cmps.f.ne r1.x, r1.x, c13.y +mad.f32 r0.y, c6.x, r5.x, r0.y +add.f r0.w, r1.y, r0.w nop -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y +sel.b32 r1.x, r3.z, r1.x, r6.x +mul.f r0.x, r0.x, r0.y +(rpt1)nop +mul.f r0.y, r0.z, r1.x +mul.f r0.z, r0.w, r1.x +(rpt1)nop +mul.f r2.z, r0.y, c5.z +mul.f r2.y, r0.z, c5.y +add.f r0.x, r0.x, r1.z +(rpt2)nop +mul.f r0.x, r0.x, r1.x +(rpt2)nop +mul.f r2.x, r0.x, c5.x +end nop -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y nop -(ss)mov.f32f32 r3.y, r0.z -mov.f32f32 r3.x, r0.x -mov.f32f32 r2.w, r0.y -end -; FRAG: outputs: r2.w (1:0) -; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r2.x (5:10,cm=f,il=12,b=1) r3.x (5:11,cm=f,il=16,b=1) r1.y (5:12,cm=f,il=20,b=1) r5.x (5:13,cm=f,il=24,b=1) -; FRAG: 244 instructions, 0 half, 7 full -; pos (bary): r1.x -; color: r2.w -; fragcoord: r0.x +; FRAG: outputs: r2.x (1:0) +; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r2.x (5:10,cm=f,il=12,b=1) r3.x (5:11,cm=f,il=16,b=1) r1.z (5:12,cm=f,il=20,b=1) r5.x (5:13,cm=f,il=24,b=1) +; FRAG: 176 instructions, 0 half, 8 full diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-59.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-59.asm index 9f1027e..e7bcae3 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-59.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-59.asm @@ -6,186 +6,139 @@ @in(r0.w) in4 @in(r1.x) in5 @in(r1.y) in6 -@in(r1.z) in8 -@in(r1.w) in9 -@out(r3.x) out0 -@out(r3.y) out1 -@out(r3.z) out2 -@out(r3.w) out3 -@out(r7.y) out4 -@out(r7.z) out5 -@out(r7.w) out6 -@out(r8.x) out7 +@in(r4.z) in8 +@in(r4.w) in9 +@out(r0.x) out0 +@out(r0.y) out1 +@out(r0.z) out2 +@out(r0.w) out3 +@out(r1.x) out4 +@out(r1.y) out5 +@out(r1.z) out6 +@out(r1.w) out7 @out(r2.x) out8 @out(r2.y) out9 @out(r2.z) out10 @out(r2.w) out11 -@out(r8.y) out12 -@out(r8.z) out13 -@out(r8.w) out14 -@out(r9.x) out15 +@out(r3.x) out12 +@out(r3.y) out13 +@out(r3.z) out14 +@out(r3.w) out15 @out(r4.x) out16 @out(r4.y) out17 @out(r4.z) out18 @out(r4.w) out19 -@out(r6.y) out20 -@out(r6.z) out21 -@out(r6.w) out22 -@out(r7.x) out23 -(sy)(ss)mul.f r2.x, c12.x, r0.x -mul.f r2.y, c12.x, r0.w -mad.f32 r2.x, c13.x, r0.y, r2.x -mad.f32 r2.y, c13.x, r1.x, r2.y -mad.f32 r2.x, c14.x, r0.z, r2.x -mul.f r2.z, c12.z, r0.x -mul.f r2.w, c12.z, r0.w -mul.f r0.w, c12.y, r0.w -add.f r2.x, r2.x, c15.x -mov.f32f32 r2.y, r2.y -mad.f32 r2.z, c13.z, r0.y, r2.z -mad.f32 r2.w, c13.z, r1.x, r2.w -add.f r3.x, c4.x, (neg)r2.x -mad.f32 r2.y, c14.x, r1.y, r2.y -mul.f r3.y, c8.w, r2.x -mul.f r3.z, c8.z, r2.x -mul.f r3.w, r3.x, r3.x -mul.f r4.x, c12.y, r0.x -mov.f32f32 r2.y, r2.y -mad.f32 r4.x, c13.y, r0.y, r4.x -absneg.f r4.y, (neg)c5.x -mad.f32 r4.x, c14.y, r0.z, r4.x -mul.f r4.z, c8.y, r2.x -mul.f r4.w, c8.x, r2.x -mul.f r5.x, r2.y, r4.y -add.f r4.x, r4.x, c15.y -mad.f32 r0.w, c13.y, r1.x, r0.w -mul.f r1.x, c0.w, r2.x -mul.f r5.y, c0.z, r2.x -add.f r5.z, c4.y, (neg)r4.x -mov.f32f32 r0.w, r0.w -mad.f32 r3.y, c9.w, r4.x, r3.y -mad.f32 r3.z, c9.z, r4.x, r3.z -mad.f32 r3.w, r5.z, r5.z, r3.w -mad.f32 r0.w, c14.y, r1.y, r0.w -mad.f32 r2.z, c14.z, r0.z, r2.z -mad.f32 r4.z, c9.y, r4.x, r4.z -mov.f32f32 r3.w, r3.w -mov.f32f32 r0.w, r0.w -add.f r2.z, r2.z, c15.z -absneg.f r5.w, (neg)c5.y -mad.f32 r4.w, c9.x, r4.x, r4.w -mad.f32 r1.x, c1.w, r4.x, r1.x -add.f r6.x, c4.z, (neg)r2.z -mad.f32 r5.x, r0.w, r5.w, r5.x -mad.f32 r3.y, c10.w, r2.z, r3.y -mad.f32 r3.z, c10.z, r2.z, r3.z -mad.f32 r3.w, r6.x, r6.x, r3.w -mov.f32f32 r5.x, r5.x -mov.f32f32 r2.w, r2.w +@out(r5.x) out20 +@out(r5.y) out21 +@out(r5.z) out22 +@out(r5.w) out23 +@const(c17.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 +@const(c18.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 +(sy)(ss)mul.f r1.z, c12.x, r0.x +mul.f r1.w, c12.x, r0.w +mad.f32 r1.z, c13.x, r0.y, r1.z +mad.f32 r1.w, c13.x, r1.x, r1.w +mad.f32 r1.z, c14.x, r0.z, r1.z +mad.f32 r1.w, c14.x, r1.y, r1.w +absneg.f r2.x, (neg)c5.x +mul.f r2.y, c12.y, r0.x +add.f r2.z, r1.z, c15.x +mad.f32 r1.z, c13.y, r0.y, r2.y +mul.f r2.y, c12.z, r0.x +mul.f r2.w, r1.w, r2.x +add.f r3.x, c4.x, (neg)r2.z +mul.f r3.y, c12.y, r0.w +mul.f r3.z, c8.y, r2.z +mul.f r3.w, c8.x, r2.z +mul.f r4.x, r3.x, r3.x +mad.f32 r1.z, c14.y, r0.z, r1.z +mad.f32 r3.y, c13.y, r1.x, r3.y +mul.f r4.y, c8.w, r2.z +mul.f r5.x, c8.z, r2.z +add.f r5.w, r1.z, c15.y +mad.f32 r6.x, c14.y, r1.y, r3.y +absneg.f r1.z, (neg)c5.y +mul.f r3.y, c0.w, r2.z +add.f r5.y, c4.y, (neg)r5.w +mad.f32 r3.z, c9.y, r5.w, r3.z +mad.f32 r3.w, c9.x, r5.w, r3.w +mad.f32 r2.w, r6.x, r1.z, r2.w +mad.f32 r4.x, r5.y, r5.y, r4.x +mad.f32 r2.y, c13.z, r0.y, r2.y +mul.f r0.w, c12.z, r0.w +mad.f32 r2.y, c14.z, r0.z, r2.y +mad.f32 r0.w, c13.z, r1.x, r0.w +mad.f32 r1.x, c9.w, r5.w, r4.y +mad.f32 r4.y, c9.z, r5.w, r5.x +add.f r2.y, r2.y, c15.z +mad.f32 r6.y, c14.z, r1.y, r0.w +absneg.f r0.w, (neg)c5.z +mad.f32 r1.y, c1.w, r5.w, r3.y +add.f r3.y, c4.z, (neg)r2.y +mad.f32 r3.z, c10.y, r2.y, r3.z +mad.f32 r3.w, c10.x, r2.y, r3.w +mad.f32 r2.w, r6.y, r0.w, r2.w +mad.f32 r4.x, r3.y, r3.y, r4.x mul.f r0.x, c12.w, r0.x -mad.f32 r4.z, c10.y, r2.z, r4.z -mad.f32 r4.w, c10.x, r2.z, r4.w -mad.f32 r1.y, c14.z, r1.y, r2.w -rsq r2.w, r3.w -(ss)mov.f32f32 r2.w, r2.w -(ss)absneg.f r3.w, (neg)c5.z +mad.f32 r5.x, c10.w, r2.y, r1.x +mad.f32 r4.y, c10.z, r2.y, r4.y mad.f32 r0.x, c13.w, r0.y, r0.x -mov.f32f32 r0.y, r1.y -mad.f32 r1.y, r3.x, r2.w, r4.y -mad.f32 r3.x, r5.z, r2.w, r5.w -mad.f32 r2.w, r6.x, r2.w, r3.w -mad.f32 r3.w, r0.y, r3.w, r5.x -mov.f32f32 r1.y, r1.y -mov.f32f32 r3.x, r3.x -mov.f32f32 r2.w, r2.w -mov.f32f32 r3.w, r3.w -mul.f r4.y, r1.y, r1.y +mad.f32 r0.y, c2.w, r2.y, r1.y +mul.f r6.z, c0.z, r2.z +rsq r1.x, r4.x +(ss)mov.f32f32 r1.y, r1.x +mad.f32 r2.x, r3.x, r1.x, r2.x +max.f r1.x, c17.x, r2.w mad.f32 r0.x, c14.w, r0.z, r0.x -mad.f32 r0.z, r3.x, r3.x, r4.y -max.f r3.w, c17.x, r3.w -mad.f32 r1.x, c2.w, r2.z, r1.x -mad.f32 r4.y, c1.z, r4.x, r5.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r3.w, r3.w -mad.f32 r0.z, r2.w, r2.w, r0.z +mad.f32 r0.z, r5.y, r1.y, r1.z +mov.f32f32 r1.z, r2.x +mad.f32 r0.w, r3.y, r1.y, r0.w +nop +mov.f32f32 r2.w, r0.z +mul.f r1.y, r1.z, r1.z +(ss)mov.f32f32 r4.x, r0.w +mov.f32f32 r3.x, r1.x +mad.f32 r0.z, r0.z, r2.w, r1.y add.f r0.x, r0.x, c15.w -mad.f32 r4.y, c2.z, r2.z, r4.y -mul.f r5.x, c0.y, r2.x -mul.f r5.y, c0.x, r2.x -mad.f32 r5.z, c7.x, r2.z, c7.y -mad.f32 r2.x, c7.x, r2.x, c7.y +mad.f32 r0.z, r0.w, r4.x, r0.z +mul.f r1.z, r3.x, c6.z +mul.f r1.y, r3.x, c6.y +mad.f32 r0.w, c11.y, r0.x, r3.z +mad.f32 r3.x, c11.x, r0.x, r3.w +mul.f r1.x, r1.x, c6.x +mad.f32 r3.w, c11.w, r0.x, r5.x rsq r0.z, r0.z -(ss)mov.f32f32 r0.z, r0.z -mul.f r5.w, r3.w, c6.z -mul.f r6.x, r3.w, c6.y -mul.f r3.w, r3.w, c6.x -mul.f r2.w, r2.w, r0.z -mul.f r3.x, r3.x, r0.z -mul.f r0.z, r1.y, r0.z -mov.f32f32 r1.y, r5.w -mov.f32f32 r2.w, r2.w -mov.f32f32 r3.x, r3.x -mov.f32f32 r0.z, r0.z -nop -mov.f32f32 r6.w, r2.w -mov.f32f32 r6.z, r3.x -mov.f32f32 r6.y, r0.z -mov.f32f32 r7.w, r1.y -mov.f32f32 r0.z, r6.x -mov.f32f32 r1.y, r3.w -(rpt1)nop -mov.f32f32 r7.z, r0.z -mov.f32f32 r7.y, r1.y -mad.f32 r0.z, c11.w, r0.x, r3.y -mad.f32 r1.y, c11.z, r0.x, r3.z -mad.f32 r2.w, c11.y, r0.x, r4.z -mad.f32 r3.x, c11.x, r0.x, r4.w -mov.f32f32 r0.z, r0.z -mov.f32f32 r1.y, r1.y -mul.f r2.w, r2.w, c16.y +(ss)mov.f32f32 r3.z, r0.z +mul.f r5.x, r2.x, r0.z +mul.f r3.y, r0.w, c16.y mul.f r3.x, r3.x, c16.x -mov.f32f32 r9.x, r0.z -mov.f32f32 r8.w, r1.y -mov.f32f32 r8.z, r2.w -mov.f32f32 r8.y, r3.x -mad.f32 r0.z, c3.w, r0.x, r1.x -mad.f32 r1.x, c3.z, r0.x, r4.y -mad.f32 r1.y, c1.y, r4.x, r5.x -mad.f32 r2.w, c1.x, r4.x, r5.y -mov.f32f32 r3.w, r0.z -mov.f32f32 r3.z, r1.x -mad.f32 r0.z, c2.y, r2.z, r1.y -mad.f32 r1.x, c2.x, r2.z, r2.w -mad.f32 r0.z, c3.y, r0.x, r0.z -mad.f32 r0.x, c3.x, r0.x, r1.x -mov.f32f32 r1.x, r5.z -mov.f32f32 r1.y, r2.x -mov.f32f32 r3.y, r0.z -mov.f32f32 r3.x, r0.x -mov.f32f32 r4.y, r1.x -mov.f32f32 r4.x, r1.y -mov.f32f32 r0.x, r0.y -mov.f32f32 r0.y, r0.w -mov.f32f32 r0.z, r2.y -nop -mov.f32f32 r2.z, r0.x -mov.f32f32 r2.y, r0.y -mov.f32f32 r2.x, r0.z -mov.f32f32 r0.x, (0.000000) -mov.f32f32 r0.y, r1.w -mov.f32f32 r0.z, r1.z -mov.f32f32 r0.w, (0.000000) -mov.f32f32 r7.x, r0.x -mov.f32f32 r4.w, r0.y -mov.f32f32 r4.z, r0.z -mov.f32f32 r2.w, r0.w -mov.f32f32 r0.x, (0.000000) -(rpt2)nop -mov.f32f32 r8.x, r0.x +mul.f r5.z, r4.x, r3.z +mul.f r5.y, r2.w, r3.z +mad.f32 r3.z, c11.z, r0.x, r4.y +mad.f32 r0.w, c3.w, r0.x, r0.y +mad.f32 r0.y, c1.z, r5.w, r6.z +(ss)mul.f r0.z, c0.y, r2.z +mad.f32 r0.y, c2.z, r2.y, r0.y +mad.f32 r2.x, c1.y, r5.w, r0.z +mad.f32 r0.z, c3.z, r0.x, r0.y +mad.f32 r0.y, c2.y, r2.y, r2.x +mul.f r2.x, c0.x, r2.z +mad.f32 r0.y, c3.y, r0.x, r0.y +mad.f32 r2.x, c1.x, r5.w, r2.x +mad.f32 r4.y, c7.x, r2.y, c7.y +mad.f32 r2.x, c2.x, r2.y, r2.x +mad.f32 r4.x, c7.x, r2.z, c7.y +mad.f32 r0.x, c3.x, r0.x, r2.x +mov.f32f32 r2.z, r6.y +mov.f32f32 r2.y, r6.x +mov.f32f32 r2.x, r1.w +mov.f32f32 r5.w, (0.000000) +mov.f32f32 r2.w, (0.000000) +mov.f32f32 r1.w, (0.000000) end nop -; VERT: outputs: r3.x (0:0) r7.y (5:9) r2.x (5:10) r8.y (5:11) r4.x (5:12) r6.y (5:13) -; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r1.z (0:0,cm=3,il=16,b=0) -; VERT: 154 instructions, 0 half, 10 full -; pos: r3.x +nop +; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11) r4.x (5:12) r5.x (5:13) +; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r4.z (0:0,cm=3,il=16,b=0) +; VERT: 102 instructions, 0 half, 7 full diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-60.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-60.asm index 9303ad4..227a081 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-60.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-60.asm @@ -4,206 +4,139 @@ @in(r0.w) in3 @in(r1.x) in4 @in(r1.y) in5 -@out(r1.z) out0 -@out(r1.w) out1 -@out(r2.x) out2 -@out(r2.y) out3 +@out(r2.x) out0 +@out(r2.y) out1 +@out(r2.z) out2 +@out(r2.w) out3 +@const(c9.x) 0x3f000000, 0x00000000, 0x3f800000, 0xbb449ba6 +@const(c10.x) 0xbf000000, 0x40000000, 0x3f800000, 0x3de38866 +@const(c11.x) 0x3fb8aa65, 0x3cf5c28f, 0x00000000, 0x00000000 +@const(c12.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.x, 4, r1.x add.f r0.y, r0.w, c9.y bary.f r0.w, 5, r1.x bary.f r1.z, 8, r1.x -add.f r1.w, r0.x, c10.x -bary.f r2.x, 10, r1.x -bary.f r2.y, 6, r1.x -add.f r2.z, r0.w, c10.x -floor.f r2.w, r1.w +add.f r2.x, r0.x, c10.x +bary.f r1.w, 9, r1.x +add.f r2.y, r0.w, c10.x +bary.f r2.z, 6, r1.x +floor.f r2.w, r2.x rcp r0.y, r0.y add.f r0.z, r0.z, c9.y -mov.f32f32 r1.z, r1.z -floor.f r3.x, r2.z -add.f r1.w, r1.w, (neg)r2.w +floor.f r3.x, r2.y +add.f r3.w, r2.z, c9.w +add.f r2.x, r2.x, (neg)r2.w (ss)mul.f r0.y, r0.z, r0.y -mov.f32f32 r3.y, r1.z -add.f r0.z, r2.z, (neg)r3.x -mov.f32f32 r1.z, r1.w +absneg.f r0.z, (neg)c7.x +add.f r2.y, r2.y, (neg)r3.x +mov.f32f32 r2.z, r2.x +add.f r2.x, r2.x, c9.z +mul.f r0.z, r0.z, c7.x +sam (f32)(w)r4.x, r1.z, s#1, t#1 +(ss)mov.f32f32 r1.z, r2.y +mul.f r1.w, c9.x, r2.z +add.f r2.z, c10.y, (neg)r2.z +mul.f r0.z, r0.z, r0.y mov.f32f32 r0.y, r0.y -absneg.f r1.w, (neg)c7.x -mov.f32f32 r0.z, r0.z -mul.f r2.z, c9.x, r1.z -add.f r2.w, c10.y, (neg)r1.z -mul.f r1.w, r1.w, c7.x -bary.f r3.x, 9, r1.x -mov.f32f32 r2.z, r2.z -mul.f r3.z, c9.x, r0.z -mul.f r1.w, r1.w, r0.y -mov.f32f32 r2.w, r2.w -add.f r0.x, r0.x, (neg)r2.z -mov.f32f32 r2.z, r3.z -mov.f32f32 r1.w, r1.w -add.f r3.z, c10.y, (neg)r0.z -mov.f32f32 r0.x, r0.x -add.f r0.w, r0.w, (neg)r2.z -mul.f r0.y, r1.w, r0.y -mov.f32f32 r1.w, r3.z -add.f r2.z, c10.x, r0.x +add.f r0.x, r0.x, (neg)r1.w +mul.f r1.w, c9.x, r1.z +mov.f32f32 r2.w, r2.z +mul.f r0.y, r0.z, r0.y +mov.f32f32 r0.z, r0.x +add.f r0.w, r0.w, (neg)r1.w add.f r0.x, c10.z, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r2.z, r2.z -mov.f32f32 r0.x, r0.x mul.f r0.y, r0.y, c11.x -add.f r3.z, c10.z, r0.w -mul.f r2.z, r2.z, c3.z -mul.f r0.x, r0.x, c3.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r3.z, r3.z -mov.f32f32 r3.w, r2.z -mov.f32f32 r4.x, r0.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r2.z, r2.z -mov.f32f32 r4.y, r3.w -add.f r0.w, c10.x, r0.w -exp2 r0.y, r0.y -(ss)mov.f32f32 r0.y, r0.y -mov.f32f32 r3.w, r4.x -mov.f32f32 r5.x, r0.x -mov.f32f32 r0.x, r0.w -add.f r0.w, c12.y, (neg)r0.y -mov.f32f32 r5.w, r3.w -mul.f r3.z, r3.z, c3.w -mul.f r0.x, r0.x, c3.w -mul.f r0.w, r0.w, c7.y -mul.f r0.y, r0.y, c9.z -mov.f32f32 r3.w, r3.z -mov.f32f32 r4.x, r0.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r6.z, r2.z -mov.f32f32 r2.z, r3.z -mov.f32f32 r4.z, r4.x -add.f r2.y, r2.y, c9.w -add.f r0.y, r0.y, r0.w -mov.f32f32 r0.w, r3.w -mov.f32f32 r5.y, r0.x -mov.f32f32 r0.x, r2.y -mov.f32f32 r0.y, r0.y -mov.f32f32 r6.x, r0.w -mov.f32f32 r0.w, r2.y -mov.f32f32 r4.w, r0.x -mov.f32f32 r0.x, r0.y -mov.f32f32 r0.y, r2.y -mov.f32f32 r6.w, r2.z -mov.f32f32 r6.y, r0.w -mov.f32f32 r0.w, r2.y -mov.f32f32 r5.z, r0.y -sam.s (f32)(x)r0.y, r4.y, s#2, t#2 -(sy)mov.f32f32 r0.y, r0.y +add.f r0.z, c10.x, r0.z +mov.f32f32 r1.w, r0.w +mul.f r3.y, r0.x, c3.z +add.f r0.x, c10.z, r0.w +mul.f r4.x, r0.z, c3.z +add.f r0.z, c10.x, r1.w +mov.f32f32 r5.x, r3.y +mul.f r3.z, r0.x, c3.w +mov.f32f32 r5.w, r4.x +mul.f r5.y, r0.z, c3.w +exp2 r0.x, r0.y +(ss)mov.f32f32 r0.y, r0.x +mov.f32f32 r5.z, r3.w +mov.f32f32 r4.y, r3.z +mov.f32f32 r6.x, r5.y +mov.f32f32 r6.y, r3.w +add.f r0.y, c12.y, (neg)r0.y +mov.f32f32 r4.z, r3.w +sam.s (f32)(x)r3.x, r3.y, s#2, t#2 +add.f r0.z, c10.y, (neg)r1.z +sam.s (f32)(x)r6.z, r5.x, s#2, t#2 +mul.f r0.x, r0.x, c9.z +add.f r0.w, r2.y, c9.z +mul.f r0.y, r0.y, c7.y +(ss)nop +sam.s (f32)(x)r5.x, r5.w, s#2, t#2 +mov.f32f32 r1.z, r0.z +mul.f r0.z, r2.x, r0.z +mul.f r1.w, r2.z, r0.w +add.f r0.x, r0.x, r0.y +mul.f r0.y, r2.w, r1.z +sam.s (f32)(x)r3.y, r4.x, s#2, t#2 +mul.f r0.w, r2.x, r0.w +(sy)cmps.f.lt r1.z, r4.w, c11.y +bary.f r2.x, 10, r1.x +mul.f r0.y, r0.y, r5.x max.f r0.x, r0.x, c9.y -mov.f32f32 r7.x, r0.w -mul.f r0.w, r2.w, r1.w -mov.f32f32 r0.y, r0.y +mad.f32 r0.y, r0.z, r6.z, r0.y +cov.u32f32 r0.z, r1.z +mad.f32 r0.y, r1.w, r3.y, r0.y min.f r0.x, r0.x, c9.z -sam.s (f32)(x)r2.y, r5.w, s#2, t#2 -nop -(sy)mov.f32f32 r2.y, r2.y -mul.f r0.y, r0.w, r0.y -sam.s (f32)(x)r0.w, r5.x, s#2, t#2 -(sy)mov.f32f32 r0.w, r0.w -add.f r1.z, r1.z, c9.z -add.f r2.z, c12.y, (neg)r0.x -add.f r3.z, c12.y, (neg)r0.x -add.f r3.w, c12.y, (neg)r0.x -mul.f r1.w, r1.z, r1.w -mul.f r2.z, r2.z, c6.z -mul.f r4.x, r3.z, c6.y -mul.f r3.w, r3.w, c6.x -mad.f32 r0.y, r1.w, r0.w, r0.y -sam.s (f32)(x)r0.w, r6.z, s#2, t#2 -add.f r0.z, r0.z, c9.z -(sy)mov.f32f32 r0.w, r0.w -mov.f32f32 r1.w, r3.x -mov.f32f32 r0.y, r0.y -mul.f r2.w, r2.w, r0.z -mul.f r0.z, r1.z, r0.z -mov.f32f32 r3.z, r1.w -mov.f32f32 r1.z, r2.x -mad.f32 r0.y, r2.w, r0.w, r0.y -bary.f r0.w, 11, r1.x -mov.f32f32 r1.w, c9.z -bary.f r2.x, 2, r1.x -mov.f32f32 r0.y, r0.y -sam (f32)(w)r2.w, r3.y, s#1, t#1 -(sy)cmps.f.lt r2.w, r3.z, c11.y -mad.f32 r0.y, r0.z, r2.y, r0.y -mov.f32f32 r0.z, r3.z -mov.f32f32 r3.x, r1.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.y, r0.y -cov.u32f32 r1.z, r2.w -mov.f32f32 r0.z, r0.z -(ss)mov.f32f32 r3.y, r0.w +mad.f32 r0.y, r0.w, r3.x, r0.y +cmps.f.ne r0.z, r0.z, c9.y +(rpt1)nop mul.f r0.y, c10.w, r0.y -cmps.f.ne r0.w, r1.z, c9.y +bary.f r2.y, 11, r1.x +add.f r0.w, c12.y, (neg)r0.x +add.f r1.z, c12.y, (neg)r0.x +mov.f32f32 r1.w, r0.y +add.f r2.z, c12.y, (neg)r0.x (rpt1)nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.z, c9.y -sam (f32)(xyz)r2.w, r3.x, s#0, t#0 -(sy)mul.f r2.x, r3.y, r2.x +sam (f32)(xyz)r2.w, r2.x, s#0, t#0 +(ss)bary.f r2.x, 2, r1.x bary.f r2.y, 1, r1.x bary.f (ei)r1.x, 0, r1.x -sel.b32 r0.z, r1.z, r0.w, r0.z -mul.f r0.w, r2.x, r0.y -mul.f r1.y, r3.x, r2.y +mul.f r0.w, r0.w, c6.z +(sy)mul.f r1.y, r3.y, r2.x +mul.f r2.x, r3.x, r2.y mul.f r1.x, r2.w, r1.x -mov.f32f32 r2.y, r1.w -mov.f32f32 r0.w, r0.w -mul.f r1.y, r1.y, r0.y -mad.f32 r0.w, c5.z, r3.y, r0.w +mul.f r1.z, r1.z, c6.y +mul.f r1.y, r1.y, r1.w +mul.f r1.w, r2.x, r1.w +mad.f32 r1.y, c5.z, r3.y, r1.y +mad.f32 r1.w, c5.y, r3.x, r1.w mul.f r0.y, r1.x, r0.y -(rpt1)nop -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.x, r1.y -mov.f32f32 r0.y, r0.y -mad.f32 r1.x, c5.y, r3.x, r1.x -mul.f r0.w, r0.x, r0.w +mul.f r1.x, r2.z, c6.x +mul.f r1.y, r0.x, r1.y +mul.f r1.w, r0.x, r1.w mad.f32 r0.y, c5.x, r2.w, r0.y -(rpt1)nop -add.f r0.w, r0.w, r2.z -mov.f32f32 r1.x, r1.x -mov.f32f32 r0.y, r0.y -nop -mul.f r0.w, r0.w, r0.z -mul.f r1.x, r0.x, r1.x +mov.f32f32 r2.x, c9.y +add.f r0.w, r1.y, r0.w +add.f r1.y, r1.w, r1.z mul.f r0.x, r0.x, r0.y -nop -mul.f r0.y, r0.w, c4.z -add.f r0.w, r1.x, r4.x -add.f r0.x, r0.x, r3.w -nop -mov.f32f32 r0.y, r0.y -mul.f r0.w, r0.w, r0.z -mul.f r0.x, r0.x, r0.z -nop -mov.f32f32 r0.y, r0.y -mul.f r0.z, r0.w, c4.y -mul.f r0.x, r0.x, c4.x -nop -mov.f32f32 r2.x, r0.y -mov.f32f32 r0.y, r0.z -mov.f32f32 r0.x, r0.x +sel.b32 r0.y, r2.x, r0.z, r4.w +mov.f32f32 r2.w, c9.z (rpt1)nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x +mul.f r0.z, r0.w, r0.y +mul.f r0.w, r1.y, r0.y (rpt1)nop -mov.f32f32 r1.w, r0.y -mov.f32f32 r1.z, r0.x +mul.f r2.z, r0.z, c4.z +mul.f r2.y, r0.w, c4.y +add.f r0.x, r0.x, r1.x +(rpt2)nop +mul.f r0.x, r0.x, r0.y +(rpt2)nop +mul.f r2.x, r0.x, c4.x end nop nop nop -; FRAG: outputs: r1.z (1:0) -; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r2.x (5:10,cm=f,il=12,b=1) r0.x (5:11,cm=f,il=16,b=1) -; FRAG: 194 instructions, 0 half, 8 full -; pos (bary): r1.x -; color: r1.z -; fragcoord: r0.x +; FRAG: outputs: r2.x (1:0) +; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r2.x (5:10,cm=f,il=12,b=1) r1.z (5:11,cm=f,il=16,b=1) +; FRAG: 129 instructions, 0 half, 7 full diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-61.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-61.asm index 0e4d5ee..9c8ac11 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-61.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-61.asm @@ -6,134 +6,99 @@ @in(r0.w) in4 @in(r1.x) in5 @in(r1.y) in6 -@in(r1.z) in8 -@in(r1.w) in9 -@out(r6.y) out0 -@out(r6.z) out1 -@out(r6.w) out2 -@out(r7.x) out3 -@out(r5.y) out4 -@out(r5.z) out5 -@out(r5.w) out6 -@out(r6.x) out7 -@out(r2.w) out8 -@out(r3.x) out9 -@out(r3.y) out10 -@out(r3.z) out11 -@out(r3.w) out12 -@out(r4.x) out13 -@out(r4.y) out14 -@out(r4.z) out15 -(sy)(ss)mul.f r2.x, c11.x, r0.w -mul.f r2.y, c11.x, r0.x -mad.f32 r2.x, c12.x, r1.x, r2.x -mad.f32 r2.y, c12.x, r0.y, r2.y -mul.f r2.z, c11.z, r0.x -mad.f32 r2.y, c13.x, r0.z, r2.y -mov.f32f32 r2.x, r2.x -mad.f32 r2.z, c12.z, r0.y, r2.z -mad.f32 r2.x, c13.x, r1.y, r2.x -add.f r2.y, r2.y, c14.x -mad.f32 r2.z, c13.z, r0.z, r2.z -mul.f r2.w, c11.y, r0.w -mov.f32f32 r2.x, r2.x -mul.f r3.x, c7.w, r2.y -mul.f r3.y, c7.z, r2.y -mul.f r3.z, c7.y, r2.y -mul.f r2.x, r2.x, (neg)c4.x -mad.f32 r2.w, c12.y, r1.x, r2.w -mul.f r3.w, c11.y, r0.x -mul.f r4.x, c7.x, r2.y -mad.f32 r3.w, c12.y, r0.y, r3.w -mov.f32f32 r2.w, r2.w -mad.f32 r3.w, c13.y, r0.z, r3.w -mad.f32 r2.w, c13.y, r1.y, r2.w -mul.f r4.y, c0.w, r2.y -mul.f r4.z, c0.z, r2.y -mul.f r4.w, c0.y, r2.y -mov.f32f32 r2.w, r2.w -add.f r3.w, r3.w, c14.y -mul.f r5.x, c0.x, r2.y -add.f r2.z, r2.z, c14.z -mad.f32 r2.x, (neg)c4.y, r2.w, r2.x -mad.f32 r2.w, c8.w, r3.w, r3.x -mad.f32 r3.x, c8.z, r3.w, r3.y -mad.f32 r3.y, c8.y, r3.w, r3.z -mov.f32f32 r2.x, r2.x +@in(r3.z) in8 +@in(r3.w) in9 +@out(r0.x) out0 +@out(r0.y) out1 +@out(r0.z) out2 +@out(r0.w) out3 +@out(r1.x) out4 +@out(r1.y) out5 +@out(r1.z) out6 +@out(r1.w) out7 +@out(r2.x) out8 +@out(r2.y) out9 +@out(r2.z) out10 +@out(r2.w) out11 +@out(r3.x) out12 +@out(r3.y) out13 +@out(r3.z) out14 +@out(r3.w) out15 +@const(c16.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 +@const(c17.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 +(sy)(ss)mul.f r1.z, c11.x, r0.w +mul.f r1.w, c11.x, r0.x +mad.f32 r1.z, c12.x, r1.x, r1.z +mad.f32 r1.w, c12.x, r0.y, r1.w +mad.f32 r1.z, c13.x, r1.y, r1.z +mad.f32 r1.w, c13.x, r0.z, r1.w +mul.f r2.x, c11.z, r0.x +mul.f r2.y, c11.y, r0.x +mul.f r1.z, r1.z, (neg)c4.x +mul.f r2.z, c11.y, r0.w +add.f r1.w, r1.w, c14.x +mad.f32 r2.z, c12.y, r1.x, r2.z +mad.f32 r2.x, c12.z, r0.y, r2.x +mad.f32 r2.z, c13.y, r1.y, r2.z +mul.f r2.w, c7.y, r1.w +mul.f r3.x, c7.x, r1.w +mul.f r3.y, c7.w, r1.w +mad.f32 r1.z, (neg)c4.y, r2.z, r1.z mul.f r0.w, c11.z, r0.w -mad.f32 r2.w, c9.w, r2.z, r2.w +mad.f32 r2.y, c12.y, r0.y, r2.y mad.f32 r0.w, c12.z, r1.x, r0.w +mad.f32 r1.x, c13.y, r0.z, r2.y +mad.f32 r0.w, c13.z, r1.y, r0.w +mul.f r1.y, c7.z, r1.w +mul.f r2.y, c0.w, r1.w +mul.f r2.z, c0.z, r1.w +mad.f32 r0.w, (neg)c4.z, r0.w, r1.z +add.f r4.x, r1.x, c14.y +mul.f r4.y, c0.y, r1.w +mul.f r4.z, c0.x, r1.w +max.f r0.w, c16.x, r0.w +mad.f32 r2.w, c8.y, r4.x, r2.w +mad.f32 r3.x, c8.x, r4.x, r3.x +mad.f32 r3.y, c8.w, r4.x, r3.y +mov.f32f32 r4.w, r0.w +mad.f32 r2.x, c13.z, r0.z, r2.x +mul.f r1.x, r0.w, c5.x +mad.f32 r0.w, c8.z, r4.x, r1.y +mul.f r1.z, r4.w, c5.z +mul.f r1.y, r4.w, c5.y +add.f r4.w, r2.x, c14.z +mad.f32 r2.x, c1.w, r4.x, r2.y +mad.f32 r2.y, c1.z, r4.x, r2.z +mad.f32 r2.z, c1.y, r4.x, r4.y +mad.f32 r2.w, c9.y, r4.w, r2.w mul.f r0.x, c11.w, r0.x -mad.f32 r1.x, c9.z, r2.z, r3.x -mad.f32 r3.x, c9.y, r2.z, r3.y -mov.f32f32 r0.w, r0.w +mad.f32 r3.x, c9.x, r4.w, r3.x mad.f32 r0.x, c12.w, r0.y, r0.x -mad.f32 r0.y, c13.z, r1.y, r0.w +mad.f32 r0.y, c9.w, r4.w, r3.y mad.f32 r0.x, c13.w, r0.z, r0.x -mad.f32 r0.z, c8.x, r3.w, r4.x -mad.f32 r0.w, c1.w, r3.w, r4.y -mov.f32f32 r0.y, r0.y +mad.f32 r0.z, c9.z, r4.w, r0.w +mad.f32 r0.w, c2.w, r4.w, r2.x +mad.f32 r4.y, c2.z, r4.w, r2.y add.f r0.x, r0.x, c14.w -mad.f32 r0.z, c9.x, r2.z, r0.z -mad.f32 r0.w, c2.w, r2.z, r0.w -mad.f32 r0.y, (neg)c4.z, r0.y, r2.x -mad.f32 r1.y, c10.w, r0.x, r2.w -mad.f32 r1.x, c10.z, r0.x, r1.x -mad.f32 r2.x, c10.y, r0.x, r3.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.x, r1.x -mul.f r2.x, r2.x, c15.y -max.f r0.y, c16.x, r0.y -mov.f32f32 r3.z, r1.y -mov.f32f32 r3.y, r1.x -mov.f32f32 r3.x, r2.x -mov.f32f32 r0.y, r0.y -mad.f32 r0.z, c10.x, r0.x, r0.z +mad.f32 r5.x, c2.y, r4.w, r2.z +mad.f32 r4.x, c1.x, r4.x, r4.z +mad.f32 r3.y, c6.x, r4.w, c6.y +mad.f32 r2.x, c10.y, r0.x, r2.w +mad.f32 r3.x, c10.x, r0.x, r3.x +mad.f32 r2.w, c10.w, r0.x, r0.y +mad.f32 r2.z, c10.z, r0.x, r0.z +mul.f r2.y, r2.x, c15.y +mul.f r2.x, r3.x, c15.x mad.f32 r0.w, c3.w, r0.x, r0.w -mad.f32 r1.x, c1.z, r3.w, r4.z -mul.f r1.y, r0.y, c5.z -mul.f r2.x, r0.y, c5.y -mul.f r0.y, r0.y, c5.x -mul.f r0.z, r0.z, c15.x -mov.f32f32 r1.y, r1.y -mov.f32f32 r2.x, r2.x -mov.f32f32 r0.y, r0.y -nop -mov.f32f32 r5.w, r1.y -mov.f32f32 r5.z, r2.x -mov.f32f32 r5.y, r0.y -mov.f32f32 r2.w, r0.z -mov.f32f32 r7.x, r0.w -mad.f32 r0.y, c2.z, r2.z, r1.x -mad.f32 r0.z, c1.y, r3.w, r4.w -mad.f32 r0.y, c3.z, r0.x, r0.y -mad.f32 r0.z, c2.y, r2.z, r0.z -mad.f32 r0.w, c1.x, r3.w, r5.x -mad.f32 r1.x, c6.x, r2.z, c6.y -mov.f32f32 r6.w, r0.y -mad.f32 r0.y, c3.y, r0.x, r0.z -mad.f32 r0.z, c2.x, r2.z, r0.w -mov.f32f32 r0.w, r1.x -mad.f32 r1.x, c6.x, r2.y, c6.y -mov.f32f32 r6.z, r0.y -mad.f32 r0.x, c3.x, r0.x, r0.z -mov.f32f32 r4.x, r0.w -mov.f32f32 r0.y, r1.x -nop -mov.f32f32 r6.y, r0.x -mov.f32f32 r0.x, r1.w -mov.f32f32 r3.w, r0.y -mov.f32f32 r0.y, r1.z -mov.f32f32 r0.z, (0.000000) -mov.f32f32 r4.z, r0.x -nop -mov.f32f32 r4.y, r0.y -mov.f32f32 r6.x, r0.z +mad.f32 r0.z, c3.z, r0.x, r4.y +mad.f32 r0.y, c3.y, r0.x, r5.x +mad.f32 r4.x, c2.x, r4.w, r4.x +mad.f32 r3.x, c6.x, r1.w, c6.y +mad.f32 r0.x, c3.x, r0.x, r4.x +mov.f32f32 r1.w, (0.000000) end nop -; VERT: outputs: r6.y (0:0) r5.y (5:9) r2.w (5:10) r3.w (5:11) -; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r1.z (0:0,cm=3,il=16,b=0) -; VERT: 107 instructions, 0 half, 8 full -; pos: r6.y +; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11) +; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r3.z (0:0,cm=3,il=16,b=0) +; VERT: 71 instructions, 0 half, 6 full diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-62.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-62.asm index 60492a1..9e235d2 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-62.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-62.asm @@ -4,710 +4,481 @@ @in(r0.w) in3 @in(r1.x) in4 @in(r1.y) in5 -@out(r3.x) out0 -@out(r3.y) out1 -@out(r3.z) out2 -@out(r3.w) out3 -(sy)(ss)bary.f r0.x, 7, r1.x -bary.f r0.y, 8, r1.x +@out(r2.x) out0 +@out(r2.y) out1 +@out(r2.z) out2 +@out(r2.w) out3 +@const(c14.x) 0x3f000000, 0x00000000, 0x43160000, 0x3bdb8bac +@const(c15.x) 0x41000000, 0x3f600000, 0x3e000000, 0x3f233333 +@const(c16.x) 0x40000000, 0xbf800000, 0xbb449ba6, 0xbf000000 +@const(c17.x) 0x3f800000, 0xbf000000, 0x3de38866, 0x3fb8aa65 +@const(c18.x) 0x3cf5c28f, 0x00000000, 0x00000000, 0x00000000 +@const(c19.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 +(sy)(ss)bary.f r0.x, 20, r1.x +bary.f r0.y, 7, r1.x +bary.f r1.z, 8, r1.x add.f r0.w, r0.w, c14.y -bary.f r1.z, 9, r1.x -mov.f32f32 r0.x, r0.x -bary.f r1.w, 20, r1.x -add.f r2.x, r0.y, c16.w -add.f r2.y, r1.z, c16.w -bary.f r2.z, 18, r1.x -mul.f r2.w, r1.w, r0.x -bary.f r3.x, 15, r1.x -floor.f r3.y, r2.x +bary.f r1.w, 9, r1.x +mul.f r2.x, r0.x, r0.y +bary.f r2.y, 21, r1.x +bary.f r2.z, 15, r1.x +add.f r2.w, r1.z, c16.w +add.f r3.y, r1.w, c16.w rcp r0.w, r0.w add.f r0.z, r0.z, c14.y -floor.f r3.z, r2.y -mov.f32f32 r3.x, r3.x -bary.f r3.w, 21, r1.x -add.f r2.x, r2.x, (neg)r3.y +mad.f32 r2.x, r2.y, r2.z, r2.x +bary.f r3.z, 22, r1.x +bary.f r3.w, 3, r1.x +floor.f r4.x, r2.w (ss)mul.f r0.z, r0.z, r0.w -(ss)add.f r0.w, r2.y, (neg)r3.z -mad.f32 r2.y, r3.w, r3.x, r2.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r0.z, r0.z -absneg.f r2.w, (neg)c11.x -mov.f32f32 r2.y, r2.y -bary.f r3.y, 3, r1.x -mul.f r3.z, c14.x, r2.x -mul.f r2.w, r2.w, c11.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r3.y, r3.y -bary.f r4.x, 22, r1.x -mov.f32f32 r3.z, r3.z -mul.f r2.w, r2.w, r0.z -mul.f r4.y, c14.x, r0.w -mad.f32 r2.y, r4.x, r3.y, r2.y -add.f r0.y, r0.y, (neg)r3.z -mov.f32f32 r2.w, r2.w -mov.f32f32 r3.z, r4.y -mov.f32f32 r2.y, r2.y -bary.f r4.y, 12, r1.x -mov.f32f32 r0.y, r0.y -mul.f r0.z, r2.w, r0.z -add.f r1.z, r1.z, (neg)r3.z -mul.f r2.w, r1.w, r4.y -bary.f r3.z, 13, r1.x -add.f r4.z, c17.y, r0.y +(ss)absneg.f r0.w, (neg)c11.x +mad.f32 r2.x, r3.z, r3.w, r2.x +add.f r2.w, r2.w, (neg)r4.x +floor.f r4.x, r3.y +mul.f r0.w, r0.w, c11.x +mov.f32f32 r4.y, r2.x +bary.f r4.z, 12, r1.x +mov.f32f32 r4.w, r2.w +mul.f r0.w, r0.w, r0.z mov.f32f32 r0.z, r0.z -add.f r0.y, c17.x, r0.y -mad.f32 r2.w, r3.w, r3.z, r2.w -mov.f32f32 r4.z, r4.z +mul.f r5.x, r0.x, r4.z +bary.f r5.y, 13, r1.x +mul.f r5.z, c14.x, r4.w +mul.f r0.z, r0.w, r0.z +add.f r0.w, r3.y, (neg)r4.x +mad.f32 r3.y, r2.y, r5.y, r5.x +bary.f r4.x, 14, r1.x +add.f r1.z, r1.z, (neg)r5.z mul.f r0.z, r0.z, c17.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r2.w, r2.w -bary.f r4.w, 14, r1.x -mul.f r4.z, r4.z, c5.z -mov.f32f32 r0.z, r0.z -mul.f r0.y, r0.y, c5.z -mad.f32 r2.w, r4.x, r4.w, r2.w -mov.f32f32 r5.x, r4.z -mov.f32f32 r4.z, r4.z -mov.f32f32 r5.y, r0.y -mul.f r5.z, r2.w, r2.w -mov.f32f32 r5.w, r5.x -mad.f32 r5.x, r2.y, r2.y, r5.z -mov.f32f32 r1.z, r1.z +mov.f32f32 r5.x, r0.w +mad.f32 r3.y, r3.z, r4.x, r3.y +mov.f32f32 r5.z, r1.z +add.f r1.z, c17.x, r1.z +mul.f r5.w, c14.x, r5.x +mul.f r6.x, r3.y, r3.y +add.f r5.z, c17.y, r5.z +mad.f32 r2.x, r2.x, r4.y, r6.x +bary.f r6.x, 4, r1.x exp2 r0.z, r0.z -(ss)mov.f32f32 r0.z, r0.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r5.x, r5.x -bary.f r5.z, 4, r1.x -add.f r6.x, c17.y, r1.z -add.f r6.y, c19.y, (neg)r0.z -mov.f32f32 r0.y, r0.y -mul.f r6.z, r1.w, r5.z -bary.f r6.w, 5, r1.x -mov.f32f32 r6.x, r6.x +(ss)mov.f32f32 r6.y, r0.z +mul.f r6.z, r5.z, c5.z +add.f r1.w, r1.w, (neg)r5.w +mul.f r5.z, r0.x, r6.x +bary.f r5.w, 5, r1.x +mov.f32f32 r7.y, r6.z +add.f r6.y, c19.y, (neg)r6.y +mov.f32f32 r6.w, r1.w +mad.f32 r5.z, r2.y, r5.w, r5.z +bary.f r8.x, 6, r1.x mul.f r6.y, r6.y, c11.y -mul.f r0.z, r0.z, c17.x -mad.f32 r6.z, r3.w, r6.w, r6.z -mul.f r7.x, r6.x, c5.w -mov.f32f32 r7.y, r0.y -add.f r0.y, c17.x, r1.z -mov.f32f32 r1.z, r6.z -bary.f r6.z, 6, r1.x -mov.f32f32 r6.x, r7.x +(ss)mul.f r0.z, r0.z, c17.x +add.f r6.w, c17.y, r6.w +mad.f32 r5.z, r3.z, r8.x, r5.z +mul.f r8.y, r1.z, c5.z +add.f r1.z, c17.x, r1.w add.f r0.z, r0.z, r6.y -mov.f32f32 r0.y, r0.y -mad.f32 r1.z, r4.x, r6.z, r1.z -mov.f32f32 r6.x, r6.x -bary.f r6.y, 10, r1.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r1.z, r1.z -mul.f r0.y, r0.y, c5.w -add.f r8.x, r6.y, c16.z -mov.f32f32 r8.y, r5.y -mad.f32 r5.x, r1.z, r1.z, r5.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r5.y, r8.x -mov.f32f32 r7.z, r0.y -mov.f32f32 r7.x, r7.x -mov.f32f32 r9.x, r4.z -mov.f32f32 r6.y, r5.y -rsq r4.z, r5.x -(ss)mov.f32f32 r4.z, r4.z +mov.f32f32 r1.w, r5.z +mul.f r9.y, r6.w, c5.w +mov.f32f32 r9.x, r8.y +mul.f r8.z, r1.z, c5.w +mad.f32 r1.z, r5.z, r1.w, r2.x +mov.f32f32 r7.z, r9.y max.f r0.z, r0.z, c14.y -(ss)mov.f32f32 r5.x, r7.z -mov.f32f32 r8.z, r7.x -mul.f r1.z, r1.z, r4.z +bary.f r2.x, 10, r1.x +mov.f32f32 r6.w, r8.z +add.f r4.w, c16.x, (neg)r4.w +mul.f r0.x, r0.x, r0.x +rsq r1.z, r1.z +(ss)mov.f32f32 r5.z, r1.z +add.f r8.w, r2.x, c16.z min.f r0.z, r0.z, c17.x -sam.s (f32)(x)r5.y, r5.w, s#4, t#4 -(sy)mov.f32f32 r5.y, r5.y -mov.f32f32 r7.z, r5.x -mov.f32f32 r1.z, r1.z -add.f r5.x, c19.y, (neg)r0.z -(ss)add.f r5.w, c19.y, (neg)r0.z -add.f r6.x, c19.y, (neg)r0.z -mul.f r1.z, r1.z, c15.x -mov.f32f32 r5.y, r5.y -add.f r6.y, c16.x, (neg)r2.x -mul.f r5.x, r5.x, c10.z -mov.f32f32 r1.z, r1.z -mul.f r5.w, r5.w, c10.y -mul.f r6.x, r6.x, c10.x -mov.f32f32 r6.y, r6.y -add.f r7.x, c16.x, (neg)r0.w -mov.f32f32 r7.w, r8.x -mul.f r2.y, r2.y, r4.z -rcp r1.z, r1.z -(ss)mov.f32f32 r1.z, r1.z -mul.f r2.w, r2.w, r4.z -mov.f32f32 r4.z, r7.x -mov.f32f32 r2.y, r2.y -mov.f32f32 r7.w, r7.w -mov.f32f32 r2.w, r2.w -mul.f r7.x, r6.y, r4.z -mov.f32f32 r2.y, r2.y -mul.f r1.w, r1.w, r1.w -absneg.f r2.w, (neg)r2.w -mad.f32 r1.w, r3.w, r3.w, r1.w -mul.f r3.w, r7.x, r5.y -mov.f32f32 r5.y, r8.x -sam.s (f32)(x)r7.x, r7.y, s#4, t#4 -(sy)mov.f32f32 r7.x, r7.x -mov.f32f32 r1.w, r1.w -mov.f32f32 r0.y, r0.y -mad.f32 r1.w, r4.x, r4.x, r1.w -mov.f32f32 r8.w, r5.y -mov.f32f32 r4.x, r2.z -add.f r2.x, r2.x, c17.x +(ss)mul.f r1.z, r3.y, r1.z +mul.f r1.w, r1.w, r5.z +mov.f32f32 r7.w, r8.w +add.f r2.x, c19.y, (neg)r0.z +add.f r3.y, c19.y, (neg)r0.z +mul.f r1.w, r1.w, c15.x +add.f r6.y, c19.y, (neg)r0.z +mov.f32f32 r9.z, r8.w +mov.f32f32 r7.x, r8.w +nop +sam.s (f32)(x)r9.w, r7.y, s#4, t#4 +(ss)mul.f r7.y, r2.x, c10.z +mov.f32f32 r2.x, r4.w +rcp r1.w, r1.w +(ss)mov.f32f32 r7.z, r1.w +mul.f r4.y, r4.y, r5.z +mad.f32 r0.x, r2.y, r2.y, r0.x +add.f r2.y, c16.x, (neg)r5.x +mad.f32 r0.x, r3.z, r3.z, r0.x +mul.f r3.y, r3.y, c10.y +mul.f r3.z, r6.y, c10.x +mov.f32f32 r5.x, r2.y +absneg.f r1.z, (neg)r1.z +sam.s (f32)(x)r10.x, r9.x, s#4, t#4 +sam.s (f32)(x)r6.y, r6.z, s#4, t#4 +sam.s (f32)(x)r8.y, r8.y, s#4, t#4 +add.f r2.w, r2.w, c17.x add.f r0.w, r0.w, c17.x -bary.f r5.y, 16, r1.x -(ss)mov.f32f32 r7.y, r4.x -sqrt r1.w, r1.w -(ss)mov.f32f32 r1.w, r1.w -sam.s (f32)(x)r4.x, r8.y, s#4, t#4 -(sy)mov.f32f32 r4.x, r4.x -mul.f r4.z, r2.x, r4.z -mov.f32f32 r9.y, r0.y -add.f r0.y, c14.z, (neg)r1.w -mov.f32f32 r1.w, r8.x -bary.f r7.w, 19, r1.x -mul.f r6.y, r6.y, r0.w -mov.f32f32 r0.y, r0.y -mad.f32 r3.w, r4.z, r4.x, r3.w -mov.f32f32 r9.z, r1.w -mov.f32f32 r1.w, r7.w -mul.f r0.y, c12.z, r0.y -mov.f32f32 r3.w, r3.w -mov.f32f32 r4.x, r5.y -mov.f32f32 r7.z, r1.w -mov.f32f32 r0.y, r0.y -sam.s (f32)(x)r1.w, r9.x, s#4, t#4 -(sy)mov.f32f32 r1.w, r1.w -mov.f32f32 r8.x, r4.x -bary.f r4.x, 17, r1.x -mul.f r0.y, r0.y, c14.w -mad.f32 r1.w, r6.y, r1.w, r3.w -(ss)nop -sam (f32)(w)r8.y, r7.y, s#2, t#2 -(sy)cmps.f.lt r3.w, r9.x, c15.y -mov.f32f32 r4.x, r4.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.w, r1.w -mul.f r0.w, r2.x, r0.w -cov.u32f32 r2.x, r3.w -mul.f r2.w, r2.w, r0.y -mul.f r0.y, r2.y, r0.y -mad.f32 r0.w, r0.w, r7.x, r1.w -cmps.f.ne r1.w, r2.x, c14.y -mov.f32f32 r2.x, r2.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r2.y, c15.z -mul.f r2.x, r2.x, r1.z -mul.f r0.y, r0.y, r1.z +sqrt r0.x, r0.x +(ss)add.f r0.x, c14.z, (neg)r0.x +mul.f r2.x, r2.x, r5.x +mul.f r2.y, r2.w, r2.y +mul.f r4.w, r4.w, r0.w +mul.f r0.x, c12.z, r0.x +(sy)mul.f r2.x, r2.x, r9.w +bary.f r6.z, 18, r1.x +mul.f r0.w, r2.w, r0.w +mul.f r0.x, r0.x, c14.w +mad.f32 r2.x, r2.y, r10.x, r2.x +bary.f r6.w, 19, r1.x +bary.f r8.z, 16, r1.x +mov.f32f32 r2.y, r0.x +mad.f32 r2.x, r4.w, r6.y, r2.x +mul.f r0.x, r1.z, r0.x +mad.f32 r0.w, r0.w, r8.y, r2.x +mul.f r1.z, r4.y, r2.y +sam (f32)(w)r9.x, r6.z, s#2, t#2 +bary.f r8.w, 17, r1.x +mul.f r0.x, r0.x, r1.w +(sy)cmps.f.lt r1.w, r9.w, c15.y +mul.f r1.z, r1.z, r7.z mul.f r0.w, c17.z, r0.w -mov.f32f32 r1.z, c14.y -mov.f32f32 r2.x, r2.x +mov.f32f32 r2.x, r0.x +cov.u32f32 r1.w, r1.w +mov.f32f32 r2.y, r1.z +cmps.f.lt r2.w, r9.w, c15.y +mov.f32f32 r4.y, r0.w +cmps.f.ne r1.w, r1.w, c14.y +mov.f32f32 r4.w, c14.y +mov.f32f32 r5.x, c14.y +cov.u32f32 r2.w, r2.w +sam (f32)(w)r8.y, r8.z, s#1, t#1 +(sy)cmps.f.lt r5.z, r9.x, c18.x +sel.b32 r1.z, r1.z, r1.w, r4.w +sel.b32 r0.x, r0.x, r1.w, r5.x +(rpt1)nop +add.f r1.w, r6.w, r1.z +add.f r1.z, r6.z, r0.x +cmps.f.ne r0.x, r2.w, c14.y +cov.u32f32 r2.w, r5.z +mov.f32f32 r4.w, c15.z +mov.f32f32 r5.x, c14.y mov.f32f32 r0.y, r0.y -mov.f32f32 r0.w, r0.w -sel.b32 r1.z, r2.y, r1.w, r1.z -mov.f32f32 r1.w, r2.x -mov.f32f32 r2.y, r0.y -mov.f32f32 r2.w, r0.y -mov.f32f32 r3.w, r0.y -mov.f32f32 r1.w, r1.w -cmps.f.lt r4.z, r9.x, c15.y -mov.f32f32 r2.y, r2.y -mov.f32f32 r2.w, r2.w +mov.f32f32 r5.z, r2.z +sam (f32)(w)r6.y, r1.z, s#2, t#2 +cmps.f.ne r2.z, r2.w, c14.y +sel.b32 r0.x, r4.w, r0.x, r5.x +mov.f32f32 r2.w, c14.y mov.f32f32 r3.w, r3.w -cov.u32f32 r4.z, r4.z -mov.f32f32 r5.y, r2.x -mov.f32f32 r6.y, r0.y -mov.f32f32 r7.x, r2.x -cmps.f.ne r4.z, r4.z, c14.y -(ss)mov.f32f32 r7.y, c14.y -mov.f32f32 r7.z, c14.y -mov.f32f32 r5.y, r5.y -mov.f32f32 r6.y, r6.y -sel.b32 r1.w, r1.w, r4.z, r7.y -sel.b32 r2.y, r2.y, r4.z, r7.z -mov.f32f32 r4.z, r7.x -mov.f32f32 r7.x, r0.y -add.f r1.w, r2.z, r1.w -add.f r2.y, r7.w, r2.y -mov.f32f32 r2.z, r2.x -mov.f32f32 r7.y, r0.y -mov.f32f32 r7.z, r1.w -mov.f32f32 r7.w, r2.y -mov.f32f32 r7.x, r7.x -mov.f32f32 r2.z, r2.z -mov.f32f32 r8.y, r7.z -mov.f32f32 r8.z, r7.w -mov.f32f32 r7.y, r7.y -mov.f32f32 r7.z, r2.x -mov.f32f32 r7.w, r0.y -mov.f32f32 r8.w, r2.x -mov.f32f32 r9.x, r0.y -mov.f32f32 r9.y, r2.x -sam (f32)(w)r9.z, r8.y, s#2, t#2 -add.f r1.z, c15.y, (neg)r1.z -mov.f32f32 r7.z, r7.z -mov.f32f32 r7.w, r7.w -(ss)mov.f32f32 r8.z, r8.w -(sy)cmps.f.lt r8.y, r10.y, r1.z -cmps.f.lt r8.w, r10.y, r1.z -mov.f32f32 r9.x, r9.x -mov.f32f32 r9.y, r9.y +mov.f32f32 r4.w, (0.000000) +add.f r0.x, c15.y, (neg)r0.x +sel.b32 r5.x, r2.w, r2.z, r9.x +bary.f r6.y, 2, r1.x +(ss)bary.f r6.z, 1, r1.x +(sy)cmps.f.lt r2.z, r7.x, r0.x +cmps.f.lt r2.w, r7.x, r0.x +bary.f r6.w, 23, r1.x +bary.f r7.x, 24, r1.x +cov.u32f32 r2.z, r2.z +cov.u32f32 r2.w, r2.w +bary.f r7.z, 25, r1.x +bary.f (ei)r1.x, 0, r1.x +cmps.f.ne r1.y, r2.z, c14.y +mov.f32f32 r2.z, c14.y +cmps.f.ne r2.w, r2.w, c14.y +mov.f32f32 r7.w, c14.y +mov.f32f32 r8.y, c15.z +sel.b32 r2.z, r2.x, r1.y, r2.z +mov.f32f32 r8.z, c14.y +sel.b32 r1.y, r2.y, r1.y, r7.w +mov.f32f32 r7.w, c14.y +add.f r1.z, r1.z, r2.z +sel.b32 r2.z, r8.y, r2.w, r8.z +add.f r8.z, r1.w, r1.y +mov.f32f32 r1.y, c14.y +mov.f32f32 r8.y, r1.z +add.f r0.x, r0.x, (neg)r2.z +mov.f32f32 r1.w, r8.z +mov.f32f32 r2.z, c14.y +mov.f32f32 r2.w, c15.z +mov.f32f32 r8.w, c14.y +mov.f32f32 r9.x, c14.y +sam (f32)(w)r9.y, r8.y, s#2, t#2 +(sy)(ss)cmps.f.lt r8.y, r10.x, r0.x +mov.f32f32 r0.x, r0.x +mov.f32f32 r8.z, c14.y +mov.f32f32 r9.y, c15.z cov.u32f32 r8.y, r8.y -cov.u32f32 r8.w, r8.w -mov.f32f32 r9.z, r2.x -nop -mov.f32f32 r8.y, r8.y -cmps.f.ne r8.w, r8.w, c14.y -mov.f32f32 r9.w, c15.z +cmps.f.lt r9.z, r10.x, r0.x +mov.f32f32 r9.w, c14.y mov.f32f32 r10.x, c14.y -cmps.f.ne r10.y, r8.y, c14.y -mov.f32f32 r9.z, r9.z -mov.f32f32 r10.z, c14.y -mov.f32f32 r10.w, c14.y -sel.b32 r8.w, r9.w, r8.w, r10.x -mov.f32f32 r8.y, r4.x -sel.b32 r4.x, r9.z, r10.y, r10.z -sel.b32 r9.x, r9.x, r10.y, r10.w -add.f r1.z, r1.z, (neg)r8.w -bary.f r8.w, 23, r1.x -add.f r1.w, r1.w, r4.x -add.f r2.y, r2.y, r9.x -mov.f32f32 r1.z, r1.z -sam (f32)(w)r9.z, r8.x, s#1, t#1 -(sy)cmps.f.lt r4.x, r10.y, c18.x -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.y, r2.y -(ss)mov.f32f32 r8.x, r10.y -cov.u32f32 r4.x, r4.x -mov.f32f32 r8.y, r1.w -mov.f32f32 r9.x, r2.y -mov.f32f32 r8.x, r8.x -mov.f32f32 r8.w, r8.w -mov.f32f32 r9.z, r8.y -mov.f32f32 r9.w, r9.x -cmps.f.ne r4.x, r4.x, c14.y -(rpt1)nop -mov.f32f32 r8.y, c14.y -bary.f r9.x, 24, r1.x -bary.f r10.x, 25, r1.x -sam (f32)(w)r10.y, r9.z, s#2, t#2 -(sy)(ss)cmps.f.lt r9.z, r11.x, r1.z -cmps.f.lt r9.w, r11.x, r1.z -sel.b32 r4.x, r8.y, r4.x, r8.x -mov.f32f32 r8.x, r9.x -cov.u32f32 r8.y, r9.z -cov.u32f32 r9.x, r9.w -mov.f32f32 r9.z, r10.x -mov.f32f32 r9.w, (0.000000) -mov.f32f32 r8.y, r8.y -cmps.f.ne r9.x, r9.x, c14.y -mov.f32f32 r10.x, c15.z -mov.f32f32 r10.y, c14.y cmps.f.ne r8.y, r8.y, c14.y +mov.f32f32 r10.y, c14.y +cov.u32f32 r9.z, r9.z mov.f32f32 r10.z, c14.y mov.f32f32 r10.w, c14.y -sel.b32 r9.x, r10.x, r9.x, r10.y -bary.f r10.x, 2, r1.x -sel.b32 r9.y, r9.y, r8.y, r10.z -sel.b32 r7.w, r7.w, r8.y, r10.w -add.f r1.z, r1.z, (neg)r9.x -bary.f r8.y, 1, r1.x -add.f r1.w, r1.w, r9.y -add.f r2.y, r2.y, r7.w -mov.f32f32 r1.z, r1.z -bary.f (ei)r1.x, 0, r1.x -mov.f32f32 r1.y, r1.w -mov.f32f32 r1.w, r2.y -mov.f32f32 r2.y, c14.y -mov.f32f32 r7.w, c14.y -mov.f32f32 r9.x, r1.y -mov.f32f32 r9.y, r1.w -mov.f32f32 r10.y, c14.y +sel.b32 r10.y, r2.x, r8.y, r10.y +cmps.f.ne r9.z, r9.z, c14.y +sel.b32 r8.y, r2.y, r8.y, r10.z mov.f32f32 r10.z, c15.z -mov.f32f32 r10.w, r9.x -mov.f32f32 r11.x, r9.y -mov.f32f32 r9.x, c14.y -mov.f32f32 r9.y, c14.y -mov.f32f32 r11.y, c14.y -mov.f32f32 r11.z, c15.z -mov.f32f32 r11.w, c14.y -mov.f32f32 r12.x, c14.y -sam (f32)(w)r12.y, r10.w, s#2, t#2 -(sy)(ss)cmps.f.lt r10.w, r13.x, r1.z -cmps.f.lt r11.x, r13.x, r1.z -mov.f32f32 r12.y, c14.y -mov.f32f32 r12.z, c15.z -cov.u32f32 r10.w, r10.w -cov.u32f32 r11.x, r11.x -mov.f32f32 r12.w, c14.y -mov.f32f32 r13.x, c14.y -mov.f32f32 r10.w, r10.w -cmps.f.ne r11.x, r11.x, c14.y -mov.f32f32 r13.y, c15.z -mov.f32f32 r13.z, c14.y -cmps.f.ne r10.w, r10.w, c14.y -mov.f32f32 r13.w, c14.y -mov.f32f32 r14.x, c14.y -sel.b32 r11.x, r13.y, r11.x, r13.z -nop -sel.b32 r8.z, r8.z, r10.w, r13.w -sel.b32 r7.y, r7.y, r10.w, r14.x -add.f r1.z, r1.z, (neg)r11.x -nop -add.f r1.y, r1.y, r8.z -add.f r1.w, r1.w, r7.y -mov.f32f32 r1.z, r1.z +add.f r1.z, r1.z, r10.y +mov.f32f32 r10.y, c14.y +add.f r11.y, r1.w, r8.y +mov.f32f32 r1.w, c15.z +mov.f32f32 r11.x, r1.z +sel.b32 r8.y, r10.z, r9.z, r10.y +mov.f32f32 r9.z, r11.y +mov.f32f32 r10.y, c14.y +mov.f32f32 r10.z, c14.y +mov.f32f32 r11.z, c14.y +mov.f32f32 r11.w, c15.z +sam (f32)(w)r12.x, r11.x, s#2, t#2 +add.f r0.x, r0.x, (neg)r8.y +mov.f32f32 r8.y, c14.y +(ss)mov.f32f32 r11.x, c14.y nop -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.w, r1.w -(rpt1)nop -mov.f32f32 r7.y, r1.y -mov.f32f32 r8.z, r1.w -(rpt1)nop -mov.f32f32 r10.w, r7.y -mov.f32f32 r11.x, r8.z -(rpt5)nop -sam (f32)(w)r13.y, r10.w, s#2, t#2 -(sy)cmps.f.lt r7.y, r14.x, r1.z -cmps.f.lt r8.z, r14.x, r1.z +(sy)cmps.f.lt r11.y, r12.w, r0.x +mov.f32f32 r0.x, r0.x (rpt1)nop -cov.u32f32 r7.y, r7.y -cov.u32f32 r8.z, r8.z +cov.u32f32 r11.y, r11.y +cmps.f.lt r12.x, r12.w, r0.x (rpt1)nop -mov.f32f32 r7.y, r7.y -cmps.f.ne r8.z, r8.z, c14.y +cmps.f.ne r11.y, r11.y, c14.y +cov.u32f32 r12.x, r12.x (rpt1)nop -cmps.f.ne r7.y, r7.y, c14.y -sel.b32 r8.z, r12.z, r8.z, r12.y -(rpt1)nop -sel.b32 r7.z, r7.z, r7.y, r13.x -sel.b32 r7.x, r7.x, r7.y, r12.w -add.f r1.z, r1.z, (neg)r8.z +sel.b32 r11.x, r2.x, r11.y, r11.x +cmps.f.ne r12.x, r12.x, c14.y +sel.b32 r8.y, r2.y, r11.y, r8.y nop -add.f r1.y, r1.y, r7.z -add.f r1.w, r1.w, r7.x -mov.f32f32 r1.z, r1.z +add.f r1.z, r1.z, r11.x +sel.b32 r11.x, r11.w, r12.x, r11.z +add.f r11.z, r9.z, r8.y nop -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.w, r1.w -(rpt1)nop -mov.f32f32 r7.x, r1.y -mov.f32f32 r7.y, r1.w -(rpt1)nop -(ss)mov.f32f32 r10.w, r7.x -mov.f32f32 r11.x, r7.y -(rpt5)nop -sam (f32)(w)r12.y, r10.w, s#2, t#2 -(sy)cmps.f.lt r7.x, r13.x, r1.z -cmps.f.lt r7.y, r13.x, r1.z -(rpt1)nop -cov.u32f32 r7.x, r7.x -cov.u32f32 r7.y, r7.y +mov.f32f32 r11.y, r1.z +add.f r0.x, r0.x, (neg)r11.x +mov.f32f32 r8.y, r11.z +(rpt3)nop +sam (f32)(w)r11.x, r11.y, s#2, t#2 +(sy)cmps.f.lt r9.z, r11.w, r0.x +mov.f32f32 r0.x, r0.x (rpt1)nop -mov.f32f32 r7.x, r7.x -cmps.f.ne r7.y, r7.y, c14.y +cov.u32f32 r9.z, r9.z +cmps.f.lt r11.x, r11.w, r0.x (rpt1)nop -cmps.f.ne r7.x, r7.x, c14.y -sel.b32 r7.y, r11.z, r7.y, r11.y +cmps.f.ne r9.z, r9.z, c14.y +cov.u32f32 r11.x, r11.x (rpt1)nop -sel.b32 r2.z, r2.z, r7.x, r12.x -sel.b32 r6.y, r6.y, r7.x, r11.w -add.f r1.z, r1.z, (neg)r7.y +sel.b32 r10.z, r2.x, r9.z, r10.z +cmps.f.ne r11.x, r11.x, c14.y +sel.b32 r9.z, r2.y, r9.z, r10.y nop -add.f r1.y, r1.y, r2.z -add.f r1.w, r1.w, r6.y -mov.f32f32 r1.z, r1.z +add.f r1.z, r1.z, r10.z +sel.b32 r1.w, r1.w, r11.x, r10.w +add.f r10.z, r8.y, r9.z nop -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.w, r1.w +mov.f32f32 r10.y, r1.z +add.f r0.x, r0.x, (neg)r1.w +mov.f32f32 r1.w, r10.z +(rpt3)nop +sam (f32)(w)r10.y, r10.y, s#2, t#2 +(sy)cmps.f.lt r8.y, r11.x, r0.x +mov.f32f32 r0.x, r0.x (rpt1)nop -mov.f32f32 r2.z, r1.y -mov.f32f32 r6.y, r1.w +cov.u32f32 r8.y, r8.y +cmps.f.lt r9.z, r11.x, r0.x (rpt1)nop -mov.f32f32 r7.x, r2.z -mov.f32f32 r7.y, r6.y -(rpt5)nop -sam (f32)(w)r10.w, r7.x, s#2, t#2 -(sy)cmps.f.lt r2.z, r11.z, r1.z -cmps.f.lt r6.y, r11.z, r1.z +cmps.f.ne r8.y, r8.y, c14.y +cov.u32f32 r9.z, r9.z (rpt1)nop -cov.u32f32 r2.z, r2.z -cov.u32f32 r6.y, r6.y +sel.b32 r10.x, r2.x, r8.y, r10.x +cmps.f.ne r9.z, r9.z, c14.y +sel.b32 r8.y, r2.y, r8.y, r9.w +nop +add.f r1.z, r1.z, r10.x +sel.b32 r8.z, r9.y, r9.z, r8.z +add.f r9.z, r1.w, r8.y +nop +mov.f32f32 r9.y, r1.z +add.f r0.x, r0.x, (neg)r8.z +mov.f32f32 r1.w, r9.z +(rpt3)nop +sam (f32)(w)r9.y, r9.y, s#2, t#2 +(sy)cmps.f.lt r8.y, r10.x, r0.x +mov.f32f32 r0.x, r0.x (rpt1)nop -mov.f32f32 r2.z, r2.z -cmps.f.ne r6.y, r6.y, c14.y +cov.u32f32 r8.y, r8.y +cmps.f.lt r8.z, r10.x, r0.x (rpt1)nop -cmps.f.ne r2.z, r2.z, c14.y -sel.b32 r6.y, r10.z, r6.y, r10.y +cmps.f.ne r8.y, r8.y, c14.y +cov.u32f32 r8.z, r8.z (rpt1)nop -sel.b32 r4.z, r4.z, r2.z, r9.y -sel.b32 r2.z, r3.w, r2.z, r9.x -add.f r1.z, r1.z, (neg)r6.y +sel.b32 r9.x, r2.x, r8.y, r9.x +cmps.f.ne r8.z, r8.z, c14.y +sel.b32 r8.y, r2.y, r8.y, r8.w nop -add.f r1.y, r1.y, r4.z -add.f r1.w, r1.w, r2.z -mov.f32f32 r1.z, r1.z +add.f r1.z, r1.z, r9.x +sel.b32 r2.z, r2.w, r8.z, r2.z +add.f r8.z, r1.w, r8.y nop -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.w, r1.w -(rpt1)nop -mov.f32f32 r2.z, r1.y -mov.f32f32 r3.w, r1.w +mov.f32f32 r8.y, r1.z +add.f r0.x, r0.x, (neg)r2.z +mov.f32f32 r1.w, r8.z +(rpt3)nop +sam (f32)(w)r8.y, r8.y, s#2, t#2 +(sy)cmps.f.lt r2.z, r9.x, r0.x +mov.f32f32 r0.x, r0.x (rpt1)nop -(ss)mov.f32f32 r7.x, r2.z -mov.f32f32 r7.y, r3.w -(rpt5)nop -sam (f32)(w)r10.y, r7.x, s#2, t#2 -(sy)cmps.f.lt r2.z, r11.x, r1.z -(rpt2)nop cov.u32f32 r2.z, r2.z (rpt2)nop cmps.f.ne r2.z, r2.z, c14.y (rpt2)nop -sel.b32 r3.w, r5.y, r2.z, r7.w -sel.b32 r2.y, r2.w, r2.z, r2.y +sel.b32 r1.y, r2.x, r2.z, r1.y +sel.b32 r2.z, r2.y, r2.z, r7.w (rpt1)nop -add.f r1.y, r1.y, r3.w -add.f r1.w, r1.w, r2.y +add.f r1.y, r1.z, r1.y +add.f r1.w, r1.w, r2.z (rpt1)nop -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.w, r1.w +mov.f32f32 r1.z, r1.y +mov.f32f32 r2.z, r1.w (rpt1)nop -add.f r2.y, r1.y, (neg)r2.x -mov.f32f32 r2.z, r1.y -add.f r2.w, r1.w, (neg)r0.y -mov.f32f32 r3.w, r1.w -mov.f32f32 r2.y, r2.y -(ss)mov.f32f32 r7.x, r2.z -mov.f32f32 r2.z, r2.w -mov.f32f32 r7.y, r3.w -mov.f32f32 r2.y, r2.y -(rpt2)nop -mov.f32f32 r7.z, r2.y -mov.f32f32 r2.y, r2.z -sam (f32)(w)r10.y, r7.x, s#2, t#2 -(sy)add.f r2.z, c15.z, r11.x -add.f r1.z, r11.x, (neg)r1.z -nop -mov.f32f32 r7.w, r2.y -mov.f32f32 r2.y, r2.z -mov.f32f32 r1.z, r1.z -(rpt3)nop -sam (f32)(w)r7.x, r7.z, s#2, t#2 -(sy)add.f r2.y, r2.y, (neg)r7.w -(rpt2)nop -mov.f32f32 r2.y, r2.y +(ss)add.f r8.y, r1.z, (neg)r2.x +add.f r8.z, r2.z, (neg)r2.y +(rpt1)nop +sam (f32)(w)r8.w, r1.z, s#2, t#2 +(sy)(ss)add.f r1.z, c15.z, r9.z +add.f r0.x, r9.z, (neg)r0.x +(rpt1)nop +sam (f32)(w)r8.y, r8.y, s#2, t#2 +(sy)add.f r1.z, r1.z, (neg)r9.x (rpt5)nop -rcp r2.y, r2.y -(ss)mov.f32f32 r2.y, r2.y -(rpt2)nop -mul.f r1.z, r1.z, r2.y -(rpt2)nop -mov.f32f32 r1.z, r1.z +rcp r1.z, r1.z +(ss)mul.f r0.x, r0.x, r1.z (rpt2)nop -mul.f r2.x, r2.x, r1.z -mul.f r0.y, r0.y, r1.z -(rpt1)nop -mov.f32f32 r1.z, r2.x -mov.f32f32 r0.y, r0.y +(ss)mov.f32f32 r1.z, r0.x +mul.f r0.x, r2.x, r0.x (rpt1)nop -add.f r1.y, r1.y, (neg)r1.z -add.f r0.y, r1.w, (neg)r0.y +mul.f r1.z, r2.y, r1.z +add.f r1.w, r1.y, (neg)r0.x (rpt1)nop -mov.f32f32 r1.y, r1.y -mov.f32f32 r0.y, r0.y +add.f r2.x, r2.z, (neg)r1.z +mov.f32f32 r1.y, r1.w (rpt1)nop -mov.f32f32 r1.z, r1.y -mov.f32f32 r1.w, r1.y -mov.f32f32 r1.y, r1.y -mov.f32f32 r2.x, r0.y -mov.f32f32 r2.y, r1.z -mov.f32f32 r1.z, r0.y -mov.f32f32 r7.x, r1.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.y, r1.y -mov.f32f32 r2.z, r1.z mov.f32f32 r1.z, r2.x -mov.f32f32 r7.y, r0.y -nop -mov.f32f32 r1.w, r1.y -mov.f32f32 r2.x, r1.z +mov.f32f32 r8.y, r1.y nop -sam (f32)(xyz)r10.y, r2.y, s#2, t#2 -(sy)mad.f32 r0.y, c16.x, r10.z, c16.y -mad.f32 r1.y, c16.x, r10.y, c16.y -sam (f32)(xyzw)r7.x, r7.x, s#0, t#0 -(sy)cmps.f.lt r1.z, r7.w, c15.w -(ss)mov.f32f32 r2.y, r7.w -mov.f32f32 r0.y, r0.y -sam (f32)(xyz)r11.x, r1.w, s#3, t#3 -(sy)(ss)mul.f r1.w, c8.y, r11.y -mul.f r2.x, c8.x, r11.x -cov.u32f32 r1.z, r1.z -absneg.f r0.y, (neg)r0.y -mov.f32f32 r1.y, r1.y -mul.f r2.z, c8.z, r11.z -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.x, r2.x -mul.f r2.w, r4.y, r1.y -mul.f r3.z, r3.z, r1.y -mad.f32 r0.x, r0.x, r0.y, r2.w -mad.f32 r2.w, r3.x, r0.y, r3.z -mul.f r1.y, r4.w, r1.y -cmps.f.ne p0.x, r1.z, r9.w -mov.f32f32 r0.x, r0.x -mad.f32 r1.z, c16.x, r10.w, c16.y -mov.f32f32 r2.w, r2.w -mad.f32 r0.y, r3.y, r0.y, r1.y -mov.f32f32 r1.y, r2.z -mov.f32f32 r1.z, r1.z +sam (f32)(xyzw)r2.x, r1.w, s#0, t#0 +(sy)cmps.f.lt r0.x, r2.w, c15.w +mov.f32f32 r8.z, r1.z +(rpt1)nop +sam (f32)(xyz)r8.w, r1.y, s#2, t#2 +(sy)(ss)mad.f32 r1.y, c16.x, r8.w, c16.y +cov.u32f32 r0.x, r0.x +mad.f32 r1.z, c16.x, r9.x, c16.y +mad.f32 r1.w, c16.x, r9.y, c16.y +mov.f32f32 r7.w, r1.y +cmps.f.ne p0.x, r0.x, r4.w +absneg.f r0.x, (neg)r1.z +mul.f r1.y, r4.x, r1.y +mul.f r1.z, r4.z, r7.w +mul.f r4.x, r5.y, r7.w +mad.f32 r0.y, r0.y, r0.x, r1.z +mov.f32f32 r1.z, r1.w +mad.f32 r4.x, r5.z, r0.x, r4.x +mad.f32 r0.x, r3.w, r0.x, r1.y kill p0.x -mov.f32f32 r3.w, r2.y -mov.f32f32 r0.y, r0.y -mad.f32 r0.x, r5.z, r1.z, r0.x -mad.f32 r2.y, r6.w, r1.z, r2.w -mad.f32 r0.y, r6.z, r1.z, r0.y +mad.f32 r0.y, r6.x, r1.z, r0.y +mad.f32 r1.y, r5.w, r1.z, r4.x +mad.f32 r0.x, r8.x, r1.w, r0.x nop -mov.f32f32 r0.x, r0.x -mov.f32f32 r1.z, r2.y -mov.f32f32 r0.y, r0.y -nop -mul.f r2.y, r0.x, r0.x -nop -mad.f32 r2.y, r1.z, r1.z, r2.y -(rpt2)nop -mov.f32f32 r2.y, r2.y -nop -mad.f32 r2.y, r0.y, r0.y, r2.y +mov.f32f32 r1.z, r0.y +mov.f32f32 r1.w, r1.y +mov.f32f32 r3.w, r0.x +sam (f32)(xyz)r5.y, r8.y, s#3, t#3 +(sy)mul.f r4.x, c8.z, r5.w +mul.f r0.y, r0.y, r1.z +mul.f r4.z, c8.y, r5.z +mad.f32 r0.y, r1.y, r1.w, r0.y +mul.f r1.y, c8.x, r5.y +mad.f32 r0.y, r3.w, r3.w, r0.y (rpt5)nop -rsq r2.y, r2.y -(ss)mov.f32f32 r2.y, r2.y -(rpt2)nop -mul.f r0.x, r0.x, r2.y -mul.f r1.z, r1.z, r2.y -mul.f r0.y, r0.y, r2.y -nop -mov.f32f32 r0.x, r0.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r0.y, r0.y -nop -mul.f r2.y, r0.x, r0.x -mul.f r2.w, (neg)c9.x, r0.x -mad.f32 r2.y, r1.z, r1.z, r2.y -mad.f32 r2.w, (neg)c9.y, r1.z, r2.w -(rpt1)nop -mov.f32f32 r2.y, r2.y -mov.f32f32 r2.w, r2.w -mad.f32 r2.y, r0.y, r0.y, r2.y -mad.f32 r2.w, (neg)c9.z, r0.y, r2.w -(rpt4)nop -rsq r2.y, r2.y -(ss)mov.f32f32 r2.y, r2.y -max.f r2.w, r2.w, c14.y +rsq r0.y, r0.y +(ss)mov.f32f32 r3.w, r0.y +mul.f r0.x, r0.x, r0.y (rpt1)nop -mul.f r0.x, r0.x, r2.y -mov.f32f32 r2.w, r2.w -mul.f r1.z, r1.z, r2.y -mul.f r0.y, r0.y, r2.y -mov.f32f32 r0.x, r0.x -mad.f32 r2.y, c8.z, r2.w, (neg)r10.x -mad.f32 r4.y, c8.y, r2.w, (neg)r8.y -mad.f32 r2.w, c8.x, r2.w, (neg)r1.x -mul.f r0.x, r0.x, r8.w -mov.f32f32 r1.z, r1.z -mov.f32f32 r2.y, r2.y -mov.f32f32 r4.y, r4.y -mov.f32f32 r2.w, r2.w -mad.f32 r0.x, r1.z, r8.x, r0.x -mad.f32 r1.z, c12.x, r2.y, r10.x -mad.f32 r2.y, c12.x, r4.y, r8.y -mad.f32 r1.x, c12.x, r2.w, r1.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.z, r1.z -mov.f32f32 r2.y, r2.y -mov.f32f32 r1.x, r1.x -mad.f32 r0.x, r0.y, r9.z, r0.x -(rpt2)nop -mov.f32f32 r0.x, r0.x -(rpt2)nop -max.f r0.x, c14.y, r0.x -(rpt2)nop -mov.f32f32 r0.x, r0.x +(ss)mul.f r0.y, r1.z, r3.w +mul.f r1.z, r1.w, r3.w +mov.f32f32 r1.w, r0.x +nop +mov.f32f32 r3.w, r0.y +mul.f r0.y, (neg)c9.x, r0.y +mov.f32f32 r4.w, r1.z +nop +mul.f r5.y, r3.w, r3.w +mad.f32 r0.y, (neg)c9.y, r1.z, r0.y +mad.f32 r1.z, r4.w, r4.w, r5.y +mad.f32 r0.x, (neg)c9.z, r0.x, r0.y +mad.f32 r0.y, r1.w, r1.w, r1.z (rpt5)nop -log2 r0.x, r0.x -(ss)mul.f r0.x, c12.y, r0.x -(rpt2)nop -mov.f32f32 r0.x, r0.x +rsq r0.y, r0.y +(ss)mov.f32f32 r1.z, r0.y +max.f r0.x, r0.x, c14.y +(ss)mul.f r0.y, r1.w, r0.y +nop +mul.f r1.w, r3.w, r1.z +mov.f32f32 r3.w, r0.x +mul.f r1.z, r4.w, r1.z +mad.f32 r0.x, c8.x, r0.x, (neg)r1.x +mul.f r1.w, r1.w, r6.w +mad.f32 r4.w, c8.z, r3.w, (neg)r6.y +mad.f32 r1.z, r1.z, r7.x, r1.w +mad.f32 r1.w, c8.y, r3.w, (neg)r6.z +mad.f32 r0.y, r0.y, r7.z, r1.z +mad.f32 r1.z, c12.x, r4.w, r6.y +mad.f32 r0.x, c12.x, r0.x, r1.x +nop +max.f r0.y, c14.y, r0.y +mad.f32 r1.x, c12.x, r1.w, r6.z +(rpt4)nop +log2 r0.y, r0.y +(ss)mul.f r0.y, c12.y, r0.y (rpt5)nop -exp2 r0.x, r0.x -(ss)mul.f r0.y, r1.y, r0.x -mul.f r1.y, r1.w, r0.x -mad.f32 r0.y, r7.z, r1.z, r0.y -mad.f32 r1.y, r7.y, r2.y, r1.y -(ss)mul.f r0.x, r2.x, r0.x -nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.y, r1.y -mad.f32 r0.x, r7.x, r1.x, r0.x -nop -mul.f r0.y, r0.y, r0.w -mul.f r1.x, r1.y, r0.w -mov.f32f32 r0.x, r0.x -nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.x, r1.x -mad.f32 r0.y, c7.z, r7.z, r0.y -mad.f32 r1.x, c7.y, r7.y, r1.x +exp2 r0.y, r0.y +(ss)mul.f r1.w, r4.x, r0.y +mul.f r3.w, r4.z, r0.y +mad.f32 r1.z, r2.z, r1.z, r1.w +mad.f32 r1.x, r2.y, r1.x, r3.w +(ss)mul.f r0.y, r1.y, r0.y +nop +mul.f r1.y, r1.z, r4.y +mul.f r1.x, r1.x, r4.y +mad.f32 r1.y, c7.z, r2.z, r1.y +mad.f32 r1.x, c7.y, r2.y, r1.x +mad.f32 r0.x, r2.x, r0.x, r0.y +nop +mul.f r0.y, r0.z, r1.y +mul.f r1.x, r0.z, r1.x mul.f r0.x, r0.x, r0.w nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.w, r1.x -mov.f32f32 r0.x, r0.x -nop -mul.f r0.y, r0.z, r0.y -mul.f r0.w, r0.z, r0.w -mad.f32 r0.x, c7.x, r7.x, r0.x -nop -add.f r0.y, r0.y, r5.x -add.f r0.w, r0.w, r5.w -mov.f32f32 r0.x, r0.x +add.f r0.y, r0.y, r7.y +add.f r0.w, r1.x, r3.y +mad.f32 r0.x, c7.x, r2.x, r0.x nop -mul.f r0.y, r0.y, r4.x -mul.f r0.w, r0.w, r4.x +mul.f r0.y, r0.y, r5.x +mul.f r0.w, r0.w, r5.x mul.f r0.x, r0.z, r0.x nop -mul.f r0.y, r0.y, c6.z -mul.f r0.z, r0.w, c6.y -add.f r0.x, r0.x, r6.x -nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -mul.f r0.x, r0.x, r4.x -nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -mul.f r0.x, r0.x, c6.x -nop -mov.f32f32 r3.z, r0.y -mov.f32f32 r3.y, r0.z -mov.f32f32 r0.x, r0.x +mul.f r2.z, r0.y, c6.z +mul.f r2.y, r0.w, c6.y +add.f r0.x, r0.x, r3.z (rpt2)nop -mov.f32f32 r0.x, r0.x +mul.f r0.x, r0.x, r5.x (rpt2)nop -mov.f32f32 r3.x, r0.x +mul.f r2.x, r0.x, c6.x end nop nop -nop -; FRAG: outputs: r3.x (1:0) -; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r2.z (5:9,cm=f,il=8,b=1) r63.y (5:10,cm=f,il=12,b=1) r3.x (5:11,cm=f,il=16,b=1) r2.y (5:12,cm=f,il=20,b=1) r7.x (5:13,cm=f,il=24,b=1) r8.x (5:14,cm=f,il=28,b=1) r7.x (5:15,cm=f,il=32,b=1) -; FRAG: 802 instructions, 0 half, 65 full -; pos (bary): r1.x -; color: r3.x -; fragcoord: r0.x +; FRAG: outputs: r2.x (1:0) +; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r3.x (5:9,cm=f,il=8,b=1) r63.z (5:10,cm=f,il=12,b=1) r3.x (5:11,cm=f,il=16,b=1) r1.w (5:12,cm=f,il=20,b=1) r6.x (5:13,cm=f,il=24,b=1) r6.x (5:14,cm=f,il=28,b=1) r7.x (5:15,cm=f,il=32,b=1) +; FRAG: 532 instructions, 0 half, 13 full diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-63.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-63.asm index ad4df45..b2e35b3 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-63.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-63.asm @@ -6,346 +6,250 @@ @in(r0.w) in4 @in(r1.x) in5 @in(r1.y) in6 -@in(r1.z) in8 -@in(r1.w) in9 -@in(r2.x) in10 -@in(r2.y) in11 -@in(r2.z) in12 -@in(r2.w) in13 -@out(r11.z) out0 -@out(r11.w) out1 -@out(r12.x) out2 -@out(r12.y) out3 -@out(r7.x) out4 -@out(r7.y) out5 -@out(r7.z) out6 -@out(r7.w) out7 -@out(r3.x) out8 -@out(r3.y) out9 -@out(r3.z) out10 -@out(r3.w) out11 -@out(r9.z) out12 -@out(r9.w) out13 -@out(r10.x) out14 -@out(r10.y) out15 -@out(r1.y) out16 -@out(r1.z) out17 -@out(r1.w) out18 -@out(r2.x) out19 -@out(r12.z) out20 -@out(r12.w) out21 -@out(r13.x) out22 -@out(r13.y) out23 -@out(r8.z) out24 -@out(r8.w) out25 -@out(r9.x) out26 -@out(r9.y) out27 -@out(r10.z) out28 -@out(r10.w) out29 -@out(r11.x) out30 -@out(r11.y) out31 -(sy)(ss)floor.f r3.x, c15.z -absneg.f r3.y, (abs)c18.x -absneg.f r3.z, (abs)c18.y -floor.f r3.w, c15.x -add.f r3.x, c15.z, (neg)r3.x -mul.f r4.x, c12.x, r1.z -mul.f r4.y, c12.x, r0.w -add.f r3.w, c15.x, (neg)r3.w -mov.f32f32 r3.x, r3.x -add.f r3.y, r3.y, r3.z -mad.f32 r3.z, c13.x, r1.w, r4.x -mad.f32 r4.x, c13.x, r1.x, r4.y -max.f r3.x, r3.x, c19.y -mov.f32f32 r3.y, r3.y -mov.f32f32 r3.w, r3.w -mov.f32f32 r3.z, r3.z -min.f r3.x, r3.x, c23.y -mul.f r4.y, c17.x, r3.y -mul.f r4.z, c12.z, r0.x -max.f r3.w, r3.w, c19.y -max.f r3.x, r3.x, c19.x -mad.f32 r4.z, c13.z, r0.y, r4.z -mad.f32 r3.z, c14.x, r2.x, r3.z -mad.f32 r4.x, c14.x, r1.y, r4.x -mov.f32f32 r3.x, r3.x -mad.f32 r4.z, c14.z, r0.z, r4.z -min.f r3.w, r3.w, c23.y -mov.f32f32 r3.z, r3.z -mul.f r3.x, c17.x, r3.x -add.f r4.z, r4.z, c15.z -max.f r3.w, r3.w, c19.x -mad.f32 r4.y, c19.w, r4.y, r4.z -mov.f32f32 r3.x, r3.x -mul.f r4.w, c12.y, r0.w -mov.f32f32 r4.x, r4.x -absneg.f r5.x, (neg)c5.x -mad.f32 r3.x, c19.z, r3.x, c15.x -mov.f32f32 r4.y, r4.y -mov.f32f32 r3.w, r3.w -mul.f r5.y, c12.x, r0.x -mov.f32f32 r3.x, r3.x -mov.f32f32 r4.y, r4.y -mad.f32 r5.y, c13.x, r0.y, r5.y -mad.f32 r4.w, c13.y, r1.x, r4.w -mov.f32f32 r3.x, r3.x -add.f r4.y, r4.y, c20.x -mad.f32 r5.y, c14.x, r0.z, r5.y -mad.f32 r4.w, c14.y, r1.y, r4.w -add.f r3.x, r3.x, c20.x -floor.f r5.z, r4.y -add.f r5.y, r5.y, c15.x -mov.f32f32 r4.w, r4.w -floor.f r5.w, r3.x -add.f r4.y, r4.y, (neg)r5.z -mad.f32 r3.w, c17.x, r3.w, r5.y -mul.f r5.z, r4.w, r3.z -add.f r3.x, r3.x, (neg)r5.w -mad.f32 r4.y, c20.y, r4.y, c20.z -add.f r3.w, r3.w, c20.x -mul.f r5.w, c12.y, r1.z -mad.f32 r3.x, c20.y, r3.x, c20.z -absneg.f r4.y, (abs)r4.y -floor.f r6.x, r3.w -mad.f32 r5.w, c13.y, r1.w, r5.w -absneg.f r3.x, (abs)r3.x -mul.f r6.y, c20.y, r4.y -add.f r3.w, r3.w, (neg)r6.x -mul.f r4.y, r4.y, r4.y -mul.f r6.x, c20.y, r3.x -add.f r6.y, c20.w, (neg)r6.y -mad.f32 r3.w, c20.y, r3.w, c20.z -mul.f r3.x, r3.x, r3.x -add.f r6.x, c20.w, (neg)r6.x -mul.f r4.y, r4.y, r6.y -absneg.f r3.w, (abs)r3.w -mov.f32f32 r5.w, r5.w -mul.f r3.x, r3.x, r6.x -mov.f32f32 r4.y, r4.y -mul.f r6.x, r0.x, r0.z -mul.f r6.y, r0.y, c21.x -mov.f32f32 r3.x, r3.x -mul.f r6.z, r0.y, c22.x -mul.f r6.w, c20.y, r3.w -mul.f r6.x, r6.x, r6.y -mul.f r3.w, r3.w, r3.w -mov.f32f32 r6.y, r6.z -add.f r6.z, c20.w, (neg)r6.w -mov.f32f32 r6.x, r6.x -mad.f32 r5.w, c14.y, r2.x, r5.w -mov.f32f32 r6.y, r6.y -mul.f r3.w, r3.w, r6.z -mov.f32f32 r6.x, r6.x -mov.f32f32 r5.w, r5.w -max.f r6.y, r6.y, c19.y -mov.f32f32 r3.w, r3.w -mov.f32f32 r6.x, r6.x -mad.f32 r5.z, r4.x, r5.w, (neg)r5.z -min.f r6.y, r6.y, c23.y -mul.f r6.z, r4.x, r5.x -absneg.f r6.w, (neg)c5.y -mul.f r1.z, c12.z, r1.z -min.f r6.y, r6.y, c19.w -max.f r6.x, r6.x, c19.y -mov.f32f32 r5.z, r5.z -mad.f32 r6.z, r4.w, r6.w, r6.z -mov.f32f32 r6.y, r6.y -min.f r6.x, r6.x, c23.y -mul.f r5.z, r5.z, r2.y -mov.f32f32 r6.z, r6.z -mul.f r3.x, r3.x, r6.y -min.f r6.x, r6.x, c21.y -mov.f32f32 r5.z, r5.z +@in(r8.x) in8 +@in(r8.y) in9 +@in(r8.z) in10 +@in(r8.w) in11 +@in(r5.z) in12 +@in(r5.w) in13 +@out(r0.x) out0 +@out(r0.y) out1 +@out(r0.z) out2 +@out(r0.w) out3 +@out(r1.x) out4 +@out(r1.y) out5 +@out(r1.z) out6 +@out(r1.w) out7 +@out(r2.x) out8 +@out(r2.y) out9 +@out(r2.z) out10 +@out(r2.w) out11 +@out(r3.x) out12 +@out(r3.y) out13 +@out(r3.z) out14 +@out(r3.w) out15 +@out(r4.x) out16 +@out(r4.y) out17 +@out(r4.z) out18 +@out(r4.w) out19 +@out(r5.x) out20 +@out(r5.y) out21 +@out(r5.z) out22 +@out(r5.w) out23 +@out(r6.x) out24 +@out(r6.y) out25 +@out(r6.z) out26 +@out(r6.w) out27 +@out(r7.x) out28 +@out(r7.y) out29 +@out(r7.z) out30 +@out(r7.w) out31 +@const(c19.x) 0x3ecccccd, 0x00000000, 0x3eaaa64c, 0x3e800000 +@const(c20.x) 0x3f000000, 0x40000000, 0xbf800000, 0x40400000 +@const(c21.x) 0x399d4952, 0x3e4ccccd, 0x3f99999a, 0x3fd9999a +@const(c22.x) 0x3c88ce70, 0x00000000, 0x00000000, 0x00000000 +@const(c23.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 +(sy)(ss)floor.f r1.z, c15.z +floor.f r1.w, c15.x +absneg.f r2.x, (abs)c18.x +absneg.f r2.y, (abs)c18.y +add.f r1.z, c15.z, (neg)r1.z +add.f r1.w, c15.x, (neg)r1.w +mul.f r2.z, c12.x, r0.w +add.f r2.x, r2.x, r2.y +max.f r1.z, r1.z, c19.y +max.f r1.w, r1.w, c19.y +mad.f32 r2.y, c13.x, r1.x, r2.z +mul.f r2.z, c17.x, r2.x +min.f r1.z, r1.z, c23.y +min.f r1.w, r1.w, c23.y +mul.f r2.w, c12.z, r0.x +mad.f32 r2.y, c14.x, r1.y, r2.y +max.f r1.z, r1.z, c19.x +max.f r1.w, r1.w, c19.x +mad.f32 r2.w, c13.z, r0.y, r2.w +mul.f r3.x, c12.x, r0.x +mul.f r1.z, c17.x, r1.z +mad.f32 r3.x, c13.x, r0.y, r3.x +mad.f32 r2.w, c14.z, r0.z, r2.w +mad.f32 r3.x, c14.x, r0.z, r3.x +mad.f32 r1.z, c19.z, r1.z, c15.x +absneg.f r3.y, (neg)c5.x +add.f r2.w, r2.w, c15.z +mov.f32f32 r2.x, r2.x +add.f r1.z, r1.z, c20.x +add.f r3.x, r3.x, c15.x +mad.f32 r2.z, c19.w, r2.z, r2.w +mad.f32 r1.w, c17.x, r1.w, r3.x +floor.f r3.z, r1.z +mul.f r3.w, r2.y, r3.y +mul.f r4.x, c12.y, r0.w +add.f r1.w, r1.w, c20.x +add.f r1.z, r1.z, (neg)r3.z +add.f r2.z, r2.z, c20.x +mad.f32 r3.z, c13.y, r1.x, r4.x +max.f r2.x, r2.x, c21.z +mad.f32 r1.z, c20.y, r1.z, c20.z +floor.f r4.x, r1.w +floor.f r4.y, r2.z +mad.f32 r3.z, c14.y, r1.y, r3.z +absneg.f r1.z, (abs)r1.z +add.f r1.w, r1.w, (neg)r4.x +add.f r2.z, r2.z, (neg)r4.y +absneg.f r4.x, (neg)c5.y +mul.f r4.y, c20.y, r1.z +mad.f32 r1.w, c20.y, r1.w, c20.z +mad.f32 r2.z, c20.y, r2.z, c20.z +mul.f r1.z, r1.z, r1.z +add.f r4.y, c20.w, (neg)r4.y +absneg.f r1.w, (abs)r1.w +absneg.f r2.z, (abs)r2.z +mad.f32 r3.w, r3.z, r4.x, r3.w +mul.f r1.z, r1.z, r4.y +mul.f r4.y, r0.y, c22.x +mul.f r4.z, c20.y, r1.w +mul.f r4.w, c20.y, r2.z +mul.f r1.w, r1.w, r1.w +max.f r4.y, r4.y, c19.y +add.f r4.z, c20.w, (neg)r4.z +add.f r4.w, c20.w, (neg)r4.w +mul.f r2.z, r2.z, r2.z +min.f r4.y, r4.y, c23.y +mul.f r1.w, r1.w, r4.z +mul.f r4.z, r0.x, r0.z +mul.f r4.w, r2.z, r4.w +min.f r2.z, r4.y, c19.w +mul.f r4.y, r0.y, c21.x mul.f r0.w, c12.z, r0.w -mov.f32f32 r3.x, r3.x -mov.f32f32 r6.x, r6.x -mov.f32f32 r5.z, r5.z +min.f r5.x, r2.x, c21.w +mul.f r1.z, r1.z, r2.z +mul.f r2.x, r4.z, r4.y mad.f32 r0.w, c13.z, r1.x, r0.w -mad.f32 r1.x, c13.z, r1.w, r1.z -mul.f r1.z, r3.w, r6.x -mul.f r1.w, r4.y, r6.x -max.f r3.y, r3.y, c21.z -mov.f32f32 r3.w, r5.z -mov.f32f32 r1.z, r1.z +mov.f32f32 r1.x, r5.x +mov.f32f32 r4.y, r1.z +max.f r2.z, r2.x, c19.y mad.f32 r0.w, c14.z, r1.y, r0.w -mad.f32 r1.y, c18.x, r3.x, r1.z -mad.f32 r1.z, c18.y, r3.x, r1.z -mov.f32f32 r3.x, r3.y -mov.f32f32 r7.w, r3.w -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.z, r1.z -min.f r3.x, r3.x, c21.w -mov.f32f32 r0.w, r0.w -absneg.f r3.y, (neg)c5.z -mov.f32f32 r1.x, r1.x -mov.f32f32 r3.x, r3.x -mul.f r3.w, c12.y, r0.x -mad.f32 r4.y, r0.w, r3.y, r6.z -mad.f32 r1.x, c14.z, r2.x, r1.x -mad.f32 r2.x, r1.w, r3.x, r5.y -mad.f32 r4.z, r1.w, r3.x, r4.z -mad.f32 r3.w, c13.y, r0.y, r3.w -mov.f32f32 r4.y, r4.y -add.f r1.y, r2.x, r1.y -add.f r1.z, r4.z, r1.z -mad.f32 r2.x, c14.y, r0.z, r3.w -max.f r3.w, c19.y, r4.y -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.z, r1.z -add.f r2.x, r2.x, c15.y -mov.f32f32 r3.w, r3.w -mov.f32f32 r4.y, r1.y -mul.f r4.z, c8.w, r1.y -mul.f r5.y, c8.z, r1.y -mul.f r5.z, c8.y, r1.y -add.f r4.y, c4.x, (neg)r4.y -mad.f32 r1.w, r1.w, r3.x, r2.x -mul.f r2.x, c8.x, r1.y -mov.f32f32 r3.x, r1.z -mul.f r6.x, r4.y, r4.y -add.f r6.y, c4.y, (neg)r1.w -mad.f32 r6.z, c9.w, r1.w, r4.z -mad.f32 r8.x, c9.z, r1.w, r5.y -mad.f32 r5.z, c9.y, r1.w, r5.z -mad.f32 r6.x, r6.y, r6.y, r6.x -mov.f32f32 r6.z, r6.z -mov.f32f32 r8.x, r8.x -mov.f32f32 r5.z, r5.z -mov.f32f32 r6.x, r6.x -add.f r3.x, c4.z, (neg)r3.x -mad.f32 r6.z, c10.w, r1.z, r6.z -mad.f32 r8.x, c10.z, r1.z, r8.x -mad.f32 r5.z, c10.y, r1.z, r5.z -mad.f32 r6.x, r3.x, r3.x, r6.x +absneg.f r1.y, (neg)c5.z +mov.f32f32 r2.x, r2.y +min.f r4.z, r2.z, c23.y +mul.f r5.y, c12.z, r8.x +mov.f32f32 r2.z, r0.w +mov.f32f32 r2.y, r3.z +min.f r3.z, r4.z, c21.y +mad.f32 r0.w, r0.w, r1.y, r3.w +mad.f32 r4.z, c13.z, r8.y, r5.y +mul.f r9.x, c12.y, r8.x +mov.f32f32 r3.w, r3.z +mul.f r3.z, r4.w, r3.z +mul.f r4.w, c12.y, r0.x +max.f r9.y, c19.y, r0.w +mul.f r0.w, r1.w, r3.w +mad.f32 r1.w, c13.y, r0.y, r4.w +mad.f32 r3.x, r3.z, r1.x, r3.x +mad.f32 r1.w, c14.y, r0.z, r1.w +mov.f32f32 r3.w, r0.w +mad.f32 r0.w, c18.y, r1.z, r0.w +mad.f32 r1.z, c18.x, r4.y, r3.w +mad.f32 r2.w, r3.z, r5.x, r2.w +add.f r1.w, r1.w, c15.y +mov.f32f32 r4.y, r9.y +add.f r1.z, r3.x, r1.z +add.f r2.w, r2.w, r0.w +mad.f32 r1.x, r3.z, r1.x, r1.w +nop +mov.f32f32 r1.w, r1.z +mul.f r1.z, c0.x, r1.z +mov.f32f32 r4.w, r2.w +add.f r6.y, c4.y, (neg)r1.x +add.f r6.x, c4.x, (neg)r1.w +mul.f r0.w, c8.y, r1.w +mul.f r3.x, c8.x, r1.w +mul.f r7.w, c8.w, r1.w +mul.f r3.z, r6.x, r6.x +mad.f32 r0.w, c9.y, r1.x, r0.w +mad.f32 r3.z, r6.y, r6.y, r3.z +add.f r6.z, c4.z, (neg)r4.w +mad.f32 r0.w, c10.y, r4.w, r0.w +mad.f32 r3.x, c9.x, r1.x, r3.x +mad.f32 r3.w, c9.w, r1.x, r7.w +mad.f32 r3.z, r6.z, r6.z, r3.z mul.f r0.x, c12.w, r0.x -mad.f32 r2.x, c9.x, r1.w, r2.x +mad.f32 r3.x, c10.x, r4.w, r3.x +mad.f32 r3.w, c10.w, r4.w, r3.w +mul.f r7.z, c8.z, r1.w mad.f32 r0.x, c13.w, r0.y, r0.x -mov.f32f32 r0.y, r3.x -mov.f32f32 r8.y, r4.y +mul.f r0.y, c0.w, r1.w +rsq r3.z, r3.z +(ss)mov.f32f32 r5.x, r3.z +mad.f32 r3.y, r6.x, r3.z, r3.y mad.f32 r0.x, c14.w, r0.z, r0.x -rsq r0.z, r6.x -(ss)mov.f32f32 r0.z, r0.z -mov.f32f32 r2.x, r2.x -mov.f32f32 r9.x, r0.y -mov.f32f32 r8.z, r8.y -mad.f32 r0.y, r4.y, r0.z, r5.x -mad.f32 r4.y, r6.y, r0.z, r6.w -mad.f32 r0.z, r3.x, r0.z, r3.y -add.f r0.x, r0.x, c15.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r3.x, r4.y -mov.f32f32 r0.z, r0.z -mad.f32 r3.y, c11.w, r0.x, r6.z -mul.f r4.y, r0.y, r0.y -mad.f32 r5.x, c11.z, r0.x, r8.x -mad.f32 r4.y, r3.x, r3.x, r4.y -mov.f32f32 r3.y, r3.y -mad.f32 r5.z, c11.y, r0.x, r5.z -mad.f32 r2.x, c10.x, r1.z, r2.x -mov.f32f32 r4.y, r4.y -mov.f32f32 r3.y, r3.y -mad.f32 r4.y, r0.z, r0.z, r4.y -mov.f32f32 r5.x, r5.x -mov.f32f32 r5.z, r5.z -mad.f32 r2.x, c11.x, r0.x, r2.x -(ss)mul.f r6.x, c0.w, r1.y -mul.f r6.z, c0.z, r1.y -mul.f r6.w, c0.y, r1.y -rsq r4.y, r4.y -(ss)mov.f32f32 r4.y, r4.y -mov.f32f32 r10.y, r3.y -mov.f32f32 r3.y, r5.x -mul.f r5.x, r5.z, c16.y -mul.f r0.z, r0.z, r4.y -mul.f r3.x, r3.x, r4.y -mul.f r0.y, r0.y, r4.y -mov.f32f32 r10.x, r3.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r3.x, r3.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r9.w, r5.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r3.x, r3.x -mov.f32f32 r0.y, r0.y +mad.f32 r0.z, c9.z, r1.x, r7.z +(ss)mad.f32 r3.z, r6.y, r5.x, r4.x +mov.f32f32 r4.x, r3.y +mad.f32 r1.y, r6.z, r5.x, r1.y nop -mov.f32f32 r10.w, r0.z -mov.f32f32 r10.z, r3.x -mov.f32f32 r9.y, r0.y -mov.f32f32 r0.y, r2.x -mad.f32 r0.z, c1.w, r1.w, r6.x -mad.f32 r2.x, c1.z, r1.w, r6.z -mad.f32 r3.x, c1.y, r1.w, r6.w -mul.f r0.y, r0.y, c16.x -mad.f32 r0.z, c2.w, r1.z, r0.z -mad.f32 r2.x, c2.z, r1.z, r2.x -mad.f32 r3.x, c2.y, r1.z, r3.x -mov.f32f32 r9.z, r0.y -mad.f32 r0.y, c3.w, r0.x, r0.z -mad.f32 r0.z, c3.z, r0.x, r2.x -mad.f32 r2.x, c3.y, r0.x, r3.x -mul.f r3.x, c0.x, r1.y -mov.f32f32 r12.y, r0.y -mov.f32f32 r12.x, r0.z -mov.f32f32 r11.w, r2.x -mad.f32 r0.y, c1.x, r1.w, r3.x -mad.f32 r0.z, c7.x, r1.z, c7.y -mad.f32 r0.y, c2.x, r1.z, r0.y -mad.f32 r1.y, c7.x, r1.y, c7.y -mad.f32 r0.x, c3.x, r0.x, r0.y -mov.f32f32 r0.y, r0.z -mov.f32f32 r11.y, r4.z -mov.f32f32 r11.x, r5.y -mov.f32f32 r11.z, r0.x -mov.f32f32 r12.w, r0.y -mov.f32f32 r0.x, r1.y -mov.f32f32 r0.y, r6.y -(rpt1)nop -mov.f32f32 r12.z, r0.x -mov.f32f32 r8.w, r0.y -mul.f r0.x, r3.w, c6.z -mul.f r0.y, r3.w, c6.y -mul.f r0.z, r3.w, c6.x -mov.f32f32 r1.x, r1.x -mov.f32f32 r7.z, r0.x -mov.f32f32 r7.y, r0.y -mov.f32f32 r7.x, r0.z -mul.f r0.x, r4.x, r1.x -mul.f r0.y, r0.w, r5.w -mad.f32 r0.x, r0.w, r3.z, (neg)r0.x -mad.f32 r0.y, r4.w, r1.x, (neg)r0.y -mov.f32f32 r0.z, r1.x -mov.f32f32 r1.x, r5.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.w, r0.z -mov.f32f32 r1.z, r1.x -mul.f r0.x, r0.x, r2.y -mul.f r0.y, r0.y, r2.y -mov.f32f32 r0.z, r3.z -nop -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.y, r0.z -mov.f32f32 r0.z, r0.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y +mov.f32f32 r5.x, r3.z +mul.f r4.x, r4.x, r4.x +mov.f32f32 r5.y, r1.y +add.f r0.x, r0.x, c15.w +mad.f32 r3.z, r3.z, r5.x, r4.x +mad.f32 r0.z, c10.z, r4.w, r0.z +mad.f32 r1.y, r1.y, r5.y, r3.z +mad.f32 r0.w, c11.y, r0.x, r0.w +mad.f32 r3.x, c11.x, r0.x, r3.x +mad.f32 r3.w, c11.w, r0.x, r3.w +mad.f32 r3.z, c11.z, r0.x, r0.z +mad.f32 r0.y, c1.w, r1.x, r0.y +mul.f r0.z, c0.z, r1.w +rsq r1.y, r1.y +(ss)mov.f32f32 r4.x, r1.y +mul.f r6.w, r3.y, r1.y +mul.f r3.y, r0.w, c16.y +mul.f r3.x, r3.x, c16.x +mul.f r7.y, r5.y, r4.x +mul.f r7.x, r5.x, r4.x +mad.f32 r0.y, c2.w, r4.w, r0.y +mad.f32 r0.z, c1.z, r1.x, r0.z +mad.f32 r0.w, c3.w, r0.x, r0.y +mad.f32 r0.y, c2.z, r4.w, r0.z +(ss)mul.f r1.y, c0.y, r1.w +mad.f32 r0.z, c3.z, r0.x, r0.y +mad.f32 r0.y, c1.y, r1.x, r1.y +mad.f32 r1.x, c1.x, r1.x, r1.z +mad.f32 r0.y, c2.y, r4.w, r0.y +mad.f32 r1.x, c2.x, r2.w, r1.x +mad.f32 r0.y, c3.y, r0.x, r0.y +mad.f32 r0.x, c3.x, r0.x, r1.x +mad.f32 r5.x, c7.x, r1.w, c7.y +mad.f32 r5.y, c7.x, r4.w, c7.y +mul.f r1.z, r4.y, c6.z +mul.f r1.y, r4.y, c6.y +mul.f r1.x, r9.y, c6.x +mad.f32 r1.w, c14.z, r8.z, r4.z +mad.f32 r2.w, c13.y, r8.y, r9.x +mul.f r4.x, c12.x, r8.x +mad.f32 r2.w, c14.y, r8.z, r2.w +mov.f32f32 r4.z, r1.w +mad.f32 r4.x, c13.x, r8.y, r4.x (rpt1)nop -mov.f32f32 r2.x, r0.x -mov.f32f32 r3.w, r0.y -mov.f32f32 r3.z, r0.z -mov.f32f32 r0.x, r4.w -mov.f32f32 r0.y, r4.x +mul.f r4.w, r2.x, r4.z +mad.f32 r8.x, c14.x, r8.z, r4.x +mov.f32f32 r4.y, r2.w (rpt1)nop -mov.f32f32 r3.y, r0.x -mov.f32f32 r3.x, r0.y -mov.f32f32 r0.x, r2.w -mov.f32f32 r0.y, r2.z +mov.f32f32 r4.x, r8.x +mul.f r8.y, r2.z, r4.y +mul.f r8.x, r2.y, r8.x +mad.f32 r1.w, r2.y, r1.w, (neg)r8.y +mad.f32 r4.w, r2.z, r4.x, (neg)r4.w +mad.f32 r8.x, r2.x, r2.w, (neg)r8.x (rpt1)nop -mov.f32f32 r13.y, r0.x -mov.f32f32 r13.x, r0.y +mul.f r4.w, r4.w, r8.w +mul.f r2.w, r1.w, r8.w +mul.f r1.w, r8.x, r8.w end -; VERT: outputs: r11.z (0:0) r7.x (5:9) r3.x (5:10) r9.z (5:11) r1.y (5:12) r12.z (5:13) r8.z (5:14) r10.z (5:15) -; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r1.z (0:0,cm=f,il=16,b=0) r2.z (0:0,cm=3,il=20,b=0) -; VERT: 304 instructions, 0 half, 14 full -; pos: r11.z +nop +nop +; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11) r4.x (5:12) r5.x (5:13) r6.x (5:14) r7.x (5:15) +; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r8.x (0:0,cm=f,il=16,b=0) r5.z (0:0,cm=3,il=20,b=0) +; VERT: 201 instructions, 0 half, 10 full diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-64.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-64.asm index 82c1168..8ea6dd3 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-64.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-64.asm @@ -4,990 +4,677 @@ @in(r0.w) in3 @in(r1.x) in4 @in(r1.y) in5 -@out(r3.x) out0 -@out(r3.y) out1 -@out(r3.z) out2 -@out(r3.w) out3 -(sy)(ss)bary.f r0.x, 7, r1.x -bary.f r0.y, 8, r1.x +@out(r2.x) out0 +@out(r2.y) out1 +@out(r2.z) out2 +@out(r2.w) out3 +@const(c14.x) 0x3f000000, 0x00000000, 0x43160000, 0x3bdb8bac +@const(c15.x) 0x41800000, 0x3f700000, 0x3d800000, 0x3f233333 +@const(c16.x) 0x40000000, 0xbf800000, 0xbb449ba6, 0xbf000000 +@const(c17.x) 0x3f800000, 0xbf000000, 0x3de38866, 0x3fb8aa65 +@const(c18.x) 0x3cf5c28f, 0x00000000, 0x00000000, 0x00000000 +@const(c19.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 +(sy)(ss)bary.f r0.x, 20, r1.x +bary.f r0.y, 7, r1.x +bary.f r1.z, 8, r1.x add.f r0.w, r0.w, c14.y -bary.f r1.z, 9, r1.x -mov.f32f32 r0.x, r0.x -bary.f r1.w, 20, r1.x -add.f r2.x, r0.y, c16.w -add.f r2.y, r1.z, c16.w -bary.f r2.z, 18, r1.x -mul.f r2.w, r1.w, r0.x -bary.f r3.x, 15, r1.x -floor.f r3.y, r2.x +bary.f r1.w, 9, r1.x +mul.f r2.x, r0.x, r0.y +bary.f r2.y, 21, r1.x +bary.f r2.z, 15, r1.x +add.f r2.w, r1.z, c16.w +add.f r3.y, r1.w, c16.w rcp r0.w, r0.w add.f r0.z, r0.z, c14.y -floor.f r3.z, r2.y -mov.f32f32 r3.x, r3.x -bary.f r3.w, 21, r1.x -add.f r2.x, r2.x, (neg)r3.y +mad.f32 r2.x, r2.y, r2.z, r2.x +bary.f r3.z, 22, r1.x +bary.f r3.w, 3, r1.x +floor.f r4.x, r2.w (ss)mul.f r0.z, r0.z, r0.w -(ss)add.f r0.w, r2.y, (neg)r3.z -mad.f32 r2.y, r3.w, r3.x, r2.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r0.z, r0.z -absneg.f r2.w, (neg)c11.x -mov.f32f32 r2.y, r2.y -bary.f r3.y, 3, r1.x -mul.f r3.z, c14.x, r2.x -mul.f r2.w, r2.w, c11.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r3.y, r3.y -bary.f r4.x, 22, r1.x -mov.f32f32 r3.z, r3.z -mul.f r2.w, r2.w, r0.z -mul.f r4.y, c14.x, r0.w -mad.f32 r2.y, r4.x, r3.y, r2.y -add.f r0.y, r0.y, (neg)r3.z -mov.f32f32 r2.w, r2.w -mov.f32f32 r3.z, r4.y -mov.f32f32 r2.y, r2.y -bary.f r4.y, 12, r1.x -mov.f32f32 r0.y, r0.y -mul.f r0.z, r2.w, r0.z -add.f r1.z, r1.z, (neg)r3.z -mul.f r2.w, r1.w, r4.y -bary.f r3.z, 13, r1.x -add.f r4.z, c17.y, r0.y +(ss)absneg.f r0.w, (neg)c11.x +mad.f32 r2.x, r3.z, r3.w, r2.x +add.f r2.w, r2.w, (neg)r4.x +floor.f r4.x, r3.y +mul.f r0.w, r0.w, c11.x +mov.f32f32 r4.y, r2.x +bary.f r4.z, 12, r1.x +mov.f32f32 r4.w, r2.w +mul.f r0.w, r0.w, r0.z mov.f32f32 r0.z, r0.z -add.f r0.y, c17.x, r0.y -mad.f32 r2.w, r3.w, r3.z, r2.w -mov.f32f32 r4.z, r4.z +mul.f r5.x, r0.x, r4.z +bary.f r5.y, 13, r1.x +mul.f r5.z, c14.x, r4.w +mul.f r0.z, r0.w, r0.z +add.f r0.w, r3.y, (neg)r4.x +mad.f32 r3.y, r2.y, r5.y, r5.x +bary.f r4.x, 14, r1.x +add.f r1.z, r1.z, (neg)r5.z mul.f r0.z, r0.z, c17.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r2.w, r2.w -bary.f r4.w, 14, r1.x -mul.f r4.z, r4.z, c5.z -mov.f32f32 r0.z, r0.z -mul.f r0.y, r0.y, c5.z -mad.f32 r2.w, r4.x, r4.w, r2.w -mov.f32f32 r5.x, r4.z -mov.f32f32 r4.z, r4.z -mov.f32f32 r5.y, r0.y -mul.f r5.z, r2.w, r2.w -mov.f32f32 r5.w, r5.x -mad.f32 r5.x, r2.y, r2.y, r5.z -mov.f32f32 r1.z, r1.z +mov.f32f32 r5.x, r0.w +mad.f32 r3.y, r3.z, r4.x, r3.y +mov.f32f32 r5.z, r1.z +add.f r1.z, c17.x, r1.z +mul.f r5.w, c14.x, r5.x +mul.f r6.x, r3.y, r3.y +add.f r5.z, c17.y, r5.z +mad.f32 r2.x, r2.x, r4.y, r6.x +bary.f r6.x, 4, r1.x exp2 r0.z, r0.z -(ss)mov.f32f32 r0.z, r0.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r5.x, r5.x -bary.f r5.z, 4, r1.x -add.f r6.x, c17.y, r1.z -add.f r6.y, c19.y, (neg)r0.z -mov.f32f32 r0.y, r0.y -mul.f r6.z, r1.w, r5.z -bary.f r6.w, 5, r1.x -mov.f32f32 r6.x, r6.x +(ss)mov.f32f32 r6.y, r0.z +mul.f r6.z, r5.z, c5.z +add.f r1.w, r1.w, (neg)r5.w +mul.f r5.z, r0.x, r6.x +bary.f r5.w, 5, r1.x +mov.f32f32 r7.y, r6.z +add.f r6.y, c19.y, (neg)r6.y +mov.f32f32 r6.w, r1.w +mad.f32 r5.z, r2.y, r5.w, r5.z +bary.f r8.x, 6, r1.x mul.f r6.y, r6.y, c11.y -mul.f r0.z, r0.z, c17.x -mad.f32 r6.z, r3.w, r6.w, r6.z -mul.f r7.x, r6.x, c5.w -mov.f32f32 r7.y, r0.y -add.f r0.y, c17.x, r1.z -mov.f32f32 r1.z, r6.z -bary.f r6.z, 6, r1.x -mov.f32f32 r6.x, r7.x +(ss)mul.f r0.z, r0.z, c17.x +add.f r6.w, c17.y, r6.w +mad.f32 r5.z, r3.z, r8.x, r5.z +mul.f r8.y, r1.z, c5.z +add.f r1.z, c17.x, r1.w add.f r0.z, r0.z, r6.y -mov.f32f32 r0.y, r0.y -mad.f32 r1.z, r4.x, r6.z, r1.z -mov.f32f32 r6.x, r6.x -bary.f r6.y, 10, r1.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r1.z, r1.z -mul.f r0.y, r0.y, c5.w -add.f r8.x, r6.y, c16.z -mov.f32f32 r8.y, r5.y -mad.f32 r5.x, r1.z, r1.z, r5.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r5.y, r8.x -mov.f32f32 r7.z, r0.y -mov.f32f32 r7.x, r7.x -mov.f32f32 r9.x, r4.z -mov.f32f32 r6.y, r5.y -rsq r4.z, r5.x -(ss)mov.f32f32 r4.z, r4.z +mov.f32f32 r1.w, r5.z +mul.f r9.y, r6.w, c5.w +mov.f32f32 r9.x, r8.y +mul.f r8.z, r1.z, c5.w +mad.f32 r1.z, r5.z, r1.w, r2.x +mov.f32f32 r7.z, r9.y max.f r0.z, r0.z, c14.y -(ss)mov.f32f32 r5.x, r7.z -mov.f32f32 r8.z, r7.x -mul.f r1.z, r1.z, r4.z +bary.f r2.x, 10, r1.x +mov.f32f32 r6.w, r8.z +add.f r4.w, c16.x, (neg)r4.w +mul.f r0.x, r0.x, r0.x +rsq r1.z, r1.z +(ss)mov.f32f32 r5.z, r1.z +add.f r8.w, r2.x, c16.z min.f r0.z, r0.z, c17.x -sam.s (f32)(x)r5.y, r5.w, s#4, t#4 -(sy)mov.f32f32 r5.y, r5.y -mov.f32f32 r7.z, r5.x -mov.f32f32 r1.z, r1.z -add.f r5.x, c19.y, (neg)r0.z -(ss)add.f r5.w, c19.y, (neg)r0.z -add.f r6.x, c19.y, (neg)r0.z -mul.f r1.z, r1.z, c15.x -mov.f32f32 r5.y, r5.y -add.f r6.y, c16.x, (neg)r2.x -mul.f r5.x, r5.x, c10.z -mov.f32f32 r1.z, r1.z -mul.f r5.w, r5.w, c10.y -mul.f r6.x, r6.x, c10.x -mov.f32f32 r6.y, r6.y -add.f r7.x, c16.x, (neg)r0.w -mov.f32f32 r7.w, r8.x -mul.f r2.y, r2.y, r4.z -rcp r1.z, r1.z -(ss)mov.f32f32 r1.z, r1.z -mul.f r2.w, r2.w, r4.z -mov.f32f32 r4.z, r7.x -mov.f32f32 r2.y, r2.y -mov.f32f32 r7.w, r7.w -mov.f32f32 r2.w, r2.w -mul.f r7.x, r6.y, r4.z -mov.f32f32 r2.y, r2.y -mul.f r1.w, r1.w, r1.w -absneg.f r2.w, (neg)r2.w -mad.f32 r1.w, r3.w, r3.w, r1.w -mul.f r3.w, r7.x, r5.y -mov.f32f32 r5.y, r8.x -sam.s (f32)(x)r7.x, r7.y, s#4, t#4 -(sy)mov.f32f32 r7.x, r7.x -mov.f32f32 r1.w, r1.w -mov.f32f32 r0.y, r0.y -mad.f32 r1.w, r4.x, r4.x, r1.w -mov.f32f32 r8.w, r5.y -mov.f32f32 r4.x, r2.z -add.f r2.x, r2.x, c17.x +(ss)mul.f r1.z, r3.y, r1.z +mul.f r1.w, r1.w, r5.z +mov.f32f32 r7.w, r8.w +add.f r2.x, c19.y, (neg)r0.z +add.f r3.y, c19.y, (neg)r0.z +mul.f r1.w, r1.w, c15.x +add.f r6.y, c19.y, (neg)r0.z +mov.f32f32 r9.z, r8.w +mov.f32f32 r7.x, r8.w +nop +sam.s (f32)(x)r9.w, r7.y, s#4, t#4 +(ss)mul.f r7.y, r2.x, c10.z +mov.f32f32 r2.x, r4.w +rcp r1.w, r1.w +(ss)mov.f32f32 r7.z, r1.w +mul.f r4.y, r4.y, r5.z +mad.f32 r0.x, r2.y, r2.y, r0.x +add.f r2.y, c16.x, (neg)r5.x +mad.f32 r0.x, r3.z, r3.z, r0.x +mul.f r3.y, r3.y, c10.y +mul.f r3.z, r6.y, c10.x +mov.f32f32 r5.x, r2.y +absneg.f r1.z, (neg)r1.z +sam.s (f32)(x)r10.x, r9.x, s#4, t#4 +sam.s (f32)(x)r6.y, r6.z, s#4, t#4 +sam.s (f32)(x)r8.y, r8.y, s#4, t#4 +add.f r2.w, r2.w, c17.x add.f r0.w, r0.w, c17.x -bary.f r5.y, 16, r1.x -(ss)mov.f32f32 r7.y, r4.x -sqrt r1.w, r1.w -(ss)mov.f32f32 r1.w, r1.w -sam.s (f32)(x)r4.x, r8.y, s#4, t#4 -(sy)mov.f32f32 r4.x, r4.x -mul.f r4.z, r2.x, r4.z -mov.f32f32 r9.y, r0.y -add.f r0.y, c14.z, (neg)r1.w -mov.f32f32 r1.w, r8.x -bary.f r7.w, 19, r1.x -mul.f r6.y, r6.y, r0.w -mov.f32f32 r0.y, r0.y -mad.f32 r3.w, r4.z, r4.x, r3.w -mov.f32f32 r9.z, r1.w -mov.f32f32 r1.w, r7.w -mul.f r0.y, c12.z, r0.y -mov.f32f32 r3.w, r3.w -mov.f32f32 r4.x, r5.y -mov.f32f32 r7.z, r1.w -mov.f32f32 r0.y, r0.y -sam.s (f32)(x)r1.w, r9.x, s#4, t#4 -(sy)mov.f32f32 r1.w, r1.w -mov.f32f32 r8.x, r4.x -bary.f r4.x, 17, r1.x -mul.f r0.y, r0.y, c14.w -mad.f32 r1.w, r6.y, r1.w, r3.w -(ss)nop -sam (f32)(w)r8.y, r7.y, s#2, t#2 -(sy)cmps.f.lt r3.w, r9.x, c15.y -mov.f32f32 r4.x, r4.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.w, r1.w -mul.f r0.w, r2.x, r0.w -cov.u32f32 r2.x, r3.w -mul.f r2.w, r2.w, r0.y -mul.f r0.y, r2.y, r0.y -mad.f32 r0.w, r0.w, r7.x, r1.w -cmps.f.ne r1.w, r2.x, c14.y -mov.f32f32 r2.x, r2.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r2.y, c15.z -mul.f r2.x, r2.x, r1.z -mul.f r0.y, r0.y, r1.z +sqrt r0.x, r0.x +(ss)add.f r0.x, c14.z, (neg)r0.x +mul.f r2.x, r2.x, r5.x +mul.f r2.y, r2.w, r2.y +mul.f r4.w, r4.w, r0.w +mul.f r0.x, c12.z, r0.x +(sy)mul.f r2.x, r2.x, r9.w +bary.f r6.z, 18, r1.x +mul.f r0.w, r2.w, r0.w +mul.f r0.x, r0.x, c14.w +mad.f32 r2.x, r2.y, r10.x, r2.x +bary.f r6.w, 19, r1.x +bary.f r8.z, 16, r1.x +mov.f32f32 r2.y, r0.x +mad.f32 r2.x, r4.w, r6.y, r2.x +mul.f r0.x, r1.z, r0.x +mad.f32 r0.w, r0.w, r8.y, r2.x +mul.f r1.z, r4.y, r2.y +sam (f32)(w)r9.x, r6.z, s#2, t#2 +bary.f r8.w, 17, r1.x +mul.f r0.x, r0.x, r1.w +(sy)cmps.f.lt r1.w, r9.w, c15.y +mul.f r1.z, r1.z, r7.z mul.f r0.w, c17.z, r0.w -mov.f32f32 r1.z, c14.y -mov.f32f32 r2.x, r2.x +mov.f32f32 r2.x, r0.x +cov.u32f32 r1.w, r1.w +mov.f32f32 r2.y, r1.z +cmps.f.lt r2.w, r9.w, c15.y +mov.f32f32 r4.y, r0.w +cmps.f.ne r1.w, r1.w, c14.y +mov.f32f32 r4.w, c14.y +mov.f32f32 r5.x, c14.y +cov.u32f32 r2.w, r2.w +sam (f32)(w)r8.y, r8.z, s#1, t#1 +(sy)cmps.f.lt r5.z, r9.x, c18.x +sel.b32 r1.z, r1.z, r1.w, r4.w +sel.b32 r0.x, r0.x, r1.w, r5.x +(rpt1)nop +add.f r1.w, r6.w, r1.z +add.f r1.z, r6.z, r0.x +cmps.f.ne r0.x, r2.w, c14.y +cov.u32f32 r2.w, r5.z +mov.f32f32 r4.w, c15.z +mov.f32f32 r5.x, c14.y mov.f32f32 r0.y, r0.y -mov.f32f32 r0.w, r0.w -sel.b32 r1.z, r2.y, r1.w, r1.z -mov.f32f32 r1.w, r2.x -mov.f32f32 r2.y, r0.y -mov.f32f32 r2.w, r0.y -mov.f32f32 r3.w, r0.y -mov.f32f32 r1.w, r1.w -cmps.f.lt r4.z, r9.x, c15.y -mov.f32f32 r2.y, r2.y -mov.f32f32 r2.w, r2.w +mov.f32f32 r5.z, r2.z +sam (f32)(w)r6.y, r1.z, s#2, t#2 +cmps.f.ne r2.z, r2.w, c14.y +sel.b32 r0.x, r4.w, r0.x, r5.x +mov.f32f32 r2.w, c14.y mov.f32f32 r3.w, r3.w -cov.u32f32 r4.z, r4.z -mov.f32f32 r5.y, r2.x -mov.f32f32 r6.y, r0.y -mov.f32f32 r7.x, r2.x -cmps.f.ne r4.z, r4.z, c14.y -(ss)mov.f32f32 r7.y, c14.y -mov.f32f32 r7.z, c14.y -mov.f32f32 r5.y, r5.y -mov.f32f32 r6.y, r6.y -sel.b32 r1.w, r1.w, r4.z, r7.y -sel.b32 r2.y, r2.y, r4.z, r7.z -mov.f32f32 r4.z, r7.x -mov.f32f32 r7.x, r0.y -add.f r1.w, r2.z, r1.w -add.f r2.y, r7.w, r2.y -mov.f32f32 r2.z, r2.x -mov.f32f32 r7.y, r0.y -mov.f32f32 r7.z, r1.w -mov.f32f32 r7.w, r2.y -mov.f32f32 r7.x, r7.x -mov.f32f32 r2.z, r2.z -mov.f32f32 r8.y, r7.z -mov.f32f32 r8.z, r7.w -mov.f32f32 r7.y, r7.y -mov.f32f32 r7.z, r2.x -mov.f32f32 r7.w, r0.y -mov.f32f32 r8.w, r2.x -mov.f32f32 r9.x, r0.y -mov.f32f32 r9.y, r2.x -sam (f32)(w)r9.z, r8.y, s#2, t#2 -add.f r1.z, c15.y, (neg)r1.z -mov.f32f32 r7.z, r7.z -mov.f32f32 r7.w, r7.w -(ss)mov.f32f32 r8.z, r8.w -(sy)cmps.f.lt r8.y, r10.y, r1.z -cmps.f.lt r8.w, r10.y, r1.z -mov.f32f32 r9.x, r9.x -mov.f32f32 r9.y, r9.y +mov.f32f32 r4.w, (0.000000) +add.f r0.x, c15.y, (neg)r0.x +sel.b32 r5.x, r2.w, r2.z, r9.x +bary.f r6.y, 2, r1.x +(ss)bary.f r6.z, 1, r1.x +(sy)cmps.f.lt r2.z, r7.x, r0.x +cmps.f.lt r2.w, r7.x, r0.x +bary.f r6.w, 23, r1.x +bary.f r7.x, 24, r1.x +cov.u32f32 r2.z, r2.z +cov.u32f32 r2.w, r2.w +bary.f r7.z, 25, r1.x +bary.f (ei)r1.x, 0, r1.x +cmps.f.ne r1.y, r2.z, c14.y +mov.f32f32 r2.z, c14.y +cmps.f.ne r2.w, r2.w, c14.y +mov.f32f32 r7.w, c14.y +mov.f32f32 r8.y, c15.z +sel.b32 r2.z, r2.x, r1.y, r2.z +mov.f32f32 r8.z, c14.y +sel.b32 r1.y, r2.y, r1.y, r7.w +mov.f32f32 r7.w, c14.y +add.f r1.z, r1.z, r2.z +sel.b32 r2.z, r8.y, r2.w, r8.z +add.f r8.z, r1.w, r1.y +mov.f32f32 r1.y, c14.y +mov.f32f32 r8.y, r1.z +add.f r0.x, r0.x, (neg)r2.z +mov.f32f32 r1.w, r8.z +mov.f32f32 r2.z, c14.y +mov.f32f32 r2.w, c15.z +mov.f32f32 r8.w, c14.y +mov.f32f32 r9.x, c14.y +sam (f32)(w)r9.y, r8.y, s#2, t#2 +(sy)(ss)cmps.f.lt r8.y, r10.x, r0.x +mov.f32f32 r0.x, r0.x +mov.f32f32 r8.z, c14.y +mov.f32f32 r9.y, c15.z cov.u32f32 r8.y, r8.y -cov.u32f32 r8.w, r8.w -mov.f32f32 r9.z, r0.y -mov.f32f32 r9.w, r2.x -mov.f32f32 r8.y, r8.y -cmps.f.ne r8.w, r8.w, c14.y -mov.f32f32 r10.x, c15.z -mov.f32f32 r10.y, c14.y +cmps.f.lt r9.z, r10.x, r0.x +mov.f32f32 r9.w, c14.y +mov.f32f32 r10.x, c14.y cmps.f.ne r8.y, r8.y, c14.y -mov.f32f32 r10.z, r2.x -mov.f32f32 r10.w, r0.y -sel.b32 r8.w, r10.x, r8.w, r10.y -mov.f32f32 r9.z, r9.z -mov.f32f32 r10.x, r10.z mov.f32f32 r10.y, c14.y -mov.f32f32 r10.z, r10.w +cov.u32f32 r9.z, r9.z +mov.f32f32 r10.z, c14.y mov.f32f32 r10.w, c14.y -add.f r1.z, r1.z, (neg)r8.w -sel.b32 r8.w, r10.x, r8.y, r10.y -mov.f32f32 r9.w, r9.w -mov.f32f32 r10.x, r0.y -mov.f32f32 r10.y, r2.x -add.f r1.w, r1.w, r8.w -sel.b32 r8.y, r10.z, r8.y, r10.w -mov.f32f32 r1.z, r1.z -mov.f32f32 r8.w, r10.x -mov.f32f32 r1.w, r1.w -add.f r2.y, r2.y, r8.y -mov.f32f32 r10.x, r10.y -mov.f32f32 r8.y, r0.y -mov.f32f32 r10.y, r1.w -mov.f32f32 r2.y, r2.y -mov.f32f32 r10.z, r2.x -mov.f32f32 r10.w, r0.y -mov.f32f32 r11.x, r10.y -mov.f32f32 r10.y, r2.y -mov.f32f32 r11.z, r8.y -mov.f32f32 r10.z, r10.z -mov.f32f32 r10.w, r10.w -mov.f32f32 r11.y, r10.y -mov.f32f32 r8.y, r2.x -mov.f32f32 r10.y, r0.y -mov.f32f32 r11.w, r2.x -mov.f32f32 r12.x, r0.y -mov.f32f32 r12.y, r2.x -mov.f32f32 r12.z, r0.y -sam (f32)(w)r12.w, r11.x, s#2, t#2 -(sy)(ss)cmps.f.lt r11.x, r13.z, r1.z -cmps.f.lt r11.y, r13.z, r1.z -mov.f32f32 r12.w, r8.y -mov.f32f32 r10.y, r10.y -cov.u32f32 r8.y, r11.x -cov.u32f32 r11.x, r11.y -mov.f32f32 r11.y, r11.w -mov.f32f32 r11.w, r12.x -mov.f32f32 r8.y, r8.y -cmps.f.ne r11.x, r11.x, c14.y -mov.f32f32 r12.x, c15.z +sel.b32 r10.y, r2.x, r8.y, r10.y +cmps.f.ne r9.z, r9.z, c14.y +sel.b32 r8.y, r2.y, r8.y, r10.z +mov.f32f32 r10.z, c15.z +add.f r1.z, r1.z, r10.y +mov.f32f32 r10.y, c14.y +add.f r11.y, r1.w, r8.y +mov.f32f32 r1.w, c15.z +mov.f32f32 r11.x, r1.z +sel.b32 r8.y, r10.z, r9.z, r10.y +mov.f32f32 r9.z, r11.y +mov.f32f32 r10.y, c14.y +mov.f32f32 r10.z, c14.y +mov.f32f32 r11.z, c14.y +mov.f32f32 r11.w, c15.z +sam (f32)(w)r12.x, r11.x, s#2, t#2 +add.f r0.x, r0.x, (neg)r8.y +mov.f32f32 r8.y, c14.y +(ss)mov.f32f32 r11.x, c14.y +mov.f32f32 r11.y, c14.y +(sy)cmps.f.lt r12.x, r12.w, r0.x +mov.f32f32 r0.x, r0.x +mov.f32f32 r12.y, c15.z +mov.f32f32 r12.z, c14.y +cov.u32f32 r12.x, r12.x +cmps.f.lt r12.w, r12.w, r0.x mov.f32f32 r13.x, c14.y -cmps.f.ne r8.y, r8.y, c14.y -mov.f32f32 r13.y, r2.x -mov.f32f32 r12.z, r12.z +mov.f32f32 r13.y, c14.y +cmps.f.ne r12.x, r12.x, c14.y mov.f32f32 r13.z, c14.y -sel.b32 r11.x, r12.x, r11.x, r13.x -mov.f32f32 r12.x, r13.y -mov.f32f32 r13.x, c14.y -sel.b32 r12.z, r12.z, r8.y, r13.z -add.f r1.z, r1.z, (neg)r11.x -mov.f32f32 r11.x, r12.y -sel.b32 r8.y, r12.x, r8.y, r13.x -add.f r2.y, r2.y, r12.z -mov.f32f32 r1.z, r1.z -mov.f32f32 r12.x, r2.x -add.f r1.w, r1.w, r8.y -mov.f32f32 r2.y, r2.y -mov.f32f32 r8.y, r4.x -bary.f r4.x, 23, r1.x -mov.f32f32 r1.w, r1.w -mov.f32f32 r12.y, r2.y -mov.f32f32 r12.x, r12.x -mov.f32f32 r4.x, r4.x -mov.f32f32 r12.z, r1.w -mov.f32f32 r13.y, r12.y -sam (f32)(w)r13.z, r8.x, s#1, t#1 -(sy)(ss)cmps.f.lt r8.x, r14.y, c18.x -mov.f32f32 r8.y, r14.y -mov.f32f32 r13.x, r12.z -bary.f r12.y, 24, r1.x -bary.f r12.z, 25, r1.x -mov.f32f32 r13.z, (0.000000) -cov.u32f32 r8.x, r8.x -mov.f32f32 r8.y, r8.y -mov.f32f32 r12.y, r12.y -sam (f32)(w)r13.w, r13.x, s#2, t#2 -(sy)(ss)cmps.f.lt r13.x, r14.z, r1.z -cmps.f.lt r13.y, r14.z, r1.z -cmps.f.ne r8.x, r8.x, c14.y -nop -cov.u32f32 r13.x, r13.x -cov.u32f32 r13.y, r13.y +cov.u32f32 r12.w, r12.w mov.f32f32 r13.w, c14.y -mov.f32f32 r12.z, r12.z -mov.f32f32 r13.x, r13.x -cmps.f.ne r13.y, r13.y, c14.y mov.f32f32 r14.x, c15.z -mov.f32f32 r14.y, c14.y -cmps.f.ne r13.x, r13.x, c14.y -mov.f32f32 r14.z, c14.y -mov.f32f32 r14.w, c14.y -sel.b32 r13.y, r14.x, r13.y, r14.y -sel.b32 r8.x, r13.w, r8.x, r8.y -sel.b32 r8.y, r12.x, r13.x, r14.z -sel.b32 r11.w, r11.w, r13.x, r14.w -add.f r1.z, r1.z, (neg)r13.y -bary.f r12.x, 2, r1.x -add.f r1.w, r1.w, r8.y -add.f r2.y, r2.y, r11.w -mov.f32f32 r1.z, r1.z -bary.f r8.y, 1, r1.x -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.y, r2.y -bary.f (ei)r1.x, 0, r1.x -mov.f32f32 r1.y, c14.y -mov.f32f32 r11.w, r1.w -mov.f32f32 r13.x, r2.y -mov.f32f32 r13.y, c14.y +sel.b32 r13.z, r2.x, r12.x, r13.z +cmps.f.ne r12.w, r12.w, c14.y +sel.b32 r12.x, r2.y, r12.x, r13.w +mov.f32f32 r13.w, c15.z +add.f r1.z, r1.z, r13.z +mov.f32f32 r13.z, c14.y +add.f r14.z, r9.z, r12.x +mov.f32f32 r9.z, c14.y +mov.f32f32 r14.y, r1.z +sel.b32 r12.x, r13.w, r12.w, r13.z +mov.f32f32 r12.w, r14.z +mov.f32f32 r13.z, c14.y mov.f32f32 r13.w, c14.y -mov.f32f32 r14.x, r11.w -mov.f32f32 r14.y, r13.x -mov.f32f32 r11.w, c15.z -mov.f32f32 r13.x, c14.y -mov.f32f32 r14.z, c14.y -mov.f32f32 r14.w, c14.y -mov.f32f32 r15.x, c15.z -mov.f32f32 r15.y, c14.y -sam (f32)(w)r15.z, r14.x, s#2, t#2 -(sy)(ss)cmps.f.lt r14.x, r16.y, r1.z -cmps.f.lt r14.y, r16.y, r1.z +mov.f32f32 r14.w, c15.z +mov.f32f32 r15.x, c14.y +sam (f32)(w)r15.y, r14.y, s#2, t#2 +add.f r0.x, r0.x, (neg)r12.x +mov.f32f32 r12.x, c14.y +(ss)mov.f32f32 r14.y, c14.y +mov.f32f32 r14.z, c15.z +(sy)cmps.f.lt r15.y, r16.x, r0.x +mov.f32f32 r0.x, r0.x mov.f32f32 r15.z, c14.y mov.f32f32 r15.w, c14.y -cov.u32f32 r14.x, r14.x -cov.u32f32 r14.y, r14.y -mov.f32f32 r16.x, c15.z +cov.u32f32 r15.y, r15.y +cmps.f.lt r16.x, r16.x, r0.x mov.f32f32 r16.y, c14.y -mov.f32f32 r14.x, r14.x -cmps.f.ne r14.y, r14.y, c14.y mov.f32f32 r16.z, c15.z +cmps.f.ne r15.y, r15.y, c14.y mov.f32f32 r16.w, c14.y -cmps.f.ne r14.x, r14.x, c14.y +cov.u32f32 r16.x, r16.x mov.f32f32 r17.x, c14.y mov.f32f32 r17.y, c14.y -sel.b32 r14.y, r16.z, r14.y, r16.w -mov.f32f32 r16.z, c14.y -sel.b32 r11.x, r11.x, r14.x, r17.x -sel.b32 r10.y, r10.y, r14.x, r17.y -add.f r1.z, r1.z, (neg)r14.y -mov.f32f32 r14.x, c14.y -add.f r1.w, r1.w, r11.x -add.f r2.y, r2.y, r10.y -mov.f32f32 r1.z, r1.z -mov.f32f32 r10.y, c15.z -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.y, r2.y -mov.f32f32 r11.x, c14.y -mov.f32f32 r14.y, c14.y -mov.f32f32 r16.w, r1.w -mov.f32f32 r17.x, r2.y -mov.f32f32 r17.y, c14.y -mov.f32f32 r17.z, c15.z -mov.f32f32 r17.w, r16.w -mov.f32f32 r18.x, r17.x +sel.b32 r16.w, r2.x, r15.y, r16.w +cmps.f.ne r16.x, r16.x, c14.y +sel.b32 r15.y, r2.y, r15.y, r17.x +mov.f32f32 r17.x, c15.z +add.f r1.z, r1.z, r16.w mov.f32f32 r16.w, c14.y -mov.f32f32 r17.x, c14.y -mov.f32f32 r18.y, c14.y -mov.f32f32 r18.z, c15.z -mov.f32f32 r18.w, c14.y -mov.f32f32 r19.x, c14.y -sam (f32)(w)r19.y, r17.w, s#2, t#2 -(sy)(ss)cmps.f.lt r17.w, r20.x, r1.z -cmps.f.lt r18.x, r20.x, r1.z -mov.f32f32 r19.y, c14.y -mov.f32f32 r19.z, c15.z -cov.u32f32 r17.w, r17.w -cov.u32f32 r18.x, r18.x -mov.f32f32 r19.w, c14.y -mov.f32f32 r20.x, c14.y -mov.f32f32 r17.w, r17.w -cmps.f.ne r18.x, r18.x, c14.y -mov.f32f32 r20.y, c15.z -mov.f32f32 r20.z, c14.y -cmps.f.ne r17.w, r17.w, c14.y -mov.f32f32 r20.w, c14.y -mov.f32f32 r21.x, c14.y -sel.b32 r18.x, r20.y, r18.x, r20.z -mov.f32f32 r20.y, c14.y -sel.b32 r11.y, r11.y, r17.w, r20.w -sel.b32 r10.w, r10.w, r17.w, r21.x -add.f r1.z, r1.z, (neg)r18.x -mov.f32f32 r17.w, c15.z -add.f r1.w, r1.w, r11.y -add.f r2.y, r2.y, r10.w -mov.f32f32 r1.z, r1.z -mov.f32f32 r10.w, c14.y -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.y, r2.y -mov.f32f32 r11.y, c14.y +add.f r17.w, r12.w, r15.y +mov.f32f32 r12.w, c14.y +mov.f32f32 r17.z, r1.z +sel.b32 r15.y, r17.x, r16.x, r16.w +mov.f32f32 r16.x, r17.w +mov.f32f32 r16.w, c14.y +mov.f32f32 r17.x, c15.z mov.f32f32 r18.x, c14.y -mov.f32f32 r20.z, r1.w -mov.f32f32 r20.w, r2.y -mov.f32f32 r21.x, c15.z -mov.f32f32 r21.y, c14.y -mov.f32f32 r21.z, r20.z -mov.f32f32 r21.w, r20.w -mov.f32f32 r20.z, c14.y -(rpt4)nop -sam (f32)(w)r21.z, r21.z, s#2, t#2 -(sy)cmps.f.lt r20.w, r22.y, r1.z -(ss)cmps.f.lt r21.z, r22.y, r1.z -(rpt1)nop -cov.u32f32 r20.w, r20.w -cov.u32f32 r21.z, r21.z +mov.f32f32 r18.y, c14.y +sam (f32)(w)r18.z, r17.z, s#2, t#2 +add.f r0.x, r0.x, (neg)r15.y +(rpt2)nop +(sy)cmps.f.lt r15.y, r19.y, r0.x +mov.f32f32 r0.x, r0.x (rpt1)nop -mov.f32f32 r20.w, r20.w -cmps.f.ne r21.z, r21.z, c14.y +cov.u32f32 r15.y, r15.y +(ss)cmps.f.lt r17.z, r19.y, r0.x (rpt1)nop -cmps.f.ne r20.w, r20.w, c14.y -sel.b32 r18.x, r21.x, r21.z, r18.x +cmps.f.ne r15.y, r15.y, c14.y +cov.u32f32 r17.z, r17.z (rpt1)nop -sel.b32 r12.w, r12.w, r20.w, r20.z -sel.b32 r11.z, r11.z, r20.w, r21.y -add.f r1.z, r1.z, (neg)r18.x +sel.b32 r17.w, r2.x, r15.y, r18.y +cmps.f.ne r17.z, r17.z, c14.y +sel.b32 r15.y, r2.y, r15.y, r18.x nop -add.f r1.w, r1.w, r12.w -add.f r2.y, r2.y, r11.z -mov.f32f32 r1.z, r1.z +add.f r1.z, r1.z, r17.w +sel.b32 r16.w, r17.x, r17.z, r16.w +add.f r17.w, r16.x, r15.y nop -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.y, r2.y -(rpt1)nop -mov.f32f32 r11.z, r1.w -mov.f32f32 r12.w, r2.y -(rpt1)nop -mov.f32f32 r20.z, r11.z -mov.f32f32 r20.w, r12.w -(rpt5)nop -sam (f32)(w)r20.z, r20.z, s#2, t#2 -(sy)cmps.f.lt r11.z, r21.y, r1.z -cmps.f.lt r12.w, r21.y, r1.z +mov.f32f32 r17.z, r1.z +add.f r0.x, r0.x, (neg)r16.w +mov.f32f32 r15.y, r17.w +(rpt3)nop +sam (f32)(w)r17.z, r17.z, s#2, t#2 +(sy)cmps.f.lt r16.x, r18.y, r0.x +mov.f32f32 r0.x, r0.x (rpt1)nop -cov.u32f32 r11.z, r11.z -cov.u32f32 r12.w, r12.w +cov.u32f32 r16.x, r16.x +cmps.f.lt r16.w, r18.y, r0.x (rpt1)nop -mov.f32f32 r11.z, r11.z -cmps.f.ne r12.w, r12.w, c14.y +cmps.f.ne r16.x, r16.x, c14.y +cov.u32f32 r16.w, r16.w (rpt1)nop -cmps.f.ne r11.z, r11.z, c14.y -sel.b32 r12.w, r17.w, r12.w, r20.y -(rpt1)nop -sel.b32 r10.z, r10.z, r11.z, r11.y -sel.b32 r8.w, r8.w, r11.z, r10.w -add.f r1.z, r1.z, (neg)r12.w +sel.b32 r12.w, r2.x, r16.x, r12.w +cmps.f.ne r16.w, r16.w, c14.y +sel.b32 r16.x, r2.y, r16.x, r17.y nop -add.f r1.w, r1.w, r10.z -add.f r2.y, r2.y, r8.w -mov.f32f32 r1.z, r1.z +add.f r1.z, r1.z, r12.w +sel.b32 r12.w, r16.z, r16.w, r16.y +add.f r16.y, r15.y, r16.x nop -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.y, r2.y -(rpt1)nop -mov.f32f32 r8.w, r1.w -mov.f32f32 r10.z, r2.y -(rpt1)nop -mov.f32f32 r11.y, r8.w -mov.f32f32 r11.z, r10.z -(rpt5)nop -sam (f32)(w)r20.y, r11.y, s#2, t#2 -(sy)cmps.f.lt r8.w, r21.x, r1.z -cmps.f.lt r10.z, r21.x, r1.z -(rpt1)nop -cov.u32f32 r8.w, r8.w -cov.u32f32 r10.z, r10.z +mov.f32f32 r16.x, r1.z +add.f r0.x, r0.x, (neg)r12.w +mov.f32f32 r12.w, r16.y +(rpt3)nop +sam (f32)(w)r16.x, r16.x, s#2, t#2 +(sy)cmps.f.lt r15.y, r16.w, r0.x +mov.f32f32 r0.x, r0.x (rpt1)nop -mov.f32f32 r8.w, r8.w -cmps.f.ne r10.z, r10.z, c14.y +cov.u32f32 r15.y, r15.y +(ss)cmps.f.lt r16.x, r16.w, r0.x (rpt1)nop -cmps.f.ne r8.w, r8.w, c14.y -sel.b32 r10.z, r19.z, r10.z, r19.y +cmps.f.ne r15.y, r15.y, c14.y +cov.u32f32 r16.x, r16.x (rpt1)nop -sel.b32 r10.x, r10.x, r8.w, r20.x -sel.b32 r8.w, r9.z, r8.w, r19.w -add.f r1.z, r1.z, (neg)r10.z +sel.b32 r15.w, r2.x, r15.y, r15.w +cmps.f.ne r16.x, r16.x, c14.y +sel.b32 r15.y, r2.y, r15.y, r15.z nop -add.f r1.w, r1.w, r10.x -add.f r2.y, r2.y, r8.w -mov.f32f32 r1.z, r1.z +add.f r1.z, r1.z, r15.w +sel.b32 r14.y, r14.z, r16.x, r14.y +add.f r15.z, r12.w, r15.y nop -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.y, r2.y -(rpt1)nop -mov.f32f32 r8.w, r1.w -mov.f32f32 r9.z, r2.y -(rpt1)nop -mov.f32f32 r10.z, r8.w -mov.f32f32 r10.w, r9.z -(rpt5)nop -sam (f32)(w)r19.y, r10.z, s#2, t#2 -(sy)cmps.f.lt r8.w, r20.x, r1.z -cmps.f.lt r9.z, r20.x, r1.z -(rpt1)nop -cov.u32f32 r8.w, r8.w -cov.u32f32 r9.z, r9.z +mov.f32f32 r15.y, r1.z +add.f r0.x, r0.x, (neg)r14.y +mov.f32f32 r12.w, r15.z +(rpt3)nop +sam (f32)(w)r15.y, r15.y, s#2, t#2 +(sy)cmps.f.lt r14.y, r16.x, r0.x +mov.f32f32 r0.x, r0.x (rpt1)nop -mov.f32f32 r8.w, r8.w -cmps.f.ne r9.z, r9.z, c14.y +cov.u32f32 r14.y, r14.y +cmps.f.lt r14.z, r16.x, r0.x (rpt1)nop -cmps.f.ne r8.w, r8.w, c14.y -sel.b32 r9.z, r18.z, r9.z, r18.y +cmps.f.ne r14.y, r14.y, c14.y +cov.u32f32 r14.z, r14.z (rpt1)nop -sel.b32 r9.w, r9.w, r8.w, r19.x -sel.b32 r8.w, r9.x, r8.w, r18.w -add.f r1.z, r1.z, (neg)r9.z +sel.b32 r12.x, r2.x, r14.y, r12.x +cmps.f.ne r14.z, r14.z, c14.y +sel.b32 r14.y, r2.y, r14.y, r15.x nop -add.f r1.w, r1.w, r9.w -add.f r2.y, r2.y, r8.w -mov.f32f32 r1.z, r1.z +add.f r1.z, r1.z, r12.x +sel.b32 r12.x, r14.w, r14.z, r13.w +add.f r14.z, r12.w, r14.y nop -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.y, r2.y -(rpt1)nop -mov.f32f32 r8.w, r1.w -mov.f32f32 r9.x, r2.y -(rpt1)nop -mov.f32f32 r9.z, r8.w -mov.f32f32 r9.w, r9.x -(rpt5)nop -sam (f32)(w)r17.w, r9.z, s#2, t#2 -(sy)cmps.f.lt r8.w, r18.z, r1.z -cmps.f.lt r9.x, r18.z, r1.z -(rpt1)nop -cov.u32f32 r8.w, r8.w -cov.u32f32 r9.x, r9.x +mov.f32f32 r14.y, r1.z +add.f r0.x, r0.x, (neg)r12.x +mov.f32f32 r12.x, r14.z +(rpt3)nop +sam (f32)(w)r14.y, r14.y, s#2, t#2 +(sy)cmps.f.lt r12.w, r15.x, r0.x +mov.f32f32 r0.x, r0.x (rpt1)nop -mov.f32f32 r8.w, r8.w -cmps.f.ne r9.x, r9.x, c14.y +cov.u32f32 r12.w, r12.w +cmps.f.lt r13.w, r15.x, r0.x (rpt1)nop -cmps.f.ne r8.w, r8.w, c14.y -sel.b32 r9.x, r17.z, r9.x, r17.y +cmps.f.ne r12.w, r12.w, c14.y +cov.u32f32 r13.w, r13.w (rpt1)nop -sel.b32 r9.y, r9.y, r8.w, r17.x -sel.b32 r7.w, r7.w, r8.w, r16.w -add.f r1.z, r1.z, (neg)r9.x +sel.b32 r13.z, r2.x, r12.w, r13.z +cmps.f.ne r13.w, r13.w, c14.y +sel.b32 r9.z, r2.y, r12.w, r9.z nop -add.f r1.w, r1.w, r9.y -add.f r2.y, r2.y, r7.w -mov.f32f32 r1.z, r1.z +add.f r1.z, r1.z, r13.z +sel.b32 r12.w, r14.x, r13.w, r13.y +add.f r13.z, r12.x, r9.z nop -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.y, r2.y -(rpt1)nop -mov.f32f32 r7.w, r1.w -mov.f32f32 r8.w, r2.y -(rpt1)nop -mov.f32f32 r9.x, r7.w -mov.f32f32 r9.y, r8.w -(rpt5)nop -(ss)nop -sam (f32)(w)r8.w, r9.x, s#2, t#2 -(sy)cmps.f.lt r7.w, r9.z, r1.z -cmps.f.lt r8.w, r9.z, r1.z -(rpt1)nop -cov.u32f32 r7.w, r7.w -cov.u32f32 r8.w, r8.w +mov.f32f32 r13.y, r1.z +add.f r0.x, r0.x, (neg)r12.w +mov.f32f32 r9.z, r13.z +(rpt3)nop +sam (f32)(w)r13.y, r13.y, s#2, t#2 +(sy)cmps.f.lt r12.x, r14.x, r0.x +mov.f32f32 r0.x, r0.x (rpt1)nop -mov.f32f32 r7.w, r7.w -cmps.f.ne r8.w, r8.w, c14.y +cov.u32f32 r12.x, r12.x +cmps.f.lt r12.w, r14.x, r0.x (rpt1)nop -cmps.f.ne r7.w, r7.w, c14.y -sel.b32 r8.w, r10.y, r8.w, r14.x +cmps.f.ne r12.x, r12.x, c14.y +cov.u32f32 r12.w, r12.w (rpt1)nop -sel.b32 r8.z, r8.z, r7.w, r14.y -sel.b32 r7.y, r7.y, r7.w, r11.x -add.f r1.z, r1.z, (neg)r8.w +sel.b32 r13.x, r2.x, r12.x, r13.x +cmps.f.ne r12.w, r12.w, c14.y +sel.b32 r12.x, r2.y, r12.x, r12.z nop -add.f r1.w, r1.w, r8.z -add.f r2.y, r2.y, r7.y -mov.f32f32 r1.z, r1.z +add.f r1.z, r1.z, r13.x +sel.b32 r11.y, r12.y, r12.w, r11.y +add.f r12.y, r9.z, r12.x nop -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.y, r2.y -(rpt1)nop -mov.f32f32 r7.y, r1.w -mov.f32f32 r7.w, r2.y -(rpt1)nop -mov.f32f32 r8.z, r7.y -mov.f32f32 r8.w, r7.w -(rpt5)nop -(ss)nop -sam (f32)(w)r8.z, r8.z, s#2, t#2 -(sy)cmps.f.lt r7.y, r9.y, r1.z -cmps.f.lt r7.w, r9.y, r1.z -(rpt1)nop -cov.u32f32 r7.y, r7.y -cov.u32f32 r7.w, r7.w +mov.f32f32 r12.x, r1.z +add.f r0.x, r0.x, (neg)r11.y +mov.f32f32 r9.z, r12.y +(rpt3)nop +sam (f32)(w)r12.x, r12.x, s#2, t#2 +(sy)cmps.f.lt r11.y, r12.w, r0.x +mov.f32f32 r0.x, r0.x (rpt1)nop -mov.f32f32 r7.y, r7.y -cmps.f.ne r7.w, r7.w, c14.y +cov.u32f32 r11.y, r11.y +(ss)cmps.f.lt r12.x, r12.w, r0.x (rpt1)nop -cmps.f.ne r7.y, r7.y, c14.y -sel.b32 r7.w, r16.x, r7.w, r15.w +cmps.f.ne r11.y, r11.y, c14.y +cov.u32f32 r12.x, r12.x (rpt1)nop -sel.b32 r7.z, r7.z, r7.y, r16.z -sel.b32 r7.x, r7.x, r7.y, r16.y -add.f r1.z, r1.z, (neg)r7.w +sel.b32 r11.x, r2.x, r11.y, r11.x +cmps.f.ne r12.x, r12.x, c14.y +sel.b32 r8.y, r2.y, r11.y, r8.y nop -add.f r1.w, r1.w, r7.z -add.f r2.y, r2.y, r7.x -mov.f32f32 r1.z, r1.z +add.f r1.z, r1.z, r11.x +sel.b32 r11.x, r11.w, r12.x, r11.z +add.f r11.z, r9.z, r8.y nop -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.y, r2.y -(rpt1)nop -mov.f32f32 r7.x, r1.w -mov.f32f32 r7.y, r2.y -(rpt1)nop -mov.f32f32 r7.z, r7.x -mov.f32f32 r7.w, r7.y -(rpt5)nop -sam (f32)(w)r7.x, r7.z, s#2, t#2 -(sy)cmps.f.lt r7.x, r7.w, r1.z -cmps.f.lt r7.y, r7.w, r1.z -(rpt1)nop -cov.u32f32 r7.x, r7.x -cov.u32f32 r7.y, r7.y +mov.f32f32 r11.y, r1.z +add.f r0.x, r0.x, (neg)r11.x +mov.f32f32 r8.y, r11.z +(rpt3)nop +sam (f32)(w)r11.x, r11.y, s#2, t#2 +(sy)cmps.f.lt r9.z, r11.w, r0.x +mov.f32f32 r0.x, r0.x (rpt1)nop -mov.f32f32 r7.x, r7.x -cmps.f.ne r7.y, r7.y, c14.y +cov.u32f32 r9.z, r9.z +cmps.f.lt r11.x, r11.w, r0.x (rpt1)nop -cmps.f.ne r7.x, r7.x, c14.y -sel.b32 r7.y, r15.x, r7.y, r14.w +cmps.f.ne r9.z, r9.z, c14.y +cov.u32f32 r11.x, r11.x (rpt1)nop -sel.b32 r2.z, r2.z, r7.x, r15.z -sel.b32 r6.y, r6.y, r7.x, r15.y -add.f r1.z, r1.z, (neg)r7.y +sel.b32 r10.z, r2.x, r9.z, r10.z +cmps.f.ne r11.x, r11.x, c14.y +sel.b32 r9.z, r2.y, r9.z, r10.y nop -add.f r1.w, r1.w, r2.z -add.f r2.y, r2.y, r6.y -mov.f32f32 r1.z, r1.z +add.f r1.z, r1.z, r10.z +sel.b32 r1.w, r1.w, r11.x, r10.w +add.f r10.z, r8.y, r9.z nop -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.y, r2.y +mov.f32f32 r10.y, r1.z +add.f r0.x, r0.x, (neg)r1.w +mov.f32f32 r1.w, r10.z +(rpt3)nop +sam (f32)(w)r10.y, r10.y, s#2, t#2 +(sy)cmps.f.lt r8.y, r11.x, r0.x +mov.f32f32 r0.x, r0.x (rpt1)nop -mov.f32f32 r2.z, r1.w -mov.f32f32 r6.y, r2.y +cov.u32f32 r8.y, r8.y +cmps.f.lt r9.z, r11.x, r0.x (rpt1)nop -mov.f32f32 r7.x, r2.z -mov.f32f32 r7.y, r6.y -(rpt5)nop -(ss)nop -sam (f32)(w)r7.x, r7.x, s#2, t#2 -(sy)cmps.f.lt r2.z, r7.w, r1.z -cmps.f.lt r6.y, r7.w, r1.z +cmps.f.ne r8.y, r8.y, c14.y +cov.u32f32 r9.z, r9.z (rpt1)nop -cov.u32f32 r2.z, r2.z -cov.u32f32 r6.y, r6.y +sel.b32 r10.x, r2.x, r8.y, r10.x +cmps.f.ne r9.z, r9.z, c14.y +sel.b32 r8.y, r2.y, r8.y, r9.w +nop +add.f r1.z, r1.z, r10.x +sel.b32 r8.z, r9.y, r9.z, r8.z +add.f r9.z, r1.w, r8.y +nop +mov.f32f32 r9.y, r1.z +add.f r0.x, r0.x, (neg)r8.z +mov.f32f32 r1.w, r9.z +(rpt3)nop +sam (f32)(w)r9.y, r9.y, s#2, t#2 +(sy)cmps.f.lt r8.y, r10.x, r0.x +mov.f32f32 r0.x, r0.x (rpt1)nop -mov.f32f32 r2.z, r2.z -cmps.f.ne r6.y, r6.y, c14.y +cov.u32f32 r8.y, r8.y +cmps.f.lt r8.z, r10.x, r0.x (rpt1)nop -cmps.f.ne r2.z, r2.z, c14.y -sel.b32 r6.y, r11.w, r6.y, r13.w +cmps.f.ne r8.y, r8.y, c14.y +cov.u32f32 r8.z, r8.z (rpt1)nop -sel.b32 r4.z, r4.z, r2.z, r14.z -sel.b32 r2.z, r3.w, r2.z, r13.x -add.f r1.z, r1.z, (neg)r6.y +sel.b32 r9.x, r2.x, r8.y, r9.x +cmps.f.ne r8.z, r8.z, c14.y +sel.b32 r8.y, r2.y, r8.y, r8.w nop -add.f r1.w, r1.w, r4.z -add.f r2.y, r2.y, r2.z -mov.f32f32 r1.z, r1.z +add.f r1.z, r1.z, r9.x +sel.b32 r2.z, r2.w, r8.z, r2.z +add.f r8.z, r1.w, r8.y nop -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.y, r2.y -(rpt1)nop -mov.f32f32 r2.z, r1.w -mov.f32f32 r3.w, r2.y +mov.f32f32 r8.y, r1.z +add.f r0.x, r0.x, (neg)r2.z +mov.f32f32 r1.w, r8.z +(rpt3)nop +sam (f32)(w)r8.y, r8.y, s#2, t#2 +(sy)cmps.f.lt r2.z, r9.x, r0.x +mov.f32f32 r0.x, r0.x (rpt1)nop -(ss)mov.f32f32 r7.x, r2.z -mov.f32f32 r7.y, r3.w -(rpt5)nop -sam (f32)(w)r7.x, r7.x, s#2, t#2 -(sy)cmps.f.lt r2.z, r7.w, r1.z -(rpt2)nop cov.u32f32 r2.z, r2.z (rpt2)nop cmps.f.ne r2.z, r2.z, c14.y (rpt2)nop -sel.b32 r3.w, r5.y, r2.z, r13.y -sel.b32 r1.y, r2.w, r2.z, r1.y +sel.b32 r1.y, r2.x, r2.z, r1.y +sel.b32 r2.z, r2.y, r2.z, r7.w (rpt1)nop -add.f r1.w, r1.w, r3.w -add.f r1.y, r2.y, r1.y -(rpt1)nop -mov.f32f32 r1.w, r1.w -mov.f32f32 r1.y, r1.y +add.f r1.y, r1.z, r1.y +add.f r1.w, r1.w, r2.z (rpt1)nop -add.f r2.y, r1.w, (neg)r2.x +mov.f32f32 r1.z, r1.y mov.f32f32 r2.z, r1.w -add.f r2.w, r1.y, (neg)r0.y -mov.f32f32 r3.w, r1.y -mov.f32f32 r2.y, r2.y -(ss)mov.f32f32 r7.x, r2.z -mov.f32f32 r2.z, r2.w -mov.f32f32 r7.y, r3.w -mov.f32f32 r2.y, r2.y -(rpt2)nop -mov.f32f32 r7.z, r2.y -mov.f32f32 r2.y, r2.z -sam (f32)(w)r8.z, r7.x, s#2, t#2 -(sy)add.f r2.z, c15.z, r9.y -add.f r1.z, r9.y, (neg)r1.z -nop -mov.f32f32 r7.w, r2.y -mov.f32f32 r2.y, r2.z -mov.f32f32 r1.z, r1.z -(rpt3)nop -sam (f32)(w)r7.x, r7.z, s#2, t#2 -(sy)add.f r2.y, r2.y, (neg)r7.w -(rpt2)nop -mov.f32f32 r2.y, r2.y +(rpt1)nop +(ss)add.f r8.y, r1.z, (neg)r2.x +add.f r8.z, r2.z, (neg)r2.y +(rpt1)nop +sam (f32)(w)r8.w, r1.z, s#2, t#2 +(sy)(ss)add.f r1.z, c15.z, r9.z +add.f r0.x, r9.z, (neg)r0.x +(rpt1)nop +sam (f32)(w)r8.y, r8.y, s#2, t#2 +(sy)add.f r1.z, r1.z, (neg)r9.x (rpt5)nop -rcp r2.y, r2.y -(ss)mov.f32f32 r2.y, r2.y -(rpt2)nop -mul.f r1.z, r1.z, r2.y -(rpt2)nop -mov.f32f32 r1.z, r1.z +rcp r1.z, r1.z +(ss)mul.f r0.x, r0.x, r1.z (rpt2)nop -mul.f r2.x, r2.x, r1.z -mul.f r0.y, r0.y, r1.z +(ss)mov.f32f32 r1.z, r0.x +mul.f r0.x, r2.x, r0.x (rpt1)nop -mov.f32f32 r1.z, r2.x -mov.f32f32 r0.y, r0.y -(rpt1)nop -add.f r1.z, r1.w, (neg)r1.z -add.f r0.y, r1.y, (neg)r0.y +mul.f r1.z, r2.y, r1.z +add.f r1.w, r1.y, (neg)r0.x (rpt1)nop -mov.f32f32 r1.y, r1.z -mov.f32f32 r0.y, r0.y +add.f r2.x, r2.z, (neg)r1.z +mov.f32f32 r1.y, r1.w (rpt1)nop -mov.f32f32 r1.z, r1.y -mov.f32f32 r1.w, r1.y -mov.f32f32 r1.y, r1.y -mov.f32f32 r2.x, r0.y -mov.f32f32 r2.y, r1.z -mov.f32f32 r1.z, r0.y -mov.f32f32 r7.x, r1.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.y, r1.y -mov.f32f32 r2.z, r1.z mov.f32f32 r1.z, r2.x -mov.f32f32 r7.y, r0.y +mov.f32f32 r8.y, r1.y nop -mov.f32f32 r1.w, r1.y -mov.f32f32 r2.x, r1.z -nop -sam (f32)(xyz)r8.z, r2.y, s#2, t#2 -(sy)mad.f32 r0.y, c16.x, r8.w, c16.y -mad.f32 r1.y, c16.x, r8.z, c16.y -sam (f32)(xyzw)r7.x, r7.x, s#0, t#0 -(sy)cmps.f.lt r1.z, r7.w, c15.w -(ss)mov.f32f32 r2.y, r7.w -mov.f32f32 r0.y, r0.y -sam (f32)(xyz)r9.y, r1.w, s#3, t#3 -(sy)(ss)mul.f r1.w, c8.y, r9.z -mul.f r2.x, c8.x, r9.y -cov.u32f32 r1.z, r1.z -absneg.f r0.y, (neg)r0.y -mov.f32f32 r1.y, r1.y -mul.f r2.z, c8.z, r9.w -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.x, r2.x -mul.f r2.w, r4.y, r1.y -mul.f r3.z, r3.z, r1.y -mad.f32 r0.x, r0.x, r0.y, r2.w -mad.f32 r2.w, r3.x, r0.y, r3.z -mul.f r1.y, r4.w, r1.y -cmps.f.ne p0.x, r1.z, r13.z -mov.f32f32 r0.x, r0.x +sam (f32)(xyzw)r2.x, r1.w, s#0, t#0 +(sy)cmps.f.lt r0.x, r2.w, c15.w +mov.f32f32 r8.z, r1.z +(rpt1)nop +sam (f32)(xyz)r8.w, r1.y, s#2, t#2 +(sy)(ss)mad.f32 r1.y, c16.x, r8.w, c16.y +cov.u32f32 r0.x, r0.x mad.f32 r1.z, c16.x, r9.x, c16.y -mov.f32f32 r2.w, r2.w -mad.f32 r0.y, r3.y, r0.y, r1.y -mov.f32f32 r1.y, r2.z -mov.f32f32 r1.z, r1.z +mad.f32 r1.w, c16.x, r9.y, c16.y +mov.f32f32 r7.w, r1.y +cmps.f.ne p0.x, r0.x, r4.w +absneg.f r0.x, (neg)r1.z +mul.f r1.y, r4.x, r1.y +mul.f r1.z, r4.z, r7.w +mul.f r4.x, r5.y, r7.w +mad.f32 r0.y, r0.y, r0.x, r1.z +mov.f32f32 r1.z, r1.w +mad.f32 r4.x, r5.z, r0.x, r4.x +mad.f32 r0.x, r3.w, r0.x, r1.y kill p0.x -mov.f32f32 r3.w, r2.y -mov.f32f32 r0.y, r0.y -mad.f32 r0.x, r5.z, r1.z, r0.x -mad.f32 r2.y, r6.w, r1.z, r2.w -mad.f32 r0.y, r6.z, r1.z, r0.y -nop -mov.f32f32 r0.x, r0.x -mov.f32f32 r1.z, r2.y -mov.f32f32 r0.y, r0.y +mad.f32 r0.y, r6.x, r1.z, r0.y +mad.f32 r1.y, r5.w, r1.z, r4.x +mad.f32 r0.x, r8.x, r1.w, r0.x nop -mul.f r2.y, r0.x, r0.x -nop -mad.f32 r2.y, r1.z, r1.z, r2.y -(rpt2)nop -mov.f32f32 r2.y, r2.y -nop -mad.f32 r2.y, r0.y, r0.y, r2.y +mov.f32f32 r1.z, r0.y +mov.f32f32 r1.w, r1.y +mov.f32f32 r3.w, r0.x +sam (f32)(xyz)r5.y, r8.y, s#3, t#3 +(sy)mul.f r4.x, c8.z, r5.w +mul.f r0.y, r0.y, r1.z +mul.f r4.z, c8.y, r5.z +mad.f32 r0.y, r1.y, r1.w, r0.y +mul.f r1.y, c8.x, r5.y +mad.f32 r0.y, r3.w, r3.w, r0.y (rpt5)nop -rsq r2.y, r2.y -(ss)mov.f32f32 r2.y, r2.y -(rpt2)nop -mul.f r0.x, r0.x, r2.y -mul.f r1.z, r1.z, r2.y -mul.f r0.y, r0.y, r2.y -nop -mov.f32f32 r0.x, r0.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r0.y, r0.y -nop -mul.f r2.y, r0.x, r0.x -mul.f r2.w, (neg)c9.x, r0.x -mad.f32 r2.y, r1.z, r1.z, r2.y -mad.f32 r2.w, (neg)c9.y, r1.z, r2.w +rsq r0.y, r0.y +(ss)mov.f32f32 r3.w, r0.y +mul.f r0.x, r0.x, r0.y (rpt1)nop -mov.f32f32 r2.y, r2.y -mov.f32f32 r2.w, r2.w -mad.f32 r2.y, r0.y, r0.y, r2.y -mad.f32 r2.w, (neg)c9.z, r0.y, r2.w -(rpt4)nop -rsq r2.y, r2.y -(ss)mov.f32f32 r2.y, r2.y -max.f r2.w, r2.w, c14.y -(rpt1)nop -mul.f r0.x, r0.x, r2.y -mov.f32f32 r2.w, r2.w -mul.f r1.z, r1.z, r2.y -mul.f r0.y, r0.y, r2.y -mov.f32f32 r0.x, r0.x -mad.f32 r2.y, c8.z, r2.w, (neg)r12.x -mad.f32 r4.y, c8.y, r2.w, (neg)r8.y -mad.f32 r2.w, c8.x, r2.w, (neg)r1.x -mul.f r0.x, r0.x, r4.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r2.y, r2.y -mov.f32f32 r4.x, r4.y -mov.f32f32 r2.w, r2.w -mad.f32 r0.x, r1.z, r12.y, r0.x -mad.f32 r1.z, c12.x, r2.y, r12.x -mad.f32 r2.y, c12.x, r4.x, r8.y -mad.f32 r1.x, c12.x, r2.w, r1.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.z, r1.z -mov.f32f32 r2.y, r2.y -mov.f32f32 r1.x, r1.x -mad.f32 r0.x, r0.y, r12.z, r0.x -(rpt2)nop -mov.f32f32 r0.x, r0.x -(rpt2)nop -max.f r0.x, c14.y, r0.x -(rpt2)nop -mov.f32f32 r0.x, r0.x +(ss)mul.f r0.y, r1.z, r3.w +mul.f r1.z, r1.w, r3.w +mov.f32f32 r1.w, r0.x +nop +mov.f32f32 r3.w, r0.y +mul.f r0.y, (neg)c9.x, r0.y +mov.f32f32 r4.w, r1.z +nop +mul.f r5.y, r3.w, r3.w +mad.f32 r0.y, (neg)c9.y, r1.z, r0.y +mad.f32 r1.z, r4.w, r4.w, r5.y +mad.f32 r0.x, (neg)c9.z, r0.x, r0.y +mad.f32 r0.y, r1.w, r1.w, r1.z (rpt5)nop -log2 r0.x, r0.x -(ss)mul.f r0.x, c12.y, r0.x -(rpt2)nop -mov.f32f32 r0.x, r0.x +rsq r0.y, r0.y +(ss)mov.f32f32 r1.z, r0.y +max.f r0.x, r0.x, c14.y +(ss)mul.f r0.y, r1.w, r0.y +nop +mul.f r1.w, r3.w, r1.z +mov.f32f32 r3.w, r0.x +mul.f r1.z, r4.w, r1.z +mad.f32 r0.x, c8.x, r0.x, (neg)r1.x +mul.f r1.w, r1.w, r6.w +mad.f32 r4.w, c8.z, r3.w, (neg)r6.y +mad.f32 r1.z, r1.z, r7.x, r1.w +mad.f32 r1.w, c8.y, r3.w, (neg)r6.z +mad.f32 r0.y, r0.y, r7.z, r1.z +mad.f32 r1.z, c12.x, r4.w, r6.y +mad.f32 r0.x, c12.x, r0.x, r1.x +nop +max.f r0.y, c14.y, r0.y +mad.f32 r1.x, c12.x, r1.w, r6.z +(rpt4)nop +log2 r0.y, r0.y +(ss)mul.f r0.y, c12.y, r0.y (rpt5)nop -exp2 r0.x, r0.x -(ss)mul.f r0.y, r1.y, r0.x -mul.f r1.y, r1.w, r0.x -mad.f32 r0.y, r7.z, r1.z, r0.y -mad.f32 r1.y, r7.y, r2.y, r1.y -(ss)mul.f r0.x, r2.x, r0.x -nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.y, r1.y -mad.f32 r0.x, r7.x, r1.x, r0.x -nop -mul.f r0.y, r0.y, r0.w -mul.f r1.x, r1.y, r0.w -mov.f32f32 r0.x, r0.x -nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.x, r1.x -mad.f32 r0.y, c7.z, r7.z, r0.y -mad.f32 r1.x, c7.y, r7.y, r1.x +exp2 r0.y, r0.y +(ss)mul.f r1.w, r4.x, r0.y +mul.f r3.w, r4.z, r0.y +mad.f32 r1.z, r2.z, r1.z, r1.w +mad.f32 r1.x, r2.y, r1.x, r3.w +(ss)mul.f r0.y, r1.y, r0.y +nop +mul.f r1.y, r1.z, r4.y +mul.f r1.x, r1.x, r4.y +mad.f32 r1.y, c7.z, r2.z, r1.y +mad.f32 r1.x, c7.y, r2.y, r1.x +mad.f32 r0.x, r2.x, r0.x, r0.y +nop +mul.f r0.y, r0.z, r1.y +mul.f r1.x, r0.z, r1.x mul.f r0.x, r0.x, r0.w nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.w, r1.x -mov.f32f32 r0.x, r0.x -nop -mul.f r0.y, r0.z, r0.y -mul.f r0.w, r0.z, r0.w -mad.f32 r0.x, c7.x, r7.x, r0.x +add.f r0.y, r0.y, r7.y +add.f r0.w, r1.x, r3.y +mad.f32 r0.x, c7.x, r2.x, r0.x nop -add.f r0.y, r0.y, r5.x -add.f r0.w, r0.w, r5.w -mov.f32f32 r0.x, r0.x -nop -mul.f r0.y, r0.y, r8.x -mul.f r0.w, r0.w, r8.x +mul.f r0.y, r0.y, r5.x +mul.f r0.w, r0.w, r5.x mul.f r0.x, r0.z, r0.x nop -mul.f r0.y, r0.y, c6.z -mul.f r0.z, r0.w, c6.y -add.f r0.x, r0.x, r6.x -nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -mul.f r0.x, r0.x, r8.x -nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -mul.f r0.x, r0.x, c6.x -nop -mov.f32f32 r3.z, r0.y -mov.f32f32 r3.y, r0.z -mov.f32f32 r0.x, r0.x +mul.f r2.z, r0.y, c6.z +mul.f r2.y, r0.w, c6.y +add.f r0.x, r0.x, r3.z (rpt2)nop -mov.f32f32 r0.x, r0.x +mul.f r0.x, r0.x, r5.x (rpt2)nop -mov.f32f32 r3.x, r0.x +mul.f r2.x, r0.x, c6.x end -; FRAG: outputs: r3.x (1:0) -; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r2.z (5:9,cm=f,il=8,b=1) r63.y (5:10,cm=f,il=12,b=1) r3.x (5:11,cm=f,il=16,b=1) r2.y (5:12,cm=f,il=20,b=1) r7.x (5:13,cm=f,il=24,b=1) r3.y (5:14,cm=f,il=28,b=1) r7.x (5:15,cm=f,il=32,b=1) -; FRAG: 1147 instructions, 0 half, 65 full -; pos (bary): r1.x -; color: r3.x -; fragcoord: r0.x +nop +nop +; FRAG: outputs: r2.x (1:0) +; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r3.x (5:9,cm=f,il=8,b=1) r63.z (5:10,cm=f,il=12,b=1) r3.x (5:11,cm=f,il=16,b=1) r1.w (5:12,cm=f,il=20,b=1) r6.x (5:13,cm=f,il=24,b=1) r6.x (5:14,cm=f,il=28,b=1) r7.x (5:15,cm=f,il=32,b=1) +; FRAG: 766 instructions, 0 half, 20 full diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-65.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-65.asm index ad4df45..b2e35b3 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-65.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-65.asm @@ -6,346 +6,250 @@ @in(r0.w) in4 @in(r1.x) in5 @in(r1.y) in6 -@in(r1.z) in8 -@in(r1.w) in9 -@in(r2.x) in10 -@in(r2.y) in11 -@in(r2.z) in12 -@in(r2.w) in13 -@out(r11.z) out0 -@out(r11.w) out1 -@out(r12.x) out2 -@out(r12.y) out3 -@out(r7.x) out4 -@out(r7.y) out5 -@out(r7.z) out6 -@out(r7.w) out7 -@out(r3.x) out8 -@out(r3.y) out9 -@out(r3.z) out10 -@out(r3.w) out11 -@out(r9.z) out12 -@out(r9.w) out13 -@out(r10.x) out14 -@out(r10.y) out15 -@out(r1.y) out16 -@out(r1.z) out17 -@out(r1.w) out18 -@out(r2.x) out19 -@out(r12.z) out20 -@out(r12.w) out21 -@out(r13.x) out22 -@out(r13.y) out23 -@out(r8.z) out24 -@out(r8.w) out25 -@out(r9.x) out26 -@out(r9.y) out27 -@out(r10.z) out28 -@out(r10.w) out29 -@out(r11.x) out30 -@out(r11.y) out31 -(sy)(ss)floor.f r3.x, c15.z -absneg.f r3.y, (abs)c18.x -absneg.f r3.z, (abs)c18.y -floor.f r3.w, c15.x -add.f r3.x, c15.z, (neg)r3.x -mul.f r4.x, c12.x, r1.z -mul.f r4.y, c12.x, r0.w -add.f r3.w, c15.x, (neg)r3.w -mov.f32f32 r3.x, r3.x -add.f r3.y, r3.y, r3.z -mad.f32 r3.z, c13.x, r1.w, r4.x -mad.f32 r4.x, c13.x, r1.x, r4.y -max.f r3.x, r3.x, c19.y -mov.f32f32 r3.y, r3.y -mov.f32f32 r3.w, r3.w -mov.f32f32 r3.z, r3.z -min.f r3.x, r3.x, c23.y -mul.f r4.y, c17.x, r3.y -mul.f r4.z, c12.z, r0.x -max.f r3.w, r3.w, c19.y -max.f r3.x, r3.x, c19.x -mad.f32 r4.z, c13.z, r0.y, r4.z -mad.f32 r3.z, c14.x, r2.x, r3.z -mad.f32 r4.x, c14.x, r1.y, r4.x -mov.f32f32 r3.x, r3.x -mad.f32 r4.z, c14.z, r0.z, r4.z -min.f r3.w, r3.w, c23.y -mov.f32f32 r3.z, r3.z -mul.f r3.x, c17.x, r3.x -add.f r4.z, r4.z, c15.z -max.f r3.w, r3.w, c19.x -mad.f32 r4.y, c19.w, r4.y, r4.z -mov.f32f32 r3.x, r3.x -mul.f r4.w, c12.y, r0.w -mov.f32f32 r4.x, r4.x -absneg.f r5.x, (neg)c5.x -mad.f32 r3.x, c19.z, r3.x, c15.x -mov.f32f32 r4.y, r4.y -mov.f32f32 r3.w, r3.w -mul.f r5.y, c12.x, r0.x -mov.f32f32 r3.x, r3.x -mov.f32f32 r4.y, r4.y -mad.f32 r5.y, c13.x, r0.y, r5.y -mad.f32 r4.w, c13.y, r1.x, r4.w -mov.f32f32 r3.x, r3.x -add.f r4.y, r4.y, c20.x -mad.f32 r5.y, c14.x, r0.z, r5.y -mad.f32 r4.w, c14.y, r1.y, r4.w -add.f r3.x, r3.x, c20.x -floor.f r5.z, r4.y -add.f r5.y, r5.y, c15.x -mov.f32f32 r4.w, r4.w -floor.f r5.w, r3.x -add.f r4.y, r4.y, (neg)r5.z -mad.f32 r3.w, c17.x, r3.w, r5.y -mul.f r5.z, r4.w, r3.z -add.f r3.x, r3.x, (neg)r5.w -mad.f32 r4.y, c20.y, r4.y, c20.z -add.f r3.w, r3.w, c20.x -mul.f r5.w, c12.y, r1.z -mad.f32 r3.x, c20.y, r3.x, c20.z -absneg.f r4.y, (abs)r4.y -floor.f r6.x, r3.w -mad.f32 r5.w, c13.y, r1.w, r5.w -absneg.f r3.x, (abs)r3.x -mul.f r6.y, c20.y, r4.y -add.f r3.w, r3.w, (neg)r6.x -mul.f r4.y, r4.y, r4.y -mul.f r6.x, c20.y, r3.x -add.f r6.y, c20.w, (neg)r6.y -mad.f32 r3.w, c20.y, r3.w, c20.z -mul.f r3.x, r3.x, r3.x -add.f r6.x, c20.w, (neg)r6.x -mul.f r4.y, r4.y, r6.y -absneg.f r3.w, (abs)r3.w -mov.f32f32 r5.w, r5.w -mul.f r3.x, r3.x, r6.x -mov.f32f32 r4.y, r4.y -mul.f r6.x, r0.x, r0.z -mul.f r6.y, r0.y, c21.x -mov.f32f32 r3.x, r3.x -mul.f r6.z, r0.y, c22.x -mul.f r6.w, c20.y, r3.w -mul.f r6.x, r6.x, r6.y -mul.f r3.w, r3.w, r3.w -mov.f32f32 r6.y, r6.z -add.f r6.z, c20.w, (neg)r6.w -mov.f32f32 r6.x, r6.x -mad.f32 r5.w, c14.y, r2.x, r5.w -mov.f32f32 r6.y, r6.y -mul.f r3.w, r3.w, r6.z -mov.f32f32 r6.x, r6.x -mov.f32f32 r5.w, r5.w -max.f r6.y, r6.y, c19.y -mov.f32f32 r3.w, r3.w -mov.f32f32 r6.x, r6.x -mad.f32 r5.z, r4.x, r5.w, (neg)r5.z -min.f r6.y, r6.y, c23.y -mul.f r6.z, r4.x, r5.x -absneg.f r6.w, (neg)c5.y -mul.f r1.z, c12.z, r1.z -min.f r6.y, r6.y, c19.w -max.f r6.x, r6.x, c19.y -mov.f32f32 r5.z, r5.z -mad.f32 r6.z, r4.w, r6.w, r6.z -mov.f32f32 r6.y, r6.y -min.f r6.x, r6.x, c23.y -mul.f r5.z, r5.z, r2.y -mov.f32f32 r6.z, r6.z -mul.f r3.x, r3.x, r6.y -min.f r6.x, r6.x, c21.y -mov.f32f32 r5.z, r5.z +@in(r8.x) in8 +@in(r8.y) in9 +@in(r8.z) in10 +@in(r8.w) in11 +@in(r5.z) in12 +@in(r5.w) in13 +@out(r0.x) out0 +@out(r0.y) out1 +@out(r0.z) out2 +@out(r0.w) out3 +@out(r1.x) out4 +@out(r1.y) out5 +@out(r1.z) out6 +@out(r1.w) out7 +@out(r2.x) out8 +@out(r2.y) out9 +@out(r2.z) out10 +@out(r2.w) out11 +@out(r3.x) out12 +@out(r3.y) out13 +@out(r3.z) out14 +@out(r3.w) out15 +@out(r4.x) out16 +@out(r4.y) out17 +@out(r4.z) out18 +@out(r4.w) out19 +@out(r5.x) out20 +@out(r5.y) out21 +@out(r5.z) out22 +@out(r5.w) out23 +@out(r6.x) out24 +@out(r6.y) out25 +@out(r6.z) out26 +@out(r6.w) out27 +@out(r7.x) out28 +@out(r7.y) out29 +@out(r7.z) out30 +@out(r7.w) out31 +@const(c19.x) 0x3ecccccd, 0x00000000, 0x3eaaa64c, 0x3e800000 +@const(c20.x) 0x3f000000, 0x40000000, 0xbf800000, 0x40400000 +@const(c21.x) 0x399d4952, 0x3e4ccccd, 0x3f99999a, 0x3fd9999a +@const(c22.x) 0x3c88ce70, 0x00000000, 0x00000000, 0x00000000 +@const(c23.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 +(sy)(ss)floor.f r1.z, c15.z +floor.f r1.w, c15.x +absneg.f r2.x, (abs)c18.x +absneg.f r2.y, (abs)c18.y +add.f r1.z, c15.z, (neg)r1.z +add.f r1.w, c15.x, (neg)r1.w +mul.f r2.z, c12.x, r0.w +add.f r2.x, r2.x, r2.y +max.f r1.z, r1.z, c19.y +max.f r1.w, r1.w, c19.y +mad.f32 r2.y, c13.x, r1.x, r2.z +mul.f r2.z, c17.x, r2.x +min.f r1.z, r1.z, c23.y +min.f r1.w, r1.w, c23.y +mul.f r2.w, c12.z, r0.x +mad.f32 r2.y, c14.x, r1.y, r2.y +max.f r1.z, r1.z, c19.x +max.f r1.w, r1.w, c19.x +mad.f32 r2.w, c13.z, r0.y, r2.w +mul.f r3.x, c12.x, r0.x +mul.f r1.z, c17.x, r1.z +mad.f32 r3.x, c13.x, r0.y, r3.x +mad.f32 r2.w, c14.z, r0.z, r2.w +mad.f32 r3.x, c14.x, r0.z, r3.x +mad.f32 r1.z, c19.z, r1.z, c15.x +absneg.f r3.y, (neg)c5.x +add.f r2.w, r2.w, c15.z +mov.f32f32 r2.x, r2.x +add.f r1.z, r1.z, c20.x +add.f r3.x, r3.x, c15.x +mad.f32 r2.z, c19.w, r2.z, r2.w +mad.f32 r1.w, c17.x, r1.w, r3.x +floor.f r3.z, r1.z +mul.f r3.w, r2.y, r3.y +mul.f r4.x, c12.y, r0.w +add.f r1.w, r1.w, c20.x +add.f r1.z, r1.z, (neg)r3.z +add.f r2.z, r2.z, c20.x +mad.f32 r3.z, c13.y, r1.x, r4.x +max.f r2.x, r2.x, c21.z +mad.f32 r1.z, c20.y, r1.z, c20.z +floor.f r4.x, r1.w +floor.f r4.y, r2.z +mad.f32 r3.z, c14.y, r1.y, r3.z +absneg.f r1.z, (abs)r1.z +add.f r1.w, r1.w, (neg)r4.x +add.f r2.z, r2.z, (neg)r4.y +absneg.f r4.x, (neg)c5.y +mul.f r4.y, c20.y, r1.z +mad.f32 r1.w, c20.y, r1.w, c20.z +mad.f32 r2.z, c20.y, r2.z, c20.z +mul.f r1.z, r1.z, r1.z +add.f r4.y, c20.w, (neg)r4.y +absneg.f r1.w, (abs)r1.w +absneg.f r2.z, (abs)r2.z +mad.f32 r3.w, r3.z, r4.x, r3.w +mul.f r1.z, r1.z, r4.y +mul.f r4.y, r0.y, c22.x +mul.f r4.z, c20.y, r1.w +mul.f r4.w, c20.y, r2.z +mul.f r1.w, r1.w, r1.w +max.f r4.y, r4.y, c19.y +add.f r4.z, c20.w, (neg)r4.z +add.f r4.w, c20.w, (neg)r4.w +mul.f r2.z, r2.z, r2.z +min.f r4.y, r4.y, c23.y +mul.f r1.w, r1.w, r4.z +mul.f r4.z, r0.x, r0.z +mul.f r4.w, r2.z, r4.w +min.f r2.z, r4.y, c19.w +mul.f r4.y, r0.y, c21.x mul.f r0.w, c12.z, r0.w -mov.f32f32 r3.x, r3.x -mov.f32f32 r6.x, r6.x -mov.f32f32 r5.z, r5.z +min.f r5.x, r2.x, c21.w +mul.f r1.z, r1.z, r2.z +mul.f r2.x, r4.z, r4.y mad.f32 r0.w, c13.z, r1.x, r0.w -mad.f32 r1.x, c13.z, r1.w, r1.z -mul.f r1.z, r3.w, r6.x -mul.f r1.w, r4.y, r6.x -max.f r3.y, r3.y, c21.z -mov.f32f32 r3.w, r5.z -mov.f32f32 r1.z, r1.z +mov.f32f32 r1.x, r5.x +mov.f32f32 r4.y, r1.z +max.f r2.z, r2.x, c19.y mad.f32 r0.w, c14.z, r1.y, r0.w -mad.f32 r1.y, c18.x, r3.x, r1.z -mad.f32 r1.z, c18.y, r3.x, r1.z -mov.f32f32 r3.x, r3.y -mov.f32f32 r7.w, r3.w -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.z, r1.z -min.f r3.x, r3.x, c21.w -mov.f32f32 r0.w, r0.w -absneg.f r3.y, (neg)c5.z -mov.f32f32 r1.x, r1.x -mov.f32f32 r3.x, r3.x -mul.f r3.w, c12.y, r0.x -mad.f32 r4.y, r0.w, r3.y, r6.z -mad.f32 r1.x, c14.z, r2.x, r1.x -mad.f32 r2.x, r1.w, r3.x, r5.y -mad.f32 r4.z, r1.w, r3.x, r4.z -mad.f32 r3.w, c13.y, r0.y, r3.w -mov.f32f32 r4.y, r4.y -add.f r1.y, r2.x, r1.y -add.f r1.z, r4.z, r1.z -mad.f32 r2.x, c14.y, r0.z, r3.w -max.f r3.w, c19.y, r4.y -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.z, r1.z -add.f r2.x, r2.x, c15.y -mov.f32f32 r3.w, r3.w -mov.f32f32 r4.y, r1.y -mul.f r4.z, c8.w, r1.y -mul.f r5.y, c8.z, r1.y -mul.f r5.z, c8.y, r1.y -add.f r4.y, c4.x, (neg)r4.y -mad.f32 r1.w, r1.w, r3.x, r2.x -mul.f r2.x, c8.x, r1.y -mov.f32f32 r3.x, r1.z -mul.f r6.x, r4.y, r4.y -add.f r6.y, c4.y, (neg)r1.w -mad.f32 r6.z, c9.w, r1.w, r4.z -mad.f32 r8.x, c9.z, r1.w, r5.y -mad.f32 r5.z, c9.y, r1.w, r5.z -mad.f32 r6.x, r6.y, r6.y, r6.x -mov.f32f32 r6.z, r6.z -mov.f32f32 r8.x, r8.x -mov.f32f32 r5.z, r5.z -mov.f32f32 r6.x, r6.x -add.f r3.x, c4.z, (neg)r3.x -mad.f32 r6.z, c10.w, r1.z, r6.z -mad.f32 r8.x, c10.z, r1.z, r8.x -mad.f32 r5.z, c10.y, r1.z, r5.z -mad.f32 r6.x, r3.x, r3.x, r6.x +absneg.f r1.y, (neg)c5.z +mov.f32f32 r2.x, r2.y +min.f r4.z, r2.z, c23.y +mul.f r5.y, c12.z, r8.x +mov.f32f32 r2.z, r0.w +mov.f32f32 r2.y, r3.z +min.f r3.z, r4.z, c21.y +mad.f32 r0.w, r0.w, r1.y, r3.w +mad.f32 r4.z, c13.z, r8.y, r5.y +mul.f r9.x, c12.y, r8.x +mov.f32f32 r3.w, r3.z +mul.f r3.z, r4.w, r3.z +mul.f r4.w, c12.y, r0.x +max.f r9.y, c19.y, r0.w +mul.f r0.w, r1.w, r3.w +mad.f32 r1.w, c13.y, r0.y, r4.w +mad.f32 r3.x, r3.z, r1.x, r3.x +mad.f32 r1.w, c14.y, r0.z, r1.w +mov.f32f32 r3.w, r0.w +mad.f32 r0.w, c18.y, r1.z, r0.w +mad.f32 r1.z, c18.x, r4.y, r3.w +mad.f32 r2.w, r3.z, r5.x, r2.w +add.f r1.w, r1.w, c15.y +mov.f32f32 r4.y, r9.y +add.f r1.z, r3.x, r1.z +add.f r2.w, r2.w, r0.w +mad.f32 r1.x, r3.z, r1.x, r1.w +nop +mov.f32f32 r1.w, r1.z +mul.f r1.z, c0.x, r1.z +mov.f32f32 r4.w, r2.w +add.f r6.y, c4.y, (neg)r1.x +add.f r6.x, c4.x, (neg)r1.w +mul.f r0.w, c8.y, r1.w +mul.f r3.x, c8.x, r1.w +mul.f r7.w, c8.w, r1.w +mul.f r3.z, r6.x, r6.x +mad.f32 r0.w, c9.y, r1.x, r0.w +mad.f32 r3.z, r6.y, r6.y, r3.z +add.f r6.z, c4.z, (neg)r4.w +mad.f32 r0.w, c10.y, r4.w, r0.w +mad.f32 r3.x, c9.x, r1.x, r3.x +mad.f32 r3.w, c9.w, r1.x, r7.w +mad.f32 r3.z, r6.z, r6.z, r3.z mul.f r0.x, c12.w, r0.x -mad.f32 r2.x, c9.x, r1.w, r2.x +mad.f32 r3.x, c10.x, r4.w, r3.x +mad.f32 r3.w, c10.w, r4.w, r3.w +mul.f r7.z, c8.z, r1.w mad.f32 r0.x, c13.w, r0.y, r0.x -mov.f32f32 r0.y, r3.x -mov.f32f32 r8.y, r4.y +mul.f r0.y, c0.w, r1.w +rsq r3.z, r3.z +(ss)mov.f32f32 r5.x, r3.z +mad.f32 r3.y, r6.x, r3.z, r3.y mad.f32 r0.x, c14.w, r0.z, r0.x -rsq r0.z, r6.x -(ss)mov.f32f32 r0.z, r0.z -mov.f32f32 r2.x, r2.x -mov.f32f32 r9.x, r0.y -mov.f32f32 r8.z, r8.y -mad.f32 r0.y, r4.y, r0.z, r5.x -mad.f32 r4.y, r6.y, r0.z, r6.w -mad.f32 r0.z, r3.x, r0.z, r3.y -add.f r0.x, r0.x, c15.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r3.x, r4.y -mov.f32f32 r0.z, r0.z -mad.f32 r3.y, c11.w, r0.x, r6.z -mul.f r4.y, r0.y, r0.y -mad.f32 r5.x, c11.z, r0.x, r8.x -mad.f32 r4.y, r3.x, r3.x, r4.y -mov.f32f32 r3.y, r3.y -mad.f32 r5.z, c11.y, r0.x, r5.z -mad.f32 r2.x, c10.x, r1.z, r2.x -mov.f32f32 r4.y, r4.y -mov.f32f32 r3.y, r3.y -mad.f32 r4.y, r0.z, r0.z, r4.y -mov.f32f32 r5.x, r5.x -mov.f32f32 r5.z, r5.z -mad.f32 r2.x, c11.x, r0.x, r2.x -(ss)mul.f r6.x, c0.w, r1.y -mul.f r6.z, c0.z, r1.y -mul.f r6.w, c0.y, r1.y -rsq r4.y, r4.y -(ss)mov.f32f32 r4.y, r4.y -mov.f32f32 r10.y, r3.y -mov.f32f32 r3.y, r5.x -mul.f r5.x, r5.z, c16.y -mul.f r0.z, r0.z, r4.y -mul.f r3.x, r3.x, r4.y -mul.f r0.y, r0.y, r4.y -mov.f32f32 r10.x, r3.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r3.x, r3.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r9.w, r5.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r3.x, r3.x -mov.f32f32 r0.y, r0.y +mad.f32 r0.z, c9.z, r1.x, r7.z +(ss)mad.f32 r3.z, r6.y, r5.x, r4.x +mov.f32f32 r4.x, r3.y +mad.f32 r1.y, r6.z, r5.x, r1.y nop -mov.f32f32 r10.w, r0.z -mov.f32f32 r10.z, r3.x -mov.f32f32 r9.y, r0.y -mov.f32f32 r0.y, r2.x -mad.f32 r0.z, c1.w, r1.w, r6.x -mad.f32 r2.x, c1.z, r1.w, r6.z -mad.f32 r3.x, c1.y, r1.w, r6.w -mul.f r0.y, r0.y, c16.x -mad.f32 r0.z, c2.w, r1.z, r0.z -mad.f32 r2.x, c2.z, r1.z, r2.x -mad.f32 r3.x, c2.y, r1.z, r3.x -mov.f32f32 r9.z, r0.y -mad.f32 r0.y, c3.w, r0.x, r0.z -mad.f32 r0.z, c3.z, r0.x, r2.x -mad.f32 r2.x, c3.y, r0.x, r3.x -mul.f r3.x, c0.x, r1.y -mov.f32f32 r12.y, r0.y -mov.f32f32 r12.x, r0.z -mov.f32f32 r11.w, r2.x -mad.f32 r0.y, c1.x, r1.w, r3.x -mad.f32 r0.z, c7.x, r1.z, c7.y -mad.f32 r0.y, c2.x, r1.z, r0.y -mad.f32 r1.y, c7.x, r1.y, c7.y -mad.f32 r0.x, c3.x, r0.x, r0.y -mov.f32f32 r0.y, r0.z -mov.f32f32 r11.y, r4.z -mov.f32f32 r11.x, r5.y -mov.f32f32 r11.z, r0.x -mov.f32f32 r12.w, r0.y -mov.f32f32 r0.x, r1.y -mov.f32f32 r0.y, r6.y -(rpt1)nop -mov.f32f32 r12.z, r0.x -mov.f32f32 r8.w, r0.y -mul.f r0.x, r3.w, c6.z -mul.f r0.y, r3.w, c6.y -mul.f r0.z, r3.w, c6.x -mov.f32f32 r1.x, r1.x -mov.f32f32 r7.z, r0.x -mov.f32f32 r7.y, r0.y -mov.f32f32 r7.x, r0.z -mul.f r0.x, r4.x, r1.x -mul.f r0.y, r0.w, r5.w -mad.f32 r0.x, r0.w, r3.z, (neg)r0.x -mad.f32 r0.y, r4.w, r1.x, (neg)r0.y -mov.f32f32 r0.z, r1.x -mov.f32f32 r1.x, r5.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.w, r0.z -mov.f32f32 r1.z, r1.x -mul.f r0.x, r0.x, r2.y -mul.f r0.y, r0.y, r2.y -mov.f32f32 r0.z, r3.z -nop -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.y, r0.z -mov.f32f32 r0.z, r0.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y +mov.f32f32 r5.x, r3.z +mul.f r4.x, r4.x, r4.x +mov.f32f32 r5.y, r1.y +add.f r0.x, r0.x, c15.w +mad.f32 r3.z, r3.z, r5.x, r4.x +mad.f32 r0.z, c10.z, r4.w, r0.z +mad.f32 r1.y, r1.y, r5.y, r3.z +mad.f32 r0.w, c11.y, r0.x, r0.w +mad.f32 r3.x, c11.x, r0.x, r3.x +mad.f32 r3.w, c11.w, r0.x, r3.w +mad.f32 r3.z, c11.z, r0.x, r0.z +mad.f32 r0.y, c1.w, r1.x, r0.y +mul.f r0.z, c0.z, r1.w +rsq r1.y, r1.y +(ss)mov.f32f32 r4.x, r1.y +mul.f r6.w, r3.y, r1.y +mul.f r3.y, r0.w, c16.y +mul.f r3.x, r3.x, c16.x +mul.f r7.y, r5.y, r4.x +mul.f r7.x, r5.x, r4.x +mad.f32 r0.y, c2.w, r4.w, r0.y +mad.f32 r0.z, c1.z, r1.x, r0.z +mad.f32 r0.w, c3.w, r0.x, r0.y +mad.f32 r0.y, c2.z, r4.w, r0.z +(ss)mul.f r1.y, c0.y, r1.w +mad.f32 r0.z, c3.z, r0.x, r0.y +mad.f32 r0.y, c1.y, r1.x, r1.y +mad.f32 r1.x, c1.x, r1.x, r1.z +mad.f32 r0.y, c2.y, r4.w, r0.y +mad.f32 r1.x, c2.x, r2.w, r1.x +mad.f32 r0.y, c3.y, r0.x, r0.y +mad.f32 r0.x, c3.x, r0.x, r1.x +mad.f32 r5.x, c7.x, r1.w, c7.y +mad.f32 r5.y, c7.x, r4.w, c7.y +mul.f r1.z, r4.y, c6.z +mul.f r1.y, r4.y, c6.y +mul.f r1.x, r9.y, c6.x +mad.f32 r1.w, c14.z, r8.z, r4.z +mad.f32 r2.w, c13.y, r8.y, r9.x +mul.f r4.x, c12.x, r8.x +mad.f32 r2.w, c14.y, r8.z, r2.w +mov.f32f32 r4.z, r1.w +mad.f32 r4.x, c13.x, r8.y, r4.x (rpt1)nop -mov.f32f32 r2.x, r0.x -mov.f32f32 r3.w, r0.y -mov.f32f32 r3.z, r0.z -mov.f32f32 r0.x, r4.w -mov.f32f32 r0.y, r4.x +mul.f r4.w, r2.x, r4.z +mad.f32 r8.x, c14.x, r8.z, r4.x +mov.f32f32 r4.y, r2.w (rpt1)nop -mov.f32f32 r3.y, r0.x -mov.f32f32 r3.x, r0.y -mov.f32f32 r0.x, r2.w -mov.f32f32 r0.y, r2.z +mov.f32f32 r4.x, r8.x +mul.f r8.y, r2.z, r4.y +mul.f r8.x, r2.y, r8.x +mad.f32 r1.w, r2.y, r1.w, (neg)r8.y +mad.f32 r4.w, r2.z, r4.x, (neg)r4.w +mad.f32 r8.x, r2.x, r2.w, (neg)r8.x (rpt1)nop -mov.f32f32 r13.y, r0.x -mov.f32f32 r13.x, r0.y +mul.f r4.w, r4.w, r8.w +mul.f r2.w, r1.w, r8.w +mul.f r1.w, r8.x, r8.w end -; VERT: outputs: r11.z (0:0) r7.x (5:9) r3.x (5:10) r9.z (5:11) r1.y (5:12) r12.z (5:13) r8.z (5:14) r10.z (5:15) -; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r1.z (0:0,cm=f,il=16,b=0) r2.z (0:0,cm=3,il=20,b=0) -; VERT: 304 instructions, 0 half, 14 full -; pos: r11.z +nop +nop +; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11) r4.x (5:12) r5.x (5:13) r6.x (5:14) r7.x (5:15) +; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r8.x (0:0,cm=f,il=16,b=0) r5.z (0:0,cm=3,il=20,b=0) +; VERT: 201 instructions, 0 half, 10 full diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-66.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-66.asm index 0274fe9..5630fc2 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-66.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-66.asm @@ -4,206 +4,143 @@ @in(r0.w) in3 @in(r1.x) in4 @in(r1.y) in5 -@out(r1.y) out0 -@out(r1.z) out1 -@out(r1.w) out2 -@out(r2.x) out3 +@out(r2.x) out0 +@out(r2.y) out1 +@out(r2.z) out2 +@out(r2.w) out3 +@const(c9.x) 0x3f000000, 0x00000000, 0x3f233333, 0xbb449ba6 +@const(c10.x) 0xbf000000, 0x3f800000, 0x40000000, 0x3de38866 +@const(c11.x) 0x3fb8aa65, 0x3cf5c28f, 0x00000000, 0x00000000 +@const(c12.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.x, 4, r1.x add.f r0.y, r0.w, c9.y bary.f r0.w, 5, r1.x bary.f r1.z, 10, r1.x -add.f r1.w, r0.x, c10.x -bary.f r2.x, 8, r1.x +add.f r2.x, r0.x, c10.x +bary.f r1.w, 11, r1.x add.f r2.y, r0.w, c10.x -mov.f32f32 r1.z, r1.z -floor.f r2.z, r1.w +bary.f r3.y, 8, r1.x +floor.f r2.z, r2.x rcp r0.y, r0.y add.f r0.z, r0.z, c9.y floor.f r2.w, r2.y -mov.f32f32 r3.x, r1.z -add.f r1.z, r1.w, (neg)r2.z +bary.f r3.z, 9, r1.x +add.f r3.w, r2.x, (neg)r2.z (ss)mul.f r0.y, r0.z, r0.y -add.f r0.z, r2.y, (neg)r2.w -bary.f r1.w, 11, r1.x -mov.f32f32 r1.z, r1.z +absneg.f r0.z, (neg)c7.x +add.f r4.x, r2.y, (neg)r2.w +mov.f32f32 r4.y, r3.w +sam (f32)(xyzw)r2.x, r1.z, s#0, t#0 +(ss)add.f r1.z, r3.w, c10.y +mul.f r0.z, r0.z, c7.x +mov.f32f32 r1.w, r4.x +mul.f r3.w, c9.x, r4.y +add.f r4.y, c10.z, (neg)r4.y +mul.f r0.z, r0.z, r0.y mov.f32f32 r0.y, r0.y -absneg.f r2.y, (neg)c7.x -mov.f32f32 r0.z, r0.z -mul.f r2.z, c9.x, r1.z -mov.f32f32 r1.w, r1.w -add.f r2.w, c10.z, (neg)r1.z -add.f r3.z, c10.z, (neg)r0.z -mov.f32f32 r2.z, r2.z -mul.f r2.y, r2.y, c7.x -mul.f r3.w, c9.x, r0.z -mov.f32f32 r3.y, r1.w -add.f r0.x, r0.x, (neg)r2.z -mul.f r1.w, r2.y, r0.y -mov.f32f32 r2.y, r3.w -mov.f32f32 r2.z, r2.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r1.w, r1.w -add.f r0.w, r0.w, (neg)r2.y -sam (f32)(xyzw)r3.w, r3.x, s#0, t#0 -(sy)cmps.f.lt r2.y, r4.z, c9.z -add.f r2.w, c10.x, r0.x -mul.f r0.y, r1.w, r0.y +add.f r0.x, r0.x, (neg)r3.w +mul.f r3.w, c9.x, r1.w +mov.f32f32 r4.z, r4.y +mul.f r0.y, r0.z, r0.y +mov.f32f32 r0.z, r0.x +add.f r0.w, r0.w, (neg)r3.w add.f r0.x, c10.y, r0.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.w, r2.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x -add.f r2.w, c10.y, r0.w -mul.f r1.w, r1.w, c3.z mul.f r0.y, r0.y, c11.x -mul.f r0.x, r0.x, c3.z -mov.f32f32 r2.w, r2.w -(ss)mov.f32f32 r3.x, r1.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r3.y, r0.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r4.w, r3.x -add.f r0.w, c10.x, r0.w -mov.f32f32 r3.x, r3.y -mov.f32f32 r5.z, r0.x -mov.f32f32 r0.x, r1.w -mov.f32f32 r0.w, r0.w -exp2 r0.y, r0.y -(ss)mov.f32f32 r0.y, r0.y -mov.f32f32 r6.y, r3.x -mul.f r1.w, r2.w, c3.w -mul.f r0.w, r0.w, c3.w -add.f r2.w, c12.y, (neg)r0.y -mov.f32f32 r7.x, r0.x -mov.f32f32 r0.x, r1.w -mov.f32f32 r3.x, r0.w -mul.f r2.w, r2.w, c7.y -mul.f r0.y, r0.y, c10.y -mov.f32f32 r1.w, r1.w -mov.f32f32 r5.x, r3.x -bary.f r3.x, 6, r1.x -add.f r0.y, r0.y, r2.w -mov.f32f32 r1.w, r1.w -mov.f32f32 r0.w, r0.w -add.f r2.w, r3.x, c9.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r6.z, r1.w -mov.f32f32 r5.w, r0.w -mov.f32f32 r0.w, r2.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.w, r2.w -mov.f32f32 r3.x, r2.w -mov.f32f32 r5.y, r0.w -max.f r0.y, r0.y, c9.y -mov.f32f32 r6.w, r1.w -mov.f32f32 r6.x, r3.x -mov.f32f32 r7.y, r0.x -mov.f32f32 r0.x, r2.w -cov.u32f32 r0.w, r2.y -sam.s (f32)(x)r1.w, r4.w, s#2, t#2 -(sy)mov.f32f32 r1.w, r1.w -min.f r0.y, r0.y, c10.y -sam.s (f32)(x)r2.y, r6.y, s#2, t#2 -(sy)mov.f32f32 r2.y, r2.y -sam.s (f32)(x)r2.w, r5.z, s#2, t#2 -(sy)mov.f32f32 r2.w, r2.w -mov.f32f32 r1.w, r1.w -mov.f32f32 r3.x, r3.z -add.f r3.y, c12.y, (neg)r0.y -add.f r3.z, c12.y, (neg)r0.y -(ss)add.f r4.w, c12.y, (neg)r0.y -mul.f r5.x, r2.z, r3.x -mul.f r3.y, r3.y, c6.z -mul.f r3.z, r3.z, c6.y -mul.f r4.w, r4.w, c6.x -mul.f r1.w, r5.x, r1.w -add.f r1.z, r1.z, c10.y -mov.f32f32 r7.z, r0.x -mov.f32f32 r0.x, (0.000000) -add.f r0.z, r0.z, c10.y -mul.f r3.x, r1.z, r3.x -mov.f32f32 r2.x, r2.x -cmps.f.ne p0.x, r0.w, r0.x -mul.f r0.x, r2.z, r0.z -mad.f32 r0.w, r3.x, r2.w, r1.w -sam.s (f32)(x)r1.w, r7.x, s#2, t#2 +add.f r0.z, c10.x, r0.z +mov.f32f32 r3.w, r0.w +mul.f r4.w, r0.x, c3.z +add.f r0.x, c10.y, r0.w +mul.f r5.z, r0.z, c3.z +add.f r0.z, c10.x, r3.w +mov.f32f32 r6.y, r4.w +mul.f r5.x, r0.x, c3.w +mov.f32f32 r7.x, r5.z +mul.f r6.z, r0.z, c3.w +exp2 r0.x, r0.y +(ss)mov.f32f32 r0.y, r0.x +bary.f r0.z, 6, r1.x +mov.f32f32 r5.w, r5.x +mov.f32f32 r7.y, r6.z +add.f r0.y, c12.y, (neg)r0.y +add.f r5.y, r0.z, c9.w +add.f r0.z, c10.z, (neg)r1.w +mul.f r0.x, r0.x, c10.y +add.f r0.w, r4.x, c10.y +mov.f32f32 r7.z, r5.y +mul.f r0.y, r0.y, c7.y +mov.f32f32 r6.w, r5.y +mov.f32f32 r6.x, r5.y (rpt1)nop -(sy)mov.f32f32 r1.w, r1.w -mov.f32f32 r0.w, r0.w -bary.f r2.z, 0, r1.x -mov.f32f32 r2.w, r2.x -mad.f32 r0.x, r0.x, r1.w, r0.w -bary.f r0.w, 1, r1.x -bary.f r1.w, 9, r1.x +add.f r0.x, r0.x, r0.y +sam.s (f32)(x)r7.x, r7.x, s#2, t#2 +mov.f32f32 r0.y, r0.z +nop +(ss)nop +sam.s (f32)(x)r7.y, r6.y, s#2, t#2 +sam.s (f32)(x)r7.z, r4.w, s#2, t#2 mul.f r0.z, r1.z, r0.z -mov.f32f32 r0.x, r0.x -bary.f (ei)r1.x, 2, r1.x -mov.f32f32 r1.y, r1.w -mad.f32 r0.x, r0.z, r2.y, r0.x +mul.f r1.w, r4.y, r0.w +mul.f r0.y, r4.z, r0.y +max.f r0.x, r0.x, c9.y +sam.s (f32)(x)r3.w, r5.z, s#2, t#2 +(sy)cmps.f.lt r4.x, r2.w, c9.z +mul.f r0.w, r1.z, r0.w +mul.f r0.y, r0.y, r7.x +min.f r0.x, r0.x, c10.y +mad.f32 r0.y, r0.z, r7.y, r0.y +cov.u32f32 r0.z, r4.x +mad.f32 r0.y, r1.w, r3.w, r0.y +add.f r1.z, c12.y, (neg)r0.x +mad.f32 r0.y, r0.w, r7.z, r0.y +add.f r0.w, c12.y, (neg)r0.x +add.f r1.w, c12.y, (neg)r0.x +mov.f32f32 r3.w, (0.000000) +mul.f r0.y, c10.w, r0.y +bary.f r4.x, 0, r1.x +mul.f r1.z, r1.z, c6.z +mul.f r0.w, r0.w, c6.y +mov.f32f32 r4.y, r0.y +bary.f r4.z, 2, r1.x +bary.f (ei)r1.x, 1, r1.x +mul.f r1.y, r2.x, r4.x +mul.f r1.w, r1.w, c6.x +mul.f r4.x, r2.z, r4.z +mul.f r1.x, r2.y, r1.x +mul.f r0.y, r1.y, r0.y +cmps.f.ne p0.x, r0.z, r3.w +mul.f r0.z, r4.x, r4.y +mul.f r1.x, r1.x, r4.y +mad.f32 r0.z, c5.z, r2.z, r0.z +mad.f32 r1.x, c5.y, r2.y, r1.x +mad.f32 r0.y, c5.x, r2.x, r0.y +sam (f32)(w)r3.y, r3.y, s#1, t#1 +(sy)cmps.f.lt r1.y, r4.x, c11.y +mul.f r0.z, r0.x, r0.z +mul.f r1.x, r0.x, r1.x +mul.f r0.x, r0.x, r0.y kill p0.x -mov.f32f32 r1.z, r4.z -mov.f32f32 r3.x, r1.y -mov.f32f32 r0.x, r0.x -mul.f r1.x, r4.y, r1.x -mul.f r0.w, r4.x, r0.w -mov.f32f32 r2.x, r1.z -mul.f r0.x, c10.w, r0.x -mul.f r2.y, r3.w, r2.z -sam (f32)(w)r5.x, r2.w, s#1, t#1 -nop -(sy)cmps.f.lt r2.z, r5.w, c11.y -mov.f32f32 r0.x, r0.x -(ss)mov.f32f32 r2.w, r5.w -mov.f32f32 r3.x, c9.y -nop -mul.f r1.x, r1.x, r0.x -mul.f r0.w, r0.w, r0.x -mul.f r0.x, r2.y, r0.x -cov.u32f32 r2.y, r2.z -mov.f32f32 r1.x, r1.x -mov.f32f32 r0.w, r0.w -mad.f32 r1.x, c5.z, r4.y, r1.x -mad.f32 r0.w, c5.y, r4.x, r0.w -mov.f32f32 r0.x, r0.x -cmps.f.ne r2.y, r2.y, c9.y -mov.f32f32 r1.x, r1.x -mov.f32f32 r0.w, r0.w -mad.f32 r0.x, c5.x, r3.w, r0.x -mov.f32f32 r2.z, r2.w -mul.f r1.x, r0.y, r1.x -mul.f r0.w, r0.y, r0.w -mov.f32f32 r0.x, r0.x -sel.b32 r2.y, r3.x, r2.y, r2.z -add.f r1.x, r1.x, r3.y -add.f r0.w, r0.w, r3.z -mul.f r0.x, r0.y, r0.x -nop -mul.f r0.y, r1.x, r2.y -mul.f r0.w, r0.w, r2.y -add.f r0.x, r0.x, r4.w -nop -mul.f r0.y, r0.y, c4.z -mul.f r0.w, r0.w, c4.y -mul.f r0.x, r0.x, r2.y +add.f r0.y, r0.z, r1.z +cov.u32f32 r0.z, r1.y +add.f r0.w, r1.x, r0.w +add.f r0.x, r0.x, r1.w +mov.f32f32 r1.x, c9.y +cmps.f.ne r0.z, r0.z, c9.y +(rpt2)nop +sel.b32 r0.z, r1.x, r0.z, r4.x +(rpt2)nop +mul.f r0.y, r0.y, r0.z +mul.f r0.w, r0.w, r0.z +(rpt1)nop +mul.f r2.z, r0.y, c4.z +mul.f r2.y, r0.w, c4.y +mul.f r0.x, r0.x, r0.z +(rpt2)nop +mul.f r2.x, r0.x, c4.x +end nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.w, r0.w -mul.f r0.x, r0.x, c4.x nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.x, r0.x nop -mov.f32f32 r1.w, r0.y -mov.f32f32 r1.z, r0.w -mov.f32f32 r0.x, r0.x -(rpt2)nop -mov.f32f32 r1.y, r0.x -end -; FRAG: outputs: r1.y (1:0) +; FRAG: outputs: r2.x (1:0) ; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r2.x (5:10,cm=f,il=12,b=1) r1.x (5:11,cm=f,il=16,b=1) -; FRAG: 195 instructions, 0 half, 8 full -; pos (bary): r1.x -; color: r1.y -; fragcoord: r0.x +; FRAG: 133 instructions, 0 half, 8 full diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-67.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-67.asm index b65a363..63ee7bc 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-67.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-67.asm @@ -6,242 +6,182 @@ @in(r0.w) in4 @in(r1.x) in5 @in(r1.y) in6 -@in(r1.z) in8 -@in(r1.w) in9 -@out(r5.x) out0 -@out(r5.y) out1 -@out(r5.z) out2 -@out(r5.w) out3 -@out(r4.x) out4 -@out(r4.y) out5 -@out(r4.z) out6 -@out(r4.w) out7 -@out(r2.z) out8 -@out(r2.w) out9 -@out(r3.x) out10 -@out(r3.y) out11 -@out(r0.z) out12 -@out(r0.w) out13 -@out(r1.x) out14 -@out(r1.y) out15 -(sy)(ss)floor.f r2.x, c14.z -floor.f r2.y, c14.x -absneg.f r2.z, (abs)c17.x -absneg.f r2.w, (abs)c17.y -add.f r2.x, c14.z, (neg)r2.x -add.f r2.y, c14.x, (neg)r2.y -mul.f r3.x, c11.x, r0.w -add.f r2.z, r2.z, r2.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r2.y, r2.y -mad.f32 r2.w, c12.x, r1.x, r3.x -mov.f32f32 r2.z, r2.z -max.f r2.x, r2.x, c18.y -max.f r2.y, r2.y, c18.y -mov.f32f32 r2.w, r2.w -mul.f r3.x, c16.x, r2.z -min.f r2.x, r2.x, c22.y -min.f r2.y, r2.y, c22.y -mul.f r3.y, c11.z, r0.x -mad.f32 r2.w, c13.x, r1.y, r2.w -max.f r2.x, r2.x, c18.x -max.f r2.y, r2.y, c18.x -mad.f32 r3.y, c12.z, r0.y, r3.y -mov.f32f32 r2.w, r2.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r2.y, r2.y -mad.f32 r3.y, c13.z, r0.z, r3.y -mul.f r3.z, c11.x, r0.x -mul.f r2.x, c16.x, r2.x -mad.f32 r3.z, c12.x, r0.y, r3.z -add.f r3.y, r3.y, c14.z -mad.f32 r3.z, c13.x, r0.z, r3.z -mov.f32f32 r2.x, r2.x -mad.f32 r3.x, c18.w, r3.x, r3.y -mul.f r2.w, r2.w, (neg)c4.x -mul.f r3.w, c11.y, r0.w -mad.f32 r2.x, c18.z, r2.x, c14.x -add.f r3.z, r3.z, c14.x -mov.f32f32 r3.x, r3.x -mad.f32 r2.y, c16.x, r2.y, r3.z -mov.f32f32 r2.x, r2.x -mad.f32 r3.w, c12.y, r1.x, r3.w -mov.f32f32 r3.x, r3.x -add.f r2.y, r2.y, c19.x -mov.f32f32 r2.x, r2.x -mov.f32f32 r3.w, r3.w -add.f r3.x, r3.x, c19.x -mad.f32 r3.w, c13.y, r1.y, r3.w -add.f r2.x, r2.x, c19.x +@in(r3.z) in8 +@in(r3.w) in9 +@out(r0.x) out0 +@out(r0.y) out1 +@out(r0.z) out2 +@out(r0.w) out3 +@out(r1.x) out4 +@out(r1.y) out5 +@out(r1.z) out6 +@out(r1.w) out7 +@out(r2.x) out8 +@out(r2.y) out9 +@out(r2.z) out10 +@out(r2.w) out11 +@out(r3.x) out12 +@out(r3.y) out13 +@out(r3.z) out14 +@out(r3.w) out15 +@const(c18.x) 0x3ecccccd, 0x00000000, 0x3eaaa64c, 0x3e800000 +@const(c19.x) 0x3f000000, 0x40000000, 0xbf800000, 0x40400000 +@const(c20.x) 0x399d4952, 0x3e4ccccd, 0x3f99999a, 0x3fd9999a +@const(c21.x) 0x3c88ce70, 0x00000000, 0x00000000, 0x00000000 +@const(c22.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 +(sy)(ss)floor.f r1.z, c14.z +floor.f r1.w, c14.x +absneg.f r2.x, (abs)c17.x +absneg.f r2.y, (abs)c17.y +add.f r1.z, c14.z, (neg)r1.z +add.f r1.w, c14.x, (neg)r1.w +mov.f32f32 r2.z, c18.y +mul.f r2.w, c11.x, r0.w +max.f r1.z, r1.z, c18.y +max.f r1.w, r1.w, c18.y +add.f r2.x, r2.x, r2.y +add.f r2.y, r2.z, c19.x +min.f r1.z, r1.z, c22.y +min.f r1.w, r1.w, c22.y +mul.f r2.z, c16.x, r2.x +mul.f r3.x, c11.z, r0.x +max.f r1.z, r1.z, c18.x +max.f r1.w, r1.w, c18.x +mul.f r3.y, c11.x, r0.x +mad.f32 r3.x, c12.z, r0.y, r3.x +mul.f r1.z, c16.x, r1.z +mad.f32 r3.y, c12.x, r0.y, r3.y +mad.f32 r3.x, c13.z, r0.z, r3.x +mad.f32 r3.y, c13.x, r0.z, r3.y +mad.f32 r1.z, c18.z, r1.z, c14.x floor.f r4.x, r2.y -floor.f r4.y, r3.x -mov.f32f32 r3.w, r3.w -floor.f r4.z, r2.x +mad.f32 r2.w, c12.x, r1.x, r2.w +add.f r3.y, r3.y, c14.x +add.f r1.z, r1.z, c19.x +mad.f32 r1.w, c16.x, r1.w, r3.y +add.f r3.x, r3.x, c14.z add.f r2.y, r2.y, (neg)r4.x -add.f r3.x, r3.x, (neg)r4.y -mad.f32 r2.w, (neg)c4.y, r3.w, r2.w -add.f r2.x, r2.x, (neg)r4.z +floor.f r4.x, r1.z +add.f r1.w, r1.w, c19.x +mad.f32 r2.z, c18.w, r2.z, r3.x mad.f32 r2.y, c19.y, r2.y, c19.z -mad.f32 r3.x, c19.y, r3.x, c19.z -mov.f32f32 r2.w, r2.w -mad.f32 r2.x, c19.y, r2.x, c19.z +add.f r1.z, r1.z, (neg)r4.x +floor.f r4.x, r1.w +add.f r2.z, r2.z, c19.x absneg.f r2.y, (abs)r2.y -absneg.f r3.x, (abs)r3.x -mul.f r0.w, c11.z, r0.w -absneg.f r2.x, (abs)r2.x -mul.f r3.w, c19.y, r2.y -mul.f r4.x, c19.y, r3.x -mul.f r2.y, r2.y, r2.y -mul.f r4.y, c19.y, r2.x -add.f r3.w, c19.w, (neg)r3.w +mad.f32 r1.z, c19.y, r1.z, c19.z +add.f r4.x, r1.w, (neg)r4.x +floor.f r4.y, r2.z +mul.f r1.w, r2.y, r2.y +absneg.f r1.z, (abs)r1.z +mad.f32 r2.y, c19.y, r4.x, c19.z +add.f r2.z, r2.z, (neg)r4.y +mad.f32 r2.w, c13.x, r1.y, r2.w +mul.f r4.x, c19.y, r1.z +absneg.f r2.y, (abs)r2.y +mad.f32 r2.z, c19.y, r2.z, c19.z +mul.f r1.z, r1.z, r1.z add.f r4.x, c19.w, (neg)r4.x -mul.f r3.x, r3.x, r3.x +mul.f r4.y, c19.y, r2.y +absneg.f r2.z, (abs)r2.z +mul.f r2.y, r2.y, r2.y +mul.f r1.z, r1.z, r4.x +mul.f r4.x, r0.y, c21.x add.f r4.y, c19.w, (neg)r4.y -mul.f r2.x, r2.x, r2.x -mul.f r2.y, r2.y, r3.w -mul.f r3.x, r3.x, r4.x -mad.f32 r0.w, c12.z, r1.x, r0.w -mul.f r1.x, r2.x, r4.y -mov.f32f32 r2.x, r2.y -mov.f32f32 r2.y, r3.x -mul.f r3.x, r0.x, r0.z -mov.f32f32 r1.x, r1.x -mul.f r3.w, r0.y, c21.x -mul.f r4.x, r0.y, c20.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r4.y, c18.y -mov.f32f32 r3.w, r3.w -mul.f r3.x, r3.x, r4.x -mad.f32 r0.w, c13.z, r1.y, r0.w -add.f r1.y, r4.y, c19.x -mov.f32f32 r3.w, r3.w -mov.f32f32 r3.x, r3.x -mov.f32f32 r0.w, r0.w -floor.f r4.x, r1.y -max.f r3.w, r3.w, c18.y -mov.f32f32 r3.x, r3.x -mad.f32 r0.w, (neg)c4.z, r0.w, r2.w -add.f r1.y, r1.y, (neg)r4.x -min.f r2.w, r3.w, c22.y -mov.f32f32 r3.x, r3.x -mov.f32f32 r0.w, r0.w -mad.f32 r1.y, c19.y, r1.y, c19.z -min.f r2.w, r2.w, c18.w -max.f r3.x, r3.x, c18.y -max.f r0.w, c18.y, r0.w -absneg.f r1.y, (abs)r1.y -mov.f32f32 r2.w, r2.w -min.f r3.x, r3.x, c22.y -mov.f32f32 r0.w, r0.w -mul.f r1.y, r1.y, r1.y -mul.f r1.x, r1.x, r2.w -min.f r2.w, r3.x, c20.y -mul.f r3.x, r0.w, c5.z -mul.f r3.w, r0.w, c5.y -mov.f32f32 r1.x, r1.x -mov.f32f32 r2.w, r2.w -mov.f32f32 r3.x, r3.x -mov.f32f32 r3.w, r3.w -mul.f r0.w, r0.w, c5.x -mul.f r2.x, r2.x, r2.w -mul.f r2.y, r2.y, r2.w -max.f r2.z, r2.z, c20.z -mov.f32f32 r4.z, r3.x +mul.f r4.z, c19.y, r2.z +mul.f r2.z, r2.z, r2.z +max.f r4.x, r4.x, c18.y +mul.f r2.y, r2.y, r4.y +mul.f r4.y, r0.x, r0.z +add.f r4.z, c19.w, (neg)r4.z +min.f r4.x, r4.x, c22.y +mul.f r4.w, r0.y, c20.x +mul.f r2.w, r2.w, (neg)c4.x +mul.f r5.x, c11.y, r0.w +min.f r4.x, r4.x, c18.w +mul.f r4.y, r4.y, r4.w +mul.f r2.z, r2.z, r4.z +mad.f32 r4.z, c12.y, r1.x, r5.x +mul.f r1.z, r1.z, r4.x +max.f r4.x, r4.y, c18.y +mad.f32 r4.y, c13.y, r1.y, r4.z mov.f32f32 r2.x, r2.x -mov.f32f32 r4.y, r3.w -mad.f32 r2.w, c17.x, r1.x, r2.x -mad.f32 r1.x, c17.y, r1.x, r2.x -mov.f32f32 r2.x, r2.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r2.z, r2.w -mov.f32f32 r1.x, r1.x -min.f r2.x, r2.x, c20.w -mov.f32f32 r4.x, r0.w -mov.f32f32 r4.w, r1.y -mul.f r0.w, c11.y, r0.x -mov.f32f32 r1.y, r2.x -mad.f32 r0.w, c12.y, r0.y, r0.w +mov.f32f32 r4.z, r1.z +min.f r4.x, r4.x, c22.y +mad.f32 r2.w, (neg)c4.y, r4.y, r2.w +mul.f r0.w, c11.z, r0.w +max.f r2.x, r2.x, c20.z +min.f r4.x, r4.x, c20.y +mad.f32 r0.w, c12.z, r1.x, r0.w +mul.f r1.x, c11.y, r0.x mul.f r0.x, c11.w, r0.x -mov.f32f32 r1.w, r1.w -mad.f32 r2.x, r2.y, r1.y, r3.z -mad.f32 r2.w, r2.y, r1.y, r3.y -mad.f32 r0.w, c13.y, r0.z, r0.w +mov.f32f32 r4.y, r4.x +mul.f r2.z, r2.z, r4.x +min.f r2.x, r2.x, c20.w +mad.f32 r0.w, c13.z, r1.y, r0.w +mul.f r1.y, r2.y, r4.y +mad.f32 r1.x, c12.y, r0.y, r1.x mad.f32 r0.x, c12.w, r0.y, r0.x -add.f r0.y, r2.x, r2.z -add.f r1.x, r2.w, r1.x -add.f r0.w, r0.w, c14.y -nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.x, r1.x -mad.f32 r0.w, r2.y, r1.y, r0.w +mad.f32 r0.y, c13.y, r0.z, r1.x +mov.f32f32 r1.x, r1.y +mad.f32 r1.y, c17.y, r1.z, r1.y +mad.f32 r1.x, c17.x, r4.z, r1.x +mov.f32f32 r1.z, r2.x +mad.f32 r2.x, r2.z, r2.x, r3.x +add.f r0.y, r0.y, c14.y +mad.f32 r0.w, (neg)c4.z, r0.w, r2.w +mad.f32 r2.y, r2.z, r1.z, r3.y +add.f r1.y, r2.x, r1.y +mad.f32 r0.y, r2.z, r1.z, r0.y +max.f r4.x, c18.y, r0.w +add.f r0.w, r2.y, r1.x +mov.f32f32 r1.x, r1.y mad.f32 r0.x, c13.w, r0.z, r0.x -mul.f r0.z, c7.w, r0.y -mul.f r1.y, c7.z, r0.y -mad.f32 r0.z, c8.w, r0.w, r0.z -mad.f32 r1.y, c8.z, r0.w, r1.y -mul.f r2.x, c7.y, r0.y -mul.f r2.y, c7.x, r0.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r1.y, r1.y -mad.f32 r0.z, c9.w, r1.x, r0.z -add.f r0.x, r0.x, c14.w -mad.f32 r1.y, c9.z, r1.x, r1.y -mad.f32 r2.x, c8.y, r0.w, r2.x -mad.f32 r2.y, c8.x, r0.w, r2.y -mad.f32 r0.z, c10.w, r0.x, r0.z -mad.f32 r1.y, c10.z, r0.x, r1.y -mov.f32f32 r2.x, r2.x -mov.f32f32 r2.y, r2.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r1.y, r1.y -mad.f32 r2.x, c9.y, r1.x, r2.x -mad.f32 r2.y, c9.x, r1.x, r2.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r1.y, r1.y -mad.f32 r2.x, c10.y, r0.x, r2.x -mad.f32 r2.y, c10.x, r0.x, r2.y -mov.f32f32 r3.y, r0.z -mov.f32f32 r3.x, r1.y -mov.f32f32 r0.z, r2.x -mov.f32f32 r1.y, r2.y -mul.f r2.x, c0.w, r0.y -mul.f r2.y, c0.z, r0.y -mul.f r0.z, r0.z, c15.y -mul.f r1.y, r1.y, c15.x -(rpt1)nop -mov.f32f32 r2.w, r0.z -mov.f32f32 r2.z, r1.y -mad.f32 r0.z, c1.w, r0.w, r2.x -mad.f32 r1.y, c1.z, r0.w, r2.y -mad.f32 r0.z, c2.w, r1.x, r0.z -mad.f32 r1.y, c2.z, r1.x, r1.y -mad.f32 r0.z, c3.w, r0.x, r0.z -mad.f32 r1.y, c3.z, r0.x, r1.y -mul.f r2.x, c0.y, r0.y -mul.f r2.y, c0.x, r0.y -mov.f32f32 r5.w, r0.z -mov.f32f32 r5.z, r1.y -mad.f32 r0.z, c1.y, r0.w, r2.x -mad.f32 r0.w, c1.x, r0.w, r2.y -mad.f32 r0.z, c2.y, r1.x, r0.z -mad.f32 r0.w, c2.x, r1.x, r0.w -mad.f32 r0.z, c3.y, r0.x, r0.z -mad.f32 r0.x, c3.x, r0.x, r0.w -(rpt1)nop -mov.f32f32 r5.y, r0.z -mov.f32f32 r5.x, r0.x -mad.f32 r0.x, c6.x, r1.x, c6.y -mad.f32 r0.y, c6.x, r0.y, c6.y -(rpt1)nop -mov.f32f32 r0.w, r0.x -mov.f32f32 r0.z, r0.y -mov.f32f32 r1.y, r1.w -mov.f32f32 r0.x, r1.z -(rpt2)nop -mov.f32f32 r1.x, r0.x +nop +mov.f32f32 r1.z, r0.w +mul.f r3.x, c0.x, r0.w +mad.f32 r3.y, c6.x, r1.x, c6.y +mov.f32f32 r4.y, r4.x +mul.f r0.z, c7.y, r1.z +mul.f r0.w, c7.x, r1.z +mad.f32 r0.z, c8.y, r0.y, r0.z +mad.f32 r0.w, c8.x, r0.y, r0.w +mad.f32 r0.z, c9.y, r1.x, r0.z +add.f r4.z, r0.x, c14.w +mad.f32 r0.x, c9.x, r1.x, r0.w +mul.f r0.w, c7.w, r1.z +mul.f r2.x, c7.z, r1.z +mad.f32 r0.z, c10.y, r4.z, r0.z +mad.f32 r0.x, c10.x, r4.z, r0.x +mad.f32 r0.w, c8.w, r0.y, r0.w +mad.f32 r2.z, c8.z, r0.y, r2.x +mul.f r2.y, r0.z, c15.y +mul.f r2.x, r0.x, c15.x +mad.f32 r0.x, c9.w, r1.x, r0.w +mad.f32 r0.z, c9.z, r1.x, r2.z +mad.f32 r2.w, c10.w, r4.z, r0.x +mad.f32 r2.z, c10.z, r4.z, r0.z +mul.f r0.x, c0.w, r1.z +mul.f r0.z, c0.z, r1.z +mad.f32 r0.x, c1.w, r0.y, r0.x +mad.f32 r0.z, c1.z, r0.y, r0.z +mad.f32 r0.x, c2.w, r1.x, r0.x +mad.f32 r0.z, c2.z, r1.x, r0.z +mad.f32 r0.w, c3.w, r4.z, r0.x +mad.f32 r0.z, c3.z, r4.z, r0.z +mul.f r0.x, c0.y, r1.z +mad.f32 r3.x, c1.x, r0.y, r3.x +mad.f32 r0.x, c1.y, r0.y, r0.x +mad.f32 r0.y, c2.x, r1.y, r3.x +mad.f32 r1.x, c2.y, r1.x, r0.x +mad.f32 r0.x, c3.x, r4.z, r0.y +mad.f32 r0.y, c3.y, r4.z, r1.x +mad.f32 r3.x, c6.x, r1.z, c6.y +mul.f r1.z, r4.y, c5.z +mul.f r1.y, r4.y, c5.y +mul.f r1.x, r4.x, c5.x end -; VERT: outputs: r5.x (0:0) r4.x (5:9) r2.z (5:10) r0.z (5:11) -; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r1.z (0:0,cm=3,il=16,b=0) -; VERT: 221 instructions, 0 half, 6 full -; pos: r5.x +; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11) +; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r3.z (0:0,cm=3,il=16,b=0) +; VERT: 152 instructions, 0 half, 6 full diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-68.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-68.asm index 0274fe9..5630fc2 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-68.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-68.asm @@ -4,206 +4,143 @@ @in(r0.w) in3 @in(r1.x) in4 @in(r1.y) in5 -@out(r1.y) out0 -@out(r1.z) out1 -@out(r1.w) out2 -@out(r2.x) out3 +@out(r2.x) out0 +@out(r2.y) out1 +@out(r2.z) out2 +@out(r2.w) out3 +@const(c9.x) 0x3f000000, 0x00000000, 0x3f233333, 0xbb449ba6 +@const(c10.x) 0xbf000000, 0x3f800000, 0x40000000, 0x3de38866 +@const(c11.x) 0x3fb8aa65, 0x3cf5c28f, 0x00000000, 0x00000000 +@const(c12.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.x, 4, r1.x add.f r0.y, r0.w, c9.y bary.f r0.w, 5, r1.x bary.f r1.z, 10, r1.x -add.f r1.w, r0.x, c10.x -bary.f r2.x, 8, r1.x +add.f r2.x, r0.x, c10.x +bary.f r1.w, 11, r1.x add.f r2.y, r0.w, c10.x -mov.f32f32 r1.z, r1.z -floor.f r2.z, r1.w +bary.f r3.y, 8, r1.x +floor.f r2.z, r2.x rcp r0.y, r0.y add.f r0.z, r0.z, c9.y floor.f r2.w, r2.y -mov.f32f32 r3.x, r1.z -add.f r1.z, r1.w, (neg)r2.z +bary.f r3.z, 9, r1.x +add.f r3.w, r2.x, (neg)r2.z (ss)mul.f r0.y, r0.z, r0.y -add.f r0.z, r2.y, (neg)r2.w -bary.f r1.w, 11, r1.x -mov.f32f32 r1.z, r1.z +absneg.f r0.z, (neg)c7.x +add.f r4.x, r2.y, (neg)r2.w +mov.f32f32 r4.y, r3.w +sam (f32)(xyzw)r2.x, r1.z, s#0, t#0 +(ss)add.f r1.z, r3.w, c10.y +mul.f r0.z, r0.z, c7.x +mov.f32f32 r1.w, r4.x +mul.f r3.w, c9.x, r4.y +add.f r4.y, c10.z, (neg)r4.y +mul.f r0.z, r0.z, r0.y mov.f32f32 r0.y, r0.y -absneg.f r2.y, (neg)c7.x -mov.f32f32 r0.z, r0.z -mul.f r2.z, c9.x, r1.z -mov.f32f32 r1.w, r1.w -add.f r2.w, c10.z, (neg)r1.z -add.f r3.z, c10.z, (neg)r0.z -mov.f32f32 r2.z, r2.z -mul.f r2.y, r2.y, c7.x -mul.f r3.w, c9.x, r0.z -mov.f32f32 r3.y, r1.w -add.f r0.x, r0.x, (neg)r2.z -mul.f r1.w, r2.y, r0.y -mov.f32f32 r2.y, r3.w -mov.f32f32 r2.z, r2.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r1.w, r1.w -add.f r0.w, r0.w, (neg)r2.y -sam (f32)(xyzw)r3.w, r3.x, s#0, t#0 -(sy)cmps.f.lt r2.y, r4.z, c9.z -add.f r2.w, c10.x, r0.x -mul.f r0.y, r1.w, r0.y +add.f r0.x, r0.x, (neg)r3.w +mul.f r3.w, c9.x, r1.w +mov.f32f32 r4.z, r4.y +mul.f r0.y, r0.z, r0.y +mov.f32f32 r0.z, r0.x +add.f r0.w, r0.w, (neg)r3.w add.f r0.x, c10.y, r0.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.w, r2.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x -add.f r2.w, c10.y, r0.w -mul.f r1.w, r1.w, c3.z mul.f r0.y, r0.y, c11.x -mul.f r0.x, r0.x, c3.z -mov.f32f32 r2.w, r2.w -(ss)mov.f32f32 r3.x, r1.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r3.y, r0.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r4.w, r3.x -add.f r0.w, c10.x, r0.w -mov.f32f32 r3.x, r3.y -mov.f32f32 r5.z, r0.x -mov.f32f32 r0.x, r1.w -mov.f32f32 r0.w, r0.w -exp2 r0.y, r0.y -(ss)mov.f32f32 r0.y, r0.y -mov.f32f32 r6.y, r3.x -mul.f r1.w, r2.w, c3.w -mul.f r0.w, r0.w, c3.w -add.f r2.w, c12.y, (neg)r0.y -mov.f32f32 r7.x, r0.x -mov.f32f32 r0.x, r1.w -mov.f32f32 r3.x, r0.w -mul.f r2.w, r2.w, c7.y -mul.f r0.y, r0.y, c10.y -mov.f32f32 r1.w, r1.w -mov.f32f32 r5.x, r3.x -bary.f r3.x, 6, r1.x -add.f r0.y, r0.y, r2.w -mov.f32f32 r1.w, r1.w -mov.f32f32 r0.w, r0.w -add.f r2.w, r3.x, c9.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r6.z, r1.w -mov.f32f32 r5.w, r0.w -mov.f32f32 r0.w, r2.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.w, r2.w -mov.f32f32 r3.x, r2.w -mov.f32f32 r5.y, r0.w -max.f r0.y, r0.y, c9.y -mov.f32f32 r6.w, r1.w -mov.f32f32 r6.x, r3.x -mov.f32f32 r7.y, r0.x -mov.f32f32 r0.x, r2.w -cov.u32f32 r0.w, r2.y -sam.s (f32)(x)r1.w, r4.w, s#2, t#2 -(sy)mov.f32f32 r1.w, r1.w -min.f r0.y, r0.y, c10.y -sam.s (f32)(x)r2.y, r6.y, s#2, t#2 -(sy)mov.f32f32 r2.y, r2.y -sam.s (f32)(x)r2.w, r5.z, s#2, t#2 -(sy)mov.f32f32 r2.w, r2.w -mov.f32f32 r1.w, r1.w -mov.f32f32 r3.x, r3.z -add.f r3.y, c12.y, (neg)r0.y -add.f r3.z, c12.y, (neg)r0.y -(ss)add.f r4.w, c12.y, (neg)r0.y -mul.f r5.x, r2.z, r3.x -mul.f r3.y, r3.y, c6.z -mul.f r3.z, r3.z, c6.y -mul.f r4.w, r4.w, c6.x -mul.f r1.w, r5.x, r1.w -add.f r1.z, r1.z, c10.y -mov.f32f32 r7.z, r0.x -mov.f32f32 r0.x, (0.000000) -add.f r0.z, r0.z, c10.y -mul.f r3.x, r1.z, r3.x -mov.f32f32 r2.x, r2.x -cmps.f.ne p0.x, r0.w, r0.x -mul.f r0.x, r2.z, r0.z -mad.f32 r0.w, r3.x, r2.w, r1.w -sam.s (f32)(x)r1.w, r7.x, s#2, t#2 +add.f r0.z, c10.x, r0.z +mov.f32f32 r3.w, r0.w +mul.f r4.w, r0.x, c3.z +add.f r0.x, c10.y, r0.w +mul.f r5.z, r0.z, c3.z +add.f r0.z, c10.x, r3.w +mov.f32f32 r6.y, r4.w +mul.f r5.x, r0.x, c3.w +mov.f32f32 r7.x, r5.z +mul.f r6.z, r0.z, c3.w +exp2 r0.x, r0.y +(ss)mov.f32f32 r0.y, r0.x +bary.f r0.z, 6, r1.x +mov.f32f32 r5.w, r5.x +mov.f32f32 r7.y, r6.z +add.f r0.y, c12.y, (neg)r0.y +add.f r5.y, r0.z, c9.w +add.f r0.z, c10.z, (neg)r1.w +mul.f r0.x, r0.x, c10.y +add.f r0.w, r4.x, c10.y +mov.f32f32 r7.z, r5.y +mul.f r0.y, r0.y, c7.y +mov.f32f32 r6.w, r5.y +mov.f32f32 r6.x, r5.y (rpt1)nop -(sy)mov.f32f32 r1.w, r1.w -mov.f32f32 r0.w, r0.w -bary.f r2.z, 0, r1.x -mov.f32f32 r2.w, r2.x -mad.f32 r0.x, r0.x, r1.w, r0.w -bary.f r0.w, 1, r1.x -bary.f r1.w, 9, r1.x +add.f r0.x, r0.x, r0.y +sam.s (f32)(x)r7.x, r7.x, s#2, t#2 +mov.f32f32 r0.y, r0.z +nop +(ss)nop +sam.s (f32)(x)r7.y, r6.y, s#2, t#2 +sam.s (f32)(x)r7.z, r4.w, s#2, t#2 mul.f r0.z, r1.z, r0.z -mov.f32f32 r0.x, r0.x -bary.f (ei)r1.x, 2, r1.x -mov.f32f32 r1.y, r1.w -mad.f32 r0.x, r0.z, r2.y, r0.x +mul.f r1.w, r4.y, r0.w +mul.f r0.y, r4.z, r0.y +max.f r0.x, r0.x, c9.y +sam.s (f32)(x)r3.w, r5.z, s#2, t#2 +(sy)cmps.f.lt r4.x, r2.w, c9.z +mul.f r0.w, r1.z, r0.w +mul.f r0.y, r0.y, r7.x +min.f r0.x, r0.x, c10.y +mad.f32 r0.y, r0.z, r7.y, r0.y +cov.u32f32 r0.z, r4.x +mad.f32 r0.y, r1.w, r3.w, r0.y +add.f r1.z, c12.y, (neg)r0.x +mad.f32 r0.y, r0.w, r7.z, r0.y +add.f r0.w, c12.y, (neg)r0.x +add.f r1.w, c12.y, (neg)r0.x +mov.f32f32 r3.w, (0.000000) +mul.f r0.y, c10.w, r0.y +bary.f r4.x, 0, r1.x +mul.f r1.z, r1.z, c6.z +mul.f r0.w, r0.w, c6.y +mov.f32f32 r4.y, r0.y +bary.f r4.z, 2, r1.x +bary.f (ei)r1.x, 1, r1.x +mul.f r1.y, r2.x, r4.x +mul.f r1.w, r1.w, c6.x +mul.f r4.x, r2.z, r4.z +mul.f r1.x, r2.y, r1.x +mul.f r0.y, r1.y, r0.y +cmps.f.ne p0.x, r0.z, r3.w +mul.f r0.z, r4.x, r4.y +mul.f r1.x, r1.x, r4.y +mad.f32 r0.z, c5.z, r2.z, r0.z +mad.f32 r1.x, c5.y, r2.y, r1.x +mad.f32 r0.y, c5.x, r2.x, r0.y +sam (f32)(w)r3.y, r3.y, s#1, t#1 +(sy)cmps.f.lt r1.y, r4.x, c11.y +mul.f r0.z, r0.x, r0.z +mul.f r1.x, r0.x, r1.x +mul.f r0.x, r0.x, r0.y kill p0.x -mov.f32f32 r1.z, r4.z -mov.f32f32 r3.x, r1.y -mov.f32f32 r0.x, r0.x -mul.f r1.x, r4.y, r1.x -mul.f r0.w, r4.x, r0.w -mov.f32f32 r2.x, r1.z -mul.f r0.x, c10.w, r0.x -mul.f r2.y, r3.w, r2.z -sam (f32)(w)r5.x, r2.w, s#1, t#1 -nop -(sy)cmps.f.lt r2.z, r5.w, c11.y -mov.f32f32 r0.x, r0.x -(ss)mov.f32f32 r2.w, r5.w -mov.f32f32 r3.x, c9.y -nop -mul.f r1.x, r1.x, r0.x -mul.f r0.w, r0.w, r0.x -mul.f r0.x, r2.y, r0.x -cov.u32f32 r2.y, r2.z -mov.f32f32 r1.x, r1.x -mov.f32f32 r0.w, r0.w -mad.f32 r1.x, c5.z, r4.y, r1.x -mad.f32 r0.w, c5.y, r4.x, r0.w -mov.f32f32 r0.x, r0.x -cmps.f.ne r2.y, r2.y, c9.y -mov.f32f32 r1.x, r1.x -mov.f32f32 r0.w, r0.w -mad.f32 r0.x, c5.x, r3.w, r0.x -mov.f32f32 r2.z, r2.w -mul.f r1.x, r0.y, r1.x -mul.f r0.w, r0.y, r0.w -mov.f32f32 r0.x, r0.x -sel.b32 r2.y, r3.x, r2.y, r2.z -add.f r1.x, r1.x, r3.y -add.f r0.w, r0.w, r3.z -mul.f r0.x, r0.y, r0.x -nop -mul.f r0.y, r1.x, r2.y -mul.f r0.w, r0.w, r2.y -add.f r0.x, r0.x, r4.w -nop -mul.f r0.y, r0.y, c4.z -mul.f r0.w, r0.w, c4.y -mul.f r0.x, r0.x, r2.y +add.f r0.y, r0.z, r1.z +cov.u32f32 r0.z, r1.y +add.f r0.w, r1.x, r0.w +add.f r0.x, r0.x, r1.w +mov.f32f32 r1.x, c9.y +cmps.f.ne r0.z, r0.z, c9.y +(rpt2)nop +sel.b32 r0.z, r1.x, r0.z, r4.x +(rpt2)nop +mul.f r0.y, r0.y, r0.z +mul.f r0.w, r0.w, r0.z +(rpt1)nop +mul.f r2.z, r0.y, c4.z +mul.f r2.y, r0.w, c4.y +mul.f r0.x, r0.x, r0.z +(rpt2)nop +mul.f r2.x, r0.x, c4.x +end nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.w, r0.w -mul.f r0.x, r0.x, c4.x nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.x, r0.x nop -mov.f32f32 r1.w, r0.y -mov.f32f32 r1.z, r0.w -mov.f32f32 r0.x, r0.x -(rpt2)nop -mov.f32f32 r1.y, r0.x -end -; FRAG: outputs: r1.y (1:0) +; FRAG: outputs: r2.x (1:0) ; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r2.x (5:10,cm=f,il=12,b=1) r1.x (5:11,cm=f,il=16,b=1) -; FRAG: 195 instructions, 0 half, 8 full -; pos (bary): r1.x -; color: r1.y -; fragcoord: r0.x +; FRAG: 133 instructions, 0 half, 8 full diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-69.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-69.asm index 0e4d5ee..9c8ac11 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-69.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-69.asm @@ -6,134 +6,99 @@ @in(r0.w) in4 @in(r1.x) in5 @in(r1.y) in6 -@in(r1.z) in8 -@in(r1.w) in9 -@out(r6.y) out0 -@out(r6.z) out1 -@out(r6.w) out2 -@out(r7.x) out3 -@out(r5.y) out4 -@out(r5.z) out5 -@out(r5.w) out6 -@out(r6.x) out7 -@out(r2.w) out8 -@out(r3.x) out9 -@out(r3.y) out10 -@out(r3.z) out11 -@out(r3.w) out12 -@out(r4.x) out13 -@out(r4.y) out14 -@out(r4.z) out15 -(sy)(ss)mul.f r2.x, c11.x, r0.w -mul.f r2.y, c11.x, r0.x -mad.f32 r2.x, c12.x, r1.x, r2.x -mad.f32 r2.y, c12.x, r0.y, r2.y -mul.f r2.z, c11.z, r0.x -mad.f32 r2.y, c13.x, r0.z, r2.y -mov.f32f32 r2.x, r2.x -mad.f32 r2.z, c12.z, r0.y, r2.z -mad.f32 r2.x, c13.x, r1.y, r2.x -add.f r2.y, r2.y, c14.x -mad.f32 r2.z, c13.z, r0.z, r2.z -mul.f r2.w, c11.y, r0.w -mov.f32f32 r2.x, r2.x -mul.f r3.x, c7.w, r2.y -mul.f r3.y, c7.z, r2.y -mul.f r3.z, c7.y, r2.y -mul.f r2.x, r2.x, (neg)c4.x -mad.f32 r2.w, c12.y, r1.x, r2.w -mul.f r3.w, c11.y, r0.x -mul.f r4.x, c7.x, r2.y -mad.f32 r3.w, c12.y, r0.y, r3.w -mov.f32f32 r2.w, r2.w -mad.f32 r3.w, c13.y, r0.z, r3.w -mad.f32 r2.w, c13.y, r1.y, r2.w -mul.f r4.y, c0.w, r2.y -mul.f r4.z, c0.z, r2.y -mul.f r4.w, c0.y, r2.y -mov.f32f32 r2.w, r2.w -add.f r3.w, r3.w, c14.y -mul.f r5.x, c0.x, r2.y -add.f r2.z, r2.z, c14.z -mad.f32 r2.x, (neg)c4.y, r2.w, r2.x -mad.f32 r2.w, c8.w, r3.w, r3.x -mad.f32 r3.x, c8.z, r3.w, r3.y -mad.f32 r3.y, c8.y, r3.w, r3.z -mov.f32f32 r2.x, r2.x +@in(r3.z) in8 +@in(r3.w) in9 +@out(r0.x) out0 +@out(r0.y) out1 +@out(r0.z) out2 +@out(r0.w) out3 +@out(r1.x) out4 +@out(r1.y) out5 +@out(r1.z) out6 +@out(r1.w) out7 +@out(r2.x) out8 +@out(r2.y) out9 +@out(r2.z) out10 +@out(r2.w) out11 +@out(r3.x) out12 +@out(r3.y) out13 +@out(r3.z) out14 +@out(r3.w) out15 +@const(c16.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 +@const(c17.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 +(sy)(ss)mul.f r1.z, c11.x, r0.w +mul.f r1.w, c11.x, r0.x +mad.f32 r1.z, c12.x, r1.x, r1.z +mad.f32 r1.w, c12.x, r0.y, r1.w +mad.f32 r1.z, c13.x, r1.y, r1.z +mad.f32 r1.w, c13.x, r0.z, r1.w +mul.f r2.x, c11.z, r0.x +mul.f r2.y, c11.y, r0.x +mul.f r1.z, r1.z, (neg)c4.x +mul.f r2.z, c11.y, r0.w +add.f r1.w, r1.w, c14.x +mad.f32 r2.z, c12.y, r1.x, r2.z +mad.f32 r2.x, c12.z, r0.y, r2.x +mad.f32 r2.z, c13.y, r1.y, r2.z +mul.f r2.w, c7.y, r1.w +mul.f r3.x, c7.x, r1.w +mul.f r3.y, c7.w, r1.w +mad.f32 r1.z, (neg)c4.y, r2.z, r1.z mul.f r0.w, c11.z, r0.w -mad.f32 r2.w, c9.w, r2.z, r2.w +mad.f32 r2.y, c12.y, r0.y, r2.y mad.f32 r0.w, c12.z, r1.x, r0.w +mad.f32 r1.x, c13.y, r0.z, r2.y +mad.f32 r0.w, c13.z, r1.y, r0.w +mul.f r1.y, c7.z, r1.w +mul.f r2.y, c0.w, r1.w +mul.f r2.z, c0.z, r1.w +mad.f32 r0.w, (neg)c4.z, r0.w, r1.z +add.f r4.x, r1.x, c14.y +mul.f r4.y, c0.y, r1.w +mul.f r4.z, c0.x, r1.w +max.f r0.w, c16.x, r0.w +mad.f32 r2.w, c8.y, r4.x, r2.w +mad.f32 r3.x, c8.x, r4.x, r3.x +mad.f32 r3.y, c8.w, r4.x, r3.y +mov.f32f32 r4.w, r0.w +mad.f32 r2.x, c13.z, r0.z, r2.x +mul.f r1.x, r0.w, c5.x +mad.f32 r0.w, c8.z, r4.x, r1.y +mul.f r1.z, r4.w, c5.z +mul.f r1.y, r4.w, c5.y +add.f r4.w, r2.x, c14.z +mad.f32 r2.x, c1.w, r4.x, r2.y +mad.f32 r2.y, c1.z, r4.x, r2.z +mad.f32 r2.z, c1.y, r4.x, r4.y +mad.f32 r2.w, c9.y, r4.w, r2.w mul.f r0.x, c11.w, r0.x -mad.f32 r1.x, c9.z, r2.z, r3.x -mad.f32 r3.x, c9.y, r2.z, r3.y -mov.f32f32 r0.w, r0.w +mad.f32 r3.x, c9.x, r4.w, r3.x mad.f32 r0.x, c12.w, r0.y, r0.x -mad.f32 r0.y, c13.z, r1.y, r0.w +mad.f32 r0.y, c9.w, r4.w, r3.y mad.f32 r0.x, c13.w, r0.z, r0.x -mad.f32 r0.z, c8.x, r3.w, r4.x -mad.f32 r0.w, c1.w, r3.w, r4.y -mov.f32f32 r0.y, r0.y +mad.f32 r0.z, c9.z, r4.w, r0.w +mad.f32 r0.w, c2.w, r4.w, r2.x +mad.f32 r4.y, c2.z, r4.w, r2.y add.f r0.x, r0.x, c14.w -mad.f32 r0.z, c9.x, r2.z, r0.z -mad.f32 r0.w, c2.w, r2.z, r0.w -mad.f32 r0.y, (neg)c4.z, r0.y, r2.x -mad.f32 r1.y, c10.w, r0.x, r2.w -mad.f32 r1.x, c10.z, r0.x, r1.x -mad.f32 r2.x, c10.y, r0.x, r3.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.x, r1.x -mul.f r2.x, r2.x, c15.y -max.f r0.y, c16.x, r0.y -mov.f32f32 r3.z, r1.y -mov.f32f32 r3.y, r1.x -mov.f32f32 r3.x, r2.x -mov.f32f32 r0.y, r0.y -mad.f32 r0.z, c10.x, r0.x, r0.z +mad.f32 r5.x, c2.y, r4.w, r2.z +mad.f32 r4.x, c1.x, r4.x, r4.z +mad.f32 r3.y, c6.x, r4.w, c6.y +mad.f32 r2.x, c10.y, r0.x, r2.w +mad.f32 r3.x, c10.x, r0.x, r3.x +mad.f32 r2.w, c10.w, r0.x, r0.y +mad.f32 r2.z, c10.z, r0.x, r0.z +mul.f r2.y, r2.x, c15.y +mul.f r2.x, r3.x, c15.x mad.f32 r0.w, c3.w, r0.x, r0.w -mad.f32 r1.x, c1.z, r3.w, r4.z -mul.f r1.y, r0.y, c5.z -mul.f r2.x, r0.y, c5.y -mul.f r0.y, r0.y, c5.x -mul.f r0.z, r0.z, c15.x -mov.f32f32 r1.y, r1.y -mov.f32f32 r2.x, r2.x -mov.f32f32 r0.y, r0.y -nop -mov.f32f32 r5.w, r1.y -mov.f32f32 r5.z, r2.x -mov.f32f32 r5.y, r0.y -mov.f32f32 r2.w, r0.z -mov.f32f32 r7.x, r0.w -mad.f32 r0.y, c2.z, r2.z, r1.x -mad.f32 r0.z, c1.y, r3.w, r4.w -mad.f32 r0.y, c3.z, r0.x, r0.y -mad.f32 r0.z, c2.y, r2.z, r0.z -mad.f32 r0.w, c1.x, r3.w, r5.x -mad.f32 r1.x, c6.x, r2.z, c6.y -mov.f32f32 r6.w, r0.y -mad.f32 r0.y, c3.y, r0.x, r0.z -mad.f32 r0.z, c2.x, r2.z, r0.w -mov.f32f32 r0.w, r1.x -mad.f32 r1.x, c6.x, r2.y, c6.y -mov.f32f32 r6.z, r0.y -mad.f32 r0.x, c3.x, r0.x, r0.z -mov.f32f32 r4.x, r0.w -mov.f32f32 r0.y, r1.x -nop -mov.f32f32 r6.y, r0.x -mov.f32f32 r0.x, r1.w -mov.f32f32 r3.w, r0.y -mov.f32f32 r0.y, r1.z -mov.f32f32 r0.z, (0.000000) -mov.f32f32 r4.z, r0.x -nop -mov.f32f32 r4.y, r0.y -mov.f32f32 r6.x, r0.z +mad.f32 r0.z, c3.z, r0.x, r4.y +mad.f32 r0.y, c3.y, r0.x, r5.x +mad.f32 r4.x, c2.x, r4.w, r4.x +mad.f32 r3.x, c6.x, r1.w, c6.y +mad.f32 r0.x, c3.x, r0.x, r4.x +mov.f32f32 r1.w, (0.000000) end nop -; VERT: outputs: r6.y (0:0) r5.y (5:9) r2.w (5:10) r3.w (5:11) -; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r1.z (0:0,cm=3,il=16,b=0) -; VERT: 107 instructions, 0 half, 8 full -; pos: r6.y +; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11) +; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r3.z (0:0,cm=3,il=16,b=0) +; VERT: 71 instructions, 0 half, 6 full diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-70.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-70.asm index 60492a1..2d7ae09 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-70.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-70.asm @@ -4,710 +4,481 @@ @in(r0.w) in3 @in(r1.x) in4 @in(r1.y) in5 -@out(r3.x) out0 -@out(r3.y) out1 -@out(r3.z) out2 -@out(r3.w) out3 -(sy)(ss)bary.f r0.x, 7, r1.x -bary.f r0.y, 8, r1.x +@out(r2.x) out0 +@out(r2.y) out1 +@out(r2.z) out2 +@out(r2.w) out3 +@const(c14.x) 0x3f000000, 0x00000000, 0x43160000, 0x3bdb8bac +@const(c15.x) 0x41000000, 0x3f600000, 0x3e000000, 0x3d4ccccd +@const(c16.x) 0x40000000, 0xbf800000, 0xbb449ba6, 0xbf000000 +@const(c17.x) 0x3f800000, 0xbf000000, 0x3de38866, 0x3fb8aa65 +@const(c18.x) 0x3cf5c28f, 0x00000000, 0x00000000, 0x00000000 +@const(c19.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 +(sy)(ss)bary.f r0.x, 20, r1.x +bary.f r0.y, 7, r1.x +bary.f r1.z, 8, r1.x add.f r0.w, r0.w, c14.y -bary.f r1.z, 9, r1.x -mov.f32f32 r0.x, r0.x -bary.f r1.w, 20, r1.x -add.f r2.x, r0.y, c16.w -add.f r2.y, r1.z, c16.w -bary.f r2.z, 18, r1.x -mul.f r2.w, r1.w, r0.x -bary.f r3.x, 15, r1.x -floor.f r3.y, r2.x +bary.f r1.w, 9, r1.x +mul.f r2.x, r0.x, r0.y +bary.f r2.y, 21, r1.x +bary.f r2.z, 15, r1.x +add.f r2.w, r1.z, c16.w +add.f r3.y, r1.w, c16.w rcp r0.w, r0.w add.f r0.z, r0.z, c14.y -floor.f r3.z, r2.y -mov.f32f32 r3.x, r3.x -bary.f r3.w, 21, r1.x -add.f r2.x, r2.x, (neg)r3.y +mad.f32 r2.x, r2.y, r2.z, r2.x +bary.f r3.z, 22, r1.x +bary.f r3.w, 3, r1.x +floor.f r4.x, r2.w (ss)mul.f r0.z, r0.z, r0.w -(ss)add.f r0.w, r2.y, (neg)r3.z -mad.f32 r2.y, r3.w, r3.x, r2.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r0.z, r0.z -absneg.f r2.w, (neg)c11.x -mov.f32f32 r2.y, r2.y -bary.f r3.y, 3, r1.x -mul.f r3.z, c14.x, r2.x -mul.f r2.w, r2.w, c11.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r3.y, r3.y -bary.f r4.x, 22, r1.x -mov.f32f32 r3.z, r3.z -mul.f r2.w, r2.w, r0.z -mul.f r4.y, c14.x, r0.w -mad.f32 r2.y, r4.x, r3.y, r2.y -add.f r0.y, r0.y, (neg)r3.z -mov.f32f32 r2.w, r2.w -mov.f32f32 r3.z, r4.y -mov.f32f32 r2.y, r2.y -bary.f r4.y, 12, r1.x -mov.f32f32 r0.y, r0.y -mul.f r0.z, r2.w, r0.z -add.f r1.z, r1.z, (neg)r3.z -mul.f r2.w, r1.w, r4.y -bary.f r3.z, 13, r1.x -add.f r4.z, c17.y, r0.y +(ss)absneg.f r0.w, (neg)c11.x +mad.f32 r2.x, r3.z, r3.w, r2.x +add.f r2.w, r2.w, (neg)r4.x +floor.f r4.x, r3.y +mul.f r0.w, r0.w, c11.x +mov.f32f32 r4.y, r2.x +bary.f r4.z, 12, r1.x +mov.f32f32 r4.w, r2.w +mul.f r0.w, r0.w, r0.z mov.f32f32 r0.z, r0.z -add.f r0.y, c17.x, r0.y -mad.f32 r2.w, r3.w, r3.z, r2.w -mov.f32f32 r4.z, r4.z +mul.f r5.x, r0.x, r4.z +bary.f r5.y, 13, r1.x +mul.f r5.z, c14.x, r4.w +mul.f r0.z, r0.w, r0.z +add.f r0.w, r3.y, (neg)r4.x +mad.f32 r3.y, r2.y, r5.y, r5.x +bary.f r4.x, 14, r1.x +add.f r1.z, r1.z, (neg)r5.z mul.f r0.z, r0.z, c17.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r2.w, r2.w -bary.f r4.w, 14, r1.x -mul.f r4.z, r4.z, c5.z -mov.f32f32 r0.z, r0.z -mul.f r0.y, r0.y, c5.z -mad.f32 r2.w, r4.x, r4.w, r2.w -mov.f32f32 r5.x, r4.z -mov.f32f32 r4.z, r4.z -mov.f32f32 r5.y, r0.y -mul.f r5.z, r2.w, r2.w -mov.f32f32 r5.w, r5.x -mad.f32 r5.x, r2.y, r2.y, r5.z -mov.f32f32 r1.z, r1.z +mov.f32f32 r5.x, r0.w +mad.f32 r3.y, r3.z, r4.x, r3.y +mov.f32f32 r5.z, r1.z +add.f r1.z, c17.x, r1.z +mul.f r5.w, c14.x, r5.x +mul.f r6.x, r3.y, r3.y +add.f r5.z, c17.y, r5.z +mad.f32 r2.x, r2.x, r4.y, r6.x +bary.f r6.x, 4, r1.x exp2 r0.z, r0.z -(ss)mov.f32f32 r0.z, r0.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r5.x, r5.x -bary.f r5.z, 4, r1.x -add.f r6.x, c17.y, r1.z -add.f r6.y, c19.y, (neg)r0.z -mov.f32f32 r0.y, r0.y -mul.f r6.z, r1.w, r5.z -bary.f r6.w, 5, r1.x -mov.f32f32 r6.x, r6.x +(ss)mov.f32f32 r6.y, r0.z +mul.f r6.z, r5.z, c5.z +add.f r1.w, r1.w, (neg)r5.w +mul.f r5.z, r0.x, r6.x +bary.f r5.w, 5, r1.x +mov.f32f32 r7.y, r6.z +add.f r6.y, c19.y, (neg)r6.y +mov.f32f32 r6.w, r1.w +mad.f32 r5.z, r2.y, r5.w, r5.z +bary.f r8.x, 6, r1.x mul.f r6.y, r6.y, c11.y -mul.f r0.z, r0.z, c17.x -mad.f32 r6.z, r3.w, r6.w, r6.z -mul.f r7.x, r6.x, c5.w -mov.f32f32 r7.y, r0.y -add.f r0.y, c17.x, r1.z -mov.f32f32 r1.z, r6.z -bary.f r6.z, 6, r1.x -mov.f32f32 r6.x, r7.x +(ss)mul.f r0.z, r0.z, c17.x +add.f r6.w, c17.y, r6.w +mad.f32 r5.z, r3.z, r8.x, r5.z +mul.f r8.y, r1.z, c5.z +add.f r1.z, c17.x, r1.w add.f r0.z, r0.z, r6.y -mov.f32f32 r0.y, r0.y -mad.f32 r1.z, r4.x, r6.z, r1.z -mov.f32f32 r6.x, r6.x -bary.f r6.y, 10, r1.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r1.z, r1.z -mul.f r0.y, r0.y, c5.w -add.f r8.x, r6.y, c16.z -mov.f32f32 r8.y, r5.y -mad.f32 r5.x, r1.z, r1.z, r5.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r5.y, r8.x -mov.f32f32 r7.z, r0.y -mov.f32f32 r7.x, r7.x -mov.f32f32 r9.x, r4.z -mov.f32f32 r6.y, r5.y -rsq r4.z, r5.x -(ss)mov.f32f32 r4.z, r4.z +mov.f32f32 r1.w, r5.z +mul.f r9.y, r6.w, c5.w +mov.f32f32 r9.x, r8.y +mul.f r8.z, r1.z, c5.w +mad.f32 r1.z, r5.z, r1.w, r2.x +mov.f32f32 r7.z, r9.y max.f r0.z, r0.z, c14.y -(ss)mov.f32f32 r5.x, r7.z -mov.f32f32 r8.z, r7.x -mul.f r1.z, r1.z, r4.z +bary.f r2.x, 10, r1.x +mov.f32f32 r6.w, r8.z +add.f r4.w, c16.x, (neg)r4.w +mul.f r0.x, r0.x, r0.x +rsq r1.z, r1.z +(ss)mov.f32f32 r5.z, r1.z +add.f r8.w, r2.x, c16.z min.f r0.z, r0.z, c17.x -sam.s (f32)(x)r5.y, r5.w, s#4, t#4 -(sy)mov.f32f32 r5.y, r5.y -mov.f32f32 r7.z, r5.x -mov.f32f32 r1.z, r1.z -add.f r5.x, c19.y, (neg)r0.z -(ss)add.f r5.w, c19.y, (neg)r0.z -add.f r6.x, c19.y, (neg)r0.z -mul.f r1.z, r1.z, c15.x -mov.f32f32 r5.y, r5.y -add.f r6.y, c16.x, (neg)r2.x -mul.f r5.x, r5.x, c10.z -mov.f32f32 r1.z, r1.z -mul.f r5.w, r5.w, c10.y -mul.f r6.x, r6.x, c10.x -mov.f32f32 r6.y, r6.y -add.f r7.x, c16.x, (neg)r0.w -mov.f32f32 r7.w, r8.x -mul.f r2.y, r2.y, r4.z -rcp r1.z, r1.z -(ss)mov.f32f32 r1.z, r1.z -mul.f r2.w, r2.w, r4.z -mov.f32f32 r4.z, r7.x -mov.f32f32 r2.y, r2.y -mov.f32f32 r7.w, r7.w -mov.f32f32 r2.w, r2.w -mul.f r7.x, r6.y, r4.z -mov.f32f32 r2.y, r2.y -mul.f r1.w, r1.w, r1.w -absneg.f r2.w, (neg)r2.w -mad.f32 r1.w, r3.w, r3.w, r1.w -mul.f r3.w, r7.x, r5.y -mov.f32f32 r5.y, r8.x -sam.s (f32)(x)r7.x, r7.y, s#4, t#4 -(sy)mov.f32f32 r7.x, r7.x -mov.f32f32 r1.w, r1.w -mov.f32f32 r0.y, r0.y -mad.f32 r1.w, r4.x, r4.x, r1.w -mov.f32f32 r8.w, r5.y -mov.f32f32 r4.x, r2.z -add.f r2.x, r2.x, c17.x +(ss)mul.f r1.z, r3.y, r1.z +mul.f r1.w, r1.w, r5.z +mov.f32f32 r7.w, r8.w +add.f r2.x, c19.y, (neg)r0.z +add.f r3.y, c19.y, (neg)r0.z +mul.f r1.w, r1.w, c15.x +add.f r6.y, c19.y, (neg)r0.z +mov.f32f32 r9.z, r8.w +mov.f32f32 r7.x, r8.w +nop +sam.s (f32)(x)r9.w, r7.y, s#4, t#4 +(ss)mul.f r7.y, r2.x, c10.z +mov.f32f32 r2.x, r4.w +rcp r1.w, r1.w +(ss)mov.f32f32 r7.z, r1.w +mul.f r4.y, r4.y, r5.z +mad.f32 r0.x, r2.y, r2.y, r0.x +add.f r2.y, c16.x, (neg)r5.x +mad.f32 r0.x, r3.z, r3.z, r0.x +mul.f r3.y, r3.y, c10.y +mul.f r3.z, r6.y, c10.x +mov.f32f32 r5.x, r2.y +absneg.f r1.z, (neg)r1.z +sam.s (f32)(x)r10.x, r9.x, s#4, t#4 +sam.s (f32)(x)r6.y, r6.z, s#4, t#4 +sam.s (f32)(x)r8.y, r8.y, s#4, t#4 +add.f r2.w, r2.w, c17.x add.f r0.w, r0.w, c17.x -bary.f r5.y, 16, r1.x -(ss)mov.f32f32 r7.y, r4.x -sqrt r1.w, r1.w -(ss)mov.f32f32 r1.w, r1.w -sam.s (f32)(x)r4.x, r8.y, s#4, t#4 -(sy)mov.f32f32 r4.x, r4.x -mul.f r4.z, r2.x, r4.z -mov.f32f32 r9.y, r0.y -add.f r0.y, c14.z, (neg)r1.w -mov.f32f32 r1.w, r8.x -bary.f r7.w, 19, r1.x -mul.f r6.y, r6.y, r0.w -mov.f32f32 r0.y, r0.y -mad.f32 r3.w, r4.z, r4.x, r3.w -mov.f32f32 r9.z, r1.w -mov.f32f32 r1.w, r7.w -mul.f r0.y, c12.z, r0.y -mov.f32f32 r3.w, r3.w -mov.f32f32 r4.x, r5.y -mov.f32f32 r7.z, r1.w -mov.f32f32 r0.y, r0.y -sam.s (f32)(x)r1.w, r9.x, s#4, t#4 -(sy)mov.f32f32 r1.w, r1.w -mov.f32f32 r8.x, r4.x -bary.f r4.x, 17, r1.x -mul.f r0.y, r0.y, c14.w -mad.f32 r1.w, r6.y, r1.w, r3.w -(ss)nop -sam (f32)(w)r8.y, r7.y, s#2, t#2 -(sy)cmps.f.lt r3.w, r9.x, c15.y -mov.f32f32 r4.x, r4.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.w, r1.w -mul.f r0.w, r2.x, r0.w -cov.u32f32 r2.x, r3.w -mul.f r2.w, r2.w, r0.y -mul.f r0.y, r2.y, r0.y -mad.f32 r0.w, r0.w, r7.x, r1.w -cmps.f.ne r1.w, r2.x, c14.y -mov.f32f32 r2.x, r2.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r2.y, c15.z -mul.f r2.x, r2.x, r1.z -mul.f r0.y, r0.y, r1.z +sqrt r0.x, r0.x +(ss)add.f r0.x, c14.z, (neg)r0.x +mul.f r2.x, r2.x, r5.x +mul.f r2.y, r2.w, r2.y +mul.f r4.w, r4.w, r0.w +mul.f r0.x, c12.z, r0.x +(sy)mul.f r2.x, r2.x, r9.w +bary.f r6.z, 18, r1.x +mul.f r0.w, r2.w, r0.w +mul.f r0.x, r0.x, c14.w +mad.f32 r2.x, r2.y, r10.x, r2.x +bary.f r6.w, 19, r1.x +bary.f r8.z, 16, r1.x +mov.f32f32 r2.y, r0.x +mad.f32 r2.x, r4.w, r6.y, r2.x +mul.f r0.x, r1.z, r0.x +mad.f32 r0.w, r0.w, r8.y, r2.x +mul.f r1.z, r4.y, r2.y +sam (f32)(w)r9.x, r6.z, s#2, t#2 +bary.f r8.w, 17, r1.x +mul.f r0.x, r0.x, r1.w +(sy)cmps.f.lt r1.w, r9.w, c15.y +mul.f r1.z, r1.z, r7.z mul.f r0.w, c17.z, r0.w -mov.f32f32 r1.z, c14.y -mov.f32f32 r2.x, r2.x +mov.f32f32 r2.x, r0.x +cov.u32f32 r1.w, r1.w +mov.f32f32 r2.y, r1.z +cmps.f.lt r2.w, r9.w, c15.y +mov.f32f32 r4.y, r0.w +cmps.f.ne r1.w, r1.w, c14.y +mov.f32f32 r4.w, c14.y +mov.f32f32 r5.x, c14.y +cov.u32f32 r2.w, r2.w +sam (f32)(w)r8.y, r8.z, s#1, t#1 +(sy)cmps.f.lt r5.z, r9.x, c18.x +sel.b32 r1.z, r1.z, r1.w, r4.w +sel.b32 r0.x, r0.x, r1.w, r5.x +(rpt1)nop +add.f r1.w, r6.w, r1.z +add.f r1.z, r6.z, r0.x +cmps.f.ne r0.x, r2.w, c14.y +cov.u32f32 r2.w, r5.z +mov.f32f32 r4.w, c15.z +mov.f32f32 r5.x, c14.y mov.f32f32 r0.y, r0.y -mov.f32f32 r0.w, r0.w -sel.b32 r1.z, r2.y, r1.w, r1.z -mov.f32f32 r1.w, r2.x -mov.f32f32 r2.y, r0.y -mov.f32f32 r2.w, r0.y -mov.f32f32 r3.w, r0.y -mov.f32f32 r1.w, r1.w -cmps.f.lt r4.z, r9.x, c15.y -mov.f32f32 r2.y, r2.y -mov.f32f32 r2.w, r2.w +mov.f32f32 r5.z, r2.z +sam (f32)(w)r6.y, r1.z, s#2, t#2 +cmps.f.ne r2.z, r2.w, c14.y +sel.b32 r0.x, r4.w, r0.x, r5.x +mov.f32f32 r2.w, c14.y mov.f32f32 r3.w, r3.w -cov.u32f32 r4.z, r4.z -mov.f32f32 r5.y, r2.x -mov.f32f32 r6.y, r0.y -mov.f32f32 r7.x, r2.x -cmps.f.ne r4.z, r4.z, c14.y -(ss)mov.f32f32 r7.y, c14.y -mov.f32f32 r7.z, c14.y -mov.f32f32 r5.y, r5.y -mov.f32f32 r6.y, r6.y -sel.b32 r1.w, r1.w, r4.z, r7.y -sel.b32 r2.y, r2.y, r4.z, r7.z -mov.f32f32 r4.z, r7.x -mov.f32f32 r7.x, r0.y -add.f r1.w, r2.z, r1.w -add.f r2.y, r7.w, r2.y -mov.f32f32 r2.z, r2.x -mov.f32f32 r7.y, r0.y -mov.f32f32 r7.z, r1.w -mov.f32f32 r7.w, r2.y -mov.f32f32 r7.x, r7.x -mov.f32f32 r2.z, r2.z -mov.f32f32 r8.y, r7.z -mov.f32f32 r8.z, r7.w -mov.f32f32 r7.y, r7.y -mov.f32f32 r7.z, r2.x -mov.f32f32 r7.w, r0.y -mov.f32f32 r8.w, r2.x -mov.f32f32 r9.x, r0.y -mov.f32f32 r9.y, r2.x -sam (f32)(w)r9.z, r8.y, s#2, t#2 -add.f r1.z, c15.y, (neg)r1.z -mov.f32f32 r7.z, r7.z -mov.f32f32 r7.w, r7.w -(ss)mov.f32f32 r8.z, r8.w -(sy)cmps.f.lt r8.y, r10.y, r1.z -cmps.f.lt r8.w, r10.y, r1.z -mov.f32f32 r9.x, r9.x -mov.f32f32 r9.y, r9.y +mov.f32f32 r4.w, (0.000000) +add.f r0.x, c15.y, (neg)r0.x +sel.b32 r5.x, r2.w, r2.z, r9.x +bary.f r6.y, 2, r1.x +(ss)bary.f r6.z, 1, r1.x +(sy)cmps.f.lt r2.z, r7.x, r0.x +cmps.f.lt r2.w, r7.x, r0.x +bary.f r6.w, 23, r1.x +bary.f r7.x, 24, r1.x +cov.u32f32 r2.z, r2.z +cov.u32f32 r2.w, r2.w +bary.f r7.z, 25, r1.x +bary.f (ei)r1.x, 0, r1.x +cmps.f.ne r1.y, r2.z, c14.y +mov.f32f32 r2.z, c14.y +cmps.f.ne r2.w, r2.w, c14.y +mov.f32f32 r7.w, c14.y +mov.f32f32 r8.y, c15.z +sel.b32 r2.z, r2.x, r1.y, r2.z +mov.f32f32 r8.z, c14.y +sel.b32 r1.y, r2.y, r1.y, r7.w +mov.f32f32 r7.w, c14.y +add.f r1.z, r1.z, r2.z +sel.b32 r2.z, r8.y, r2.w, r8.z +add.f r8.z, r1.w, r1.y +mov.f32f32 r1.y, c14.y +mov.f32f32 r8.y, r1.z +add.f r0.x, r0.x, (neg)r2.z +mov.f32f32 r1.w, r8.z +mov.f32f32 r2.z, c14.y +mov.f32f32 r2.w, c15.z +mov.f32f32 r8.w, c14.y +mov.f32f32 r9.x, c14.y +sam (f32)(w)r9.y, r8.y, s#2, t#2 +(sy)(ss)cmps.f.lt r8.y, r10.x, r0.x +mov.f32f32 r0.x, r0.x +mov.f32f32 r8.z, c14.y +mov.f32f32 r9.y, c15.z cov.u32f32 r8.y, r8.y -cov.u32f32 r8.w, r8.w -mov.f32f32 r9.z, r2.x -nop -mov.f32f32 r8.y, r8.y -cmps.f.ne r8.w, r8.w, c14.y -mov.f32f32 r9.w, c15.z +cmps.f.lt r9.z, r10.x, r0.x +mov.f32f32 r9.w, c14.y mov.f32f32 r10.x, c14.y -cmps.f.ne r10.y, r8.y, c14.y -mov.f32f32 r9.z, r9.z -mov.f32f32 r10.z, c14.y -mov.f32f32 r10.w, c14.y -sel.b32 r8.w, r9.w, r8.w, r10.x -mov.f32f32 r8.y, r4.x -sel.b32 r4.x, r9.z, r10.y, r10.z -sel.b32 r9.x, r9.x, r10.y, r10.w -add.f r1.z, r1.z, (neg)r8.w -bary.f r8.w, 23, r1.x -add.f r1.w, r1.w, r4.x -add.f r2.y, r2.y, r9.x -mov.f32f32 r1.z, r1.z -sam (f32)(w)r9.z, r8.x, s#1, t#1 -(sy)cmps.f.lt r4.x, r10.y, c18.x -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.y, r2.y -(ss)mov.f32f32 r8.x, r10.y -cov.u32f32 r4.x, r4.x -mov.f32f32 r8.y, r1.w -mov.f32f32 r9.x, r2.y -mov.f32f32 r8.x, r8.x -mov.f32f32 r8.w, r8.w -mov.f32f32 r9.z, r8.y -mov.f32f32 r9.w, r9.x -cmps.f.ne r4.x, r4.x, c14.y -(rpt1)nop -mov.f32f32 r8.y, c14.y -bary.f r9.x, 24, r1.x -bary.f r10.x, 25, r1.x -sam (f32)(w)r10.y, r9.z, s#2, t#2 -(sy)(ss)cmps.f.lt r9.z, r11.x, r1.z -cmps.f.lt r9.w, r11.x, r1.z -sel.b32 r4.x, r8.y, r4.x, r8.x -mov.f32f32 r8.x, r9.x -cov.u32f32 r8.y, r9.z -cov.u32f32 r9.x, r9.w -mov.f32f32 r9.z, r10.x -mov.f32f32 r9.w, (0.000000) -mov.f32f32 r8.y, r8.y -cmps.f.ne r9.x, r9.x, c14.y -mov.f32f32 r10.x, c15.z -mov.f32f32 r10.y, c14.y cmps.f.ne r8.y, r8.y, c14.y +mov.f32f32 r10.y, c14.y +cov.u32f32 r9.z, r9.z mov.f32f32 r10.z, c14.y mov.f32f32 r10.w, c14.y -sel.b32 r9.x, r10.x, r9.x, r10.y -bary.f r10.x, 2, r1.x -sel.b32 r9.y, r9.y, r8.y, r10.z -sel.b32 r7.w, r7.w, r8.y, r10.w -add.f r1.z, r1.z, (neg)r9.x -bary.f r8.y, 1, r1.x -add.f r1.w, r1.w, r9.y -add.f r2.y, r2.y, r7.w -mov.f32f32 r1.z, r1.z -bary.f (ei)r1.x, 0, r1.x -mov.f32f32 r1.y, r1.w -mov.f32f32 r1.w, r2.y -mov.f32f32 r2.y, c14.y -mov.f32f32 r7.w, c14.y -mov.f32f32 r9.x, r1.y -mov.f32f32 r9.y, r1.w -mov.f32f32 r10.y, c14.y +sel.b32 r10.y, r2.x, r8.y, r10.y +cmps.f.ne r9.z, r9.z, c14.y +sel.b32 r8.y, r2.y, r8.y, r10.z mov.f32f32 r10.z, c15.z -mov.f32f32 r10.w, r9.x -mov.f32f32 r11.x, r9.y -mov.f32f32 r9.x, c14.y -mov.f32f32 r9.y, c14.y -mov.f32f32 r11.y, c14.y -mov.f32f32 r11.z, c15.z -mov.f32f32 r11.w, c14.y -mov.f32f32 r12.x, c14.y -sam (f32)(w)r12.y, r10.w, s#2, t#2 -(sy)(ss)cmps.f.lt r10.w, r13.x, r1.z -cmps.f.lt r11.x, r13.x, r1.z -mov.f32f32 r12.y, c14.y -mov.f32f32 r12.z, c15.z -cov.u32f32 r10.w, r10.w -cov.u32f32 r11.x, r11.x -mov.f32f32 r12.w, c14.y -mov.f32f32 r13.x, c14.y -mov.f32f32 r10.w, r10.w -cmps.f.ne r11.x, r11.x, c14.y -mov.f32f32 r13.y, c15.z -mov.f32f32 r13.z, c14.y -cmps.f.ne r10.w, r10.w, c14.y -mov.f32f32 r13.w, c14.y -mov.f32f32 r14.x, c14.y -sel.b32 r11.x, r13.y, r11.x, r13.z -nop -sel.b32 r8.z, r8.z, r10.w, r13.w -sel.b32 r7.y, r7.y, r10.w, r14.x -add.f r1.z, r1.z, (neg)r11.x -nop -add.f r1.y, r1.y, r8.z -add.f r1.w, r1.w, r7.y -mov.f32f32 r1.z, r1.z +add.f r1.z, r1.z, r10.y +mov.f32f32 r10.y, c14.y +add.f r11.y, r1.w, r8.y +mov.f32f32 r1.w, c15.z +mov.f32f32 r11.x, r1.z +sel.b32 r8.y, r10.z, r9.z, r10.y +mov.f32f32 r9.z, r11.y +mov.f32f32 r10.y, c14.y +mov.f32f32 r10.z, c14.y +mov.f32f32 r11.z, c14.y +mov.f32f32 r11.w, c15.z +sam (f32)(w)r12.x, r11.x, s#2, t#2 +add.f r0.x, r0.x, (neg)r8.y +mov.f32f32 r8.y, c14.y +(ss)mov.f32f32 r11.x, c14.y nop -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.w, r1.w -(rpt1)nop -mov.f32f32 r7.y, r1.y -mov.f32f32 r8.z, r1.w -(rpt1)nop -mov.f32f32 r10.w, r7.y -mov.f32f32 r11.x, r8.z -(rpt5)nop -sam (f32)(w)r13.y, r10.w, s#2, t#2 -(sy)cmps.f.lt r7.y, r14.x, r1.z -cmps.f.lt r8.z, r14.x, r1.z +(sy)cmps.f.lt r11.y, r12.w, r0.x +mov.f32f32 r0.x, r0.x (rpt1)nop -cov.u32f32 r7.y, r7.y -cov.u32f32 r8.z, r8.z +cov.u32f32 r11.y, r11.y +cmps.f.lt r12.x, r12.w, r0.x (rpt1)nop -mov.f32f32 r7.y, r7.y -cmps.f.ne r8.z, r8.z, c14.y +cmps.f.ne r11.y, r11.y, c14.y +cov.u32f32 r12.x, r12.x (rpt1)nop -cmps.f.ne r7.y, r7.y, c14.y -sel.b32 r8.z, r12.z, r8.z, r12.y -(rpt1)nop -sel.b32 r7.z, r7.z, r7.y, r13.x -sel.b32 r7.x, r7.x, r7.y, r12.w -add.f r1.z, r1.z, (neg)r8.z +sel.b32 r11.x, r2.x, r11.y, r11.x +cmps.f.ne r12.x, r12.x, c14.y +sel.b32 r8.y, r2.y, r11.y, r8.y nop -add.f r1.y, r1.y, r7.z -add.f r1.w, r1.w, r7.x -mov.f32f32 r1.z, r1.z +add.f r1.z, r1.z, r11.x +sel.b32 r11.x, r11.w, r12.x, r11.z +add.f r11.z, r9.z, r8.y nop -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.w, r1.w -(rpt1)nop -mov.f32f32 r7.x, r1.y -mov.f32f32 r7.y, r1.w -(rpt1)nop -(ss)mov.f32f32 r10.w, r7.x -mov.f32f32 r11.x, r7.y -(rpt5)nop -sam (f32)(w)r12.y, r10.w, s#2, t#2 -(sy)cmps.f.lt r7.x, r13.x, r1.z -cmps.f.lt r7.y, r13.x, r1.z -(rpt1)nop -cov.u32f32 r7.x, r7.x -cov.u32f32 r7.y, r7.y +mov.f32f32 r11.y, r1.z +add.f r0.x, r0.x, (neg)r11.x +mov.f32f32 r8.y, r11.z +(rpt3)nop +sam (f32)(w)r11.x, r11.y, s#2, t#2 +(sy)cmps.f.lt r9.z, r11.w, r0.x +mov.f32f32 r0.x, r0.x (rpt1)nop -mov.f32f32 r7.x, r7.x -cmps.f.ne r7.y, r7.y, c14.y +cov.u32f32 r9.z, r9.z +cmps.f.lt r11.x, r11.w, r0.x (rpt1)nop -cmps.f.ne r7.x, r7.x, c14.y -sel.b32 r7.y, r11.z, r7.y, r11.y +cmps.f.ne r9.z, r9.z, c14.y +cov.u32f32 r11.x, r11.x (rpt1)nop -sel.b32 r2.z, r2.z, r7.x, r12.x -sel.b32 r6.y, r6.y, r7.x, r11.w -add.f r1.z, r1.z, (neg)r7.y +sel.b32 r10.z, r2.x, r9.z, r10.z +cmps.f.ne r11.x, r11.x, c14.y +sel.b32 r9.z, r2.y, r9.z, r10.y nop -add.f r1.y, r1.y, r2.z -add.f r1.w, r1.w, r6.y -mov.f32f32 r1.z, r1.z +add.f r1.z, r1.z, r10.z +sel.b32 r1.w, r1.w, r11.x, r10.w +add.f r10.z, r8.y, r9.z nop -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.w, r1.w +mov.f32f32 r10.y, r1.z +add.f r0.x, r0.x, (neg)r1.w +mov.f32f32 r1.w, r10.z +(rpt3)nop +sam (f32)(w)r10.y, r10.y, s#2, t#2 +(sy)cmps.f.lt r8.y, r11.x, r0.x +mov.f32f32 r0.x, r0.x (rpt1)nop -mov.f32f32 r2.z, r1.y -mov.f32f32 r6.y, r1.w +cov.u32f32 r8.y, r8.y +cmps.f.lt r9.z, r11.x, r0.x (rpt1)nop -mov.f32f32 r7.x, r2.z -mov.f32f32 r7.y, r6.y -(rpt5)nop -sam (f32)(w)r10.w, r7.x, s#2, t#2 -(sy)cmps.f.lt r2.z, r11.z, r1.z -cmps.f.lt r6.y, r11.z, r1.z +cmps.f.ne r8.y, r8.y, c14.y +cov.u32f32 r9.z, r9.z (rpt1)nop -cov.u32f32 r2.z, r2.z -cov.u32f32 r6.y, r6.y +sel.b32 r10.x, r2.x, r8.y, r10.x +cmps.f.ne r9.z, r9.z, c14.y +sel.b32 r8.y, r2.y, r8.y, r9.w +nop +add.f r1.z, r1.z, r10.x +sel.b32 r8.z, r9.y, r9.z, r8.z +add.f r9.z, r1.w, r8.y +nop +mov.f32f32 r9.y, r1.z +add.f r0.x, r0.x, (neg)r8.z +mov.f32f32 r1.w, r9.z +(rpt3)nop +sam (f32)(w)r9.y, r9.y, s#2, t#2 +(sy)cmps.f.lt r8.y, r10.x, r0.x +mov.f32f32 r0.x, r0.x (rpt1)nop -mov.f32f32 r2.z, r2.z -cmps.f.ne r6.y, r6.y, c14.y +cov.u32f32 r8.y, r8.y +cmps.f.lt r8.z, r10.x, r0.x (rpt1)nop -cmps.f.ne r2.z, r2.z, c14.y -sel.b32 r6.y, r10.z, r6.y, r10.y +cmps.f.ne r8.y, r8.y, c14.y +cov.u32f32 r8.z, r8.z (rpt1)nop -sel.b32 r4.z, r4.z, r2.z, r9.y -sel.b32 r2.z, r3.w, r2.z, r9.x -add.f r1.z, r1.z, (neg)r6.y +sel.b32 r9.x, r2.x, r8.y, r9.x +cmps.f.ne r8.z, r8.z, c14.y +sel.b32 r8.y, r2.y, r8.y, r8.w nop -add.f r1.y, r1.y, r4.z -add.f r1.w, r1.w, r2.z -mov.f32f32 r1.z, r1.z +add.f r1.z, r1.z, r9.x +sel.b32 r2.z, r2.w, r8.z, r2.z +add.f r8.z, r1.w, r8.y nop -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.w, r1.w -(rpt1)nop -mov.f32f32 r2.z, r1.y -mov.f32f32 r3.w, r1.w +mov.f32f32 r8.y, r1.z +add.f r0.x, r0.x, (neg)r2.z +mov.f32f32 r1.w, r8.z +(rpt3)nop +sam (f32)(w)r8.y, r8.y, s#2, t#2 +(sy)cmps.f.lt r2.z, r9.x, r0.x +mov.f32f32 r0.x, r0.x (rpt1)nop -(ss)mov.f32f32 r7.x, r2.z -mov.f32f32 r7.y, r3.w -(rpt5)nop -sam (f32)(w)r10.y, r7.x, s#2, t#2 -(sy)cmps.f.lt r2.z, r11.x, r1.z -(rpt2)nop cov.u32f32 r2.z, r2.z (rpt2)nop cmps.f.ne r2.z, r2.z, c14.y (rpt2)nop -sel.b32 r3.w, r5.y, r2.z, r7.w -sel.b32 r2.y, r2.w, r2.z, r2.y +sel.b32 r1.y, r2.x, r2.z, r1.y +sel.b32 r2.z, r2.y, r2.z, r7.w (rpt1)nop -add.f r1.y, r1.y, r3.w -add.f r1.w, r1.w, r2.y +add.f r1.y, r1.z, r1.y +add.f r1.w, r1.w, r2.z (rpt1)nop -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.w, r1.w +mov.f32f32 r1.z, r1.y +mov.f32f32 r2.z, r1.w (rpt1)nop -add.f r2.y, r1.y, (neg)r2.x -mov.f32f32 r2.z, r1.y -add.f r2.w, r1.w, (neg)r0.y -mov.f32f32 r3.w, r1.w -mov.f32f32 r2.y, r2.y -(ss)mov.f32f32 r7.x, r2.z -mov.f32f32 r2.z, r2.w -mov.f32f32 r7.y, r3.w -mov.f32f32 r2.y, r2.y -(rpt2)nop -mov.f32f32 r7.z, r2.y -mov.f32f32 r2.y, r2.z -sam (f32)(w)r10.y, r7.x, s#2, t#2 -(sy)add.f r2.z, c15.z, r11.x -add.f r1.z, r11.x, (neg)r1.z -nop -mov.f32f32 r7.w, r2.y -mov.f32f32 r2.y, r2.z -mov.f32f32 r1.z, r1.z -(rpt3)nop -sam (f32)(w)r7.x, r7.z, s#2, t#2 -(sy)add.f r2.y, r2.y, (neg)r7.w -(rpt2)nop -mov.f32f32 r2.y, r2.y +(ss)add.f r8.y, r1.z, (neg)r2.x +add.f r8.z, r2.z, (neg)r2.y +(rpt1)nop +sam (f32)(w)r8.w, r1.z, s#2, t#2 +(sy)(ss)add.f r1.z, c15.z, r9.z +add.f r0.x, r9.z, (neg)r0.x +(rpt1)nop +sam (f32)(w)r8.y, r8.y, s#2, t#2 +(sy)add.f r1.z, r1.z, (neg)r9.x (rpt5)nop -rcp r2.y, r2.y -(ss)mov.f32f32 r2.y, r2.y -(rpt2)nop -mul.f r1.z, r1.z, r2.y -(rpt2)nop -mov.f32f32 r1.z, r1.z +rcp r1.z, r1.z +(ss)mul.f r0.x, r0.x, r1.z (rpt2)nop -mul.f r2.x, r2.x, r1.z -mul.f r0.y, r0.y, r1.z -(rpt1)nop -mov.f32f32 r1.z, r2.x -mov.f32f32 r0.y, r0.y +(ss)mov.f32f32 r1.z, r0.x +mul.f r0.x, r2.x, r0.x (rpt1)nop -add.f r1.y, r1.y, (neg)r1.z -add.f r0.y, r1.w, (neg)r0.y +mul.f r1.z, r2.y, r1.z +add.f r1.w, r1.y, (neg)r0.x (rpt1)nop -mov.f32f32 r1.y, r1.y -mov.f32f32 r0.y, r0.y +add.f r2.x, r2.z, (neg)r1.z +mov.f32f32 r1.y, r1.w (rpt1)nop -mov.f32f32 r1.z, r1.y -mov.f32f32 r1.w, r1.y -mov.f32f32 r1.y, r1.y -mov.f32f32 r2.x, r0.y -mov.f32f32 r2.y, r1.z -mov.f32f32 r1.z, r0.y -mov.f32f32 r7.x, r1.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.y, r1.y -mov.f32f32 r2.z, r1.z mov.f32f32 r1.z, r2.x -mov.f32f32 r7.y, r0.y -nop -mov.f32f32 r1.w, r1.y -mov.f32f32 r2.x, r1.z +mov.f32f32 r8.y, r1.y nop -sam (f32)(xyz)r10.y, r2.y, s#2, t#2 -(sy)mad.f32 r0.y, c16.x, r10.z, c16.y -mad.f32 r1.y, c16.x, r10.y, c16.y -sam (f32)(xyzw)r7.x, r7.x, s#0, t#0 -(sy)cmps.f.lt r1.z, r7.w, c15.w -(ss)mov.f32f32 r2.y, r7.w -mov.f32f32 r0.y, r0.y -sam (f32)(xyz)r11.x, r1.w, s#3, t#3 -(sy)(ss)mul.f r1.w, c8.y, r11.y -mul.f r2.x, c8.x, r11.x -cov.u32f32 r1.z, r1.z -absneg.f r0.y, (neg)r0.y -mov.f32f32 r1.y, r1.y -mul.f r2.z, c8.z, r11.z -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.x, r2.x -mul.f r2.w, r4.y, r1.y -mul.f r3.z, r3.z, r1.y -mad.f32 r0.x, r0.x, r0.y, r2.w -mad.f32 r2.w, r3.x, r0.y, r3.z -mul.f r1.y, r4.w, r1.y -cmps.f.ne p0.x, r1.z, r9.w -mov.f32f32 r0.x, r0.x -mad.f32 r1.z, c16.x, r10.w, c16.y -mov.f32f32 r2.w, r2.w -mad.f32 r0.y, r3.y, r0.y, r1.y -mov.f32f32 r1.y, r2.z -mov.f32f32 r1.z, r1.z +sam (f32)(xyzw)r2.x, r1.w, s#0, t#0 +(sy)cmps.f.lt r0.x, r2.w, c15.w +mov.f32f32 r8.z, r1.z +(rpt1)nop +sam (f32)(xyz)r8.w, r1.y, s#2, t#2 +(sy)(ss)mad.f32 r1.y, c16.x, r8.w, c16.y +cov.u32f32 r0.x, r0.x +mad.f32 r1.z, c16.x, r9.x, c16.y +mad.f32 r1.w, c16.x, r9.y, c16.y +mov.f32f32 r7.w, r1.y +cmps.f.ne p0.x, r0.x, r4.w +absneg.f r0.x, (neg)r1.z +mul.f r1.y, r4.x, r1.y +mul.f r1.z, r4.z, r7.w +mul.f r4.x, r5.y, r7.w +mad.f32 r0.y, r0.y, r0.x, r1.z +mov.f32f32 r1.z, r1.w +mad.f32 r4.x, r5.z, r0.x, r4.x +mad.f32 r0.x, r3.w, r0.x, r1.y kill p0.x -mov.f32f32 r3.w, r2.y -mov.f32f32 r0.y, r0.y -mad.f32 r0.x, r5.z, r1.z, r0.x -mad.f32 r2.y, r6.w, r1.z, r2.w -mad.f32 r0.y, r6.z, r1.z, r0.y +mad.f32 r0.y, r6.x, r1.z, r0.y +mad.f32 r1.y, r5.w, r1.z, r4.x +mad.f32 r0.x, r8.x, r1.w, r0.x nop -mov.f32f32 r0.x, r0.x -mov.f32f32 r1.z, r2.y -mov.f32f32 r0.y, r0.y -nop -mul.f r2.y, r0.x, r0.x -nop -mad.f32 r2.y, r1.z, r1.z, r2.y -(rpt2)nop -mov.f32f32 r2.y, r2.y -nop -mad.f32 r2.y, r0.y, r0.y, r2.y +mov.f32f32 r1.z, r0.y +mov.f32f32 r1.w, r1.y +mov.f32f32 r3.w, r0.x +sam (f32)(xyz)r5.y, r8.y, s#3, t#3 +(sy)mul.f r4.x, c8.z, r5.w +mul.f r0.y, r0.y, r1.z +mul.f r4.z, c8.y, r5.z +mad.f32 r0.y, r1.y, r1.w, r0.y +mul.f r1.y, c8.x, r5.y +mad.f32 r0.y, r3.w, r3.w, r0.y (rpt5)nop -rsq r2.y, r2.y -(ss)mov.f32f32 r2.y, r2.y -(rpt2)nop -mul.f r0.x, r0.x, r2.y -mul.f r1.z, r1.z, r2.y -mul.f r0.y, r0.y, r2.y -nop -mov.f32f32 r0.x, r0.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r0.y, r0.y -nop -mul.f r2.y, r0.x, r0.x -mul.f r2.w, (neg)c9.x, r0.x -mad.f32 r2.y, r1.z, r1.z, r2.y -mad.f32 r2.w, (neg)c9.y, r1.z, r2.w -(rpt1)nop -mov.f32f32 r2.y, r2.y -mov.f32f32 r2.w, r2.w -mad.f32 r2.y, r0.y, r0.y, r2.y -mad.f32 r2.w, (neg)c9.z, r0.y, r2.w -(rpt4)nop -rsq r2.y, r2.y -(ss)mov.f32f32 r2.y, r2.y -max.f r2.w, r2.w, c14.y +rsq r0.y, r0.y +(ss)mov.f32f32 r3.w, r0.y +mul.f r0.x, r0.x, r0.y (rpt1)nop -mul.f r0.x, r0.x, r2.y -mov.f32f32 r2.w, r2.w -mul.f r1.z, r1.z, r2.y -mul.f r0.y, r0.y, r2.y -mov.f32f32 r0.x, r0.x -mad.f32 r2.y, c8.z, r2.w, (neg)r10.x -mad.f32 r4.y, c8.y, r2.w, (neg)r8.y -mad.f32 r2.w, c8.x, r2.w, (neg)r1.x -mul.f r0.x, r0.x, r8.w -mov.f32f32 r1.z, r1.z -mov.f32f32 r2.y, r2.y -mov.f32f32 r4.y, r4.y -mov.f32f32 r2.w, r2.w -mad.f32 r0.x, r1.z, r8.x, r0.x -mad.f32 r1.z, c12.x, r2.y, r10.x -mad.f32 r2.y, c12.x, r4.y, r8.y -mad.f32 r1.x, c12.x, r2.w, r1.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.z, r1.z -mov.f32f32 r2.y, r2.y -mov.f32f32 r1.x, r1.x -mad.f32 r0.x, r0.y, r9.z, r0.x -(rpt2)nop -mov.f32f32 r0.x, r0.x -(rpt2)nop -max.f r0.x, c14.y, r0.x -(rpt2)nop -mov.f32f32 r0.x, r0.x +(ss)mul.f r0.y, r1.z, r3.w +mul.f r1.z, r1.w, r3.w +mov.f32f32 r1.w, r0.x +nop +mov.f32f32 r3.w, r0.y +mul.f r0.y, (neg)c9.x, r0.y +mov.f32f32 r4.w, r1.z +nop +mul.f r5.y, r3.w, r3.w +mad.f32 r0.y, (neg)c9.y, r1.z, r0.y +mad.f32 r1.z, r4.w, r4.w, r5.y +mad.f32 r0.x, (neg)c9.z, r0.x, r0.y +mad.f32 r0.y, r1.w, r1.w, r1.z (rpt5)nop -log2 r0.x, r0.x -(ss)mul.f r0.x, c12.y, r0.x -(rpt2)nop -mov.f32f32 r0.x, r0.x +rsq r0.y, r0.y +(ss)mov.f32f32 r1.z, r0.y +max.f r0.x, r0.x, c14.y +(ss)mul.f r0.y, r1.w, r0.y +nop +mul.f r1.w, r3.w, r1.z +mov.f32f32 r3.w, r0.x +mul.f r1.z, r4.w, r1.z +mad.f32 r0.x, c8.x, r0.x, (neg)r1.x +mul.f r1.w, r1.w, r6.w +mad.f32 r4.w, c8.z, r3.w, (neg)r6.y +mad.f32 r1.z, r1.z, r7.x, r1.w +mad.f32 r1.w, c8.y, r3.w, (neg)r6.z +mad.f32 r0.y, r0.y, r7.z, r1.z +mad.f32 r1.z, c12.x, r4.w, r6.y +mad.f32 r0.x, c12.x, r0.x, r1.x +nop +max.f r0.y, c14.y, r0.y +mad.f32 r1.x, c12.x, r1.w, r6.z +(rpt4)nop +log2 r0.y, r0.y +(ss)mul.f r0.y, c12.y, r0.y (rpt5)nop -exp2 r0.x, r0.x -(ss)mul.f r0.y, r1.y, r0.x -mul.f r1.y, r1.w, r0.x -mad.f32 r0.y, r7.z, r1.z, r0.y -mad.f32 r1.y, r7.y, r2.y, r1.y -(ss)mul.f r0.x, r2.x, r0.x -nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.y, r1.y -mad.f32 r0.x, r7.x, r1.x, r0.x -nop -mul.f r0.y, r0.y, r0.w -mul.f r1.x, r1.y, r0.w -mov.f32f32 r0.x, r0.x -nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.x, r1.x -mad.f32 r0.y, c7.z, r7.z, r0.y -mad.f32 r1.x, c7.y, r7.y, r1.x +exp2 r0.y, r0.y +(ss)mul.f r1.w, r4.x, r0.y +mul.f r3.w, r4.z, r0.y +mad.f32 r1.z, r2.z, r1.z, r1.w +mad.f32 r1.x, r2.y, r1.x, r3.w +(ss)mul.f r0.y, r1.y, r0.y +nop +mul.f r1.y, r1.z, r4.y +mul.f r1.x, r1.x, r4.y +mad.f32 r1.y, c7.z, r2.z, r1.y +mad.f32 r1.x, c7.y, r2.y, r1.x +mad.f32 r0.x, r2.x, r0.x, r0.y +nop +mul.f r0.y, r0.z, r1.y +mul.f r1.x, r0.z, r1.x mul.f r0.x, r0.x, r0.w nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.w, r1.x -mov.f32f32 r0.x, r0.x -nop -mul.f r0.y, r0.z, r0.y -mul.f r0.w, r0.z, r0.w -mad.f32 r0.x, c7.x, r7.x, r0.x -nop -add.f r0.y, r0.y, r5.x -add.f r0.w, r0.w, r5.w -mov.f32f32 r0.x, r0.x +add.f r0.y, r0.y, r7.y +add.f r0.w, r1.x, r3.y +mad.f32 r0.x, c7.x, r2.x, r0.x nop -mul.f r0.y, r0.y, r4.x -mul.f r0.w, r0.w, r4.x +mul.f r0.y, r0.y, r5.x +mul.f r0.w, r0.w, r5.x mul.f r0.x, r0.z, r0.x nop -mul.f r0.y, r0.y, c6.z -mul.f r0.z, r0.w, c6.y -add.f r0.x, r0.x, r6.x -nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -mul.f r0.x, r0.x, r4.x -nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -mul.f r0.x, r0.x, c6.x -nop -mov.f32f32 r3.z, r0.y -mov.f32f32 r3.y, r0.z -mov.f32f32 r0.x, r0.x +mul.f r2.z, r0.y, c6.z +mul.f r2.y, r0.w, c6.y +add.f r0.x, r0.x, r3.z (rpt2)nop -mov.f32f32 r0.x, r0.x +mul.f r0.x, r0.x, r5.x (rpt2)nop -mov.f32f32 r3.x, r0.x +mul.f r2.x, r0.x, c6.x end nop nop -nop -; FRAG: outputs: r3.x (1:0) -; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r2.z (5:9,cm=f,il=8,b=1) r63.y (5:10,cm=f,il=12,b=1) r3.x (5:11,cm=f,il=16,b=1) r2.y (5:12,cm=f,il=20,b=1) r7.x (5:13,cm=f,il=24,b=1) r8.x (5:14,cm=f,il=28,b=1) r7.x (5:15,cm=f,il=32,b=1) -; FRAG: 802 instructions, 0 half, 65 full -; pos (bary): r1.x -; color: r3.x -; fragcoord: r0.x +; FRAG: outputs: r2.x (1:0) +; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r3.x (5:9,cm=f,il=8,b=1) r63.z (5:10,cm=f,il=12,b=1) r3.x (5:11,cm=f,il=16,b=1) r1.w (5:12,cm=f,il=20,b=1) r6.x (5:13,cm=f,il=24,b=1) r6.x (5:14,cm=f,il=28,b=1) r7.x (5:15,cm=f,il=32,b=1) +; FRAG: 532 instructions, 0 half, 13 full diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-71.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-71.asm index ad4df45..b2e35b3 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-71.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-71.asm @@ -6,346 +6,250 @@ @in(r0.w) in4 @in(r1.x) in5 @in(r1.y) in6 -@in(r1.z) in8 -@in(r1.w) in9 -@in(r2.x) in10 -@in(r2.y) in11 -@in(r2.z) in12 -@in(r2.w) in13 -@out(r11.z) out0 -@out(r11.w) out1 -@out(r12.x) out2 -@out(r12.y) out3 -@out(r7.x) out4 -@out(r7.y) out5 -@out(r7.z) out6 -@out(r7.w) out7 -@out(r3.x) out8 -@out(r3.y) out9 -@out(r3.z) out10 -@out(r3.w) out11 -@out(r9.z) out12 -@out(r9.w) out13 -@out(r10.x) out14 -@out(r10.y) out15 -@out(r1.y) out16 -@out(r1.z) out17 -@out(r1.w) out18 -@out(r2.x) out19 -@out(r12.z) out20 -@out(r12.w) out21 -@out(r13.x) out22 -@out(r13.y) out23 -@out(r8.z) out24 -@out(r8.w) out25 -@out(r9.x) out26 -@out(r9.y) out27 -@out(r10.z) out28 -@out(r10.w) out29 -@out(r11.x) out30 -@out(r11.y) out31 -(sy)(ss)floor.f r3.x, c15.z -absneg.f r3.y, (abs)c18.x -absneg.f r3.z, (abs)c18.y -floor.f r3.w, c15.x -add.f r3.x, c15.z, (neg)r3.x -mul.f r4.x, c12.x, r1.z -mul.f r4.y, c12.x, r0.w -add.f r3.w, c15.x, (neg)r3.w -mov.f32f32 r3.x, r3.x -add.f r3.y, r3.y, r3.z -mad.f32 r3.z, c13.x, r1.w, r4.x -mad.f32 r4.x, c13.x, r1.x, r4.y -max.f r3.x, r3.x, c19.y -mov.f32f32 r3.y, r3.y -mov.f32f32 r3.w, r3.w -mov.f32f32 r3.z, r3.z -min.f r3.x, r3.x, c23.y -mul.f r4.y, c17.x, r3.y -mul.f r4.z, c12.z, r0.x -max.f r3.w, r3.w, c19.y -max.f r3.x, r3.x, c19.x -mad.f32 r4.z, c13.z, r0.y, r4.z -mad.f32 r3.z, c14.x, r2.x, r3.z -mad.f32 r4.x, c14.x, r1.y, r4.x -mov.f32f32 r3.x, r3.x -mad.f32 r4.z, c14.z, r0.z, r4.z -min.f r3.w, r3.w, c23.y -mov.f32f32 r3.z, r3.z -mul.f r3.x, c17.x, r3.x -add.f r4.z, r4.z, c15.z -max.f r3.w, r3.w, c19.x -mad.f32 r4.y, c19.w, r4.y, r4.z -mov.f32f32 r3.x, r3.x -mul.f r4.w, c12.y, r0.w -mov.f32f32 r4.x, r4.x -absneg.f r5.x, (neg)c5.x -mad.f32 r3.x, c19.z, r3.x, c15.x -mov.f32f32 r4.y, r4.y -mov.f32f32 r3.w, r3.w -mul.f r5.y, c12.x, r0.x -mov.f32f32 r3.x, r3.x -mov.f32f32 r4.y, r4.y -mad.f32 r5.y, c13.x, r0.y, r5.y -mad.f32 r4.w, c13.y, r1.x, r4.w -mov.f32f32 r3.x, r3.x -add.f r4.y, r4.y, c20.x -mad.f32 r5.y, c14.x, r0.z, r5.y -mad.f32 r4.w, c14.y, r1.y, r4.w -add.f r3.x, r3.x, c20.x -floor.f r5.z, r4.y -add.f r5.y, r5.y, c15.x -mov.f32f32 r4.w, r4.w -floor.f r5.w, r3.x -add.f r4.y, r4.y, (neg)r5.z -mad.f32 r3.w, c17.x, r3.w, r5.y -mul.f r5.z, r4.w, r3.z -add.f r3.x, r3.x, (neg)r5.w -mad.f32 r4.y, c20.y, r4.y, c20.z -add.f r3.w, r3.w, c20.x -mul.f r5.w, c12.y, r1.z -mad.f32 r3.x, c20.y, r3.x, c20.z -absneg.f r4.y, (abs)r4.y -floor.f r6.x, r3.w -mad.f32 r5.w, c13.y, r1.w, r5.w -absneg.f r3.x, (abs)r3.x -mul.f r6.y, c20.y, r4.y -add.f r3.w, r3.w, (neg)r6.x -mul.f r4.y, r4.y, r4.y -mul.f r6.x, c20.y, r3.x -add.f r6.y, c20.w, (neg)r6.y -mad.f32 r3.w, c20.y, r3.w, c20.z -mul.f r3.x, r3.x, r3.x -add.f r6.x, c20.w, (neg)r6.x -mul.f r4.y, r4.y, r6.y -absneg.f r3.w, (abs)r3.w -mov.f32f32 r5.w, r5.w -mul.f r3.x, r3.x, r6.x -mov.f32f32 r4.y, r4.y -mul.f r6.x, r0.x, r0.z -mul.f r6.y, r0.y, c21.x -mov.f32f32 r3.x, r3.x -mul.f r6.z, r0.y, c22.x -mul.f r6.w, c20.y, r3.w -mul.f r6.x, r6.x, r6.y -mul.f r3.w, r3.w, r3.w -mov.f32f32 r6.y, r6.z -add.f r6.z, c20.w, (neg)r6.w -mov.f32f32 r6.x, r6.x -mad.f32 r5.w, c14.y, r2.x, r5.w -mov.f32f32 r6.y, r6.y -mul.f r3.w, r3.w, r6.z -mov.f32f32 r6.x, r6.x -mov.f32f32 r5.w, r5.w -max.f r6.y, r6.y, c19.y -mov.f32f32 r3.w, r3.w -mov.f32f32 r6.x, r6.x -mad.f32 r5.z, r4.x, r5.w, (neg)r5.z -min.f r6.y, r6.y, c23.y -mul.f r6.z, r4.x, r5.x -absneg.f r6.w, (neg)c5.y -mul.f r1.z, c12.z, r1.z -min.f r6.y, r6.y, c19.w -max.f r6.x, r6.x, c19.y -mov.f32f32 r5.z, r5.z -mad.f32 r6.z, r4.w, r6.w, r6.z -mov.f32f32 r6.y, r6.y -min.f r6.x, r6.x, c23.y -mul.f r5.z, r5.z, r2.y -mov.f32f32 r6.z, r6.z -mul.f r3.x, r3.x, r6.y -min.f r6.x, r6.x, c21.y -mov.f32f32 r5.z, r5.z +@in(r8.x) in8 +@in(r8.y) in9 +@in(r8.z) in10 +@in(r8.w) in11 +@in(r5.z) in12 +@in(r5.w) in13 +@out(r0.x) out0 +@out(r0.y) out1 +@out(r0.z) out2 +@out(r0.w) out3 +@out(r1.x) out4 +@out(r1.y) out5 +@out(r1.z) out6 +@out(r1.w) out7 +@out(r2.x) out8 +@out(r2.y) out9 +@out(r2.z) out10 +@out(r2.w) out11 +@out(r3.x) out12 +@out(r3.y) out13 +@out(r3.z) out14 +@out(r3.w) out15 +@out(r4.x) out16 +@out(r4.y) out17 +@out(r4.z) out18 +@out(r4.w) out19 +@out(r5.x) out20 +@out(r5.y) out21 +@out(r5.z) out22 +@out(r5.w) out23 +@out(r6.x) out24 +@out(r6.y) out25 +@out(r6.z) out26 +@out(r6.w) out27 +@out(r7.x) out28 +@out(r7.y) out29 +@out(r7.z) out30 +@out(r7.w) out31 +@const(c19.x) 0x3ecccccd, 0x00000000, 0x3eaaa64c, 0x3e800000 +@const(c20.x) 0x3f000000, 0x40000000, 0xbf800000, 0x40400000 +@const(c21.x) 0x399d4952, 0x3e4ccccd, 0x3f99999a, 0x3fd9999a +@const(c22.x) 0x3c88ce70, 0x00000000, 0x00000000, 0x00000000 +@const(c23.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 +(sy)(ss)floor.f r1.z, c15.z +floor.f r1.w, c15.x +absneg.f r2.x, (abs)c18.x +absneg.f r2.y, (abs)c18.y +add.f r1.z, c15.z, (neg)r1.z +add.f r1.w, c15.x, (neg)r1.w +mul.f r2.z, c12.x, r0.w +add.f r2.x, r2.x, r2.y +max.f r1.z, r1.z, c19.y +max.f r1.w, r1.w, c19.y +mad.f32 r2.y, c13.x, r1.x, r2.z +mul.f r2.z, c17.x, r2.x +min.f r1.z, r1.z, c23.y +min.f r1.w, r1.w, c23.y +mul.f r2.w, c12.z, r0.x +mad.f32 r2.y, c14.x, r1.y, r2.y +max.f r1.z, r1.z, c19.x +max.f r1.w, r1.w, c19.x +mad.f32 r2.w, c13.z, r0.y, r2.w +mul.f r3.x, c12.x, r0.x +mul.f r1.z, c17.x, r1.z +mad.f32 r3.x, c13.x, r0.y, r3.x +mad.f32 r2.w, c14.z, r0.z, r2.w +mad.f32 r3.x, c14.x, r0.z, r3.x +mad.f32 r1.z, c19.z, r1.z, c15.x +absneg.f r3.y, (neg)c5.x +add.f r2.w, r2.w, c15.z +mov.f32f32 r2.x, r2.x +add.f r1.z, r1.z, c20.x +add.f r3.x, r3.x, c15.x +mad.f32 r2.z, c19.w, r2.z, r2.w +mad.f32 r1.w, c17.x, r1.w, r3.x +floor.f r3.z, r1.z +mul.f r3.w, r2.y, r3.y +mul.f r4.x, c12.y, r0.w +add.f r1.w, r1.w, c20.x +add.f r1.z, r1.z, (neg)r3.z +add.f r2.z, r2.z, c20.x +mad.f32 r3.z, c13.y, r1.x, r4.x +max.f r2.x, r2.x, c21.z +mad.f32 r1.z, c20.y, r1.z, c20.z +floor.f r4.x, r1.w +floor.f r4.y, r2.z +mad.f32 r3.z, c14.y, r1.y, r3.z +absneg.f r1.z, (abs)r1.z +add.f r1.w, r1.w, (neg)r4.x +add.f r2.z, r2.z, (neg)r4.y +absneg.f r4.x, (neg)c5.y +mul.f r4.y, c20.y, r1.z +mad.f32 r1.w, c20.y, r1.w, c20.z +mad.f32 r2.z, c20.y, r2.z, c20.z +mul.f r1.z, r1.z, r1.z +add.f r4.y, c20.w, (neg)r4.y +absneg.f r1.w, (abs)r1.w +absneg.f r2.z, (abs)r2.z +mad.f32 r3.w, r3.z, r4.x, r3.w +mul.f r1.z, r1.z, r4.y +mul.f r4.y, r0.y, c22.x +mul.f r4.z, c20.y, r1.w +mul.f r4.w, c20.y, r2.z +mul.f r1.w, r1.w, r1.w +max.f r4.y, r4.y, c19.y +add.f r4.z, c20.w, (neg)r4.z +add.f r4.w, c20.w, (neg)r4.w +mul.f r2.z, r2.z, r2.z +min.f r4.y, r4.y, c23.y +mul.f r1.w, r1.w, r4.z +mul.f r4.z, r0.x, r0.z +mul.f r4.w, r2.z, r4.w +min.f r2.z, r4.y, c19.w +mul.f r4.y, r0.y, c21.x mul.f r0.w, c12.z, r0.w -mov.f32f32 r3.x, r3.x -mov.f32f32 r6.x, r6.x -mov.f32f32 r5.z, r5.z +min.f r5.x, r2.x, c21.w +mul.f r1.z, r1.z, r2.z +mul.f r2.x, r4.z, r4.y mad.f32 r0.w, c13.z, r1.x, r0.w -mad.f32 r1.x, c13.z, r1.w, r1.z -mul.f r1.z, r3.w, r6.x -mul.f r1.w, r4.y, r6.x -max.f r3.y, r3.y, c21.z -mov.f32f32 r3.w, r5.z -mov.f32f32 r1.z, r1.z +mov.f32f32 r1.x, r5.x +mov.f32f32 r4.y, r1.z +max.f r2.z, r2.x, c19.y mad.f32 r0.w, c14.z, r1.y, r0.w -mad.f32 r1.y, c18.x, r3.x, r1.z -mad.f32 r1.z, c18.y, r3.x, r1.z -mov.f32f32 r3.x, r3.y -mov.f32f32 r7.w, r3.w -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.z, r1.z -min.f r3.x, r3.x, c21.w -mov.f32f32 r0.w, r0.w -absneg.f r3.y, (neg)c5.z -mov.f32f32 r1.x, r1.x -mov.f32f32 r3.x, r3.x -mul.f r3.w, c12.y, r0.x -mad.f32 r4.y, r0.w, r3.y, r6.z -mad.f32 r1.x, c14.z, r2.x, r1.x -mad.f32 r2.x, r1.w, r3.x, r5.y -mad.f32 r4.z, r1.w, r3.x, r4.z -mad.f32 r3.w, c13.y, r0.y, r3.w -mov.f32f32 r4.y, r4.y -add.f r1.y, r2.x, r1.y -add.f r1.z, r4.z, r1.z -mad.f32 r2.x, c14.y, r0.z, r3.w -max.f r3.w, c19.y, r4.y -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.z, r1.z -add.f r2.x, r2.x, c15.y -mov.f32f32 r3.w, r3.w -mov.f32f32 r4.y, r1.y -mul.f r4.z, c8.w, r1.y -mul.f r5.y, c8.z, r1.y -mul.f r5.z, c8.y, r1.y -add.f r4.y, c4.x, (neg)r4.y -mad.f32 r1.w, r1.w, r3.x, r2.x -mul.f r2.x, c8.x, r1.y -mov.f32f32 r3.x, r1.z -mul.f r6.x, r4.y, r4.y -add.f r6.y, c4.y, (neg)r1.w -mad.f32 r6.z, c9.w, r1.w, r4.z -mad.f32 r8.x, c9.z, r1.w, r5.y -mad.f32 r5.z, c9.y, r1.w, r5.z -mad.f32 r6.x, r6.y, r6.y, r6.x -mov.f32f32 r6.z, r6.z -mov.f32f32 r8.x, r8.x -mov.f32f32 r5.z, r5.z -mov.f32f32 r6.x, r6.x -add.f r3.x, c4.z, (neg)r3.x -mad.f32 r6.z, c10.w, r1.z, r6.z -mad.f32 r8.x, c10.z, r1.z, r8.x -mad.f32 r5.z, c10.y, r1.z, r5.z -mad.f32 r6.x, r3.x, r3.x, r6.x +absneg.f r1.y, (neg)c5.z +mov.f32f32 r2.x, r2.y +min.f r4.z, r2.z, c23.y +mul.f r5.y, c12.z, r8.x +mov.f32f32 r2.z, r0.w +mov.f32f32 r2.y, r3.z +min.f r3.z, r4.z, c21.y +mad.f32 r0.w, r0.w, r1.y, r3.w +mad.f32 r4.z, c13.z, r8.y, r5.y +mul.f r9.x, c12.y, r8.x +mov.f32f32 r3.w, r3.z +mul.f r3.z, r4.w, r3.z +mul.f r4.w, c12.y, r0.x +max.f r9.y, c19.y, r0.w +mul.f r0.w, r1.w, r3.w +mad.f32 r1.w, c13.y, r0.y, r4.w +mad.f32 r3.x, r3.z, r1.x, r3.x +mad.f32 r1.w, c14.y, r0.z, r1.w +mov.f32f32 r3.w, r0.w +mad.f32 r0.w, c18.y, r1.z, r0.w +mad.f32 r1.z, c18.x, r4.y, r3.w +mad.f32 r2.w, r3.z, r5.x, r2.w +add.f r1.w, r1.w, c15.y +mov.f32f32 r4.y, r9.y +add.f r1.z, r3.x, r1.z +add.f r2.w, r2.w, r0.w +mad.f32 r1.x, r3.z, r1.x, r1.w +nop +mov.f32f32 r1.w, r1.z +mul.f r1.z, c0.x, r1.z +mov.f32f32 r4.w, r2.w +add.f r6.y, c4.y, (neg)r1.x +add.f r6.x, c4.x, (neg)r1.w +mul.f r0.w, c8.y, r1.w +mul.f r3.x, c8.x, r1.w +mul.f r7.w, c8.w, r1.w +mul.f r3.z, r6.x, r6.x +mad.f32 r0.w, c9.y, r1.x, r0.w +mad.f32 r3.z, r6.y, r6.y, r3.z +add.f r6.z, c4.z, (neg)r4.w +mad.f32 r0.w, c10.y, r4.w, r0.w +mad.f32 r3.x, c9.x, r1.x, r3.x +mad.f32 r3.w, c9.w, r1.x, r7.w +mad.f32 r3.z, r6.z, r6.z, r3.z mul.f r0.x, c12.w, r0.x -mad.f32 r2.x, c9.x, r1.w, r2.x +mad.f32 r3.x, c10.x, r4.w, r3.x +mad.f32 r3.w, c10.w, r4.w, r3.w +mul.f r7.z, c8.z, r1.w mad.f32 r0.x, c13.w, r0.y, r0.x -mov.f32f32 r0.y, r3.x -mov.f32f32 r8.y, r4.y +mul.f r0.y, c0.w, r1.w +rsq r3.z, r3.z +(ss)mov.f32f32 r5.x, r3.z +mad.f32 r3.y, r6.x, r3.z, r3.y mad.f32 r0.x, c14.w, r0.z, r0.x -rsq r0.z, r6.x -(ss)mov.f32f32 r0.z, r0.z -mov.f32f32 r2.x, r2.x -mov.f32f32 r9.x, r0.y -mov.f32f32 r8.z, r8.y -mad.f32 r0.y, r4.y, r0.z, r5.x -mad.f32 r4.y, r6.y, r0.z, r6.w -mad.f32 r0.z, r3.x, r0.z, r3.y -add.f r0.x, r0.x, c15.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r3.x, r4.y -mov.f32f32 r0.z, r0.z -mad.f32 r3.y, c11.w, r0.x, r6.z -mul.f r4.y, r0.y, r0.y -mad.f32 r5.x, c11.z, r0.x, r8.x -mad.f32 r4.y, r3.x, r3.x, r4.y -mov.f32f32 r3.y, r3.y -mad.f32 r5.z, c11.y, r0.x, r5.z -mad.f32 r2.x, c10.x, r1.z, r2.x -mov.f32f32 r4.y, r4.y -mov.f32f32 r3.y, r3.y -mad.f32 r4.y, r0.z, r0.z, r4.y -mov.f32f32 r5.x, r5.x -mov.f32f32 r5.z, r5.z -mad.f32 r2.x, c11.x, r0.x, r2.x -(ss)mul.f r6.x, c0.w, r1.y -mul.f r6.z, c0.z, r1.y -mul.f r6.w, c0.y, r1.y -rsq r4.y, r4.y -(ss)mov.f32f32 r4.y, r4.y -mov.f32f32 r10.y, r3.y -mov.f32f32 r3.y, r5.x -mul.f r5.x, r5.z, c16.y -mul.f r0.z, r0.z, r4.y -mul.f r3.x, r3.x, r4.y -mul.f r0.y, r0.y, r4.y -mov.f32f32 r10.x, r3.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r3.x, r3.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r9.w, r5.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r3.x, r3.x -mov.f32f32 r0.y, r0.y +mad.f32 r0.z, c9.z, r1.x, r7.z +(ss)mad.f32 r3.z, r6.y, r5.x, r4.x +mov.f32f32 r4.x, r3.y +mad.f32 r1.y, r6.z, r5.x, r1.y nop -mov.f32f32 r10.w, r0.z -mov.f32f32 r10.z, r3.x -mov.f32f32 r9.y, r0.y -mov.f32f32 r0.y, r2.x -mad.f32 r0.z, c1.w, r1.w, r6.x -mad.f32 r2.x, c1.z, r1.w, r6.z -mad.f32 r3.x, c1.y, r1.w, r6.w -mul.f r0.y, r0.y, c16.x -mad.f32 r0.z, c2.w, r1.z, r0.z -mad.f32 r2.x, c2.z, r1.z, r2.x -mad.f32 r3.x, c2.y, r1.z, r3.x -mov.f32f32 r9.z, r0.y -mad.f32 r0.y, c3.w, r0.x, r0.z -mad.f32 r0.z, c3.z, r0.x, r2.x -mad.f32 r2.x, c3.y, r0.x, r3.x -mul.f r3.x, c0.x, r1.y -mov.f32f32 r12.y, r0.y -mov.f32f32 r12.x, r0.z -mov.f32f32 r11.w, r2.x -mad.f32 r0.y, c1.x, r1.w, r3.x -mad.f32 r0.z, c7.x, r1.z, c7.y -mad.f32 r0.y, c2.x, r1.z, r0.y -mad.f32 r1.y, c7.x, r1.y, c7.y -mad.f32 r0.x, c3.x, r0.x, r0.y -mov.f32f32 r0.y, r0.z -mov.f32f32 r11.y, r4.z -mov.f32f32 r11.x, r5.y -mov.f32f32 r11.z, r0.x -mov.f32f32 r12.w, r0.y -mov.f32f32 r0.x, r1.y -mov.f32f32 r0.y, r6.y -(rpt1)nop -mov.f32f32 r12.z, r0.x -mov.f32f32 r8.w, r0.y -mul.f r0.x, r3.w, c6.z -mul.f r0.y, r3.w, c6.y -mul.f r0.z, r3.w, c6.x -mov.f32f32 r1.x, r1.x -mov.f32f32 r7.z, r0.x -mov.f32f32 r7.y, r0.y -mov.f32f32 r7.x, r0.z -mul.f r0.x, r4.x, r1.x -mul.f r0.y, r0.w, r5.w -mad.f32 r0.x, r0.w, r3.z, (neg)r0.x -mad.f32 r0.y, r4.w, r1.x, (neg)r0.y -mov.f32f32 r0.z, r1.x -mov.f32f32 r1.x, r5.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.w, r0.z -mov.f32f32 r1.z, r1.x -mul.f r0.x, r0.x, r2.y -mul.f r0.y, r0.y, r2.y -mov.f32f32 r0.z, r3.z -nop -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.y, r0.z -mov.f32f32 r0.z, r0.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y +mov.f32f32 r5.x, r3.z +mul.f r4.x, r4.x, r4.x +mov.f32f32 r5.y, r1.y +add.f r0.x, r0.x, c15.w +mad.f32 r3.z, r3.z, r5.x, r4.x +mad.f32 r0.z, c10.z, r4.w, r0.z +mad.f32 r1.y, r1.y, r5.y, r3.z +mad.f32 r0.w, c11.y, r0.x, r0.w +mad.f32 r3.x, c11.x, r0.x, r3.x +mad.f32 r3.w, c11.w, r0.x, r3.w +mad.f32 r3.z, c11.z, r0.x, r0.z +mad.f32 r0.y, c1.w, r1.x, r0.y +mul.f r0.z, c0.z, r1.w +rsq r1.y, r1.y +(ss)mov.f32f32 r4.x, r1.y +mul.f r6.w, r3.y, r1.y +mul.f r3.y, r0.w, c16.y +mul.f r3.x, r3.x, c16.x +mul.f r7.y, r5.y, r4.x +mul.f r7.x, r5.x, r4.x +mad.f32 r0.y, c2.w, r4.w, r0.y +mad.f32 r0.z, c1.z, r1.x, r0.z +mad.f32 r0.w, c3.w, r0.x, r0.y +mad.f32 r0.y, c2.z, r4.w, r0.z +(ss)mul.f r1.y, c0.y, r1.w +mad.f32 r0.z, c3.z, r0.x, r0.y +mad.f32 r0.y, c1.y, r1.x, r1.y +mad.f32 r1.x, c1.x, r1.x, r1.z +mad.f32 r0.y, c2.y, r4.w, r0.y +mad.f32 r1.x, c2.x, r2.w, r1.x +mad.f32 r0.y, c3.y, r0.x, r0.y +mad.f32 r0.x, c3.x, r0.x, r1.x +mad.f32 r5.x, c7.x, r1.w, c7.y +mad.f32 r5.y, c7.x, r4.w, c7.y +mul.f r1.z, r4.y, c6.z +mul.f r1.y, r4.y, c6.y +mul.f r1.x, r9.y, c6.x +mad.f32 r1.w, c14.z, r8.z, r4.z +mad.f32 r2.w, c13.y, r8.y, r9.x +mul.f r4.x, c12.x, r8.x +mad.f32 r2.w, c14.y, r8.z, r2.w +mov.f32f32 r4.z, r1.w +mad.f32 r4.x, c13.x, r8.y, r4.x (rpt1)nop -mov.f32f32 r2.x, r0.x -mov.f32f32 r3.w, r0.y -mov.f32f32 r3.z, r0.z -mov.f32f32 r0.x, r4.w -mov.f32f32 r0.y, r4.x +mul.f r4.w, r2.x, r4.z +mad.f32 r8.x, c14.x, r8.z, r4.x +mov.f32f32 r4.y, r2.w (rpt1)nop -mov.f32f32 r3.y, r0.x -mov.f32f32 r3.x, r0.y -mov.f32f32 r0.x, r2.w -mov.f32f32 r0.y, r2.z +mov.f32f32 r4.x, r8.x +mul.f r8.y, r2.z, r4.y +mul.f r8.x, r2.y, r8.x +mad.f32 r1.w, r2.y, r1.w, (neg)r8.y +mad.f32 r4.w, r2.z, r4.x, (neg)r4.w +mad.f32 r8.x, r2.x, r2.w, (neg)r8.x (rpt1)nop -mov.f32f32 r13.y, r0.x -mov.f32f32 r13.x, r0.y +mul.f r4.w, r4.w, r8.w +mul.f r2.w, r1.w, r8.w +mul.f r1.w, r8.x, r8.w end -; VERT: outputs: r11.z (0:0) r7.x (5:9) r3.x (5:10) r9.z (5:11) r1.y (5:12) r12.z (5:13) r8.z (5:14) r10.z (5:15) -; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r1.z (0:0,cm=f,il=16,b=0) r2.z (0:0,cm=3,il=20,b=0) -; VERT: 304 instructions, 0 half, 14 full -; pos: r11.z +nop +nop +; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11) r4.x (5:12) r5.x (5:13) r6.x (5:14) r7.x (5:15) +; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r8.x (0:0,cm=f,il=16,b=0) r5.z (0:0,cm=3,il=20,b=0) +; VERT: 201 instructions, 0 half, 10 full diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-72.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-72.asm index 82c1168..55cb4f7 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-72.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-72.asm @@ -4,990 +4,677 @@ @in(r0.w) in3 @in(r1.x) in4 @in(r1.y) in5 -@out(r3.x) out0 -@out(r3.y) out1 -@out(r3.z) out2 -@out(r3.w) out3 -(sy)(ss)bary.f r0.x, 7, r1.x -bary.f r0.y, 8, r1.x +@out(r2.x) out0 +@out(r2.y) out1 +@out(r2.z) out2 +@out(r2.w) out3 +@const(c14.x) 0x3f000000, 0x00000000, 0x43160000, 0x3bdb8bac +@const(c15.x) 0x41800000, 0x3f700000, 0x3d800000, 0x3d4ccccd +@const(c16.x) 0x40000000, 0xbf800000, 0xbb449ba6, 0xbf000000 +@const(c17.x) 0x3f800000, 0xbf000000, 0x3de38866, 0x3fb8aa65 +@const(c18.x) 0x3cf5c28f, 0x00000000, 0x00000000, 0x00000000 +@const(c19.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 +(sy)(ss)bary.f r0.x, 20, r1.x +bary.f r0.y, 7, r1.x +bary.f r1.z, 8, r1.x add.f r0.w, r0.w, c14.y -bary.f r1.z, 9, r1.x -mov.f32f32 r0.x, r0.x -bary.f r1.w, 20, r1.x -add.f r2.x, r0.y, c16.w -add.f r2.y, r1.z, c16.w -bary.f r2.z, 18, r1.x -mul.f r2.w, r1.w, r0.x -bary.f r3.x, 15, r1.x -floor.f r3.y, r2.x +bary.f r1.w, 9, r1.x +mul.f r2.x, r0.x, r0.y +bary.f r2.y, 21, r1.x +bary.f r2.z, 15, r1.x +add.f r2.w, r1.z, c16.w +add.f r3.y, r1.w, c16.w rcp r0.w, r0.w add.f r0.z, r0.z, c14.y -floor.f r3.z, r2.y -mov.f32f32 r3.x, r3.x -bary.f r3.w, 21, r1.x -add.f r2.x, r2.x, (neg)r3.y +mad.f32 r2.x, r2.y, r2.z, r2.x +bary.f r3.z, 22, r1.x +bary.f r3.w, 3, r1.x +floor.f r4.x, r2.w (ss)mul.f r0.z, r0.z, r0.w -(ss)add.f r0.w, r2.y, (neg)r3.z -mad.f32 r2.y, r3.w, r3.x, r2.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r0.z, r0.z -absneg.f r2.w, (neg)c11.x -mov.f32f32 r2.y, r2.y -bary.f r3.y, 3, r1.x -mul.f r3.z, c14.x, r2.x -mul.f r2.w, r2.w, c11.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r3.y, r3.y -bary.f r4.x, 22, r1.x -mov.f32f32 r3.z, r3.z -mul.f r2.w, r2.w, r0.z -mul.f r4.y, c14.x, r0.w -mad.f32 r2.y, r4.x, r3.y, r2.y -add.f r0.y, r0.y, (neg)r3.z -mov.f32f32 r2.w, r2.w -mov.f32f32 r3.z, r4.y -mov.f32f32 r2.y, r2.y -bary.f r4.y, 12, r1.x -mov.f32f32 r0.y, r0.y -mul.f r0.z, r2.w, r0.z -add.f r1.z, r1.z, (neg)r3.z -mul.f r2.w, r1.w, r4.y -bary.f r3.z, 13, r1.x -add.f r4.z, c17.y, r0.y +(ss)absneg.f r0.w, (neg)c11.x +mad.f32 r2.x, r3.z, r3.w, r2.x +add.f r2.w, r2.w, (neg)r4.x +floor.f r4.x, r3.y +mul.f r0.w, r0.w, c11.x +mov.f32f32 r4.y, r2.x +bary.f r4.z, 12, r1.x +mov.f32f32 r4.w, r2.w +mul.f r0.w, r0.w, r0.z mov.f32f32 r0.z, r0.z -add.f r0.y, c17.x, r0.y -mad.f32 r2.w, r3.w, r3.z, r2.w -mov.f32f32 r4.z, r4.z +mul.f r5.x, r0.x, r4.z +bary.f r5.y, 13, r1.x +mul.f r5.z, c14.x, r4.w +mul.f r0.z, r0.w, r0.z +add.f r0.w, r3.y, (neg)r4.x +mad.f32 r3.y, r2.y, r5.y, r5.x +bary.f r4.x, 14, r1.x +add.f r1.z, r1.z, (neg)r5.z mul.f r0.z, r0.z, c17.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r2.w, r2.w -bary.f r4.w, 14, r1.x -mul.f r4.z, r4.z, c5.z -mov.f32f32 r0.z, r0.z -mul.f r0.y, r0.y, c5.z -mad.f32 r2.w, r4.x, r4.w, r2.w -mov.f32f32 r5.x, r4.z -mov.f32f32 r4.z, r4.z -mov.f32f32 r5.y, r0.y -mul.f r5.z, r2.w, r2.w -mov.f32f32 r5.w, r5.x -mad.f32 r5.x, r2.y, r2.y, r5.z -mov.f32f32 r1.z, r1.z +mov.f32f32 r5.x, r0.w +mad.f32 r3.y, r3.z, r4.x, r3.y +mov.f32f32 r5.z, r1.z +add.f r1.z, c17.x, r1.z +mul.f r5.w, c14.x, r5.x +mul.f r6.x, r3.y, r3.y +add.f r5.z, c17.y, r5.z +mad.f32 r2.x, r2.x, r4.y, r6.x +bary.f r6.x, 4, r1.x exp2 r0.z, r0.z -(ss)mov.f32f32 r0.z, r0.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r5.x, r5.x -bary.f r5.z, 4, r1.x -add.f r6.x, c17.y, r1.z -add.f r6.y, c19.y, (neg)r0.z -mov.f32f32 r0.y, r0.y -mul.f r6.z, r1.w, r5.z -bary.f r6.w, 5, r1.x -mov.f32f32 r6.x, r6.x +(ss)mov.f32f32 r6.y, r0.z +mul.f r6.z, r5.z, c5.z +add.f r1.w, r1.w, (neg)r5.w +mul.f r5.z, r0.x, r6.x +bary.f r5.w, 5, r1.x +mov.f32f32 r7.y, r6.z +add.f r6.y, c19.y, (neg)r6.y +mov.f32f32 r6.w, r1.w +mad.f32 r5.z, r2.y, r5.w, r5.z +bary.f r8.x, 6, r1.x mul.f r6.y, r6.y, c11.y -mul.f r0.z, r0.z, c17.x -mad.f32 r6.z, r3.w, r6.w, r6.z -mul.f r7.x, r6.x, c5.w -mov.f32f32 r7.y, r0.y -add.f r0.y, c17.x, r1.z -mov.f32f32 r1.z, r6.z -bary.f r6.z, 6, r1.x -mov.f32f32 r6.x, r7.x +(ss)mul.f r0.z, r0.z, c17.x +add.f r6.w, c17.y, r6.w +mad.f32 r5.z, r3.z, r8.x, r5.z +mul.f r8.y, r1.z, c5.z +add.f r1.z, c17.x, r1.w add.f r0.z, r0.z, r6.y -mov.f32f32 r0.y, r0.y -mad.f32 r1.z, r4.x, r6.z, r1.z -mov.f32f32 r6.x, r6.x -bary.f r6.y, 10, r1.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r1.z, r1.z -mul.f r0.y, r0.y, c5.w -add.f r8.x, r6.y, c16.z -mov.f32f32 r8.y, r5.y -mad.f32 r5.x, r1.z, r1.z, r5.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r5.y, r8.x -mov.f32f32 r7.z, r0.y -mov.f32f32 r7.x, r7.x -mov.f32f32 r9.x, r4.z -mov.f32f32 r6.y, r5.y -rsq r4.z, r5.x -(ss)mov.f32f32 r4.z, r4.z +mov.f32f32 r1.w, r5.z +mul.f r9.y, r6.w, c5.w +mov.f32f32 r9.x, r8.y +mul.f r8.z, r1.z, c5.w +mad.f32 r1.z, r5.z, r1.w, r2.x +mov.f32f32 r7.z, r9.y max.f r0.z, r0.z, c14.y -(ss)mov.f32f32 r5.x, r7.z -mov.f32f32 r8.z, r7.x -mul.f r1.z, r1.z, r4.z +bary.f r2.x, 10, r1.x +mov.f32f32 r6.w, r8.z +add.f r4.w, c16.x, (neg)r4.w +mul.f r0.x, r0.x, r0.x +rsq r1.z, r1.z +(ss)mov.f32f32 r5.z, r1.z +add.f r8.w, r2.x, c16.z min.f r0.z, r0.z, c17.x -sam.s (f32)(x)r5.y, r5.w, s#4, t#4 -(sy)mov.f32f32 r5.y, r5.y -mov.f32f32 r7.z, r5.x -mov.f32f32 r1.z, r1.z -add.f r5.x, c19.y, (neg)r0.z -(ss)add.f r5.w, c19.y, (neg)r0.z -add.f r6.x, c19.y, (neg)r0.z -mul.f r1.z, r1.z, c15.x -mov.f32f32 r5.y, r5.y -add.f r6.y, c16.x, (neg)r2.x -mul.f r5.x, r5.x, c10.z -mov.f32f32 r1.z, r1.z -mul.f r5.w, r5.w, c10.y -mul.f r6.x, r6.x, c10.x -mov.f32f32 r6.y, r6.y -add.f r7.x, c16.x, (neg)r0.w -mov.f32f32 r7.w, r8.x -mul.f r2.y, r2.y, r4.z -rcp r1.z, r1.z -(ss)mov.f32f32 r1.z, r1.z -mul.f r2.w, r2.w, r4.z -mov.f32f32 r4.z, r7.x -mov.f32f32 r2.y, r2.y -mov.f32f32 r7.w, r7.w -mov.f32f32 r2.w, r2.w -mul.f r7.x, r6.y, r4.z -mov.f32f32 r2.y, r2.y -mul.f r1.w, r1.w, r1.w -absneg.f r2.w, (neg)r2.w -mad.f32 r1.w, r3.w, r3.w, r1.w -mul.f r3.w, r7.x, r5.y -mov.f32f32 r5.y, r8.x -sam.s (f32)(x)r7.x, r7.y, s#4, t#4 -(sy)mov.f32f32 r7.x, r7.x -mov.f32f32 r1.w, r1.w -mov.f32f32 r0.y, r0.y -mad.f32 r1.w, r4.x, r4.x, r1.w -mov.f32f32 r8.w, r5.y -mov.f32f32 r4.x, r2.z -add.f r2.x, r2.x, c17.x +(ss)mul.f r1.z, r3.y, r1.z +mul.f r1.w, r1.w, r5.z +mov.f32f32 r7.w, r8.w +add.f r2.x, c19.y, (neg)r0.z +add.f r3.y, c19.y, (neg)r0.z +mul.f r1.w, r1.w, c15.x +add.f r6.y, c19.y, (neg)r0.z +mov.f32f32 r9.z, r8.w +mov.f32f32 r7.x, r8.w +nop +sam.s (f32)(x)r9.w, r7.y, s#4, t#4 +(ss)mul.f r7.y, r2.x, c10.z +mov.f32f32 r2.x, r4.w +rcp r1.w, r1.w +(ss)mov.f32f32 r7.z, r1.w +mul.f r4.y, r4.y, r5.z +mad.f32 r0.x, r2.y, r2.y, r0.x +add.f r2.y, c16.x, (neg)r5.x +mad.f32 r0.x, r3.z, r3.z, r0.x +mul.f r3.y, r3.y, c10.y +mul.f r3.z, r6.y, c10.x +mov.f32f32 r5.x, r2.y +absneg.f r1.z, (neg)r1.z +sam.s (f32)(x)r10.x, r9.x, s#4, t#4 +sam.s (f32)(x)r6.y, r6.z, s#4, t#4 +sam.s (f32)(x)r8.y, r8.y, s#4, t#4 +add.f r2.w, r2.w, c17.x add.f r0.w, r0.w, c17.x -bary.f r5.y, 16, r1.x -(ss)mov.f32f32 r7.y, r4.x -sqrt r1.w, r1.w -(ss)mov.f32f32 r1.w, r1.w -sam.s (f32)(x)r4.x, r8.y, s#4, t#4 -(sy)mov.f32f32 r4.x, r4.x -mul.f r4.z, r2.x, r4.z -mov.f32f32 r9.y, r0.y -add.f r0.y, c14.z, (neg)r1.w -mov.f32f32 r1.w, r8.x -bary.f r7.w, 19, r1.x -mul.f r6.y, r6.y, r0.w -mov.f32f32 r0.y, r0.y -mad.f32 r3.w, r4.z, r4.x, r3.w -mov.f32f32 r9.z, r1.w -mov.f32f32 r1.w, r7.w -mul.f r0.y, c12.z, r0.y -mov.f32f32 r3.w, r3.w -mov.f32f32 r4.x, r5.y -mov.f32f32 r7.z, r1.w -mov.f32f32 r0.y, r0.y -sam.s (f32)(x)r1.w, r9.x, s#4, t#4 -(sy)mov.f32f32 r1.w, r1.w -mov.f32f32 r8.x, r4.x -bary.f r4.x, 17, r1.x -mul.f r0.y, r0.y, c14.w -mad.f32 r1.w, r6.y, r1.w, r3.w -(ss)nop -sam (f32)(w)r8.y, r7.y, s#2, t#2 -(sy)cmps.f.lt r3.w, r9.x, c15.y -mov.f32f32 r4.x, r4.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.w, r1.w -mul.f r0.w, r2.x, r0.w -cov.u32f32 r2.x, r3.w -mul.f r2.w, r2.w, r0.y -mul.f r0.y, r2.y, r0.y -mad.f32 r0.w, r0.w, r7.x, r1.w -cmps.f.ne r1.w, r2.x, c14.y -mov.f32f32 r2.x, r2.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r2.y, c15.z -mul.f r2.x, r2.x, r1.z -mul.f r0.y, r0.y, r1.z +sqrt r0.x, r0.x +(ss)add.f r0.x, c14.z, (neg)r0.x +mul.f r2.x, r2.x, r5.x +mul.f r2.y, r2.w, r2.y +mul.f r4.w, r4.w, r0.w +mul.f r0.x, c12.z, r0.x +(sy)mul.f r2.x, r2.x, r9.w +bary.f r6.z, 18, r1.x +mul.f r0.w, r2.w, r0.w +mul.f r0.x, r0.x, c14.w +mad.f32 r2.x, r2.y, r10.x, r2.x +bary.f r6.w, 19, r1.x +bary.f r8.z, 16, r1.x +mov.f32f32 r2.y, r0.x +mad.f32 r2.x, r4.w, r6.y, r2.x +mul.f r0.x, r1.z, r0.x +mad.f32 r0.w, r0.w, r8.y, r2.x +mul.f r1.z, r4.y, r2.y +sam (f32)(w)r9.x, r6.z, s#2, t#2 +bary.f r8.w, 17, r1.x +mul.f r0.x, r0.x, r1.w +(sy)cmps.f.lt r1.w, r9.w, c15.y +mul.f r1.z, r1.z, r7.z mul.f r0.w, c17.z, r0.w -mov.f32f32 r1.z, c14.y -mov.f32f32 r2.x, r2.x +mov.f32f32 r2.x, r0.x +cov.u32f32 r1.w, r1.w +mov.f32f32 r2.y, r1.z +cmps.f.lt r2.w, r9.w, c15.y +mov.f32f32 r4.y, r0.w +cmps.f.ne r1.w, r1.w, c14.y +mov.f32f32 r4.w, c14.y +mov.f32f32 r5.x, c14.y +cov.u32f32 r2.w, r2.w +sam (f32)(w)r8.y, r8.z, s#1, t#1 +(sy)cmps.f.lt r5.z, r9.x, c18.x +sel.b32 r1.z, r1.z, r1.w, r4.w +sel.b32 r0.x, r0.x, r1.w, r5.x +(rpt1)nop +add.f r1.w, r6.w, r1.z +add.f r1.z, r6.z, r0.x +cmps.f.ne r0.x, r2.w, c14.y +cov.u32f32 r2.w, r5.z +mov.f32f32 r4.w, c15.z +mov.f32f32 r5.x, c14.y mov.f32f32 r0.y, r0.y -mov.f32f32 r0.w, r0.w -sel.b32 r1.z, r2.y, r1.w, r1.z -mov.f32f32 r1.w, r2.x -mov.f32f32 r2.y, r0.y -mov.f32f32 r2.w, r0.y -mov.f32f32 r3.w, r0.y -mov.f32f32 r1.w, r1.w -cmps.f.lt r4.z, r9.x, c15.y -mov.f32f32 r2.y, r2.y -mov.f32f32 r2.w, r2.w +mov.f32f32 r5.z, r2.z +sam (f32)(w)r6.y, r1.z, s#2, t#2 +cmps.f.ne r2.z, r2.w, c14.y +sel.b32 r0.x, r4.w, r0.x, r5.x +mov.f32f32 r2.w, c14.y mov.f32f32 r3.w, r3.w -cov.u32f32 r4.z, r4.z -mov.f32f32 r5.y, r2.x -mov.f32f32 r6.y, r0.y -mov.f32f32 r7.x, r2.x -cmps.f.ne r4.z, r4.z, c14.y -(ss)mov.f32f32 r7.y, c14.y -mov.f32f32 r7.z, c14.y -mov.f32f32 r5.y, r5.y -mov.f32f32 r6.y, r6.y -sel.b32 r1.w, r1.w, r4.z, r7.y -sel.b32 r2.y, r2.y, r4.z, r7.z -mov.f32f32 r4.z, r7.x -mov.f32f32 r7.x, r0.y -add.f r1.w, r2.z, r1.w -add.f r2.y, r7.w, r2.y -mov.f32f32 r2.z, r2.x -mov.f32f32 r7.y, r0.y -mov.f32f32 r7.z, r1.w -mov.f32f32 r7.w, r2.y -mov.f32f32 r7.x, r7.x -mov.f32f32 r2.z, r2.z -mov.f32f32 r8.y, r7.z -mov.f32f32 r8.z, r7.w -mov.f32f32 r7.y, r7.y -mov.f32f32 r7.z, r2.x -mov.f32f32 r7.w, r0.y -mov.f32f32 r8.w, r2.x -mov.f32f32 r9.x, r0.y -mov.f32f32 r9.y, r2.x -sam (f32)(w)r9.z, r8.y, s#2, t#2 -add.f r1.z, c15.y, (neg)r1.z -mov.f32f32 r7.z, r7.z -mov.f32f32 r7.w, r7.w -(ss)mov.f32f32 r8.z, r8.w -(sy)cmps.f.lt r8.y, r10.y, r1.z -cmps.f.lt r8.w, r10.y, r1.z -mov.f32f32 r9.x, r9.x -mov.f32f32 r9.y, r9.y +mov.f32f32 r4.w, (0.000000) +add.f r0.x, c15.y, (neg)r0.x +sel.b32 r5.x, r2.w, r2.z, r9.x +bary.f r6.y, 2, r1.x +(ss)bary.f r6.z, 1, r1.x +(sy)cmps.f.lt r2.z, r7.x, r0.x +cmps.f.lt r2.w, r7.x, r0.x +bary.f r6.w, 23, r1.x +bary.f r7.x, 24, r1.x +cov.u32f32 r2.z, r2.z +cov.u32f32 r2.w, r2.w +bary.f r7.z, 25, r1.x +bary.f (ei)r1.x, 0, r1.x +cmps.f.ne r1.y, r2.z, c14.y +mov.f32f32 r2.z, c14.y +cmps.f.ne r2.w, r2.w, c14.y +mov.f32f32 r7.w, c14.y +mov.f32f32 r8.y, c15.z +sel.b32 r2.z, r2.x, r1.y, r2.z +mov.f32f32 r8.z, c14.y +sel.b32 r1.y, r2.y, r1.y, r7.w +mov.f32f32 r7.w, c14.y +add.f r1.z, r1.z, r2.z +sel.b32 r2.z, r8.y, r2.w, r8.z +add.f r8.z, r1.w, r1.y +mov.f32f32 r1.y, c14.y +mov.f32f32 r8.y, r1.z +add.f r0.x, r0.x, (neg)r2.z +mov.f32f32 r1.w, r8.z +mov.f32f32 r2.z, c14.y +mov.f32f32 r2.w, c15.z +mov.f32f32 r8.w, c14.y +mov.f32f32 r9.x, c14.y +sam (f32)(w)r9.y, r8.y, s#2, t#2 +(sy)(ss)cmps.f.lt r8.y, r10.x, r0.x +mov.f32f32 r0.x, r0.x +mov.f32f32 r8.z, c14.y +mov.f32f32 r9.y, c15.z cov.u32f32 r8.y, r8.y -cov.u32f32 r8.w, r8.w -mov.f32f32 r9.z, r0.y -mov.f32f32 r9.w, r2.x -mov.f32f32 r8.y, r8.y -cmps.f.ne r8.w, r8.w, c14.y -mov.f32f32 r10.x, c15.z -mov.f32f32 r10.y, c14.y +cmps.f.lt r9.z, r10.x, r0.x +mov.f32f32 r9.w, c14.y +mov.f32f32 r10.x, c14.y cmps.f.ne r8.y, r8.y, c14.y -mov.f32f32 r10.z, r2.x -mov.f32f32 r10.w, r0.y -sel.b32 r8.w, r10.x, r8.w, r10.y -mov.f32f32 r9.z, r9.z -mov.f32f32 r10.x, r10.z mov.f32f32 r10.y, c14.y -mov.f32f32 r10.z, r10.w +cov.u32f32 r9.z, r9.z +mov.f32f32 r10.z, c14.y mov.f32f32 r10.w, c14.y -add.f r1.z, r1.z, (neg)r8.w -sel.b32 r8.w, r10.x, r8.y, r10.y -mov.f32f32 r9.w, r9.w -mov.f32f32 r10.x, r0.y -mov.f32f32 r10.y, r2.x -add.f r1.w, r1.w, r8.w -sel.b32 r8.y, r10.z, r8.y, r10.w -mov.f32f32 r1.z, r1.z -mov.f32f32 r8.w, r10.x -mov.f32f32 r1.w, r1.w -add.f r2.y, r2.y, r8.y -mov.f32f32 r10.x, r10.y -mov.f32f32 r8.y, r0.y -mov.f32f32 r10.y, r1.w -mov.f32f32 r2.y, r2.y -mov.f32f32 r10.z, r2.x -mov.f32f32 r10.w, r0.y -mov.f32f32 r11.x, r10.y -mov.f32f32 r10.y, r2.y -mov.f32f32 r11.z, r8.y -mov.f32f32 r10.z, r10.z -mov.f32f32 r10.w, r10.w -mov.f32f32 r11.y, r10.y -mov.f32f32 r8.y, r2.x -mov.f32f32 r10.y, r0.y -mov.f32f32 r11.w, r2.x -mov.f32f32 r12.x, r0.y -mov.f32f32 r12.y, r2.x -mov.f32f32 r12.z, r0.y -sam (f32)(w)r12.w, r11.x, s#2, t#2 -(sy)(ss)cmps.f.lt r11.x, r13.z, r1.z -cmps.f.lt r11.y, r13.z, r1.z -mov.f32f32 r12.w, r8.y -mov.f32f32 r10.y, r10.y -cov.u32f32 r8.y, r11.x -cov.u32f32 r11.x, r11.y -mov.f32f32 r11.y, r11.w -mov.f32f32 r11.w, r12.x -mov.f32f32 r8.y, r8.y -cmps.f.ne r11.x, r11.x, c14.y -mov.f32f32 r12.x, c15.z +sel.b32 r10.y, r2.x, r8.y, r10.y +cmps.f.ne r9.z, r9.z, c14.y +sel.b32 r8.y, r2.y, r8.y, r10.z +mov.f32f32 r10.z, c15.z +add.f r1.z, r1.z, r10.y +mov.f32f32 r10.y, c14.y +add.f r11.y, r1.w, r8.y +mov.f32f32 r1.w, c15.z +mov.f32f32 r11.x, r1.z +sel.b32 r8.y, r10.z, r9.z, r10.y +mov.f32f32 r9.z, r11.y +mov.f32f32 r10.y, c14.y +mov.f32f32 r10.z, c14.y +mov.f32f32 r11.z, c14.y +mov.f32f32 r11.w, c15.z +sam (f32)(w)r12.x, r11.x, s#2, t#2 +add.f r0.x, r0.x, (neg)r8.y +mov.f32f32 r8.y, c14.y +(ss)mov.f32f32 r11.x, c14.y +mov.f32f32 r11.y, c14.y +(sy)cmps.f.lt r12.x, r12.w, r0.x +mov.f32f32 r0.x, r0.x +mov.f32f32 r12.y, c15.z +mov.f32f32 r12.z, c14.y +cov.u32f32 r12.x, r12.x +cmps.f.lt r12.w, r12.w, r0.x mov.f32f32 r13.x, c14.y -cmps.f.ne r8.y, r8.y, c14.y -mov.f32f32 r13.y, r2.x -mov.f32f32 r12.z, r12.z +mov.f32f32 r13.y, c14.y +cmps.f.ne r12.x, r12.x, c14.y mov.f32f32 r13.z, c14.y -sel.b32 r11.x, r12.x, r11.x, r13.x -mov.f32f32 r12.x, r13.y -mov.f32f32 r13.x, c14.y -sel.b32 r12.z, r12.z, r8.y, r13.z -add.f r1.z, r1.z, (neg)r11.x -mov.f32f32 r11.x, r12.y -sel.b32 r8.y, r12.x, r8.y, r13.x -add.f r2.y, r2.y, r12.z -mov.f32f32 r1.z, r1.z -mov.f32f32 r12.x, r2.x -add.f r1.w, r1.w, r8.y -mov.f32f32 r2.y, r2.y -mov.f32f32 r8.y, r4.x -bary.f r4.x, 23, r1.x -mov.f32f32 r1.w, r1.w -mov.f32f32 r12.y, r2.y -mov.f32f32 r12.x, r12.x -mov.f32f32 r4.x, r4.x -mov.f32f32 r12.z, r1.w -mov.f32f32 r13.y, r12.y -sam (f32)(w)r13.z, r8.x, s#1, t#1 -(sy)(ss)cmps.f.lt r8.x, r14.y, c18.x -mov.f32f32 r8.y, r14.y -mov.f32f32 r13.x, r12.z -bary.f r12.y, 24, r1.x -bary.f r12.z, 25, r1.x -mov.f32f32 r13.z, (0.000000) -cov.u32f32 r8.x, r8.x -mov.f32f32 r8.y, r8.y -mov.f32f32 r12.y, r12.y -sam (f32)(w)r13.w, r13.x, s#2, t#2 -(sy)(ss)cmps.f.lt r13.x, r14.z, r1.z -cmps.f.lt r13.y, r14.z, r1.z -cmps.f.ne r8.x, r8.x, c14.y -nop -cov.u32f32 r13.x, r13.x -cov.u32f32 r13.y, r13.y +cov.u32f32 r12.w, r12.w mov.f32f32 r13.w, c14.y -mov.f32f32 r12.z, r12.z -mov.f32f32 r13.x, r13.x -cmps.f.ne r13.y, r13.y, c14.y mov.f32f32 r14.x, c15.z -mov.f32f32 r14.y, c14.y -cmps.f.ne r13.x, r13.x, c14.y -mov.f32f32 r14.z, c14.y -mov.f32f32 r14.w, c14.y -sel.b32 r13.y, r14.x, r13.y, r14.y -sel.b32 r8.x, r13.w, r8.x, r8.y -sel.b32 r8.y, r12.x, r13.x, r14.z -sel.b32 r11.w, r11.w, r13.x, r14.w -add.f r1.z, r1.z, (neg)r13.y -bary.f r12.x, 2, r1.x -add.f r1.w, r1.w, r8.y -add.f r2.y, r2.y, r11.w -mov.f32f32 r1.z, r1.z -bary.f r8.y, 1, r1.x -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.y, r2.y -bary.f (ei)r1.x, 0, r1.x -mov.f32f32 r1.y, c14.y -mov.f32f32 r11.w, r1.w -mov.f32f32 r13.x, r2.y -mov.f32f32 r13.y, c14.y +sel.b32 r13.z, r2.x, r12.x, r13.z +cmps.f.ne r12.w, r12.w, c14.y +sel.b32 r12.x, r2.y, r12.x, r13.w +mov.f32f32 r13.w, c15.z +add.f r1.z, r1.z, r13.z +mov.f32f32 r13.z, c14.y +add.f r14.z, r9.z, r12.x +mov.f32f32 r9.z, c14.y +mov.f32f32 r14.y, r1.z +sel.b32 r12.x, r13.w, r12.w, r13.z +mov.f32f32 r12.w, r14.z +mov.f32f32 r13.z, c14.y mov.f32f32 r13.w, c14.y -mov.f32f32 r14.x, r11.w -mov.f32f32 r14.y, r13.x -mov.f32f32 r11.w, c15.z -mov.f32f32 r13.x, c14.y -mov.f32f32 r14.z, c14.y -mov.f32f32 r14.w, c14.y -mov.f32f32 r15.x, c15.z -mov.f32f32 r15.y, c14.y -sam (f32)(w)r15.z, r14.x, s#2, t#2 -(sy)(ss)cmps.f.lt r14.x, r16.y, r1.z -cmps.f.lt r14.y, r16.y, r1.z +mov.f32f32 r14.w, c15.z +mov.f32f32 r15.x, c14.y +sam (f32)(w)r15.y, r14.y, s#2, t#2 +add.f r0.x, r0.x, (neg)r12.x +mov.f32f32 r12.x, c14.y +(ss)mov.f32f32 r14.y, c14.y +mov.f32f32 r14.z, c15.z +(sy)cmps.f.lt r15.y, r16.x, r0.x +mov.f32f32 r0.x, r0.x mov.f32f32 r15.z, c14.y mov.f32f32 r15.w, c14.y -cov.u32f32 r14.x, r14.x -cov.u32f32 r14.y, r14.y -mov.f32f32 r16.x, c15.z +cov.u32f32 r15.y, r15.y +cmps.f.lt r16.x, r16.x, r0.x mov.f32f32 r16.y, c14.y -mov.f32f32 r14.x, r14.x -cmps.f.ne r14.y, r14.y, c14.y mov.f32f32 r16.z, c15.z +cmps.f.ne r15.y, r15.y, c14.y mov.f32f32 r16.w, c14.y -cmps.f.ne r14.x, r14.x, c14.y +cov.u32f32 r16.x, r16.x mov.f32f32 r17.x, c14.y mov.f32f32 r17.y, c14.y -sel.b32 r14.y, r16.z, r14.y, r16.w -mov.f32f32 r16.z, c14.y -sel.b32 r11.x, r11.x, r14.x, r17.x -sel.b32 r10.y, r10.y, r14.x, r17.y -add.f r1.z, r1.z, (neg)r14.y -mov.f32f32 r14.x, c14.y -add.f r1.w, r1.w, r11.x -add.f r2.y, r2.y, r10.y -mov.f32f32 r1.z, r1.z -mov.f32f32 r10.y, c15.z -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.y, r2.y -mov.f32f32 r11.x, c14.y -mov.f32f32 r14.y, c14.y -mov.f32f32 r16.w, r1.w -mov.f32f32 r17.x, r2.y -mov.f32f32 r17.y, c14.y -mov.f32f32 r17.z, c15.z -mov.f32f32 r17.w, r16.w -mov.f32f32 r18.x, r17.x +sel.b32 r16.w, r2.x, r15.y, r16.w +cmps.f.ne r16.x, r16.x, c14.y +sel.b32 r15.y, r2.y, r15.y, r17.x +mov.f32f32 r17.x, c15.z +add.f r1.z, r1.z, r16.w mov.f32f32 r16.w, c14.y -mov.f32f32 r17.x, c14.y -mov.f32f32 r18.y, c14.y -mov.f32f32 r18.z, c15.z -mov.f32f32 r18.w, c14.y -mov.f32f32 r19.x, c14.y -sam (f32)(w)r19.y, r17.w, s#2, t#2 -(sy)(ss)cmps.f.lt r17.w, r20.x, r1.z -cmps.f.lt r18.x, r20.x, r1.z -mov.f32f32 r19.y, c14.y -mov.f32f32 r19.z, c15.z -cov.u32f32 r17.w, r17.w -cov.u32f32 r18.x, r18.x -mov.f32f32 r19.w, c14.y -mov.f32f32 r20.x, c14.y -mov.f32f32 r17.w, r17.w -cmps.f.ne r18.x, r18.x, c14.y -mov.f32f32 r20.y, c15.z -mov.f32f32 r20.z, c14.y -cmps.f.ne r17.w, r17.w, c14.y -mov.f32f32 r20.w, c14.y -mov.f32f32 r21.x, c14.y -sel.b32 r18.x, r20.y, r18.x, r20.z -mov.f32f32 r20.y, c14.y -sel.b32 r11.y, r11.y, r17.w, r20.w -sel.b32 r10.w, r10.w, r17.w, r21.x -add.f r1.z, r1.z, (neg)r18.x -mov.f32f32 r17.w, c15.z -add.f r1.w, r1.w, r11.y -add.f r2.y, r2.y, r10.w -mov.f32f32 r1.z, r1.z -mov.f32f32 r10.w, c14.y -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.y, r2.y -mov.f32f32 r11.y, c14.y +add.f r17.w, r12.w, r15.y +mov.f32f32 r12.w, c14.y +mov.f32f32 r17.z, r1.z +sel.b32 r15.y, r17.x, r16.x, r16.w +mov.f32f32 r16.x, r17.w +mov.f32f32 r16.w, c14.y +mov.f32f32 r17.x, c15.z mov.f32f32 r18.x, c14.y -mov.f32f32 r20.z, r1.w -mov.f32f32 r20.w, r2.y -mov.f32f32 r21.x, c15.z -mov.f32f32 r21.y, c14.y -mov.f32f32 r21.z, r20.z -mov.f32f32 r21.w, r20.w -mov.f32f32 r20.z, c14.y -(rpt4)nop -sam (f32)(w)r21.z, r21.z, s#2, t#2 -(sy)cmps.f.lt r20.w, r22.y, r1.z -(ss)cmps.f.lt r21.z, r22.y, r1.z -(rpt1)nop -cov.u32f32 r20.w, r20.w -cov.u32f32 r21.z, r21.z +mov.f32f32 r18.y, c14.y +sam (f32)(w)r18.z, r17.z, s#2, t#2 +add.f r0.x, r0.x, (neg)r15.y +(rpt2)nop +(sy)cmps.f.lt r15.y, r19.y, r0.x +mov.f32f32 r0.x, r0.x (rpt1)nop -mov.f32f32 r20.w, r20.w -cmps.f.ne r21.z, r21.z, c14.y +cov.u32f32 r15.y, r15.y +(ss)cmps.f.lt r17.z, r19.y, r0.x (rpt1)nop -cmps.f.ne r20.w, r20.w, c14.y -sel.b32 r18.x, r21.x, r21.z, r18.x +cmps.f.ne r15.y, r15.y, c14.y +cov.u32f32 r17.z, r17.z (rpt1)nop -sel.b32 r12.w, r12.w, r20.w, r20.z -sel.b32 r11.z, r11.z, r20.w, r21.y -add.f r1.z, r1.z, (neg)r18.x +sel.b32 r17.w, r2.x, r15.y, r18.y +cmps.f.ne r17.z, r17.z, c14.y +sel.b32 r15.y, r2.y, r15.y, r18.x nop -add.f r1.w, r1.w, r12.w -add.f r2.y, r2.y, r11.z -mov.f32f32 r1.z, r1.z +add.f r1.z, r1.z, r17.w +sel.b32 r16.w, r17.x, r17.z, r16.w +add.f r17.w, r16.x, r15.y nop -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.y, r2.y -(rpt1)nop -mov.f32f32 r11.z, r1.w -mov.f32f32 r12.w, r2.y -(rpt1)nop -mov.f32f32 r20.z, r11.z -mov.f32f32 r20.w, r12.w -(rpt5)nop -sam (f32)(w)r20.z, r20.z, s#2, t#2 -(sy)cmps.f.lt r11.z, r21.y, r1.z -cmps.f.lt r12.w, r21.y, r1.z +mov.f32f32 r17.z, r1.z +add.f r0.x, r0.x, (neg)r16.w +mov.f32f32 r15.y, r17.w +(rpt3)nop +sam (f32)(w)r17.z, r17.z, s#2, t#2 +(sy)cmps.f.lt r16.x, r18.y, r0.x +mov.f32f32 r0.x, r0.x (rpt1)nop -cov.u32f32 r11.z, r11.z -cov.u32f32 r12.w, r12.w +cov.u32f32 r16.x, r16.x +cmps.f.lt r16.w, r18.y, r0.x (rpt1)nop -mov.f32f32 r11.z, r11.z -cmps.f.ne r12.w, r12.w, c14.y +cmps.f.ne r16.x, r16.x, c14.y +cov.u32f32 r16.w, r16.w (rpt1)nop -cmps.f.ne r11.z, r11.z, c14.y -sel.b32 r12.w, r17.w, r12.w, r20.y -(rpt1)nop -sel.b32 r10.z, r10.z, r11.z, r11.y -sel.b32 r8.w, r8.w, r11.z, r10.w -add.f r1.z, r1.z, (neg)r12.w +sel.b32 r12.w, r2.x, r16.x, r12.w +cmps.f.ne r16.w, r16.w, c14.y +sel.b32 r16.x, r2.y, r16.x, r17.y nop -add.f r1.w, r1.w, r10.z -add.f r2.y, r2.y, r8.w -mov.f32f32 r1.z, r1.z +add.f r1.z, r1.z, r12.w +sel.b32 r12.w, r16.z, r16.w, r16.y +add.f r16.y, r15.y, r16.x nop -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.y, r2.y -(rpt1)nop -mov.f32f32 r8.w, r1.w -mov.f32f32 r10.z, r2.y -(rpt1)nop -mov.f32f32 r11.y, r8.w -mov.f32f32 r11.z, r10.z -(rpt5)nop -sam (f32)(w)r20.y, r11.y, s#2, t#2 -(sy)cmps.f.lt r8.w, r21.x, r1.z -cmps.f.lt r10.z, r21.x, r1.z -(rpt1)nop -cov.u32f32 r8.w, r8.w -cov.u32f32 r10.z, r10.z +mov.f32f32 r16.x, r1.z +add.f r0.x, r0.x, (neg)r12.w +mov.f32f32 r12.w, r16.y +(rpt3)nop +sam (f32)(w)r16.x, r16.x, s#2, t#2 +(sy)cmps.f.lt r15.y, r16.w, r0.x +mov.f32f32 r0.x, r0.x (rpt1)nop -mov.f32f32 r8.w, r8.w -cmps.f.ne r10.z, r10.z, c14.y +cov.u32f32 r15.y, r15.y +(ss)cmps.f.lt r16.x, r16.w, r0.x (rpt1)nop -cmps.f.ne r8.w, r8.w, c14.y -sel.b32 r10.z, r19.z, r10.z, r19.y +cmps.f.ne r15.y, r15.y, c14.y +cov.u32f32 r16.x, r16.x (rpt1)nop -sel.b32 r10.x, r10.x, r8.w, r20.x -sel.b32 r8.w, r9.z, r8.w, r19.w -add.f r1.z, r1.z, (neg)r10.z +sel.b32 r15.w, r2.x, r15.y, r15.w +cmps.f.ne r16.x, r16.x, c14.y +sel.b32 r15.y, r2.y, r15.y, r15.z nop -add.f r1.w, r1.w, r10.x -add.f r2.y, r2.y, r8.w -mov.f32f32 r1.z, r1.z +add.f r1.z, r1.z, r15.w +sel.b32 r14.y, r14.z, r16.x, r14.y +add.f r15.z, r12.w, r15.y nop -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.y, r2.y -(rpt1)nop -mov.f32f32 r8.w, r1.w -mov.f32f32 r9.z, r2.y -(rpt1)nop -mov.f32f32 r10.z, r8.w -mov.f32f32 r10.w, r9.z -(rpt5)nop -sam (f32)(w)r19.y, r10.z, s#2, t#2 -(sy)cmps.f.lt r8.w, r20.x, r1.z -cmps.f.lt r9.z, r20.x, r1.z -(rpt1)nop -cov.u32f32 r8.w, r8.w -cov.u32f32 r9.z, r9.z +mov.f32f32 r15.y, r1.z +add.f r0.x, r0.x, (neg)r14.y +mov.f32f32 r12.w, r15.z +(rpt3)nop +sam (f32)(w)r15.y, r15.y, s#2, t#2 +(sy)cmps.f.lt r14.y, r16.x, r0.x +mov.f32f32 r0.x, r0.x (rpt1)nop -mov.f32f32 r8.w, r8.w -cmps.f.ne r9.z, r9.z, c14.y +cov.u32f32 r14.y, r14.y +cmps.f.lt r14.z, r16.x, r0.x (rpt1)nop -cmps.f.ne r8.w, r8.w, c14.y -sel.b32 r9.z, r18.z, r9.z, r18.y +cmps.f.ne r14.y, r14.y, c14.y +cov.u32f32 r14.z, r14.z (rpt1)nop -sel.b32 r9.w, r9.w, r8.w, r19.x -sel.b32 r8.w, r9.x, r8.w, r18.w -add.f r1.z, r1.z, (neg)r9.z +sel.b32 r12.x, r2.x, r14.y, r12.x +cmps.f.ne r14.z, r14.z, c14.y +sel.b32 r14.y, r2.y, r14.y, r15.x nop -add.f r1.w, r1.w, r9.w -add.f r2.y, r2.y, r8.w -mov.f32f32 r1.z, r1.z +add.f r1.z, r1.z, r12.x +sel.b32 r12.x, r14.w, r14.z, r13.w +add.f r14.z, r12.w, r14.y nop -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.y, r2.y -(rpt1)nop -mov.f32f32 r8.w, r1.w -mov.f32f32 r9.x, r2.y -(rpt1)nop -mov.f32f32 r9.z, r8.w -mov.f32f32 r9.w, r9.x -(rpt5)nop -sam (f32)(w)r17.w, r9.z, s#2, t#2 -(sy)cmps.f.lt r8.w, r18.z, r1.z -cmps.f.lt r9.x, r18.z, r1.z -(rpt1)nop -cov.u32f32 r8.w, r8.w -cov.u32f32 r9.x, r9.x +mov.f32f32 r14.y, r1.z +add.f r0.x, r0.x, (neg)r12.x +mov.f32f32 r12.x, r14.z +(rpt3)nop +sam (f32)(w)r14.y, r14.y, s#2, t#2 +(sy)cmps.f.lt r12.w, r15.x, r0.x +mov.f32f32 r0.x, r0.x (rpt1)nop -mov.f32f32 r8.w, r8.w -cmps.f.ne r9.x, r9.x, c14.y +cov.u32f32 r12.w, r12.w +cmps.f.lt r13.w, r15.x, r0.x (rpt1)nop -cmps.f.ne r8.w, r8.w, c14.y -sel.b32 r9.x, r17.z, r9.x, r17.y +cmps.f.ne r12.w, r12.w, c14.y +cov.u32f32 r13.w, r13.w (rpt1)nop -sel.b32 r9.y, r9.y, r8.w, r17.x -sel.b32 r7.w, r7.w, r8.w, r16.w -add.f r1.z, r1.z, (neg)r9.x +sel.b32 r13.z, r2.x, r12.w, r13.z +cmps.f.ne r13.w, r13.w, c14.y +sel.b32 r9.z, r2.y, r12.w, r9.z nop -add.f r1.w, r1.w, r9.y -add.f r2.y, r2.y, r7.w -mov.f32f32 r1.z, r1.z +add.f r1.z, r1.z, r13.z +sel.b32 r12.w, r14.x, r13.w, r13.y +add.f r13.z, r12.x, r9.z nop -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.y, r2.y -(rpt1)nop -mov.f32f32 r7.w, r1.w -mov.f32f32 r8.w, r2.y -(rpt1)nop -mov.f32f32 r9.x, r7.w -mov.f32f32 r9.y, r8.w -(rpt5)nop -(ss)nop -sam (f32)(w)r8.w, r9.x, s#2, t#2 -(sy)cmps.f.lt r7.w, r9.z, r1.z -cmps.f.lt r8.w, r9.z, r1.z -(rpt1)nop -cov.u32f32 r7.w, r7.w -cov.u32f32 r8.w, r8.w +mov.f32f32 r13.y, r1.z +add.f r0.x, r0.x, (neg)r12.w +mov.f32f32 r9.z, r13.z +(rpt3)nop +sam (f32)(w)r13.y, r13.y, s#2, t#2 +(sy)cmps.f.lt r12.x, r14.x, r0.x +mov.f32f32 r0.x, r0.x (rpt1)nop -mov.f32f32 r7.w, r7.w -cmps.f.ne r8.w, r8.w, c14.y +cov.u32f32 r12.x, r12.x +cmps.f.lt r12.w, r14.x, r0.x (rpt1)nop -cmps.f.ne r7.w, r7.w, c14.y -sel.b32 r8.w, r10.y, r8.w, r14.x +cmps.f.ne r12.x, r12.x, c14.y +cov.u32f32 r12.w, r12.w (rpt1)nop -sel.b32 r8.z, r8.z, r7.w, r14.y -sel.b32 r7.y, r7.y, r7.w, r11.x -add.f r1.z, r1.z, (neg)r8.w +sel.b32 r13.x, r2.x, r12.x, r13.x +cmps.f.ne r12.w, r12.w, c14.y +sel.b32 r12.x, r2.y, r12.x, r12.z nop -add.f r1.w, r1.w, r8.z -add.f r2.y, r2.y, r7.y -mov.f32f32 r1.z, r1.z +add.f r1.z, r1.z, r13.x +sel.b32 r11.y, r12.y, r12.w, r11.y +add.f r12.y, r9.z, r12.x nop -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.y, r2.y -(rpt1)nop -mov.f32f32 r7.y, r1.w -mov.f32f32 r7.w, r2.y -(rpt1)nop -mov.f32f32 r8.z, r7.y -mov.f32f32 r8.w, r7.w -(rpt5)nop -(ss)nop -sam (f32)(w)r8.z, r8.z, s#2, t#2 -(sy)cmps.f.lt r7.y, r9.y, r1.z -cmps.f.lt r7.w, r9.y, r1.z -(rpt1)nop -cov.u32f32 r7.y, r7.y -cov.u32f32 r7.w, r7.w +mov.f32f32 r12.x, r1.z +add.f r0.x, r0.x, (neg)r11.y +mov.f32f32 r9.z, r12.y +(rpt3)nop +sam (f32)(w)r12.x, r12.x, s#2, t#2 +(sy)cmps.f.lt r11.y, r12.w, r0.x +mov.f32f32 r0.x, r0.x (rpt1)nop -mov.f32f32 r7.y, r7.y -cmps.f.ne r7.w, r7.w, c14.y +cov.u32f32 r11.y, r11.y +(ss)cmps.f.lt r12.x, r12.w, r0.x (rpt1)nop -cmps.f.ne r7.y, r7.y, c14.y -sel.b32 r7.w, r16.x, r7.w, r15.w +cmps.f.ne r11.y, r11.y, c14.y +cov.u32f32 r12.x, r12.x (rpt1)nop -sel.b32 r7.z, r7.z, r7.y, r16.z -sel.b32 r7.x, r7.x, r7.y, r16.y -add.f r1.z, r1.z, (neg)r7.w +sel.b32 r11.x, r2.x, r11.y, r11.x +cmps.f.ne r12.x, r12.x, c14.y +sel.b32 r8.y, r2.y, r11.y, r8.y nop -add.f r1.w, r1.w, r7.z -add.f r2.y, r2.y, r7.x -mov.f32f32 r1.z, r1.z +add.f r1.z, r1.z, r11.x +sel.b32 r11.x, r11.w, r12.x, r11.z +add.f r11.z, r9.z, r8.y nop -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.y, r2.y -(rpt1)nop -mov.f32f32 r7.x, r1.w -mov.f32f32 r7.y, r2.y -(rpt1)nop -mov.f32f32 r7.z, r7.x -mov.f32f32 r7.w, r7.y -(rpt5)nop -sam (f32)(w)r7.x, r7.z, s#2, t#2 -(sy)cmps.f.lt r7.x, r7.w, r1.z -cmps.f.lt r7.y, r7.w, r1.z -(rpt1)nop -cov.u32f32 r7.x, r7.x -cov.u32f32 r7.y, r7.y +mov.f32f32 r11.y, r1.z +add.f r0.x, r0.x, (neg)r11.x +mov.f32f32 r8.y, r11.z +(rpt3)nop +sam (f32)(w)r11.x, r11.y, s#2, t#2 +(sy)cmps.f.lt r9.z, r11.w, r0.x +mov.f32f32 r0.x, r0.x (rpt1)nop -mov.f32f32 r7.x, r7.x -cmps.f.ne r7.y, r7.y, c14.y +cov.u32f32 r9.z, r9.z +cmps.f.lt r11.x, r11.w, r0.x (rpt1)nop -cmps.f.ne r7.x, r7.x, c14.y -sel.b32 r7.y, r15.x, r7.y, r14.w +cmps.f.ne r9.z, r9.z, c14.y +cov.u32f32 r11.x, r11.x (rpt1)nop -sel.b32 r2.z, r2.z, r7.x, r15.z -sel.b32 r6.y, r6.y, r7.x, r15.y -add.f r1.z, r1.z, (neg)r7.y +sel.b32 r10.z, r2.x, r9.z, r10.z +cmps.f.ne r11.x, r11.x, c14.y +sel.b32 r9.z, r2.y, r9.z, r10.y nop -add.f r1.w, r1.w, r2.z -add.f r2.y, r2.y, r6.y -mov.f32f32 r1.z, r1.z +add.f r1.z, r1.z, r10.z +sel.b32 r1.w, r1.w, r11.x, r10.w +add.f r10.z, r8.y, r9.z nop -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.y, r2.y +mov.f32f32 r10.y, r1.z +add.f r0.x, r0.x, (neg)r1.w +mov.f32f32 r1.w, r10.z +(rpt3)nop +sam (f32)(w)r10.y, r10.y, s#2, t#2 +(sy)cmps.f.lt r8.y, r11.x, r0.x +mov.f32f32 r0.x, r0.x (rpt1)nop -mov.f32f32 r2.z, r1.w -mov.f32f32 r6.y, r2.y +cov.u32f32 r8.y, r8.y +cmps.f.lt r9.z, r11.x, r0.x (rpt1)nop -mov.f32f32 r7.x, r2.z -mov.f32f32 r7.y, r6.y -(rpt5)nop -(ss)nop -sam (f32)(w)r7.x, r7.x, s#2, t#2 -(sy)cmps.f.lt r2.z, r7.w, r1.z -cmps.f.lt r6.y, r7.w, r1.z +cmps.f.ne r8.y, r8.y, c14.y +cov.u32f32 r9.z, r9.z (rpt1)nop -cov.u32f32 r2.z, r2.z -cov.u32f32 r6.y, r6.y +sel.b32 r10.x, r2.x, r8.y, r10.x +cmps.f.ne r9.z, r9.z, c14.y +sel.b32 r8.y, r2.y, r8.y, r9.w +nop +add.f r1.z, r1.z, r10.x +sel.b32 r8.z, r9.y, r9.z, r8.z +add.f r9.z, r1.w, r8.y +nop +mov.f32f32 r9.y, r1.z +add.f r0.x, r0.x, (neg)r8.z +mov.f32f32 r1.w, r9.z +(rpt3)nop +sam (f32)(w)r9.y, r9.y, s#2, t#2 +(sy)cmps.f.lt r8.y, r10.x, r0.x +mov.f32f32 r0.x, r0.x (rpt1)nop -mov.f32f32 r2.z, r2.z -cmps.f.ne r6.y, r6.y, c14.y +cov.u32f32 r8.y, r8.y +cmps.f.lt r8.z, r10.x, r0.x (rpt1)nop -cmps.f.ne r2.z, r2.z, c14.y -sel.b32 r6.y, r11.w, r6.y, r13.w +cmps.f.ne r8.y, r8.y, c14.y +cov.u32f32 r8.z, r8.z (rpt1)nop -sel.b32 r4.z, r4.z, r2.z, r14.z -sel.b32 r2.z, r3.w, r2.z, r13.x -add.f r1.z, r1.z, (neg)r6.y +sel.b32 r9.x, r2.x, r8.y, r9.x +cmps.f.ne r8.z, r8.z, c14.y +sel.b32 r8.y, r2.y, r8.y, r8.w nop -add.f r1.w, r1.w, r4.z -add.f r2.y, r2.y, r2.z -mov.f32f32 r1.z, r1.z +add.f r1.z, r1.z, r9.x +sel.b32 r2.z, r2.w, r8.z, r2.z +add.f r8.z, r1.w, r8.y nop -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.y, r2.y -(rpt1)nop -mov.f32f32 r2.z, r1.w -mov.f32f32 r3.w, r2.y +mov.f32f32 r8.y, r1.z +add.f r0.x, r0.x, (neg)r2.z +mov.f32f32 r1.w, r8.z +(rpt3)nop +sam (f32)(w)r8.y, r8.y, s#2, t#2 +(sy)cmps.f.lt r2.z, r9.x, r0.x +mov.f32f32 r0.x, r0.x (rpt1)nop -(ss)mov.f32f32 r7.x, r2.z -mov.f32f32 r7.y, r3.w -(rpt5)nop -sam (f32)(w)r7.x, r7.x, s#2, t#2 -(sy)cmps.f.lt r2.z, r7.w, r1.z -(rpt2)nop cov.u32f32 r2.z, r2.z (rpt2)nop cmps.f.ne r2.z, r2.z, c14.y (rpt2)nop -sel.b32 r3.w, r5.y, r2.z, r13.y -sel.b32 r1.y, r2.w, r2.z, r1.y +sel.b32 r1.y, r2.x, r2.z, r1.y +sel.b32 r2.z, r2.y, r2.z, r7.w (rpt1)nop -add.f r1.w, r1.w, r3.w -add.f r1.y, r2.y, r1.y -(rpt1)nop -mov.f32f32 r1.w, r1.w -mov.f32f32 r1.y, r1.y +add.f r1.y, r1.z, r1.y +add.f r1.w, r1.w, r2.z (rpt1)nop -add.f r2.y, r1.w, (neg)r2.x +mov.f32f32 r1.z, r1.y mov.f32f32 r2.z, r1.w -add.f r2.w, r1.y, (neg)r0.y -mov.f32f32 r3.w, r1.y -mov.f32f32 r2.y, r2.y -(ss)mov.f32f32 r7.x, r2.z -mov.f32f32 r2.z, r2.w -mov.f32f32 r7.y, r3.w -mov.f32f32 r2.y, r2.y -(rpt2)nop -mov.f32f32 r7.z, r2.y -mov.f32f32 r2.y, r2.z -sam (f32)(w)r8.z, r7.x, s#2, t#2 -(sy)add.f r2.z, c15.z, r9.y -add.f r1.z, r9.y, (neg)r1.z -nop -mov.f32f32 r7.w, r2.y -mov.f32f32 r2.y, r2.z -mov.f32f32 r1.z, r1.z -(rpt3)nop -sam (f32)(w)r7.x, r7.z, s#2, t#2 -(sy)add.f r2.y, r2.y, (neg)r7.w -(rpt2)nop -mov.f32f32 r2.y, r2.y +(rpt1)nop +(ss)add.f r8.y, r1.z, (neg)r2.x +add.f r8.z, r2.z, (neg)r2.y +(rpt1)nop +sam (f32)(w)r8.w, r1.z, s#2, t#2 +(sy)(ss)add.f r1.z, c15.z, r9.z +add.f r0.x, r9.z, (neg)r0.x +(rpt1)nop +sam (f32)(w)r8.y, r8.y, s#2, t#2 +(sy)add.f r1.z, r1.z, (neg)r9.x (rpt5)nop -rcp r2.y, r2.y -(ss)mov.f32f32 r2.y, r2.y -(rpt2)nop -mul.f r1.z, r1.z, r2.y -(rpt2)nop -mov.f32f32 r1.z, r1.z +rcp r1.z, r1.z +(ss)mul.f r0.x, r0.x, r1.z (rpt2)nop -mul.f r2.x, r2.x, r1.z -mul.f r0.y, r0.y, r1.z +(ss)mov.f32f32 r1.z, r0.x +mul.f r0.x, r2.x, r0.x (rpt1)nop -mov.f32f32 r1.z, r2.x -mov.f32f32 r0.y, r0.y -(rpt1)nop -add.f r1.z, r1.w, (neg)r1.z -add.f r0.y, r1.y, (neg)r0.y +mul.f r1.z, r2.y, r1.z +add.f r1.w, r1.y, (neg)r0.x (rpt1)nop -mov.f32f32 r1.y, r1.z -mov.f32f32 r0.y, r0.y +add.f r2.x, r2.z, (neg)r1.z +mov.f32f32 r1.y, r1.w (rpt1)nop -mov.f32f32 r1.z, r1.y -mov.f32f32 r1.w, r1.y -mov.f32f32 r1.y, r1.y -mov.f32f32 r2.x, r0.y -mov.f32f32 r2.y, r1.z -mov.f32f32 r1.z, r0.y -mov.f32f32 r7.x, r1.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.y, r1.y -mov.f32f32 r2.z, r1.z mov.f32f32 r1.z, r2.x -mov.f32f32 r7.y, r0.y +mov.f32f32 r8.y, r1.y nop -mov.f32f32 r1.w, r1.y -mov.f32f32 r2.x, r1.z -nop -sam (f32)(xyz)r8.z, r2.y, s#2, t#2 -(sy)mad.f32 r0.y, c16.x, r8.w, c16.y -mad.f32 r1.y, c16.x, r8.z, c16.y -sam (f32)(xyzw)r7.x, r7.x, s#0, t#0 -(sy)cmps.f.lt r1.z, r7.w, c15.w -(ss)mov.f32f32 r2.y, r7.w -mov.f32f32 r0.y, r0.y -sam (f32)(xyz)r9.y, r1.w, s#3, t#3 -(sy)(ss)mul.f r1.w, c8.y, r9.z -mul.f r2.x, c8.x, r9.y -cov.u32f32 r1.z, r1.z -absneg.f r0.y, (neg)r0.y -mov.f32f32 r1.y, r1.y -mul.f r2.z, c8.z, r9.w -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.x, r2.x -mul.f r2.w, r4.y, r1.y -mul.f r3.z, r3.z, r1.y -mad.f32 r0.x, r0.x, r0.y, r2.w -mad.f32 r2.w, r3.x, r0.y, r3.z -mul.f r1.y, r4.w, r1.y -cmps.f.ne p0.x, r1.z, r13.z -mov.f32f32 r0.x, r0.x +sam (f32)(xyzw)r2.x, r1.w, s#0, t#0 +(sy)cmps.f.lt r0.x, r2.w, c15.w +mov.f32f32 r8.z, r1.z +(rpt1)nop +sam (f32)(xyz)r8.w, r1.y, s#2, t#2 +(sy)(ss)mad.f32 r1.y, c16.x, r8.w, c16.y +cov.u32f32 r0.x, r0.x mad.f32 r1.z, c16.x, r9.x, c16.y -mov.f32f32 r2.w, r2.w -mad.f32 r0.y, r3.y, r0.y, r1.y -mov.f32f32 r1.y, r2.z -mov.f32f32 r1.z, r1.z +mad.f32 r1.w, c16.x, r9.y, c16.y +mov.f32f32 r7.w, r1.y +cmps.f.ne p0.x, r0.x, r4.w +absneg.f r0.x, (neg)r1.z +mul.f r1.y, r4.x, r1.y +mul.f r1.z, r4.z, r7.w +mul.f r4.x, r5.y, r7.w +mad.f32 r0.y, r0.y, r0.x, r1.z +mov.f32f32 r1.z, r1.w +mad.f32 r4.x, r5.z, r0.x, r4.x +mad.f32 r0.x, r3.w, r0.x, r1.y kill p0.x -mov.f32f32 r3.w, r2.y -mov.f32f32 r0.y, r0.y -mad.f32 r0.x, r5.z, r1.z, r0.x -mad.f32 r2.y, r6.w, r1.z, r2.w -mad.f32 r0.y, r6.z, r1.z, r0.y -nop -mov.f32f32 r0.x, r0.x -mov.f32f32 r1.z, r2.y -mov.f32f32 r0.y, r0.y +mad.f32 r0.y, r6.x, r1.z, r0.y +mad.f32 r1.y, r5.w, r1.z, r4.x +mad.f32 r0.x, r8.x, r1.w, r0.x nop -mul.f r2.y, r0.x, r0.x -nop -mad.f32 r2.y, r1.z, r1.z, r2.y -(rpt2)nop -mov.f32f32 r2.y, r2.y -nop -mad.f32 r2.y, r0.y, r0.y, r2.y +mov.f32f32 r1.z, r0.y +mov.f32f32 r1.w, r1.y +mov.f32f32 r3.w, r0.x +sam (f32)(xyz)r5.y, r8.y, s#3, t#3 +(sy)mul.f r4.x, c8.z, r5.w +mul.f r0.y, r0.y, r1.z +mul.f r4.z, c8.y, r5.z +mad.f32 r0.y, r1.y, r1.w, r0.y +mul.f r1.y, c8.x, r5.y +mad.f32 r0.y, r3.w, r3.w, r0.y (rpt5)nop -rsq r2.y, r2.y -(ss)mov.f32f32 r2.y, r2.y -(rpt2)nop -mul.f r0.x, r0.x, r2.y -mul.f r1.z, r1.z, r2.y -mul.f r0.y, r0.y, r2.y -nop -mov.f32f32 r0.x, r0.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r0.y, r0.y -nop -mul.f r2.y, r0.x, r0.x -mul.f r2.w, (neg)c9.x, r0.x -mad.f32 r2.y, r1.z, r1.z, r2.y -mad.f32 r2.w, (neg)c9.y, r1.z, r2.w +rsq r0.y, r0.y +(ss)mov.f32f32 r3.w, r0.y +mul.f r0.x, r0.x, r0.y (rpt1)nop -mov.f32f32 r2.y, r2.y -mov.f32f32 r2.w, r2.w -mad.f32 r2.y, r0.y, r0.y, r2.y -mad.f32 r2.w, (neg)c9.z, r0.y, r2.w -(rpt4)nop -rsq r2.y, r2.y -(ss)mov.f32f32 r2.y, r2.y -max.f r2.w, r2.w, c14.y -(rpt1)nop -mul.f r0.x, r0.x, r2.y -mov.f32f32 r2.w, r2.w -mul.f r1.z, r1.z, r2.y -mul.f r0.y, r0.y, r2.y -mov.f32f32 r0.x, r0.x -mad.f32 r2.y, c8.z, r2.w, (neg)r12.x -mad.f32 r4.y, c8.y, r2.w, (neg)r8.y -mad.f32 r2.w, c8.x, r2.w, (neg)r1.x -mul.f r0.x, r0.x, r4.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r2.y, r2.y -mov.f32f32 r4.x, r4.y -mov.f32f32 r2.w, r2.w -mad.f32 r0.x, r1.z, r12.y, r0.x -mad.f32 r1.z, c12.x, r2.y, r12.x -mad.f32 r2.y, c12.x, r4.x, r8.y -mad.f32 r1.x, c12.x, r2.w, r1.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.z, r1.z -mov.f32f32 r2.y, r2.y -mov.f32f32 r1.x, r1.x -mad.f32 r0.x, r0.y, r12.z, r0.x -(rpt2)nop -mov.f32f32 r0.x, r0.x -(rpt2)nop -max.f r0.x, c14.y, r0.x -(rpt2)nop -mov.f32f32 r0.x, r0.x +(ss)mul.f r0.y, r1.z, r3.w +mul.f r1.z, r1.w, r3.w +mov.f32f32 r1.w, r0.x +nop +mov.f32f32 r3.w, r0.y +mul.f r0.y, (neg)c9.x, r0.y +mov.f32f32 r4.w, r1.z +nop +mul.f r5.y, r3.w, r3.w +mad.f32 r0.y, (neg)c9.y, r1.z, r0.y +mad.f32 r1.z, r4.w, r4.w, r5.y +mad.f32 r0.x, (neg)c9.z, r0.x, r0.y +mad.f32 r0.y, r1.w, r1.w, r1.z (rpt5)nop -log2 r0.x, r0.x -(ss)mul.f r0.x, c12.y, r0.x -(rpt2)nop -mov.f32f32 r0.x, r0.x +rsq r0.y, r0.y +(ss)mov.f32f32 r1.z, r0.y +max.f r0.x, r0.x, c14.y +(ss)mul.f r0.y, r1.w, r0.y +nop +mul.f r1.w, r3.w, r1.z +mov.f32f32 r3.w, r0.x +mul.f r1.z, r4.w, r1.z +mad.f32 r0.x, c8.x, r0.x, (neg)r1.x +mul.f r1.w, r1.w, r6.w +mad.f32 r4.w, c8.z, r3.w, (neg)r6.y +mad.f32 r1.z, r1.z, r7.x, r1.w +mad.f32 r1.w, c8.y, r3.w, (neg)r6.z +mad.f32 r0.y, r0.y, r7.z, r1.z +mad.f32 r1.z, c12.x, r4.w, r6.y +mad.f32 r0.x, c12.x, r0.x, r1.x +nop +max.f r0.y, c14.y, r0.y +mad.f32 r1.x, c12.x, r1.w, r6.z +(rpt4)nop +log2 r0.y, r0.y +(ss)mul.f r0.y, c12.y, r0.y (rpt5)nop -exp2 r0.x, r0.x -(ss)mul.f r0.y, r1.y, r0.x -mul.f r1.y, r1.w, r0.x -mad.f32 r0.y, r7.z, r1.z, r0.y -mad.f32 r1.y, r7.y, r2.y, r1.y -(ss)mul.f r0.x, r2.x, r0.x -nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.y, r1.y -mad.f32 r0.x, r7.x, r1.x, r0.x -nop -mul.f r0.y, r0.y, r0.w -mul.f r1.x, r1.y, r0.w -mov.f32f32 r0.x, r0.x -nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.x, r1.x -mad.f32 r0.y, c7.z, r7.z, r0.y -mad.f32 r1.x, c7.y, r7.y, r1.x +exp2 r0.y, r0.y +(ss)mul.f r1.w, r4.x, r0.y +mul.f r3.w, r4.z, r0.y +mad.f32 r1.z, r2.z, r1.z, r1.w +mad.f32 r1.x, r2.y, r1.x, r3.w +(ss)mul.f r0.y, r1.y, r0.y +nop +mul.f r1.y, r1.z, r4.y +mul.f r1.x, r1.x, r4.y +mad.f32 r1.y, c7.z, r2.z, r1.y +mad.f32 r1.x, c7.y, r2.y, r1.x +mad.f32 r0.x, r2.x, r0.x, r0.y +nop +mul.f r0.y, r0.z, r1.y +mul.f r1.x, r0.z, r1.x mul.f r0.x, r0.x, r0.w nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.w, r1.x -mov.f32f32 r0.x, r0.x -nop -mul.f r0.y, r0.z, r0.y -mul.f r0.w, r0.z, r0.w -mad.f32 r0.x, c7.x, r7.x, r0.x +add.f r0.y, r0.y, r7.y +add.f r0.w, r1.x, r3.y +mad.f32 r0.x, c7.x, r2.x, r0.x nop -add.f r0.y, r0.y, r5.x -add.f r0.w, r0.w, r5.w -mov.f32f32 r0.x, r0.x -nop -mul.f r0.y, r0.y, r8.x -mul.f r0.w, r0.w, r8.x +mul.f r0.y, r0.y, r5.x +mul.f r0.w, r0.w, r5.x mul.f r0.x, r0.z, r0.x nop -mul.f r0.y, r0.y, c6.z -mul.f r0.z, r0.w, c6.y -add.f r0.x, r0.x, r6.x -nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -mul.f r0.x, r0.x, r8.x -nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -mul.f r0.x, r0.x, c6.x -nop -mov.f32f32 r3.z, r0.y -mov.f32f32 r3.y, r0.z -mov.f32f32 r0.x, r0.x +mul.f r2.z, r0.y, c6.z +mul.f r2.y, r0.w, c6.y +add.f r0.x, r0.x, r3.z (rpt2)nop -mov.f32f32 r0.x, r0.x +mul.f r0.x, r0.x, r5.x (rpt2)nop -mov.f32f32 r3.x, r0.x +mul.f r2.x, r0.x, c6.x end -; FRAG: outputs: r3.x (1:0) -; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r2.z (5:9,cm=f,il=8,b=1) r63.y (5:10,cm=f,il=12,b=1) r3.x (5:11,cm=f,il=16,b=1) r2.y (5:12,cm=f,il=20,b=1) r7.x (5:13,cm=f,il=24,b=1) r3.y (5:14,cm=f,il=28,b=1) r7.x (5:15,cm=f,il=32,b=1) -; FRAG: 1147 instructions, 0 half, 65 full -; pos (bary): r1.x -; color: r3.x -; fragcoord: r0.x +nop +nop +; FRAG: outputs: r2.x (1:0) +; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r3.x (5:9,cm=f,il=8,b=1) r63.z (5:10,cm=f,il=12,b=1) r3.x (5:11,cm=f,il=16,b=1) r1.w (5:12,cm=f,il=20,b=1) r6.x (5:13,cm=f,il=24,b=1) r6.x (5:14,cm=f,il=28,b=1) r7.x (5:15,cm=f,il=32,b=1) +; FRAG: 766 instructions, 0 half, 20 full diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-73.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-73.asm index ad4df45..b2e35b3 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-73.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-73.asm @@ -6,346 +6,250 @@ @in(r0.w) in4 @in(r1.x) in5 @in(r1.y) in6 -@in(r1.z) in8 -@in(r1.w) in9 -@in(r2.x) in10 -@in(r2.y) in11 -@in(r2.z) in12 -@in(r2.w) in13 -@out(r11.z) out0 -@out(r11.w) out1 -@out(r12.x) out2 -@out(r12.y) out3 -@out(r7.x) out4 -@out(r7.y) out5 -@out(r7.z) out6 -@out(r7.w) out7 -@out(r3.x) out8 -@out(r3.y) out9 -@out(r3.z) out10 -@out(r3.w) out11 -@out(r9.z) out12 -@out(r9.w) out13 -@out(r10.x) out14 -@out(r10.y) out15 -@out(r1.y) out16 -@out(r1.z) out17 -@out(r1.w) out18 -@out(r2.x) out19 -@out(r12.z) out20 -@out(r12.w) out21 -@out(r13.x) out22 -@out(r13.y) out23 -@out(r8.z) out24 -@out(r8.w) out25 -@out(r9.x) out26 -@out(r9.y) out27 -@out(r10.z) out28 -@out(r10.w) out29 -@out(r11.x) out30 -@out(r11.y) out31 -(sy)(ss)floor.f r3.x, c15.z -absneg.f r3.y, (abs)c18.x -absneg.f r3.z, (abs)c18.y -floor.f r3.w, c15.x -add.f r3.x, c15.z, (neg)r3.x -mul.f r4.x, c12.x, r1.z -mul.f r4.y, c12.x, r0.w -add.f r3.w, c15.x, (neg)r3.w -mov.f32f32 r3.x, r3.x -add.f r3.y, r3.y, r3.z -mad.f32 r3.z, c13.x, r1.w, r4.x -mad.f32 r4.x, c13.x, r1.x, r4.y -max.f r3.x, r3.x, c19.y -mov.f32f32 r3.y, r3.y -mov.f32f32 r3.w, r3.w -mov.f32f32 r3.z, r3.z -min.f r3.x, r3.x, c23.y -mul.f r4.y, c17.x, r3.y -mul.f r4.z, c12.z, r0.x -max.f r3.w, r3.w, c19.y -max.f r3.x, r3.x, c19.x -mad.f32 r4.z, c13.z, r0.y, r4.z -mad.f32 r3.z, c14.x, r2.x, r3.z -mad.f32 r4.x, c14.x, r1.y, r4.x -mov.f32f32 r3.x, r3.x -mad.f32 r4.z, c14.z, r0.z, r4.z -min.f r3.w, r3.w, c23.y -mov.f32f32 r3.z, r3.z -mul.f r3.x, c17.x, r3.x -add.f r4.z, r4.z, c15.z -max.f r3.w, r3.w, c19.x -mad.f32 r4.y, c19.w, r4.y, r4.z -mov.f32f32 r3.x, r3.x -mul.f r4.w, c12.y, r0.w -mov.f32f32 r4.x, r4.x -absneg.f r5.x, (neg)c5.x -mad.f32 r3.x, c19.z, r3.x, c15.x -mov.f32f32 r4.y, r4.y -mov.f32f32 r3.w, r3.w -mul.f r5.y, c12.x, r0.x -mov.f32f32 r3.x, r3.x -mov.f32f32 r4.y, r4.y -mad.f32 r5.y, c13.x, r0.y, r5.y -mad.f32 r4.w, c13.y, r1.x, r4.w -mov.f32f32 r3.x, r3.x -add.f r4.y, r4.y, c20.x -mad.f32 r5.y, c14.x, r0.z, r5.y -mad.f32 r4.w, c14.y, r1.y, r4.w -add.f r3.x, r3.x, c20.x -floor.f r5.z, r4.y -add.f r5.y, r5.y, c15.x -mov.f32f32 r4.w, r4.w -floor.f r5.w, r3.x -add.f r4.y, r4.y, (neg)r5.z -mad.f32 r3.w, c17.x, r3.w, r5.y -mul.f r5.z, r4.w, r3.z -add.f r3.x, r3.x, (neg)r5.w -mad.f32 r4.y, c20.y, r4.y, c20.z -add.f r3.w, r3.w, c20.x -mul.f r5.w, c12.y, r1.z -mad.f32 r3.x, c20.y, r3.x, c20.z -absneg.f r4.y, (abs)r4.y -floor.f r6.x, r3.w -mad.f32 r5.w, c13.y, r1.w, r5.w -absneg.f r3.x, (abs)r3.x -mul.f r6.y, c20.y, r4.y -add.f r3.w, r3.w, (neg)r6.x -mul.f r4.y, r4.y, r4.y -mul.f r6.x, c20.y, r3.x -add.f r6.y, c20.w, (neg)r6.y -mad.f32 r3.w, c20.y, r3.w, c20.z -mul.f r3.x, r3.x, r3.x -add.f r6.x, c20.w, (neg)r6.x -mul.f r4.y, r4.y, r6.y -absneg.f r3.w, (abs)r3.w -mov.f32f32 r5.w, r5.w -mul.f r3.x, r3.x, r6.x -mov.f32f32 r4.y, r4.y -mul.f r6.x, r0.x, r0.z -mul.f r6.y, r0.y, c21.x -mov.f32f32 r3.x, r3.x -mul.f r6.z, r0.y, c22.x -mul.f r6.w, c20.y, r3.w -mul.f r6.x, r6.x, r6.y -mul.f r3.w, r3.w, r3.w -mov.f32f32 r6.y, r6.z -add.f r6.z, c20.w, (neg)r6.w -mov.f32f32 r6.x, r6.x -mad.f32 r5.w, c14.y, r2.x, r5.w -mov.f32f32 r6.y, r6.y -mul.f r3.w, r3.w, r6.z -mov.f32f32 r6.x, r6.x -mov.f32f32 r5.w, r5.w -max.f r6.y, r6.y, c19.y -mov.f32f32 r3.w, r3.w -mov.f32f32 r6.x, r6.x -mad.f32 r5.z, r4.x, r5.w, (neg)r5.z -min.f r6.y, r6.y, c23.y -mul.f r6.z, r4.x, r5.x -absneg.f r6.w, (neg)c5.y -mul.f r1.z, c12.z, r1.z -min.f r6.y, r6.y, c19.w -max.f r6.x, r6.x, c19.y -mov.f32f32 r5.z, r5.z -mad.f32 r6.z, r4.w, r6.w, r6.z -mov.f32f32 r6.y, r6.y -min.f r6.x, r6.x, c23.y -mul.f r5.z, r5.z, r2.y -mov.f32f32 r6.z, r6.z -mul.f r3.x, r3.x, r6.y -min.f r6.x, r6.x, c21.y -mov.f32f32 r5.z, r5.z +@in(r8.x) in8 +@in(r8.y) in9 +@in(r8.z) in10 +@in(r8.w) in11 +@in(r5.z) in12 +@in(r5.w) in13 +@out(r0.x) out0 +@out(r0.y) out1 +@out(r0.z) out2 +@out(r0.w) out3 +@out(r1.x) out4 +@out(r1.y) out5 +@out(r1.z) out6 +@out(r1.w) out7 +@out(r2.x) out8 +@out(r2.y) out9 +@out(r2.z) out10 +@out(r2.w) out11 +@out(r3.x) out12 +@out(r3.y) out13 +@out(r3.z) out14 +@out(r3.w) out15 +@out(r4.x) out16 +@out(r4.y) out17 +@out(r4.z) out18 +@out(r4.w) out19 +@out(r5.x) out20 +@out(r5.y) out21 +@out(r5.z) out22 +@out(r5.w) out23 +@out(r6.x) out24 +@out(r6.y) out25 +@out(r6.z) out26 +@out(r6.w) out27 +@out(r7.x) out28 +@out(r7.y) out29 +@out(r7.z) out30 +@out(r7.w) out31 +@const(c19.x) 0x3ecccccd, 0x00000000, 0x3eaaa64c, 0x3e800000 +@const(c20.x) 0x3f000000, 0x40000000, 0xbf800000, 0x40400000 +@const(c21.x) 0x399d4952, 0x3e4ccccd, 0x3f99999a, 0x3fd9999a +@const(c22.x) 0x3c88ce70, 0x00000000, 0x00000000, 0x00000000 +@const(c23.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 +(sy)(ss)floor.f r1.z, c15.z +floor.f r1.w, c15.x +absneg.f r2.x, (abs)c18.x +absneg.f r2.y, (abs)c18.y +add.f r1.z, c15.z, (neg)r1.z +add.f r1.w, c15.x, (neg)r1.w +mul.f r2.z, c12.x, r0.w +add.f r2.x, r2.x, r2.y +max.f r1.z, r1.z, c19.y +max.f r1.w, r1.w, c19.y +mad.f32 r2.y, c13.x, r1.x, r2.z +mul.f r2.z, c17.x, r2.x +min.f r1.z, r1.z, c23.y +min.f r1.w, r1.w, c23.y +mul.f r2.w, c12.z, r0.x +mad.f32 r2.y, c14.x, r1.y, r2.y +max.f r1.z, r1.z, c19.x +max.f r1.w, r1.w, c19.x +mad.f32 r2.w, c13.z, r0.y, r2.w +mul.f r3.x, c12.x, r0.x +mul.f r1.z, c17.x, r1.z +mad.f32 r3.x, c13.x, r0.y, r3.x +mad.f32 r2.w, c14.z, r0.z, r2.w +mad.f32 r3.x, c14.x, r0.z, r3.x +mad.f32 r1.z, c19.z, r1.z, c15.x +absneg.f r3.y, (neg)c5.x +add.f r2.w, r2.w, c15.z +mov.f32f32 r2.x, r2.x +add.f r1.z, r1.z, c20.x +add.f r3.x, r3.x, c15.x +mad.f32 r2.z, c19.w, r2.z, r2.w +mad.f32 r1.w, c17.x, r1.w, r3.x +floor.f r3.z, r1.z +mul.f r3.w, r2.y, r3.y +mul.f r4.x, c12.y, r0.w +add.f r1.w, r1.w, c20.x +add.f r1.z, r1.z, (neg)r3.z +add.f r2.z, r2.z, c20.x +mad.f32 r3.z, c13.y, r1.x, r4.x +max.f r2.x, r2.x, c21.z +mad.f32 r1.z, c20.y, r1.z, c20.z +floor.f r4.x, r1.w +floor.f r4.y, r2.z +mad.f32 r3.z, c14.y, r1.y, r3.z +absneg.f r1.z, (abs)r1.z +add.f r1.w, r1.w, (neg)r4.x +add.f r2.z, r2.z, (neg)r4.y +absneg.f r4.x, (neg)c5.y +mul.f r4.y, c20.y, r1.z +mad.f32 r1.w, c20.y, r1.w, c20.z +mad.f32 r2.z, c20.y, r2.z, c20.z +mul.f r1.z, r1.z, r1.z +add.f r4.y, c20.w, (neg)r4.y +absneg.f r1.w, (abs)r1.w +absneg.f r2.z, (abs)r2.z +mad.f32 r3.w, r3.z, r4.x, r3.w +mul.f r1.z, r1.z, r4.y +mul.f r4.y, r0.y, c22.x +mul.f r4.z, c20.y, r1.w +mul.f r4.w, c20.y, r2.z +mul.f r1.w, r1.w, r1.w +max.f r4.y, r4.y, c19.y +add.f r4.z, c20.w, (neg)r4.z +add.f r4.w, c20.w, (neg)r4.w +mul.f r2.z, r2.z, r2.z +min.f r4.y, r4.y, c23.y +mul.f r1.w, r1.w, r4.z +mul.f r4.z, r0.x, r0.z +mul.f r4.w, r2.z, r4.w +min.f r2.z, r4.y, c19.w +mul.f r4.y, r0.y, c21.x mul.f r0.w, c12.z, r0.w -mov.f32f32 r3.x, r3.x -mov.f32f32 r6.x, r6.x -mov.f32f32 r5.z, r5.z +min.f r5.x, r2.x, c21.w +mul.f r1.z, r1.z, r2.z +mul.f r2.x, r4.z, r4.y mad.f32 r0.w, c13.z, r1.x, r0.w -mad.f32 r1.x, c13.z, r1.w, r1.z -mul.f r1.z, r3.w, r6.x -mul.f r1.w, r4.y, r6.x -max.f r3.y, r3.y, c21.z -mov.f32f32 r3.w, r5.z -mov.f32f32 r1.z, r1.z +mov.f32f32 r1.x, r5.x +mov.f32f32 r4.y, r1.z +max.f r2.z, r2.x, c19.y mad.f32 r0.w, c14.z, r1.y, r0.w -mad.f32 r1.y, c18.x, r3.x, r1.z -mad.f32 r1.z, c18.y, r3.x, r1.z -mov.f32f32 r3.x, r3.y -mov.f32f32 r7.w, r3.w -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.z, r1.z -min.f r3.x, r3.x, c21.w -mov.f32f32 r0.w, r0.w -absneg.f r3.y, (neg)c5.z -mov.f32f32 r1.x, r1.x -mov.f32f32 r3.x, r3.x -mul.f r3.w, c12.y, r0.x -mad.f32 r4.y, r0.w, r3.y, r6.z -mad.f32 r1.x, c14.z, r2.x, r1.x -mad.f32 r2.x, r1.w, r3.x, r5.y -mad.f32 r4.z, r1.w, r3.x, r4.z -mad.f32 r3.w, c13.y, r0.y, r3.w -mov.f32f32 r4.y, r4.y -add.f r1.y, r2.x, r1.y -add.f r1.z, r4.z, r1.z -mad.f32 r2.x, c14.y, r0.z, r3.w -max.f r3.w, c19.y, r4.y -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.z, r1.z -add.f r2.x, r2.x, c15.y -mov.f32f32 r3.w, r3.w -mov.f32f32 r4.y, r1.y -mul.f r4.z, c8.w, r1.y -mul.f r5.y, c8.z, r1.y -mul.f r5.z, c8.y, r1.y -add.f r4.y, c4.x, (neg)r4.y -mad.f32 r1.w, r1.w, r3.x, r2.x -mul.f r2.x, c8.x, r1.y -mov.f32f32 r3.x, r1.z -mul.f r6.x, r4.y, r4.y -add.f r6.y, c4.y, (neg)r1.w -mad.f32 r6.z, c9.w, r1.w, r4.z -mad.f32 r8.x, c9.z, r1.w, r5.y -mad.f32 r5.z, c9.y, r1.w, r5.z -mad.f32 r6.x, r6.y, r6.y, r6.x -mov.f32f32 r6.z, r6.z -mov.f32f32 r8.x, r8.x -mov.f32f32 r5.z, r5.z -mov.f32f32 r6.x, r6.x -add.f r3.x, c4.z, (neg)r3.x -mad.f32 r6.z, c10.w, r1.z, r6.z -mad.f32 r8.x, c10.z, r1.z, r8.x -mad.f32 r5.z, c10.y, r1.z, r5.z -mad.f32 r6.x, r3.x, r3.x, r6.x +absneg.f r1.y, (neg)c5.z +mov.f32f32 r2.x, r2.y +min.f r4.z, r2.z, c23.y +mul.f r5.y, c12.z, r8.x +mov.f32f32 r2.z, r0.w +mov.f32f32 r2.y, r3.z +min.f r3.z, r4.z, c21.y +mad.f32 r0.w, r0.w, r1.y, r3.w +mad.f32 r4.z, c13.z, r8.y, r5.y +mul.f r9.x, c12.y, r8.x +mov.f32f32 r3.w, r3.z +mul.f r3.z, r4.w, r3.z +mul.f r4.w, c12.y, r0.x +max.f r9.y, c19.y, r0.w +mul.f r0.w, r1.w, r3.w +mad.f32 r1.w, c13.y, r0.y, r4.w +mad.f32 r3.x, r3.z, r1.x, r3.x +mad.f32 r1.w, c14.y, r0.z, r1.w +mov.f32f32 r3.w, r0.w +mad.f32 r0.w, c18.y, r1.z, r0.w +mad.f32 r1.z, c18.x, r4.y, r3.w +mad.f32 r2.w, r3.z, r5.x, r2.w +add.f r1.w, r1.w, c15.y +mov.f32f32 r4.y, r9.y +add.f r1.z, r3.x, r1.z +add.f r2.w, r2.w, r0.w +mad.f32 r1.x, r3.z, r1.x, r1.w +nop +mov.f32f32 r1.w, r1.z +mul.f r1.z, c0.x, r1.z +mov.f32f32 r4.w, r2.w +add.f r6.y, c4.y, (neg)r1.x +add.f r6.x, c4.x, (neg)r1.w +mul.f r0.w, c8.y, r1.w +mul.f r3.x, c8.x, r1.w +mul.f r7.w, c8.w, r1.w +mul.f r3.z, r6.x, r6.x +mad.f32 r0.w, c9.y, r1.x, r0.w +mad.f32 r3.z, r6.y, r6.y, r3.z +add.f r6.z, c4.z, (neg)r4.w +mad.f32 r0.w, c10.y, r4.w, r0.w +mad.f32 r3.x, c9.x, r1.x, r3.x +mad.f32 r3.w, c9.w, r1.x, r7.w +mad.f32 r3.z, r6.z, r6.z, r3.z mul.f r0.x, c12.w, r0.x -mad.f32 r2.x, c9.x, r1.w, r2.x +mad.f32 r3.x, c10.x, r4.w, r3.x +mad.f32 r3.w, c10.w, r4.w, r3.w +mul.f r7.z, c8.z, r1.w mad.f32 r0.x, c13.w, r0.y, r0.x -mov.f32f32 r0.y, r3.x -mov.f32f32 r8.y, r4.y +mul.f r0.y, c0.w, r1.w +rsq r3.z, r3.z +(ss)mov.f32f32 r5.x, r3.z +mad.f32 r3.y, r6.x, r3.z, r3.y mad.f32 r0.x, c14.w, r0.z, r0.x -rsq r0.z, r6.x -(ss)mov.f32f32 r0.z, r0.z -mov.f32f32 r2.x, r2.x -mov.f32f32 r9.x, r0.y -mov.f32f32 r8.z, r8.y -mad.f32 r0.y, r4.y, r0.z, r5.x -mad.f32 r4.y, r6.y, r0.z, r6.w -mad.f32 r0.z, r3.x, r0.z, r3.y -add.f r0.x, r0.x, c15.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r3.x, r4.y -mov.f32f32 r0.z, r0.z -mad.f32 r3.y, c11.w, r0.x, r6.z -mul.f r4.y, r0.y, r0.y -mad.f32 r5.x, c11.z, r0.x, r8.x -mad.f32 r4.y, r3.x, r3.x, r4.y -mov.f32f32 r3.y, r3.y -mad.f32 r5.z, c11.y, r0.x, r5.z -mad.f32 r2.x, c10.x, r1.z, r2.x -mov.f32f32 r4.y, r4.y -mov.f32f32 r3.y, r3.y -mad.f32 r4.y, r0.z, r0.z, r4.y -mov.f32f32 r5.x, r5.x -mov.f32f32 r5.z, r5.z -mad.f32 r2.x, c11.x, r0.x, r2.x -(ss)mul.f r6.x, c0.w, r1.y -mul.f r6.z, c0.z, r1.y -mul.f r6.w, c0.y, r1.y -rsq r4.y, r4.y -(ss)mov.f32f32 r4.y, r4.y -mov.f32f32 r10.y, r3.y -mov.f32f32 r3.y, r5.x -mul.f r5.x, r5.z, c16.y -mul.f r0.z, r0.z, r4.y -mul.f r3.x, r3.x, r4.y -mul.f r0.y, r0.y, r4.y -mov.f32f32 r10.x, r3.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r3.x, r3.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r9.w, r5.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r3.x, r3.x -mov.f32f32 r0.y, r0.y +mad.f32 r0.z, c9.z, r1.x, r7.z +(ss)mad.f32 r3.z, r6.y, r5.x, r4.x +mov.f32f32 r4.x, r3.y +mad.f32 r1.y, r6.z, r5.x, r1.y nop -mov.f32f32 r10.w, r0.z -mov.f32f32 r10.z, r3.x -mov.f32f32 r9.y, r0.y -mov.f32f32 r0.y, r2.x -mad.f32 r0.z, c1.w, r1.w, r6.x -mad.f32 r2.x, c1.z, r1.w, r6.z -mad.f32 r3.x, c1.y, r1.w, r6.w -mul.f r0.y, r0.y, c16.x -mad.f32 r0.z, c2.w, r1.z, r0.z -mad.f32 r2.x, c2.z, r1.z, r2.x -mad.f32 r3.x, c2.y, r1.z, r3.x -mov.f32f32 r9.z, r0.y -mad.f32 r0.y, c3.w, r0.x, r0.z -mad.f32 r0.z, c3.z, r0.x, r2.x -mad.f32 r2.x, c3.y, r0.x, r3.x -mul.f r3.x, c0.x, r1.y -mov.f32f32 r12.y, r0.y -mov.f32f32 r12.x, r0.z -mov.f32f32 r11.w, r2.x -mad.f32 r0.y, c1.x, r1.w, r3.x -mad.f32 r0.z, c7.x, r1.z, c7.y -mad.f32 r0.y, c2.x, r1.z, r0.y -mad.f32 r1.y, c7.x, r1.y, c7.y -mad.f32 r0.x, c3.x, r0.x, r0.y -mov.f32f32 r0.y, r0.z -mov.f32f32 r11.y, r4.z -mov.f32f32 r11.x, r5.y -mov.f32f32 r11.z, r0.x -mov.f32f32 r12.w, r0.y -mov.f32f32 r0.x, r1.y -mov.f32f32 r0.y, r6.y -(rpt1)nop -mov.f32f32 r12.z, r0.x -mov.f32f32 r8.w, r0.y -mul.f r0.x, r3.w, c6.z -mul.f r0.y, r3.w, c6.y -mul.f r0.z, r3.w, c6.x -mov.f32f32 r1.x, r1.x -mov.f32f32 r7.z, r0.x -mov.f32f32 r7.y, r0.y -mov.f32f32 r7.x, r0.z -mul.f r0.x, r4.x, r1.x -mul.f r0.y, r0.w, r5.w -mad.f32 r0.x, r0.w, r3.z, (neg)r0.x -mad.f32 r0.y, r4.w, r1.x, (neg)r0.y -mov.f32f32 r0.z, r1.x -mov.f32f32 r1.x, r5.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.w, r0.z -mov.f32f32 r1.z, r1.x -mul.f r0.x, r0.x, r2.y -mul.f r0.y, r0.y, r2.y -mov.f32f32 r0.z, r3.z -nop -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.y, r0.z -mov.f32f32 r0.z, r0.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y +mov.f32f32 r5.x, r3.z +mul.f r4.x, r4.x, r4.x +mov.f32f32 r5.y, r1.y +add.f r0.x, r0.x, c15.w +mad.f32 r3.z, r3.z, r5.x, r4.x +mad.f32 r0.z, c10.z, r4.w, r0.z +mad.f32 r1.y, r1.y, r5.y, r3.z +mad.f32 r0.w, c11.y, r0.x, r0.w +mad.f32 r3.x, c11.x, r0.x, r3.x +mad.f32 r3.w, c11.w, r0.x, r3.w +mad.f32 r3.z, c11.z, r0.x, r0.z +mad.f32 r0.y, c1.w, r1.x, r0.y +mul.f r0.z, c0.z, r1.w +rsq r1.y, r1.y +(ss)mov.f32f32 r4.x, r1.y +mul.f r6.w, r3.y, r1.y +mul.f r3.y, r0.w, c16.y +mul.f r3.x, r3.x, c16.x +mul.f r7.y, r5.y, r4.x +mul.f r7.x, r5.x, r4.x +mad.f32 r0.y, c2.w, r4.w, r0.y +mad.f32 r0.z, c1.z, r1.x, r0.z +mad.f32 r0.w, c3.w, r0.x, r0.y +mad.f32 r0.y, c2.z, r4.w, r0.z +(ss)mul.f r1.y, c0.y, r1.w +mad.f32 r0.z, c3.z, r0.x, r0.y +mad.f32 r0.y, c1.y, r1.x, r1.y +mad.f32 r1.x, c1.x, r1.x, r1.z +mad.f32 r0.y, c2.y, r4.w, r0.y +mad.f32 r1.x, c2.x, r2.w, r1.x +mad.f32 r0.y, c3.y, r0.x, r0.y +mad.f32 r0.x, c3.x, r0.x, r1.x +mad.f32 r5.x, c7.x, r1.w, c7.y +mad.f32 r5.y, c7.x, r4.w, c7.y +mul.f r1.z, r4.y, c6.z +mul.f r1.y, r4.y, c6.y +mul.f r1.x, r9.y, c6.x +mad.f32 r1.w, c14.z, r8.z, r4.z +mad.f32 r2.w, c13.y, r8.y, r9.x +mul.f r4.x, c12.x, r8.x +mad.f32 r2.w, c14.y, r8.z, r2.w +mov.f32f32 r4.z, r1.w +mad.f32 r4.x, c13.x, r8.y, r4.x (rpt1)nop -mov.f32f32 r2.x, r0.x -mov.f32f32 r3.w, r0.y -mov.f32f32 r3.z, r0.z -mov.f32f32 r0.x, r4.w -mov.f32f32 r0.y, r4.x +mul.f r4.w, r2.x, r4.z +mad.f32 r8.x, c14.x, r8.z, r4.x +mov.f32f32 r4.y, r2.w (rpt1)nop -mov.f32f32 r3.y, r0.x -mov.f32f32 r3.x, r0.y -mov.f32f32 r0.x, r2.w -mov.f32f32 r0.y, r2.z +mov.f32f32 r4.x, r8.x +mul.f r8.y, r2.z, r4.y +mul.f r8.x, r2.y, r8.x +mad.f32 r1.w, r2.y, r1.w, (neg)r8.y +mad.f32 r4.w, r2.z, r4.x, (neg)r4.w +mad.f32 r8.x, r2.x, r2.w, (neg)r8.x (rpt1)nop -mov.f32f32 r13.y, r0.x -mov.f32f32 r13.x, r0.y +mul.f r4.w, r4.w, r8.w +mul.f r2.w, r1.w, r8.w +mul.f r1.w, r8.x, r8.w end -; VERT: outputs: r11.z (0:0) r7.x (5:9) r3.x (5:10) r9.z (5:11) r1.y (5:12) r12.z (5:13) r8.z (5:14) r10.z (5:15) -; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r1.z (0:0,cm=f,il=16,b=0) r2.z (0:0,cm=3,il=20,b=0) -; VERT: 304 instructions, 0 half, 14 full -; pos: r11.z +nop +nop +; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11) r4.x (5:12) r5.x (5:13) r6.x (5:14) r7.x (5:15) +; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r8.x (0:0,cm=f,il=16,b=0) r5.z (0:0,cm=3,il=20,b=0) +; VERT: 201 instructions, 0 half, 10 full diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-74.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-74.asm index 0274fe9..be30c1a 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-74.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-74.asm @@ -4,206 +4,143 @@ @in(r0.w) in3 @in(r1.x) in4 @in(r1.y) in5 -@out(r1.y) out0 -@out(r1.z) out1 -@out(r1.w) out2 -@out(r2.x) out3 +@out(r2.x) out0 +@out(r2.y) out1 +@out(r2.z) out2 +@out(r2.w) out3 +@const(c9.x) 0x3f000000, 0x00000000, 0x3d4ccccd, 0xbb449ba6 +@const(c10.x) 0xbf000000, 0x3f800000, 0x40000000, 0x3de38866 +@const(c11.x) 0x3fb8aa65, 0x3cf5c28f, 0x00000000, 0x00000000 +@const(c12.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.x, 4, r1.x add.f r0.y, r0.w, c9.y bary.f r0.w, 5, r1.x bary.f r1.z, 10, r1.x -add.f r1.w, r0.x, c10.x -bary.f r2.x, 8, r1.x +add.f r2.x, r0.x, c10.x +bary.f r1.w, 11, r1.x add.f r2.y, r0.w, c10.x -mov.f32f32 r1.z, r1.z -floor.f r2.z, r1.w +bary.f r3.y, 8, r1.x +floor.f r2.z, r2.x rcp r0.y, r0.y add.f r0.z, r0.z, c9.y floor.f r2.w, r2.y -mov.f32f32 r3.x, r1.z -add.f r1.z, r1.w, (neg)r2.z +bary.f r3.z, 9, r1.x +add.f r3.w, r2.x, (neg)r2.z (ss)mul.f r0.y, r0.z, r0.y -add.f r0.z, r2.y, (neg)r2.w -bary.f r1.w, 11, r1.x -mov.f32f32 r1.z, r1.z +absneg.f r0.z, (neg)c7.x +add.f r4.x, r2.y, (neg)r2.w +mov.f32f32 r4.y, r3.w +sam (f32)(xyzw)r2.x, r1.z, s#0, t#0 +(ss)add.f r1.z, r3.w, c10.y +mul.f r0.z, r0.z, c7.x +mov.f32f32 r1.w, r4.x +mul.f r3.w, c9.x, r4.y +add.f r4.y, c10.z, (neg)r4.y +mul.f r0.z, r0.z, r0.y mov.f32f32 r0.y, r0.y -absneg.f r2.y, (neg)c7.x -mov.f32f32 r0.z, r0.z -mul.f r2.z, c9.x, r1.z -mov.f32f32 r1.w, r1.w -add.f r2.w, c10.z, (neg)r1.z -add.f r3.z, c10.z, (neg)r0.z -mov.f32f32 r2.z, r2.z -mul.f r2.y, r2.y, c7.x -mul.f r3.w, c9.x, r0.z -mov.f32f32 r3.y, r1.w -add.f r0.x, r0.x, (neg)r2.z -mul.f r1.w, r2.y, r0.y -mov.f32f32 r2.y, r3.w -mov.f32f32 r2.z, r2.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r1.w, r1.w -add.f r0.w, r0.w, (neg)r2.y -sam (f32)(xyzw)r3.w, r3.x, s#0, t#0 -(sy)cmps.f.lt r2.y, r4.z, c9.z -add.f r2.w, c10.x, r0.x -mul.f r0.y, r1.w, r0.y +add.f r0.x, r0.x, (neg)r3.w +mul.f r3.w, c9.x, r1.w +mov.f32f32 r4.z, r4.y +mul.f r0.y, r0.z, r0.y +mov.f32f32 r0.z, r0.x +add.f r0.w, r0.w, (neg)r3.w add.f r0.x, c10.y, r0.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.w, r2.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x -add.f r2.w, c10.y, r0.w -mul.f r1.w, r1.w, c3.z mul.f r0.y, r0.y, c11.x -mul.f r0.x, r0.x, c3.z -mov.f32f32 r2.w, r2.w -(ss)mov.f32f32 r3.x, r1.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r3.y, r0.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r4.w, r3.x -add.f r0.w, c10.x, r0.w -mov.f32f32 r3.x, r3.y -mov.f32f32 r5.z, r0.x -mov.f32f32 r0.x, r1.w -mov.f32f32 r0.w, r0.w -exp2 r0.y, r0.y -(ss)mov.f32f32 r0.y, r0.y -mov.f32f32 r6.y, r3.x -mul.f r1.w, r2.w, c3.w -mul.f r0.w, r0.w, c3.w -add.f r2.w, c12.y, (neg)r0.y -mov.f32f32 r7.x, r0.x -mov.f32f32 r0.x, r1.w -mov.f32f32 r3.x, r0.w -mul.f r2.w, r2.w, c7.y -mul.f r0.y, r0.y, c10.y -mov.f32f32 r1.w, r1.w -mov.f32f32 r5.x, r3.x -bary.f r3.x, 6, r1.x -add.f r0.y, r0.y, r2.w -mov.f32f32 r1.w, r1.w -mov.f32f32 r0.w, r0.w -add.f r2.w, r3.x, c9.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r6.z, r1.w -mov.f32f32 r5.w, r0.w -mov.f32f32 r0.w, r2.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.w, r2.w -mov.f32f32 r3.x, r2.w -mov.f32f32 r5.y, r0.w -max.f r0.y, r0.y, c9.y -mov.f32f32 r6.w, r1.w -mov.f32f32 r6.x, r3.x -mov.f32f32 r7.y, r0.x -mov.f32f32 r0.x, r2.w -cov.u32f32 r0.w, r2.y -sam.s (f32)(x)r1.w, r4.w, s#2, t#2 -(sy)mov.f32f32 r1.w, r1.w -min.f r0.y, r0.y, c10.y -sam.s (f32)(x)r2.y, r6.y, s#2, t#2 -(sy)mov.f32f32 r2.y, r2.y -sam.s (f32)(x)r2.w, r5.z, s#2, t#2 -(sy)mov.f32f32 r2.w, r2.w -mov.f32f32 r1.w, r1.w -mov.f32f32 r3.x, r3.z -add.f r3.y, c12.y, (neg)r0.y -add.f r3.z, c12.y, (neg)r0.y -(ss)add.f r4.w, c12.y, (neg)r0.y -mul.f r5.x, r2.z, r3.x -mul.f r3.y, r3.y, c6.z -mul.f r3.z, r3.z, c6.y -mul.f r4.w, r4.w, c6.x -mul.f r1.w, r5.x, r1.w -add.f r1.z, r1.z, c10.y -mov.f32f32 r7.z, r0.x -mov.f32f32 r0.x, (0.000000) -add.f r0.z, r0.z, c10.y -mul.f r3.x, r1.z, r3.x -mov.f32f32 r2.x, r2.x -cmps.f.ne p0.x, r0.w, r0.x -mul.f r0.x, r2.z, r0.z -mad.f32 r0.w, r3.x, r2.w, r1.w -sam.s (f32)(x)r1.w, r7.x, s#2, t#2 +add.f r0.z, c10.x, r0.z +mov.f32f32 r3.w, r0.w +mul.f r4.w, r0.x, c3.z +add.f r0.x, c10.y, r0.w +mul.f r5.z, r0.z, c3.z +add.f r0.z, c10.x, r3.w +mov.f32f32 r6.y, r4.w +mul.f r5.x, r0.x, c3.w +mov.f32f32 r7.x, r5.z +mul.f r6.z, r0.z, c3.w +exp2 r0.x, r0.y +(ss)mov.f32f32 r0.y, r0.x +bary.f r0.z, 6, r1.x +mov.f32f32 r5.w, r5.x +mov.f32f32 r7.y, r6.z +add.f r0.y, c12.y, (neg)r0.y +add.f r5.y, r0.z, c9.w +add.f r0.z, c10.z, (neg)r1.w +mul.f r0.x, r0.x, c10.y +add.f r0.w, r4.x, c10.y +mov.f32f32 r7.z, r5.y +mul.f r0.y, r0.y, c7.y +mov.f32f32 r6.w, r5.y +mov.f32f32 r6.x, r5.y (rpt1)nop -(sy)mov.f32f32 r1.w, r1.w -mov.f32f32 r0.w, r0.w -bary.f r2.z, 0, r1.x -mov.f32f32 r2.w, r2.x -mad.f32 r0.x, r0.x, r1.w, r0.w -bary.f r0.w, 1, r1.x -bary.f r1.w, 9, r1.x +add.f r0.x, r0.x, r0.y +sam.s (f32)(x)r7.x, r7.x, s#2, t#2 +mov.f32f32 r0.y, r0.z +nop +(ss)nop +sam.s (f32)(x)r7.y, r6.y, s#2, t#2 +sam.s (f32)(x)r7.z, r4.w, s#2, t#2 mul.f r0.z, r1.z, r0.z -mov.f32f32 r0.x, r0.x -bary.f (ei)r1.x, 2, r1.x -mov.f32f32 r1.y, r1.w -mad.f32 r0.x, r0.z, r2.y, r0.x +mul.f r1.w, r4.y, r0.w +mul.f r0.y, r4.z, r0.y +max.f r0.x, r0.x, c9.y +sam.s (f32)(x)r3.w, r5.z, s#2, t#2 +(sy)cmps.f.lt r4.x, r2.w, c9.z +mul.f r0.w, r1.z, r0.w +mul.f r0.y, r0.y, r7.x +min.f r0.x, r0.x, c10.y +mad.f32 r0.y, r0.z, r7.y, r0.y +cov.u32f32 r0.z, r4.x +mad.f32 r0.y, r1.w, r3.w, r0.y +add.f r1.z, c12.y, (neg)r0.x +mad.f32 r0.y, r0.w, r7.z, r0.y +add.f r0.w, c12.y, (neg)r0.x +add.f r1.w, c12.y, (neg)r0.x +mov.f32f32 r3.w, (0.000000) +mul.f r0.y, c10.w, r0.y +bary.f r4.x, 0, r1.x +mul.f r1.z, r1.z, c6.z +mul.f r0.w, r0.w, c6.y +mov.f32f32 r4.y, r0.y +bary.f r4.z, 2, r1.x +bary.f (ei)r1.x, 1, r1.x +mul.f r1.y, r2.x, r4.x +mul.f r1.w, r1.w, c6.x +mul.f r4.x, r2.z, r4.z +mul.f r1.x, r2.y, r1.x +mul.f r0.y, r1.y, r0.y +cmps.f.ne p0.x, r0.z, r3.w +mul.f r0.z, r4.x, r4.y +mul.f r1.x, r1.x, r4.y +mad.f32 r0.z, c5.z, r2.z, r0.z +mad.f32 r1.x, c5.y, r2.y, r1.x +mad.f32 r0.y, c5.x, r2.x, r0.y +sam (f32)(w)r3.y, r3.y, s#1, t#1 +(sy)cmps.f.lt r1.y, r4.x, c11.y +mul.f r0.z, r0.x, r0.z +mul.f r1.x, r0.x, r1.x +mul.f r0.x, r0.x, r0.y kill p0.x -mov.f32f32 r1.z, r4.z -mov.f32f32 r3.x, r1.y -mov.f32f32 r0.x, r0.x -mul.f r1.x, r4.y, r1.x -mul.f r0.w, r4.x, r0.w -mov.f32f32 r2.x, r1.z -mul.f r0.x, c10.w, r0.x -mul.f r2.y, r3.w, r2.z -sam (f32)(w)r5.x, r2.w, s#1, t#1 -nop -(sy)cmps.f.lt r2.z, r5.w, c11.y -mov.f32f32 r0.x, r0.x -(ss)mov.f32f32 r2.w, r5.w -mov.f32f32 r3.x, c9.y -nop -mul.f r1.x, r1.x, r0.x -mul.f r0.w, r0.w, r0.x -mul.f r0.x, r2.y, r0.x -cov.u32f32 r2.y, r2.z -mov.f32f32 r1.x, r1.x -mov.f32f32 r0.w, r0.w -mad.f32 r1.x, c5.z, r4.y, r1.x -mad.f32 r0.w, c5.y, r4.x, r0.w -mov.f32f32 r0.x, r0.x -cmps.f.ne r2.y, r2.y, c9.y -mov.f32f32 r1.x, r1.x -mov.f32f32 r0.w, r0.w -mad.f32 r0.x, c5.x, r3.w, r0.x -mov.f32f32 r2.z, r2.w -mul.f r1.x, r0.y, r1.x -mul.f r0.w, r0.y, r0.w -mov.f32f32 r0.x, r0.x -sel.b32 r2.y, r3.x, r2.y, r2.z -add.f r1.x, r1.x, r3.y -add.f r0.w, r0.w, r3.z -mul.f r0.x, r0.y, r0.x -nop -mul.f r0.y, r1.x, r2.y -mul.f r0.w, r0.w, r2.y -add.f r0.x, r0.x, r4.w -nop -mul.f r0.y, r0.y, c4.z -mul.f r0.w, r0.w, c4.y -mul.f r0.x, r0.x, r2.y +add.f r0.y, r0.z, r1.z +cov.u32f32 r0.z, r1.y +add.f r0.w, r1.x, r0.w +add.f r0.x, r0.x, r1.w +mov.f32f32 r1.x, c9.y +cmps.f.ne r0.z, r0.z, c9.y +(rpt2)nop +sel.b32 r0.z, r1.x, r0.z, r4.x +(rpt2)nop +mul.f r0.y, r0.y, r0.z +mul.f r0.w, r0.w, r0.z +(rpt1)nop +mul.f r2.z, r0.y, c4.z +mul.f r2.y, r0.w, c4.y +mul.f r0.x, r0.x, r0.z +(rpt2)nop +mul.f r2.x, r0.x, c4.x +end nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.w, r0.w -mul.f r0.x, r0.x, c4.x nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.x, r0.x nop -mov.f32f32 r1.w, r0.y -mov.f32f32 r1.z, r0.w -mov.f32f32 r0.x, r0.x -(rpt2)nop -mov.f32f32 r1.y, r0.x -end -; FRAG: outputs: r1.y (1:0) +; FRAG: outputs: r2.x (1:0) ; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r2.x (5:10,cm=f,il=12,b=1) r1.x (5:11,cm=f,il=16,b=1) -; FRAG: 195 instructions, 0 half, 8 full -; pos (bary): r1.x -; color: r1.y -; fragcoord: r0.x +; FRAG: 133 instructions, 0 half, 8 full diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-75.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-75.asm index b65a363..63ee7bc 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-75.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-75.asm @@ -6,242 +6,182 @@ @in(r0.w) in4 @in(r1.x) in5 @in(r1.y) in6 -@in(r1.z) in8 -@in(r1.w) in9 -@out(r5.x) out0 -@out(r5.y) out1 -@out(r5.z) out2 -@out(r5.w) out3 -@out(r4.x) out4 -@out(r4.y) out5 -@out(r4.z) out6 -@out(r4.w) out7 -@out(r2.z) out8 -@out(r2.w) out9 -@out(r3.x) out10 -@out(r3.y) out11 -@out(r0.z) out12 -@out(r0.w) out13 -@out(r1.x) out14 -@out(r1.y) out15 -(sy)(ss)floor.f r2.x, c14.z -floor.f r2.y, c14.x -absneg.f r2.z, (abs)c17.x -absneg.f r2.w, (abs)c17.y -add.f r2.x, c14.z, (neg)r2.x -add.f r2.y, c14.x, (neg)r2.y -mul.f r3.x, c11.x, r0.w -add.f r2.z, r2.z, r2.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r2.y, r2.y -mad.f32 r2.w, c12.x, r1.x, r3.x -mov.f32f32 r2.z, r2.z -max.f r2.x, r2.x, c18.y -max.f r2.y, r2.y, c18.y -mov.f32f32 r2.w, r2.w -mul.f r3.x, c16.x, r2.z -min.f r2.x, r2.x, c22.y -min.f r2.y, r2.y, c22.y -mul.f r3.y, c11.z, r0.x -mad.f32 r2.w, c13.x, r1.y, r2.w -max.f r2.x, r2.x, c18.x -max.f r2.y, r2.y, c18.x -mad.f32 r3.y, c12.z, r0.y, r3.y -mov.f32f32 r2.w, r2.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r2.y, r2.y -mad.f32 r3.y, c13.z, r0.z, r3.y -mul.f r3.z, c11.x, r0.x -mul.f r2.x, c16.x, r2.x -mad.f32 r3.z, c12.x, r0.y, r3.z -add.f r3.y, r3.y, c14.z -mad.f32 r3.z, c13.x, r0.z, r3.z -mov.f32f32 r2.x, r2.x -mad.f32 r3.x, c18.w, r3.x, r3.y -mul.f r2.w, r2.w, (neg)c4.x -mul.f r3.w, c11.y, r0.w -mad.f32 r2.x, c18.z, r2.x, c14.x -add.f r3.z, r3.z, c14.x -mov.f32f32 r3.x, r3.x -mad.f32 r2.y, c16.x, r2.y, r3.z -mov.f32f32 r2.x, r2.x -mad.f32 r3.w, c12.y, r1.x, r3.w -mov.f32f32 r3.x, r3.x -add.f r2.y, r2.y, c19.x -mov.f32f32 r2.x, r2.x -mov.f32f32 r3.w, r3.w -add.f r3.x, r3.x, c19.x -mad.f32 r3.w, c13.y, r1.y, r3.w -add.f r2.x, r2.x, c19.x +@in(r3.z) in8 +@in(r3.w) in9 +@out(r0.x) out0 +@out(r0.y) out1 +@out(r0.z) out2 +@out(r0.w) out3 +@out(r1.x) out4 +@out(r1.y) out5 +@out(r1.z) out6 +@out(r1.w) out7 +@out(r2.x) out8 +@out(r2.y) out9 +@out(r2.z) out10 +@out(r2.w) out11 +@out(r3.x) out12 +@out(r3.y) out13 +@out(r3.z) out14 +@out(r3.w) out15 +@const(c18.x) 0x3ecccccd, 0x00000000, 0x3eaaa64c, 0x3e800000 +@const(c19.x) 0x3f000000, 0x40000000, 0xbf800000, 0x40400000 +@const(c20.x) 0x399d4952, 0x3e4ccccd, 0x3f99999a, 0x3fd9999a +@const(c21.x) 0x3c88ce70, 0x00000000, 0x00000000, 0x00000000 +@const(c22.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 +(sy)(ss)floor.f r1.z, c14.z +floor.f r1.w, c14.x +absneg.f r2.x, (abs)c17.x +absneg.f r2.y, (abs)c17.y +add.f r1.z, c14.z, (neg)r1.z +add.f r1.w, c14.x, (neg)r1.w +mov.f32f32 r2.z, c18.y +mul.f r2.w, c11.x, r0.w +max.f r1.z, r1.z, c18.y +max.f r1.w, r1.w, c18.y +add.f r2.x, r2.x, r2.y +add.f r2.y, r2.z, c19.x +min.f r1.z, r1.z, c22.y +min.f r1.w, r1.w, c22.y +mul.f r2.z, c16.x, r2.x +mul.f r3.x, c11.z, r0.x +max.f r1.z, r1.z, c18.x +max.f r1.w, r1.w, c18.x +mul.f r3.y, c11.x, r0.x +mad.f32 r3.x, c12.z, r0.y, r3.x +mul.f r1.z, c16.x, r1.z +mad.f32 r3.y, c12.x, r0.y, r3.y +mad.f32 r3.x, c13.z, r0.z, r3.x +mad.f32 r3.y, c13.x, r0.z, r3.y +mad.f32 r1.z, c18.z, r1.z, c14.x floor.f r4.x, r2.y -floor.f r4.y, r3.x -mov.f32f32 r3.w, r3.w -floor.f r4.z, r2.x +mad.f32 r2.w, c12.x, r1.x, r2.w +add.f r3.y, r3.y, c14.x +add.f r1.z, r1.z, c19.x +mad.f32 r1.w, c16.x, r1.w, r3.y +add.f r3.x, r3.x, c14.z add.f r2.y, r2.y, (neg)r4.x -add.f r3.x, r3.x, (neg)r4.y -mad.f32 r2.w, (neg)c4.y, r3.w, r2.w -add.f r2.x, r2.x, (neg)r4.z +floor.f r4.x, r1.z +add.f r1.w, r1.w, c19.x +mad.f32 r2.z, c18.w, r2.z, r3.x mad.f32 r2.y, c19.y, r2.y, c19.z -mad.f32 r3.x, c19.y, r3.x, c19.z -mov.f32f32 r2.w, r2.w -mad.f32 r2.x, c19.y, r2.x, c19.z +add.f r1.z, r1.z, (neg)r4.x +floor.f r4.x, r1.w +add.f r2.z, r2.z, c19.x absneg.f r2.y, (abs)r2.y -absneg.f r3.x, (abs)r3.x -mul.f r0.w, c11.z, r0.w -absneg.f r2.x, (abs)r2.x -mul.f r3.w, c19.y, r2.y -mul.f r4.x, c19.y, r3.x -mul.f r2.y, r2.y, r2.y -mul.f r4.y, c19.y, r2.x -add.f r3.w, c19.w, (neg)r3.w +mad.f32 r1.z, c19.y, r1.z, c19.z +add.f r4.x, r1.w, (neg)r4.x +floor.f r4.y, r2.z +mul.f r1.w, r2.y, r2.y +absneg.f r1.z, (abs)r1.z +mad.f32 r2.y, c19.y, r4.x, c19.z +add.f r2.z, r2.z, (neg)r4.y +mad.f32 r2.w, c13.x, r1.y, r2.w +mul.f r4.x, c19.y, r1.z +absneg.f r2.y, (abs)r2.y +mad.f32 r2.z, c19.y, r2.z, c19.z +mul.f r1.z, r1.z, r1.z add.f r4.x, c19.w, (neg)r4.x -mul.f r3.x, r3.x, r3.x +mul.f r4.y, c19.y, r2.y +absneg.f r2.z, (abs)r2.z +mul.f r2.y, r2.y, r2.y +mul.f r1.z, r1.z, r4.x +mul.f r4.x, r0.y, c21.x add.f r4.y, c19.w, (neg)r4.y -mul.f r2.x, r2.x, r2.x -mul.f r2.y, r2.y, r3.w -mul.f r3.x, r3.x, r4.x -mad.f32 r0.w, c12.z, r1.x, r0.w -mul.f r1.x, r2.x, r4.y -mov.f32f32 r2.x, r2.y -mov.f32f32 r2.y, r3.x -mul.f r3.x, r0.x, r0.z -mov.f32f32 r1.x, r1.x -mul.f r3.w, r0.y, c21.x -mul.f r4.x, r0.y, c20.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r4.y, c18.y -mov.f32f32 r3.w, r3.w -mul.f r3.x, r3.x, r4.x -mad.f32 r0.w, c13.z, r1.y, r0.w -add.f r1.y, r4.y, c19.x -mov.f32f32 r3.w, r3.w -mov.f32f32 r3.x, r3.x -mov.f32f32 r0.w, r0.w -floor.f r4.x, r1.y -max.f r3.w, r3.w, c18.y -mov.f32f32 r3.x, r3.x -mad.f32 r0.w, (neg)c4.z, r0.w, r2.w -add.f r1.y, r1.y, (neg)r4.x -min.f r2.w, r3.w, c22.y -mov.f32f32 r3.x, r3.x -mov.f32f32 r0.w, r0.w -mad.f32 r1.y, c19.y, r1.y, c19.z -min.f r2.w, r2.w, c18.w -max.f r3.x, r3.x, c18.y -max.f r0.w, c18.y, r0.w -absneg.f r1.y, (abs)r1.y -mov.f32f32 r2.w, r2.w -min.f r3.x, r3.x, c22.y -mov.f32f32 r0.w, r0.w -mul.f r1.y, r1.y, r1.y -mul.f r1.x, r1.x, r2.w -min.f r2.w, r3.x, c20.y -mul.f r3.x, r0.w, c5.z -mul.f r3.w, r0.w, c5.y -mov.f32f32 r1.x, r1.x -mov.f32f32 r2.w, r2.w -mov.f32f32 r3.x, r3.x -mov.f32f32 r3.w, r3.w -mul.f r0.w, r0.w, c5.x -mul.f r2.x, r2.x, r2.w -mul.f r2.y, r2.y, r2.w -max.f r2.z, r2.z, c20.z -mov.f32f32 r4.z, r3.x +mul.f r4.z, c19.y, r2.z +mul.f r2.z, r2.z, r2.z +max.f r4.x, r4.x, c18.y +mul.f r2.y, r2.y, r4.y +mul.f r4.y, r0.x, r0.z +add.f r4.z, c19.w, (neg)r4.z +min.f r4.x, r4.x, c22.y +mul.f r4.w, r0.y, c20.x +mul.f r2.w, r2.w, (neg)c4.x +mul.f r5.x, c11.y, r0.w +min.f r4.x, r4.x, c18.w +mul.f r4.y, r4.y, r4.w +mul.f r2.z, r2.z, r4.z +mad.f32 r4.z, c12.y, r1.x, r5.x +mul.f r1.z, r1.z, r4.x +max.f r4.x, r4.y, c18.y +mad.f32 r4.y, c13.y, r1.y, r4.z mov.f32f32 r2.x, r2.x -mov.f32f32 r4.y, r3.w -mad.f32 r2.w, c17.x, r1.x, r2.x -mad.f32 r1.x, c17.y, r1.x, r2.x -mov.f32f32 r2.x, r2.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r2.z, r2.w -mov.f32f32 r1.x, r1.x -min.f r2.x, r2.x, c20.w -mov.f32f32 r4.x, r0.w -mov.f32f32 r4.w, r1.y -mul.f r0.w, c11.y, r0.x -mov.f32f32 r1.y, r2.x -mad.f32 r0.w, c12.y, r0.y, r0.w +mov.f32f32 r4.z, r1.z +min.f r4.x, r4.x, c22.y +mad.f32 r2.w, (neg)c4.y, r4.y, r2.w +mul.f r0.w, c11.z, r0.w +max.f r2.x, r2.x, c20.z +min.f r4.x, r4.x, c20.y +mad.f32 r0.w, c12.z, r1.x, r0.w +mul.f r1.x, c11.y, r0.x mul.f r0.x, c11.w, r0.x -mov.f32f32 r1.w, r1.w -mad.f32 r2.x, r2.y, r1.y, r3.z -mad.f32 r2.w, r2.y, r1.y, r3.y -mad.f32 r0.w, c13.y, r0.z, r0.w +mov.f32f32 r4.y, r4.x +mul.f r2.z, r2.z, r4.x +min.f r2.x, r2.x, c20.w +mad.f32 r0.w, c13.z, r1.y, r0.w +mul.f r1.y, r2.y, r4.y +mad.f32 r1.x, c12.y, r0.y, r1.x mad.f32 r0.x, c12.w, r0.y, r0.x -add.f r0.y, r2.x, r2.z -add.f r1.x, r2.w, r1.x -add.f r0.w, r0.w, c14.y -nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.x, r1.x -mad.f32 r0.w, r2.y, r1.y, r0.w +mad.f32 r0.y, c13.y, r0.z, r1.x +mov.f32f32 r1.x, r1.y +mad.f32 r1.y, c17.y, r1.z, r1.y +mad.f32 r1.x, c17.x, r4.z, r1.x +mov.f32f32 r1.z, r2.x +mad.f32 r2.x, r2.z, r2.x, r3.x +add.f r0.y, r0.y, c14.y +mad.f32 r0.w, (neg)c4.z, r0.w, r2.w +mad.f32 r2.y, r2.z, r1.z, r3.y +add.f r1.y, r2.x, r1.y +mad.f32 r0.y, r2.z, r1.z, r0.y +max.f r4.x, c18.y, r0.w +add.f r0.w, r2.y, r1.x +mov.f32f32 r1.x, r1.y mad.f32 r0.x, c13.w, r0.z, r0.x -mul.f r0.z, c7.w, r0.y -mul.f r1.y, c7.z, r0.y -mad.f32 r0.z, c8.w, r0.w, r0.z -mad.f32 r1.y, c8.z, r0.w, r1.y -mul.f r2.x, c7.y, r0.y -mul.f r2.y, c7.x, r0.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r1.y, r1.y -mad.f32 r0.z, c9.w, r1.x, r0.z -add.f r0.x, r0.x, c14.w -mad.f32 r1.y, c9.z, r1.x, r1.y -mad.f32 r2.x, c8.y, r0.w, r2.x -mad.f32 r2.y, c8.x, r0.w, r2.y -mad.f32 r0.z, c10.w, r0.x, r0.z -mad.f32 r1.y, c10.z, r0.x, r1.y -mov.f32f32 r2.x, r2.x -mov.f32f32 r2.y, r2.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r1.y, r1.y -mad.f32 r2.x, c9.y, r1.x, r2.x -mad.f32 r2.y, c9.x, r1.x, r2.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r1.y, r1.y -mad.f32 r2.x, c10.y, r0.x, r2.x -mad.f32 r2.y, c10.x, r0.x, r2.y -mov.f32f32 r3.y, r0.z -mov.f32f32 r3.x, r1.y -mov.f32f32 r0.z, r2.x -mov.f32f32 r1.y, r2.y -mul.f r2.x, c0.w, r0.y -mul.f r2.y, c0.z, r0.y -mul.f r0.z, r0.z, c15.y -mul.f r1.y, r1.y, c15.x -(rpt1)nop -mov.f32f32 r2.w, r0.z -mov.f32f32 r2.z, r1.y -mad.f32 r0.z, c1.w, r0.w, r2.x -mad.f32 r1.y, c1.z, r0.w, r2.y -mad.f32 r0.z, c2.w, r1.x, r0.z -mad.f32 r1.y, c2.z, r1.x, r1.y -mad.f32 r0.z, c3.w, r0.x, r0.z -mad.f32 r1.y, c3.z, r0.x, r1.y -mul.f r2.x, c0.y, r0.y -mul.f r2.y, c0.x, r0.y -mov.f32f32 r5.w, r0.z -mov.f32f32 r5.z, r1.y -mad.f32 r0.z, c1.y, r0.w, r2.x -mad.f32 r0.w, c1.x, r0.w, r2.y -mad.f32 r0.z, c2.y, r1.x, r0.z -mad.f32 r0.w, c2.x, r1.x, r0.w -mad.f32 r0.z, c3.y, r0.x, r0.z -mad.f32 r0.x, c3.x, r0.x, r0.w -(rpt1)nop -mov.f32f32 r5.y, r0.z -mov.f32f32 r5.x, r0.x -mad.f32 r0.x, c6.x, r1.x, c6.y -mad.f32 r0.y, c6.x, r0.y, c6.y -(rpt1)nop -mov.f32f32 r0.w, r0.x -mov.f32f32 r0.z, r0.y -mov.f32f32 r1.y, r1.w -mov.f32f32 r0.x, r1.z -(rpt2)nop -mov.f32f32 r1.x, r0.x +nop +mov.f32f32 r1.z, r0.w +mul.f r3.x, c0.x, r0.w +mad.f32 r3.y, c6.x, r1.x, c6.y +mov.f32f32 r4.y, r4.x +mul.f r0.z, c7.y, r1.z +mul.f r0.w, c7.x, r1.z +mad.f32 r0.z, c8.y, r0.y, r0.z +mad.f32 r0.w, c8.x, r0.y, r0.w +mad.f32 r0.z, c9.y, r1.x, r0.z +add.f r4.z, r0.x, c14.w +mad.f32 r0.x, c9.x, r1.x, r0.w +mul.f r0.w, c7.w, r1.z +mul.f r2.x, c7.z, r1.z +mad.f32 r0.z, c10.y, r4.z, r0.z +mad.f32 r0.x, c10.x, r4.z, r0.x +mad.f32 r0.w, c8.w, r0.y, r0.w +mad.f32 r2.z, c8.z, r0.y, r2.x +mul.f r2.y, r0.z, c15.y +mul.f r2.x, r0.x, c15.x +mad.f32 r0.x, c9.w, r1.x, r0.w +mad.f32 r0.z, c9.z, r1.x, r2.z +mad.f32 r2.w, c10.w, r4.z, r0.x +mad.f32 r2.z, c10.z, r4.z, r0.z +mul.f r0.x, c0.w, r1.z +mul.f r0.z, c0.z, r1.z +mad.f32 r0.x, c1.w, r0.y, r0.x +mad.f32 r0.z, c1.z, r0.y, r0.z +mad.f32 r0.x, c2.w, r1.x, r0.x +mad.f32 r0.z, c2.z, r1.x, r0.z +mad.f32 r0.w, c3.w, r4.z, r0.x +mad.f32 r0.z, c3.z, r4.z, r0.z +mul.f r0.x, c0.y, r1.z +mad.f32 r3.x, c1.x, r0.y, r3.x +mad.f32 r0.x, c1.y, r0.y, r0.x +mad.f32 r0.y, c2.x, r1.y, r3.x +mad.f32 r1.x, c2.y, r1.x, r0.x +mad.f32 r0.x, c3.x, r4.z, r0.y +mad.f32 r0.y, c3.y, r4.z, r1.x +mad.f32 r3.x, c6.x, r1.z, c6.y +mul.f r1.z, r4.y, c5.z +mul.f r1.y, r4.y, c5.y +mul.f r1.x, r4.x, c5.x end -; VERT: outputs: r5.x (0:0) r4.x (5:9) r2.z (5:10) r0.z (5:11) -; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r1.z (0:0,cm=3,il=16,b=0) -; VERT: 221 instructions, 0 half, 6 full -; pos: r5.x +; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11) +; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r3.z (0:0,cm=3,il=16,b=0) +; VERT: 152 instructions, 0 half, 6 full diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-76.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-76.asm index 0274fe9..be30c1a 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-76.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-76.asm @@ -4,206 +4,143 @@ @in(r0.w) in3 @in(r1.x) in4 @in(r1.y) in5 -@out(r1.y) out0 -@out(r1.z) out1 -@out(r1.w) out2 -@out(r2.x) out3 +@out(r2.x) out0 +@out(r2.y) out1 +@out(r2.z) out2 +@out(r2.w) out3 +@const(c9.x) 0x3f000000, 0x00000000, 0x3d4ccccd, 0xbb449ba6 +@const(c10.x) 0xbf000000, 0x3f800000, 0x40000000, 0x3de38866 +@const(c11.x) 0x3fb8aa65, 0x3cf5c28f, 0x00000000, 0x00000000 +@const(c12.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.x, 4, r1.x add.f r0.y, r0.w, c9.y bary.f r0.w, 5, r1.x bary.f r1.z, 10, r1.x -add.f r1.w, r0.x, c10.x -bary.f r2.x, 8, r1.x +add.f r2.x, r0.x, c10.x +bary.f r1.w, 11, r1.x add.f r2.y, r0.w, c10.x -mov.f32f32 r1.z, r1.z -floor.f r2.z, r1.w +bary.f r3.y, 8, r1.x +floor.f r2.z, r2.x rcp r0.y, r0.y add.f r0.z, r0.z, c9.y floor.f r2.w, r2.y -mov.f32f32 r3.x, r1.z -add.f r1.z, r1.w, (neg)r2.z +bary.f r3.z, 9, r1.x +add.f r3.w, r2.x, (neg)r2.z (ss)mul.f r0.y, r0.z, r0.y -add.f r0.z, r2.y, (neg)r2.w -bary.f r1.w, 11, r1.x -mov.f32f32 r1.z, r1.z +absneg.f r0.z, (neg)c7.x +add.f r4.x, r2.y, (neg)r2.w +mov.f32f32 r4.y, r3.w +sam (f32)(xyzw)r2.x, r1.z, s#0, t#0 +(ss)add.f r1.z, r3.w, c10.y +mul.f r0.z, r0.z, c7.x +mov.f32f32 r1.w, r4.x +mul.f r3.w, c9.x, r4.y +add.f r4.y, c10.z, (neg)r4.y +mul.f r0.z, r0.z, r0.y mov.f32f32 r0.y, r0.y -absneg.f r2.y, (neg)c7.x -mov.f32f32 r0.z, r0.z -mul.f r2.z, c9.x, r1.z -mov.f32f32 r1.w, r1.w -add.f r2.w, c10.z, (neg)r1.z -add.f r3.z, c10.z, (neg)r0.z -mov.f32f32 r2.z, r2.z -mul.f r2.y, r2.y, c7.x -mul.f r3.w, c9.x, r0.z -mov.f32f32 r3.y, r1.w -add.f r0.x, r0.x, (neg)r2.z -mul.f r1.w, r2.y, r0.y -mov.f32f32 r2.y, r3.w -mov.f32f32 r2.z, r2.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r1.w, r1.w -add.f r0.w, r0.w, (neg)r2.y -sam (f32)(xyzw)r3.w, r3.x, s#0, t#0 -(sy)cmps.f.lt r2.y, r4.z, c9.z -add.f r2.w, c10.x, r0.x -mul.f r0.y, r1.w, r0.y +add.f r0.x, r0.x, (neg)r3.w +mul.f r3.w, c9.x, r1.w +mov.f32f32 r4.z, r4.y +mul.f r0.y, r0.z, r0.y +mov.f32f32 r0.z, r0.x +add.f r0.w, r0.w, (neg)r3.w add.f r0.x, c10.y, r0.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.w, r2.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x -add.f r2.w, c10.y, r0.w -mul.f r1.w, r1.w, c3.z mul.f r0.y, r0.y, c11.x -mul.f r0.x, r0.x, c3.z -mov.f32f32 r2.w, r2.w -(ss)mov.f32f32 r3.x, r1.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r3.y, r0.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r4.w, r3.x -add.f r0.w, c10.x, r0.w -mov.f32f32 r3.x, r3.y -mov.f32f32 r5.z, r0.x -mov.f32f32 r0.x, r1.w -mov.f32f32 r0.w, r0.w -exp2 r0.y, r0.y -(ss)mov.f32f32 r0.y, r0.y -mov.f32f32 r6.y, r3.x -mul.f r1.w, r2.w, c3.w -mul.f r0.w, r0.w, c3.w -add.f r2.w, c12.y, (neg)r0.y -mov.f32f32 r7.x, r0.x -mov.f32f32 r0.x, r1.w -mov.f32f32 r3.x, r0.w -mul.f r2.w, r2.w, c7.y -mul.f r0.y, r0.y, c10.y -mov.f32f32 r1.w, r1.w -mov.f32f32 r5.x, r3.x -bary.f r3.x, 6, r1.x -add.f r0.y, r0.y, r2.w -mov.f32f32 r1.w, r1.w -mov.f32f32 r0.w, r0.w -add.f r2.w, r3.x, c9.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r6.z, r1.w -mov.f32f32 r5.w, r0.w -mov.f32f32 r0.w, r2.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.w, r2.w -mov.f32f32 r3.x, r2.w -mov.f32f32 r5.y, r0.w -max.f r0.y, r0.y, c9.y -mov.f32f32 r6.w, r1.w -mov.f32f32 r6.x, r3.x -mov.f32f32 r7.y, r0.x -mov.f32f32 r0.x, r2.w -cov.u32f32 r0.w, r2.y -sam.s (f32)(x)r1.w, r4.w, s#2, t#2 -(sy)mov.f32f32 r1.w, r1.w -min.f r0.y, r0.y, c10.y -sam.s (f32)(x)r2.y, r6.y, s#2, t#2 -(sy)mov.f32f32 r2.y, r2.y -sam.s (f32)(x)r2.w, r5.z, s#2, t#2 -(sy)mov.f32f32 r2.w, r2.w -mov.f32f32 r1.w, r1.w -mov.f32f32 r3.x, r3.z -add.f r3.y, c12.y, (neg)r0.y -add.f r3.z, c12.y, (neg)r0.y -(ss)add.f r4.w, c12.y, (neg)r0.y -mul.f r5.x, r2.z, r3.x -mul.f r3.y, r3.y, c6.z -mul.f r3.z, r3.z, c6.y -mul.f r4.w, r4.w, c6.x -mul.f r1.w, r5.x, r1.w -add.f r1.z, r1.z, c10.y -mov.f32f32 r7.z, r0.x -mov.f32f32 r0.x, (0.000000) -add.f r0.z, r0.z, c10.y -mul.f r3.x, r1.z, r3.x -mov.f32f32 r2.x, r2.x -cmps.f.ne p0.x, r0.w, r0.x -mul.f r0.x, r2.z, r0.z -mad.f32 r0.w, r3.x, r2.w, r1.w -sam.s (f32)(x)r1.w, r7.x, s#2, t#2 +add.f r0.z, c10.x, r0.z +mov.f32f32 r3.w, r0.w +mul.f r4.w, r0.x, c3.z +add.f r0.x, c10.y, r0.w +mul.f r5.z, r0.z, c3.z +add.f r0.z, c10.x, r3.w +mov.f32f32 r6.y, r4.w +mul.f r5.x, r0.x, c3.w +mov.f32f32 r7.x, r5.z +mul.f r6.z, r0.z, c3.w +exp2 r0.x, r0.y +(ss)mov.f32f32 r0.y, r0.x +bary.f r0.z, 6, r1.x +mov.f32f32 r5.w, r5.x +mov.f32f32 r7.y, r6.z +add.f r0.y, c12.y, (neg)r0.y +add.f r5.y, r0.z, c9.w +add.f r0.z, c10.z, (neg)r1.w +mul.f r0.x, r0.x, c10.y +add.f r0.w, r4.x, c10.y +mov.f32f32 r7.z, r5.y +mul.f r0.y, r0.y, c7.y +mov.f32f32 r6.w, r5.y +mov.f32f32 r6.x, r5.y (rpt1)nop -(sy)mov.f32f32 r1.w, r1.w -mov.f32f32 r0.w, r0.w -bary.f r2.z, 0, r1.x -mov.f32f32 r2.w, r2.x -mad.f32 r0.x, r0.x, r1.w, r0.w -bary.f r0.w, 1, r1.x -bary.f r1.w, 9, r1.x +add.f r0.x, r0.x, r0.y +sam.s (f32)(x)r7.x, r7.x, s#2, t#2 +mov.f32f32 r0.y, r0.z +nop +(ss)nop +sam.s (f32)(x)r7.y, r6.y, s#2, t#2 +sam.s (f32)(x)r7.z, r4.w, s#2, t#2 mul.f r0.z, r1.z, r0.z -mov.f32f32 r0.x, r0.x -bary.f (ei)r1.x, 2, r1.x -mov.f32f32 r1.y, r1.w -mad.f32 r0.x, r0.z, r2.y, r0.x +mul.f r1.w, r4.y, r0.w +mul.f r0.y, r4.z, r0.y +max.f r0.x, r0.x, c9.y +sam.s (f32)(x)r3.w, r5.z, s#2, t#2 +(sy)cmps.f.lt r4.x, r2.w, c9.z +mul.f r0.w, r1.z, r0.w +mul.f r0.y, r0.y, r7.x +min.f r0.x, r0.x, c10.y +mad.f32 r0.y, r0.z, r7.y, r0.y +cov.u32f32 r0.z, r4.x +mad.f32 r0.y, r1.w, r3.w, r0.y +add.f r1.z, c12.y, (neg)r0.x +mad.f32 r0.y, r0.w, r7.z, r0.y +add.f r0.w, c12.y, (neg)r0.x +add.f r1.w, c12.y, (neg)r0.x +mov.f32f32 r3.w, (0.000000) +mul.f r0.y, c10.w, r0.y +bary.f r4.x, 0, r1.x +mul.f r1.z, r1.z, c6.z +mul.f r0.w, r0.w, c6.y +mov.f32f32 r4.y, r0.y +bary.f r4.z, 2, r1.x +bary.f (ei)r1.x, 1, r1.x +mul.f r1.y, r2.x, r4.x +mul.f r1.w, r1.w, c6.x +mul.f r4.x, r2.z, r4.z +mul.f r1.x, r2.y, r1.x +mul.f r0.y, r1.y, r0.y +cmps.f.ne p0.x, r0.z, r3.w +mul.f r0.z, r4.x, r4.y +mul.f r1.x, r1.x, r4.y +mad.f32 r0.z, c5.z, r2.z, r0.z +mad.f32 r1.x, c5.y, r2.y, r1.x +mad.f32 r0.y, c5.x, r2.x, r0.y +sam (f32)(w)r3.y, r3.y, s#1, t#1 +(sy)cmps.f.lt r1.y, r4.x, c11.y +mul.f r0.z, r0.x, r0.z +mul.f r1.x, r0.x, r1.x +mul.f r0.x, r0.x, r0.y kill p0.x -mov.f32f32 r1.z, r4.z -mov.f32f32 r3.x, r1.y -mov.f32f32 r0.x, r0.x -mul.f r1.x, r4.y, r1.x -mul.f r0.w, r4.x, r0.w -mov.f32f32 r2.x, r1.z -mul.f r0.x, c10.w, r0.x -mul.f r2.y, r3.w, r2.z -sam (f32)(w)r5.x, r2.w, s#1, t#1 -nop -(sy)cmps.f.lt r2.z, r5.w, c11.y -mov.f32f32 r0.x, r0.x -(ss)mov.f32f32 r2.w, r5.w -mov.f32f32 r3.x, c9.y -nop -mul.f r1.x, r1.x, r0.x -mul.f r0.w, r0.w, r0.x -mul.f r0.x, r2.y, r0.x -cov.u32f32 r2.y, r2.z -mov.f32f32 r1.x, r1.x -mov.f32f32 r0.w, r0.w -mad.f32 r1.x, c5.z, r4.y, r1.x -mad.f32 r0.w, c5.y, r4.x, r0.w -mov.f32f32 r0.x, r0.x -cmps.f.ne r2.y, r2.y, c9.y -mov.f32f32 r1.x, r1.x -mov.f32f32 r0.w, r0.w -mad.f32 r0.x, c5.x, r3.w, r0.x -mov.f32f32 r2.z, r2.w -mul.f r1.x, r0.y, r1.x -mul.f r0.w, r0.y, r0.w -mov.f32f32 r0.x, r0.x -sel.b32 r2.y, r3.x, r2.y, r2.z -add.f r1.x, r1.x, r3.y -add.f r0.w, r0.w, r3.z -mul.f r0.x, r0.y, r0.x -nop -mul.f r0.y, r1.x, r2.y -mul.f r0.w, r0.w, r2.y -add.f r0.x, r0.x, r4.w -nop -mul.f r0.y, r0.y, c4.z -mul.f r0.w, r0.w, c4.y -mul.f r0.x, r0.x, r2.y +add.f r0.y, r0.z, r1.z +cov.u32f32 r0.z, r1.y +add.f r0.w, r1.x, r0.w +add.f r0.x, r0.x, r1.w +mov.f32f32 r1.x, c9.y +cmps.f.ne r0.z, r0.z, c9.y +(rpt2)nop +sel.b32 r0.z, r1.x, r0.z, r4.x +(rpt2)nop +mul.f r0.y, r0.y, r0.z +mul.f r0.w, r0.w, r0.z +(rpt1)nop +mul.f r2.z, r0.y, c4.z +mul.f r2.y, r0.w, c4.y +mul.f r0.x, r0.x, r0.z +(rpt2)nop +mul.f r2.x, r0.x, c4.x +end nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.w, r0.w -mul.f r0.x, r0.x, c4.x nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.x, r0.x nop -mov.f32f32 r1.w, r0.y -mov.f32f32 r1.z, r0.w -mov.f32f32 r0.x, r0.x -(rpt2)nop -mov.f32f32 r1.y, r0.x -end -; FRAG: outputs: r1.y (1:0) +; FRAG: outputs: r2.x (1:0) ; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r2.x (5:10,cm=f,il=12,b=1) r1.x (5:11,cm=f,il=16,b=1) -; FRAG: 195 instructions, 0 half, 8 full -; pos (bary): r1.x -; color: r1.y -; fragcoord: r0.x +; FRAG: 133 instructions, 0 half, 8 full diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-77.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-77.asm index 0e4d5ee..9c8ac11 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-77.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-77.asm @@ -6,134 +6,99 @@ @in(r0.w) in4 @in(r1.x) in5 @in(r1.y) in6 -@in(r1.z) in8 -@in(r1.w) in9 -@out(r6.y) out0 -@out(r6.z) out1 -@out(r6.w) out2 -@out(r7.x) out3 -@out(r5.y) out4 -@out(r5.z) out5 -@out(r5.w) out6 -@out(r6.x) out7 -@out(r2.w) out8 -@out(r3.x) out9 -@out(r3.y) out10 -@out(r3.z) out11 -@out(r3.w) out12 -@out(r4.x) out13 -@out(r4.y) out14 -@out(r4.z) out15 -(sy)(ss)mul.f r2.x, c11.x, r0.w -mul.f r2.y, c11.x, r0.x -mad.f32 r2.x, c12.x, r1.x, r2.x -mad.f32 r2.y, c12.x, r0.y, r2.y -mul.f r2.z, c11.z, r0.x -mad.f32 r2.y, c13.x, r0.z, r2.y -mov.f32f32 r2.x, r2.x -mad.f32 r2.z, c12.z, r0.y, r2.z -mad.f32 r2.x, c13.x, r1.y, r2.x -add.f r2.y, r2.y, c14.x -mad.f32 r2.z, c13.z, r0.z, r2.z -mul.f r2.w, c11.y, r0.w -mov.f32f32 r2.x, r2.x -mul.f r3.x, c7.w, r2.y -mul.f r3.y, c7.z, r2.y -mul.f r3.z, c7.y, r2.y -mul.f r2.x, r2.x, (neg)c4.x -mad.f32 r2.w, c12.y, r1.x, r2.w -mul.f r3.w, c11.y, r0.x -mul.f r4.x, c7.x, r2.y -mad.f32 r3.w, c12.y, r0.y, r3.w -mov.f32f32 r2.w, r2.w -mad.f32 r3.w, c13.y, r0.z, r3.w -mad.f32 r2.w, c13.y, r1.y, r2.w -mul.f r4.y, c0.w, r2.y -mul.f r4.z, c0.z, r2.y -mul.f r4.w, c0.y, r2.y -mov.f32f32 r2.w, r2.w -add.f r3.w, r3.w, c14.y -mul.f r5.x, c0.x, r2.y -add.f r2.z, r2.z, c14.z -mad.f32 r2.x, (neg)c4.y, r2.w, r2.x -mad.f32 r2.w, c8.w, r3.w, r3.x -mad.f32 r3.x, c8.z, r3.w, r3.y -mad.f32 r3.y, c8.y, r3.w, r3.z -mov.f32f32 r2.x, r2.x +@in(r3.z) in8 +@in(r3.w) in9 +@out(r0.x) out0 +@out(r0.y) out1 +@out(r0.z) out2 +@out(r0.w) out3 +@out(r1.x) out4 +@out(r1.y) out5 +@out(r1.z) out6 +@out(r1.w) out7 +@out(r2.x) out8 +@out(r2.y) out9 +@out(r2.z) out10 +@out(r2.w) out11 +@out(r3.x) out12 +@out(r3.y) out13 +@out(r3.z) out14 +@out(r3.w) out15 +@const(c16.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 +@const(c17.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 +(sy)(ss)mul.f r1.z, c11.x, r0.w +mul.f r1.w, c11.x, r0.x +mad.f32 r1.z, c12.x, r1.x, r1.z +mad.f32 r1.w, c12.x, r0.y, r1.w +mad.f32 r1.z, c13.x, r1.y, r1.z +mad.f32 r1.w, c13.x, r0.z, r1.w +mul.f r2.x, c11.z, r0.x +mul.f r2.y, c11.y, r0.x +mul.f r1.z, r1.z, (neg)c4.x +mul.f r2.z, c11.y, r0.w +add.f r1.w, r1.w, c14.x +mad.f32 r2.z, c12.y, r1.x, r2.z +mad.f32 r2.x, c12.z, r0.y, r2.x +mad.f32 r2.z, c13.y, r1.y, r2.z +mul.f r2.w, c7.y, r1.w +mul.f r3.x, c7.x, r1.w +mul.f r3.y, c7.w, r1.w +mad.f32 r1.z, (neg)c4.y, r2.z, r1.z mul.f r0.w, c11.z, r0.w -mad.f32 r2.w, c9.w, r2.z, r2.w +mad.f32 r2.y, c12.y, r0.y, r2.y mad.f32 r0.w, c12.z, r1.x, r0.w +mad.f32 r1.x, c13.y, r0.z, r2.y +mad.f32 r0.w, c13.z, r1.y, r0.w +mul.f r1.y, c7.z, r1.w +mul.f r2.y, c0.w, r1.w +mul.f r2.z, c0.z, r1.w +mad.f32 r0.w, (neg)c4.z, r0.w, r1.z +add.f r4.x, r1.x, c14.y +mul.f r4.y, c0.y, r1.w +mul.f r4.z, c0.x, r1.w +max.f r0.w, c16.x, r0.w +mad.f32 r2.w, c8.y, r4.x, r2.w +mad.f32 r3.x, c8.x, r4.x, r3.x +mad.f32 r3.y, c8.w, r4.x, r3.y +mov.f32f32 r4.w, r0.w +mad.f32 r2.x, c13.z, r0.z, r2.x +mul.f r1.x, r0.w, c5.x +mad.f32 r0.w, c8.z, r4.x, r1.y +mul.f r1.z, r4.w, c5.z +mul.f r1.y, r4.w, c5.y +add.f r4.w, r2.x, c14.z +mad.f32 r2.x, c1.w, r4.x, r2.y +mad.f32 r2.y, c1.z, r4.x, r2.z +mad.f32 r2.z, c1.y, r4.x, r4.y +mad.f32 r2.w, c9.y, r4.w, r2.w mul.f r0.x, c11.w, r0.x -mad.f32 r1.x, c9.z, r2.z, r3.x -mad.f32 r3.x, c9.y, r2.z, r3.y -mov.f32f32 r0.w, r0.w +mad.f32 r3.x, c9.x, r4.w, r3.x mad.f32 r0.x, c12.w, r0.y, r0.x -mad.f32 r0.y, c13.z, r1.y, r0.w +mad.f32 r0.y, c9.w, r4.w, r3.y mad.f32 r0.x, c13.w, r0.z, r0.x -mad.f32 r0.z, c8.x, r3.w, r4.x -mad.f32 r0.w, c1.w, r3.w, r4.y -mov.f32f32 r0.y, r0.y +mad.f32 r0.z, c9.z, r4.w, r0.w +mad.f32 r0.w, c2.w, r4.w, r2.x +mad.f32 r4.y, c2.z, r4.w, r2.y add.f r0.x, r0.x, c14.w -mad.f32 r0.z, c9.x, r2.z, r0.z -mad.f32 r0.w, c2.w, r2.z, r0.w -mad.f32 r0.y, (neg)c4.z, r0.y, r2.x -mad.f32 r1.y, c10.w, r0.x, r2.w -mad.f32 r1.x, c10.z, r0.x, r1.x -mad.f32 r2.x, c10.y, r0.x, r3.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.x, r1.x -mul.f r2.x, r2.x, c15.y -max.f r0.y, c16.x, r0.y -mov.f32f32 r3.z, r1.y -mov.f32f32 r3.y, r1.x -mov.f32f32 r3.x, r2.x -mov.f32f32 r0.y, r0.y -mad.f32 r0.z, c10.x, r0.x, r0.z +mad.f32 r5.x, c2.y, r4.w, r2.z +mad.f32 r4.x, c1.x, r4.x, r4.z +mad.f32 r3.y, c6.x, r4.w, c6.y +mad.f32 r2.x, c10.y, r0.x, r2.w +mad.f32 r3.x, c10.x, r0.x, r3.x +mad.f32 r2.w, c10.w, r0.x, r0.y +mad.f32 r2.z, c10.z, r0.x, r0.z +mul.f r2.y, r2.x, c15.y +mul.f r2.x, r3.x, c15.x mad.f32 r0.w, c3.w, r0.x, r0.w -mad.f32 r1.x, c1.z, r3.w, r4.z -mul.f r1.y, r0.y, c5.z -mul.f r2.x, r0.y, c5.y -mul.f r0.y, r0.y, c5.x -mul.f r0.z, r0.z, c15.x -mov.f32f32 r1.y, r1.y -mov.f32f32 r2.x, r2.x -mov.f32f32 r0.y, r0.y -nop -mov.f32f32 r5.w, r1.y -mov.f32f32 r5.z, r2.x -mov.f32f32 r5.y, r0.y -mov.f32f32 r2.w, r0.z -mov.f32f32 r7.x, r0.w -mad.f32 r0.y, c2.z, r2.z, r1.x -mad.f32 r0.z, c1.y, r3.w, r4.w -mad.f32 r0.y, c3.z, r0.x, r0.y -mad.f32 r0.z, c2.y, r2.z, r0.z -mad.f32 r0.w, c1.x, r3.w, r5.x -mad.f32 r1.x, c6.x, r2.z, c6.y -mov.f32f32 r6.w, r0.y -mad.f32 r0.y, c3.y, r0.x, r0.z -mad.f32 r0.z, c2.x, r2.z, r0.w -mov.f32f32 r0.w, r1.x -mad.f32 r1.x, c6.x, r2.y, c6.y -mov.f32f32 r6.z, r0.y -mad.f32 r0.x, c3.x, r0.x, r0.z -mov.f32f32 r4.x, r0.w -mov.f32f32 r0.y, r1.x -nop -mov.f32f32 r6.y, r0.x -mov.f32f32 r0.x, r1.w -mov.f32f32 r3.w, r0.y -mov.f32f32 r0.y, r1.z -mov.f32f32 r0.z, (0.000000) -mov.f32f32 r4.z, r0.x -nop -mov.f32f32 r4.y, r0.y -mov.f32f32 r6.x, r0.z +mad.f32 r0.z, c3.z, r0.x, r4.y +mad.f32 r0.y, c3.y, r0.x, r5.x +mad.f32 r4.x, c2.x, r4.w, r4.x +mad.f32 r3.x, c6.x, r1.w, c6.y +mad.f32 r0.x, c3.x, r0.x, r4.x +mov.f32f32 r1.w, (0.000000) end nop -; VERT: outputs: r6.y (0:0) r5.y (5:9) r2.w (5:10) r3.w (5:11) -; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r1.z (0:0,cm=3,il=16,b=0) -; VERT: 107 instructions, 0 half, 8 full -; pos: r6.y +; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11) +; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r3.z (0:0,cm=3,il=16,b=0) +; VERT: 71 instructions, 0 half, 6 full diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-81.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-81.asm index f46066a..df4bfc0 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-81.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-81.asm @@ -2,65 +2,36 @@ ; FRAG: new compiler @in(r0.x) in0 @in(r0.y) in1 -@out(r2.x) out0 -@out(r2.y) out1 -@out(r2.z) out2 -@out(r2.w) out3 -(sy)(ss)bary.f r0.z, 1, r0.x -bary.f r0.w, 0, r0.x -bary.f (ei)r0.x, 2, r0.x +@out(r1.x) out0 +@out(r1.y) out1 +@out(r1.z) out2 +@out(r1.w) out3 +@const(c0.x) 0x3e800000, 0x40800000, 0x00000000, 0x00000000 +(sy)(ss)bary.f r0.w, 1, r0.x +bary.f r0.z, 0, r0.x +bary.f (ei)r1.x, 2, r0.x nop -add.f r0.y, c0.x, (neg)r0.z -mov.f32f32 r0.w, r0.w -cmps.f.lt r1.x, c0.z, r0.z -mov.f32f32 r0.x, r0.x -mul.f r0.y, r0.y, c0.y -mov.f32f32 r1.y, r0.w -mov.f32f32 r0.z, r0.z -cov.u32f32 r0.w, r1.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.w, r0.x -mov.f32f32 r1.z, r0.z +add.f r0.x, c0.x, (neg)r0.w +cmps.f.lt r0.y, c0.z, r0.w +(rpt1)nop +mul.f r0.x, r0.x, c0.y +sam.3d (f32)(xyzw)r2.x, r0.z, s#0, t#0 +cov.u32f32 r0.y, r0.y +(rpt1)nop +(ss)mov.f32f32 r0.z, r0.x +(sy)mul.f r0.x, r2.x, r0.x +cmps.f.ne r0.y, r0.y, c0.z nop -mov.f32f32 r0.x, r0.y -mov.f32f32 r0.z, r0.y -mov.f32f32 r1.x, r0.y -mov.f32f32 r0.y, r0.y -cmps.f.ne r0.w, r0.w, c0.z -sam.3d (f32)(xyzw)r1.y, r1.y, s#0, t#0 -(sy)mov.f32f32 r2.y, r2.x -mov.f32f32 r2.z, r1.w -mov.f32f32 r2.w, r1.z -mov.f32f32 r3.x, r1.y -mul.f r0.x, r2.y, r0.x -mul.f r0.z, r2.z, r0.z -mul.f r1.x, r2.w, r1.x -mul.f r0.y, r3.x, r0.y -mov.f32f32 r0.x, r0.x -mov.f32f32 r2.x, r2.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r1.x, r1.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r2.x, r2.x -(ss)mov.f32f32 r1.w, r1.w -mov.f32f32 r1.z, r1.z -mov.f32f32 r1.y, r1.y -sel.b32 r0.x, r0.x, r0.w, r2.x -mov.f32f32 r1.w, r1.w -mov.f32f32 r1.z, r1.z -mov.f32f32 r1.y, r1.y -mov.f32f32 r2.w, r0.x -sel.b32 r0.x, r0.z, r0.w, r1.w -sel.b32 r0.z, r1.x, r0.w, r1.z -sel.b32 r0.y, r0.y, r0.w, r1.y +mul.f r0.w, r2.w, r0.z +mul.f r1.x, r2.z, r0.z +mul.f r0.z, r2.y, r0.z nop -mov.f32f32 r2.z, r0.x -mov.f32f32 r2.y, r0.z -mov.f32f32 r2.x, r0.y +sel.b32 r1.w, r0.w, r0.y, r2.w +sel.b32 r1.z, r1.x, r0.y, r2.z +sel.b32 r1.y, r0.z, r0.y, r2.y +sel.b32 r1.x, r0.x, r0.y, r2.x end -; FRAG: outputs: r2.x (1:0) +; FRAG: outputs: r1.x (1:0) ; FRAG: inputs: r0.x (5:9,cm=f,il=8,b=1) -; FRAG: 52 instructions, 0 half, 4 full -; pos (bary): r0.x -; color: r2.x +; FRAG: 26 instructions, 0 half, 3 full diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-85.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-85.asm index 86cd6d2..46ab29e 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-85.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-85.asm @@ -6,1030 +6,711 @@ @in(r0.w) in3 @in(r1.x) in4 @in(r1.y) in5 -@out(r10.y) out0 -@out(r10.z) out1 -@out(r10.w) out2 -@out(r11.x) out3 +@out(r2.x) out0 +@out(r2.y) out1 +@out(r2.z) out2 +@out(r2.w) out3 +@const(c28.x) 0x3f000000, 0x00000000, 0x40f00000, 0x3dcccccd +@const(c29.x) 0xbf000000, 0x80000000, 0x40400000, 0x3f800000 +@const(c30.x) 0x00000000, 0x3f800000, 0x46800000, 0x45801000 +@const(c31.x) 0x40000000, 0xbf800000, 0x457fe000, 0x3b23d70a +@const(c32.x) 0x3d4ccccd, 0x3a83126f, 0x3f7fbe77, 0x3f866666 +@const(c33.x) 0x3fa66666, 0x3f4ccccd, 0x40a00000, 0x3f316f00 +@const(c34.x) 0x43480000, 0x3e800000, 0x40e00000, 0xc2c80000 +@const(c35.x) 0xc4bb8000, 0x43fa0000, 0x44bb8000, 0x40800000 +@const(c36.x) 0x44fa0000, 0x3fc00000, 0x3cf5c28f, 0x3ca3d70a +@const(c37.x) 0x3e2ab368, 0x3fb8aa65, 0x3ecccccd, 0x40266666 +@const(c38.x) 0x3e3a29c7, 0x00000000, 0x00000000, 0x00000000 +@const(c39.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r1.z, 2, r1.x add.s r0.y, r0.y, -8 bary.f r1.w, 18, r1.x -add.f r0.w, r0.w, c28.y -mul.f r2.x, r1.z, c23.w +mov.f32f32 r2.x, c22.x +mul.f r2.y, r1.z, c23.w shr.b r0.y, r0.y, 4 -mul.f r2.y, r1.z, c23.z -mov.f32f32 r2.z, r1.w -mul.f r2.w, r2.x, c8.x +add.f r2.z, c6.x, (neg)r1.w +add.f r0.w, r0.w, c28.y +mul.f r2.w, r2.y, c8.x +bary.f r3.x, 0, r1.x cov.u32f32 r0.y, r0.y -mul.f r3.x, r2.y, c8.x -add.f r2.z, c6.x, (neg)r2.z -mov.f32f32 r2.w, r2.w -bary.f r3.y, 0, r1.x +mov.f32f32 r3.y, r2.z +mad.f32 r2.w, c28.w, r2.w, r3.x +rcp r2.x, r2.x +(ss)mul.f r2.x, c8.x, r2.x add.f r0.y, r0.y, c28.x -mov.f32f32 r3.x, r3.x -mad.f32 r2.w, c28.w, r2.w, r3.y -mov.f32f32 r2.z, r2.z -mad.f32 r0.y, c27.x, r0.y, c27.y -rcp r0.w, r0.w -mad.f32 r3.x, c28.w, r3.x, r3.y -mov.f32f32 r2.w, r2.w -mul.f r3.z, r2.z, r2.z -mov.f32f32 r0.y, r0.y -add.f r0.z, r0.z, c28.y +mul.f r2.z, r2.z, r3.y mul.f r2.w, r2.w, c22.y -mov.f32f32 r3.w, c14.y -mov.f32f32 r3.x, r3.x -mov.f32f32 r4.x, c22.x -mov.f32f32 r2.w, r2.w -bary.f r4.y, 19, r1.x -(ss)mul.f r0.w, r0.z, r0.w -mul.f r4.z, r1.z, c23.z -mul.f r2.w, r2.w, c29.z -rcp r3.w, r3.w -(ss)mul.f r3.w, r0.y, r3.w -rcp r4.x, r4.x -(ss)mul.f r4.x, c8.x, r4.x -mov.f32f32 r4.w, r4.y -mov.f32f32 r2.w, r2.w -mov.f32f32 r3.w, r3.w -mov.f32f32 r4.x, r4.x -add.f r4.w, c6.y, (neg)r4.w -mov.f32f32 r2.w, r2.w -mov.f32f32 r5.x, r3.w -add.f r5.y, c22.y, (neg)r4.x -mov.f32f32 r4.w, r4.w -mov.f32f32 r2.w, r2.w -mov.f32f32 r5.w, r5.x -add.s r0.x, r0.x, -8 -mov.f32f32 r5.x, r5.y -mov.f32f32 r6.x, r2.w +bary.f r3.z, 19, r1.x +add.f r3.w, c22.y, (neg)r2.x +mad.f32 r0.y, c27.x, r0.y, c27.y +mul.f r4.x, r2.w, c29.z bary.f r2.w, 3, r1.x +add.f r4.y, c6.y, (neg)r3.z +mov.f32f32 r4.z, r3.w +mov.f32f32 r4.w, r0.y +mul.f r5.x, r2.w, c23.w +mov.f32f32 r5.y, c14.y +mov.f32f32 r5.z, r4.y +mul.f r5.w, r2.w, c23.z +mul.f r6.x, r5.x, c8.x +bary.f r6.y, 1, r1.x +mad.f32 r2.z, r4.y, r5.z, r2.z +mul.f r4.y, r5.w, c8.x +mad.f32 r6.x, c28.w, r6.x, r6.y +rcp r5.y, r5.y +(ss)mul.f r6.w, r4.w, r5.y +bary.f r4.w, 20, r1.x +(ss)mad.f32 r5.y, c28.w, r4.y, r6.y +mul.f r4.y, r6.x, c22.y +mov.f32f32 r7.y, r6.w +add.s r0.x, r0.x, -8 +add.f r6.x, c6.z, (neg)r4.w +mul.f r4.y, r4.y, c29.z +mul.f r7.w, r5.y, r4.z +mul.f r4.z, r1.z, c23.z shr.b r0.x, r0.x, 4 -mul.f r3.x, r3.x, r5.x -mad.f32 r3.z, r4.w, r4.w, r3.z -mul.f r5.y, r2.w, c23.w +mov.f32f32 r5.y, r6.x +rcp r0.w, r0.w +add.f r0.z, r0.z, c28.y +mul.f r6.z, r4.z, c8.x +sam (f32)(xyz)r8.x, r4.x, s#3, t#3 +(ss)mov.f32f32 r4.x, c28.z cov.u32f32 r0.x, r0.x -mov.f32f32 r3.x, r3.x -mov.f32f32 r3.z, r3.z -mul.f r5.z, r5.y, c8.x +mad.f32 r2.z, r5.y, r5.y, r2.z +mad.f32 r4.y, c28.w, r6.z, r3.x +mul.f r4.x, r4.x, c15.x add.f r0.x, r0.x, c28.x -mov.f32f32 r6.y, c14.x -mov.f32f32 r3.x, r3.x -mov.f32f32 r5.z, r5.z -bary.f r6.z, 1, r1.x -bary.f r6.w, 20, r1.x -mov.f32f32 r3.x, r3.x -mad.f32 r5.z, c28.w, r5.z, r6.z -rcp r6.y, r6.y -(ss)mul.f r6.y, r0.x, r6.y -mov.f32f32 r7.x, r6.w -mov.f32f32 r7.y, r3.x -mov.f32f32 r3.x, r5.z -mov.f32f32 r7.w, r6.y -mul.f r8.x, r2.w, c23.z -add.f r5.z, c6.z, (neg)r7.x -mul.f r3.x, r3.x, c22.y -mov.f32f32 r6.y, r7.w -mul.f r7.x, r8.x, c8.x -mov.f32f32 r8.y, r5.z -mov.f32f32 r3.x, r3.x -mov.f32f32 r5.z, r6.y -mov.f32f32 r6.y, r7.x -mad.f32 r3.z, r8.y, r8.y, r3.z -mul.f r3.x, r3.x, c29.z -mov.f32f32 r0.w, r0.w +mov.f32f32 r5.y, c14.x +mul.f r7.z, r4.y, r3.w +floor.f r3.w, r4.x +rsq r2.z, r2.z +(ss)mul.f r0.w, r0.z, r0.w +mov.f32f32 r4.y, r2.z absneg.f r7.x, (neg)c13.x -mad.f32 r6.y, c28.w, r6.y, r6.z -mov.f32f32 r3.x, r3.x -sam (f32)(x)r5.z, r5.z, s#8, t#8 -(sy)(ss)mad.f32 r5.w, c31.x, r5.z, c31.y -rsq r3.z, r3.z -(ss)mov.f32f32 r3.z, r3.z -mov.f32f32 r6.y, r6.y -mov.f32f32 r3.x, r3.x -mul.f r5.w, r5.w, c31.z -mul.f r4.w, r4.w, r3.z -mul.f r5.x, r6.y, r5.x -mov.f32f32 r3.x, r3.x -mov.f32f32 r5.w, r5.w -mov.f32f32 r4.w, r4.w -mov.f32f32 r5.x, r5.x -mov.f32f32 r6.y, r3.x -add.f r3.x, c30.w, (neg)r5.w -mul.f r5.w, r4.w, r4.w -mov.f32f32 r5.x, r5.x -mul.f r7.x, r7.x, c13.x -mov.f32f32 r3.x, r3.x -mov.f32f32 r5.w, r5.w -sam (f32)(xyz)r8.z, r6.x, s#3, t#3 -(ss)mov.f32f32 r6.x, c28.z -mov.f32f32 r5.x, r5.x -mul.f r6.y, r7.x, r0.w -mul.f r5.w, r5.w, c31.x -mul.f r6.x, r6.x, c15.x -rcp r3.x, r3.x -(ss)mov.f32f32 r3.x, r3.x +add.f r3.w, r4.x, (neg)r3.w +rcp r4.x, r5.y +(ss)mul.f r6.z, r0.x, r4.x +mul.f r4.x, r5.z, r4.y +sam (f32)(w)r8.w, r7.z, s#3, t#3 +(ss)mul.f r5.y, r7.x, c13.x +mov.f32f32 r5.z, r3.w +mov.f32f32 r7.x, r6.z +mov.f32f32 r7.z, r4.x +mul.f r5.y, r5.y, r0.w +(sy)mul.f r7.w, r5.z, r8.y +mul.f r2.y, r2.y, c8.x +mul.f r8.x, r5.z, r8.x +mul.f r8.y, r7.z, r7.z +sam (f32)(x)r9.w, r7.x, s#8, t#8 +(sy)mad.f32 r8.w, c31.x, r9.w, c31.y +mad.f32 r2.y, c28.w, r2.y, r3.x +mul.f r9.x, r5.z, r9.z +mov.f32f32 r2.x, r2.x +mul.f r8.w, r8.w, c31.z +mul.f r2.y, r2.y, c22.y +mul.f r8.y, r8.y, c31.x +add.f r2.x, c22.y, (neg)r2.x +mul.f r4.z, r4.z, c8.x +mul.f r10.x, r2.y, c29.z +mul.f r2.y, r5.x, c8.x +add.f r5.x, c30.w, (neg)r8.w +max.f r8.y, r8.y, c28.y +mad.f32 r4.z, c28.w, r4.z, r3.x +mad.f32 r2.y, c28.w, r2.y, r6.y +mul.f r8.z, r5.z, r8.z +mov.f32f32 r0.w, r0.w +mul.f r3.y, r3.y, r4.y +mul.f r2.y, r2.y, c22.y +rcp r4.y, r5.x mad.f32 r0.z, c31.x, r0.z, c31.y -mov.f32f32 r7.z, r5.x -floor.f r5.x, r6.x -mov.f32f32 r5.w, r5.w +(ss)min.f r5.x, r8.y, c29.w +mul.f r10.z, r4.z, r2.x +mul.f r10.y, r2.y, c29.z mul.f r0.z, r0.z, c31.z -mov.f32f32 r6.y, r6.y -add.f r5.x, r6.x, (neg)r5.x -mov.f32f32 r5.w, r5.w -sam (f32)(w)r9.y, r7.y, s#3, t#3 -mov.f32f32 r0.z, r0.z -mul.f r0.w, r6.y, r0.w -mov.f32f32 r5.x, r5.x -mov.f32f32 r5.w, r5.w +add.f r2.y, c39.y, (neg)r5.x +mul.f r4.z, r5.w, c8.x +mul.f r0.w, r5.y, r0.w +mov.f32f32 r5.y, r3.y +absneg.f r5.w, (neg)c0.x +sam (f32)(xyz)r11.x, r10.x, s#2, t#2 +add.f r8.y, c39.y, (neg)r5.z add.f r0.z, c30.w, (neg)r0.z -mov.f32f32 r0.w, r0.w -(sy)mul.f r6.x, r5.x, r8.w -mul.f r2.x, r2.x, c8.x -mov.f32f32 r0.z, r0.z -mul.f r6.y, r5.x, r8.z -mul.f r7.x, r5.x, r10.x -mad.f32 r2.x, c28.w, r2.x, r3.y -mul.f r2.y, r2.y, c8.x -(ss)mul.f r7.y, r5.x, r9.x -max.f r5.w, r5.w, c28.y -mov.f32f32 r2.x, r2.x +mul.f r2.y, r2.y, c33.z +add.f r8.w, c39.y, (neg)r5.z +(sy)mul.f r8.y, r8.y, r11.y +mul.f r5.x, r5.x, c36.x +mad.f32 r4.z, c28.w, r4.z, r6.y +mul.f r8.w, r8.w, r11.x +add.f r7.w, r7.w, r8.y rcp r0.z, r0.z -(ss)mov.f32f32 r0.z, r0.z -mad.f32 r2.y, c28.w, r2.y, r3.y -min.f r5.w, r5.w, c29.w -mul.f r2.x, r2.x, c22.y -mul.f r0.z, c30.z, r0.z -mov.f32f32 r2.y, r2.y -add.f r4.x, c22.y, (neg)r4.x -mov.f32f32 r2.x, r2.x -mov.f32f32 r0.z, r0.z -add.f r7.z, c39.y, (neg)r5.w -mul.f r2.y, r2.y, r4.x -mul.f r2.x, r2.x, c29.z -mad.f32 r3.x, c30.z, r3.x, (neg)r0.z -mul.f r7.z, r7.z, c33.z -mov.f32f32 r2.y, r2.y -mov.f32f32 r2.x, r2.x -mov.f32f32 r3.x, r3.x -mul.f r5.w, r5.w, c36.x -mov.f32f32 r2.y, r2.y -mov.f32f32 r2.x, r2.x -mul.f r3.x, r3.x, c31.w -add.f r5.w, r5.w, r7.z -mov.f32f32 r2.y, r2.y -mov.f32f32 r2.x, r2.x -mov.f32f32 r3.x, r3.x +(ss)mul.f r0.z, c30.z, r0.z +add.f r2.y, r5.x, r2.y +mad.f32 r4.y, c30.z, r4.y, (neg)r0.z +add.f r5.x, r7.w, c29.x +add.f r7.w, r8.x, r8.w +mul.f r10.w, r4.z, r2.x +mul.f r2.x, r4.y, c31.w +mov.f32f32 r4.y, r5.x +bary.f r4.z, 5, r1.x +add.f r7.w, r7.w, c29.x +max.f r2.x, r2.x, c28.y +add.f r8.x, c39.y, (neg)r5.z +mul.f r4.y, r4.y, r4.z +mov.f32f32 r8.y, r7.w +bary.f r8.w, 4, r1.x +min.f r2.x, r2.x, c29.w +mul.f r7.w, r7.w, r4.z +sam (f32)(w)r10.x, r10.z, s#2, t#2 +add.f r9.y, c39.y, (neg)r5.z +mad.f32 r4.y, r8.y, r8.w, (neg)r4.y +mov.f32f32 r8.y, c22.x +min.f r2.x, r2.x, c32.x +mad.f32 r5.x, r5.x, r8.w, r7.w +(sy)mul.f r7.w, r9.y, r10.w +mul.f r8.x, r8.x, r11.z mul.f r0.w, r0.w, c37.y -mov.f32f32 r8.z, r2.y -mov.f32f32 r2.x, r2.x -mul.f r2.y, r5.y, c8.x -mov.f32f32 r3.x, r3.x -mul.f r5.y, r8.x, c8.x -mov.f32f32 r0.w, r0.w -mad.f32 r2.y, c28.w, r2.y, r6.z -mov.f32f32 r3.x, r3.x -mad.f32 r5.y, c28.w, r5.y, r6.z -mul.f r2.z, r2.z, r3.z -mov.f32f32 r2.y, r2.y -max.f r3.x, r3.x, c28.y -mov.f32f32 r5.y, r5.y +add.f r9.y, r5.w, r5.y +rcp r8.y, r8.y +(ss)mul.f r8.y, c8.x, r8.y +mov.f32f32 r10.x, r2.x +mov.f32f32 r10.y, r5.x +add.f r7.w, r9.x, r7.w +add.f r9.x, c22.y, (neg)r8.y +add.f r10.z, c39.y, (neg)r5.z +mul.f r11.x, r3.x, c37.x +add.f r8.x, r8.z, r8.x +mov.f32f32 r8.z, r9.x +mul.f r11.y, r2.w, c23.z +mov.f32f32 r10.y, r10.y +bary.f r11.z, 8, r1.x +mul.f r11.w, r1.z, c36.w +mul.f r12.x, r11.y, c8.x +add.f r8.x, r8.x, c29.y exp2 r0.w, r0.w -(ss)mov.f32f32 r0.w, r0.w -mul.f r2.y, r2.y, c22.y -min.f r3.x, r3.x, c29.w -mul.f r4.x, r5.y, r4.x -add.f r5.y, c39.y, (neg)r0.w -mov.f32f32 r2.y, r2.y -min.f r3.x, r3.x, c32.x -mov.f32f32 r4.x, r4.x -mul.f r5.y, r5.y, c13.y -mul.f r2.y, r2.y, c29.z -mov.f32f32 r3.x, r3.x -mov.f32f32 r4.x, r4.x -mul.f r0.w, r0.w, c29.w -mov.f32f32 r2.y, r2.y -mov.f32f32 r2.z, r2.z -absneg.f r7.z, (neg)c0.x -add.f r0.w, r0.w, r5.y -mov.f32f32 r2.y, r2.y -mov.f32f32 r4.x, r4.x -add.f r5.y, r7.z, r2.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r2.y, r2.y -mov.f32f32 r8.w, r4.x -mul.f r4.x, r5.y, r5.y -absneg.f r8.x, (neg)c0.y -mov.f32f32 r2.y, r2.y -mov.f32f32 r0.w, r0.w -mul.f r9.x, r4.z, c8.x -add.f r9.y, r8.x, r4.w -sam (f32)(w)r10.y, r8.z, s#2, t#2 -(ss)add.f r8.z, c39.y, (neg)r5.x +(ss)mov.f32f32 r12.y, r0.w +add.f r12.z, c39.y, (neg)r11.z +mad.f32 r12.x, c28.w, r12.x, r6.y +add.f r12.w, r11.x, (neg)r11.w +mul.f r13.y, r6.y, c37.x +mov.f32f32 r13.x, r8.x +mul.f r13.w, r12.x, r8.z +mul.f r1.z, r1.z, c23.z +mul.f r8.z, r12.z, r10.y +mul.f r2.w, r2.w, c36.w +mov.f32f32 r10.y, r13.x +mul.f r12.x, r1.z, c8.x +add.f r12.z, c39.y, (neg)r11.z +add.f r12.y, c39.y, (neg)r12.y +mul.f r14.x, r9.y, r9.y +mad.f32 r12.x, c28.w, r12.x, r3.x +add.f r13.x, r13.y, (neg)r2.w +mul.f r10.y, r12.z, r10.y +mul.f r12.y, r12.y, c13.y +mul.f r13.z, r12.x, r9.x +(ss)mul.f r0.w, r0.w, c29.w +absneg.f r9.x, (neg)c0.y +mov.f32f32 r12.x, r4.y +sam (f32)(w)r14.y, r12.w, s#2, t#2 +add.f r12.z, c39.y, (neg)r11.z +(sy)mul.f r10.z, r10.z, r15.x +(ss)add.f r12.w, r11.x, (neg)r11.w +sam (f32)(xyz)r15.y, r13.z, s#3, t#3 +(sy)mul.f r11.x, r5.z, r15.z +mov.f32f32 r8.y, r8.y +mul.f r11.w, r5.z, r15.y +add.f r13.x, r13.y, (neg)r2.w +mul.f r2.w, r5.z, r15.w +add.f r8.y, c22.y, (neg)r8.y +mul.f r1.z, r1.z, c8.x +add.f r0.w, r0.w, r12.y +add.f r12.y, r9.x, r7.z +mul.f r12.x, r12.z, r12.x +mad.f32 r1.z, c28.w, r1.z, r3.x +sam (f32)(w)r12.z, r12.w, s#3, t#3 max.f r0.w, r0.w, c28.y -mov.f32f32 r8.w, r9.x -sam (f32)(xyz)r11.y, r2.x, s#2, t#2 -(ss)add.f r2.x, c39.y, (neg)r5.x -add.f r2.y, c39.y, (neg)r5.x -(sy)mul.f r8.z, r8.z, r11.x -add.f r9.x, c39.y, (neg)r5.x -mul.f r2.x, r2.x, r11.z -mul.f r2.y, r2.y, r11.y -add.f r7.x, r7.x, r8.z -mul.f r8.z, r9.x, r11.w -add.f r2.x, r6.x, r2.x -add.f r2.y, r6.y, r2.y -mul.f r6.x, r3.y, c37.x -add.f r6.y, r7.y, r8.z -add.f r2.x, r2.x, c29.x -add.f r2.y, r2.y, c29.x -mul.f r1.z, r1.z, c36.w -add.f r6.y, r6.y, c29.y -mov.f32f32 r2.x, r2.x -bary.f r7.y, 5, r1.x -mov.f32f32 r2.y, r2.y -add.f r8.z, r6.x, (neg)r1.z -mov.f32f32 r6.y, r6.y -mul.f r9.x, r2.x, r7.y -bary.f r9.z, 4, r1.x -mul.f r9.w, r2.y, r7.y -mov.f32f32 r8.z, r8.z -mov.f32f32 r6.y, r6.y -mad.f32 r2.y, r2.y, r9.z, (neg)r9.x -mad.f32 r2.x, r2.x, r9.z, r9.w -mov.f32f32 r8.z, r8.z -mad.f32 r8.w, c28.w, r8.w, r3.y -mov.f32f32 r2.y, r2.y -mov.f32f32 r2.x, r2.x -mov.f32f32 r10.y, r8.z -mov.f32f32 r8.z, r8.w -mov.f32f32 r8.w, c22.x -mov.f32f32 r2.x, r2.x -mul.f r9.x, r6.z, c37.x -mul.f r9.w, r2.w, c36.w -bary.f r10.w, 8, r1.x +(sy)mul.f r3.x, r5.z, r13.y +mad.f32 r12.z, r12.y, r12.y, r14.x +(ss)mul.f r12.w, r1.z, r8.y +mul.f r1.z, r11.y, c8.x +add.f r3.x, r3.x, r10.z min.f r0.w, r0.w, c29.w -mad.f32 r4.x, r9.y, r9.y, r4.x -rcp r8.w, r8.w -(ss)mul.f r8.w, c8.x, r8.w -add.f r10.z, r9.x, (neg)r9.w -add.f r11.y, c39.y, (neg)r10.w -add.f r11.z, c39.y, (neg)r0.w -mov.f32f32 r8.w, r8.w -mov.f32f32 r10.z, r10.z -mul.f r11.y, r11.y, r2.x -mul.f r11.z, r11.z, c12.z -add.f r11.w, c22.y, (neg)r8.w -mov.f32f32 r10.z, r10.z -add.f r12.x, c39.y, (neg)r0.w -add.f r12.y, c39.y, (neg)r0.w -mov.f32f32 r11.w, r11.w -mov.f32f32 r10.z, r10.z -mul.f r12.x, r12.x, c12.y -mul.f r12.y, r12.y, c12.x -mul.f r8.z, r8.z, r11.w -add.f r12.z, c39.y, (neg)r10.w -add.f r12.w, c39.y, (neg)r10.w -mov.f32f32 r4.x, r4.x -mov.f32f32 r8.z, r8.z -sam (f32)(w)r13.x, r10.y, s#3, t#3 -(sy)(ss)mul.f r10.y, r5.x, r13.w -add.f r1.z, r6.x, (neg)r1.z -mul.f r6.x, r12.z, r2.y -mov.f32f32 r8.z, r8.z -mul.f r10.z, r12.w, r6.y -mul.f r3.z, r8.y, r3.z -mov.f32f32 r1.z, r1.z -mov.f32f32 r8.y, r8.z -mad.f32 r8.z, c33.z, r4.w, c29.w -mov.f32f32 r3.z, r3.z -absneg.f r12.z, (neg)c0.z -mov.f32f32 r12.w, r8.y -mul.f r2.w, r2.w, c23.z -mov.f32f32 r1.z, r1.z -add.f r8.y, r12.z, r3.z -log2 r8.z, r8.z -(ss)mov.f32f32 r8.z, r8.z -mul.f r13.x, r2.w, c8.x -mov.f32f32 r13.y, r1.z -add.f r1.z, r9.x, (neg)r9.w -mad.f32 r4.x, r8.y, r8.y, r4.x -mov.f32f32 r9.x, r13.x -mul.f r8.z, r8.z, c33.w -mov.f32f32 r1.z, r1.z -mov.f32f32 r9.w, r3.w -mad.f32 r9.x, c28.w, r9.x, r6.z -mov.f32f32 r8.z, r8.z -mov.f32f32 r1.z, r1.z -rsq r4.x, r4.x -(ss)mov.f32f32 r4.x, r4.x -mov.f32f32 r9.x, r9.x -min.f r8.z, c29.w, r8.z -mov.f32f32 r13.z, r1.z -mov.f32f32 r14.y, r9.w -mul.f r1.z, r9.x, r11.w -mul.f r5.y, r5.y, r4.x -mul.f r9.x, r9.y, r4.x -mul.f r4.x, r8.y, r4.x -mov.f32f32 r1.z, r1.z -sam (f32)(w)r14.z, r13.y, s#2, t#2 -add.f r8.y, c39.y, (neg)r5.x -mov.f32f32 r5.y, r5.y -mov.f32f32 r9.x, r9.x -mov.f32f32 r1.z, r1.z -(sy)mul.f r8.y, r8.y, r15.y -mov.f32f32 r4.x, r4.x -mov.f32f32 r8.z, r8.z -mov.f32f32 r1.z, r1.z -add.f r8.y, r10.y, r8.y -mov.f32f32 r9.y, r7.w -mul.f r9.w, r5.x, r10.x -mov.f32f32 r13.x, r1.z -mul.f r1.z, r7.x, r8.y -mov.f32f32 r14.x, r9.y -add.f r7.x, c39.y, (neg)r5.x -mov.f32f32 r8.y, c14.y -mov.f32f32 r9.y, r5.z -mov.f32f32 r1.z, r1.z -(ss)nop -sam (f32)(xyz)r12.w, r12.w, s#3, t#3 -(sy)mul.f r10.x, r5.x, r13.x -mul.f r4.z, r4.z, c8.x -mul.f r10.y, r5.x, r12.w -mul.f r1.z, r1.z, r7.y -mul.f r7.x, r7.x, r11.x -mad.f32 r3.y, c28.w, r4.z, r3.y -mul.f r4.z, r5.x, r13.y -(ss)nop -sam (f32)(xzw)r12.w, r14.x, s#4, t#4 -(sy)cmps.f.lt r11.x, r13.z, c32.x -add.f r7.x, r9.w, r7.x -mov.f32f32 r3.y, r3.y -add.f r8.w, c22.y, (neg)r8.w -mul.f r9.w, r5.x, r13.w -add.f r11.w, c39.y, (neg)r5.x -cov.u32f32 r11.x, r11.x -mul.f r3.y, r3.y, r8.w -bary.f r13.x, 11, r1.x -mul.f r11.w, r11.w, r15.y -rcp r8.y, r8.y -(ss)mul.f r0.y, r0.y, r8.y -mov.f32f32 r3.y, r3.y -(ss)cmps.f.lt r8.y, r13.x, c31.y -add.f r9.w, r9.w, r11.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r3.y, r3.y -cov.u32f32 r8.y, r8.y -mul.f r7.x, r7.x, r9.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r3.y, r3.y -mul.f r8.y, r11.x, r8.y -mov.f32f32 r7.x, r7.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r13.w, r3.y -mul.f r2.w, r2.w, c8.x -mad.f32 r1.z, r7.x, r9.z, (neg)r1.z -mov.f32f32 r3.y, c33.z -mov.f32f32 r7.x, r8.y -mad.f32 r2.w, c28.w, r2.w, r6.z -mov.f32f32 r14.z, r0.y -mov.f32f32 r0.y, r9.y -mul.f r6.z, r13.z, c28.x -mov.f32f32 r2.w, r2.w -add.f r3.y, r3.y, c8.x -cmps.f.ne r7.x, r7.x, c28.y -mov.f32f32 r8.y, c14.x -mul.f r2.w, r2.w, r8.w -mul.f r1.z, r1.z, r3.y -add.f r3.y, c39.y, (neg)r6.z -mul.f r8.w, r6.z, r12.w -mov.f32f32 r2.w, r2.w -mov.f32f32 r1.z, r1.z -rcp r8.y, r8.y -add.f r9.y, c39.y, (neg)r6.z -(ss)mul.f r0.x, r0.x, r8.y -mov.f32f32 r2.w, r2.w -(ss)max.f r8.y, r4.w, c28.w -add.f r9.w, c29.w, (neg)r13.z -mov.f32f32 r0.x, r0.x -mov.f32f32 r2.w, r2.w -log2 r1.z, r1.z -mov.f32f32 r11.x, c8.x -mul.f r6.z, r6.z, r13.y -mov.f32f32 r0.x, r0.x -mov.f32f32 r14.x, r2.w -mul.f r2.w, r11.x, c38.x -mov.f32f32 r11.x, c11.x -mov.f32f32 r11.w, c22.z -mov.f32f32 r0.x, r0.x -bary.f r12.w, 6, r1.x -mov.f32f32 r13.x, c8.x -sam (f32)(xyz)r14.w, r13.w, s#2, t#2 -add.f r13.y, c39.y, (neg)r5.x -(ss)add.f r13.w, c39.y, (neg)r5.x -add.f r2.w, c37.w, (neg)r2.w -add.f r5.x, c39.y, (neg)r5.x -(sy)mul.f r13.y, r13.y, r15.x -mul.f r13.w, r13.w, r14.w -mov.f32f32 r2.w, r2.w -mul.f r5.x, r5.x, r15.y -add.f r10.x, r10.x, r13.y -add.f r10.y, r10.y, r13.w -(ss)mul.f r1.z, r2.w, r1.z -add.f r2.w, r4.z, r5.x -add.f r4.z, r10.x, c29.x -add.f r5.x, r10.y, c29.x -mov.f32f32 r1.z, r1.z -add.f r2.w, r2.w, c29.y -mov.f32f32 r4.z, r4.z -mov.f32f32 r5.x, r5.x -mov.f32f32 r14.y, r0.x -log2 r0.x, r11.x -mov.f32f32 r2.w, r2.w -mul.f r10.x, r4.z, r7.y -mul.f r7.y, r5.x, r7.y -mad.f32 r5.x, r5.x, r9.z, (neg)r10.x -mad.f32 r4.z, r4.z, r9.z, r7.y -exp2 r1.z, r1.z -mov.f32f32 r2.w, r2.w -sam (f32)(xw)r13.w, r14.y, s#5, t#5 -(sy)mul.f r7.y, r13.w, r14.z -mov.f32f32 r5.x, r5.x -bary.f r9.z, 10, r1.x -mov.f32f32 r4.z, r4.z -mov.f32f32 r7.y, r7.y -(ss)mul.f r0.x, c34.y, r0.x -mul.f r10.x, r9.z, c28.w -mov.f32f32 r4.z, r4.z -(ss)mad.f32 r1.z, c37.z, r7.y, r1.z -mov.f32f32 r0.x, r0.x -mad.f32 r7.y, c23.x, r10.x, c23.y -rcp r10.x, r11.w -(ss)mad.f32 r9.z, r9.z, r10.x, c22.w -mov.f32f32 r10.x, r12.w -mov.f32f32 r1.z, r1.z -mov.f32f32 r7.y, r7.y -mov.f32f32 r9.z, r9.z -exp2 r0.x, r0.x -(ss)add.f r10.y, c39.y, (neg)r0.x -mov.f32f32 r13.w, r10.x -mad.f32 r2.y, r2.y, r7.y, r5.x -mad.f32 r2.x, r2.x, r7.y, r4.z -mad.f32 r2.w, r6.y, r7.y, r2.w -mul.f r4.z, r10.y, c34.x -mov.f32f32 r2.y, r2.y -mov.f32f32 r2.x, r2.x -mov.f32f32 r2.w, r2.w -(ss)mul.f r0.x, r0.x, c28.w -mul.f r2.y, r10.w, r2.y -mul.f r2.x, r10.w, r2.x -mul.f r2.w, r10.w, r2.w -add.f r0.x, r0.x, r4.z -add.f r2.y, r2.y, r6.x -mov.f32f32 r4.z, r9.z -add.f r2.x, r2.x, r11.y +mul.f r2.z, r6.x, r2.z +mad.f32 r1.z, c28.w, r1.z, r6.y +mul.f r3.x, r7.w, r3.x +add.f r6.x, c39.y, (neg)r0.w +add.f r6.y, c39.y, (neg)r0.w +mul.f r13.x, r1.z, r8.y +mul.f r1.z, r3.x, r4.z +mul.f r3.x, r3.w, r9.z +mul.f r3.w, r6.x, c12.z +add.f r6.x, c39.y, (neg)r5.z +mul.f r6.y, r6.y, c12.y +add.f r7.w, c39.y, (neg)r0.w +sam (f32)(xyz)r13.z, r12.w, s#2, t#2 +add.f r8.y, c39.y, (neg)r5.z +add.f r9.z, c39.y, (neg)r5.z +mul.f r6.x, r6.x, r10.w +add.f r10.z, c39.y, (neg)r5.z +(sy)mul.f r8.y, r8.y, r13.w +mul.f r9.z, r9.z, r13.z +add.f r3.x, r3.x, r6.x +add.f r6.x, c39.y, (neg)r5.z +add.f r8.y, r11.x, r8.y +add.f r9.z, r11.w, r9.z +mul.f r10.z, r10.z, r14.x +mul.f r7.w, r7.w, c12.x +add.f r8.y, r8.y, c29.x +add.f r9.z, r9.z, c29.x +mul.f r6.x, r6.x, r15.x +mul.f r5.z, r5.z, r13.y +mov.f32f32 r10.w, r8.y +mul.f r11.x, r9.z, r4.z add.f r2.w, r2.w, r10.z -bary.f r5.x, 7, r1.x -mov.f32f32 r4.z, r4.z -rcp r6.x, r0.x -nop -(ss)rcp r0.x, r0.x -(ss)mov.f32f32 r6.x, r6.x -mov.f32f32 r5.x, r5.x +mov.f32f32 r10.z, r2.z +mul.f r4.z, r10.w, r4.z +mov.f32f32 r9.z, r9.z +mad.f32 r8.y, r8.y, r8.w, r11.x +bary.f r10.w, 10, r1.x +add.f r5.z, r5.z, r6.x +mad.f32 r4.z, r9.z, r8.w, (neg)r4.z +add.f r2.w, r2.w, c29.y +mul.f r6.x, r10.w, c28.w +mul.f r3.x, r3.x, r5.z +absneg.f r5.z, (neg)c0.z +mad.f32 r9.z, c33.z, r7.z, c29.w +mad.f32 r6.x, c23.x, r6.x, c23.y +mad.f32 r1.z, r3.x, r8.w, (neg)r1.z +mov.f32f32 r3.x, c33.z +add.f r8.w, r5.z, r10.z +mov.f32f32 r11.x, r6.x +mad.f32 r5.x, r5.x, r6.x, r8.y +add.f r3.x, r3.x, c8.x +mad.f32 r6.x, r8.w, r8.w, r12.z +mad.f32 r4.y, r4.y, r11.x, r4.z +mul.f r4.z, r11.z, r5.x +mul.f r1.z, r1.z, r3.x +mad.f32 r2.w, r8.x, r11.x, r2.w +mul.f r3.x, r11.z, r4.y +add.f r4.y, r4.z, r8.z +mov.f32f32 r4.z, c22.z +mul.f r2.w, r11.z, r2.w +add.f r3.x, r3.x, r12.x +log2 r1.z, r1.z +mov.f32f32 r5.x, c8.x +rsq r6.x, r6.x +(ss)mov.f32f32 r8.x, r6.x +add.f r2.w, r2.w, r10.y +(ss)mul.f r6.x, r8.w, r6.x +rcp r4.z, r4.z +(ss)mad.f32 r4.z, r10.w, r4.z, c22.w +mul.f r5.x, r5.x, c38.x +mul.f r8.y, r9.y, r8.x +mul.f r8.x, r12.y, r8.x max.f r4.z, r4.z, c28.y -mul.f r6.y, r13.x, c28.w -mov.f32f32 r7.y, c16.x -mov.f32f32 r14.x, r5.x +add.f r5.x, c37.w, (neg)r5.x +log2 r8.z, r9.z +(ss)mul.f r8.z, r8.z, c33.w +sam (f32)(xzw)r11.w, r6.z, s#4, t#4 +(sy)(ss)cmps.f.lt r6.z, r12.z, c32.x min.f r4.z, r4.z, c29.w -add.f r5.x, c39.y, (neg)r6.y -mul.f r7.y, r7.y, c28.x -absneg.f r9.z, (neg)c16.x -mul.f r2.y, r4.z, r2.y -add.f r10.x, c39.y, (neg)r4.z -mul.f r2.x, r4.z, r2.x +mul.f r1.z, r5.x, r1.z +min.f r5.x, c29.w, r8.z +cov.u32f32 r6.z, r6.z +mul.f r3.x, r4.z, r3.x +add.f r6.w, c39.y, (neg)r4.z +mul.f r4.y, r4.z, r4.y +(rpt1)nop +mul.f r6.w, r6.w, c30.x +add.f r8.z, c39.y, (neg)r4.z +exp2 r1.z, r1.z +nop +mov.f32f32 r8.w, c14.y +add.f r3.x, r3.x, r6.w +mul.f r6.w, r8.z, c30.x mul.f r2.w, r4.z, r2.w nop -mul.f r10.x, r10.x, c30.x -add.f r10.y, c39.y, (neg)r4.z +mul.f r8.z, r3.x, r3.x add.f r4.z, c39.y, (neg)r4.z -sam (f32)(w)r13.w, r13.w, s#0, t#0 -(sy)cmps.f.lt r10.z, r14.z, c36.z -add.f r2.y, r2.y, r10.x -mul.f r10.x, r10.y, c30.x -mul.f r4.z, r4.z, c30.y -nop -mul.f r10.y, r2.y, r2.y -add.f r2.x, r2.x, r10.x -add.f r2.w, r2.w, r4.z -cov.u32f32 r4.z, r10.z -mul.f r5.x, r5.x, c29.w -mul.f r6.y, r6.y, c35.w -mad.f32 r10.x, r2.w, r2.w, r10.y +add.f r4.y, r4.y, r6.w +rcp r6.w, r8.w +(ss)bary.f r8.w, 11, r1.x +(ss)mul.f r9.z, r0.y, r6.w +mul.f r0.y, r4.z, c30.y +mov.f32f32 r4.z, c14.x +cmps.f.lt r6.w, r8.w, c31.y +max.f r8.w, r7.z, c28.w +add.f r0.y, r2.w, r0.y +mul.f r2.w, r12.z, c28.x +cov.u32f32 r6.w, r6.w +mov.f32f32 r10.y, c11.x +mad.f32 r8.z, r0.y, r0.y, r8.z +rcp r4.z, r4.z +(ss)mul.f r9.y, r0.x, r4.z +mad.f32 r0.x, r4.y, r4.y, r8.z +(ss)mul.f r4.z, r6.z, r6.w +(rpt4)nop +rsq r0.x, r0.x +(ss)mov.f32f32 r6.z, r0.x +(ss)mul.f r0.x, r4.y, r0.x +mad.f32 r4.y, c28.x, r11.z, c28.x +sam (f32)(xw)r10.w, r9.y, s#5, t#5 +(sy)mul.f r6.w, r10.w, r11.z +mul.f r3.x, r3.x, r6.z +mul.f r0.y, r0.y, r6.z +mul.f r0.x, r4.y, r0.x +add.f r6.z, c39.y, (neg)r4.y +mul.f r3.x, r4.y, r3.x +add.f r8.z, c39.y, (neg)r4.y +mul.f r0.y, r4.y, r0.y +mul.f r6.z, r6.z, c30.x +add.f r4.y, c39.y, (neg)r4.y +mul.f r8.z, r8.z, c30.x +mad.f32 r1.z, c37.z, r6.w, r1.z cmps.f.ne r4.z, r4.z, c28.y -add.f r10.y, c35.x, r7.y -add.f r10.z, c35.z, r7.y -mov.f32f32 r10.x, r10.x -mov.f32f32 r11.x, r14.z -mad.f32 r10.x, r2.x, r2.x, r10.x -add.f r5.x, r6.y, r5.x -mov.f32f32 r6.y, r10.y -mov.f32f32 r10.y, r10.z -mov.f32f32 r10.z, r11.x -mul.f r11.x, r6.w, c34.y -add.f r11.y, c35.x, r7.y -rsq r10.x, r10.x -(ss)mov.f32f32 r10.x, r10.x -mov.f32f32 r11.w, c28.y -mul.f r5.x, c8.x, r5.x +add.f r6.w, c39.y, (neg)r2.w +add.f r3.x, r3.x, r8.z +add.f r0.x, r0.x, r6.z +add.f r6.z, c39.y, (neg)r2.w +mul.f r4.y, r4.y, c30.y +mul.f r3.x, r6.w, r3.x +mul.f r6.w, r2.w, r11.w +mul.f r0.x, r6.z, r0.x +mul.f r2.w, r2.w, r12.y +add.f r0.y, r0.y, r4.y +add.f r3.x, r6.w, r3.x +log2 r4.y, r10.y +(ss)mul.f r4.y, c34.y, r4.y +add.f r6.z, c29.w, (neg)r12.z +add.f r0.x, r2.w, r0.x +absneg.f r3.x, (neg)r3.x +mov.f32f32 r6.w, c8.x +bary.f r2.w, 15, r1.x +absneg.f r0.x, (neg)r0.x +mul.f r2.x, r3.x, r2.x +mul.f r8.z, r3.x, r5.y +(ss)mov.f32f32 r9.y, r0.y +mul.f r3.y, r3.x, r3.y +add.f r2.x, r7.x, (neg)r2.x +mul.f r7.x, r3.x, r8.y +mad.f32 r8.y, r9.y, r7.z, r8.z +mad.f32 r3.y, r9.y, r4.x, r3.y +max.f r2.x, r2.x, c32.y +mad.f32 r4.x, r0.x, r10.z, r8.y +mad.f32 r2.z, r0.x, r2.z, r3.y +mad.f32 r3.y, r9.y, r8.x, r7.x +min.f r8.x, r2.x, c32.z +mul.f r2.x, r0.x, r10.x +mul.f r7.x, r4.x, r9.y +max.f r2.z, r2.z, c28.y +mul.f r8.y, r4.x, r0.x +add.f r2.x, r7.y, (neg)r2.x +mul.f r7.x, c31.x, r7.x +min.f r2.z, r2.z, c29.w +mul.f r7.y, c31.x, r8.y +max.f r2.x, r2.x, c32.y +add.f r7.x, r7.z, (neg)r7.x +add.f r2.z, c32.w, (neg)r2.z +add.f r7.y, r10.z, (neg)r7.y +min.f r8.y, r2.x, c32.z +mov.f32f32 r2.x, r7.x +mul.f r4.x, r4.x, r3.x +mov.f32f32 r7.z, r7.y +mad.f32 r3.y, r0.x, r6.x, r3.y +rcp r6.x, r7.x nop -mul.f r2.y, r2.y, r10.x -mul.f r2.x, r2.x, r10.x -mul.f r2.w, r2.w, r10.x -sel.b32 r4.z, r11.w, r4.z, r10.z -mov.f32f32 r2.y, r2.y -mad.f32 r10.x, c28.x, r10.w, c28.x -mov.f32f32 r2.x, r2.x -mov.f32f32 r2.w, r2.w -mov.f32f32 r5.x, r5.x -mul.f r2.y, r10.x, r2.y -add.f r10.z, c39.y, (neg)r10.x -mul.f r2.x, r10.x, r2.x -add.f r10.w, c39.y, (neg)r10.x -mul.f r2.w, r10.x, r2.w -mul.f r10.z, r10.z, c30.x -add.f r10.x, c39.y, (neg)r10.x -mul.f r10.w, r10.w, c30.x -mov.f32f32 r11.x, r11.x -add.f r2.y, r2.y, r10.z -mul.f r10.x, r10.x, c30.y -add.f r2.x, r2.x, r10.w -add.f r6.y, r6.y, (neg)r11.x -mul.f r2.y, r3.y, r2.y -add.f r2.w, r2.w, r10.x -mul.f r2.x, r9.y, r2.x -mov.f32f32 r3.y, r6.y -add.f r2.y, r8.w, r2.y -mov.f32f32 r2.w, r2.w -mov.f32f32 r6.y, r10.y +log2 r2.z, r2.z +(ss)mul.f r2.z, c33.x, r2.z +sam (f32)(x)r10.x, r8.x, s#8, t#8 +(sy)cmps.f.lt r7.x, r10.x, r9.w +(ss)rcp r8.x, r2.x +mov.f32f32 r8.y, r3.z +mov.f32f32 r8.z, c34.w +rcp r9.y, r7.z +mov.f32f32 r9.z, c16.x +cov.u32f32 r7.x, r7.x +mul.f r4.x, c31.x, r4.x +exp2 r2.z, r2.z nop -absneg.f r2.y, (neg)r2.y -add.f r2.x, r6.z, r2.x -add.f r6.y, r6.y, (neg)r11.x -mul.f r6.z, r1.w, c34.y -mul.f r8.w, r2.y, r3.x -mul.f r9.y, r2.y, r2.z -mul.f r10.x, r2.y, r2.z -mad.f32 r9.y, r2.w, r4.w, r9.y -mov.f32f32 r8.w, r8.w -mad.f32 r10.x, r2.w, r4.w, r10.x -absneg.f r2.x, (neg)r2.x -mov.f32f32 r9.y, r9.y -add.f r7.w, r7.w, (neg)r8.w -mov.f32f32 r8.w, r10.x -mad.f32 r9.y, r2.x, r3.z, r9.y -mad.f32 r8.w, r2.x, r3.z, r8.w -mov.f32f32 r7.w, r7.w -mul.f r10.x, r5.x, r2.x -mul.f r10.y, r9.y, r2.x -mul.f r10.z, r9.y, r2.w -max.f r7.w, r7.w, c32.y -mov.f32f32 r8.w, r8.w -mov.f32f32 r10.x, r10.x -bary.f r10.w, 12, r1.x -mov.f32f32 r7.w, r7.w -mov.f32f32 r10.z, r10.z -mov.f32f32 r8.w, r8.w -mad.f32 r10.x, c28.x, r10.w, (neg)r10.x -min.f r7.w, r7.w, c32.z -mul.f r10.z, c31.x, r10.z -max.f r8.w, r8.w, c28.y -mov.f32f32 r10.x, r10.x -mov.f32f32 r7.w, r7.w -mov.f32f32 r10.z, r10.z -min.f r8.w, r8.w, c29.w -bary.f r10.w, 14, r1.x -mov.f32f32 r7.w, r7.w -add.f r4.w, r4.w, (neg)r10.z -add.f r8.w, c32.w, (neg)r8.w -mov.f32f32 r10.y, r10.y -mov.f32f32 r7.w, r7.w -mov.f32f32 r4.w, r4.w -mov.f32f32 r8.w, r8.w -rcp r10.z, r10.w -(ss)mad.f32 r10.x, r10.x, r10.z, c28.x -(ss)mov.f32f32 r10.w, r7.w -mul.f r3.x, r2.x, r3.x -mul.f r7.w, c31.x, r10.y -mul.f r9.y, r9.y, r2.y -mov.f32f32 r10.x, r10.x -mov.f32f32 r3.x, r3.x -rcp r10.y, r4.w -mov.f32f32 r11.x, r4.y -mov.f32f32 r11.w, c35.y -log2 r8.w, r8.w -(ss)mul.f r8.w, c33.x, r8.w -add.f r3.x, r3.w, (neg)r3.x -mov.f32f32 r3.w, r10.x -mov.f32f32 r7.w, r7.w -add.f r10.x, r11.w, (neg)r11.x -mov.f32f32 r3.x, r3.x -mov.f32f32 r8.w, r8.w -mov.f32f32 r3.w, r3.w -add.f r3.z, r3.z, (neg)r7.w -max.f r3.x, r3.x, c32.y -mul.f r7.w, r10.x, r10.y -max.f r3.w, r3.w, c28.y -mov.f32f32 r3.z, r3.z -mov.f32f32 r3.x, r3.x -mov.f32f32 r7.w, r7.w -rcp r10.x, r4.w -mov.f32f32 r10.y, c34.w -exp2 r8.w, r8.w -(ss)mov.f32f32 r8.w, r8.w -min.f r3.x, r3.x, c32.z -min.f r3.w, r3.w, c29.w -rcp r11.w, r3.z -add.f r10.y, r10.y, (neg)r11.x -mov.f32f32 r8.w, r8.w -mov.f32f32 r3.x, r3.x -mov.f32f32 r3.w, r3.w -(ss)mul.f r6.y, r6.y, r11.w -mov.f32f32 r10.y, r10.y -mov.f32f32 r3.x, r3.x -max.f r8.w, r8.w, c28.y -mov.f32f32 r3.w, r3.w -mov.f32f32 r6.y, r6.y -mov.f32f32 r3.x, r3.x -mul.f r10.x, r10.y, r10.x -min.f r8.w, r8.w, c29.w -mov.f32f32 r12.w, r3.w -mov.f32f32 r11.x, r3.x -mov.f32f32 r3.x, r10.x -min.f r3.w, r8.w, c33.y -mul.f r5.x, r5.x, r2.y -rcp r8.w, r3.z -mov.f32f32 r9.y, r9.y -(ss)mul.f r3.y, r3.y, r8.w -max.f r3.x, r7.w, r3.x -sam (f32)(x)r7.w, r10.w, s#8, t#8 -(sy)cmps.f.lt r5.z, r7.w, r5.z -mov.f32f32 r3.w, r3.w -mov.f32f32 r5.x, r5.x -bary.f r8.w, 13, r1.x -cov.u32f32 r5.z, r5.z -mov.f32f32 r3.x, r3.x -mov.f32f32 r3.y, r3.y -mul.f r3.w, r3.w, r8.z -mov.f32f32 r5.z, r5.z -mad.f32 r5.x, c28.x, r8.w, (neg)r5.x -max.f r3.y, r6.y, r3.y -mul.f r6.y, c31.x, r9.y -cmps.f.ne r5.z, r5.z, c28.y -mov.f32f32 r7.w, r7.w -mov.f32f32 r3.y, r3.y -mov.f32f32 r3.w, r3.w -mov.f32f32 r5.x, r5.x -sel.b32 r0.y, r0.y, r5.z, r7.w -min.f r3.x, r3.x, r3.y -mov.f32f32 r3.y, r6.y -mad.f32 r3.w, c33.y, r3.w, c28.w -mad.f32 r0.y, c31.x, r0.y, c31.y -mad.f32 r5.x, r5.x, r10.z, c28.x -add.f r2.z, r2.z, (neg)r3.y -mov.f32f32 r3.y, r2.y -mul.f r0.y, r0.y, c31.z -mov.f32f32 r3.w, r3.w -mov.f32f32 r2.z, r2.z -mov.f32f32 r5.x, r5.x -mov.f32f32 r0.y, r0.y -bary.f r5.z, 15, r1.x -mul.f r5.y, r2.y, r5.y -mov.f32f32 r6.y, r2.x -add.f r0.y, c30.w, (neg)r0.y -rcp r7.w, r2.z -add.f r7.y, c35.z, r7.y -mov.f32f32 r5.x, r5.x -mov.f32f32 r5.z, r5.z -mov.f32f32 r0.y, r0.y -add.f r7.y, r7.y, (neg)r6.z -mov.f32f32 r5.x, r5.x -mov.f32f32 r8.z, r5.z -mad.f32 r5.y, r2.w, r9.x, r5.y -rcp r8.w, r2.z -(ss)mul.f r7.y, r7.y, r7.w -max.f r5.x, r5.x, c28.y -rcp r0.y, r0.y -(ss)mov.f32f32 r0.y, r0.y -mad.f32 r3.y, c28.x, r8.z, r3.y -mov.f32f32 r5.y, r5.y -add.f r6.z, r11.y, (neg)r6.z -mad.f32 r0.y, c30.z, r0.y, (neg)r0.z -mov.f32f32 r0.z, r7.y -min.f r5.x, r5.x, c29.w -mov.f32f32 r6.z, r6.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r3.y, r3.y -bary.f r7.y, 17, r1.x -mov.f32f32 r5.x, r5.x -max.f r0.y, r0.y, r13.z -mul.f r6.z, r6.z, r8.w -mad.f32 r4.x, r2.x, r4.x, r5.y -bary.f (ei)r1.x, 16, r1.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.y, r6.z -mov.f32f32 r5.x, r5.x -rcp r5.y, r7.y -(ss)mad.f32 r3.y, r3.y, r5.y, c28.x -mul.f r6.z, r0.y, c28.x -mul.f r7.w, r0.y, r8.y -mov.f32f32 r8.z, r0.y -mul.f r8.w, r0.y, c33.z -max.f r6.z, r6.z, c28.y -mov.f32f32 r7.w, r7.w -max.f r8.z, r8.z, c28.y -mov.f32f32 r8.w, r8.w -mov.f32f32 r6.z, r6.z -mul.f r6.x, r7.w, r6.x -min.f r7.w, r8.z, c29.w -mov.f32f32 r8.z, r8.w -min.f r6.z, r6.z, c34.z -mov.f32f32 r6.x, r6.x -add.f r8.w, c39.y, (neg)r7.w -max.f r8.z, r8.z, c28.y -mov.f32f32 r6.z, r6.z -add.f r6.x, c29.w, (neg)r6.x -mul.f r8.w, r8.w, r9.w -mul.f r7.w, r7.w, c29.w -mul.f r9.x, r2.y, r6.z -mul.f r6.z, r2.x, r6.z -mov.f32f32 r6.x, r6.x -add.f r7.w, r7.w, r8.w -mov.f32f32 r8.w, r9.x -mov.f32f32 r6.z, r6.z -max.f r6.x, c28.y, r6.x -mul.f r3.w, r3.w, r7.w -mul.f r7.w, r8.w, c34.z -mul.f r6.z, r6.z, c34.z -mov.f32f32 r6.x, r6.x -mov.f32f32 r3.w, r3.w -mov.f32f32 r7.w, r7.w -mov.f32f32 r6.z, r6.z -mad.f32 r5.z, c28.x, r5.z, (neg)r7.w -mov.f32f32 r1.x, r1.x -add.f r7.w, c39.y, (neg)r6.x -add.f r8.w, c39.y, (neg)r6.x -mov.f32f32 r5.z, r5.z -(ss)rcp r7.y, r7.y -mad.f32 r6.z, c28.x, r1.x, (neg)r6.z -mul.f r7.w, r7.w, c9.z -mul.f r8.w, r8.w, c9.y -(ss)mad.f32 r5.z, r5.z, r7.y, c28.x -mov.f32f32 r6.z, r6.z -add.f r9.x, c39.y, (neg)r6.x -add.f r9.y, c39.y, (neg)r3.w -mov.f32f32 r5.z, r5.z -mad.f32 r6.z, r6.z, r7.y, c28.x -(ss)mul.f r7.y, r9.x, c9.x -max.f r0.z, r0.z, r1.y -mov.f32f32 r1.y, r5.z -mov.f32f32 r5.z, r6.z -add.f r6.z, c39.y, (neg)r3.w -mov.f32f32 r0.z, r0.z -mov.f32f32 r1.y, r1.y -mov.f32f32 r5.z, r5.z -add.f r9.x, c39.y, (neg)r3.w -min.f r0.z, r0.z, r3.x -max.f r1.y, r1.y, c28.y -mov.f32f32 r3.x, r5.z -min.f r5.z, r8.z, c29.w -mul.f r0.y, r0.y, r8.y -min.f r1.y, r1.y, c29.w -max.f r3.x, r3.x, c28.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r5.z, r5.z -mov.f32f32 r1.y, r1.y -min.f r3.x, r3.x, c29.w -mul.f r2.z, r2.z, r0.z -mul.f r1.w, (neg)r1.w, c34.y -mov.f32f32 r1.y, r1.y -mov.f32f32 r3.x, r3.x -mov.f32f32 r8.y, c28.y -mul.f r0.x, r0.y, r0.x -mov.f32f32 r9.w, r1.y -mov.f32f32 r0.y, r3.x -add.f r1.y, r1.w, r2.z -mul.f r1.w, r9.z, c28.x -sel.b32 r2.z, r8.y, r7.x, r5.z -mov.f32f32 r10.x, r0.y -mov.f32f32 r0.x, r0.x -add.f r0.y, r1.y, (neg)r1.w -mul.f r1.y, r4.w, r0.z -mov.f32f32 r11.x, r2.z -add.f r0.x, c29.w, (neg)r0.x -mul.f r1.w, r0.y, c3.x -sam (f32)(xyz)r13.x, r9.w, s#7, t#7 -(sy)add.f r2.z, r13.y, r13.z -mov.f32f32 r3.x, r13.z -mov.f32f32 r4.w, r13.y -mov.f32f32 r5.z, r13.x -add.f r2.z, r2.z, c34.y -mov.f32f32 r7.x, r4.y -mov.f32f32 r0.x, r0.x -mul.f r8.y, r0.y, c4.x -mov.f32f32 r2.z, r2.z -add.f r1.y, r7.x, r1.y -mov.f32f32 r4.y, r4.y -max.f r0.x, c28.y, r0.x -cmps.f.lt r2.z, r2.z, r13.x -mul.f r0.y, r0.y, c2.x -add.f r1.y, r1.y, (neg)r4.y -mul.f r0.z, r3.z, r0.z -cov.u32f32 r2.z, r2.z -mov.f32f32 r0.x, r0.x -mad.f32 r1.w, c3.y, r1.y, r1.w -mad.f32 r3.z, c4.y, r1.y, r8.y -mov.f32f32 r2.z, r2.z -add.f r4.y, c39.y, (neg)r0.x -add.f r7.x, c39.y, (neg)r0.x -add.f r8.y, c39.y, (neg)r0.x -cmps.f.ne r2.z, r2.z, c28.y -mov.f32f32 r3.y, r3.y -mov.f32f32 r1.w, r1.w -mul.f r6.w, (neg)r6.w, c34.y -mov.f32f32 r3.z, r3.z -mov.f32f32 r3.y, r3.y -mad.f32 r0.y, c2.y, r1.y, r0.y -mov.f32f32 r1.y, r6.w -mov.f32f32 r13.x, r5.x -mov.f32f32 r3.y, r3.y -mov.f32f32 r0.y, r0.y -add.f r0.z, r1.y, r0.z -mul.f r1.y, r9.z, c28.x -max.f r3.y, r3.y, c28.y -mov.f32f32 r4.x, r4.x -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.y, r1.y -min.f r3.y, r3.y, c29.w -sam (f32)(xyzw)r12.w, r12.w, s#6, t#6 -(sy)mul.f r5.x, r12.w, r13.z -mul.f r6.w, r13.y, r13.z -mul.f r8.z, r13.x, r13.z -mov.f32f32 r3.y, r3.y -mov.f32f32 r1.y, r1.y -mov.f32f32 r6.w, r6.w -mov.f32f32 r8.z, r8.z -mov.f32f32 r3.y, r3.y -add.f r0.z, r0.z, (neg)r1.y -mov.f32f32 r1.y, r5.x -add.f r5.x, c29.w, (neg)r13.z -mov.f32f32 r9.z, r3.y -mad.f32 r1.x, c28.x, r1.x, r6.y -mad.f32 r1.w, c3.z, r0.z, r1.w -mad.f32 r3.y, c4.z, r0.z, r3.z -mad.f32 r0.y, c2.z, r0.z, r0.y -mov.f32f32 r0.z, r1.x -mov.f32f32 r1.x, r1.w -mov.f32f32 r1.w, c29.w -mov.f32f32 r3.z, c28.y -mad.f32 r0.z, r0.z, r5.y, c28.x -mov.f32f32 r3.y, r3.y -mov.f32f32 r0.y, r0.y -add.f r1.w, r1.w, r3.z -mov.f32f32 r0.z, r0.z -mov.f32f32 r3.z, c28.y -log2 r4.x, r4.x -(ss)mul.f r4.x, r5.w, r4.x -mul.f r2.y, r2.y, r7.z -mov.f32f32 r0.z, r0.z -add.f r1.w, r1.w, (neg)r3.z -mov.f32f32 r3.z, r4.x -mad.f32 r2.y, r2.w, r8.x, r2.y +add.f r8.y, r8.z, (neg)r8.y +cmps.f.ne r7.x, r7.x, c28.y +(ss)max.f r2.z, r2.z, c28.y +mul.f r8.z, r9.z, c28.x +mul.f r8.x, r8.y, r8.x +sel.b32 r7.x, r9.w, r7.x, r10.x +mov.f32f32 r8.y, c35.y +min.f r2.z, r2.z, c29.w +add.f r9.z, c35.z, r8.z +mad.f32 r7.x, c31.x, r7.x, c31.y +add.f r8.y, r8.y, (neg)r3.z +min.f r2.z, r2.z, c33.y +mul.f r9.w, r4.w, c34.y +mul.f r7.x, r7.x, c31.z +mul.f r6.x, r8.y, r6.x +mul.f r2.z, r2.z, r5.x +add.f r5.x, r9.z, (neg)r9.w +add.f r7.x, c30.w, (neg)r7.x +max.f r6.x, r6.x, r8.x +mad.f32 r2.z, c33.y, r2.z, c28.w +mul.f r5.x, r5.x, r9.y +rcp r7.z, r7.z +add.f r8.x, c35.x, r8.z +mov.f32f32 r8.y, r9.w +add.f r4.x, r5.y, (neg)r4.x +rcp r5.y, r7.x mov.f32f32 r0.z, r0.z -mad.f32 r1.x, c3.w, r1.w, r1.x -mad.f32 r2.w, c4.w, r1.w, r3.y -mad.f32 r0.y, c2.w, r1.w, r0.y +log2 r3.y, r3.y +(ss)mul.f r2.y, r2.y, r3.y +mad.f32 r0.z, c30.z, r5.y, (neg)r0.z +(ss)add.f r3.y, r8.x, (neg)r8.y +mov.f32f32 r5.y, r4.x +mul.f r5.w, r3.x, r5.w +max.f r0.z, r0.z, r12.z +mul.f r3.y, r3.y, r7.z +exp2 r2.y, r2.y +(ss)mul.f r7.x, r2.y, c1.y +mul.f r7.z, r2.y, c1.z +mov.f32f32 r8.x, r0.z max.f r0.z, r0.z, c28.y -mul.f r1.x, r1.x, c35.w -mov.f32f32 r1.w, r2.w -mov.f32f32 r0.y, r0.y +max.f r3.y, r5.x, r3.y +rcp r5.x, r5.y +add.f r8.y, c35.x, r8.z +mul.f r9.y, r8.x, c28.x +mul.f r9.z, r8.x, r8.w min.f r0.z, r0.z, c29.w -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.w, r1.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.w, r1.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.w, r1.w -mov.f32f32 r12.w, r0.y -mov.f32f32 r9.w, r0.z -mov.f32f32 r13.x, r1.x -mov.f32f32 r13.y, r1.w -exp2 r0.y, r3.z -(ss)mul.f r0.z, r0.y, c1.z -mul.f r1.x, r0.y, c1.y -mul.f r0.y, r0.y, c1.x -mov.f32f32 r1.w, r2.y -sam (f32)(xyz)r13.z, r9.z, s#7, t#7 -(sy)mov.f32f32 r2.y, r14.x -mov.f32f32 r2.w, r13.w -mov.f32f32 r3.y, r13.z -sam.3d (f32)(xyz)r12.w, r12.w, s#1, t#1 -(sy)(ss)mad.f32 r3.z, r13.y, r5.x, r6.w -sel.b32 r2.y, r2.y, r2.z, r3.x -sel.b32 r2.w, r2.w, r2.z, r4.w -sel.b32 r2.z, r3.y, r2.z, r5.z -mov.f32f32 r3.x, r3.z -mul.f r3.y, r2.y, c10.z -mul.f r3.z, r2.w, c10.y -mul.f r4.x, r2.z, c10.x -mul.f r2.y, r0.x, r2.y -mul.f r3.y, r4.y, r3.y -mul.f r3.z, r7.x, r3.z -mul.f r4.x, r8.y, r4.x -mul.f r2.w, r0.x, r2.w -add.f r2.y, r2.y, r3.y -mul.f r0.x, r0.x, r2.z -mul.f r2.z, r3.w, r3.x -add.f r2.w, r2.w, r3.z -mul.f r2.y, r6.x, r2.y -add.f r0.x, r0.x, r4.x -mad.f32 r3.x, r13.x, r5.x, r8.z -mul.f r2.w, r6.x, r2.w -add.f r2.y, r2.y, r7.w -mul.f r0.x, r6.x, r0.x -mov.f32f32 r3.x, r3.x -mad.f32 r1.y, r12.w, r5.x, r1.y -mul.f r2.y, r9.y, r2.y -add.f r2.w, r2.w, r8.w -add.f r0.x, r0.x, r7.y -mul.f r3.x, r3.w, r3.x -add.f r2.y, r2.z, r2.y -mov.f32f32 r0.z, r0.z -mul.f r2.z, r6.z, r2.w -mul.f r0.x, r9.x, r0.x -mov.f32f32 r1.y, r1.y -mul.f r0.z, r0.z, c36.y -add.f r2.z, r3.x, r2.z -mov.f32f32 r1.x, r1.x -mul.f r1.y, r3.w, r1.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.y, r0.y -mul.f r1.x, r1.x, c36.y -mad.f32 r1.w, r2.x, r12.z, r1.w -add.f r0.z, r2.y, r0.z -add.f r0.x, r1.y, r0.x -mov.f32f32 r1.x, r1.x -mul.f r0.y, r0.y, c36.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r1.y, r1.w +exp2 r4.y, r4.y +(ss)add.f r9.w, c39.y, (neg)r4.y +max.f r9.y, r9.y, c28.y +mul.f r8.w, r8.x, r8.w +add.f r10.x, c39.y, (neg)r0.z +mul.f r9.w, r9.w, c34.x +min.f r9.y, r9.y, c34.z +(ss)mul.f r4.y, r4.y, c28.w +mul.f r6.z, r10.x, r6.z +mul.f r0.z, r0.z, c29.w +mov.f32f32 r10.x, r9.y +mul.f r9.y, r3.x, r9.y +add.f r4.y, r4.y, r9.w +add.f r0.z, r0.z, r6.z +mul.f r6.z, r0.x, r10.x +mul.f r9.y, r9.y, c34.z +mov.f32f32 r9.w, r2.w +mul.f r0.z, r2.z, r0.z +mul.f r2.z, r6.z, c34.z +bary.f r6.z, 16, r1.x +mad.f32 r9.y, c28.x, r9.w, (neg)r9.y +bary.f r9.w, 17, r1.x +rcp r10.x, r4.y +(ss)mul.f r9.z, r9.z, r10.x +mov.f32f32 r10.x, r6.z +mov.f32f32 r10.y, r0.z +min.f r3.y, r6.x, r3.y +add.f r6.x, c29.w, (neg)r9.z +mad.f32 r2.z, c28.x, r10.x, (neg)r2.z +rcp r9.z, r9.w +(ss)mad.f32 r9.y, r9.y, r9.z, c28.x +add.f r10.x, c39.y, (neg)r10.y +mul.f r10.z, r1.w, c34.y +mad.f32 r2.z, r2.z, r9.z, c28.x +max.f r9.y, r9.y, c28.y (rpt1)nop -mul.f r0.z, r0.w, r0.z -add.f r1.x, r2.z, r1.x -mov.f32f32 r0.y, r0.y -add.f r1.y, r1.y, c29.w -add.f r0.z, r0.z, r11.z -mov.f32f32 r1.x, r1.x -add.f r0.x, r0.x, r0.y -mov.f32f32 r0.y, r1.y -mul.f r0.z, r0.z, r4.z -mul.f r1.x, r0.w, r1.x -mov.f32f32 r0.x, r0.x +max.f r2.z, r2.z, c28.y +min.f r9.y, r9.y, c29.w +max.f r6.x, c28.y, r6.x +add.f r8.y, r8.y, (neg)r10.z +min.f r9.z, r2.z, c29.w +add.f r2.z, c39.y, (neg)r10.y +mov.f32f32 r10.w, r6.x +add.f r11.x, c39.y, (neg)r10.y +mul.f r5.x, r8.y, r5.x +rcp r4.x, r4.x +nop +(ss)rcp r4.y, r4.y +add.f r8.y, c39.y, (neg)r10.w +sam (f32)(xyz)r11.y, r9.y, s#7, t#7 +(sy)(ss)add.f r9.y, r11.z, r11.w +add.f r8.z, c35.z, r8.z +add.f r9.z, c39.y, (neg)r10.w +mul.f r8.y, r8.y, c9.z +add.f r9.y, r9.y, c34.y +add.f r8.z, r8.z, (neg)r10.z +mul.f r9.z, r9.z, c9.y +add.f r10.z, c39.y, (neg)r10.w +cmps.f.lt r9.y, r9.y, r11.y +(ss)mul.f r4.x, r8.z, r4.x +mul.f r4.y, r8.w, r4.y +mul.f r8.x, r8.x, c33.z +cov.u32f32 r8.z, r9.y +max.f r4.x, r4.x, r5.x +mul.f r5.x, r10.z, c9.x +add.f r4.y, c29.w, (neg)r4.y +cmps.f.ne r8.z, r8.z, c28.y +mad.f32 r2.w, c28.x, r2.w, r3.x +rcp r8.w, r9.w +min.f r3.y, r4.x, r3.y +max.f r4.x, c28.y, r4.y +max.f r4.y, r8.x, c28.y +(ss)mad.f32 r2.w, r2.w, r8.w, c28.x +mov.f32f32 r8.x, r3.y +mov.f32f32 r9.y, r4.x +min.f r4.y, r4.y, c29.w +max.f r2.w, r2.w, c28.y +mul.f r5.y, r5.y, r8.x +mul.f r1.w, (neg)r1.w, c34.y +(ss)add.f r9.w, c39.y, (neg)r9.y +min.f r12.x, r2.w, c29.w +mad.f32 r2.w, c28.x, r6.z, r0.x +add.f r1.w, r1.w, r5.y +absneg.f r5.y, (neg)c16.x +add.f r6.z, c39.y, (neg)r9.y +mad.f32 r2.w, r2.w, r8.w, c28.x +add.f r8.w, c39.y, (neg)r9.y +mul.f r10.z, r5.y, c28.x +mov.f32f32 r12.z, c28.y +max.f r2.w, r2.w, c28.y +mul.f r2.x, r2.x, r8.x +mul.f r3.y, r7.y, r3.y +mul.f r4.w, (neg)r4.w, c34.y +min.f r12.y, r2.w, c29.w +add.f r1.w, r1.w, (neg)r10.z +sel.b32 r2.w, r12.z, r4.z, r4.y +add.f r2.x, r3.z, r2.x +add.f r3.y, r4.w, r3.y +(rpt1)nop +sam (f32)(xyz)r12.x, r12.x, s#7, t#7 +(sy)sel.b32 r4.y, r12.z, r8.z, r11.w +sel.b32 r4.z, r12.y, r8.z, r11.z +sel.b32 r4.w, r12.x, r8.z, r11.y +mul.f r7.y, r1.w, c3.x +mul.f r8.x, r4.y, c10.z +mul.f r8.z, r4.z, c10.y +mul.f r10.z, r4.w, c10.x +mul.f r4.y, r9.y, r4.y +mul.f r8.x, r9.w, r8.x +mul.f r6.z, r6.z, r8.z +mul.f r8.z, r8.w, r10.z +mul.f r4.z, r9.y, r4.z +add.f r4.y, r4.y, r8.x +mul.f r4.x, r4.x, r4.w +add.f r2.x, r2.x, (neg)r3.z +mul.f r3.z, r1.w, c4.x +mul.f r4.y, r10.w, r4.y +add.f r4.z, r4.z, r6.z +add.f r4.x, r4.x, r8.z +mad.f32 r4.w, c3.y, r2.x, r7.y +add.f r4.y, r4.y, r8.y +mul.f r4.z, r10.w, r4.z +mul.f r4.x, r6.x, r4.x +mul.f r5.y, r5.y, c28.x +mul.f r4.y, r10.x, r4.y +add.f r4.z, r4.z, r9.z +add.f r4.x, r4.x, r5.x +add.f r3.y, r3.y, (neg)r5.y +mad.f32 r3.z, c4.y, r2.x, r3.z +mul.f r1.w, r1.w, c2.x +mul.f r5.x, r7.z, c36.y +mad.f32 r4.w, c3.z, r3.y, r4.w +mov.f32f32 r5.y, c29.w +mov.f32f32 r6.x, c28.y +mul.f r2.z, r2.z, r4.z +mul.f r4.x, r11.x, r4.x +mad.f32 r3.z, c4.z, r3.y, r3.z +add.f r4.z, r5.y, r6.x +mov.f32f32 r5.y, c28.y +mad.f32 r1.w, c2.y, r2.x, r1.w +mul.f r2.x, r7.x, c36.y +mul.f r2.y, r2.y, c1.x +add.f r4.z, r4.z, (neg)r5.y +mad.f32 r1.w, c2.z, r3.y, r1.w +(rpt1)nop +mad.f32 r3.y, c3.w, r4.z, r4.w +mad.f32 r7.z, c4.w, r4.z, r3.z +mad.f32 r7.x, c2.w, r4.z, r1.w +mul.f r1.w, r2.y, c36.y +mul.f r7.y, r3.y, c35.w +mad.f32 r0.y, r0.y, r9.x, r5.w +mul.f r2.y, r6.w, c28.w +mad.f32 r0.y, r0.x, r5.z, r0.y +bary.f r3.y, 6, r1.x +bary.f r3.z, 7, r1.x +add.f r4.z, c39.y, (neg)r2.y +sam.3d (f32)(xyz)r5.y, r7.x, s#1, t#1 +add.f r0.y, r0.y, c29.w +mul.f r2.y, r2.y, c35.w +bary.f r4.w, 14, r1.x +mul.f r4.z, r4.z, c29.w mul.f r0.y, r0.y, c28.x -mov.f32f32 r0.z, r0.z -add.f r1.x, r1.x, r12.x -mul.f r0.x, r0.w, r0.x -mov.f32f32 r0.y, r0.y -(rpt2)nop +(ss)nop +sam (f32)(w)r6.z, r3.y, s#0, t#0 +(sy)(ss)cmps.f.lt r3.y, r7.y, c36.z +bary.f r3.z, 13, r1.x +add.f r2.y, r2.y, r4.z mul.f r0.y, r1.z, r0.y -mul.f r0.w, r1.x, r4.z -add.f r0.x, r0.x, r12.y +cov.u32f32 r1.z, r3.y +rcp r3.y, r4.w +bary.f (ei)r1.x, 12, r1.x +mul.f r1.y, c8.x, r2.y +mov.f32f32 r2.y, r0.y +cmps.f.ne r1.z, r1.z, c28.y nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.w, r0.w -mul.f r0.x, r0.x, r4.z +mov.f32f32 r4.z, r1.y +mul.f r0.x, r1.y, r0.x +mov.f32f32 r1.y, c28.y +nop +mul.f r3.x, r4.z, r3.x +mad.f32 r0.x, c28.x, r1.x, (neg)r0.x +mad.f32 r1.x, c28.x, r3.z, (neg)r3.x +sel.b32 r1.y, r1.y, r1.z, r7.y +(rpt1)nop +(ss)mad.f32 r1.x, r1.x, r3.y, c28.x +mad.f32 r0.x, r0.x, r3.y, c28.x +(rpt1)nop +max.f r1.x, r1.x, c28.y +max.f r0.x, r0.x, c28.y +(rpt1)nop +min.f r3.y, r1.x, c29.w +min.f r3.x, r0.x, c29.w +(rpt5)nop +sam (f32)(xyzw)r6.z, r3.x, s#6, t#6 +(sy)add.f r0.x, c29.w, (neg)r7.y +mul.f r1.x, r7.x, r7.y +mul.f r1.z, r6.w, r7.y +(ss)mul.f r3.x, r6.z, r7.y +mad.f32 r1.x, r5.w, r0.x, r1.x +mad.f32 r1.z, r5.z, r0.x, r1.z +mad.f32 r0.x, r5.y, r0.x, r3.x nop -mad.f32 r0.z, r0.y, r4.z, r0.z -mad.f32 r0.w, r0.y, r4.z, r0.w -mov.f32f32 r0.x, r0.x +mul.f r1.x, r10.y, r1.x +mul.f r1.z, r10.y, r1.z +mul.f r0.x, r0.z, r0.x nop -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -mad.f32 r0.x, r0.y, r4.z, r0.x +add.f r0.z, r1.x, r4.y +add.f r1.x, r1.z, r2.z +add.f r0.x, r0.x, r4.x +nop +add.f r0.z, r0.z, r5.x +add.f r1.x, r1.x, r2.x +add.f r0.x, r0.x, r1.w nop -mov.f32f32 r10.w, r0.z -mov.f32f32 r10.z, r0.w -mov.f32f32 r0.x, r0.x -(rpt2)nop -mov.f32f32 r10.y, r0.x +mul.f r0.z, r0.w, r0.z +mul.f r1.x, r0.w, r1.x +mul.f r0.x, r0.w, r0.x +nop +add.f r0.z, r0.z, r3.w +add.f r0.w, r1.x, r6.y +add.f r0.x, r0.x, r7.w +nop +mul.f r0.z, r0.z, r1.y +mul.f r0.w, r0.w, r1.y +mad.f32 r2.z, r2.y, r1.y, r0.z +mad.f32 r2.y, r2.y, r1.y, r0.w +mul.f r0.x, r0.x, r1.y +nop +mad.f32 r2.x, r0.y, r1.y, r0.x end nop nop -; FRAG: outputs: r10.y (1:0) -; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r2.x (5:9,cm=f,il=8,b=1) r4.y (5:10,cm=f,il=12,b=1) r12.y (5:11,cm=f,il=16,b=1) r4.w (5:12,cm=f,il=20,b=1) r3.z (5:13,cm=f,il=24,b=1) r6.x (5:14,cm=f,il=28,b=1) -; FRAG: 1019 instructions, 0 half, 16 full -; pos (bary): r1.x -; color: r10.y -; fragcoord: r0.x +nop +; FRAG: outputs: r2.x (1:0) +; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r2.x (5:9,cm=f,il=8,b=1) r2.w (5:10,cm=f,il=12,b=1) r8.x (5:11,cm=f,il=16,b=1) r2.x (5:12,cm=f,il=20,b=1) r2.w (5:13,cm=f,il=24,b=1) r6.x (5:14,cm=f,il=28,b=1) +; FRAG: 701 instructions, 0 half, 16 full diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-86.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-86.asm index c62562e..a754b2c 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-86.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-86.asm @@ -1,180 +1,116 @@ ; options: -; VERT: new compiler +; VERT: TGSI compiler @in(r0.x) in0 @in(r0.y) in1 @in(r0.z) in2 -@in(r0.w) in4 -@in(r1.x) in5 -@out(r2.x) out0 -@out(r2.y) out1 -@out(r2.z) out2 -@out(r2.w) out3 -@out(r4.y) out4 -@out(r4.z) out5 -@out(r4.w) out6 -@out(r5.x) out7 -@out(r6.y) out8 -@out(r6.z) out9 -@out(r6.w) out10 -@out(r7.x) out11 -@out(r7.y) out12 -@out(r7.z) out13 -@out(r7.w) out14 -@out(r8.x) out15 -@out(r5.y) out16 -@out(r5.z) out17 -@out(r5.w) out18 -@out(r6.x) out19 -@out(r3.y) out20 -@out(r3.z) out21 -@out(r3.w) out22 -@out(r4.x) out23 -@out(r8.y) out24 -@out(r8.z) out25 -@out(r8.w) out26 -@out(r9.x) out27 -(sy)(ss)absneg.f r1.y, (neg)c13.x -mul.f r1.z, c4.w, r0.x -mul.f r1.w, c4.y, r0.x -mul.f r2.x, c0.w, r0.x -mul.f r2.y, c0.y, r0.x -mul.f r2.z, c0.x, r0.x -mad.f32 r1.z, c5.w, r0.y, r1.z -sin r2.w, r1.y -(ss)mov.f32f32 r2.w, r2.w -mad.f32 r1.z, c6.w, r0.z, r1.z -mad.f32 r1.w, c5.y, r0.y, r1.w -mad.f32 r2.x, c1.w, r0.y, r2.x -mul.f r3.x, r0.x, r2.w -add.f r1.z, r1.z, c7.w -mad.f32 r1.w, c6.y, r0.z, r1.w -mad.f32 r2.x, c2.w, r0.z, r2.x -mov.f32f32 r3.x, r3.x -(ss)cos r1.y, r1.y -mov.f32f32 r1.z, r1.z -(ss)mad.f32 r3.x, r0.z, r1.y, r3.x -add.f r1.w, r1.w, c7.y -add.f r2.x, r2.x, c3.w -mad.f32 r2.y, c1.y, r0.y, r2.y -mov.f32f32 r3.x, r3.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r3.x, r3.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.x, r2.x -mul.f r3.x, r3.x, c12.x -mov.f32f32 r3.z, r1.z -mov.f32f32 r1.z, r1.w -mov.f32f32 r1.w, r2.x -mov.f32f32 r4.z, r3.x -mad.f32 r2.x, c2.y, r0.z, r2.y -mov.f32f32 r3.y, r1.z -mov.f32f32 r5.w, r1.w -mad.f32 r1.z, c1.x, r0.y, r2.z -add.f r1.w, r2.x, c3.y -mad.f32 r1.z, c2.x, r0.z, r1.z -mov.f32f32 r2.x, r2.w -mul.f r2.y, r0.z, r2.w -mov.f32f32 r1.w, r1.w -add.f r1.z, r1.z, c3.x -mov.f32f32 r2.x, r2.x -mad.f32 r2.y, r0.x, r1.y, (neg)r2.y -mov.f32f32 r1.w, r1.w -mov.f32f32 r1.z, r1.z -mov.f32f32 r2.x, r2.x -mul.f r2.y, r2.y, c12.x -mov.f32f32 r1.w, r1.w -mov.f32f32 r1.z, r1.z -mov.f32f32 r6.z, r2.x -mov.f32f32 r4.y, r2.y -mov.f32f32 r5.z, r1.w -mov.f32f32 r1.z, r1.z -mul.f r1.w, c4.x, r0.x -mad.f32 r2.x, c20.z, r0.w, c20.w -mul.f r2.y, c8.y, r0.x -mov.f32f32 r5.y, r1.z -mad.f32 r1.z, c5.x, r0.y, r1.w -mul.f r1.w, c14.x, r2.x -mad.f32 r1.z, c6.x, r0.z, r1.z -mad.f32 r2.x, c9.y, r0.y, r2.y -mul.f r2.y, c8.x, r0.x -(ss)mov.f32f32 r1.y, r1.y -add.f r1.z, r1.z, c7.x -mov.f32f32 r1.w, r1.w +; in3 unused +@in(r1.z) in4 +@in(r1.w) in5 +; in6 unused +; in7 unused +@out(r7.w) out0 +@out(r8.x) out1 +@out(r8.y) out2 +@out(r8.z) out3 +@out(r4.w) out4 +@out(r5.x) out5 +@out(r5.y) out6 +@out(r5.z) out7 +@out(r3.w) out8 +@out(r4.x) out9 +@out(r4.y) out10 +@out(r4.z) out11 +@out(r2.x) out12 +@out(r2.y) out13 +@out(r2.z) out14 +@out(r2.w) out15 +@out(r6.w) out16 +@out(r7.x) out17 +@out(r7.y) out18 +@out(r7.z) out19 +@out(r5.w) out20 +@out(r6.x) out21 +@out(r6.y) out22 +@out(r6.z) out23 +@out(r0.z) out24 +@out(r0.w) out25 +@out(r1.x) out26 +@out(r1.y) out27 +@const(c20.x) 0x41700000, 0x00000000, 0x3f5e9e1b, 0x3e19999a +(sy)(ss)mov.f32f32 r2.x, c13.x +mul.f r2.y, c4.w, r0.x +mul.f r2.z, c4.y, r0.x +mul.f r2.w, c4.x, r0.x +mul.f r3.x, c0.w, r0.x +mul.f r3.y, c0.y, r0.x +mul.f r3.z, c0.x, r0.x +sin r4.x, (neg)r2.x +(ss)mul.f r4.w, r0.x, r4.x +cos r3.w, (neg)r2.x +(ss)mul.f r2.x, r0.z, r4.x +(ss)mad.f32 r4.w, r0.z, r3.w, r4.w +mad.f32 r2.x, r0.x, r3.w, (neg)r2.x +(rpt1)nop +mul.f r5.x, r4.w, c12.x +mul.f r4.w, r2.x, c12.x +mad.f32 r2.x, c5.w, r0.y, r2.y +mad.f32 r2.y, c5.y, r0.y, r2.z +mad.f32 r2.x, c6.w, r0.z, r2.x +mad.f32 r2.y, c6.y, r0.z, r2.y +mad.f32 r2.z, c5.x, r0.y, r2.w +mad.f32 r2.w, c1.w, r0.y, r3.x +add.f r6.x, r2.x, c7.w +add.f r5.w, r2.y, c7.y +mad.f32 r2.x, c6.x, r0.z, r2.z +mad.f32 r2.y, c2.w, r0.z, r2.w +mad.f32 r2.z, c1.y, r0.y, r3.y +mad.f32 r2.w, c1.x, r0.y, r3.z +add.f r7.z, r2.x, c7.x +add.f r7.y, r2.y, c3.w +mad.f32 r2.x, c2.y, r0.z, r2.z +mad.f32 r2.y, c2.x, r0.z, r2.w +mul.f r2.z, c8.y, r0.x +mul.f r2.w, c8.x, r0.x +add.f r7.x, r2.x, c3.y +add.f r6.w, r2.y, c3.x +mad.f32 r2.x, c9.y, r0.y, r2.z +mad.f32 r2.y, c9.x, r0.y, r2.w mad.f32 r2.x, c10.y, r0.z, r2.x -mad.f32 r2.y, c9.x, r0.y, r2.y -mov.f32f32 r1.z, r1.z -mov.f32f32 r1.w, r1.w -add.f r2.x, r2.x, c11.y mad.f32 r2.y, c10.x, r0.z, r2.y -mov.f32f32 r1.z, r1.z -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.x, r2.x -add.f r2.y, r2.y, c11.x -mov.f32f32 r6.x, r1.z -mov.f32f32 r7.w, r1.w -mov.f32f32 r1.z, r2.x -mov.f32f32 r1.w, r2.y -mov.f32f32 r1.y, r1.y -mul.f r2.x, c16.w, r0.x -mov.f32f32 r7.x, r1.z -mov.f32f32 r1.z, r1.w -mov.f32f32 r1.y, r1.y -(rpt1)nop -mov.f32f32 r6.w, r1.z -mov.f32f32 r6.y, r1.y -mad.f32 r1.y, c17.w, r0.y, r2.x -mul.f r1.z, c16.z, r0.x -mad.f32 r1.y, c18.w, r0.z, r1.y -mad.f32 r1.z, c17.z, r0.y, r1.z -mul.f r1.w, c16.y, r0.x -mul.f r2.x, c16.x, r0.x -add.f r1.y, r1.y, c19.w -mad.f32 r1.z, c18.z, r0.z, r1.z -mad.f32 r1.w, c17.y, r0.y, r1.w -mad.f32 r0.y, c17.x, r0.y, r2.x -mov.f32f32 r2.w, r1.y -add.f r1.y, r1.z, c19.z -mad.f32 r1.z, c18.y, r0.z, r1.w +mul.f r2.z, c16.w, r0.x +mul.f r2.w, c16.z, r0.x +add.f r4.z, r2.x, c11.y +add.f r4.y, r2.y, c11.x +mad.f32 r2.x, c17.w, r0.y, r2.z +mad.f32 r2.y, c17.z, r0.y, r2.w +mad.f32 r2.x, c18.w, r0.z, r2.x +mad.f32 r2.y, c18.z, r0.z, r2.y +mul.f r2.z, c16.y, r0.x +mul.f r2.w, c16.x, r0.x +add.f r8.z, r2.x, c19.w +add.f r8.y, r2.y, c19.z +mad.f32 r2.x, c17.y, r0.y, r2.z +mad.f32 r0.y, c17.x, r0.y, r2.w +mad.f32 r2.x, c18.y, r0.z, r2.x mad.f32 r0.y, c18.x, r0.z, r0.y -mov.f32f32 r1.w, c20.x -mov.f32f32 r2.z, r1.y -add.f r1.y, r1.z, c19.y -add.f r0.y, r0.y, c19.x -mov.f32f32 r1.z, r1.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r2.y, r1.y -mov.f32f32 r2.x, r0.y -mov.f32f32 r4.x, r1.z -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, c20.y -mov.f32f32 r1.y, c15.x -mov.f32f32 r1.z, (0.000000) -mov.f32f32 r3.w, r0.x -mov.f32f32 r0.x, r0.y -mov.f32f32 r0.y, r1.y -mov.f32f32 r9.x, r1.z +mov.f32f32 r6.z, c20.x +mad.f32 r2.y, c20.z, r1.z, c20.w +add.f r8.x, r2.x, c19.y +add.f r7.w, r0.y, c19.x +mov.f32f32 r0.w, r6.z +mul.f r2.z, c14.x, r2.y mov.f32f32 r1.y, (0.000000) -mov.f32f32 r5.x, r0.x -mov.f32f32 r4.w, r0.y -mov.f32f32 r8.z, r1.w -mov.f32f32 r8.w, r1.y -mov.f32f32 r0.x, r0.z -mov.f32f32 r0.y, r1.x -mov.f32f32 r0.z, r1.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r8.y, r0.x -mov.f32f32 r8.x, r0.y -mov.f32f32 r7.z, r0.z -mov.f32f32 r7.y, r0.w +mov.f32f32 r1.x, (0.000000) +mov.f32f32 r6.y, r0.x +mov.f32f32 r2.w, r1.w +mov.f32f32 r2.y, r1.w +mov.f32f32 r2.x, r1.z +mov.f32f32 r5.z, c20.y +mov.f32f32 r5.y, c15.x end nop -nop -nop -; VERT: outputs: r2.x (0:0) r4.y (5:9) r6.y (5:10) r7.y (5:11) r5.y (5:12) r3.y (5:13) r8.y (5:14) -; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=3,il=12,b=0) -; VERT: 138 instructions, 0 half, 10 full -; pos: r2.x +; VERT: outputs: r7.w (0:0) r4.w (5:9) r3.w (5:10) r2.x (5:11) r6.w (5:12) r5.w (5:13) r0.z (5:14) +; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r1.z (0:0,cm=3,il=12,b=0) +; VERT: 72 instructions, 0 half, 9 full +; pos: r7.w diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-87.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-87.asm index dc36d85..e8e697e 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-87.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-87.asm @@ -2,49 +2,44 @@ ; FRAG: new compiler @in(r0.x) in0 @in(r0.y) in1 -@out(r3.x) out0 -@out(r3.y) out1 -@out(r3.z) out2 -@out(r3.w) out3 +@out(r1.x) out0 +@out(r1.y) out1 +@out(r1.z) out2 +@out(r1.w) out3 +@const(c1.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.z, 0, r0.x -bary.f r0.w, 4, r0.x -bary.f r1.x, 5, r0.x -bary.f (ei)r0.x, 1, r0.x -mov.f32f32 r1.y, r0.z -mov.f32f32 r0.y, r0.z -mov.f32f32 r1.w, r0.w -mov.f32f32 r1.z, r0.x -mov.f32f32 r0.z, r0.x -mov.f32f32 r2.x, r1.x -(rpt3)nop -sam (f32)(xyz)r0.w, r1.y, s#1, t#1 -(sy)add.f r0.x, c1.y, (neg)r1.y -sam (f32)(xyzw)r2.y, r0.y, s#0, t#0 -(ss)add.f r0.y, c1.y, (neg)r1.x -add.f r0.z, c1.y, (neg)r0.w -(sy)mul.f r3.w, c0.w, r3.x -mul.f r0.x, r0.x, r2.w -mul.f r1.y, r1.y, c0.z -mul.f r0.y, r0.y, r2.z -mul.f r0.z, r0.z, r2.y -mul.f r1.x, r1.x, c0.y -add.f r0.x, r1.y, r0.x -sam (f32)(w)r1.y, r1.w, s#2, t#2 -mul.f r0.w, r0.w, c0.x +bary.f r0.w, 1, r0.x +bary.f r1.x, 4, r0.x +bary.f (ei)r1.y, 5, r0.x +mov.f32f32 r0.x, r0.z +mov.f32f32 r0.y, r0.w (rpt1)nop -(sy)mul.f r3.z, r0.x, r2.x -add.f r0.x, r1.x, r0.y -add.f r0.y, r0.w, r0.z +sam (f32)(xyz)r1.z, r0.z, s#1, t#1 +(sy)(ss)add.f r0.z, c1.y, (neg)r2.x +mul.f r0.w, r2.x, c0.z +mul.f r2.x, r1.w, c0.y +add.f r1.w, c1.y, (neg)r1.w +sam (f32)(xyzw)r2.y, r0.x, s#0, t#0 +(sy)(ss)mul.f r0.x, r0.z, r2.w +add.f r0.y, c1.y, (neg)r1.z +mul.f r0.z, r1.z, c0.x +mul.f r2.z, r1.w, r2.z +add.f r0.x, r0.w, r0.x +sam (f32)(w)r3.y, r1.x, s#2, t#2 +mul.f r0.y, r0.y, r2.y +mul.f r1.w, c0.w, r3.x +nop +(sy)mul.f r1.z, r0.x, r4.x +add.f r0.x, r2.x, r2.z +add.f r0.y, r0.z, r0.y (rpt1)nop -mul.f r3.y, r0.x, r2.x -mul.f r3.x, r0.y, r2.x +(ss)mul.f r1.y, r0.x, r4.x +mul.f r1.x, r0.y, r4.x end nop nop nop -; FRAG: outputs: r3.x (1:0) +; FRAG: outputs: r1.x (1:0) ; FRAG: inputs: r0.x (5:0,cm=f,il=8,b=1) r1.x (5:1,cm=f,il=12,b=1) -; FRAG: 38 instructions, 0 half, 4 full -; pos (bary): r0.x -; color: r3.x +; FRAG: 31 instructions, 0 half, 5 full diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-88.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-88.asm index 948d48e..270bdcf 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-88.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-88.asm @@ -1,9 +1,9 @@ ; options: ; VERT: new compiler -@in(r0.x) in0 -@in(r0.y) in1 -@in(r0.z) in2 -@in(r0.w) in3 +@in(r4.x) in0 +@in(r4.y) in1 +@in(r4.z) in2 +@in(r4.w) in3 @in(r1.x) in4 @in(r1.y) in5 @in(r1.z) in6 @@ -12,72 +12,56 @@ @in(r2.y) in9 @in(r2.z) in10 @in(r2.w) in11 -@out(r3.w) out0 -@out(r4.x) out1 -@out(r4.y) out2 -@out(r4.z) out3 -@out(r4.w) out4 -@out(r5.x) out5 -@out(r5.y) out6 -@out(r5.z) out7 -@out(r2.w) out8 -@out(r3.x) out9 -@out(r3.y) out10 -@out(r3.z) out11 -@out(r0.w) out12 -@out(r1.x) out13 -@out(r1.y) out14 -@out(r1.z) out15 -(sy)(ss)mul.f r3.x, c3.x, r0.x -mul.f r3.y, c2.x, r0.x -mad.f32 r3.x, c3.y, r0.y, r3.x -mad.f32 r3.y, c2.y, r0.y, r3.y -mul.f r3.z, c1.x, r0.x -mul.f r3.w, c0.x, r0.x -mov.f32f32 r3.x, r3.x -mov.f32f32 r3.y, r3.y -mad.f32 r3.x, c3.z, r0.z, r3.x -mad.f32 r3.y, c2.z, r0.z, r3.y -mad.f32 r3.z, c1.y, r0.y, r3.z -mad.f32 r0.y, c0.y, r0.y, r3.w -mov.f32f32 r3.x, r3.x -mov.f32f32 r3.y, r3.y -mad.f32 r4.z, c3.w, r0.w, r3.x -mad.f32 r4.y, c2.w, r0.w, r3.y -mov.f32f32 r3.x, r3.z -mov.f32f32 r0.y, r0.y -mad.f32 r3.x, c1.z, r0.z, r3.x -mad.f32 r0.y, c0.z, r0.z, r0.y -mov.f32f32 r1.w, r1.w -mov.f32f32 r1.z, r1.z -mov.f32f32 r3.x, r3.x -mov.f32f32 r0.y, r0.y -mad.f32 r4.x, c1.w, r0.w, r3.x -mad.f32 r3.w, c0.w, r0.w, r0.y -max.f r0.y, r1.w, c5.x -max.f r0.w, r1.z, c5.x -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.x, r1.x -min.f r5.z, r0.y, c5.y -min.f r5.y, r0.w, c5.y -max.f r0.y, r1.y, c5.x -max.f r0.w, r1.x, c5.x -(rpt1)nop -min.f r5.x, r0.y, c5.y -min.f r4.w, r0.w, c5.y -mad.f32 r1.z, c4.x, r0.z, c4.y -mad.f32 r1.y, c4.x, r0.z, c4.y -mad.f32 r1.x, c4.x, r0.z, c4.y -mad.f32 r0.w, c4.x, r0.x, c4.y -mov.f32f32 r3.z, r2.w -mov.f32f32 r3.y, r2.z -mov.f32f32 r3.x, r2.y -mov.f32f32 r2.w, r2.x +@out(r0.x) out0 +@out(r0.y) out1 +@out(r0.z) out2 +@out(r0.w) out3 +@out(r1.x) out4 +@out(r1.y) out5 +@out(r1.z) out6 +@out(r1.w) out7 +@out(r2.x) out8 +@out(r2.y) out9 +@out(r2.z) out10 +@out(r2.w) out11 +@out(r3.x) out12 +@out(r3.y) out13 +@out(r3.z) out14 +@out(r3.w) out15 +@const(c5.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 +(sy)(ss)mul.f r0.x, c3.x, r4.x +mul.f r0.y, c2.x, r4.x +mad.f32 r0.x, c3.y, r4.y, r0.x +mad.f32 r0.y, c2.y, r4.y, r0.y +mad.f32 r0.x, c3.z, r4.z, r0.x +mad.f32 r0.y, c2.z, r4.z, r0.y +mad.f32 r0.w, c3.w, r4.w, r0.x +mad.f32 r0.z, c2.w, r4.w, r0.y +mul.f r0.x, c1.x, r4.x +mul.f r0.y, c0.x, r4.x +mad.f32 r0.x, c1.y, r4.y, r0.x +mad.f32 r0.y, c0.y, r4.y, r0.y +mad.f32 r0.x, c1.z, r4.z, r0.x +mad.f32 r3.x, c0.z, r4.z, r0.y +mad.f32 r0.y, c1.w, r4.w, r0.x +mad.f32 r0.x, c0.w, r4.w, r3.x +max.f r1.w, r1.w, c5.x +max.f r1.z, r1.z, c5.x +max.f r1.y, r1.y, c5.x +max.f r1.x, r1.x, c5.x +min.f r1.w, r1.w, c5.y +min.f r1.z, r1.z, c5.y +min.f r1.y, r1.y, c5.y +min.f r1.x, r1.x, c5.y +mad.f32 r3.w, c4.x, r4.z, c4.y +mad.f32 r3.z, c4.x, r4.z, c4.y +mad.f32 r3.y, c4.x, r4.z, c4.y +mad.f32 r3.x, c4.x, r4.x, c4.y end nop nop -; VERT: outputs: r3.w (0:0) r4.w (1:0) r2.w (5:0) r0.w (5:1) -; VERT: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (0:0,cm=f,il=12,b=0) r2.x (0:0,cm=f,il=16,b=0) -; VERT: 47 instructions, 0 half, 6 full -; pos: r3.w +nop +; VERT: outputs: r0.x (0:0) r1.x (1:0) r2.x (5:0) r3.x (5:1) +; VERT: inputs: r4.x (0:0,cm=f,il=8,b=0) r1.x (0:0,cm=f,il=12,b=0) r2.x (0:0,cm=f,il=16,b=0) +; VERT: 29 instructions, 0 half, 5 full diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-95.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-95.asm index a7cfae4..22ca830 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-95.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-95.asm @@ -2,37 +2,28 @@ ; FRAG: new compiler @in(r0.x) in0 @in(r0.y) in1 -@out(r1.y) out0 -@out(r1.z) out1 -@out(r1.w) out2 -@out(r2.x) out3 +@out(r1.x) out0 +@out(r1.y) out1 +@out(r1.z) out2 +@out(r1.w) out3 +@const(c0.x) 0x3ecccccd, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)bary.f r0.z, 0, r0.x -bary.f (ei)r0.x, 1, r0.x -mov.f32f32 r0.y, (0.000000) -nop -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.x, r0.x -(rpt1)nop -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.x -(rpt5)nop -sam (f32)(xyzw)r0.z, r0.z, s#0, t#0 -(sy)cmps.f.lt r0.x, r1.y, c0.x -mov.f32f32 r2.x, r1.y -mov.f32f32 r1.w, r1.x -mov.f32f32 r1.z, r0.w -cov.u32f32 r0.x, r0.x -(rpt1)nop -mov.f32f32 r1.y, r0.z -cmps.f.ne p0.x, r0.x, r0.y +bary.f (ei)r0.w, 1, r0.x +mov.f32f32 r0.x, (0.000000) +(rpt4)nop +sam (f32)(xyzw)r1.x, r0.z, s#0, t#0 +(sy)cmps.f.lt r0.y, r1.w, c0.x +(rpt2)nop +cov.u32f32 r0.y, r0.y +(rpt2)nop +cmps.f.ne p0.x, r0.y, r0.x (rpt5)nop kill p0.x end nop nop -; FRAG: outputs: r1.y (1:0) +nop +; FRAG: outputs: r1.x (1:0) ; FRAG: inputs: r0.x (5:9,cm=f,il=8,b=1) -; FRAG: 34 instructions, 0 half, 3 full -; pos (bary): r0.x -; color: r1.y +; FRAG: 26 instructions, 0 half, 2 full diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-96.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-96.asm index 08421de..f362ccd 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-96.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-96.asm @@ -3,170 +3,134 @@ @in(r0.x) in0 @in(r0.y) in1 @in(r0.z) in2 -@in(r0.w) in4 -@in(r1.x) in5 -@out(r3.x) out0 -@out(r3.y) out1 -@out(r3.z) out2 -@out(r3.w) out3 -@out(r2.x) out4 -@out(r2.y) out5 -@out(r2.z) out6 -@out(r2.w) out7 -(sy)(ss)floor.f r1.y, c11.z +@in(r1.x) in4 +@in(r1.y) in5 +@out(r0.x) out0 +@out(r0.y) out1 +@out(r0.z) out2 +@out(r0.w) out3 +@out(r1.x) out4 +@out(r1.y) out5 +@out(r1.z) out6 +@out(r1.w) out7 +@const(c15.x) 0x3ecccccd, 0x00000000, 0x3eaaa64c, 0x3e800000 +@const(c16.x) 0x3f000000, 0x40000000, 0xbf800000, 0x40400000 +@const(c17.x) 0x399d4952, 0x3e4ccccd, 0x3f99999a, 0x3fd9999a +@const(c18.x) 0x3c88ce70, 0x00000000, 0x00000000, 0x00000000 +@const(c19.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 +(sy)(ss)floor.f r0.w, c11.z floor.f r1.z, c11.x absneg.f r1.w, (abs)c14.x absneg.f r2.x, (abs)c14.y -add.f r1.y, c11.z, (neg)r1.y +add.f r0.w, c11.z, (neg)r0.w add.f r1.z, c11.x, (neg)r1.z mul.f r2.y, r0.x, r0.z add.f r1.w, r1.w, r2.x -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.z, r1.z -mul.f r2.x, r0.y, c17.x -mov.f32f32 r1.w, r1.w -max.f r1.y, r1.y, c15.y +max.f r0.w, r0.w, c15.y max.f r1.z, r1.z, c15.y -mul.f r2.x, r2.y, r2.x -mul.f r2.y, c13.x, r1.w -min.f r1.y, r1.y, c19.y +mul.f r2.x, r0.y, c17.x +mul.f r2.z, c13.x, r1.w +min.f r0.w, r0.w, c19.y min.f r1.z, r1.z, c19.y -mul.f r2.z, c8.z, r0.x -mov.f32f32 r2.x, r2.x -max.f r1.y, r1.y, c15.x +mul.f r2.w, c8.z, r0.x +mov.f32f32 r1.w, r1.w +max.f r0.w, r0.w, c15.x max.f r1.z, r1.z, c15.x -mad.f32 r2.z, c9.z, r0.y, r2.z -mov.f32f32 r2.x, r2.x -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.z, r1.z -mad.f32 r2.z, c10.z, r0.z, r2.z -mul.f r2.w, c8.x, r0.x -mul.f r1.y, c13.x, r1.y -mad.f32 r2.w, c9.x, r0.y, r2.w -add.f r2.z, r2.z, c11.z -mad.f32 r2.w, c10.x, r0.z, r2.w -mov.f32f32 r1.y, r1.y -mad.f32 r2.y, c15.w, r2.y, r2.z -mov.f32f32 r2.x, r2.x -add.f r2.w, r2.w, c11.x -mad.f32 r1.y, c15.z, r1.y, c11.x -mad.f32 r1.z, c13.x, r1.z, r2.w -mov.f32f32 r2.y, r2.y -max.f r2.x, r2.x, c15.y -mov.f32f32 r1.y, r1.y +mul.f r3.x, c8.x, r0.x +mad.f32 r2.w, c9.z, r0.y, r2.w +mul.f r0.w, c13.x, r0.w +mad.f32 r3.x, c9.x, r0.y, r3.x +mad.f32 r2.w, c10.z, r0.z, r2.w +mad.f32 r3.x, c10.x, r0.z, r3.x +mad.f32 r0.w, c15.z, r0.w, c11.x +max.f r1.w, r1.w, c17.z +mul.f r2.x, r2.y, r2.x +add.f r2.y, r3.x, c11.x +add.f r0.w, r0.w, c16.x +mad.f32 r1.z, c13.x, r1.z, r2.y +add.f r2.w, r2.w, c11.z +min.f r1.w, r1.w, c17.w +floor.f r3.x, r0.w add.f r1.z, r1.z, c16.x -mov.f32f32 r2.y, r2.y -min.f r2.x, r2.x, c19.y -mov.f32f32 r1.y, r1.y +mad.f32 r2.z, c15.w, r2.z, r2.w +mov.f32f32 r3.y, r1.w +add.f r0.w, r0.w, (neg)r3.x floor.f r3.x, r1.z -add.f r2.y, r2.y, c16.x -min.f r2.x, r2.x, c17.y -add.f r1.y, r1.y, c16.x +add.f r2.z, r2.z, c16.x +max.f r2.x, r2.x, c15.y +mad.f32 r0.w, c16.y, r0.w, c16.z add.f r1.z, r1.z, (neg)r3.x -floor.f r3.x, r2.y -mov.f32f32 r2.x, r2.x -floor.f r3.y, r1.y +floor.f r3.x, r2.z +min.f r2.x, r2.x, c19.y +absneg.f r0.w, (abs)r0.w mad.f32 r1.z, c16.y, r1.z, c16.z -add.f r2.y, r2.y, (neg)r3.x -mul.f r3.x, r0.y, c18.x -add.f r1.y, r1.y, (neg)r3.y +add.f r2.z, r2.z, (neg)r3.x +min.f r2.x, r2.x, c17.y +mul.f r3.x, c16.y, r0.w absneg.f r1.z, (abs)r1.z -mad.f32 r2.y, c16.y, r2.y, c16.z -mov.f32f32 r3.x, r3.x -mad.f32 r1.y, c16.y, r1.y, c16.z -mul.f r3.y, c16.y, r1.z -absneg.f r2.y, (abs)r2.y +mad.f32 r2.z, c16.y, r2.z, c16.z +mul.f r0.w, r0.w, r0.w +add.f r3.x, c16.w, (neg)r3.x +mul.f r3.z, c16.y, r1.z +absneg.f r2.z, (abs)r2.z mul.f r1.z, r1.z, r1.z -absneg.f r1.y, (abs)r1.y -add.f r3.y, c16.w, (neg)r3.y -mul.f r3.z, c16.y, r2.y -mul.f r2.y, r2.y, r2.y -mul.f r3.w, c16.y, r1.y -mul.f r1.z, r1.z, r3.y -add.f r3.y, c16.w, (neg)r3.z -mul.f r1.y, r1.y, r1.y -add.f r3.z, c16.w, (neg)r3.w -mov.f32f32 r1.z, r1.z -mul.f r2.y, r2.y, r3.y -mov.f32f32 r3.x, r3.x -mul.f r1.y, r1.y, r3.z -mul.f r1.z, r1.z, r2.x -mov.f32f32 r2.y, r2.y +mul.f r0.w, r0.w, r3.x +mul.f r3.x, r0.y, c18.x +add.f r3.z, c16.w, (neg)r3.z +mul.f r3.w, c16.y, r2.z +mul.f r2.z, r2.z, r2.z max.f r3.x, r3.x, c15.y -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.z, r1.z -mul.f r2.x, r2.y, r2.x -min.f r2.y, r3.x, c19.y -max.f r1.w, r1.w, c17.z -mul.f r3.x, c8.y, r0.x +mul.f r1.z, r1.z, r3.z +mov.f32f32 r3.z, r2.x +add.f r3.w, c16.w, (neg)r3.w +min.f r3.x, r3.x, c19.y +mul.f r4.x, c8.y, r0.x mul.f r0.x, c8.w, r0.x -min.f r2.y, r2.y, c15.w -mov.f32f32 r1.w, r1.w -mad.f32 r3.x, c9.y, r0.y, r3.x +mad.f32 r4.x, c9.y, r0.y, r4.x +min.f r3.x, r3.x, c15.w +mul.f r1.z, r1.z, r3.z +mul.f r2.z, r2.z, r3.w +mad.f32 r3.z, c10.y, r0.z, r4.x +mul.f r0.w, r0.w, r3.x +mov.f32f32 r3.x, r1.z +mul.f r2.x, r2.z, r2.x +add.f r2.z, r3.z, c11.y +mov.f32f32 r3.z, r0.w +mad.f32 r0.w, c14.y, r0.w, r1.z +mad.f32 r1.z, r2.x, r1.w, r2.w +mad.f32 r1.w, r2.x, r3.y, r2.y +mad.f32 r2.y, c14.x, r3.z, r3.x +mad.f32 r2.x, r2.x, r3.y, r2.z mad.f32 r0.x, c9.w, r0.y, r0.x -mov.f32f32 r0.y, r2.y -min.f r1.w, r1.w, c17.w -mad.f32 r2.y, c10.y, r0.z, r3.x -mad.f32 r0.x, c10.w, r0.z, r0.x -mul.f r0.y, r1.y, r0.y -mov.f32f32 r0.z, r1.w -add.f r1.y, r2.y, c11.y -add.f r0.x, r0.x, c11.w -mov.f32f32 r0.y, r0.y -mad.f32 r1.w, r2.x, r0.z, r2.w -mad.f32 r1.y, r2.x, r0.z, r1.y -mad.f32 r0.z, r2.x, r0.z, r2.z -mad.f32 r2.x, c14.x, r0.y, r1.z -mad.f32 r0.y, c14.y, r0.y, r1.z -mov.f32f32 r1.x, r1.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.z, r2.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r2.y, r1.x -mov.f32f32 r2.x, r0.w -add.f r0.w, r1.w, r1.z -add.f r0.y, r0.z, r0.y -(rpt1)nop -mov.f32f32 r0.z, r0.w -mov.f32f32 r0.y, r0.y -(rpt1)nop -mul.f r0.w, c0.w, r0.z -mul.f r1.x, c0.z, r0.z -mad.f32 r0.w, c1.w, r1.y, r0.w -mad.f32 r1.x, c1.z, r1.y, r1.x -mul.f r1.z, c0.y, r0.z -mul.f r0.z, c0.x, r0.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.x, r1.x -mad.f32 r0.w, c2.w, r0.y, r0.w -mad.f32 r1.x, c2.z, r0.y, r1.x -mad.f32 r1.w, c3.w, r0.x, r0.w -mad.f32 r3.x, c3.z, r0.x, r1.x -mad.f32 r1.z, c1.y, r1.y, r1.z -mad.f32 r0.z, c1.x, r1.y, r0.z -mov.f32f32 r1.y, r1.w -mov.f32f32 r1.w, r3.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r0.z, r0.z -mov.f32f32 r3.w, r1.y -mov.f32f32 r3.z, r1.w -mad.f32 r1.y, c2.y, r0.y, r1.z -mad.f32 r0.y, c2.x, r0.y, r0.z -mad.f32 r0.z, c3.y, r0.x, r1.y -mad.f32 r0.x, c3.x, r0.x, r0.y -mov.f32f32 r2.w, r0.w -mov.f32f32 r2.z, r1.x -mov.f32f32 r0.y, r0.z -mov.f32f32 r0.x, r0.x +nop +add.f r0.y, r1.w, r2.y +add.f r2.y, r1.z, r0.w (rpt1)nop -mov.f32f32 r3.y, r0.y -mov.f32f32 r3.x, r0.x +mov.f32f32 r0.w, r0.y +mul.f r0.y, c0.x, r0.y +mov.f32f32 r2.z, r2.y +mad.f32 r0.x, c10.w, r0.z, r0.x +mul.f r0.z, c0.w, r0.w +mul.f r1.z, c0.z, r0.w +mad.f32 r0.z, c1.w, r2.x, r0.z +mad.f32 r1.z, c1.z, r2.x, r1.z +mad.f32 r1.w, c2.w, r2.z, r0.z +add.f r2.w, r0.x, c11.w +mad.f32 r1.z, c2.z, r2.z, r1.z +mul.f r0.x, c0.y, r0.w +mad.f32 r0.y, c1.x, r2.x, r0.y +mad.f32 r0.w, c3.w, r2.w, r1.w +mad.f32 r0.z, c3.z, r2.w, r1.z +mad.f32 r0.x, c1.y, r2.x, r0.x +mad.f32 r0.y, c2.x, r2.y, r0.y +mad.f32 r2.x, c2.y, r2.z, r0.x +mad.f32 r0.x, c3.x, r2.w, r0.y +mad.f32 r0.y, c3.y, r2.w, r2.x end nop nop nop -; VERT: outputs: r3.x (0:0) r2.x (5:9) -; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=3,il=12,b=0) -; VERT: 152 instructions, 0 half, 4 full -; pos: r3.x +; VERT: outputs: r0.x (0:0) r1.x (5:9) +; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r1.x (0:0,cm=3,il=12,b=0) +; VERT: 110 instructions, 0 half, 5 full diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-97.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-97.asm index 7fd0d8e..85a697c 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-97.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-97.asm @@ -2,48 +2,38 @@ ; FRAG: new compiler @in(r0.z) in2 @in(r0.w) in3 -@out(r1.x) out0 -@out(r1.y) out1 -@out(r1.z) out2 -@out(r1.w) out3 +@out(r2.x) out0 +@out(r2.y) out1 +@out(r2.z) out2 +@out(r2.w) out3 +@const(c4.x) 0x3f000000, 0x00000000, 0x3f800000, 0x3fb8aa65 +@const(c5.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)add.f r0.x, r0.w, c4.y -mov.f32f32 r0.y, c0.w -absneg.f r0.w, (neg)c2.x +absneg.f r0.y, (neg)c2.x +mov.f32f32 r2.w, c0.w add.f r0.z, r0.z, c4.y (rpt2)nop rcp r0.x, r0.x (ss)mul.f r0.x, r0.z, r0.x -mov.f32f32 r1.w, r0.y -mul.f r0.y, r0.w, c2.x -nop -mov.f32f32 r0.x, r0.x +mul.f r0.y, r0.y, c2.x (rpt2)nop mul.f r0.y, r0.y, r0.x -(rpt2)nop -mov.f32f32 r0.y, r0.y +mov.f32f32 r0.x, r0.x (rpt2)nop mul.f r0.x, r0.y, r0.x (rpt2)nop -mov.f32f32 r0.x, r0.x -(rpt2)nop mul.f r0.x, r0.x, c4.w -(rpt2)nop -mov.f32f32 r0.x, r0.x (rpt5)nop exp2 r0.x, r0.x -(ss)mov.f32f32 r0.x, r0.x -(rpt2)nop -add.f r0.y, c5.y, (neg)r0.x -mul.f r0.x, r0.x, c4.z +(ss)mov.f32f32 r0.y, r0.x +(ss)mul.f r0.x, r0.x, c4.z (rpt1)nop +add.f r0.y, c5.y, (neg)r0.y +(rpt2)nop mul.f r0.y, r0.y, c2.y (rpt2)nop add.f r0.x, r0.x, r0.y (rpt2)nop -mov.f32f32 r0.x, r0.x -(rpt2)nop -mov.f32f32 r0.x, r0.x -(rpt2)nop max.f r0.x, r0.x, c4.y (rpt2)nop min.f r0.x, r0.x, c4.z @@ -51,25 +41,22 @@ min.f r0.x, r0.x, c4.z add.f r0.y, c5.y, (neg)r0.x add.f r0.z, c5.y, (neg)r0.x add.f r0.w, c5.y, (neg)r0.x -mul.f r2.x, r0.x, c0.z +mul.f r1.x, r0.x, c0.z mul.f r0.y, r0.y, c1.z mul.f r0.z, r0.z, c1.y mul.f r0.w, r0.w, c1.x -mul.f r2.y, r0.x, c0.y -add.f r0.y, r2.x, r0.y +nop +add.f r2.z, r1.x, r0.y +mul.f r0.y, r0.x, c0.y mul.f r0.x, r0.x, c0.x (rpt1)nop -mov.f32f32 r1.z, r0.y -add.f r0.y, r2.y, r0.z -add.f r0.x, r0.x, r0.w -(rpt1)nop -mov.f32f32 r1.y, r0.y -mov.f32f32 r1.x, r0.x +add.f r2.y, r0.y, r0.z +add.f r2.x, r0.x, r0.w end -; FRAG: outputs: r1.x (1:0) +nop +nop +nop +; FRAG: outputs: r2.x (1:0) ; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) -; FRAG: 96 instructions, 0 half, 3 full -; pos (bary): r1.x -; color: r1.x -; fragcoord: r0.x +; FRAG: 70 instructions, 0 half, 3 full diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-99.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-99.asm index 7fd0d8e..85a697c 100644 --- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-99.asm +++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-99.asm @@ -2,48 +2,38 @@ ; FRAG: new compiler @in(r0.z) in2 @in(r0.w) in3 -@out(r1.x) out0 -@out(r1.y) out1 -@out(r1.z) out2 -@out(r1.w) out3 +@out(r2.x) out0 +@out(r2.y) out1 +@out(r2.z) out2 +@out(r2.w) out3 +@const(c4.x) 0x3f000000, 0x00000000, 0x3f800000, 0x3fb8aa65 +@const(c5.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)add.f r0.x, r0.w, c4.y -mov.f32f32 r0.y, c0.w -absneg.f r0.w, (neg)c2.x +absneg.f r0.y, (neg)c2.x +mov.f32f32 r2.w, c0.w add.f r0.z, r0.z, c4.y (rpt2)nop rcp r0.x, r0.x (ss)mul.f r0.x, r0.z, r0.x -mov.f32f32 r1.w, r0.y -mul.f r0.y, r0.w, c2.x -nop -mov.f32f32 r0.x, r0.x +mul.f r0.y, r0.y, c2.x (rpt2)nop mul.f r0.y, r0.y, r0.x -(rpt2)nop -mov.f32f32 r0.y, r0.y +mov.f32f32 r0.x, r0.x (rpt2)nop mul.f r0.x, r0.y, r0.x (rpt2)nop -mov.f32f32 r0.x, r0.x -(rpt2)nop mul.f r0.x, r0.x, c4.w -(rpt2)nop -mov.f32f32 r0.x, r0.x (rpt5)nop exp2 r0.x, r0.x -(ss)mov.f32f32 r0.x, r0.x -(rpt2)nop -add.f r0.y, c5.y, (neg)r0.x -mul.f r0.x, r0.x, c4.z +(ss)mov.f32f32 r0.y, r0.x +(ss)mul.f r0.x, r0.x, c4.z (rpt1)nop +add.f r0.y, c5.y, (neg)r0.y +(rpt2)nop mul.f r0.y, r0.y, c2.y (rpt2)nop add.f r0.x, r0.x, r0.y (rpt2)nop -mov.f32f32 r0.x, r0.x -(rpt2)nop -mov.f32f32 r0.x, r0.x -(rpt2)nop max.f r0.x, r0.x, c4.y (rpt2)nop min.f r0.x, r0.x, c4.z @@ -51,25 +41,22 @@ min.f r0.x, r0.x, c4.z add.f r0.y, c5.y, (neg)r0.x add.f r0.z, c5.y, (neg)r0.x add.f r0.w, c5.y, (neg)r0.x -mul.f r2.x, r0.x, c0.z +mul.f r1.x, r0.x, c0.z mul.f r0.y, r0.y, c1.z mul.f r0.z, r0.z, c1.y mul.f r0.w, r0.w, c1.x -mul.f r2.y, r0.x, c0.y -add.f r0.y, r2.x, r0.y +nop +add.f r2.z, r1.x, r0.y +mul.f r0.y, r0.x, c0.y mul.f r0.x, r0.x, c0.x (rpt1)nop -mov.f32f32 r1.z, r0.y -add.f r0.y, r2.y, r0.z -add.f r0.x, r0.x, r0.w -(rpt1)nop -mov.f32f32 r1.y, r0.y -mov.f32f32 r1.x, r0.x +add.f r2.y, r0.y, r0.z +add.f r2.x, r0.x, r0.w end -; FRAG: outputs: r1.x (1:0) +nop +nop +nop +; FRAG: outputs: r2.x (1:0) ; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) -; FRAG: 96 instructions, 0 half, 3 full -; pos (bary): r1.x -; color: r1.x -; fragcoord: r0.x +; FRAG: 70 instructions, 0 half, 3 full diff --git a/reference/0ad-frag-1.asm b/reference/0ad-frag-1.asm index 12fbb01..be30c1a 100644 --- a/reference/0ad-frag-1.asm +++ b/reference/0ad-frag-1.asm @@ -8,203 +8,139 @@ @out(r2.y) out1 @out(r2.z) out2 @out(r2.w) out3 +@const(c9.x) 0x3f000000, 0x00000000, 0x3d4ccccd, 0xbb449ba6 +@const(c10.x) 0xbf000000, 0x3f800000, 0x40000000, 0x3de38866 +@const(c11.x) 0x3fb8aa65, 0x3cf5c28f, 0x00000000, 0x00000000 +@const(c12.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.x, 4, r1.x add.f r0.y, r0.w, c9.y bary.f r0.w, 5, r1.x bary.f r1.z, 10, r1.x -add.f r1.w, r0.x, c10.x -bary.f r2.x, 8, r1.x +add.f r2.x, r0.x, c10.x +bary.f r1.w, 11, r1.x add.f r2.y, r0.w, c10.x -mov.f32f32 r1.z, r1.z -floor.f r2.z, r1.w +bary.f r3.y, 8, r1.x +floor.f r2.z, r2.x rcp r0.y, r0.y add.f r0.z, r0.z, c9.y floor.f r2.w, r2.y -mov.f32f32 r3.y, r1.z -add.f r1.z, r1.w, (neg)r2.z +bary.f r3.z, 9, r1.x +add.f r3.w, r2.x, (neg)r2.z (ss)mul.f r0.y, r0.z, r0.y -add.f r0.z, r2.y, (neg)r2.w -bary.f r1.w, 11, r1.x -mov.f32f32 r1.z, r1.z +absneg.f r0.z, (neg)c7.x +add.f r4.x, r2.y, (neg)r2.w +mov.f32f32 r4.y, r3.w +sam (f32)(xyzw)r2.x, r1.z, s#0, t#0 +(ss)add.f r1.z, r3.w, c10.y +mul.f r0.z, r0.z, c7.x +mov.f32f32 r1.w, r4.x +mul.f r3.w, c9.x, r4.y +add.f r4.y, c10.z, (neg)r4.y +mul.f r0.z, r0.z, r0.y mov.f32f32 r0.y, r0.y -absneg.f r2.y, (neg)c7.x -mov.f32f32 r0.z, r0.z -mul.f r2.z, c9.x, r1.z -mov.f32f32 r1.w, r1.w -add.f r2.w, c10.z, (neg)r1.z -add.f r3.w, c10.z, (neg)r0.z -mov.f32f32 r2.z, r2.z -mul.f r2.y, r2.y, c7.x -mul.f r4.x, c9.x, r0.z -mov.f32f32 r3.z, r1.w -add.f r0.x, r0.x, (neg)r2.z -mul.f r1.w, r2.y, r0.y -mov.f32f32 r2.y, r4.x -mov.f32f32 r2.z, r2.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r1.w, r1.w -add.f r0.w, r0.w, (neg)r2.y -sam (f32)(xyzw)r4.x, r3.y, s#0, t#0 -(sy)cmps.f.lt r2.y, r4.w, c9.z -add.f r2.w, c10.x, r0.x -mul.f r0.y, r1.w, r0.y +add.f r0.x, r0.x, (neg)r3.w +mul.f r3.w, c9.x, r1.w +mov.f32f32 r4.z, r4.y +mul.f r0.y, r0.z, r0.y +mov.f32f32 r0.z, r0.x +add.f r0.w, r0.w, (neg)r3.w add.f r0.x, c10.y, r0.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.w, r2.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x -add.f r2.w, c10.y, r0.w -mul.f r1.w, r1.w, c3.z mul.f r0.y, r0.y, c11.x -mul.f r0.x, r0.x, c3.z -mov.f32f32 r2.w, r2.w -(ss)mov.f32f32 r3.y, r1.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r3.z, r0.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r5.x, r3.y -add.f r0.w, c10.x, r0.w -mov.f32f32 r3.y, r3.z -mov.f32f32 r5.w, r0.x -mov.f32f32 r0.x, r1.w -mov.f32f32 r0.w, r0.w -exp2 r0.y, r0.y -(ss)mov.f32f32 r0.y, r0.y -mov.f32f32 r6.z, r3.y -mul.f r1.w, r2.w, c3.w -mul.f r0.w, r0.w, c3.w -add.f r2.w, c12.y, (neg)r0.y -mov.f32f32 r7.y, r0.x -mov.f32f32 r0.x, r1.w -mov.f32f32 r3.y, r0.w -mul.f r2.w, r2.w, c7.y -mul.f r0.y, r0.y, c10.y -mov.f32f32 r1.w, r1.w -mov.f32f32 r5.y, r3.y -bary.f r3.y, 6, r1.x -add.f r0.y, r0.y, r2.w -mov.f32f32 r1.w, r1.w -mov.f32f32 r0.w, r0.w -add.f r2.w, r3.y, c9.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r6.w, r1.w -mov.f32f32 r6.x, r0.w -mov.f32f32 r0.w, r2.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.w, r2.w -mov.f32f32 r3.y, r2.w -mov.f32f32 r5.z, r0.w -max.f r0.y, r0.y, c9.y -mov.f32f32 r7.x, r1.w -mov.f32f32 r6.y, r3.y -mov.f32f32 r7.z, r0.x -mov.f32f32 r0.x, r2.w -cov.u32f32 r0.w, r2.y -sam.s (f32)(x)r7.w, r5.x, s#2, t#2 -(sy)mov.f32f32 r1.w, r7.w -min.f r0.y, r0.y, c10.y -sam.s (f32)(x)r7.w, r6.z, s#2, t#2 -(sy)mov.f32f32 r2.y, r7.w -(ss)nop -sam.s (f32)(x)r5.x, r5.w, s#2, t#2 -(sy)mov.f32f32 r2.w, r5.x -mov.f32f32 r1.w, r1.w -mov.f32f32 r3.y, r3.w -add.f r3.z, c12.y, (neg)r0.y -add.f r3.w, c12.y, (neg)r0.y -add.f r5.x, c12.y, (neg)r0.y -mul.f r5.y, r2.z, r3.y -mul.f r3.z, r3.z, c6.z -mul.f r3.w, r3.w, c6.y -mul.f r5.x, r5.x, c6.x -mul.f r1.w, r5.y, r1.w -add.f r1.z, r1.z, c10.y -mov.f32f32 r7.w, r0.x -mov.f32f32 r0.x, (0.000000) -add.f r0.z, r0.z, c10.y -mul.f r3.y, r1.z, r3.y -mov.f32f32 r2.x, r2.x -cmps.f.ne p0.x, r0.w, r0.x -mul.f r0.x, r2.z, r0.z -mad.f32 r0.w, r3.y, r2.w, r1.w -sam.s (f32)(x)r5.y, r7.y, s#2, t#2 +add.f r0.z, c10.x, r0.z +mov.f32f32 r3.w, r0.w +mul.f r4.w, r0.x, c3.z +add.f r0.x, c10.y, r0.w +mul.f r5.z, r0.z, c3.z +add.f r0.z, c10.x, r3.w +mov.f32f32 r6.y, r4.w +mul.f r5.x, r0.x, c3.w +mov.f32f32 r7.x, r5.z +mul.f r6.z, r0.z, c3.w +exp2 r0.x, r0.y +(ss)mov.f32f32 r0.y, r0.x +bary.f r0.z, 6, r1.x +mov.f32f32 r5.w, r5.x +mov.f32f32 r7.y, r6.z +add.f r0.y, c12.y, (neg)r0.y +add.f r5.y, r0.z, c9.w +add.f r0.z, c10.z, (neg)r1.w +mul.f r0.x, r0.x, c10.y +add.f r0.w, r4.x, c10.y +mov.f32f32 r7.z, r5.y +mul.f r0.y, r0.y, c7.y +mov.f32f32 r6.w, r5.y +mov.f32f32 r6.x, r5.y (rpt1)nop -(sy)mov.f32f32 r1.w, r5.y -mov.f32f32 r0.w, r0.w -bary.f r2.z, 0, r1.x -mov.f32f32 r5.y, r2.x -mad.f32 r0.x, r0.x, r1.w, r0.w -bary.f r0.w, 1, r1.x -bary.f r1.w, 9, r1.x +add.f r0.x, r0.x, r0.y +sam.s (f32)(x)r7.x, r7.x, s#2, t#2 +mov.f32f32 r0.y, r0.z +nop +(ss)nop +sam.s (f32)(x)r7.y, r6.y, s#2, t#2 +sam.s (f32)(x)r7.z, r4.w, s#2, t#2 mul.f r0.z, r1.z, r0.z -mov.f32f32 r0.x, r0.x -bary.f (ei)r1.x, 2, r1.x -mov.f32f32 r1.y, r1.w -mad.f32 r0.x, r0.z, r2.y, r0.x +mul.f r1.w, r4.y, r0.w +mul.f r0.y, r4.z, r0.y +max.f r0.x, r0.x, c9.y +sam.s (f32)(x)r3.w, r5.z, s#2, t#2 +(sy)cmps.f.lt r4.x, r2.w, c9.z +mul.f r0.w, r1.z, r0.w +mul.f r0.y, r0.y, r7.x +min.f r0.x, r0.x, c10.y +mad.f32 r0.y, r0.z, r7.y, r0.y +cov.u32f32 r0.z, r4.x +mad.f32 r0.y, r1.w, r3.w, r0.y +add.f r1.z, c12.y, (neg)r0.x +mad.f32 r0.y, r0.w, r7.z, r0.y +add.f r0.w, c12.y, (neg)r0.x +add.f r1.w, c12.y, (neg)r0.x +mov.f32f32 r3.w, (0.000000) +mul.f r0.y, c10.w, r0.y +bary.f r4.x, 0, r1.x +mul.f r1.z, r1.z, c6.z +mul.f r0.w, r0.w, c6.y +mov.f32f32 r4.y, r0.y +bary.f r4.z, 2, r1.x +bary.f (ei)r1.x, 1, r1.x +mul.f r1.y, r2.x, r4.x +mul.f r1.w, r1.w, c6.x +mul.f r4.x, r2.z, r4.z +mul.f r1.x, r2.y, r1.x +mul.f r0.y, r1.y, r0.y +cmps.f.ne p0.x, r0.z, r3.w +mul.f r0.z, r4.x, r4.y +mul.f r1.x, r1.x, r4.y +mad.f32 r0.z, c5.z, r2.z, r0.z +mad.f32 r1.x, c5.y, r2.y, r1.x +mad.f32 r0.y, c5.x, r2.x, r0.y +sam (f32)(w)r3.y, r3.y, s#1, t#1 +(sy)cmps.f.lt r1.y, r4.x, c11.y +mul.f r0.z, r0.x, r0.z +mul.f r1.x, r0.x, r1.x +mul.f r0.x, r0.x, r0.y kill p0.x -mov.f32f32 r0.z, r4.w -mov.f32f32 r5.z, r1.y -mov.f32f32 r0.x, r0.x -mul.f r1.x, r4.z, r1.x -mul.f r0.w, r4.y, r0.w -mov.f32f32 r2.w, r0.z -mul.f r0.x, c10.w, r0.x -mul.f r0.z, r4.x, r2.z -sam (f32)(w)r1.y, r5.y, s#1, t#1 -nop -(sy)cmps.f.lt r1.y, r2.x, c11.y -mov.f32f32 r0.x, r0.x -mov.f32f32 r1.z, r2.x -mov.f32f32 r1.w, c9.y -nop -mul.f r1.x, r1.x, r0.x -mul.f r0.w, r0.w, r0.x -mul.f r0.x, r0.z, r0.x +add.f r0.y, r0.z, r1.z cov.u32f32 r0.z, r1.y -mov.f32f32 r1.x, r1.x -mov.f32f32 r0.w, r0.w -mad.f32 r1.x, c5.z, r4.z, r1.x -mad.f32 r0.w, c5.y, r4.y, r0.w -mov.f32f32 r0.x, r0.x +add.f r0.w, r1.x, r0.w +add.f r0.x, r0.x, r1.w +mov.f32f32 r1.x, c9.y cmps.f.ne r0.z, r0.z, c9.y -mov.f32f32 r1.x, r1.x -mov.f32f32 r0.w, r0.w -mad.f32 r0.x, c5.x, r4.x, r0.x -mov.f32f32 r1.y, r1.z -mul.f r1.x, r0.y, r1.x -mul.f r0.w, r0.y, r0.w -mov.f32f32 r0.x, r0.x -sel.b32 r0.z, r1.w, r0.z, r1.y -add.f r1.x, r1.x, r3.z -add.f r0.w, r0.w, r3.w -mul.f r0.x, r0.y, r0.x -nop -mul.f r0.y, r1.x, r0.z +(rpt2)nop +sel.b32 r0.z, r1.x, r0.z, r4.x +(rpt2)nop +mul.f r0.y, r0.y, r0.z mul.f r0.w, r0.w, r0.z -add.f r0.x, r0.x, r5.x -nop -mul.f r0.y, r0.y, c4.z -mul.f r0.w, r0.w, c4.y +(rpt1)nop +mul.f r2.z, r0.y, c4.z +mul.f r2.y, r0.w, c4.y mul.f r0.x, r0.x, r0.z -nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.w -mul.f r0.x, r0.x, c4.x -nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.x, r0.x -nop -mov.f32f32 r2.z, r0.y -mov.f32f32 r2.y, r0.z -mov.f32f32 r0.x, r0.x (rpt2)nop -mov.f32f32 r2.x, r0.x +mul.f r2.x, r0.x, c4.x end nop nop nop ; FRAG: outputs: r2.x (1:0) ; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r2.x (5:10,cm=f,il=12,b=1) r1.x (5:11,cm=f,il=16,b=1) -; FRAG: 196 instructions, 0 half, 8 full +; FRAG: 133 instructions, 0 half, 8 full diff --git a/reference/0ad-frag-2.asm b/reference/0ad-frag-2.asm index 4377ecf..227a081 100644 --- a/reference/0ad-frag-2.asm +++ b/reference/0ad-frag-2.asm @@ -8,199 +8,135 @@ @out(r2.y) out1 @out(r2.z) out2 @out(r2.w) out3 +@const(c9.x) 0x3f000000, 0x00000000, 0x3f800000, 0xbb449ba6 +@const(c10.x) 0xbf000000, 0x40000000, 0x3f800000, 0x3de38866 +@const(c11.x) 0x3fb8aa65, 0x3cf5c28f, 0x00000000, 0x00000000 +@const(c12.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.x, 4, r1.x add.f r0.y, r0.w, c9.y bary.f r0.w, 5, r1.x bary.f r1.z, 8, r1.x -add.f r1.w, r0.x, c10.x -bary.f r2.x, 10, r1.x -bary.f r2.y, 6, r1.x -add.f r2.z, r0.w, c10.x -floor.f r2.w, r1.w +add.f r2.x, r0.x, c10.x +bary.f r1.w, 9, r1.x +add.f r2.y, r0.w, c10.x +bary.f r2.z, 6, r1.x +floor.f r2.w, r2.x rcp r0.y, r0.y add.f r0.z, r0.z, c9.y -mov.f32f32 r1.z, r1.z -floor.f r3.x, r2.z -add.f r1.w, r1.w, (neg)r2.w +floor.f r3.x, r2.y +add.f r3.w, r2.z, c9.w +add.f r2.x, r2.x, (neg)r2.w (ss)mul.f r0.y, r0.z, r0.y -mov.f32f32 r3.y, r1.z -add.f r0.z, r2.z, (neg)r3.x -mov.f32f32 r1.z, r1.w +absneg.f r0.z, (neg)c7.x +add.f r2.y, r2.y, (neg)r3.x +mov.f32f32 r2.z, r2.x +add.f r2.x, r2.x, c9.z +mul.f r0.z, r0.z, c7.x +sam (f32)(w)r4.x, r1.z, s#1, t#1 +(ss)mov.f32f32 r1.z, r2.y +mul.f r1.w, c9.x, r2.z +add.f r2.z, c10.y, (neg)r2.z +mul.f r0.z, r0.z, r0.y mov.f32f32 r0.y, r0.y -absneg.f r1.w, (neg)c7.x -mov.f32f32 r0.z, r0.z -mul.f r2.z, c9.x, r1.z -add.f r2.w, c10.y, (neg)r1.z -mul.f r1.w, r1.w, c7.x -bary.f r3.x, 9, r1.x -mov.f32f32 r2.z, r2.z -mul.f r3.z, c9.x, r0.z -mul.f r1.w, r1.w, r0.y -mov.f32f32 r2.w, r2.w -add.f r0.x, r0.x, (neg)r2.z -mov.f32f32 r2.z, r3.z -mov.f32f32 r1.w, r1.w -add.f r3.z, c10.y, (neg)r0.z -mov.f32f32 r0.x, r0.x -add.f r0.w, r0.w, (neg)r2.z -mul.f r0.y, r1.w, r0.y -mov.f32f32 r1.w, r3.z -add.f r2.z, c10.x, r0.x +add.f r0.x, r0.x, (neg)r1.w +mul.f r1.w, c9.x, r1.z +mov.f32f32 r2.w, r2.z +mul.f r0.y, r0.z, r0.y +mov.f32f32 r0.z, r0.x +add.f r0.w, r0.w, (neg)r1.w add.f r0.x, c10.z, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r2.z, r2.z -mov.f32f32 r0.x, r0.x mul.f r0.y, r0.y, c11.x -add.f r3.z, c10.z, r0.w -mul.f r2.z, r2.z, c3.z -mul.f r0.x, r0.x, c3.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r3.z, r3.z -mov.f32f32 r3.w, r2.z -mov.f32f32 r4.x, r0.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r2.z, r2.z -mov.f32f32 r4.y, r3.w -add.f r0.w, c10.x, r0.w -exp2 r0.y, r0.y -(ss)mov.f32f32 r0.y, r0.y -mov.f32f32 r3.w, r4.x -mov.f32f32 r5.x, r0.x -mov.f32f32 r0.x, r0.w -add.f r0.w, c12.y, (neg)r0.y -mov.f32f32 r5.w, r3.w -mul.f r3.z, r3.z, c3.w -mul.f r0.x, r0.x, c3.w -mul.f r0.w, r0.w, c7.y -mul.f r0.y, r0.y, c9.z -mov.f32f32 r3.w, r3.z -mov.f32f32 r4.x, r0.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r6.z, r2.z -mov.f32f32 r2.z, r3.z -mov.f32f32 r4.z, r4.x -add.f r2.y, r2.y, c9.w -add.f r0.y, r0.y, r0.w -mov.f32f32 r0.w, r3.w -mov.f32f32 r5.y, r0.x -mov.f32f32 r0.x, r2.y -mov.f32f32 r0.y, r0.y -mov.f32f32 r6.x, r0.w -mov.f32f32 r0.w, r2.y -mov.f32f32 r4.w, r0.x -mov.f32f32 r0.x, r0.y -mov.f32f32 r0.y, r2.y -mov.f32f32 r6.w, r2.z -mov.f32f32 r6.y, r0.w -mov.f32f32 r0.w, r2.y -mov.f32f32 r5.z, r0.y -sam.s (f32)(x)r3.z, r4.y, s#2, t#2 -(sy)mov.f32f32 r0.y, r3.z +add.f r0.z, c10.x, r0.z +mov.f32f32 r1.w, r0.w +mul.f r3.y, r0.x, c3.z +add.f r0.x, c10.z, r0.w +mul.f r4.x, r0.z, c3.z +add.f r0.z, c10.x, r1.w +mov.f32f32 r5.x, r3.y +mul.f r3.z, r0.x, c3.w +mov.f32f32 r5.w, r4.x +mul.f r5.y, r0.z, c3.w +exp2 r0.x, r0.y +(ss)mov.f32f32 r0.y, r0.x +mov.f32f32 r5.z, r3.w +mov.f32f32 r4.y, r3.z +mov.f32f32 r6.x, r5.y +mov.f32f32 r6.y, r3.w +add.f r0.y, c12.y, (neg)r0.y +mov.f32f32 r4.z, r3.w +sam.s (f32)(x)r3.x, r3.y, s#2, t#2 +add.f r0.z, c10.y, (neg)r1.z +sam.s (f32)(x)r6.z, r5.x, s#2, t#2 +mul.f r0.x, r0.x, c9.z +add.f r0.w, r2.y, c9.z +mul.f r0.y, r0.y, c7.y +(ss)nop +sam.s (f32)(x)r5.x, r5.w, s#2, t#2 +mov.f32f32 r1.z, r0.z +mul.f r0.z, r2.x, r0.z +mul.f r1.w, r2.z, r0.w +add.f r0.x, r0.x, r0.y +mul.f r0.y, r2.w, r1.z +sam.s (f32)(x)r3.y, r4.x, s#2, t#2 +mul.f r0.w, r2.x, r0.w +(sy)cmps.f.lt r1.z, r4.w, c11.y +bary.f r2.x, 10, r1.x +mul.f r0.y, r0.y, r5.x max.f r0.x, r0.x, c9.y -mov.f32f32 r7.x, r0.w -mul.f r0.w, r2.w, r1.w -mov.f32f32 r0.y, r0.y +mad.f32 r0.y, r0.z, r6.z, r0.y +cov.u32f32 r0.z, r1.z +mad.f32 r0.y, r1.w, r3.y, r0.y min.f r0.x, r0.x, c9.z -sam.s (f32)(x)r3.z, r5.w, s#2, t#2 -nop -(sy)mov.f32f32 r2.y, r3.z -mul.f r0.y, r0.w, r0.y -sam.s (f32)(x)r3.z, r5.x, s#2, t#2 -(sy)mov.f32f32 r0.w, r3.z -add.f r1.z, r1.z, c9.z -add.f r2.z, c12.y, (neg)r0.x -add.f r3.z, c12.y, (neg)r0.x -add.f r3.w, c12.y, (neg)r0.x -mul.f r1.w, r1.z, r1.w -mul.f r2.z, r2.z, c6.z -mul.f r4.x, r3.z, c6.y -mul.f r3.w, r3.w, c6.x -mad.f32 r0.y, r1.w, r0.w, r0.y -(ss)nop -sam.s (f32)(x)r4.y, r6.z, s#2, t#2 -add.f r0.z, r0.z, c9.z -(sy)mov.f32f32 r0.w, r4.y -mov.f32f32 r1.w, r3.x -mov.f32f32 r0.y, r0.y -mul.f r2.w, r2.w, r0.z -mul.f r0.z, r1.z, r0.z -mov.f32f32 r3.z, r1.w -mov.f32f32 r1.z, r2.x -mad.f32 r0.y, r2.w, r0.w, r0.y -bary.f r0.w, 11, r1.x -mov.f32f32 r1.w, c9.z -bary.f r2.x, 2, r1.x -mov.f32f32 r0.y, r0.y -sam (f32)(w)r2.w, r3.y, s#1, t#1 -(sy)cmps.f.lt r2.w, r3.z, c11.y -mad.f32 r0.y, r0.z, r2.y, r0.y -mov.f32f32 r0.z, r3.z -mov.f32f32 r3.x, r1.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.y, r0.y -cov.u32f32 r1.z, r2.w -mov.f32f32 r0.z, r0.z -(ss)mov.f32f32 r3.y, r0.w +mad.f32 r0.y, r0.w, r3.x, r0.y +cmps.f.ne r0.z, r0.z, c9.y +(rpt1)nop mul.f r0.y, c10.w, r0.y -cmps.f.ne r0.w, r1.z, c9.y +bary.f r2.y, 11, r1.x +add.f r0.w, c12.y, (neg)r0.x +add.f r1.z, c12.y, (neg)r0.x +mov.f32f32 r1.w, r0.y +add.f r2.z, c12.y, (neg)r0.x (rpt1)nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.z, c9.y -sam (f32)(xyz)r4.y, r3.x, s#0, t#0 -(sy)mul.f r2.x, r4.w, r2.x +sam (f32)(xyz)r2.w, r2.x, s#0, t#0 +(ss)bary.f r2.x, 2, r1.x bary.f r2.y, 1, r1.x bary.f (ei)r1.x, 0, r1.x -sel.b32 r0.z, r1.z, r0.w, r0.z -mul.f r0.w, r2.x, r0.y -mul.f r1.y, r4.z, r2.y -mul.f r1.x, r4.y, r1.x -mov.f32f32 r2.w, r1.w -mov.f32f32 r0.w, r0.w -mul.f r1.y, r1.y, r0.y -mad.f32 r0.w, c5.z, r4.w, r0.w +mul.f r0.w, r0.w, c6.z +(sy)mul.f r1.y, r3.y, r2.x +mul.f r2.x, r3.x, r2.y +mul.f r1.x, r2.w, r1.x +mul.f r1.z, r1.z, c6.y +mul.f r1.y, r1.y, r1.w +mul.f r1.w, r2.x, r1.w +mad.f32 r1.y, c5.z, r3.y, r1.y +mad.f32 r1.w, c5.y, r3.x, r1.w mul.f r0.y, r1.x, r0.y -(rpt1)nop -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.x, r1.y -mov.f32f32 r0.y, r0.y -mad.f32 r1.x, c5.y, r4.z, r1.x -mul.f r0.w, r0.x, r0.w -mad.f32 r0.y, c5.x, r4.y, r0.y -(rpt1)nop -add.f r0.w, r0.w, r2.z -mov.f32f32 r1.x, r1.x -mov.f32f32 r0.y, r0.y -nop -mul.f r0.w, r0.w, r0.z -mul.f r1.x, r0.x, r1.x +mul.f r1.x, r2.z, c6.x +mul.f r1.y, r0.x, r1.y +mul.f r1.w, r0.x, r1.w +mad.f32 r0.y, c5.x, r2.w, r0.y +mov.f32f32 r2.x, c9.y +add.f r0.w, r1.y, r0.w +add.f r1.y, r1.w, r1.z mul.f r0.x, r0.x, r0.y -nop -mul.f r0.y, r0.w, c4.z -add.f r0.w, r1.x, r4.x -add.f r0.x, r0.x, r3.w -nop -mov.f32f32 r0.y, r0.y -mul.f r0.w, r0.w, r0.z -mul.f r0.x, r0.x, r0.z -nop -mov.f32f32 r0.y, r0.y -mul.f r0.z, r0.w, c4.y -mul.f r0.x, r0.x, c4.x -nop -mov.f32f32 r2.z, r0.y -mov.f32f32 r0.y, r0.z -mov.f32f32 r0.x, r0.x +sel.b32 r0.y, r2.x, r0.z, r4.w +mov.f32f32 r2.w, c9.z (rpt1)nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x +mul.f r0.z, r0.w, r0.y +mul.f r0.w, r1.y, r0.y (rpt1)nop -mov.f32f32 r2.y, r0.y -mov.f32f32 r2.x, r0.x +mul.f r2.z, r0.z, c4.z +mul.f r2.y, r0.w, c4.y +add.f r0.x, r0.x, r1.x +(rpt2)nop +mul.f r0.x, r0.x, r0.y +(rpt2)nop +mul.f r2.x, r0.x, c4.x end nop nop +nop ; FRAG: outputs: r2.x (1:0) -; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r2.x (5:10,cm=f,il=12,b=1) r0.x (5:11,cm=f,il=16,b=1) -; FRAG: 195 instructions, 0 half, 8 full +; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r2.x (5:10,cm=f,il=12,b=1) r1.z (5:11,cm=f,il=16,b=1) +; FRAG: 129 instructions, 0 half, 7 full diff --git a/reference/0ad-frag.asm b/reference/0ad-frag.asm index 26fc842..58b406d 100644 --- a/reference/0ad-frag.asm +++ b/reference/0ad-frag.asm @@ -6,131 +6,94 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c5.x) 0x3a03126f, 0x3f000000, 0x3f800000, 0x40000000 +@const(c6.x) 0x3f800000, 0xbf000000, 0x3de38866, 0x00000000 +@const(c7.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.z, 8, r0.x bary.f r0.w, 9, r0.x bary.f r1.x, 10, r0.x mov.f32f32 r1.y, c5.x add.f r1.z, r0.z, (neg)c5.y add.f r1.w, r0.w, (neg)c5.y -bary.f r2.x, 4, r0.x -bary.f r2.y, 12, r0.x +bary.f r2.x, 2, r0.x +bary.f r2.y, 4, r0.x floor.f r2.z, r1.z floor.f r2.w, r1.w -add.f r1.x, r1.x, (neg)r1.y -mov.f32f32 r3.x, r2.x +add.f r3.z, r1.x, (neg)r1.y +mul.f r1.x, r2.x, c5.w add.f r1.y, r1.z, (neg)r2.z add.f r1.z, r1.w, (neg)r2.w -mov.f32f32 r1.x, r1.x -bary.f r1.w, 5, r0.x -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.z, r1.z -mov.f32f32 r3.w, r1.x -mov.f32f32 r4.z, r1.x +mov.f32f32 r4.y, r3.z +bary.f r2.z, 5, r0.x mad.f32 r0.z, (neg)c5.y, r1.y, r0.z mad.f32 r0.w, (neg)c5.y, r1.z, r0.w -add.f r2.x, c5.w, (neg)r1.y -add.f r1.y, r1.y, c5.z -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -add.f r2.z, c5.w, (neg)r1.z -add.f r1.z, r1.z, c5.z -add.f r2.w, c6.y, r0.z +mov.f32f32 r1.y, r1.y +mov.f32f32 r1.z, r1.z +mov.f32f32 r1.w, r0.z +mov.f32f32 r2.x, r0.w add.f r0.z, c6.x, r0.z -add.f r3.y, c6.y, r0.w add.f r0.w, c6.x, r0.w -mul.f r2.w, r2.w, c2.z -mul.f r0.z, r0.z, c2.z -mul.f r3.z, r3.y, c2.w -mul.f r0.w, r0.w, c2.w -mov.f32f32 r3.y, r2.w -mov.f32f32 r4.x, r0.z -mov.f32f32 r2.w, r2.w -mov.f32f32 r0.z, r0.z -mov.f32f32 r3.y, r3.y -mov.f32f32 r4.y, r3.z -mov.f32f32 r4.x, r4.x -mov.f32f32 r4.w, r2.w -mov.f32f32 r0.w, r0.w -mov.f32f32 r3.z, r4.y -mov.f32f32 r4.y, r4.y -mov.f32f32 r5.z, r0.z -mov.f32f32 r5.w, r0.w -mov.f32f32 r5.x, r0.w -mov.f32f32 r5.y, r1.x -mov.f32f32 r6.x, r1.x -sam.s (f32)(x)r6.y, r3.y, s#2, t#2 -mul.f r0.z, r2.x, r2.z -(ss)nop -sam.s (f32)(y)r3.y, r4.x, s#2, t#2 -mul.f r0.w, r1.y, r2.z -mul.f r1.x, r2.x, r1.z -mul.f r1.y, r1.y, r1.z -mov.f32f32 r0.z, r0.z -(ss)nop -sam.s (f32)(z)r3.w, r4.w, s#2, t#2 +add.f r1.w, c6.y, r1.w +add.f r2.x, c6.y, r2.x +mul.f r3.x, r0.z, c2.z +mul.f r3.y, r0.w, c2.w +mul.f r3.w, r1.w, c2.z +mul.f r4.w, r2.x, c2.w +mov.f32f32 r4.z, r3.x +mov.f32f32 r5.x, r4.y +mov.f32f32 r5.y, r3.w +mov.f32f32 r5.z, r4.w +mov.f32f32 r5.w, r4.y +mov.f32f32 r4.x, r3.y +sam.s (f32)(w)r2.w, r3.x, s#2, t#2 +nop +add.f r0.z, c5.w, (neg)r1.y +sam.s (f32)(y)r6.x, r4.z, s#2, t#2 (rpt1)nop +add.f r0.w, c5.w, (neg)r1.z (ss)nop -sam.s (f32)(w)r4.z, r5.z, s#2, t#2 -mov.f32f32 r0.w, r0.w -(sy)mul.f r0.z, r6.y, r0.z -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.y, r1.y -mad.f32 r0.z, r3.z, r0.w, r0.z -mov.f32f32 r3.y, r1.w -mov.f32f32 r2.x, r2.y -bary.f r0.w, 13, r0.x -mov.f32f32 r0.z, r0.z -bary.f r1.z, 2, r0.x -mad.f32 r0.z, r4.y, r1.x, r0.z -mov.f32f32 r2.y, r0.w -sam (f32)(xyzw)r2.z, r3.x, s#0, t#0 -nop -mul.f r0.w, r1.z, c5.w -mov.f32f32 r0.z, r0.z -bary.f r1.x, 1, r0.x -mad.f32 r0.z, r5.y, r1.y, r0.z +sam.s (f32)(x)r4.z, r5.y, s#2, t#2 +add.f r1.y, r1.y, c5.z +add.f r1.z, r1.z, c5.z +(ss)nop +sam.s (f32)(z)r4.w, r3.w, s#2, t#2 +sam (f32)(xyzw)r2.x, r2.y, s#0, t#0 +bary.f r1.w, 1, r0.x +mul.f r2.w, r0.z, r0.w +mul.f r0.w, r1.y, r0.w +mul.f r0.z, r0.z, r1.z +mul.f r1.y, r1.y, r1.z +(sy)mul.f r1.z, r4.z, r2.w +mul.f r2.w, r1.w, c5.w +mad.f32 r0.w, r6.y, r0.w, r1.z +bary.f r3.x, 12, r0.x +mad.f32 r0.z, r5.y, r0.z, r0.w +bary.f r3.y, 13, r0.x +mad.f32 r0.z, r3.z, r1.y, r0.z bary.f (ei)r0.x, 0, r0.x mov.f32f32 r1.w, (0.000000) nop -mov.f32f32 r0.y, r0.z -sam (f32)(w)r3.y, r2.x, s#3, t#3 -(rpt1)nop -mul.f r0.z, r1.x, c5.w -mul.f r0.y, r0.y, c6.z +mul.f r0.y, r0.z, c6.z mul.f r0.x, r0.x, c5.w +(ss)nop +sam (f32)(w)r3.x, r3.x, s#3, t#3 (rpt1)nop -mov.f32f32 r0.y, r0.y -(rpt2)nop -mad.f32 r0.w, r0.w, r0.y, c4.z -mad.f32 r0.z, r0.z, r0.y, c4.y +mov.f32f32 r0.z, r0.y mad.f32 r0.x, r0.x, r0.y, c4.x +(rpt1)nop +mad.f32 r0.y, r1.x, r0.z, c4.z +mad.f32 r0.z, r2.w, r0.z, c4.y +mul.f r0.x, r2.x, r0.x nop -mov.f32f32 r0.y, r0.w -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.x, r0.x -nop -(sy)mul.f r0.y, r3.x, r0.y -mul.f r0.z, r2.w, r0.z -mul.f r0.x, r2.z, r0.x -nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.x, r0.x -nop -mul.f r0.y, r0.y, r4.x -mul.f r0.z, r0.z, r4.x -mul.f r0.x, r0.x, r4.x -nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.x, r0.x -nop -mov.f32f32 r1.z, r0.y -mov.f32f32 r1.y, r0.z -mov.f32f32 r1.x, r0.x +mul.f r0.y, r2.z, r0.y +mul.f r0.z, r2.y, r0.z +(rpt1)nop +(sy)mul.f r1.z, r0.y, r3.w +mul.f r1.y, r0.z, r3.w +mul.f r1.x, r0.x, r3.w end nop ; FRAG: outputs: r1.x (1:0) ; FRAG: inputs: r0.x (1:0,cm=f,il=8,b=1) r1.x (5:0,cm=f,il=12,b=1) r2.x (5:2,cm=f,il=16,b=1) r3.x (5:3,cm=f,il=20,b=1) -; FRAG: 128 instructions, 0 half, 7 full +; FRAG: 87 instructions, 0 half, 7 full diff --git a/reference/2color-after.asm b/reference/2color-after.asm index f44d151..e58817e 100644 --- a/reference/2color-after.asm +++ b/reference/2color-after.asm @@ -7,35 +7,32 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c0.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)mul.s r0.z, hr0.x, 2 bary.f r0.w, 3, r0.x bary.f r1.x, 6, r0.x bary.f r1.y, 7, r0.x add.s r0.z, r0.z, 1 bary.f r1.z, 2, r0.x -bary.f r1.w, 5, r0.x -bary.f r2.x, 1, r0.x +bary.f r2.x, 5, r0.x +bary.f r2.y, 1, r0.x cov.s32f32 r0.z, r0.z -bary.f r2.y, 4, r0.x +bary.f r2.z, 4, r0.x bary.f (ei)r0.x, 0, r0.x nop cmps.f.lt r0.y, r0.z, c0.x -cmps.f.lt r2.z, r0.z, c0.x cmps.f.lt r2.w, r0.z, c0.x +cmps.f.lt r3.x, r0.z, c0.x cmps.f.lt r0.z, r0.z, c0.x -sel.b32 r0.y, r0.w, r0.y, r1.y -sel.b32 r0.w, r1.z, r2.z, r1.x -sel.b32 r1.x, r2.x, r2.w, r1.w -sel.b32 r0.x, r0.x, r0.z, r2.y -mov.f32f32 r1.w, r0.y -mov.f32f32 r1.z, r0.w -mov.f32f32 r1.y, r1.x -mov.f32f32 r1.x, r0.x +sel.b32 r1.w, r0.w, r0.y, r1.y +sel.b32 r1.z, r1.z, r2.w, r1.x +sel.b32 r1.y, r2.y, r3.x, r2.x +sel.b32 r1.x, r0.x, r0.z, r2.z end nop nop nop ; FRAG: outputs: r1.x (1:0) ; FRAG: inputs: r0.x (1:0,cm=f,il=8,b=1) r0.z (2:0,cm=f,il=12,b=1) r63.y (7:0,cm=f,il=16,b=0) -; FRAG: 25 instructions, 1 half, 3 full +; FRAG: 21 instructions, 1 half, 4 full diff --git a/reference/ChameleonMan-vert.asm b/reference/ChameleonMan-vert.asm index 2a8bdc3..c8ce259 100644 --- a/reference/ChameleonMan-vert.asm +++ b/reference/ChameleonMan-vert.asm @@ -3,24 +3,24 @@ @in(r0.x) in0 @in(r0.y) in1 @in(r0.z) in2 -@in(r0.w) in4 -@in(r1.x) in5 -@in(r1.y) in6 -@in(r1.z) in8 -@in(r1.w) in9 -@in(r2.x) in10 -@in(r2.y) in12 -@in(r2.z) in13 -@in(r2.w) in14 -@in(r3.x) in16 -@in(r3.y) in17 -@in(r3.z) in20 -@in(r3.w) in21 -@in(r4.x) in22 -@in(r4.y) in23 -@in(r4.z) in24 -@in(r4.w) in25 -@in(r5.x) in26 +@in(r1.z) in4 +@in(r1.w) in5 +@in(r2.x) in6 +@in(r2.y) in8 +@in(r2.z) in9 +@in(r2.w) in10 +@in(r3.x) in12 +@in(r3.y) in13 +@in(r3.z) in14 +@in(r1.x) in16 +@in(r1.y) in17 +@in(r3.w) in20 +@in(r4.x) in21 +@in(r4.y) in22 +@in(r4.z) in23 +@in(r4.w) in24 +@in(r5.x) in25 +@in(r5.y) in26 @out(r0.x) out0 @out(r0.y) out1 @out(r0.z) out2 @@ -33,783 +33,510 @@ @out(r2.y) out9 @out(r2.z) out10 @out(r2.w) out11 -(sy)(ss)trunc.f r4.z, r4.z -mov.f32f32 r5.y, c63.w +@const(c63.x) 0x00000000, 0x40800000, 0x40400000, 0x3f800000 +@const(c64.x) 0x40000000, 0x00000000, 0x00000000, 0x00000000 +@const(c65.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 +(sy)(ss)mov.f32f32 r0.w, c63.w trunc.f r4.w, r4.w +trunc.f r5.x, r5.x mov.f32f32 r5.z, c63.x -mul.f r5.w, r4.z, c63.z -cmps.f.lt r5.y, r5.y, c5.x -mul.f r4.z, r4.z, c63.y -mov.f32f32 r6.x, r4.w +cmps.f.lt r0.w, r0.w, c5.x +mul.f r5.w, r4.w, c63.z +mul.f r4.w, r4.w, c63.y +mul.f r6.x, r5.x, c63.z +cov.u32f32 r0.w, r0.w cov.f32s16 hr0.x, r5.w -cov.u32f32 r5.y, r5.y -cov.f32s16 hr0.y, r4.z -mul.f r6.x, r6.x, c63.z +cov.f32s16 hr0.y, r4.w +mov.f32f32 r6.y, r6.x +cmps.f.ne r0.w, r0.w, c63.x +trunc.f r5.y, r5.y shl.b hr0.x, hr0.x, 2 -cmps.f.ne r5.y, r5.y, c63.x -trunc.f r5.x, r5.x shl.b hr0.y, hr0.y, 2 +cov.f32s16 hr0.z, r6.y +sel.b32 r5.y, r5.y, r0.w, r5.x mova a0.x, hr0.x -mov.f32f32 r6.x, r6.x -mov.f32f32 r5.x, r5.x -mov.f32f32 r6.y, r4.w -mov.f32f32 r6.z, r4.x -mov.f32f32 r6.w, r3.w -mov.f32f32 r5.x, r5.x +mul.f r5.x, r5.x, c63.y +shl.b hr0.x, hr0.z, 2 +mov.f32f32 r6.z, r5.y +mul.f r5.y, r5.y, c63.y +cov.f32s16 hr0.z, r5.x +sel.b32 r6.w, r4.y, r0.w, r4.x +mov.f32f32 r6.z, r6.z +cov.f32s16 hr0.w, r5.y mov.f32f32 r7.x, c -mov.f32f32 r4.w, r4.w mov.f32f32 r7.y, c -(ul)mov.f32f32 r7.z, c -mov.f32f32 r7.w, r7.x -mov.f32f32 r8.x, r7.x -sel.b32 r4.w, r5.x, r5.y, r4.w -mov.f32f32 r5.x, r7.y -mul.f r7.w, r7.w, r2.y -cov.f32s16 hr0.x, r5.w -mul.f r8.x, r8.x, r1.z -mov.f32f32 r4.w, r4.w -mul.f r5.x, r5.x, r2.y -shl.b hr0.x, hr0.x, 2 -mov.f32f32 r8.y, r7.z -mov.f32f32 r4.w, r4.w -mov.f32f32 r8.z, r7.y -mova a0.x, hr0.x -mul.f r8.y, r8.y, r2.y -mov.f32f32 r4.w, r4.w -mul.f r8.z, r8.z, r1.z -mov.f32f32 r8.w, r7.z -mul.f r7.x, r7.x, r0.w -mul.f r7.y, r7.y, r0.w -mov.f32f32 r9.x, c -mul.f r9.y, r4.w, c63.z -mul.f r4.w, r4.w, c63.y -mov.f32f32 r9.z, c -mov.f32f32 r9.w, r9.x -mov.f32f32 r10.x, r9.x -cov.f32s16 hr0.x, r9.y -cov.f32s16 hr0.z, r4.w -mad.f32 r7.w, r9.w, r2.z, r7.w -mad.f32 r8.x, r10.x, r1.w, r8.x -shl.b hr0.x, hr0.x, 2 -shl.b hr0.z, hr0.z, 2 -mov.f32f32 r7.w, r7.w -cov.f32s16 hr0.w, r5.w -mov.f32f32 r5.w, r8.x -mov.f32f32 r8.x, r9.z -(ul)mov.f32f32 r9.w, c +mov.f32f32 r6.z, r6.z shl.b hr0.w, hr0.w, 2 -mov.f32f32 r10.x, r9.z -mul.f r8.w, r8.w, r1.z -mov.f32f32 r10.y, r9.w -mova a0.x, hr0.w -mad.f32 r5.x, r8.x, r2.z, r5.x -mov.f32f32 r8.x, r9.w -mad.f32 r8.z, r10.x, r1.w, r8.z -mad.f32 r8.w, r10.y, r1.w, r8.w -mov.f32f32 r5.x, r5.x -mad.f32 r8.x, r8.x, r2.z, r8.y -mov.f32f32 r8.y, c -mov.f32f32 r10.x, c -mov.f32f32 r8.z, r8.z -mov.f32f32 r8.x, r8.x -mov.f32f32 r10.y, r8.y -mov.f32f32 r10.z, r8.y -mov.f32f32 r10.w, r10.x -(ul)mov.f32f32 r11.x, c -mad.f32 r7.w, r10.y, r2.w, r7.w -mad.f32 r5.w, r10.z, r2.x, r5.w -mad.f32 r5.x, r10.w, r2.w, r5.x -mov.f32f32 r10.y, r11.x -mov.f32f32 r7.w, r7.w -mov.f32f32 r5.w, r5.w -mov.f32f32 r5.x, r5.x -mad.f32 r8.x, r10.y, r2.w, r8.x -mul.f r7.w, r7.w, r3.z -mul.f r5.w, r5.w, r3.z -mul.f r5.x, r5.x, r3.z -mov.f32f32 r8.x, r8.x -mov.f32f32 r7.w, r7.w -mov.f32f32 r10.y, c62.x -mov.f32f32 r5.w, r5.w -mov.f32f32 r5.x, r5.x -mul.f r8.x, r8.x, r3.z -cmps.f.ne r10.y, r10.y, c63.x -mov.f32f32 r10.z, (0.000000) -mov.f32f32 r10.w, (0.000000) -mov.f32f32 r11.y, (0.000000) -mov.f32f32 r8.x, r8.x -sel.b32 r7.w, r7.w, r10.y, r10.z -sel.b32 r5.w, r5.w, r10.y, r10.w -sel.b32 r5.x, r5.x, r10.y, r11.y -mov.f32f32 r10.z, (0.000000) -mov.f32f32 r10.w, r7.w -mov.f32f32 r11.y, r5.w -mov.f32f32 r11.z, r5.x -sel.b32 r8.x, r8.x, r10.y, r10.z -mov.f32f32 r10.z, r10.w -cov.f32s16 hr0.w, r6.x -mov.f32f32 r10.w, r11.y -mov.f32f32 r11.y, r11.z -mov.f32f32 r11.z, r8.x -shl.b hr0.w, hr0.w, 2 -mov.f32f32 r11.w, r10.x -mov.f32f32 r8.w, r8.w -mov.f32f32 r12.x, r11.x -mova a0.x, hr0.w -mov.f32f32 r11.z, r11.z -mad.f32 r8.z, r11.w, r2.x, r8.z -mad.f32 r8.w, r12.x, r2.x, r8.w -mad.f32 r7.x, r9.x, r1.x, r7.x -mad.f32 r7.y, r9.z, r1.x, r7.y -mul.f r7.z, r7.z, r0.w +mul.f r7.z, r7.x, r3.x +mul.f r7.w, r7.y, r3.x +mul.f r6.z, r6.z, c63.z +cov.f32s16 hr1.x, r5.w +mul.f r8.x, r7.x, r2.y +(ul)mov.f32f32 r8.y, c +cov.f32s16 hr1.y, r6.z +cov.f32s16 hr1.z, r6.z +cov.f32s16 hr1.w, r6.z +shl.b hr1.x, hr1.x, 2 +shl.b hr1.y, hr1.y, 2 +shl.b hr1.z, hr1.z, 2 +shl.b hr1.w, hr1.w, 2 +mul.f r6.z, r8.y, r3.x +mova a0.x, hr1.y +mul.f r8.z, r7.y, r2.y +mul.f r8.w, r8.y, r2.y +mul.f r7.x, r7.x, r1.z +mul.f r7.y, r7.y, r1.z +mul.f r8.y, r8.y, r1.z +cov.f32s16 hr1.y, r5.y mov.f32f32 r9.x, c -mov.f32f32 r9.z, c -(ul)mov.f32f32 r11.w, c -mov.f32f32 r8.z, r8.z -mov.f32f32 r12.x, r9.x -mov.f32f32 r12.y, r9.x -mov.f32f32 r12.z, r9.z -mov.f32f32 r12.w, r11.w -mul.f r12.x, r12.x, r2.y -cov.f32s16 hr0.w, r6.x -mul.f r12.y, r12.y, r1.z -mul.f r12.z, r12.z, r2.y -mul.f r12.w, r12.w, r2.y -shl.b hr0.w, hr0.w, 2 -mul.f r8.z, r8.z, r3.z -mov.f32f32 r8.w, r8.w -mov.f32f32 r7.x, r7.x -mova a0.x, hr0.w -mov.f32f32 r8.z, r8.z -mov.f32f32 r13.x, (0.000000) -mul.f r8.w, r8.w, r3.z -mad.f32 r7.x, r8.y, r1.y, r7.x -mov.f32f32 r7.y, r7.y -mad.f32 r7.z, r9.w, r1.x, r7.z -mov.f32f32 r8.y, c -mov.f32f32 r9.w, c -(ul)mov.f32f32 r13.y, c -sel.b32 r8.z, r8.z, r10.y, r13.x -mov.f32f32 r13.x, r8.y -mov.f32f32 r13.z, r8.y -mov.f32f32 r13.w, r9.w -mov.f32f32 r14.x, r13.y -mad.f32 r12.x, r13.x, r2.z, r12.x -mad.f32 r12.y, r13.z, r1.w, r12.y -mad.f32 r12.z, r13.w, r2.z, r12.z -mad.f32 r12.w, r14.x, r2.z, r12.w -mov.f32f32 r12.x, r12.x -cov.f32s16 hr0.w, r6.x -mov.f32f32 r6.x, r12.y -mov.f32f32 r12.y, r12.z -mov.f32f32 r12.z, r12.w -shl.b hr0.w, hr0.w, 2 -mov.f32f32 r12.w, r8.z -mov.f32f32 r8.w, r8.w -mov.f32f32 r13.x, (0.000000) -mova a0.x, hr0.w -mov.f32f32 r12.w, r12.w -mov.f32f32 r13.z, r9.z -sel.b32 r8.w, r8.w, r10.y, r13.x -mov.f32f32 r7.x, r7.x -mad.f32 r7.y, r10.x, r1.y, r7.y -mov.f32f32 r7.z, r7.z -mov.f32f32 r10.x, c -mov.f32f32 r10.y, c -(ul)mov.f32f32 r13.x, c -mul.f r13.z, r13.z, r1.z -mov.f32f32 r13.w, r10.x -mov.f32f32 r14.x, r10.x -mov.f32f32 r14.y, r10.y -mov.f32f32 r14.z, r13.x -mad.f32 r12.x, r13.w, r2.w, r12.x -mad.f32 r6.x, r14.x, r2.x, r6.x -mad.f32 r12.y, r14.y, r2.w, r12.y -mad.f32 r12.z, r14.z, r2.w, r12.z -mov.f32f32 r12.x, r12.x -mov.f32f32 r6.x, r6.x -mov.f32f32 r12.y, r12.y -mov.f32f32 r12.z, r12.z -mad.f32 r10.z, r12.x, r3.w, r10.z -mad.f32 r6.x, r6.x, r3.w, r10.w -mad.f32 r10.w, r12.y, r3.w, r11.y -mad.f32 r11.y, r12.z, r3.w, r11.z -mov.f32f32 r10.z, r10.z +mov.f32f32 r9.y, c +(ul)mov.f32f32 r9.z, c +shl.b hr1.y, hr1.y, 2 +mul.f r9.w, r9.x, r3.x +mova a0.x, hr1.z +mul.f r10.x, r9.x, r1.z +mul.f r9.x, r9.x, r2.y +mul.f r10.y, r9.y, r3.x +mul.f r10.z, r9.z, r3.x +mul.f r10.w, r9.y, r2.y +mul.f r11.x, r9.z, r2.y +mov.f32f32 r11.y, c +mov.f32f32 r11.z, c +(ul)mov.f32f32 r11.w, c +mul.f r9.y, r9.y, r1.z +mad.f32 r9.w, r11.y, r3.y, r9.w +mova a0.x, hr1.w +mad.f32 r10.x, r11.y, r1.w, r10.x +mad.f32 r9.x, r11.y, r2.z, r9.x +mad.f32 r10.y, r11.z, r3.y, r10.y +mad.f32 r10.z, r11.w, r3.y, r10.z +mad.f32 r10.w, r11.z, r2.z, r10.w +mad.f32 r11.x, r11.w, r2.z, r11.x +mov.f32f32 r11.y, c +mov.f32f32 r12.x, c +(ul)mov.f32f32 r12.y, c +mad.f32 r9.y, r11.z, r1.w, r9.y +mad.f32 r9.w, r11.y, r3.z, r9.w +mova a0.x, hr1.x +mad.f32 r10.x, r11.y, r2.x, r10.x +mad.f32 r9.x, r11.y, r2.w, r9.x +mad.f32 r10.y, r12.x, r3.z, r10.y +mad.f32 r10.z, r12.y, r3.z, r10.z +mad.f32 r10.w, r12.x, r2.w, r10.w +mad.f32 r11.x, r12.y, r2.w, r11.x +mov.f32f32 r11.y, c +mov.f32f32 r11.z, c +(ul)mov.f32f32 r12.z, c +mad.f32 r9.y, r12.x, r2.x, r9.y +mad.f32 r7.z, r11.y, r3.y, r7.z +cov.f32s16 hr1.x, r5.w +mad.f32 r5.w, r11.y, r2.z, r8.x +mad.f32 r7.w, r11.z, r3.y, r7.w +mad.f32 r6.z, r12.z, r3.y, r6.z +shl.b hr1.x, hr1.x, 2 +mad.f32 r8.x, r11.z, r2.z, r8.z +mad.f32 r8.z, r12.z, r2.z, r8.w +mul.f r8.w, r9.z, r1.z +mova a0.x, hr1.x +mad.f32 r8.w, r11.w, r1.w, r8.w +mad.f32 r7.x, r11.y, r1.w, r7.x +mad.f32 r7.y, r11.z, r1.w, r7.y +mad.f32 r8.y, r12.z, r1.w, r8.y +cov.f32s16 hr1.x, r5.y +cov.f32s16 hr1.z, r5.y +mov.f32f32 r5.y, c +mov.f32f32 r9.z, c +(ul)mov.f32f32 r11.y, c +mad.f32 r8.w, r12.y, r2.x, r8.w +mad.f32 r7.z, r5.y, r3.z, r7.z +mad.f32 r5.w, r5.y, r2.w, r5.w +mad.f32 r7.w, r9.z, r3.z, r7.w +mad.f32 r6.z, r11.y, r3.z, r6.z +mul.f r7.z, r7.z, r3.w mov.f32f32 r11.z, c62.x -mov.f32f32 r6.x, r6.x -mov.f32f32 r10.w, r10.w -mov.f32f32 r11.y, r11.y +mul.f r5.w, r5.w, r3.w +mul.f r7.w, r7.w, r3.w +mul.f r6.z, r6.z, r3.w cmps.f.ne r11.z, r11.z, c63.x -mov.f32f32 r12.x, r9.w -mov.f32f32 r12.y, r8.w -mul.f r7.x, r7.x, r3.z -sel.b32 r10.z, r10.z, r11.z, r7.w -sel.b32 r6.x, r6.x, r11.z, r5.w -sel.b32 r10.w, r10.w, r11.z, r5.x -sel.b32 r11.y, r11.y, r11.z, r8.x -sel.b32 r7.w, r10.z, r5.y, r7.w -sel.b32 r5.w, r6.x, r5.y, r5.w -sel.b32 r5.x, r10.w, r5.y, r5.x -sel.b32 r6.x, r11.y, r5.y, r8.x -mov.f32f32 r8.x, r7.w -mov.f32f32 r10.z, r5.w -mov.f32f32 r10.w, r5.x -mov.f32f32 r11.y, r6.x -mov.f32f32 r8.x, r8.x -mova a0.x, hr0.x -mov.f32f32 r10.z, r10.z -mov.f32f32 r10.w, r10.w -mov.f32f32 r11.y, r11.y -mad.f32 r12.x, r12.x, r1.w, r13.z -mov.f32f32 r12.y, r12.y -mov.f32f32 r12.z, r11.w -mov.f32f32 r13.z, c -mov.f32f32 r13.w, c -(ul)mov.f32f32 r14.x, c -mov.f32f32 r12.x, r12.x -mov.f32f32 r14.y, r13.z -mov.f32f32 r14.z, r13.z -mul.f r13.z, r13.z, r0.w -cov.f32s16 hr0.x, r9.y -mul.f r14.y, r14.y, r2.y -mul.f r14.z, r14.z, r1.z -mov.f32f32 r14.w, r13.w -shl.b hr0.x, hr0.x, 2 -mov.f32f32 r15.x, r14.x -mov.f32f32 r15.y, r10.y -mul.f r12.z, r12.z, r1.z -mova a0.x, hr0.x -mul.f r14.w, r14.w, r2.y -mul.f r2.y, r15.x, r2.y -mad.f32 r12.x, r15.y, r2.x, r12.x -mov.f32f32 r15.x, r13.y -mul.f r15.y, r13.w, r0.w -mul.f r15.z, r14.x, r0.w -mov.f32f32 r15.w, c -mov.f32f32 r16.x, c -(ul)mov.f32f32 r16.y, c -mov.f32f32 r12.x, r12.x -mov.f32f32 r16.z, r15.w -mov.f32f32 r16.w, r15.w -mad.f32 r13.z, r15.w, r1.x, r13.z -mov.f32f32 r15.w, r16.x -mad.f32 r14.y, r16.z, r2.z, r14.y -mad.f32 r14.z, r16.w, r1.w, r14.z -mov.f32f32 r13.z, r13.z -cov.f32s16 hr0.x, r9.y -mov.f32f32 r9.y, r14.y -mov.f32f32 r14.y, r14.z -mad.f32 r14.z, r15.w, r2.z, r14.w -shl.b hr0.x, hr0.x, 2 -mov.f32f32 r14.w, r16.y -mad.f32 r12.x, r12.x, r3.w, r12.w -mad.f32 r12.z, r15.x, r1.w, r12.z -mova a0.x, hr0.x -mov.f32f32 r12.w, r14.z -mad.f32 r2.y, r14.w, r2.z, r2.y -mov.f32f32 r2.z, r12.x -mov.f32f32 r12.x, r12.z -mov.f32f32 r12.z, r13.x -mov.f32f32 r2.y, r2.y -mov.f32f32 r14.z, c -mov.f32f32 r14.w, c -(ul)mov.f32f32 r15.x, c -sel.b32 r2.z, r2.z, r11.z, r8.z -mov.f32f32 r15.w, r14.z -mov.f32f32 r16.z, r14.z -mad.f32 r13.z, r14.z, r1.y, r13.z -mov.f32f32 r14.z, r14.w -mad.f32 r9.y, r15.w, r2.w, r9.y -mad.f32 r14.y, r16.z, r2.x, r14.y -mov.f32f32 r13.z, r13.z -mov.f32f32 r7.x, r7.x -mov.f32f32 r9.y, r9.y -sel.b32 r6.z, r6.z, r5.y, r6.w -mov.f32f32 r6.w, r14.y -mov.f32f32 r14.y, r7.x -mul.f r9.x, r9.x, r0.w -mov.f32f32 r15.w, r6.z -mad.f32 r8.y, r8.y, r1.x, r9.x -mad.f32 r9.x, r14.z, r2.w, r12.w -mov.f32f32 r12.w, r15.x -mov.f32f32 r14.z, r15.w -mov.f32f32 r8.y, r8.y -mov.f32f32 r9.x, r9.x -mad.f32 r2.y, r12.w, r2.w, r2.y -mov.f32f32 r2.w, r14.z -mad.f32 r8.y, r10.x, r1.y, r8.y -sel.b32 r2.z, r2.z, r5.y, r8.z -mad.f32 r8.z, r12.z, r2.x, r12.x -mov.f32f32 r10.x, r2.w -mov.f32f32 r12.x, r2.w -mov.f32f32 r8.y, r8.y -mov.f32f32 r12.z, r2.w -mad.f32 r8.x, r9.y, r10.x, r8.x -mad.f32 r6.w, r6.w, r12.x, r10.z -mad.f32 r8.y, r8.y, r3.w, r14.y -mad.f32 r9.x, r9.x, r12.z, r10.w -mov.f32f32 r8.x, r8.x -mov.f32f32 r9.y, c62.x -mov.f32f32 r6.w, r6.w -mov.f32f32 r8.y, r8.y -mov.f32f32 r9.x, r9.x -cmps.f.ne r9.y, r9.y, c63.x -mov.f32f32 r2.y, r2.y -mov.f32f32 r10.x, r2.w -sel.b32 r7.x, r8.y, r5.y, r7.x -sel.b32 r8.x, r8.x, r9.y, r7.w -mov.f32f32 r8.y, c64.x -sel.b32 r6.w, r6.w, r9.y, r5.w -mov.f32f32 r10.z, r7.x -sel.b32 r9.x, r9.x, r9.y, r5.x -cmps.f.lt r8.y, r8.y, c5.x -mad.f32 r10.z, r13.z, r2.w, r10.z -mad.f32 r2.y, r2.y, r10.x, r11.y -mov.f32f32 r10.x, r2.z -cov.u32f32 r8.y, r8.y -mov.f32f32 r10.z, r10.z -mov.f32f32 r2.y, r2.y -mov.f32f32 r10.x, r10.x -cmps.f.ne r8.y, r8.y, c63.x -mov.f32f32 r10.w, r13.w -mov.f32f32 r8.z, r8.z -sel.b32 r2.y, r2.y, r9.y, r6.x -sel.b32 r7.w, r8.x, r8.y, r7.w -sel.b32 r5.w, r6.w, r8.y, r5.w -sel.b32 r6.w, r10.z, r8.y, r7.x -sel.b32 r5.x, r9.x, r8.y, r5.x -mov.f32f32 r7.x, r7.w -mov.f32f32 r8.x, r7.w -mov.f32f32 r9.x, r5.w -mov.f32f32 r10.z, r5.w -mov.f32f32 r11.y, r6.w -mul.f r7.x, r7.x, r8.x -mov.f32f32 r8.x, r5.x -mov.f32f32 r12.x, r5.x -mul.f r9.x, r9.x, r10.z -mul.f r10.z, r10.w, r1.z -mov.f32f32 r10.w, r16.x -mad.f32 r7.x, r8.x, r12.x, r7.x -mov.f32f32 r8.x, r6.w -mov.f32f32 r12.x, r6.w -mad.f32 r10.z, r10.w, r1.w, r10.z -mov.f32f32 r7.x, r7.x -sel.b32 r2.y, r2.y, r8.y, r6.x -mul.f r6.x, r11.y, r8.x -mov.f32f32 r8.x, r10.z -mad.f32 r10.z, r16.x, r1.x, r15.y -mov.f32f32 r10.w, r2.y -mov.f32f32 r11.y, r2.y -mov.f32f32 r12.z, r14.w -mov.f32f32 r10.z, r10.z -mov.f32f32 r12.w, r6.w -mad.f32 r7.x, r10.w, r11.y, r7.x -mad.f32 r8.x, r12.z, r2.x, r8.x -mad.f32 r10.z, r14.w, r1.y, r10.z -mul.f r10.w, r12.x, r12.w -mov.f32f32 r7.w, r7.w -mov.f32f32 r5.x, r5.x -mov.f32f32 r2.y, r2.y -rsq r7.x, r7.x -(ss)mov.f32f32 r7.x, r7.x -mov.f32f32 r8.x, r8.x -mov.f32f32 r11.y, r2.w -mov.f32f32 r10.z, r10.z -mul.f r7.w, r7.w, r7.x -mov.f32f32 r7.y, r7.y -mad.f32 r8.x, r8.x, r11.y, r10.x -mul.f r5.x, r5.x, r7.x -mov.f32f32 r7.w, r7.w -mova a0.x, hr0.z -mov.f32f32 r8.x, r8.x -mul.f r7.y, r7.y, r3.z -mov.f32f32 r5.x, r5.x -mul.f r2.y, r2.y, r7.x -sel.b32 r7.x, r8.x, r9.y, r2.z -mov.f32f32 r7.y, r7.y -mov.f32f32 r8.x, c -mov.f32f32 r2.y, r2.y -sel.b32 r2.z, r7.x, r8.y, r2.z -mov.f32f32 r7.x, r7.y -mul.f r8.x, r8.x, r0.x -cov.f32s16 hr0.x, r4.w -mov.f32f32 r10.x, r2.z -mov.f32f32 r11.y, r2.z -mul.f r9.z, r9.z, r0.w -shl.b hr0.x, hr0.x, 2 -mad.f32 r9.z, r9.w, r1.x, r9.z -mad.f32 r9.x, r10.x, r11.y, r9.x -mov.f32f32 r5.w, r5.w -mov.f32f32 r9.w, c -mov.f32f32 r10.x, c -(ul)mov.f32f32 r11.y, c +mov.f32f32 r11.w, (0.000000) +mov.f32f32 r12.x, (0.000000) +mov.f32f32 r12.y, (0.000000) +mov.f32f32 r12.z, (0.000000) +sel.b32 r7.z, r7.z, r11.z, r11.w mova a0.x, hr0.x -mov.f32f32 r9.x, r9.x -mad.f32 r8.z, r8.z, r3.w, r12.y -mov.f32f32 r9.z, r9.z -mov.f32f32 r2.z, r2.z -mad.f32 r9.z, r10.y, r1.y, r9.z -mov.f32f32 r8.z, r8.z -mov.f32f32 r10.y, c -mov.f32f32 r12.x, r6.w -mov.f32f32 r9.z, r9.z -mad.f32 r12.y, r16.y, r1.x, r15.z -mad.f32 r8.x, r10.y, r0.y, r8.x -cov.f32s16 hr0.x, r4.w -sel.b32 r8.z, r8.z, r11.z, r8.w -mad.f32 r7.x, r9.z, r3.w, r7.x -mov.f32f32 r9.z, r12.y +sel.b32 r5.w, r5.w, r11.z, r12.x +sel.b32 r7.w, r7.w, r11.z, r12.y +sel.b32 r6.z, r6.z, r11.z, r12.z +mad.f32 r8.x, r9.z, r2.w, r8.x +mad.f32 r8.z, r11.y, r2.w, r8.z +mad.f32 r5.y, r5.y, r2.x, r7.x +mov.f32f32 r7.x, c +mul.f r8.x, r8.x, r3.w +mov.f32f32 r11.w, c +(ul)mov.f32f32 r12.x, c +mul.f r12.y, r7.x, r3.x +cov.f32s16 hr0.x, r6.y +mul.f r6.y, r7.x, r1.z +mul.f r7.x, r7.x, r2.y +mul.f r12.z, r11.w, r3.x shl.b hr0.x, hr0.x, 2 -sel.b32 r8.z, r8.z, r5.y, r8.w -mov.f32f32 r7.x, r7.x -mad.f32 r8.w, r15.x, r1.y, r9.z -mov.f32f32 r9.z, c -mov.f32f32 r10.y, c -(ul)mov.f32f32 r11.z, c +mul.f r3.x, r12.x, r3.x +mov.f32f32 r12.w, (0.000000) +mul.f r8.z, r8.z, r3.w mova a0.x, hr0.x -mov.f32f32 r12.y, r8.z -sel.b32 r7.x, r7.x, r5.y, r7.y -mov.f32f32 r7.y, r8.w -mad.f32 r7.z, r11.x, r1.y, r7.z -mov.f32f32 r6.w, r6.w -mov.f32f32 r8.w, r7.x -mov.f32f32 r11.x, c -mov.f32f32 r12.y, r12.y -mov.f32f32 r12.z, r14.x -mad.f32 r8.w, r10.z, r2.w, r8.w -mad.f32 r8.x, r11.x, r0.z, r8.x -cov.f32s16 hr0.x, r4.w -mul.f r1.z, r12.z, r1.z -mov.f32f32 r4.w, r16.y -mov.f32f32 r8.w, r8.w +mov.f32f32 r13.x, (0.000000) +mul.f r13.y, r11.w, r1.z +mul.f r1.z, r12.x, r1.z +sel.b32 r8.x, r8.x, r11.z, r12.w +sel.b32 r8.z, r8.z, r11.z, r13.x +mul.f r11.z, r11.w, r2.y +mov.f32f32 r11.w, c +mov.f32f32 r12.w, c +(ul)mov.f32f32 r13.x, c +mul.f r2.y, r12.x, r2.y +mad.f32 r12.x, r11.w, r3.y, r12.y +cov.f32s16 hr0.x, r6.x +mad.f32 r6.x, r11.w, r1.w, r6.y +mad.f32 r6.y, r11.w, r2.z, r7.x +mad.f32 r7.x, r12.w, r3.y, r12.z shl.b hr0.x, hr0.x, 2 -mov.f32f32 r7.z, r7.z -mul.f r9.w, r9.w, r0.x -mul.f r10.x, r10.x, r0.x -mov.f32f32 r10.z, c -mov.f32f32 r11.x, c -(ul)mov.f32f32 r12.z, c +mad.f32 r3.x, r13.x, r3.y, r3.x +mad.f32 r3.y, r12.w, r2.z, r11.z +mad.f32 r2.y, r13.x, r2.z, r2.y mova a0.x, hr0.x -mad.f32 r1.z, r4.w, r1.w, r1.z -sel.b32 r1.w, r8.w, r8.y, r7.x -mul.f r4.w, r7.z, r3.z -mad.f32 r7.x, r9.z, r0.y, r9.w -mad.f32 r7.z, r10.y, r0.y, r10.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r8.w, c -mov.f32f32 r9.z, r15.x -mov.f32f32 r9.w, r1.w -mov.f32f32 r10.x, r1.w -add.f r8.x, r8.x, r8.w -mov.f32f32 r8.w, c -mov.f32f32 r10.y, c -(ul)mov.f32f32 r12.w, c -mova a0.x, hr0.y -mad.f32 r1.z, r9.z, r2.x, r1.z -mad.f32 r2.x, r9.w, r10.x, r6.x -mov.f32f32 r6.x, r1.w -mov.f32f32 r9.z, r1.w -mov.f32f32 r1.z, r1.z -mov.f32f32 r2.x, r2.x -mov.f32f32 r9.w, c -mov.f32f32 r10.x, r2.w -mov.f32f32 r4.w, r4.w -mad.f32 r6.x, r6.x, r9.z, r10.w -mul.f r9.z, r9.w, r0.x -cov.f32s16 hr0.x, r4.z -mad.f32 r1.z, r1.z, r10.x, r12.y -mov.f32f32 r9.w, r4.w -mul.f r0.w, r11.w, r0.w -shl.b hr0.x, hr0.x, 2 -mov.f32f32 r1.z, r1.z -mad.f32 r0.w, r13.y, r1.x, r0.w -mov.f32f32 r1.x, r6.x -mov.f32f32 r6.x, c -mov.f32f32 r10.x, c -(ul)mov.f32f32 r10.w, c +mad.f32 r2.z, r12.w, r1.w, r13.y +mad.f32 r1.z, r13.x, r1.w, r1.z +mul.f r1.w, r5.y, r3.w +mad.f32 r5.y, r9.z, r2.x, r7.y +mad.f32 r7.y, r11.y, r2.x, r8.y +shl.b hr0.x, hr1.x, 2 +mov.f32f32 r8.y, c +mov.f32f32 r9.z, c +(ul)mov.f32f32 r11.y, c +mov.f32f32 r11.z, r1.w +mad.f32 r11.w, r8.y, r3.z, r12.x +mad.f32 r6.x, r8.y, r2.x, r6.x +mad.f32 r6.y, r8.y, r2.w, r6.y +mad.f32 r7.x, r9.z, r3.z, r7.x +mad.f32 r8.y, r11.w, r4.x, r7.z +mov.f32f32 r11.w, c62.x +mad.f32 r1.w, r6.x, r4.x, r1.w +mad.f32 r6.x, r6.y, r4.x, r5.w +mad.f32 r6.y, r7.x, r4.x, r7.w +cmps.f.ne r7.x, r11.w, c63.x +sel.b32 r1.w, r1.w, r0.w, r11.z +mov.f32f32 r11.z, r6.w +mad.f32 r3.x, r11.y, r3.z, r3.x +sel.b32 r3.z, r8.y, r7.x, r7.z +sel.b32 r6.x, r6.x, r7.x, r5.w +mov.f32f32 r8.y, r11.z +sel.b32 r6.y, r6.y, r7.x, r7.w +sel.b32 r3.z, r3.z, r0.w, r7.z +sel.b32 r5.w, r6.x, r0.w, r5.w +mov.f32f32 r6.x, r8.y +sel.b32 r6.y, r6.y, r0.w, r7.w +mad.f32 r3.x, r3.x, r4.x, r6.z +mad.f32 r3.y, r9.z, r2.w, r3.y +mad.f32 r7.z, r9.w, r6.x, r3.z +mov.f32f32 r7.w, c62.x +mad.f32 r8.y, r10.x, r6.x, r1.w +mad.f32 r9.x, r9.x, r6.x, r5.w +mov.f32f32 r9.w, c64.x +cmps.f.ne r7.w, r7.w, c63.x +mad.f32 r10.x, r10.y, r6.x, r6.y +sel.b32 r3.x, r3.x, r7.x, r6.z +cmps.f.lt r9.w, r9.w, c5.x +sel.b32 r7.z, r7.z, r7.w, r3.z +sel.b32 r9.x, r9.x, r7.w, r5.w +sel.b32 r10.x, r10.x, r7.w, r6.y +cov.u32f32 r9.w, r9.w +sel.b32 r3.x, r3.x, r0.w, r6.z +mad.f32 r3.y, r3.y, r4.x, r8.x +mad.f32 r2.y, r11.y, r2.w, r2.y +cmps.f.ne r6.z, r9.w, c63.x +mad.f32 r2.w, r10.z, r6.x, r3.x +sel.b32 r3.y, r3.y, r7.x, r8.x +mad.f32 r2.y, r2.y, r4.x, r8.z +sel.b32 r3.z, r7.z, r6.z, r3.z +sel.b32 r1.w, r8.y, r6.z, r1.w +sel.b32 r5.w, r9.x, r6.z, r5.w +sel.b32 r6.y, r10.x, r6.z, r6.y +mul.f r7.z, r3.z, r3.z +mul.f r8.y, r1.w, r1.w +mul.f r9.x, r5.w, r5.w +mad.f32 r7.z, r6.y, r6.y, r7.z +sel.b32 r2.w, r2.w, r7.w, r3.x +mad.f32 r2.z, r9.z, r2.x, r2.z +sel.b32 r3.y, r3.y, r0.w, r8.x +mul.f r5.y, r5.y, r3.w +sel.b32 r2.w, r2.w, r6.z, r3.x +mad.f32 r2.z, r2.z, r4.x, r5.y +mad.f32 r3.x, r10.w, r6.x, r3.y +mov.f32f32 r5.y, r5.y +mad.f32 r7.z, r2.w, r2.w, r7.z +mul.f r8.x, r1.w, r1.w +sel.b32 r3.x, r3.x, r7.w, r3.y +sel.b32 r2.z, r2.z, r0.w, r5.y +sel.b32 r2.y, r2.y, r7.x, r8.z +mad.f32 r5.y, r9.y, r6.x, r2.z +mad.f32 r1.z, r11.y, r2.x, r1.z +rsq r2.x, r7.z +(ss)mov.f32f32 r7.x, r2.x +sel.b32 r3.x, r3.x, r6.z, r3.y +sel.b32 r2.z, r5.y, r6.z, r2.z +mul.f r2.x, r2.w, r2.x +mul.f r3.y, r3.z, r7.x +mova a0.x, hr0.w +mad.f32 r2.w, r2.z, r2.z, r8.y +mad.f32 r3.z, r3.x, r3.x, r9.x +mul.f r5.y, r7.y, r3.w +sel.b32 r2.y, r2.y, r0.w, r8.z +mad.f32 r1.z, r1.z, r4.x, r5.y +mad.f32 r7.y, r11.x, r6.x, r2.y +(ss)mov.f32f32 r7.z, c +mov.f32f32 r5.y, r5.y +mad.f32 r8.x, r2.z, r2.z, r8.x +sel.b32 r7.y, r7.y, r7.w, r2.y +mul.f r7.z, r7.z, r0.x +mov.f32f32 r7.w, c +mov.f32f32 r8.y, c +(ul)mov.f32f32 r8.z, c mova a0.x, hr0.x -sel.b32 r1.z, r1.z, r9.y, r8.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r9.y, r1.w -mad.f32 r0.w, r13.x, r1.y, r0.w -mov.f32f32 r1.y, r1.w -mad.f32 r1.w, r10.z, r0.z, r7.x -mov.f32f32 r7.x, c -sel.b32 r1.z, r1.z, r8.y, r8.z -mov.f32f32 r0.w, r0.w -add.f r1.w, r1.w, r8.w -mad.f32 r7.x, r7.x, r0.y, r9.z -cov.f32s16 hr0.x, r4.z -mov.f32f32 r8.z, r1.z -mov.f32f32 r8.w, r1.z -mad.f32 r0.w, r0.w, r3.w, r9.w -shl.b hr0.x, hr0.x, 2 -mov.f32f32 r1.z, r1.z -mul.f r6.x, r6.x, r0.x -mad.f32 r7.z, r11.x, r0.z, r7.z -mov.f32f32 r9.z, c -mov.f32f32 r9.w, c -(ul)mov.f32f32 r10.z, c +sel.b32 r1.z, r1.z, r0.w, r5.y +sel.b32 r2.y, r7.y, r6.z, r2.y +mad.f32 r5.y, r8.w, r6.x, r1.z +mul.f r6.y, r6.y, r7.x +mul.f r7.x, r7.w, r0.x +mul.f r7.y, r8.y, r0.x +mov.f32f32 r7.w, c +sel.b32 r1.z, r5.y, r6.z, r1.z +mad.f32 r3.z, r2.y, r2.y, r3.z +mov.f32f32 r5.y, c +mad.f32 r7.z, r7.w, r0.y, r7.z +shl.b hr0.x, hr1.z, 2 +mad.f32 r2.w, r1.z, r1.z, r2.w +mad.f32 r7.w, r1.z, r1.z, r8.x +mad.f32 r5.y, r5.y, r0.y, r7.x +mov.f32f32 r7.x, c +(ul)mov.f32f32 r8.x, c mova a0.x, hr0.x -mad.f32 r8.z, r8.z, r8.w, r9.x -mov.f32f32 r0.w, r0.w -mad.f32 r6.x, r9.z, r0.y, r6.x -add.f r7.z, r7.z, r10.y -mul.f r8.w, r10.x, r0.x -mul.f r9.x, r11.y, r0.x -mov.f32f32 r9.z, c -rsq r8.z, r8.z -(ss)mov.f32f32 r8.z, r8.z -sel.b32 r0.w, r0.w, r5.y, r4.w -mov.f32f32 r4.w, c -mad.f32 r7.x, r9.z, r0.z, r7.x -cov.f32s16 hr0.x, r4.z -mul.f r4.z, r5.w, r8.z -mov.f32f32 r5.w, r0.w -mul.f r2.z, r2.z, r8.z -shl.b hr0.x, hr0.x, 2 -mov.f32f32 r4.z, r4.z -mad.f32 r5.w, r7.y, r2.w, r5.w -mov.f32f32 r2.z, r2.z -mov.f32f32 r7.y, c +rsq r3.z, r3.z +(ss)mov.f32f32 r8.y, r3.z +rsq r2.w, r2.w +(ss)mov.f32f32 r8.w, r2.w +rsq r7.w, r7.w +(ss)mov.f32f32 r9.x, r7.w +mul.f r2.y, r2.y, r3.z +(ss)mul.f r3.z, r1.z, r2.w +mul.f r1.z, r1.z, r7.w +mov.f32f32 r2.w, c +mul.f r7.w, r1.w, r8.w +mul.f r5.w, r5.w, r8.y +mul.f r1.w, r1.w, r9.x +mad.f32 r2.w, r2.w, r0.z, r7.z +mov.f32f32 r7.z, c +mov.f32f32 r9.y, c (ul)mov.f32f32 r9.z, c +mova a0.x, hr1.y +mul.f r3.x, r3.x, r8.y +mul.f r8.y, r2.z, r8.w +mul.f r2.z, r2.z, r9.x +mad.f32 r5.y, r7.z, r0.z, r5.y +mad.f32 r7.x, r7.x, r0.y, r7.y +mul.f r7.y, r8.z, r0.x +mov.f32f32 r7.z, c +mad.f32 r7.x, r9.y, r0.z, r7.x +mov.f32f32 r8.z, c +mov.f32f32 r8.w, c +add.f r7.z, r2.w, r7.z +(ul)mov.f32f32 r2.w, c +mova a0.x, hr0.y +add.f r5.y, r5.y, r8.z +add.f r7.x, r7.x, r8.w +mad.f32 r7.y, r8.x, r0.y, r7.y +shl.b hr0.x, hr0.z, 2 +cov.f32s16 hr0.y, r4.w +cov.f32s16 hr0.z, r4.w +mov.f32f32 r8.x, c +mov.f32f32 r8.z, c +mov.f32f32 r8.w, c +mad.f32 r7.y, r9.z, r0.z, r7.y +mul.f r8.x, r8.x, r0.x +shl.b hr0.z, hr0.z, 2 +mul.f r8.z, r8.z, r0.x +mul.f r8.w, r8.w, r0.x +add.f r7.y, r7.y, r2.w +(ul)mov.f32f32 r2.w, c +mova a0.x, hr0.z +shl.b hr0.y, hr0.y, 2 +cov.f32s16 hr0.z, r4.w +cov.f32s16 hr0.w, r5.x +cov.f32s16 hr1.x, r5.x +cov.f32s16 hr1.y, r5.x +shl.b hr0.z, hr0.z, 2 +mov.f32f32 r4.w, c +mov.f32f32 r5.x, c +mov.f32f32 r9.x, c +mul.f r2.w, r2.w, r0.x +mad.f32 r4.w, r4.w, r0.y, r8.x +(ul)mov.f32f32 r8.x, c +mova a0.x, hr0.z +mad.f32 r5.x, r5.x, r0.y, r8.z +mad.f32 r8.z, r9.x, r0.y, r8.w +mad.f32 r2.w, r8.x, r0.y, r2.w +shl.b hr0.z, hr0.w, 2 +shl.b hr0.w, hr1.x, 2 +shl.b hr1.x, hr1.y, 2 +mov.f32f32 r8.x, c +mov.f32f32 r8.w, c +mov.f32f32 r9.x, c +(ul)mov.f32f32 r9.y, c +mad.f32 r4.w, r8.x, r0.z, r4.w +mova a0.x, hr0.y +mad.f32 r5.x, r8.w, r0.z, r5.x +mad.f32 r8.x, r9.x, r0.z, r8.z +mad.f32 r2.w, r9.y, r0.z, r2.w +sel.b32 r8.z, r3.w, r0.w, r4.z +(rpt1)nop +mov.f32f32 r8.w, c +mov.f32f32 r9.x, c +mov.f32f32 r9.y, c +(ul)mov.f32f32 r9.z, c +add.f r4.w, r4.w, r8.w +add.f r5.x, r5.x, r9.x +add.f r8.x, r8.x, r9.y +add.f r2.w, r2.w, r9.z +mul.f r4.w, r4.w, r3.w mova a0.x, hr0.x -mov.f32f32 r5.w, r5.w -mul.f r1.z, r1.z, r8.z -mad.f32 r4.w, r4.w, r0.z, r6.x -mad.f32 r6.x, r9.w, r0.y, r8.w -mad.f32 r8.z, r11.z, r0.y, r9.x -mad.f32 r6.x, r7.y, r0.z, r6.x -mov.f32f32 r7.y, c -sel.b32 r0.w, r5.w, r8.y, r0.w -mov.f32f32 r1.z, r1.z -mov.f32f32 r5.w, c -add.f r7.x, r7.x, r7.y -mov.f32f32 r7.y, r0.w -mov.f32f32 r8.w, r0.w -mov.f32f32 r9.x, r0.w -mul.f r7.x, r7.x, r3.z -mov.f32f32 r9.w, r0.w -mad.f32 r2.x, r7.y, r8.w, r2.x -mov.f32f32 r7.y, r0.w -mov.f32f32 r8.w, r7.x -mul.f r6.y, r6.y, c63.y -mad.f32 r1.x, r9.x, r9.w, r1.x -mov.f32f32 r0.w, r0.w -add.f r4.w, r4.w, r5.w -cov.f32s16 hr0.x, r6.y -rsq r2.x, r2.x -(ss)mov.f32f32 r2.x, r2.x -mov.f32f32 r5.w, c -mul.f r4.w, r4.w, r3.z -shl.b hr0.x, hr0.x, 2 -mul.f r9.x, r12.x, r2.x -rsq r1.x, r1.x -(ss)mov.f32f32 r1.x, r1.x -mul.f r9.y, r9.y, r2.x -(ul)mov.f32f32 r9.w, c -mova a0.x, hr0.x -mov.f32f32 r9.x, r9.x -mul.f r6.w, r6.w, r1.x -mov.f32f32 r9.y, r9.y -mul.f r2.x, r7.y, r2.x -mul.f r1.y, r1.y, r1.x -mul.f r0.w, r0.w, r1.x -mov.f32f32 r1.x, c -mov.f32f32 r6.w, r6.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r1.y, r1.y -mul.f r1.x, r1.x, r0.x -cov.f32s16 hr0.x, r6.y -mov.f32f32 r7.y, r0.w -mov.f32f32 r0.w, r4.w -add.f r5.w, r6.x, r5.w -shl.b hr0.x, hr0.x, 2 -mov.f32f32 r6.x, c -mad.f32 r8.z, r12.z, r0.z, r8.z -mul.f r5.w, r5.w, r3.z -mov.f32f32 r10.x, c -(ul)mov.f32f32 r10.y, c -mova a0.x, hr0.x -mul.f r6.x, r6.x, r0.x -mov.f32f32 r11.x, r5.w -mul.f r10.x, r10.x, r0.x -add.f r8.z, r8.z, r12.w -mul.f r10.w, r10.w, r0.x -mul.f r0.x, r10.y, r0.x -mov.f32f32 r10.y, c -mov.f32f32 r11.y, c -mov.f32f32 r11.z, c -mad.f32 r10.z, r10.z, r0.y, r10.w -mad.f32 r1.x, r10.y, r0.y, r1.x -cov.f32s16 hr0.x, r6.y -mad.f32 r6.x, r11.y, r0.y, r6.x -mad.f32 r10.x, r11.z, r0.y, r10.x -mad.f32 r9.z, r9.z, r0.z, r10.z -shl.b hr0.x, hr0.x, 2 +mul.f r5.x, r5.x, r3.w +mul.f r8.x, r8.x, r3.w +mul.f r3.w, r2.w, r3.w +cmps.f.lt r2.w, r5.z, c5.x +sel.b32 r4.y, r4.z, r0.w, r4.y +mov.f32f32 r4.z, c62.x +mov.f32f32 r5.z, c +mov.f32f32 r8.w, c +mov.f32f32 r9.x, c +(ul)mov.f32f32 r9.y, c +mul.f r5.z, r5.z, r0.x +mova a0.x, hr0.w +mul.f r8.w, r8.w, r0.x +mul.f r9.x, r9.x, r0.x +mul.f r0.x, r9.y, r0.x +cov.u32f32 r9.y, r2.w +cmps.f.ne r4.z, r4.z, c63.x +mov.f32f32 r2.w, (0.000000) +mov.f32f32 r9.z, c +mov.f32f32 r9.w, c +mov.f32f32 r10.x, c (ul)mov.f32f32 r10.y, c -cov.f32s16 hr0.y, r6.y -add.f r6.y, r9.z, r9.w -mova a0.x, hr0.x +mad.f32 r5.z, r9.z, r0.y, r5.z +mova a0.x, hr1.x +mad.f32 r8.w, r9.w, r0.y, r8.w +mad.f32 r9.x, r10.x, r0.y, r9.x mad.f32 r0.x, r10.y, r0.y, r0.x -shl.b hr0.x, hr0.y, 2 -mov.f32f32 r0.y, r3.z -mov.f32f32 r9.z, r4.y -mov.f32f32 r4.y, r4.y -cmps.f.lt r5.z, r5.z, c5.x -mov.f32f32 r9.w, c +cmps.f.ne r9.z, r9.y, c63.x +mov.f32f32 r9.w, (0.000000) +mov.f32f32 r10.x, (0.000000) +mov.f32f32 r0.y, c mov.f32f32 r10.y, c mov.f32f32 r10.z, c -mul.f r3.z, r6.y, r3.z -mad.f32 r1.x, r9.w, r0.z, r1.x -(ul)mov.f32f32 r6.y, c -mova a0.x, hr0.x -mad.f32 r6.x, r10.y, r0.z, r6.x -mad.f32 r9.w, r10.z, r0.z, r10.x -mov.f32f32 r10.x, r3.z -mad.f32 r0.x, r6.y, r0.z, r0.x -sel.b32 r6.y, r0.y, r5.y, r9.z -cov.u32f32 r5.z, r5.z -mov.f32f32 r0.y, c -mov.f32f32 r0.z, c -mov.f32f32 r9.z, c -(ul)mov.f32f32 r10.y, c -add.f r0.y, r1.x, r0.y -add.f r0.z, r6.x, r0.z -add.f r1.x, r9.w, r9.z -add.f r0.x, r0.x, r10.y -mad.f32 r0.y, r0.y, r3.w, r8.w -mad.f32 r0.z, r0.z, r3.w, r0.w -mad.f32 r0.w, r1.x, r3.w, r11.x -mad.f32 r0.x, r0.x, r3.w, r10.x -sel.b32 r0.y, r0.y, r5.y, r7.x -sel.b32 r0.z, r0.z, r5.y, r4.w -sel.b32 r0.w, r0.w, r5.y, r5.w -sel.b32 r0.x, r0.x, r5.y, r3.z -mov.f32f32 r1.x, r0.y -mov.f32f32 r3.z, r0.z -mad.f32 r1.x, r8.x, r2.w, r1.x -mad.f32 r1.w, r1.w, r2.w, r3.z -mov.f32f32 r3.z, r0.w -mov.f32f32 r3.w, r0.x -sel.b32 r0.y, r1.x, r8.y, r0.y -sel.b32 r0.z, r1.w, r8.y, r0.z -mad.f32 r1.x, r7.z, r2.w, r3.z -mad.f32 r1.w, r8.z, r2.w, r3.w -add.f r2.w, c4.x, (neg)r0.y -mul.f r3.z, c0.w, r0.y -mul.f r3.w, c0.z, r0.y -mul.f r4.w, c0.y, r0.y -mul.f r5.w, r2.w, r2.w -add.f r6.x, c4.y, (neg)r0.z -mad.f32 r3.z, c1.w, r0.z, r3.z -mad.f32 r3.w, c1.z, r0.z, r3.w -mad.f32 r4.w, c1.y, r0.z, r4.w -mad.f32 r5.w, r6.x, r6.x, r5.w -sel.b32 r0.w, r1.x, r8.y, r0.w -mul.f r0.y, c0.x, r0.y -sel.b32 r0.x, r1.w, r8.y, r0.x -mov.f32f32 r1.x, r5.w -add.f r1.w, c4.z, (neg)r0.w -mad.f32 r3.z, c2.w, r0.w, r3.z -mad.f32 r3.w, c2.z, r0.w, r3.w -mad.f32 r4.w, c2.y, r0.w, r4.w -mad.f32 r1.x, r1.w, r1.w, r1.x -mad.f32 r3.z, c3.w, r0.x, r3.z -mad.f32 r3.w, c3.z, r0.x, r3.w -mad.f32 r4.w, c3.y, r0.x, r4.w -mad.f32 r0.y, c1.x, r0.z, r0.y -cmps.f.ne r5.w, r5.z, c63.x +(ul)mov.f32f32 r10.w, c +mad.f32 r0.y, r0.y, r0.z, r5.z +mova a0.x, hr0.z +mad.f32 r5.z, r10.y, r0.z, r8.w +mad.f32 r8.w, r10.z, r0.z, r9.x +mad.f32 r0.x, r10.w, r0.z, r0.x +sel.b32 r2.w, r8.z, r9.z, r2.w +mov.f32f32 r8.z, (0.000000) mov.f32f32 r0.z, (0.000000) -rsq r1.x, r1.x -(ss)mov.f32f32 r1.x, r1.x -mov.f32f32 r7.x, (0.000000) -mov.f32f32 r7.z, (0.000000) -mad.f32 r8.x, c2.x, r0.w, r0.y -mul.f r0.y, r2.w, r1.x -mul.f r0.w, r6.x, r1.x -mul.f r1.x, r1.w, r1.x -sel.b32 r1.w, r3.z, r5.w, r7.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r2.w, r0.w -mov.f32f32 r1.x, r1.x -mov.f32f32 r0.w, r1.w -mov.f32f32 r1.w, r0.y -mov.f32f32 r3.z, r0.y -mov.f32f32 r6.x, r0.y -mov.f32f32 r0.y, r0.y -mul.f r1.w, r7.w, r1.w -mov.f32f32 r7.x, r2.w -mul.f r3.z, r4.z, r3.z -mov.f32f32 r4.z, r2.w -mul.f r6.x, r9.x, r6.x -mad.f32 r1.w, r5.x, r7.x, r1.w -mov.f32f32 r5.x, r2.w -mad.f32 r2.z, r2.z, r4.z, r3.z -mul.f r0.y, r6.w, r0.y -mov.f32f32 r1.w, r1.w -mov.f32f32 r3.z, r1.x -mov.f32f32 r2.z, r2.z -mov.f32f32 r4.z, r1.x -mad.f32 r5.x, r9.y, r5.x, r6.x -mad.f32 r1.w, r2.y, r3.z, r1.w -mov.f32f32 r2.y, r2.w -mov.f32f32 r2.w, r1.x -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.w, r1.w -mad.f32 r1.z, r1.z, r4.z, r2.z -mov.f32f32 r2.z, r5.x -mad.f32 r0.y, r1.y, r2.y, r0.y -mov.f32f32 r1.y, r1.w -mov.f32f32 r1.w, c62.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r0.y, r0.y -mad.f32 r2.x, r2.x, r2.w, r2.z -cmps.f.ne r2.z, r1.w, c63.x -mad.f32 r0.y, r7.y, r1.x, r0.y -sel.b32 r0.z, r3.w, r5.w, r0.z -sel.b32 r1.x, r4.w, r5.w, r7.z -sel.b32 r1.y, r1.y, r2.z, r6.z -mov.f32f32 r1.w, (0.000000) -sel.b32 r1.z, r1.z, r2.z, r0.y -mov.f32f32 r2.w, (0.000000) -mov.f32f32 r2.x, r2.x -sel.b32 r1.y, r1.y, r5.w, r1.w -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.y, r1.x -mad.f32 r0.x, c3.x, r0.x, r8.x -mov.f32f32 r1.x, r1.y -mov.f32f32 r2.y, r1.y -sel.b32 r1.y, r1.z, r5.w, r2.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r1.w, r1.x -mov.f32f32 r1.x, r4.x -mov.f32f32 r1.y, r1.y -sel.b32 r0.x, r0.x, r5.w, r5.z -(rpt1)nop -mov.f32f32 r1.z, r1.y -sel.b32 r1.x, r4.y, r5.y, r1.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r1.y, (0.000000) -mov.f32f32 r3.y, r3.y -sel.b32 r1.x, r2.x, r2.z, r1.x -mov.f32f32 r2.x, (0.000000) -sel.b32 r1.y, r6.y, r5.w, r1.y -(rpt1)nop -sel.b32 r1.x, r1.x, r5.w, r2.x -mov.f32f32 r2.w, r1.y -mov.f32f32 r1.y, r3.y -mov.f32f32 r2.x, r3.x -mov.f32f32 r3.x, r1.x -mov.f32f32 r2.z, r1.x -(rpt1)nop -mov.f32f32 r3.x, r3.x -mov.f32f32 r1.x, r2.x +mov.f32f32 r9.x, c +mov.f32f32 r10.y, c +mov.f32f32 r10.z, c +(ul)mov.f32f32 r10.w, c +add.f r0.y, r0.y, r9.x +add.f r5.z, r5.z, r10.y +add.f r8.w, r8.w, r10.z +add.f r0.x, r0.x, r10.w +mad.f32 r0.y, r0.y, r4.x, r4.w +mad.f32 r5.z, r5.z, r4.x, r5.x +mad.f32 r8.w, r8.w, r4.x, r8.x +mad.f32 r0.x, r0.x, r4.x, r3.w +sel.b32 r0.y, r0.y, r0.w, r4.w +sel.b32 r4.x, r5.z, r0.w, r5.x +mad.f32 r4.w, r7.z, r6.x, r0.y +mad.f32 r5.x, r5.y, r6.x, r4.x +sel.b32 r5.y, r8.w, r0.w, r8.x +sel.b32 r0.x, r0.x, r0.w, r3.w +sel.b32 r0.y, r4.w, r6.z, r0.y +sel.b32 r0.w, r5.x, r6.z, r4.x +mad.f32 r3.w, r7.x, r6.x, r5.y +mad.f32 r4.x, r7.y, r6.w, r0.x +add.f r4.w, c4.x, (neg)r0.y +mul.f r5.x, c0.w, r0.y +mul.f r5.z, c0.z, r0.y +mul.f r6.x, c0.y, r0.y +mul.f r7.x, r4.w, r4.w +add.f r7.y, c4.y, (neg)r0.w +mad.f32 r5.x, c1.w, r0.w, r5.x +mad.f32 r5.z, c1.z, r0.w, r5.z +mad.f32 r6.x, c1.y, r0.w, r6.x +mad.f32 r7.x, r7.y, r7.y, r7.x +sel.b32 r3.w, r3.w, r6.z, r5.y +mul.f r0.y, c0.x, r0.y +sel.b32 r0.x, r4.x, r6.z, r0.x +mov.f32f32 r4.x, (0.000000) +add.f r5.y, c4.z, (neg)r3.w +mad.f32 r5.x, c2.w, r3.w, r5.x +mad.f32 r5.z, c2.z, r3.w, r5.z +mad.f32 r6.x, c2.y, r3.w, r6.x +mad.f32 r6.z, r5.y, r5.y, r7.x +mad.f32 r5.x, c3.w, r0.x, r5.x +mad.f32 r5.z, c3.z, r0.x, r5.z +mad.f32 r6.x, c3.y, r0.x, r6.x +mad.f32 r7.x, c1.x, r0.w, r0.y +mov.f32f32 r0.y, (0.000000) +nop +rsq r0.w, r6.z +(ss)mov.f32f32 r6.z, r0.w +mul.f r5.y, r5.y, r0.w +sel.b32 r0.w, r5.x, r9.z, r0.z +sel.b32 r0.z, r5.z, r9.z, r4.x +mul.f r4.x, r4.w, r6.z +mul.f r4.w, r7.y, r6.z +mov.f32f32 r5.x, r5.y +sel.b32 r0.y, r6.x, r9.z, r0.y +mov.f32f32 r5.z, r4.x +mul.f r4.x, r5.w, r4.x +mov.f32f32 r5.w, r4.w +mad.f32 r3.w, c2.x, r3.w, r7.x +mul.f r3.y, r3.y, r5.z +mul.f r6.x, r7.w, r5.z +mad.f32 r3.y, r6.y, r5.w, r3.y +mad.f32 r6.x, r8.y, r5.w, r6.x +mad.f32 r2.x, r2.x, r5.x, r3.y +mad.f32 r3.y, r3.z, r5.x, r6.x +mad.f32 r3.x, r3.x, r4.w, r4.x +mul.f r1.w, r1.w, r5.z +sel.b32 r2.x, r2.x, r4.z, r6.w +sel.b32 r3.y, r3.y, r4.z, r4.y +mad.f32 r3.x, r2.y, r5.y, r3.x +mad.f32 r2.y, r2.z, r5.w, r1.w +sel.b32 r1.w, r2.x, r9.z, r10.x +sel.b32 r2.x, r3.y, r9.z, r9.w +mad.f32 r1.z, r1.z, r5.x, r2.y +mad.f32 r0.x, c3.x, r0.x, r3.w +mov.f32f32 r2.y, r1.w +mov.f32f32 r2.z, r2.x +sel.b32 r1.z, r3.x, r4.z, r1.z +sel.b32 r0.x, r0.x, r9.z, r9.y (rpt1)nop -mov.f32f32 r2.x, r3.x +sel.b32 r1.z, r1.z, r9.z, r8.z end nop +nop +nop ; VERT: outputs: r0.x (0:0) r1.x (5:20) r2.x (5:21) -; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r1.z (0:0,cm=7,il=16,b=0) r2.y (0:0,cm=7,il=20,b=0) r3.x (0:0,cm=3,il=24,b=0) r3.z (0:0,cm=f,il=28,b=0) r4.z (0:0,cm=7,il=32,b=0) -; VERT: 779 instructions, 1 half, 17 full +; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r1.z (0:0,cm=7,il=12,b=0) r2.y (0:0,cm=7,il=16,b=0) r3.x (0:0,cm=7,il=20,b=0) r1.x (0:0,cm=3,il=24,b=0) r3.w (0:0,cm=f,il=28,b=0) r4.w (0:0,cm=7,il=32,b=0) +; VERT: 499 instructions, 2 half, 14 full diff --git a/reference/builtin2.asm b/reference/builtin2.asm index 0731536..2949ecf 100644 --- a/reference/builtin2.asm +++ b/reference/builtin2.asm @@ -6,15 +6,16 @@ @out(hr1.x) out1 @out(hr1.y) out2 @out(hr1.z) out3 -(sy)(ss)bary.f r0.z, 0, r0.x -bary.f (ei)r0.x, 1, r0.x -(rpt1)nop -mov.f32f32 r0.y, r0.z -mov.f32f32 r0.z, r0.x +@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 +(sy)(ss)bary.f r1.w, 0, r0.x +bary.f (ei)r2.x, 1, r0.x (rpt5)nop -sam (f16)(xyzw)hr0.w, r0.y, s#0, t#0 +sam (f16)(xyzw)hr0.w, r1.w, s#0, t#0 end +nop +nop +nop ; FRAG: outputs: r0.w (1:0) ; FRAG: inputs: r0.x (19:0,cm=f,il=8,b=1) -; FRAG: 14 instructions, 2 half, 1 full +; FRAG: 10 instructions, 2 half, 3 full diff --git a/reference/bump/bump-12.asm b/reference/bump/bump-12.asm index 0f8742e..9112ce0 100644 --- a/reference/bump/bump-12.asm +++ b/reference/bump/bump-12.asm @@ -1,16 +1,16 @@ ; options: ; VERT: new compiler -@in(r0.x) in0 -@in(r0.y) in1 -@in(r0.z) in2 +@in(r3.w) in0 +@in(r4.x) in1 +@in(r4.y) in2 @in(r1.x) in4 @in(r1.y) in5 -@in(r1.z) in8 -@in(r1.w) in9 -@in(r2.x) in10 -@in(r3.w) in12 -@in(r4.x) in13 -@in(r4.y) in14 +@in(r0.x) in8 +@in(r0.y) in9 +@in(r0.z) in10 +@in(r4.z) in12 +@in(r4.w) in13 +@in(r5.x) in14 @out(r0.x) out0 @out(r0.y) out1 @out(r0.z) out2 @@ -27,151 +27,109 @@ @out(r3.y) out13 @out(r3.z) out14 @out(r3.w) out15 -(sy)(ss)mul.f r0.w, r2.x, r4.x -mul.f r2.y, c4.x, r1.z -mad.f32 r0.w, r1.w, r4.y, (neg)r0.w -mad.f32 r2.y, c5.x, r1.w, r2.y -mul.f r2.z, c4.x, r3.w -mad.f32 r2.y, c6.x, r2.x, r2.y -mov.f32f32 r0.w, r0.w -mad.f32 r2.z, c5.x, r4.x, r2.z -mul.f r2.w, c4.y, r3.w -mul.f r3.x, c4.z, r3.w -mul.f r3.y, c4.x, r0.w -mul.f r3.z, r1.z, r4.y -add.f r2.y, r2.y, c7.x -mad.f32 r3.z, r2.x, r3.w, (neg)r3.z -mad.f32 r2.z, c6.x, r4.y, r2.z -mul.f r4.z, c4.y, r0.w -mul.f r0.w, c4.z, r0.w -mov.f32f32 r3.z, r3.z -mov.f32f32 r4.w, r2.y -add.f r2.y, r2.z, c7.x -mad.f32 r2.z, c5.y, r4.x, r2.w -mad.f32 r2.w, c5.x, r3.z, r3.y -mul.f r3.y, r1.w, r3.w -mul.f r5.x, r4.w, r4.w -mad.f32 r3.y, r1.z, r4.x, (neg)r3.y -mul.f r5.y, c4.y, r1.z -mov.f32f32 r2.y, r2.y -mad.f32 r5.y, c5.y, r1.w, r5.y -mov.f32f32 r3.y, r3.y -mad.f32 r5.y, c6.y, r2.x, r5.y -mul.f r5.z, r2.y, r2.y -mad.f32 r2.z, c6.y, r4.y, r2.z -mad.f32 r2.w, c6.x, r3.y, r2.w -add.f r5.y, r5.y, c7.y -mad.f32 r4.z, c5.y, r3.z, r4.z -mad.f32 r0.w, c5.z, r3.z, r0.w -mov.f32f32 r2.w, r2.w -mov.f32f32 r5.y, r5.y -add.f r2.z, r2.z, c7.y -mad.f32 r3.z, c6.y, r3.y, r4.z -add.f r2.w, r2.w, c7.x -mad.f32 r4.z, r5.y, r5.y, r5.x -mov.f32f32 r2.z, r2.z -mov.f32f32 r3.z, r3.z -mov.f32f32 r2.w, r2.w -mov.f32f32 r4.z, r4.z -mad.f32 r5.x, r2.z, r2.z, r5.z -mul.f r1.z, c4.z, r1.z -mul.f r5.z, r2.w, r2.w -add.f r3.z, r3.z, c7.y -mad.f32 r1.z, c5.z, r1.w, r1.z -mov.f32f32 r1.w, r5.x -mad.f32 r3.x, c5.z, r4.x, r3.x -mov.f32f32 r3.z, r3.z -mad.f32 r1.z, c6.z, r2.x, r1.z -mad.f32 r2.x, c6.z, r4.y, r3.x -mad.f32 r0.w, c6.z, r3.y, r0.w -mad.f32 r3.x, r3.z, r3.z, r5.z -add.f r1.z, r1.z, c7.z -add.f r2.x, r2.x, c7.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r3.x, r3.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r2.x, r2.x -add.f r0.w, r0.w, c7.z -mul.f r3.y, c0.w, r0.x -mul.f r5.x, c0.z, r0.x -mul.f r5.z, c0.y, r0.x -mov.f32f32 r0.w, r0.w -mad.f32 r4.z, r1.z, r1.z, r4.z -mad.f32 r1.w, r2.x, r2.x, r1.w -mad.f32 r3.y, c1.w, r0.y, r3.y -mad.f32 r3.x, r0.w, r0.w, r3.x -mad.f32 r3.y, c2.w, r0.z, r3.y -mad.f32 r5.x, c1.z, r0.y, r5.x -mad.f32 r5.z, c1.y, r0.y, r5.z -mul.f r0.x, c0.x, r0.x -mul.f r3.w, c4.w, r3.w -add.f r5.w, r3.y, c3.w -rsq r3.x, r3.x -(ss)mov.f32f32 r3.x, r3.x -rsq r3.y, r4.z -nop -rsq r1.w, r1.w -(ss)mov.f32f32 r4.z, r3.y -mov.f32f32 r1.w, r1.w -mul.f r0.w, r0.w, r3.x -mul.f r3.y, r3.z, r3.x -mul.f r2.w, r2.w, r3.x -mul.f r1.z, r1.z, r4.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r3.x, r3.y -mov.f32f32 r2.w, r2.w -nop -mov.f32f32 r3.z, r0.w -mov.f32f32 r3.y, r3.x -mov.f32f32 r3.x, r2.w -mov.f32f32 r0.w, r1.z -mul.f r1.z, r2.x, r1.w -mul.f r2.x, r2.z, r1.w -mul.f r1.w, r2.y, r1.w -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.z, r1.z -mov.f32f32 r2.x, r2.x -mov.f32f32 r1.w, r1.w -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.z, r1.z -mov.f32f32 r2.y, r2.x -mov.f32f32 r1.w, r1.w +@const(c8.x) 0x3f800000, 0x00000000, 0x00000000, 0x00000000 +@const(c9.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 +(sy)(ss)mul.f r0.w, r0.z, r4.w +mul.f r1.z, c4.x, r4.z +mad.f32 r0.w, r0.y, r5.x, (neg)r0.w +mad.f32 r1.z, c5.x, r4.w, r1.z +mul.f r1.w, c4.x, r0.x +mad.f32 r1.z, c6.x, r5.x, r1.z mov.f32f32 r2.x, r0.w +mad.f32 r1.w, c5.x, r0.y, r1.w +mul.f r0.w, c4.x, r0.w +add.f r1.z, r1.z, c7.x +mul.f r2.y, c4.y, r2.x +mul.f r2.z, r0.x, r5.x +mad.f32 r1.w, c6.x, r0.z, r1.w +mad.f32 r2.z, r0.z, r4.z, (neg)r2.z mov.f32f32 r2.w, r1.z -mov.f32f32 r2.z, r2.y -mov.f32f32 r2.y, r1.w -mul.f r1.z, r5.y, r4.z -mul.f r1.w, r4.w, r4.z -mov.f32f32 r0.w, r5.w -mad.f32 r4.z, c2.z, r0.z, r5.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r1.w, r1.w -mad.f32 r4.w, c2.y, r0.z, r5.z -mad.f32 r0.x, c1.x, r0.y, r0.x -mov.f32f32 r0.y, r1.z -mov.f32f32 r1.z, r1.w -(rpt1)nop -mov.f32f32 r1.w, r0.y -mov.f32f32 r1.z, r1.z -add.f r0.y, r4.z, c3.z -add.f r4.z, r4.w, c3.y -mad.f32 r0.x, c2.x, r0.z, r0.x -mad.f32 r3.w, c5.w, r4.x, r3.w -mov.f32f32 r0.z, r0.y -mov.f32f32 r0.y, r4.z +mul.f r2.x, c4.z, r2.x +add.f r1.w, r1.w, c7.x +mov.f32f32 r3.x, r2.z +mul.f r2.w, r2.w, r2.w +mul.f r3.y, c4.y, r4.z +mov.f32f32 r3.z, r1.w +mad.f32 r2.y, c5.y, r3.x, r2.y +mul.f r5.y, r0.y, r4.z +mad.f32 r3.y, c5.y, r4.w, r3.y +mad.f32 r5.y, r0.x, r4.w, (neg)r5.y +mad.f32 r3.y, c6.y, r5.x, r3.y +mul.f r3.z, r3.z, r3.z +mul.f r5.z, c4.y, r0.x +mov.f32f32 r5.w, r5.y +add.f r3.y, r3.y, c7.y +mad.f32 r5.z, c5.y, r0.y, r5.z +mad.f32 r0.w, c5.x, r2.z, r0.w +mad.f32 r2.y, c6.y, r5.w, r2.y +mov.f32f32 r2.z, r3.y +mad.f32 r5.z, c6.y, r0.z, r5.z +mad.f32 r0.w, c6.x, r5.y, r0.w +add.f r2.y, r2.y, c7.y +mad.f32 r2.w, r3.y, r2.z, r2.w +mul.f r3.y, c4.z, r4.z +add.f r5.y, r5.z, c7.y +mov.f32f32 r5.z, r2.y +add.f r0.w, r0.w, c7.x +mad.f32 r3.y, c5.z, r4.w, r3.y +mov.f32f32 r6.x, r5.y +mad.f32 r3.y, c6.z, r5.x, r3.y +mov.f32f32 r6.y, r0.w +mad.f32 r2.x, c5.z, r3.x, r2.x +mad.f32 r5.y, r5.y, r6.x, r3.z +mad.f32 r2.x, c6.z, r5.w, r2.x +mul.f r3.x, r6.y, r6.y +add.f r3.y, r3.y, c7.z +mad.f32 r2.y, r2.y, r5.z, r3.x +add.f r2.x, r2.x, c7.z +mul.f r0.x, c4.z, r0.x +mov.f32f32 r5.w, r3.y +mad.f32 r0.x, c5.z, r0.y, r0.x +mov.f32f32 r0.y, r2.x +mad.f32 r0.x, c6.z, r0.z, r0.x +mad.f32 r0.z, r3.y, r5.w, r2.w +mul.f r2.w, c0.w, r3.w +mad.f32 r2.x, r2.x, r0.y, r2.y +add.f r0.x, r0.x, c7.z +mad.f32 r6.y, c1.w, r4.x, r2.w +mul.f r6.z, c0.z, r3.w +mul.f r6.w, c0.y, r3.w +mul.f r3.w, c0.x, r3.w +mul.f r4.z, c4.w, r4.z +rsq r2.x, r2.x +(ss)mov.f32f32 r2.y, r2.x +mul.f r3.x, r0.w, r2.x +rsq r0.z, r0.z +(ss)mov.f32f32 r0.w, r0.z +(ss)mov.f32f32 r2.x, r0.x +mul.f r3.z, r0.y, r2.y +mul.f r3.y, r5.z, r2.y +mul.f r2.w, r5.w, r0.w +mul.f r2.z, r2.z, r0.w +mad.f32 r0.x, r0.x, r2.x, r5.y +mul.f r2.y, r1.z, r0.z +mad.f32 r0.y, c2.w, r4.y, r6.y +mad.f32 r0.z, c1.z, r4.x, r6.z +mad.f32 r5.y, c1.y, r4.x, r6.w +mad.f32 r3.w, c1.x, r4.x, r3.w +mad.f32 r4.x, c5.w, r4.w, r4.z +rsq r0.x, r0.x +(ss)mov.f32f32 r4.z, r0.x +mul.f r1.z, r1.w, r0.x +add.f r0.w, r0.y, c3.w +(ss)mad.f32 r0.x, c2.z, r4.y, r0.z +mul.f r2.x, r2.x, r4.z +mul.f r1.w, r6.x, r4.z +mad.f32 r0.y, c2.y, r4.y, r5.y +add.f r0.z, r0.x, c3.z +mad.f32 r0.x, c2.x, r4.y, r3.w +mad.f32 r3.w, c6.w, r5.x, r4.x +add.f r0.y, r0.y, c3.y +nop add.f r0.x, r0.x, c3.x -mad.f32 r3.w, c6.w, r4.y, r3.w -(rpt1)nop -mov.f32f32 r0.x, r0.x -mov.f32f32 r3.w, r3.w -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.x, r1.x -(rpt1)nop -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.x, r1.x end nop ; VERT: outputs: r0.x (0:0) r1.x (5:20) r2.x (5:21) r3.x (5:22) -; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r1.x (0:0,cm=3,il=12,b=0) r1.z (0:0,cm=7,il=16,b=0) r3.w (0:0,cm=7,il=20,b=0) -; VERT: 146 instructions, 0 half, 6 full +; VERT: inputs: r3.w (0:0,cm=7,il=8,b=0) r1.x (0:0,cm=3,il=12,b=0) r0.x (0:0,cm=7,il=16,b=0) r4.z (0:0,cm=7,il=20,b=0) +; VERT: 99 instructions, 0 half, 7 full diff --git a/reference/bump/bump-13.asm b/reference/bump/bump-13.asm index a946e9f..3ae555a 100644 --- a/reference/bump/bump-13.asm +++ b/reference/bump/bump-13.asm @@ -6,143 +6,100 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c0.x) 0x3a83126f, 0x00000000, 0x41500000, 0x42c80000 +@const(c1.x) 0x3f4ccccd, 0x3f800000, 0x3f2aacda, 0x3eaaa64c +@const(c2.x) 0x3e23d70a, 0x3f800000, 0x3ed0ff97, 0x3f510625 +@const(c3.x) 0x3dcccccd, 0x3f800000, 0x00000000, 0x00000000 +@const(c4.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.z, 0, r0.x bary.f r0.w, 1, r0.x -bary.f r1.x, 2, r0.x -bary.f r1.y, 3, r0.x +bary.f r1.x, 8, r0.x +bary.f r1.y, 5, r0.x add.f r1.z, r0.z, c0.x -add.f r1.w, r0.z, c0.y -add.f r2.x, r0.w, c0.x -add.f r2.y, r0.w, c0.y -mov.f32f32 r1.z, r1.z -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r2.y, r2.y -mov.f32f32 r1.z, r1.z -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r2.y, r2.y -mov.f32f32 r2.z, r1.z -mov.f32f32 r1.z, r1.w -mov.f32f32 r1.w, r2.x -mov.f32f32 r2.w, r2.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.y, r1.y -bary.f r2.x, 4, r0.x -sam (f32)(x)r3.x, r1.z, s#0, t#0 -(ss)mov.f32f32 r1.z, r0.z -sam (f32)(x)r3.y, r2.z, s#0, t#0 -mov.f32f32 r1.w, r0.w -bary.f r0.z, 8, r0.x -bary.f r0.w, 5, r0.x -bary.f r2.y, 9, r0.x -mov.f32f32 r2.x, r2.x -(ss)bary.f r2.z, 6, r0.x -bary.f r2.w, 10, r0.x -sam (f32)(x)r3.z, r1.z, s#0, t#0 -(sy)(ss)add.f r1.z, r3.y, (neg)r3.z -add.f r1.w, r3.x, (neg)r3.z -bary.f (ei)r0.x, 7, r0.x -nop -mov.f32f32 r0.y, r1.z -mov.f32f32 r1.z, r1.w -(rpt1)nop -mul.f r0.y, c0.z, r0.y +add.f r1.w, r0.w, c0.y +add.f r2.x, r0.z, c0.y +add.f r2.y, r0.w, c0.x +sam (f32)(x)r2.z, r0.z, s#0, t#0 +(ss)bary.f r0.z, 2, r0.x +bary.f r0.w, 9, r0.x +bary.f r2.w, 6, r0.x +bary.f r3.x, 3, r0.x +sam (f32)(x)r3.y, r1.z, s#0, t#0 +(sy)(ss)add.f r1.z, r3.y, (neg)r2.z +bary.f r1.w, 10, r0.x +bary.f r3.y, 7, r0.x +sam (f32)(x)r3.z, r2.x, s#0, t#0 +(sy)(ss)add.f r2.x, r3.z, (neg)r2.z mul.f r1.z, c0.z, r1.z +bary.f (ei)r0.x, 4, r0.x (rpt1)nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.z, r1.z -(rpt1)nop -mul.f r0.w, r0.y, r0.w -mul.f r1.w, r0.y, r2.z -mul.f r0.x, r0.y, r0.x -mul.f r0.y, r1.z, r0.z -mov.f32f32 r0.z, r0.w -mov.f32f32 r0.w, r1.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y -add.f r0.z, r1.x, (neg)r0.z -add.f r0.w, r1.y, (neg)r0.w -add.f r0.x, r2.x, (neg)r0.x -mul.f r1.x, r1.z, r2.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r1.x, r1.x -add.f r0.y, r0.z, (neg)r0.y -mul.f r0.z, r1.z, r2.w -(rpt1)nop -mov.f32f32 r0.y, r0.y -add.f r0.w, r0.w, (neg)r1.x -mov.f32f32 r0.z, r0.z +mov.f32f32 r0.y, r1.z +mul.f r1.z, r1.z, r3.y +mul.f r2.x, c0.z, r2.x +nop +mul.f r1.y, r0.y, r1.y +mul.f r0.y, r0.y, r2.w +add.f r0.x, r0.x, (neg)r1.z +mul.f r1.z, r2.x, r1.w +add.f r0.z, r0.z, (neg)r1.y +mov.f32f32 r1.y, r2.x +add.f r0.y, r3.x, (neg)r0.y +add.f r0.x, r0.x, (neg)r1.z +nop +mul.f r1.x, r1.y, r1.x +mul.f r0.w, r1.y, r0.w +mov.f32f32 r1.y, r0.x nop -mul.f r1.x, r0.y, r0.y -mov.f32f32 r0.w, r0.w -add.f r0.x, r0.x, (neg)r0.z +add.f r0.z, r0.z, (neg)r1.x +add.f r0.y, r0.y, (neg)r0.w (rpt1)nop -mad.f32 r0.z, r0.w, r0.w, r1.x -mov.f32f32 r0.x, r0.x +mov.f32f32 r0.w, r0.z +mov.f32f32 r1.x, r0.y (rpt1)nop -mov.f32f32 r0.z, r0.z +mul.f r0.z, r0.z, r0.w nop -mad.f32 r0.z, r0.x, r0.x, r0.z -(rpt5)nop -rsq r0.z, r0.z -(ss)mov.f32f32 r0.z, r0.z -(rpt2)nop -mul.f r0.y, r0.y, r0.z -mul.f r0.w, r0.w, r0.z -mul.f r0.x, r0.x, r0.z +mad.f32 r0.y, r0.y, r1.x, r0.z nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.w -mov.f32f32 r0.x, r0.x +mad.f32 r0.y, r1.y, r1.y, r0.y +(rpt5)nop +rsq r0.y, r0.y +(ss)mov.f32f32 r0.z, r0.y +mul.f r0.x, r0.x, r0.y +(rpt1)nop +(ss)mul.f r0.y, r0.w, r0.z +mul.f r0.z, r1.x, r0.z +mov.f32f32 r0.w, r0.x nop -mul.f r0.w, r0.y, c2.z +mov.f32f32 r1.x, r0.y mul.f r0.y, r0.y, c1.z -mad.f32 r0.w, c2.z, r0.z, r0.w +mov.f32f32 r1.y, r0.z +nop +mul.f r1.x, r1.x, c2.z mad.f32 r0.y, c1.z, r0.z, r0.y -(rpt1)nop -mov.f32f32 r0.z, r0.w -mov.f32f32 r0.y, r0.y -mad.f32 r0.z, c2.w, r0.x, r0.z +mad.f32 r0.z, c2.z, r1.y, r1.x mad.f32 r0.x, c1.w, r0.x, r0.y -(rpt1)nop -mov.f32f32 r0.y, r0.z -max.f r0.x, r0.x, c0.y -(rpt1)nop +mad.f32 r0.y, c2.w, r0.w, r0.z +(rpt2)nop max.f r0.y, r0.y, c0.y -mov.f32f32 r0.x, r0.x -(rpt1)nop -mov.f32f32 r0.y, r0.y -(rpt5)nop +max.f r0.x, r0.x, c0.y +(rpt4)nop log2 r0.y, r0.y (ss)mul.f r0.y, c0.w, r0.y -(rpt2)nop -mov.f32f32 r0.y, r0.y -(rpt5)nop +mov.f32f32 r0.z, r0.x +(rpt4)nop exp2 r0.y, r0.y -(ss)mad.f32 r0.z, c2.y, r0.y, c3.y -mad.f32 r0.w, c2.x, r0.y, c3.x +(ss)mad.f32 r0.w, c2.y, r0.y, c3.y mad.f32 r1.x, c2.x, r0.y, c3.x +mad.f32 r1.w, c1.y, r0.z, r0.w +mad.f32 r1.z, c1.x, r0.z, r1.x +mad.f32 r0.w, c2.x, r0.y, c3.x (ss)mad.f32 r0.y, c2.x, r0.y, c3.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -mad.f32 r0.z, c1.y, r0.x, r0.z -mad.f32 r0.w, c1.x, r0.x, r0.w -mov.f32f32 r1.x, r1.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.w, r0.z -mov.f32f32 r1.z, r0.w -mad.f32 r0.z, c1.x, r0.x, r1.x -mad.f32 r0.x, c1.x, r0.x, r0.y -(rpt1)nop -mov.f32f32 r1.y, r0.z -mov.f32f32 r1.x, r0.x +mad.f32 r1.y, c1.x, r0.z, r0.w +mad.f32 r1.x, c1.x, r0.x, r0.y end +nop +nop ; FRAG: outputs: r1.x (1:0) -; FRAG: inputs: r0.z (5:20,cm=f,il=8,b=1) r63.y (5:21,cm=f,il=12,b=1) r2.x (5:22,cm=f,il=16,b=1) -; FRAG: 166 instructions, 0 half, 4 full +; FRAG: inputs: r2.y (5:20,cm=f,il=8,b=1) r2.z (5:21,cm=f,il=12,b=1) r2.x (5:22,cm=f,il=16,b=1) +; FRAG: 105 instructions, 0 half, 4 full diff --git a/reference/bump1.asm b/reference/bump1.asm index a946e9f..3ae555a 100644 --- a/reference/bump1.asm +++ b/reference/bump1.asm @@ -6,143 +6,100 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c0.x) 0x3a83126f, 0x00000000, 0x41500000, 0x42c80000 +@const(c1.x) 0x3f4ccccd, 0x3f800000, 0x3f2aacda, 0x3eaaa64c +@const(c2.x) 0x3e23d70a, 0x3f800000, 0x3ed0ff97, 0x3f510625 +@const(c3.x) 0x3dcccccd, 0x3f800000, 0x00000000, 0x00000000 +@const(c4.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.z, 0, r0.x bary.f r0.w, 1, r0.x -bary.f r1.x, 2, r0.x -bary.f r1.y, 3, r0.x +bary.f r1.x, 8, r0.x +bary.f r1.y, 5, r0.x add.f r1.z, r0.z, c0.x -add.f r1.w, r0.z, c0.y -add.f r2.x, r0.w, c0.x -add.f r2.y, r0.w, c0.y -mov.f32f32 r1.z, r1.z -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r2.y, r2.y -mov.f32f32 r1.z, r1.z -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r2.y, r2.y -mov.f32f32 r2.z, r1.z -mov.f32f32 r1.z, r1.w -mov.f32f32 r1.w, r2.x -mov.f32f32 r2.w, r2.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.y, r1.y -bary.f r2.x, 4, r0.x -sam (f32)(x)r3.x, r1.z, s#0, t#0 -(ss)mov.f32f32 r1.z, r0.z -sam (f32)(x)r3.y, r2.z, s#0, t#0 -mov.f32f32 r1.w, r0.w -bary.f r0.z, 8, r0.x -bary.f r0.w, 5, r0.x -bary.f r2.y, 9, r0.x -mov.f32f32 r2.x, r2.x -(ss)bary.f r2.z, 6, r0.x -bary.f r2.w, 10, r0.x -sam (f32)(x)r3.z, r1.z, s#0, t#0 -(sy)(ss)add.f r1.z, r3.y, (neg)r3.z -add.f r1.w, r3.x, (neg)r3.z -bary.f (ei)r0.x, 7, r0.x -nop -mov.f32f32 r0.y, r1.z -mov.f32f32 r1.z, r1.w -(rpt1)nop -mul.f r0.y, c0.z, r0.y +add.f r1.w, r0.w, c0.y +add.f r2.x, r0.z, c0.y +add.f r2.y, r0.w, c0.x +sam (f32)(x)r2.z, r0.z, s#0, t#0 +(ss)bary.f r0.z, 2, r0.x +bary.f r0.w, 9, r0.x +bary.f r2.w, 6, r0.x +bary.f r3.x, 3, r0.x +sam (f32)(x)r3.y, r1.z, s#0, t#0 +(sy)(ss)add.f r1.z, r3.y, (neg)r2.z +bary.f r1.w, 10, r0.x +bary.f r3.y, 7, r0.x +sam (f32)(x)r3.z, r2.x, s#0, t#0 +(sy)(ss)add.f r2.x, r3.z, (neg)r2.z mul.f r1.z, c0.z, r1.z +bary.f (ei)r0.x, 4, r0.x (rpt1)nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.z, r1.z -(rpt1)nop -mul.f r0.w, r0.y, r0.w -mul.f r1.w, r0.y, r2.z -mul.f r0.x, r0.y, r0.x -mul.f r0.y, r1.z, r0.z -mov.f32f32 r0.z, r0.w -mov.f32f32 r0.w, r1.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y -add.f r0.z, r1.x, (neg)r0.z -add.f r0.w, r1.y, (neg)r0.w -add.f r0.x, r2.x, (neg)r0.x -mul.f r1.x, r1.z, r2.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r1.x, r1.x -add.f r0.y, r0.z, (neg)r0.y -mul.f r0.z, r1.z, r2.w -(rpt1)nop -mov.f32f32 r0.y, r0.y -add.f r0.w, r0.w, (neg)r1.x -mov.f32f32 r0.z, r0.z +mov.f32f32 r0.y, r1.z +mul.f r1.z, r1.z, r3.y +mul.f r2.x, c0.z, r2.x +nop +mul.f r1.y, r0.y, r1.y +mul.f r0.y, r0.y, r2.w +add.f r0.x, r0.x, (neg)r1.z +mul.f r1.z, r2.x, r1.w +add.f r0.z, r0.z, (neg)r1.y +mov.f32f32 r1.y, r2.x +add.f r0.y, r3.x, (neg)r0.y +add.f r0.x, r0.x, (neg)r1.z +nop +mul.f r1.x, r1.y, r1.x +mul.f r0.w, r1.y, r0.w +mov.f32f32 r1.y, r0.x nop -mul.f r1.x, r0.y, r0.y -mov.f32f32 r0.w, r0.w -add.f r0.x, r0.x, (neg)r0.z +add.f r0.z, r0.z, (neg)r1.x +add.f r0.y, r0.y, (neg)r0.w (rpt1)nop -mad.f32 r0.z, r0.w, r0.w, r1.x -mov.f32f32 r0.x, r0.x +mov.f32f32 r0.w, r0.z +mov.f32f32 r1.x, r0.y (rpt1)nop -mov.f32f32 r0.z, r0.z +mul.f r0.z, r0.z, r0.w nop -mad.f32 r0.z, r0.x, r0.x, r0.z -(rpt5)nop -rsq r0.z, r0.z -(ss)mov.f32f32 r0.z, r0.z -(rpt2)nop -mul.f r0.y, r0.y, r0.z -mul.f r0.w, r0.w, r0.z -mul.f r0.x, r0.x, r0.z +mad.f32 r0.y, r0.y, r1.x, r0.z nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.w -mov.f32f32 r0.x, r0.x +mad.f32 r0.y, r1.y, r1.y, r0.y +(rpt5)nop +rsq r0.y, r0.y +(ss)mov.f32f32 r0.z, r0.y +mul.f r0.x, r0.x, r0.y +(rpt1)nop +(ss)mul.f r0.y, r0.w, r0.z +mul.f r0.z, r1.x, r0.z +mov.f32f32 r0.w, r0.x nop -mul.f r0.w, r0.y, c2.z +mov.f32f32 r1.x, r0.y mul.f r0.y, r0.y, c1.z -mad.f32 r0.w, c2.z, r0.z, r0.w +mov.f32f32 r1.y, r0.z +nop +mul.f r1.x, r1.x, c2.z mad.f32 r0.y, c1.z, r0.z, r0.y -(rpt1)nop -mov.f32f32 r0.z, r0.w -mov.f32f32 r0.y, r0.y -mad.f32 r0.z, c2.w, r0.x, r0.z +mad.f32 r0.z, c2.z, r1.y, r1.x mad.f32 r0.x, c1.w, r0.x, r0.y -(rpt1)nop -mov.f32f32 r0.y, r0.z -max.f r0.x, r0.x, c0.y -(rpt1)nop +mad.f32 r0.y, c2.w, r0.w, r0.z +(rpt2)nop max.f r0.y, r0.y, c0.y -mov.f32f32 r0.x, r0.x -(rpt1)nop -mov.f32f32 r0.y, r0.y -(rpt5)nop +max.f r0.x, r0.x, c0.y +(rpt4)nop log2 r0.y, r0.y (ss)mul.f r0.y, c0.w, r0.y -(rpt2)nop -mov.f32f32 r0.y, r0.y -(rpt5)nop +mov.f32f32 r0.z, r0.x +(rpt4)nop exp2 r0.y, r0.y -(ss)mad.f32 r0.z, c2.y, r0.y, c3.y -mad.f32 r0.w, c2.x, r0.y, c3.x +(ss)mad.f32 r0.w, c2.y, r0.y, c3.y mad.f32 r1.x, c2.x, r0.y, c3.x +mad.f32 r1.w, c1.y, r0.z, r0.w +mad.f32 r1.z, c1.x, r0.z, r1.x +mad.f32 r0.w, c2.x, r0.y, c3.x (ss)mad.f32 r0.y, c2.x, r0.y, c3.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -mad.f32 r0.z, c1.y, r0.x, r0.z -mad.f32 r0.w, c1.x, r0.x, r0.w -mov.f32f32 r1.x, r1.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.w, r0.z -mov.f32f32 r1.z, r0.w -mad.f32 r0.z, c1.x, r0.x, r1.x -mad.f32 r0.x, c1.x, r0.x, r0.y -(rpt1)nop -mov.f32f32 r1.y, r0.z -mov.f32f32 r1.x, r0.x +mad.f32 r1.y, c1.x, r0.z, r0.w +mad.f32 r1.x, c1.x, r0.x, r0.y end +nop +nop ; FRAG: outputs: r1.x (1:0) -; FRAG: inputs: r0.z (5:20,cm=f,il=8,b=1) r63.y (5:21,cm=f,il=12,b=1) r2.x (5:22,cm=f,il=16,b=1) -; FRAG: 166 instructions, 0 half, 4 full +; FRAG: inputs: r2.y (5:20,cm=f,il=8,b=1) r2.z (5:21,cm=f,il=12,b=1) r2.x (5:22,cm=f,il=16,b=1) +; FRAG: 105 instructions, 0 half, 4 full diff --git a/reference/bump2.asm b/reference/bump2.asm index 0f8742e..9112ce0 100644 --- a/reference/bump2.asm +++ b/reference/bump2.asm @@ -1,16 +1,16 @@ ; options: ; VERT: new compiler -@in(r0.x) in0 -@in(r0.y) in1 -@in(r0.z) in2 +@in(r3.w) in0 +@in(r4.x) in1 +@in(r4.y) in2 @in(r1.x) in4 @in(r1.y) in5 -@in(r1.z) in8 -@in(r1.w) in9 -@in(r2.x) in10 -@in(r3.w) in12 -@in(r4.x) in13 -@in(r4.y) in14 +@in(r0.x) in8 +@in(r0.y) in9 +@in(r0.z) in10 +@in(r4.z) in12 +@in(r4.w) in13 +@in(r5.x) in14 @out(r0.x) out0 @out(r0.y) out1 @out(r0.z) out2 @@ -27,151 +27,109 @@ @out(r3.y) out13 @out(r3.z) out14 @out(r3.w) out15 -(sy)(ss)mul.f r0.w, r2.x, r4.x -mul.f r2.y, c4.x, r1.z -mad.f32 r0.w, r1.w, r4.y, (neg)r0.w -mad.f32 r2.y, c5.x, r1.w, r2.y -mul.f r2.z, c4.x, r3.w -mad.f32 r2.y, c6.x, r2.x, r2.y -mov.f32f32 r0.w, r0.w -mad.f32 r2.z, c5.x, r4.x, r2.z -mul.f r2.w, c4.y, r3.w -mul.f r3.x, c4.z, r3.w -mul.f r3.y, c4.x, r0.w -mul.f r3.z, r1.z, r4.y -add.f r2.y, r2.y, c7.x -mad.f32 r3.z, r2.x, r3.w, (neg)r3.z -mad.f32 r2.z, c6.x, r4.y, r2.z -mul.f r4.z, c4.y, r0.w -mul.f r0.w, c4.z, r0.w -mov.f32f32 r3.z, r3.z -mov.f32f32 r4.w, r2.y -add.f r2.y, r2.z, c7.x -mad.f32 r2.z, c5.y, r4.x, r2.w -mad.f32 r2.w, c5.x, r3.z, r3.y -mul.f r3.y, r1.w, r3.w -mul.f r5.x, r4.w, r4.w -mad.f32 r3.y, r1.z, r4.x, (neg)r3.y -mul.f r5.y, c4.y, r1.z -mov.f32f32 r2.y, r2.y -mad.f32 r5.y, c5.y, r1.w, r5.y -mov.f32f32 r3.y, r3.y -mad.f32 r5.y, c6.y, r2.x, r5.y -mul.f r5.z, r2.y, r2.y -mad.f32 r2.z, c6.y, r4.y, r2.z -mad.f32 r2.w, c6.x, r3.y, r2.w -add.f r5.y, r5.y, c7.y -mad.f32 r4.z, c5.y, r3.z, r4.z -mad.f32 r0.w, c5.z, r3.z, r0.w -mov.f32f32 r2.w, r2.w -mov.f32f32 r5.y, r5.y -add.f r2.z, r2.z, c7.y -mad.f32 r3.z, c6.y, r3.y, r4.z -add.f r2.w, r2.w, c7.x -mad.f32 r4.z, r5.y, r5.y, r5.x -mov.f32f32 r2.z, r2.z -mov.f32f32 r3.z, r3.z -mov.f32f32 r2.w, r2.w -mov.f32f32 r4.z, r4.z -mad.f32 r5.x, r2.z, r2.z, r5.z -mul.f r1.z, c4.z, r1.z -mul.f r5.z, r2.w, r2.w -add.f r3.z, r3.z, c7.y -mad.f32 r1.z, c5.z, r1.w, r1.z -mov.f32f32 r1.w, r5.x -mad.f32 r3.x, c5.z, r4.x, r3.x -mov.f32f32 r3.z, r3.z -mad.f32 r1.z, c6.z, r2.x, r1.z -mad.f32 r2.x, c6.z, r4.y, r3.x -mad.f32 r0.w, c6.z, r3.y, r0.w -mad.f32 r3.x, r3.z, r3.z, r5.z -add.f r1.z, r1.z, c7.z -add.f r2.x, r2.x, c7.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r3.x, r3.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r2.x, r2.x -add.f r0.w, r0.w, c7.z -mul.f r3.y, c0.w, r0.x -mul.f r5.x, c0.z, r0.x -mul.f r5.z, c0.y, r0.x -mov.f32f32 r0.w, r0.w -mad.f32 r4.z, r1.z, r1.z, r4.z -mad.f32 r1.w, r2.x, r2.x, r1.w -mad.f32 r3.y, c1.w, r0.y, r3.y -mad.f32 r3.x, r0.w, r0.w, r3.x -mad.f32 r3.y, c2.w, r0.z, r3.y -mad.f32 r5.x, c1.z, r0.y, r5.x -mad.f32 r5.z, c1.y, r0.y, r5.z -mul.f r0.x, c0.x, r0.x -mul.f r3.w, c4.w, r3.w -add.f r5.w, r3.y, c3.w -rsq r3.x, r3.x -(ss)mov.f32f32 r3.x, r3.x -rsq r3.y, r4.z -nop -rsq r1.w, r1.w -(ss)mov.f32f32 r4.z, r3.y -mov.f32f32 r1.w, r1.w -mul.f r0.w, r0.w, r3.x -mul.f r3.y, r3.z, r3.x -mul.f r2.w, r2.w, r3.x -mul.f r1.z, r1.z, r4.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r3.x, r3.y -mov.f32f32 r2.w, r2.w -nop -mov.f32f32 r3.z, r0.w -mov.f32f32 r3.y, r3.x -mov.f32f32 r3.x, r2.w -mov.f32f32 r0.w, r1.z -mul.f r1.z, r2.x, r1.w -mul.f r2.x, r2.z, r1.w -mul.f r1.w, r2.y, r1.w -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.z, r1.z -mov.f32f32 r2.x, r2.x -mov.f32f32 r1.w, r1.w -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.z, r1.z -mov.f32f32 r2.y, r2.x -mov.f32f32 r1.w, r1.w +@const(c8.x) 0x3f800000, 0x00000000, 0x00000000, 0x00000000 +@const(c9.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 +(sy)(ss)mul.f r0.w, r0.z, r4.w +mul.f r1.z, c4.x, r4.z +mad.f32 r0.w, r0.y, r5.x, (neg)r0.w +mad.f32 r1.z, c5.x, r4.w, r1.z +mul.f r1.w, c4.x, r0.x +mad.f32 r1.z, c6.x, r5.x, r1.z mov.f32f32 r2.x, r0.w +mad.f32 r1.w, c5.x, r0.y, r1.w +mul.f r0.w, c4.x, r0.w +add.f r1.z, r1.z, c7.x +mul.f r2.y, c4.y, r2.x +mul.f r2.z, r0.x, r5.x +mad.f32 r1.w, c6.x, r0.z, r1.w +mad.f32 r2.z, r0.z, r4.z, (neg)r2.z mov.f32f32 r2.w, r1.z -mov.f32f32 r2.z, r2.y -mov.f32f32 r2.y, r1.w -mul.f r1.z, r5.y, r4.z -mul.f r1.w, r4.w, r4.z -mov.f32f32 r0.w, r5.w -mad.f32 r4.z, c2.z, r0.z, r5.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r1.w, r1.w -mad.f32 r4.w, c2.y, r0.z, r5.z -mad.f32 r0.x, c1.x, r0.y, r0.x -mov.f32f32 r0.y, r1.z -mov.f32f32 r1.z, r1.w -(rpt1)nop -mov.f32f32 r1.w, r0.y -mov.f32f32 r1.z, r1.z -add.f r0.y, r4.z, c3.z -add.f r4.z, r4.w, c3.y -mad.f32 r0.x, c2.x, r0.z, r0.x -mad.f32 r3.w, c5.w, r4.x, r3.w -mov.f32f32 r0.z, r0.y -mov.f32f32 r0.y, r4.z +mul.f r2.x, c4.z, r2.x +add.f r1.w, r1.w, c7.x +mov.f32f32 r3.x, r2.z +mul.f r2.w, r2.w, r2.w +mul.f r3.y, c4.y, r4.z +mov.f32f32 r3.z, r1.w +mad.f32 r2.y, c5.y, r3.x, r2.y +mul.f r5.y, r0.y, r4.z +mad.f32 r3.y, c5.y, r4.w, r3.y +mad.f32 r5.y, r0.x, r4.w, (neg)r5.y +mad.f32 r3.y, c6.y, r5.x, r3.y +mul.f r3.z, r3.z, r3.z +mul.f r5.z, c4.y, r0.x +mov.f32f32 r5.w, r5.y +add.f r3.y, r3.y, c7.y +mad.f32 r5.z, c5.y, r0.y, r5.z +mad.f32 r0.w, c5.x, r2.z, r0.w +mad.f32 r2.y, c6.y, r5.w, r2.y +mov.f32f32 r2.z, r3.y +mad.f32 r5.z, c6.y, r0.z, r5.z +mad.f32 r0.w, c6.x, r5.y, r0.w +add.f r2.y, r2.y, c7.y +mad.f32 r2.w, r3.y, r2.z, r2.w +mul.f r3.y, c4.z, r4.z +add.f r5.y, r5.z, c7.y +mov.f32f32 r5.z, r2.y +add.f r0.w, r0.w, c7.x +mad.f32 r3.y, c5.z, r4.w, r3.y +mov.f32f32 r6.x, r5.y +mad.f32 r3.y, c6.z, r5.x, r3.y +mov.f32f32 r6.y, r0.w +mad.f32 r2.x, c5.z, r3.x, r2.x +mad.f32 r5.y, r5.y, r6.x, r3.z +mad.f32 r2.x, c6.z, r5.w, r2.x +mul.f r3.x, r6.y, r6.y +add.f r3.y, r3.y, c7.z +mad.f32 r2.y, r2.y, r5.z, r3.x +add.f r2.x, r2.x, c7.z +mul.f r0.x, c4.z, r0.x +mov.f32f32 r5.w, r3.y +mad.f32 r0.x, c5.z, r0.y, r0.x +mov.f32f32 r0.y, r2.x +mad.f32 r0.x, c6.z, r0.z, r0.x +mad.f32 r0.z, r3.y, r5.w, r2.w +mul.f r2.w, c0.w, r3.w +mad.f32 r2.x, r2.x, r0.y, r2.y +add.f r0.x, r0.x, c7.z +mad.f32 r6.y, c1.w, r4.x, r2.w +mul.f r6.z, c0.z, r3.w +mul.f r6.w, c0.y, r3.w +mul.f r3.w, c0.x, r3.w +mul.f r4.z, c4.w, r4.z +rsq r2.x, r2.x +(ss)mov.f32f32 r2.y, r2.x +mul.f r3.x, r0.w, r2.x +rsq r0.z, r0.z +(ss)mov.f32f32 r0.w, r0.z +(ss)mov.f32f32 r2.x, r0.x +mul.f r3.z, r0.y, r2.y +mul.f r3.y, r5.z, r2.y +mul.f r2.w, r5.w, r0.w +mul.f r2.z, r2.z, r0.w +mad.f32 r0.x, r0.x, r2.x, r5.y +mul.f r2.y, r1.z, r0.z +mad.f32 r0.y, c2.w, r4.y, r6.y +mad.f32 r0.z, c1.z, r4.x, r6.z +mad.f32 r5.y, c1.y, r4.x, r6.w +mad.f32 r3.w, c1.x, r4.x, r3.w +mad.f32 r4.x, c5.w, r4.w, r4.z +rsq r0.x, r0.x +(ss)mov.f32f32 r4.z, r0.x +mul.f r1.z, r1.w, r0.x +add.f r0.w, r0.y, c3.w +(ss)mad.f32 r0.x, c2.z, r4.y, r0.z +mul.f r2.x, r2.x, r4.z +mul.f r1.w, r6.x, r4.z +mad.f32 r0.y, c2.y, r4.y, r5.y +add.f r0.z, r0.x, c3.z +mad.f32 r0.x, c2.x, r4.y, r3.w +mad.f32 r3.w, c6.w, r5.x, r4.x +add.f r0.y, r0.y, c3.y +nop add.f r0.x, r0.x, c3.x -mad.f32 r3.w, c6.w, r4.y, r3.w -(rpt1)nop -mov.f32f32 r0.x, r0.x -mov.f32f32 r3.w, r3.w -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.x, r1.x -(rpt1)nop -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.x, r1.x end nop ; VERT: outputs: r0.x (0:0) r1.x (5:20) r2.x (5:21) r3.x (5:22) -; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r1.x (0:0,cm=3,il=12,b=0) r1.z (0:0,cm=7,il=16,b=0) r3.w (0:0,cm=7,il=20,b=0) -; VERT: 146 instructions, 0 half, 6 full +; VERT: inputs: r3.w (0:0,cm=7,il=8,b=0) r1.x (0:0,cm=3,il=12,b=0) r0.x (0:0,cm=7,il=16,b=0) r4.z (0:0,cm=7,il=20,b=0) +; VERT: 99 instructions, 0 half, 7 full diff --git a/reference/chrome/bad.asm b/reference/chrome/bad.asm index 386d1c8..9cccfa5 100644 --- a/reference/chrome/bad.asm +++ b/reference/chrome/bad.asm @@ -1,12 +1,12 @@ ; options: ; VERT: new compiler -@in(r0.x) in0 -@in(r0.y) in1 -@in(r0.z) in2 -@in(r0.w) in3 -@in(r1.x) in4 -@in(r1.y) in5 -@in(r1.z) in8 +@in(r2.x) in0 +@in(r2.y) in1 +@in(r2.z) in2 +@in(r2.w) in3 +@in(r0.x) in4 +@in(r0.y) in5 +@in(r0.z) in8 @out(r0.x) out0 @out(r0.y) out1 @out(r0.z) out2 @@ -15,107 +15,76 @@ @out(r1.y) out5 @out(r1.z) out6 @out(r1.w) out7 -(sy)(ss)mul.f r1.z, r1.z, c40.x +@const(c40.x) 0x3e800000, 0x00000000, 0x40e00000, 0x40800000 +(sy)(ss)mul.f r0.z, r0.z, c40.x (rpt2)nop -trunc.f r1.z, r1.z -(rpt2)nop -mov.f32f32 r1.z, r1.z +trunc.f r0.z, r0.z (rpt2)nop -max.f r1.w, r1.z, c40.y -max.f r2.x, r1.z, c40.y -max.f r2.y, r1.z, c40.y -max.f r2.z, r1.z, c40.y -min.f r1.w, r1.w, c40.z -mov.f32f32 r2.x, r2.x -min.f r2.y, r2.y, c40.z -min.f r2.z, r2.z, c40.z -mov.f32f32 r1.w, r1.w -min.f r2.x, r2.x, c40.z -mov.f32f32 r2.y, r2.y -mov.f32f32 r2.z, r2.z -trunc.f r1.w, r1.w -mov.f32f32 r2.x, r2.x -trunc.f r2.y, r2.y -trunc.f r2.z, r2.z -mov.f32f32 r1.w, r1.w -trunc.f r2.x, r2.x -mov.f32f32 r2.y, r2.y -mov.f32f32 r2.z, r2.z -mul.f r1.w, r1.w, c40.w -mov.f32f32 r2.x, r2.x -mul.f r2.y, r2.y, c40.w -mul.f r2.z, r2.z, c40.w -mov.f32f32 r1.w, r1.w -cov.f32s16 hr0.x, r2.x -mov.f32f32 r2.x, r2.y -mov.f32f32 r2.y, r2.z -cov.f32s16 hr0.y, r1.w +mov.f32f32 r0.w, r0.z +max.f r0.z, r0.z, c40.y +(rpt1)nop +max.f r1.x, r0.w, c40.y +max.f r1.y, r0.w, c40.y +max.f r1.z, r0.w, c40.y +max.f r0.w, r0.w, c40.y +min.f r1.x, r1.x, c40.z +min.f r1.y, r1.y, c40.z +min.f r1.z, r1.z, c40.z +min.f r0.w, r0.w, c40.z +trunc.f r1.x, r1.x +trunc.f r1.y, r1.y +trunc.f r1.z, r1.z +trunc.f r0.w, r0.w +mul.f r1.x, r1.x, c40.w +cov.f32s16 hr0.x, r1.y +mul.f r1.y, r1.z, c40.w +mul.f r0.w, r0.w, c40.w +cov.f32s16 hr0.y, r1.x shl.b hr0.x, hr0.x, 2 -cov.f32s16 hr0.z, r2.x -cov.f32s16 hr0.w, r2.y +cov.f32s16 hr0.z, r1.y +cov.f32s16 hr0.w, r0.w shl.b hr0.y, hr0.y, 2 mova a0.x, hr0.x shl.b hr0.x, hr0.z, 2 shl.b hr0.z, hr0.w, 2 -max.f r2.x, r1.z, c40.y +min.f r0.z, r0.z, c40.z (rpt2)nop -mov.f32f32 r1.z, c +mov.f32f32 r0.w, c mov.f32f32 r1.w, c -mov.f32f32 r2.y, c -(ul)mov.f32f32 r2.z, c +mov.f32f32 r1.x, c +(ul)mov.f32f32 r1.z, c mova a0.x, hr0.y -mad.f32 r1.y, r1.y, r1.w, r1.z -mov.f32f32 r1.w, r1.w -mov.f32f32 r1.z, r2.z -mad.f32 r1.x, r1.x, r2.z, r2.y -min.f r2.x, r2.x, c40.z -nop -mul.f r2.y, c, r0.x -mul.f r2.z, c, r0.x -mul.f r2.w, c, r0.x -(ul)mul.f r0.x, c, r0.x +mad.f32 r1.y, r0.y, r1.w, r0.w +trunc.f r0.y, r0.z +(rpt3)nop +mul.f r0.z, c, r2.x +mul.f r0.w, c, r2.x +mul.f r3.x, c, r2.x +(ul)mul.f r2.x, c, r2.x mova a0.x, hr0.x -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.x, r1.x -mov.f32f32 r2.x, r2.x -(rpt2)nop -mad.f32 r2.y, c, r0.y, r2.y -mad.f32 r2.z, c, r0.y, r2.z -mad.f32 r2.w, c, r0.y, r2.w -(ul)mad.f32 r0.x, c, r0.y, r0.x +mad.f32 r1.x, r0.x, r1.z, r1.x +mul.f r0.x, r0.y, c40.w +(rpt3)nop +mad.f32 r0.y, c, r2.y, r0.z +mad.f32 r0.z, c, r2.y, r0.w +mad.f32 r0.w, c, r2.y, r3.x +(ul)mad.f32 r2.x, c, r2.y, r2.x mova a0.x, hr0.z -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.x, r1.x -trunc.f r0.y, r2.x -(rpt2)nop -mad.f32 r2.x, c, r0.z, r2.y -mov.f32f32 r0.y, r0.y -mad.f32 r2.y, c, r0.z, r2.z -mad.f32 r2.z, c, r0.z, r2.w -(ul)mad.f32 r0.x, c, r0.z, r0.x -mul.f r0.y, r0.y, c40.w -(rpt2)nop -mov.f32f32 r0.y, r0.y -(rpt2)nop -cov.f32s16 hr0.x, r0.y -(rpt2)nop +cov.f32s16 hr0.x, r0.x +(rpt4)nop +mad.f32 r0.x, c, r2.z, r0.y shl.b hr0.x, hr0.x, 2 -(rpt2)nop +mad.f32 r0.y, c, r2.z, r0.z +mad.f32 r2.y, c, r2.z, r0.w +(ul)mad.f32 r2.x, c, r2.z, r2.x mova a0.x, hr0.x (rpt5)nop -mad.f32 r0.y, c, r0.w, r2.x -mad.f32 r0.z, c, r0.w, r2.y -mad.f32 r2.x, c, r0.w, r2.z -(ul)mad.f32 r0.x, c, r0.w, r0.x -mov.f32f32 r0.w, r0.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.y, r2.x -mov.f32f32 r0.x, r0.x +mad.f32 r0.w, c, r2.w, r0.x +mad.f32 r0.z, c, r2.w, r0.y +mad.f32 r0.y, c, r2.w, r2.y +(ul)mad.f32 r0.x, c, r2.w, r2.x end -nop -nop -nop ; VERT: outputs: r0.x (0:0) r1.x (5:20) -; VERT: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (0:0,cm=3,il=12,b=0) r1.z (0:0,cm=1,il=16,b=0) -; VERT: 122 instructions, 1 half, 3 full +; VERT: inputs: r2.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=3,il=12,b=0) r0.z (0:0,cm=1,il=16,b=0) +; VERT: 90 instructions, 1 half, 4 full diff --git a/reference/crazy-frag-conflict.asm b/reference/crazy-frag-conflict.asm index 7544805..aeb5150 100644 --- a/reference/crazy-frag-conflict.asm +++ b/reference/crazy-frag-conflict.asm @@ -6,39 +6,32 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)bary.f r0.z, 4, r0.x -bary.f r0.w, 7, r0.x -bary.f r1.x, 5, r0.x +bary.f r0.w, 5, r0.x +bary.f r1.x, 7, r0.x bary.f r1.y, 3, r0.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.x, r1.x bary.f r1.z, 2, r0.x -mov.f32f32 r2.x, r0.z -mul.f r0.z, r0.w, r1.y -mov.f32f32 r2.y, r1.x -mov.f32f32 r2.z, r0.w -bary.f r0.w, 1, r0.x +bary.f r2.x, 1, r0.x +mov.f32f32 r1.w, r1.x bary.f (ei)r0.x, 0, r0.x nop -mov.f32f32 r1.w, r0.z -(rpt1)nop -sam.p (f32)(xyz)r2.x, r2.x, s#0, t#0 -(sy)(ss)mov.f32f32 r2.z, r2.x -mov.f32f32 r2.w, r2.y -mov.f32f32 r3.x, r2.x -(rpt5)nop -sam.p (f32)(xyz)r2.x, r2.z, s#0, t#0 -(sy)mul.f r0.y, r2.z, r1.z -mul.f r0.z, r2.y, r0.w -mul.f r0.x, r2.x, r0.x -nop -mov.f32f32 r1.z, r0.y -mov.f32f32 r1.y, r0.z -mov.f32f32 r1.x, r0.x +sam.p (f32)(xyz)r0.y, r0.z, s#0, t#0 +(sy)mov.f32f32 r2.y, r0.y +mov.f32f32 r2.z, r0.z +mov.f32f32 r2.w, r0.y +mul.f r1.w, r1.w, r1.y +(rpt4)nop +(ss)nop +sam.p (f32)(xyz)r0.y, r2.y, s#0, t#0 +(sy)mul.f r1.z, r0.w, r1.z +mul.f r1.y, r0.z, r2.x +mul.f r1.x, r0.y, r0.x end nop +nop +nop ; FRAG: outputs: r1.x (1:0) -; FRAG: inputs: r0.z (1:0,cm=f,il=8,b=1) r0.x (5:0,cm=f,il=12,b=1) -; FRAG: 37 instructions, 0 half, 4 full +; FRAG: inputs: r0.z (1:0,cm=f,il=8,b=1) r0.y (5:0,cm=f,il=12,b=1) +; FRAG: 25 instructions, 0 half, 3 full diff --git a/reference/crazy-frag.asm b/reference/crazy-frag.asm index 81c6d71..5ef0248 100644 --- a/reference/crazy-frag.asm +++ b/reference/crazy-frag.asm @@ -6,39 +6,32 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)bary.f r0.z, 4, r0.x -bary.f r0.w, 7, r0.x -bary.f r1.x, 5, r0.x +bary.f r0.w, 5, r0.x +bary.f r1.x, 7, r0.x bary.f r1.y, 3, r0.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.x, r1.x bary.f r1.z, 2, r0.x -mov.f32f32 r2.x, r0.z -mul.f r0.z, r0.w, r1.y -mov.f32f32 r2.y, r1.x -mov.f32f32 r2.z, r0.w -bary.f r0.w, 1, r0.x +bary.f r2.x, 1, r0.x +mov.f32f32 r1.w, r1.x bary.f (ei)r0.x, 0, r0.x nop -mov.f32f32 r1.w, r0.z -(rpt1)nop -sam.p (f32)(xyz)r2.x, r2.x, s#0, t#0 -(sy)mov.f32f32 r2.w, r2.x -mov.f32f32 r3.x, r2.y -mov.f32f32 r3.y, r2.z -(rpt5)nop +sam.p (f32)(xyz)r0.y, r0.z, s#0, t#0 +(sy)mov.f32f32 r2.y, r0.y +mov.f32f32 r2.z, r0.z +mov.f32f32 r2.w, r0.w +mul.f r1.w, r1.w, r1.y +(rpt4)nop (ss)nop -sam.p (f32)(xyz)r2.x, r2.w, s#0, t#0 -(sy)mul.f r0.y, r2.z, r1.z -mul.f r0.z, r2.y, r0.w -mul.f r0.x, r2.x, r0.x -nop -mov.f32f32 r1.z, r0.y -mov.f32f32 r1.y, r0.z -mov.f32f32 r1.x, r0.x +sam.p (f32)(xyz)r0.y, r2.y, s#0, t#0 +(sy)mul.f r1.z, r0.w, r1.z +mul.f r1.y, r0.z, r2.x +mul.f r1.x, r0.y, r0.x end +nop +nop +nop ; FRAG: outputs: r1.x (1:0) -; FRAG: inputs: r0.z (1:0,cm=f,il=8,b=1) r0.x (5:0,cm=f,il=12,b=1) -; FRAG: 38 instructions, 0 half, 4 full +; FRAG: inputs: r0.z (1:0,cm=f,il=8,b=1) r0.y (5:0,cm=f,il=12,b=1) +; FRAG: 25 instructions, 0 half, 3 full diff --git a/reference/dd.asm b/reference/dd.asm index 3008314..332831f 100644 --- a/reference/dd.asm +++ b/reference/dd.asm @@ -6,263 +6,196 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c4.x) 0x3f800000, 0x00000000, 0x3e4ccccd, 0x3f1013a9 +@const(c5.x) 0x3f400d1b, 0xbf800000, 0x3ccccccd, 0x3d4ccccd +@const(c6.x) 0x3fb8aa65, 0x40000000, 0x3f800000, 0xc39044fe +@const(c7.x) 0xbe2ab368, 0x41200000, 0x00000000, 0x00000000 +@const(c8.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)absneg.f r0.z, (neg)c0.y -bary.f r0.w, 3, r0.x -bary.f r1.x, 0, r0.x +bary.f r0.w, 5, r0.x bary.f r1.y, 4, r0.x +bary.f r1.x, 3, r0.x mul.f r1.z, r0.z, r0.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.w, r1.x -bary.f r2.y, 1, r0.x +mov.f32f32 r1.w, r0.w +mov.f32f32 r2.x, r1.y +mov.f32f32 r2.y, r1.x add.f r1.z, c4.x, (neg)r1.z -mov.f32f32 r2.z, r0.w -mov.f32f32 r3.x, r1.y -mov.f32f32 r2.x, r2.y -mov.f32f32 r1.z, r1.z -mul.f r3.y, c3.x, r0.w -mov.f32f32 r2.w, r3.x -mul.f r3.w, c3.x, r1.x -mul.f r1.x, r1.z, c4.w -mul.f r3.z, c3.x, r3.x -dsx (f32)(xy)r4.y, r1.w -(sy)mul.f r1.z, r4.y, r4.y -mul.f r4.x, c3.x, r2.y -mov.f32f32 r1.x, r1.x -(ss)nop -dsx (f32)(xy)r1.w, r2.z -(sy)mul.f r1.w, r1.w, r1.w -mad.f32 r1.z, r4.z, r4.z, r1.z -mad.f32 r1.w, r2.x, r2.x, r1.w -add.f r1.x, c4.x, (neg)r1.x -dsy (f32)(xy)r2.x, r3.y -(sy)mul.f r2.x, r2.x, r2.x -(ss)nop -dsy (f32)(xy)r3.y, r3.w -mov.f32f32 r1.z, r1.z -mov.f32f32 r1.w, r1.w -mov.f32f32 r1.x, r1.x -bary.f r2.z, 5, r0.x -bary.f (ei)r0.x, 2, r0.x -mad.f32 r0.y, r2.y, r2.y, r2.x -mov.f32f32 r2.x, r1.x -mov.f32f32 r2.y, r2.z -cmps.f.lt r1.x, r1.x, c4.y -mov.f32f32 r2.z, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r2.w, r2.y -cov.u32f32 r1.x, r1.x -sqrt r2.x, r2.x -(ss)mov.f32f32 r2.x, r2.x -mov.f32f32 r3.w, r0.z -mul.f r4.x, c3.x, r2.y -(sy)mul.f r3.y, r3.y, r3.y -dsx (f32)(x)r4.y, r2.z -cmps.f.ne r1.x, r1.x, c4.y -mad.f32 r2.x, c5.x, r3.w, r2.x -dsx (f32)(x)r4.z, r2.w -(sy)mad.f32 r1.w, r4.z, r4.z, r1.w -mad.f32 r1.z, r4.y, r4.y, r1.z -(ss)mad.f32 r2.z, r3.z, r3.z, r3.y -mov.f32f32 r2.x, r2.x -dsy (f32)(x)r3.y, r4.x -(sy)mad.f32 r0.y, r3.y, r3.y, r0.y +mul.f r2.z, c3.x, r1.w +absneg.f r1.w, (neg)r1.w +absneg.f r2.w, (neg)r2.x +mul.f r1.z, r1.z, c4.w +absneg.f r3.x, (neg)r2.y +bary.f r3.y, 2, r0.x +mov.f32f32 r3.z, r2.w +add.f r1.z, c4.x, (neg)r1.z +dsy (f32)(x)r3.w, r2.z +mul.f r2.y, c3.x, r2.y +(ss)mul.f r2.z, c3.x, r2.x +mov.f32f32 r2.x, r1.w +mov.f32f32 r4.x, r1.z +cmps.f.lt r1.z, r1.z, c4.y +add.f r4.y, c1.y, r3.z +mov.f32f32 r4.z, r3.x +mul.f r4.w, c3.x, r3.y +mov.f32f32 r5.x, c2.x +dsy (f32)(xy)r5.y, r2.y +(sy)(ss)mul.f r2.y, r5.y, r5.y +sqrt r2.z, r4.x +(ss)mad.f32 r2.z, c5.x, r0.z, r2.z +mad.f32 r2.y, r5.z, r5.z, r2.y +cov.u32f32 r1.z, r1.z +mad.f32 r2.y, r3.w, r3.w, r2.y +mov.f32f32 r3.w, r2.z +mul.f r2.z, r2.z, c4.x +cmps.f.ne r1.z, r1.z, c4.y +(ss)add.f r4.x, c1.z, r2.x +mul.f r5.y, r3.w, c4.y +absneg.f r5.z, (neg)c0.x +mul.f r3.w, r3.w, c4.y +absneg.f r5.w, (neg)c0.z +mad.f32 r0.z, c5.x, r0.z, (neg)r2.z +mad.f32 r2.z, c5.x, r5.z, (neg)r5.y +mov.f32f32 r5.y, c4.y +mad.f32 r3.w, c5.x, r5.w, (neg)r3.w +mov.f32f32 r5.z, c4.y +mov.f32f32 r5.w, c4.y +sel.b32 r2.z, r5.y, r1.z, r2.z +add.f r5.y, c1.x, r4.z +rcp r5.x, r5.x +sel.b32 r3.w, r5.z, r1.z, r3.w +sel.b32 r0.z, r5.w, r1.z, r0.z +absneg.f r1.z, (neg)r2.z +(ss)mul.f r5.y, r5.y, r5.x +mul.f r4.x, r4.x, r5.x +absneg.f r5.z, (neg)r3.w +absneg.f r5.w, (neg)r0.z +mul.f r6.x, r5.y, (neg)r2.z +mov.f32f32 r6.y, r4.x +rcp r6.z, r1.z +add.f r3.x, c5.y, r3.x +mul.f r4.y, r4.y, r5.x +(ss)rcp r1.z, r1.z +add.f r4.z, c6.z, r4.z +mul.f r5.x, r6.y, r0.z +(ss)mul.f r3.x, r3.x, r6.z +mad.f32 r6.x, r4.y, (neg)r0.z, r6.x +(ss)mul.f r1.z, r4.z, r1.z +mad.f32 r4.x, r4.x, (neg)r3.w, r6.x +mov.f32f32 r4.z, r3.x +mov.f32f32 r4.y, r4.y +max.f r3.x, r3.x, r1.z +rcp r6.x, r5.w +add.f r2.w, c5.y, r2.w +mov.f32f32 r6.z, r4.x +mul.f r4.x, r4.x, c6.y mov.f32f32 r1.z, r1.z -mov.f32f32 r2.z, r2.z -mul.f r2.w, r2.x, c4.x -mul.f r3.y, r2.x, c4.y -mul.f r2.x, r2.x, c4.y -sqrt r1.w, r1.w -(ss)mov.f32f32 r1.w, r1.w -mov.f32f32 r2.w, r2.w -mov.f32f32 r0.z, r0.z -mov.f32f32 r3.y, r3.y -absneg.f r3.z, (neg)c0.x -mov.f32f32 r2.x, r2.x -mad.f32 r0.z, c5.x, r0.z, (neg)r2.w -mov.f32f32 r2.w, c4.y +(ss)mul.f r2.w, r2.w, r6.x +(ss)rcp r5.w, r5.w +add.f r3.z, c6.y, r3.z +mad.f32 r6.x, c5.z, r6.z, c5.w +max.f r4.x, r4.x, c4.y +min.f r1.z, r4.z, r1.z +(ss)mul.f r3.z, r3.z, r5.w +mov.f32f32 r4.z, r2.w +min.f r4.x, r4.x, c4.x +mad.f32 r5.x, r4.y, r3.w, (neg)r5.x +max.f r2.w, r2.w, r3.z +(ss)rcp r5.w, r6.x mov.f32f32 r3.z, r3.z -absneg.f r3.w, (neg)c0.z -sqrt r0.y, r0.y -(ss)mov.f32f32 r0.y, r0.y -sel.b32 r0.z, r2.w, r1.x, r0.z -absneg.f r2.y, (neg)r2.y -mad.f32 r2.w, c5.x, r3.z, (neg)r3.y -mov.f32f32 r3.y, c4.y -mov.f32f32 r3.z, r3.w -mov.f32f32 r2.y, r2.y -absneg.f r3.w, (neg)r0.z -sel.b32 r2.w, r3.y, r1.x, r2.w -absneg.f r0.w, (neg)r0.w -add.f r3.y, c1.z, r2.y -mov.f32f32 r4.x, c2.x -absneg.f r4.y, (neg)r2.w -mov.f32f32 r0.w, r0.w -mad.f32 r2.x, c5.x, r3.z, (neg)r2.x -rcp r3.z, r3.w -mov.f32f32 r4.z, c4.y -absneg.f r3.x, (neg)r3.x -add.f r4.w, c1.x, r0.w -rcp r4.x, r4.x -(ss)mul.f r3.y, r3.y, r4.x -rcp r5.x, r4.y -add.f r5.y, c5.y, r0.w -sel.b32 r1.x, r4.z, r1.x, r2.x -mul.f r2.x, r4.w, r4.x -mov.f32f32 r3.y, r3.y -(ss)mul.f r4.z, r5.y, r5.x -absneg.f r4.w, (neg)r1.x -mov.f32f32 r3.x, r3.x -mul.f r5.x, r3.y, r0.z -mov.f32f32 r4.z, r4.z -(ss)rcp r4.y, r4.y -add.f r0.w, c6.z, r0.w -add.f r5.y, c1.y, r3.x +(ss)add.f r6.x, c8.y, (neg)r4.x +mov.f32f32 r6.z, r5.x +min.f r2.w, r3.x, r2.w +rcp r3.x, r5.z +add.f r1.w, c5.y, r1.w +mul.f r6.x, r6.x, c4.x +mul.f r5.x, r5.x, r6.z +min.f r3.z, r4.z, r3.z +(ss)mul.f r1.w, r1.w, r3.x +rcp r3.x, r5.z +add.f r2.x, c6.z, r2.x +mov.f32f32 r4.z, r5.y +max.f r1.z, r1.z, r3.z +mov.f32f32 r3.z, r1.w +(ss)mul.f r2.x, r2.x, r3.x +mul.f r3.x, r4.z, r3.w +mul.f r3.w, r4.y, r2.z +mad.f32 r2.z, r6.y, r2.z, (neg)r3.x +max.f r1.w, r1.w, r2.x mov.f32f32 r2.x, r2.x -rcp r5.z, r4.w -add.f r5.w, c5.y, r3.x -mov.f32f32 r0.w, r0.w -mul.f r4.x, r5.y, r4.x -mul.f r5.y, r2.x, (neg)r2.w -add.f r6.x, c5.y, r2.y -mul.f r3.z, r5.w, r3.z -mov.f32f32 r4.x, r4.x -(ss)mul.f r0.w, r0.w, r4.y -(ss)mul.f r4.y, r6.x, r5.z -mov.f32f32 r3.z, r3.z -mad.f32 r5.x, r4.x, r1.x, (neg)r5.x -mov.f32f32 r0.w, r0.w -mad.f32 r5.y, r4.x, (neg)r0.z, r5.y -mov.f32f32 r4.y, r4.y -mov.f32f32 r5.x, r5.x -max.f r5.z, r4.z, r0.w -mov.f32f32 r5.y, r5.y -min.f r0.w, r4.z, r0.w -mul.f r4.z, r5.x, r5.x -mul.f r5.x, r2.x, r1.x -mov.f32f32 r5.z, r5.z -mad.f32 r5.x, r3.y, r2.w, (neg)r5.x -rcp r3.w, r3.w -add.f r3.x, c6.y, r3.x -mad.f32 r1.x, r3.y, (neg)r1.x, r5.y -rcp r3.y, r4.w -add.f r2.y, c6.z, r2.y -(ss)mov.f32f32 r4.w, r5.x -mov.f32f32 r3.x, r3.x -mov.f32f32 r1.x, r1.x -mov.f32f32 r2.y, r2.y -mad.f32 r4.z, r4.w, r4.w, r4.z -(ss)mul.f r3.x, r3.x, r3.w -mul.f r3.w, r1.x, c6.y -mad.f32 r1.x, c5.z, r1.x, c5.w -mov.f32f32 r4.z, r4.z -mul.f r2.w, r4.x, r2.w -mov.f32f32 r3.x, r3.x -mad.f32 r2.x, r2.x, r0.z, (neg)r2.w -mov.f32f32 r2.w, r3.w -mul.f r2.y, r2.y, r3.y -max.f r3.y, r3.z, r3.x -mov.f32f32 r2.x, r2.x -max.f r2.w, r2.w, c4.y -min.f r3.x, r3.z, r3.x -mov.f32f32 r3.y, r3.y -mad.f32 r2.x, r2.x, r2.x, r4.z -min.f r2.w, r2.w, c4.x -max.f r0.w, r0.w, r3.x -min.f r3.x, r5.z, r3.y -mov.f32f32 r2.x, r2.x -mov.f32f32 r2.y, r2.y -add.f r3.y, c8.y, (neg)r2.w -rcp r1.x, r1.x -(ss)mov.f32f32 r1.x, r1.x -add.f r2.x, r2.x, c5.y -max.f r3.z, r4.y, r2.y -mul.f r3.y, r3.y, c4.x -min.f r2.y, r4.y, r2.y -mov.f32f32 r2.x, r2.x -mov.f32f32 r3.z, r3.z -mul.f r0.y, r1.w, r0.y -sqrt r1.z, r1.z -max.f r0.w, r0.w, r2.y -mad.f32 r1.x, r2.x, r1.x, c4.x -min.f r1.w, r3.x, r3.z -mov.f32f32 r0.y, r0.y -(ss)mov.f32f32 r1.z, r1.z -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.w, r1.w -mov.f32f32 r0.w, r0.w -mul.f r0.x, c3.x, r0.x -mul.f r1.x, (neg)r1.x, c6.x +mad.f32 r3.x, r4.z, r0.z, (neg)r3.w +nop +min.f r1.w, r2.w, r1.w +mov.f32f32 r2.w, r2.z +min.f r2.x, r3.z, r2.x +mov.f32f32 r3.z, r3.x +mov.f32f32 r3.w, r1.w +mad.f32 r2.z, r2.z, r2.w, r5.x +max.f r1.z, r1.z, r2.x +mad.f32 r2.x, r3.x, r3.z, r2.z mul.f r0.z, r0.z, r1.w -add.f r0.w, r1.w, (neg)r0.w -rcp r0.y, r0.y -(ss)mov.f32f32 r0.y, r0.y -mov.f32f32 r1.x, r1.x +sqrt r2.y, r2.y +dsx (f32)(x)r6.y, r0.w +dsx (f32)(xy)r6.z, r1.x +(sy)(ss)mul.f r0.w, r6.z, r6.z +add.f r1.x, r3.w, (neg)r1.z +add.f r1.z, r2.x, c5.y add.f r0.z, r1.y, (neg)r0.z -mov.f32f32 r0.w, r0.w -dsy (f32)(x)r3.z, r0.x -(sy)(ss)mad.f32 r0.x, r3.z, r3.z, r2.z -mov.f32f32 r1.y, c4.y -mov.f32f32 r2.x, c4.y -mov.f32f32 r0.z, r0.z -exp2 r1.x, r1.x -(ss)mov.f32f32 r1.x, r1.x -mad.f32 r0.w, c7.y, r0.w, c4.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r1.y, r1.y -add.f r1.x, c4.x, r1.x +mad.f32 r0.w, r6.w, r6.w, r0.w +mad.f32 r1.x, c7.y, r1.x, c4.x +mad.f32 r1.y, r1.z, r5.w, c4.x add.f r0.z, r0.z, c7.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.w, r1.y -mov.f32f32 r1.x, r1.x -mov.f32f32 r0.z, r0.z -sqrt r0.x, r0.x -(ss)mov.f32f32 r0.x, r0.x -mov.f32f32 r1.y, r2.x -(rpt1)nop +mad.f32 r0.w, r6.y, r6.y, r0.w +dsy (f32)(x)r4.y, r4.w +bary.f r2.z, 0, r0.x +dsx (f32)(x)r3.x, r3.y +mul.f r1.y, (neg)r1.y, c6.x mul.f r0.z, c6.w, r0.z rcp r1.x, r1.x -(ss)mov.f32f32 r1.x, r1.x -rcp r0.w, r0.w -(rpt1)nop -mov.f32f32 r0.z, r0.z -mov.f32f32 r1.x, r1.x -(ss)mov.f32f32 r0.w, r0.w -mul.f r0.x, r1.z, r0.x -mov.f32f32 r1.z, r1.y -mov.f32f32 r1.x, r1.x -mul.f r0.z, r0.z, r0.w -mov.f32f32 r0.x, r0.x +bary.f (ei)r2.w, 1, r0.x +mul.f r0.x, c3.x, r2.z +mov.f32f32 r1.w, c4.y +(ss)mul.f r0.z, r0.z, r1.x +sqrt r0.y, r0.w +(ss)mul.f r0.w, r0.y, r2.y +mul.f r0.y, c3.x, r2.w +mov.f32f32 r1.z, c4.y nop -max.f r0.w, r1.x, c4.y -mov.f32f32 r0.z, r0.z -mul.f r0.x, r0.x, r0.y -nop -min.f r0.y, r0.w, c4.x -(rpt2)nop -mul.f r0.y, r2.w, r0.y +exp2 r1.x, r1.y +(ss)add.f r1.x, c4.x, r1.x +dsx (f32)(xy)r2.x, r2.z +(sy)(ss)mul.f r1.y, r2.x, r2.x exp2 r0.z, r0.z -(ss)mov.f32f32 r0.z, r0.z -mov.f32f32 r0.x, r0.x +(ss)add.f r0.z, c4.x, r0.z +rcp r0.w, r0.w +mad.f32 r1.y, r2.y, r2.y, r1.y +dsy (f32)(xy)r2.x, r0.x nop -add.f r0.y, r0.y, r3.y -add.f r0.z, c4.x, r0.z -mul.f r0.x, r0.x, c4.z +(sy)(ss)mul.f r0.x, r2.x, r2.x +mad.f32 r0.y, r3.x, r3.x, r1.y +mad.f32 r0.x, r2.y, r2.y, r0.x +rcp r1.x, r1.x +(ss)max.f r1.x, r1.x, c4.y +rcp r0.z, r0.z +mad.f32 r0.x, r4.y, r4.y, r0.x +(rpt5)nop +sqrt r0.x, r0.x nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z +sqrt r0.y, r0.y +(ss)mul.f r0.x, r0.y, r0.x +min.f r0.y, r1.x, c4.x +(rpt1)nop +mul.f r0.x, r0.x, r0.w +mul.f r0.y, r4.x, r0.y (rpt1)nop -mov.f32f32 r1.y, r0.y -(rpt2)nop -rcp r0.y, r0.z -(ss)mov.f32f32 r0.y, r0.y -(rpt2)nop -mul.f r0.x, r0.x, r0.y -(rpt2)nop -mov.f32f32 r1.x, r0.x +mul.f r0.x, r0.x, c4.z +add.f r1.y, r0.y, r6.x +(rpt1)nop +mul.f r1.x, r0.x, r0.z end nop nop +nop ; FRAG: outputs: r1.x (1:0) -; FRAG: inputs: r0.x (5:20,cm=f,il=8,b=1) r1.x (5:21,cm=f,il=12,b=1) -; FRAG: 265 instructions, 0 half, 7 full +; FRAG: inputs: r0.y (5:20,cm=f,il=8,b=1) r1.x (5:21,cm=f,il=12,b=1) +; FRAG: 189 instructions, 0 half, 7 full diff --git a/reference/es2gears-vert.asm b/reference/es2gears-vert.asm index bc04597..b3981b3 100644 --- a/reference/es2gears-vert.asm +++ b/reference/es2gears-vert.asm @@ -14,6 +14,8 @@ @out(r1.y) out5 @out(r1.z) out6 @out(r1.w) out7 +@const(c10.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 +@const(c11.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)mul.f r0.w, c4.x, r0.x mov.f32f32 r1.w, c8.x mad.f32 r0.w, c5.x, r0.y, r0.w @@ -25,88 +27,60 @@ mad.f32 r2.x, c5.y, r0.y, r2.x add.f r0.w, r0.w, c7.x mad.f32 r2.x, c6.y, r0.z, r2.x mul.f r0.x, c4.z, r0.x -mul.f r2.z, c0.w, r1.x -mov.f32f32 r2.w, r0.w -mad.f32 r0.w, c8.y, r2.y, r1.w -add.f r1.w, r2.x, c7.y +mad.f32 r1.w, c8.y, r2.y, r1.w +mov.f32f32 r2.y, r0.w +mov.f32f32 r2.z, c8.z +add.f r2.x, r2.x, c7.y mad.f32 r0.x, c5.z, r0.y, r0.x -mul.f r0.y, r2.w, r2.w -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.x, c8.z +mul.f r0.y, r0.w, r2.y +mad.f32 r0.w, c8.z, r2.z, r1.w +mov.f32f32 r1.w, r2.x mad.f32 r0.x, c6.z, r0.z, r0.x -mad.f32 r0.z, c1.w, r1.y, r2.z -mad.f32 r0.y, r1.w, r1.w, r0.y -mad.f32 r0.w, c8.z, r2.x, r0.w +mul.f r0.z, c0.w, r1.x +mul.f r2.z, c0.z, r1.x +mad.f32 r0.y, r2.x, r1.w, r0.y add.f r0.x, r0.x, c7.z -mad.f32 r0.z, c2.w, r1.z, r0.z -mov.f32f32 r0.y, r0.y -mul.f r2.x, c0.z, r1.x -mov.f32f32 r0.x, r0.x -add.f r0.z, r0.z, c3.w -mad.f32 r2.x, c1.z, r1.y, r2.x -mul.f r2.y, c0.y, r1.x -mad.f32 r0.y, r0.x, r0.x, r0.y rsq r0.w, r0.w -(ss)mov.f32f32 r2.z, r0.w -(ss)mov.f32f32 r0.w, r0.z -mad.f32 r0.z, c2.z, r1.z, r2.x -mad.f32 r2.x, c1.y, r1.y, r2.y +(ss)mov.f32f32 r2.x, r0.w +mul.f r2.w, c8.z, r0.w +mad.f32 r0.z, c1.w, r1.y, r0.z +(ss)mov.f32f32 r0.w, r0.x +mul.f r3.x, c8.x, r2.x +mul.f r2.x, c8.y, r2.x +mad.f32 r0.z, c2.w, r1.z, r0.z +mad.f32 r0.y, r0.w, r0.w, r0.y +mad.f32 r0.w, c1.z, r1.y, r2.z +mul.f r2.z, c0.y, r1.x mul.f r1.x, c0.x, r1.x -mul.f r2.y, c8.x, r2.z +mad.f32 r3.y, c2.z, r1.z, r0.w +mad.f32 r2.z, c1.y, r1.y, r2.z +mad.f32 r1.x, c1.x, r1.y, r1.x rsq r0.y, r0.y -(ss)mov.f32f32 r0.y, r0.y -mul.f r3.x, c8.y, r2.z -mul.f r2.z, c8.z, r2.z -nop -mul.f r2.w, r2.w, r0.y -mul.f r1.w, r1.w, r0.y +(ss)mov.f32f32 r1.y, r0.y mul.f r0.x, r0.x, r0.y -nop -mov.f32f32 r0.y, r2.w -mov.f32f32 r2.y, r2.y -mov.f32f32 r1.w, r1.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r2.w, r3.x -mul.f r0.y, r0.y, r2.y -mov.f32f32 r2.y, r2.z -add.f r0.z, r0.z, c3.z -mad.f32 r0.y, r1.w, r2.w, r0.y -mad.f32 r1.w, c2.y, r1.z, r2.x -mad.f32 r1.x, c1.x, r1.y, r1.x -nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -mad.f32 r0.x, r0.x, r2.y, r0.y -add.f r0.y, r1.w, c3.y +add.f r0.w, r0.z, c3.w +add.f r0.z, r3.y, c3.z +(ss)mul.f r0.y, r2.y, r1.y +mul.f r1.y, r1.w, r1.y +mad.f32 r1.w, c2.y, r1.z, r2.z mad.f32 r1.x, c2.x, r1.z, r1.x +mul.f r0.y, r0.y, r3.x nop -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y -add.f r1.x, r1.x, c3.x -nop -max.f r0.x, r0.x, c10.x +mad.f32 r1.y, r1.y, r2.x, r0.y +add.f r0.y, r1.w, c3.y +mad.f32 r1.y, r0.x, r2.w, r1.y +add.f r0.x, r1.x, c3.x +(rpt1)nop +max.f r1.x, r1.y, c10.x (rpt2)nop -mov.f32f32 r1.y, r0.x -mov.f32f32 r0.x, r1.x +mov.f32f32 r1.y, r1.x +mul.f r1.x, r1.x, c9.x (rpt1)nop -mul.f r1.x, r1.y, c9.w +mul.f r1.w, r1.y, c9.w mul.f r1.z, r1.y, c9.z -mul.f r1.w, r1.y, c9.y -mul.f r1.y, r1.y, c9.x -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r2.x, r1.w -mov.f32f32 r2.y, r1.y -mov.f32f32 r1.w, r1.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r1.y, r2.x -mov.f32f32 r1.x, r2.y +mul.f r1.y, r1.y, c9.y end -nop -nop -nop ; VERT: outputs: r0.x (0:0) r1.x (5:19) ; VERT: inputs: r1.x (0:0,cm=7,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) -; VERT: 92 instructions, 0 half, 4 full +; VERT: 68 instructions, 0 half, 4 full diff --git a/reference/face.asm b/reference/face.asm index 0e81ac5..d7e2e1c 100644 --- a/reference/face.asm +++ b/reference/face.asm @@ -5,27 +5,25 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c0.x) 0x3f000000, 0x00000000, 0x00000000, 0x00000000 +@const(c1.x) 0x3f800000, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)mul.s r0.x, hr0.x, 2 (rpt2)nop add.s r0.x, r0.x, 1 (rpt2)nop cov.s32f32 r0.x, r0.x (rpt2)nop -mov.f32f32 r0.x, r0.x -(rpt2)nop max.f r0.x, r0.x, c0.y (rpt2)nop min.f r0.x, r0.x, c1.x (rpt2)nop -mul.f r0.x, c0.x, r0.x +mul.f r1.x, c0.x, r0.x (rpt2)nop -mov.f32f32 r1.w, r0.x -mov.f32f32 r1.z, r0.x -mov.f32f32 r1.y, r0.x -mov.f32f32 r1.x, r0.x +mov.f32f32 r1.w, r1.x +mov.f32f32 r1.z, r1.x +mov.f32f32 r1.y, r1.x end -nop ; FRAG: outputs: r1.x (1:0) ; FRAG: inputs: r63.y (7:0,cm=f,il=8,b=0) -; FRAG: 33 instructions, 1 half, 2 full +; FRAG: 28 instructions, 1 half, 2 full diff --git a/reference/ffox-otmc/ffox-otmc-03.asm b/reference/ffox-otmc/ffox-otmc-03.asm index 72ac097..be2ee4c 100644 --- a/reference/ffox-otmc/ffox-otmc-03.asm +++ b/reference/ffox-otmc/ffox-otmc-03.asm @@ -11,6 +11,7 @@ @out(r1.y) out5 @out(r1.z) out6 @out(r1.w) out7 +@const(c21.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000 (sy)(ss)trunc.f r0.x, r1.w (rpt2)nop cov.f32s16 hr0.x, r0.x diff --git a/reference/ffox-otmc/ffox-otmc-04.asm b/reference/ffox-otmc/ffox-otmc-04.asm index 4afc961..8eae3b4 100644 --- a/reference/ffox-otmc/ffox-otmc-04.asm +++ b/reference/ffox-otmc/ffox-otmc-04.asm @@ -6,6 +6,7 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c0.x) 0x3f800000, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)bary.f r0.z, 0, r0.x bary.f (ei)r0.w, 1, r0.x mov.f32f32 r1.w, c0.x diff --git a/reference/ffox-otmc/ffox-otmc-05.asm b/reference/ffox-otmc/ffox-otmc-05.asm index 72ac097..be2ee4c 100644 --- a/reference/ffox-otmc/ffox-otmc-05.asm +++ b/reference/ffox-otmc/ffox-otmc-05.asm @@ -11,6 +11,7 @@ @out(r1.y) out5 @out(r1.z) out6 @out(r1.w) out7 +@const(c21.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000 (sy)(ss)trunc.f r0.x, r1.w (rpt2)nop cov.f32s16 hr0.x, r0.x diff --git a/reference/ffox-otmc/ffox-otmc-06.asm b/reference/ffox-otmc/ffox-otmc-06.asm index c5ef2b9..34cd110 100644 --- a/reference/ffox-otmc/ffox-otmc-06.asm +++ b/reference/ffox-otmc/ffox-otmc-06.asm @@ -7,6 +7,7 @@ @out(r0.y) out1 @out(r0.z) out2 @out(r0.w) out3 +@const(c13.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000 (sy)(ss)trunc.f r0.z, r0.w (rpt2)nop cov.f32s16 hr0.x, r0.z diff --git a/reference/ffox-otmc/ffox-otmc-08.asm b/reference/ffox-otmc/ffox-otmc-08.asm index c5ef2b9..34cd110 100644 --- a/reference/ffox-otmc/ffox-otmc-08.asm +++ b/reference/ffox-otmc/ffox-otmc-08.asm @@ -7,6 +7,7 @@ @out(r0.y) out1 @out(r0.z) out2 @out(r0.w) out3 +@const(c13.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000 (sy)(ss)trunc.f r0.z, r0.w (rpt2)nop cov.f32s16 hr0.x, r0.z diff --git a/reference/ffox-otmc/ffox-otmc-13.asm b/reference/ffox-otmc/ffox-otmc-13.asm index a023928..36f82a6 100644 --- a/reference/ffox-otmc/ffox-otmc-13.asm +++ b/reference/ffox-otmc/ffox-otmc-13.asm @@ -6,6 +6,7 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)bary.f r0.z, 0, r0.x bary.f (ei)r0.w, 1, r0.x (rpt5)nop diff --git a/reference/ffox-otmc/ffox-otmc-14.asm b/reference/ffox-otmc/ffox-otmc-14.asm index 72ac097..be2ee4c 100644 --- a/reference/ffox-otmc/ffox-otmc-14.asm +++ b/reference/ffox-otmc/ffox-otmc-14.asm @@ -11,6 +11,7 @@ @out(r1.y) out5 @out(r1.z) out6 @out(r1.w) out7 +@const(c21.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000 (sy)(ss)trunc.f r0.x, r1.w (rpt2)nop cov.f32s16 hr0.x, r0.x diff --git a/reference/ffox-otmc/ffox-otmc-15.asm b/reference/ffox-otmc/ffox-otmc-15.asm index c99bf54..13ea129 100644 --- a/reference/ffox-otmc/ffox-otmc-15.asm +++ b/reference/ffox-otmc/ffox-otmc-15.asm @@ -6,6 +6,7 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)bary.f r0.z, 0, r0.x bary.f (ei)r0.w, 1, r0.x (rpt5)nop diff --git a/reference/ffox-otmc/ffox-otmc-16.asm b/reference/ffox-otmc/ffox-otmc-16.asm index 72ac097..be2ee4c 100644 --- a/reference/ffox-otmc/ffox-otmc-16.asm +++ b/reference/ffox-otmc/ffox-otmc-16.asm @@ -11,6 +11,7 @@ @out(r1.y) out5 @out(r1.z) out6 @out(r1.w) out7 +@const(c21.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000 (sy)(ss)trunc.f r0.x, r1.w (rpt2)nop cov.f32s16 hr0.x, r0.x diff --git a/reference/ffox-otmc/ffox-otmc-17.asm b/reference/ffox-otmc/ffox-otmc-17.asm index 72ac097..be2ee4c 100644 --- a/reference/ffox-otmc/ffox-otmc-17.asm +++ b/reference/ffox-otmc/ffox-otmc-17.asm @@ -11,6 +11,7 @@ @out(r1.y) out5 @out(r1.z) out6 @out(r1.w) out7 +@const(c21.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000 (sy)(ss)trunc.f r0.x, r1.w (rpt2)nop cov.f32s16 hr0.x, r0.x diff --git a/reference/ffox-otmc/ffox-otmc-18.asm b/reference/ffox-otmc/ffox-otmc-18.asm index 7e1ef51..78e569c 100644 --- a/reference/ffox-otmc/ffox-otmc-18.asm +++ b/reference/ffox-otmc/ffox-otmc-18.asm @@ -6,6 +6,7 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c1.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)bary.f r0.z, 0, r0.x bary.f (ei)r0.w, 1, r0.x (rpt5)nop diff --git a/reference/ffox-otmc/ffox-otmc-19.asm b/reference/ffox-otmc/ffox-otmc-19.asm index 72ac097..be2ee4c 100644 --- a/reference/ffox-otmc/ffox-otmc-19.asm +++ b/reference/ffox-otmc/ffox-otmc-19.asm @@ -11,6 +11,7 @@ @out(r1.y) out5 @out(r1.z) out6 @out(r1.w) out7 +@const(c21.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000 (sy)(ss)trunc.f r0.x, r1.w (rpt2)nop cov.f32s16 hr0.x, r0.x diff --git a/reference/ffox-otmc/ffox-otmc-24.asm b/reference/ffox-otmc/ffox-otmc-24.asm index 33986ec..938e6a5 100644 --- a/reference/ffox-otmc/ffox-otmc-24.asm +++ b/reference/ffox-otmc/ffox-otmc-24.asm @@ -6,6 +6,7 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c1.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)bary.f r0.z, 0, r0.x bary.f (ei)r0.w, 1, r0.x (rpt5)nop diff --git a/reference/ffox-otmc/ffox-otmc-26.asm b/reference/ffox-otmc/ffox-otmc-26.asm index a28588b..f65517a 100644 --- a/reference/ffox-otmc/ffox-otmc-26.asm +++ b/reference/ffox-otmc/ffox-otmc-26.asm @@ -34,6 +34,8 @@ @out(r2.y) out9 @out(r2.z) out10 @out(r2.w) out11 +@const(c19.x) 0x3f000000, 0x00000000, 0x3f800000, 0x00000000 +@const(c20.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)mov.f32f32 r2.x, c15.x mul.f r2.y, c4.x, r3.w mul.f r2.z, c4.x, r4.w diff --git a/reference/ffox-otmc/ffox-otmc-27.asm b/reference/ffox-otmc/ffox-otmc-27.asm index 4307641..39b56be 100644 --- a/reference/ffox-otmc/ffox-otmc-27.asm +++ b/reference/ffox-otmc/ffox-otmc-27.asm @@ -6,6 +6,7 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c0.x) 0x3f000000, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)bary.f r0.z, 6, r0.x mov.f32f32 r0.w, c0.x bary.f r1.x, 4, r0.x diff --git a/reference/ffox-otmc/ffox-otmc-28.asm b/reference/ffox-otmc/ffox-otmc-28.asm index a28588b..f65517a 100644 --- a/reference/ffox-otmc/ffox-otmc-28.asm +++ b/reference/ffox-otmc/ffox-otmc-28.asm @@ -34,6 +34,8 @@ @out(r2.y) out9 @out(r2.z) out10 @out(r2.w) out11 +@const(c19.x) 0x3f000000, 0x00000000, 0x3f800000, 0x00000000 +@const(c20.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)mov.f32f32 r2.x, c15.x mul.f r2.y, c4.x, r3.w mul.f r2.z, c4.x, r4.w diff --git a/reference/ffox-otmc/ffox-otmc-29.asm b/reference/ffox-otmc/ffox-otmc-29.asm index 72ac097..be2ee4c 100644 --- a/reference/ffox-otmc/ffox-otmc-29.asm +++ b/reference/ffox-otmc/ffox-otmc-29.asm @@ -11,6 +11,7 @@ @out(r1.y) out5 @out(r1.z) out6 @out(r1.w) out7 +@const(c21.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000 (sy)(ss)trunc.f r0.x, r1.w (rpt2)nop cov.f32s16 hr0.x, r0.x diff --git a/reference/ffox-otmc/ffox-otmc-30.asm b/reference/ffox-otmc/ffox-otmc-30.asm index ca29128..8f87710 100644 --- a/reference/ffox-otmc/ffox-otmc-30.asm +++ b/reference/ffox-otmc/ffox-otmc-30.asm @@ -1,34 +1,27 @@ ; options: -; FRAG: new compiler +; FRAG: TGSI compiler @in(r0.x) in0 @in(r0.y) in1 -@out(r0.w) out0 -@out(r1.x) out1 -@out(r1.y) out2 -@out(r1.z) out3 +@out(r1.x) out0 +@out(r1.y) out1 +@out(r1.z) out2 +@out(r1.w) out3 +@const(c0.x) 0x3f800000, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)bary.f r0.z, 0, r0.x -bary.f (ei)r0.x, 1, r0.x -mov.f32f32 r0.y, c0.x +bary.f (ei)r0.w, 1, r0.x +mov.f32f32 r1.w, c0.x +(rpt4)nop +sam (f32)(xyz)r0.x, r0.z, s#0, t#0 +(sy)mov.f32f32 r1.z, r0.x +mov.f32f32 r1.y, r0.y +mov.f32f32 r1.x, r0.z +end nop -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.x, r0.x -mov.f32f32 r1.z, r0.y nop -mov.f32f32 r0.y, r0.z -mov.f32f32 r0.z, r0.x -(rpt5)nop -sam (f32)(xyz)r1.w, r0.y, s#0, t#0 -(sy)mov.f32f32 r0.x, r1.w -(ss)mov.f32f32 r0.y, r2.x -mov.f32f32 r0.z, r2.y nop -mov.f32f32 r1.y, r0.x -mov.f32f32 r1.x, r0.y -mov.f32f32 r0.w, r0.z -end -; FRAG: outputs: r0.w (1:0) +; FRAG: outputs: r1.x (1:0) ; FRAG: inputs: r0.x (5:9,cm=f,il=8,b=1) -; FRAG: 25 instructions, 0 half, 3 full +; FRAG: 13 instructions, 0 half, 2 full ; pos (bary): r0.x -; color: r0.w +; color: r1.x diff --git a/reference/ffox-otmc/ffox-otmc-31.asm b/reference/ffox-otmc/ffox-otmc-31.asm index 72ac097..be2ee4c 100644 --- a/reference/ffox-otmc/ffox-otmc-31.asm +++ b/reference/ffox-otmc/ffox-otmc-31.asm @@ -11,6 +11,7 @@ @out(r1.y) out5 @out(r1.z) out6 @out(r1.w) out7 +@const(c21.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000 (sy)(ss)trunc.f r0.x, r1.w (rpt2)nop cov.f32s16 hr0.x, r0.x diff --git a/reference/ffox-otmc/ffox-otmc-32.asm b/reference/ffox-otmc/ffox-otmc-32.asm index 72ac097..be2ee4c 100644 --- a/reference/ffox-otmc/ffox-otmc-32.asm +++ b/reference/ffox-otmc/ffox-otmc-32.asm @@ -11,6 +11,7 @@ @out(r1.y) out5 @out(r1.z) out6 @out(r1.w) out7 +@const(c21.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000 (sy)(ss)trunc.f r0.x, r1.w (rpt2)nop cov.f32s16 hr0.x, r0.x diff --git a/reference/ffox-otmc/ffox-otmc-33.asm b/reference/ffox-otmc/ffox-otmc-33.asm index 1664e6b..abba978 100644 --- a/reference/ffox-otmc/ffox-otmc-33.asm +++ b/reference/ffox-otmc/ffox-otmc-33.asm @@ -6,6 +6,7 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c3.x) 0x3f800000, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)bary.f r0.z, 0, r0.x bary.f (ei)r0.w, 1, r0.x mov.f32f32 r0.x, c2.x diff --git a/reference/ffox-otmc/ffox-otmc-34.asm b/reference/ffox-otmc/ffox-otmc-34.asm index 72ac097..be2ee4c 100644 --- a/reference/ffox-otmc/ffox-otmc-34.asm +++ b/reference/ffox-otmc/ffox-otmc-34.asm @@ -11,6 +11,7 @@ @out(r1.y) out5 @out(r1.z) out6 @out(r1.w) out7 +@const(c21.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000 (sy)(ss)trunc.f r0.x, r1.w (rpt2)nop cov.f32s16 hr0.x, r0.x diff --git a/reference/ffox-otmc/ffox-otmc-36.asm b/reference/ffox-otmc/ffox-otmc-36.asm index 50f2536..8619b6b 100644 --- a/reference/ffox-otmc/ffox-otmc-36.asm +++ b/reference/ffox-otmc/ffox-otmc-36.asm @@ -6,6 +6,8 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c7.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 +@const(c8.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.z, 6, r0.x bary.f r0.w, 3, r0.x bary.f r1.x, 9, r0.x diff --git a/reference/ffox-otmc/ffox-otmc-39.asm b/reference/ffox-otmc/ffox-otmc-39.asm index e07af58..8450922 100644 --- a/reference/ffox-otmc/ffox-otmc-39.asm +++ b/reference/ffox-otmc/ffox-otmc-39.asm @@ -6,6 +6,8 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c6.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 +@const(c7.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.z, 2, r0.x bary.f r0.w, 5, r0.x bary.f r2.x, 8, r0.x diff --git a/reference/ffox-otmc/ffox-otmc-41.asm b/reference/ffox-otmc/ffox-otmc-41.asm index 1cd75ff..5c6db50 100644 --- a/reference/ffox-otmc/ffox-otmc-41.asm +++ b/reference/ffox-otmc/ffox-otmc-41.asm @@ -25,6 +25,8 @@ @out(r3.y) out13 @out(r3.z) out14 @out(r3.w) out15 +@const(c12.x) 0x3f800000, 0x00000000, 0x40000000, 0x00000000 +@const(c13.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)mov.f32f32 r0.x, c5.x mov.f32f32 r0.y, c9.x mov.f32f32 r0.z, c1.w diff --git a/reference/ffox-otmc/ffox-otmc-42.asm b/reference/ffox-otmc/ffox-otmc-42.asm index 50f2536..8619b6b 100644 --- a/reference/ffox-otmc/ffox-otmc-42.asm +++ b/reference/ffox-otmc/ffox-otmc-42.asm @@ -6,6 +6,8 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c7.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 +@const(c8.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.z, 6, r0.x bary.f r0.w, 3, r0.x bary.f r1.x, 9, r0.x diff --git a/reference/ffox-otmc/ffox-otmc-43.asm b/reference/ffox-otmc/ffox-otmc-43.asm index 1cd75ff..5c6db50 100644 --- a/reference/ffox-otmc/ffox-otmc-43.asm +++ b/reference/ffox-otmc/ffox-otmc-43.asm @@ -25,6 +25,8 @@ @out(r3.y) out13 @out(r3.z) out14 @out(r3.w) out15 +@const(c12.x) 0x3f800000, 0x00000000, 0x40000000, 0x00000000 +@const(c13.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)mov.f32f32 r0.x, c5.x mov.f32f32 r0.y, c9.x mov.f32f32 r0.z, c1.w diff --git a/reference/ffox-otmc/ffox-otmc-44.asm b/reference/ffox-otmc/ffox-otmc-44.asm index 1cd75ff..5c6db50 100644 --- a/reference/ffox-otmc/ffox-otmc-44.asm +++ b/reference/ffox-otmc/ffox-otmc-44.asm @@ -25,6 +25,8 @@ @out(r3.y) out13 @out(r3.z) out14 @out(r3.w) out15 +@const(c12.x) 0x3f800000, 0x00000000, 0x40000000, 0x00000000 +@const(c13.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)mov.f32f32 r0.x, c5.x mov.f32f32 r0.y, c9.x mov.f32f32 r0.z, c1.w diff --git a/reference/ffox-otmc/ffox-otmc-45.asm b/reference/ffox-otmc/ffox-otmc-45.asm index 50f2536..8619b6b 100644 --- a/reference/ffox-otmc/ffox-otmc-45.asm +++ b/reference/ffox-otmc/ffox-otmc-45.asm @@ -6,6 +6,8 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c7.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 +@const(c8.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.z, 6, r0.x bary.f r0.w, 3, r0.x bary.f r1.x, 9, r0.x diff --git a/reference/ffox-otmc/ffox-otmc-46.asm b/reference/ffox-otmc/ffox-otmc-46.asm index 1cd75ff..5c6db50 100644 --- a/reference/ffox-otmc/ffox-otmc-46.asm +++ b/reference/ffox-otmc/ffox-otmc-46.asm @@ -25,6 +25,8 @@ @out(r3.y) out13 @out(r3.z) out14 @out(r3.w) out15 +@const(c12.x) 0x3f800000, 0x00000000, 0x40000000, 0x00000000 +@const(c13.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)mov.f32f32 r0.x, c5.x mov.f32f32 r0.y, c9.x mov.f32f32 r0.z, c1.w diff --git a/reference/ffox-otmc/ffox-otmc-48.asm b/reference/ffox-otmc/ffox-otmc-48.asm index 8cc8ce8..d73ae0a 100644 --- a/reference/ffox-otmc/ffox-otmc-48.asm +++ b/reference/ffox-otmc/ffox-otmc-48.asm @@ -6,6 +6,8 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c9.x) 0x3f800000, 0x00000000, 0x00000000, 0x00000000 +@const(c10.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.z, 6, r0.x bary.f r0.w, 3, r0.x bary.f r1.x, 9, r0.x diff --git a/reference/ffox-otmc/ffox-otmc-50.asm b/reference/ffox-otmc/ffox-otmc-50.asm index b25f88e..fb6c094 100644 --- a/reference/ffox-otmc/ffox-otmc-50.asm +++ b/reference/ffox-otmc/ffox-otmc-50.asm @@ -29,6 +29,9 @@ @out(r4.y) out17 @out(r4.z) out18 @out(r4.w) out19 +@const(c14.x) 0x3f800000, 0x00000000, 0xc0800000, 0x3d8f5c29 +@const(c15.x) 0x3f000000, 0x00000000, 0x00000000, 0x00000000 +@const(c16.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)mov.f32f32 r1.x, c12.x mov.f32f32 r1.y, c13.x mul.f r1.z, c0.z, r0.x diff --git a/reference/ffox-otmc/ffox-otmc-51.asm b/reference/ffox-otmc/ffox-otmc-51.asm index 0b2b2d2..bc785ef 100644 --- a/reference/ffox-otmc/ffox-otmc-51.asm +++ b/reference/ffox-otmc/ffox-otmc-51.asm @@ -6,6 +6,8 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c10.x) 0x3e99999a, 0x00000000, 0x00000000, 0x00000000 +@const(c11.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.z, 6, r0.x bary.f r0.w, 3, r0.x bary.f r1.x, 9, r0.x diff --git a/reference/ffox-otmc/ffox-otmc-52.asm b/reference/ffox-otmc/ffox-otmc-52.asm index b25f88e..fb6c094 100644 --- a/reference/ffox-otmc/ffox-otmc-52.asm +++ b/reference/ffox-otmc/ffox-otmc-52.asm @@ -29,6 +29,9 @@ @out(r4.y) out17 @out(r4.z) out18 @out(r4.w) out19 +@const(c14.x) 0x3f800000, 0x00000000, 0xc0800000, 0x3d8f5c29 +@const(c15.x) 0x3f000000, 0x00000000, 0x00000000, 0x00000000 +@const(c16.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)mov.f32f32 r1.x, c12.x mov.f32f32 r1.y, c13.x mul.f r1.z, c0.z, r0.x diff --git a/reference/ffox-otmc/ffox-otmc-54.asm b/reference/ffox-otmc/ffox-otmc-54.asm index a8a0af6..85e3640 100644 --- a/reference/ffox-otmc/ffox-otmc-54.asm +++ b/reference/ffox-otmc/ffox-otmc-54.asm @@ -6,6 +6,8 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c0.x) 0x40000000, 0x3f8ccccd, 0x3f800000, 0x3e99999a +@const(c1.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.z, 2, r0.x bary.f r0.w, 5, r0.x bary.f r1.x, 0, r0.x diff --git a/reference/ffox-otmc/ffox-otmc-57.asm b/reference/ffox-otmc/ffox-otmc-57.asm index 937bced..6d3a071 100644 --- a/reference/ffox-otmc/ffox-otmc-57.asm +++ b/reference/ffox-otmc/ffox-otmc-57.asm @@ -6,6 +6,7 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)bary.f r0.z, 0, r0.x bary.f r0.w, 1, r0.x bary.f (ei)r1.x, 2, r0.x diff --git a/reference/ffox-otmc/ffox-otmc-59.asm b/reference/ffox-otmc/ffox-otmc-59.asm index 46a2696..ccc345b 100644 --- a/reference/ffox-otmc/ffox-otmc-59.asm +++ b/reference/ffox-otmc/ffox-otmc-59.asm @@ -6,6 +6,7 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)bary.f r0.z, 0, r0.x bary.f (ei)r0.w, 1, r0.x (rpt5)nop diff --git a/reference/ffox-vert.asm b/reference/ffox-vert.asm index 0a2d6cc..631a17f 100644 --- a/reference/ffox-vert.asm +++ b/reference/ffox-vert.asm @@ -11,6 +11,7 @@ @out(r1.y) out5 @out(r1.z) out6 @out(r1.w) out7 +@const(c21.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000 (sy)(ss)trunc.f r0.x, r1.w (rpt2)nop cov.f32s16 hr0.x, r0.x @@ -27,66 +28,57 @@ mov.f32f32 r0.z, c (ul)mov.f32f32 r0.w, c mad.f32 r0.x, r1.x, r0.x, r0.y (rpt2)nop +mul.f r0.y, c8.w, r0.x +mad.f32 r0.z, r1.y, r0.z, r0.w mov.f32f32 r0.x, r0.x -mad.f32 r0.y, r1.y, r0.z, r0.w (rpt1)nop -mul.f r0.z, c8.w, r0.x -mov.f32f32 r0.y, r0.y +mad.f32 r0.y, c9.w, r0.z, r0.y mul.f r0.w, c8.x, r0.x mul.f r1.z, c8.y, r0.x mul.f r0.x, c8.z, r0.x -mad.f32 r0.z, c9.w, r0.y, r0.z -mad.f32 r0.w, c9.x, r0.y, r0.w -mad.f32 r1.z, c9.y, r0.y, r1.z -mad.f32 r0.x, c9.z, r0.y, r0.x -mov.f32f32 r0.y, r0.z -mov.f32f32 r0.z, r0.w -mov.f32f32 r0.w, r1.z -mov.f32f32 r0.x, r0.x add.f r0.y, r0.y, c11.w -add.f r0.z, r0.z, c11.x -add.f r0.w, r0.w, c11.y +mov.f32f32 r0.z, r0.z +(rpt4)nop +rcp r1.w, r0.y +mad.f32 r0.w, c9.x, r0.z, r0.w +mad.f32 r1.z, c9.y, r0.z, r1.z +mad.f32 r0.x, c9.z, r0.z, r0.x +(ss)add.f r0.y, r0.y, (neg)c12.w +add.f r0.z, r0.w, c11.x +add.f r0.w, r1.z, c11.y add.f r0.x, r0.x, c11.z -(rpt2)nop -rcp r1.z, r0.y -(ss)mul.f r0.z, r0.z, r1.z -mul.f r0.w, r0.w, r1.z -mul.f r0.x, r0.x, r1.z -(ss)mov.f32f32 r0.y, r0.y +nop +(ss)mul.f r0.z, r0.z, r1.w +mul.f r0.w, r0.w, r1.w +mul.f r0.x, r0.x, r1.w +nop add.f r0.z, r0.z, (neg)c12.x add.f r0.w, r0.w, (neg)c12.y add.f r0.x, r0.x, (neg)c12.z -add.f r0.y, r0.y, (neg)c12.w -(rpt2)nop +nop mul.f r0.z, r0.z, r0.y -mul.f r0.w, r0.w, r0.y +mul.f r1.z, r0.w, r0.y mul.f r0.x, r0.x, r0.y nop -mov.f32f32 r0.z, r0.z -mov.f32f32 r1.z, r0.w -mov.f32f32 r0.x, r0.x -nop -mul.f r0.w, c0.w, r0.z -mul.f r1.w, c0.z, r0.z -mad.f32 r0.w, c1.w, r1.z, r0.w -mad.f32 r1.w, c1.z, r1.z, r1.w -mad.f32 r0.w, c2.w, r0.x, r0.w -mad.f32 r1.w, c2.z, r0.x, r1.w -mad.f32 r0.w, c3.w, r0.y, r0.w -mad.f32 r1.w, c3.z, r0.y, r1.w -mul.f r2.x, c0.y, r0.z -mul.f r2.y, c0.x, r0.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.z, r1.w -mad.f32 r1.w, c1.y, r1.z, r2.x -mad.f32 r1.z, c1.x, r1.z, r2.y -mad.f32 r1.w, c2.y, r0.x, r1.w +mov.f32f32 r1.w, r0.z +mul.f r2.x, c0.x, r0.z +mov.f32f32 r2.y, r1.z +mov.f32f32 r2.z, r0.x +mul.f r0.z, c0.w, r1.w +mul.f r0.w, c0.z, r1.w +mad.f32 r0.z, c1.w, r2.y, r0.z +mad.f32 r0.w, c1.z, r2.y, r0.w +mad.f32 r0.z, c2.w, r2.z, r0.z +mad.f32 r2.w, c2.z, r2.z, r0.w +mad.f32 r0.w, c3.w, r0.y, r0.z +mad.f32 r0.z, c3.z, r0.y, r2.w +mul.f r1.w, c0.y, r1.w +mad.f32 r1.z, c1.x, r1.z, r2.x +mad.f32 r1.w, c1.y, r2.y, r1.w mad.f32 r0.x, c2.x, r0.x, r1.z -mad.f32 r1.z, c3.y, r0.y, r1.w +mad.f32 r1.z, c2.y, r2.z, r1.w mad.f32 r0.x, c3.x, r0.y, r0.x -(rpt1)nop -mov.f32f32 r0.y, r1.z -mov.f32f32 r0.x, r0.x +mad.f32 r0.y, c3.y, r0.y, r1.z mova a0.x, hr0.y (rpt5)nop mov.f32f32 r1.z, c @@ -95,39 +87,24 @@ mov.f32f32 r2.x, c (ul)mov.f32f32 r2.y, c mad.f32 r1.x, r1.x, r1.z, r1.w (rpt2)nop -mov.f32f32 r1.x, r1.x +mov.f32f32 r1.z, r1.x +mul.f r1.x, c13.x, r1.x mad.f32 r1.y, r1.y, r2.x, r2.y -(rpt1)nop -mul.f r1.z, c13.y, r1.x -mov.f32f32 r1.y, r1.y -mul.f r1.w, c13.x, r1.x -mul.f r2.x, c13.w, r1.x -mul.f r1.x, c13.z, r1.x -mad.f32 r1.z, c14.y, r1.y, r1.z -mad.f32 r1.w, c14.x, r1.y, r1.w -mad.f32 r2.x, c14.w, r1.y, r2.x -mad.f32 r1.x, c14.z, r1.y, r1.x -mov.f32f32 r1.y, r1.z -mov.f32f32 r1.z, r1.w -mov.f32f32 r1.w, r2.x -mov.f32f32 r1.x, r1.x +nop +mul.f r1.w, c13.y, r1.z +mul.f r2.x, c13.w, r1.z +mov.f32f32 r2.y, r1.y +mul.f r1.z, c13.z, r1.z +mad.f32 r1.x, c14.x, r1.y, r1.x +nop +mad.f32 r1.y, c14.y, r2.y, r1.w +mad.f32 r1.w, c14.w, r2.y, r2.x +mad.f32 r1.z, c14.z, r2.y, r1.z +add.f r1.x, r1.x, c16.x add.f r1.y, r1.y, c16.y -add.f r2.x, r1.z, c16.x -mov.f32f32 r1.w, r1.w -mov.f32f32 r1.z, r1.x -mov.f32f32 r1.x, r1.y -mov.f32f32 r1.y, r2.x -(rpt1)nop -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.y, r1.y -(rpt1)nop -mov.f32f32 r1.x, r1.x -mov.f32f32 r2.x, r1.y -(rpt1)nop -mov.f32f32 r1.y, r1.x -mov.f32f32 r1.x, r2.x end +nop ; VERT: outputs: r0.x (0:0) r1.x (5:20) ; VERT: inputs: r1.x (0:0,cm=b,il=8,b=0) -; VERT: 144 instructions, 1 half, 3 full +; VERT: 114 instructions, 1 half, 3 full diff --git a/reference/flow.asm b/reference/flow.asm index e9b7b37..c61e896 100644 --- a/reference/flow.asm +++ b/reference/flow.asm @@ -6,227 +6,154 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c5.x) 0x40000000, 0xbf800000, 0x40800000, 0x3f000000 +@const(c6.x) 0x00000000, 0x3f333333, 0x3e99999a, 0x3d4ccccd +@const(c7.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.z, 0, r0.x mov.f32f32 r0.w, c3.x bary.f r1.x, 1, r0.x mov.f32f32 r1.y, c6.x -add.f r1.z, r0.z, c3.x -add.f r1.w, r0.z, c4.x +add.f r1.z, r0.z, c4.x +add.f r2.x, r0.z, c3.x +add.f r1.w, r1.x, c4.y +add.f r2.y, r1.x, c3.y add.f r0.z, r0.z, r0.w add.f r0.w, r1.x, r1.y -mov.f32f32 r1.y, r1.z -mov.f32f32 r1.z, r1.w -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.z, r1.z -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.w, r1.y -add.f r1.y, r1.x, c3.y -mov.f32f32 r2.y, r1.z -add.f r1.x, r1.x, c4.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r1.y, r1.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.x, r1.x -bary.f r1.z, 7, r0.x -mov.f32f32 r1.y, r1.y -mov.f32f32 r2.w, r0.z -mov.f32f32 r3.x, r0.w -mov.f32f32 r0.z, r1.x -mov.f32f32 r2.x, r1.y -mov.f32f32 r0.w, r1.z -bary.f r1.x, 8, r0.x -bary.f r1.y, 9, r0.x -mov.f32f32 r2.z, r0.z -sam (f32)(xyzw)r2.w, r2.w, s#2, t#2 -mov.f32f32 r0.z, c6.x -mov.f32f32 r1.z, c6.x -sam (f32)(xyz)r3.w, r1.w, s#0, t#0 -(sy)(ss)mad.f32 r1.w, c5.x, r4.x, c5.y -mad.f32 r2.x, c5.x, r3.w, c5.y -mad.f32 r3.w, c5.x, r4.y, c5.y -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.w, r1.w -sam (f32)(xyz)r4.x, r2.y, s#1, t#1 -(sy)(ss)mad.f32 r2.y, c5.x, r4.y, c5.y -mov.f32f32 r2.x, r2.x -mad.f32 r2.z, c5.x, r4.x, c5.y +mov.f32f32 r1.x, c6.x +mov.f32f32 r1.y, c6.x +bary.f r2.z, 7, r0.x +sam (f32)(xyz)r2.w, r1.z, s#1, t#1 +(sy)(ss)mad.f32 r1.z, c5.x, r3.x, c5.y +sam (f32)(xyz)r3.z, r2.x, s#0, t#0 +(sy)mad.f32 r1.w, c5.x, r3.w, c5.y +(ss)mad.f32 r2.x, c5.x, r2.w, c5.y +mad.f32 r2.y, c5.x, r3.z, c5.y +mul.f r1.z, c5.z, r1.z mul.f r1.w, c5.z, r1.w -mov.f32f32 r2.y, r2.y -mul.f r4.x, r2.x, r2.x -mov.f32f32 r2.z, r2.z -mov.f32f32 r1.w, r1.w -mul.f r2.y, c5.z, r2.y -mov.f32f32 r3.w, r3.w -mov.f32f32 r2.z, r2.z -mad.f32 r4.x, r1.w, r1.w, r4.x -mov.f32f32 r2.y, r2.y -mad.f32 r4.y, c5.x, r4.z, c5.y -mov.f32f32 r1.y, r1.y -mov.f32f32 r4.x, r4.x -mov.f32f32 r2.y, r2.y -mad.f32 r4.x, r3.w, r3.w, r4.x -mul.f r4.z, r2.z, r2.z -mov.f32f32 r4.y, r4.y -(rpt3)nop -rsq r4.x, r4.x -(ss)mov.f32f32 r4.x, r4.x -mad.f32 r4.z, r2.y, r2.y, r4.z -mov.f32f32 r4.y, r4.y -bary.f r4.w, 4, r0.x -mul.f r2.x, r2.x, r4.x -mul.f r1.w, r1.w, r4.x -mul.f r3.w, r3.w, r4.x -mov.f32f32 r4.x, r4.z -mov.f32f32 r2.x, r2.x -mad.f32 r4.x, r4.y, r4.y, r4.x -mov.f32f32 r1.w, r1.w -mov.f32f32 r3.w, r3.w -bary.f r4.z, 5, r0.x +mov.f32f32 r2.w, r2.x +mov.f32f32 r3.x, r2.y +mov.f32f32 r3.z, r1.z +mov.f32f32 r3.w, r1.w +mov.f32f32 r2.w, r2.w +mul.f r2.y, r2.y, r3.x +mov.f32f32 r3.z, r3.z +mad.f32 r1.w, r1.w, r3.w, r2.y +mul.f r2.x, r2.x, r2.w +mad.f32 r2.y, c5.x, r4.x, c5.y +mad.f32 r1.z, r1.z, r3.z, r2.x +mad.f32 r2.x, c5.x, r3.y, c5.y +sam (f32)(xyzw)r4.x, r0.z, s#2, t#2 +(ss)bary.f r0.z, 8, r0.x +bary.f r0.w, 9, r0.x +bary.f r3.y, 4, r0.x +mov.f32f32 r5.x, r2.x +mov.f32f32 r5.y, r2.y +bary.f r5.z, 5, r0.x bary.f (ei)r0.x, 6, r0.x +mov.f32f32 r0.y, r5.x +mad.f32 r1.w, r5.y, r5.y, r1.w (rpt1)nop -rsq r0.y, r4.x -(ss)mov.f32f32 r0.y, r0.y -(rpt2)nop -mad.f32 r2.x, r2.z, r0.y, r2.x -mad.f32 r1.w, r2.y, r0.y, r1.w -mad.f32 r0.y, r4.y, r0.y, r3.w -nop -mov.f32f32 r2.x, r2.x -mov.f32f32 r1.w, r1.w -mov.f32f32 r0.y, r0.y +mad.f32 r0.y, r0.y, r0.y, r1.z +(rpt5)nop +rsq r0.y, r0.y +(ss)mov.f32f32 r1.z, r0.y +rsq r1.w, r1.w +(ss)mov.f32f32 r5.x, r1.w +(ss)mul.f r1.w, r2.y, r1.w +(rpt1)nop +mul.f r2.y, r3.x, r5.x +mul.f r3.x, r3.w, r5.x +mad.f32 r2.y, r2.w, r1.z, r2.y +mad.f32 r1.z, r3.z, r1.z, r3.x +mad.f32 r0.y, r2.x, r0.y, r1.w nop -mul.f r2.x, r2.x, c5.w -mul.f r1.w, r1.w, c5.w +mul.f r1.w, r2.y, c5.w +mul.f r1.z, r1.z, c5.w mul.f r0.y, r0.y, c5.w nop -mov.f32f32 r2.x, r2.x -mov.f32f32 r1.w, r1.w -mov.f32f32 r0.y, r0.y -nop -mul.f r2.y, r2.x, r4.w -mul.f r2.z, r4.w, r2.x -mad.f32 r2.y, r1.w, r4.z, r2.y -mad.f32 r2.z, r4.z, r1.w, r2.z -(rpt1)nop -mov.f32f32 r2.y, r2.y -mov.f32f32 r2.z, r2.z -mad.f32 r2.y, r0.y, r0.x, r2.y -mad.f32 r2.z, r0.x, r0.y, r2.z -(rpt1)nop -mul.f r2.x, r2.y, r2.x -max.f r2.z, r2.z, c6.x -mul.f r1.w, r2.y, r1.w -mul.f r0.y, r2.y, r0.y -mov.f32f32 r2.x, r2.x -mov.f32f32 r2.y, r2.z -mov.f32f32 r1.w, r1.w -mov.f32f32 r0.y, r0.y -mul.f r2.x, c5.x, r2.x -mad.f32 r2.y, c6.y, r2.y, c6.z -mul.f r1.w, c5.x, r1.w -mul.f r0.y, c5.x, r0.y -mov.f32f32 r2.x, r2.x -mov.f32f32 r2.y, r2.y -mov.f32f32 r1.w, r1.w -mov.f32f32 r0.y, r0.y -add.f r2.x, r4.w, (neg)r2.x -mul.f r2.z, r3.z, r2.y -mul.f r3.y, r3.y, r2.y -mul.f r3.x, r3.x, r2.y -mov.f32f32 r2.x, r2.x -mov.f32f32 r3.z, r2.z -mov.f32f32 r3.w, r3.y -(ss)mov.f32f32 r4.x, r3.x -mul.f r4.y, r2.x, r2.x -add.f r1.w, r4.z, (neg)r1.w -add.f r1.z, r3.z, r1.z -add.f r0.x, r0.x, (neg)r0.y -mul.f r0.y, r2.w, r2.y -mov.f32f32 r1.w, r1.w -add.f r0.z, r1.z, r0.z -mov.f32f32 r0.x, r0.x -mov.f32f32 r1.z, r0.y -mad.f32 r2.y, r1.w, r1.w, r4.y +mov.f32f32 r2.x, r1.w +mul.f r1.w, r3.y, r1.w +mov.f32f32 r2.y, r1.z +mov.f32f32 r2.w, r0.y +mul.f r3.x, r2.x, r3.y +mad.f32 r1.z, r5.z, r1.z, r1.w +mad.f32 r1.w, r2.y, r5.z, r3.x +mad.f32 r0.y, r0.x, r0.y, r1.z +mad.f32 r1.z, r2.w, r0.x, r1.w (rpt2)nop -mov.f32f32 r2.y, r2.y -nop -mad.f32 r2.y, r0.x, r0.x, r2.y -(rpt5)nop -rsq r2.y, r2.y -(ss)mov.f32f32 r2.y, r2.y +mul.f r1.w, r1.z, r2.x +max.f r0.y, r0.y, c6.x +mul.f r2.x, r1.z, r2.y +mul.f r1.z, r1.z, r2.w +mul.f r1.w, c5.x, r1.w +mad.f32 r0.y, c6.y, r0.y, c6.z +mul.f r2.x, c5.x, r2.x +mul.f r1.z, c5.x, r1.z +add.f r1.w, r3.y, (neg)r1.w +mov.f32f32 r2.y, r0.y +add.f r2.x, r5.z, (neg)r2.x +add.f r0.x, r0.x, (neg)r1.z +mov.f32f32 r1.z, r1.w +(sy)mul.f r2.w, r4.w, r2.y +mov.f32f32 r3.x, r2.x +mov.f32f32 r3.y, r0.x +mul.f r1.w, r1.w, r1.z +add.f r1.y, r2.w, r1.y +mad.f32 r1.w, r2.x, r3.x, r1.w +mul.f r2.x, r4.z, r2.y +mad.f32 r1.w, r3.y, r3.y, r1.w +add.f r1.x, r1.y, r1.x +mul.f r1.y, r4.y, r2.y +mul.f r0.y, r4.x, r0.y (rpt2)nop -mul.f r2.x, r2.x, r2.y -mul.f r1.w, r1.w, r2.y -mul.f r0.x, r0.x, r2.y -nop -mov.f32f32 r2.x, r2.x -mov.f32f32 r1.w, r1.w -mov.f32f32 r0.x, r0.x +rsq r1.w, r1.w +(ss)mov.f32f32 r2.y, r1.w +mul.f r0.x, r0.x, r1.w +(rpt1)nop +mul.f r1.z, r1.z, r2.y +(ss)mul.f r1.w, r3.x, r2.y +(rpt1)nop +mul.f r1.z, r1.z, r2.z nop -mul.f r0.w, r2.x, r0.w +mad.f32 r0.z, r1.w, r0.z, r1.z nop -mad.f32 r0.w, r1.w, r1.x, r0.w -(rpt2)nop -mov.f32f32 r0.w, r0.w -nop -mad.f32 r0.x, r0.x, r1.y, r0.w -(rpt2)nop -mov.f32f32 r0.x, r0.x +mad.f32 r0.x, r0.x, r0.w, r0.z (rpt2)nop max.f r0.x, r0.x, c6.x (rpt2)nop -mov.f32f32 r0.x, r0.x +mov.f32f32 r0.z, r0.x (rpt2)nop -mov.f32f32 r0.w, r0.x -mov.f32f32 r1.x, r0.x -cmps.f.lt r0.x, c6.x, r0.x +mul.f r0.x, r0.x, r0.z +cmps.f.lt r0.z, c6.x, r0.z (rpt1)nop -mul.f r0.w, r0.w, r1.x -cov.u32f32 r0.x, r0.x +mov.f32f32 r0.w, r0.x +cov.u32f32 r0.z, r0.z (rpt1)nop -mov.f32f32 r0.w, r0.w -cmps.f.ne r0.x, r0.x, c6.x +mul.f r0.x, r0.x, r0.w +cmps.f.ne r0.z, r0.z, c6.x (rpt1)nop +mov.f32f32 r0.w, r0.x +mul.f r0.x, r0.x, c6.w +sel.b32 r1.w, r1.x, r0.z, r2.w +nop mul.f r0.w, r0.w, r0.w -sel.b32 r0.z, r0.z, r0.x, r2.z -(rpt1)nop -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.w, r0.z +add.f r1.x, r2.x, r0.x +add.f r1.z, r1.y, r0.x +add.f r0.x, r0.y, r0.x +mul.f r0.w, r0.w, c6.z +(rpt2)nop +mov.f32f32 r2.y, r0.w +add.f r0.x, r0.x, r0.w (rpt1)nop -mul.f r0.z, r0.w, r0.w -mul.f r0.w, r0.w, c6.w +add.f r0.w, r1.x, r2.y +add.f r1.x, r1.z, r2.y (rpt1)nop -mov.f32f32 r0.z, r0.z -mov.f32f32 r1.x, r0.w -mov.f32f32 r1.y, r0.w -mov.f32f32 r0.w, r0.w -mul.f r0.z, r0.z, c6.z -add.f r1.x, r3.w, r1.x -add.f r1.y, r4.x, r1.y -add.f r0.w, r1.z, r0.w -mov.f32f32 r0.z, r0.z -(rpt2)nop -mov.f32f32 r1.z, r0.z -mov.f32f32 r2.x, r0.z -mov.f32f32 r0.z, r0.z -nop -add.f r1.x, r1.x, r1.z -add.f r1.y, r1.y, r2.x -add.f r0.z, r0.w, r0.z -nop -sel.b32 r0.w, r1.x, r0.x, r3.y -sel.b32 r1.x, r1.y, r0.x, r3.x -sel.b32 r0.x, r0.z, r0.x, r0.y -nop -mov.f32f32 r1.z, r0.w -mov.f32f32 r1.y, r1.x -mov.f32f32 r1.x, r0.x +sel.b32 r1.z, r0.w, r0.z, r2.x +sel.b32 r1.y, r1.x, r0.z, r1.y +sel.b32 r1.x, r0.x, r0.z, r0.y end nop ; FRAG: outputs: r1.x (1:0) -; FRAG: inputs: r0.x (5:0,cm=f,il=8,b=1) r0.w (5:19,cm=f,il=12,b=1) r2.x (5:20,cm=f,il=16,b=1) -; FRAG: 254 instructions, 0 half, 5 full +; FRAG: inputs: r0.x (5:0,cm=f,il=8,b=1) r1.w (5:19,cm=f,il=12,b=1) r2.x (5:20,cm=f,il=16,b=1) +; FRAG: 169 instructions, 0 half, 6 full diff --git a/reference/foo.asm b/reference/foo.asm index 60e36ef..f984014 100644 --- a/reference/foo.asm +++ b/reference/foo.asm @@ -1,24 +1,27 @@ ; options: -; FRAG: new compiler +; FRAG: TGSI compiler @in(r0.x) in0 @in(r0.y) in1 @out(r1.x) out0 @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c0.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000 (sy)(ss)bary.f r0.z, 0, r0.x -bary.f (ei)r0.x, 1, r0.x +bary.f (ei)r0.w, 1, r0.x mov.f32f32 r1.w, c0.y -nop -mov.f32f32 r0.y, r0.z -mov.f32f32 r0.z, r0.x -(rpt5)nop -sam (f32)(xyzw)r0.x, r0.y, s#0, t#0 +(rpt4)nop +sam (f32)(xyzw)r0.x, r0.z, s#0, t#0 (sy)mov.f32f32 r1.z, r0.x mov.f32f32 r1.y, r0.y mov.f32f32 r1.x, r0.z end +nop +nop +nop ; FRAG: outputs: r1.x (1:0) ; FRAG: inputs: r0.x (5:0,cm=f,il=8,b=1) -; FRAG: 17 instructions, 0 half, 2 full +; FRAG: 13 instructions, 0 half, 2 full +; pos (bary): r0.x +; color: r1.x diff --git a/reference/fragProg1/fragProg1-08.asm b/reference/fragProg1/fragProg1-08.asm index 2c30bd8..6c590d1 100644 --- a/reference/fragProg1/fragProg1-08.asm +++ b/reference/fragProg1/fragProg1-08.asm @@ -12,6 +12,7 @@ @out(r1.y) out5 @out(r1.z) out6 @out(r1.w) out7 +@const(c5.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000 (sy)(ss)mov.f32f32 r0.x, c4.w mov.f32f32 r0.y, c4.z mov.f32f32 r0.z, c4.y diff --git a/reference/fragProg1/fragProg1-09.asm b/reference/fragProg1/fragProg1-09.asm index 505da11..2e6f7f9 100644 --- a/reference/fragProg1/fragProg1-09.asm +++ b/reference/fragProg1/fragProg1-09.asm @@ -4,6 +4,7 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c1.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000 (sy)(ss)mov.f32f32 r0.x, c0.w mov.f32f32 r0.y, c0.z mov.f32f32 r0.z, c0.y diff --git a/reference/fragProg1/fragProg1-10.asm b/reference/fragProg1/fragProg1-10.asm index 2c95a4d..a2f25c9 100644 --- a/reference/fragProg1/fragProg1-10.asm +++ b/reference/fragProg1/fragProg1-10.asm @@ -4,6 +4,7 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c1.x) 0x3e800000, 0x00000000, 0x3f000000, 0x00000000 (sy)(ss)mov.f32f32 r0.x, c0.w mov.f32f32 r0.y, c0.z mov.f32f32 r0.z, c0.y diff --git a/reference/fragProg1/fragProg1-11.asm b/reference/fragProg1/fragProg1-11.asm index 2c95a4d..3683d38 100644 --- a/reference/fragProg1/fragProg1-11.asm +++ b/reference/fragProg1/fragProg1-11.asm @@ -4,6 +4,7 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c1.x) 0xbe800000, 0xbe4ccccd, 0x00000000, 0x00000000 (sy)(ss)mov.f32f32 r0.x, c0.w mov.f32f32 r0.y, c0.z mov.f32f32 r0.z, c0.y diff --git a/reference/fragProg1/fragProg1-12.asm b/reference/fragProg1/fragProg1-12.asm index c6c895a..620d464 100644 --- a/reference/fragProg1/fragProg1-12.asm +++ b/reference/fragProg1/fragProg1-12.asm @@ -4,31 +4,20 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c1.x) 0x40000000, 0x3f800000, 0x00000000, 0x00000000 (sy)(ss)mov.f32f32 r0.x, c0.z mov.f32f32 r0.y, c0.y mov.f32f32 r0.z, c0.x -mov.f32f32 r0.w, c0.w +mov.f32f32 r1.w, c0.w mul.f r0.x, r0.x, c1.x mul.f r0.y, r0.y, c1.x mul.f r0.z, r0.z, c1.x -mov.f32f32 r1.w, r0.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z nop -add.f r0.x, r0.x, (neg)c1.y -add.f r0.y, r0.y, (neg)c1.y -add.f r0.z, r0.z, (neg)c1.y -nop -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -nop -mov.f32f32 r1.z, r0.x -mov.f32f32 r1.y, r0.y -mov.f32f32 r1.x, r0.z +add.f r1.z, r0.x, (neg)c1.y +add.f r1.y, r0.y, (neg)c1.y +add.f r1.x, r0.z, (neg)c1.y end ; FRAG: outputs: r1.x (1:0) ; FRAG: inputs: -; FRAG: 24 instructions, 0 half, 2 full +; FRAG: 12 instructions, 0 half, 2 full diff --git a/reference/fragProg1/fragProg1-13.asm b/reference/fragProg1/fragProg1-13.asm index 568337d..a2bf346 100644 --- a/reference/fragProg1/fragProg1-13.asm +++ b/reference/fragProg1/fragProg1-13.asm @@ -4,6 +4,7 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c3.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)mov.f32f32 r0.x, c0.w mov.f32f32 r0.y, c0.z mov.f32f32 r0.z, c0.y diff --git a/reference/fragProg1/fragProg1-14.asm b/reference/fragProg1/fragProg1-14.asm index 1ee49dc..6b8d0ba 100644 --- a/reference/fragProg1/fragProg1-14.asm +++ b/reference/fragProg1/fragProg1-14.asm @@ -4,6 +4,7 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c0.x) 0x00000000, 0x40490ff9, 0x3f000000, 0x3f800000 (sy)(ss)mov.f32f32 r0.x, c0.w mov.f32f32 r0.y, c0.z mov.f32f32 r0.z, c0.y diff --git a/reference/fragProg1/fragProg1-15.asm b/reference/fragProg1/fragProg1-15.asm index 1ee49dc..88369af 100644 --- a/reference/fragProg1/fragProg1-15.asm +++ b/reference/fragProg1/fragProg1-15.asm @@ -4,6 +4,7 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c0.x) 0x40d90ff9, 0x40e90ff9, 0x40c90ff9, 0xc0b90ff9 (sy)(ss)mov.f32f32 r0.x, c0.w mov.f32f32 r0.y, c0.z mov.f32f32 r0.z, c0.y diff --git a/reference/fragProg1/fragProg1-16.asm b/reference/fragProg1/fragProg1-16.asm index d3ba6ce..9bf4264 100644 --- a/reference/fragProg1/fragProg1-16.asm +++ b/reference/fragProg1/fragProg1-16.asm @@ -6,6 +6,7 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c1.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.z, 0, r0.x bary.f r0.w, 1, r0.x bary.f (ei)r0.x, 2, r0.x @@ -14,15 +15,15 @@ mul.f r0.y, c0.x, r0.z nop mad.f32 r0.y, c0.y, r0.w, r0.y (rpt2)nop -mov.f32f32 r0.y, r0.y -nop -mad.f32 r1.w, c0.z, r0.x, r0.y -mad.f32 r1.z, c0.z, r0.x, r0.y -mad.f32 r1.y, c0.z, r0.x, r0.y +mov.f32f32 r0.z, r0.y mad.f32 r1.x, c0.z, r0.x, r0.y +mad.f32 r1.w, c0.z, r0.x, r0.z +mad.f32 r1.z, c0.z, r0.x, r0.z +mad.f32 r1.y, c0.z, r0.x, r0.z end nop +nop ; FRAG: outputs: r1.x (1:0) ; FRAG: inputs: r0.x (1:0,cm=f,il=8,b=1) -; FRAG: 17 instructions, 0 half, 2 full +; FRAG: 16 instructions, 0 half, 2 full diff --git a/reference/fragProg1/fragProg1-17.asm b/reference/fragProg1/fragProg1-17.asm index c07ebd3..ee3ea36 100644 --- a/reference/fragProg1/fragProg1-17.asm +++ b/reference/fragProg1/fragProg1-17.asm @@ -4,6 +4,8 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c1.x) 0x3f000000, 0x00000000, 0x00000000, 0x00000000 +@const(c2.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)mov.f32f32 r0.x, c0.x mov.f32f32 r0.y, c0.y mov.f32f32 r0.z, c0.z @@ -11,8 +13,6 @@ nop mul.f r0.x, r0.x, r0.x nop mad.f32 r0.x, r0.y, r0.y, r0.x -(rpt2)nop -mov.f32f32 r0.x, r0.x nop mad.f32 r0.x, r0.z, r0.z, r0.x (rpt2)nop @@ -22,9 +22,7 @@ mul.f r1.y, r0.x, c1.x mul.f r1.x, r0.x, c1.x end nop -nop -nop ; FRAG: outputs: r1.x (1:0) ; FRAG: inputs: -; FRAG: 21 instructions, 0 half, 2 full +; FRAG: 17 instructions, 0 half, 2 full diff --git a/reference/fragProg1/fragProg1-18.asm b/reference/fragProg1/fragProg1-18.asm index 47b9fb9..23a8db4 100644 --- a/reference/fragProg1/fragProg1-18.asm +++ b/reference/fragProg1/fragProg1-18.asm @@ -6,6 +6,7 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c1.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.z, 0, r0.x bary.f r0.w, 1, r0.x bary.f r1.x, 2, r0.x @@ -13,20 +14,16 @@ bary.f (ei)r0.x, 3, r0.x mul.f r0.y, c0.x, r0.z nop mad.f32 r0.y, c0.y, r0.w, r0.y -(rpt2)nop -mov.f32f32 r0.y, r0.y nop mad.f32 r0.y, c0.z, r1.x, r0.y (rpt2)nop -mov.f32f32 r0.y, r0.y -nop -mad.f32 r1.w, c0.w, r0.x, r0.y -mad.f32 r1.z, c0.w, r0.x, r0.y -mad.f32 r1.y, c0.w, r0.x, r0.y +mov.f32f32 r0.z, r0.y mad.f32 r1.x, c0.w, r0.x, r0.y +mad.f32 r1.w, c0.w, r0.x, r0.z +mad.f32 r1.z, c0.w, r0.x, r0.z +mad.f32 r1.y, c0.w, r0.x, r0.z end -nop ; FRAG: outputs: r1.x (1:0) ; FRAG: inputs: r63.y (1:0,cm=f,il=8,b=1) -; FRAG: 23 instructions, 0 half, 2 full +; FRAG: 18 instructions, 0 half, 2 full diff --git a/reference/fragProg1/fragProg1-19.asm b/reference/fragProg1/fragProg1-19.asm index 4939cbc..9e18be0 100644 --- a/reference/fragProg1/fragProg1-19.asm +++ b/reference/fragProg1/fragProg1-19.asm @@ -6,6 +6,8 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c1.x) 0x3dcccccd, 0x00000000, 0x00000000, 0x00000000 +@const(c2.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.z, 0, r0.x bary.f r0.w, 1, r0.x bary.f r1.x, 2, r0.x @@ -13,24 +15,24 @@ bary.f (ei)r0.x, 3, r0.x mul.f r0.y, c0.x, r0.z nop mad.f32 r0.y, c0.y, r0.w, r0.y -(rpt2)nop -mov.f32f32 r0.y, r0.y nop mad.f32 r0.y, c0.z, r1.x, r0.y (rpt2)nop -mov.f32f32 r0.y, r0.y -(rpt2)nop -add.f r0.z, r0.x, r0.y -add.f r0.w, r0.x, r0.y -add.f r1.x, r0.x, r0.y -add.f r0.x, r0.x, r0.y -mul.f r1.w, r0.z, c1.x -mul.f r1.z, r0.w, c1.x -mul.f r1.y, r1.x, c1.x -mul.f r1.x, r0.x, c1.x +mov.f32f32 r0.z, r0.y +add.f r0.y, r0.x, r0.y +(rpt1)nop +add.f r0.w, r0.x, r0.z +add.f r1.x, r0.x, r0.z +add.f r0.x, r0.x, r0.z +nop +mul.f r1.w, r0.w, c1.x +mul.f r1.z, r1.x, c1.x +mul.f r1.y, r0.x, c1.x +mul.f r1.x, r0.y, c1.x end nop +nop ; FRAG: outputs: r1.x (1:0) ; FRAG: inputs: r63.y (1:0,cm=f,il=8,b=1) -; FRAG: 29 instructions, 0 half, 2 full +; FRAG: 25 instructions, 0 half, 2 full diff --git a/reference/fragProg1/fragProg1-20.asm b/reference/fragProg1/fragProg1-20.asm index 1c607f5..22b4ba3 100644 --- a/reference/fragProg1/fragProg1-20.asm +++ b/reference/fragProg1/fragProg1-20.asm @@ -4,6 +4,8 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c0.x) 0x411e6666, 0x3e23d70a, 0x40200000, 0x00000000 +@const(c1.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)mov.f32f32 r0.x, c0.y mov.f32f32 r1.w, c0.z mov.f32f32 r1.z, c0.y diff --git a/reference/fragProg1/fragProg1-21.asm b/reference/fragProg1/fragProg1-21.asm index bddc018..f1b7f67 100644 --- a/reference/fragProg1/fragProg1-21.asm +++ b/reference/fragProg1/fragProg1-21.asm @@ -4,6 +4,8 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c0.x) 0x00000000, 0x3f800000, 0x40800000, 0xc0000000 +@const(c1.x) 0x3c23d70a, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)mov.f32f32 r0.x, c0.w mov.f32f32 r0.y, c0.z mov.f32f32 r0.z, c0.y diff --git a/reference/fragProg1/fragProg1-22.asm b/reference/fragProg1/fragProg1-22.asm index 07fde5f..3dca99e 100644 --- a/reference/fragProg1/fragProg1-22.asm +++ b/reference/fragProg1/fragProg1-22.asm @@ -4,6 +4,8 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c0.x) 0x4099999a, 0x3e99999a, 0xbe4ccccd, 0x3f99999a +@const(c1.x) 0x3dcccccd, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)floor.f r0.x, c0.w floor.f r0.y, c0.z floor.f r0.z, c0.y diff --git a/reference/fragProg1/fragProg1-23.asm b/reference/fragProg1/fragProg1-23.asm index df9ee98..6b41ab8 100644 --- a/reference/fragProg1/fragProg1-23.asm +++ b/reference/fragProg1/fragProg1-23.asm @@ -4,6 +4,8 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c0.x) 0xbf8ccccd, 0x3dcccccd, 0xc00ccccd, 0x4019999a +@const(c1.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)floor.f r0.x, c0.w floor.f r0.y, c0.z floor.f r0.z, c0.y diff --git a/reference/fragProg1/fragProg1-24.asm b/reference/fragProg1/fragProg1-24.asm index 5b6302b..f37f50f 100644 --- a/reference/fragProg1/fragProg1-24.asm +++ b/reference/fragProg1/fragProg1-24.asm @@ -4,6 +4,8 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c0.x) 0x42800000, 0x3f800000, 0x41f00000, 0x40800000 +@const(c1.x) 0x3dcccccd, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)mov.f32f32 r0.x, c0.w mov.f32f32 r0.y, c0.z mov.f32f32 r0.z, c0.y diff --git a/reference/fragProg1/fragProg1-25.asm b/reference/fragProg1/fragProg1-25.asm index 8235a7f..82ed199 100644 --- a/reference/fragProg1/fragProg1-25.asm +++ b/reference/fragProg1/fragProg1-25.asm @@ -4,35 +4,25 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c0.x) 0x3f266666, 0x3f666666, 0x00000000, 0x41000000 +@const(c1.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)mov.f32f32 r0.x, c0.y -mov.f32f32 r0.y, c0.x -absneg.f r0.z, (neg)c0.x +absneg.f r0.y, (neg)c0.x +mov.f32f32 r0.z, c0.x mov.f32f32 r1.w, c1.y max.f r0.x, r0.x, c1.x -max.f r0.y, r0.y, c1.x -cmps.f.lt r0.z, r0.z, c0.z -min.f r0.w, c0.w, c1.z +cmps.f.lt r0.y, r0.y, c0.z +max.f r1.y, r0.z, c1.x +min.f r0.z, c0.w, c1.z mov.f32f32 r1.x, c1.y (rpt1)nop log2 r0.x, r0.x -(ss)mov.f32f32 r0.x, r0.x -mov.f32f32 r1.y, r0.y -(rpt1)nop -mul.f r0.x, r0.w, r0.x -(rpt2)nop -mov.f32f32 r0.x, r0.x +(ss)mul.f r0.x, r0.z, r0.x (rpt5)nop exp2 r0.x, r0.x -(ss)mov.f32f32 r0.x, r0.x -(rpt2)nop -sel.b32 r0.x, r0.x, r0.z, c1.x -(rpt2)nop -mov.f32f32 r0.x, r0.x -(rpt2)nop -mov.f32f32 r1.z, r0.x +(ss)sel.b32 r1.z, r0.x, r0.y, c1.x end -nop ; FRAG: outputs: r1.x (1:0) ; FRAG: inputs: -; FRAG: 42 instructions, 0 half, 2 full +; FRAG: 22 instructions, 0 half, 2 full diff --git a/reference/fragProg1/fragProg1-26.asm b/reference/fragProg1/fragProg1-26.asm index 5eacda2..66a7344 100644 --- a/reference/fragProg1/fragProg1-26.asm +++ b/reference/fragProg1/fragProg1-26.asm @@ -4,35 +4,25 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c0.x) 0x3f266666, 0x00000000, 0x00000000, 0x00000000 +@const(c1.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)mov.f32f32 r0.x, c0.y -mov.f32f32 r0.y, c0.x -absneg.f r0.z, (neg)c0.x +absneg.f r0.y, (neg)c0.x +mov.f32f32 r0.z, c0.x mov.f32f32 r1.w, c1.y max.f r0.x, r0.x, c1.x -max.f r0.y, r0.y, c1.x -cmps.f.lt r0.z, r0.z, c0.y -min.f r0.w, c0.y, c1.z +cmps.f.lt r0.y, r0.y, c0.y +max.f r1.y, r0.z, c1.x +min.f r0.z, c0.y, c1.z mov.f32f32 r1.x, c1.y (rpt1)nop log2 r0.x, r0.x -(ss)mov.f32f32 r0.x, r0.x -mov.f32f32 r1.y, r0.y -(rpt1)nop -mul.f r0.x, r0.w, r0.x -(rpt2)nop -mov.f32f32 r0.x, r0.x +(ss)mul.f r0.x, r0.z, r0.x (rpt5)nop exp2 r0.x, r0.x -(ss)mov.f32f32 r0.x, r0.x -(rpt2)nop -sel.b32 r0.x, r0.x, r0.z, c1.x -(rpt2)nop -mov.f32f32 r0.x, r0.x -(rpt2)nop -mov.f32f32 r1.z, r0.x +(ss)sel.b32 r1.z, r0.x, r0.y, c1.x end -nop ; FRAG: outputs: r1.x (1:0) ; FRAG: inputs: -; FRAG: 42 instructions, 0 half, 2 full +; FRAG: 22 instructions, 0 half, 2 full diff --git a/reference/fragProg1/fragProg1-27.asm b/reference/fragProg1/fragProg1-27.asm index 5eacda2..a009294 100644 --- a/reference/fragProg1/fragProg1-27.asm +++ b/reference/fragProg1/fragProg1-27.asm @@ -4,35 +4,25 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c0.x) 0xbf000000, 0x00000000, 0x00000000, 0x00000000 +@const(c1.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)mov.f32f32 r0.x, c0.y -mov.f32f32 r0.y, c0.x -absneg.f r0.z, (neg)c0.x +absneg.f r0.y, (neg)c0.x +mov.f32f32 r0.z, c0.x mov.f32f32 r1.w, c1.y max.f r0.x, r0.x, c1.x -max.f r0.y, r0.y, c1.x -cmps.f.lt r0.z, r0.z, c0.y -min.f r0.w, c0.y, c1.z +cmps.f.lt r0.y, r0.y, c0.y +max.f r1.y, r0.z, c1.x +min.f r0.z, c0.y, c1.z mov.f32f32 r1.x, c1.y (rpt1)nop log2 r0.x, r0.x -(ss)mov.f32f32 r0.x, r0.x -mov.f32f32 r1.y, r0.y -(rpt1)nop -mul.f r0.x, r0.w, r0.x -(rpt2)nop -mov.f32f32 r0.x, r0.x +(ss)mul.f r0.x, r0.z, r0.x (rpt5)nop exp2 r0.x, r0.x -(ss)mov.f32f32 r0.x, r0.x -(rpt2)nop -sel.b32 r0.x, r0.x, r0.z, c1.x -(rpt2)nop -mov.f32f32 r0.x, r0.x -(rpt2)nop -mov.f32f32 r1.z, r0.x +(ss)sel.b32 r1.z, r0.x, r0.y, c1.x end -nop ; FRAG: outputs: r1.x (1:0) ; FRAG: inputs: -; FRAG: 42 instructions, 0 half, 2 full +; FRAG: 22 instructions, 0 half, 2 full diff --git a/reference/fragProg1/fragProg1-28.asm b/reference/fragProg1/fragProg1-28.asm index bd4ce12..87d7d7b 100644 --- a/reference/fragProg1/fragProg1-28.asm +++ b/reference/fragProg1/fragProg1-28.asm @@ -6,6 +6,8 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c2.x) 0x3e4ccccd, 0x3f000000, 0x3f800000, 0x00000000 +@const(c3.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)mov.f32f32 r0.z, c3.y mov.f32f32 r0.w, c3.y mov.f32f32 r1.x, c3.y diff --git a/reference/fragProg1/fragProg1-35.asm b/reference/fragProg1/fragProg1-35.asm index 09de760..8c6badb 100644 --- a/reference/fragProg1/fragProg1-35.asm +++ b/reference/fragProg1/fragProg1-35.asm @@ -4,6 +4,8 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c0.x) 0x3f000000, 0x40000000, 0x40400000, 0x40800000 +@const(c1.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)mov.f32f32 r0.x, c0.w mov.f32f32 r0.y, c0.x mov.f32f32 r0.z, c0.x @@ -14,18 +16,14 @@ log2 r0.x, r0.x log2 r0.y, r0.y (ss)mul.f r0.y, c0.w, r0.y log2 r0.z, r0.z +(rpt2)nop (ss)mul.f r0.z, c0.z, r0.z log2 r0.w, r0.w (ss)mul.f r0.w, c0.y, r0.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -(rpt2)nop exp2 r1.w, r0.x nop exp2 r1.z, r0.y -nop +(rpt3)nop exp2 r1.y, r0.z nop exp2 r1.x, r0.w @@ -34,5 +32,5 @@ nop nop ; FRAG: outputs: r1.x (1:0) ; FRAG: inputs: -; FRAG: 30 instructions, 0 half, 2 full +; FRAG: 29 instructions, 0 half, 2 full diff --git a/reference/fragProg1/fragProg1-36.asm b/reference/fragProg1/fragProg1-36.asm index 6a0d9e3..c5a3b7d 100644 --- a/reference/fragProg1/fragProg1-36.asm +++ b/reference/fragProg1/fragProg1-36.asm @@ -4,6 +4,7 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c0.x) 0x41000000, 0xc1200000, 0x3f800000, 0x41400000 (sy)(ss)mov.f32f32 r0.x, c0.w mov.f32f32 r0.y, c0.z mov.f32f32 r0.z, c0.y diff --git a/reference/fragProg1/fragProg1-37.asm b/reference/fragProg1/fragProg1-37.asm index 69c0cfe..53c389c 100644 --- a/reference/fragProg1/fragProg1-37.asm +++ b/reference/fragProg1/fragProg1-37.asm @@ -4,6 +4,7 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c0.x) 0x41000000, 0xc1200000, 0x3f800000, 0x41400000 (sy)(ss)mov.f32f32 r0.x, c0.x (rpt5)nop rcp r1.w, r0.x diff --git a/reference/fragProg1/fragProg1-38.asm b/reference/fragProg1/fragProg1-38.asm index 74debfe..b32fb5d 100644 --- a/reference/fragProg1/fragProg1-38.asm +++ b/reference/fragProg1/fragProg1-38.asm @@ -4,6 +4,7 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c0.x) 0x3f800000, 0x40800000, 0x41100000, 0x42c80000 (sy)(ss)absneg.f r0.x, (abs)c0.w absneg.f r0.y, (abs)c0.z absneg.f r0.z, (abs)c0.y diff --git a/reference/fragProg1/fragProg1-39.asm b/reference/fragProg1/fragProg1-39.asm index 74debfe..970892b 100644 --- a/reference/fragProg1/fragProg1-39.asm +++ b/reference/fragProg1/fragProg1-39.asm @@ -4,6 +4,7 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c0.x) 0x00000000, 0xc2c80000, 0xc0a00000, 0xbf800000 (sy)(ss)absneg.f r0.x, (abs)c0.w absneg.f r0.y, (abs)c0.z absneg.f r0.z, (abs)c0.y diff --git a/reference/fragProg1/fragProg1-40.asm b/reference/fragProg1/fragProg1-40.asm index 3d9646b..c1fcc94 100644 --- a/reference/fragProg1/fragProg1-40.asm +++ b/reference/fragProg1/fragProg1-40.asm @@ -4,6 +4,8 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c0.x) 0x3f000000, 0x00000000, 0x00000000, 0x00000000 +@const(c1.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)mov.f32f32 r0.x, c0.x mov.f32f32 r0.y, c0.x mov.f32f32 r1.w, (0.000000) diff --git a/reference/fragProg1/fragProg1-42.asm b/reference/fragProg1/fragProg1-42.asm index c611cf9..cc6bad8 100644 --- a/reference/fragProg1/fragProg1-42.asm +++ b/reference/fragProg1/fragProg1-42.asm @@ -4,6 +4,7 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c0.x) 0x3fc90ff9, 0xbfc90ff9, 0x3f000000, 0x3f800000 (sy)(ss)mov.f32f32 r0.x, c0.w mov.f32f32 r0.y, c0.z mov.f32f32 r0.z, c0.y diff --git a/reference/fragProg1/fragProg1-43.asm b/reference/fragProg1/fragProg1-43.asm index c611cf9..119976b 100644 --- a/reference/fragProg1/fragProg1-43.asm +++ b/reference/fragProg1/fragProg1-43.asm @@ -4,6 +4,7 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c0.x) 0x40490ff9, 0xc0490ff9, 0x40d90ff9, 0xc0b90ff9 (sy)(ss)mov.f32f32 r0.x, c0.w mov.f32f32 r0.y, c0.z mov.f32f32 r0.z, c0.y diff --git a/reference/fragProg1/fragProg1-46.asm b/reference/fragProg1/fragProg1-46.asm index b4e00f3..d3a01c4 100644 --- a/reference/fragProg1/fragProg1-46.asm +++ b/reference/fragProg1/fragProg1-46.asm @@ -6,6 +6,8 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c1.x) 0x3dcccccd, 0x00000000, 0x00000000, 0x00000000 +@const(c2.x) 0x3f800000, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)bary.f r0.z, 3, r0.x bary.f r0.w, 2, r0.x bary.f r1.x, 1, r0.x diff --git a/reference/fragProg1/fragProg1-47.asm b/reference/fragProg1/fragProg1-47.asm index ceaaf94..5b982b1 100644 --- a/reference/fragProg1/fragProg1-47.asm +++ b/reference/fragProg1/fragProg1-47.asm @@ -4,6 +4,7 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c1.x) 0x00000000, 0x3f800000, 0xbf800000, 0x00000000 (sy)(ss)mov.f32f32 r0.x, c1.x mov.f32f32 r0.y, c1.y mov.f32f32 r0.z, c1.z diff --git a/reference/fragProg1/fragProg1-50.asm b/reference/fragProg1/fragProg1-50.asm index a547737..a0d3e20 100644 --- a/reference/fragProg1/fragProg1-50.asm +++ b/reference/fragProg1/fragProg1-50.asm @@ -4,6 +4,7 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c2.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)mov.f32f32 r0.x, c0.x mov.f32f32 r0.y, c0.z mov.f32f32 r0.z, c0.y diff --git a/reference/fragProg1/fragProg1-54.asm b/reference/fragProg1/fragProg1-54.asm index f6fe50e..f739db5 100644 --- a/reference/fragProg1/fragProg1-54.asm +++ b/reference/fragProg1/fragProg1-54.asm @@ -6,47 +6,44 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c2.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.z, 4, r0.x bary.f r0.w, 3, r0.x bary.f r1.x, 2, r0.x bary.f r1.y, 1, r0.x mad.f32 r0.z, c0.x, r0.z, c0.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.y, r1.y -max.f r0.z, r0.z, c2.x max.f r0.w, r0.w, c2.x max.f r1.x, r1.x, c2.x max.f r1.y, r1.y, c2.x +max.f r0.z, r0.z, c2.x +min.f r1.w, r0.w, c2.y +min.f r0.w, r1.x, c2.y +min.f r1.x, r1.y, c2.y min.f r0.z, r0.z, c2.y -min.f r0.w, r0.w, c2.y -min.f r1.x, r1.x, c2.y -min.f r1.y, r1.y, c2.y -add.f r1.z, c2.y, (neg)r0.z -add.f r2.x, c2.y, (neg)r0.z -add.f r2.y, c2.y, (neg)r0.z -mov.f32f32 r1.w, r0.w -mul.f r0.w, r1.z, c1.z -mul.f r1.x, r0.z, r1.x -mul.f r2.x, r2.x, c1.y -mul.f r2.y, r2.y, c1.x -nop -add.f r1.z, r1.x, r0.w -mul.f r0.w, r0.z, r1.y bary.f (ei)r0.x, 0, r0.x (rpt1)nop -add.f r1.y, r0.w, r2.x -mov.f32f32 r0.x, r0.x -(rpt2)nop +add.f r0.y, c2.y, (neg)r0.z +add.f r1.y, c2.y, (neg)r0.z +add.f r1.z, c2.y, (neg)r0.z +mul.f r0.w, r0.z, r0.w +mul.f r0.y, r0.y, c1.z +mul.f r1.y, r1.y, c1.y +mul.f r2.x, r1.z, c1.x +nop +add.f r1.z, r0.w, r0.y +mul.f r0.y, r0.z, r1.x max.f r0.x, r0.x, c2.x -(rpt2)nop +(rpt1)nop +add.f r1.y, r0.y, r1.y min.f r0.x, r0.x, c2.y (rpt2)nop mul.f r0.x, r0.z, r0.x (rpt2)nop -add.f r1.x, r0.x, r2.y +add.f r1.x, r0.x, r2.x end +nop +nop ; FRAG: outputs: r1.x (1:0) ; FRAG: inputs: r0.x (1:0,cm=f,il=8,b=1) r1.x (3:0,cm=f,il=12,b=1) -; FRAG: 49 instructions, 0 half, 3 full +; FRAG: 40 instructions, 0 half, 3 full diff --git a/reference/fragProg1/fragProg1-55.asm b/reference/fragProg1/fragProg1-55.asm index e06354b..fd73990 100644 --- a/reference/fragProg1/fragProg1-55.asm +++ b/reference/fragProg1/fragProg1-55.asm @@ -16,6 +16,7 @@ @out(r2.y) out9 @out(r2.z) out10 @out(r2.w) out11 +@const(c6.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000 (sy)(ss)mov.f32f32 r0.x, c4.w mov.f32f32 r0.y, c4.z mov.f32f32 r0.z, c4.y diff --git a/reference/fragProg1/fragProg1-56.asm b/reference/fragProg1/fragProg1-56.asm index d82d6b8..dbb2e48 100644 --- a/reference/fragProg1/fragProg1-56.asm +++ b/reference/fragProg1/fragProg1-56.asm @@ -6,36 +6,37 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c2.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.z, 4, r0.x -bary.f r0.w, 3, r0.x -bary.f r1.x, 2, r0.x -bary.f r1.y, 1, r0.x +bary.f r1.w, 3, r0.x +bary.f r0.w, 2, r0.x +bary.f r1.x, 1, r0.x +add.f r1.y, c0.z, (neg)r0.z add.f r1.z, c0.z, (neg)r0.z -add.f r2.x, c0.z, (neg)r0.z add.f r0.z, c0.z, (neg)r0.z -mov.f32f32 r1.w, r0.w -mul.f r0.w, r1.z, c0.w -mul.f r1.z, r2.x, c0.w -mul.f r0.z, r0.z, c0.w bary.f (ei)r0.x, 0, r0.x -max.f r0.y, r0.w, c2.x -max.f r0.w, r1.z, c2.x +mul.f r0.y, r1.y, c0.w +mul.f r1.y, r1.z, c0.w +mul.f r0.z, r0.z, c0.w +nop +max.f r0.y, r0.y, c2.x +max.f r1.y, r1.y, c2.x max.f r0.z, r0.z, c2.x nop min.f r0.y, r0.y, c2.y -min.f r0.w, r0.w, c2.y +min.f r1.y, r1.y, c2.y min.f r0.z, r0.z, c2.y nop add.f r1.z, c2.y, (neg)r0.y -add.f r2.x, c2.y, (neg)r0.w +add.f r2.x, c2.y, (neg)r1.y add.f r2.y, c2.y, (neg)r0.z -mul.f r0.y, r0.y, r1.x -mul.f r1.x, r1.z, c1.z +mul.f r0.y, r0.y, r0.w +mul.f r0.w, r1.z, c1.z mul.f r2.x, r2.x, c1.y mul.f r2.y, r2.y, c1.x nop -add.f r1.z, r0.y, r1.x -mul.f r0.y, r0.w, r1.y +add.f r1.z, r0.y, r0.w +mul.f r0.y, r1.y, r1.x mul.f r0.x, r0.z, r0.x (rpt1)nop add.f r1.y, r0.y, r2.x @@ -43,6 +44,6 @@ add.f r1.x, r0.x, r2.y end nop ; FRAG: outputs: r1.x (1:0) -; FRAG: inputs: r0.x (1:0,cm=f,il=8,b=1) r1.x (3:0,cm=f,il=12,b=1) +; FRAG: inputs: r1.x (1:0,cm=f,il=8,b=1) r1.x (3:0,cm=f,il=12,b=1) ; FRAG: 36 instructions, 0 half, 3 full diff --git a/reference/fragProg1/fragProg1-57.asm b/reference/fragProg1/fragProg1-57.asm index 75a9b0c..de526fe 100644 --- a/reference/fragProg1/fragProg1-57.asm +++ b/reference/fragProg1/fragProg1-57.asm @@ -6,51 +6,44 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c2.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.z, 4, r0.x bary.f r0.w, 3, r0.x bary.f r1.x, 2, r0.x bary.f r1.y, 1, r0.x mul.f r0.z, c0.z, r0.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.y, r1.y +max.f r0.w, r0.w, c2.x +max.f r1.x, r1.x, c2.x +max.f r1.y, r1.y, c2.x bary.f (ei)r0.x, 0, r0.x -max.f r0.y, r0.w, c2.x -max.f r0.w, r1.x, c2.x -exp2 r0.z, (neg)r0.z -(ss)mov.f32f32 r0.z, r0.z -max.f r1.x, r1.y, c2.x -mov.f32f32 r0.x, r0.x -nop -max.f r0.z, r0.z, c2.x -min.f r0.y, r0.y, c2.y (rpt1)nop -min.f r0.z, r0.z, c2.y -min.f r0.w, r0.w, c2.y -min.f r1.x, r1.x, c2.y +exp2 r0.y, (neg)r0.z +(ss)max.f r0.y, r0.y, c2.x +min.f r1.w, r0.w, c2.y +(ss)min.f r0.z, r1.x, c2.y +min.f r0.w, r1.y, c2.y +min.f r0.y, r0.y, c2.y max.f r0.x, r0.x, c2.x -add.f r1.y, c2.y, (neg)r0.z -add.f r1.z, c2.y, (neg)r0.z -add.f r1.w, c2.y, (neg)r0.z -mul.f r0.w, r0.z, r0.w -mul.f r1.y, r1.y, c1.z -mul.f r2.x, r1.z, c1.y -mul.f r2.y, r1.w, c1.x +(rpt1)nop +add.f r1.x, c2.y, (neg)r0.y +add.f r1.y, c2.y, (neg)r0.y +add.f r1.z, c2.y, (neg)r0.y +mul.f r0.z, r0.y, r0.z +mul.f r1.x, r1.x, c1.z +mul.f r1.y, r1.y, c1.y +mul.f r2.x, r1.z, c1.x nop -add.f r1.z, r0.w, r1.y -mul.f r0.w, r0.z, r1.x +add.f r1.z, r0.z, r1.x +mul.f r0.z, r0.y, r0.w min.f r0.x, r0.x, c2.y -mov.f32f32 r1.w, r0.y -nop -add.f r1.y, r0.w, r2.x -mul.f r0.x, r0.z, r0.x +(rpt1)nop +add.f r1.y, r0.z, r1.y +mul.f r0.x, r0.y, r0.x (rpt2)nop -add.f r1.x, r0.x, r2.y +add.f r1.x, r0.x, r2.x end nop -nop -nop ; FRAG: outputs: r1.x (1:0) ; FRAG: inputs: r0.x (1:0,cm=f,il=8,b=1) r1.x (3:0,cm=f,il=12,b=1) -; FRAG: 44 instructions, 0 half, 3 full +; FRAG: 40 instructions, 0 half, 3 full diff --git a/reference/fragProg1/fragProg1-58.asm b/reference/fragProg1/fragProg1-58.asm index d7dd979..af3f774 100644 --- a/reference/fragProg1/fragProg1-58.asm +++ b/reference/fragProg1/fragProg1-58.asm @@ -6,43 +6,44 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c2.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.z, 4, r0.x -bary.f r0.w, 3, r0.x -bary.f r1.x, 2, r0.x -bary.f r1.y, 1, r0.x +bary.f r1.w, 3, r0.x +bary.f r0.w, 2, r0.x +bary.f r1.x, 1, r0.x mul.f r0.z, c0.x, r0.z -mov.f32f32 r1.w, r0.w bary.f (ei)r0.x, 0, r0.x -(rpt3)nop +(rpt4)nop exp2 r0.y, (neg)r0.z (ss)max.f r0.y, r0.y, c2.x -exp2 r0.w, (neg)r0.z +exp2 r1.y, (neg)r0.z nop (ss)exp2 r0.z, (neg)r0.z -(ss)max.f r0.w, r0.w, c2.x +(ss)max.f r1.y, r1.y, c2.x (ss)max.f r0.z, r0.z, c2.x min.f r0.y, r0.y, c2.y (rpt2)nop add.f r1.z, c2.y, (neg)r0.y -min.f r0.w, r0.w, c2.y +min.f r1.y, r1.y, c2.y min.f r0.z, r0.z, c2.y -mul.f r0.y, r0.y, r1.x -mul.f r1.x, r1.z, c1.z -add.f r2.x, c2.y, (neg)r0.w +mul.f r0.y, r0.y, r0.w +mul.f r0.w, r1.z, c1.z +add.f r2.x, c2.y, (neg)r1.y add.f r2.y, c2.y, (neg)r0.z -mul.f r0.w, r0.w, r1.y -add.f r1.z, r0.y, r1.x +mul.f r1.x, r1.y, r1.x +add.f r1.z, r0.y, r0.w mul.f r0.y, r2.x, c1.y -mul.f r1.x, r2.y, c1.x +mul.f r0.w, r2.y, c1.x (rpt1)nop -add.f r1.y, r0.w, r0.y +add.f r1.y, r1.x, r0.y mul.f r0.x, r0.z, r0.x (rpt2)nop -add.f r1.x, r0.x, r1.x +add.f r1.x, r0.x, r0.w end nop nop +nop ; FRAG: outputs: r1.x (1:0) -; FRAG: inputs: r0.x (1:0,cm=f,il=8,b=1) r1.x (3:0,cm=f,il=12,b=1) +; FRAG: inputs: r1.x (1:0,cm=f,il=8,b=1) r1.x (3:0,cm=f,il=12,b=1) ; FRAG: 42 instructions, 0 half, 3 full diff --git a/reference/fragProg1/fragProg1-59.asm b/reference/fragProg1/fragProg1-59.asm index bf15c36..3774aca 100644 --- a/reference/fragProg1/fragProg1-59.asm +++ b/reference/fragProg1/fragProg1-59.asm @@ -6,33 +6,29 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c2.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.z, 4, r0.x bary.f r0.w, 3, r0.x bary.f r1.x, 2, r0.x bary.f r1.y, 1, r0.x mul.f r0.z, c0.w, r0.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.y, r1.y -mul.f r0.z, r0.z, r0.z max.f r0.w, r0.w, c2.x max.f r1.x, r1.x, c2.x max.f r1.y, r1.y, c2.x -mov.f32f32 r0.z, r0.z -min.f r0.w, r0.w, c2.y -(rpt4)nop -exp2 r0.z, (neg)r0.z -(ss)mov.f32f32 r0.z, r0.z -mov.f32f32 r1.w, r0.w +mul.f r0.z, r0.z, r0.z +min.f r1.w, r0.w, c2.y min.f r0.w, r1.x, c2.y min.f r1.x, r1.y, c2.y -max.f r0.z, r0.z, c2.x bary.f (ei)r0.x, 0, r0.x (rpt1)nop -min.f r0.y, r0.z, c2.y -mov.f32f32 r0.x, r0.x +exp2 r0.y, (neg)r0.z +(ss)max.f r0.y, r0.y, c2.x +max.f r0.x, r0.x, c2.x +(rpt1)nop +min.f r0.y, r0.y, c2.y +min.f r0.x, r0.x, c2.y (rpt1)nop -add.f r0.z, c2.y, (neg)r0.y +(ss)add.f r0.z, c2.y, (neg)r0.y add.f r1.y, c2.y, (neg)r0.y add.f r1.z, c2.y, (neg)r0.y mul.f r0.w, r0.y, r0.w @@ -42,19 +38,12 @@ mul.f r2.x, r1.z, c1.x nop add.f r1.z, r0.w, r0.z mul.f r0.z, r0.y, r1.x -max.f r0.x, r0.x, c2.x +mul.f r0.x, r0.y, r0.x (rpt1)nop add.f r1.y, r0.z, r1.y -min.f r0.x, r0.x, c2.y -(rpt2)nop -mul.f r0.x, r0.y, r0.x -(rpt2)nop add.f r1.x, r0.x, r2.x end -nop -nop -nop ; FRAG: outputs: r1.x (1:0) ; FRAG: inputs: r0.x (1:0,cm=f,il=8,b=1) r1.x (3:0,cm=f,il=12,b=1) -; FRAG: 56 instructions, 0 half, 3 full +; FRAG: 40 instructions, 0 half, 3 full diff --git a/reference/fragProg1/fragProg1-60.asm b/reference/fragProg1/fragProg1-60.asm index da7a509..46a5a61 100644 --- a/reference/fragProg1/fragProg1-60.asm +++ b/reference/fragProg1/fragProg1-60.asm @@ -6,47 +6,48 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c2.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.z, 4, r0.x -bary.f r0.w, 3, r0.x -bary.f r1.x, 2, r0.x -bary.f r1.y, 1, r0.x +bary.f r1.w, 3, r0.x +bary.f r0.w, 2, r0.x +bary.f r1.x, 1, r0.x mul.f r0.z, c0.x, r0.z -mov.f32f32 r1.w, r0.w bary.f (ei)r0.x, 0, r0.x -nop +(rpt1)nop mul.f r0.y, r0.z, r0.z (rpt2)nop -mov.f32f32 r0.y, r0.y +mov.f32f32 r0.z, r0.y (rpt5)nop -exp2 r0.z, (neg)r0.y +exp2 r1.y, (neg)r0.z +(ss)max.f r1.y, r1.y, c2.x +(ss)exp2 r0.z, (neg)r0.z (ss)max.f r0.z, r0.z, c2.x -exp2 r0.w, (neg)r0.y -nop -(ss)exp2 r0.y, (neg)r0.y -(ss)max.f r0.w, r0.w, c2.x +exp2 r0.y, (neg)r0.y (ss)max.f r0.y, r0.y, c2.x +nop +min.f r1.y, r1.y, c2.y min.f r0.z, r0.z, c2.y -(rpt2)nop -add.f r1.z, c2.y, (neg)r0.z -min.f r0.w, r0.w, c2.y min.f r0.y, r0.y, c2.y -mul.f r0.z, r0.z, r1.x -mul.f r1.x, r1.z, c1.z -add.f r2.x, c2.y, (neg)r0.w +nop +add.f r1.z, c2.y, (neg)r1.y +add.f r2.x, c2.y, (neg)r0.z add.f r2.y, c2.y, (neg)r0.y -mul.f r0.w, r0.w, r1.y -add.f r1.z, r0.z, r1.x -mul.f r0.z, r2.x, c1.y -mul.f r1.x, r2.y, c1.x +mul.f r0.w, r1.y, r0.w +mul.f r1.y, r1.z, c1.z +mul.f r2.x, r2.x, c1.y +(rpt1)nop +add.f r1.z, r0.w, r1.y +mul.f r0.z, r0.z, r1.x +mul.f r0.w, r2.y, c1.x (rpt1)nop -add.f r1.y, r0.w, r0.z +add.f r1.y, r0.z, r2.x mul.f r0.x, r0.y, r0.x (rpt2)nop -add.f r1.x, r0.x, r1.x +add.f r1.x, r0.x, r0.w end nop nop ; FRAG: outputs: r1.x (1:0) -; FRAG: inputs: r0.x (1:0,cm=f,il=8,b=1) r1.x (3:0,cm=f,il=12,b=1) +; FRAG: inputs: r1.x (1:0,cm=f,il=8,b=1) r1.x (3:0,cm=f,il=12,b=1) ; FRAG: 50 instructions, 0 half, 3 full diff --git a/reference/glmark1.asm b/reference/glmark1.asm index 88f3652..2e188d9 100644 --- a/reference/glmark1.asm +++ b/reference/glmark1.asm @@ -6,13 +6,14 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c2.x) 0x3f800000, 0x00000000, 0x00000000, 0x00000000 +@const(c3.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.z, 0, r0.x bary.f (ei)r0.x, 1, r0.x -mov.f32f32 r0.y, c2.x +mov.f32f32 r1.w, c2.x (rpt1)nop mul.f r0.x, r0.z, r0.x -mov.f32f32 r1.w, r0.y -(rpt1)nop +(rpt2)nop add.f r0.y, c3.y, (neg)r0.x add.f r0.z, c3.y, (neg)r0.x add.f r0.w, c3.y, (neg)r0.x @@ -20,21 +21,18 @@ mul.f r1.x, r0.x, c1.z mul.f r0.y, r0.y, c0.z mul.f r0.z, r0.z, c0.y mul.f r0.w, r0.w, c0.x -mul.f r1.y, r0.x, c1.y -add.f r0.y, r1.x, r0.y +nop +add.f r1.z, r1.x, r0.y +mul.f r0.y, r0.x, c1.y mul.f r0.x, r0.x, c1.x (rpt1)nop -mov.f32f32 r1.z, r0.y -add.f r0.y, r1.y, r0.z -add.f r0.x, r0.x, r0.w -(rpt1)nop -mov.f32f32 r1.y, r0.y -mov.f32f32 r1.x, r0.x +add.f r1.y, r0.y, r0.z +add.f r1.x, r0.x, r0.w end nop nop nop ; FRAG: outputs: r1.x (1:0) ; FRAG: inputs: r0.x (5:20,cm=f,il=8,b=1) -; FRAG: 29 instructions, 0 half, 2 full +; FRAG: 25 instructions, 0 half, 2 full diff --git a/reference/glmark2.asm b/reference/glmark2.asm index b3885a6..6f1a27b 100644 --- a/reference/glmark2.asm +++ b/reference/glmark2.asm @@ -6,83 +6,50 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 -(sy)(ss)bary.f r0.z, 2, r0.x -bary.f r0.w, 1, r0.x -bary.f r1.x, 12, r0.x +@const(c3.x) 0x3d4ccccd, 0x3d2acd9f, 0x3ccccccd, 0x3caa64c3 +@const(c4.x) 0x40000000, 0xbe99999a, 0x00000000, 0x00000000 +@const(c5.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 +(sy)(ss)bary.f r0.z, 12, r0.x +bary.f r0.w, 2, r0.x +bary.f r1.x, 1, r0.x bary.f r1.y, 0, r0.x bary.f r1.z, 4, r0.x -add.f r0.z, r0.z, (neg)r0.w -bary.f r0.w, 8, r0.x -bary.f r1.w, 7, r0.x -mov.f32f32 r1.z, r1.z -mul.f r0.z, r0.z, c3.w -log2 r1.x, r1.x -(ss)mul.f r1.x, c4.x, r1.x -mul.f r1.y, r1.y, c3.y -mov.f32f32 r2.x, c3.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r2.y, c3.z -mov.f32f32 r1.x, r1.x -mad.f32 r1.y, c2.x, r2.x, r1.y -mov.f32f32 r2.z, r1.z -mad.f32 r0.z, c2.x, r2.y, r0.z -bary.f r1.z, 5, r0.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.w, r1.w -mov.f32f32 r0.z, r0.z -exp2 r1.x, r1.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r1.y, r1.y +bary.f r1.w, 5, r0.x bary.f r2.x, 11, r0.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r2.w, r1.z -mov.f32f32 r1.y, r1.y -add.f r0.w, r2.x, r0.w -mov.f32f32 r0.z, r0.z -bary.f r1.z, 10, r0.x -bary.f r2.x, 6, r0.x -mov.f32f32 r1.y, r1.y -mov.f32f32 r0.z, r0.z -sam (f32)(xyzw)r2.y, r2.z, s#0, t#0 -(sy)(ss)add.f r1.x, r3.x, r1.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r3.x, r1.y -mov.f32f32 r3.y, r0.z -mov.f32f32 r0.z, r1.x -add.f r1.x, r1.z, r1.w -mov.f32f32 r1.y, r2.x -bary.f (ei)r0.x, 9, r0.x -(rpt1)nop -sam (f32)(xyz)r3.x, r3.x, s#1, t#1 -(sy)add.f r0.y, r0.w, r3.z -mov.f32f32 r0.w, r1.x -add.f r0.x, r0.x, r1.y -add.f r0.z, r0.z, c4.y -mov.f32f32 r0.y, r0.y -add.f r0.w, r0.w, r3.y -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.z, r0.z -mul.f r0.y, r0.y, r2.w -mov.f32f32 r0.w, r0.w -add.f r0.x, r0.x, r3.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.y, r0.y -mul.f r0.w, r0.w, r2.z -mov.f32f32 r0.x, r0.x -mov.f32f32 r1.w, r0.z -mov.f32f32 r1.z, r0.y -mov.f32f32 r0.y, r0.w -mul.f r0.x, r0.x, r2.y -(rpt1)nop -mov.f32f32 r1.y, r0.y -mov.f32f32 r0.x, r0.x +log2 r0.z, r0.z +(ss)mul.f r0.z, c4.x, r0.z +add.f r0.w, r0.w, (neg)r1.x +mul.f r1.x, r1.y, c3.y +bary.f r1.y, 8, r0.x +bary.f r2.y, 10, r0.x +sam (f32)(xyzw)r2.z, r1.z, s#0, t#0 +mul.f r0.w, r0.w, c3.w +(ss)mov.f32f32 r1.z, c3.z +exp2 r0.z, r0.z +(sy)(ss)add.f r0.z, r3.y, r0.z +mov.f32f32 r3.y, c3.x +add.f r1.y, r2.x, r1.y +mad.f32 r3.w, c2.x, r1.z, r0.w +add.f r1.w, r0.z, c4.y +mad.f32 r3.z, c2.x, r3.y, r1.x +bary.f r0.z, 7, r0.x +bary.f r0.w, 9, r0.x +bary.f (ei)r0.x, 6, r0.x (rpt2)nop -mov.f32f32 r1.x, r0.x -end -nop +sam (f32)(xyz)r3.y, r3.z, s#1, t#1 +(sy)add.f r0.y, r1.y, r3.w +add.f r0.z, r2.y, r0.z +add.f r0.x, r0.w, r0.x nop +mul.f r1.z, r0.y, r3.x +add.f r0.y, r0.z, r3.z +add.f r0.x, r0.x, r3.y +(rpt1)nop +mul.f r1.y, r0.y, r2.w +mul.f r1.x, r0.x, r2.z +end nop ; FRAG: outputs: r1.x (1:0) -; FRAG: inputs: r0.x (5:20,cm=f,il=8,b=1) r1.x (5:21,cm=f,il=12,b=1) r1.y (5:22,cm=f,il=16,b=1) r3.x (5:23,cm=f,il=20,b=1) -; FRAG: 77 instructions, 0 half, 4 full +; FRAG: inputs: r0.x (5:20,cm=f,il=8,b=1) r63.w (5:21,cm=f,il=12,b=1) r1.y (5:22,cm=f,il=16,b=1) r3.x (5:23,cm=f,il=20,b=1) +; FRAG: 42 instructions, 0 half, 4 full diff --git a/reference/glmark3.asm b/reference/glmark3.asm index 52cd74b..818e982 100644 --- a/reference/glmark3.asm +++ b/reference/glmark3.asm @@ -6,11 +6,11 @@ @in(r0.w) in4 @in(r1.x) in5 @in(r1.y) in6 -@in(r1.z) in8 -@in(r1.w) in9 -@in(r2.x) in10 -@in(r2.y) in12 -@in(r2.z) in13 +@in(r2.z) in8 +@in(r2.w) in9 +@in(r3.x) in10 +@in(r2.x) in12 +@in(r2.y) in13 @out(r0.x) out0 @out(r0.y) out1 @out(r0.z) out2 @@ -31,303 +31,223 @@ @out(r4.y) out17 @out(r4.z) out18 @out(r4.w) out19 -(sy)(ss)add.f r2.w, (neg)r0.y, c19.z -mul.f r3.x, c8.x, r0.w +@const(c19.x) 0x3d889a02, 0x00000000, 0xbf4ccccd, 0x3dcccccd +@const(c20.x) 0x40400000, 0x40000000, 0x3daa9931, 0x3f000000 +@const(c21.x) 0x41700000, 0x3f800000, 0x3e000000, 0x41f00000 +@const(c22.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 +(sy)(ss)add.f r1.z, (neg)r0.y, c19.z +mul.f r1.w, c8.x, r0.w mov.f32f32 r3.y, c18.x -mad.f32 r3.x, c9.x, r1.x, r3.x -max.f r2.w, c19.y, r2.w -mad.f32 r3.x, c10.x, r1.y, r3.x +mad.f32 r1.w, c9.x, r1.x, r1.w +max.f r1.z, c19.y, r1.z +mad.f32 r1.w, c10.x, r1.y, r1.w mul.f r3.y, r3.y, c19.x mul.f r3.z, c8.y, r0.w -mov.f32f32 r2.w, r2.w -add.f r3.x, r3.x, c11.x -mul.f r3.w, r3.y, c21.w -mul.f r3.y, r3.y, c21.x -mul.f r2.w, r2.w, c19.w -mov.f32f32 r3.x, r3.x -mov.f32f32 r3.w, r3.w -mad.f32 r3.y, c20.w, r0.y, r3.y -mov.f32f32 r2.w, r2.w -mul.f r4.x, r3.x, r3.x +mul.f r1.z, r1.z, c19.w +add.f r1.w, r1.w, c11.x +mul.f r3.w, r3.y, c21.x +mul.f r3.y, r3.y, c21.w +max.f r1.z, r1.z, c19.y +mov.f32f32 r4.x, r1.w +mad.f32 r3.w, c20.w, r0.y, r3.w +mad.f32 r3.y, c20.y, r0.y, r3.y +min.f r1.z, r1.z, c21.y +mul.f r1.w, r1.w, r4.x mad.f32 r3.z, c9.y, r1.x, r3.z -mad.f32 r3.w, c20.y, r0.y, r3.w -max.f r2.w, r2.w, c19.y -mad.f32 r3.z, c10.y, r1.y, r3.z -mov.f32f32 r3.y, r3.y -mov.f32f32 r3.w, r3.w -min.f r2.w, r2.w, c21.y -add.f r3.z, r3.z, c11.y mul.f r0.w, c8.z, r0.w -mov.f32f32 r4.y, c13.x -mul.f r4.z, c20.y, r2.w -mov.f32f32 r3.z, r3.z +mul.f r4.y, c20.y, r1.z +mad.f32 r3.z, c10.y, r1.y, r3.z sin r3.w, r3.w -(ss)mov.f32f32 r3.w, r3.w -mul.f r4.w, r1.z, c21.z -add.f r4.z, c20.x, (neg)r4.z -mad.f32 r4.x, r3.z, r3.z, r4.x -mul.f r5.x, r1.w, c21.z -mul.f r5.y, r2.x, c21.z -mov.f32f32 r4.z, r4.z -mov.f32f32 r4.x, r4.x +nop +sin r3.y, r3.y +(ss)mov.f32f32 r4.z, r3.w +add.f r4.y, c20.x, (neg)r4.y +add.f r3.z, r3.z, c11.y +mul.f r4.w, r2.z, c20.z +mov.f32f32 r5.x, r3.y +mul.f r4.y, r1.z, r4.y +mov.f32f32 r5.y, r3.z +mul.f r4.w, r4.w, r4.z +mul.f r2.z, r2.z, c21.z +mul.f r1.z, r1.z, r4.y +mad.f32 r1.w, r3.z, r5.y, r1.w mad.f32 r0.w, c9.z, r1.x, r0.w -mul.f r1.x, r4.w, r3.w -mul.f r4.z, r2.w, r4.z +mul.f r1.x, r2.z, r5.x +mov.f32f32 r2.z, r1.z mad.f32 r0.w, c10.z, r1.y, r0.w -mul.f r1.y, r5.x, r3.w -mul.f r3.w, r5.y, r3.w -mov.f32f32 r4.z, r4.z -add.f r0.w, r0.w, c11.z -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.y, r1.y -mul.f r2.w, r2.w, r4.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r3.w, r3.w -sin r3.y, r3.y -(ss)mov.f32f32 r3.y, r3.y -mov.f32f32 r2.w, r2.w -mad.f32 r4.x, r0.w, r0.w, r4.x -mul.f r1.z, r1.z, c20.z -mul.f r1.w, r1.w, c20.z -add.f r4.z, c21.y, (neg)r2.w -mul.f r2.x, r2.x, c20.z -mul.f r1.z, r1.z, r3.y -mul.f r1.w, r1.w, r3.y -rsq r4.x, r4.x -(ss)mov.f32f32 r4.x, r4.x -mul.f r2.x, r2.x, r3.y -mov.f32f32 r1.z, r1.z -mov.f32f32 r1.w, r1.w -mul.f r3.x, r3.x, r4.x -mov.f32f32 r2.x, r2.x -mad.f32 r0.x, r1.z, r4.z, r0.x -mad.f32 r0.y, r1.w, r4.z, r0.y -mov.f32f32 r1.w, r3.x -mad.f32 r0.z, r2.x, r4.z, r0.z -mov.f32f32 r0.x, r0.x -add.f r1.z, c21.y, (neg)r2.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -mul.f r2.x, r1.w, r1.w -mov.f32f32 r1.z, r1.z -mul.f r2.w, r3.z, r4.x -mul.f r0.w, r0.w, r4.x -nop -mad.f32 r0.x, r1.x, r1.z, r0.x -mad.f32 r0.y, r1.y, r1.z, r0.y -mad.f32 r0.z, r3.w, r1.z, r0.z -mov.f32f32 r2.w, r2.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r3.x, r0.z -mad.f32 r0.z, r2.w, r2.w, r2.x -mul.f r1.x, c0.w, r0.x -mul.f r1.y, c0.x, r0.x -mad.f32 r1.x, c1.w, r0.y, r1.x -mad.f32 r1.y, c1.x, r0.y, r1.y -mad.f32 r1.x, c2.w, r3.x, r1.x -mad.f32 r1.y, c2.x, r3.x, r1.y -mul.f r1.z, c0.x, r0.x -mul.f r2.x, c0.y, r0.x -mov.f32f32 r1.x, r1.x -add.f r3.y, r1.y, c3.x -mad.f32 r1.y, c1.x, r0.y, r1.z -mad.f32 r1.z, c1.y, r0.y, r2.x -add.f r1.x, r1.x, c3.w -add.f r2.x, c12.x, (neg)r3.y -add.f r3.z, c12.x, (neg)r3.y -mad.f32 r1.y, c2.x, r3.x, r1.y -mad.f32 r1.z, c2.y, r3.x, r1.z -mul.f r3.w, c0.z, r0.x -mul.f r4.x, c0.y, r0.x -rcp r4.z, r1.x -mov.f32f32 r1.y, r1.y -mul.f r2.x, r2.x, r2.x -mad.f32 r4.x, c1.y, r0.y, r4.x -mul.f r4.w, r3.z, r3.z -add.f r1.y, r1.y, c3.x -mad.f32 r4.x, c2.y, r3.x, r4.x -mov.f32f32 r1.z, r1.z -mad.f32 r3.w, c1.z, r0.y, r3.w -(ss)mul.f r1.y, r1.y, r4.z -add.f r4.x, r4.x, c3.y -add.f r1.z, r1.z, c3.y -mad.f32 r3.w, c2.z, r3.x, r3.w -mov.f32f32 r5.x, r1.y -add.f r1.y, c12.y, (neg)r4.x -add.f r5.y, c12.y, (neg)r4.x -mul.f r1.z, r1.z, r4.z -mul.f r5.z, r5.x, r5.x -mad.f32 r1.y, r1.y, r1.y, r2.x -mad.f32 r2.x, r5.y, r5.y, r4.w -mov.f32f32 r5.w, r1.z -mov.f32f32 r1.z, r3.w -mov.f32f32 r3.w, r1.y -mov.f32f32 r2.x, r2.x -mad.f32 r1.y, r5.w, r5.w, r5.z -mul.f r4.w, c0.z, r0.x -add.f r1.z, r1.z, c3.z -mad.f32 r4.w, c1.z, r0.y, r4.w -mov.f32f32 r5.z, r1.y -mad.f32 r1.y, c2.z, r3.x, r4.w +add.f r1.y, c21.y, (neg)r1.z +mul.f r1.z, r2.w, c20.z +add.f r2.z, c21.y, (neg)r2.z +add.f r3.z, r0.w, c11.z +mul.f r0.w, r3.x, c20.z mul.f r1.z, r1.z, r4.z -mov.f32f32 r4.w, r1.x -(ss)mul.f r1.x, c0.w, r0.x -add.f r4.z, r1.y, c3.z -mov.f32f32 r6.x, r1.z -mad.f32 r1.z, c1.w, r0.y, r1.x -mov.f32f32 r1.y, r4.x -mov.f32f32 r1.x, r3.y -mad.f32 r3.y, r6.x, r6.x, r5.z -add.f r4.x, c12.z, (neg)r4.z -add.f r5.z, c12.z, (neg)r4.z -mad.f32 r6.y, c2.w, r3.x, r1.z -mov.f32f32 r1.z, r4.z -mul.f r4.z, c4.w, r0.x -mul.f r6.z, c4.z, r0.x -rsq r3.y, r3.y -(ss)mov.f32f32 r3.y, r3.y -mad.f32 r3.w, r4.x, r4.x, r3.w -mad.f32 r2.x, r5.z, r5.z, r2.x -add.f r4.x, r6.y, c3.w -mul.f r5.x, r5.x, r3.y -mov.f32f32 r3.w, r3.w -mul.f r5.w, r5.w, r3.y -mul.f r3.y, r6.x, r3.y -mov.f32f32 r5.x, r5.x -rsq r2.x, r2.x -(ss)mov.f32f32 r2.x, r2.x -mov.f32f32 r5.w, r5.w -mov.f32f32 r3.y, r3.y -mul.f r5.x, r1.w, (neg)r5.x -sqrt r3.w, r3.w -(ss)mov.f32f32 r3.w, r3.w -mad.f32 r5.x, r2.w, (neg)r5.w, r5.x -rcp r4.y, r4.y -mul.f r3.z, r3.z, r2.x -mul.f r5.y, r5.y, r2.x -(ss)mul.f r3.w, r3.w, r4.y -(ss)mov.f32f32 r4.y, r5.x -mov.f32f32 r5.x, r0.w -mov.f32f32 r0.w, r3.z -mov.f32f32 r3.z, r3.w -mov.f32f32 r0.z, r0.z -mad.f32 r3.y, r5.x, (neg)r3.y, r4.y -mad.f32 r0.z, r5.x, r5.x, r0.z -add.f r3.z, c21.y, (neg)r3.z -mov.f32f32 r3.w, r5.y -mov.f32f32 r3.y, r3.y -mul.f r2.x, r5.z, r2.x -(rpt1)nop -absneg.f r3.y, (abs)r3.y -mov.f32f32 r3.z, r3.z -rsq r0.z, r0.z -(ss)mov.f32f32 r0.z, r0.z -mov.f32f32 r2.x, r2.x -mov.f32f32 r3.y, r3.y -max.f r3.z, r3.z, c19.y -mul.f r4.y, r1.w, r0.z -mov.f32f32 r1.w, r4.x -max.f r3.y, r3.y, c19.y -mov.f32f32 r3.z, r3.z -mov.f32f32 r4.x, r4.y -mad.f32 r4.y, c5.w, r0.y, r4.z -mov.f32f32 r3.y, r3.y -mad.f32 r4.y, c6.w, r3.x, r4.y -mad.f32 r4.z, c5.z, r0.y, r6.z -mul.f r5.y, c4.y, r0.x -add.f r3.y, c21.y, (neg)r3.y -log2 r3.z, r3.z -(ss)mul.f r3.z, c20.y, r3.z -mul.f r4.x, r4.x, r0.w -mul.f r0.w, r2.w, r0.z -mov.f32f32 r3.y, r3.y -mov.f32f32 r3.z, r3.z -add.f r4.y, r4.y, c7.w -mad.f32 r4.z, c6.z, r3.x, r4.z -mad.f32 r5.y, c5.y, r0.y, r5.y +mad.f32 r0.x, r4.w, r2.z, r0.x +mov.f32f32 r4.y, r1.y +mov.f32f32 r4.z, r3.z +mad.f32 r0.y, r1.z, r2.z, r0.y +mul.f r0.w, r0.w, r3.w +mad.f32 r0.x, r1.x, r4.y, r0.x +(ss)mad.f32 r3.w, r4.z, r4.z, r1.w +mul.f r1.x, r2.w, c21.z +mad.f32 r0.z, r0.w, r2.z, r0.z +mov.f32f32 r0.w, r0.x mul.f r0.x, c4.x, r0.x -mad.f32 r5.y, c6.y, r3.x, r5.y -log2 r3.y, r3.y -(ss)mul.f r3.y, c17.x, r3.y -exp2 r3.z, r3.z -mov.f32f32 r5.z, r0.w -mov.f32f32 r0.w, r4.y -add.f r4.y, r4.z, c7.z -mov.f32f32 r3.y, r3.y -mad.f32 r3.w, r5.z, r3.w, r4.x -add.f r4.x, r5.y, c7.y +mul.f r1.x, r1.x, r5.x +mul.f r1.z, r3.x, c21.z +mul.f r1.w, c0.w, r0.w +mul.f r2.z, c0.x, r0.w +mad.f32 r0.y, r1.x, r4.y, r0.y +mul.f r1.x, c0.y, r0.w +mul.f r2.w, c0.z, r0.w +mul.f r3.x, c0.x, r0.w +mov.f32f32 r4.y, r0.y +mul.f r4.z, c0.y, r0.w +mul.f r4.w, c0.z, r0.w +mul.f r5.x, c0.w, r0.w +mad.f32 r1.w, c1.w, r4.y, r1.w +mul.f r1.z, r1.z, r3.y +mad.f32 r2.z, c1.x, r4.y, r2.z +mad.f32 r1.x, c1.y, r4.y, r1.x +mad.f32 r2.w, c1.z, r4.y, r2.w +mad.f32 r3.y, r1.z, r1.y, r0.z +mad.f32 r0.z, c1.x, r4.y, r3.x +mad.f32 r3.x, c1.y, r4.y, r4.z +mad.f32 r4.z, c1.z, r4.y, r4.w +mov.f32f32 r5.z, r3.y +mad.f32 r5.x, c1.w, r4.y, r5.x +mul.f r5.w, c4.w, r0.w +mul.f r6.x, c4.z, r0.w +mad.f32 r1.y, c2.w, r5.z, r1.w +mad.f32 r1.z, c2.x, r5.z, r2.z +mad.f32 r1.w, c2.y, r5.z, r1.x +mad.f32 r2.z, c2.z, r5.z, r2.w +add.f r4.w, r1.y, c3.w +add.f r1.x, r1.z, c3.x +add.f r1.y, r1.w, c3.y +add.f r1.z, r2.z, c3.z +mad.f32 r0.z, c2.x, r5.z, r0.z +mad.f32 r1.w, c2.y, r5.z, r3.x +mad.f32 r2.z, c2.z, r5.z, r4.z +rcp r2.w, r4.w +add.f r3.x, c12.x, (neg)r1.x +add.f r0.z, r0.z, c3.x +add.f r4.z, c12.x, (neg)r1.x +add.f r1.w, r1.w, c3.y +add.f r2.z, r2.z, c3.z +(ss)mul.f r0.z, r0.z, r2.w +mul.f r3.x, r3.x, r3.x +add.f r6.y, c12.y, (neg)r1.y +mul.f r6.z, r4.z, r4.z +mov.f32f32 r6.w, r0.z +add.f r7.x, c12.y, (neg)r1.y +mad.f32 r3.x, r6.y, r6.y, r3.x +add.f r6.y, c12.z, (neg)r1.z +mul.f r0.z, r0.z, r6.w +mul.f r1.w, r1.w, r2.w +mad.f32 r6.z, r7.x, r7.x, r6.z +mad.f32 r3.x, r6.y, r6.y, r3.x +add.f r6.y, c12.z, (neg)r1.z +mov.f32f32 r7.y, r1.w +mul.f r2.z, r2.z, r2.w +mad.f32 r2.w, c2.w, r5.z, r5.x +mad.f32 r5.x, c5.w, r4.y, r5.w +mad.f32 r0.z, r1.w, r7.y, r0.z +mov.f32f32 r5.w, r2.z +sqrt r3.x, r3.x +mov.f32f32 r7.z, c13.x +mad.f32 r6.z, r6.y, r6.y, r6.z +add.f r1.w, r2.w, c3.w +mad.f32 r0.z, r5.w, r5.w, r0.z +mad.f32 r2.w, c6.w, r5.z, r5.x +mad.f32 r5.x, c5.z, r4.y, r6.x +mul.f r0.w, c4.y, r0.w +mad.f32 r5.x, c6.z, r5.z, r5.x +mad.f32 r4.y, c5.y, r4.y, r0.w mad.f32 r0.x, c5.x, r0.y, r0.x -mul.f r0.y, r5.x, r0.z -(rpt1)nop -exp2 r0.z, r3.y -(ss)mul.f r3.y, c16.w, r0.z -mov.f32f32 r3.w, r3.w -mov.f32f32 r4.z, r0.y -mov.f32f32 r0.z, r4.y -mov.f32f32 r3.y, r3.y -mov.f32f32 r0.y, r4.x -mad.f32 r0.x, c6.x, r3.x, r0.x -mov.f32f32 r3.x, c15.z -mul.f r4.x, c16.z, r3.y -mul.f r4.y, c16.y, r3.y -mul.f r3.y, c16.x, r3.y -mad.f32 r2.x, r4.z, r2.x, r3.w -mov.f32f32 r3.w, r4.x -mov.f32f32 r4.x, r4.y -mov.f32f32 r3.y, r3.y -nop -mov.f32f32 r4.z, r3.w -mov.f32f32 r4.y, r4.x -mov.f32f32 r4.x, r3.y -mov.f32f32 r2.x, r2.x +rsq r0.y, r0.z +(ss)mov.f32f32 r0.z, r0.y +rcp r0.w, r7.z +(ss)mul.f r0.w, r3.x, r0.w +rsq r3.x, r6.z +(ss)mov.f32f32 r5.w, r3.x +mul.f r2.z, r2.z, r0.y +mul.f r0.y, r6.w, r0.z +rsq r3.w, r3.w +(ss)mov.f32f32 r6.x, r3.w +add.f r0.w, c21.y, (neg)r0.w +mul.f r4.z, r4.z, r5.w +(ss)mul.f r6.z, r7.y, r0.z +mul.f r4.x, r4.x, r6.x +max.f r6.w, r0.w, c19.y +mul.f r5.w, r7.x, r5.w +mul.f r3.x, r6.y, r3.x +mov.f32f32 r6.y, r4.x +add.f r0.w, r2.w, c7.w +add.f r0.z, r5.x, c7.z +mad.f32 r2.w, c6.y, r5.z, r4.y +mul.f r4.y, r6.y, (neg)r0.y +mul.f r5.x, r5.y, r6.x +log2 r0.y, r6.w +(ss)mul.f r5.y, c20.y, r0.y +mul.f r4.x, r4.x, r6.y +add.f r0.y, r2.w, c7.y +mov.f32f32 r2.w, r5.x +mad.f32 r0.x, c6.x, r3.y, r0.x +mov.f32f32 r3.y, c15.z +mov.f32f32 r5.z, c15.y +mad.f32 r4.y, r2.w, (neg)r6.z, r4.y +mul.f r6.x, r3.z, r3.w +exp2 r5.y, r5.y +mad.f32 r3.z, r5.x, r2.w, r4.x add.f r0.x, r0.x, c7.x -(rpt1)nop -max.f r2.x, r2.x, c19.y -mov.f32f32 r0.x, r0.x -mul.f r3.x, r3.x, c15.w -mov.f32f32 r3.y, c15.y -mov.f32f32 r2.x, r2.x -mov.f32f32 r3.w, c15.x -mul.f r3.x, r3.x, r2.w mul.f r3.y, r3.y, c15.w -mul.f r2.x, r2.x, r3.z -mul.f r3.z, r3.w, c15.w -mov.f32f32 r3.x, r3.x +mov.f32f32 r3.w, r6.x +mul.f r4.x, r5.z, c15.w +mov.f32f32 r5.x, c15.x +nop +mad.f32 r2.z, r3.w, (neg)r2.z, r4.y +mad.f32 r4.y, r3.w, r3.w, r3.z +mul.f r3.w, r3.y, r2.w +mul.f r3.z, r4.x, r2.w +absneg.f r2.z, (abs)r2.z +mul.f r3.y, r5.x, c15.w +(rpt1)nop +max.f r2.z, r2.z, c19.y +rsq r4.x, r4.y +(ss)mov.f32f32 r4.y, r4.x +mul.f r5.x, r6.x, r4.x mul.f r3.y, r3.y, r2.w -mov.f32f32 r2.x, r2.x -mul.f r2.w, r3.z, r2.w -mov.f32f32 r3.x, r3.x -mov.f32f32 r3.y, r3.y -mul.f r2.x, r2.x, c14.w -mov.f32f32 r2.w, r2.w -mov.f32f32 r5.x, r2.z -mov.f32f32 r5.y, r2.y -mov.f32f32 r2.x, r2.x -mov.f32f32 r3.w, r3.x -mov.f32f32 r2.y, r3.y -mov.f32f32 r2.z, r2.w -mul.f r2.w, c14.z, r2.x -mul.f r3.x, c14.y, r2.x -mul.f r2.x, c14.x, r2.x -mov.f32f32 r3.z, r2.y -mov.f32f32 r2.y, r2.w -mov.f32f32 r2.w, r3.x -mov.f32f32 r2.x, r2.x -mov.f32f32 r3.y, r2.z -mov.f32f32 r2.y, r2.y -mov.f32f32 r2.z, r2.w -mov.f32f32 r2.x, r2.x +add.f r2.z, c21.y, (neg)r2.z +mul.f r4.x, r6.y, r4.y +mul.f r2.w, r2.w, r4.y +(rpt3)nop +log2 r2.z, r2.z +(ss)mul.f r2.z, c17.x, r2.z +mul.f r4.x, r4.x, r4.z +(rpt4)nop +exp2 r2.z, r2.z +(ss)mul.f r2.z, c16.w, r2.z +mad.f32 r2.w, r2.w, r5.w, r4.x +(rpt1)nop +mov.f32f32 r4.y, r2.z +mul.f r4.x, c16.x, r2.z +mad.f32 r2.z, r5.x, r3.x, r2.w nop -mov.f32f32 r3.x, r2.y +mul.f r4.z, c16.z, r4.y +mul.f r4.y, c16.y, r4.y +max.f r2.z, r2.z, c19.y +(rpt2)nop +mul.f r2.z, r2.z, r5.y +(rpt2)nop +mul.f r2.z, r2.z, c14.w +(rpt2)nop mov.f32f32 r2.w, r2.z -mov.f32f32 r2.z, r2.x -mov.f32f32 r2.y, r5.x -mov.f32f32 r3.x, r3.x -mov.f32f32 r2.x, r5.y +mul.f r2.z, c14.x, r2.z +(rpt1)nop +mul.f r3.x, c14.z, r2.w +mul.f r2.w, c14.y, r2.w end -nop ; VERT: outputs: r0.x (0:0) r1.x (5:20) r2.x (5:21) r3.x (5:22) r4.x (5:23) -; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r1.z (0:0,cm=7,il=16,b=0) r2.y (0:0,cm=3,il=20,b=0) -; VERT: 298 instructions, 0 half, 7 full +; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r2.z (0:0,cm=7,il=16,b=0) r2.x (0:0,cm=3,il=20,b=0) +; VERT: 228 instructions, 0 half, 8 full diff --git a/reference/glsl-fs-raytrace-bug27060.asm b/reference/glsl-fs-raytrace-bug27060.asm index f3551bf..d126427 100644 --- a/reference/glsl-fs-raytrace-bug27060.asm +++ b/reference/glsl-fs-raytrace-bug27060.asm @@ -6,1335 +6,953 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c3.x) 0xbf000000, 0xbf800000, 0x461c3f9a, 0x3f000000 +@const(c4.x) 0x00000000, 0xc0800000, 0x40800000, 0x3e800000 +@const(c5.x) 0x00000000, 0x00000000, 0x40400000, 0x40000000 +@const(c6.x) 0x80000000, 0x3f800000, 0x3fc00000, 0x40100000 +@const(c7.x) 0xc0400000, 0x00000000, 0xc0800000, 0x3f2aacda +@const(c8.x) 0x40400000, 0x00000000, 0x40800000, 0xc0800000 +@const(c9.x) 0x40400000, 0x80000000, 0x3f800000, 0xc0400000 +@const(c10.x) 0x00000000, 0xc0400000, 0x40800000, 0xc0000000 +@const(c11.x) 0x40000000, 0x00000000, 0xc0800000, 0xbf800000 +@const(c12.x) 0xc0000000, 0x80000000, 0x3f800000, 0x00000000 +@const(c13.x) 0x3e4ccccd, 0x3e99999a, 0x3ecccccd, 0x3f800000 +@const(c14.x) 0xc0400000, 0x00000000, 0xbf800000, 0x40400000 +@const(c15.x) 0x00000000, 0x41000000, 0x3f800000, 0x41800000 +@const(c16.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.z, 0, r0.x mov.f32f32 r0.w, (0.000000) mov.f32f32 r1.x, (0.000000) mov.f32f32 r1.y, (0.000000) -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.y, r1.y -mul.f r1.z, r0.z, c1.x +mov.f32f32 r1.z, r0.z +mul.f r0.z, r0.z, c2.x bary.f (ei)r0.x, 1, r0.x -mul.f r0.y, r0.z, c2.x -mul.f r0.z, r0.z, c0.x -mov.f32f32 r1.w, (0.000000) -mov.f32f32 r0.x, r0.x -mov.f32f32 r2.x, (0.000000) -mov.f32f32 r2.y, (0.000000) -mov.f32f32 r2.z, c13.w -mad.f32 r1.z, c1.y, r0.x, r1.z -mad.f32 r0.y, c2.y, r0.x, r0.y -mad.f32 r0.x, c0.y, r0.x, r0.z -mov.f32f32 r0.z, r1.w -mov.f32f32 r1.z, r1.z -mov.f32f32 r1.w, c3.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x -mov.f32f32 r2.x, r2.x -mad.f32 r1.z, c1.z, r1.w, r1.z -mad.f32 r0.y, c2.z, r1.w, r0.y -mad.f32 r0.x, c0.z, r1.w, r0.x -mov.f32f32 r1.w, r2.y -mov.f32f32 r1.z, r1.z -mov.f32f32 r0.y, r0.y -mul.f r2.y, r0.x, r0.x -mov.f32f32 r2.w, (0.000000) -mad.f32 r2.y, r1.z, r1.z, r2.y -mov.f32f32 r0.y, r0.y -mov.f32f32 r3.x, c12.w -mov.f32f32 r3.y, c12.w -mov.f32f32 r2.y, r2.y -mov.f32f32 r3.z, c12.w -mad.f32 r2.y, r0.y, r0.y, r2.y -mov.f32f32 r3.w, c12.w -mov.f32f32 r4.x, c12.w -mov.f32f32 r4.y, c12.w +mov.f32f32 r0.y, r0.w +mul.f r1.w, r1.z, c1.x +mul.f r1.z, r1.z, c0.x +mov.f32f32 r2.x, r0.x +mad.f32 r0.x, c2.y, r0.x, r0.z +mov.f32f32 r0.z, c3.x +mov.f32f32 r2.y, r1.x +mad.f32 r1.w, c1.y, r2.x, r1.w +mad.f32 r1.z, c0.y, r2.x, r1.z +mad.f32 r1.w, c1.z, r0.z, r1.w +mad.f32 r1.z, c0.z, r0.z, r1.z +mad.f32 r0.x, c2.z, r0.z, r0.x +mov.f32f32 r0.z, r1.y +mov.f32f32 r2.x, r1.w +mul.f r2.z, r1.z, r1.z +mov.f32f32 r2.w, r0.x +nop +mad.f32 r1.w, r1.w, r2.x, r2.z +mov.f32f32 r2.z, (0.000000) +mov.f32f32 r2.w, r2.w +mov.f32f32 r3.x, (0.000000) +mov.f32f32 r3.y, (0.000000) +mov.f32f32 r3.z, c13.w +mad.f32 r1.w, r2.w, r2.w, r1.w +mov.f32f32 r2.w, r2.z +mov.f32f32 r3.w, r3.x +mov.f32f32 r4.x, r3.y +mov.f32f32 r4.y, (0.000000) mov.f32f32 r4.z, c12.w mov.f32f32 r4.w, c12.w -mov.f32f32 r5.x, c13.w -rsq r2.y, r2.y -(ss)mov.f32f32 r2.y, r2.y -mov.f32f32 r5.y, c13.z -mov.f32f32 r5.z, c12.w -mov.f32f32 r5.w, c12.z -mul.f r0.x, r0.x, r2.y -mul.f r1.z, r1.z, r2.y -mul.f r0.y, r0.y, r2.y -mov.f32f32 r2.y, c12.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r6.x, c12.w -absneg.f r6.y, (neg)r0.x -absneg.f r6.z, (neg)r0.x -absneg.f r6.w, (neg)r0.x -absneg.f r7.x, (neg)r0.x -mul.f r7.y, r6.y, r0.x -absneg.f r7.z, (neg)r1.z -mul.f r7.w, r6.z, r0.x -mul.f r8.x, r6.w, r0.x -absneg.f r8.y, (neg)r1.z -mad.f32 r7.y, r7.z, r1.z, r7.y -absneg.f r8.z, (neg)r1.z -mul.f r8.w, r7.x, r0.x -mad.f32 r7.w, r8.y, r1.z, r7.w -mov.f32f32 r7.y, r7.y -absneg.f r9.x, (neg)r0.y -mad.f32 r8.x, r8.z, r1.z, r8.x -mov.f32f32 r7.w, r7.w -absneg.f r9.y, (neg)r0.y -mad.f32 r7.y, r9.x, r0.y, r7.y -mov.f32f32 r8.x, r8.x -absneg.f r9.z, (neg)r0.y -mad.f32 r7.w, r9.y, r0.y, r7.w -mov.f32f32 r7.y, r7.y -absneg.f r9.w, (neg)r1.z -mad.f32 r8.x, r9.z, r0.y, r8.x -mov.f32f32 r7.w, r7.w +rsq r1.w, r1.w +(ss)mov.f32f32 r5.x, r1.w +mul.f r0.x, r0.x, r1.w +(ss)mov.f32f32 r1.w, c12.w +mov.f32f32 r5.y, c12.w +mul.f r1.z, r1.z, r5.x +mul.f r2.x, r2.x, r5.x +mov.f32f32 r5.x, r0.x +absneg.f r0.x, (neg)r0.x +mov.f32f32 r5.z, r1.z +absneg.f r1.z, (neg)r1.z +mov.f32f32 r5.w, r2.x +absneg.f r6.x, (neg)r5.x +absneg.f r6.y, (neg)r5.z +absneg.f r6.z, (neg)r5.z +absneg.f r6.w, (neg)r5.z +mul.f r7.x, r1.z, r5.z +mul.f r7.y, r6.y, r5.z +absneg.f r7.z, (neg)r5.w +mul.f r7.w, r6.z, r5.z +mul.f r8.x, r6.w, r5.z +absneg.f r8.y, (neg)r5.w +mad.f32 r7.y, r7.z, r5.w, r7.y +absneg.f r8.z, (neg)r5.w +mad.f32 r7.y, r6.x, r5.x, r7.y +mad.f32 r7.w, r8.y, r5.w, r7.w +absneg.f r8.w, (neg)r5.x +mad.f32 r8.x, r8.z, r5.w, r8.x +absneg.f r9.x, (neg)r5.x +absneg.f r2.x, (neg)r2.x +mad.f32 r7.w, r8.w, r5.x, r7.w +rcp r7.y, r7.y mul.f r6.y, c4.x, r6.y +mad.f32 r8.x, r9.x, r5.x, r8.x +mad.f32 r6.y, c4.x, r7.z, r6.y +mad.f32 r7.x, r2.x, r5.w, r7.x +mad.f32 r6.x, c4.y, r6.x, r6.y +mad.f32 r6.y, r0.x, r5.x, r7.x +rcp r7.x, r7.w mul.f r6.z, c7.x, r6.z mul.f r6.w, c8.y, r6.w -rcp r7.y, r7.y -(ss)mov.f32f32 r7.y, r7.y -mad.f32 r6.y, c4.x, r7.z, r6.y -mov.f32f32 r7.z, r8.x -rcp r7.w, r7.w -(ss)mov.f32f32 r7.w, r7.w +(ss)mul.f r6.x, r6.x, r7.y mad.f32 r6.z, c7.y, r8.y, r6.z -mov.f32f32 r6.y, r6.y -mad.f32 r8.x, r9.w, r1.z, r8.w -mad.f32 r6.y, c4.y, r9.x, r6.y -mov.f32f32 r6.z, r6.z -rcp r7.z, r7.z -(ss)mov.f32f32 r7.z, r7.z -mad.f32 r6.z, c7.z, r9.y, r6.z -mul.f r6.y, r6.y, r7.y +(ss)rcp r7.y, r8.x mad.f32 r6.w, c8.x, r8.z, r6.w -mov.f32f32 r7.y, r8.x -mul.f r6.z, r6.z, r7.w -mov.f32f32 r6.y, r6.y -mov.f32f32 r6.w, r6.w -absneg.f r7.w, (neg)r0.y -mov.f32f32 r6.z, r6.z -mad.f32 r8.x, r0.x, r6.y, c4.x -mad.f32 r6.w, c8.w, r9.z, r6.w -mad.f32 r7.y, r7.w, r0.y, r7.y -mad.f32 r8.y, r0.x, r6.z, c8.x -mul.f r8.x, r8.x, r8.x -mad.f32 r8.z, r1.z, r6.y, c4.x -mul.f r6.w, r6.w, r7.z -mul.f r7.z, r8.y, r8.y -mad.f32 r8.y, r1.z, r6.z, c8.y -mad.f32 r8.x, r8.z, r8.z, r8.x -mov.f32f32 r6.w, r6.w -mov.f32f32 r7.y, r7.y -mad.f32 r7.z, r8.y, r8.y, r7.z -mov.f32f32 r8.x, r8.x -mad.f32 r8.y, r0.y, r6.y, c4.z -mad.f32 r8.z, r0.x, r6.w, c10.x -mov.f32f32 r7.z, r7.z -mad.f32 r8.w, r0.y, r6.z, c8.z -mad.f32 r8.x, r8.y, r8.y, r8.x -mul.f r8.y, r8.z, r8.z -mad.f32 r8.z, r1.z, r6.w, c10.y -mad.f32 r7.z, r8.w, r8.w, r7.z -mov.f32f32 r8.x, r8.x -rcp r7.y, r7.y -(ss)mov.f32f32 r7.y, r7.y -mad.f32 r8.y, r8.z, r8.z, r8.y -mul.f r7.x, c11.x, r7.x -mov.f32f32 r7.z, r7.z -mad.f32 r7.x, c11.y, r9.w, r7.x -mov.f32f32 r8.y, r8.y -sqrt r8.x, r8.x -(ss)mov.f32f32 r8.x, r8.x -mad.f32 r8.z, r0.y, r6.w, c10.z -mov.f32f32 r7.x, r7.x -mov.f32f32 r6.y, r6.y -mov.f32f32 r8.w, r8.x -mov.f32f32 r9.x, r8.x -sqrt r7.z, r7.z -mad.f32 r8.y, r8.z, r8.z, r8.y -(ss)mov.f32f32 r7.z, r7.z -mad.f32 r7.x, c11.z, r7.w, r7.x -mul.f r7.w, r8.w, r9.x -mov.f32f32 r8.y, r8.y -mov.f32f32 r8.z, r7.z -mov.f32f32 r8.w, r7.z -mov.f32f32 r7.w, r7.w -mul.f r7.x, r7.x, r7.y -cmps.f.ge r7.y, c6.z, r7.z -cmps.f.ge r7.z, c3.w, r8.x -add.f r7.w, c4.w, (neg)r7.w -mul.f r8.x, r8.z, r8.w -sqrt r8.y, r8.y -(ss)mov.f32f32 r8.y, r8.y -mov.f32f32 r7.x, r7.x -mov.f32f32 r7.w, r7.w -mov.f32f32 r8.x, r8.x -mov.f32f32 r8.z, r8.y -mov.f32f32 r8.w, r8.y -mad.f32 r9.x, r0.x, r7.x, c10.w -add.f r8.x, c6.w, (neg)r8.x -cmps.f.ge r8.y, c3.w, r8.y -sqrt r7.w, r7.w -(ss)mov.f32f32 r7.w, r7.w -mul.f r8.z, r8.z, r8.w -mov.f32f32 r8.x, r8.x -mul.f r8.w, r9.x, r9.x -add.f r6.y, r6.y, (neg)r7.w -mov.f32f32 r7.w, r8.z -mad.f32 r8.z, r1.z, r7.x, c10.x -cov.u32f32 r8.y, r8.y -mov.f32f32 r6.y, r6.y -sqrt r8.x, r8.x -add.f r7.w, c4.w, (neg)r7.w -(ss)mov.f32f32 r8.x, r8.x -mov.f32f32 r6.z, r6.z -cmps.f.ge r9.x, r6.y, c4.x -mov.f32f32 r9.y, r6.y -mov.f32f32 r7.w, r7.w -mov.f32f32 r9.z, r6.y -cov.u32f32 r9.x, r9.x -cmps.f.ge r9.w, r6.y, c5.x -mov.f32f32 r10.x, r0.x -add.f r6.z, r6.z, (neg)r8.x -mov.f32f32 r8.x, r1.z -cov.u32f32 r9.w, r9.w -mov.f32f32 r10.x, r10.x -mov.f32f32 r6.z, r6.z -sqrt r7.w, r7.w -(ss)mov.f32f32 r7.w, r7.w -mul.f r9.x, r9.x, r9.w -mad.f32 r9.y, r10.x, r9.y, c5.y -mov.f32f32 r9.w, r6.z -mov.f32f32 r6.w, r6.w -mov.f32f32 r9.x, r9.x -cmps.f.ge r10.x, c3.z, r6.y -add.f r10.y, r9.y, c6.x -mov.f32f32 r10.z, r0.x -add.f r6.w, r6.w, (neg)r7.w -cov.u32f32 r7.w, r10.x -mul.f r10.x, r10.y, c5.w -mov.f32f32 r10.y, r10.z -mov.f32f32 r6.w, r6.w -mul.f r7.w, r9.x, r7.w -mov.f32f32 r8.x, r8.x -mov.f32f32 r9.x, r6.z -mov.f32f32 r10.z, r6.w -mov.f32f32 r7.w, r7.w -mad.f32 r9.w, r10.y, r9.w, c5.y -mov.f32f32 r10.y, r6.w -mad.f32 r8.x, r8.x, r9.z, c5.y -cmps.f.ne r7.w, r7.w, c4.x -mov.f32f32 r9.z, r6.y -mov.f32f32 r6.y, r6.y -mov.f32f32 r10.w, r0.y -mov.f32f32 r11.x, c4.x -mov.f32f32 r9.z, r9.z -mov.f32f32 r11.y, c3.z -sel.b32 r10.x, r10.x, r7.w, r0.w -add.f r11.z, r8.x, c6.x -sel.b32 r9.y, r9.y, r7.w, r1.y -sel.b32 r9.z, r9.z, r7.w, r11.y -cov.u32f32 r7.z, r7.z -mul.f r11.z, r11.z, c5.w -sel.b32 r8.x, r8.x, r7.w, r0.z -mov.f32f32 r10.w, r10.w -cmps.f.ne r7.z, r7.z, c4.x -sel.b32 r11.z, r11.z, r7.w, r1.x -mov.f32f32 r11.w, c3.y -mad.f32 r6.y, r10.w, r6.y, c5.z -sel.b32 r9.z, r9.z, r7.z, r11.y -sel.b32 r0.w, r10.x, r7.z, r0.w -sel.b32 r1.x, r11.z, r7.z, r1.x -sel.b32 r1.y, r9.y, r7.z, r1.y -mov.f32f32 r9.y, r9.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.y, r1.y -cmps.f.ge r9.y, r9.y, r6.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.y, r1.y -cov.u32f32 r9.y, r9.y -sel.b32 r0.z, r8.x, r7.z, r0.z -sel.b32 r8.x, r6.y, r7.w, r2.x -add.f r6.y, r6.y, c6.y -mov.f32f32 r9.y, r9.y -cmps.f.ge r10.x, r6.z, c4.x -mov.f32f32 r0.z, r0.z -sel.b32 r2.x, r8.x, r7.z, r2.x -mul.f r6.y, r6.y, c5.w -cov.u32f32 r8.x, r10.x -cmps.f.ge r10.x, r6.z, c5.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r2.x, r2.x -sel.b32 r6.y, r6.y, r7.w, r1.w -cov.u32f32 r10.x, r10.x -sel.b32 r7.w, r11.x, r7.w, r11.w -mov.f32f32 r9.z, r9.z -sel.b32 r1.w, r6.y, r7.z, r1.w -mul.f r6.y, r8.x, r10.x -mov.f32f32 r2.x, r2.x -sel.b32 r7.z, r7.w, r7.z, r11.w -mov.f32f32 r1.w, r1.w -mov.f32f32 r6.y, r6.y -add.f r7.w, r9.w, c9.x -mov.f32f32 r8.x, r0.x -mov.f32f32 r10.x, r1.z -mul.f r6.y, r6.y, r9.y -mov.f32f32 r1.w, r1.w -mov.f32f32 r7.z, r7.z -nop -mov.f32f32 r6.y, r6.y -mul.f r7.w, r7.w, c7.w -mov.f32f32 r8.x, r8.x -mov.f32f32 r9.y, r10.x -cmps.f.ne r6.y, r6.y, c4.x -mov.f32f32 r10.x, r6.z -mov.f32f32 r6.z, r6.z -mov.f32f32 r10.w, r0.y -mov.f32f32 r7.z, r7.z -mov.f32f32 r10.x, r10.x -sel.b32 r7.w, r7.w, r6.y, r0.w -mad.f32 r9.x, r9.y, r9.x, c5.y -sel.b32 r9.y, r9.w, r6.y, r1.y -sel.b32 r9.w, r10.x, r6.y, r9.z -cov.u32f32 r7.y, r7.y -add.f r10.x, r9.x, c9.y -sel.b32 r9.x, r9.x, r6.y, r0.z -mov.f32f32 r10.w, r10.w -cmps.f.ne r7.y, r7.y, c4.x -mul.f r10.x, r10.x, c7.w -mov.f32f32 r11.x, c6.y -mad.f32 r6.z, r10.w, r6.z, c5.z -sel.b32 r9.z, r9.w, r7.y, r9.z -sel.b32 r0.w, r7.w, r7.y, r0.w -sel.b32 r7.w, r10.x, r6.y, r1.x -sel.b32 r1.y, r9.y, r7.y, r1.y -mov.f32f32 r9.y, r9.z -mov.f32f32 r0.w, r0.w -sel.b32 r1.x, r7.w, r7.y, r1.x -mov.f32f32 r1.y, r1.y -cmps.f.ge r7.w, r9.y, r6.w -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.y, r1.y +mad.f32 r6.z, c7.z, r8.w, r6.z +mov.f32f32 r7.z, r6.x +mad.f32 r6.w, c8.w, r9.x, r6.w +rcp r6.y, r6.y +mul.f r1.z, c11.x, r1.z +mul.f r6.z, r6.z, r7.x +mad.f32 r7.x, r5.z, r7.z, c4.x +(ss)mul.f r6.w, r6.w, r7.y +mad.f32 r1.z, c11.y, r2.x, r1.z +mov.f32f32 r2.x, r6.z +mul.f r7.x, r7.x, r7.x +mad.f32 r7.y, r5.w, r7.z, c4.x +mov.f32f32 r7.w, r6.w +(ss)mad.f32 r8.x, r5.z, r2.x, c8.x +mad.f32 r0.x, c11.z, r0.x, r1.z +mad.f32 r1.z, r7.y, r7.y, r7.x +mad.f32 r7.x, r5.x, r7.z, c4.z +mul.f r7.y, r8.x, r8.x +mad.f32 r7.z, r5.z, r7.w, c10.x +mad.f32 r8.x, r5.w, r2.x, c8.y +mad.f32 r1.z, r7.x, r7.x, r1.z +mul.f r0.x, r0.x, r6.y +mul.f r6.y, r7.z, r7.z +mad.f32 r7.x, r8.x, r8.x, r7.y +mad.f32 r7.y, r5.w, r7.w, c10.y +mad.f32 r2.x, r5.x, r2.x, c8.z +mov.f32f32 r7.z, r0.x +sqrt r1.z, r1.z +(ss)mov.f32f32 r8.x, r1.z +mad.f32 r6.y, r7.y, r7.y, r6.y +mad.f32 r2.x, r2.x, r2.x, r7.x +mad.f32 r7.x, r5.x, r7.w, c10.z +(ss)mul.f r1.z, r1.z, r8.x +mad.f32 r7.y, r5.z, r7.z, c10.w +cmps.f.ge r7.w, c3.w, r8.x +mad.f32 r6.y, r7.x, r7.x, r6.y +add.f r1.z, c4.w, (neg)r1.z +sqrt r2.x, r2.x +(ss)mov.f32f32 r7.x, r2.x +mul.f r7.y, r7.y, r7.y +mad.f32 r8.x, r5.w, r7.z, c10.x cov.u32f32 r7.w, r7.w -sel.b32 r0.z, r9.x, r7.y, r0.z -sel.b32 r9.x, r6.z, r6.y, r2.x -add.f r6.z, r6.z, c9.z -mov.f32f32 r7.w, r7.w -cmps.f.ge r9.y, r6.w, c4.x -mov.f32f32 r1.x, r1.x -mov.f32f32 r0.z, r0.z -sel.b32 r2.x, r9.x, r7.y, r2.x -cov.u32f32 r9.x, r9.y -cmps.f.ge r9.y, r6.w, c5.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r2.x, r2.x -mul.f r6.z, r6.z, c7.w -cov.u32f32 r9.y, r9.y -sel.b32 r9.w, r11.x, r6.y, r7.z -mov.f32f32 r9.z, r9.z -sel.b32 r6.y, r6.z, r6.y, r1.w -mul.f r6.z, r9.x, r9.y -mov.f32f32 r2.x, r2.x -sel.b32 r7.z, r9.w, r7.y, r7.z -sel.b32 r1.w, r6.y, r7.y, r1.w -mov.f32f32 r6.y, r6.z -mad.f32 r6.z, r8.x, r10.y, c5.y +cmps.f.ge r8.y, c6.z, r7.x +(ss)mul.f r2.x, r2.x, r7.x +sqrt r1.z, r1.z +(ss)add.f r1.z, r6.x, (neg)r1.z +sqrt r6.x, r6.y +(ss)mov.f32f32 r6.y, r6.x +mad.f32 r7.x, r8.x, r8.x, r7.y +add.f r2.x, c6.w, (neg)r2.x mov.f32f32 r7.y, r1.z -mov.f32f32 r8.x, r6.w -mul.f r6.y, r6.y, r7.w -mov.f32f32 r1.w, r1.w -mov.f32f32 r7.z, r7.z -nop -mov.f32f32 r6.y, r6.y -mov.f32f32 r1.w, r1.w -mov.f32f32 r7.z, r7.z -add.f r7.w, r6.z, c9.y -cmps.f.ne r6.y, r6.y, c4.x -mov.f32f32 r6.w, r6.w -mov.f32f32 r7.y, r7.y -mov.f32f32 r9.x, r0.y -mov.f32f32 r9.y, c5.w -mov.f32f32 r6.w, r6.w -mul.f r7.w, r7.w, c5.w -mad.f32 r7.y, r7.y, r10.z, c5.y -sel.b32 r6.z, r6.z, r6.y, r1.y -sel.b32 r6.w, r6.w, r6.y, r9.z -cmps.f.ne r8.y, r8.y, c4.x -sel.b32 r7.w, r7.w, r6.y, r0.w -add.f r9.w, r7.y, c9.w -sel.b32 r7.y, r7.y, r6.y, r0.z -sel.b32 r6.w, r6.w, r8.y, r9.z -sel.b32 r0.w, r7.w, r8.y, r0.w -mul.f r7.w, r9.w, c5.w -sel.b32 r1.y, r6.z, r8.y, r1.y -mov.f32f32 r6.z, r6.w -mad.f32 r6.w, r8.z, r8.z, r8.w -mov.f32f32 r0.w, r0.w -sel.b32 r7.w, r7.w, r6.y, r1.x -mov.f32f32 r1.y, r1.y -mov.f32f32 r6.w, r6.w -mad.f32 r8.z, r0.y, r7.x, c10.z -mov.f32f32 r0.w, r0.w -sel.b32 r1.x, r7.w, r8.y, r1.x +mul.f r6.x, r6.x, r6.y +mad.f32 r7.z, r5.x, r7.z, c10.z +cmps.f.ge r6.y, c3.w, r6.y +cmps.f.ge r8.x, r7.y, c4.x +mad.f32 r8.z, r5.z, r7.y, c5.y +sqrt r2.x, r2.x +add.f r6.x, c4.w, (neg)r6.x +(ss)add.f r2.x, r6.z, (neg)r2.x +cov.u32f32 r6.z, r8.x +cmps.f.ge r8.x, r7.y, c5.x +add.f r8.w, r8.z, c6.x +mov.f32f32 r9.x, r2.x +mad.f32 r9.y, r5.w, r7.y, c5.y +cov.u32f32 r8.x, r8.x +mul.f r8.w, r8.w, c5.w +mad.f32 r9.z, r5.z, r9.x, c5.y +sqrt r6.x, r6.x +(ss)add.f r6.x, r6.w, (neg)r6.x +mul.f r6.z, r6.z, r8.x +cmps.f.ge r6.w, c3.z, r7.y +add.f r8.x, r9.z, c9.x +mov.f32f32 r9.w, r6.x +add.f r10.x, r9.y, c6.x +cov.u32f32 r6.w, r6.w +mul.f r8.x, r8.x, c7.w +mad.f32 r10.y, r5.z, r9.w, c5.y +mul.f r10.x, r10.x, c5.w +mul.f r6.z, r6.z, r6.w +mad.f32 r6.w, r5.w, r9.x, c5.y +mad.f32 r10.z, r5.w, r9.w, c5.y +mad.f32 r7.y, r5.x, r7.y, c5.z +cmps.f.ne r6.z, r6.z, c4.x +mov.f32f32 r10.w, c3.z +mov.f32f32 r11.x, c4.x +add.f r11.y, r7.y, c6.y +mov.f32f32 r11.z, c3.y +sel.b32 r1.z, r1.z, r6.z, r10.w +cmps.f.ne r7.w, r7.w, c4.x +sel.b32 r0.w, r8.w, r6.z, r0.w +sel.b32 r1.x, r10.x, r6.z, r1.x +sel.b32 r1.y, r8.z, r6.z, r1.y +sel.b32 r1.z, r1.z, r7.w, r10.w +sel.b32 r0.y, r0.w, r7.w, r0.y +sel.b32 r0.w, r1.x, r7.w, r2.y +sel.b32 r0.z, r1.y, r7.w, r0.z +cmps.f.ge r1.x, r1.z, r9.x +mov.f32f32 r1.y, r0.y +mov.f32f32 r2.y, r0.w +mov.f32f32 r8.z, r0.z +cov.u32f32 r1.x, r1.x +cmps.f.ge r8.w, r9.x, c4.x mov.f32f32 r1.y, r1.y -mad.f32 r6.w, r8.z, r8.z, r6.w -sel.b32 r0.z, r7.y, r8.y, r0.z -mov.f32f32 r7.y, r9.x -sel.b32 r7.w, r9.y, r6.y, r7.z -mov.f32f32 r6.w, r6.w -mov.f32f32 r1.x, r1.x -mov.f32f32 r0.z, r0.z -mad.f32 r7.y, r7.y, r8.x, c5.z -sel.b32 r7.z, r7.w, r8.y, r7.z -mov.f32f32 r7.x, r7.x -mov.f32f32 r7.w, r0.x -sqrt r6.w, r6.w -(ss)mov.f32f32 r6.w, r6.w -mov.f32f32 r1.x, r1.x -mov.f32f32 r0.z, r0.z -sel.b32 r8.x, r7.y, r6.y, r2.x -mov.f32f32 r8.z, r6.w -mov.f32f32 r8.w, r6.w -add.f r7.y, r7.y, c9.z -mov.f32f32 r7.z, r7.z -sel.b32 r2.x, r8.x, r8.y, r2.x -mul.f r8.x, r8.z, r8.w -mul.f r7.y, r7.y, c5.w -mov.f32f32 r7.z, r7.z -mov.f32f32 r2.x, r2.x -mov.f32f32 r8.x, r8.x -sel.b32 r6.y, r7.y, r6.y, r1.w -cmps.f.ge r6.w, c6.y, r6.w -mov.f32f32 r7.y, r7.w -add.f r7.w, c6.y, (neg)r8.x +mov.f32f32 r2.y, r2.y +mov.f32f32 r8.z, r8.z +cov.u32f32 r8.w, r8.w +cmps.f.ge r10.x, r9.x, c5.x +sel.b32 r2.z, r9.y, r6.z, r2.z +sel.b32 r3.x, r7.y, r6.z, r3.x +mul.f r7.y, r11.y, c5.w +cov.u32f32 r9.y, r10.x +sel.b32 r2.z, r2.z, r7.w, r2.w +sel.b32 r2.w, r3.x, r7.w, r3.w +sel.b32 r3.x, r7.y, r6.z, r3.y +mul.f r3.y, r8.w, r9.y +mov.f32f32 r3.w, r2.z +mov.f32f32 r7.y, r2.w +sel.b32 r3.x, r3.x, r7.w, r4.x +mul.f r1.x, r3.y, r1.x +mov.f32f32 r3.y, r3.w +mov.f32f32 r3.w, r7.y +mov.f32f32 r4.x, r3.x +cmps.f.ne r1.x, r1.x, c4.x +add.f r7.y, r6.w, c9.y +mad.f32 r8.w, r5.x, r9.x, c5.z +sel.b32 r6.z, r11.x, r6.z, r11.z +sel.b32 r2.x, r2.x, r1.x, r1.z +mov.f32f32 r1.z, r1.z +cov.u32f32 r8.y, r8.y +sel.b32 r0.y, r8.x, r1.x, r0.y +mul.f r7.y, r7.y, c7.w +sel.b32 r0.z, r9.z, r1.x, r0.z +cmps.f.ne r8.x, r8.y, c4.x +sel.b32 r2.z, r6.w, r1.x, r2.z +sel.b32 r2.w, r8.w, r1.x, r2.w +add.f r6.w, r8.w, c9.z +sel.b32 r1.z, r2.x, r8.x, r1.z +sel.b32 r0.y, r0.y, r8.x, r1.y +sel.b32 r0.w, r7.y, r1.x, r0.w +sel.b32 r0.z, r0.z, r8.x, r8.z +cmps.f.ge r1.y, r1.z, r9.w +mov.f32f32 r2.x, r0.y +sel.b32 r0.w, r0.w, r8.x, r2.y +mov.f32f32 r2.y, r0.z +cov.u32f32 r1.y, r1.y +cmps.f.ge r7.y, r9.w, c4.x mov.f32f32 r2.x, r2.x -sel.b32 r1.w, r6.y, r8.y, r1.w -cov.u32f32 r6.y, r6.w -mov.f32f32 r6.w, r7.w -mov.f32f32 r7.w, r1.z -mov.f32f32 r8.x, r0.y -mov.f32f32 r8.y, r0.x -mov.f32f32 r8.z, r1.z -mov.f32f32 r0.x, r0.x +mov.f32f32 r8.y, r0.w +mov.f32f32 r2.y, r2.y +cov.u32f32 r7.y, r7.y +cmps.f.ge r8.z, r9.w, c5.x +mov.f32f32 r8.y, r8.y +sel.b32 r2.z, r2.z, r8.x, r3.y +sel.b32 r2.w, r2.w, r8.x, r3.w +cov.u32f32 r3.y, r8.z +mul.f r3.w, r6.w, c7.w +sel.b32 r6.z, r6.z, r7.w, r11.z +mov.f32f32 r6.w, r2.z +mul.f r3.y, r7.y, r3.y +mov.f32f32 r7.y, r2.w +sel.b32 r3.x, r3.w, r1.x, r3.x +mov.f32f32 r3.w, c6.y +mul.f r1.y, r3.y, r1.y +mov.f32f32 r3.y, r6.w +mov.f32f32 r6.w, r7.y +mov.f32f32 r4.x, r4.x +cmps.f.ne r1.y, r1.y, c4.x +add.f r7.y, r10.y, c9.y +add.f r7.w, r10.z, c9.w +mad.f32 r8.z, r5.x, r9.w, c5.z +sel.b32 r6.x, r6.x, r1.y, r1.z mov.f32f32 r1.z, r1.z -sqrt r6.w, r6.w -(ss)mov.f32f32 r6.w, r6.w -mov.f32f32 r1.w, r1.w +cov.u32f32 r6.y, r6.y +mul.f r7.y, r7.y, c5.w +mul.f r7.w, r7.w, c5.w +sel.b32 r0.z, r10.y, r1.y, r0.z cmps.f.ne r6.y, r6.y, c4.x -mov.f32f32 r7.w, r7.w -add.f r6.w, r7.x, (neg)r6.w -mov.f32f32 r1.w, r1.w -mov.f32f32 r7.x, r8.x -mov.f32f32 r8.x, r0.y -mov.f32f32 r6.w, r6.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r8.w, c12.w -mov.f32f32 r9.x, c12.z -cmps.f.ge r6.z, r6.z, r6.w -cmps.f.ge r9.y, r6.w, c4.x -mov.f32f32 r9.z, r6.w -mov.f32f32 r9.w, r6.w -cov.u32f32 r6.z, r6.z -cov.u32f32 r9.y, r9.y -mad.f32 r7.y, r7.y, r9.z, c5.y -mad.f32 r7.w, r7.w, r9.w, c5.y +sel.b32 r0.y, r7.y, r1.y, r0.y +sel.b32 r0.w, r7.w, r1.y, r0.w +sel.b32 r2.z, r10.z, r1.y, r2.z +sel.b32 r1.z, r6.x, r6.y, r1.z +mad.f32 r6.x, r7.z, r7.z, r7.x +sel.b32 r0.y, r0.y, r6.y, r2.x +sel.b32 r0.w, r0.w, r6.y, r8.y +sel.b32 r0.z, r0.z, r6.y, r2.y +sel.b32 r2.x, r2.z, r6.y, r3.y +sel.b32 r2.y, r8.z, r1.y, r2.w +sel.b32 r2.z, r3.x, r8.x, r4.x +sqrt r2.w, r6.x +(ss)mov.f32f32 r3.x, r2.w +mov.f32f32 r3.y, r0.y +mov.f32f32 r4.x, r0.w +(ss)mov.f32f32 r6.x, r0.z +mul.f r2.w, r2.w, r3.x +mov.f32f32 r3.y, r3.y +mov.f32f32 r4.x, r4.x +mov.f32f32 r6.x, r6.x +add.f r2.w, c6.y, (neg)r2.w +mov.f32f32 r7.x, r2.x +sel.b32 r2.y, r2.y, r6.y, r6.w +add.f r6.w, r8.z, c9.z +sel.b32 r1.x, r3.w, r1.x, r6.z +mov.f32f32 r3.w, r6.z +mov.f32f32 r6.z, r2.z +sqrt r2.w, r2.w +(ss)add.f r0.x, r0.x, (neg)r2.w +(ss)mov.f32f32 r2.w, r7.x +mov.f32f32 r7.x, r2.y +mul.f r6.w, r6.w, c5.w +mov.f32f32 r7.y, r0.x +mov.f32f32 r3.w, r3.w mov.f32f32 r6.z, r6.z -cmps.f.ge r9.z, r6.w, c5.x -add.f r9.w, r7.y, c12.x -add.f r10.x, r7.w, c12.y -mov.f32f32 r6.w, r6.w +cmps.f.ge r0.x, r0.x, c4.x +cmps.f.ge r1.z, r1.z, r7.y +mov.f32f32 r7.x, r7.x +sel.b32 r2.z, r6.w, r1.y, r2.z +sel.b32 r1.x, r1.x, r8.x, r3.w +cov.u32f32 r1.z, r1.z +cmps.f.ge r3.w, r7.y, c5.x +sel.b32 r2.z, r2.z, r6.y, r6.z +mov.f32f32 r6.z, c5.w +mov.f32f32 r6.w, r1.x +cov.u32f32 r3.w, r3.w +cov.u32f32 r0.x, r0.x +mov.f32f32 r7.z, r2.z +sel.b32 r1.x, r6.z, r1.y, r1.x +mov.f32f32 r1.y, r6.w +mul.f r0.x, r0.x, r3.w +mov.f32f32 r3.w, r7.z +mad.f32 r6.z, r5.z, r7.y, c5.y +mad.f32 r6.w, r5.w, r7.y, c5.y +mul.f r0.x, r0.x, r1.z +sel.b32 r1.x, r1.x, r6.y, r1.y +add.f r1.y, r6.z, c12.x +add.f r1.z, r6.w, c12.y +cmps.f.ne r0.x, r0.x, c4.x +mov.f32f32 r6.y, c5.z +mad.f32 r7.y, r5.x, r7.y, c5.z +mov.f32f32 r7.z, r1.x +sel.b32 r0.y, r1.y, r0.x, r0.y +cmps.f.ge r1.y, c6.y, r3.x +sel.b32 r0.z, r6.z, r0.x, r0.z +sel.b32 r1.x, r6.y, r0.x, r1.x +mov.f32f32 r3.x, r7.z +cov.u32f32 r1.y, r1.y +sel.b32 r2.x, r6.w, r0.x, r2.x +sel.b32 r2.y, r7.y, r0.x, r2.y +sel.b32 r0.w, r1.z, r0.x, r0.w +cmps.f.ne r1.y, r1.y, c4.x +add.f r1.z, r7.y, c12.z +mov.f32f32 r6.y, c12.w +mov.f32f32 r6.z, c12.w +sel.b32 r0.y, r0.y, r1.y, r3.y +sel.b32 r0.z, r0.z, r1.y, r6.x +sel.b32 r1.x, r1.x, r1.y, r3.x +sel.b32 r2.x, r2.x, r1.y, r2.w +mul.f r2.w, r0.y, r5.z +sel.b32 r0.w, r0.w, r1.y, r4.x +add.f r3.x, c15.x, (neg)r0.z +mov.f32f32 r3.y, r1.x +add.f r4.x, c5.y, (neg)r0.z +mad.f32 r2.w, r0.w, r5.w, r2.w +sel.b32 r0.x, r1.z, r0.x, r2.z +mul.f r1.z, r3.x, r3.x +add.f r2.z, c15.y, (neg)r2.x +mov.f32f32 r6.x, r3.y +sel.b32 r0.x, r0.x, r1.y, r3.w +mov.f32f32 r3.w, r4.x +mad.f32 r1.z, r2.z, r2.z, r1.z +sel.b32 r1.y, r2.y, r1.y, r7.x +mad.f32 r2.y, r0.x, r5.x, r2.w +cmps.f.eq r2.w, r6.x, c5.z +mul.f r4.x, r4.x, r3.w +add.f r6.x, c5.y, (neg)r2.x +mul.f r6.w, r2.y, r0.y +add.f r7.x, c15.z, (neg)r1.y +cov.u32f32 r2.w, r2.w +mov.f32f32 r7.y, r6.x +mul.f r6.w, c5.w, r6.w +mad.f32 r1.z, r7.x, r7.x, r1.z +cmps.f.ne r2.w, r2.w, c4.x +cmps.f.eq r7.z, r1.x, c4.x +add.f r5.z, r5.z, (neg)r6.w +mad.f32 r4.x, r6.x, r7.y, r4.x +add.f r6.x, c5.z, (neg)r1.y +mul.f r6.w, r2.y, r0.w +mov.f32f32 r7.w, r5.z +absneg.f r5.z, (neg)r5.z +rsq r1.z, r1.z +(ss)mov.f32f32 r8.x, r1.z +cov.u32f32 r7.z, r7.z +absneg.f r8.y, (neg)r7.w +absneg.f r8.z, (neg)r7.w +absneg.f r8.w, (neg)r7.w +mul.f r9.x, r5.z, r7.w +mul.f r9.y, r8.y, r7.w +mul.f r6.w, c5.w, r6.w +mul.f r9.z, r8.z, r7.w +mul.f r9.w, r8.w, r7.w +mul.f r3.x, r3.x, r8.x +add.f r5.w, r5.w, (neg)r6.w +add.f r10.x, c11.y, (neg)r0.z +add.f r10.y, c14.x, (neg)r0.z +add.f r10.z, c14.y, (neg)r0.z +mov.f32f32 r10.w, r5.w +absneg.f r5.w, (neg)r5.w +mov.f32f32 r11.x, r3.x +mul.f r3.x, r0.y, r3.x +absneg.f r11.y, (neg)r10.w +absneg.f r11.z, (neg)r10.w +absneg.f r11.w, (neg)r10.w +mad.f32 r9.x, r5.w, r10.w, r9.x +mad.f32 r9.y, r11.y, r10.w, r9.y +mul.f r2.y, r2.y, r0.x +mad.f32 r9.z, r11.z, r10.w, r9.z +mad.f32 r9.w, r11.w, r10.w, r9.w +absneg.f r11.x, (neg)r11.x +mul.f r2.y, c5.w, r2.y +mul.f r2.z, r2.z, r8.x +mul.f r8.x, r10.x, r8.y +mul.f r8.y, r10.y, r8.z +add.f r5.x, r5.x, (neg)r2.y +mul.f r8.z, r0.y, r11.x +mov.f32f32 r10.x, r2.z +mad.f32 r2.z, r0.w, r2.z, r3.x +mov.f32f32 r3.x, r5.x +absneg.f r5.x, (neg)r5.x +absneg.f r10.x, (neg)r10.x +(ss)mul.f r1.z, r7.x, r1.z +absneg.f r7.x, (neg)r3.x +absneg.f r10.y, (neg)r3.x +absneg.f r12.x, (neg)r3.x +mad.f32 r9.x, r5.x, r3.x, r9.x +mad.f32 r9.y, r7.x, r3.x, r9.y +mad.f32 r9.z, r10.y, r3.x, r9.z +mad.f32 r9.w, r12.x, r3.x, r9.w +mad.f32 r8.z, r0.w, r10.x, r8.z +mov.f32f32 r12.y, r1.z +mad.f32 r1.z, r0.x, r1.z, r2.z +add.f r2.z, c11.y, (neg)r2.x +rcp r9.y, r9.y +nop +rcp r9.x, r9.x +add.f r12.z, c11.x, (neg)r0.z +rcp r9.z, r9.z +add.f r12.w, c14.y, (neg)r2.x +mad.f32 r2.z, r2.z, r11.y, r8.x +add.f r8.x, c11.w, (neg)r1.y +rcp r9.w, r9.w +mul.f r8.w, r10.z, r8.w +mad.f32 r8.y, r12.w, r11.z, r8.y +add.f r10.z, c14.z, (neg)r1.y +mad.f32 r2.z, r8.x, r7.x, r2.z +add.f r7.x, c14.w, (neg)r2.x +mul.f r5.z, r12.z, r5.z +add.f r8.x, c11.y, (neg)r2.x +(ss)mul.f r2.z, r2.z, r9.y +mad.f32 r8.y, r10.z, r10.y, r8.y +mad.f32 r7.x, r7.x, r11.w, r8.w +add.f r8.w, c14.z, (neg)r1.y +(ss)mov.f32f32 r9.y, r2.z +mul.f r8.y, r8.y, r9.z +mad.f32 r5.z, r8.x, r5.w, r5.z +add.f r5.w, c11.w, (neg)r1.y +mad.f32 r8.x, r7.w, r9.y, r0.z +mov.f32f32 r9.z, r8.y +mad.f32 r7.x, r8.w, r12.x, r7.x +mad.f32 r5.x, r5.w, r5.x, r5.z +add.f r5.z, r8.x, c6.x +mad.f32 r5.w, r7.w, r9.z, r0.z +mul.f r7.x, r7.x, r9.w +mul.f r5.x, r5.x, r9.x +mov.f32f32 r8.x, r5.z +add.f r5.w, r5.w, c9.x +mov.f32f32 r8.w, r7.x +mov.f32f32 r9.x, r5.x +mul.f r5.z, r5.z, r8.x +mad.f32 r8.x, r10.w, r9.y, r2.x +mov.f32f32 r9.w, r5.w +mad.f32 r10.y, r7.w, r8.w, r0.z +mad.f32 r10.z, r7.w, r9.x, r0.z +add.f r8.x, r8.x, c6.x +mul.f r5.w, r5.w, r9.w +add.f r9.w, r10.y, c9.y +mad.f32 r10.y, r10.w, r9.z, r2.x +mov.f32f32 r11.y, r8.x +add.f r10.z, r10.z, c12.x +mov.f32f32 r11.z, r9.w +add.f r10.y, r10.y, c9.y +mad.f32 r5.z, r8.x, r11.y, r5.z +mad.f32 r8.x, r3.x, r9.y, r1.y +mul.f r9.y, r9.w, r11.z +mov.f32f32 r9.w, r10.y +mad.f32 r11.y, r10.w, r8.w, r2.x +add.f r8.x, r8.x, c6.y +mov.f32f32 r11.z, r10.z +mad.f32 r5.w, r10.y, r9.w, r5.w +add.f r9.w, r11.y, c9.w +mov.f32f32 r10.y, r8.x +mad.f32 r9.z, r3.x, r9.z, r1.y +mul.f r10.z, r10.z, r11.z +mad.f32 r11.y, r10.w, r9.x, r2.x +mad.f32 r5.z, r8.x, r10.y, r5.z +add.f r8.x, r9.z, c9.z +mov.f32f32 r9.z, r9.w +add.f r10.y, r11.y, c12.y +absneg.f r11.y, (neg)r12.y +mov.f32f32 r11.z, r8.x +mad.f32 r9.y, r9.w, r9.z, r9.y +sqrt r5.z, r5.z +(ss)mov.f32f32 r9.z, r5.z +mad.f32 r8.w, r3.x, r8.w, r1.y +mad.f32 r5.w, r8.x, r11.z, r5.w +mov.f32f32 r8.x, r10.y +(ss)mul.f r5.z, r5.z, r9.z +add.f r8.w, r8.w, c9.z +cmps.f.ge r9.z, c3.w, r9.z +mad.f32 r8.x, r10.y, r8.x, r10.z +add.f r5.z, c4.w, (neg)r5.z +sqrt r5.w, r5.w +mov.f32f32 r9.w, r8.w +(ss)mov.f32f32 r10.y, r5.w +mad.f32 r9.x, r3.x, r9.x, r1.y cov.u32f32 r9.z, r9.z -mov.f32f32 r10.y, c12.w -mov.f32f32 r10.z, c12.w -mov.f32f32 r10.w, c13.z -mul.f r9.y, r9.y, r9.z -mad.f32 r6.w, r7.x, r6.w, c5.z -(rpt1)nop -mov.f32f32 r7.x, r9.y -add.f r9.y, r6.w, c12.z -mov.f32f32 r9.z, c13.y -mov.f32f32 r11.x, c12.z -mul.f r6.z, r7.x, r6.z -mov.f32f32 r7.x, c12.w -mov.f32f32 r11.y, c12.z -mov.f32f32 r11.z, c12.w -mov.f32f32 r6.z, r6.z -mov.f32f32 r11.w, c12.z -mov.f32f32 r12.x, c12.w -mov.f32f32 r12.y, c12.z -cmps.f.ne r6.z, r6.z, c4.x -mov.f32f32 r12.z, c5.z -mov.f32f32 r12.w, c12.w -mov.f32f32 r13.x, c13.y -sel.b32 r9.w, r9.w, r6.z, r0.w -sel.b32 r7.y, r7.y, r6.z, r1.y -sel.b32 r12.z, r12.z, r6.z, r7.z -sel.b32 r7.w, r7.w, r6.z, r0.z -sel.b32 r0.w, r9.w, r6.y, r0.w -sel.b32 r1.y, r7.y, r6.y, r1.y -sel.b32 r7.y, r12.z, r6.y, r7.z -sel.b32 r0.z, r7.w, r6.y, r0.z -mov.f32f32 r7.z, r0.w -mov.f32f32 r7.w, r1.y -mov.f32f32 r9.w, r7.y -mov.f32f32 r12.z, r1.y -mul.f r0.x, r7.z, r0.x -sel.b32 r7.z, r10.x, r6.z, r1.x -add.f r7.w, c15.x, (neg)r7.w -mov.f32f32 r10.x, r9.w -add.f r12.z, c5.y, (neg)r12.z -sel.b32 r1.x, r7.z, r6.y, r1.x -mul.f r7.z, r7.w, r7.w -mov.f32f32 r13.y, r0.z -mov.f32f32 r13.z, r10.x -mov.f32f32 r13.w, r1.x -mov.f32f32 r12.z, r12.z -add.f r13.y, c15.y, (neg)r13.y -mov.f32f32 r13.z, r13.z -mad.f32 r0.x, r13.w, r1.z, r0.x -mul.f r1.z, r12.z, r12.z -mov.f32f32 r13.w, r0.z -mad.f32 r7.z, r13.y, r13.y, r7.z -mov.f32f32 r0.x, r0.x -sel.b32 r9.y, r9.y, r6.z, r1.w -cmps.f.eq r13.z, r13.z, c5.z -mov.f32f32 r7.z, r7.z -sel.b32 r6.z, r6.w, r6.z, r2.x -sel.b32 r1.w, r9.y, r6.y, r1.w -cov.u32f32 r6.w, r13.z -add.f r9.y, c5.y, (neg)r13.w -sel.b32 r2.x, r6.z, r6.y, r2.x -mov.f32f32 r6.y, r1.w -mov.f32f32 r6.z, r6.w -mov.f32f32 r6.w, r9.y -mov.f32f32 r9.y, r2.x -mad.f32 r0.x, r6.y, r8.x, r0.x -mov.f32f32 r6.y, r0.w -mov.f32f32 r8.x, r1.x -mov.f32f32 r13.z, r1.w -cmps.f.ne r6.z, r6.z, c4.x -mul.f r6.y, r0.x, r6.y -add.f r9.y, c15.z, (neg)r9.y -mul.f r8.x, r0.x, r8.x -mul.f r0.x, r0.x, r13.z -mov.f32f32 r6.y, r6.y -mad.f32 r7.z, r9.y, r9.y, r7.z -mov.f32f32 r8.x, r8.x -mov.f32f32 r0.x, r0.x -mul.f r6.y, c5.w, r6.y -mov.f32f32 r13.z, r7.y -mul.f r8.x, c5.w, r8.x -mul.f r0.x, c5.w, r0.x -mov.f32f32 r6.y, r6.y -rsq r7.z, r7.z -(ss)mov.f32f32 r7.z, r7.z -mov.f32f32 r8.x, r8.x -mov.f32f32 r0.x, r0.x -add.f r6.y, r8.y, (neg)r6.y -mul.f r7.w, r7.w, r7.z -add.f r8.y, r8.z, (neg)r8.x -add.f r0.y, r0.y, (neg)r0.x -mov.f32f32 r6.y, r6.y -mov.f32f32 r7.w, r7.w -mov.f32f32 r8.z, r0.w -mov.f32f32 r8.y, r8.y -absneg.f r13.w, (neg)r6.y -absneg.f r14.x, (neg)r6.y -absneg.f r14.y, (neg)r6.y -absneg.f r14.z, (neg)r6.y -mul.f r14.w, r13.w, r6.y -absneg.f r15.x, (neg)r8.y -mul.f r15.y, r14.x, r6.y -mul.f r15.z, r14.y, r6.y -absneg.f r15.w, (neg)r8.y -mad.f32 r14.w, r15.x, r8.y, r14.w -absneg.f r16.x, (neg)r8.y -mul.f r16.y, r14.z, r6.y -mad.f32 r15.y, r15.w, r8.y, r15.y -mov.f32f32 r14.w, r14.w -mov.f32f32 r0.y, r0.y -mad.f32 r15.z, r16.x, r8.y, r15.z -mov.f32f32 r15.y, r15.y -absneg.f r16.z, (neg)r8.y -absneg.f r16.w, (neg)r0.y -absneg.f r17.x, (neg)r0.y -mov.f32f32 r15.z, r15.z -absneg.f r17.y, (neg)r0.y -mad.f32 r14.w, r16.w, r0.y, r14.w -mad.f32 r15.y, r17.x, r0.y, r15.y -mad.f32 r16.y, r16.z, r8.y, r16.y -mad.f32 r15.z, r17.y, r0.y, r15.z -mov.f32f32 r14.w, r14.w -mov.f32f32 r15.y, r15.y -mov.f32f32 r16.y, r16.y -mov.f32f32 r15.z, r15.z -absneg.f r17.z, (neg)r0.y -absneg.f r17.w, (neg)r7.w -mov.f32f32 r18.x, r0.w -rcp r14.w, r14.w -(ss)mov.f32f32 r14.w, r14.w -mov.f32f32 r18.y, r1.y -rcp r15.y, r15.y -(ss)mov.f32f32 r15.y, r15.y -rcp r15.z, r15.z -mov.f32f32 r18.z, r1.y -(ss)mov.f32f32 r15.z, r15.z -add.f r18.y, c11.y, (neg)r18.y -mov.f32f32 r18.w, r1.y -add.f r18.z, c14.x, (neg)r18.z -mad.f32 r16.y, r17.z, r0.y, r16.y -mul.f r13.w, r18.y, r13.w -mov.f32f32 r18.y, r0.z -mul.f r14.x, r18.z, r14.x -add.f r18.z, c14.y, (neg)r18.w -mov.f32f32 r18.w, r0.z -add.f r18.y, c11.y, (neg)r18.y -mov.f32f32 r16.y, r16.y -mul.f r14.y, r18.z, r14.y -add.f r18.z, c14.y, (neg)r18.w -mad.f32 r13.w, r18.y, r15.x, r13.w -mov.f32f32 r15.x, r0.z -mul.f r18.x, r18.x, r17.w -mad.f32 r14.x, r18.z, r15.w, r14.x -mov.f32f32 r13.w, r13.w -mov.f32f32 r15.w, r2.x -add.f r15.x, c14.w, (neg)r15.x -mov.f32f32 r14.x, r14.x -mov.f32f32 r18.y, r2.x -add.f r15.w, c11.w, (neg)r15.w -mad.f32 r14.y, r15.x, r16.x, r14.y -rcp r15.x, r16.y -(ss)mov.f32f32 r15.x, r15.x -add.f r16.x, c14.z, (neg)r18.y -mad.f32 r13.w, r15.w, r16.w, r13.w -mov.f32f32 r14.y, r14.y -mov.f32f32 r15.w, r2.x -mad.f32 r14.x, r16.x, r17.x, r14.x -mov.f32f32 r13.w, r13.w -mov.f32f32 r16.x, r1.y -add.f r15.w, c14.z, (neg)r15.w -mov.f32f32 r14.x, r14.x -mul.f r13.w, r13.w, r14.w -add.f r14.w, c11.x, (neg)r16.x -mad.f32 r14.y, r15.w, r17.y, r14.y -mul.f r14.x, r14.x, r15.y -mov.f32f32 r13.w, r13.w -mov.f32f32 r15.y, r1.y -mov.f32f32 r14.y, r14.y +mad.f32 r8.w, r8.w, r9.w, r9.y +(ss)mul.f r5.w, r5.w, r10.y +sqrt r5.z, r5.z +(ss)add.f r2.z, r2.z, (neg)r5.z +(ss)add.f r5.z, r9.x, c12.z +cmps.f.ge r9.x, c6.z, r10.y +cmps.f.ne r9.y, r9.z, c4.x +mov.f32f32 r9.z, r2.z +add.f r5.w, c6.w, (neg)r5.w +sqrt r8.w, r8.w +(ss)mov.f32f32 r9.w, r8.w +mov.f32f32 r10.y, r5.z +cmps.f.ge r10.z, r9.z, c4.x +mad.f32 r11.z, r7.w, r9.z, r0.z +(ss)mul.f r8.w, r8.w, r9.w +mad.f32 r11.w, r10.w, r9.z, r2.x +cov.u32f32 r10.z, r10.z +cmps.f.ge r12.x, r9.z, c5.x +add.f r12.y, r11.z, c6.x +sqrt r5.w, r5.w +add.f r8.w, c4.w, (neg)r8.w +(ss)add.f r5.w, r8.y, (neg)r5.w +cov.u32f32 r8.y, r12.x +mul.f r12.x, r12.y, c5.w +add.f r12.y, r11.w, c6.x +mov.f32f32 r12.z, r5.w +mul.f r8.y, r10.z, r8.y +cmps.f.ge r10.z, c3.z, r9.z +sqrt r8.w, r8.w +mul.f r12.y, r12.y, c5.w +mad.f32 r12.w, r10.w, r12.z, r2.x +mad.f32 r13.x, r7.w, r12.z, r0.z +cov.u32f32 r10.z, r10.z +(ss)add.f r7.x, r7.x, (neg)r8.w +(ss)add.f r8.w, r12.w, c9.y +mad.f32 r9.z, r3.x, r9.z, r1.y +mul.f r8.y, r8.y, r10.z +add.f r10.z, r13.x, c9.x +mov.f32f32 r13.y, r7.x +mul.f r13.z, r8.w, c7.w +cmps.f.ne r8.y, r8.y, c4.x +mov.f32f32 r13.w, c3.z +add.f r14.w, r9.z, c6.y +mov.f32f32 r15.z, c4.x +sel.b32 r12.x, r12.x, r8.y, r14.x +sel.b32 r2.z, r2.z, r8.y, r13.w mov.f32f32 r14.x, r14.x -mad.f32 r15.y, r6.y, r13.w, r15.y -mov.f32f32 r15.w, r1.y -mul.f r14.y, r14.y, r15.z -mad.f32 r15.z, r6.y, r14.x, r15.w -add.f r15.y, r15.y, c6.x -mul.f r14.z, r14.w, r14.z -mov.f32f32 r14.y, r14.y -mov.f32f32 r14.w, r0.z -mov.f32f32 r15.y, r15.y -add.f r15.z, r15.z, c9.x -mov.f32f32 r15.w, r1.y -add.f r14.w, c11.y, (neg)r14.w -mul.f r15.y, r15.y, r15.y -mov.f32f32 r16.x, r0.z -mov.f32f32 r15.z, r15.z -mad.f32 r16.x, r8.y, r13.w, r16.x -mad.f32 r15.w, r6.y, r14.y, r15.w -mad.f32 r14.z, r14.w, r16.z, r14.z -mul.f r14.w, r15.z, r15.z -add.f r15.z, r16.x, c6.x -mov.f32f32 r16.x, r0.z -add.f r15.w, r15.w, c9.y -mad.f32 r16.x, r8.y, r14.x, r16.x -mov.f32f32 r15.z, r15.z +sel.b32 r12.y, r12.y, r8.y, r14.z +sel.b32 r11.z, r11.z, r8.y, r14.y +sel.b32 r2.z, r2.z, r9.y, r13.w +sel.b32 r12.x, r12.x, r9.y, r14.x +mov.f32f32 r13.w, r14.z +mov.f32f32 r14.x, r14.y +cmps.f.ge r14.y, r2.z, r12.z +mov.f32f32 r14.z, r12.x +sel.b32 r12.y, r12.y, r9.y, r13.w +sel.b32 r11.z, r11.z, r9.y, r14.x +cov.u32f32 r13.w, r14.y +cmps.f.ge r14.x, r12.z, c4.x +mov.f32f32 r14.y, r14.z +mov.f32f32 r14.z, r12.y +mov.f32f32 r15.w, r11.z +cov.u32f32 r14.x, r14.x +cmps.f.ge r16.x, r12.z, c5.x mov.f32f32 r14.z, r14.z mov.f32f32 r15.w, r15.w -(ss)mov.f32f32 r16.y, r2.x -mad.f32 r15.y, r15.z, r15.z, r15.y -add.f r15.z, r16.x, c9.y -mul.f r15.w, r15.w, r15.w -mov.f32f32 r16.x, r0.z -mov.f32f32 r15.y, r15.y -mov.f32f32 r16.z, r2.x -mov.f32f32 r15.z, r15.z -mad.f32 r16.z, r0.y, r13.w, r16.z -mad.f32 r16.x, r8.y, r14.y, r16.x -add.f r16.y, c11.w, (neg)r16.y -mad.f32 r14.w, r15.z, r15.z, r14.w -add.f r15.z, r16.z, c6.y -add.f r16.x, r16.x, c9.w -mad.f32 r14.z, r16.y, r17.z, r14.z -mov.f32f32 r14.w, r14.w -mov.f32f32 r15.z, r15.z -mov.f32f32 r16.y, r2.x -mov.f32f32 r16.x, r16.x -mad.f32 r16.y, r0.y, r14.x, r16.y -mad.f32 r15.y, r15.z, r15.z, r15.y -mov.f32f32 r14.z, r14.z -mad.f32 r15.z, r16.x, r16.x, r15.w -add.f r15.w, r16.y, c9.z -mov.f32f32 r15.y, r15.y -mul.f r14.z, r14.z, r15.x -mov.f32f32 r15.x, r15.z -mov.f32f32 r15.z, r15.w -mov.f32f32 r15.w, r2.x -mov.f32f32 r14.z, r14.z -mad.f32 r15.w, r0.y, r14.y, r15.w -sqrt r15.y, r15.y -(ss)mov.f32f32 r15.y, r15.y -mad.f32 r14.w, r15.z, r15.z, r14.w -mov.f32f32 r15.z, r1.y -add.f r15.w, r15.w, c9.z -mov.f32f32 r16.x, r15.y -mov.f32f32 r16.y, r15.y -mov.f32f32 r14.w, r14.w -mov.f32f32 r15.w, r15.w -mad.f32 r15.z, r6.y, r14.z, r15.z -mul.f r16.x, r16.x, r16.y -cmps.f.ge r15.y, c3.w, r15.y -mad.f32 r15.x, r15.w, r15.w, r15.x -add.f r15.z, r15.z, c12.x -mov.f32f32 r15.w, r16.x -sqrt r14.w, r14.w -(ss)mov.f32f32 r14.w, r14.w +sel.b32 r11.w, r11.w, r8.y, r15.x +cov.u32f32 r16.x, r16.x mov.f32f32 r15.x, r15.x -mov.f32f32 r15.z, r15.z -add.f r15.w, c4.w, (neg)r15.w -mov.f32f32 r16.x, r14.w -mov.f32f32 r16.y, r14.w -mul.f r15.z, r15.z, r15.z -mov.f32f32 r15.w, r15.w -sqrt r15.x, r15.x -(ss)mov.f32f32 r15.x, r15.x -mul.f r16.x, r16.x, r16.y -mov.f32f32 r16.y, r0.z -cmps.f.ge r14.w, c6.z, r14.w -cov.u32f32 r15.y, r15.y -mov.f32f32 r16.x, r16.x -sqrt r15.w, r15.w -(ss)mov.f32f32 r15.w, r15.w -mov.f32f32 r13.w, r13.w -mov.f32f32 r16.z, r15.x -add.f r16.x, c6.w, (neg)r16.x -mov.f32f32 r16.w, r15.x -add.f r13.w, r13.w, (neg)r15.w -mad.f32 r15.w, r8.y, r14.z, r16.y -mov.f32f32 r16.x, r16.x -mul.f r16.y, r16.z, r16.w -mov.f32f32 r13.w, r13.w -add.f r15.w, r15.w, c12.y -cmps.f.ge r15.x, c3.w, r15.x -cov.u32f32 r14.w, r14.w -cmps.f.ge r16.z, r13.w, c4.x -mov.f32f32 r16.w, r13.w -sqrt r16.x, r16.x -mov.f32f32 r16.y, r16.y -mov.f32f32 r17.x, r6.y -cov.u32f32 r16.z, r16.z -cmps.f.ge r17.y, r13.w, c5.x -(ss)mov.f32f32 r16.x, r16.x -add.f r16.y, c4.w, (neg)r16.y -mov.f32f32 r17.z, r13.w -cov.u32f32 r17.y, r17.y -mov.f32f32 r17.x, r17.x -mov.f32f32 r14.x, r14.x -mov.f32f32 r16.y, r16.y -mul.f r16.z, r16.z, r17.y -mov.f32f32 r17.y, r1.y -add.f r14.x, r14.x, (neg)r16.x -mov.f32f32 r16.x, r8.y -mov.f32f32 r16.z, r16.z -cmps.f.ge r18.y, c3.z, r13.w -mov.f32f32 r17.y, r17.y -mov.f32f32 r14.x, r14.x -sqrt r16.y, r16.y -(ss)mov.f32f32 r16.y, r16.y -cov.u32f32 r18.y, r18.y -mov.f32f32 r17.y, r17.y -mov.f32f32 r18.z, r14.x -mov.f32f32 r14.y, r14.y -mul.f r16.z, r16.z, r18.y -mad.f32 r16.w, r17.x, r16.w, r17.y -mov.f32f32 r17.x, r6.y -add.f r14.y, r14.y, (neg)r16.y -mov.f32f32 r16.y, r16.z -add.f r16.z, r16.w, c6.x -mov.f32f32 r17.x, r17.x -mov.f32f32 r14.y, r14.y -cmps.f.ne r16.y, r16.y, c4.x -mov.f32f32 r17.y, r13.w -mul.f r16.z, r16.z, c5.w -mov.f32f32 r16.x, r16.x -mov.f32f32 r18.w, r18.w -mov.f32f32 r17.y, r17.y -mov.f32f32 r19.x, c3.z -mov.f32f32 r19.y, r0.z -mov.f32f32 r18.y, r18.y -mov.f32f32 r13.w, r13.w -sel.b32 r17.y, r17.y, r16.y, r19.x -cmps.f.ne r15.y, r15.y, c4.x -sel.b32 r16.z, r16.z, r16.y, r18.w -mov.f32f32 r19.y, r19.y -sel.b32 r16.w, r16.w, r16.y, r18.y -sel.b32 r17.y, r17.y, r15.y, r19.x -sel.b32 r16.z, r16.z, r15.y, r18.w -mov.f32f32 r18.w, r19.y -sel.b32 r16.w, r16.w, r15.y, r18.y -mov.f32f32 r18.y, r17.y -mov.f32f32 r16.z, r16.z -mad.f32 r16.x, r16.x, r17.z, r18.w -mov.f32f32 r16.w, r16.w -cmps.f.ge r17.z, r18.y, r14.x -mov.f32f32 r16.z, r16.z -add.f r18.y, r16.x, c6.x -mov.f32f32 r16.w, r16.w -cov.u32f32 r17.z, r17.z -mov.f32f32 r18.w, r0.y -mov.f32f32 r19.y, c4.x -mul.f r18.y, r18.y, c5.w -mov.f32f32 r17.z, r17.z -cmps.f.ge r19.z, r14.x, c4.x -mov.f32f32 r19.x, r19.x -mov.f32f32 r18.w, r18.w -mov.f32f32 r19.w, r20.x -cov.u32f32 r19.z, r19.z -cmps.f.ge r20.x, r14.x, c5.x -sel.b32 r16.x, r16.x, r16.y, r19.x -mov.f32f32 r20.y, r2.x -mov.f32f32 r20.z, c3.y -cov.u32f32 r20.x, r20.x -sel.b32 r18.y, r18.y, r16.y, r19.w -sel.b32 r16.x, r16.x, r15.y, r19.x -mov.f32f32 r19.x, r20.y -mul.f r19.z, r19.z, r20.x -sel.b32 r18.y, r18.y, r15.y, r19.w -mov.f32f32 r16.x, r16.x -mov.f32f32 r19.x, r19.x -mov.f32f32 r19.z, r19.z -mov.f32f32 r18.y, r18.y -mov.f32f32 r16.x, r16.x -mad.f32 r13.w, r18.w, r13.w, r19.x -mul.f r17.z, r19.z, r17.z -mov.f32f32 r18.y, r18.y -sel.b32 r18.w, r19.y, r16.y, r20.z -mov.f32f32 r19.x, r19.y -mov.f32f32 r17.z, r17.z -add.f r19.y, r13.w, c6.y -sel.b32 r18.w, r18.w, r15.y, r20.z -sel.b32 r13.w, r13.w, r16.y, r19.x -cmps.f.ne r17.z, r17.z, c4.x -mov.f32f32 r17.y, r17.y -mov.f32f32 r19.z, r14.x -mov.f32f32 r19.w, r1.y -mov.f32f32 r20.x, r14.x -mov.f32f32 r20.y, r8.y -mov.f32f32 r19.z, r19.z -mov.f32f32 r19.w, r19.w -sel.b32 r13.w, r13.w, r15.y, r19.x -mul.f r19.x, r19.y, c5.w -sel.b32 r19.y, r19.z, r17.z, r17.y -cmps.f.ne r14.w, r14.w, c4.x -mov.f32f32 r19.z, r19.w -mov.f32f32 r19.w, r20.y -mad.f32 r17.x, r17.x, r18.z, r19.z -sel.b32 r17.y, r19.y, r14.w, r17.y -mov.f32f32 r18.z, r0.z -mov.f32f32 r13.w, r13.w -add.f r19.y, r17.x, c9.x -mov.f32f32 r19.z, r17.y -mov.f32f32 r18.z, r18.z -sel.b32 r17.x, r17.x, r17.z, r16.w -mov.f32f32 r13.w, r13.w -cmps.f.ge r19.z, r19.z, r14.y -mul.f r19.y, r19.y, c7.w -mov.f32f32 r18.z, r18.z -sel.b32 r16.w, r17.x, r14.w, r16.w -cov.u32f32 r17.x, r19.z -sel.b32 r19.y, r19.y, r17.z, r16.z -mad.f32 r18.z, r19.w, r20.x, r18.z -mov.f32f32 r16.w, r16.w -mov.f32f32 r17.x, r17.x -cmps.f.ge r19.z, r14.y, c4.x -sel.b32 r16.z, r19.y, r14.w, r16.z -add.f r19.y, r18.z, c9.y -mov.f32f32 r16.w, r16.w -cov.u32f32 r19.z, r19.z -cmps.f.ge r20.x, r14.y, c5.x -mov.f32f32 r16.z, r16.z -mul.f r20.y, r19.y, c7.w -sel.b32 r18.z, r18.z, r17.z, r16.x -cov.u32f32 r20.x, r20.x -mov.f32f32 r16.z, r16.z -sel.b32 r20.y, r20.y, r17.z, r18.y -sel.b32 r16.x, r18.z, r14.w, r16.x -mul.f r18.z, r19.z, r20.x -mov.f32f32 r14.x, r14.x -mov.f32f32 r19.z, r19.w -mov.f32f32 r18.w, r18.w -mov.f32f32 r18.z, r18.z -sel.b32 r18.y, r20.y, r14.w, r18.y -mov.f32f32 r16.x, r16.x -mov.f32f32 r19.w, r0.y -mul.f r17.x, r18.z, r17.x -mov.f32f32 r18.y, r18.y -mov.f32f32 r16.x, r16.x -mov.f32f32 r18.z, r19.w -mov.f32f32 r17.x, r17.x -mov.f32f32 r18.y, r18.y -mov.f32f32 r19.w, r2.x -sel.b32 r16.y, r19.x, r16.y, r19.z -cmps.f.ne r17.x, r17.x, c4.x -mov.f32f32 r17.y, r17.y -mov.f32f32 r19.x, r14.y -mov.f32f32 r20.x, r14.y -mov.f32f32 r20.y, r14.y -mov.f32f32 r20.z, r6.y -mov.f32f32 r19.x, r19.x -mov.f32f32 r20.w, r8.y -mov.f32f32 r19.w, r19.w -sel.b32 r15.y, r16.y, r15.y, r19.z -sel.b32 r16.y, r19.x, r17.x, r17.y -cov.u32f32 r15.x, r15.x -mov.f32f32 r19.x, r20.z -mov.f32f32 r19.z, r20.w -mov.f32f32 r20.z, r1.y -cmps.f.ne r15.x, r15.x, c4.x -mov.f32f32 r20.w, r0.z -mov.f32f32 r19.w, r19.w -mov.f32f32 r15.y, r15.y -sel.b32 r16.y, r16.y, r15.x, r17.y -mov.f32f32 r17.y, r20.z -mov.f32f32 r20.z, r20.w -mad.f32 r14.x, r18.z, r14.x, r19.w -mov.f32f32 r16.y, r16.y -mov.f32f32 r15.w, r15.w -mov.f32f32 r17.y, r17.y -mov.f32f32 r18.z, r20.z -mad.f32 r17.y, r19.x, r20.x, r17.y -mad.f32 r15.z, r15.w, r15.w, r15.z -mad.f32 r15.w, r19.z, r20.y, r18.z -sel.b32 r18.z, r14.x, r17.z, r13.w -mov.f32f32 r15.y, r15.y -mov.f32f32 r15.z, r15.z -mov.f32f32 r19.x, r2.x -add.f r19.z, r17.y, c9.y -mad.f32 r19.x, r0.y, r14.z, r19.x -add.f r19.w, r15.w, c9.w -sel.b32 r17.y, r17.y, r17.x, r16.w -sel.b32 r15.w, r15.w, r17.x, r16.x -add.f r19.x, r19.x, c12.z -mul.f r19.z, r19.z, c5.w -mul.f r19.w, r19.w, c5.w -sel.b32 r16.w, r17.y, r15.x, r16.w -mov.f32f32 r17.y, r19.x -sel.b32 r19.x, r19.z, r17.x, r16.z -sel.b32 r19.z, r19.w, r17.x, r18.y -mov.f32f32 r16.w, r16.w -mad.f32 r15.z, r17.y, r17.y, r15.z -sel.b32 r16.z, r19.x, r15.x, r16.z -sel.b32 r17.y, r19.z, r15.x, r18.y -mov.f32f32 r16.w, r16.w -mov.f32f32 r15.z, r15.z -mov.f32f32 r16.z, r16.z -mov.f32f32 r17.y, r17.y -sel.b32 r15.w, r15.w, r15.x, r16.x -sel.b32 r13.w, r18.z, r14.w, r13.w -add.f r14.x, r14.x, c9.z -mov.f32f32 r16.x, r18.w -sqrt r15.z, r15.z -(ss)mov.f32f32 r15.z, r15.z -mov.f32f32 r16.z, r16.z -mov.f32f32 r17.y, r17.y -mov.f32f32 r15.w, r15.w -mov.f32f32 r18.y, r15.z -mov.f32f32 r18.z, r15.z -mov.f32f32 r13.w, r13.w -mul.f r18.w, r14.x, c7.w -mov.f32f32 r19.x, c6.y -mul.f r18.y, r18.y, r18.z -mov.f32f32 r15.w, r15.w -mov.f32f32 r13.w, r13.w -sel.b32 r18.z, r18.w, r17.z, r15.y -mov.f32f32 r18.y, r18.y -mov.f32f32 r14.y, r14.y -sel.b32 r18.w, r19.x, r17.z, r16.x -mov.f32f32 r19.x, r0.y -add.f r18.y, c6.y, (neg)r18.y -sel.b32 r15.y, r18.z, r14.w, r15.y -sel.b32 r16.x, r18.w, r14.w, r16.x -mov.f32f32 r18.z, r19.x -mov.f32f32 r18.y, r18.y -mov.f32f32 r18.w, r2.x -mov.f32f32 r15.y, r15.y -mov.f32f32 r16.x, r16.x -sel.b32 r14.x, r14.x, r17.z, r0.x -sel.b32 r17.z, r19.y, r17.z, r8.x -nop -sqrt r18.y, r18.y -(ss)mov.f32f32 r18.y, r18.y -mov.f32f32 r14.z, r14.z -mov.f32f32 r18.w, r18.w -mov.f32f32 r15.y, r15.y -mov.f32f32 r16.x, r16.x -add.f r14.z, r14.z, (neg)r18.y -mov.f32f32 r18.y, r18.w -mov.f32f32 r18.w, c5.w -mad.f32 r14.y, r18.z, r14.y, r18.y -mov.f32f32 r14.z, r14.z -sel.b32 r0.x, r14.x, r14.w, r0.x -sel.b32 r8.x, r17.z, r14.w, r8.x -add.f r14.x, r14.y, c9.z -cmps.f.ge r14.w, r16.y, r14.z -sel.b32 r14.y, r14.y, r17.x, r13.w -sel.b32 r16.y, r18.w, r17.x, r16.x -mul.f r14.x, r14.x, c5.w -cov.u32f32 r14.w, r14.w -sel.b32 r13.w, r14.y, r15.x, r13.w -sel.b32 r14.y, r16.y, r15.x, r16.x -sel.b32 r14.x, r14.x, r17.x, r15.y -mov.f32f32 r14.w, r14.w -cmps.f.ge r16.x, r14.z, c4.x -mov.f32f32 r13.w, r13.w -sel.b32 r14.x, r14.x, r15.x, r15.y -mov.f32f32 r14.y, r14.y -cov.u32f32 r15.x, r16.x -cmps.f.ge r15.y, r14.z, c5.x -mov.f32f32 r13.w, r13.w +sel.b32 r9.z, r9.z, r8.y, r15.y +mul.f r14.w, r14.w, c5.w +mul.f r14.x, r14.x, r16.x +sel.b32 r11.w, r11.w, r9.y, r15.x +mov.f32f32 r15.x, r15.y +sel.b32 r14.w, r14.w, r8.y, r15.w +mul.f r13.w, r14.x, r13.w +mov.f32f32 r14.x, r11.w +sel.b32 r9.z, r9.z, r9.y, r15.x +mov.f32f32 r15.x, r15.w +cmps.f.ne r13.w, r13.w, c4.x +mul.f r10.z, r10.z, c7.w +mad.f32 r12.z, r3.x, r12.z, r1.y +sel.b32 r14.w, r14.w, r9.y, r15.x +sel.b32 r5.w, r5.w, r13.w, r2.z +mov.f32f32 r2.z, r2.z +cov.u32f32 r9.x, r9.x +sel.b32 r10.z, r10.z, r13.w, r12.x +sel.b32 r12.x, r13.z, r13.w, r12.y +sel.b32 r11.z, r13.x, r13.w, r11.z +cmps.f.ne r9.x, r9.x, c4.x +sel.b32 r11.w, r12.w, r13.w, r11.w +sel.b32 r12.y, r12.z, r13.w, r9.z +add.f r12.z, r12.z, c9.z +sel.b32 r2.z, r5.w, r9.x, r2.z +sel.b32 r5.w, r10.z, r9.x, r14.y +sel.b32 r10.z, r12.x, r9.x, r14.z +sel.b32 r11.z, r11.z, r9.x, r15.w +cmps.f.ge r12.x, r2.z, r13.y +mov.f32f32 r12.w, r5.w +mov.f32f32 r13.x, r10.z +mov.f32f32 r13.z, r11.z +cov.u32f32 r12.x, r12.x +cmps.f.ge r14.y, r13.y, c4.x +mov.f32f32 r12.w, r12.w +mov.f32f32 r13.x, r13.x +mov.f32f32 r13.z, r13.z +cov.u32f32 r14.y, r14.y +cmps.f.ge r14.z, r13.y, c5.x mov.f32f32 r14.x, r14.x -mov.f32f32 r14.y, r14.y -cov.u32f32 r15.y, r15.y -mov.f32f32 r16.x, r14.z -mov.f32f32 r16.y, r14.z -mov.f32f32 r14.z, r14.z -mul.f r15.x, r15.x, r15.y +mov.f32f32 r9.z, r9.z +mul.f r15.x, r12.z, c7.w +cov.u32f32 r14.z, r14.z +sel.b32 r11.w, r11.w, r9.x, r14.x +mov.f32f32 r9.z, r9.z +sel.b32 r14.x, r15.x, r13.w, r14.w +mul.f r14.y, r14.y, r14.z +mov.f32f32 r14.z, r11.w +sel.b32 r9.z, r12.y, r9.x, r9.z +mov.f32f32 r12.y, r14.w +mul.f r12.x, r14.y, r12.x +mov.f32f32 r14.y, r14.z +mov.f32f32 r14.z, r9.z +mov.f32f32 r12.y, r12.y +cmps.f.ne r12.x, r12.x, c4.x +mad.f32 r14.w, r7.w, r13.y, r0.z +mad.f32 r15.x, r10.w, r13.y, r2.x +mad.f32 r13.y, r3.x, r13.y, r1.y +sel.b32 r7.x, r7.x, r12.x, r2.z +mov.f32f32 r2.z, r2.z +cmps.f.ge r9.w, c3.w, r9.w +add.f r15.y, r14.w, c9.y +add.f r15.w, r15.x, c9.w +sel.b32 r11.z, r14.w, r12.x, r11.z +cov.u32f32 r9.w, r9.w +mul.f r14.w, r15.y, c5.w +mul.f r15.y, r15.w, c5.w +sel.b32 r11.w, r15.x, r12.x, r11.w +cmps.f.ne r9.w, r9.w, c4.x +sel.b32 r5.w, r14.w, r12.x, r5.w +sel.b32 r10.z, r15.y, r12.x, r10.z +sel.b32 r9.z, r13.y, r12.x, r9.z +sel.b32 r2.z, r7.x, r9.w, r2.z +mad.f32 r5.z, r5.z, r10.y, r8.x +sel.b32 r5.w, r5.w, r9.w, r12.w +sel.b32 r7.x, r10.z, r9.w, r13.x +sel.b32 r8.x, r11.z, r9.w, r13.z +sel.b32 r10.y, r11.w, r9.w, r14.y +mov.f32f32 r10.z, r14.z +sel.b32 r11.z, r14.x, r9.x, r12.y +sqrt r5.z, r5.z +(ss)mov.f32f32 r11.w, r5.z +mov.f32f32 r12.y, r5.w +mov.f32f32 r12.w, r7.x +mov.f32f32 r13.x, r8.x +(ss)mul.f r5.z, r5.z, r11.w +mov.f32f32 r12.y, r12.y +mov.f32f32 r12.w, r12.w +mov.f32f32 r13.x, r13.x +add.f r5.z, c6.y, (neg)r5.z +mov.f32f32 r13.z, r10.y +sel.b32 r9.z, r9.z, r9.w, r10.z +add.f r10.z, r13.y, c9.z +mov.f32f32 r13.y, c3.y +mov.f32f32 r14.x, r11.z +mov.f32f32 r2.y, r2.y +sqrt r5.z, r5.z +(ss)add.f r5.x, r5.x, (neg)r5.z +(ss)mov.f32f32 r5.z, r13.z +mov.f32f32 r13.z, r9.z +mul.f r10.z, r10.z, c5.w +mov.f32f32 r14.y, r5.x +sel.b32 r8.y, r15.z, r8.y, r13.y mov.f32f32 r14.x, r14.x -mov.f32f32 r6.y, r6.y +sel.b32 r12.z, r12.z, r13.w, r2.y +cmps.f.ge r2.z, r2.z, r14.y +mov.f32f32 r13.z, r13.z +sel.b32 r10.z, r10.z, r12.x, r11.z +sel.b32 r8.y, r8.y, r9.y, r13.y +cov.u32f32 r2.z, r2.z +cmps.f.ge r9.y, r14.y, c5.x +sel.b32 r10.z, r10.z, r9.w, r14.x +mov.f32f32 r11.z, c6.y +sel.b32 r2.y, r12.z, r9.x, r2.y +cov.u32f32 r9.y, r9.y +cmps.f.ge r5.x, r5.x, c4.x +mov.f32f32 r12.z, r10.z +sel.b32 r11.z, r11.z, r13.w, r8.y mov.f32f32 r8.y, r8.y -mov.f32f32 r15.x, r15.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r6.y, r6.y +cov.u32f32 r5.x, r5.x +mov.f32f32 r12.z, r12.z +mov.f32f32 r6.w, r6.w mov.f32f32 r8.y, r8.y -mul.f r14.w, r15.x, r14.w -mov.f32f32 r15.x, r1.y -mov.f32f32 r15.y, r0.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r14.w, r14.w -mov.f32f32 r15.x, r15.x -mov.f32f32 r15.y, r15.y -mov.f32f32 r17.x, r2.x -cmps.f.ne r14.w, r14.w, c4.x -mov.f32f32 r15.x, r15.x -mov.f32f32 r17.z, c5.z -mad.f32 r6.y, r6.y, r16.x, r15.x -mov.f32f32 r15.x, r15.y -mov.f32f32 r15.y, r17.x -sel.b32 r16.x, r17.z, r14.w, r14.y -sel.b32 r17.x, r6.y, r14.w, r16.w -cmps.f.ge r15.z, c6.y, r15.z -mad.f32 r8.y, r8.y, r16.y, r15.x -mov.f32f32 r15.x, r15.y -add.f r6.y, r6.y, c12.x -cov.u32f32 r15.y, r15.z -sel.b32 r15.z, r8.y, r14.w, r15.w -mad.f32 r0.y, r0.y, r14.z, r15.x -sel.b32 r6.y, r6.y, r14.w, r16.z -cmps.f.ne r14.z, r15.y, c4.x -add.f r8.y, r8.y, c12.y -sel.b32 r15.x, r0.y, r14.w, r13.w -add.f r0.y, r0.y, c12.z -sel.b32 r15.y, r17.x, r14.z, r16.w -sel.b32 r14.y, r16.x, r14.z, r14.y -sel.b32 r15.z, r15.z, r14.z, r15.w -sel.b32 r13.w, r15.x, r14.z, r13.w -mov.f32f32 r15.x, r15.y -mov.f32f32 r15.w, r14.y -mov.f32f32 r15.y, r15.y -mov.f32f32 r1.y, r1.y -add.f r15.x, c15.x, (neg)r15.x -mov.f32f32 r16.x, r15.w -mov.f32f32 r15.w, r15.w -mov.f32f32 r1.y, r1.y -mul.f r16.y, r15.x, r15.x -mov.f32f32 r16.w, r15.z -mov.f32f32 r17.x, r16.x -add.f r1.y, r1.y, (neg)r15.y -mov.f32f32 r15.y, r16.x -add.f r16.x, c15.y, (neg)r16.w -mov.f32f32 r16.w, r17.x -mov.f32f32 r1.y, r1.y -cmps.f.eq r15.y, r15.y, c5.w -mad.f32 r16.y, r16.x, r16.x, r16.y -cmps.f.eq r16.w, r16.w, c5.z -mul.f r17.x, r1.y, r1.y -mov.f32f32 r15.z, r15.z -mov.f32f32 r16.y, r16.y -mov.f32f32 r17.z, r13.w -cov.u32f32 r16.w, r16.w -mov.f32f32 r0.z, r0.z -cov.u32f32 r15.y, r15.y -add.f r17.z, c15.z, (neg)r17.z -mov.f32f32 r16.w, r16.w -mov.f32f32 r0.z, r0.z -cmps.f.ne r15.y, r15.y, c4.x -mad.f32 r16.y, r17.z, r17.z, r16.y -cmps.f.ne r16.w, r16.w, c4.x -mov.f32f32 r18.y, r14.y -add.f r0.z, r0.z, (neg)r15.z -cmps.f.eq r15.z, r15.w, c6.y -mov.f32f32 r13.w, r13.w -cmps.f.eq r15.w, r18.y, c4.x -rsq r16.y, r16.y -(ss)mov.f32f32 r16.y, r16.y -sel.b32 r2.w, r3.x, r16.w, r2.w -sel.b32 r3.x, r5.z, r16.w, r0.x -sel.b32 r5.z, r11.x, r16.w, r8.x -mul.f r11.x, r15.x, r16.y -sel.b32 r2.w, r3.y, r15.y, r2.w -sel.b32 r3.x, r5.w, r15.y, r3.x -sel.b32 r3.y, r7.x, r15.y, r5.z -mov.f32f32 r5.z, r11.x -cov.u32f32 r5.w, r15.z -sel.b32 r6.y, r6.y, r14.z, r16.z -cov.u32f32 r7.x, r15.w -absneg.f r11.x, (neg)r5.z -cmps.f.ne r5.w, r5.w, c4.x -mov.f32f32 r15.x, r6.y -mov.f32f32 r15.z, r6.y -mov.f32f32 r15.w, c12.z -sel.b32 r2.w, r3.z, r5.w, r2.w -mul.f r3.z, r15.x, r11.x -mul.f r15.x, r16.x, r16.y -cmps.f.ne r16.x, r7.x, c4.x -sel.b32 r2.y, r2.y, r5.w, r3.x -sel.b32 r3.x, r11.y, r5.w, r3.y -mov.f32f32 r3.y, r15.x -sel.b32 r2.w, r3.w, r16.x, r2.w -sel.b32 r2.y, r6.x, r16.x, r2.y -sel.b32 r3.x, r11.z, r16.x, r3.x -absneg.f r3.w, (neg)r3.y -sel.b32 r6.x, r8.y, r14.w, r17.y -mov.f32f32 r8.y, r2.w -cmps.f.eq r11.y, r14.y, c3.y -mul.f r5.z, r15.z, r5.z -sel.b32 r6.x, r6.x, r14.z, r17.y -sel.b32 r4.x, r4.x, r6.z, r8.y -cov.u32f32 r8.y, r11.y -mov.f32f32 r10.x, r10.x -mov.f32f32 r11.y, r6.x -mov.f32f32 r11.z, r6.x -sel.b32 r7.x, r15.w, r16.w, r7.x -mov.f32f32 r14.y, c12.w -mad.f32 r3.z, r11.y, r3.w, r3.z -cmps.f.eq r10.x, r10.x, c5.w -cmps.f.ne r8.y, r8.y, c4.x -mad.f32 r3.y, r11.z, r3.y, r5.z -mov.f32f32 r3.z, r3.z -mul.f r5.z, r17.z, r16.y -cov.u32f32 r10.x, r10.x -sel.b32 r0.x, r0.x, r8.y, r2.y -sel.b32 r8.x, r8.x, r8.y, r3.x -mov.f32f32 r5.z, r5.z -cmps.f.ne r10.x, r10.x, c4.x -sel.b32 r0.x, r8.w, r6.z, r0.x -sel.b32 r8.x, r11.w, r6.z, r8.x -absneg.f r8.w, (neg)r5.z -sel.b32 r0.y, r0.y, r14.w, r14.x -sel.b32 r4.x, r4.y, r10.x, r4.x -sel.b32 r0.x, r9.x, r10.x, r0.x -sel.b32 r4.y, r12.x, r10.x, r8.x -sel.b32 r0.y, r0.y, r14.z, r14.x -mov.f32f32 r8.x, r9.w -mov.f32f32 r3.y, r3.y -sel.b32 r7.x, r14.y, r15.y, r7.x -mov.f32f32 r9.x, r0.y -cmps.f.eq r8.x, r8.x, c6.y -mov.f32f32 r9.w, r0.y -mov.f32f32 r11.y, c12.w -mad.f32 r3.z, r9.x, r8.w, r3.z -mov.f32f32 r6.y, r6.y -mov.f32f32 r6.x, r6.x -mov.f32f32 r0.y, r0.y +mul.f r5.x, r5.x, r9.y +mad.f32 r7.w, r7.w, r14.y, r0.z +mad.f32 r9.y, r10.w, r14.y, r2.x +mad.f32 r3.x, r3.x, r14.y, r1.y +mul.f r2.z, r5.x, r2.z +sel.b32 r5.x, r11.z, r9.x, r8.y +mov.f32f32 r8.y, c5.w +sel.b32 r8.w, r8.w, r13.w, r6.w +cmps.f.ne r2.z, r2.z, c4.x +add.f r10.w, r7.w, c12.x +add.f r11.z, r9.y, c12.y +add.f r13.y, r3.x, c12.z +sel.b32 r7.w, r7.w, r2.z, r8.x +cmps.f.ge r8.x, c6.y, r11.w +sel.b32 r8.y, r8.y, r12.x, r5.x +mov.f32f32 r5.x, r5.x +sel.b32 r9.y, r9.y, r2.z, r10.y cov.u32f32 r8.x, r8.x -mul.f r6.y, r3.z, r6.y -mul.f r6.x, r3.z, r6.x -mul.f r0.y, r3.z, r0.y -cmps.f.ne r3.z, r8.x, c4.x -mov.f32f32 r6.y, r6.y -mov.f32f32 r6.x, r6.x -mov.f32f32 r0.y, r0.y -sel.b32 r4.x, r4.z, r3.z, r4.x -mul.f r4.z, c5.w, r6.y -mul.f r6.x, c5.w, r6.x -mul.f r0.y, c5.w, r0.y -cmps.f.eq r6.y, r13.z, c4.x -mov.f32f32 r4.z, r4.z -mov.f32f32 r6.x, r6.x -mov.f32f32 r0.y, r0.y -cov.u32f32 r6.y, r6.y -add.f r4.z, r11.x, (neg)r4.z -add.f r3.w, r3.w, (neg)r6.x -add.f r0.y, r8.w, (neg)r0.y -nop -mov.f32f32 r4.z, r4.z -mov.f32f32 r0.z, r0.z -mov.f32f32 r3.w, r3.w -mov.f32f32 r0.y, r0.y -cmps.f.ne r6.x, r6.y, c4.x -mad.f32 r8.x, r0.z, r0.z, r17.x -sel.b32 r0.x, r10.y, r3.z, r0.x -sel.b32 r4.y, r12.y, r3.z, r4.y -nop -mov.f32f32 r8.x, r8.x -mov.f32f32 r8.w, r2.x -sel.b32 r4.x, r4.w, r6.x, r4.x -sel.b32 r0.x, r10.z, r6.x, r0.x -sel.b32 r4.y, r12.w, r6.x, r4.y -mov.f32f32 r4.w, r8.w -mad.f32 r3.y, r9.w, r5.z, r3.y -sel.b32 r5.z, r11.y, r5.w, r7.x -nop -add.f r4.w, r4.w, (neg)r13.w -mov.f32f32 r3.y, r3.y +sel.b32 r3.x, r3.x, r2.z, r9.z +mov.f32f32 r5.x, r5.x +sel.b32 r5.w, r10.w, r2.z, r5.w +cmps.f.ne r8.x, r8.x, c4.x +sel.b32 r7.x, r11.z, r2.z, r7.x +sel.b32 r9.z, r13.y, r2.z, r10.z +sel.b32 r5.x, r8.y, r9.w, r5.x +sel.b32 r7.w, r7.w, r8.x, r13.x +mov.f32f32 r8.y, c5.z +sel.b32 r5.z, r9.y, r8.x, r5.z +sel.b32 r3.x, r3.x, r8.x, r13.z +add.f r9.y, c15.x, (neg)r7.w +sel.b32 r2.z, r8.y, r2.z, r5.x +mov.f32f32 r5.x, r5.x +add.f r0.z, r0.z, (neg)r7.w +mul.f r7.w, r9.y, r9.y +add.f r8.y, c15.y, (neg)r5.z +mov.f32f32 r5.x, r5.x +mov.f32f32 r9.w, r0.z +add.f r2.x, r2.x, (neg)r5.z +mad.f32 r5.z, r8.y, r8.y, r7.w +add.f r7.w, c15.z, (neg)r3.x +sel.b32 r2.z, r2.z, r8.x, r5.x +mul.f r0.z, r0.z, r9.w +mov.f32f32 r5.x, r2.x +mad.f32 r5.z, r7.w, r7.w, r5.z +mov.f32f32 r10.y, r2.z +cmps.f.eq r10.z, r2.z, c6.y +cmps.f.eq r10.w, r2.z, c4.x +cmps.f.eq r2.z, r2.z, c3.y +add.f r1.y, r1.y, (neg)r3.x +mov.f32f32 r3.x, r10.y +rsq r5.z, r5.z +(ss)mov.f32f32 r11.z, r5.z +mad.f32 r0.z, r2.x, r5.x, r0.z +mov.f32f32 r2.x, r1.y +(ss)mul.f r5.z, r7.w, r5.z +mul.f r7.w, r9.y, r11.z +cmps.f.eq r3.x, r3.x, c5.z +sel.b32 r5.w, r5.w, r8.x, r12.y +mul.f r8.y, r8.y, r11.z +mov.f32f32 r9.y, r7.w +cov.u32f32 r3.x, r3.x +mul.f r7.w, r5.w, r7.w +mov.f32f32 r11.z, r8.y +absneg.f r9.y, (neg)r9.y +cmps.f.ne r3.x, r3.x, c4.x +sel.b32 r6.w, r8.w, r9.x, r6.w +mov.f32f32 r8.w, c12.w +mul.f r9.x, r5.w, r9.y +absneg.f r11.z, (neg)r11.z +sel.b32 r7.x, r7.x, r8.x, r12.w +sel.b32 r8.w, r8.w, r3.x, r2.y +mov.f32f32 r11.w, c12.z +sel.b32 r4.y, r4.z, r3.x, r4.y +mad.f32 r4.z, r7.x, r11.z, r9.x +mov.f32f32 r9.x, r5.z +cmps.f.eq r10.y, r10.y, c5.w +sel.b32 r11.w, r11.w, r3.x, r6.w +mad.f32 r7.w, r7.x, r8.y, r7.w +absneg.f r8.y, (neg)r9.x +sel.b32 r8.x, r9.z, r8.x, r12.z +cov.u32f32 r9.x, r10.y +cov.u32f32 r9.z, r10.w +mov.f32f32 r10.y, c12.z +mad.f32 r4.z, r8.x, r8.y, r4.z +cmps.f.ne r9.x, r9.x, c4.x +mov.f32f32 r10.w, c12.w +mov.f32f32 r12.x, c12.z +mul.f r5.w, r4.z, r5.w +sel.b32 r4.y, r4.w, r9.x, r4.y +sel.b32 r4.w, r10.w, r9.x, r11.w +mul.f r7.x, r4.z, r7.x +mul.f r5.w, c5.w, r5.w +sel.b32 r8.w, r12.x, r9.x, r8.w +cov.u32f32 r10.z, r10.z +mul.f r7.x, c5.w, r7.x +add.f r5.w, r9.y, (neg)r5.w +mad.f32 r0.z, r2.x, r2.x, r0.z +cmps.f.ne r2.x, r10.z, c4.x +mov.f32f32 r9.y, c12.z +mov.f32f32 r10.z, c12.w +add.f r7.x, r11.z, (neg)r7.x +mul.f r4.z, r4.z, r8.x +sel.b32 r1.w, r1.w, r2.x, r4.y +rsq r0.z, r0.z +(ss)mov.f32f32 r4.y, r0.z +sel.b32 r8.w, r10.z, r2.x, r8.w +sel.b32 r4.w, r9.y, r2.x, r4.w +cmps.f.ne r9.y, r9.z, c4.x +mul.f r9.w, r9.w, r4.y +mov.f32f32 r10.z, c12.w +mov.f32f32 r10.w, c12.w +sel.b32 r1.w, r5.y, r9.y, r1.w +mul.f r5.y, r5.w, r9.w +mul.f r4.y, r5.x, r4.y +sel.b32 r5.x, r10.z, r9.y, r8.w +sel.b32 r4.w, r10.w, r9.y, r4.w +sel.b32 r5.w, r6.y, r2.w, r1.w +mad.f32 r4.y, r7.x, r4.y, r5.y +mul.f r4.z, c5.w, r4.z +cov.u32f32 r2.z, r2.z +cmps.f.eq r3.y, r3.y, c5.w +mad.f32 r5.y, r8.x, r5.z, r7.w +add.f r4.z, r8.y, (neg)r4.z +(ss)mul.f r0.z, r1.y, r0.z +cmps.f.ne r1.y, r2.z, c4.x +cov.u32f32 r2.z, r3.y +mov.f32f32 r3.y, r5.y +mad.f32 r0.z, r4.z, r0.z, r4.y +sel.b32 r2.y, r2.y, r1.y, r5.x +sel.b32 r4.y, r6.w, r1.y, r4.w +cmps.f.ne r2.z, r2.z, c4.x +mov.f32f32 r4.z, c12.w +mov.f32f32 r5.z, c12.z +sel.b32 r3.x, r10.y, r3.x, r9.z +log2 r0.z, r0.z +(ss)mul.f r0.z, c15.w, r0.z +sel.b32 r2.y, r4.z, r2.w, r2.y +sel.b32 r4.y, r5.z, r2.w, r4.y +sel.b32 r4.z, r6.z, r2.z, r5.w +mov.f32f32 r5.z, c12.z +mov.f32f32 r5.w, c12.w +cmps.f.eq r6.y, r1.x, c6.y +exp2 r0.z, r0.z +(ss)mad.f32 r1.w, r3.y, r1.w, r0.z +mad.f32 r5.x, r3.y, r5.x, r0.z +mad.f32 r3.y, r3.y, r4.w, r0.z +mov.f32f32 r4.w, c12.w +sel.b32 r1.w, r3.z, r1.y, r1.w +cov.u32f32 r3.z, r6.y +mov.f32f32 r6.y, c13.z +mov.f32f32 r6.z, c13.y +sel.b32 r3.x, r4.w, r9.x, r3.x +cmps.f.ne r3.z, r3.z, c4.x +mov.f32f32 r4.w, c12.w +sel.b32 r5.x, r6.y, r1.y, r5.x +sel.b32 r3.y, r6.z, r1.y, r3.y +mov.f32f32 r6.y, c12.w +sel.b32 r4.z, r4.w, r3.z, r4.z +cmps.f.ne r4.w, r7.z, c4.x +mov.f32f32 r6.z, c12.w +sel.b32 r2.y, r5.z, r2.z, r2.y +sel.b32 r4.y, r5.w, r2.z, r4.y +sel.b32 r2.x, r6.y, r2.x, r3.x +sel.b32 r3.x, r6.z, r4.w, r4.z +mov.f32f32 r4.z, c12.w +mov.f32f32 r5.z, c12.z mov.f32f32 r5.w, c12.z -mul.f r7.x, r13.y, r7.z -mov.f32f32 r4.w, r4.w -mul.f r7.w, r8.z, r7.w -mov.f32f32 r8.z, c12.z -mov.f32f32 r7.x, r7.x -mad.f32 r8.x, r4.w, r4.w, r8.x -sel.b32 r5.z, r5.w, r16.x, r5.z -mov.f32f32 r5.w, r1.x -absneg.f r8.w, (neg)r7.x -sel.b32 r6.y, r8.z, r6.z, r6.y -mov.f32f32 r6.z, r1.x -mad.f32 r5.w, r5.w, r7.x, r7.w -rsq r7.x, r8.x -(ss)mov.f32f32 r7.x, r7.x -mov.f32f32 r7.w, c12.w -mad.f32 r6.z, r6.z, r8.w, r18.x -mov.f32f32 r5.w, r5.w -mul.f r1.y, r1.y, r7.x -mul.f r0.z, r0.z, r7.x -mul.f r4.w, r4.w, r7.x -nop -mov.f32f32 r1.y, r1.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r4.w, r4.w -mov.f32f32 r6.z, r6.z -mul.f r1.y, r4.z, r1.y -mul.f r4.z, r9.y, r7.z -mad.f32 r0.z, r3.w, r0.z, r1.y -sel.b32 r1.y, r7.w, r10.x, r6.y -mov.f32f32 r3.w, c12.w -mad.f32 r1.z, r6.w, r6.w, r1.z -mov.f32f32 r0.z, r0.z -mov.f32f32 r4.z, r4.z -mad.f32 r0.y, r0.y, r4.w, r0.z -mov.f32f32 r0.z, r1.w -sel.b32 r1.y, r3.w, r3.z, r1.y -mov.f32f32 r1.z, r1.z -mov.f32f32 r0.y, r0.y -absneg.f r3.z, (neg)r4.z -mov.f32f32 r3.w, r1.w -mad.f32 r0.z, r0.z, r4.z, r5.w -mov.f32f32 r4.z, c12.z -mov.f32f32 r2.x, r2.x -cmps.f.eq r4.w, r7.y, c3.y -log2 r0.y, r0.y -(ss)mul.f r0.y, c15.w, r0.y -mad.f32 r3.w, r3.w, r3.z, r6.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.x, r1.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.w, r1.w -mul.f r0.w, r3.w, r0.w -mul.f r1.x, r3.w, r1.x -mov.f32f32 r0.z, r0.z -sel.b32 r1.y, r4.z, r6.x, r1.y -add.f r2.x, c5.z, (neg)r2.x -exp2 r0.y, r0.y -(ss)mad.f32 r2.w, r3.y, r2.w, r0.y -mad.f32 r2.y, r3.y, r2.y, r0.y -mad.f32 r3.x, r3.y, r3.x, r0.y -(ss)mad.f32 r0.y, r3.y, r5.z, r0.y -sel.b32 r2.z, r2.z, r8.y, r2.w -sel.b32 r2.y, r5.y, r8.y, r2.y -sel.b32 r2.w, r9.z, r8.y, r3.x -mov.f32f32 r3.x, c13.x -add.f r2.z, r4.x, r2.z -mov.f32f32 r0.w, r0.w -add.f r0.x, r0.x, r2.y -add.f r2.y, r4.y, r2.w -sel.b32 r0.y, r3.x, r8.y, r0.y +add.f r1.w, r3.x, r1.w +mad.f32 r3.x, r0.x, r11.y, r8.z +sel.b32 r2.y, r4.z, r3.z, r2.y +sel.b32 r4.y, r5.z, r3.z, r4.y +sel.b32 r2.x, r5.w, r9.y, r2.x +mul.f r0.y, r3.x, r0.y +mov.f32f32 r4.z, c12.w +mov.f32f32 r5.z, c12.w +(ss)mad.f32 r0.z, r5.y, r2.x, r0.z +mul.f r0.y, c5.w, r0.y +sel.b32 r2.x, r4.z, r4.w, r2.y +sel.b32 r2.y, r5.z, r4.w, r4.y +mov.f32f32 r4.y, c13.x +add.f r0.y, r11.x, (neg)r0.y +mov.f32f32 r4.z, r6.x +add.f r2.x, r2.x, r5.x +add.f r2.y, r2.y, r3.y +sel.b32 r0.z, r4.y, r1.y, r0.z +mad.f32 r1.y, r4.z, r4.z, r4.x +mov.f32f32 r3.y, c12.z +mul.f r0.w, r3.x, r0.w +mul.f r0.x, r3.x, r0.x +mov.f32f32 r3.x, r1.z +sel.b32 r2.w, r3.y, r2.w, r7.z +mov.f32f32 r3.y, c12.w +rsq r1.y, r1.y +(ss)mov.f32f32 r4.x, r1.y mul.f r0.w, c5.w, r0.w -mov.f32f32 r1.x, r1.x -mul.f r1.w, r3.w, r1.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r0.w, r0.w -add.f r0.y, r1.y, r0.y -mul.f r1.x, c5.w, r1.x -mov.f32f32 r1.y, r1.w -add.f r0.w, r17.w, (neg)r0.w -mad.f32 r1.z, r2.x, r2.x, r1.z -cov.u32f32 r1.w, r4.w +mul.f r0.x, c5.w, r0.x nop -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.x, r1.x -mul.f r1.y, c5.w, r1.y -cmps.f.ne r1.w, r1.w, c4.x -rsq r1.z, r1.z -(ss)mov.f32f32 r1.z, r1.z -add.f r1.x, r8.w, (neg)r1.x -mov.f32f32 r1.y, r1.y +mul.f r3.w, r3.w, r4.x +sel.b32 r2.z, r3.y, r2.z, r2.w +mov.f32f32 r2.w, c12.w +add.f r0.w, r10.x, (neg)r0.w +mul.f r0.y, r0.y, r3.w +mul.f r3.y, r7.y, r4.x +sel.b32 r2.z, r2.w, r3.z, r2.z +mov.f32f32 r2.w, c12.z +add.f r0.x, r11.y, (neg)r0.x +mad.f32 r0.y, r0.w, r3.y, r0.y +mul.f r0.w, r6.x, r1.y +(ss)sel.b32 r1.y, r2.w, r4.w, r2.z +cmps.f.eq r1.x, r1.x, c3.y +mov.f32f32 r2.z, c13.w +mad.f32 r0.x, r0.x, r0.w, r0.y +add.f r0.y, r1.y, r0.z +cov.u32f32 r0.z, r1.x +(rpt3)nop +log2 r0.x, r0.x +(ss)mul.f r0.x, c15.w, r0.x +cmps.f.ne r0.z, r0.z, c4.x +mov.f32f32 r0.w, c13.z +mov.f32f32 r1.x, c13.y mov.f32f32 r2.w, c13.x -mul.f r3.x, r12.z, r1.z -mov.f32f32 r1.x, r1.x -add.f r1.y, r3.z, (neg)r1.y -nop -mov.f32f32 r3.x, r3.x -mul.f r3.y, r6.w, r1.z -mul.f r1.z, r2.x, r1.z -nop -mul.f r0.w, r0.w, r3.x -mov.f32f32 r2.x, r3.y -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.z, r1.z -nop -mad.f32 r0.w, r1.x, r2.x, r0.w -(rpt2)nop -mov.f32f32 r0.w, r0.w -nop -mad.f32 r0.w, r1.y, r1.z, r0.w -(rpt2)nop -mov.f32f32 r0.w, r0.w -(rpt5)nop -log2 r0.w, r0.w -(ss)mul.f r0.w, c15.w, r0.w -(rpt2)nop -mov.f32f32 r0.w, r0.w -(rpt5)nop -exp2 r0.w, r0.w -(ss)mad.f32 r1.x, r0.z, r2.z, r0.w -mad.f32 r0.x, r0.z, r0.x, r0.w -mad.f32 r1.y, r0.z, r2.y, r0.w -mad.f32 r0.y, r0.z, r0.y, r0.w -sel.b32 r0.z, r5.x, r1.w, r1.x -sel.b32 r0.x, r10.w, r1.w, r0.x -(ss)sel.b32 r0.w, r13.x, r1.w, r1.y -sel.b32 r0.y, r2.w, r1.w, r0.y -mov.f32f32 r1.w, r0.z -mov.f32f32 r1.z, r0.x -mov.f32f32 r1.y, r0.w -mov.f32f32 r1.x, r0.y +(rpt1)nop +exp2 r0.x, r0.x +(ss)mad.f32 r1.y, r3.x, r1.w, r0.x +mad.f32 r2.x, r3.x, r2.x, r0.x +mad.f32 r2.y, r3.x, r2.y, r0.x +(ss)mad.f32 r0.x, r1.z, r0.y, r0.x +sel.b32 r1.w, r2.z, r0.z, r1.y +sel.b32 r1.z, r0.w, r0.z, r2.x +sel.b32 r1.y, r1.x, r0.z, r2.y +sel.b32 r1.x, r2.w, r0.z, r0.x end nop +nop ; FRAG: outputs: r1.x (1:0) ; FRAG: inputs: r0.x (5:20,cm=f,il=8,b=1) -; FRAG: 1344 instructions, 0 half, 21 full +; FRAG: 934 instructions, 0 half, 17 full diff --git a/reference/gmaps-frag.asm b/reference/gmaps-frag.asm index a658d94..b3a1bb0 100644 --- a/reference/gmaps-frag.asm +++ b/reference/gmaps-frag.asm @@ -6,79 +6,57 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c1.x) 0x00000000, 0x3f000000, 0x3f800000, 0x40000000 +@const(c2.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.z, 4, r0.x -bary.f r0.w, 6, r0.x -bary.f r1.x, 7, r0.x -bary.f r1.y, 5, r0.x +bary.f r1.x, 6, r0.x +bary.f r1.y, 7, r0.x +bary.f r0.w, 5, r0.x mul.f r1.z, c0.x, r0.z -mov.f32f32 r2.x, r0.z -cmps.f.lt r2.z, c1.x, r0.w -mul.f r1.w, c0.x, r1.y -mov.f32f32 r2.y, r1.y -min.f r0.w, r0.w, r1.x -cov.u32f32 r2.z, r2.z -cmps.f.lt r1.x, c1.x, r1.x -mul.f r2.w, c1.w, r0.z -add.f r0.w, r0.w, c1.y -dsy (f32)(xy)r3.x, r1.z -(sy)(ss)mov.f32f32 r1.z, r3.x -mov.f32f32 r1.w, r3.y -dsx (f32)(xy)r3.x, r2.x -cov.u32f32 r1.x, r1.x -(sy)(ss)mov.f32f32 r2.x, r3.x -mad.f32 r1.z, r2.w, r1.z, (neg)r1.w -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.w, r3.y -mul.f r1.x, r2.z, r1.x -mov.f32f32 r1.z, r1.z -mad.f32 r1.w, r2.w, r2.x, (neg)r1.w -min.f r0.w, r0.w, c1.z -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r1.w, r1.w +mul.f r2.x, c1.w, r0.z +cmps.f.lt r2.y, c1.x, r1.x +mul.f r1.w, c0.x, r0.w +min.f r1.x, r1.x, r1.y +cmps.f.lt r1.y, c1.x, r1.y +dsx (f32)(xy)r2.z, r0.z +(sy)mad.f32 r2.z, r2.x, r2.z, (neg)r2.w +cov.u32f32 r2.y, r2.y +add.f r1.x, r1.x, c1.y +cov.u32f32 r1.y, r1.y +dsy (f32)(xy)r2.w, r1.z +(sy)(ss)mad.f32 r1.z, r2.x, r2.w, (neg)r3.x +mov.f32f32 r1.w, r2.z +min.f r1.x, r1.x, c1.z +mad.f32 r0.z, r0.z, r0.z, (neg)r0.w +mov.f32f32 r0.w, r1.z +mul.f r1.w, r2.z, r1.w +mul.f r1.y, r2.y, r1.y +nop mov.f32f32 r0.w, r0.w -mad.f32 r0.z, r0.z, r0.z, (neg)r1.y -bary.f r1.y, 3, r0.x -mul.f r1.w, r1.w, r1.w -cmps.f.ne r1.x, r1.x, c1.x -mad.f32 r1.z, r1.z, r1.z, r1.w -bary.f r1.w, 2, r0.x -bary.f r2.x, 1, r0.x +bary.f r2.x, 3, r0.x +bary.f r2.y, 2, r0.x +bary.f r2.z, 1, r0.x +mad.f32 r0.w, r1.z, r0.w, r1.w +cmps.f.ne r1.y, r1.y, c1.x bary.f (ei)r0.x, 0, r0.x -mov.f32f32 r0.y, r1.z -(rpt5)nop -rsq r0.y, r0.y -(ss)mov.f32f32 r0.y, r0.y -(rpt2)nop -mul.f r0.y, r0.z, r0.y -(rpt2)nop -mov.f32f32 r0.y, r0.y +(rpt3)nop +rsq r0.y, r0.w +(ss)mul.f r0.y, r0.z, r0.y (rpt2)nop add.f r0.y, c1.y, (neg)r0.y (rpt2)nop -mov.f32f32 r0.y, r0.y -(rpt2)nop max.f r0.y, r0.y, c1.x (rpt2)nop min.f r0.y, r0.y, c1.z (rpt2)nop -mov.f32f32 r0.y, r0.y +sel.b32 r0.y, r1.x, r1.y, r0.y (rpt2)nop -sel.b32 r0.y, r0.w, r1.x, r0.y -(rpt2)nop -mul.f r0.z, r1.y, r0.y -mul.f r0.w, r1.w, r0.y -mul.f r1.x, r2.x, r0.y -mul.f r0.x, r0.x, r0.y -mov.f32f32 r1.w, r0.z -mov.f32f32 r1.z, r0.w -mov.f32f32 r1.y, r1.x -mov.f32f32 r1.x, r0.x +mul.f r1.w, r2.x, r0.y +mul.f r1.z, r2.y, r0.y +mul.f r1.y, r2.z, r0.y +mul.f r1.x, r0.x, r0.y end -nop -nop -nop ; FRAG: outputs: r1.x (1:0) -; FRAG: inputs: r0.z (5:20,cm=f,il=8,b=1) r0.y (5:21,cm=f,il=12,b=1) -; FRAG: 92 instructions, 0 half, 4 full +; FRAG: inputs: r1.y (5:20,cm=f,il=8,b=1) r0.z (5:21,cm=f,il=12,b=1) +; FRAG: 61 instructions, 0 half, 4 full diff --git a/reference/idiv-vert.asm b/reference/idiv-vert.asm index 292cf79..bb5adce 100644 --- a/reference/idiv-vert.asm +++ b/reference/idiv-vert.asm @@ -1,9 +1,9 @@ ; options: ; VERT: new compiler -@in(r1.x) in0 -@in(r1.y) in1 -@in(r1.z) in2 -@in(r1.w) in3 +@in(r0.x) in0 +@in(r0.y) in1 +@in(r0.z) in2 +@in(r0.w) in3 @out(r0.x) out0 @out(r0.y) out1 @out(r0.z) out2 @@ -12,90 +12,89 @@ @out(r1.y) out5 @out(r1.z) out6 @out(r1.w) out7 -(sy)(ss)cov.s32f32 r0.x, c1.x -cov.s32f32 r0.y, c1.y -mov.f32f32 r0.z, c0.y -mov.f32f32 r0.w, c0.x -absneg.f r0.x, (abs)r0.x -absneg.f r0.y, (abs)r0.y -xor.b r0.z, r0.z, c1.y -xor.b r0.w, r0.w, c1.x +@const(c3.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000 +@const(c4.x) 0xfffffffe, 0x0000001f, 0x00000000, 0x00000000 +(sy)(ss)cov.s32f32 r1.x, c1.x +cov.s32f32 r1.y, c1.y +mov.f32f32 r1.z, c0.y +mov.f32f32 r1.w, c0.x +absneg.f r1.x, (abs)r1.x +absneg.f r1.y, (abs)r1.y +xor.b r1.z, r1.z, c1.y +xor.b r1.w, r1.w, c1.x cov.s32f32 r2.x, c0.y cov.s32f32 r2.y, c0.x mov.f32f32 r2.z, c3.y -rcp r0.x, r0.x -(ss)add.u r0.x, r0.x, c4.x -rcp r0.y, r0.y -(ss)add.u r2.w, r0.y, c4.x -(ss)absneg.f r0.y, (abs)r2.y +rcp r1.x, r1.x +(ss)add.u r1.x, r1.x, c4.x +rcp r1.y, r1.y +(ss)add.u r1.y, r1.y, c4.x +absneg.f r2.y, (abs)r2.y absneg.f r2.x, (abs)r2.x -shr.b r2.y, r0.z, c4.y -shr.b r3.x, r0.w, c4.y -mul.f r0.y, r0.y, r0.x -mul.f r0.z, r2.x, r2.w -mov.f32f32 r2.x, c3.y -mov.f32f32 r3.y, c3.x -cov.f32s32 r3.z, r0.y -absneg.s r3.w, (abs)c1.x -cov.f32s32 r4.x, r0.z -absneg.s r4.y, (abs)c1.y -mov.f32f32 r4.z, c3.x -mull.u r0.y, r3.z, r3.w -mov.f32f32 r4.w, c3.x -madsh.m16 r0.y, r3.z, r3.w, r0.y -mull.u r0.z, r4.x, r4.y -madsh.m16 r0.y, r3.w, r3.z, r0.y -absneg.s r5.x, (abs)c0.x -madsh.m16 r0.z, r4.x, r4.y, r0.z +shr.b r1.z, r1.z, c4.y +shr.b r1.w, r1.w, c4.y +mul.f r2.y, r2.y, r1.x +mul.f r2.x, r2.x, r1.y +mov.f32f32 r2.w, c3.y +mov.f32f32 r3.x, c3.x +cov.f32s32 r2.y, r2.y +absneg.s r3.y, (abs)c1.x +cov.f32s32 r2.x, r2.x +absneg.s r3.z, (abs)c1.y +mov.f32f32 r3.w, c3.x +mull.u r4.x, r2.y, r3.y +mov.f32f32 r4.y, c3.x +madsh.m16 r4.x, r2.y, r3.y, r4.x +mull.u r4.z, r2.x, r3.z +madsh.m16 r4.x, r3.y, r2.y, r4.x +absneg.s r4.w, (abs)c0.x +madsh.m16 r4.z, r2.x, r3.z, r4.z +mov.f32f32 r5.x, c3.y mov.f32f32 r5.y, c3.y -mov.f32f32 r5.z, c3.y -sub.u r0.y, r5.x, r0.y -madsh.m16 r5.w, r4.y, r4.x, r0.z -absneg.s r6.x, (abs)c0.y -mov.f32f32 r6.y, c3.x -cov.u32f32 r6.z, r0.y -mov.f32f32 r0.w, r1.w -mov.f32f32 r0.z, r1.z -mov.f32f32 r0.y, r1.y -mul.f r1.y, r6.z, r0.x -sub.u r1.z, r6.x, r5.w -mov.f32f32 r0.x, r1.x -nop -cov.f32u32 r1.x, r1.y -cov.u32f32 r1.y, r1.z +sub.u r4.x, r4.w, r4.x +madsh.m16 r4.z, r3.z, r2.x, r4.z +absneg.s r5.z, (abs)c0.y +mov.f32f32 r5.w, c3.x +cov.u32f32 r4.x, r4.x +(rpt2)nop +mul.f r1.x, r4.x, r1.x +sub.u r4.x, r5.z, r4.z (rpt1)nop -add.u r1.x, r3.z, r1.x -mul.f r1.y, r1.y, r2.w +cov.f32u32 r1.x, r1.x +cov.u32f32 r4.x, r4.x (rpt1)nop -mull.u r1.z, r1.x, r3.w +add.u r1.x, r2.y, r1.x +mul.f r1.y, r4.x, r1.y +(rpt1)nop +mull.u r2.y, r1.x, r3.y cov.f32u32 r1.y, r1.y -madsh.m16 r1.z, r1.x, r3.w, r1.z +madsh.m16 r2.y, r1.x, r3.y, r2.y nop -madsh.m16 r1.z, r3.w, r1.x, r1.z -add.u r1.y, r4.x, r1.y +madsh.m16 r2.y, r3.y, r1.x, r2.y +add.u r1.y, r2.x, r1.y (rpt1)nop -sub.u r1.z, r5.x, r1.z -mull.u r1.w, r1.y, r4.y +sub.u r2.x, r4.w, r2.y +mull.u r2.y, r1.y, r3.z (rpt1)nop -cmps.u.ge r1.z, r1.z, r3.w -madsh.m16 r1.w, r1.y, r4.y, r1.w +cmps.u.ge r2.x, r2.x, r3.y +madsh.m16 r2.y, r1.y, r3.z, r2.y (rpt1)nop -add.u r1.x, r1.x, r1.z -madsh.m16 r1.z, r4.y, r1.y, r1.w +add.u r1.x, r1.x, r2.x +madsh.m16 r2.x, r3.z, r1.y, r2.y (rpt1)nop -absneg.s r1.w, (neg)r1.x -sub.u r1.z, r6.x, r1.z +absneg.s r2.y, (neg)r1.x +sub.u r2.x, r5.z, r2.x (rpt1)nop -sel.b32 r1.x, r1.w, r3.x, r1.x -cmps.u.ge r1.z, r1.z, r4.y +sel.b32 r1.x, r2.y, r1.w, r1.x +cmps.u.ge r1.w, r2.x, r3.z (rpt1)nop cmps.u.eq r1.x, r1.x, c2.x -add.u r1.y, r1.y, r1.z +add.u r1.y, r1.y, r1.w (rpt1)nop absneg.s r1.x, (neg)r1.x -absneg.s r1.z, (neg)r1.y +absneg.s r1.w, (neg)r1.y (rpt2)nop -sel.b32 r1.y, r1.z, r2.y, r1.y +sel.b32 r1.y, r1.w, r1.z, r1.y (rpt2)nop cmps.u.eq r1.y, r1.y, c2.y (rpt2)nop @@ -105,18 +104,13 @@ and.b r1.x, r1.x, r1.y (rpt2)nop cmps.u.ne r1.x, r1.x, c3.x (rpt2)nop -sel.b32 r1.y, r2.x, r1.x, r2.z -sel.b32 r1.z, r4.z, r1.x, r3.y -sel.b32 r2.x, r5.y, r1.x, r4.w -sel.b32 r1.x, r6.y, r1.x, r5.z -mov.f32f32 r1.w, r1.y -mov.f32f32 r1.z, r1.z -mov.f32f32 r1.y, r2.x -mov.f32f32 r1.x, r1.x +sel.b32 r1.w, r2.w, r1.x, r2.z +sel.b32 r1.z, r3.w, r1.x, r3.x +sel.b32 r1.y, r5.x, r1.x, r4.y +sel.b32 r1.x, r5.w, r1.x, r5.y end nop -nop ; VERT: outputs: r0.x (0:0) r1.x (5:20) -; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) -; VERT: 123 instructions, 0 half, 7 full +; VERT: inputs: r0.x (0:0,cm=f,il=8,b=0) +; VERT: 119 instructions, 0 half, 6 full diff --git a/reference/jellyfish-frag.asm b/reference/jellyfish-frag.asm index 38d65d6..e29f320 100644 --- a/reference/jellyfish-frag.asm +++ b/reference/jellyfish-frag.asm @@ -6,411 +6,226 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c8.x) 0x3daa9931, 0x3caa64c3, 0x3c2c0831, 0xbf800000 +@const(c9.x) 0x3e4ccccd, 0x3e99999a, 0x3f000000, 0xbf333333 +@const(c10.x) 0x41200000, 0xbdcccccd, 0xc0200000, 0x3fcccccd +@const(c11.x) 0x40000000, 0x00000000, 0x3f800000, 0x3eaaa64c +@const(c12.x) 0x40400000, 0x40800000, 0x40a00000, 0x40c00000 +@const(c13.x) 0x40e00000, 0x41000000, 0x00000000, 0x00000000 +@const(c14.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.z, 8, r0.x -bary.f r0.w, 2, r0.x -bary.f r1.x, 1, r0.x -bary.f r1.y, 11, r0.x -mov.f32f32 r1.z, r0.z -mov.f32f32 r0.z, r0.z -add.f r0.w, r0.w, (neg)r1.x -mov.f32f32 r1.x, r1.y -mov.f32f32 r1.y, r1.z -bary.f r1.z, 9, r0.x -mul.f r0.w, r0.w, c8.z -mov.f32f32 r1.w, c2.x +mov.f32f32 r1.x, c7.x +bary.f r1.w, 11, r0.x +bary.f r1.y, 2, r0.x mov.f32f32 r2.x, r0.z -mov.f32f32 r0.z, r1.z -mov.f32f32 r0.w, r0.w -mul.f r1.w, r1.w, c2.w -mov.f32f32 r2.y, r1.z -mov.f32f32 r1.z, r0.z -mov.f32f32 r0.z, r0.w -add.f r0.w, r1.x, r1.w -bary.f r2.z, 12, r0.x -mov.f32f32 r2.w, c8.w -mov.f32f32 r0.z, r0.z -mov.f32f32 r2.y, r2.y -mov.f32f32 r2.z, r2.z -mov.f32f32 r3.x, c2.y -mov.f32f32 r0.z, r0.z +bary.f r0.w, 9, r0.x +cmps.f.eq r1.x, r1.x, c13.y +mov.f32f32 r1.z, r1.w +mov.f32f32 r2.z, c2.x +mov.f32f32 r2.y, r0.w +cov.u32f32 r1.x, r1.x +bary.f r2.w, 1, r0.x +mul.f r2.z, r2.z, c2.w +mov.f32f32 r3.x, c8.w +cmps.f.ne r1.x, r1.x, c11.y mov.f32f32 r3.y, c8.w -samb (f32)(xyzw)r3.z, r1.y, r2.w, s#2, t#2 -(sy)(ss)add.f r1.y, (neg)r4.y, c10.y -mov.f32f32 r1.z, c6.x -mov.f32f32 r4.w, r0.z -bary.f r0.z, 0, r0.x -mul.f r2.w, r3.x, c2.w -mul.f r1.z, r1.z, c9.x -samb (f32)(xyzw)r5.x, r2.x, r3.y, s#0, t#0 -(ss)add.f r2.x, r4.y, c6.x -mul.f r0.z, r0.z, c8.y -mov.f32f32 r2.y, c8.x -add.f r1.y, r1.y, (neg)r1.z -add.f r3.x, r2.z, r2.w -(sy)mov.f32f32 r3.y, r5.w -mad.f32 r0.z, c0.x, r2.y, r0.z -mov.f32f32 r1.y, r1.y -add.f r0.w, r0.w, r3.x -bary.f r2.y, 13, r0.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r3.x, c7.x -mul.f r2.x, r2.x, c9.y -mov.f32f32 r5.w, r5.w +add.f r3.z, r1.z, r2.z +mov.f32f32 r3.w, c2.y +add.f r1.y, r1.y, (neg)r2.w +mov.f32f32 r2.w, c6.x +samb (f32)(xyzw)r4.x, r2.x, r3.x, s#2, t#2 +(sy)(ss)add.f r2.x, (neg)r4.w, c10.y +mul.f r2.y, r3.w, c2.w +bary.f r3.x, 12, r0.x +mul.f r2.w, r2.w, c9.x +samb (f32)(xyzw)r5.x, r0.z, r3.y, s#0, t#0 +(ss)mov.f32f32 r0.z, c11.z +mul.f r6.y, r1.y, c8.z +add.f r0.w, r4.w, c6.x +add.f r1.y, r2.x, (neg)r2.w +(sy)sel.b32 r0.z, r0.z, r1.x, r5.w +add.f r2.x, r3.x, r2.y +mov.f32f32 r3.y, c7.x +bary.f r3.w, 0, r0.x +mul.f r0.w, r0.w, c9.y +mov.f32f32 r6.z, r5.w sin r1.y, r1.y -(ss)mov.f32f32 r1.y, r1.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r2.y, r2.y -mov.f32f32 r4.z, c2.z -mul.f r1.y, r1.y, r3.z -mov.f32f32 r0.z, r0.z -cmps.f.eq r3.x, r3.x, c13.y -mul.f r3.z, r4.z, c2.w -mov.f32f32 r1.y, r1.y -mov.f32f32 r4.z, r0.z -cov.u32f32 r0.z, r3.x -add.f r3.x, r2.y, r3.z +(ss)mul.f r1.y, r1.y, r4.x +cmps.f.eq r3.y, r3.y, c13.x +add.f r2.x, r3.z, r2.x +mov.f32f32 r3.z, c2.z mul.f r1.y, r1.y, c10.x -mov.f32f32 r2.x, r2.x -mov.f32f32 r5.w, r5.w -add.f r0.w, r0.w, r3.x -mov.f32f32 r1.y, r1.y -add.f r3.x, (neg)r4.y, c9.w -sam (f32)(xyz)r6.x, r4.z, s#1, t#1 -(sy)(ss)mad.f32 r4.z, c1.w, r6.y, r2.z -mad.f32 r4.w, c1.w, r6.x, r1.x -mad.f32 r6.w, c1.w, r6.z, r2.y -add.f r3.x, r3.x, (neg)r1.z -add.f r4.z, r4.z, r2.w -add.f r4.w, r4.w, r1.w -add.f r6.w, r6.w, r3.z -mov.f32f32 r3.x, r3.x -mov.f32f32 r4.z, r4.z -mov.f32f32 r4.w, r4.w -mov.f32f32 r6.w, r6.w -mov.f32f32 r6.z, r6.z -mov.f32f32 r6.y, r6.y -bary.f r7.x, 16, r0.x -sin r3.x, r3.x -(ss)mov.f32f32 r3.x, r3.x -bary.f r7.y, 15, r0.x -mov.f32f32 r6.z, r6.z -mov.f32f32 r7.z, r7.x -mul.f r3.x, r3.x, r3.w -mad.f32 r3.w, r6.w, r5.z, r7.z -mov.f32f32 r6.w, r7.y -bary.f r7.w, 14, r0.x +add.f r4.x, (neg)r4.w, c9.w +cov.u32f32 r3.y, r3.y +mul.f r3.z, r3.z, c2.w +bary.f r6.w, 13, r0.x +add.f r4.x, r4.x, (neg)r2.w +cmps.f.ne r3.y, r3.y, c11.y +mov.f32f32 r6.x, c11.z +add.f r7.x, r6.w, r3.z +mul.f r3.w, r3.w, c8.y +sin r0.w, r0.w +mov.f32f32 r7.y, c8.x +sel.b32 r0.z, r6.x, r3.y, r0.z +sin r4.x, r4.x +(ss)mul.f r4.x, r4.x, r4.y +mov.f32f32 r4.y, c7.x +add.f r2.x, r2.x, r7.x +mad.f32 r6.x, c0.x, r7.y, r3.w +mad.f32 r1.y, c10.x, r4.x, r1.y +add.f r3.w, (neg)r4.w, c10.z +cmps.f.eq r4.x, r4.y, c12.w +mul.f r2.x, r2.x, c11.w +mul.f r0.w, r0.w, c9.z +add.f r3.w, r3.w, (neg)r2.w +cov.u32f32 r4.x, r4.x +add.f r2.x, c11.z, (neg)r2.x +sam (f32)(xyz)r7.x, r6.x, s#1, t#1 +mov.f32f32 r4.y, r0.w mov.f32f32 r3.x, r3.x -mov.f32f32 r3.w, r3.w -mad.f32 r4.z, r4.z, r5.y, r6.w -mov.f32f32 r8.x, r7.w -mad.f32 r1.y, c10.x, r3.x, r1.y -bary.f r3.x, 7, r0.x -mov.f32f32 r4.z, r4.z -mad.f32 r4.w, r4.w, r5.x, r8.x -mov.f32f32 r1.y, r1.y -add.f r4.y, (neg)r4.y, c10.z -add.f r8.y, c14.y, (neg)r3.x -add.f r8.z, c14.y, (neg)r3.x -mov.f32f32 r4.w, r4.w -add.f r4.y, r4.y, (neg)r1.z -mul.f r3.w, r8.y, r3.w -mul.f r4.z, r8.z, r4.z -add.f r8.y, c14.y, (neg)r3.x -mov.f32f32 r4.y, r4.y -bary.f r8.z, 6, r0.x -bary.f r8.w, 5, r0.x -mul.f r4.w, r8.y, r4.w -mov.f32f32 r6.z, r6.z -mov.f32f32 r6.y, r6.y -bary.f r8.y, 4, r0.x -sin r4.y, r4.y -(ss)mov.f32f32 r4.y, r4.y -mul.f r8.z, r3.x, r8.z -mul.f r8.w, r3.x, r8.w -mul.f r3.x, r3.x, r8.y -mul.f r4.x, r4.y, r4.x -add.f r3.w, r8.z, r3.w -add.f r4.y, r8.w, r4.z -add.f r3.x, r3.x, r4.w -mov.f32f32 r4.x, r4.x -add.f r1.z, c10.w, r1.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r4.z, r6.z -mov.f32f32 r4.w, r6.y -mov.f32f32 r1.z, r1.z -mul.f r0.w, r0.w, c11.w -mov.f32f32 r4.z, r4.z -mov.f32f32 r4.w, r4.w -mov.f32f32 r6.x, r6.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r4.z, r4.z -cos r1.z, r1.z -(ss)mov.f32f32 r1.z, r1.z -mov.f32f32 r4.w, r4.w -mov.f32f32 r6.x, r6.x -add.f r0.w, c11.z, (neg)r0.w -mul.f r1.z, r7.x, r1.z -mov.f32f32 r4.z, r4.z -mov.f32f32 r4.w, r4.w -mov.f32f32 r6.x, r6.x -mov.f32f32 r1.z, r1.z -add.f r6.y, r7.w, r7.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r4.w, r4.w -mad.f32 r1.z, c11.x, r1.z, r6.y -mov.f32f32 r6.x, r6.x -cmps.f.ne r0.z, r0.z, c11.y -mov.f32f32 r6.y, c11.z -mov.f32f32 r1.z, r1.z -log2 r0.w, r0.w -(ss)mul.f r0.w, c12.x, r0.w -mad.f32 r1.z, c10.x, r4.x, r1.z -mov.f32f32 r4.x, r6.x -mov.f32f32 r6.x, r6.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.z, r1.z -mov.f32f32 r4.x, r4.x -sel.b32 r3.y, r6.x, r0.z, r3.y -mov.f32f32 r6.x, c7.x -add.f r1.y, r1.y, r1.z -mov.f32f32 r1.z, r4.x -exp2 r0.w, r0.w -(ss)add.f r4.x, c14.y, (neg)r0.w -add.f r6.y, c14.y, (neg)r0.w -mov.f32f32 r1.y, r1.y -add.f r6.z, c14.y, (neg)r0.w -(rpt1)nop +cmps.f.ne r4.x, r4.x, c11.y +mov.f32f32 r4.w, c11.z +sin r3.w, r3.w +(ss)mul.f r3.w, r3.w, r4.z +add.f r2.w, c10.w, r2.w +log2 r2.x, r2.x +(ss)mul.f r2.x, c12.x, r2.x +sel.b32 r0.z, r4.w, r4.x, r0.z +mov.f32f32 r4.z, c7.x +mov.f32f32 r4.w, r6.w +mul.f r6.x, r5.z, r4.y +(sy)mad.f32 r6.y, c1.w, r7.z, r4.w +cos r2.w, r2.w +bary.f r6.w, 16, r0.x +cmps.f.eq r4.z, r4.z, c12.z +exp2 r2.x, r2.x +(ss)add.f r7.w, c14.y, (neg)r2.x +add.f r8.x, c14.y, (neg)r2.x +(ss)mul.f r2.w, r6.w, r2.w +bary.f r8.y, 14, r0.x +bary.f r8.z, 15, r0.x +cov.u32f32 r4.z, r4.z +add.f r6.y, r6.y, r3.z +mad.f32 r8.w, c1.w, r7.y, r3.x +add.f r9.x, r8.y, r8.z +cmps.f.ne r4.z, r4.z, c11.y +mad.f32 r2.w, c11.x, r2.w, r9.x +mov.f32f32 r9.x, c11.z +mad.f32 r2.w, c10.x, r3.w, r2.w +mov.f32f32 r3.w, r6.w +add.f r8.w, r8.w, r2.y +add.f r9.y, c14.y, (neg)r2.x +add.f r1.y, r1.y, r2.w +sel.b32 r0.z, r9.x, r4.z, r0.z +mad.f32 r2.w, r6.y, r5.z, r3.w +mov.f32f32 r3.w, r8.z max.f r1.y, r1.y, c11.y -mul.f r6.y, r6.y, r3.w -mul.f r4.x, r4.x, r4.y -mul.f r6.z, r6.z, r3.x -mov.f32f32 r1.y, r1.y -sin r2.x, r2.x -(ss)mov.f32f32 r2.x, r2.x -cmps.f.eq r6.x, r6.x, c13.x -mov.f32f32 r5.w, r5.w -mov.f32f32 r7.x, r5.z -mul.f r2.x, r2.x, c9.z -cov.u32f32 r6.x, r6.x -mov.f32f32 r5.w, r5.w -mov.f32f32 r7.x, r7.x -mov.f32f32 r2.x, r2.x -cmps.f.ne r6.x, r6.x, c11.y -mov.f32f32 r7.y, c11.z -mov.f32f32 r5.w, r5.w -mul.f r5.z, r5.z, r2.x -mul.f r7.w, r5.y, r2.x -mul.f r2.x, r5.x, r2.x -mov.f32f32 r7.y, r7.y -mov.f32f32 r5.z, r5.z -mov.f32f32 r7.w, r7.w -mov.f32f32 r2.x, r2.x -sel.b32 r3.y, r7.y, r6.x, r3.y -mul.f r5.z, r5.z, r1.y -mul.f r7.y, r7.w, r1.y -mul.f r1.y, r2.x, r1.y -mov.f32f32 r2.x, c7.x -mov.f32f32 r5.z, r5.z -mov.f32f32 r7.y, r7.y -mov.f32f32 r1.y, r1.y -cmps.f.eq r2.x, r2.x, c12.w -mov.f32f32 r7.w, r5.z -mov.f32f32 r8.y, r7.y -mov.f32f32 r8.z, r1.y -add.f r3.w, r3.w, r5.z -mov.f32f32 r5.z, r7.w -mov.f32f32 r7.w, r8.y -mov.f32f32 r8.y, r8.z -mul.f r3.w, r0.w, r3.w -mov.f32f32 r5.z, r5.z -mov.f32f32 r7.w, r7.w -mov.f32f32 r8.y, r8.y -add.f r3.w, r3.w, r6.y -mov.f32f32 r5.z, r5.z -mov.f32f32 r6.y, r7.w -mov.f32f32 r7.w, r8.y -add.f r4.y, r4.y, r7.y -mov.f32f32 r5.z, r5.z -mov.f32f32 r6.y, r6.y -mov.f32f32 r7.y, r7.w -mul.f r4.y, r0.w, r4.y -mov.f32f32 r5.z, r5.z -mov.f32f32 r6.y, r6.y -mov.f32f32 r7.y, r7.y -add.f r4.x, r4.y, r4.x -mov.f32f32 r4.y, r5.z -mov.f32f32 r5.z, r6.y -mov.f32f32 r6.y, r7.y -add.f r1.y, r3.x, r1.y -mov.f32f32 r3.x, r4.y -mov.f32f32 r4.y, r5.z -mov.f32f32 r5.z, r6.y +mul.f r0.w, r5.x, r0.w +mov.f32f32 r6.y, c7.x +bary.f r9.x, 7, r0.x +mov.f32f32 r9.z, r1.y +mul.f r4.y, r5.y, r4.y mul.f r0.w, r0.w, r1.y -mov.f32f32 r1.y, r3.x -mov.f32f32 r3.x, r4.y -mov.f32f32 r4.y, r5.z -add.f r0.w, r0.w, r6.z -mov.f32f32 r1.y, r1.y -mov.f32f32 r3.x, r3.x +cmps.f.eq r1.y, r6.y, c12.y +mul.f r6.x, r6.x, r9.z +mul.f r4.y, r4.y, r9.z +mov.f32f32 r6.y, r0.w +mad.f32 r1.z, c1.w, r7.x, r1.z +mov.f32f32 r9.z, r6.x +add.f r9.w, c14.y, (neg)r9.x +mov.f32f32 r10.x, r4.y +mad.f32 r3.w, r8.w, r5.y, r3.w +add.f r1.z, r1.z, r2.z +mul.f r2.w, r9.w, r2.w +bary.f r8.w, 6, r0.x +add.f r9.w, c14.y, (neg)r9.x +mov.f32f32 r10.y, r8.y +cov.u32f32 r1.y, r1.y +mul.f r8.w, r9.x, r8.w +mul.f r3.w, r9.w, r3.w +bary.f r9.w, 5, r0.x +mad.f32 r1.z, r1.z, r5.x, r10.y +add.f r2.w, r8.w, r2.w +add.f r8.w, c14.y, (neg)r9.x +mul.f r9.w, r9.x, r9.w +cmps.f.ne r1.y, r1.y, c11.y +add.f r9.z, r2.w, r9.z +mul.f r1.z, r8.w, r1.z +add.f r3.w, r9.w, r3.w +bary.f r8.w, 4, r0.x +mul.f r9.z, r2.x, r9.z +mul.f r2.w, r8.x, r2.w +add.f r8.x, r3.w, r10.x +mul.f r8.w, r9.x, r8.w +mov.f32f32 r9.x, c11.z +add.f r2.w, r9.z, r2.w +mul.f r8.x, r2.x, r8.x +mul.f r3.w, r7.w, r3.w +add.f r1.z, r8.w, r1.z +sel.b32 r2.w, r6.x, r1.x, r2.w +mov.f32f32 r6.x, r6.z +add.f r3.w, r8.x, r3.w +add.f r6.y, r1.z, r6.y +sel.b32 r0.z, r9.x, r1.y, r0.z +mov.f32f32 r6.x, r6.x +sel.b32 r3.w, r4.y, r1.x, r3.w +mul.f r2.x, r2.x, r6.y +mul.f r1.z, r9.y, r1.z +mov.f32f32 r4.y, r6.x +mov.f32f32 r6.x, c7.x +mov.f32f32 r6.y, c7.x +mov.f32f32 r6.z, c7.x mov.f32f32 r4.y, r4.y -cov.u32f32 r2.x, r2.x -sel.b32 r1.y, r1.y, r0.z, r3.w -mov.f32f32 r3.w, r5.w -sel.b32 r3.x, r3.x, r0.z, r4.x -sel.b32 r0.z, r4.y, r0.z, r0.w -cmps.f.ne r0.w, r2.x, c11.y -mov.f32f32 r2.x, r3.w -mov.f32f32 r4.x, r3.w -mov.f32f32 r3.w, r3.w -mov.f32f32 r4.y, c11.z -mov.f32f32 r2.x, r2.x -mov.f32f32 r4.x, r4.x -mov.f32f32 r3.w, r3.w +add.f r1.z, r2.x, r1.z +cmps.f.eq r2.x, r6.x, c12.x +cmps.f.eq r6.x, r6.y, c11.x mov.f32f32 r4.y, r4.y -mov.f32f32 r2.x, r2.x -mov.f32f32 r4.x, r4.x -mov.f32f32 r3.w, r3.w -sel.b32 r3.y, r4.y, r0.w, r3.y -sel.b32 r1.y, r2.x, r6.x, r1.y -mov.f32f32 r2.x, r7.x -sel.b32 r3.x, r4.x, r6.x, r3.x -sel.b32 r0.z, r3.w, r6.x, r0.z -mov.f32f32 r3.w, r5.y -mov.f32f32 r2.x, r2.x -mov.f32f32 r4.x, r5.x -mov.f32f32 r4.y, c7.x -mov.f32f32 r3.w, r3.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r4.x, r4.x -cmps.f.eq r4.y, r4.y, c12.z -mov.f32f32 r3.w, r3.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r4.x, r4.x -cov.u32f32 r4.y, r4.y -mov.f32f32 r3.w, r3.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r4.x, r4.x -cmps.f.ne r4.y, r4.y, c11.y -mov.f32f32 r5.x, c11.z -mov.f32f32 r2.x, r2.x -mov.f32f32 r3.w, r3.w -mov.f32f32 r4.x, r4.x -mov.f32f32 r5.x, r5.x -sel.b32 r1.y, r2.x, r0.w, r1.y -mov.f32f32 r2.x, r3.w -mov.f32f32 r3.w, r4.x -sel.b32 r3.y, r5.x, r4.y, r3.y -sel.b32 r1.y, r4.z, r4.y, r1.y -mov.f32f32 r3.z, r3.z -mov.f32f32 r2.x, r2.x -mov.f32f32 r3.w, r3.w -mov.f32f32 r4.x, c7.x -mov.f32f32 r3.z, r3.z -mov.f32f32 r2.x, r2.x -mov.f32f32 r3.w, r3.w -cmps.f.eq r4.x, r4.x, c12.y -mov.f32f32 r3.z, r3.z -sel.b32 r2.x, r2.x, r0.w, r3.x -sel.b32 r0.z, r3.w, r0.w, r0.z -cov.u32f32 r0.w, r4.x -mov.f32f32 r3.x, r3.z -sel.b32 r2.x, r4.w, r4.y, r2.x -sel.b32 r0.z, r1.z, r4.y, r0.z -mov.f32f32 r1.z, r2.w -mov.f32f32 r2.w, r3.x -mov.f32f32 r1.w, r1.w -cmps.f.ne r0.w, r0.w, c11.y -mov.f32f32 r3.x, c11.z -mov.f32f32 r2.w, r2.w -mov.f32f32 r1.z, r1.z -mov.f32f32 r1.w, r1.w -mov.f32f32 r3.x, r3.x -sel.b32 r1.y, r2.w, r0.w, r1.y -mov.f32f32 r2.w, r7.z -mov.f32f32 r1.z, r1.z -mov.f32f32 r1.w, r1.w -sel.b32 r3.x, r3.x, r0.w, r3.y -mov.f32f32 r2.w, r2.w -mov.f32f32 r1.z, r1.z -mov.f32f32 r1.w, r1.w -mov.f32f32 r3.y, c7.x -mov.f32f32 r2.w, r2.w -mov.f32f32 r1.z, r1.z -mov.f32f32 r1.w, r1.w -cmps.f.eq r3.y, r3.y, c12.x -mov.f32f32 r2.w, r2.w -mov.f32f32 r1.z, r1.z -mov.f32f32 r1.w, r1.w -cov.u32f32 r3.y, r3.y -mov.f32f32 r2.w, r2.w -sel.b32 r1.z, r1.z, r0.w, r2.x -sel.b32 r0.z, r1.w, r0.w, r0.z -cmps.f.ne r0.w, r3.y, c11.y -mov.f32f32 r1.w, r6.w -mov.f32f32 r2.x, r8.x +sel.b32 r0.w, r0.w, r1.x, r1.z +cov.u32f32 r1.x, r2.x +cov.u32f32 r1.z, r6.x +sel.b32 r2.x, r4.y, r3.y, r2.w +sel.b32 r2.w, r4.y, r3.y, r3.w +sel.b32 r0.w, r5.w, r3.y, r0.w +cmps.f.ne r1.x, r1.x, c11.y +sel.b32 r2.x, r5.z, r4.x, r2.x +sel.b32 r2.w, r5.y, r4.x, r2.w +sel.b32 r0.w, r5.x, r4.x, r0.w mov.f32f32 r3.y, c11.z -sel.b32 r1.y, r2.w, r0.w, r1.y -mov.f32f32 r2.w, c7.x -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r3.y, r3.y -cmps.f.eq r2.w, r2.w, c11.x -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.x, r2.x -sel.b32 r3.x, r3.y, r0.w, r3.x -cov.u32f32 r2.w, r2.w -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r2.y, r2.y -cmps.f.ne r2.w, r2.w, c11.y -bary.f r3.y, 19, r0.x -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r3.z, c11.z -mov.f32f32 r3.y, r3.y -sel.b32 r1.z, r1.w, r0.w, r1.z -sel.b32 r0.z, r2.x, r0.w, r0.z -bary.f r0.w, 18, r0.x -mov.f32f32 r1.w, r3.y +sel.b32 r2.x, r7.z, r4.z, r2.x +sel.b32 r2.w, r7.y, r4.z, r2.w +sel.b32 r0.w, r7.x, r4.z, r0.w +sel.b32 r0.z, r3.y, r1.x, r0.z +sel.b32 r2.x, r3.z, r1.y, r2.x +sel.b32 r2.y, r2.y, r1.y, r2.w +sel.b32 r0.w, r2.z, r1.y, r0.w +cmps.f.ne r2.z, r1.z, c11.y +sel.b32 r1.y, r6.w, r1.x, r2.x +bary.f r1.z, 19, r0.x +sel.b32 r2.x, r8.z, r1.x, r2.y +bary.f r2.y, 18, r0.x +sel.b32 r0.w, r8.y, r1.x, r0.w +sel.b32 r1.x, r1.z, r2.z, r1.y +cmps.f.eq r1.y, r6.z, c11.z +sel.b32 r2.x, r2.y, r2.z, r2.x +(rpt1)nop +cov.u32f32 r1.y, r1.y bary.f (ei)r0.x, 17, r0.x -mov.f32f32 r0.y, r3.z -mov.f32f32 r2.x, r2.y -sel.b32 r1.y, r1.w, r2.w, r1.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r1.w, r2.x -mov.f32f32 r2.x, c7.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.x, r0.x -sel.b32 r0.y, r0.y, r2.w, r3.x -cmps.f.eq r2.x, r2.x, c11.z -sel.b32 r0.w, r0.w, r2.w, r1.z -sel.b32 r0.x, r0.x, r2.w, r0.z -mov.f32f32 r0.z, r2.z -cov.u32f32 r1.z, r2.x -mov.f32f32 r1.x, r1.x -mov.f32f32 r2.x, c11.z -nop -cmps.f.ne r2.y, r1.z, c11.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.z, r2.x -sel.b32 r1.y, r1.w, r2.y, r1.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r1.x, r1.x -sel.b32 r0.y, r1.z, r2.y, r0.y -mov.f32f32 r1.z, r1.y -sel.b32 r0.z, r0.z, r2.y, r0.w -sel.b32 r0.x, r1.x, r2.y, r0.x +mov.f32f32 r0.y, c11.z +mov.f32f32 r2.y, c11.z +cmps.f.ne r2.w, r1.y, c11.y +sel.b32 r0.x, r0.x, r2.z, r0.w (rpt1)nop -mov.f32f32 r1.y, r0.z -mov.f32f32 r1.x, r0.x -mov.f32f32 r1.w, r0.y +sel.b32 r1.z, r4.w, r2.w, r1.x +sel.b32 r1.y, r3.x, r2.w, r2.x +sel.b32 r1.x, r1.w, r2.w, r0.x +sel.b32 r0.x, r0.y, r2.z, r0.z +(rpt2)nop +sel.b32 r1.w, r2.y, r2.w, r0.x end nop nop nop ; FRAG: outputs: r1.x (1:0) -; FRAG: inputs: r0.x (5:20,cm=f,il=8,b=1) r2.y (5:21,cm=f,il=12,b=1) r0.z (5:22,cm=f,il=16,b=1) r6.z (5:23,cm=f,il=20,b=1) r2.z (5:24,cm=f,il=24,b=1) -; FRAG: 403 instructions, 0 half, 9 full +; FRAG: inputs: r0.x (5:20,cm=f,il=8,b=1) r8.y (5:21,cm=f,il=12,b=1) r1.x (5:22,cm=f,il=16,b=1) r7.w (5:23,cm=f,il=20,b=1) r0.w (5:24,cm=f,il=24,b=1) +; FRAG: 213 instructions, 0 half, 11 full diff --git a/reference/maniadrive/maniadrive-01.asm b/reference/maniadrive/maniadrive-01.asm index 2c03e4f..284e180 100644 --- a/reference/maniadrive/maniadrive-01.asm +++ b/reference/maniadrive/maniadrive-01.asm @@ -1,13 +1,13 @@ ; options: ; VERT: new compiler -@in(r2.x) in0 -@in(r2.y) in1 -@in(r2.z) in2 -@in(r2.w) in3 -@in(r0.x) in4 -@in(r0.y) in5 -@in(r0.z) in6 -@in(r0.w) in7 +@in(r1.x) in0 +@in(r1.y) in1 +@in(r1.z) in2 +@in(r1.w) in3 +@in(r2.x) in4 +@in(r2.y) in5 +@in(r2.z) in6 +@in(r2.w) in7 @out(r0.x) out0 @out(r0.y) out1 @out(r0.z) out2 @@ -16,39 +16,36 @@ @out(r1.y) out5 @out(r1.z) out6 @out(r1.w) out7 -(sy)(ss)mov.f32f32 r0.w, r0.w -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x -max.f r0.w, r0.w, c4.x -max.f r0.z, r0.z, c4.x -max.f r0.y, r0.y, c4.x -max.f r0.x, r0.x, c4.x -min.f r1.w, r0.w, c4.y -min.f r1.z, r0.z, c4.y -min.f r1.y, r0.y, c4.y -min.f r1.x, r0.x, c4.y -mul.f r0.x, r2.x, c0.w -mul.f r0.y, r2.x, c0.z -mad.f32 r0.x, c1.w, r2.y, r0.x -mad.f32 r0.y, c1.z, r2.y, r0.y -mad.f32 r0.x, c2.w, r2.z, r0.x -mad.f32 r0.y, c2.z, r2.z, r0.y -mad.f32 r0.w, c3.w, r2.w, r0.x -mad.f32 r0.z, c3.z, r2.w, r0.y -mul.f r0.x, r2.x, c0.y -mul.f r0.y, r2.x, c0.x -mad.f32 r0.x, c1.y, r2.y, r0.x -mad.f32 r0.y, c1.x, r2.y, r0.y -mad.f32 r0.x, c2.y, r2.z, r0.x -mad.f32 r2.x, c2.x, r2.z, r0.y -mad.f32 r0.y, c3.y, r2.w, r0.x -mad.f32 r0.x, c3.x, r2.w, r2.x +@const(c4.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000 +(sy)(ss)mul.f r0.x, r1.x, c0.w +mul.f r0.y, r1.x, c0.z +mad.f32 r0.x, c1.w, r1.y, r0.x +mad.f32 r0.y, c1.z, r1.y, r0.y +mad.f32 r0.x, c2.w, r1.z, r0.x +mad.f32 r0.y, c2.z, r1.z, r0.y +mad.f32 r0.w, c3.w, r1.w, r0.x +mad.f32 r0.z, c3.z, r1.w, r0.y +mul.f r0.x, r1.x, c0.y +mul.f r0.y, r1.x, c0.x +mad.f32 r0.x, c1.y, r1.y, r0.x +mad.f32 r0.y, c1.x, r1.y, r0.y +mad.f32 r0.x, c2.y, r1.z, r0.x +mad.f32 r1.x, c2.x, r1.z, r0.y +mad.f32 r0.y, c3.y, r1.w, r0.x +mad.f32 r0.x, c3.x, r1.w, r1.x +max.f r1.x, r2.w, c4.x +max.f r1.y, r2.z, c4.x +max.f r2.y, r2.y, c4.x +max.f r2.x, r2.x, c4.x +min.f r1.w, r1.x, c4.y +min.f r1.z, r1.y, c4.y +min.f r1.y, r2.y, c4.y +min.f r1.x, r2.x, c4.y end nop nop nop ; VERT: outputs: r0.x (0:0) r1.x (1:0) -; VERT: inputs: r2.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=f,il=12,b=0) -; VERT: 29 instructions, 0 half, 3 full +; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) r2.x (0:0,cm=f,il=12,b=0) +; VERT: 25 instructions, 0 half, 3 full diff --git a/reference/maniadrive/maniadrive-02.asm b/reference/maniadrive/maniadrive-02.asm index 31895e9..ac30adb 100644 --- a/reference/maniadrive/maniadrive-02.asm +++ b/reference/maniadrive/maniadrive-02.asm @@ -6,43 +6,24 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)bary.f r0.z, 8, r0.x -bary.f r0.w, 11, r0.x -bary.f r1.x, 9, r0.x +bary.f r0.w, 9, r0.x +bary.f r1.x, 11, r0.x bary.f r1.y, 2, r0.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.x, r1.x +bary.f r1.w, 1, r0.x +bary.f r2.x, 0, r0.x bary.f r1.z, 6, r0.x -mov.f32f32 r1.w, r0.z -mov.f32f32 r2.y, r0.w -mov.f32f32 r2.x, r1.x -add.f r0.z, r1.y, r1.z -bary.f r0.w, 1, r0.x -bary.f r1.x, 0, r0.x -bary.f r1.y, 5, r0.x +bary.f r2.y, 5, r0.x bary.f r2.z, 4, r0.x +sam.p (f32)(w)r2.w, r0.z, s#0, t#0 bary.f (ei)r0.x, 3, r0.x -sam.p (f32)(w)r2.w, r1.w, s#0, t#0 -mov.f32f32 r1.z, r0.z -add.f r0.y, r0.w, r1.y -add.f r0.z, r1.x, r2.z -(sy)mul.f r0.x, r3.z, r0.x -(rpt2)nop -mov.f32f32 r0.x, r0.x -mov.f32f32 r1.y, r0.y -mov.f32f32 r1.x, r0.z -nop -mov.f32f32 r0.x, r0.x -(rpt2)nop -mov.f32f32 r0.x, r0.x -(rpt2)nop -(ss)mov.f32f32 r1.w, r0.x +add.f r1.z, r1.y, r1.z +add.f r1.y, r1.w, r2.y +(ss)add.f r1.x, r2.x, r2.z +(sy)mul.f r1.w, r3.z, r0.x end -nop -nop -nop ; FRAG: outputs: r1.x (1:0) -; FRAG: inputs: r63.y (1:0,cm=f,il=8,b=1) r1.x (1:1,cm=f,il=12,b=1) r0.x (5:0,cm=f,il=16,b=1) -; FRAG: 39 instructions, 0 half, 4 full +; FRAG: inputs: r63.y (1:0,cm=f,il=8,b=1) r1.x (1:1,cm=f,il=12,b=1) r0.y (5:0,cm=f,il=16,b=1) +; FRAG: 16 instructions, 0 half, 4 full diff --git a/reference/maniadrive/maniadrive-03.asm b/reference/maniadrive/maniadrive-03.asm index 90cb336..cb658a2 100644 --- a/reference/maniadrive/maniadrive-03.asm +++ b/reference/maniadrive/maniadrive-03.asm @@ -1,34 +1,34 @@ ; options: ; VERT: new compiler -@in(r3.x) in0 -@in(r3.y) in1 -@in(r3.z) in2 -@in(r3.w) in3 +@in(r6.x) in0 +@in(r6.y) in1 +@in(r6.z) in2 +@in(r6.w) in3 @in(r0.x) in4 @in(r0.y) in5 @in(r0.z) in6 -@in(r6.x) in8 -@in(r6.y) in9 -@in(r6.z) in10 -@in(r6.w) in11 -@in(r7.x) in12 -@in(r7.y) in13 -@in(r7.z) in14 -@in(r7.w) in15 -@in(r0.w) in16 -@in(r1.x) in17 -@in(r1.y) in18 -@in(r2.x) in20 -@in(r2.y) in21 -@in(r2.z) in22 -@in(r5.x) in24 -@in(r5.y) in25 -@in(r5.z) in26 +@in(r7.x) in8 +@in(r7.y) in9 +@in(r7.z) in10 +@in(r7.w) in11 +@in(r3.x) in12 +@in(r3.y) in13 +@in(r3.z) in14 +@in(r3.w) in15 +@in(r1.x) in16 +@in(r1.y) in17 +@in(r1.z) in18 +@in(r1.w) in20 +@in(r2.x) in21 +@in(r2.y) in22 +@in(r8.x) in24 +@in(r8.y) in25 +@in(r8.z) in26 @in(r4.x) in28 @in(r4.y) in29 @in(r4.z) in30 -@in(r1.z) in32 -@in(r8.x) in36 +@in(r2.z) in32 +@in(r0.w) in36 @out(r0.x) out0 @out(r0.y) out1 @out(r0.z) out2 @@ -53,231 +53,197 @@ @out(r5.y) out21 @out(r5.z) out22 @out(r5.w) out23 -(sy)(ss)mul.f r1.w, r3.x, c10.x -mov.f32f32 r2.w, c18.y -mad.f32 r1.w, c11.x, r3.y, r1.w -mov.f32f32 r4.w, r6.w -mad.f32 r1.w, c12.x, r3.z, r1.w -mov.f32f32 r2.w, r2.w -mad.f32 r1.w, c13.x, r3.w, r1.w -mov.f32f32 r4.w, r4.w -mov.f32f32 r5.w, c18.y -mov.f32f32 r6.w, r6.w -add.f r8.y, c9.x, (neg)r1.w -max.f r1.w, r2.w, c18.x -max.f r2.w, r4.w, c18.x -mov.f32f32 r8.z, r5.w -mul.f r8.w, r8.y, r8.y -mul.f r4.w, r3.x, c10.y -min.f r5.w, r1.w, c18.y -mad.f32 r1.w, c11.y, r3.y, r4.w -min.f r4.w, r2.w, c18.y -mad.f32 r1.w, c12.y, r3.z, r1.w -max.f r2.w, r8.z, c18.x -mad.f32 r1.w, c13.y, r3.w, r1.w -mov.f32f32 r6.w, r6.w -mul.f r8.z, r3.x, c10.z -mul.f r9.x, r0.x, c4.x -add.f r9.y, c9.y, (neg)r1.w -min.f r2.w, r2.w, c18.y -max.f r1.w, r6.w, c18.x -mad.f32 r6.w, c11.z, r3.y, r8.z -mad.f32 r8.z, r9.y, r9.y, r8.w -mad.f32 r6.w, c12.z, r3.z, r6.w -mad.f32 r8.w, c4.y, r0.y, r9.x -mul.f r9.x, r0.x, c5.x -mov.f32f32 r8.z, r8.z -mad.f32 r6.w, c13.z, r3.w, r6.w -min.f r1.w, r1.w, c18.y -mov.f32f32 r8.w, r8.w -mad.f32 r9.x, c5.y, r0.y, r9.x -add.f r6.w, c9.z, (neg)r6.w -mad.f32 r8.w, c4.z, r0.z, r8.w +@const(c18.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000 +@const(c19.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 +(sy)(ss)mul.f r2.w, r6.x, c10.x +mul.f r4.w, r6.x, c10.y +mad.f32 r2.w, c11.x, r6.y, r2.w +mad.f32 r4.w, c11.y, r6.y, r4.w +mad.f32 r2.w, c12.x, r6.z, r2.w +mad.f32 r4.w, c12.y, r6.z, r4.w +mad.f32 r2.w, c13.x, r6.w, r2.w +mad.f32 r4.w, c13.y, r6.w, r4.w +mul.f r5.x, r6.x, c10.z +mov.f32f32 r5.y, c18.y +add.f r2.w, c9.x, (neg)r2.w +add.f r4.w, c9.y, (neg)r4.w +mad.f32 r5.x, c11.z, r6.y, r5.x +max.f r5.y, r5.y, c18.x +mul.f r5.z, r2.w, r2.w +mad.f32 r5.x, c12.z, r6.z, r5.x +mad.f32 r5.z, r4.w, r4.w, r5.z +mad.f32 r5.x, c13.z, r6.w, r5.x +min.f r5.w, r5.y, c18.y +mov.f32f32 r5.y, c18.y +mul.f r8.w, r6.x, c0.w +add.f r5.x, c9.z, (neg)r5.x +mad.f32 r8.w, c1.w, r6.y, r8.w +mul.f r9.x, r6.x, c0.z +mul.f r9.y, r6.x, c0.y +mad.f32 r5.z, r5.x, r5.x, r5.z +max.f r5.y, r5.y, c18.x +mad.f32 r8.w, c2.w, r6.z, r8.w +mad.f32 r9.x, c1.z, r6.y, r9.x +mad.f32 r9.y, c1.y, r6.y, r9.y +mul.f r6.x, r6.x, c0.x +absneg.f r9.z, (neg)r0.w +rsq r0.w, (abs)r5.z +(ss)mul.f r2.w, r2.w, r0.w +(ss)rcp r5.z, r0.w +(ss)mov.f32f32 r9.w, r5.z +mul.f r10.x, r0.x, c4.x +rsq r10.y, (abs)r0.w +(ss)mul.f r4.w, r4.w, r10.y +add.f r10.y, r2.w, c18.x +mul.f r10.z, r0.w, r9.w +mad.f32 r10.x, c4.y, r0.y, r10.x +add.f r10.w, r4.w, c18.x +mul.f r11.x, r10.y, r10.y +mul.f r10.z, c14.x, r10.z +mad.f32 r10.x, c4.z, r0.z, r10.x +mad.f32 r11.x, r10.w, r10.w, r11.x +rsq r11.y, (abs)r0.w +(ss)mul.f r5.x, r5.x, r11.y +mad.f32 r9.w, c14.y, r9.w, r10.z +(ss)rcp r0.w, r0.w +(ss)mul.f r5.z, r0.w, r5.z +mul.f r10.z, r10.x, r2.w +add.f r11.y, r5.x, c18.y +mul.f r11.z, r0.x, c5.x +min.f r2.w, r5.y, c18.y +(ss)mad.f32 r0.w, c3.w, r6.w, r8.w +mad.f32 r5.y, r11.y, r11.y, r11.x +mad.f32 r5.z, c14.z, r5.z, r9.w +mad.f32 r8.w, c5.y, r0.y, r11.z +mad.f32 r9.x, c2.z, r6.z, r9.x +mad.f32 r9.y, c2.y, r6.z, r9.y +mad.f32 r6.x, c1.x, r6.y, r6.x +min.f r6.y, (neg)r9.z, c19.z +rsq r5.y, (abs)r5.y +(ss)mov.f32f32 r9.z, r5.y +mov.f32f32 r9.w, r5.z +rcp r11.x, r5.z +(ss)mad.f32 r5.z, c5.z, r0.z, r8.w +mul.f r5.y, r11.y, r5.y +mul.f r8.w, r10.y, r9.z +mul.f r9.z, r10.w, r9.z +mad.f32 r4.w, r5.z, r4.w, r10.z mul.f r0.x, r0.x, c6.x -mov.f32f32 r9.x, r9.x -mad.f32 r8.z, r6.w, r6.w, r8.z -mad.f32 r9.x, c5.z, r0.z, r9.x +mul.f r8.w, r10.x, r8.w +rcp r10.x, r9.w +mov.f32f32 r10.y, c19.y +mad.f32 r5.z, r5.z, r9.z, r8.w mad.f32 r0.x, c6.y, r0.y, r0.x -mul.f r0.y, r3.x, c0.w -mul.f r9.z, r3.x, c0.z -mul.f r9.w, r3.x, c0.y -mul.f r3.x, r3.x, c0.x -rsq r8.z, (abs)r8.z -(ss)mul.f r8.y, r8.y, r8.z -rcp r10.x, r8.z -(ss)mov.f32f32 r10.x, r10.x -rcp r10.y, r8.z -(ss)mov.f32f32 r10.y, r10.y -rsq r10.z, (abs)r8.z -(ss)mul.f r9.y, r9.y, r10.z -add.f r10.z, r8.y, c18.x -mul.f r10.w, r8.z, r10.x -mul.f r8.y, r8.w, r8.y -mul.f r10.y, r10.y, r10.x -mul.f r11.x, r10.z, r10.z -add.f r11.y, r9.y, c18.x -mov.f32f32 r10.w, r10.w -mad.f32 r8.y, r9.x, r9.y, r8.y -mov.f32f32 r9.y, r10.y -mad.f32 r10.y, r11.y, r11.y, r11.x -mul.f r10.w, c14.x, r10.w -mov.f32f32 r8.y, r8.y -(ss)rsq r8.z, (abs)r8.z -mad.f32 r10.x, c14.y, r10.x, r10.w -mov.f32f32 r10.y, r10.y -(ss)mul.f r6.w, r6.w, r8.z -mov.f32f32 r0.x, r0.x -(ss)mov.f32f32 r8.z, r10.x +mov.f32f32 r0.y, c19.y mad.f32 r0.x, c6.z, r0.z, r0.x -add.f r0.z, r6.w, c18.y -mad.f32 r8.z, c14.z, r9.y, r8.z -mad.f32 r0.y, c1.w, r3.y, r0.y -mad.f32 r9.y, c1.z, r3.y, r9.z -mad.f32 r9.z, r0.z, r0.z, r10.y -mov.f32f32 r8.z, r8.z -mad.f32 r6.w, r0.x, r6.w, r8.y -mad.f32 r0.y, c2.w, r3.z, r0.y -mad.f32 r8.y, c2.z, r3.z, r9.y -mad.f32 r9.y, c1.y, r3.y, r9.w -mad.f32 r3.x, c1.x, r3.y, r3.x -rsq r3.y, (abs)r9.z -(ss)mov.f32f32 r3.y, r3.y -(ss)rcp r9.z, r8.z -mov.f32f32 r9.w, c19.y -mov.f32f32 r10.x, c19.y -rcp r10.y, r8.z -max.f r10.w, (neg)r6.w, c19.x -mul.f r10.z, r10.z, r3.y -(ss)mul.f r9.w, r9.w, r9.z -mul.f r11.x, c15.y, r6.y -mul.f r11.z, c15.x, r6.x -mul.f r8.w, r8.w, r10.z -mul.f r10.z, r11.y, r3.y -mul.f r11.y, c15.z, r6.z -mad.f32 r4.y, c7.y, r6.y, r4.y -mad.f32 r4.x, c7.x, r6.x, r4.x -mad.f32 r8.w, r9.x, r10.z, r8.w -mad.f32 r4.z, c7.z, r6.z, r4.z -mad.f32 r4.y, r9.w, r11.x, r4.y -mad.f32 r4.x, r9.w, r11.z, r4.x -mov.f32f32 r8.w, r8.w -mul.f r0.z, r0.z, r3.y -mad.f32 r3.y, r9.w, r11.y, r4.z -mov.f32f32 r4.z, r10.w -mul.f r9.x, r10.x, r9.z -mad.f32 r0.x, r0.x, r0.z, r8.w -mul.f r0.z, c15.z, r6.z -mul.f r8.w, c15.y, r6.y -mul.f r9.z, c15.x, r6.x -max.f r9.w, (neg)r0.x, c19.x -max.f r0.x, r0.x, c19.x -mul.f r4.z, r4.z, r10.y -mul.f r10.x, c16.y, r6.y -mul.f r10.z, c16.x, r6.x -mad.f32 r5.z, c7.z, r6.z, r5.z -mad.f32 r5.y, c7.y, r6.y, r5.y -log2 r9.w, r9.w -(ss)mov.f32f32 r9.w, r9.w -absneg.f r8.x, (neg)r8.x -log2 r0.x, r0.x -(ss)mov.f32f32 r0.x, r0.x -mov.f32f32 r1.z, r1.z -mul.f r10.w, c16.z, r6.z -min.f r8.x, (neg)r8.x, c19.z -mad.f32 r4.y, r4.z, r10.x, r4.y -mad.f32 r4.x, r4.z, r10.z, r4.x -mad.f32 r0.z, r9.x, r0.z, r5.z -mul.f r5.z, r8.x, r9.w -min.f r1.z, r1.z, c19.z -mad.f32 r3.y, r4.z, r10.w, r3.y -max.f r4.y, r4.y, c18.x -mov.f32f32 r4.z, r5.z -mul.f r0.x, r1.z, r0.x -max.f r1.z, r3.y, c18.x -min.f r4.y, r4.y, c18.y -max.f r3.y, r4.x, c18.x -max.f r5.z, r6.w, c19.x -mad.f32 r8.x, r9.x, r8.w, r5.y -exp2 r4.x, r4.z -(ss)mov.f32f32 r5.y, r4.x -cmps.f.lt r8.w, (neg)r6.w, c18.x -mov.f32f32 r0.x, r0.x -(ss)min.f r4.z, r1.z, c18.y -min.f r4.x, r3.y, c18.y -sel.b32 r1.z, r5.y, r8.w, c19.x -mov.f32f32 r3.y, r5.z -mad.f32 r5.x, c7.x, r6.x, r5.x -rcp r5.y, r8.z -cmps.f.lt r5.z, (neg)r6.w, c18.x -mov.f32f32 r1.z, r1.z -exp2 r0.x, r0.x -(ss)mov.f32f32 r0.x, r0.x -mul.f r3.y, r3.y, r10.y -mul.f r6.z, c16.z, r6.z -mov.f32f32 r1.z, r1.z -sel.b32 r0.x, r0.x, r5.z, c19.x -mul.f r6.y, c16.y, r6.y -mad.f32 r6.w, r9.x, r9.z, r5.x -mul.f r1.z, r1.z, r5.y -mul.f r2.z, c17.z, r2.z -mov.f32f32 r5.x, c18.x -mul.f r2.y, c17.y, r2.y -mul.f r2.x, c17.x, r2.x -mad.f32 r2.z, r1.z, r2.z, r5.x -mov.f32f32 r5.x, c18.x +(ss)mul.f r0.z, r10.y, r10.x +mul.f r8.w, c15.y, r7.y +mul.f r9.z, c15.x, r7.x +mad.f32 r5.y, r0.x, r5.y, r5.z +mul.f r5.z, c15.z, r7.z +mad.f32 r4.y, c7.y, r7.y, r4.y +mad.f32 r4.x, c7.x, r7.x, r4.x +max.f r10.y, (neg)r5.y, c19.x +max.f r5.y, r5.y, c19.x +mad.f32 r4.z, c7.z, r7.z, r4.z +mad.f32 r4.y, r0.z, r8.w, r4.y +mad.f32 r4.x, r0.z, r9.z, r4.x +mul.f r0.y, r0.y, r10.x +mul.f r8.w, c15.y, r7.y +log2 r9.z, r10.y +(ss)mul.f r6.y, r6.y, r9.z +log2 r5.y, r5.y +min.f r2.z, r2.z, c19.z +mad.f32 r0.z, r0.z, r5.z, r4.z +mad.f32 r0.x, r0.x, r5.x, r4.w +mul.f r4.z, c15.z, r7.z +mad.f32 r4.w, c7.y, r7.y, r8.y +(ss)mul.f r2.z, r2.z, r5.y +exp2 r5.x, r6.y +(ss)cmps.f.lt r5.y, (neg)r0.x, c18.x +max.f r5.z, (neg)r0.x, c19.x +mad.f32 r6.y, c7.z, r7.z, r8.z +mad.f32 r4.w, r0.y, r8.w, r4.w +(ss)sel.b32 r5.x, r5.x, r5.y, c19.x +rcp r8.y, r9.w +mul.f r8.z, r5.z, r11.x +mul.f r8.w, c16.y, r7.y +mul.f r9.z, c16.x, r7.x +(ss)mul.f r5.x, r5.x, r8.y +mul.f r2.y, c17.z, r2.y +mov.f32f32 r5.y, c18.x +mul.f r2.x, c17.y, r2.x +mul.f r1.w, c17.x, r1.w +mad.f32 r2.y, r5.x, r2.y, r5.y +mov.f32f32 r5.y, c18.x mov.f32f32 r5.z, c18.x -mov.f32f32 r0.x, r0.x -max.f r2.z, r2.z, c18.x -mad.f32 r2.y, r1.z, r2.y, r5.x -mad.f32 r1.z, r1.z, r2.x, r5.z -mov.f32f32 r0.x, r0.x -min.f r5.z, r2.z, c18.y -max.f r2.x, r2.y, c18.x -max.f r1.z, r1.z, c18.x -mul.f r0.x, r0.x, r5.y -mul.f r1.x, c17.y, r1.x +exp2 r2.z, r2.z +mad.f32 r2.x, r5.x, r2.x, r5.y +max.f r2.y, r2.y, c18.x +mad.f32 r1.w, r5.x, r1.w, r5.z +cmps.f.lt r5.x, (neg)r0.x, c18.x +max.f r2.x, r2.x, c18.x +min.f r5.z, r2.y, c18.y +max.f r1.w, r1.w, c18.x +(ss)sel.b32 r2.y, r2.z, r5.x, c19.x min.f r5.y, r2.x, c18.y -min.f r5.x, r1.z, c18.y -mul.f r1.y, c17.z, r1.y -mov.f32f32 r1.z, c18.x -mov.f32f32 r2.x, c18.x -mul.f r0.w, c17.x, r0.w -mad.f32 r1.y, r0.x, r1.y, r1.z -mad.f32 r1.x, r0.x, r1.x, r2.x -mov.f32f32 r1.z, c18.x -mad.f32 r0.z, r3.y, r6.z, r0.z +mul.f r2.x, c16.z, r7.z +min.f r5.x, r1.w, c18.y +mul.f r1.w, r2.y, r8.y +mul.f r1.z, c17.z, r1.z +mov.f32f32 r2.y, c18.x +mul.f r1.y, c17.y, r1.y +mul.f r1.x, c17.x, r1.x +mad.f32 r1.z, r1.w, r1.z, r2.y +mov.f32f32 r2.y, c18.x +(ss)mov.f32f32 r2.z, c18.x +mad.f32 r1.y, r1.w, r1.y, r2.y +max.f r1.z, r1.z, c18.x +mad.f32 r1.x, r1.w, r1.x, r2.z +mad.f32 r0.z, r8.z, r2.x, r0.z +mad.f32 r1.w, r8.z, r8.w, r4.y +min.f r2.z, r1.z, c18.y max.f r1.y, r1.y, c18.x max.f r1.x, r1.x, c18.x -mad.f32 r0.x, r0.x, r0.w, r1.z +(rpt1)nop +min.f r2.y, r1.y, c18.y +min.f r2.x, r1.x, c18.y max.f r0.z, r0.z, c18.x -min.f r2.z, r1.y, c18.y -min.f r2.y, r1.x, c18.y -max.f r0.x, r0.x, c18.x -min.f r1.z, r0.z, c18.y -mad.f32 r0.z, r3.y, r6.y, r8.x -mul.f r1.x, c16.x, r6.x -min.f r2.x, r0.x, c18.y -mad.f32 r0.w, c3.w, r3.w, r0.y -max.f r0.x, r0.z, c18.x -mad.f32 r0.y, r3.y, r1.x, r6.w -mad.f32 r0.z, c3.z, r3.w, r8.y -mad.f32 r1.x, c2.y, r3.z, r9.y -min.f r1.y, r0.x, c18.y -max.f r0.x, r0.y, c18.x -mad.f32 r0.y, c3.y, r3.w, r1.x -mad.f32 r3.x, c2.x, r3.z, r3.x +max.f r1.x, r1.w, c18.x +mad.f32 r1.y, r8.z, r9.z, r4.x +mad.f32 r1.z, r0.y, r4.z, r6.y +min.f r4.z, r0.z, c18.y +min.f r4.y, r1.x, c18.y +max.f r0.z, r1.y, c18.x +max.f r0.x, r0.x, c19.x +mul.f r1.x, c15.x, r7.x +mad.f32 r1.y, c7.x, r7.x, r8.x +min.f r4.x, r0.z, c18.y +mul.f r0.x, r0.x, r11.x +mul.f r0.z, c16.z, r7.z +mul.f r1.w, c16.y, r7.y +mad.f32 r1.x, r0.y, r1.x, r1.y +mul.f r1.y, c16.x, r7.x +mad.f32 r1.z, r0.x, r0.z, r1.z +mad.f32 r1.w, r0.x, r1.w, r4.w +mad.f32 r0.z, c3.z, r6.w, r9.x +mad.f32 r0.y, c3.y, r6.w, r9.y +max.f r1.z, r1.z, c18.x +max.f r1.w, r1.w, c18.x +mad.f32 r0.x, r0.x, r1.y, r1.x +mad.f32 r1.x, c2.x, r6.z, r6.x +min.f r1.z, r1.z, c18.y +min.f r1.y, r1.w, c18.y +max.f r1.w, r0.x, c18.x +mad.f32 r0.x, c3.x, r6.w, r1.x +max.f r4.w, r7.w, c18.x +max.f r6.x, r7.w, c18.x +min.f r1.x, r1.w, c18.y nop -min.f r1.x, r0.x, c18.y -mad.f32 r0.x, c3.x, r3.w, r3.x -mov.f32f32 r3.w, r7.w -mov.f32f32 r3.z, r7.z -mov.f32f32 r3.y, r7.y -mov.f32f32 r3.x, r7.x +min.f r4.w, r4.w, c18.y +min.f r1.w, r6.x, c18.y end -nop -nop -nop ; VERT: outputs: r0.x (0:0) r1.x (1:0) r2.x (1:1) r3.x (5:0) r4.x (2:0) r5.x (2:1) -; VERT: inputs: r3.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r6.x (0:0,cm=f,il=16,b=0) r7.x (0:0,cm=f,il=20,b=0) r0.w (0:0,cm=7,il=24,b=0) r2.x (0:0,cm=7,il=28,b=0) r5.x (0:0,cm=7,il=32,b=0) r4.x (0:0,cm=7,il=36,b=0) r1.z (0:0,cm=1,il=40,b=0) r8.x (0:0,cm=1,il=44,b=0) -; VERT: 221 instructions, 0 half, 12 full +; VERT: inputs: r6.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r7.x (0:0,cm=f,il=16,b=0) r3.x (0:0,cm=f,il=20,b=0) r1.x (0:0,cm=7,il=24,b=0) r1.w (0:0,cm=7,il=28,b=0) r8.x (0:0,cm=7,il=32,b=0) r4.x (0:0,cm=7,il=36,b=0) r2.z (0:0,cm=1,il=40,b=0) r0.w (0:0,cm=1,il=44,b=0) +; VERT: 189 instructions, 0 half, 12 full diff --git a/reference/maniadrive/maniadrive-04.asm b/reference/maniadrive/maniadrive-04.asm index 694d656..59b8c44 100644 --- a/reference/maniadrive/maniadrive-04.asm +++ b/reference/maniadrive/maniadrive-04.asm @@ -6,31 +6,20 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c5.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)bary.f r0.z, 0, r0.x -bary.f r0.w, 3, r0.x -bary.f (ei)r0.x, 1, r0.x -nop -mov.f32f32 r0.y, r0.z -mov.f32f32 r0.z, r0.w -mov.f32f32 r0.x, r0.x -nop -mov.f32f32 r0.w, r0.y -mov.f32f32 r1.y, r0.z -mov.f32f32 r1.x, r0.x +bary.f r0.w, 1, r0.x +bary.f (ei)r1.x, 3, r0.x (rpt5)nop -sam.p (f32)(xyzw)r0.x, r0.w, s#0, t#0 -(sy)(ss)mul.f r0.w, r0.w, c4.w -mul.f r0.z, r0.z, c4.z -mul.f r0.y, r0.y, c4.y -mul.f r0.x, r0.x, c4.x -mov.f32f32 r1.w, r0.w -mov.f32f32 r1.z, r0.z -mov.f32f32 r1.y, r0.y -mov.f32f32 r1.x, r0.x +sam.p (f32)(xyzw)r0.x, r0.z, s#0, t#0 +(sy)mul.f r1.w, r0.w, c4.w +mul.f r1.z, r0.z, c4.z +mul.f r1.y, r0.y, c4.y +(ss)mul.f r1.x, r0.x, c4.x end nop nop ; FRAG: outputs: r1.x (1:0) -; FRAG: inputs: r0.x (5:0,cm=f,il=8,b=1) -; FRAG: 27 instructions, 0 half, 2 full +; FRAG: inputs: r0.y (5:0,cm=f,il=8,b=1) +; FRAG: 15 instructions, 0 half, 2 full diff --git a/reference/maniadrive/maniadrive-06.asm b/reference/maniadrive/maniadrive-06.asm index 2e8a6fe..0251eb7 100644 --- a/reference/maniadrive/maniadrive-06.asm +++ b/reference/maniadrive/maniadrive-06.asm @@ -6,35 +6,20 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 -(sy)(ss)bary.f r0.z, 0, r0.x -bary.f r0.w, 3, r0.x -bary.f (ei)r0.x, 1, r0.x -mov.f32f32 r0.y, c4.z -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r1.z, r0.y -mov.f32f32 r1.w, r0.z -mov.f32f32 r2.y, r0.w -mov.f32f32 r2.x, r0.x -mov.f32f32 r0.x, c4.y -mov.f32f32 r0.y, c4.x -(rpt3)nop -sam.p (f32)(w)r0.z, r1.w, s#0, t#0 -(sy)mul.f r0.z, r1.y, c4.w -mov.f32f32 r1.y, r0.x -mov.f32f32 r1.x, r0.y -nop -mov.f32f32 r0.x, r0.z -(rpt2)nop -mov.f32f32 r0.x, r0.x +@const(c5.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 +(sy)(ss)bary.f r1.w, 0, r0.x +bary.f r2.x, 1, r0.x +bary.f (ei)r2.y, 3, r0.x +mov.f32f32 r1.z, c4.z +mov.f32f32 r1.y, c4.y +mov.f32f32 r1.x, c4.x (rpt2)nop -(ss)mov.f32f32 r1.w, r0.x +sam.p (f32)(w)r0.x, r1.w, s#0, t#0 +(sy)(ss)mul.f r1.w, r0.w, c4.w end nop nop -nop ; FRAG: outputs: r1.x (1:0) -; FRAG: inputs: r0.x (5:0,cm=f,il=8,b=1) -; FRAG: 32 instructions, 0 half, 3 full +; FRAG: inputs: r1.z (5:0,cm=f,il=8,b=1) +; FRAG: 12 instructions, 0 half, 3 full diff --git a/reference/maniadrive/maniadrive-07.asm b/reference/maniadrive/maniadrive-07.asm index 0a1e555..e61b89b 100644 --- a/reference/maniadrive/maniadrive-07.asm +++ b/reference/maniadrive/maniadrive-07.asm @@ -6,39 +6,20 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 -(sy)(ss)bary.f r0.z, 4, r0.x -bary.f r0.w, 7, r0.x -bary.f r1.x, 5, r0.x -bary.f r1.y, 2, r0.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.w, r0.z -mov.f32f32 r2.y, r0.w -mov.f32f32 r2.x, r1.x -mov.f32f32 r1.z, r1.y -bary.f r0.z, 1, r0.x -bary.f r0.w, 0, r0.x -bary.f (ei)r0.x, 3, r0.x +@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 +(sy)(ss)bary.f r1.w, 4, r0.x +bary.f r2.x, 5, r0.x +bary.f r2.y, 7, r0.x +bary.f r0.z, 3, r0.x +bary.f r1.z, 2, r0.x +bary.f r1.y, 1, r0.x +bary.f (ei)r1.x, 0, r0.x (rpt1)nop sam.p (f32)(w)r1.w, r1.w, s#0, t#0 -mov.f32f32 r0.y, r0.z -(sy)mul.f r0.x, r2.z, r0.x -mov.f32f32 r0.z, r0.w -(rpt1)nop -mov.f32f32 r0.x, r0.x -mov.f32f32 r1.y, r0.y -mov.f32f32 r1.x, r0.z -nop -mov.f32f32 r0.x, r0.x -(rpt2)nop -(ss)mov.f32f32 r1.w, r0.x +(sy)(ss)mul.f r1.w, r2.z, r0.z end nop -nop -nop ; FRAG: outputs: r1.x (1:0) -; FRAG: inputs: r63.y (1:0,cm=f,il=8,b=1) r0.x (5:0,cm=f,il=12,b=1) -; FRAG: 33 instructions, 0 half, 3 full +; FRAG: inputs: r63.w (1:0,cm=f,il=8,b=1) r1.z (5:0,cm=f,il=12,b=1) +; FRAG: 12 instructions, 0 half, 3 full diff --git a/reference/maniadrive/maniadrive-08.asm b/reference/maniadrive/maniadrive-08.asm index cca09e5..0583b5d 100644 --- a/reference/maniadrive/maniadrive-08.asm +++ b/reference/maniadrive/maniadrive-08.asm @@ -1,17 +1,17 @@ ; options: ; VERT: new compiler -@in(r2.x) in0 -@in(r2.y) in1 -@in(r2.z) in2 -@in(r2.w) in3 -@in(r0.x) in4 -@in(r0.y) in5 -@in(r0.z) in6 -@in(r0.w) in7 -@in(r3.x) in8 -@in(r3.y) in9 -@in(r3.z) in10 -@in(r3.w) in11 +@in(r1.x) in0 +@in(r1.y) in1 +@in(r1.z) in2 +@in(r1.w) in3 +@in(r3.x) in4 +@in(r3.y) in5 +@in(r3.z) in6 +@in(r3.w) in7 +@in(r2.x) in8 +@in(r2.y) in9 +@in(r2.z) in10 +@in(r2.w) in11 @out(r0.x) out0 @out(r0.y) out1 @out(r0.z) out2 @@ -24,43 +24,36 @@ @out(r2.y) out9 @out(r2.z) out10 @out(r2.w) out11 -(sy)(ss)mov.f32f32 r0.w, r0.w -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x -max.f r0.w, r0.w, c4.x -max.f r0.z, r0.z, c4.x -max.f r0.y, r0.y, c4.x -max.f r0.x, r0.x, c4.x -min.f r1.w, r0.w, c4.y -min.f r1.z, r0.z, c4.y -min.f r1.y, r0.y, c4.y -min.f r1.x, r0.x, c4.y -mul.f r0.x, r2.x, c0.w -mul.f r0.y, r2.x, c0.z -mad.f32 r0.x, c1.w, r2.y, r0.x -mad.f32 r0.y, c1.z, r2.y, r0.y -mad.f32 r0.x, c2.w, r2.z, r0.x -mad.f32 r0.y, c2.z, r2.z, r0.y -mad.f32 r0.w, c3.w, r2.w, r0.x -mad.f32 r0.z, c3.z, r2.w, r0.y -mul.f r0.x, r2.x, c0.y -mul.f r0.y, r2.x, c0.x -mad.f32 r0.x, c1.y, r2.y, r0.x -mad.f32 r0.y, c1.x, r2.y, r0.y -mad.f32 r0.x, c2.y, r2.z, r0.x -mad.f32 r2.x, c2.x, r2.z, r0.y -mad.f32 r0.y, c3.y, r2.w, r0.x -mad.f32 r0.x, c3.x, r2.w, r2.x -mov.f32f32 r2.w, r3.w -mov.f32f32 r2.z, r3.z -mov.f32f32 r2.y, r3.y -mov.f32f32 r2.x, r3.x +@const(c4.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000 +(sy)(ss)mul.f r0.x, r1.x, c0.w +mul.f r0.y, r1.x, c0.z +mad.f32 r0.x, c1.w, r1.y, r0.x +mad.f32 r0.y, c1.z, r1.y, r0.y +mad.f32 r0.x, c2.w, r1.z, r0.x +mad.f32 r0.y, c2.z, r1.z, r0.y +mad.f32 r0.w, c3.w, r1.w, r0.x +mad.f32 r0.z, c3.z, r1.w, r0.y +mul.f r0.x, r1.x, c0.y +mul.f r0.y, r1.x, c0.x +mad.f32 r0.x, c1.y, r1.y, r0.x +mad.f32 r0.y, c1.x, r1.y, r0.y +mad.f32 r0.x, c2.y, r1.z, r0.x +mad.f32 r1.x, c2.x, r1.z, r0.y +mad.f32 r0.y, c3.y, r1.w, r0.x +mad.f32 r0.x, c3.x, r1.w, r1.x +max.f r1.x, r3.w, c4.x +max.f r1.y, r3.z, c4.x +max.f r3.y, r3.y, c4.x +max.f r3.x, r3.x, c4.x +min.f r1.w, r1.x, c4.y +min.f r1.z, r1.y, c4.y +min.f r1.y, r3.y, c4.y +min.f r1.x, r3.x, c4.y end nop nop nop ; VERT: outputs: r0.x (0:0) r1.x (1:0) r2.x (5:0) -; VERT: inputs: r2.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=f,il=12,b=0) r3.x (0:0,cm=f,il=16,b=0) -; VERT: 33 instructions, 0 half, 4 full +; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) r3.x (0:0,cm=f,il=12,b=0) r2.x (0:0,cm=f,il=16,b=0) +; VERT: 25 instructions, 0 half, 4 full diff --git a/reference/maniadrive/maniadrive-09.asm b/reference/maniadrive/maniadrive-09.asm index 6a4268b..cc94e0f 100644 --- a/reference/maniadrive/maniadrive-09.asm +++ b/reference/maniadrive/maniadrive-09.asm @@ -6,39 +6,32 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)bary.f r0.z, 8, r0.x -bary.f r0.w, 11, r0.x -bary.f r1.x, 9, r0.x +bary.f r0.w, 9, r0.x +bary.f r1.x, 11, r0.x bary.f r1.y, 3, r0.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.x, r1.x bary.f r1.z, 6, r0.x -mov.f32f32 r1.w, r0.z -mov.f32f32 r2.y, r0.w -mov.f32f32 r2.x, r1.x -bary.f r0.z, 2, r0.x -bary.f r0.w, 5, r0.x -bary.f r1.x, 1, r0.x +bary.f r1.w, 2, r0.x +bary.f r2.x, 5, r0.x +bary.f r2.y, 1, r0.x bary.f r2.z, 4, r0.x +sam.p (f32)(xyzw)r2.w, r0.z, s#0, t#0 +(sy)(ss)mul.f r0.z, r3.y, r1.w bary.f (ei)r0.x, 0, r0.x -nop -sam.p (f32)(xyzw)r2.w, r1.w, s#0, t#0 -(sy)mul.f r0.y, r3.z, r1.y -mul.f r0.z, r3.y, r0.z -mul.f r1.x, r3.x, r1.x +mul.f r1.w, r3.z, r1.y +mul.f r0.y, r3.x, r2.y +add.f r1.z, r0.z, r1.z mul.f r0.x, r2.w, r0.x -mov.f32f32 r0.y, r0.y -add.f r0.z, r0.z, r1.z -add.f r0.w, r1.x, r0.w -add.f r0.x, r0.x, r2.z -(ss)mov.f32f32 r1.w, r0.y -mov.f32f32 r1.z, r0.z -mov.f32f32 r1.y, r0.w -mov.f32f32 r1.x, r0.x +nop +add.f r1.y, r0.y, r2.x +nop +add.f r1.x, r0.x, r2.z end nop +nop +nop ; FRAG: outputs: r1.x (1:0) -; FRAG: inputs: r0.z (1:0,cm=f,il=8,b=1) r1.x (1:1,cm=f,il=12,b=1) r0.x (5:0,cm=f,il=16,b=1) -; FRAG: 31 instructions, 0 half, 4 full +; FRAG: inputs: r0.z (1:0,cm=f,il=8,b=1) r1.x (1:1,cm=f,il=12,b=1) r0.y (5:0,cm=f,il=16,b=1) +; FRAG: 21 instructions, 0 half, 4 full diff --git a/reference/maniadrive/maniadrive-10.asm b/reference/maniadrive/maniadrive-10.asm index cdcbf04..0babf82 100644 --- a/reference/maniadrive/maniadrive-10.asm +++ b/reference/maniadrive/maniadrive-10.asm @@ -4,10 +4,10 @@ @in(r6.y) in1 @in(r6.z) in2 @in(r6.w) in3 -@in(r7.x) in4 -@in(r7.y) in5 -@in(r7.z) in6 -@in(r7.w) in7 +@in(r3.x) in4 +@in(r3.y) in5 +@in(r3.z) in6 +@in(r3.w) in7 @out(r0.x) out0 @out(r0.y) out1 @out(r0.z) out2 @@ -32,219 +32,193 @@ @out(r5.y) out21 @out(r5.z) out22 @out(r5.w) out23 +@const(c25.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000 +@const(c26.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)mul.f r0.x, r6.x, c14.x -mov.f32f32 r0.y, c25.y +mul.f r0.y, r6.x, c14.y mad.f32 r0.x, c15.x, r6.y, r0.x -mov.f32f32 r0.z, c12.w +mad.f32 r0.y, c15.y, r6.y, r0.y mad.f32 r0.x, c16.x, r6.z, r0.x -mov.f32f32 r0.y, r0.y +mad.f32 r0.y, c16.y, r6.z, r0.y mad.f32 r0.x, c17.x, r6.w, r0.x -mov.f32f32 r0.z, r0.z +mad.f32 r0.y, c17.y, r6.w, r0.y +mul.f r0.z, r6.x, c14.z mov.f32f32 r0.w, c25.y -mov.f32f32 r1.x, c9.w add.f r0.x, c13.x, (neg)r0.x -max.f r0.y, r0.y, c25.x -max.f r0.z, r0.z, c25.x -mov.f32f32 r0.w, r0.w -mul.f r1.y, r0.x, r0.x -mul.f r1.z, r6.x, c14.y -min.f r5.w, r0.y, c25.y -mad.f32 r0.y, c15.y, r6.y, r1.z -min.f r4.w, r0.z, c25.y -mad.f32 r0.y, c16.y, r6.z, r0.y -max.f r0.z, r0.w, c25.x -mad.f32 r0.y, c17.y, r6.w, r0.y -mov.f32f32 r0.w, r1.x -mov.f32f32 r1.x, c4.x -mov.f32f32 r1.z, c4.x add.f r0.y, c13.y, (neg)r0.y -min.f r2.w, r0.z, c25.y -max.f r0.z, r0.w, c25.x -mul.f r0.w, r1.x, c5.x -mad.f32 r1.x, r0.y, r0.y, r1.y -mov.f32f32 r1.y, c5.y -mul.f r1.z, r1.z, c6.x -mov.f32f32 r2.x, c4.x -mov.f32f32 r1.x, r1.x -mul.f r2.y, r6.x, c14.z -min.f r1.w, r0.z, c25.y -mad.f32 r0.z, c15.z, r6.y, r2.y -mad.f32 r0.w, c4.y, r1.y, r0.w +mad.f32 r0.z, c15.z, r6.y, r0.z +max.f r0.w, r0.w, c25.x +mul.f r1.x, r0.x, r0.x mad.f32 r0.z, c16.z, r6.z, r0.z -mov.f32f32 r1.y, c6.y +mad.f32 r1.x, r0.y, r0.y, r1.x mad.f32 r0.z, c17.z, r6.w, r0.z -mov.f32f32 r0.w, r0.w -mul.f r2.x, r2.x, c7.x -mad.f32 r1.y, c4.y, r1.y, r1.z +min.f r5.w, r0.w, c25.y +mov.f32f32 r0.w, c12.w +mov.f32f32 r1.y, c25.y add.f r0.z, c13.z, (neg)r0.z -mov.f32f32 r1.z, c5.z -mov.f32f32 r2.y, c7.y -mov.f32f32 r1.y, r1.y +mov.f32f32 r1.z, c9.w +mov.f32f32 r1.w, c4.x +mov.f32f32 r2.x, c4.x mad.f32 r1.x, r0.z, r0.z, r1.x -mad.f32 r0.w, c4.z, r1.z, r0.w -mov.f32f32 r1.z, c6.z -mad.f32 r2.x, c4.y, r2.y, r2.x -mul.f r2.y, r6.x, c0.w -mul.f r2.z, r6.x, c0.z -mul.f r3.x, r6.x, c0.y +max.f r0.w, r0.w, c25.x +max.f r1.y, r1.y, c25.x +max.f r1.z, r1.z, c25.x +mul.f r1.w, r1.w, c5.x +mul.f r2.x, r2.x, c6.x +mov.f32f32 r2.y, c4.x rsq r1.x, (abs)r1.x (ss)mul.f r0.x, r0.x, r1.x -rcp r3.y, r1.x -(ss)mov.f32f32 r3.y, r3.y -rcp r3.z, r1.x -(ss)mov.f32f32 r3.z, r3.z -rsq r3.w, (abs)r1.x -(ss)mul.f r0.y, r0.y, r3.w -add.f r3.w, r0.x, c25.x -mul.f r4.x, r1.x, r3.y -mul.f r0.x, r0.w, r0.x -mad.f32 r1.y, c4.z, r1.z, r1.y -mul.f r1.z, r3.w, r3.w -add.f r4.y, r0.y, c25.x -mov.f32f32 r4.x, r4.x -mad.f32 r0.x, r1.y, r0.y, r0.x -mul.f r0.y, r3.z, r3.y -mad.f32 r1.z, r4.y, r4.y, r1.z -mul.f r3.z, c18.x, r4.x -mov.f32f32 r0.x, r0.x -(ss)rsq r1.x, (abs)r1.x -mad.f32 r3.y, c18.y, r3.y, r3.z -mov.f32f32 r1.z, r1.z -(ss)mul.f r0.z, r0.z, r1.x -(ss)mov.f32f32 r1.x, r2.x -mov.f32f32 r2.x, c7.z -mov.f32f32 r3.y, r3.y -add.f r3.z, r0.z, c25.y -mov.f32f32 r0.y, r0.y -mad.f32 r1.x, c4.z, r2.x, r1.x -mad.f32 r2.x, c1.w, r6.y, r2.y -mad.f32 r1.z, r3.z, r3.z, r1.z -mad.f32 r0.y, c18.z, r0.y, r3.y -mad.f32 r0.x, r1.x, r0.z, r0.x -mad.f32 r0.z, c2.w, r6.z, r2.x -mad.f32 r3.y, c1.z, r6.y, r2.z -mad.f32 r3.x, c1.y, r6.y, r3.x -mul.f r6.x, r6.x, c0.x -rsq r1.z, (abs)r1.z -(ss)mov.f32f32 r1.z, r1.z -mov.f32f32 r0.y, r0.y -max.f r2.x, (neg)r0.x, c26.x -max.f r2.y, r0.x, c26.x -mul.f r2.z, r3.w, r1.z -mul.f r3.w, r4.y, r1.z -mul.f r1.z, r3.z, r1.z -mov.f32f32 r2.x, r2.x -mul.f r0.w, r0.w, r2.z -rcp r2.z, r0.y -mov.f32f32 r3.z, c26.y -mad.f32 r0.w, r1.y, r3.w, r0.w -mov.f32f32 r1.y, c26.y -rcp r3.w, r0.y -mov.f32f32 r2.y, r2.y -(ss)mul.f r3.z, r3.z, r2.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r4.x, c12.z -mad.f32 r0.w, r1.x, r1.z, r0.w -mad.f32 r1.x, c22.z, r3.z, r4.x -mov.f32f32 r1.z, c12.y -mov.f32f32 r4.x, c12.x -max.f r4.y, (neg)r0.w, c26.x +rcp r2.z, r1.x +(ss)mov.f32f32 r2.w, r2.z +mov.f32f32 r4.x, c5.y +rsq r4.y, (abs)r1.x +(ss)mul.f r0.y, r0.y, r4.y +add.f r4.y, r0.x, c25.x +mul.f r4.z, r1.x, r2.w +mad.f32 r1.w, c4.y, r4.x, r1.w +mov.f32f32 r4.x, c5.z +mul.f r4.w, r4.y, r4.y +add.f r5.x, r0.y, c25.x +mul.f r4.z, c18.x, r4.z +mad.f32 r4.x, c4.z, r4.x, r1.w +mad.f32 r1.w, c18.y, r2.w, r4.z +mad.f32 r4.z, r5.x, r5.x, r4.w +rsq r2.w, (abs)r1.x +(ss)mul.f r0.z, r0.z, r2.w +(ss)rcp r1.x, r1.x +(ss)mul.f r1.x, r1.x, r2.z +mul.f r0.x, r4.x, r0.x +mov.f32f32 r2.z, c6.y +add.f r5.y, r0.z, c25.y +mad.f32 r1.x, c18.z, r1.x, r1.w +min.f r4.w, r0.w, c25.y +min.f r2.w, r1.y, c25.y +mad.f32 r0.w, r5.y, r5.y, r4.z +mov.f32f32 r1.y, r1.x +mad.f32 r2.x, c4.y, r2.z, r2.x +mov.f32f32 r2.z, c6.z +min.f r1.w, r1.z, c25.y +mul.f r1.z, r2.y, c7.x +mov.f32f32 r2.y, c7.y +rsq r0.w, (abs)r0.w +(ss)mov.f32f32 r4.z, r0.w +rcp r5.z, r1.y +mov.f32f32 r7.x, c26.y +mov.f32f32 r7.y, c26.y +(ss)rcp r1.y, r1.y +nop +rcp r1.x, r1.x +mul.f r4.y, r4.y, r4.z +(ss)mul.f r7.x, r7.x, r5.z +mov.f32f32 r7.z, c12.y +mov.f32f32 r7.w, c12.x +mul.f r4.x, r4.x, r4.y +mul.f r4.y, r5.x, r4.z +mad.f32 r2.x, c4.z, r2.z, r2.x +mov.f32f32 r2.z, c12.z +mad.f32 r4.z, c22.y, r7.x, r7.z +mad.f32 r5.x, c22.x, r7.x, r7.w +mad.f32 r4.x, r2.x, r4.y, r4.x +mul.f r0.w, r5.y, r0.w +mad.f32 r1.z, c4.y, r2.y, r1.z +mov.f32f32 r2.y, c7.z +mad.f32 r2.z, c22.z, r7.x, r2.z +mad.f32 r0.x, r2.x, r0.y, r0.x +mul.f r0.y, r7.y, r5.z +mad.f32 r1.z, c4.z, r2.y, r1.z +mov.f32f32 r2.x, c9.z +mov.f32f32 r2.y, c9.y +mov.f32f32 r4.y, c9.x +mad.f32 r0.w, r1.z, r0.w, r4.x +mad.f32 r0.x, r1.z, r0.z, r0.x +mad.f32 r0.z, c19.z, r0.y, r2.x +mad.f32 r1.z, c19.y, r0.y, r2.y +max.f r2.x, (neg)r0.w, c26.x max.f r0.w, r0.w, c26.x -mul.f r2.x, r2.x, r3.w -mad.f32 r1.z, c22.y, r3.z, r1.z -mad.f32 r3.z, c22.x, r3.z, r4.x -mul.f r1.y, r1.y, r2.z -mov.f32f32 r2.z, c9.y -log2 r4.x, r4.y -(ss)mov.f32f32 r4.x, r4.x -(ss)absneg.f r4.y, (neg)c11.x -log2 r0.w, r0.w -(ss)mov.f32f32 r0.w, r0.w -mov.f32f32 r4.z, c8.x -mad.f32 r1.x, c23.z, r2.x, r1.x -min.f r4.y, (neg)r4.y, c26.z -mad.f32 r1.z, c23.y, r2.x, r1.z -mad.f32 r2.x, c23.x, r2.x, r3.z -mov.f32f32 r3.z, c9.z -mul.f r4.x, r4.y, r4.x -min.f r4.y, r4.z, c26.z -max.f r1.x, r1.x, c25.x -max.f r1.z, r1.z, c25.x -mov.f32f32 r4.x, r4.x -mul.f r0.w, r4.y, r0.w -min.f r4.z, r1.x, c25.y -min.f r4.y, r1.z, c25.y -max.f r1.x, r2.x, c25.x -mad.f32 r1.z, c19.z, r1.y, r3.z -mad.f32 r2.x, c19.y, r1.y, r2.z -exp2 r2.z, r4.x -(ss)mov.f32f32 r2.z, r2.z -cmps.f.lt r3.z, (neg)r0.x, c25.x -mov.f32f32 r0.w, r0.w -(ss)min.f r4.x, r1.x, c25.y -mul.f r1.x, r2.y, r3.w -sel.b32 r2.y, r2.z, r3.z, c26.x -mov.f32f32 r2.z, c9.x -rcp r0.y, r0.y +max.f r2.y, (neg)r0.x, c26.x +max.f r4.x, r0.x, c26.x +mad.f32 r0.y, c19.x, r0.y, r4.y +cmps.f.lt r4.y, (neg)r0.x, c25.x cmps.f.lt r0.x, (neg)r0.x, c25.x -mad.f32 r1.z, c20.z, r1.x, r1.z -mov.f32f32 r2.y, r2.y +log2 r2.x, r2.x +absneg.f r5.y, (neg)c11.x +log2 r0.w, r0.w +mov.f32f32 r5.z, c8.x +mul.f r2.y, r2.y, r1.x +(ss)mul.f r1.x, r4.x, r1.x +min.f r4.x, (neg)r5.y, c26.z +min.f r5.y, r5.z, c26.z +mad.f32 r2.z, c23.z, r2.y, r2.z +mad.f32 r4.z, c23.y, r2.y, r4.z +(ss)mul.f r2.x, r4.x, r2.x +mul.f r0.w, r5.y, r0.w +max.f r2.z, r2.z, c25.x +max.f r4.x, r4.z, c25.x +mad.f32 r2.y, c23.x, r2.y, r5.x +mad.f32 r0.z, c20.z, r1.x, r0.z +mad.f32 r7.x, c20.y, r1.x, r1.z +exp2 r1.z, r2.x +(ss)sel.b32 r1.z, r1.z, r4.y, c26.x exp2 r0.w, r0.w -(ss)mov.f32f32 r0.w, r0.w -mad.f32 r2.x, c20.y, r1.x, r2.x -max.f r1.z, r1.z, c25.x -mov.f32f32 r2.y, r2.y -sel.b32 r0.x, r0.w, r0.x, c26.x -max.f r0.w, r2.x, c25.x -mad.f32 r3.z, c19.x, r1.y, r2.z -mul.f r1.y, r2.y, r0.y +(ss)sel.b32 r0.x, r0.w, r0.x, c26.x +min.f r4.z, r2.z, c25.y +min.f r4.y, r4.x, c25.y +(ss)mul.f r0.w, r1.z, r1.y +mov.f32f32 r1.z, c25.x mov.f32f32 r2.x, c25.x -mov.f32f32 r2.y, c25.x mov.f32f32 r2.z, c25.x -mad.f32 r2.x, c24.z, r1.y, r2.x -mad.f32 r2.y, c24.y, r1.y, r2.y -mad.f32 r1.y, c24.x, r1.y, r2.z -mov.f32f32 r0.x, r0.x -max.f r2.x, r2.x, c25.x -max.f r2.y, r2.y, c25.x -max.f r1.y, r1.y, c25.x -mov.f32f32 r0.x, r0.x -min.f r5.z, r2.x, c25.y -min.f r5.y, r2.y, c25.y -min.f r5.x, r1.y, c25.y -mul.f r0.x, r0.x, r0.y -mov.f32f32 r0.y, c25.x -mov.f32f32 r1.y, c25.x +mad.f32 r1.z, c24.z, r0.w, r1.z +mad.f32 r2.x, c24.y, r0.w, r2.x +mad.f32 r0.w, c24.x, r0.w, r2.z +mul.f r0.x, r0.x, r1.y +max.f r1.y, r1.z, c25.x +max.f r1.z, r2.x, c25.x +max.f r0.w, r0.w, c25.x mov.f32f32 r2.x, c25.x -mad.f32 r0.y, c21.z, r0.x, r0.y +min.f r5.z, r1.y, c25.y +min.f r5.y, r1.z, c25.y +min.f r5.x, r0.w, c25.y +mad.f32 r0.w, c21.z, r0.x, r2.x +mov.f32f32 r1.y, c25.x +mov.f32f32 r1.z, c25.x mad.f32 r1.y, c21.y, r0.x, r1.y -mad.f32 r0.x, c21.x, r0.x, r2.x -min.f r1.z, r1.z, c25.y -max.f r0.y, r0.y, c25.x -max.f r1.y, r1.y, c25.x +max.f r0.w, r0.w, c25.x +mad.f32 r0.x, c21.x, r0.x, r1.z +max.f r1.z, r2.y, c25.x +max.f r0.z, r0.z, c25.x +min.f r2.z, r0.w, c25.y +max.f r0.w, r1.y, c25.x max.f r0.x, r0.x, c25.x -nop -min.f r2.z, r0.y, c25.y -min.f r2.y, r1.y, c25.y +(rpt1)nop +min.f r2.y, r0.w, c25.y min.f r2.x, r0.x, c25.y -min.f r1.y, r0.w, c25.y -mad.f32 r0.x, c20.x, r1.x, r3.z -mad.f32 r0.w, c3.w, r6.w, r0.z -mad.f32 r0.y, c2.z, r6.z, r3.y -mad.f32 r1.x, c2.y, r6.z, r3.x -max.f r0.x, r0.x, c25.x -mad.f32 r0.z, c3.z, r6.w, r0.y -mad.f32 r0.y, c3.y, r6.w, r1.x -mad.f32 r3.x, c1.x, r6.y, r6.x +min.f r4.x, r1.z, c25.y +min.f r1.z, r0.z, c25.y +max.f r0.x, r7.x, c25.x +mad.f32 r0.y, c20.x, r1.x, r0.y +mul.f r0.z, r6.x, c0.w +mul.f r0.w, r6.x, c0.z +min.f r1.y, r0.x, c25.y +max.f r0.x, r0.y, c25.x +mad.f32 r0.y, c1.w, r6.y, r0.z +mad.f32 r0.z, c1.z, r6.y, r0.w +mul.f r7.x, r6.x, c0.y min.f r1.x, r0.x, c25.y -mad.f32 r0.x, c2.x, r6.z, r3.x -mov.f32f32 r3.w, r7.w +mad.f32 r0.x, c2.w, r6.z, r0.y +mad.f32 r0.y, c2.z, r6.z, r0.z +mad.f32 r0.w, c3.w, r6.w, r0.x +mad.f32 r0.z, c3.z, r6.w, r0.y +mad.f32 r0.x, c1.y, r6.y, r7.x +mul.f r0.y, r6.x, c0.x +mad.f32 r0.x, c2.y, r6.z, r0.x +mad.f32 r6.x, c1.x, r6.y, r0.y +mad.f32 r0.y, c3.y, r6.w, r0.x +mad.f32 r0.x, c2.x, r6.z, r6.x +nop mad.f32 r0.x, c3.x, r6.w, r0.x -mov.f32f32 r3.z, r7.z -mov.f32f32 r3.y, r7.y -mov.f32f32 r3.x, r7.x end +nop ; VERT: outputs: r0.x (0:0) r1.x (1:0) r2.x (1:1) r3.x (5:0) r4.x (2:0) r5.x (2:1) -; VERT: inputs: r6.x (0:0,cm=f,il=8,b=0) r7.x (0:0,cm=f,il=12,b=0) -; VERT: 212 instructions, 0 half, 8 full +; VERT: inputs: r6.x (0:0,cm=f,il=8,b=0) r3.x (0:0,cm=f,il=12,b=0) +; VERT: 184 instructions, 0 half, 8 full diff --git a/reference/maniadrive/maniadrive-13.asm b/reference/maniadrive/maniadrive-13.asm index 0c29dd4..4fe07cd 100644 --- a/reference/maniadrive/maniadrive-13.asm +++ b/reference/maniadrive/maniadrive-13.asm @@ -6,35 +6,32 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c10.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)mov.f32f32 r0.z, c9.x -mov.f32f32 r0.w, c9.w -mov.f32f32 r1.x, c9.y -bary.f r1.y, 3, r0.x -mov.f32f32 r1.z, r0.z -mov.f32f32 r2.x, r0.w -mov.f32f32 r1.w, r1.x -bary.f r0.z, 6, r0.x -bary.f r0.w, 2, r0.x -bary.f r1.x, 5, r0.x +mov.f32f32 r0.w, c9.y +mov.f32f32 r1.x, c9.w +bary.f r1.y, 6, r0.x +bary.f r1.z, 2, r0.x +bary.f r1.w, 3, r0.x +bary.f r2.x, 5, r0.x bary.f r2.y, 1, r0.x bary.f r2.z, 4, r0.x +sam.p (f32)(xyzw)r2.w, r0.z, s#0, t#0 +(sy)(ss)mul.f r0.z, r3.y, r1.z bary.f (ei)r0.x, 0, r0.x -sam.p (f32)(xyzw)r2.w, r1.z, s#0, t#0 -(sy)mul.f r0.y, r3.z, r1.y -mul.f r0.w, r3.y, r0.w -mul.f r1.y, r3.x, r2.y +mul.f r1.w, r3.z, r1.w +mul.f r0.y, r3.x, r2.y +add.f r1.z, r0.z, r1.y mul.f r0.x, r2.w, r0.x -mov.f32f32 r0.y, r0.y -add.f r0.z, r0.w, r0.z -add.f r0.w, r1.y, r1.x -add.f r0.x, r0.x, r2.z -(ss)mov.f32f32 r1.w, r0.y -mov.f32f32 r1.z, r0.z -mov.f32f32 r1.y, r0.w -mov.f32f32 r1.x, r0.x +nop +add.f r1.y, r0.y, r2.x +nop +add.f r1.x, r0.x, r2.z end nop +nop +nop ; FRAG: outputs: r1.x (1:0) -; FRAG: inputs: r0.z (1:0,cm=f,il=8,b=1) r1.x (1:1,cm=f,il=12,b=1) -; FRAG: 27 instructions, 0 half, 4 full +; FRAG: inputs: r1.x (1:0,cm=f,il=8,b=1) r1.x (1:1,cm=f,il=12,b=1) +; FRAG: 21 instructions, 0 half, 4 full diff --git a/reference/maniadrive/maniadrive-14.asm b/reference/maniadrive/maniadrive-14.asm index 888c361..cd3f2f7 100644 --- a/reference/maniadrive/maniadrive-14.asm +++ b/reference/maniadrive/maniadrive-14.asm @@ -24,219 +24,193 @@ @out(r4.y) out17 @out(r4.z) out18 @out(r4.w) out19 +@const(c25.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000 +@const(c26.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)mul.f r0.x, r5.x, c14.x -mov.f32f32 r0.y, c25.y +mul.f r0.y, r5.x, c14.y mad.f32 r0.x, c15.x, r5.y, r0.x -mov.f32f32 r0.z, c12.w +mad.f32 r0.y, c15.y, r5.y, r0.y mad.f32 r0.x, c16.x, r5.z, r0.x -mov.f32f32 r0.y, r0.y +mad.f32 r0.y, c16.y, r5.z, r0.y mad.f32 r0.x, c17.x, r5.w, r0.x -mov.f32f32 r0.z, r0.z +mad.f32 r0.y, c17.y, r5.w, r0.y +mul.f r0.z, r5.x, c14.z mov.f32f32 r0.w, c25.y -mov.f32f32 r1.x, c9.w add.f r0.x, c13.x, (neg)r0.x -max.f r0.y, r0.y, c25.x -max.f r0.z, r0.z, c25.x -mov.f32f32 r0.w, r0.w -mul.f r1.y, r0.x, r0.x -mul.f r1.z, r5.x, c14.y -min.f r4.w, r0.y, c25.y -mad.f32 r0.y, c15.y, r5.y, r1.z -min.f r3.w, r0.z, c25.y -mad.f32 r0.y, c16.y, r5.z, r0.y -max.f r0.z, r0.w, c25.x -mad.f32 r0.y, c17.y, r5.w, r0.y -mov.f32f32 r0.w, r1.x -mov.f32f32 r1.x, c4.x -mov.f32f32 r1.z, c4.x add.f r0.y, c13.y, (neg)r0.y -min.f r2.w, r0.z, c25.y -max.f r0.z, r0.w, c25.x -mul.f r0.w, r1.x, c5.x -mad.f32 r1.x, r0.y, r0.y, r1.y -mov.f32f32 r1.y, c5.y -mul.f r1.z, r1.z, c6.x -mov.f32f32 r2.x, c4.x -mov.f32f32 r1.x, r1.x -mul.f r2.y, r5.x, c14.z -min.f r1.w, r0.z, c25.y -mad.f32 r0.z, c15.z, r5.y, r2.y -mad.f32 r0.w, c4.y, r1.y, r0.w +mad.f32 r0.z, c15.z, r5.y, r0.z +max.f r0.w, r0.w, c25.x +mul.f r1.x, r0.x, r0.x mad.f32 r0.z, c16.z, r5.z, r0.z -mov.f32f32 r1.y, c6.y +mad.f32 r1.x, r0.y, r0.y, r1.x mad.f32 r0.z, c17.z, r5.w, r0.z -mov.f32f32 r0.w, r0.w -mul.f r2.x, r2.x, c7.x -mad.f32 r1.y, c4.y, r1.y, r1.z +min.f r4.w, r0.w, c25.y +mov.f32f32 r0.w, c12.w +mov.f32f32 r1.y, c25.y add.f r0.z, c13.z, (neg)r0.z -mov.f32f32 r1.z, c5.z -mov.f32f32 r2.y, c7.y -mov.f32f32 r1.y, r1.y +mov.f32f32 r1.z, c9.w +mov.f32f32 r1.w, c4.x +mov.f32f32 r2.x, c4.x mad.f32 r1.x, r0.z, r0.z, r1.x -mad.f32 r0.w, c4.z, r1.z, r0.w -mov.f32f32 r1.z, c6.z -mad.f32 r2.x, c4.y, r2.y, r2.x -mul.f r2.y, r5.x, c0.w -mul.f r2.z, r5.x, c0.z -mul.f r3.x, r5.x, c0.y +max.f r0.w, r0.w, c25.x +max.f r1.y, r1.y, c25.x +max.f r1.z, r1.z, c25.x +mul.f r1.w, r1.w, c5.x +mul.f r2.x, r2.x, c6.x +mov.f32f32 r2.y, c4.x rsq r1.x, (abs)r1.x (ss)mul.f r0.x, r0.x, r1.x -rcp r3.y, r1.x -(ss)mov.f32f32 r3.y, r3.y -rcp r3.z, r1.x -(ss)mov.f32f32 r3.z, r3.z -rsq r4.x, (abs)r1.x -(ss)mul.f r0.y, r0.y, r4.x -add.f r4.x, r0.x, c25.x -mul.f r4.y, r1.x, r3.y -mul.f r0.x, r0.w, r0.x -mad.f32 r1.y, c4.z, r1.z, r1.y -mul.f r1.z, r4.x, r4.x -add.f r4.z, r0.y, c25.x -mov.f32f32 r4.y, r4.y -mad.f32 r0.x, r1.y, r0.y, r0.x -mul.f r0.y, r3.z, r3.y -mad.f32 r1.z, r4.z, r4.z, r1.z -mul.f r3.z, c18.x, r4.y -mov.f32f32 r0.x, r0.x -(ss)rsq r1.x, (abs)r1.x -mad.f32 r3.y, c18.y, r3.y, r3.z -mov.f32f32 r1.z, r1.z -(ss)mul.f r0.z, r0.z, r1.x -(ss)mov.f32f32 r1.x, r2.x -mov.f32f32 r2.x, c7.z -mov.f32f32 r3.y, r3.y -add.f r3.z, r0.z, c25.y -mov.f32f32 r0.y, r0.y -mad.f32 r1.x, c4.z, r2.x, r1.x -mad.f32 r2.x, c1.w, r5.y, r2.y -mad.f32 r1.z, r3.z, r3.z, r1.z -mad.f32 r0.y, c18.z, r0.y, r3.y -mad.f32 r0.x, r1.x, r0.z, r0.x -mad.f32 r0.z, c2.w, r5.z, r2.x -mad.f32 r6.x, c1.z, r5.y, r2.z -mad.f32 r6.y, c1.y, r5.y, r3.x -mul.f r5.x, r5.x, c0.x -rsq r1.z, (abs)r1.z -(ss)mov.f32f32 r1.z, r1.z -mov.f32f32 r0.y, r0.y -max.f r2.x, (neg)r0.x, c26.x -max.f r2.y, r0.x, c26.x -mul.f r2.z, r4.x, r1.z -mul.f r3.x, r4.z, r1.z -mul.f r1.z, r3.z, r1.z -mov.f32f32 r2.x, r2.x -mul.f r0.w, r0.w, r2.z -rcp r2.z, r0.y -mov.f32f32 r3.y, c26.y -mad.f32 r0.w, r1.y, r3.x, r0.w -mov.f32f32 r1.y, c26.y -rcp r4.x, r0.y -mov.f32f32 r2.y, r2.y -(ss)mul.f r3.x, r3.y, r2.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r3.y, c12.z -mad.f32 r0.w, r1.x, r1.z, r0.w -mad.f32 r1.x, c22.z, r3.x, r3.y -mov.f32f32 r1.z, c12.y -mov.f32f32 r3.y, c12.x -max.f r3.z, (neg)r0.w, c26.x +rcp r2.z, r1.x +(ss)mov.f32f32 r2.w, r2.z +mov.f32f32 r3.x, c5.y +rsq r3.y, (abs)r1.x +(ss)mul.f r0.y, r0.y, r3.y +add.f r3.y, r0.x, c25.x +mul.f r3.z, r1.x, r2.w +mad.f32 r1.w, c4.y, r3.x, r1.w +mov.f32f32 r3.x, c5.z +mul.f r3.w, r3.y, r3.y +add.f r4.x, r0.y, c25.x +mul.f r3.z, c18.x, r3.z +mad.f32 r3.x, c4.z, r3.x, r1.w +mad.f32 r1.w, c18.y, r2.w, r3.z +mad.f32 r3.z, r4.x, r4.x, r3.w +rsq r2.w, (abs)r1.x +(ss)mul.f r0.z, r0.z, r2.w +(ss)rcp r1.x, r1.x +(ss)mul.f r1.x, r1.x, r2.z +mul.f r0.x, r3.x, r0.x +mov.f32f32 r2.z, c6.y +add.f r4.y, r0.z, c25.y +mad.f32 r1.x, c18.z, r1.x, r1.w +min.f r3.w, r0.w, c25.y +min.f r2.w, r1.y, c25.y +mad.f32 r0.w, r4.y, r4.y, r3.z +mov.f32f32 r1.y, r1.x +mad.f32 r2.x, c4.y, r2.z, r2.x +mov.f32f32 r2.z, c6.z +min.f r1.w, r1.z, c25.y +mul.f r1.z, r2.y, c7.x +mov.f32f32 r2.y, c7.y +rsq r0.w, (abs)r0.w +(ss)mov.f32f32 r3.z, r0.w +rcp r4.z, r1.y +mov.f32f32 r6.x, c26.y +mov.f32f32 r6.y, c26.y +(ss)rcp r1.y, r1.y +nop +rcp r1.x, r1.x +mul.f r3.y, r3.y, r3.z +(ss)mul.f r6.x, r6.x, r4.z +mov.f32f32 r6.z, c12.y +mov.f32f32 r6.w, c12.x +mul.f r3.x, r3.x, r3.y +mul.f r3.y, r4.x, r3.z +mad.f32 r2.x, c4.z, r2.z, r2.x +mov.f32f32 r2.z, c12.z +mad.f32 r3.z, c22.y, r6.x, r6.z +mad.f32 r4.x, c22.x, r6.x, r6.w +mad.f32 r3.x, r2.x, r3.y, r3.x +mul.f r0.w, r4.y, r0.w +mad.f32 r1.z, c4.y, r2.y, r1.z +mov.f32f32 r2.y, c7.z +mad.f32 r2.z, c22.z, r6.x, r2.z +mad.f32 r0.x, r2.x, r0.y, r0.x +mul.f r0.y, r6.y, r4.z +mad.f32 r1.z, c4.z, r2.y, r1.z +mov.f32f32 r2.x, c9.z +mov.f32f32 r2.y, c9.y +mov.f32f32 r3.y, c9.x +mad.f32 r0.w, r1.z, r0.w, r3.x +mad.f32 r0.x, r1.z, r0.z, r0.x +mad.f32 r0.z, c19.z, r0.y, r2.x +mad.f32 r1.z, c19.y, r0.y, r2.y +max.f r2.x, (neg)r0.w, c26.x max.f r0.w, r0.w, c26.x -mul.f r2.x, r2.x, r4.x -mad.f32 r1.z, c22.y, r3.x, r1.z -mad.f32 r3.x, c22.x, r3.x, r3.y -mul.f r1.y, r1.y, r2.z -mov.f32f32 r2.z, c9.y -log2 r3.y, r3.z -(ss)mov.f32f32 r3.y, r3.y -(ss)absneg.f r3.z, (neg)c11.x -log2 r0.w, r0.w -(ss)mov.f32f32 r0.w, r0.w -mov.f32f32 r4.y, c8.x -mad.f32 r1.x, c23.z, r2.x, r1.x -min.f r3.z, (neg)r3.z, c26.z -mad.f32 r1.z, c23.y, r2.x, r1.z -mad.f32 r2.x, c23.x, r2.x, r3.x -mov.f32f32 r3.x, c9.z -mul.f r3.y, r3.z, r3.y -min.f r3.z, r4.y, c26.z -max.f r1.x, r1.x, c25.x -max.f r1.z, r1.z, c25.x -mov.f32f32 r4.y, r3.y -mul.f r0.w, r3.z, r0.w -min.f r3.z, r1.x, c25.y -min.f r3.y, r1.z, c25.y -max.f r1.x, r2.x, c25.x -mad.f32 r1.z, c19.z, r1.y, r3.x -mad.f32 r2.x, c19.y, r1.y, r2.z -exp2 r2.z, r4.y -(ss)mov.f32f32 r2.z, r2.z -(ss)cmps.f.lt r4.y, (neg)r0.x, c25.x -mov.f32f32 r0.w, r0.w -min.f r3.x, r1.x, c25.y -mul.f r1.x, r2.y, r4.x -sel.b32 r2.y, r2.z, r4.y, c26.x -mov.f32f32 r2.z, c9.x -rcp r0.y, r0.y +max.f r2.y, (neg)r0.x, c26.x +max.f r3.x, r0.x, c26.x +mad.f32 r0.y, c19.x, r0.y, r3.y +cmps.f.lt r3.y, (neg)r0.x, c25.x cmps.f.lt r0.x, (neg)r0.x, c25.x -mad.f32 r1.z, c20.z, r1.x, r1.z -mov.f32f32 r2.y, r2.y +log2 r2.x, r2.x +absneg.f r4.y, (neg)c11.x +log2 r0.w, r0.w +mov.f32f32 r4.z, c8.x +mul.f r2.y, r2.y, r1.x +(ss)mul.f r1.x, r3.x, r1.x +min.f r3.x, (neg)r4.y, c26.z +min.f r4.y, r4.z, c26.z +mad.f32 r2.z, c23.z, r2.y, r2.z +mad.f32 r3.z, c23.y, r2.y, r3.z +(ss)mul.f r2.x, r3.x, r2.x +mul.f r0.w, r4.y, r0.w +max.f r2.z, r2.z, c25.x +max.f r3.x, r3.z, c25.x +mad.f32 r2.y, c23.x, r2.y, r4.x +mad.f32 r0.z, c20.z, r1.x, r0.z +mad.f32 r6.x, c20.y, r1.x, r1.z +exp2 r1.z, r2.x +(ss)sel.b32 r1.z, r1.z, r3.y, c26.x exp2 r0.w, r0.w -(ss)mov.f32f32 r0.w, r0.w -mad.f32 r2.x, c20.y, r1.x, r2.x -max.f r1.z, r1.z, c25.x -mov.f32f32 r2.y, r2.y -sel.b32 r0.x, r0.w, r0.x, c26.x -max.f r0.w, r2.x, c25.x -mad.f32 r6.z, c19.x, r1.y, r2.z -mul.f r1.y, r2.y, r0.y +(ss)sel.b32 r0.x, r0.w, r0.x, c26.x +min.f r3.z, r2.z, c25.y +min.f r3.y, r3.x, c25.y +(ss)mul.f r0.w, r1.z, r1.y +mov.f32f32 r1.z, c25.x mov.f32f32 r2.x, c25.x -mov.f32f32 r2.y, c25.x mov.f32f32 r2.z, c25.x -mad.f32 r2.x, c24.z, r1.y, r2.x -mad.f32 r2.y, c24.y, r1.y, r2.y -mad.f32 r1.y, c24.x, r1.y, r2.z -mov.f32f32 r0.x, r0.x -max.f r2.x, r2.x, c25.x -max.f r2.y, r2.y, c25.x -max.f r1.y, r1.y, c25.x -mov.f32f32 r0.x, r0.x -min.f r4.z, r2.x, c25.y -min.f r4.y, r2.y, c25.y -min.f r4.x, r1.y, c25.y -mul.f r0.x, r0.x, r0.y -mov.f32f32 r0.y, c25.x -mov.f32f32 r1.y, c25.x +mad.f32 r1.z, c24.z, r0.w, r1.z +mad.f32 r2.x, c24.y, r0.w, r2.x +mad.f32 r0.w, c24.x, r0.w, r2.z +mul.f r0.x, r0.x, r1.y +max.f r1.y, r1.z, c25.x +max.f r1.z, r2.x, c25.x +max.f r0.w, r0.w, c25.x mov.f32f32 r2.x, c25.x -mad.f32 r0.y, c21.z, r0.x, r0.y +min.f r4.z, r1.y, c25.y +min.f r4.y, r1.z, c25.y +min.f r4.x, r0.w, c25.y +mad.f32 r0.w, c21.z, r0.x, r2.x +mov.f32f32 r1.y, c25.x +mov.f32f32 r1.z, c25.x mad.f32 r1.y, c21.y, r0.x, r1.y -mad.f32 r0.x, c21.x, r0.x, r2.x -min.f r1.z, r1.z, c25.y -max.f r0.y, r0.y, c25.x -max.f r1.y, r1.y, c25.x +max.f r0.w, r0.w, c25.x +mad.f32 r0.x, c21.x, r0.x, r1.z +max.f r1.z, r2.y, c25.x +max.f r0.z, r0.z, c25.x +min.f r2.z, r0.w, c25.y +max.f r0.w, r1.y, c25.x max.f r0.x, r0.x, c25.x -nop -min.f r2.z, r0.y, c25.y -min.f r2.y, r1.y, c25.y +(rpt1)nop +min.f r2.y, r0.w, c25.y min.f r2.x, r0.x, c25.y -min.f r1.y, r0.w, c25.y -mad.f32 r0.x, c20.x, r1.x, r6.z -mad.f32 r0.w, c3.w, r5.w, r0.z -mad.f32 r0.y, c2.z, r5.z, r6.x -mad.f32 r1.x, c2.y, r5.z, r6.y -max.f r0.x, r0.x, c25.x -mad.f32 r0.z, c3.z, r5.w, r0.y -mad.f32 r0.y, c3.y, r5.w, r1.x -mad.f32 r5.x, c1.x, r5.y, r5.x +min.f r3.x, r1.z, c25.y +min.f r1.z, r0.z, c25.y +max.f r0.x, r6.x, c25.x +mad.f32 r0.y, c20.x, r1.x, r0.y +mul.f r0.z, r5.x, c0.w +mul.f r0.w, r5.x, c0.z +min.f r1.y, r0.x, c25.y +max.f r0.x, r0.y, c25.x +mad.f32 r0.y, c1.w, r5.y, r0.z +mad.f32 r0.z, c1.z, r5.y, r0.w +mul.f r6.x, r5.x, c0.y min.f r1.x, r0.x, c25.y +mad.f32 r0.x, c2.w, r5.z, r0.y +mad.f32 r0.y, c2.z, r5.z, r0.z +mad.f32 r0.w, c3.w, r5.w, r0.x +mad.f32 r0.z, c3.z, r5.w, r0.y +mad.f32 r0.x, c1.y, r5.y, r6.x +mul.f r0.y, r5.x, c0.x +mad.f32 r0.x, c2.y, r5.z, r0.x +mad.f32 r5.x, c1.x, r5.y, r0.y +mad.f32 r0.y, c3.y, r5.w, r0.x mad.f32 r0.x, c2.x, r5.z, r5.x nop mad.f32 r0.x, c3.x, r5.w, r0.x end nop -nop -nop ; VERT: outputs: r0.x (0:0) r1.x (1:0) r2.x (1:1) r3.x (2:0) r4.x (2:1) ; VERT: inputs: r5.x (0:0,cm=f,il=8,b=0) -; VERT: 209 instructions, 0 half, 7 full +; VERT: 184 instructions, 0 half, 7 full diff --git a/reference/maniadrive/maniadrive-15.asm b/reference/maniadrive/maniadrive-15.asm index 7441bc0..a69b40d 100644 --- a/reference/maniadrive/maniadrive-15.asm +++ b/reference/maniadrive/maniadrive-15.asm @@ -1,16 +1,16 @@ ; options: ; VERT: new compiler -@in(r3.x) in0 -@in(r3.y) in1 -@in(r3.z) in2 -@in(r3.w) in3 +@in(r6.x) in0 +@in(r6.y) in1 +@in(r6.z) in2 +@in(r6.w) in3 @in(r0.x) in4 @in(r0.y) in5 @in(r0.z) in6 -@in(r6.x) in8 -@in(r6.y) in9 -@in(r6.z) in10 -@in(r6.w) in11 +@in(r3.x) in8 +@in(r3.y) in9 +@in(r3.z) in10 +@in(r3.w) in11 @out(r0.x) out0 @out(r0.y) out1 @out(r0.z) out2 @@ -35,211 +35,185 @@ @out(r5.y) out21 @out(r5.z) out22 @out(r5.w) out23 -(sy)(ss)mul.f r0.w, r3.x, c13.x -mov.f32f32 r1.x, c24.y -mad.f32 r0.w, c14.x, r3.y, r0.w -mov.f32f32 r1.y, c11.w -mad.f32 r0.w, c15.x, r3.z, r0.w -mov.f32f32 r1.x, r1.x -mad.f32 r0.w, c16.x, r3.w, r0.w -mov.f32f32 r1.y, r1.y +@const(c24.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000 +@const(c25.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 +(sy)(ss)mul.f r0.w, r6.x, c13.x +mul.f r1.x, r6.x, c13.y +mad.f32 r0.w, c14.x, r6.y, r0.w +mad.f32 r1.x, c14.y, r6.y, r1.x +mad.f32 r0.w, c15.x, r6.z, r0.w +mad.f32 r1.x, c15.y, r6.z, r1.x +mad.f32 r0.w, c16.x, r6.w, r0.w +mad.f32 r1.x, c16.y, r6.w, r1.x +mul.f r1.y, r6.x, c13.z mov.f32f32 r1.z, c24.y -mov.f32f32 r1.w, c8.w add.f r0.w, c12.x, (neg)r0.w -max.f r1.x, r1.x, c24.x -max.f r1.y, r1.y, c24.x -mov.f32f32 r1.z, r1.z -mul.f r2.x, r0.w, r0.w -mul.f r2.y, r3.x, c13.y -min.f r5.w, r1.x, c24.y -mad.f32 r1.x, c14.y, r3.y, r2.y -min.f r4.w, r1.y, c24.y -mad.f32 r1.x, c15.y, r3.z, r1.x -max.f r1.y, r1.z, c24.x -mad.f32 r1.x, c16.y, r3.w, r1.x -mov.f32f32 r1.z, r1.w -mul.f r1.w, r3.x, c13.z -mul.f r2.y, r0.x, c4.x add.f r1.x, c12.y, (neg)r1.x -min.f r2.w, r1.y, c24.y -max.f r1.y, r1.z, c24.x -mad.f32 r1.z, c14.z, r3.y, r1.w -mad.f32 r1.w, r1.x, r1.x, r2.x -mad.f32 r1.z, c15.z, r3.z, r1.z -mad.f32 r2.x, c4.y, r0.y, r2.y -mul.f r2.y, r0.x, c5.x -mov.f32f32 r2.z, r1.w -mad.f32 r1.z, c16.z, r3.w, r1.z -min.f r1.w, r1.y, c24.y -mov.f32f32 r1.y, r2.x -mad.f32 r2.x, c5.y, r0.y, r2.y -add.f r1.z, c12.z, (neg)r1.z -mad.f32 r1.y, c4.z, r0.z, r1.y -mul.f r0.x, r0.x, c6.x -mov.f32f32 r2.x, r2.x -mad.f32 r2.y, r1.z, r1.z, r2.z -mad.f32 r2.x, c5.z, r0.z, r2.x -mad.f32 r0.x, c6.y, r0.y, r0.x -mul.f r0.y, r3.x, c0.w -mul.f r2.z, r3.x, c0.z -mul.f r4.x, r3.x, c0.y -mul.f r3.x, r3.x, c0.x -rsq r2.y, (abs)r2.y -(ss)mul.f r0.w, r0.w, r2.y -rcp r4.y, r2.y -(ss)mov.f32f32 r4.y, r4.y -rcp r4.z, r2.y -(ss)mov.f32f32 r4.z, r4.z -rsq r5.x, (abs)r2.y +mad.f32 r1.y, c14.z, r6.y, r1.y +max.f r1.z, r1.z, c24.x +mul.f r1.w, r0.w, r0.w +mad.f32 r1.y, c15.z, r6.z, r1.y +mad.f32 r1.w, r1.x, r1.x, r1.w +mad.f32 r1.y, c16.z, r6.w, r1.y +min.f r5.w, r1.z, c24.y +mov.f32f32 r1.z, c11.w +mov.f32f32 r2.x, c24.y +add.f r1.y, c12.z, (neg)r1.y +mov.f32f32 r2.y, c8.w +mul.f r2.z, r6.x, c0.w +mul.f r2.w, r6.x, c0.z +mad.f32 r1.w, r1.y, r1.y, r1.w +max.f r1.z, r1.z, c24.x +max.f r2.x, r2.x, c24.x +max.f r2.y, r2.y, c24.x +mad.f32 r2.z, c1.w, r6.y, r2.z +mad.f32 r4.x, c1.z, r6.y, r2.w +mul.f r4.y, r6.x, c0.y +rsq r1.w, (abs)r1.w +(ss)mul.f r0.w, r0.w, r1.w +rcp r2.w, r1.w +(ss)mov.f32f32 r4.z, r2.w +mul.f r4.w, r0.x, c4.x +rsq r5.x, (abs)r1.w (ss)mul.f r1.x, r1.x, r5.x add.f r5.x, r0.w, c24.x -mul.f r5.y, r2.y, r4.y -mul.f r0.w, r1.y, r0.w -mul.f r4.z, r4.z, r4.y -mul.f r5.z, r5.x, r5.x -add.f r7.x, r1.x, c24.x -mov.f32f32 r5.y, r5.y -mad.f32 r0.w, r2.x, r1.x, r0.w -mov.f32f32 r1.x, r4.z -mad.f32 r4.z, r7.x, r7.x, r5.z +mul.f r5.y, r1.w, r4.z +mad.f32 r4.w, c4.y, r0.y, r4.w +add.f r5.z, r1.x, c24.x +mul.f r7.x, r5.x, r5.x mul.f r5.y, c17.x, r5.y -mov.f32f32 r0.w, r0.w -(ss)rsq r2.y, (abs)r2.y -mad.f32 r4.y, c17.y, r4.y, r5.y -mov.f32f32 r4.z, r4.z -(ss)mul.f r1.z, r1.z, r2.y -mov.f32f32 r0.x, r0.x -(ss)mov.f32f32 r2.y, r4.y +mad.f32 r7.y, c4.z, r0.z, r4.w +mad.f32 r7.x, r5.z, r5.z, r7.x +rsq r4.w, (abs)r1.w +(ss)mul.f r1.y, r1.y, r4.w +mad.f32 r4.z, c17.y, r4.z, r5.y +(ss)rcp r1.w, r1.w +(ss)mul.f r1.w, r1.w, r2.w +mul.f r0.w, r7.y, r0.w +add.f r5.y, r1.y, c24.y +mul.f r7.z, r0.x, c5.x +min.f r4.w, r1.z, c24.y +min.f r2.w, r2.x, c24.y +mad.f32 r1.z, r5.y, r5.y, r7.x +mad.f32 r2.x, c17.z, r1.w, r4.z +mad.f32 r4.z, c5.y, r0.y, r7.z +min.f r1.w, r2.y, c24.y +mad.f32 r7.x, c2.w, r6.z, r2.z +mad.f32 r7.z, c2.z, r6.z, r4.x +mad.f32 r7.w, c1.y, r6.y, r4.y +rsq r1.z, (abs)r1.z +(ss)mov.f32f32 r2.y, r1.z +mov.f32f32 r2.z, r2.x +rcp r2.x, r2.x +mad.f32 r4.x, c5.z, r0.z, r4.z +(ss)mul.f r1.z, r5.y, r1.z +mul.f r4.y, r5.x, r2.y +mul.f r2.y, r5.z, r2.y +mad.f32 r0.w, r4.x, r1.x, r0.w +mul.f r0.x, r0.x, c6.x +mul.f r1.x, r7.y, r4.y +rcp r4.y, r2.z +mov.f32f32 r4.z, c25.y +mad.f32 r1.x, r4.x, r2.y, r1.x +mad.f32 r0.x, c6.y, r0.y, r0.x +mov.f32f32 r0.y, c25.y mad.f32 r0.x, c6.z, r0.z, r0.x -add.f r0.z, r1.z, c24.y -mad.f32 r1.x, c17.z, r1.x, r2.y -mad.f32 r0.y, c1.w, r3.y, r0.y -mad.f32 r2.y, c1.z, r3.y, r2.z -mad.f32 r2.z, r0.z, r0.z, r4.z -mov.f32f32 r1.x, r1.x -mad.f32 r0.w, r0.x, r1.z, r0.w -mad.f32 r0.y, c2.w, r3.z, r0.y -mad.f32 r2.y, c2.z, r3.z, r2.y -mad.f32 r7.y, c1.y, r3.y, r4.x -mad.f32 r3.x, c1.x, r3.y, r3.x -rsq r1.z, (abs)r2.z -(ss)mov.f32f32 r1.z, r1.z -(ss)rcp r2.z, r1.x -mov.f32f32 r3.y, c25.y -mov.f32f32 r4.x, c25.y -rcp r5.y, r1.x -max.f r4.y, (neg)r0.w, c25.x -mul.f r4.z, r5.x, r1.z -(ss)mul.f r3.y, r3.y, r2.z -mov.f32f32 r5.x, c11.y -mov.f32f32 r5.z, c11.x -mul.f r1.y, r1.y, r4.z -mul.f r4.z, r7.x, r1.z -mov.f32f32 r7.x, c11.z -mad.f32 r5.x, c21.y, r3.y, r5.x -mad.f32 r5.z, c21.x, r3.y, r5.z -mad.f32 r1.y, r2.x, r4.z, r1.y -mad.f32 r2.x, c21.z, r3.y, r7.x -mov.f32f32 r3.y, r4.y -mul.f r2.z, r4.x, r2.z -mov.f32f32 r1.y, r1.y -mul.f r0.z, r0.z, r1.z -mul.f r1.z, r3.y, r5.y -mov.f32f32 r3.y, c8.z -mov.f32f32 r4.x, c8.y -mad.f32 r0.x, r0.x, r0.z, r1.y -mad.f32 r0.z, c22.z, r1.z, r2.x -mad.f32 r1.y, c22.y, r1.z, r5.x -mad.f32 r1.z, c22.x, r1.z, r5.z -max.f r2.x, (neg)r0.x, c25.x -max.f r0.x, r0.x, c25.x +(ss)mul.f r0.z, r4.z, r4.y +mov.f32f32 r2.y, c11.y +mov.f32f32 r4.x, c11.x +mad.f32 r1.x, r0.x, r1.z, r1.x +mov.f32f32 r1.z, c11.z +mad.f32 r2.y, c21.y, r0.z, r2.y +mad.f32 r4.x, c21.x, r0.z, r4.x +max.f r4.z, (neg)r1.x, c25.x +max.f r1.x, r1.x, c25.x +mad.f32 r0.z, c21.z, r0.z, r1.z +mad.f32 r0.x, r0.x, r1.y, r0.w +mul.f r0.y, r0.y, r4.y +mov.f32f32 r0.w, c8.y +mov.f32f32 r1.y, c8.x +log2 r1.z, r4.z +absneg.f r4.y, (neg)c10.x +log2 r1.x, r1.x +(ss)mov.f32f32 r4.z, c7.x +max.f r5.x, (neg)r0.x, c25.x +mov.f32f32 r5.y, c8.z +min.f r4.y, (neg)r4.y, c25.z +min.f r4.z, r4.z, c25.z +mul.f r5.x, r5.x, r2.x +mad.f32 r7.y, c18.z, r0.y, r5.y +(ss)mul.f r1.z, r4.y, r1.z +mul.f r1.x, r4.z, r1.x +mad.f32 r0.z, c22.z, r5.x, r0.z +mad.f32 r2.y, c22.y, r5.x, r2.y +mad.f32 r4.x, c22.x, r5.x, r4.x +max.f r8.x, r0.x, c25.x +mad.f32 r0.w, c18.y, r0.y, r0.w +exp2 r1.z, r1.z +cmps.f.lt r4.y, (neg)r0.x, c24.x +exp2 r1.x, r1.x +cmps.f.lt r0.x, (neg)r0.x, c24.x max.f r0.z, r0.z, c24.x -max.f r1.y, r1.y, c24.x -max.f r1.z, r1.z, c24.x -mad.f32 r3.y, c18.z, r2.z, r3.y -mad.f32 r5.x, c18.y, r2.z, r4.x -log2 r2.x, r2.x -(ss)mov.f32f32 r2.x, r2.x -absneg.f r4.x, (neg)c10.x -log2 r0.x, r0.x -(ss)mov.f32f32 r0.x, r0.x -mov.f32f32 r5.z, c7.x +max.f r2.y, r2.y, c24.x +(ss)sel.b32 r1.z, r1.z, r4.y, c25.x +rcp r2.z, r2.z +sel.b32 r0.x, r1.x, r0.x, c25.x min.f r4.z, r0.z, c24.y -min.f r0.z, (neg)r4.x, c25.z -min.f r4.y, r1.y, c24.y -min.f r4.x, r1.z, c24.y -max.f r1.y, r0.w, c25.x -mul.f r0.z, r0.z, r2.x -min.f r1.z, r5.z, c25.z -mov.f32f32 r2.x, c8.x -mov.f32f32 r1.y, r1.y -mov.f32f32 r0.z, r0.z -mul.f r0.x, r1.z, r0.x -mad.f32 r1.z, c18.x, r2.z, r2.x -mul.f r1.y, r1.y, r5.y -rcp r2.x, r1.x -(ss)cmps.f.lt r1.x, (neg)r0.w, c24.x -cmps.f.lt r0.w, (neg)r0.w, c24.x -mov.f32f32 r0.x, r0.x -exp2 r0.z, r0.z -(ss)mov.f32f32 r0.z, r0.z -mad.f32 r2.z, c19.z, r1.y, r3.y -mad.f32 r3.y, c19.y, r1.y, r5.x -mad.f32 r1.y, c19.x, r1.y, r1.z -sel.b32 r0.z, r0.z, r1.x, c25.x -max.f r1.x, r2.z, c24.x -max.f r2.z, r3.y, c24.x -max.f r3.y, r1.y, c24.x -mov.f32f32 r0.z, r0.z -exp2 r0.x, r0.x -(ss)mov.f32f32 r0.x, r0.x -min.f r1.z, r1.x, c24.y -min.f r1.y, r2.z, c24.y -mov.f32f32 r0.z, r0.z -sel.b32 r0.x, r0.x, r0.w, c25.x -min.f r1.x, r3.y, c24.y -mad.f32 r0.w, c3.w, r3.w, r0.y -mul.f r0.y, r0.z, r2.x -mov.f32f32 r0.z, c24.x -mov.f32f32 r2.z, c24.x -mov.f32f32 r3.y, c24.x -mad.f32 r0.z, c23.z, r0.y, r0.z -mad.f32 r2.z, c23.y, r0.y, r2.z -mad.f32 r0.y, c23.x, r0.y, r3.y -mov.f32f32 r0.x, r0.x +min.f r4.y, r2.y, c24.y +(ss)mul.f r0.z, r1.z, r2.z +mov.f32f32 r1.x, c24.x +mov.f32f32 r1.z, c24.x +mov.f32f32 r2.y, c24.x +mad.f32 r1.x, c23.z, r0.z, r1.x +mad.f32 r1.z, c23.y, r0.z, r1.z +mad.f32 r0.z, c23.x, r0.z, r2.y +mul.f r0.x, r0.x, r2.z +max.f r1.x, r1.x, c24.x +max.f r1.z, r1.z, c24.x +max.f r0.z, r0.z, c24.x +mov.f32f32 r2.y, c24.x +min.f r5.z, r1.x, c24.y +min.f r5.y, r1.z, c24.y +min.f r5.x, r0.z, c24.y +mad.f32 r0.z, c20.z, r0.x, r2.y +mov.f32f32 r1.x, c24.x +mov.f32f32 r1.z, c24.x +mad.f32 r1.x, c20.y, r0.x, r1.x max.f r0.z, r0.z, c24.x -max.f r2.z, r2.z, c24.x -max.f r0.y, r0.y, c24.x -mov.f32f32 r0.x, r0.x -min.f r5.z, r0.z, c24.y -min.f r5.y, r2.z, c24.y -min.f r5.x, r0.y, c24.y -mul.f r0.x, r0.x, r2.x -mov.f32f32 r0.y, c24.x -mov.f32f32 r0.z, c24.x -mov.f32f32 r2.x, c24.x -mad.f32 r0.y, c20.z, r0.x, r0.y -mad.f32 r2.z, c20.y, r0.x, r0.z -mad.f32 r0.x, c20.x, r0.x, r2.x -mad.f32 r0.z, c3.z, r3.w, r2.y -max.f r0.y, r0.y, c24.x -max.f r2.x, r2.z, c24.x +mad.f32 r0.x, c20.x, r0.x, r1.z +max.f r1.z, r4.x, c24.x +mul.f r8.x, r8.x, r2.x +(ss)min.f r2.z, r0.z, c24.y +max.f r0.z, r1.x, c24.x max.f r0.x, r0.x, c24.x -nop -min.f r2.z, r0.y, c24.y -min.f r2.y, r2.x, c24.y +(rpt1)nop +min.f r2.y, r0.z, c24.y min.f r2.x, r0.x, c24.y -mad.f32 r0.x, c2.y, r3.z, r7.y -mad.f32 r3.x, c2.x, r3.z, r3.x -mad.f32 r0.y, c3.y, r3.w, r0.x -mad.f32 r0.x, c3.x, r3.w, r3.x -mov.f32f32 r3.w, r6.w -mov.f32f32 r3.z, r6.z -mov.f32f32 r3.y, r6.y -mov.f32f32 r3.x, r6.x +min.f r4.x, r1.z, c24.y +mad.f32 r0.x, c19.z, r8.x, r7.y +mad.f32 r0.z, c19.y, r8.x, r0.w +mad.f32 r0.y, c18.x, r0.y, r1.y +mad.f32 r0.w, c3.w, r6.w, r7.x +max.f r0.x, r0.x, c24.x +max.f r1.x, r0.z, c24.x +mad.f32 r0.y, c19.x, r8.x, r0.y +mad.f32 r0.z, c3.z, r6.w, r7.z +min.f r1.z, r0.x, c24.y +min.f r1.y, r1.x, c24.y +max.f r0.x, r0.y, c24.x +mad.f32 r0.y, c2.y, r6.z, r7.w +mul.f r6.x, r6.x, c0.x +nop +min.f r1.x, r0.x, c24.y +mad.f32 r0.y, c3.y, r6.w, r0.y +mad.f32 r0.x, c1.x, r6.y, r6.x +nop +mad.f32 r0.x, c2.x, r6.z, r0.x +nop +mad.f32 r0.x, c3.x, r6.w, r0.x end nop ; VERT: outputs: r0.x (0:0) r1.x (1:0) r2.x (1:1) r3.x (5:0) r4.x (2:0) r5.x (2:1) -; VERT: inputs: r3.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r6.x (0:0,cm=f,il=16,b=0) -; VERT: 203 instructions, 0 half, 8 full +; VERT: inputs: r6.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r3.x (0:0,cm=f,il=16,b=0) +; VERT: 176 instructions, 0 half, 9 full diff --git a/reference/maniadrive/maniadrive-17.asm b/reference/maniadrive/maniadrive-17.asm index 7d03d2c..5f47c68 100644 --- a/reference/maniadrive/maniadrive-17.asm +++ b/reference/maniadrive/maniadrive-17.asm @@ -27,207 +27,185 @@ @out(r4.y) out17 @out(r4.z) out18 @out(r4.w) out19 +@const(c24.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000 +@const(c25.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)mul.f r0.w, r5.x, c13.x -mov.f32f32 r1.x, c24.y +mul.f r1.x, r5.x, c13.y mad.f32 r0.w, c14.x, r5.y, r0.w -mov.f32f32 r1.y, c11.w +mad.f32 r1.x, c14.y, r5.y, r1.x mad.f32 r0.w, c15.x, r5.z, r0.w -mov.f32f32 r1.x, r1.x +mad.f32 r1.x, c15.y, r5.z, r1.x mad.f32 r0.w, c16.x, r5.w, r0.w -mov.f32f32 r1.y, r1.y +mad.f32 r1.x, c16.y, r5.w, r1.x +mul.f r1.y, r5.x, c13.z mov.f32f32 r1.z, c24.y -mov.f32f32 r1.w, c8.w add.f r0.w, c12.x, (neg)r0.w -max.f r1.x, r1.x, c24.x -max.f r1.y, r1.y, c24.x -mov.f32f32 r1.z, r1.z -mul.f r2.x, r0.w, r0.w -mul.f r2.y, r5.x, c13.y -min.f r4.w, r1.x, c24.y -mad.f32 r1.x, c14.y, r5.y, r2.y -min.f r3.w, r1.y, c24.y -mad.f32 r1.x, c15.y, r5.z, r1.x -max.f r1.y, r1.z, c24.x -mad.f32 r1.x, c16.y, r5.w, r1.x -mov.f32f32 r1.z, r1.w -mul.f r1.w, r5.x, c13.z -mul.f r2.y, r0.x, c4.x add.f r1.x, c12.y, (neg)r1.x -min.f r2.w, r1.y, c24.y -max.f r1.y, r1.z, c24.x -mad.f32 r1.z, c14.z, r5.y, r1.w -mad.f32 r1.w, r1.x, r1.x, r2.x -mad.f32 r1.z, c15.z, r5.z, r1.z -mad.f32 r2.x, c4.y, r0.y, r2.y -mul.f r2.y, r0.x, c5.x -mov.f32f32 r2.z, r1.w -mad.f32 r1.z, c16.z, r5.w, r1.z -min.f r1.w, r1.y, c24.y -mov.f32f32 r1.y, r2.x -mad.f32 r2.x, c5.y, r0.y, r2.y -add.f r1.z, c12.z, (neg)r1.z -mad.f32 r1.y, c4.z, r0.z, r1.y +mad.f32 r1.y, c14.z, r5.y, r1.y +max.f r1.z, r1.z, c24.x +mul.f r1.w, r0.w, r0.w +mad.f32 r1.y, c15.z, r5.z, r1.y +mad.f32 r1.w, r1.x, r1.x, r1.w +mad.f32 r1.y, c16.z, r5.w, r1.y +min.f r4.w, r1.z, c24.y +mov.f32f32 r1.z, c11.w +mov.f32f32 r2.x, c24.y +add.f r1.y, c12.z, (neg)r1.y +mov.f32f32 r2.y, c8.w +mul.f r2.z, r5.x, c0.w +mul.f r2.w, r5.x, c0.z +mad.f32 r1.w, r1.y, r1.y, r1.w +max.f r1.z, r1.z, c24.x +max.f r2.x, r2.x, c24.x +max.f r2.y, r2.y, c24.x +mad.f32 r2.z, c1.w, r5.y, r2.z +mad.f32 r3.x, c1.z, r5.y, r2.w +mul.f r3.y, r5.x, c0.y +rsq r1.w, (abs)r1.w +(ss)mul.f r0.w, r0.w, r1.w +rcp r2.w, r1.w +(ss)mov.f32f32 r3.z, r2.w +mul.f r3.w, r0.x, c4.x +rsq r4.x, (abs)r1.w +(ss)mul.f r1.x, r1.x, r4.x +add.f r4.x, r0.w, c24.x +mul.f r4.y, r1.w, r3.z +mad.f32 r3.w, c4.y, r0.y, r3.w +add.f r4.z, r1.x, c24.x +mul.f r6.x, r4.x, r4.x +mul.f r4.y, c17.x, r4.y +mad.f32 r6.y, c4.z, r0.z, r3.w +mad.f32 r6.x, r4.z, r4.z, r6.x +rsq r3.w, (abs)r1.w +(ss)mul.f r1.y, r1.y, r3.w +mad.f32 r3.z, c17.y, r3.z, r4.y +(ss)rcp r1.w, r1.w +(ss)mul.f r1.w, r1.w, r2.w +mul.f r0.w, r6.y, r0.w +add.f r4.y, r1.y, c24.y +mul.f r6.z, r0.x, c5.x +min.f r3.w, r1.z, c24.y +min.f r2.w, r2.x, c24.y +mad.f32 r1.z, r4.y, r4.y, r6.x +mad.f32 r2.x, c17.z, r1.w, r3.z +mad.f32 r3.z, c5.y, r0.y, r6.z +min.f r1.w, r2.y, c24.y +mad.f32 r6.x, c2.w, r5.z, r2.z +mad.f32 r6.z, c2.z, r5.z, r3.x +mad.f32 r6.w, c1.y, r5.y, r3.y +rsq r1.z, (abs)r1.z +(ss)mov.f32f32 r2.y, r1.z +mov.f32f32 r2.z, r2.x +rcp r2.x, r2.x +mad.f32 r3.x, c5.z, r0.z, r3.z +(ss)mul.f r1.z, r4.y, r1.z +mul.f r3.y, r4.x, r2.y +mul.f r2.y, r4.z, r2.y +mad.f32 r0.w, r3.x, r1.x, r0.w mul.f r0.x, r0.x, c6.x -mov.f32f32 r2.x, r2.x -mad.f32 r2.y, r1.z, r1.z, r2.z -mad.f32 r2.x, c5.z, r0.z, r2.x +mul.f r1.x, r6.y, r3.y +rcp r3.y, r2.z +mov.f32f32 r3.z, c25.y +mad.f32 r1.x, r3.x, r2.y, r1.x mad.f32 r0.x, c6.y, r0.y, r0.x -mul.f r0.y, r5.x, c0.w -mul.f r2.z, r5.x, c0.z -mul.f r3.x, r5.x, c0.y -mul.f r3.y, r5.x, c0.x -rsq r2.y, (abs)r2.y -(ss)mul.f r0.w, r0.w, r2.y -rcp r3.z, r2.y -(ss)mov.f32f32 r3.z, r3.z -rcp r4.x, r2.y -(ss)mov.f32f32 r4.x, r4.x -rsq r4.y, (abs)r2.y -(ss)mul.f r1.x, r1.x, r4.y -add.f r4.y, r0.w, c24.x -mul.f r4.z, r2.y, r3.z -mul.f r0.w, r1.y, r0.w -mul.f r4.x, r4.x, r3.z -mul.f r5.x, r4.y, r4.y -add.f r6.x, r1.x, c24.x -mov.f32f32 r4.z, r4.z -mad.f32 r0.w, r2.x, r1.x, r0.w -mov.f32f32 r1.x, r4.x -mad.f32 r4.x, r6.x, r6.x, r5.x -mul.f r4.z, c17.x, r4.z -mov.f32f32 r0.w, r0.w -(ss)rsq r2.y, (abs)r2.y -mad.f32 r3.z, c17.y, r3.z, r4.z -mov.f32f32 r4.x, r4.x -(ss)mul.f r1.z, r1.z, r2.y -mov.f32f32 r0.x, r0.x -(ss)mov.f32f32 r2.y, r3.z +mov.f32f32 r0.y, c25.y mad.f32 r0.x, c6.z, r0.z, r0.x -add.f r0.z, r1.z, c24.y -mad.f32 r1.x, c17.z, r1.x, r2.y -mad.f32 r0.y, c1.w, r5.y, r0.y -mad.f32 r2.y, c1.z, r5.y, r2.z -mad.f32 r2.z, r0.z, r0.z, r4.x -mov.f32f32 r1.x, r1.x -mad.f32 r0.w, r0.x, r1.z, r0.w -mad.f32 r0.y, c2.w, r5.z, r0.y -mad.f32 r2.y, c2.z, r5.z, r2.y -mad.f32 r5.x, c1.y, r5.y, r3.x -mad.f32 r5.y, c1.x, r5.y, r3.y -rsq r1.z, (abs)r2.z -(ss)mov.f32f32 r1.z, r1.z -(ss)rcp r2.z, r1.x -mov.f32f32 r3.x, c25.y -mov.f32f32 r3.y, c25.y -rcp r4.x, r1.x -max.f r3.z, (neg)r0.w, c25.x -mul.f r4.y, r4.y, r1.z -(ss)mul.f r3.x, r3.x, r2.z -mov.f32f32 r4.z, c11.y -mov.f32f32 r6.y, c11.x -mul.f r1.y, r1.y, r4.y -mul.f r4.y, r6.x, r1.z -mov.f32f32 r6.x, c11.z -mad.f32 r4.z, c21.y, r3.x, r4.z -mad.f32 r6.y, c21.x, r3.x, r6.y -mad.f32 r1.y, r2.x, r4.y, r1.y -mad.f32 r2.x, c21.z, r3.x, r6.x -mov.f32f32 r3.x, r3.z -mul.f r2.z, r3.y, r2.z -mov.f32f32 r1.y, r1.y -mul.f r0.z, r0.z, r1.z -mul.f r1.z, r3.x, r4.x -mov.f32f32 r3.x, c8.z -mov.f32f32 r3.y, c8.y -mad.f32 r0.x, r0.x, r0.z, r1.y -mad.f32 r0.z, c22.z, r1.z, r2.x -mad.f32 r1.y, c22.y, r1.z, r4.z -mad.f32 r1.z, c22.x, r1.z, r6.y -max.f r2.x, (neg)r0.x, c25.x -max.f r0.x, r0.x, c25.x +(ss)mul.f r0.z, r3.z, r3.y +mov.f32f32 r2.y, c11.y +mov.f32f32 r3.x, c11.x +mad.f32 r1.x, r0.x, r1.z, r1.x +mov.f32f32 r1.z, c11.z +mad.f32 r2.y, c21.y, r0.z, r2.y +mad.f32 r3.x, c21.x, r0.z, r3.x +max.f r3.z, (neg)r1.x, c25.x +max.f r1.x, r1.x, c25.x +mad.f32 r0.z, c21.z, r0.z, r1.z +mad.f32 r0.x, r0.x, r1.y, r0.w +mul.f r0.y, r0.y, r3.y +mov.f32f32 r0.w, c8.y +mov.f32f32 r1.y, c8.x +log2 r1.z, r3.z +absneg.f r3.y, (neg)c10.x +log2 r1.x, r1.x +(ss)mov.f32f32 r3.z, c7.x +max.f r4.x, (neg)r0.x, c25.x +mov.f32f32 r4.y, c8.z +min.f r3.y, (neg)r3.y, c25.z +min.f r3.z, r3.z, c25.z +mul.f r4.x, r4.x, r2.x +mad.f32 r6.y, c18.z, r0.y, r4.y +(ss)mul.f r1.z, r3.y, r1.z +mul.f r1.x, r3.z, r1.x +mad.f32 r0.z, c22.z, r4.x, r0.z +mad.f32 r2.y, c22.y, r4.x, r2.y +mad.f32 r3.x, c22.x, r4.x, r3.x +max.f r7.x, r0.x, c25.x +mad.f32 r0.w, c18.y, r0.y, r0.w +exp2 r1.z, r1.z +cmps.f.lt r3.y, (neg)r0.x, c24.x +exp2 r1.x, r1.x +cmps.f.lt r0.x, (neg)r0.x, c24.x max.f r0.z, r0.z, c24.x -max.f r1.y, r1.y, c24.x -max.f r1.z, r1.z, c24.x -mad.f32 r4.y, c18.z, r2.z, r3.x -mad.f32 r4.z, c18.y, r2.z, r3.y -log2 r2.x, r2.x -(ss)mov.f32f32 r2.x, r2.x -absneg.f r3.x, (neg)c10.x -log2 r0.x, r0.x -(ss)mov.f32f32 r0.x, r0.x -mov.f32f32 r6.x, c7.x +max.f r2.y, r2.y, c24.x +(ss)sel.b32 r1.z, r1.z, r3.y, c25.x +rcp r2.z, r2.z +sel.b32 r0.x, r1.x, r0.x, c25.x min.f r3.z, r0.z, c24.y -min.f r0.z, (neg)r3.x, c25.z -min.f r3.y, r1.y, c24.y -min.f r3.x, r1.z, c24.y -max.f r1.y, r0.w, c25.x -mul.f r0.z, r0.z, r2.x -min.f r1.z, r6.x, c25.z -mov.f32f32 r2.x, c8.x -mov.f32f32 r1.y, r1.y -mov.f32f32 r0.z, r0.z -mul.f r0.x, r1.z, r0.x -mad.f32 r1.z, c18.x, r2.z, r2.x -mul.f r1.y, r1.y, r4.x -rcp r2.x, r1.x -(ss)cmps.f.lt r1.x, (neg)r0.w, c24.x -cmps.f.lt r0.w, (neg)r0.w, c24.x -mov.f32f32 r0.x, r0.x -exp2 r0.z, r0.z -(ss)mov.f32f32 r0.z, r0.z -mad.f32 r2.z, c19.z, r1.y, r4.y -mad.f32 r4.x, c19.y, r1.y, r4.z -mad.f32 r1.y, c19.x, r1.y, r1.z -sel.b32 r0.z, r0.z, r1.x, c25.x -max.f r1.x, r2.z, c24.x -max.f r2.z, r4.x, c24.x -max.f r4.x, r1.y, c24.x -mov.f32f32 r0.z, r0.z -exp2 r0.x, r0.x -(ss)mov.f32f32 r0.x, r0.x -min.f r1.z, r1.x, c24.y -min.f r1.y, r2.z, c24.y -mov.f32f32 r0.z, r0.z -sel.b32 r0.x, r0.x, r0.w, c25.x -min.f r1.x, r4.x, c24.y -mad.f32 r0.w, c3.w, r5.w, r0.y -mul.f r0.y, r0.z, r2.x -mov.f32f32 r0.z, c24.x -mov.f32f32 r2.z, c24.x -mov.f32f32 r4.x, c24.x -mad.f32 r0.z, c23.z, r0.y, r0.z -mad.f32 r2.z, c23.y, r0.y, r2.z -mad.f32 r0.y, c23.x, r0.y, r4.x -mov.f32f32 r0.x, r0.x +min.f r3.y, r2.y, c24.y +(ss)mul.f r0.z, r1.z, r2.z +mov.f32f32 r1.x, c24.x +mov.f32f32 r1.z, c24.x +mov.f32f32 r2.y, c24.x +mad.f32 r1.x, c23.z, r0.z, r1.x +mad.f32 r1.z, c23.y, r0.z, r1.z +mad.f32 r0.z, c23.x, r0.z, r2.y +mul.f r0.x, r0.x, r2.z +max.f r1.x, r1.x, c24.x +max.f r1.z, r1.z, c24.x +max.f r0.z, r0.z, c24.x +mov.f32f32 r2.y, c24.x +min.f r4.z, r1.x, c24.y +min.f r4.y, r1.z, c24.y +min.f r4.x, r0.z, c24.y +mad.f32 r0.z, c20.z, r0.x, r2.y +mov.f32f32 r1.x, c24.x +mov.f32f32 r1.z, c24.x +mad.f32 r1.x, c20.y, r0.x, r1.x max.f r0.z, r0.z, c24.x -max.f r2.z, r2.z, c24.x -max.f r0.y, r0.y, c24.x -mov.f32f32 r0.x, r0.x -min.f r4.z, r0.z, c24.y -min.f r4.y, r2.z, c24.y -min.f r4.x, r0.y, c24.y -mul.f r0.x, r0.x, r2.x -mov.f32f32 r0.y, c24.x -mov.f32f32 r0.z, c24.x -mov.f32f32 r2.x, c24.x -mad.f32 r0.y, c20.z, r0.x, r0.y -mad.f32 r2.z, c20.y, r0.x, r0.z -mad.f32 r0.x, c20.x, r0.x, r2.x -mad.f32 r0.z, c3.z, r5.w, r2.y -max.f r0.y, r0.y, c24.x -max.f r2.x, r2.z, c24.x +mad.f32 r0.x, c20.x, r0.x, r1.z +max.f r1.z, r3.x, c24.x +mul.f r7.x, r7.x, r2.x +(ss)min.f r2.z, r0.z, c24.y +max.f r0.z, r1.x, c24.x max.f r0.x, r0.x, c24.x -nop -min.f r2.z, r0.y, c24.y -min.f r2.y, r2.x, c24.y +(rpt1)nop +min.f r2.y, r0.z, c24.y min.f r2.x, r0.x, c24.y -mad.f32 r0.x, c2.y, r5.z, r5.x -mad.f32 r5.x, c2.x, r5.z, r5.y -mad.f32 r0.y, c3.y, r5.w, r0.x -mad.f32 r0.x, c3.x, r5.w, r5.x +min.f r3.x, r1.z, c24.y +mad.f32 r0.x, c19.z, r7.x, r6.y +mad.f32 r0.z, c19.y, r7.x, r0.w +mad.f32 r0.y, c18.x, r0.y, r1.y +mad.f32 r0.w, c3.w, r5.w, r6.x +max.f r0.x, r0.x, c24.x +max.f r1.x, r0.z, c24.x +mad.f32 r0.y, c19.x, r7.x, r0.y +mad.f32 r0.z, c3.z, r5.w, r6.z +min.f r1.z, r0.x, c24.y +min.f r1.y, r1.x, c24.y +max.f r0.x, r0.y, c24.x +mad.f32 r0.y, c2.y, r5.z, r6.w +mul.f r5.x, r5.x, c0.x +nop +min.f r1.x, r0.x, c24.y +mad.f32 r0.y, c3.y, r5.w, r0.y +mad.f32 r0.x, c1.x, r5.y, r5.x +nop +mad.f32 r0.x, c2.x, r5.z, r0.x +nop +mad.f32 r0.x, c3.x, r5.w, r0.x end nop ; VERT: outputs: r0.x (0:0) r1.x (1:0) r2.x (1:1) r3.x (2:0) r4.x (2:1) ; VERT: inputs: r5.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) -; VERT: 199 instructions, 0 half, 7 full +; VERT: 176 instructions, 0 half, 8 full diff --git a/reference/maniadrive/maniadrive-18.asm b/reference/maniadrive/maniadrive-18.asm index 94e8c0b..2f37b32 100644 --- a/reference/maniadrive/maniadrive-18.asm +++ b/reference/maniadrive/maniadrive-18.asm @@ -1,17 +1,17 @@ ; options: ; VERT: new compiler -@in(r3.x) in0 -@in(r3.y) in1 -@in(r3.z) in2 -@in(r3.w) in3 -@in(r0.x) in4 -@in(r0.y) in5 -@in(r0.z) in6 -@in(r0.w) in7 -@in(r6.x) in8 -@in(r6.y) in9 -@in(r6.z) in10 -@in(r6.w) in11 +@in(r6.x) in0 +@in(r6.y) in1 +@in(r6.z) in2 +@in(r6.w) in3 +@in(r7.x) in4 +@in(r7.y) in5 +@in(r7.z) in6 +@in(r7.w) in7 +@in(r3.x) in8 +@in(r3.y) in9 +@in(r3.z) in10 +@in(r3.w) in11 @out(r0.x) out0 @out(r0.y) out1 @out(r0.z) out2 @@ -36,235 +36,205 @@ @out(r5.y) out21 @out(r5.z) out22 @out(r5.w) out23 -(sy)(ss)mul.f r1.x, r3.x, c15.x -mov.f32f32 r1.y, c24.y -mad.f32 r1.x, c16.x, r3.y, r1.x -mov.f32f32 r1.z, r0.w -mad.f32 r1.x, c17.x, r3.z, r1.x -mov.f32f32 r1.y, r1.y -mad.f32 r1.x, c18.x, r3.w, r1.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r1.w, c24.y -mov.f32f32 r0.w, r0.w -add.f r1.x, c14.x, (neg)r1.x -max.f r1.y, r1.y, c24.x -max.f r1.z, r1.z, c24.x -mov.f32f32 r1.w, r1.w -mul.f r2.x, r1.x, r1.x -mul.f r2.y, r3.x, c15.y -min.f r5.w, r1.y, c24.y -mad.f32 r1.y, c16.y, r3.y, r2.y -min.f r4.w, r1.z, c24.y -mad.f32 r1.y, c17.y, r3.z, r1.y -max.f r1.z, r1.w, c24.x -mad.f32 r1.y, c18.y, r3.w, r1.y -mov.f32f32 r0.w, r0.w +@const(c24.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000 +@const(c25.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 +(sy)(ss)mul.f r0.x, r6.x, c15.x +mul.f r0.y, r6.x, c15.y +mad.f32 r0.x, c16.x, r6.y, r0.x +mad.f32 r0.y, c16.y, r6.y, r0.y +mad.f32 r0.x, c17.x, r6.z, r0.x +mad.f32 r0.y, c17.y, r6.z, r0.y +mad.f32 r0.x, c18.x, r6.w, r0.x +mad.f32 r0.y, c18.y, r6.w, r0.y +mul.f r0.z, r6.x, c15.z +mov.f32f32 r0.w, c24.y +add.f r0.x, c14.x, (neg)r0.x +add.f r0.y, c14.y, (neg)r0.y +mad.f32 r0.z, c16.z, r6.y, r0.z +max.f r0.w, r0.w, c24.x +mul.f r1.x, r0.x, r0.x +mad.f32 r0.z, c17.z, r6.z, r0.z +mad.f32 r1.x, r0.y, r0.y, r1.x +mad.f32 r0.z, c18.z, r6.w, r0.z +min.f r5.w, r0.w, c24.y +mov.f32f32 r0.w, c24.y +mov.f32f32 r1.y, c4.x +add.f r0.z, c14.z, (neg)r0.z +mov.f32f32 r1.z, c4.x mov.f32f32 r1.w, c4.x -mov.f32f32 r2.y, c4.x -add.f r1.y, c14.y, (neg)r1.y -min.f r2.w, r1.z, c24.y +mul.f r1.y, r1.y, c5.x +mad.f32 r1.x, r0.z, r0.z, r1.x max.f r0.w, r0.w, c24.x -mul.f r1.z, r1.w, c5.x -mad.f32 r1.w, r1.y, r1.y, r2.x mov.f32f32 r2.x, c5.y -mul.f r2.y, r2.y, c6.x -mov.f32f32 r2.z, c4.x -mov.f32f32 r4.x, r1.w -mul.f r4.y, r3.x, c15.z -min.f r1.w, r0.w, c24.y -mad.f32 r0.w, c16.z, r3.y, r4.y -mad.f32 r1.z, c4.y, r2.x, r1.z -mad.f32 r0.w, c17.z, r3.z, r0.w -mov.f32f32 r2.x, c6.y -mad.f32 r0.w, c18.z, r3.w, r0.w -mov.f32f32 r1.z, r1.z -mul.f r2.z, r2.z, c7.x -mad.f32 r2.x, c4.y, r2.x, r2.y -add.f r0.w, c14.z, (neg)r0.w -mov.f32f32 r2.y, c5.z -mov.f32f32 r4.y, c7.y -mov.f32f32 r2.x, r2.x -mad.f32 r4.x, r0.w, r0.w, r4.x -mad.f32 r1.z, c4.z, r2.y, r1.z -mov.f32f32 r2.y, c6.z -mad.f32 r2.z, c4.y, r4.y, r2.z -mul.f r4.y, r3.x, c0.w -mul.f r4.z, r3.x, c0.z -mul.f r5.x, r3.x, c0.y -rsq r4.x, (abs)r4.x -(ss)mul.f r1.x, r1.x, r4.x -rcp r5.y, r4.x -(ss)mov.f32f32 r5.y, r5.y -rcp r5.z, r4.x -(ss)mov.f32f32 r5.z, r5.z -rsq r7.x, (abs)r4.x -(ss)mul.f r1.y, r1.y, r7.x -add.f r7.x, r1.x, c24.x -mul.f r7.y, r4.x, r5.y -mul.f r1.x, r1.z, r1.x -mad.f32 r2.x, c4.z, r2.y, r2.x -mul.f r2.y, r7.x, r7.x -add.f r7.z, r1.y, c24.x -mov.f32f32 r7.y, r7.y -mad.f32 r1.x, r2.x, r1.y, r1.x -mul.f r1.y, r5.z, r5.y -mad.f32 r2.y, r7.z, r7.z, r2.y -mul.f r5.z, c19.x, r7.y -mov.f32f32 r1.x, r1.x -(ss)rsq r4.x, (abs)r4.x -mad.f32 r5.y, c19.y, r5.y, r5.z -mov.f32f32 r2.y, r2.y -(ss)mul.f r0.w, r0.w, r4.x -mov.f32f32 r2.z, r2.z -(ss)mov.f32f32 r4.x, c7.z -mov.f32f32 r5.y, r5.y -add.f r5.z, r0.w, c24.y -mov.f32f32 r1.y, r1.y -mad.f32 r2.z, c4.z, r4.x, r2.z -mad.f32 r4.x, c1.w, r3.y, r4.y -mad.f32 r2.y, r5.z, r5.z, r2.y -mad.f32 r1.y, c19.z, r1.y, r5.y -mad.f32 r0.w, r2.z, r0.w, r1.x -mad.f32 r1.x, c2.w, r3.z, r4.x -mad.f32 r7.y, c1.z, r3.y, r4.z -mad.f32 r7.w, c1.y, r3.y, r5.x -mul.f r3.x, r3.x, c0.x -rsq r2.y, (abs)r2.y -(ss)mov.f32f32 r2.y, r2.y -mov.f32f32 r1.y, r1.y -max.f r4.x, (neg)r0.w, c25.x -max.f r4.y, r0.w, c25.x -mul.f r4.z, r7.x, r2.y -mul.f r5.x, r7.z, r2.y -mul.f r2.y, r5.z, r2.y -mov.f32f32 r4.x, r4.x -mul.f r1.z, r1.z, r4.z -rcp r4.z, r1.y +mul.f r1.z, r1.z, c6.x +mul.f r1.w, r1.w, c7.x +mov.f32f32 r2.y, c6.y +mov.f32f32 r2.z, c7.y +rsq r1.x, (abs)r1.x +(ss)mul.f r0.x, r0.x, r1.x +rcp r2.w, r1.x +(ss)mov.f32f32 r4.x, r2.w +mad.f32 r1.y, c4.y, r2.x, r1.y +mov.f32f32 r2.x, c5.z +add.f r4.y, r0.x, c24.x +mul.f r4.z, r1.x, r4.x +rsq r4.w, (abs)r1.x +nop +rsq r5.x, (abs)r1.x +mad.f32 r1.y, c4.z, r2.x, r1.y +mul.f r2.x, r4.y, r4.y +(ss)mul.f r0.y, r0.y, r4.w +mul.f r4.z, c19.x, r4.z +mul.f r0.x, r1.y, r0.x +mad.f32 r1.z, c4.y, r2.y, r1.z +add.f r2.y, r0.y, c24.x +mad.f32 r4.x, c19.y, r4.x, r4.z +(ss)rcp r1.x, r1.x +(ss)mul.f r1.x, r1.x, r2.w +mov.f32f32 r2.w, c6.z +mad.f32 r2.x, r2.y, r2.y, r2.x +mul.f r0.z, r0.z, r5.x +mad.f32 r1.x, c19.z, r1.x, r4.x +mad.f32 r1.z, c4.z, r2.w, r1.z +min.f r2.w, r0.w, c24.y +add.f r0.w, r0.z, c24.y +mov.f32f32 r4.x, r1.x +mad.f32 r0.x, r1.z, r0.y, r0.x +mad.f32 r0.y, c4.y, r2.z, r1.w +mad.f32 r1.w, r0.w, r0.w, r2.x +rcp r1.x, r1.x +mov.f32f32 r2.x, c7.z +mul.f r4.w, r6.x, c0.w +mul.f r8.x, r6.x, c0.z +mul.f r8.y, r6.x, c0.y +mul.f r6.x, r6.x, c0.x +rcp r2.z, r4.x +mov.f32f32 r4.z, c25.y +rsq r1.w, (abs)r1.w +(ss)mov.f32f32 r5.x, r1.w mov.f32f32 r5.y, c25.y -mad.f32 r1.z, r2.x, r5.x, r1.z -mov.f32f32 r2.x, c25.y -rcp r5.x, r1.y -mov.f32f32 r5.z, r4.y -(ss)mul.f r4.y, r5.y, r4.z -mov.f32f32 r1.z, r1.z -mul.f r5.y, c20.z, r0.z -mad.f32 r1.z, r2.z, r2.y, r1.z -mad.f32 r2.y, c9.z, r0.z, c13.z -mul.f r2.z, c20.y, r0.y -mul.f r7.x, c20.x, r0.x -max.f r7.z, (neg)r1.z, c25.x -max.f r1.z, r1.z, c25.x -mad.f32 r2.y, r4.y, r5.y, r2.y -mad.f32 r5.y, c9.y, r0.y, c13.y -mad.f32 r8.x, c9.x, r0.x, c13.x -mul.f r8.y, r2.x, r4.z -mul.f r2.x, c20.y, r0.y -log2 r4.z, r7.z -(ss)mov.f32f32 r4.z, r4.z -(ss)absneg.f r7.z, (neg)c12.x -log2 r1.z, r1.z -(ss)mov.f32f32 r1.z, r1.z -mov.f32f32 r8.z, c8.x -mul.f r4.x, r4.x, r5.x -min.f r7.z, (neg)r7.z, c25.z -mul.f r8.w, c21.z, r0.z -mad.f32 r2.z, r4.y, r2.z, r5.y -mad.f32 r4.y, r4.y, r7.x, r8.x -mul.f r4.z, r7.z, r4.z -min.f r5.y, r8.z, c25.z -mad.f32 r2.y, r4.x, r8.w, r2.y -mul.f r7.x, c21.y, r0.y -mov.f32f32 r4.z, r4.z -mul.f r1.z, r5.y, r1.z -max.f r2.y, r2.y, c24.x -mad.f32 r2.z, r4.x, r7.x, r2.z -mul.f r5.y, c21.x, r0.x -mul.f r7.x, c20.z, r0.z -mad.f32 r7.z, c9.y, r0.y, c10.y -exp2 r4.z, r4.z -(ss)mov.f32f32 r8.x, r4.z -cmps.f.lt r8.z, (neg)r0.w, c24.x -mov.f32f32 r1.z, r1.z -(ss)min.f r4.z, r2.y, c24.y -max.f r2.y, r2.z, c24.x -sel.b32 r2.z, r8.x, r8.z, c25.x -mad.f32 r4.x, r4.x, r5.y, r4.y -mad.f32 r5.y, c9.z, r0.z, c10.z -mad.f32 r7.z, r8.y, r2.x, r7.z -mov.f32f32 r2.x, r2.z -exp2 r1.z, r1.z -(ss)mov.f32f32 r1.z, r1.z -cmps.f.lt r0.w, (neg)r0.w, c24.x -min.f r4.y, r2.y, c24.y -mov.f32f32 r2.x, r2.x -rcp r1.y, r1.y -max.f r2.y, r4.x, c24.x -mad.f32 r2.z, r8.y, r7.x, r5.y -mul.f r7.x, r5.z, r5.x -(ss)mul.f r2.x, r2.x, r1.y -mov.f32f32 r4.x, c24.x -mov.f32f32 r5.x, c24.x -mov.f32f32 r5.y, c24.x -mad.f32 r4.x, c23.z, r2.x, r4.x -mad.f32 r5.x, c23.y, r2.x, r5.x -mad.f32 r2.x, c23.x, r2.x, r5.y -sel.b32 r0.w, r1.z, r0.w, c25.x -max.f r1.z, r4.x, c24.x -max.f r4.x, r5.x, c24.x +(ss)rcp r4.x, r4.x +mad.f32 r0.y, c4.z, r2.x, r0.y +mul.f r2.x, r4.z, r2.z +mul.f r4.y, r4.y, r5.x +mul.f r4.z, c20.z, r7.z +mul.f r5.z, c20.y, r7.y +mul.f r8.z, c20.x, r7.x +mul.f r1.y, r1.y, r4.y +mul.f r2.y, r2.y, r5.x +mad.f32 r4.y, c9.z, r7.z, c13.z +mad.f32 r5.x, c9.y, r7.y, c13.y +mad.f32 r8.w, c9.x, r7.x, c13.x +mad.f32 r1.y, r1.z, r2.y, r1.y +mul.f r0.w, r0.w, r1.w +mad.f32 r1.z, r2.x, r4.z, r4.y +mad.f32 r1.w, r2.x, r5.z, r5.x +mad.f32 r2.x, r2.x, r8.z, r8.w +mad.f32 r0.w, r0.y, r0.w, r1.y +mad.f32 r0.x, r0.y, r0.z, r0.x +mul.f r0.y, r5.y, r2.z +mul.f r0.z, c20.y, r7.y +max.f r1.y, (neg)r0.w, c25.x +max.f r0.w, r0.w, c25.x +max.f r2.y, (neg)r0.x, c25.x +mul.f r2.z, c20.z, r7.z +mad.f32 r4.y, c9.y, r7.y, c10.y +mul.f r8.z, c20.x, r7.x +mul.f r2.y, r2.y, r1.x +log2 r1.y, r1.y +absneg.f r4.z, (neg)c12.x +log2 r0.w, r0.w +mov.f32f32 r5.x, c8.x +mul.f r5.y, c21.z, r7.z +mul.f r5.z, c21.y, r7.y +min.f r4.z, (neg)r4.z, c25.z +min.f r5.x, r5.x, c25.z +mad.f32 r1.z, r2.y, r5.y, r1.z +mad.f32 r1.w, r2.y, r5.z, r1.w +(ss)mul.f r1.y, r4.z, r1.y +mul.f r0.w, r5.x, r0.w +max.f r1.z, r1.z, c24.x +max.f r1.w, r1.w, c24.x +mul.f r5.x, c21.x, r7.x +mad.f32 r5.y, c9.z, r7.z, c10.z +mad.f32 r0.z, r0.y, r0.z, r4.y +exp2 r1.y, r1.y +cmps.f.lt r5.z, (neg)r0.x, c24.x +exp2 r0.w, r0.w +cmps.f.lt r8.w, (neg)r0.x, c24.x +min.f r4.z, r1.z, c24.y +min.f r4.y, r1.w, c24.y +(ss)sel.b32 r1.y, r1.y, r5.z, c25.x +sel.b32 r0.w, r0.w, r8.w, c25.x +mad.f32 r1.z, r2.y, r5.x, r2.x +mad.f32 r1.w, r0.y, r2.z, r5.y +mul.f r1.y, r1.y, r4.x +mov.f32f32 r2.x, c24.x +mov.f32f32 r2.y, c24.x +mov.f32f32 r2.z, c24.x +mad.f32 r2.x, c23.z, r1.y, r2.x +mad.f32 r2.y, c23.y, r1.y, r2.y +mad.f32 r1.y, c23.x, r1.y, r2.z +mul.f r0.w, r0.w, r4.x max.f r2.x, r2.x, c24.x -mov.f32f32 r0.w, r0.w -min.f r5.z, r1.z, c24.y -min.f r5.y, r4.x, c24.y -min.f r5.x, r2.x, c24.y -mov.f32f32 r0.w, r0.w -min.f r4.x, r2.y, c24.y -mul.f r0.z, c21.z, r0.z -mul.f r0.y, c21.y, r0.y -mul.f r0.w, r0.w, r1.y -(ss)mov.f32f32 r1.y, c24.x -mov.f32f32 r1.z, c24.x +max.f r2.y, r2.y, c24.x +max.f r1.y, r1.y, c24.x +mov.f32f32 r2.z, c24.x +min.f r5.z, r2.x, c24.y +min.f r5.y, r2.y, c24.y +min.f r5.x, r1.y, c24.y +mad.f32 r1.y, c22.z, r0.w, r2.z mov.f32f32 r2.x, c24.x -mad.f32 r1.y, c22.z, r0.w, r1.y -mad.f32 r1.z, c22.y, r0.w, r1.z -mad.f32 r0.w, c22.x, r0.w, r2.x -mad.f32 r0.z, r7.x, r0.z, r2.z +mov.f32f32 r2.y, c24.x +mad.f32 r2.x, c22.y, r0.w, r2.x max.f r1.y, r1.y, c24.x +mad.f32 r0.w, c22.x, r0.w, r2.y max.f r1.z, r1.z, c24.x -max.f r0.w, r0.w, c24.x -nop +max.f r0.x, r0.x, c25.x min.f r2.z, r1.y, c24.y -min.f r2.y, r1.z, c24.y +max.f r1.y, r2.x, c24.x +max.f r0.w, r0.w, c24.x +(rpt1)nop +min.f r2.y, r1.y, c24.y min.f r2.x, r0.w, c24.y +min.f r4.x, r1.z, c24.y +mul.f r0.x, r0.x, r1.x +mul.f r0.w, c21.z, r7.z +mul.f r1.x, c21.y, r7.y +mad.f32 r1.y, c9.x, r7.x, c10.x +mad.f32 r1.z, c1.w, r6.y, r4.w +mad.f32 r0.w, r0.x, r0.w, r1.w +mad.f32 r0.z, r0.x, r1.x, r0.z +mad.f32 r0.y, r0.y, r8.z, r1.y +mul.f r1.x, c21.x, r7.x +max.f r0.w, r0.w, c24.x max.f r0.z, r0.z, c24.x -mad.f32 r0.y, r7.x, r0.y, r7.z -mul.f r0.w, c20.x, r0.x -mad.f32 r1.y, c9.x, r0.x, c10.x -min.f r1.z, r0.z, c24.y -max.f r0.y, r0.y, c24.x -mad.f32 r0.z, r8.y, r0.w, r1.y -mul.f r0.x, c21.x, r0.x -mad.f32 r0.w, c3.w, r3.w, r1.x -min.f r1.y, r0.y, c24.y -mad.f32 r0.y, c2.z, r3.z, r7.y -mad.f32 r0.x, r7.x, r0.x, r0.z -mad.f32 r0.z, c3.z, r3.w, r0.y -mad.f32 r0.y, c2.y, r3.z, r7.w -mad.f32 r1.x, c1.x, r3.y, r3.x +mad.f32 r1.w, c2.w, r6.z, r1.z +mad.f32 r4.w, c1.z, r6.y, r8.x +min.f r1.z, r0.w, c24.y +min.f r1.y, r0.z, c24.y +mad.f32 r0.x, r0.x, r1.x, r0.y +mad.f32 r0.w, c3.w, r6.w, r1.w +mad.f32 r0.y, c2.z, r6.z, r4.w +mad.f32 r1.x, c1.y, r6.y, r8.y max.f r0.x, r0.x, c24.x -mad.f32 r0.y, c3.y, r3.w, r0.y -mad.f32 r3.x, c2.x, r3.z, r1.x -nop +mad.f32 r0.z, c3.z, r6.w, r0.y +mad.f32 r0.y, c2.y, r6.z, r1.x +mad.f32 r1.w, c1.x, r6.y, r6.x min.f r1.x, r0.x, c24.y -mad.f32 r0.x, c3.x, r3.w, r3.x -mov.f32f32 r3.w, r6.w -mov.f32f32 r3.z, r6.z -mov.f32f32 r3.y, r6.y -mov.f32f32 r3.x, r6.x -end +mad.f32 r0.y, c3.y, r6.w, r0.y +mad.f32 r0.x, c2.x, r6.z, r1.w +max.f r1.w, r7.w, c24.x +mad.f32 r0.x, c3.x, r6.w, r0.x +max.f r6.x, r7.w, c24.x nop +min.f r4.w, r1.w, c24.y +nop +min.f r1.w, r6.x, c24.y +end nop nop ; VERT: outputs: r0.x (0:0) r1.x (1:0) r2.x (1:1) r3.x (5:0) r4.x (2:0) r5.x (2:1) -; VERT: inputs: r3.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=f,il=12,b=0) r6.x (0:0,cm=f,il=16,b=0) -; VERT: 225 instructions, 0 half, 9 full +; VERT: inputs: r6.x (0:0,cm=f,il=8,b=0) r7.x (0:0,cm=f,il=12,b=0) r3.x (0:0,cm=f,il=16,b=0) +; VERT: 195 instructions, 0 half, 9 full diff --git a/reference/maniadrive/maniadrive-19.asm b/reference/maniadrive/maniadrive-19.asm index 4a651b1..16ecc3c 100644 --- a/reference/maniadrive/maniadrive-19.asm +++ b/reference/maniadrive/maniadrive-19.asm @@ -6,31 +6,24 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)bary.f r0.z, 4, r0.x -bary.f r0.w, 7, r0.x -bary.f r1.x, 5, r0.x +bary.f r0.w, 5, r0.x +bary.f r1.x, 7, r0.x bary.f r1.y, 3, r0.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.x, r1.x bary.f r1.z, 2, r0.x -mov.f32f32 r1.w, r0.z -mov.f32f32 r2.y, r0.w -mov.f32f32 r2.x, r1.x -bary.f r0.z, 1, r0.x +bary.f r2.x, 1, r0.x bary.f (ei)r0.x, 0, r0.x -(rpt3)nop -sam.p (f32)(xyzw)r1.w, r1.w, s#0, t#0 -(sy)mul.f r0.y, r2.z, r1.y -mul.f r0.w, r2.y, r1.z -mul.f r0.z, r2.x, r0.z -mul.f r0.x, r1.w, r0.x -(ss)mov.f32f32 r1.w, r0.y -mov.f32f32 r1.z, r0.w -mov.f32f32 r1.y, r0.z -mov.f32f32 r1.x, r0.x +(rpt1)nop +sam.p (f32)(xyzw)r0.y, r0.z, s#0, t#0 +(sy)mul.f r1.w, r1.x, r1.y +mul.f r1.z, r0.w, r1.z +mul.f r1.y, r0.z, r2.x +(ss)mul.f r1.x, r0.y, r0.x end +nop +nop ; FRAG: outputs: r1.x (1:0) -; FRAG: inputs: r0.z (1:0,cm=f,il=8,b=1) r0.x (5:0,cm=f,il=12,b=1) -; FRAG: 27 instructions, 0 half, 3 full +; FRAG: inputs: r0.z (1:0,cm=f,il=8,b=1) r0.y (5:0,cm=f,il=12,b=1) +; FRAG: 15 instructions, 0 half, 3 full diff --git a/reference/multi-kill.asm b/reference/multi-kill.asm index 8667b58..50cbc8c 100644 --- a/reference/multi-kill.asm +++ b/reference/multi-kill.asm @@ -6,63 +6,44 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)bary.f r0.z, 3, r0.x bary.f r0.w, 2, r0.x bary.f r2.z, 1, r0.x bary.f (ei)r0.x, 0, r0.x mov.f32f32 r0.y, r0.z -mov.f32f32 r0.z, r0.w -mov.f32f32 r0.w, r2.z -mov.f32f32 r1.x, r0.x -mov.f32f32 r1.y, r0.y -mov.f32f32 r1.z, r0.y -mov.f32f32 r1.w, r0.z -mov.f32f32 r2.w, r0.w -mov.f32f32 r3.x, r1.x -add.f r1.y, r1.y, r1.z -cmps.f.ne r1.z, r0.x, c0.x -mov.f32f32 r3.y, r0.z -mov.f32f32 r3.z, r0.w -mov.f32f32 r3.w, r1.x -sel.b32 r0.y, r1.y, r1.z, r0.y -add.f r1.y, r1.w, r3.y -add.f r1.w, r2.w, r3.z -add.f r2.w, r3.x, r3.w -mov.f32f32 r3.x, r0.y -mov.f32f32 r3.y, r0.y -sel.b32 r0.z, r1.y, r1.z, r0.z -sel.b32 r0.w, r1.w, r1.z, r0.w -sel.b32 r1.x, r2.w, r1.z, r1.x -mul.f r1.y, r3.x, r3.y -cmps.f.ne r1.z, r2.z, c0.x -mov.f32f32 r2.w, r0.z -mov.f32f32 r3.x, r0.w -mov.f32f32 r3.y, r1.x -sel.b32 r0.y, r1.y, r1.z, r0.y -mov.f32f32 r1.y, r0.z -mov.f32f32 r3.z, r0.w -mov.f32f32 r3.w, r1.x -mov.f32f32 r1.w, r0.y -mul.f r0.y, r2.w, r1.y -mul.f r1.y, r3.x, r3.z -mul.f r2.w, r3.y, r3.w -mov.f32f32 r3.x, (0.000000) -sel.b32 r0.y, r0.y, r1.z, r0.z -sel.b32 r0.z, r1.y, r1.z, r0.w -sel.b32 r0.w, r2.w, r1.z, r1.x -nop -mov.f32f32 r1.z, r0.y -mov.f32f32 r1.y, r0.z mov.f32f32 r1.x, r0.w -cmps.f.ne p0.x, r2.z, r3.x +mov.f32f32 r1.y, r2.z +mov.f32f32 r1.z, r0.x +add.f r0.z, r0.z, r0.y +cmps.f.ne r1.w, r0.x, c0.x +add.f r0.w, r0.w, r1.x +add.f r2.w, r2.z, r1.y +add.f r3.x, r0.x, r1.z +sel.b32 r0.y, r0.z, r1.w, r0.y +sel.b32 r0.z, r0.w, r1.w, r1.x +sel.b32 r0.w, r2.w, r1.w, r1.y +sel.b32 r1.x, r3.x, r1.w, r1.z +mul.f r1.y, r0.y, r0.y +cmps.f.ne r2.w, r2.z, c0.x +mul.f r1.z, r0.z, r0.z +mul.f r3.x, r0.w, r0.w +mul.f r3.y, r1.x, r1.x +sel.b32 r1.w, r1.y, r2.w, r0.y +sel.b32 r1.z, r1.z, r2.w, r0.z +sel.b32 r1.y, r3.x, r2.w, r0.w +sel.b32 r1.x, r3.y, r2.w, r1.x mov.f32f32 r0.y, (0.000000) -(rpt4)nop +mov.f32f32 r0.z, (0.000000) +(rpt1)nop +cmps.f.ne p0.x, r2.z, r0.y +(rpt5)nop kill p0.x -cmps.f.ne p0.x, r0.x, r0.y +cmps.f.ne p0.x, r0.x, r0.z (rpt5)nop kill p0.x end ; FRAG: outputs: r1.x (1:0) ; FRAG: inputs: r63.w (5:0,cm=f,il=8,b=1) -; FRAG: 65 instructions, 0 half, 4 full +; FRAG: 47 instructions, 0 half, 4 full diff --git a/reference/piglit-arb_framebuffer_srgb-blit-frag1.asm b/reference/piglit-arb_framebuffer_srgb-blit-frag1.asm index 8c430d0..92a8207 100644 --- a/reference/piglit-arb_framebuffer_srgb-blit-frag1.asm +++ b/reference/piglit-arb_framebuffer_srgb-blit-frag1.asm @@ -5,6 +5,7 @@ @out(r2.y) out1 @out(r2.z) out2 @out(r2.w) out3 +@const(c1.x) 0x3f000000, 0x00000000, 0xbf000000, 0x3b7f9724 (sy)(ss)add.s r0.x, r0.x, -8 (rpt2)nop shr.b r0.x, r0.x, 4 diff --git a/reference/piglit-arb_framebuffer_srgb-blit-frag2.asm b/reference/piglit-arb_framebuffer_srgb-blit-frag2.asm index 46a2696..ccc345b 100644 --- a/reference/piglit-arb_framebuffer_srgb-blit-frag2.asm +++ b/reference/piglit-arb_framebuffer_srgb-blit-frag2.asm @@ -6,6 +6,7 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)bary.f r0.z, 0, r0.x bary.f (ei)r0.w, 1, r0.x (rpt5)nop diff --git a/reference/piglit-fs-uniform-array-mat2-index-rd.asm b/reference/piglit-fs-uniform-array-mat2-index-rd.asm index 6deef36..ec0bb12 100644 --- a/reference/piglit-fs-uniform-array-mat2-index-rd.asm +++ b/reference/piglit-fs-uniform-array-mat2-index-rd.asm @@ -4,6 +4,8 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c8.x) 0x00000002, 0x00000000, 0x00000000, 0x00000000 +@const(c9.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000 (sy)(ss)mov.f32f32 r0.x, c0.x mov.f32f32 r0.y, c8.x mov.f32f32 r0.z, c9.y @@ -12,11 +14,11 @@ mov.f32f32 r1.x, c9.x mull.u r1.y, r0.x, r0.y mov.f32f32 r1.z, c9.x madsh.m16 r1.y, r0.x, r0.y, r1.y -mov.f32f32 r1.w, c9.x +mov.f32f32 r2.x, c9.x madsh.m16 r0.x, r0.y, r0.x, r1.y mov.f32f32 r0.y, c9.y -mov.f32f32 r1.y, c9.y -mov.f32f32 r2.x, c9.x +mov.f32f32 r2.y, c9.y +mov.f32f32 r2.z, c9.x cov.u32s16 hr0.x, r0.x (rpt2)nop shl.b hr0.x, hr0.x, 2 @@ -24,31 +26,27 @@ shl.b hr0.x, hr0.x, 2 mova a0.x, hr0.x (rpt5)nop mov.f32f32 r0.x, c -(ul)mov.f32f32 r2.y, c +(ul)mov.f32f32 r1.y, c (rpt1)nop cmps.f.eq r0.x, r0.x, c1.x -cmps.f.eq r2.y, r2.y, c1.y +cmps.f.eq r1.y, r1.y, c1.y (rpt1)nop absneg.s r0.x, (neg)r0.x -absneg.s r2.y, (neg)r2.y +absneg.s r1.y, (neg)r1.y (rpt2)nop -and.b r0.x, r0.x, r2.y +and.b r0.x, r0.x, r1.y (rpt2)nop cmps.u.ne r0.x, r0.x, c8.y (rpt2)nop -sel.b32 r0.z, r0.w, r0.x, r0.z -sel.b32 r0.w, r1.z, r0.x, r1.x -sel.b32 r0.y, r0.y, r0.x, r1.w -sel.b32 r0.x, r2.x, r0.x, r1.y -mov.f32f32 r1.w, r0.z -mov.f32f32 r1.z, r0.w -mov.f32f32 r1.y, r0.y -mov.f32f32 r1.x, r0.x +sel.b32 r1.w, r0.w, r0.x, r0.z +sel.b32 r1.z, r1.z, r0.x, r1.x +sel.b32 r1.y, r0.y, r0.x, r2.x +sel.b32 r1.x, r2.z, r0.x, r2.y end nop nop nop ; FRAG: outputs: r1.x (1:0) ; FRAG: inputs: -; FRAG: 58 instructions, 1 half, 3 full +; FRAG: 54 instructions, 1 half, 3 full diff --git a/reference/piglit-glsl-fs-varying-array.asm b/reference/piglit-glsl-fs-varying-array.asm index a9862ea..8f2f395 100644 --- a/reference/piglit-glsl-fs-varying-array.asm +++ b/reference/piglit-glsl-fs-varying-array.asm @@ -1,7 +1,4 @@ ; options: -ir3/ir3_cmdline.c:168:reset_variant: error: new compiler failed, trying without copy propagation! -ir3/ir3_cmdline.c:168:reset_variant: error: new compiler failed, trying fallback! - ; FRAG: old compiler @in(r1.x) in0 @in(r1.y) in0 diff --git a/reference/piglit-tex-miplevel-selection-1d-shadow.asm b/reference/piglit-tex-miplevel-selection-1d-shadow.asm index 73d2804..536cb14 100644 --- a/reference/piglit-tex-miplevel-selection-1d-shadow.asm +++ b/reference/piglit-tex-miplevel-selection-1d-shadow.asm @@ -6,6 +6,7 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c4.x) 0x80000000, 0xbd4ccccd, 0x00000000, 0x3d4ccccd (sy)(ss)bary.f r0.z, 0, r0.x bary.f (ei)r0.x, 2, r0.x mov.f32f32 r2.z, (0.000000) diff --git a/reference/piglit-vs-temp-mat3-row-rd.asm b/reference/piglit-vs-temp-mat3-row-rd.asm index f837f5f..89d1bbe 100644 --- a/reference/piglit-vs-temp-mat3-row-rd.asm +++ b/reference/piglit-vs-temp-mat3-row-rd.asm @@ -12,6 +12,9 @@ @out(r1.y) out5 @out(r1.z) out6 @out(r1.w) out7 +@const(c6.x) 0x00000000, 0x00000001, 0x00000002, 0x00000000 +@const(c7.x) 0x40800000, 0x40a00000, 0x40c00000, 0x3f800000 +@const(c8.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000 (sy)(ss)mov.f32f32 r0.x, c0.x mov.f32f32 r0.y, c0.x mul.f r0.z, c2.w, r1.x @@ -23,56 +26,44 @@ mad.f32 r0.w, c3.z, r1.y, r0.w absneg.s r0.x, (neg)r0.x absneg.s r0.y, (neg)r0.y mad.f32 r0.z, c4.w, r1.z, r0.z -mad.f32 r0.w, c4.z, r1.z, r0.w +mad.f32 r2.x, c4.z, r1.z, r0.w cov.s32f32 r0.x, r0.x cov.s32f32 r0.y, r0.y -mad.f32 r0.z, c5.w, r1.w, r0.z -mad.f32 r2.x, c5.z, r1.w, r0.w -cmps.f.lt r0.x, r0.x, c6.x -mov.f32f32 r0.y, r0.y (rpt1)nop -sel.b32 r0.x, c7.y, r0.x, c7.x +cmps.f.lt r0.x, r0.x, c6.x cmps.f.lt r0.y, r0.y, c6.x -mov.f32f32 r0.w, r0.z -mov.f32f32 r0.z, r2.x -mov.f32f32 r0.x, r0.x +mad.f32 r0.w, c5.w, r1.w, r0.z +mad.f32 r0.z, c5.z, r1.w, r2.x +sel.b32 r0.x, c7.y, r0.x, c7.x mul.f r2.x, c2.y, r1.x mul.f r1.x, c2.x, r1.x mad.f32 r2.x, c3.y, r1.y, r2.x sel.b32 r0.x, c7.z, r0.y, r0.x mad.f32 r0.y, c4.y, r1.z, r2.x mad.f32 r1.x, c3.x, r1.y, r1.x -mad.f32 r0.y, c5.y, r1.w, r0.y -mov.f32f32 r0.x, r0.x -mad.f32 r1.x, c4.x, r1.z, r1.x mov.f32f32 r1.y, c8.y -mov.f32f32 r1.z, c8.y cmps.f.eq r0.x, r0.x, c1.x -mov.f32f32 r0.y, r0.y -mad.f32 r2.x, c5.x, r1.w, r1.x +mad.f32 r0.y, c5.y, r1.w, r0.y +mad.f32 r1.x, c4.x, r1.z, r1.x nop -absneg.s r0.x, (neg)r0.x -mov.f32f32 r1.x, c8.x -mov.f32f32 r1.w, c8.x -mov.f32f32 r2.y, c8.x -cmps.u.ne r0.x, r0.x, c6.x +absneg.s r1.z, (neg)r0.x +mad.f32 r0.x, c5.x, r1.w, r1.x +mov.f32f32 r1.x, c8.y +mov.f32f32 r2.x, c8.x +cmps.u.ne r2.y, r1.z, c6.x +mov.f32f32 r1.z, c8.x mov.f32f32 r2.z, c8.y mov.f32f32 r2.w, c8.x -mov.f32f32 r3.x, c8.y -sel.b32 r1.y, r1.z, r0.x, r1.y -sel.b32 r1.x, r1.w, r0.x, r1.x -sel.b32 r2.y, r2.z, r0.x, r2.y -sel.b32 r0.x, r2.w, r0.x, r3.x -mov.f32f32 r1.w, r1.y -mov.f32f32 r1.z, r1.x -mov.f32f32 r1.y, r2.y -mov.f32f32 r1.x, r0.x -mov.f32f32 r0.x, r2.x +sel.b32 r1.w, r1.x, r2.y, r1.y +sel.b32 r1.z, r1.z, r2.y, r2.x +mov.f32f32 r1.x, c8.x +mov.f32f32 r2.x, c8.y +(rpt1)nop +sel.b32 r1.y, r2.z, r2.y, r1.x +sel.b32 r1.x, r2.w, r2.y, r2.x end nop -nop -nop ; VERT: outputs: r0.x (0:0) r1.x (5:20) ; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) -; VERT: 58 instructions, 0 half, 4 full +; VERT: 49 instructions, 0 half, 3 full diff --git a/reference/piglit-vs-varying-array-mat2-index-rd.asm b/reference/piglit-vs-varying-array-mat2-index-rd.asm index 46ff3a2..e649199 100644 --- a/reference/piglit-vs-varying-array-mat2-index-rd.asm +++ b/reference/piglit-vs-varying-array-mat2-index-rd.asm @@ -1,7 +1,4 @@ ; options: -ir3/ir3_cmdline.c:168:reset_variant: error: new compiler failed, trying without copy propagation! -ir3/ir3_cmdline.c:168:reset_variant: error: new compiler failed, trying fallback! - ; VERT: old compiler @in(r0.x) in0 @in(r0.y) in0 diff --git a/reference/piglit-vs-varying-array-mat4-index-rd.asm b/reference/piglit-vs-varying-array-mat4-index-rd.asm index 0242f7f..0d301e0 100644 --- a/reference/piglit-vs-varying-array-mat4-index-rd.asm +++ b/reference/piglit-vs-varying-array-mat4-index-rd.asm @@ -1,7 +1,4 @@ ; options: -ir3/ir3_cmdline.c:168:reset_variant: error: new compiler failed, trying without copy propagation! -ir3/ir3_cmdline.c:168:reset_variant: error: new compiler failed, trying fallback! - ; VERT: old compiler @in(r0.x) in0 @in(r0.y) in0 diff --git a/reference/problem/0ad-frag.asm b/reference/problem/0ad-frag.asm index 59da729..588e4b2 100644 --- a/reference/problem/0ad-frag.asm +++ b/reference/problem/0ad-frag.asm @@ -6,47 +6,37 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 -(sy)(ss)bary.f r0.z, 8, r0.x -bary.f r0.w, 10, r0.x -bary.f r1.x, 9, r0.x -bary.f (ei)r0.x, 11, r0.x -mov.f32f32 r0.y, r0.z -mov.f32f32 r0.z, r0.w -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.y, r0.y -mov.f32f32 r1.z, r0.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r1.w, r1.y -mov.f32f32 r1.x, r1.x -mov.f32f32 r2.z, r1.z -mov.f32f32 r3.y, r0.y -mov.f32f32 r4.x, r0.z -mov.f32f32 r2.x, r1.x +@const(c5.x) 0x3a03126f, 0x3f000000, 0x3f800000, 0x40000000 +@const(c6.x) 0x3f800000, 0xbf000000, 0x3de38866, 0x00000000 +(sy)(ss)bary.f r0.z, 10, r0.x +bary.f r0.w, 11, r0.x +bary.f r2.y, 9, r0.x +bary.f (ei)r1.y, 8, r0.x +mov.f32f32 r0.x, r0.z mov.f32f32 r0.y, r0.w -mov.f32f32 r2.w, r1.x -mov.f32f32 r0.x, r0.x +mov.f32f32 r1.x, r2.y +mov.f32f32 r2.w, r1.y +mov.f32f32 r1.w, r0.x +mov.f32f32 r1.z, r0.y +mov.f32f32 r3.x, r1.x +mov.f32f32 r2.x, r0.z +mov.f32f32 r3.y, r1.w +mov.f32f32 r2.z, r1.w +mov.f32f32 r1.x, r1.w nop -mov.f32f32 r2.y, r0.y -mov.f32f32 r3.x, r0.y -mov.f32f32 r0.x, r0.x -mov.f32f32 r3.w, r0.y -mov.f32f32 r4.z, r0.y -(rpt1)nop -sam.s (f32)(w)r0.y, r1.w, s#2, t#2 -(sy)(ss)mov.f32f32 r1.w, r1.x -sam.s (f32)(z)r0.y, r2.z, s#2, t#2 -(sy)mov.f32f32 r1.z, r0.w -mov.f32f32 r3.z, r0.x -mov.f32f32 r4.y, r0.x -(rpt4)nop -sam.s (f32)(y)r0.x, r3.y, s#2, t#2 -(sy)mov.f32f32 r1.y, r0.y -sam.s (f32)(x)r0.x, r4.x, s#2, t#2 -(sy)mov.f32f32 r1.x, r0.x +sam.s (f32)(y)r1.x, r1.y, s#2, t#2 +(rpt2)nop +(ss)nop +sam.s (f32)(w)r1.x, r2.w, s#2, t#2 +nop +sam.s (f32)(z)r1.x, r2.x, s#2, t#2 +nop +sam.s (f32)(x)r1.x, r0.z, s#2, t#2 end +nop +nop +nop ; FRAG: outputs: r1.x (1:0) -; FRAG: inputs: r0.x (1:0,cm=f,il=8,b=1) r1.x (5:0,cm=f,il=12,b=1) r63.y (5:2,cm=f,il=16,b=1) r3.x (5:3,cm=f,il=20,b=1) -; FRAG: 45 instructions, 0 half, 5 full +; FRAG: inputs: r0.x (1:0,cm=f,il=8,b=1) r1.x (5:0,cm=f,il=12,b=1) r0.x (5:2,cm=f,il=16,b=1) r3.x (5:3,cm=f,il=20,b=1) +; FRAG: 27 instructions, 0 half, 4 full diff --git a/reference/problem/frag-conflict-1.asm b/reference/problem/frag-conflict-1.asm index cea918a..5f6c550 100644 --- a/reference/problem/frag-conflict-1.asm +++ b/reference/problem/frag-conflict-1.asm @@ -1,24 +1,27 @@ ; options: -; FRAG: new compiler +; FRAG: TGSI compiler @in(r0.x) in0 @in(r0.y) in1 @out(r1.x) out0 @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)bary.f r0.z, 0, r0.x -bary.f (ei)r0.x, 1, r0.x -(rpt1)nop -mov.f32f32 r0.y, r0.z -mov.f32f32 r0.z, r0.x +bary.f (ei)r0.w, 1, r0.x (rpt5)nop -sam (f32)(xyzw)r0.x, r0.y, s#0, t#0 +sam (f32)(xyzw)r0.x, r0.z, s#0, t#0 (sy)mov.f32f32 r1.w, r0.x mov.f32f32 r1.z, r0.y mov.f32f32 r1.y, r0.z mov.f32f32 r1.x, r0.w end +nop +nop +nop ; FRAG: outputs: r1.x (1:0) ; FRAG: inputs: r0.x (1:0,cm=f,il=8,b=1) r1.x (5:0,cm=f,il=12,b=1) -; FRAG: 18 instructions, 0 half, 2 full +; FRAG: 14 instructions, 0 half, 2 full +; pos (bary): r0.x +; color: r1.x diff --git a/reference/problem/frag-conflict-2.asm b/reference/problem/frag-conflict-2.asm index f69882b..b9e1c2d 100644 --- a/reference/problem/frag-conflict-2.asm +++ b/reference/problem/frag-conflict-2.asm @@ -6,31 +6,20 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 -(sy)(ss)bary.f r0.z, 1, r0.x -bary.f r0.w, 0, r0.x -bary.f (ei)r0.x, 2, r0.x +@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 +(sy)(ss)bary.f r0.w, 1, r0.x +bary.f r1.x, 2, r0.x +bary.f (ei)r0.z, 0, r0.x nop -mov.f32f32 r0.y, r0.z -mov.f32f32 r1.x, r0.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.y, r0.y -mov.f32f32 r2.x, r1.x -mov.f32f32 r0.y, r0.z mov.f32f32 r1.z, r0.w -mov.f32f32 r1.w, r0.x -mov.f32f32 r2.z, r0.x -mov.f32f32 r2.y, r0.y -(rpt3)nop -sam.s (f32)(zw)r1.x, r1.y, s#0, t#0 +mov.f32f32 r2.x, r1.x +mov.f32f32 r1.w, r0.z (rpt1)nop -(ss)nop -sam.s (f32)(xy)r1.x, r2.x, s#0, t#0 +sam.s (f32)(xy)r1.x, r0.z, s#0, t#0 +(rpt3)nop +sam.s (f32)(zw)r1.x, r1.z, s#0, t#0 end -nop -nop -nop ; FRAG: outputs: r1.x (1:0) ; FRAG: inputs: r0.x (1:0,cm=f,il=8,b=1) r1.x (5:0,cm=f,il=12,b=1) -; FRAG: 25 instructions, 0 half, 3 full +; FRAG: 16 instructions, 0 half, 3 full diff --git a/reference/relative-lowered.asm b/reference/relative-lowered.asm index 32ba282..bc41c3e 100644 --- a/reference/relative-lowered.asm +++ b/reference/relative-lowered.asm @@ -4,6 +4,7 @@ @out(r0.y) out1 @out(r0.z) out2 @out(r0.w) out3 +@const(c5.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)cov.f32s16 hr0.x, c4.x mov.f32f32 r0.w, c5.y mov.f32f32 r0.x, c5.y @@ -13,34 +14,22 @@ shl.b hr0.x, hr0.x, 2 mova a0.x, hr0.x (rpt5)nop mov.f32f32 r0.y, c -mov.f32f32 r0.z, c -absneg.f r1.x, (neg)c +absneg.f r0.z, (neg)c +mov.f32f32 r1.x, c (ul)min.f r1.y, c, c5.z -max.f r0.y, r0.y, c5.x -max.f r0.z, r0.z, c5.x -cmps.f.lt r1.x, r1.x, c5.x +max.f r1.z, r0.y, c5.x +cmps.f.lt r0.z, r0.z, c5.x +max.f r0.y, r1.x, c5.x (rpt3)nop -log2 r0.y, r0.y -(ss)mov.f32f32 r1.z, r0.y -(ss)mov.f32f32 r0.y, r0.z -(rpt1)nop -mul.f r0.z, r1.y, r1.z -(rpt2)nop -mov.f32f32 r0.z, r0.z +log2 r1.x, r1.z +(ss)mul.f r1.x, r1.y, r1.x (rpt5)nop -exp2 r0.z, r0.z -(ss)mov.f32f32 r0.z, r0.z -(rpt2)nop -sel.b32 r0.z, r0.z, r1.x, c5.x -(rpt2)nop -mov.f32f32 r0.z, r0.z -(rpt2)nop -mov.f32f32 r0.z, r0.z +exp2 r1.x, r1.x +(ss)sel.b32 r0.z, r1.x, r0.z, c5.x end nop nop -nop ; VERT: outputs: r0.x (0:0) ; VERT: inputs: -; VERT: 57 instructions, 1 half, 2 full +; VERT: 37 instructions, 1 half, 2 full diff --git a/reference/relative-med.asm b/reference/relative-med.asm index 19eed4d..26d7832 100644 --- a/reference/relative-med.asm +++ b/reference/relative-med.asm @@ -1,9 +1,9 @@ ; options: ; VERT: new compiler -@in(r1.x) in0 -@in(r1.y) in1 -@in(r1.z) in2 -@in(r1.w) in3 +@in(r0.x) in0 +@in(r0.y) in1 +@in(r0.z) in2 +@in(r0.w) in3 @out(r0.x) out0 @out(r0.y) out1 @out(r0.z) out2 @@ -12,6 +12,7 @@ @out(r1.y) out5 @out(r1.z) out6 @out(r1.w) out7 +@const(c5.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000 (sy)(ss)cov.f32s16 hr0.x, c4.w cov.f32s16 hr0.y, c4.z cov.f32s16 hr0.z, c4.y @@ -21,11 +22,7 @@ shl.b hr0.y, hr0.y, 2 shl.b hr0.z, hr0.z, 2 shl.b hr0.w, hr0.w, 2 mova a0.x, hr0.x -mov.f32f32 r0.w, r1.w -mov.f32f32 r0.z, r1.z -mov.f32f32 r0.y, r1.y -mov.f32f32 r0.x, r1.x -(rpt1)nop +(rpt5)nop (ul)mov.f32f32 r1.x, c (rpt2)nop max.f r1.x, r1.x, c5.x @@ -57,6 +54,6 @@ nop nop nop ; VERT: outputs: r0.x (0:0) r1.x (1:0) -; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) +; VERT: inputs: r0.x (0:0,cm=f,il=8,b=0) ; VERT: 73 instructions, 1 half, 2 full diff --git a/reference/relative-piglit-bad.asm b/reference/relative-piglit-bad.asm index 07e242f..becdd49 100644 --- a/reference/relative-piglit-bad.asm +++ b/reference/relative-piglit-bad.asm @@ -12,6 +12,8 @@ @out(r1.y) out5 @out(r1.z) out6 @out(r1.w) out7 +@const(c20.x) 0x40800000, 0x00000000, 0x3f800000, 0x40000000 +@const(c21.x) 0x00000000, 0x3f800000, 0x40000000, 0x40400000 (sy)(ss)mov.f32f32 r0.x, c0.x mov.f32f32 r0.y, c2.x mov.f32f32 r0.z, c2.x @@ -24,67 +26,55 @@ add.f r0.x, c1.x, r0.x cov.u32f32 r0.y, r0.y cov.u32f32 r0.z, r0.z cov.u32f32 r0.w, r0.w -mov.f32f32 r0.x, r0.x -cmps.f.lt r0.y, (neg)r0.y, c20.y -cmps.f.lt r2.x, (neg)r0.z, c20.y -cmps.f.lt r2.y, (neg)r0.w, c20.y cov.f32s16 hr0.x, r0.x -mul.f r0.x, c16.w, r1.x -mul.f r0.z, c16.z, r1.x -mul.f r0.w, c16.y, r1.x +cmps.f.lt r0.x, (neg)r0.y, c20.y +cmps.f.lt r0.y, (neg)r0.z, c20.y +cmps.f.lt r2.x, (neg)r0.w, c20.y shl.b hr0.x, hr0.x, 2 -mad.f32 r0.x, c17.w, r1.y, r0.x -mad.f32 r0.z, c17.z, r1.y, r0.z -mad.f32 r0.w, c17.y, r1.y, r0.w +mul.f r0.z, c16.w, r1.x +mul.f r0.w, c16.z, r1.x +mul.f r2.y, c16.y, r1.x mova a0.x, hr0.x -mad.f32 r0.x, c18.w, r1.z, r0.x -mad.f32 r0.z, c18.z, r1.z, r0.z -mad.f32 r2.z, c18.y, r1.z, r0.w +mad.f32 r0.z, c17.w, r1.y, r0.z +mad.f32 r0.w, c17.z, r1.y, r0.w +mad.f32 r2.y, c17.y, r1.y, r2.y mul.f r1.x, c16.x, r1.x -mad.f32 r0.x, c19.w, r1.w, r0.x -mad.f32 r0.z, c19.z, r1.w, r0.z +mad.f32 r0.z, c18.w, r1.z, r0.z +mad.f32 r2.z, c18.z, r1.z, r0.w mov.f32f32 r0.w, c mov.f32f32 r2.w, c nop mov.f32f32 r3.x, c (ul)mov.f32f32 r3.y, c -sel.b32 r0.y, r0.w, r0.y, r2.w -mov.f32f32 r0.w, r0.x -mov.f32f32 r0.z, r0.z -mad.f32 r0.x, c19.y, r1.w, r2.z -sel.b32 r0.y, r3.x, r2.x, r0.y +sel.b32 r0.x, r0.w, r0.x, r2.w +mad.f32 r0.w, c19.w, r1.w, r0.z +mad.f32 r0.z, c19.z, r1.w, r2.z +mad.f32 r2.y, c18.y, r1.z, r2.y +sel.b32 r0.x, r3.x, r0.y, r0.x +mad.f32 r0.y, c19.y, r1.w, r2.y mad.f32 r1.x, c17.x, r1.y, r1.x mov.f32f32 r1.y, c20.z -mov.f32f32 r2.x, c20.z -mov.f32f32 r2.z, r0.y -mov.f32f32 r0.y, r0.x -mad.f32 r0.x, c18.x, r1.z, r1.x -mov.f32f32 r1.x, c20.y -sel.b32 r1.z, r3.y, r2.y, r2.z -mad.f32 r0.x, c19.x, r1.w, r0.x +sel.b32 r0.x, r3.y, r2.x, r0.x +mad.f32 r1.x, c18.x, r1.z, r1.x (rpt1)nop -mov.f32f32 r1.z, r1.z -mov.f32f32 r0.x, r0.x -mov.f32f32 r1.w, c20.y +cmps.f.eq r1.z, r0.x, c3.x +mad.f32 r0.x, c19.x, r1.w, r1.x +mov.f32f32 r1.x, c20.z +mov.f32f32 r2.x, c20.y +cov.u32f32 r1.z, r1.z mov.f32f32 r2.y, c20.y -cmps.f.eq r1.z, r1.z, c3.x -mov.f32f32 r2.z, c20.z +mov.f32f32 r2.z, c20.y mov.f32f32 r2.w, c20.z -mov.f32f32 r3.x, c20.y -cov.u32f32 r1.z, r1.z -(rpt2)nop -cmps.f.ne r1.z, r1.z, c20.y -(rpt2)nop -sel.b32 r1.y, r2.x, r1.z, r1.y -sel.b32 r1.x, r1.w, r1.z, r1.x -sel.b32 r2.x, r2.z, r1.z, r2.y -sel.b32 r2.y, r3.x, r1.z, r2.w -mov.f32f32 r1.w, r1.y -mov.f32f32 r1.z, r1.x -mov.f32f32 r1.y, r2.x -mov.f32f32 r1.x, r2.y +cmps.f.ne r3.x, r1.z, c20.y +mov.f32f32 r3.y, c20.y +mov.f32f32 r3.z, c20.z +nop +sel.b32 r1.w, r1.x, r3.x, r1.y +sel.b32 r1.z, r2.y, r3.x, r2.x +sel.b32 r1.y, r2.w, r3.x, r2.z +sel.b32 r1.x, r3.y, r3.x, r3.z end ; VERT: outputs: r0.x (0:0) r1.x (5:20) ; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) -; VERT: 77 instructions, 1 half, 4 full +; VERT: 61 instructions, 1 half, 4 full diff --git a/reference/relative-temp/fs-temp-mat3-col-row-wr.asm b/reference/relative-temp/fs-temp-mat3-col-row-wr.asm index 9486eec..c6d75b9 100644 --- a/reference/relative-temp/fs-temp-mat3-col-row-wr.asm +++ b/reference/relative-temp/fs-temp-mat3-col-row-wr.asm @@ -1,7 +1,4 @@ ; options: -ir3/ir3_cmdline.c:213:reset_variant: error: new compiler failed, trying without copy propagation! -ir3/ir3_cmdline.c:213:reset_variant: error: new compiler failed, trying fallback! - ; FRAG: old compiler @out(r1.x) out0 @out(r1.y) out1 diff --git a/reference/sad-frag.asm b/reference/sad-frag.asm index 3b6fffc..3116ce3 100644 --- a/reference/sad-frag.asm +++ b/reference/sad-frag.asm @@ -4,6 +4,7 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c0.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000 (sy)(ss)mov.f32f32 r1.w, c0.x mov.f32f32 r1.z, c0.x mov.f32f32 r1.y, c0.y diff --git a/reference/simple-frag.asm b/reference/simple-frag.asm index 2863317..abd34aa 100644 --- a/reference/simple-frag.asm +++ b/reference/simple-frag.asm @@ -6,35 +6,24 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 -(sy)(ss)bary.f r0.z, 4, r0.x -bary.f r0.w, 7, r0.x -bary.f r1.x, 5, r0.x +@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 +(sy)(ss)bary.f r1.x, 7, r0.x +bary.f r0.z, 4, r0.x +bary.f r0.w, 5, r0.x bary.f r1.y, 3, r0.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.x, r1.x -bary.f r1.z, 2, r0.x -mov.f32f32 r2.x, r0.z -mul.f r0.z, r0.w, r1.y -mov.f32f32 r2.y, r1.x -mov.f32f32 r2.z, r0.w -bary.f r0.w, 1, r0.x +mov.f32f32 r1.z, r1.x +bary.f r2.x, 2, r0.x +bary.f r2.y, 1, r0.x bary.f (ei)r0.x, 0, r0.x -nop -mov.f32f32 r1.w, r0.z -(rpt1)nop -sam.p (f32)(xyz)r2.x, r2.x, s#0, t#0 -(sy)mul.f r0.y, r2.z, r1.z -mul.f r0.z, r2.y, r0.w -mul.f r0.x, r2.x, r0.x -nop -mov.f32f32 r1.z, r0.y -mov.f32f32 r1.y, r0.z -mov.f32f32 r1.x, r0.x +mul.f r1.w, r1.z, r1.y +sam.p (f32)(xyz)r0.y, r0.z, s#0, t#0 +(sy)mul.f r1.z, r0.w, r2.x +mul.f r1.y, r0.z, r2.y +(ss)mul.f r1.x, r0.y, r0.x end nop nop ; FRAG: outputs: r1.x (1:0) -; FRAG: inputs: r0.z (1:0,cm=f,il=8,b=1) r0.x (5:0,cm=f,il=12,b=1) -; FRAG: 27 instructions, 0 half, 3 full +; FRAG: inputs: r0.z (1:0,cm=f,il=8,b=1) r0.y (5:0,cm=f,il=12,b=1) +; FRAG: 14 instructions, 0 half, 3 full diff --git a/reference/simple-if-else.asm b/reference/simple-if-else.asm index 7153aca..abdc8da 100644 --- a/reference/simple-if-else.asm +++ b/reference/simple-if-else.asm @@ -15,39 +15,28 @@ @out(r0.y) out1 @out(r0.z) out2 @out(r0.w) out3 +@const(c10.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)mul.f r2.w, c4.z, r0.z mul.f r0.z, c4.z, r0.z mul.f r0.y, c4.y, r0.y mul.f r0.x, c4.x, r0.x -mov.f32f32 r3.x, r2.w -mov.f32f32 r3.y, r0.z -mov.f32f32 r3.z, r0.y -mov.f32f32 r3.w, r0.x -add.f r1.z, r3.x, r1.z -mov.f32f32 r2.w, r2.w -add.f r1.y, r3.y, r1.y -add.f r1.x, r3.z, r1.x -add.f r3.x, r3.w, r0.w -add.f r0.w, r2.w, (neg)r2.z -cmps.f.ne r2.z, r0.x, c10.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x -sel.b32 r0.w, r1.z, r2.z, r0.w +add.f r1.z, r2.w, r1.z +add.f r1.y, r0.z, r1.y +add.f r1.x, r0.y, r1.x +cmps.f.ne r3.x, r0.x, c10.x +add.f r2.z, r2.w, (neg)r2.z add.f r0.z, r0.z, (neg)r2.y add.f r0.y, r0.y, (neg)r2.x +add.f r2.x, r0.x, r0.w +sel.b32 r0.w, r1.z, r3.x, r2.z +sel.b32 r0.z, r1.y, r3.x, r0.z +sel.b32 r0.y, r1.x, r3.x, r0.y add.f r0.x, r0.x, (neg)r1.w -mov.f32f32 r0.w, r0.w -sel.b32 r0.z, r1.y, r2.z, r0.z -sel.b32 r0.y, r1.x, r2.z, r0.y -sel.b32 r0.x, r3.x, r2.z, r0.x -nop -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x +(rpt2)nop +sel.b32 r0.x, r2.x, r3.x, r0.x end nop ; VERT: outputs: r0.x (0:0) ; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=f,il=12,b=0) r1.w (0:0,cm=f,il=16,b=0) -; VERT: 31 instructions, 0 half, 4 full +; VERT: 21 instructions, 0 half, 4 full diff --git a/reference/simple-if.asm b/reference/simple-if.asm index d9f08ae..eb97ce7 100644 --- a/reference/simple-if.asm +++ b/reference/simple-if.asm @@ -11,31 +11,24 @@ @out(r0.y) out1 @out(r0.z) out2 @out(r0.w) out3 +@const(c10.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)mul.f r1.w, c4.z, r0.z mul.f r0.z, c4.z, r0.z mul.f r0.y, c4.y, r0.y mul.f r0.x, c4.x, r0.x -mov.f32f32 r2.x, r1.w -mov.f32f32 r2.y, r0.z -mov.f32f32 r2.z, r0.y -mov.f32f32 r2.w, r0.x -add.f r1.z, r2.x, r1.z +add.f r1.z, r1.w, r1.z +add.f r1.y, r0.z, r1.y +add.f r1.x, r0.y, r1.x cmps.f.ne r2.x, r0.x, c10.x -add.f r1.y, r2.y, r1.y -add.f r1.x, r2.z, r1.x -add.f r0.w, r2.w, r0.w -sel.b32 r1.z, r1.z, r2.x, r1.w +add.f r2.y, r0.x, r0.w +(rpt1)nop +sel.b32 r0.w, r1.z, r2.x, r1.w sel.b32 r0.z, r1.y, r2.x, r0.z sel.b32 r0.y, r1.x, r2.x, r0.y -sel.b32 r0.x, r0.w, r2.x, r0.x -mov.f32f32 r0.w, r1.z -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x +sel.b32 r0.x, r2.y, r2.x, r0.x end nop -nop ; VERT: outputs: r0.x (0:0) ; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=f,il=12,b=0) -; VERT: 22 instructions, 0 half, 3 full +; VERT: 16 instructions, 0 half, 3 full diff --git a/reference/simple-vert.asm b/reference/simple-vert.asm index 261b6f2..a222610 100644 --- a/reference/simple-vert.asm +++ b/reference/simple-vert.asm @@ -4,6 +4,8 @@ @out(r0.y) out1 @out(r0.z) out2 @out(r0.w) out3 +@const(c10.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 +@const(c11.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)mov.f32f32 r0.x, c8.x mov.f32f32 r0.y, c8.y mov.f32f32 r0.z, c8.z @@ -11,20 +13,16 @@ nop mul.f r0.x, r0.x, c8.x nop mad.f32 r0.x, c8.y, r0.y, r0.x -(rpt2)nop -mov.f32f32 r0.x, r0.x nop mad.f32 r0.x, c8.z, r0.z, r0.x (rpt2)nop mov.f32f32 r0.w, r0.x mov.f32f32 r0.z, r0.x mov.f32f32 r0.y, r0.x -mov.f32f32 r0.x, r0.x end nop nop -nop ; VERT: outputs: r0.x (0:0) ; VERT: inputs: -; VERT: 21 instructions, 0 half, 1 full +; VERT: 16 instructions, 0 half, 1 full diff --git a/reference/simple.asm b/reference/simple.asm index 5533fa5..b3fdbdb 100644 --- a/reference/simple.asm +++ b/reference/simple.asm @@ -6,31 +6,24 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)bary.f r0.z, 4, r0.x bary.f r0.w, 5, r0.x bary.f r1.x, 3, r0.x bary.f r1.y, 2, r0.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -bary.f r1.z, 1, r0.x +bary.f r2.x, 1, r0.x bary.f (ei)r0.x, 0, r0.x -mov.f32f32 r0.y, r0.z -mov.f32f32 r0.z, r0.w -(rpt5)nop -sam (f32)(xyzw)r1.w, r0.y, s#0, t#0 -(sy)(ss)mul.f r0.y, r2.z, r1.x -mul.f r0.z, r2.y, r1.y -mul.f r0.w, r2.x, r1.z -mul.f r0.x, r1.w, r0.x -mov.f32f32 r1.w, r0.y -mov.f32f32 r1.z, r0.z -mov.f32f32 r1.y, r0.w -mov.f32f32 r1.x, r0.x +(rpt1)nop +sam (f32)(xyzw)r2.y, r0.z, s#0, t#0 +(sy)mul.f r1.w, r3.x, r1.x +mul.f r1.z, r2.w, r1.y +mul.f r1.y, r2.z, r2.x +mul.f r1.x, r2.y, r0.x end nop nop nop ; FRAG: outputs: r1.x (1:0) ; FRAG: inputs: r0.y (5:20,cm=f,il=8,b=1) r1.x (5:21,cm=f,il=12,b=1) -; FRAG: 26 instructions, 0 half, 3 full +; FRAG: 14 instructions, 0 half, 4 full diff --git a/reference/simpletest.asm b/reference/simpletest.asm index b755903..819ed60 100644 --- a/reference/simpletest.asm +++ b/reference/simpletest.asm @@ -6,19 +6,16 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)bary.f r0.z, 0, r0.x -bary.f (ei)r0.x, 1, r0.x -(rpt1)nop -mov.f32f32 r0.y, r0.z -mov.f32f32 r0.z, r0.x +bary.f (ei)r0.w, 1, r0.x (rpt5)nop -sam (f32)(xyzw)r0.x, r0.y, s#0, t#0 -(sy)mov.f32f32 r1.w, r0.w -mov.f32f32 r1.z, r0.w -mov.f32f32 r1.y, r0.w -mov.f32f32 r1.x, r0.w +sam (f32)(xyzw)r0.y, r0.z, s#0, t#0 +(sy)mov.f32f32 r1.w, r1.x +mov.f32f32 r1.z, r1.x +mov.f32f32 r1.y, r1.x end ; FRAG: outputs: r1.x (1:0) ; FRAG: inputs: r0.x (5:0,cm=f,il=8,b=1) -; FRAG: 18 instructions, 0 half, 2 full +; FRAG: 13 instructions, 0 half, 2 full diff --git a/reference/stk-mines/stk-mines-00.asm b/reference/stk-mines/stk-mines-00.asm index 4a651b1..16ecc3c 100644 --- a/reference/stk-mines/stk-mines-00.asm +++ b/reference/stk-mines/stk-mines-00.asm @@ -6,31 +6,24 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)bary.f r0.z, 4, r0.x -bary.f r0.w, 7, r0.x -bary.f r1.x, 5, r0.x +bary.f r0.w, 5, r0.x +bary.f r1.x, 7, r0.x bary.f r1.y, 3, r0.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.x, r1.x bary.f r1.z, 2, r0.x -mov.f32f32 r1.w, r0.z -mov.f32f32 r2.y, r0.w -mov.f32f32 r2.x, r1.x -bary.f r0.z, 1, r0.x +bary.f r2.x, 1, r0.x bary.f (ei)r0.x, 0, r0.x -(rpt3)nop -sam.p (f32)(xyzw)r1.w, r1.w, s#0, t#0 -(sy)mul.f r0.y, r2.z, r1.y -mul.f r0.w, r2.y, r1.z -mul.f r0.z, r2.x, r0.z -mul.f r0.x, r1.w, r0.x -(ss)mov.f32f32 r1.w, r0.y -mov.f32f32 r1.z, r0.w -mov.f32f32 r1.y, r0.z -mov.f32f32 r1.x, r0.x +(rpt1)nop +sam.p (f32)(xyzw)r0.y, r0.z, s#0, t#0 +(sy)mul.f r1.w, r1.x, r1.y +mul.f r1.z, r0.w, r1.z +mul.f r1.y, r0.z, r2.x +(ss)mul.f r1.x, r0.y, r0.x end +nop +nop ; FRAG: outputs: r1.x (1:0) -; FRAG: inputs: r0.z (1:0,cm=f,il=8,b=1) r0.x (5:0,cm=f,il=12,b=1) -; FRAG: 27 instructions, 0 half, 3 full +; FRAG: inputs: r0.z (1:0,cm=f,il=8,b=1) r0.y (5:0,cm=f,il=12,b=1) +; FRAG: 15 instructions, 0 half, 3 full diff --git a/reference/stk-mines/stk-mines-01.asm b/reference/stk-mines/stk-mines-01.asm index c16817f..530b9e3 100644 --- a/reference/stk-mines/stk-mines-01.asm +++ b/reference/stk-mines/stk-mines-01.asm @@ -1,20 +1,20 @@ ; options: ; VERT: new compiler -@in(r2.x) in0 -@in(r2.y) in1 -@in(r2.z) in2 -@in(r2.w) in3 -@in(r0.w) in11 -@in(r3.x) in12 -@in(r3.y) in13 -@in(r3.z) in14 -@in(r3.w) in15 +@in(r3.x) in0 +@in(r3.y) in1 +@in(r3.z) in2 +@in(r3.w) in3 +@in(r4.w) in11 +@in(r2.x) in12 +@in(r2.y) in13 +@in(r2.z) in14 +@in(r2.w) in15 @in(r0.x) in16 @in(r0.y) in17 @in(r0.z) in18 -@in(r1.x) in20 -@in(r1.y) in21 -@in(r1.z) in22 +@in(r0.w) in20 +@in(r1.x) in21 +@in(r1.y) in22 @out(r0.x) out0 @out(r0.y) out1 @out(r0.z) out2 @@ -27,47 +27,40 @@ @out(r2.y) out9 @out(r2.z) out10 @out(r2.w) out11 -(sy)(ss)mov.f32f32 r0.w, r0.w -mad.f32 r0.z, c7.z, r0.z, r1.z -mad.f32 r0.y, c7.y, r0.y, r1.y -mad.f32 r0.x, c7.x, r0.x, r1.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x -max.f r0.w, r0.w, c8.x +@const(c8.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 +(sy)(ss)mad.f32 r0.z, c7.z, r0.z, r1.y +mad.f32 r0.y, c7.y, r0.y, r1.x +mad.f32 r0.x, c7.x, r0.x, r0.w +mul.f r0.w, r3.x, c0.w max.f r0.z, r0.z, c8.x max.f r0.y, r0.y, c8.x max.f r0.x, r0.x, c8.x -min.f r1.w, r0.w, c8.y +mad.f32 r0.w, c1.w, r3.y, r0.w min.f r1.z, r0.z, c8.y min.f r1.y, r0.y, c8.y min.f r1.x, r0.x, c8.y -mul.f r0.x, r2.x, c0.w -mul.f r0.y, r2.x, c0.z -mad.f32 r0.x, c1.w, r2.y, r0.x -mad.f32 r0.y, c1.z, r2.y, r0.y -mad.f32 r0.x, c2.w, r2.z, r0.x -mad.f32 r0.y, c2.z, r2.z, r0.y -mad.f32 r0.w, c3.w, r2.w, r0.x -mad.f32 r0.z, c3.z, r2.w, r0.y -mul.f r0.x, r2.x, c0.y -mul.f r0.y, r2.x, c0.x -mad.f32 r0.x, c1.y, r2.y, r0.x -mad.f32 r0.y, c1.x, r2.y, r0.y -mad.f32 r0.x, c2.y, r2.z, r0.x -mad.f32 r2.x, c2.x, r2.z, r0.y -mad.f32 r0.y, c3.y, r2.w, r0.x -mad.f32 r0.x, c3.x, r2.w, r2.x -mov.f32f32 r2.w, r3.w -mov.f32f32 r2.z, r3.z -mov.f32f32 r2.y, r3.y -mov.f32f32 r2.x, r3.x +mad.f32 r0.x, c2.w, r3.z, r0.w +mul.f r0.y, r3.x, c0.z +mad.f32 r0.w, c3.w, r3.w, r0.x +mad.f32 r0.x, c1.z, r3.y, r0.y +mul.f r0.y, r3.x, c0.y +mad.f32 r0.x, c2.z, r3.z, r0.x +mad.f32 r0.y, c1.y, r3.y, r0.y +mad.f32 r0.z, c3.z, r3.w, r0.x +mad.f32 r0.x, c2.y, r3.z, r0.y +mul.f r1.w, r3.x, c0.x +mad.f32 r0.y, c3.y, r3.w, r0.x +mad.f32 r0.x, c1.x, r3.y, r1.w +max.f r1.w, r4.w, c8.x +mad.f32 r0.x, c2.x, r3.z, r0.x +nop +mad.f32 r0.x, c3.x, r3.w, r0.x +min.f r1.w, r1.w, c8.y end nop nop nop ; VERT: outputs: r0.x (0:0) r1.x (1:0) r2.x (5:0) -; VERT: inputs: r2.x (0:0,cm=f,il=8,b=0) r63.w (0:0,cm=0,il=12,b=0) r0.x (0:0,cm=8,il=16,b=0) r3.x (0:0,cm=f,il=20,b=0) r0.x (0:0,cm=7,il=24,b=0) r1.x (0:0,cm=7,il=28,b=0) r63.w (0:0,cm=0,il=32,b=0) -; VERT: 37 instructions, 0 half, 4 full +; VERT: inputs: r3.x (0:0,cm=f,il=8,b=0) r63.w (0:0,cm=0,il=12,b=0) r4.x (0:0,cm=8,il=16,b=0) r2.x (0:0,cm=f,il=20,b=0) r0.x (0:0,cm=7,il=24,b=0) r0.w (0:0,cm=7,il=28,b=0) r63.w (0:0,cm=0,il=32,b=0) +; VERT: 29 instructions, 0 half, 5 full diff --git a/reference/stk-mines/stk-mines-02.asm b/reference/stk-mines/stk-mines-02.asm index cca09e5..0583b5d 100644 --- a/reference/stk-mines/stk-mines-02.asm +++ b/reference/stk-mines/stk-mines-02.asm @@ -1,17 +1,17 @@ ; options: ; VERT: new compiler -@in(r2.x) in0 -@in(r2.y) in1 -@in(r2.z) in2 -@in(r2.w) in3 -@in(r0.x) in4 -@in(r0.y) in5 -@in(r0.z) in6 -@in(r0.w) in7 -@in(r3.x) in8 -@in(r3.y) in9 -@in(r3.z) in10 -@in(r3.w) in11 +@in(r1.x) in0 +@in(r1.y) in1 +@in(r1.z) in2 +@in(r1.w) in3 +@in(r3.x) in4 +@in(r3.y) in5 +@in(r3.z) in6 +@in(r3.w) in7 +@in(r2.x) in8 +@in(r2.y) in9 +@in(r2.z) in10 +@in(r2.w) in11 @out(r0.x) out0 @out(r0.y) out1 @out(r0.z) out2 @@ -24,43 +24,36 @@ @out(r2.y) out9 @out(r2.z) out10 @out(r2.w) out11 -(sy)(ss)mov.f32f32 r0.w, r0.w -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x -max.f r0.w, r0.w, c4.x -max.f r0.z, r0.z, c4.x -max.f r0.y, r0.y, c4.x -max.f r0.x, r0.x, c4.x -min.f r1.w, r0.w, c4.y -min.f r1.z, r0.z, c4.y -min.f r1.y, r0.y, c4.y -min.f r1.x, r0.x, c4.y -mul.f r0.x, r2.x, c0.w -mul.f r0.y, r2.x, c0.z -mad.f32 r0.x, c1.w, r2.y, r0.x -mad.f32 r0.y, c1.z, r2.y, r0.y -mad.f32 r0.x, c2.w, r2.z, r0.x -mad.f32 r0.y, c2.z, r2.z, r0.y -mad.f32 r0.w, c3.w, r2.w, r0.x -mad.f32 r0.z, c3.z, r2.w, r0.y -mul.f r0.x, r2.x, c0.y -mul.f r0.y, r2.x, c0.x -mad.f32 r0.x, c1.y, r2.y, r0.x -mad.f32 r0.y, c1.x, r2.y, r0.y -mad.f32 r0.x, c2.y, r2.z, r0.x -mad.f32 r2.x, c2.x, r2.z, r0.y -mad.f32 r0.y, c3.y, r2.w, r0.x -mad.f32 r0.x, c3.x, r2.w, r2.x -mov.f32f32 r2.w, r3.w -mov.f32f32 r2.z, r3.z -mov.f32f32 r2.y, r3.y -mov.f32f32 r2.x, r3.x +@const(c4.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000 +(sy)(ss)mul.f r0.x, r1.x, c0.w +mul.f r0.y, r1.x, c0.z +mad.f32 r0.x, c1.w, r1.y, r0.x +mad.f32 r0.y, c1.z, r1.y, r0.y +mad.f32 r0.x, c2.w, r1.z, r0.x +mad.f32 r0.y, c2.z, r1.z, r0.y +mad.f32 r0.w, c3.w, r1.w, r0.x +mad.f32 r0.z, c3.z, r1.w, r0.y +mul.f r0.x, r1.x, c0.y +mul.f r0.y, r1.x, c0.x +mad.f32 r0.x, c1.y, r1.y, r0.x +mad.f32 r0.y, c1.x, r1.y, r0.y +mad.f32 r0.x, c2.y, r1.z, r0.x +mad.f32 r1.x, c2.x, r1.z, r0.y +mad.f32 r0.y, c3.y, r1.w, r0.x +mad.f32 r0.x, c3.x, r1.w, r1.x +max.f r1.x, r3.w, c4.x +max.f r1.y, r3.z, c4.x +max.f r3.y, r3.y, c4.x +max.f r3.x, r3.x, c4.x +min.f r1.w, r1.x, c4.y +min.f r1.z, r1.y, c4.y +min.f r1.y, r3.y, c4.y +min.f r1.x, r3.x, c4.y end nop nop nop ; VERT: outputs: r0.x (0:0) r1.x (1:0) r2.x (5:0) -; VERT: inputs: r2.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=f,il=12,b=0) r3.x (0:0,cm=f,il=16,b=0) -; VERT: 33 instructions, 0 half, 4 full +; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) r3.x (0:0,cm=f,il=12,b=0) r2.x (0:0,cm=f,il=16,b=0) +; VERT: 25 instructions, 0 half, 4 full diff --git a/reference/stk-mines/stk-mines-03.asm b/reference/stk-mines/stk-mines-03.asm index ff486a7..d610a0c 100644 --- a/reference/stk-mines/stk-mines-03.asm +++ b/reference/stk-mines/stk-mines-03.asm @@ -6,39 +6,20 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)bary.f r0.z, 4, r0.x -bary.f r0.w, 7, r0.x -bary.f r1.x, 5, r0.x -bary.f r1.y, 2, r0.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.x, r1.x -bary.f r1.z, 1, r0.x -mov.f32f32 r1.w, r0.z -mov.f32f32 r2.y, r0.w -mov.f32f32 r2.x, r1.x +bary.f r0.w, 5, r0.x +bary.f r1.x, 7, r0.x +bary.f r2.x, 2, r0.x +bary.f r2.y, 1, r0.x bary.f (ei)r0.x, 0, r0.x -(rpt4)nop -sam.p (f32)(xyzw)r0.y, r1.w, s#0, t#0 -(sy)mul.f r0.w, r0.w, r1.y -mul.f r0.z, r0.z, r1.z -mul.f r0.x, r0.y, r0.x -mov.f32f32 r0.y, r1.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.x, r0.x -(ss)mov.f32f32 r1.w, r0.y -mov.f32f32 r0.y, r0.w -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.x, r0.x -nop -mov.f32f32 r1.z, r0.y -mov.f32f32 r1.y, r0.z -mov.f32f32 r1.x, r0.x +(rpt2)nop +sam.p (f32)(xyzw)r1.x, r0.z, s#0, t#0 +(sy)mul.f r1.z, r1.z, r2.x +mul.f r1.y, r1.y, r2.y +(ss)mul.f r1.x, r1.x, r0.x end -nop -nop ; FRAG: outputs: r1.x (1:0) -; FRAG: inputs: r0.x (1:0,cm=f,il=8,b=1) r0.x (5:0,cm=f,il=12,b=1) -; FRAG: 34 instructions, 0 half, 3 full +; FRAG: inputs: r0.x (1:0,cm=f,il=8,b=1) r0.y (5:0,cm=f,il=12,b=1) +; FRAG: 14 instructions, 0 half, 3 full diff --git a/reference/stk-mines/stk-mines-05.asm b/reference/stk-mines/stk-mines-05.asm index 2c03e4f..284e180 100644 --- a/reference/stk-mines/stk-mines-05.asm +++ b/reference/stk-mines/stk-mines-05.asm @@ -1,13 +1,13 @@ ; options: ; VERT: new compiler -@in(r2.x) in0 -@in(r2.y) in1 -@in(r2.z) in2 -@in(r2.w) in3 -@in(r0.x) in4 -@in(r0.y) in5 -@in(r0.z) in6 -@in(r0.w) in7 +@in(r1.x) in0 +@in(r1.y) in1 +@in(r1.z) in2 +@in(r1.w) in3 +@in(r2.x) in4 +@in(r2.y) in5 +@in(r2.z) in6 +@in(r2.w) in7 @out(r0.x) out0 @out(r0.y) out1 @out(r0.z) out2 @@ -16,39 +16,36 @@ @out(r1.y) out5 @out(r1.z) out6 @out(r1.w) out7 -(sy)(ss)mov.f32f32 r0.w, r0.w -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x -max.f r0.w, r0.w, c4.x -max.f r0.z, r0.z, c4.x -max.f r0.y, r0.y, c4.x -max.f r0.x, r0.x, c4.x -min.f r1.w, r0.w, c4.y -min.f r1.z, r0.z, c4.y -min.f r1.y, r0.y, c4.y -min.f r1.x, r0.x, c4.y -mul.f r0.x, r2.x, c0.w -mul.f r0.y, r2.x, c0.z -mad.f32 r0.x, c1.w, r2.y, r0.x -mad.f32 r0.y, c1.z, r2.y, r0.y -mad.f32 r0.x, c2.w, r2.z, r0.x -mad.f32 r0.y, c2.z, r2.z, r0.y -mad.f32 r0.w, c3.w, r2.w, r0.x -mad.f32 r0.z, c3.z, r2.w, r0.y -mul.f r0.x, r2.x, c0.y -mul.f r0.y, r2.x, c0.x -mad.f32 r0.x, c1.y, r2.y, r0.x -mad.f32 r0.y, c1.x, r2.y, r0.y -mad.f32 r0.x, c2.y, r2.z, r0.x -mad.f32 r2.x, c2.x, r2.z, r0.y -mad.f32 r0.y, c3.y, r2.w, r0.x -mad.f32 r0.x, c3.x, r2.w, r2.x +@const(c4.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000 +(sy)(ss)mul.f r0.x, r1.x, c0.w +mul.f r0.y, r1.x, c0.z +mad.f32 r0.x, c1.w, r1.y, r0.x +mad.f32 r0.y, c1.z, r1.y, r0.y +mad.f32 r0.x, c2.w, r1.z, r0.x +mad.f32 r0.y, c2.z, r1.z, r0.y +mad.f32 r0.w, c3.w, r1.w, r0.x +mad.f32 r0.z, c3.z, r1.w, r0.y +mul.f r0.x, r1.x, c0.y +mul.f r0.y, r1.x, c0.x +mad.f32 r0.x, c1.y, r1.y, r0.x +mad.f32 r0.y, c1.x, r1.y, r0.y +mad.f32 r0.x, c2.y, r1.z, r0.x +mad.f32 r1.x, c2.x, r1.z, r0.y +mad.f32 r0.y, c3.y, r1.w, r0.x +mad.f32 r0.x, c3.x, r1.w, r1.x +max.f r1.x, r2.w, c4.x +max.f r1.y, r2.z, c4.x +max.f r2.y, r2.y, c4.x +max.f r2.x, r2.x, c4.x +min.f r1.w, r1.x, c4.y +min.f r1.z, r1.y, c4.y +min.f r1.y, r2.y, c4.y +min.f r1.x, r2.x, c4.y end nop nop nop ; VERT: outputs: r0.x (0:0) r1.x (1:0) -; VERT: inputs: r2.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=f,il=12,b=0) -; VERT: 29 instructions, 0 half, 3 full +; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) r2.x (0:0,cm=f,il=12,b=0) +; VERT: 25 instructions, 0 half, 3 full diff --git a/reference/stk-mines/stk-mines-06.asm b/reference/stk-mines/stk-mines-06.asm index 2c03e4f..284e180 100644 --- a/reference/stk-mines/stk-mines-06.asm +++ b/reference/stk-mines/stk-mines-06.asm @@ -1,13 +1,13 @@ ; options: ; VERT: new compiler -@in(r2.x) in0 -@in(r2.y) in1 -@in(r2.z) in2 -@in(r2.w) in3 -@in(r0.x) in4 -@in(r0.y) in5 -@in(r0.z) in6 -@in(r0.w) in7 +@in(r1.x) in0 +@in(r1.y) in1 +@in(r1.z) in2 +@in(r1.w) in3 +@in(r2.x) in4 +@in(r2.y) in5 +@in(r2.z) in6 +@in(r2.w) in7 @out(r0.x) out0 @out(r0.y) out1 @out(r0.z) out2 @@ -16,39 +16,36 @@ @out(r1.y) out5 @out(r1.z) out6 @out(r1.w) out7 -(sy)(ss)mov.f32f32 r0.w, r0.w -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x -max.f r0.w, r0.w, c4.x -max.f r0.z, r0.z, c4.x -max.f r0.y, r0.y, c4.x -max.f r0.x, r0.x, c4.x -min.f r1.w, r0.w, c4.y -min.f r1.z, r0.z, c4.y -min.f r1.y, r0.y, c4.y -min.f r1.x, r0.x, c4.y -mul.f r0.x, r2.x, c0.w -mul.f r0.y, r2.x, c0.z -mad.f32 r0.x, c1.w, r2.y, r0.x -mad.f32 r0.y, c1.z, r2.y, r0.y -mad.f32 r0.x, c2.w, r2.z, r0.x -mad.f32 r0.y, c2.z, r2.z, r0.y -mad.f32 r0.w, c3.w, r2.w, r0.x -mad.f32 r0.z, c3.z, r2.w, r0.y -mul.f r0.x, r2.x, c0.y -mul.f r0.y, r2.x, c0.x -mad.f32 r0.x, c1.y, r2.y, r0.x -mad.f32 r0.y, c1.x, r2.y, r0.y -mad.f32 r0.x, c2.y, r2.z, r0.x -mad.f32 r2.x, c2.x, r2.z, r0.y -mad.f32 r0.y, c3.y, r2.w, r0.x -mad.f32 r0.x, c3.x, r2.w, r2.x +@const(c4.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000 +(sy)(ss)mul.f r0.x, r1.x, c0.w +mul.f r0.y, r1.x, c0.z +mad.f32 r0.x, c1.w, r1.y, r0.x +mad.f32 r0.y, c1.z, r1.y, r0.y +mad.f32 r0.x, c2.w, r1.z, r0.x +mad.f32 r0.y, c2.z, r1.z, r0.y +mad.f32 r0.w, c3.w, r1.w, r0.x +mad.f32 r0.z, c3.z, r1.w, r0.y +mul.f r0.x, r1.x, c0.y +mul.f r0.y, r1.x, c0.x +mad.f32 r0.x, c1.y, r1.y, r0.x +mad.f32 r0.y, c1.x, r1.y, r0.y +mad.f32 r0.x, c2.y, r1.z, r0.x +mad.f32 r1.x, c2.x, r1.z, r0.y +mad.f32 r0.y, c3.y, r1.w, r0.x +mad.f32 r0.x, c3.x, r1.w, r1.x +max.f r1.x, r2.w, c4.x +max.f r1.y, r2.z, c4.x +max.f r2.y, r2.y, c4.x +max.f r2.x, r2.x, c4.x +min.f r1.w, r1.x, c4.y +min.f r1.z, r1.y, c4.y +min.f r1.y, r2.y, c4.y +min.f r1.x, r2.x, c4.y end nop nop nop ; VERT: outputs: r0.x (0:0) r1.x (1:0) -; VERT: inputs: r2.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=f,il=12,b=0) -; VERT: 29 instructions, 0 half, 3 full +; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) r2.x (0:0,cm=f,il=12,b=0) +; VERT: 25 instructions, 0 half, 3 full diff --git a/reference/stk-mines/stk-mines-07.asm b/reference/stk-mines/stk-mines-07.asm index cca09e5..0583b5d 100644 --- a/reference/stk-mines/stk-mines-07.asm +++ b/reference/stk-mines/stk-mines-07.asm @@ -1,17 +1,17 @@ ; options: ; VERT: new compiler -@in(r2.x) in0 -@in(r2.y) in1 -@in(r2.z) in2 -@in(r2.w) in3 -@in(r0.x) in4 -@in(r0.y) in5 -@in(r0.z) in6 -@in(r0.w) in7 -@in(r3.x) in8 -@in(r3.y) in9 -@in(r3.z) in10 -@in(r3.w) in11 +@in(r1.x) in0 +@in(r1.y) in1 +@in(r1.z) in2 +@in(r1.w) in3 +@in(r3.x) in4 +@in(r3.y) in5 +@in(r3.z) in6 +@in(r3.w) in7 +@in(r2.x) in8 +@in(r2.y) in9 +@in(r2.z) in10 +@in(r2.w) in11 @out(r0.x) out0 @out(r0.y) out1 @out(r0.z) out2 @@ -24,43 +24,36 @@ @out(r2.y) out9 @out(r2.z) out10 @out(r2.w) out11 -(sy)(ss)mov.f32f32 r0.w, r0.w -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x -max.f r0.w, r0.w, c4.x -max.f r0.z, r0.z, c4.x -max.f r0.y, r0.y, c4.x -max.f r0.x, r0.x, c4.x -min.f r1.w, r0.w, c4.y -min.f r1.z, r0.z, c4.y -min.f r1.y, r0.y, c4.y -min.f r1.x, r0.x, c4.y -mul.f r0.x, r2.x, c0.w -mul.f r0.y, r2.x, c0.z -mad.f32 r0.x, c1.w, r2.y, r0.x -mad.f32 r0.y, c1.z, r2.y, r0.y -mad.f32 r0.x, c2.w, r2.z, r0.x -mad.f32 r0.y, c2.z, r2.z, r0.y -mad.f32 r0.w, c3.w, r2.w, r0.x -mad.f32 r0.z, c3.z, r2.w, r0.y -mul.f r0.x, r2.x, c0.y -mul.f r0.y, r2.x, c0.x -mad.f32 r0.x, c1.y, r2.y, r0.x -mad.f32 r0.y, c1.x, r2.y, r0.y -mad.f32 r0.x, c2.y, r2.z, r0.x -mad.f32 r2.x, c2.x, r2.z, r0.y -mad.f32 r0.y, c3.y, r2.w, r0.x -mad.f32 r0.x, c3.x, r2.w, r2.x -mov.f32f32 r2.w, r3.w -mov.f32f32 r2.z, r3.z -mov.f32f32 r2.y, r3.y -mov.f32f32 r2.x, r3.x +@const(c4.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000 +(sy)(ss)mul.f r0.x, r1.x, c0.w +mul.f r0.y, r1.x, c0.z +mad.f32 r0.x, c1.w, r1.y, r0.x +mad.f32 r0.y, c1.z, r1.y, r0.y +mad.f32 r0.x, c2.w, r1.z, r0.x +mad.f32 r0.y, c2.z, r1.z, r0.y +mad.f32 r0.w, c3.w, r1.w, r0.x +mad.f32 r0.z, c3.z, r1.w, r0.y +mul.f r0.x, r1.x, c0.y +mul.f r0.y, r1.x, c0.x +mad.f32 r0.x, c1.y, r1.y, r0.x +mad.f32 r0.y, c1.x, r1.y, r0.y +mad.f32 r0.x, c2.y, r1.z, r0.x +mad.f32 r1.x, c2.x, r1.z, r0.y +mad.f32 r0.y, c3.y, r1.w, r0.x +mad.f32 r0.x, c3.x, r1.w, r1.x +max.f r1.x, r3.w, c4.x +max.f r1.y, r3.z, c4.x +max.f r3.y, r3.y, c4.x +max.f r3.x, r3.x, c4.x +min.f r1.w, r1.x, c4.y +min.f r1.z, r1.y, c4.y +min.f r1.y, r3.y, c4.y +min.f r1.x, r3.x, c4.y end nop nop nop ; VERT: outputs: r0.x (0:0) r1.x (1:0) r2.x (5:0) -; VERT: inputs: r2.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=f,il=12,b=0) r3.x (0:0,cm=f,il=16,b=0) -; VERT: 33 instructions, 0 half, 4 full +; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) r3.x (0:0,cm=f,il=12,b=0) r2.x (0:0,cm=f,il=16,b=0) +; VERT: 25 instructions, 0 half, 4 full diff --git a/reference/stk-mines/stk-mines-08.asm b/reference/stk-mines/stk-mines-08.asm index 6763f4d..c381d21 100644 --- a/reference/stk-mines/stk-mines-08.asm +++ b/reference/stk-mines/stk-mines-08.asm @@ -1,16 +1,16 @@ ; options: ; VERT: new compiler -@in(r2.x) in0 -@in(r2.y) in1 -@in(r2.z) in2 -@in(r2.w) in3 +@in(r1.x) in0 +@in(r1.y) in1 +@in(r1.z) in2 +@in(r1.w) in3 @in(r0.x) in4 @in(r0.y) in5 @in(r0.z) in6 -@in(r3.x) in8 -@in(r3.y) in9 -@in(r3.z) in10 -@in(r3.w) in11 +@in(r2.x) in8 +@in(r2.y) in9 +@in(r2.z) in10 +@in(r2.w) in11 @out(r0.x) out0 @out(r0.y) out1 @out(r0.z) out2 @@ -19,131 +19,120 @@ @out(r1.y) out5 @out(r1.z) out6 @out(r1.w) out7 -(sy)(ss)mul.f r0.w, r2.x, c12.x -mul.f r1.x, r0.x, c4.x -mad.f32 r0.w, c13.x, r2.y, r0.w -mad.f32 r1.x, c4.y, r0.y, r1.x -mad.f32 r0.w, c14.x, r2.z, r0.w -mov.f32f32 r1.y, r3.w -mad.f32 r0.w, c15.x, r2.w, r0.w -mov.f32f32 r1.x, r1.x -mul.f r1.z, r0.x, c5.x +@const(c19.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 +(sy)(ss)mul.f r0.w, r1.x, c12.x +mul.f r3.x, r0.x, c4.x +mad.f32 r0.w, c13.x, r1.y, r0.w +mad.f32 r3.x, c4.y, r0.y, r3.x +mad.f32 r0.w, c14.x, r1.z, r0.w +mad.f32 r3.x, c4.z, r0.z, r3.x +mad.f32 r3.y, c15.x, r1.w, r0.w +mul.f r0.w, r1.x, c12.y +mul.f r3.z, r1.x, c12.z +mul.f r3.w, r1.x, c0.w +mul.f r4.x, r3.y, r3.y +mad.f32 r0.w, c13.y, r1.y, r0.w +mul.f r4.y, r3.x, c10.x +mad.f32 r0.w, c14.y, r1.z, r0.w +mul.f r4.z, r0.x, c5.x +mad.f32 r4.w, c15.y, r1.w, r0.w +mad.f32 r0.w, c5.y, r0.y, r4.z +mad.f32 r3.z, c13.z, r1.y, r3.z +mad.f32 r3.w, c1.w, r1.y, r3.w +mad.f32 r4.x, r4.w, r4.w, r4.x +mad.f32 r3.z, c14.z, r1.z, r3.z +mad.f32 r4.z, c5.z, r0.z, r0.w +mad.f32 r3.z, c15.z, r1.w, r3.z +mad.f32 r0.w, c2.w, r1.z, r3.w +mul.f r3.w, r1.x, c0.z +mul.f r5.x, r1.x, c0.y +mad.f32 r4.x, r3.z, r3.z, r4.x +mad.f32 r4.y, c10.y, r4.z, r4.y mul.f r0.x, r0.x, c6.x -mul.f r1.w, r0.w, r0.w -mul.f r3.w, r2.x, c12.y -mad.f32 r1.x, c4.z, r0.z, r1.x -mad.f32 r3.w, c13.y, r2.y, r3.w -mov.f32f32 r1.y, r1.y -mad.f32 r3.w, c14.y, r2.z, r3.w -mul.f r4.x, r1.x, c10.x -mad.f32 r3.w, c15.y, r2.w, r3.w -mad.f32 r1.z, c5.y, r0.y, r1.z -max.f r1.y, r1.y, c19.x +mad.f32 r0.w, c3.w, r1.w, r0.w +mad.f32 r3.w, c1.z, r1.y, r3.w +mad.f32 r5.x, c1.y, r1.y, r5.x +mul.f r1.x, r1.x, c0.x +rsq r4.x, (abs)r4.x +(ss)mov.f32f32 r5.y, r4.x +mul.f r3.z, r3.z, r4.x mad.f32 r0.x, c6.y, r0.y, r0.x -mad.f32 r0.y, r3.w, r3.w, r1.w -mov.f32f32 r1.z, r1.z -min.f r1.w, r1.y, c19.y -mad.f32 r1.y, c5.z, r0.z, r1.z -mov.f32f32 r0.y, r0.y -mul.f r1.z, r2.x, c12.z -mov.f32f32 r0.x, r0.x -mad.f32 r1.z, c13.z, r2.y, r1.z -mad.f32 r4.x, c10.y, r1.y, r4.x -mad.f32 r1.z, c14.z, r2.z, r1.z +mad.f32 r0.y, c2.z, r1.z, r3.w +mul.f r3.y, r3.y, r5.y +mul.f r3.w, r4.w, r5.y +(rpt1)nop +add.f r3.y, c10.x, (neg)r3.y +add.f r3.w, c10.y, (neg)r3.w +add.f r3.z, c10.z, (neg)r3.z mad.f32 r0.x, c6.z, r0.z, r0.x -mad.f32 r0.z, c15.z, r2.w, r1.z -mov.f32f32 r1.z, r4.x -mul.f r4.x, r2.x, c0.w -mul.f r4.y, r2.x, c0.z -mad.f32 r0.y, r0.z, r0.z, r0.y -mad.f32 r1.z, c10.z, r0.x, r1.z -mad.f32 r4.x, c1.w, r2.y, r4.x -mad.f32 r4.y, c1.z, r2.y, r4.y -mul.f r4.z, r2.x, c0.y -mul.f r2.x, r2.x, c0.x -mul.f r4.w, c16.z, r3.z +(ss)mul.f r4.x, r3.y, r3.y +mad.f32 r0.z, c3.z, r1.w, r0.y +mad.f32 r0.y, r3.w, r3.w, r4.x +mad.f32 r4.x, c10.z, r0.x, r4.y +mad.f32 r0.y, r3.z, r3.z, r0.y +mad.f32 r4.y, c2.y, r1.z, r5.x +mad.f32 r1.x, c1.x, r1.y, r1.x +max.f r1.y, r2.w, c19.x +mul.f r2.w, c16.z, r2.z +mul.f r4.w, c16.y, r2.y +mul.f r5.x, c16.x, r2.x rsq r0.y, (abs)r0.y -(ss)mov.f32f32 r0.y, r0.y -max.f r5.x, r1.z, c19.x -cmps.f.lt r1.z, (neg)r1.z, c19.x -mad.f32 r4.x, c2.w, r2.z, r4.x -mul.f r0.w, r0.w, r0.y -mul.f r3.w, r3.w, r0.y -mul.f r0.y, r0.z, r0.y -mov.f32f32 r5.x, r5.x -add.f r0.z, c10.x, (neg)r0.w -add.f r3.w, c10.y, (neg)r3.w -add.f r0.y, c10.z, (neg)r0.y -mad.f32 r0.w, c8.z, r3.z, c9.z -mul.f r5.y, r0.z, r0.z -mul.f r5.z, c16.y, r3.y -mad.f32 r5.y, r3.w, r3.w, r5.y -add.f r4.w, r4.w, r0.w -mad.f32 r0.w, c8.y, r3.y, c9.y -mul.f r5.w, c16.x, r3.x -mov.f32f32 r5.y, r5.y -mul.f r3.z, c17.z, r3.z -mad.f32 r5.y, r0.y, r0.y, r5.y -add.f r5.z, r5.z, r0.w -mad.f32 r6.x, c8.x, r3.x, c9.x -mad.f32 r0.w, c3.w, r2.w, r4.x -mad.f32 r4.x, c2.z, r2.z, r4.y -mad.f32 r4.y, c1.y, r2.y, r4.z -mad.f32 r2.x, c1.x, r2.y, r2.x -rsq r2.y, (abs)r5.y -(ss)mov.f32f32 r2.y, r2.y -mad.f32 r3.z, r5.x, r3.z, r4.w -mul.f r3.y, c17.y, r3.y -add.f r4.z, r5.w, r6.x -mul.f r0.z, r0.z, r2.y -mul.f r3.w, r3.w, r2.y -mul.f r0.y, r0.y, r2.y -mad.f32 r2.y, r5.x, r3.y, r5.z -mul.f r0.z, r1.x, r0.z -mul.f r1.x, c17.x, r3.x -mad.f32 r1.y, r1.y, r3.w, r0.z -mad.f32 r0.z, c3.z, r2.w, r4.x -mad.f32 r3.x, c2.y, r2.z, r4.y -mad.f32 r2.x, c2.x, r2.z, r2.x -mov.f32f32 r1.y, r1.y -mad.f32 r1.x, r5.x, r1.x, r4.z -mad.f32 r1.y, r0.x, r0.y, r1.y -mad.f32 r0.y, c3.y, r2.w, r3.x -mad.f32 r0.x, c3.x, r2.w, r2.x -mov.f32f32 r2.x, c7.x -max.f r1.y, r1.y, c19.x -(rpt5)nop -log2 r1.y, r1.y -(ss)mov.f32f32 r1.y, r1.y -min.f r2.x, r2.x, c19.z -(rpt2)nop -mul.f r1.y, r2.x, r1.y -(rpt2)nop -mov.f32f32 r1.y, r1.y +(ss)mov.f32f32 r5.y, r0.y +(ss)mul.f r0.y, r3.z, r0.y +max.f r3.z, r4.x, c19.x +mad.f32 r5.z, c8.x, r2.x, c9.x +mul.f r3.y, r3.y, r5.y +mul.f r3.w, r3.w, r5.y +mov.f32f32 r5.y, r3.z +mad.f32 r5.w, c8.y, r2.y, c9.y +mul.f r3.x, r3.x, r3.y +mad.f32 r3.y, c8.z, r2.z, c9.z +mad.f32 r3.x, r4.z, r3.w, r3.x +add.f r3.w, r4.w, r5.w +mad.f32 r0.x, r0.x, r0.y, r3.x +add.f r2.w, r2.w, r3.y +mul.f r0.y, c17.y, r2.y +add.f r2.y, r5.x, r5.z +max.f r0.x, r0.x, c19.x +mul.f r2.z, c17.z, r2.z +mad.f32 r3.x, r5.y, r0.y, r3.w +mul.f r2.x, c17.x, r2.x +cmps.f.lt r3.y, (neg)r4.x, c19.x +mad.f32 r0.y, c3.y, r1.w, r4.y +mad.f32 r1.x, c2.x, r1.z, r1.x +log2 r1.z, r0.x +mov.f32f32 r3.w, c7.x +mad.f32 r2.z, r5.y, r2.z, r2.w +mad.f32 r2.x, r3.z, r2.x, r2.y +(ss)mad.f32 r0.x, c3.x, r1.w, r1.x +min.f r1.x, r3.w, c19.z +min.f r1.w, r1.y, c19.y +(rpt1)nop +(ss)mul.f r1.x, r1.x, r1.z (rpt5)nop -exp2 r1.y, r1.y -(ss)mov.f32f32 r1.y, r1.y -(rpt2)nop -sel.b32 r1.y, r1.y, r1.z, c19.x -(rpt2)nop -mov.f32f32 r1.y, r1.y +exp2 r1.x, r1.x +(ss)sel.b32 r1.x, r1.x, r3.y, c19.x (rpt2)nop +mov.f32f32 r1.y, r1.x +mad.f32 r1.x, c18.x, r1.x, r2.x +(rpt1)nop mov.f32f32 r1.y, r1.y -(rpt2)nop -mad.f32 r1.z, c18.z, r1.y, r3.z -mad.f32 r2.x, c18.y, r1.y, r2.y -mad.f32 r1.x, c18.x, r1.y, r1.x -nop -max.f r1.y, r1.z, c19.x -max.f r2.x, r2.x, c19.x max.f r1.x, r1.x, c19.x -nop -min.f r1.z, r1.y, c19.y -min.f r1.y, r2.x, c19.y +(rpt1)nop +mad.f32 r1.z, c18.z, r1.y, r2.z +mad.f32 r1.y, c18.y, r1.y, r3.x min.f r1.x, r1.x, c19.y +nop +max.f r1.z, r1.z, c19.x +max.f r1.y, r1.y, c19.x +(rpt1)nop +min.f r1.z, r1.z, c19.y +min.f r1.y, r1.y, c19.y end nop +nop +nop ; VERT: outputs: r0.x (0:0) r1.x (1:0) -; VERT: inputs: r2.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r3.x (0:0,cm=f,il=16,b=0) -; VERT: 145 instructions, 0 half, 7 full +; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r2.x (0:0,cm=f,il=16,b=0) +; VERT: 121 instructions, 0 half, 6 full diff --git a/reference/stk-mines/stk-mines-09.asm b/reference/stk-mines/stk-mines-09.asm index 242def2..693d844 100644 --- a/reference/stk-mines/stk-mines-09.asm +++ b/reference/stk-mines/stk-mines-09.asm @@ -1,9 +1,9 @@ ; options: ; VERT: new compiler -@in(r2.x) in0 -@in(r2.y) in1 -@in(r2.z) in2 -@in(r2.w) in3 +@in(r1.x) in0 +@in(r1.y) in1 +@in(r1.z) in2 +@in(r1.w) in3 @in(r0.x) in4 @in(r0.y) in5 @in(r0.z) in6 @@ -11,10 +11,10 @@ @in(r3.y) in9 @in(r3.z) in10 @in(r3.w) in11 -@in(r4.x) in12 -@in(r4.y) in13 -@in(r4.z) in14 -@in(r4.w) in15 +@in(r2.x) in12 +@in(r2.y) in13 +@in(r2.z) in14 +@in(r2.w) in15 @out(r0.x) out0 @out(r0.y) out1 @out(r0.z) out2 @@ -27,135 +27,120 @@ @out(r2.y) out9 @out(r2.z) out10 @out(r2.w) out11 -(sy)(ss)mul.f r0.w, r2.x, c12.x -mul.f r1.x, r0.x, c4.x -mad.f32 r0.w, c13.x, r2.y, r0.w -mad.f32 r1.x, c4.y, r0.y, r1.x -mad.f32 r0.w, c14.x, r2.z, r0.w -mov.f32f32 r1.y, r3.w -mad.f32 r0.w, c15.x, r2.w, r0.w -mov.f32f32 r1.x, r1.x -mul.f r1.z, r0.x, c5.x +@const(c19.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 +(sy)(ss)mul.f r0.w, r1.x, c12.x +mul.f r4.x, r0.x, c4.x +mad.f32 r0.w, c13.x, r1.y, r0.w +mad.f32 r4.x, c4.y, r0.y, r4.x +mad.f32 r0.w, c14.x, r1.z, r0.w +mad.f32 r4.x, c4.z, r0.z, r4.x +mad.f32 r4.y, c15.x, r1.w, r0.w +mul.f r0.w, r1.x, c12.y +mul.f r4.z, r1.x, c12.z +mul.f r4.w, r1.x, c0.w +mul.f r5.x, r4.y, r4.y +mad.f32 r0.w, c13.y, r1.y, r0.w +mul.f r5.y, r4.x, c10.x +mad.f32 r0.w, c14.y, r1.z, r0.w +mul.f r5.z, r0.x, c5.x +mad.f32 r5.w, c15.y, r1.w, r0.w +mad.f32 r0.w, c5.y, r0.y, r5.z +mad.f32 r4.z, c13.z, r1.y, r4.z +mad.f32 r4.w, c1.w, r1.y, r4.w +mad.f32 r5.x, r5.w, r5.w, r5.x +mad.f32 r4.z, c14.z, r1.z, r4.z +mad.f32 r5.z, c5.z, r0.z, r0.w +mad.f32 r4.z, c15.z, r1.w, r4.z +mad.f32 r0.w, c2.w, r1.z, r4.w +mul.f r4.w, r1.x, c0.z +mul.f r6.x, r1.x, c0.y +mad.f32 r5.x, r4.z, r4.z, r5.x +mad.f32 r5.y, c10.y, r5.z, r5.y mul.f r0.x, r0.x, c6.x -mul.f r1.w, r0.w, r0.w -mul.f r3.w, r2.x, c12.y -mad.f32 r1.x, c4.z, r0.z, r1.x -mad.f32 r3.w, c13.y, r2.y, r3.w -mov.f32f32 r1.y, r1.y -mad.f32 r3.w, c14.y, r2.z, r3.w -mul.f r5.x, r1.x, c10.x -mad.f32 r3.w, c15.y, r2.w, r3.w -mad.f32 r1.z, c5.y, r0.y, r1.z -max.f r1.y, r1.y, c19.x +mad.f32 r0.w, c3.w, r1.w, r0.w +mad.f32 r4.w, c1.z, r1.y, r4.w +mad.f32 r6.x, c1.y, r1.y, r6.x +mul.f r1.x, r1.x, c0.x +rsq r5.x, (abs)r5.x +(ss)mov.f32f32 r6.y, r5.x +mul.f r4.z, r4.z, r5.x mad.f32 r0.x, c6.y, r0.y, r0.x -mad.f32 r0.y, r3.w, r3.w, r1.w -mov.f32f32 r1.z, r1.z -min.f r1.w, r1.y, c19.y -mad.f32 r1.y, c5.z, r0.z, r1.z -mov.f32f32 r0.y, r0.y -mul.f r1.z, r2.x, c12.z -mov.f32f32 r0.x, r0.x -mad.f32 r1.z, c13.z, r2.y, r1.z -mad.f32 r5.x, c10.y, r1.y, r5.x -mad.f32 r1.z, c14.z, r2.z, r1.z +mad.f32 r0.y, c2.z, r1.z, r4.w +mul.f r4.y, r4.y, r6.y +mul.f r4.w, r5.w, r6.y +(rpt1)nop +add.f r4.y, c10.x, (neg)r4.y +add.f r4.w, c10.y, (neg)r4.w +add.f r4.z, c10.z, (neg)r4.z mad.f32 r0.x, c6.z, r0.z, r0.x -mad.f32 r0.z, c15.z, r2.w, r1.z -mov.f32f32 r1.z, r5.x -mul.f r5.x, r2.x, c0.w -mul.f r5.y, r2.x, c0.z -mad.f32 r0.y, r0.z, r0.z, r0.y -mad.f32 r1.z, c10.z, r0.x, r1.z -mad.f32 r5.x, c1.w, r2.y, r5.x -mad.f32 r5.y, c1.z, r2.y, r5.y -mul.f r5.z, r2.x, c0.y -mul.f r2.x, r2.x, c0.x -mul.f r5.w, c16.z, r3.z +(ss)mul.f r5.x, r4.y, r4.y +mad.f32 r0.z, c3.z, r1.w, r0.y +mad.f32 r0.y, r4.w, r4.w, r5.x +mad.f32 r5.x, c10.z, r0.x, r5.y +mad.f32 r0.y, r4.z, r4.z, r0.y +mad.f32 r5.y, c2.y, r1.z, r6.x +mad.f32 r1.x, c1.x, r1.y, r1.x +max.f r1.y, r3.w, c19.x +mul.f r3.w, c16.z, r3.z +mul.f r5.w, c16.y, r3.y +mul.f r6.x, c16.x, r3.x rsq r0.y, (abs)r0.y -(ss)mov.f32f32 r0.y, r0.y -max.f r6.x, r1.z, c19.x -cmps.f.lt r1.z, (neg)r1.z, c19.x -mad.f32 r5.x, c2.w, r2.z, r5.x -mul.f r0.w, r0.w, r0.y -mul.f r3.w, r3.w, r0.y -mul.f r0.y, r0.z, r0.y -mov.f32f32 r6.x, r6.x -add.f r0.z, c10.x, (neg)r0.w -add.f r3.w, c10.y, (neg)r3.w -add.f r0.y, c10.z, (neg)r0.y -mad.f32 r0.w, c8.z, r3.z, c9.z -mul.f r6.y, r0.z, r0.z -mul.f r6.z, c16.y, r3.y -mad.f32 r6.y, r3.w, r3.w, r6.y -add.f r5.w, r5.w, r0.w -mad.f32 r0.w, c8.y, r3.y, c9.y -mul.f r6.w, c16.x, r3.x -mov.f32f32 r6.y, r6.y +(ss)mov.f32f32 r6.y, r0.y +(ss)mul.f r0.y, r4.z, r0.y +max.f r4.z, r5.x, c19.x +mad.f32 r6.z, c8.x, r3.x, c9.x +mul.f r4.y, r4.y, r6.y +mul.f r4.w, r4.w, r6.y +mov.f32f32 r6.y, r4.z +mad.f32 r6.w, c8.y, r3.y, c9.y +mul.f r4.x, r4.x, r4.y +mad.f32 r4.y, c8.z, r3.z, c9.z +mad.f32 r4.x, r5.z, r4.w, r4.x +add.f r4.w, r5.w, r6.w +mad.f32 r0.x, r0.x, r0.y, r4.x +add.f r3.w, r3.w, r4.y +mul.f r0.y, c17.y, r3.y +add.f r3.y, r6.x, r6.z +max.f r0.x, r0.x, c19.x mul.f r3.z, c17.z, r3.z -mad.f32 r6.y, r0.y, r0.y, r6.y -add.f r6.z, r6.z, r0.w -mad.f32 r7.x, c8.x, r3.x, c9.x -mad.f32 r0.w, c3.w, r2.w, r5.x -mad.f32 r5.x, c2.z, r2.z, r5.y -mad.f32 r5.y, c1.y, r2.y, r5.z -mad.f32 r2.x, c1.x, r2.y, r2.x -rsq r2.y, (abs)r6.y -(ss)mov.f32f32 r2.y, r2.y -mad.f32 r3.z, r6.x, r3.z, r5.w -mul.f r3.y, c17.y, r3.y -add.f r5.z, r6.w, r7.x -mul.f r0.z, r0.z, r2.y -mul.f r3.w, r3.w, r2.y -mul.f r0.y, r0.y, r2.y -mad.f32 r3.y, r6.x, r3.y, r6.z -mul.f r0.z, r1.x, r0.z -mul.f r1.x, c17.x, r3.x -mad.f32 r1.y, r1.y, r3.w, r0.z -mad.f32 r0.z, c3.z, r2.w, r5.x -mad.f32 r2.y, c2.y, r2.z, r5.y -mad.f32 r2.x, c2.x, r2.z, r2.x -mov.f32f32 r1.y, r1.y -mad.f32 r1.x, r6.x, r1.x, r5.z -mad.f32 r1.y, r0.x, r0.y, r1.y -mad.f32 r0.y, c3.y, r2.w, r2.y -mad.f32 r0.x, c3.x, r2.w, r2.x -mov.f32f32 r3.x, c7.x -max.f r1.y, r1.y, c19.x -mov.f32f32 r2.w, r4.w -mov.f32f32 r2.z, r4.z -mov.f32f32 r2.y, r4.y -mov.f32f32 r2.x, r4.x +mad.f32 r4.x, r6.y, r0.y, r4.w +mul.f r3.x, c17.x, r3.x +cmps.f.lt r4.y, (neg)r5.x, c19.x +mad.f32 r0.y, c3.y, r1.w, r5.y +mad.f32 r1.x, c2.x, r1.z, r1.x +log2 r1.z, r0.x +mov.f32f32 r4.w, c7.x +mad.f32 r3.z, r6.y, r3.z, r3.w +mad.f32 r3.x, r4.z, r3.x, r3.y +(ss)mad.f32 r0.x, c3.x, r1.w, r1.x +min.f r1.x, r4.w, c19.z +min.f r1.w, r1.y, c19.y (rpt1)nop -log2 r1.y, r1.y -(ss)mov.f32f32 r1.y, r1.y -min.f r3.x, r3.x, c19.z -(rpt2)nop -mul.f r1.y, r3.x, r1.y -(rpt2)nop -mov.f32f32 r1.y, r1.y +(ss)mul.f r1.x, r1.x, r1.z (rpt5)nop -exp2 r1.y, r1.y -(ss)mov.f32f32 r1.y, r1.y -(rpt2)nop -sel.b32 r1.y, r1.y, r1.z, c19.x -(rpt2)nop -mov.f32f32 r1.y, r1.y +exp2 r1.x, r1.x +(ss)sel.b32 r1.x, r1.x, r4.y, c19.x (rpt2)nop +mov.f32f32 r1.y, r1.x +mad.f32 r1.x, c18.x, r1.x, r3.x +(rpt1)nop mov.f32f32 r1.y, r1.y -(rpt2)nop -mad.f32 r1.z, c18.z, r1.y, r3.z -mad.f32 r3.x, c18.y, r1.y, r3.y -mad.f32 r1.x, c18.x, r1.y, r1.x -nop -max.f r1.y, r1.z, c19.x -max.f r3.x, r3.x, c19.x max.f r1.x, r1.x, c19.x -nop -min.f r1.z, r1.y, c19.y -min.f r1.y, r3.x, c19.y +(rpt1)nop +mad.f32 r1.z, c18.z, r1.y, r3.z +mad.f32 r1.y, c18.y, r1.y, r4.x min.f r1.x, r1.x, c19.y +nop +max.f r1.z, r1.z, c19.x +max.f r1.y, r1.y, c19.x +(rpt1)nop +min.f r1.z, r1.z, c19.y +min.f r1.y, r1.y, c19.y end nop +nop +nop ; VERT: outputs: r0.x (0:0) r1.x (1:0) r2.x (5:0) -; VERT: inputs: r2.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r3.x (0:0,cm=f,il=16,b=0) r4.x (0:0,cm=f,il=20,b=0) -; VERT: 145 instructions, 0 half, 8 full +; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r3.x (0:0,cm=f,il=16,b=0) r2.x (0:0,cm=f,il=20,b=0) +; VERT: 121 instructions, 0 half, 7 full diff --git a/reference/stk-mines/stk-mines-10.asm b/reference/stk-mines/stk-mines-10.asm index 508a351..c42aaa4 100644 --- a/reference/stk-mines/stk-mines-10.asm +++ b/reference/stk-mines/stk-mines-10.asm @@ -6,51 +6,36 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c4.x) 0x3f800000, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)bary.f r0.z, 4, r0.x bary.f r0.w, 8, r0.x -bary.f r1.x, 11, r0.x -bary.f r1.y, 9, r0.x +bary.f r1.x, 9, r0.x +bary.f r1.y, 11, r0.x mad.f32 r0.z, c1.x, r0.z, c1.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.y, r1.y +bary.f r1.z, 3, r0.x +bary.f r2.x, 2, r0.x +bary.f r2.y, 1, r0.x max.f r0.z, r0.z, c4.y -mov.f32f32 r1.z, r0.w -mov.f32f32 r2.x, r1.x -mov.f32f32 r1.w, r1.y -min.f r0.z, r0.z, c4.x -bary.f r0.w, 3, r0.x -bary.f r1.x, 2, r0.x -bary.f r1.y, 1, r0.x -add.f r2.y, c4.x, (neg)r0.z bary.f (ei)r0.x, 0, r0.x nop -sam.p (f32)(xyzw)r2.z, r1.z, s#0, t#0 -(sy)mul.f r0.y, r3.y, r0.w -mul.f r0.w, c2.z, r2.y -(ss)mul.f r1.z, c2.y, r2.y -mul.f r1.w, c2.x, r2.y -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.w, r0.w -mul.f r1.x, r3.x, r1.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r2.x, r1.w -mul.f r1.y, r2.w, r1.y -mad.f32 r0.w, r1.x, r0.z, r0.w +sam.p (f32)(xyzw)r2.z, r0.w, s#0, t#0 +(sy)mul.f r1.w, r3.y, r1.z +min.f r0.y, r0.z, c4.x +mul.f r0.z, r3.x, r2.x +(ss)mul.f r0.w, r2.w, r2.y mul.f r0.x, r2.z, r0.x -mov.f32f32 r1.w, r0.y -mad.f32 r0.y, r1.y, r0.z, r1.z -mov.f32f32 r1.z, r0.w -mad.f32 r0.x, r0.x, r0.z, r2.x +add.f r1.x, c4.x, (neg)r0.y +(rpt2)nop +mul.f r1.y, c2.z, r1.x +mul.f r2.x, c2.y, r1.x +mad.f32 r1.z, r0.z, r0.y, r1.y +mad.f32 r1.y, r0.w, r0.y, r2.x +mul.f r0.z, c2.x, r1.x nop -mov.f32f32 r1.y, r0.y -nop -mov.f32f32 r1.x, r0.x +mad.f32 r1.x, r0.x, r0.y, r0.z end nop -nop -nop ; FRAG: outputs: r1.x (1:0) -; FRAG: inputs: r0.x (1:0,cm=f,il=8,b=1) r1.x (3:0,cm=f,il=12,b=1) r0.y (5:0,cm=f,il=16,b=1) -; FRAG: 41 instructions, 0 half, 4 full +; FRAG: inputs: r0.w (1:0,cm=f,il=8,b=1) r1.x (3:0,cm=f,il=12,b=1) r0.z (5:0,cm=f,il=16,b=1) +; FRAG: 29 instructions, 0 half, 4 full diff --git a/reference/stk-mines/stk-mines-11.asm b/reference/stk-mines/stk-mines-11.asm index e1b8ffe..27f5178 100644 --- a/reference/stk-mines/stk-mines-11.asm +++ b/reference/stk-mines/stk-mines-11.asm @@ -1,20 +1,20 @@ ; options: ; VERT: new compiler -@in(r2.y) in0 -@in(r2.z) in1 -@in(r2.w) in2 -@in(r3.x) in3 +@in(r4.x) in0 +@in(r4.y) in1 +@in(r4.z) in2 +@in(r4.w) in3 @in(r0.x) in4 @in(r0.y) in5 @in(r0.z) in6 -@in(r3.y) in8 -@in(r3.z) in9 -@in(r3.w) in10 -@in(r4.x) in11 -@in(r4.y) in12 -@in(r4.z) in13 -@in(r4.w) in14 -@in(r5.x) in15 +@in(r1.x) in8 +@in(r1.y) in9 +@in(r1.z) in10 +@in(r1.w) in11 +@in(r3.x) in12 +@in(r3.y) in13 +@in(r3.z) in14 +@in(r3.w) in15 @out(r0.x) out0 @out(r0.y) out1 @out(r0.z) out2 @@ -31,139 +31,121 @@ @out(r3.y) out13 @out(r3.z) out14 @out(r3.w) out15 -(sy)(ss)mul.f r0.w, r2.y, c12.x -mul.f r1.x, r0.x, c4.x -mad.f32 r0.w, c13.x, r2.z, r0.w -mad.f32 r1.x, c4.y, r0.y, r1.x -mad.f32 r0.w, c14.x, r2.w, r0.w -mov.f32f32 r1.y, r4.x -mad.f32 r0.w, c15.x, r3.x, r0.w -mov.f32f32 r1.x, r1.x -mul.f r1.z, r2.y, c12.z -mov.f32f32 r1.y, r1.y -mul.f r2.x, r0.w, r0.w -mul.f r1.w, r2.y, c12.y -mad.f32 r1.x, c4.z, r0.z, r1.x -mad.f32 r1.w, c13.y, r2.z, r1.w -max.f r1.y, r1.y, c19.x -mad.f32 r1.w, c14.y, r2.w, r1.w -mul.f r4.x, r1.x, c10.x -mad.f32 r5.y, c15.y, r3.x, r1.w -mul.f r5.z, r0.x, c5.x -min.f r1.w, r1.y, c19.y -mad.f32 r1.y, c13.z, r2.z, r1.z -mad.f32 r1.z, r5.y, r5.y, r2.x -mad.f32 r2.x, c5.y, r0.y, r5.z -mad.f32 r1.y, c14.z, r2.w, r1.y +@const(c19.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000 +@const(c20.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 +(sy)(ss)mul.f r0.w, r4.x, c12.x +mul.f r2.x, r0.x, c4.x +mad.f32 r0.w, c13.x, r4.y, r0.w +mad.f32 r2.x, c4.y, r0.y, r2.x +mad.f32 r0.w, c14.x, r4.z, r0.w +mad.f32 r2.y, c4.z, r0.z, r2.x +mad.f32 r0.w, c15.x, r4.w, r0.w +mul.f r2.x, r4.x, c12.z +mul.f r2.z, r4.x, c12.y +mad.f32 r2.x, c13.z, r4.y, r2.x +mul.f r2.w, r0.w, r0.w +mad.f32 r2.z, c13.y, r4.y, r2.z +mul.f r5.x, r2.y, c10.x +mad.f32 r2.z, c14.y, r4.z, r2.z +mul.f r5.y, r0.x, c5.x +mad.f32 r2.z, c15.y, r4.w, r2.z +mad.f32 r5.y, c5.y, r0.y, r5.y +mad.f32 r2.x, c14.z, r4.z, r2.x +mad.f32 r5.y, c5.z, r0.z, r5.y +mad.f32 r2.w, r2.z, r2.z, r2.w +mad.f32 r5.z, c15.z, r4.w, r2.x +mul.f r5.w, r4.x, c0.w +mul.f r6.x, r4.x, c0.z +mul.f r6.y, r4.x, c0.y +mad.f32 r2.w, r5.z, r5.z, r2.w +mad.f32 r5.x, c10.y, r5.y, r5.x mul.f r0.x, r0.x, c6.x -mov.f32f32 r1.z, r1.z -mad.f32 r1.y, c15.z, r3.x, r1.y -mov.f32f32 r2.x, r2.x +absneg.f r2.x, (abs)r5.z +mad.f32 r5.w, c1.w, r4.y, r5.w +mad.f32 r6.x, c1.z, r4.y, r6.x +mad.f32 r6.y, c1.y, r4.y, r6.y +rsq r2.w, (abs)r2.w +(ss)mov.f32f32 r6.z, r2.w +(ss)mul.f r2.w, r5.z, r2.w mad.f32 r0.x, c6.y, r0.y, r0.x -mul.f r0.y, r2.y, c0.w -mad.f32 r1.z, r1.y, r1.y, r1.z -mad.f32 r5.z, c5.z, r0.z, r2.x -absneg.f r2.x, (abs)r1.y -mov.f32f32 r0.x, r0.x -(rpt2)nop -rsq r1.z, (abs)r1.z -(ss)mov.f32f32 r1.z, r1.z -mad.f32 r4.x, c10.y, r5.z, r4.x +mad.f32 r0.y, c2.w, r4.z, r5.w +mul.f r0.w, r0.w, r6.z +mul.f r2.z, r2.z, r6.z +(rpt1)nop +add.f r5.z, c10.x, (neg)r0.w +add.f r2.z, c10.y, (neg)r2.z +add.f r2.w, c10.z, (neg)r2.w mad.f32 r0.x, c6.z, r0.z, r0.x -mad.f32 r0.y, c1.w, r2.z, r0.y -mul.f r0.z, r0.w, r1.z -mul.f r0.w, r5.y, r1.z -mul.f r1.y, r1.y, r1.z -mov.f32f32 r1.z, r4.x -add.f r0.z, c10.x, (neg)r0.z -add.f r4.x, c10.y, (neg)r0.w -add.f r1.y, c10.z, (neg)r1.y -mad.f32 r0.w, c10.z, r0.x, r1.z -mul.f r1.z, r0.z, r0.z -mad.f32 r0.y, c2.w, r2.w, r0.y -mad.f32 r1.z, r4.x, r4.x, r1.z -max.f r5.y, r0.w, c20.x -cmps.f.lt r5.w, (neg)r0.w, c19.x -mad.f32 r0.w, c3.w, r3.x, r0.y -mov.f32f32 r0.y, r1.z -mov.f32f32 r1.z, r5.y -mad.f32 r0.y, r1.y, r1.y, r0.y -mul.f r5.y, c16.z, r3.w -mul.f r6.x, c16.y, r3.z -mul.f r6.y, c16.x, r3.y -mul.f r6.z, r2.y, c0.z -mul.f r6.w, r2.y, c0.y -mul.f r2.y, r2.y, c0.x +mul.f r0.z, r5.z, r5.z +mad.f32 r0.w, c3.w, r4.w, r0.y +mad.f32 r0.y, r2.z, r2.z, r0.z +mad.f32 r0.z, c10.z, r0.x, r5.x +mad.f32 r0.y, r2.w, r2.w, r0.y +mad.f32 r5.x, c2.z, r4.z, r6.x +mad.f32 r5.w, c2.y, r4.z, r6.y +mul.f r4.x, r4.x, c0.x +max.f r1.w, r1.w, c19.x +mul.f r6.x, c16.z, r1.z +mul.f r6.y, c16.y, r1.y rsq r0.y, (abs)r0.y -(ss)mov.f32f32 r0.y, r0.y -mad.f32 r7.x, c8.z, r3.w, c9.z -mad.f32 r7.y, c8.y, r3.z, c9.y -mad.f32 r7.z, c8.x, r3.y, c9.x -mul.f r0.z, r0.z, r0.y -mul.f r4.x, r4.x, r0.y -mul.f r0.y, r1.y, r0.y -add.f r1.y, r5.y, r7.x -mul.f r0.z, r1.x, r0.z -mul.f r1.x, c17.z, r3.w -mad.f32 r0.z, r5.z, r4.x, r0.z -add.f r3.w, r6.x, r7.y -add.f r4.x, r6.y, r7.z -mad.f32 r5.y, c1.z, r2.z, r6.z -mov.f32f32 r0.z, r0.z -mad.f32 r1.x, r1.z, r1.x, r1.y -mad.f32 r0.x, r0.x, r0.y, r0.z -mul.f r0.y, c17.y, r3.z -mul.f r0.z, c17.x, r3.y -mad.f32 r1.y, c2.z, r2.w, r5.y +(ss)mov.f32f32 r6.z, r0.y +(ss)mul.f r0.y, r2.w, r0.y +max.f r6.w, r0.z, c20.x +mul.f r2.w, c16.x, r1.x +mul.f r5.z, r5.z, r6.z +mul.f r2.z, r2.z, r6.z +mov.f32f32 r6.z, r6.w +mad.f32 r7.x, c8.y, r1.y, c9.y +mul.f r2.y, r2.y, r5.z +mad.f32 r5.z, c8.z, r1.z, c9.z +mad.f32 r2.y, r5.y, r2.z, r2.y +add.f r2.z, r6.y, r7.x +mad.f32 r0.x, r0.x, r0.y, r2.y +add.f r2.y, r6.x, r5.z +mul.f r0.y, c17.y, r1.y +mad.f32 r1.y, c8.x, r1.x, c9.x max.f r0.x, r0.x, c20.x -mad.f32 r5.y, r1.z, r0.y, r3.w -mad.f32 r1.z, r1.z, r0.z, r4.x -mad.f32 r0.z, c3.z, r3.x, r1.y -mad.f32 r0.y, c1.y, r2.z, r6.w -mad.f32 r1.y, c1.x, r2.z, r2.y -mov.f32f32 r2.y, c7.x +mul.f r1.z, c17.z, r1.z +mad.f32 r5.y, r6.z, r0.y, r2.z +add.f r1.y, r2.w, r1.y +cmps.f.lt r5.z, (neg)r0.z, c19.x +mad.f32 r0.z, c3.z, r4.w, r5.x +mad.f32 r0.y, c3.y, r4.w, r5.w log2 r0.x, r0.x -(ss)mov.f32f32 r2.z, r0.x -(ss)mad.f32 r0.x, c2.y, r2.w, r0.y -mad.f32 r1.y, c2.x, r2.w, r1.y -min.f r2.y, r2.y, c20.z -mad.f32 r0.y, c3.y, r3.x, r0.x -mad.f32 r0.x, c3.x, r3.x, r1.y -mov.f32f32 r3.w, r5.x -mul.f r1.y, r2.y, r2.z -mov.f32f32 r3.z, r4.w -mov.f32f32 r3.y, r4.z -mov.f32f32 r3.x, r4.y -mov.f32f32 r1.y, r1.y +mov.f32f32 r2.z, c7.x +mad.f32 r1.z, r6.z, r1.z, r2.y +mul.f r1.x, c17.x, r1.x +mad.f32 r2.y, c1.x, r4.y, r4.x +min.f r2.z, r2.z, c20.z +mad.f32 r2.y, c2.x, r4.z, r2.y +min.f r1.w, r1.w, c19.y mov.f32f32 r2.w, c19.y +(ss)mul.f r4.x, r2.z, r0.x +mad.f32 r1.x, r6.w, r1.x, r1.y +(ss)mad.f32 r0.x, c3.x, r4.w, r2.y mov.f32f32 r2.z, c19.x mov.f32f32 r2.y, c19.x +(rpt1)nop +exp2 r1.y, r4.x +(ss)sel.b32 r1.y, r1.y, r5.z, c20.x (rpt2)nop -exp2 r1.y, r1.y -(ss)mov.f32f32 r1.y, r1.y -(rpt2)nop -sel.b32 r1.y, r1.y, r5.w, c20.x -(rpt2)nop -mov.f32f32 r1.y, r1.y -(rpt2)nop -mov.f32f32 r1.y, r1.y -(rpt2)nop -mad.f32 r1.x, c18.z, r1.y, r1.x -mad.f32 r4.x, c18.y, r1.y, r5.y -mad.f32 r1.y, c18.x, r1.y, r1.z -nop +(ss)mov.f32f32 r4.x, r1.y +mad.f32 r1.x, c18.x, r1.y, r1.x +(rpt1)nop +mov.f32f32 r1.y, r4.x max.f r1.x, r1.x, c19.x -max.f r4.x, r4.x, c19.x -max.f r4.y, r1.y, c19.x +(rpt1)nop +mad.f32 r1.z, c18.z, r1.y, r1.z +mad.f32 r1.y, c18.y, r1.y, r5.y +min.f r1.x, r1.x, c19.y nop -min.f r1.z, r1.x, c19.y -min.f r1.y, r4.x, c19.y -min.f r1.x, r4.y, c19.y +max.f r1.z, r1.z, c19.x +max.f r1.y, r1.y, c19.x +(rpt1)nop +min.f r1.z, r1.z, c19.y +min.f r1.y, r1.y, c19.y end -nop -nop -nop ; VERT: outputs: r0.x (0:0) r1.x (1:0) r2.x (3:0) r3.x (5:0) -; VERT: inputs: r2.y (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r3.y (0:0,cm=f,il=16,b=0) r4.y (0:0,cm=f,il=20,b=0) -; VERT: 141 instructions, 0 half, 8 full +; VERT: inputs: r4.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r1.x (0:0,cm=f,il=16,b=0) r3.x (0:0,cm=f,il=20,b=0) +; VERT: 119 instructions, 0 half, 8 full diff --git a/reference/stk-mines/stk-mines-12.asm b/reference/stk-mines/stk-mines-12.asm index cf606e2..2205708 100644 --- a/reference/stk-mines/stk-mines-12.asm +++ b/reference/stk-mines/stk-mines-12.asm @@ -6,39 +6,32 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c3.x) 0x3f800000, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)bary.f r0.z, 4, r0.x -bary.f r0.w, 3, r0.x -bary.f r1.x, 2, r0.x -bary.f r1.y, 1, r0.x +bary.f r1.w, 3, r0.x +bary.f r0.w, 2, r0.x +bary.f r1.x, 1, r0.x mad.f32 r0.z, c0.x, r0.z, c0.y -mov.f32f32 r0.w, r0.w bary.f (ei)r0.x, 0, r0.x -nop -max.f r0.y, r0.z, c3.y -mov.f32f32 r1.w, r0.w (rpt1)nop +max.f r0.y, r0.z, c3.y +(rpt2)nop min.f r0.y, r0.y, c3.x (rpt2)nop add.f r0.z, c3.x, (neg)r0.y (rpt2)nop -mul.f r0.w, c1.z, r0.z -mul.f r1.z, c1.y, r0.z +mul.f r1.y, c1.z, r0.z +mul.f r2.x, c1.y, r0.z +mad.f32 r1.z, r0.w, r0.y, r1.y +mad.f32 r1.y, r1.x, r0.y, r2.x mul.f r0.z, c1.x, r0.z nop -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.z, r1.z -mad.f32 r0.w, r1.x, r0.y, r0.w -mad.f32 r1.x, r1.y, r0.y, r1.z -mov.f32f32 r0.z, r0.z -nop -mov.f32f32 r1.z, r0.w -mov.f32f32 r1.y, r1.x -mad.f32 r0.x, r0.x, r0.y, r0.z -(rpt2)nop -mov.f32f32 r1.x, r0.x +mad.f32 r1.x, r0.x, r0.y, r0.z end nop +nop +nop ; FRAG: outputs: r1.x (1:0) -; FRAG: inputs: r0.x (1:0,cm=f,il=8,b=1) r1.x (3:0,cm=f,il=12,b=1) -; FRAG: 38 instructions, 0 half, 2 full +; FRAG: inputs: r1.x (1:0,cm=f,il=8,b=1) r1.x (3:0,cm=f,il=12,b=1) +; FRAG: 28 instructions, 0 half, 3 full diff --git a/reference/stk-mines/stk-mines-13.asm b/reference/stk-mines/stk-mines-13.asm index f952885..bca028b 100644 --- a/reference/stk-mines/stk-mines-13.asm +++ b/reference/stk-mines/stk-mines-13.asm @@ -1,16 +1,16 @@ ; options: ; VERT: new compiler -@in(r2.y) in0 -@in(r2.z) in1 -@in(r2.w) in2 -@in(r3.x) in3 +@in(r3.x) in0 +@in(r3.y) in1 +@in(r3.z) in2 +@in(r3.w) in3 @in(r0.x) in4 @in(r0.y) in5 @in(r0.z) in6 -@in(r3.y) in8 -@in(r3.z) in9 -@in(r3.w) in10 -@in(r4.x) in11 +@in(r1.x) in8 +@in(r1.y) in9 +@in(r1.z) in10 +@in(r1.w) in11 @out(r0.x) out0 @out(r0.y) out1 @out(r0.z) out2 @@ -23,135 +23,121 @@ @out(r2.y) out9 @out(r2.z) out10 @out(r2.w) out11 -(sy)(ss)mul.f r0.w, r2.y, c12.x -mul.f r1.x, r0.x, c4.x -mad.f32 r0.w, c13.x, r2.z, r0.w -mad.f32 r1.x, c4.y, r0.y, r1.x -mad.f32 r0.w, c14.x, r2.w, r0.w -mov.f32f32 r1.y, r4.x -mad.f32 r0.w, c15.x, r3.x, r0.w -mov.f32f32 r1.x, r1.x -mul.f r1.z, r2.y, c12.z -mov.f32f32 r1.y, r1.y -mul.f r2.x, r0.w, r0.w -mul.f r1.w, r2.y, c12.y -mad.f32 r1.x, c4.z, r0.z, r1.x -mad.f32 r1.w, c13.y, r2.z, r1.w -max.f r1.y, r1.y, c19.x -mad.f32 r1.w, c14.y, r2.w, r1.w -mul.f r4.x, r1.x, c10.x -mad.f32 r4.y, c15.y, r3.x, r1.w -mul.f r4.z, r0.x, c5.x -min.f r1.w, r1.y, c19.y -mad.f32 r1.y, c13.z, r2.z, r1.z -mad.f32 r1.z, r4.y, r4.y, r2.x -mad.f32 r2.x, c5.y, r0.y, r4.z -mad.f32 r1.y, c14.z, r2.w, r1.y +@const(c19.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000 +@const(c20.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 +(sy)(ss)mul.f r0.w, r3.x, c12.x +mul.f r2.x, r0.x, c4.x +mad.f32 r0.w, c13.x, r3.y, r0.w +mad.f32 r2.x, c4.y, r0.y, r2.x +mad.f32 r0.w, c14.x, r3.z, r0.w +mad.f32 r2.y, c4.z, r0.z, r2.x +mad.f32 r0.w, c15.x, r3.w, r0.w +mul.f r2.x, r3.x, c12.z +mul.f r2.z, r3.x, c12.y +mad.f32 r2.x, c13.z, r3.y, r2.x +mul.f r2.w, r0.w, r0.w +mad.f32 r2.z, c13.y, r3.y, r2.z +mul.f r4.x, r2.y, c10.x +mad.f32 r2.z, c14.y, r3.z, r2.z +mul.f r4.y, r0.x, c5.x +mad.f32 r2.z, c15.y, r3.w, r2.z +mad.f32 r4.y, c5.y, r0.y, r4.y +mad.f32 r2.x, c14.z, r3.z, r2.x +mad.f32 r4.y, c5.z, r0.z, r4.y +mad.f32 r2.w, r2.z, r2.z, r2.w +mad.f32 r4.z, c15.z, r3.w, r2.x +mul.f r4.w, r3.x, c0.w +mul.f r5.x, r3.x, c0.z +mul.f r5.y, r3.x, c0.y +mad.f32 r2.w, r4.z, r4.z, r2.w +mad.f32 r4.x, c10.y, r4.y, r4.x mul.f r0.x, r0.x, c6.x -mov.f32f32 r1.z, r1.z -mad.f32 r1.y, c15.z, r3.x, r1.y -mov.f32f32 r2.x, r2.x +absneg.f r2.x, (abs)r4.z +mad.f32 r4.w, c1.w, r3.y, r4.w +mad.f32 r5.x, c1.z, r3.y, r5.x +mad.f32 r5.y, c1.y, r3.y, r5.y +rsq r2.w, (abs)r2.w +(ss)mov.f32f32 r5.z, r2.w +(ss)mul.f r2.w, r4.z, r2.w mad.f32 r0.x, c6.y, r0.y, r0.x -mul.f r0.y, r2.y, c0.w -mad.f32 r1.z, r1.y, r1.y, r1.z -mad.f32 r4.z, c5.z, r0.z, r2.x -absneg.f r2.x, (abs)r1.y -mov.f32f32 r0.x, r0.x -(rpt2)nop -rsq r1.z, (abs)r1.z -(ss)mov.f32f32 r1.z, r1.z -mad.f32 r4.x, c10.y, r4.z, r4.x +mad.f32 r0.y, c2.w, r3.z, r4.w +mul.f r0.w, r0.w, r5.z +mul.f r2.z, r2.z, r5.z +(rpt1)nop +add.f r4.z, c10.x, (neg)r0.w +add.f r2.z, c10.y, (neg)r2.z +add.f r2.w, c10.z, (neg)r2.w mad.f32 r0.x, c6.z, r0.z, r0.x -mad.f32 r0.y, c1.w, r2.z, r0.y -mul.f r0.z, r0.w, r1.z -mul.f r0.w, r4.y, r1.z -mul.f r1.y, r1.y, r1.z -mov.f32f32 r1.z, r4.x -add.f r0.z, c10.x, (neg)r0.z -add.f r4.x, c10.y, (neg)r0.w -add.f r1.y, c10.z, (neg)r1.y -mad.f32 r0.w, c10.z, r0.x, r1.z -mul.f r1.z, r0.z, r0.z -mad.f32 r0.y, c2.w, r2.w, r0.y -mad.f32 r1.z, r4.x, r4.x, r1.z -max.f r4.y, r0.w, c20.x -cmps.f.lt r4.w, (neg)r0.w, c19.x -mad.f32 r0.w, c3.w, r3.x, r0.y -mov.f32f32 r0.y, r1.z -mov.f32f32 r1.z, r4.y -mad.f32 r0.y, r1.y, r1.y, r0.y -mul.f r4.y, c16.z, r3.w -mul.f r5.x, c16.y, r3.z -mul.f r5.y, c16.x, r3.y -mul.f r5.z, r2.y, c0.z -mul.f r5.w, r2.y, c0.y -mul.f r2.y, r2.y, c0.x +mul.f r0.z, r4.z, r4.z +mad.f32 r0.w, c3.w, r3.w, r0.y +mad.f32 r0.y, r2.z, r2.z, r0.z +mad.f32 r0.z, c10.z, r0.x, r4.x +mad.f32 r0.y, r2.w, r2.w, r0.y +mad.f32 r4.x, c2.z, r3.z, r5.x +mad.f32 r4.w, c2.y, r3.z, r5.y +mul.f r3.x, r3.x, c0.x +max.f r1.w, r1.w, c19.x +mul.f r5.x, c16.z, r1.z +mul.f r5.y, c16.y, r1.y rsq r0.y, (abs)r0.y -(ss)mov.f32f32 r0.y, r0.y -mad.f32 r6.x, c8.z, r3.w, c9.z -mad.f32 r6.y, c8.y, r3.z, c9.y -mad.f32 r6.z, c8.x, r3.y, c9.x -mul.f r0.z, r0.z, r0.y -mul.f r4.x, r4.x, r0.y -mul.f r0.y, r1.y, r0.y -add.f r1.y, r4.y, r6.x -mul.f r0.z, r1.x, r0.z -mul.f r1.x, c17.z, r3.w -mad.f32 r0.z, r4.z, r4.x, r0.z -add.f r3.w, r5.x, r6.y -add.f r4.x, r5.y, r6.z -mad.f32 r4.y, c1.z, r2.z, r5.z -mov.f32f32 r0.z, r0.z -mad.f32 r1.x, r1.z, r1.x, r1.y -mad.f32 r0.x, r0.x, r0.y, r0.z -mul.f r0.y, c17.y, r3.z -mul.f r0.z, c17.x, r3.y -mad.f32 r1.y, c2.z, r2.w, r4.y +(ss)mov.f32f32 r5.z, r0.y +(ss)mul.f r0.y, r2.w, r0.y +max.f r5.w, r0.z, c20.x +mul.f r2.w, c16.x, r1.x +mul.f r4.z, r4.z, r5.z +mul.f r2.z, r2.z, r5.z +mov.f32f32 r5.z, r5.w +mad.f32 r6.x, c8.y, r1.y, c9.y +mul.f r2.y, r2.y, r4.z +mad.f32 r4.z, c8.z, r1.z, c9.z +mad.f32 r2.y, r4.y, r2.z, r2.y +add.f r2.z, r5.y, r6.x +mad.f32 r0.x, r0.x, r0.y, r2.y +add.f r2.y, r5.x, r4.z +mul.f r0.y, c17.y, r1.y +mad.f32 r1.y, c8.x, r1.x, c9.x max.f r0.x, r0.x, c20.x -mad.f32 r3.y, r1.z, r0.y, r3.w -mad.f32 r1.z, r1.z, r0.z, r4.x -mad.f32 r0.z, c3.z, r3.x, r1.y -mad.f32 r0.y, c1.y, r2.z, r5.w -mad.f32 r1.y, c1.x, r2.z, r2.y -mov.f32f32 r2.y, c7.x +mul.f r1.z, c17.z, r1.z +mad.f32 r4.y, r5.z, r0.y, r2.z +add.f r1.y, r2.w, r1.y +cmps.f.lt r4.z, (neg)r0.z, c19.x +mad.f32 r0.z, c3.z, r3.w, r4.x +mad.f32 r0.y, c3.y, r3.w, r4.w log2 r0.x, r0.x -(ss)mov.f32f32 r2.z, r0.x -(ss)mad.f32 r0.x, c2.y, r2.w, r0.y -mad.f32 r1.y, c2.x, r2.w, r1.y -min.f r2.y, r2.y, c20.z -mad.f32 r0.y, c3.y, r3.x, r0.x -mad.f32 r0.x, c3.x, r3.x, r1.y +mov.f32f32 r2.z, c7.x +mad.f32 r1.z, r5.z, r1.z, r2.y +mul.f r1.x, c17.x, r1.x +mad.f32 r2.y, c1.x, r3.y, r3.x +min.f r2.z, r2.z, c20.z +mad.f32 r2.y, c2.x, r3.z, r2.y +min.f r1.w, r1.w, c19.y mov.f32f32 r2.w, c19.y -mul.f r1.y, r2.y, r2.z +(ss)mul.f r3.x, r2.z, r0.x +mad.f32 r1.x, r5.w, r1.x, r1.y +(ss)mad.f32 r0.x, c3.x, r3.w, r2.y mov.f32f32 r2.z, c19.x mov.f32f32 r2.y, c19.x -nop -mov.f32f32 r1.y, r1.y -(rpt5)nop -exp2 r1.y, r1.y -(ss)mov.f32f32 r1.y, r1.y -(rpt2)nop -sel.b32 r1.y, r1.y, r4.w, c20.x -(rpt2)nop -mov.f32f32 r1.y, r1.y +(rpt1)nop +exp2 r1.y, r3.x +(ss)sel.b32 r1.y, r1.y, r4.z, c20.x (rpt2)nop -mov.f32f32 r1.y, r1.y -(rpt2)nop -mad.f32 r1.x, c18.z, r1.y, r1.x -mad.f32 r3.x, c18.y, r1.y, r3.y -mad.f32 r1.y, c18.x, r1.y, r1.z -nop +(ss)mov.f32f32 r3.x, r1.y +mad.f32 r1.x, c18.x, r1.y, r1.x +(rpt1)nop +mov.f32f32 r1.y, r3.x max.f r1.x, r1.x, c19.x -max.f r3.x, r3.x, c19.x -max.f r3.y, r1.y, c19.x +(rpt1)nop +mad.f32 r1.z, c18.z, r1.y, r1.z +mad.f32 r1.y, c18.y, r1.y, r4.y +min.f r1.x, r1.x, c19.y nop -min.f r1.z, r1.x, c19.y -min.f r1.y, r3.x, c19.y -min.f r1.x, r3.y, c19.y +max.f r1.z, r1.z, c19.x +max.f r1.y, r1.y, c19.x +(rpt1)nop +min.f r1.z, r1.z, c19.y +min.f r1.y, r1.y, c19.y end -nop -nop ; VERT: outputs: r0.x (0:0) r1.x (1:0) r2.x (3:0) -; VERT: inputs: r2.y (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r3.y (0:0,cm=f,il=16,b=0) -; VERT: 141 instructions, 0 half, 7 full +; VERT: inputs: r3.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r1.x (0:0,cm=f,il=16,b=0) +; VERT: 119 instructions, 0 half, 7 full diff --git a/reference/stk-mines/stk-mines-14.asm b/reference/stk-mines/stk-mines-14.asm index e16d8e5..69665d0 100644 --- a/reference/stk-mines/stk-mines-14.asm +++ b/reference/stk-mines/stk-mines-14.asm @@ -29,171 +29,161 @@ @out(r3.y) out13 @out(r3.z) out14 @out(r3.w) out15 +@const(c25.x) 0x00000000, 0x3f800000, 0x3f000000, 0x00000000 +@const(c26.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)mul.f r0.w, r4.x, c12.x mul.f r1.x, r0.x, c4.x mad.f32 r0.w, c13.x, r4.y, r0.w mad.f32 r1.x, c4.y, r0.y, r1.x mad.f32 r0.w, c14.x, r4.z, r0.w -mov.f32f32 r1.y, r3.x -mad.f32 r0.w, c15.x, r4.w, r0.w -mov.f32f32 r1.x, r1.x -mul.f r1.z, r4.x, c12.z -mov.f32f32 r1.y, r1.y -mul.f r2.x, r0.w, r0.w -mul.f r1.w, r4.x, c12.y mad.f32 r1.x, c4.z, r0.z, r1.x -mad.f32 r1.w, c13.y, r4.y, r1.w -max.f r1.y, r1.y, c25.x -mad.f32 r1.w, c14.y, r4.z, r1.w -mul.f r3.x, r1.x, c10.x -mad.f32 r3.y, c15.y, r4.w, r1.w -mul.f r3.z, r0.x, c5.x -min.f r1.w, r1.y, c25.y -mad.f32 r1.y, c13.z, r4.y, r1.z -mad.f32 r1.z, r3.y, r3.y, r2.x -mad.f32 r2.x, c5.y, r0.y, r3.z +mad.f32 r0.w, c15.x, r4.w, r0.w +mul.f r1.y, r4.x, c12.z +mul.f r1.z, r4.x, c12.y +mad.f32 r1.y, c13.z, r4.y, r1.y +mul.f r1.w, r0.w, r0.w +mad.f32 r1.z, c13.y, r4.y, r1.z +mul.f r2.x, r1.x, c10.x +mad.f32 r1.z, c14.y, r4.z, r1.z +mul.f r3.y, r0.x, c5.x +mad.f32 r1.z, c15.y, r4.w, r1.z +mad.f32 r3.y, c5.y, r0.y, r3.y mad.f32 r1.y, c14.z, r4.z, r1.y -mul.f r0.x, r0.x, c6.x -mov.f32f32 r1.z, r1.z +mad.f32 r3.y, c5.z, r0.z, r3.y +mad.f32 r1.w, r1.z, r1.z, r1.w mad.f32 r1.y, c15.z, r4.w, r1.y -mov.f32f32 r2.x, r2.x -mad.f32 r0.x, c6.y, r0.y, r0.x -mul.f r0.y, r4.x, c0.w -mad.f32 r1.z, r1.y, r1.y, r1.z -mad.f32 r3.z, c5.z, r0.z, r2.x +mul.f r3.z, r4.x, c0.w +mul.f r3.w, r4.x, c0.z +mul.f r5.x, r4.x, c0.y +mad.f32 r1.w, r1.y, r1.y, r1.w +mad.f32 r5.y, c10.y, r3.y, r2.x +mul.f r0.x, r0.x, c6.x absneg.f r2.x, (abs)r1.y -mov.f32f32 r0.x, r0.x -(rpt2)nop -rsq r1.z, (abs)r1.z -(ss)mov.f32f32 r1.z, r1.z -mad.f32 r3.x, c10.y, r3.z, r3.x +mad.f32 r3.z, c1.w, r4.y, r3.z +mad.f32 r3.w, c1.z, r4.y, r3.w +mad.f32 r5.x, c1.y, r4.y, r5.x +rsq r1.w, (abs)r1.w +(ss)mov.f32f32 r6.x, r1.w +mul.f r1.y, r1.y, r1.w +mad.f32 r0.x, c6.y, r0.y, r0.x +mad.f32 r0.y, c2.w, r4.z, r3.z +(ss)mul.f r1.w, r0.w, r6.x +mul.f r1.z, r1.z, r6.x +(rpt1)nop +add.f r3.z, c10.x, (neg)r1.w +mul.f r0.w, r1.x, r1.w +add.f r6.x, c10.y, (neg)r1.z +add.f r6.y, c10.z, (neg)r1.y +mul.f r6.z, r3.z, r3.z +mad.f32 r6.w, r3.y, r1.z, r0.w +mad.f32 r0.w, r6.x, r6.x, r6.z mad.f32 r0.x, c6.z, r0.z, r0.x -mad.f32 r0.y, c1.w, r4.y, r0.y -mul.f r0.z, r0.w, r1.z -mul.f r3.y, r3.y, r1.z -mul.f r1.y, r1.y, r1.z -mov.f32f32 r0.w, r3.x -add.f r1.z, c10.x, (neg)r0.z -mul.f r3.x, r1.x, r0.z -add.f r3.w, c10.y, (neg)r3.y -add.f r5.x, c10.z, (neg)r1.y -mul.f r5.y, r1.z, r1.z -mad.f32 r3.x, r3.z, r3.y, r3.x -mad.f32 r5.y, r3.w, r3.w, r5.y -mad.f32 r0.w, c10.z, r0.x, r0.w -mad.f32 r0.y, c2.w, r4.z, r0.y -mul.f r6.x, r4.x, c0.z -mov.f32f32 r5.y, r5.y -mov.f32f32 r3.x, r3.x -mad.f32 r5.y, r5.x, r5.x, r5.y -mad.f32 r6.y, r0.x, r1.y, r3.x -mad.f32 r6.z, r0.x, r1.y, r3.x -mad.f32 r3.x, r0.x, r1.y, r3.x -max.f r6.w, r0.w, c26.x -cmps.f.lt r7.x, (neg)r0.w, c25.x +mad.f32 r0.z, r6.y, r6.y, r0.w +mov.f32f32 r6.z, r6.w mad.f32 r0.w, c3.w, r4.w, r0.y -rsq r0.y, (abs)r5.y -(ss)mov.f32f32 r0.y, r0.y -(ss)add.f r5.y, r6.y, r6.y -add.f r6.y, r6.z, r6.z -add.f r3.x, r3.x, r3.x -mul.f r1.z, r1.z, r0.y -mad.f32 r0.z, (neg)r5.y, r1.x, r0.z -mul.f r3.w, r3.w, r0.y -mul.f r0.y, r5.x, r0.y -mul.f r1.x, r1.x, r1.z -add.f r1.z, r0.z, c25.x -mad.f32 r1.x, r3.z, r3.w, r1.x -mad.f32 r3.y, (neg)r6.y, r3.z, r3.y -mad.f32 r1.y, (neg)r3.x, r0.x, r1.y -nop -mov.f32f32 r1.x, r1.x -mul.f r1.z, r1.z, r1.z -mad.f32 r0.x, r0.x, r0.y, r1.x -add.f r0.y, r3.y, c25.x -add.f r1.x, r1.y, c25.y -mov.f32f32 r1.y, r6.w -max.f r0.x, r0.x, c26.x -mad.f32 r0.y, r0.y, r0.y, r1.z -mul.f r1.z, c16.z, r2.w -mul.f r3.x, c16.y, r2.z -mul.f r3.z, c16.x, r2.y -mad.f32 r3.w, c1.z, r4.y, r6.x -mul.f r5.x, r4.x, c0.y -log2 r0.x, r0.x -(ss)mov.f32f32 r0.x, r0.x -mov.f32f32 r5.y, c7.x -mov.f32f32 r0.y, r0.y -mad.f32 r6.x, c8.z, r2.w, c9.z +mad.f32 r0.y, c2.z, r4.z, r3.w +mad.f32 r3.w, c2.y, r4.z, r5.x +mul.f r4.x, r4.x, c0.x +max.f r3.x, r3.x, c25.x +rsq r0.z, (abs)r0.z +(ss)mov.f32f32 r5.x, r0.z +mad.f32 r7.x, r0.x, r1.y, r6.z +mad.f32 r6.z, r0.x, r1.y, r6.z +mad.f32 r6.w, r0.x, r1.y, r6.w +mul.f r3.z, r3.z, r5.x +add.f r7.x, r7.x, r7.x +add.f r6.z, r6.z, r6.z +mul.f r5.x, r6.x, r5.x +mul.f r3.z, r1.x, r3.z +mad.f32 r1.x, (neg)r7.x, r1.x, r1.w +mad.f32 r1.y, (neg)r6.z, r0.x, r1.y +mad.f32 r1.w, r3.y, r5.x, r3.z +(ss)mul.f r0.z, r6.y, r0.z +add.f r3.z, r1.x, c25.x +add.f r1.y, r1.y, c25.y +add.f r5.x, r6.w, r6.w +mad.f32 r1.w, r0.x, r0.z, r1.w +mul.f r3.z, r3.z, r3.z +mad.f32 r0.x, c10.z, r0.x, r5.y +mad.f32 r0.z, c3.z, r4.w, r0.y +max.f r1.w, r1.w, c26.x +mad.f32 r3.y, (neg)r5.x, r3.y, r1.z +max.f r1.z, r0.x, c26.x +mul.f r5.x, c16.x, r2.y +cmps.f.lt r0.x, (neg)r0.x, c25.x +mad.f32 r0.y, c3.y, r4.w, r3.w +mad.f32 r3.w, c1.x, r4.y, r4.x +log2 r1.w, r1.w +mov.f32f32 r4.x, c7.x +add.f r4.y, r3.y, c25.x +mov.f32f32 r5.y, r1.z +mul.f r6.x, c16.y, r2.z +min.f r4.x, r4.x, c26.z +mad.f32 r3.z, r4.y, r4.y, r3.z +mul.f r4.y, c16.z, r2.w mad.f32 r6.y, c8.y, r2.z, c9.y -min.f r5.y, r5.y, c26.z -mad.f32 r0.y, r1.x, r1.x, r0.y -add.f r1.x, r1.z, r6.x -add.f r1.z, r3.x, r6.y -mul.f r0.x, r5.y, r0.x -mov.f32f32 r0.y, r0.y +(ss)mul.f r4.x, r4.x, r1.w +mad.f32 r1.y, r1.y, r1.y, r3.z +mad.f32 r3.z, c8.z, r2.w, c9.z +add.f r6.x, r6.x, r6.y +mad.f32 r6.y, c8.x, r2.y, c9.x +mad.f32 r4.z, c2.x, r4.z, r3.w +(ss)min.f r1.w, r3.x, c25.y +exp2 r3.x, r4.x +(ss)sel.b32 r0.x, r3.x, r0.x, c26.x +add.f r3.x, r5.x, r6.y +mul.f r2.y, c17.x, r2.y +rsq r1.y, (abs)r1.y +(ss)mul.f r3.w, r1.y, c25.z +(ss)mov.f32f32 r4.x, r0.x +rsq r1.y, (abs)r1.y +(ss)mul.f r5.x, r1.y, c25.z +(ss)add.f r1.y, r4.y, r3.z +mad.f32 r1.z, r1.z, r2.y, r3.x +mov.f32f32 r2.y, r4.x mul.f r2.w, c17.z, r2.w mul.f r2.z, c17.y, r2.z -mov.f32f32 r0.x, r0.x -mad.f32 r3.x, c8.x, r2.y, c9.x -mad.f32 r5.y, c2.z, r4.z, r3.w -mad.f32 r3.w, c1.y, r4.y, r5.x -mul.f r4.x, r4.x, c0.x -add.f r3.x, r3.z, r3.x -mad.f32 r5.x, c2.y, r4.z, r3.w -exp2 r0.x, r0.x -(ss)mov.f32f32 r0.x, r0.x -rsq r0.y, (abs)r0.y -(ss)mul.f r3.z, r0.y, c25.z -(ss)rsq r0.y, (abs)r0.y -(ss)mul.f r0.y, r0.y, c25.z -mad.f32 r1.x, r1.y, r2.w, r1.x -sel.b32 r0.x, r0.x, r7.x, c26.x -mad.f32 r0.z, r0.z, r3.z, c25.z -mad.f32 r0.y, r3.y, r0.y, c25.z -mad.f32 r1.z, r1.y, r2.z, r1.z -mov.f32f32 r0.x, r0.x -mul.f r2.z, r0.z, c21.w -mul.f r2.w, r0.z, c21.z -mul.f r3.y, r0.z, c21.y -mov.f32f32 r0.x, r0.x -mul.f r2.y, c17.x, r2.y -mad.f32 r2.z, c22.w, r0.y, r2.z -mad.f32 r2.w, c22.z, r0.y, r2.w -mad.f32 r1.x, c18.z, r0.x, r1.x -mad.f32 r1.z, c18.y, r0.x, r1.z -mad.f32 r1.y, r1.y, r2.y, r3.x -mov.f32f32 r2.y, r5.z +mad.f32 r0.x, c18.x, r0.x, r1.z +mad.f32 r3.x, r1.x, r3.w, c25.z +mad.f32 r1.x, r5.y, r2.w, r1.y +mad.f32 r1.y, r5.y, r2.z, r6.x +mad.f32 r1.x, c18.z, r2.y, r1.x +mad.f32 r1.y, c18.y, r2.y, r1.y +max.f r0.x, r0.x, c25.x +nop max.f r1.x, r1.x, c25.x -max.f r3.x, r1.z, c25.x -mad.f32 r0.x, c18.x, r0.x, r1.y -mad.f32 r2.z, c23.w, r2.y, r2.z +max.f r1.y, r1.y, c25.x +(rpt1)nop min.f r1.z, r1.x, c25.y -min.f r1.y, r3.x, c25.y -max.f r0.x, r0.x, c25.x -mov.f32f32 r3.x, r5.w -mad.f32 r2.w, c23.z, r2.y, r2.w -mad.f32 r3.y, c22.y, r0.y, r3.y +min.f r1.y, r1.y, c25.y min.f r1.x, r0.x, c25.y -mad.f32 r3.w, c24.w, r3.x, r2.z -mad.f32 r3.z, c24.z, r3.x, r2.w -mad.f32 r0.x, c23.y, r2.y, r3.y -mul.f r0.z, r0.z, c21.x -mad.f32 r3.y, c24.y, r3.x, r0.x -mad.f32 r0.x, c22.x, r0.y, r0.z -mad.f32 r0.z, c3.z, r4.w, r5.y -mad.f32 r0.x, c23.x, r2.y, r0.x -mad.f32 r0.y, c3.y, r4.w, r5.x -mad.f32 r3.x, c24.x, r3.x, r0.x -mad.f32 r0.x, c1.x, r4.y, r4.x +mul.f r0.x, r3.x, c21.w +mad.f32 r2.y, r3.y, r5.x, c25.z +mul.f r2.z, r3.x, c21.z +mul.f r2.w, r3.x, c21.y +mul.f r3.x, r3.x, c21.x +mad.f32 r0.x, c22.w, r2.y, r0.x +mov.f32f32 r3.y, r5.z +mad.f32 r2.z, c22.z, r2.y, r2.z +mad.f32 r2.w, c22.y, r2.y, r2.w +mad.f32 r2.y, c22.x, r2.y, r3.x +mad.f32 r0.x, c23.w, r3.y, r0.x +mov.f32f32 r3.x, r5.w +mad.f32 r2.z, c23.z, r3.y, r2.z +mad.f32 r2.w, c23.y, r3.y, r2.w +mad.f32 r2.y, c23.x, r5.z, r2.y +mad.f32 r3.w, c24.w, r3.x, r0.x +mad.f32 r3.z, c24.z, r3.x, r2.z +mad.f32 r3.y, c24.y, r3.x, r2.w +mad.f32 r3.x, c24.x, r5.w, r2.y +mad.f32 r0.x, c3.x, r4.w, r4.z mov.f32f32 r2.w, c25.y -mad.f32 r0.x, c2.x, r4.z, r0.x mov.f32f32 r2.z, c25.x -mad.f32 r0.x, c3.x, r4.w, r0.x mov.f32f32 r2.y, c25.x end nop +nop ; VERT: outputs: r0.x (0:0) r1.x (1:0) r2.x (3:0) r3.x (5:0) ; VERT: inputs: r4.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r2.y (0:0,cm=f,il=16,b=0) r5.x (0:0,cm=c,il=20,b=0) -; VERT: 165 instructions, 0 half, 8 full +; VERT: 152 instructions, 0 half, 8 full diff --git a/reference/stk-mines/stk-mines-15.asm b/reference/stk-mines/stk-mines-15.asm index e1b8ffe..27f5178 100644 --- a/reference/stk-mines/stk-mines-15.asm +++ b/reference/stk-mines/stk-mines-15.asm @@ -1,20 +1,20 @@ ; options: ; VERT: new compiler -@in(r2.y) in0 -@in(r2.z) in1 -@in(r2.w) in2 -@in(r3.x) in3 +@in(r4.x) in0 +@in(r4.y) in1 +@in(r4.z) in2 +@in(r4.w) in3 @in(r0.x) in4 @in(r0.y) in5 @in(r0.z) in6 -@in(r3.y) in8 -@in(r3.z) in9 -@in(r3.w) in10 -@in(r4.x) in11 -@in(r4.y) in12 -@in(r4.z) in13 -@in(r4.w) in14 -@in(r5.x) in15 +@in(r1.x) in8 +@in(r1.y) in9 +@in(r1.z) in10 +@in(r1.w) in11 +@in(r3.x) in12 +@in(r3.y) in13 +@in(r3.z) in14 +@in(r3.w) in15 @out(r0.x) out0 @out(r0.y) out1 @out(r0.z) out2 @@ -31,139 +31,121 @@ @out(r3.y) out13 @out(r3.z) out14 @out(r3.w) out15 -(sy)(ss)mul.f r0.w, r2.y, c12.x -mul.f r1.x, r0.x, c4.x -mad.f32 r0.w, c13.x, r2.z, r0.w -mad.f32 r1.x, c4.y, r0.y, r1.x -mad.f32 r0.w, c14.x, r2.w, r0.w -mov.f32f32 r1.y, r4.x -mad.f32 r0.w, c15.x, r3.x, r0.w -mov.f32f32 r1.x, r1.x -mul.f r1.z, r2.y, c12.z -mov.f32f32 r1.y, r1.y -mul.f r2.x, r0.w, r0.w -mul.f r1.w, r2.y, c12.y -mad.f32 r1.x, c4.z, r0.z, r1.x -mad.f32 r1.w, c13.y, r2.z, r1.w -max.f r1.y, r1.y, c19.x -mad.f32 r1.w, c14.y, r2.w, r1.w -mul.f r4.x, r1.x, c10.x -mad.f32 r5.y, c15.y, r3.x, r1.w -mul.f r5.z, r0.x, c5.x -min.f r1.w, r1.y, c19.y -mad.f32 r1.y, c13.z, r2.z, r1.z -mad.f32 r1.z, r5.y, r5.y, r2.x -mad.f32 r2.x, c5.y, r0.y, r5.z -mad.f32 r1.y, c14.z, r2.w, r1.y +@const(c19.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000 +@const(c20.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 +(sy)(ss)mul.f r0.w, r4.x, c12.x +mul.f r2.x, r0.x, c4.x +mad.f32 r0.w, c13.x, r4.y, r0.w +mad.f32 r2.x, c4.y, r0.y, r2.x +mad.f32 r0.w, c14.x, r4.z, r0.w +mad.f32 r2.y, c4.z, r0.z, r2.x +mad.f32 r0.w, c15.x, r4.w, r0.w +mul.f r2.x, r4.x, c12.z +mul.f r2.z, r4.x, c12.y +mad.f32 r2.x, c13.z, r4.y, r2.x +mul.f r2.w, r0.w, r0.w +mad.f32 r2.z, c13.y, r4.y, r2.z +mul.f r5.x, r2.y, c10.x +mad.f32 r2.z, c14.y, r4.z, r2.z +mul.f r5.y, r0.x, c5.x +mad.f32 r2.z, c15.y, r4.w, r2.z +mad.f32 r5.y, c5.y, r0.y, r5.y +mad.f32 r2.x, c14.z, r4.z, r2.x +mad.f32 r5.y, c5.z, r0.z, r5.y +mad.f32 r2.w, r2.z, r2.z, r2.w +mad.f32 r5.z, c15.z, r4.w, r2.x +mul.f r5.w, r4.x, c0.w +mul.f r6.x, r4.x, c0.z +mul.f r6.y, r4.x, c0.y +mad.f32 r2.w, r5.z, r5.z, r2.w +mad.f32 r5.x, c10.y, r5.y, r5.x mul.f r0.x, r0.x, c6.x -mov.f32f32 r1.z, r1.z -mad.f32 r1.y, c15.z, r3.x, r1.y -mov.f32f32 r2.x, r2.x +absneg.f r2.x, (abs)r5.z +mad.f32 r5.w, c1.w, r4.y, r5.w +mad.f32 r6.x, c1.z, r4.y, r6.x +mad.f32 r6.y, c1.y, r4.y, r6.y +rsq r2.w, (abs)r2.w +(ss)mov.f32f32 r6.z, r2.w +(ss)mul.f r2.w, r5.z, r2.w mad.f32 r0.x, c6.y, r0.y, r0.x -mul.f r0.y, r2.y, c0.w -mad.f32 r1.z, r1.y, r1.y, r1.z -mad.f32 r5.z, c5.z, r0.z, r2.x -absneg.f r2.x, (abs)r1.y -mov.f32f32 r0.x, r0.x -(rpt2)nop -rsq r1.z, (abs)r1.z -(ss)mov.f32f32 r1.z, r1.z -mad.f32 r4.x, c10.y, r5.z, r4.x +mad.f32 r0.y, c2.w, r4.z, r5.w +mul.f r0.w, r0.w, r6.z +mul.f r2.z, r2.z, r6.z +(rpt1)nop +add.f r5.z, c10.x, (neg)r0.w +add.f r2.z, c10.y, (neg)r2.z +add.f r2.w, c10.z, (neg)r2.w mad.f32 r0.x, c6.z, r0.z, r0.x -mad.f32 r0.y, c1.w, r2.z, r0.y -mul.f r0.z, r0.w, r1.z -mul.f r0.w, r5.y, r1.z -mul.f r1.y, r1.y, r1.z -mov.f32f32 r1.z, r4.x -add.f r0.z, c10.x, (neg)r0.z -add.f r4.x, c10.y, (neg)r0.w -add.f r1.y, c10.z, (neg)r1.y -mad.f32 r0.w, c10.z, r0.x, r1.z -mul.f r1.z, r0.z, r0.z -mad.f32 r0.y, c2.w, r2.w, r0.y -mad.f32 r1.z, r4.x, r4.x, r1.z -max.f r5.y, r0.w, c20.x -cmps.f.lt r5.w, (neg)r0.w, c19.x -mad.f32 r0.w, c3.w, r3.x, r0.y -mov.f32f32 r0.y, r1.z -mov.f32f32 r1.z, r5.y -mad.f32 r0.y, r1.y, r1.y, r0.y -mul.f r5.y, c16.z, r3.w -mul.f r6.x, c16.y, r3.z -mul.f r6.y, c16.x, r3.y -mul.f r6.z, r2.y, c0.z -mul.f r6.w, r2.y, c0.y -mul.f r2.y, r2.y, c0.x +mul.f r0.z, r5.z, r5.z +mad.f32 r0.w, c3.w, r4.w, r0.y +mad.f32 r0.y, r2.z, r2.z, r0.z +mad.f32 r0.z, c10.z, r0.x, r5.x +mad.f32 r0.y, r2.w, r2.w, r0.y +mad.f32 r5.x, c2.z, r4.z, r6.x +mad.f32 r5.w, c2.y, r4.z, r6.y +mul.f r4.x, r4.x, c0.x +max.f r1.w, r1.w, c19.x +mul.f r6.x, c16.z, r1.z +mul.f r6.y, c16.y, r1.y rsq r0.y, (abs)r0.y -(ss)mov.f32f32 r0.y, r0.y -mad.f32 r7.x, c8.z, r3.w, c9.z -mad.f32 r7.y, c8.y, r3.z, c9.y -mad.f32 r7.z, c8.x, r3.y, c9.x -mul.f r0.z, r0.z, r0.y -mul.f r4.x, r4.x, r0.y -mul.f r0.y, r1.y, r0.y -add.f r1.y, r5.y, r7.x -mul.f r0.z, r1.x, r0.z -mul.f r1.x, c17.z, r3.w -mad.f32 r0.z, r5.z, r4.x, r0.z -add.f r3.w, r6.x, r7.y -add.f r4.x, r6.y, r7.z -mad.f32 r5.y, c1.z, r2.z, r6.z -mov.f32f32 r0.z, r0.z -mad.f32 r1.x, r1.z, r1.x, r1.y -mad.f32 r0.x, r0.x, r0.y, r0.z -mul.f r0.y, c17.y, r3.z -mul.f r0.z, c17.x, r3.y -mad.f32 r1.y, c2.z, r2.w, r5.y +(ss)mov.f32f32 r6.z, r0.y +(ss)mul.f r0.y, r2.w, r0.y +max.f r6.w, r0.z, c20.x +mul.f r2.w, c16.x, r1.x +mul.f r5.z, r5.z, r6.z +mul.f r2.z, r2.z, r6.z +mov.f32f32 r6.z, r6.w +mad.f32 r7.x, c8.y, r1.y, c9.y +mul.f r2.y, r2.y, r5.z +mad.f32 r5.z, c8.z, r1.z, c9.z +mad.f32 r2.y, r5.y, r2.z, r2.y +add.f r2.z, r6.y, r7.x +mad.f32 r0.x, r0.x, r0.y, r2.y +add.f r2.y, r6.x, r5.z +mul.f r0.y, c17.y, r1.y +mad.f32 r1.y, c8.x, r1.x, c9.x max.f r0.x, r0.x, c20.x -mad.f32 r5.y, r1.z, r0.y, r3.w -mad.f32 r1.z, r1.z, r0.z, r4.x -mad.f32 r0.z, c3.z, r3.x, r1.y -mad.f32 r0.y, c1.y, r2.z, r6.w -mad.f32 r1.y, c1.x, r2.z, r2.y -mov.f32f32 r2.y, c7.x +mul.f r1.z, c17.z, r1.z +mad.f32 r5.y, r6.z, r0.y, r2.z +add.f r1.y, r2.w, r1.y +cmps.f.lt r5.z, (neg)r0.z, c19.x +mad.f32 r0.z, c3.z, r4.w, r5.x +mad.f32 r0.y, c3.y, r4.w, r5.w log2 r0.x, r0.x -(ss)mov.f32f32 r2.z, r0.x -(ss)mad.f32 r0.x, c2.y, r2.w, r0.y -mad.f32 r1.y, c2.x, r2.w, r1.y -min.f r2.y, r2.y, c20.z -mad.f32 r0.y, c3.y, r3.x, r0.x -mad.f32 r0.x, c3.x, r3.x, r1.y -mov.f32f32 r3.w, r5.x -mul.f r1.y, r2.y, r2.z -mov.f32f32 r3.z, r4.w -mov.f32f32 r3.y, r4.z -mov.f32f32 r3.x, r4.y -mov.f32f32 r1.y, r1.y +mov.f32f32 r2.z, c7.x +mad.f32 r1.z, r6.z, r1.z, r2.y +mul.f r1.x, c17.x, r1.x +mad.f32 r2.y, c1.x, r4.y, r4.x +min.f r2.z, r2.z, c20.z +mad.f32 r2.y, c2.x, r4.z, r2.y +min.f r1.w, r1.w, c19.y mov.f32f32 r2.w, c19.y +(ss)mul.f r4.x, r2.z, r0.x +mad.f32 r1.x, r6.w, r1.x, r1.y +(ss)mad.f32 r0.x, c3.x, r4.w, r2.y mov.f32f32 r2.z, c19.x mov.f32f32 r2.y, c19.x +(rpt1)nop +exp2 r1.y, r4.x +(ss)sel.b32 r1.y, r1.y, r5.z, c20.x (rpt2)nop -exp2 r1.y, r1.y -(ss)mov.f32f32 r1.y, r1.y -(rpt2)nop -sel.b32 r1.y, r1.y, r5.w, c20.x -(rpt2)nop -mov.f32f32 r1.y, r1.y -(rpt2)nop -mov.f32f32 r1.y, r1.y -(rpt2)nop -mad.f32 r1.x, c18.z, r1.y, r1.x -mad.f32 r4.x, c18.y, r1.y, r5.y -mad.f32 r1.y, c18.x, r1.y, r1.z -nop +(ss)mov.f32f32 r4.x, r1.y +mad.f32 r1.x, c18.x, r1.y, r1.x +(rpt1)nop +mov.f32f32 r1.y, r4.x max.f r1.x, r1.x, c19.x -max.f r4.x, r4.x, c19.x -max.f r4.y, r1.y, c19.x +(rpt1)nop +mad.f32 r1.z, c18.z, r1.y, r1.z +mad.f32 r1.y, c18.y, r1.y, r5.y +min.f r1.x, r1.x, c19.y nop -min.f r1.z, r1.x, c19.y -min.f r1.y, r4.x, c19.y -min.f r1.x, r4.y, c19.y +max.f r1.z, r1.z, c19.x +max.f r1.y, r1.y, c19.x +(rpt1)nop +min.f r1.z, r1.z, c19.y +min.f r1.y, r1.y, c19.y end -nop -nop -nop ; VERT: outputs: r0.x (0:0) r1.x (1:0) r2.x (3:0) r3.x (5:0) -; VERT: inputs: r2.y (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r3.y (0:0,cm=f,il=16,b=0) r4.y (0:0,cm=f,il=20,b=0) -; VERT: 141 instructions, 0 half, 8 full +; VERT: inputs: r4.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r1.x (0:0,cm=f,il=16,b=0) r3.x (0:0,cm=f,il=20,b=0) +; VERT: 119 instructions, 0 half, 8 full diff --git a/reference/stk-mines/stk-mines-16.asm b/reference/stk-mines/stk-mines-16.asm index b80c94e..601a921 100644 --- a/reference/stk-mines/stk-mines-16.asm +++ b/reference/stk-mines/stk-mines-16.asm @@ -6,63 +6,44 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c5.x) 0x3f800000, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)bary.f r0.z, 4, r0.x bary.f r0.w, 8, r0.x -bary.f r1.x, 12, r0.x +bary.f r1.x, 9, r0.x bary.f r1.y, 11, r0.x mad.f32 r0.z, c2.x, r0.z, c2.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.y, r1.y +bary.f r1.z, 12, r0.x +bary.f r1.w, 13, r0.x +bary.f r2.x, 15, r0.x max.f r0.z, r0.z, c5.y -mov.f32f32 r1.z, r0.w -bary.f r0.w, 9, r0.x -mov.f32f32 r2.y, r1.x +bary.f r2.y, 3, r0.x +bary.f r2.z, 2, r0.x +bary.f r2.w, 1, r0.x min.f r0.z, r0.z, c5.x -bary.f r1.x, 13, r0.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r2.x, r1.y -add.f r1.y, c5.x, (neg)r0.z -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.w, r0.w -bary.f r0.w, 15, r0.x -mul.f r2.w, c3.z, r1.y -mul.f r3.x, c3.y, r1.y -mul.f r1.y, c3.x, r1.y -mov.f32f32 r2.z, r1.x -mov.f32f32 r1.x, r2.w -sam.p (f32)(xyzw)r3.y, r1.z, s#1, t#1 -(ss)bary.f r1.z, 2, r0.x -mov.f32f32 r1.w, r3.x -mov.f32f32 r1.y, r1.y -bary.f r2.x, 1, r0.x -(sy)mul.f r1.z, r3.w, r1.z -mov.f32f32 r0.w, r0.w -bary.f r3.x, 0, r0.x -mul.f r2.x, r3.z, r2.x -bary.f (ei)r0.x, 3, r0.x -mov.f32f32 r2.w, r0.w -mul.f r0.y, r3.y, r3.x -(rpt1)nop -mul.f r0.x, r4.x, r0.x -(rpt1)nop -sam.p (f32)(xyzw)r2.y, r2.y, s#0, t#0 -(sy)mul.f r0.w, r2.w, r1.z -mul.f r1.z, r2.z, r2.x -mul.f r0.y, r2.y, r0.y +sam.p (f32)(xyzw)r3.x, r0.w, s#1, t#1 +(sy)(ss)mul.f r0.w, r3.w, r2.y +mul.f r1.x, r3.z, r2.z +sam.p (f32)(xyzw)r3.z, r1.z, s#0, t#0 +mul.f r1.y, r3.y, r2.w +(ss)add.f r2.x, c5.x, (neg)r0.z +(sy)mul.f r1.w, r4.y, r0.w +mul.f r0.w, r4.x, r1.x +mul.f r1.x, r3.w, r1.y +mul.f r1.y, c3.z, r2.x +mul.f r2.y, c3.y, r2.x +mad.f32 r1.z, r0.w, r0.z, r1.y +mad.f32 r1.y, r1.x, r0.z, r2.y +mul.f r0.w, c3.x, r2.x +bary.f (ei)r0.x, 0, r0.x +(rpt2)nop mul.f r0.x, r3.x, r0.x -mad.f32 r0.w, r0.w, r0.z, r1.x -mad.f32 r1.x, r1.z, r0.z, r1.w -mad.f32 r0.y, r0.y, r0.z, r1.y -nop -mov.f32f32 r1.z, r0.w -mov.f32f32 r1.y, r1.x -mov.f32f32 r1.x, r0.y -mov.f32f32 r0.x, r0.x (rpt2)nop -mov.f32f32 r1.w, r0.x +mul.f r0.x, r3.z, r0.x +(rpt2)nop +mad.f32 r1.x, r0.x, r0.z, r0.w end +nop ; FRAG: outputs: r1.x (1:0) -; FRAG: inputs: r63.y (1:0,cm=f,il=8,b=1) r1.x (3:0,cm=f,il=12,b=1) r0.z (5:0,cm=f,il=16,b=1) r0.x (5:1,cm=f,il=20,b=1) -; FRAG: 60 instructions, 0 half, 5 full +; FRAG: inputs: r1.z (1:0,cm=f,il=8,b=1) r1.x (3:0,cm=f,il=12,b=1) r0.z (5:0,cm=f,il=16,b=1) r1.y (5:1,cm=f,il=20,b=1) +; FRAG: 41 instructions, 0 half, 5 full diff --git a/reference/stk-mines/stk-mines-17.asm b/reference/stk-mines/stk-mines-17.asm index 93bdddf..37655fe 100644 --- a/reference/stk-mines/stk-mines-17.asm +++ b/reference/stk-mines/stk-mines-17.asm @@ -1,24 +1,24 @@ ; options: ; VERT: new compiler -@in(r2.y) in0 -@in(r2.z) in1 -@in(r2.w) in2 -@in(r3.x) in3 +@in(r5.x) in0 +@in(r5.y) in1 +@in(r5.z) in2 +@in(r5.w) in3 @in(r0.x) in4 @in(r0.y) in5 @in(r0.z) in6 -@in(r3.y) in8 -@in(r3.z) in9 -@in(r3.w) in10 -@in(r4.x) in11 -@in(r5.x) in12 -@in(r5.y) in13 -@in(r5.z) in14 -@in(r5.w) in15 -@in(r6.x) in16 -@in(r6.y) in17 -@in(r6.z) in18 -@in(r6.w) in19 +@in(r1.x) in8 +@in(r1.y) in9 +@in(r1.z) in10 +@in(r1.w) in11 +@in(r3.x) in12 +@in(r3.y) in13 +@in(r3.z) in14 +@in(r3.w) in15 +@in(r4.x) in16 +@in(r4.y) in17 +@in(r4.z) in18 +@in(r4.w) in19 @out(r0.x) out0 @out(r0.y) out1 @out(r0.z) out2 @@ -39,139 +39,121 @@ @out(r4.y) out17 @out(r4.z) out18 @out(r4.w) out19 -(sy)(ss)mul.f r0.w, r2.y, c12.x -mul.f r1.x, r0.x, c4.x -mad.f32 r0.w, c13.x, r2.z, r0.w -mad.f32 r1.x, c4.y, r0.y, r1.x -mad.f32 r0.w, c14.x, r2.w, r0.w -mov.f32f32 r1.y, r4.x -mad.f32 r0.w, c15.x, r3.x, r0.w -mov.f32f32 r1.x, r1.x -mul.f r1.z, r2.y, c12.z -mov.f32f32 r1.y, r1.y -mul.f r2.x, r0.w, r0.w -mul.f r1.w, r2.y, c12.y -mad.f32 r1.x, c4.z, r0.z, r1.x -mad.f32 r1.w, c13.y, r2.z, r1.w -max.f r1.y, r1.y, c19.x -mad.f32 r1.w, c14.y, r2.w, r1.w -mul.f r4.x, r1.x, c10.x -mad.f32 r4.y, c15.y, r3.x, r1.w -mul.f r4.z, r0.x, c5.x -min.f r1.w, r1.y, c19.y -mad.f32 r1.y, c13.z, r2.z, r1.z -mad.f32 r1.z, r4.y, r4.y, r2.x -mad.f32 r2.x, c5.y, r0.y, r4.z -mad.f32 r1.y, c14.z, r2.w, r1.y +@const(c19.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000 +@const(c20.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 +(sy)(ss)mul.f r0.w, r5.x, c12.x +mul.f r2.x, r0.x, c4.x +mad.f32 r0.w, c13.x, r5.y, r0.w +mad.f32 r2.x, c4.y, r0.y, r2.x +mad.f32 r0.w, c14.x, r5.z, r0.w +mad.f32 r2.y, c4.z, r0.z, r2.x +mad.f32 r0.w, c15.x, r5.w, r0.w +mul.f r2.x, r5.x, c12.z +mul.f r2.z, r5.x, c12.y +mad.f32 r2.x, c13.z, r5.y, r2.x +mul.f r2.w, r0.w, r0.w +mad.f32 r2.z, c13.y, r5.y, r2.z +mul.f r6.x, r2.y, c10.x +mad.f32 r2.z, c14.y, r5.z, r2.z +mul.f r6.y, r0.x, c5.x +mad.f32 r2.z, c15.y, r5.w, r2.z +mad.f32 r6.y, c5.y, r0.y, r6.y +mad.f32 r2.x, c14.z, r5.z, r2.x +mad.f32 r6.y, c5.z, r0.z, r6.y +mad.f32 r2.w, r2.z, r2.z, r2.w +mad.f32 r6.z, c15.z, r5.w, r2.x +mul.f r6.w, r5.x, c0.w +mul.f r7.x, r5.x, c0.z +mul.f r7.y, r5.x, c0.y +mad.f32 r2.w, r6.z, r6.z, r2.w +mad.f32 r6.x, c10.y, r6.y, r6.x mul.f r0.x, r0.x, c6.x -mov.f32f32 r1.z, r1.z -mad.f32 r1.y, c15.z, r3.x, r1.y -mov.f32f32 r2.x, r2.x +absneg.f r2.x, (abs)r6.z +mad.f32 r6.w, c1.w, r5.y, r6.w +mad.f32 r7.x, c1.z, r5.y, r7.x +mad.f32 r7.y, c1.y, r5.y, r7.y +rsq r2.w, (abs)r2.w +(ss)mov.f32f32 r7.z, r2.w +(ss)mul.f r2.w, r6.z, r2.w mad.f32 r0.x, c6.y, r0.y, r0.x -mul.f r0.y, r2.y, c0.w -mad.f32 r1.z, r1.y, r1.y, r1.z -mad.f32 r4.z, c5.z, r0.z, r2.x -absneg.f r2.x, (abs)r1.y -mov.f32f32 r0.x, r0.x -(rpt2)nop -rsq r1.z, (abs)r1.z -(ss)mov.f32f32 r1.z, r1.z -mad.f32 r4.x, c10.y, r4.z, r4.x +mad.f32 r0.y, c2.w, r5.z, r6.w +mul.f r0.w, r0.w, r7.z +mul.f r2.z, r2.z, r7.z +(rpt1)nop +add.f r6.z, c10.x, (neg)r0.w +add.f r2.z, c10.y, (neg)r2.z +add.f r2.w, c10.z, (neg)r2.w mad.f32 r0.x, c6.z, r0.z, r0.x -mad.f32 r0.y, c1.w, r2.z, r0.y -mul.f r0.z, r0.w, r1.z -mul.f r0.w, r4.y, r1.z -mul.f r1.y, r1.y, r1.z -mov.f32f32 r1.z, r4.x -add.f r0.z, c10.x, (neg)r0.z -add.f r4.x, c10.y, (neg)r0.w -add.f r1.y, c10.z, (neg)r1.y -mad.f32 r0.w, c10.z, r0.x, r1.z -mul.f r1.z, r0.z, r0.z -mad.f32 r0.y, c2.w, r2.w, r0.y -mad.f32 r1.z, r4.x, r4.x, r1.z -max.f r4.y, r0.w, c20.x -cmps.f.lt r7.x, (neg)r0.w, c19.x -mad.f32 r0.w, c3.w, r3.x, r0.y -mov.f32f32 r0.y, r1.z -mov.f32f32 r1.z, r4.y -mad.f32 r0.y, r1.y, r1.y, r0.y -mul.f r4.y, c16.z, r3.w -mul.f r4.w, c16.y, r3.z -mul.f r7.y, c16.x, r3.y -mul.f r7.z, r2.y, c0.z -mul.f r7.w, r2.y, c0.y -mul.f r2.y, r2.y, c0.x +mul.f r0.z, r6.z, r6.z +mad.f32 r0.w, c3.w, r5.w, r0.y +mad.f32 r0.y, r2.z, r2.z, r0.z +mad.f32 r0.z, c10.z, r0.x, r6.x +mad.f32 r0.y, r2.w, r2.w, r0.y +mad.f32 r6.x, c2.z, r5.z, r7.x +mad.f32 r6.w, c2.y, r5.z, r7.y +mul.f r5.x, r5.x, c0.x +max.f r1.w, r1.w, c19.x +mul.f r7.x, c16.z, r1.z +mul.f r7.y, c16.y, r1.y rsq r0.y, (abs)r0.y -(ss)mov.f32f32 r0.y, r0.y -mad.f32 r8.x, c8.z, r3.w, c9.z -mad.f32 r8.y, c8.y, r3.z, c9.y -mad.f32 r8.z, c8.x, r3.y, c9.x -mul.f r0.z, r0.z, r0.y -mul.f r4.x, r4.x, r0.y -mul.f r0.y, r1.y, r0.y -add.f r1.y, r4.y, r8.x -mul.f r0.z, r1.x, r0.z -mul.f r1.x, c17.z, r3.w -mad.f32 r0.z, r4.z, r4.x, r0.z -add.f r3.w, r4.w, r8.y -add.f r4.x, r7.y, r8.z -mad.f32 r4.y, c1.z, r2.z, r7.z -mov.f32f32 r0.z, r0.z -mad.f32 r1.x, r1.z, r1.x, r1.y -mad.f32 r0.x, r0.x, r0.y, r0.z -mul.f r0.y, c17.y, r3.z -mul.f r0.z, c17.x, r3.y -mad.f32 r1.y, c2.z, r2.w, r4.y +(ss)mov.f32f32 r7.z, r0.y +(ss)mul.f r0.y, r2.w, r0.y +max.f r7.w, r0.z, c20.x +mul.f r2.w, c16.x, r1.x +mul.f r6.z, r6.z, r7.z +mul.f r2.z, r2.z, r7.z +mov.f32f32 r7.z, r7.w +mad.f32 r8.x, c8.y, r1.y, c9.y +mul.f r2.y, r2.y, r6.z +mad.f32 r6.z, c8.z, r1.z, c9.z +mad.f32 r2.y, r6.y, r2.z, r2.y +add.f r2.z, r7.y, r8.x +mad.f32 r0.x, r0.x, r0.y, r2.y +add.f r2.y, r7.x, r6.z +mul.f r0.y, c17.y, r1.y +mad.f32 r1.y, c8.x, r1.x, c9.x max.f r0.x, r0.x, c20.x -mad.f32 r7.y, r1.z, r0.y, r3.w -mad.f32 r1.z, r1.z, r0.z, r4.x -mad.f32 r0.z, c3.z, r3.x, r1.y -mad.f32 r0.y, c1.y, r2.z, r7.w -mad.f32 r1.y, c1.x, r2.z, r2.y -mov.f32f32 r2.y, c7.x +mul.f r1.z, c17.z, r1.z +mad.f32 r6.y, r7.z, r0.y, r2.z +add.f r1.y, r2.w, r1.y +cmps.f.lt r6.z, (neg)r0.z, c19.x +mad.f32 r0.z, c3.z, r5.w, r6.x +mad.f32 r0.y, c3.y, r5.w, r6.w log2 r0.x, r0.x -(ss)mov.f32f32 r2.z, r0.x -(ss)mad.f32 r0.x, c2.y, r2.w, r0.y -mad.f32 r1.y, c2.x, r2.w, r1.y -min.f r2.y, r2.y, c20.z -mad.f32 r0.y, c3.y, r3.x, r0.x -mad.f32 r0.x, c3.x, r3.x, r1.y -mov.f32f32 r4.w, r6.w -mul.f r1.y, r2.y, r2.z -mov.f32f32 r4.z, r6.z -mov.f32f32 r4.y, r6.y -mov.f32f32 r4.x, r6.x -mov.f32f32 r1.y, r1.y -mov.f32f32 r3.w, r5.w -mov.f32f32 r3.z, r5.z -mov.f32f32 r3.y, r5.y -mov.f32f32 r3.x, r5.x +mov.f32f32 r2.z, c7.x +mad.f32 r1.z, r7.z, r1.z, r2.y +mul.f r1.x, c17.x, r1.x +mad.f32 r2.y, c1.x, r5.y, r5.x +min.f r2.z, r2.z, c20.z +mad.f32 r2.y, c2.x, r5.z, r2.y +min.f r1.w, r1.w, c19.y mov.f32f32 r2.w, c19.y +(ss)mul.f r5.x, r2.z, r0.x +mad.f32 r1.x, r7.w, r1.x, r1.y +(ss)mad.f32 r0.x, c3.x, r5.w, r2.y mov.f32f32 r2.z, c19.x -exp2 r1.y, r1.y -(ss)mov.f32f32 r1.y, r1.y mov.f32f32 r2.y, c19.x (rpt1)nop -sel.b32 r1.y, r1.y, r7.x, c20.x -(rpt2)nop -mov.f32f32 r1.y, r1.y -(rpt2)nop -mov.f32f32 r1.y, r1.y +exp2 r1.y, r5.x +(ss)sel.b32 r1.y, r1.y, r6.z, c20.x (rpt2)nop -mad.f32 r1.x, c18.z, r1.y, r1.x -mad.f32 r5.x, c18.y, r1.y, r7.y -mad.f32 r1.y, c18.x, r1.y, r1.z -nop +(ss)mov.f32f32 r5.x, r1.y +mad.f32 r1.x, c18.x, r1.y, r1.x +(rpt1)nop +mov.f32f32 r1.y, r5.x max.f r1.x, r1.x, c19.x -max.f r5.x, r5.x, c19.x -max.f r5.y, r1.y, c19.x +(rpt1)nop +mad.f32 r1.z, c18.z, r1.y, r1.z +mad.f32 r1.y, c18.y, r1.y, r6.y +min.f r1.x, r1.x, c19.y nop -min.f r1.z, r1.x, c19.y -min.f r1.y, r5.x, c19.y -min.f r1.x, r5.y, c19.y +max.f r1.z, r1.z, c19.x +max.f r1.y, r1.y, c19.x +(rpt1)nop +min.f r1.z, r1.z, c19.y +min.f r1.y, r1.y, c19.y end ; VERT: outputs: r0.x (0:0) r1.x (1:0) r2.x (3:0) r3.x (5:0) r4.x (5:1) -; VERT: inputs: r2.y (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r3.y (0:0,cm=f,il=16,b=0) r5.x (0:0,cm=f,il=20,b=0) r6.x (0:0,cm=f,il=24,b=0) -; VERT: 141 instructions, 0 half, 9 full +; VERT: inputs: r5.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r1.x (0:0,cm=f,il=16,b=0) r3.x (0:0,cm=f,il=20,b=0) r4.x (0:0,cm=f,il=24,b=0) +; VERT: 119 instructions, 0 half, 9 full diff --git a/reference/stk-mines/stk-mines-18.asm b/reference/stk-mines/stk-mines-18.asm index f978c20..5b1d566 100644 --- a/reference/stk-mines/stk-mines-18.asm +++ b/reference/stk-mines/stk-mines-18.asm @@ -1,16 +1,16 @@ ; options: ; VERT: new compiler -@in(r3.x) in0 -@in(r3.y) in1 -@in(r3.z) in2 -@in(r3.w) in3 +@in(r2.x) in0 +@in(r2.y) in1 +@in(r2.z) in2 +@in(r2.w) in3 @in(r0.x) in4 @in(r0.y) in5 @in(r0.z) in6 -@in(r2.x) in8 -@in(r2.y) in9 -@in(r2.z) in10 -@in(r2.w) in11 +@in(r3.x) in8 +@in(r3.y) in9 +@in(r3.z) in10 +@in(r3.w) in11 @in(r4.z) in14 @in(r4.w) in15 @out(r0.x) out0 @@ -25,171 +25,157 @@ @out(r2.y) out9 @out(r2.z) out10 @out(r2.w) out11 -(sy)(ss)mul.f r0.w, r3.x, c12.x +@const(c25.x) 0x3f000000, 0x00000000, 0x3f800000, 0x00000000 +@const(c26.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 +(sy)(ss)mul.f r0.w, r2.x, c12.x mul.f r1.x, r0.x, c4.x -mad.f32 r0.w, c13.x, r3.y, r0.w +mad.f32 r0.w, c13.x, r2.y, r0.w mad.f32 r1.x, c4.y, r0.y, r1.x -mad.f32 r0.w, c14.x, r3.z, r0.w -mov.f32f32 r1.y, r2.w -mad.f32 r0.w, c15.x, r3.w, r0.w -mov.f32f32 r1.x, r1.x -mul.f r1.z, r0.x, c5.x -mul.f r0.x, r0.x, c6.x -mul.f r1.w, r0.w, r0.w -mul.f r2.w, r3.x, c12.y +mad.f32 r0.w, c14.x, r2.z, r0.w mad.f32 r1.x, c4.z, r0.z, r1.x -mad.f32 r2.w, c13.y, r3.y, r2.w -mov.f32f32 r1.y, r1.y -mad.f32 r2.w, c14.y, r3.z, r2.w -mul.f r4.x, r1.x, c10.x -mad.f32 r2.w, c15.y, r3.w, r2.w -mad.f32 r1.z, c5.y, r0.y, r1.z -max.f r1.y, r1.y, c25.y +mad.f32 r1.y, c15.x, r2.w, r0.w +mul.f r0.w, r2.x, c12.y +mul.f r1.z, r2.x, c12.z +mul.f r1.w, r2.x, c0.w +mul.f r4.x, r1.y, r1.y +mad.f32 r0.w, c13.y, r2.y, r0.w +mul.f r4.y, r1.x, c10.x +mad.f32 r0.w, c14.y, r2.z, r0.w +mul.f r5.x, r0.x, c5.x +mad.f32 r5.y, c15.y, r2.w, r0.w +mad.f32 r0.w, c5.y, r0.y, r5.x +mad.f32 r1.z, c13.z, r2.y, r1.z +mad.f32 r1.w, c1.w, r2.y, r1.w +mad.f32 r4.x, r5.y, r5.y, r4.x +mad.f32 r1.z, c14.z, r2.z, r1.z +mad.f32 r5.x, c5.z, r0.z, r0.w +mad.f32 r1.z, c15.z, r2.w, r1.z +mad.f32 r0.w, c2.w, r2.z, r1.w +mul.f r1.w, r2.x, c0.z +mul.f r5.z, r2.x, c0.y +mad.f32 r4.x, r1.z, r1.z, r4.x +mad.f32 r4.y, c10.y, r5.x, r4.y +mul.f r0.x, r0.x, c6.x +mad.f32 r0.w, c3.w, r2.w, r0.w +mad.f32 r1.w, c1.z, r2.y, r1.w +mad.f32 r5.z, c1.y, r2.y, r5.z +mul.f r2.x, r2.x, c0.x +rsq r4.x, (abs)r4.x +(ss)mov.f32f32 r5.w, r4.x +mul.f r1.z, r1.z, r4.x mad.f32 r0.x, c6.y, r0.y, r0.x -mad.f32 r0.y, r2.w, r2.w, r1.w -mov.f32f32 r1.z, r1.z -min.f r1.w, r1.y, c25.z -mad.f32 r1.y, c5.z, r0.z, r1.z -mov.f32f32 r0.y, r0.y -mul.f r1.z, r3.x, c12.z -mov.f32f32 r0.x, r0.x -mad.f32 r1.z, c13.z, r3.y, r1.z -mad.f32 r4.x, c10.y, r1.y, r4.x -mad.f32 r1.z, c14.z, r3.z, r1.z -mad.f32 r0.x, c6.z, r0.z, r0.x -mad.f32 r0.z, c15.z, r3.w, r1.z -mov.f32f32 r1.z, r4.x -mul.f r4.x, r3.x, c0.w -mul.f r4.y, r3.x, c0.z -mad.f32 r0.y, r0.z, r0.z, r0.y -mad.f32 r1.z, c10.z, r0.x, r1.z -mad.f32 r4.x, c1.w, r3.y, r4.x -mad.f32 r4.y, c1.z, r3.y, r4.y -mul.f r5.x, r3.x, c0.y -mul.f r3.x, r3.x, c0.x -mul.f r5.y, c16.z, r2.z -rsq r0.y, (abs)r0.y -(ss)mov.f32f32 r0.y, r0.y -max.f r5.z, r1.z, c26.x -cmps.f.lt r1.z, (neg)r1.z, c25.y -mad.f32 r4.x, c2.w, r3.z, r4.x -mul.f r0.w, r0.w, r0.y -mul.f r2.w, r2.w, r0.y -mul.f r0.y, r0.z, r0.y -mov.f32f32 r5.z, r5.z -add.f r0.z, c10.x, (neg)r0.w -mul.f r5.w, r1.x, r0.w -add.f r6.x, c10.y, (neg)r2.w -add.f r6.y, c10.z, (neg)r0.y -mul.f r6.z, r0.z, r0.z -mad.f32 r5.w, r1.y, r2.w, r5.w -mad.f32 r6.z, r6.x, r6.x, r6.z -mad.f32 r6.w, c8.z, r2.z, c9.z -mul.f r7.x, c16.y, r2.y -mul.f r7.y, c16.x, r2.x -mov.f32f32 r6.z, r6.z -mov.f32f32 r5.w, r5.w -mad.f32 r6.z, r6.y, r6.y, r6.z -mad.f32 r7.z, r0.x, r0.y, r5.w -mad.f32 r7.w, r0.x, r0.y, r5.w -mad.f32 r5.w, r0.x, r0.y, r5.w -add.f r5.y, r5.y, r6.w -mad.f32 r6.w, c8.y, r2.y, c9.y -mad.f32 r8.x, c8.x, r2.x, c9.x -rsq r6.z, (abs)r6.z -(ss)mov.f32f32 r6.z, r6.z -add.f r7.z, r7.z, r7.z -add.f r7.w, r7.w, r7.w -add.f r5.w, r5.w, r5.w -mul.f r0.z, r0.z, r6.z -mad.f32 r7.z, (neg)r7.z, r1.x, r0.w -mul.f r0.w, r6.x, r6.z -mul.f r6.x, r6.y, r6.z -mul.f r0.z, r1.x, r0.z -add.f r1.x, r7.z, c25.y -mad.f32 r0.z, r1.y, r0.w, r0.z -mad.f32 r1.y, (neg)r7.w, r1.y, r2.w -mad.f32 r0.y, (neg)r5.w, r0.x, r0.y -nop -mov.f32f32 r0.z, r0.z -mul.f r0.w, r1.x, r1.x -mad.f32 r0.x, r0.x, r6.x, r0.z -add.f r0.z, r1.y, c25.y -add.f r0.y, r0.y, c25.z -mul.f r1.x, c17.z, r2.z -max.f r0.x, r0.x, c26.x -mad.f32 r0.z, r0.z, r0.z, r0.w -add.f r2.z, r7.x, r6.w -add.f r2.w, r7.y, r8.x -mad.f32 r0.w, c3.w, r3.w, r4.x -mad.f32 r4.x, c2.z, r3.z, r4.y -mad.f32 r4.y, c1.y, r3.y, r5.x -log2 r0.x, r0.x -(ss)mov.f32f32 r0.x, r0.x -mov.f32f32 r5.x, c7.x -mov.f32f32 r0.z, r0.z -mad.f32 r1.x, r5.z, r1.x, r5.y -mul.f r2.y, c17.y, r2.y -min.f r5.x, r5.x, c26.z -mad.f32 r0.y, r0.y, r0.y, r0.z -mul.f r2.x, c17.x, r2.x -mad.f32 r0.z, c3.z, r3.w, r4.x -mul.f r0.x, r5.x, r0.x -mov.f32f32 r0.y, r0.y -mad.f32 r2.y, r5.z, r2.y, r2.z -mad.f32 r2.x, r5.z, r2.x, r2.w -mov.f32f32 r0.x, r0.x -mad.f32 r2.z, c2.y, r3.z, r4.y -mad.f32 r2.w, c1.x, r3.y, r3.x -mov.f32f32 r3.x, r4.z -mov.f32f32 r3.y, r4.w +mad.f32 r0.y, c2.z, r2.z, r1.w +mul.f r1.y, r1.y, r5.w +mul.f r1.w, r5.y, r5.w (rpt1)nop -exp2 r0.x, r0.x -(ss)mov.f32f32 r0.x, r0.x -rsq r0.y, (abs)r0.y -(ss)mul.f r4.x, r0.y, c25.x -(ss)rsq r0.y, (abs)r0.y -(ss)mul.f r4.y, r0.y, c25.x -(ss)mad.f32 r0.y, c3.y, r3.w, r2.z -sel.b32 r0.x, r0.x, r1.z, c26.x -mad.f32 r4.x, r7.z, r4.x, c25.x -mad.f32 r4.y, r1.y, r4.y, c25.x -mad.f32 r3.z, c2.x, r3.z, r2.w -mov.f32f32 r0.x, r0.x -mul.f r1.y, r4.x, c21.w -mul.f r1.z, r4.x, c21.z -mul.f r2.z, r4.x, c21.y -mov.f32f32 r0.x, r0.x -mad.f32 r1.y, c22.w, r4.y, r1.y -mad.f32 r4.z, c22.z, r4.y, r1.z -mad.f32 r2.z, c22.y, r4.y, r2.z -mad.f32 r1.x, c18.z, r0.x, r1.x -mad.f32 r1.z, c18.y, r0.x, r2.y -mad.f32 r0.x, c18.x, r0.x, r2.x -mad.f32 r2.x, c23.w, r3.x, r1.y -max.f r1.x, r1.x, c25.y -max.f r1.y, r1.z, c25.y -max.f r0.x, r0.x, c25.y +(ss)add.f r4.x, c10.x, (neg)r1.y +mul.f r5.y, r1.x, r1.y +add.f r5.w, c10.y, (neg)r1.w +add.f r6.x, c10.z, (neg)r1.z +mul.f r6.y, r4.x, r4.x +mad.f32 r5.y, r5.x, r1.w, r5.y +mad.f32 r6.y, r5.w, r5.w, r6.y +mad.f32 r0.x, c6.z, r0.z, r0.x +mad.f32 r6.y, r6.x, r6.x, r6.y +mov.f32f32 r6.z, r5.y +mad.f32 r0.z, c3.z, r2.w, r0.y +mad.f32 r0.y, c2.y, r2.z, r5.z +mad.f32 r2.x, c1.x, r2.y, r2.x +max.f r2.y, r3.w, c25.y +mul.f r3.w, c16.z, r3.z +rsq r5.z, (abs)r6.y +(ss)mov.f32f32 r6.y, r5.z +mad.f32 r6.w, r0.x, r1.z, r6.z +mad.f32 r6.z, r0.x, r1.z, r6.z +mad.f32 r5.y, r0.x, r1.z, r5.y +mul.f r4.x, r4.x, r6.y +add.f r6.w, r6.w, r6.w +add.f r6.z, r6.z, r6.z +mul.f r5.w, r5.w, r6.y +mul.f r4.x, r1.x, r4.x +mad.f32 r1.x, (neg)r6.w, r1.x, r1.y +mad.f32 r1.y, (neg)r6.z, r0.x, r1.z +mad.f32 r1.z, r5.x, r5.w, r4.x +mul.f r4.x, r6.x, r5.z +add.f r5.z, r1.x, c25.y +add.f r1.y, r1.y, c25.z +add.f r5.y, r5.y, r5.y +mad.f32 r1.z, r0.x, r4.x, r1.z +mul.f r4.x, r5.z, r5.z +mad.f32 r0.x, c10.z, r0.x, r4.y +mad.f32 r0.y, c3.y, r2.w, r0.y +max.f r1.z, r1.z, c26.x +mad.f32 r4.y, (neg)r5.y, r5.x, r1.w +max.f r5.x, r0.x, c26.x +mul.f r5.y, c16.x, r3.x +cmps.f.lt r5.z, (neg)r0.x, c25.y +mad.f32 r0.x, c2.x, r2.z, r2.x +min.f r1.w, r2.y, c25.z +log2 r1.z, r1.z +mov.f32f32 r2.x, c7.x +add.f r2.y, r4.y, c25.y +mov.f32f32 r2.z, r5.x +mul.f r5.w, c16.y, r3.y +min.f r2.x, r2.x, c26.z +mad.f32 r2.y, r2.y, r2.y, r4.x +mad.f32 r4.x, c8.z, r3.z, c9.z +mad.f32 r6.x, c8.y, r3.y, c9.y +(ss)mul.f r1.z, r2.x, r1.z +mad.f32 r1.y, r1.y, r1.y, r2.y +add.f r2.x, r3.w, r4.x +add.f r2.y, r5.w, r6.x +mad.f32 r3.w, c8.x, r3.x, c9.x +mad.f32 r0.x, c3.x, r2.w, r0.x +mov.f32f32 r4.x, r4.z +exp2 r1.z, r1.z +(ss)sel.b32 r1.z, r1.z, r5.z, c26.x +add.f r2.w, r5.y, r3.w +mul.f r3.x, c17.x, r3.x +rsq r1.y, (abs)r1.y +(ss)mul.f r3.w, r1.y, c25.x +mov.f32f32 r5.y, r1.z +(ss)rsq r1.y, (abs)r1.y +(ss)mul.f r1.y, r1.y, c25.x +mul.f r3.z, c17.z, r3.z +mad.f32 r2.w, r5.x, r3.x, r2.w +mov.f32f32 r3.x, r5.y +mul.f r3.y, c17.y, r3.y +mad.f32 r2.x, r2.z, r3.z, r2.x +mad.f32 r1.z, c18.x, r1.z, r2.w +mad.f32 r2.x, c18.z, r3.x, r2.x +mad.f32 r2.y, r2.z, r3.y, r2.y +mad.f32 r2.z, r1.x, r3.w, c25.x +mad.f32 r3.y, r4.y, r1.y, c25.x +max.f r1.x, r2.x, c25.y +mad.f32 r1.y, c18.y, r3.x, r2.y +max.f r2.x, r1.z, c25.y nop min.f r1.z, r1.x, c25.z +max.f r1.y, r1.y, c25.y +min.f r1.x, r2.x, c25.z +mul.f r2.x, r2.z, c21.w +mul.f r2.y, r2.z, c21.z min.f r1.y, r1.y, c25.z -min.f r1.x, r0.x, c25.z -mad.f32 r2.w, c24.w, r3.y, r2.x -mad.f32 r0.x, c23.z, r3.x, r4.z -mad.f32 r2.x, c23.y, r3.x, r2.z -mad.f32 r2.z, c24.z, r3.y, r0.x -mad.f32 r2.y, c24.y, r3.y, r2.x -mul.f r2.x, r4.x, c21.x -mad.f32 r0.x, c3.x, r3.w, r3.z -mad.f32 r2.x, c22.x, r4.y, r2.x -nop -mad.f32 r2.x, c23.x, r3.x, r2.x -nop -mad.f32 r2.x, c24.x, r3.y, r2.x +mad.f32 r2.x, c22.w, r3.y, r2.x +mad.f32 r2.y, c22.z, r3.y, r2.y +mad.f32 r2.x, c23.w, r4.x, r2.x +mov.f32f32 r3.x, r4.w +mad.f32 r2.y, c23.z, r4.x, r2.y +mul.f r3.z, r2.z, c21.y +mul.f r3.w, r2.z, c21.x +mad.f32 r2.w, c24.w, r3.x, r2.x +mad.f32 r2.z, c24.z, r3.x, r2.y +mad.f32 r2.x, c22.y, r3.y, r3.z +mad.f32 r2.y, c22.x, r3.y, r3.w +mad.f32 r2.x, c23.y, r4.x, r2.x +mad.f32 r3.y, c23.x, r4.z, r2.y +mad.f32 r2.y, c24.y, r3.x, r2.x +mad.f32 r2.x, c24.x, r4.w, r3.y end nop nop +nop ; VERT: outputs: r0.x (0:0) r1.x (1:0) r2.x (5:0) -; VERT: inputs: r3.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r2.x (0:0,cm=f,il=16,b=0) r4.x (0:0,cm=c,il=20,b=0) -; VERT: 163 instructions, 0 half, 9 full +; VERT: inputs: r2.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r3.x (0:0,cm=f,il=16,b=0) r4.x (0:0,cm=c,il=20,b=0) +; VERT: 146 instructions, 0 half, 7 full diff --git a/reference/stk-mines/stk-mines-19.asm b/reference/stk-mines/stk-mines-19.asm index 6763f4d..c381d21 100644 --- a/reference/stk-mines/stk-mines-19.asm +++ b/reference/stk-mines/stk-mines-19.asm @@ -1,16 +1,16 @@ ; options: ; VERT: new compiler -@in(r2.x) in0 -@in(r2.y) in1 -@in(r2.z) in2 -@in(r2.w) in3 +@in(r1.x) in0 +@in(r1.y) in1 +@in(r1.z) in2 +@in(r1.w) in3 @in(r0.x) in4 @in(r0.y) in5 @in(r0.z) in6 -@in(r3.x) in8 -@in(r3.y) in9 -@in(r3.z) in10 -@in(r3.w) in11 +@in(r2.x) in8 +@in(r2.y) in9 +@in(r2.z) in10 +@in(r2.w) in11 @out(r0.x) out0 @out(r0.y) out1 @out(r0.z) out2 @@ -19,131 +19,120 @@ @out(r1.y) out5 @out(r1.z) out6 @out(r1.w) out7 -(sy)(ss)mul.f r0.w, r2.x, c12.x -mul.f r1.x, r0.x, c4.x -mad.f32 r0.w, c13.x, r2.y, r0.w -mad.f32 r1.x, c4.y, r0.y, r1.x -mad.f32 r0.w, c14.x, r2.z, r0.w -mov.f32f32 r1.y, r3.w -mad.f32 r0.w, c15.x, r2.w, r0.w -mov.f32f32 r1.x, r1.x -mul.f r1.z, r0.x, c5.x +@const(c19.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 +(sy)(ss)mul.f r0.w, r1.x, c12.x +mul.f r3.x, r0.x, c4.x +mad.f32 r0.w, c13.x, r1.y, r0.w +mad.f32 r3.x, c4.y, r0.y, r3.x +mad.f32 r0.w, c14.x, r1.z, r0.w +mad.f32 r3.x, c4.z, r0.z, r3.x +mad.f32 r3.y, c15.x, r1.w, r0.w +mul.f r0.w, r1.x, c12.y +mul.f r3.z, r1.x, c12.z +mul.f r3.w, r1.x, c0.w +mul.f r4.x, r3.y, r3.y +mad.f32 r0.w, c13.y, r1.y, r0.w +mul.f r4.y, r3.x, c10.x +mad.f32 r0.w, c14.y, r1.z, r0.w +mul.f r4.z, r0.x, c5.x +mad.f32 r4.w, c15.y, r1.w, r0.w +mad.f32 r0.w, c5.y, r0.y, r4.z +mad.f32 r3.z, c13.z, r1.y, r3.z +mad.f32 r3.w, c1.w, r1.y, r3.w +mad.f32 r4.x, r4.w, r4.w, r4.x +mad.f32 r3.z, c14.z, r1.z, r3.z +mad.f32 r4.z, c5.z, r0.z, r0.w +mad.f32 r3.z, c15.z, r1.w, r3.z +mad.f32 r0.w, c2.w, r1.z, r3.w +mul.f r3.w, r1.x, c0.z +mul.f r5.x, r1.x, c0.y +mad.f32 r4.x, r3.z, r3.z, r4.x +mad.f32 r4.y, c10.y, r4.z, r4.y mul.f r0.x, r0.x, c6.x -mul.f r1.w, r0.w, r0.w -mul.f r3.w, r2.x, c12.y -mad.f32 r1.x, c4.z, r0.z, r1.x -mad.f32 r3.w, c13.y, r2.y, r3.w -mov.f32f32 r1.y, r1.y -mad.f32 r3.w, c14.y, r2.z, r3.w -mul.f r4.x, r1.x, c10.x -mad.f32 r3.w, c15.y, r2.w, r3.w -mad.f32 r1.z, c5.y, r0.y, r1.z -max.f r1.y, r1.y, c19.x +mad.f32 r0.w, c3.w, r1.w, r0.w +mad.f32 r3.w, c1.z, r1.y, r3.w +mad.f32 r5.x, c1.y, r1.y, r5.x +mul.f r1.x, r1.x, c0.x +rsq r4.x, (abs)r4.x +(ss)mov.f32f32 r5.y, r4.x +mul.f r3.z, r3.z, r4.x mad.f32 r0.x, c6.y, r0.y, r0.x -mad.f32 r0.y, r3.w, r3.w, r1.w -mov.f32f32 r1.z, r1.z -min.f r1.w, r1.y, c19.y -mad.f32 r1.y, c5.z, r0.z, r1.z -mov.f32f32 r0.y, r0.y -mul.f r1.z, r2.x, c12.z -mov.f32f32 r0.x, r0.x -mad.f32 r1.z, c13.z, r2.y, r1.z -mad.f32 r4.x, c10.y, r1.y, r4.x -mad.f32 r1.z, c14.z, r2.z, r1.z +mad.f32 r0.y, c2.z, r1.z, r3.w +mul.f r3.y, r3.y, r5.y +mul.f r3.w, r4.w, r5.y +(rpt1)nop +add.f r3.y, c10.x, (neg)r3.y +add.f r3.w, c10.y, (neg)r3.w +add.f r3.z, c10.z, (neg)r3.z mad.f32 r0.x, c6.z, r0.z, r0.x -mad.f32 r0.z, c15.z, r2.w, r1.z -mov.f32f32 r1.z, r4.x -mul.f r4.x, r2.x, c0.w -mul.f r4.y, r2.x, c0.z -mad.f32 r0.y, r0.z, r0.z, r0.y -mad.f32 r1.z, c10.z, r0.x, r1.z -mad.f32 r4.x, c1.w, r2.y, r4.x -mad.f32 r4.y, c1.z, r2.y, r4.y -mul.f r4.z, r2.x, c0.y -mul.f r2.x, r2.x, c0.x -mul.f r4.w, c16.z, r3.z +(ss)mul.f r4.x, r3.y, r3.y +mad.f32 r0.z, c3.z, r1.w, r0.y +mad.f32 r0.y, r3.w, r3.w, r4.x +mad.f32 r4.x, c10.z, r0.x, r4.y +mad.f32 r0.y, r3.z, r3.z, r0.y +mad.f32 r4.y, c2.y, r1.z, r5.x +mad.f32 r1.x, c1.x, r1.y, r1.x +max.f r1.y, r2.w, c19.x +mul.f r2.w, c16.z, r2.z +mul.f r4.w, c16.y, r2.y +mul.f r5.x, c16.x, r2.x rsq r0.y, (abs)r0.y -(ss)mov.f32f32 r0.y, r0.y -max.f r5.x, r1.z, c19.x -cmps.f.lt r1.z, (neg)r1.z, c19.x -mad.f32 r4.x, c2.w, r2.z, r4.x -mul.f r0.w, r0.w, r0.y -mul.f r3.w, r3.w, r0.y -mul.f r0.y, r0.z, r0.y -mov.f32f32 r5.x, r5.x -add.f r0.z, c10.x, (neg)r0.w -add.f r3.w, c10.y, (neg)r3.w -add.f r0.y, c10.z, (neg)r0.y -mad.f32 r0.w, c8.z, r3.z, c9.z -mul.f r5.y, r0.z, r0.z -mul.f r5.z, c16.y, r3.y -mad.f32 r5.y, r3.w, r3.w, r5.y -add.f r4.w, r4.w, r0.w -mad.f32 r0.w, c8.y, r3.y, c9.y -mul.f r5.w, c16.x, r3.x -mov.f32f32 r5.y, r5.y -mul.f r3.z, c17.z, r3.z -mad.f32 r5.y, r0.y, r0.y, r5.y -add.f r5.z, r5.z, r0.w -mad.f32 r6.x, c8.x, r3.x, c9.x -mad.f32 r0.w, c3.w, r2.w, r4.x -mad.f32 r4.x, c2.z, r2.z, r4.y -mad.f32 r4.y, c1.y, r2.y, r4.z -mad.f32 r2.x, c1.x, r2.y, r2.x -rsq r2.y, (abs)r5.y -(ss)mov.f32f32 r2.y, r2.y -mad.f32 r3.z, r5.x, r3.z, r4.w -mul.f r3.y, c17.y, r3.y -add.f r4.z, r5.w, r6.x -mul.f r0.z, r0.z, r2.y -mul.f r3.w, r3.w, r2.y -mul.f r0.y, r0.y, r2.y -mad.f32 r2.y, r5.x, r3.y, r5.z -mul.f r0.z, r1.x, r0.z -mul.f r1.x, c17.x, r3.x -mad.f32 r1.y, r1.y, r3.w, r0.z -mad.f32 r0.z, c3.z, r2.w, r4.x -mad.f32 r3.x, c2.y, r2.z, r4.y -mad.f32 r2.x, c2.x, r2.z, r2.x -mov.f32f32 r1.y, r1.y -mad.f32 r1.x, r5.x, r1.x, r4.z -mad.f32 r1.y, r0.x, r0.y, r1.y -mad.f32 r0.y, c3.y, r2.w, r3.x -mad.f32 r0.x, c3.x, r2.w, r2.x -mov.f32f32 r2.x, c7.x -max.f r1.y, r1.y, c19.x -(rpt5)nop -log2 r1.y, r1.y -(ss)mov.f32f32 r1.y, r1.y -min.f r2.x, r2.x, c19.z -(rpt2)nop -mul.f r1.y, r2.x, r1.y -(rpt2)nop -mov.f32f32 r1.y, r1.y +(ss)mov.f32f32 r5.y, r0.y +(ss)mul.f r0.y, r3.z, r0.y +max.f r3.z, r4.x, c19.x +mad.f32 r5.z, c8.x, r2.x, c9.x +mul.f r3.y, r3.y, r5.y +mul.f r3.w, r3.w, r5.y +mov.f32f32 r5.y, r3.z +mad.f32 r5.w, c8.y, r2.y, c9.y +mul.f r3.x, r3.x, r3.y +mad.f32 r3.y, c8.z, r2.z, c9.z +mad.f32 r3.x, r4.z, r3.w, r3.x +add.f r3.w, r4.w, r5.w +mad.f32 r0.x, r0.x, r0.y, r3.x +add.f r2.w, r2.w, r3.y +mul.f r0.y, c17.y, r2.y +add.f r2.y, r5.x, r5.z +max.f r0.x, r0.x, c19.x +mul.f r2.z, c17.z, r2.z +mad.f32 r3.x, r5.y, r0.y, r3.w +mul.f r2.x, c17.x, r2.x +cmps.f.lt r3.y, (neg)r4.x, c19.x +mad.f32 r0.y, c3.y, r1.w, r4.y +mad.f32 r1.x, c2.x, r1.z, r1.x +log2 r1.z, r0.x +mov.f32f32 r3.w, c7.x +mad.f32 r2.z, r5.y, r2.z, r2.w +mad.f32 r2.x, r3.z, r2.x, r2.y +(ss)mad.f32 r0.x, c3.x, r1.w, r1.x +min.f r1.x, r3.w, c19.z +min.f r1.w, r1.y, c19.y +(rpt1)nop +(ss)mul.f r1.x, r1.x, r1.z (rpt5)nop -exp2 r1.y, r1.y -(ss)mov.f32f32 r1.y, r1.y -(rpt2)nop -sel.b32 r1.y, r1.y, r1.z, c19.x -(rpt2)nop -mov.f32f32 r1.y, r1.y +exp2 r1.x, r1.x +(ss)sel.b32 r1.x, r1.x, r3.y, c19.x (rpt2)nop +mov.f32f32 r1.y, r1.x +mad.f32 r1.x, c18.x, r1.x, r2.x +(rpt1)nop mov.f32f32 r1.y, r1.y -(rpt2)nop -mad.f32 r1.z, c18.z, r1.y, r3.z -mad.f32 r2.x, c18.y, r1.y, r2.y -mad.f32 r1.x, c18.x, r1.y, r1.x -nop -max.f r1.y, r1.z, c19.x -max.f r2.x, r2.x, c19.x max.f r1.x, r1.x, c19.x -nop -min.f r1.z, r1.y, c19.y -min.f r1.y, r2.x, c19.y +(rpt1)nop +mad.f32 r1.z, c18.z, r1.y, r2.z +mad.f32 r1.y, c18.y, r1.y, r3.x min.f r1.x, r1.x, c19.y +nop +max.f r1.z, r1.z, c19.x +max.f r1.y, r1.y, c19.x +(rpt1)nop +min.f r1.z, r1.z, c19.y +min.f r1.y, r1.y, c19.y end nop +nop +nop ; VERT: outputs: r0.x (0:0) r1.x (1:0) -; VERT: inputs: r2.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r3.x (0:0,cm=f,il=16,b=0) -; VERT: 145 instructions, 0 half, 7 full +; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r2.x (0:0,cm=f,il=16,b=0) +; VERT: 121 instructions, 0 half, 6 full diff --git a/reference/stk-mines/stk-mines-20.asm b/reference/stk-mines/stk-mines-20.asm index 6846c40..10cfae3 100644 --- a/reference/stk-mines/stk-mines-20.asm +++ b/reference/stk-mines/stk-mines-20.asm @@ -7,10 +7,10 @@ @in(r0.x) in4 @in(r0.y) in5 @in(r0.z) in6 -@in(r0.w) in8 -@in(r1.x) in9 -@in(r1.y) in10 -@in(r1.z) in11 +@in(r1.x) in8 +@in(r1.y) in9 +@in(r1.z) in10 +@in(r1.w) in11 @in(r2.y) in12 @in(r2.z) in13 @in(r2.w) in14 @@ -31,147 +31,133 @@ @out(r3.y) out13 @out(r3.z) out14 @out(r3.w) out15 -(sy)(ss)mul.f r1.w, r4.x, c12.x +@const(c24.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000 +@const(c25.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 +(sy)(ss)mul.f r0.w, r4.x, c12.x mul.f r2.x, r0.x, c4.x -mad.f32 r1.w, c13.x, r4.y, r1.w +mad.f32 r0.w, c13.x, r4.y, r0.w mad.f32 r2.x, c4.y, r0.y, r2.x -mad.f32 r1.w, c14.x, r4.z, r1.w -mov.f32f32 r1.z, r1.z -mad.f32 r3.y, c15.x, r4.w, r1.w -mov.f32f32 r1.w, r2.x +mad.f32 r0.w, c14.x, r4.z, r0.w +mad.f32 r3.y, c4.z, r0.z, r2.x +mad.f32 r0.w, c15.x, r4.w, r0.w mul.f r2.x, r4.x, c12.z -mov.f32f32 r1.z, r1.z -mul.f r3.z, r3.y, r3.y -mul.f r3.w, r4.x, c12.y -mad.f32 r5.x, c4.z, r0.z, r1.w -mad.f32 r1.w, c13.y, r4.y, r3.w -max.f r1.z, r1.z, c24.x -mad.f32 r1.w, c14.y, r4.z, r1.w -mul.f r3.w, r5.x, c10.x -mad.f32 r5.y, c15.y, r4.w, r1.w -mul.f r5.z, r0.x, c5.x -min.f r1.w, r1.z, c24.y -mad.f32 r1.z, c13.z, r4.y, r2.x -mad.f32 r2.x, r5.y, r5.y, r3.z -mad.f32 r3.z, c5.y, r0.y, r5.z -mad.f32 r1.z, c14.z, r4.z, r1.z +mul.f r3.z, r2.y, c20.w +mul.f r3.w, r2.y, c20.z +mul.f r5.x, r0.w, r0.w +mul.f r5.y, r4.x, c12.y +mul.f r5.z, r3.y, c10.x +mad.f32 r5.y, c13.y, r4.y, r5.y +mul.f r5.w, r0.x, c5.x +mad.f32 r5.y, c14.y, r4.z, r5.y +mad.f32 r5.w, c5.y, r0.y, r5.w +mad.f32 r5.y, c15.y, r4.w, r5.y +mad.f32 r5.w, c5.z, r0.z, r5.w +mad.f32 r2.x, c13.z, r4.y, r2.x +mad.f32 r3.z, c21.w, r2.z, r3.z +mad.f32 r5.x, r5.y, r5.y, r5.x +mad.f32 r2.x, c14.z, r4.z, r2.x +mad.f32 r5.z, c10.y, r5.w, r5.z +mad.f32 r6.x, c15.z, r4.w, r2.x mul.f r0.x, r0.x, c6.x -mov.f32f32 r2.x, r2.x -mad.f32 r1.z, c15.z, r4.w, r1.z -mov.f32f32 r3.z, r3.z +mad.f32 r3.z, c22.w, r2.w, r3.z +mad.f32 r6.y, c21.z, r2.z, r3.w +mad.f32 r5.x, r6.x, r6.x, r5.x mad.f32 r0.x, c6.y, r0.y, r0.x -mul.f r0.y, r2.y, c20.w -mad.f32 r5.z, r1.z, r1.z, r2.x -mad.f32 r3.z, c5.z, r0.z, r3.z -absneg.f r2.x, (abs)r1.z -mov.f32f32 r0.x, r0.x -mad.f32 r0.y, c21.w, r2.z, r0.y -mul.f r5.w, r2.y, c20.z -mul.f r6.x, r2.y, c20.y -rsq r5.z, (abs)r5.z -(ss)mov.f32f32 r5.z, r5.z -mad.f32 r3.w, c10.y, r3.z, r3.w -mad.f32 r0.x, c6.z, r0.z, r0.x -mad.f32 r0.y, c22.w, r2.w, r0.y -mul.f r0.z, r3.y, r5.z -mul.f r3.y, r5.y, r5.z -mul.f r1.z, r1.z, r5.z -mov.f32f32 r3.w, r3.w -add.f r0.z, c10.x, (neg)r0.z -add.f r3.y, c10.y, (neg)r3.y -add.f r1.z, c10.z, (neg)r1.z -mad.f32 r5.y, c10.z, r0.x, r3.w -mul.f r5.z, r0.z, r0.z -mad.f32 r3.w, c23.w, r3.x, r0.y -mad.f32 r0.y, r3.y, r3.y, r5.z -max.f r5.z, r5.y, c25.x -cmps.f.lt r5.y, (neg)r5.y, c24.x -mad.f32 r5.w, c21.z, r2.z, r5.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r5.z, r5.z -mad.f32 r0.y, r1.z, r1.z, r0.y -mul.f r6.y, c16.z, r1.y -mul.f r6.z, c16.y, r1.x -mul.f r6.w, c16.x, r0.w -mad.f32 r5.w, c22.z, r2.w, r5.w -mad.f32 r6.x, c21.y, r2.z, r6.x +absneg.f r2.x, (abs)r6.x +mad.f32 r3.w, c23.w, r3.x, r3.z +mad.f32 r0.y, c22.z, r2.w, r6.y +mul.f r6.y, r2.y, c20.y mul.f r2.y, r2.y, c20.x -rsq r0.y, (abs)r0.y -(ss)mov.f32f32 r0.y, r0.y -mad.f32 r7.x, c8.z, r1.y, c9.z -mad.f32 r7.y, c8.y, r1.x, c9.y -mad.f32 r7.z, c8.x, r0.w, c9.x -mul.f r0.z, r0.z, r0.y -mul.f r3.y, r3.y, r0.y -mul.f r0.y, r1.z, r0.y -add.f r1.z, r6.y, r7.x -mul.f r0.z, r5.x, r0.z -mul.f r1.y, c17.z, r1.y -mad.f32 r0.z, r3.z, r3.y, r0.z -add.f r3.y, r6.z, r7.y -add.f r5.x, r6.w, r7.z -mad.f32 r3.z, c23.z, r3.x, r5.w -mov.f32f32 r0.z, r0.z -mad.f32 r1.y, r5.z, r1.y, r1.z -mad.f32 r0.x, r0.x, r0.y, r0.z -mul.f r0.y, c17.y, r1.x -mul.f r0.z, c17.x, r0.w -mad.f32 r0.w, c22.y, r2.w, r6.x +rsq r3.z, (abs)r5.x +(ss)mov.f32f32 r5.x, r3.z +mul.f r6.x, r6.x, r3.z +mad.f32 r0.x, c6.z, r0.z, r0.x +mad.f32 r3.z, c23.z, r3.x, r0.y +mul.f r0.y, r0.w, r5.x +mul.f r0.z, r5.y, r5.x +(rpt1)nop +add.f r0.y, c10.x, (neg)r0.y +add.f r0.z, c10.y, (neg)r0.z +add.f r0.w, c10.z, (neg)r6.x +mad.f32 r5.x, c10.z, r0.x, r5.z +mul.f r5.y, r0.y, r0.y +mad.f32 r5.z, c21.y, r2.z, r6.y +mad.f32 r5.y, r0.z, r0.z, r5.y +max.f r6.x, r5.x, c25.x +mad.f32 r5.y, r0.w, r0.w, r5.y +mul.f r6.y, c16.x, r1.x +cmps.f.lt r5.x, (neg)r5.x, c24.x +mad.f32 r5.z, c22.y, r2.w, r5.z +mad.f32 r2.y, c21.x, r2.z, r2.y +mul.f r2.z, r4.x, c0.w +mul.f r6.z, r4.x, c0.z +rsq r5.y, (abs)r5.y +(ss)mov.f32f32 r6.w, r5.y +mul.f r0.w, r0.w, r5.y +(ss)mov.f32f32 r5.y, r6.x +mul.f r7.x, c16.y, r1.y +mul.f r0.y, r0.y, r6.w +mul.f r0.z, r0.z, r6.w +mul.f r6.w, c16.z, r1.z +mad.f32 r7.y, c8.y, r1.y, c9.y +mul.f r0.y, r3.y, r0.y +mad.f32 r3.y, c8.z, r1.z, c9.z +mad.f32 r0.y, r5.w, r0.z, r0.y +add.f r0.z, r7.x, r7.y +mad.f32 r0.x, r0.x, r0.w, r0.y +add.f r0.y, r6.w, r3.y +mul.f r0.w, c17.y, r1.y +mad.f32 r1.y, c8.x, r1.x, c9.x max.f r0.x, r0.x, c25.x -mad.f32 r1.x, r5.z, r0.y, r3.y -mad.f32 r1.z, r5.z, r0.z, r5.x -mad.f32 r3.y, c23.y, r3.x, r0.w -mad.f32 r0.y, c21.x, r2.z, r2.y -mul.f r0.z, r4.x, c0.w -mul.f r0.w, r4.x, c0.z +mul.f r1.z, c17.z, r1.z +mad.f32 r5.w, r5.y, r0.w, r0.z +add.f r0.z, r6.y, r1.y +mad.f32 r3.y, c23.y, r3.x, r5.z +mad.f32 r0.w, c22.x, r2.w, r2.y +mad.f32 r1.y, c1.w, r4.y, r2.z log2 r0.x, r0.x -(ss)mov.f32f32 r0.x, r0.x mov.f32f32 r2.y, c7.x -mad.f32 r0.y, c22.x, r2.w, r0.y -mad.f32 r0.z, c1.w, r4.y, r0.z -mad.f32 r0.w, c1.z, r4.y, r0.w -min.f r2.y, r2.y, c25.z -mad.f32 r3.x, c23.x, r3.x, r0.y -mad.f32 r0.y, c2.w, r4.z, r0.z -mad.f32 r0.z, c2.z, r4.z, r0.w -mul.f r0.x, r2.y, r0.x -mad.f32 r0.w, c3.w, r4.w, r0.y +mad.f32 r1.z, r5.y, r1.z, r0.y +mul.f r0.y, c17.x, r1.x +mad.f32 r3.x, c23.x, r3.x, r0.w +min.f r0.w, r2.y, c25.z +mad.f32 r1.x, c2.w, r4.z, r1.y +mad.f32 r1.y, c1.z, r4.y, r6.z +mul.f r2.y, r4.x, c0.y +(ss)mul.f r0.x, r0.w, r0.x +mad.f32 r0.y, r6.x, r0.y, r0.z +mad.f32 r0.w, c3.w, r4.w, r1.x +mad.f32 r0.z, c2.z, r4.z, r1.y +mad.f32 r1.x, c1.y, r4.y, r2.y +mul.f r1.y, r4.x, c0.x +max.f r1.w, r1.w, c24.x +exp2 r0.x, r0.x +(ss)sel.b32 r0.x, r0.x, r5.x, c25.x mad.f32 r0.z, c3.z, r4.w, r0.z -mul.f r0.y, r4.x, c0.y -mov.f32f32 r0.x, r0.x -mad.f32 r0.y, c1.y, r4.y, r0.y -mul.f r2.y, r4.x, c0.x -mad.f32 r0.y, c2.y, r4.z, r0.y -mad.f32 r2.y, c1.x, r4.y, r2.y +mad.f32 r1.x, c2.y, r4.z, r1.x +mad.f32 r1.y, c1.x, r4.y, r1.y +mov.f32f32 r2.y, r0.x +mad.f32 r0.x, c18.x, r0.x, r0.y +mad.f32 r0.y, c3.y, r4.w, r1.x +mad.f32 r1.y, c2.x, r4.z, r1.y +mov.f32f32 r1.x, r2.y +max.f r0.x, r0.x, c24.x (rpt1)nop -exp2 r0.x, r0.x -(ss)mov.f32f32 r0.x, r0.x -mad.f32 r0.y, c3.y, r4.w, r0.y -mad.f32 r2.y, c2.x, r4.z, r2.y -nop -sel.b32 r4.x, r0.x, r5.y, c25.x -mad.f32 r0.x, c3.x, r4.w, r2.y +mad.f32 r1.z, c18.z, r1.x, r1.z +mad.f32 r2.y, c18.y, r1.x, r5.w +min.f r1.x, r0.x, c24.y +mad.f32 r0.x, c3.x, r4.w, r1.y +max.f r1.y, r1.z, c24.x +max.f r2.y, r2.y, c24.x +(rpt1)nop +min.f r1.z, r1.y, c24.y +min.f r1.y, r2.y, c24.y +min.f r1.w, r1.w, c24.y mov.f32f32 r2.w, c24.y mov.f32f32 r2.z, c24.x -mov.f32f32 r4.x, r4.x mov.f32f32 r2.y, c24.x -(rpt1)nop -mov.f32f32 r4.x, r4.x -(rpt2)nop -mad.f32 r1.y, c18.z, r4.x, r1.y -mad.f32 r1.x, c18.y, r4.x, r1.x -mad.f32 r1.z, c18.x, r4.x, r1.z -nop -max.f r1.y, r1.y, c24.x -max.f r1.x, r1.x, c24.x -max.f r4.x, r1.z, c24.x -nop -min.f r1.z, r1.y, c24.y -min.f r1.y, r1.x, c24.y -min.f r1.x, r4.x, c24.y end -nop ; VERT: outputs: r0.x (0:0) r1.x (1:0) r2.x (3:0) r3.x (5:0) -; VERT: inputs: r4.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r0.w (0:0,cm=f,il=16,b=0) r2.y (0:0,cm=f,il=20,b=0) -; VERT: 143 instructions, 0 half, 8 full +; VERT: inputs: r4.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r1.x (0:0,cm=f,il=16,b=0) r2.y (0:0,cm=f,il=20,b=0) +; VERT: 127 instructions, 0 half, 8 full diff --git a/reference/stk-mines/stk-mines-21.asm b/reference/stk-mines/stk-mines-21.asm index 74062e3..d96960a 100644 --- a/reference/stk-mines/stk-mines-21.asm +++ b/reference/stk-mines/stk-mines-21.asm @@ -6,39 +6,24 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)bary.f r0.z, 4, r0.x -bary.f r0.w, 7, r0.x -bary.f r1.x, 5, r0.x +bary.f r0.w, 5, r0.x +bary.f r1.x, 7, r0.x bary.f r1.y, 3, r0.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.x, r1.x bary.f r1.z, 2, r0.x -mov.f32f32 r1.w, r0.z -mov.f32f32 r2.y, r0.w -mov.f32f32 r2.x, r1.x -bary.f r0.z, 1, r0.x +bary.f r2.x, 1, r0.x bary.f (ei)r0.x, 0, r0.x -(rpt3)nop -sam.p (f32)(xyzw)r1.w, r1.w, s#0, t#0 -(sy)mul.f r0.y, r1.y, r2.z -mul.f r0.w, r2.y, r1.z -mul.f r0.z, r2.x, r0.z -mul.f r0.x, r1.w, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.x, r0.x -(ss)mov.f32f32 r1.w, r0.y -mov.f32f32 r1.z, r0.w -mov.f32f32 r1.y, r0.z -mov.f32f32 r1.x, r0.x +(rpt1)nop +sam.p (f32)(xyzw)r0.y, r0.z, s#0, t#0 +(sy)mul.f r1.w, r1.y, r1.x +mul.f r1.z, r0.w, r1.z +mul.f r1.y, r0.z, r2.x +(ss)mul.f r1.x, r0.y, r0.x end +nop +nop ; FRAG: outputs: r1.x (1:0) -; FRAG: inputs: r0.z (1:0,cm=f,il=8,b=1) r0.x (5:0,cm=f,il=12,b=1) -; FRAG: 35 instructions, 0 half, 3 full +; FRAG: inputs: r0.z (1:0,cm=f,il=8,b=1) r0.y (5:0,cm=f,il=12,b=1) +; FRAG: 15 instructions, 0 half, 3 full diff --git a/reference/stk-mines/stk-mines-22.asm b/reference/stk-mines/stk-mines-22.asm index 1435d60..141701a 100644 --- a/reference/stk-mines/stk-mines-22.asm +++ b/reference/stk-mines/stk-mines-22.asm @@ -6,275 +6,191 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c8.x) 0x40000000, 0xbf800000, 0x40800000, 0x3f000000 +@const(c9.x) 0x00000000, 0x3f333333, 0x3e99999a, 0x3d4ccccd +@const(c10.x) 0x3f800000, 0x00000000, 0x00000000, 0x00000000 +@const(c11.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.z, 0, r0.x absneg.f r0.w, (neg)c6.x -mov.f32f32 r1.x, c3.x -bary.f r1.y, 1, r0.x -add.f r1.z, r0.z, c3.x -add.f r1.w, r0.z, c4.x -add.f r2.x, c7.x, r0.w -add.f r0.z, r0.z, r1.x -mov.f32f32 r1.x, r1.z -mov.f32f32 r1.z, r1.w -mov.f32f32 r1.w, r2.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r2.x, c9.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r2.y, r1.x -add.f r1.x, r1.y, c3.y -mov.f32f32 r2.w, r1.z -add.f r1.z, r1.y, c4.y -rcp r1.w, r1.w -(ss)mov.f32f32 r1.w, r1.w -mov.f32f32 r1.x, r1.x -bary.f r3.y, 6, r0.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r0.z, r0.z -mov.f32f32 r1.x, r1.x -add.f r0.w, r3.y, r0.w -mov.f32f32 r1.z, r1.z -mov.f32f32 r3.z, r0.z -mov.f32f32 r2.z, r1.x -mul.f r0.z, r0.w, r1.w -mov.f32f32 r3.x, r1.z -add.f r0.w, r1.y, r2.x -cmps.f.lt r1.x, c6.x, r3.y -cmps.f.lt r1.y, c7.x, r3.y -bary.f r1.z, 11, r0.x -sam (f32)(xyz)r1.w, r2.y, s#0, t#0 -(sy)mad.f32 r2.x, c8.x, r2.x, c8.y -mov.f32f32 r0.z, r0.z -(ss)nop -sam (f32)(xyz)r2.z, r2.w, s#1, t#1 -(sy)(ss)mad.f32 r2.w, c8.x, r2.w, c8.y -mad.f32 r2.z, c8.x, r2.z, c8.y -mov.f32f32 r2.x, r2.x -mul.f r3.y, r0.z, c5.w -mul.f r3.w, r0.z, c5.z -mul.f r4.x, r0.z, c5.y -mul.f r2.x, c8.z, r2.x -mov.f32f32 r2.w, r2.w -mov.f32f32 r2.z, r2.z -mov.f32f32 r3.y, r3.y -mov.f32f32 r2.x, r2.x -mad.f32 r1.w, c8.x, r1.w, c8.y -mul.f r2.w, c8.z, r2.w -mov.f32f32 r2.z, r2.z -mov.f32f32 r4.y, r3.w -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.w, r2.w -mul.f r3.w, r2.z, r2.z -mov.f32f32 r4.x, r4.x -mul.f r4.z, r1.w, r1.w -mov.f32f32 r2.w, r2.w -mad.f32 r4.z, r2.x, r2.x, r4.z -mul.f r4.w, r0.z, c5.x -add.f r0.z, c10.x, (neg)r0.z -mad.f32 r3.w, r2.w, r2.w, r3.w -mov.f32f32 r4.z, r4.z +bary.f r1.x, 6, r0.x +mov.f32f32 r1.y, c3.x +add.f r1.z, r0.z, c4.x +bary.f r2.x, 1, r0.x +add.f r2.y, r0.z, c3.x +add.f r2.w, c7.x, r0.w +cmps.f.lt r3.x, c6.x, r1.x +add.f r1.w, r2.x, c4.y +add.f r2.z, r2.x, c3.y +cmps.f.lt r3.y, c7.x, r1.x +add.f r3.z, r0.z, r1.y +cov.u32f32 r0.z, r3.x +rcp r1.y, r2.w +add.f r0.w, r1.x, r0.w +cov.u32f32 r1.x, r3.y +sam (f32)(xyz)r3.w, r1.z, s#1, t#1 +(sy)(ss)mad.f32 r1.z, c8.x, r4.x, c8.y +sam (f32)(xyz)r2.y, r2.y, s#0, t#0 +(sy)mad.f32 r1.w, c8.x, r2.z, c8.y +(ss)mul.f r0.w, r0.w, r1.y +mad.f32 r1.y, c8.x, r3.w, c8.y +mul.f r1.z, c8.z, r1.z +mul.f r1.w, c8.z, r1.w +(ss)mov.f32f32 r2.z, r0.w +mov.f32f32 r3.x, r1.y +mov.f32f32 r3.y, r1.z +mov.f32f32 r4.x, r1.w mad.f32 r2.y, c8.x, r2.y, c8.y -mov.f32f32 r4.w, r4.w -mad.f32 r3.x, c8.x, r3.x, c8.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r2.y, r2.y -mov.f32f32 r3.w, r3.w mov.f32f32 r3.x, r3.x -mov.f32f32 r0.w, r0.w -mad.f32 r4.z, r2.y, r2.y, r4.z -cov.u32f32 r1.x, r1.x -cov.u32f32 r1.y, r1.y -(rpt3)nop -rsq r4.z, r4.z -(ss)mov.f32f32 r4.z, r4.z -mov.f32f32 r3.x, r3.x -mov.f32f32 r0.w, r0.w +mov.f32f32 r3.y, r3.y +mul.f r4.z, r2.z, c5.w +mul.f r4.w, r2.z, c5.z +mul.f r1.y, r1.y, r3.x +mov.f32f32 r5.x, r2.y +mad.f32 r1.y, r1.z, r3.y, r1.y +mad.f32 r1.z, c8.x, r4.y, c8.y +mul.f r4.y, r2.z, c5.y +mul.f r2.z, r2.z, c5.x +mul.f r2.y, r2.y, r5.x +mov.f32f32 r3.w, r1.z +mad.f32 r1.w, r1.w, r4.x, r2.y +mad.f32 r2.y, c8.x, r2.w, c8.y +add.f r0.w, c10.x, (neg)r0.w +mov.f32f32 r2.w, r3.w +cmps.f.ne r0.z, r0.z, c9.x cmps.f.ne r1.x, r1.x, c9.x -mul.f r1.w, r1.w, r4.z -mul.f r2.x, r2.x, r4.z -mul.f r2.y, r2.y, r4.z -mad.f32 r4.z, r3.x, r3.x, r3.w -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r2.y, r2.y -mov.f32f32 r3.w, r0.w -cmps.f.ne r0.w, r1.y, c9.x -mov.f32f32 r1.y, r1.z -rsq r1.z, r4.z -(ss)mov.f32f32 r1.z, r1.z -(ss)bary.f r4.z, 12, r0.x -bary.f r5.x, 13, r0.x +mov.f32f32 r3.w, c9.x +mad.f32 r1.y, r2.w, r2.w, r1.y +mov.f32f32 r2.w, r2.y mov.f32f32 r5.y, c9.x -mad.f32 r1.w, r2.z, r1.z, r1.w -mad.f32 r2.x, r2.w, r1.z, r2.x -mad.f32 r1.z, r3.x, r1.z, r2.y -sam (f32)(xyzw)r2.y, r3.z, s#2, t#2 -(ss)mov.f32f32 r3.z, r4.z -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r3.w, r5.x -mul.f r1.w, r1.w, c8.w -mul.f r2.x, r2.x, c8.w -mul.f r1.z, r1.z, c8.w -nop -mov.f32f32 r1.w, r1.w -bary.f r4.z, 8, r0.x -mov.f32f32 r2.x, r2.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r5.x, c9.x -mul.f r5.z, r1.w, r4.z -bary.f r5.w, 9, r0.x -mul.f r6.x, r4.z, r1.w -mov.f32f32 r6.y, c5.w -mov.f32f32 r6.z, c5.z -mad.f32 r5.z, r2.x, r5.w, r5.z -mad.f32 r6.x, r5.w, r2.x, r6.x -mov.f32f32 r6.w, c5.y +mov.f32f32 r5.z, c9.x +mov.f32f32 r5.w, c5.w +mov.f32f32 r6.x, c5.z +mov.f32f32 r6.y, c5.y +rsq r1.y, r1.y +(ss)mov.f32f32 r6.z, r1.y +mad.f32 r1.w, r2.w, r2.w, r1.w +add.f r3.w, r2.x, r3.w +bary.f r2.x, 11, r0.x +bary.f r2.w, 12, r0.x +bary.f r6.w, 13, r0.x +bary.f r7.x, 8, r0.x +bary.f r7.y, 9, r0.x +rsq r1.w, r1.w +(ss)mov.f32f32 r7.z, r1.w +(ss)mul.f r1.w, r2.y, r1.w +sam (f32)(xyzw)r7.w, r3.z, s#2, t#2 bary.f (ei)r0.x, 10, r0.x -mov.f32f32 r0.y, r5.z -mov.f32f32 r5.z, r6.x -mov.f32f32 r6.x, c5.x -mad.f32 r0.y, r1.z, r0.x, r0.y -mad.f32 r5.z, r0.x, r1.z, r5.z -(rpt1)nop -mul.f r1.w, r0.y, r1.w -max.f r5.z, r5.z, c9.x -mul.f r2.x, r0.y, r2.x -mul.f r0.y, r0.y, r1.z -mov.f32f32 r1.z, r1.w -mov.f32f32 r1.w, r5.z -mov.f32f32 r2.x, r2.x -mov.f32f32 r0.y, r0.y -mul.f r1.z, c8.x, r1.z -mad.f32 r1.w, c9.y, r1.w, c9.z -mul.f r2.x, c8.x, r2.x -mul.f r0.y, c8.x, r0.y -mov.f32f32 r1.z, r1.z -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r0.y, r0.y -add.f r1.z, r4.z, (neg)r1.z -(sy)mul.f r3.x, r3.x, r1.w -mul.f r2.w, r2.w, r1.w -mul.f r2.z, r2.z, r1.w -mov.f32f32 r1.z, r1.z -mov.f32f32 r4.z, r3.x -mov.f32f32 r5.z, r2.w -mov.f32f32 r7.x, r2.z -mul.f r7.y, r1.z, r1.z -add.f r2.x, r5.w, (neg)r2.x -add.f r4.z, r4.z, r5.x -add.f r0.x, r0.x, (neg)r0.y -mul.f r0.y, r2.y, r1.w -mov.f32f32 r1.w, r2.x -add.f r2.x, r4.z, r5.y -mov.f32f32 r0.x, r0.x -mov.f32f32 r2.y, r0.y -mad.f32 r4.z, r1.w, r1.w, r7.y +mov.f32f32 r0.y, c5.x +mul.f r2.y, r5.x, r7.z +(ss)mul.f r3.z, r4.x, r7.z +mad.f32 r2.y, r3.x, r6.z, r2.y +mad.f32 r3.x, r3.y, r6.z, r3.z +mad.f32 r1.y, r1.z, r1.y, r1.w +nop +mul.f r1.z, r2.y, c8.w +mul.f r1.w, r3.x, c8.w +mul.f r1.y, r1.y, c8.w +nop +mov.f32f32 r2.y, r1.z +mul.f r1.z, r7.x, r1.z +mov.f32f32 r3.x, r1.w +mov.f32f32 r3.y, r1.y +mul.f r3.z, r2.y, r7.x +mad.f32 r1.z, r7.y, r1.w, r1.z +mad.f32 r1.w, r3.x, r7.y, r3.z +mad.f32 r1.y, r0.x, r1.y, r1.z +mad.f32 r1.z, r3.y, r0.x, r1.w (rpt2)nop -mov.f32f32 r4.z, r4.z -nop -mad.f32 r4.z, r0.x, r0.x, r4.z -(rpt5)nop -rsq r4.z, r4.z -(ss)mov.f32f32 r4.z, r4.z +mul.f r1.w, r1.z, r2.y +max.f r1.y, r1.y, c9.x +mul.f r2.y, r1.z, r3.x +mul.f r1.z, r1.z, r3.y +mul.f r1.w, c8.x, r1.w +mad.f32 r1.y, c9.y, r1.y, c9.z +mul.f r2.y, c8.x, r2.y +mul.f r1.z, c8.x, r1.z +add.f r1.w, r7.x, (neg)r1.w +mov.f32f32 r3.x, r1.y +add.f r2.y, r7.y, (neg)r2.y +add.f r0.x, r0.x, (neg)r1.z +mov.f32f32 r1.z, r1.w +(sy)mul.f r3.y, r8.z, r3.x +mov.f32f32 r3.z, r2.y +mov.f32f32 r3.w, r0.x +mul.f r1.w, r1.w, r1.z +add.f r4.x, r3.y, r5.z +mad.f32 r1.w, r2.y, r3.z, r1.w +mul.f r2.y, r8.y, r3.x +mad.f32 r1.w, r3.w, r3.w, r1.w +add.f r3.w, r4.x, r5.y +mul.f r3.x, r8.x, r3.x +mul.f r1.y, r7.w, r1.y (rpt2)nop -mul.f r1.z, r1.z, r4.z -mul.f r1.w, r1.w, r4.z -mul.f r0.x, r0.x, r4.z -nop -mov.f32f32 r1.z, r1.z -mov.f32f32 r1.w, r1.w -mov.f32f32 r0.x, r0.x +rsq r1.w, r1.w +(ss)mov.f32f32 r4.x, r1.w +mul.f r0.x, r0.x, r1.w +(rpt1)nop +mul.f r1.z, r1.z, r4.x +(ss)mul.f r1.w, r3.z, r4.x +(rpt1)nop +mul.f r1.z, r1.z, r2.x nop -mul.f r1.y, r1.z, r1.y +mad.f32 r1.z, r1.w, r2.w, r1.z nop -mad.f32 r1.y, r1.w, r3.z, r1.y -(rpt2)nop -mov.f32f32 r1.y, r1.y -nop -mad.f32 r0.x, r0.x, r3.w, r1.y -(rpt2)nop -mov.f32f32 r0.x, r0.x +mad.f32 r0.x, r0.x, r6.w, r1.z (rpt2)nop max.f r0.x, r0.x, c9.x (rpt2)nop -mov.f32f32 r0.x, r0.x -(rpt2)nop -mov.f32f32 r1.y, r0.x mov.f32f32 r1.z, r0.x -cmps.f.lt r0.x, c9.x, r0.x -(rpt1)nop -mul.f r1.y, r1.y, r1.z -cov.u32f32 r0.x, r0.x +(rpt2)nop +mul.f r0.x, r0.x, r1.z +cmps.f.lt r1.z, c9.x, r1.z (rpt1)nop -mov.f32f32 r1.y, r1.y -cmps.f.ne r0.x, r0.x, c9.x +mov.f32f32 r1.w, r0.x +cov.u32f32 r1.z, r1.z (rpt1)nop -mul.f r1.y, r1.y, r1.y -sel.b32 r1.z, r2.x, r0.x, r3.x +mul.f r0.x, r0.x, r1.w +cmps.f.ne r1.z, r1.z, c9.x (rpt1)nop -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.w, r1.z +mov.f32f32 r1.w, r0.x +sel.b32 r2.x, r3.w, r1.z, r3.y +mul.f r0.x, r0.x, c9.w +nop +mul.f r1.w, r1.w, r1.w +mad.f32 r2.w, r0.w, r2.x, r4.z +add.f r3.y, r2.y, r0.x +add.f r3.z, r3.x, r0.x +mul.f r1.w, r1.w, c9.z +add.f r0.x, r1.y, r0.x +sel.b32 r2.x, r2.w, r0.z, r2.x +nop +mov.f32f32 r2.w, r1.w +add.f r0.x, r0.x, r1.w +sel.b32 r1.w, r5.w, r1.x, r2.x +nop +add.f r2.x, r3.y, r2.w +add.f r2.w, r3.z, r2.w +sel.b32 r0.x, r0.x, r1.z, r1.y +nop +sel.b32 r1.y, r2.x, r1.z, r2.y +sel.b32 r1.z, r2.w, r1.z, r3.x +mad.f32 r2.x, r0.w, r0.x, r2.z +nop +mad.f32 r2.y, r0.w, r1.y, r4.w +mad.f32 r0.w, r0.w, r1.z, r4.y +sel.b32 r0.x, r2.x, r0.z, r0.x +nop +sel.b32 r1.y, r2.y, r0.z, r1.y +sel.b32 r0.z, r0.w, r0.z, r1.z (rpt1)nop -mul.f r2.x, r1.y, r1.y -mov.f32f32 r1.w, r1.w -mul.f r1.y, r1.y, c9.w -nop -mov.f32f32 r2.x, r2.x -mad.f32 r1.w, r0.z, r1.w, r3.y -mov.f32f32 r3.x, r1.y -mov.f32f32 r3.y, r1.y -mul.f r2.x, r2.x, c9.z -sel.b32 r1.z, r1.w, r1.x, r1.z -add.f r1.w, r5.z, r3.x -add.f r3.x, r7.x, r3.y -mov.f32f32 r2.x, r2.x -sel.b32 r1.z, r6.y, r0.w, r1.z -mov.f32f32 r1.y, r1.y -nop -mov.f32f32 r3.y, r2.x -mov.f32f32 r3.z, r2.x -mov.f32f32 r2.x, r2.x -add.f r1.y, r2.y, r1.y -add.f r2.y, r1.w, r3.y -add.f r3.x, r3.x, r3.z -mov.f32f32 r1.w, r1.z -nop -sel.b32 r1.z, r2.y, r0.x, r2.w -sel.b32 r2.y, r3.x, r0.x, r2.z -add.f r1.y, r1.y, r2.x -nop -mov.f32f32 r2.x, r1.z -mov.f32f32 r2.z, r2.y -sel.b32 r0.x, r1.y, r0.x, r0.y -nop -mov.f32f32 r0.y, r2.x -mov.f32f32 r1.y, r2.z -mov.f32f32 r2.x, r0.x -nop -mad.f32 r0.y, r0.z, r0.y, r4.y -mad.f32 r1.y, r0.z, r1.y, r4.x -mov.f32f32 r2.x, r2.x -nop -sel.b32 r0.y, r0.y, r1.x, r1.z -sel.b32 r1.y, r1.y, r1.x, r2.y -mad.f32 r0.z, r0.z, r2.x, r4.w +sel.b32 r1.z, r6.x, r1.x, r1.y +sel.b32 r1.y, r6.y, r1.x, r0.z +sel.b32 r1.x, r0.y, r1.x, r0.x +end nop -sel.b32 r0.y, r6.z, r0.w, r0.y -sel.b32 r1.y, r6.w, r0.w, r1.y -sel.b32 r0.x, r0.z, r1.x, r0.x nop -mov.f32f32 r1.z, r0.y -mov.f32f32 r1.y, r1.y -sel.b32 r0.x, r6.x, r0.w, r0.x -(rpt2)nop -mov.f32f32 r1.x, r0.x -end nop ; FRAG: outputs: r1.x (1:0) -; FRAG: inputs: r0.x (5:0,cm=f,il=8,b=1) r1.x (5:20,cm=f,il=12,b=1) r0.w (5:21,cm=f,il=16,b=1) r3.x (5:22,cm=f,il=20,b=1) -; FRAG: 297 instructions, 0 half, 8 full +; FRAG: inputs: r0.x (5:0,cm=f,il=8,b=1) r1.x (5:20,cm=f,il=12,b=1) r1.y (5:21,cm=f,il=16,b=1) r3.x (5:22,cm=f,il=20,b=1) +; FRAG: 193 instructions, 0 half, 9 full diff --git a/reference/stk-mines/stk-mines-23.asm b/reference/stk-mines/stk-mines-23.asm index 2d84780..726bef5 100644 --- a/reference/stk-mines/stk-mines-23.asm +++ b/reference/stk-mines/stk-mines-23.asm @@ -1,16 +1,16 @@ ; options: ; VERT: new compiler -@in(r1.x) in0 -@in(r1.y) in1 -@in(r1.z) in2 -@in(r1.w) in3 +@in(r4.z) in0 +@in(r4.w) in1 +@in(r5.x) in2 +@in(r5.y) in3 @in(r0.x) in4 @in(r0.y) in5 @in(r0.z) in6 -@in(r5.x) in8 -@in(r5.y) in9 -@in(r5.z) in10 -@in(r5.w) in11 +@in(r1.x) in8 +@in(r1.y) in9 +@in(r1.z) in10 +@in(r1.w) in11 @out(r0.x) out0 @out(r0.y) out1 @out(r0.z) out2 @@ -31,191 +31,144 @@ @out(r4.y) out17 @out(r4.z) out18 @out(r4.w) out19 +@const(c12.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)mov.f32f32 r0.w, c1.x -mul.f r2.x, c8.x, r1.x +mul.f r2.x, c8.x, r4.z mov.f32f32 r2.y, c1.y mov.f32f32 r2.z, c1.z mul.f r2.w, r0.w, r0.x mov.f32f32 r3.x, c2.x -mad.f32 r2.x, c9.x, r1.y, r2.x +mad.f32 r2.x, c9.x, r4.w, r2.x mul.f r3.y, r0.w, r0.w -mad.f32 r2.x, c10.x, r1.z, r2.x +mad.f32 r2.x, c10.x, r5.x, r2.x mad.f32 r2.w, r3.x, r0.y, r2.w -mad.f32 r2.x, c11.x, r1.w, r2.x -mad.f32 r3.x, r2.y, r2.y, r3.y -mul.f r3.y, r2.y, r0.x -mov.f32f32 r2.w, r2.w -mov.f32f32 r3.z, c3.x -mov.f32f32 r2.x, r2.x -mov.f32f32 r3.x, r3.x +mov.f32f32 r3.x, c3.x +mad.f32 r2.x, c11.x, r5.y, r2.x +mad.f32 r3.y, r2.y, r2.y, r3.y +mul.f r3.z, r2.y, r0.x +mad.f32 r2.w, r3.x, r0.z, r2.w +mov.f32f32 r3.x, r2.x +mad.f32 r3.y, r2.z, r2.z, r3.y mov.f32f32 r3.w, c2.y -mad.f32 r2.w, r3.z, r0.z, r2.w -mul.f r3.z, r2.x, r2.x -mul.f r4.x, c8.y, r1.x -mad.f32 r3.x, r2.z, r2.z, r3.x -mov.f32f32 r4.y, r2.w -mad.f32 r2.w, c9.y, r1.y, r4.x -mad.f32 r3.y, r3.w, r0.y, r3.y -mul.f r0.x, r2.z, r0.x -mul.f r3.w, r4.y, r4.y -mad.f32 r2.w, c10.y, r1.z, r2.w -mov.f32f32 r3.y, r3.y -mov.f32f32 r4.x, c3.y -mad.f32 r2.w, c11.y, r1.w, r2.w -rsq r3.x, r3.x -(ss)mov.f32f32 r3.x, r3.x -mov.f32f32 r4.z, c2.z -mad.f32 r3.y, r4.x, r0.z, r3.y mov.f32f32 r4.x, r2.w -mul.f r0.w, r0.w, r3.x -mul.f r2.y, r2.y, r3.x -mov.f32f32 r3.y, r3.y -mad.f32 r2.w, r4.x, r4.x, r3.z +mul.f r3.x, r3.x, r3.x +mul.f r4.y, c8.y, r4.z +mad.f32 r3.z, r3.w, r0.y, r3.z +mul.f r2.w, r2.w, r4.x +mov.f32f32 r3.w, c3.y +mad.f32 r4.y, c9.y, r4.w, r4.y +rsq r3.y, r3.y +(ss)mov.f32f32 r5.z, r3.y +mad.f32 r4.y, c10.y, r5.x, r4.y +mad.f32 r3.z, r3.w, r0.z, r3.z +mad.f32 r3.w, c11.y, r5.y, r4.y +mul.f r0.w, r0.w, r5.z +mul.f r2.y, r2.y, r5.z +mov.f32f32 r4.y, r3.z +mov.f32f32 r5.z, r3.w +mul.f r5.w, c0.x, r0.w mov.f32f32 r0.w, r0.w -mov.f32f32 r2.y, r2.y -mad.f32 r3.z, r3.y, r3.y, r3.w -mov.f32f32 r2.w, r2.w -mul.f r3.w, c8.z, r1.x -mul.f r4.w, c0.x, r0.w -mov.f32f32 r3.z, r3.z -mad.f32 r0.x, r4.z, r0.y, r0.x -mad.f32 r0.y, c9.z, r1.y, r3.w -mad.f32 r3.w, c0.y, r2.y, r4.w -mad.f32 r0.y, c10.z, r1.z, r0.y -mov.f32f32 r0.x, r0.x -mov.f32f32 r4.z, c3.z -mad.f32 r0.y, c11.z, r1.w, r0.y -mov.f32f32 r3.w, r3.w -mul.f r2.z, r2.z, r3.x -mad.f32 r0.x, r4.z, r0.z, r0.x -mov.f32f32 r0.y, r0.y -mul.f r0.z, c4.w, r1.x -mul.f r3.x, c4.z, r1.x -mov.f32f32 r0.x, r0.x -mad.f32 r2.w, r0.y, r0.y, r2.w +mad.f32 r2.w, r3.z, r4.y, r2.w +mul.f r0.x, r2.z, r0.x +mov.f32f32 r3.z, c2.z +mad.f32 r3.x, r3.w, r5.z, r3.x +mul.f r3.w, c8.z, r4.z +mad.f32 r5.w, c0.y, r2.y, r5.w +mad.f32 r0.x, r3.z, r0.y, r0.x +mov.f32f32 r0.y, c3.z +mad.f32 r3.z, c9.z, r4.w, r3.w +mul.f r2.z, r2.z, r3.y +(ss)mad.f32 r3.y, c10.z, r5.x, r3.z +mad.f32 r0.x, r0.y, r0.z, r0.x +mad.f32 r0.y, c11.z, r5.y, r3.y +mad.f32 r3.y, c0.z, r2.z, r5.w +mov.f32f32 r0.z, r2.y +mov.f32f32 r2.y, r0.x +mov.f32f32 r3.z, r0.y +mov.f32f32 r3.w, r3.y mov.f32f32 r2.z, r2.z -mad.f32 r0.z, c5.w, r1.y, r0.z -mad.f32 r3.z, r0.x, r0.x, r3.z -mad.f32 r0.z, c6.w, r1.z, r0.z -mad.f32 r3.x, c5.z, r1.y, r3.x -mul.f r4.z, c4.y, r1.x -mul.f r1.x, c4.x, r1.x -mad.f32 r0.z, c7.w, r1.w, r0.z -mad.f32 r3.x, c6.z, r1.z, r3.x -rsq r3.z, r3.z -(ss)mov.f32f32 r3.z, r3.z -rsq r2.w, r2.w -(ss)mov.f32f32 r4.w, r2.w -mad.f32 r3.w, c0.z, r2.z, r3.w -(ss)mov.f32f32 r2.w, r0.z -mul.f r0.x, r0.x, r3.z -mul.f r2.x, r2.x, r4.w -mul.f r4.y, r4.y, r3.z -mul.f r3.y, r3.y, r3.z -mov.f32f32 r0.x, r0.x -mov.f32f32 r2.x, r2.x -mov.f32f32 r3.z, r4.y -mov.f32f32 r3.y, r3.y -mul.f r4.y, r0.x, r2.y -absneg.f r2.x, (neg)r2.x -mul.f r6.x, c0.x, r3.z -mad.f32 r4.y, r3.y, r2.z, (neg)r4.y -mad.f32 r6.x, c0.y, r3.y, r6.x -mov.f32f32 r6.y, r2.x -mul.f r2.x, r3.z, r2.z -mov.f32f32 r2.z, r4.y -mov.f32f32 r4.y, r6.x -mul.f r6.x, r6.y, r6.y -mul.f r4.x, r4.x, r4.w -mul.f r2.z, c0.x, r2.z -mad.f32 r2.x, r0.x, r0.w, (neg)r2.x -mad.f32 r0.x, c0.z, r0.x, r4.y +mad.f32 r2.y, r2.y, r2.y, r2.w +mad.f32 r0.y, r0.y, r3.z, r3.x +mul.f r3.x, r3.w, r3.w +mul.f r2.w, c4.w, r4.z +mul.f r3.w, c4.z, r4.z +mul.f r5.w, c4.y, r4.z +mul.f r4.z, c4.x, r4.z +rsq r2.y, r2.y +(ss)mov.f32f32 r6.x, r2.y +rsq r0.y, r0.y +(ss)mov.f32f32 r6.y, r0.y +(ss)mul.f r0.y, r2.x, r0.y +mul.f r0.x, r0.x, r2.y +mul.f r2.x, r4.y, r6.x +mul.f r2.y, r5.z, r6.y +mul.f r4.x, r4.x, r6.x +absneg.f r4.y, (neg)r0.y +mov.f32f32 r0.y, r2.x +mov.f32f32 r5.z, r0.x +absneg.f r2.y, (neg)r2.y +mul.f r6.x, c0.x, r4.x mov.f32f32 r4.x, r4.x -mul.f r0.w, r3.y, r0.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r0.x, r0.x -absneg.f r3.y, (neg)r4.x -mad.f32 r0.w, r3.z, r2.y, (neg)r0.w -mad.f32 r2.x, c0.y, r2.x, r2.z -mov.f32f32 r0.x, r0.x -mov.f32f32 r4.x, r3.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r3.y, r3.w -mul.f r0.y, r0.y, r4.w -mad.f32 r0.w, c0.z, r0.w, r2.x -mad.f32 r2.x, r4.x, r4.x, r6.x -mul.f r2.y, r3.y, r3.y -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r2.x, r2.x -mad.f32 r3.x, c7.z, r1.w, r3.x -mad.f32 r3.z, c5.y, r1.y, r4.z -mov.f32f32 r3.w, r0.w -absneg.f r0.y, (neg)r0.y -mov.f32f32 r2.z, r3.x -mad.f32 r0.w, c6.y, r1.z, r3.z -mad.f32 r2.y, r3.w, r3.w, r2.y -mov.f32f32 r3.z, r0.y -mad.f32 r0.y, c7.y, r1.w, r0.w -mad.f32 r0.w, c5.x, r1.y, r1.x -mov.f32f32 r1.x, r2.y -mad.f32 r1.y, r3.z, r3.z, r2.x -mad.f32 r1.x, r0.x, r0.x, r1.x +mul.f r6.z, r5.z, r0.z +mov.f32f32 r6.w, r2.y +mad.f32 r6.z, r0.y, r2.z, (neg)r6.z +mov.f32f32 r7.x, r4.y +mad.f32 r2.x, c0.y, r2.x, r6.x +mul.f r2.z, r4.x, r2.z +mul.f r6.x, c0.x, r6.z +mad.f32 r2.z, r5.z, r0.w, (neg)r2.z +mul.f r5.z, r7.x, r7.x +mad.f32 r0.x, c0.z, r0.x, r2.x +mad.f32 r2.x, r2.y, r6.w, r5.z +mad.f32 r2.y, c0.y, r2.z, r6.x +mul.f r0.y, r0.y, r0.w +mul.f r0.w, r3.z, r6.y +mad.f32 r0.y, r4.x, r0.z, (neg)r0.y +mov.f32f32 r0.z, r0.x +mad.f32 r2.z, c5.w, r4.w, r2.w +mad.f32 r2.w, c5.z, r4.w, r3.w +mad.f32 r0.y, c0.z, r0.y, r2.y +absneg.f r2.y, (neg)r0.w +mov.f32f32 r3.z, r0.z +mad.f32 r0.z, c6.w, r5.x, r2.z +mov.f32f32 r2.z, r0.y +mov.f32f32 r4.x, r2.y +mad.f32 r0.w, c7.w, r5.y, r0.z +mad.f32 r0.z, c6.z, r5.x, r2.w +mov.f32f32 r5.z, r2.z +mad.f32 r2.x, r2.y, r4.x, r2.x +mov.f32f32 r2.w, r0.w +mad.f32 r0.z, c7.z, r5.y, r0.z +mad.f32 r0.y, r0.y, r5.z, r3.x +mad.f32 r2.y, c5.y, r4.w, r5.w +mad.f32 r0.x, r0.x, r3.z, r0.y +mov.f32f32 r2.z, r0.z +mad.f32 r0.y, c6.y, r5.x, r2.y +mad.f32 r2.y, c5.x, r4.w, r4.z +mad.f32 r0.y, c7.y, r5.y, r0.y +mad.f32 r4.z, c6.x, r5.x, r2.y +nop +rsq r0.x, r0.x +(ss)mov.f32f32 r2.y, r0.x +mul.f r3.x, r3.y, r0.x +(ss)rsq r0.x, r2.x +(ss)mov.f32f32 r2.x, r0.x +mul.f r3.w, r4.y, r0.x +mul.f r3.z, r3.z, r2.y +mul.f r3.y, r5.z, r2.y +mul.f r4.y, r4.x, r2.x +mul.f r4.x, r6.w, r2.x mov.f32f32 r2.y, r0.y -mad.f32 r1.z, c6.x, r1.z, r0.w -mov.f32f32 r0.w, r0.z -mov.f32f32 r0.z, r3.x -mov.f32f32 r0.y, r0.y -mad.f32 r1.z, c7.x, r1.w, r1.z -rsq r1.x, r1.x -(ss)mov.f32f32 r1.x, r1.x -rsq r1.y, r1.y -(ss)mov.f32f32 r1.y, r1.y -mov.f32f32 r1.w, (0.000000) -mov.f32f32 r2.x, r1.z -mul.f r0.x, r0.x, r1.x -mul.f r3.x, r3.w, r1.x -mul.f r1.x, r3.y, r1.x -mul.f r3.y, r3.z, r1.y -mov.f32f32 r0.x, r0.x -mov.f32f32 r3.x, r3.x -mov.f32f32 r1.x, r1.x -mov.f32f32 r3.w, r3.y -mov.f32f32 r0.x, r0.x -mov.f32f32 r3.x, r3.x -mov.f32f32 r1.x, r1.x -mov.f32f32 r4.y, r3.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r3.x, r3.x -mov.f32f32 r1.x, r1.x +mad.f32 r0.x, c7.x, r5.y, r4.z +mov.f32f32 r4.z, r4.y +mov.f32f32 r4.w, (0.000000) nop -mov.f32f32 r3.z, r0.x -mov.f32f32 r3.y, r3.x -mov.f32f32 r3.x, r1.x -mov.f32f32 r0.x, r4.y -mul.f r1.x, r4.x, r1.y -mul.f r1.y, r6.y, r1.y -mov.f32f32 r4.z, r3.w -mov.f32f32 r4.y, r0.x -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.y, r1.y -mov.f32f32 r0.x, r1.z -mov.f32f32 r4.w, r1.w -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.y, r1.y -(rpt1)nop -mov.f32f32 r1.x, r1.x -mov.f32f32 r3.w, r1.y -mov.f32f32 r1.w, r5.w -mov.f32f32 r1.z, r5.z -mov.f32f32 r4.x, r1.x -mov.f32f32 r1.y, r5.y -mov.f32f32 r1.x, r5.x +mov.f32f32 r2.x, r0.x end nop nop +nop ; VERT: outputs: r0.x (0:0) r1.x (5:0) r2.x (5:20) r3.x (5:21) r4.x (5:22) -; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r5.x (0:0,cm=f,il=16,b=0) -; VERT: 183 instructions, 0 half, 7 full +; VERT: inputs: r4.z (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r1.x (0:0,cm=f,il=16,b=0) +; VERT: 133 instructions, 0 half, 8 full diff --git a/reference/stk-mines/stk-mines-24.asm b/reference/stk-mines/stk-mines-24.asm index 68f0c23..3d55b19 100644 --- a/reference/stk-mines/stk-mines-24.asm +++ b/reference/stk-mines/stk-mines-24.asm @@ -6,51 +6,36 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c4.x) 0x3f800000, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)bary.f r0.z, 4, r0.x bary.f r0.w, 8, r0.x -bary.f r1.x, 11, r0.x -bary.f r1.y, 9, r0.x +bary.f r1.x, 9, r0.x +bary.f r1.y, 11, r0.x mad.f32 r0.z, c1.x, r0.z, c1.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.y, r1.y +bary.f r1.z, 3, r0.x +bary.f r2.x, 2, r0.x +bary.f r2.y, 1, r0.x max.f r0.z, r0.z, c4.y -mov.f32f32 r1.z, r0.w -mov.f32f32 r2.x, r1.x -mov.f32f32 r1.w, r1.y -min.f r0.z, r0.z, c4.x -bary.f r0.w, 3, r0.x -bary.f r1.x, 2, r0.x -bary.f r1.y, 1, r0.x -add.f r2.y, c4.x, (neg)r0.z bary.f (ei)r0.x, 0, r0.x nop -sam.p (f32)(xyzw)r2.z, r1.z, s#0, t#0 -(sy)mul.f r0.y, r0.w, r3.y -mul.f r0.w, c2.z, r2.y -(ss)mul.f r1.z, c2.y, r2.y -mul.f r1.w, c2.x, r2.y -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.w, r0.w -mul.f r1.x, r3.x, r1.x -mov.f32f32 r2.x, r1.z -mov.f32f32 r1.w, r1.w -mov.f32f32 r0.y, r0.y -mad.f32 r0.w, r1.x, r0.z, r0.w -mul.f r1.x, r2.w, r1.y +sam.p (f32)(xyzw)r2.z, r0.w, s#0, t#0 +(sy)mul.f r1.w, r1.z, r3.y +min.f r0.y, r0.z, c4.x +mul.f r0.z, r3.x, r2.x +(ss)mul.f r0.w, r2.w, r2.y mul.f r0.x, r2.z, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.z, r0.w -mad.f32 r0.w, r1.x, r0.z, r2.x -mad.f32 r0.x, r0.x, r0.z, r1.w -mov.f32f32 r1.w, r0.y +add.f r1.x, c4.x, (neg)r0.y +(rpt2)nop +mul.f r1.y, c2.z, r1.x +mul.f r2.x, c2.y, r1.x +mad.f32 r1.z, r0.z, r0.y, r1.y +mad.f32 r1.y, r0.w, r0.y, r2.x +mul.f r0.z, c2.x, r1.x nop -mov.f32f32 r1.y, r0.w -mov.f32f32 r1.x, r0.x +mad.f32 r1.x, r0.x, r0.y, r0.z end nop -nop ; FRAG: outputs: r1.x (1:0) -; FRAG: inputs: r0.x (1:0,cm=f,il=8,b=1) r1.x (3:0,cm=f,il=12,b=1) r0.y (5:0,cm=f,il=16,b=1) -; FRAG: 42 instructions, 0 half, 4 full +; FRAG: inputs: r0.w (1:0,cm=f,il=8,b=1) r1.x (3:0,cm=f,il=12,b=1) r0.z (5:0,cm=f,il=16,b=1) +; FRAG: 29 instructions, 0 half, 4 full diff --git a/reference/stk-mines/stk-mines-25.asm b/reference/stk-mines/stk-mines-25.asm index ff486a7..d610a0c 100644 --- a/reference/stk-mines/stk-mines-25.asm +++ b/reference/stk-mines/stk-mines-25.asm @@ -6,39 +6,20 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)bary.f r0.z, 4, r0.x -bary.f r0.w, 7, r0.x -bary.f r1.x, 5, r0.x -bary.f r1.y, 2, r0.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.x, r1.x -bary.f r1.z, 1, r0.x -mov.f32f32 r1.w, r0.z -mov.f32f32 r2.y, r0.w -mov.f32f32 r2.x, r1.x +bary.f r0.w, 5, r0.x +bary.f r1.x, 7, r0.x +bary.f r2.x, 2, r0.x +bary.f r2.y, 1, r0.x bary.f (ei)r0.x, 0, r0.x -(rpt4)nop -sam.p (f32)(xyzw)r0.y, r1.w, s#0, t#0 -(sy)mul.f r0.w, r0.w, r1.y -mul.f r0.z, r0.z, r1.z -mul.f r0.x, r0.y, r0.x -mov.f32f32 r0.y, r1.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.x, r0.x -(ss)mov.f32f32 r1.w, r0.y -mov.f32f32 r0.y, r0.w -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.x, r0.x -nop -mov.f32f32 r1.z, r0.y -mov.f32f32 r1.y, r0.z -mov.f32f32 r1.x, r0.x +(rpt2)nop +sam.p (f32)(xyzw)r1.x, r0.z, s#0, t#0 +(sy)mul.f r1.z, r1.z, r2.x +mul.f r1.y, r1.y, r2.y +(ss)mul.f r1.x, r1.x, r0.x end -nop -nop ; FRAG: outputs: r1.x (1:0) -; FRAG: inputs: r0.x (1:0,cm=f,il=8,b=1) r0.x (5:0,cm=f,il=12,b=1) -; FRAG: 34 instructions, 0 half, 3 full +; FRAG: inputs: r0.x (1:0,cm=f,il=8,b=1) r0.y (5:0,cm=f,il=12,b=1) +; FRAG: 14 instructions, 0 half, 3 full diff --git a/reference/stk-mines/stk-mines-26.asm b/reference/stk-mines/stk-mines-26.asm index aadaff7..5f20141 100644 --- a/reference/stk-mines/stk-mines-26.asm +++ b/reference/stk-mines/stk-mines-26.asm @@ -1,20 +1,20 @@ ; options: ; VERT: new compiler -@in(r2.x) in0 -@in(r2.y) in1 -@in(r2.z) in2 -@in(r2.w) in3 +@in(r3.x) in0 +@in(r3.y) in1 +@in(r3.z) in2 +@in(r3.w) in3 @in(r0.x) in4 @in(r0.y) in5 @in(r0.z) in6 -@in(r0.w) in8 -@in(r1.x) in9 -@in(r1.y) in10 -@in(r1.z) in11 -@in(r3.x) in12 -@in(r3.y) in13 -@in(r3.z) in14 -@in(r3.w) in15 +@in(r1.x) in8 +@in(r1.y) in9 +@in(r1.z) in10 +@in(r1.w) in11 +@in(r2.x) in12 +@in(r2.y) in13 +@in(r2.z) in14 +@in(r2.w) in15 @out(r0.x) out0 @out(r0.y) out1 @out(r0.z) out2 @@ -27,135 +27,120 @@ @out(r2.y) out9 @out(r2.z) out10 @out(r2.w) out11 -(sy)(ss)mul.f r1.w, r2.x, c13.x +@const(c20.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 +(sy)(ss)mul.f r0.w, r3.x, c13.x mul.f r4.x, r0.x, c4.x -mad.f32 r1.w, c14.x, r2.y, r1.w +mad.f32 r0.w, c14.x, r3.y, r0.w mad.f32 r4.x, c4.y, r0.y, r4.x -mad.f32 r1.w, c15.x, r2.z, r1.w -mov.f32f32 r1.z, r1.z -mad.f32 r4.y, c16.x, r2.w, r1.w -mov.f32f32 r1.w, r4.x -mov.f32f32 r4.x, c10.z +mad.f32 r0.w, c15.x, r3.z, r0.w +mad.f32 r4.x, c4.z, r0.z, r4.x +mad.f32 r0.w, c16.x, r3.w, r0.w +mov.f32f32 r4.y, c10.z mov.f32f32 r4.z, c10.y -mul.f r4.w, r4.y, r4.y -mul.f r5.x, r2.x, c13.y -mad.f32 r5.y, c4.z, r0.z, r1.w -mad.f32 r1.w, c14.y, r2.y, r5.x -mov.f32f32 r1.z, r1.z -mad.f32 r1.w, c15.y, r2.z, r1.w -mul.f r5.x, r5.y, c11.x -mad.f32 r5.z, c16.y, r2.w, r1.w -mul.f r1.w, r0.x, c5.x -max.f r1.z, r1.z, c20.x -mad.f32 r4.x, c8.z, r4.x, c9.z -mad.f32 r4.w, r5.z, r5.z, r4.w -mad.f32 r5.w, c5.y, r0.y, r1.w -min.f r1.w, r1.z, c20.y -add.f r1.z, c17.z, r4.x -mov.f32f32 r4.x, r4.w -mul.f r4.w, r2.x, c13.z -mov.f32f32 r5.w, r5.w -mad.f32 r4.w, c14.z, r2.y, r4.w +mov.f32f32 r4.w, c10.x +mul.f r5.x, r0.w, r0.w +mul.f r5.y, r3.x, c13.y +mul.f r5.z, r4.x, c11.x +mad.f32 r5.y, c14.y, r3.y, r5.y +mul.f r5.w, r0.x, c5.x +mad.f32 r5.y, c15.y, r3.z, r5.y +mad.f32 r5.w, c5.y, r0.y, r5.w +mad.f32 r5.y, c16.y, r3.w, r5.y mad.f32 r5.w, c5.z, r0.z, r5.w -mad.f32 r4.w, c15.z, r2.z, r4.w +mad.f32 r4.y, c8.z, r4.y, c9.z mad.f32 r4.z, c8.y, r4.z, c9.y -mad.f32 r4.w, c16.z, r2.w, r4.w -mad.f32 r5.x, c11.y, r5.w, r5.x -mov.f32f32 r6.x, c10.x +mad.f32 r5.x, r5.y, r5.y, r5.x +mul.f r6.x, r3.x, c13.z +mad.f32 r5.z, c11.y, r5.w, r5.z +mad.f32 r6.x, c14.z, r3.y, r6.x mul.f r0.x, r0.x, c6.x -mad.f32 r4.x, r4.w, r4.w, r4.x -mov.f32f32 r5.x, r5.x +mad.f32 r6.x, c15.z, r3.z, r6.x mad.f32 r0.x, c6.y, r0.y, r0.x -add.f r0.y, c17.y, r4.z -mad.f32 r4.z, c8.x, r6.x, c9.x -mul.f r6.x, r2.x, c0.w -mul.f r6.y, r2.x, c0.z -rsq r4.x, (abs)r4.x -(ss)mov.f32f32 r4.x, r4.x -mov.f32f32 r0.x, r0.x -add.f r4.z, c17.x, r4.z +mad.f32 r0.y, c16.z, r3.w, r6.x mad.f32 r0.x, c6.z, r0.z, r0.x -mul.f r0.z, r4.y, r4.x -mul.f r4.y, r5.z, r4.x -mul.f r4.x, r4.w, r4.x -mad.f32 r4.w, c11.z, r0.x, r5.x -add.f r0.z, c11.x, (neg)r0.z -add.f r4.y, c11.y, (neg)r4.y -add.f r4.x, c11.z, (neg)r4.x -max.f r5.x, r4.w, c20.x -mul.f r5.z, r0.z, r0.z -cmps.f.lt r4.w, (neg)r4.w, c20.x -mad.f32 r5.z, r4.y, r4.y, r5.z -mov.f32f32 r5.x, r5.x -mul.f r1.x, c18.y, r1.x -mul.f r0.w, c18.x, r0.w -mov.f32f32 r5.z, r5.z -mul.f r1.y, c18.z, r1.y -mad.f32 r5.z, r4.x, r4.x, r5.z -mad.f32 r1.x, r5.x, r1.x, r0.y -mad.f32 r4.z, r5.x, r0.w, r4.z -mad.f32 r0.y, c1.w, r2.y, r6.x -mad.f32 r0.w, c1.z, r2.y, r6.y -mul.f r6.x, r2.x, c0.y -mul.f r2.x, r2.x, c0.x -rsq r5.z, (abs)r5.z -(ss)mov.f32f32 r5.z, r5.z -mad.f32 r1.y, r5.x, r1.y, r1.z -mad.f32 r0.y, c2.w, r2.z, r0.y -mad.f32 r1.z, c2.z, r2.z, r0.w -mul.f r0.z, r0.z, r5.z -mul.f r4.y, r4.y, r5.z -mul.f r4.x, r4.x, r5.z -mad.f32 r0.w, c3.w, r2.w, r0.y -mul.f r0.y, r5.y, r0.z -mad.f32 r0.z, c3.z, r2.w, r1.z -mad.f32 r0.y, r5.w, r4.y, r0.y -mad.f32 r1.z, c1.y, r2.y, r6.x -mad.f32 r2.x, c1.x, r2.y, r2.x -mov.f32f32 r2.y, c7.x -mov.f32f32 r0.y, r0.y -mad.f32 r1.z, c2.y, r2.z, r1.z -mad.f32 r0.x, r0.x, r4.x, r0.y -mad.f32 r0.y, c3.y, r2.w, r1.z -mad.f32 r1.z, c2.x, r2.z, r2.x -min.f r4.x, r2.y, c20.z -max.f r4.y, r0.x, c20.x -mad.f32 r0.x, c3.x, r2.w, r1.z -mov.f32f32 r2.w, r3.w -mov.f32f32 r2.z, r3.z -mov.f32f32 r2.y, r3.y -mov.f32f32 r2.x, r3.x -nop -log2 r1.z, r4.y -(ss)mov.f32f32 r1.z, r1.z -(rpt2)nop -mul.f r1.z, r4.x, r1.z +add.f r0.z, c17.z, r4.y +add.f r4.y, c17.y, r4.z +mad.f32 r4.z, r0.y, r0.y, r5.x +mad.f32 r5.x, c11.z, r0.x, r5.z +mad.f32 r4.w, c8.x, r4.w, c9.x +mul.f r5.z, r3.x, c0.w +mul.f r6.x, r3.x, c0.z +mul.f r6.y, r3.x, c0.y +mul.f r3.x, r3.x, c0.x +rsq r4.z, (abs)r4.z +(ss)mov.f32f32 r6.z, r4.z +mul.f r0.y, r0.y, r4.z +(ss)max.f r4.z, r5.x, c20.x +add.f r4.w, c17.x, r4.w +mul.f r0.w, r0.w, r6.z +mul.f r5.y, r5.y, r6.z +(rpt1)nop +add.f r0.w, c11.x, (neg)r0.w +add.f r5.y, c11.y, (neg)r5.y +add.f r0.y, c11.z, (neg)r0.y +mov.f32f32 r6.z, r4.z +mul.f r6.w, r0.w, r0.w +mul.f r1.z, c18.z, r1.z +mad.f32 r6.w, r5.y, r5.y, r6.w +mul.f r1.y, c18.y, r1.y +mad.f32 r6.w, r0.y, r0.y, r6.w +mad.f32 r1.z, r6.z, r1.z, r0.z +mul.f r0.z, c18.x, r1.x +cmps.f.lt r1.x, (neg)r5.x, c20.x +mad.f32 r5.x, c1.w, r3.y, r5.z +mad.f32 r5.z, c1.z, r3.y, r6.x +mad.f32 r6.x, c1.y, r3.y, r6.y +rsq r6.y, (abs)r6.w +(ss)mov.f32f32 r6.w, r6.y +mul.f r0.y, r0.y, r6.y +mad.f32 r1.y, r6.z, r1.y, r4.y +mad.f32 r4.y, r4.z, r0.z, r4.w +mul.f r0.z, r0.w, r6.w +mul.f r4.z, r5.y, r6.w +mad.f32 r0.w, c2.w, r3.z, r5.x +mad.f32 r4.w, c2.z, r3.z, r5.z +mul.f r0.z, r4.x, r0.z +mad.f32 r0.w, c3.w, r3.w, r0.w +mad.f32 r4.x, r5.w, r4.z, r0.z +mad.f32 r0.z, c3.z, r3.w, r4.w +mad.f32 r0.x, r0.x, r0.y, r4.x +mad.f32 r0.y, c2.y, r3.z, r6.x +mad.f32 r3.x, c1.x, r3.y, r3.x +max.f r1.w, r1.w, c20.x +max.f r0.x, r0.x, c20.x +mad.f32 r0.y, c3.y, r3.w, r0.y +mad.f32 r3.x, c2.x, r3.z, r3.x +min.f r1.w, r1.w, c20.y +mov.f32f32 r3.y, c7.x +(rpt1)nop +log2 r3.z, r0.x +(ss)mad.f32 r0.x, c3.x, r3.w, r3.x +min.f r3.x, r3.y, c20.z (rpt2)nop -mov.f32f32 r1.z, r1.z +(ss)mul.f r3.x, r3.x, r3.z (rpt5)nop -exp2 r1.z, r1.z -(ss)mov.f32f32 r1.z, r1.z -(rpt2)nop -sel.b32 r1.z, r1.z, r4.w, c20.x -(rpt2)nop -mov.f32f32 r1.z, r1.z -(rpt2)nop -mov.f32f32 r1.z, r1.z +exp2 r3.x, r3.x +(ss)sel.b32 r1.x, r3.x, r1.x, c20.x (rpt2)nop -mad.f32 r1.y, c19.z, r1.z, r1.y -mad.f32 r1.x, c19.y, r1.z, r1.x -mad.f32 r1.z, c19.x, r1.z, r4.z -nop -max.f r1.y, r1.y, c20.x +(ss)mov.f32f32 r3.x, r1.x +mad.f32 r1.x, c19.x, r1.x, r4.y +(rpt1)nop +mov.f32f32 r3.x, r3.x max.f r1.x, r1.x, c20.x -max.f r3.x, r1.z, c20.x +(rpt1)nop +mad.f32 r1.z, c19.z, r3.x, r1.z +mad.f32 r1.y, c19.y, r3.x, r1.y +min.f r1.x, r1.x, c20.y nop -min.f r1.z, r1.y, c20.y -min.f r1.y, r1.x, c20.y -min.f r1.x, r3.x, c20.y +max.f r1.z, r1.z, c20.x +max.f r1.y, r1.y, c20.x +(rpt1)nop +min.f r1.z, r1.z, c20.y +min.f r1.y, r1.y, c20.y end nop +nop ; VERT: outputs: r0.x (0:0) r1.x (1:0) r2.x (5:0) -; VERT: inputs: r2.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r0.w (0:0,cm=f,il=16,b=0) r3.x (0:0,cm=f,il=20,b=0) -; VERT: 144 instructions, 0 half, 7 full +; VERT: inputs: r3.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r1.x (0:0,cm=f,il=16,b=0) r2.x (0:0,cm=f,il=20,b=0) +; VERT: 124 instructions, 0 half, 7 full diff --git a/reference/stk-mines/stk-mines-27.asm b/reference/stk-mines/stk-mines-27.asm index 1435d60..141701a 100644 --- a/reference/stk-mines/stk-mines-27.asm +++ b/reference/stk-mines/stk-mines-27.asm @@ -6,275 +6,191 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c8.x) 0x40000000, 0xbf800000, 0x40800000, 0x3f000000 +@const(c9.x) 0x00000000, 0x3f333333, 0x3e99999a, 0x3d4ccccd +@const(c10.x) 0x3f800000, 0x00000000, 0x00000000, 0x00000000 +@const(c11.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.z, 0, r0.x absneg.f r0.w, (neg)c6.x -mov.f32f32 r1.x, c3.x -bary.f r1.y, 1, r0.x -add.f r1.z, r0.z, c3.x -add.f r1.w, r0.z, c4.x -add.f r2.x, c7.x, r0.w -add.f r0.z, r0.z, r1.x -mov.f32f32 r1.x, r1.z -mov.f32f32 r1.z, r1.w -mov.f32f32 r1.w, r2.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r2.x, c9.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r2.y, r1.x -add.f r1.x, r1.y, c3.y -mov.f32f32 r2.w, r1.z -add.f r1.z, r1.y, c4.y -rcp r1.w, r1.w -(ss)mov.f32f32 r1.w, r1.w -mov.f32f32 r1.x, r1.x -bary.f r3.y, 6, r0.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r0.z, r0.z -mov.f32f32 r1.x, r1.x -add.f r0.w, r3.y, r0.w -mov.f32f32 r1.z, r1.z -mov.f32f32 r3.z, r0.z -mov.f32f32 r2.z, r1.x -mul.f r0.z, r0.w, r1.w -mov.f32f32 r3.x, r1.z -add.f r0.w, r1.y, r2.x -cmps.f.lt r1.x, c6.x, r3.y -cmps.f.lt r1.y, c7.x, r3.y -bary.f r1.z, 11, r0.x -sam (f32)(xyz)r1.w, r2.y, s#0, t#0 -(sy)mad.f32 r2.x, c8.x, r2.x, c8.y -mov.f32f32 r0.z, r0.z -(ss)nop -sam (f32)(xyz)r2.z, r2.w, s#1, t#1 -(sy)(ss)mad.f32 r2.w, c8.x, r2.w, c8.y -mad.f32 r2.z, c8.x, r2.z, c8.y -mov.f32f32 r2.x, r2.x -mul.f r3.y, r0.z, c5.w -mul.f r3.w, r0.z, c5.z -mul.f r4.x, r0.z, c5.y -mul.f r2.x, c8.z, r2.x -mov.f32f32 r2.w, r2.w -mov.f32f32 r2.z, r2.z -mov.f32f32 r3.y, r3.y -mov.f32f32 r2.x, r2.x -mad.f32 r1.w, c8.x, r1.w, c8.y -mul.f r2.w, c8.z, r2.w -mov.f32f32 r2.z, r2.z -mov.f32f32 r4.y, r3.w -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.w, r2.w -mul.f r3.w, r2.z, r2.z -mov.f32f32 r4.x, r4.x -mul.f r4.z, r1.w, r1.w -mov.f32f32 r2.w, r2.w -mad.f32 r4.z, r2.x, r2.x, r4.z -mul.f r4.w, r0.z, c5.x -add.f r0.z, c10.x, (neg)r0.z -mad.f32 r3.w, r2.w, r2.w, r3.w -mov.f32f32 r4.z, r4.z +bary.f r1.x, 6, r0.x +mov.f32f32 r1.y, c3.x +add.f r1.z, r0.z, c4.x +bary.f r2.x, 1, r0.x +add.f r2.y, r0.z, c3.x +add.f r2.w, c7.x, r0.w +cmps.f.lt r3.x, c6.x, r1.x +add.f r1.w, r2.x, c4.y +add.f r2.z, r2.x, c3.y +cmps.f.lt r3.y, c7.x, r1.x +add.f r3.z, r0.z, r1.y +cov.u32f32 r0.z, r3.x +rcp r1.y, r2.w +add.f r0.w, r1.x, r0.w +cov.u32f32 r1.x, r3.y +sam (f32)(xyz)r3.w, r1.z, s#1, t#1 +(sy)(ss)mad.f32 r1.z, c8.x, r4.x, c8.y +sam (f32)(xyz)r2.y, r2.y, s#0, t#0 +(sy)mad.f32 r1.w, c8.x, r2.z, c8.y +(ss)mul.f r0.w, r0.w, r1.y +mad.f32 r1.y, c8.x, r3.w, c8.y +mul.f r1.z, c8.z, r1.z +mul.f r1.w, c8.z, r1.w +(ss)mov.f32f32 r2.z, r0.w +mov.f32f32 r3.x, r1.y +mov.f32f32 r3.y, r1.z +mov.f32f32 r4.x, r1.w mad.f32 r2.y, c8.x, r2.y, c8.y -mov.f32f32 r4.w, r4.w -mad.f32 r3.x, c8.x, r3.x, c8.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r2.y, r2.y -mov.f32f32 r3.w, r3.w mov.f32f32 r3.x, r3.x -mov.f32f32 r0.w, r0.w -mad.f32 r4.z, r2.y, r2.y, r4.z -cov.u32f32 r1.x, r1.x -cov.u32f32 r1.y, r1.y -(rpt3)nop -rsq r4.z, r4.z -(ss)mov.f32f32 r4.z, r4.z -mov.f32f32 r3.x, r3.x -mov.f32f32 r0.w, r0.w +mov.f32f32 r3.y, r3.y +mul.f r4.z, r2.z, c5.w +mul.f r4.w, r2.z, c5.z +mul.f r1.y, r1.y, r3.x +mov.f32f32 r5.x, r2.y +mad.f32 r1.y, r1.z, r3.y, r1.y +mad.f32 r1.z, c8.x, r4.y, c8.y +mul.f r4.y, r2.z, c5.y +mul.f r2.z, r2.z, c5.x +mul.f r2.y, r2.y, r5.x +mov.f32f32 r3.w, r1.z +mad.f32 r1.w, r1.w, r4.x, r2.y +mad.f32 r2.y, c8.x, r2.w, c8.y +add.f r0.w, c10.x, (neg)r0.w +mov.f32f32 r2.w, r3.w +cmps.f.ne r0.z, r0.z, c9.x cmps.f.ne r1.x, r1.x, c9.x -mul.f r1.w, r1.w, r4.z -mul.f r2.x, r2.x, r4.z -mul.f r2.y, r2.y, r4.z -mad.f32 r4.z, r3.x, r3.x, r3.w -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r2.y, r2.y -mov.f32f32 r3.w, r0.w -cmps.f.ne r0.w, r1.y, c9.x -mov.f32f32 r1.y, r1.z -rsq r1.z, r4.z -(ss)mov.f32f32 r1.z, r1.z -(ss)bary.f r4.z, 12, r0.x -bary.f r5.x, 13, r0.x +mov.f32f32 r3.w, c9.x +mad.f32 r1.y, r2.w, r2.w, r1.y +mov.f32f32 r2.w, r2.y mov.f32f32 r5.y, c9.x -mad.f32 r1.w, r2.z, r1.z, r1.w -mad.f32 r2.x, r2.w, r1.z, r2.x -mad.f32 r1.z, r3.x, r1.z, r2.y -sam (f32)(xyzw)r2.y, r3.z, s#2, t#2 -(ss)mov.f32f32 r3.z, r4.z -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r3.w, r5.x -mul.f r1.w, r1.w, c8.w -mul.f r2.x, r2.x, c8.w -mul.f r1.z, r1.z, c8.w -nop -mov.f32f32 r1.w, r1.w -bary.f r4.z, 8, r0.x -mov.f32f32 r2.x, r2.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r5.x, c9.x -mul.f r5.z, r1.w, r4.z -bary.f r5.w, 9, r0.x -mul.f r6.x, r4.z, r1.w -mov.f32f32 r6.y, c5.w -mov.f32f32 r6.z, c5.z -mad.f32 r5.z, r2.x, r5.w, r5.z -mad.f32 r6.x, r5.w, r2.x, r6.x -mov.f32f32 r6.w, c5.y +mov.f32f32 r5.z, c9.x +mov.f32f32 r5.w, c5.w +mov.f32f32 r6.x, c5.z +mov.f32f32 r6.y, c5.y +rsq r1.y, r1.y +(ss)mov.f32f32 r6.z, r1.y +mad.f32 r1.w, r2.w, r2.w, r1.w +add.f r3.w, r2.x, r3.w +bary.f r2.x, 11, r0.x +bary.f r2.w, 12, r0.x +bary.f r6.w, 13, r0.x +bary.f r7.x, 8, r0.x +bary.f r7.y, 9, r0.x +rsq r1.w, r1.w +(ss)mov.f32f32 r7.z, r1.w +(ss)mul.f r1.w, r2.y, r1.w +sam (f32)(xyzw)r7.w, r3.z, s#2, t#2 bary.f (ei)r0.x, 10, r0.x -mov.f32f32 r0.y, r5.z -mov.f32f32 r5.z, r6.x -mov.f32f32 r6.x, c5.x -mad.f32 r0.y, r1.z, r0.x, r0.y -mad.f32 r5.z, r0.x, r1.z, r5.z -(rpt1)nop -mul.f r1.w, r0.y, r1.w -max.f r5.z, r5.z, c9.x -mul.f r2.x, r0.y, r2.x -mul.f r0.y, r0.y, r1.z -mov.f32f32 r1.z, r1.w -mov.f32f32 r1.w, r5.z -mov.f32f32 r2.x, r2.x -mov.f32f32 r0.y, r0.y -mul.f r1.z, c8.x, r1.z -mad.f32 r1.w, c9.y, r1.w, c9.z -mul.f r2.x, c8.x, r2.x -mul.f r0.y, c8.x, r0.y -mov.f32f32 r1.z, r1.z -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r0.y, r0.y -add.f r1.z, r4.z, (neg)r1.z -(sy)mul.f r3.x, r3.x, r1.w -mul.f r2.w, r2.w, r1.w -mul.f r2.z, r2.z, r1.w -mov.f32f32 r1.z, r1.z -mov.f32f32 r4.z, r3.x -mov.f32f32 r5.z, r2.w -mov.f32f32 r7.x, r2.z -mul.f r7.y, r1.z, r1.z -add.f r2.x, r5.w, (neg)r2.x -add.f r4.z, r4.z, r5.x -add.f r0.x, r0.x, (neg)r0.y -mul.f r0.y, r2.y, r1.w -mov.f32f32 r1.w, r2.x -add.f r2.x, r4.z, r5.y -mov.f32f32 r0.x, r0.x -mov.f32f32 r2.y, r0.y -mad.f32 r4.z, r1.w, r1.w, r7.y +mov.f32f32 r0.y, c5.x +mul.f r2.y, r5.x, r7.z +(ss)mul.f r3.z, r4.x, r7.z +mad.f32 r2.y, r3.x, r6.z, r2.y +mad.f32 r3.x, r3.y, r6.z, r3.z +mad.f32 r1.y, r1.z, r1.y, r1.w +nop +mul.f r1.z, r2.y, c8.w +mul.f r1.w, r3.x, c8.w +mul.f r1.y, r1.y, c8.w +nop +mov.f32f32 r2.y, r1.z +mul.f r1.z, r7.x, r1.z +mov.f32f32 r3.x, r1.w +mov.f32f32 r3.y, r1.y +mul.f r3.z, r2.y, r7.x +mad.f32 r1.z, r7.y, r1.w, r1.z +mad.f32 r1.w, r3.x, r7.y, r3.z +mad.f32 r1.y, r0.x, r1.y, r1.z +mad.f32 r1.z, r3.y, r0.x, r1.w (rpt2)nop -mov.f32f32 r4.z, r4.z -nop -mad.f32 r4.z, r0.x, r0.x, r4.z -(rpt5)nop -rsq r4.z, r4.z -(ss)mov.f32f32 r4.z, r4.z +mul.f r1.w, r1.z, r2.y +max.f r1.y, r1.y, c9.x +mul.f r2.y, r1.z, r3.x +mul.f r1.z, r1.z, r3.y +mul.f r1.w, c8.x, r1.w +mad.f32 r1.y, c9.y, r1.y, c9.z +mul.f r2.y, c8.x, r2.y +mul.f r1.z, c8.x, r1.z +add.f r1.w, r7.x, (neg)r1.w +mov.f32f32 r3.x, r1.y +add.f r2.y, r7.y, (neg)r2.y +add.f r0.x, r0.x, (neg)r1.z +mov.f32f32 r1.z, r1.w +(sy)mul.f r3.y, r8.z, r3.x +mov.f32f32 r3.z, r2.y +mov.f32f32 r3.w, r0.x +mul.f r1.w, r1.w, r1.z +add.f r4.x, r3.y, r5.z +mad.f32 r1.w, r2.y, r3.z, r1.w +mul.f r2.y, r8.y, r3.x +mad.f32 r1.w, r3.w, r3.w, r1.w +add.f r3.w, r4.x, r5.y +mul.f r3.x, r8.x, r3.x +mul.f r1.y, r7.w, r1.y (rpt2)nop -mul.f r1.z, r1.z, r4.z -mul.f r1.w, r1.w, r4.z -mul.f r0.x, r0.x, r4.z -nop -mov.f32f32 r1.z, r1.z -mov.f32f32 r1.w, r1.w -mov.f32f32 r0.x, r0.x +rsq r1.w, r1.w +(ss)mov.f32f32 r4.x, r1.w +mul.f r0.x, r0.x, r1.w +(rpt1)nop +mul.f r1.z, r1.z, r4.x +(ss)mul.f r1.w, r3.z, r4.x +(rpt1)nop +mul.f r1.z, r1.z, r2.x nop -mul.f r1.y, r1.z, r1.y +mad.f32 r1.z, r1.w, r2.w, r1.z nop -mad.f32 r1.y, r1.w, r3.z, r1.y -(rpt2)nop -mov.f32f32 r1.y, r1.y -nop -mad.f32 r0.x, r0.x, r3.w, r1.y -(rpt2)nop -mov.f32f32 r0.x, r0.x +mad.f32 r0.x, r0.x, r6.w, r1.z (rpt2)nop max.f r0.x, r0.x, c9.x (rpt2)nop -mov.f32f32 r0.x, r0.x -(rpt2)nop -mov.f32f32 r1.y, r0.x mov.f32f32 r1.z, r0.x -cmps.f.lt r0.x, c9.x, r0.x -(rpt1)nop -mul.f r1.y, r1.y, r1.z -cov.u32f32 r0.x, r0.x +(rpt2)nop +mul.f r0.x, r0.x, r1.z +cmps.f.lt r1.z, c9.x, r1.z (rpt1)nop -mov.f32f32 r1.y, r1.y -cmps.f.ne r0.x, r0.x, c9.x +mov.f32f32 r1.w, r0.x +cov.u32f32 r1.z, r1.z (rpt1)nop -mul.f r1.y, r1.y, r1.y -sel.b32 r1.z, r2.x, r0.x, r3.x +mul.f r0.x, r0.x, r1.w +cmps.f.ne r1.z, r1.z, c9.x (rpt1)nop -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.w, r1.z +mov.f32f32 r1.w, r0.x +sel.b32 r2.x, r3.w, r1.z, r3.y +mul.f r0.x, r0.x, c9.w +nop +mul.f r1.w, r1.w, r1.w +mad.f32 r2.w, r0.w, r2.x, r4.z +add.f r3.y, r2.y, r0.x +add.f r3.z, r3.x, r0.x +mul.f r1.w, r1.w, c9.z +add.f r0.x, r1.y, r0.x +sel.b32 r2.x, r2.w, r0.z, r2.x +nop +mov.f32f32 r2.w, r1.w +add.f r0.x, r0.x, r1.w +sel.b32 r1.w, r5.w, r1.x, r2.x +nop +add.f r2.x, r3.y, r2.w +add.f r2.w, r3.z, r2.w +sel.b32 r0.x, r0.x, r1.z, r1.y +nop +sel.b32 r1.y, r2.x, r1.z, r2.y +sel.b32 r1.z, r2.w, r1.z, r3.x +mad.f32 r2.x, r0.w, r0.x, r2.z +nop +mad.f32 r2.y, r0.w, r1.y, r4.w +mad.f32 r0.w, r0.w, r1.z, r4.y +sel.b32 r0.x, r2.x, r0.z, r0.x +nop +sel.b32 r1.y, r2.y, r0.z, r1.y +sel.b32 r0.z, r0.w, r0.z, r1.z (rpt1)nop -mul.f r2.x, r1.y, r1.y -mov.f32f32 r1.w, r1.w -mul.f r1.y, r1.y, c9.w -nop -mov.f32f32 r2.x, r2.x -mad.f32 r1.w, r0.z, r1.w, r3.y -mov.f32f32 r3.x, r1.y -mov.f32f32 r3.y, r1.y -mul.f r2.x, r2.x, c9.z -sel.b32 r1.z, r1.w, r1.x, r1.z -add.f r1.w, r5.z, r3.x -add.f r3.x, r7.x, r3.y -mov.f32f32 r2.x, r2.x -sel.b32 r1.z, r6.y, r0.w, r1.z -mov.f32f32 r1.y, r1.y -nop -mov.f32f32 r3.y, r2.x -mov.f32f32 r3.z, r2.x -mov.f32f32 r2.x, r2.x -add.f r1.y, r2.y, r1.y -add.f r2.y, r1.w, r3.y -add.f r3.x, r3.x, r3.z -mov.f32f32 r1.w, r1.z -nop -sel.b32 r1.z, r2.y, r0.x, r2.w -sel.b32 r2.y, r3.x, r0.x, r2.z -add.f r1.y, r1.y, r2.x -nop -mov.f32f32 r2.x, r1.z -mov.f32f32 r2.z, r2.y -sel.b32 r0.x, r1.y, r0.x, r0.y -nop -mov.f32f32 r0.y, r2.x -mov.f32f32 r1.y, r2.z -mov.f32f32 r2.x, r0.x -nop -mad.f32 r0.y, r0.z, r0.y, r4.y -mad.f32 r1.y, r0.z, r1.y, r4.x -mov.f32f32 r2.x, r2.x -nop -sel.b32 r0.y, r0.y, r1.x, r1.z -sel.b32 r1.y, r1.y, r1.x, r2.y -mad.f32 r0.z, r0.z, r2.x, r4.w +sel.b32 r1.z, r6.x, r1.x, r1.y +sel.b32 r1.y, r6.y, r1.x, r0.z +sel.b32 r1.x, r0.y, r1.x, r0.x +end nop -sel.b32 r0.y, r6.z, r0.w, r0.y -sel.b32 r1.y, r6.w, r0.w, r1.y -sel.b32 r0.x, r0.z, r1.x, r0.x nop -mov.f32f32 r1.z, r0.y -mov.f32f32 r1.y, r1.y -sel.b32 r0.x, r6.x, r0.w, r0.x -(rpt2)nop -mov.f32f32 r1.x, r0.x -end nop ; FRAG: outputs: r1.x (1:0) -; FRAG: inputs: r0.x (5:0,cm=f,il=8,b=1) r1.x (5:20,cm=f,il=12,b=1) r0.w (5:21,cm=f,il=16,b=1) r3.x (5:22,cm=f,il=20,b=1) -; FRAG: 297 instructions, 0 half, 8 full +; FRAG: inputs: r0.x (5:0,cm=f,il=8,b=1) r1.x (5:20,cm=f,il=12,b=1) r1.y (5:21,cm=f,il=16,b=1) r3.x (5:22,cm=f,il=20,b=1) +; FRAG: 193 instructions, 0 half, 9 full diff --git a/reference/stk-mines/stk-mines-28.asm b/reference/stk-mines/stk-mines-28.asm index 2d84780..726bef5 100644 --- a/reference/stk-mines/stk-mines-28.asm +++ b/reference/stk-mines/stk-mines-28.asm @@ -1,16 +1,16 @@ ; options: ; VERT: new compiler -@in(r1.x) in0 -@in(r1.y) in1 -@in(r1.z) in2 -@in(r1.w) in3 +@in(r4.z) in0 +@in(r4.w) in1 +@in(r5.x) in2 +@in(r5.y) in3 @in(r0.x) in4 @in(r0.y) in5 @in(r0.z) in6 -@in(r5.x) in8 -@in(r5.y) in9 -@in(r5.z) in10 -@in(r5.w) in11 +@in(r1.x) in8 +@in(r1.y) in9 +@in(r1.z) in10 +@in(r1.w) in11 @out(r0.x) out0 @out(r0.y) out1 @out(r0.z) out2 @@ -31,191 +31,144 @@ @out(r4.y) out17 @out(r4.z) out18 @out(r4.w) out19 +@const(c12.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)mov.f32f32 r0.w, c1.x -mul.f r2.x, c8.x, r1.x +mul.f r2.x, c8.x, r4.z mov.f32f32 r2.y, c1.y mov.f32f32 r2.z, c1.z mul.f r2.w, r0.w, r0.x mov.f32f32 r3.x, c2.x -mad.f32 r2.x, c9.x, r1.y, r2.x +mad.f32 r2.x, c9.x, r4.w, r2.x mul.f r3.y, r0.w, r0.w -mad.f32 r2.x, c10.x, r1.z, r2.x +mad.f32 r2.x, c10.x, r5.x, r2.x mad.f32 r2.w, r3.x, r0.y, r2.w -mad.f32 r2.x, c11.x, r1.w, r2.x -mad.f32 r3.x, r2.y, r2.y, r3.y -mul.f r3.y, r2.y, r0.x -mov.f32f32 r2.w, r2.w -mov.f32f32 r3.z, c3.x -mov.f32f32 r2.x, r2.x -mov.f32f32 r3.x, r3.x +mov.f32f32 r3.x, c3.x +mad.f32 r2.x, c11.x, r5.y, r2.x +mad.f32 r3.y, r2.y, r2.y, r3.y +mul.f r3.z, r2.y, r0.x +mad.f32 r2.w, r3.x, r0.z, r2.w +mov.f32f32 r3.x, r2.x +mad.f32 r3.y, r2.z, r2.z, r3.y mov.f32f32 r3.w, c2.y -mad.f32 r2.w, r3.z, r0.z, r2.w -mul.f r3.z, r2.x, r2.x -mul.f r4.x, c8.y, r1.x -mad.f32 r3.x, r2.z, r2.z, r3.x -mov.f32f32 r4.y, r2.w -mad.f32 r2.w, c9.y, r1.y, r4.x -mad.f32 r3.y, r3.w, r0.y, r3.y -mul.f r0.x, r2.z, r0.x -mul.f r3.w, r4.y, r4.y -mad.f32 r2.w, c10.y, r1.z, r2.w -mov.f32f32 r3.y, r3.y -mov.f32f32 r4.x, c3.y -mad.f32 r2.w, c11.y, r1.w, r2.w -rsq r3.x, r3.x -(ss)mov.f32f32 r3.x, r3.x -mov.f32f32 r4.z, c2.z -mad.f32 r3.y, r4.x, r0.z, r3.y mov.f32f32 r4.x, r2.w -mul.f r0.w, r0.w, r3.x -mul.f r2.y, r2.y, r3.x -mov.f32f32 r3.y, r3.y -mad.f32 r2.w, r4.x, r4.x, r3.z +mul.f r3.x, r3.x, r3.x +mul.f r4.y, c8.y, r4.z +mad.f32 r3.z, r3.w, r0.y, r3.z +mul.f r2.w, r2.w, r4.x +mov.f32f32 r3.w, c3.y +mad.f32 r4.y, c9.y, r4.w, r4.y +rsq r3.y, r3.y +(ss)mov.f32f32 r5.z, r3.y +mad.f32 r4.y, c10.y, r5.x, r4.y +mad.f32 r3.z, r3.w, r0.z, r3.z +mad.f32 r3.w, c11.y, r5.y, r4.y +mul.f r0.w, r0.w, r5.z +mul.f r2.y, r2.y, r5.z +mov.f32f32 r4.y, r3.z +mov.f32f32 r5.z, r3.w +mul.f r5.w, c0.x, r0.w mov.f32f32 r0.w, r0.w -mov.f32f32 r2.y, r2.y -mad.f32 r3.z, r3.y, r3.y, r3.w -mov.f32f32 r2.w, r2.w -mul.f r3.w, c8.z, r1.x -mul.f r4.w, c0.x, r0.w -mov.f32f32 r3.z, r3.z -mad.f32 r0.x, r4.z, r0.y, r0.x -mad.f32 r0.y, c9.z, r1.y, r3.w -mad.f32 r3.w, c0.y, r2.y, r4.w -mad.f32 r0.y, c10.z, r1.z, r0.y -mov.f32f32 r0.x, r0.x -mov.f32f32 r4.z, c3.z -mad.f32 r0.y, c11.z, r1.w, r0.y -mov.f32f32 r3.w, r3.w -mul.f r2.z, r2.z, r3.x -mad.f32 r0.x, r4.z, r0.z, r0.x -mov.f32f32 r0.y, r0.y -mul.f r0.z, c4.w, r1.x -mul.f r3.x, c4.z, r1.x -mov.f32f32 r0.x, r0.x -mad.f32 r2.w, r0.y, r0.y, r2.w +mad.f32 r2.w, r3.z, r4.y, r2.w +mul.f r0.x, r2.z, r0.x +mov.f32f32 r3.z, c2.z +mad.f32 r3.x, r3.w, r5.z, r3.x +mul.f r3.w, c8.z, r4.z +mad.f32 r5.w, c0.y, r2.y, r5.w +mad.f32 r0.x, r3.z, r0.y, r0.x +mov.f32f32 r0.y, c3.z +mad.f32 r3.z, c9.z, r4.w, r3.w +mul.f r2.z, r2.z, r3.y +(ss)mad.f32 r3.y, c10.z, r5.x, r3.z +mad.f32 r0.x, r0.y, r0.z, r0.x +mad.f32 r0.y, c11.z, r5.y, r3.y +mad.f32 r3.y, c0.z, r2.z, r5.w +mov.f32f32 r0.z, r2.y +mov.f32f32 r2.y, r0.x +mov.f32f32 r3.z, r0.y +mov.f32f32 r3.w, r3.y mov.f32f32 r2.z, r2.z -mad.f32 r0.z, c5.w, r1.y, r0.z -mad.f32 r3.z, r0.x, r0.x, r3.z -mad.f32 r0.z, c6.w, r1.z, r0.z -mad.f32 r3.x, c5.z, r1.y, r3.x -mul.f r4.z, c4.y, r1.x -mul.f r1.x, c4.x, r1.x -mad.f32 r0.z, c7.w, r1.w, r0.z -mad.f32 r3.x, c6.z, r1.z, r3.x -rsq r3.z, r3.z -(ss)mov.f32f32 r3.z, r3.z -rsq r2.w, r2.w -(ss)mov.f32f32 r4.w, r2.w -mad.f32 r3.w, c0.z, r2.z, r3.w -(ss)mov.f32f32 r2.w, r0.z -mul.f r0.x, r0.x, r3.z -mul.f r2.x, r2.x, r4.w -mul.f r4.y, r4.y, r3.z -mul.f r3.y, r3.y, r3.z -mov.f32f32 r0.x, r0.x -mov.f32f32 r2.x, r2.x -mov.f32f32 r3.z, r4.y -mov.f32f32 r3.y, r3.y -mul.f r4.y, r0.x, r2.y -absneg.f r2.x, (neg)r2.x -mul.f r6.x, c0.x, r3.z -mad.f32 r4.y, r3.y, r2.z, (neg)r4.y -mad.f32 r6.x, c0.y, r3.y, r6.x -mov.f32f32 r6.y, r2.x -mul.f r2.x, r3.z, r2.z -mov.f32f32 r2.z, r4.y -mov.f32f32 r4.y, r6.x -mul.f r6.x, r6.y, r6.y -mul.f r4.x, r4.x, r4.w -mul.f r2.z, c0.x, r2.z -mad.f32 r2.x, r0.x, r0.w, (neg)r2.x -mad.f32 r0.x, c0.z, r0.x, r4.y +mad.f32 r2.y, r2.y, r2.y, r2.w +mad.f32 r0.y, r0.y, r3.z, r3.x +mul.f r3.x, r3.w, r3.w +mul.f r2.w, c4.w, r4.z +mul.f r3.w, c4.z, r4.z +mul.f r5.w, c4.y, r4.z +mul.f r4.z, c4.x, r4.z +rsq r2.y, r2.y +(ss)mov.f32f32 r6.x, r2.y +rsq r0.y, r0.y +(ss)mov.f32f32 r6.y, r0.y +(ss)mul.f r0.y, r2.x, r0.y +mul.f r0.x, r0.x, r2.y +mul.f r2.x, r4.y, r6.x +mul.f r2.y, r5.z, r6.y +mul.f r4.x, r4.x, r6.x +absneg.f r4.y, (neg)r0.y +mov.f32f32 r0.y, r2.x +mov.f32f32 r5.z, r0.x +absneg.f r2.y, (neg)r2.y +mul.f r6.x, c0.x, r4.x mov.f32f32 r4.x, r4.x -mul.f r0.w, r3.y, r0.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r0.x, r0.x -absneg.f r3.y, (neg)r4.x -mad.f32 r0.w, r3.z, r2.y, (neg)r0.w -mad.f32 r2.x, c0.y, r2.x, r2.z -mov.f32f32 r0.x, r0.x -mov.f32f32 r4.x, r3.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r3.y, r3.w -mul.f r0.y, r0.y, r4.w -mad.f32 r0.w, c0.z, r0.w, r2.x -mad.f32 r2.x, r4.x, r4.x, r6.x -mul.f r2.y, r3.y, r3.y -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r2.x, r2.x -mad.f32 r3.x, c7.z, r1.w, r3.x -mad.f32 r3.z, c5.y, r1.y, r4.z -mov.f32f32 r3.w, r0.w -absneg.f r0.y, (neg)r0.y -mov.f32f32 r2.z, r3.x -mad.f32 r0.w, c6.y, r1.z, r3.z -mad.f32 r2.y, r3.w, r3.w, r2.y -mov.f32f32 r3.z, r0.y -mad.f32 r0.y, c7.y, r1.w, r0.w -mad.f32 r0.w, c5.x, r1.y, r1.x -mov.f32f32 r1.x, r2.y -mad.f32 r1.y, r3.z, r3.z, r2.x -mad.f32 r1.x, r0.x, r0.x, r1.x +mul.f r6.z, r5.z, r0.z +mov.f32f32 r6.w, r2.y +mad.f32 r6.z, r0.y, r2.z, (neg)r6.z +mov.f32f32 r7.x, r4.y +mad.f32 r2.x, c0.y, r2.x, r6.x +mul.f r2.z, r4.x, r2.z +mul.f r6.x, c0.x, r6.z +mad.f32 r2.z, r5.z, r0.w, (neg)r2.z +mul.f r5.z, r7.x, r7.x +mad.f32 r0.x, c0.z, r0.x, r2.x +mad.f32 r2.x, r2.y, r6.w, r5.z +mad.f32 r2.y, c0.y, r2.z, r6.x +mul.f r0.y, r0.y, r0.w +mul.f r0.w, r3.z, r6.y +mad.f32 r0.y, r4.x, r0.z, (neg)r0.y +mov.f32f32 r0.z, r0.x +mad.f32 r2.z, c5.w, r4.w, r2.w +mad.f32 r2.w, c5.z, r4.w, r3.w +mad.f32 r0.y, c0.z, r0.y, r2.y +absneg.f r2.y, (neg)r0.w +mov.f32f32 r3.z, r0.z +mad.f32 r0.z, c6.w, r5.x, r2.z +mov.f32f32 r2.z, r0.y +mov.f32f32 r4.x, r2.y +mad.f32 r0.w, c7.w, r5.y, r0.z +mad.f32 r0.z, c6.z, r5.x, r2.w +mov.f32f32 r5.z, r2.z +mad.f32 r2.x, r2.y, r4.x, r2.x +mov.f32f32 r2.w, r0.w +mad.f32 r0.z, c7.z, r5.y, r0.z +mad.f32 r0.y, r0.y, r5.z, r3.x +mad.f32 r2.y, c5.y, r4.w, r5.w +mad.f32 r0.x, r0.x, r3.z, r0.y +mov.f32f32 r2.z, r0.z +mad.f32 r0.y, c6.y, r5.x, r2.y +mad.f32 r2.y, c5.x, r4.w, r4.z +mad.f32 r0.y, c7.y, r5.y, r0.y +mad.f32 r4.z, c6.x, r5.x, r2.y +nop +rsq r0.x, r0.x +(ss)mov.f32f32 r2.y, r0.x +mul.f r3.x, r3.y, r0.x +(ss)rsq r0.x, r2.x +(ss)mov.f32f32 r2.x, r0.x +mul.f r3.w, r4.y, r0.x +mul.f r3.z, r3.z, r2.y +mul.f r3.y, r5.z, r2.y +mul.f r4.y, r4.x, r2.x +mul.f r4.x, r6.w, r2.x mov.f32f32 r2.y, r0.y -mad.f32 r1.z, c6.x, r1.z, r0.w -mov.f32f32 r0.w, r0.z -mov.f32f32 r0.z, r3.x -mov.f32f32 r0.y, r0.y -mad.f32 r1.z, c7.x, r1.w, r1.z -rsq r1.x, r1.x -(ss)mov.f32f32 r1.x, r1.x -rsq r1.y, r1.y -(ss)mov.f32f32 r1.y, r1.y -mov.f32f32 r1.w, (0.000000) -mov.f32f32 r2.x, r1.z -mul.f r0.x, r0.x, r1.x -mul.f r3.x, r3.w, r1.x -mul.f r1.x, r3.y, r1.x -mul.f r3.y, r3.z, r1.y -mov.f32f32 r0.x, r0.x -mov.f32f32 r3.x, r3.x -mov.f32f32 r1.x, r1.x -mov.f32f32 r3.w, r3.y -mov.f32f32 r0.x, r0.x -mov.f32f32 r3.x, r3.x -mov.f32f32 r1.x, r1.x -mov.f32f32 r4.y, r3.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r3.x, r3.x -mov.f32f32 r1.x, r1.x +mad.f32 r0.x, c7.x, r5.y, r4.z +mov.f32f32 r4.z, r4.y +mov.f32f32 r4.w, (0.000000) nop -mov.f32f32 r3.z, r0.x -mov.f32f32 r3.y, r3.x -mov.f32f32 r3.x, r1.x -mov.f32f32 r0.x, r4.y -mul.f r1.x, r4.x, r1.y -mul.f r1.y, r6.y, r1.y -mov.f32f32 r4.z, r3.w -mov.f32f32 r4.y, r0.x -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.y, r1.y -mov.f32f32 r0.x, r1.z -mov.f32f32 r4.w, r1.w -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.y, r1.y -(rpt1)nop -mov.f32f32 r1.x, r1.x -mov.f32f32 r3.w, r1.y -mov.f32f32 r1.w, r5.w -mov.f32f32 r1.z, r5.z -mov.f32f32 r4.x, r1.x -mov.f32f32 r1.y, r5.y -mov.f32f32 r1.x, r5.x +mov.f32f32 r2.x, r0.x end nop nop +nop ; VERT: outputs: r0.x (0:0) r1.x (5:0) r2.x (5:20) r3.x (5:21) r4.x (5:22) -; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r5.x (0:0,cm=f,il=16,b=0) -; VERT: 183 instructions, 0 half, 7 full +; VERT: inputs: r4.z (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r1.x (0:0,cm=f,il=16,b=0) +; VERT: 133 instructions, 0 half, 8 full diff --git a/reference/stk-mines/stk-mines-29.asm b/reference/stk-mines/stk-mines-29.asm index 1435d60..141701a 100644 --- a/reference/stk-mines/stk-mines-29.asm +++ b/reference/stk-mines/stk-mines-29.asm @@ -6,275 +6,191 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c8.x) 0x40000000, 0xbf800000, 0x40800000, 0x3f000000 +@const(c9.x) 0x00000000, 0x3f333333, 0x3e99999a, 0x3d4ccccd +@const(c10.x) 0x3f800000, 0x00000000, 0x00000000, 0x00000000 +@const(c11.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.z, 0, r0.x absneg.f r0.w, (neg)c6.x -mov.f32f32 r1.x, c3.x -bary.f r1.y, 1, r0.x -add.f r1.z, r0.z, c3.x -add.f r1.w, r0.z, c4.x -add.f r2.x, c7.x, r0.w -add.f r0.z, r0.z, r1.x -mov.f32f32 r1.x, r1.z -mov.f32f32 r1.z, r1.w -mov.f32f32 r1.w, r2.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r2.x, c9.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r2.y, r1.x -add.f r1.x, r1.y, c3.y -mov.f32f32 r2.w, r1.z -add.f r1.z, r1.y, c4.y -rcp r1.w, r1.w -(ss)mov.f32f32 r1.w, r1.w -mov.f32f32 r1.x, r1.x -bary.f r3.y, 6, r0.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r0.z, r0.z -mov.f32f32 r1.x, r1.x -add.f r0.w, r3.y, r0.w -mov.f32f32 r1.z, r1.z -mov.f32f32 r3.z, r0.z -mov.f32f32 r2.z, r1.x -mul.f r0.z, r0.w, r1.w -mov.f32f32 r3.x, r1.z -add.f r0.w, r1.y, r2.x -cmps.f.lt r1.x, c6.x, r3.y -cmps.f.lt r1.y, c7.x, r3.y -bary.f r1.z, 11, r0.x -sam (f32)(xyz)r1.w, r2.y, s#0, t#0 -(sy)mad.f32 r2.x, c8.x, r2.x, c8.y -mov.f32f32 r0.z, r0.z -(ss)nop -sam (f32)(xyz)r2.z, r2.w, s#1, t#1 -(sy)(ss)mad.f32 r2.w, c8.x, r2.w, c8.y -mad.f32 r2.z, c8.x, r2.z, c8.y -mov.f32f32 r2.x, r2.x -mul.f r3.y, r0.z, c5.w -mul.f r3.w, r0.z, c5.z -mul.f r4.x, r0.z, c5.y -mul.f r2.x, c8.z, r2.x -mov.f32f32 r2.w, r2.w -mov.f32f32 r2.z, r2.z -mov.f32f32 r3.y, r3.y -mov.f32f32 r2.x, r2.x -mad.f32 r1.w, c8.x, r1.w, c8.y -mul.f r2.w, c8.z, r2.w -mov.f32f32 r2.z, r2.z -mov.f32f32 r4.y, r3.w -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.w, r2.w -mul.f r3.w, r2.z, r2.z -mov.f32f32 r4.x, r4.x -mul.f r4.z, r1.w, r1.w -mov.f32f32 r2.w, r2.w -mad.f32 r4.z, r2.x, r2.x, r4.z -mul.f r4.w, r0.z, c5.x -add.f r0.z, c10.x, (neg)r0.z -mad.f32 r3.w, r2.w, r2.w, r3.w -mov.f32f32 r4.z, r4.z +bary.f r1.x, 6, r0.x +mov.f32f32 r1.y, c3.x +add.f r1.z, r0.z, c4.x +bary.f r2.x, 1, r0.x +add.f r2.y, r0.z, c3.x +add.f r2.w, c7.x, r0.w +cmps.f.lt r3.x, c6.x, r1.x +add.f r1.w, r2.x, c4.y +add.f r2.z, r2.x, c3.y +cmps.f.lt r3.y, c7.x, r1.x +add.f r3.z, r0.z, r1.y +cov.u32f32 r0.z, r3.x +rcp r1.y, r2.w +add.f r0.w, r1.x, r0.w +cov.u32f32 r1.x, r3.y +sam (f32)(xyz)r3.w, r1.z, s#1, t#1 +(sy)(ss)mad.f32 r1.z, c8.x, r4.x, c8.y +sam (f32)(xyz)r2.y, r2.y, s#0, t#0 +(sy)mad.f32 r1.w, c8.x, r2.z, c8.y +(ss)mul.f r0.w, r0.w, r1.y +mad.f32 r1.y, c8.x, r3.w, c8.y +mul.f r1.z, c8.z, r1.z +mul.f r1.w, c8.z, r1.w +(ss)mov.f32f32 r2.z, r0.w +mov.f32f32 r3.x, r1.y +mov.f32f32 r3.y, r1.z +mov.f32f32 r4.x, r1.w mad.f32 r2.y, c8.x, r2.y, c8.y -mov.f32f32 r4.w, r4.w -mad.f32 r3.x, c8.x, r3.x, c8.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r2.y, r2.y -mov.f32f32 r3.w, r3.w mov.f32f32 r3.x, r3.x -mov.f32f32 r0.w, r0.w -mad.f32 r4.z, r2.y, r2.y, r4.z -cov.u32f32 r1.x, r1.x -cov.u32f32 r1.y, r1.y -(rpt3)nop -rsq r4.z, r4.z -(ss)mov.f32f32 r4.z, r4.z -mov.f32f32 r3.x, r3.x -mov.f32f32 r0.w, r0.w +mov.f32f32 r3.y, r3.y +mul.f r4.z, r2.z, c5.w +mul.f r4.w, r2.z, c5.z +mul.f r1.y, r1.y, r3.x +mov.f32f32 r5.x, r2.y +mad.f32 r1.y, r1.z, r3.y, r1.y +mad.f32 r1.z, c8.x, r4.y, c8.y +mul.f r4.y, r2.z, c5.y +mul.f r2.z, r2.z, c5.x +mul.f r2.y, r2.y, r5.x +mov.f32f32 r3.w, r1.z +mad.f32 r1.w, r1.w, r4.x, r2.y +mad.f32 r2.y, c8.x, r2.w, c8.y +add.f r0.w, c10.x, (neg)r0.w +mov.f32f32 r2.w, r3.w +cmps.f.ne r0.z, r0.z, c9.x cmps.f.ne r1.x, r1.x, c9.x -mul.f r1.w, r1.w, r4.z -mul.f r2.x, r2.x, r4.z -mul.f r2.y, r2.y, r4.z -mad.f32 r4.z, r3.x, r3.x, r3.w -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r2.y, r2.y -mov.f32f32 r3.w, r0.w -cmps.f.ne r0.w, r1.y, c9.x -mov.f32f32 r1.y, r1.z -rsq r1.z, r4.z -(ss)mov.f32f32 r1.z, r1.z -(ss)bary.f r4.z, 12, r0.x -bary.f r5.x, 13, r0.x +mov.f32f32 r3.w, c9.x +mad.f32 r1.y, r2.w, r2.w, r1.y +mov.f32f32 r2.w, r2.y mov.f32f32 r5.y, c9.x -mad.f32 r1.w, r2.z, r1.z, r1.w -mad.f32 r2.x, r2.w, r1.z, r2.x -mad.f32 r1.z, r3.x, r1.z, r2.y -sam (f32)(xyzw)r2.y, r3.z, s#2, t#2 -(ss)mov.f32f32 r3.z, r4.z -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r3.w, r5.x -mul.f r1.w, r1.w, c8.w -mul.f r2.x, r2.x, c8.w -mul.f r1.z, r1.z, c8.w -nop -mov.f32f32 r1.w, r1.w -bary.f r4.z, 8, r0.x -mov.f32f32 r2.x, r2.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r5.x, c9.x -mul.f r5.z, r1.w, r4.z -bary.f r5.w, 9, r0.x -mul.f r6.x, r4.z, r1.w -mov.f32f32 r6.y, c5.w -mov.f32f32 r6.z, c5.z -mad.f32 r5.z, r2.x, r5.w, r5.z -mad.f32 r6.x, r5.w, r2.x, r6.x -mov.f32f32 r6.w, c5.y +mov.f32f32 r5.z, c9.x +mov.f32f32 r5.w, c5.w +mov.f32f32 r6.x, c5.z +mov.f32f32 r6.y, c5.y +rsq r1.y, r1.y +(ss)mov.f32f32 r6.z, r1.y +mad.f32 r1.w, r2.w, r2.w, r1.w +add.f r3.w, r2.x, r3.w +bary.f r2.x, 11, r0.x +bary.f r2.w, 12, r0.x +bary.f r6.w, 13, r0.x +bary.f r7.x, 8, r0.x +bary.f r7.y, 9, r0.x +rsq r1.w, r1.w +(ss)mov.f32f32 r7.z, r1.w +(ss)mul.f r1.w, r2.y, r1.w +sam (f32)(xyzw)r7.w, r3.z, s#2, t#2 bary.f (ei)r0.x, 10, r0.x -mov.f32f32 r0.y, r5.z -mov.f32f32 r5.z, r6.x -mov.f32f32 r6.x, c5.x -mad.f32 r0.y, r1.z, r0.x, r0.y -mad.f32 r5.z, r0.x, r1.z, r5.z -(rpt1)nop -mul.f r1.w, r0.y, r1.w -max.f r5.z, r5.z, c9.x -mul.f r2.x, r0.y, r2.x -mul.f r0.y, r0.y, r1.z -mov.f32f32 r1.z, r1.w -mov.f32f32 r1.w, r5.z -mov.f32f32 r2.x, r2.x -mov.f32f32 r0.y, r0.y -mul.f r1.z, c8.x, r1.z -mad.f32 r1.w, c9.y, r1.w, c9.z -mul.f r2.x, c8.x, r2.x -mul.f r0.y, c8.x, r0.y -mov.f32f32 r1.z, r1.z -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r0.y, r0.y -add.f r1.z, r4.z, (neg)r1.z -(sy)mul.f r3.x, r3.x, r1.w -mul.f r2.w, r2.w, r1.w -mul.f r2.z, r2.z, r1.w -mov.f32f32 r1.z, r1.z -mov.f32f32 r4.z, r3.x -mov.f32f32 r5.z, r2.w -mov.f32f32 r7.x, r2.z -mul.f r7.y, r1.z, r1.z -add.f r2.x, r5.w, (neg)r2.x -add.f r4.z, r4.z, r5.x -add.f r0.x, r0.x, (neg)r0.y -mul.f r0.y, r2.y, r1.w -mov.f32f32 r1.w, r2.x -add.f r2.x, r4.z, r5.y -mov.f32f32 r0.x, r0.x -mov.f32f32 r2.y, r0.y -mad.f32 r4.z, r1.w, r1.w, r7.y +mov.f32f32 r0.y, c5.x +mul.f r2.y, r5.x, r7.z +(ss)mul.f r3.z, r4.x, r7.z +mad.f32 r2.y, r3.x, r6.z, r2.y +mad.f32 r3.x, r3.y, r6.z, r3.z +mad.f32 r1.y, r1.z, r1.y, r1.w +nop +mul.f r1.z, r2.y, c8.w +mul.f r1.w, r3.x, c8.w +mul.f r1.y, r1.y, c8.w +nop +mov.f32f32 r2.y, r1.z +mul.f r1.z, r7.x, r1.z +mov.f32f32 r3.x, r1.w +mov.f32f32 r3.y, r1.y +mul.f r3.z, r2.y, r7.x +mad.f32 r1.z, r7.y, r1.w, r1.z +mad.f32 r1.w, r3.x, r7.y, r3.z +mad.f32 r1.y, r0.x, r1.y, r1.z +mad.f32 r1.z, r3.y, r0.x, r1.w (rpt2)nop -mov.f32f32 r4.z, r4.z -nop -mad.f32 r4.z, r0.x, r0.x, r4.z -(rpt5)nop -rsq r4.z, r4.z -(ss)mov.f32f32 r4.z, r4.z +mul.f r1.w, r1.z, r2.y +max.f r1.y, r1.y, c9.x +mul.f r2.y, r1.z, r3.x +mul.f r1.z, r1.z, r3.y +mul.f r1.w, c8.x, r1.w +mad.f32 r1.y, c9.y, r1.y, c9.z +mul.f r2.y, c8.x, r2.y +mul.f r1.z, c8.x, r1.z +add.f r1.w, r7.x, (neg)r1.w +mov.f32f32 r3.x, r1.y +add.f r2.y, r7.y, (neg)r2.y +add.f r0.x, r0.x, (neg)r1.z +mov.f32f32 r1.z, r1.w +(sy)mul.f r3.y, r8.z, r3.x +mov.f32f32 r3.z, r2.y +mov.f32f32 r3.w, r0.x +mul.f r1.w, r1.w, r1.z +add.f r4.x, r3.y, r5.z +mad.f32 r1.w, r2.y, r3.z, r1.w +mul.f r2.y, r8.y, r3.x +mad.f32 r1.w, r3.w, r3.w, r1.w +add.f r3.w, r4.x, r5.y +mul.f r3.x, r8.x, r3.x +mul.f r1.y, r7.w, r1.y (rpt2)nop -mul.f r1.z, r1.z, r4.z -mul.f r1.w, r1.w, r4.z -mul.f r0.x, r0.x, r4.z -nop -mov.f32f32 r1.z, r1.z -mov.f32f32 r1.w, r1.w -mov.f32f32 r0.x, r0.x +rsq r1.w, r1.w +(ss)mov.f32f32 r4.x, r1.w +mul.f r0.x, r0.x, r1.w +(rpt1)nop +mul.f r1.z, r1.z, r4.x +(ss)mul.f r1.w, r3.z, r4.x +(rpt1)nop +mul.f r1.z, r1.z, r2.x nop -mul.f r1.y, r1.z, r1.y +mad.f32 r1.z, r1.w, r2.w, r1.z nop -mad.f32 r1.y, r1.w, r3.z, r1.y -(rpt2)nop -mov.f32f32 r1.y, r1.y -nop -mad.f32 r0.x, r0.x, r3.w, r1.y -(rpt2)nop -mov.f32f32 r0.x, r0.x +mad.f32 r0.x, r0.x, r6.w, r1.z (rpt2)nop max.f r0.x, r0.x, c9.x (rpt2)nop -mov.f32f32 r0.x, r0.x -(rpt2)nop -mov.f32f32 r1.y, r0.x mov.f32f32 r1.z, r0.x -cmps.f.lt r0.x, c9.x, r0.x -(rpt1)nop -mul.f r1.y, r1.y, r1.z -cov.u32f32 r0.x, r0.x +(rpt2)nop +mul.f r0.x, r0.x, r1.z +cmps.f.lt r1.z, c9.x, r1.z (rpt1)nop -mov.f32f32 r1.y, r1.y -cmps.f.ne r0.x, r0.x, c9.x +mov.f32f32 r1.w, r0.x +cov.u32f32 r1.z, r1.z (rpt1)nop -mul.f r1.y, r1.y, r1.y -sel.b32 r1.z, r2.x, r0.x, r3.x +mul.f r0.x, r0.x, r1.w +cmps.f.ne r1.z, r1.z, c9.x (rpt1)nop -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.w, r1.z +mov.f32f32 r1.w, r0.x +sel.b32 r2.x, r3.w, r1.z, r3.y +mul.f r0.x, r0.x, c9.w +nop +mul.f r1.w, r1.w, r1.w +mad.f32 r2.w, r0.w, r2.x, r4.z +add.f r3.y, r2.y, r0.x +add.f r3.z, r3.x, r0.x +mul.f r1.w, r1.w, c9.z +add.f r0.x, r1.y, r0.x +sel.b32 r2.x, r2.w, r0.z, r2.x +nop +mov.f32f32 r2.w, r1.w +add.f r0.x, r0.x, r1.w +sel.b32 r1.w, r5.w, r1.x, r2.x +nop +add.f r2.x, r3.y, r2.w +add.f r2.w, r3.z, r2.w +sel.b32 r0.x, r0.x, r1.z, r1.y +nop +sel.b32 r1.y, r2.x, r1.z, r2.y +sel.b32 r1.z, r2.w, r1.z, r3.x +mad.f32 r2.x, r0.w, r0.x, r2.z +nop +mad.f32 r2.y, r0.w, r1.y, r4.w +mad.f32 r0.w, r0.w, r1.z, r4.y +sel.b32 r0.x, r2.x, r0.z, r0.x +nop +sel.b32 r1.y, r2.y, r0.z, r1.y +sel.b32 r0.z, r0.w, r0.z, r1.z (rpt1)nop -mul.f r2.x, r1.y, r1.y -mov.f32f32 r1.w, r1.w -mul.f r1.y, r1.y, c9.w -nop -mov.f32f32 r2.x, r2.x -mad.f32 r1.w, r0.z, r1.w, r3.y -mov.f32f32 r3.x, r1.y -mov.f32f32 r3.y, r1.y -mul.f r2.x, r2.x, c9.z -sel.b32 r1.z, r1.w, r1.x, r1.z -add.f r1.w, r5.z, r3.x -add.f r3.x, r7.x, r3.y -mov.f32f32 r2.x, r2.x -sel.b32 r1.z, r6.y, r0.w, r1.z -mov.f32f32 r1.y, r1.y -nop -mov.f32f32 r3.y, r2.x -mov.f32f32 r3.z, r2.x -mov.f32f32 r2.x, r2.x -add.f r1.y, r2.y, r1.y -add.f r2.y, r1.w, r3.y -add.f r3.x, r3.x, r3.z -mov.f32f32 r1.w, r1.z -nop -sel.b32 r1.z, r2.y, r0.x, r2.w -sel.b32 r2.y, r3.x, r0.x, r2.z -add.f r1.y, r1.y, r2.x -nop -mov.f32f32 r2.x, r1.z -mov.f32f32 r2.z, r2.y -sel.b32 r0.x, r1.y, r0.x, r0.y -nop -mov.f32f32 r0.y, r2.x -mov.f32f32 r1.y, r2.z -mov.f32f32 r2.x, r0.x -nop -mad.f32 r0.y, r0.z, r0.y, r4.y -mad.f32 r1.y, r0.z, r1.y, r4.x -mov.f32f32 r2.x, r2.x -nop -sel.b32 r0.y, r0.y, r1.x, r1.z -sel.b32 r1.y, r1.y, r1.x, r2.y -mad.f32 r0.z, r0.z, r2.x, r4.w +sel.b32 r1.z, r6.x, r1.x, r1.y +sel.b32 r1.y, r6.y, r1.x, r0.z +sel.b32 r1.x, r0.y, r1.x, r0.x +end nop -sel.b32 r0.y, r6.z, r0.w, r0.y -sel.b32 r1.y, r6.w, r0.w, r1.y -sel.b32 r0.x, r0.z, r1.x, r0.x nop -mov.f32f32 r1.z, r0.y -mov.f32f32 r1.y, r1.y -sel.b32 r0.x, r6.x, r0.w, r0.x -(rpt2)nop -mov.f32f32 r1.x, r0.x -end nop ; FRAG: outputs: r1.x (1:0) -; FRAG: inputs: r0.x (5:0,cm=f,il=8,b=1) r1.x (5:20,cm=f,il=12,b=1) r0.w (5:21,cm=f,il=16,b=1) r3.x (5:22,cm=f,il=20,b=1) -; FRAG: 297 instructions, 0 half, 8 full +; FRAG: inputs: r0.x (5:0,cm=f,il=8,b=1) r1.x (5:20,cm=f,il=12,b=1) r1.y (5:21,cm=f,il=16,b=1) r3.x (5:22,cm=f,il=20,b=1) +; FRAG: 193 instructions, 0 half, 9 full diff --git a/reference/stk-mines/stk-mines-30.asm b/reference/stk-mines/stk-mines-30.asm index 2d84780..726bef5 100644 --- a/reference/stk-mines/stk-mines-30.asm +++ b/reference/stk-mines/stk-mines-30.asm @@ -1,16 +1,16 @@ ; options: ; VERT: new compiler -@in(r1.x) in0 -@in(r1.y) in1 -@in(r1.z) in2 -@in(r1.w) in3 +@in(r4.z) in0 +@in(r4.w) in1 +@in(r5.x) in2 +@in(r5.y) in3 @in(r0.x) in4 @in(r0.y) in5 @in(r0.z) in6 -@in(r5.x) in8 -@in(r5.y) in9 -@in(r5.z) in10 -@in(r5.w) in11 +@in(r1.x) in8 +@in(r1.y) in9 +@in(r1.z) in10 +@in(r1.w) in11 @out(r0.x) out0 @out(r0.y) out1 @out(r0.z) out2 @@ -31,191 +31,144 @@ @out(r4.y) out17 @out(r4.z) out18 @out(r4.w) out19 +@const(c12.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)mov.f32f32 r0.w, c1.x -mul.f r2.x, c8.x, r1.x +mul.f r2.x, c8.x, r4.z mov.f32f32 r2.y, c1.y mov.f32f32 r2.z, c1.z mul.f r2.w, r0.w, r0.x mov.f32f32 r3.x, c2.x -mad.f32 r2.x, c9.x, r1.y, r2.x +mad.f32 r2.x, c9.x, r4.w, r2.x mul.f r3.y, r0.w, r0.w -mad.f32 r2.x, c10.x, r1.z, r2.x +mad.f32 r2.x, c10.x, r5.x, r2.x mad.f32 r2.w, r3.x, r0.y, r2.w -mad.f32 r2.x, c11.x, r1.w, r2.x -mad.f32 r3.x, r2.y, r2.y, r3.y -mul.f r3.y, r2.y, r0.x -mov.f32f32 r2.w, r2.w -mov.f32f32 r3.z, c3.x -mov.f32f32 r2.x, r2.x -mov.f32f32 r3.x, r3.x +mov.f32f32 r3.x, c3.x +mad.f32 r2.x, c11.x, r5.y, r2.x +mad.f32 r3.y, r2.y, r2.y, r3.y +mul.f r3.z, r2.y, r0.x +mad.f32 r2.w, r3.x, r0.z, r2.w +mov.f32f32 r3.x, r2.x +mad.f32 r3.y, r2.z, r2.z, r3.y mov.f32f32 r3.w, c2.y -mad.f32 r2.w, r3.z, r0.z, r2.w -mul.f r3.z, r2.x, r2.x -mul.f r4.x, c8.y, r1.x -mad.f32 r3.x, r2.z, r2.z, r3.x -mov.f32f32 r4.y, r2.w -mad.f32 r2.w, c9.y, r1.y, r4.x -mad.f32 r3.y, r3.w, r0.y, r3.y -mul.f r0.x, r2.z, r0.x -mul.f r3.w, r4.y, r4.y -mad.f32 r2.w, c10.y, r1.z, r2.w -mov.f32f32 r3.y, r3.y -mov.f32f32 r4.x, c3.y -mad.f32 r2.w, c11.y, r1.w, r2.w -rsq r3.x, r3.x -(ss)mov.f32f32 r3.x, r3.x -mov.f32f32 r4.z, c2.z -mad.f32 r3.y, r4.x, r0.z, r3.y mov.f32f32 r4.x, r2.w -mul.f r0.w, r0.w, r3.x -mul.f r2.y, r2.y, r3.x -mov.f32f32 r3.y, r3.y -mad.f32 r2.w, r4.x, r4.x, r3.z +mul.f r3.x, r3.x, r3.x +mul.f r4.y, c8.y, r4.z +mad.f32 r3.z, r3.w, r0.y, r3.z +mul.f r2.w, r2.w, r4.x +mov.f32f32 r3.w, c3.y +mad.f32 r4.y, c9.y, r4.w, r4.y +rsq r3.y, r3.y +(ss)mov.f32f32 r5.z, r3.y +mad.f32 r4.y, c10.y, r5.x, r4.y +mad.f32 r3.z, r3.w, r0.z, r3.z +mad.f32 r3.w, c11.y, r5.y, r4.y +mul.f r0.w, r0.w, r5.z +mul.f r2.y, r2.y, r5.z +mov.f32f32 r4.y, r3.z +mov.f32f32 r5.z, r3.w +mul.f r5.w, c0.x, r0.w mov.f32f32 r0.w, r0.w -mov.f32f32 r2.y, r2.y -mad.f32 r3.z, r3.y, r3.y, r3.w -mov.f32f32 r2.w, r2.w -mul.f r3.w, c8.z, r1.x -mul.f r4.w, c0.x, r0.w -mov.f32f32 r3.z, r3.z -mad.f32 r0.x, r4.z, r0.y, r0.x -mad.f32 r0.y, c9.z, r1.y, r3.w -mad.f32 r3.w, c0.y, r2.y, r4.w -mad.f32 r0.y, c10.z, r1.z, r0.y -mov.f32f32 r0.x, r0.x -mov.f32f32 r4.z, c3.z -mad.f32 r0.y, c11.z, r1.w, r0.y -mov.f32f32 r3.w, r3.w -mul.f r2.z, r2.z, r3.x -mad.f32 r0.x, r4.z, r0.z, r0.x -mov.f32f32 r0.y, r0.y -mul.f r0.z, c4.w, r1.x -mul.f r3.x, c4.z, r1.x -mov.f32f32 r0.x, r0.x -mad.f32 r2.w, r0.y, r0.y, r2.w +mad.f32 r2.w, r3.z, r4.y, r2.w +mul.f r0.x, r2.z, r0.x +mov.f32f32 r3.z, c2.z +mad.f32 r3.x, r3.w, r5.z, r3.x +mul.f r3.w, c8.z, r4.z +mad.f32 r5.w, c0.y, r2.y, r5.w +mad.f32 r0.x, r3.z, r0.y, r0.x +mov.f32f32 r0.y, c3.z +mad.f32 r3.z, c9.z, r4.w, r3.w +mul.f r2.z, r2.z, r3.y +(ss)mad.f32 r3.y, c10.z, r5.x, r3.z +mad.f32 r0.x, r0.y, r0.z, r0.x +mad.f32 r0.y, c11.z, r5.y, r3.y +mad.f32 r3.y, c0.z, r2.z, r5.w +mov.f32f32 r0.z, r2.y +mov.f32f32 r2.y, r0.x +mov.f32f32 r3.z, r0.y +mov.f32f32 r3.w, r3.y mov.f32f32 r2.z, r2.z -mad.f32 r0.z, c5.w, r1.y, r0.z -mad.f32 r3.z, r0.x, r0.x, r3.z -mad.f32 r0.z, c6.w, r1.z, r0.z -mad.f32 r3.x, c5.z, r1.y, r3.x -mul.f r4.z, c4.y, r1.x -mul.f r1.x, c4.x, r1.x -mad.f32 r0.z, c7.w, r1.w, r0.z -mad.f32 r3.x, c6.z, r1.z, r3.x -rsq r3.z, r3.z -(ss)mov.f32f32 r3.z, r3.z -rsq r2.w, r2.w -(ss)mov.f32f32 r4.w, r2.w -mad.f32 r3.w, c0.z, r2.z, r3.w -(ss)mov.f32f32 r2.w, r0.z -mul.f r0.x, r0.x, r3.z -mul.f r2.x, r2.x, r4.w -mul.f r4.y, r4.y, r3.z -mul.f r3.y, r3.y, r3.z -mov.f32f32 r0.x, r0.x -mov.f32f32 r2.x, r2.x -mov.f32f32 r3.z, r4.y -mov.f32f32 r3.y, r3.y -mul.f r4.y, r0.x, r2.y -absneg.f r2.x, (neg)r2.x -mul.f r6.x, c0.x, r3.z -mad.f32 r4.y, r3.y, r2.z, (neg)r4.y -mad.f32 r6.x, c0.y, r3.y, r6.x -mov.f32f32 r6.y, r2.x -mul.f r2.x, r3.z, r2.z -mov.f32f32 r2.z, r4.y -mov.f32f32 r4.y, r6.x -mul.f r6.x, r6.y, r6.y -mul.f r4.x, r4.x, r4.w -mul.f r2.z, c0.x, r2.z -mad.f32 r2.x, r0.x, r0.w, (neg)r2.x -mad.f32 r0.x, c0.z, r0.x, r4.y +mad.f32 r2.y, r2.y, r2.y, r2.w +mad.f32 r0.y, r0.y, r3.z, r3.x +mul.f r3.x, r3.w, r3.w +mul.f r2.w, c4.w, r4.z +mul.f r3.w, c4.z, r4.z +mul.f r5.w, c4.y, r4.z +mul.f r4.z, c4.x, r4.z +rsq r2.y, r2.y +(ss)mov.f32f32 r6.x, r2.y +rsq r0.y, r0.y +(ss)mov.f32f32 r6.y, r0.y +(ss)mul.f r0.y, r2.x, r0.y +mul.f r0.x, r0.x, r2.y +mul.f r2.x, r4.y, r6.x +mul.f r2.y, r5.z, r6.y +mul.f r4.x, r4.x, r6.x +absneg.f r4.y, (neg)r0.y +mov.f32f32 r0.y, r2.x +mov.f32f32 r5.z, r0.x +absneg.f r2.y, (neg)r2.y +mul.f r6.x, c0.x, r4.x mov.f32f32 r4.x, r4.x -mul.f r0.w, r3.y, r0.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r0.x, r0.x -absneg.f r3.y, (neg)r4.x -mad.f32 r0.w, r3.z, r2.y, (neg)r0.w -mad.f32 r2.x, c0.y, r2.x, r2.z -mov.f32f32 r0.x, r0.x -mov.f32f32 r4.x, r3.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r3.y, r3.w -mul.f r0.y, r0.y, r4.w -mad.f32 r0.w, c0.z, r0.w, r2.x -mad.f32 r2.x, r4.x, r4.x, r6.x -mul.f r2.y, r3.y, r3.y -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r2.x, r2.x -mad.f32 r3.x, c7.z, r1.w, r3.x -mad.f32 r3.z, c5.y, r1.y, r4.z -mov.f32f32 r3.w, r0.w -absneg.f r0.y, (neg)r0.y -mov.f32f32 r2.z, r3.x -mad.f32 r0.w, c6.y, r1.z, r3.z -mad.f32 r2.y, r3.w, r3.w, r2.y -mov.f32f32 r3.z, r0.y -mad.f32 r0.y, c7.y, r1.w, r0.w -mad.f32 r0.w, c5.x, r1.y, r1.x -mov.f32f32 r1.x, r2.y -mad.f32 r1.y, r3.z, r3.z, r2.x -mad.f32 r1.x, r0.x, r0.x, r1.x +mul.f r6.z, r5.z, r0.z +mov.f32f32 r6.w, r2.y +mad.f32 r6.z, r0.y, r2.z, (neg)r6.z +mov.f32f32 r7.x, r4.y +mad.f32 r2.x, c0.y, r2.x, r6.x +mul.f r2.z, r4.x, r2.z +mul.f r6.x, c0.x, r6.z +mad.f32 r2.z, r5.z, r0.w, (neg)r2.z +mul.f r5.z, r7.x, r7.x +mad.f32 r0.x, c0.z, r0.x, r2.x +mad.f32 r2.x, r2.y, r6.w, r5.z +mad.f32 r2.y, c0.y, r2.z, r6.x +mul.f r0.y, r0.y, r0.w +mul.f r0.w, r3.z, r6.y +mad.f32 r0.y, r4.x, r0.z, (neg)r0.y +mov.f32f32 r0.z, r0.x +mad.f32 r2.z, c5.w, r4.w, r2.w +mad.f32 r2.w, c5.z, r4.w, r3.w +mad.f32 r0.y, c0.z, r0.y, r2.y +absneg.f r2.y, (neg)r0.w +mov.f32f32 r3.z, r0.z +mad.f32 r0.z, c6.w, r5.x, r2.z +mov.f32f32 r2.z, r0.y +mov.f32f32 r4.x, r2.y +mad.f32 r0.w, c7.w, r5.y, r0.z +mad.f32 r0.z, c6.z, r5.x, r2.w +mov.f32f32 r5.z, r2.z +mad.f32 r2.x, r2.y, r4.x, r2.x +mov.f32f32 r2.w, r0.w +mad.f32 r0.z, c7.z, r5.y, r0.z +mad.f32 r0.y, r0.y, r5.z, r3.x +mad.f32 r2.y, c5.y, r4.w, r5.w +mad.f32 r0.x, r0.x, r3.z, r0.y +mov.f32f32 r2.z, r0.z +mad.f32 r0.y, c6.y, r5.x, r2.y +mad.f32 r2.y, c5.x, r4.w, r4.z +mad.f32 r0.y, c7.y, r5.y, r0.y +mad.f32 r4.z, c6.x, r5.x, r2.y +nop +rsq r0.x, r0.x +(ss)mov.f32f32 r2.y, r0.x +mul.f r3.x, r3.y, r0.x +(ss)rsq r0.x, r2.x +(ss)mov.f32f32 r2.x, r0.x +mul.f r3.w, r4.y, r0.x +mul.f r3.z, r3.z, r2.y +mul.f r3.y, r5.z, r2.y +mul.f r4.y, r4.x, r2.x +mul.f r4.x, r6.w, r2.x mov.f32f32 r2.y, r0.y -mad.f32 r1.z, c6.x, r1.z, r0.w -mov.f32f32 r0.w, r0.z -mov.f32f32 r0.z, r3.x -mov.f32f32 r0.y, r0.y -mad.f32 r1.z, c7.x, r1.w, r1.z -rsq r1.x, r1.x -(ss)mov.f32f32 r1.x, r1.x -rsq r1.y, r1.y -(ss)mov.f32f32 r1.y, r1.y -mov.f32f32 r1.w, (0.000000) -mov.f32f32 r2.x, r1.z -mul.f r0.x, r0.x, r1.x -mul.f r3.x, r3.w, r1.x -mul.f r1.x, r3.y, r1.x -mul.f r3.y, r3.z, r1.y -mov.f32f32 r0.x, r0.x -mov.f32f32 r3.x, r3.x -mov.f32f32 r1.x, r1.x -mov.f32f32 r3.w, r3.y -mov.f32f32 r0.x, r0.x -mov.f32f32 r3.x, r3.x -mov.f32f32 r1.x, r1.x -mov.f32f32 r4.y, r3.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r3.x, r3.x -mov.f32f32 r1.x, r1.x +mad.f32 r0.x, c7.x, r5.y, r4.z +mov.f32f32 r4.z, r4.y +mov.f32f32 r4.w, (0.000000) nop -mov.f32f32 r3.z, r0.x -mov.f32f32 r3.y, r3.x -mov.f32f32 r3.x, r1.x -mov.f32f32 r0.x, r4.y -mul.f r1.x, r4.x, r1.y -mul.f r1.y, r6.y, r1.y -mov.f32f32 r4.z, r3.w -mov.f32f32 r4.y, r0.x -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.y, r1.y -mov.f32f32 r0.x, r1.z -mov.f32f32 r4.w, r1.w -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.y, r1.y -(rpt1)nop -mov.f32f32 r1.x, r1.x -mov.f32f32 r3.w, r1.y -mov.f32f32 r1.w, r5.w -mov.f32f32 r1.z, r5.z -mov.f32f32 r4.x, r1.x -mov.f32f32 r1.y, r5.y -mov.f32f32 r1.x, r5.x +mov.f32f32 r2.x, r0.x end nop nop +nop ; VERT: outputs: r0.x (0:0) r1.x (5:0) r2.x (5:20) r3.x (5:21) r4.x (5:22) -; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r5.x (0:0,cm=f,il=16,b=0) -; VERT: 183 instructions, 0 half, 7 full +; VERT: inputs: r4.z (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r1.x (0:0,cm=f,il=16,b=0) +; VERT: 133 instructions, 0 half, 8 full diff --git a/reference/stk-mines/stk-mines-31.asm b/reference/stk-mines/stk-mines-31.asm index 1435d60..141701a 100644 --- a/reference/stk-mines/stk-mines-31.asm +++ b/reference/stk-mines/stk-mines-31.asm @@ -6,275 +6,191 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c8.x) 0x40000000, 0xbf800000, 0x40800000, 0x3f000000 +@const(c9.x) 0x00000000, 0x3f333333, 0x3e99999a, 0x3d4ccccd +@const(c10.x) 0x3f800000, 0x00000000, 0x00000000, 0x00000000 +@const(c11.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.z, 0, r0.x absneg.f r0.w, (neg)c6.x -mov.f32f32 r1.x, c3.x -bary.f r1.y, 1, r0.x -add.f r1.z, r0.z, c3.x -add.f r1.w, r0.z, c4.x -add.f r2.x, c7.x, r0.w -add.f r0.z, r0.z, r1.x -mov.f32f32 r1.x, r1.z -mov.f32f32 r1.z, r1.w -mov.f32f32 r1.w, r2.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r2.x, c9.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r2.y, r1.x -add.f r1.x, r1.y, c3.y -mov.f32f32 r2.w, r1.z -add.f r1.z, r1.y, c4.y -rcp r1.w, r1.w -(ss)mov.f32f32 r1.w, r1.w -mov.f32f32 r1.x, r1.x -bary.f r3.y, 6, r0.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r0.z, r0.z -mov.f32f32 r1.x, r1.x -add.f r0.w, r3.y, r0.w -mov.f32f32 r1.z, r1.z -mov.f32f32 r3.z, r0.z -mov.f32f32 r2.z, r1.x -mul.f r0.z, r0.w, r1.w -mov.f32f32 r3.x, r1.z -add.f r0.w, r1.y, r2.x -cmps.f.lt r1.x, c6.x, r3.y -cmps.f.lt r1.y, c7.x, r3.y -bary.f r1.z, 11, r0.x -sam (f32)(xyz)r1.w, r2.y, s#0, t#0 -(sy)mad.f32 r2.x, c8.x, r2.x, c8.y -mov.f32f32 r0.z, r0.z -(ss)nop -sam (f32)(xyz)r2.z, r2.w, s#1, t#1 -(sy)(ss)mad.f32 r2.w, c8.x, r2.w, c8.y -mad.f32 r2.z, c8.x, r2.z, c8.y -mov.f32f32 r2.x, r2.x -mul.f r3.y, r0.z, c5.w -mul.f r3.w, r0.z, c5.z -mul.f r4.x, r0.z, c5.y -mul.f r2.x, c8.z, r2.x -mov.f32f32 r2.w, r2.w -mov.f32f32 r2.z, r2.z -mov.f32f32 r3.y, r3.y -mov.f32f32 r2.x, r2.x -mad.f32 r1.w, c8.x, r1.w, c8.y -mul.f r2.w, c8.z, r2.w -mov.f32f32 r2.z, r2.z -mov.f32f32 r4.y, r3.w -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.w, r2.w -mul.f r3.w, r2.z, r2.z -mov.f32f32 r4.x, r4.x -mul.f r4.z, r1.w, r1.w -mov.f32f32 r2.w, r2.w -mad.f32 r4.z, r2.x, r2.x, r4.z -mul.f r4.w, r0.z, c5.x -add.f r0.z, c10.x, (neg)r0.z -mad.f32 r3.w, r2.w, r2.w, r3.w -mov.f32f32 r4.z, r4.z +bary.f r1.x, 6, r0.x +mov.f32f32 r1.y, c3.x +add.f r1.z, r0.z, c4.x +bary.f r2.x, 1, r0.x +add.f r2.y, r0.z, c3.x +add.f r2.w, c7.x, r0.w +cmps.f.lt r3.x, c6.x, r1.x +add.f r1.w, r2.x, c4.y +add.f r2.z, r2.x, c3.y +cmps.f.lt r3.y, c7.x, r1.x +add.f r3.z, r0.z, r1.y +cov.u32f32 r0.z, r3.x +rcp r1.y, r2.w +add.f r0.w, r1.x, r0.w +cov.u32f32 r1.x, r3.y +sam (f32)(xyz)r3.w, r1.z, s#1, t#1 +(sy)(ss)mad.f32 r1.z, c8.x, r4.x, c8.y +sam (f32)(xyz)r2.y, r2.y, s#0, t#0 +(sy)mad.f32 r1.w, c8.x, r2.z, c8.y +(ss)mul.f r0.w, r0.w, r1.y +mad.f32 r1.y, c8.x, r3.w, c8.y +mul.f r1.z, c8.z, r1.z +mul.f r1.w, c8.z, r1.w +(ss)mov.f32f32 r2.z, r0.w +mov.f32f32 r3.x, r1.y +mov.f32f32 r3.y, r1.z +mov.f32f32 r4.x, r1.w mad.f32 r2.y, c8.x, r2.y, c8.y -mov.f32f32 r4.w, r4.w -mad.f32 r3.x, c8.x, r3.x, c8.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r2.y, r2.y -mov.f32f32 r3.w, r3.w mov.f32f32 r3.x, r3.x -mov.f32f32 r0.w, r0.w -mad.f32 r4.z, r2.y, r2.y, r4.z -cov.u32f32 r1.x, r1.x -cov.u32f32 r1.y, r1.y -(rpt3)nop -rsq r4.z, r4.z -(ss)mov.f32f32 r4.z, r4.z -mov.f32f32 r3.x, r3.x -mov.f32f32 r0.w, r0.w +mov.f32f32 r3.y, r3.y +mul.f r4.z, r2.z, c5.w +mul.f r4.w, r2.z, c5.z +mul.f r1.y, r1.y, r3.x +mov.f32f32 r5.x, r2.y +mad.f32 r1.y, r1.z, r3.y, r1.y +mad.f32 r1.z, c8.x, r4.y, c8.y +mul.f r4.y, r2.z, c5.y +mul.f r2.z, r2.z, c5.x +mul.f r2.y, r2.y, r5.x +mov.f32f32 r3.w, r1.z +mad.f32 r1.w, r1.w, r4.x, r2.y +mad.f32 r2.y, c8.x, r2.w, c8.y +add.f r0.w, c10.x, (neg)r0.w +mov.f32f32 r2.w, r3.w +cmps.f.ne r0.z, r0.z, c9.x cmps.f.ne r1.x, r1.x, c9.x -mul.f r1.w, r1.w, r4.z -mul.f r2.x, r2.x, r4.z -mul.f r2.y, r2.y, r4.z -mad.f32 r4.z, r3.x, r3.x, r3.w -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r2.y, r2.y -mov.f32f32 r3.w, r0.w -cmps.f.ne r0.w, r1.y, c9.x -mov.f32f32 r1.y, r1.z -rsq r1.z, r4.z -(ss)mov.f32f32 r1.z, r1.z -(ss)bary.f r4.z, 12, r0.x -bary.f r5.x, 13, r0.x +mov.f32f32 r3.w, c9.x +mad.f32 r1.y, r2.w, r2.w, r1.y +mov.f32f32 r2.w, r2.y mov.f32f32 r5.y, c9.x -mad.f32 r1.w, r2.z, r1.z, r1.w -mad.f32 r2.x, r2.w, r1.z, r2.x -mad.f32 r1.z, r3.x, r1.z, r2.y -sam (f32)(xyzw)r2.y, r3.z, s#2, t#2 -(ss)mov.f32f32 r3.z, r4.z -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r3.w, r5.x -mul.f r1.w, r1.w, c8.w -mul.f r2.x, r2.x, c8.w -mul.f r1.z, r1.z, c8.w -nop -mov.f32f32 r1.w, r1.w -bary.f r4.z, 8, r0.x -mov.f32f32 r2.x, r2.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r5.x, c9.x -mul.f r5.z, r1.w, r4.z -bary.f r5.w, 9, r0.x -mul.f r6.x, r4.z, r1.w -mov.f32f32 r6.y, c5.w -mov.f32f32 r6.z, c5.z -mad.f32 r5.z, r2.x, r5.w, r5.z -mad.f32 r6.x, r5.w, r2.x, r6.x -mov.f32f32 r6.w, c5.y +mov.f32f32 r5.z, c9.x +mov.f32f32 r5.w, c5.w +mov.f32f32 r6.x, c5.z +mov.f32f32 r6.y, c5.y +rsq r1.y, r1.y +(ss)mov.f32f32 r6.z, r1.y +mad.f32 r1.w, r2.w, r2.w, r1.w +add.f r3.w, r2.x, r3.w +bary.f r2.x, 11, r0.x +bary.f r2.w, 12, r0.x +bary.f r6.w, 13, r0.x +bary.f r7.x, 8, r0.x +bary.f r7.y, 9, r0.x +rsq r1.w, r1.w +(ss)mov.f32f32 r7.z, r1.w +(ss)mul.f r1.w, r2.y, r1.w +sam (f32)(xyzw)r7.w, r3.z, s#2, t#2 bary.f (ei)r0.x, 10, r0.x -mov.f32f32 r0.y, r5.z -mov.f32f32 r5.z, r6.x -mov.f32f32 r6.x, c5.x -mad.f32 r0.y, r1.z, r0.x, r0.y -mad.f32 r5.z, r0.x, r1.z, r5.z -(rpt1)nop -mul.f r1.w, r0.y, r1.w -max.f r5.z, r5.z, c9.x -mul.f r2.x, r0.y, r2.x -mul.f r0.y, r0.y, r1.z -mov.f32f32 r1.z, r1.w -mov.f32f32 r1.w, r5.z -mov.f32f32 r2.x, r2.x -mov.f32f32 r0.y, r0.y -mul.f r1.z, c8.x, r1.z -mad.f32 r1.w, c9.y, r1.w, c9.z -mul.f r2.x, c8.x, r2.x -mul.f r0.y, c8.x, r0.y -mov.f32f32 r1.z, r1.z -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r0.y, r0.y -add.f r1.z, r4.z, (neg)r1.z -(sy)mul.f r3.x, r3.x, r1.w -mul.f r2.w, r2.w, r1.w -mul.f r2.z, r2.z, r1.w -mov.f32f32 r1.z, r1.z -mov.f32f32 r4.z, r3.x -mov.f32f32 r5.z, r2.w -mov.f32f32 r7.x, r2.z -mul.f r7.y, r1.z, r1.z -add.f r2.x, r5.w, (neg)r2.x -add.f r4.z, r4.z, r5.x -add.f r0.x, r0.x, (neg)r0.y -mul.f r0.y, r2.y, r1.w -mov.f32f32 r1.w, r2.x -add.f r2.x, r4.z, r5.y -mov.f32f32 r0.x, r0.x -mov.f32f32 r2.y, r0.y -mad.f32 r4.z, r1.w, r1.w, r7.y +mov.f32f32 r0.y, c5.x +mul.f r2.y, r5.x, r7.z +(ss)mul.f r3.z, r4.x, r7.z +mad.f32 r2.y, r3.x, r6.z, r2.y +mad.f32 r3.x, r3.y, r6.z, r3.z +mad.f32 r1.y, r1.z, r1.y, r1.w +nop +mul.f r1.z, r2.y, c8.w +mul.f r1.w, r3.x, c8.w +mul.f r1.y, r1.y, c8.w +nop +mov.f32f32 r2.y, r1.z +mul.f r1.z, r7.x, r1.z +mov.f32f32 r3.x, r1.w +mov.f32f32 r3.y, r1.y +mul.f r3.z, r2.y, r7.x +mad.f32 r1.z, r7.y, r1.w, r1.z +mad.f32 r1.w, r3.x, r7.y, r3.z +mad.f32 r1.y, r0.x, r1.y, r1.z +mad.f32 r1.z, r3.y, r0.x, r1.w (rpt2)nop -mov.f32f32 r4.z, r4.z -nop -mad.f32 r4.z, r0.x, r0.x, r4.z -(rpt5)nop -rsq r4.z, r4.z -(ss)mov.f32f32 r4.z, r4.z +mul.f r1.w, r1.z, r2.y +max.f r1.y, r1.y, c9.x +mul.f r2.y, r1.z, r3.x +mul.f r1.z, r1.z, r3.y +mul.f r1.w, c8.x, r1.w +mad.f32 r1.y, c9.y, r1.y, c9.z +mul.f r2.y, c8.x, r2.y +mul.f r1.z, c8.x, r1.z +add.f r1.w, r7.x, (neg)r1.w +mov.f32f32 r3.x, r1.y +add.f r2.y, r7.y, (neg)r2.y +add.f r0.x, r0.x, (neg)r1.z +mov.f32f32 r1.z, r1.w +(sy)mul.f r3.y, r8.z, r3.x +mov.f32f32 r3.z, r2.y +mov.f32f32 r3.w, r0.x +mul.f r1.w, r1.w, r1.z +add.f r4.x, r3.y, r5.z +mad.f32 r1.w, r2.y, r3.z, r1.w +mul.f r2.y, r8.y, r3.x +mad.f32 r1.w, r3.w, r3.w, r1.w +add.f r3.w, r4.x, r5.y +mul.f r3.x, r8.x, r3.x +mul.f r1.y, r7.w, r1.y (rpt2)nop -mul.f r1.z, r1.z, r4.z -mul.f r1.w, r1.w, r4.z -mul.f r0.x, r0.x, r4.z -nop -mov.f32f32 r1.z, r1.z -mov.f32f32 r1.w, r1.w -mov.f32f32 r0.x, r0.x +rsq r1.w, r1.w +(ss)mov.f32f32 r4.x, r1.w +mul.f r0.x, r0.x, r1.w +(rpt1)nop +mul.f r1.z, r1.z, r4.x +(ss)mul.f r1.w, r3.z, r4.x +(rpt1)nop +mul.f r1.z, r1.z, r2.x nop -mul.f r1.y, r1.z, r1.y +mad.f32 r1.z, r1.w, r2.w, r1.z nop -mad.f32 r1.y, r1.w, r3.z, r1.y -(rpt2)nop -mov.f32f32 r1.y, r1.y -nop -mad.f32 r0.x, r0.x, r3.w, r1.y -(rpt2)nop -mov.f32f32 r0.x, r0.x +mad.f32 r0.x, r0.x, r6.w, r1.z (rpt2)nop max.f r0.x, r0.x, c9.x (rpt2)nop -mov.f32f32 r0.x, r0.x -(rpt2)nop -mov.f32f32 r1.y, r0.x mov.f32f32 r1.z, r0.x -cmps.f.lt r0.x, c9.x, r0.x -(rpt1)nop -mul.f r1.y, r1.y, r1.z -cov.u32f32 r0.x, r0.x +(rpt2)nop +mul.f r0.x, r0.x, r1.z +cmps.f.lt r1.z, c9.x, r1.z (rpt1)nop -mov.f32f32 r1.y, r1.y -cmps.f.ne r0.x, r0.x, c9.x +mov.f32f32 r1.w, r0.x +cov.u32f32 r1.z, r1.z (rpt1)nop -mul.f r1.y, r1.y, r1.y -sel.b32 r1.z, r2.x, r0.x, r3.x +mul.f r0.x, r0.x, r1.w +cmps.f.ne r1.z, r1.z, c9.x (rpt1)nop -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.w, r1.z +mov.f32f32 r1.w, r0.x +sel.b32 r2.x, r3.w, r1.z, r3.y +mul.f r0.x, r0.x, c9.w +nop +mul.f r1.w, r1.w, r1.w +mad.f32 r2.w, r0.w, r2.x, r4.z +add.f r3.y, r2.y, r0.x +add.f r3.z, r3.x, r0.x +mul.f r1.w, r1.w, c9.z +add.f r0.x, r1.y, r0.x +sel.b32 r2.x, r2.w, r0.z, r2.x +nop +mov.f32f32 r2.w, r1.w +add.f r0.x, r0.x, r1.w +sel.b32 r1.w, r5.w, r1.x, r2.x +nop +add.f r2.x, r3.y, r2.w +add.f r2.w, r3.z, r2.w +sel.b32 r0.x, r0.x, r1.z, r1.y +nop +sel.b32 r1.y, r2.x, r1.z, r2.y +sel.b32 r1.z, r2.w, r1.z, r3.x +mad.f32 r2.x, r0.w, r0.x, r2.z +nop +mad.f32 r2.y, r0.w, r1.y, r4.w +mad.f32 r0.w, r0.w, r1.z, r4.y +sel.b32 r0.x, r2.x, r0.z, r0.x +nop +sel.b32 r1.y, r2.y, r0.z, r1.y +sel.b32 r0.z, r0.w, r0.z, r1.z (rpt1)nop -mul.f r2.x, r1.y, r1.y -mov.f32f32 r1.w, r1.w -mul.f r1.y, r1.y, c9.w -nop -mov.f32f32 r2.x, r2.x -mad.f32 r1.w, r0.z, r1.w, r3.y -mov.f32f32 r3.x, r1.y -mov.f32f32 r3.y, r1.y -mul.f r2.x, r2.x, c9.z -sel.b32 r1.z, r1.w, r1.x, r1.z -add.f r1.w, r5.z, r3.x -add.f r3.x, r7.x, r3.y -mov.f32f32 r2.x, r2.x -sel.b32 r1.z, r6.y, r0.w, r1.z -mov.f32f32 r1.y, r1.y -nop -mov.f32f32 r3.y, r2.x -mov.f32f32 r3.z, r2.x -mov.f32f32 r2.x, r2.x -add.f r1.y, r2.y, r1.y -add.f r2.y, r1.w, r3.y -add.f r3.x, r3.x, r3.z -mov.f32f32 r1.w, r1.z -nop -sel.b32 r1.z, r2.y, r0.x, r2.w -sel.b32 r2.y, r3.x, r0.x, r2.z -add.f r1.y, r1.y, r2.x -nop -mov.f32f32 r2.x, r1.z -mov.f32f32 r2.z, r2.y -sel.b32 r0.x, r1.y, r0.x, r0.y -nop -mov.f32f32 r0.y, r2.x -mov.f32f32 r1.y, r2.z -mov.f32f32 r2.x, r0.x -nop -mad.f32 r0.y, r0.z, r0.y, r4.y -mad.f32 r1.y, r0.z, r1.y, r4.x -mov.f32f32 r2.x, r2.x -nop -sel.b32 r0.y, r0.y, r1.x, r1.z -sel.b32 r1.y, r1.y, r1.x, r2.y -mad.f32 r0.z, r0.z, r2.x, r4.w +sel.b32 r1.z, r6.x, r1.x, r1.y +sel.b32 r1.y, r6.y, r1.x, r0.z +sel.b32 r1.x, r0.y, r1.x, r0.x +end nop -sel.b32 r0.y, r6.z, r0.w, r0.y -sel.b32 r1.y, r6.w, r0.w, r1.y -sel.b32 r0.x, r0.z, r1.x, r0.x nop -mov.f32f32 r1.z, r0.y -mov.f32f32 r1.y, r1.y -sel.b32 r0.x, r6.x, r0.w, r0.x -(rpt2)nop -mov.f32f32 r1.x, r0.x -end nop ; FRAG: outputs: r1.x (1:0) -; FRAG: inputs: r0.x (5:0,cm=f,il=8,b=1) r1.x (5:20,cm=f,il=12,b=1) r0.w (5:21,cm=f,il=16,b=1) r3.x (5:22,cm=f,il=20,b=1) -; FRAG: 297 instructions, 0 half, 8 full +; FRAG: inputs: r0.x (5:0,cm=f,il=8,b=1) r1.x (5:20,cm=f,il=12,b=1) r1.y (5:21,cm=f,il=16,b=1) r3.x (5:22,cm=f,il=20,b=1) +; FRAG: 193 instructions, 0 half, 9 full diff --git a/reference/stk-mines/stk-mines-32.asm b/reference/stk-mines/stk-mines-32.asm index 2d84780..726bef5 100644 --- a/reference/stk-mines/stk-mines-32.asm +++ b/reference/stk-mines/stk-mines-32.asm @@ -1,16 +1,16 @@ ; options: ; VERT: new compiler -@in(r1.x) in0 -@in(r1.y) in1 -@in(r1.z) in2 -@in(r1.w) in3 +@in(r4.z) in0 +@in(r4.w) in1 +@in(r5.x) in2 +@in(r5.y) in3 @in(r0.x) in4 @in(r0.y) in5 @in(r0.z) in6 -@in(r5.x) in8 -@in(r5.y) in9 -@in(r5.z) in10 -@in(r5.w) in11 +@in(r1.x) in8 +@in(r1.y) in9 +@in(r1.z) in10 +@in(r1.w) in11 @out(r0.x) out0 @out(r0.y) out1 @out(r0.z) out2 @@ -31,191 +31,144 @@ @out(r4.y) out17 @out(r4.z) out18 @out(r4.w) out19 +@const(c12.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)mov.f32f32 r0.w, c1.x -mul.f r2.x, c8.x, r1.x +mul.f r2.x, c8.x, r4.z mov.f32f32 r2.y, c1.y mov.f32f32 r2.z, c1.z mul.f r2.w, r0.w, r0.x mov.f32f32 r3.x, c2.x -mad.f32 r2.x, c9.x, r1.y, r2.x +mad.f32 r2.x, c9.x, r4.w, r2.x mul.f r3.y, r0.w, r0.w -mad.f32 r2.x, c10.x, r1.z, r2.x +mad.f32 r2.x, c10.x, r5.x, r2.x mad.f32 r2.w, r3.x, r0.y, r2.w -mad.f32 r2.x, c11.x, r1.w, r2.x -mad.f32 r3.x, r2.y, r2.y, r3.y -mul.f r3.y, r2.y, r0.x -mov.f32f32 r2.w, r2.w -mov.f32f32 r3.z, c3.x -mov.f32f32 r2.x, r2.x -mov.f32f32 r3.x, r3.x +mov.f32f32 r3.x, c3.x +mad.f32 r2.x, c11.x, r5.y, r2.x +mad.f32 r3.y, r2.y, r2.y, r3.y +mul.f r3.z, r2.y, r0.x +mad.f32 r2.w, r3.x, r0.z, r2.w +mov.f32f32 r3.x, r2.x +mad.f32 r3.y, r2.z, r2.z, r3.y mov.f32f32 r3.w, c2.y -mad.f32 r2.w, r3.z, r0.z, r2.w -mul.f r3.z, r2.x, r2.x -mul.f r4.x, c8.y, r1.x -mad.f32 r3.x, r2.z, r2.z, r3.x -mov.f32f32 r4.y, r2.w -mad.f32 r2.w, c9.y, r1.y, r4.x -mad.f32 r3.y, r3.w, r0.y, r3.y -mul.f r0.x, r2.z, r0.x -mul.f r3.w, r4.y, r4.y -mad.f32 r2.w, c10.y, r1.z, r2.w -mov.f32f32 r3.y, r3.y -mov.f32f32 r4.x, c3.y -mad.f32 r2.w, c11.y, r1.w, r2.w -rsq r3.x, r3.x -(ss)mov.f32f32 r3.x, r3.x -mov.f32f32 r4.z, c2.z -mad.f32 r3.y, r4.x, r0.z, r3.y mov.f32f32 r4.x, r2.w -mul.f r0.w, r0.w, r3.x -mul.f r2.y, r2.y, r3.x -mov.f32f32 r3.y, r3.y -mad.f32 r2.w, r4.x, r4.x, r3.z +mul.f r3.x, r3.x, r3.x +mul.f r4.y, c8.y, r4.z +mad.f32 r3.z, r3.w, r0.y, r3.z +mul.f r2.w, r2.w, r4.x +mov.f32f32 r3.w, c3.y +mad.f32 r4.y, c9.y, r4.w, r4.y +rsq r3.y, r3.y +(ss)mov.f32f32 r5.z, r3.y +mad.f32 r4.y, c10.y, r5.x, r4.y +mad.f32 r3.z, r3.w, r0.z, r3.z +mad.f32 r3.w, c11.y, r5.y, r4.y +mul.f r0.w, r0.w, r5.z +mul.f r2.y, r2.y, r5.z +mov.f32f32 r4.y, r3.z +mov.f32f32 r5.z, r3.w +mul.f r5.w, c0.x, r0.w mov.f32f32 r0.w, r0.w -mov.f32f32 r2.y, r2.y -mad.f32 r3.z, r3.y, r3.y, r3.w -mov.f32f32 r2.w, r2.w -mul.f r3.w, c8.z, r1.x -mul.f r4.w, c0.x, r0.w -mov.f32f32 r3.z, r3.z -mad.f32 r0.x, r4.z, r0.y, r0.x -mad.f32 r0.y, c9.z, r1.y, r3.w -mad.f32 r3.w, c0.y, r2.y, r4.w -mad.f32 r0.y, c10.z, r1.z, r0.y -mov.f32f32 r0.x, r0.x -mov.f32f32 r4.z, c3.z -mad.f32 r0.y, c11.z, r1.w, r0.y -mov.f32f32 r3.w, r3.w -mul.f r2.z, r2.z, r3.x -mad.f32 r0.x, r4.z, r0.z, r0.x -mov.f32f32 r0.y, r0.y -mul.f r0.z, c4.w, r1.x -mul.f r3.x, c4.z, r1.x -mov.f32f32 r0.x, r0.x -mad.f32 r2.w, r0.y, r0.y, r2.w +mad.f32 r2.w, r3.z, r4.y, r2.w +mul.f r0.x, r2.z, r0.x +mov.f32f32 r3.z, c2.z +mad.f32 r3.x, r3.w, r5.z, r3.x +mul.f r3.w, c8.z, r4.z +mad.f32 r5.w, c0.y, r2.y, r5.w +mad.f32 r0.x, r3.z, r0.y, r0.x +mov.f32f32 r0.y, c3.z +mad.f32 r3.z, c9.z, r4.w, r3.w +mul.f r2.z, r2.z, r3.y +(ss)mad.f32 r3.y, c10.z, r5.x, r3.z +mad.f32 r0.x, r0.y, r0.z, r0.x +mad.f32 r0.y, c11.z, r5.y, r3.y +mad.f32 r3.y, c0.z, r2.z, r5.w +mov.f32f32 r0.z, r2.y +mov.f32f32 r2.y, r0.x +mov.f32f32 r3.z, r0.y +mov.f32f32 r3.w, r3.y mov.f32f32 r2.z, r2.z -mad.f32 r0.z, c5.w, r1.y, r0.z -mad.f32 r3.z, r0.x, r0.x, r3.z -mad.f32 r0.z, c6.w, r1.z, r0.z -mad.f32 r3.x, c5.z, r1.y, r3.x -mul.f r4.z, c4.y, r1.x -mul.f r1.x, c4.x, r1.x -mad.f32 r0.z, c7.w, r1.w, r0.z -mad.f32 r3.x, c6.z, r1.z, r3.x -rsq r3.z, r3.z -(ss)mov.f32f32 r3.z, r3.z -rsq r2.w, r2.w -(ss)mov.f32f32 r4.w, r2.w -mad.f32 r3.w, c0.z, r2.z, r3.w -(ss)mov.f32f32 r2.w, r0.z -mul.f r0.x, r0.x, r3.z -mul.f r2.x, r2.x, r4.w -mul.f r4.y, r4.y, r3.z -mul.f r3.y, r3.y, r3.z -mov.f32f32 r0.x, r0.x -mov.f32f32 r2.x, r2.x -mov.f32f32 r3.z, r4.y -mov.f32f32 r3.y, r3.y -mul.f r4.y, r0.x, r2.y -absneg.f r2.x, (neg)r2.x -mul.f r6.x, c0.x, r3.z -mad.f32 r4.y, r3.y, r2.z, (neg)r4.y -mad.f32 r6.x, c0.y, r3.y, r6.x -mov.f32f32 r6.y, r2.x -mul.f r2.x, r3.z, r2.z -mov.f32f32 r2.z, r4.y -mov.f32f32 r4.y, r6.x -mul.f r6.x, r6.y, r6.y -mul.f r4.x, r4.x, r4.w -mul.f r2.z, c0.x, r2.z -mad.f32 r2.x, r0.x, r0.w, (neg)r2.x -mad.f32 r0.x, c0.z, r0.x, r4.y +mad.f32 r2.y, r2.y, r2.y, r2.w +mad.f32 r0.y, r0.y, r3.z, r3.x +mul.f r3.x, r3.w, r3.w +mul.f r2.w, c4.w, r4.z +mul.f r3.w, c4.z, r4.z +mul.f r5.w, c4.y, r4.z +mul.f r4.z, c4.x, r4.z +rsq r2.y, r2.y +(ss)mov.f32f32 r6.x, r2.y +rsq r0.y, r0.y +(ss)mov.f32f32 r6.y, r0.y +(ss)mul.f r0.y, r2.x, r0.y +mul.f r0.x, r0.x, r2.y +mul.f r2.x, r4.y, r6.x +mul.f r2.y, r5.z, r6.y +mul.f r4.x, r4.x, r6.x +absneg.f r4.y, (neg)r0.y +mov.f32f32 r0.y, r2.x +mov.f32f32 r5.z, r0.x +absneg.f r2.y, (neg)r2.y +mul.f r6.x, c0.x, r4.x mov.f32f32 r4.x, r4.x -mul.f r0.w, r3.y, r0.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r0.x, r0.x -absneg.f r3.y, (neg)r4.x -mad.f32 r0.w, r3.z, r2.y, (neg)r0.w -mad.f32 r2.x, c0.y, r2.x, r2.z -mov.f32f32 r0.x, r0.x -mov.f32f32 r4.x, r3.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r3.y, r3.w -mul.f r0.y, r0.y, r4.w -mad.f32 r0.w, c0.z, r0.w, r2.x -mad.f32 r2.x, r4.x, r4.x, r6.x -mul.f r2.y, r3.y, r3.y -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r2.x, r2.x -mad.f32 r3.x, c7.z, r1.w, r3.x -mad.f32 r3.z, c5.y, r1.y, r4.z -mov.f32f32 r3.w, r0.w -absneg.f r0.y, (neg)r0.y -mov.f32f32 r2.z, r3.x -mad.f32 r0.w, c6.y, r1.z, r3.z -mad.f32 r2.y, r3.w, r3.w, r2.y -mov.f32f32 r3.z, r0.y -mad.f32 r0.y, c7.y, r1.w, r0.w -mad.f32 r0.w, c5.x, r1.y, r1.x -mov.f32f32 r1.x, r2.y -mad.f32 r1.y, r3.z, r3.z, r2.x -mad.f32 r1.x, r0.x, r0.x, r1.x +mul.f r6.z, r5.z, r0.z +mov.f32f32 r6.w, r2.y +mad.f32 r6.z, r0.y, r2.z, (neg)r6.z +mov.f32f32 r7.x, r4.y +mad.f32 r2.x, c0.y, r2.x, r6.x +mul.f r2.z, r4.x, r2.z +mul.f r6.x, c0.x, r6.z +mad.f32 r2.z, r5.z, r0.w, (neg)r2.z +mul.f r5.z, r7.x, r7.x +mad.f32 r0.x, c0.z, r0.x, r2.x +mad.f32 r2.x, r2.y, r6.w, r5.z +mad.f32 r2.y, c0.y, r2.z, r6.x +mul.f r0.y, r0.y, r0.w +mul.f r0.w, r3.z, r6.y +mad.f32 r0.y, r4.x, r0.z, (neg)r0.y +mov.f32f32 r0.z, r0.x +mad.f32 r2.z, c5.w, r4.w, r2.w +mad.f32 r2.w, c5.z, r4.w, r3.w +mad.f32 r0.y, c0.z, r0.y, r2.y +absneg.f r2.y, (neg)r0.w +mov.f32f32 r3.z, r0.z +mad.f32 r0.z, c6.w, r5.x, r2.z +mov.f32f32 r2.z, r0.y +mov.f32f32 r4.x, r2.y +mad.f32 r0.w, c7.w, r5.y, r0.z +mad.f32 r0.z, c6.z, r5.x, r2.w +mov.f32f32 r5.z, r2.z +mad.f32 r2.x, r2.y, r4.x, r2.x +mov.f32f32 r2.w, r0.w +mad.f32 r0.z, c7.z, r5.y, r0.z +mad.f32 r0.y, r0.y, r5.z, r3.x +mad.f32 r2.y, c5.y, r4.w, r5.w +mad.f32 r0.x, r0.x, r3.z, r0.y +mov.f32f32 r2.z, r0.z +mad.f32 r0.y, c6.y, r5.x, r2.y +mad.f32 r2.y, c5.x, r4.w, r4.z +mad.f32 r0.y, c7.y, r5.y, r0.y +mad.f32 r4.z, c6.x, r5.x, r2.y +nop +rsq r0.x, r0.x +(ss)mov.f32f32 r2.y, r0.x +mul.f r3.x, r3.y, r0.x +(ss)rsq r0.x, r2.x +(ss)mov.f32f32 r2.x, r0.x +mul.f r3.w, r4.y, r0.x +mul.f r3.z, r3.z, r2.y +mul.f r3.y, r5.z, r2.y +mul.f r4.y, r4.x, r2.x +mul.f r4.x, r6.w, r2.x mov.f32f32 r2.y, r0.y -mad.f32 r1.z, c6.x, r1.z, r0.w -mov.f32f32 r0.w, r0.z -mov.f32f32 r0.z, r3.x -mov.f32f32 r0.y, r0.y -mad.f32 r1.z, c7.x, r1.w, r1.z -rsq r1.x, r1.x -(ss)mov.f32f32 r1.x, r1.x -rsq r1.y, r1.y -(ss)mov.f32f32 r1.y, r1.y -mov.f32f32 r1.w, (0.000000) -mov.f32f32 r2.x, r1.z -mul.f r0.x, r0.x, r1.x -mul.f r3.x, r3.w, r1.x -mul.f r1.x, r3.y, r1.x -mul.f r3.y, r3.z, r1.y -mov.f32f32 r0.x, r0.x -mov.f32f32 r3.x, r3.x -mov.f32f32 r1.x, r1.x -mov.f32f32 r3.w, r3.y -mov.f32f32 r0.x, r0.x -mov.f32f32 r3.x, r3.x -mov.f32f32 r1.x, r1.x -mov.f32f32 r4.y, r3.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r3.x, r3.x -mov.f32f32 r1.x, r1.x +mad.f32 r0.x, c7.x, r5.y, r4.z +mov.f32f32 r4.z, r4.y +mov.f32f32 r4.w, (0.000000) nop -mov.f32f32 r3.z, r0.x -mov.f32f32 r3.y, r3.x -mov.f32f32 r3.x, r1.x -mov.f32f32 r0.x, r4.y -mul.f r1.x, r4.x, r1.y -mul.f r1.y, r6.y, r1.y -mov.f32f32 r4.z, r3.w -mov.f32f32 r4.y, r0.x -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.y, r1.y -mov.f32f32 r0.x, r1.z -mov.f32f32 r4.w, r1.w -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.y, r1.y -(rpt1)nop -mov.f32f32 r1.x, r1.x -mov.f32f32 r3.w, r1.y -mov.f32f32 r1.w, r5.w -mov.f32f32 r1.z, r5.z -mov.f32f32 r4.x, r1.x -mov.f32f32 r1.y, r5.y -mov.f32f32 r1.x, r5.x +mov.f32f32 r2.x, r0.x end nop nop +nop ; VERT: outputs: r0.x (0:0) r1.x (5:0) r2.x (5:20) r3.x (5:21) r4.x (5:22) -; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r5.x (0:0,cm=f,il=16,b=0) -; VERT: 183 instructions, 0 half, 7 full +; VERT: inputs: r4.z (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r1.x (0:0,cm=f,il=16,b=0) +; VERT: 133 instructions, 0 half, 8 full diff --git a/reference/stk-mines/stk-mines-33.asm b/reference/stk-mines/stk-mines-33.asm index 1435d60..141701a 100644 --- a/reference/stk-mines/stk-mines-33.asm +++ b/reference/stk-mines/stk-mines-33.asm @@ -6,275 +6,191 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c8.x) 0x40000000, 0xbf800000, 0x40800000, 0x3f000000 +@const(c9.x) 0x00000000, 0x3f333333, 0x3e99999a, 0x3d4ccccd +@const(c10.x) 0x3f800000, 0x00000000, 0x00000000, 0x00000000 +@const(c11.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.z, 0, r0.x absneg.f r0.w, (neg)c6.x -mov.f32f32 r1.x, c3.x -bary.f r1.y, 1, r0.x -add.f r1.z, r0.z, c3.x -add.f r1.w, r0.z, c4.x -add.f r2.x, c7.x, r0.w -add.f r0.z, r0.z, r1.x -mov.f32f32 r1.x, r1.z -mov.f32f32 r1.z, r1.w -mov.f32f32 r1.w, r2.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r2.x, c9.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r2.y, r1.x -add.f r1.x, r1.y, c3.y -mov.f32f32 r2.w, r1.z -add.f r1.z, r1.y, c4.y -rcp r1.w, r1.w -(ss)mov.f32f32 r1.w, r1.w -mov.f32f32 r1.x, r1.x -bary.f r3.y, 6, r0.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r0.z, r0.z -mov.f32f32 r1.x, r1.x -add.f r0.w, r3.y, r0.w -mov.f32f32 r1.z, r1.z -mov.f32f32 r3.z, r0.z -mov.f32f32 r2.z, r1.x -mul.f r0.z, r0.w, r1.w -mov.f32f32 r3.x, r1.z -add.f r0.w, r1.y, r2.x -cmps.f.lt r1.x, c6.x, r3.y -cmps.f.lt r1.y, c7.x, r3.y -bary.f r1.z, 11, r0.x -sam (f32)(xyz)r1.w, r2.y, s#0, t#0 -(sy)mad.f32 r2.x, c8.x, r2.x, c8.y -mov.f32f32 r0.z, r0.z -(ss)nop -sam (f32)(xyz)r2.z, r2.w, s#1, t#1 -(sy)(ss)mad.f32 r2.w, c8.x, r2.w, c8.y -mad.f32 r2.z, c8.x, r2.z, c8.y -mov.f32f32 r2.x, r2.x -mul.f r3.y, r0.z, c5.w -mul.f r3.w, r0.z, c5.z -mul.f r4.x, r0.z, c5.y -mul.f r2.x, c8.z, r2.x -mov.f32f32 r2.w, r2.w -mov.f32f32 r2.z, r2.z -mov.f32f32 r3.y, r3.y -mov.f32f32 r2.x, r2.x -mad.f32 r1.w, c8.x, r1.w, c8.y -mul.f r2.w, c8.z, r2.w -mov.f32f32 r2.z, r2.z -mov.f32f32 r4.y, r3.w -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.w, r2.w -mul.f r3.w, r2.z, r2.z -mov.f32f32 r4.x, r4.x -mul.f r4.z, r1.w, r1.w -mov.f32f32 r2.w, r2.w -mad.f32 r4.z, r2.x, r2.x, r4.z -mul.f r4.w, r0.z, c5.x -add.f r0.z, c10.x, (neg)r0.z -mad.f32 r3.w, r2.w, r2.w, r3.w -mov.f32f32 r4.z, r4.z +bary.f r1.x, 6, r0.x +mov.f32f32 r1.y, c3.x +add.f r1.z, r0.z, c4.x +bary.f r2.x, 1, r0.x +add.f r2.y, r0.z, c3.x +add.f r2.w, c7.x, r0.w +cmps.f.lt r3.x, c6.x, r1.x +add.f r1.w, r2.x, c4.y +add.f r2.z, r2.x, c3.y +cmps.f.lt r3.y, c7.x, r1.x +add.f r3.z, r0.z, r1.y +cov.u32f32 r0.z, r3.x +rcp r1.y, r2.w +add.f r0.w, r1.x, r0.w +cov.u32f32 r1.x, r3.y +sam (f32)(xyz)r3.w, r1.z, s#1, t#1 +(sy)(ss)mad.f32 r1.z, c8.x, r4.x, c8.y +sam (f32)(xyz)r2.y, r2.y, s#0, t#0 +(sy)mad.f32 r1.w, c8.x, r2.z, c8.y +(ss)mul.f r0.w, r0.w, r1.y +mad.f32 r1.y, c8.x, r3.w, c8.y +mul.f r1.z, c8.z, r1.z +mul.f r1.w, c8.z, r1.w +(ss)mov.f32f32 r2.z, r0.w +mov.f32f32 r3.x, r1.y +mov.f32f32 r3.y, r1.z +mov.f32f32 r4.x, r1.w mad.f32 r2.y, c8.x, r2.y, c8.y -mov.f32f32 r4.w, r4.w -mad.f32 r3.x, c8.x, r3.x, c8.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r2.y, r2.y -mov.f32f32 r3.w, r3.w mov.f32f32 r3.x, r3.x -mov.f32f32 r0.w, r0.w -mad.f32 r4.z, r2.y, r2.y, r4.z -cov.u32f32 r1.x, r1.x -cov.u32f32 r1.y, r1.y -(rpt3)nop -rsq r4.z, r4.z -(ss)mov.f32f32 r4.z, r4.z -mov.f32f32 r3.x, r3.x -mov.f32f32 r0.w, r0.w +mov.f32f32 r3.y, r3.y +mul.f r4.z, r2.z, c5.w +mul.f r4.w, r2.z, c5.z +mul.f r1.y, r1.y, r3.x +mov.f32f32 r5.x, r2.y +mad.f32 r1.y, r1.z, r3.y, r1.y +mad.f32 r1.z, c8.x, r4.y, c8.y +mul.f r4.y, r2.z, c5.y +mul.f r2.z, r2.z, c5.x +mul.f r2.y, r2.y, r5.x +mov.f32f32 r3.w, r1.z +mad.f32 r1.w, r1.w, r4.x, r2.y +mad.f32 r2.y, c8.x, r2.w, c8.y +add.f r0.w, c10.x, (neg)r0.w +mov.f32f32 r2.w, r3.w +cmps.f.ne r0.z, r0.z, c9.x cmps.f.ne r1.x, r1.x, c9.x -mul.f r1.w, r1.w, r4.z -mul.f r2.x, r2.x, r4.z -mul.f r2.y, r2.y, r4.z -mad.f32 r4.z, r3.x, r3.x, r3.w -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r2.y, r2.y -mov.f32f32 r3.w, r0.w -cmps.f.ne r0.w, r1.y, c9.x -mov.f32f32 r1.y, r1.z -rsq r1.z, r4.z -(ss)mov.f32f32 r1.z, r1.z -(ss)bary.f r4.z, 12, r0.x -bary.f r5.x, 13, r0.x +mov.f32f32 r3.w, c9.x +mad.f32 r1.y, r2.w, r2.w, r1.y +mov.f32f32 r2.w, r2.y mov.f32f32 r5.y, c9.x -mad.f32 r1.w, r2.z, r1.z, r1.w -mad.f32 r2.x, r2.w, r1.z, r2.x -mad.f32 r1.z, r3.x, r1.z, r2.y -sam (f32)(xyzw)r2.y, r3.z, s#2, t#2 -(ss)mov.f32f32 r3.z, r4.z -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r3.w, r5.x -mul.f r1.w, r1.w, c8.w -mul.f r2.x, r2.x, c8.w -mul.f r1.z, r1.z, c8.w -nop -mov.f32f32 r1.w, r1.w -bary.f r4.z, 8, r0.x -mov.f32f32 r2.x, r2.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r5.x, c9.x -mul.f r5.z, r1.w, r4.z -bary.f r5.w, 9, r0.x -mul.f r6.x, r4.z, r1.w -mov.f32f32 r6.y, c5.w -mov.f32f32 r6.z, c5.z -mad.f32 r5.z, r2.x, r5.w, r5.z -mad.f32 r6.x, r5.w, r2.x, r6.x -mov.f32f32 r6.w, c5.y +mov.f32f32 r5.z, c9.x +mov.f32f32 r5.w, c5.w +mov.f32f32 r6.x, c5.z +mov.f32f32 r6.y, c5.y +rsq r1.y, r1.y +(ss)mov.f32f32 r6.z, r1.y +mad.f32 r1.w, r2.w, r2.w, r1.w +add.f r3.w, r2.x, r3.w +bary.f r2.x, 11, r0.x +bary.f r2.w, 12, r0.x +bary.f r6.w, 13, r0.x +bary.f r7.x, 8, r0.x +bary.f r7.y, 9, r0.x +rsq r1.w, r1.w +(ss)mov.f32f32 r7.z, r1.w +(ss)mul.f r1.w, r2.y, r1.w +sam (f32)(xyzw)r7.w, r3.z, s#2, t#2 bary.f (ei)r0.x, 10, r0.x -mov.f32f32 r0.y, r5.z -mov.f32f32 r5.z, r6.x -mov.f32f32 r6.x, c5.x -mad.f32 r0.y, r1.z, r0.x, r0.y -mad.f32 r5.z, r0.x, r1.z, r5.z -(rpt1)nop -mul.f r1.w, r0.y, r1.w -max.f r5.z, r5.z, c9.x -mul.f r2.x, r0.y, r2.x -mul.f r0.y, r0.y, r1.z -mov.f32f32 r1.z, r1.w -mov.f32f32 r1.w, r5.z -mov.f32f32 r2.x, r2.x -mov.f32f32 r0.y, r0.y -mul.f r1.z, c8.x, r1.z -mad.f32 r1.w, c9.y, r1.w, c9.z -mul.f r2.x, c8.x, r2.x -mul.f r0.y, c8.x, r0.y -mov.f32f32 r1.z, r1.z -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r0.y, r0.y -add.f r1.z, r4.z, (neg)r1.z -(sy)mul.f r3.x, r3.x, r1.w -mul.f r2.w, r2.w, r1.w -mul.f r2.z, r2.z, r1.w -mov.f32f32 r1.z, r1.z -mov.f32f32 r4.z, r3.x -mov.f32f32 r5.z, r2.w -mov.f32f32 r7.x, r2.z -mul.f r7.y, r1.z, r1.z -add.f r2.x, r5.w, (neg)r2.x -add.f r4.z, r4.z, r5.x -add.f r0.x, r0.x, (neg)r0.y -mul.f r0.y, r2.y, r1.w -mov.f32f32 r1.w, r2.x -add.f r2.x, r4.z, r5.y -mov.f32f32 r0.x, r0.x -mov.f32f32 r2.y, r0.y -mad.f32 r4.z, r1.w, r1.w, r7.y +mov.f32f32 r0.y, c5.x +mul.f r2.y, r5.x, r7.z +(ss)mul.f r3.z, r4.x, r7.z +mad.f32 r2.y, r3.x, r6.z, r2.y +mad.f32 r3.x, r3.y, r6.z, r3.z +mad.f32 r1.y, r1.z, r1.y, r1.w +nop +mul.f r1.z, r2.y, c8.w +mul.f r1.w, r3.x, c8.w +mul.f r1.y, r1.y, c8.w +nop +mov.f32f32 r2.y, r1.z +mul.f r1.z, r7.x, r1.z +mov.f32f32 r3.x, r1.w +mov.f32f32 r3.y, r1.y +mul.f r3.z, r2.y, r7.x +mad.f32 r1.z, r7.y, r1.w, r1.z +mad.f32 r1.w, r3.x, r7.y, r3.z +mad.f32 r1.y, r0.x, r1.y, r1.z +mad.f32 r1.z, r3.y, r0.x, r1.w (rpt2)nop -mov.f32f32 r4.z, r4.z -nop -mad.f32 r4.z, r0.x, r0.x, r4.z -(rpt5)nop -rsq r4.z, r4.z -(ss)mov.f32f32 r4.z, r4.z +mul.f r1.w, r1.z, r2.y +max.f r1.y, r1.y, c9.x +mul.f r2.y, r1.z, r3.x +mul.f r1.z, r1.z, r3.y +mul.f r1.w, c8.x, r1.w +mad.f32 r1.y, c9.y, r1.y, c9.z +mul.f r2.y, c8.x, r2.y +mul.f r1.z, c8.x, r1.z +add.f r1.w, r7.x, (neg)r1.w +mov.f32f32 r3.x, r1.y +add.f r2.y, r7.y, (neg)r2.y +add.f r0.x, r0.x, (neg)r1.z +mov.f32f32 r1.z, r1.w +(sy)mul.f r3.y, r8.z, r3.x +mov.f32f32 r3.z, r2.y +mov.f32f32 r3.w, r0.x +mul.f r1.w, r1.w, r1.z +add.f r4.x, r3.y, r5.z +mad.f32 r1.w, r2.y, r3.z, r1.w +mul.f r2.y, r8.y, r3.x +mad.f32 r1.w, r3.w, r3.w, r1.w +add.f r3.w, r4.x, r5.y +mul.f r3.x, r8.x, r3.x +mul.f r1.y, r7.w, r1.y (rpt2)nop -mul.f r1.z, r1.z, r4.z -mul.f r1.w, r1.w, r4.z -mul.f r0.x, r0.x, r4.z -nop -mov.f32f32 r1.z, r1.z -mov.f32f32 r1.w, r1.w -mov.f32f32 r0.x, r0.x +rsq r1.w, r1.w +(ss)mov.f32f32 r4.x, r1.w +mul.f r0.x, r0.x, r1.w +(rpt1)nop +mul.f r1.z, r1.z, r4.x +(ss)mul.f r1.w, r3.z, r4.x +(rpt1)nop +mul.f r1.z, r1.z, r2.x nop -mul.f r1.y, r1.z, r1.y +mad.f32 r1.z, r1.w, r2.w, r1.z nop -mad.f32 r1.y, r1.w, r3.z, r1.y -(rpt2)nop -mov.f32f32 r1.y, r1.y -nop -mad.f32 r0.x, r0.x, r3.w, r1.y -(rpt2)nop -mov.f32f32 r0.x, r0.x +mad.f32 r0.x, r0.x, r6.w, r1.z (rpt2)nop max.f r0.x, r0.x, c9.x (rpt2)nop -mov.f32f32 r0.x, r0.x -(rpt2)nop -mov.f32f32 r1.y, r0.x mov.f32f32 r1.z, r0.x -cmps.f.lt r0.x, c9.x, r0.x -(rpt1)nop -mul.f r1.y, r1.y, r1.z -cov.u32f32 r0.x, r0.x +(rpt2)nop +mul.f r0.x, r0.x, r1.z +cmps.f.lt r1.z, c9.x, r1.z (rpt1)nop -mov.f32f32 r1.y, r1.y -cmps.f.ne r0.x, r0.x, c9.x +mov.f32f32 r1.w, r0.x +cov.u32f32 r1.z, r1.z (rpt1)nop -mul.f r1.y, r1.y, r1.y -sel.b32 r1.z, r2.x, r0.x, r3.x +mul.f r0.x, r0.x, r1.w +cmps.f.ne r1.z, r1.z, c9.x (rpt1)nop -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.w, r1.z +mov.f32f32 r1.w, r0.x +sel.b32 r2.x, r3.w, r1.z, r3.y +mul.f r0.x, r0.x, c9.w +nop +mul.f r1.w, r1.w, r1.w +mad.f32 r2.w, r0.w, r2.x, r4.z +add.f r3.y, r2.y, r0.x +add.f r3.z, r3.x, r0.x +mul.f r1.w, r1.w, c9.z +add.f r0.x, r1.y, r0.x +sel.b32 r2.x, r2.w, r0.z, r2.x +nop +mov.f32f32 r2.w, r1.w +add.f r0.x, r0.x, r1.w +sel.b32 r1.w, r5.w, r1.x, r2.x +nop +add.f r2.x, r3.y, r2.w +add.f r2.w, r3.z, r2.w +sel.b32 r0.x, r0.x, r1.z, r1.y +nop +sel.b32 r1.y, r2.x, r1.z, r2.y +sel.b32 r1.z, r2.w, r1.z, r3.x +mad.f32 r2.x, r0.w, r0.x, r2.z +nop +mad.f32 r2.y, r0.w, r1.y, r4.w +mad.f32 r0.w, r0.w, r1.z, r4.y +sel.b32 r0.x, r2.x, r0.z, r0.x +nop +sel.b32 r1.y, r2.y, r0.z, r1.y +sel.b32 r0.z, r0.w, r0.z, r1.z (rpt1)nop -mul.f r2.x, r1.y, r1.y -mov.f32f32 r1.w, r1.w -mul.f r1.y, r1.y, c9.w -nop -mov.f32f32 r2.x, r2.x -mad.f32 r1.w, r0.z, r1.w, r3.y -mov.f32f32 r3.x, r1.y -mov.f32f32 r3.y, r1.y -mul.f r2.x, r2.x, c9.z -sel.b32 r1.z, r1.w, r1.x, r1.z -add.f r1.w, r5.z, r3.x -add.f r3.x, r7.x, r3.y -mov.f32f32 r2.x, r2.x -sel.b32 r1.z, r6.y, r0.w, r1.z -mov.f32f32 r1.y, r1.y -nop -mov.f32f32 r3.y, r2.x -mov.f32f32 r3.z, r2.x -mov.f32f32 r2.x, r2.x -add.f r1.y, r2.y, r1.y -add.f r2.y, r1.w, r3.y -add.f r3.x, r3.x, r3.z -mov.f32f32 r1.w, r1.z -nop -sel.b32 r1.z, r2.y, r0.x, r2.w -sel.b32 r2.y, r3.x, r0.x, r2.z -add.f r1.y, r1.y, r2.x -nop -mov.f32f32 r2.x, r1.z -mov.f32f32 r2.z, r2.y -sel.b32 r0.x, r1.y, r0.x, r0.y -nop -mov.f32f32 r0.y, r2.x -mov.f32f32 r1.y, r2.z -mov.f32f32 r2.x, r0.x -nop -mad.f32 r0.y, r0.z, r0.y, r4.y -mad.f32 r1.y, r0.z, r1.y, r4.x -mov.f32f32 r2.x, r2.x -nop -sel.b32 r0.y, r0.y, r1.x, r1.z -sel.b32 r1.y, r1.y, r1.x, r2.y -mad.f32 r0.z, r0.z, r2.x, r4.w +sel.b32 r1.z, r6.x, r1.x, r1.y +sel.b32 r1.y, r6.y, r1.x, r0.z +sel.b32 r1.x, r0.y, r1.x, r0.x +end nop -sel.b32 r0.y, r6.z, r0.w, r0.y -sel.b32 r1.y, r6.w, r0.w, r1.y -sel.b32 r0.x, r0.z, r1.x, r0.x nop -mov.f32f32 r1.z, r0.y -mov.f32f32 r1.y, r1.y -sel.b32 r0.x, r6.x, r0.w, r0.x -(rpt2)nop -mov.f32f32 r1.x, r0.x -end nop ; FRAG: outputs: r1.x (1:0) -; FRAG: inputs: r0.x (5:0,cm=f,il=8,b=1) r1.x (5:20,cm=f,il=12,b=1) r0.w (5:21,cm=f,il=16,b=1) r3.x (5:22,cm=f,il=20,b=1) -; FRAG: 297 instructions, 0 half, 8 full +; FRAG: inputs: r0.x (5:0,cm=f,il=8,b=1) r1.x (5:20,cm=f,il=12,b=1) r1.y (5:21,cm=f,il=16,b=1) r3.x (5:22,cm=f,il=20,b=1) +; FRAG: 193 instructions, 0 half, 9 full diff --git a/reference/stk-mines/stk-mines-34.asm b/reference/stk-mines/stk-mines-34.asm index 2d84780..726bef5 100644 --- a/reference/stk-mines/stk-mines-34.asm +++ b/reference/stk-mines/stk-mines-34.asm @@ -1,16 +1,16 @@ ; options: ; VERT: new compiler -@in(r1.x) in0 -@in(r1.y) in1 -@in(r1.z) in2 -@in(r1.w) in3 +@in(r4.z) in0 +@in(r4.w) in1 +@in(r5.x) in2 +@in(r5.y) in3 @in(r0.x) in4 @in(r0.y) in5 @in(r0.z) in6 -@in(r5.x) in8 -@in(r5.y) in9 -@in(r5.z) in10 -@in(r5.w) in11 +@in(r1.x) in8 +@in(r1.y) in9 +@in(r1.z) in10 +@in(r1.w) in11 @out(r0.x) out0 @out(r0.y) out1 @out(r0.z) out2 @@ -31,191 +31,144 @@ @out(r4.y) out17 @out(r4.z) out18 @out(r4.w) out19 +@const(c12.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)mov.f32f32 r0.w, c1.x -mul.f r2.x, c8.x, r1.x +mul.f r2.x, c8.x, r4.z mov.f32f32 r2.y, c1.y mov.f32f32 r2.z, c1.z mul.f r2.w, r0.w, r0.x mov.f32f32 r3.x, c2.x -mad.f32 r2.x, c9.x, r1.y, r2.x +mad.f32 r2.x, c9.x, r4.w, r2.x mul.f r3.y, r0.w, r0.w -mad.f32 r2.x, c10.x, r1.z, r2.x +mad.f32 r2.x, c10.x, r5.x, r2.x mad.f32 r2.w, r3.x, r0.y, r2.w -mad.f32 r2.x, c11.x, r1.w, r2.x -mad.f32 r3.x, r2.y, r2.y, r3.y -mul.f r3.y, r2.y, r0.x -mov.f32f32 r2.w, r2.w -mov.f32f32 r3.z, c3.x -mov.f32f32 r2.x, r2.x -mov.f32f32 r3.x, r3.x +mov.f32f32 r3.x, c3.x +mad.f32 r2.x, c11.x, r5.y, r2.x +mad.f32 r3.y, r2.y, r2.y, r3.y +mul.f r3.z, r2.y, r0.x +mad.f32 r2.w, r3.x, r0.z, r2.w +mov.f32f32 r3.x, r2.x +mad.f32 r3.y, r2.z, r2.z, r3.y mov.f32f32 r3.w, c2.y -mad.f32 r2.w, r3.z, r0.z, r2.w -mul.f r3.z, r2.x, r2.x -mul.f r4.x, c8.y, r1.x -mad.f32 r3.x, r2.z, r2.z, r3.x -mov.f32f32 r4.y, r2.w -mad.f32 r2.w, c9.y, r1.y, r4.x -mad.f32 r3.y, r3.w, r0.y, r3.y -mul.f r0.x, r2.z, r0.x -mul.f r3.w, r4.y, r4.y -mad.f32 r2.w, c10.y, r1.z, r2.w -mov.f32f32 r3.y, r3.y -mov.f32f32 r4.x, c3.y -mad.f32 r2.w, c11.y, r1.w, r2.w -rsq r3.x, r3.x -(ss)mov.f32f32 r3.x, r3.x -mov.f32f32 r4.z, c2.z -mad.f32 r3.y, r4.x, r0.z, r3.y mov.f32f32 r4.x, r2.w -mul.f r0.w, r0.w, r3.x -mul.f r2.y, r2.y, r3.x -mov.f32f32 r3.y, r3.y -mad.f32 r2.w, r4.x, r4.x, r3.z +mul.f r3.x, r3.x, r3.x +mul.f r4.y, c8.y, r4.z +mad.f32 r3.z, r3.w, r0.y, r3.z +mul.f r2.w, r2.w, r4.x +mov.f32f32 r3.w, c3.y +mad.f32 r4.y, c9.y, r4.w, r4.y +rsq r3.y, r3.y +(ss)mov.f32f32 r5.z, r3.y +mad.f32 r4.y, c10.y, r5.x, r4.y +mad.f32 r3.z, r3.w, r0.z, r3.z +mad.f32 r3.w, c11.y, r5.y, r4.y +mul.f r0.w, r0.w, r5.z +mul.f r2.y, r2.y, r5.z +mov.f32f32 r4.y, r3.z +mov.f32f32 r5.z, r3.w +mul.f r5.w, c0.x, r0.w mov.f32f32 r0.w, r0.w -mov.f32f32 r2.y, r2.y -mad.f32 r3.z, r3.y, r3.y, r3.w -mov.f32f32 r2.w, r2.w -mul.f r3.w, c8.z, r1.x -mul.f r4.w, c0.x, r0.w -mov.f32f32 r3.z, r3.z -mad.f32 r0.x, r4.z, r0.y, r0.x -mad.f32 r0.y, c9.z, r1.y, r3.w -mad.f32 r3.w, c0.y, r2.y, r4.w -mad.f32 r0.y, c10.z, r1.z, r0.y -mov.f32f32 r0.x, r0.x -mov.f32f32 r4.z, c3.z -mad.f32 r0.y, c11.z, r1.w, r0.y -mov.f32f32 r3.w, r3.w -mul.f r2.z, r2.z, r3.x -mad.f32 r0.x, r4.z, r0.z, r0.x -mov.f32f32 r0.y, r0.y -mul.f r0.z, c4.w, r1.x -mul.f r3.x, c4.z, r1.x -mov.f32f32 r0.x, r0.x -mad.f32 r2.w, r0.y, r0.y, r2.w +mad.f32 r2.w, r3.z, r4.y, r2.w +mul.f r0.x, r2.z, r0.x +mov.f32f32 r3.z, c2.z +mad.f32 r3.x, r3.w, r5.z, r3.x +mul.f r3.w, c8.z, r4.z +mad.f32 r5.w, c0.y, r2.y, r5.w +mad.f32 r0.x, r3.z, r0.y, r0.x +mov.f32f32 r0.y, c3.z +mad.f32 r3.z, c9.z, r4.w, r3.w +mul.f r2.z, r2.z, r3.y +(ss)mad.f32 r3.y, c10.z, r5.x, r3.z +mad.f32 r0.x, r0.y, r0.z, r0.x +mad.f32 r0.y, c11.z, r5.y, r3.y +mad.f32 r3.y, c0.z, r2.z, r5.w +mov.f32f32 r0.z, r2.y +mov.f32f32 r2.y, r0.x +mov.f32f32 r3.z, r0.y +mov.f32f32 r3.w, r3.y mov.f32f32 r2.z, r2.z -mad.f32 r0.z, c5.w, r1.y, r0.z -mad.f32 r3.z, r0.x, r0.x, r3.z -mad.f32 r0.z, c6.w, r1.z, r0.z -mad.f32 r3.x, c5.z, r1.y, r3.x -mul.f r4.z, c4.y, r1.x -mul.f r1.x, c4.x, r1.x -mad.f32 r0.z, c7.w, r1.w, r0.z -mad.f32 r3.x, c6.z, r1.z, r3.x -rsq r3.z, r3.z -(ss)mov.f32f32 r3.z, r3.z -rsq r2.w, r2.w -(ss)mov.f32f32 r4.w, r2.w -mad.f32 r3.w, c0.z, r2.z, r3.w -(ss)mov.f32f32 r2.w, r0.z -mul.f r0.x, r0.x, r3.z -mul.f r2.x, r2.x, r4.w -mul.f r4.y, r4.y, r3.z -mul.f r3.y, r3.y, r3.z -mov.f32f32 r0.x, r0.x -mov.f32f32 r2.x, r2.x -mov.f32f32 r3.z, r4.y -mov.f32f32 r3.y, r3.y -mul.f r4.y, r0.x, r2.y -absneg.f r2.x, (neg)r2.x -mul.f r6.x, c0.x, r3.z -mad.f32 r4.y, r3.y, r2.z, (neg)r4.y -mad.f32 r6.x, c0.y, r3.y, r6.x -mov.f32f32 r6.y, r2.x -mul.f r2.x, r3.z, r2.z -mov.f32f32 r2.z, r4.y -mov.f32f32 r4.y, r6.x -mul.f r6.x, r6.y, r6.y -mul.f r4.x, r4.x, r4.w -mul.f r2.z, c0.x, r2.z -mad.f32 r2.x, r0.x, r0.w, (neg)r2.x -mad.f32 r0.x, c0.z, r0.x, r4.y +mad.f32 r2.y, r2.y, r2.y, r2.w +mad.f32 r0.y, r0.y, r3.z, r3.x +mul.f r3.x, r3.w, r3.w +mul.f r2.w, c4.w, r4.z +mul.f r3.w, c4.z, r4.z +mul.f r5.w, c4.y, r4.z +mul.f r4.z, c4.x, r4.z +rsq r2.y, r2.y +(ss)mov.f32f32 r6.x, r2.y +rsq r0.y, r0.y +(ss)mov.f32f32 r6.y, r0.y +(ss)mul.f r0.y, r2.x, r0.y +mul.f r0.x, r0.x, r2.y +mul.f r2.x, r4.y, r6.x +mul.f r2.y, r5.z, r6.y +mul.f r4.x, r4.x, r6.x +absneg.f r4.y, (neg)r0.y +mov.f32f32 r0.y, r2.x +mov.f32f32 r5.z, r0.x +absneg.f r2.y, (neg)r2.y +mul.f r6.x, c0.x, r4.x mov.f32f32 r4.x, r4.x -mul.f r0.w, r3.y, r0.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r0.x, r0.x -absneg.f r3.y, (neg)r4.x -mad.f32 r0.w, r3.z, r2.y, (neg)r0.w -mad.f32 r2.x, c0.y, r2.x, r2.z -mov.f32f32 r0.x, r0.x -mov.f32f32 r4.x, r3.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r3.y, r3.w -mul.f r0.y, r0.y, r4.w -mad.f32 r0.w, c0.z, r0.w, r2.x -mad.f32 r2.x, r4.x, r4.x, r6.x -mul.f r2.y, r3.y, r3.y -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r2.x, r2.x -mad.f32 r3.x, c7.z, r1.w, r3.x -mad.f32 r3.z, c5.y, r1.y, r4.z -mov.f32f32 r3.w, r0.w -absneg.f r0.y, (neg)r0.y -mov.f32f32 r2.z, r3.x -mad.f32 r0.w, c6.y, r1.z, r3.z -mad.f32 r2.y, r3.w, r3.w, r2.y -mov.f32f32 r3.z, r0.y -mad.f32 r0.y, c7.y, r1.w, r0.w -mad.f32 r0.w, c5.x, r1.y, r1.x -mov.f32f32 r1.x, r2.y -mad.f32 r1.y, r3.z, r3.z, r2.x -mad.f32 r1.x, r0.x, r0.x, r1.x +mul.f r6.z, r5.z, r0.z +mov.f32f32 r6.w, r2.y +mad.f32 r6.z, r0.y, r2.z, (neg)r6.z +mov.f32f32 r7.x, r4.y +mad.f32 r2.x, c0.y, r2.x, r6.x +mul.f r2.z, r4.x, r2.z +mul.f r6.x, c0.x, r6.z +mad.f32 r2.z, r5.z, r0.w, (neg)r2.z +mul.f r5.z, r7.x, r7.x +mad.f32 r0.x, c0.z, r0.x, r2.x +mad.f32 r2.x, r2.y, r6.w, r5.z +mad.f32 r2.y, c0.y, r2.z, r6.x +mul.f r0.y, r0.y, r0.w +mul.f r0.w, r3.z, r6.y +mad.f32 r0.y, r4.x, r0.z, (neg)r0.y +mov.f32f32 r0.z, r0.x +mad.f32 r2.z, c5.w, r4.w, r2.w +mad.f32 r2.w, c5.z, r4.w, r3.w +mad.f32 r0.y, c0.z, r0.y, r2.y +absneg.f r2.y, (neg)r0.w +mov.f32f32 r3.z, r0.z +mad.f32 r0.z, c6.w, r5.x, r2.z +mov.f32f32 r2.z, r0.y +mov.f32f32 r4.x, r2.y +mad.f32 r0.w, c7.w, r5.y, r0.z +mad.f32 r0.z, c6.z, r5.x, r2.w +mov.f32f32 r5.z, r2.z +mad.f32 r2.x, r2.y, r4.x, r2.x +mov.f32f32 r2.w, r0.w +mad.f32 r0.z, c7.z, r5.y, r0.z +mad.f32 r0.y, r0.y, r5.z, r3.x +mad.f32 r2.y, c5.y, r4.w, r5.w +mad.f32 r0.x, r0.x, r3.z, r0.y +mov.f32f32 r2.z, r0.z +mad.f32 r0.y, c6.y, r5.x, r2.y +mad.f32 r2.y, c5.x, r4.w, r4.z +mad.f32 r0.y, c7.y, r5.y, r0.y +mad.f32 r4.z, c6.x, r5.x, r2.y +nop +rsq r0.x, r0.x +(ss)mov.f32f32 r2.y, r0.x +mul.f r3.x, r3.y, r0.x +(ss)rsq r0.x, r2.x +(ss)mov.f32f32 r2.x, r0.x +mul.f r3.w, r4.y, r0.x +mul.f r3.z, r3.z, r2.y +mul.f r3.y, r5.z, r2.y +mul.f r4.y, r4.x, r2.x +mul.f r4.x, r6.w, r2.x mov.f32f32 r2.y, r0.y -mad.f32 r1.z, c6.x, r1.z, r0.w -mov.f32f32 r0.w, r0.z -mov.f32f32 r0.z, r3.x -mov.f32f32 r0.y, r0.y -mad.f32 r1.z, c7.x, r1.w, r1.z -rsq r1.x, r1.x -(ss)mov.f32f32 r1.x, r1.x -rsq r1.y, r1.y -(ss)mov.f32f32 r1.y, r1.y -mov.f32f32 r1.w, (0.000000) -mov.f32f32 r2.x, r1.z -mul.f r0.x, r0.x, r1.x -mul.f r3.x, r3.w, r1.x -mul.f r1.x, r3.y, r1.x -mul.f r3.y, r3.z, r1.y -mov.f32f32 r0.x, r0.x -mov.f32f32 r3.x, r3.x -mov.f32f32 r1.x, r1.x -mov.f32f32 r3.w, r3.y -mov.f32f32 r0.x, r0.x -mov.f32f32 r3.x, r3.x -mov.f32f32 r1.x, r1.x -mov.f32f32 r4.y, r3.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r3.x, r3.x -mov.f32f32 r1.x, r1.x +mad.f32 r0.x, c7.x, r5.y, r4.z +mov.f32f32 r4.z, r4.y +mov.f32f32 r4.w, (0.000000) nop -mov.f32f32 r3.z, r0.x -mov.f32f32 r3.y, r3.x -mov.f32f32 r3.x, r1.x -mov.f32f32 r0.x, r4.y -mul.f r1.x, r4.x, r1.y -mul.f r1.y, r6.y, r1.y -mov.f32f32 r4.z, r3.w -mov.f32f32 r4.y, r0.x -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.y, r1.y -mov.f32f32 r0.x, r1.z -mov.f32f32 r4.w, r1.w -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.y, r1.y -(rpt1)nop -mov.f32f32 r1.x, r1.x -mov.f32f32 r3.w, r1.y -mov.f32f32 r1.w, r5.w -mov.f32f32 r1.z, r5.z -mov.f32f32 r4.x, r1.x -mov.f32f32 r1.y, r5.y -mov.f32f32 r1.x, r5.x +mov.f32f32 r2.x, r0.x end nop nop +nop ; VERT: outputs: r0.x (0:0) r1.x (5:0) r2.x (5:20) r3.x (5:21) r4.x (5:22) -; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r5.x (0:0,cm=f,il=16,b=0) -; VERT: 183 instructions, 0 half, 7 full +; VERT: inputs: r4.z (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r1.x (0:0,cm=f,il=16,b=0) +; VERT: 133 instructions, 0 half, 8 full diff --git a/reference/stk-mines/stk-mines-35.asm b/reference/stk-mines/stk-mines-35.asm index 242def2..693d844 100644 --- a/reference/stk-mines/stk-mines-35.asm +++ b/reference/stk-mines/stk-mines-35.asm @@ -1,9 +1,9 @@ ; options: ; VERT: new compiler -@in(r2.x) in0 -@in(r2.y) in1 -@in(r2.z) in2 -@in(r2.w) in3 +@in(r1.x) in0 +@in(r1.y) in1 +@in(r1.z) in2 +@in(r1.w) in3 @in(r0.x) in4 @in(r0.y) in5 @in(r0.z) in6 @@ -11,10 +11,10 @@ @in(r3.y) in9 @in(r3.z) in10 @in(r3.w) in11 -@in(r4.x) in12 -@in(r4.y) in13 -@in(r4.z) in14 -@in(r4.w) in15 +@in(r2.x) in12 +@in(r2.y) in13 +@in(r2.z) in14 +@in(r2.w) in15 @out(r0.x) out0 @out(r0.y) out1 @out(r0.z) out2 @@ -27,135 +27,120 @@ @out(r2.y) out9 @out(r2.z) out10 @out(r2.w) out11 -(sy)(ss)mul.f r0.w, r2.x, c12.x -mul.f r1.x, r0.x, c4.x -mad.f32 r0.w, c13.x, r2.y, r0.w -mad.f32 r1.x, c4.y, r0.y, r1.x -mad.f32 r0.w, c14.x, r2.z, r0.w -mov.f32f32 r1.y, r3.w -mad.f32 r0.w, c15.x, r2.w, r0.w -mov.f32f32 r1.x, r1.x -mul.f r1.z, r0.x, c5.x +@const(c19.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 +(sy)(ss)mul.f r0.w, r1.x, c12.x +mul.f r4.x, r0.x, c4.x +mad.f32 r0.w, c13.x, r1.y, r0.w +mad.f32 r4.x, c4.y, r0.y, r4.x +mad.f32 r0.w, c14.x, r1.z, r0.w +mad.f32 r4.x, c4.z, r0.z, r4.x +mad.f32 r4.y, c15.x, r1.w, r0.w +mul.f r0.w, r1.x, c12.y +mul.f r4.z, r1.x, c12.z +mul.f r4.w, r1.x, c0.w +mul.f r5.x, r4.y, r4.y +mad.f32 r0.w, c13.y, r1.y, r0.w +mul.f r5.y, r4.x, c10.x +mad.f32 r0.w, c14.y, r1.z, r0.w +mul.f r5.z, r0.x, c5.x +mad.f32 r5.w, c15.y, r1.w, r0.w +mad.f32 r0.w, c5.y, r0.y, r5.z +mad.f32 r4.z, c13.z, r1.y, r4.z +mad.f32 r4.w, c1.w, r1.y, r4.w +mad.f32 r5.x, r5.w, r5.w, r5.x +mad.f32 r4.z, c14.z, r1.z, r4.z +mad.f32 r5.z, c5.z, r0.z, r0.w +mad.f32 r4.z, c15.z, r1.w, r4.z +mad.f32 r0.w, c2.w, r1.z, r4.w +mul.f r4.w, r1.x, c0.z +mul.f r6.x, r1.x, c0.y +mad.f32 r5.x, r4.z, r4.z, r5.x +mad.f32 r5.y, c10.y, r5.z, r5.y mul.f r0.x, r0.x, c6.x -mul.f r1.w, r0.w, r0.w -mul.f r3.w, r2.x, c12.y -mad.f32 r1.x, c4.z, r0.z, r1.x -mad.f32 r3.w, c13.y, r2.y, r3.w -mov.f32f32 r1.y, r1.y -mad.f32 r3.w, c14.y, r2.z, r3.w -mul.f r5.x, r1.x, c10.x -mad.f32 r3.w, c15.y, r2.w, r3.w -mad.f32 r1.z, c5.y, r0.y, r1.z -max.f r1.y, r1.y, c19.x +mad.f32 r0.w, c3.w, r1.w, r0.w +mad.f32 r4.w, c1.z, r1.y, r4.w +mad.f32 r6.x, c1.y, r1.y, r6.x +mul.f r1.x, r1.x, c0.x +rsq r5.x, (abs)r5.x +(ss)mov.f32f32 r6.y, r5.x +mul.f r4.z, r4.z, r5.x mad.f32 r0.x, c6.y, r0.y, r0.x -mad.f32 r0.y, r3.w, r3.w, r1.w -mov.f32f32 r1.z, r1.z -min.f r1.w, r1.y, c19.y -mad.f32 r1.y, c5.z, r0.z, r1.z -mov.f32f32 r0.y, r0.y -mul.f r1.z, r2.x, c12.z -mov.f32f32 r0.x, r0.x -mad.f32 r1.z, c13.z, r2.y, r1.z -mad.f32 r5.x, c10.y, r1.y, r5.x -mad.f32 r1.z, c14.z, r2.z, r1.z +mad.f32 r0.y, c2.z, r1.z, r4.w +mul.f r4.y, r4.y, r6.y +mul.f r4.w, r5.w, r6.y +(rpt1)nop +add.f r4.y, c10.x, (neg)r4.y +add.f r4.w, c10.y, (neg)r4.w +add.f r4.z, c10.z, (neg)r4.z mad.f32 r0.x, c6.z, r0.z, r0.x -mad.f32 r0.z, c15.z, r2.w, r1.z -mov.f32f32 r1.z, r5.x -mul.f r5.x, r2.x, c0.w -mul.f r5.y, r2.x, c0.z -mad.f32 r0.y, r0.z, r0.z, r0.y -mad.f32 r1.z, c10.z, r0.x, r1.z -mad.f32 r5.x, c1.w, r2.y, r5.x -mad.f32 r5.y, c1.z, r2.y, r5.y -mul.f r5.z, r2.x, c0.y -mul.f r2.x, r2.x, c0.x -mul.f r5.w, c16.z, r3.z +(ss)mul.f r5.x, r4.y, r4.y +mad.f32 r0.z, c3.z, r1.w, r0.y +mad.f32 r0.y, r4.w, r4.w, r5.x +mad.f32 r5.x, c10.z, r0.x, r5.y +mad.f32 r0.y, r4.z, r4.z, r0.y +mad.f32 r5.y, c2.y, r1.z, r6.x +mad.f32 r1.x, c1.x, r1.y, r1.x +max.f r1.y, r3.w, c19.x +mul.f r3.w, c16.z, r3.z +mul.f r5.w, c16.y, r3.y +mul.f r6.x, c16.x, r3.x rsq r0.y, (abs)r0.y -(ss)mov.f32f32 r0.y, r0.y -max.f r6.x, r1.z, c19.x -cmps.f.lt r1.z, (neg)r1.z, c19.x -mad.f32 r5.x, c2.w, r2.z, r5.x -mul.f r0.w, r0.w, r0.y -mul.f r3.w, r3.w, r0.y -mul.f r0.y, r0.z, r0.y -mov.f32f32 r6.x, r6.x -add.f r0.z, c10.x, (neg)r0.w -add.f r3.w, c10.y, (neg)r3.w -add.f r0.y, c10.z, (neg)r0.y -mad.f32 r0.w, c8.z, r3.z, c9.z -mul.f r6.y, r0.z, r0.z -mul.f r6.z, c16.y, r3.y -mad.f32 r6.y, r3.w, r3.w, r6.y -add.f r5.w, r5.w, r0.w -mad.f32 r0.w, c8.y, r3.y, c9.y -mul.f r6.w, c16.x, r3.x -mov.f32f32 r6.y, r6.y +(ss)mov.f32f32 r6.y, r0.y +(ss)mul.f r0.y, r4.z, r0.y +max.f r4.z, r5.x, c19.x +mad.f32 r6.z, c8.x, r3.x, c9.x +mul.f r4.y, r4.y, r6.y +mul.f r4.w, r4.w, r6.y +mov.f32f32 r6.y, r4.z +mad.f32 r6.w, c8.y, r3.y, c9.y +mul.f r4.x, r4.x, r4.y +mad.f32 r4.y, c8.z, r3.z, c9.z +mad.f32 r4.x, r5.z, r4.w, r4.x +add.f r4.w, r5.w, r6.w +mad.f32 r0.x, r0.x, r0.y, r4.x +add.f r3.w, r3.w, r4.y +mul.f r0.y, c17.y, r3.y +add.f r3.y, r6.x, r6.z +max.f r0.x, r0.x, c19.x mul.f r3.z, c17.z, r3.z -mad.f32 r6.y, r0.y, r0.y, r6.y -add.f r6.z, r6.z, r0.w -mad.f32 r7.x, c8.x, r3.x, c9.x -mad.f32 r0.w, c3.w, r2.w, r5.x -mad.f32 r5.x, c2.z, r2.z, r5.y -mad.f32 r5.y, c1.y, r2.y, r5.z -mad.f32 r2.x, c1.x, r2.y, r2.x -rsq r2.y, (abs)r6.y -(ss)mov.f32f32 r2.y, r2.y -mad.f32 r3.z, r6.x, r3.z, r5.w -mul.f r3.y, c17.y, r3.y -add.f r5.z, r6.w, r7.x -mul.f r0.z, r0.z, r2.y -mul.f r3.w, r3.w, r2.y -mul.f r0.y, r0.y, r2.y -mad.f32 r3.y, r6.x, r3.y, r6.z -mul.f r0.z, r1.x, r0.z -mul.f r1.x, c17.x, r3.x -mad.f32 r1.y, r1.y, r3.w, r0.z -mad.f32 r0.z, c3.z, r2.w, r5.x -mad.f32 r2.y, c2.y, r2.z, r5.y -mad.f32 r2.x, c2.x, r2.z, r2.x -mov.f32f32 r1.y, r1.y -mad.f32 r1.x, r6.x, r1.x, r5.z -mad.f32 r1.y, r0.x, r0.y, r1.y -mad.f32 r0.y, c3.y, r2.w, r2.y -mad.f32 r0.x, c3.x, r2.w, r2.x -mov.f32f32 r3.x, c7.x -max.f r1.y, r1.y, c19.x -mov.f32f32 r2.w, r4.w -mov.f32f32 r2.z, r4.z -mov.f32f32 r2.y, r4.y -mov.f32f32 r2.x, r4.x +mad.f32 r4.x, r6.y, r0.y, r4.w +mul.f r3.x, c17.x, r3.x +cmps.f.lt r4.y, (neg)r5.x, c19.x +mad.f32 r0.y, c3.y, r1.w, r5.y +mad.f32 r1.x, c2.x, r1.z, r1.x +log2 r1.z, r0.x +mov.f32f32 r4.w, c7.x +mad.f32 r3.z, r6.y, r3.z, r3.w +mad.f32 r3.x, r4.z, r3.x, r3.y +(ss)mad.f32 r0.x, c3.x, r1.w, r1.x +min.f r1.x, r4.w, c19.z +min.f r1.w, r1.y, c19.y (rpt1)nop -log2 r1.y, r1.y -(ss)mov.f32f32 r1.y, r1.y -min.f r3.x, r3.x, c19.z -(rpt2)nop -mul.f r1.y, r3.x, r1.y -(rpt2)nop -mov.f32f32 r1.y, r1.y +(ss)mul.f r1.x, r1.x, r1.z (rpt5)nop -exp2 r1.y, r1.y -(ss)mov.f32f32 r1.y, r1.y -(rpt2)nop -sel.b32 r1.y, r1.y, r1.z, c19.x -(rpt2)nop -mov.f32f32 r1.y, r1.y +exp2 r1.x, r1.x +(ss)sel.b32 r1.x, r1.x, r4.y, c19.x (rpt2)nop +mov.f32f32 r1.y, r1.x +mad.f32 r1.x, c18.x, r1.x, r3.x +(rpt1)nop mov.f32f32 r1.y, r1.y -(rpt2)nop -mad.f32 r1.z, c18.z, r1.y, r3.z -mad.f32 r3.x, c18.y, r1.y, r3.y -mad.f32 r1.x, c18.x, r1.y, r1.x -nop -max.f r1.y, r1.z, c19.x -max.f r3.x, r3.x, c19.x max.f r1.x, r1.x, c19.x -nop -min.f r1.z, r1.y, c19.y -min.f r1.y, r3.x, c19.y +(rpt1)nop +mad.f32 r1.z, c18.z, r1.y, r3.z +mad.f32 r1.y, c18.y, r1.y, r4.x min.f r1.x, r1.x, c19.y +nop +max.f r1.z, r1.z, c19.x +max.f r1.y, r1.y, c19.x +(rpt1)nop +min.f r1.z, r1.z, c19.y +min.f r1.y, r1.y, c19.y end nop +nop +nop ; VERT: outputs: r0.x (0:0) r1.x (1:0) r2.x (5:0) -; VERT: inputs: r2.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r3.x (0:0,cm=f,il=16,b=0) r4.x (0:0,cm=f,il=20,b=0) -; VERT: 145 instructions, 0 half, 8 full +; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r3.x (0:0,cm=f,il=16,b=0) r2.x (0:0,cm=f,il=20,b=0) +; VERT: 121 instructions, 0 half, 7 full diff --git a/reference/stk-mines/stk-mines-36.asm b/reference/stk-mines/stk-mines-36.asm index 1435d60..141701a 100644 --- a/reference/stk-mines/stk-mines-36.asm +++ b/reference/stk-mines/stk-mines-36.asm @@ -6,275 +6,191 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c8.x) 0x40000000, 0xbf800000, 0x40800000, 0x3f000000 +@const(c9.x) 0x00000000, 0x3f333333, 0x3e99999a, 0x3d4ccccd +@const(c10.x) 0x3f800000, 0x00000000, 0x00000000, 0x00000000 +@const(c11.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.z, 0, r0.x absneg.f r0.w, (neg)c6.x -mov.f32f32 r1.x, c3.x -bary.f r1.y, 1, r0.x -add.f r1.z, r0.z, c3.x -add.f r1.w, r0.z, c4.x -add.f r2.x, c7.x, r0.w -add.f r0.z, r0.z, r1.x -mov.f32f32 r1.x, r1.z -mov.f32f32 r1.z, r1.w -mov.f32f32 r1.w, r2.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r2.x, c9.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r2.y, r1.x -add.f r1.x, r1.y, c3.y -mov.f32f32 r2.w, r1.z -add.f r1.z, r1.y, c4.y -rcp r1.w, r1.w -(ss)mov.f32f32 r1.w, r1.w -mov.f32f32 r1.x, r1.x -bary.f r3.y, 6, r0.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r0.z, r0.z -mov.f32f32 r1.x, r1.x -add.f r0.w, r3.y, r0.w -mov.f32f32 r1.z, r1.z -mov.f32f32 r3.z, r0.z -mov.f32f32 r2.z, r1.x -mul.f r0.z, r0.w, r1.w -mov.f32f32 r3.x, r1.z -add.f r0.w, r1.y, r2.x -cmps.f.lt r1.x, c6.x, r3.y -cmps.f.lt r1.y, c7.x, r3.y -bary.f r1.z, 11, r0.x -sam (f32)(xyz)r1.w, r2.y, s#0, t#0 -(sy)mad.f32 r2.x, c8.x, r2.x, c8.y -mov.f32f32 r0.z, r0.z -(ss)nop -sam (f32)(xyz)r2.z, r2.w, s#1, t#1 -(sy)(ss)mad.f32 r2.w, c8.x, r2.w, c8.y -mad.f32 r2.z, c8.x, r2.z, c8.y -mov.f32f32 r2.x, r2.x -mul.f r3.y, r0.z, c5.w -mul.f r3.w, r0.z, c5.z -mul.f r4.x, r0.z, c5.y -mul.f r2.x, c8.z, r2.x -mov.f32f32 r2.w, r2.w -mov.f32f32 r2.z, r2.z -mov.f32f32 r3.y, r3.y -mov.f32f32 r2.x, r2.x -mad.f32 r1.w, c8.x, r1.w, c8.y -mul.f r2.w, c8.z, r2.w -mov.f32f32 r2.z, r2.z -mov.f32f32 r4.y, r3.w -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.w, r2.w -mul.f r3.w, r2.z, r2.z -mov.f32f32 r4.x, r4.x -mul.f r4.z, r1.w, r1.w -mov.f32f32 r2.w, r2.w -mad.f32 r4.z, r2.x, r2.x, r4.z -mul.f r4.w, r0.z, c5.x -add.f r0.z, c10.x, (neg)r0.z -mad.f32 r3.w, r2.w, r2.w, r3.w -mov.f32f32 r4.z, r4.z +bary.f r1.x, 6, r0.x +mov.f32f32 r1.y, c3.x +add.f r1.z, r0.z, c4.x +bary.f r2.x, 1, r0.x +add.f r2.y, r0.z, c3.x +add.f r2.w, c7.x, r0.w +cmps.f.lt r3.x, c6.x, r1.x +add.f r1.w, r2.x, c4.y +add.f r2.z, r2.x, c3.y +cmps.f.lt r3.y, c7.x, r1.x +add.f r3.z, r0.z, r1.y +cov.u32f32 r0.z, r3.x +rcp r1.y, r2.w +add.f r0.w, r1.x, r0.w +cov.u32f32 r1.x, r3.y +sam (f32)(xyz)r3.w, r1.z, s#1, t#1 +(sy)(ss)mad.f32 r1.z, c8.x, r4.x, c8.y +sam (f32)(xyz)r2.y, r2.y, s#0, t#0 +(sy)mad.f32 r1.w, c8.x, r2.z, c8.y +(ss)mul.f r0.w, r0.w, r1.y +mad.f32 r1.y, c8.x, r3.w, c8.y +mul.f r1.z, c8.z, r1.z +mul.f r1.w, c8.z, r1.w +(ss)mov.f32f32 r2.z, r0.w +mov.f32f32 r3.x, r1.y +mov.f32f32 r3.y, r1.z +mov.f32f32 r4.x, r1.w mad.f32 r2.y, c8.x, r2.y, c8.y -mov.f32f32 r4.w, r4.w -mad.f32 r3.x, c8.x, r3.x, c8.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r2.y, r2.y -mov.f32f32 r3.w, r3.w mov.f32f32 r3.x, r3.x -mov.f32f32 r0.w, r0.w -mad.f32 r4.z, r2.y, r2.y, r4.z -cov.u32f32 r1.x, r1.x -cov.u32f32 r1.y, r1.y -(rpt3)nop -rsq r4.z, r4.z -(ss)mov.f32f32 r4.z, r4.z -mov.f32f32 r3.x, r3.x -mov.f32f32 r0.w, r0.w +mov.f32f32 r3.y, r3.y +mul.f r4.z, r2.z, c5.w +mul.f r4.w, r2.z, c5.z +mul.f r1.y, r1.y, r3.x +mov.f32f32 r5.x, r2.y +mad.f32 r1.y, r1.z, r3.y, r1.y +mad.f32 r1.z, c8.x, r4.y, c8.y +mul.f r4.y, r2.z, c5.y +mul.f r2.z, r2.z, c5.x +mul.f r2.y, r2.y, r5.x +mov.f32f32 r3.w, r1.z +mad.f32 r1.w, r1.w, r4.x, r2.y +mad.f32 r2.y, c8.x, r2.w, c8.y +add.f r0.w, c10.x, (neg)r0.w +mov.f32f32 r2.w, r3.w +cmps.f.ne r0.z, r0.z, c9.x cmps.f.ne r1.x, r1.x, c9.x -mul.f r1.w, r1.w, r4.z -mul.f r2.x, r2.x, r4.z -mul.f r2.y, r2.y, r4.z -mad.f32 r4.z, r3.x, r3.x, r3.w -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r2.y, r2.y -mov.f32f32 r3.w, r0.w -cmps.f.ne r0.w, r1.y, c9.x -mov.f32f32 r1.y, r1.z -rsq r1.z, r4.z -(ss)mov.f32f32 r1.z, r1.z -(ss)bary.f r4.z, 12, r0.x -bary.f r5.x, 13, r0.x +mov.f32f32 r3.w, c9.x +mad.f32 r1.y, r2.w, r2.w, r1.y +mov.f32f32 r2.w, r2.y mov.f32f32 r5.y, c9.x -mad.f32 r1.w, r2.z, r1.z, r1.w -mad.f32 r2.x, r2.w, r1.z, r2.x -mad.f32 r1.z, r3.x, r1.z, r2.y -sam (f32)(xyzw)r2.y, r3.z, s#2, t#2 -(ss)mov.f32f32 r3.z, r4.z -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r3.w, r5.x -mul.f r1.w, r1.w, c8.w -mul.f r2.x, r2.x, c8.w -mul.f r1.z, r1.z, c8.w -nop -mov.f32f32 r1.w, r1.w -bary.f r4.z, 8, r0.x -mov.f32f32 r2.x, r2.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r5.x, c9.x -mul.f r5.z, r1.w, r4.z -bary.f r5.w, 9, r0.x -mul.f r6.x, r4.z, r1.w -mov.f32f32 r6.y, c5.w -mov.f32f32 r6.z, c5.z -mad.f32 r5.z, r2.x, r5.w, r5.z -mad.f32 r6.x, r5.w, r2.x, r6.x -mov.f32f32 r6.w, c5.y +mov.f32f32 r5.z, c9.x +mov.f32f32 r5.w, c5.w +mov.f32f32 r6.x, c5.z +mov.f32f32 r6.y, c5.y +rsq r1.y, r1.y +(ss)mov.f32f32 r6.z, r1.y +mad.f32 r1.w, r2.w, r2.w, r1.w +add.f r3.w, r2.x, r3.w +bary.f r2.x, 11, r0.x +bary.f r2.w, 12, r0.x +bary.f r6.w, 13, r0.x +bary.f r7.x, 8, r0.x +bary.f r7.y, 9, r0.x +rsq r1.w, r1.w +(ss)mov.f32f32 r7.z, r1.w +(ss)mul.f r1.w, r2.y, r1.w +sam (f32)(xyzw)r7.w, r3.z, s#2, t#2 bary.f (ei)r0.x, 10, r0.x -mov.f32f32 r0.y, r5.z -mov.f32f32 r5.z, r6.x -mov.f32f32 r6.x, c5.x -mad.f32 r0.y, r1.z, r0.x, r0.y -mad.f32 r5.z, r0.x, r1.z, r5.z -(rpt1)nop -mul.f r1.w, r0.y, r1.w -max.f r5.z, r5.z, c9.x -mul.f r2.x, r0.y, r2.x -mul.f r0.y, r0.y, r1.z -mov.f32f32 r1.z, r1.w -mov.f32f32 r1.w, r5.z -mov.f32f32 r2.x, r2.x -mov.f32f32 r0.y, r0.y -mul.f r1.z, c8.x, r1.z -mad.f32 r1.w, c9.y, r1.w, c9.z -mul.f r2.x, c8.x, r2.x -mul.f r0.y, c8.x, r0.y -mov.f32f32 r1.z, r1.z -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r0.y, r0.y -add.f r1.z, r4.z, (neg)r1.z -(sy)mul.f r3.x, r3.x, r1.w -mul.f r2.w, r2.w, r1.w -mul.f r2.z, r2.z, r1.w -mov.f32f32 r1.z, r1.z -mov.f32f32 r4.z, r3.x -mov.f32f32 r5.z, r2.w -mov.f32f32 r7.x, r2.z -mul.f r7.y, r1.z, r1.z -add.f r2.x, r5.w, (neg)r2.x -add.f r4.z, r4.z, r5.x -add.f r0.x, r0.x, (neg)r0.y -mul.f r0.y, r2.y, r1.w -mov.f32f32 r1.w, r2.x -add.f r2.x, r4.z, r5.y -mov.f32f32 r0.x, r0.x -mov.f32f32 r2.y, r0.y -mad.f32 r4.z, r1.w, r1.w, r7.y +mov.f32f32 r0.y, c5.x +mul.f r2.y, r5.x, r7.z +(ss)mul.f r3.z, r4.x, r7.z +mad.f32 r2.y, r3.x, r6.z, r2.y +mad.f32 r3.x, r3.y, r6.z, r3.z +mad.f32 r1.y, r1.z, r1.y, r1.w +nop +mul.f r1.z, r2.y, c8.w +mul.f r1.w, r3.x, c8.w +mul.f r1.y, r1.y, c8.w +nop +mov.f32f32 r2.y, r1.z +mul.f r1.z, r7.x, r1.z +mov.f32f32 r3.x, r1.w +mov.f32f32 r3.y, r1.y +mul.f r3.z, r2.y, r7.x +mad.f32 r1.z, r7.y, r1.w, r1.z +mad.f32 r1.w, r3.x, r7.y, r3.z +mad.f32 r1.y, r0.x, r1.y, r1.z +mad.f32 r1.z, r3.y, r0.x, r1.w (rpt2)nop -mov.f32f32 r4.z, r4.z -nop -mad.f32 r4.z, r0.x, r0.x, r4.z -(rpt5)nop -rsq r4.z, r4.z -(ss)mov.f32f32 r4.z, r4.z +mul.f r1.w, r1.z, r2.y +max.f r1.y, r1.y, c9.x +mul.f r2.y, r1.z, r3.x +mul.f r1.z, r1.z, r3.y +mul.f r1.w, c8.x, r1.w +mad.f32 r1.y, c9.y, r1.y, c9.z +mul.f r2.y, c8.x, r2.y +mul.f r1.z, c8.x, r1.z +add.f r1.w, r7.x, (neg)r1.w +mov.f32f32 r3.x, r1.y +add.f r2.y, r7.y, (neg)r2.y +add.f r0.x, r0.x, (neg)r1.z +mov.f32f32 r1.z, r1.w +(sy)mul.f r3.y, r8.z, r3.x +mov.f32f32 r3.z, r2.y +mov.f32f32 r3.w, r0.x +mul.f r1.w, r1.w, r1.z +add.f r4.x, r3.y, r5.z +mad.f32 r1.w, r2.y, r3.z, r1.w +mul.f r2.y, r8.y, r3.x +mad.f32 r1.w, r3.w, r3.w, r1.w +add.f r3.w, r4.x, r5.y +mul.f r3.x, r8.x, r3.x +mul.f r1.y, r7.w, r1.y (rpt2)nop -mul.f r1.z, r1.z, r4.z -mul.f r1.w, r1.w, r4.z -mul.f r0.x, r0.x, r4.z -nop -mov.f32f32 r1.z, r1.z -mov.f32f32 r1.w, r1.w -mov.f32f32 r0.x, r0.x +rsq r1.w, r1.w +(ss)mov.f32f32 r4.x, r1.w +mul.f r0.x, r0.x, r1.w +(rpt1)nop +mul.f r1.z, r1.z, r4.x +(ss)mul.f r1.w, r3.z, r4.x +(rpt1)nop +mul.f r1.z, r1.z, r2.x nop -mul.f r1.y, r1.z, r1.y +mad.f32 r1.z, r1.w, r2.w, r1.z nop -mad.f32 r1.y, r1.w, r3.z, r1.y -(rpt2)nop -mov.f32f32 r1.y, r1.y -nop -mad.f32 r0.x, r0.x, r3.w, r1.y -(rpt2)nop -mov.f32f32 r0.x, r0.x +mad.f32 r0.x, r0.x, r6.w, r1.z (rpt2)nop max.f r0.x, r0.x, c9.x (rpt2)nop -mov.f32f32 r0.x, r0.x -(rpt2)nop -mov.f32f32 r1.y, r0.x mov.f32f32 r1.z, r0.x -cmps.f.lt r0.x, c9.x, r0.x -(rpt1)nop -mul.f r1.y, r1.y, r1.z -cov.u32f32 r0.x, r0.x +(rpt2)nop +mul.f r0.x, r0.x, r1.z +cmps.f.lt r1.z, c9.x, r1.z (rpt1)nop -mov.f32f32 r1.y, r1.y -cmps.f.ne r0.x, r0.x, c9.x +mov.f32f32 r1.w, r0.x +cov.u32f32 r1.z, r1.z (rpt1)nop -mul.f r1.y, r1.y, r1.y -sel.b32 r1.z, r2.x, r0.x, r3.x +mul.f r0.x, r0.x, r1.w +cmps.f.ne r1.z, r1.z, c9.x (rpt1)nop -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.w, r1.z +mov.f32f32 r1.w, r0.x +sel.b32 r2.x, r3.w, r1.z, r3.y +mul.f r0.x, r0.x, c9.w +nop +mul.f r1.w, r1.w, r1.w +mad.f32 r2.w, r0.w, r2.x, r4.z +add.f r3.y, r2.y, r0.x +add.f r3.z, r3.x, r0.x +mul.f r1.w, r1.w, c9.z +add.f r0.x, r1.y, r0.x +sel.b32 r2.x, r2.w, r0.z, r2.x +nop +mov.f32f32 r2.w, r1.w +add.f r0.x, r0.x, r1.w +sel.b32 r1.w, r5.w, r1.x, r2.x +nop +add.f r2.x, r3.y, r2.w +add.f r2.w, r3.z, r2.w +sel.b32 r0.x, r0.x, r1.z, r1.y +nop +sel.b32 r1.y, r2.x, r1.z, r2.y +sel.b32 r1.z, r2.w, r1.z, r3.x +mad.f32 r2.x, r0.w, r0.x, r2.z +nop +mad.f32 r2.y, r0.w, r1.y, r4.w +mad.f32 r0.w, r0.w, r1.z, r4.y +sel.b32 r0.x, r2.x, r0.z, r0.x +nop +sel.b32 r1.y, r2.y, r0.z, r1.y +sel.b32 r0.z, r0.w, r0.z, r1.z (rpt1)nop -mul.f r2.x, r1.y, r1.y -mov.f32f32 r1.w, r1.w -mul.f r1.y, r1.y, c9.w -nop -mov.f32f32 r2.x, r2.x -mad.f32 r1.w, r0.z, r1.w, r3.y -mov.f32f32 r3.x, r1.y -mov.f32f32 r3.y, r1.y -mul.f r2.x, r2.x, c9.z -sel.b32 r1.z, r1.w, r1.x, r1.z -add.f r1.w, r5.z, r3.x -add.f r3.x, r7.x, r3.y -mov.f32f32 r2.x, r2.x -sel.b32 r1.z, r6.y, r0.w, r1.z -mov.f32f32 r1.y, r1.y -nop -mov.f32f32 r3.y, r2.x -mov.f32f32 r3.z, r2.x -mov.f32f32 r2.x, r2.x -add.f r1.y, r2.y, r1.y -add.f r2.y, r1.w, r3.y -add.f r3.x, r3.x, r3.z -mov.f32f32 r1.w, r1.z -nop -sel.b32 r1.z, r2.y, r0.x, r2.w -sel.b32 r2.y, r3.x, r0.x, r2.z -add.f r1.y, r1.y, r2.x -nop -mov.f32f32 r2.x, r1.z -mov.f32f32 r2.z, r2.y -sel.b32 r0.x, r1.y, r0.x, r0.y -nop -mov.f32f32 r0.y, r2.x -mov.f32f32 r1.y, r2.z -mov.f32f32 r2.x, r0.x -nop -mad.f32 r0.y, r0.z, r0.y, r4.y -mad.f32 r1.y, r0.z, r1.y, r4.x -mov.f32f32 r2.x, r2.x -nop -sel.b32 r0.y, r0.y, r1.x, r1.z -sel.b32 r1.y, r1.y, r1.x, r2.y -mad.f32 r0.z, r0.z, r2.x, r4.w +sel.b32 r1.z, r6.x, r1.x, r1.y +sel.b32 r1.y, r6.y, r1.x, r0.z +sel.b32 r1.x, r0.y, r1.x, r0.x +end nop -sel.b32 r0.y, r6.z, r0.w, r0.y -sel.b32 r1.y, r6.w, r0.w, r1.y -sel.b32 r0.x, r0.z, r1.x, r0.x nop -mov.f32f32 r1.z, r0.y -mov.f32f32 r1.y, r1.y -sel.b32 r0.x, r6.x, r0.w, r0.x -(rpt2)nop -mov.f32f32 r1.x, r0.x -end nop ; FRAG: outputs: r1.x (1:0) -; FRAG: inputs: r0.x (5:0,cm=f,il=8,b=1) r1.x (5:20,cm=f,il=12,b=1) r0.w (5:21,cm=f,il=16,b=1) r3.x (5:22,cm=f,il=20,b=1) -; FRAG: 297 instructions, 0 half, 8 full +; FRAG: inputs: r0.x (5:0,cm=f,il=8,b=1) r1.x (5:20,cm=f,il=12,b=1) r1.y (5:21,cm=f,il=16,b=1) r3.x (5:22,cm=f,il=20,b=1) +; FRAG: 193 instructions, 0 half, 9 full diff --git a/reference/stk-mines/stk-mines-37.asm b/reference/stk-mines/stk-mines-37.asm index 2d84780..726bef5 100644 --- a/reference/stk-mines/stk-mines-37.asm +++ b/reference/stk-mines/stk-mines-37.asm @@ -1,16 +1,16 @@ ; options: ; VERT: new compiler -@in(r1.x) in0 -@in(r1.y) in1 -@in(r1.z) in2 -@in(r1.w) in3 +@in(r4.z) in0 +@in(r4.w) in1 +@in(r5.x) in2 +@in(r5.y) in3 @in(r0.x) in4 @in(r0.y) in5 @in(r0.z) in6 -@in(r5.x) in8 -@in(r5.y) in9 -@in(r5.z) in10 -@in(r5.w) in11 +@in(r1.x) in8 +@in(r1.y) in9 +@in(r1.z) in10 +@in(r1.w) in11 @out(r0.x) out0 @out(r0.y) out1 @out(r0.z) out2 @@ -31,191 +31,144 @@ @out(r4.y) out17 @out(r4.z) out18 @out(r4.w) out19 +@const(c12.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)mov.f32f32 r0.w, c1.x -mul.f r2.x, c8.x, r1.x +mul.f r2.x, c8.x, r4.z mov.f32f32 r2.y, c1.y mov.f32f32 r2.z, c1.z mul.f r2.w, r0.w, r0.x mov.f32f32 r3.x, c2.x -mad.f32 r2.x, c9.x, r1.y, r2.x +mad.f32 r2.x, c9.x, r4.w, r2.x mul.f r3.y, r0.w, r0.w -mad.f32 r2.x, c10.x, r1.z, r2.x +mad.f32 r2.x, c10.x, r5.x, r2.x mad.f32 r2.w, r3.x, r0.y, r2.w -mad.f32 r2.x, c11.x, r1.w, r2.x -mad.f32 r3.x, r2.y, r2.y, r3.y -mul.f r3.y, r2.y, r0.x -mov.f32f32 r2.w, r2.w -mov.f32f32 r3.z, c3.x -mov.f32f32 r2.x, r2.x -mov.f32f32 r3.x, r3.x +mov.f32f32 r3.x, c3.x +mad.f32 r2.x, c11.x, r5.y, r2.x +mad.f32 r3.y, r2.y, r2.y, r3.y +mul.f r3.z, r2.y, r0.x +mad.f32 r2.w, r3.x, r0.z, r2.w +mov.f32f32 r3.x, r2.x +mad.f32 r3.y, r2.z, r2.z, r3.y mov.f32f32 r3.w, c2.y -mad.f32 r2.w, r3.z, r0.z, r2.w -mul.f r3.z, r2.x, r2.x -mul.f r4.x, c8.y, r1.x -mad.f32 r3.x, r2.z, r2.z, r3.x -mov.f32f32 r4.y, r2.w -mad.f32 r2.w, c9.y, r1.y, r4.x -mad.f32 r3.y, r3.w, r0.y, r3.y -mul.f r0.x, r2.z, r0.x -mul.f r3.w, r4.y, r4.y -mad.f32 r2.w, c10.y, r1.z, r2.w -mov.f32f32 r3.y, r3.y -mov.f32f32 r4.x, c3.y -mad.f32 r2.w, c11.y, r1.w, r2.w -rsq r3.x, r3.x -(ss)mov.f32f32 r3.x, r3.x -mov.f32f32 r4.z, c2.z -mad.f32 r3.y, r4.x, r0.z, r3.y mov.f32f32 r4.x, r2.w -mul.f r0.w, r0.w, r3.x -mul.f r2.y, r2.y, r3.x -mov.f32f32 r3.y, r3.y -mad.f32 r2.w, r4.x, r4.x, r3.z +mul.f r3.x, r3.x, r3.x +mul.f r4.y, c8.y, r4.z +mad.f32 r3.z, r3.w, r0.y, r3.z +mul.f r2.w, r2.w, r4.x +mov.f32f32 r3.w, c3.y +mad.f32 r4.y, c9.y, r4.w, r4.y +rsq r3.y, r3.y +(ss)mov.f32f32 r5.z, r3.y +mad.f32 r4.y, c10.y, r5.x, r4.y +mad.f32 r3.z, r3.w, r0.z, r3.z +mad.f32 r3.w, c11.y, r5.y, r4.y +mul.f r0.w, r0.w, r5.z +mul.f r2.y, r2.y, r5.z +mov.f32f32 r4.y, r3.z +mov.f32f32 r5.z, r3.w +mul.f r5.w, c0.x, r0.w mov.f32f32 r0.w, r0.w -mov.f32f32 r2.y, r2.y -mad.f32 r3.z, r3.y, r3.y, r3.w -mov.f32f32 r2.w, r2.w -mul.f r3.w, c8.z, r1.x -mul.f r4.w, c0.x, r0.w -mov.f32f32 r3.z, r3.z -mad.f32 r0.x, r4.z, r0.y, r0.x -mad.f32 r0.y, c9.z, r1.y, r3.w -mad.f32 r3.w, c0.y, r2.y, r4.w -mad.f32 r0.y, c10.z, r1.z, r0.y -mov.f32f32 r0.x, r0.x -mov.f32f32 r4.z, c3.z -mad.f32 r0.y, c11.z, r1.w, r0.y -mov.f32f32 r3.w, r3.w -mul.f r2.z, r2.z, r3.x -mad.f32 r0.x, r4.z, r0.z, r0.x -mov.f32f32 r0.y, r0.y -mul.f r0.z, c4.w, r1.x -mul.f r3.x, c4.z, r1.x -mov.f32f32 r0.x, r0.x -mad.f32 r2.w, r0.y, r0.y, r2.w +mad.f32 r2.w, r3.z, r4.y, r2.w +mul.f r0.x, r2.z, r0.x +mov.f32f32 r3.z, c2.z +mad.f32 r3.x, r3.w, r5.z, r3.x +mul.f r3.w, c8.z, r4.z +mad.f32 r5.w, c0.y, r2.y, r5.w +mad.f32 r0.x, r3.z, r0.y, r0.x +mov.f32f32 r0.y, c3.z +mad.f32 r3.z, c9.z, r4.w, r3.w +mul.f r2.z, r2.z, r3.y +(ss)mad.f32 r3.y, c10.z, r5.x, r3.z +mad.f32 r0.x, r0.y, r0.z, r0.x +mad.f32 r0.y, c11.z, r5.y, r3.y +mad.f32 r3.y, c0.z, r2.z, r5.w +mov.f32f32 r0.z, r2.y +mov.f32f32 r2.y, r0.x +mov.f32f32 r3.z, r0.y +mov.f32f32 r3.w, r3.y mov.f32f32 r2.z, r2.z -mad.f32 r0.z, c5.w, r1.y, r0.z -mad.f32 r3.z, r0.x, r0.x, r3.z -mad.f32 r0.z, c6.w, r1.z, r0.z -mad.f32 r3.x, c5.z, r1.y, r3.x -mul.f r4.z, c4.y, r1.x -mul.f r1.x, c4.x, r1.x -mad.f32 r0.z, c7.w, r1.w, r0.z -mad.f32 r3.x, c6.z, r1.z, r3.x -rsq r3.z, r3.z -(ss)mov.f32f32 r3.z, r3.z -rsq r2.w, r2.w -(ss)mov.f32f32 r4.w, r2.w -mad.f32 r3.w, c0.z, r2.z, r3.w -(ss)mov.f32f32 r2.w, r0.z -mul.f r0.x, r0.x, r3.z -mul.f r2.x, r2.x, r4.w -mul.f r4.y, r4.y, r3.z -mul.f r3.y, r3.y, r3.z -mov.f32f32 r0.x, r0.x -mov.f32f32 r2.x, r2.x -mov.f32f32 r3.z, r4.y -mov.f32f32 r3.y, r3.y -mul.f r4.y, r0.x, r2.y -absneg.f r2.x, (neg)r2.x -mul.f r6.x, c0.x, r3.z -mad.f32 r4.y, r3.y, r2.z, (neg)r4.y -mad.f32 r6.x, c0.y, r3.y, r6.x -mov.f32f32 r6.y, r2.x -mul.f r2.x, r3.z, r2.z -mov.f32f32 r2.z, r4.y -mov.f32f32 r4.y, r6.x -mul.f r6.x, r6.y, r6.y -mul.f r4.x, r4.x, r4.w -mul.f r2.z, c0.x, r2.z -mad.f32 r2.x, r0.x, r0.w, (neg)r2.x -mad.f32 r0.x, c0.z, r0.x, r4.y +mad.f32 r2.y, r2.y, r2.y, r2.w +mad.f32 r0.y, r0.y, r3.z, r3.x +mul.f r3.x, r3.w, r3.w +mul.f r2.w, c4.w, r4.z +mul.f r3.w, c4.z, r4.z +mul.f r5.w, c4.y, r4.z +mul.f r4.z, c4.x, r4.z +rsq r2.y, r2.y +(ss)mov.f32f32 r6.x, r2.y +rsq r0.y, r0.y +(ss)mov.f32f32 r6.y, r0.y +(ss)mul.f r0.y, r2.x, r0.y +mul.f r0.x, r0.x, r2.y +mul.f r2.x, r4.y, r6.x +mul.f r2.y, r5.z, r6.y +mul.f r4.x, r4.x, r6.x +absneg.f r4.y, (neg)r0.y +mov.f32f32 r0.y, r2.x +mov.f32f32 r5.z, r0.x +absneg.f r2.y, (neg)r2.y +mul.f r6.x, c0.x, r4.x mov.f32f32 r4.x, r4.x -mul.f r0.w, r3.y, r0.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r0.x, r0.x -absneg.f r3.y, (neg)r4.x -mad.f32 r0.w, r3.z, r2.y, (neg)r0.w -mad.f32 r2.x, c0.y, r2.x, r2.z -mov.f32f32 r0.x, r0.x -mov.f32f32 r4.x, r3.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r3.y, r3.w -mul.f r0.y, r0.y, r4.w -mad.f32 r0.w, c0.z, r0.w, r2.x -mad.f32 r2.x, r4.x, r4.x, r6.x -mul.f r2.y, r3.y, r3.y -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r2.x, r2.x -mad.f32 r3.x, c7.z, r1.w, r3.x -mad.f32 r3.z, c5.y, r1.y, r4.z -mov.f32f32 r3.w, r0.w -absneg.f r0.y, (neg)r0.y -mov.f32f32 r2.z, r3.x -mad.f32 r0.w, c6.y, r1.z, r3.z -mad.f32 r2.y, r3.w, r3.w, r2.y -mov.f32f32 r3.z, r0.y -mad.f32 r0.y, c7.y, r1.w, r0.w -mad.f32 r0.w, c5.x, r1.y, r1.x -mov.f32f32 r1.x, r2.y -mad.f32 r1.y, r3.z, r3.z, r2.x -mad.f32 r1.x, r0.x, r0.x, r1.x +mul.f r6.z, r5.z, r0.z +mov.f32f32 r6.w, r2.y +mad.f32 r6.z, r0.y, r2.z, (neg)r6.z +mov.f32f32 r7.x, r4.y +mad.f32 r2.x, c0.y, r2.x, r6.x +mul.f r2.z, r4.x, r2.z +mul.f r6.x, c0.x, r6.z +mad.f32 r2.z, r5.z, r0.w, (neg)r2.z +mul.f r5.z, r7.x, r7.x +mad.f32 r0.x, c0.z, r0.x, r2.x +mad.f32 r2.x, r2.y, r6.w, r5.z +mad.f32 r2.y, c0.y, r2.z, r6.x +mul.f r0.y, r0.y, r0.w +mul.f r0.w, r3.z, r6.y +mad.f32 r0.y, r4.x, r0.z, (neg)r0.y +mov.f32f32 r0.z, r0.x +mad.f32 r2.z, c5.w, r4.w, r2.w +mad.f32 r2.w, c5.z, r4.w, r3.w +mad.f32 r0.y, c0.z, r0.y, r2.y +absneg.f r2.y, (neg)r0.w +mov.f32f32 r3.z, r0.z +mad.f32 r0.z, c6.w, r5.x, r2.z +mov.f32f32 r2.z, r0.y +mov.f32f32 r4.x, r2.y +mad.f32 r0.w, c7.w, r5.y, r0.z +mad.f32 r0.z, c6.z, r5.x, r2.w +mov.f32f32 r5.z, r2.z +mad.f32 r2.x, r2.y, r4.x, r2.x +mov.f32f32 r2.w, r0.w +mad.f32 r0.z, c7.z, r5.y, r0.z +mad.f32 r0.y, r0.y, r5.z, r3.x +mad.f32 r2.y, c5.y, r4.w, r5.w +mad.f32 r0.x, r0.x, r3.z, r0.y +mov.f32f32 r2.z, r0.z +mad.f32 r0.y, c6.y, r5.x, r2.y +mad.f32 r2.y, c5.x, r4.w, r4.z +mad.f32 r0.y, c7.y, r5.y, r0.y +mad.f32 r4.z, c6.x, r5.x, r2.y +nop +rsq r0.x, r0.x +(ss)mov.f32f32 r2.y, r0.x +mul.f r3.x, r3.y, r0.x +(ss)rsq r0.x, r2.x +(ss)mov.f32f32 r2.x, r0.x +mul.f r3.w, r4.y, r0.x +mul.f r3.z, r3.z, r2.y +mul.f r3.y, r5.z, r2.y +mul.f r4.y, r4.x, r2.x +mul.f r4.x, r6.w, r2.x mov.f32f32 r2.y, r0.y -mad.f32 r1.z, c6.x, r1.z, r0.w -mov.f32f32 r0.w, r0.z -mov.f32f32 r0.z, r3.x -mov.f32f32 r0.y, r0.y -mad.f32 r1.z, c7.x, r1.w, r1.z -rsq r1.x, r1.x -(ss)mov.f32f32 r1.x, r1.x -rsq r1.y, r1.y -(ss)mov.f32f32 r1.y, r1.y -mov.f32f32 r1.w, (0.000000) -mov.f32f32 r2.x, r1.z -mul.f r0.x, r0.x, r1.x -mul.f r3.x, r3.w, r1.x -mul.f r1.x, r3.y, r1.x -mul.f r3.y, r3.z, r1.y -mov.f32f32 r0.x, r0.x -mov.f32f32 r3.x, r3.x -mov.f32f32 r1.x, r1.x -mov.f32f32 r3.w, r3.y -mov.f32f32 r0.x, r0.x -mov.f32f32 r3.x, r3.x -mov.f32f32 r1.x, r1.x -mov.f32f32 r4.y, r3.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r3.x, r3.x -mov.f32f32 r1.x, r1.x +mad.f32 r0.x, c7.x, r5.y, r4.z +mov.f32f32 r4.z, r4.y +mov.f32f32 r4.w, (0.000000) nop -mov.f32f32 r3.z, r0.x -mov.f32f32 r3.y, r3.x -mov.f32f32 r3.x, r1.x -mov.f32f32 r0.x, r4.y -mul.f r1.x, r4.x, r1.y -mul.f r1.y, r6.y, r1.y -mov.f32f32 r4.z, r3.w -mov.f32f32 r4.y, r0.x -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.y, r1.y -mov.f32f32 r0.x, r1.z -mov.f32f32 r4.w, r1.w -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.y, r1.y -(rpt1)nop -mov.f32f32 r1.x, r1.x -mov.f32f32 r3.w, r1.y -mov.f32f32 r1.w, r5.w -mov.f32f32 r1.z, r5.z -mov.f32f32 r4.x, r1.x -mov.f32f32 r1.y, r5.y -mov.f32f32 r1.x, r5.x +mov.f32f32 r2.x, r0.x end nop nop +nop ; VERT: outputs: r0.x (0:0) r1.x (5:0) r2.x (5:20) r3.x (5:21) r4.x (5:22) -; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r5.x (0:0,cm=f,il=16,b=0) -; VERT: 183 instructions, 0 half, 7 full +; VERT: inputs: r4.z (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r1.x (0:0,cm=f,il=16,b=0) +; VERT: 133 instructions, 0 half, 8 full diff --git a/reference/stk-mines/stk-mines-38.asm b/reference/stk-mines/stk-mines-38.asm index ff486a7..d610a0c 100644 --- a/reference/stk-mines/stk-mines-38.asm +++ b/reference/stk-mines/stk-mines-38.asm @@ -6,39 +6,20 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)bary.f r0.z, 4, r0.x -bary.f r0.w, 7, r0.x -bary.f r1.x, 5, r0.x -bary.f r1.y, 2, r0.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.x, r1.x -bary.f r1.z, 1, r0.x -mov.f32f32 r1.w, r0.z -mov.f32f32 r2.y, r0.w -mov.f32f32 r2.x, r1.x +bary.f r0.w, 5, r0.x +bary.f r1.x, 7, r0.x +bary.f r2.x, 2, r0.x +bary.f r2.y, 1, r0.x bary.f (ei)r0.x, 0, r0.x -(rpt4)nop -sam.p (f32)(xyzw)r0.y, r1.w, s#0, t#0 -(sy)mul.f r0.w, r0.w, r1.y -mul.f r0.z, r0.z, r1.z -mul.f r0.x, r0.y, r0.x -mov.f32f32 r0.y, r1.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.x, r0.x -(ss)mov.f32f32 r1.w, r0.y -mov.f32f32 r0.y, r0.w -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.x, r0.x -nop -mov.f32f32 r1.z, r0.y -mov.f32f32 r1.y, r0.z -mov.f32f32 r1.x, r0.x +(rpt2)nop +sam.p (f32)(xyzw)r1.x, r0.z, s#0, t#0 +(sy)mul.f r1.z, r1.z, r2.x +mul.f r1.y, r1.y, r2.y +(ss)mul.f r1.x, r1.x, r0.x end -nop -nop ; FRAG: outputs: r1.x (1:0) -; FRAG: inputs: r0.x (1:0,cm=f,il=8,b=1) r0.x (5:0,cm=f,il=12,b=1) -; FRAG: 34 instructions, 0 half, 3 full +; FRAG: inputs: r0.x (1:0,cm=f,il=8,b=1) r0.y (5:0,cm=f,il=12,b=1) +; FRAG: 14 instructions, 0 half, 3 full diff --git a/reference/stk/stk0100.asm b/reference/stk/stk0100.asm index 4a651b1..16ecc3c 100644 --- a/reference/stk/stk0100.asm +++ b/reference/stk/stk0100.asm @@ -6,31 +6,24 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)bary.f r0.z, 4, r0.x -bary.f r0.w, 7, r0.x -bary.f r1.x, 5, r0.x +bary.f r0.w, 5, r0.x +bary.f r1.x, 7, r0.x bary.f r1.y, 3, r0.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.x, r1.x bary.f r1.z, 2, r0.x -mov.f32f32 r1.w, r0.z -mov.f32f32 r2.y, r0.w -mov.f32f32 r2.x, r1.x -bary.f r0.z, 1, r0.x +bary.f r2.x, 1, r0.x bary.f (ei)r0.x, 0, r0.x -(rpt3)nop -sam.p (f32)(xyzw)r1.w, r1.w, s#0, t#0 -(sy)mul.f r0.y, r2.z, r1.y -mul.f r0.w, r2.y, r1.z -mul.f r0.z, r2.x, r0.z -mul.f r0.x, r1.w, r0.x -(ss)mov.f32f32 r1.w, r0.y -mov.f32f32 r1.z, r0.w -mov.f32f32 r1.y, r0.z -mov.f32f32 r1.x, r0.x +(rpt1)nop +sam.p (f32)(xyzw)r0.y, r0.z, s#0, t#0 +(sy)mul.f r1.w, r1.x, r1.y +mul.f r1.z, r0.w, r1.z +mul.f r1.y, r0.z, r2.x +(ss)mul.f r1.x, r0.y, r0.x end +nop +nop ; FRAG: outputs: r1.x (1:0) -; FRAG: inputs: r0.z (1:0,cm=f,il=8,b=1) r0.x (5:0,cm=f,il=12,b=1) -; FRAG: 27 instructions, 0 half, 3 full +; FRAG: inputs: r0.z (1:0,cm=f,il=8,b=1) r0.y (5:0,cm=f,il=12,b=1) +; FRAG: 15 instructions, 0 half, 3 full diff --git a/reference/stk/stk0101.asm b/reference/stk/stk0101.asm index c16817f..530b9e3 100644 --- a/reference/stk/stk0101.asm +++ b/reference/stk/stk0101.asm @@ -1,20 +1,20 @@ ; options: ; VERT: new compiler -@in(r2.x) in0 -@in(r2.y) in1 -@in(r2.z) in2 -@in(r2.w) in3 -@in(r0.w) in11 -@in(r3.x) in12 -@in(r3.y) in13 -@in(r3.z) in14 -@in(r3.w) in15 +@in(r3.x) in0 +@in(r3.y) in1 +@in(r3.z) in2 +@in(r3.w) in3 +@in(r4.w) in11 +@in(r2.x) in12 +@in(r2.y) in13 +@in(r2.z) in14 +@in(r2.w) in15 @in(r0.x) in16 @in(r0.y) in17 @in(r0.z) in18 -@in(r1.x) in20 -@in(r1.y) in21 -@in(r1.z) in22 +@in(r0.w) in20 +@in(r1.x) in21 +@in(r1.y) in22 @out(r0.x) out0 @out(r0.y) out1 @out(r0.z) out2 @@ -27,47 +27,40 @@ @out(r2.y) out9 @out(r2.z) out10 @out(r2.w) out11 -(sy)(ss)mov.f32f32 r0.w, r0.w -mad.f32 r0.z, c7.z, r0.z, r1.z -mad.f32 r0.y, c7.y, r0.y, r1.y -mad.f32 r0.x, c7.x, r0.x, r1.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x -max.f r0.w, r0.w, c8.x +@const(c8.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 +(sy)(ss)mad.f32 r0.z, c7.z, r0.z, r1.y +mad.f32 r0.y, c7.y, r0.y, r1.x +mad.f32 r0.x, c7.x, r0.x, r0.w +mul.f r0.w, r3.x, c0.w max.f r0.z, r0.z, c8.x max.f r0.y, r0.y, c8.x max.f r0.x, r0.x, c8.x -min.f r1.w, r0.w, c8.y +mad.f32 r0.w, c1.w, r3.y, r0.w min.f r1.z, r0.z, c8.y min.f r1.y, r0.y, c8.y min.f r1.x, r0.x, c8.y -mul.f r0.x, r2.x, c0.w -mul.f r0.y, r2.x, c0.z -mad.f32 r0.x, c1.w, r2.y, r0.x -mad.f32 r0.y, c1.z, r2.y, r0.y -mad.f32 r0.x, c2.w, r2.z, r0.x -mad.f32 r0.y, c2.z, r2.z, r0.y -mad.f32 r0.w, c3.w, r2.w, r0.x -mad.f32 r0.z, c3.z, r2.w, r0.y -mul.f r0.x, r2.x, c0.y -mul.f r0.y, r2.x, c0.x -mad.f32 r0.x, c1.y, r2.y, r0.x -mad.f32 r0.y, c1.x, r2.y, r0.y -mad.f32 r0.x, c2.y, r2.z, r0.x -mad.f32 r2.x, c2.x, r2.z, r0.y -mad.f32 r0.y, c3.y, r2.w, r0.x -mad.f32 r0.x, c3.x, r2.w, r2.x -mov.f32f32 r2.w, r3.w -mov.f32f32 r2.z, r3.z -mov.f32f32 r2.y, r3.y -mov.f32f32 r2.x, r3.x +mad.f32 r0.x, c2.w, r3.z, r0.w +mul.f r0.y, r3.x, c0.z +mad.f32 r0.w, c3.w, r3.w, r0.x +mad.f32 r0.x, c1.z, r3.y, r0.y +mul.f r0.y, r3.x, c0.y +mad.f32 r0.x, c2.z, r3.z, r0.x +mad.f32 r0.y, c1.y, r3.y, r0.y +mad.f32 r0.z, c3.z, r3.w, r0.x +mad.f32 r0.x, c2.y, r3.z, r0.y +mul.f r1.w, r3.x, c0.x +mad.f32 r0.y, c3.y, r3.w, r0.x +mad.f32 r0.x, c1.x, r3.y, r1.w +max.f r1.w, r4.w, c8.x +mad.f32 r0.x, c2.x, r3.z, r0.x +nop +mad.f32 r0.x, c3.x, r3.w, r0.x +min.f r1.w, r1.w, c8.y end nop nop nop ; VERT: outputs: r0.x (0:0) r1.x (1:0) r2.x (5:0) -; VERT: inputs: r2.x (0:0,cm=f,il=8,b=0) r63.w (0:0,cm=0,il=12,b=0) r0.x (0:0,cm=8,il=16,b=0) r3.x (0:0,cm=f,il=20,b=0) r0.x (0:0,cm=7,il=24,b=0) r1.x (0:0,cm=7,il=28,b=0) r63.w (0:0,cm=0,il=32,b=0) -; VERT: 37 instructions, 0 half, 4 full +; VERT: inputs: r3.x (0:0,cm=f,il=8,b=0) r63.w (0:0,cm=0,il=12,b=0) r4.x (0:0,cm=8,il=16,b=0) r2.x (0:0,cm=f,il=20,b=0) r0.x (0:0,cm=7,il=24,b=0) r0.w (0:0,cm=7,il=28,b=0) r63.w (0:0,cm=0,il=32,b=0) +; VERT: 29 instructions, 0 half, 5 full diff --git a/reference/stk/stk0102.asm b/reference/stk/stk0102.asm index cca09e5..0583b5d 100644 --- a/reference/stk/stk0102.asm +++ b/reference/stk/stk0102.asm @@ -1,17 +1,17 @@ ; options: ; VERT: new compiler -@in(r2.x) in0 -@in(r2.y) in1 -@in(r2.z) in2 -@in(r2.w) in3 -@in(r0.x) in4 -@in(r0.y) in5 -@in(r0.z) in6 -@in(r0.w) in7 -@in(r3.x) in8 -@in(r3.y) in9 -@in(r3.z) in10 -@in(r3.w) in11 +@in(r1.x) in0 +@in(r1.y) in1 +@in(r1.z) in2 +@in(r1.w) in3 +@in(r3.x) in4 +@in(r3.y) in5 +@in(r3.z) in6 +@in(r3.w) in7 +@in(r2.x) in8 +@in(r2.y) in9 +@in(r2.z) in10 +@in(r2.w) in11 @out(r0.x) out0 @out(r0.y) out1 @out(r0.z) out2 @@ -24,43 +24,36 @@ @out(r2.y) out9 @out(r2.z) out10 @out(r2.w) out11 -(sy)(ss)mov.f32f32 r0.w, r0.w -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x -max.f r0.w, r0.w, c4.x -max.f r0.z, r0.z, c4.x -max.f r0.y, r0.y, c4.x -max.f r0.x, r0.x, c4.x -min.f r1.w, r0.w, c4.y -min.f r1.z, r0.z, c4.y -min.f r1.y, r0.y, c4.y -min.f r1.x, r0.x, c4.y -mul.f r0.x, r2.x, c0.w -mul.f r0.y, r2.x, c0.z -mad.f32 r0.x, c1.w, r2.y, r0.x -mad.f32 r0.y, c1.z, r2.y, r0.y -mad.f32 r0.x, c2.w, r2.z, r0.x -mad.f32 r0.y, c2.z, r2.z, r0.y -mad.f32 r0.w, c3.w, r2.w, r0.x -mad.f32 r0.z, c3.z, r2.w, r0.y -mul.f r0.x, r2.x, c0.y -mul.f r0.y, r2.x, c0.x -mad.f32 r0.x, c1.y, r2.y, r0.x -mad.f32 r0.y, c1.x, r2.y, r0.y -mad.f32 r0.x, c2.y, r2.z, r0.x -mad.f32 r2.x, c2.x, r2.z, r0.y -mad.f32 r0.y, c3.y, r2.w, r0.x -mad.f32 r0.x, c3.x, r2.w, r2.x -mov.f32f32 r2.w, r3.w -mov.f32f32 r2.z, r3.z -mov.f32f32 r2.y, r3.y -mov.f32f32 r2.x, r3.x +@const(c4.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000 +(sy)(ss)mul.f r0.x, r1.x, c0.w +mul.f r0.y, r1.x, c0.z +mad.f32 r0.x, c1.w, r1.y, r0.x +mad.f32 r0.y, c1.z, r1.y, r0.y +mad.f32 r0.x, c2.w, r1.z, r0.x +mad.f32 r0.y, c2.z, r1.z, r0.y +mad.f32 r0.w, c3.w, r1.w, r0.x +mad.f32 r0.z, c3.z, r1.w, r0.y +mul.f r0.x, r1.x, c0.y +mul.f r0.y, r1.x, c0.x +mad.f32 r0.x, c1.y, r1.y, r0.x +mad.f32 r0.y, c1.x, r1.y, r0.y +mad.f32 r0.x, c2.y, r1.z, r0.x +mad.f32 r1.x, c2.x, r1.z, r0.y +mad.f32 r0.y, c3.y, r1.w, r0.x +mad.f32 r0.x, c3.x, r1.w, r1.x +max.f r1.x, r3.w, c4.x +max.f r1.y, r3.z, c4.x +max.f r3.y, r3.y, c4.x +max.f r3.x, r3.x, c4.x +min.f r1.w, r1.x, c4.y +min.f r1.z, r1.y, c4.y +min.f r1.y, r3.y, c4.y +min.f r1.x, r3.x, c4.y end nop nop nop ; VERT: outputs: r0.x (0:0) r1.x (1:0) r2.x (5:0) -; VERT: inputs: r2.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=f,il=12,b=0) r3.x (0:0,cm=f,il=16,b=0) -; VERT: 33 instructions, 0 half, 4 full +; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) r3.x (0:0,cm=f,il=12,b=0) r2.x (0:0,cm=f,il=16,b=0) +; VERT: 25 instructions, 0 half, 4 full diff --git a/reference/stk/stk0200.asm b/reference/stk/stk0200.asm index ff486a7..d610a0c 100644 --- a/reference/stk/stk0200.asm +++ b/reference/stk/stk0200.asm @@ -6,39 +6,20 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)bary.f r0.z, 4, r0.x -bary.f r0.w, 7, r0.x -bary.f r1.x, 5, r0.x -bary.f r1.y, 2, r0.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.x, r1.x -bary.f r1.z, 1, r0.x -mov.f32f32 r1.w, r0.z -mov.f32f32 r2.y, r0.w -mov.f32f32 r2.x, r1.x +bary.f r0.w, 5, r0.x +bary.f r1.x, 7, r0.x +bary.f r2.x, 2, r0.x +bary.f r2.y, 1, r0.x bary.f (ei)r0.x, 0, r0.x -(rpt4)nop -sam.p (f32)(xyzw)r0.y, r1.w, s#0, t#0 -(sy)mul.f r0.w, r0.w, r1.y -mul.f r0.z, r0.z, r1.z -mul.f r0.x, r0.y, r0.x -mov.f32f32 r0.y, r1.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.x, r0.x -(ss)mov.f32f32 r1.w, r0.y -mov.f32f32 r0.y, r0.w -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.x, r0.x -nop -mov.f32f32 r1.z, r0.y -mov.f32f32 r1.y, r0.z -mov.f32f32 r1.x, r0.x +(rpt2)nop +sam.p (f32)(xyzw)r1.x, r0.z, s#0, t#0 +(sy)mul.f r1.z, r1.z, r2.x +mul.f r1.y, r1.y, r2.y +(ss)mul.f r1.x, r1.x, r0.x end -nop -nop ; FRAG: outputs: r1.x (1:0) -; FRAG: inputs: r0.x (1:0,cm=f,il=8,b=1) r0.x (5:0,cm=f,il=12,b=1) -; FRAG: 34 instructions, 0 half, 3 full +; FRAG: inputs: r0.x (1:0,cm=f,il=8,b=1) r0.y (5:0,cm=f,il=12,b=1) +; FRAG: 14 instructions, 0 half, 3 full diff --git a/reference/stk/stk0301.asm b/reference/stk/stk0301.asm index 2c03e4f..284e180 100644 --- a/reference/stk/stk0301.asm +++ b/reference/stk/stk0301.asm @@ -1,13 +1,13 @@ ; options: ; VERT: new compiler -@in(r2.x) in0 -@in(r2.y) in1 -@in(r2.z) in2 -@in(r2.w) in3 -@in(r0.x) in4 -@in(r0.y) in5 -@in(r0.z) in6 -@in(r0.w) in7 +@in(r1.x) in0 +@in(r1.y) in1 +@in(r1.z) in2 +@in(r1.w) in3 +@in(r2.x) in4 +@in(r2.y) in5 +@in(r2.z) in6 +@in(r2.w) in7 @out(r0.x) out0 @out(r0.y) out1 @out(r0.z) out2 @@ -16,39 +16,36 @@ @out(r1.y) out5 @out(r1.z) out6 @out(r1.w) out7 -(sy)(ss)mov.f32f32 r0.w, r0.w -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x -max.f r0.w, r0.w, c4.x -max.f r0.z, r0.z, c4.x -max.f r0.y, r0.y, c4.x -max.f r0.x, r0.x, c4.x -min.f r1.w, r0.w, c4.y -min.f r1.z, r0.z, c4.y -min.f r1.y, r0.y, c4.y -min.f r1.x, r0.x, c4.y -mul.f r0.x, r2.x, c0.w -mul.f r0.y, r2.x, c0.z -mad.f32 r0.x, c1.w, r2.y, r0.x -mad.f32 r0.y, c1.z, r2.y, r0.y -mad.f32 r0.x, c2.w, r2.z, r0.x -mad.f32 r0.y, c2.z, r2.z, r0.y -mad.f32 r0.w, c3.w, r2.w, r0.x -mad.f32 r0.z, c3.z, r2.w, r0.y -mul.f r0.x, r2.x, c0.y -mul.f r0.y, r2.x, c0.x -mad.f32 r0.x, c1.y, r2.y, r0.x -mad.f32 r0.y, c1.x, r2.y, r0.y -mad.f32 r0.x, c2.y, r2.z, r0.x -mad.f32 r2.x, c2.x, r2.z, r0.y -mad.f32 r0.y, c3.y, r2.w, r0.x -mad.f32 r0.x, c3.x, r2.w, r2.x +@const(c4.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000 +(sy)(ss)mul.f r0.x, r1.x, c0.w +mul.f r0.y, r1.x, c0.z +mad.f32 r0.x, c1.w, r1.y, r0.x +mad.f32 r0.y, c1.z, r1.y, r0.y +mad.f32 r0.x, c2.w, r1.z, r0.x +mad.f32 r0.y, c2.z, r1.z, r0.y +mad.f32 r0.w, c3.w, r1.w, r0.x +mad.f32 r0.z, c3.z, r1.w, r0.y +mul.f r0.x, r1.x, c0.y +mul.f r0.y, r1.x, c0.x +mad.f32 r0.x, c1.y, r1.y, r0.x +mad.f32 r0.y, c1.x, r1.y, r0.y +mad.f32 r0.x, c2.y, r1.z, r0.x +mad.f32 r1.x, c2.x, r1.z, r0.y +mad.f32 r0.y, c3.y, r1.w, r0.x +mad.f32 r0.x, c3.x, r1.w, r1.x +max.f r1.x, r2.w, c4.x +max.f r1.y, r2.z, c4.x +max.f r2.y, r2.y, c4.x +max.f r2.x, r2.x, c4.x +min.f r1.w, r1.x, c4.y +min.f r1.z, r1.y, c4.y +min.f r1.y, r2.y, c4.y +min.f r1.x, r2.x, c4.y end nop nop nop ; VERT: outputs: r0.x (0:0) r1.x (1:0) -; VERT: inputs: r2.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=f,il=12,b=0) -; VERT: 29 instructions, 0 half, 3 full +; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) r2.x (0:0,cm=f,il=12,b=0) +; VERT: 25 instructions, 0 half, 3 full diff --git a/reference/stk/stk0302.asm b/reference/stk/stk0302.asm index 2c03e4f..284e180 100644 --- a/reference/stk/stk0302.asm +++ b/reference/stk/stk0302.asm @@ -1,13 +1,13 @@ ; options: ; VERT: new compiler -@in(r2.x) in0 -@in(r2.y) in1 -@in(r2.z) in2 -@in(r2.w) in3 -@in(r0.x) in4 -@in(r0.y) in5 -@in(r0.z) in6 -@in(r0.w) in7 +@in(r1.x) in0 +@in(r1.y) in1 +@in(r1.z) in2 +@in(r1.w) in3 +@in(r2.x) in4 +@in(r2.y) in5 +@in(r2.z) in6 +@in(r2.w) in7 @out(r0.x) out0 @out(r0.y) out1 @out(r0.z) out2 @@ -16,39 +16,36 @@ @out(r1.y) out5 @out(r1.z) out6 @out(r1.w) out7 -(sy)(ss)mov.f32f32 r0.w, r0.w -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x -max.f r0.w, r0.w, c4.x -max.f r0.z, r0.z, c4.x -max.f r0.y, r0.y, c4.x -max.f r0.x, r0.x, c4.x -min.f r1.w, r0.w, c4.y -min.f r1.z, r0.z, c4.y -min.f r1.y, r0.y, c4.y -min.f r1.x, r0.x, c4.y -mul.f r0.x, r2.x, c0.w -mul.f r0.y, r2.x, c0.z -mad.f32 r0.x, c1.w, r2.y, r0.x -mad.f32 r0.y, c1.z, r2.y, r0.y -mad.f32 r0.x, c2.w, r2.z, r0.x -mad.f32 r0.y, c2.z, r2.z, r0.y -mad.f32 r0.w, c3.w, r2.w, r0.x -mad.f32 r0.z, c3.z, r2.w, r0.y -mul.f r0.x, r2.x, c0.y -mul.f r0.y, r2.x, c0.x -mad.f32 r0.x, c1.y, r2.y, r0.x -mad.f32 r0.y, c1.x, r2.y, r0.y -mad.f32 r0.x, c2.y, r2.z, r0.x -mad.f32 r2.x, c2.x, r2.z, r0.y -mad.f32 r0.y, c3.y, r2.w, r0.x -mad.f32 r0.x, c3.x, r2.w, r2.x +@const(c4.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000 +(sy)(ss)mul.f r0.x, r1.x, c0.w +mul.f r0.y, r1.x, c0.z +mad.f32 r0.x, c1.w, r1.y, r0.x +mad.f32 r0.y, c1.z, r1.y, r0.y +mad.f32 r0.x, c2.w, r1.z, r0.x +mad.f32 r0.y, c2.z, r1.z, r0.y +mad.f32 r0.w, c3.w, r1.w, r0.x +mad.f32 r0.z, c3.z, r1.w, r0.y +mul.f r0.x, r1.x, c0.y +mul.f r0.y, r1.x, c0.x +mad.f32 r0.x, c1.y, r1.y, r0.x +mad.f32 r0.y, c1.x, r1.y, r0.y +mad.f32 r0.x, c2.y, r1.z, r0.x +mad.f32 r1.x, c2.x, r1.z, r0.y +mad.f32 r0.y, c3.y, r1.w, r0.x +mad.f32 r0.x, c3.x, r1.w, r1.x +max.f r1.x, r2.w, c4.x +max.f r1.y, r2.z, c4.x +max.f r2.y, r2.y, c4.x +max.f r2.x, r2.x, c4.x +min.f r1.w, r1.x, c4.y +min.f r1.z, r1.y, c4.y +min.f r1.y, r2.y, c4.y +min.f r1.x, r2.x, c4.y end nop nop nop ; VERT: outputs: r0.x (0:0) r1.x (1:0) -; VERT: inputs: r2.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=f,il=12,b=0) -; VERT: 29 instructions, 0 half, 3 full +; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) r2.x (0:0,cm=f,il=12,b=0) +; VERT: 25 instructions, 0 half, 3 full diff --git a/reference/stk/stk0303.asm b/reference/stk/stk0303.asm index cca09e5..0583b5d 100644 --- a/reference/stk/stk0303.asm +++ b/reference/stk/stk0303.asm @@ -1,17 +1,17 @@ ; options: ; VERT: new compiler -@in(r2.x) in0 -@in(r2.y) in1 -@in(r2.z) in2 -@in(r2.w) in3 -@in(r0.x) in4 -@in(r0.y) in5 -@in(r0.z) in6 -@in(r0.w) in7 -@in(r3.x) in8 -@in(r3.y) in9 -@in(r3.z) in10 -@in(r3.w) in11 +@in(r1.x) in0 +@in(r1.y) in1 +@in(r1.z) in2 +@in(r1.w) in3 +@in(r3.x) in4 +@in(r3.y) in5 +@in(r3.z) in6 +@in(r3.w) in7 +@in(r2.x) in8 +@in(r2.y) in9 +@in(r2.z) in10 +@in(r2.w) in11 @out(r0.x) out0 @out(r0.y) out1 @out(r0.z) out2 @@ -24,43 +24,36 @@ @out(r2.y) out9 @out(r2.z) out10 @out(r2.w) out11 -(sy)(ss)mov.f32f32 r0.w, r0.w -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x -max.f r0.w, r0.w, c4.x -max.f r0.z, r0.z, c4.x -max.f r0.y, r0.y, c4.x -max.f r0.x, r0.x, c4.x -min.f r1.w, r0.w, c4.y -min.f r1.z, r0.z, c4.y -min.f r1.y, r0.y, c4.y -min.f r1.x, r0.x, c4.y -mul.f r0.x, r2.x, c0.w -mul.f r0.y, r2.x, c0.z -mad.f32 r0.x, c1.w, r2.y, r0.x -mad.f32 r0.y, c1.z, r2.y, r0.y -mad.f32 r0.x, c2.w, r2.z, r0.x -mad.f32 r0.y, c2.z, r2.z, r0.y -mad.f32 r0.w, c3.w, r2.w, r0.x -mad.f32 r0.z, c3.z, r2.w, r0.y -mul.f r0.x, r2.x, c0.y -mul.f r0.y, r2.x, c0.x -mad.f32 r0.x, c1.y, r2.y, r0.x -mad.f32 r0.y, c1.x, r2.y, r0.y -mad.f32 r0.x, c2.y, r2.z, r0.x -mad.f32 r2.x, c2.x, r2.z, r0.y -mad.f32 r0.y, c3.y, r2.w, r0.x -mad.f32 r0.x, c3.x, r2.w, r2.x -mov.f32f32 r2.w, r3.w -mov.f32f32 r2.z, r3.z -mov.f32f32 r2.y, r3.y -mov.f32f32 r2.x, r3.x +@const(c4.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000 +(sy)(ss)mul.f r0.x, r1.x, c0.w +mul.f r0.y, r1.x, c0.z +mad.f32 r0.x, c1.w, r1.y, r0.x +mad.f32 r0.y, c1.z, r1.y, r0.y +mad.f32 r0.x, c2.w, r1.z, r0.x +mad.f32 r0.y, c2.z, r1.z, r0.y +mad.f32 r0.w, c3.w, r1.w, r0.x +mad.f32 r0.z, c3.z, r1.w, r0.y +mul.f r0.x, r1.x, c0.y +mul.f r0.y, r1.x, c0.x +mad.f32 r0.x, c1.y, r1.y, r0.x +mad.f32 r0.y, c1.x, r1.y, r0.y +mad.f32 r0.x, c2.y, r1.z, r0.x +mad.f32 r1.x, c2.x, r1.z, r0.y +mad.f32 r0.y, c3.y, r1.w, r0.x +mad.f32 r0.x, c3.x, r1.w, r1.x +max.f r1.x, r3.w, c4.x +max.f r1.y, r3.z, c4.x +max.f r3.y, r3.y, c4.x +max.f r3.x, r3.x, c4.x +min.f r1.w, r1.x, c4.y +min.f r1.z, r1.y, c4.y +min.f r1.y, r3.y, c4.y +min.f r1.x, r3.x, c4.y end nop nop nop ; VERT: outputs: r0.x (0:0) r1.x (1:0) r2.x (5:0) -; VERT: inputs: r2.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=f,il=12,b=0) r3.x (0:0,cm=f,il=16,b=0) -; VERT: 33 instructions, 0 half, 4 full +; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) r3.x (0:0,cm=f,il=12,b=0) r2.x (0:0,cm=f,il=16,b=0) +; VERT: 25 instructions, 0 half, 4 full diff --git a/reference/stk/stk0304.asm b/reference/stk/stk0304.asm index 6763f4d..c381d21 100644 --- a/reference/stk/stk0304.asm +++ b/reference/stk/stk0304.asm @@ -1,16 +1,16 @@ ; options: ; VERT: new compiler -@in(r2.x) in0 -@in(r2.y) in1 -@in(r2.z) in2 -@in(r2.w) in3 +@in(r1.x) in0 +@in(r1.y) in1 +@in(r1.z) in2 +@in(r1.w) in3 @in(r0.x) in4 @in(r0.y) in5 @in(r0.z) in6 -@in(r3.x) in8 -@in(r3.y) in9 -@in(r3.z) in10 -@in(r3.w) in11 +@in(r2.x) in8 +@in(r2.y) in9 +@in(r2.z) in10 +@in(r2.w) in11 @out(r0.x) out0 @out(r0.y) out1 @out(r0.z) out2 @@ -19,131 +19,120 @@ @out(r1.y) out5 @out(r1.z) out6 @out(r1.w) out7 -(sy)(ss)mul.f r0.w, r2.x, c12.x -mul.f r1.x, r0.x, c4.x -mad.f32 r0.w, c13.x, r2.y, r0.w -mad.f32 r1.x, c4.y, r0.y, r1.x -mad.f32 r0.w, c14.x, r2.z, r0.w -mov.f32f32 r1.y, r3.w -mad.f32 r0.w, c15.x, r2.w, r0.w -mov.f32f32 r1.x, r1.x -mul.f r1.z, r0.x, c5.x +@const(c19.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 +(sy)(ss)mul.f r0.w, r1.x, c12.x +mul.f r3.x, r0.x, c4.x +mad.f32 r0.w, c13.x, r1.y, r0.w +mad.f32 r3.x, c4.y, r0.y, r3.x +mad.f32 r0.w, c14.x, r1.z, r0.w +mad.f32 r3.x, c4.z, r0.z, r3.x +mad.f32 r3.y, c15.x, r1.w, r0.w +mul.f r0.w, r1.x, c12.y +mul.f r3.z, r1.x, c12.z +mul.f r3.w, r1.x, c0.w +mul.f r4.x, r3.y, r3.y +mad.f32 r0.w, c13.y, r1.y, r0.w +mul.f r4.y, r3.x, c10.x +mad.f32 r0.w, c14.y, r1.z, r0.w +mul.f r4.z, r0.x, c5.x +mad.f32 r4.w, c15.y, r1.w, r0.w +mad.f32 r0.w, c5.y, r0.y, r4.z +mad.f32 r3.z, c13.z, r1.y, r3.z +mad.f32 r3.w, c1.w, r1.y, r3.w +mad.f32 r4.x, r4.w, r4.w, r4.x +mad.f32 r3.z, c14.z, r1.z, r3.z +mad.f32 r4.z, c5.z, r0.z, r0.w +mad.f32 r3.z, c15.z, r1.w, r3.z +mad.f32 r0.w, c2.w, r1.z, r3.w +mul.f r3.w, r1.x, c0.z +mul.f r5.x, r1.x, c0.y +mad.f32 r4.x, r3.z, r3.z, r4.x +mad.f32 r4.y, c10.y, r4.z, r4.y mul.f r0.x, r0.x, c6.x -mul.f r1.w, r0.w, r0.w -mul.f r3.w, r2.x, c12.y -mad.f32 r1.x, c4.z, r0.z, r1.x -mad.f32 r3.w, c13.y, r2.y, r3.w -mov.f32f32 r1.y, r1.y -mad.f32 r3.w, c14.y, r2.z, r3.w -mul.f r4.x, r1.x, c10.x -mad.f32 r3.w, c15.y, r2.w, r3.w -mad.f32 r1.z, c5.y, r0.y, r1.z -max.f r1.y, r1.y, c19.x +mad.f32 r0.w, c3.w, r1.w, r0.w +mad.f32 r3.w, c1.z, r1.y, r3.w +mad.f32 r5.x, c1.y, r1.y, r5.x +mul.f r1.x, r1.x, c0.x +rsq r4.x, (abs)r4.x +(ss)mov.f32f32 r5.y, r4.x +mul.f r3.z, r3.z, r4.x mad.f32 r0.x, c6.y, r0.y, r0.x -mad.f32 r0.y, r3.w, r3.w, r1.w -mov.f32f32 r1.z, r1.z -min.f r1.w, r1.y, c19.y -mad.f32 r1.y, c5.z, r0.z, r1.z -mov.f32f32 r0.y, r0.y -mul.f r1.z, r2.x, c12.z -mov.f32f32 r0.x, r0.x -mad.f32 r1.z, c13.z, r2.y, r1.z -mad.f32 r4.x, c10.y, r1.y, r4.x -mad.f32 r1.z, c14.z, r2.z, r1.z +mad.f32 r0.y, c2.z, r1.z, r3.w +mul.f r3.y, r3.y, r5.y +mul.f r3.w, r4.w, r5.y +(rpt1)nop +add.f r3.y, c10.x, (neg)r3.y +add.f r3.w, c10.y, (neg)r3.w +add.f r3.z, c10.z, (neg)r3.z mad.f32 r0.x, c6.z, r0.z, r0.x -mad.f32 r0.z, c15.z, r2.w, r1.z -mov.f32f32 r1.z, r4.x -mul.f r4.x, r2.x, c0.w -mul.f r4.y, r2.x, c0.z -mad.f32 r0.y, r0.z, r0.z, r0.y -mad.f32 r1.z, c10.z, r0.x, r1.z -mad.f32 r4.x, c1.w, r2.y, r4.x -mad.f32 r4.y, c1.z, r2.y, r4.y -mul.f r4.z, r2.x, c0.y -mul.f r2.x, r2.x, c0.x -mul.f r4.w, c16.z, r3.z +(ss)mul.f r4.x, r3.y, r3.y +mad.f32 r0.z, c3.z, r1.w, r0.y +mad.f32 r0.y, r3.w, r3.w, r4.x +mad.f32 r4.x, c10.z, r0.x, r4.y +mad.f32 r0.y, r3.z, r3.z, r0.y +mad.f32 r4.y, c2.y, r1.z, r5.x +mad.f32 r1.x, c1.x, r1.y, r1.x +max.f r1.y, r2.w, c19.x +mul.f r2.w, c16.z, r2.z +mul.f r4.w, c16.y, r2.y +mul.f r5.x, c16.x, r2.x rsq r0.y, (abs)r0.y -(ss)mov.f32f32 r0.y, r0.y -max.f r5.x, r1.z, c19.x -cmps.f.lt r1.z, (neg)r1.z, c19.x -mad.f32 r4.x, c2.w, r2.z, r4.x -mul.f r0.w, r0.w, r0.y -mul.f r3.w, r3.w, r0.y -mul.f r0.y, r0.z, r0.y -mov.f32f32 r5.x, r5.x -add.f r0.z, c10.x, (neg)r0.w -add.f r3.w, c10.y, (neg)r3.w -add.f r0.y, c10.z, (neg)r0.y -mad.f32 r0.w, c8.z, r3.z, c9.z -mul.f r5.y, r0.z, r0.z -mul.f r5.z, c16.y, r3.y -mad.f32 r5.y, r3.w, r3.w, r5.y -add.f r4.w, r4.w, r0.w -mad.f32 r0.w, c8.y, r3.y, c9.y -mul.f r5.w, c16.x, r3.x -mov.f32f32 r5.y, r5.y -mul.f r3.z, c17.z, r3.z -mad.f32 r5.y, r0.y, r0.y, r5.y -add.f r5.z, r5.z, r0.w -mad.f32 r6.x, c8.x, r3.x, c9.x -mad.f32 r0.w, c3.w, r2.w, r4.x -mad.f32 r4.x, c2.z, r2.z, r4.y -mad.f32 r4.y, c1.y, r2.y, r4.z -mad.f32 r2.x, c1.x, r2.y, r2.x -rsq r2.y, (abs)r5.y -(ss)mov.f32f32 r2.y, r2.y -mad.f32 r3.z, r5.x, r3.z, r4.w -mul.f r3.y, c17.y, r3.y -add.f r4.z, r5.w, r6.x -mul.f r0.z, r0.z, r2.y -mul.f r3.w, r3.w, r2.y -mul.f r0.y, r0.y, r2.y -mad.f32 r2.y, r5.x, r3.y, r5.z -mul.f r0.z, r1.x, r0.z -mul.f r1.x, c17.x, r3.x -mad.f32 r1.y, r1.y, r3.w, r0.z -mad.f32 r0.z, c3.z, r2.w, r4.x -mad.f32 r3.x, c2.y, r2.z, r4.y -mad.f32 r2.x, c2.x, r2.z, r2.x -mov.f32f32 r1.y, r1.y -mad.f32 r1.x, r5.x, r1.x, r4.z -mad.f32 r1.y, r0.x, r0.y, r1.y -mad.f32 r0.y, c3.y, r2.w, r3.x -mad.f32 r0.x, c3.x, r2.w, r2.x -mov.f32f32 r2.x, c7.x -max.f r1.y, r1.y, c19.x -(rpt5)nop -log2 r1.y, r1.y -(ss)mov.f32f32 r1.y, r1.y -min.f r2.x, r2.x, c19.z -(rpt2)nop -mul.f r1.y, r2.x, r1.y -(rpt2)nop -mov.f32f32 r1.y, r1.y +(ss)mov.f32f32 r5.y, r0.y +(ss)mul.f r0.y, r3.z, r0.y +max.f r3.z, r4.x, c19.x +mad.f32 r5.z, c8.x, r2.x, c9.x +mul.f r3.y, r3.y, r5.y +mul.f r3.w, r3.w, r5.y +mov.f32f32 r5.y, r3.z +mad.f32 r5.w, c8.y, r2.y, c9.y +mul.f r3.x, r3.x, r3.y +mad.f32 r3.y, c8.z, r2.z, c9.z +mad.f32 r3.x, r4.z, r3.w, r3.x +add.f r3.w, r4.w, r5.w +mad.f32 r0.x, r0.x, r0.y, r3.x +add.f r2.w, r2.w, r3.y +mul.f r0.y, c17.y, r2.y +add.f r2.y, r5.x, r5.z +max.f r0.x, r0.x, c19.x +mul.f r2.z, c17.z, r2.z +mad.f32 r3.x, r5.y, r0.y, r3.w +mul.f r2.x, c17.x, r2.x +cmps.f.lt r3.y, (neg)r4.x, c19.x +mad.f32 r0.y, c3.y, r1.w, r4.y +mad.f32 r1.x, c2.x, r1.z, r1.x +log2 r1.z, r0.x +mov.f32f32 r3.w, c7.x +mad.f32 r2.z, r5.y, r2.z, r2.w +mad.f32 r2.x, r3.z, r2.x, r2.y +(ss)mad.f32 r0.x, c3.x, r1.w, r1.x +min.f r1.x, r3.w, c19.z +min.f r1.w, r1.y, c19.y +(rpt1)nop +(ss)mul.f r1.x, r1.x, r1.z (rpt5)nop -exp2 r1.y, r1.y -(ss)mov.f32f32 r1.y, r1.y -(rpt2)nop -sel.b32 r1.y, r1.y, r1.z, c19.x -(rpt2)nop -mov.f32f32 r1.y, r1.y +exp2 r1.x, r1.x +(ss)sel.b32 r1.x, r1.x, r3.y, c19.x (rpt2)nop +mov.f32f32 r1.y, r1.x +mad.f32 r1.x, c18.x, r1.x, r2.x +(rpt1)nop mov.f32f32 r1.y, r1.y -(rpt2)nop -mad.f32 r1.z, c18.z, r1.y, r3.z -mad.f32 r2.x, c18.y, r1.y, r2.y -mad.f32 r1.x, c18.x, r1.y, r1.x -nop -max.f r1.y, r1.z, c19.x -max.f r2.x, r2.x, c19.x max.f r1.x, r1.x, c19.x -nop -min.f r1.z, r1.y, c19.y -min.f r1.y, r2.x, c19.y +(rpt1)nop +mad.f32 r1.z, c18.z, r1.y, r2.z +mad.f32 r1.y, c18.y, r1.y, r3.x min.f r1.x, r1.x, c19.y +nop +max.f r1.z, r1.z, c19.x +max.f r1.y, r1.y, c19.x +(rpt1)nop +min.f r1.z, r1.z, c19.y +min.f r1.y, r1.y, c19.y end nop +nop +nop ; VERT: outputs: r0.x (0:0) r1.x (1:0) -; VERT: inputs: r2.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r3.x (0:0,cm=f,il=16,b=0) -; VERT: 145 instructions, 0 half, 7 full +; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r2.x (0:0,cm=f,il=16,b=0) +; VERT: 121 instructions, 0 half, 6 full diff --git a/reference/stk/stk0305.asm b/reference/stk/stk0305.asm index 242def2..693d844 100644 --- a/reference/stk/stk0305.asm +++ b/reference/stk/stk0305.asm @@ -1,9 +1,9 @@ ; options: ; VERT: new compiler -@in(r2.x) in0 -@in(r2.y) in1 -@in(r2.z) in2 -@in(r2.w) in3 +@in(r1.x) in0 +@in(r1.y) in1 +@in(r1.z) in2 +@in(r1.w) in3 @in(r0.x) in4 @in(r0.y) in5 @in(r0.z) in6 @@ -11,10 +11,10 @@ @in(r3.y) in9 @in(r3.z) in10 @in(r3.w) in11 -@in(r4.x) in12 -@in(r4.y) in13 -@in(r4.z) in14 -@in(r4.w) in15 +@in(r2.x) in12 +@in(r2.y) in13 +@in(r2.z) in14 +@in(r2.w) in15 @out(r0.x) out0 @out(r0.y) out1 @out(r0.z) out2 @@ -27,135 +27,120 @@ @out(r2.y) out9 @out(r2.z) out10 @out(r2.w) out11 -(sy)(ss)mul.f r0.w, r2.x, c12.x -mul.f r1.x, r0.x, c4.x -mad.f32 r0.w, c13.x, r2.y, r0.w -mad.f32 r1.x, c4.y, r0.y, r1.x -mad.f32 r0.w, c14.x, r2.z, r0.w -mov.f32f32 r1.y, r3.w -mad.f32 r0.w, c15.x, r2.w, r0.w -mov.f32f32 r1.x, r1.x -mul.f r1.z, r0.x, c5.x +@const(c19.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 +(sy)(ss)mul.f r0.w, r1.x, c12.x +mul.f r4.x, r0.x, c4.x +mad.f32 r0.w, c13.x, r1.y, r0.w +mad.f32 r4.x, c4.y, r0.y, r4.x +mad.f32 r0.w, c14.x, r1.z, r0.w +mad.f32 r4.x, c4.z, r0.z, r4.x +mad.f32 r4.y, c15.x, r1.w, r0.w +mul.f r0.w, r1.x, c12.y +mul.f r4.z, r1.x, c12.z +mul.f r4.w, r1.x, c0.w +mul.f r5.x, r4.y, r4.y +mad.f32 r0.w, c13.y, r1.y, r0.w +mul.f r5.y, r4.x, c10.x +mad.f32 r0.w, c14.y, r1.z, r0.w +mul.f r5.z, r0.x, c5.x +mad.f32 r5.w, c15.y, r1.w, r0.w +mad.f32 r0.w, c5.y, r0.y, r5.z +mad.f32 r4.z, c13.z, r1.y, r4.z +mad.f32 r4.w, c1.w, r1.y, r4.w +mad.f32 r5.x, r5.w, r5.w, r5.x +mad.f32 r4.z, c14.z, r1.z, r4.z +mad.f32 r5.z, c5.z, r0.z, r0.w +mad.f32 r4.z, c15.z, r1.w, r4.z +mad.f32 r0.w, c2.w, r1.z, r4.w +mul.f r4.w, r1.x, c0.z +mul.f r6.x, r1.x, c0.y +mad.f32 r5.x, r4.z, r4.z, r5.x +mad.f32 r5.y, c10.y, r5.z, r5.y mul.f r0.x, r0.x, c6.x -mul.f r1.w, r0.w, r0.w -mul.f r3.w, r2.x, c12.y -mad.f32 r1.x, c4.z, r0.z, r1.x -mad.f32 r3.w, c13.y, r2.y, r3.w -mov.f32f32 r1.y, r1.y -mad.f32 r3.w, c14.y, r2.z, r3.w -mul.f r5.x, r1.x, c10.x -mad.f32 r3.w, c15.y, r2.w, r3.w -mad.f32 r1.z, c5.y, r0.y, r1.z -max.f r1.y, r1.y, c19.x +mad.f32 r0.w, c3.w, r1.w, r0.w +mad.f32 r4.w, c1.z, r1.y, r4.w +mad.f32 r6.x, c1.y, r1.y, r6.x +mul.f r1.x, r1.x, c0.x +rsq r5.x, (abs)r5.x +(ss)mov.f32f32 r6.y, r5.x +mul.f r4.z, r4.z, r5.x mad.f32 r0.x, c6.y, r0.y, r0.x -mad.f32 r0.y, r3.w, r3.w, r1.w -mov.f32f32 r1.z, r1.z -min.f r1.w, r1.y, c19.y -mad.f32 r1.y, c5.z, r0.z, r1.z -mov.f32f32 r0.y, r0.y -mul.f r1.z, r2.x, c12.z -mov.f32f32 r0.x, r0.x -mad.f32 r1.z, c13.z, r2.y, r1.z -mad.f32 r5.x, c10.y, r1.y, r5.x -mad.f32 r1.z, c14.z, r2.z, r1.z +mad.f32 r0.y, c2.z, r1.z, r4.w +mul.f r4.y, r4.y, r6.y +mul.f r4.w, r5.w, r6.y +(rpt1)nop +add.f r4.y, c10.x, (neg)r4.y +add.f r4.w, c10.y, (neg)r4.w +add.f r4.z, c10.z, (neg)r4.z mad.f32 r0.x, c6.z, r0.z, r0.x -mad.f32 r0.z, c15.z, r2.w, r1.z -mov.f32f32 r1.z, r5.x -mul.f r5.x, r2.x, c0.w -mul.f r5.y, r2.x, c0.z -mad.f32 r0.y, r0.z, r0.z, r0.y -mad.f32 r1.z, c10.z, r0.x, r1.z -mad.f32 r5.x, c1.w, r2.y, r5.x -mad.f32 r5.y, c1.z, r2.y, r5.y -mul.f r5.z, r2.x, c0.y -mul.f r2.x, r2.x, c0.x -mul.f r5.w, c16.z, r3.z +(ss)mul.f r5.x, r4.y, r4.y +mad.f32 r0.z, c3.z, r1.w, r0.y +mad.f32 r0.y, r4.w, r4.w, r5.x +mad.f32 r5.x, c10.z, r0.x, r5.y +mad.f32 r0.y, r4.z, r4.z, r0.y +mad.f32 r5.y, c2.y, r1.z, r6.x +mad.f32 r1.x, c1.x, r1.y, r1.x +max.f r1.y, r3.w, c19.x +mul.f r3.w, c16.z, r3.z +mul.f r5.w, c16.y, r3.y +mul.f r6.x, c16.x, r3.x rsq r0.y, (abs)r0.y -(ss)mov.f32f32 r0.y, r0.y -max.f r6.x, r1.z, c19.x -cmps.f.lt r1.z, (neg)r1.z, c19.x -mad.f32 r5.x, c2.w, r2.z, r5.x -mul.f r0.w, r0.w, r0.y -mul.f r3.w, r3.w, r0.y -mul.f r0.y, r0.z, r0.y -mov.f32f32 r6.x, r6.x -add.f r0.z, c10.x, (neg)r0.w -add.f r3.w, c10.y, (neg)r3.w -add.f r0.y, c10.z, (neg)r0.y -mad.f32 r0.w, c8.z, r3.z, c9.z -mul.f r6.y, r0.z, r0.z -mul.f r6.z, c16.y, r3.y -mad.f32 r6.y, r3.w, r3.w, r6.y -add.f r5.w, r5.w, r0.w -mad.f32 r0.w, c8.y, r3.y, c9.y -mul.f r6.w, c16.x, r3.x -mov.f32f32 r6.y, r6.y +(ss)mov.f32f32 r6.y, r0.y +(ss)mul.f r0.y, r4.z, r0.y +max.f r4.z, r5.x, c19.x +mad.f32 r6.z, c8.x, r3.x, c9.x +mul.f r4.y, r4.y, r6.y +mul.f r4.w, r4.w, r6.y +mov.f32f32 r6.y, r4.z +mad.f32 r6.w, c8.y, r3.y, c9.y +mul.f r4.x, r4.x, r4.y +mad.f32 r4.y, c8.z, r3.z, c9.z +mad.f32 r4.x, r5.z, r4.w, r4.x +add.f r4.w, r5.w, r6.w +mad.f32 r0.x, r0.x, r0.y, r4.x +add.f r3.w, r3.w, r4.y +mul.f r0.y, c17.y, r3.y +add.f r3.y, r6.x, r6.z +max.f r0.x, r0.x, c19.x mul.f r3.z, c17.z, r3.z -mad.f32 r6.y, r0.y, r0.y, r6.y -add.f r6.z, r6.z, r0.w -mad.f32 r7.x, c8.x, r3.x, c9.x -mad.f32 r0.w, c3.w, r2.w, r5.x -mad.f32 r5.x, c2.z, r2.z, r5.y -mad.f32 r5.y, c1.y, r2.y, r5.z -mad.f32 r2.x, c1.x, r2.y, r2.x -rsq r2.y, (abs)r6.y -(ss)mov.f32f32 r2.y, r2.y -mad.f32 r3.z, r6.x, r3.z, r5.w -mul.f r3.y, c17.y, r3.y -add.f r5.z, r6.w, r7.x -mul.f r0.z, r0.z, r2.y -mul.f r3.w, r3.w, r2.y -mul.f r0.y, r0.y, r2.y -mad.f32 r3.y, r6.x, r3.y, r6.z -mul.f r0.z, r1.x, r0.z -mul.f r1.x, c17.x, r3.x -mad.f32 r1.y, r1.y, r3.w, r0.z -mad.f32 r0.z, c3.z, r2.w, r5.x -mad.f32 r2.y, c2.y, r2.z, r5.y -mad.f32 r2.x, c2.x, r2.z, r2.x -mov.f32f32 r1.y, r1.y -mad.f32 r1.x, r6.x, r1.x, r5.z -mad.f32 r1.y, r0.x, r0.y, r1.y -mad.f32 r0.y, c3.y, r2.w, r2.y -mad.f32 r0.x, c3.x, r2.w, r2.x -mov.f32f32 r3.x, c7.x -max.f r1.y, r1.y, c19.x -mov.f32f32 r2.w, r4.w -mov.f32f32 r2.z, r4.z -mov.f32f32 r2.y, r4.y -mov.f32f32 r2.x, r4.x +mad.f32 r4.x, r6.y, r0.y, r4.w +mul.f r3.x, c17.x, r3.x +cmps.f.lt r4.y, (neg)r5.x, c19.x +mad.f32 r0.y, c3.y, r1.w, r5.y +mad.f32 r1.x, c2.x, r1.z, r1.x +log2 r1.z, r0.x +mov.f32f32 r4.w, c7.x +mad.f32 r3.z, r6.y, r3.z, r3.w +mad.f32 r3.x, r4.z, r3.x, r3.y +(ss)mad.f32 r0.x, c3.x, r1.w, r1.x +min.f r1.x, r4.w, c19.z +min.f r1.w, r1.y, c19.y (rpt1)nop -log2 r1.y, r1.y -(ss)mov.f32f32 r1.y, r1.y -min.f r3.x, r3.x, c19.z -(rpt2)nop -mul.f r1.y, r3.x, r1.y -(rpt2)nop -mov.f32f32 r1.y, r1.y +(ss)mul.f r1.x, r1.x, r1.z (rpt5)nop -exp2 r1.y, r1.y -(ss)mov.f32f32 r1.y, r1.y -(rpt2)nop -sel.b32 r1.y, r1.y, r1.z, c19.x -(rpt2)nop -mov.f32f32 r1.y, r1.y +exp2 r1.x, r1.x +(ss)sel.b32 r1.x, r1.x, r4.y, c19.x (rpt2)nop +mov.f32f32 r1.y, r1.x +mad.f32 r1.x, c18.x, r1.x, r3.x +(rpt1)nop mov.f32f32 r1.y, r1.y -(rpt2)nop -mad.f32 r1.z, c18.z, r1.y, r3.z -mad.f32 r3.x, c18.y, r1.y, r3.y -mad.f32 r1.x, c18.x, r1.y, r1.x -nop -max.f r1.y, r1.z, c19.x -max.f r3.x, r3.x, c19.x max.f r1.x, r1.x, c19.x -nop -min.f r1.z, r1.y, c19.y -min.f r1.y, r3.x, c19.y +(rpt1)nop +mad.f32 r1.z, c18.z, r1.y, r3.z +mad.f32 r1.y, c18.y, r1.y, r4.x min.f r1.x, r1.x, c19.y +nop +max.f r1.z, r1.z, c19.x +max.f r1.y, r1.y, c19.x +(rpt1)nop +min.f r1.z, r1.z, c19.y +min.f r1.y, r1.y, c19.y end nop +nop +nop ; VERT: outputs: r0.x (0:0) r1.x (1:0) r2.x (5:0) -; VERT: inputs: r2.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r3.x (0:0,cm=f,il=16,b=0) r4.x (0:0,cm=f,il=20,b=0) -; VERT: 145 instructions, 0 half, 8 full +; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r3.x (0:0,cm=f,il=16,b=0) r2.x (0:0,cm=f,il=20,b=0) +; VERT: 121 instructions, 0 half, 7 full diff --git a/reference/stk/stk0306.asm b/reference/stk/stk0306.asm index 324cf3b..55ee480 100644 --- a/reference/stk/stk0306.asm +++ b/reference/stk/stk0306.asm @@ -1,16 +1,16 @@ ; options: ; VERT: new compiler -@in(r3.x) in0 -@in(r3.y) in1 -@in(r3.z) in2 -@in(r3.w) in3 +@in(r1.x) in0 +@in(r1.y) in1 +@in(r1.z) in2 +@in(r1.w) in3 @in(r0.x) in4 @in(r0.y) in5 @in(r0.z) in6 -@in(r0.w) in8 -@in(r1.x) in9 -@in(r1.y) in10 -@in(r1.z) in11 +@in(r3.x) in8 +@in(r3.y) in9 +@in(r3.z) in10 +@in(r3.w) in11 @in(r4.x) in12 @in(r4.y) in13 @in(r4.z) in14 @@ -27,143 +27,132 @@ @out(r2.y) out9 @out(r2.z) out10 @out(r2.w) out11 -(sy)(ss)mul.f r1.w, r3.x, c12.x +@const(c23.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 +(sy)(ss)mul.f r0.w, r1.x, c12.x mul.f r2.x, r0.x, c4.x -mad.f32 r1.w, c13.x, r3.y, r1.w +mad.f32 r0.w, c13.x, r1.y, r0.w mad.f32 r2.x, c4.y, r0.y, r2.x -mad.f32 r1.w, c14.x, r3.z, r1.w -mov.f32f32 r1.z, r1.z -mad.f32 r2.y, c15.x, r3.w, r1.w -mov.f32f32 r1.w, r2.x -mul.f r2.x, r0.x, c5.x +mad.f32 r0.w, c14.x, r1.z, r0.w +mad.f32 r2.x, c4.z, r0.z, r2.x +mad.f32 r0.w, c15.x, r1.w, r0.w +mul.f r2.y, r4.x, c19.w +mul.f r2.z, r4.x, c19.z +mul.f r5.x, r4.x, c19.y +mul.f r2.w, r0.w, r0.w +mul.f r5.y, r1.x, c12.y +mul.f r5.z, r2.x, c10.x +mad.f32 r5.y, c13.y, r1.y, r5.y +mul.f r5.w, r0.x, c5.x +mad.f32 r5.y, c14.y, r1.z, r5.y +mad.f32 r5.w, c5.y, r0.y, r5.w +mad.f32 r5.y, c15.y, r1.w, r5.y +mad.f32 r5.w, c5.z, r0.z, r5.w +mad.f32 r2.y, c20.w, r4.y, r2.y +mad.f32 r2.z, c20.z, r4.y, r2.z +mad.f32 r2.w, r5.y, r5.y, r2.w +mul.f r6.x, r1.x, c12.z +mad.f32 r5.z, c10.y, r5.w, r5.z +mad.f32 r6.x, c13.z, r1.y, r6.x mul.f r0.x, r0.x, c6.x -mul.f r2.z, r2.y, r2.y -mul.f r2.w, r3.x, c12.y -mad.f32 r5.x, c4.z, r0.z, r1.w -mad.f32 r1.w, c13.y, r3.y, r2.w -mov.f32f32 r1.z, r1.z -mad.f32 r1.w, c14.y, r3.z, r1.w -mul.f r2.w, r5.x, c10.x -mad.f32 r5.y, c15.y, r3.w, r1.w -mad.f32 r1.w, c5.y, r0.y, r2.x -max.f r1.z, r1.z, c23.x +mad.f32 r6.x, c14.z, r1.z, r6.x mad.f32 r0.x, c6.y, r0.y, r0.x -mad.f32 r0.y, r5.y, r5.y, r2.z -mov.f32f32 r2.x, r1.w -min.f r1.w, r1.z, c23.y -mad.f32 r1.z, c5.z, r0.z, r2.x -mov.f32f32 r0.y, r0.y -mul.f r2.x, r3.x, c12.z -mov.f32f32 r0.x, r0.x -mad.f32 r2.x, c13.z, r3.y, r2.x -mad.f32 r2.z, c10.y, r1.z, r2.w -mad.f32 r2.x, c14.z, r3.z, r2.x +mad.f32 r0.y, c15.z, r1.w, r6.x mad.f32 r0.x, c6.z, r0.z, r0.x -mad.f32 r0.z, c15.z, r3.w, r2.x -mov.f32f32 r2.x, r2.z -mul.f r2.z, r4.x, c19.w -mul.f r2.w, r4.x, c19.z -mad.f32 r0.y, r0.z, r0.z, r0.y -mad.f32 r2.x, c10.z, r0.x, r2.x -mad.f32 r2.z, c20.w, r4.y, r2.z -mad.f32 r5.z, c20.z, r4.y, r2.w -mul.f r5.w, r4.x, c19.y -mul.f r4.x, r4.x, c19.x -mul.f r6.x, r3.x, c0.w -rsq r0.y, (abs)r0.y -(ss)mov.f32f32 r0.y, r0.y -max.f r2.w, r2.x, c23.x -cmps.f.lt r6.y, (neg)r2.x, c23.x -mad.f32 r2.x, c21.w, r4.z, r2.z -mul.f r2.y, r2.y, r0.y -mul.f r2.z, r5.y, r0.y -mul.f r0.y, r0.z, r0.y -mov.f32f32 r0.z, r2.w -add.f r2.y, c10.x, (neg)r2.y -add.f r2.z, c10.y, (neg)r2.z +mad.f32 r0.z, c21.w, r4.z, r2.y +mad.f32 r2.y, c21.z, r4.z, r2.z +mad.f32 r6.x, r0.y, r0.y, r2.w +mad.f32 r5.z, c10.z, r0.x, r5.z +mad.f32 r2.w, c22.w, r4.w, r0.z +mad.f32 r2.z, c22.z, r4.w, r2.y +mad.f32 r0.z, c20.y, r4.y, r5.x +mul.f r2.y, r4.x, c19.x +mul.f r4.x, r1.x, c0.w +rsq r5.x, (abs)r6.x +(ss)mov.f32f32 r6.x, r5.x +mul.f r0.y, r0.y, r5.x +max.f r5.x, r5.z, c23.x +mul.f r6.y, c16.x, r3.x +mul.f r0.w, r0.w, r6.x +mul.f r5.y, r5.y, r6.x +(rpt1)nop +add.f r0.w, c10.x, (neg)r0.w +add.f r5.y, c10.y, (neg)r5.y add.f r0.y, c10.z, (neg)r0.y -mul.f r5.y, c16.z, r1.y -mul.f r2.w, r2.y, r2.y -mad.f32 r6.z, c8.z, r1.y, c9.z -mad.f32 r6.w, r2.z, r2.z, r2.w -mul.f r7.x, c16.y, r1.x -mul.f r7.y, c16.x, r0.w -mad.f32 r2.w, c22.w, r4.w, r2.x -mov.f32f32 r2.x, r6.w -add.f r5.y, r5.y, r6.z -mad.f32 r2.x, r0.y, r0.y, r2.x -mul.f r1.y, c17.z, r1.y -mad.f32 r6.z, c8.y, r1.x, c9.y -mad.f32 r6.w, c8.x, r0.w, c9.x -mad.f32 r5.z, c21.z, r4.z, r5.z -mad.f32 r5.w, c20.y, r4.y, r5.w -mad.f32 r4.x, c20.x, r4.y, r4.x -rsq r2.x, (abs)r2.x -(ss)mov.f32f32 r2.x, r2.x -mad.f32 r1.y, r0.z, r1.y, r5.y -add.f r4.y, r7.x, r6.z -add.f r5.y, r7.y, r6.w -mul.f r2.y, r2.y, r2.x -mul.f r2.z, r2.z, r2.x -mul.f r0.y, r0.y, r2.x -mul.f r1.x, c17.y, r1.x -mul.f r2.x, r5.x, r2.y -mul.f r0.w, c17.x, r0.w -mad.f32 r1.z, r1.z, r2.z, r2.x -mad.f32 r1.x, r0.z, r1.x, r4.y -mad.f32 r2.z, c22.z, r4.w, r5.z -mad.f32 r2.x, c21.y, r4.z, r5.w -mov.f32f32 r1.z, r1.z -mad.f32 r4.y, r0.z, r0.w, r5.y -mad.f32 r0.x, r0.x, r0.y, r1.z -mad.f32 r2.y, c22.y, r4.w, r2.x -mad.f32 r0.y, c21.x, r4.z, r4.x -mad.f32 r0.z, c1.w, r3.y, r6.x +mov.f32f32 r6.x, r5.x +mul.f r6.z, r0.w, r0.w +mul.f r6.w, c16.z, r3.z +mad.f32 r6.z, r5.y, r5.y, r6.z +mad.f32 r7.x, c8.z, r3.z, c9.z +mad.f32 r6.z, r0.y, r0.y, r6.z +mul.f r7.y, c16.y, r3.y +mad.f32 r7.z, c8.x, r3.x, c9.x +cmps.f.lt r5.z, (neg)r5.z, c23.x +mad.f32 r0.z, c21.y, r4.z, r0.z +mad.f32 r4.y, c20.x, r4.y, r2.y +mad.f32 r4.x, c1.w, r1.y, r4.x +rsq r2.y, (abs)r6.z +(ss)mov.f32f32 r6.z, r2.y +mul.f r0.y, r0.y, r2.y +add.f r2.y, r6.w, r7.x +mad.f32 r6.w, c8.y, r3.y, c9.y +mul.f r0.w, r0.w, r6.z +mul.f r5.y, r5.y, r6.z +mul.f r3.z, c17.z, r3.z +add.f r6.z, r7.y, r6.w +mul.f r0.w, r2.x, r0.w +mul.f r2.x, c17.y, r3.y +mad.f32 r0.w, r5.w, r5.y, r0.w +mad.f32 r3.y, r6.x, r3.z, r2.y +mad.f32 r0.x, r0.x, r0.y, r0.w +mad.f32 r3.z, r6.x, r2.x, r6.z +add.f r0.y, r6.y, r7.z +mad.f32 r2.y, c22.y, r4.w, r0.z max.f r0.x, r0.x, c23.x -mad.f32 r2.x, c22.x, r4.w, r0.y -mad.f32 r0.y, c2.w, r3.z, r0.z -mul.f r0.z, r3.x, c0.z -mul.f r1.z, r3.x, c0.y -mul.f r3.x, r3.x, c0.x -mov.f32f32 r4.x, c7.x +mul.f r0.z, c17.x, r3.x +mad.f32 r0.w, c21.x, r4.z, r4.y +mad.f32 r3.x, c2.w, r1.z, r4.x +mul.f r4.x, r1.x, c0.z +mul.f r4.y, r1.x, c0.y +mul.f r1.x, r1.x, c0.x log2 r0.x, r0.x -(ss)mov.f32f32 r0.x, r0.x -mad.f32 r0.w, c3.w, r3.w, r0.y -mad.f32 r0.y, c1.z, r3.y, r0.z -min.f r0.z, r4.x, c23.z -mad.f32 r0.y, c2.z, r3.z, r0.y -mad.f32 r1.z, c1.y, r3.y, r1.z -mad.f32 r3.x, c1.x, r3.y, r3.x -mul.f r0.x, r0.z, r0.x -mad.f32 r0.z, c3.z, r3.w, r0.y -mad.f32 r0.y, c2.y, r3.z, r1.z -mad.f32 r1.z, c2.x, r3.z, r3.x -mov.f32f32 r3.x, r0.x -mad.f32 r0.y, c3.y, r3.w, r0.y -mad.f32 r0.x, c3.x, r3.w, r1.z -(rpt3)nop -exp2 r1.z, r3.x -(ss)mov.f32f32 r1.z, r1.z -(rpt2)nop -sel.b32 r1.z, r1.z, r6.y, c23.x -(rpt2)nop -mov.f32f32 r1.z, r1.z -(rpt2)nop -mov.f32f32 r1.z, r1.z -(rpt2)nop -mad.f32 r1.y, c18.z, r1.z, r1.y -mad.f32 r1.x, c18.y, r1.z, r1.x -mad.f32 r1.z, c18.x, r1.z, r4.y +mov.f32f32 r4.z, c7.x +mad.f32 r5.x, r5.x, r0.z, r0.y +mad.f32 r2.x, c22.x, r4.w, r0.w +mad.f32 r0.w, c3.w, r1.w, r3.x +min.f r0.y, r4.z, c23.z +mad.f32 r0.z, c1.z, r1.y, r4.x +mad.f32 r3.x, c1.y, r1.y, r4.y +mad.f32 r1.x, c1.x, r1.y, r1.x +(ss)mul.f r0.x, r0.y, r0.x +mad.f32 r0.y, c2.z, r1.z, r0.z +mad.f32 r1.y, c2.y, r1.z, r3.x +mad.f32 r1.x, c2.x, r1.z, r1.x +max.f r1.z, r3.w, c23.x +(rpt1)nop +exp2 r0.x, r0.x +(ss)sel.b32 r3.x, r0.x, r5.z, c23.x +mad.f32 r0.z, c3.z, r1.w, r0.y +mad.f32 r0.y, c3.y, r1.w, r1.y +(ss)mad.f32 r0.x, c3.x, r1.w, r1.x +mov.f32f32 r1.x, r3.x +mad.f32 r1.y, c18.x, r3.x, r5.x +min.f r1.w, r1.z, c23.y nop +mov.f32f32 r1.x, r1.x max.f r1.y, r1.y, c23.x -max.f r1.x, r1.x, c23.x -(ss)max.f r3.x, r1.z, c23.x +(rpt1)nop +mad.f32 r1.z, c18.z, r1.x, r3.y +mad.f32 r3.x, c18.y, r1.x, r3.z +min.f r1.x, r1.y, c23.y nop +max.f r1.y, r1.z, c23.x +max.f r3.x, r3.x, c23.x +(rpt1)nop min.f r1.z, r1.y, c23.y -min.f r1.y, r1.x, c23.y -min.f r1.x, r3.x, c23.y +min.f r1.y, r3.x, c23.y end +nop ; VERT: outputs: r0.x (0:0) r1.x (1:0) r2.x (5:0) -; VERT: inputs: r3.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r0.w (0:0,cm=f,il=16,b=0) r4.x (0:0,cm=f,il=20,b=0) -; VERT: 147 instructions, 0 half, 8 full +; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r3.x (0:0,cm=f,il=16,b=0) r4.x (0:0,cm=f,il=20,b=0) +; VERT: 127 instructions, 0 half, 8 full diff --git a/reference/stk/stk0307.asm b/reference/stk/stk0307.asm index f978c20..5b1d566 100644 --- a/reference/stk/stk0307.asm +++ b/reference/stk/stk0307.asm @@ -1,16 +1,16 @@ ; options: ; VERT: new compiler -@in(r3.x) in0 -@in(r3.y) in1 -@in(r3.z) in2 -@in(r3.w) in3 +@in(r2.x) in0 +@in(r2.y) in1 +@in(r2.z) in2 +@in(r2.w) in3 @in(r0.x) in4 @in(r0.y) in5 @in(r0.z) in6 -@in(r2.x) in8 -@in(r2.y) in9 -@in(r2.z) in10 -@in(r2.w) in11 +@in(r3.x) in8 +@in(r3.y) in9 +@in(r3.z) in10 +@in(r3.w) in11 @in(r4.z) in14 @in(r4.w) in15 @out(r0.x) out0 @@ -25,171 +25,157 @@ @out(r2.y) out9 @out(r2.z) out10 @out(r2.w) out11 -(sy)(ss)mul.f r0.w, r3.x, c12.x +@const(c25.x) 0x3f000000, 0x00000000, 0x3f800000, 0x00000000 +@const(c26.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 +(sy)(ss)mul.f r0.w, r2.x, c12.x mul.f r1.x, r0.x, c4.x -mad.f32 r0.w, c13.x, r3.y, r0.w +mad.f32 r0.w, c13.x, r2.y, r0.w mad.f32 r1.x, c4.y, r0.y, r1.x -mad.f32 r0.w, c14.x, r3.z, r0.w -mov.f32f32 r1.y, r2.w -mad.f32 r0.w, c15.x, r3.w, r0.w -mov.f32f32 r1.x, r1.x -mul.f r1.z, r0.x, c5.x -mul.f r0.x, r0.x, c6.x -mul.f r1.w, r0.w, r0.w -mul.f r2.w, r3.x, c12.y +mad.f32 r0.w, c14.x, r2.z, r0.w mad.f32 r1.x, c4.z, r0.z, r1.x -mad.f32 r2.w, c13.y, r3.y, r2.w -mov.f32f32 r1.y, r1.y -mad.f32 r2.w, c14.y, r3.z, r2.w -mul.f r4.x, r1.x, c10.x -mad.f32 r2.w, c15.y, r3.w, r2.w -mad.f32 r1.z, c5.y, r0.y, r1.z -max.f r1.y, r1.y, c25.y +mad.f32 r1.y, c15.x, r2.w, r0.w +mul.f r0.w, r2.x, c12.y +mul.f r1.z, r2.x, c12.z +mul.f r1.w, r2.x, c0.w +mul.f r4.x, r1.y, r1.y +mad.f32 r0.w, c13.y, r2.y, r0.w +mul.f r4.y, r1.x, c10.x +mad.f32 r0.w, c14.y, r2.z, r0.w +mul.f r5.x, r0.x, c5.x +mad.f32 r5.y, c15.y, r2.w, r0.w +mad.f32 r0.w, c5.y, r0.y, r5.x +mad.f32 r1.z, c13.z, r2.y, r1.z +mad.f32 r1.w, c1.w, r2.y, r1.w +mad.f32 r4.x, r5.y, r5.y, r4.x +mad.f32 r1.z, c14.z, r2.z, r1.z +mad.f32 r5.x, c5.z, r0.z, r0.w +mad.f32 r1.z, c15.z, r2.w, r1.z +mad.f32 r0.w, c2.w, r2.z, r1.w +mul.f r1.w, r2.x, c0.z +mul.f r5.z, r2.x, c0.y +mad.f32 r4.x, r1.z, r1.z, r4.x +mad.f32 r4.y, c10.y, r5.x, r4.y +mul.f r0.x, r0.x, c6.x +mad.f32 r0.w, c3.w, r2.w, r0.w +mad.f32 r1.w, c1.z, r2.y, r1.w +mad.f32 r5.z, c1.y, r2.y, r5.z +mul.f r2.x, r2.x, c0.x +rsq r4.x, (abs)r4.x +(ss)mov.f32f32 r5.w, r4.x +mul.f r1.z, r1.z, r4.x mad.f32 r0.x, c6.y, r0.y, r0.x -mad.f32 r0.y, r2.w, r2.w, r1.w -mov.f32f32 r1.z, r1.z -min.f r1.w, r1.y, c25.z -mad.f32 r1.y, c5.z, r0.z, r1.z -mov.f32f32 r0.y, r0.y -mul.f r1.z, r3.x, c12.z -mov.f32f32 r0.x, r0.x -mad.f32 r1.z, c13.z, r3.y, r1.z -mad.f32 r4.x, c10.y, r1.y, r4.x -mad.f32 r1.z, c14.z, r3.z, r1.z -mad.f32 r0.x, c6.z, r0.z, r0.x -mad.f32 r0.z, c15.z, r3.w, r1.z -mov.f32f32 r1.z, r4.x -mul.f r4.x, r3.x, c0.w -mul.f r4.y, r3.x, c0.z -mad.f32 r0.y, r0.z, r0.z, r0.y -mad.f32 r1.z, c10.z, r0.x, r1.z -mad.f32 r4.x, c1.w, r3.y, r4.x -mad.f32 r4.y, c1.z, r3.y, r4.y -mul.f r5.x, r3.x, c0.y -mul.f r3.x, r3.x, c0.x -mul.f r5.y, c16.z, r2.z -rsq r0.y, (abs)r0.y -(ss)mov.f32f32 r0.y, r0.y -max.f r5.z, r1.z, c26.x -cmps.f.lt r1.z, (neg)r1.z, c25.y -mad.f32 r4.x, c2.w, r3.z, r4.x -mul.f r0.w, r0.w, r0.y -mul.f r2.w, r2.w, r0.y -mul.f r0.y, r0.z, r0.y -mov.f32f32 r5.z, r5.z -add.f r0.z, c10.x, (neg)r0.w -mul.f r5.w, r1.x, r0.w -add.f r6.x, c10.y, (neg)r2.w -add.f r6.y, c10.z, (neg)r0.y -mul.f r6.z, r0.z, r0.z -mad.f32 r5.w, r1.y, r2.w, r5.w -mad.f32 r6.z, r6.x, r6.x, r6.z -mad.f32 r6.w, c8.z, r2.z, c9.z -mul.f r7.x, c16.y, r2.y -mul.f r7.y, c16.x, r2.x -mov.f32f32 r6.z, r6.z -mov.f32f32 r5.w, r5.w -mad.f32 r6.z, r6.y, r6.y, r6.z -mad.f32 r7.z, r0.x, r0.y, r5.w -mad.f32 r7.w, r0.x, r0.y, r5.w -mad.f32 r5.w, r0.x, r0.y, r5.w -add.f r5.y, r5.y, r6.w -mad.f32 r6.w, c8.y, r2.y, c9.y -mad.f32 r8.x, c8.x, r2.x, c9.x -rsq r6.z, (abs)r6.z -(ss)mov.f32f32 r6.z, r6.z -add.f r7.z, r7.z, r7.z -add.f r7.w, r7.w, r7.w -add.f r5.w, r5.w, r5.w -mul.f r0.z, r0.z, r6.z -mad.f32 r7.z, (neg)r7.z, r1.x, r0.w -mul.f r0.w, r6.x, r6.z -mul.f r6.x, r6.y, r6.z -mul.f r0.z, r1.x, r0.z -add.f r1.x, r7.z, c25.y -mad.f32 r0.z, r1.y, r0.w, r0.z -mad.f32 r1.y, (neg)r7.w, r1.y, r2.w -mad.f32 r0.y, (neg)r5.w, r0.x, r0.y -nop -mov.f32f32 r0.z, r0.z -mul.f r0.w, r1.x, r1.x -mad.f32 r0.x, r0.x, r6.x, r0.z -add.f r0.z, r1.y, c25.y -add.f r0.y, r0.y, c25.z -mul.f r1.x, c17.z, r2.z -max.f r0.x, r0.x, c26.x -mad.f32 r0.z, r0.z, r0.z, r0.w -add.f r2.z, r7.x, r6.w -add.f r2.w, r7.y, r8.x -mad.f32 r0.w, c3.w, r3.w, r4.x -mad.f32 r4.x, c2.z, r3.z, r4.y -mad.f32 r4.y, c1.y, r3.y, r5.x -log2 r0.x, r0.x -(ss)mov.f32f32 r0.x, r0.x -mov.f32f32 r5.x, c7.x -mov.f32f32 r0.z, r0.z -mad.f32 r1.x, r5.z, r1.x, r5.y -mul.f r2.y, c17.y, r2.y -min.f r5.x, r5.x, c26.z -mad.f32 r0.y, r0.y, r0.y, r0.z -mul.f r2.x, c17.x, r2.x -mad.f32 r0.z, c3.z, r3.w, r4.x -mul.f r0.x, r5.x, r0.x -mov.f32f32 r0.y, r0.y -mad.f32 r2.y, r5.z, r2.y, r2.z -mad.f32 r2.x, r5.z, r2.x, r2.w -mov.f32f32 r0.x, r0.x -mad.f32 r2.z, c2.y, r3.z, r4.y -mad.f32 r2.w, c1.x, r3.y, r3.x -mov.f32f32 r3.x, r4.z -mov.f32f32 r3.y, r4.w +mad.f32 r0.y, c2.z, r2.z, r1.w +mul.f r1.y, r1.y, r5.w +mul.f r1.w, r5.y, r5.w (rpt1)nop -exp2 r0.x, r0.x -(ss)mov.f32f32 r0.x, r0.x -rsq r0.y, (abs)r0.y -(ss)mul.f r4.x, r0.y, c25.x -(ss)rsq r0.y, (abs)r0.y -(ss)mul.f r4.y, r0.y, c25.x -(ss)mad.f32 r0.y, c3.y, r3.w, r2.z -sel.b32 r0.x, r0.x, r1.z, c26.x -mad.f32 r4.x, r7.z, r4.x, c25.x -mad.f32 r4.y, r1.y, r4.y, c25.x -mad.f32 r3.z, c2.x, r3.z, r2.w -mov.f32f32 r0.x, r0.x -mul.f r1.y, r4.x, c21.w -mul.f r1.z, r4.x, c21.z -mul.f r2.z, r4.x, c21.y -mov.f32f32 r0.x, r0.x -mad.f32 r1.y, c22.w, r4.y, r1.y -mad.f32 r4.z, c22.z, r4.y, r1.z -mad.f32 r2.z, c22.y, r4.y, r2.z -mad.f32 r1.x, c18.z, r0.x, r1.x -mad.f32 r1.z, c18.y, r0.x, r2.y -mad.f32 r0.x, c18.x, r0.x, r2.x -mad.f32 r2.x, c23.w, r3.x, r1.y -max.f r1.x, r1.x, c25.y -max.f r1.y, r1.z, c25.y -max.f r0.x, r0.x, c25.y +(ss)add.f r4.x, c10.x, (neg)r1.y +mul.f r5.y, r1.x, r1.y +add.f r5.w, c10.y, (neg)r1.w +add.f r6.x, c10.z, (neg)r1.z +mul.f r6.y, r4.x, r4.x +mad.f32 r5.y, r5.x, r1.w, r5.y +mad.f32 r6.y, r5.w, r5.w, r6.y +mad.f32 r0.x, c6.z, r0.z, r0.x +mad.f32 r6.y, r6.x, r6.x, r6.y +mov.f32f32 r6.z, r5.y +mad.f32 r0.z, c3.z, r2.w, r0.y +mad.f32 r0.y, c2.y, r2.z, r5.z +mad.f32 r2.x, c1.x, r2.y, r2.x +max.f r2.y, r3.w, c25.y +mul.f r3.w, c16.z, r3.z +rsq r5.z, (abs)r6.y +(ss)mov.f32f32 r6.y, r5.z +mad.f32 r6.w, r0.x, r1.z, r6.z +mad.f32 r6.z, r0.x, r1.z, r6.z +mad.f32 r5.y, r0.x, r1.z, r5.y +mul.f r4.x, r4.x, r6.y +add.f r6.w, r6.w, r6.w +add.f r6.z, r6.z, r6.z +mul.f r5.w, r5.w, r6.y +mul.f r4.x, r1.x, r4.x +mad.f32 r1.x, (neg)r6.w, r1.x, r1.y +mad.f32 r1.y, (neg)r6.z, r0.x, r1.z +mad.f32 r1.z, r5.x, r5.w, r4.x +mul.f r4.x, r6.x, r5.z +add.f r5.z, r1.x, c25.y +add.f r1.y, r1.y, c25.z +add.f r5.y, r5.y, r5.y +mad.f32 r1.z, r0.x, r4.x, r1.z +mul.f r4.x, r5.z, r5.z +mad.f32 r0.x, c10.z, r0.x, r4.y +mad.f32 r0.y, c3.y, r2.w, r0.y +max.f r1.z, r1.z, c26.x +mad.f32 r4.y, (neg)r5.y, r5.x, r1.w +max.f r5.x, r0.x, c26.x +mul.f r5.y, c16.x, r3.x +cmps.f.lt r5.z, (neg)r0.x, c25.y +mad.f32 r0.x, c2.x, r2.z, r2.x +min.f r1.w, r2.y, c25.z +log2 r1.z, r1.z +mov.f32f32 r2.x, c7.x +add.f r2.y, r4.y, c25.y +mov.f32f32 r2.z, r5.x +mul.f r5.w, c16.y, r3.y +min.f r2.x, r2.x, c26.z +mad.f32 r2.y, r2.y, r2.y, r4.x +mad.f32 r4.x, c8.z, r3.z, c9.z +mad.f32 r6.x, c8.y, r3.y, c9.y +(ss)mul.f r1.z, r2.x, r1.z +mad.f32 r1.y, r1.y, r1.y, r2.y +add.f r2.x, r3.w, r4.x +add.f r2.y, r5.w, r6.x +mad.f32 r3.w, c8.x, r3.x, c9.x +mad.f32 r0.x, c3.x, r2.w, r0.x +mov.f32f32 r4.x, r4.z +exp2 r1.z, r1.z +(ss)sel.b32 r1.z, r1.z, r5.z, c26.x +add.f r2.w, r5.y, r3.w +mul.f r3.x, c17.x, r3.x +rsq r1.y, (abs)r1.y +(ss)mul.f r3.w, r1.y, c25.x +mov.f32f32 r5.y, r1.z +(ss)rsq r1.y, (abs)r1.y +(ss)mul.f r1.y, r1.y, c25.x +mul.f r3.z, c17.z, r3.z +mad.f32 r2.w, r5.x, r3.x, r2.w +mov.f32f32 r3.x, r5.y +mul.f r3.y, c17.y, r3.y +mad.f32 r2.x, r2.z, r3.z, r2.x +mad.f32 r1.z, c18.x, r1.z, r2.w +mad.f32 r2.x, c18.z, r3.x, r2.x +mad.f32 r2.y, r2.z, r3.y, r2.y +mad.f32 r2.z, r1.x, r3.w, c25.x +mad.f32 r3.y, r4.y, r1.y, c25.x +max.f r1.x, r2.x, c25.y +mad.f32 r1.y, c18.y, r3.x, r2.y +max.f r2.x, r1.z, c25.y nop min.f r1.z, r1.x, c25.z +max.f r1.y, r1.y, c25.y +min.f r1.x, r2.x, c25.z +mul.f r2.x, r2.z, c21.w +mul.f r2.y, r2.z, c21.z min.f r1.y, r1.y, c25.z -min.f r1.x, r0.x, c25.z -mad.f32 r2.w, c24.w, r3.y, r2.x -mad.f32 r0.x, c23.z, r3.x, r4.z -mad.f32 r2.x, c23.y, r3.x, r2.z -mad.f32 r2.z, c24.z, r3.y, r0.x -mad.f32 r2.y, c24.y, r3.y, r2.x -mul.f r2.x, r4.x, c21.x -mad.f32 r0.x, c3.x, r3.w, r3.z -mad.f32 r2.x, c22.x, r4.y, r2.x -nop -mad.f32 r2.x, c23.x, r3.x, r2.x -nop -mad.f32 r2.x, c24.x, r3.y, r2.x +mad.f32 r2.x, c22.w, r3.y, r2.x +mad.f32 r2.y, c22.z, r3.y, r2.y +mad.f32 r2.x, c23.w, r4.x, r2.x +mov.f32f32 r3.x, r4.w +mad.f32 r2.y, c23.z, r4.x, r2.y +mul.f r3.z, r2.z, c21.y +mul.f r3.w, r2.z, c21.x +mad.f32 r2.w, c24.w, r3.x, r2.x +mad.f32 r2.z, c24.z, r3.x, r2.y +mad.f32 r2.x, c22.y, r3.y, r3.z +mad.f32 r2.y, c22.x, r3.y, r3.w +mad.f32 r2.x, c23.y, r4.x, r2.x +mad.f32 r3.y, c23.x, r4.z, r2.y +mad.f32 r2.y, c24.y, r3.x, r2.x +mad.f32 r2.x, c24.x, r4.w, r3.y end nop nop +nop ; VERT: outputs: r0.x (0:0) r1.x (1:0) r2.x (5:0) -; VERT: inputs: r3.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r2.x (0:0,cm=f,il=16,b=0) r4.x (0:0,cm=c,il=20,b=0) -; VERT: 163 instructions, 0 half, 9 full +; VERT: inputs: r2.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r3.x (0:0,cm=f,il=16,b=0) r4.x (0:0,cm=c,il=20,b=0) +; VERT: 146 instructions, 0 half, 7 full diff --git a/reference/stk/stk0400.asm b/reference/stk/stk0400.asm index 74062e3..d96960a 100644 --- a/reference/stk/stk0400.asm +++ b/reference/stk/stk0400.asm @@ -6,39 +6,24 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)bary.f r0.z, 4, r0.x -bary.f r0.w, 7, r0.x -bary.f r1.x, 5, r0.x +bary.f r0.w, 5, r0.x +bary.f r1.x, 7, r0.x bary.f r1.y, 3, r0.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.x, r1.x bary.f r1.z, 2, r0.x -mov.f32f32 r1.w, r0.z -mov.f32f32 r2.y, r0.w -mov.f32f32 r2.x, r1.x -bary.f r0.z, 1, r0.x +bary.f r2.x, 1, r0.x bary.f (ei)r0.x, 0, r0.x -(rpt3)nop -sam.p (f32)(xyzw)r1.w, r1.w, s#0, t#0 -(sy)mul.f r0.y, r1.y, r2.z -mul.f r0.w, r2.y, r1.z -mul.f r0.z, r2.x, r0.z -mul.f r0.x, r1.w, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.x, r0.x -(ss)mov.f32f32 r1.w, r0.y -mov.f32f32 r1.z, r0.w -mov.f32f32 r1.y, r0.z -mov.f32f32 r1.x, r0.x +(rpt1)nop +sam.p (f32)(xyzw)r0.y, r0.z, s#0, t#0 +(sy)mul.f r1.w, r1.y, r1.x +mul.f r1.z, r0.w, r1.z +mul.f r1.y, r0.z, r2.x +(ss)mul.f r1.x, r0.y, r0.x end +nop +nop ; FRAG: outputs: r1.x (1:0) -; FRAG: inputs: r0.z (1:0,cm=f,il=8,b=1) r0.x (5:0,cm=f,il=12,b=1) -; FRAG: 35 instructions, 0 half, 3 full +; FRAG: inputs: r0.z (1:0,cm=f,il=8,b=1) r0.y (5:0,cm=f,il=12,b=1) +; FRAG: 15 instructions, 0 half, 3 full diff --git a/reference/stk/stk0500.asm b/reference/stk/stk0500.asm index e9b7b37..c61e896 100644 --- a/reference/stk/stk0500.asm +++ b/reference/stk/stk0500.asm @@ -6,227 +6,154 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c5.x) 0x40000000, 0xbf800000, 0x40800000, 0x3f000000 +@const(c6.x) 0x00000000, 0x3f333333, 0x3e99999a, 0x3d4ccccd +@const(c7.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.z, 0, r0.x mov.f32f32 r0.w, c3.x bary.f r1.x, 1, r0.x mov.f32f32 r1.y, c6.x -add.f r1.z, r0.z, c3.x -add.f r1.w, r0.z, c4.x +add.f r1.z, r0.z, c4.x +add.f r2.x, r0.z, c3.x +add.f r1.w, r1.x, c4.y +add.f r2.y, r1.x, c3.y add.f r0.z, r0.z, r0.w add.f r0.w, r1.x, r1.y -mov.f32f32 r1.y, r1.z -mov.f32f32 r1.z, r1.w -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.z, r1.z -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.w, r1.y -add.f r1.y, r1.x, c3.y -mov.f32f32 r2.y, r1.z -add.f r1.x, r1.x, c4.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r1.y, r1.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.x, r1.x -bary.f r1.z, 7, r0.x -mov.f32f32 r1.y, r1.y -mov.f32f32 r2.w, r0.z -mov.f32f32 r3.x, r0.w -mov.f32f32 r0.z, r1.x -mov.f32f32 r2.x, r1.y -mov.f32f32 r0.w, r1.z -bary.f r1.x, 8, r0.x -bary.f r1.y, 9, r0.x -mov.f32f32 r2.z, r0.z -sam (f32)(xyzw)r2.w, r2.w, s#2, t#2 -mov.f32f32 r0.z, c6.x -mov.f32f32 r1.z, c6.x -sam (f32)(xyz)r3.w, r1.w, s#0, t#0 -(sy)(ss)mad.f32 r1.w, c5.x, r4.x, c5.y -mad.f32 r2.x, c5.x, r3.w, c5.y -mad.f32 r3.w, c5.x, r4.y, c5.y -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.w, r1.w -sam (f32)(xyz)r4.x, r2.y, s#1, t#1 -(sy)(ss)mad.f32 r2.y, c5.x, r4.y, c5.y -mov.f32f32 r2.x, r2.x -mad.f32 r2.z, c5.x, r4.x, c5.y +mov.f32f32 r1.x, c6.x +mov.f32f32 r1.y, c6.x +bary.f r2.z, 7, r0.x +sam (f32)(xyz)r2.w, r1.z, s#1, t#1 +(sy)(ss)mad.f32 r1.z, c5.x, r3.x, c5.y +sam (f32)(xyz)r3.z, r2.x, s#0, t#0 +(sy)mad.f32 r1.w, c5.x, r3.w, c5.y +(ss)mad.f32 r2.x, c5.x, r2.w, c5.y +mad.f32 r2.y, c5.x, r3.z, c5.y +mul.f r1.z, c5.z, r1.z mul.f r1.w, c5.z, r1.w -mov.f32f32 r2.y, r2.y -mul.f r4.x, r2.x, r2.x -mov.f32f32 r2.z, r2.z -mov.f32f32 r1.w, r1.w -mul.f r2.y, c5.z, r2.y -mov.f32f32 r3.w, r3.w -mov.f32f32 r2.z, r2.z -mad.f32 r4.x, r1.w, r1.w, r4.x -mov.f32f32 r2.y, r2.y -mad.f32 r4.y, c5.x, r4.z, c5.y -mov.f32f32 r1.y, r1.y -mov.f32f32 r4.x, r4.x -mov.f32f32 r2.y, r2.y -mad.f32 r4.x, r3.w, r3.w, r4.x -mul.f r4.z, r2.z, r2.z -mov.f32f32 r4.y, r4.y -(rpt3)nop -rsq r4.x, r4.x -(ss)mov.f32f32 r4.x, r4.x -mad.f32 r4.z, r2.y, r2.y, r4.z -mov.f32f32 r4.y, r4.y -bary.f r4.w, 4, r0.x -mul.f r2.x, r2.x, r4.x -mul.f r1.w, r1.w, r4.x -mul.f r3.w, r3.w, r4.x -mov.f32f32 r4.x, r4.z -mov.f32f32 r2.x, r2.x -mad.f32 r4.x, r4.y, r4.y, r4.x -mov.f32f32 r1.w, r1.w -mov.f32f32 r3.w, r3.w -bary.f r4.z, 5, r0.x +mov.f32f32 r2.w, r2.x +mov.f32f32 r3.x, r2.y +mov.f32f32 r3.z, r1.z +mov.f32f32 r3.w, r1.w +mov.f32f32 r2.w, r2.w +mul.f r2.y, r2.y, r3.x +mov.f32f32 r3.z, r3.z +mad.f32 r1.w, r1.w, r3.w, r2.y +mul.f r2.x, r2.x, r2.w +mad.f32 r2.y, c5.x, r4.x, c5.y +mad.f32 r1.z, r1.z, r3.z, r2.x +mad.f32 r2.x, c5.x, r3.y, c5.y +sam (f32)(xyzw)r4.x, r0.z, s#2, t#2 +(ss)bary.f r0.z, 8, r0.x +bary.f r0.w, 9, r0.x +bary.f r3.y, 4, r0.x +mov.f32f32 r5.x, r2.x +mov.f32f32 r5.y, r2.y +bary.f r5.z, 5, r0.x bary.f (ei)r0.x, 6, r0.x +mov.f32f32 r0.y, r5.x +mad.f32 r1.w, r5.y, r5.y, r1.w (rpt1)nop -rsq r0.y, r4.x -(ss)mov.f32f32 r0.y, r0.y -(rpt2)nop -mad.f32 r2.x, r2.z, r0.y, r2.x -mad.f32 r1.w, r2.y, r0.y, r1.w -mad.f32 r0.y, r4.y, r0.y, r3.w -nop -mov.f32f32 r2.x, r2.x -mov.f32f32 r1.w, r1.w -mov.f32f32 r0.y, r0.y +mad.f32 r0.y, r0.y, r0.y, r1.z +(rpt5)nop +rsq r0.y, r0.y +(ss)mov.f32f32 r1.z, r0.y +rsq r1.w, r1.w +(ss)mov.f32f32 r5.x, r1.w +(ss)mul.f r1.w, r2.y, r1.w +(rpt1)nop +mul.f r2.y, r3.x, r5.x +mul.f r3.x, r3.w, r5.x +mad.f32 r2.y, r2.w, r1.z, r2.y +mad.f32 r1.z, r3.z, r1.z, r3.x +mad.f32 r0.y, r2.x, r0.y, r1.w nop -mul.f r2.x, r2.x, c5.w -mul.f r1.w, r1.w, c5.w +mul.f r1.w, r2.y, c5.w +mul.f r1.z, r1.z, c5.w mul.f r0.y, r0.y, c5.w nop -mov.f32f32 r2.x, r2.x -mov.f32f32 r1.w, r1.w -mov.f32f32 r0.y, r0.y -nop -mul.f r2.y, r2.x, r4.w -mul.f r2.z, r4.w, r2.x -mad.f32 r2.y, r1.w, r4.z, r2.y -mad.f32 r2.z, r4.z, r1.w, r2.z -(rpt1)nop -mov.f32f32 r2.y, r2.y -mov.f32f32 r2.z, r2.z -mad.f32 r2.y, r0.y, r0.x, r2.y -mad.f32 r2.z, r0.x, r0.y, r2.z -(rpt1)nop -mul.f r2.x, r2.y, r2.x -max.f r2.z, r2.z, c6.x -mul.f r1.w, r2.y, r1.w -mul.f r0.y, r2.y, r0.y -mov.f32f32 r2.x, r2.x -mov.f32f32 r2.y, r2.z -mov.f32f32 r1.w, r1.w -mov.f32f32 r0.y, r0.y -mul.f r2.x, c5.x, r2.x -mad.f32 r2.y, c6.y, r2.y, c6.z -mul.f r1.w, c5.x, r1.w -mul.f r0.y, c5.x, r0.y -mov.f32f32 r2.x, r2.x -mov.f32f32 r2.y, r2.y -mov.f32f32 r1.w, r1.w -mov.f32f32 r0.y, r0.y -add.f r2.x, r4.w, (neg)r2.x -mul.f r2.z, r3.z, r2.y -mul.f r3.y, r3.y, r2.y -mul.f r3.x, r3.x, r2.y -mov.f32f32 r2.x, r2.x -mov.f32f32 r3.z, r2.z -mov.f32f32 r3.w, r3.y -(ss)mov.f32f32 r4.x, r3.x -mul.f r4.y, r2.x, r2.x -add.f r1.w, r4.z, (neg)r1.w -add.f r1.z, r3.z, r1.z -add.f r0.x, r0.x, (neg)r0.y -mul.f r0.y, r2.w, r2.y -mov.f32f32 r1.w, r1.w -add.f r0.z, r1.z, r0.z -mov.f32f32 r0.x, r0.x -mov.f32f32 r1.z, r0.y -mad.f32 r2.y, r1.w, r1.w, r4.y +mov.f32f32 r2.x, r1.w +mul.f r1.w, r3.y, r1.w +mov.f32f32 r2.y, r1.z +mov.f32f32 r2.w, r0.y +mul.f r3.x, r2.x, r3.y +mad.f32 r1.z, r5.z, r1.z, r1.w +mad.f32 r1.w, r2.y, r5.z, r3.x +mad.f32 r0.y, r0.x, r0.y, r1.z +mad.f32 r1.z, r2.w, r0.x, r1.w (rpt2)nop -mov.f32f32 r2.y, r2.y -nop -mad.f32 r2.y, r0.x, r0.x, r2.y -(rpt5)nop -rsq r2.y, r2.y -(ss)mov.f32f32 r2.y, r2.y +mul.f r1.w, r1.z, r2.x +max.f r0.y, r0.y, c6.x +mul.f r2.x, r1.z, r2.y +mul.f r1.z, r1.z, r2.w +mul.f r1.w, c5.x, r1.w +mad.f32 r0.y, c6.y, r0.y, c6.z +mul.f r2.x, c5.x, r2.x +mul.f r1.z, c5.x, r1.z +add.f r1.w, r3.y, (neg)r1.w +mov.f32f32 r2.y, r0.y +add.f r2.x, r5.z, (neg)r2.x +add.f r0.x, r0.x, (neg)r1.z +mov.f32f32 r1.z, r1.w +(sy)mul.f r2.w, r4.w, r2.y +mov.f32f32 r3.x, r2.x +mov.f32f32 r3.y, r0.x +mul.f r1.w, r1.w, r1.z +add.f r1.y, r2.w, r1.y +mad.f32 r1.w, r2.x, r3.x, r1.w +mul.f r2.x, r4.z, r2.y +mad.f32 r1.w, r3.y, r3.y, r1.w +add.f r1.x, r1.y, r1.x +mul.f r1.y, r4.y, r2.y +mul.f r0.y, r4.x, r0.y (rpt2)nop -mul.f r2.x, r2.x, r2.y -mul.f r1.w, r1.w, r2.y -mul.f r0.x, r0.x, r2.y -nop -mov.f32f32 r2.x, r2.x -mov.f32f32 r1.w, r1.w -mov.f32f32 r0.x, r0.x +rsq r1.w, r1.w +(ss)mov.f32f32 r2.y, r1.w +mul.f r0.x, r0.x, r1.w +(rpt1)nop +mul.f r1.z, r1.z, r2.y +(ss)mul.f r1.w, r3.x, r2.y +(rpt1)nop +mul.f r1.z, r1.z, r2.z nop -mul.f r0.w, r2.x, r0.w +mad.f32 r0.z, r1.w, r0.z, r1.z nop -mad.f32 r0.w, r1.w, r1.x, r0.w -(rpt2)nop -mov.f32f32 r0.w, r0.w -nop -mad.f32 r0.x, r0.x, r1.y, r0.w -(rpt2)nop -mov.f32f32 r0.x, r0.x +mad.f32 r0.x, r0.x, r0.w, r0.z (rpt2)nop max.f r0.x, r0.x, c6.x (rpt2)nop -mov.f32f32 r0.x, r0.x +mov.f32f32 r0.z, r0.x (rpt2)nop -mov.f32f32 r0.w, r0.x -mov.f32f32 r1.x, r0.x -cmps.f.lt r0.x, c6.x, r0.x +mul.f r0.x, r0.x, r0.z +cmps.f.lt r0.z, c6.x, r0.z (rpt1)nop -mul.f r0.w, r0.w, r1.x -cov.u32f32 r0.x, r0.x +mov.f32f32 r0.w, r0.x +cov.u32f32 r0.z, r0.z (rpt1)nop -mov.f32f32 r0.w, r0.w -cmps.f.ne r0.x, r0.x, c6.x +mul.f r0.x, r0.x, r0.w +cmps.f.ne r0.z, r0.z, c6.x (rpt1)nop +mov.f32f32 r0.w, r0.x +mul.f r0.x, r0.x, c6.w +sel.b32 r1.w, r1.x, r0.z, r2.w +nop mul.f r0.w, r0.w, r0.w -sel.b32 r0.z, r0.z, r0.x, r2.z -(rpt1)nop -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.w, r0.z +add.f r1.x, r2.x, r0.x +add.f r1.z, r1.y, r0.x +add.f r0.x, r0.y, r0.x +mul.f r0.w, r0.w, c6.z +(rpt2)nop +mov.f32f32 r2.y, r0.w +add.f r0.x, r0.x, r0.w (rpt1)nop -mul.f r0.z, r0.w, r0.w -mul.f r0.w, r0.w, c6.w +add.f r0.w, r1.x, r2.y +add.f r1.x, r1.z, r2.y (rpt1)nop -mov.f32f32 r0.z, r0.z -mov.f32f32 r1.x, r0.w -mov.f32f32 r1.y, r0.w -mov.f32f32 r0.w, r0.w -mul.f r0.z, r0.z, c6.z -add.f r1.x, r3.w, r1.x -add.f r1.y, r4.x, r1.y -add.f r0.w, r1.z, r0.w -mov.f32f32 r0.z, r0.z -(rpt2)nop -mov.f32f32 r1.z, r0.z -mov.f32f32 r2.x, r0.z -mov.f32f32 r0.z, r0.z -nop -add.f r1.x, r1.x, r1.z -add.f r1.y, r1.y, r2.x -add.f r0.z, r0.w, r0.z -nop -sel.b32 r0.w, r1.x, r0.x, r3.y -sel.b32 r1.x, r1.y, r0.x, r3.x -sel.b32 r0.x, r0.z, r0.x, r0.y -nop -mov.f32f32 r1.z, r0.w -mov.f32f32 r1.y, r1.x -mov.f32f32 r1.x, r0.x +sel.b32 r1.z, r0.w, r0.z, r2.x +sel.b32 r1.y, r1.x, r0.z, r1.y +sel.b32 r1.x, r0.x, r0.z, r0.y end nop ; FRAG: outputs: r1.x (1:0) -; FRAG: inputs: r0.x (5:0,cm=f,il=8,b=1) r0.w (5:19,cm=f,il=12,b=1) r2.x (5:20,cm=f,il=16,b=1) -; FRAG: 254 instructions, 0 half, 5 full +; FRAG: inputs: r0.x (5:0,cm=f,il=8,b=1) r1.w (5:19,cm=f,il=12,b=1) r2.x (5:20,cm=f,il=16,b=1) +; FRAG: 169 instructions, 0 half, 6 full diff --git a/reference/stk/stk0501.asm b/reference/stk/stk0501.asm index cec698a..4e67c3f 100644 --- a/reference/stk/stk0501.asm +++ b/reference/stk/stk0501.asm @@ -1,16 +1,16 @@ ; options: ; VERT: new compiler -@in(r1.x) in0 -@in(r1.y) in1 -@in(r1.z) in2 -@in(r1.w) in3 +@in(r4.x) in0 +@in(r4.y) in1 +@in(r4.z) in2 +@in(r4.w) in3 @in(r0.x) in4 @in(r0.y) in5 @in(r0.z) in6 -@in(r4.x) in8 -@in(r4.y) in9 -@in(r4.z) in10 -@in(r4.w) in11 +@in(r1.x) in8 +@in(r1.y) in9 +@in(r1.z) in10 +@in(r1.w) in11 @out(r0.x) out0 @out(r0.y) out1 @out(r0.z) out2 @@ -27,187 +27,140 @@ @out(r3.y) out13 @out(r3.z) out14 @out(r3.w) out15 +@const(c12.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)mov.f32f32 r0.w, c1.x -mul.f r2.x, c8.x, r1.x +mul.f r2.x, c8.x, r4.x mov.f32f32 r2.y, c1.y mov.f32f32 r2.z, c1.z mul.f r2.w, r0.w, r0.x mov.f32f32 r3.x, c2.x -mad.f32 r2.x, c9.x, r1.y, r2.x +mad.f32 r2.x, c9.x, r4.y, r2.x mul.f r3.y, r0.w, r0.w -mad.f32 r2.x, c10.x, r1.z, r2.x +mad.f32 r2.x, c10.x, r4.z, r2.x mad.f32 r2.w, r3.x, r0.y, r2.w -mad.f32 r2.x, c11.x, r1.w, r2.x -mad.f32 r3.x, r2.y, r2.y, r3.y -mul.f r3.y, r2.y, r0.x -mov.f32f32 r2.w, r2.w -mov.f32f32 r3.z, c3.x -mov.f32f32 r2.x, r2.x -mov.f32f32 r3.x, r3.x +mov.f32f32 r3.x, c3.x +mad.f32 r2.x, c11.x, r4.w, r2.x +mad.f32 r3.y, r2.y, r2.y, r3.y +mul.f r3.z, r2.y, r0.x +mad.f32 r2.w, r3.x, r0.z, r2.w +mov.f32f32 r3.x, r2.x +mad.f32 r3.y, r2.z, r2.z, r3.y mov.f32f32 r3.w, c2.y -mad.f32 r2.w, r3.z, r0.z, r2.w -mul.f r3.z, r2.x, r2.x -mul.f r5.x, c8.y, r1.x -mad.f32 r3.x, r2.z, r2.z, r3.x -mov.f32f32 r2.w, r2.w -mad.f32 r5.x, c9.y, r1.y, r5.x -mad.f32 r3.y, r3.w, r0.y, r3.y -mul.f r0.x, r2.z, r0.x -mul.f r3.w, r2.w, r2.w -mad.f32 r5.x, c10.y, r1.z, r5.x -mov.f32f32 r3.y, r3.y -mov.f32f32 r5.y, c3.y -mad.f32 r5.x, c11.y, r1.w, r5.x -rsq r3.x, r3.x -(ss)mov.f32f32 r3.x, r3.x -mov.f32f32 r5.z, c2.z -mad.f32 r3.y, r5.y, r0.z, r3.y -mov.f32f32 r5.x, r5.x -mul.f r0.w, r0.w, r3.x -mul.f r2.y, r2.y, r3.x -mov.f32f32 r3.y, r3.y -mad.f32 r3.z, r5.x, r5.x, r3.z -mov.f32f32 r5.y, r0.w -mov.f32f32 r2.y, r2.y -mad.f32 r0.w, r3.y, r3.y, r3.w -mov.f32f32 r3.z, r3.z -mul.f r3.w, c8.z, r1.x -mul.f r5.w, c0.x, r5.y +mov.f32f32 r5.x, r2.w +mul.f r3.x, r3.x, r3.x +mul.f r5.y, c8.y, r4.x +mad.f32 r3.z, r3.w, r0.y, r3.z +mul.f r2.w, r2.w, r5.x +mov.f32f32 r3.w, c3.y +mad.f32 r5.y, c9.y, r4.y, r5.y +rsq r3.y, r3.y +(ss)mov.f32f32 r5.z, r3.y +mad.f32 r5.y, c10.y, r4.z, r5.y +mad.f32 r3.z, r3.w, r0.z, r3.z +mad.f32 r3.w, c11.y, r4.w, r5.y +mul.f r0.w, r0.w, r5.z +mul.f r2.y, r2.y, r5.z +mov.f32f32 r5.y, r3.z +mov.f32f32 r5.z, r3.w +mul.f r5.w, c0.x, r0.w mov.f32f32 r0.w, r0.w -mad.f32 r0.x, r5.z, r0.y, r0.x -mad.f32 r0.y, c9.z, r1.y, r3.w -mad.f32 r3.w, c0.y, r2.y, r5.w -mad.f32 r0.y, c10.z, r1.z, r0.y -mov.f32f32 r0.x, r0.x -mov.f32f32 r5.z, c3.z -mad.f32 r0.y, c11.z, r1.w, r0.y -mov.f32f32 r3.w, r3.w -mul.f r2.z, r2.z, r3.x -mad.f32 r0.x, r5.z, r0.z, r0.x -mov.f32f32 r0.y, r0.y -mul.f r0.z, c4.w, r1.x -mul.f r3.x, c4.z, r1.x -mov.f32f32 r0.x, r0.x -mad.f32 r3.z, r0.y, r0.y, r3.z +mad.f32 r2.w, r3.z, r5.y, r2.w +mul.f r0.x, r2.z, r0.x +mov.f32f32 r3.z, c2.z +mad.f32 r3.x, r3.w, r5.z, r3.x +mul.f r3.w, c8.z, r4.x +mad.f32 r5.w, c0.y, r2.y, r5.w +mad.f32 r0.x, r3.z, r0.y, r0.x +mov.f32f32 r0.y, c3.z +mad.f32 r3.z, c9.z, r4.y, r3.w +mul.f r2.z, r2.z, r3.y +(ss)mad.f32 r3.y, c10.z, r4.z, r3.z +mad.f32 r0.x, r0.y, r0.z, r0.x +mad.f32 r0.y, c11.z, r4.w, r3.y +mad.f32 r3.y, c0.z, r2.z, r5.w +mov.f32f32 r0.z, r2.y +mov.f32f32 r2.y, r0.x +mov.f32f32 r3.z, r0.y +mov.f32f32 r3.w, r3.y mov.f32f32 r2.z, r2.z -mad.f32 r0.z, c5.w, r1.y, r0.z -mad.f32 r0.w, r0.x, r0.x, r0.w -mad.f32 r0.z, c6.w, r1.z, r0.z -mad.f32 r3.x, c5.z, r1.y, r3.x -mul.f r5.z, c4.y, r1.x -mul.f r1.x, c4.x, r1.x -mad.f32 r0.z, c7.w, r1.w, r0.z -mad.f32 r3.x, c6.z, r1.z, r3.x -rsq r0.w, r0.w -(ss)mov.f32f32 r5.w, r0.w -(ss)rsq r0.w, r3.z -(ss)mov.f32f32 r3.z, r0.w -mad.f32 r3.w, c0.z, r2.z, r3.w -mov.f32f32 r0.w, r0.z -mul.f r0.x, r0.x, r5.w -mul.f r0.z, r2.x, r3.z -mul.f r2.x, r2.w, r5.w -mul.f r2.w, r3.y, r5.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r2.x, r2.x -mov.f32f32 r2.w, r2.w -mul.f r3.y, r0.x, r2.y -absneg.f r0.z, (neg)r0.z -mul.f r5.w, c0.x, r2.x -mad.f32 r3.y, r2.w, r2.z, (neg)r3.y -mad.f32 r5.w, c0.y, r2.w, r5.w -mov.f32f32 r6.x, r0.z -mul.f r0.z, r2.x, r2.z -mov.f32f32 r2.z, r3.y -mov.f32f32 r3.y, r5.w -mul.f r5.w, r6.x, r6.x -mul.f r5.x, r5.x, r3.z -mul.f r2.z, c0.x, r2.z -mad.f32 r0.z, r0.x, r5.y, (neg)r0.z -mad.f32 r0.x, c0.z, r0.x, r3.y -mov.f32f32 r3.y, r5.x -mul.f r2.w, r2.w, r5.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.x, r0.x -absneg.f r3.y, (neg)r3.y -mad.f32 r2.x, r2.x, r2.y, (neg)r2.w -mad.f32 r0.z, c0.y, r0.z, r2.z -mov.f32f32 r0.x, r0.x -mov.f32f32 r2.w, r3.y -mov.f32f32 r2.x, r2.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r2.y, r3.w -mul.f r0.y, r0.y, r3.z -mad.f32 r0.z, c0.z, r2.x, r0.z -mad.f32 r2.x, r2.w, r2.w, r5.w -mul.f r2.z, r2.y, r2.y -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r2.x, r2.x -mad.f32 r3.x, c7.z, r1.w, r3.x -mad.f32 r3.y, c5.y, r1.y, r5.z +mad.f32 r2.y, r2.y, r2.y, r2.w +mad.f32 r0.y, r0.y, r3.z, r3.x +mul.f r2.w, r3.w, r3.w +mul.f r3.x, c4.w, r4.x +mul.f r3.w, c4.z, r4.x +mul.f r5.w, c4.y, r4.x +mul.f r4.x, c4.x, r4.x +rsq r2.y, r2.y +(ss)mov.f32f32 r6.x, r2.y +rsq r0.y, r0.y +(ss)mov.f32f32 r6.y, r0.y +(ss)mul.f r0.y, r2.x, r0.y +mul.f r0.x, r0.x, r2.y +mul.f r2.x, r5.y, r6.x +mul.f r2.y, r5.z, r6.y +mul.f r5.x, r5.x, r6.x +absneg.f r5.y, (neg)r0.y +mov.f32f32 r0.y, r2.x +mov.f32f32 r5.z, r0.x +absneg.f r2.y, (neg)r2.y +mul.f r6.x, c0.x, r5.x +mov.f32f32 r5.x, r5.x +mul.f r6.z, r5.z, r0.z +mov.f32f32 r6.w, r2.y +mad.f32 r6.z, r0.y, r2.z, (neg)r6.z +mov.f32f32 r7.x, r5.y +mad.f32 r2.x, c0.y, r2.x, r6.x +mul.f r2.z, r5.x, r2.z +mul.f r6.x, c0.x, r6.z +mad.f32 r2.z, r5.z, r0.w, (neg)r2.z +mul.f r5.z, r7.x, r7.x +mad.f32 r0.x, c0.z, r0.x, r2.x +mad.f32 r2.x, r2.y, r6.w, r5.z +mad.f32 r2.y, c0.y, r2.z, r6.x +mul.f r0.y, r0.y, r0.w +mul.f r0.w, r3.z, r6.y +mad.f32 r0.y, r5.x, r0.z, (neg)r0.y +mov.f32f32 r0.z, r0.x +mad.f32 r2.z, c5.w, r4.y, r3.x +mad.f32 r3.x, c5.z, r4.y, r3.w +mad.f32 r0.y, c0.z, r0.y, r2.y +absneg.f r2.y, (neg)r0.w mov.f32f32 r3.z, r0.z -absneg.f r0.y, (neg)r0.y -mov.f32f32 r0.z, r3.x -mad.f32 r3.x, c6.y, r1.z, r3.y -mad.f32 r2.z, r3.z, r3.z, r2.z -mov.f32f32 r3.y, r0.y -mad.f32 r0.y, c7.y, r1.w, r3.x -mad.f32 r1.x, c5.x, r1.y, r1.x -mov.f32f32 r1.y, r2.z -mad.f32 r2.x, r3.y, r3.y, r2.x -mad.f32 r1.y, r0.x, r0.x, r1.y -mov.f32f32 r0.y, r0.y -mad.f32 r1.x, c6.x, r1.z, r1.x -mov.f32f32 r1.z, (0.000000) +mad.f32 r0.z, c6.w, r4.z, r2.z +mov.f32f32 r2.z, r0.y +mov.f32f32 r5.x, r2.y +mad.f32 r0.w, c7.w, r4.w, r0.z +mad.f32 r0.z, c6.z, r4.z, r3.x +mov.f32f32 r3.x, r2.z +mad.f32 r2.y, r2.y, r5.x, r2.x +mad.f32 r0.z, c7.z, r4.w, r0.z +mad.f32 r2.x, c5.y, r4.y, r5.w +mad.f32 r0.y, r0.y, r3.x, r2.w +mad.f32 r2.x, c6.y, r4.z, r2.x +mad.f32 r0.x, r0.x, r3.z, r0.y +mad.f32 r0.y, c7.y, r4.w, r2.x +mad.f32 r4.x, c5.x, r4.y, r4.x +mov.f32f32 r3.w, (0.000000) (rpt2)nop -rsq r1.y, r1.y -(ss)mov.f32f32 r1.y, r1.y -rsq r2.x, r2.x -(ss)mov.f32f32 r3.x, r2.x -mad.f32 r1.x, c7.x, r1.w, r1.x -mov.f32f32 r3.w, r1.z -mul.f r0.x, r0.x, r1.y -mul.f r1.z, r3.z, r1.y -mul.f r1.y, r2.y, r1.y -mul.f r1.w, r3.y, r3.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.w, r1.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r1.y, r1.y -mov.f32f32 r3.y, r1.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r1.y, r1.y +rsq r0.x, r0.x +(ss)mov.f32f32 r4.y, r0.x +mul.f r2.x, r3.y, r0.x +(ss)rsq r0.x, r2.y +(ss)mov.f32f32 r5.z, r0.x +mul.f r2.w, r5.y, r0.x +mul.f r2.z, r3.z, r4.y +(ss)mul.f r2.y, r3.x, r4.y +mul.f r3.y, r5.x, r5.z +mul.f r3.x, r6.w, r5.z +mad.f32 r0.x, c6.x, r4.z, r4.x nop -mov.f32f32 r2.z, r0.x -mov.f32f32 r2.y, r1.z -(ss)mov.f32f32 r2.x, r1.y -mov.f32f32 r0.x, r3.y -mul.f r1.y, r2.w, r3.x -mul.f r1.z, r6.x, r3.x -mov.f32f32 r3.z, r1.w -mov.f32f32 r3.y, r0.x -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.z, r1.z -mov.f32f32 r0.x, r1.x -mov.f32f32 r1.w, r4.w -mov.f32f32 r1.x, r1.y -mov.f32f32 r1.y, r1.z -(rpt1)nop -mov.f32f32 r1.x, r1.x -mov.f32f32 r2.w, r1.y -mov.f32f32 r1.z, r4.z -mov.f32f32 r1.y, r4.y -mov.f32f32 r3.x, r1.x -mov.f32f32 r1.x, r4.x +mov.f32f32 r3.z, r3.y +mad.f32 r0.x, c7.x, r4.w, r0.x end nop +nop +nop ; VERT: outputs: r0.x (0:0) r1.x (5:0) r2.x (5:19) r3.x (5:20) -; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r4.x (0:0,cm=f,il=16,b=0) -; VERT: 182 instructions, 0 half, 7 full +; VERT: inputs: r4.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r1.x (0:0,cm=f,il=16,b=0) +; VERT: 131 instructions, 0 half, 8 full diff --git a/reference/stk/stk0600.asm b/reference/stk/stk0600.asm index ff486a7..d610a0c 100644 --- a/reference/stk/stk0600.asm +++ b/reference/stk/stk0600.asm @@ -6,39 +6,20 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)bary.f r0.z, 4, r0.x -bary.f r0.w, 7, r0.x -bary.f r1.x, 5, r0.x -bary.f r1.y, 2, r0.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.x, r1.x -bary.f r1.z, 1, r0.x -mov.f32f32 r1.w, r0.z -mov.f32f32 r2.y, r0.w -mov.f32f32 r2.x, r1.x +bary.f r0.w, 5, r0.x +bary.f r1.x, 7, r0.x +bary.f r2.x, 2, r0.x +bary.f r2.y, 1, r0.x bary.f (ei)r0.x, 0, r0.x -(rpt4)nop -sam.p (f32)(xyzw)r0.y, r1.w, s#0, t#0 -(sy)mul.f r0.w, r0.w, r1.y -mul.f r0.z, r0.z, r1.z -mul.f r0.x, r0.y, r0.x -mov.f32f32 r0.y, r1.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.x, r0.x -(ss)mov.f32f32 r1.w, r0.y -mov.f32f32 r0.y, r0.w -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.x, r0.x -nop -mov.f32f32 r1.z, r0.y -mov.f32f32 r1.y, r0.z -mov.f32f32 r1.x, r0.x +(rpt2)nop +sam.p (f32)(xyzw)r1.x, r0.z, s#0, t#0 +(sy)mul.f r1.z, r1.z, r2.x +mul.f r1.y, r1.y, r2.y +(ss)mul.f r1.x, r1.x, r0.x end -nop -nop ; FRAG: outputs: r1.x (1:0) -; FRAG: inputs: r0.x (1:0,cm=f,il=8,b=1) r0.x (5:0,cm=f,il=12,b=1) -; FRAG: 34 instructions, 0 half, 3 full +; FRAG: inputs: r0.x (1:0,cm=f,il=8,b=1) r0.y (5:0,cm=f,il=12,b=1) +; FRAG: 14 instructions, 0 half, 3 full diff --git a/reference/stk/stk0601.asm b/reference/stk/stk0601.asm index aadaff7..5f20141 100644 --- a/reference/stk/stk0601.asm +++ b/reference/stk/stk0601.asm @@ -1,20 +1,20 @@ ; options: ; VERT: new compiler -@in(r2.x) in0 -@in(r2.y) in1 -@in(r2.z) in2 -@in(r2.w) in3 +@in(r3.x) in0 +@in(r3.y) in1 +@in(r3.z) in2 +@in(r3.w) in3 @in(r0.x) in4 @in(r0.y) in5 @in(r0.z) in6 -@in(r0.w) in8 -@in(r1.x) in9 -@in(r1.y) in10 -@in(r1.z) in11 -@in(r3.x) in12 -@in(r3.y) in13 -@in(r3.z) in14 -@in(r3.w) in15 +@in(r1.x) in8 +@in(r1.y) in9 +@in(r1.z) in10 +@in(r1.w) in11 +@in(r2.x) in12 +@in(r2.y) in13 +@in(r2.z) in14 +@in(r2.w) in15 @out(r0.x) out0 @out(r0.y) out1 @out(r0.z) out2 @@ -27,135 +27,120 @@ @out(r2.y) out9 @out(r2.z) out10 @out(r2.w) out11 -(sy)(ss)mul.f r1.w, r2.x, c13.x +@const(c20.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 +(sy)(ss)mul.f r0.w, r3.x, c13.x mul.f r4.x, r0.x, c4.x -mad.f32 r1.w, c14.x, r2.y, r1.w +mad.f32 r0.w, c14.x, r3.y, r0.w mad.f32 r4.x, c4.y, r0.y, r4.x -mad.f32 r1.w, c15.x, r2.z, r1.w -mov.f32f32 r1.z, r1.z -mad.f32 r4.y, c16.x, r2.w, r1.w -mov.f32f32 r1.w, r4.x -mov.f32f32 r4.x, c10.z +mad.f32 r0.w, c15.x, r3.z, r0.w +mad.f32 r4.x, c4.z, r0.z, r4.x +mad.f32 r0.w, c16.x, r3.w, r0.w +mov.f32f32 r4.y, c10.z mov.f32f32 r4.z, c10.y -mul.f r4.w, r4.y, r4.y -mul.f r5.x, r2.x, c13.y -mad.f32 r5.y, c4.z, r0.z, r1.w -mad.f32 r1.w, c14.y, r2.y, r5.x -mov.f32f32 r1.z, r1.z -mad.f32 r1.w, c15.y, r2.z, r1.w -mul.f r5.x, r5.y, c11.x -mad.f32 r5.z, c16.y, r2.w, r1.w -mul.f r1.w, r0.x, c5.x -max.f r1.z, r1.z, c20.x -mad.f32 r4.x, c8.z, r4.x, c9.z -mad.f32 r4.w, r5.z, r5.z, r4.w -mad.f32 r5.w, c5.y, r0.y, r1.w -min.f r1.w, r1.z, c20.y -add.f r1.z, c17.z, r4.x -mov.f32f32 r4.x, r4.w -mul.f r4.w, r2.x, c13.z -mov.f32f32 r5.w, r5.w -mad.f32 r4.w, c14.z, r2.y, r4.w +mov.f32f32 r4.w, c10.x +mul.f r5.x, r0.w, r0.w +mul.f r5.y, r3.x, c13.y +mul.f r5.z, r4.x, c11.x +mad.f32 r5.y, c14.y, r3.y, r5.y +mul.f r5.w, r0.x, c5.x +mad.f32 r5.y, c15.y, r3.z, r5.y +mad.f32 r5.w, c5.y, r0.y, r5.w +mad.f32 r5.y, c16.y, r3.w, r5.y mad.f32 r5.w, c5.z, r0.z, r5.w -mad.f32 r4.w, c15.z, r2.z, r4.w +mad.f32 r4.y, c8.z, r4.y, c9.z mad.f32 r4.z, c8.y, r4.z, c9.y -mad.f32 r4.w, c16.z, r2.w, r4.w -mad.f32 r5.x, c11.y, r5.w, r5.x -mov.f32f32 r6.x, c10.x +mad.f32 r5.x, r5.y, r5.y, r5.x +mul.f r6.x, r3.x, c13.z +mad.f32 r5.z, c11.y, r5.w, r5.z +mad.f32 r6.x, c14.z, r3.y, r6.x mul.f r0.x, r0.x, c6.x -mad.f32 r4.x, r4.w, r4.w, r4.x -mov.f32f32 r5.x, r5.x +mad.f32 r6.x, c15.z, r3.z, r6.x mad.f32 r0.x, c6.y, r0.y, r0.x -add.f r0.y, c17.y, r4.z -mad.f32 r4.z, c8.x, r6.x, c9.x -mul.f r6.x, r2.x, c0.w -mul.f r6.y, r2.x, c0.z -rsq r4.x, (abs)r4.x -(ss)mov.f32f32 r4.x, r4.x -mov.f32f32 r0.x, r0.x -add.f r4.z, c17.x, r4.z +mad.f32 r0.y, c16.z, r3.w, r6.x mad.f32 r0.x, c6.z, r0.z, r0.x -mul.f r0.z, r4.y, r4.x -mul.f r4.y, r5.z, r4.x -mul.f r4.x, r4.w, r4.x -mad.f32 r4.w, c11.z, r0.x, r5.x -add.f r0.z, c11.x, (neg)r0.z -add.f r4.y, c11.y, (neg)r4.y -add.f r4.x, c11.z, (neg)r4.x -max.f r5.x, r4.w, c20.x -mul.f r5.z, r0.z, r0.z -cmps.f.lt r4.w, (neg)r4.w, c20.x -mad.f32 r5.z, r4.y, r4.y, r5.z -mov.f32f32 r5.x, r5.x -mul.f r1.x, c18.y, r1.x -mul.f r0.w, c18.x, r0.w -mov.f32f32 r5.z, r5.z -mul.f r1.y, c18.z, r1.y -mad.f32 r5.z, r4.x, r4.x, r5.z -mad.f32 r1.x, r5.x, r1.x, r0.y -mad.f32 r4.z, r5.x, r0.w, r4.z -mad.f32 r0.y, c1.w, r2.y, r6.x -mad.f32 r0.w, c1.z, r2.y, r6.y -mul.f r6.x, r2.x, c0.y -mul.f r2.x, r2.x, c0.x -rsq r5.z, (abs)r5.z -(ss)mov.f32f32 r5.z, r5.z -mad.f32 r1.y, r5.x, r1.y, r1.z -mad.f32 r0.y, c2.w, r2.z, r0.y -mad.f32 r1.z, c2.z, r2.z, r0.w -mul.f r0.z, r0.z, r5.z -mul.f r4.y, r4.y, r5.z -mul.f r4.x, r4.x, r5.z -mad.f32 r0.w, c3.w, r2.w, r0.y -mul.f r0.y, r5.y, r0.z -mad.f32 r0.z, c3.z, r2.w, r1.z -mad.f32 r0.y, r5.w, r4.y, r0.y -mad.f32 r1.z, c1.y, r2.y, r6.x -mad.f32 r2.x, c1.x, r2.y, r2.x -mov.f32f32 r2.y, c7.x -mov.f32f32 r0.y, r0.y -mad.f32 r1.z, c2.y, r2.z, r1.z -mad.f32 r0.x, r0.x, r4.x, r0.y -mad.f32 r0.y, c3.y, r2.w, r1.z -mad.f32 r1.z, c2.x, r2.z, r2.x -min.f r4.x, r2.y, c20.z -max.f r4.y, r0.x, c20.x -mad.f32 r0.x, c3.x, r2.w, r1.z -mov.f32f32 r2.w, r3.w -mov.f32f32 r2.z, r3.z -mov.f32f32 r2.y, r3.y -mov.f32f32 r2.x, r3.x -nop -log2 r1.z, r4.y -(ss)mov.f32f32 r1.z, r1.z -(rpt2)nop -mul.f r1.z, r4.x, r1.z +add.f r0.z, c17.z, r4.y +add.f r4.y, c17.y, r4.z +mad.f32 r4.z, r0.y, r0.y, r5.x +mad.f32 r5.x, c11.z, r0.x, r5.z +mad.f32 r4.w, c8.x, r4.w, c9.x +mul.f r5.z, r3.x, c0.w +mul.f r6.x, r3.x, c0.z +mul.f r6.y, r3.x, c0.y +mul.f r3.x, r3.x, c0.x +rsq r4.z, (abs)r4.z +(ss)mov.f32f32 r6.z, r4.z +mul.f r0.y, r0.y, r4.z +(ss)max.f r4.z, r5.x, c20.x +add.f r4.w, c17.x, r4.w +mul.f r0.w, r0.w, r6.z +mul.f r5.y, r5.y, r6.z +(rpt1)nop +add.f r0.w, c11.x, (neg)r0.w +add.f r5.y, c11.y, (neg)r5.y +add.f r0.y, c11.z, (neg)r0.y +mov.f32f32 r6.z, r4.z +mul.f r6.w, r0.w, r0.w +mul.f r1.z, c18.z, r1.z +mad.f32 r6.w, r5.y, r5.y, r6.w +mul.f r1.y, c18.y, r1.y +mad.f32 r6.w, r0.y, r0.y, r6.w +mad.f32 r1.z, r6.z, r1.z, r0.z +mul.f r0.z, c18.x, r1.x +cmps.f.lt r1.x, (neg)r5.x, c20.x +mad.f32 r5.x, c1.w, r3.y, r5.z +mad.f32 r5.z, c1.z, r3.y, r6.x +mad.f32 r6.x, c1.y, r3.y, r6.y +rsq r6.y, (abs)r6.w +(ss)mov.f32f32 r6.w, r6.y +mul.f r0.y, r0.y, r6.y +mad.f32 r1.y, r6.z, r1.y, r4.y +mad.f32 r4.y, r4.z, r0.z, r4.w +mul.f r0.z, r0.w, r6.w +mul.f r4.z, r5.y, r6.w +mad.f32 r0.w, c2.w, r3.z, r5.x +mad.f32 r4.w, c2.z, r3.z, r5.z +mul.f r0.z, r4.x, r0.z +mad.f32 r0.w, c3.w, r3.w, r0.w +mad.f32 r4.x, r5.w, r4.z, r0.z +mad.f32 r0.z, c3.z, r3.w, r4.w +mad.f32 r0.x, r0.x, r0.y, r4.x +mad.f32 r0.y, c2.y, r3.z, r6.x +mad.f32 r3.x, c1.x, r3.y, r3.x +max.f r1.w, r1.w, c20.x +max.f r0.x, r0.x, c20.x +mad.f32 r0.y, c3.y, r3.w, r0.y +mad.f32 r3.x, c2.x, r3.z, r3.x +min.f r1.w, r1.w, c20.y +mov.f32f32 r3.y, c7.x +(rpt1)nop +log2 r3.z, r0.x +(ss)mad.f32 r0.x, c3.x, r3.w, r3.x +min.f r3.x, r3.y, c20.z (rpt2)nop -mov.f32f32 r1.z, r1.z +(ss)mul.f r3.x, r3.x, r3.z (rpt5)nop -exp2 r1.z, r1.z -(ss)mov.f32f32 r1.z, r1.z -(rpt2)nop -sel.b32 r1.z, r1.z, r4.w, c20.x -(rpt2)nop -mov.f32f32 r1.z, r1.z -(rpt2)nop -mov.f32f32 r1.z, r1.z +exp2 r3.x, r3.x +(ss)sel.b32 r1.x, r3.x, r1.x, c20.x (rpt2)nop -mad.f32 r1.y, c19.z, r1.z, r1.y -mad.f32 r1.x, c19.y, r1.z, r1.x -mad.f32 r1.z, c19.x, r1.z, r4.z -nop -max.f r1.y, r1.y, c20.x +(ss)mov.f32f32 r3.x, r1.x +mad.f32 r1.x, c19.x, r1.x, r4.y +(rpt1)nop +mov.f32f32 r3.x, r3.x max.f r1.x, r1.x, c20.x -max.f r3.x, r1.z, c20.x +(rpt1)nop +mad.f32 r1.z, c19.z, r3.x, r1.z +mad.f32 r1.y, c19.y, r3.x, r1.y +min.f r1.x, r1.x, c20.y nop -min.f r1.z, r1.y, c20.y -min.f r1.y, r1.x, c20.y -min.f r1.x, r3.x, c20.y +max.f r1.z, r1.z, c20.x +max.f r1.y, r1.y, c20.x +(rpt1)nop +min.f r1.z, r1.z, c20.y +min.f r1.y, r1.y, c20.y end nop +nop ; VERT: outputs: r0.x (0:0) r1.x (1:0) r2.x (5:0) -; VERT: inputs: r2.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r0.w (0:0,cm=f,il=16,b=0) r3.x (0:0,cm=f,il=20,b=0) -; VERT: 144 instructions, 0 half, 7 full +; VERT: inputs: r3.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r1.x (0:0,cm=f,il=16,b=0) r2.x (0:0,cm=f,il=20,b=0) +; VERT: 124 instructions, 0 half, 7 full diff --git a/reference/stk/stk0700.asm b/reference/stk/stk0700.asm index ff486a7..d610a0c 100644 --- a/reference/stk/stk0700.asm +++ b/reference/stk/stk0700.asm @@ -6,39 +6,20 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)bary.f r0.z, 4, r0.x -bary.f r0.w, 7, r0.x -bary.f r1.x, 5, r0.x -bary.f r1.y, 2, r0.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.x, r1.x -bary.f r1.z, 1, r0.x -mov.f32f32 r1.w, r0.z -mov.f32f32 r2.y, r0.w -mov.f32f32 r2.x, r1.x +bary.f r0.w, 5, r0.x +bary.f r1.x, 7, r0.x +bary.f r2.x, 2, r0.x +bary.f r2.y, 1, r0.x bary.f (ei)r0.x, 0, r0.x -(rpt4)nop -sam.p (f32)(xyzw)r0.y, r1.w, s#0, t#0 -(sy)mul.f r0.w, r0.w, r1.y -mul.f r0.z, r0.z, r1.z -mul.f r0.x, r0.y, r0.x -mov.f32f32 r0.y, r1.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.x, r0.x -(ss)mov.f32f32 r1.w, r0.y -mov.f32f32 r0.y, r0.w -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.x, r0.x -nop -mov.f32f32 r1.z, r0.y -mov.f32f32 r1.y, r0.z -mov.f32f32 r1.x, r0.x +(rpt2)nop +sam.p (f32)(xyzw)r1.x, r0.z, s#0, t#0 +(sy)mul.f r1.z, r1.z, r2.x +mul.f r1.y, r1.y, r2.y +(ss)mul.f r1.x, r1.x, r0.x end -nop -nop ; FRAG: outputs: r1.x (1:0) -; FRAG: inputs: r0.x (1:0,cm=f,il=8,b=1) r0.x (5:0,cm=f,il=12,b=1) -; FRAG: 34 instructions, 0 half, 3 full +; FRAG: inputs: r0.x (1:0,cm=f,il=8,b=1) r0.y (5:0,cm=f,il=12,b=1) +; FRAG: 14 instructions, 0 half, 3 full diff --git a/reference/test.asm b/reference/test.asm index bb0a0f9..9242510 100644 --- a/reference/test.asm +++ b/reference/test.asm @@ -7,31 +7,25 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c0.x) 0x3f000000, 0x00000000, 0x00000000, 0x00000000 +@const(c1.x) 0x3f800000, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)mul.s r0.z, hr0.x, 2 -bary.f r0.w, 2, r0.x -bary.f r1.x, 1, r0.x -bary.f (ei)r0.x, 3, r0.x -add.s r0.y, r0.z, 1 -mov.f32f32 r1.z, r0.w -mov.f32f32 r1.y, r1.x -nop -cov.s32f32 r0.y, r0.y -mov.f32f32 r1.w, r0.x -(rpt1)nop -mov.f32f32 r0.x, r0.y +bary.f r1.z, 2, r0.x +bary.f r1.y, 1, r0.x +bary.f (ei)r1.w, 3, r0.x +add.s r0.x, r0.z, 1 +(rpt2)nop +cov.s32f32 r0.x, r0.x (rpt2)nop max.f r0.x, r0.x, c0.y (rpt2)nop min.f r0.x, r0.x, c1.x (rpt2)nop -mul.f r0.x, c0.x, r0.x -(rpt2)nop -mov.f32f32 r1.x, r0.x +mul.f r1.x, c0.x, r0.x end nop nop -nop ; FRAG: outputs: r1.x (1:0) -; FRAG: inputs: r63.y (7:0,cm=f,il=8,b=0) r63.y (1:0,cm=f,il=8,b=1) -; FRAG: 30 instructions, 1 half, 2 full +; FRAG: inputs: r63.y (7:0,cm=f,il=8,b=0) r1.x (1:0,cm=f,il=8,b=1) +; FRAG: 22 instructions, 1 half, 2 full diff --git a/reference/test0.asm b/reference/test0.asm index 74062e3..d96960a 100644 --- a/reference/test0.asm +++ b/reference/test0.asm @@ -6,39 +6,24 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)bary.f r0.z, 4, r0.x -bary.f r0.w, 7, r0.x -bary.f r1.x, 5, r0.x +bary.f r0.w, 5, r0.x +bary.f r1.x, 7, r0.x bary.f r1.y, 3, r0.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.x, r1.x bary.f r1.z, 2, r0.x -mov.f32f32 r1.w, r0.z -mov.f32f32 r2.y, r0.w -mov.f32f32 r2.x, r1.x -bary.f r0.z, 1, r0.x +bary.f r2.x, 1, r0.x bary.f (ei)r0.x, 0, r0.x -(rpt3)nop -sam.p (f32)(xyzw)r1.w, r1.w, s#0, t#0 -(sy)mul.f r0.y, r1.y, r2.z -mul.f r0.w, r2.y, r1.z -mul.f r0.z, r2.x, r0.z -mul.f r0.x, r1.w, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.x, r0.x -(ss)mov.f32f32 r1.w, r0.y -mov.f32f32 r1.z, r0.w -mov.f32f32 r1.y, r0.z -mov.f32f32 r1.x, r0.x +(rpt1)nop +sam.p (f32)(xyzw)r0.y, r0.z, s#0, t#0 +(sy)mul.f r1.w, r1.y, r1.x +mul.f r1.z, r0.w, r1.z +mul.f r1.y, r0.z, r2.x +(ss)mul.f r1.x, r0.y, r0.x end +nop +nop ; FRAG: outputs: r1.x (1:0) -; FRAG: inputs: r0.z (1:0,cm=f,il=8,b=1) r0.x (5:0,cm=f,il=12,b=1) -; FRAG: 35 instructions, 0 half, 3 full +; FRAG: inputs: r0.z (1:0,cm=f,il=8,b=1) r0.y (5:0,cm=f,il=12,b=1) +; FRAG: 15 instructions, 0 half, 3 full diff --git a/reference/test1.asm b/reference/test1.asm index 242def2..693d844 100644 --- a/reference/test1.asm +++ b/reference/test1.asm @@ -1,9 +1,9 @@ ; options: ; VERT: new compiler -@in(r2.x) in0 -@in(r2.y) in1 -@in(r2.z) in2 -@in(r2.w) in3 +@in(r1.x) in0 +@in(r1.y) in1 +@in(r1.z) in2 +@in(r1.w) in3 @in(r0.x) in4 @in(r0.y) in5 @in(r0.z) in6 @@ -11,10 +11,10 @@ @in(r3.y) in9 @in(r3.z) in10 @in(r3.w) in11 -@in(r4.x) in12 -@in(r4.y) in13 -@in(r4.z) in14 -@in(r4.w) in15 +@in(r2.x) in12 +@in(r2.y) in13 +@in(r2.z) in14 +@in(r2.w) in15 @out(r0.x) out0 @out(r0.y) out1 @out(r0.z) out2 @@ -27,135 +27,120 @@ @out(r2.y) out9 @out(r2.z) out10 @out(r2.w) out11 -(sy)(ss)mul.f r0.w, r2.x, c12.x -mul.f r1.x, r0.x, c4.x -mad.f32 r0.w, c13.x, r2.y, r0.w -mad.f32 r1.x, c4.y, r0.y, r1.x -mad.f32 r0.w, c14.x, r2.z, r0.w -mov.f32f32 r1.y, r3.w -mad.f32 r0.w, c15.x, r2.w, r0.w -mov.f32f32 r1.x, r1.x -mul.f r1.z, r0.x, c5.x +@const(c19.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 +(sy)(ss)mul.f r0.w, r1.x, c12.x +mul.f r4.x, r0.x, c4.x +mad.f32 r0.w, c13.x, r1.y, r0.w +mad.f32 r4.x, c4.y, r0.y, r4.x +mad.f32 r0.w, c14.x, r1.z, r0.w +mad.f32 r4.x, c4.z, r0.z, r4.x +mad.f32 r4.y, c15.x, r1.w, r0.w +mul.f r0.w, r1.x, c12.y +mul.f r4.z, r1.x, c12.z +mul.f r4.w, r1.x, c0.w +mul.f r5.x, r4.y, r4.y +mad.f32 r0.w, c13.y, r1.y, r0.w +mul.f r5.y, r4.x, c10.x +mad.f32 r0.w, c14.y, r1.z, r0.w +mul.f r5.z, r0.x, c5.x +mad.f32 r5.w, c15.y, r1.w, r0.w +mad.f32 r0.w, c5.y, r0.y, r5.z +mad.f32 r4.z, c13.z, r1.y, r4.z +mad.f32 r4.w, c1.w, r1.y, r4.w +mad.f32 r5.x, r5.w, r5.w, r5.x +mad.f32 r4.z, c14.z, r1.z, r4.z +mad.f32 r5.z, c5.z, r0.z, r0.w +mad.f32 r4.z, c15.z, r1.w, r4.z +mad.f32 r0.w, c2.w, r1.z, r4.w +mul.f r4.w, r1.x, c0.z +mul.f r6.x, r1.x, c0.y +mad.f32 r5.x, r4.z, r4.z, r5.x +mad.f32 r5.y, c10.y, r5.z, r5.y mul.f r0.x, r0.x, c6.x -mul.f r1.w, r0.w, r0.w -mul.f r3.w, r2.x, c12.y -mad.f32 r1.x, c4.z, r0.z, r1.x -mad.f32 r3.w, c13.y, r2.y, r3.w -mov.f32f32 r1.y, r1.y -mad.f32 r3.w, c14.y, r2.z, r3.w -mul.f r5.x, r1.x, c10.x -mad.f32 r3.w, c15.y, r2.w, r3.w -mad.f32 r1.z, c5.y, r0.y, r1.z -max.f r1.y, r1.y, c19.x +mad.f32 r0.w, c3.w, r1.w, r0.w +mad.f32 r4.w, c1.z, r1.y, r4.w +mad.f32 r6.x, c1.y, r1.y, r6.x +mul.f r1.x, r1.x, c0.x +rsq r5.x, (abs)r5.x +(ss)mov.f32f32 r6.y, r5.x +mul.f r4.z, r4.z, r5.x mad.f32 r0.x, c6.y, r0.y, r0.x -mad.f32 r0.y, r3.w, r3.w, r1.w -mov.f32f32 r1.z, r1.z -min.f r1.w, r1.y, c19.y -mad.f32 r1.y, c5.z, r0.z, r1.z -mov.f32f32 r0.y, r0.y -mul.f r1.z, r2.x, c12.z -mov.f32f32 r0.x, r0.x -mad.f32 r1.z, c13.z, r2.y, r1.z -mad.f32 r5.x, c10.y, r1.y, r5.x -mad.f32 r1.z, c14.z, r2.z, r1.z +mad.f32 r0.y, c2.z, r1.z, r4.w +mul.f r4.y, r4.y, r6.y +mul.f r4.w, r5.w, r6.y +(rpt1)nop +add.f r4.y, c10.x, (neg)r4.y +add.f r4.w, c10.y, (neg)r4.w +add.f r4.z, c10.z, (neg)r4.z mad.f32 r0.x, c6.z, r0.z, r0.x -mad.f32 r0.z, c15.z, r2.w, r1.z -mov.f32f32 r1.z, r5.x -mul.f r5.x, r2.x, c0.w -mul.f r5.y, r2.x, c0.z -mad.f32 r0.y, r0.z, r0.z, r0.y -mad.f32 r1.z, c10.z, r0.x, r1.z -mad.f32 r5.x, c1.w, r2.y, r5.x -mad.f32 r5.y, c1.z, r2.y, r5.y -mul.f r5.z, r2.x, c0.y -mul.f r2.x, r2.x, c0.x -mul.f r5.w, c16.z, r3.z +(ss)mul.f r5.x, r4.y, r4.y +mad.f32 r0.z, c3.z, r1.w, r0.y +mad.f32 r0.y, r4.w, r4.w, r5.x +mad.f32 r5.x, c10.z, r0.x, r5.y +mad.f32 r0.y, r4.z, r4.z, r0.y +mad.f32 r5.y, c2.y, r1.z, r6.x +mad.f32 r1.x, c1.x, r1.y, r1.x +max.f r1.y, r3.w, c19.x +mul.f r3.w, c16.z, r3.z +mul.f r5.w, c16.y, r3.y +mul.f r6.x, c16.x, r3.x rsq r0.y, (abs)r0.y -(ss)mov.f32f32 r0.y, r0.y -max.f r6.x, r1.z, c19.x -cmps.f.lt r1.z, (neg)r1.z, c19.x -mad.f32 r5.x, c2.w, r2.z, r5.x -mul.f r0.w, r0.w, r0.y -mul.f r3.w, r3.w, r0.y -mul.f r0.y, r0.z, r0.y -mov.f32f32 r6.x, r6.x -add.f r0.z, c10.x, (neg)r0.w -add.f r3.w, c10.y, (neg)r3.w -add.f r0.y, c10.z, (neg)r0.y -mad.f32 r0.w, c8.z, r3.z, c9.z -mul.f r6.y, r0.z, r0.z -mul.f r6.z, c16.y, r3.y -mad.f32 r6.y, r3.w, r3.w, r6.y -add.f r5.w, r5.w, r0.w -mad.f32 r0.w, c8.y, r3.y, c9.y -mul.f r6.w, c16.x, r3.x -mov.f32f32 r6.y, r6.y +(ss)mov.f32f32 r6.y, r0.y +(ss)mul.f r0.y, r4.z, r0.y +max.f r4.z, r5.x, c19.x +mad.f32 r6.z, c8.x, r3.x, c9.x +mul.f r4.y, r4.y, r6.y +mul.f r4.w, r4.w, r6.y +mov.f32f32 r6.y, r4.z +mad.f32 r6.w, c8.y, r3.y, c9.y +mul.f r4.x, r4.x, r4.y +mad.f32 r4.y, c8.z, r3.z, c9.z +mad.f32 r4.x, r5.z, r4.w, r4.x +add.f r4.w, r5.w, r6.w +mad.f32 r0.x, r0.x, r0.y, r4.x +add.f r3.w, r3.w, r4.y +mul.f r0.y, c17.y, r3.y +add.f r3.y, r6.x, r6.z +max.f r0.x, r0.x, c19.x mul.f r3.z, c17.z, r3.z -mad.f32 r6.y, r0.y, r0.y, r6.y -add.f r6.z, r6.z, r0.w -mad.f32 r7.x, c8.x, r3.x, c9.x -mad.f32 r0.w, c3.w, r2.w, r5.x -mad.f32 r5.x, c2.z, r2.z, r5.y -mad.f32 r5.y, c1.y, r2.y, r5.z -mad.f32 r2.x, c1.x, r2.y, r2.x -rsq r2.y, (abs)r6.y -(ss)mov.f32f32 r2.y, r2.y -mad.f32 r3.z, r6.x, r3.z, r5.w -mul.f r3.y, c17.y, r3.y -add.f r5.z, r6.w, r7.x -mul.f r0.z, r0.z, r2.y -mul.f r3.w, r3.w, r2.y -mul.f r0.y, r0.y, r2.y -mad.f32 r3.y, r6.x, r3.y, r6.z -mul.f r0.z, r1.x, r0.z -mul.f r1.x, c17.x, r3.x -mad.f32 r1.y, r1.y, r3.w, r0.z -mad.f32 r0.z, c3.z, r2.w, r5.x -mad.f32 r2.y, c2.y, r2.z, r5.y -mad.f32 r2.x, c2.x, r2.z, r2.x -mov.f32f32 r1.y, r1.y -mad.f32 r1.x, r6.x, r1.x, r5.z -mad.f32 r1.y, r0.x, r0.y, r1.y -mad.f32 r0.y, c3.y, r2.w, r2.y -mad.f32 r0.x, c3.x, r2.w, r2.x -mov.f32f32 r3.x, c7.x -max.f r1.y, r1.y, c19.x -mov.f32f32 r2.w, r4.w -mov.f32f32 r2.z, r4.z -mov.f32f32 r2.y, r4.y -mov.f32f32 r2.x, r4.x +mad.f32 r4.x, r6.y, r0.y, r4.w +mul.f r3.x, c17.x, r3.x +cmps.f.lt r4.y, (neg)r5.x, c19.x +mad.f32 r0.y, c3.y, r1.w, r5.y +mad.f32 r1.x, c2.x, r1.z, r1.x +log2 r1.z, r0.x +mov.f32f32 r4.w, c7.x +mad.f32 r3.z, r6.y, r3.z, r3.w +mad.f32 r3.x, r4.z, r3.x, r3.y +(ss)mad.f32 r0.x, c3.x, r1.w, r1.x +min.f r1.x, r4.w, c19.z +min.f r1.w, r1.y, c19.y (rpt1)nop -log2 r1.y, r1.y -(ss)mov.f32f32 r1.y, r1.y -min.f r3.x, r3.x, c19.z -(rpt2)nop -mul.f r1.y, r3.x, r1.y -(rpt2)nop -mov.f32f32 r1.y, r1.y +(ss)mul.f r1.x, r1.x, r1.z (rpt5)nop -exp2 r1.y, r1.y -(ss)mov.f32f32 r1.y, r1.y -(rpt2)nop -sel.b32 r1.y, r1.y, r1.z, c19.x -(rpt2)nop -mov.f32f32 r1.y, r1.y +exp2 r1.x, r1.x +(ss)sel.b32 r1.x, r1.x, r4.y, c19.x (rpt2)nop +mov.f32f32 r1.y, r1.x +mad.f32 r1.x, c18.x, r1.x, r3.x +(rpt1)nop mov.f32f32 r1.y, r1.y -(rpt2)nop -mad.f32 r1.z, c18.z, r1.y, r3.z -mad.f32 r3.x, c18.y, r1.y, r3.y -mad.f32 r1.x, c18.x, r1.y, r1.x -nop -max.f r1.y, r1.z, c19.x -max.f r3.x, r3.x, c19.x max.f r1.x, r1.x, c19.x -nop -min.f r1.z, r1.y, c19.y -min.f r1.y, r3.x, c19.y +(rpt1)nop +mad.f32 r1.z, c18.z, r1.y, r3.z +mad.f32 r1.y, c18.y, r1.y, r4.x min.f r1.x, r1.x, c19.y +nop +max.f r1.z, r1.z, c19.x +max.f r1.y, r1.y, c19.x +(rpt1)nop +min.f r1.z, r1.z, c19.y +min.f r1.y, r1.y, c19.y end nop +nop +nop ; VERT: outputs: r0.x (0:0) r1.x (1:0) r2.x (5:0) -; VERT: inputs: r2.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r3.x (0:0,cm=f,il=16,b=0) r4.x (0:0,cm=f,il=20,b=0) -; VERT: 145 instructions, 0 half, 8 full +; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r3.x (0:0,cm=f,il=16,b=0) r2.x (0:0,cm=f,il=20,b=0) +; VERT: 121 instructions, 0 half, 7 full diff --git a/reference/test2.asm b/reference/test2.asm index d1549f6..ce47110 100644 --- a/reference/test2.asm +++ b/reference/test2.asm @@ -32,99 +32,97 @@ @out(r3.y) out13 @out(r3.z) out14 @out(r3.w) out15 +@const(c25.x) 0x3f000000, 0x00000000, 0x3f800000, 0x00000000 +@const(c26.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)mul.f r0.x, r1.x, c12.x -mul.f r0.y, r4.x, c4.x +mul.f r0.y, r1.x, c12.y mad.f32 r0.x, c13.x, r1.y, r0.x -mad.f32 r0.y, c4.y, r4.y, r0.y +mad.f32 r0.y, c13.y, r1.y, r0.y mad.f32 r0.x, c14.x, r1.z, r0.x -mul.f r0.z, r4.x, c5.x -mad.f32 r0.x, c15.x, r1.w, r0.x -mov.f32f32 r0.y, r0.y -mad.f32 r0.z, c5.y, r4.y, r0.z -mul.f r0.w, r4.x, c6.x -mul.f r3.x, r0.x, r0.x -mul.f r3.y, r1.x, c12.y -mad.f32 r6.x, c4.z, r4.z, r0.y -mad.f32 r0.y, c13.y, r1.y, r3.y -mov.f32f32 r0.z, r0.z mad.f32 r0.y, c14.y, r1.z, r0.y -mad.f32 r6.y, c5.z, r4.z, r0.z -mad.f32 r3.y, c15.y, r1.w, r0.y -mad.f32 r0.y, c6.y, r4.y, r0.w +mad.f32 r0.x, c15.x, r1.w, r0.x +mad.f32 r0.y, c15.y, r1.w, r0.y mul.f r0.z, r1.x, c12.z mul.f r0.w, r1.x, c0.w -mad.f32 r3.x, r3.y, r3.y, r3.x -mov.f32f32 r0.y, r0.y +mul.f r3.x, r0.x, r0.x mad.f32 r0.z, c13.z, r1.y, r0.z -mad.f32 r0.w, c1.w, r1.y, r0.w -mov.f32f32 r3.x, r3.x +mad.f32 r3.x, r0.y, r0.y, r3.x mad.f32 r0.z, c14.z, r1.z, r0.z -mad.f32 r6.z, c6.z, r4.z, r0.y -mad.f32 r3.z, c15.z, r1.w, r0.z -mad.f32 r0.y, c2.w, r1.z, r0.w -mul.f r0.z, r1.x, c0.z -mul.f r3.w, r1.x, c0.y -mad.f32 r3.x, r3.z, r3.z, r3.x -mad.f32 r0.w, c3.w, r1.w, r0.y -mad.f32 r0.y, c1.z, r1.y, r0.z -mad.f32 r0.z, c1.y, r1.y, r3.w +mad.f32 r0.w, c1.w, r1.y, r0.w +mad.f32 r0.z, c15.z, r1.w, r0.z +mad.f32 r0.w, c2.w, r1.z, r0.w +mul.f r3.y, r1.x, c0.z +mul.f r3.z, r1.x, c0.y +mad.f32 r3.x, r0.z, r0.z, r3.x +mad.f32 r0.w, c3.w, r1.w, r0.w +mad.f32 r3.y, c1.z, r1.y, r3.y +mad.f32 r3.z, c1.y, r1.y, r3.z mul.f r3.w, r1.x, c0.x -mad.f32 r0.y, c2.z, r1.z, r0.y -mad.f32 r6.w, c2.y, r1.z, r0.z -rsq r0.z, (abs)r3.x -(ss)mov.f32f32 r3.x, r0.z -mad.f32 r0.z, c3.z, r1.w, r0.y -mad.f32 r0.y, c3.y, r1.w, r6.w -mad.f32 r3.w, c1.x, r1.y, r3.w -mul.f r6.w, r0.x, r3.x -mul.f r7.x, r3.y, r3.x -mul.f r7.y, r3.z, r3.x -mad.f32 r0.x, c2.x, r1.z, r3.w -mul.f r3.x, r6.x, r6.w +mul.f r6.x, r4.x, c4.x +mul.f r6.y, r4.x, c5.x +rsq r3.x, (abs)r3.x +(ss)mov.f32f32 r6.z, r3.x +mul.f r6.w, r0.z, r3.x +mad.f32 r0.z, c2.z, r1.z, r3.y +(ss)mad.f32 r3.x, c2.y, r1.z, r3.z +mul.f r7.x, r0.x, r6.z +mad.f32 r0.x, c4.y, r4.y, r6.x +mul.f r6.x, r0.y, r6.z +mad.f32 r6.z, c4.z, r4.z, r0.x +mad.f32 r0.z, c3.z, r1.w, r0.z +mad.f32 r0.y, c3.y, r1.w, r3.x +mad.f32 r0.x, c1.x, r1.y, r3.w +mul.f r3.x, r6.z, r7.x +mad.f32 r3.y, c5.y, r4.y, r6.y +mad.f32 r0.x, c2.x, r1.z, r0.x +mad.f32 r6.y, c5.z, r4.z, r3.y mad.f32 r0.x, c3.x, r1.w, r0.x -mad.f32 r3.x, r6.y, r7.x, r3.x +mul.f r3.y, r4.x, c6.x add.f r3.w, r2.w, r5.w +mad.f32 r5.w, r6.y, r6.x, r3.x +mad.f32 r2.w, c6.y, r4.y, r3.y add.f r3.z, r2.z, r5.z add.f r3.y, r2.y, r5.y -mov.f32f32 r2.y, r3.x +mov.f32f32 r2.y, r5.w +mad.f32 r5.y, c6.z, r4.z, r2.w add.f r3.x, r2.x, r5.x -mad.f32 r2.x, r6.z, r7.y, r2.y -mad.f32 r2.y, r6.z, r7.y, r2.y add.f r2.w, r1.w, r4.w add.f r2.z, r1.z, r4.z -mad.f32 r1.z, (neg)r2.x, r6.x, r6.w -mad.f32 r1.w, (neg)r2.y, r6.y, r7.x +mad.f32 r1.z, r5.y, r6.w, r2.y +mad.f32 r1.w, r5.y, r6.w, r5.w add.f r2.y, r1.y, r4.y add.f r2.x, r1.x, r4.x -add.f r1.x, r1.z, c25.y -add.f r1.y, r1.w, c25.y +mad.f32 r1.x, (neg)r1.z, r6.z, r7.x +mad.f32 r1.y, (neg)r1.w, r6.y, r6.x mov.f32f32 r4.x, r4.z mov.f32f32 r4.y, r4.w -mul.f r1.x, r1.x, c25.x -mul.f r1.y, r1.y, c25.x +add.f r1.z, r1.x, c25.y +add.f r1.w, r1.y, c25.y +(rpt1)nop +mul.f r1.z, r1.z, c25.x +mul.f r1.w, r1.w, c25.x (rpt1)nop -mad.f32 r1.x, r1.z, r1.x, c25.x -mad.f32 r1.y, r1.w, r1.y, c25.x +mad.f32 r1.x, r1.x, r1.z, c25.x +mad.f32 r1.y, r1.y, r1.w, c25.x (rpt1)nop mul.f r1.z, r1.x, c21.w mul.f r1.w, r1.x, c21.z mad.f32 r1.z, c22.w, r1.y, r1.z mad.f32 r1.w, c22.z, r1.y, r1.w mad.f32 r1.z, c23.w, r4.x, r1.z -mad.f32 r4.z, c23.z, r4.x, r1.w +mad.f32 r5.x, c23.z, r4.x, r1.w mad.f32 r1.w, c24.w, r4.y, r1.z -mad.f32 r1.z, c24.z, r4.y, r4.z -mul.f r4.z, r1.x, c21.y +mad.f32 r1.z, c24.z, r4.y, r5.x +mul.f r5.x, r1.x, c21.y mul.f r1.x, r1.x, c21.x -mad.f32 r4.z, c22.y, r1.y, r4.z +mad.f32 r5.x, c22.y, r1.y, r5.x mad.f32 r1.x, c22.x, r1.y, r1.x -mad.f32 r1.y, c23.y, r4.x, r4.z -mad.f32 r1.x, c23.x, r4.x, r1.x +mad.f32 r1.y, c23.y, r4.x, r5.x +mad.f32 r1.x, c23.x, r4.z, r1.x mad.f32 r1.y, c24.y, r4.y, r1.y -mad.f32 r1.x, c24.x, r4.y, r1.x +mad.f32 r1.x, c24.x, r4.w, r1.x end -nop ; VERT: outputs: r0.x (0:0) r1.x (5:0) r2.x (5:0) r3.x (5:0) ; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) r4.x (0:0,cm=f,il=12,b=0) r2.x (0:0,cm=f,il=16,b=0) r5.x (0:0,cm=f,il=20,b=0) -; VERT: 93 instructions, 0 half, 8 full +; VERT: 91 instructions, 0 half, 8 full diff --git a/reference/test3.asm b/reference/test3.asm index f75c5c1..fee7419 100644 --- a/reference/test3.asm +++ b/reference/test3.asm @@ -9,29 +9,26 @@ @out(r0.y) out1 @out(r0.z) out2 @out(r0.w) out3 -(sy)(ss)mul.f r1.w, r0.x, c12.x -mul.f r2.x, r0.x, c0.x -mul.f r0.y, r0.w, c6.x +@const(c25.x) 0x3f000000, 0x00000000, 0x3f800000, 0x00000000 +@const(c26.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 +(sy)(ss)mul.f r0.y, r0.x, c12.x +mul.f r1.w, r0.x, c0.x +mul.f r0.z, r0.w, c6.x mul.f r0.x, r0.x, c0.y -mul.f r0.w, r1.w, r1.w -mul.f r0.z, r2.x, c10.x -mad.f32 r1.x, c6.y, r1.x, r0.y -mad.f32 r0.x, c10.y, r0.x, r0.z -mov.f32f32 r1.z, r1.z +mul.f r0.w, r0.y, r0.y +mul.f r2.x, r1.w, c10.x +mad.f32 r0.z, c6.y, r1.x, r0.z +(rpt3)nop +rsq r0.w, (abs)r0.w +(ss)mul.f r0.w, r0.y, r0.w +mad.f32 r1.x, c10.y, r0.x, r2.x +mad.f32 r1.y, c6.z, r1.y, r0.z mov.f32f32 r0.z, c25.x mov.f32f32 r0.y, c25.x -rsq r0.w, (abs)r0.w -(ss)mov.f32f32 r0.w, r0.w -mov.f32f32 r2.y, r0.x -(rpt1)nop -mul.f r0.w, r1.w, r0.w -mov.f32f32 r1.x, r1.x mov.f32f32 r0.x, c25.x -mad.f32 r1.x, c6.z, r1.y, r1.x +mad.f32 r1.x, c10.z, r1.y, r1.x (rpt2)nop -mad.f32 r1.x, c10.z, r1.x, r2.y -(rpt2)nop -mad.f32 r0.w, (neg)r1.x, r2.x, r0.w +mad.f32 r0.w, (neg)r1.x, r1.w, r0.w (rpt2)nop add.f r1.x, r0.w, c25.y (rpt2)nop @@ -45,7 +42,8 @@ mad.f32 r0.w, c24.w, r1.z, r0.w end nop nop +nop ; VERT: outputs: r0.x (0:0) ; VERT: inputs: r0.x (0:0,cm=1,il=8,b=0) r0.w (0:0,cm=f,il=12,b=0) r63.w (0:0,cm=0,il=16,b=0) r63.w (0:0,cm=0,il=20,b=0) -; VERT: 47 instructions, 0 half, 3 full +; VERT: 42 instructions, 0 half, 3 full diff --git a/reference/testN.asm b/reference/testN.asm index 4241e5c..5c49d2b 100644 --- a/reference/testN.asm +++ b/reference/testN.asm @@ -1,73 +1,48 @@ ; options: -; VERT: new compiler +; VERT: TGSI compiler @in(r0.x) in0 @in(r0.y) in1 @in(r0.z) in2 -@out(r0.x) out0 -@out(r0.y) out1 -@out(r0.z) out2 -@out(r0.w) out3 -@out(r1.x) out4 -@out(r1.y) out5 -@out(r1.z) out6 -@out(r1.w) out7 +; in3 unused +@out(r2.y) out0 +@out(r2.z) out1 +@out(r2.w) out2 +@out(r3.x) out3 +@out(r1.y) out4 +@out(r1.z) out5 +@out(r1.w) out6 +@out(r2.x) out7 +@const(c4.x) 0x3f000000, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)mad.f32 r0.w, c4.x, r0.x, c4.x -mov.f32f32 r1.x, r0.y +mad.f32 r1.x, c4.x, r0.y, c4.x +mov.f32f32 r2.x, (0.000000) +mov.f32f32 r1.w, r0.y mov.f32f32 r1.y, r0.x -mad.f32 r1.z, c4.x, r0.y, c4.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.z, r1.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.w, r1.z -mov.f32f32 r2.x, r0.w -mov.f32f32 r1.z, r1.x -mov.f32f32 r1.x, r1.y -mov.f32f32 r2.y, r1.w -mov.f32f32 r0.w, (0.000000) -(rpt1)nop -mul.f r1.y, c0.w, r0.x +mul.f r2.y, c0.w, r0.x mul.f r2.z, c0.z, r0.x mul.f r2.w, c0.y, r0.x -sam (f32)(x)r3.x, r2.x, s#0, t#0 -(sy)add.f r0.z, r0.z, r3.x -mov.f32f32 r1.w, r0.w +sam (f32)(x)r0.w, r0.w, s#0, t#0 +(sy)add.f r1.z, r0.z, r0.w mul.f r0.x, c0.x, r0.x -nop -(ss)mov.f32f32 r2.x, r0.z -(rpt2)nop -mad.f32 r0.z, c1.w, r2.x, r1.y -mad.f32 r0.w, c1.z, r2.x, r2.z -mad.f32 r1.y, c1.y, r2.x, r2.w -mad.f32 r0.x, c1.x, r2.x, r0.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w +(rpt1)nop +mad.f32 r0.z, c1.w, r1.z, r2.y +(ss)mad.f32 r0.w, c1.z, r1.z, r2.z mad.f32 r0.z, c2.w, r0.y, r0.z mad.f32 r0.w, c2.z, r0.y, r0.w -mov.f32f32 r1.y, r1.y -mov.f32f32 r0.x, r0.x -add.f r0.z, r0.z, c3.w -add.f r2.y, r0.w, c3.z -mad.f32 r1.y, c2.y, r0.y, r1.y +mad.f32 r1.x, c1.y, r1.z, r2.w +mad.f32 r0.x, c1.x, r1.z, r0.x +add.f r3.x, r0.z, c3.w +add.f r2.w, r0.w, c3.z +mad.f32 r0.z, c2.y, r0.y, r1.x mad.f32 r0.x, c2.x, r0.y, r0.x -mov.f32f32 r0.w, r0.z -mov.f32f32 r0.z, r2.y -add.f r0.y, r1.y, c3.y -add.f r0.x, r0.x, c3.x -mov.f32f32 r1.y, r2.x -nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x -mov.f32f32 r1.y, r1.y -(rpt2)nop -mov.f32f32 r1.y, r1.y -(rpt2)nop -mov.f32f32 r1.y, r1.y +(rpt1)nop +add.f r2.z, r0.z, c3.y +add.f r2.y, r0.x, c3.x end -; VERT: outputs: r0.x (0:0) r1.x (5:20) +nop +nop +; VERT: outputs: r2.y (0:0) r1.y (5:20) ; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) -; VERT: 63 instructions, 0 half, 4 full +; VERT: 28 instructions, 0 half, 4 full +; pos: r2.y diff --git a/reference/tex-clamp0.asm b/reference/tex-clamp0.asm index 74062e3..d96960a 100644 --- a/reference/tex-clamp0.asm +++ b/reference/tex-clamp0.asm @@ -6,39 +6,24 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)bary.f r0.z, 4, r0.x -bary.f r0.w, 7, r0.x -bary.f r1.x, 5, r0.x +bary.f r0.w, 5, r0.x +bary.f r1.x, 7, r0.x bary.f r1.y, 3, r0.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.x, r1.x bary.f r1.z, 2, r0.x -mov.f32f32 r1.w, r0.z -mov.f32f32 r2.y, r0.w -mov.f32f32 r2.x, r1.x -bary.f r0.z, 1, r0.x +bary.f r2.x, 1, r0.x bary.f (ei)r0.x, 0, r0.x -(rpt3)nop -sam.p (f32)(xyzw)r1.w, r1.w, s#0, t#0 -(sy)mul.f r0.y, r1.y, r2.z -mul.f r0.w, r2.y, r1.z -mul.f r0.z, r2.x, r0.z -mul.f r0.x, r1.w, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.x, r0.x -(ss)mov.f32f32 r1.w, r0.y -mov.f32f32 r1.z, r0.w -mov.f32f32 r1.y, r0.z -mov.f32f32 r1.x, r0.x +(rpt1)nop +sam.p (f32)(xyzw)r0.y, r0.z, s#0, t#0 +(sy)mul.f r1.w, r1.y, r1.x +mul.f r1.z, r0.w, r1.z +mul.f r1.y, r0.z, r2.x +(ss)mul.f r1.x, r0.y, r0.x end +nop +nop ; FRAG: outputs: r1.x (1:0) -; FRAG: inputs: r0.z (1:0,cm=f,il=8,b=1) r0.x (5:0,cm=f,il=12,b=1) -; FRAG: 35 instructions, 0 half, 3 full +; FRAG: inputs: r0.z (1:0,cm=f,il=8,b=1) r0.y (5:0,cm=f,il=12,b=1) +; FRAG: 15 instructions, 0 half, 3 full diff --git a/reference/tex-clamp1.asm b/reference/tex-clamp1.asm index 91aaa13..a0c4c2c 100644 --- a/reference/tex-clamp1.asm +++ b/reference/tex-clamp1.asm @@ -6,39 +6,24 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)bary.f r0.z, 7, r0.x bary.f r0.w, 4, r0.x mov.f32f32 r1.x, (0.000000) bary.f r1.y, 3, r0.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r2.x, r1.x -bary.f r1.x, 2, r0.x -mov.f32f32 r1.z, r0.z -mov.f32f32 r1.w, r0.w -bary.f r0.z, 1, r0.x +bary.f r1.z, 2, r0.x +bary.f r2.x, 1, r0.x bary.f (ei)r0.x, 0, r0.x -(rpt3)nop -sam.p (f32)(xyzw)r1.z, r1.z, s#0, t#0 -(sy)mul.f r0.y, r1.y, r2.y -mul.f r0.w, r2.x, r1.x -mul.f r0.z, r1.w, r0.z -mul.f r0.x, r1.z, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.x, r0.x -(ss)mov.f32f32 r1.w, r0.y -mov.f32f32 r1.z, r0.w -mov.f32f32 r1.y, r0.z -mov.f32f32 r1.x, r0.x +(rpt1)nop +sam.p (f32)(xyzw)r0.y, r0.z, s#0, t#0 +(sy)mul.f r1.w, r1.y, r1.x +mul.f r1.z, r0.w, r1.z +mul.f r1.y, r0.z, r2.x +(ss)mul.f r1.x, r0.y, r0.x end nop +nop ; FRAG: outputs: r1.x (1:0) ; FRAG: inputs: r0.z (1:0,cm=f,il=8,b=1) r63.w (5:0,cm=f,il=12,b=1) -; FRAG: 34 instructions, 0 half, 3 full +; FRAG: 15 instructions, 0 half, 3 full diff --git a/reference/twoside-frag.asm b/reference/twoside-frag.asm index 0272f05..19cf964 100644 --- a/reference/twoside-frag.asm +++ b/reference/twoside-frag.asm @@ -7,6 +7,8 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c1.x) 0x3f800000, 0x3f4ccccd, 0x00000000, 0x00000000 +@const(c2.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)mul.s r0.z, hr0.x, 2 absneg.f r0.w, (neg)c0.x bary.f r1.x, 15, r0.x @@ -19,71 +21,51 @@ cov.s32f32 r0.z, r0.z bary.f r2.x, 6, r0.x bary.f r2.y, 14, r0.x bary.f r2.z, 2, r0.x -mov.f32f32 r2.w, r0.z +max.f r2.w, r0.z, c1.z cmps.f.lt r3.x, r0.z, c1.z cmps.f.lt r3.y, r0.z, c1.z cmps.f.lt r3.z, r0.z, c1.z -max.f r2.w, r2.w, c1.z +min.f r2.w, r2.w, c1.x sel.b32 r1.x, r1.x, r3.x, r1.y -sel.b32 r1.y, r1.w, r3.y, r1.z -sel.b32 r1.z, r2.y, r3.z, r2.x -min.f r1.w, r2.w, c1.x -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.z, r1.z -add.f r0.w, r0.w, r1.w +sel.b32 r1.y, r2.y, r3.y, r2.x +bary.f r2.x, 13, r0.x +add.f r0.w, r0.w, r2.w mul.f r1.x, c1.y, r1.x -mov.f32f32 r1.y, r1.y -mul.f r1.z, c1.y, r1.z -mov.f32f32 r0.w, r0.w -cmps.f.lt r1.w, r0.z, c1.z -cmps.f.lt r2.x, r0.z, c1.z -cmps.f.lt r2.y, r0.z, c1.z +mul.f r1.y, c1.y, r1.y +bary.f r2.y, 5, r0.x max.f r0.w, r0.w, c1.z -bary.f r2.w, 10, r0.x -bary.f r3.x, 13, r0.x -bary.f r3.y, 9, r0.x +cmps.f.lt r2.w, r0.z, c1.z +cmps.f.lt r3.x, r0.z, c1.z +cmps.f.lt r3.y, r0.z, c1.z min.f r0.w, r0.w, c1.x -sel.b32 r1.w, r2.w, r1.w, r2.z -bary.f r2.z, 5, r0.x -bary.f r2.w, 1, r0.x +sel.b32 r2.x, r2.x, r3.z, r2.y +bary.f r2.y, 12, r0.x +sel.b32 r1.z, r1.w, r3.x, r1.z cmps.f.ne r0.w, r0.w, c1.z -mov.f32f32 r1.w, r1.w -cmps.f.lt r3.z, r0.z, c1.z -sel.b32 r2.y, r3.y, r2.y, r2.w -sel.b32 r1.x, r1.y, r0.w, r1.x -mov.f32f32 r1.y, r1.w -bary.f r2.w, 8, r0.x -mov.f32f32 r2.y, r2.y -mov.f32f32 r1.w, r1.x -sel.b32 r1.x, r1.y, r0.w, r1.z -bary.f r1.y, 0, r0.x -mov.f32f32 r2.y, r2.y -sel.b32 r2.x, r3.x, r2.x, r2.z -mov.f32f32 r1.z, r1.x -sel.b32 r1.x, r2.w, r3.z, r1.y +bary.f r3.x, 10, r0.x +mul.f r2.x, c1.y, r2.x +bary.f r3.z, 4, r0.x +sel.b32 r1.w, r1.z, r0.w, r1.x +sel.b32 r1.x, r3.x, r3.y, r2.z +cmps.f.lt r2.z, r0.z, c1.z +sel.b32 r2.y, r2.y, r2.w, r3.z +bary.f r2.w, 9, r0.x +sel.b32 r1.z, r1.x, r0.w, r1.y +bary.f r1.x, 1, r0.x +mul.f r2.y, c1.y, r2.y +(rpt1)nop +sel.b32 r1.x, r2.w, r2.z, r1.x cmps.f.lt r0.z, r0.z, c1.z -mov.f32f32 r1.y, r2.x -bary.f r2.x, 12, r0.x -mov.f32f32 r1.x, r1.x -bary.f (ei)r0.x, 4, r0.x -mul.f r0.y, c1.y, r1.y +bary.f r2.z, 8, r0.x +bary.f (ei)r0.x, 0, r0.x +sel.b32 r1.y, r1.x, r0.w, r2.x +(rpt1)nop +sel.b32 r0.x, r2.z, r0.z, r0.x (rpt2)nop -sel.b32 r0.y, r2.y, r0.w, r0.y -mov.f32f32 r1.x, r1.x -sel.b32 r0.x, r2.x, r0.z, r0.x -nop -mov.f32f32 r1.y, r0.y -nop -mov.f32f32 r0.x, r0.x -(rpt2)nop -mul.f r0.x, c1.y, r0.x -(rpt2)nop -sel.b32 r0.x, r1.x, r0.w, r0.x -(rpt2)nop -mov.f32f32 r1.x, r0.x +sel.b32 r1.x, r0.x, r0.w, r2.y end +nop ; FRAG: outputs: r1.x (1:0) ; FRAG: inputs: r0.w (1:0,cm=f,il=8,b=1) r0.z (1:1,cm=f,il=12,b=1) r63.y (7:0,cm=f,il=16,b=0) r1.x (2:0,cm=f,il=16,b=1) r0.y (2:1,cm=f,il=20,b=1) -; FRAG: 84 instructions, 1 half, 4 full +; FRAG: 59 instructions, 1 half, 4 full diff --git a/reference/twoside-vert.asm b/reference/twoside-vert.asm index a3e6d93..9eebb41 100644 --- a/reference/twoside-vert.asm +++ b/reference/twoside-vert.asm @@ -8,10 +8,10 @@ @in(r4.y) in5 @in(r4.z) in6 @in(r4.w) in7 -@in(r0.x) in8 -@in(r0.y) in9 -@in(r0.z) in10 -@in(r0.w) in11 +@in(r2.x) in8 +@in(r2.y) in9 +@in(r2.z) in10 +@in(r2.w) in11 @out(r0.x) out0 @out(r0.y) out1 @out(r0.z) out2 @@ -28,83 +28,52 @@ @out(r3.y) out13 @out(r3.z) out14 @out(r3.w) out15 -(sy)(ss)mov.f32f32 r2.x, r0.w -mov.f32f32 r2.y, r0.z -mov.f32f32 r2.z, r0.y -mov.f32f32 r2.w, r0.x -mov.f32f32 r2.x, r2.x -mov.f32f32 r2.y, r2.y -mov.f32f32 r2.z, r2.z -mov.f32f32 r2.w, r2.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r2.y, r2.y -mov.f32f32 r2.z, r2.z -mov.f32f32 r2.w, r2.w -max.f r2.x, r2.x, c5.x -max.f r2.y, r2.y, c5.x -max.f r2.z, r2.z, c5.x -max.f r2.w, r2.w, c5.x -min.f r3.w, r2.x, c5.y -min.f r3.z, r2.y, c5.y -min.f r3.y, r2.z, c5.y -min.f r3.x, r2.w, c5.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x -max.f r0.w, r0.w, c5.x -max.f r0.z, r0.z, c5.x -max.f r0.y, r0.y, c5.x -max.f r0.x, r0.x, c5.x -min.f r2.w, r0.w, c5.y -min.f r2.z, r0.z, c5.y -min.f r2.y, r0.y, c5.y -min.f r2.x, r0.x, c5.y -mul.f r0.x, c1.w, r1.x +@const(c5.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000 +(sy)(ss)mul.f r0.x, c1.w, r1.x mul.f r0.y, c1.z, r1.x mad.f32 r0.x, c2.w, r1.y, r0.x mad.f32 r0.y, c2.z, r1.y, r0.y mad.f32 r0.x, c3.w, r1.z, r0.x mad.f32 r0.y, c3.z, r1.z, r0.y -mad.f32 r0.x, c4.w, r1.w, r0.x -mad.f32 r0.y, c4.z, r1.w, r0.y -mul.f r5.x, c1.y, r1.x -mul.f r1.x, c1.x, r1.x -mov.f32f32 r0.w, r0.x -mov.f32f32 r0.z, r0.y -mad.f32 r0.x, c2.y, r1.y, r5.x -mad.f32 r0.y, c2.x, r1.y, r1.x +mad.f32 r0.w, c4.w, r1.w, r0.x +mad.f32 r0.z, c4.z, r1.w, r0.y +mul.f r0.x, c1.y, r1.x +mul.f r0.y, c1.x, r1.x +mad.f32 r0.x, c2.y, r1.y, r0.x +mad.f32 r0.y, c2.x, r1.y, r0.y mad.f32 r0.x, c3.y, r1.z, r0.x -mad.f32 r0.y, c3.x, r1.z, r0.y -mad.f32 r0.x, c4.y, r1.w, r0.x -mad.f32 r1.x, c4.x, r1.w, r0.y -mov.f32f32 r1.y, r4.w -mov.f32f32 r1.z, r4.z -mov.f32f32 r0.y, r0.x -mov.f32f32 r0.x, r1.x -max.f r1.x, r1.y, c5.x -max.f r1.y, r1.z, c5.x -mov.f32f32 r4.y, r4.y -mov.f32f32 r4.x, r4.x +mad.f32 r1.x, c3.x, r1.z, r0.y +mad.f32 r0.y, c4.y, r1.w, r0.x +mad.f32 r0.x, c4.x, r1.w, r1.x +max.f r1.x, r2.w, c5.x +max.f r1.y, r2.z, c5.x +max.f r1.z, r2.y, c5.x +max.f r1.w, r2.x, c5.x +min.f r3.w, r1.x, c5.y +min.f r3.z, r1.y, c5.y +min.f r3.y, r1.z, c5.y +min.f r3.x, r1.w, c5.y +max.f r1.x, r2.w, c5.x +max.f r1.y, r2.z, c5.x +max.f r1.z, r2.y, c5.x +max.f r1.w, r2.x, c5.x +min.f r2.w, r1.x, c5.y +min.f r2.z, r1.y, c5.y +min.f r2.y, r1.z, c5.y +min.f r2.x, r1.w, c5.y +max.f r1.x, r4.w, c5.x +max.f r1.y, r4.z, c5.x +max.f r4.y, r4.y, c5.x +max.f r4.x, r4.x, c5.x min.f r1.w, r1.x, c5.y min.f r1.z, r1.y, c5.y -max.f r1.x, r4.y, c5.x -max.f r4.x, r4.x, c5.x -(rpt1)nop -min.f r1.y, r1.x, c5.y +min.f r1.y, r4.y, c5.y min.f r1.x, r4.x, c5.y end nop nop +nop ; VERT: outputs: r0.x (0:0) r1.x (1:0) r2.x (1:1) r3.x (2:0) -; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) r4.x (0:0,cm=f,il=12,b=0) r0.x (0:0,cm=f,il=16,b=0) -; VERT: 75 instructions, 0 half, 6 full +; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) r4.x (0:0,cm=f,il=12,b=0) r2.x (0:0,cm=f,il=16,b=0) +; VERT: 41 instructions, 0 half, 5 full diff --git a/reference/vs-op-neg-int.asm b/reference/vs-op-neg-int.asm index 001aeed..b677421 100644 --- a/reference/vs-op-neg-int.asm +++ b/reference/vs-op-neg-int.asm @@ -1,9 +1,9 @@ ; options: ; VERT: new compiler -@in(r1.x) in0 -@in(r1.y) in1 -@in(r1.z) in2 -@in(r1.w) in3 +@in(r0.x) in0 +@in(r0.y) in1 +@in(r0.z) in2 +@in(r0.w) in3 @out(r0.x) out0 @out(r0.y) out1 @out(r0.z) out2 @@ -12,35 +12,28 @@ @out(r1.y) out5 @out(r1.z) out6 @out(r1.w) out7 -(sy)(ss)absneg.f r0.x, (neg)c0.x -mov.f32f32 r2.x, c2.y -mov.f32f32 r2.y, c2.y +@const(c2.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000 +(sy)(ss)absneg.f r1.x, (neg)c0.x +mov.f32f32 r1.y, c2.y +mov.f32f32 r1.z, c2.y +mov.f32f32 r2.x, c2.x +cmps.f.eq r1.x, r1.x, c1.x +mov.f32f32 r2.y, c2.x mov.f32f32 r2.z, c2.x -cmps.f.eq r0.x, r0.x, c1.x -mov.f32f32 r2.w, c2.x -mov.f32f32 r3.x, c2.x -mov.f32f32 r3.y, c2.y -cov.u32f32 r0.x, r0.x -mov.f32f32 r3.z, c2.y -mov.f32f32 r3.w, c2.x -mov.f32f32 r0.w, r1.w -mov.f32f32 r1.w, r0.x -mov.f32f32 r0.z, r1.z -mov.f32f32 r0.y, r1.y -mov.f32f32 r0.x, r1.x -cmps.f.ne r1.x, r1.w, c2.x +mov.f32f32 r2.w, c2.y +cov.u32f32 r1.x, r1.x +mov.f32f32 r3.x, c2.y +mov.f32f32 r3.y, c2.x +nop +cmps.f.ne r1.x, r1.x, c2.x (rpt2)nop -sel.b32 r1.y, r2.y, r1.x, r2.x -sel.b32 r1.z, r2.w, r1.x, r2.z -sel.b32 r2.x, r3.y, r1.x, r3.x -sel.b32 r1.x, r3.w, r1.x, r3.z -mov.f32f32 r1.w, r1.y -mov.f32f32 r1.z, r1.z -mov.f32f32 r1.y, r2.x -mov.f32f32 r1.x, r1.x +sel.b32 r1.w, r1.z, r1.x, r1.y +sel.b32 r1.z, r2.y, r1.x, r2.x +sel.b32 r1.y, r2.w, r1.x, r2.z +sel.b32 r1.x, r3.y, r1.x, r3.x end nop ; VERT: outputs: r0.x (0:0) r1.x (5:20) -; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) -; VERT: 29 instructions, 0 half, 4 full +; VERT: inputs: r0.x (0:0,cm=f,il=8,b=0) +; VERT: 21 instructions, 0 half, 4 full diff --git a/reference/webgl-blob-frag.asm b/reference/webgl-blob-frag.asm index 453a8e7..aba1ca9 100644 --- a/reference/webgl-blob-frag.asm +++ b/reference/webgl-blob-frag.asm @@ -1,32 +1,27 @@ ; options: -; FRAG: new compiler +; FRAG: TGSI compiler @in(r0.x) in0 @in(r0.y) in1 @out(r1.x) out0 @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c0.x) 0x3f800000, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)bary.f r0.z, 0, r0.x -bary.f (ei)r0.x, 1, r0.x -mov.f32f32 r0.y, c0.x -nop -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.x, r0.x -mov.f32f32 r1.w, r0.y -nop -mov.f32f32 r0.y, r0.z -mov.f32f32 r0.z, r0.x -(rpt5)nop -sam (f32)(xyz)r0.x, r0.y, s#0, t#0 -(sy)mov.f32f32 r0.x, r0.x -(ss)mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -nop -mov.f32f32 r1.z, r0.x +bary.f (ei)r0.w, 1, r0.x +mov.f32f32 r1.w, c0.x +(rpt4)nop +sam (f32)(xyz)r0.x, r0.z, s#0, t#0 +(sy)mov.f32f32 r1.z, r0.x mov.f32f32 r1.y, r0.y mov.f32f32 r1.x, r0.z end +nop +nop +nop ; FRAG: outputs: r1.x (1:0) ; FRAG: inputs: r0.x (5:20,cm=f,il=8,b=1) -; FRAG: 25 instructions, 0 half, 2 full +; FRAG: 13 instructions, 0 half, 2 full +; pos (bary): r0.x +; color: r1.x diff --git a/reference/webgl-water/webgl-water-13.asm b/reference/webgl-water/webgl-water-13.asm index 2c03e4f..284e180 100644 --- a/reference/webgl-water/webgl-water-13.asm +++ b/reference/webgl-water/webgl-water-13.asm @@ -1,13 +1,13 @@ ; options: ; VERT: new compiler -@in(r2.x) in0 -@in(r2.y) in1 -@in(r2.z) in2 -@in(r2.w) in3 -@in(r0.x) in4 -@in(r0.y) in5 -@in(r0.z) in6 -@in(r0.w) in7 +@in(r1.x) in0 +@in(r1.y) in1 +@in(r1.z) in2 +@in(r1.w) in3 +@in(r2.x) in4 +@in(r2.y) in5 +@in(r2.z) in6 +@in(r2.w) in7 @out(r0.x) out0 @out(r0.y) out1 @out(r0.z) out2 @@ -16,39 +16,36 @@ @out(r1.y) out5 @out(r1.z) out6 @out(r1.w) out7 -(sy)(ss)mov.f32f32 r0.w, r0.w -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x -max.f r0.w, r0.w, c4.x -max.f r0.z, r0.z, c4.x -max.f r0.y, r0.y, c4.x -max.f r0.x, r0.x, c4.x -min.f r1.w, r0.w, c4.y -min.f r1.z, r0.z, c4.y -min.f r1.y, r0.y, c4.y -min.f r1.x, r0.x, c4.y -mul.f r0.x, r2.x, c0.w -mul.f r0.y, r2.x, c0.z -mad.f32 r0.x, c1.w, r2.y, r0.x -mad.f32 r0.y, c1.z, r2.y, r0.y -mad.f32 r0.x, c2.w, r2.z, r0.x -mad.f32 r0.y, c2.z, r2.z, r0.y -mad.f32 r0.w, c3.w, r2.w, r0.x -mad.f32 r0.z, c3.z, r2.w, r0.y -mul.f r0.x, r2.x, c0.y -mul.f r0.y, r2.x, c0.x -mad.f32 r0.x, c1.y, r2.y, r0.x -mad.f32 r0.y, c1.x, r2.y, r0.y -mad.f32 r0.x, c2.y, r2.z, r0.x -mad.f32 r2.x, c2.x, r2.z, r0.y -mad.f32 r0.y, c3.y, r2.w, r0.x -mad.f32 r0.x, c3.x, r2.w, r2.x +@const(c4.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000 +(sy)(ss)mul.f r0.x, r1.x, c0.w +mul.f r0.y, r1.x, c0.z +mad.f32 r0.x, c1.w, r1.y, r0.x +mad.f32 r0.y, c1.z, r1.y, r0.y +mad.f32 r0.x, c2.w, r1.z, r0.x +mad.f32 r0.y, c2.z, r1.z, r0.y +mad.f32 r0.w, c3.w, r1.w, r0.x +mad.f32 r0.z, c3.z, r1.w, r0.y +mul.f r0.x, r1.x, c0.y +mul.f r0.y, r1.x, c0.x +mad.f32 r0.x, c1.y, r1.y, r0.x +mad.f32 r0.y, c1.x, r1.y, r0.y +mad.f32 r0.x, c2.y, r1.z, r0.x +mad.f32 r1.x, c2.x, r1.z, r0.y +mad.f32 r0.y, c3.y, r1.w, r0.x +mad.f32 r0.x, c3.x, r1.w, r1.x +max.f r1.x, r2.w, c4.x +max.f r1.y, r2.z, c4.x +max.f r2.y, r2.y, c4.x +max.f r2.x, r2.x, c4.x +min.f r1.w, r1.x, c4.y +min.f r1.z, r1.y, c4.y +min.f r1.y, r2.y, c4.y +min.f r1.x, r2.x, c4.y end nop nop nop ; VERT: outputs: r0.x (0:0) r1.x (1:0) -; VERT: inputs: r2.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=f,il=12,b=0) -; VERT: 29 instructions, 0 half, 3 full +; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) r2.x (0:0,cm=f,il=12,b=0) +; VERT: 25 instructions, 0 half, 3 full diff --git a/reference/webgl-water/webgl-water-14.asm b/reference/webgl-water/webgl-water-14.asm index 9bf6d4e..1c04ab7 100644 --- a/reference/webgl-water/webgl-water-14.asm +++ b/reference/webgl-water/webgl-water-14.asm @@ -6,35 +6,20 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c1.x) 0x3f800000, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)bary.f r0.z, 0, r0.x -mov.f32f32 r0.w, c1.x -bary.f (ei)r0.x, 1, r0.x -nop -mov.f32f32 r0.y, r0.z -mul.f r0.z, r0.w, c0.x -mov.f32f32 r0.x, r0.x -nop -mov.f32f32 r0.w, r0.y -mov.f32f32 r1.w, r0.z -mov.f32f32 r1.x, r0.x -(rpt5)nop -sam (f32)(xyz)r0.x, r0.w, s#0, t#0 -(sy)mov.f32f32 r0.z, r0.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x -nop -mul.f r0.z, r0.z, c0.x -mul.f r0.y, r0.y, c0.x -mul.f r0.x, r0.x, c0.x -nop -mov.f32f32 r1.z, r0.z -mov.f32f32 r1.y, r0.y -(ss)mov.f32f32 r1.x, r0.x +bary.f (ei)r0.w, 1, r0.x +mov.f32f32 r0.x, c1.x +(rpt4)nop +sam (f32)(xyz)r0.y, r0.z, s#0, t#0 +(sy)mul.f r1.z, r0.w, c0.x +mul.f r1.y, r0.z, c0.x +mul.f r1.x, r0.y, c0.x +mul.f r1.w, r0.x, c0.x end nop nop -nop ; FRAG: outputs: r1.x (1:0) ; FRAG: inputs: r0.x (5:20,cm=f,il=8,b=1) -; FRAG: 30 instructions, 0 half, 2 full +; FRAG: 14 instructions, 0 half, 2 full diff --git a/reference/webgl-water/webgl-water-18.asm b/reference/webgl-water/webgl-water-18.asm index d3161d5..4538b69 100644 --- a/reference/webgl-water/webgl-water-18.asm +++ b/reference/webgl-water/webgl-water-18.asm @@ -6,31 +6,20 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c1.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)bary.f r0.z, 0, r0.x -bary.f (ei)r0.x, 1, r0.x -(rpt1)nop -mov.f32f32 r0.y, r0.z -mov.f32f32 r0.x, r0.x -(rpt1)nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.x +bary.f (ei)r0.w, 1, r0.x (rpt5)nop -sam (f32)(xyzw)r0.x, r0.y, s#0, t#0 -(sy)mul.f r0.w, r0.w, c0.x -mul.f r0.x, r0.x, c0.x -(ss)mul.f r0.y, r0.y, c0.x -mul.f r0.z, r0.z, c0.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r1.w, r0.w -mov.f32f32 r1.z, r0.x -mov.f32f32 r1.y, r0.y -mov.f32f32 r1.x, r0.z +sam (f32)(xyzw)r0.x, r0.z, s#0, t#0 +(sy)mul.f r1.w, r0.w, c0.x +mul.f r1.z, r0.x, c0.x +mul.f r1.y, r0.y, c0.x +mul.f r1.x, r0.z, c0.x end nop +nop +nop ; FRAG: outputs: r1.x (1:0) ; FRAG: inputs: r0.x (5:20,cm=f,il=8,b=1) -; FRAG: 30 instructions, 0 half, 2 full +; FRAG: 14 instructions, 0 half, 2 full diff --git a/reference/webgl-water/webgl-water-20.asm b/reference/webgl-water/webgl-water-20.asm index a2747d4..52b9c56 100644 --- a/reference/webgl-water/webgl-water-20.asm +++ b/reference/webgl-water/webgl-water-20.asm @@ -6,27 +6,20 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c1.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)bary.f r0.z, 0, r0.x -bary.f (ei)r0.x, 1, r0.x -(rpt1)nop -mov.f32f32 r0.y, r0.z -mov.f32f32 r0.x, r0.x -(rpt1)nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.x +bary.f (ei)r0.w, 1, r0.x (rpt5)nop -sam (f32)(xyzw)r0.x, r0.y, s#0, t#0 -(sy)mul.f r0.w, r0.w, c0.x -(ss)mul.f r0.z, r0.z, c0.x -mul.f r0.y, r0.y, c0.x -mul.f r0.x, r0.x, c0.x -mov.f32f32 r1.w, r0.w -mov.f32f32 r1.z, r0.z -mov.f32f32 r1.y, r0.y -mov.f32f32 r1.x, r0.x +sam (f32)(xyzw)r0.x, r0.z, s#0, t#0 +(sy)mul.f r1.w, r0.w, c0.x +mul.f r1.z, r0.z, c0.x +mul.f r1.y, r0.y, c0.x +mul.f r1.x, r0.x, c0.x end nop +nop +nop ; FRAG: outputs: r1.x (1:0) ; FRAG: inputs: r0.x (5:20,cm=f,il=8,b=1) -; FRAG: 26 instructions, 0 half, 2 full +; FRAG: 14 instructions, 0 half, 2 full diff --git a/reference/webgl-water/webgl-water-27.asm b/reference/webgl-water/webgl-water-27.asm index 2c03e4f..284e180 100644 --- a/reference/webgl-water/webgl-water-27.asm +++ b/reference/webgl-water/webgl-water-27.asm @@ -1,13 +1,13 @@ ; options: ; VERT: new compiler -@in(r2.x) in0 -@in(r2.y) in1 -@in(r2.z) in2 -@in(r2.w) in3 -@in(r0.x) in4 -@in(r0.y) in5 -@in(r0.z) in6 -@in(r0.w) in7 +@in(r1.x) in0 +@in(r1.y) in1 +@in(r1.z) in2 +@in(r1.w) in3 +@in(r2.x) in4 +@in(r2.y) in5 +@in(r2.z) in6 +@in(r2.w) in7 @out(r0.x) out0 @out(r0.y) out1 @out(r0.z) out2 @@ -16,39 +16,36 @@ @out(r1.y) out5 @out(r1.z) out6 @out(r1.w) out7 -(sy)(ss)mov.f32f32 r0.w, r0.w -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x -max.f r0.w, r0.w, c4.x -max.f r0.z, r0.z, c4.x -max.f r0.y, r0.y, c4.x -max.f r0.x, r0.x, c4.x -min.f r1.w, r0.w, c4.y -min.f r1.z, r0.z, c4.y -min.f r1.y, r0.y, c4.y -min.f r1.x, r0.x, c4.y -mul.f r0.x, r2.x, c0.w -mul.f r0.y, r2.x, c0.z -mad.f32 r0.x, c1.w, r2.y, r0.x -mad.f32 r0.y, c1.z, r2.y, r0.y -mad.f32 r0.x, c2.w, r2.z, r0.x -mad.f32 r0.y, c2.z, r2.z, r0.y -mad.f32 r0.w, c3.w, r2.w, r0.x -mad.f32 r0.z, c3.z, r2.w, r0.y -mul.f r0.x, r2.x, c0.y -mul.f r0.y, r2.x, c0.x -mad.f32 r0.x, c1.y, r2.y, r0.x -mad.f32 r0.y, c1.x, r2.y, r0.y -mad.f32 r0.x, c2.y, r2.z, r0.x -mad.f32 r2.x, c2.x, r2.z, r0.y -mad.f32 r0.y, c3.y, r2.w, r0.x -mad.f32 r0.x, c3.x, r2.w, r2.x +@const(c4.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000 +(sy)(ss)mul.f r0.x, r1.x, c0.w +mul.f r0.y, r1.x, c0.z +mad.f32 r0.x, c1.w, r1.y, r0.x +mad.f32 r0.y, c1.z, r1.y, r0.y +mad.f32 r0.x, c2.w, r1.z, r0.x +mad.f32 r0.y, c2.z, r1.z, r0.y +mad.f32 r0.w, c3.w, r1.w, r0.x +mad.f32 r0.z, c3.z, r1.w, r0.y +mul.f r0.x, r1.x, c0.y +mul.f r0.y, r1.x, c0.x +mad.f32 r0.x, c1.y, r1.y, r0.x +mad.f32 r0.y, c1.x, r1.y, r0.y +mad.f32 r0.x, c2.y, r1.z, r0.x +mad.f32 r1.x, c2.x, r1.z, r0.y +mad.f32 r0.y, c3.y, r1.w, r0.x +mad.f32 r0.x, c3.x, r1.w, r1.x +max.f r1.x, r2.w, c4.x +max.f r1.y, r2.z, c4.x +max.f r2.y, r2.y, c4.x +max.f r2.x, r2.x, c4.x +min.f r1.w, r1.x, c4.y +min.f r1.z, r1.y, c4.y +min.f r1.y, r2.y, c4.y +min.f r1.x, r2.x, c4.y end nop nop nop ; VERT: outputs: r0.x (0:0) r1.x (1:0) -; VERT: inputs: r2.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=f,il=12,b=0) -; VERT: 29 instructions, 0 half, 3 full +; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) r2.x (0:0,cm=f,il=12,b=0) +; VERT: 25 instructions, 0 half, 3 full diff --git a/reference/webgl-water/webgl-water-33.asm b/reference/webgl-water/webgl-water-33.asm index 2c03e4f..284e180 100644 --- a/reference/webgl-water/webgl-water-33.asm +++ b/reference/webgl-water/webgl-water-33.asm @@ -1,13 +1,13 @@ ; options: ; VERT: new compiler -@in(r2.x) in0 -@in(r2.y) in1 -@in(r2.z) in2 -@in(r2.w) in3 -@in(r0.x) in4 -@in(r0.y) in5 -@in(r0.z) in6 -@in(r0.w) in7 +@in(r1.x) in0 +@in(r1.y) in1 +@in(r1.z) in2 +@in(r1.w) in3 +@in(r2.x) in4 +@in(r2.y) in5 +@in(r2.z) in6 +@in(r2.w) in7 @out(r0.x) out0 @out(r0.y) out1 @out(r0.z) out2 @@ -16,39 +16,36 @@ @out(r1.y) out5 @out(r1.z) out6 @out(r1.w) out7 -(sy)(ss)mov.f32f32 r0.w, r0.w -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x -max.f r0.w, r0.w, c4.x -max.f r0.z, r0.z, c4.x -max.f r0.y, r0.y, c4.x -max.f r0.x, r0.x, c4.x -min.f r1.w, r0.w, c4.y -min.f r1.z, r0.z, c4.y -min.f r1.y, r0.y, c4.y -min.f r1.x, r0.x, c4.y -mul.f r0.x, r2.x, c0.w -mul.f r0.y, r2.x, c0.z -mad.f32 r0.x, c1.w, r2.y, r0.x -mad.f32 r0.y, c1.z, r2.y, r0.y -mad.f32 r0.x, c2.w, r2.z, r0.x -mad.f32 r0.y, c2.z, r2.z, r0.y -mad.f32 r0.w, c3.w, r2.w, r0.x -mad.f32 r0.z, c3.z, r2.w, r0.y -mul.f r0.x, r2.x, c0.y -mul.f r0.y, r2.x, c0.x -mad.f32 r0.x, c1.y, r2.y, r0.x -mad.f32 r0.y, c1.x, r2.y, r0.y -mad.f32 r0.x, c2.y, r2.z, r0.x -mad.f32 r2.x, c2.x, r2.z, r0.y -mad.f32 r0.y, c3.y, r2.w, r0.x -mad.f32 r0.x, c3.x, r2.w, r2.x +@const(c4.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000 +(sy)(ss)mul.f r0.x, r1.x, c0.w +mul.f r0.y, r1.x, c0.z +mad.f32 r0.x, c1.w, r1.y, r0.x +mad.f32 r0.y, c1.z, r1.y, r0.y +mad.f32 r0.x, c2.w, r1.z, r0.x +mad.f32 r0.y, c2.z, r1.z, r0.y +mad.f32 r0.w, c3.w, r1.w, r0.x +mad.f32 r0.z, c3.z, r1.w, r0.y +mul.f r0.x, r1.x, c0.y +mul.f r0.y, r1.x, c0.x +mad.f32 r0.x, c1.y, r1.y, r0.x +mad.f32 r0.y, c1.x, r1.y, r0.y +mad.f32 r0.x, c2.y, r1.z, r0.x +mad.f32 r1.x, c2.x, r1.z, r0.y +mad.f32 r0.y, c3.y, r1.w, r0.x +mad.f32 r0.x, c3.x, r1.w, r1.x +max.f r1.x, r2.w, c4.x +max.f r1.y, r2.z, c4.x +max.f r2.y, r2.y, c4.x +max.f r2.x, r2.x, c4.x +min.f r1.w, r1.x, c4.y +min.f r1.z, r1.y, c4.y +min.f r1.y, r2.y, c4.y +min.f r1.x, r2.x, c4.y end nop nop nop ; VERT: outputs: r0.x (0:0) r1.x (1:0) -; VERT: inputs: r2.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=f,il=12,b=0) -; VERT: 29 instructions, 0 half, 3 full +; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) r2.x (0:0,cm=f,il=12,b=0) +; VERT: 25 instructions, 0 half, 3 full diff --git a/reference/webgl-water/webgl-water-34.asm b/reference/webgl-water/webgl-water-34.asm index d3d8a07..ccc345b 100644 --- a/reference/webgl-water/webgl-water-34.asm +++ b/reference/webgl-water/webgl-water-34.asm @@ -6,15 +6,16 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)bary.f r0.z, 0, r0.x -bary.f (ei)r0.x, 1, r0.x -(rpt1)nop -mov.f32f32 r0.y, r0.z -mov.f32f32 r0.z, r0.x +bary.f (ei)r0.w, 1, r0.x (rpt5)nop -sam (f32)(xyzw)r1.x, r0.y, s#0, t#0 +sam (f32)(xyzw)r1.x, r0.z, s#0, t#0 end +nop +nop +nop ; FRAG: outputs: r1.x (1:0) ; FRAG: inputs: r0.x (5:0,cm=f,il=8,b=1) -; FRAG: 14 instructions, 0 half, 2 full +; FRAG: 10 instructions, 0 half, 2 full diff --git a/reference/webgl-water/webgl-water-36.asm b/reference/webgl-water/webgl-water-36.asm index fc933f4..1c768fc 100644 --- a/reference/webgl-water/webgl-water-36.asm +++ b/reference/webgl-water/webgl-water-36.asm @@ -6,71 +6,49 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c4.x) 0x3f000000, 0x00000000, 0x3f800000, 0x40490ff9 +@const(c5.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)mov.f32f32 r0.z, c4.x -bary.f r0.w, 0, r0.x -mov.f32f32 r1.x, c4.x -bary.f (ei)r0.x, 1, r0.x -mad.f32 r0.y, c1.x, r0.z, c4.x -mov.f32f32 r0.z, r0.w -mad.f32 r1.x, c1.y, r1.x, c4.x -mov.f32f32 r1.y, r0.x -add.f r0.y, r0.y, (neg)r0.w -mov.f32f32 r0.z, r0.z -add.f r0.x, r1.x, (neg)r0.x -mov.f32f32 r0.w, r1.y -mov.f32f32 r0.y, r0.y +mov.f32f32 r0.w, c4.x mov.f32f32 r1.x, c2.x -(rpt1)nop -mul.f r0.y, r0.y, r0.y -mov.f32f32 r0.x, r0.x -sam (f32)(xyzw)r2.x, r0.z, s#0, t#0 -(sy)(ss)mov.f32f32 r0.z, r2.y -mov.f32f32 r0.w, r2.w -mov.f32f32 r1.z, r2.z -mad.f32 r0.x, r0.x, r0.x, r0.y -mov.f32f32 r1.y, r0.z +bary.f r1.y, 0, r0.x +mad.f32 r0.z, c1.x, r0.z, c4.x +mad.f32 r0.w, c1.y, r0.w, c4.x +bary.f (ei)r1.z, 1, r0.x +nop +add.f r0.x, r0.z, (neg)r1.y rcp r0.y, r1.x -mov.f32f32 r1.w, r0.w -mov.f32f32 r1.z, r1.z -mov.f32f32 r0.x, r0.x -(rpt5)nop -sqrt r0.x, r0.x -(ss)mov.f32f32 r0.x, r0.x +(rpt1)nop +add.f r0.z, r0.w, (neg)r1.z +mov.f32f32 r0.w, r0.x +(ss)nop +sam (f32)(xyzw)r1.x, r1.y, s#0, t#0 (rpt2)nop -mul.f r0.x, r0.x, r0.y +mul.f r0.x, r0.x, r0.w +mov.f32f32 r0.w, r0.z (rpt2)nop -mov.f32f32 r0.x, r0.x +mad.f32 r0.x, r0.z, r0.w, r0.x +(rpt5)nop +sqrt r0.x, r0.x +(ss)mul.f r0.x, r0.x, r0.y (rpt2)nop add.f r0.x, c4.z, (neg)r0.x (rpt2)nop -mov.f32f32 r0.x, r0.x -(rpt2)nop max.f r0.x, c4.y, r0.x (rpt2)nop -mov.f32f32 r0.x, r0.x -(rpt2)nop mul.f r0.x, r0.x, c4.w -(rpt2)nop -mov.f32f32 r0.x, r0.x (rpt5)nop cos r0.x, r0.x -(ss)mov.f32f32 r0.x, r0.x -(rpt2)nop -mul.f r0.x, r0.x, c4.x -(rpt2)nop -mov.f32f32 r0.x, r0.x +(ss)mul.f r0.x, r0.x, c4.x (rpt2)nop add.f r0.x, c4.x, (neg)r0.x (rpt2)nop -mov.f32f32 r0.x, r0.x -(rpt2)nop -mad.f32 r0.x, c3.x, r0.x, r2.x -(rpt2)nop -mov.f32f32 r1.x, r0.x +(sy)mad.f32 r1.x, c3.x, r0.x, r1.x end nop nop +nop ; FRAG: outputs: r1.x (1:0) ; FRAG: inputs: r0.x (5:20,cm=f,il=8,b=1) -; FRAG: 101 instructions, 0 half, 3 full +; FRAG: 62 instructions, 0 half, 2 full diff --git a/reference/webgl-water/webgl-water-37.asm b/reference/webgl-water/webgl-water-37.asm index b31dd2c..b548823 100644 --- a/reference/webgl-water/webgl-water-37.asm +++ b/reference/webgl-water/webgl-water-37.asm @@ -11,27 +11,16 @@ @out(r1.y) out5 @out(r1.z) out6 @out(r1.w) out7 -(sy)(ss)mov.f32f32 r0.w, (0.000000) -mov.f32f32 r1.x, (0.000000) +@const(c0.x) 0x3f800000, 0x3f000000, 0x00000000, 0x00000000 +(sy)(ss)mov.f32f32 r1.w, (0.000000) +mov.f32f32 r1.z, (0.000000) mad.f32 r1.y, c0.y, r0.y, c0.y -mad.f32 r2.x, c0.y, r0.x, c0.y -mov.f32f32 r1.w, r0.w -mov.f32f32 r1.z, r1.x -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.x, r2.x +mad.f32 r1.x, c0.y, r0.x, c0.y mov.f32f32 r0.w, c0.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x end nop nop -nop ; VERT: outputs: r0.x (0:0) r1.x (5:20) ; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) -; VERT: 17 instructions, 0 half, 3 full +; VERT: 6 instructions, 0 half, 2 full diff --git a/reference/webgl-water/webgl-water-38.asm b/reference/webgl-water/webgl-water-38.asm index 015bad5..a32e656 100644 --- a/reference/webgl-water/webgl-water-38.asm +++ b/reference/webgl-water/webgl-water-38.asm @@ -6,351 +6,213 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c3.x) 0x3f000000, 0x3f7fbe77, 0x3f800000, 0x00000000 +@const(c4.x) 0x3f666666, 0x40800000, 0x3f1013a9, 0x3f400d1b +@const(c5.x) 0x3ec00000, 0x40000000, 0xbf800000, 0x3f800000 +@const(c6.x) 0xc39044fe, 0xbe2ab368, 0x41200000, 0x3ef5c28f +@const(c7.x) 0x3ef5c28f, 0x3f8a3d71, 0x3f99999a, 0x00000000 +@const(c8.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)absneg.f r0.z, (neg)c0.y bary.f r0.w, 0, r0.x -bary.f r1.x, 1, r0.x -bary.f (ei)r0.x, 2, r0.x +bary.f r1.x, 2, r0.x +bary.f (ei)r0.x, 1, r0.x mul.f r0.y, r0.z, r0.z add.f r1.y, r0.w, (neg)c1.x -mad.f32 r1.z, c3.x, r0.w, c3.x +absneg.f r1.z, (abs)r1.x mad.f32 r1.w, c3.x, r0.w, c3.x add.f r0.y, c3.z, (neg)r0.y mul.f r1.y, r1.y, r1.y -add.f r2.x, r1.x, (neg)c1.y -mov.f32f32 r1.z, r1.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.w, r1.w -mad.f32 r1.y, r2.x, r2.x, r1.y -mad.f32 r2.x, c3.x, r1.x, c3.z +add.f r2.y, r0.x, (neg)c1.y +cmps.f.lt r1.z, c3.y, r1.z mul.f r0.y, r0.y, c4.z -mov.f32f32 r1.z, r1.z -mov.f32f32 r1.w, r1.w -mov.f32f32 r1.y, r1.y -mov.f32f32 r0.y, r0.y -add.f r2.y, r0.x, (neg)c1.z -mov.f32f32 r2.z, r1.z -mov.f32f32 r1.z, r1.w +mad.f32 r2.x, c3.x, r1.x, c3.x +mad.f32 r2.z, c3.x, r0.w, c3.x +mul.f r2.w, r0.w, r0.w add.f r0.y, c3.z, (neg)r0.y mad.f32 r1.y, r2.y, r2.y, r1.y -mad.f32 r1.w, c3.x, r0.x, c3.x -mad.f32 r2.y, c3.x, r0.x, c3.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.y, r2.y -mov.f32f32 r2.w, r0.y +add.f r2.y, r1.x, (neg)c1.z +cov.u32f32 r1.z, r1.z +mov.f32f32 r3.x, r0.y cmps.f.lt r0.y, r0.y, c3.w -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.y, r2.y -sqrt r1.y, r1.y -mov.f32f32 r2.x, r2.x -(ss)mov.f32f32 r1.y, r1.y -mov.f32f32 r3.x, c2.x -sqrt r2.w, r2.w -(ss)mov.f32f32 r3.y, r2.w -mov.f32f32 r3.z, r0.z +mad.f32 r1.y, r2.y, r2.y, r1.y +cmps.f.ne r1.z, r1.z, c3.w +mad.f32 r3.y, c3.x, r0.x, c3.z +mov.f32f32 r2.y, c3.w +mov.f32f32 r3.w, c3.w +sqrt r3.x, r3.x +(ss)mad.f32 r3.x, c4.w, r0.z, r3.x cov.u32f32 r0.y, r0.y -(ss)mov.f32f32 r2.w, r1.w -mov.f32f32 r1.w, r2.y -mad.f32 r2.y, c4.w, r3.z, r3.y -rcp r3.x, r3.x -(ss)mul.f r1.y, r1.y, r3.x +sqrt r1.y, r1.y +mad.f32 r3.z, c3.x, r0.w, c3.x +mov.f32f32 r4.x, c2.x +mov.f32f32 r4.y, r3.x +mul.f r3.x, r3.x, c3.w +absneg.f r4.z, (neg)c0.z cmps.f.ne r0.y, r0.y, c3.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r2.y, r2.y -mov.f32f32 r1.y, r1.y -sam (f32)(x)r2.z, r2.z, s#2, t#2 -(ss)nop -sam (f32)(x)r2.w, r1.z, s#2, t#2 -(sy)(ss)cmps.f.lt r1.z, r1.x, r2.z -cmps.f.lt r1.w, r1.x, r2.w -mul.f r2.z, r2.y, c3.w -mul.f r2.w, r2.y, c3.z -mul.f r2.y, r2.y, c3.w -cov.u32f32 r1.z, r1.z -mov.f32f32 r2.z, r2.z -absneg.f r3.x, (neg)c0.x -mov.f32f32 r2.w, r2.w -mov.f32f32 r0.z, r0.z -mov.f32f32 r2.y, r2.y -mov.f32f32 r3.x, r3.x -absneg.f r3.y, (neg)c0.z -mad.f32 r0.z, c4.w, r0.z, (neg)r2.w -mov.f32f32 r2.w, c3.w -mad.f32 r2.z, c4.w, r3.x, (neg)r2.z -mov.f32f32 r3.x, c3.w -mov.f32f32 r3.y, r3.y -sel.b32 r0.z, r2.w, r0.y, r0.z +mul.f r4.w, r4.y, c3.w +absneg.f r5.x, (neg)c0.x +mul.f r4.y, r4.y, c3.z +mad.f32 r3.x, c4.w, r4.z, (neg)r3.x +mad.f32 r0.z, c4.w, r0.z, (neg)r4.y +mad.f32 r4.y, c4.w, r5.x, (neg)r4.w +mov.f32f32 r4.z, c3.w +mov.f32f32 r4.w, c3.w +mov.f32f32 r5.x, c3.w +rcp r4.x, r4.x +(ss)mul.f r1.y, r1.y, r4.x +sel.b32 r4.x, r4.z, r0.y, r4.y +sel.b32 r0.z, r4.w, r0.y, r0.z +sel.b32 r0.y, r5.x, r0.y, r3.x +sam (f32)(xyz)r4.y, r3.y, s#0, t#0 +mad.f32 r3.x, c3.x, r0.w, c3.x +(ss)absneg.f r3.y, (neg)r4.x +absneg.f r0.z, (neg)r0.z +mov.f32f32 r3.z, c3.w +absneg.f r0.y, (neg)r0.y +mul.f r4.x, r0.x, r3.y log2 r1.y, r1.y (ss)mul.f r1.y, c4.y, r1.y -sel.b32 r2.z, r3.x, r0.y, r2.z -mad.f32 r2.y, c4.w, r3.y, (neg)r2.y -absneg.f r0.z, (neg)r0.z -mov.f32f32 r1.y, r1.y -absneg.f r2.z, (neg)r2.z -absneg.f r2.w, (abs)r0.x -mov.f32f32 r3.x, r0.z -mov.f32f32 r3.y, c3.w -mov.f32f32 r3.z, r2.z -cmps.f.lt r2.w, c3.y, r2.w -mov.f32f32 r3.w, r0.z +sel.b32 r2.y, r2.y, r1.z, r3.z +rcp r3.z, r3.y +absneg.f r5.x, (neg)r0.w +rcp r5.y, r0.z +(ss)mul.f r4.x, r4.x, r5.y +rcp r5.z, r0.z +absneg.f r5.w, (neg)r0.x +absneg.f r6.x, (abs)r0.w +add.f r6.y, c5.z, r5.x +add.f r4.x, r0.w, (neg)r4.x +add.f r6.z, c5.z, r5.w +cmps.f.lt r6.x, c3.y, r6.x +mul.f r3.z, r6.y, r3.z +rcp r6.y, r3.y +add.f r5.x, c5.w, r5.x +mad.f32 r6.w, c5.x, r4.x, c3.x +mul.f r4.x, r0.x, r0.y +mov.f32f32 r7.x, r3.z +(ss)mul.f r5.x, r5.x, r6.y +mul.f r5.z, r6.z, r5.z +cov.u32f32 r6.x, r6.x +mul.f r4.x, r4.x, r5.y +max.f r3.z, r3.z, r5.x +rcp r5.y, r0.z +add.f r5.w, c5.y, r5.w +mov.f32f32 r5.x, r5.x +add.f r4.x, r1.x, (neg)r4.x +mov.f32f32 r6.y, r5.z +(ss)mul.f r5.y, r5.w, r5.y +min.f r5.x, r7.x, r5.x +mad.f32 r7.x, c5.x, r4.x, c3.x +cmps.f.ne r4.x, r6.x, c3.w +max.f r5.z, r5.z, r5.y +mov.f32f32 r5.y, r5.y +absneg.f r0.w, (neg)r0.w +rcp r5.w, r0.y +absneg.f r6.x, (neg)r1.x +min.f r3.z, r3.z, r5.z +sam (f32)(xy)r6.z, r6.w, s#1, t#1 +min.f r5.y, r6.y, r5.y +sel.b32 r0.w, r0.w, r4.x, r2.y +add.f r2.y, c5.z, r6.x +rcp r5.z, r0.y +add.f r6.x, c5.w, r6.x +max.f r5.x, r5.x, r5.y +mul.f r0.w, r3.y, r0.w +(ss)mul.f r2.y, r2.y, r5.w +(ss)mul.f r3.y, r6.x, r5.z +mov.f32f32 r5.y, c3.z exp2 r1.y, r1.y nop (ss)rcp r1.y, r1.y -sel.b32 r0.y, r3.y, r0.y, r2.y -rcp r2.y, r3.x -(ss)mov.f32f32 r3.x, r2.z -cov.u32f32 r2.w, r2.w -rcp r3.y, r3.z -(ss)absneg.f r3.z, (neg)r0.w -absneg.f r0.y, (neg)r0.y -mul.f r3.x, r1.x, r3.x -mov.f32f32 r2.w, r2.w -add.f r4.x, c5.z, r3.z -mov.f32f32 r4.y, r0.y -(ss)mul.f r3.x, r3.x, r2.y -cmps.f.ne r2.w, r2.w, c3.w -mul.f r3.y, r4.x, r3.y -mul.f r4.x, r1.x, r4.y -mov.f32f32 r3.x, r3.x -mov.f32f32 r4.y, c3.w -mov.f32f32 r3.y, r3.y -mov.f32f32 r4.z, r2.z -add.f r3.x, r0.w, (neg)r3.x -mul.f r2.y, r4.x, r2.y -mov.f32f32 r4.x, r4.y -mov.f32f32 r4.y, c3.w -mov.f32f32 r4.w, r0.y -rcp r3.w, r3.w -mov.f32f32 r3.x, r3.x -rcp r4.z, r4.z -add.f r3.z, c5.w, r3.z -mov.f32f32 r2.y, r2.y -sel.b32 r4.x, r4.x, r2.w, r4.y -mad.f32 r3.x, c5.x, r3.x, c3.x -mov.f32f32 r3.z, r3.z -add.f r2.y, r0.x, (neg)r2.y -absneg.f r4.y, (abs)r0.w -rcp r4.w, r4.w -mov.f32f32 r3.x, r3.x -(ss)mul.f r3.z, r3.z, r4.z -mov.f32f32 r2.y, r2.y -cmps.f.lt r4.y, c3.y, r4.y -(ss)absneg.f r4.z, (neg)r0.x -mov.f32f32 r3.z, r3.z -mov.f32f32 r3.x, r3.x -mad.f32 r2.y, c5.x, r2.y, c3.x -cov.u32f32 r4.y, r4.y -max.f r5.x, r3.y, r3.z -mov.f32f32 r3.x, r3.x -mov.f32f32 r2.y, r2.y -min.f r3.y, r3.y, r3.z -mov.f32f32 r3.z, r5.x -absneg.f r5.x, (neg)r1.x -mov.f32f32 r5.y, r3.x -mov.f32f32 r2.y, r2.y -cmps.f.ne r3.x, r4.y, c3.w -add.f r4.y, c5.z, r5.x -absneg.f r5.z, (neg)r0.w -add.f r5.w, c5.z, r4.z -mov.f32f32 r1.y, r1.y -mul.f r3.w, r4.y, r3.w -mov.f32f32 r2.y, r2.y -mov.f32f32 r4.y, r5.z -mul.f r4.w, r5.w, r4.w -mov.f32f32 r3.w, r3.w -mov.f32f32 r5.w, r0.z mov.f32f32 r5.z, r2.y -mov.f32f32 r2.y, r4.y -mov.f32f32 r4.y, r4.w -mul.f r1.y, c4.x, r1.y -mov.f32f32 r4.w, r0.y -sel.b32 r2.y, r2.y, r3.x, r4.x -rcp r4.x, r5.w -add.f r5.x, c5.y, r5.x -sam (f32)(xy)r5.y, r5.y, s#1, t#1 -mov.f32f32 r1.y, r1.y -(ss)mov.f32f32 r5.w, r0.z -mul.f r2.y, r2.z, r2.y -mov.f32f32 r2.z, r5.x +max.f r2.y, r2.y, r3.y +sel.b32 r3.w, r3.w, r1.z, r5.y +mov.f32f32 r5.y, c3.w +mov.f32f32 r5.w, r3.y +min.f r2.y, r3.z, r2.y +(ss)mul.f r1.y, c4.x, r1.y +mad.f32 r3.y, c3.x, r1.x, c3.x +sel.b32 r3.z, r5.y, r4.x, r3.w +mov.f32f32 r3.w, r2.y +min.f r5.y, r5.z, r5.w +mad.f32 r2.y, r0.z, r2.y, r0.x +mad.f32 r0.z, r0.z, r3.z, r0.w +absneg.f r0.w, (neg)r1.x +max.f r3.z, r5.x, r5.y +add.f r2.y, r2.y, c6.y mov.f32f32 r5.x, c3.w -rcp r4.w, r4.w add.f r1.y, c3.z, (neg)r1.y -add.f r4.z, c5.w, r4.z -(ss)mul.f r2.z, r2.z, r4.x -mov.f32f32 r4.x, r5.x -mov.f32f32 r5.x, c3.z -mov.f32f32 r4.z, r4.z -mov.f32f32 r2.z, r2.z -mov.f32f32 r1.y, r1.y -mul.f r6.x, r0.w, r0.w -mul.f r4.z, r4.z, r4.w -(ss)max.f r4.w, r3.w, r2.z -sel.b32 r4.x, r4.x, r2.w, r5.x -mov.f32f32 r5.x, c3.w -min.f r2.z, r3.w, r2.z -mov.f32f32 r3.w, r4.w -mov.f32f32 r4.z, r4.z -mov.f32f32 r4.w, r5.x -mad.f32 r5.x, r1.x, r1.x, r6.x -min.f r3.z, r3.z, r3.w -max.f r3.w, r4.y, r4.z -mov.f32f32 r4.w, r4.w -max.f r2.z, r3.y, r2.z -min.f r3.y, r4.y, r4.z -mov.f32f32 r3.w, r3.w -sel.b32 r4.x, r4.w, r3.x, r4.x -mov.f32f32 r4.y, r5.x -mov.f32f32 r1.z, r1.z -min.f r3.z, r3.z, r3.w -mad.f32 r0.z, r0.z, r4.x, r2.y -max.f r2.y, r2.z, r3.y -nop -mov.f32f32 r2.z, r3.z -mov.f32f32 r0.z, r0.z -absneg.f r3.y, (neg)r0.x -mov.f32f32 r2.y, r2.y -mad.f32 r3.z, r5.w, r2.z, r1.x -mad.f32 r3.w, r0.x, r0.x, r4.y -mov.f32f32 r3.y, r3.y -mov.f32f32 r4.x, c3.w -mov.f32f32 r3.z, r3.z -add.f r2.y, r2.z, (neg)r2.y -cmps.f.ne r2.z, r1.z, c3.w -cov.u32f32 r1.z, r1.w -add.f r1.w, r3.z, c6.y -sel.b32 r3.y, r3.y, r2.w, r4.x -mov.f32f32 r3.z, c3.w -mov.f32f32 r2.y, r2.y -mov.f32f32 r1.w, r1.w -rsq r3.w, r3.w -(ss)mov.f32f32 r3.w, r3.w -mov.f32f32 r3.z, r3.z -mad.f32 r2.y, c6.z, r2.y, c3.z -mul.f r1.w, c6.x, r1.w -mul.f r3.w, c3.x, r3.w -mov.f32f32 r3.z, r3.z -mov.f32f32 r1.z, r1.z -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.y, r2.y -sel.b32 r3.y, r3.z, r3.x, r3.y -mul.f r1.y, r3.w, r1.y -cmps.f.ne r1.z, r1.z, c3.w -mov.f32f32 r3.z, r2.x -mad.f32 r2.x, c3.x, r0.x, c3.x -mad.f32 r1.x, c3.x, r1.x, c3.z -rcp r2.y, r2.y -(ss)mov.f32f32 r2.y, r2.y -mad.f32 r0.y, r0.y, r3.y, r0.z -mov.f32f32 r0.z, r1.y -mov.f32f32 r1.y, r2.x -mul.f r1.w, r1.w, r2.y -mov.f32f32 r0.y, r0.y -mov.f32f32 r2.x, r0.z -mov.f32f32 r0.z, r0.z -mov.f32f32 r1.w, r1.w +add.f r3.z, r3.w, (neg)r3.z +mul.f r2.y, c6.x, r2.y +sel.b32 r0.w, r0.w, r1.z, r5.x +mov.f32f32 r3.w, c3.w +mad.f32 r3.z, c6.z, r3.z, c3.z +mad.f32 r2.w, r0.x, r0.x, r2.w +sam (f32)(xyz)r5.x, r3.x, s#0, t#0 +(sy)(ss)sel.b32 r3.x, r4.w, r1.z, r5.z +sel.b32 r3.y, r4.z, r1.z, r5.y +sel.b32 r0.w, r3.w, r4.x, r0.w +sel.b32 r1.z, r4.y, r1.z, r5.x +mad.f32 r3.w, r1.x, r1.x, r2.w +rcp r2.w, r3.z +(ss)mul.f r2.y, r2.y, r2.w +mad.f32 r0.y, r0.y, r0.w, r0.z +mad.f32 r0.z, c3.x, r0.x, c3.z +mad.f32 r0.w, c3.x, r1.x, c3.x +mad.f32 r2.w, c3.x, r1.x, c3.x +sam (f32)(x)r4.y, r1.w, s#2, t#2 max.f r0.y, c3.w, r0.y -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.x, r1.x -mad.f32 r2.y, c3.x, r0.w, c3.x -mad.f32 r0.x, c3.x, r0.x, c3.x -mad.f32 r0.w, c3.x, r0.w, c3.x -exp2 r1.w, r1.w -(ss)mov.f32f32 r1.w, r1.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r3.w, r1.y -mov.f32f32 r1.x, r1.x -add.f r1.y, c3.z, r1.w -mov.f32f32 r1.w, r0.y -mov.f32f32 r0.y, r0.y -mov.f32f32 r4.x, r1.x -mov.f32f32 r1.x, r1.y -(sy)mul.f r1.y, r1.w, r5.y -sam (f32)(xyz)r4.y, r3.z, s#0, t#0 -mov.f32f32 r0.w, r0.w -(sy)mov.f32f32 r1.w, r4.w -mov.f32f32 r3.y, r4.z -(ss)mov.f32f32 r3.z, r4.y -mov.f32f32 r0.w, r0.w -rcp r1.x, r1.x -(ss)mov.f32f32 r1.x, r1.x -mul.f r1.y, r1.y, c5.y -mov.f32f32 r3.w, r1.w -mov.f32f32 r3.y, r3.y -mul.f r0.y, r0.y, r1.x -mov.f32f32 r1.x, r1.y -mov.f32f32 r4.y, r0.w -mov.f32f32 r0.w, r3.z -mov.f32f32 r0.y, r0.y -mad.f32 r0.z, r1.x, r5.z, r0.z +rsq r1.x, r3.w +(ss)mul.f r1.x, c3.x, r1.x +(ss)exp2 r1.w, r2.y +(ss)add.f r2.x, c3.z, r1.w +(sy)(ss)cmps.f.lt r2.y, r0.x, r4.y +mov.f32f32 r1.w, c3.z (rpt1)nop -mad.f32 r0.y, c3.x, r0.y, r2.x -mov.f32f32 r0.z, r0.z -sam (f32)(xyz)r4.x, r4.x, s#0, t#0 -(sy)mov.f32f32 r1.x, r4.z -mov.f32f32 r1.y, r4.y +mul.f r3.z, r0.y, r6.z mov.f32f32 r0.y, r0.y -mov.f32f32 r1.w, r2.y -mov.f32f32 r2.x, r4.x -mov.f32f32 r0.x, r0.x -sel.b32 r0.y, r0.z, r1.z, r0.y -mov.f32f32 r0.z, r1.w -mov.f32f32 r1.z, c3.z +rcp r2.x, r2.x +mul.f r1.x, r1.x, r1.y (rpt1)nop -(ss)mov.f32f32 r4.x, r0.z -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.z, r1.z -(rpt1)nop -mov.f32f32 r4.y, r0.x -mov.f32f32 r1.w, r0.z -(rpt4)nop -sam (f32)(xyz)r4.x, r4.x, s#0, t#0 -(sy)mov.f32f32 r0.x, r4.z -mov.f32f32 r0.z, r4.y -mov.f32f32 r1.z, r4.x -nop -sel.b32 r0.x, r1.x, r2.w, r0.x -sel.b32 r0.z, r1.y, r2.w, r0.z -sel.b32 r1.x, r2.x, r2.w, r1.z +(ss)mul.f r0.y, r0.y, r2.x +mov.f32f32 r1.y, r1.x +(ss)mul.f r2.x, r3.z, c5.y nop -sel.b32 r0.x, r3.w, r3.x, r0.x -sel.b32 r0.z, r3.y, r3.x, r0.z -sel.b32 r0.w, r0.w, r3.x, r1.x -nop -mul.f r0.x, r0.x, r0.y -mul.f r0.z, r0.z, r0.y -mul.f r0.y, r0.w, r0.y +mad.f32 r0.y, c3.x, r0.y, r1.y +sam (f32)(xyz)r4.y, r0.z, s#0, t#0 +(sy)(ss)sel.b32 r0.z, r4.w, r4.x, r3.x +mad.f32 r0.w, r2.x, r6.w, r1.x +sam (f32)(x)r4.w, r2.z, s#2, t#2 +(sy)cmps.f.lt r0.x, r0.x, r4.w +cov.u32f32 r1.x, r2.y +sel.b32 r1.y, r4.z, r4.x, r3.y +sel.b32 r1.z, r4.y, r4.x, r1.z +cov.u32f32 r0.x, r0.x +cmps.f.ne r1.x, r1.x, c3.w +(rpt1)nop +cmps.f.ne r0.x, r0.x, c3.w +(rpt2)nop +sel.b32 r0.x, r0.w, r0.x, r0.y +(rpt2)nop +mul.f r0.y, r0.z, r0.x +mul.f r0.z, r1.y, r0.x +mul.f r0.x, r1.z, r0.x nop -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.z, r0.z +mul.f r0.w, r0.y, c7.z mov.f32f32 r0.y, r0.y -nop -mov.f32f32 r0.w, r0.x -mov.f32f32 r1.x, r0.z -mov.f32f32 r1.y, r0.y -mov.f32f32 r0.x, r0.x -mul.f r0.w, r0.w, c7.z -mul.f r1.x, r1.x, c7.y -mul.f r1.y, r1.y, c7.x -mov.f32f32 r0.x, r0.x +mul.f r1.y, r0.z, c7.y +mul.f r2.x, r0.x, c7.x mov.f32f32 r0.z, r0.z -mov.f32f32 r0.y, r0.y +sel.b32 r1.z, r0.w, r1.x, r0.y +mov.f32f32 r0.x, r0.x nop -sel.b32 r0.x, r0.w, r2.z, r0.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.y, r0.y +sel.b32 r1.y, r1.y, r1.x, r0.z nop -mov.f32f32 r1.z, r0.x -sel.b32 r0.x, r1.x, r2.z, r0.z -sel.b32 r0.y, r1.y, r2.z, r0.y -(rpt1)nop -mov.f32f32 r1.y, r0.x -mov.f32f32 r1.x, r0.y +sel.b32 r1.x, r2.x, r1.x, r0.x end nop nop -nop ; FRAG: outputs: r1.x (1:0) ; FRAG: inputs: r0.x (5:20,cm=f,il=8,b=1) -; FRAG: 349 instructions, 0 half, 7 full +; FRAG: 205 instructions, 0 half, 8 full diff --git a/reference/webgl-water/webgl-water-39.asm b/reference/webgl-water/webgl-water-39.asm index 660019f..73f0f60 100644 --- a/reference/webgl-water/webgl-water-39.asm +++ b/reference/webgl-water/webgl-water-39.asm @@ -11,51 +11,32 @@ @out(r1.y) out5 @out(r1.z) out6 @out(r1.w) out7 +@const(c4.x) 0x3f800000, 0x3f155326, 0xbf800000, 0x00000000 (sy)(ss)add.f r0.x, c4.x, (neg)r1.y -mov.f32f32 r0.y, r1.z -mov.f32f32 r0.z, r1.x -mov.f32f32 r0.w, (0.000000) +mov.f32f32 r1.w, (0.000000) +mul.f r0.y, c0.w, r1.x +mul.f r0.z, c0.z, r1.x mad.f32 r0.x, c4.y, r0.x, c4.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.y, r0.z -mov.f32f32 r1.w, r0.w -mov.f32f32 r0.x, r0.x -mul.f r0.z, c0.w, r1.x -mul.f r0.w, c0.z, r1.x +mul.f r0.w, c0.x, r1.x mul.f r2.x, c0.y, r1.x -mad.f32 r0.z, c1.w, r0.x, r0.z -mad.f32 r0.w, c1.z, r0.x, r0.w -mad.f32 r2.x, c1.y, r0.x, r2.x -mul.f r1.x, c0.x, r1.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -mad.f32 r0.z, c2.w, r1.z, r0.z -mad.f32 r0.w, c2.z, r1.z, r0.w -mov.f32f32 r2.x, r2.x -mad.f32 r1.x, c1.x, r0.x, r1.x -add.f r0.z, r0.z, c3.w -add.f r2.y, r0.w, c3.z -mad.f32 r2.x, c2.y, r1.z, r2.x -mov.f32f32 r1.x, r1.x -mov.f32f32 r0.w, r0.z -mov.f32f32 r0.z, r2.y -add.f r2.x, r2.x, c3.y -mad.f32 r1.x, c2.x, r1.z, r1.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r1.z, r0.y -mov.f32f32 r0.y, r2.x -add.f r1.x, r1.x, c3.x -mov.f32f32 r2.x, r0.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r2.y, r1.y -mov.f32f32 r0.x, r1.x -mov.f32f32 r1.x, r2.x -(rpt2)nop -mov.f32f32 r1.y, r1.x -mov.f32f32 r1.x, r2.y -end nop +mov.f32f32 r1.y, r0.x +mad.f32 r0.x, c1.x, r0.x, r0.w +(rpt1)nop +mad.f32 r0.y, c1.w, r1.y, r0.y +mad.f32 r0.z, c1.z, r1.y, r0.z +mad.f32 r0.y, c2.w, r1.z, r0.y +mad.f32 r0.z, c2.z, r1.z, r0.z +mad.f32 r2.x, c1.y, r1.y, r2.x +mad.f32 r0.x, c2.x, r1.z, r0.x +add.f r0.w, r0.y, c3.w +add.f r0.z, r0.z, c3.z +mad.f32 r0.y, c2.y, r1.z, r2.x +add.f r0.x, r0.x, c3.x +(rpt1)nop +add.f r0.y, r0.y, c3.y +end ; VERT: outputs: r0.x (0:0) r1.x (5:20) ; VERT: inputs: r1.x (0:0,cm=7,il=8,b=0) -; VERT: 45 instructions, 0 half, 3 full +; VERT: 26 instructions, 0 half, 3 full diff --git a/reference/webgl-water/webgl-water-40.asm b/reference/webgl-water/webgl-water-40.asm index 1a0f09e..323714f 100644 --- a/reference/webgl-water/webgl-water-40.asm +++ b/reference/webgl-water/webgl-water-40.asm @@ -6,2331 +6,1360 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c7.x) 0x3f000000, 0x3ba3d70a, 0x3f800000, 0x40000000 +@const(c8.x) 0x3f1013a9, 0x00000000, 0x3f400d1b, 0x3e800000 +@const(c9.x) 0x40400000, 0x40800000, 0x49742400, 0x3f666666 +@const(c10.x) 0x00000000, 0x3f800000, 0x3ec00000, 0xbf800000 +@const(c11.x) 0x3f7fbe77, 0xc39044fe, 0xbe2ab368, 0x41200000 +@const(c12.x) 0x3e2ab368, 0x459c4000, 0x3e800000, 0x3f800000 +@const(c13.x) 0x41200000, 0x41000000, 0x40c00000, 0x3e800000 +@const(c14.x) 0x3e800000, 0x3f800000, 0x3fa00000, 0x00000000 +@const(c15.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.z, 0, r0.x absneg.f r0.w, (neg)c0.y absneg.f r1.x, (neg)c0.y absneg.f r1.y, (neg)c0.y mad.f32 r1.z, c7.x, r0.z, c7.x -mul.f r1.w, r0.w, r0.w -mul.f r2.x, r1.x, r1.x -mul.f r2.y, r1.y, r1.y -mov.f32f32 r2.z, r1.z -add.f r1.w, c7.z, (neg)r1.w -add.f r2.x, c7.z, (neg)r2.x +bary.f r2.x, 2, r0.x +mul.f r2.y, r0.w, r0.w +mul.f r2.z, r1.x, r1.x +mul.f r2.w, r1.y, r1.y +mad.f32 r1.w, c7.x, r2.x, c7.x add.f r2.y, c7.z, (neg)r2.y -mov.f32f32 r2.z, r2.z -bary.f r3.x, 2, r0.x -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r2.y, r2.y -mad.f32 r3.y, c7.x, r3.x, c7.x -mul.f r1.w, r1.w, c8.x -mul.f r2.x, r2.x, c8.x +add.f r2.z, c7.z, (neg)r2.z +add.f r2.w, c7.z, (neg)r2.w +absneg.f r3.x, (neg)c0.y mul.f r2.y, r2.y, c8.x -mov.f32f32 r2.w, r3.y -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r2.y, r2.y -mov.f32f32 r2.w, r2.w -add.f r1.w, c7.z, (neg)r1.w -add.f r2.x, c7.z, (neg)r2.x +mul.f r2.z, r2.z, c8.x +sam (f32)(zw)r3.y, r1.z, s#2, t#2 +(sy)mad.f32 r3.y, c7.y, r3.w, r1.z +mad.f32 r3.z, c7.y, r4.x, r1.w +(ss)add.f r1.z, c7.z, (neg)r2.y +add.f r1.w, c7.z, (neg)r2.z +mul.f r2.y, r2.w, c8.x +mul.f r2.z, r3.x, r3.x +mov.f32f32 r2.w, r1.z +mov.f32f32 r3.w, r1.w +sam (f32)(zw)r4.x, r3.y, s#2, t#2 +(ss)mov.f32f32 r3.y, r3.y add.f r2.y, c7.z, (neg)r2.y -absneg.f r3.z, (neg)c0.y -absneg.f r3.w, (neg)c0.y -absneg.f r4.x, (neg)c0.y -sam (f32)(zw)r4.y, r2.z, s#2, t#2 -(sy)mad.f32 r1.z, c7.y, r4.w, r1.z -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r2.y, r2.y -mov.f32f32 r1.z, r1.z -(ss)mov.f32f32 r2.z, r1.w -mov.f32f32 r2.w, r2.x -mov.f32f32 r4.y, r2.y -mov.f32f32 r4.z, r1.z -mul.f r4.w, r3.z, r3.z -mul.f r5.y, r3.w, r3.w -mul.f r5.z, r4.x, r4.x -mov.f32f32 r5.w, r4.z -mad.f32 r3.y, c7.y, r5.x, r3.y -sqrt r2.z, r2.z -(ss)mov.f32f32 r2.z, r2.z -mov.f32f32 r4.z, r0.w +(sy)mad.f32 r4.x, c7.y, r4.z, r3.y +mov.f32f32 r3.y, r3.z +add.f r2.z, c7.z, (neg)r2.z +mad.f32 r4.y, c7.y, r4.w, r3.y sqrt r2.w, r2.w -(ss)mov.f32f32 r2.w, r2.w -mov.f32f32 r3.y, r3.y -mov.f32f32 r5.x, r1.x -sqrt r4.y, r4.y -(ss)mov.f32f32 r4.y, r4.y -mad.f32 r2.z, c8.z, r4.z, r2.z -mov.f32f32 r4.z, r3.y -mad.f32 r2.w, c8.z, r5.x, r2.w -mov.f32f32 r5.x, r1.y -add.f r4.w, c7.z, (neg)r4.w -mov.f32f32 r6.x, r4.z -mov.f32f32 r2.z, r2.z +(ss)mad.f32 r2.w, c8.z, r0.w, r2.w +sqrt r3.y, r3.w +(ss)mad.f32 r3.y, c8.z, r1.x, r3.y +mov.f32f32 r3.z, r2.y +mul.f r2.z, r2.z, c8.x +(ss)mul.f r3.w, r2.w, c10.x +absneg.f r4.z, (neg)c0.z +sam (f32)(zw)r4.w, r4.x, s#2, t#2 +(ss)mov.f32f32 r4.x, r4.x mov.f32f32 r2.w, r2.w -mad.f32 r4.y, c8.z, r5.x, r4.y -mov.f32f32 r4.z, r4.w -add.f r4.w, c7.z, (neg)r5.y -add.f r5.x, c7.z, (neg)r5.z -sam (f32)(zw)r5.y, r5.w, s#2, t#2 -(sy)mad.f32 r1.z, c7.y, r5.w, r1.z -mul.f r5.y, r2.z, c10.x -mul.f r5.z, r2.z, c10.y -mul.f r2.z, r2.z, c10.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r5.y, r5.y -(ss)absneg.f r5.w, (neg)c0.z -mov.f32f32 r5.z, r5.z -mov.f32f32 r6.y, r1.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r2.z, r2.z -absneg.f r6.z, (neg)c0.x -mov.f32f32 r6.w, r6.y -mad.f32 r3.y, c7.y, r6.x, r3.y -mov.f32f32 r5.w, r5.w -mad.f32 r0.w, c8.z, r0.w, (neg)r5.z +(sy)mad.f32 r4.w, c7.y, r5.y, r4.x +mov.f32f32 r4.x, r4.y +mov.f32f32 r4.y, r3.y +mad.f32 r5.x, c7.y, r5.z, r4.x +mul.f r4.x, r2.w, c10.y +mul.f r2.w, r2.w, c10.x +absneg.f r5.y, (neg)c0.x +mad.f32 r0.w, c8.z, r0.w, (neg)r4.x +cmps.f.lt r1.z, r1.z, c8.y +mul.f r4.x, r4.y, c10.y +sam (f32)(zw)r5.z, r4.w, s#2, t#2 +(ss)mov.f32f32 r4.w, r4.w +mad.f32 r2.w, c8.z, r5.y, (neg)r2.w +(sy)mad.f32 r5.y, c7.y, r6.x, r4.w +mov.f32f32 r4.w, r5.x +cov.u32f32 r1.z, r1.z +mad.f32 r5.z, c7.y, r6.y, r4.w +mad.f32 r1.x, c8.z, r1.x, (neg)r4.x cmps.f.lt r1.w, r1.w, c8.y -mov.f32f32 r3.y, r3.y -mad.f32 r5.y, c8.z, r5.w, (neg)r5.y -mov.f32f32 r5.z, r6.z +mul.f r4.x, r4.y, c10.x +cmps.f.ne r1.z, r1.z, c8.y +mov.f32f32 r4.y, c8.y +mov.f32f32 r4.w, c8.y +sam (f32)(zw)r5.w, r5.y, s#2, t#2 +mov.f32f32 r5.x, r5.y cov.u32f32 r1.w, r1.w -mov.f32f32 r5.w, r3.y -mul.f r6.x, r2.w, c10.x -mul.f r6.y, r2.w, c10.y -mul.f r2.w, r2.w, c10.x -mov.f32f32 r7.x, r5.w +(sy)mad.f32 r5.x, c7.y, r6.y, r5.x +(ss)mov.f32f32 r5.y, r5.z +sel.b32 r0.w, r4.y, r1.z, r0.w +mad.f32 r5.y, c7.y, r6.z, r5.y +sel.b32 r2.w, r4.w, r1.z, r2.w cmps.f.ne r1.w, r1.w, c8.y -mov.f32f32 r5.w, c8.y -mov.f32f32 r6.z, c8.y -mad.f32 r2.z, c8.z, r5.z, (neg)r2.z -mov.f32f32 r5.z, c8.y -mov.f32f32 r6.x, r6.x -sam (f32)(zw)r6.w, r6.w, s#2, t#2 -(sy)mad.f32 r1.z, c7.y, r7.y, r1.z -sel.b32 r5.y, r5.w, r1.w, r5.y -sel.b32 r0.w, r6.z, r1.w, r0.w -sel.b32 r1.w, r5.z, r1.w, r2.z -mov.f32f32 r1.z, r1.z -absneg.f r2.z, (neg)r5.y +mov.f32f32 r4.y, c8.y absneg.f r0.w, (neg)r0.w -absneg.f r1.w, (neg)r1.w -mov.f32f32 r5.y, r1.z -mov.f32f32 r5.z, r2.z -mov.f32f32 r5.w, r2.z -mov.f32f32 r6.z, r0.w -(ss)mov.f32f32 r6.w, r5.y -mad.f32 r3.y, c7.y, r7.z, r3.y -mov.f32f32 r5.y, r0.w -mov.f32f32 r7.x, r1.w -mov.f32f32 r7.y, r1.w -mov.f32f32 r3.y, r3.y -rcp r5.z, r5.z +absneg.f r2.w, (neg)r2.w +absneg.f r4.w, (neg)c0.x +sam (f32)(zw)r5.x, r5.x, s#2, t#2 +(sy)(ss)mul.f r5.x, r5.z, r5.z +sel.b32 r1.x, r4.y, r1.w, r1.x +mad.f32 r4.y, r5.w, r5.w, r5.x +mad.f32 r4.x, c8.z, r4.w, (neg)r4.x +mov.f32f32 r4.w, c8.y +absneg.f r1.x, (neg)r1.x +add.f r4.y, c7.z, (neg)r4.y +rcp r5.x, r0.w nop -rcp r5.w, r5.w +rcp r5.y, r0.w nop -rcp r6.z, r6.z -mov.f32f32 r7.z, r0.w -mov.f32f32 r7.w, r3.y -rcp r5.y, r5.y +rcp r6.x, r2.w nop -rcp r8.x, r7.x +rcp r6.y, r2.w nop -rcp r7.y, r7.y -absneg.f r8.y, (neg)c0.z -(ss)mov.f32f32 r7.x, r7.w -mov.f32f32 r6.y, r6.y -mov.f32f32 r1.x, r1.x -mov.f32f32 r2.w, r2.w -rcp r7.z, r7.z -mov.f32f32 r7.w, r8.y -absneg.f r8.y, (neg)c0.x -mad.f32 r1.x, c8.z, r1.x, (neg)r6.y -sam (f32)(zw)r8.z, r6.w, s#2, t#2 -(sy)mad.f32 r1.z, c7.y, r9.x, r1.z -mad.f32 r3.y, c7.y, r9.y, r3.y -mad.f32 r6.x, c8.z, r7.w, (neg)r6.x -cmps.f.lt r2.x, r2.x, c8.y -mov.f32f32 r1.z, r1.z -mov.f32f32 r3.y, r3.y -mov.f32f32 r6.y, r8.y -cov.u32f32 r2.x, r2.x -(ss)mov.f32f32 r6.w, r1.z -mov.f32f32 r7.x, r3.y -mad.f32 r2.w, c8.z, r6.y, (neg)r2.w -cmps.f.ne r2.x, r2.x, c8.y -mov.f32f32 r8.y, r6.w -mov.f32f32 r8.z, r7.x -mov.f32f32 r6.y, c8.y -mov.f32f32 r6.w, c8.y -mov.f32f32 r7.x, c8.y -mov.f32f32 r4.y, r4.y -mul.f r4.z, r4.z, c8.x -mov.f32f32 r4.w, r4.w -sam (f32)(zw)r8.y, r8.y, s#2, t#2 -(sy)mad.f32 r1.z, c7.y, r8.w, r1.z -mad.f32 r3.y, c7.y, r9.x, r3.y -sel.b32 r6.x, r6.y, r2.x, r6.x -sel.b32 r1.x, r6.w, r2.x, r1.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r3.y, r3.y -absneg.f r6.x, (neg)r6.x -absneg.f r1.x, (neg)r1.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r3.y, r3.y -mov.f32f32 r6.y, r6.x -mov.f32f32 r6.w, r6.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r3.y, r3.y -mov.f32f32 r7.w, r1.x -(ss)mov.f32f32 r8.y, r1.x -mov.f32f32 r8.z, r1.z -mov.f32f32 r8.w, r3.y -rcp r1.z, r6.y +rcp r6.z, r0.w +sel.b32 r4.x, r4.w, r1.w, r4.x +rcp r4.w, r1.x nop -rcp r3.y, r6.w -sel.b32 r2.x, r7.x, r2.x, r2.w -mov.f32f32 r2.w, r1.x -(ss)rcp r6.y, r7.w -mul.f r6.w, r4.y, c10.x -mul.f r7.x, r4.y, c10.y -(ss)rcp r7.w, r8.y -absneg.f r2.x, (neg)r2.x -sam (f32)(zw)r8.y, r8.z, s#2, t#2 -(sy)(ss)mul.f r8.y, r8.w, r8.w -mov.f32f32 r8.z, r9.x -mad.f32 r8.y, r9.x, r9.x, r8.y -mov.f32f32 r8.w, r8.w -add.f r9.x, r0.z, (neg)c6.x -mov.f32f32 r9.y, r2.x -add.f r8.y, c7.z, (neg)r8.y -mov.f32f32 r9.z, r8.z -mov.f32f32 r9.w, r8.w -mul.f r10.x, r9.x, r9.x -mov.f32f32 r8.y, r8.y -bary.f (ei)r0.x, 1, r0.x -rcp r0.y, r9.y -(ss)mov.f32f32 r9.y, r2.x -rcp r2.w, r2.w -mov.f32f32 r6.w, r6.w -absneg.f r10.y, (neg)c0.z -add.f r10.z, r0.x, (neg)c6.y +rcp r6.w, r1.x +nop +sqrt r4.y, r4.y +(ss)mov.f32f32 r7.x, r4.y +add.f r7.y, r0.z, (neg)c6.x +absneg.f r4.x, (neg)r4.x +rcp r7.z, r1.x +nop +sqrt r3.z, r3.z mov.f32f32 r7.x, r7.x -sqrt r8.y, r8.y -(ss)mov.f32f32 r8.y, r8.y -mov.f32f32 r10.y, r10.y -mov.f32f32 r1.y, r1.y -mul.f r4.y, r4.y, c10.x -mov.f32f32 r8.y, r8.y -mad.f32 r10.x, r10.z, r10.z, r10.x -rcp r9.y, r9.y -mad.f32 r6.w, c8.z, r10.y, (neg)r6.w +mov.f32f32 r7.w, r5.z +mul.f r8.x, r7.y, r7.y +bary.f (ei)r0.x, 1, r0.x +(ss)mad.f32 r0.y, c8.z, r1.y, r3.z +add.f r2.z, c7.z, (neg)r2.z +(ss)mad.f32 r3.z, c8.z, r4.z, (neg)r3.w +add.f r3.w, r0.x, (neg)c6.y +rcp r4.z, r4.x +nop +rcp r8.y, r4.x +mov.f32f32 r8.z, r0.y +mov.f32f32 r8.w, r2.z +mad.f32 r8.x, r3.w, r3.w, r8.x +add.f r9.x, r2.x, (neg)c6.z +mul.f r9.y, r8.z, c10.y +mul.f r8.z, r8.z, c10.x +absneg.f r9.z, (neg)c0.x +mad.f32 r8.x, r9.x, r9.x, r8.x +mad.f32 r1.y, c8.z, r1.y, (neg)r9.y cmps.f.lt r2.y, r2.y, c8.y -mov.f32f32 r10.y, r8.y -mov.f32f32 r10.x, r10.x -add.f r10.w, r3.x, (neg)c6.z +mad.f32 r8.z, c8.z, r9.z, (neg)r8.z +sqrt r8.w, r8.w +(ss)mad.f32 r8.w, c8.z, r3.x, r8.w +mov.f32f32 r9.y, c8.y cov.u32f32 r2.y, r2.y -mad.f32 r1.y, c8.z, r1.y, (neg)r7.x -mov.f32f32 r4.y, r4.y -mad.f32 r7.x, r10.w, r10.w, r10.x +rsq r8.x, r8.x +(ss)mov.f32f32 r9.z, r8.x +mov.f32f32 r9.w, r8.w +sel.b32 r1.z, r9.y, r1.z, r3.z +mul.f r3.y, r3.y, c10.x +mul.f r3.z, r7.y, r9.z cmps.f.ne r2.y, r2.y, c8.y -mov.f32f32 r10.x, c8.y -mov.f32f32 r11.x, c8.y -absneg.f r11.y, (neg)c0.x -mov.f32f32 r4.z, r4.z -mul.f r4.w, r4.w, c8.x -rsq r7.x, r7.x -(ss)mov.f32f32 r7.x, r7.x -sel.b32 r6.w, r10.x, r2.y, r6.w -sel.b32 r1.y, r11.x, r2.y, r1.y -mov.f32f32 r10.x, r11.y -mul.f r9.x, r9.x, r7.x -absneg.f r6.w, (neg)r6.w +mov.f32f32 r7.y, c8.y +mov.f32f32 r9.y, c8.y +mov.f32f32 r10.x, r3.z +mul.f r3.z, r5.z, (neg)r3.z +mul.f r3.w, r3.w, r9.z +sel.b32 r1.y, r7.y, r2.y, r1.y +mul.f r5.z, r7.w, r10.x +mul.f r7.y, r7.w, r10.x +mov.f32f32 r9.z, r3.w +mad.f32 r3.z, r4.y, (neg)r3.w, r3.z +mul.f r3.w, r9.x, r8.x absneg.f r1.y, (neg)r1.y +mad.f32 r4.y, r7.x, r9.z, r5.z +mov.f32f32 r5.z, r5.w +(ss)mov.f32f32 r8.x, r3.w +mad.f32 r7.y, r7.x, r9.z, r7.y +mad.f32 r3.z, r5.w, (neg)r3.w, r3.z +sel.b32 r3.w, r9.y, r2.y, r8.z +mad.f32 r4.y, r5.z, r8.x, r4.y +mad.f32 r5.w, r5.z, r8.x, r7.y +add.f r3.z, c7.z, (neg)r3.z +rcp r7.y, r1.y +nop +rcp r8.z, r1.y +mul.f r9.x, r4.y, r4.y +mul.f r9.y, r5.w, r7.w +mul.f r10.y, r5.w, r5.z +mul.f r5.w, r5.w, r7.x +add.f r9.x, c7.z, (neg)r9.x +mul.f r9.y, c7.w, r9.y +mul.f r10.y, c7.w, r10.y +mul.f r5.w, c7.w, r5.w +mul.f r9.x, r9.x, c8.x +add.f r9.y, r10.x, (neg)r9.y +add.f r10.y, r8.x, (neg)r10.y +add.f r5.w, r9.z, (neg)r5.w +add.f r9.x, c7.z, (neg)r9.x +mov.f32f32 r10.z, r9.y +add.f r10.w, r0.z, (neg)c1.x +mov.f32f32 r11.x, r10.y +mov.f32f32 r11.y, r9.x +mov.f32f32 r11.z, r10.z +mul.f r11.w, r10.w, r10.z +add.f r12.x, r0.x, (neg)c1.y +mov.f32f32 r12.y, r10.z +mul.f r12.z, c0.x, r11.z +mov.f32f32 r12.w, r5.w +sqrt r11.y, r11.y +(ss)mad.f32 r4.y, c8.z, r4.y, r11.y +mad.f32 r5.w, r12.x, r5.w, r11.w +(ss)add.f r11.y, r2.x, (neg)c1.z +rcp r11.w, r11.z +absneg.f r13.x, (neg)r0.z +mov.f32f32 r13.y, r4.y +rcp r13.z, r12.y +absneg.f r13.w, (neg)r0.z +mad.f32 r5.w, r11.y, r10.y, r5.w +add.f r10.y, c7.z, r13.x +mul.f r7.x, r13.y, r7.x +add.f r14.x, c7.z, r13.w +mad.f32 r7.x, c8.z, r9.z, (neg)r7.x +cmps.f.lt r9.x, r9.x, c8.y +mul.f r5.w, c7.w, r5.w +(ss)mul.f r9.z, r10.y, r11.w +rcp r10.y, r10.z +add.f r11.w, c10.w, r13.x +cov.u32f32 r9.x, r9.x +mov.f32f32 r13.x, r5.w +mul.f r14.y, r10.z, r10.z +(ss)mul.f r10.y, r11.w, r10.y +cmps.f.ne r9.x, r9.x, c8.y +mov.f32f32 r11.w, c8.y +mad.f32 r14.y, r12.w, r12.w, r14.y +max.f r9.z, r10.y, r9.z +mad.f32 r10.y, r11.x, r11.x, r14.y +sel.b32 r7.x, r11.w, r9.x, r7.x +mul.f r4.y, r4.y, r7.w +mov.f32f32 r7.w, r12.w mad.f32 r4.y, c8.z, r10.x, (neg)r4.y -mov.f32f32 r9.x, r9.x -mov.f32f32 r10.x, r6.w -mov.f32f32 r11.x, r6.w -mov.f32f32 r11.y, r1.y -mul.f r11.z, r8.w, r9.x -mul.f r10.z, r10.z, r7.x -mul.f r11.w, r8.w, r9.x -mul.f r12.x, r8.w, (neg)r9.x -rcp r10.x, r10.x -mov.f32f32 r12.y, r1.y -mov.f32f32 r10.z, r10.z -rcp r11.x, r11.x +mov.f32f32 r10.x, r7.x +mov.f32f32 r11.w, c8.y +mul.f r14.y, c9.y, r10.y +mul.f r10.w, r10.w, r10.w +mov.f32f32 r14.z, r7.x +sel.b32 r4.y, r11.w, r9.x, r4.y +mad.f32 r10.w, r12.x, r12.x, r10.w +rcp r11.w, r10.x +absneg.f r12.x, (neg)r0.x +add.f r14.w, r0.z, (neg)c1.x +mad.f32 r10.w, r11.y, r11.y, r10.w +mov.f32f32 r11.y, c2.x +add.f r15.x, c10.w, r12.x +mul.f r15.y, r14.w, r4.y +add.f r15.z, r0.x, (neg)c1.y +mul.f r11.y, r11.y, c2.x +(ss)mul.f r11.w, r15.x, r11.w +rcp r15.x, r10.x +add.f r12.x, c7.w, r12.x +mad.f32 r15.y, r15.z, r7.x, r15.y +mul.f r5.z, r13.y, r5.z +add.f r10.w, r10.w, (neg)r11.y +(ss)mul.f r11.y, r12.x, r15.x +mad.f32 r5.z, c8.z, r8.x, (neg)r5.z +mov.f32f32 r8.x, c8.y +mul.f r10.w, r14.y, r10.w +max.f r11.y, r11.w, r11.y +mov.f32f32 r11.w, r4.y +sel.b32 r5.z, r8.x, r9.x, r5.z +add.f r8.x, r2.x, (neg)c1.z +mad.f32 r9.x, r13.x, r13.x, (neg)r10.w +rcp r10.w, r7.w nop -rcp r11.y, r11.y -mov.f32f32 r12.z, c8.y -mov.f32f32 r12.w, r1.y -mad.f32 r11.z, r8.y, r10.z, r11.z -mad.f32 r11.w, r8.y, r10.z, r11.w -mad.f32 r12.x, r8.y, (neg)r10.z, r12.x -rcp r12.y, r12.y -sel.b32 r2.y, r12.z, r2.y, r4.y -mov.f32f32 r4.y, r11.z -mul.f r7.x, r10.w, r7.x -mov.f32f32 r10.w, r11.w -mov.f32f32 r11.z, r12.x -absneg.f r2.y, (neg)r2.y -mov.f32f32 r7.x, r7.x -rcp r11.w, r12.w -add.f r4.z, c7.z, (neg)r4.z -mov.f32f32 r4.w, r4.w -mov.f32f32 r12.x, r2.y -mad.f32 r4.y, r8.z, r7.x, r4.y -mad.f32 r10.w, r8.z, r7.x, r10.w -mad.f32 r11.z, r8.z, (neg)r7.x, r11.z -mov.f32f32 r12.z, r2.y -(ss)mul.f r12.w, r4.y, r4.y -mul.f r8.w, r10.w, r8.w -mul.f r8.z, r10.w, r8.z -mul.f r8.y, r10.w, r8.y -add.f r10.w, c7.z, (neg)r12.w -mov.f32f32 r8.w, r8.w -mov.f32f32 r8.z, r8.z -mov.f32f32 r8.y, r8.y -mov.f32f32 r10.w, r10.w -mul.f r8.w, c7.w, r8.w -mul.f r8.z, c7.w, r8.z -mul.f r8.y, c7.w, r8.y -mul.f r10.w, r10.w, c8.x -mov.f32f32 r8.w, r8.w -mov.f32f32 r8.z, r8.z -mov.f32f32 r8.y, r8.y -mov.f32f32 r10.w, r10.w -add.f r8.w, r9.x, (neg)r8.w -add.f r8.z, r7.x, (neg)r8.z -add.f r8.y, r10.z, (neg)r8.y -add.f r10.w, c7.z, (neg)r10.w -mov.f32f32 r8.w, r8.w -mov.f32f32 r8.z, r8.z -mov.f32f32 r8.y, r8.y -mov.f32f32 r10.w, r10.w -mul.f r12.w, r8.w, r8.w -mov.f32f32 r13.x, r8.w -mad.f32 r12.w, r8.y, r8.y, r12.w -mov.f32f32 r13.y, r10.w -mov.f32f32 r13.z, r8.w -mov.f32f32 r13.w, r13.x -mov.f32f32 r12.w, r12.w -mov.f32f32 r14.x, r13.x -mov.f32f32 r14.y, r13.z -mad.f32 r12.w, r8.z, r8.z, r12.w -sqrt r13.y, r13.y -(ss)mov.f32f32 r13.y, r13.y -mov.f32f32 r4.y, r4.y -rcp r13.w, r13.w -absneg.f r14.z, (neg)r0.z -mul.f r14.w, c9.y, r12.w -add.f r15.x, r0.z, (neg)c1.x -mad.f32 r4.y, c8.z, r4.y, r13.y -add.f r13.y, c10.w, r14.z -rcp r14.y, r14.y -absneg.f r15.y, (neg)r0.z -mul.f r15.z, r15.x, r15.x -mov.f32f32 r4.y, r4.y -add.f r15.w, r0.x, (neg)c1.y -(ss)mul.f r13.y, r13.y, r13.w -(ss)add.f r13.w, c10.w, r15.y -mul.f r9.w, r4.y, r9.w -mad.f32 r15.z, r15.w, r15.w, r15.z -mov.f32f32 r13.y, r13.y -mov.f32f32 r16.x, r13.x -mov.f32f32 r9.w, r9.w -mov.f32f32 r9.x, r9.x -mov.f32f32 r15.z, r15.z -add.f r16.y, r3.x, (neg)c1.z -mul.f r13.w, r13.w, r14.y -mad.f32 r9.x, c8.z, r9.x, (neg)r9.w -cmps.f.lt r9.w, r10.w, c8.y -mad.f32 r10.w, r16.y, r16.y, r15.z -rcp r14.y, r16.x -add.f r14.z, c7.z, r14.z -mov.f32f32 r13.w, r13.w -cov.u32f32 r9.w, r9.w -mov.f32f32 r10.w, r10.w -mov.f32f32 r15.z, c2.x -mov.f32f32 r14.z, r14.z -cmps.f.ne r9.w, r9.w, c8.y -(ss)mov.f32f32 r16.x, c8.y -mul.f r15.z, r15.z, c2.x -(ss)mul.f r14.y, r14.z, r14.y -mov.f32f32 r14.z, r13.z -sel.b32 r9.x, r16.x, r9.w, r9.x -add.f r10.w, r10.w, (neg)r15.z -mov.f32f32 r14.y, r14.y -mul.f r9.z, r4.y, r9.z -mul.f r15.z, r9.x, r9.x -mul.f r4.y, r4.y, r10.y -mov.f32f32 r10.y, r9.x -mov.f32f32 r10.w, r10.w -mov.f32f32 r16.x, r9.x -mov.f32f32 r4.y, r4.y -mov.f32f32 r10.z, r10.z -mov.f32f32 r16.z, r10.y -mul.f r10.w, r14.w, r10.w -mov.f32f32 r14.w, r16.x -mad.f32 r4.y, c8.z, r10.z, (neg)r4.y -mov.f32f32 r10.z, c8.y -mov.f32f32 r10.w, r10.w -mul.f r15.x, r15.x, r8.w -rcp r16.z, r16.z -absneg.f r16.w, (neg)r0.z -sel.b32 r4.y, r10.z, r9.w, r4.y -mad.f32 r10.z, r15.w, r8.y, r15.x -rcp r14.w, r14.w +rcp r12.x, r14.z +absneg.f r13.x, (neg)r0.x +absneg.f r13.y, (neg)r0.x +rcp r14.y, r11.w absneg.f r15.x, (neg)r0.z -add.f r15.w, c10.w, r16.w -mad.f32 r15.z, r4.y, r4.y, r15.z -mov.f32f32 r10.z, r10.z -add.f r17.x, c10.w, r15.x -(ss)mul.f r15.w, r15.w, r16.z -mov.f32f32 r15.z, r15.z -mov.f32f32 r9.z, r9.z -mov.f32f32 r7.x, r7.x -mov.f32f32 r15.w, r15.w -(ss)mov.f32f32 r16.z, r10.y -mad.f32 r10.z, r16.y, r8.z, r10.z -mad.f32 r7.x, c8.z, r7.x, (neg)r9.z -mov.f32f32 r9.z, c8.y +mad.f32 r15.y, r8.x, r5.z, r15.y +add.f r15.w, c10.w, r13.x +add.f r16.x, c10.w, r13.y +add.f r16.y, c7.z, r15.x +mul.f r15.y, c7.w, r15.y +sqrt r16.z, r9.x +(ss)add.f r5.w, (neg)r5.w, (neg)r16.z +mul.f r10.y, c7.w, r10.y +mul.f r14.y, r16.y, r14.y +rcp r16.y, r4.y +add.f r15.x, c10.w, r15.x +mov.f32f32 r16.z, r15.y +mul.f r16.w, r4.y, r4.y +mul.f r10.w, r15.w, r10.w +(ss)mul.f r15.x, r15.x, r16.y +mad.f32 r15.w, r7.x, r7.x, r16.w +rcp r10.y, r10.y +(ss)mul.f r5.w, r5.w, r10.y +(ss)mad.f32 r10.y, r5.z, r5.z, r15.w +max.f r14.y, r15.x, r14.y +rcp r15.x, r7.w +mul.f r12.x, r16.x, r12.x +rcp r14.z, r14.z +cmps.f.lt r15.w, c8.y, r5.w +mul.f r16.x, c9.y, r10.y +min.f r11.y, r14.y, r11.y +mov.f32f32 r14.y, r5.z +mul.f r14.w, r14.w, r14.w +cov.u32f32 r15.w, r15.w +mad.f32 r14.w, r15.z, r15.z, r14.w +add.f r13.x, c7.w, r13.x +add.f r13.y, c7.w, r13.y +mad.f32 r8.x, r8.x, r8.x, r14.w +rcp r14.w, r14.y +absneg.f r15.z, (neg)r2.x +mov.f32f32 r16.y, c2.x +cmps.f.ne r15.w, r15.w, c8.y +mov.f32f32 r16.w, c8.y +add.f r17.x, c7.z, r15.z +mul.f r16.y, r16.y, c2.x +mov.f32f32 r17.y, c7.z +(ss)mul.f r13.x, r13.x, r15.x mul.f r14.w, r17.x, r14.w -mul.f r10.z, c7.w, r10.z -max.f r13.y, r13.y, r14.y -sel.b32 r7.x, r9.z, r9.w, r7.x -rcp r9.z, r16.z -add.f r9.w, c7.z, r16.w -mov.f32f32 r10.z, r10.z -mov.f32f32 r14.y, r14.w -mad.f32 r14.w, r7.x, r7.x, r15.z -mov.f32f32 r9.w, r9.w -mad.f32 r10.w, r10.z, r10.z, (neg)r10.w -mov.f32f32 r15.z, r16.x -mul.f r16.y, c9.y, r14.w -(ss)add.f r16.z, r0.z, (neg)c1.x -(ss)mul.f r9.z, r9.w, r9.z -mov.f32f32 r9.w, r10.w -mov.f32f32 r10.w, r13.y -mul.f r13.y, r16.z, r16.z -add.f r16.w, r0.x, (neg)c1.y -mov.f32f32 r9.z, r9.z -mov.f32f32 r17.x, r9.w -rcp r15.z, r15.z -add.f r15.x, c7.z, r15.x -mad.f32 r13.y, r16.w, r16.w, r13.y -max.f r9.z, r15.w, r9.z -mov.f32f32 r15.w, r8.y -mov.f32f32 r15.x, r15.x -mov.f32f32 r13.y, r13.y -add.f r17.y, r3.x, (neg)c1.z -mov.f32f32 r9.z, r9.z -mov.f32f32 r17.z, r4.y -sqrt r17.x, r17.x -(ss)mov.f32f32 r17.x, r17.x -mad.f32 r13.y, r17.y, r17.y, r13.y -mov.f32f32 r10.z, r10.z -mov.f32f32 r17.w, r17.z -mul.f r15.x, r15.x, r15.z -mov.f32f32 r13.y, r13.y -mov.f32f32 r15.z, c2.x -add.f r10.z, (neg)r10.z, (neg)r17.x -mov.f32f32 r15.x, r15.x -mov.f32f32 r17.x, r15.w -mul.f r15.z, r15.z, c2.x -rcp r17.w, r17.w -absneg.f r18.x, (neg)r0.x -mov.f32f32 r10.z, r10.z -mov.f32f32 r12.w, r12.w -add.f r13.y, r13.y, (neg)r15.z -add.f r15.z, c10.w, r18.x -max.f r14.y, r14.y, r15.x -mul.f r12.w, c7.w, r12.w -mov.f32f32 r13.y, r13.y -(ss)mul.f r15.x, r15.z, r17.w -mov.f32f32 r14.y, r14.y -mov.f32f32 r15.z, r4.y -mul.f r13.y, r16.y, r13.y -mov.f32f32 r15.x, r15.x -mov.f32f32 r16.y, r17.z -mov.f32f32 r12.w, r12.w -mov.f32f32 r13.y, r13.y -mul.f r16.z, r16.z, r9.x -(ss)mov.f32f32 r17.w, r15.z -mad.f32 r16.z, r16.w, r4.y, r16.z -rcp r16.w, r17.x -(ss)absneg.f r17.x, (neg)r0.x -rcp r16.y, r16.y -add.f r18.x, c7.w, r18.x -rcp r12.w, r12.w -(ss)mov.f32f32 r12.w, r12.w -mov.f32f32 r16.z, r16.z -add.f r18.y, c10.w, r17.x -mad.f32 r16.z, r17.y, r7.x, r16.z -mov.f32f32 r17.y, r18.x -mul.f r10.z, r10.z, r12.w -rcp r12.w, r17.w -(ss)absneg.f r17.w, (neg)r0.x -mul.f r16.z, c7.w, r16.z -mul.f r16.y, r17.y, r16.y -mov.f32f32 r10.z, r10.z -add.f r17.y, c10.w, r17.w -mov.f32f32 r16.z, r16.z -mov.f32f32 r16.y, r16.y -cmps.f.lt r18.x, c8.y, r10.z -(ss)mul.f r12.w, r17.y, r12.w -mad.f32 r13.y, r16.z, r16.z, (neg)r13.y -max.f r15.x, r15.x, r16.y -cov.u32f32 r16.y, r18.x -mov.f32f32 r12.w, r12.w -mov.f32f32 r13.y, r13.y -mov.f32f32 r15.x, r15.x +rcp r15.x, r5.z +add.f r15.z, c10.w, r15.z +add.f r8.x, r8.x, (neg)r16.y +sel.b32 r16.y, r16.w, r15.w, r17.y +mov.f32f32 r9.x, r9.x +(ss)mul.f r15.x, r15.z, r15.x +mul.f r8.x, r16.x, r8.x +max.f r10.w, r10.w, r13.x +mul.f r13.x, r13.y, r14.z +max.f r13.y, r15.x, r14.w +mad.f32 r8.x, r16.z, r16.z, (neg)r8.x +(ss)cmps.f.lt r14.z, c8.y, r9.x +min.f r9.z, r9.z, r10.w +min.f r10.w, r11.y, r13.y +mov.f32f32 r11.y, r11.x +max.f r12.x, r12.x, r13.x +mov.f32f32 r13.x, r4.y +mov.f32f32 r13.y, r10.w +sqrt r14.w, r8.x +(ss)add.f r14.w, (neg)r15.y, (neg)r14.w +mul.f r10.y, c7.w, r10.y +cov.u32f32 r14.z, r14.z +mad.f32 r15.x, r11.w, r13.y, r0.z +rcp r15.y, r11.y +absneg.f r15.z, (neg)r2.x +rcp r16.x, r13.x +absneg.f r16.z, (neg)r0.z +cmps.f.ne r14.z, r14.z, c8.y +mov.f32f32 r16.w, r15.x +rcp r10.y, r10.y +(ss)mul.f r10.y, r14.w, r10.y +add.f r14.w, c7.z, r15.z +sel.b32 r16.y, r16.y, r14.z, r17.y +mov.f32f32 r17.x, r16.w +cmps.f.lt r17.y, c8.y, r10.y +mul.f r14.w, r14.w, r15.y +add.f r15.y, c7.z, r16.z +add.f r17.x, c7.z, (neg)r17.x +cov.u32f32 r17.y, r17.y cmps.f.ne r16.y, r16.y, c8.y -mov.f32f32 r17.y, c8.y -mov.f32f32 r18.x, r13.y -min.f r9.z, r9.z, r15.x -mov.f32f32 r15.x, r7.x -mov.f32f32 r18.z, c7.z -mov.f32f32 r18.w, r15.z -mul.f r16.w, r18.y, r16.w -mov.f32f32 r18.y, r15.x -sqrt r18.x, r18.x -(ss)mov.f32f32 r18.x, r18.x -mov.f32f32 r16.z, r16.z -sel.b32 r17.y, r17.y, r16.y, r18.z -cmps.f.lt r19.x, c8.y, r9.w -rcp r18.w, r18.w -add.f r17.w, c7.w, r17.w -add.f r16.z, (neg)r16.z, (neg)r18.x -rcp r18.x, r18.y -(ss)absneg.f r18.y, (neg)r3.x -cov.u32f32 r19.x, r19.x -mov.f32f32 r17.w, r17.w -mov.f32f32 r16.z, r16.z -mov.f32f32 r14.w, r14.w -add.f r19.y, c10.w, r18.y -cmps.f.ne r19.x, r19.x, c8.y -(ss)mul.f r17.w, r17.w, r18.w -mul.f r14.w, c7.w, r14.w -mul.f r18.x, r19.y, r18.x -sel.b32 r17.y, r17.y, r19.x, r18.z -mov.f32f32 r17.w, r17.w -mov.f32f32 r14.w, r14.w -mov.f32f32 r18.x, r18.x -mov.f32f32 r18.z, r15.x +mov.f32f32 r5.w, r5.w +mul.f r6.x, r17.x, r6.x +add.f r17.x, c10.w, (neg)r16.w cmps.f.ne r17.y, r17.y, c8.y -mov.f32f32 r18.w, r10.z -max.f r12.w, r12.w, r17.w -mov.f32f32 r16.w, r16.w -rcp r14.w, r14.w -(ss)mov.f32f32 r14.w, r14.w -mov.f32f32 r17.w, r18.w -rcp r18.z, r18.z -add.f r18.y, c7.z, r18.y -mov.f32f32 r12.w, r12.w -mul.f r14.w, r16.z, r14.w -mov.f32f32 r16.z, r17.w -mov.f32f32 r17.w, r18.y -min.f r12.w, r14.y, r12.w -mov.f32f32 r14.y, r14.w -sel.b32 r10.z, r16.z, r16.y, r10.z -(ss)mul.f r14.w, r17.w, r18.z -mov.f32f32 r16.y, r7.x -cmps.f.lt r16.z, c8.y, r14.y -sel.b32 r9.w, r10.z, r19.x, r9.w -mov.f32f32 r10.z, c9.z -mov.f32f32 r14.w, r14.w -cov.u32f32 r16.z, r16.z -mov.f32f32 r17.w, r16.y -sel.b32 r9.w, r10.z, r17.y, r9.w -max.f r10.z, r18.x, r14.w -cmps.f.ne r14.w, r16.z, c8.y -mov.f32f32 r16.z, c8.y -mov.f32f32 r17.y, c7.z -mov.f32f32 r10.z, r10.z -mov.f32f32 r18.x, r9.w -mov.f32f32 r8.w, r8.w -sel.b32 r16.z, r16.z, r14.w, r17.y -cmps.f.lt r18.y, c8.y, r13.y -min.f r9.z, r9.z, r10.z -mad.f32 r8.w, r8.w, r18.x, r0.z -rcp r10.z, r17.w -(ss)absneg.f r17.w, (neg)r3.x -cov.u32f32 r18.x, r18.y -mov.f32f32 r9.z, r9.z -mov.f32f32 r18.y, r10.y -absneg.f r18.z, (abs)r8.w -cmps.f.ne r18.x, r18.x, c8.y -mov.f32f32 r18.w, c7.z -mad.f32 r18.y, r18.y, r9.z, r0.z -add.f r19.x, c10.w, r17.w -sel.b32 r16.z, r16.z, r18.x, r17.y -add.f r17.y, r18.w, c2.x -mov.f32f32 r18.y, r18.y -(ss)mul.f r10.z, r19.x, r10.z -cmps.f.ne r16.z, r16.z, c8.y -mov.f32f32 r18.w, r14.y -mov.f32f32 r19.x, r18.y -add.f r18.z, r17.y, (neg)r18.z -mov.f32f32 r10.z, r10.z -mov.f32f32 r18.w, r18.w -mov.f32f32 r19.y, r19.x -mov.f32f32 r18.z, r18.z -mov.f32f32 r19.z, c2.x -mov.f32f32 r18.w, r18.w -add.f r19.y, c10.w, (neg)r19.y -mov.f32f32 r19.w, r16.y -mov.f32f32 r20.x, r18.y -sel.b32 r14.y, r18.w, r14.w, r14.y -mul.f r7.y, r19.y, r7.y -rcp r14.w, r19.z -(ss)mul.f r18.z, r18.z, r14.w -add.f r18.w, r20.x, (neg)c1.x -sel.b32 r13.y, r14.y, r18.x, r13.y -mov.f32f32 r14.y, c9.z -mov.f32f32 r7.y, r7.y -mov.f32f32 r18.x, r19.x -mov.f32f32 r18.z, r18.z -sel.b32 r13.y, r14.y, r16.z, r13.y -rcp r14.y, r19.w -add.f r16.z, c7.z, r17.w -add.f r17.w, c7.z, (neg)r18.x -mul.f r18.x, r18.w, r18.w -mov.f32f32 r18.w, r13.y -mov.f32f32 r9.x, r9.x -mul.f r8.x, r17.w, r8.x -log2 r17.w, r18.z -(ss)mul.f r17.w, c9.x, r17.w -mov.f32f32 r16.z, r16.z -mad.f32 r9.x, r9.x, r18.w, r0.z +mov.f32f32 r17.z, c8.y +mov.f32f32 r17.w, c7.z +mul.f r6.y, r17.x, r6.y +sel.b32 r5.w, r5.w, r15.w, r5.w +rcp r15.w, r11.x +mul.f r15.y, r15.y, r16.x +sel.b32 r16.x, r17.z, r17.y, r17.w +max.f r17.x, r6.y, r6.x +mad.f32 r10.w, r7.x, r10.w, r0.x mov.f32f32 r8.x, r8.x -mov.f32f32 r17.w, r17.w -mul.f r14.y, r16.z, r14.y -absneg.f r16.z, (abs)r9.x -(ss)mov.f32f32 r18.z, c7.z -max.f r18.w, r7.y, r8.x -mov.f32f32 r14.y, r14.y -mov.f32f32 r19.x, r13.y -add.f r18.z, r18.z, c2.x -mov.f32f32 r18.w, r18.w -mov.f32f32 r19.y, r17.z -exp2 r17.w, r17.w +sel.b32 r5.w, r5.w, r14.z, r9.x +mov.f32f32 r9.x, c9.z +mov.f32f32 r14.z, r10.w +cmps.f.lt r17.z, c8.y, r8.x +add.f r15.z, c10.w, r15.z +rcp r18.x, r4.y +sel.b32 r5.w, r9.x, r16.y, r5.w +mov.f32f32 r9.x, r14.z +cov.u32f32 r16.y, r17.z +(ss)mul.f r15.z, r15.z, r15.w +add.f r15.w, c10.w, r16.z +add.f r16.z, c7.w, (neg)r9.x +cmps.f.ne r16.y, r16.y, c8.y +mad.f32 r9.y, r9.y, r5.w, r0.z +max.f r14.w, r15.z, r14.w +mul.f r5.x, r16.z, r5.x +add.f r15.z, c10.w, (neg)r14.z +sel.b32 r16.x, r16.x, r16.y, r17.w +absneg.f r16.z, (abs)r9.y +mov.f32f32 r17.z, c7.z +mul.f r5.y, r15.z, r5.y +cmps.f.ne r15.z, r16.x, c8.y +mov.f32f32 r10.y, r10.y +add.f r16.x, r17.z, c2.x +max.f r17.z, r5.y, r5.x +min.f r9.z, r9.z, r14.w +mul.f r14.w, r15.w, r18.x +sel.b32 r10.y, r10.y, r17.y, r10.y +min.f r15.w, r17.x, r17.z +mad.f32 r13.y, r14.y, r13.y, r2.x +add.f r16.z, r16.x, (neg)r16.z +sel.b32 r8.x, r10.y, r16.y, r8.x +mov.f32f32 r10.y, c2.x +mov.f32f32 r16.y, r13.y +mov.f32f32 r17.x, c9.z +mov.f32f32 r17.y, r9.z +max.f r14.w, r14.w, r15.y +mov.f32f32 r15.y, r16.y +sel.b32 r8.x, r17.x, r15.z, r8.x +rcp r10.y, r10.y +(ss)mul.f r15.z, r16.z, r10.y +mad.f32 r16.z, r11.z, r17.y, r0.z +add.f r15.y, c7.z, (neg)r15.y +absneg.f r1.z, (neg)r1.z +(ss)mad.f32 r4.y, r4.y, r8.x, r0.z +mov.f32f32 r17.x, r16.z +min.f r12.x, r14.w, r12.x +mov.f32f32 r14.w, r5.z +absneg.f r17.z, (abs)r4.y +log2 r15.z, r15.z +mov.f32f32 r17.w, c7.z +rcp r18.x, r1.z +(ss)mul.f r15.y, r15.y, r18.x +add.f r18.x, c10.w, (neg)r16.y +rcp r18.y, r1.z +(ss)mul.f r15.z, c9.x, r15.z +add.f r17.w, r17.w, c2.x +mov.f32f32 r18.z, r17.x +(ss)mul.f r18.x, r18.x, r18.y +rcp r18.y, r14.w +absneg.f r18.w, (neg)r2.x +add.f r17.z, r17.w, (neg)r17.z +mov.f32f32 r19.x, c2.x +max.f r19.y, r18.x, r15.y +exp2 r15.z, r15.z nop -(ss)rcp r17.w, r17.w -add.f r16.z, r18.z, (neg)r16.z -(ss)mov.f32f32 r17.w, r17.w -mad.f32 r19.y, r19.y, r9.z, r0.x -max.f r10.z, r10.z, r14.y -mov.f32f32 r14.y, r16.z -mov.f32f32 r16.z, c2.x -mov.f32f32 r19.y, r19.y -mul.f r17.w, c9.w, r17.w -mov.f32f32 r10.z, r10.z -mov.f32f32 r19.z, r4.y -mov.f32f32 r19.w, r19.y -mov.f32f32 r17.w, r17.w -rcp r16.z, r16.z -(ss)mul.f r14.y, r14.y, r16.z -min.f r10.z, r12.w, r10.z -mov.f32f32 r12.w, r19.w -add.f r17.w, c7.z, (neg)r17.w -mov.f32f32 r14.y, r14.y -mov.f32f32 r10.z, r10.z -mov.f32f32 r16.x, r16.x -add.f r12.w, c10.w, (neg)r12.w -mov.f32f32 r17.w, r17.w -mad.f32 r19.x, r19.z, r19.x, r0.x -mad.f32 r16.x, r16.x, r10.z, r0.z -log2 r14.y, r14.y -(ss)mul.f r14.y, c9.x, r14.y -mul.f r5.y, r12.w, r5.y -mul.f r12.w, c7.x, r17.w -mul.f r19.z, c7.x, r17.w -mov.f32f32 r14.y, r14.y +(ss)rcp r15.z, r15.z +(ss)mul.f r15.z, c9.w, r15.z +add.f r18.z, c7.z, (neg)r18.z +min.f r15.w, r15.w, r19.y +absneg.f r3.w, (neg)r3.w +add.f r19.y, c7.z, r18.w +rcp r19.x, r19.x +(ss)mul.f r17.z, r17.z, r19.x +mov.f32f32 r19.z, r15.w +mov.f32f32 r6.x, r6.x +mov.f32f32 r6.y, r6.y +add.f r15.z, c7.z, (neg)r15.z +rcp r19.w, r3.w +mul.f r18.y, r19.y, r18.y +(ss)mul.f r18.z, r18.z, r19.w +min.f r6.x, r6.y, r6.x +mov.f32f32 r5.x, r5.x mov.f32f32 r5.y, r5.y -mov.f32f32 r20.x, r19.w -mov.f32f32 r12.w, r12.w -mov.f32f32 r19.z, r19.z -mul.f r17.w, c7.x, r17.w -add.f r20.x, c7.w, (neg)r20.x -exp2 r14.y, r14.y +log2 r6.y, r17.z +(ss)mul.f r6.y, c9.x, r6.y +(ss)mov.f32f32 r17.z, r15.z +add.f r19.y, c10.w, (neg)r17.x +min.f r5.x, r5.y, r5.x +rcp r5.y, r3.w nop -(ss)rcp r14.y, r14.y -(ss)mov.f32f32 r14.y, r14.y -mov.f32f32 r20.y, r9.w -mov.f32f32 r20.z, r8.z -mov.f32f32 r17.w, r17.w -mul.f r14.y, c9.w, r14.y -mul.f r6.z, r20.x, r6.z -mad.f32 r20.x, r20.z, r20.y, r3.x -mov.f32f32 r16.x, r16.x -mov.f32f32 r14.y, r14.y -mov.f32f32 r6.z, r6.z -absneg.f r20.y, (abs)r20.x -mov.f32f32 r20.z, r16.x -add.f r14.y, c7.z, (neg)r14.y -max.f r20.w, r5.y, r6.z -add.f r17.y, r17.y, (neg)r20.y -add.f r20.y, c10.w, (neg)r20.z -mov.f32f32 r14.y, r14.y -mov.f32f32 r20.z, r20.w -mov.f32f32 r17.y, r17.y -mul.f r9.y, r20.y, r9.y -mul.f r20.y, c7.x, r14.y -mul.f r20.w, c7.x, r14.y -mul.f r14.y, c7.x, r14.y -min.f r18.w, r18.w, r20.z -mov.f32f32 r20.y, r20.y -mov.f32f32 r20.z, r13.y -mov.f32f32 r7.x, r7.x -mov.f32f32 r20.w, r20.w -mov.f32f32 r14.y, r14.y -mov.f32f32 r21.x, r15.x -mad.f32 r7.x, r7.x, r20.z, r3.x -mul.f r17.y, r17.y, r14.w -mov.f32f32 r9.y, r9.y -mov.f32f32 r20.z, r16.x -absneg.f r21.y, (abs)r7.x -mad.f32 r9.z, r21.x, r9.z, r3.x -mov.f32f32 r17.y, r17.y -add.f r20.z, c7.z, (neg)r20.z -add.f r18.z, r18.z, (neg)r21.y -mov.f32f32 r9.z, r9.z -mov.f32f32 r21.x, r19.x -mul.f r0.y, r20.z, r0.y -mov.f32f32 r18.z, r18.z -mov.f32f32 r20.z, r9.z -log2 r17.y, r17.y -(ss)mul.f r17.y, c9.x, r17.y -mov.f32f32 r0.y, r0.y -mul.f r18.z, r18.z, r16.z -mov.f32f32 r21.y, r20.z -mov.f32f32 r17.y, r17.y -max.f r21.z, r9.y, r0.y -mov.f32f32 r18.z, r18.z -add.f r21.y, c10.w, (neg)r21.y -add.f r4.w, c7.z, (neg)r4.w -mov.f32f32 r21.z, r21.z -mov.f32f32 r15.z, r15.z -exp2 r17.y, r17.y +rcp r19.w, r5.z +mul.f r20.x, c7.x, r17.z +mul.f r17.z, c7.x, r17.z +max.f r5.x, r6.x, r5.x +mov.f32f32 r6.x, r15.y +mov.f32f32 r15.y, r18.x +exp2 r6.y, r6.y nop -(ss)rcp r17.y, r17.y -mul.f r5.w, r21.y, r5.w -log2 r18.z, r18.z -(ss)mul.f r18.z, c9.x, r18.z -mov.f32f32 r17.y, r17.y -mad.f32 r15.z, r15.z, r10.z, r0.x -mov.f32f32 r5.w, r5.w -mov.f32f32 r18.z, r18.z -mov.f32f32 r20.z, r20.z -mul.f r17.y, c9.w, r17.y -mov.f32f32 r15.z, r15.z -mov.f32f32 r4.w, r4.w -add.f r20.z, c7.z, (neg)r20.z -mov.f32f32 r17.y, r17.y -exp2 r18.z, r18.z +(ss)rcp r6.y, r6.y +(ss)mul.f r6.y, c9.w, r6.y +mad.f32 r18.x, r11.x, r5.w, r2.x +min.f r6.x, r15.y, r6.x +mul.f r5.y, r19.y, r5.y +add.f r15.y, c10.w, r18.w +add.f r6.y, c7.z, (neg)r6.y +max.f r5.x, r5.x, r6.x +absneg.f r6.x, (abs)r18.x +max.f r18.w, r5.y, r18.z +mul.f r15.y, r15.y, r19.w +add.f r5.x, r19.z, (neg)r5.x +mov.f32f32 r19.y, r6.y +mul.f r6.y, c7.x, r6.y +add.f r6.x, r16.x, (neg)r6.x +mad.f32 r5.x, c11.w, r5.x, c7.z +mul.f r16.x, c7.x, r19.y +mul.f r19.y, c7.x, r19.y +mad.f32 r5.z, r5.z, r8.x, r2.x +mul.f r6.x, r6.x, r10.y +mad.f32 r9.z, r12.w, r9.z, r0.x +max.f r15.y, r15.y, r18.y +rcp r5.x, r5.x +mad.f32 r9.x, r0.w, r15.w, r9.x +absneg.f r15.w, (abs)r5.z +mov.f32f32 r18.y, r9.z +min.f r12.x, r12.x, r15.y +add.f r9.x, r9.x, c11.z +add.f r15.y, r17.w, (neg)r15.w +log2 r6.x, r6.x +(ss)mul.f r6.x, c9.x, r6.x +mov.f32f32 r15.w, r18.y +mul.f r9.x, c11.y, r9.x +mul.f r15.y, r15.y, r19.x +mov.f32f32 r17.w, r12.x +add.f r19.z, c7.w, (neg)r15.w +mul.f r5.x, r9.x, r5.x +exp2 r6.x, r6.x nop -(ss)rcp r18.z, r18.z -(ss)mov.f32f32 r18.z, r18.z -mul.f r5.z, r20.z, r5.z -add.f r17.y, c7.z, (neg)r17.y -mov.f32f32 r20.z, r15.z -mul.f r18.z, c9.w, r18.z -mov.f32f32 r5.z, r5.z -mov.f32f32 r17.y, r17.y -add.f r20.z, c10.w, (neg)r20.z -mov.f32f32 r18.z, r18.z -max.f r21.y, r5.w, r5.z -mul.f r12.w, r12.w, r17.y -mul.f r19.z, r19.z, r17.y -add.f r18.z, c7.z, (neg)r18.z -mov.f32f32 r21.y, r21.y -mov.f32f32 r12.w, r12.w -mov.f32f32 r19.z, r19.z -mov.f32f32 r18.z, r18.z +(ss)rcp r6.x, r6.x +mad.f32 r9.x, r13.x, r17.w, r0.z +(ss)mul.f r6.x, c9.w, r6.x +log2 r13.x, r15.y +(ss)mul.f r13.x, c9.x, r13.x +mul.f r7.y, r19.z, r7.y +(ss)mov.f32f32 r15.y, r9.x +exp2 r5.x, r5.x +(ss)add.f r5.x, c7.z, r5.x +add.f r6.x, c7.z, (neg)r6.x +add.f r19.z, c10.w, (neg)r18.y +add.f r19.w, c10.w, (neg)r15.y +exp2 r13.x, r13.x +mad.f32 r20.y, r7.x, r8.x, r0.x +(ss)rcp r13.x, r13.x +(ss)mul.f r13.x, c9.w, r13.x +mov.f32f32 r20.z, r6.x +rcp r5.x, r5.x +absneg.f r20.w, (abs)r16.y +mul.f r8.z, r19.z, r8.z +mul.f r8.y, r19.w, r8.y +add.f r19.z, c7.z, (neg)r15.y +cmps.f.lt r19.w, c11.x, r20.w +add.f r13.x, c7.z, (neg)r13.x +mul.f r20.x, r20.x, r20.z +mul.f r17.z, r17.z, r20.z +cov.u32f32 r19.w, r19.w +mov.f32f32 r20.z, r13.x +mul.f r6.y, r6.y, r13.x +mad.f32 r13.x, r12.w, r5.w, r0.x +cmps.f.ne r19.w, r19.w, c8.y +mov.f32f32 r20.w, c8.y +mov.f32f32 r21.x, c10.x +mul.f r16.x, r16.x, r20.z +mul.f r19.y, r19.y, r20.z +add.f r20.z, r20.y, c7.z +sel.b32 r20.w, r20.w, r19.w, r21.x +absneg.f r15.x, (abs)r15.x +add.f r21.x, r13.x, c7.z +add.f r20.z, r20.z, c2.x +max.f r21.y, r8.z, r7.y +cmps.f.lt r15.x, c11.x, r15.x +add.f r21.x, r21.x, c2.x +mul.f r20.z, r20.z, r19.x min.f r18.w, r18.w, r21.y -mov.f32f32 r21.y, r9.w -mov.f32f32 r21.w, r8.y -mul.f r20.y, r20.y, r18.z -mul.f r20.w, r20.w, r18.z -mul.f r14.y, r14.y, r18.z -mov.f32f32 r18.z, r18.w -mov.f32f32 r18.w, r20.y -add.f r20.y, r19.x, c7.z -mov.f32f32 r20.w, r20.w -mov.f32f32 r14.y, r14.y -mov.f32f32 r19.w, r19.w -add.f r20.y, r20.y, c2.x -mov.f32f32 r22.x, r0.w -mad.f32 r21.y, r21.w, r21.y, r0.x -mul.f r17.y, r17.w, r17.y -mov.f32f32 r17.w, r20.y -mad.f32 r19.w, r22.x, r18.z, r19.w -add.f r20.y, r21.y, c7.z -mov.f32f32 r17.y, r17.y -mul.f r17.w, r17.w, r16.z -mov.f32f32 r19.w, r19.w -add.f r20.y, r20.y, c2.x -mul.f r7.w, r20.z, r7.w -mov.f32f32 r17.w, r17.w -add.f r19.w, r19.w, c11.z -mov.f32f32 r20.y, r20.y -mov.f32f32 r7.w, r7.w -mov.f32f32 r20.z, r15.z -mov.f32f32 r19.w, r19.w -mul.f r20.y, r20.y, r14.w -log2 r17.w, r17.w -(ss)mul.f r17.w, c9.x, r17.w -add.f r20.z, c7.w, (neg)r20.z -mul.f r19.w, c11.y, r19.w -mov.f32f32 r20.y, r20.y -mov.f32f32 r17.w, r17.w -mul.f r6.y, r20.z, r6.y -mov.f32f32 r19.w, r19.w -min.f r7.y, r7.y, r8.x -mov.f32f32 r8.x, r4.w -min.f r5.y, r5.y, r6.z -mov.f32f32 r6.y, r6.y -exp2 r6.z, r17.w +cov.u32f32 r15.x, r15.x +mul.f r21.x, r21.x, r10.y +mad.f32 r17.y, r11.y, r17.y, r2.x +mul.f r4.z, r19.z, r4.z +cmps.f.ne r15.x, r15.x, c8.y +absneg.f r19.z, (neg)r16.w +log2 r20.z, r20.z +(ss)mul.f r20.z, c9.x, r20.z +mov.f32f32 r21.y, r17.y +log2 r21.x, r21.x +(ss)mul.f r21.x, c9.x, r21.x +sel.b32 r19.z, r19.z, r15.x, r20.w +max.f r20.w, r8.y, r4.z +mad.f32 r12.x, r7.x, r12.x, r0.x +mov.f32f32 r21.z, r21.y +mul.f r19.z, r2.w, r19.z +mov.f32f32 r21.w, c8.y +mov.f32f32 r22.x, c10.y +exp2 r20.z, r20.z nop -(ss)rcp r6.z, r6.z -(ss)mov.f32f32 r6.z, r6.z -max.f r5.y, r7.y, r5.y -min.f r5.z, r5.w, r5.z -log2 r5.w, r20.y -(ss)mul.f r5.w, c9.x, r5.w -mul.f r6.z, c9.w, r6.z -max.f r7.y, r7.w, r6.y -max.f r5.y, r5.y, r5.z -mov.f32f32 r5.z, r5.w -mov.f32f32 r5.w, r6.z -mov.f32f32 r6.z, r7.y -mov.f32f32 r5.y, r5.y -sqrt r7.y, r8.x -(ss)mov.f32f32 r7.y, r7.y -add.f r5.w, c7.z, (neg)r5.w -min.f r6.z, r21.z, r6.z -(ss)mov.f32f32 r8.x, r16.y -add.f r5.y, r18.z, (neg)r5.y -mov.f32f32 r5.w, r5.w -exp2 r5.z, r5.z +(ss)rcp r20.z, r20.z +(ss)mul.f r20.z, c9.w, r20.z +exp2 r21.x, r21.x nop -(ss)rcp r5.z, r5.z -(ss)mov.f32f32 r5.z, r5.z -mov.f32f32 r5.y, r5.y -mul.f r16.y, r18.w, r5.w -mul.f r17.w, r20.w, r5.w -mul.f r5.w, r14.y, r5.w -mad.f32 r5.y, c11.w, r5.y, c7.z -mov.f32f32 r14.y, r16.y -mov.f32f32 r16.y, r3.w -mov.f32f32 r17.w, r17.w -mov.f32f32 r5.w, r5.w -mov.f32f32 r5.y, r5.y -mad.f32 r7.y, c8.z, r16.y, r7.y -mul.f r5.z, c9.w, r5.z -mad.f32 r8.x, r8.x, r10.z, r3.x -mov.f32f32 r10.z, r19.y -mov.f32f32 r7.y, r7.y -mov.f32f32 r5.z, r5.z -rcp r5.y, r5.y -(ss)mov.f32f32 r5.y, r5.y -mov.f32f32 r8.x, r8.x -mul.f r16.y, r7.y, c10.x -add.f r5.z, c7.z, (neg)r5.z -mul.f r5.y, r19.w, r5.y -mov.f32f32 r18.z, r8.x -mov.f32f32 r16.y, r16.y -absneg.f r18.w, (neg)c0.x -mov.f32f32 r5.y, r5.y -mov.f32f32 r5.z, r5.z -add.f r18.z, c10.w, (neg)r18.z -mov.f32f32 r18.w, r18.w -add.f r10.z, r10.z, (neg)c1.y -mul.f r17.y, r17.y, r5.z -add.f r19.w, r9.x, (neg)c1.x -mad.f32 r16.y, c8.z, r18.w, (neg)r16.y -cmps.f.lt r4.w, r4.w, c8.y -exp2 r5.y, r5.y -(ss)mov.f32f32 r5.y, r5.y -mul.f r12.w, r12.w, r5.z -mul.f r5.z, r19.z, r5.z -cov.u32f32 r4.w, r4.w -add.f r5.y, c7.z, r5.y -mov.f32f32 r12.w, r12.w -mov.f32f32 r5.z, r5.z -cmps.f.ne r4.w, r4.w, c8.y -mov.f32f32 r18.w, c8.y -mov.f32f32 r5.y, r5.y -mov.f32f32 r19.z, r21.y -mov.f32f32 r5.x, r5.x -sel.b32 r16.y, r18.w, r4.w, r16.y -mov.f32f32 r17.y, r17.y -mul.f r3.y, r18.z, r3.y -mul.f r5.x, r5.x, c8.x -mov.f32f32 r18.z, r16.y -rcp r5.y, r5.y -(ss)mov.f32f32 r5.y, r5.y -mov.f32f32 r18.w, r9.z -mov.f32f32 r5.x, r5.x -mul.f r18.z, r21.x, r18.z -mul.f r20.y, r7.y, c10.y -mov.f32f32 r20.z, r18.w -add.f r5.x, c7.z, (neg)r5.x -mov.f32f32 r3.y, r3.y -mov.f32f32 r20.y, r20.y -mov.f32f32 r3.w, r3.w -absneg.f r20.z, (abs)r20.z -mov.f32f32 r5.x, r5.x -mov.f32f32 r20.w, r8.x -mad.f32 r3.w, c8.z, r3.w, (neg)r20.y -mov.f32f32 r20.y, c8.y -cmps.f.lt r20.z, c11.x, r20.z -mov.f32f32 r21.x, r5.x -add.f r20.w, c7.z, (neg)r20.w -sel.b32 r3.w, r20.y, r4.w, r3.w -cov.u32f32 r20.y, r20.z -mad.f32 r10.z, r10.z, r10.z, r18.x -mul.f r18.x, r19.w, r16.z -mov.f32f32 r19.w, r3.w -mov.f32f32 r20.y, r20.y -sqrt r20.z, r21.x -(ss)mov.f32f32 r20.z, r20.z -(ss)mov.f32f32 r21.x, r4.x -mul.f r1.z, r20.w, r1.z -cmps.f.ne r20.y, r20.y, c8.y -mov.f32f32 r20.w, c8.y -rcp r19.w, r19.w -(ss)mul.f r18.z, r18.z, r19.w -mad.f32 r20.z, c8.z, r21.x, r20.z -mov.f32f32 r1.z, r1.z -mov.f32f32 r20.w, r20.w -mov.f32f32 r18.z, r18.z -mov.f32f32 r21.x, r9.x -mov.f32f32 r21.z, c10.x -mov.f32f32 r20.z, r20.z -max.f r21.w, r3.y, r1.z -add.f r18.z, r21.x, (neg)r18.z -sel.b32 r20.w, r20.w, r20.y, r21.z -mov.f32f32 r21.x, r18.y -mul.f r21.z, r20.z, c10.x -mov.f32f32 r18.z, r18.z -mov.f32f32 r21.w, r21.w -absneg.f r21.x, (abs)r21.x -mov.f32f32 r21.z, r21.z -mad.f32 r18.z, c10.z, r18.z, c7.x -absneg.f r22.x, (neg)c0.x -cmps.f.lt r21.x, c11.x, r21.x -min.f r6.z, r6.z, r21.w -mov.f32f32 r18.z, r18.z -mov.f32f32 r21.w, r22.x -cov.u32f32 r21.x, r21.x -mov.f32f32 r6.z, r6.z -mov.f32f32 r18.z, r18.z -mad.f32 r21.z, c8.z, r21.w, (neg)r21.z -cmps.f.lt r5.x, r5.x, c8.y -mov.f32f32 r21.x, r21.x -mov.f32f32 r18.z, r18.z -mov.f32f32 r21.w, r15.z -mov.f32f32 r22.x, r1.x -cmps.f.ne r21.x, r21.x, c8.y -mov.f32f32 r22.y, r18.z -mov.f32f32 r18.z, r19.x -mul.f r7.y, r7.y, c10.x -mov.f32f32 r22.z, r18.y -cov.u32f32 r5.x, r5.x -mad.f32 r21.w, r22.x, r6.z, r21.w -mov.f32f32 r7.y, r7.y -absneg.f r22.x, (neg)c0.z -mov.f32f32 r22.z, r22.z -cmps.f.ne r5.x, r5.x, c8.y -mov.f32f32 r22.w, c8.y -mov.f32f32 r22.x, r22.x -absneg.f r22.z, (neg)r22.z -mov.f32f32 r21.w, r21.w -sel.b32 r21.z, r22.w, r5.x, r21.z -mad.f32 r7.y, c8.z, r22.x, (neg)r7.y +(ss)rcp r21.x, r21.x +sel.b32 r21.w, r21.w, r19.w, r22.x mov.f32f32 r22.x, c8.y -mov.f32f32 r22.z, r22.z -mov.f32f32 r22.w, r21.z -add.f r21.w, r21.w, c11.z -sel.b32 r4.w, r22.x, r4.w, r7.y -mov.f32f32 r7.y, r22.z -mul.f r19.z, r19.z, r22.w -mul.f r22.x, r20.z, c10.y -mov.f32f32 r22.z, r4.w -sel.b32 r7.y, r7.y, r21.x, r20.w -mov.f32f32 r20.w, r21.w -mov.f32f32 r21.w, r22.x -mul.f r18.z, r18.z, r22.z -mul.f r7.y, r1.w, r7.y +add.f r20.z, c7.z, (neg)r20.z +(ss)mul.f r21.x, c9.w, r21.x +add.f r21.z, c7.z, (neg)r21.z +sel.b32 r21.w, r22.x, r15.x, r21.w +mov.f32f32 r22.x, r20.z +mul.f r6.y, r6.y, r20.z +add.f r20.z, c7.z, (neg)r21.x +mad.f32 r0.w, r0.w, r21.w, r19.z +mov.f32f32 r19.z, r16.y +mul.f r16.x, r16.x, r22.x +mul.f r19.y, r19.y, r22.x +add.f r21.x, r4.y, (neg)c1.x +absneg.f r21.w, (neg)r19.z +mov.f32f32 r22.x, c10.x +mov.f32f32 r22.y, r20.z +mul.f r0.y, r0.y, c10.x +mov.f32f32 r22.z, r12.x +sel.b32 r21.w, r21.w, r19.w, r22.x mov.f32f32 r22.x, c8.y -mov.f32f32 r4.x, r4.x -mul.f r18.z, r18.z, r19.w -(ss)mul.f r19.w, c11.y, r20.w -mov.f32f32 r20.w, r22.x -mad.f32 r4.x, c8.z, r4.x, (neg)r21.w -mov.f32f32 r18.z, r18.z -mov.f32f32 r21.w, r7.x -mov.f32f32 r22.x, c10.y -mov.f32f32 r22.z, c8.y -mov.f32f32 r19.w, r19.w -add.f r18.z, r21.w, (neg)r18.z -sel.b32 r20.w, r20.w, r20.y, r22.x -mov.f32f32 r21.w, c8.y -sel.b32 r4.x, r22.z, r5.x, r4.x -mov.f32f32 r18.z, r18.z -min.f r0.y, r9.y, r0.y -min.f r6.y, r7.w, r6.y -mov.f32f32 r7.w, r21.w -mad.f32 r9.y, c10.z, r18.z, c7.x -mov.f32f32 r18.z, r4.x -max.f r0.y, r0.y, r6.y -min.f r1.z, r3.y, r1.z -mov.f32f32 r3.y, r9.y -mov.f32f32 r6.y, r7.w -mov.f32f32 r7.w, r10.z -mov.f32f32 r9.y, r18.x -mov.f32f32 r3.y, r3.y -sel.b32 r6.y, r6.y, r21.x, r20.w -rcp r10.z, r18.z -(ss)mul.f r18.x, r19.z, r10.z -max.f r0.y, r0.y, r1.z -mov.f32f32 r1.z, r3.y -mad.f32 r0.w, r0.w, r6.y, r7.y -mov.f32f32 r3.y, r18.x -mov.f32f32 r6.y, r8.w -mov.f32f32 r22.z, r1.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.z, r18.w -add.f r3.y, r6.y, (neg)r3.y -mov.f32f32 r0.y, r0.y -mov.f32f32 r6.y, r9.z -mul.f r7.y, (neg)r16.y, r9.y -sam (f32)(x)r21.w, r22.y, s#1, t#1 -(sy)mul.f r9.y, r21.w, c9.y -mov.f32f32 r1.z, r1.z -mov.f32f32 r3.y, r3.y -add.f r0.y, r6.z, (neg)r0.y -mov.f32f32 r6.z, r9.y -add.f r9.y, r19.x, (neg)c1.y -absneg.f r1.z, (neg)r1.z -mad.f32 r3.y, c10.z, r3.y, c7.x -mov.f32f32 r0.y, r0.y -mul.f r9.y, r9.y, r16.z -mov.f32f32 r1.z, r1.z -mov.f32f32 r16.y, c10.x -mov.f32f32 r3.y, r3.y -mov.f32f32 r9.y, r9.y -mad.f32 r0.y, c11.w, r0.y, c7.z -sel.b32 r1.z, r1.z, r20.y, r16.y -mov.f32f32 r3.y, r3.y -mad.f32 r3.w, (neg)r3.w, r9.y, r7.y -mov.f32f32 r7.y, c8.y -mov.f32f32 r0.y, r0.y -mov.f32f32 r3.y, r3.y -mov.f32f32 r3.w, r3.w -add.f r9.y, r7.x, (neg)c1.z -mov.f32f32 r7.y, r7.y -mov.f32f32 r21.w, r3.y -mov.f32f32 r3.y, r21.y -mul.f r9.y, r9.y, r16.z -mov.f32f32 r7.y, r7.y -mul.f r16.y, r20.z, c10.x -rcp r0.y, r0.y -(ss)mov.f32f32 r0.y, r0.y -mov.f32f32 r9.y, r9.y -sel.b32 r1.z, r7.y, r21.x, r1.z -mov.f32f32 r7.y, r16.y -absneg.f r16.y, (neg)c0.z -mad.f32 r3.w, (neg)r4.w, r9.y, r3.w -mad.f32 r0.w, r2.z, r1.z, r0.w -mul.f r0.y, r19.w, r0.y -mov.f32f32 r1.z, r16.y -mov.f32f32 r3.w, r3.w -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.y, r0.y -mad.f32 r1.z, c8.z, r1.z, (neg)r7.y -max.f r3.w, c8.y, r3.w +mul.f r21.x, r21.x, r19.x +absneg.f r22.w, (neg)c0.y +mul.f r20.x, r20.x, r22.y +sel.b32 r21.w, r22.x, r15.x, r21.w +add.f r22.x, r9.y, (neg)c1.x +mul.f r17.z, r17.z, r22.y +absneg.f r22.y, (neg)c0.z +mad.f32 r0.w, r1.z, r21.w, r0.w +mul.f r21.w, r22.w, r22.w +mul.f r22.x, r22.x, r10.y +absneg.f r23.x, (neg)c0.y max.f r0.w, c8.y, r0.w -mov.f32f32 r4.w, c8.y -add.f r6.y, r6.y, (neg)c1.z -mov.f32f32 r3.w, r3.w -mov.f32f32 r0.w, r0.w -sel.b32 r1.z, r4.w, r5.x, r1.z -exp2 r0.y, r0.y -(ss)mov.f32f32 r0.y, r0.y -mul.f r3.w, r3.w, c7.x -mov.f32f32 r4.w, r0.w -mov.f32f32 r5.x, r1.z -add.f r0.y, c7.z, r0.y -mov.f32f32 r3.w, r3.w -mul.f r4.w, r4.w, r5.y -mul.f r3.y, r3.y, r5.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r5.x, r3.w -mov.f32f32 r4.w, r4.w -mad.f32 r5.y, r6.y, r6.y, r7.w -mul.f r3.y, r3.y, r10.z -mul.f r5.x, r5.x, r6.z -mov.f32f32 r6.y, r15.w -rcp r0.y, r0.y -mov.f32f32 r5.y, r5.y -mov.f32f32 r3.y, r3.y -mov.f32f32 r5.x, r5.x -mad.f32 r6.z, c7.x, r9.x, c7.x -mov.f32f32 r7.y, r20.x -(ss)mov.f32f32 r0.y, r0.y -mov.f32f32 r7.w, r8.x -mov.f32f32 r6.z, r6.z -sqrt r5.y, r5.y -(ss)mov.f32f32 r5.y, r5.y -mov.f32f32 r9.x, c2.x -add.f r3.y, r7.y, (neg)r3.y -mov.f32f32 r6.z, r6.z -absneg.f r7.y, (abs)r7.w -rcp r6.y, r6.y -add.f r7.w, c7.w, r17.x -mov.f32f32 r3.y, r3.y -mov.f32f32 r16.y, r6.z -mad.f32 r6.z, c7.x, r7.x, c7.x -rcp r7.x, r9.x -(ss)mul.f r5.y, r5.y, r7.x -mad.f32 r3.y, c10.z, r3.y, c7.x -cmps.f.lt r7.x, c11.x, r7.y -mov.f32f32 r6.z, r6.z -mov.f32f32 r5.y, r5.y -mov.f32f32 r3.y, r3.y -cov.u32f32 r7.x, r7.x -mov.f32f32 r6.z, r6.z -mov.f32f32 r7.y, r7.w -add.f r7.w, r16.x, (neg)c1.x -(ss)rcp r9.x, r14.z -mov.f32f32 r3.y, r3.y -mov.f32f32 r16.z, r6.z -log2 r5.y, r5.y -(ss)mul.f r5.y, c9.y, r5.y -mov.f32f32 r6.z, r7.x -mov.f32f32 r3.y, r3.y -mul.f r6.y, r7.y, r6.y -mov.f32f32 r5.y, r5.y -mul.f r7.x, r7.w, r7.w -sam (f32)(x)r22.x, r16.y, s#2, t#2 -(sy)cmps.f.lt r7.y, r19.x, r22.x -mov.f32f32 r22.x, r3.y -cmps.f.ne r3.y, r6.z, c8.y -mov.f32f32 r6.z, c8.y -cov.u32f32 r7.y, r7.y -exp2 r5.y, r5.y +add.f r21.w, c7.z, (neg)r21.w +mad.f32 r0.y, c8.z, r22.y, (neg)r0.y +add.f r22.y, c10.w, (neg)r22.z +mov.f32f32 r23.y, r0.w +mul.f r21.w, r21.w, c8.x +mul.f r23.z, r23.x, r23.x +mov.f32f32 r23.w, c8.y +mul.f r5.x, r23.y, r5.x +add.f r23.y, r16.w, (neg)c1.x +add.f r21.w, c7.z, (neg)r21.w +add.f r23.z, c7.z, (neg)r23.z +sel.b32 r0.y, r23.w, r2.y, r0.y +mul.f r2.y, r23.y, r23.y +add.f r23.y, r14.z, (neg)c1.y +mov.f32f32 r23.w, r21.w +mul.f r23.z, r23.z, c8.x +absneg.f r0.y, (neg)r0.y +mad.f32 r2.y, r23.y, r23.y, r2.y +add.f r23.y, r16.y, (neg)c1.z +add.f r23.z, c7.z, (neg)r23.z +mul.f r6.w, r22.y, r6.w +add.f r22.y, c7.w, (neg)r22.z +mad.f32 r2.y, r23.y, r23.y, r2.y +sqrt r23.y, r23.w +(ss)mad.f32 r23.y, c8.z, r22.w, r23.y +(ss)mov.f32f32 r23.w, r23.z +rcp r24.x, r0.y +mul.f r4.w, r22.y, r4.w +(ss)mul.f r21.z, r21.z, r24.x +mov.f32f32 r22.y, r23.y +add.f r24.x, c10.w, (neg)r21.y +sqrt r2.y, r2.y +mov.f32f32 r24.y, c2.x +rcp r24.z, r0.y +max.f r24.w, r6.w, r4.w +sqrt r23.w, r23.w +(ss)mad.f32 r23.w, c8.z, r23.x, r23.w +mul.f r24.x, r24.x, r24.z +mul.f r24.z, r22.y, c10.x +absneg.f r25.x, (neg)c0.x +mov.f32f32 r25.y, r23.w +rcp r24.y, r24.y +(ss)mul.f r2.y, r2.y, r24.y +(ss)max.f r24.y, r24.x, r21.z +min.f r20.w, r20.w, r24.w +mad.f32 r14.w, r14.w, r17.w, r2.x +mul.f r17.w, r25.y, c10.x +absneg.f r24.w, (neg)c0.x +min.f r18.w, r18.w, r24.y +log2 r2.y, r2.y +(ss)mul.f r2.y, c9.y, r2.y +mad.f32 r24.y, c8.z, r25.x, (neg)r24.z +cmps.f.lt r21.w, r21.w, c8.y +mad.f32 r17.w, c8.z, r24.w, (neg)r17.w +cmps.f.lt r23.z, r23.z, c8.y +mov.f32f32 r24.z, r18.w +mov.f32f32 r24.w, r14.w +exp2 r2.y, r2.y nop -(ss)rcp r5.y, r5.y -(ss)mov.f32f32 r5.y, r5.y -mov.f32f32 r6.z, r6.z -mov.f32f32 r7.y, r7.y -sam (f32)(x)r21.w, r21.w, s#1, t#1 -(sy)mul.f r7.w, r21.w, c9.y -mul.f r5.y, c9.w, r5.y -mov.f32f32 r9.y, c10.x -cmps.f.ne r7.y, r7.y, c8.y -mov.f32f32 r7.w, r7.w -add.f r10.z, r8.w, (neg)c1.x -mov.f32f32 r5.y, r5.y -sel.b32 r3.w, r5.x, r7.y, r3.w -sel.b32 r5.x, r6.z, r3.y, r9.y -absneg.f r6.z, (abs)r16.x -add.f r5.y, c7.z, (neg)r5.y -add.f r7.y, r14.y, r3.w -add.f r9.y, r17.w, r3.w -add.f r3.w, r5.w, r3.w +(ss)rcp r2.y, r2.y +(ss)mul.f r2.y, c9.w, r2.y +cov.u32f32 r21.w, r21.w +cov.u32f32 r23.z, r23.z +mov.f32f32 r18.z, r18.z +add.f r2.y, c7.z, (neg)r2.y +mul.f r25.x, r16.w, r16.w +cmps.f.ne r21.w, r21.w, c8.y +mad.f32 r25.x, r14.z, r14.z, r25.x +mov.f32f32 r25.z, c8.y +mad.f32 r25.x, r16.y, r16.y, r25.x +cmps.f.ne r23.z, r23.z, c8.y +mov.f32f32 r25.w, c8.y mov.f32f32 r5.y, r5.y -mov.f32f32 r5.w, r7.y -mov.f32f32 r7.y, r9.y -mov.f32f32 r3.w, r3.w -mov.f32f32 r9.y, r18.y -mov.f32f32 r5.w, r5.w -mov.f32f32 r14.y, r18.y +add.f r26.x, c10.w, (neg)r24.w +absneg.f r26.y, (neg)c0.z +sel.b32 r17.w, r25.w, r23.z, r17.w +rsq r25.x, r25.x +(ss)mul.f r25.x, c7.x, r25.x +sel.b32 r24.y, r25.z, r21.w, r24.y +min.f r5.y, r5.y, r18.z +mad.f32 r3.y, c8.z, r26.y, (neg)r3.y +mul.f r2.y, r25.x, r2.y +mul.f r18.z, (neg)r24.y, r21.x +add.f r21.x, r20.y, (neg)c1.y +mul.f r22.x, (neg)r17.w, r22.x +mov.f32f32 r25.x, r2.y +add.f r25.z, r13.x, (neg)c1.y +mad.f32 r5.x, c7.x, r5.x, r25.x +mov.f32f32 r25.x, r14.z +mul.f r21.x, r21.x, r19.x +mul.f r22.y, r22.y, c10.y +mul.f r25.z, r25.z, r10.y +mul.f r1.z, r25.x, r1.z +mad.f32 r22.y, c8.z, r22.w, (neg)r22.y +mov.f32f32 r22.w, c8.y +mul.f r25.x, r25.y, c10.y +mul.f r1.z, r1.z, r6.z +mad.f32 r23.x, c8.z, r23.x, (neg)r25.x +mov.f32f32 r25.x, c8.y mov.f32f32 r7.y, r7.y -mov.f32f32 r3.w, r3.w -mul.f r10.z, r10.z, r14.w -mul.f r9.y, r9.y, r14.y -mov.f32f32 r14.y, r19.y -mov.f32f32 r14.z, r19.y -mov.f32f32 r10.z, r10.z -cmps.f.lt r6.z, c11.x, r6.z -mov.f32f32 r6.y, r6.y -mad.f32 r9.y, r14.y, r14.z, r9.y -mul.f r10.z, (neg)r21.z, r10.z -add.f r14.y, r21.y, (neg)c1.y -cov.u32f32 r6.z, r6.z -mov.f32f32 r9.y, r9.y -mov.f32f32 r14.z, r9.z -mov.f32f32 r16.y, r9.z -mul.f r14.y, r14.y, r14.w -mov.f32f32 r6.z, r6.z -max.f r6.y, r16.w, r6.y -mad.f32 r9.y, r14.z, r16.y, r9.y -mov.f32f32 r14.y, r14.y -cmps.f.ne r6.z, r6.z, c8.y -mov.f32f32 r14.z, r16.x -mov.f32f32 r6.y, r6.y -add.f r16.y, r15.z, (neg)c1.y -add.f r15.y, c7.z, r15.y -rsq r9.y, r9.y -(ss)mov.f32f32 r9.y, r9.y -mad.f32 r4.x, (neg)r4.x, r14.y, r10.z -absneg.f r10.z, (neg)r14.z -min.f r6.y, r10.w, r6.y -mul.f r9.y, c7.x, r9.y -mov.f32f32 r4.x, r4.x -add.f r10.w, r20.x, (neg)c1.z -mov.f32f32 r10.z, r10.z -mul.f r5.y, r9.y, r5.y -mov.f32f32 r9.y, r8.z -mad.f32 r7.x, r16.y, r16.y, r7.x -mov.f32f32 r14.y, r15.y -mov.f32f32 r5.y, r5.y -mul.f r10.w, r10.w, r14.w -mov.f32f32 r10.z, r10.z -mov.f32f32 r14.z, r9.y -mov.f32f32 r14.w, r5.y -mov.f32f32 r10.w, r10.w -mad.f32 r4.w, c7.x, r4.w, r14.w -sel.b32 r5.x, r10.z, r6.z, r5.x -mov.f32f32 r5.y, r5.y -mad.f32 r1.z, (neg)r1.z, r10.w, r4.x -mov.f32f32 r4.x, r4.w -mov.f32f32 r0.w, r0.w -mov.f32f32 r4.w, r19.y -mov.f32f32 r1.z, r1.z -mul.f r5.x, r2.x, r5.x -mov.f32f32 r10.z, c8.y -mov.f32f32 r10.w, r4.w -mov.f32f32 r1.w, r1.w +add.f r1.z, r16.y, (neg)r1.z +sel.b32 r22.y, r22.w, r21.w, r22.y +sel.b32 r22.w, r25.x, r23.z, r23.x +mov.f32f32 r8.z, r8.z +mad.f32 r25.y, c10.z, r1.z, c7.x +mul.f r1.z, r14.z, r2.w +mad.f32 r2.w, (neg)r22.y, r21.x, r18.z +add.f r18.z, r5.z, (neg)c1.z +mad.f32 r21.x, (neg)r22.w, r25.z, r22.x +mul.f r1.z, r1.z, r6.z +add.f r6.z, r18.x, (neg)c1.z +min.f r7.y, r8.z, r7.y +mov.f32f32 r8.z, c8.y +add.f r1.z, r16.w, (neg)r1.z +mul.f r18.z, r18.z, r19.x +mul.f r19.x, r23.y, c10.x +absneg.f r22.x, (neg)c0.z +mad.f32 r25.x, c10.z, r1.z, c7.x +mul.f r1.z, r6.z, r10.y +mul.f r6.z, r23.w, c10.x +max.f r5.y, r5.y, r7.y +mad.f32 r7.y, c8.z, r22.x, (neg)r19.x +mov.f32f32 r10.y, c8.y +absneg.f r19.x, (neg)c0.z +sam (f32)(xy)r25.x, r25.x, s#1, t#1 +(sy)mul.f r0.w, r0.w, r25.x +mov.f32f32 r21.z, r21.z +sel.b32 r1.w, r8.z, r1.w, r3.y +mad.f32 r3.y, c8.z, r19.x, (neg)r6.z +mul.f r0.w, r0.w, c7.w +sel.b32 r6.z, r10.y, r21.w, r7.y +mov.f32f32 r7.y, c8.y +mov.f32f32 r8.z, r24.x +mad.f32 r0.w, r0.w, r25.y, r2.y +mad.f32 r21.w, c7.x, r16.w, c7.x +mad.f32 r22.x, c7.x, r16.y, c7.x +mad.f32 r2.y, (neg)r6.z, r18.z, r2.w +sel.b32 r2.w, r7.y, r23.z, r3.y +min.f r3.y, r8.z, r21.z +absneg.f r1.w, (neg)r1.w +max.f r2.y, c8.y, r2.y +mad.f32 r1.z, (neg)r2.w, r1.z, r21.x +sam (f32)(x)r23.x, r21.w, s#2, t#2 +(sy)cmps.f.lt r7.y, r14.z, r23.x +max.f r3.y, r5.y, r3.y +mul.f r2.y, r2.y, c7.x max.f r1.z, c8.y, r1.z -mov.f32f32 r10.z, r10.z -mov.f32f32 r14.w, c10.y -mul.f r1.w, r10.w, r1.w -mov.f32f32 r1.z, r1.z -rcp r10.w, r14.z -(ss)absneg.f r14.z, (neg)r3.x -sel.b32 r10.z, r10.z, r3.y, r14.w -mul.f r1.w, r1.w, r7.z +cov.u32f32 r5.y, r7.y +mul.f r7.y, r20.y, r24.y +rcp r8.z, r22.y +add.f r3.y, r24.z, (neg)r3.y mul.f r1.z, r1.z, c7.x -mov.f32f32 r14.w, c8.y -add.f r15.y, c10.w, r14.z -mov.f32f32 r1.w, r1.w -mov.f32f32 r16.y, r18.y -mov.f32f32 r1.z, r1.z -mov.f32f32 r14.w, r14.w -(ss)mul.f r10.w, r15.y, r10.w -mov.f32f32 r15.y, r16.y -mov.f32f32 r16.y, r1.z -mov.f32f32 r14.w, r14.w -mov.f32f32 r10.w, r10.w -add.f r1.w, r15.y, (neg)r1.w -mul.f r7.w, r16.y, r7.w -sel.b32 r10.z, r14.w, r6.z, r10.z -mov.f32f32 r14.w, r9.y -mov.f32f32 r1.w, r1.w -mov.f32f32 r7.w, r7.w -mad.f32 r8.w, c7.x, r8.w, c7.x -mad.f32 r1.x, r1.x, r10.z, r5.x -mad.f32 r1.w, c10.z, r1.w, c7.x -mov.f32f32 r5.x, r7.x -rcp r7.x, r14.w -add.f r10.z, c7.z, r14.z -mov.f32f32 r8.w, r8.w -mov.f32f32 r1.w, r1.w -mov.f32f32 r1.x, r1.x -mov.f32f32 r14.z, r8.x -mov.f32f32 r8.w, r8.w -mov.f32f32 r1.w, r1.w -mov.f32f32 r10.z, r10.z -(ss)add.f r14.w, r8.x, (neg)c1.z -mul.f r9.x, r14.y, r9.x -mov.f32f32 r1.w, r1.w -mov.f32f32 r16.y, r8.w -mad.f32 r8.w, c7.x, r20.x, c7.x -mov.f32f32 r14.y, r14.z -mov.f32f32 r16.w, r1.w -mov.f32f32 r1.w, r4.w -mov.f32f32 r2.z, r2.z -mov.f32f32 r4.w, r8.w -absneg.f r8.w, (neg)r14.y -(ss)mul.f r7.x, r10.z, r7.x -mul.f r1.w, r1.w, r2.z -mov.f32f32 r2.z, r4.w -mov.f32f32 r4.w, r8.w -mov.f32f32 r8.w, c10.x -mul.f r1.w, r1.w, r7.z -mov.f32f32 r16.z, r2.z -mov.f32f32 r2.z, r7.x -mad.f32 r5.x, r14.w, r14.w, r5.x -mov.f32f32 r1.w, r1.w -mov.f32f32 r7.x, r9.z -sel.b32 r4.w, r4.w, r3.y, r8.w -mov.f32f32 r7.z, c8.y -sam (f32)(x)r21.z, r16.y, s#2, t#2 -(sy)cmps.f.lt r8.w, r21.y, r21.z -mov.f32f32 r7.x, r7.x -max.f r2.z, r10.w, r2.z -mov.f32f32 r5.x, r5.x -mov.f32f32 r9.x, r9.x -add.f r1.w, r7.x, (neg)r1.w -cov.u32f32 r7.x, r8.w -mov.f32f32 r7.z, r7.z -mov.f32f32 r2.z, r2.z -mov.f32f32 r1.w, r1.w -mov.f32f32 r7.x, r7.x -mov.f32f32 r7.z, r7.z -min.f r2.z, r6.y, r2.z -mad.f32 r1.w, c10.z, r1.w, c7.x -cmps.f.ne r6.y, r7.x, c8.y -sel.b32 r4.w, r7.z, r6.z, r4.w -mov.f32f32 r2.z, r2.z -mov.f32f32 r1.w, r1.w -sel.b32 r1.z, r7.w, r6.y, r1.z -mad.f32 r1.x, r6.x, r4.w, r1.x -mov.f32f32 r4.w, r13.x -mov.f32f32 r1.w, r1.w -add.f r6.y, r12.w, r1.z -add.f r5.z, r5.z, r1.z -add.f r1.z, r17.y, r1.z -mov.f32f32 r1.w, r1.w -mov.f32f32 r6.y, r6.y -mov.f32f32 r5.z, r5.z -mov.f32f32 r1.z, r1.z -mov.f32f32 r17.x, r1.w -mov.f32f32 r6.y, r6.y -mov.f32f32 r5.z, r5.z +cmps.f.ne r5.y, r5.y, c8.y +(ss)mul.f r7.y, r7.y, r8.z +mul.f r10.y, r13.x, r17.w +rcp r16.y, r22.w +mad.f32 r3.y, c11.w, r3.y, c7.z +sel.b32 r0.w, r0.w, r5.y, r5.x +mov.f32f32 r5.x, r16.w +add.f r5.y, r4.y, (neg)r7.y +(ss)mul.f r7.y, r10.y, r16.y +rcp r10.y, r1.w +(ss)mul.f r10.y, r26.x, r10.y +mad.f32 r21.z, c7.x, r5.x, c7.x +(ss)mad.f32 r21.w, c7.x, r19.z, c7.x +mad.f32 r5.x, c10.z, r5.y, c7.x +mul.f r5.y, r20.y, r6.z +add.f r6.z, r9.y, (neg)r7.y +rcp r3.y, r3.y +add.f r7.y, c7.z, (neg)r24.w +rcp r17.w, r1.w +mad.f32 r15.w, r1.y, r18.w, r15.w +mul.f r5.y, r5.y, r8.z +sam (f32)(xyz)r21.z, r21.z, s#0, t#0 +mad.f32 r18.w, c7.x, r16.w, c7.x +mad.f32 r18.z, c7.x, r14.z, c7.z +mad.f32 r22.w, c10.z, r6.z, c7.x +add.f r5.y, r5.z, (neg)r5.y +mul.f r2.w, r13.x, r2.w +add.f r6.z, r15.w, c11.z +(ss)mul.f r7.y, r7.y, r17.w +mad.f32 r5.y, c10.z, r5.y, c7.x +sam (f32)(xyz)r23.x, r18.z, s#0, t#0 +(sy)sel.b32 r8.z, r23.z, r19.w, r22.x +(ss)mad.f32 r18.w, c7.x, r13.y, c7.x +mad.f32 r18.z, c7.x, r10.w, c7.z +sel.b32 r10.w, r23.y, r19.w, r21.w +sel.b32 r13.y, r23.x, r19.w, r21.z +mul.f r2.w, r2.w, r16.y +sam (f32)(x)r21.z, r5.x, s#1, t#1 +(sy)(ss)mul.f r5.x, r21.z, c9.y +mul.f r5.y, c11.y, r6.z +max.f r6.z, r10.y, r7.y +sam (f32)(xyz)r21.z, r18.z, s#0, t#0 +(sy)sel.b32 r8.z, r22.x, r15.x, r8.z +sel.b32 r10.w, r21.w, r15.x, r10.w +sel.b32 r13.y, r21.z, r15.x, r13.y +mul.f r5.x, r2.y, r5.x +mul.f r8.z, r8.z, r0.w +cmps.f.lt r14.z, r14.z, c12.x +mul.f r10.w, r10.w, r0.w +mul.f r0.w, r13.y, r0.w +mov.f32f32 r2.y, r2.y +cov.u32f32 r13.y, r14.z +(ss)mad.f32 r18.z, c7.x, r4.y, c7.x +mad.f32 r18.w, c7.x, r5.z, c7.x +add.f r2.w, r18.x, (neg)r2.w +cmps.f.ne r4.y, r13.y, c8.y +mul.f r5.z, c0.x, r11.w +mul.f r3.y, r5.y, r3.y +mad.f32 r5.y, c0.y, r10.x, r5.z +mad.f32 r23.x, c10.z, r2.w, c7.x +mad.f32 r2.w, c0.z, r14.y, r5.y +sam (f32)(x)r21.z, r18.z, s#2, t#2 +(sy)cmps.f.lt r5.y, r20.y, r21.z +min.f r5.z, r20.w, r6.z +mul.f r6.z, c7.x, r15.z +max.f r2.w, c8.y, r2.w +cov.u32f32 r5.y, r5.y +sam (f32)(x)r21.z, r22.w, s#1, t#1 +(sy)mul.f r13.y, r21.z, c9.y +exp2 r3.y, r3.y +mov.f32f32 r14.z, r5.z +mov.f32f32 r8.y, r8.y +cmps.f.ne r5.y, r5.y, c8.y +mul.f r13.y, r1.z, r13.y +log2 r2.w, r2.w +(ss)mul.f r2.w, c12.y, r2.w mov.f32f32 r1.z, r1.z -mad.f32 r1.w, r4.w, r2.z, r0.z -mov.f32f32 r1.x, r1.x -sqrt r4.w, r5.x -(ss)mov.f32f32 r4.w, r4.w +mad.f32 r15.z, c7.x, r9.y, c7.x +add.f r3.y, c7.z, r3.y +mov.f32f32 r4.z, r4.z +mad.f32 r15.w, c7.x, r18.x, c7.x +sel.b32 r2.y, r5.x, r5.y, r2.y +exp2 r2.w, r2.w +mov.f32f32 r18.w, r10.x +mov.f32f32 r19.x, r14.y +mov.f32f32 r18.z, r11.w +add.f r5.x, r16.x, r2.y +add.f r5.y, r19.y, r2.y +add.f r2.y, r6.y, r2.y +sam (f32)(x)r15.z, r15.z, s#2, t#2 +rcp r3.y, r3.y +(sy)cmps.f.lt r6.y, r13.x, r15.z +absneg.f r9.y, (abs)r21.y +min.f r4.z, r8.y, r4.z (ss)nop -sam (f32)(xy)r16.y, r16.w, s#1, t#1 -(sy)mul.f r0.w, r0.w, r16.y -mov.f32f32 r1.w, r1.w -max.f r1.x, c8.y, r1.x -mov.f32f32 r5.x, c2.x -mul.f r0.w, r0.w, c7.w -mov.f32f32 r7.x, r1.w -mov.f32f32 r1.x, r1.x -max.f r7.z, r13.w, r9.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r7.w, r7.x -mov.f32f32 r8.w, r1.x -rcp r5.x, r5.x -(ss)mul.f r4.w, r4.w, r5.x -mad.f32 r0.w, r0.w, r16.z, r5.y -(ss)add.f r5.x, c10.w, (neg)r7.w -rcp r5.y, r12.z -mul.f r0.y, r8.w, r0.y +sam.3d (f32)(xyz)r15.z, r18.z, s#3, t#3 +(sy)(ss)mad.f32 r8.y, c13.z, r2.w, r16.x +mad.f32 r10.x, c13.y, r2.w, r15.w +mad.f32 r2.w, c13.x, r2.w, r15.z +cov.u32f32 r6.y, r6.y +sel.b32 r8.y, r8.z, r4.y, r8.y +mov.f32f32 r6.w, r6.w mov.f32f32 r4.w, r4.w -mov.f32f32 r0.w, r0.w -mov.f32f32 r7.w, r18.y -(ss)mul.f r5.x, r5.x, r5.y -mov.f32f32 r0.y, r0.y -mov.f32f32 r5.y, r7.z -mad.f32 r7.z, c7.x, r7.w, c7.x -mov.f32f32 r5.x, r5.x -mov.f32f32 r7.x, r7.x +sel.b32 r8.z, r10.w, r4.y, r10.x +sel.b32 r0.w, r0.w, r4.y, r2.w +cmps.f.ne r2.w, r6.y, c8.y +min.f r4.y, r6.w, r4.w +cmps.f.lt r4.w, c11.x, r9.y +mul.f r6.x, r6.z, r6.x +add.f r6.y, r15.y, (neg)c1.x +max.f r4.y, r4.z, r4.y +mov.f32f32 r4.z, r10.y +mov.f32f32 r6.z, r7.y +sel.b32 r1.z, r13.y, r2.w, r1.z +cov.u32f32 r2.w, r4.w +mul.f r4.w, r6.x, r20.z +min.f r4.z, r4.z, r6.z +add.f r6.x, r20.x, r1.z +add.f r6.z, r17.z, r1.z +cmps.f.ne r2.w, r2.w, c8.y +max.f r4.y, r4.y, r4.z +mov.f32f32 r4.z, c8.y +mov.f32f32 r6.w, c10.x +add.f r1.z, r4.w, r1.z +add.f r4.y, r14.z, (neg)r4.y +mul.f r4.w, r6.y, r6.y +add.f r6.y, r22.z, (neg)c1.y +sel.b32 r4.z, r4.z, r2.w, r6.w +mad.f32 r4.y, c11.w, r4.y, c7.z +absneg.f r6.w, (abs)r16.z +mad.f32 r4.w, r6.y, r6.y, r4.w +add.f r6.y, r24.w, (neg)c1.z +absneg.f r7.y, (abs)r24.w +mul.f r9.y, r14.x, r13.z +rcp r10.x, r10.z +cmps.f.lt r6.w, c11.x, r6.w +rcp r4.y, r4.y +mad.f32 r5.z, r1.x, r5.z, r22.z +mad.f32 r4.w, r6.y, r6.y, r4.w +cmps.f.lt r6.y, c11.x, r7.y +add.f r7.y, c10.w, r13.w +add.f r5.z, r5.z, c11.z +cov.u32f32 r6.w, r6.w +cov.u32f32 r6.y, r6.y +(ss)mul.f r7.y, r7.y, r10.x +mul.f r5.z, c11.y, r5.z +cmps.f.ne r6.w, r6.w, c8.y +absneg.f r10.x, (neg)r17.x +sqrt r4.w, r4.w +mov.f32f32 r10.y, c2.x +(ss)mul.f r4.y, r5.z, r4.y +cmps.f.ne r5.z, r6.y, c8.y +max.f r6.y, r7.y, r9.y +mov.f32f32 r7.y, r12.w +sel.b32 r4.z, r10.x, r6.w, r4.z +mov.f32f32 r9.y, c8.y +mov.f32f32 r10.x, c10.x +exp2 r4.y, r4.y +(ss)add.f r4.y, c7.z, r4.y +mul.f r4.z, r3.w, r4.z +mov.f32f32 r10.z, c8.y +mov.f32f32 r10.w, c10.y +rcp r10.y, r10.y +(ss)mul.f r4.w, r4.w, r10.y +sel.b32 r9.y, r9.y, r5.z, r10.x +absneg.f r9.x, (abs)r9.x +rcp r4.y, r4.y +sel.b32 r10.x, r10.z, r2.w, r10.w +(ss)mov.f32f32 r10.y, c8.y +rcp r10.z, r7.y +absneg.f r10.w, (neg)r0.x +cmps.f.lt r9.x, c11.x, r9.x log2 r4.w, r4.w (ss)mul.f r4.w, c9.y, r4.w -mov.f32f32 r7.z, r7.z -mov.f32f32 r7.w, r8.y -add.f r7.x, c7.z, (neg)r7.x -mov.f32f32 r4.w, r4.w -mov.f32f32 r7.z, r7.z -rcp r8.w, r12.x -mov.f32f32 r9.x, r7.w -(ss)mul.f r7.x, r7.x, r8.w -mov.f32f32 r1.x, r1.x -mov.f32f32 r10.z, r7.z -mov.f32f32 r7.z, r9.z -mov.f32f32 r7.x, r7.x +sel.b32 r10.x, r10.y, r6.w, r10.x +add.f r10.y, c10.w, r10.w +cov.u32f32 r9.x, r9.x +cmps.f.lt r8.x, r8.x, c9.z +mad.f32 r4.z, r1.y, r10.x, r4.z +mul.f r10.x, r10.y, r10.z +cmps.f.ne r9.x, r9.x, c8.y +absneg.f r10.y, (neg)r15.y +mov.f32f32 r10.z, r21.y exp2 r4.w, r4.w nop (ss)rcp r4.w, r4.w -(ss)mov.f32f32 r4.w, r4.w -mad.f32 r7.z, c7.x, r7.z, c7.x -max.f r8.w, r5.x, r7.x -rcp r9.x, r9.x -absneg.f r12.x, (neg)r0.x -mul.f r4.w, c9.w, r4.w -mov.f32f32 r7.z, r7.z -mov.f32f32 r8.w, r8.w -mov.f32f32 r10.w, r15.w -mov.f32f32 r4.w, r4.w -mov.f32f32 r7.z, r7.z -add.f r12.z, c10.w, r12.x -mad.f32 r12.w, r10.w, r2.z, r0.x +(ss)mul.f r4.w, c9.w, r4.w +sel.b32 r9.y, r10.y, r9.x, r9.y +absneg.f r10.y, (neg)r10.z +mov.f32f32 r11.w, c10.x add.f r4.w, c7.z, (neg)r4.w -mov.f32f32 r10.w, r7.z -(ss)mul.f r7.z, r12.z, r9.x -(ss)mov.f32f32 r9.x, r12.w -mov.f32f32 r4.w, r4.w -mul.f r12.z, r16.x, r16.x -mov.f32f32 r7.z, r7.z -mov.f32f32 r12.w, r7.w -sam (f32)(x)r16.y, r10.z, s#2, t#2 -(ss)mov.f32f32 r10.z, r19.y -mov.f32f32 r10.w, r9.x -mad.f32 r12.z, r15.z, r15.z, r12.z -mov.f32f32 r13.x, r15.z -(sy)cmps.f.lt r10.z, r10.z, r16.y -mov.f32f32 r13.w, r10.w -mov.f32f32 r12.z, r12.z -rcp r12.w, r12.w -add.f r12.x, c7.w, r12.x -cov.u32f32 r10.z, r10.z -add.f r13.w, c10.w, (neg)r13.w -mad.f32 r12.z, r8.x, r8.x, r12.z -mov.f32f32 r12.x, r12.x -mov.f32f32 r10.z, r10.z -mul.f r12.y, r13.w, r12.y -mov.f32f32 r2.x, r2.x -(ss)mul.f r12.x, r12.x, r12.w -cmps.f.ne r10.z, r10.z, c8.y -mov.f32f32 r12.y, r12.y -(ss)mov.f32f32 r12.w, r10.w -rsq r12.z, r12.z -(ss)mov.f32f32 r12.z, r12.z -sel.b32 r0.w, r0.w, r10.z, r4.x -mov.f32f32 r4.x, r19.y -add.f r10.z, c7.w, (neg)r12.w -mul.f r12.z, c7.x, r12.z -mov.f32f32 r12.x, r12.x -mov.f32f32 r4.x, r4.x -mul.f r10.z, r10.z, r11.y -mul.f r4.w, r12.z, r4.w -max.f r7.z, r7.z, r12.x -mov.f32f32 r4.x, r4.x -mov.f32f32 r10.z, r10.z -mov.f32f32 r4.w, r4.w -mov.f32f32 r7.z, r7.z -mad.f32 r4.x, c7.x, r4.x, c7.z -max.f r11.y, r12.y, r10.z -mov.f32f32 r12.x, r4.w -mov.f32f32 r4.w, r4.w -mov.f32f32 r4.x, r4.x -mov.f32f32 r11.y, r11.y -mad.f32 r0.y, c7.x, r0.y, r12.x -mul.f r2.x, r13.x, r2.x -mov.f32f32 r4.x, r4.x -min.f r8.w, r8.w, r11.y -mov.f32f32 r11.y, r9.y -mov.f32f32 r0.y, r0.y -mov.f32f32 r12.z, r4.x -mov.f32f32 r4.x, r18.y -mad.f32 r2.z, r11.y, r2.z, r3.x -mul.f r2.x, r2.x, r2.w -min.f r5.y, r5.y, r7.z -mov.f32f32 r7.z, r4.x -mov.f32f32 r2.z, r2.z -mov.f32f32 r2.x, r2.x -mov.f32f32 r11.y, r16.x -mov.f32f32 r7.z, r7.z -mov.f32f32 r12.x, r2.z -mov.f32f32 r8.z, r8.z -add.f r2.x, r11.y, (neg)r2.x -mad.f32 r7.z, c7.x, r7.z, c7.x -mov.f32f32 r11.y, r12.x -mov.f32f32 r12.w, r8.z -mov.f32f32 r2.x, r2.x -mov.f32f32 r7.z, r7.z -add.f r11.y, c10.w, (neg)r11.y -mov.f32f32 r13.x, r1.w -cmps.f.lt r13.y, r13.y, c9.z -mov.f32f32 r7.z, r7.z -mul.f r11.x, r11.y, r11.x -mad.f32 r2.x, c10.z, r2.x, c7.x -rcp r11.y, r12.w -absneg.f r13.w, (neg)r3.x -(ss)mov.f32f32 r12.w, r7.z -mov.f32f32 r7.z, r11.x -mov.f32f32 r11.x, r12.x -mov.f32f32 r2.x, r2.x -add.f r12.x, c10.w, r13.w -add.f r13.x, r13.x, (neg)c1.x -add.f r11.x, c7.z, (neg)r11.x -sam (f32)(xyz)r16.y, r12.z, s#0, t#0 -(sy)(ss)mov.f32f32 r12.z, r16.w -mov.f32f32 r4.x, r4.x -mov.f32f32 r12.w, r16.z -mov.f32f32 r14.y, r16.y -mul.f r10.x, r11.x, r10.x -mov.f32f32 r4.x, r4.x -mov.f32f32 r2.x, r2.x -(ss)mul.f r11.x, r12.x, r11.y -mov.f32f32 r10.x, r10.x -mad.f32 r4.x, c7.x, r4.x, c7.x -mov.f32f32 r2.x, r2.x -mov.f32f32 r11.x, r11.x -mov.f32f32 r11.y, r8.z -mov.f32f32 r4.x, r4.x -max.f r12.x, r7.z, r10.x -mov.f32f32 r14.z, r2.x -mov.f32f32 r2.x, r15.z -mov.f32f32 r4.x, r4.x -mov.f32f32 r12.x, r12.x -mov.f32f32 r6.x, r6.x -rcp r11.y, r11.y -add.f r13.w, c7.z, r13.w -mov.f32f32 r16.y, r4.x -mov.f32f32 r4.x, r18.w -min.f r8.w, r8.w, r12.x -mul.f r2.x, r2.x, r6.x -mov.f32f32 r6.x, r13.w -mov.f32f32 r4.x, r4.x -mov.f32f32 r8.w, r8.w -mov.f32f32 r10.w, r10.w -mov.f32f32 r12.x, r1.y -mad.f32 r4.x, c7.x, r4.x, c7.x -mul.f r2.x, r2.x, r2.w -(ss)mul.f r2.w, r6.x, r11.y -mad.f32 r6.x, r12.x, r8.w, r10.w -mov.f32f32 r4.x, r4.x -mov.f32f32 r2.x, r2.x -mov.f32f32 r10.w, r8.x -mov.f32f32 r6.x, r6.x -mov.f32f32 r4.x, r4.x -mov.f32f32 r2.w, r2.w -add.f r2.x, r10.w, (neg)r2.x -add.f r6.x, r6.x, c11.z -mov.f32f32 r16.z, r4.x -max.f r2.w, r11.x, r2.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r4.x, r6.x -mul.f r6.x, r13.x, r13.x -mov.f32f32 r2.w, r2.w -mov.f32f32 r10.w, r9.x -sam (f32)(xyz)r16.y, r16.y, s#0, t#0 -(sy)mov.f32f32 r11.x, r16.w -(ss)mov.f32f32 r11.y, r16.z -mov.f32f32 r12.x, r16.y -mul.f r4.x, c11.y, r4.x -sel.b32 r11.x, r12.z, r20.y, r11.x -mov.f32f32 r12.z, r19.y -sel.b32 r11.y, r12.w, r20.y, r11.y -sel.b32 r12.x, r14.y, r20.y, r12.x -mov.f32f32 r4.x, r4.x -mov.f32f32 r12.z, r12.z -min.f r5.x, r5.x, r7.x -min.f r7.x, r12.y, r10.z -mad.f32 r2.x, c10.z, r2.x, c7.x -mad.f32 r10.z, c7.x, r12.z, c7.z -min.f r2.w, r5.y, r2.w -max.f r5.x, r5.x, r7.x -min.f r5.y, r7.z, r10.x -mov.f32f32 r7.x, r10.z -mov.f32f32 r2.x, r2.x -mov.f32f32 r2.w, r2.w -mov.f32f32 r7.z, r13.z -mov.f32f32 r7.x, r7.x -max.f r5.x, r5.x, r5.y -mov.f32f32 r2.x, r2.x -mad.f32 r0.z, r7.z, r2.w, r0.z -mov.f32f32 r12.y, r7.x -mov.f32f32 r5.y, r9.z -mov.f32f32 r5.x, r5.x -mov.f32f32 r2.x, r2.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r5.y, r5.y -add.f r5.x, r8.w, (neg)r5.x -mov.f32f32 r14.w, r2.x -mov.f32f32 r2.x, r0.z -mad.f32 r5.y, c7.x, r5.y, c7.x -mov.f32f32 r5.x, r5.x -add.f r7.x, r10.w, (neg)c1.y -add.f r2.x, c10.w, (neg)r2.x -mov.f32f32 r5.y, r5.y -mad.f32 r5.x, c11.w, r5.x, c7.z -sam (f32)(xy)r16.y, r14.z, s#1, t#1 -(sy)mul.f r1.x, r1.x, r16.y -mov.f32f32 r4.z, r4.z -mov.f32f32 r5.y, r5.y -mov.f32f32 r5.x, r5.x +mul.f r9.y, r4.x, r9.y +mov.f32f32 r13.x, c8.y +mov.f32f32 r13.y, c10.y +sel.b32 r10.y, r10.y, r2.w, r11.w +mov.f32f32 r11.w, c8.y +mul.f r13.z, r15.y, r15.y +sel.b32 r13.x, r13.x, r5.z, r13.y +mov.f32f32 r13.y, c8.y +sel.b32 r10.y, r11.w, r6.w, r10.y +mad.f32 r11.w, r22.z, r22.z, r13.z +rcp r7.y, r7.y +add.f r10.w, c7.w, r10.w +sel.b32 r13.x, r13.y, r9.x, r13.x +mad.f32 r4.z, r0.y, r10.y, r4.z +mad.f32 r10.y, r24.w, r24.w, r11.w +(ss)mul.f r7.y, r10.w, r7.y +mad.f32 r1.x, r1.x, r13.x, r9.y +absneg.f r9.y, (neg)r24.w +mov.f32f32 r10.w, c10.x +max.f r4.z, c8.y, r4.z +max.f r7.y, r10.x, r7.y +cov.u32f32 r8.x, r8.x +sel.b32 r9.y, r9.y, r5.z, r10.w +mov.f32f32 r10.x, c8.y +mov.f32f32 r10.w, r4.z +rsq r10.y, r10.y +(ss)mul.f r10.y, c7.x, r10.y +min.f r6.y, r6.y, r7.y +sel.b32 r7.y, r10.x, r9.x, r9.y +mul.f r3.y, r10.w, r3.y +add.f r9.y, r17.x, (neg)c1.x +mul.f r4.w, r10.y, r4.w +mad.f32 r1.x, r1.w, r7.y, r1.x +mov.f32f32 r7.y, r11.x +mul.f r9.y, r9.y, r9.y +add.f r10.x, r18.y, (neg)c1.y +max.f r1.x, c8.y, r1.x +mov.f32f32 r10.y, r4.w +mul.f r4.x, r22.z, r4.x +mad.f32 r9.y, r10.x, r10.x, r9.y +mov.f32f32 r10.x, r1.x +add.f r10.w, r21.y, (neg)c1.z +mul.f r4.x, r4.x, r7.z +rcp r11.w, r7.y +absneg.f r13.x, (neg)r2.x +mul.f r4.y, r10.x, r4.y +mad.f32 r9.y, r10.w, r10.w, r9.y +add.f r4.x, r15.y, (neg)r4.x +add.f r10.x, c7.z, r13.x +mad.f32 r4.y, c7.x, r4.y, r10.y +cmps.f.ne r8.x, r8.x, c8.y +mad.f32 r13.y, c10.z, r4.x, c7.x +mul.f r1.w, r22.z, r1.w +sqrt r4.x, r9.y +(ss)mov.f32f32 r9.y, c2.x +(ss)mul.f r10.x, r10.x, r11.w +rcp r10.y, r11.x +add.f r10.w, c10.w, r13.x +mul.f r1.w, r1.w, r7.z +mad.f32 r14.x, c7.x, r15.y, c7.x +mad.f32 r13.w, c7.x, r22.z, c7.z +(ss)mul.f r7.z, r10.w, r10.y +add.f r1.w, r24.w, (neg)r1.w +rcp r9.y, r9.y +(ss)mul.f r4.x, r4.x, r9.y +(ss)mov.f32f32 r9.y, r18.y +max.f r7.z, r7.z, r10.x +mad.f32 r13.z, c10.z, r1.w, c7.x +sam (f32)(xyz)r13.w, r13.w, s#0, t#0 +mad.f32 r10.x, c7.x, r15.y, c7.x +mad.f32 r10.y, c7.x, r24.w, c7.x +min.f r1.w, r6.y, r7.z +log2 r4.x, r4.x +(ss)mul.f r4.x, c9.y, r4.x +mul.f r0.y, r9.y, r0.y +rcp r1.y, r1.y +mad.f32 r10.w, c7.x, r15.y, c7.x +sam (f32)(xy)r15.x, r13.y, s#1, t#1 +(sy)mul.f r1.x, r1.x, r15.x +mov.f32f32 r6.y, r1.w +sam (f32)(xyz)r15.z, r10.x, s#0, t#0 +(sy)sel.b32 r7.z, r14.x, r5.z, r15.w +sel.b32 r9.y, r14.y, r5.z, r16.x mul.f r1.x, r1.x, c7.w -mov.f32f32 r7.z, r4.z -mov.f32f32 r12.z, r5.y -mad.f32 r5.y, r7.x, r7.x, r6.x -add.f r6.x, r0.z, (neg)c1.x -cov.u32f32 r7.x, r13.y -rcp r5.x, r5.x -(ss)mov.f32f32 r5.x, r5.x -mov.f32f32 r1.x, r1.x -sqrt r7.z, r7.z -(ss)mov.f32f32 r7.z, r7.z -sam (f32)(xyz)r12.y, r12.y, s#0, t#0 -(sy)mov.f32f32 r8.w, r12.w -mov.f32f32 r9.z, r12.z -mov.f32f32 r10.x, r12.y -mul.f r4.x, r4.x, r5.x -mov.f32f32 r5.x, r8.w -mov.f32f32 r8.w, r9.z -mov.f32f32 r9.z, r10.x -mov.f32f32 r4.x, r4.x -sel.b32 r5.x, r5.x, r21.x, r11.x -sel.b32 r8.w, r8.w, r21.x, r11.y -sel.b32 r9.z, r9.z, r21.x, r12.x -mad.f32 r1.x, r1.x, r16.z, r4.w -mul.f r4.w, r5.x, r0.w -cmps.f.lt r5.x, r19.y, c12.x -mul.f r8.w, r8.w, r0.w -mul.f r0.w, r9.z, r0.w exp2 r4.x, r4.x -(ss)mov.f32f32 r4.x, r4.x +nop +(ss)rcp r4.x, r4.x +(ss)mul.f r4.x, c9.w, r4.x +mad.f32 r0.z, r12.y, r6.y, r0.z +mad.f32 r1.x, r1.x, r15.y, r4.w +mad.f32 r11.x, c7.x, r24.w, c7.x +add.f r4.x, c7.z, (neg)r4.x +mul.f r4.w, r17.x, r17.x +mov.f32f32 r10.x, r0.z +mad.f32 r4.w, r18.y, r18.y, r4.w +mad.f32 r13.y, c7.x, r14.w, c7.x +mad.f32 r13.x, c7.x, r12.x, c7.z +sam (f32)(x)r14.x, r10.w, s#2, t#2 +(sy)cmps.f.lt r10.y, r22.z, r14.x +mad.f32 r4.w, r21.y, r21.y, r4.w +(ss)add.f r10.w, c10.w, (neg)r10.x +mul.f r11.x, r9.w, c10.x +cov.u32f32 r10.y, r10.y +absneg.f r11.w, (neg)c0.x +sam (f32)(xyz)r14.x, r13.x, s#0, t#0 +(sy)sel.b32 r9.y, r14.z, r9.x, r9.y +sel.b32 r7.z, r14.y, r9.x, r7.z +cmps.f.ne r10.y, r10.y, c8.y +rsq r4.w, r4.w +(ss)mul.f r4.w, c7.x, r4.w +mad.f32 r11.x, c8.z, r11.w, (neg)r11.x +cmps.f.lt r2.z, r2.z, c8.y +sel.b32 r1.x, r1.x, r10.y, r4.y +sel.b32 r4.y, r13.w, r5.z, r15.z +mul.f r4.x, r4.w, r4.x +cov.u32f32 r2.z, r2.z +mul.f r4.w, r9.y, r1.x +cmps.f.lt r5.z, r7.x, c8.y +mul.f r7.z, r7.z, r1.x +sel.b32 r4.y, r14.x, r9.x, r4.y +mov.f32f32 r9.x, r4.x +cov.u32f32 r5.z, r5.z +mad.f32 r3.y, c7.x, r3.y, r9.x +mul.f r0.y, r0.y, r1.y +mul.f r1.x, r4.y, r1.x +cmps.f.ne r4.y, r5.z, c8.y +cmps.f.ne r2.z, r2.z, c8.y +mov.f32f32 r5.z, c8.y +add.f r0.y, r21.y, (neg)r0.y +sel.b32 r4.w, r4.w, r4.y, r8.y +sel.b32 r7.z, r7.z, r4.y, r8.z +sel.b32 r0.w, r1.x, r4.y, r0.w +mad.f32 r8.z, c10.z, r0.y, c7.x +sel.b32 r0.y, r5.x, r8.x, r4.w +sel.b32 r1.x, r5.y, r8.x, r7.z +sel.b32 r0.w, r2.y, r8.x, r0.w +mul.f r2.y, r18.y, r3.w +mul.f r3.w, r0.y, c14.z +cmps.f.lt r4.y, r7.x, c8.y +mul.f r4.w, r1.x, c14.y +mul.f r5.x, r0.w, c14.x +mul.f r1.y, r2.y, r1.y +cov.u32f32 r2.y, r4.y +sel.b32 r4.y, r5.z, r2.z, r11.x +add.f r5.y, r10.x, (neg)c1.x +add.f r1.y, r17.x, (neg)r1.y +cmps.f.ne r2.y, r2.y, c8.y +absneg.f r4.y, (neg)r4.y +mul.f r5.y, r5.y, r5.y +mad.f32 r8.y, c10.z, r1.y, c7.x +sel.b32 r0.y, r3.w, r2.y, r0.y +log2 r1.y, r3.z +(ss)mul.f r1.y, c9.x, r1.y +sel.b32 r1.x, r4.w, r2.y, r1.x +sel.b32 r0.w, r5.x, r2.y, r0.w +rcp r2.y, r4.y +(ss)mul.f r2.y, r10.w, r2.y +(ss)add.f r3.z, c7.z, (neg)r10.x +sam (f32)(xy)r13.x, r8.y, s#1, t#1 +(sy)mul.f r3.w, r4.z, r13.x +rcp r4.z, r4.y +mad.f32 r0.x, r12.w, r1.w, r0.x +exp2 r1.y, r1.y +(ss)add.f r1.w, c15.y, (neg)r1.y +mul.f r3.z, r3.z, r4.z +mul.f r3.w, r3.w, c7.w +mov.f32f32 r4.z, r0.x +mul.f r1.w, r1.w, c8.w +(ss)mul.f r1.y, r1.y, c7.z +mad.f32 r3.w, r3.w, r13.y, r4.x +mad.f32 r4.w, c7.x, r17.x, c7.x +mad.f32 r5.x, c7.x, r21.y, c7.x +add.f r1.y, r1.y, r1.w +max.f r1.w, r2.y, r3.z +add.f r4.x, c10.w, (neg)r4.z +mul.f r5.z, r9.w, c10.y +add.f r7.x, c15.y, (neg)r1.y +add.f r7.z, c15.y, (neg)r1.y +add.f r8.x, c15.y, (neg)r1.y +sam (f32)(x)r13.x, r4.w, s#2, t#2 +(sy)(ss)cmps.f.lt r4.w, r18.y, r13.x +mul.f r0.y, r7.x, r0.y +mul.f r1.x, r7.z, r1.x +mul.f r0.w, r8.x, r0.w +cov.u32f32 r4.w, r4.w +mad.f32 r3.x, c8.z, r3.x, (neg)r5.z +mov.f32f32 r5.x, c8.y +add.f r5.z, r4.z, (neg)c1.y +cmps.f.ne r4.w, r4.w, c8.y +mad.f32 r2.x, r7.y, r6.y, r2.x +sel.b32 r3.x, r5.x, r2.z, r3.x +mad.f32 r5.x, r5.z, r5.z, r5.y +sel.b32 r3.y, r3.w, r4.w, r3.y +mov.f32f32 r3.w, r17.x +absneg.f r3.x, (neg)r3.x +mov.f32f32 r4.w, r2.x +cmps.f.lt r5.y, r5.w, c9.z +mad.f32 r5.z, c7.x, r3.w, c7.x +mad.f32 r5.w, c7.x, r10.z, c7.x +add.f r3.w, r4.w, (neg)c1.z +absneg.f r6.y, (abs)r4.w +rcp r7.x, r3.x +(ss)mul.f r4.x, r4.x, r7.x +add.f r7.x, c7.w, (neg)r4.z +rcp r7.y, r3.x +mad.f32 r3.w, r3.w, r3.w, r5.x +cmps.f.lt r5.x, c11.x, r6.y +sam (f32)(xyz)r10.y, r5.z, s#0, t#0 +(ss)mad.f32 r5.w, c7.x, r17.x, c7.x +mad.f32 r5.z, c7.x, r18.y, c7.z +(ss)mul.f r6.y, r7.x, r7.y cov.u32f32 r5.x, r5.x -mov.f32f32 r1.x, r1.x -mad.f32 r9.z, c7.x, r16.x, c7.x -add.f r4.x, c7.z, r4.x +cov.u32f32 r5.y, r5.y +sqrt r3.w, r3.w +mov.f32f32 r7.x, c2.x +max.f r7.y, r4.x, r6.y cmps.f.ne r5.x, r5.x, c8.y -mov.f32f32 r10.x, r10.y -mov.f32f32 r9.z, r9.z -mov.f32f32 r4.x, r4.x -mov.f32f32 r10.y, r3.z -mov.f32f32 r10.z, r10.x -mov.f32f32 r9.z, r9.z -mov.f32f32 r5.y, r5.y -mad.f32 r7.z, c8.z, r10.y, r7.z -mul.f r10.y, c0.x, r10.z -mov.f32f32 r10.z, r17.z -rcp r4.x, r4.x -(ss)mov.f32f32 r4.x, r4.x -mov.f32f32 r10.w, r2.z -mov.f32f32 r11.x, r9.z -mov.f32f32 r9.z, r10.z -mad.f32 r11.y, c7.x, r8.x, c7.x -mov.f32f32 r12.x, r10.w -mov.f32f32 r7.z, r7.z -mad.f32 r9.z, c0.y, r9.z, r10.y -mov.f32f32 r10.y, r11.y -absneg.f r11.y, (abs)r12.x -mul.f r12.x, r7.z, c10.x -mov.f32f32 r9.z, r9.z -mov.f32f32 r12.y, r15.x -cmps.f.lt r11.y, c11.x, r11.y -mov.f32f32 r10.y, r10.y -mov.f32f32 r12.x, r12.x -mov.f32f32 r12.z, r12.y -cov.u32f32 r12.w, r11.y -mov.f32f32 r11.y, r10.y -absneg.f r10.y, (neg)c0.x -mad.f32 r9.z, c0.z, r12.z, r9.z -mov.f32f32 r12.z, r12.w -mov.f32f32 r12.w, r2.z -mov.f32f32 r10.y, r10.y -max.f r9.z, c8.y, r9.z -cmps.f.ne r12.z, r12.z, c8.y -mov.f32f32 r13.x, c8.y -sam (f32)(x)r14.y, r11.x, s#2, t#2 -(sy)(ss)cmps.f.lt r11.x, r15.z, r14.y -mov.f32f32 r9.z, r9.z -mad.f32 r10.y, c8.z, r10.y, (neg)r12.x -cmps.f.lt r4.z, r4.z, c8.y -mov.f32f32 r11.y, r13.x -mov.f32f32 r12.x, c10.x -add.f r12.w, r12.w, (neg)c1.z -cov.u32f32 r4.z, r4.z -log2 r9.z, r9.z -(ss)mul.f r9.z, c12.y, r9.z -sel.b32 r11.y, r11.y, r12.z, r12.x -mov.f32f32 r12.x, r1.w -cov.u32f32 r11.x, r11.x -mov.f32f32 r9.z, r9.z -cmps.f.ne r4.z, r4.z, c8.y -mov.f32f32 r13.x, c8.y -absneg.f r12.x, (abs)r12.x -mad.f32 r5.y, r12.w, r12.w, r5.y -mul.f r6.x, r6.x, r6.x -mov.f32f32 r7.w, r7.w -exp2 r9.z, r9.z -mov.f32f32 r10.x, r10.x -cmps.f.lt r12.x, c11.x, r12.x -mov.f32f32 r11.x, r11.x -sel.b32 r10.y, r13.x, r4.z, r10.y -mov.f32f32 r10.x, r10.x -cov.u32f32 r12.x, r12.x -cmps.f.ne r11.x, r11.x, c8.y -absneg.f r10.y, (neg)r10.y -mov.f32f32 r12.w, r10.x -mov.f32f32 r10.x, r10.z -mov.f32f32 r10.z, r12.x -sel.b32 r0.y, r1.x, r11.x, r0.y -mov.f32f32 r1.x, r15.z -mov.f32f32 r10.x, r10.x -cmps.f.ne r10.z, r10.z, c8.y -mov.f32f32 r11.x, r1.w -mov.f32f32 r1.x, r1.x -mov.f32f32 r13.x, r10.x -mov.f32f32 r10.x, r12.y -mov.f32f32 r11.x, r11.x -mad.f32 r1.x, c7.x, r1.x, c7.z -mov.f32f32 r12.x, r10.y -mov.f32f32 r10.x, r10.x -absneg.f r11.x, (neg)r11.x -mov.f32f32 r1.x, r1.x -mov.f32f32 r5.y, r5.y -mov.f32f32 r13.y, r10.x -mov.f32f32 r10.x, r11.x -mov.f32f32 r1.x, r1.x -rcp r11.x, r12.x -(ss)mul.f r2.x, r2.x, r11.x -mad.f32 r0.x, r7.w, r2.w, r0.x -cmps.f.ne r7.x, r7.x, c8.y -sqrt r5.y, r5.y -mov.f32f32 r7.w, r10.x -sam.3d (f32)(xyz)r12.w, r12.w, s#3, t#3 -(sy)mad.f32 r10.x, c13.z, r9.z, r13.y -mad.f32 r11.x, c13.y, r9.z, r13.x -(ss)mad.f32 r9.z, c13.x, r9.z, r12.w -sel.b32 r7.w, r7.w, r10.z, r11.y -sel.b32 r4.w, r4.w, r5.x, r10.x -mov.f32f32 r12.x, r1.x -mov.f32f32 r1.x, r16.x -sel.b32 r8.w, r8.w, r5.x, r11.x -sel.b32 r0.w, r0.w, r5.x, r9.z -mul.f r5.x, r2.y, r7.w -mov.f32f32 r1.x, r1.x -mov.f32f32 r7.w, c8.y -mov.f32f32 r2.x, r2.x -mov.f32f32 r9.z, r0.z -mad.f32 r1.x, c7.x, r1.x, c7.x -mov.f32f32 r7.w, r7.w -mov.f32f32 r10.x, c10.y -add.f r9.z, c7.z, (neg)r9.z -mov.f32f32 r1.x, r1.x -mov.f32f32 r11.x, r10.y -sel.b32 r7.w, r7.w, r12.z, r10.x -mov.f32f32 r10.x, c8.y -mov.f32f32 r1.x, r1.x -(ss)mov.f32f32 r5.y, r5.y -mov.f32f32 r11.y, c2.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r12.y, r1.x -mov.f32f32 r1.x, r10.x -rcp r10.x, r11.x -(ss)mul.f r9.z, r9.z, r10.x -add.f r10.x, r0.x, (neg)c1.y -mov.f32f32 r8.z, r8.z -(ss)rcp r11.x, r11.y -mov.f32f32 r1.x, r1.x -mov.f32f32 r9.z, r9.z -sam (f32)(xyz)r12.w, r12.x, s#0, t#0 -(sy)(ss)mov.f32f32 r11.y, r13.y -mov.f32f32 r12.x, r16.x -mov.f32f32 r12.y, r13.x -mov.f32f32 r12.w, r12.w -sel.b32 r1.x, r1.x, r10.z, r7.w -mov.f32f32 r7.w, r12.x -max.f r12.x, r2.x, r9.z -(ss)mul.f r5.y, r5.y, r11.x -mad.f32 r6.x, r10.x, r10.x, r6.x -mad.f32 r7.w, c7.x, r7.w, c7.x -mad.f32 r1.x, r1.y, r1.x, r5.x -mov.f32f32 r1.y, r12.x -mov.f32f32 r5.x, r0.x -mov.f32f32 r7.w, r7.w -mov.f32f32 r1.x, r1.x -mov.f32f32 r10.x, r10.w -add.f r5.x, c10.w, (neg)r5.x -mov.f32f32 r7.w, r7.w -mul.f r11.x, r7.z, c10.y -mov.f32f32 r10.x, r10.x -mov.f32f32 r5.y, r5.y -mov.f32f32 r13.x, r7.w -mov.f32f32 r7.w, r8.x -absneg.f r10.x, (neg)r10.x -mov.f32f32 r11.x, r11.x -mov.f32f32 r3.z, r3.z -mov.f32f32 r7.w, r7.w -mov.f32f32 r10.x, r10.x -mov.f32f32 r12.x, c10.x -mad.f32 r3.z, c8.z, r3.z, (neg)r11.x -mad.f32 r7.w, c7.x, r7.w, c7.x -mov.f32f32 r11.x, c8.y -sel.b32 r10.x, r10.x, r12.z, r12.x -mov.f32f32 r12.x, c8.y -mov.f32f32 r7.w, r7.w -sel.b32 r3.z, r11.x, r4.z, r3.z -log2 r5.y, r5.y -(ss)mul.f r5.y, c9.y, r5.y -mov.f32f32 r11.x, r12.x -mov.f32f32 r7.w, r7.w -absneg.f r3.z, (neg)r3.z -mov.f32f32 r5.y, r5.y -mov.f32f32 r6.x, r6.x -mov.f32f32 r13.y, r7.w -mov.f32f32 r7.w, r11.x -mov.f32f32 r11.x, r3.z -mad.f32 r2.w, r8.z, r2.w, r3.x -mov.f32f32 r3.x, r15.z -exp2 r5.y, r5.y -sel.b32 r7.w, r7.w, r10.z, r10.x -(ss)rcp r5.y, r5.y -(ss)mov.f32f32 r5.y, r5.y -sam (f32)(xyz)r13.x, r13.x, s#0, t#0 -(sy)mov.f32f32 r8.z, r13.z -mov.f32f32 r10.x, r13.y -mov.f32f32 r12.x, r13.x -mad.f32 r1.x, r6.w, r7.w, r1.x -sel.b32 r7.w, r11.y, r3.y, r8.z -mad.f32 r3.x, c7.x, r3.x, c7.z -sel.b32 r8.z, r12.y, r3.y, r10.x -sel.b32 r3.y, r12.w, r3.y, r12.x -mov.f32f32 r1.x, r1.x -mov.f32f32 r3.x, r3.x -rcp r10.x, r11.x -(ss)mul.f r5.x, r5.x, r10.x -mul.f r5.y, c9.w, r5.y -max.f r1.x, c8.y, r1.x -mov.f32f32 r3.x, r3.x -mov.f32f32 r5.x, r5.x -mov.f32f32 r10.x, r0.x -mov.f32f32 r1.x, r1.x -(ss)mov.f32f32 r11.x, r3.x -mov.f32f32 r3.x, r8.x -add.f r8.x, c7.w, (neg)r10.x -mov.f32f32 r10.x, r3.z -mov.f32f32 r11.y, r1.x -mad.f32 r3.x, c7.x, r3.x, c7.x -mov.f32f32 r5.y, r5.y -mov.f32f32 r2.w, r2.w -mov.f32f32 r1.x, r1.x -mov.f32f32 r3.x, r3.x -mul.f r4.x, r11.y, r4.x -rcp r10.x, r10.x -(ss)mul.f r8.x, r8.x, r10.x -add.f r5.y, c7.z, (neg)r5.y -mov.f32f32 r3.x, r3.x -mov.f32f32 r4.x, r4.x -mov.f32f32 r8.x, r8.x -mov.f32f32 r5.y, r5.y -mov.f32f32 r11.y, r3.x -mov.f32f32 r3.x, r1.w -(ss)mov.f32f32 r10.x, r1.w -max.f r12.x, r5.x, r8.x -add.f r12.y, r2.w, (neg)c1.z -mov.f32f32 r12.w, r9.x -mov.f32f32 r13.x, r2.w -sam (f32)(xyz)r14.y, r11.x, s#0, t#0 -(sy)(ss)mov.f32f32 r11.x, r14.w -mov.f32f32 r11.y, r14.z -mov.f32f32 r13.y, r14.y -mul.f r3.x, r3.x, r10.x -mov.f32f32 r10.x, r11.x -mov.f32f32 r11.x, r11.y -mov.f32f32 r11.y, r13.y -mov.f32f32 r13.y, r9.x -sel.b32 r7.w, r10.x, r6.z, r7.w -sel.b32 r8.z, r11.x, r6.z, r8.z -sel.b32 r3.y, r11.y, r6.z, r3.y -mov.f32f32 r6.z, r9.x -mul.f r7.w, r7.w, r0.y -mov.f32f32 r10.x, r4.y -mul.f r8.z, r8.z, r0.y -mul.f r0.y, r3.y, r0.y -mad.f32 r3.x, r13.y, r6.z, r3.x -cmps.f.lt r3.y, r10.x, c8.y -mov.f32f32 r6.z, r12.x -mad.f32 r6.x, r12.y, r12.y, r6.x -mov.f32f32 r3.x, r3.x -cov.u32f32 r3.y, r3.y -mov.f32f32 r10.x, r2.z -mov.f32f32 r11.x, r2.z -min.f r1.y, r1.y, r6.z -cmps.f.ne r3.y, r3.y, c8.y -mov.f32f32 r6.z, r2.w -mad.f32 r3.x, r10.x, r11.x, r3.x -mov.f32f32 r6.x, r6.x -sel.b32 r4.w, r7.w, r3.y, r4.w -sel.b32 r7.w, r8.z, r3.y, r8.w -sel.b32 r0.y, r0.y, r3.y, r0.w -add.f r0.w, c10.w, (neg)r6.z -sel.b32 r3.y, r5.w, r7.x, r4.w -sel.b32 r4.w, r7.y, r7.x, r7.w -sel.b32 r0.y, r3.w, r7.x, r0.y -rsq r3.x, r3.x -(ss)mov.f32f32 r3.x, r3.x -mov.f32f32 r3.w, r3.y -mov.f32f32 r5.w, r4.w -mov.f32f32 r6.z, r0.y -mul.f r3.x, c7.x, r3.x -mul.f r3.w, r3.w, c14.z -mul.f r5.w, r5.w, c14.y -mul.f r6.z, r6.z, c14.x -mul.f r3.x, r3.x, r5.y -mov.f32f32 r3.w, r3.w -cmps.f.lt r4.y, r4.y, c8.y -mov.f32f32 r5.y, r5.w -mov.f32f32 r5.w, r6.z -mov.f32f32 r3.x, r3.x -cov.u32f32 r4.y, r4.y -mul.f r6.z, r7.z, c10.x -sqrt r6.x, r6.x -(ss)mov.f32f32 r6.x, r6.x -mov.f32f32 r7.x, r3.x -cmps.f.ne r4.y, r4.y, c8.y -mad.f32 r4.x, c7.x, r4.x, r7.x -mov.f32f32 r6.z, r6.z -absneg.f r7.x, (neg)c0.z -sel.b32 r3.y, r3.w, r4.y, r3.y -mov.f32f32 r3.w, r11.z -sel.b32 r4.w, r5.y, r4.y, r4.w -sel.b32 r0.y, r5.w, r4.y, r0.y -mov.f32f32 r4.x, r4.x -add.f r3.w, c7.z, (neg)r3.w -mov.f32f32 r3.x, r3.x -mov.f32f32 r4.y, r12.w -mov.f32f32 r2.y, r2.y -mov.f32f32 r3.w, r3.w -mov.f32f32 r5.y, r7.x -mov.f32f32 r5.w, c2.x -mul.f r2.y, r4.y, r2.y -absneg.f r4.y, (abs)r13.x -mad.f32 r5.y, c8.z, r5.y, (neg)r6.z -mov.f32f32 r6.z, c8.y +sam (f32)(xyz)r13.x, r5.z, s#0, t#0 +(sy)(ss)sel.b32 r5.z, r13.z, r2.w, r10.w +mad.f32 r8.y, c7.x, r17.y, c7.x +mad.f32 r8.x, c7.x, r9.z, c7.z +sel.b32 r5.w, r13.y, r2.w, r10.z +sel.b32 r2.w, r13.x, r2.w, r10.y +min.f r1.w, r1.w, r7.y +add.f r7.y, c10.w, (neg)r4.w +mul.f r7.z, r8.w, c10.x +absneg.f r8.z, (neg)c0.z +sam (f32)(xyz)r8.w, r8.x, s#0, t#0 +(sy)sel.b32 r5.z, r9.y, r6.w, r5.z +sel.b32 r5.w, r9.x, r6.w, r5.w +sel.b32 r2.w, r8.w, r6.w, r2.w +mad.f32 r6.w, c8.z, r8.z, (neg)r7.z +mul.f r5.z, r5.z, r3.y +cmps.f.lt r7.z, r18.y, c12.x +mul.f r5.w, r5.w, r3.y +mul.f r2.w, r2.w, r3.y +mov.f32f32 r3.y, c8.y +cov.u32f32 r7.z, r7.z +rcp r7.x, r7.x +(ss)mul.f r3.w, r3.w, r7.x +(ss)mov.f32f32 r7.x, c8.y +sel.b32 r2.z, r3.y, r2.z, r6.w +cmps.f.ne r3.y, r7.z, c8.y +mad.f32 r6.w, c0.y, r7.w, r12.z +mov.f32f32 r7.z, c10.x +mad.f32 r6.w, c0.z, r11.y, r6.w +absneg.f r2.z, (neg)r2.z log2 r3.w, r3.w -(ss)mul.f r3.w, c9.x, r3.w -mul.f r2.y, r2.y, r11.w -rcp r5.w, r5.w -(ss)mul.f r5.w, r6.x, r5.w -sel.b32 r4.z, r6.z, r4.z, r5.y -mov.f32f32 r3.w, r3.w +(ss)mul.f r3.w, c9.y, r3.w +sel.b32 r7.x, r7.x, r5.x, r7.z +max.f r6.w, c8.y, r6.w +absneg.f r0.z, (abs)r0.z +cmps.f.ne r5.y, r5.y, c8.y +mad.f32 r8.y, c7.x, r10.x, c7.x +mad.f32 r8.x, c7.x, r4.z, c7.z mov.f32f32 r2.y, r2.y -mov.f32f32 r5.y, r1.w -absneg.f r4.z, (neg)r4.z -mov.f32f32 r5.w, r5.w -cmps.f.lt r4.y, c11.x, r4.y -mov.f32f32 r5.y, r5.y +rcp r7.z, r2.z +(ss)mul.f r7.y, r7.y, r7.z +log2 r6.w, r6.w +(ss)mul.f r6.w, c12.y, r6.w +add.f r7.z, c7.z, (neg)r4.w +rcp r8.z, r2.z +nop exp2 r3.w, r3.w -(ss)add.f r6.x, c15.y, (neg)r3.w -mov.f32f32 r6.z, r4.z -cov.u32f32 r4.y, r4.y -add.f r2.y, r5.y, (neg)r2.y -mul.f r5.y, r6.x, c8.w -(ss)mul.f r3.w, r3.w, c7.z -log2 r5.w, r5.w -(ss)mul.f r5.w, c9.y, r5.w -mov.f32f32 r2.y, r2.y -rcp r6.x, r6.z -(ss)mul.f r0.w, r0.w, r6.x -add.f r3.w, r3.w, r5.y -mov.f32f32 r5.y, r5.w -mad.f32 r2.y, c10.z, r2.y, c7.x -mov.f32f32 r0.w, r0.w -add.f r5.w, c15.y, (neg)r3.w -add.f r6.x, c15.y, (neg)r3.w -(ss)add.f r6.z, c15.y, (neg)r3.w -mov.f32f32 r2.y, r2.y -mul.f r3.y, r5.w, r3.y -mul.f r4.w, r6.x, r4.w -mul.f r0.y, r6.z, r0.y -mov.f32f32 r2.y, r2.y -mov.f32f32 r5.w, r2.w -exp2 r5.y, r5.y nop -(ss)rcp r5.y, r5.y -(ss)mov.f32f32 r5.y, r5.y -mov.f32f32 r2.y, r2.y -add.f r5.w, c7.z, (neg)r5.w -mov.f32f32 r6.x, r4.z -mul.f r5.y, c9.w, r5.y -mov.f32f32 r7.x, r2.y -mov.f32f32 r2.y, r12.w -mov.f32f32 r6.z, r6.w -mov.f32f32 r5.y, r5.y -mov.f32f32 r4.y, r4.y -rcp r6.x, r6.x -(ss)mul.f r5.w, r5.w, r6.x -mul.f r2.y, r2.y, r6.z -add.f r5.y, c7.z, (neg)r5.y -cmps.f.ne r4.y, r4.y, c8.y -mov.f32f32 r5.w, r5.w -mul.f r2.y, r2.y, r11.w -mov.f32f32 r5.y, r5.y -(ss)mul.f r6.x, r0.z, r0.z -max.f r6.z, r0.w, r5.w -mov.f32f32 r2.y, r2.y -mov.f32f32 r6.w, r2.z -mad.f32 r6.x, r0.x, r0.x, r6.x -mov.f32f32 r6.z, r6.z -mov.f32f32 r7.y, c8.y -mov.f32f32 r6.w, r6.w -mov.f32f32 r6.x, r6.x -min.f r1.y, r1.y, r6.z -mad.f32 r6.x, r2.w, r2.w, r6.x -add.f r2.y, r6.w, (neg)r2.y -mov.f32f32 r6.z, r7.y -mov.f32f32 r1.y, r1.y -mov.f32f32 r6.w, c10.x -mov.f32f32 r2.y, r2.y -mov.f32f32 r7.y, r0.x -mov.f32f32 r7.z, r3.z -rsq r6.x, r6.x -(ss)mov.f32f32 r6.x, r6.x -mad.f32 r2.y, c10.z, r2.y, c7.x -sel.b32 r6.z, r6.z, r4.y, r6.w -mad.f32 r6.w, r7.z, r1.y, r7.y -absneg.f r7.y, (abs)r0.z -mov.f32f32 r2.y, r2.y -mul.f r6.x, c7.x, r6.x -mov.f32f32 r6.w, r6.w -cmps.f.lt r7.y, c11.x, r7.y -mov.f32f32 r2.y, r2.y -mul.f r5.y, r6.x, r5.y -add.f r6.x, r6.w, c11.z -cov.u32f32 r6.w, r7.y -mov.f32f32 r2.y, r2.y -mov.f32f32 r5.y, r5.y -mov.f32f32 r6.x, r6.x -mov.f32f32 r6.w, r6.w -mov.f32f32 r7.y, r2.y -mov.f32f32 r2.y, r5.y -mov.f32f32 r5.y, r5.y -mul.f r6.x, c11.y, r6.x -cmps.f.ne r6.w, r6.w, c8.y -mov.f32f32 r7.z, r0.z -min.f r2.x, r2.x, r9.z -sam (f32)(xy)r11.x, r7.x, s#1, t#1 -(sy)mul.f r1.x, r1.x, r11.x -mov.f32f32 r6.x, r6.x -min.f r5.x, r5.x, r8.x -(ss)absneg.f r7.x, (neg)r7.z -mul.f r1.x, r1.x, c7.w -cmps.f.lt r7.y, r9.w, c9.z -max.f r2.x, r2.x, r5.x -min.f r0.w, r0.w, r5.w -mov.f32f32 r1.x, r1.x -mov.f32f32 r5.x, r7.x -cov.u32f32 r5.w, r7.y -max.f r0.w, r2.x, r0.w -mad.f32 r1.x, r1.x, r11.y, r3.x -mov.f32f32 r2.x, r5.x -cmps.f.ne r3.x, r5.w, c8.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.x, r1.x -mov.f32f32 r5.x, r1.w -sel.b32 r2.x, r2.x, r6.w, r6.z -add.f r0.w, r1.y, (neg)r0.w -mov.f32f32 r1.y, r0.x -mad.f32 r5.x, c7.x, r5.x, c7.x -mul.f r2.x, r10.y, r2.x -mov.f32f32 r5.w, c8.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r5.x, r5.x -mov.f32f32 r6.z, r10.y -mov.f32f32 r5.w, r5.w -mov.f32f32 r7.x, r0.x -mov.f32f32 r5.x, r5.x -mad.f32 r0.w, c11.w, r0.w, c7.z -mov.f32f32 r7.y, c10.y -mul.f r1.y, r1.y, r6.z -mov.f32f32 r7.z, r5.x -mov.f32f32 r5.x, r2.z -mov.f32f32 r0.w, r0.w -sel.b32 r5.w, r5.w, r4.y, r7.y -mov.f32f32 r6.z, c8.y -mad.f32 r5.x, c7.x, r5.x, c7.x -mov.f32f32 r7.y, r3.z -mov.f32f32 r7.w, r4.z -mov.f32f32 r6.z, r6.z -mov.f32f32 r5.x, r5.x -rcp r0.w, r0.w -(ss)mov.f32f32 r0.w, r0.w -mul.f r7.x, r7.x, r7.w -mov.f32f32 r6.z, r6.z -mov.f32f32 r5.x, r5.x -mul.f r0.w, r6.x, r0.w -rcp r6.x, r7.y -(ss)mul.f r1.y, r1.y, r6.x -sel.b32 r5.w, r6.z, r6.w, r5.w -mov.f32f32 r7.w, r5.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.y, r1.y -mov.f32f32 r5.x, r0.z -mad.f32 r2.x, r3.z, r5.w, r2.x -mul.f r3.z, r7.x, r6.x -mov.f32f32 r5.w, r9.x -sam (f32)(x)r7.x, r7.z, s#2, t#2 -mov.f32f32 r6.x, r9.x -exp2 r0.w, r0.w -(ss)mov.f32f32 r0.w, r0.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r6.z, r2.w -(sy)cmps.f.lt r6.x, r6.x, r7.x -add.f r0.w, c7.z, r0.w -add.f r1.y, r5.x, (neg)r1.y -mov.f32f32 r5.x, r6.z -cov.u32f32 r6.x, r6.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.y, r1.y -absneg.f r5.x, (neg)r5.x -mov.f32f32 r6.x, r6.x -mov.f32f32 r3.z, r3.z -mad.f32 r1.y, c10.z, r1.y, c7.x -mov.f32f32 r5.x, r5.x -cmps.f.ne r6.x, r6.x, c8.y -rcp r0.w, r0.w -(ss)mov.f32f32 r0.w, r0.w -mov.f32f32 r6.z, c10.x -mov.f32f32 r1.y, r1.y -sel.b32 r1.x, r1.x, r6.x, r4.x -mov.f32f32 r4.x, r5.w -sel.b32 r5.x, r5.x, r4.y, r6.z -mov.f32f32 r5.w, c8.y -mov.f32f32 r1.y, r1.y -mov.f32f32 r4.x, r4.x -mov.f32f32 r6.x, r2.w -mov.f32f32 r5.w, r5.w -mov.f32f32 r1.y, r1.y -mad.f32 r4.x, c7.x, r4.x, c7.z -add.f r3.z, r6.x, (neg)r3.z -mov.f32f32 r5.w, r5.w -mov.f32f32 r6.x, r0.x -mov.f32f32 r4.x, r4.x -mov.f32f32 r7.x, r1.y -mov.f32f32 r1.y, r3.z -sel.b32 r3.z, r5.w, r6.w, r5.x -mov.f32f32 r4.x, r4.x -mov.f32f32 r5.x, r6.x -mad.f32 r1.y, c10.z, r1.y, c7.x -mov.f32f32 r1.w, r1.w -mov.f32f32 r5.w, r4.x -mad.f32 r2.x, r4.z, r3.z, r2.x -mov.f32f32 r1.y, r1.y -mov.f32f32 r3.z, r1.w -mad.f32 r4.x, c7.x, r5.x, c7.z -mov.f32f32 r1.w, r1.w -mov.f32f32 r4.z, r9.x -mov.f32f32 r3.z, r3.z -mov.f32f32 r2.x, r2.x -mov.f32f32 r1.y, r1.y -mov.f32f32 r4.x, r4.x -mad.f32 r3.z, c7.x, r3.z, c7.x -max.f r2.x, c8.y, r2.x -mov.f32f32 r1.y, r1.y -mov.f32f32 r4.x, r4.x +(ss)rcp r3.w, r3.w +(ss)mul.f r3.w, c9.w, r3.w +mul.f r7.z, r7.z, r8.z +cmps.f.lt r0.z, c11.x, r0.z +exp2 r6.w, r6.w +mov.f32f32 r8.z, r11.z +mov.f32f32 r8.w, r7.w +mov.f32f32 r9.x, r11.y +max.f r7.w, r7.y, r7.z +add.f r3.w, c7.z, (neg)r3.w +mul.f r9.y, r10.x, r10.x +cov.u32f32 r0.z, r0.z +min.f r1.w, r1.w, r7.w +mad.f32 r7.w, r4.z, r4.z, r9.y +sam.3d (f32)(xyz)r8.z, r8.z, s#3, t#3 +(sy)(ss)mad.f32 r9.x, c13.z, r6.w, r9.x +mad.f32 r8.w, c13.y, r6.w, r8.w +mad.f32 r6.w, c13.x, r6.w, r8.z +mov.f32f32 r8.z, r1.w +sel.b32 r5.z, r5.z, r3.y, r9.x mov.f32f32 r3.z, r3.z -mov.f32f32 r2.x, r2.x -mov.f32f32 r7.y, r1.y -mov.f32f32 r7.z, r4.x -mov.f32f32 r1.y, r3.z -mov.f32f32 r3.z, r2.x -mov.f32f32 r2.x, r2.x -mov.f32f32 r4.x, r0.z -mov.f32f32 r6.x, r1.y -mul.f r0.w, r3.z, r0.w -sam (f32)(xy)r9.z, r7.x, s#1, t#1 -(sy)mul.f r1.y, r2.x, r9.z -mov.f32f32 r2.x, r4.x -mov.f32f32 r1.w, r1.w -mov.f32f32 r3.z, r4.z -mad.f32 r4.x, c7.x, r0.z, c7.x -sam (f32)(xyz)r11.x, r5.w, s#0, t#0 -(sy)mov.f32f32 r4.z, r11.z -mad.f32 r1.w, c7.x, r1.w, c7.x -mov.f32f32 r5.x, r11.y -(ss)mov.f32f32 r5.w, r11.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.w, r1.w -mul.f r1.y, r1.y, c7.w -mad.f32 r2.x, c7.x, r2.x, c7.x -mad.f32 r3.z, c7.x, r3.z, c7.z -mov.f32f32 r1.w, r1.w -mad.f32 r0.w, c7.x, r0.w, r5.y -mov.f32f32 r1.y, r1.y -mov.f32f32 r2.x, r2.x -mov.f32f32 r7.x, r1.w -mov.f32f32 r1.w, r10.w -mov.f32f32 r0.w, r0.w -mad.f32 r1.y, r1.y, r9.w, r2.y -mov.f32f32 r2.x, r2.x -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.y, r3.z +sel.b32 r5.w, r5.w, r3.y, r8.w +sel.b32 r2.w, r2.w, r3.y, r6.w +mad.f32 r3.y, r4.w, r4.w, r7.w +min.f r2.y, r2.y, r3.z mov.f32f32 r3.z, r4.x -mov.f32f32 r7.w, r2.x -mad.f32 r1.w, c7.x, r1.w, c7.x -mov.f32f32 r1.y, r1.y -mov.f32f32 r2.x, r3.z -mov.f32f32 r2.y, r2.y -mov.f32f32 r1.w, r1.w -mov.f32f32 r0.z, r0.z -sam (f32)(xyz)r9.z, r7.z, s#0, t#0 -mov.f32f32 r3.z, r0.x -(ss)mov.f32f32 r7.z, r2.x -mov.f32f32 r1.w, r1.w -mad.f32 r2.x, c7.x, r2.w, c7.x -(sy)mov.f32f32 r4.x, r10.x -mov.f32f32 r5.y, r9.w -mov.f32f32 r7.y, r1.w -mov.f32f32 r1.w, r2.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r2.x, r9.z -mov.f32f32 r8.z, r2.y -mov.f32f32 r2.y, r2.z -mad.f32 r0.z, c7.x, r0.z, c7.x -sam (f32)(xyz)r9.z, r7.x, s#0, t#0 -(sy)mov.f32f32 r2.z, r10.x -mov.f32f32 r6.x, r9.w -mov.f32f32 r6.z, r9.z -mov.f32f32 r1.w, r1.w -sel.b32 r2.z, r4.z, r12.z, r2.z -mov.f32f32 r2.y, r2.y -sel.b32 r4.z, r5.x, r12.z, r6.x -sel.b32 r5.x, r5.w, r12.z, r6.z -mov.f32f32 r7.w, r1.w -mad.f32 r1.w, c7.x, r2.y, c7.x -mov.f32f32 r0.z, r0.z -mad.f32 r2.y, c7.x, r3.z, c7.z -mov.f32f32 r3.z, r2.w -mov.f32f32 r1.w, r1.w -mov.f32f32 r0.z, r0.z -(ss)nop -sam (f32)(x)r7.x, r7.z, s#2, t#2 -mov.f32f32 r2.y, r2.y -(sy)cmps.f.lt r0.x, r0.x, r7.x -mov.f32f32 r1.w, r1.w -mov.f32f32 r5.w, r0.z -mov.f32f32 r0.z, r3.z -cov.u32f32 r0.x, r0.x -mov.f32f32 r8.w, r1.w -mov.f32f32 r1.w, r2.y -mad.f32 r0.z, c7.x, r0.z, c7.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r2.y, r2.w -mov.f32f32 r7.x, r1.w -mov.f32f32 r0.z, r0.z -(ss)nop -sam (f32)(xyz)r7.y, r8.z, s#0, t#0 -(sy)mov.f32f32 r1.w, r7.w -mov.f32f32 r2.w, r7.z -mov.f32f32 r3.z, r7.y -cmps.f.ne r0.x, r0.x, c8.y -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.w, r2.w -mov.f32f32 r3.z, r3.z -sel.b32 r0.x, r1.y, r0.x, r0.w -sel.b32 r0.w, r1.w, r10.z, r2.z -sel.b32 r1.y, r2.w, r10.z, r4.z -sel.b32 r1.w, r3.z, r10.z, r5.x -mov.f32f32 r0.z, r0.z -mul.f r0.w, r0.w, r1.x -cmps.f.lt r2.z, r9.x, c12.x -mul.f r1.y, r1.y, r1.x -mul.f r1.x, r1.w, r1.x -mov.f32f32 r6.x, r0.z -cov.u32f32 r0.z, r2.z -mad.f32 r1.w, c7.x, r2.y, c7.x -mov.f32f32 r2.y, r14.x -mov.f32f32 r2.z, r14.x +mov.f32f32 r4.x, r6.y cmps.f.ne r0.z, r0.z, c8.y -mov.f32f32 r1.w, r1.w -mul.f r2.y, c0.x, r2.y -mov.f32f32 r2.w, r15.w -sam (f32)(xyz)r7.y, r5.w, s#0, t#0 -(sy)mov.f32f32 r3.z, r7.z -mov.f32f32 r4.z, r7.y -mov.f32f32 r5.x, r7.w -(ss)mov.f32f32 r5.w, r2.w -sel.b32 r3.z, r5.y, r4.y, r3.z -sel.b32 r2.x, r2.x, r4.y, r4.z -sel.b32 r4.x, r4.x, r4.y, r5.x -mad.f32 r2.y, c0.y, r5.w, r2.y -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.z, r2.z -mov.f32f32 r4.y, r9.y -mov.f32f32 r2.y, r2.y -mov.f32f32 r7.y, r1.w -mov.f32f32 r7.z, r2.z -mov.f32f32 r1.w, r4.y -mov.f32f32 r2.z, r2.w -mov.f32f32 r2.w, r4.y -cmps.f.lt r4.y, r8.y, c8.y -mad.f32 r1.w, c0.z, r1.w, r2.y -sam (f32)(xyz)r8.z, r7.x, s#0, t#0 -(sy)mov.f32f32 r2.y, r8.z -mov.f32f32 r4.z, r9.x -mov.f32f32 r5.x, r8.w -max.f r1.w, c8.y, r1.w -mov.f32f32 r2.y, r2.y -mov.f32f32 r4.z, r4.z -mov.f32f32 r5.x, r5.x -mov.f32f32 r1.w, r1.w -sel.b32 r2.x, r2.y, r6.w, r2.x -sel.b32 r2.y, r4.z, r6.w, r4.x -sel.b32 r3.z, r5.x, r6.w, r3.z -mov.f32f32 r2.z, r2.z -mov.f32f32 r2.w, r2.w -cov.u32f32 r4.x, r4.y -log2 r1.w, r1.w -(ss)mul.f r1.w, c12.y, r1.w -mul.f r2.y, r2.y, r0.x -mul.f r3.z, r3.z, r0.x -mul.f r0.x, r2.x, r0.x -mov.f32f32 r1.w, r1.w -mov.f32f32 r7.w, r2.z -mov.f32f32 r8.x, r2.w -mov.f32f32 r2.x, r4.x -mov.f32f32 r2.z, r8.y -mov.f32f32 r2.w, c7.z -nop +absneg.f r6.y, (neg)r10.x +mad.f32 r1.w, r3.x, r1.w, r4.z +min.f r3.z, r3.z, r4.x +rsq r3.y, r3.y +(ss)mul.f r3.y, c7.x, r3.y +sel.b32 r4.x, r6.y, r0.z, r7.x +add.f r1.w, r1.w, c11.z +max.f r2.y, r2.y, r3.z +mov.f32f32 r3.z, r7.y +mov.f32f32 r6.y, r7.z +mul.f r3.y, r3.y, r3.w +mul.f r3.w, r4.y, r4.x +mov.f32f32 r4.x, c8.y +min.f r3.z, r3.z, r6.y +mov.f32f32 r6.y, r3.y +mov.f32f32 r6.w, c10.y +mul.f r1.w, c11.y, r1.w +max.f r2.y, r2.y, r3.z +sam (f32)(xyz)r7.x, r8.x, s#0, t#0 +absneg.f r3.z, (neg)r4.w +sel.b32 r4.x, r4.x, r5.x, r6.w +mov.f32f32 r6.w, c8.y +add.f r2.y, r8.z, (neg)r2.y +mad.f32 r7.w, c7.x, r10.x, c7.x +(ss)mov.f32f32 r8.y, c10.x +sel.b32 r4.x, r6.w, r0.z, r4.x +mad.f32 r2.y, c11.w, r2.y, c7.z +mad.f32 r8.x, c7.x, r4.w, c7.x +sel.b32 r3.z, r3.z, r5.x, r8.y +mad.f32 r8.y, c7.x, r10.x, c7.x +mul.f r4.y, r4.z, r4.y +rcp r6.w, r3.x +(ss)mad.f32 r3.x, r3.x, r4.x, r3.w +mov.f32f32 r3.w, c8.y +rcp r2.y, r2.y +(ss)mul.f r1.w, r1.w, r2.y +sam (f32)(xyz)r8.z, r7.w, s#0, t#0 +(sy)(ss)sel.b32 r2.y, r7.x, r5.x, r8.z +sel.b32 r4.x, r7.z, r5.x, r9.x +sel.b32 r5.x, r7.y, r5.x, r8.w +sel.b32 r3.z, r3.w, r0.z, r3.z +mad.f32 r7.y, c7.x, r2.x, c7.x +mad.f32 r8.z, c7.x, r4.w, c7.x exp2 r1.w, r1.w -cmps.f.ne r2.x, r2.x, c8.y -cmps.f.lt r2.z, r2.z, c8.y -sam.3d (f32)(xyz)r6.z, r7.z, s#3, t#3 -(sy)(ss)mad.f32 r4.x, c13.z, r1.w, r7.x -mad.f32 r4.y, c13.y, r1.w, r6.w -(ss)mad.f32 r1.w, c13.x, r1.w, r6.z -cov.u32f32 r2.z, r2.z -sel.b32 r0.w, r0.w, r0.z, r4.x -sel.b32 r1.y, r1.y, r0.z, r4.y -sel.b32 r0.z, r1.x, r0.z, r1.w -cmps.f.ne r1.x, r2.z, c8.y -mov.f32f32 r1.w, r2.w -(rpt1)nop -sel.b32 r0.w, r2.y, r1.x, r0.w -sel.b32 r1.y, r3.z, r1.x, r1.y -sel.b32 r0.x, r0.x, r1.x, r0.z +(ss)add.f r1.w, c7.z, r1.w +mad.f32 r2.x, r2.z, r3.z, r3.x +mad.f32 r7.x, c7.x, r0.x, c7.z +mul.f r0.x, r4.y, r6.w +mul.f r2.z, r4.z, r2.z +cmps.f.lt r3.x, r12.w, c8.y +max.f r2.x, c8.y, r2.x +rcp r1.w, r1.w +add.f r0.x, r10.x, (neg)r0.x +sam (f32)(x)r7.z, r8.y, s#2, t#2 +(sy)cmps.f.lt r3.z, r4.z, r7.z +mul.f r2.z, r2.z, r6.w +mov.f32f32 r3.w, r2.x +mad.f32 r4.y, c10.z, r0.x, c7.x +sam (f32)(xyz)r6.w, r7.x, s#0, t#0 +(sy)sel.b32 r0.x, r7.x, r0.z, r5.x +add.f r2.z, r4.w, (neg)r2.z +(ss)mul.f r1.w, r3.w, r1.w +sel.b32 r3.w, r7.y, r0.z, r4.x +cov.u32f32 r3.z, r3.z +mad.f32 r4.z, c10.z, r2.z, c7.x +mad.f32 r2.z, c7.x, r1.w, r6.y +sel.b32 r0.z, r6.w, r0.z, r2.y +cmps.f.ne r2.y, r3.z, c8.y +cov.u32f32 r3.x, r3.x +cmps.f.lt r3.z, r12.w, c8.y +mov.f32f32 r1.w, c7.z +sam (f32)(xy)r4.x, r4.y, s#1, t#1 +(sy)mul.f r2.x, r2.x, r4.x +cmps.f.ne r3.x, r3.x, c8.y +cov.u32f32 r3.z, r3.z nop -sel.b32 r0.z, r6.y, r3.x, r0.w -sel.b32 r0.w, r5.z, r3.x, r1.y -sel.b32 r0.x, r1.z, r3.x, r0.x +mul.f r2.x, r2.x, c7.w +(rpt2)nop +mad.f32 r2.x, r2.x, r4.y, r3.y +cmps.f.ne r3.y, r3.z, c8.y +(rpt1)nop +sel.b32 r2.x, r2.x, r2.y, r2.z +(rpt2)nop +mul.f r2.y, r3.w, r2.x +mul.f r0.x, r0.x, r2.x +mul.f r0.z, r0.z, r2.x nop -mov.f32f32 r1.x, r0.z -mov.f32f32 r1.y, r0.w -mov.f32f32 r1.z, r0.x +sel.b32 r2.x, r2.y, r3.y, r5.z +sel.b32 r0.x, r0.x, r3.y, r5.w +sel.b32 r0.z, r0.z, r3.y, r2.w nop -mul.f r1.x, r1.x, c14.z -mul.f r1.y, r1.y, c14.y -mul.f r1.z, r1.z, c14.x +sel.b32 r2.x, r6.x, r5.y, r2.x +sel.b32 r0.x, r6.z, r5.y, r0.x +sel.b32 r0.z, r1.z, r5.y, r0.z nop -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.z, r1.z +mul.f r1.z, r2.x, c14.z +mul.f r2.y, r0.x, c14.y +mul.f r2.z, r0.z, c14.x nop -sel.b32 r0.z, r1.x, r2.x, r0.z -sel.b32 r0.w, r1.y, r2.x, r0.w -sel.b32 r0.x, r1.z, r2.x, r0.x +sel.b32 r1.z, r1.z, r3.x, r2.x +sel.b32 r0.x, r2.y, r3.x, r0.x +sel.b32 r0.z, r2.z, r3.x, r0.z nop -mul.f r0.z, r3.w, r0.z -mul.f r0.w, r3.w, r0.w -mul.f r0.x, r3.w, r0.x +mul.f r1.z, r1.y, r1.z +mul.f r0.x, r1.y, r0.x +mul.f r0.z, r1.y, r0.z nop -add.f r0.z, r0.z, r3.y -add.f r0.w, r0.w, r4.w -add.f r0.x, r0.x, r0.y -nop -mov.f32f32 r1.z, r0.z -mov.f32f32 r1.y, r0.w -mov.f32f32 r1.x, r0.x +add.f r1.z, r1.z, r0.y +add.f r1.y, r0.x, r1.x +add.f r1.x, r0.z, r0.w end nop -nop ; FRAG: outputs: r1.x (1:0) ; FRAG: inputs: r0.x (5:20,cm=f,il=8,b=1) -; FRAG: 2323 instructions, 0 half, 23 full +; FRAG: 1348 instructions, 0 half, 27 full diff --git a/reference/xa-composite-fs.asm b/reference/xa-composite-fs.asm index 4d9b7bc..99d1410 100644 --- a/reference/xa-composite-fs.asm +++ b/reference/xa-composite-fs.asm @@ -6,47 +6,40 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c0.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000 (sy)(ss)bary.f r0.z, 0, r0.x bary.f r0.w, 1, r0.x bary.f (ei)r0.x, 7, r0.x nop cmps.f.lt r0.y, r0.z, c0.y cmps.f.lt r1.x, r0.w, c0.y -mov.f32f32 r1.y, r0.z -mov.f32f32 r1.z, r0.w +cmps.f.gt r1.y, r0.w, c0.x +cmps.f.gt r1.z, r0.z, c0.x cov.u32f32 r0.y, r0.y -cmps.f.gt r0.z, r0.z, c0.x cov.u32f32 r1.x, r1.x -cmps.f.gt r0.w, r0.w, c0.x -nop -cov.u32f32 r0.z, r0.z -sam (f32)(xyzw)r1.y, r1.y, s#0, t#0 -nop -cov.u32f32 r0.w, r0.w -nop -min.f r0.y, r0.y, r0.z -nop -min.f r0.z, r1.x, r0.w +cov.u32f32 r1.y, r1.y +cov.u32f32 r1.z, r1.z +sam (f32)(xyzw)r1.w, r0.z, s#0, t#0 +(rpt2)nop +min.f r0.y, r0.y, r1.z +(ss)min.f r0.z, r1.x, r1.y (rpt2)nop min.f r0.w, r0.y, r0.z min.f r1.x, r0.y, r0.z -min.f r2.y, r0.y, r0.z +min.f r1.y, r0.y, r0.z min.f r0.y, r0.y, r0.z -mov.f32f32 r0.z, r0.w -mov.f32f32 r0.w, r1.x -mov.f32f32 r1.x, r2.y -mov.f32f32 r0.y, r0.y -(sy)mul.f r0.z, r2.x, r0.z -mul.f r0.w, r1.w, r0.w -mul.f r1.x, r1.z, r1.x -mul.f r0.y, r1.y, r0.y +(sy)mul.f r0.z, r2.z, r0.w +mul.f r0.w, r2.y, r1.x +mul.f r1.x, r2.x, r1.y +mul.f r0.y, r1.w, r0.y mul.f r1.w, r0.z, r0.x -(ss)mul.f r1.z, r0.w, r0.x +mul.f r1.z, r0.w, r0.x mul.f r1.y, r1.x, r0.x mul.f r1.x, r0.y, r0.x end nop +nop ; FRAG: outputs: r1.x (1:0) ; FRAG: inputs: r0.x (5:0,cm=f,il=8,b=1) r63.y (1:0,cm=f,il=12,b=1) -; FRAG: 41 instructions, 0 half, 3 full +; FRAG: 34 instructions, 0 half, 3 full diff --git a/reference/xon1.asm b/reference/xon1.asm index b4aedcd..1867490 100644 --- a/reference/xon1.asm +++ b/reference/xon1.asm @@ -6,211 +6,146 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c10.x) 0xbf000000, 0x40000000, 0xbf800000, 0x00000000 +@const(c11.x) 0x3f800000, 0x3e800000, 0x00000000, 0x00000000 +@const(c12.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.z, 2, r0.x -bary.f r0.w, 0, r0.x -bary.f r1.x, 4, r0.x -bary.f r1.y, 3, r0.x -mov.f32f32 r1.z, r0.z -mov.f32f32 r1.w, r0.w -mul.f r2.x, r1.x, r1.x -bary.f r2.y, 5, r0.x -mov.f32f32 r2.z, r1.z -mov.f32f32 r1.z, r1.y -mov.f32f32 r3.x, r1.w -bary.f r1.w, 1, r0.x -mad.f32 r2.x, r2.y, r2.y, r2.x -mov.f32f32 r2.w, r1.z -mov.f32f32 r1.z, r0.w -mov.f32f32 r3.y, r1.w -mov.f32f32 r2.x, r2.x -bary.f r3.z, 6, r0.x -mov.f32f32 r3.w, r1.z -mov.f32f32 r3.y, r3.y -sam (f32)(xyz)r4.x, r2.z, s#4, t#4 -(sy)mad.f32 r1.z, c10.y, r4.x, c10.z -mad.f32 r2.x, r3.z, r3.z, r2.x -(ss)mov.f32f32 r2.z, r1.w -mad.f32 r2.w, c10.y, r4.y, c10.z -mov.f32f32 r1.z, r1.z +bary.f r0.w, 3, r0.x +bary.f r1.x, 0, r0.x +bary.f r1.z, 4, r0.x +mov.f32f32 r1.w, r0.z +mov.f32f32 r2.x, r0.w +mov.f32f32 r2.y, r1.x +bary.f r1.y, 1, r0.x +sam (f32)(xyz)r2.z, r0.z, s#4, t#4 +(sy)(ss)mad.f32 r0.z, c10.y, r2.z, c10.z +mov.f32f32 r3.y, r1.x +bary.f r0.w, 16, r0.x +mul.f r2.z, r1.z, r1.z +mov.f32f32 r3.w, r0.z bary.f r4.x, 12, r0.x -sam (f32)(xyz)r4.w, r3.x, s#0, t#0 -(sy)(ss)add.f r3.x, r4.w, c10.x -bary.f r3.y, 16, r0.x +mov.f32f32 r3.z, r1.y bary.f r4.y, 8, r0.x -mul.f r4.x, r1.z, r4.x -mov.f32f32 r2.w, r2.w -bary.f r4.w, 13, r0.x -mov.f32f32 r3.x, r3.x -mul.f r3.y, r1.z, r3.y -mul.f r1.z, r1.z, r4.y -mad.f32 r4.x, r2.w, r4.w, r4.x -mul.f r4.y, r3.x, r3.x -add.f r4.w, r5.x, c10.x -bary.f r5.x, 17, r0.x -mov.f32f32 r4.x, r4.x -mad.f32 r4.z, c10.y, r4.z, c10.z -mov.f32f32 r4.w, r4.w -mad.f32 r3.y, r2.w, r5.x, r3.y +mul.f r0.z, r0.z, r0.w +mul.f r0.w, r3.w, r4.x +mad.f32 r2.w, c10.y, r2.w, c10.z +mul.f r3.w, r3.w, r4.y +bary.f r4.x, 17, r0.x +sam (f32)(xyz)r4.y, r3.y, s#0, t#0 +(sy)(ss)add.f r3.y, r4.y, c10.x +mov.f32f32 r3.z, r2.w +bary.f r4.y, 13, r0.x bary.f r5.x, 9, r0.x -mov.f32f32 r4.z, r4.z -bary.f r5.z, 14, r0.x -mad.f32 r4.y, r4.w, r4.w, r4.y -mov.f32f32 r3.y, r3.y -mad.f32 r1.z, r2.w, r5.x, r1.z -mad.f32 r2.w, r4.z, r5.z, r4.x -mov.f32f32 r4.x, r4.y -add.f r4.y, r5.y, c10.x -bary.f r5.x, 18, r0.x -mov.f32f32 r2.w, r2.w -mov.f32f32 r1.z, r1.z -bary.f (ei)r0.x, 10, r0.x -mov.f32f32 r0.y, r4.y -mad.f32 r3.y, r4.z, r5.x, r3.y -rsq r2.x, r2.x -(ss)mov.f32f32 r2.x, r2.x -mad.f32 r0.x, r4.z, r0.x, r1.z -mad.f32 r1.z, r0.y, r0.y, r4.x -mov.f32f32 r3.y, r3.y -mul.f r1.x, r1.x, r2.x -mul.f r4.x, r0.x, r0.x -mul.f r2.y, r2.y, r2.x -mad.f32 r4.x, r2.w, r2.w, r4.x -mov.f32f32 r3.y, r3.y -rsq r1.z, r1.z -mov.f32f32 r1.x, r1.x -(ss)mov.f32f32 r1.z, r1.z -mov.f32f32 r4.x, r4.x -mov.f32f32 r2.y, r2.y -mad.f32 r4.x, r3.y, r3.y, r4.x -mul.f r3.x, r3.x, r1.z -mul.f r4.y, r4.w, r1.z -mul.f r0.y, r0.y, r1.z -(rpt2)nop -rsq r1.z, r4.x -(ss)mov.f32f32 r1.z, r1.z -mov.f32f32 r3.x, r3.x -mov.f32f32 r4.y, r4.y -mov.f32f32 r0.y, r0.y -mul.f r0.x, r0.x, r1.z -mul.f r3.y, r3.y, r1.z -mul.f r1.z, r2.w, r1.z -mul.f r2.x, r3.z, r2.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r2.w, r3.y -mov.f32f32 r1.z, r1.z -mov.f32f32 r2.x, r2.x -mul.f r3.y, r3.x, r0.x -mul.f r3.z, r3.x, r0.x -mad.f32 r3.y, r4.y, r1.z, r3.y -mad.f32 r3.z, r4.y, r1.z, r3.z -max.f r4.z, c11.y, r2.w -(ss)mov.f32f32 r4.x, r2.z -mov.f32f32 r2.z, r3.y -mov.f32f32 r3.y, r3.z -mad.f32 r2.z, r0.y, r2.w, r2.z -mad.f32 r3.y, r0.y, r2.w, r3.y +mad.f32 r0.z, r2.w, r4.x, r0.z +bary.f r2.w, 5, r0.x +mad.f32 r0.w, r3.z, r4.y, r0.w +mad.f32 r3.x, c10.y, r3.x, c10.z +mov.f32f32 r4.x, r3.y +mad.f32 r3.z, r3.z, r5.x, r3.w +bary.f r3.w, 18, r0.x +mov.f32f32 r4.y, r3.x +bary.f r5.x, 14, r0.x +mul.f r3.y, r3.y, r4.x +add.f r4.z, r4.z, c10.x +bary.f r5.y, 10, r0.x +mad.f32 r0.w, r4.y, r5.x, r0.w +mad.f32 r0.z, r3.x, r3.w, r0.z +mad.f32 r2.z, r2.w, r2.w, r2.z +bary.f (ei)r0.x, 6, r0.x +mov.f32f32 r0.y, r0.w +mad.f32 r3.x, r4.y, r5.y, r3.z mov.f32f32 r3.z, r4.z -mov.f32f32 r0.w, r0.w +mov.f32f32 r3.w, r0.z +mad.f32 r4.y, r0.x, r0.x, r2.z +mul.f r2.z, r3.x, r3.x +mad.f32 r3.y, r4.z, r3.z, r3.y +mad.f32 r0.w, r0.w, r0.y, r2.z +mov.f32f32 r3.w, r3.w +add.f r4.z, r4.w, c10.x +mov.f32f32 r2.z, r1.y +rsq r4.y, r4.y +(ss)mov.f32f32 r4.w, r4.y +mad.f32 r0.w, r3.w, r3.w, r0.w +mov.f32f32 r3.w, r4.z +mul.f r0.x, r0.x, r4.y +sam (f32)(xyz)r5.x, r1.w, s#3, t#3 +mul.f r1.z, r1.z, r4.w +(ss)mul.f r2.x, r2.w, r4.w +sam (f32)(xyzw)r5.w, r2.y, s#1, t#1 +(ss)mad.f32 r2.y, r3.w, r3.w, r3.y +(sy)mul.f r1.w, r6.z, c9.x +rsq r0.w, r0.w +(ss)mov.f32f32 r2.z, r0.w +mul.f r0.z, r0.z, r0.w +(ss)mul.f r0.w, r6.y, c5.z +mul.f r2.w, r6.y, c6.z +mul.f r3.x, r3.x, r2.z +rsq r2.y, r2.y +(ss)mov.f32f32 r3.y, r2.y +mov.f32f32 r3.w, r0.z +mul.f r0.y, r0.y, r2.z +(ss)mul.f r2.y, r4.z, r2.y +mul.f r2.z, r4.x, r3.y +mov.f32f32 r4.x, r3.x +max.f r4.y, c11.y, r3.w +mov.f32f32 r4.z, r0.y mul.f r3.x, r2.z, r3.x -mul.f r4.y, r2.z, r4.y -mul.f r0.y, r2.z, r0.y -mov.f32f32 r2.z, r3.y -mov.f32f32 r3.x, r3.x -mov.f32f32 r3.y, r4.y -mov.f32f32 r0.y, r0.y -max.f r2.z, r2.z, c10.w -mul.f r3.x, c10.y, r3.x -mul.f r3.y, c10.y, r3.y -mul.f r0.y, c10.y, r0.y +mul.f r3.y, r3.z, r3.y mov.f32f32 r2.z, r2.z -mov.f32f32 r3.x, r3.x -mov.f32f32 r3.y, r3.y -mov.f32f32 r0.y, r0.y -sam (f32)(xyzw)r3.w, r3.w, s#1, t#1 -(sy)mul.f r4.w, r4.y, c6.z -add.f r0.x, r0.x, (neg)r3.x -add.f r1.z, r1.z, (neg)r3.y -add.f r0.y, r2.w, (neg)r0.y -mul.f r2.w, r4.w, r2.z -mov.f32f32 r0.x, r0.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r2.w, r2.w -mul.f r0.x, r0.x, r1.x -mul.f r1.x, r4.x, c6.y -mad.f32 r0.x, r1.z, r2.y, r0.x -mul.f r1.z, r3.w, c6.x -rcp r2.y, r3.z -(ss)mov.f32f32 r2.y, r2.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.x, r0.x -mul.f r1.x, r1.x, r2.z -mad.f32 r0.x, r0.y, r2.x, r0.x -mul.f r0.y, r1.z, r2.z -(rpt1)nop -mov.f32f32 r0.x, r0.x -mov.f32f32 r1.x, r1.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r3.x, r0.z +mov.f32f32 r3.z, r2.y +mul.f r4.w, r6.x, c5.y +mad.f32 r0.y, r3.y, r0.y, r3.x +mul.f r3.x, r2.z, r4.x +mad.f32 r0.y, r2.y, r3.w, r0.y +mov.f32f32 r2.y, r3.y +rcp r3.y, r4.y +(ss)mov.f32f32 r4.y, r3.y +mul.f r3.y, r5.x, r3.y +mul.f r2.z, r0.y, r2.z +mad.f32 r3.x, r2.y, r4.z, r3.x +mul.f r2.y, r0.y, r2.y +mul.f r0.y, r0.y, r3.z +mul.f r2.z, c10.y, r2.z +mad.f32 r3.x, r3.z, r3.w, r3.x +mul.f r2.y, c10.y, r2.y +mul.f r0.y, c10.y, r0.y +add.f r2.z, r4.x, (neg)r2.z +max.f r3.x, r3.x, c10.w +add.f r2.y, r4.z, (neg)r2.y +mul.f r3.z, r5.w, c6.x +mul.f r1.z, r2.z, r1.z +mov.f32f32 r2.z, r3.x +mad.f32 r1.z, r2.y, r2.x, r1.z +add.f r0.y, r0.z, (neg)r0.y +mul.f r0.z, r6.x, c6.y +mul.f r2.x, r3.z, r3.x +mul.f r2.y, r5.z, r4.y +mad.f32 r0.x, r0.y, r0.x, r1.z +mul.f r0.y, r2.w, r2.z +mul.f r0.z, r0.z, r2.z +mul.f r2.z, r5.y, r4.y max.f r0.x, (neg)r0.x, c10.w -mov.f32f32 r0.z, r1.y -mul.f r1.y, r4.z, c9.x -mul.f r1.z, r4.y, c5.z -mov.f32f32 r0.x, r0.x -mov.f32f32 r3.y, r0.z -mov.f32f32 r0.z, r1.y -mov.f32f32 r1.y, r1.z -mul.f r1.z, r4.x, c5.y -mul.f r2.x, r3.w, c5.x -(ss)mov.f32f32 r3.z, r0.w -log2 r0.x, r0.x -mov.f32f32 r0.w, r1.w -sam (f32)(xyz)r3.w, r3.x, s#3, t#3 -(sy)mul.f r1.w, r3.w, r2.y -mul.f r2.z, r4.y, r2.y -mul.f r2.y, r4.x, r2.y -mov.f32f32 r3.w, r0.w -mov.f32f32 r0.w, r1.w -mov.f32f32 r0.z, r0.z -nop -mov.f32f32 r2.z, r2.z -mov.f32f32 r2.y, r2.y -mov.f32f32 r1.w, r0.z -(ss)nop -sam (f32)(xyzw)r3.x, r3.z, s#2, t#2 -(sy)mad.f32 r0.z, c8.x, r3.w, c11.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r2.x, r2.x -(ss)mul.f r3.z, r3.z, c7.z -mov.f32f32 r0.z, r0.z +mul.f r2.w, r5.w, c5.x +sam (f32)(xyzw)r3.z, r1.x, s#2, t#2 +(sy)(ss)mul.f r1.x, r4.x, c7.z +mad.f32 r1.y, c8.x, r4.y, c11.x +mul.f r1.z, r3.z, c7.x nop -mul.f r3.y, r3.y, c7.y -mul.f r3.x, r3.x, c7.x -(ss)mul.f r0.x, r0.z, r0.x -(rpt2)nop -mov.f32f32 r0.x, r0.x +mul.f r3.x, r3.w, c7.y +log2 r0.x, r0.x +(ss)mul.f r0.x, r1.y, r0.x (rpt5)nop exp2 r0.x, r0.x -(ss)mad.f32 r0.z, r3.z, r0.x, r2.w -mad.f32 r1.x, r3.y, r0.x, r1.x -(ss)mad.f32 r0.x, r3.x, r0.x, r0.y -nop -mov.f32f32 r0.y, r0.z -mov.f32f32 r0.z, r1.x -mov.f32f32 r0.x, r0.x +(ss)mad.f32 r0.y, r1.x, r0.x, r0.y +mad.f32 r0.z, r3.x, r0.x, r0.z +(ss)mad.f32 r0.x, r1.z, r0.x, r2.x nop -mad.f32 r0.y, r0.y, r2.z, r1.y -mad.f32 r0.z, r0.z, r2.y, r1.z -mad.f32 r0.x, r0.x, r0.w, r2.x -nop -mov.f32f32 r1.z, r0.y -mov.f32f32 r1.y, r0.z -mov.f32f32 r1.x, r0.x +mad.f32 r1.z, r0.y, r2.y, r0.w +mad.f32 r1.y, r0.z, r2.z, r4.w +mad.f32 r1.x, r0.x, r3.y, r2.w end nop -nop -nop ; FRAG: outputs: r1.x (1:0) -; FRAG: inputs: r0.z (5:20,cm=f,il=8,b=1) r1.x (5:21,cm=f,il=12,b=1) r2.x (5:22,cm=f,il=16,b=1) r3.x (5:23,cm=f,il=20,b=1) r4.x (5:24,cm=f,il=24,b=1) -; FRAG: 211 instructions, 0 half, 6 full +; FRAG: inputs: r0.x (5:20,cm=f,il=8,b=1) r1.x (5:21,cm=f,il=12,b=1) r2.x (5:22,cm=f,il=16,b=1) r3.x (5:23,cm=f,il=20,b=1) r4.x (5:24,cm=f,il=24,b=1) +; FRAG: 140 instructions, 0 half, 7 full diff --git a/reference/xon2.asm b/reference/xon2.asm index 9a1b31d..26f774e 100644 --- a/reference/xon2.asm +++ b/reference/xon2.asm @@ -1,22 +1,22 @@ ; options: ; VERT: new compiler -@in(r2.w) in0 -@in(r3.x) in1 -@in(r3.y) in2 -@in(r3.z) in3 +@in(r6.x) in0 +@in(r6.y) in1 +@in(r6.z) in2 +@in(r6.w) in3 @in(r0.x) in4 @in(r0.y) in5 @in(r0.z) in6 @in(r0.w) in7 -@in(r6.x) in8 -@in(r6.y) in9 -@in(r6.z) in10 -@in(r3.w) in12 -@in(r4.x) in13 -@in(r4.y) in14 -@in(r4.z) in16 -@in(r4.w) in17 -@in(r5.x) in18 +@in(r3.x) in8 +@in(r3.y) in9 +@in(r3.z) in10 +@in(r4.x) in12 +@in(r4.y) in13 +@in(r4.z) in14 +@in(r5.x) in16 +@in(r5.y) in17 +@in(r5.z) in18 @in(r1.z) in20 @in(r1.w) in21 @out(r0.x) out0 @@ -43,99 +43,53 @@ @out(r5.y) out21 @out(r5.z) out22 @out(r5.w) out23 -(sy)(ss)add.f r1.x, c4.x, (neg)r2.w +@const(c9.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 +@const(c10.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 +(sy)(ss)add.f r1.x, c4.x, (neg)r6.x mul.f r1.y, c5.y, r0.x mul.f r0.x, c5.x, r0.x +mul.f r2.w, c0.w, r6.x +mul.f r2.x, r1.x, r5.x +add.f r2.y, c4.y, (neg)r6.y +mul.f r2.z, r1.x, r4.x +mul.f r1.x, r1.x, r3.x mad.f32 r1.y, c6.y, r0.y, r1.y -mul.f r2.x, r1.x, r4.z -add.f r5.y, c4.y, (neg)r3.x -mad.f32 r1.y, c7.y, r0.z, r1.y +mad.f32 r2.x, r2.y, r5.y, r2.x +add.f r3.w, c4.z, (neg)r6.z +mad.f32 r4.w, r2.y, r4.y, r2.z +mad.f32 r1.x, r2.y, r3.y, r1.x +nop +mad.f32 r2.z, r3.w, r5.z, r2.x +mad.f32 r2.y, r3.w, r4.z, r4.w +mad.f32 r2.x, r3.w, r3.z, r1.x +mad.f32 r1.x, c7.y, r0.z, r1.y mad.f32 r0.x, c6.x, r0.y, r0.x -mul.f r0.y, r1.x, r3.w -mad.f32 r2.x, r5.y, r4.w, r2.x -mad.f32 r1.y, c8.y, r0.w, r1.y +mad.f32 r1.y, c8.y, r0.w, r1.x mad.f32 r0.x, c7.x, r0.z, r0.x -mad.f32 r0.y, r5.y, r4.x, r0.y -mov.f32f32 r0.z, r2.x -add.f r2.x, c4.z, (neg)r3.y -mov.f32f32 r1.y, r1.y -mad.f32 r0.x, c8.x, r0.w, r0.x -mov.f32f32 r0.y, r0.y -mad.f32 r0.z, r2.x, r5.x, r0.z -mov.f32f32 r0.w, r1.y -mov.f32f32 r0.x, r0.x -mad.f32 r0.y, r2.x, r4.y, r0.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r1.y, r0.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r2.y, r0.y -mov.f32f32 r2.z, r0.z -mul.f r0.y, r1.x, r6.x -mov.f32f32 r1.x, r0.x -mad.f32 r0.x, r5.y, r6.y, r0.y -mul.f r0.y, c0.w, r2.w -mul.f r0.z, c0.z, r2.w -mul.f r0.w, c0.y, r2.w -mov.f32f32 r0.x, r0.x -mad.f32 r0.y, c1.w, r3.x, r0.y -mad.f32 r0.x, r2.x, r6.z, r0.x -mad.f32 r0.y, c2.w, r3.y, r0.y -mad.f32 r0.z, c1.z, r3.x, r0.z -mad.f32 r0.w, c1.y, r3.x, r0.w -mov.f32f32 r2.x, r0.x -mad.f32 r0.x, c3.w, r3.z, r0.y -mad.f32 r0.y, c2.z, r3.y, r0.z -mad.f32 r0.z, c2.y, r3.y, r0.w -mul.f r2.w, c0.x, r2.w -mov.f32f32 r0.w, r0.x -mad.f32 r0.x, c3.z, r3.z, r0.y -mad.f32 r0.y, c3.y, r3.z, r0.z -mad.f32 r2.w, c1.x, r3.x, r2.w -mov.f32f32 r3.x, c9.x -mov.f32f32 r0.z, r0.x -mov.f32f32 r0.y, r0.y -mad.f32 r0.x, c2.x, r3.y, r2.w -mov.f32f32 r5.w, r3.x -mad.f32 r0.x, c3.x, r3.z, r0.x -mov.f32f32 r2.w, r5.x -mov.f32f32 r3.x, r4.w -mov.f32f32 r3.y, r4.z -mov.f32f32 r0.x, r0.x -mov.f32f32 r5.z, r2.w -mov.f32f32 r5.y, r3.x -mov.f32f32 r5.x, r3.y -mov.f32f32 r2.w, c9.x -mov.f32f32 r3.x, r4.y -mov.f32f32 r3.y, r4.x -mov.f32f32 r3.z, r3.w -mov.f32f32 r4.w, r2.w -mov.f32f32 r4.z, r3.x -mov.f32f32 r4.y, r3.y -mov.f32f32 r4.x, r3.z -mov.f32f32 r2.w, c9.x -mov.f32f32 r3.x, r6.z -mov.f32f32 r3.y, r6.y -mov.f32f32 r6.x, r6.x -mov.f32f32 r3.w, r2.w -mov.f32f32 r3.z, r3.x -mov.f32f32 r3.y, r3.y -mov.f32f32 r3.x, r6.x +mad.f32 r0.y, c1.w, r6.y, r2.w +mad.f32 r1.x, c8.x, r0.w, r0.x +mad.f32 r0.x, c2.w, r6.z, r0.y +mul.f r0.y, c0.z, r6.x +mad.f32 r0.w, c3.w, r6.w, r0.x +mad.f32 r0.x, c1.z, r6.y, r0.y +mul.f r0.y, c0.y, r6.x +mad.f32 r0.x, c2.z, r6.z, r0.x +mad.f32 r0.y, c1.y, r6.y, r0.y +mad.f32 r0.z, c3.z, r6.w, r0.x +mad.f32 r0.x, c2.y, r6.z, r0.y +mul.f r2.w, c0.x, r6.x +mad.f32 r0.y, c3.y, r6.w, r0.x +mad.f32 r0.x, c1.x, r6.y, r2.w +mov.f32f32 r5.w, c9.x +mad.f32 r0.x, c2.x, r6.z, r0.x +mov.f32f32 r4.w, c9.x +mad.f32 r0.x, c3.x, r6.w, r0.x +mov.f32f32 r3.w, c9.x mov.f32f32 r2.w, c9.x -mov.f32f32 r1.w, r1.w -mov.f32f32 r1.z, r1.z -nop -mov.f32f32 r2.w, r2.w -mov.f32f32 r1.w, r1.w -mov.f32f32 r1.z, r1.z end nop nop -nop ; VERT: outputs: r0.x (0:0) r1.x (5:20) r2.x (5:21) r3.x (5:22) r4.x (5:23) r5.x (5:24) -; VERT: inputs: r2.w (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=f,il=12,b=0) r6.x (0:0,cm=7,il=16,b=0) r3.w (0:0,cm=7,il=20,b=0) r4.z (0:0,cm=7,il=24,b=0) r1.z (0:0,cm=3,il=28,b=0) -; VERT: 89 instructions, 0 half, 7 full +; VERT: inputs: r6.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=f,il=12,b=0) r3.x (0:0,cm=7,il=16,b=0) r4.x (0:0,cm=7,il=20,b=0) r5.x (0:0,cm=7,il=24,b=0) r1.z (0:0,cm=3,il=28,b=0) +; VERT: 42 instructions, 0 half, 7 full diff --git a/reference/xon3.asm b/reference/xon3.asm index 0c5831e..fc88e42 100644 --- a/reference/xon3.asm +++ b/reference/xon3.asm @@ -6,219 +6,158 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c12.x) 0xbf000000, 0x40000000, 0xbf800000, 0x00000000 +@const(c13.x) 0x3f800000, 0x3e800000, 0x00000000, 0x00000000 +@const(c14.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.z, 2, r0.x -bary.f r0.w, 0, r0.x -bary.f r1.x, 4, r0.x -bary.f r1.y, 3, r0.x -mov.f32f32 r1.z, r0.z -mov.f32f32 r1.w, r0.w -mul.f r2.x, r1.x, r1.x -bary.f r2.y, 5, r0.x -mov.f32f32 r2.z, r1.z -mov.f32f32 r1.z, r1.y -mov.f32f32 r3.x, r1.w -bary.f r3.z, 1, r0.x -mad.f32 r1.w, r2.y, r2.y, r2.x -mov.f32f32 r2.w, r1.z -mov.f32f32 r1.z, r0.w -mov.f32f32 r2.x, r3.z -mov.f32f32 r1.w, r1.w -bary.f r3.w, 6, r0.x -mov.f32f32 r4.x, r1.z -mov.f32f32 r3.y, r2.x -sam (f32)(xyz)r4.y, r2.z, s#5, t#5 -(sy)mad.f32 r1.z, c12.y, r4.y, c12.z -mad.f32 r1.w, r3.w, r3.w, r1.w -mov.f32f32 r2.x, r3.z -(ss)mad.f32 r2.z, c12.y, r4.z, c12.z -mov.f32f32 r1.z, r1.z -bary.f r2.w, 12, r0.x -sam (f32)(xyz)r5.x, r3.x, s#0, t#0 -(sy)(ss)add.f r3.x, r5.x, c12.x -bary.f r3.y, 16, r0.x +bary.f r0.w, 3, r0.x +bary.f r1.x, 0, r0.x +bary.f r1.z, 4, r0.x +mov.f32f32 r1.w, r0.z +mov.f32f32 r2.x, r0.w +mov.f32f32 r2.y, r1.x +mov.f32f32 r2.w, r1.x +sam (f32)(xyz)r3.x, r0.z, s#5, t#5 +(sy)(ss)mad.f32 r0.z, c12.y, r3.x, c12.z +mov.f32f32 r3.w, r1.x +bary.f r1.y, 1, r0.x +bary.f r0.w, 16, r0.x +mov.f32f32 r2.z, r0.z +bary.f r3.x, 12, r0.x +mov.f32f32 r4.x, r1.y bary.f r4.y, 8, r0.x -mul.f r2.w, r1.z, r2.w -mov.f32f32 r2.z, r2.z -bary.f r4.z, 13, r0.x -mov.f32f32 r3.x, r3.x -mul.f r3.y, r1.z, r3.y -mul.f r1.z, r1.z, r4.y -mad.f32 r2.w, r2.z, r4.z, r2.w -mul.f r4.y, r3.x, r3.x -add.f r4.z, r5.y, c12.x -bary.f r5.x, 17, r0.x -mov.f32f32 r2.w, r2.w -mad.f32 r4.w, c12.y, r4.w, c12.z -mov.f32f32 r4.z, r4.z -mad.f32 r3.y, r2.z, r5.x, r3.y +mul.f r0.z, r0.z, r0.w +mul.f r0.w, r2.z, r3.x +mad.f32 r3.x, c12.y, r3.y, c12.z +mul.f r2.z, r2.z, r4.y +bary.f r3.y, 17, r0.x +sam (f32)(xyz)r3.w, r3.w, s#0, t#0 +(sy)(ss)add.f r3.w, r3.w, c12.x +mov.f32f32 r4.z, r3.x +bary.f r4.w, 13, r0.x bary.f r5.x, 9, r0.x -mov.f32f32 r4.w, r4.w -bary.f r5.y, 14, r0.x -mad.f32 r4.y, r4.z, r4.z, r4.y -mov.f32f32 r3.y, r3.y -mad.f32 r1.z, r2.z, r5.x, r1.z -mad.f32 r2.z, r4.w, r5.y, r2.w -mov.f32f32 r2.w, r4.y -add.f r4.y, r5.z, c12.x -bary.f r5.x, 18, r0.x -mov.f32f32 r2.z, r2.z -mov.f32f32 r1.z, r1.z -bary.f (ei)r0.x, 10, r0.x -mov.f32f32 r0.y, r4.y -mad.f32 r3.y, r4.w, r5.x, r3.y -rsq r1.w, r1.w -(ss)mov.f32f32 r1.w, r1.w -mad.f32 r0.x, r4.w, r0.x, r1.z -mad.f32 r1.z, r0.y, r0.y, r2.w -mov.f32f32 r2.w, r3.y -mul.f r1.x, r1.x, r1.w -mul.f r3.y, r0.x, r0.x -mul.f r2.y, r2.y, r1.w -mad.f32 r3.y, r2.z, r2.z, r3.y -mov.f32f32 r2.w, r2.w -rsq r1.z, r1.z -mov.f32f32 r1.x, r1.x -(ss)mov.f32f32 r1.z, r1.z +mad.f32 r0.z, r3.x, r3.y, r0.z +mul.f r3.x, r1.z, r1.z +mad.f32 r0.w, r4.z, r4.w, r0.w +mad.f32 r3.y, c12.y, r3.z, c12.z +mov.f32f32 r3.z, r3.w +mad.f32 r2.z, r4.z, r5.x, r2.z +bary.f r4.z, 18, r0.x +mov.f32f32 r4.w, r3.y +bary.f r5.x, 14, r0.x +mul.f r3.w, r3.w, r3.z +add.f r4.x, r4.x, c12.x +bary.f r5.y, 10, r0.x +mad.f32 r0.w, r4.w, r5.x, r0.w +mad.f32 r0.z, r3.y, r4.z, r0.z +bary.f r3.y, 5, r0.x +mov.f32f32 r4.z, r4.x +mov.f32f32 r5.x, r0.w +mad.f32 r4.w, r4.w, r5.y, r2.z +mov.f32f32 r2.z, r0.z +mad.f32 r3.x, r3.y, r3.y, r3.x +bary.f (ei)r0.x, 6, r0.x +mul.f r0.y, r4.w, r4.w +mad.f32 r3.w, r4.x, r4.z, r3.w +mad.f32 r0.y, r0.w, r5.x, r0.y +mov.f32f32 r0.w, r2.z +add.f r4.x, r4.y, c12.x +mad.f32 r4.y, r0.x, r0.x, r3.x +mov.f32f32 r2.z, r1.y +mad.f32 r0.y, r0.w, r0.w, r0.y +mov.f32f32 r0.w, r4.x +mov.f32f32 r3.x, r1.y +sam (f32)(xyz)r5.y, r1.w, s#4, t#4 +sam (f32)(xyz)r6.x, r1.x, s#3, t#3 +(rpt3)nop +rsq r0.y, r0.y +(ss)mov.f32f32 r1.x, r0.y +mul.f r0.y, r0.z, r0.y +mad.f32 r0.z, r0.w, r0.w, r3.w +rsq r0.w, r4.y +(ss)mov.f32f32 r1.y, r0.w +mul.f r1.w, r4.w, r1.x +mov.f32f32 r2.x, r0.y +mul.f r1.x, r5.x, r1.x +mul.f r1.z, r1.z, r1.y +mul.f r1.y, r3.y, r1.y +rsq r0.z, r0.z +(ss)mov.f32f32 r3.y, r0.z +mov.f32f32 r3.w, r1.w +(ss)max.f r4.y, c13.y, r2.x +mov.f32f32 r4.w, r1.x +mul.f r3.z, r3.z, r3.y +mul.f r3.y, r4.z, r3.y +mul.f r0.z, r4.x, r0.z +nop +mul.f r1.w, r3.z, r1.w +mov.f32f32 r3.z, r3.z +mad.f32 r1.x, r3.y, r1.x, r1.w +rcp r1.w, r4.y +(ss)mov.f32f32 r4.x, r1.w +mad.f32 r1.x, r0.z, r2.x, r1.x +(ss)mul.f r4.y, r3.z, r3.w mov.f32f32 r3.y, r3.y -mov.f32f32 r2.y, r2.y -mad.f32 r3.y, r2.w, r2.w, r3.y -mul.f r3.x, r3.x, r1.z -mul.f r4.y, r4.z, r1.z -mul.f r0.y, r0.y, r1.z -(rpt2)nop -rsq r1.z, r3.y -(ss)mov.f32f32 r1.z, r1.z -mov.f32f32 r3.x, r3.x -(ss)mov.f32f32 r3.y, r4.y -mov.f32f32 r0.y, r0.y -mul.f r0.x, r0.x, r1.z -mul.f r2.w, r2.w, r1.z -mul.f r1.z, r2.z, r1.z -mul.f r1.w, r3.w, r1.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r2.z, r2.w -mov.f32f32 r1.z, r1.z -mov.f32f32 r1.w, r1.w -mul.f r2.w, r3.x, r0.x -mul.f r3.w, r3.x, r0.x -mad.f32 r2.w, r3.y, r1.z, r2.w -mad.f32 r3.w, r3.y, r1.z, r3.w -max.f r4.z, c13.y, r2.z -mov.f32f32 r4.y, r2.x -mov.f32f32 r2.x, r2.w -mov.f32f32 r2.w, r3.w -mad.f32 r2.x, r0.y, r2.z, r2.x -mad.f32 r2.w, r0.y, r2.z, r2.w -mov.f32f32 r3.w, r4.z -mov.f32f32 r4.z, r0.w -mul.f r3.x, r2.x, r3.x -mul.f r3.y, r2.x, r3.y -mul.f r0.y, r2.x, r0.y -mov.f32f32 r2.x, r2.w -mov.f32f32 r2.w, r3.x -mov.f32f32 r3.x, r3.y -mov.f32f32 r0.y, r0.y -max.f r2.x, r2.x, c12.w -mul.f r2.w, c12.y, r2.w -mul.f r3.x, c12.y, r3.x -mul.f r0.y, c12.y, r0.y -mov.f32f32 r2.x, r2.x -mov.f32f32 r2.w, r2.w -mov.f32f32 r3.x, r3.x -mov.f32f32 r0.y, r0.y -sam (f32)(xyzw)r4.w, r4.x, s#1, t#1 -(sy)mul.f r3.y, r5.y, c7.z -add.f r0.x, r0.x, (neg)r2.w -add.f r1.z, r1.z, (neg)r3.x -add.f r0.y, r2.z, (neg)r0.y -mul.f r2.z, r3.y, r2.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r2.z, r2.z -mul.f r0.x, r0.x, r1.x -mul.f r1.x, r5.x, c7.y -mad.f32 r0.x, r1.z, r2.y, r0.x -mul.f r1.z, r4.w, c7.x -rcp r2.y, r3.w -(ss)mov.f32f32 r2.y, r2.y mov.f32f32 r0.z, r0.z -mov.f32f32 r0.x, r0.x -mul.f r1.x, r1.x, r2.x -mad.f32 r0.x, r0.y, r1.w, r0.x -mul.f r0.y, r1.z, r2.x -(rpt1)nop -mov.f32f32 r0.x, r0.x -mov.f32f32 r1.x, r1.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.z, r0.z +mul.f r3.z, r1.x, r3.z +(sy)mul.f r4.z, r5.w, r4.x +mul.f r4.x, r5.z, r4.x +mad.f32 r4.y, r3.y, r4.w, r4.y +mul.f r3.z, c12.y, r3.z +mad.f32 r2.x, r0.z, r2.x, r4.y +mul.f r3.y, r1.x, r3.y +mul.f r0.z, r1.x, r0.z +add.f r1.x, r3.w, (neg)r3.z +max.f r2.x, r2.x, c12.w +sam (f32)(xyzw)r6.w, r2.y, s#1, t#1 +(ss)mul.f r2.y, c12.y, r3.y +(sy)mul.f r2.z, r6.w, c7.x +mul.f r1.x, r1.x, r1.z +mov.f32f32 r1.z, r2.x +add.f r2.y, r4.w, (neg)r2.y +mul.f r2.x, r2.z, r2.x +mul.f r2.z, r7.y, c7.z +mul.f r3.y, r7.x, c7.y +mad.f32 r1.x, r2.y, r1.y, r1.x +mul.f r0.z, c12.y, r0.z +mul.f r1.y, r2.z, r1.z +mul.f r1.z, r3.y, r1.z +mul.f r2.y, r5.y, r1.w +add.f r0.y, r0.y, (neg)r0.z +mul.f r0.x, r0.x, r0.w +mul.f r0.z, r7.y, c6.z +mul.f r1.w, r7.z, c11.x +sam (f32)(xyzw)r2.z, r2.w, s#2, t#2 +(sy)mul.f r0.w, r3.x, c8.z +mad.f32 r0.x, r0.y, r0.x, r1.x +mul.f r0.y, r7.x, c6.y +mul.f r1.x, r6.w, c6.x +(ss)mul.f r2.w, r2.w, c8.y max.f r0.x, (neg)r0.x, c12.w -mov.f32f32 r0.z, r1.y -mul.f r1.y, r5.z, c11.x -mul.f r2.x, r5.y, c6.z -mov.f32f32 r0.x, r0.x -mov.f32f32 r1.w, r0.z -mov.f32f32 r0.z, r1.y -mov.f32f32 r1.y, r2.x -mul.f r2.x, r5.x, c6.y -mul.f r2.w, r4.w, c6.x -mov.f32f32 r3.x, r4.z +(rpt3)nop +mad.f32 r3.x, c9.x, r3.y, c13.x +mul.f r2.z, r2.z, c8.x log2 r0.x, r0.x -mov.f32f32 r3.y, r3.z -(ss)nop -sam (f32)(xyz)r3.w, r1.z, s#4, t#4 -(sy)(ss)mul.f r1.z, r3.w, r2.y -mul.f r1.w, r4.y, r2.y -mul.f r2.y, r4.x, r2.y -mov.f32f32 r3.y, r3.y -mov.f32f32 r1.z, r1.z -mov.f32f32 r0.z, r0.z -nop -mov.f32f32 r3.w, r1.w -mov.f32f32 r2.y, r2.y -mov.f32f32 r1.w, r0.z -sam (f32)(xyzw)r4.x, r3.x, s#2, t#2 -(sy)mad.f32 r0.z, c9.x, r4.w, c13.x -mov.f32f32 r2.x, r2.x -mov.f32f32 r2.w, r2.w -(ss)mul.f r3.x, r4.z, c8.z -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -mul.f r3.y, r4.y, c8.y -mul.f r4.x, r4.x, c8.x -(ss)mul.f r0.x, r0.z, r0.x -mov.f32f32 r0.z, r0.w -mov.f32f32 r0.w, r3.z -nop -mov.f32f32 r0.x, r0.x +(rpt1)nop +(ss)mul.f r0.x, r3.x, r0.x (rpt5)nop exp2 r0.x, r0.x -(ss)mad.f32 r2.z, r3.x, r0.x, r2.z -mad.f32 r1.x, r3.y, r0.x, r1.x -(ss)mad.f32 r0.x, r4.x, r0.x, r0.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.y, r2.z -mov.f32f32 r1.x, r1.x -mov.f32f32 r0.x, r0.x +(ss)mad.f32 r0.w, r0.w, r0.x, r1.y +mad.f32 r1.y, r2.w, r0.x, r1.z +(ss)mad.f32 r0.x, r2.z, r0.x, r2.x nop -mad.f32 r0.y, r0.y, r3.w, r1.y -mad.f32 r1.x, r1.x, r2.y, r2.x -sam (f32)(xyz)r3.x, r0.z, s#3, t#3 -(sy)mad.f32 r0.y, c10.z, r3.z, r0.y -mad.f32 r0.x, r0.x, r1.z, r2.w -(ss)mad.f32 r0.z, c10.y, r3.y, r1.x -mad.f32 r0.x, c10.x, r3.x, r0.x -mov.f32f32 r0.y, r0.y -(rpt2)nop -mov.f32f32 r1.z, r0.y -mov.f32f32 r0.y, r0.z -mov.f32f32 r0.x, r0.x -(rpt1)nop -mov.f32f32 r1.y, r0.y -mov.f32f32 r1.x, r0.x +mad.f32 r0.z, r0.w, r4.z, r0.z +mad.f32 r0.y, r1.y, r4.x, r0.y +mad.f32 r1.z, c10.z, r6.z, r0.z +mad.f32 r1.y, c10.y, r6.y, r0.y +mad.f32 r0.x, r0.x, r2.y, r1.x +nop +mad.f32 r1.x, c10.x, r6.x, r0.x end nop +nop +nop ; FRAG: outputs: r1.x (1:0) -; FRAG: inputs: r0.z (5:20,cm=f,il=8,b=1) r1.x (5:21,cm=f,il=12,b=1) r2.x (5:22,cm=f,il=16,b=1) r3.x (5:23,cm=f,il=20,b=1) r4.x (5:24,cm=f,il=24,b=1) -; FRAG: 222 instructions, 0 half, 6 full +; FRAG: inputs: r0.x (5:20,cm=f,il=8,b=1) r1.x (5:21,cm=f,il=12,b=1) r2.x (5:22,cm=f,il=16,b=1) r3.x (5:23,cm=f,il=20,b=1) r4.x (5:24,cm=f,il=24,b=1) +; FRAG: 157 instructions, 0 half, 8 full diff --git a/reference/xon4.asm b/reference/xon4.asm index 9a1b31d..26f774e 100644 --- a/reference/xon4.asm +++ b/reference/xon4.asm @@ -1,22 +1,22 @@ ; options: ; VERT: new compiler -@in(r2.w) in0 -@in(r3.x) in1 -@in(r3.y) in2 -@in(r3.z) in3 +@in(r6.x) in0 +@in(r6.y) in1 +@in(r6.z) in2 +@in(r6.w) in3 @in(r0.x) in4 @in(r0.y) in5 @in(r0.z) in6 @in(r0.w) in7 -@in(r6.x) in8 -@in(r6.y) in9 -@in(r6.z) in10 -@in(r3.w) in12 -@in(r4.x) in13 -@in(r4.y) in14 -@in(r4.z) in16 -@in(r4.w) in17 -@in(r5.x) in18 +@in(r3.x) in8 +@in(r3.y) in9 +@in(r3.z) in10 +@in(r4.x) in12 +@in(r4.y) in13 +@in(r4.z) in14 +@in(r5.x) in16 +@in(r5.y) in17 +@in(r5.z) in18 @in(r1.z) in20 @in(r1.w) in21 @out(r0.x) out0 @@ -43,99 +43,53 @@ @out(r5.y) out21 @out(r5.z) out22 @out(r5.w) out23 -(sy)(ss)add.f r1.x, c4.x, (neg)r2.w +@const(c9.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 +@const(c10.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 +(sy)(ss)add.f r1.x, c4.x, (neg)r6.x mul.f r1.y, c5.y, r0.x mul.f r0.x, c5.x, r0.x +mul.f r2.w, c0.w, r6.x +mul.f r2.x, r1.x, r5.x +add.f r2.y, c4.y, (neg)r6.y +mul.f r2.z, r1.x, r4.x +mul.f r1.x, r1.x, r3.x mad.f32 r1.y, c6.y, r0.y, r1.y -mul.f r2.x, r1.x, r4.z -add.f r5.y, c4.y, (neg)r3.x -mad.f32 r1.y, c7.y, r0.z, r1.y +mad.f32 r2.x, r2.y, r5.y, r2.x +add.f r3.w, c4.z, (neg)r6.z +mad.f32 r4.w, r2.y, r4.y, r2.z +mad.f32 r1.x, r2.y, r3.y, r1.x +nop +mad.f32 r2.z, r3.w, r5.z, r2.x +mad.f32 r2.y, r3.w, r4.z, r4.w +mad.f32 r2.x, r3.w, r3.z, r1.x +mad.f32 r1.x, c7.y, r0.z, r1.y mad.f32 r0.x, c6.x, r0.y, r0.x -mul.f r0.y, r1.x, r3.w -mad.f32 r2.x, r5.y, r4.w, r2.x -mad.f32 r1.y, c8.y, r0.w, r1.y +mad.f32 r1.y, c8.y, r0.w, r1.x mad.f32 r0.x, c7.x, r0.z, r0.x -mad.f32 r0.y, r5.y, r4.x, r0.y -mov.f32f32 r0.z, r2.x -add.f r2.x, c4.z, (neg)r3.y -mov.f32f32 r1.y, r1.y -mad.f32 r0.x, c8.x, r0.w, r0.x -mov.f32f32 r0.y, r0.y -mad.f32 r0.z, r2.x, r5.x, r0.z -mov.f32f32 r0.w, r1.y -mov.f32f32 r0.x, r0.x -mad.f32 r0.y, r2.x, r4.y, r0.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r1.y, r0.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r2.y, r0.y -mov.f32f32 r2.z, r0.z -mul.f r0.y, r1.x, r6.x -mov.f32f32 r1.x, r0.x -mad.f32 r0.x, r5.y, r6.y, r0.y -mul.f r0.y, c0.w, r2.w -mul.f r0.z, c0.z, r2.w -mul.f r0.w, c0.y, r2.w -mov.f32f32 r0.x, r0.x -mad.f32 r0.y, c1.w, r3.x, r0.y -mad.f32 r0.x, r2.x, r6.z, r0.x -mad.f32 r0.y, c2.w, r3.y, r0.y -mad.f32 r0.z, c1.z, r3.x, r0.z -mad.f32 r0.w, c1.y, r3.x, r0.w -mov.f32f32 r2.x, r0.x -mad.f32 r0.x, c3.w, r3.z, r0.y -mad.f32 r0.y, c2.z, r3.y, r0.z -mad.f32 r0.z, c2.y, r3.y, r0.w -mul.f r2.w, c0.x, r2.w -mov.f32f32 r0.w, r0.x -mad.f32 r0.x, c3.z, r3.z, r0.y -mad.f32 r0.y, c3.y, r3.z, r0.z -mad.f32 r2.w, c1.x, r3.x, r2.w -mov.f32f32 r3.x, c9.x -mov.f32f32 r0.z, r0.x -mov.f32f32 r0.y, r0.y -mad.f32 r0.x, c2.x, r3.y, r2.w -mov.f32f32 r5.w, r3.x -mad.f32 r0.x, c3.x, r3.z, r0.x -mov.f32f32 r2.w, r5.x -mov.f32f32 r3.x, r4.w -mov.f32f32 r3.y, r4.z -mov.f32f32 r0.x, r0.x -mov.f32f32 r5.z, r2.w -mov.f32f32 r5.y, r3.x -mov.f32f32 r5.x, r3.y -mov.f32f32 r2.w, c9.x -mov.f32f32 r3.x, r4.y -mov.f32f32 r3.y, r4.x -mov.f32f32 r3.z, r3.w -mov.f32f32 r4.w, r2.w -mov.f32f32 r4.z, r3.x -mov.f32f32 r4.y, r3.y -mov.f32f32 r4.x, r3.z -mov.f32f32 r2.w, c9.x -mov.f32f32 r3.x, r6.z -mov.f32f32 r3.y, r6.y -mov.f32f32 r6.x, r6.x -mov.f32f32 r3.w, r2.w -mov.f32f32 r3.z, r3.x -mov.f32f32 r3.y, r3.y -mov.f32f32 r3.x, r6.x +mad.f32 r0.y, c1.w, r6.y, r2.w +mad.f32 r1.x, c8.x, r0.w, r0.x +mad.f32 r0.x, c2.w, r6.z, r0.y +mul.f r0.y, c0.z, r6.x +mad.f32 r0.w, c3.w, r6.w, r0.x +mad.f32 r0.x, c1.z, r6.y, r0.y +mul.f r0.y, c0.y, r6.x +mad.f32 r0.x, c2.z, r6.z, r0.x +mad.f32 r0.y, c1.y, r6.y, r0.y +mad.f32 r0.z, c3.z, r6.w, r0.x +mad.f32 r0.x, c2.y, r6.z, r0.y +mul.f r2.w, c0.x, r6.x +mad.f32 r0.y, c3.y, r6.w, r0.x +mad.f32 r0.x, c1.x, r6.y, r2.w +mov.f32f32 r5.w, c9.x +mad.f32 r0.x, c2.x, r6.z, r0.x +mov.f32f32 r4.w, c9.x +mad.f32 r0.x, c3.x, r6.w, r0.x +mov.f32f32 r3.w, c9.x mov.f32f32 r2.w, c9.x -mov.f32f32 r1.w, r1.w -mov.f32f32 r1.z, r1.z -nop -mov.f32f32 r2.w, r2.w -mov.f32f32 r1.w, r1.w -mov.f32f32 r1.z, r1.z end nop nop -nop ; VERT: outputs: r0.x (0:0) r1.x (5:20) r2.x (5:21) r3.x (5:22) r4.x (5:23) r5.x (5:24) -; VERT: inputs: r2.w (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=f,il=12,b=0) r6.x (0:0,cm=7,il=16,b=0) r3.w (0:0,cm=7,il=20,b=0) r4.z (0:0,cm=7,il=24,b=0) r1.z (0:0,cm=3,il=28,b=0) -; VERT: 89 instructions, 0 half, 7 full +; VERT: inputs: r6.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=f,il=12,b=0) r3.x (0:0,cm=7,il=16,b=0) r4.x (0:0,cm=7,il=20,b=0) r5.x (0:0,cm=7,il=24,b=0) r1.z (0:0,cm=3,il=28,b=0) +; VERT: 42 instructions, 0 half, 7 full diff --git a/reference/xon5.asm b/reference/xon5.asm index be51a42..20eca61 100644 --- a/reference/xon5.asm +++ b/reference/xon5.asm @@ -6,143 +6,106 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c7.x) 0xbf000000, 0x40000000, 0xbf800000, 0x00000000 +@const(c8.x) 0x3e800000, 0x00000000, 0x00000000, 0x00000000 +@const(c9.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.z, 2, r0.x -bary.f r0.w, 0, r0.x -bary.f r1.x, 3, r0.x +bary.f r0.w, 3, r0.x +bary.f r1.x, 0, r0.x bary.f r1.y, 1, r0.x mov.f32f32 r1.z, r0.z mov.f32f32 r1.w, r0.w -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.z, r0.z -mov.f32f32 r2.x, r1.z -mov.f32f32 r1.z, r1.x -mov.f32f32 r2.z, r1.w -mov.f32f32 r1.w, r1.y -mov.f32f32 r3.x, r0.w -mov.f32f32 r2.y, r1.z -mov.f32f32 r0.w, r1.y -mov.f32f32 r2.w, r1.w -mov.f32f32 r1.y, r0.z -mov.f32f32 r0.z, r1.x -mov.f32f32 r3.y, r0.w -bary.f r0.w, 4, r0.x -sam (f32)(xyz)r1.z, r2.x, s#3, t#3 -(sy)mad.f32 r1.x, c7.y, r1.z, c7.z -mad.f32 r1.z, c7.y, r1.w, c7.z -sam (f32)(xyz)r3.z, r2.z, s#0, t#0 -(sy)add.f r1.w, r3.z, c7.x -(ss)mad.f32 r2.x, c7.y, r2.x, c7.z -mov.f32f32 r1.x, r1.x -bary.f r2.y, 8, r0.x -mov.f32f32 r1.w, r1.w -bary.f r2.z, 12, r0.x -mul.f r0.w, r1.x, r0.w -mul.f r2.y, r1.x, r2.y -mov.f32f32 r1.z, r1.z -bary.f r2.w, 9, r0.x -mul.f r3.z, r1.w, r1.w +mov.f32f32 r2.x, r1.x +mov.f32f32 r2.y, r1.y +sam (f32)(xyz)r2.z, r0.z, s#3, t#3 +(sy)(ss)mad.f32 r0.z, c7.y, r2.z, c7.z +bary.f r0.w, 12, r0.x +mad.f32 r2.z, c7.y, r2.w, c7.z +mad.f32 r2.w, c7.y, r3.x, c7.z +mov.f32f32 r3.x, r0.z +bary.f r3.y, 8, r0.x +sam (f32)(xyz)r3.z, r2.x, s#0, t#0 +(sy)(ss)add.f r2.x, r3.z, c7.x +bary.f r2.y, 4, r0.x +mul.f r0.z, r0.z, r0.w +mul.f r0.w, r3.x, r3.y +mov.f32f32 r3.y, r2.z +bary.f r3.z, 9, r0.x +mov.f32f32 r4.y, r2.x +mul.f r2.y, r3.x, r2.y +bary.f r3.x, 13, r0.x +mad.f32 r0.w, r3.y, r3.z, r0.w +mov.f32f32 r3.z, r2.w +bary.f r4.z, 10, r0.x +mul.f r2.x, r2.x, r4.y add.f r3.w, r3.w, c7.x -mul.f r1.x, r1.x, r2.z -mad.f32 r2.y, r1.z, r2.w, r2.y -bary.f r2.z, 13, r0.x -mov.f32f32 r2.w, r3.w -bary.f r3.w, 5, r0.x -mov.f32f32 r2.y, r2.y -mov.f32f32 r2.x, r2.x -bary.f r4.y, 10, r0.x -mad.f32 r3.z, r2.w, r2.w, r3.z -mad.f32 r1.x, r1.z, r2.z, r1.x -mad.f32 r0.w, r1.z, r3.w, r0.w -mad.f32 r1.z, r2.x, r4.y, r2.y -mov.f32f32 r2.y, r3.z -add.f r2.z, r4.x, c7.x -mov.f32f32 r1.x, r1.x -mov.f32f32 r3.z, r1.z -mov.f32f32 r0.w, r0.w -bary.f r1.z, 6, r0.x -mov.f32f32 r2.z, r2.z +bary.f r4.w, 5, r0.x +mad.f32 r0.w, r3.z, r4.z, r0.w +mad.f32 r0.z, r2.z, r3.x, r0.z +mov.f32f32 r2.z, r3.w +mad.f32 r2.y, r3.y, r4.w, r2.y +mov.f32f32 r3.x, r0.w +bary.f r3.y, 6, r0.x +mad.f32 r2.x, r3.w, r2.z, r2.x +add.f r3.w, r4.x, c7.x bary.f (ei)r0.x, 14, r0.x -sam (f32)(xyzw)r3.w, r3.x, s#1, t#1 -(sy)mul.f r0.y, r4.z, c6.x -mad.f32 r0.w, r2.x, r1.z, r0.w -mad.f32 r2.y, r2.z, r2.z, r2.y -mad.f32 r0.x, r2.x, r0.x, r1.x -mov.f32f32 r0.y, r0.y -mul.f r1.x, r0.w, r0.w -mov.f32f32 r1.z, r0.z -mad.f32 r0.z, r3.z, r3.z, r1.x -mov.f32f32 r0.x, r0.x -rsq r1.x, r2.y +mad.f32 r0.y, r3.z, r3.y, r2.y +sam (f32)(xyz)r4.z, r1.z, s#2, t#2 +sam (f32)(xyzw)r5.y, r1.x, s#1, t#1 +(sy)(ss)mul.f r1.w, r6.x, c6.x +mov.f32f32 r1.x, r3.w +mad.f32 r0.x, r2.w, r0.x, r0.z +mul.f r0.z, r0.y, r0.y nop -(ss)mov.f32f32 r1.x, r1.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y +mad.f32 r0.z, r0.w, r3.x, r0.z +mov.f32f32 r0.w, r0.x +mad.f32 r1.x, r1.x, r1.x, r2.x (rpt1)nop -mad.f32 r0.z, r0.x, r0.x, r0.z -mul.f r1.w, r1.w, r1.x -mul.f r2.x, r2.w, r1.x -mul.f r1.x, r2.z, r1.x +mov.f32f32 r0.w, r0.w (rpt2)nop +mad.f32 r0.z, r0.w, r0.w, r0.z +rsq r0.w, r1.x +(ss)mov.f32f32 r1.x, r0.w +(rpt3)nop +mul.f r1.y, r4.y, r1.x rsq r0.z, r0.z -(ss)mov.f32f32 r0.z, r0.z -mov.f32f32 r2.y, r1.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r1.x, r1.x -mul.f r0.w, r0.w, r0.z +(ss)mov.f32f32 r1.z, r0.z mul.f r0.x, r0.x, r0.z -mul.f r0.z, r3.z, r0.z -mov.f32f32 r1.w, r0.y -mov.f32f32 r0.y, r0.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.z, r0.z -sam (f32)(xyz)r2.z, r1.y, s#2, t#2 +(ss)mul.f r0.z, r2.z, r1.x +mul.f r0.w, r3.w, r0.w +mul.f r0.y, r0.y, r1.z +mov.f32f32 r1.x, r0.x +mul.f r1.z, r3.x, r1.z nop -mul.f r0.y, r2.y, r0.y -max.f r0.w, c8.x, r0.x -mad.f32 r0.y, r2.x, r0.z, r0.y -(rpt2)nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.w -mad.f32 r0.x, r1.x, r0.x, r0.y -(rpt2)nop -mov.f32f32 r0.x, r0.x +mul.f r0.y, r1.y, r0.y +max.f r1.x, c8.x, r1.x +mad.f32 r0.y, r0.z, r1.z, r0.y +nop +mad.f32 r0.x, r0.w, r0.x, r0.y (rpt2)nop max.f r0.x, r0.x, c7.w -rcp r0.y, r0.z -(ss)mov.f32f32 r0.y, r0.y -(rpt1)nop -mov.f32f32 r0.x, r0.x -(sy)(ss)mul.f r0.z, r3.x, r0.y -mul.f r0.w, r2.w, r0.y -mul.f r0.y, r2.z, r0.y -mul.f r1.x, c5.z, r0.x -mul.f r1.y, c5.y, r0.x +rcp r0.y, r1.x +(ss)mov.f32f32 r0.z, r0.y +mul.f r0.y, r4.z, r0.y +nop +mov.f32f32 r0.w, r0.x mul.f r0.x, c5.x, r0.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.y, r1.y -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.w, r0.w -mad.f32 r0.z, r1.x, r0.z, c4.z -mov.f32f32 r0.y, r0.y -(rpt1)nop -mov.f32f32 r0.z, r0.z -mad.f32 r0.w, r1.y, r0.w, c4.y +(ss)mul.f r1.x, r5.x, r0.z +mul.f r0.z, r4.w, r0.z +mul.f r1.y, c5.z, r0.w +mul.f r0.w, c5.y, r0.w mad.f32 r0.x, r0.x, r0.y, c4.x nop -mul.f r0.y, r4.y, r0.z -mov.f32f32 r0.z, r0.w -mov.f32f32 r0.x, r0.x -nop -mov.f32f32 r1.z, r0.y -mul.f r0.y, r4.x, r0.z -mul.f r0.x, r3.w, r0.x +mad.f32 r0.y, r1.y, r1.x, c4.z +mad.f32 r0.z, r0.w, r0.z, c4.y (rpt1)nop -mov.f32f32 r1.y, r0.y -mov.f32f32 r1.x, r0.x +mul.f r1.z, r5.w, r0.y +mul.f r1.y, r5.z, r0.z +mul.f r1.x, r5.y, r0.x end +nop +nop ; FRAG: outputs: r1.x (1:0) -; FRAG: inputs: r0.y (5:20,cm=f,il=8,b=1) r1.x (5:21,cm=f,il=12,b=1) r2.x (5:22,cm=f,il=16,b=1) r3.x (5:23,cm=f,il=20,b=1) -; FRAG: 148 instructions, 0 half, 5 full +; FRAG: inputs: r0.x (5:20,cm=f,il=8,b=1) r1.x (5:21,cm=f,il=12,b=1) r2.x (5:22,cm=f,il=16,b=1) r3.x (5:23,cm=f,il=20,b=1) +; FRAG: 103 instructions, 0 half, 7 full diff --git a/reference/xon6.asm b/reference/xon6.asm index 7603f89..d468c8e 100644 --- a/reference/xon6.asm +++ b/reference/xon6.asm @@ -1,24 +1,24 @@ ; options: ; VERT: new compiler -@in(r1.z) in0 -@in(r1.w) in1 -@in(r2.x) in2 -@in(r2.y) in3 +@in(r4.w) in0 +@in(r5.x) in1 +@in(r5.y) in2 +@in(r5.z) in3 @in(r0.x) in4 @in(r0.y) in5 @in(r0.z) in6 @in(r0.w) in7 -@in(r5.x) in8 -@in(r5.y) in9 -@in(r5.z) in10 -@in(r2.z) in12 -@in(r2.w) in13 -@in(r3.x) in14 -@in(r3.y) in16 -@in(r3.z) in17 -@in(r3.w) in18 -@in(r5.w) in20 -@in(r6.x) in21 +@in(r2.x) in8 +@in(r2.y) in9 +@in(r2.z) in10 +@in(r3.x) in12 +@in(r3.y) in13 +@in(r3.z) in14 +@in(r4.x) in16 +@in(r4.y) in17 +@in(r4.z) in18 +@in(r1.z) in20 +@in(r1.w) in21 @out(r0.x) out0 @out(r0.y) out1 @out(r0.z) out2 @@ -39,75 +39,36 @@ @out(r4.y) out17 @out(r4.z) out18 @out(r4.w) out19 +@const(c8.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)mul.f r1.x, c4.y, r0.x mul.f r0.x, c4.x, r0.x mad.f32 r1.x, c5.y, r0.y, r1.x mad.f32 r0.x, c5.x, r0.y, r0.x mad.f32 r0.y, c6.y, r0.z, r1.x mad.f32 r0.x, c6.x, r0.z, r0.x -mad.f32 r0.y, c7.y, r0.w, r0.y -mad.f32 r0.x, c7.x, r0.w, r0.x -mul.f r0.z, c0.w, r1.z -mul.f r0.w, c0.z, r1.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x -mad.f32 r0.z, c1.w, r1.w, r0.z -mad.f32 r0.w, c1.z, r1.w, r0.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x -mad.f32 r0.z, c2.w, r2.x, r0.z -mad.f32 r0.w, c2.z, r2.x, r0.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x -(rpt1)nop -mov.f32f32 r1.y, r0.y -mov.f32f32 r1.x, r0.x -mad.f32 r0.x, c3.w, r2.y, r0.z -mad.f32 r0.y, c3.z, r2.y, r0.w -mul.f r4.x, c0.y, r1.z -mul.f r1.z, c0.x, r1.z -mov.f32f32 r0.w, r0.x -mov.f32f32 r0.z, r0.y -mad.f32 r0.x, c1.y, r1.w, r4.x -mad.f32 r0.y, c1.x, r1.w, r1.z -mad.f32 r0.x, c2.y, r2.x, r0.x -mad.f32 r0.y, c2.x, r2.x, r0.y -mad.f32 r0.x, c3.y, r2.y, r0.x -mad.f32 r1.z, c3.x, r2.y, r0.y -(rpt1)nop -mov.f32f32 r0.y, r0.x -mov.f32f32 r0.x, r1.z -mov.f32f32 r1.z, c8.x -mov.f32f32 r1.w, r3.w -mov.f32f32 r2.x, r3.z -mov.f32f32 r2.y, r3.y -mov.f32f32 r4.w, r1.z -mov.f32f32 r4.z, r1.w -mov.f32f32 r4.y, r2.x -mov.f32f32 r4.x, r2.y -mov.f32f32 r1.z, c8.x -mov.f32f32 r1.w, r3.x -mov.f32f32 r2.x, r2.w -mov.f32f32 r2.y, r2.z -mov.f32f32 r3.w, r1.z -mov.f32f32 r3.z, r1.w -mov.f32f32 r3.y, r2.x -mov.f32f32 r3.x, r2.y -mov.f32f32 r1.z, c8.x -mov.f32f32 r1.w, r5.z -mov.f32f32 r2.x, r5.y -mov.f32f32 r5.x, r5.x -mov.f32f32 r2.w, r1.z -mov.f32f32 r2.z, r1.w -mov.f32f32 r2.y, r2.x -mov.f32f32 r2.x, r5.x -mov.f32f32 r1.z, r6.x -mov.f32f32 r5.x, r5.w -(rpt1)nop -mov.f32f32 r1.w, r1.z -mov.f32f32 r1.z, r5.x +mad.f32 r1.y, c7.y, r0.w, r0.y +mad.f32 r1.x, c7.x, r0.w, r0.x +mul.f r0.x, c0.w, r4.w +mul.f r0.y, c0.z, r4.w +mad.f32 r0.x, c1.w, r5.x, r0.x +mad.f32 r0.y, c1.z, r5.x, r0.y +mad.f32 r0.x, c2.w, r5.y, r0.x +mad.f32 r0.y, c2.z, r5.y, r0.y +mad.f32 r0.w, c3.w, r5.z, r0.x +mad.f32 r0.z, c3.z, r5.z, r0.y +mul.f r0.x, c0.y, r4.w +mul.f r0.y, c0.x, r4.w +mad.f32 r0.x, c1.y, r5.x, r0.x +mad.f32 r0.y, c1.x, r5.x, r0.y +mad.f32 r0.x, c2.y, r5.y, r0.x +mad.f32 r2.w, c2.x, r5.y, r0.y +mad.f32 r0.y, c3.y, r5.z, r0.x +mad.f32 r0.x, c3.x, r5.z, r2.w +mov.f32f32 r4.w, c8.x +mov.f32f32 r3.w, c8.x +mov.f32f32 r2.w, c8.x end ; VERT: outputs: r0.x (0:0) r1.x (5:20) r2.x (5:21) r3.x (5:22) r4.x (5:23) -; VERT: inputs: r1.z (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=f,il=12,b=0) r5.x (0:0,cm=7,il=16,b=0) r2.z (0:0,cm=7,il=20,b=0) r3.y (0:0,cm=7,il=24,b=0) r5.w (0:0,cm=3,il=28,b=0) -; VERT: 71 instructions, 0 half, 7 full +; VERT: inputs: r4.w (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=f,il=12,b=0) r2.x (0:0,cm=7,il=16,b=0) r3.x (0:0,cm=7,il=20,b=0) r4.x (0:0,cm=7,il=24,b=0) r1.z (0:0,cm=3,il=28,b=0) +; VERT: 28 instructions, 0 half, 6 full diff --git a/reference/xon7.asm b/reference/xon7.asm index 3846190..faa1d33 100644 --- a/reference/xon7.asm +++ b/reference/xon7.asm @@ -6,215 +6,150 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c10.x) 0x3f000000, 0xbf000000, 0x40000000, 0xbf800000 +@const(c11.x) 0x00000000, 0x3f800000, 0x3e800000, 0x00000000 +@const(c12.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.z, 2, r0.x -bary.f r0.w, 0, r0.x -bary.f r1.x, 4, r0.x -bary.f r1.y, 3, r0.x -mov.f32f32 r1.z, r0.z -mov.f32f32 r1.w, r0.w -mov.f32f32 r2.y, r0.w -mul.f r2.z, r1.x, r1.x -mov.f32f32 r2.w, r1.z -mov.f32f32 r1.z, r1.y -mov.f32f32 r3.y, r1.w -bary.f r3.w, 1, r0.x -mov.f32f32 r4.x, r2.y -mov.f32f32 r3.x, r1.z -bary.f r1.z, 5, r0.x -mov.f32f32 r1.w, r3.w -mov.f32f32 r2.y, r3.w -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.z, r0.z -mad.f32 r2.z, r1.z, r1.z, r2.z -sam (f32)(xyz)r4.z, r2.w, s#4, t#4 -(sy)(ss)mad.f32 r2.w, c10.z, r4.z, c10.w -mov.f32f32 r3.z, r1.w -mov.f32f32 r4.y, r2.y -mov.f32f32 r1.w, r2.z -mov.f32f32 r2.y, r2.w -bary.f r2.z, 12, r0.x -bary.f r2.w, 16, r0.x -bary.f r3.x, 8, r0.x -sam (f32)(xyz)r5.y, r3.y, s#0, t#0 -(sy)(ss)add.f r3.y, r5.y, c10.y -mul.f r2.z, r2.y, r2.z -mad.f32 r3.z, c10.z, r4.w, c10.w -mul.f r2.w, r2.y, r2.w -mov.f32f32 r3.y, r3.y -mul.f r2.y, r2.y, r3.x -mov.f32f32 r3.x, r3.z -bary.f r3.z, 13, r0.x -mul.f r4.z, r3.y, r3.y -add.f r4.w, r5.z, c10.y -bary.f r5.y, 17, r0.x -mad.f32 r2.z, r3.x, r3.z, r2.z -bary.f r3.z, 9, r0.x -mov.f32f32 r4.w, r4.w -mad.f32 r2.w, r3.x, r5.y, r2.w -mov.f32f32 r2.z, r2.z -mad.f32 r5.x, c10.z, r5.x, c10.w -mad.f32 r4.z, r4.w, r4.w, r4.z -mov.f32f32 r2.w, r2.w -mad.f32 r2.y, r3.x, r3.z, r2.y -mov.f32f32 r3.x, r5.x -bary.f r3.z, 14, r0.x -mov.f32f32 r4.z, r4.z -add.f r5.x, r5.w, c10.y -bary.f r5.y, 18, r0.x -mad.f32 r2.z, r3.x, r3.z, r2.z -mov.f32f32 r2.y, r2.y -mov.f32f32 r3.z, r5.x -bary.f r5.x, 10, r0.x -mov.f32f32 r2.z, r2.z -mad.f32 r2.w, r3.x, r5.y, r2.w -mad.f32 r4.z, r3.z, r3.z, r4.z -mad.f32 r2.y, r3.x, r5.x, r2.y +bary.f r0.w, 3, r0.x +bary.f r1.x, 0, r0.x +bary.f r1.z, 4, r0.x +mov.f32f32 r2.y, r0.z +mov.f32f32 r2.z, r0.w +mov.f32f32 r2.w, r1.x +bary.f r1.y, 1, r0.x +sam (f32)(xyz)r3.y, r0.z, s#4, t#4 +(sy)(ss)mad.f32 r0.z, c10.z, r3.y, c10.w +mov.f32f32 r4.x, r1.x +bary.f r0.w, 16, r0.x +mul.f r1.w, r1.z, r1.z +mov.f32f32 r3.y, r0.z +bary.f r4.z, 12, r0.x +mov.f32f32 r4.y, r1.y +mov.f32f32 r3.x, r1.y +bary.f r4.w, 8, r0.x +mul.f r4.z, r3.y, r4.z +mad.f32 r3.z, c10.z, r3.z, c10.w +mul.f r0.z, r0.z, r0.w +mul.f r0.w, r3.y, r4.w +bary.f r3.y, 17, r0.x +mov.f32f32 r4.w, r3.z +bary.f r5.x, 13, r0.x +sam (f32)(xyz)r5.y, r4.x, s#0, t#0 +(sy)(ss)add.f r4.x, r5.y, c10.y +sam (f32)(xyzw)r6.x, r2.w, s#1, t#1 +(ss)bary.f r2.w, 9, r0.x +(sy)cmps.f.lt r3.x, r6.w, c10.x +mad.f32 r4.y, r4.w, r5.x, r4.z +mad.f32 r3.w, c10.z, r3.w, c10.w +mov.f32f32 r4.z, r4.x +cov.u32f32 r3.x, r3.x +mad.f32 r0.w, r4.w, r2.w, r0.w +mov.f32f32 r2.w, r3.w +bary.f r4.w, 14, r0.x +mul.f r4.x, r4.x, r4.z +add.f r5.x, r5.z, c10.y +mov.f32f32 r5.y, (0.000000) +mad.f32 r4.y, r2.w, r4.w, r4.y +bary.f r4.w, 10, r0.x +mad.f32 r0.z, r3.z, r3.y, r0.z +bary.f r3.y, 5, r0.x +mov.f32f32 r3.z, r4.y +mad.f32 r0.w, r2.w, r4.w, r0.w +mov.f32f32 r2.w, r5.x +cmps.f.ne p0.x, r3.x, r5.y +bary.f r3.x, 18, r0.x +mul.f r4.w, r0.w, r0.w +mad.f32 r4.x, r5.x, r2.w, r4.x +mad.f32 r4.y, r4.y, r3.z, r4.w +mad.f32 r0.z, r3.w, r3.x, r0.z +add.f r3.x, r5.w, c10.y bary.f (ei)r0.x, 6, r0.x -sam (f32)(xyzw)r5.x, r4.x, s#1, t#1 -mov.f32f32 r0.y, r2.w -(sy)cmps.f.lt r2.w, r5.w, c10.x -mul.f r3.x, r2.y, r2.y -mad.f32 r1.w, r0.x, r0.x, r1.w -mad.f32 r3.x, r2.z, r2.z, r3.x -(ss)rsq r4.x, r4.z -(ss)mov.f32f32 r4.x, r4.x +kill p0.x +mov.f32f32 r0.y, r0.z +mov.f32f32 r3.w, r3.x +mad.f32 r4.w, r3.y, r3.y, r1.w +mul.f r1.w, r6.w, c9.x mov.f32f32 r0.y, r0.y -cov.u32f32 r2.w, r2.w -mov.f32f32 r3.x, r3.x -mul.f r3.y, r3.y, r4.x -mad.f32 r3.x, r0.y, r0.y, r3.x -mul.f r4.y, r4.w, r4.x -mul.f r3.z, r3.z, r4.x -(rpt3)nop -rsq r3.x, r3.x -(ss)mov.f32f32 r3.x, r3.x -mov.f32f32 r3.y, r3.y -mov.f32f32 r4.x, r4.y +mad.f32 r3.w, r3.w, r3.w, r4.x +mad.f32 r4.x, r0.x, r0.x, r4.w +mul.f r4.w, r6.z, c5.z +mad.f32 r0.y, r0.y, r0.y, r4.y +mul.f r4.y, r6.z, c6.z +mul.f r5.x, r6.y, c6.y +mul.f r5.y, r6.x, c6.x +rsq r3.w, r3.w +(ss)mov.f32f32 r5.z, r3.w +mul.f r3.x, r3.x, r3.w +(ss)rsq r3.w, r4.x +(ss)mov.f32f32 r4.x, r3.w +rsq r0.y, r0.y +(ss)mov.f32f32 r5.w, r0.y +(ss)mul.f r0.y, r0.z, r0.y +mul.f r0.z, r4.z, r5.z +mul.f r2.w, r2.w, r5.z +mul.f r0.w, r0.w, r5.w +mov.f32f32 r4.z, r0.y +mul.f r3.z, r3.z, r5.w +mov.f32f32 r5.z, r0.z +mul.f r0.z, r0.z, r0.w +mov.f32f32 r0.w, r0.w +mad.f32 r0.z, r2.w, r3.z, r0.z +max.f r5.w, c11.z, r4.z +mad.f32 r0.z, r3.x, r4.z, r0.z +mul.f r6.z, r5.z, r0.w mov.f32f32 r3.z, r3.z -mul.f r2.y, r2.y, r3.x -mul.f r0.y, r0.y, r3.x -mul.f r2.z, r2.z, r3.x -mov.f32f32 r3.x, (0.000000) -mov.f32f32 r2.y, r2.y -mov.f32f32 r0.y, r0.y -mov.f32f32 r2.z, r2.z -cmps.f.ne p0.x, r2.w, r3.x -mul.f r2.w, r3.y, r2.y -mul.f r3.x, r3.y, r2.y -mad.f32 r2.w, r4.x, r2.z, r2.w -mad.f32 r3.x, r4.x, r2.z, r3.x -max.f r4.y, c11.z, r0.y -rsq r1.w, r1.w -(ss)mov.f32f32 r1.w, r1.w mov.f32f32 r2.w, r2.w +mul.f r5.z, r0.z, r5.z mov.f32f32 r3.x, r3.x -mad.f32 r2.w, r3.z, r0.y, r2.w -mad.f32 r3.x, r3.z, r0.y, r3.x -mov.f32f32 r4.y, r4.y -kill p0.x -mul.f r3.y, r2.w, r3.y -mul.f r4.x, r2.w, r4.x -mul.f r2.w, r2.w, r3.z -mov.f32f32 r3.x, r3.x -mov.f32f32 r3.y, r3.y -mov.f32f32 r3.z, r4.x -mov.f32f32 r2.w, r2.w -max.f r3.x, r3.x, c11.x -mul.f r3.y, c10.z, r3.y -mul.f r3.z, c10.z, r3.z +rcp r5.w, r5.w +(ss)mov.f32f32 r6.w, r5.w +mad.f32 r6.z, r2.w, r3.z, r6.z +mul.f r5.z, c10.z, r5.z +mad.f32 r4.z, r3.x, r4.z, r6.z +mul.f r2.w, r0.z, r2.w +mul.f r0.z, r0.z, r3.x +add.f r0.w, r0.w, (neg)r5.z +mul.f r1.z, r1.z, r4.x +max.f r3.x, r4.z, c11.x mul.f r2.w, c10.z, r2.w -mov.f32f32 r3.x, r3.x -mov.f32f32 r3.y, r3.y -mov.f32f32 r3.z, r3.z -mov.f32f32 r2.w, r2.w -mul.f r4.x, r5.z, c6.z -add.f r2.y, r2.y, (neg)r3.y -add.f r2.z, r2.z, (neg)r3.z -add.f r0.y, r0.y, (neg)r2.w -mul.f r2.w, r4.x, r3.x -mov.f32f32 r2.y, r2.y -mul.f r1.x, r1.x, r1.w -mov.f32f32 r2.z, r2.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r2.w, r2.w -mov.f32f32 r1.x, r1.x -mul.f r3.y, r5.y, c6.y -mul.f r3.z, r5.x, c6.x -rcp r4.x, r4.y -(ss)mov.f32f32 r4.x, r4.x -mul.f r1.x, r2.y, r1.x -mul.f r1.z, r1.z, r1.w -mul.f r2.y, r3.y, r3.x -mul.f r3.x, r3.z, r3.x -nop -mov.f32f32 r1.z, r1.z -mov.f32f32 r2.y, r2.y -mov.f32f32 r3.x, r3.x -mov.f32f32 r3.y, r0.z -mad.f32 r0.z, r2.z, r1.z, r1.x -mov.f32f32 r1.x, r1.y -mul.f r0.x, r0.x, r1.w -nop -mov.f32f32 r0.z, r0.z -mov.f32f32 r3.z, r1.x -mov.f32f32 r0.x, r0.x -mul.f r1.x, r5.w, c9.x -mul.f r1.y, r5.z, c5.z -mul.f r1.z, r5.y, c5.y -mad.f32 r0.x, r0.y, r0.x, r0.z -mov.f32f32 r0.y, r1.x -nop -sam (f32)(xyz)r5.y, r3.y, s#3, t#3 -(sy)mul.f r0.z, r5.w, r4.x -mov.f32f32 r0.x, r0.x -mul.f r1.x, r5.z, r4.x -mul.f r1.w, r5.y, r4.x -nop +mul.f r0.z, c10.z, r0.z +mul.f r0.w, r0.w, r1.z +mov.f32f32 r1.z, r3.x +add.f r2.w, r3.z, (neg)r2.w +mul.f r3.y, r3.y, r4.x +mul.f r3.x, r5.y, r3.x +add.f r0.y, r0.y, (neg)r0.z +sam (f32)(xyz)r7.x, r2.y, s#3, t#3 +mul.f r0.z, r4.y, r1.z +mad.f32 r0.w, r2.w, r3.y, r0.w +mul.f r0.x, r0.x, r3.w +mul.f r1.z, r5.x, r1.z +(sy)(ss)mul.f r2.y, r7.z, r6.w +mul.f r2.z, r7.y, r6.w +mad.f32 r0.x, r0.y, r0.x, r0.w +mul.f r0.y, r6.y, c5.y +mul.f r0.w, r6.x, c5.x +mul.f r2.w, r7.x, r5.w max.f r0.x, (neg)r0.x, c11.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r1.x, r1.x -mov.f32f32 r2.z, r1.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r1.w, r0.y -mov.f32f32 r0.y, r1.y -mov.f32f32 r1.y, r1.z -mul.f r1.z, r5.x, c5.x -(ss)mov.f32f32 r3.y, r0.w -mov.f32f32 r0.w, r3.w -log2 r0.x, r0.x -(rpt2)nop -mov.f32f32 r3.z, r0.w -mov.f32f32 r0.w, r1.z -(rpt4)nop -sam (f32)(xyzw)r3.y, r3.y, s#2, t#2 -(sy)mad.f32 r1.z, c8.x, r4.x, c11.y -mul.f r3.w, r3.w, c7.z -(ss)mul.f r3.z, r3.z, c7.y +sam (f32)(xyzw)r3.y, r1.x, s#2, t#2 +(sy)(ss)mul.f r1.x, r3.w, c7.z +mul.f r1.y, r3.z, c7.y +mad.f32 r3.z, c8.x, r4.x, c11.y mul.f r3.y, r3.y, c7.x -mov.f32f32 r1.z, r1.z -(rpt2)nop -(ss)mul.f r0.x, r1.z, r0.x -(rpt2)nop -mov.f32f32 r0.x, r0.x +(rpt1)nop +log2 r0.x, r0.x +(ss)mul.f r0.x, r3.z, r0.x (rpt5)nop exp2 r0.x, r0.x -(ss)mad.f32 r1.z, r3.w, r0.x, r2.w -mad.f32 r2.y, r3.z, r0.x, r2.y +(ss)mad.f32 r0.z, r1.x, r0.x, r0.z +mad.f32 r1.x, r1.y, r0.x, r1.z (ss)mad.f32 r0.x, r3.y, r0.x, r3.x nop -mov.f32f32 r1.z, r1.z -mov.f32f32 r2.y, r2.y -mov.f32f32 r0.x, r0.x -nop -mad.f32 r0.y, r1.z, r0.z, r0.y -mad.f32 r0.z, r2.y, r1.x, r1.y -mad.f32 r0.x, r0.x, r2.z, r0.w -nop -mov.f32f32 r1.z, r0.y -mov.f32f32 r1.y, r0.z -mov.f32f32 r1.x, r0.x +mad.f32 r1.z, r0.z, r2.y, r4.w +mad.f32 r1.y, r1.x, r2.z, r0.y +mad.f32 r1.x, r0.x, r2.w, r0.w end ; FRAG: outputs: r1.x (1:0) -; FRAG: inputs: r0.z (5:20,cm=f,il=8,b=1) r1.x (5:21,cm=f,il=12,b=1) r2.x (5:22,cm=f,il=16,b=1) r3.x (5:23,cm=f,il=20,b=1) r4.x (5:24,cm=f,il=24,b=1) -; FRAG: 226 instructions, 0 half, 6 full +; FRAG: inputs: r0.x (5:20,cm=f,il=8,b=1) r1.x (5:21,cm=f,il=12,b=1) r2.x (5:22,cm=f,il=16,b=1) r3.x (5:23,cm=f,il=20,b=1) r4.x (5:24,cm=f,il=24,b=1) +; FRAG: 146 instructions, 0 half, 8 full diff --git a/reference/xon8.asm b/reference/xon8.asm index 9a1b31d..26f774e 100644 --- a/reference/xon8.asm +++ b/reference/xon8.asm @@ -1,22 +1,22 @@ ; options: ; VERT: new compiler -@in(r2.w) in0 -@in(r3.x) in1 -@in(r3.y) in2 -@in(r3.z) in3 +@in(r6.x) in0 +@in(r6.y) in1 +@in(r6.z) in2 +@in(r6.w) in3 @in(r0.x) in4 @in(r0.y) in5 @in(r0.z) in6 @in(r0.w) in7 -@in(r6.x) in8 -@in(r6.y) in9 -@in(r6.z) in10 -@in(r3.w) in12 -@in(r4.x) in13 -@in(r4.y) in14 -@in(r4.z) in16 -@in(r4.w) in17 -@in(r5.x) in18 +@in(r3.x) in8 +@in(r3.y) in9 +@in(r3.z) in10 +@in(r4.x) in12 +@in(r4.y) in13 +@in(r4.z) in14 +@in(r5.x) in16 +@in(r5.y) in17 +@in(r5.z) in18 @in(r1.z) in20 @in(r1.w) in21 @out(r0.x) out0 @@ -43,99 +43,53 @@ @out(r5.y) out21 @out(r5.z) out22 @out(r5.w) out23 -(sy)(ss)add.f r1.x, c4.x, (neg)r2.w +@const(c9.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 +@const(c10.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 +(sy)(ss)add.f r1.x, c4.x, (neg)r6.x mul.f r1.y, c5.y, r0.x mul.f r0.x, c5.x, r0.x +mul.f r2.w, c0.w, r6.x +mul.f r2.x, r1.x, r5.x +add.f r2.y, c4.y, (neg)r6.y +mul.f r2.z, r1.x, r4.x +mul.f r1.x, r1.x, r3.x mad.f32 r1.y, c6.y, r0.y, r1.y -mul.f r2.x, r1.x, r4.z -add.f r5.y, c4.y, (neg)r3.x -mad.f32 r1.y, c7.y, r0.z, r1.y +mad.f32 r2.x, r2.y, r5.y, r2.x +add.f r3.w, c4.z, (neg)r6.z +mad.f32 r4.w, r2.y, r4.y, r2.z +mad.f32 r1.x, r2.y, r3.y, r1.x +nop +mad.f32 r2.z, r3.w, r5.z, r2.x +mad.f32 r2.y, r3.w, r4.z, r4.w +mad.f32 r2.x, r3.w, r3.z, r1.x +mad.f32 r1.x, c7.y, r0.z, r1.y mad.f32 r0.x, c6.x, r0.y, r0.x -mul.f r0.y, r1.x, r3.w -mad.f32 r2.x, r5.y, r4.w, r2.x -mad.f32 r1.y, c8.y, r0.w, r1.y +mad.f32 r1.y, c8.y, r0.w, r1.x mad.f32 r0.x, c7.x, r0.z, r0.x -mad.f32 r0.y, r5.y, r4.x, r0.y -mov.f32f32 r0.z, r2.x -add.f r2.x, c4.z, (neg)r3.y -mov.f32f32 r1.y, r1.y -mad.f32 r0.x, c8.x, r0.w, r0.x -mov.f32f32 r0.y, r0.y -mad.f32 r0.z, r2.x, r5.x, r0.z -mov.f32f32 r0.w, r1.y -mov.f32f32 r0.x, r0.x -mad.f32 r0.y, r2.x, r4.y, r0.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r1.y, r0.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r2.y, r0.y -mov.f32f32 r2.z, r0.z -mul.f r0.y, r1.x, r6.x -mov.f32f32 r1.x, r0.x -mad.f32 r0.x, r5.y, r6.y, r0.y -mul.f r0.y, c0.w, r2.w -mul.f r0.z, c0.z, r2.w -mul.f r0.w, c0.y, r2.w -mov.f32f32 r0.x, r0.x -mad.f32 r0.y, c1.w, r3.x, r0.y -mad.f32 r0.x, r2.x, r6.z, r0.x -mad.f32 r0.y, c2.w, r3.y, r0.y -mad.f32 r0.z, c1.z, r3.x, r0.z -mad.f32 r0.w, c1.y, r3.x, r0.w -mov.f32f32 r2.x, r0.x -mad.f32 r0.x, c3.w, r3.z, r0.y -mad.f32 r0.y, c2.z, r3.y, r0.z -mad.f32 r0.z, c2.y, r3.y, r0.w -mul.f r2.w, c0.x, r2.w -mov.f32f32 r0.w, r0.x -mad.f32 r0.x, c3.z, r3.z, r0.y -mad.f32 r0.y, c3.y, r3.z, r0.z -mad.f32 r2.w, c1.x, r3.x, r2.w -mov.f32f32 r3.x, c9.x -mov.f32f32 r0.z, r0.x -mov.f32f32 r0.y, r0.y -mad.f32 r0.x, c2.x, r3.y, r2.w -mov.f32f32 r5.w, r3.x -mad.f32 r0.x, c3.x, r3.z, r0.x -mov.f32f32 r2.w, r5.x -mov.f32f32 r3.x, r4.w -mov.f32f32 r3.y, r4.z -mov.f32f32 r0.x, r0.x -mov.f32f32 r5.z, r2.w -mov.f32f32 r5.y, r3.x -mov.f32f32 r5.x, r3.y -mov.f32f32 r2.w, c9.x -mov.f32f32 r3.x, r4.y -mov.f32f32 r3.y, r4.x -mov.f32f32 r3.z, r3.w -mov.f32f32 r4.w, r2.w -mov.f32f32 r4.z, r3.x -mov.f32f32 r4.y, r3.y -mov.f32f32 r4.x, r3.z -mov.f32f32 r2.w, c9.x -mov.f32f32 r3.x, r6.z -mov.f32f32 r3.y, r6.y -mov.f32f32 r6.x, r6.x -mov.f32f32 r3.w, r2.w -mov.f32f32 r3.z, r3.x -mov.f32f32 r3.y, r3.y -mov.f32f32 r3.x, r6.x +mad.f32 r0.y, c1.w, r6.y, r2.w +mad.f32 r1.x, c8.x, r0.w, r0.x +mad.f32 r0.x, c2.w, r6.z, r0.y +mul.f r0.y, c0.z, r6.x +mad.f32 r0.w, c3.w, r6.w, r0.x +mad.f32 r0.x, c1.z, r6.y, r0.y +mul.f r0.y, c0.y, r6.x +mad.f32 r0.x, c2.z, r6.z, r0.x +mad.f32 r0.y, c1.y, r6.y, r0.y +mad.f32 r0.z, c3.z, r6.w, r0.x +mad.f32 r0.x, c2.y, r6.z, r0.y +mul.f r2.w, c0.x, r6.x +mad.f32 r0.y, c3.y, r6.w, r0.x +mad.f32 r0.x, c1.x, r6.y, r2.w +mov.f32f32 r5.w, c9.x +mad.f32 r0.x, c2.x, r6.z, r0.x +mov.f32f32 r4.w, c9.x +mad.f32 r0.x, c3.x, r6.w, r0.x +mov.f32f32 r3.w, c9.x mov.f32f32 r2.w, c9.x -mov.f32f32 r1.w, r1.w -mov.f32f32 r1.z, r1.z -nop -mov.f32f32 r2.w, r2.w -mov.f32f32 r1.w, r1.w -mov.f32f32 r1.z, r1.z end nop nop -nop ; VERT: outputs: r0.x (0:0) r1.x (5:20) r2.x (5:21) r3.x (5:22) r4.x (5:23) r5.x (5:24) -; VERT: inputs: r2.w (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=f,il=12,b=0) r6.x (0:0,cm=7,il=16,b=0) r3.w (0:0,cm=7,il=20,b=0) r4.z (0:0,cm=7,il=24,b=0) r1.z (0:0,cm=3,il=28,b=0) -; VERT: 89 instructions, 0 half, 7 full +; VERT: inputs: r6.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=f,il=12,b=0) r3.x (0:0,cm=7,il=16,b=0) r4.x (0:0,cm=7,il=20,b=0) r5.x (0:0,cm=7,il=24,b=0) r1.z (0:0,cm=3,il=28,b=0) +; VERT: 42 instructions, 0 half, 7 full diff --git a/reference/xon9.asm b/reference/xon9.asm index 47cf0de..6189f11 100644 --- a/reference/xon9.asm +++ b/reference/xon9.asm @@ -6,91 +6,57 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c9.x) 0x3f000000, 0x00000000, 0x00000000, 0x00000000 +@const(c10.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.z, 0, r0.x -bary.f r0.w, 4, r0.x -mov.f32f32 r1.x, c9.y -bary.f r1.y, 1, r0.x +bary.f r0.w, 1, r0.x +bary.f r1.x, 4, r0.x +mov.f32f32 r1.y, (0.000000) mov.f32f32 r1.z, r0.z -mul.f r0.w, r0.w, r0.w -bary.f r1.w, 5, r0.x -mov.f32f32 r2.y, r0.z -mov.f32f32 r2.z, r1.z -mov.f32f32 r1.z, r1.y -mad.f32 r0.w, r1.w, r1.w, r0.w -mov.f32f32 r3.x, r2.y -mov.f32f32 r1.w, r1.y -mov.f32f32 r2.w, r1.z -mov.f32f32 r0.w, r0.w +mov.f32f32 r1.w, r0.w +mul.f r1.x, r1.x, r1.x +bary.f r2.y, 5, r0.x +sam (f32)(xyzw)r2.z, r0.z, s#0, t#0 +(sy)cmps.f.lt r3.z, r3.y, c9.x +mov.f32f32 r3.w, r0.z +mov.f32f32 r4.x, r0.w +(ss)mad.f32 r0.z, r2.y, r2.y, r1.x +sam (f32)(xyz)r4.y, r1.z, s#1, t#1 +(sy)mul.f r0.w, r4.w, c3.z +mul.f r1.x, r4.z, c3.y +(ss)mul.f r1.z, r4.y, c3.x +cov.u32f32 r1.w, r3.z bary.f (ei)r0.x, 6, r0.x -mov.f32f32 r3.y, r1.w -mov.f32f32 r0.y, r0.z -mov.f32f32 r0.z, r1.x -mad.f32 r0.x, r0.x, r0.x, r0.w -sam (f32)(xyz)r3.z, r2.z, s#1, t#1 -(sy)mul.f r0.w, r4.x, c3.z -mul.f r1.x, r3.w, c3.y -mul.f r1.z, r3.z, c3.x -(ss)nop -sam (f32)(xyzw)r2.y, r3.x, s#0, t#0 -(sy)cmps.f.lt r1.w, r3.x, c9.x -mov.f32f32 r0.w, r0.w -(ss)mov.f32f32 r3.y, r0.y -mov.f32f32 r0.y, r1.y -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.y, r1.z -sqrt r0.x, r0.x -(ss)mov.f32f32 r0.x, r0.x -mov.f32f32 r3.z, r0.y -cov.u32f32 r0.y, r1.w -mov.f32f32 r1.z, (0.000000) -mul.f r1.w, r3.x, c7.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.z, r0.z -cmps.f.ne p0.x, r0.y, r1.z -sam (f32)(xyz)r3.x, r3.y, s#2, t#2 -(sy)mad.f32 r0.y, c4.z, r3.z, r0.w -mad.f32 r0.w, c4.y, r3.y, r1.x -mad.f32 r1.x, c4.x, r3.x, r1.y -mov.f32f32 r1.y, r0.x -mov.f32f32 r0.x, r0.y -mov.f32f32 r0.y, r0.w -mov.f32f32 r0.w, r1.x -mov.f32f32 r1.z, r0.z -add.f r0.x, r2.w, r0.x -add.f r0.y, r2.z, r0.y -add.f r0.z, r2.y, r0.w +sam (f32)(xyz)r3.z, r3.w, s#2, t#2 +(sy)mad.f32 r0.y, c4.z, r4.x, r0.w +mad.f32 r0.w, c4.y, r3.w, r1.x +mad.f32 r1.x, c4.x, r3.z, r1.z +cmps.f.ne p0.x, r1.w, r1.y +add.f r0.y, r3.x, r0.y +add.f r0.w, r2.w, r0.w +add.f r1.x, r2.z, r1.x +mad.f32 r0.x, r0.x, r0.x, r0.z +mul.f r0.y, r0.y, c6.z +mul.f r0.z, r0.w, c6.y +mul.f r0.w, r1.x, c6.x kill p0.x -mul.f r0.x, r0.x, c6.z -mul.f r0.y, r0.y, c6.y -mul.f r0.z, r0.z, c6.x -sam (f32)(x)r0.w, r1.y, s#3, t#3 -mov.f32f32 r1.x, r1.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r1.w, r1.x -mul.f r0.x, r0.x, c8.z -mul.f r0.y, r0.y, c8.y -mul.f r0.z, r0.z, c8.x -nop -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -nop -(sy)mul.f r0.x, r0.x, r0.w -mul.f r0.y, r0.y, r0.w -mul.f r0.z, r0.z, r0.w -nop -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -nop -(ss)mov.f32f32 r1.z, r0.x -mov.f32f32 r1.y, r0.y -mov.f32f32 r1.x, r0.z +mul.f r0.y, r0.y, c8.z +mul.f r0.z, r0.z, c8.y +sqrt r1.x, r0.x +mov.f32f32 r1.y, c9.y +(ss)mul.f r0.x, r0.w, c8.x +(rpt1)nop +mul.f r1.w, r3.y, c7.x +(rpt1)nop +(ss)nop +sam (f32)(x)r2.y, r1.x, s#3, t#3 +(sy)mul.f r1.z, r0.y, r2.y +(ss)mul.f r1.y, r0.z, r2.y +mul.f r1.x, r0.x, r2.y end nop +nop ; FRAG: outputs: r1.x (1:0) ; FRAG: inputs: r0.x (5:20,cm=f,il=8,b=1) r1.x (5:21,cm=f,il=12,b=1) -; FRAG: 83 instructions, 0 half, 5 full +; FRAG: 48 instructions, 0 half, 5 full diff --git a/reference/xonotic-gl2/xonotic-glx-gl2-01.asm b/reference/xonotic-gl2/xonotic-glx-gl2-01.asm index 2c03e4f..284e180 100644 --- a/reference/xonotic-gl2/xonotic-glx-gl2-01.asm +++ b/reference/xonotic-gl2/xonotic-glx-gl2-01.asm @@ -1,13 +1,13 @@ ; options: ; VERT: new compiler -@in(r2.x) in0 -@in(r2.y) in1 -@in(r2.z) in2 -@in(r2.w) in3 -@in(r0.x) in4 -@in(r0.y) in5 -@in(r0.z) in6 -@in(r0.w) in7 +@in(r1.x) in0 +@in(r1.y) in1 +@in(r1.z) in2 +@in(r1.w) in3 +@in(r2.x) in4 +@in(r2.y) in5 +@in(r2.z) in6 +@in(r2.w) in7 @out(r0.x) out0 @out(r0.y) out1 @out(r0.z) out2 @@ -16,39 +16,36 @@ @out(r1.y) out5 @out(r1.z) out6 @out(r1.w) out7 -(sy)(ss)mov.f32f32 r0.w, r0.w -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x -max.f r0.w, r0.w, c4.x -max.f r0.z, r0.z, c4.x -max.f r0.y, r0.y, c4.x -max.f r0.x, r0.x, c4.x -min.f r1.w, r0.w, c4.y -min.f r1.z, r0.z, c4.y -min.f r1.y, r0.y, c4.y -min.f r1.x, r0.x, c4.y -mul.f r0.x, r2.x, c0.w -mul.f r0.y, r2.x, c0.z -mad.f32 r0.x, c1.w, r2.y, r0.x -mad.f32 r0.y, c1.z, r2.y, r0.y -mad.f32 r0.x, c2.w, r2.z, r0.x -mad.f32 r0.y, c2.z, r2.z, r0.y -mad.f32 r0.w, c3.w, r2.w, r0.x -mad.f32 r0.z, c3.z, r2.w, r0.y -mul.f r0.x, r2.x, c0.y -mul.f r0.y, r2.x, c0.x -mad.f32 r0.x, c1.y, r2.y, r0.x -mad.f32 r0.y, c1.x, r2.y, r0.y -mad.f32 r0.x, c2.y, r2.z, r0.x -mad.f32 r2.x, c2.x, r2.z, r0.y -mad.f32 r0.y, c3.y, r2.w, r0.x -mad.f32 r0.x, c3.x, r2.w, r2.x +@const(c4.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000 +(sy)(ss)mul.f r0.x, r1.x, c0.w +mul.f r0.y, r1.x, c0.z +mad.f32 r0.x, c1.w, r1.y, r0.x +mad.f32 r0.y, c1.z, r1.y, r0.y +mad.f32 r0.x, c2.w, r1.z, r0.x +mad.f32 r0.y, c2.z, r1.z, r0.y +mad.f32 r0.w, c3.w, r1.w, r0.x +mad.f32 r0.z, c3.z, r1.w, r0.y +mul.f r0.x, r1.x, c0.y +mul.f r0.y, r1.x, c0.x +mad.f32 r0.x, c1.y, r1.y, r0.x +mad.f32 r0.y, c1.x, r1.y, r0.y +mad.f32 r0.x, c2.y, r1.z, r0.x +mad.f32 r1.x, c2.x, r1.z, r0.y +mad.f32 r0.y, c3.y, r1.w, r0.x +mad.f32 r0.x, c3.x, r1.w, r1.x +max.f r1.x, r2.w, c4.x +max.f r1.y, r2.z, c4.x +max.f r2.y, r2.y, c4.x +max.f r2.x, r2.x, c4.x +min.f r1.w, r1.x, c4.y +min.f r1.z, r1.y, c4.y +min.f r1.y, r2.y, c4.y +min.f r1.x, r2.x, c4.y end nop nop nop ; VERT: outputs: r0.x (0:0) r1.x (1:0) -; VERT: inputs: r2.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=f,il=12,b=0) -; VERT: 29 instructions, 0 half, 3 full +; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) r2.x (0:0,cm=f,il=12,b=0) +; VERT: 25 instructions, 0 half, 3 full diff --git a/reference/xonotic-gl2/xonotic-glx-gl2-02.asm b/reference/xonotic-gl2/xonotic-glx-gl2-02.asm index 71537bc..45a3f44 100644 --- a/reference/xonotic-gl2/xonotic-glx-gl2-02.asm +++ b/reference/xonotic-gl2/xonotic-glx-gl2-02.asm @@ -6,31 +6,24 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)bary.f r0.z, 4, r0.x bary.f r0.w, 5, r0.x bary.f r1.x, 3, r0.x bary.f r1.y, 2, r0.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -bary.f r1.z, 1, r0.x +bary.f r2.x, 1, r0.x bary.f (ei)r0.x, 0, r0.x -mov.f32f32 r0.y, r0.z -mov.f32f32 r0.z, r0.w -(rpt5)nop -sam (f32)(xyzw)r1.w, r0.y, s#0, t#0 -(sy)(ss)mul.f r0.y, r1.x, r2.z -mul.f r0.z, r1.y, r2.y -mul.f r0.w, r1.z, r2.x -mul.f r0.x, r0.x, r1.w -mov.f32f32 r1.w, r0.y -mov.f32f32 r1.z, r0.z -mov.f32f32 r1.y, r0.w -mov.f32f32 r1.x, r0.x +(rpt1)nop +sam (f32)(xyzw)r2.y, r0.z, s#0, t#0 +(sy)mul.f r1.w, r1.x, r3.x +mul.f r1.z, r1.y, r2.w +mul.f r1.y, r2.x, r2.z +mul.f r1.x, r0.x, r2.y end nop nop nop ; FRAG: outputs: r1.x (1:0) ; FRAG: inputs: r0.y (5:20,cm=f,il=8,b=1) r1.x (5:21,cm=f,il=12,b=1) -; FRAG: 26 instructions, 0 half, 3 full +; FRAG: 14 instructions, 0 half, 4 full diff --git a/reference/xonotic-gl2/xonotic-glx-gl2-04.asm b/reference/xonotic-gl2/xonotic-glx-gl2-04.asm index 71537bc..45a3f44 100644 --- a/reference/xonotic-gl2/xonotic-glx-gl2-04.asm +++ b/reference/xonotic-gl2/xonotic-glx-gl2-04.asm @@ -6,31 +6,24 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)bary.f r0.z, 4, r0.x bary.f r0.w, 5, r0.x bary.f r1.x, 3, r0.x bary.f r1.y, 2, r0.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -bary.f r1.z, 1, r0.x +bary.f r2.x, 1, r0.x bary.f (ei)r0.x, 0, r0.x -mov.f32f32 r0.y, r0.z -mov.f32f32 r0.z, r0.w -(rpt5)nop -sam (f32)(xyzw)r1.w, r0.y, s#0, t#0 -(sy)(ss)mul.f r0.y, r1.x, r2.z -mul.f r0.z, r1.y, r2.y -mul.f r0.w, r1.z, r2.x -mul.f r0.x, r0.x, r1.w -mov.f32f32 r1.w, r0.y -mov.f32f32 r1.z, r0.z -mov.f32f32 r1.y, r0.w -mov.f32f32 r1.x, r0.x +(rpt1)nop +sam (f32)(xyzw)r2.y, r0.z, s#0, t#0 +(sy)mul.f r1.w, r1.x, r3.x +mul.f r1.z, r1.y, r2.w +mul.f r1.y, r2.x, r2.z +mul.f r1.x, r0.x, r2.y end nop nop nop ; FRAG: outputs: r1.x (1:0) ; FRAG: inputs: r0.y (5:20,cm=f,il=8,b=1) r1.x (5:21,cm=f,il=12,b=1) -; FRAG: 26 instructions, 0 half, 3 full +; FRAG: 14 instructions, 0 half, 4 full diff --git a/reference/xonotic-gl2/xonotic-glx-gl2-06.asm b/reference/xonotic-gl2/xonotic-glx-gl2-06.asm index 71537bc..45a3f44 100644 --- a/reference/xonotic-gl2/xonotic-glx-gl2-06.asm +++ b/reference/xonotic-gl2/xonotic-glx-gl2-06.asm @@ -6,31 +6,24 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)bary.f r0.z, 4, r0.x bary.f r0.w, 5, r0.x bary.f r1.x, 3, r0.x bary.f r1.y, 2, r0.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -bary.f r1.z, 1, r0.x +bary.f r2.x, 1, r0.x bary.f (ei)r0.x, 0, r0.x -mov.f32f32 r0.y, r0.z -mov.f32f32 r0.z, r0.w -(rpt5)nop -sam (f32)(xyzw)r1.w, r0.y, s#0, t#0 -(sy)(ss)mul.f r0.y, r1.x, r2.z -mul.f r0.z, r1.y, r2.y -mul.f r0.w, r1.z, r2.x -mul.f r0.x, r0.x, r1.w -mov.f32f32 r1.w, r0.y -mov.f32f32 r1.z, r0.z -mov.f32f32 r1.y, r0.w -mov.f32f32 r1.x, r0.x +(rpt1)nop +sam (f32)(xyzw)r2.y, r0.z, s#0, t#0 +(sy)mul.f r1.w, r1.x, r3.x +mul.f r1.z, r1.y, r2.w +mul.f r1.y, r2.x, r2.z +mul.f r1.x, r0.x, r2.y end nop nop nop ; FRAG: outputs: r1.x (1:0) ; FRAG: inputs: r0.y (5:20,cm=f,il=8,b=1) r1.x (5:21,cm=f,il=12,b=1) -; FRAG: 26 instructions, 0 half, 3 full +; FRAG: 14 instructions, 0 half, 4 full diff --git a/reference/xonotic-gl2/xonotic-glx-gl2-10.asm b/reference/xonotic-gl2/xonotic-glx-gl2-10.asm index ea7d51f..d26b519 100644 --- a/reference/xonotic-gl2/xonotic-glx-gl2-10.asm +++ b/reference/xonotic-gl2/xonotic-glx-gl2-10.asm @@ -4,6 +4,7 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c0.x) 0x3f800000, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)mov.f32f32 r1.w, c0.x mov.f32f32 r1.z, c0.x mov.f32f32 r1.y, c0.x diff --git a/reference/xonotic-gl2/xonotic-glx-gl2-12.asm b/reference/xonotic-gl2/xonotic-glx-gl2-12.asm index b4aedcd..1867490 100644 --- a/reference/xonotic-gl2/xonotic-glx-gl2-12.asm +++ b/reference/xonotic-gl2/xonotic-glx-gl2-12.asm @@ -6,211 +6,146 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c10.x) 0xbf000000, 0x40000000, 0xbf800000, 0x00000000 +@const(c11.x) 0x3f800000, 0x3e800000, 0x00000000, 0x00000000 +@const(c12.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.z, 2, r0.x -bary.f r0.w, 0, r0.x -bary.f r1.x, 4, r0.x -bary.f r1.y, 3, r0.x -mov.f32f32 r1.z, r0.z -mov.f32f32 r1.w, r0.w -mul.f r2.x, r1.x, r1.x -bary.f r2.y, 5, r0.x -mov.f32f32 r2.z, r1.z -mov.f32f32 r1.z, r1.y -mov.f32f32 r3.x, r1.w -bary.f r1.w, 1, r0.x -mad.f32 r2.x, r2.y, r2.y, r2.x -mov.f32f32 r2.w, r1.z -mov.f32f32 r1.z, r0.w -mov.f32f32 r3.y, r1.w -mov.f32f32 r2.x, r2.x -bary.f r3.z, 6, r0.x -mov.f32f32 r3.w, r1.z -mov.f32f32 r3.y, r3.y -sam (f32)(xyz)r4.x, r2.z, s#4, t#4 -(sy)mad.f32 r1.z, c10.y, r4.x, c10.z -mad.f32 r2.x, r3.z, r3.z, r2.x -(ss)mov.f32f32 r2.z, r1.w -mad.f32 r2.w, c10.y, r4.y, c10.z -mov.f32f32 r1.z, r1.z +bary.f r0.w, 3, r0.x +bary.f r1.x, 0, r0.x +bary.f r1.z, 4, r0.x +mov.f32f32 r1.w, r0.z +mov.f32f32 r2.x, r0.w +mov.f32f32 r2.y, r1.x +bary.f r1.y, 1, r0.x +sam (f32)(xyz)r2.z, r0.z, s#4, t#4 +(sy)(ss)mad.f32 r0.z, c10.y, r2.z, c10.z +mov.f32f32 r3.y, r1.x +bary.f r0.w, 16, r0.x +mul.f r2.z, r1.z, r1.z +mov.f32f32 r3.w, r0.z bary.f r4.x, 12, r0.x -sam (f32)(xyz)r4.w, r3.x, s#0, t#0 -(sy)(ss)add.f r3.x, r4.w, c10.x -bary.f r3.y, 16, r0.x +mov.f32f32 r3.z, r1.y bary.f r4.y, 8, r0.x -mul.f r4.x, r1.z, r4.x -mov.f32f32 r2.w, r2.w -bary.f r4.w, 13, r0.x -mov.f32f32 r3.x, r3.x -mul.f r3.y, r1.z, r3.y -mul.f r1.z, r1.z, r4.y -mad.f32 r4.x, r2.w, r4.w, r4.x -mul.f r4.y, r3.x, r3.x -add.f r4.w, r5.x, c10.x -bary.f r5.x, 17, r0.x -mov.f32f32 r4.x, r4.x -mad.f32 r4.z, c10.y, r4.z, c10.z -mov.f32f32 r4.w, r4.w -mad.f32 r3.y, r2.w, r5.x, r3.y +mul.f r0.z, r0.z, r0.w +mul.f r0.w, r3.w, r4.x +mad.f32 r2.w, c10.y, r2.w, c10.z +mul.f r3.w, r3.w, r4.y +bary.f r4.x, 17, r0.x +sam (f32)(xyz)r4.y, r3.y, s#0, t#0 +(sy)(ss)add.f r3.y, r4.y, c10.x +mov.f32f32 r3.z, r2.w +bary.f r4.y, 13, r0.x bary.f r5.x, 9, r0.x -mov.f32f32 r4.z, r4.z -bary.f r5.z, 14, r0.x -mad.f32 r4.y, r4.w, r4.w, r4.y -mov.f32f32 r3.y, r3.y -mad.f32 r1.z, r2.w, r5.x, r1.z -mad.f32 r2.w, r4.z, r5.z, r4.x -mov.f32f32 r4.x, r4.y -add.f r4.y, r5.y, c10.x -bary.f r5.x, 18, r0.x -mov.f32f32 r2.w, r2.w -mov.f32f32 r1.z, r1.z -bary.f (ei)r0.x, 10, r0.x -mov.f32f32 r0.y, r4.y -mad.f32 r3.y, r4.z, r5.x, r3.y -rsq r2.x, r2.x -(ss)mov.f32f32 r2.x, r2.x -mad.f32 r0.x, r4.z, r0.x, r1.z -mad.f32 r1.z, r0.y, r0.y, r4.x -mov.f32f32 r3.y, r3.y -mul.f r1.x, r1.x, r2.x -mul.f r4.x, r0.x, r0.x -mul.f r2.y, r2.y, r2.x -mad.f32 r4.x, r2.w, r2.w, r4.x -mov.f32f32 r3.y, r3.y -rsq r1.z, r1.z -mov.f32f32 r1.x, r1.x -(ss)mov.f32f32 r1.z, r1.z -mov.f32f32 r4.x, r4.x -mov.f32f32 r2.y, r2.y -mad.f32 r4.x, r3.y, r3.y, r4.x -mul.f r3.x, r3.x, r1.z -mul.f r4.y, r4.w, r1.z -mul.f r0.y, r0.y, r1.z -(rpt2)nop -rsq r1.z, r4.x -(ss)mov.f32f32 r1.z, r1.z -mov.f32f32 r3.x, r3.x -mov.f32f32 r4.y, r4.y -mov.f32f32 r0.y, r0.y -mul.f r0.x, r0.x, r1.z -mul.f r3.y, r3.y, r1.z -mul.f r1.z, r2.w, r1.z -mul.f r2.x, r3.z, r2.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r2.w, r3.y -mov.f32f32 r1.z, r1.z -mov.f32f32 r2.x, r2.x -mul.f r3.y, r3.x, r0.x -mul.f r3.z, r3.x, r0.x -mad.f32 r3.y, r4.y, r1.z, r3.y -mad.f32 r3.z, r4.y, r1.z, r3.z -max.f r4.z, c11.y, r2.w -(ss)mov.f32f32 r4.x, r2.z -mov.f32f32 r2.z, r3.y -mov.f32f32 r3.y, r3.z -mad.f32 r2.z, r0.y, r2.w, r2.z -mad.f32 r3.y, r0.y, r2.w, r3.y +mad.f32 r0.z, r2.w, r4.x, r0.z +bary.f r2.w, 5, r0.x +mad.f32 r0.w, r3.z, r4.y, r0.w +mad.f32 r3.x, c10.y, r3.x, c10.z +mov.f32f32 r4.x, r3.y +mad.f32 r3.z, r3.z, r5.x, r3.w +bary.f r3.w, 18, r0.x +mov.f32f32 r4.y, r3.x +bary.f r5.x, 14, r0.x +mul.f r3.y, r3.y, r4.x +add.f r4.z, r4.z, c10.x +bary.f r5.y, 10, r0.x +mad.f32 r0.w, r4.y, r5.x, r0.w +mad.f32 r0.z, r3.x, r3.w, r0.z +mad.f32 r2.z, r2.w, r2.w, r2.z +bary.f (ei)r0.x, 6, r0.x +mov.f32f32 r0.y, r0.w +mad.f32 r3.x, r4.y, r5.y, r3.z mov.f32f32 r3.z, r4.z -mov.f32f32 r0.w, r0.w +mov.f32f32 r3.w, r0.z +mad.f32 r4.y, r0.x, r0.x, r2.z +mul.f r2.z, r3.x, r3.x +mad.f32 r3.y, r4.z, r3.z, r3.y +mad.f32 r0.w, r0.w, r0.y, r2.z +mov.f32f32 r3.w, r3.w +add.f r4.z, r4.w, c10.x +mov.f32f32 r2.z, r1.y +rsq r4.y, r4.y +(ss)mov.f32f32 r4.w, r4.y +mad.f32 r0.w, r3.w, r3.w, r0.w +mov.f32f32 r3.w, r4.z +mul.f r0.x, r0.x, r4.y +sam (f32)(xyz)r5.x, r1.w, s#3, t#3 +mul.f r1.z, r1.z, r4.w +(ss)mul.f r2.x, r2.w, r4.w +sam (f32)(xyzw)r5.w, r2.y, s#1, t#1 +(ss)mad.f32 r2.y, r3.w, r3.w, r3.y +(sy)mul.f r1.w, r6.z, c9.x +rsq r0.w, r0.w +(ss)mov.f32f32 r2.z, r0.w +mul.f r0.z, r0.z, r0.w +(ss)mul.f r0.w, r6.y, c5.z +mul.f r2.w, r6.y, c6.z +mul.f r3.x, r3.x, r2.z +rsq r2.y, r2.y +(ss)mov.f32f32 r3.y, r2.y +mov.f32f32 r3.w, r0.z +mul.f r0.y, r0.y, r2.z +(ss)mul.f r2.y, r4.z, r2.y +mul.f r2.z, r4.x, r3.y +mov.f32f32 r4.x, r3.x +max.f r4.y, c11.y, r3.w +mov.f32f32 r4.z, r0.y mul.f r3.x, r2.z, r3.x -mul.f r4.y, r2.z, r4.y -mul.f r0.y, r2.z, r0.y -mov.f32f32 r2.z, r3.y -mov.f32f32 r3.x, r3.x -mov.f32f32 r3.y, r4.y -mov.f32f32 r0.y, r0.y -max.f r2.z, r2.z, c10.w -mul.f r3.x, c10.y, r3.x -mul.f r3.y, c10.y, r3.y -mul.f r0.y, c10.y, r0.y +mul.f r3.y, r3.z, r3.y mov.f32f32 r2.z, r2.z -mov.f32f32 r3.x, r3.x -mov.f32f32 r3.y, r3.y -mov.f32f32 r0.y, r0.y -sam (f32)(xyzw)r3.w, r3.w, s#1, t#1 -(sy)mul.f r4.w, r4.y, c6.z -add.f r0.x, r0.x, (neg)r3.x -add.f r1.z, r1.z, (neg)r3.y -add.f r0.y, r2.w, (neg)r0.y -mul.f r2.w, r4.w, r2.z -mov.f32f32 r0.x, r0.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r2.w, r2.w -mul.f r0.x, r0.x, r1.x -mul.f r1.x, r4.x, c6.y -mad.f32 r0.x, r1.z, r2.y, r0.x -mul.f r1.z, r3.w, c6.x -rcp r2.y, r3.z -(ss)mov.f32f32 r2.y, r2.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.x, r0.x -mul.f r1.x, r1.x, r2.z -mad.f32 r0.x, r0.y, r2.x, r0.x -mul.f r0.y, r1.z, r2.z -(rpt1)nop -mov.f32f32 r0.x, r0.x -mov.f32f32 r1.x, r1.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r3.x, r0.z +mov.f32f32 r3.z, r2.y +mul.f r4.w, r6.x, c5.y +mad.f32 r0.y, r3.y, r0.y, r3.x +mul.f r3.x, r2.z, r4.x +mad.f32 r0.y, r2.y, r3.w, r0.y +mov.f32f32 r2.y, r3.y +rcp r3.y, r4.y +(ss)mov.f32f32 r4.y, r3.y +mul.f r3.y, r5.x, r3.y +mul.f r2.z, r0.y, r2.z +mad.f32 r3.x, r2.y, r4.z, r3.x +mul.f r2.y, r0.y, r2.y +mul.f r0.y, r0.y, r3.z +mul.f r2.z, c10.y, r2.z +mad.f32 r3.x, r3.z, r3.w, r3.x +mul.f r2.y, c10.y, r2.y +mul.f r0.y, c10.y, r0.y +add.f r2.z, r4.x, (neg)r2.z +max.f r3.x, r3.x, c10.w +add.f r2.y, r4.z, (neg)r2.y +mul.f r3.z, r5.w, c6.x +mul.f r1.z, r2.z, r1.z +mov.f32f32 r2.z, r3.x +mad.f32 r1.z, r2.y, r2.x, r1.z +add.f r0.y, r0.z, (neg)r0.y +mul.f r0.z, r6.x, c6.y +mul.f r2.x, r3.z, r3.x +mul.f r2.y, r5.z, r4.y +mad.f32 r0.x, r0.y, r0.x, r1.z +mul.f r0.y, r2.w, r2.z +mul.f r0.z, r0.z, r2.z +mul.f r2.z, r5.y, r4.y max.f r0.x, (neg)r0.x, c10.w -mov.f32f32 r0.z, r1.y -mul.f r1.y, r4.z, c9.x -mul.f r1.z, r4.y, c5.z -mov.f32f32 r0.x, r0.x -mov.f32f32 r3.y, r0.z -mov.f32f32 r0.z, r1.y -mov.f32f32 r1.y, r1.z -mul.f r1.z, r4.x, c5.y -mul.f r2.x, r3.w, c5.x -(ss)mov.f32f32 r3.z, r0.w -log2 r0.x, r0.x -mov.f32f32 r0.w, r1.w -sam (f32)(xyz)r3.w, r3.x, s#3, t#3 -(sy)mul.f r1.w, r3.w, r2.y -mul.f r2.z, r4.y, r2.y -mul.f r2.y, r4.x, r2.y -mov.f32f32 r3.w, r0.w -mov.f32f32 r0.w, r1.w -mov.f32f32 r0.z, r0.z -nop -mov.f32f32 r2.z, r2.z -mov.f32f32 r2.y, r2.y -mov.f32f32 r1.w, r0.z -(ss)nop -sam (f32)(xyzw)r3.x, r3.z, s#2, t#2 -(sy)mad.f32 r0.z, c8.x, r3.w, c11.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r2.x, r2.x -(ss)mul.f r3.z, r3.z, c7.z -mov.f32f32 r0.z, r0.z +mul.f r2.w, r5.w, c5.x +sam (f32)(xyzw)r3.z, r1.x, s#2, t#2 +(sy)(ss)mul.f r1.x, r4.x, c7.z +mad.f32 r1.y, c8.x, r4.y, c11.x +mul.f r1.z, r3.z, c7.x nop -mul.f r3.y, r3.y, c7.y -mul.f r3.x, r3.x, c7.x -(ss)mul.f r0.x, r0.z, r0.x -(rpt2)nop -mov.f32f32 r0.x, r0.x +mul.f r3.x, r3.w, c7.y +log2 r0.x, r0.x +(ss)mul.f r0.x, r1.y, r0.x (rpt5)nop exp2 r0.x, r0.x -(ss)mad.f32 r0.z, r3.z, r0.x, r2.w -mad.f32 r1.x, r3.y, r0.x, r1.x -(ss)mad.f32 r0.x, r3.x, r0.x, r0.y -nop -mov.f32f32 r0.y, r0.z -mov.f32f32 r0.z, r1.x -mov.f32f32 r0.x, r0.x +(ss)mad.f32 r0.y, r1.x, r0.x, r0.y +mad.f32 r0.z, r3.x, r0.x, r0.z +(ss)mad.f32 r0.x, r1.z, r0.x, r2.x nop -mad.f32 r0.y, r0.y, r2.z, r1.y -mad.f32 r0.z, r0.z, r2.y, r1.z -mad.f32 r0.x, r0.x, r0.w, r2.x -nop -mov.f32f32 r1.z, r0.y -mov.f32f32 r1.y, r0.z -mov.f32f32 r1.x, r0.x +mad.f32 r1.z, r0.y, r2.y, r0.w +mad.f32 r1.y, r0.z, r2.z, r4.w +mad.f32 r1.x, r0.x, r3.y, r2.w end nop -nop -nop ; FRAG: outputs: r1.x (1:0) -; FRAG: inputs: r0.z (5:20,cm=f,il=8,b=1) r1.x (5:21,cm=f,il=12,b=1) r2.x (5:22,cm=f,il=16,b=1) r3.x (5:23,cm=f,il=20,b=1) r4.x (5:24,cm=f,il=24,b=1) -; FRAG: 211 instructions, 0 half, 6 full +; FRAG: inputs: r0.x (5:20,cm=f,il=8,b=1) r1.x (5:21,cm=f,il=12,b=1) r2.x (5:22,cm=f,il=16,b=1) r3.x (5:23,cm=f,il=20,b=1) r4.x (5:24,cm=f,il=24,b=1) +; FRAG: 140 instructions, 0 half, 7 full diff --git a/reference/xonotic-gl2/xonotic-glx-gl2-13.asm b/reference/xonotic-gl2/xonotic-glx-gl2-13.asm index 9a1b31d..26f774e 100644 --- a/reference/xonotic-gl2/xonotic-glx-gl2-13.asm +++ b/reference/xonotic-gl2/xonotic-glx-gl2-13.asm @@ -1,22 +1,22 @@ ; options: ; VERT: new compiler -@in(r2.w) in0 -@in(r3.x) in1 -@in(r3.y) in2 -@in(r3.z) in3 +@in(r6.x) in0 +@in(r6.y) in1 +@in(r6.z) in2 +@in(r6.w) in3 @in(r0.x) in4 @in(r0.y) in5 @in(r0.z) in6 @in(r0.w) in7 -@in(r6.x) in8 -@in(r6.y) in9 -@in(r6.z) in10 -@in(r3.w) in12 -@in(r4.x) in13 -@in(r4.y) in14 -@in(r4.z) in16 -@in(r4.w) in17 -@in(r5.x) in18 +@in(r3.x) in8 +@in(r3.y) in9 +@in(r3.z) in10 +@in(r4.x) in12 +@in(r4.y) in13 +@in(r4.z) in14 +@in(r5.x) in16 +@in(r5.y) in17 +@in(r5.z) in18 @in(r1.z) in20 @in(r1.w) in21 @out(r0.x) out0 @@ -43,99 +43,53 @@ @out(r5.y) out21 @out(r5.z) out22 @out(r5.w) out23 -(sy)(ss)add.f r1.x, c4.x, (neg)r2.w +@const(c9.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 +@const(c10.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 +(sy)(ss)add.f r1.x, c4.x, (neg)r6.x mul.f r1.y, c5.y, r0.x mul.f r0.x, c5.x, r0.x +mul.f r2.w, c0.w, r6.x +mul.f r2.x, r1.x, r5.x +add.f r2.y, c4.y, (neg)r6.y +mul.f r2.z, r1.x, r4.x +mul.f r1.x, r1.x, r3.x mad.f32 r1.y, c6.y, r0.y, r1.y -mul.f r2.x, r1.x, r4.z -add.f r5.y, c4.y, (neg)r3.x -mad.f32 r1.y, c7.y, r0.z, r1.y +mad.f32 r2.x, r2.y, r5.y, r2.x +add.f r3.w, c4.z, (neg)r6.z +mad.f32 r4.w, r2.y, r4.y, r2.z +mad.f32 r1.x, r2.y, r3.y, r1.x +nop +mad.f32 r2.z, r3.w, r5.z, r2.x +mad.f32 r2.y, r3.w, r4.z, r4.w +mad.f32 r2.x, r3.w, r3.z, r1.x +mad.f32 r1.x, c7.y, r0.z, r1.y mad.f32 r0.x, c6.x, r0.y, r0.x -mul.f r0.y, r1.x, r3.w -mad.f32 r2.x, r5.y, r4.w, r2.x -mad.f32 r1.y, c8.y, r0.w, r1.y +mad.f32 r1.y, c8.y, r0.w, r1.x mad.f32 r0.x, c7.x, r0.z, r0.x -mad.f32 r0.y, r5.y, r4.x, r0.y -mov.f32f32 r0.z, r2.x -add.f r2.x, c4.z, (neg)r3.y -mov.f32f32 r1.y, r1.y -mad.f32 r0.x, c8.x, r0.w, r0.x -mov.f32f32 r0.y, r0.y -mad.f32 r0.z, r2.x, r5.x, r0.z -mov.f32f32 r0.w, r1.y -mov.f32f32 r0.x, r0.x -mad.f32 r0.y, r2.x, r4.y, r0.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r1.y, r0.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r2.y, r0.y -mov.f32f32 r2.z, r0.z -mul.f r0.y, r1.x, r6.x -mov.f32f32 r1.x, r0.x -mad.f32 r0.x, r5.y, r6.y, r0.y -mul.f r0.y, c0.w, r2.w -mul.f r0.z, c0.z, r2.w -mul.f r0.w, c0.y, r2.w -mov.f32f32 r0.x, r0.x -mad.f32 r0.y, c1.w, r3.x, r0.y -mad.f32 r0.x, r2.x, r6.z, r0.x -mad.f32 r0.y, c2.w, r3.y, r0.y -mad.f32 r0.z, c1.z, r3.x, r0.z -mad.f32 r0.w, c1.y, r3.x, r0.w -mov.f32f32 r2.x, r0.x -mad.f32 r0.x, c3.w, r3.z, r0.y -mad.f32 r0.y, c2.z, r3.y, r0.z -mad.f32 r0.z, c2.y, r3.y, r0.w -mul.f r2.w, c0.x, r2.w -mov.f32f32 r0.w, r0.x -mad.f32 r0.x, c3.z, r3.z, r0.y -mad.f32 r0.y, c3.y, r3.z, r0.z -mad.f32 r2.w, c1.x, r3.x, r2.w -mov.f32f32 r3.x, c9.x -mov.f32f32 r0.z, r0.x -mov.f32f32 r0.y, r0.y -mad.f32 r0.x, c2.x, r3.y, r2.w -mov.f32f32 r5.w, r3.x -mad.f32 r0.x, c3.x, r3.z, r0.x -mov.f32f32 r2.w, r5.x -mov.f32f32 r3.x, r4.w -mov.f32f32 r3.y, r4.z -mov.f32f32 r0.x, r0.x -mov.f32f32 r5.z, r2.w -mov.f32f32 r5.y, r3.x -mov.f32f32 r5.x, r3.y -mov.f32f32 r2.w, c9.x -mov.f32f32 r3.x, r4.y -mov.f32f32 r3.y, r4.x -mov.f32f32 r3.z, r3.w -mov.f32f32 r4.w, r2.w -mov.f32f32 r4.z, r3.x -mov.f32f32 r4.y, r3.y -mov.f32f32 r4.x, r3.z -mov.f32f32 r2.w, c9.x -mov.f32f32 r3.x, r6.z -mov.f32f32 r3.y, r6.y -mov.f32f32 r6.x, r6.x -mov.f32f32 r3.w, r2.w -mov.f32f32 r3.z, r3.x -mov.f32f32 r3.y, r3.y -mov.f32f32 r3.x, r6.x +mad.f32 r0.y, c1.w, r6.y, r2.w +mad.f32 r1.x, c8.x, r0.w, r0.x +mad.f32 r0.x, c2.w, r6.z, r0.y +mul.f r0.y, c0.z, r6.x +mad.f32 r0.w, c3.w, r6.w, r0.x +mad.f32 r0.x, c1.z, r6.y, r0.y +mul.f r0.y, c0.y, r6.x +mad.f32 r0.x, c2.z, r6.z, r0.x +mad.f32 r0.y, c1.y, r6.y, r0.y +mad.f32 r0.z, c3.z, r6.w, r0.x +mad.f32 r0.x, c2.y, r6.z, r0.y +mul.f r2.w, c0.x, r6.x +mad.f32 r0.y, c3.y, r6.w, r0.x +mad.f32 r0.x, c1.x, r6.y, r2.w +mov.f32f32 r5.w, c9.x +mad.f32 r0.x, c2.x, r6.z, r0.x +mov.f32f32 r4.w, c9.x +mad.f32 r0.x, c3.x, r6.w, r0.x +mov.f32f32 r3.w, c9.x mov.f32f32 r2.w, c9.x -mov.f32f32 r1.w, r1.w -mov.f32f32 r1.z, r1.z -nop -mov.f32f32 r2.w, r2.w -mov.f32f32 r1.w, r1.w -mov.f32f32 r1.z, r1.z end nop nop -nop ; VERT: outputs: r0.x (0:0) r1.x (5:20) r2.x (5:21) r3.x (5:22) r4.x (5:23) r5.x (5:24) -; VERT: inputs: r2.w (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=f,il=12,b=0) r6.x (0:0,cm=7,il=16,b=0) r3.w (0:0,cm=7,il=20,b=0) r4.z (0:0,cm=7,il=24,b=0) r1.z (0:0,cm=3,il=28,b=0) -; VERT: 89 instructions, 0 half, 7 full +; VERT: inputs: r6.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=f,il=12,b=0) r3.x (0:0,cm=7,il=16,b=0) r4.x (0:0,cm=7,il=20,b=0) r5.x (0:0,cm=7,il=24,b=0) r1.z (0:0,cm=3,il=28,b=0) +; VERT: 42 instructions, 0 half, 7 full diff --git a/reference/xonotic-gl2/xonotic-glx-gl2-14.asm b/reference/xonotic-gl2/xonotic-glx-gl2-14.asm index 0c5831e..fc88e42 100644 --- a/reference/xonotic-gl2/xonotic-glx-gl2-14.asm +++ b/reference/xonotic-gl2/xonotic-glx-gl2-14.asm @@ -6,219 +6,158 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c12.x) 0xbf000000, 0x40000000, 0xbf800000, 0x00000000 +@const(c13.x) 0x3f800000, 0x3e800000, 0x00000000, 0x00000000 +@const(c14.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.z, 2, r0.x -bary.f r0.w, 0, r0.x -bary.f r1.x, 4, r0.x -bary.f r1.y, 3, r0.x -mov.f32f32 r1.z, r0.z -mov.f32f32 r1.w, r0.w -mul.f r2.x, r1.x, r1.x -bary.f r2.y, 5, r0.x -mov.f32f32 r2.z, r1.z -mov.f32f32 r1.z, r1.y -mov.f32f32 r3.x, r1.w -bary.f r3.z, 1, r0.x -mad.f32 r1.w, r2.y, r2.y, r2.x -mov.f32f32 r2.w, r1.z -mov.f32f32 r1.z, r0.w -mov.f32f32 r2.x, r3.z -mov.f32f32 r1.w, r1.w -bary.f r3.w, 6, r0.x -mov.f32f32 r4.x, r1.z -mov.f32f32 r3.y, r2.x -sam (f32)(xyz)r4.y, r2.z, s#5, t#5 -(sy)mad.f32 r1.z, c12.y, r4.y, c12.z -mad.f32 r1.w, r3.w, r3.w, r1.w -mov.f32f32 r2.x, r3.z -(ss)mad.f32 r2.z, c12.y, r4.z, c12.z -mov.f32f32 r1.z, r1.z -bary.f r2.w, 12, r0.x -sam (f32)(xyz)r5.x, r3.x, s#0, t#0 -(sy)(ss)add.f r3.x, r5.x, c12.x -bary.f r3.y, 16, r0.x +bary.f r0.w, 3, r0.x +bary.f r1.x, 0, r0.x +bary.f r1.z, 4, r0.x +mov.f32f32 r1.w, r0.z +mov.f32f32 r2.x, r0.w +mov.f32f32 r2.y, r1.x +mov.f32f32 r2.w, r1.x +sam (f32)(xyz)r3.x, r0.z, s#5, t#5 +(sy)(ss)mad.f32 r0.z, c12.y, r3.x, c12.z +mov.f32f32 r3.w, r1.x +bary.f r1.y, 1, r0.x +bary.f r0.w, 16, r0.x +mov.f32f32 r2.z, r0.z +bary.f r3.x, 12, r0.x +mov.f32f32 r4.x, r1.y bary.f r4.y, 8, r0.x -mul.f r2.w, r1.z, r2.w -mov.f32f32 r2.z, r2.z -bary.f r4.z, 13, r0.x -mov.f32f32 r3.x, r3.x -mul.f r3.y, r1.z, r3.y -mul.f r1.z, r1.z, r4.y -mad.f32 r2.w, r2.z, r4.z, r2.w -mul.f r4.y, r3.x, r3.x -add.f r4.z, r5.y, c12.x -bary.f r5.x, 17, r0.x -mov.f32f32 r2.w, r2.w -mad.f32 r4.w, c12.y, r4.w, c12.z -mov.f32f32 r4.z, r4.z -mad.f32 r3.y, r2.z, r5.x, r3.y +mul.f r0.z, r0.z, r0.w +mul.f r0.w, r2.z, r3.x +mad.f32 r3.x, c12.y, r3.y, c12.z +mul.f r2.z, r2.z, r4.y +bary.f r3.y, 17, r0.x +sam (f32)(xyz)r3.w, r3.w, s#0, t#0 +(sy)(ss)add.f r3.w, r3.w, c12.x +mov.f32f32 r4.z, r3.x +bary.f r4.w, 13, r0.x bary.f r5.x, 9, r0.x -mov.f32f32 r4.w, r4.w -bary.f r5.y, 14, r0.x -mad.f32 r4.y, r4.z, r4.z, r4.y -mov.f32f32 r3.y, r3.y -mad.f32 r1.z, r2.z, r5.x, r1.z -mad.f32 r2.z, r4.w, r5.y, r2.w -mov.f32f32 r2.w, r4.y -add.f r4.y, r5.z, c12.x -bary.f r5.x, 18, r0.x -mov.f32f32 r2.z, r2.z -mov.f32f32 r1.z, r1.z -bary.f (ei)r0.x, 10, r0.x -mov.f32f32 r0.y, r4.y -mad.f32 r3.y, r4.w, r5.x, r3.y -rsq r1.w, r1.w -(ss)mov.f32f32 r1.w, r1.w -mad.f32 r0.x, r4.w, r0.x, r1.z -mad.f32 r1.z, r0.y, r0.y, r2.w -mov.f32f32 r2.w, r3.y -mul.f r1.x, r1.x, r1.w -mul.f r3.y, r0.x, r0.x -mul.f r2.y, r2.y, r1.w -mad.f32 r3.y, r2.z, r2.z, r3.y -mov.f32f32 r2.w, r2.w -rsq r1.z, r1.z -mov.f32f32 r1.x, r1.x -(ss)mov.f32f32 r1.z, r1.z +mad.f32 r0.z, r3.x, r3.y, r0.z +mul.f r3.x, r1.z, r1.z +mad.f32 r0.w, r4.z, r4.w, r0.w +mad.f32 r3.y, c12.y, r3.z, c12.z +mov.f32f32 r3.z, r3.w +mad.f32 r2.z, r4.z, r5.x, r2.z +bary.f r4.z, 18, r0.x +mov.f32f32 r4.w, r3.y +bary.f r5.x, 14, r0.x +mul.f r3.w, r3.w, r3.z +add.f r4.x, r4.x, c12.x +bary.f r5.y, 10, r0.x +mad.f32 r0.w, r4.w, r5.x, r0.w +mad.f32 r0.z, r3.y, r4.z, r0.z +bary.f r3.y, 5, r0.x +mov.f32f32 r4.z, r4.x +mov.f32f32 r5.x, r0.w +mad.f32 r4.w, r4.w, r5.y, r2.z +mov.f32f32 r2.z, r0.z +mad.f32 r3.x, r3.y, r3.y, r3.x +bary.f (ei)r0.x, 6, r0.x +mul.f r0.y, r4.w, r4.w +mad.f32 r3.w, r4.x, r4.z, r3.w +mad.f32 r0.y, r0.w, r5.x, r0.y +mov.f32f32 r0.w, r2.z +add.f r4.x, r4.y, c12.x +mad.f32 r4.y, r0.x, r0.x, r3.x +mov.f32f32 r2.z, r1.y +mad.f32 r0.y, r0.w, r0.w, r0.y +mov.f32f32 r0.w, r4.x +mov.f32f32 r3.x, r1.y +sam (f32)(xyz)r5.y, r1.w, s#4, t#4 +sam (f32)(xyz)r6.x, r1.x, s#3, t#3 +(rpt3)nop +rsq r0.y, r0.y +(ss)mov.f32f32 r1.x, r0.y +mul.f r0.y, r0.z, r0.y +mad.f32 r0.z, r0.w, r0.w, r3.w +rsq r0.w, r4.y +(ss)mov.f32f32 r1.y, r0.w +mul.f r1.w, r4.w, r1.x +mov.f32f32 r2.x, r0.y +mul.f r1.x, r5.x, r1.x +mul.f r1.z, r1.z, r1.y +mul.f r1.y, r3.y, r1.y +rsq r0.z, r0.z +(ss)mov.f32f32 r3.y, r0.z +mov.f32f32 r3.w, r1.w +(ss)max.f r4.y, c13.y, r2.x +mov.f32f32 r4.w, r1.x +mul.f r3.z, r3.z, r3.y +mul.f r3.y, r4.z, r3.y +mul.f r0.z, r4.x, r0.z +nop +mul.f r1.w, r3.z, r1.w +mov.f32f32 r3.z, r3.z +mad.f32 r1.x, r3.y, r1.x, r1.w +rcp r1.w, r4.y +(ss)mov.f32f32 r4.x, r1.w +mad.f32 r1.x, r0.z, r2.x, r1.x +(ss)mul.f r4.y, r3.z, r3.w mov.f32f32 r3.y, r3.y -mov.f32f32 r2.y, r2.y -mad.f32 r3.y, r2.w, r2.w, r3.y -mul.f r3.x, r3.x, r1.z -mul.f r4.y, r4.z, r1.z -mul.f r0.y, r0.y, r1.z -(rpt2)nop -rsq r1.z, r3.y -(ss)mov.f32f32 r1.z, r1.z -mov.f32f32 r3.x, r3.x -(ss)mov.f32f32 r3.y, r4.y -mov.f32f32 r0.y, r0.y -mul.f r0.x, r0.x, r1.z -mul.f r2.w, r2.w, r1.z -mul.f r1.z, r2.z, r1.z -mul.f r1.w, r3.w, r1.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r2.z, r2.w -mov.f32f32 r1.z, r1.z -mov.f32f32 r1.w, r1.w -mul.f r2.w, r3.x, r0.x -mul.f r3.w, r3.x, r0.x -mad.f32 r2.w, r3.y, r1.z, r2.w -mad.f32 r3.w, r3.y, r1.z, r3.w -max.f r4.z, c13.y, r2.z -mov.f32f32 r4.y, r2.x -mov.f32f32 r2.x, r2.w -mov.f32f32 r2.w, r3.w -mad.f32 r2.x, r0.y, r2.z, r2.x -mad.f32 r2.w, r0.y, r2.z, r2.w -mov.f32f32 r3.w, r4.z -mov.f32f32 r4.z, r0.w -mul.f r3.x, r2.x, r3.x -mul.f r3.y, r2.x, r3.y -mul.f r0.y, r2.x, r0.y -mov.f32f32 r2.x, r2.w -mov.f32f32 r2.w, r3.x -mov.f32f32 r3.x, r3.y -mov.f32f32 r0.y, r0.y -max.f r2.x, r2.x, c12.w -mul.f r2.w, c12.y, r2.w -mul.f r3.x, c12.y, r3.x -mul.f r0.y, c12.y, r0.y -mov.f32f32 r2.x, r2.x -mov.f32f32 r2.w, r2.w -mov.f32f32 r3.x, r3.x -mov.f32f32 r0.y, r0.y -sam (f32)(xyzw)r4.w, r4.x, s#1, t#1 -(sy)mul.f r3.y, r5.y, c7.z -add.f r0.x, r0.x, (neg)r2.w -add.f r1.z, r1.z, (neg)r3.x -add.f r0.y, r2.z, (neg)r0.y -mul.f r2.z, r3.y, r2.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r2.z, r2.z -mul.f r0.x, r0.x, r1.x -mul.f r1.x, r5.x, c7.y -mad.f32 r0.x, r1.z, r2.y, r0.x -mul.f r1.z, r4.w, c7.x -rcp r2.y, r3.w -(ss)mov.f32f32 r2.y, r2.y mov.f32f32 r0.z, r0.z -mov.f32f32 r0.x, r0.x -mul.f r1.x, r1.x, r2.x -mad.f32 r0.x, r0.y, r1.w, r0.x -mul.f r0.y, r1.z, r2.x -(rpt1)nop -mov.f32f32 r0.x, r0.x -mov.f32f32 r1.x, r1.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.z, r0.z +mul.f r3.z, r1.x, r3.z +(sy)mul.f r4.z, r5.w, r4.x +mul.f r4.x, r5.z, r4.x +mad.f32 r4.y, r3.y, r4.w, r4.y +mul.f r3.z, c12.y, r3.z +mad.f32 r2.x, r0.z, r2.x, r4.y +mul.f r3.y, r1.x, r3.y +mul.f r0.z, r1.x, r0.z +add.f r1.x, r3.w, (neg)r3.z +max.f r2.x, r2.x, c12.w +sam (f32)(xyzw)r6.w, r2.y, s#1, t#1 +(ss)mul.f r2.y, c12.y, r3.y +(sy)mul.f r2.z, r6.w, c7.x +mul.f r1.x, r1.x, r1.z +mov.f32f32 r1.z, r2.x +add.f r2.y, r4.w, (neg)r2.y +mul.f r2.x, r2.z, r2.x +mul.f r2.z, r7.y, c7.z +mul.f r3.y, r7.x, c7.y +mad.f32 r1.x, r2.y, r1.y, r1.x +mul.f r0.z, c12.y, r0.z +mul.f r1.y, r2.z, r1.z +mul.f r1.z, r3.y, r1.z +mul.f r2.y, r5.y, r1.w +add.f r0.y, r0.y, (neg)r0.z +mul.f r0.x, r0.x, r0.w +mul.f r0.z, r7.y, c6.z +mul.f r1.w, r7.z, c11.x +sam (f32)(xyzw)r2.z, r2.w, s#2, t#2 +(sy)mul.f r0.w, r3.x, c8.z +mad.f32 r0.x, r0.y, r0.x, r1.x +mul.f r0.y, r7.x, c6.y +mul.f r1.x, r6.w, c6.x +(ss)mul.f r2.w, r2.w, c8.y max.f r0.x, (neg)r0.x, c12.w -mov.f32f32 r0.z, r1.y -mul.f r1.y, r5.z, c11.x -mul.f r2.x, r5.y, c6.z -mov.f32f32 r0.x, r0.x -mov.f32f32 r1.w, r0.z -mov.f32f32 r0.z, r1.y -mov.f32f32 r1.y, r2.x -mul.f r2.x, r5.x, c6.y -mul.f r2.w, r4.w, c6.x -mov.f32f32 r3.x, r4.z +(rpt3)nop +mad.f32 r3.x, c9.x, r3.y, c13.x +mul.f r2.z, r2.z, c8.x log2 r0.x, r0.x -mov.f32f32 r3.y, r3.z -(ss)nop -sam (f32)(xyz)r3.w, r1.z, s#4, t#4 -(sy)(ss)mul.f r1.z, r3.w, r2.y -mul.f r1.w, r4.y, r2.y -mul.f r2.y, r4.x, r2.y -mov.f32f32 r3.y, r3.y -mov.f32f32 r1.z, r1.z -mov.f32f32 r0.z, r0.z -nop -mov.f32f32 r3.w, r1.w -mov.f32f32 r2.y, r2.y -mov.f32f32 r1.w, r0.z -sam (f32)(xyzw)r4.x, r3.x, s#2, t#2 -(sy)mad.f32 r0.z, c9.x, r4.w, c13.x -mov.f32f32 r2.x, r2.x -mov.f32f32 r2.w, r2.w -(ss)mul.f r3.x, r4.z, c8.z -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -mul.f r3.y, r4.y, c8.y -mul.f r4.x, r4.x, c8.x -(ss)mul.f r0.x, r0.z, r0.x -mov.f32f32 r0.z, r0.w -mov.f32f32 r0.w, r3.z -nop -mov.f32f32 r0.x, r0.x +(rpt1)nop +(ss)mul.f r0.x, r3.x, r0.x (rpt5)nop exp2 r0.x, r0.x -(ss)mad.f32 r2.z, r3.x, r0.x, r2.z -mad.f32 r1.x, r3.y, r0.x, r1.x -(ss)mad.f32 r0.x, r4.x, r0.x, r0.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.y, r2.z -mov.f32f32 r1.x, r1.x -mov.f32f32 r0.x, r0.x +(ss)mad.f32 r0.w, r0.w, r0.x, r1.y +mad.f32 r1.y, r2.w, r0.x, r1.z +(ss)mad.f32 r0.x, r2.z, r0.x, r2.x nop -mad.f32 r0.y, r0.y, r3.w, r1.y -mad.f32 r1.x, r1.x, r2.y, r2.x -sam (f32)(xyz)r3.x, r0.z, s#3, t#3 -(sy)mad.f32 r0.y, c10.z, r3.z, r0.y -mad.f32 r0.x, r0.x, r1.z, r2.w -(ss)mad.f32 r0.z, c10.y, r3.y, r1.x -mad.f32 r0.x, c10.x, r3.x, r0.x -mov.f32f32 r0.y, r0.y -(rpt2)nop -mov.f32f32 r1.z, r0.y -mov.f32f32 r0.y, r0.z -mov.f32f32 r0.x, r0.x -(rpt1)nop -mov.f32f32 r1.y, r0.y -mov.f32f32 r1.x, r0.x +mad.f32 r0.z, r0.w, r4.z, r0.z +mad.f32 r0.y, r1.y, r4.x, r0.y +mad.f32 r1.z, c10.z, r6.z, r0.z +mad.f32 r1.y, c10.y, r6.y, r0.y +mad.f32 r0.x, r0.x, r2.y, r1.x +nop +mad.f32 r1.x, c10.x, r6.x, r0.x end nop +nop +nop ; FRAG: outputs: r1.x (1:0) -; FRAG: inputs: r0.z (5:20,cm=f,il=8,b=1) r1.x (5:21,cm=f,il=12,b=1) r2.x (5:22,cm=f,il=16,b=1) r3.x (5:23,cm=f,il=20,b=1) r4.x (5:24,cm=f,il=24,b=1) -; FRAG: 222 instructions, 0 half, 6 full +; FRAG: inputs: r0.x (5:20,cm=f,il=8,b=1) r1.x (5:21,cm=f,il=12,b=1) r2.x (5:22,cm=f,il=16,b=1) r3.x (5:23,cm=f,il=20,b=1) r4.x (5:24,cm=f,il=24,b=1) +; FRAG: 157 instructions, 0 half, 8 full diff --git a/reference/xonotic-gl2/xonotic-glx-gl2-15.asm b/reference/xonotic-gl2/xonotic-glx-gl2-15.asm index 9a1b31d..26f774e 100644 --- a/reference/xonotic-gl2/xonotic-glx-gl2-15.asm +++ b/reference/xonotic-gl2/xonotic-glx-gl2-15.asm @@ -1,22 +1,22 @@ ; options: ; VERT: new compiler -@in(r2.w) in0 -@in(r3.x) in1 -@in(r3.y) in2 -@in(r3.z) in3 +@in(r6.x) in0 +@in(r6.y) in1 +@in(r6.z) in2 +@in(r6.w) in3 @in(r0.x) in4 @in(r0.y) in5 @in(r0.z) in6 @in(r0.w) in7 -@in(r6.x) in8 -@in(r6.y) in9 -@in(r6.z) in10 -@in(r3.w) in12 -@in(r4.x) in13 -@in(r4.y) in14 -@in(r4.z) in16 -@in(r4.w) in17 -@in(r5.x) in18 +@in(r3.x) in8 +@in(r3.y) in9 +@in(r3.z) in10 +@in(r4.x) in12 +@in(r4.y) in13 +@in(r4.z) in14 +@in(r5.x) in16 +@in(r5.y) in17 +@in(r5.z) in18 @in(r1.z) in20 @in(r1.w) in21 @out(r0.x) out0 @@ -43,99 +43,53 @@ @out(r5.y) out21 @out(r5.z) out22 @out(r5.w) out23 -(sy)(ss)add.f r1.x, c4.x, (neg)r2.w +@const(c9.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 +@const(c10.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 +(sy)(ss)add.f r1.x, c4.x, (neg)r6.x mul.f r1.y, c5.y, r0.x mul.f r0.x, c5.x, r0.x +mul.f r2.w, c0.w, r6.x +mul.f r2.x, r1.x, r5.x +add.f r2.y, c4.y, (neg)r6.y +mul.f r2.z, r1.x, r4.x +mul.f r1.x, r1.x, r3.x mad.f32 r1.y, c6.y, r0.y, r1.y -mul.f r2.x, r1.x, r4.z -add.f r5.y, c4.y, (neg)r3.x -mad.f32 r1.y, c7.y, r0.z, r1.y +mad.f32 r2.x, r2.y, r5.y, r2.x +add.f r3.w, c4.z, (neg)r6.z +mad.f32 r4.w, r2.y, r4.y, r2.z +mad.f32 r1.x, r2.y, r3.y, r1.x +nop +mad.f32 r2.z, r3.w, r5.z, r2.x +mad.f32 r2.y, r3.w, r4.z, r4.w +mad.f32 r2.x, r3.w, r3.z, r1.x +mad.f32 r1.x, c7.y, r0.z, r1.y mad.f32 r0.x, c6.x, r0.y, r0.x -mul.f r0.y, r1.x, r3.w -mad.f32 r2.x, r5.y, r4.w, r2.x -mad.f32 r1.y, c8.y, r0.w, r1.y +mad.f32 r1.y, c8.y, r0.w, r1.x mad.f32 r0.x, c7.x, r0.z, r0.x -mad.f32 r0.y, r5.y, r4.x, r0.y -mov.f32f32 r0.z, r2.x -add.f r2.x, c4.z, (neg)r3.y -mov.f32f32 r1.y, r1.y -mad.f32 r0.x, c8.x, r0.w, r0.x -mov.f32f32 r0.y, r0.y -mad.f32 r0.z, r2.x, r5.x, r0.z -mov.f32f32 r0.w, r1.y -mov.f32f32 r0.x, r0.x -mad.f32 r0.y, r2.x, r4.y, r0.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r1.y, r0.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r2.y, r0.y -mov.f32f32 r2.z, r0.z -mul.f r0.y, r1.x, r6.x -mov.f32f32 r1.x, r0.x -mad.f32 r0.x, r5.y, r6.y, r0.y -mul.f r0.y, c0.w, r2.w -mul.f r0.z, c0.z, r2.w -mul.f r0.w, c0.y, r2.w -mov.f32f32 r0.x, r0.x -mad.f32 r0.y, c1.w, r3.x, r0.y -mad.f32 r0.x, r2.x, r6.z, r0.x -mad.f32 r0.y, c2.w, r3.y, r0.y -mad.f32 r0.z, c1.z, r3.x, r0.z -mad.f32 r0.w, c1.y, r3.x, r0.w -mov.f32f32 r2.x, r0.x -mad.f32 r0.x, c3.w, r3.z, r0.y -mad.f32 r0.y, c2.z, r3.y, r0.z -mad.f32 r0.z, c2.y, r3.y, r0.w -mul.f r2.w, c0.x, r2.w -mov.f32f32 r0.w, r0.x -mad.f32 r0.x, c3.z, r3.z, r0.y -mad.f32 r0.y, c3.y, r3.z, r0.z -mad.f32 r2.w, c1.x, r3.x, r2.w -mov.f32f32 r3.x, c9.x -mov.f32f32 r0.z, r0.x -mov.f32f32 r0.y, r0.y -mad.f32 r0.x, c2.x, r3.y, r2.w -mov.f32f32 r5.w, r3.x -mad.f32 r0.x, c3.x, r3.z, r0.x -mov.f32f32 r2.w, r5.x -mov.f32f32 r3.x, r4.w -mov.f32f32 r3.y, r4.z -mov.f32f32 r0.x, r0.x -mov.f32f32 r5.z, r2.w -mov.f32f32 r5.y, r3.x -mov.f32f32 r5.x, r3.y -mov.f32f32 r2.w, c9.x -mov.f32f32 r3.x, r4.y -mov.f32f32 r3.y, r4.x -mov.f32f32 r3.z, r3.w -mov.f32f32 r4.w, r2.w -mov.f32f32 r4.z, r3.x -mov.f32f32 r4.y, r3.y -mov.f32f32 r4.x, r3.z -mov.f32f32 r2.w, c9.x -mov.f32f32 r3.x, r6.z -mov.f32f32 r3.y, r6.y -mov.f32f32 r6.x, r6.x -mov.f32f32 r3.w, r2.w -mov.f32f32 r3.z, r3.x -mov.f32f32 r3.y, r3.y -mov.f32f32 r3.x, r6.x +mad.f32 r0.y, c1.w, r6.y, r2.w +mad.f32 r1.x, c8.x, r0.w, r0.x +mad.f32 r0.x, c2.w, r6.z, r0.y +mul.f r0.y, c0.z, r6.x +mad.f32 r0.w, c3.w, r6.w, r0.x +mad.f32 r0.x, c1.z, r6.y, r0.y +mul.f r0.y, c0.y, r6.x +mad.f32 r0.x, c2.z, r6.z, r0.x +mad.f32 r0.y, c1.y, r6.y, r0.y +mad.f32 r0.z, c3.z, r6.w, r0.x +mad.f32 r0.x, c2.y, r6.z, r0.y +mul.f r2.w, c0.x, r6.x +mad.f32 r0.y, c3.y, r6.w, r0.x +mad.f32 r0.x, c1.x, r6.y, r2.w +mov.f32f32 r5.w, c9.x +mad.f32 r0.x, c2.x, r6.z, r0.x +mov.f32f32 r4.w, c9.x +mad.f32 r0.x, c3.x, r6.w, r0.x +mov.f32f32 r3.w, c9.x mov.f32f32 r2.w, c9.x -mov.f32f32 r1.w, r1.w -mov.f32f32 r1.z, r1.z -nop -mov.f32f32 r2.w, r2.w -mov.f32f32 r1.w, r1.w -mov.f32f32 r1.z, r1.z end nop nop -nop ; VERT: outputs: r0.x (0:0) r1.x (5:20) r2.x (5:21) r3.x (5:22) r4.x (5:23) r5.x (5:24) -; VERT: inputs: r2.w (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=f,il=12,b=0) r6.x (0:0,cm=7,il=16,b=0) r3.w (0:0,cm=7,il=20,b=0) r4.z (0:0,cm=7,il=24,b=0) r1.z (0:0,cm=3,il=28,b=0) -; VERT: 89 instructions, 0 half, 7 full +; VERT: inputs: r6.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=f,il=12,b=0) r3.x (0:0,cm=7,il=16,b=0) r4.x (0:0,cm=7,il=20,b=0) r5.x (0:0,cm=7,il=24,b=0) r1.z (0:0,cm=3,il=28,b=0) +; VERT: 42 instructions, 0 half, 7 full diff --git a/reference/xonotic-gl2/xonotic-glx-gl2-16.asm b/reference/xonotic-gl2/xonotic-glx-gl2-16.asm index be51a42..20eca61 100644 --- a/reference/xonotic-gl2/xonotic-glx-gl2-16.asm +++ b/reference/xonotic-gl2/xonotic-glx-gl2-16.asm @@ -6,143 +6,106 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c7.x) 0xbf000000, 0x40000000, 0xbf800000, 0x00000000 +@const(c8.x) 0x3e800000, 0x00000000, 0x00000000, 0x00000000 +@const(c9.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.z, 2, r0.x -bary.f r0.w, 0, r0.x -bary.f r1.x, 3, r0.x +bary.f r0.w, 3, r0.x +bary.f r1.x, 0, r0.x bary.f r1.y, 1, r0.x mov.f32f32 r1.z, r0.z mov.f32f32 r1.w, r0.w -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.z, r0.z -mov.f32f32 r2.x, r1.z -mov.f32f32 r1.z, r1.x -mov.f32f32 r2.z, r1.w -mov.f32f32 r1.w, r1.y -mov.f32f32 r3.x, r0.w -mov.f32f32 r2.y, r1.z -mov.f32f32 r0.w, r1.y -mov.f32f32 r2.w, r1.w -mov.f32f32 r1.y, r0.z -mov.f32f32 r0.z, r1.x -mov.f32f32 r3.y, r0.w -bary.f r0.w, 4, r0.x -sam (f32)(xyz)r1.z, r2.x, s#3, t#3 -(sy)mad.f32 r1.x, c7.y, r1.z, c7.z -mad.f32 r1.z, c7.y, r1.w, c7.z -sam (f32)(xyz)r3.z, r2.z, s#0, t#0 -(sy)add.f r1.w, r3.z, c7.x -(ss)mad.f32 r2.x, c7.y, r2.x, c7.z -mov.f32f32 r1.x, r1.x -bary.f r2.y, 8, r0.x -mov.f32f32 r1.w, r1.w -bary.f r2.z, 12, r0.x -mul.f r0.w, r1.x, r0.w -mul.f r2.y, r1.x, r2.y -mov.f32f32 r1.z, r1.z -bary.f r2.w, 9, r0.x -mul.f r3.z, r1.w, r1.w +mov.f32f32 r2.x, r1.x +mov.f32f32 r2.y, r1.y +sam (f32)(xyz)r2.z, r0.z, s#3, t#3 +(sy)(ss)mad.f32 r0.z, c7.y, r2.z, c7.z +bary.f r0.w, 12, r0.x +mad.f32 r2.z, c7.y, r2.w, c7.z +mad.f32 r2.w, c7.y, r3.x, c7.z +mov.f32f32 r3.x, r0.z +bary.f r3.y, 8, r0.x +sam (f32)(xyz)r3.z, r2.x, s#0, t#0 +(sy)(ss)add.f r2.x, r3.z, c7.x +bary.f r2.y, 4, r0.x +mul.f r0.z, r0.z, r0.w +mul.f r0.w, r3.x, r3.y +mov.f32f32 r3.y, r2.z +bary.f r3.z, 9, r0.x +mov.f32f32 r4.y, r2.x +mul.f r2.y, r3.x, r2.y +bary.f r3.x, 13, r0.x +mad.f32 r0.w, r3.y, r3.z, r0.w +mov.f32f32 r3.z, r2.w +bary.f r4.z, 10, r0.x +mul.f r2.x, r2.x, r4.y add.f r3.w, r3.w, c7.x -mul.f r1.x, r1.x, r2.z -mad.f32 r2.y, r1.z, r2.w, r2.y -bary.f r2.z, 13, r0.x -mov.f32f32 r2.w, r3.w -bary.f r3.w, 5, r0.x -mov.f32f32 r2.y, r2.y -mov.f32f32 r2.x, r2.x -bary.f r4.y, 10, r0.x -mad.f32 r3.z, r2.w, r2.w, r3.z -mad.f32 r1.x, r1.z, r2.z, r1.x -mad.f32 r0.w, r1.z, r3.w, r0.w -mad.f32 r1.z, r2.x, r4.y, r2.y -mov.f32f32 r2.y, r3.z -add.f r2.z, r4.x, c7.x -mov.f32f32 r1.x, r1.x -mov.f32f32 r3.z, r1.z -mov.f32f32 r0.w, r0.w -bary.f r1.z, 6, r0.x -mov.f32f32 r2.z, r2.z +bary.f r4.w, 5, r0.x +mad.f32 r0.w, r3.z, r4.z, r0.w +mad.f32 r0.z, r2.z, r3.x, r0.z +mov.f32f32 r2.z, r3.w +mad.f32 r2.y, r3.y, r4.w, r2.y +mov.f32f32 r3.x, r0.w +bary.f r3.y, 6, r0.x +mad.f32 r2.x, r3.w, r2.z, r2.x +add.f r3.w, r4.x, c7.x bary.f (ei)r0.x, 14, r0.x -sam (f32)(xyzw)r3.w, r3.x, s#1, t#1 -(sy)mul.f r0.y, r4.z, c6.x -mad.f32 r0.w, r2.x, r1.z, r0.w -mad.f32 r2.y, r2.z, r2.z, r2.y -mad.f32 r0.x, r2.x, r0.x, r1.x -mov.f32f32 r0.y, r0.y -mul.f r1.x, r0.w, r0.w -mov.f32f32 r1.z, r0.z -mad.f32 r0.z, r3.z, r3.z, r1.x -mov.f32f32 r0.x, r0.x -rsq r1.x, r2.y +mad.f32 r0.y, r3.z, r3.y, r2.y +sam (f32)(xyz)r4.z, r1.z, s#2, t#2 +sam (f32)(xyzw)r5.y, r1.x, s#1, t#1 +(sy)(ss)mul.f r1.w, r6.x, c6.x +mov.f32f32 r1.x, r3.w +mad.f32 r0.x, r2.w, r0.x, r0.z +mul.f r0.z, r0.y, r0.y nop -(ss)mov.f32f32 r1.x, r1.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y +mad.f32 r0.z, r0.w, r3.x, r0.z +mov.f32f32 r0.w, r0.x +mad.f32 r1.x, r1.x, r1.x, r2.x (rpt1)nop -mad.f32 r0.z, r0.x, r0.x, r0.z -mul.f r1.w, r1.w, r1.x -mul.f r2.x, r2.w, r1.x -mul.f r1.x, r2.z, r1.x +mov.f32f32 r0.w, r0.w (rpt2)nop +mad.f32 r0.z, r0.w, r0.w, r0.z +rsq r0.w, r1.x +(ss)mov.f32f32 r1.x, r0.w +(rpt3)nop +mul.f r1.y, r4.y, r1.x rsq r0.z, r0.z -(ss)mov.f32f32 r0.z, r0.z -mov.f32f32 r2.y, r1.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r1.x, r1.x -mul.f r0.w, r0.w, r0.z +(ss)mov.f32f32 r1.z, r0.z mul.f r0.x, r0.x, r0.z -mul.f r0.z, r3.z, r0.z -mov.f32f32 r1.w, r0.y -mov.f32f32 r0.y, r0.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.z, r0.z -sam (f32)(xyz)r2.z, r1.y, s#2, t#2 +(ss)mul.f r0.z, r2.z, r1.x +mul.f r0.w, r3.w, r0.w +mul.f r0.y, r0.y, r1.z +mov.f32f32 r1.x, r0.x +mul.f r1.z, r3.x, r1.z nop -mul.f r0.y, r2.y, r0.y -max.f r0.w, c8.x, r0.x -mad.f32 r0.y, r2.x, r0.z, r0.y -(rpt2)nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.w -mad.f32 r0.x, r1.x, r0.x, r0.y -(rpt2)nop -mov.f32f32 r0.x, r0.x +mul.f r0.y, r1.y, r0.y +max.f r1.x, c8.x, r1.x +mad.f32 r0.y, r0.z, r1.z, r0.y +nop +mad.f32 r0.x, r0.w, r0.x, r0.y (rpt2)nop max.f r0.x, r0.x, c7.w -rcp r0.y, r0.z -(ss)mov.f32f32 r0.y, r0.y -(rpt1)nop -mov.f32f32 r0.x, r0.x -(sy)(ss)mul.f r0.z, r3.x, r0.y -mul.f r0.w, r2.w, r0.y -mul.f r0.y, r2.z, r0.y -mul.f r1.x, c5.z, r0.x -mul.f r1.y, c5.y, r0.x +rcp r0.y, r1.x +(ss)mov.f32f32 r0.z, r0.y +mul.f r0.y, r4.z, r0.y +nop +mov.f32f32 r0.w, r0.x mul.f r0.x, c5.x, r0.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.y, r1.y -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.w, r0.w -mad.f32 r0.z, r1.x, r0.z, c4.z -mov.f32f32 r0.y, r0.y -(rpt1)nop -mov.f32f32 r0.z, r0.z -mad.f32 r0.w, r1.y, r0.w, c4.y +(ss)mul.f r1.x, r5.x, r0.z +mul.f r0.z, r4.w, r0.z +mul.f r1.y, c5.z, r0.w +mul.f r0.w, c5.y, r0.w mad.f32 r0.x, r0.x, r0.y, c4.x nop -mul.f r0.y, r4.y, r0.z -mov.f32f32 r0.z, r0.w -mov.f32f32 r0.x, r0.x -nop -mov.f32f32 r1.z, r0.y -mul.f r0.y, r4.x, r0.z -mul.f r0.x, r3.w, r0.x +mad.f32 r0.y, r1.y, r1.x, c4.z +mad.f32 r0.z, r0.w, r0.z, c4.y (rpt1)nop -mov.f32f32 r1.y, r0.y -mov.f32f32 r1.x, r0.x +mul.f r1.z, r5.w, r0.y +mul.f r1.y, r5.z, r0.z +mul.f r1.x, r5.y, r0.x end +nop +nop ; FRAG: outputs: r1.x (1:0) -; FRAG: inputs: r0.y (5:20,cm=f,il=8,b=1) r1.x (5:21,cm=f,il=12,b=1) r2.x (5:22,cm=f,il=16,b=1) r3.x (5:23,cm=f,il=20,b=1) -; FRAG: 148 instructions, 0 half, 5 full +; FRAG: inputs: r0.x (5:20,cm=f,il=8,b=1) r1.x (5:21,cm=f,il=12,b=1) r2.x (5:22,cm=f,il=16,b=1) r3.x (5:23,cm=f,il=20,b=1) +; FRAG: 103 instructions, 0 half, 7 full diff --git a/reference/xonotic-gl2/xonotic-glx-gl2-17.asm b/reference/xonotic-gl2/xonotic-glx-gl2-17.asm index 7603f89..d468c8e 100644 --- a/reference/xonotic-gl2/xonotic-glx-gl2-17.asm +++ b/reference/xonotic-gl2/xonotic-glx-gl2-17.asm @@ -1,24 +1,24 @@ ; options: ; VERT: new compiler -@in(r1.z) in0 -@in(r1.w) in1 -@in(r2.x) in2 -@in(r2.y) in3 +@in(r4.w) in0 +@in(r5.x) in1 +@in(r5.y) in2 +@in(r5.z) in3 @in(r0.x) in4 @in(r0.y) in5 @in(r0.z) in6 @in(r0.w) in7 -@in(r5.x) in8 -@in(r5.y) in9 -@in(r5.z) in10 -@in(r2.z) in12 -@in(r2.w) in13 -@in(r3.x) in14 -@in(r3.y) in16 -@in(r3.z) in17 -@in(r3.w) in18 -@in(r5.w) in20 -@in(r6.x) in21 +@in(r2.x) in8 +@in(r2.y) in9 +@in(r2.z) in10 +@in(r3.x) in12 +@in(r3.y) in13 +@in(r3.z) in14 +@in(r4.x) in16 +@in(r4.y) in17 +@in(r4.z) in18 +@in(r1.z) in20 +@in(r1.w) in21 @out(r0.x) out0 @out(r0.y) out1 @out(r0.z) out2 @@ -39,75 +39,36 @@ @out(r4.y) out17 @out(r4.z) out18 @out(r4.w) out19 +@const(c8.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)mul.f r1.x, c4.y, r0.x mul.f r0.x, c4.x, r0.x mad.f32 r1.x, c5.y, r0.y, r1.x mad.f32 r0.x, c5.x, r0.y, r0.x mad.f32 r0.y, c6.y, r0.z, r1.x mad.f32 r0.x, c6.x, r0.z, r0.x -mad.f32 r0.y, c7.y, r0.w, r0.y -mad.f32 r0.x, c7.x, r0.w, r0.x -mul.f r0.z, c0.w, r1.z -mul.f r0.w, c0.z, r1.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x -mad.f32 r0.z, c1.w, r1.w, r0.z -mad.f32 r0.w, c1.z, r1.w, r0.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x -mad.f32 r0.z, c2.w, r2.x, r0.z -mad.f32 r0.w, c2.z, r2.x, r0.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x -(rpt1)nop -mov.f32f32 r1.y, r0.y -mov.f32f32 r1.x, r0.x -mad.f32 r0.x, c3.w, r2.y, r0.z -mad.f32 r0.y, c3.z, r2.y, r0.w -mul.f r4.x, c0.y, r1.z -mul.f r1.z, c0.x, r1.z -mov.f32f32 r0.w, r0.x -mov.f32f32 r0.z, r0.y -mad.f32 r0.x, c1.y, r1.w, r4.x -mad.f32 r0.y, c1.x, r1.w, r1.z -mad.f32 r0.x, c2.y, r2.x, r0.x -mad.f32 r0.y, c2.x, r2.x, r0.y -mad.f32 r0.x, c3.y, r2.y, r0.x -mad.f32 r1.z, c3.x, r2.y, r0.y -(rpt1)nop -mov.f32f32 r0.y, r0.x -mov.f32f32 r0.x, r1.z -mov.f32f32 r1.z, c8.x -mov.f32f32 r1.w, r3.w -mov.f32f32 r2.x, r3.z -mov.f32f32 r2.y, r3.y -mov.f32f32 r4.w, r1.z -mov.f32f32 r4.z, r1.w -mov.f32f32 r4.y, r2.x -mov.f32f32 r4.x, r2.y -mov.f32f32 r1.z, c8.x -mov.f32f32 r1.w, r3.x -mov.f32f32 r2.x, r2.w -mov.f32f32 r2.y, r2.z -mov.f32f32 r3.w, r1.z -mov.f32f32 r3.z, r1.w -mov.f32f32 r3.y, r2.x -mov.f32f32 r3.x, r2.y -mov.f32f32 r1.z, c8.x -mov.f32f32 r1.w, r5.z -mov.f32f32 r2.x, r5.y -mov.f32f32 r5.x, r5.x -mov.f32f32 r2.w, r1.z -mov.f32f32 r2.z, r1.w -mov.f32f32 r2.y, r2.x -mov.f32f32 r2.x, r5.x -mov.f32f32 r1.z, r6.x -mov.f32f32 r5.x, r5.w -(rpt1)nop -mov.f32f32 r1.w, r1.z -mov.f32f32 r1.z, r5.x +mad.f32 r1.y, c7.y, r0.w, r0.y +mad.f32 r1.x, c7.x, r0.w, r0.x +mul.f r0.x, c0.w, r4.w +mul.f r0.y, c0.z, r4.w +mad.f32 r0.x, c1.w, r5.x, r0.x +mad.f32 r0.y, c1.z, r5.x, r0.y +mad.f32 r0.x, c2.w, r5.y, r0.x +mad.f32 r0.y, c2.z, r5.y, r0.y +mad.f32 r0.w, c3.w, r5.z, r0.x +mad.f32 r0.z, c3.z, r5.z, r0.y +mul.f r0.x, c0.y, r4.w +mul.f r0.y, c0.x, r4.w +mad.f32 r0.x, c1.y, r5.x, r0.x +mad.f32 r0.y, c1.x, r5.x, r0.y +mad.f32 r0.x, c2.y, r5.y, r0.x +mad.f32 r2.w, c2.x, r5.y, r0.y +mad.f32 r0.y, c3.y, r5.z, r0.x +mad.f32 r0.x, c3.x, r5.z, r2.w +mov.f32f32 r4.w, c8.x +mov.f32f32 r3.w, c8.x +mov.f32f32 r2.w, c8.x end ; VERT: outputs: r0.x (0:0) r1.x (5:20) r2.x (5:21) r3.x (5:22) r4.x (5:23) -; VERT: inputs: r1.z (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=f,il=12,b=0) r5.x (0:0,cm=7,il=16,b=0) r2.z (0:0,cm=7,il=20,b=0) r3.y (0:0,cm=7,il=24,b=0) r5.w (0:0,cm=3,il=28,b=0) -; VERT: 71 instructions, 0 half, 7 full +; VERT: inputs: r4.w (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=f,il=12,b=0) r2.x (0:0,cm=7,il=16,b=0) r3.x (0:0,cm=7,il=20,b=0) r4.x (0:0,cm=7,il=24,b=0) r1.z (0:0,cm=3,il=28,b=0) +; VERT: 28 instructions, 0 half, 6 full diff --git a/reference/xonotic-gl2/xonotic-glx-gl2-18.asm b/reference/xonotic-gl2/xonotic-glx-gl2-18.asm index 3846190..faa1d33 100644 --- a/reference/xonotic-gl2/xonotic-glx-gl2-18.asm +++ b/reference/xonotic-gl2/xonotic-glx-gl2-18.asm @@ -6,215 +6,150 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c10.x) 0x3f000000, 0xbf000000, 0x40000000, 0xbf800000 +@const(c11.x) 0x00000000, 0x3f800000, 0x3e800000, 0x00000000 +@const(c12.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.z, 2, r0.x -bary.f r0.w, 0, r0.x -bary.f r1.x, 4, r0.x -bary.f r1.y, 3, r0.x -mov.f32f32 r1.z, r0.z -mov.f32f32 r1.w, r0.w -mov.f32f32 r2.y, r0.w -mul.f r2.z, r1.x, r1.x -mov.f32f32 r2.w, r1.z -mov.f32f32 r1.z, r1.y -mov.f32f32 r3.y, r1.w -bary.f r3.w, 1, r0.x -mov.f32f32 r4.x, r2.y -mov.f32f32 r3.x, r1.z -bary.f r1.z, 5, r0.x -mov.f32f32 r1.w, r3.w -mov.f32f32 r2.y, r3.w -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.z, r0.z -mad.f32 r2.z, r1.z, r1.z, r2.z -sam (f32)(xyz)r4.z, r2.w, s#4, t#4 -(sy)(ss)mad.f32 r2.w, c10.z, r4.z, c10.w -mov.f32f32 r3.z, r1.w -mov.f32f32 r4.y, r2.y -mov.f32f32 r1.w, r2.z -mov.f32f32 r2.y, r2.w -bary.f r2.z, 12, r0.x -bary.f r2.w, 16, r0.x -bary.f r3.x, 8, r0.x -sam (f32)(xyz)r5.y, r3.y, s#0, t#0 -(sy)(ss)add.f r3.y, r5.y, c10.y -mul.f r2.z, r2.y, r2.z -mad.f32 r3.z, c10.z, r4.w, c10.w -mul.f r2.w, r2.y, r2.w -mov.f32f32 r3.y, r3.y -mul.f r2.y, r2.y, r3.x -mov.f32f32 r3.x, r3.z -bary.f r3.z, 13, r0.x -mul.f r4.z, r3.y, r3.y -add.f r4.w, r5.z, c10.y -bary.f r5.y, 17, r0.x -mad.f32 r2.z, r3.x, r3.z, r2.z -bary.f r3.z, 9, r0.x -mov.f32f32 r4.w, r4.w -mad.f32 r2.w, r3.x, r5.y, r2.w -mov.f32f32 r2.z, r2.z -mad.f32 r5.x, c10.z, r5.x, c10.w -mad.f32 r4.z, r4.w, r4.w, r4.z -mov.f32f32 r2.w, r2.w -mad.f32 r2.y, r3.x, r3.z, r2.y -mov.f32f32 r3.x, r5.x -bary.f r3.z, 14, r0.x -mov.f32f32 r4.z, r4.z -add.f r5.x, r5.w, c10.y -bary.f r5.y, 18, r0.x -mad.f32 r2.z, r3.x, r3.z, r2.z -mov.f32f32 r2.y, r2.y -mov.f32f32 r3.z, r5.x -bary.f r5.x, 10, r0.x -mov.f32f32 r2.z, r2.z -mad.f32 r2.w, r3.x, r5.y, r2.w -mad.f32 r4.z, r3.z, r3.z, r4.z -mad.f32 r2.y, r3.x, r5.x, r2.y +bary.f r0.w, 3, r0.x +bary.f r1.x, 0, r0.x +bary.f r1.z, 4, r0.x +mov.f32f32 r2.y, r0.z +mov.f32f32 r2.z, r0.w +mov.f32f32 r2.w, r1.x +bary.f r1.y, 1, r0.x +sam (f32)(xyz)r3.y, r0.z, s#4, t#4 +(sy)(ss)mad.f32 r0.z, c10.z, r3.y, c10.w +mov.f32f32 r4.x, r1.x +bary.f r0.w, 16, r0.x +mul.f r1.w, r1.z, r1.z +mov.f32f32 r3.y, r0.z +bary.f r4.z, 12, r0.x +mov.f32f32 r4.y, r1.y +mov.f32f32 r3.x, r1.y +bary.f r4.w, 8, r0.x +mul.f r4.z, r3.y, r4.z +mad.f32 r3.z, c10.z, r3.z, c10.w +mul.f r0.z, r0.z, r0.w +mul.f r0.w, r3.y, r4.w +bary.f r3.y, 17, r0.x +mov.f32f32 r4.w, r3.z +bary.f r5.x, 13, r0.x +sam (f32)(xyz)r5.y, r4.x, s#0, t#0 +(sy)(ss)add.f r4.x, r5.y, c10.y +sam (f32)(xyzw)r6.x, r2.w, s#1, t#1 +(ss)bary.f r2.w, 9, r0.x +(sy)cmps.f.lt r3.x, r6.w, c10.x +mad.f32 r4.y, r4.w, r5.x, r4.z +mad.f32 r3.w, c10.z, r3.w, c10.w +mov.f32f32 r4.z, r4.x +cov.u32f32 r3.x, r3.x +mad.f32 r0.w, r4.w, r2.w, r0.w +mov.f32f32 r2.w, r3.w +bary.f r4.w, 14, r0.x +mul.f r4.x, r4.x, r4.z +add.f r5.x, r5.z, c10.y +mov.f32f32 r5.y, (0.000000) +mad.f32 r4.y, r2.w, r4.w, r4.y +bary.f r4.w, 10, r0.x +mad.f32 r0.z, r3.z, r3.y, r0.z +bary.f r3.y, 5, r0.x +mov.f32f32 r3.z, r4.y +mad.f32 r0.w, r2.w, r4.w, r0.w +mov.f32f32 r2.w, r5.x +cmps.f.ne p0.x, r3.x, r5.y +bary.f r3.x, 18, r0.x +mul.f r4.w, r0.w, r0.w +mad.f32 r4.x, r5.x, r2.w, r4.x +mad.f32 r4.y, r4.y, r3.z, r4.w +mad.f32 r0.z, r3.w, r3.x, r0.z +add.f r3.x, r5.w, c10.y bary.f (ei)r0.x, 6, r0.x -sam (f32)(xyzw)r5.x, r4.x, s#1, t#1 -mov.f32f32 r0.y, r2.w -(sy)cmps.f.lt r2.w, r5.w, c10.x -mul.f r3.x, r2.y, r2.y -mad.f32 r1.w, r0.x, r0.x, r1.w -mad.f32 r3.x, r2.z, r2.z, r3.x -(ss)rsq r4.x, r4.z -(ss)mov.f32f32 r4.x, r4.x +kill p0.x +mov.f32f32 r0.y, r0.z +mov.f32f32 r3.w, r3.x +mad.f32 r4.w, r3.y, r3.y, r1.w +mul.f r1.w, r6.w, c9.x mov.f32f32 r0.y, r0.y -cov.u32f32 r2.w, r2.w -mov.f32f32 r3.x, r3.x -mul.f r3.y, r3.y, r4.x -mad.f32 r3.x, r0.y, r0.y, r3.x -mul.f r4.y, r4.w, r4.x -mul.f r3.z, r3.z, r4.x -(rpt3)nop -rsq r3.x, r3.x -(ss)mov.f32f32 r3.x, r3.x -mov.f32f32 r3.y, r3.y -mov.f32f32 r4.x, r4.y +mad.f32 r3.w, r3.w, r3.w, r4.x +mad.f32 r4.x, r0.x, r0.x, r4.w +mul.f r4.w, r6.z, c5.z +mad.f32 r0.y, r0.y, r0.y, r4.y +mul.f r4.y, r6.z, c6.z +mul.f r5.x, r6.y, c6.y +mul.f r5.y, r6.x, c6.x +rsq r3.w, r3.w +(ss)mov.f32f32 r5.z, r3.w +mul.f r3.x, r3.x, r3.w +(ss)rsq r3.w, r4.x +(ss)mov.f32f32 r4.x, r3.w +rsq r0.y, r0.y +(ss)mov.f32f32 r5.w, r0.y +(ss)mul.f r0.y, r0.z, r0.y +mul.f r0.z, r4.z, r5.z +mul.f r2.w, r2.w, r5.z +mul.f r0.w, r0.w, r5.w +mov.f32f32 r4.z, r0.y +mul.f r3.z, r3.z, r5.w +mov.f32f32 r5.z, r0.z +mul.f r0.z, r0.z, r0.w +mov.f32f32 r0.w, r0.w +mad.f32 r0.z, r2.w, r3.z, r0.z +max.f r5.w, c11.z, r4.z +mad.f32 r0.z, r3.x, r4.z, r0.z +mul.f r6.z, r5.z, r0.w mov.f32f32 r3.z, r3.z -mul.f r2.y, r2.y, r3.x -mul.f r0.y, r0.y, r3.x -mul.f r2.z, r2.z, r3.x -mov.f32f32 r3.x, (0.000000) -mov.f32f32 r2.y, r2.y -mov.f32f32 r0.y, r0.y -mov.f32f32 r2.z, r2.z -cmps.f.ne p0.x, r2.w, r3.x -mul.f r2.w, r3.y, r2.y -mul.f r3.x, r3.y, r2.y -mad.f32 r2.w, r4.x, r2.z, r2.w -mad.f32 r3.x, r4.x, r2.z, r3.x -max.f r4.y, c11.z, r0.y -rsq r1.w, r1.w -(ss)mov.f32f32 r1.w, r1.w mov.f32f32 r2.w, r2.w +mul.f r5.z, r0.z, r5.z mov.f32f32 r3.x, r3.x -mad.f32 r2.w, r3.z, r0.y, r2.w -mad.f32 r3.x, r3.z, r0.y, r3.x -mov.f32f32 r4.y, r4.y -kill p0.x -mul.f r3.y, r2.w, r3.y -mul.f r4.x, r2.w, r4.x -mul.f r2.w, r2.w, r3.z -mov.f32f32 r3.x, r3.x -mov.f32f32 r3.y, r3.y -mov.f32f32 r3.z, r4.x -mov.f32f32 r2.w, r2.w -max.f r3.x, r3.x, c11.x -mul.f r3.y, c10.z, r3.y -mul.f r3.z, c10.z, r3.z +rcp r5.w, r5.w +(ss)mov.f32f32 r6.w, r5.w +mad.f32 r6.z, r2.w, r3.z, r6.z +mul.f r5.z, c10.z, r5.z +mad.f32 r4.z, r3.x, r4.z, r6.z +mul.f r2.w, r0.z, r2.w +mul.f r0.z, r0.z, r3.x +add.f r0.w, r0.w, (neg)r5.z +mul.f r1.z, r1.z, r4.x +max.f r3.x, r4.z, c11.x mul.f r2.w, c10.z, r2.w -mov.f32f32 r3.x, r3.x -mov.f32f32 r3.y, r3.y -mov.f32f32 r3.z, r3.z -mov.f32f32 r2.w, r2.w -mul.f r4.x, r5.z, c6.z -add.f r2.y, r2.y, (neg)r3.y -add.f r2.z, r2.z, (neg)r3.z -add.f r0.y, r0.y, (neg)r2.w -mul.f r2.w, r4.x, r3.x -mov.f32f32 r2.y, r2.y -mul.f r1.x, r1.x, r1.w -mov.f32f32 r2.z, r2.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r2.w, r2.w -mov.f32f32 r1.x, r1.x -mul.f r3.y, r5.y, c6.y -mul.f r3.z, r5.x, c6.x -rcp r4.x, r4.y -(ss)mov.f32f32 r4.x, r4.x -mul.f r1.x, r2.y, r1.x -mul.f r1.z, r1.z, r1.w -mul.f r2.y, r3.y, r3.x -mul.f r3.x, r3.z, r3.x -nop -mov.f32f32 r1.z, r1.z -mov.f32f32 r2.y, r2.y -mov.f32f32 r3.x, r3.x -mov.f32f32 r3.y, r0.z -mad.f32 r0.z, r2.z, r1.z, r1.x -mov.f32f32 r1.x, r1.y -mul.f r0.x, r0.x, r1.w -nop -mov.f32f32 r0.z, r0.z -mov.f32f32 r3.z, r1.x -mov.f32f32 r0.x, r0.x -mul.f r1.x, r5.w, c9.x -mul.f r1.y, r5.z, c5.z -mul.f r1.z, r5.y, c5.y -mad.f32 r0.x, r0.y, r0.x, r0.z -mov.f32f32 r0.y, r1.x -nop -sam (f32)(xyz)r5.y, r3.y, s#3, t#3 -(sy)mul.f r0.z, r5.w, r4.x -mov.f32f32 r0.x, r0.x -mul.f r1.x, r5.z, r4.x -mul.f r1.w, r5.y, r4.x -nop +mul.f r0.z, c10.z, r0.z +mul.f r0.w, r0.w, r1.z +mov.f32f32 r1.z, r3.x +add.f r2.w, r3.z, (neg)r2.w +mul.f r3.y, r3.y, r4.x +mul.f r3.x, r5.y, r3.x +add.f r0.y, r0.y, (neg)r0.z +sam (f32)(xyz)r7.x, r2.y, s#3, t#3 +mul.f r0.z, r4.y, r1.z +mad.f32 r0.w, r2.w, r3.y, r0.w +mul.f r0.x, r0.x, r3.w +mul.f r1.z, r5.x, r1.z +(sy)(ss)mul.f r2.y, r7.z, r6.w +mul.f r2.z, r7.y, r6.w +mad.f32 r0.x, r0.y, r0.x, r0.w +mul.f r0.y, r6.y, c5.y +mul.f r0.w, r6.x, c5.x +mul.f r2.w, r7.x, r5.w max.f r0.x, (neg)r0.x, c11.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r1.x, r1.x -mov.f32f32 r2.z, r1.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r1.w, r0.y -mov.f32f32 r0.y, r1.y -mov.f32f32 r1.y, r1.z -mul.f r1.z, r5.x, c5.x -(ss)mov.f32f32 r3.y, r0.w -mov.f32f32 r0.w, r3.w -log2 r0.x, r0.x -(rpt2)nop -mov.f32f32 r3.z, r0.w -mov.f32f32 r0.w, r1.z -(rpt4)nop -sam (f32)(xyzw)r3.y, r3.y, s#2, t#2 -(sy)mad.f32 r1.z, c8.x, r4.x, c11.y -mul.f r3.w, r3.w, c7.z -(ss)mul.f r3.z, r3.z, c7.y +sam (f32)(xyzw)r3.y, r1.x, s#2, t#2 +(sy)(ss)mul.f r1.x, r3.w, c7.z +mul.f r1.y, r3.z, c7.y +mad.f32 r3.z, c8.x, r4.x, c11.y mul.f r3.y, r3.y, c7.x -mov.f32f32 r1.z, r1.z -(rpt2)nop -(ss)mul.f r0.x, r1.z, r0.x -(rpt2)nop -mov.f32f32 r0.x, r0.x +(rpt1)nop +log2 r0.x, r0.x +(ss)mul.f r0.x, r3.z, r0.x (rpt5)nop exp2 r0.x, r0.x -(ss)mad.f32 r1.z, r3.w, r0.x, r2.w -mad.f32 r2.y, r3.z, r0.x, r2.y +(ss)mad.f32 r0.z, r1.x, r0.x, r0.z +mad.f32 r1.x, r1.y, r0.x, r1.z (ss)mad.f32 r0.x, r3.y, r0.x, r3.x nop -mov.f32f32 r1.z, r1.z -mov.f32f32 r2.y, r2.y -mov.f32f32 r0.x, r0.x -nop -mad.f32 r0.y, r1.z, r0.z, r0.y -mad.f32 r0.z, r2.y, r1.x, r1.y -mad.f32 r0.x, r0.x, r2.z, r0.w -nop -mov.f32f32 r1.z, r0.y -mov.f32f32 r1.y, r0.z -mov.f32f32 r1.x, r0.x +mad.f32 r1.z, r0.z, r2.y, r4.w +mad.f32 r1.y, r1.x, r2.z, r0.y +mad.f32 r1.x, r0.x, r2.w, r0.w end ; FRAG: outputs: r1.x (1:0) -; FRAG: inputs: r0.z (5:20,cm=f,il=8,b=1) r1.x (5:21,cm=f,il=12,b=1) r2.x (5:22,cm=f,il=16,b=1) r3.x (5:23,cm=f,il=20,b=1) r4.x (5:24,cm=f,il=24,b=1) -; FRAG: 226 instructions, 0 half, 6 full +; FRAG: inputs: r0.x (5:20,cm=f,il=8,b=1) r1.x (5:21,cm=f,il=12,b=1) r2.x (5:22,cm=f,il=16,b=1) r3.x (5:23,cm=f,il=20,b=1) r4.x (5:24,cm=f,il=24,b=1) +; FRAG: 146 instructions, 0 half, 8 full diff --git a/reference/xonotic-gl2/xonotic-glx-gl2-19.asm b/reference/xonotic-gl2/xonotic-glx-gl2-19.asm index 9a1b31d..26f774e 100644 --- a/reference/xonotic-gl2/xonotic-glx-gl2-19.asm +++ b/reference/xonotic-gl2/xonotic-glx-gl2-19.asm @@ -1,22 +1,22 @@ ; options: ; VERT: new compiler -@in(r2.w) in0 -@in(r3.x) in1 -@in(r3.y) in2 -@in(r3.z) in3 +@in(r6.x) in0 +@in(r6.y) in1 +@in(r6.z) in2 +@in(r6.w) in3 @in(r0.x) in4 @in(r0.y) in5 @in(r0.z) in6 @in(r0.w) in7 -@in(r6.x) in8 -@in(r6.y) in9 -@in(r6.z) in10 -@in(r3.w) in12 -@in(r4.x) in13 -@in(r4.y) in14 -@in(r4.z) in16 -@in(r4.w) in17 -@in(r5.x) in18 +@in(r3.x) in8 +@in(r3.y) in9 +@in(r3.z) in10 +@in(r4.x) in12 +@in(r4.y) in13 +@in(r4.z) in14 +@in(r5.x) in16 +@in(r5.y) in17 +@in(r5.z) in18 @in(r1.z) in20 @in(r1.w) in21 @out(r0.x) out0 @@ -43,99 +43,53 @@ @out(r5.y) out21 @out(r5.z) out22 @out(r5.w) out23 -(sy)(ss)add.f r1.x, c4.x, (neg)r2.w +@const(c9.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 +@const(c10.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 +(sy)(ss)add.f r1.x, c4.x, (neg)r6.x mul.f r1.y, c5.y, r0.x mul.f r0.x, c5.x, r0.x +mul.f r2.w, c0.w, r6.x +mul.f r2.x, r1.x, r5.x +add.f r2.y, c4.y, (neg)r6.y +mul.f r2.z, r1.x, r4.x +mul.f r1.x, r1.x, r3.x mad.f32 r1.y, c6.y, r0.y, r1.y -mul.f r2.x, r1.x, r4.z -add.f r5.y, c4.y, (neg)r3.x -mad.f32 r1.y, c7.y, r0.z, r1.y +mad.f32 r2.x, r2.y, r5.y, r2.x +add.f r3.w, c4.z, (neg)r6.z +mad.f32 r4.w, r2.y, r4.y, r2.z +mad.f32 r1.x, r2.y, r3.y, r1.x +nop +mad.f32 r2.z, r3.w, r5.z, r2.x +mad.f32 r2.y, r3.w, r4.z, r4.w +mad.f32 r2.x, r3.w, r3.z, r1.x +mad.f32 r1.x, c7.y, r0.z, r1.y mad.f32 r0.x, c6.x, r0.y, r0.x -mul.f r0.y, r1.x, r3.w -mad.f32 r2.x, r5.y, r4.w, r2.x -mad.f32 r1.y, c8.y, r0.w, r1.y +mad.f32 r1.y, c8.y, r0.w, r1.x mad.f32 r0.x, c7.x, r0.z, r0.x -mad.f32 r0.y, r5.y, r4.x, r0.y -mov.f32f32 r0.z, r2.x -add.f r2.x, c4.z, (neg)r3.y -mov.f32f32 r1.y, r1.y -mad.f32 r0.x, c8.x, r0.w, r0.x -mov.f32f32 r0.y, r0.y -mad.f32 r0.z, r2.x, r5.x, r0.z -mov.f32f32 r0.w, r1.y -mov.f32f32 r0.x, r0.x -mad.f32 r0.y, r2.x, r4.y, r0.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r1.y, r0.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r2.y, r0.y -mov.f32f32 r2.z, r0.z -mul.f r0.y, r1.x, r6.x -mov.f32f32 r1.x, r0.x -mad.f32 r0.x, r5.y, r6.y, r0.y -mul.f r0.y, c0.w, r2.w -mul.f r0.z, c0.z, r2.w -mul.f r0.w, c0.y, r2.w -mov.f32f32 r0.x, r0.x -mad.f32 r0.y, c1.w, r3.x, r0.y -mad.f32 r0.x, r2.x, r6.z, r0.x -mad.f32 r0.y, c2.w, r3.y, r0.y -mad.f32 r0.z, c1.z, r3.x, r0.z -mad.f32 r0.w, c1.y, r3.x, r0.w -mov.f32f32 r2.x, r0.x -mad.f32 r0.x, c3.w, r3.z, r0.y -mad.f32 r0.y, c2.z, r3.y, r0.z -mad.f32 r0.z, c2.y, r3.y, r0.w -mul.f r2.w, c0.x, r2.w -mov.f32f32 r0.w, r0.x -mad.f32 r0.x, c3.z, r3.z, r0.y -mad.f32 r0.y, c3.y, r3.z, r0.z -mad.f32 r2.w, c1.x, r3.x, r2.w -mov.f32f32 r3.x, c9.x -mov.f32f32 r0.z, r0.x -mov.f32f32 r0.y, r0.y -mad.f32 r0.x, c2.x, r3.y, r2.w -mov.f32f32 r5.w, r3.x -mad.f32 r0.x, c3.x, r3.z, r0.x -mov.f32f32 r2.w, r5.x -mov.f32f32 r3.x, r4.w -mov.f32f32 r3.y, r4.z -mov.f32f32 r0.x, r0.x -mov.f32f32 r5.z, r2.w -mov.f32f32 r5.y, r3.x -mov.f32f32 r5.x, r3.y -mov.f32f32 r2.w, c9.x -mov.f32f32 r3.x, r4.y -mov.f32f32 r3.y, r4.x -mov.f32f32 r3.z, r3.w -mov.f32f32 r4.w, r2.w -mov.f32f32 r4.z, r3.x -mov.f32f32 r4.y, r3.y -mov.f32f32 r4.x, r3.z -mov.f32f32 r2.w, c9.x -mov.f32f32 r3.x, r6.z -mov.f32f32 r3.y, r6.y -mov.f32f32 r6.x, r6.x -mov.f32f32 r3.w, r2.w -mov.f32f32 r3.z, r3.x -mov.f32f32 r3.y, r3.y -mov.f32f32 r3.x, r6.x +mad.f32 r0.y, c1.w, r6.y, r2.w +mad.f32 r1.x, c8.x, r0.w, r0.x +mad.f32 r0.x, c2.w, r6.z, r0.y +mul.f r0.y, c0.z, r6.x +mad.f32 r0.w, c3.w, r6.w, r0.x +mad.f32 r0.x, c1.z, r6.y, r0.y +mul.f r0.y, c0.y, r6.x +mad.f32 r0.x, c2.z, r6.z, r0.x +mad.f32 r0.y, c1.y, r6.y, r0.y +mad.f32 r0.z, c3.z, r6.w, r0.x +mad.f32 r0.x, c2.y, r6.z, r0.y +mul.f r2.w, c0.x, r6.x +mad.f32 r0.y, c3.y, r6.w, r0.x +mad.f32 r0.x, c1.x, r6.y, r2.w +mov.f32f32 r5.w, c9.x +mad.f32 r0.x, c2.x, r6.z, r0.x +mov.f32f32 r4.w, c9.x +mad.f32 r0.x, c3.x, r6.w, r0.x +mov.f32f32 r3.w, c9.x mov.f32f32 r2.w, c9.x -mov.f32f32 r1.w, r1.w -mov.f32f32 r1.z, r1.z -nop -mov.f32f32 r2.w, r2.w -mov.f32f32 r1.w, r1.w -mov.f32f32 r1.z, r1.z end nop nop -nop ; VERT: outputs: r0.x (0:0) r1.x (5:20) r2.x (5:21) r3.x (5:22) r4.x (5:23) r5.x (5:24) -; VERT: inputs: r2.w (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=f,il=12,b=0) r6.x (0:0,cm=7,il=16,b=0) r3.w (0:0,cm=7,il=20,b=0) r4.z (0:0,cm=7,il=24,b=0) r1.z (0:0,cm=3,il=28,b=0) -; VERT: 89 instructions, 0 half, 7 full +; VERT: inputs: r6.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=f,il=12,b=0) r3.x (0:0,cm=7,il=16,b=0) r4.x (0:0,cm=7,il=20,b=0) r5.x (0:0,cm=7,il=24,b=0) r1.z (0:0,cm=3,il=28,b=0) +; VERT: 42 instructions, 0 half, 7 full diff --git a/reference/xonotic-gl2/xonotic-glx-gl2-20.asm b/reference/xonotic-gl2/xonotic-glx-gl2-20.asm index 3c50540..3c90a57 100644 --- a/reference/xonotic-gl2/xonotic-glx-gl2-20.asm +++ b/reference/xonotic-gl2/xonotic-glx-gl2-20.asm @@ -6,75 +6,44 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c9.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)bary.f r0.z, 0, r0.x -bary.f (ei)r0.x, 1, r0.x +bary.f (ei)r0.w, 1, r0.x (rpt1)nop -mov.f32f32 r0.y, r0.z -mov.f32f32 r0.w, r0.z +mov.f32f32 r0.x, r0.z +mov.f32f32 r0.y, r0.w mov.f32f32 r1.x, r0.z -mov.f32f32 r0.z, r0.z -mov.f32f32 r1.y, r0.y -mov.f32f32 r0.y, r0.x -mov.f32f32 r1.w, r0.w -mov.f32f32 r0.w, r0.x -mov.f32f32 r2.y, r1.x -mov.f32f32 r1.z, r0.y -mov.f32f32 r0.y, r0.x -mov.f32f32 r2.x, r0.w -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.x, r0.x -mov.f32f32 r2.z, r0.y -nop -sam (f32)(xyz)r0.w, r1.y, s#2, t#2 -(sy)mul.f r0.y, r1.y, c4.z -mul.f r1.x, r1.x, c4.y -mul.f r0.w, r0.w, c4.x -(ss)nop -sam (f32)(xyzw)r1.y, r1.w, s#0, t#0 -(sy)(ss)mul.f r2.x, r2.x, c8.x -mov.f32f32 r0.y, r0.y -sam (f32)(xyz)r2.y, r2.y, s#3, t#3 -mov.f32f32 r1.x, r1.x -(sy)mad.f32 r0.y, c5.z, r2.w, r0.y -mov.f32f32 r0.w, r0.w -mad.f32 r1.x, c5.y, r2.z, r1.x -mad.f32 r0.w, c5.x, r2.y, r0.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r2.x, r2.x -mov.f32f32 r1.x, r1.x -mov.f32f32 r0.w, r0.w -add.f r0.y, r1.w, r0.y -mov.f32f32 r1.w, r2.x -add.f r1.x, r1.z, r1.x -add.f r1.y, r1.y, r0.w -mul.f r0.y, r0.y, c6.z -mov.f32f32 r1.w, r1.w -mov.f32f32 r0.w, r0.x -nop -mov.f32f32 r0.x, r0.y -mul.f r0.y, r1.x, c6.y -mul.f r1.x, r1.y, c6.x +mov.f32f32 r1.y, r0.w +mov.f32f32 r2.x, r0.z +mov.f32f32 r2.y, r0.w +sam (f32)(xyz)r2.z, r0.z, s#1, t#1 (rpt1)nop (ss)nop -sam (f32)(xyz)r2.x, r0.z, s#1, t#1 -(sy)mad.f32 r0.x, c7.z, r2.z, r0.x -mov.f32f32 r0.y, r0.y -(ss)mov.f32f32 r0.z, r1.x -mad.f32 r0.y, c7.y, r2.y, r0.y -mov.f32f32 r0.x, r0.x -mad.f32 r0.z, c7.x, r2.x, r0.z +sam (f32)(xyz)r0.x, r0.x, s#2, t#2 +(sy)mul.f r0.z, r0.z, c4.z +(ss)mul.f r0.y, r0.y, c4.y +mul.f r0.x, r0.x, c4.x +sam (f32)(xyzw)r0.w, r1.x, s#0, t#0 +(sy)mul.f r1.w, r1.z, c8.x +sam (f32)(xyz)r3.y, r2.x, s#3, t#3 +(sy)mad.f32 r0.z, c5.z, r3.w, r0.z +mad.f32 r0.y, c5.y, r3.z, r0.y +mad.f32 r0.x, c5.x, r3.y, r0.x nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.z, r0.x -mov.f32f32 r0.x, r0.z +add.f r0.z, r1.y, r0.z +add.f r0.y, r1.x, r0.y +add.f r0.x, r0.w, r0.x nop -mov.f32f32 r1.y, r0.y +mul.f r0.z, r0.z, c6.z +mul.f r0.y, r0.y, c6.y +mad.f32 r1.z, c7.z, r3.x, r0.z +(ss)mad.f32 r1.y, c7.y, r2.w, r0.y +mul.f r0.x, r0.x, c6.x nop -mov.f32f32 r1.x, r0.x +mad.f32 r1.x, c7.x, r2.z, r0.x end nop -nop ; FRAG: outputs: r1.x (1:0) ; FRAG: inputs: r0.x (5:20,cm=f,il=8,b=1) -; FRAG: 68 instructions, 0 half, 3 full +; FRAG: 37 instructions, 0 half, 4 full diff --git a/reference/xonotic-gl2/xonotic-glx-gl2-21.asm b/reference/xonotic-gl2/xonotic-glx-gl2-21.asm index 25893e0..b0eaf15 100644 --- a/reference/xonotic-gl2/xonotic-glx-gl2-21.asm +++ b/reference/xonotic-gl2/xonotic-glx-gl2-21.asm @@ -16,51 +16,36 @@ @out(r1.y) out5 @out(r1.z) out6 @out(r1.w) out7 +@const(c8.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)mul.f r1.x, c4.y, r0.x mul.f r0.x, c4.x, r0.x mad.f32 r1.x, c5.y, r0.y, r1.x mad.f32 r0.x, c5.x, r0.y, r0.x mad.f32 r0.y, c6.y, r0.z, r1.x mad.f32 r0.x, c6.x, r0.z, r0.x -mad.f32 r0.y, c7.y, r0.w, r0.y -mad.f32 r0.x, c7.x, r0.w, r0.x -mul.f r0.z, c0.w, r1.z -mul.f r0.w, c0.z, r1.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x -mad.f32 r0.z, c1.w, r1.w, r0.z -mad.f32 r0.w, c1.z, r1.w, r0.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x -(rpt1)nop -mov.f32f32 r1.y, r0.y -mov.f32f32 r1.x, r0.x -mad.f32 r0.x, c2.w, r2.x, r0.z -mad.f32 r0.y, c2.z, r2.x, r0.w -mad.f32 r0.x, c3.w, r2.y, r0.x -mad.f32 r0.y, c3.z, r2.y, r0.y -mul.f r2.z, c0.y, r1.z -mul.f r1.z, c0.x, r1.z -mov.f32f32 r0.w, r0.x -mov.f32f32 r0.z, r0.y -mad.f32 r0.x, c1.y, r1.w, r2.z -mad.f32 r0.y, c1.x, r1.w, r1.z +mad.f32 r1.y, c7.y, r0.w, r0.y +mad.f32 r1.x, c7.x, r0.w, r0.x +mul.f r0.x, c0.w, r1.z +mul.f r0.y, c0.z, r1.z +mad.f32 r0.x, c1.w, r1.w, r0.x +mad.f32 r0.y, c1.z, r1.w, r0.y +mad.f32 r0.x, c2.w, r2.x, r0.x +mad.f32 r0.y, c2.z, r2.x, r0.y +mad.f32 r0.w, c3.w, r2.y, r0.x +mad.f32 r0.z, c3.z, r2.y, r0.y +mul.f r0.x, c0.y, r1.z +mul.f r0.y, c0.x, r1.z +mad.f32 r0.x, c1.y, r1.w, r0.x +mad.f32 r0.y, c1.x, r1.w, r0.y mad.f32 r0.x, c2.y, r2.x, r0.x -mad.f32 r0.y, c2.x, r2.x, r0.y -mad.f32 r0.x, c3.y, r2.y, r0.x -mad.f32 r1.z, c3.x, r2.y, r0.y -(rpt1)nop -mov.f32f32 r0.y, r0.x -mov.f32f32 r0.x, r1.z +mad.f32 r1.z, c2.x, r2.x, r0.y +mad.f32 r0.y, c3.y, r2.y, r0.x +mad.f32 r0.x, c3.x, r2.y, r1.z +mov.f32f32 r1.w, c8.x mov.f32f32 r1.z, c8.x -mov.f32f32 r2.x, c8.x -(rpt1)nop -mov.f32f32 r1.w, r1.z -mov.f32f32 r1.z, r2.x end nop -nop ; VERT: outputs: r0.x (0:0) r1.x (5:20) ; VERT: inputs: r1.z (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=f,il=12,b=0) -; VERT: 45 instructions, 0 half, 3 full +; VERT: 27 instructions, 0 half, 3 full diff --git a/reference/xonotic-gl2/xonotic-glx-gl2-22.asm b/reference/xonotic-gl2/xonotic-glx-gl2-22.asm index 8ac7ce0..5030726 100644 --- a/reference/xonotic-gl2/xonotic-glx-gl2-22.asm +++ b/reference/xonotic-gl2/xonotic-glx-gl2-22.asm @@ -6,271 +6,193 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c21.x) 0xbf000000, 0x40000000, 0x00000000, 0x3f800000 +@const(c22.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.z, 0, r0.x -bary.f r0.w, 4, r0.x bary.f r1.x, 20, r0.x -bary.f r1.y, 1, r0.x +bary.f r1.y, 4, r0.x +bary.f r0.w, 1, r0.x mov.f32f32 r1.z, r0.z -mul.f r1.w, r0.w, r0.w -bary.f r2.x, 5, r0.x -mul.f r2.y, r1.x, r1.x -mov.f32f32 r2.z, r1.z -mov.f32f32 r1.z, r1.y -mad.f32 r1.w, r2.x, r2.x, r1.w -bary.f r3.x, 21, r0.x +mul.f r2.x, r1.x, r1.x +bary.f r2.y, 21, r0.x +mov.f32f32 r1.w, r0.w +mul.f r2.z, r1.y, r1.y +bary.f r2.w, 5, r0.x +mad.f32 r2.x, r2.y, r2.y, r2.x +bary.f r3.x, 22, r0.x mov.f32f32 r3.y, r0.z -mov.f32f32 r2.w, r1.z -mov.f32f32 r1.z, r1.w -bary.f r1.w, 6, r0.x -mad.f32 r2.y, r3.x, r3.x, r2.y -mov.f32f32 r3.y, r3.y -mov.f32f32 r3.z, r1.y -mov.f32f32 r3.w, r0.z -sam (f32)(xyz)r4.x, r2.z, s#0, t#0 -(sy)(ss)add.f r2.z, r4.x, c21.x -mad.f32 r1.z, r1.w, r1.w, r1.z -mov.f32f32 r2.y, r2.y -bary.f r2.w, 22, r0.x -mov.f32f32 r2.z, r2.z -mov.f32f32 r3.z, r3.z -mov.f32f32 r3.w, r3.w -mad.f32 r2.y, r2.w, r2.w, r2.y -mul.f r4.x, r2.z, r2.z +mov.f32f32 r3.z, r0.w +sam (f32)(xyz)r3.w, r1.z, s#0, t#0 +(sy)(ss)add.f r1.z, r3.w, c21.x +mad.f32 r1.w, r3.x, r3.x, r2.x +mad.f32 r2.x, r2.w, r2.w, r2.z +bary.f r2.z, 6, r0.x +mov.f32f32 r3.w, r1.z +add.f r4.x, r4.x, c21.x add.f r4.y, r4.y, c21.x +sam (f32)(xyz)r4.z, r3.y, s#4, t#4 +mad.f32 r2.x, r2.z, r2.z, r2.x +mul.f r1.z, r1.z, r3.w +(ss)mov.f32f32 r3.y, r4.x +rsq r1.w, r1.w +(ss)mov.f32f32 r3.z, r1.w +(ss)mul.f r1.w, r3.x, r1.w +(sy)mul.f r3.x, r5.x, c6.z +mad.f32 r1.z, r4.x, r3.y, r1.z +mov.f32f32 r4.x, r4.y +mul.f r1.x, r1.x, r3.z +mul.f r2.y, r2.y, r3.z +rsq r2.x, r2.x +(ss)mov.f32f32 r3.z, r2.x +mad.f32 r1.z, r4.x, r4.x, r1.z +mov.f32f32 r4.x, r1.x +mov.f32f32 r5.x, r2.y +mul.f r5.y, r1.y, r3.z +mul.f r3.z, r2.w, r3.z +mov.f32f32 r5.z, r1.w +(ss)mul.f r2.x, r2.z, r2.x rsq r1.z, r1.z -(ss)mov.f32f32 r1.z, r1.z -mov.f32f32 r4.w, r1.y -sam (f32)(xyz)r5.x, r3.y, s#4, t#4 -(sy)(ss)mul.f r3.y, r5.z, c6.z -mov.f32f32 r3.z, r4.y -mul.f r4.y, r0.w, r1.z -mul.f r5.z, r2.x, r1.z -mul.f r1.z, r1.w, r1.z -mad.f32 r4.x, r3.z, r3.z, r4.x -mov.f32f32 r4.y, r4.y -mov.f32f32 r5.z, r5.z -mov.f32f32 r1.z, r1.z -mov.f32f32 r4.x, r4.x -add.f r4.z, r4.z, c21.x -rsq r2.y, r2.y -(ss)mov.f32f32 r2.y, r2.y -mov.f32f32 r3.y, r3.y -mov.f32f32 r5.w, r0.z -mov.f32f32 r4.z, r4.z -mul.f r1.x, r1.x, r2.y -mul.f r3.x, r3.x, r2.y -mul.f r2.y, r2.w, r2.y -mad.f32 r2.w, r4.z, r4.z, r4.x -mov.f32f32 r1.x, r1.x -mov.f32f32 r3.x, r3.x -mov.f32f32 r2.y, r2.y -mov.f32f32 r5.w, r5.w -mov.f32f32 r6.x, r1.y -mul.f r5.y, r5.y, c6.y -rsq r2.w, r2.w -(ss)mov.f32f32 r2.w, r2.w -mul.f r5.x, r5.x, c6.x -mov.f32f32 r4.x, r4.w -mov.f32f32 r6.x, r6.x -mul.f r2.z, r2.z, r2.w -mul.f r3.z, r3.z, r2.w -mul.f r2.w, r4.z, r2.w -nop -mov.f32f32 r2.z, r2.z -absneg.f r0.w, (neg)r0.w -mov.f32f32 r3.z, r3.z -mov.f32f32 r2.w, r2.w -mul.f r4.z, r2.z, r1.x -mul.f r4.w, r2.z, r0.w -absneg.f r2.x, (neg)r2.x -mad.f32 r4.z, r3.z, r3.x, r4.z -mul.f r6.y, r2.z, r1.x -sam (f32)(xyz)r6.z, r5.w, s#5, t#5 -(sy)mad.f32 r3.y, c7.z, r7.x, r3.y -mad.f32 r4.w, r3.z, r2.x, r4.w -mov.f32f32 r4.z, r4.z -(ss)mad.f32 r5.w, r3.z, r3.x, r6.y -mad.f32 r4.z, r2.w, r2.y, r4.z -mov.f32f32 r4.w, r4.w -absneg.f r1.w, (neg)r1.w -mov.f32f32 r5.w, r5.w -mul.f r6.x, r4.z, r2.z -mul.f r6.y, r4.z, r3.z -mad.f32 r4.w, r2.w, r1.w, r4.w -mul.f r4.z, r4.z, r2.w -mov.f32f32 r6.x, r6.x -mov.f32f32 r6.y, r6.y -mul.f r2.z, r4.w, r2.z -mul.f r3.z, r4.w, r3.z -mul.f r4.w, r4.w, r2.w -mul.f r6.x, c21.y, r6.x -mov.f32f32 r2.z, r2.z -mov.f32f32 r3.z, r3.z -mov.f32f32 r4.w, r4.w -mov.f32f32 r6.x, r6.x -mul.f r2.z, c21.y, r2.z -mul.f r3.z, c21.y, r3.z -mul.f r4.w, c21.y, r4.w -add.f r1.x, r1.x, (neg)r6.x -mov.f32f32 r2.z, r2.z -mov.f32f32 r3.z, r3.z -mov.f32f32 r4.w, r4.w -mov.f32f32 r1.x, r1.x -add.f r0.w, r0.w, (neg)r2.z -add.f r2.x, r2.x, (neg)r3.z -add.f r1.w, r1.w, (neg)r4.w -mul.f r2.z, c21.y, r6.y -mov.f32f32 r0.w, r0.w -bary.f r3.z, 8, r0.x +(ss)mov.f32f32 r5.w, r1.z +(ss)mul.f r1.z, r4.y, r1.z +mov.f32f32 r6.x, r0.z +mov.f32f32 r6.y, r0.w +mul.f r3.w, r3.w, r5.w +mul.f r3.y, r3.y, r5.w +mov.f32f32 r4.y, r1.z +mul.f r4.w, r4.w, c6.y +mov.f32f32 r5.w, r3.w +absneg.f r1.y, (neg)r1.y +mul.f r1.x, r3.w, r1.x +mov.f32f32 r3.w, r3.y +sam (f32)(xyz)r6.x, r6.x, s#5, t#5 +mad.f32 r1.x, r3.y, r2.y, r1.x +mul.f r2.y, r5.w, r1.y +absneg.f r2.w, (neg)r2.w +mad.f32 r1.x, r1.z, r5.z, r1.x +mul.f r1.z, r5.w, r4.x +(sy)mad.f32 r3.x, c7.z, r6.z, r3.x +mad.f32 r2.y, r3.w, r2.w, r2.y +absneg.f r2.z, (neg)r2.z +mul.f r3.y, r1.x, r5.w +mad.f32 r1.z, r3.w, r5.x, r1.z +mul.f r6.z, r1.x, r3.w +mad.f32 r2.y, r4.y, r2.z, r2.y +mul.f r3.y, c21.y, r3.y +mad.f32 r1.z, r4.y, r5.z, r1.z +mul.f r5.z, c21.y, r6.z +mul.f r5.w, r2.y, r5.w +add.f r3.y, r4.x, (neg)r3.y +mul.f r3.w, r2.y, r3.w +mul.f r2.y, r2.y, r4.y +mul.f r4.x, c21.y, r5.w +mul.f r3.y, r3.y, r5.y +add.f r5.x, r5.x, (neg)r5.z +mul.f r3.w, c21.y, r3.w +add.f r1.y, r1.y, (neg)r4.x +bary.f r4.x, 10, r0.x +mad.f32 r3.y, r5.x, r3.z, r3.y +add.f r2.w, r2.w, (neg)r3.w +mov.f32f32 r3.z, r1.y +bary.f r3.w, 8, r0.x mul.f r1.x, r1.x, r4.y -mov.f32f32 r2.z, r2.z bary.f r4.y, 9, r0.x -mul.f r3.z, r0.w, r3.z -mov.f32f32 r2.x, r2.x -bary.f r4.w, 12, r0.x -add.f r2.z, r3.x, (neg)r2.z -mul.f r3.x, r0.w, r4.y -bary.f r4.y, 10, r0.x -mad.f32 r3.z, r2.x, r4.w, r3.z -mov.f32f32 r2.z, r2.z -bary.f r4.w, 13, r0.x -mul.f r0.w, r0.w, r4.y -mov.f32f32 r3.z, r3.z -mov.f32f32 r1.w, r1.w -bary.f r4.y, 16, r0.x -mad.f32 r1.x, r2.z, r5.z, r1.x -mad.f32 r2.z, r2.x, r4.w, r3.x -bary.f r3.x, 14, r0.x -mad.f32 r3.z, r1.w, r4.y, r3.z -mov.f32f32 r1.x, r1.x -mov.f32f32 r4.y, r4.z -mov.f32f32 r2.z, r2.z -mov.f32f32 r3.z, r3.z -bary.f r4.z, 17, r0.x -mad.f32 r0.w, r2.x, r3.x, r0.w -mul.f r2.x, c21.y, r4.y -mul.f r3.x, c14.x, r3.z -mad.f32 r2.z, r1.w, r4.z, r2.z -mul.f r4.y, c14.z, r3.z -mul.f r3.z, c14.y, r3.z -mov.f32f32 r2.x, r2.x -mov.f32f32 r2.z, r2.z -mov.f32f32 r0.w, r0.w -bary.f (ei)r0.x, 18, r0.x -add.f r0.y, r2.y, (neg)r2.x -mad.f32 r2.x, c15.x, r2.z, r3.x -mad.f32 r3.x, c15.z, r2.z, r4.y -mad.f32 r0.x, r1.w, r0.x, r0.w -mov.f32f32 r0.y, r0.y -mad.f32 r0.w, c15.y, r2.z, r3.z -mad.f32 r1.w, r2.w, r2.y, r5.w -mov.f32f32 r0.x, r0.x -mad.f32 r0.y, r0.y, r1.z, r1.x -mov.f32f32 r1.x, r3.y -mov.f32f32 r1.z, r5.y -mad.f32 r2.x, c16.x, r0.x, r2.x -mov.f32f32 r0.y, r0.y -mad.f32 r2.y, c16.z, r0.x, r3.x -mad.f32 r0.x, c16.y, r0.x, r0.w -mov.f32f32 r0.w, r2.x -max.f r0.y, (neg)r0.y, c21.z -mov.f32f32 r2.x, r2.y -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r2.x, r2.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r2.y, r0.w -log2 r0.y, r0.y -mov.f32f32 r0.w, r0.z -mov.f32f32 r2.w, r2.x -mov.f32f32 r2.z, r0.x -max.f r0.x, r1.w, c21.z +mul.f r1.y, r1.y, r4.x +mul.f r3.w, r3.z, r3.w +mov.f32f32 r4.x, r2.w +bary.f r5.x, 12, r0.x +mul.f r1.x, c21.y, r1.x +mul.f r3.z, r3.z, r4.y +bary.f r4.y, 14, r0.x +mad.f32 r3.w, r4.x, r5.x, r3.w +mul.f r2.y, c21.y, r2.y +add.f r1.x, r1.w, (neg)r1.x +bary.f r1.w, 13, r0.x +mad.f32 r1.y, r2.w, r4.y, r1.y +add.f r2.y, r2.z, (neg)r2.y +mad.f32 r1.x, r1.x, r2.x, r3.y +mad.f32 r1.w, r4.x, r1.w, r3.z +bary.f r2.x, 18, r0.x +mov.f32f32 r2.z, r2.y +bary.f r2.w, 16, r0.x +max.f r1.x, (neg)r1.x, c21.z +bary.f (ei)r0.x, 17, r0.x +mad.f32 r0.y, r2.y, r2.x, r1.y +mad.f32 r1.y, r2.z, r2.w, r3.w +max.f r1.z, r1.z, c21.z (rpt1)nop -mov.f32f32 r1.w, r0.w -mov.f32f32 r0.w, r1.y -mov.f32f32 r0.x, r0.x -sam.3d (f32)(xyz)r2.y, r2.y, s#7, t#7 -sam (f32)(xyzw)r3.x, r3.w, s#1, t#1 -(sy)add.f r1.x, r3.z, r1.x -mov.f32f32 r3.z, r0.z -mad.f32 r1.z, c7.y, r6.w, r1.z -(ss)mov.f32f32 r4.x, r5.x +mov.f32f32 r2.x, r1.y +log2 r1.x, r1.x +mul.f r1.y, c14.x, r1.y +mad.f32 r0.x, r2.z, r0.x, r1.w +mov.f32f32 r2.y, r0.z +mul.f r1.w, c14.y, r2.x +mul.f r2.x, c14.z, r2.x +mov.f32f32 r2.w, r0.x +mov.f32f32 r2.z, r0.w +mad.f32 r0.x, c15.x, r0.x, r1.y +nop +mad.f32 r1.y, c15.y, r2.w, r1.w +mov.f32f32 r1.w, r0.y +mad.f32 r2.x, c15.z, r2.w, r2.x +mad.f32 r3.y, c16.x, r0.y, r0.x +sam (f32)(xyzw)r5.x, r2.y, s#2, t#2 +(sy)mad.f32 r0.x, c11.x, r5.w, c21.w +mad.f32 r3.z, c16.y, r1.w, r1.y +mad.f32 r3.w, c16.z, r1.w, r2.x +mov.f32f32 r0.y, r1.z +mov.f32f32 r1.w, r0.z +(ss)mul.f r0.x, r0.x, r1.x mov.f32f32 r2.x, r0.w -mov.f32f32 r4.y, r3.z -mov.f32f32 r0.w, r1.y -mov.f32f32 r1.z, r1.z -mad.f32 r3.z, c7.x, r6.z, r4.x -mul.f r3.w, r3.w, c13.x -mov.f32f32 r4.z, r0.w -add.f r0.w, r3.y, r1.z -mov.f32f32 r1.z, r3.z -sam (f32)(xyzw)r4.w, r1.w, s#2, t#2 -(sy)(ss)mad.f32 r1.w, c11.x, r5.z, c21.w -mov.f32f32 r2.x, r3.w -mul.f r3.y, r5.y, c10.z -mul.f r3.z, r5.x, c10.y -sam (f32)(xyz)r3.w, r4.y, s#6, t#6 -(sy)mad.f32 r1.x, r4.y, r2.w, r1.x -add.f r1.z, r3.x, r1.z -mad.f32 r0.w, r4.x, r2.z, r0.w -mad.f32 r1.z, r3.w, r2.y, r1.z -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.x, r2.x +(ss)mad.f32 r1.x, c7.y, r6.y, r4.w +mul.f r1.y, r4.z, c6.x +sam.3d (f32)(xyz)r3.y, r3.y, s#7, t#7 +mul.f r2.y, r5.z, c10.z +mad.f32 r1.y, c7.x, r6.x, r1.y +mul.f r2.z, r5.y, c10.y +mul.f r2.w, r5.x, c10.x +sam (f32)(xyzw)r4.x, r1.w, s#1, t#1 +(sy)(ss)add.f r2.x, r4.z, r3.x +mov.f32f32 r5.x, r0.z +mov.f32f32 r5.y, r0.w +add.f r1.x, r4.y, r1.x +add.f r1.y, r4.x, r1.y +exp2 r0.x, r0.x +mul.f r1.w, r4.w, c13.x +sam (f32)(xyz)r4.x, r0.z, s#3, t#3 +(rpt2)nop +sam (f32)(xyz)r4.w, r5.x, s#6, t#6 +(sy)(ss)mad.f32 r0.z, r5.y, r3.w, r2.x +mad.f32 r0.w, r5.x, r3.z, r1.x +mad.f32 r1.x, r4.w, r3.y, r1.y nop -mul.f r2.y, r1.x, c9.z +mul.f r1.y, r0.z, c9.z +mul.f r2.x, r0.w, c9.y +mul.f r3.x, r1.x, c9.x +mov.f32f32 r0.z, r0.z +mul.f r1.y, r1.y, r0.y +mul.f r0.y, r2.x, r0.y +(ss)mad.f32 r1.y, r2.y, r0.x, r1.y +mul.f r0.z, r0.z, c8.z +mad.f32 r0.y, r2.z, r0.x, r0.y +mul.f r2.x, r3.x, r1.z +mad.f32 r0.z, c20.z, r1.y, r0.z mov.f32f32 r0.w, r0.w -mov.f32f32 r1.z, r1.z -mul.f r1.x, r1.x, c8.z -mul.f r2.y, r2.y, r0.x -mul.f r2.z, r0.w, c9.y -mul.f r2.w, r1.z, c9.x +mad.f32 r1.z, c12.z, r4.z, r0.z +mad.f32 r0.x, r2.w, r0.x, r2.x +mov.f32f32 r0.z, r1.x mul.f r0.w, r0.w, c8.y -mov.f32f32 r2.y, r2.y -(ss)mul.f r0.y, r1.w, r0.y -mul.f r1.w, r2.z, r0.x -mul.f r0.x, r2.w, r0.x -mul.f r1.z, r1.z, c8.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r2.z, r1.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r1.w, r2.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r1.y, r1.y -mul.f r2.x, r4.w, c10.x -exp2 r0.y, r0.y -(ss)mad.f32 r2.y, r3.y, r0.y, r2.y -mad.f32 r2.z, r3.z, r0.y, r2.z -mov.f32f32 r2.w, r0.z -mov.f32f32 r3.x, r1.y -mov.f32f32 r0.z, r2.y -mov.f32f32 r1.y, r2.z -mad.f32 r0.x, r2.x, r0.y, r0.x nop -(ss)mad.f32 r0.y, c20.z, r0.z, r1.x -mad.f32 r0.z, c20.y, r1.y, r0.w -sam (f32)(xyz)r2.x, r2.w, s#3, t#3 -(sy)mad.f32 r0.y, c12.z, r2.z, r0.y -mov.f32f32 r0.x, r0.x -mad.f32 r0.z, c12.y, r2.y, r0.z -nop -mov.f32f32 r0.y, r0.y -mad.f32 r0.x, c20.x, r0.x, r1.z -mov.f32f32 r0.z, r0.z -mad.f32 r0.x, c12.x, r2.x, r0.x -mov.f32f32 r1.z, r0.y +mad.f32 r0.y, c20.y, r0.y, r0.w +mul.f r0.z, r0.z, c8.x +mad.f32 r1.y, c12.y, r4.y, r0.y +mad.f32 r0.x, c20.x, r0.x, r0.z nop -mov.f32f32 r1.y, r0.z -mov.f32f32 r0.x, r0.x -(rpt2)nop -mov.f32f32 r1.x, r0.x +mad.f32 r1.x, c12.x, r4.x, r0.x end +nop +nop +nop ; FRAG: outputs: r1.x (1:0) ; FRAG: inputs: r0.x (5:20,cm=f,il=8,b=1) r1.x (5:21,cm=f,il=12,b=1) r2.x (5:22,cm=f,il=16,b=1) r3.x (5:23,cm=f,il=20,b=1) r4.x (5:24,cm=f,il=24,b=1) r5.x (5:25,cm=f,il=28,b=1) -; FRAG: 267 instructions, 0 half, 8 full +; FRAG: 184 instructions, 0 half, 7 full diff --git a/reference/xonotic-gl2/xonotic-glx-gl2-23.asm b/reference/xonotic-gl2/xonotic-glx-gl2-23.asm index e0c9a79..a4f6c1a 100644 --- a/reference/xonotic-gl2/xonotic-glx-gl2-23.asm +++ b/reference/xonotic-gl2/xonotic-glx-gl2-23.asm @@ -1,22 +1,22 @@ ; options: ; VERT: new compiler -@in(r2.w) in0 -@in(r3.x) in1 -@in(r3.y) in2 -@in(r3.z) in3 -@in(r3.w) in4 -@in(r4.x) in5 -@in(r4.y) in6 -@in(r4.z) in7 -@in(r7.x) in8 -@in(r7.y) in9 -@in(r7.z) in10 -@in(r7.w) in12 -@in(r8.x) in13 -@in(r8.y) in14 -@in(r4.w) in16 -@in(r5.x) in17 -@in(r5.y) in18 +@in(r5.w) in0 +@in(r6.x) in1 +@in(r6.y) in2 +@in(r6.z) in3 +@in(r6.w) in4 +@in(r7.x) in5 +@in(r7.y) in6 +@in(r7.z) in7 +@in(r3.x) in8 +@in(r3.y) in9 +@in(r3.z) in10 +@in(r4.x) in12 +@in(r4.y) in13 +@in(r4.z) in14 +@in(r5.x) in16 +@in(r5.y) in17 +@in(r5.z) in18 @out(r0.x) out0 @out(r0.y) out1 @out(r0.z) out2 @@ -45,123 +45,65 @@ @out(r6.y) out25 @out(r6.z) out26 @out(r6.w) out27 -(sy)(ss)mul.f r0.x, c5.x, r4.w -mul.f r0.y, c5.x, r7.w -mad.f32 r0.x, c5.y, r5.x, r0.x -mad.f32 r0.y, c5.y, r8.x, r0.y -add.f r0.z, c4.x, (neg)r2.w -mul.f r0.w, c5.x, r7.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y -mad.f32 r0.x, c5.z, r5.y, r0.x -mad.f32 r0.y, c5.z, r8.y, r0.y -mul.f r1.x, r0.z, r4.w -mad.f32 r0.w, c5.y, r7.y, r0.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y -add.f r1.y, c4.y, (neg)r3.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y -mad.f32 r1.x, r1.y, r5.x, r1.x -mad.f32 r0.w, c5.z, r7.z, r0.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.x, r1.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r6.z, r0.x -mov.f32f32 r6.y, r0.y -add.f r0.x, c4.z, (neg)r3.y -mov.f32f32 r0.y, r0.w -mul.f r0.w, r0.z, r7.w -mul.f r1.z, c6.y, r3.w -mad.f32 r1.x, r0.x, r5.y, r1.x -mov.f32f32 r6.x, r0.y -mad.f32 r0.y, r1.y, r8.x, r0.w -mad.f32 r0.w, c7.y, r4.x, r1.z -mov.f32f32 r1.x, r1.x -mad.f32 r0.w, c8.y, r4.y, r0.w -mul.f r1.z, c6.x, r3.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.x, r1.x -mad.f32 r0.y, r0.x, r8.y, r0.y -mad.f32 r0.w, c9.y, r4.z, r0.w -mad.f32 r1.z, c7.x, r4.x, r1.z -mov.f32f32 r2.z, r1.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.w, r0.w -mad.f32 r1.x, c8.x, r4.y, r1.z -mul.f r0.z, r0.z, r7.x -mov.f32f32 r2.y, r0.y -mov.f32f32 r0.y, r0.w -mad.f32 r0.w, c9.x, r4.z, r1.x -mad.f32 r0.z, r1.y, r7.y, r0.z -mul.f r1.x, c0.w, r2.w -mov.f32f32 r1.y, r0.y -mov.f32f32 r0.y, r0.w -mov.f32f32 r0.z, r0.z -mad.f32 r0.w, c1.w, r3.x, r1.x -mul.f r1.z, c0.z, r2.w -mov.f32f32 r0.y, r0.y -mad.f32 r0.x, r0.x, r7.z, r0.z -(rpt1)nop -mov.f32f32 r1.x, r0.y -mov.f32f32 r2.x, r0.x -mad.f32 r0.x, c2.w, r3.y, r0.w -mad.f32 r0.y, c1.z, r3.x, r1.z -mad.f32 r0.x, c3.w, r3.z, r0.x -mad.f32 r0.y, c2.z, r3.y, r0.y -mul.f r0.z, c0.y, r2.w -mul.f r1.z, c0.x, r2.w -mov.f32f32 r0.w, r0.x -mad.f32 r0.x, c3.z, r3.z, r0.y -mad.f32 r0.y, c1.y, r3.x, r0.z -mad.f32 r1.z, c1.x, r3.x, r1.z -mul.f r1.w, c6.w, r3.w -mov.f32f32 r0.z, r0.x -mad.f32 r0.x, c2.y, r3.y, r0.y -mad.f32 r0.y, c2.x, r3.y, r1.z -mad.f32 r0.x, c3.y, r3.z, r0.x -mad.f32 r1.z, c3.x, r3.z, r0.y -mad.f32 r1.w, c7.w, r4.x, r1.w +@const(c10.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 +@const(c11.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 +(sy)(ss)add.f r0.x, c4.x, (neg)r5.w +mul.f r0.y, c6.y, r6.w +mul.f r0.z, c6.x, r6.w +mul.f r0.w, c0.w, r5.w +mul.f r1.x, r0.x, r5.x +add.f r1.y, c4.y, (neg)r6.x +mul.f r1.z, r0.x, r4.x +mul.f r0.x, r0.x, r3.x +mad.f32 r0.y, c7.y, r7.x, r0.y +mad.f32 r1.x, r1.y, r5.y, r1.x +add.f r1.w, c4.z, (neg)r6.y +mad.f32 r1.z, r1.y, r4.y, r1.z +mad.f32 r0.x, r1.y, r3.y, r0.x +nop +mad.f32 r2.z, r1.w, r5.z, r1.x +mad.f32 r2.y, r1.w, r4.z, r1.z +mad.f32 r2.x, r1.w, r3.z, r0.x +mad.f32 r0.x, c8.y, r7.y, r0.y +mad.f32 r0.y, c7.x, r7.x, r0.z +mad.f32 r1.y, c9.y, r7.z, r0.x +mad.f32 r0.x, c8.x, r7.y, r0.y +mad.f32 r0.y, c1.w, r6.x, r0.w +mad.f32 r1.x, c9.x, r7.z, r0.x +mad.f32 r0.x, c2.w, r6.y, r0.y +mul.f r0.y, c0.z, r5.w +mad.f32 r0.w, c3.w, r6.z, r0.x +mad.f32 r0.x, c1.z, r6.x, r0.y +mul.f r0.y, c0.y, r5.w +mad.f32 r0.x, c2.z, r6.y, r0.x +mad.f32 r0.y, c1.y, r6.x, r0.y +mad.f32 r0.z, c3.z, r6.z, r0.x +mad.f32 r0.x, c2.y, r6.y, r0.y +mul.f r1.z, c0.x, r5.w +mad.f32 r0.y, c3.y, r6.z, r0.x +mad.f32 r0.x, c1.x, r6.x, r1.z +mul.f r1.z, c6.w, r6.w +mad.f32 r0.x, c2.x, r6.y, r0.x +mad.f32 r1.z, c7.w, r7.x, r1.z +mad.f32 r0.x, c3.x, r6.z, r0.x +mad.f32 r6.w, c8.w, r7.y, r1.z +mul.f r1.z, c5.x, r5.x +mul.f r1.w, c5.x, r4.x +mad.f32 r1.z, c5.y, r5.y, r1.z +mad.f32 r1.w, c5.y, r4.y, r1.w +mad.f32 r6.z, c5.z, r5.z, r1.z +mad.f32 r6.y, c5.z, r4.z, r1.w +mul.f r1.z, c5.x, r3.x +mov.f32f32 r5.w, c10.x +mad.f32 r1.z, c5.y, r3.y, r1.z +mov.f32f32 r4.w, c10.x +mad.f32 r6.x, c5.z, r3.z, r1.z +mov.f32f32 r3.w, c10.x mov.f32f32 r2.w, c10.x -mov.f32f32 r0.y, r0.x -mov.f32f32 r0.x, r1.z -mad.f32 r1.z, c8.w, r4.y, r1.w -mov.f32f32 r5.w, r2.w -mov.f32f32 r1.w, r5.y -mov.f32f32 r2.w, r5.x -mov.f32f32 r6.w, r1.z -mov.f32f32 r1.z, r4.w -mov.f32f32 r5.z, r1.w -mov.f32f32 r5.y, r2.w -mov.f32f32 r1.w, c10.x -mov.f32f32 r5.x, r1.z -mov.f32f32 r1.z, r8.y -mov.f32f32 r2.w, r8.x -mov.f32f32 r4.w, r1.w -mov.f32f32 r1.w, r7.w -mov.f32f32 r4.z, r1.z -mov.f32f32 r4.y, r2.w -mov.f32f32 r1.z, c10.x -mov.f32f32 r4.x, r1.w -mov.f32f32 r1.w, r7.z -mov.f32f32 r2.w, r7.y -mov.f32f32 r3.w, r1.z -mov.f32f32 r1.z, r7.x -mov.f32f32 r3.z, r1.w -mov.f32f32 r3.y, r2.w mov.f32f32 r1.w, c10.x -mov.f32f32 r3.x, r1.z mov.f32f32 r1.z, c10.x -mov.f32f32 r7.x, c10.x -mov.f32f32 r2.w, r1.w -nop -mov.f32f32 r1.w, r1.z -mov.f32f32 r1.z, r7.x end -nop ; VERT: outputs: r0.x (0:0) r1.x (5:20) r2.x (5:21) r3.x (5:22) r4.x (5:23) r5.x (5:24) r6.x (5:25) -; VERT: inputs: r2.w (0:0,cm=f,il=8,b=0) r3.w (0:0,cm=f,il=12,b=0) r7.x (0:0,cm=7,il=16,b=0) r7.w (0:0,cm=7,il=20,b=0) r4.w (0:0,cm=7,il=24,b=0) -; VERT: 116 instructions, 0 half, 9 full +; VERT: inputs: r5.w (0:0,cm=f,il=8,b=0) r6.w (0:0,cm=f,il=12,b=0) r3.x (0:0,cm=7,il=16,b=0) r4.x (0:0,cm=7,il=20,b=0) r5.x (0:0,cm=7,il=24,b=0) +; VERT: 56 instructions, 0 half, 8 full diff --git a/reference/xonotic-gl2/xonotic-glx-gl2-24.asm b/reference/xonotic-gl2/xonotic-glx-gl2-24.asm index 87c43ef..155e9a7 100644 --- a/reference/xonotic-gl2/xonotic-glx-gl2-24.asm +++ b/reference/xonotic-gl2/xonotic-glx-gl2-24.asm @@ -6,127 +6,85 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c10.x) 0x3f000000, 0xbf000000, 0x00000000, 0x00000000 +@const(c11.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.z, 0, r0.x -bary.f r0.w, 4, r0.x -bary.f r1.x, 1, r0.x +bary.f r0.w, 1, r0.x +bary.f r1.x, 4, r0.x mov.f32f32 r1.y, (0.000000) mov.f32f32 r1.z, r0.z -mov.f32f32 r1.w, r0.z -mul.f r2.y, r0.w, r0.w +mov.f32f32 r1.w, r0.w +mul.f r2.y, r1.x, r1.x bary.f r2.z, 5, r0.x -mov.f32f32 r2.w, r1.z -mov.f32f32 r1.z, r1.x -mov.f32f32 r3.y, r1.w -mad.f32 r1.w, r2.z, r2.z, r2.y -mov.f32f32 r2.y, r0.z -mov.f32f32 r3.x, r1.z -mov.f32f32 r1.z, r1.x -mov.f32f32 r1.w, r1.w +sam (f32)(xyzw)r2.w, r0.z, s#1, t#1 +(sy)cmps.f.lt r3.w, r3.z, c10.x +mov.f32f32 r4.x, r0.z +mov.f32f32 r4.y, r0.w +mad.f32 r2.y, r2.z, r2.z, r2.y +sam (f32)(xyz)r4.z, r1.z, s#0, t#0 +(sy)(ss)add.f r1.z, r4.z, c10.y +cov.u32f32 r1.w, r3.w bary.f (ei)r0.x, 6, r0.x -mov.f32f32 r3.w, r2.y -mov.f32f32 r0.y, r1.x -mov.f32f32 r3.z, r1.z -sam (f32)(xyz)r4.y, r2.w, s#0, t#0 -(sy)add.f r1.z, r4.y, c10.y -mad.f32 r1.w, r0.x, r0.x, r1.w -mov.f32f32 r4.x, r0.y -add.f r0.y, r4.z, c10.y -mov.f32f32 r1.z, r1.z -add.f r2.y, r4.w, c10.y -(ss)nop -sam (f32)(xyzw)r2.w, r3.y, s#1, t#1 -(sy)cmps.f.lt r4.y, r3.z, c10.x -(ss)mul.f r3.z, r3.z, c8.x -mul.f r4.z, r1.z, r1.z -mov.f32f32 r0.y, r0.y -cov.u32f32 r4.y, r4.y -rsq r1.w, r1.w -(ss)mov.f32f32 r1.w, r1.w -sam (f32)(xyz)r4.w, r3.w, s#2, t#2 -(sy)(ss)mul.f r3.w, r5.y, c4.z -mad.f32 r4.x, r0.y, r0.y, r4.z -cmps.f.ne p0.x, r4.y, r1.y -mul.f r0.w, r0.w, r1.w -mul.f r1.y, r2.z, r1.w -mov.f32f32 r2.z, r4.x -mov.f32f32 r2.y, r2.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.y, r1.y -mul.f r0.x, r0.x, r1.w -mad.f32 r1.w, r2.y, r2.y, r2.z +add.f r0.y, r4.w, c10.y +mov.f32f32 r3.w, r1.z +cmps.f.ne p0.x, r1.w, r1.y +mad.f32 r1.y, r0.x, r0.x, r2.y +sam (f32)(xyz)r4.x, r4.x, s#2, t#2 +(sy)mul.f r1.w, r4.z, c4.z +mul.f r1.z, r1.z, r3.w +mov.f32f32 r2.y, r0.y +mov.f32f32 r4.z, r0.z +mov.f32f32 r4.w, r0.w +mul.f r0.z, r4.y, c4.y +mad.f32 r0.y, r0.y, r2.y, r1.z +add.f r0.w, r5.x, c10.y kill p0.x -mov.f32f32 r2.z, r3.w -mov.f32f32 r0.z, r0.z -mul.f r3.w, r5.x, c4.y -mov.f32f32 r0.x, r0.x -mul.f r4.x, r4.w, c4.x -rsq r1.w, r1.w -(ss)mov.f32f32 r1.w, r1.w -mov.f32f32 r4.y, r0.z -mov.f32f32 r0.z, r3.w -mov.f32f32 r3.w, r4.x -mul.f r1.z, r1.z, r1.w -mul.f r0.y, r0.y, r1.w -mul.f r1.w, r2.y, r1.w +rsq r1.y, r1.y +(ss)mov.f32f32 r1.z, r1.y +mul.f r0.x, r0.x, r1.y +(ss)mov.f32f32 r1.y, r0.w +sam (f32)(xyz)r4.y, r4.z, s#3, t#3 +mul.f r4.x, r4.x, c4.x +mul.f r1.x, r1.x, r1.z +mul.f r1.z, r2.z, r1.z +mad.f32 r0.y, r1.y, r1.y, r0.y +(sy)mad.f32 r1.y, c5.z, r4.w, r1.w +mad.f32 r0.z, c5.y, r4.z, r0.z +mad.f32 r2.z, c5.x, r4.y, r4.x +mul.f r1.w, r3.z, c8.x +(rpt1)nop +rsq r0.y, r0.y +(ss)mov.f32f32 r3.z, r0.y +(ss)mul.f r0.y, r0.w, r0.y +add.f r0.w, r3.y, r1.y +add.f r0.z, r3.x, r0.z +mul.f r1.y, r3.w, r3.z +mul.f r2.y, r2.y, r3.z +add.f r2.z, r2.w, r2.z nop -mov.f32f32 r1.z, r1.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r2.y, r1.w -mov.f32f32 r1.x, r1.x -mul.f r0.w, r1.z, r0.w -mov.f32f32 r1.z, r3.z -mad.f32 r0.y, r0.y, r1.y, r0.w -mov.f32f32 r4.z, r1.x +mul.f r1.x, r1.y, r1.x nop -mov.f32f32 r1.w, r1.z -mov.f32f32 r0.y, r0.y +mad.f32 r1.x, r2.y, r1.z, r1.x nop -mad.f32 r0.x, r2.y, r0.x, r0.y +mad.f32 r0.x, r0.y, r0.x, r1.x (rpt2)nop -mov.f32f32 r0.x, r0.x -sam (f32)(xyz)r0.w, r4.y, s#3, t#3 -(sy)mad.f32 r0.y, c5.y, r1.x, r0.z -mad.f32 r0.z, c5.x, r0.w, r3.w -mad.f32 r0.w, c5.z, r1.y, r2.z max.f r0.x, r0.x, c10.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.x, r0.x -add.f r0.y, r3.x, r0.y -add.f r0.z, r2.w, r0.z -nop -mul.f r1.x, c7.z, r0.x -mul.f r1.y, c7.y, r0.x +(rpt2)nop +mov.f32f32 r0.y, r0.x mul.f r0.x, c7.x, r0.x -add.f r0.w, r3.y, r0.w -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.y, r1.y -mov.f32f32 r0.x, r0.x -nop -mad.f32 r1.x, c9.z, r1.x, c6.z -mad.f32 r1.y, c9.y, r1.y, c6.y +(rpt1)nop +mul.f r1.x, c7.z, r0.y +mul.f r0.y, c7.y, r0.y mad.f32 r0.x, c9.x, r0.x, c6.x nop -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.y, r1.y -mov.f32f32 r0.x, r0.x -nop -mul.f r0.w, r0.w, r1.x -mul.f r0.y, r0.y, r1.y -mul.f r0.x, r0.z, r0.x -nop -mov.f32f32 r0.z, r0.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x -nop -mov.f32f32 r1.z, r0.z -mov.f32f32 r1.y, r0.y -mov.f32f32 r1.x, r0.x +mad.f32 r1.x, c9.z, r1.x, c6.z +mad.f32 r0.y, c9.y, r0.y, c6.y +(rpt1)nop +mul.f r1.z, r0.w, r1.x +mul.f r1.y, r0.z, r0.y +mul.f r1.x, r2.z, r0.x end -nop -nop ; FRAG: outputs: r1.x (1:0) ; FRAG: inputs: r0.x (5:20,cm=f,il=8,b=1) r1.x (5:21,cm=f,il=12,b=1) -; FRAG: 120 instructions, 0 half, 6 full +; FRAG: 83 instructions, 0 half, 6 full diff --git a/reference/xonotic-gl2/xonotic-glx-gl2-25.asm b/reference/xonotic-gl2/xonotic-glx-gl2-25.asm index 293acc0..9882cc7 100644 --- a/reference/xonotic-gl2/xonotic-glx-gl2-25.asm +++ b/reference/xonotic-gl2/xonotic-glx-gl2-25.asm @@ -1,22 +1,22 @@ ; options: ; VERT: new compiler -@in(r2.w) in0 -@in(r3.x) in1 -@in(r3.y) in2 -@in(r3.z) in3 -@in(r3.w) in4 -@in(r4.x) in5 -@in(r4.y) in6 -@in(r4.z) in7 -@in(r0.x) in8 -@in(r0.y) in9 -@in(r0.z) in10 -@in(r0.w) in12 -@in(r1.x) in13 -@in(r1.y) in14 -@in(r1.z) in16 -@in(r1.w) in17 -@in(r2.x) in18 +@in(r1.z) in0 +@in(r1.w) in1 +@in(r2.x) in2 +@in(r2.y) in3 +@in(r2.z) in4 +@in(r2.w) in5 +@in(r3.x) in6 +@in(r3.y) in7 +@in(r3.z) in8 +@in(r3.w) in9 +@in(r4.x) in10 +@in(r4.y) in12 +@in(r4.z) in13 +@in(r4.w) in14 +@in(r5.x) in16 +@in(r5.y) in17 +@in(r5.z) in18 @out(r0.x) out0 @out(r0.y) out1 @out(r0.z) out2 @@ -29,79 +29,49 @@ @out(r2.y) out9 @out(r2.z) out10 @out(r2.w) out11 -(sy)(ss)mul.f r1.z, c4.x, r1.z -mul.f r0.w, c4.x, r0.w -mad.f32 r1.z, c4.y, r1.w, r1.z -mad.f32 r0.w, c4.y, r1.x, r0.w -mul.f r0.x, c4.x, r0.x -mul.f r1.x, c5.y, r3.w -mov.f32f32 r1.z, r1.z -mov.f32f32 r0.w, r0.w -mad.f32 r1.z, c4.z, r2.x, r1.z -mad.f32 r0.w, c4.z, r1.y, r0.w -mad.f32 r0.x, c4.y, r0.y, r0.x -mad.f32 r0.y, c6.y, r4.x, r1.x -mov.f32f32 r1.x, r1.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.x, r0.x -mad.f32 r0.y, c7.y, r4.y, r0.y -mov.f32f32 r1.x, r1.x -mov.f32f32 r0.w, r0.w -mad.f32 r0.x, c4.z, r0.z, r0.x -mad.f32 r0.y, c8.y, r4.z, r0.y -mov.f32f32 r0.z, r1.x -mov.f32f32 r0.w, r0.w -(rpt1)nop -mov.f32f32 r2.z, r0.z -mov.f32f32 r2.y, r0.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y -mul.f r0.z, c5.x, r3.w -mul.f r0.w, c0.w, r2.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y -mad.f32 r0.z, c6.x, r4.x, r0.z -mad.f32 r0.w, c1.w, r3.x, r0.w -mov.f32f32 r2.x, r0.x -mov.f32f32 r1.y, r0.y -mad.f32 r0.x, c7.x, r4.y, r0.z -mad.f32 r0.y, c2.w, r3.y, r0.w -mad.f32 r0.x, c8.x, r4.z, r0.x -mad.f32 r0.y, c3.w, r3.z, r0.y -mul.f r0.z, c0.z, r2.w -mul.f r1.x, c0.y, r2.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.w, r0.y -mad.f32 r0.y, c1.z, r3.x, r0.z -mad.f32 r0.z, c1.y, r3.x, r1.x -mov.f32f32 r0.x, r0.x -mad.f32 r0.y, c2.z, r3.y, r0.y -mad.f32 r0.z, c2.y, r3.y, r0.z -mul.f r1.z, c0.x, r2.w -mov.f32f32 r1.x, r0.x -mad.f32 r0.x, c3.z, r3.z, r0.y -mad.f32 r0.y, c3.y, r3.z, r0.z -mad.f32 r1.z, c1.x, r3.x, r1.z -mul.f r1.w, c5.w, r3.w -mov.f32f32 r0.z, r0.x -mov.f32f32 r0.y, r0.y -mad.f32 r0.x, c2.x, r3.y, r1.z -mad.f32 r1.z, c6.w, r4.x, r1.w -mad.f32 r0.x, c3.x, r3.z, r0.x -mad.f32 r1.z, c7.w, r4.y, r1.z -(rpt1)nop -mov.f32f32 r0.x, r0.x -mov.f32f32 r2.w, r1.z +@const(c9.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 +@const(c10.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 +(sy)(ss)mul.f r0.x, c5.y, r2.z +mul.f r0.y, c5.x, r2.z +mad.f32 r0.x, c6.y, r2.w, r0.x +mad.f32 r0.y, c6.x, r2.w, r0.y +mad.f32 r0.x, c7.y, r3.x, r0.x +mad.f32 r0.y, c7.x, r3.x, r0.y +mad.f32 r1.y, c8.y, r3.y, r0.x +mad.f32 r1.x, c8.x, r3.y, r0.y +mul.f r0.x, c0.w, r1.z +mul.f r0.y, c0.z, r1.z +mad.f32 r0.x, c1.w, r1.w, r0.x +mad.f32 r0.y, c1.z, r1.w, r0.y +mad.f32 r0.x, c2.w, r2.x, r0.x +mad.f32 r0.y, c2.z, r2.x, r0.y +mad.f32 r0.w, c3.w, r2.y, r0.x +mad.f32 r0.z, c3.z, r2.y, r0.y +mul.f r0.x, c0.y, r1.z +mul.f r0.y, c0.x, r1.z +mad.f32 r0.x, c1.y, r1.w, r0.x +mad.f32 r0.y, c1.x, r1.w, r0.y +mad.f32 r0.x, c2.y, r2.x, r0.x +mad.f32 r1.z, c2.x, r2.x, r0.y +mad.f32 r0.y, c3.y, r2.y, r0.x +mad.f32 r0.x, c3.x, r2.y, r1.z +mul.f r1.z, c5.w, r2.z +mul.f r1.w, c4.x, r5.x +mad.f32 r1.z, c6.w, r2.w, r1.z +mad.f32 r1.w, c4.y, r5.y, r1.w +mad.f32 r2.w, c7.w, r3.x, r1.z +mad.f32 r2.z, c4.z, r5.z, r1.w +mul.f r1.z, c4.x, r4.y +mul.f r1.w, c4.x, r3.z +mad.f32 r1.z, c4.y, r4.z, r1.z +mad.f32 r1.w, c4.y, r3.w, r1.w +mad.f32 r2.y, c4.z, r4.w, r1.z +mad.f32 r2.x, c4.z, r4.x, r1.w +mov.f32f32 r1.w, c9.x mov.f32f32 r1.z, c9.x -mov.f32f32 r3.x, c9.x -(rpt1)nop -mov.f32f32 r1.w, r1.z -mov.f32f32 r1.z, r3.x end nop -nop -nop ; VERT: outputs: r0.x (0:0) r1.x (5:20) r2.x (5:21) -; VERT: inputs: r2.w (0:0,cm=f,il=8,b=0) r3.w (0:0,cm=f,il=12,b=0) r0.x (0:0,cm=7,il=16,b=0) r0.w (0:0,cm=7,il=20,b=0) r1.z (0:0,cm=7,il=24,b=0) -; VERT: 72 instructions, 0 half, 5 full +; VERT: inputs: r1.z (0:0,cm=f,il=8,b=0) r2.z (0:0,cm=f,il=12,b=0) r3.z (0:0,cm=7,il=16,b=0) r4.y (0:0,cm=7,il=20,b=0) r5.x (0:0,cm=7,il=24,b=0) +; VERT: 39 instructions, 0 half, 6 full diff --git a/reference/xonotic-gl2/xonotic-glx-gl2-26.asm b/reference/xonotic-gl2/xonotic-glx-gl2-26.asm index 889056b..a97e5c3 100644 --- a/reference/xonotic-gl2/xonotic-glx-gl2-26.asm +++ b/reference/xonotic-gl2/xonotic-glx-gl2-26.asm @@ -6,251 +6,177 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c17.x) 0xbf000000, 0x40000000, 0x00000000, 0x3f800000 +@const(c18.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.z, 0, r0.x -bary.f r0.w, 4, r0.x bary.f r1.x, 20, r0.x -bary.f r1.y, 1, r0.x +bary.f r1.y, 4, r0.x +bary.f r0.w, 1, r0.x mov.f32f32 r1.z, r0.z -mul.f r1.w, r0.w, r0.w -bary.f r2.x, 5, r0.x -mul.f r2.y, r1.x, r1.x -mov.f32f32 r2.z, r1.z -mov.f32f32 r1.z, r1.y -mad.f32 r1.w, r2.x, r2.x, r1.w -bary.f r3.x, 21, r0.x +mul.f r2.x, r1.x, r1.x +bary.f r2.y, 21, r0.x +mov.f32f32 r1.w, r0.w +mul.f r2.z, r1.y, r1.y +bary.f r2.w, 5, r0.x +mad.f32 r2.x, r2.y, r2.y, r2.x +bary.f r3.x, 22, r0.x mov.f32f32 r3.y, r0.z -mov.f32f32 r2.w, r1.z -mov.f32f32 r1.z, r1.w -bary.f r3.z, 6, r0.x -mad.f32 r1.w, r3.x, r3.x, r2.y -mov.f32f32 r3.w, r3.y -mov.f32f32 r2.y, r1.y -mad.f32 r1.z, r3.z, r3.z, r1.z -sam (f32)(xyz)r4.y, r2.z, s#0, t#0 -(sy)(ss)add.f r2.z, r4.y, c17.x -mov.f32f32 r1.w, r1.w -bary.f r2.w, 22, r0.x -mov.f32f32 r4.x, r2.y -mov.f32f32 r2.y, r2.z -mov.f32f32 r2.z, r0.z -rsq r1.z, r1.z -(ss)mov.f32f32 r1.z, r1.z -mad.f32 r1.w, r2.w, r2.w, r1.w -mul.f r3.y, r2.y, r2.y +mov.f32f32 r3.w, r0.z +sam (f32)(xyz)r4.x, r1.z, s#0, t#0 +(sy)(ss)add.f r1.z, r4.x, c17.x +mad.f32 r1.w, r3.x, r3.x, r2.x +mad.f32 r2.x, r2.w, r2.w, r2.z +bary.f r2.z, 6, r0.x +mov.f32f32 r4.w, r1.z +mov.f32f32 r3.z, r0.w +add.f r4.x, r4.y, c17.x add.f r4.y, r4.z, c17.x -mul.f r4.z, r0.w, r1.z -mul.f r5.x, r2.x, r1.z -mul.f r1.z, r3.z, r1.z -mov.f32f32 r4.y, r4.y -mov.f32f32 r4.z, r4.z -mov.f32f32 r5.x, r5.x -mov.f32f32 r1.z, r1.z -mad.f32 r3.y, r4.y, r4.y, r3.y +mul.f r1.z, r1.z, r4.w rsq r1.w, r1.w -(ss)mov.f32f32 r1.w, r1.w -sam (f32)(xyzw)r5.y, r3.w, s#1, t#1 -(sy)(ss)mul.f r3.w, r6.x, c9.x -add.f r4.x, r4.w, c17.x -mov.f32f32 r3.y, r3.y -mul.f r1.x, r1.x, r1.w +(ss)mov.f32f32 r4.z, r1.w +mov.f32f32 r5.x, r4.x +mad.f32 r2.x, r2.z, r2.z, r2.x mul.f r3.x, r3.x, r1.w -mov.f32f32 r4.x, r4.x -mul.f r1.w, r2.w, r1.w -mov.f32f32 r2.w, r3.w -mov.f32f32 r6.x, r2.z -mad.f32 r2.z, r4.x, r4.x, r3.y -mov.f32f32 r1.x, r1.x -mov.f32f32 r3.x, r3.x -mov.f32f32 r3.y, r1.w -mov.f32f32 r1.w, r2.w -(rpt1)nop -rsq r2.z, r2.z -(ss)mov.f32f32 r2.z, r2.z -mov.f32f32 r1.w, r1.w -mov.f32f32 r2.w, r1.y -mov.f32f32 r3.w, r0.z -mul.f r2.y, r2.y, r2.z -mul.f r4.y, r4.y, r2.z -mul.f r2.z, r4.x, r2.z -nop -mov.f32f32 r2.y, r2.y -absneg.f r0.w, (neg)r0.w -mov.f32f32 r4.x, r4.y -mov.f32f32 r2.z, r2.z -mov.f32f32 r6.y, r2.w -mul.f r2.w, r2.y, r0.w -absneg.f r2.x, (neg)r2.x -mul.f r4.y, r2.y, r1.x -mul.f r4.w, r2.y, r1.x -mad.f32 r4.y, r4.x, r3.x, r4.y -mad.f32 r2.w, r4.x, r2.x, r2.w -mad.f32 r4.w, r4.x, r3.x, r4.w -sam (f32)(xyzw)r6.x, r6.x, s#2, t#2 -(sy)mad.f32 r6.w, c7.x, r6.w, c17.w -mul.f r6.z, r6.z, c6.z -mov.f32f32 r2.w, r2.w -absneg.f r3.z, (neg)r3.z -mov.f32f32 r4.y, r4.y -mov.f32f32 r4.w, r4.w -mad.f32 r4.y, r2.z, r3.y, r4.y -mad.f32 r2.w, r2.z, r3.z, r2.w -mad.f32 r4.w, r2.z, r3.y, r4.w -mov.f32f32 r6.w, r6.w -mul.f r7.x, r4.y, r2.y -mul.f r2.y, r2.w, r2.y -mul.f r7.y, r2.w, r4.x -mul.f r2.w, r2.w, r2.z -mov.f32f32 r7.x, r7.x -mov.f32f32 r2.y, r2.y -mov.f32f32 r7.y, r7.y -mov.f32f32 r2.w, r2.w -mul.f r7.x, c17.y, r7.x -mul.f r2.y, c17.y, r2.y -mul.f r7.y, c17.y, r7.y -mul.f r2.w, c17.y, r2.w -mov.f32f32 r7.x, r7.x -mov.f32f32 r2.y, r2.y -mov.f32f32 r7.y, r7.y -mov.f32f32 r2.w, r2.w -add.f r1.x, r1.x, (neg)r7.x -add.f r0.w, r0.w, (neg)r2.y -add.f r2.x, r2.x, (neg)r7.y -add.f r2.y, r3.z, (neg)r2.w -mov.f32f32 r1.x, r1.x -mov.f32f32 r0.w, r0.w -bary.f r2.w, 8, r0.x -bary.f r3.z, 9, r0.x -bary.f r7.x, 10, r0.x +(ss)mov.f32f32 r1.w, r4.y +mad.f32 r1.z, r4.x, r5.x, r1.z mul.f r1.x, r1.x, r4.z -mul.f r2.w, r0.w, r2.w -mov.f32f32 r2.x, r2.x -bary.f r4.z, 12, r0.x -mul.f r4.x, r4.y, r4.x -mul.f r3.z, r0.w, r3.z -mul.f r0.w, r0.w, r7.x -mad.f32 r2.w, r2.x, r4.z, r2.w -mov.f32f32 r4.x, r4.x -bary.f r4.z, 13, r0.x -bary.f r7.x, 14, r0.x -mov.f32f32 r2.w, r2.w -mov.f32f32 r2.y, r2.y -bary.f r7.y, 16, r0.x +mul.f r2.y, r2.y, r4.z +mad.f32 r1.z, r1.w, r1.w, r1.z +rsq r1.w, r2.x +(ss)mov.f32f32 r2.x, r1.w +mov.f32f32 r4.z, r3.x +mul.f r5.y, r2.z, r1.w +mov.f32f32 r4.x, r0.w +mov.f32f32 r5.z, r1.x +mov.f32f32 r5.w, r2.y +rsq r1.z, r1.z +(ss)mov.f32f32 r1.w, r1.z +(ss)mul.f r1.z, r4.y, r1.z +mul.f r4.y, r1.y, r2.x +mul.f r2.x, r2.w, r2.x +mul.f r4.w, r4.w, r1.w +mul.f r5.x, r5.x, r1.w +mov.f32f32 r6.x, r1.z +sam (f32)(xyzw)r6.y, r3.y, s#1, t#1 +(sy)mul.f r1.w, r7.x, c9.x +(ss)mov.f32f32 r3.y, r4.w +absneg.f r1.y, (neg)r1.y +mul.f r1.x, r4.w, r1.x +mov.f32f32 r3.z, r5.x +mad.f32 r1.x, r5.x, r2.y, r1.x +mul.f r2.y, r3.y, r1.y +absneg.f r2.w, (neg)r2.w +mad.f32 r1.x, r1.z, r4.z, r1.x +mul.f r1.z, r3.y, r5.z +sam (f32)(xyzw)r7.x, r3.w, s#2, t#2 +(sy)(ss)mul.f r3.w, r7.z, c6.z +mad.f32 r2.y, r3.z, r2.w, r2.y +absneg.f r2.z, (neg)r2.z +mul.f r4.x, r1.x, r3.y +mad.f32 r1.z, r3.z, r5.w, r1.z +mul.f r4.w, r1.x, r3.z +mad.f32 r2.y, r6.x, r2.z, r2.y mul.f r4.x, c17.y, r4.x -mad.f32 r3.z, r2.x, r4.z, r3.z -mad.f32 r0.w, r2.x, r7.x, r0.w -mad.f32 r2.x, r2.y, r7.y, r2.w -mov.f32f32 r2.w, r4.x -mov.f32f32 r3.z, r3.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r2.x, r2.x -add.f r2.w, r3.x, (neg)r2.w -bary.f r3.x, 17, r0.x -bary.f (ei)r0.x, 18, r0.x -mul.f r0.y, c10.x, r2.x -mov.f32f32 r2.w, r2.w -mad.f32 r3.x, r2.y, r3.x, r3.z -mul.f r3.z, c10.z, r2.x -mul.f r2.x, c10.y, r2.x -mad.f32 r1.x, r2.w, r5.x, r1.x -mov.f32f32 r2.w, r3.x -mad.f32 r0.x, r2.y, r0.x, r0.w -mul.f r0.w, r4.y, r2.z -mov.f32f32 r2.y, r4.w -mad.f32 r0.y, c11.x, r2.w, r0.y -mov.f32f32 r0.x, r0.x -mov.f32f32 r1.x, r1.x -mad.f32 r2.z, c11.z, r2.w, r3.z -mad.f32 r2.x, c11.y, r2.w, r2.x -mad.f32 r0.y, c12.x, r0.x, r0.y -mov.f32f32 r0.w, r0.w -mad.f32 r2.z, c12.z, r0.x, r2.z -mad.f32 r0.x, c12.y, r0.x, r2.x -mov.f32f32 r0.y, r0.y -mul.f r0.w, c17.y, r0.w -mov.f32f32 r2.x, r2.z -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y -add.f r0.w, r3.y, (neg)r0.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r2.z, r0.y -mov.f32f32 r0.y, r0.w -mov.f32f32 r3.x, r2.x +mad.f32 r1.z, r6.x, r4.z, r1.z +mul.f r4.z, c17.y, r4.w +mul.f r3.y, r2.y, r3.y +add.f r4.x, r5.z, (neg)r4.x +mul.f r3.z, r2.y, r3.z +mul.f r2.y, r2.y, r6.x +mul.f r3.y, c17.y, r3.y +mul.f r4.x, r4.x, r4.y +add.f r4.y, r5.w, (neg)r4.z +mul.f r3.z, c17.y, r3.z +add.f r1.y, r1.y, (neg)r3.y +bary.f r3.y, 10, r0.x +mad.f32 r2.x, r4.y, r2.x, r4.x +add.f r2.w, r2.w, (neg)r3.z +mov.f32f32 r3.z, r1.y +bary.f r4.x, 8, r0.x +mul.f r1.x, r1.x, r6.x +bary.f r4.y, 9, r0.x +mul.f r1.y, r1.y, r3.y +mul.f r3.y, r3.z, r4.x +mov.f32f32 r4.x, r2.w +bary.f r4.z, 12, r0.x +mul.f r1.x, c17.y, r1.x +mul.f r3.z, r3.z, r4.y +bary.f r4.y, 14, r0.x +mad.f32 r3.y, r4.x, r4.z, r3.y +mul.f r2.y, c17.y, r2.y +add.f r1.x, r3.x, (neg)r1.x +bary.f r3.x, 13, r0.x +mad.f32 r1.y, r2.w, r4.y, r1.y +add.f r2.y, r2.z, (neg)r2.y +mad.f32 r1.x, r1.x, r5.y, r2.x +mad.f32 r2.x, r4.x, r3.x, r3.z +bary.f r2.z, 18, r0.x +mov.f32f32 r2.w, r2.y +bary.f r3.x, 16, r0.x +max.f r1.x, (neg)r1.x, c17.z +bary.f (ei)r0.x, 17, r0.x +mad.f32 r0.y, r2.y, r2.z, r1.y +mad.f32 r1.y, r2.w, r3.x, r3.y +max.f r1.z, r1.z, c17.z +(rpt1)nop +mov.f32f32 r2.y, r1.y +log2 r1.x, r1.x +mul.f r1.y, c10.x, r1.y +mad.f32 r0.x, r2.w, r0.x, r2.x +mad.f32 r2.x, c7.x, r7.w, c17.w +mul.f r2.z, c10.y, r2.y +mul.f r2.y, c10.z, r2.y mov.f32f32 r2.w, r0.x -max.f r0.x, r2.y, c17.z +(ss)mul.f r1.x, r2.x, r1.x +mad.f32 r0.x, c11.x, r0.x, r1.y +nop +mad.f32 r1.y, c11.y, r2.w, r2.z +mov.f32f32 r2.x, r0.y +mad.f32 r2.y, c11.z, r2.w, r2.y +mad.f32 r2.z, c12.x, r0.y, r0.x +mov.f32f32 r0.x, r1.z +mad.f32 r2.w, c12.y, r2.x, r1.y +mad.f32 r3.x, c12.z, r2.x, r2.y +exp2 r0.y, r1.x +(ss)mul.f r1.x, r7.y, c6.y +mul.f r1.y, r7.x, c6.x +mov.f32f32 r2.x, r0.z +mov.f32f32 r2.y, r0.w +sam (f32)(xyz)r4.x, r0.z, s#3, t#3 (rpt1)nop -mad.f32 r0.y, r0.y, r1.z, r1.x -mov.f32f32 r0.x, r0.x -mul.f r0.w, r6.y, c6.y -sam.3d (f32)(xyz)r2.x, r2.z, s#5, t#5 -(ss)mov.f32f32 r2.w, r3.w -mov.f32f32 r1.x, r1.y -mov.f32f32 r0.y, r0.y +sam.3d (f32)(xyz)r2.z, r2.z, s#5, t#5 +(rpt3)nop +sam (f32)(xyz)r4.w, r2.x, s#4, t#4 +(sy)(ss)mad.f32 r0.z, r5.y, r3.x, r6.w +mad.f32 r0.w, r5.x, r2.w, r6.z +mad.f32 r2.x, r4.w, r2.z, r6.y +nop +mul.f r2.y, r0.z, c5.z +mul.f r2.z, r0.w, c5.y +mul.f r2.w, r2.x, c5.x mov.f32f32 r0.z, r0.z -mul.f r1.z, r6.x, c6.x -mov.f32f32 r3.x, r1.x -max.f r0.y, (neg)r0.y, c17.z -mov.f32f32 r3.y, r0.z -mov.f32f32 r0.z, r1.y -(rpt1)nop -mov.f32f32 r0.y, r0.y -sam (f32)(xyz)r3.z, r2.w, s#4, t#4 -(sy)mad.f32 r1.x, r4.x, r2.z, r5.w -mad.f32 r1.y, r3.w, r2.y, r5.z -mad.f32 r2.x, r3.z, r2.x, r5.y -mov.f32f32 r3.z, r0.z -mov.f32f32 r0.z, r1.x -mov.f32f32 r1.x, r1.y -mov.f32f32 r1.y, r2.x -log2 r0.y, r0.y -(ss)mul.f r0.y, r6.w, r0.y -mul.f r2.x, r0.z, c5.z -mul.f r2.y, r1.x, c5.y -mul.f r2.z, r1.y, c5.x -mul.f r0.z, r0.z, c4.z -mul.f r2.x, r2.x, r0.x mul.f r2.y, r2.y, r0.x mul.f r0.x, r2.z, r0.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r2.x, r2.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r2.y, r2.y -mov.f32f32 r0.x, r0.x -mul.f r1.x, r1.x, c4.y -mul.f r1.y, r1.y, c4.x -(rpt1)nop -exp2 r0.y, r0.y -(ss)mad.f32 r2.x, r6.z, r0.y, r2.x -mad.f32 r0.w, r0.w, r0.y, r2.y -mad.f32 r0.x, r1.z, r0.y, r0.x -(ss)mov.f32f32 r0.y, r1.x -mov.f32f32 r1.x, r2.x +(ss)mad.f32 r2.y, r3.w, r0.y, r2.y +mul.f r0.z, r0.z, c4.z +mad.f32 r0.x, r1.x, r0.y, r0.x +mul.f r1.x, r2.w, r1.z +mad.f32 r0.z, c16.z, r2.y, r0.z mov.f32f32 r0.w, r0.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r1.y, r1.y -mad.f32 r0.z, c16.z, r1.x, r0.z -sam (f32)(xyz)r2.x, r3.y, s#3, t#3 -mad.f32 r0.y, c16.y, r0.w, r0.y -(sy)mad.f32 r0.z, c8.z, r2.z, r0.z -mad.f32 r0.x, c16.x, r0.x, r1.y -mad.f32 r0.y, c8.y, r2.y, r0.y -mad.f32 r0.x, c8.x, r2.x, r0.x -mov.f32f32 r0.z, r0.z -(rpt2)nop -mov.f32f32 r1.z, r0.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x -(rpt1)nop -mov.f32f32 r1.y, r0.y -mov.f32f32 r1.x, r0.x -end +mad.f32 r1.z, c8.z, r4.z, r0.z +mad.f32 r0.y, r1.y, r0.y, r1.x +mov.f32f32 r0.z, r2.x +mul.f r0.w, r0.w, c4.y +nop +mad.f32 r0.x, c16.y, r0.x, r0.w +mul.f r0.z, r0.z, c4.x +mad.f32 r1.y, c8.y, r4.y, r0.x +mad.f32 r0.x, c16.x, r0.y, r0.z nop +mad.f32 r1.x, c8.x, r4.x, r0.x +end nop ; FRAG: outputs: r1.x (1:0) ; FRAG: inputs: r0.x (5:20,cm=f,il=8,b=1) r1.x (5:21,cm=f,il=12,b=1) r2.x (5:22,cm=f,il=16,b=1) r3.x (5:23,cm=f,il=20,b=1) r4.x (5:24,cm=f,il=24,b=1) r5.x (5:25,cm=f,il=28,b=1) -; FRAG: 249 instructions, 0 half, 8 full +; FRAG: 172 instructions, 0 half, 8 full diff --git a/reference/xonotic-gl2/xonotic-glx-gl2-27.asm b/reference/xonotic-gl2/xonotic-glx-gl2-27.asm index e0c9a79..a4f6c1a 100644 --- a/reference/xonotic-gl2/xonotic-glx-gl2-27.asm +++ b/reference/xonotic-gl2/xonotic-glx-gl2-27.asm @@ -1,22 +1,22 @@ ; options: ; VERT: new compiler -@in(r2.w) in0 -@in(r3.x) in1 -@in(r3.y) in2 -@in(r3.z) in3 -@in(r3.w) in4 -@in(r4.x) in5 -@in(r4.y) in6 -@in(r4.z) in7 -@in(r7.x) in8 -@in(r7.y) in9 -@in(r7.z) in10 -@in(r7.w) in12 -@in(r8.x) in13 -@in(r8.y) in14 -@in(r4.w) in16 -@in(r5.x) in17 -@in(r5.y) in18 +@in(r5.w) in0 +@in(r6.x) in1 +@in(r6.y) in2 +@in(r6.z) in3 +@in(r6.w) in4 +@in(r7.x) in5 +@in(r7.y) in6 +@in(r7.z) in7 +@in(r3.x) in8 +@in(r3.y) in9 +@in(r3.z) in10 +@in(r4.x) in12 +@in(r4.y) in13 +@in(r4.z) in14 +@in(r5.x) in16 +@in(r5.y) in17 +@in(r5.z) in18 @out(r0.x) out0 @out(r0.y) out1 @out(r0.z) out2 @@ -45,123 +45,65 @@ @out(r6.y) out25 @out(r6.z) out26 @out(r6.w) out27 -(sy)(ss)mul.f r0.x, c5.x, r4.w -mul.f r0.y, c5.x, r7.w -mad.f32 r0.x, c5.y, r5.x, r0.x -mad.f32 r0.y, c5.y, r8.x, r0.y -add.f r0.z, c4.x, (neg)r2.w -mul.f r0.w, c5.x, r7.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y -mad.f32 r0.x, c5.z, r5.y, r0.x -mad.f32 r0.y, c5.z, r8.y, r0.y -mul.f r1.x, r0.z, r4.w -mad.f32 r0.w, c5.y, r7.y, r0.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y -add.f r1.y, c4.y, (neg)r3.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y -mad.f32 r1.x, r1.y, r5.x, r1.x -mad.f32 r0.w, c5.z, r7.z, r0.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.x, r1.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r6.z, r0.x -mov.f32f32 r6.y, r0.y -add.f r0.x, c4.z, (neg)r3.y -mov.f32f32 r0.y, r0.w -mul.f r0.w, r0.z, r7.w -mul.f r1.z, c6.y, r3.w -mad.f32 r1.x, r0.x, r5.y, r1.x -mov.f32f32 r6.x, r0.y -mad.f32 r0.y, r1.y, r8.x, r0.w -mad.f32 r0.w, c7.y, r4.x, r1.z -mov.f32f32 r1.x, r1.x -mad.f32 r0.w, c8.y, r4.y, r0.w -mul.f r1.z, c6.x, r3.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.x, r1.x -mad.f32 r0.y, r0.x, r8.y, r0.y -mad.f32 r0.w, c9.y, r4.z, r0.w -mad.f32 r1.z, c7.x, r4.x, r1.z -mov.f32f32 r2.z, r1.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.w, r0.w -mad.f32 r1.x, c8.x, r4.y, r1.z -mul.f r0.z, r0.z, r7.x -mov.f32f32 r2.y, r0.y -mov.f32f32 r0.y, r0.w -mad.f32 r0.w, c9.x, r4.z, r1.x -mad.f32 r0.z, r1.y, r7.y, r0.z -mul.f r1.x, c0.w, r2.w -mov.f32f32 r1.y, r0.y -mov.f32f32 r0.y, r0.w -mov.f32f32 r0.z, r0.z -mad.f32 r0.w, c1.w, r3.x, r1.x -mul.f r1.z, c0.z, r2.w -mov.f32f32 r0.y, r0.y -mad.f32 r0.x, r0.x, r7.z, r0.z -(rpt1)nop -mov.f32f32 r1.x, r0.y -mov.f32f32 r2.x, r0.x -mad.f32 r0.x, c2.w, r3.y, r0.w -mad.f32 r0.y, c1.z, r3.x, r1.z -mad.f32 r0.x, c3.w, r3.z, r0.x -mad.f32 r0.y, c2.z, r3.y, r0.y -mul.f r0.z, c0.y, r2.w -mul.f r1.z, c0.x, r2.w -mov.f32f32 r0.w, r0.x -mad.f32 r0.x, c3.z, r3.z, r0.y -mad.f32 r0.y, c1.y, r3.x, r0.z -mad.f32 r1.z, c1.x, r3.x, r1.z -mul.f r1.w, c6.w, r3.w -mov.f32f32 r0.z, r0.x -mad.f32 r0.x, c2.y, r3.y, r0.y -mad.f32 r0.y, c2.x, r3.y, r1.z -mad.f32 r0.x, c3.y, r3.z, r0.x -mad.f32 r1.z, c3.x, r3.z, r0.y -mad.f32 r1.w, c7.w, r4.x, r1.w +@const(c10.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 +@const(c11.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 +(sy)(ss)add.f r0.x, c4.x, (neg)r5.w +mul.f r0.y, c6.y, r6.w +mul.f r0.z, c6.x, r6.w +mul.f r0.w, c0.w, r5.w +mul.f r1.x, r0.x, r5.x +add.f r1.y, c4.y, (neg)r6.x +mul.f r1.z, r0.x, r4.x +mul.f r0.x, r0.x, r3.x +mad.f32 r0.y, c7.y, r7.x, r0.y +mad.f32 r1.x, r1.y, r5.y, r1.x +add.f r1.w, c4.z, (neg)r6.y +mad.f32 r1.z, r1.y, r4.y, r1.z +mad.f32 r0.x, r1.y, r3.y, r0.x +nop +mad.f32 r2.z, r1.w, r5.z, r1.x +mad.f32 r2.y, r1.w, r4.z, r1.z +mad.f32 r2.x, r1.w, r3.z, r0.x +mad.f32 r0.x, c8.y, r7.y, r0.y +mad.f32 r0.y, c7.x, r7.x, r0.z +mad.f32 r1.y, c9.y, r7.z, r0.x +mad.f32 r0.x, c8.x, r7.y, r0.y +mad.f32 r0.y, c1.w, r6.x, r0.w +mad.f32 r1.x, c9.x, r7.z, r0.x +mad.f32 r0.x, c2.w, r6.y, r0.y +mul.f r0.y, c0.z, r5.w +mad.f32 r0.w, c3.w, r6.z, r0.x +mad.f32 r0.x, c1.z, r6.x, r0.y +mul.f r0.y, c0.y, r5.w +mad.f32 r0.x, c2.z, r6.y, r0.x +mad.f32 r0.y, c1.y, r6.x, r0.y +mad.f32 r0.z, c3.z, r6.z, r0.x +mad.f32 r0.x, c2.y, r6.y, r0.y +mul.f r1.z, c0.x, r5.w +mad.f32 r0.y, c3.y, r6.z, r0.x +mad.f32 r0.x, c1.x, r6.x, r1.z +mul.f r1.z, c6.w, r6.w +mad.f32 r0.x, c2.x, r6.y, r0.x +mad.f32 r1.z, c7.w, r7.x, r1.z +mad.f32 r0.x, c3.x, r6.z, r0.x +mad.f32 r6.w, c8.w, r7.y, r1.z +mul.f r1.z, c5.x, r5.x +mul.f r1.w, c5.x, r4.x +mad.f32 r1.z, c5.y, r5.y, r1.z +mad.f32 r1.w, c5.y, r4.y, r1.w +mad.f32 r6.z, c5.z, r5.z, r1.z +mad.f32 r6.y, c5.z, r4.z, r1.w +mul.f r1.z, c5.x, r3.x +mov.f32f32 r5.w, c10.x +mad.f32 r1.z, c5.y, r3.y, r1.z +mov.f32f32 r4.w, c10.x +mad.f32 r6.x, c5.z, r3.z, r1.z +mov.f32f32 r3.w, c10.x mov.f32f32 r2.w, c10.x -mov.f32f32 r0.y, r0.x -mov.f32f32 r0.x, r1.z -mad.f32 r1.z, c8.w, r4.y, r1.w -mov.f32f32 r5.w, r2.w -mov.f32f32 r1.w, r5.y -mov.f32f32 r2.w, r5.x -mov.f32f32 r6.w, r1.z -mov.f32f32 r1.z, r4.w -mov.f32f32 r5.z, r1.w -mov.f32f32 r5.y, r2.w -mov.f32f32 r1.w, c10.x -mov.f32f32 r5.x, r1.z -mov.f32f32 r1.z, r8.y -mov.f32f32 r2.w, r8.x -mov.f32f32 r4.w, r1.w -mov.f32f32 r1.w, r7.w -mov.f32f32 r4.z, r1.z -mov.f32f32 r4.y, r2.w -mov.f32f32 r1.z, c10.x -mov.f32f32 r4.x, r1.w -mov.f32f32 r1.w, r7.z -mov.f32f32 r2.w, r7.y -mov.f32f32 r3.w, r1.z -mov.f32f32 r1.z, r7.x -mov.f32f32 r3.z, r1.w -mov.f32f32 r3.y, r2.w mov.f32f32 r1.w, c10.x -mov.f32f32 r3.x, r1.z mov.f32f32 r1.z, c10.x -mov.f32f32 r7.x, c10.x -mov.f32f32 r2.w, r1.w -nop -mov.f32f32 r1.w, r1.z -mov.f32f32 r1.z, r7.x end -nop ; VERT: outputs: r0.x (0:0) r1.x (5:20) r2.x (5:21) r3.x (5:22) r4.x (5:23) r5.x (5:24) r6.x (5:25) -; VERT: inputs: r2.w (0:0,cm=f,il=8,b=0) r3.w (0:0,cm=f,il=12,b=0) r7.x (0:0,cm=7,il=16,b=0) r7.w (0:0,cm=7,il=20,b=0) r4.w (0:0,cm=7,il=24,b=0) -; VERT: 116 instructions, 0 half, 9 full +; VERT: inputs: r5.w (0:0,cm=f,il=8,b=0) r6.w (0:0,cm=f,il=12,b=0) r3.x (0:0,cm=7,il=16,b=0) r4.x (0:0,cm=7,il=20,b=0) r5.x (0:0,cm=7,il=24,b=0) +; VERT: 56 instructions, 0 half, 8 full diff --git a/reference/xonotic-gl2/xonotic-glx-gl2-28.asm b/reference/xonotic-gl2/xonotic-glx-gl2-28.asm index be69eca..8941a0d 100644 --- a/reference/xonotic-gl2/xonotic-glx-gl2-28.asm +++ b/reference/xonotic-gl2/xonotic-glx-gl2-28.asm @@ -6,175 +6,117 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c11.x) 0xbf000000, 0x40000000, 0x00000000, 0x3f800000 +@const(c12.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.z, 0, r0.x -bary.f r0.w, 4, r0.x bary.f r1.x, 8, r0.x -bary.f r1.y, 1, r0.x +bary.f r1.y, 4, r0.x +bary.f r0.w, 1, r0.x mov.f32f32 r1.z, r0.z -mul.f r1.w, r0.w, r0.w -bary.f r2.x, 5, r0.x -mul.f r2.y, r1.x, r1.x -mov.f32f32 r2.z, r1.z -mov.f32f32 r1.z, r1.y -mad.f32 r1.w, r2.x, r2.x, r1.w -bary.f r3.x, 9, r0.x +mul.f r2.x, r1.x, r1.x +bary.f r2.y, 9, r0.x +mov.f32f32 r1.w, r0.w +mul.f r2.z, r1.y, r1.y +bary.f r2.w, 5, r0.x +mad.f32 r2.x, r2.y, r2.y, r2.x +bary.f r3.x, 10, r0.x mov.f32f32 r3.y, r0.z -mov.f32f32 r2.w, r1.z -mov.f32f32 r1.z, r1.w -bary.f r1.w, 6, r0.x -mad.f32 r2.y, r3.x, r3.x, r2.y -mov.f32f32 r3.y, r3.y -mov.f32f32 r3.z, r1.y +mov.f32f32 r3.z, r0.w +sam (f32)(xyz)r3.w, r1.z, s#0, t#0 +(sy)(ss)add.f r1.z, r3.w, c11.x +mad.f32 r1.w, r3.x, r3.x, r2.x +mad.f32 r2.x, r2.w, r2.w, r2.z +bary.f (ei)r0.x, 6, r0.x +mov.f32f32 r0.y, r1.z +add.f r2.z, r4.x, c11.x +add.f r3.w, r4.y, c11.x +sam (f32)(xyzw)r4.x, r3.y, s#1, t#1 +mad.f32 r2.x, r0.x, r0.x, r2.x +mul.f r1.z, r1.z, r0.y +(ss)mov.f32f32 r3.y, r2.z +rsq r1.w, r1.w +(ss)mov.f32f32 r3.z, r1.w +mul.f r3.x, r3.x, r1.w +(ss)mov.f32f32 r1.w, r3.w +mad.f32 r1.z, r2.z, r3.y, r1.z +mul.f r1.x, r1.x, r3.z +mul.f r2.y, r2.y, r3.z mad.f32 r1.z, r1.w, r1.w, r1.z -sam (f32)(xyz)r3.w, r2.z, s#0, t#0 -(sy)(ss)add.f r2.z, r3.w, c11.x -mov.f32f32 r2.y, r2.y -bary.f (ei)r0.x, 10, r0.x -mov.f32f32 r3.z, r3.z -mov.f32f32 r0.y, r2.z -mov.f32f32 r2.z, r0.z +rsq r1.w, r2.x +(ss)mov.f32f32 r2.x, r1.w +mov.f32f32 r2.z, r3.x +mul.f r0.x, r0.x, r1.w +mov.f32f32 r3.z, r1.x +mov.f32f32 r5.x, r2.y +mul.f r1.y, r1.y, r2.x rsq r1.z, r1.z -(ss)mov.f32f32 r1.z, r1.z -mad.f32 r2.y, r0.x, r0.x, r2.y -mul.f r2.w, r0.y, r0.y -add.f r3.w, r4.x, c11.x -mul.f r0.w, r0.w, r1.z -mul.f r2.x, r2.x, r1.z -mul.f r1.z, r1.w, r1.z -mov.f32f32 r1.w, r3.w -mov.f32f32 r0.w, r0.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r1.z, r1.z -mad.f32 r2.w, r1.w, r1.w, r2.w -rsq r2.y, r2.y -(ss)mov.f32f32 r2.y, r2.y -sam (f32)(xyzw)r3.y, r3.y, s#1, t#1 -(sy)mul.f r4.x, r4.x, c9.x -mul.f r4.z, r3.w, c4.z -mov.f32f32 r2.w, r2.w -add.f r4.y, r4.y, c11.x -mul.f r1.x, r1.x, r2.y -mul.f r3.x, r3.x, r2.y -mul.f r0.x, r0.x, r2.y -mov.f32f32 r2.y, r4.y -mov.f32f32 r1.x, r1.x -mov.f32f32 r3.x, r3.x -mov.f32f32 r0.x, r0.x -mad.f32 r2.w, r2.y, r2.y, r2.w -mov.f32f32 r4.x, r4.x -mov.f32f32 r4.y, r4.z -mov.f32f32 r4.z, r2.z -mul.f r2.z, r3.w, c5.z -mov.f32f32 r3.w, r4.x -nop -rsq r2.w, r2.w -(ss)mov.f32f32 r2.w, r2.w -mul.f r4.x, r3.z, c4.y -mul.f r4.w, r3.y, c4.x -mov.f32f32 r5.x, r1.y -mul.f r0.y, r0.y, r2.w -mul.f r1.w, r1.w, r2.w -mul.f r2.y, r2.y, r2.w -nop +(ss)mov.f32f32 r5.y, r1.z +(ss)mul.f r1.z, r3.w, r1.z +mul.f r2.x, r2.w, r2.x +(sy)mul.f r1.w, r4.w, c9.x +mul.f r0.y, r0.y, r5.y +mul.f r2.w, r3.y, r5.y +mov.f32f32 r3.y, r1.z +mul.f r3.w, r4.z, c4.z +mul.f r1.x, r0.y, r1.x mov.f32f32 r0.y, r0.y -mov.f32f32 r2.w, r1.w -mov.f32f32 r2.y, r2.y -mov.f32f32 r1.w, r3.w -mul.f r3.w, r0.y, r1.x -mul.f r5.y, r0.y, r1.x -mad.f32 r3.w, r2.w, r3.x, r3.w -mad.f32 r5.y, r2.w, r3.x, r5.y -mov.f32f32 r4.x, r4.x -mov.f32f32 r5.z, r4.w -mov.f32f32 r3.w, r3.w -mov.f32f32 r4.w, r5.y -mad.f32 r3.w, r2.y, r0.x, r3.w -mad.f32 r5.y, r2.y, r0.x, r4.w -mov.f32f32 r4.w, r5.x -mul.f r3.z, r3.z, c5.y -mul.f r0.y, r3.w, r0.y -mul.f r2.w, r3.w, r2.w -mul.f r2.y, r3.w, r2.y -mov.f32f32 r3.w, r5.y -mov.f32f32 r0.y, r0.y -mov.f32f32 r2.w, r2.w -mov.f32f32 r2.y, r2.y -max.f r3.w, r3.w, c11.z +mad.f32 r1.x, r2.w, r2.y, r1.x +mov.f32f32 r2.y, r2.w +mad.f32 r1.x, r1.z, r2.z, r1.x +mul.f r1.z, r0.y, r3.z +mul.f r2.w, r4.z, c5.z +mov.f32f32 r4.z, r0.z +mul.f r0.y, r1.x, r0.y +mad.f32 r1.z, r2.y, r5.x, r1.z +mul.f r2.y, r1.x, r2.y +mul.f r1.x, r1.x, r3.y mul.f r0.y, c11.y, r0.y -mul.f r2.w, c11.y, r2.w +mad.f32 r1.z, r3.y, r2.z, r1.z mul.f r2.y, c11.y, r2.y -mov.f32f32 r3.w, r3.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r2.w, r2.w -mov.f32f32 r2.y, r2.y -mul.f r2.z, r2.z, r3.w -add.f r0.y, r1.x, (neg)r0.y -add.f r1.x, r3.x, (neg)r2.w -add.f r0.x, r0.x, (neg)r2.y -mov.f32f32 r2.y, r2.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.x, r1.x -mov.f32f32 r0.x, r0.x -mul.f r2.z, r3.z, r3.w -mul.f r0.y, r0.y, r0.w -mul.f r0.w, r3.y, c5.x -mad.f32 r0.y, r1.x, r2.x, r0.y -mov.f32f32 r1.x, r2.z -sam (f32)(xyzw)r2.z, r4.z, s#2, t#2 -(sy)mad.f32 r2.x, c7.x, r3.y, c11.w -mul.f r3.x, r3.x, c6.z -mov.f32f32 r0.y, r0.y -mul.f r0.w, r0.w, r3.w -mad.f32 r0.x, r0.x, r1.z, r0.y -mov.f32f32 r0.y, r2.x -mul.f r1.z, r2.w, c6.y -mul.f r2.x, r2.z, c6.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.z, r0.z -mov.f32f32 r1.y, r1.y +mul.f r1.x, c11.y, r1.x +add.f r0.y, r3.z, (neg)r0.y +max.f r1.z, r1.z, c11.z +add.f r2.y, r5.x, (neg)r2.y +mul.f r2.z, r4.x, c5.x +mul.f r0.y, r0.y, r1.y +mov.f32f32 r1.y, r1.z +mad.f32 r0.y, r2.y, r2.x, r0.y +add.f r1.x, r3.x, (neg)r1.x +mul.f r1.z, r2.z, r1.z +mov.f32f32 r4.w, r0.w +mul.f r2.x, r2.w, r1.y +mad.f32 r0.x, r1.x, r0.x, r0.y +mul.f r0.y, r4.y, c5.y +mul.f r1.x, r4.y, c4.y +mul.f r2.y, r4.x, c4.x max.f r0.x, (neg)r0.x, c11.z -(rpt2)nop -mov.f32f32 r0.x, r0.x -mov.f32f32 r2.z, r0.z -mov.f32f32 r2.w, r1.y -(rpt3)nop +mul.f r0.y, r0.y, r1.y +sam (f32)(xyzw)r2.z, r4.z, s#2, t#2 +(sy)mul.f r1.y, r3.x, c6.z +mul.f r2.w, r2.w, c6.y +mad.f32 r3.x, c7.x, r3.y, c11.w +mul.f r2.z, r2.z, c6.x +(ss)nop +sam (f32)(xyz)r4.x, r0.z, s#3, t#3 +nop log2 r0.x, r0.x -(ss)mul.f r0.x, r0.y, r0.x -(rpt1)nop -sam (f32)(xyz)r4.z, r2.z, s#3, t#3 nop -mov.f32f32 r0.x, r0.x +(ss)mul.f r0.x, r3.x, r0.x (rpt5)nop exp2 r0.x, r0.x -(ss)mad.f32 r0.y, r3.x, r0.x, r2.y -mad.f32 r0.z, r1.z, r0.x, r1.x -(ss)mad.f32 r0.x, r2.x, r0.x, r0.w -nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.x, r0.x -nop -mad.f32 r0.y, c10.z, r0.y, r4.y -mad.f32 r0.z, c10.y, r0.z, r4.x -(sy)mad.f32 r0.y, c8.z, r5.x, r0.y -mad.f32 r0.z, c8.y, r4.w, r0.z -mad.f32 r0.x, c10.x, r0.x, r5.z +(ss)mad.f32 r0.z, r1.y, r0.x, r2.x +mad.f32 r0.y, r2.w, r0.x, r0.y +(ss)mad.f32 r0.x, r2.z, r0.x, r1.z nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -mad.f32 r0.x, c8.x, r4.z, r0.x +mad.f32 r0.z, c10.z, r0.z, r3.w +mad.f32 r0.y, c10.y, r0.y, r1.x +(sy)mad.f32 r1.z, c8.z, r4.z, r0.z +mad.f32 r1.y, c8.y, r4.y, r0.y +mad.f32 r0.x, c10.x, r0.x, r2.y nop -mov.f32f32 r1.z, r0.y -mov.f32f32 r1.y, r0.z -mov.f32f32 r0.x, r0.x -(rpt2)nop -mov.f32f32 r1.x, r0.x +mad.f32 r1.x, c8.x, r4.x, r0.x end -nop -nop -nop ; FRAG: outputs: r1.x (1:0) ; FRAG: inputs: r0.x (5:20,cm=f,il=8,b=1) r1.x (5:21,cm=f,il=12,b=1) r2.x (5:22,cm=f,il=16,b=1) -; FRAG: 178 instructions, 0 half, 6 full +; FRAG: 113 instructions, 0 half, 6 full diff --git a/reference/xonotic-gl2/xonotic-glx-gl2-29.asm b/reference/xonotic-gl2/xonotic-glx-gl2-29.asm index 9d9cf0a..a1e4514 100644 --- a/reference/xonotic-gl2/xonotic-glx-gl2-29.asm +++ b/reference/xonotic-gl2/xonotic-glx-gl2-29.asm @@ -1,22 +1,22 @@ ; options: ; VERT: new compiler -@in(r3.w) in0 -@in(r4.x) in1 -@in(r4.y) in2 -@in(r4.z) in3 -@in(r4.w) in4 -@in(r5.x) in5 -@in(r5.y) in6 -@in(r5.z) in7 -@in(r0.x) in8 -@in(r0.y) in9 -@in(r0.z) in10 -@in(r0.w) in12 -@in(r1.x) in13 -@in(r1.y) in14 -@in(r1.z) in16 -@in(r1.w) in17 -@in(r2.x) in18 +@in(r2.w) in0 +@in(r3.x) in1 +@in(r3.y) in2 +@in(r3.z) in3 +@in(r3.w) in4 +@in(r4.x) in5 +@in(r4.y) in6 +@in(r4.z) in7 +@in(r4.w) in8 +@in(r5.x) in9 +@in(r5.y) in10 +@in(r5.z) in12 +@in(r5.w) in13 +@in(r6.x) in14 +@in(r6.y) in16 +@in(r6.z) in17 +@in(r6.w) in18 @out(r0.x) out0 @out(r0.y) out1 @out(r0.z) out2 @@ -33,99 +33,65 @@ @out(r3.y) out13 @out(r3.z) out14 @out(r3.w) out15 -(sy)(ss)mul.f r2.y, c5.x, r1.z -mul.f r2.z, c5.x, r0.w -mad.f32 r2.y, c5.y, r1.w, r2.y -mad.f32 r2.z, c5.y, r1.x, r2.z -add.f r2.w, c4.x, (neg)r3.w -mul.f r3.x, c5.x, r0.x -mov.f32f32 r2.y, r2.y -mov.f32f32 r2.z, r2.z -mad.f32 r2.y, c5.z, r2.x, r2.y -mad.f32 r2.z, c5.z, r1.y, r2.z -mul.f r1.z, r2.w, r1.z -mad.f32 r3.x, c5.y, r0.y, r3.x -mov.f32f32 r2.y, r2.y -mov.f32f32 r2.z, r2.z -add.f r5.w, c4.y, (neg)r4.x -mov.f32f32 r3.x, r3.x -mov.f32f32 r2.y, r2.y -mov.f32f32 r2.z, r2.z -mad.f32 r1.z, r5.w, r1.w, r1.z -mad.f32 r1.w, c5.z, r0.z, r3.x -mov.f32f32 r2.y, r2.y -mov.f32f32 r2.z, r2.z -mov.f32f32 r1.z, r1.z -mov.f32f32 r1.w, r1.w -mov.f32f32 r3.z, r2.y -mov.f32f32 r3.y, r2.z -add.f r6.x, c4.z, (neg)r4.y -mov.f32f32 r1.w, r1.w -mul.f r0.w, r2.w, r0.w -mul.f r2.y, c6.y, r4.w -mad.f32 r1.z, r6.x, r2.x, r1.z -mov.f32f32 r3.x, r1.w -mad.f32 r0.w, r5.w, r1.x, r0.w -mad.f32 r1.x, c7.y, r5.x, r2.y -mov.f32f32 r1.z, r1.z -mad.f32 r1.x, c8.y, r5.y, r1.x -mul.f r1.w, c6.x, r4.w -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.z, r1.z -mad.f32 r0.w, r6.x, r1.y, r0.w -mad.f32 r1.x, c9.y, r5.z, r1.x -mad.f32 r1.y, c7.x, r5.x, r1.w -mov.f32f32 r2.z, r1.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.x, r1.x -mad.f32 r1.y, c8.x, r5.y, r1.y -mul.f r0.x, r2.w, r0.x -mov.f32f32 r2.y, r0.w -mov.f32f32 r0.w, r1.x -mad.f32 r1.x, c9.x, r5.z, r1.y -mad.f32 r0.x, r5.w, r0.y, r0.x -mul.f r0.y, c0.w, r3.w -mov.f32f32 r1.y, r0.w -mov.f32f32 r0.w, r1.x -mov.f32f32 r0.x, r0.x -mad.f32 r0.y, c1.w, r4.x, r0.y -mul.f r1.z, c0.z, r3.w -mov.f32f32 r0.w, r0.w -mad.f32 r0.x, r6.x, r0.z, r0.x -(rpt1)nop -mov.f32f32 r1.x, r0.w -mov.f32f32 r2.x, r0.x -mad.f32 r0.x, c2.w, r4.y, r0.y -mad.f32 r0.y, c1.z, r4.x, r1.z -mad.f32 r0.x, c3.w, r4.z, r0.x -mad.f32 r0.y, c2.z, r4.y, r0.y -mul.f r0.z, c0.y, r3.w -mul.f r1.z, c0.x, r3.w -mov.f32f32 r0.w, r0.x -mad.f32 r0.x, c3.z, r4.z, r0.y -mad.f32 r0.y, c1.y, r4.x, r0.z -mad.f32 r1.z, c1.x, r4.x, r1.z -mul.f r1.w, c6.w, r4.w -mov.f32f32 r0.z, r0.x -mad.f32 r0.x, c2.y, r4.y, r0.y -mad.f32 r0.y, c2.x, r4.y, r1.z -mad.f32 r0.x, c3.y, r4.z, r0.x -mad.f32 r1.z, c3.x, r4.z, r0.y -mad.f32 r1.w, c7.w, r5.x, r1.w +@const(c10.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 +@const(c11.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 +(sy)(ss)add.f r0.x, c4.x, (neg)r2.w +mul.f r0.y, c6.y, r3.w +mul.f r0.z, c6.x, r3.w +mul.f r0.w, c0.w, r2.w +mul.f r1.x, r0.x, r6.y +add.f r1.y, c4.y, (neg)r3.x +mul.f r1.z, r0.x, r5.z +mul.f r0.x, r0.x, r4.w +mad.f32 r0.y, c7.y, r4.x, r0.y +mad.f32 r1.x, r1.y, r6.z, r1.x +add.f r1.w, c4.z, (neg)r3.y +mad.f32 r1.z, r1.y, r5.w, r1.z +mad.f32 r0.x, r1.y, r5.x, r0.x +nop +mad.f32 r2.z, r1.w, r6.w, r1.x +mad.f32 r2.y, r1.w, r6.x, r1.z +mad.f32 r2.x, r1.w, r5.y, r0.x +mad.f32 r0.x, c8.y, r4.y, r0.y +mad.f32 r0.y, c7.x, r4.x, r0.z +mad.f32 r1.y, c9.y, r4.z, r0.x +mad.f32 r0.x, c8.x, r4.y, r0.y +mad.f32 r0.y, c1.w, r3.x, r0.w +mad.f32 r1.x, c9.x, r4.z, r0.x +mad.f32 r0.x, c2.w, r3.y, r0.y +mul.f r0.y, c0.z, r2.w +mad.f32 r0.w, c3.w, r3.z, r0.x +mad.f32 r0.x, c1.z, r3.x, r0.y +mul.f r0.y, c0.y, r2.w +mad.f32 r0.x, c2.z, r3.y, r0.x +mad.f32 r0.y, c1.y, r3.x, r0.y +mad.f32 r0.z, c3.z, r3.z, r0.x +mad.f32 r0.x, c2.y, r3.y, r0.y +mul.f r1.z, c0.x, r2.w +mad.f32 r0.y, c3.y, r3.z, r0.x +mad.f32 r0.x, c1.x, r3.x, r1.z +mul.f r1.z, c6.w, r3.w +mad.f32 r0.x, c2.x, r3.y, r0.x +mad.f32 r1.z, c7.w, r4.x, r1.z +mad.f32 r0.x, c3.x, r3.z, r0.x +mad.f32 r3.w, c8.w, r4.y, r1.z +mul.f r1.z, c5.x, r6.y +mul.f r1.w, c5.x, r5.z +mad.f32 r1.z, c5.y, r6.z, r1.z +mad.f32 r1.w, c5.y, r5.w, r1.w +mad.f32 r3.z, c5.z, r6.w, r1.z +mad.f32 r3.y, c5.z, r6.x, r1.w +mul.f r1.z, c5.x, r4.w mov.f32f32 r2.w, c10.x -mov.f32f32 r0.y, r0.x -mov.f32f32 r0.x, r1.z -mad.f32 r1.z, c8.w, r5.y, r1.w -mov.f32f32 r2.w, r2.w +mad.f32 r1.z, c5.y, r5.x, r1.z mov.f32f32 r1.w, c10.x -mov.f32f32 r4.x, c10.x -mov.f32f32 r3.w, r1.z -nop -mov.f32f32 r1.w, r1.w -mov.f32f32 r1.z, r4.x +mad.f32 r3.x, c5.z, r5.y, r1.z +mov.f32f32 r1.z, c10.x end nop +nop +nop ; VERT: outputs: r0.x (0:0) r1.x (5:20) r2.x (5:21) r3.x (5:22) -; VERT: inputs: r3.w (0:0,cm=f,il=8,b=0) r4.w (0:0,cm=f,il=12,b=0) r0.x (0:0,cm=7,il=16,b=0) r0.w (0:0,cm=7,il=20,b=0) r1.z (0:0,cm=7,il=24,b=0) -; VERT: 92 instructions, 0 half, 7 full +; VERT: inputs: r2.w (0:0,cm=f,il=8,b=0) r3.w (0:0,cm=f,il=12,b=0) r4.w (0:0,cm=7,il=16,b=0) r5.z (0:0,cm=7,il=20,b=0) r6.y (0:0,cm=7,il=24,b=0) +; VERT: 53 instructions, 0 half, 7 full diff --git a/reference/xonotic-gl2/xonotic-glx-gl2-30.asm b/reference/xonotic-gl2/xonotic-glx-gl2-30.asm index d225560..45395a7 100644 --- a/reference/xonotic-gl2/xonotic-glx-gl2-30.asm +++ b/reference/xonotic-gl2/xonotic-glx-gl2-30.asm @@ -6,39 +6,28 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c5.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)bary.f r0.z, 0, r0.x -bary.f (ei)r0.x, 1, r0.x +bary.f (ei)r0.w, 1, r0.x (rpt1)nop -mov.f32f32 r0.y, r0.z -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r1.x, r0.y -mov.f32f32 r0.y, r0.z -mov.f32f32 r1.y, r0.w -mov.f32f32 r0.z, r0.x -(rpt4)nop -sam (f32)(xyzw)r0.w, r1.x, s#0, t#0 -(sy)mul.f r0.x, r1.z, c4.x -(ss)mul.f r1.y, r1.y, c2.z -mul.f r1.x, r1.x, c2.y -mul.f r0.w, r0.w, c2.x -mov.f32f32 r0.x, r0.x -sam (f32)(xyz)r1.z, r0.y, s#1, t#1 -(sy)(ss)mad.f32 r0.y, c3.x, r1.z, r0.w -mad.f32 r0.z, c3.z, r2.x, r1.y -mad.f32 r0.w, c3.y, r1.w, r1.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.w, r0.x -mov.f32f32 r1.x, r0.y -mov.f32f32 r1.z, r0.z -mov.f32f32 r1.y, r0.w +mov.f32f32 r0.x, r0.z +mov.f32f32 r0.y, r0.w +(rpt1)nop +sam (f32)(xyz)r2.x, r0.z, s#1, t#1 +(rpt3)nop +(ss)nop +sam (f32)(xyzw)r0.x, r0.x, s#0, t#0 +(sy)mul.f r0.z, r0.z, c2.z +(ss)mul.f r0.y, r0.y, c2.y +mad.f32 r1.z, c3.z, r2.z, r0.z +mad.f32 r1.y, c3.y, r2.y, r0.y +mul.f r0.x, r0.x, c2.x +mul.f r1.w, r0.w, c4.x +mad.f32 r1.x, c3.x, r2.x, r0.x end nop +nop ; FRAG: outputs: r1.x (1:0) ; FRAG: inputs: r0.x (5:20,cm=f,il=8,b=1) -; FRAG: 36 instructions, 0 half, 3 full +; FRAG: 23 instructions, 0 half, 3 full diff --git a/reference/xonotic-gl2/xonotic-glx-gl2-31.asm b/reference/xonotic-gl2/xonotic-glx-gl2-31.asm index 25893e0..b0eaf15 100644 --- a/reference/xonotic-gl2/xonotic-glx-gl2-31.asm +++ b/reference/xonotic-gl2/xonotic-glx-gl2-31.asm @@ -16,51 +16,36 @@ @out(r1.y) out5 @out(r1.z) out6 @out(r1.w) out7 +@const(c8.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)mul.f r1.x, c4.y, r0.x mul.f r0.x, c4.x, r0.x mad.f32 r1.x, c5.y, r0.y, r1.x mad.f32 r0.x, c5.x, r0.y, r0.x mad.f32 r0.y, c6.y, r0.z, r1.x mad.f32 r0.x, c6.x, r0.z, r0.x -mad.f32 r0.y, c7.y, r0.w, r0.y -mad.f32 r0.x, c7.x, r0.w, r0.x -mul.f r0.z, c0.w, r1.z -mul.f r0.w, c0.z, r1.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x -mad.f32 r0.z, c1.w, r1.w, r0.z -mad.f32 r0.w, c1.z, r1.w, r0.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x -(rpt1)nop -mov.f32f32 r1.y, r0.y -mov.f32f32 r1.x, r0.x -mad.f32 r0.x, c2.w, r2.x, r0.z -mad.f32 r0.y, c2.z, r2.x, r0.w -mad.f32 r0.x, c3.w, r2.y, r0.x -mad.f32 r0.y, c3.z, r2.y, r0.y -mul.f r2.z, c0.y, r1.z -mul.f r1.z, c0.x, r1.z -mov.f32f32 r0.w, r0.x -mov.f32f32 r0.z, r0.y -mad.f32 r0.x, c1.y, r1.w, r2.z -mad.f32 r0.y, c1.x, r1.w, r1.z +mad.f32 r1.y, c7.y, r0.w, r0.y +mad.f32 r1.x, c7.x, r0.w, r0.x +mul.f r0.x, c0.w, r1.z +mul.f r0.y, c0.z, r1.z +mad.f32 r0.x, c1.w, r1.w, r0.x +mad.f32 r0.y, c1.z, r1.w, r0.y +mad.f32 r0.x, c2.w, r2.x, r0.x +mad.f32 r0.y, c2.z, r2.x, r0.y +mad.f32 r0.w, c3.w, r2.y, r0.x +mad.f32 r0.z, c3.z, r2.y, r0.y +mul.f r0.x, c0.y, r1.z +mul.f r0.y, c0.x, r1.z +mad.f32 r0.x, c1.y, r1.w, r0.x +mad.f32 r0.y, c1.x, r1.w, r0.y mad.f32 r0.x, c2.y, r2.x, r0.x -mad.f32 r0.y, c2.x, r2.x, r0.y -mad.f32 r0.x, c3.y, r2.y, r0.x -mad.f32 r1.z, c3.x, r2.y, r0.y -(rpt1)nop -mov.f32f32 r0.y, r0.x -mov.f32f32 r0.x, r1.z +mad.f32 r1.z, c2.x, r2.x, r0.y +mad.f32 r0.y, c3.y, r2.y, r0.x +mad.f32 r0.x, c3.x, r2.y, r1.z +mov.f32f32 r1.w, c8.x mov.f32f32 r1.z, c8.x -mov.f32f32 r2.x, c8.x -(rpt1)nop -mov.f32f32 r1.w, r1.z -mov.f32f32 r1.z, r2.x end nop -nop ; VERT: outputs: r0.x (0:0) r1.x (5:20) ; VERT: inputs: r1.z (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=f,il=12,b=0) -; VERT: 45 instructions, 0 half, 3 full +; VERT: 27 instructions, 0 half, 3 full diff --git a/reference/xonotic-gl2/xonotic-glx-gl2-32.asm b/reference/xonotic-gl2/xonotic-glx-gl2-32.asm index 8c54389..3b778fa 100644 --- a/reference/xonotic-gl2/xonotic-glx-gl2-32.asm +++ b/reference/xonotic-gl2/xonotic-glx-gl2-32.asm @@ -6,31 +6,20 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c3.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)bary.f r0.z, 0, r0.x -bary.f (ei)r0.x, 1, r0.x -(rpt1)nop -mov.f32f32 r0.y, r0.z -mov.f32f32 r0.x, r0.x -(rpt1)nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.x +bary.f (ei)r0.w, 1, r0.x (rpt5)nop -sam (f32)(xyzw)r0.x, r0.y, s#0, t#0 -(sy)mul.f r0.w, r0.w, c2.x -(ss)mul.f r0.z, r0.z, c1.z -mul.f r0.y, r0.y, c1.y -mul.f r0.x, r0.x, c1.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.z, r0.z -mov.f32f32 r1.y, r0.y -mov.f32f32 r1.x, r0.x -mov.f32f32 r0.x, r0.w -(rpt2)nop -mov.f32f32 r1.w, r0.x +sam (f32)(xyzw)r0.x, r0.z, s#0, t#0 +(sy)mul.f r1.w, r0.w, c2.x +mul.f r1.z, r0.z, c1.z +mul.f r1.y, r0.y, c1.y +mul.f r1.x, r0.x, c1.x end nop nop +nop ; FRAG: outputs: r1.x (1:0) ; FRAG: inputs: r0.x (5:20,cm=f,il=8,b=1) -; FRAG: 31 instructions, 0 half, 2 full +; FRAG: 14 instructions, 0 half, 2 full diff --git a/reference/xonotic-gl2/xonotic-glx-gl2-33.asm b/reference/xonotic-gl2/xonotic-glx-gl2-33.asm index 25893e0..b0eaf15 100644 --- a/reference/xonotic-gl2/xonotic-glx-gl2-33.asm +++ b/reference/xonotic-gl2/xonotic-glx-gl2-33.asm @@ -16,51 +16,36 @@ @out(r1.y) out5 @out(r1.z) out6 @out(r1.w) out7 +@const(c8.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)mul.f r1.x, c4.y, r0.x mul.f r0.x, c4.x, r0.x mad.f32 r1.x, c5.y, r0.y, r1.x mad.f32 r0.x, c5.x, r0.y, r0.x mad.f32 r0.y, c6.y, r0.z, r1.x mad.f32 r0.x, c6.x, r0.z, r0.x -mad.f32 r0.y, c7.y, r0.w, r0.y -mad.f32 r0.x, c7.x, r0.w, r0.x -mul.f r0.z, c0.w, r1.z -mul.f r0.w, c0.z, r1.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x -mad.f32 r0.z, c1.w, r1.w, r0.z -mad.f32 r0.w, c1.z, r1.w, r0.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x -(rpt1)nop -mov.f32f32 r1.y, r0.y -mov.f32f32 r1.x, r0.x -mad.f32 r0.x, c2.w, r2.x, r0.z -mad.f32 r0.y, c2.z, r2.x, r0.w -mad.f32 r0.x, c3.w, r2.y, r0.x -mad.f32 r0.y, c3.z, r2.y, r0.y -mul.f r2.z, c0.y, r1.z -mul.f r1.z, c0.x, r1.z -mov.f32f32 r0.w, r0.x -mov.f32f32 r0.z, r0.y -mad.f32 r0.x, c1.y, r1.w, r2.z -mad.f32 r0.y, c1.x, r1.w, r1.z +mad.f32 r1.y, c7.y, r0.w, r0.y +mad.f32 r1.x, c7.x, r0.w, r0.x +mul.f r0.x, c0.w, r1.z +mul.f r0.y, c0.z, r1.z +mad.f32 r0.x, c1.w, r1.w, r0.x +mad.f32 r0.y, c1.z, r1.w, r0.y +mad.f32 r0.x, c2.w, r2.x, r0.x +mad.f32 r0.y, c2.z, r2.x, r0.y +mad.f32 r0.w, c3.w, r2.y, r0.x +mad.f32 r0.z, c3.z, r2.y, r0.y +mul.f r0.x, c0.y, r1.z +mul.f r0.y, c0.x, r1.z +mad.f32 r0.x, c1.y, r1.w, r0.x +mad.f32 r0.y, c1.x, r1.w, r0.y mad.f32 r0.x, c2.y, r2.x, r0.x -mad.f32 r0.y, c2.x, r2.x, r0.y -mad.f32 r0.x, c3.y, r2.y, r0.x -mad.f32 r1.z, c3.x, r2.y, r0.y -(rpt1)nop -mov.f32f32 r0.y, r0.x -mov.f32f32 r0.x, r1.z +mad.f32 r1.z, c2.x, r2.x, r0.y +mad.f32 r0.y, c3.y, r2.y, r0.x +mad.f32 r0.x, c3.x, r2.y, r1.z +mov.f32f32 r1.w, c8.x mov.f32f32 r1.z, c8.x -mov.f32f32 r2.x, c8.x -(rpt1)nop -mov.f32f32 r1.w, r1.z -mov.f32f32 r1.z, r2.x end nop -nop ; VERT: outputs: r0.x (0:0) r1.x (5:20) ; VERT: inputs: r1.z (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=f,il=12,b=0) -; VERT: 45 instructions, 0 half, 3 full +; VERT: 27 instructions, 0 half, 3 full diff --git a/reference/xonotic-gl2/xonotic-glx-gl2-34.asm b/reference/xonotic-gl2/xonotic-glx-gl2-34.asm index f0423b2..50c6e03 100644 --- a/reference/xonotic-gl2/xonotic-glx-gl2-34.asm +++ b/reference/xonotic-gl2/xonotic-glx-gl2-34.asm @@ -6,35 +6,24 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c4.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)bary.f r0.z, 4, r0.x bary.f r0.w, 5, r0.x bary.f r1.x, 2, r0.x bary.f r1.y, 1, r0.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -mad.f32 r1.x, c2.z, r1.x, c1.z -mad.f32 r1.y, c2.y, r1.y, c1.y -mov.f32f32 r1.z, r0.z -mov.f32f32 r1.w, r0.w bary.f (ei)r0.x, 0, r0.x -(rpt4)nop -sam (f32)(xyzw)r1.w, r1.z, s#0, t#0 -(sy)mul.f r0.y, r2.z, c3.x -mul.f r0.z, r2.y, r1.x -mul.f r0.w, r2.x, r1.y -mad.f32 r0.x, c2.x, r0.x, c1.x -mov.f32f32 r0.y, r0.y -(ss)mov.f32f32 r1.z, r0.z -mov.f32f32 r1.y, r0.w -mul.f r0.x, r1.w, r0.x -mov.f32f32 r0.y, r0.y (rpt2)nop -mov.f32f32 r1.w, r0.y -mov.f32f32 r1.x, r0.x +sam (f32)(xyzw)r2.x, r0.z, s#0, t#0 +mad.f32 r0.y, c2.z, r1.x, c1.z +(ss)mad.f32 r0.z, c2.y, r1.y, c1.y +mad.f32 r0.x, c2.x, r0.x, c1.x +(sy)mul.f r1.w, r2.w, c3.x +mul.f r1.z, r2.z, r0.y +mul.f r1.y, r2.y, r0.z +mul.f r1.x, r2.x, r0.x end nop -nop ; FRAG: outputs: r1.x (1:0) ; FRAG: inputs: r0.x (5:20,cm=f,il=8,b=1) r1.x (5:21,cm=f,il=12,b=1) -; FRAG: 32 instructions, 0 half, 3 full +; FRAG: 17 instructions, 0 half, 3 full diff --git a/reference/xonotic-gl2/xonotic-glx-gl2-35.asm b/reference/xonotic-gl2/xonotic-glx-gl2-35.asm index 6f0d253..2fd4b2d 100644 --- a/reference/xonotic-gl2/xonotic-glx-gl2-35.asm +++ b/reference/xonotic-gl2/xonotic-glx-gl2-35.asm @@ -1,13 +1,13 @@ ; options: ; VERT: new compiler -@in(r1.x) in0 -@in(r1.y) in1 -@in(r1.z) in2 -@in(r1.w) in3 -@in(r3.x) in4 -@in(r3.y) in5 -@in(r3.z) in6 -@in(r3.w) in7 +@in(r2.z) in0 +@in(r2.w) in1 +@in(r3.x) in2 +@in(r3.y) in3 +@in(r1.x) in4 +@in(r1.y) in5 +@in(r1.z) in6 +@in(r1.w) in7 @in(r0.x) in8 @in(r0.y) in9 @in(r0.z) in10 @@ -24,55 +24,36 @@ @out(r2.y) out9 @out(r2.z) out10 @out(r2.w) out11 +@const(c8.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)mul.f r2.x, c4.y, r0.x mul.f r0.x, c4.x, r0.x mad.f32 r2.x, c5.y, r0.y, r2.x mad.f32 r0.x, c5.x, r0.y, r0.x mad.f32 r0.y, c6.y, r0.z, r2.x mad.f32 r0.x, c6.x, r0.z, r0.x -mad.f32 r0.y, c7.y, r0.w, r0.y -mad.f32 r0.x, c7.x, r0.w, r0.x -mul.f r0.z, c0.w, r1.x -mul.f r0.w, c0.z, r1.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x -mad.f32 r0.z, c1.w, r1.y, r0.z -mad.f32 r0.w, c1.z, r1.y, r0.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x -(rpt1)nop -mov.f32f32 r2.y, r0.y -mov.f32f32 r2.x, r0.x -mad.f32 r0.x, c2.w, r1.z, r0.z -mad.f32 r0.y, c2.z, r1.z, r0.w -mad.f32 r0.x, c3.w, r1.w, r0.x -mad.f32 r0.y, c3.z, r1.w, r0.y -mul.f r2.z, c0.y, r1.x -mul.f r1.x, c0.x, r1.x -mov.f32f32 r0.w, r0.x -mov.f32f32 r0.z, r0.y -mad.f32 r0.x, c1.y, r1.y, r2.z -mad.f32 r0.y, c1.x, r1.y, r1.x -mad.f32 r0.x, c2.y, r1.z, r0.x -mad.f32 r0.y, c2.x, r1.z, r0.y -mad.f32 r0.x, c3.y, r1.w, r0.x -mad.f32 r1.x, c3.x, r1.w, r0.y -(rpt1)nop -mov.f32f32 r0.y, r0.x -mov.f32f32 r0.x, r1.x -mov.f32f32 r1.x, c8.x -mov.f32f32 r1.y, c8.x -(rpt1)nop -mov.f32f32 r2.w, r1.x -mov.f32f32 r2.z, r1.y -mov.f32f32 r1.w, r3.w -mov.f32f32 r1.z, r3.z -mov.f32f32 r1.y, r3.y -mov.f32f32 r1.x, r3.x +mad.f32 r2.y, c7.y, r0.w, r0.y +mad.f32 r2.x, c7.x, r0.w, r0.x +mul.f r0.x, c0.w, r2.z +mul.f r0.y, c0.z, r2.z +mad.f32 r0.x, c1.w, r2.w, r0.x +mad.f32 r0.y, c1.z, r2.w, r0.y +mad.f32 r0.x, c2.w, r3.x, r0.x +mad.f32 r0.y, c2.z, r3.x, r0.y +mad.f32 r0.w, c3.w, r3.y, r0.x +mad.f32 r0.z, c3.z, r3.y, r0.y +mul.f r0.x, c0.y, r2.z +mul.f r0.y, c0.x, r2.z +mad.f32 r0.x, c1.y, r2.w, r0.x +mad.f32 r0.y, c1.x, r2.w, r0.y +mad.f32 r0.x, c2.y, r3.x, r0.x +mad.f32 r2.z, c2.x, r3.x, r0.y +mad.f32 r0.y, c3.y, r3.y, r0.x +mad.f32 r0.x, c3.x, r3.y, r2.z +mov.f32f32 r2.w, c8.x +mov.f32f32 r2.z, c8.x end nop -nop ; VERT: outputs: r0.x (0:0) r1.x (5:20) r2.x (5:21) -; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) r3.x (0:0,cm=f,il=12,b=0) r0.x (0:0,cm=f,il=16,b=0) -; VERT: 49 instructions, 0 half, 4 full +; VERT: inputs: r2.z (0:0,cm=f,il=8,b=0) r1.x (0:0,cm=f,il=12,b=0) r0.x (0:0,cm=f,il=16,b=0) +; VERT: 27 instructions, 0 half, 4 full diff --git a/reference/xonotic-gl2/xonotic-glx-gl2-36.asm b/reference/xonotic-gl2/xonotic-glx-gl2-36.asm index b6d4a3c..2725ce9 100644 --- a/reference/xonotic-gl2/xonotic-glx-gl2-36.asm +++ b/reference/xonotic-gl2/xonotic-glx-gl2-36.asm @@ -6,47 +6,32 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c6.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)bary.f r0.z, 4, r0.x -bary.f r0.w, 5, r0.x bary.f r1.x, 2, r0.x +bary.f r0.w, 5, r0.x bary.f r1.y, 1, r0.x mov.f32f32 r1.z, r0.z -mov.f32f32 r0.z, r0.z +mad.f32 r1.x, c3.z, r1.x, c2.z mov.f32f32 r1.w, r0.w -mov.f32f32 r0.w, r0.w -mov.f32f32 r2.x, r1.z -mov.f32f32 r2.z, r0.z -mov.f32f32 r2.y, r1.w -mov.f32f32 r2.w, r0.w -mad.f32 r0.z, c3.z, r1.x, c2.z -mad.f32 r0.w, c3.y, r1.y, c2.y +mad.f32 r1.y, c3.y, r1.y, c2.y bary.f (ei)r0.x, 0, r0.x -(rpt1)nop -sam (f32)(xyzw)r1.x, r2.x, s#0, t#0 -(sy)mul.f r0.y, r1.w, c5.x -mul.f r0.z, r1.z, r0.z -mul.f r0.w, r1.y, r0.w +sam (f32)(xyz)r2.x, r0.z, s#1, t#1 +(rpt3)nop +sam (f32)(xyzw)r2.w, r1.z, s#0, t#0 +(sy)mul.f r0.y, r3.y, r1.x +(ss)mul.f r0.z, r3.x, r1.y +mad.f32 r1.z, c4.z, r2.z, r0.y +mad.f32 r1.y, c4.y, r2.y, r0.z mad.f32 r0.x, c3.x, r0.x, c2.x -mov.f32f32 r0.y, r0.y -sam (f32)(xyz)r1.y, r2.z, s#1, t#1 -(sy)mad.f32 r0.z, c4.z, r1.w, r0.z -mad.f32 r0.w, c4.y, r1.z, r0.w -mul.f r0.x, r1.x, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -mad.f32 r0.x, c4.x, r1.y, r0.x -mov.f32f32 r1.w, r0.y -mov.f32f32 r1.z, r0.z -mov.f32f32 r1.y, r0.w -mov.f32f32 r0.x, r0.x -(rpt2)nop -mov.f32f32 r1.x, r0.x -end -nop +(rpt1)nop +mul.f r1.w, r3.z, c5.x +mul.f r0.x, r2.w, r0.x nop +mad.f32 r1.x, c4.x, r2.x, r0.x +end nop ; FRAG: outputs: r1.x (1:0) ; FRAG: inputs: r0.x (5:20,cm=f,il=8,b=1) r1.x (5:21,cm=f,il=12,b=1) -; FRAG: 40 instructions, 0 half, 3 full +; FRAG: 27 instructions, 0 half, 4 full diff --git a/reference/xonotic-gl2/xonotic-glx-gl2-37.asm b/reference/xonotic-gl2/xonotic-glx-gl2-37.asm index 6f0d253..2fd4b2d 100644 --- a/reference/xonotic-gl2/xonotic-glx-gl2-37.asm +++ b/reference/xonotic-gl2/xonotic-glx-gl2-37.asm @@ -1,13 +1,13 @@ ; options: ; VERT: new compiler -@in(r1.x) in0 -@in(r1.y) in1 -@in(r1.z) in2 -@in(r1.w) in3 -@in(r3.x) in4 -@in(r3.y) in5 -@in(r3.z) in6 -@in(r3.w) in7 +@in(r2.z) in0 +@in(r2.w) in1 +@in(r3.x) in2 +@in(r3.y) in3 +@in(r1.x) in4 +@in(r1.y) in5 +@in(r1.z) in6 +@in(r1.w) in7 @in(r0.x) in8 @in(r0.y) in9 @in(r0.z) in10 @@ -24,55 +24,36 @@ @out(r2.y) out9 @out(r2.z) out10 @out(r2.w) out11 +@const(c8.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)mul.f r2.x, c4.y, r0.x mul.f r0.x, c4.x, r0.x mad.f32 r2.x, c5.y, r0.y, r2.x mad.f32 r0.x, c5.x, r0.y, r0.x mad.f32 r0.y, c6.y, r0.z, r2.x mad.f32 r0.x, c6.x, r0.z, r0.x -mad.f32 r0.y, c7.y, r0.w, r0.y -mad.f32 r0.x, c7.x, r0.w, r0.x -mul.f r0.z, c0.w, r1.x -mul.f r0.w, c0.z, r1.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x -mad.f32 r0.z, c1.w, r1.y, r0.z -mad.f32 r0.w, c1.z, r1.y, r0.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x -(rpt1)nop -mov.f32f32 r2.y, r0.y -mov.f32f32 r2.x, r0.x -mad.f32 r0.x, c2.w, r1.z, r0.z -mad.f32 r0.y, c2.z, r1.z, r0.w -mad.f32 r0.x, c3.w, r1.w, r0.x -mad.f32 r0.y, c3.z, r1.w, r0.y -mul.f r2.z, c0.y, r1.x -mul.f r1.x, c0.x, r1.x -mov.f32f32 r0.w, r0.x -mov.f32f32 r0.z, r0.y -mad.f32 r0.x, c1.y, r1.y, r2.z -mad.f32 r0.y, c1.x, r1.y, r1.x -mad.f32 r0.x, c2.y, r1.z, r0.x -mad.f32 r0.y, c2.x, r1.z, r0.y -mad.f32 r0.x, c3.y, r1.w, r0.x -mad.f32 r1.x, c3.x, r1.w, r0.y -(rpt1)nop -mov.f32f32 r0.y, r0.x -mov.f32f32 r0.x, r1.x -mov.f32f32 r1.x, c8.x -mov.f32f32 r1.y, c8.x -(rpt1)nop -mov.f32f32 r2.w, r1.x -mov.f32f32 r2.z, r1.y -mov.f32f32 r1.w, r3.w -mov.f32f32 r1.z, r3.z -mov.f32f32 r1.y, r3.y -mov.f32f32 r1.x, r3.x +mad.f32 r2.y, c7.y, r0.w, r0.y +mad.f32 r2.x, c7.x, r0.w, r0.x +mul.f r0.x, c0.w, r2.z +mul.f r0.y, c0.z, r2.z +mad.f32 r0.x, c1.w, r2.w, r0.x +mad.f32 r0.y, c1.z, r2.w, r0.y +mad.f32 r0.x, c2.w, r3.x, r0.x +mad.f32 r0.y, c2.z, r3.x, r0.y +mad.f32 r0.w, c3.w, r3.y, r0.x +mad.f32 r0.z, c3.z, r3.y, r0.y +mul.f r0.x, c0.y, r2.z +mul.f r0.y, c0.x, r2.z +mad.f32 r0.x, c1.y, r2.w, r0.x +mad.f32 r0.y, c1.x, r2.w, r0.y +mad.f32 r0.x, c2.y, r3.x, r0.x +mad.f32 r2.z, c2.x, r3.x, r0.y +mad.f32 r0.y, c3.y, r3.y, r0.x +mad.f32 r0.x, c3.x, r3.y, r2.z +mov.f32f32 r2.w, c8.x +mov.f32f32 r2.z, c8.x end nop -nop ; VERT: outputs: r0.x (0:0) r1.x (5:20) r2.x (5:21) -; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) r3.x (0:0,cm=f,il=12,b=0) r0.x (0:0,cm=f,il=16,b=0) -; VERT: 49 instructions, 0 half, 4 full +; VERT: inputs: r2.z (0:0,cm=f,il=8,b=0) r1.x (0:0,cm=f,il=12,b=0) r0.x (0:0,cm=f,il=16,b=0) +; VERT: 27 instructions, 0 half, 4 full diff --git a/reference/xonotic-gl2/xonotic-glx-gl2-38.asm b/reference/xonotic-gl2/xonotic-glx-gl2-38.asm index 5865f1f..6eea1c6 100644 --- a/reference/xonotic-gl2/xonotic-glx-gl2-38.asm +++ b/reference/xonotic-gl2/xonotic-glx-gl2-38.asm @@ -6,159 +6,109 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c9.x) 0xbf000000, 0x40000000, 0x00000000, 0x3f800000 +@const(c10.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.z, 0, r0.x -bary.f r0.w, 4, r0.x bary.f r1.x, 8, r0.x -bary.f r1.y, 1, r0.x +bary.f r1.y, 4, r0.x +bary.f r0.w, 1, r0.x mov.f32f32 r1.z, r0.z -mul.f r1.w, r0.w, r0.w -bary.f r2.x, 5, r0.x -mul.f r2.y, r1.x, r1.x -mov.f32f32 r2.z, r1.z -mov.f32f32 r1.z, r1.y -mad.f32 r1.w, r2.x, r2.x, r1.w -bary.f r3.x, 9, r0.x +mul.f r2.x, r1.x, r1.x +bary.f r2.y, 9, r0.x +mov.f32f32 r1.w, r0.w +mul.f r2.z, r1.y, r1.y +bary.f r2.w, 5, r0.x +mad.f32 r2.x, r2.y, r2.y, r2.x +bary.f r3.x, 10, r0.x mov.f32f32 r3.y, r0.z -mov.f32f32 r2.w, r1.z -mov.f32f32 r1.z, r1.w -bary.f r1.w, 6, r0.x -mad.f32 r2.y, r3.x, r3.x, r2.y -mov.f32f32 r3.y, r3.y -mov.f32f32 r3.z, r1.y +mov.f32f32 r3.z, r0.w +sam (f32)(xyz)r3.w, r1.z, s#0, t#0 +(sy)(ss)add.f r1.z, r3.w, c9.x +mad.f32 r1.w, r3.x, r3.x, r2.x +mad.f32 r2.x, r2.w, r2.w, r2.z +bary.f (ei)r0.x, 6, r0.x +mov.f32f32 r0.y, r1.z +add.f r2.z, r4.x, c9.x +add.f r3.w, r4.y, c9.x +sam (f32)(xyzw)r4.x, r3.y, s#1, t#1 +mad.f32 r2.x, r0.x, r0.x, r2.x +mul.f r1.z, r1.z, r0.y +(ss)mov.f32f32 r3.y, r2.z +rsq r1.w, r1.w +(ss)mov.f32f32 r3.z, r1.w +mul.f r3.x, r3.x, r1.w +(ss)mov.f32f32 r1.w, r3.w +mad.f32 r1.z, r2.z, r3.y, r1.z +mul.f r1.x, r1.x, r3.z +mul.f r2.y, r2.y, r3.z mad.f32 r1.z, r1.w, r1.w, r1.z -sam (f32)(xyz)r3.w, r2.z, s#0, t#0 -(sy)(ss)add.f r2.z, r3.w, c9.x -mov.f32f32 r2.y, r2.y -bary.f (ei)r0.x, 10, r0.x -mov.f32f32 r3.z, r3.z -mov.f32f32 r0.y, r2.z -mov.f32f32 r0.z, r0.z +rsq r1.w, r2.x +(ss)mov.f32f32 r2.x, r1.w +mov.f32f32 r2.z, r3.x +mul.f r0.x, r0.x, r1.w +mov.f32f32 r3.z, r1.x +mov.f32f32 r5.x, r2.y +mul.f r1.y, r1.y, r2.x rsq r1.z, r1.z -(ss)mov.f32f32 r1.z, r1.z -mad.f32 r2.y, r0.x, r0.x, r2.y -mul.f r2.z, r0.y, r0.y -add.f r2.w, r4.x, c9.x -mul.f r0.w, r0.w, r1.z -mul.f r2.x, r2.x, r1.z -mul.f r1.z, r1.w, r1.z -mov.f32f32 r1.w, r2.w -mov.f32f32 r0.w, r0.w -mov.f32f32 r2.x, r2.x -mov.f32f32 r1.z, r1.z -mad.f32 r2.z, r1.w, r1.w, r2.z -rsq r2.y, r2.y -(ss)mov.f32f32 r2.y, r2.y -sam (f32)(xyzw)r3.y, r3.y, s#1, t#1 -(sy)mul.f r2.w, r4.x, c7.x -mul.f r4.x, r3.w, c3.z -mov.f32f32 r2.z, r2.z -add.f r4.y, r4.y, c9.x -mul.f r1.x, r1.x, r2.y -mul.f r3.x, r3.x, r2.y -mul.f r0.x, r0.x, r2.y -mov.f32f32 r2.y, r4.y -mov.f32f32 r1.x, r1.x -mov.f32f32 r3.x, r3.x -mov.f32f32 r0.x, r0.x -mad.f32 r2.z, r2.y, r2.y, r2.z -mov.f32f32 r2.w, r2.w -mov.f32f32 r4.x, r4.x -mov.f32f32 r4.y, r0.z -mul.f r0.z, r3.w, c4.z -mov.f32f32 r2.w, r2.w -nop -rsq r2.z, r2.z -(ss)mov.f32f32 r2.z, r2.z -mul.f r3.w, r3.z, c3.y -mul.f r4.z, r3.y, c3.x -mov.f32f32 r1.y, r1.y -mul.f r0.y, r0.y, r2.z -mul.f r1.w, r1.w, r2.z -mul.f r2.y, r2.y, r2.z -nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r2.z, r1.w -mov.f32f32 r2.y, r2.y -mov.f32f32 r1.w, r2.w -mul.f r2.w, r0.y, r1.x -mul.f r4.w, r0.y, r1.x -mad.f32 r2.w, r2.z, r3.x, r2.w -mad.f32 r4.w, r2.z, r3.x, r4.w -mov.f32f32 r3.w, r3.w -mov.f32f32 r5.x, r4.z -mov.f32f32 r2.w, r2.w -mov.f32f32 r4.z, r4.w -mad.f32 r2.w, r2.y, r0.x, r2.w -mad.f32 r4.w, r2.y, r0.x, r4.z -mov.f32f32 r4.z, r1.y -mul.f r1.y, r3.z, c4.y -mul.f r0.y, r2.w, r0.y -mul.f r2.z, r2.w, r2.z -mul.f r2.y, r2.w, r2.y -mov.f32f32 r2.w, r4.w +(ss)mov.f32f32 r5.y, r1.z +(ss)mul.f r1.z, r3.w, r1.z +mul.f r2.x, r2.w, r2.x +(sy)mul.f r1.w, r4.w, c7.x +mul.f r0.y, r0.y, r5.y +mul.f r2.w, r3.y, r5.y +mov.f32f32 r3.y, r1.z +mul.f r3.w, r4.z, c3.z +mul.f r1.x, r0.y, r1.x mov.f32f32 r0.y, r0.y -mov.f32f32 r2.z, r2.z -mov.f32f32 r2.y, r2.y -max.f r2.w, r2.w, c9.z +mad.f32 r1.x, r2.w, r2.y, r1.x +mov.f32f32 r2.y, r2.w +mad.f32 r1.x, r1.z, r2.z, r1.x +mul.f r1.z, r0.y, r3.z +mul.f r2.w, r4.z, c4.z +mad.f32 r1.z, r2.y, r5.x, r1.z +mul.f r0.y, r1.x, r0.y +mad.f32 r1.z, r3.y, r2.z, r1.z +mul.f r2.y, r1.x, r2.y +mul.f r1.x, r1.x, r3.y mul.f r0.y, c9.y, r0.y -mul.f r2.z, c9.y, r2.z +max.f r1.z, r1.z, c9.z mul.f r2.y, c9.y, r2.y -mov.f32f32 r2.w, r2.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r2.z, r2.z -mov.f32f32 r2.y, r2.y -mul.f r0.z, r0.z, r2.w -add.f r0.y, r1.x, (neg)r0.y -add.f r1.x, r3.x, (neg)r2.z -add.f r0.x, r0.x, (neg)r2.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.x, r1.x -mov.f32f32 r0.x, r0.x -mul.f r1.y, r1.y, r2.w -mul.f r0.y, r0.y, r0.w -mul.f r0.w, r3.y, c4.x -mad.f32 r0.y, r1.x, r2.x, r0.y -mov.f32f32 r1.x, r1.y -sam (f32)(xyzw)r5.y, r4.y, s#2, t#2 -(sy)mad.f32 r1.y, c6.x, r6.x, c9.w -mul.f r2.x, r5.w, c5.z -mov.f32f32 r0.y, r0.y -mul.f r0.w, r0.w, r2.w -mad.f32 r0.x, r0.x, r1.z, r0.y -mov.f32f32 r0.y, r1.y -mul.f r1.y, r5.z, c5.y -mul.f r1.z, r5.y, c5.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.w, r0.w +mul.f r2.z, r4.x, c4.x +add.f r0.y, r3.z, (neg)r0.y +mov.f32f32 r3.y, r1.z +mul.f r3.z, r4.y, c4.y +mul.f r1.z, r2.z, r1.z +mul.f r0.y, r0.y, r1.y +add.f r1.y, r5.x, (neg)r2.y +mul.f r2.y, r2.w, r3.y +mul.f r2.z, r3.z, r3.y +mul.f r1.x, c9.y, r1.x +mad.f32 r0.y, r1.y, r2.x, r0.y +mul.f r1.y, r4.y, c3.y +mul.f r2.x, r4.x, c3.x +add.f r1.x, r3.x, (neg)r1.x +sam (f32)(xyzw)r2.w, r0.z, s#2, t#2 +(sy)(ss)mad.f32 r0.z, c6.x, r3.z, c9.w +mul.f r0.w, r3.y, c5.z +mul.f r3.x, r3.x, c5.y +mad.f32 r0.x, r1.x, r0.x, r0.y (rpt1)nop +mul.f r0.y, r2.w, c5.x max.f r0.x, (neg)r0.x, c9.z -(rpt2)nop -mov.f32f32 r0.x, r0.x (rpt5)nop log2 r0.x, r0.x -(ss)mul.f r0.x, r0.y, r0.x -(rpt2)nop -mov.f32f32 r0.x, r0.x +(ss)mul.f r0.x, r0.z, r0.x (rpt5)nop exp2 r0.x, r0.x -(ss)mad.f32 r0.y, r2.x, r0.x, r0.z -mad.f32 r0.z, r1.y, r0.x, r1.x -(ss)mad.f32 r0.x, r1.z, r0.x, r0.w -nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.x, r0.x -nop -mad.f32 r0.y, c8.z, r0.y, r4.x -mad.f32 r0.z, c8.y, r0.z, r3.w -mad.f32 r0.x, c8.x, r0.x, r5.x +(ss)mad.f32 r0.z, r0.w, r0.x, r2.y +mad.f32 r0.w, r3.x, r0.x, r2.z +(ss)mad.f32 r0.x, r0.y, r0.x, r1.z nop -mov.f32f32 r1.z, r0.y -mov.f32f32 r1.y, r0.z -mov.f32f32 r1.x, r0.x +mad.f32 r1.z, c8.z, r0.z, r3.w +mad.f32 r1.y, c8.y, r0.w, r1.y +mad.f32 r1.x, c8.x, r0.x, r2.x end ; FRAG: outputs: r1.x (1:0) ; FRAG: inputs: r0.x (5:20,cm=f,il=8,b=1) r1.x (5:21,cm=f,il=12,b=1) r2.x (5:22,cm=f,il=16,b=1) -; FRAG: 167 instructions, 0 half, 7 full +; FRAG: 111 instructions, 0 half, 6 full diff --git a/reference/xonotic-gl2/xonotic-glx-gl2-39.asm b/reference/xonotic-gl2/xonotic-glx-gl2-39.asm index 9d9cf0a..a1e4514 100644 --- a/reference/xonotic-gl2/xonotic-glx-gl2-39.asm +++ b/reference/xonotic-gl2/xonotic-glx-gl2-39.asm @@ -1,22 +1,22 @@ ; options: ; VERT: new compiler -@in(r3.w) in0 -@in(r4.x) in1 -@in(r4.y) in2 -@in(r4.z) in3 -@in(r4.w) in4 -@in(r5.x) in5 -@in(r5.y) in6 -@in(r5.z) in7 -@in(r0.x) in8 -@in(r0.y) in9 -@in(r0.z) in10 -@in(r0.w) in12 -@in(r1.x) in13 -@in(r1.y) in14 -@in(r1.z) in16 -@in(r1.w) in17 -@in(r2.x) in18 +@in(r2.w) in0 +@in(r3.x) in1 +@in(r3.y) in2 +@in(r3.z) in3 +@in(r3.w) in4 +@in(r4.x) in5 +@in(r4.y) in6 +@in(r4.z) in7 +@in(r4.w) in8 +@in(r5.x) in9 +@in(r5.y) in10 +@in(r5.z) in12 +@in(r5.w) in13 +@in(r6.x) in14 +@in(r6.y) in16 +@in(r6.z) in17 +@in(r6.w) in18 @out(r0.x) out0 @out(r0.y) out1 @out(r0.z) out2 @@ -33,99 +33,65 @@ @out(r3.y) out13 @out(r3.z) out14 @out(r3.w) out15 -(sy)(ss)mul.f r2.y, c5.x, r1.z -mul.f r2.z, c5.x, r0.w -mad.f32 r2.y, c5.y, r1.w, r2.y -mad.f32 r2.z, c5.y, r1.x, r2.z -add.f r2.w, c4.x, (neg)r3.w -mul.f r3.x, c5.x, r0.x -mov.f32f32 r2.y, r2.y -mov.f32f32 r2.z, r2.z -mad.f32 r2.y, c5.z, r2.x, r2.y -mad.f32 r2.z, c5.z, r1.y, r2.z -mul.f r1.z, r2.w, r1.z -mad.f32 r3.x, c5.y, r0.y, r3.x -mov.f32f32 r2.y, r2.y -mov.f32f32 r2.z, r2.z -add.f r5.w, c4.y, (neg)r4.x -mov.f32f32 r3.x, r3.x -mov.f32f32 r2.y, r2.y -mov.f32f32 r2.z, r2.z -mad.f32 r1.z, r5.w, r1.w, r1.z -mad.f32 r1.w, c5.z, r0.z, r3.x -mov.f32f32 r2.y, r2.y -mov.f32f32 r2.z, r2.z -mov.f32f32 r1.z, r1.z -mov.f32f32 r1.w, r1.w -mov.f32f32 r3.z, r2.y -mov.f32f32 r3.y, r2.z -add.f r6.x, c4.z, (neg)r4.y -mov.f32f32 r1.w, r1.w -mul.f r0.w, r2.w, r0.w -mul.f r2.y, c6.y, r4.w -mad.f32 r1.z, r6.x, r2.x, r1.z -mov.f32f32 r3.x, r1.w -mad.f32 r0.w, r5.w, r1.x, r0.w -mad.f32 r1.x, c7.y, r5.x, r2.y -mov.f32f32 r1.z, r1.z -mad.f32 r1.x, c8.y, r5.y, r1.x -mul.f r1.w, c6.x, r4.w -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.z, r1.z -mad.f32 r0.w, r6.x, r1.y, r0.w -mad.f32 r1.x, c9.y, r5.z, r1.x -mad.f32 r1.y, c7.x, r5.x, r1.w -mov.f32f32 r2.z, r1.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.x, r1.x -mad.f32 r1.y, c8.x, r5.y, r1.y -mul.f r0.x, r2.w, r0.x -mov.f32f32 r2.y, r0.w -mov.f32f32 r0.w, r1.x -mad.f32 r1.x, c9.x, r5.z, r1.y -mad.f32 r0.x, r5.w, r0.y, r0.x -mul.f r0.y, c0.w, r3.w -mov.f32f32 r1.y, r0.w -mov.f32f32 r0.w, r1.x -mov.f32f32 r0.x, r0.x -mad.f32 r0.y, c1.w, r4.x, r0.y -mul.f r1.z, c0.z, r3.w -mov.f32f32 r0.w, r0.w -mad.f32 r0.x, r6.x, r0.z, r0.x -(rpt1)nop -mov.f32f32 r1.x, r0.w -mov.f32f32 r2.x, r0.x -mad.f32 r0.x, c2.w, r4.y, r0.y -mad.f32 r0.y, c1.z, r4.x, r1.z -mad.f32 r0.x, c3.w, r4.z, r0.x -mad.f32 r0.y, c2.z, r4.y, r0.y -mul.f r0.z, c0.y, r3.w -mul.f r1.z, c0.x, r3.w -mov.f32f32 r0.w, r0.x -mad.f32 r0.x, c3.z, r4.z, r0.y -mad.f32 r0.y, c1.y, r4.x, r0.z -mad.f32 r1.z, c1.x, r4.x, r1.z -mul.f r1.w, c6.w, r4.w -mov.f32f32 r0.z, r0.x -mad.f32 r0.x, c2.y, r4.y, r0.y -mad.f32 r0.y, c2.x, r4.y, r1.z -mad.f32 r0.x, c3.y, r4.z, r0.x -mad.f32 r1.z, c3.x, r4.z, r0.y -mad.f32 r1.w, c7.w, r5.x, r1.w +@const(c10.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 +@const(c11.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 +(sy)(ss)add.f r0.x, c4.x, (neg)r2.w +mul.f r0.y, c6.y, r3.w +mul.f r0.z, c6.x, r3.w +mul.f r0.w, c0.w, r2.w +mul.f r1.x, r0.x, r6.y +add.f r1.y, c4.y, (neg)r3.x +mul.f r1.z, r0.x, r5.z +mul.f r0.x, r0.x, r4.w +mad.f32 r0.y, c7.y, r4.x, r0.y +mad.f32 r1.x, r1.y, r6.z, r1.x +add.f r1.w, c4.z, (neg)r3.y +mad.f32 r1.z, r1.y, r5.w, r1.z +mad.f32 r0.x, r1.y, r5.x, r0.x +nop +mad.f32 r2.z, r1.w, r6.w, r1.x +mad.f32 r2.y, r1.w, r6.x, r1.z +mad.f32 r2.x, r1.w, r5.y, r0.x +mad.f32 r0.x, c8.y, r4.y, r0.y +mad.f32 r0.y, c7.x, r4.x, r0.z +mad.f32 r1.y, c9.y, r4.z, r0.x +mad.f32 r0.x, c8.x, r4.y, r0.y +mad.f32 r0.y, c1.w, r3.x, r0.w +mad.f32 r1.x, c9.x, r4.z, r0.x +mad.f32 r0.x, c2.w, r3.y, r0.y +mul.f r0.y, c0.z, r2.w +mad.f32 r0.w, c3.w, r3.z, r0.x +mad.f32 r0.x, c1.z, r3.x, r0.y +mul.f r0.y, c0.y, r2.w +mad.f32 r0.x, c2.z, r3.y, r0.x +mad.f32 r0.y, c1.y, r3.x, r0.y +mad.f32 r0.z, c3.z, r3.z, r0.x +mad.f32 r0.x, c2.y, r3.y, r0.y +mul.f r1.z, c0.x, r2.w +mad.f32 r0.y, c3.y, r3.z, r0.x +mad.f32 r0.x, c1.x, r3.x, r1.z +mul.f r1.z, c6.w, r3.w +mad.f32 r0.x, c2.x, r3.y, r0.x +mad.f32 r1.z, c7.w, r4.x, r1.z +mad.f32 r0.x, c3.x, r3.z, r0.x +mad.f32 r3.w, c8.w, r4.y, r1.z +mul.f r1.z, c5.x, r6.y +mul.f r1.w, c5.x, r5.z +mad.f32 r1.z, c5.y, r6.z, r1.z +mad.f32 r1.w, c5.y, r5.w, r1.w +mad.f32 r3.z, c5.z, r6.w, r1.z +mad.f32 r3.y, c5.z, r6.x, r1.w +mul.f r1.z, c5.x, r4.w mov.f32f32 r2.w, c10.x -mov.f32f32 r0.y, r0.x -mov.f32f32 r0.x, r1.z -mad.f32 r1.z, c8.w, r5.y, r1.w -mov.f32f32 r2.w, r2.w +mad.f32 r1.z, c5.y, r5.x, r1.z mov.f32f32 r1.w, c10.x -mov.f32f32 r4.x, c10.x -mov.f32f32 r3.w, r1.z -nop -mov.f32f32 r1.w, r1.w -mov.f32f32 r1.z, r4.x +mad.f32 r3.x, c5.z, r5.y, r1.z +mov.f32f32 r1.z, c10.x end nop +nop +nop ; VERT: outputs: r0.x (0:0) r1.x (5:20) r2.x (5:21) r3.x (5:22) -; VERT: inputs: r3.w (0:0,cm=f,il=8,b=0) r4.w (0:0,cm=f,il=12,b=0) r0.x (0:0,cm=7,il=16,b=0) r0.w (0:0,cm=7,il=20,b=0) r1.z (0:0,cm=7,il=24,b=0) -; VERT: 92 instructions, 0 half, 7 full +; VERT: inputs: r2.w (0:0,cm=f,il=8,b=0) r3.w (0:0,cm=f,il=12,b=0) r4.w (0:0,cm=7,il=16,b=0) r5.z (0:0,cm=7,il=20,b=0) r6.y (0:0,cm=7,il=24,b=0) +; VERT: 53 instructions, 0 half, 7 full diff --git a/reference/xonotic-gl2/xonotic-glx-gl2-40.asm b/reference/xonotic-gl2/xonotic-glx-gl2-40.asm index 9b18924..ac67816 100644 --- a/reference/xonotic-gl2/xonotic-glx-gl2-40.asm +++ b/reference/xonotic-gl2/xonotic-glx-gl2-40.asm @@ -6,195 +6,133 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c15.x) 0xbf000000, 0x40000000, 0x00000000, 0x3f800000 +@const(c16.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 (sy)(ss)bary.f r0.z, 0, r0.x -bary.f r0.w, 4, r0.x bary.f r1.x, 8, r0.x -bary.f r1.y, 1, r0.x +bary.f r1.y, 4, r0.x +bary.f r0.w, 1, r0.x mov.f32f32 r1.z, r0.z -mov.f32f32 r1.w, r0.z -mul.f r2.x, r0.w, r0.w -bary.f r2.y, 5, r0.x -mov.f32f32 r2.z, r1.z -mov.f32f32 r1.z, r1.y -mov.f32f32 r3.x, r1.w -mov.f32f32 r1.w, r1.y +mul.f r2.x, r1.x, r1.x +bary.f r2.y, 9, r0.x +mov.f32f32 r1.w, r0.w +mul.f r2.z, r1.y, r1.y +bary.f r2.w, 5, r0.x mad.f32 r2.x, r2.y, r2.y, r2.x -mov.f32f32 r2.w, r1.z -mul.f r1.z, r1.x, r1.x -bary.f r3.z, 9, r0.x -mov.f32f32 r3.y, r1.w -mov.f32f32 r1.w, r2.x -bary.f r2.x, 6, r0.x -mad.f32 r1.z, r3.z, r3.z, r1.z -sam (f32)(xyz)r3.w, r2.z, s#0, t#0 -(sy)(ss)add.f r2.z, r3.w, c15.x -mov.f32f32 r2.w, r0.z -mad.f32 r1.w, r2.x, r2.x, r1.w -sam (f32)(xyz)r4.z, r3.x, s#4, t#4 -(sy)(ss)mul.f r3.x, r5.x, c6.z -mov.f32f32 r2.z, r2.z -mul.f r3.y, r4.w, c6.y -mul.f r3.w, r4.z, c6.x -mov.f32f32 r1.z, r1.z -mul.f r4.z, r2.z, r2.z -add.f r4.x, r4.x, c15.x -mov.f32f32 r3.x, r3.x -mov.f32f32 r4.w, r0.z -mov.f32f32 r3.y, r3.y -mov.f32f32 r4.x, r4.x -mov.f32f32 r3.w, r3.w -mov.f32f32 r4.w, r4.w +bary.f r3.x, 10, r0.x +mov.f32f32 r3.y, r0.z +mov.f32f32 r3.z, r0.w +sam (f32)(xyz)r3.w, r1.z, s#0, t#0 +(sy)(ss)add.f r1.z, r3.w, c15.x +mad.f32 r1.w, r3.x, r3.x, r2.x +mad.f32 r2.x, r2.w, r2.w, r2.z +bary.f (ei)r0.x, 6, r0.x +mov.f32f32 r0.y, r1.z +add.f r2.z, r4.x, c15.x +add.f r3.w, r4.y, c15.x +sam (f32)(xyz)r4.x, r3.y, s#4, t#4 +mad.f32 r2.x, r0.x, r0.x, r2.x +mul.f r1.z, r1.z, r0.y +(ss)mov.f32f32 r3.y, r2.z rsq r1.w, r1.w -mov.f32f32 r5.x, r1.y -mad.f32 r4.z, r4.x, r4.x, r4.z -(ss)mov.f32f32 r1.w, r1.w -bary.f (ei)r0.x, 10, r0.x -mov.f32f32 r5.x, r5.x -mov.f32f32 r0.y, r4.z -add.f r4.y, r4.y, c15.x -mul.f r0.w, r0.w, r1.w -mul.f r2.y, r2.y, r1.w -mul.f r1.w, r2.x, r1.w -mov.f32f32 r2.x, r4.y -sam (f32)(xyz)r4.y, r4.w, s#5, t#5 -(sy)mad.f32 r3.x, c7.z, r4.w, r3.x -mad.f32 r3.y, c7.y, r4.z, r3.y -mad.f32 r3.w, c7.x, r4.y, r3.w -mad.f32 r0.y, r2.x, r2.x, r0.y -mov.f32f32 r3.x, r3.x -mov.f32f32 r4.y, r2.w -mov.f32f32 r2.w, r3.y -mov.f32f32 r3.y, r1.y -mov.f32f32 r3.w, r3.w -mov.f32f32 r0.w, r0.w -rsq r0.y, r0.y -(ss)mov.f32f32 r0.y, r0.y -mov.f32f32 r4.z, r3.y -mov.f32f32 r2.y, r2.y -mov.f32f32 r3.y, r1.w -mul.f r1.w, r2.z, r0.y -mul.f r2.z, r4.x, r0.y -mul.f r0.y, r2.x, r0.y -nop -mov.f32f32 r2.x, r1.w -mad.f32 r1.z, r0.x, r0.x, r1.z -mov.f32f32 r2.z, r2.z -mov.f32f32 r0.y, r0.y -sam (f32)(xyzw)r4.x, r4.y, s#1, t#1 -(sy)add.f r1.w, r4.y, r2.w -add.f r2.w, r4.z, r3.x -add.f r3.x, r4.x, r3.w -mul.f r3.w, r4.w, c13.x +(ss)mov.f32f32 r3.z, r1.w +(ss)mul.f r1.w, r3.x, r1.w +(sy)mul.f r3.x, r4.z, c6.z +mad.f32 r1.z, r2.z, r3.y, r1.z +mov.f32f32 r2.z, r3.w +mul.f r1.x, r1.x, r3.z +mul.f r2.y, r2.y, r3.z +rsq r2.x, r2.x +(ss)mov.f32f32 r3.z, r2.x +mad.f32 r1.z, r2.z, r2.z, r1.z +mov.f32f32 r2.z, r1.x +mov.f32f32 r4.z, r2.y +mul.f r1.y, r1.y, r3.z +mul.f r2.w, r2.w, r3.z +mov.f32f32 r3.z, r1.w +mov.f32f32 r4.w, r0.z rsq r1.z, r1.z -(ss)mov.f32f32 r1.z, r1.z -mul.f r4.x, r2.w, c8.z -mul.f r2.w, r2.w, c9.z -mul.f r4.y, r1.w, c8.y -mul.f r1.x, r1.x, r1.z -mul.f r4.z, r1.w, c9.y -mul.f r4.w, r3.x, c8.x -mul.f r3.x, r3.x, c9.x -mov.f32f32 r1.x, r1.x -mul.f r1.w, r3.z, r1.z -mul.f r0.x, r0.x, r1.z -mov.f32f32 r1.z, r3.w -mul.f r3.z, r2.x, r1.x -mov.f32f32 r3.w, r1.w -mul.f r1.w, r2.x, r1.x -mov.f32f32 r0.x, r0.x -mov.f32f32 r1.z, r1.z -mad.f32 r3.z, r2.z, r3.w, r3.z -mad.f32 r1.w, r2.z, r3.w, r1.w -mov.f32f32 r5.x, r0.z -mov.f32f32 r0.z, r0.z -mov.f32f32 r3.z, r3.z -mov.f32f32 r1.w, r1.w -mad.f32 r3.z, r0.y, r0.x, r3.z -mad.f32 r5.y, r0.y, r0.x, r1.w -mov.f32f32 r1.w, r1.z -mov.f32f32 r5.z, r5.x -mul.f r1.z, r3.z, r2.x -mul.f r2.x, r3.z, r2.z -mul.f r0.y, r3.z, r0.y -mov.f32f32 r2.z, r5.y -mov.f32f32 r1.z, r1.z -mov.f32f32 r2.x, r2.x +(ss)mov.f32f32 r5.y, r1.z +(ss)mul.f r1.z, r3.w, r1.z +mov.f32f32 r5.x, r0.w +mul.f r3.w, r4.y, c6.y +mul.f r0.y, r0.y, r5.y +mul.f r3.y, r3.y, r5.y +mov.f32f32 r4.y, r1.z +mul.f r4.x, r4.x, c6.x +mul.f r1.x, r0.y, r1.x mov.f32f32 r0.y, r0.y -max.f r2.z, r2.z, c15.z -mul.f r1.z, c15.y, r1.z -mul.f r2.x, c15.y, r2.x +mad.f32 r1.x, r3.y, r2.y, r1.x +mov.f32f32 r2.y, r3.y +mad.f32 r1.x, r1.z, r3.z, r1.x +mul.f r1.z, r0.y, r2.z +sam (f32)(xyz)r4.w, r4.w, s#5, t#5 +(sy)mad.f32 r3.x, c7.z, r5.y, r3.x +mov.f32f32 r5.y, r0.z +mul.f r0.y, r1.x, r0.y +mad.f32 r1.z, r2.y, r4.z, r1.z +mul.f r2.y, r1.x, r2.y +mul.f r1.x, r1.x, r4.y mul.f r0.y, c15.y, r0.y -mov.f32f32 r2.z, r2.z -mov.f32f32 r1.z, r1.z -mov.f32f32 r2.x, r2.x -mov.f32f32 r0.y, r0.y -mul.f r2.w, r2.w, r2.z -add.f r1.x, r1.x, (neg)r1.z -add.f r1.z, r3.w, (neg)r2.x -add.f r0.x, r0.x, (neg)r0.y -mov.f32f32 r0.y, r2.w -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.z, r1.z -mov.f32f32 r0.x, r0.x -mul.f r2.x, r4.z, r2.z -mul.f r0.w, r1.x, r0.w -mul.f r1.x, r3.x, r2.z -mad.f32 r0.w, r1.z, r2.y, r0.w -mov.f32f32 r1.z, r2.x -mov.f32f32 r2.x, r1.y -mov.f32f32 r2.y, r0.z -mov.f32f32 r0.z, r0.w -mov.f32f32 r0.w, r1.x -mad.f32 r0.x, r0.x, r3.y, r0.z -mov.f32f32 r5.w, r2.x -mov.f32f32 r0.z, r1.y -nop -mov.f32f32 r0.x, r0.x +mad.f32 r1.z, r4.y, r3.z, r1.z +mul.f r2.y, c15.y, r2.y +mul.f r1.x, c15.y, r1.x +add.f r0.y, r2.z, (neg)r0.y +max.f r1.z, r1.z, c15.z +add.f r2.y, r4.z, (neg)r2.y +mad.f32 r2.z, c7.x, r4.w, r4.x +mul.f r0.y, r0.y, r1.y +mov.f32f32 r1.y, r1.z +mad.f32 r0.y, r2.y, r2.w, r0.y +add.f r1.x, r1.w, (neg)r1.x +mul.f r0.x, r0.x, r2.x +mov.f32f32 r5.z, r0.w +mad.f32 r1.w, c7.y, r5.x, r3.w +mov.f32f32 r2.x, r0.z +mad.f32 r0.x, r1.x, r0.x, r0.y +mov.f32f32 r2.y, r0.w +sam (f32)(xyz)r0.y, r0.z, s#3, t#3 (rpt1)nop -mov.f32f32 r2.z, r0.z max.f r0.x, (neg)r0.x, c15.z -sam (f32)(xyzw)r2.w, r5.z, s#2, t#2 -(sy)mul.f r0.z, r3.y, c10.z -mul.f r1.x, r3.x, c10.y -mad.f32 r1.y, c11.x, r3.z, c15.w -mov.f32f32 r0.x, r0.x -(rpt3)nop -mov.f32f32 r1.y, r1.y -mul.f r2.x, r2.w, c10.x +sam (f32)(xyzw)r3.y, r5.y, s#1, t#1 +(sy)add.f r1.x, r3.w, r3.x +add.f r2.w, r3.z, r1.w +add.f r2.z, r3.y, r2.z +sam (f32)(xyzw)r3.x, r2.x, s#2, t#2 +mul.f r1.w, r4.x, c13.x +(ss)mul.f r2.x, r1.x, c9.z +mul.f r2.y, r2.w, c9.y log2 r0.x, r0.x -sam (f32)(xyz)r2.y, r2.y, s#3, t#3 -(rpt1)nop -(ss)mul.f r0.x, r1.y, r0.x -(rpt2)nop -mov.f32f32 r0.x, r0.x -(rpt5)nop +(sy)mad.f32 r3.w, c11.x, r3.w, c15.w +mul.f r4.x, r2.z, c9.x +mul.f r1.x, r1.x, c8.z +mul.f r2.w, r2.w, c8.y +(ss)mul.f r0.x, r3.w, r0.x +mul.f r2.x, r2.x, r1.y +mul.f r1.y, r2.y, r1.y +mul.f r1.z, r4.x, r1.z +mul.f r2.y, r2.z, c8.x +mul.f r2.z, r3.z, c10.z +mul.f r3.y, r3.y, c10.y exp2 r0.x, r0.x -(ss)mad.f32 r0.y, r0.z, r0.x, r0.y -mad.f32 r0.z, r1.x, r0.x, r1.z -(ss)mad.f32 r0.x, r2.x, r0.x, r0.w -nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.x, r0.x +mul.f r3.x, r3.x, c10.x nop -mad.f32 r0.y, c14.z, r0.y, r4.x -mad.f32 r0.z, c14.y, r0.z, r4.y -(sy)mad.f32 r0.y, c12.z, r2.w, r0.y -mad.f32 r0.z, c12.y, r2.z, r0.z -mad.f32 r0.x, c14.x, r0.x, r4.w +(ss)mad.f32 r2.x, r2.z, r0.x, r2.x +mad.f32 r1.y, r3.y, r0.x, r1.y +(ss)mad.f32 r0.x, r3.x, r0.x, r1.z nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -mad.f32 r0.x, c12.x, r2.y, r0.x +mad.f32 r1.x, c14.z, r2.x, r1.x +mad.f32 r1.y, c14.y, r1.y, r2.w +mad.f32 r1.z, c12.z, r0.w, r1.x +mad.f32 r1.y, c12.y, r0.z, r1.y +mad.f32 r0.x, c14.x, r0.x, r2.y nop -mov.f32f32 r1.z, r0.y -mov.f32f32 r1.y, r0.z -mov.f32f32 r0.x, r0.x -(rpt2)nop -mov.f32f32 r1.x, r0.x +mad.f32 r1.x, c12.x, r0.y, r0.x end nop +nop +nop ; FRAG: outputs: r1.x (1:0) ; FRAG: inputs: r0.x (5:20,cm=f,il=8,b=1) r1.x (5:21,cm=f,il=12,b=1) r2.x (5:22,cm=f,il=16,b=1) -; FRAG: 201 instructions, 0 half, 6 full +; FRAG: 122 instructions, 0 half, 6 full diff --git a/reference/xonotic-gl2/xonotic-glx-gl2-41.asm b/reference/xonotic-gl2/xonotic-glx-gl2-41.asm index 9d9cf0a..a1e4514 100644 --- a/reference/xonotic-gl2/xonotic-glx-gl2-41.asm +++ b/reference/xonotic-gl2/xonotic-glx-gl2-41.asm @@ -1,22 +1,22 @@ ; options: ; VERT: new compiler -@in(r3.w) in0 -@in(r4.x) in1 -@in(r4.y) in2 -@in(r4.z) in3 -@in(r4.w) in4 -@in(r5.x) in5 -@in(r5.y) in6 -@in(r5.z) in7 -@in(r0.x) in8 -@in(r0.y) in9 -@in(r0.z) in10 -@in(r0.w) in12 -@in(r1.x) in13 -@in(r1.y) in14 -@in(r1.z) in16 -@in(r1.w) in17 -@in(r2.x) in18 +@in(r2.w) in0 +@in(r3.x) in1 +@in(r3.y) in2 +@in(r3.z) in3 +@in(r3.w) in4 +@in(r4.x) in5 +@in(r4.y) in6 +@in(r4.z) in7 +@in(r4.w) in8 +@in(r5.x) in9 +@in(r5.y) in10 +@in(r5.z) in12 +@in(r5.w) in13 +@in(r6.x) in14 +@in(r6.y) in16 +@in(r6.z) in17 +@in(r6.w) in18 @out(r0.x) out0 @out(r0.y) out1 @out(r0.z) out2 @@ -33,99 +33,65 @@ @out(r3.y) out13 @out(r3.z) out14 @out(r3.w) out15 -(sy)(ss)mul.f r2.y, c5.x, r1.z -mul.f r2.z, c5.x, r0.w -mad.f32 r2.y, c5.y, r1.w, r2.y -mad.f32 r2.z, c5.y, r1.x, r2.z -add.f r2.w, c4.x, (neg)r3.w -mul.f r3.x, c5.x, r0.x -mov.f32f32 r2.y, r2.y -mov.f32f32 r2.z, r2.z -mad.f32 r2.y, c5.z, r2.x, r2.y -mad.f32 r2.z, c5.z, r1.y, r2.z -mul.f r1.z, r2.w, r1.z -mad.f32 r3.x, c5.y, r0.y, r3.x -mov.f32f32 r2.y, r2.y -mov.f32f32 r2.z, r2.z -add.f r5.w, c4.y, (neg)r4.x -mov.f32f32 r3.x, r3.x -mov.f32f32 r2.y, r2.y -mov.f32f32 r2.z, r2.z -mad.f32 r1.z, r5.w, r1.w, r1.z -mad.f32 r1.w, c5.z, r0.z, r3.x -mov.f32f32 r2.y, r2.y -mov.f32f32 r2.z, r2.z -mov.f32f32 r1.z, r1.z -mov.f32f32 r1.w, r1.w -mov.f32f32 r3.z, r2.y -mov.f32f32 r3.y, r2.z -add.f r6.x, c4.z, (neg)r4.y -mov.f32f32 r1.w, r1.w -mul.f r0.w, r2.w, r0.w -mul.f r2.y, c6.y, r4.w -mad.f32 r1.z, r6.x, r2.x, r1.z -mov.f32f32 r3.x, r1.w -mad.f32 r0.w, r5.w, r1.x, r0.w -mad.f32 r1.x, c7.y, r5.x, r2.y -mov.f32f32 r1.z, r1.z -mad.f32 r1.x, c8.y, r5.y, r1.x -mul.f r1.w, c6.x, r4.w -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.z, r1.z -mad.f32 r0.w, r6.x, r1.y, r0.w -mad.f32 r1.x, c9.y, r5.z, r1.x -mad.f32 r1.y, c7.x, r5.x, r1.w -mov.f32f32 r2.z, r1.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.x, r1.x -mad.f32 r1.y, c8.x, r5.y, r1.y -mul.f r0.x, r2.w, r0.x -mov.f32f32 r2.y, r0.w -mov.f32f32 r0.w, r1.x -mad.f32 r1.x, c9.x, r5.z, r1.y -mad.f32 r0.x, r5.w, r0.y, r0.x -mul.f r0.y, c0.w, r3.w -mov.f32f32 r1.y, r0.w -mov.f32f32 r0.w, r1.x -mov.f32f32 r0.x, r0.x -mad.f32 r0.y, c1.w, r4.x, r0.y -mul.f r1.z, c0.z, r3.w -mov.f32f32 r0.w, r0.w -mad.f32 r0.x, r6.x, r0.z, r0.x -(rpt1)nop -mov.f32f32 r1.x, r0.w -mov.f32f32 r2.x, r0.x -mad.f32 r0.x, c2.w, r4.y, r0.y -mad.f32 r0.y, c1.z, r4.x, r1.z -mad.f32 r0.x, c3.w, r4.z, r0.x -mad.f32 r0.y, c2.z, r4.y, r0.y -mul.f r0.z, c0.y, r3.w -mul.f r1.z, c0.x, r3.w -mov.f32f32 r0.w, r0.x -mad.f32 r0.x, c3.z, r4.z, r0.y -mad.f32 r0.y, c1.y, r4.x, r0.z -mad.f32 r1.z, c1.x, r4.x, r1.z -mul.f r1.w, c6.w, r4.w -mov.f32f32 r0.z, r0.x -mad.f32 r0.x, c2.y, r4.y, r0.y -mad.f32 r0.y, c2.x, r4.y, r1.z -mad.f32 r0.x, c3.y, r4.z, r0.x -mad.f32 r1.z, c3.x, r4.z, r0.y -mad.f32 r1.w, c7.w, r5.x, r1.w +@const(c10.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 +@const(c11.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000 +(sy)(ss)add.f r0.x, c4.x, (neg)r2.w +mul.f r0.y, c6.y, r3.w +mul.f r0.z, c6.x, r3.w +mul.f r0.w, c0.w, r2.w +mul.f r1.x, r0.x, r6.y +add.f r1.y, c4.y, (neg)r3.x +mul.f r1.z, r0.x, r5.z +mul.f r0.x, r0.x, r4.w +mad.f32 r0.y, c7.y, r4.x, r0.y +mad.f32 r1.x, r1.y, r6.z, r1.x +add.f r1.w, c4.z, (neg)r3.y +mad.f32 r1.z, r1.y, r5.w, r1.z +mad.f32 r0.x, r1.y, r5.x, r0.x +nop +mad.f32 r2.z, r1.w, r6.w, r1.x +mad.f32 r2.y, r1.w, r6.x, r1.z +mad.f32 r2.x, r1.w, r5.y, r0.x +mad.f32 r0.x, c8.y, r4.y, r0.y +mad.f32 r0.y, c7.x, r4.x, r0.z +mad.f32 r1.y, c9.y, r4.z, r0.x +mad.f32 r0.x, c8.x, r4.y, r0.y +mad.f32 r0.y, c1.w, r3.x, r0.w +mad.f32 r1.x, c9.x, r4.z, r0.x +mad.f32 r0.x, c2.w, r3.y, r0.y +mul.f r0.y, c0.z, r2.w +mad.f32 r0.w, c3.w, r3.z, r0.x +mad.f32 r0.x, c1.z, r3.x, r0.y +mul.f r0.y, c0.y, r2.w +mad.f32 r0.x, c2.z, r3.y, r0.x +mad.f32 r0.y, c1.y, r3.x, r0.y +mad.f32 r0.z, c3.z, r3.z, r0.x +mad.f32 r0.x, c2.y, r3.y, r0.y +mul.f r1.z, c0.x, r2.w +mad.f32 r0.y, c3.y, r3.z, r0.x +mad.f32 r0.x, c1.x, r3.x, r1.z +mul.f r1.z, c6.w, r3.w +mad.f32 r0.x, c2.x, r3.y, r0.x +mad.f32 r1.z, c7.w, r4.x, r1.z +mad.f32 r0.x, c3.x, r3.z, r0.x +mad.f32 r3.w, c8.w, r4.y, r1.z +mul.f r1.z, c5.x, r6.y +mul.f r1.w, c5.x, r5.z +mad.f32 r1.z, c5.y, r6.z, r1.z +mad.f32 r1.w, c5.y, r5.w, r1.w +mad.f32 r3.z, c5.z, r6.w, r1.z +mad.f32 r3.y, c5.z, r6.x, r1.w +mul.f r1.z, c5.x, r4.w mov.f32f32 r2.w, c10.x -mov.f32f32 r0.y, r0.x -mov.f32f32 r0.x, r1.z -mad.f32 r1.z, c8.w, r5.y, r1.w -mov.f32f32 r2.w, r2.w +mad.f32 r1.z, c5.y, r5.x, r1.z mov.f32f32 r1.w, c10.x -mov.f32f32 r4.x, c10.x -mov.f32f32 r3.w, r1.z -nop -mov.f32f32 r1.w, r1.w -mov.f32f32 r1.z, r4.x +mad.f32 r3.x, c5.z, r5.y, r1.z +mov.f32f32 r1.z, c10.x end nop +nop +nop ; VERT: outputs: r0.x (0:0) r1.x (5:20) r2.x (5:21) r3.x (5:22) -; VERT: inputs: r3.w (0:0,cm=f,il=8,b=0) r4.w (0:0,cm=f,il=12,b=0) r0.x (0:0,cm=7,il=16,b=0) r0.w (0:0,cm=7,il=20,b=0) r1.z (0:0,cm=7,il=24,b=0) -; VERT: 92 instructions, 0 half, 7 full +; VERT: inputs: r2.w (0:0,cm=f,il=8,b=0) r3.w (0:0,cm=f,il=12,b=0) r4.w (0:0,cm=7,il=16,b=0) r5.z (0:0,cm=7,il=20,b=0) r6.y (0:0,cm=7,il=24,b=0) +; VERT: 53 instructions, 0 half, 7 full diff --git a/reference/xonotic/xonotic05.asm b/reference/xonotic/xonotic05.asm index 2c03e4f..284e180 100644 --- a/reference/xonotic/xonotic05.asm +++ b/reference/xonotic/xonotic05.asm @@ -1,13 +1,13 @@ ; options: ; VERT: new compiler -@in(r2.x) in0 -@in(r2.y) in1 -@in(r2.z) in2 -@in(r2.w) in3 -@in(r0.x) in4 -@in(r0.y) in5 -@in(r0.z) in6 -@in(r0.w) in7 +@in(r1.x) in0 +@in(r1.y) in1 +@in(r1.z) in2 +@in(r1.w) in3 +@in(r2.x) in4 +@in(r2.y) in5 +@in(r2.z) in6 +@in(r2.w) in7 @out(r0.x) out0 @out(r0.y) out1 @out(r0.z) out2 @@ -16,39 +16,36 @@ @out(r1.y) out5 @out(r1.z) out6 @out(r1.w) out7 -(sy)(ss)mov.f32f32 r0.w, r0.w -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x -max.f r0.w, r0.w, c4.x -max.f r0.z, r0.z, c4.x -max.f r0.y, r0.y, c4.x -max.f r0.x, r0.x, c4.x -min.f r1.w, r0.w, c4.y -min.f r1.z, r0.z, c4.y -min.f r1.y, r0.y, c4.y -min.f r1.x, r0.x, c4.y -mul.f r0.x, r2.x, c0.w -mul.f r0.y, r2.x, c0.z -mad.f32 r0.x, c1.w, r2.y, r0.x -mad.f32 r0.y, c1.z, r2.y, r0.y -mad.f32 r0.x, c2.w, r2.z, r0.x -mad.f32 r0.y, c2.z, r2.z, r0.y -mad.f32 r0.w, c3.w, r2.w, r0.x -mad.f32 r0.z, c3.z, r2.w, r0.y -mul.f r0.x, r2.x, c0.y -mul.f r0.y, r2.x, c0.x -mad.f32 r0.x, c1.y, r2.y, r0.x -mad.f32 r0.y, c1.x, r2.y, r0.y -mad.f32 r0.x, c2.y, r2.z, r0.x -mad.f32 r2.x, c2.x, r2.z, r0.y -mad.f32 r0.y, c3.y, r2.w, r0.x -mad.f32 r0.x, c3.x, r2.w, r2.x +@const(c4.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000 +(sy)(ss)mul.f r0.x, r1.x, c0.w +mul.f r0.y, r1.x, c0.z +mad.f32 r0.x, c1.w, r1.y, r0.x +mad.f32 r0.y, c1.z, r1.y, r0.y +mad.f32 r0.x, c2.w, r1.z, r0.x +mad.f32 r0.y, c2.z, r1.z, r0.y +mad.f32 r0.w, c3.w, r1.w, r0.x +mad.f32 r0.z, c3.z, r1.w, r0.y +mul.f r0.x, r1.x, c0.y +mul.f r0.y, r1.x, c0.x +mad.f32 r0.x, c1.y, r1.y, r0.x +mad.f32 r0.y, c1.x, r1.y, r0.y +mad.f32 r0.x, c2.y, r1.z, r0.x +mad.f32 r1.x, c2.x, r1.z, r0.y +mad.f32 r0.y, c3.y, r1.w, r0.x +mad.f32 r0.x, c3.x, r1.w, r1.x +max.f r1.x, r2.w, c4.x +max.f r1.y, r2.z, c4.x +max.f r2.y, r2.y, c4.x +max.f r2.x, r2.x, c4.x +min.f r1.w, r1.x, c4.y +min.f r1.z, r1.y, c4.y +min.f r1.y, r2.y, c4.y +min.f r1.x, r2.x, c4.y end nop nop nop ; VERT: outputs: r0.x (0:0) r1.x (1:0) -; VERT: inputs: r2.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=f,il=12,b=0) -; VERT: 29 instructions, 0 half, 3 full +; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) r2.x (0:0,cm=f,il=12,b=0) +; VERT: 25 instructions, 0 half, 3 full diff --git a/reference/xonotic/xonotic06.asm b/reference/xonotic/xonotic06.asm index 694d656..59b8c44 100644 --- a/reference/xonotic/xonotic06.asm +++ b/reference/xonotic/xonotic06.asm @@ -6,31 +6,20 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c5.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)bary.f r0.z, 0, r0.x -bary.f r0.w, 3, r0.x -bary.f (ei)r0.x, 1, r0.x -nop -mov.f32f32 r0.y, r0.z -mov.f32f32 r0.z, r0.w -mov.f32f32 r0.x, r0.x -nop -mov.f32f32 r0.w, r0.y -mov.f32f32 r1.y, r0.z -mov.f32f32 r1.x, r0.x +bary.f r0.w, 1, r0.x +bary.f (ei)r1.x, 3, r0.x (rpt5)nop -sam.p (f32)(xyzw)r0.x, r0.w, s#0, t#0 -(sy)(ss)mul.f r0.w, r0.w, c4.w -mul.f r0.z, r0.z, c4.z -mul.f r0.y, r0.y, c4.y -mul.f r0.x, r0.x, c4.x -mov.f32f32 r1.w, r0.w -mov.f32f32 r1.z, r0.z -mov.f32f32 r1.y, r0.y -mov.f32f32 r1.x, r0.x +sam.p (f32)(xyzw)r0.x, r0.z, s#0, t#0 +(sy)mul.f r1.w, r0.w, c4.w +mul.f r1.z, r0.z, c4.z +mul.f r1.y, r0.y, c4.y +(ss)mul.f r1.x, r0.x, c4.x end nop nop ; FRAG: outputs: r1.x (1:0) -; FRAG: inputs: r0.x (5:0,cm=f,il=8,b=1) -; FRAG: 27 instructions, 0 half, 2 full +; FRAG: inputs: r0.y (5:0,cm=f,il=8,b=1) +; FRAG: 15 instructions, 0 half, 2 full diff --git a/reference/xonotic/xonotic08.asm b/reference/xonotic/xonotic08.asm index 2c03e4f..284e180 100644 --- a/reference/xonotic/xonotic08.asm +++ b/reference/xonotic/xonotic08.asm @@ -1,13 +1,13 @@ ; options: ; VERT: new compiler -@in(r2.x) in0 -@in(r2.y) in1 -@in(r2.z) in2 -@in(r2.w) in3 -@in(r0.x) in4 -@in(r0.y) in5 -@in(r0.z) in6 -@in(r0.w) in7 +@in(r1.x) in0 +@in(r1.y) in1 +@in(r1.z) in2 +@in(r1.w) in3 +@in(r2.x) in4 +@in(r2.y) in5 +@in(r2.z) in6 +@in(r2.w) in7 @out(r0.x) out0 @out(r0.y) out1 @out(r0.z) out2 @@ -16,39 +16,36 @@ @out(r1.y) out5 @out(r1.z) out6 @out(r1.w) out7 -(sy)(ss)mov.f32f32 r0.w, r0.w -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x -max.f r0.w, r0.w, c4.x -max.f r0.z, r0.z, c4.x -max.f r0.y, r0.y, c4.x -max.f r0.x, r0.x, c4.x -min.f r1.w, r0.w, c4.y -min.f r1.z, r0.z, c4.y -min.f r1.y, r0.y, c4.y -min.f r1.x, r0.x, c4.y -mul.f r0.x, r2.x, c0.w -mul.f r0.y, r2.x, c0.z -mad.f32 r0.x, c1.w, r2.y, r0.x -mad.f32 r0.y, c1.z, r2.y, r0.y -mad.f32 r0.x, c2.w, r2.z, r0.x -mad.f32 r0.y, c2.z, r2.z, r0.y -mad.f32 r0.w, c3.w, r2.w, r0.x -mad.f32 r0.z, c3.z, r2.w, r0.y -mul.f r0.x, r2.x, c0.y -mul.f r0.y, r2.x, c0.x -mad.f32 r0.x, c1.y, r2.y, r0.x -mad.f32 r0.y, c1.x, r2.y, r0.y -mad.f32 r0.x, c2.y, r2.z, r0.x -mad.f32 r2.x, c2.x, r2.z, r0.y -mad.f32 r0.y, c3.y, r2.w, r0.x -mad.f32 r0.x, c3.x, r2.w, r2.x +@const(c4.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000 +(sy)(ss)mul.f r0.x, r1.x, c0.w +mul.f r0.y, r1.x, c0.z +mad.f32 r0.x, c1.w, r1.y, r0.x +mad.f32 r0.y, c1.z, r1.y, r0.y +mad.f32 r0.x, c2.w, r1.z, r0.x +mad.f32 r0.y, c2.z, r1.z, r0.y +mad.f32 r0.w, c3.w, r1.w, r0.x +mad.f32 r0.z, c3.z, r1.w, r0.y +mul.f r0.x, r1.x, c0.y +mul.f r0.y, r1.x, c0.x +mad.f32 r0.x, c1.y, r1.y, r0.x +mad.f32 r0.y, c1.x, r1.y, r0.y +mad.f32 r0.x, c2.y, r1.z, r0.x +mad.f32 r1.x, c2.x, r1.z, r0.y +mad.f32 r0.y, c3.y, r1.w, r0.x +mad.f32 r0.x, c3.x, r1.w, r1.x +max.f r1.x, r2.w, c4.x +max.f r1.y, r2.z, c4.x +max.f r2.y, r2.y, c4.x +max.f r2.x, r2.x, c4.x +min.f r1.w, r1.x, c4.y +min.f r1.z, r1.y, c4.y +min.f r1.y, r2.y, c4.y +min.f r1.x, r2.x, c4.y end nop nop nop ; VERT: outputs: r0.x (0:0) r1.x (1:0) -; VERT: inputs: r2.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=f,il=12,b=0) -; VERT: 29 instructions, 0 half, 3 full +; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) r2.x (0:0,cm=f,il=12,b=0) +; VERT: 25 instructions, 0 half, 3 full diff --git a/reference/xonotic/xonotic09.asm b/reference/xonotic/xonotic09.asm index 4a651b1..16ecc3c 100644 --- a/reference/xonotic/xonotic09.asm +++ b/reference/xonotic/xonotic09.asm @@ -6,31 +6,24 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)bary.f r0.z, 4, r0.x -bary.f r0.w, 7, r0.x -bary.f r1.x, 5, r0.x +bary.f r0.w, 5, r0.x +bary.f r1.x, 7, r0.x bary.f r1.y, 3, r0.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.x, r1.x bary.f r1.z, 2, r0.x -mov.f32f32 r1.w, r0.z -mov.f32f32 r2.y, r0.w -mov.f32f32 r2.x, r1.x -bary.f r0.z, 1, r0.x +bary.f r2.x, 1, r0.x bary.f (ei)r0.x, 0, r0.x -(rpt3)nop -sam.p (f32)(xyzw)r1.w, r1.w, s#0, t#0 -(sy)mul.f r0.y, r2.z, r1.y -mul.f r0.w, r2.y, r1.z -mul.f r0.z, r2.x, r0.z -mul.f r0.x, r1.w, r0.x -(ss)mov.f32f32 r1.w, r0.y -mov.f32f32 r1.z, r0.w -mov.f32f32 r1.y, r0.z -mov.f32f32 r1.x, r0.x +(rpt1)nop +sam.p (f32)(xyzw)r0.y, r0.z, s#0, t#0 +(sy)mul.f r1.w, r1.x, r1.y +mul.f r1.z, r0.w, r1.z +mul.f r1.y, r0.z, r2.x +(ss)mul.f r1.x, r0.y, r0.x end +nop +nop ; FRAG: outputs: r1.x (1:0) -; FRAG: inputs: r0.z (1:0,cm=f,il=8,b=1) r0.x (5:0,cm=f,il=12,b=1) -; FRAG: 27 instructions, 0 half, 3 full +; FRAG: inputs: r0.z (1:0,cm=f,il=8,b=1) r0.y (5:0,cm=f,il=12,b=1) +; FRAG: 15 instructions, 0 half, 3 full diff --git a/reference/xonotic/xonotic10.asm b/reference/xonotic/xonotic10.asm index cca09e5..0583b5d 100644 --- a/reference/xonotic/xonotic10.asm +++ b/reference/xonotic/xonotic10.asm @@ -1,17 +1,17 @@ ; options: ; VERT: new compiler -@in(r2.x) in0 -@in(r2.y) in1 -@in(r2.z) in2 -@in(r2.w) in3 -@in(r0.x) in4 -@in(r0.y) in5 -@in(r0.z) in6 -@in(r0.w) in7 -@in(r3.x) in8 -@in(r3.y) in9 -@in(r3.z) in10 -@in(r3.w) in11 +@in(r1.x) in0 +@in(r1.y) in1 +@in(r1.z) in2 +@in(r1.w) in3 +@in(r3.x) in4 +@in(r3.y) in5 +@in(r3.z) in6 +@in(r3.w) in7 +@in(r2.x) in8 +@in(r2.y) in9 +@in(r2.z) in10 +@in(r2.w) in11 @out(r0.x) out0 @out(r0.y) out1 @out(r0.z) out2 @@ -24,43 +24,36 @@ @out(r2.y) out9 @out(r2.z) out10 @out(r2.w) out11 -(sy)(ss)mov.f32f32 r0.w, r0.w -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x -max.f r0.w, r0.w, c4.x -max.f r0.z, r0.z, c4.x -max.f r0.y, r0.y, c4.x -max.f r0.x, r0.x, c4.x -min.f r1.w, r0.w, c4.y -min.f r1.z, r0.z, c4.y -min.f r1.y, r0.y, c4.y -min.f r1.x, r0.x, c4.y -mul.f r0.x, r2.x, c0.w -mul.f r0.y, r2.x, c0.z -mad.f32 r0.x, c1.w, r2.y, r0.x -mad.f32 r0.y, c1.z, r2.y, r0.y -mad.f32 r0.x, c2.w, r2.z, r0.x -mad.f32 r0.y, c2.z, r2.z, r0.y -mad.f32 r0.w, c3.w, r2.w, r0.x -mad.f32 r0.z, c3.z, r2.w, r0.y -mul.f r0.x, r2.x, c0.y -mul.f r0.y, r2.x, c0.x -mad.f32 r0.x, c1.y, r2.y, r0.x -mad.f32 r0.y, c1.x, r2.y, r0.y -mad.f32 r0.x, c2.y, r2.z, r0.x -mad.f32 r2.x, c2.x, r2.z, r0.y -mad.f32 r0.y, c3.y, r2.w, r0.x -mad.f32 r0.x, c3.x, r2.w, r2.x -mov.f32f32 r2.w, r3.w -mov.f32f32 r2.z, r3.z -mov.f32f32 r2.y, r3.y -mov.f32f32 r2.x, r3.x +@const(c4.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000 +(sy)(ss)mul.f r0.x, r1.x, c0.w +mul.f r0.y, r1.x, c0.z +mad.f32 r0.x, c1.w, r1.y, r0.x +mad.f32 r0.y, c1.z, r1.y, r0.y +mad.f32 r0.x, c2.w, r1.z, r0.x +mad.f32 r0.y, c2.z, r1.z, r0.y +mad.f32 r0.w, c3.w, r1.w, r0.x +mad.f32 r0.z, c3.z, r1.w, r0.y +mul.f r0.x, r1.x, c0.y +mul.f r0.y, r1.x, c0.x +mad.f32 r0.x, c1.y, r1.y, r0.x +mad.f32 r0.y, c1.x, r1.y, r0.y +mad.f32 r0.x, c2.y, r1.z, r0.x +mad.f32 r1.x, c2.x, r1.z, r0.y +mad.f32 r0.y, c3.y, r1.w, r0.x +mad.f32 r0.x, c3.x, r1.w, r1.x +max.f r1.x, r3.w, c4.x +max.f r1.y, r3.z, c4.x +max.f r3.y, r3.y, c4.x +max.f r3.x, r3.x, c4.x +min.f r1.w, r1.x, c4.y +min.f r1.z, r1.y, c4.y +min.f r1.y, r3.y, c4.y +min.f r1.x, r3.x, c4.y end nop nop nop ; VERT: outputs: r0.x (0:0) r1.x (1:0) r2.x (5:0) -; VERT: inputs: r2.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=f,il=12,b=0) r3.x (0:0,cm=f,il=16,b=0) -; VERT: 33 instructions, 0 half, 4 full +; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) r3.x (0:0,cm=f,il=12,b=0) r2.x (0:0,cm=f,il=16,b=0) +; VERT: 25 instructions, 0 half, 4 full diff --git a/reference/xonotic/xonotic11.asm b/reference/xonotic/xonotic11.asm index 2c03e4f..284e180 100644 --- a/reference/xonotic/xonotic11.asm +++ b/reference/xonotic/xonotic11.asm @@ -1,13 +1,13 @@ ; options: ; VERT: new compiler -@in(r2.x) in0 -@in(r2.y) in1 -@in(r2.z) in2 -@in(r2.w) in3 -@in(r0.x) in4 -@in(r0.y) in5 -@in(r0.z) in6 -@in(r0.w) in7 +@in(r1.x) in0 +@in(r1.y) in1 +@in(r1.z) in2 +@in(r1.w) in3 +@in(r2.x) in4 +@in(r2.y) in5 +@in(r2.z) in6 +@in(r2.w) in7 @out(r0.x) out0 @out(r0.y) out1 @out(r0.z) out2 @@ -16,39 +16,36 @@ @out(r1.y) out5 @out(r1.z) out6 @out(r1.w) out7 -(sy)(ss)mov.f32f32 r0.w, r0.w -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x -max.f r0.w, r0.w, c4.x -max.f r0.z, r0.z, c4.x -max.f r0.y, r0.y, c4.x -max.f r0.x, r0.x, c4.x -min.f r1.w, r0.w, c4.y -min.f r1.z, r0.z, c4.y -min.f r1.y, r0.y, c4.y -min.f r1.x, r0.x, c4.y -mul.f r0.x, r2.x, c0.w -mul.f r0.y, r2.x, c0.z -mad.f32 r0.x, c1.w, r2.y, r0.x -mad.f32 r0.y, c1.z, r2.y, r0.y -mad.f32 r0.x, c2.w, r2.z, r0.x -mad.f32 r0.y, c2.z, r2.z, r0.y -mad.f32 r0.w, c3.w, r2.w, r0.x -mad.f32 r0.z, c3.z, r2.w, r0.y -mul.f r0.x, r2.x, c0.y -mul.f r0.y, r2.x, c0.x -mad.f32 r0.x, c1.y, r2.y, r0.x -mad.f32 r0.y, c1.x, r2.y, r0.y -mad.f32 r0.x, c2.y, r2.z, r0.x -mad.f32 r2.x, c2.x, r2.z, r0.y -mad.f32 r0.y, c3.y, r2.w, r0.x -mad.f32 r0.x, c3.x, r2.w, r2.x +@const(c4.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000 +(sy)(ss)mul.f r0.x, r1.x, c0.w +mul.f r0.y, r1.x, c0.z +mad.f32 r0.x, c1.w, r1.y, r0.x +mad.f32 r0.y, c1.z, r1.y, r0.y +mad.f32 r0.x, c2.w, r1.z, r0.x +mad.f32 r0.y, c2.z, r1.z, r0.y +mad.f32 r0.w, c3.w, r1.w, r0.x +mad.f32 r0.z, c3.z, r1.w, r0.y +mul.f r0.x, r1.x, c0.y +mul.f r0.y, r1.x, c0.x +mad.f32 r0.x, c1.y, r1.y, r0.x +mad.f32 r0.y, c1.x, r1.y, r0.y +mad.f32 r0.x, c2.y, r1.z, r0.x +mad.f32 r1.x, c2.x, r1.z, r0.y +mad.f32 r0.y, c3.y, r1.w, r0.x +mad.f32 r0.x, c3.x, r1.w, r1.x +max.f r1.x, r2.w, c4.x +max.f r1.y, r2.z, c4.x +max.f r2.y, r2.y, c4.x +max.f r2.x, r2.x, c4.x +min.f r1.w, r1.x, c4.y +min.f r1.z, r1.y, c4.y +min.f r1.y, r2.y, c4.y +min.f r1.x, r2.x, c4.y end nop nop nop ; VERT: outputs: r0.x (0:0) r1.x (1:0) -; VERT: inputs: r2.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=f,il=12,b=0) -; VERT: 29 instructions, 0 half, 3 full +; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) r2.x (0:0,cm=f,il=12,b=0) +; VERT: 25 instructions, 0 half, 3 full diff --git a/reference/xonotic/xonotic14.asm b/reference/xonotic/xonotic14.asm index ed21067..1948189 100644 --- a/reference/xonotic/xonotic14.asm +++ b/reference/xonotic/xonotic14.asm @@ -6,67 +6,40 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c6.x) 0x40800000, 0x3f800000, 0x00000000, 0x00000000 (sy)(ss)bary.f r0.z, 0, r0.x -bary.f r0.w, 4, r0.x +bary.f r0.w, 1, r0.x bary.f r1.x, 3, r0.x -bary.f r1.y, 1, r0.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.z, r0.z -mov.f32f32 r2.y, r0.w -bary.f r0.z, 5, r0.x -mov.f32f32 r1.w, r1.y -mov.f32f32 r2.x, r1.x -bary.f (ei)r0.x, 7, r0.x -mov.f32f32 r0.y, r0.z -mov.f32f32 r0.z, c5.w -(rpt2)nop -sam.p (f32)(xyzw)r0.w, r1.z, s#1, t#1 -(sy)mul.f r1.y, r1.y, c5.z -mul.f r1.x, r1.x, c5.y -mul.f r0.w, r0.w, c5.x -mov.f32f32 r2.z, r0.y -mov.f32f32 r0.y, r1.y -mov.f32f32 r1.x, r1.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r1.x, r1.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r2.w, r0.x -mov.f32f32 r0.x, r0.y -mov.f32f32 r0.y, r1.x -mov.f32f32 r0.w, r0.w -(rpt2)nop -(ss)nop -sam.p (f32)(xyzw)r1.x, r2.y, s#0, t#0 -(sy)mul.f r0.x, r1.z, r0.x -mul.f r0.y, r1.y, r0.y -mul.f r0.w, r1.x, r0.w -mul.f r0.z, r1.w, r0.z -mul.f r0.x, r0.x, c6.x -mul.f r0.y, r0.y, c6.x +bary.f r1.y, 4, r0.x +bary.f r1.z, 5, r0.x +bary.f (ei)r1.w, 7, r0.x +mov.f32f32 r0.x, c5.w +(rpt1)nop +sam.p (f32)(xyzw)r0.y, r0.z, s#1, t#1 +(sy)(ss)mul.f r0.w, r0.w, c5.z +mul.f r0.z, r0.z, c5.y +mul.f r0.y, r0.y, c5.x +sam.p (f32)(xyzw)r1.x, r1.y, s#0, t#0 +(sy)mul.f r0.x, r1.w, r0.x +mul.f r0.w, r1.z, r0.w +mul.f r0.z, r1.y, r0.z +mul.f r0.y, r1.x, r0.y +mul.f r0.x, r0.x, c6.y mul.f r0.w, r0.w, c6.x -mul.f r0.z, r0.z, c6.y +mul.f r0.z, r0.z, c6.x +mul.f r0.y, r0.y, c6.x max.f r0.x, r0.x, c6.z -max.f r0.y, r0.y, c6.z max.f r0.w, r0.w, c6.z max.f r0.z, r0.z, c6.z -min.f r0.x, r0.x, c6.y -min.f r0.y, r0.y, c6.y -min.f r0.w, r0.w, c6.y -nop -mov.f32f32 r1.z, r0.x -mov.f32f32 r1.y, r0.y -mov.f32f32 r1.x, r0.w -min.f r0.x, r0.z, c6.y -(rpt2)nop -mov.f32f32 r1.w, r0.x +max.f r0.y, r0.y, c6.z +(ss)min.f r1.w, r0.x, c6.y +min.f r1.z, r0.w, c6.y +min.f r1.y, r0.z, c6.y +min.f r1.x, r0.y, c6.y end nop +nop ; FRAG: outputs: r1.x (1:0) -; FRAG: inputs: r0.y (5:0,cm=f,il=8,b=1) r63.y (5:1,cm=f,il=12,b=1) -; FRAG: 65 instructions, 0 half, 3 full +; FRAG: inputs: r0.y (5:0,cm=f,il=8,b=1) r1.x (5:1,cm=f,il=12,b=1) +; FRAG: 31 instructions, 0 half, 2 full diff --git a/reference/xonotic/xonotic16.asm b/reference/xonotic/xonotic16.asm index 36bb179..5a863b3 100644 --- a/reference/xonotic/xonotic16.asm +++ b/reference/xonotic/xonotic16.asm @@ -6,43 +6,20 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c5.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)bary.f r0.z, 0, r0.x -bary.f r0.w, 3, r0.x -bary.f (ei)r0.x, 1, r0.x -mov.f32f32 r0.y, c4.w -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.x, r0.x -mov.f32f32 r1.w, r0.y -mov.f32f32 r1.x, r0.z -mov.f32f32 r1.z, r0.w -mov.f32f32 r1.y, r0.x -(rpt5)nop -sam.p (f32)(xyzw)r0.x, r1.x, s#0, t#0 -(sy)mul.f r0.z, r0.z, c4.z -mul.f r0.y, r0.y, c4.y -mul.f r0.x, r0.x, c4.x -nop -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x -nop -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x -nop -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x -nop -(ss)mov.f32f32 r1.z, r0.z -mov.f32f32 r1.y, r0.y -mov.f32f32 r1.x, r0.x +bary.f r0.w, 1, r0.x +bary.f (ei)r1.x, 3, r0.x +mov.f32f32 r1.w, c4.w +(rpt4)nop +sam.p (f32)(xyzw)r0.x, r0.z, s#0, t#0 +(sy)mul.f r1.z, r0.z, c4.z +mul.f r1.y, r0.y, c4.y +(ss)mul.f r1.x, r0.x, c4.x end nop nop -nop ; FRAG: outputs: r1.x (1:0) -; FRAG: inputs: r0.x (5:0,cm=f,il=8,b=1) -; FRAG: 38 instructions, 0 half, 2 full +; FRAG: inputs: r0.y (5:0,cm=f,il=8,b=1) +; FRAG: 14 instructions, 0 half, 2 full diff --git a/reference/xonotic/xonotic17.asm b/reference/xonotic/xonotic17.asm index e65c627..8afec54 100644 --- a/reference/xonotic/xonotic17.asm +++ b/reference/xonotic/xonotic17.asm @@ -6,67 +6,44 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c11.x) 0x40000000, 0x3f800000, 0x00000000, 0x00000000 (sy)(ss)mov.f32f32 r0.z, c10.x -bary.f r0.w, 4, r0.x -bary.f r1.x, 7, r0.x -bary.f r1.y, 5, r0.x -mov.f32f32 r1.z, r0.z -mov.f32f32 r0.z, c10.y -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.w, r0.z -mov.f32f32 r0.z, c10.w -mov.f32f32 r2.y, r0.w -mov.f32f32 r2.z, r1.y -mov.f32f32 r2.w, r1.x -mov.f32f32 r2.x, r0.z -bary.f r0.z, 3, r0.x -(rpt1)nop -bary.f r0.w, 2, r0.x -bary.f r1.x, 1, r0.x -sam.p (f32)(xyzw)r2.y, r2.y, s#0, t#0 -mov.f32f32 r0.z, r0.z -sam.p (f32)(xyzw)r1.y, r1.z, s#1, t#1 +mov.f32f32 r0.w, c10.y +mov.f32f32 r1.x, c10.w +bary.f r1.y, 4, r0.x +bary.f r1.z, 5, r0.x +bary.f r1.w, 7, r0.x +bary.f r2.x, 3, r0.x +bary.f r2.y, 2, r0.x +bary.f r2.z, 1, r0.x +sam.p (f32)(xyzw)r2.w, r0.z, s#1, t#1 bary.f (ei)r0.x, 0, r0.x -(sy)mul.f r0.y, r1.w, r0.w -mul.f r0.w, r1.z, r1.x -mul.f r0.z, r3.x, r0.z -mul.f r0.x, r1.y, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.w, r0.w -mul.f r0.z, r0.z, c11.y -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.w, r0.w -max.f r0.z, r0.z, c11.z -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.w, r0.w -min.f r0.z, r0.z, c11.y -mov.f32f32 r0.x, r0.x -mul.f r0.y, r2.w, r0.y -mul.f r0.w, r2.z, r0.w -(ss)mov.f32f32 r1.w, r0.z -mul.f r0.x, r2.y, r0.x +nop +(sy)mul.f r0.y, r3.y, r2.y +(ss)nop +sam.p (f32)(xyzw)r0.z, r1.y, s#0, t#0 +(ss)mul.f r1.z, r3.x, r2.z +mul.f r0.x, r2.w, r0.x +(sy)mul.f r1.y, r1.y, r2.x +mul.f r0.y, r1.x, r0.y +mul.f r0.w, r0.w, r1.z +mul.f r0.x, r0.z, r0.x +mul.f r0.z, r1.y, c11.y mul.f r0.y, r0.y, c11.x -mul.f r0.z, r0.w, c11.x -(rpt1)nop -max.f r0.y, r0.y, c11.z -max.f r0.z, r0.z, c11.z +mul.f r0.w, r0.w, c11.x mul.f r0.x, r0.x, c11.x -nop -min.f r0.y, r0.y, c11.y -min.f r0.z, r0.z, c11.y +max.f r0.z, r0.z, c11.z +max.f r0.y, r0.y, c11.z +max.f r0.w, r0.w, c11.z max.f r0.x, r0.x, c11.z -nop -mov.f32f32 r1.z, r0.y -mov.f32f32 r1.y, r0.z -min.f r0.x, r0.x, c11.y -(rpt2)nop -mov.f32f32 r1.x, r0.x +min.f r1.w, r0.z, c11.y +min.f r1.z, r0.y, c11.y +min.f r1.y, r0.w, c11.y +min.f r1.x, r0.x, c11.y end +nop +nop ; FRAG: outputs: r1.x (1:0) -; FRAG: inputs: r63.w (1:0,cm=f,il=8,b=1) r0.y (5:1,cm=f,il=12,b=1) -; FRAG: 64 instructions, 0 half, 4 full +; FRAG: inputs: r1.y (1:0,cm=f,il=8,b=1) r1.x (5:1,cm=f,il=12,b=1) +; FRAG: 34 instructions, 0 half, 4 full diff --git a/reference/xonotic/xonotic18.asm b/reference/xonotic/xonotic18.asm index 2be832a..04c1964 100644 --- a/reference/xonotic/xonotic18.asm +++ b/reference/xonotic/xonotic18.asm @@ -1,17 +1,17 @@ ; options: ; VERT: new compiler -@in(r2.x) in0 -@in(r2.y) in1 -@in(r2.z) in2 -@in(r2.w) in3 -@in(r0.x) in4 -@in(r0.y) in5 -@in(r0.z) in6 -@in(r0.w) in7 -@in(r3.x) in8 -@in(r3.y) in9 -@in(r3.z) in10 -@in(r3.w) in11 +@in(r1.x) in0 +@in(r1.y) in1 +@in(r1.z) in2 +@in(r1.w) in3 +@in(r3.x) in4 +@in(r3.y) in5 +@in(r3.z) in6 +@in(r3.w) in7 +@in(r2.x) in8 +@in(r2.y) in9 +@in(r2.z) in10 +@in(r2.w) in11 @out(r0.x) out0 @out(r0.y) out1 @out(r0.z) out2 @@ -24,43 +24,36 @@ @out(r2.y) out9 @out(r2.z) out10 @out(r2.w) out11 -(sy)(ss)mov.f32f32 r0.w, r0.w -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x -max.f r0.w, r0.w, c4.x -max.f r0.z, r0.z, c4.x -max.f r0.y, r0.y, c4.x -max.f r0.x, r0.x, c4.x -min.f r1.w, r0.w, c4.y -min.f r1.z, r0.z, c4.y -min.f r1.y, r0.y, c4.y -min.f r1.x, r0.x, c4.y -mul.f r0.x, r2.x, c0.w -mul.f r0.y, r2.x, c0.z -mad.f32 r0.x, c1.w, r2.y, r0.x -mad.f32 r0.y, c1.z, r2.y, r0.y -mad.f32 r0.x, c2.w, r2.z, r0.x -mad.f32 r0.y, c2.z, r2.z, r0.y -mad.f32 r0.w, c3.w, r2.w, r0.x -mad.f32 r0.z, c3.z, r2.w, r0.y -mul.f r0.x, r2.x, c0.y -mul.f r0.y, r2.x, c0.x -mad.f32 r0.x, c1.y, r2.y, r0.x -mad.f32 r0.y, c1.x, r2.y, r0.y -mad.f32 r0.x, c2.y, r2.z, r0.x -mad.f32 r2.x, c2.x, r2.z, r0.y -mad.f32 r0.y, c3.y, r2.w, r0.x -mad.f32 r0.x, c3.x, r2.w, r2.x -mov.f32f32 r2.w, r3.w -mov.f32f32 r2.z, r3.z -mov.f32f32 r2.y, r3.y -mov.f32f32 r2.x, r3.x +@const(c4.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000 +(sy)(ss)mul.f r0.x, r1.x, c0.w +mul.f r0.y, r1.x, c0.z +mad.f32 r0.x, c1.w, r1.y, r0.x +mad.f32 r0.y, c1.z, r1.y, r0.y +mad.f32 r0.x, c2.w, r1.z, r0.x +mad.f32 r0.y, c2.z, r1.z, r0.y +mad.f32 r0.w, c3.w, r1.w, r0.x +mad.f32 r0.z, c3.z, r1.w, r0.y +mul.f r0.x, r1.x, c0.y +mul.f r0.y, r1.x, c0.x +mad.f32 r0.x, c1.y, r1.y, r0.x +mad.f32 r0.y, c1.x, r1.y, r0.y +mad.f32 r0.x, c2.y, r1.z, r0.x +mad.f32 r1.x, c2.x, r1.z, r0.y +mad.f32 r0.y, c3.y, r1.w, r0.x +mad.f32 r0.x, c3.x, r1.w, r1.x +max.f r1.x, r3.w, c4.x +max.f r1.y, r3.z, c4.x +max.f r3.y, r3.y, c4.x +max.f r3.x, r3.x, c4.x +min.f r1.w, r1.x, c4.y +min.f r1.z, r1.y, c4.y +min.f r1.y, r3.y, c4.y +min.f r1.x, r3.x, c4.y end nop nop nop ; VERT: outputs: r0.x (0:0) r1.x (1:0) r2.x (5:1) -; VERT: inputs: r2.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=f,il=12,b=0) r3.x (0:0,cm=f,il=16,b=0) -; VERT: 33 instructions, 0 half, 4 full +; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) r3.x (0:0,cm=f,il=12,b=0) r2.x (0:0,cm=f,il=16,b=0) +; VERT: 25 instructions, 0 half, 4 full diff --git a/reference/xonotic/xonotic20.asm b/reference/xonotic/xonotic20.asm index f8cdc3f..78eedee 100644 --- a/reference/xonotic/xonotic20.asm +++ b/reference/xonotic/xonotic20.asm @@ -6,47 +6,24 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000 (sy)(ss)bary.f r0.z, 4, r0.x -bary.f r0.w, 3, r0.x +bary.f r0.w, 5, r0.x bary.f r1.x, 7, r0.x -bary.f r1.y, 5, r0.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.y, r1.y -mov.f32f32 r2.x, r0.z -mov.f32f32 r1.w, r0.w -mov.f32f32 r2.z, r1.x -mov.f32f32 r2.y, r1.y -bary.f r0.z, 2, r0.x -bary.f r0.w, 1, r0.x +bary.f r1.w, 3, r0.x +bary.f r1.y, 2, r0.x +bary.f r2.x, 1, r0.x bary.f (ei)r0.x, 0, r0.x -(rpt2)nop -sam.p (f32)(xyzw)r2.x, r2.x, s#0, t#0 -(sy)mul.f r0.y, r2.z, r0.z -mul.f r0.z, r2.y, r0.w -mul.f r0.x, r2.x, r0.x -nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.x, r0.x -nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.x, r0.x -nop -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.x, r0.x -nop -mov.f32f32 r1.z, r0.y -mov.f32f32 r1.y, r0.z -mov.f32f32 r1.x, r0.x +(rpt1)nop +sam.p (f32)(xyzw)r0.y, r0.z, s#0, t#0 +(sy)mul.f r1.z, r0.w, r1.y +mul.f r1.y, r0.z, r2.x +(ss)mul.f r1.x, r0.y, r0.x end nop nop nop ; FRAG: outputs: r1.x (1:0) -; FRAG: inputs: r0.x (1:0,cm=f,il=8,b=1) r0.y (5:0,cm=f,il=12,b=1) -; FRAG: 39 instructions, 0 half, 3 full +; FRAG: inputs: r1.x (1:0,cm=f,il=8,b=1) r0.y (5:0,cm=f,il=12,b=1) +; FRAG: 14 instructions, 0 half, 3 full diff --git a/reference/xonotic/xonotic21.asm b/reference/xonotic/xonotic21.asm index e5f803f..ca2a36c 100644 --- a/reference/xonotic/xonotic21.asm +++ b/reference/xonotic/xonotic21.asm @@ -6,67 +6,44 @@ @out(r1.y) out1 @out(r1.z) out2 @out(r1.w) out3 +@const(c0.x) 0x40800000, 0x3f800000, 0x00000000, 0x00000000 (sy)(ss)bary.f r0.z, 4, r0.x -bary.f r0.w, 8, r0.x +bary.f r0.w, 5, r0.x bary.f r1.x, 7, r0.x -bary.f r1.y, 5, r0.x -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.w, r0.w -mov.f32f32 r1.x, r1.x -mov.f32f32 r1.y, r1.y -mov.f32f32 r1.z, r0.z -mov.f32f32 r2.y, r0.w -bary.f r0.z, 9, r0.x -mov.f32f32 r1.w, r1.y -mov.f32f32 r2.x, r1.x -bary.f r0.w, 11, r0.x -mov.f32f32 r0.z, r0.z -bary.f r1.x, 3, r0.x -bary.f r1.y, 2, r0.x -bary.f r2.w, 1, r0.x +bary.f r1.y, 8, r0.x +bary.f r1.z, 9, r0.x +bary.f r1.w, 11, r0.x +bary.f r2.x, 3, r0.x +bary.f r2.y, 2, r0.x +bary.f r2.z, 1, r0.x +sam.p (f32)(xyzw)r2.w, r0.z, s#1, t#1 bary.f (ei)r0.x, 0, r0.x -sam.p (f32)(xyzw)r3.x, r1.z, s#1, t#1 -mov.f32f32 r2.z, r0.z -(sy)mul.f r0.y, r3.z, r1.y -mul.f r0.z, r3.y, r2.w -mul.f r0.x, r3.x, r0.x -mov.f32f32 r0.w, r0.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.x, r0.x -mov.f32f32 r2.w, r0.w -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.x, r0.x -mov.f32f32 r0.w, r1.x -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.z, r0.z +nop +(sy)mul.f r0.y, r3.y, r2.y (ss)nop -sam.p (f32)(xyzw)r1.x, r2.y, s#0, t#0 -mov.f32f32 r0.x, r0.x -(sy)mul.f r0.w, r1.w, r0.w -mul.f r0.y, r1.z, r0.y -mul.f r0.z, r1.y, r0.z -mul.f r0.x, r1.x, r0.x -mul.f r0.w, r0.w, c0.y +sam.p (f32)(xyzw)r0.z, r1.y, s#0, t#0 +(ss)mul.f r1.z, r3.x, r2.z +mul.f r0.x, r2.w, r0.x +(sy)mul.f r1.y, r1.y, r2.x +mul.f r0.y, r1.x, r0.y +mul.f r0.w, r0.w, r1.z +mul.f r0.x, r0.z, r0.x +mul.f r0.z, r1.y, c0.y mul.f r0.y, r0.y, c0.x -mul.f r0.z, r0.z, c0.x +mul.f r0.w, r0.w, c0.x mul.f r0.x, r0.x, c0.x -max.f r0.w, r0.w, c0.z -max.f r0.y, r0.y, c0.z max.f r0.z, r0.z, c0.z +max.f r0.y, r0.y, c0.z +max.f r0.w, r0.w, c0.z max.f r0.x, r0.x, c0.z -min.f r0.w, r0.w, c0.y -min.f r0.y, r0.y, c0.y -min.f r0.z, r0.z, c0.y -min.f r0.x, r0.x, c0.y -nop -mov.f32f32 r1.z, r0.y -mov.f32f32 r1.y, r0.z -mov.f32f32 r1.x, r0.x -mov.f32f32 r1.w, r0.w +min.f r1.w, r0.z, c0.y +min.f r1.z, r0.y, c0.y +min.f r1.y, r0.w, c0.y +min.f r1.x, r0.x, c0.y end +nop +nop ; FRAG: outputs: r1.x (1:0) -; FRAG: inputs: r0.y (1:0,cm=f,il=8,b=1) r0.y (5:0,cm=f,il=12,b=1) r0.x (5:1,cm=f,il=16,b=1) -; FRAG: 60 instructions, 0 half, 4 full +; FRAG: inputs: r1.y (1:0,cm=f,il=8,b=1) r0.y (5:0,cm=f,il=12,b=1) r1.x (5:1,cm=f,il=16,b=1) +; FRAG: 34 instructions, 0 half, 4 full diff --git a/reference/xonotic/xonotic22.asm b/reference/xonotic/xonotic22.asm index 9446af7..c880414 100644 --- a/reference/xonotic/xonotic22.asm +++ b/reference/xonotic/xonotic22.asm @@ -1,21 +1,21 @@ ; options: ; VERT: new compiler -@in(r2.x) in0 -@in(r2.y) in1 -@in(r2.z) in2 -@in(r2.w) in3 -@in(r0.x) in4 -@in(r0.y) in5 -@in(r0.z) in6 -@in(r0.w) in7 -@in(r4.x) in8 -@in(r4.y) in9 -@in(r4.z) in10 -@in(r4.w) in11 -@in(r5.x) in12 -@in(r5.y) in13 -@in(r5.z) in14 -@in(r5.w) in15 +@in(r1.x) in0 +@in(r1.y) in1 +@in(r1.z) in2 +@in(r1.w) in3 +@in(r4.x) in4 +@in(r4.y) in5 +@in(r4.z) in6 +@in(r4.w) in7 +@in(r2.x) in8 +@in(r2.y) in9 +@in(r2.z) in10 +@in(r2.w) in11 +@in(r3.x) in12 +@in(r3.y) in13 +@in(r3.z) in14 +@in(r3.w) in15 @out(r0.x) out0 @out(r0.y) out1 @out(r0.z) out2 @@ -32,47 +32,36 @@ @out(r3.y) out13 @out(r3.z) out14 @out(r3.w) out15 -(sy)(ss)mov.f32f32 r0.w, r0.w -mov.f32f32 r0.z, r0.z -mov.f32f32 r0.y, r0.y -mov.f32f32 r0.x, r0.x -max.f r0.w, r0.w, c4.x -max.f r0.z, r0.z, c4.x -max.f r0.y, r0.y, c4.x -max.f r0.x, r0.x, c4.x -min.f r1.w, r0.w, c4.y -min.f r1.z, r0.z, c4.y -min.f r1.y, r0.y, c4.y -min.f r1.x, r0.x, c4.y -mul.f r0.x, r2.x, c0.w -mul.f r0.y, r2.x, c0.z -mad.f32 r0.x, c1.w, r2.y, r0.x -mad.f32 r0.y, c1.z, r2.y, r0.y -mad.f32 r0.x, c2.w, r2.z, r0.x -mad.f32 r0.y, c2.z, r2.z, r0.y -mad.f32 r0.w, c3.w, r2.w, r0.x -mad.f32 r0.z, c3.z, r2.w, r0.y -mul.f r0.x, r2.x, c0.y -mul.f r0.y, r2.x, c0.x -mad.f32 r0.x, c1.y, r2.y, r0.x -mad.f32 r0.y, c1.x, r2.y, r0.y -mad.f32 r0.x, c2.y, r2.z, r0.x -mad.f32 r2.x, c2.x, r2.z, r0.y -mad.f32 r0.y, c3.y, r2.w, r0.x -mad.f32 r0.x, c3.x, r2.w, r2.x -mov.f32f32 r3.w, r5.w -mov.f32f32 r3.z, r5.z -mov.f32f32 r3.y, r5.y -mov.f32f32 r3.x, r5.x -mov.f32f32 r2.w, r4.w -mov.f32f32 r2.z, r4.z -mov.f32f32 r2.y, r4.y -mov.f32f32 r2.x, r4.x +@const(c4.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000 +(sy)(ss)mul.f r0.x, r1.x, c0.w +mul.f r0.y, r1.x, c0.z +mad.f32 r0.x, c1.w, r1.y, r0.x +mad.f32 r0.y, c1.z, r1.y, r0.y +mad.f32 r0.x, c2.w, r1.z, r0.x +mad.f32 r0.y, c2.z, r1.z, r0.y +mad.f32 r0.w, c3.w, r1.w, r0.x +mad.f32 r0.z, c3.z, r1.w, r0.y +mul.f r0.x, r1.x, c0.y +mul.f r0.y, r1.x, c0.x +mad.f32 r0.x, c1.y, r1.y, r0.x +mad.f32 r0.y, c1.x, r1.y, r0.y +mad.f32 r0.x, c2.y, r1.z, r0.x +mad.f32 r1.x, c2.x, r1.z, r0.y +mad.f32 r0.y, c3.y, r1.w, r0.x +mad.f32 r0.x, c3.x, r1.w, r1.x +max.f r1.x, r4.w, c4.x +max.f r1.y, r4.z, c4.x +max.f r4.y, r4.y, c4.x +max.f r4.x, r4.x, c4.x +min.f r1.w, r1.x, c4.y +min.f r1.z, r1.y, c4.y +min.f r1.y, r4.y, c4.y +min.f r1.x, r4.x, c4.y end nop nop nop ; VERT: outputs: r0.x (0:0) r1.x (1:0) r2.x (5:0) r3.x (5:1) -; VERT: inputs: r2.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=f,il=12,b=0) r4.x (0:0,cm=f,il=16,b=0) r5.x (0:0,cm=f,il=20,b=0) -; VERT: 37 instructions, 0 half, 6 full +; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) r4.x (0:0,cm=f,il=12,b=0) r2.x (0:0,cm=f,il=16,b=0) r3.x (0:0,cm=f,il=20,b=0) +; VERT: 25 instructions, 0 half, 5 full -- cgit v1.2.3