summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Rob Clark <robdclark@gmail.com> 2015-04-01 12:06:24 -0400
committer: Rob Clark <robdclark@gmail.com> 2015-04-01 15:23:49 -0400
commit: 954092869f4c4224c999193ea1a37c2817063d97 (patch)
tree: 4ce0b952bfd0206743fd7e0f5fe42fd4b199a8c4
parent: ed394f8454ead6e8bdc5dcf8b1e34f5f1a05cc21 (diff)
update (passing) reference shaders w/ @const
For passing shaders, where the reference shader did not contain @const, copy the output shader over:

    $ir3compiler $args $f >& $out
    ./ir3test $ref $out
    res=$?
    if [ $res = 0 ]; then
        pass=$((pass + 1))
+       grep @const $out > /dev/null
+       outconst=$?
+       grep @const $ref > /dev/null
+       refconst=$?
+       if [ $outconst = 0 ] && [ $refconst != 0 ]; then
+           cp $out $ref
+       fi
    else
-rw-r--r--reference/0ad-alpine-valley/0ad-100.asm282
-rw-r--r--reference/0ad-alpine-valley/0ad-101.asm154
-rw-r--r--reference/0ad-alpine-valley/0ad-102.asm333
-rw-r--r--reference/0ad-alpine-valley/0ad-103.asm282
-rw-r--r--reference/0ad-alpine-valley/0ad-104.asm333
-rw-r--r--reference/0ad-alpine-valley/0ad-105.asm83
-rw-r--r--reference/0ad-alpine-valley/0ad-106.asm57
-rw-r--r--reference/0ad-alpine-valley/0ad-107.asm83
-rw-r--r--reference/0ad-alpine-valley/0ad-115.asm48
-rw-r--r--reference/0ad-alpine-valley/0ad-118.asm48
-rw-r--r--reference/0ad-alpine-valley/0ad-121.asm48
-rw-r--r--reference/0ad-alpine-valley/0ad-124.asm48
-rw-r--r--reference/0ad-alpine-valley/0ad-127.asm48
-rw-r--r--reference/0ad-alpine-valley/0ad-130.asm48
-rw-r--r--reference/0ad-alpine-valley/0ad-133.asm48
-rw-r--r--reference/0ad-alpine-valley/0ad-136.asm19
-rw-r--r--reference/0ad-alpine-valley/0ad-139.asm19
-rw-r--r--reference/0ad-alpine-valley/0ad-142.asm31
-rw-r--r--reference/0ad-alpine-valley/0ad-145.asm19
-rw-r--r--reference/0ad-alpine-valley/0ad-148.asm29
-rw-r--r--reference/0ad-alpine-valley/0ad-151.asm21
-rw-r--r--reference/0ad-alpine-valley/0ad-34.asm31
-rw-r--r--reference/0ad-alpine-valley/0ad-36.asm231
-rw-r--r--reference/0ad-alpine-valley/0ad-37.asm31
-rw-r--r--reference/0ad-alpine-valley/0ad-38.asm231
-rw-r--r--reference/0ad-alpine-valley/0ad-40.asm71
-rw-r--r--reference/0ad-alpine-valley/0ad-46.asm276
-rw-r--r--reference/0ad-alpine-valley/0ad-49.asm401
-rw-r--r--reference/0ad-alpine-valley/0ad-51.asm256
-rw-r--r--reference/0ad-alpine-valley/0ad-52.asm825
-rw-r--r--reference/0ad-alpine-valley/0ad-53.asm256
-rw-r--r--reference/0ad-alpine-valley/0ad-54.asm262
-rw-r--r--reference/0ad-alpine-valley/0ad-55.asm813
-rw-r--r--reference/0ad-alpine-valley/0ad-56.asm262
-rw-r--r--reference/0ad-alpine-valley/0ad-58.asm431
-rw-r--r--reference/0ad-alpine-valley/0ad-61.asm296
-rw-r--r--reference/0ad-alpine-valley/0ad-63.asm248
-rw-r--r--reference/0ad-alpine-valley/0ad-64.asm490
-rw-r--r--reference/0ad-alpine-valley/0ad-65.asm248
-rw-r--r--reference/0ad-alpine-valley/0ad-66.asm124
-rw-r--r--reference/0ad-alpine-valley/0ad-67.asm322
-rw-r--r--reference/0ad-alpine-valley/0ad-68.asm124
-rw-r--r--reference/0ad-alpine-valley/0ad-69.asm124
-rw-r--r--reference/0ad-alpine-valley/0ad-70.asm278
-rw-r--r--reference/0ad-alpine-valley/0ad-71.asm124
-rw-r--r--reference/0ad-alpine-valley/0ad-72.asm124
-rw-r--r--reference/0ad-alpine-valley/0ad-73.asm322
-rw-r--r--reference/0ad-alpine-valley/0ad-74.asm124
-rw-r--r--reference/0ad-alpine-valley/0ad-75.asm222
-rw-r--r--reference/0ad-alpine-valley/0ad-76.asm326
-rw-r--r--reference/0ad-alpine-valley/0ad-77.asm222
-rw-r--r--reference/0ad-alpine-valley/0ad-78.asm222
-rw-r--r--reference/0ad-alpine-valley/0ad-79.asm384
-rw-r--r--reference/0ad-alpine-valley/0ad-80.asm222
-rw-r--r--reference/0ad-alpine-valley/0ad-81.asm222
-rw-r--r--reference/0ad-alpine-valley/0ad-82.asm356
-rw-r--r--reference/0ad-alpine-valley/0ad-83.asm222
-rw-r--r--reference/0ad-alpine-valley/0ad-84.asm154
-rw-r--r--reference/0ad-alpine-valley/0ad-85.asm322
-rw-r--r--reference/0ad-alpine-valley/0ad-86.asm154
-rw-r--r--reference/0ad-alpine-valley/0ad-87.asm154
-rw-r--r--reference/0ad-alpine-valley/0ad-88.asm278
-rw-r--r--reference/0ad-alpine-valley/0ad-89.asm154
-rw-r--r--reference/0ad-alpine-valley/0ad-90.asm154
-rw-r--r--reference/0ad-alpine-valley/0ad-91.asm322
-rw-r--r--reference/0ad-alpine-valley/0ad-92.asm154
-rw-r--r--reference/0ad-alpine-valley/0ad-93.asm154
-rw-r--r--reference/0ad-alpine-valley/0ad-94.asm282
-rw-r--r--reference/0ad-alpine-valley/0ad-95.asm154
-rw-r--r--reference/0ad-alpine-valley/0ad-96.asm333
-rw-r--r--reference/0ad-alpine-valley/0ad-97.asm282
-rw-r--r--reference/0ad-alpine-valley/0ad-98.asm333
-rw-r--r--reference/0ad-alpine-valley/0ad-99.asm154
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-00.asm61
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-01.asm46
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-02.asm41
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-101.asm63
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-103.asm63
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-105.asm63
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-107.asm63
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-111.asm31
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-114.asm31
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-116.asm43
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-118.asm31
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-120.asm31
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-122.asm23
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-126.asm49
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-127.asm79
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-128.asm122
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-130.asm90
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-131.asm50
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-133.asm43
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-135.asm63
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-139.asm369
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-140.asm259
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-21.asm43
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-22.asm250
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-23.asm43
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-24.asm250
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-25.asm43
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-27.asm43
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-28.asm250
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-29.asm37
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-30.asm285
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-32.asm838
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-33.asm333
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-34.asm493
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-35.asm335
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-36.asm826
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-37.asm339
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-38.asm311
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-40.asm501
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-41.asm353
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-42.asm503
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-43.asm323
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-44.asm307
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-46.asm335
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-47.asm175
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-48.asm335
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-49.asm175
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-50.asm335
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-51.asm207
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-52.asm369
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-53.asm285
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-54.asm339
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-55.asm285
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-56.asm335
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-57.asm207
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-58.asm397
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-59.asm285
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-60.asm285
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-61.asm207
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-62.asm1041
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-63.asm566
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-64.asm1437
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-65.asm566
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-66.asm303
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-67.asm394
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-68.asm303
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-69.asm207
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-70.asm1041
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-71.asm566
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-72.asm1437
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-73.asm566
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-74.asm303
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-75.asm394
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-76.asm303
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-77.asm207
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-81.asm83
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-85.asm1649
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-86.asm274
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-87.asm69
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-88.asm122
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-95.asm43
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-96.asm250
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-97.asm63
-rw-r--r--reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-99.asm63
-rw-r--r--reference/0ad-frag-1.asm282
-rw-r--r--reference/0ad-frag-2.asm278
-rw-r--r--reference/0ad-frag.asm163
-rw-r--r--reference/2color-after.asm23
-rw-r--r--reference/ChameleonMan-vert.asm1261
-rw-r--r--reference/builtin2.asm15
-rw-r--r--reference/bump/bump-12.asm258
-rw-r--r--reference/bump/bump-13.asm191
-rw-r--r--reference/bump1.asm191
-rw-r--r--reference/bump2.asm258
-rw-r--r--reference/chrome/bad.asm155
-rw-r--r--reference/crazy-frag-conflict.asm47
-rw-r--r--reference/crazy-frag.asm47
-rw-r--r--reference/dd.asm409
-rw-r--r--reference/es2gears-vert.asm112
-rw-r--r--reference/face.asm16
-rw-r--r--reference/ffox-otmc/ffox-otmc-03.asm1
-rw-r--r--reference/ffox-otmc/ffox-otmc-04.asm1
-rw-r--r--reference/ffox-otmc/ffox-otmc-05.asm1
-rw-r--r--reference/ffox-otmc/ffox-otmc-06.asm1
-rw-r--r--reference/ffox-otmc/ffox-otmc-08.asm1
-rw-r--r--reference/ffox-otmc/ffox-otmc-13.asm1
-rw-r--r--reference/ffox-otmc/ffox-otmc-14.asm1
-rw-r--r--reference/ffox-otmc/ffox-otmc-15.asm1
-rw-r--r--reference/ffox-otmc/ffox-otmc-16.asm1
-rw-r--r--reference/ffox-otmc/ffox-otmc-17.asm1
-rw-r--r--reference/ffox-otmc/ffox-otmc-18.asm1
-rw-r--r--reference/ffox-otmc/ffox-otmc-19.asm1
-rw-r--r--reference/ffox-otmc/ffox-otmc-24.asm1
-rw-r--r--reference/ffox-otmc/ffox-otmc-26.asm2
-rw-r--r--reference/ffox-otmc/ffox-otmc-27.asm1
-rw-r--r--reference/ffox-otmc/ffox-otmc-28.asm2
-rw-r--r--reference/ffox-otmc/ffox-otmc-29.asm1
-rw-r--r--reference/ffox-otmc/ffox-otmc-30.asm41
-rw-r--r--reference/ffox-otmc/ffox-otmc-31.asm1
-rw-r--r--reference/ffox-otmc/ffox-otmc-32.asm1
-rw-r--r--reference/ffox-otmc/ffox-otmc-33.asm1
-rw-r--r--reference/ffox-otmc/ffox-otmc-34.asm1
-rw-r--r--reference/ffox-otmc/ffox-otmc-36.asm2
-rw-r--r--reference/ffox-otmc/ffox-otmc-39.asm2
-rw-r--r--reference/ffox-otmc/ffox-otmc-41.asm2
-rw-r--r--reference/ffox-otmc/ffox-otmc-42.asm2
-rw-r--r--reference/ffox-otmc/ffox-otmc-43.asm2
-rw-r--r--reference/ffox-otmc/ffox-otmc-44.asm2
-rw-r--r--reference/ffox-otmc/ffox-otmc-45.asm2
-rw-r--r--reference/ffox-otmc/ffox-otmc-46.asm2
-rw-r--r--reference/ffox-otmc/ffox-otmc-48.asm2
-rw-r--r--reference/ffox-otmc/ffox-otmc-50.asm3
-rw-r--r--reference/ffox-otmc/ffox-otmc-51.asm2
-rw-r--r--reference/ffox-otmc/ffox-otmc-52.asm3
-rw-r--r--reference/ffox-otmc/ffox-otmc-54.asm2
-rw-r--r--reference/ffox-otmc/ffox-otmc-57.asm1
-rw-r--r--reference/ffox-otmc/ffox-otmc-59.asm1
-rw-r--r--reference/ffox-vert.asm127
-rw-r--r--reference/flow.asm315
-rw-r--r--reference/foo.asm19
-rw-r--r--reference/fragProg1/fragProg1-08.asm1
-rw-r--r--reference/fragProg1/fragProg1-09.asm1
-rw-r--r--reference/fragProg1/fragProg1-10.asm1
-rw-r--r--reference/fragProg1/fragProg1-11.asm1
-rw-r--r--reference/fragProg1/fragProg1-12.asm23
-rw-r--r--reference/fragProg1/fragProg1-13.asm1
-rw-r--r--reference/fragProg1/fragProg1-14.asm1
-rw-r--r--reference/fragProg1/fragProg1-15.asm1
-rw-r--r--reference/fragProg1/fragProg1-16.asm13
-rw-r--r--reference/fragProg1/fragProg1-17.asm8
-rw-r--r--reference/fragProg1/fragProg1-18.asm15
-rw-r--r--reference/fragProg1/fragProg1-19.asm28
-rw-r--r--reference/fragProg1/fragProg1-20.asm2
-rw-r--r--reference/fragProg1/fragProg1-21.asm2
-rw-r--r--reference/fragProg1/fragProg1-22.asm2
-rw-r--r--reference/fragProg1/fragProg1-23.asm2
-rw-r--r--reference/fragProg1/fragProg1-24.asm2
-rw-r--r--reference/fragProg1/fragProg1-25.asm30
-rw-r--r--reference/fragProg1/fragProg1-26.asm30
-rw-r--r--reference/fragProg1/fragProg1-27.asm30
-rw-r--r--reference/fragProg1/fragProg1-28.asm2
-rw-r--r--reference/fragProg1/fragProg1-35.asm12
-rw-r--r--reference/fragProg1/fragProg1-36.asm1
-rw-r--r--reference/fragProg1/fragProg1-37.asm1
-rw-r--r--reference/fragProg1/fragProg1-38.asm1
-rw-r--r--reference/fragProg1/fragProg1-39.asm1
-rw-r--r--reference/fragProg1/fragProg1-40.asm2
-rw-r--r--reference/fragProg1/fragProg1-42.asm1
-rw-r--r--reference/fragProg1/fragProg1-43.asm1
-rw-r--r--reference/fragProg1/fragProg1-46.asm2
-rw-r--r--reference/fragProg1/fragProg1-47.asm1
-rw-r--r--reference/fragProg1/fragProg1-50.asm1
-rw-r--r--reference/fragProg1/fragProg1-54.asm45
-rw-r--r--reference/fragProg1/fragProg1-55.asm1
-rw-r--r--reference/fragProg1/fragProg1-56.asm35
-rw-r--r--reference/fragProg1/fragProg1-57.asm57
-rw-r--r--reference/fragProg1/fragProg1-58.asm35
-rw-r--r--reference/fragProg1/fragProg1-59.asm35
-rw-r--r--reference/fragProg1/fragProg1-60.asm49
-rw-r--r--reference/glmark1.asm22
-rw-r--r--reference/glmark2.asm111
-rw-r--r--reference/glmark3.asm492
-rw-r--r--reference/glsl-fs-raytrace-bug27060.asm2206
-rw-r--r--reference/gmaps-frag.asm100
-rw-r--r--reference/idiv-vert.asm154
-rw-r--r--reference/jellyfish-frag.asm593
-rw-r--r--reference/maniadrive/maniadrive-01.asm73
-rw-r--r--reference/maniadrive/maniadrive-02.asm45
-rw-r--r--reference/maniadrive/maniadrive-03.asm444
-rw-r--r--reference/maniadrive/maniadrive-04.asm31
-rw-r--r--reference/maniadrive/maniadrive-06.asm37
-rw-r--r--reference/maniadrive/maniadrive-07.asm41
-rw-r--r--reference/maniadrive/maniadrive-08.asm85
-rw-r--r--reference/maniadrive/maniadrive-09.asm45
-rw-r--r--reference/maniadrive/maniadrive-10.asm360
-rw-r--r--reference/maniadrive/maniadrive-13.asm43
-rw-r--r--reference/maniadrive/maniadrive-14.asm344
-rw-r--r--reference/maniadrive/maniadrive-15.asm374
-rw-r--r--reference/maniadrive/maniadrive-17.asm346
-rw-r--r--reference/maniadrive/maniadrive-18.asm422
-rw-r--r--reference/maniadrive/maniadrive-19.asm35
-rw-r--r--reference/multi-kill.asm73
-rw-r--r--reference/piglit-arb_framebuffer_srgb-blit-frag1.asm1
-rw-r--r--reference/piglit-arb_framebuffer_srgb-blit-frag2.asm1
-rw-r--r--reference/piglit-fs-uniform-array-mat2-index-rd.asm30
-rw-r--r--reference/piglit-glsl-fs-varying-array.asm3
-rw-r--r--reference/piglit-tex-miplevel-selection-1d-shadow.asm1
-rw-r--r--reference/piglit-vs-temp-mat3-row-rd.asm57
-rw-r--r--reference/piglit-vs-varying-array-mat2-index-rd.asm3
-rw-r--r--reference/piglit-vs-varying-array-mat4-index-rd.asm3
-rw-r--r--reference/problem/0ad-frag.asm68
-rw-r--r--reference/problem/frag-conflict-1.asm17
-rw-r--r--reference/problem/frag-conflict-2.asm31
-rw-r--r--reference/relative-lowered.asm33
-rw-r--r--reference/relative-med.asm17
-rw-r--r--reference/relative-piglit-bad.asm82
-rw-r--r--reference/relative-temp/fs-temp-mat3-col-row-wr.asm3
-rw-r--r--reference/sad-frag.asm1
-rw-r--r--reference/simple-frag.asm39
-rw-r--r--reference/simple-if-else.asm37
-rw-r--r--reference/simple-if.asm25
-rw-r--r--reference/simple-vert.asm8
-rw-r--r--reference/simple.asm25
-rw-r--r--reference/simpletest.asm17
-rw-r--r--reference/stk-mines/stk-mines-00.asm35
-rw-r--r--reference/stk-mines/stk-mines-01.asm81
-rw-r--r--reference/stk-mines/stk-mines-02.asm85
-rw-r--r--reference/stk-mines/stk-mines-03.asm43
-rw-r--r--reference/stk-mines/stk-mines-05.asm73
-rw-r--r--reference/stk-mines/stk-mines-06.asm73
-rw-r--r--reference/stk-mines/stk-mines-07.asm85
-rw-r--r--reference/stk-mines/stk-mines-08.asm235
-rw-r--r--reference/stk-mines/stk-mines-09.asm235
-rw-r--r--reference/stk-mines/stk-mines-10.asm57
-rw-r--r--reference/stk-mines/stk-mines-11.asm248
-rw-r--r--reference/stk-mines/stk-mines-12.asm37
-rw-r--r--reference/stk-mines/stk-mines-13.asm236
-rw-r--r--reference/stk-mines/stk-mines-14.asm276
-rw-r--r--reference/stk-mines/stk-mines-15.asm248
-rw-r--r--reference/stk-mines/stk-mines-16.asm79
-rw-r--r--reference/stk-mines/stk-mines-17.asm254
-rw-r--r--reference/stk-mines/stk-mines-18.asm312
-rw-r--r--reference/stk-mines/stk-mines-19.asm235
-rw-r--r--reference/stk-mines/stk-mines-20.asm248
-rw-r--r--reference/stk-mines/stk-mines-21.asm43
-rw-r--r--reference/stk-mines/stk-mines-22.asm412
-rw-r--r--reference/stk-mines/stk-mines-23.asm307
-rw-r--r--reference/stk-mines/stk-mines-24.asm57
-rw-r--r--reference/stk-mines/stk-mines-25.asm43
-rw-r--r--reference/stk-mines/stk-mines-26.asm239
-rw-r--r--reference/stk-mines/stk-mines-27.asm412
-rw-r--r--reference/stk-mines/stk-mines-28.asm307
-rw-r--r--reference/stk-mines/stk-mines-29.asm412
-rw-r--r--reference/stk-mines/stk-mines-30.asm307
-rw-r--r--reference/stk-mines/stk-mines-31.asm412
-rw-r--r--reference/stk-mines/stk-mines-32.asm307
-rw-r--r--reference/stk-mines/stk-mines-33.asm412
-rw-r--r--reference/stk-mines/stk-mines-34.asm307
-rw-r--r--reference/stk-mines/stk-mines-35.asm235
-rw-r--r--reference/stk-mines/stk-mines-36.asm412
-rw-r--r--reference/stk-mines/stk-mines-37.asm307
-rw-r--r--reference/stk-mines/stk-mines-38.asm43
-rw-r--r--reference/stk/stk0100.asm35
-rw-r--r--reference/stk/stk0101.asm81
-rw-r--r--reference/stk/stk0102.asm85
-rw-r--r--reference/stk/stk0200.asm43
-rw-r--r--reference/stk/stk0301.asm73
-rw-r--r--reference/stk/stk0302.asm73
-rw-r--r--reference/stk/stk0303.asm85
-rw-r--r--reference/stk/stk0304.asm235
-rw-r--r--reference/stk/stk0305.asm235
-rw-r--r--reference/stk/stk0306.asm255
-rw-r--r--reference/stk/stk0307.asm312
-rw-r--r--reference/stk/stk0400.asm43
-rw-r--r--reference/stk/stk0500.asm315
-rw-r--r--reference/stk/stk0501.asm303
-rw-r--r--reference/stk/stk0600.asm43
-rw-r--r--reference/stk/stk0601.asm239
-rw-r--r--reference/stk/stk0700.asm43
-rw-r--r--reference/test.asm28
-rw-r--r--reference/test0.asm43
-rw-r--r--reference/test1.asm235
-rw-r--r--reference/test2.asm120
-rw-r--r--reference/test3.asm36
-rw-r--r--reference/testN.asm91
-rw-r--r--reference/tex-clamp0.asm43
-rw-r--r--reference/tex-clamp1.asm37
-rw-r--r--reference/twoside-frag.asm90
-rw-r--r--reference/twoside-vert.asm109
-rw-r--r--reference/vs-op-neg-int.asm53
-rw-r--r--reference/webgl-blob-frag.asm31
-rw-r--r--reference/webgl-water/webgl-water-13.asm73
-rw-r--r--reference/webgl-water/webgl-water-14.asm35
-rw-r--r--reference/webgl-water/webgl-water-18.asm31
-rw-r--r--reference/webgl-water/webgl-water-20.asm27
-rw-r--r--reference/webgl-water/webgl-water-27.asm73
-rw-r--r--reference/webgl-water/webgl-water-33.asm73
-rw-r--r--reference/webgl-water/webgl-water-34.asm13
-rw-r--r--reference/webgl-water/webgl-water-36.asm70
-rw-r--r--reference/webgl-water/webgl-water-37.asm21
-rw-r--r--reference/webgl-water/webgl-water-38.asm484
-rw-r--r--reference/webgl-water/webgl-water-39.asm63
-rw-r--r--reference/webgl-water/webgl-water-40.asm3525
-rw-r--r--reference/xa-composite-fs.asm41
-rw-r--r--reference/xon1.asm319
-rw-r--r--reference/xon2.asm150
-rw-r--r--reference/xon3.asm343
-rw-r--r--reference/xon4.asm150
-rw-r--r--reference/xon5.asm201
-rw-r--r--reference/xon6.asm117
-rw-r--r--reference/xon7.asm327
-rw-r--r--reference/xon8.asm150
-rw-r--r--reference/xon9.asm124
-rw-r--r--reference/xonotic-gl2/xonotic-glx-gl2-01.asm73
-rw-r--r--reference/xonotic-gl2/xonotic-glx-gl2-02.asm25
-rw-r--r--reference/xonotic-gl2/xonotic-glx-gl2-04.asm25
-rw-r--r--reference/xonotic-gl2/xonotic-glx-gl2-06.asm25
-rw-r--r--reference/xonotic-gl2/xonotic-glx-gl2-10.asm1
-rw-r--r--reference/xonotic-gl2/xonotic-glx-gl2-12.asm319
-rw-r--r--reference/xonotic-gl2/xonotic-glx-gl2-13.asm150
-rw-r--r--reference/xonotic-gl2/xonotic-glx-gl2-14.asm343
-rw-r--r--reference/xonotic-gl2/xonotic-glx-gl2-15.asm150
-rw-r--r--reference/xonotic-gl2/xonotic-glx-gl2-16.asm201
-rw-r--r--reference/xonotic-gl2/xonotic-glx-gl2-17.asm117
-rw-r--r--reference/xonotic-gl2/xonotic-glx-gl2-18.asm327
-rw-r--r--reference/xonotic-gl2/xonotic-glx-gl2-19.asm150
-rw-r--r--reference/xonotic-gl2/xonotic-glx-gl2-20.asm87
-rw-r--r--reference/xonotic-gl2/xonotic-glx-gl2-21.asm55
-rw-r--r--reference/xonotic-gl2/xonotic-glx-gl2-22.asm420
-rw-r--r--reference/xonotic-gl2/xonotic-glx-gl2-23.asm204
-rw-r--r--reference/xonotic-gl2/xonotic-glx-gl2-24.asm170
-rw-r--r--reference/xonotic-gl2/xonotic-glx-gl2-25.asm146
-rw-r--r--reference/xonotic-gl2/xonotic-glx-gl2-26.asm382
-rw-r--r--reference/xonotic-gl2/xonotic-glx-gl2-27.asm204
-rw-r--r--reference/xonotic-gl2/xonotic-glx-gl2-28.asm246
-rw-r--r--reference/xonotic-gl2/xonotic-glx-gl2-29.asm180
-rw-r--r--reference/xonotic-gl2/xonotic-glx-gl2-30.asm47
-rw-r--r--reference/xonotic-gl2/xonotic-glx-gl2-31.asm55
-rw-r--r--reference/xonotic-gl2/xonotic-glx-gl2-32.asm29
-rw-r--r--reference/xonotic-gl2/xonotic-glx-gl2-33.asm55
-rw-r--r--reference/xonotic-gl2/xonotic-glx-gl2-34.asm31
-rw-r--r--reference/xonotic-gl2/xonotic-glx-gl2-35.asm81
-rw-r--r--reference/xonotic-gl2/xonotic-glx-gl2-36.asm49
-rw-r--r--reference/xonotic-gl2/xonotic-glx-gl2-37.asm81
-rw-r--r--reference/xonotic-gl2/xonotic-glx-gl2-38.asm222
-rw-r--r--reference/xonotic-gl2/xonotic-glx-gl2-39.asm180
-rw-r--r--reference/xonotic-gl2/xonotic-glx-gl2-40.asm282
-rw-r--r--reference/xonotic-gl2/xonotic-glx-gl2-41.asm180
-rw-r--r--reference/xonotic/xonotic05.asm73
-rw-r--r--reference/xonotic/xonotic06.asm31
-rw-r--r--reference/xonotic/xonotic08.asm73
-rw-r--r--reference/xonotic/xonotic09.asm35
-rw-r--r--reference/xonotic/xonotic10.asm85
-rw-r--r--reference/xonotic/xonotic11.asm73
-rw-r--r--reference/xonotic/xonotic14.asm81
-rw-r--r--reference/xonotic/xonotic16.asm45
-rw-r--r--reference/xonotic/xonotic17.asm89
-rw-r--r--reference/xonotic/xonotic18.asm85
-rw-r--r--reference/xonotic/xonotic20.asm47
-rw-r--r--reference/xonotic/xonotic21.asm83
-rw-r--r--reference/xonotic/xonotic22.asm97
434 files changed, 30858 insertions, 45588 deletions
diff --git a/reference/0ad-alpine-valley/0ad-100.asm b/reference/0ad-alpine-valley/0ad-100.asm
index 12fbb01..be30c1a 100644
--- a/reference/0ad-alpine-valley/0ad-100.asm
+++ b/reference/0ad-alpine-valley/0ad-100.asm
@@ -8,203 +8,139 @@
@out(r2.y) out1
@out(r2.z) out2
@out(r2.w) out3
+@const(c9.x) 0x3f000000, 0x00000000, 0x3d4ccccd, 0xbb449ba6
+@const(c10.x) 0xbf000000, 0x3f800000, 0x40000000, 0x3de38866
+@const(c11.x) 0x3fb8aa65, 0x3cf5c28f, 0x00000000, 0x00000000
+@const(c12.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.x, 4, r1.x
add.f r0.y, r0.w, c9.y
bary.f r0.w, 5, r1.x
bary.f r1.z, 10, r1.x
-add.f r1.w, r0.x, c10.x
-bary.f r2.x, 8, r1.x
+add.f r2.x, r0.x, c10.x
+bary.f r1.w, 11, r1.x
add.f r2.y, r0.w, c10.x
-mov.f32f32 r1.z, r1.z
-floor.f r2.z, r1.w
+bary.f r3.y, 8, r1.x
+floor.f r2.z, r2.x
rcp r0.y, r0.y
add.f r0.z, r0.z, c9.y
floor.f r2.w, r2.y
-mov.f32f32 r3.y, r1.z
-add.f r1.z, r1.w, (neg)r2.z
+bary.f r3.z, 9, r1.x
+add.f r3.w, r2.x, (neg)r2.z
(ss)mul.f r0.y, r0.z, r0.y
-add.f r0.z, r2.y, (neg)r2.w
-bary.f r1.w, 11, r1.x
-mov.f32f32 r1.z, r1.z
+absneg.f r0.z, (neg)c7.x
+add.f r4.x, r2.y, (neg)r2.w
+mov.f32f32 r4.y, r3.w
+sam (f32)(xyzw)r2.x, r1.z, s#0, t#0
+(ss)add.f r1.z, r3.w, c10.y
+mul.f r0.z, r0.z, c7.x
+mov.f32f32 r1.w, r4.x
+mul.f r3.w, c9.x, r4.y
+add.f r4.y, c10.z, (neg)r4.y
+mul.f r0.z, r0.z, r0.y
mov.f32f32 r0.y, r0.y
-absneg.f r2.y, (neg)c7.x
-mov.f32f32 r0.z, r0.z
-mul.f r2.z, c9.x, r1.z
-mov.f32f32 r1.w, r1.w
-add.f r2.w, c10.z, (neg)r1.z
-add.f r3.w, c10.z, (neg)r0.z
-mov.f32f32 r2.z, r2.z
-mul.f r2.y, r2.y, c7.x
-mul.f r4.x, c9.x, r0.z
-mov.f32f32 r3.z, r1.w
-add.f r0.x, r0.x, (neg)r2.z
-mul.f r1.w, r2.y, r0.y
-mov.f32f32 r2.y, r4.x
-mov.f32f32 r2.z, r2.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r1.w, r1.w
-add.f r0.w, r0.w, (neg)r2.y
-sam (f32)(xyzw)r4.x, r3.y, s#0, t#0
-(sy)cmps.f.lt r2.y, r4.w, c9.z
-add.f r2.w, c10.x, r0.x
-mul.f r0.y, r1.w, r0.y
+add.f r0.x, r0.x, (neg)r3.w
+mul.f r3.w, c9.x, r1.w
+mov.f32f32 r4.z, r4.y
+mul.f r0.y, r0.z, r0.y
+mov.f32f32 r0.z, r0.x
+add.f r0.w, r0.w, (neg)r3.w
add.f r0.x, c10.y, r0.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.w, r2.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
-add.f r2.w, c10.y, r0.w
-mul.f r1.w, r1.w, c3.z
mul.f r0.y, r0.y, c11.x
-mul.f r0.x, r0.x, c3.z
-mov.f32f32 r2.w, r2.w
-(ss)mov.f32f32 r3.y, r1.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r3.z, r0.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r5.x, r3.y
-add.f r0.w, c10.x, r0.w
-mov.f32f32 r3.y, r3.z
-mov.f32f32 r5.w, r0.x
-mov.f32f32 r0.x, r1.w
-mov.f32f32 r0.w, r0.w
-exp2 r0.y, r0.y
-(ss)mov.f32f32 r0.y, r0.y
-mov.f32f32 r6.z, r3.y
-mul.f r1.w, r2.w, c3.w
-mul.f r0.w, r0.w, c3.w
-add.f r2.w, c12.y, (neg)r0.y
-mov.f32f32 r7.y, r0.x
-mov.f32f32 r0.x, r1.w
-mov.f32f32 r3.y, r0.w
-mul.f r2.w, r2.w, c7.y
-mul.f r0.y, r0.y, c10.y
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r5.y, r3.y
-bary.f r3.y, 6, r1.x
-add.f r0.y, r0.y, r2.w
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r0.w, r0.w
-add.f r2.w, r3.y, c9.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r6.w, r1.w
-mov.f32f32 r6.x, r0.w
-mov.f32f32 r0.w, r2.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.w, r2.w
-mov.f32f32 r3.y, r2.w
-mov.f32f32 r5.z, r0.w
-max.f r0.y, r0.y, c9.y
-mov.f32f32 r7.x, r1.w
-mov.f32f32 r6.y, r3.y
-mov.f32f32 r7.z, r0.x
-mov.f32f32 r0.x, r2.w
-cov.u32f32 r0.w, r2.y
-sam.s (f32)(x)r7.w, r5.x, s#2, t#2
-(sy)mov.f32f32 r1.w, r7.w
-min.f r0.y, r0.y, c10.y
-sam.s (f32)(x)r7.w, r6.z, s#2, t#2
-(sy)mov.f32f32 r2.y, r7.w
-(ss)nop
-sam.s (f32)(x)r5.x, r5.w, s#2, t#2
-(sy)mov.f32f32 r2.w, r5.x
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r3.y, r3.w
-add.f r3.z, c12.y, (neg)r0.y
-add.f r3.w, c12.y, (neg)r0.y
-add.f r5.x, c12.y, (neg)r0.y
-mul.f r5.y, r2.z, r3.y
-mul.f r3.z, r3.z, c6.z
-mul.f r3.w, r3.w, c6.y
-mul.f r5.x, r5.x, c6.x
-mul.f r1.w, r5.y, r1.w
-add.f r1.z, r1.z, c10.y
-mov.f32f32 r7.w, r0.x
-mov.f32f32 r0.x, (0.000000)
-add.f r0.z, r0.z, c10.y
-mul.f r3.y, r1.z, r3.y
-mov.f32f32 r2.x, r2.x
-cmps.f.ne p0.x, r0.w, r0.x
-mul.f r0.x, r2.z, r0.z
-mad.f32 r0.w, r3.y, r2.w, r1.w
-sam.s (f32)(x)r5.y, r7.y, s#2, t#2
+add.f r0.z, c10.x, r0.z
+mov.f32f32 r3.w, r0.w
+mul.f r4.w, r0.x, c3.z
+add.f r0.x, c10.y, r0.w
+mul.f r5.z, r0.z, c3.z
+add.f r0.z, c10.x, r3.w
+mov.f32f32 r6.y, r4.w
+mul.f r5.x, r0.x, c3.w
+mov.f32f32 r7.x, r5.z
+mul.f r6.z, r0.z, c3.w
+exp2 r0.x, r0.y
+(ss)mov.f32f32 r0.y, r0.x
+bary.f r0.z, 6, r1.x
+mov.f32f32 r5.w, r5.x
+mov.f32f32 r7.y, r6.z
+add.f r0.y, c12.y, (neg)r0.y
+add.f r5.y, r0.z, c9.w
+add.f r0.z, c10.z, (neg)r1.w
+mul.f r0.x, r0.x, c10.y
+add.f r0.w, r4.x, c10.y
+mov.f32f32 r7.z, r5.y
+mul.f r0.y, r0.y, c7.y
+mov.f32f32 r6.w, r5.y
+mov.f32f32 r6.x, r5.y
(rpt1)nop
-(sy)mov.f32f32 r1.w, r5.y
-mov.f32f32 r0.w, r0.w
-bary.f r2.z, 0, r1.x
-mov.f32f32 r5.y, r2.x
-mad.f32 r0.x, r0.x, r1.w, r0.w
-bary.f r0.w, 1, r1.x
-bary.f r1.w, 9, r1.x
+add.f r0.x, r0.x, r0.y
+sam.s (f32)(x)r7.x, r7.x, s#2, t#2
+mov.f32f32 r0.y, r0.z
+nop
+(ss)nop
+sam.s (f32)(x)r7.y, r6.y, s#2, t#2
+sam.s (f32)(x)r7.z, r4.w, s#2, t#2
mul.f r0.z, r1.z, r0.z
-mov.f32f32 r0.x, r0.x
-bary.f (ei)r1.x, 2, r1.x
-mov.f32f32 r1.y, r1.w
-mad.f32 r0.x, r0.z, r2.y, r0.x
+mul.f r1.w, r4.y, r0.w
+mul.f r0.y, r4.z, r0.y
+max.f r0.x, r0.x, c9.y
+sam.s (f32)(x)r3.w, r5.z, s#2, t#2
+(sy)cmps.f.lt r4.x, r2.w, c9.z
+mul.f r0.w, r1.z, r0.w
+mul.f r0.y, r0.y, r7.x
+min.f r0.x, r0.x, c10.y
+mad.f32 r0.y, r0.z, r7.y, r0.y
+cov.u32f32 r0.z, r4.x
+mad.f32 r0.y, r1.w, r3.w, r0.y
+add.f r1.z, c12.y, (neg)r0.x
+mad.f32 r0.y, r0.w, r7.z, r0.y
+add.f r0.w, c12.y, (neg)r0.x
+add.f r1.w, c12.y, (neg)r0.x
+mov.f32f32 r3.w, (0.000000)
+mul.f r0.y, c10.w, r0.y
+bary.f r4.x, 0, r1.x
+mul.f r1.z, r1.z, c6.z
+mul.f r0.w, r0.w, c6.y
+mov.f32f32 r4.y, r0.y
+bary.f r4.z, 2, r1.x
+bary.f (ei)r1.x, 1, r1.x
+mul.f r1.y, r2.x, r4.x
+mul.f r1.w, r1.w, c6.x
+mul.f r4.x, r2.z, r4.z
+mul.f r1.x, r2.y, r1.x
+mul.f r0.y, r1.y, r0.y
+cmps.f.ne p0.x, r0.z, r3.w
+mul.f r0.z, r4.x, r4.y
+mul.f r1.x, r1.x, r4.y
+mad.f32 r0.z, c5.z, r2.z, r0.z
+mad.f32 r1.x, c5.y, r2.y, r1.x
+mad.f32 r0.y, c5.x, r2.x, r0.y
+sam (f32)(w)r3.y, r3.y, s#1, t#1
+(sy)cmps.f.lt r1.y, r4.x, c11.y
+mul.f r0.z, r0.x, r0.z
+mul.f r1.x, r0.x, r1.x
+mul.f r0.x, r0.x, r0.y
kill p0.x
-mov.f32f32 r0.z, r4.w
-mov.f32f32 r5.z, r1.y
-mov.f32f32 r0.x, r0.x
-mul.f r1.x, r4.z, r1.x
-mul.f r0.w, r4.y, r0.w
-mov.f32f32 r2.w, r0.z
-mul.f r0.x, c10.w, r0.x
-mul.f r0.z, r4.x, r2.z
-sam (f32)(w)r1.y, r5.y, s#1, t#1
-nop
-(sy)cmps.f.lt r1.y, r2.x, c11.y
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r1.z, r2.x
-mov.f32f32 r1.w, c9.y
-nop
-mul.f r1.x, r1.x, r0.x
-mul.f r0.w, r0.w, r0.x
-mul.f r0.x, r0.z, r0.x
+add.f r0.y, r0.z, r1.z
cov.u32f32 r0.z, r1.y
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r0.w, r0.w
-mad.f32 r1.x, c5.z, r4.z, r1.x
-mad.f32 r0.w, c5.y, r4.y, r0.w
-mov.f32f32 r0.x, r0.x
+add.f r0.w, r1.x, r0.w
+add.f r0.x, r0.x, r1.w
+mov.f32f32 r1.x, c9.y
cmps.f.ne r0.z, r0.z, c9.y
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r0.w, r0.w
-mad.f32 r0.x, c5.x, r4.x, r0.x
-mov.f32f32 r1.y, r1.z
-mul.f r1.x, r0.y, r1.x
-mul.f r0.w, r0.y, r0.w
-mov.f32f32 r0.x, r0.x
-sel.b32 r0.z, r1.w, r0.z, r1.y
-add.f r1.x, r1.x, r3.z
-add.f r0.w, r0.w, r3.w
-mul.f r0.x, r0.y, r0.x
-nop
-mul.f r0.y, r1.x, r0.z
+(rpt2)nop
+sel.b32 r0.z, r1.x, r0.z, r4.x
+(rpt2)nop
+mul.f r0.y, r0.y, r0.z
mul.f r0.w, r0.w, r0.z
-add.f r0.x, r0.x, r5.x
-nop
-mul.f r0.y, r0.y, c4.z
-mul.f r0.w, r0.w, c4.y
+(rpt1)nop
+mul.f r2.z, r0.y, c4.z
+mul.f r2.y, r0.w, c4.y
mul.f r0.x, r0.x, r0.z
-nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.w
-mul.f r0.x, r0.x, c4.x
-nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.x, r0.x
-nop
-mov.f32f32 r2.z, r0.y
-mov.f32f32 r2.y, r0.z
-mov.f32f32 r0.x, r0.x
(rpt2)nop
-mov.f32f32 r2.x, r0.x
+mul.f r2.x, r0.x, c4.x
end
nop
nop
nop
; FRAG: outputs: r2.x (1:0)
; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r2.x (5:10,cm=f,il=12,b=1) r1.x (5:11,cm=f,il=16,b=1)
-; FRAG: 196 instructions, 0 half, 8 full
+; FRAG: 133 instructions, 0 half, 8 full
diff --git a/reference/0ad-alpine-valley/0ad-101.asm b/reference/0ad-alpine-valley/0ad-101.asm
index c6a3a6f..9c8ac11 100644
--- a/reference/0ad-alpine-valley/0ad-101.asm
+++ b/reference/0ad-alpine-valley/0ad-101.asm
@@ -24,115 +24,81 @@
@out(r3.y) out13
@out(r3.z) out14
@out(r3.w) out15
+@const(c16.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
+@const(c17.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)mul.f r1.z, c11.x, r0.w
mul.f r1.w, c11.x, r0.x
mad.f32 r1.z, c12.x, r1.x, r1.z
mad.f32 r1.w, c12.x, r0.y, r1.w
-mul.f r2.x, c11.z, r0.x
-mad.f32 r1.w, c13.x, r0.z, r1.w
-mov.f32f32 r1.z, r1.z
-mad.f32 r2.x, c12.z, r0.y, r2.x
mad.f32 r1.z, c13.x, r1.y, r1.z
-add.f r1.w, r1.w, c14.x
-mad.f32 r2.x, c13.z, r0.z, r2.x
-mul.f r2.y, c11.y, r0.w
-mov.f32f32 r1.z, r1.z
-mul.f r2.z, c7.w, r1.w
-mul.f r2.w, c7.z, r1.w
-mul.f r3.x, c7.y, r1.w
+mad.f32 r1.w, c13.x, r0.z, r1.w
+mul.f r2.x, c11.z, r0.x
+mul.f r2.y, c11.y, r0.x
mul.f r1.z, r1.z, (neg)c4.x
-mad.f32 r2.y, c12.y, r1.x, r2.y
-mul.f r3.y, c11.y, r0.x
-mul.f r4.x, c7.x, r1.w
-mad.f32 r3.y, c12.y, r0.y, r3.y
-mov.f32f32 r2.y, r2.y
-mad.f32 r3.y, c13.y, r0.z, r3.y
-mad.f32 r2.y, c13.y, r1.y, r2.y
-mul.f r4.y, c0.w, r1.w
-mul.f r4.z, c0.z, r1.w
-mul.f r4.w, c0.y, r1.w
-mov.f32f32 r2.y, r2.y
-add.f r3.y, r3.y, c14.y
-mul.f r5.x, c0.x, r1.w
-add.f r5.y, r2.x, c14.z
-mad.f32 r1.z, (neg)c4.y, r2.y, r1.z
-mad.f32 r2.x, c8.w, r3.y, r2.z
-mad.f32 r2.y, c8.z, r3.y, r2.w
-mad.f32 r2.z, c8.y, r3.y, r3.x
-mov.f32f32 r1.z, r1.z
+mul.f r2.z, c11.y, r0.w
+add.f r1.w, r1.w, c14.x
+mad.f32 r2.z, c12.y, r1.x, r2.z
+mad.f32 r2.x, c12.z, r0.y, r2.x
+mad.f32 r2.z, c13.y, r1.y, r2.z
+mul.f r2.w, c7.y, r1.w
+mul.f r3.x, c7.x, r1.w
+mul.f r3.y, c7.w, r1.w
+mad.f32 r1.z, (neg)c4.y, r2.z, r1.z
mul.f r0.w, c11.z, r0.w
-mad.f32 r2.x, c9.w, r5.y, r2.x
+mad.f32 r2.y, c12.y, r0.y, r2.y
mad.f32 r0.w, c12.z, r1.x, r0.w
+mad.f32 r1.x, c13.y, r0.z, r2.y
+mad.f32 r0.w, c13.z, r1.y, r0.w
+mul.f r1.y, c7.z, r1.w
+mul.f r2.y, c0.w, r1.w
+mul.f r2.z, c0.z, r1.w
+mad.f32 r0.w, (neg)c4.z, r0.w, r1.z
+add.f r4.x, r1.x, c14.y
+mul.f r4.y, c0.y, r1.w
+mul.f r4.z, c0.x, r1.w
+max.f r0.w, c16.x, r0.w
+mad.f32 r2.w, c8.y, r4.x, r2.w
+mad.f32 r3.x, c8.x, r4.x, r3.x
+mad.f32 r3.y, c8.w, r4.x, r3.y
+mov.f32f32 r4.w, r0.w
+mad.f32 r2.x, c13.z, r0.z, r2.x
+mul.f r1.x, r0.w, c5.x
+mad.f32 r0.w, c8.z, r4.x, r1.y
+mul.f r1.z, r4.w, c5.z
+mul.f r1.y, r4.w, c5.y
+add.f r4.w, r2.x, c14.z
+mad.f32 r2.x, c1.w, r4.x, r2.y
+mad.f32 r2.y, c1.z, r4.x, r2.z
+mad.f32 r2.z, c1.y, r4.x, r4.y
+mad.f32 r2.w, c9.y, r4.w, r2.w
mul.f r0.x, c11.w, r0.x
-mad.f32 r1.x, c9.z, r5.y, r2.y
-mad.f32 r2.y, c9.y, r5.y, r2.z
-mov.f32f32 r0.w, r0.w
+mad.f32 r3.x, c9.x, r4.w, r3.x
mad.f32 r0.x, c12.w, r0.y, r0.x
-mad.f32 r0.y, c13.z, r1.y, r0.w
+mad.f32 r0.y, c9.w, r4.w, r3.y
mad.f32 r0.x, c13.w, r0.z, r0.x
-mad.f32 r0.z, c8.x, r3.y, r4.x
-mad.f32 r0.w, c1.w, r3.y, r4.y
-mov.f32f32 r0.y, r0.y
+mad.f32 r0.z, c9.z, r4.w, r0.w
+mad.f32 r0.w, c2.w, r4.w, r2.x
+mad.f32 r4.y, c2.z, r4.w, r2.y
add.f r0.x, r0.x, c14.w
-mad.f32 r0.z, c9.x, r5.y, r0.z
-mad.f32 r0.w, c2.w, r5.y, r0.w
-mad.f32 r0.y, (neg)c4.z, r0.y, r1.z
-mad.f32 r1.y, c10.w, r0.x, r2.x
-mad.f32 r1.x, c10.z, r0.x, r1.x
-mad.f32 r1.z, c10.y, r0.x, r2.y
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.x, r1.x
-mul.f r1.z, r1.z, c15.y
-max.f r0.y, c16.x, r0.y
-mov.f32f32 r2.w, r1.y
-mov.f32f32 r2.z, r1.x
-mov.f32f32 r2.y, r1.z
-mov.f32f32 r0.y, r0.y
-mad.f32 r0.z, c10.x, r0.x, r0.z
+mad.f32 r5.x, c2.y, r4.w, r2.z
+mad.f32 r4.x, c1.x, r4.x, r4.z
+mad.f32 r3.y, c6.x, r4.w, c6.y
+mad.f32 r2.x, c10.y, r0.x, r2.w
+mad.f32 r3.x, c10.x, r0.x, r3.x
+mad.f32 r2.w, c10.w, r0.x, r0.y
+mad.f32 r2.z, c10.z, r0.x, r0.z
+mul.f r2.y, r2.x, c15.y
+mul.f r2.x, r3.x, c15.x
mad.f32 r0.w, c3.w, r0.x, r0.w
-mad.f32 r3.x, c1.z, r3.y, r4.z
-mul.f r1.x, r0.y, c5.z
-mul.f r1.y, r0.y, c5.y
-mul.f r0.y, r0.y, c5.x
-mul.f r0.z, r0.z, c15.x
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r0.y, r0.y
-nop
-mov.f32f32 r1.z, r1.x
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.x, r0.y
-mov.f32f32 r2.x, r0.z
-mov.f32f32 r0.w, r0.w
-mad.f32 r0.y, c2.z, r5.y, r3.x
-mad.f32 r0.z, c1.y, r3.y, r4.w
-mad.f32 r0.y, c3.z, r0.x, r0.y
-mad.f32 r3.x, c2.y, r5.y, r0.z
-mad.f32 r3.y, c1.x, r3.y, r5.x
-mad.f32 r4.x, c6.x, r5.y, c6.y
-mov.f32f32 r0.z, r0.y
-mad.f32 r0.y, c3.y, r0.x, r3.x
-mad.f32 r3.x, c2.x, r5.y, r3.y
-mov.f32f32 r3.y, r4.x
-mad.f32 r1.w, c6.x, r1.w, c6.y
-mov.f32f32 r0.y, r0.y
-mad.f32 r0.x, c3.x, r0.x, r3.x
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r1.w, r1.w
-nop
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r3.x, r1.w
-mov.f32f32 r1.w, r3.z
-mov.f32f32 r4.x, (0.000000)
-mov.f32f32 r3.w, r3.w
-nop
-mov.f32f32 r3.z, r1.w
-mov.f32f32 r1.w, r4.x
+mad.f32 r0.z, c3.z, r0.x, r4.y
+mad.f32 r0.y, c3.y, r0.x, r5.x
+mad.f32 r4.x, c2.x, r4.w, r4.x
+mad.f32 r3.x, c6.x, r1.w, c6.y
+mad.f32 r0.x, c3.x, r0.x, r4.x
+mov.f32f32 r1.w, (0.000000)
end
nop
; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11)
; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r3.z (0:0,cm=3,il=16,b=0)
-; VERT: 107 instructions, 0 half, 6 full
+; VERT: 71 instructions, 0 half, 6 full
diff --git a/reference/0ad-alpine-valley/0ad-102.asm b/reference/0ad-alpine-valley/0ad-102.asm
index 36d09e4..63ee7bc 100644
--- a/reference/0ad-alpine-valley/0ad-102.asm
+++ b/reference/0ad-alpine-valley/0ad-102.asm
@@ -6,8 +6,8 @@
@in(r0.w) in4
@in(r1.x) in5
@in(r1.y) in6
-@in(r4.x) in8
-@in(r4.y) in9
+@in(r3.z) in8
+@in(r3.w) in9
@out(r0.x) out0
@out(r0.y) out1
@out(r0.z) out2
@@ -24,223 +24,164 @@
@out(r3.y) out13
@out(r3.z) out14
@out(r3.w) out15
+@const(c18.x) 0x3ecccccd, 0x00000000, 0x3eaaa64c, 0x3e800000
+@const(c19.x) 0x3f000000, 0x40000000, 0xbf800000, 0x40400000
+@const(c20.x) 0x399d4952, 0x3e4ccccd, 0x3f99999a, 0x3fd9999a
+@const(c21.x) 0x3c88ce70, 0x00000000, 0x00000000, 0x00000000
+@const(c22.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)floor.f r1.z, c14.z
floor.f r1.w, c14.x
absneg.f r2.x, (abs)c17.x
absneg.f r2.y, (abs)c17.y
add.f r1.z, c14.z, (neg)r1.z
add.f r1.w, c14.x, (neg)r1.w
-mul.f r2.z, c11.x, r0.w
-add.f r2.x, r2.x, r2.y
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r1.w, r1.w
-mad.f32 r2.y, c12.x, r1.x, r2.z
-mov.f32f32 r2.x, r2.x
+mov.f32f32 r2.z, c18.y
+mul.f r2.w, c11.x, r0.w
max.f r1.z, r1.z, c18.y
max.f r1.w, r1.w, c18.y
-mov.f32f32 r2.y, r2.y
-mul.f r2.z, c16.x, r2.x
+add.f r2.x, r2.x, r2.y
+add.f r2.y, r2.z, c19.x
min.f r1.z, r1.z, c22.y
min.f r1.w, r1.w, c22.y
-mul.f r2.w, c11.z, r0.x
-mad.f32 r2.y, c13.x, r1.y, r2.y
+mul.f r2.z, c16.x, r2.x
+mul.f r3.x, c11.z, r0.x
max.f r1.z, r1.z, c18.x
max.f r1.w, r1.w, c18.x
-mad.f32 r2.w, c12.z, r0.y, r2.w
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r1.w, r1.w
-mad.f32 r2.w, c13.z, r0.z, r2.w
-mul.f r3.x, c11.x, r0.x
+mul.f r3.y, c11.x, r0.x
+mad.f32 r3.x, c12.z, r0.y, r3.x
mul.f r1.z, c16.x, r1.z
-mad.f32 r3.x, c12.x, r0.y, r3.x
-add.f r2.w, r2.w, c14.z
-mad.f32 r3.x, c13.x, r0.z, r3.x
-mov.f32f32 r1.z, r1.z
-mad.f32 r2.z, c18.w, r2.z, r2.w
-mul.f r2.y, r2.y, (neg)c4.x
-mul.f r3.y, c11.y, r0.w
+mad.f32 r3.y, c12.x, r0.y, r3.y
+mad.f32 r3.x, c13.z, r0.z, r3.x
+mad.f32 r3.y, c13.x, r0.z, r3.y
mad.f32 r1.z, c18.z, r1.z, c14.x
-add.f r3.x, r3.x, c14.x
-mov.f32f32 r2.z, r2.z
-mad.f32 r1.w, c16.x, r1.w, r3.x
-mov.f32f32 r1.z, r1.z
-mad.f32 r3.y, c12.y, r1.x, r3.y
-mov.f32f32 r2.z, r2.z
+floor.f r4.x, r2.y
+mad.f32 r2.w, c12.x, r1.x, r2.w
+add.f r3.y, r3.y, c14.x
+add.f r1.z, r1.z, c19.x
+mad.f32 r1.w, c16.x, r1.w, r3.y
+add.f r3.x, r3.x, c14.z
+add.f r2.y, r2.y, (neg)r4.x
+floor.f r4.x, r1.z
add.f r1.w, r1.w, c19.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r3.y, r3.y
+mad.f32 r2.z, c18.w, r2.z, r3.x
+mad.f32 r2.y, c19.y, r2.y, c19.z
+add.f r1.z, r1.z, (neg)r4.x
+floor.f r4.x, r1.w
add.f r2.z, r2.z, c19.x
-mad.f32 r3.y, c13.y, r1.y, r3.y
-add.f r1.z, r1.z, c19.x
-floor.f r3.z, r1.w
-floor.f r3.w, r2.z
-mov.f32f32 r3.y, r3.y
-floor.f r4.z, r1.z
-add.f r1.w, r1.w, (neg)r3.z
-add.f r2.z, r2.z, (neg)r3.w
-mad.f32 r2.y, (neg)c4.y, r3.y, r2.y
-add.f r1.z, r1.z, (neg)r4.z
-mad.f32 r1.w, c19.y, r1.w, c19.z
-mad.f32 r2.z, c19.y, r2.z, c19.z
-mov.f32f32 r2.y, r2.y
+absneg.f r2.y, (abs)r2.y
mad.f32 r1.z, c19.y, r1.z, c19.z
-absneg.f r1.w, (abs)r1.w
-absneg.f r2.z, (abs)r2.z
-mul.f r0.w, c11.z, r0.w
+add.f r4.x, r1.w, (neg)r4.x
+floor.f r4.y, r2.z
+mul.f r1.w, r2.y, r2.y
absneg.f r1.z, (abs)r1.z
-mul.f r3.y, c19.y, r1.w
-mul.f r3.z, c19.y, r2.z
-mul.f r1.w, r1.w, r1.w
-mul.f r3.w, c19.y, r1.z
-add.f r3.y, c19.w, (neg)r3.y
-add.f r3.z, c19.w, (neg)r3.z
-mul.f r2.z, r2.z, r2.z
-add.f r3.w, c19.w, (neg)r3.w
+mad.f32 r2.y, c19.y, r4.x, c19.z
+add.f r2.z, r2.z, (neg)r4.y
+mad.f32 r2.w, c13.x, r1.y, r2.w
+mul.f r4.x, c19.y, r1.z
+absneg.f r2.y, (abs)r2.y
+mad.f32 r2.z, c19.y, r2.z, c19.z
mul.f r1.z, r1.z, r1.z
-mul.f r1.w, r1.w, r3.y
-mul.f r2.z, r2.z, r3.z
+add.f r4.x, c19.w, (neg)r4.x
+mul.f r4.y, c19.y, r2.y
+absneg.f r2.z, (abs)r2.z
+mul.f r2.y, r2.y, r2.y
+mul.f r1.z, r1.z, r4.x
+mul.f r4.x, r0.y, c21.x
+add.f r4.y, c19.w, (neg)r4.y
+mul.f r4.z, c19.y, r2.z
+mul.f r2.z, r2.z, r2.z
+max.f r4.x, r4.x, c18.y
+mul.f r2.y, r2.y, r4.y
+mul.f r4.y, r0.x, r0.z
+add.f r4.z, c19.w, (neg)r4.z
+min.f r4.x, r4.x, c22.y
+mul.f r4.w, r0.y, c20.x
+mul.f r2.w, r2.w, (neg)c4.x
+mul.f r5.x, c11.y, r0.w
+min.f r4.x, r4.x, c18.w
+mul.f r4.y, r4.y, r4.w
+mul.f r2.z, r2.z, r4.z
+mad.f32 r4.z, c12.y, r1.x, r5.x
+mul.f r1.z, r1.z, r4.x
+max.f r4.x, r4.y, c18.y
+mad.f32 r4.y, c13.y, r1.y, r4.z
+mov.f32f32 r2.x, r2.x
+mov.f32f32 r4.z, r1.z
+min.f r4.x, r4.x, c22.y
+mad.f32 r2.w, (neg)c4.y, r4.y, r2.w
+mul.f r0.w, c11.z, r0.w
+max.f r2.x, r2.x, c20.z
+min.f r4.x, r4.x, c20.y
mad.f32 r0.w, c12.z, r1.x, r0.w
-mul.f r1.x, r1.z, r3.w
-mov.f32f32 r1.z, r1.w
-mov.f32f32 r1.w, r2.z
-mul.f r2.z, r0.x, r0.z
-mov.f32f32 r1.x, r1.x
-mul.f r3.y, r0.y, c21.x
-mul.f r3.z, r0.y, c20.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r3.w, c18.y
-mov.f32f32 r3.y, r3.y
-mul.f r2.z, r2.z, r3.z
-mad.f32 r0.w, c13.z, r1.y, r0.w
-add.f r1.y, r3.w, c19.x
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r0.w, r0.w
-floor.f r3.z, r1.y
-max.f r3.y, r3.y, c18.y
-mov.f32f32 r2.z, r2.z
-mad.f32 r0.w, (neg)c4.z, r0.w, r2.y
-add.f r1.y, r1.y, (neg)r3.z
-min.f r2.y, r3.y, c22.y
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r0.w, r0.w
-mad.f32 r1.y, c19.y, r1.y, c19.z
-min.f r2.y, r2.y, c18.w
-max.f r2.z, r2.z, c18.y
-max.f r0.w, c18.y, r0.w
-absneg.f r1.y, (abs)r1.y
-mov.f32f32 r2.y, r2.y
-min.f r2.z, r2.z, c22.y
-mov.f32f32 r0.w, r0.w
-mul.f r3.y, r1.y, r1.y
-mul.f r1.x, r1.x, r2.y
-min.f r1.y, r2.z, c20.y
-mul.f r2.y, r0.w, c5.z
-mul.f r2.z, r0.w, c5.y
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r2.z, r2.z
-mul.f r0.w, r0.w, c5.x
-mul.f r3.z, r1.z, r1.y
-mul.f r3.w, r1.w, r1.y
-max.f r1.w, r2.x, c20.z
-mov.f32f32 r1.z, r2.y
-mov.f32f32 r2.x, r3.z
-mov.f32f32 r1.y, r2.z
-mad.f32 r2.y, c17.x, r1.x, r2.x
-mad.f32 r1.x, c17.y, r1.x, r2.x
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.x, r2.y
-mov.f32f32 r2.y, r1.x
-min.f r2.z, r1.w, c20.w
-mov.f32f32 r1.x, r0.w
-mov.f32f32 r1.w, r3.y
-mul.f r0.w, c11.y, r0.x
-mov.f32f32 r2.z, r2.z
-mad.f32 r0.w, c12.y, r0.y, r0.w
+mul.f r1.x, c11.y, r0.x
mul.f r0.x, c11.w, r0.x
-mov.f32f32 r3.z, r4.y
-mad.f32 r3.x, r3.w, r2.z, r3.x
-mad.f32 r2.w, r3.w, r2.z, r2.w
-mad.f32 r0.w, c13.y, r0.z, r0.w
+mov.f32f32 r4.y, r4.x
+mul.f r2.z, r2.z, r4.x
+min.f r2.x, r2.x, c20.w
+mad.f32 r0.w, c13.z, r1.y, r0.w
+mul.f r1.y, r2.y, r4.y
+mad.f32 r1.x, c12.y, r0.y, r1.x
mad.f32 r0.x, c12.w, r0.y, r0.x
-add.f r0.y, r3.x, r2.x
-add.f r2.x, r2.w, r2.y
-add.f r0.w, r0.w, c14.y
-nop
-mov.f32f32 r3.x, r0.y
-mov.f32f32 r3.y, r2.x
-mad.f32 r0.y, r3.w, r2.z, r0.w
+mad.f32 r0.y, c13.y, r0.z, r1.x
+mov.f32f32 r1.x, r1.y
+mad.f32 r1.y, c17.y, r1.z, r1.y
+mad.f32 r1.x, c17.x, r4.z, r1.x
+mov.f32f32 r1.z, r2.x
+mad.f32 r2.x, r2.z, r2.x, r3.x
+add.f r0.y, r0.y, c14.y
+mad.f32 r0.w, (neg)c4.z, r0.w, r2.w
+mad.f32 r2.y, r2.z, r1.z, r3.y
+add.f r1.y, r2.x, r1.y
+mad.f32 r0.y, r2.z, r1.z, r0.y
+max.f r4.x, c18.y, r0.w
+add.f r0.w, r2.y, r1.x
+mov.f32f32 r1.x, r1.y
mad.f32 r0.x, c13.w, r0.z, r0.x
-mul.f r0.z, c7.w, r3.x
-mul.f r0.w, c7.z, r3.x
-mad.f32 r0.z, c8.w, r0.y, r0.z
-mad.f32 r0.w, c8.z, r0.y, r0.w
-mul.f r2.x, c7.y, r3.x
-mul.f r2.y, c7.x, r3.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-mad.f32 r0.z, c9.w, r3.y, r0.z
-add.f r0.x, r0.x, c14.w
-mad.f32 r0.w, c9.z, r3.y, r0.w
-mad.f32 r2.x, c8.y, r0.y, r2.x
-mad.f32 r2.y, c8.x, r0.y, r2.y
-mad.f32 r0.z, c10.w, r0.x, r0.z
-mad.f32 r0.w, c10.z, r0.x, r0.w
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-mad.f32 r2.x, c9.y, r3.y, r2.x
-mad.f32 r2.y, c9.x, r3.y, r2.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-mad.f32 r2.x, c10.y, r0.x, r2.x
-mad.f32 r2.y, c10.x, r0.x, r2.y
-mov.f32f32 r2.w, r0.z
-mov.f32f32 r2.z, r0.w
-mov.f32f32 r0.z, r2.x
-mov.f32f32 r0.w, r2.y
-mul.f r3.w, c0.w, r3.x
-mul.f r4.y, c0.z, r3.x
-mul.f r0.z, r0.z, c15.y
-mul.f r0.w, r0.w, c15.x
-(rpt1)nop
-mov.f32f32 r2.y, r0.z
-mov.f32f32 r2.x, r0.w
-mad.f32 r0.z, c1.w, r0.y, r3.w
-mad.f32 r0.w, c1.z, r0.y, r4.y
-mad.f32 r0.z, c2.w, r3.y, r0.z
-mad.f32 r0.w, c2.z, r3.y, r0.w
-mad.f32 r0.z, c3.w, r0.x, r0.z
-mad.f32 r3.w, c3.z, r0.x, r0.w
-mul.f r4.y, c0.y, r3.x
-mul.f r4.z, c0.x, r3.x
-mov.f32f32 r0.w, r0.z
-mov.f32f32 r0.z, r3.w
-mad.f32 r3.w, c1.y, r0.y, r4.y
-mad.f32 r0.y, c1.x, r0.y, r4.z
-mad.f32 r3.w, c2.y, r3.y, r3.w
-mad.f32 r0.y, c2.x, r3.y, r0.y
-mad.f32 r3.w, c3.y, r0.x, r3.w
-mad.f32 r0.x, c3.x, r0.x, r0.y
-(rpt1)nop
-mov.f32f32 r0.y, r3.w
-mov.f32f32 r0.x, r0.x
-mad.f32 r3.y, c6.x, r3.y, c6.y
-mad.f32 r3.x, c6.x, r3.x, c6.y
-(rpt1)nop
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r3.w, r3.z
-mov.f32f32 r3.z, r4.x
-(rpt2)nop
-mov.f32f32 r3.z, r3.z
+nop
+mov.f32f32 r1.z, r0.w
+mul.f r3.x, c0.x, r0.w
+mad.f32 r3.y, c6.x, r1.x, c6.y
+mov.f32f32 r4.y, r4.x
+mul.f r0.z, c7.y, r1.z
+mul.f r0.w, c7.x, r1.z
+mad.f32 r0.z, c8.y, r0.y, r0.z
+mad.f32 r0.w, c8.x, r0.y, r0.w
+mad.f32 r0.z, c9.y, r1.x, r0.z
+add.f r4.z, r0.x, c14.w
+mad.f32 r0.x, c9.x, r1.x, r0.w
+mul.f r0.w, c7.w, r1.z
+mul.f r2.x, c7.z, r1.z
+mad.f32 r0.z, c10.y, r4.z, r0.z
+mad.f32 r0.x, c10.x, r4.z, r0.x
+mad.f32 r0.w, c8.w, r0.y, r0.w
+mad.f32 r2.z, c8.z, r0.y, r2.x
+mul.f r2.y, r0.z, c15.y
+mul.f r2.x, r0.x, c15.x
+mad.f32 r0.x, c9.w, r1.x, r0.w
+mad.f32 r0.z, c9.z, r1.x, r2.z
+mad.f32 r2.w, c10.w, r4.z, r0.x
+mad.f32 r2.z, c10.z, r4.z, r0.z
+mul.f r0.x, c0.w, r1.z
+mul.f r0.z, c0.z, r1.z
+mad.f32 r0.x, c1.w, r0.y, r0.x
+mad.f32 r0.z, c1.z, r0.y, r0.z
+mad.f32 r0.x, c2.w, r1.x, r0.x
+mad.f32 r0.z, c2.z, r1.x, r0.z
+mad.f32 r0.w, c3.w, r4.z, r0.x
+mad.f32 r0.z, c3.z, r4.z, r0.z
+mul.f r0.x, c0.y, r1.z
+mad.f32 r3.x, c1.x, r0.y, r3.x
+mad.f32 r0.x, c1.y, r0.y, r0.x
+mad.f32 r0.y, c2.x, r1.y, r3.x
+mad.f32 r1.x, c2.y, r1.x, r0.x
+mad.f32 r0.x, c3.x, r4.z, r0.y
+mad.f32 r0.y, c3.y, r4.z, r1.x
+mad.f32 r3.x, c6.x, r1.z, c6.y
+mul.f r1.z, r4.y, c5.z
+mul.f r1.y, r4.y, c5.y
+mul.f r1.x, r4.x, c5.x
end
; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11)
-; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r4.x (0:0,cm=3,il=16,b=0)
-; VERT: 221 instructions, 0 half, 5 full
+; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r3.z (0:0,cm=3,il=16,b=0)
+; VERT: 152 instructions, 0 half, 6 full
diff --git a/reference/0ad-alpine-valley/0ad-103.asm b/reference/0ad-alpine-valley/0ad-103.asm
index 12fbb01..be30c1a 100644
--- a/reference/0ad-alpine-valley/0ad-103.asm
+++ b/reference/0ad-alpine-valley/0ad-103.asm
@@ -8,203 +8,139 @@
@out(r2.y) out1
@out(r2.z) out2
@out(r2.w) out3
+@const(c9.x) 0x3f000000, 0x00000000, 0x3d4ccccd, 0xbb449ba6
+@const(c10.x) 0xbf000000, 0x3f800000, 0x40000000, 0x3de38866
+@const(c11.x) 0x3fb8aa65, 0x3cf5c28f, 0x00000000, 0x00000000
+@const(c12.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.x, 4, r1.x
add.f r0.y, r0.w, c9.y
bary.f r0.w, 5, r1.x
bary.f r1.z, 10, r1.x
-add.f r1.w, r0.x, c10.x
-bary.f r2.x, 8, r1.x
+add.f r2.x, r0.x, c10.x
+bary.f r1.w, 11, r1.x
add.f r2.y, r0.w, c10.x
-mov.f32f32 r1.z, r1.z
-floor.f r2.z, r1.w
+bary.f r3.y, 8, r1.x
+floor.f r2.z, r2.x
rcp r0.y, r0.y
add.f r0.z, r0.z, c9.y
floor.f r2.w, r2.y
-mov.f32f32 r3.y, r1.z
-add.f r1.z, r1.w, (neg)r2.z
+bary.f r3.z, 9, r1.x
+add.f r3.w, r2.x, (neg)r2.z
(ss)mul.f r0.y, r0.z, r0.y
-add.f r0.z, r2.y, (neg)r2.w
-bary.f r1.w, 11, r1.x
-mov.f32f32 r1.z, r1.z
+absneg.f r0.z, (neg)c7.x
+add.f r4.x, r2.y, (neg)r2.w
+mov.f32f32 r4.y, r3.w
+sam (f32)(xyzw)r2.x, r1.z, s#0, t#0
+(ss)add.f r1.z, r3.w, c10.y
+mul.f r0.z, r0.z, c7.x
+mov.f32f32 r1.w, r4.x
+mul.f r3.w, c9.x, r4.y
+add.f r4.y, c10.z, (neg)r4.y
+mul.f r0.z, r0.z, r0.y
mov.f32f32 r0.y, r0.y
-absneg.f r2.y, (neg)c7.x
-mov.f32f32 r0.z, r0.z
-mul.f r2.z, c9.x, r1.z
-mov.f32f32 r1.w, r1.w
-add.f r2.w, c10.z, (neg)r1.z
-add.f r3.w, c10.z, (neg)r0.z
-mov.f32f32 r2.z, r2.z
-mul.f r2.y, r2.y, c7.x
-mul.f r4.x, c9.x, r0.z
-mov.f32f32 r3.z, r1.w
-add.f r0.x, r0.x, (neg)r2.z
-mul.f r1.w, r2.y, r0.y
-mov.f32f32 r2.y, r4.x
-mov.f32f32 r2.z, r2.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r1.w, r1.w
-add.f r0.w, r0.w, (neg)r2.y
-sam (f32)(xyzw)r4.x, r3.y, s#0, t#0
-(sy)cmps.f.lt r2.y, r4.w, c9.z
-add.f r2.w, c10.x, r0.x
-mul.f r0.y, r1.w, r0.y
+add.f r0.x, r0.x, (neg)r3.w
+mul.f r3.w, c9.x, r1.w
+mov.f32f32 r4.z, r4.y
+mul.f r0.y, r0.z, r0.y
+mov.f32f32 r0.z, r0.x
+add.f r0.w, r0.w, (neg)r3.w
add.f r0.x, c10.y, r0.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.w, r2.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
-add.f r2.w, c10.y, r0.w
-mul.f r1.w, r1.w, c3.z
mul.f r0.y, r0.y, c11.x
-mul.f r0.x, r0.x, c3.z
-mov.f32f32 r2.w, r2.w
-(ss)mov.f32f32 r3.y, r1.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r3.z, r0.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r5.x, r3.y
-add.f r0.w, c10.x, r0.w
-mov.f32f32 r3.y, r3.z
-mov.f32f32 r5.w, r0.x
-mov.f32f32 r0.x, r1.w
-mov.f32f32 r0.w, r0.w
-exp2 r0.y, r0.y
-(ss)mov.f32f32 r0.y, r0.y
-mov.f32f32 r6.z, r3.y
-mul.f r1.w, r2.w, c3.w
-mul.f r0.w, r0.w, c3.w
-add.f r2.w, c12.y, (neg)r0.y
-mov.f32f32 r7.y, r0.x
-mov.f32f32 r0.x, r1.w
-mov.f32f32 r3.y, r0.w
-mul.f r2.w, r2.w, c7.y
-mul.f r0.y, r0.y, c10.y
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r5.y, r3.y
-bary.f r3.y, 6, r1.x
-add.f r0.y, r0.y, r2.w
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r0.w, r0.w
-add.f r2.w, r3.y, c9.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r6.w, r1.w
-mov.f32f32 r6.x, r0.w
-mov.f32f32 r0.w, r2.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.w, r2.w
-mov.f32f32 r3.y, r2.w
-mov.f32f32 r5.z, r0.w
-max.f r0.y, r0.y, c9.y
-mov.f32f32 r7.x, r1.w
-mov.f32f32 r6.y, r3.y
-mov.f32f32 r7.z, r0.x
-mov.f32f32 r0.x, r2.w
-cov.u32f32 r0.w, r2.y
-sam.s (f32)(x)r7.w, r5.x, s#2, t#2
-(sy)mov.f32f32 r1.w, r7.w
-min.f r0.y, r0.y, c10.y
-sam.s (f32)(x)r7.w, r6.z, s#2, t#2
-(sy)mov.f32f32 r2.y, r7.w
-(ss)nop
-sam.s (f32)(x)r5.x, r5.w, s#2, t#2
-(sy)mov.f32f32 r2.w, r5.x
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r3.y, r3.w
-add.f r3.z, c12.y, (neg)r0.y
-add.f r3.w, c12.y, (neg)r0.y
-add.f r5.x, c12.y, (neg)r0.y
-mul.f r5.y, r2.z, r3.y
-mul.f r3.z, r3.z, c6.z
-mul.f r3.w, r3.w, c6.y
-mul.f r5.x, r5.x, c6.x
-mul.f r1.w, r5.y, r1.w
-add.f r1.z, r1.z, c10.y
-mov.f32f32 r7.w, r0.x
-mov.f32f32 r0.x, (0.000000)
-add.f r0.z, r0.z, c10.y
-mul.f r3.y, r1.z, r3.y
-mov.f32f32 r2.x, r2.x
-cmps.f.ne p0.x, r0.w, r0.x
-mul.f r0.x, r2.z, r0.z
-mad.f32 r0.w, r3.y, r2.w, r1.w
-sam.s (f32)(x)r5.y, r7.y, s#2, t#2
+add.f r0.z, c10.x, r0.z
+mov.f32f32 r3.w, r0.w
+mul.f r4.w, r0.x, c3.z
+add.f r0.x, c10.y, r0.w
+mul.f r5.z, r0.z, c3.z
+add.f r0.z, c10.x, r3.w
+mov.f32f32 r6.y, r4.w
+mul.f r5.x, r0.x, c3.w
+mov.f32f32 r7.x, r5.z
+mul.f r6.z, r0.z, c3.w
+exp2 r0.x, r0.y
+(ss)mov.f32f32 r0.y, r0.x
+bary.f r0.z, 6, r1.x
+mov.f32f32 r5.w, r5.x
+mov.f32f32 r7.y, r6.z
+add.f r0.y, c12.y, (neg)r0.y
+add.f r5.y, r0.z, c9.w
+add.f r0.z, c10.z, (neg)r1.w
+mul.f r0.x, r0.x, c10.y
+add.f r0.w, r4.x, c10.y
+mov.f32f32 r7.z, r5.y
+mul.f r0.y, r0.y, c7.y
+mov.f32f32 r6.w, r5.y
+mov.f32f32 r6.x, r5.y
(rpt1)nop
-(sy)mov.f32f32 r1.w, r5.y
-mov.f32f32 r0.w, r0.w
-bary.f r2.z, 0, r1.x
-mov.f32f32 r5.y, r2.x
-mad.f32 r0.x, r0.x, r1.w, r0.w
-bary.f r0.w, 1, r1.x
-bary.f r1.w, 9, r1.x
+add.f r0.x, r0.x, r0.y
+sam.s (f32)(x)r7.x, r7.x, s#2, t#2
+mov.f32f32 r0.y, r0.z
+nop
+(ss)nop
+sam.s (f32)(x)r7.y, r6.y, s#2, t#2
+sam.s (f32)(x)r7.z, r4.w, s#2, t#2
mul.f r0.z, r1.z, r0.z
-mov.f32f32 r0.x, r0.x
-bary.f (ei)r1.x, 2, r1.x
-mov.f32f32 r1.y, r1.w
-mad.f32 r0.x, r0.z, r2.y, r0.x
+mul.f r1.w, r4.y, r0.w
+mul.f r0.y, r4.z, r0.y
+max.f r0.x, r0.x, c9.y
+sam.s (f32)(x)r3.w, r5.z, s#2, t#2
+(sy)cmps.f.lt r4.x, r2.w, c9.z
+mul.f r0.w, r1.z, r0.w
+mul.f r0.y, r0.y, r7.x
+min.f r0.x, r0.x, c10.y
+mad.f32 r0.y, r0.z, r7.y, r0.y
+cov.u32f32 r0.z, r4.x
+mad.f32 r0.y, r1.w, r3.w, r0.y
+add.f r1.z, c12.y, (neg)r0.x
+mad.f32 r0.y, r0.w, r7.z, r0.y
+add.f r0.w, c12.y, (neg)r0.x
+add.f r1.w, c12.y, (neg)r0.x
+mov.f32f32 r3.w, (0.000000)
+mul.f r0.y, c10.w, r0.y
+bary.f r4.x, 0, r1.x
+mul.f r1.z, r1.z, c6.z
+mul.f r0.w, r0.w, c6.y
+mov.f32f32 r4.y, r0.y
+bary.f r4.z, 2, r1.x
+bary.f (ei)r1.x, 1, r1.x
+mul.f r1.y, r2.x, r4.x
+mul.f r1.w, r1.w, c6.x
+mul.f r4.x, r2.z, r4.z
+mul.f r1.x, r2.y, r1.x
+mul.f r0.y, r1.y, r0.y
+cmps.f.ne p0.x, r0.z, r3.w
+mul.f r0.z, r4.x, r4.y
+mul.f r1.x, r1.x, r4.y
+mad.f32 r0.z, c5.z, r2.z, r0.z
+mad.f32 r1.x, c5.y, r2.y, r1.x
+mad.f32 r0.y, c5.x, r2.x, r0.y
+sam (f32)(w)r3.y, r3.y, s#1, t#1
+(sy)cmps.f.lt r1.y, r4.x, c11.y
+mul.f r0.z, r0.x, r0.z
+mul.f r1.x, r0.x, r1.x
+mul.f r0.x, r0.x, r0.y
kill p0.x
-mov.f32f32 r0.z, r4.w
-mov.f32f32 r5.z, r1.y
-mov.f32f32 r0.x, r0.x
-mul.f r1.x, r4.z, r1.x
-mul.f r0.w, r4.y, r0.w
-mov.f32f32 r2.w, r0.z
-mul.f r0.x, c10.w, r0.x
-mul.f r0.z, r4.x, r2.z
-sam (f32)(w)r1.y, r5.y, s#1, t#1
-nop
-(sy)cmps.f.lt r1.y, r2.x, c11.y
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r1.z, r2.x
-mov.f32f32 r1.w, c9.y
-nop
-mul.f r1.x, r1.x, r0.x
-mul.f r0.w, r0.w, r0.x
-mul.f r0.x, r0.z, r0.x
+add.f r0.y, r0.z, r1.z
cov.u32f32 r0.z, r1.y
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r0.w, r0.w
-mad.f32 r1.x, c5.z, r4.z, r1.x
-mad.f32 r0.w, c5.y, r4.y, r0.w
-mov.f32f32 r0.x, r0.x
+add.f r0.w, r1.x, r0.w
+add.f r0.x, r0.x, r1.w
+mov.f32f32 r1.x, c9.y
cmps.f.ne r0.z, r0.z, c9.y
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r0.w, r0.w
-mad.f32 r0.x, c5.x, r4.x, r0.x
-mov.f32f32 r1.y, r1.z
-mul.f r1.x, r0.y, r1.x
-mul.f r0.w, r0.y, r0.w
-mov.f32f32 r0.x, r0.x
-sel.b32 r0.z, r1.w, r0.z, r1.y
-add.f r1.x, r1.x, r3.z
-add.f r0.w, r0.w, r3.w
-mul.f r0.x, r0.y, r0.x
-nop
-mul.f r0.y, r1.x, r0.z
+(rpt2)nop
+sel.b32 r0.z, r1.x, r0.z, r4.x
+(rpt2)nop
+mul.f r0.y, r0.y, r0.z
mul.f r0.w, r0.w, r0.z
-add.f r0.x, r0.x, r5.x
-nop
-mul.f r0.y, r0.y, c4.z
-mul.f r0.w, r0.w, c4.y
+(rpt1)nop
+mul.f r2.z, r0.y, c4.z
+mul.f r2.y, r0.w, c4.y
mul.f r0.x, r0.x, r0.z
-nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.w
-mul.f r0.x, r0.x, c4.x
-nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.x, r0.x
-nop
-mov.f32f32 r2.z, r0.y
-mov.f32f32 r2.y, r0.z
-mov.f32f32 r0.x, r0.x
(rpt2)nop
-mov.f32f32 r2.x, r0.x
+mul.f r2.x, r0.x, c4.x
end
nop
nop
nop
; FRAG: outputs: r2.x (1:0)
; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r2.x (5:10,cm=f,il=12,b=1) r1.x (5:11,cm=f,il=16,b=1)
-; FRAG: 196 instructions, 0 half, 8 full
+; FRAG: 133 instructions, 0 half, 8 full
diff --git a/reference/0ad-alpine-valley/0ad-104.asm b/reference/0ad-alpine-valley/0ad-104.asm
index 36d09e4..63ee7bc 100644
--- a/reference/0ad-alpine-valley/0ad-104.asm
+++ b/reference/0ad-alpine-valley/0ad-104.asm
@@ -6,8 +6,8 @@
@in(r0.w) in4
@in(r1.x) in5
@in(r1.y) in6
-@in(r4.x) in8
-@in(r4.y) in9
+@in(r3.z) in8
+@in(r3.w) in9
@out(r0.x) out0
@out(r0.y) out1
@out(r0.z) out2
@@ -24,223 +24,164 @@
@out(r3.y) out13
@out(r3.z) out14
@out(r3.w) out15
+@const(c18.x) 0x3ecccccd, 0x00000000, 0x3eaaa64c, 0x3e800000
+@const(c19.x) 0x3f000000, 0x40000000, 0xbf800000, 0x40400000
+@const(c20.x) 0x399d4952, 0x3e4ccccd, 0x3f99999a, 0x3fd9999a
+@const(c21.x) 0x3c88ce70, 0x00000000, 0x00000000, 0x00000000
+@const(c22.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)floor.f r1.z, c14.z
floor.f r1.w, c14.x
absneg.f r2.x, (abs)c17.x
absneg.f r2.y, (abs)c17.y
add.f r1.z, c14.z, (neg)r1.z
add.f r1.w, c14.x, (neg)r1.w
-mul.f r2.z, c11.x, r0.w
-add.f r2.x, r2.x, r2.y
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r1.w, r1.w
-mad.f32 r2.y, c12.x, r1.x, r2.z
-mov.f32f32 r2.x, r2.x
+mov.f32f32 r2.z, c18.y
+mul.f r2.w, c11.x, r0.w
max.f r1.z, r1.z, c18.y
max.f r1.w, r1.w, c18.y
-mov.f32f32 r2.y, r2.y
-mul.f r2.z, c16.x, r2.x
+add.f r2.x, r2.x, r2.y
+add.f r2.y, r2.z, c19.x
min.f r1.z, r1.z, c22.y
min.f r1.w, r1.w, c22.y
-mul.f r2.w, c11.z, r0.x
-mad.f32 r2.y, c13.x, r1.y, r2.y
+mul.f r2.z, c16.x, r2.x
+mul.f r3.x, c11.z, r0.x
max.f r1.z, r1.z, c18.x
max.f r1.w, r1.w, c18.x
-mad.f32 r2.w, c12.z, r0.y, r2.w
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r1.w, r1.w
-mad.f32 r2.w, c13.z, r0.z, r2.w
-mul.f r3.x, c11.x, r0.x
+mul.f r3.y, c11.x, r0.x
+mad.f32 r3.x, c12.z, r0.y, r3.x
mul.f r1.z, c16.x, r1.z
-mad.f32 r3.x, c12.x, r0.y, r3.x
-add.f r2.w, r2.w, c14.z
-mad.f32 r3.x, c13.x, r0.z, r3.x
-mov.f32f32 r1.z, r1.z
-mad.f32 r2.z, c18.w, r2.z, r2.w
-mul.f r2.y, r2.y, (neg)c4.x
-mul.f r3.y, c11.y, r0.w
+mad.f32 r3.y, c12.x, r0.y, r3.y
+mad.f32 r3.x, c13.z, r0.z, r3.x
+mad.f32 r3.y, c13.x, r0.z, r3.y
mad.f32 r1.z, c18.z, r1.z, c14.x
-add.f r3.x, r3.x, c14.x
-mov.f32f32 r2.z, r2.z
-mad.f32 r1.w, c16.x, r1.w, r3.x
-mov.f32f32 r1.z, r1.z
-mad.f32 r3.y, c12.y, r1.x, r3.y
-mov.f32f32 r2.z, r2.z
+floor.f r4.x, r2.y
+mad.f32 r2.w, c12.x, r1.x, r2.w
+add.f r3.y, r3.y, c14.x
+add.f r1.z, r1.z, c19.x
+mad.f32 r1.w, c16.x, r1.w, r3.y
+add.f r3.x, r3.x, c14.z
+add.f r2.y, r2.y, (neg)r4.x
+floor.f r4.x, r1.z
add.f r1.w, r1.w, c19.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r3.y, r3.y
+mad.f32 r2.z, c18.w, r2.z, r3.x
+mad.f32 r2.y, c19.y, r2.y, c19.z
+add.f r1.z, r1.z, (neg)r4.x
+floor.f r4.x, r1.w
add.f r2.z, r2.z, c19.x
-mad.f32 r3.y, c13.y, r1.y, r3.y
-add.f r1.z, r1.z, c19.x
-floor.f r3.z, r1.w
-floor.f r3.w, r2.z
-mov.f32f32 r3.y, r3.y
-floor.f r4.z, r1.z
-add.f r1.w, r1.w, (neg)r3.z
-add.f r2.z, r2.z, (neg)r3.w
-mad.f32 r2.y, (neg)c4.y, r3.y, r2.y
-add.f r1.z, r1.z, (neg)r4.z
-mad.f32 r1.w, c19.y, r1.w, c19.z
-mad.f32 r2.z, c19.y, r2.z, c19.z
-mov.f32f32 r2.y, r2.y
+absneg.f r2.y, (abs)r2.y
mad.f32 r1.z, c19.y, r1.z, c19.z
-absneg.f r1.w, (abs)r1.w
-absneg.f r2.z, (abs)r2.z
-mul.f r0.w, c11.z, r0.w
+add.f r4.x, r1.w, (neg)r4.x
+floor.f r4.y, r2.z
+mul.f r1.w, r2.y, r2.y
absneg.f r1.z, (abs)r1.z
-mul.f r3.y, c19.y, r1.w
-mul.f r3.z, c19.y, r2.z
-mul.f r1.w, r1.w, r1.w
-mul.f r3.w, c19.y, r1.z
-add.f r3.y, c19.w, (neg)r3.y
-add.f r3.z, c19.w, (neg)r3.z
-mul.f r2.z, r2.z, r2.z
-add.f r3.w, c19.w, (neg)r3.w
+mad.f32 r2.y, c19.y, r4.x, c19.z
+add.f r2.z, r2.z, (neg)r4.y
+mad.f32 r2.w, c13.x, r1.y, r2.w
+mul.f r4.x, c19.y, r1.z
+absneg.f r2.y, (abs)r2.y
+mad.f32 r2.z, c19.y, r2.z, c19.z
mul.f r1.z, r1.z, r1.z
-mul.f r1.w, r1.w, r3.y
-mul.f r2.z, r2.z, r3.z
+add.f r4.x, c19.w, (neg)r4.x
+mul.f r4.y, c19.y, r2.y
+absneg.f r2.z, (abs)r2.z
+mul.f r2.y, r2.y, r2.y
+mul.f r1.z, r1.z, r4.x
+mul.f r4.x, r0.y, c21.x
+add.f r4.y, c19.w, (neg)r4.y
+mul.f r4.z, c19.y, r2.z
+mul.f r2.z, r2.z, r2.z
+max.f r4.x, r4.x, c18.y
+mul.f r2.y, r2.y, r4.y
+mul.f r4.y, r0.x, r0.z
+add.f r4.z, c19.w, (neg)r4.z
+min.f r4.x, r4.x, c22.y
+mul.f r4.w, r0.y, c20.x
+mul.f r2.w, r2.w, (neg)c4.x
+mul.f r5.x, c11.y, r0.w
+min.f r4.x, r4.x, c18.w
+mul.f r4.y, r4.y, r4.w
+mul.f r2.z, r2.z, r4.z
+mad.f32 r4.z, c12.y, r1.x, r5.x
+mul.f r1.z, r1.z, r4.x
+max.f r4.x, r4.y, c18.y
+mad.f32 r4.y, c13.y, r1.y, r4.z
+mov.f32f32 r2.x, r2.x
+mov.f32f32 r4.z, r1.z
+min.f r4.x, r4.x, c22.y
+mad.f32 r2.w, (neg)c4.y, r4.y, r2.w
+mul.f r0.w, c11.z, r0.w
+max.f r2.x, r2.x, c20.z
+min.f r4.x, r4.x, c20.y
mad.f32 r0.w, c12.z, r1.x, r0.w
-mul.f r1.x, r1.z, r3.w
-mov.f32f32 r1.z, r1.w
-mov.f32f32 r1.w, r2.z
-mul.f r2.z, r0.x, r0.z
-mov.f32f32 r1.x, r1.x
-mul.f r3.y, r0.y, c21.x
-mul.f r3.z, r0.y, c20.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r3.w, c18.y
-mov.f32f32 r3.y, r3.y
-mul.f r2.z, r2.z, r3.z
-mad.f32 r0.w, c13.z, r1.y, r0.w
-add.f r1.y, r3.w, c19.x
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r0.w, r0.w
-floor.f r3.z, r1.y
-max.f r3.y, r3.y, c18.y
-mov.f32f32 r2.z, r2.z
-mad.f32 r0.w, (neg)c4.z, r0.w, r2.y
-add.f r1.y, r1.y, (neg)r3.z
-min.f r2.y, r3.y, c22.y
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r0.w, r0.w
-mad.f32 r1.y, c19.y, r1.y, c19.z
-min.f r2.y, r2.y, c18.w
-max.f r2.z, r2.z, c18.y
-max.f r0.w, c18.y, r0.w
-absneg.f r1.y, (abs)r1.y
-mov.f32f32 r2.y, r2.y
-min.f r2.z, r2.z, c22.y
-mov.f32f32 r0.w, r0.w
-mul.f r3.y, r1.y, r1.y
-mul.f r1.x, r1.x, r2.y
-min.f r1.y, r2.z, c20.y
-mul.f r2.y, r0.w, c5.z
-mul.f r2.z, r0.w, c5.y
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r2.z, r2.z
-mul.f r0.w, r0.w, c5.x
-mul.f r3.z, r1.z, r1.y
-mul.f r3.w, r1.w, r1.y
-max.f r1.w, r2.x, c20.z
-mov.f32f32 r1.z, r2.y
-mov.f32f32 r2.x, r3.z
-mov.f32f32 r1.y, r2.z
-mad.f32 r2.y, c17.x, r1.x, r2.x
-mad.f32 r1.x, c17.y, r1.x, r2.x
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.x, r2.y
-mov.f32f32 r2.y, r1.x
-min.f r2.z, r1.w, c20.w
-mov.f32f32 r1.x, r0.w
-mov.f32f32 r1.w, r3.y
-mul.f r0.w, c11.y, r0.x
-mov.f32f32 r2.z, r2.z
-mad.f32 r0.w, c12.y, r0.y, r0.w
+mul.f r1.x, c11.y, r0.x
mul.f r0.x, c11.w, r0.x
-mov.f32f32 r3.z, r4.y
-mad.f32 r3.x, r3.w, r2.z, r3.x
-mad.f32 r2.w, r3.w, r2.z, r2.w
-mad.f32 r0.w, c13.y, r0.z, r0.w
+mov.f32f32 r4.y, r4.x
+mul.f r2.z, r2.z, r4.x
+min.f r2.x, r2.x, c20.w
+mad.f32 r0.w, c13.z, r1.y, r0.w
+mul.f r1.y, r2.y, r4.y
+mad.f32 r1.x, c12.y, r0.y, r1.x
mad.f32 r0.x, c12.w, r0.y, r0.x
-add.f r0.y, r3.x, r2.x
-add.f r2.x, r2.w, r2.y
-add.f r0.w, r0.w, c14.y
-nop
-mov.f32f32 r3.x, r0.y
-mov.f32f32 r3.y, r2.x
-mad.f32 r0.y, r3.w, r2.z, r0.w
+mad.f32 r0.y, c13.y, r0.z, r1.x
+mov.f32f32 r1.x, r1.y
+mad.f32 r1.y, c17.y, r1.z, r1.y
+mad.f32 r1.x, c17.x, r4.z, r1.x
+mov.f32f32 r1.z, r2.x
+mad.f32 r2.x, r2.z, r2.x, r3.x
+add.f r0.y, r0.y, c14.y
+mad.f32 r0.w, (neg)c4.z, r0.w, r2.w
+mad.f32 r2.y, r2.z, r1.z, r3.y
+add.f r1.y, r2.x, r1.y
+mad.f32 r0.y, r2.z, r1.z, r0.y
+max.f r4.x, c18.y, r0.w
+add.f r0.w, r2.y, r1.x
+mov.f32f32 r1.x, r1.y
mad.f32 r0.x, c13.w, r0.z, r0.x
-mul.f r0.z, c7.w, r3.x
-mul.f r0.w, c7.z, r3.x
-mad.f32 r0.z, c8.w, r0.y, r0.z
-mad.f32 r0.w, c8.z, r0.y, r0.w
-mul.f r2.x, c7.y, r3.x
-mul.f r2.y, c7.x, r3.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-mad.f32 r0.z, c9.w, r3.y, r0.z
-add.f r0.x, r0.x, c14.w
-mad.f32 r0.w, c9.z, r3.y, r0.w
-mad.f32 r2.x, c8.y, r0.y, r2.x
-mad.f32 r2.y, c8.x, r0.y, r2.y
-mad.f32 r0.z, c10.w, r0.x, r0.z
-mad.f32 r0.w, c10.z, r0.x, r0.w
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-mad.f32 r2.x, c9.y, r3.y, r2.x
-mad.f32 r2.y, c9.x, r3.y, r2.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-mad.f32 r2.x, c10.y, r0.x, r2.x
-mad.f32 r2.y, c10.x, r0.x, r2.y
-mov.f32f32 r2.w, r0.z
-mov.f32f32 r2.z, r0.w
-mov.f32f32 r0.z, r2.x
-mov.f32f32 r0.w, r2.y
-mul.f r3.w, c0.w, r3.x
-mul.f r4.y, c0.z, r3.x
-mul.f r0.z, r0.z, c15.y
-mul.f r0.w, r0.w, c15.x
-(rpt1)nop
-mov.f32f32 r2.y, r0.z
-mov.f32f32 r2.x, r0.w
-mad.f32 r0.z, c1.w, r0.y, r3.w
-mad.f32 r0.w, c1.z, r0.y, r4.y
-mad.f32 r0.z, c2.w, r3.y, r0.z
-mad.f32 r0.w, c2.z, r3.y, r0.w
-mad.f32 r0.z, c3.w, r0.x, r0.z
-mad.f32 r3.w, c3.z, r0.x, r0.w
-mul.f r4.y, c0.y, r3.x
-mul.f r4.z, c0.x, r3.x
-mov.f32f32 r0.w, r0.z
-mov.f32f32 r0.z, r3.w
-mad.f32 r3.w, c1.y, r0.y, r4.y
-mad.f32 r0.y, c1.x, r0.y, r4.z
-mad.f32 r3.w, c2.y, r3.y, r3.w
-mad.f32 r0.y, c2.x, r3.y, r0.y
-mad.f32 r3.w, c3.y, r0.x, r3.w
-mad.f32 r0.x, c3.x, r0.x, r0.y
-(rpt1)nop
-mov.f32f32 r0.y, r3.w
-mov.f32f32 r0.x, r0.x
-mad.f32 r3.y, c6.x, r3.y, c6.y
-mad.f32 r3.x, c6.x, r3.x, c6.y
-(rpt1)nop
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r3.w, r3.z
-mov.f32f32 r3.z, r4.x
-(rpt2)nop
-mov.f32f32 r3.z, r3.z
+nop
+mov.f32f32 r1.z, r0.w
+mul.f r3.x, c0.x, r0.w
+mad.f32 r3.y, c6.x, r1.x, c6.y
+mov.f32f32 r4.y, r4.x
+mul.f r0.z, c7.y, r1.z
+mul.f r0.w, c7.x, r1.z
+mad.f32 r0.z, c8.y, r0.y, r0.z
+mad.f32 r0.w, c8.x, r0.y, r0.w
+mad.f32 r0.z, c9.y, r1.x, r0.z
+add.f r4.z, r0.x, c14.w
+mad.f32 r0.x, c9.x, r1.x, r0.w
+mul.f r0.w, c7.w, r1.z
+mul.f r2.x, c7.z, r1.z
+mad.f32 r0.z, c10.y, r4.z, r0.z
+mad.f32 r0.x, c10.x, r4.z, r0.x
+mad.f32 r0.w, c8.w, r0.y, r0.w
+mad.f32 r2.z, c8.z, r0.y, r2.x
+mul.f r2.y, r0.z, c15.y
+mul.f r2.x, r0.x, c15.x
+mad.f32 r0.x, c9.w, r1.x, r0.w
+mad.f32 r0.z, c9.z, r1.x, r2.z
+mad.f32 r2.w, c10.w, r4.z, r0.x
+mad.f32 r2.z, c10.z, r4.z, r0.z
+mul.f r0.x, c0.w, r1.z
+mul.f r0.z, c0.z, r1.z
+mad.f32 r0.x, c1.w, r0.y, r0.x
+mad.f32 r0.z, c1.z, r0.y, r0.z
+mad.f32 r0.x, c2.w, r1.x, r0.x
+mad.f32 r0.z, c2.z, r1.x, r0.z
+mad.f32 r0.w, c3.w, r4.z, r0.x
+mad.f32 r0.z, c3.z, r4.z, r0.z
+mul.f r0.x, c0.y, r1.z
+mad.f32 r3.x, c1.x, r0.y, r3.x
+mad.f32 r0.x, c1.y, r0.y, r0.x
+mad.f32 r0.y, c2.x, r1.y, r3.x
+mad.f32 r1.x, c2.y, r1.x, r0.x
+mad.f32 r0.x, c3.x, r4.z, r0.y
+mad.f32 r0.y, c3.y, r4.z, r1.x
+mad.f32 r3.x, c6.x, r1.z, c6.y
+mul.f r1.z, r4.y, c5.z
+mul.f r1.y, r4.y, c5.y
+mul.f r1.x, r4.x, c5.x
end
; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11)
-; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r4.x (0:0,cm=3,il=16,b=0)
-; VERT: 221 instructions, 0 half, 5 full
+; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r3.z (0:0,cm=3,il=16,b=0)
+; VERT: 152 instructions, 0 half, 6 full
diff --git a/reference/0ad-alpine-valley/0ad-105.asm b/reference/0ad-alpine-valley/0ad-105.asm
index 8e79f89..270bdcf 100644
--- a/reference/0ad-alpine-valley/0ad-105.asm
+++ b/reference/0ad-alpine-valley/0ad-105.asm
@@ -1,17 +1,17 @@
; options:
; VERT: new compiler
-@in(r2.x) in0
-@in(r2.y) in1
-@in(r2.z) in2
-@in(r2.w) in3
+@in(r4.x) in0
+@in(r4.y) in1
+@in(r4.z) in2
+@in(r4.w) in3
@in(r1.x) in4
@in(r1.y) in5
@in(r1.z) in6
@in(r1.w) in7
-@in(r4.x) in8
-@in(r4.y) in9
-@in(r4.z) in10
-@in(r4.w) in11
+@in(r2.x) in8
+@in(r2.y) in9
+@in(r2.z) in10
+@in(r2.w) in11
@out(r0.x) out0
@out(r0.y) out1
@out(r0.z) out2
@@ -28,55 +28,40 @@
@out(r3.y) out13
@out(r3.z) out14
@out(r3.w) out15
-(sy)(ss)mul.f r0.x, c3.x, r2.x
-mul.f r0.y, c2.x, r2.x
-mad.f32 r0.x, c3.y, r2.y, r0.x
-mad.f32 r0.y, c2.y, r2.y, r0.y
-mul.f r0.z, c1.x, r2.x
-mul.f r0.w, c0.x, r2.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
-mad.f32 r0.x, c3.z, r2.z, r0.x
-mad.f32 r0.y, c2.z, r2.z, r0.y
-mad.f32 r3.x, c1.y, r2.y, r0.z
-mad.f32 r2.y, c0.y, r2.y, r0.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
-mad.f32 r0.w, c3.w, r2.w, r0.x
-mad.f32 r0.z, c2.w, r2.w, r0.y
-mov.f32f32 r0.x, r3.x
-mov.f32f32 r0.y, r2.y
-mad.f32 r0.x, c1.z, r2.z, r0.x
-mad.f32 r0.y, c0.z, r2.z, r0.y
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r2.y, r0.y
-mad.f32 r0.y, c1.w, r2.w, r0.x
-mad.f32 r0.x, c0.w, r2.w, r2.y
+@const(c5.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
+(sy)(ss)mul.f r0.x, c3.x, r4.x
+mul.f r0.y, c2.x, r4.x
+mad.f32 r0.x, c3.y, r4.y, r0.x
+mad.f32 r0.y, c2.y, r4.y, r0.y
+mad.f32 r0.x, c3.z, r4.z, r0.x
+mad.f32 r0.y, c2.z, r4.z, r0.y
+mad.f32 r0.w, c3.w, r4.w, r0.x
+mad.f32 r0.z, c2.w, r4.w, r0.y
+mul.f r0.x, c1.x, r4.x
+mul.f r0.y, c0.x, r4.x
+mad.f32 r0.x, c1.y, r4.y, r0.x
+mad.f32 r0.y, c0.y, r4.y, r0.y
+mad.f32 r0.x, c1.z, r4.z, r0.x
+mad.f32 r3.x, c0.z, r4.z, r0.y
+mad.f32 r0.y, c1.w, r4.w, r0.x
+mad.f32 r0.x, c0.w, r4.w, r3.x
max.f r1.w, r1.w, c5.x
max.f r1.z, r1.z, c5.x
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.x, r1.x
-min.f r1.w, r1.w, c5.y
-min.f r1.z, r1.z, c5.y
max.f r1.y, r1.y, c5.x
max.f r1.x, r1.x, c5.x
-(rpt1)nop
+min.f r1.w, r1.w, c5.y
+min.f r1.z, r1.z, c5.y
min.f r1.y, r1.y, c5.y
min.f r1.x, r1.x, c5.y
-mad.f32 r3.w, c4.x, r2.z, c4.y
-mad.f32 r3.z, c4.x, r2.z, c4.y
-mad.f32 r3.y, c4.x, r2.z, c4.y
-mad.f32 r3.x, c4.x, r2.x, c4.y
-mov.f32f32 r2.w, r4.w
-mov.f32f32 r2.z, r4.z
-mov.f32f32 r2.y, r4.y
-mov.f32f32 r2.x, r4.x
+mad.f32 r3.w, c4.x, r4.z, c4.y
+mad.f32 r3.z, c4.x, r4.z, c4.y
+mad.f32 r3.y, c4.x, r4.z, c4.y
+mad.f32 r3.x, c4.x, r4.x, c4.y
end
nop
nop
+nop
; VERT: outputs: r0.x (0:0) r1.x (1:0) r2.x (5:0) r3.x (5:1)
-; VERT: inputs: r2.x (0:0,cm=f,il=8,b=0) r1.x (0:0,cm=f,il=12,b=0) r4.x (0:0,cm=f,il=16,b=0)
-; VERT: 47 instructions, 0 half, 5 full
+; VERT: inputs: r4.x (0:0,cm=f,il=8,b=0) r1.x (0:0,cm=f,il=12,b=0) r2.x (0:0,cm=f,il=16,b=0)
+; VERT: 29 instructions, 0 half, 5 full
diff --git a/reference/0ad-alpine-valley/0ad-106.asm b/reference/0ad-alpine-valley/0ad-106.asm
index f6c10a2..e8e697e 100644
--- a/reference/0ad-alpine-valley/0ad-106.asm
+++ b/reference/0ad-alpine-valley/0ad-106.asm
@@ -6,43 +6,40 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c1.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.z, 0, r0.x
-bary.f r0.w, 4, r0.x
-bary.f r1.x, 5, r0.x
-bary.f (ei)r0.x, 1, r0.x
-mov.f32f32 r1.y, r0.z
-mov.f32f32 r0.y, r0.z
-mov.f32f32 r2.x, r0.w
-mov.f32f32 r1.z, r0.x
-mov.f32f32 r0.z, r0.x
-mov.f32f32 r2.y, r1.x
-(rpt3)nop
-sam (f32)(xyz)r0.w, r1.y, s#1, t#1
-(sy)add.f r0.x, c1.y, (neg)r1.y
-sam (f32)(xyzw)r2.z, r0.y, s#0, t#0
-(ss)add.f r0.y, c1.y, (neg)r1.x
-add.f r0.z, c1.y, (neg)r0.w
-(sy)mul.f r1.w, c0.w, r3.y
-mul.f r0.x, r0.x, r3.x
-mul.f r1.y, r1.y, c0.z
-mul.f r0.y, r0.y, r2.w
-mul.f r0.z, r0.z, r2.z
-mul.f r1.x, r1.x, c0.y
-add.f r0.x, r1.y, r0.x
-sam (f32)(w)r2.x, r2.x, s#2, t#2
-mul.f r0.w, r0.w, c0.x
+bary.f r0.w, 1, r0.x
+bary.f r1.x, 4, r0.x
+bary.f (ei)r1.y, 5, r0.x
+mov.f32f32 r0.x, r0.z
+mov.f32f32 r0.y, r0.w
(rpt1)nop
-(sy)mul.f r1.z, r0.x, r2.w
-add.f r0.x, r1.x, r0.y
-add.f r0.y, r0.w, r0.z
+sam (f32)(xyz)r1.z, r0.z, s#1, t#1
+(sy)(ss)add.f r0.z, c1.y, (neg)r2.x
+mul.f r0.w, r2.x, c0.z
+mul.f r2.x, r1.w, c0.y
+add.f r1.w, c1.y, (neg)r1.w
+sam (f32)(xyzw)r2.y, r0.x, s#0, t#0
+(sy)(ss)mul.f r0.x, r0.z, r2.w
+add.f r0.y, c1.y, (neg)r1.z
+mul.f r0.z, r1.z, c0.x
+mul.f r2.z, r1.w, r2.z
+add.f r0.x, r0.w, r0.x
+sam (f32)(w)r3.y, r1.x, s#2, t#2
+mul.f r0.y, r0.y, r2.y
+mul.f r1.w, c0.w, r3.x
+nop
+(sy)mul.f r1.z, r0.x, r4.x
+add.f r0.x, r2.x, r2.z
+add.f r0.y, r0.z, r0.y
(rpt1)nop
-mul.f r1.y, r0.x, r2.w
-mul.f r1.x, r0.y, r2.w
+(ss)mul.f r1.y, r0.x, r4.x
+mul.f r1.x, r0.y, r4.x
end
nop
nop
nop
; FRAG: outputs: r1.x (1:0)
; FRAG: inputs: r0.x (5:0,cm=f,il=8,b=1) r1.x (5:1,cm=f,il=12,b=1)
-; FRAG: 38 instructions, 0 half, 4 full
+; FRAG: 31 instructions, 0 half, 5 full
diff --git a/reference/0ad-alpine-valley/0ad-107.asm b/reference/0ad-alpine-valley/0ad-107.asm
index 8e79f89..270bdcf 100644
--- a/reference/0ad-alpine-valley/0ad-107.asm
+++ b/reference/0ad-alpine-valley/0ad-107.asm
@@ -1,17 +1,17 @@
; options:
; VERT: new compiler
-@in(r2.x) in0
-@in(r2.y) in1
-@in(r2.z) in2
-@in(r2.w) in3
+@in(r4.x) in0
+@in(r4.y) in1
+@in(r4.z) in2
+@in(r4.w) in3
@in(r1.x) in4
@in(r1.y) in5
@in(r1.z) in6
@in(r1.w) in7
-@in(r4.x) in8
-@in(r4.y) in9
-@in(r4.z) in10
-@in(r4.w) in11
+@in(r2.x) in8
+@in(r2.y) in9
+@in(r2.z) in10
+@in(r2.w) in11
@out(r0.x) out0
@out(r0.y) out1
@out(r0.z) out2
@@ -28,55 +28,40 @@
@out(r3.y) out13
@out(r3.z) out14
@out(r3.w) out15
-(sy)(ss)mul.f r0.x, c3.x, r2.x
-mul.f r0.y, c2.x, r2.x
-mad.f32 r0.x, c3.y, r2.y, r0.x
-mad.f32 r0.y, c2.y, r2.y, r0.y
-mul.f r0.z, c1.x, r2.x
-mul.f r0.w, c0.x, r2.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
-mad.f32 r0.x, c3.z, r2.z, r0.x
-mad.f32 r0.y, c2.z, r2.z, r0.y
-mad.f32 r3.x, c1.y, r2.y, r0.z
-mad.f32 r2.y, c0.y, r2.y, r0.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
-mad.f32 r0.w, c3.w, r2.w, r0.x
-mad.f32 r0.z, c2.w, r2.w, r0.y
-mov.f32f32 r0.x, r3.x
-mov.f32f32 r0.y, r2.y
-mad.f32 r0.x, c1.z, r2.z, r0.x
-mad.f32 r0.y, c0.z, r2.z, r0.y
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r2.y, r0.y
-mad.f32 r0.y, c1.w, r2.w, r0.x
-mad.f32 r0.x, c0.w, r2.w, r2.y
+@const(c5.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
+(sy)(ss)mul.f r0.x, c3.x, r4.x
+mul.f r0.y, c2.x, r4.x
+mad.f32 r0.x, c3.y, r4.y, r0.x
+mad.f32 r0.y, c2.y, r4.y, r0.y
+mad.f32 r0.x, c3.z, r4.z, r0.x
+mad.f32 r0.y, c2.z, r4.z, r0.y
+mad.f32 r0.w, c3.w, r4.w, r0.x
+mad.f32 r0.z, c2.w, r4.w, r0.y
+mul.f r0.x, c1.x, r4.x
+mul.f r0.y, c0.x, r4.x
+mad.f32 r0.x, c1.y, r4.y, r0.x
+mad.f32 r0.y, c0.y, r4.y, r0.y
+mad.f32 r0.x, c1.z, r4.z, r0.x
+mad.f32 r3.x, c0.z, r4.z, r0.y
+mad.f32 r0.y, c1.w, r4.w, r0.x
+mad.f32 r0.x, c0.w, r4.w, r3.x
max.f r1.w, r1.w, c5.x
max.f r1.z, r1.z, c5.x
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.x, r1.x
-min.f r1.w, r1.w, c5.y
-min.f r1.z, r1.z, c5.y
max.f r1.y, r1.y, c5.x
max.f r1.x, r1.x, c5.x
-(rpt1)nop
+min.f r1.w, r1.w, c5.y
+min.f r1.z, r1.z, c5.y
min.f r1.y, r1.y, c5.y
min.f r1.x, r1.x, c5.y
-mad.f32 r3.w, c4.x, r2.z, c4.y
-mad.f32 r3.z, c4.x, r2.z, c4.y
-mad.f32 r3.y, c4.x, r2.z, c4.y
-mad.f32 r3.x, c4.x, r2.x, c4.y
-mov.f32f32 r2.w, r4.w
-mov.f32f32 r2.z, r4.z
-mov.f32f32 r2.y, r4.y
-mov.f32f32 r2.x, r4.x
+mad.f32 r3.w, c4.x, r4.z, c4.y
+mad.f32 r3.z, c4.x, r4.z, c4.y
+mad.f32 r3.y, c4.x, r4.z, c4.y
+mad.f32 r3.x, c4.x, r4.x, c4.y
end
nop
nop
+nop
; VERT: outputs: r0.x (0:0) r1.x (1:0) r2.x (5:0) r3.x (5:1)
-; VERT: inputs: r2.x (0:0,cm=f,il=8,b=0) r1.x (0:0,cm=f,il=12,b=0) r4.x (0:0,cm=f,il=16,b=0)
-; VERT: 47 instructions, 0 half, 5 full
+; VERT: inputs: r4.x (0:0,cm=f,il=8,b=0) r1.x (0:0,cm=f,il=12,b=0) r2.x (0:0,cm=f,il=16,b=0)
+; VERT: 29 instructions, 0 half, 5 full
diff --git a/reference/0ad-alpine-valley/0ad-115.asm b/reference/0ad-alpine-valley/0ad-115.asm
index 3be9d09..85a697c 100644
--- a/reference/0ad-alpine-valley/0ad-115.asm
+++ b/reference/0ad-alpine-valley/0ad-115.asm
@@ -6,44 +6,34 @@
@out(r2.y) out1
@out(r2.z) out2
@out(r2.w) out3
+@const(c4.x) 0x3f000000, 0x00000000, 0x3f800000, 0x3fb8aa65
+@const(c5.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)add.f r0.x, r0.w, c4.y
-mov.f32f32 r0.y, c0.w
-absneg.f r0.w, (neg)c2.x
+absneg.f r0.y, (neg)c2.x
+mov.f32f32 r2.w, c0.w
add.f r0.z, r0.z, c4.y
(rpt2)nop
rcp r0.x, r0.x
(ss)mul.f r0.x, r0.z, r0.x
-mov.f32f32 r2.w, r0.y
-mul.f r0.y, r0.w, c2.x
-nop
-mov.f32f32 r0.x, r0.x
+mul.f r0.y, r0.y, c2.x
(rpt2)nop
mul.f r0.y, r0.y, r0.x
-(rpt2)nop
-mov.f32f32 r0.y, r0.y
+mov.f32f32 r0.x, r0.x
(rpt2)nop
mul.f r0.x, r0.y, r0.x
(rpt2)nop
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
mul.f r0.x, r0.x, c4.w
-(rpt2)nop
-mov.f32f32 r0.x, r0.x
(rpt5)nop
exp2 r0.x, r0.x
-(ss)mov.f32f32 r0.x, r0.x
-(rpt2)nop
-add.f r0.y, c5.y, (neg)r0.x
-mul.f r0.x, r0.x, c4.z
+(ss)mov.f32f32 r0.y, r0.x
+(ss)mul.f r0.x, r0.x, c4.z
(rpt1)nop
+add.f r0.y, c5.y, (neg)r0.y
+(rpt2)nop
mul.f r0.y, r0.y, c2.y
(rpt2)nop
add.f r0.x, r0.x, r0.y
(rpt2)nop
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
max.f r0.x, r0.x, c4.y
(rpt2)nop
min.f r0.x, r0.x, c4.z
@@ -55,18 +45,18 @@ mul.f r1.x, r0.x, c0.z
mul.f r0.y, r0.y, c1.z
mul.f r0.z, r0.z, c1.y
mul.f r0.w, r0.w, c1.x
-mul.f r1.y, r0.x, c0.y
-add.f r0.y, r1.x, r0.y
+nop
+add.f r2.z, r1.x, r0.y
+mul.f r0.y, r0.x, c0.y
mul.f r0.x, r0.x, c0.x
(rpt1)nop
-mov.f32f32 r2.z, r0.y
-add.f r0.y, r1.y, r0.z
-add.f r0.x, r0.x, r0.w
-(rpt1)nop
-mov.f32f32 r2.y, r0.y
-mov.f32f32 r2.x, r0.x
+add.f r2.y, r0.y, r0.z
+add.f r2.x, r0.x, r0.w
end
+nop
+nop
+nop
; FRAG: outputs: r2.x (1:0)
; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0)
-; FRAG: 96 instructions, 0 half, 3 full
+; FRAG: 70 instructions, 0 half, 3 full
diff --git a/reference/0ad-alpine-valley/0ad-118.asm b/reference/0ad-alpine-valley/0ad-118.asm
index 3be9d09..85a697c 100644
--- a/reference/0ad-alpine-valley/0ad-118.asm
+++ b/reference/0ad-alpine-valley/0ad-118.asm
@@ -6,44 +6,34 @@
@out(r2.y) out1
@out(r2.z) out2
@out(r2.w) out3
+@const(c4.x) 0x3f000000, 0x00000000, 0x3f800000, 0x3fb8aa65
+@const(c5.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)add.f r0.x, r0.w, c4.y
-mov.f32f32 r0.y, c0.w
-absneg.f r0.w, (neg)c2.x
+absneg.f r0.y, (neg)c2.x
+mov.f32f32 r2.w, c0.w
add.f r0.z, r0.z, c4.y
(rpt2)nop
rcp r0.x, r0.x
(ss)mul.f r0.x, r0.z, r0.x
-mov.f32f32 r2.w, r0.y
-mul.f r0.y, r0.w, c2.x
-nop
-mov.f32f32 r0.x, r0.x
+mul.f r0.y, r0.y, c2.x
(rpt2)nop
mul.f r0.y, r0.y, r0.x
-(rpt2)nop
-mov.f32f32 r0.y, r0.y
+mov.f32f32 r0.x, r0.x
(rpt2)nop
mul.f r0.x, r0.y, r0.x
(rpt2)nop
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
mul.f r0.x, r0.x, c4.w
-(rpt2)nop
-mov.f32f32 r0.x, r0.x
(rpt5)nop
exp2 r0.x, r0.x
-(ss)mov.f32f32 r0.x, r0.x
-(rpt2)nop
-add.f r0.y, c5.y, (neg)r0.x
-mul.f r0.x, r0.x, c4.z
+(ss)mov.f32f32 r0.y, r0.x
+(ss)mul.f r0.x, r0.x, c4.z
(rpt1)nop
+add.f r0.y, c5.y, (neg)r0.y
+(rpt2)nop
mul.f r0.y, r0.y, c2.y
(rpt2)nop
add.f r0.x, r0.x, r0.y
(rpt2)nop
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
max.f r0.x, r0.x, c4.y
(rpt2)nop
min.f r0.x, r0.x, c4.z
@@ -55,18 +45,18 @@ mul.f r1.x, r0.x, c0.z
mul.f r0.y, r0.y, c1.z
mul.f r0.z, r0.z, c1.y
mul.f r0.w, r0.w, c1.x
-mul.f r1.y, r0.x, c0.y
-add.f r0.y, r1.x, r0.y
+nop
+add.f r2.z, r1.x, r0.y
+mul.f r0.y, r0.x, c0.y
mul.f r0.x, r0.x, c0.x
(rpt1)nop
-mov.f32f32 r2.z, r0.y
-add.f r0.y, r1.y, r0.z
-add.f r0.x, r0.x, r0.w
-(rpt1)nop
-mov.f32f32 r2.y, r0.y
-mov.f32f32 r2.x, r0.x
+add.f r2.y, r0.y, r0.z
+add.f r2.x, r0.x, r0.w
end
+nop
+nop
+nop
; FRAG: outputs: r2.x (1:0)
; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0)
-; FRAG: 96 instructions, 0 half, 3 full
+; FRAG: 70 instructions, 0 half, 3 full
diff --git a/reference/0ad-alpine-valley/0ad-121.asm b/reference/0ad-alpine-valley/0ad-121.asm
index 3be9d09..85a697c 100644
--- a/reference/0ad-alpine-valley/0ad-121.asm
+++ b/reference/0ad-alpine-valley/0ad-121.asm
@@ -6,44 +6,34 @@
@out(r2.y) out1
@out(r2.z) out2
@out(r2.w) out3
+@const(c4.x) 0x3f000000, 0x00000000, 0x3f800000, 0x3fb8aa65
+@const(c5.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)add.f r0.x, r0.w, c4.y
-mov.f32f32 r0.y, c0.w
-absneg.f r0.w, (neg)c2.x
+absneg.f r0.y, (neg)c2.x
+mov.f32f32 r2.w, c0.w
add.f r0.z, r0.z, c4.y
(rpt2)nop
rcp r0.x, r0.x
(ss)mul.f r0.x, r0.z, r0.x
-mov.f32f32 r2.w, r0.y
-mul.f r0.y, r0.w, c2.x
-nop
-mov.f32f32 r0.x, r0.x
+mul.f r0.y, r0.y, c2.x
(rpt2)nop
mul.f r0.y, r0.y, r0.x
-(rpt2)nop
-mov.f32f32 r0.y, r0.y
+mov.f32f32 r0.x, r0.x
(rpt2)nop
mul.f r0.x, r0.y, r0.x
(rpt2)nop
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
mul.f r0.x, r0.x, c4.w
-(rpt2)nop
-mov.f32f32 r0.x, r0.x
(rpt5)nop
exp2 r0.x, r0.x
-(ss)mov.f32f32 r0.x, r0.x
-(rpt2)nop
-add.f r0.y, c5.y, (neg)r0.x
-mul.f r0.x, r0.x, c4.z
+(ss)mov.f32f32 r0.y, r0.x
+(ss)mul.f r0.x, r0.x, c4.z
(rpt1)nop
+add.f r0.y, c5.y, (neg)r0.y
+(rpt2)nop
mul.f r0.y, r0.y, c2.y
(rpt2)nop
add.f r0.x, r0.x, r0.y
(rpt2)nop
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
max.f r0.x, r0.x, c4.y
(rpt2)nop
min.f r0.x, r0.x, c4.z
@@ -55,18 +45,18 @@ mul.f r1.x, r0.x, c0.z
mul.f r0.y, r0.y, c1.z
mul.f r0.z, r0.z, c1.y
mul.f r0.w, r0.w, c1.x
-mul.f r1.y, r0.x, c0.y
-add.f r0.y, r1.x, r0.y
+nop
+add.f r2.z, r1.x, r0.y
+mul.f r0.y, r0.x, c0.y
mul.f r0.x, r0.x, c0.x
(rpt1)nop
-mov.f32f32 r2.z, r0.y
-add.f r0.y, r1.y, r0.z
-add.f r0.x, r0.x, r0.w
-(rpt1)nop
-mov.f32f32 r2.y, r0.y
-mov.f32f32 r2.x, r0.x
+add.f r2.y, r0.y, r0.z
+add.f r2.x, r0.x, r0.w
end
+nop
+nop
+nop
; FRAG: outputs: r2.x (1:0)
; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0)
-; FRAG: 96 instructions, 0 half, 3 full
+; FRAG: 70 instructions, 0 half, 3 full
diff --git a/reference/0ad-alpine-valley/0ad-124.asm b/reference/0ad-alpine-valley/0ad-124.asm
index 3be9d09..85a697c 100644
--- a/reference/0ad-alpine-valley/0ad-124.asm
+++ b/reference/0ad-alpine-valley/0ad-124.asm
@@ -6,44 +6,34 @@
@out(r2.y) out1
@out(r2.z) out2
@out(r2.w) out3
+@const(c4.x) 0x3f000000, 0x00000000, 0x3f800000, 0x3fb8aa65
+@const(c5.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)add.f r0.x, r0.w, c4.y
-mov.f32f32 r0.y, c0.w
-absneg.f r0.w, (neg)c2.x
+absneg.f r0.y, (neg)c2.x
+mov.f32f32 r2.w, c0.w
add.f r0.z, r0.z, c4.y
(rpt2)nop
rcp r0.x, r0.x
(ss)mul.f r0.x, r0.z, r0.x
-mov.f32f32 r2.w, r0.y
-mul.f r0.y, r0.w, c2.x
-nop
-mov.f32f32 r0.x, r0.x
+mul.f r0.y, r0.y, c2.x
(rpt2)nop
mul.f r0.y, r0.y, r0.x
-(rpt2)nop
-mov.f32f32 r0.y, r0.y
+mov.f32f32 r0.x, r0.x
(rpt2)nop
mul.f r0.x, r0.y, r0.x
(rpt2)nop
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
mul.f r0.x, r0.x, c4.w
-(rpt2)nop
-mov.f32f32 r0.x, r0.x
(rpt5)nop
exp2 r0.x, r0.x
-(ss)mov.f32f32 r0.x, r0.x
-(rpt2)nop
-add.f r0.y, c5.y, (neg)r0.x
-mul.f r0.x, r0.x, c4.z
+(ss)mov.f32f32 r0.y, r0.x
+(ss)mul.f r0.x, r0.x, c4.z
(rpt1)nop
+add.f r0.y, c5.y, (neg)r0.y
+(rpt2)nop
mul.f r0.y, r0.y, c2.y
(rpt2)nop
add.f r0.x, r0.x, r0.y
(rpt2)nop
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
max.f r0.x, r0.x, c4.y
(rpt2)nop
min.f r0.x, r0.x, c4.z
@@ -55,18 +45,18 @@ mul.f r1.x, r0.x, c0.z
mul.f r0.y, r0.y, c1.z
mul.f r0.z, r0.z, c1.y
mul.f r0.w, r0.w, c1.x
-mul.f r1.y, r0.x, c0.y
-add.f r0.y, r1.x, r0.y
+nop
+add.f r2.z, r1.x, r0.y
+mul.f r0.y, r0.x, c0.y
mul.f r0.x, r0.x, c0.x
(rpt1)nop
-mov.f32f32 r2.z, r0.y
-add.f r0.y, r1.y, r0.z
-add.f r0.x, r0.x, r0.w
-(rpt1)nop
-mov.f32f32 r2.y, r0.y
-mov.f32f32 r2.x, r0.x
+add.f r2.y, r0.y, r0.z
+add.f r2.x, r0.x, r0.w
end
+nop
+nop
+nop
; FRAG: outputs: r2.x (1:0)
; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0)
-; FRAG: 96 instructions, 0 half, 3 full
+; FRAG: 70 instructions, 0 half, 3 full
diff --git a/reference/0ad-alpine-valley/0ad-127.asm b/reference/0ad-alpine-valley/0ad-127.asm
index 3be9d09..85a697c 100644
--- a/reference/0ad-alpine-valley/0ad-127.asm
+++ b/reference/0ad-alpine-valley/0ad-127.asm
@@ -6,44 +6,34 @@
@out(r2.y) out1
@out(r2.z) out2
@out(r2.w) out3
+@const(c4.x) 0x3f000000, 0x00000000, 0x3f800000, 0x3fb8aa65
+@const(c5.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)add.f r0.x, r0.w, c4.y
-mov.f32f32 r0.y, c0.w
-absneg.f r0.w, (neg)c2.x
+absneg.f r0.y, (neg)c2.x
+mov.f32f32 r2.w, c0.w
add.f r0.z, r0.z, c4.y
(rpt2)nop
rcp r0.x, r0.x
(ss)mul.f r0.x, r0.z, r0.x
-mov.f32f32 r2.w, r0.y
-mul.f r0.y, r0.w, c2.x
-nop
-mov.f32f32 r0.x, r0.x
+mul.f r0.y, r0.y, c2.x
(rpt2)nop
mul.f r0.y, r0.y, r0.x
-(rpt2)nop
-mov.f32f32 r0.y, r0.y
+mov.f32f32 r0.x, r0.x
(rpt2)nop
mul.f r0.x, r0.y, r0.x
(rpt2)nop
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
mul.f r0.x, r0.x, c4.w
-(rpt2)nop
-mov.f32f32 r0.x, r0.x
(rpt5)nop
exp2 r0.x, r0.x
-(ss)mov.f32f32 r0.x, r0.x
-(rpt2)nop
-add.f r0.y, c5.y, (neg)r0.x
-mul.f r0.x, r0.x, c4.z
+(ss)mov.f32f32 r0.y, r0.x
+(ss)mul.f r0.x, r0.x, c4.z
(rpt1)nop
+add.f r0.y, c5.y, (neg)r0.y
+(rpt2)nop
mul.f r0.y, r0.y, c2.y
(rpt2)nop
add.f r0.x, r0.x, r0.y
(rpt2)nop
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
max.f r0.x, r0.x, c4.y
(rpt2)nop
min.f r0.x, r0.x, c4.z
@@ -55,18 +45,18 @@ mul.f r1.x, r0.x, c0.z
mul.f r0.y, r0.y, c1.z
mul.f r0.z, r0.z, c1.y
mul.f r0.w, r0.w, c1.x
-mul.f r1.y, r0.x, c0.y
-add.f r0.y, r1.x, r0.y
+nop
+add.f r2.z, r1.x, r0.y
+mul.f r0.y, r0.x, c0.y
mul.f r0.x, r0.x, c0.x
(rpt1)nop
-mov.f32f32 r2.z, r0.y
-add.f r0.y, r1.y, r0.z
-add.f r0.x, r0.x, r0.w
-(rpt1)nop
-mov.f32f32 r2.y, r0.y
-mov.f32f32 r2.x, r0.x
+add.f r2.y, r0.y, r0.z
+add.f r2.x, r0.x, r0.w
end
+nop
+nop
+nop
; FRAG: outputs: r2.x (1:0)
; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0)
-; FRAG: 96 instructions, 0 half, 3 full
+; FRAG: 70 instructions, 0 half, 3 full
diff --git a/reference/0ad-alpine-valley/0ad-130.asm b/reference/0ad-alpine-valley/0ad-130.asm
index 3be9d09..85a697c 100644
--- a/reference/0ad-alpine-valley/0ad-130.asm
+++ b/reference/0ad-alpine-valley/0ad-130.asm
@@ -6,44 +6,34 @@
@out(r2.y) out1
@out(r2.z) out2
@out(r2.w) out3
+@const(c4.x) 0x3f000000, 0x00000000, 0x3f800000, 0x3fb8aa65
+@const(c5.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)add.f r0.x, r0.w, c4.y
-mov.f32f32 r0.y, c0.w
-absneg.f r0.w, (neg)c2.x
+absneg.f r0.y, (neg)c2.x
+mov.f32f32 r2.w, c0.w
add.f r0.z, r0.z, c4.y
(rpt2)nop
rcp r0.x, r0.x
(ss)mul.f r0.x, r0.z, r0.x
-mov.f32f32 r2.w, r0.y
-mul.f r0.y, r0.w, c2.x
-nop
-mov.f32f32 r0.x, r0.x
+mul.f r0.y, r0.y, c2.x
(rpt2)nop
mul.f r0.y, r0.y, r0.x
-(rpt2)nop
-mov.f32f32 r0.y, r0.y
+mov.f32f32 r0.x, r0.x
(rpt2)nop
mul.f r0.x, r0.y, r0.x
(rpt2)nop
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
mul.f r0.x, r0.x, c4.w
-(rpt2)nop
-mov.f32f32 r0.x, r0.x
(rpt5)nop
exp2 r0.x, r0.x
-(ss)mov.f32f32 r0.x, r0.x
-(rpt2)nop
-add.f r0.y, c5.y, (neg)r0.x
-mul.f r0.x, r0.x, c4.z
+(ss)mov.f32f32 r0.y, r0.x
+(ss)mul.f r0.x, r0.x, c4.z
(rpt1)nop
+add.f r0.y, c5.y, (neg)r0.y
+(rpt2)nop
mul.f r0.y, r0.y, c2.y
(rpt2)nop
add.f r0.x, r0.x, r0.y
(rpt2)nop
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
max.f r0.x, r0.x, c4.y
(rpt2)nop
min.f r0.x, r0.x, c4.z
@@ -55,18 +45,18 @@ mul.f r1.x, r0.x, c0.z
mul.f r0.y, r0.y, c1.z
mul.f r0.z, r0.z, c1.y
mul.f r0.w, r0.w, c1.x
-mul.f r1.y, r0.x, c0.y
-add.f r0.y, r1.x, r0.y
+nop
+add.f r2.z, r1.x, r0.y
+mul.f r0.y, r0.x, c0.y
mul.f r0.x, r0.x, c0.x
(rpt1)nop
-mov.f32f32 r2.z, r0.y
-add.f r0.y, r1.y, r0.z
-add.f r0.x, r0.x, r0.w
-(rpt1)nop
-mov.f32f32 r2.y, r0.y
-mov.f32f32 r2.x, r0.x
+add.f r2.y, r0.y, r0.z
+add.f r2.x, r0.x, r0.w
end
+nop
+nop
+nop
; FRAG: outputs: r2.x (1:0)
; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0)
-; FRAG: 96 instructions, 0 half, 3 full
+; FRAG: 70 instructions, 0 half, 3 full
diff --git a/reference/0ad-alpine-valley/0ad-133.asm b/reference/0ad-alpine-valley/0ad-133.asm
index 3be9d09..85a697c 100644
--- a/reference/0ad-alpine-valley/0ad-133.asm
+++ b/reference/0ad-alpine-valley/0ad-133.asm
@@ -6,44 +6,34 @@
@out(r2.y) out1
@out(r2.z) out2
@out(r2.w) out3
+@const(c4.x) 0x3f000000, 0x00000000, 0x3f800000, 0x3fb8aa65
+@const(c5.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)add.f r0.x, r0.w, c4.y
-mov.f32f32 r0.y, c0.w
-absneg.f r0.w, (neg)c2.x
+absneg.f r0.y, (neg)c2.x
+mov.f32f32 r2.w, c0.w
add.f r0.z, r0.z, c4.y
(rpt2)nop
rcp r0.x, r0.x
(ss)mul.f r0.x, r0.z, r0.x
-mov.f32f32 r2.w, r0.y
-mul.f r0.y, r0.w, c2.x
-nop
-mov.f32f32 r0.x, r0.x
+mul.f r0.y, r0.y, c2.x
(rpt2)nop
mul.f r0.y, r0.y, r0.x
-(rpt2)nop
-mov.f32f32 r0.y, r0.y
+mov.f32f32 r0.x, r0.x
(rpt2)nop
mul.f r0.x, r0.y, r0.x
(rpt2)nop
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
mul.f r0.x, r0.x, c4.w
-(rpt2)nop
-mov.f32f32 r0.x, r0.x
(rpt5)nop
exp2 r0.x, r0.x
-(ss)mov.f32f32 r0.x, r0.x
-(rpt2)nop
-add.f r0.y, c5.y, (neg)r0.x
-mul.f r0.x, r0.x, c4.z
+(ss)mov.f32f32 r0.y, r0.x
+(ss)mul.f r0.x, r0.x, c4.z
(rpt1)nop
+add.f r0.y, c5.y, (neg)r0.y
+(rpt2)nop
mul.f r0.y, r0.y, c2.y
(rpt2)nop
add.f r0.x, r0.x, r0.y
(rpt2)nop
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
max.f r0.x, r0.x, c4.y
(rpt2)nop
min.f r0.x, r0.x, c4.z
@@ -55,18 +45,18 @@ mul.f r1.x, r0.x, c0.z
mul.f r0.y, r0.y, c1.z
mul.f r0.z, r0.z, c1.y
mul.f r0.w, r0.w, c1.x
-mul.f r1.y, r0.x, c0.y
-add.f r0.y, r1.x, r0.y
+nop
+add.f r2.z, r1.x, r0.y
+mul.f r0.y, r0.x, c0.y
mul.f r0.x, r0.x, c0.x
(rpt1)nop
-mov.f32f32 r2.z, r0.y
-add.f r0.y, r1.y, r0.z
-add.f r0.x, r0.x, r0.w
-(rpt1)nop
-mov.f32f32 r2.y, r0.y
-mov.f32f32 r2.x, r0.x
+add.f r2.y, r0.y, r0.z
+add.f r2.x, r0.x, r0.w
end
+nop
+nop
+nop
; FRAG: outputs: r2.x (1:0)
; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0)
-; FRAG: 96 instructions, 0 half, 3 full
+; FRAG: 70 instructions, 0 half, 3 full
diff --git a/reference/0ad-alpine-valley/0ad-136.asm b/reference/0ad-alpine-valley/0ad-136.asm
index 9114513..13ea129 100644
--- a/reference/0ad-alpine-valley/0ad-136.asm
+++ b/reference/0ad-alpine-valley/0ad-136.asm
@@ -6,23 +6,16 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)bary.f r0.z, 0, r0.x
-bary.f (ei)r0.x, 1, r0.x
-(rpt1)nop
-mov.f32f32 r0.y, r0.z
-mov.f32f32 r0.x, r0.x
-(rpt1)nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.x
+bary.f (ei)r0.w, 1, r0.x
(rpt5)nop
-sam (f32)(xyzw)r0.x, r0.y, s#0, t#0
-(sy)mov.f32f32 r1.w, r0.w
-mov.f32f32 r1.z, r0.z
-mov.f32f32 r1.y, r0.y
-mov.f32f32 r1.x, r0.x
+sam (f32)(xyzw)r1.x, r0.z, s#0, t#0
end
nop
+nop
+nop
; FRAG: outputs: r1.x (1:0)
; FRAG: inputs: r0.x (5:9,cm=f,il=8,b=1)
-; FRAG: 22 instructions, 0 half, 2 full
+; FRAG: 10 instructions, 0 half, 2 full
diff --git a/reference/0ad-alpine-valley/0ad-139.asm b/reference/0ad-alpine-valley/0ad-139.asm
index 9114513..13ea129 100644
--- a/reference/0ad-alpine-valley/0ad-139.asm
+++ b/reference/0ad-alpine-valley/0ad-139.asm
@@ -6,23 +6,16 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)bary.f r0.z, 0, r0.x
-bary.f (ei)r0.x, 1, r0.x
-(rpt1)nop
-mov.f32f32 r0.y, r0.z
-mov.f32f32 r0.x, r0.x
-(rpt1)nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.x
+bary.f (ei)r0.w, 1, r0.x
(rpt5)nop
-sam (f32)(xyzw)r0.x, r0.y, s#0, t#0
-(sy)mov.f32f32 r1.w, r0.w
-mov.f32f32 r1.z, r0.z
-mov.f32f32 r1.y, r0.y
-mov.f32f32 r1.x, r0.x
+sam (f32)(xyzw)r1.x, r0.z, s#0, t#0
end
nop
+nop
+nop
; FRAG: outputs: r1.x (1:0)
; FRAG: inputs: r0.x (5:9,cm=f,il=8,b=1)
-; FRAG: 22 instructions, 0 half, 2 full
+; FRAG: 10 instructions, 0 half, 2 full
diff --git a/reference/0ad-alpine-valley/0ad-142.asm b/reference/0ad-alpine-valley/0ad-142.asm
index c46fb79..7495bc9 100644
--- a/reference/0ad-alpine-valley/0ad-142.asm
+++ b/reference/0ad-alpine-valley/0ad-142.asm
@@ -6,31 +6,24 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c2.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)bary.f r0.z, 0, r0.x
-bary.f (ei)r0.x, 1, r0.x
-(rpt1)nop
-mov.f32f32 r0.y, r0.z
-mov.f32f32 r0.x, r0.x
-(rpt1)nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.x
+bary.f (ei)r0.w, 1, r0.x
(rpt5)nop
-sam (f32)(xyzw)r0.x, r0.y, s#0, t#0
-(sy)add.f r0.w, r0.w, c0.w
-(ss)add.f r0.z, r0.z, c0.z
+sam (f32)(xyzw)r0.x, r0.z, s#0, t#0
+(sy)(ss)add.f r0.w, r0.w, c0.w
+add.f r0.z, r0.z, c0.z
add.f r0.y, r0.y, c0.y
add.f r0.x, r0.x, c0.x
-mul.f r0.w, r0.w, c1.w
-mul.f r0.z, r0.z, c1.z
-mul.f r0.y, r0.y, c1.y
-mul.f r0.x, r0.x, c1.x
-mov.f32f32 r1.w, r0.w
-mov.f32f32 r1.z, r0.z
-mov.f32f32 r1.y, r0.y
-mov.f32f32 r1.x, r0.x
+mul.f r1.w, r0.w, c1.w
+mul.f r1.z, r0.z, c1.z
+mul.f r1.y, r0.y, c1.y
+mul.f r1.x, r0.x, c1.x
end
nop
+nop
+nop
; FRAG: outputs: r1.x (1:0)
; FRAG: inputs: r0.x (5:9,cm=f,il=8,b=1)
-; FRAG: 30 instructions, 0 half, 2 full
+; FRAG: 18 instructions, 0 half, 2 full
diff --git a/reference/0ad-alpine-valley/0ad-145.asm b/reference/0ad-alpine-valley/0ad-145.asm
index 9114513..13ea129 100644
--- a/reference/0ad-alpine-valley/0ad-145.asm
+++ b/reference/0ad-alpine-valley/0ad-145.asm
@@ -6,23 +6,16 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)bary.f r0.z, 0, r0.x
-bary.f (ei)r0.x, 1, r0.x
-(rpt1)nop
-mov.f32f32 r0.y, r0.z
-mov.f32f32 r0.x, r0.x
-(rpt1)nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.x
+bary.f (ei)r0.w, 1, r0.x
(rpt5)nop
-sam (f32)(xyzw)r0.x, r0.y, s#0, t#0
-(sy)mov.f32f32 r1.w, r0.w
-mov.f32f32 r1.z, r0.z
-mov.f32f32 r1.y, r0.y
-mov.f32f32 r1.x, r0.x
+sam (f32)(xyzw)r1.x, r0.z, s#0, t#0
end
nop
+nop
+nop
; FRAG: outputs: r1.x (1:0)
; FRAG: inputs: r0.x (5:9,cm=f,il=8,b=1)
-; FRAG: 22 instructions, 0 half, 2 full
+; FRAG: 10 instructions, 0 half, 2 full
diff --git a/reference/0ad-alpine-valley/0ad-148.asm b/reference/0ad-alpine-valley/0ad-148.asm
index 600d62a..a965927 100644
--- a/reference/0ad-alpine-valley/0ad-148.asm
+++ b/reference/0ad-alpine-valley/0ad-148.asm
@@ -6,31 +6,20 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c0.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000
(sy)(ss)bary.f r0.z, 0, r0.x
-bary.f (ei)r0.x, 1, r0.x
-mov.f32f32 r0.y, c0.x
-mov.f32f32 r0.w, c0.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r1.z, r0.y
-mov.f32f32 r1.y, r0.w
-mov.f32f32 r0.y, r0.z
-mov.f32f32 r0.z, r0.x
-mov.f32f32 r0.x, c0.x
-(rpt4)nop
-sam (f32)(w)r0.y, r0.y, s#0, t#0
-(sy)(ss)add.f r0.y, c0.y, (neg)r1.x
-mov.f32f32 r1.x, r0.x
-(rpt1)nop
-mov.f32f32 r0.x, r0.y
+bary.f (ei)r0.w, 1, r0.x
+mov.f32f32 r1.z, c0.x
+mov.f32f32 r1.y, c0.x
+mov.f32f32 r1.x, c0.x
(rpt2)nop
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
-mov.f32f32 r1.w, r0.x
+sam (f32)(w)r0.x, r0.z, s#0, t#0
+(sy)add.f r1.w, c0.y, (neg)r0.w
end
nop
nop
+nop
; FRAG: outputs: r1.x (1:0)
; FRAG: inputs: r0.x (5:9,cm=f,il=8,b=1)
-; FRAG: 31 instructions, 0 half, 2 full
+; FRAG: 11 instructions, 0 half, 2 full
diff --git a/reference/0ad-alpine-valley/0ad-151.asm b/reference/0ad-alpine-valley/0ad-151.asm
index 9f44609..1171e2e 100644
--- a/reference/0ad-alpine-valley/0ad-151.asm
+++ b/reference/0ad-alpine-valley/0ad-151.asm
@@ -6,19 +6,16 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
-(sy)(ss)bary.f r0.z, 2, r0.x
-bary.f r0.w, 1, r0.x
-bary.f (ei)r0.x, 0, r0.x
-mov.f32f32 r0.y, c0.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r1.w, r0.y
-mov.f32f32 r1.z, r0.z
-mov.f32f32 r1.y, r0.w
-mov.f32f32 r1.x, r0.x
+@const(c0.x) 0x3f800000, 0x00000000, 0x00000000, 0x00000000
+(sy)(ss)mov.f32f32 r1.w, c0.x
+bary.f r1.z, 2, r0.x
+bary.f r1.y, 1, r0.x
+bary.f (ei)r1.x, 0, r0.x
end
+nop
+nop
+nop
; FRAG: outputs: r1.x (1:0)
; FRAG: inputs: r0.x (5:9,cm=f,il=8,b=1)
-; FRAG: 12 instructions, 0 half, 2 full
+; FRAG: 5 instructions, 0 half, 2 full
diff --git a/reference/0ad-alpine-valley/0ad-34.asm b/reference/0ad-alpine-valley/0ad-34.asm
index 63e7be5..22ca830 100644
--- a/reference/0ad-alpine-valley/0ad-34.asm
+++ b/reference/0ad-alpine-valley/0ad-34.asm
@@ -6,31 +6,24 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c0.x) 0x3ecccccd, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)bary.f r0.z, 0, r0.x
-bary.f (ei)r0.x, 1, r0.x
-mov.f32f32 r0.y, (0.000000)
-nop
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.x, r0.x
-(rpt1)nop
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.x
-(rpt5)nop
-sam (f32)(xyzw)r2.y, r0.z, s#0, t#0
-(sy)cmps.f.lt r0.x, r3.x, c0.x
-mov.f32f32 r1.w, r3.x
-mov.f32f32 r1.z, r2.w
-mov.f32f32 r1.y, r2.z
-cov.u32f32 r0.x, r0.x
-(rpt1)nop
-mov.f32f32 r1.x, r2.y
-cmps.f.ne p0.x, r0.x, r0.y
+bary.f (ei)r0.w, 1, r0.x
+mov.f32f32 r0.x, (0.000000)
+(rpt4)nop
+sam (f32)(xyzw)r1.x, r0.z, s#0, t#0
+(sy)cmps.f.lt r0.y, r1.w, c0.x
+(rpt2)nop
+cov.u32f32 r0.y, r0.y
+(rpt2)nop
+cmps.f.ne p0.x, r0.y, r0.x
(rpt5)nop
kill p0.x
end
nop
nop
+nop
; FRAG: outputs: r1.x (1:0)
; FRAG: inputs: r0.x (5:9,cm=f,il=8,b=1)
-; FRAG: 34 instructions, 0 half, 4 full
+; FRAG: 26 instructions, 0 half, 2 full
diff --git a/reference/0ad-alpine-valley/0ad-36.asm b/reference/0ad-alpine-valley/0ad-36.asm
index 42c6c0a..f362ccd 100644
--- a/reference/0ad-alpine-valley/0ad-36.asm
+++ b/reference/0ad-alpine-valley/0ad-36.asm
@@ -3,8 +3,8 @@
@in(r0.x) in0
@in(r0.y) in1
@in(r0.z) in2
-@in(r0.w) in4
-@in(r1.x) in5
+@in(r1.x) in4
+@in(r1.y) in5
@out(r0.x) out0
@out(r0.y) out1
@out(r0.z) out2
@@ -13,159 +13,124 @@
@out(r1.y) out5
@out(r1.z) out6
@out(r1.w) out7
-(sy)(ss)floor.f r1.y, c11.z
+@const(c15.x) 0x3ecccccd, 0x00000000, 0x3eaaa64c, 0x3e800000
+@const(c16.x) 0x3f000000, 0x40000000, 0xbf800000, 0x40400000
+@const(c17.x) 0x399d4952, 0x3e4ccccd, 0x3f99999a, 0x3fd9999a
+@const(c18.x) 0x3c88ce70, 0x00000000, 0x00000000, 0x00000000
+@const(c19.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
+(sy)(ss)floor.f r0.w, c11.z
floor.f r1.z, c11.x
absneg.f r1.w, (abs)c14.x
absneg.f r2.x, (abs)c14.y
-add.f r1.y, c11.z, (neg)r1.y
+add.f r0.w, c11.z, (neg)r0.w
add.f r1.z, c11.x, (neg)r1.z
mul.f r2.y, r0.x, r0.z
add.f r1.w, r1.w, r2.x
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.z, r1.z
-mul.f r2.x, r0.y, c17.x
-mov.f32f32 r1.w, r1.w
-max.f r1.y, r1.y, c15.y
+max.f r0.w, r0.w, c15.y
max.f r1.z, r1.z, c15.y
-mul.f r2.x, r2.y, r2.x
-mul.f r2.y, c13.x, r1.w
-min.f r1.y, r1.y, c19.y
+mul.f r2.x, r0.y, c17.x
+mul.f r2.z, c13.x, r1.w
+min.f r0.w, r0.w, c19.y
min.f r1.z, r1.z, c19.y
-mul.f r2.z, c8.z, r0.x
-mov.f32f32 r2.x, r2.x
-max.f r1.y, r1.y, c15.x
+mul.f r2.w, c8.z, r0.x
+mov.f32f32 r1.w, r1.w
+max.f r0.w, r0.w, c15.x
max.f r1.z, r1.z, c15.x
-mad.f32 r2.z, c9.z, r0.y, r2.z
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.z, r1.z
-mad.f32 r2.z, c10.z, r0.z, r2.z
-mul.f r2.w, c8.x, r0.x
-mul.f r1.y, c13.x, r1.y
-mad.f32 r2.w, c9.x, r0.y, r2.w
-add.f r2.z, r2.z, c11.z
-mad.f32 r2.w, c10.x, r0.z, r2.w
-mov.f32f32 r1.y, r1.y
-mad.f32 r2.y, c15.w, r2.y, r2.z
-mov.f32f32 r2.x, r2.x
-add.f r2.w, r2.w, c11.x
-mad.f32 r1.y, c15.z, r1.y, c11.x
-mad.f32 r1.z, c13.x, r1.z, r2.w
-mov.f32f32 r2.y, r2.y
-max.f r2.x, r2.x, c15.y
-mov.f32f32 r1.y, r1.y
+mul.f r3.x, c8.x, r0.x
+mad.f32 r2.w, c9.z, r0.y, r2.w
+mul.f r0.w, c13.x, r0.w
+mad.f32 r3.x, c9.x, r0.y, r3.x
+mad.f32 r2.w, c10.z, r0.z, r2.w
+mad.f32 r3.x, c10.x, r0.z, r3.x
+mad.f32 r0.w, c15.z, r0.w, c11.x
+max.f r1.w, r1.w, c17.z
+mul.f r2.x, r2.y, r2.x
+add.f r2.y, r3.x, c11.x
+add.f r0.w, r0.w, c16.x
+mad.f32 r1.z, c13.x, r1.z, r2.y
+add.f r2.w, r2.w, c11.z
+min.f r1.w, r1.w, c17.w
+floor.f r3.x, r0.w
add.f r1.z, r1.z, c16.x
-mov.f32f32 r2.y, r2.y
-min.f r2.x, r2.x, c19.y
-mov.f32f32 r1.y, r1.y
+mad.f32 r2.z, c15.w, r2.z, r2.w
+mov.f32f32 r3.y, r1.w
+add.f r0.w, r0.w, (neg)r3.x
floor.f r3.x, r1.z
-add.f r2.y, r2.y, c16.x
-min.f r2.x, r2.x, c17.y
-add.f r1.y, r1.y, c16.x
+add.f r2.z, r2.z, c16.x
+max.f r2.x, r2.x, c15.y
+mad.f32 r0.w, c16.y, r0.w, c16.z
add.f r1.z, r1.z, (neg)r3.x
-floor.f r3.x, r2.y
-mov.f32f32 r2.x, r2.x
-floor.f r3.y, r1.y
+floor.f r3.x, r2.z
+min.f r2.x, r2.x, c19.y
+absneg.f r0.w, (abs)r0.w
mad.f32 r1.z, c16.y, r1.z, c16.z
-add.f r2.y, r2.y, (neg)r3.x
-mul.f r3.x, r0.y, c18.x
-add.f r1.y, r1.y, (neg)r3.y
+add.f r2.z, r2.z, (neg)r3.x
+min.f r2.x, r2.x, c17.y
+mul.f r3.x, c16.y, r0.w
absneg.f r1.z, (abs)r1.z
-mad.f32 r2.y, c16.y, r2.y, c16.z
-mov.f32f32 r3.x, r3.x
-mad.f32 r1.y, c16.y, r1.y, c16.z
-mul.f r3.y, c16.y, r1.z
-absneg.f r2.y, (abs)r2.y
+mad.f32 r2.z, c16.y, r2.z, c16.z
+mul.f r0.w, r0.w, r0.w
+add.f r3.x, c16.w, (neg)r3.x
+mul.f r3.z, c16.y, r1.z
+absneg.f r2.z, (abs)r2.z
mul.f r1.z, r1.z, r1.z
-absneg.f r1.y, (abs)r1.y
-add.f r3.y, c16.w, (neg)r3.y
-mul.f r3.z, c16.y, r2.y
-mul.f r2.y, r2.y, r2.y
-mul.f r3.w, c16.y, r1.y
-mul.f r1.z, r1.z, r3.y
-add.f r3.y, c16.w, (neg)r3.z
-mul.f r1.y, r1.y, r1.y
-add.f r3.z, c16.w, (neg)r3.w
-mov.f32f32 r1.z, r1.z
-mul.f r2.y, r2.y, r3.y
-mov.f32f32 r3.x, r3.x
-mul.f r1.y, r1.y, r3.z
-mul.f r1.z, r1.z, r2.x
-mov.f32f32 r2.y, r2.y
+mul.f r0.w, r0.w, r3.x
+mul.f r3.x, r0.y, c18.x
+add.f r3.z, c16.w, (neg)r3.z
+mul.f r3.w, c16.y, r2.z
+mul.f r2.z, r2.z, r2.z
max.f r3.x, r3.x, c15.y
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.z, r1.z
-mul.f r2.x, r2.y, r2.x
-min.f r2.y, r3.x, c19.y
-max.f r1.w, r1.w, c17.z
-mul.f r3.x, c8.y, r0.x
+mul.f r1.z, r1.z, r3.z
+mov.f32f32 r3.z, r2.x
+add.f r3.w, c16.w, (neg)r3.w
+min.f r3.x, r3.x, c19.y
+mul.f r4.x, c8.y, r0.x
mul.f r0.x, c8.w, r0.x
-min.f r2.y, r2.y, c15.w
-mov.f32f32 r1.w, r1.w
-mad.f32 r3.x, c9.y, r0.y, r3.x
+mad.f32 r4.x, c9.y, r0.y, r4.x
+min.f r3.x, r3.x, c15.w
+mul.f r1.z, r1.z, r3.z
+mul.f r2.z, r2.z, r3.w
+mad.f32 r3.z, c10.y, r0.z, r4.x
+mul.f r0.w, r0.w, r3.x
+mov.f32f32 r3.x, r1.z
+mul.f r2.x, r2.z, r2.x
+add.f r2.z, r3.z, c11.y
+mov.f32f32 r3.z, r0.w
+mad.f32 r0.w, c14.y, r0.w, r1.z
+mad.f32 r1.z, r2.x, r1.w, r2.w
+mad.f32 r1.w, r2.x, r3.y, r2.y
+mad.f32 r2.y, c14.x, r3.z, r3.x
+mad.f32 r2.x, r2.x, r3.y, r2.z
mad.f32 r0.x, c9.w, r0.y, r0.x
-mov.f32f32 r0.y, r2.y
-min.f r1.w, r1.w, c17.w
-mad.f32 r2.y, c10.y, r0.z, r3.x
-mad.f32 r0.x, c10.w, r0.z, r0.x
-mul.f r0.y, r1.y, r0.y
-mov.f32f32 r0.z, r1.w
-add.f r1.y, r2.y, c11.y
-add.f r0.x, r0.x, c11.w
-mov.f32f32 r0.y, r0.y
-mad.f32 r1.w, r2.x, r0.z, r2.w
-mad.f32 r2.y, r2.x, r0.z, r1.y
-mad.f32 r0.z, r2.x, r0.z, r2.z
-mad.f32 r1.y, c14.x, r0.y, r1.z
-mad.f32 r0.y, c14.y, r0.y, r1.z
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.z, r1.y
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.y, r1.x
-mov.f32f32 r1.x, r0.w
-add.f r0.w, r1.w, r1.z
-add.f r0.y, r0.z, r0.y
-(rpt1)nop
-mov.f32f32 r0.z, r0.w
-mov.f32f32 r0.y, r0.y
-(rpt1)nop
-mul.f r0.w, c0.w, r0.z
-mul.f r1.z, c0.z, r0.z
-mad.f32 r0.w, c1.w, r2.y, r0.w
-mad.f32 r1.z, c1.z, r2.y, r1.z
-mul.f r1.w, c0.y, r0.z
-mul.f r0.z, c0.x, r0.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.z, r1.z
-mad.f32 r2.x, c2.w, r0.y, r0.w
-mad.f32 r1.z, c2.z, r0.y, r1.z
-mad.f32 r0.w, c3.w, r0.x, r2.x
-mad.f32 r2.z, c3.z, r0.x, r1.z
-mad.f32 r1.w, c1.y, r2.y, r1.w
-mad.f32 r0.z, c1.x, r2.y, r0.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.y, r2.z
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r2.z, r0.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.z, r2.y
-mad.f32 r1.w, c2.y, r0.y, r1.w
-mad.f32 r0.y, c2.x, r0.y, r2.z
-mad.f32 r2.y, c3.y, r0.x, r1.w
-mad.f32 r0.x, c3.x, r0.x, r0.y
-mov.f32f32 r1.w, r2.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r0.y, r2.y
-mov.f32f32 r0.x, r0.x
+nop
+add.f r0.y, r1.w, r2.y
+add.f r2.y, r1.z, r0.w
(rpt1)nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
+mov.f32f32 r0.w, r0.y
+mul.f r0.y, c0.x, r0.y
+mov.f32f32 r2.z, r2.y
+mad.f32 r0.x, c10.w, r0.z, r0.x
+mul.f r0.z, c0.w, r0.w
+mul.f r1.z, c0.z, r0.w
+mad.f32 r0.z, c1.w, r2.x, r0.z
+mad.f32 r1.z, c1.z, r2.x, r1.z
+mad.f32 r1.w, c2.w, r2.z, r0.z
+add.f r2.w, r0.x, c11.w
+mad.f32 r1.z, c2.z, r2.z, r1.z
+mul.f r0.x, c0.y, r0.w
+mad.f32 r0.y, c1.x, r2.x, r0.y
+mad.f32 r0.w, c3.w, r2.w, r1.w
+mad.f32 r0.z, c3.z, r2.w, r1.z
+mad.f32 r0.x, c1.y, r2.x, r0.x
+mad.f32 r0.y, c2.x, r2.y, r0.y
+mad.f32 r2.x, c2.y, r2.z, r0.x
+mad.f32 r0.x, c3.x, r2.w, r0.y
+mad.f32 r0.y, c3.y, r2.w, r2.x
end
nop
nop
nop
; VERT: outputs: r0.x (0:0) r1.x (5:9)
-; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=3,il=12,b=0)
-; VERT: 152 instructions, 0 half, 4 full
+; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r1.x (0:0,cm=3,il=12,b=0)
+; VERT: 110 instructions, 0 half, 5 full
diff --git a/reference/0ad-alpine-valley/0ad-37.asm b/reference/0ad-alpine-valley/0ad-37.asm
index 63e7be5..22ca830 100644
--- a/reference/0ad-alpine-valley/0ad-37.asm
+++ b/reference/0ad-alpine-valley/0ad-37.asm
@@ -6,31 +6,24 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c0.x) 0x3ecccccd, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)bary.f r0.z, 0, r0.x
-bary.f (ei)r0.x, 1, r0.x
-mov.f32f32 r0.y, (0.000000)
-nop
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.x, r0.x
-(rpt1)nop
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.x
-(rpt5)nop
-sam (f32)(xyzw)r2.y, r0.z, s#0, t#0
-(sy)cmps.f.lt r0.x, r3.x, c0.x
-mov.f32f32 r1.w, r3.x
-mov.f32f32 r1.z, r2.w
-mov.f32f32 r1.y, r2.z
-cov.u32f32 r0.x, r0.x
-(rpt1)nop
-mov.f32f32 r1.x, r2.y
-cmps.f.ne p0.x, r0.x, r0.y
+bary.f (ei)r0.w, 1, r0.x
+mov.f32f32 r0.x, (0.000000)
+(rpt4)nop
+sam (f32)(xyzw)r1.x, r0.z, s#0, t#0
+(sy)cmps.f.lt r0.y, r1.w, c0.x
+(rpt2)nop
+cov.u32f32 r0.y, r0.y
+(rpt2)nop
+cmps.f.ne p0.x, r0.y, r0.x
(rpt5)nop
kill p0.x
end
nop
nop
+nop
; FRAG: outputs: r1.x (1:0)
; FRAG: inputs: r0.x (5:9,cm=f,il=8,b=1)
-; FRAG: 34 instructions, 0 half, 4 full
+; FRAG: 26 instructions, 0 half, 2 full
diff --git a/reference/0ad-alpine-valley/0ad-38.asm b/reference/0ad-alpine-valley/0ad-38.asm
index 42c6c0a..f362ccd 100644
--- a/reference/0ad-alpine-valley/0ad-38.asm
+++ b/reference/0ad-alpine-valley/0ad-38.asm
@@ -3,8 +3,8 @@
@in(r0.x) in0
@in(r0.y) in1
@in(r0.z) in2
-@in(r0.w) in4
-@in(r1.x) in5
+@in(r1.x) in4
+@in(r1.y) in5
@out(r0.x) out0
@out(r0.y) out1
@out(r0.z) out2
@@ -13,159 +13,124 @@
@out(r1.y) out5
@out(r1.z) out6
@out(r1.w) out7
-(sy)(ss)floor.f r1.y, c11.z
+@const(c15.x) 0x3ecccccd, 0x00000000, 0x3eaaa64c, 0x3e800000
+@const(c16.x) 0x3f000000, 0x40000000, 0xbf800000, 0x40400000
+@const(c17.x) 0x399d4952, 0x3e4ccccd, 0x3f99999a, 0x3fd9999a
+@const(c18.x) 0x3c88ce70, 0x00000000, 0x00000000, 0x00000000
+@const(c19.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
+(sy)(ss)floor.f r0.w, c11.z
floor.f r1.z, c11.x
absneg.f r1.w, (abs)c14.x
absneg.f r2.x, (abs)c14.y
-add.f r1.y, c11.z, (neg)r1.y
+add.f r0.w, c11.z, (neg)r0.w
add.f r1.z, c11.x, (neg)r1.z
mul.f r2.y, r0.x, r0.z
add.f r1.w, r1.w, r2.x
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.z, r1.z
-mul.f r2.x, r0.y, c17.x
-mov.f32f32 r1.w, r1.w
-max.f r1.y, r1.y, c15.y
+max.f r0.w, r0.w, c15.y
max.f r1.z, r1.z, c15.y
-mul.f r2.x, r2.y, r2.x
-mul.f r2.y, c13.x, r1.w
-min.f r1.y, r1.y, c19.y
+mul.f r2.x, r0.y, c17.x
+mul.f r2.z, c13.x, r1.w
+min.f r0.w, r0.w, c19.y
min.f r1.z, r1.z, c19.y
-mul.f r2.z, c8.z, r0.x
-mov.f32f32 r2.x, r2.x
-max.f r1.y, r1.y, c15.x
+mul.f r2.w, c8.z, r0.x
+mov.f32f32 r1.w, r1.w
+max.f r0.w, r0.w, c15.x
max.f r1.z, r1.z, c15.x
-mad.f32 r2.z, c9.z, r0.y, r2.z
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.z, r1.z
-mad.f32 r2.z, c10.z, r0.z, r2.z
-mul.f r2.w, c8.x, r0.x
-mul.f r1.y, c13.x, r1.y
-mad.f32 r2.w, c9.x, r0.y, r2.w
-add.f r2.z, r2.z, c11.z
-mad.f32 r2.w, c10.x, r0.z, r2.w
-mov.f32f32 r1.y, r1.y
-mad.f32 r2.y, c15.w, r2.y, r2.z
-mov.f32f32 r2.x, r2.x
-add.f r2.w, r2.w, c11.x
-mad.f32 r1.y, c15.z, r1.y, c11.x
-mad.f32 r1.z, c13.x, r1.z, r2.w
-mov.f32f32 r2.y, r2.y
-max.f r2.x, r2.x, c15.y
-mov.f32f32 r1.y, r1.y
+mul.f r3.x, c8.x, r0.x
+mad.f32 r2.w, c9.z, r0.y, r2.w
+mul.f r0.w, c13.x, r0.w
+mad.f32 r3.x, c9.x, r0.y, r3.x
+mad.f32 r2.w, c10.z, r0.z, r2.w
+mad.f32 r3.x, c10.x, r0.z, r3.x
+mad.f32 r0.w, c15.z, r0.w, c11.x
+max.f r1.w, r1.w, c17.z
+mul.f r2.x, r2.y, r2.x
+add.f r2.y, r3.x, c11.x
+add.f r0.w, r0.w, c16.x
+mad.f32 r1.z, c13.x, r1.z, r2.y
+add.f r2.w, r2.w, c11.z
+min.f r1.w, r1.w, c17.w
+floor.f r3.x, r0.w
add.f r1.z, r1.z, c16.x
-mov.f32f32 r2.y, r2.y
-min.f r2.x, r2.x, c19.y
-mov.f32f32 r1.y, r1.y
+mad.f32 r2.z, c15.w, r2.z, r2.w
+mov.f32f32 r3.y, r1.w
+add.f r0.w, r0.w, (neg)r3.x
floor.f r3.x, r1.z
-add.f r2.y, r2.y, c16.x
-min.f r2.x, r2.x, c17.y
-add.f r1.y, r1.y, c16.x
+add.f r2.z, r2.z, c16.x
+max.f r2.x, r2.x, c15.y
+mad.f32 r0.w, c16.y, r0.w, c16.z
add.f r1.z, r1.z, (neg)r3.x
-floor.f r3.x, r2.y
-mov.f32f32 r2.x, r2.x
-floor.f r3.y, r1.y
+floor.f r3.x, r2.z
+min.f r2.x, r2.x, c19.y
+absneg.f r0.w, (abs)r0.w
mad.f32 r1.z, c16.y, r1.z, c16.z
-add.f r2.y, r2.y, (neg)r3.x
-mul.f r3.x, r0.y, c18.x
-add.f r1.y, r1.y, (neg)r3.y
+add.f r2.z, r2.z, (neg)r3.x
+min.f r2.x, r2.x, c17.y
+mul.f r3.x, c16.y, r0.w
absneg.f r1.z, (abs)r1.z
-mad.f32 r2.y, c16.y, r2.y, c16.z
-mov.f32f32 r3.x, r3.x
-mad.f32 r1.y, c16.y, r1.y, c16.z
-mul.f r3.y, c16.y, r1.z
-absneg.f r2.y, (abs)r2.y
+mad.f32 r2.z, c16.y, r2.z, c16.z
+mul.f r0.w, r0.w, r0.w
+add.f r3.x, c16.w, (neg)r3.x
+mul.f r3.z, c16.y, r1.z
+absneg.f r2.z, (abs)r2.z
mul.f r1.z, r1.z, r1.z
-absneg.f r1.y, (abs)r1.y
-add.f r3.y, c16.w, (neg)r3.y
-mul.f r3.z, c16.y, r2.y
-mul.f r2.y, r2.y, r2.y
-mul.f r3.w, c16.y, r1.y
-mul.f r1.z, r1.z, r3.y
-add.f r3.y, c16.w, (neg)r3.z
-mul.f r1.y, r1.y, r1.y
-add.f r3.z, c16.w, (neg)r3.w
-mov.f32f32 r1.z, r1.z
-mul.f r2.y, r2.y, r3.y
-mov.f32f32 r3.x, r3.x
-mul.f r1.y, r1.y, r3.z
-mul.f r1.z, r1.z, r2.x
-mov.f32f32 r2.y, r2.y
+mul.f r0.w, r0.w, r3.x
+mul.f r3.x, r0.y, c18.x
+add.f r3.z, c16.w, (neg)r3.z
+mul.f r3.w, c16.y, r2.z
+mul.f r2.z, r2.z, r2.z
max.f r3.x, r3.x, c15.y
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.z, r1.z
-mul.f r2.x, r2.y, r2.x
-min.f r2.y, r3.x, c19.y
-max.f r1.w, r1.w, c17.z
-mul.f r3.x, c8.y, r0.x
+mul.f r1.z, r1.z, r3.z
+mov.f32f32 r3.z, r2.x
+add.f r3.w, c16.w, (neg)r3.w
+min.f r3.x, r3.x, c19.y
+mul.f r4.x, c8.y, r0.x
mul.f r0.x, c8.w, r0.x
-min.f r2.y, r2.y, c15.w
-mov.f32f32 r1.w, r1.w
-mad.f32 r3.x, c9.y, r0.y, r3.x
+mad.f32 r4.x, c9.y, r0.y, r4.x
+min.f r3.x, r3.x, c15.w
+mul.f r1.z, r1.z, r3.z
+mul.f r2.z, r2.z, r3.w
+mad.f32 r3.z, c10.y, r0.z, r4.x
+mul.f r0.w, r0.w, r3.x
+mov.f32f32 r3.x, r1.z
+mul.f r2.x, r2.z, r2.x
+add.f r2.z, r3.z, c11.y
+mov.f32f32 r3.z, r0.w
+mad.f32 r0.w, c14.y, r0.w, r1.z
+mad.f32 r1.z, r2.x, r1.w, r2.w
+mad.f32 r1.w, r2.x, r3.y, r2.y
+mad.f32 r2.y, c14.x, r3.z, r3.x
+mad.f32 r2.x, r2.x, r3.y, r2.z
mad.f32 r0.x, c9.w, r0.y, r0.x
-mov.f32f32 r0.y, r2.y
-min.f r1.w, r1.w, c17.w
-mad.f32 r2.y, c10.y, r0.z, r3.x
-mad.f32 r0.x, c10.w, r0.z, r0.x
-mul.f r0.y, r1.y, r0.y
-mov.f32f32 r0.z, r1.w
-add.f r1.y, r2.y, c11.y
-add.f r0.x, r0.x, c11.w
-mov.f32f32 r0.y, r0.y
-mad.f32 r1.w, r2.x, r0.z, r2.w
-mad.f32 r2.y, r2.x, r0.z, r1.y
-mad.f32 r0.z, r2.x, r0.z, r2.z
-mad.f32 r1.y, c14.x, r0.y, r1.z
-mad.f32 r0.y, c14.y, r0.y, r1.z
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.z, r1.y
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.y, r1.x
-mov.f32f32 r1.x, r0.w
-add.f r0.w, r1.w, r1.z
-add.f r0.y, r0.z, r0.y
-(rpt1)nop
-mov.f32f32 r0.z, r0.w
-mov.f32f32 r0.y, r0.y
-(rpt1)nop
-mul.f r0.w, c0.w, r0.z
-mul.f r1.z, c0.z, r0.z
-mad.f32 r0.w, c1.w, r2.y, r0.w
-mad.f32 r1.z, c1.z, r2.y, r1.z
-mul.f r1.w, c0.y, r0.z
-mul.f r0.z, c0.x, r0.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.z, r1.z
-mad.f32 r2.x, c2.w, r0.y, r0.w
-mad.f32 r1.z, c2.z, r0.y, r1.z
-mad.f32 r0.w, c3.w, r0.x, r2.x
-mad.f32 r2.z, c3.z, r0.x, r1.z
-mad.f32 r1.w, c1.y, r2.y, r1.w
-mad.f32 r0.z, c1.x, r2.y, r0.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.y, r2.z
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r2.z, r0.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.z, r2.y
-mad.f32 r1.w, c2.y, r0.y, r1.w
-mad.f32 r0.y, c2.x, r0.y, r2.z
-mad.f32 r2.y, c3.y, r0.x, r1.w
-mad.f32 r0.x, c3.x, r0.x, r0.y
-mov.f32f32 r1.w, r2.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r0.y, r2.y
-mov.f32f32 r0.x, r0.x
+nop
+add.f r0.y, r1.w, r2.y
+add.f r2.y, r1.z, r0.w
(rpt1)nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
+mov.f32f32 r0.w, r0.y
+mul.f r0.y, c0.x, r0.y
+mov.f32f32 r2.z, r2.y
+mad.f32 r0.x, c10.w, r0.z, r0.x
+mul.f r0.z, c0.w, r0.w
+mul.f r1.z, c0.z, r0.w
+mad.f32 r0.z, c1.w, r2.x, r0.z
+mad.f32 r1.z, c1.z, r2.x, r1.z
+mad.f32 r1.w, c2.w, r2.z, r0.z
+add.f r2.w, r0.x, c11.w
+mad.f32 r1.z, c2.z, r2.z, r1.z
+mul.f r0.x, c0.y, r0.w
+mad.f32 r0.y, c1.x, r2.x, r0.y
+mad.f32 r0.w, c3.w, r2.w, r1.w
+mad.f32 r0.z, c3.z, r2.w, r1.z
+mad.f32 r0.x, c1.y, r2.x, r0.x
+mad.f32 r0.y, c2.x, r2.y, r0.y
+mad.f32 r2.x, c2.y, r2.z, r0.x
+mad.f32 r0.x, c3.x, r2.w, r0.y
+mad.f32 r0.y, c3.y, r2.w, r2.x
end
nop
nop
nop
; VERT: outputs: r0.x (0:0) r1.x (5:9)
-; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=3,il=12,b=0)
-; VERT: 152 instructions, 0 half, 4 full
+; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r1.x (0:0,cm=3,il=12,b=0)
+; VERT: 110 instructions, 0 half, 5 full
diff --git a/reference/0ad-alpine-valley/0ad-40.asm b/reference/0ad-alpine-valley/0ad-40.asm
index c9ec6ed..df4bfc0 100644
--- a/reference/0ad-alpine-valley/0ad-40.asm
+++ b/reference/0ad-alpine-valley/0ad-40.asm
@@ -6,59 +6,32 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
-(sy)(ss)bary.f r0.z, 1, r0.x
-bary.f r0.w, 0, r0.x
-bary.f (ei)r0.x, 2, r0.x
+@const(c0.x) 0x3e800000, 0x40800000, 0x00000000, 0x00000000
+(sy)(ss)bary.f r0.w, 1, r0.x
+bary.f r0.z, 0, r0.x
+bary.f (ei)r1.x, 2, r0.x
nop
-add.f r0.y, c0.x, (neg)r0.z
-mov.f32f32 r0.w, r0.w
-cmps.f.lt r1.x, c0.z, r0.z
-mov.f32f32 r0.x, r0.x
-mul.f r0.y, r0.y, c0.y
-mov.f32f32 r1.y, r0.w
-mov.f32f32 r0.z, r0.z
-cov.u32f32 r0.w, r1.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.w, r0.x
-mov.f32f32 r1.z, r0.z
+add.f r0.x, c0.x, (neg)r0.w
+cmps.f.lt r0.y, c0.z, r0.w
+(rpt1)nop
+mul.f r0.x, r0.x, c0.y
+sam.3d (f32)(xyzw)r2.x, r0.z, s#0, t#0
+cov.u32f32 r0.y, r0.y
+(rpt1)nop
+(ss)mov.f32f32 r0.z, r0.x
+(sy)mul.f r0.x, r2.x, r0.x
+cmps.f.ne r0.y, r0.y, c0.z
nop
-mov.f32f32 r0.x, r0.y
-mov.f32f32 r0.z, r0.y
-mov.f32f32 r1.x, r0.y
-mov.f32f32 r0.y, r0.y
-cmps.f.ne r0.w, r0.w, c0.z
-sam.3d (f32)(xyzw)r1.y, r1.y, s#0, t#0
-(sy)mov.f32f32 r2.y, r2.x
-mov.f32f32 r2.z, r1.w
-mov.f32f32 r2.w, r1.z
-mov.f32f32 r3.x, r1.y
-mul.f r0.x, r2.y, r0.x
-mul.f r0.z, r2.z, r0.z
-mul.f r1.x, r2.w, r1.x
-mul.f r0.y, r3.x, r0.y
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r2.x, r2.x
-(ss)mov.f32f32 r1.w, r1.w
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r1.y, r1.y
-sel.b32 r0.x, r0.x, r0.w, r2.x
-mov.f32f32 r2.x, r1.w
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.w, r0.x
-sel.b32 r0.x, r0.z, r0.w, r2.x
-sel.b32 r0.z, r1.x, r0.w, r1.z
-sel.b32 r0.y, r0.y, r0.w, r1.y
+mul.f r0.w, r2.w, r0.z
+mul.f r1.x, r2.z, r0.z
+mul.f r0.z, r2.y, r0.z
nop
-mov.f32f32 r1.z, r0.x
-mov.f32f32 r1.y, r0.z
-mov.f32f32 r1.x, r0.y
+sel.b32 r1.w, r0.w, r0.y, r2.w
+sel.b32 r1.z, r1.x, r0.y, r2.z
+sel.b32 r1.y, r0.z, r0.y, r2.y
+sel.b32 r1.x, r0.x, r0.y, r2.x
end
; FRAG: outputs: r1.x (1:0)
; FRAG: inputs: r0.x (5:9,cm=f,il=8,b=1)
-; FRAG: 52 instructions, 0 half, 4 full
+; FRAG: 26 instructions, 0 half, 3 full
diff --git a/reference/0ad-alpine-valley/0ad-46.asm b/reference/0ad-alpine-valley/0ad-46.asm
index b06167e..eb8f852 100644
--- a/reference/0ad-alpine-valley/0ad-46.asm
+++ b/reference/0ad-alpine-valley/0ad-46.asm
@@ -8,199 +8,131 @@
@out(r2.y) out1
@out(r2.z) out2
@out(r2.w) out3
+@const(c8.x) 0x3f000000, 0x00000000, 0x3f800000, 0xba03126f
+@const(c9.x) 0xbf000000, 0x40000000, 0x3f800000, 0x3fb8aa65
+@const(c10.x) 0x3de38866, 0x3cf5c28f, 0x00000000, 0x00000000
+@const(c11.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.x, 0, r1.x
add.f r0.y, r0.w, c8.y
bary.f r0.w, 1, r1.x
bary.f r1.z, 4, r1.x
-add.f r1.w, r0.x, c9.x
-bary.f r2.x, 6, r1.x
-bary.f r2.y, 2, r1.x
-add.f r2.z, r0.w, c9.x
-floor.f r2.w, r1.w
+add.f r2.x, r0.x, c9.x
+bary.f r1.w, 5, r1.x
+add.f r2.y, r0.w, c9.x
+bary.f r2.z, 2, r1.x
+floor.f r2.w, r2.x
rcp r0.y, r0.y
add.f r0.z, r0.z, c8.y
-mov.f32f32 r1.z, r1.z
-floor.f r3.x, r2.z
-add.f r1.w, r1.w, (neg)r2.w
+floor.f r3.x, r2.y
+add.f r3.w, r2.z, c8.w
+add.f r2.x, r2.x, (neg)r2.w
(ss)mul.f r0.y, r0.z, r0.y
-mov.f32f32 r3.y, r1.z
-add.f r0.z, r2.z, (neg)r3.x
-mov.f32f32 r1.z, r1.w
+absneg.f r0.z, (neg)c6.x
+add.f r2.y, r2.y, (neg)r3.x
+mov.f32f32 r2.z, r2.x
+add.f r2.x, r2.x, c8.z
+mul.f r0.z, r0.z, c6.x
+sam (f32)(w)r4.x, r1.z, s#1, t#1
+(ss)mov.f32f32 r1.z, r2.y
+mul.f r1.w, c8.x, r2.z
+add.f r2.z, c9.y, (neg)r2.z
+mul.f r0.z, r0.z, r0.y
mov.f32f32 r0.y, r0.y
-absneg.f r1.w, (neg)c6.x
-mov.f32f32 r0.z, r0.z
-mul.f r2.z, c8.x, r1.z
-add.f r2.w, c9.y, (neg)r1.z
-mul.f r1.w, r1.w, c6.x
-add.f r3.x, c9.y, (neg)r0.z
-mov.f32f32 r2.z, r2.z
-mul.f r3.z, c8.x, r0.z
-mul.f r1.w, r1.w, r0.y
-mov.f32f32 r2.w, r2.w
-add.f r0.x, r0.x, (neg)r2.z
-mov.f32f32 r2.z, r3.z
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r0.x, r0.x
-add.f r0.w, r0.w, (neg)r2.z
-mul.f r0.y, r1.w, r0.y
-mul.f r1.w, r2.w, r3.x
-add.f r2.z, c9.x, r0.x
+add.f r0.x, r0.x, (neg)r1.w
+mul.f r1.w, c8.x, r1.z
+mov.f32f32 r2.w, r2.z
+mul.f r0.y, r0.z, r0.y
+mov.f32f32 r0.z, r0.x
+add.f r0.w, r0.w, (neg)r1.w
add.f r0.x, c9.z, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r0.x, r0.x
mul.f r0.y, r0.y, c9.w
-add.f r3.z, c9.z, r0.w
-mul.f r2.z, r2.z, c3.z
-mul.f r0.x, r0.x, c3.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r3.z, r3.z
-mov.f32f32 r3.w, r2.z
-mov.f32f32 r4.x, r0.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r4.y, r3.w
-add.f r0.w, c9.x, r0.w
-exp2 r0.y, r0.y
-(ss)mov.f32f32 r0.y, r0.y
-mov.f32f32 r3.w, r4.x
-mov.f32f32 r5.x, r0.x
-mov.f32f32 r0.x, r0.w
-add.f r0.w, c11.y, (neg)r0.y
-mov.f32f32 r5.w, r3.w
-mul.f r3.z, r3.z, c3.w
-mul.f r0.x, r0.x, c3.w
-mul.f r0.w, r0.w, c6.y
-mul.f r0.y, r0.y, c8.z
-mov.f32f32 r3.w, r3.z
-mov.f32f32 r4.x, r0.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r6.z, r2.z
-mov.f32f32 r2.z, r3.z
-mov.f32f32 r4.z, r4.x
-add.f r2.y, r2.y, c8.w
-add.f r0.y, r0.y, r0.w
-mov.f32f32 r0.w, r3.w
-mov.f32f32 r5.y, r0.x
-mov.f32f32 r0.x, r2.y
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r6.x, r0.w
-mov.f32f32 r0.w, r2.y
-mov.f32f32 r4.w, r0.x
-mov.f32f32 r0.x, r0.y
-mov.f32f32 r0.y, r2.y
-mov.f32f32 r6.w, r2.z
-mov.f32f32 r6.y, r0.w
-mov.f32f32 r0.w, r2.y
-mov.f32f32 r5.z, r0.y
-sam.s (f32)(x)r3.z, r4.y, s#2, t#2
-(sy)mov.f32f32 r0.y, r3.z
+add.f r0.z, c9.x, r0.z
+mov.f32f32 r1.w, r0.w
+mul.f r3.y, r0.x, c3.z
+add.f r0.x, c9.z, r0.w
+mul.f r4.x, r0.z, c3.z
+add.f r0.z, c9.x, r1.w
+mov.f32f32 r5.x, r3.y
+mul.f r3.z, r0.x, c3.w
+mov.f32f32 r5.w, r4.x
+mul.f r5.y, r0.z, c3.w
+exp2 r0.x, r0.y
+(ss)mov.f32f32 r0.y, r0.x
+mov.f32f32 r5.z, r3.w
+mov.f32f32 r4.y, r3.z
+mov.f32f32 r6.x, r5.y
+mov.f32f32 r6.y, r3.w
+add.f r0.y, c11.y, (neg)r0.y
+mov.f32f32 r4.z, r3.w
+sam.s (f32)(x)r3.x, r3.y, s#2, t#2
+add.f r0.z, c9.y, (neg)r1.z
+sam.s (f32)(x)r6.z, r5.x, s#2, t#2
+mul.f r0.x, r0.x, c8.z
+add.f r0.w, r2.y, c8.z
+mul.f r0.y, r0.y, c6.y
+(ss)nop
+sam.s (f32)(x)r5.x, r5.w, s#2, t#2
+mov.f32f32 r1.z, r0.z
+mul.f r0.z, r2.x, r0.z
+mul.f r1.w, r2.z, r0.w
+add.f r0.x, r0.x, r0.y
+mul.f r0.y, r2.w, r1.z
+sam.s (f32)(x)r3.y, r4.x, s#2, t#2
+mul.f r0.w, r2.x, r0.w
+(sy)cmps.f.lt r1.z, r4.w, c10.y
+bary.f r2.x, 6, r1.x
+mul.f r0.y, r0.y, r5.x
max.f r0.x, r0.x, c8.y
-mov.f32f32 r7.x, r0.w
-mov.f32f32 r0.w, r1.w
-mov.f32f32 r0.y, r0.y
+mad.f32 r0.y, r0.z, r6.z, r0.y
+cov.u32f32 r0.z, r1.z
+mad.f32 r0.y, r1.w, r3.y, r0.y
min.f r0.x, r0.x, c8.z
-sam.s (f32)(x)r3.z, r5.w, s#2, t#2
-nop
-(sy)mov.f32f32 r1.w, r3.z
-mul.f r0.y, r0.w, r0.y
-sam.s (f32)(x)r3.z, r5.x, s#2, t#2
-(sy)mov.f32f32 r0.w, r3.z
-add.f r1.z, r1.z, c8.z
-add.f r2.y, c11.y, (neg)r0.x
-add.f r2.z, c11.y, (neg)r0.x
-add.f r3.z, c11.y, (neg)r0.x
-mul.f r3.x, r1.z, r3.x
-mul.f r2.y, r2.y, c5.z
-mul.f r2.z, r2.z, c5.y
-mul.f r3.w, r3.z, c5.x
-mov.f32f32 r3.x, r3.x
-sam.s (f32)(x)r4.x, r6.z, s#2, t#2
-add.f r0.z, r0.z, c8.z
-(sy)mov.f32f32 r3.z, r4.x
-bary.f r4.x, 5, r1.x
-mad.f32 r0.y, r3.x, r0.w, r0.y
-mul.f r0.w, r2.w, r0.z
+mad.f32 r0.y, r0.w, r3.x, r0.y
+cmps.f.ne r0.z, r0.z, c8.y
(rpt1)nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.w, r0.w
-mul.f r0.z, r1.z, r0.z
-mov.f32f32 r1.z, r4.x
-mov.f32f32 r2.x, r2.x
-mad.f32 r0.y, r0.w, r3.z, r0.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r3.z, r1.z
-mov.f32f32 r4.x, r2.x
-mov.f32f32 r0.y, r0.y
-bary.f r0.w, 7, r1.x
-mad.f32 r0.y, r0.z, r1.w, r0.y
-mov.f32f32 r0.z, c8.z
-bary.f r1.z, 10, r1.x
-sam (f32)(w)r2.w, r3.y, s#1, t#1
-(sy)cmps.f.lt r1.w, r3.z, c10.y
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r2.x, r3.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.w, r0.z
mul.f r0.y, c10.x, r0.y
-cov.u32f32 r0.z, r1.w
-mov.f32f32 r1.w, r2.x
-(ss)mov.f32f32 r4.y, r0.w
-mov.f32f32 r0.y, r0.y
-cmps.f.ne r0.z, r0.z, c8.y
-nop
-mov.f32f32 r0.w, c8.y
-bary.f r2.x, 9, r1.x
+bary.f r2.y, 7, r1.x
+add.f r0.w, c11.y, (neg)r0.x
+add.f r1.z, c11.y, (neg)r0.x
+mov.f32f32 r1.w, r0.y
+add.f r2.z, c11.y, (neg)r0.x
+(rpt1)nop
+sam (f32)(xyz)r2.w, r2.x, s#0, t#0
+(ss)bary.f r2.x, 10, r1.x
+bary.f r2.y, 9, r1.x
bary.f (ei)r1.x, 8, r1.x
-sam (f32)(xyz)r4.x, r4.x, s#0, t#0
-(sy)mul.f r1.y, r4.z, r1.z
-sel.b32 r0.z, r0.w, r0.z, r1.w
-mul.f r0.w, r4.y, r2.x
-mul.f r1.x, r4.x, r1.x
-mul.f r1.y, r1.y, r0.y
-(rpt2)nop
-mov.f32f32 r1.y, r1.y
-mul.f r0.w, r0.w, r0.y
-mad.f32 r1.y, c4.z, r4.z, r1.y
+mul.f r0.w, r0.w, c5.z
+(sy)mul.f r1.y, r3.y, r2.x
+mul.f r2.x, r3.x, r2.y
+mul.f r1.x, r2.w, r1.x
+mul.f r1.z, r1.z, c5.y
+mul.f r1.y, r1.y, r1.w
+mul.f r1.w, r2.x, r1.w
+mad.f32 r1.y, c4.z, r3.y, r1.y
+mad.f32 r1.w, c4.y, r3.x, r1.w
mul.f r0.y, r1.x, r0.y
-(rpt1)nop
-mov.f32f32 r1.x, r1.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.y, r0.y
-mad.f32 r0.w, c4.y, r4.y, r0.w
-mul.f r1.x, r0.x, r1.x
-mad.f32 r0.y, c4.x, r4.x, r0.y
-(rpt1)nop
-add.f r1.x, r1.x, r2.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.y, r0.y
+mul.f r1.x, r2.z, c5.x
+mul.f r1.y, r0.x, r1.y
+mul.f r1.w, r0.x, r1.w
+mad.f32 r0.y, c4.x, r2.w, r0.y
+mov.f32f32 r2.x, c8.y
+add.f r0.w, r1.y, r0.w
+add.f r1.y, r1.w, r1.z
nop
-mul.f r1.x, r1.x, r0.z
-mul.f r0.w, r0.x, r0.w
+sel.b32 r0.z, r2.x, r0.z, r4.w
mul.f r0.x, r0.x, r0.y
+mov.f32f32 r2.w, c8.z
nop
-mov.f32f32 r0.y, r1.x
-add.f r0.w, r0.w, r2.z
-add.f r0.x, r0.x, r3.w
+mul.f r2.z, r0.w, r0.z
+mul.f r2.y, r1.y, r0.z
+add.f r0.x, r0.x, r1.x
+(rpt2)nop
+mul.f r2.x, r0.x, r0.z
+end
nop
-mov.f32f32 r0.y, r0.y
-mul.f r0.w, r0.w, r0.z
-mul.f r0.x, r0.x, r0.z
nop
-mov.f32f32 r2.z, r0.y
-mov.f32f32 r0.y, r0.w
-mov.f32f32 r0.x, r0.x
-(rpt1)nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
-(rpt1)nop
-mov.f32f32 r2.y, r0.y
-mov.f32f32 r2.x, r0.x
-end
nop
; FRAG: outputs: r2.x (1:0)
-; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r0.x (5:10,cm=f,il=12,b=1) r3.x (5:11,cm=f,il=16,b=1)
-; FRAG: 198 instructions, 0 half, 8 full
+; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r1.z (5:10,cm=f,il=12,b=1) r3.x (5:11,cm=f,il=16,b=1)
+; FRAG: 121 instructions, 0 half, 7 full
diff --git a/reference/0ad-alpine-valley/0ad-49.asm b/reference/0ad-alpine-valley/0ad-49.asm
index bdc5151..2dd080b 100644
--- a/reference/0ad-alpine-valley/0ad-49.asm
+++ b/reference/0ad-alpine-valley/0ad-49.asm
@@ -8,6 +8,11 @@
@out(r2.y) out1
@out(r2.z) out2
@out(r2.w) out3
+@const(c8.x) 0x3f000000, 0x00000000, 0x3f800000, 0xbeef9097
+@const(c9.x) 0x3cff9724, 0xba03126f, 0xbf000000, 0x40000000
+@const(c10.x) 0x3f800000, 0xbf000000, 0x3fb8aa65, 0x3de38866
+@const(c11.x) 0x3cf5c28f, 0x00000000, 0x00000000, 0x00000000
+@const(c12.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.x, 9, r1.x
bary.f r0.y, 0, r1.x
add.f r0.w, r0.w, c8.y
@@ -16,275 +21,171 @@ mul.f r1.w, r0.x, r0.x
bary.f r2.x, 10, r1.x
add.f r2.y, r0.y, c9.z
add.f r2.z, r1.z, c9.z
-bary.f r2.w, 4, r1.x
+bary.f r3.x, 4, r1.x
mad.f32 r1.w, r2.x, r2.x, r1.w
-floor.f r3.x, r2.y
+bary.f r2.w, 11, r1.x
+floor.f r3.y, r2.y
rcp r0.w, r0.w
add.f r0.z, r0.z, c8.y
-floor.f r3.y, r2.z
-mov.f32f32 r1.w, r1.w
-bary.f r3.z, 11, r1.x
-add.f r2.y, r2.y, (neg)r3.x
+floor.f r3.z, r2.z
+mad.f32 r1.w, r2.w, r2.w, r1.w
+add.f r2.y, r2.y, (neg)r3.y
(ss)mul.f r0.z, r0.z, r0.w
-(ss)add.f r0.w, r2.z, (neg)r3.y
-mad.f32 r1.w, r3.z, r3.z, r1.w
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r0.z, r0.z
-absneg.f r2.z, (neg)c6.x
-mov.f32f32 r0.w, r0.w
-mul.f r3.x, c8.x, r2.y
-add.f r3.y, c9.w, (neg)r2.y
+(ss)absneg.f r0.w, (neg)c6.x
+add.f r2.z, r2.z, (neg)r3.z
+mov.f32f32 r3.y, r2.y
+add.f r2.y, r2.y, c8.z
rsq r1.w, r1.w
-(ss)mov.f32f32 r1.w, r1.w
-mul.f r2.z, r2.z, c6.x
-mov.f32f32 r3.x, r3.x
-mul.f r3.w, c8.x, r0.w
-mul.f r0.x, r0.x, r1.w
-mul.f r2.z, r2.z, r0.z
-add.f r0.y, r0.y, (neg)r3.x
-mov.f32f32 r3.x, r3.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r0.y, r0.y
-add.f r1.z, r1.z, (neg)r3.x
-absneg.f r0.x, (abs)r0.x
-mul.f r0.z, r2.z, r0.z
-add.f r2.z, c10.y, r0.y
-add.f r0.y, c10.x, r0.y
-mov.f32f32 r0.x, r0.x
+(ss)mov.f32f32 r3.z, r1.w
+mul.f r0.w, r0.w, c6.x
+mul.f r3.w, c8.x, r3.y
+mov.f32f32 r4.x, r2.z
+mul.f r0.x, r0.x, r3.z
+mul.f r0.w, r0.w, r0.z
mov.f32f32 r0.z, r0.z
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r0.y, r0.y
+add.f r0.y, r0.y, (neg)r3.w
+absneg.f r0.x, (abs)r0.x
+mul.f r3.w, c8.x, r4.x
+mul.f r0.z, r0.w, r0.z
+add.f r0.w, c10.x, r0.y
add.f r0.x, r0.x, c8.w
+mov.f32f32 r0.y, r0.y
mul.f r0.z, r0.z, c10.z
-mul.f r2.z, r2.z, c3.z
-mul.f r0.y, r0.y, c3.z
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r3.x, r2.z
-mov.f32f32 r3.w, r0.y
+add.f r1.z, r1.z, (neg)r3.w
max.f r0.x, r0.x, c8.y
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r4.x, r3.x
-mov.f32f32 r0.x, r0.x
-mul.f r2.x, r2.x, r1.w
-mov.f32f32 r1.z, r1.z
-exp2 r0.z, r0.z
-(ss)mov.f32f32 r0.z, r0.z
-mov.f32f32 r3.x, r3.w
-mov.f32f32 r2.x, r2.x
-add.f r3.w, c10.y, r1.z
-add.f r4.y, c12.y, (neg)r0.z
-mov.f32f32 r4.w, r3.x
-absneg.f r2.x, (abs)r2.x
-mov.f32f32 r3.x, r3.w
-mul.f r3.w, r4.y, c6.y
-mul.f r0.z, r0.z, c8.z
-mov.f32f32 r2.x, r2.x
-mul.f r3.x, r3.x, c3.w
+mul.f r2.x, r2.x, r3.z
+add.f r0.y, c10.y, r0.y
+mov.f32f32 r3.z, r1.z
+mul.f r4.y, r0.w, c3.z
+absneg.f r0.w, (abs)r2.x
+mul.f r5.x, r0.y, c3.z
+exp2 r0.y, r0.z
+(ss)mov.f32f32 r0.z, r0.y
+add.f r2.x, c10.y, r3.z
+add.f r0.w, r0.w, c8.w
+mov.f32f32 r5.w, r5.x
+add.f r0.z, c12.y, (neg)r0.z
+mul.f r6.w, r2.x, c3.w
+max.f r0.w, r0.w, c8.y
+mov.f32f32 r6.z, r4.y
add.f r1.z, c10.x, r1.z
-add.f r0.z, r0.z, r3.w
-add.f r2.x, r2.x, c8.w
-mov.f32f32 r3.w, r3.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r4.y, r3.w
-bary.f r3.w, 2, r1.x
-mov.f32f32 r0.z, r0.z
-max.f r2.x, r2.x, c8.y
-mul.f r1.z, r1.z, c3.w
-add.f r3.w, r3.w, c9.y
-mov.f32f32 r5.z, r0.y
-mov.f32f32 r0.y, r2.x
-max.f r0.z, r0.z, c8.y
-mov.f32f32 r2.x, r3.w
-mov.f32f32 r5.x, r1.z
-add.f r6.x, r0.x, r0.y
-mul.f r1.w, r3.z, r1.w
-mov.f32f32 r4.z, r2.x
-min.f r0.z, r0.z, c8.z
-mov.f32f32 r2.x, r5.x
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r6.y, r2.z
-add.f r2.z, c12.y, (neg)r0.z
+mul.f r0.z, r0.z, c6.y
+add.f r2.x, r0.x, r0.w
+mul.f r1.w, r2.w, r1.w
+mov.f32f32 r6.x, r6.w
+mul.f r0.y, r0.y, c8.z
+bary.f r2.w, 2, r1.x
absneg.f r1.w, (abs)r1.w
-sam.s (f32)(x)r6.z, r4.x, s#2, t#2
-(sy)mov.f32f32 r3.z, r6.z
-(ss)add.f r4.x, c12.y, (neg)r0.z
-mul.f r2.z, r2.z, c5.z
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r3.z, r3.z
-mov.f32f32 r3.y, r3.y
-add.f r4.y, c9.w, (neg)r0.w
-add.f r1.w, r1.w, c8.w
-mul.f r4.x, r4.x, c5.y
-add.f r4.z, c12.y, (neg)r0.z
-mov.f32f32 r4.y, r4.y
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r5.x, r2.x
-mov.f32f32 r2.x, r3.w
-mov.f32f32 r5.w, r3.x
-max.f r1.w, r1.w, c8.y
-mul.f r3.x, r3.y, r4.y
-mul.f r4.z, r4.z, c5.x
-mov.f32f32 r5.y, r2.x
-mov.f32f32 r1.w, r1.w
-mul.f r2.x, r3.x, r3.z
-mov.f32f32 r3.x, r3.w
-mov.f32f32 r1.z, r1.z
-add.f r3.z, r6.x, r1.w
-add.f r2.y, r2.y, c8.z
-add.f r0.w, r0.w, c8.z
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r3.z, r3.z
-mov.f32f32 r6.x, r3.x
-sam.s (f32)(x)r6.z, r4.w, s#2, t#2
-(rpt3)nop
-(sy)mov.f32f32 r3.x, r6.z
-(ss)rcp r4.w, r3.z
-(ss)mov.f32f32 r4.w, r4.w
-sam.s (f32)(x)r5.x, r5.z, s#2, t#2
-(sy)mov.f32f32 r5.x, r5.x
-mul.f r4.y, r2.y, r4.y
-rcp r5.y, r3.z
-(ss)mov.f32f32 r5.y, r5.y
-mul.f r0.x, r0.x, r4.w
-(ss)rcp r3.z, r3.z
-(ss)mov.f32f32 r3.z, r3.z
-mad.f32 r2.x, r4.y, r5.x, r2.x
-mul.f r0.y, r0.y, r5.y
+mul.f r4.z, r1.z, c3.w
+add.f r0.y, r0.y, r0.z
+add.f r4.w, r2.w, c9.y
+add.f r0.z, r1.w, c8.w
+mov.f32f32 r5.y, r4.z
mov.f32f32 r0.x, r0.x
-bary.f r4.y, 13, r1.x
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r6.z, r1.z
-mov.f32f32 r1.z, r3.w
-mul.f r3.w, r4.y, c9.x
-mov.f32f32 r0.y, r0.y
-mul.f r1.w, r1.w, r3.z
-mov.f32f32 r6.w, r1.z
-mov.f32f32 r1.z, r3.w
-mul.f r3.y, r3.y, r0.w
-mov.f32f32 r4.w, r2.w
-mul.f r0.w, r2.y, r0.w
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r1.w, r1.w
-sam.s (f32)(x)r5.x, r6.y, s#2, t#2
-(sy)mov.f32f32 r2.y, r5.x
-bary.f r2.w, 5, r1.x
-mov.f32f32 r5.y, r1.z
-bary.f r1.z, 14, r1.x
-mad.f32 r2.x, r3.y, r2.y, r2.x
-mov.f32f32 r2.y, r2.w
-mov.f32f32 r2.w, r3.w
-mul.f r1.z, r1.z, c9.x
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r5.x, r2.y
-mov.f32f32 r3.y, r2.w
-mov.f32f32 r2.y, r1.z
-mad.f32 r0.w, r0.w, r3.x, r2.x
-mov.f32f32 r1.z, r1.z
-bary.f r2.x, 12, r1.x
-mov.f32f32 r2.y, r2.y
mov.f32f32 r0.w, r0.w
+max.f r0.z, r0.z, c8.y
+mov.f32f32 r6.y, r4.w
+max.f r0.y, r0.y, c8.y
+mov.f32f32 r7.x, r4.w
+mov.f32f32 r1.z, r0.z
+mov.f32f32 r5.z, r4.w
+min.f r0.y, r0.y, c8.z
+sam.s (f32)(x)r7.y, r4.y, s#2, t#2
+add.f r1.w, c9.w, (neg)r3.y
+add.f r1.z, r2.x, r1.z
+sam.s (f32)(x)r7.z, r5.w, s#2, t#2
+nop
+add.f r2.x, c12.y, (neg)r0.y
+add.f r2.w, c12.y, (neg)r0.y
+mov.f32f32 r3.y, r1.z
+mov.f32f32 r3.z, r1.w
+mul.f r2.x, r2.x, c5.z
+mul.f r3.w, r2.w, c5.y
+add.f r2.w, c12.y, (neg)r0.y
+add.f r4.x, c9.w, (neg)r4.x
+rcp r1.z, r1.z
+(ss)mul.f r0.z, r0.z, r1.z
+(ss)rcp r1.z, r3.y
+(ss)mul.f r0.x, r0.x, r1.z
+bary.f r1.z, 13, r1.x
+mov.f32f32 r4.y, r4.x
+(ss)rcp r3.y, r3.y
+(ss)mul.f r0.w, r0.w, r3.y
+mov.f32f32 r4.z, r0.x
+mul.f r5.w, r1.z, c9.x
+mul.f r1.z, r3.z, r4.y
+mov.f32f32 r3.z, r0.w
+mul.f r4.y, r2.w, c5.x
+mov.f32f32 r6.x, r5.w
+bary.f r2.w, 14, r1.x
+(sy)mul.f r1.z, r1.z, r7.z
+sam.s (f32)(x)r7.z, r6.z, s#2, t#2
+mov.f32f32 r4.w, r0.z
+mul.f r4.x, r2.y, r4.x
+mul.f r6.y, r2.w, c9.x
+sam.s (f32)(x)r7.w, r5.x, s#2, t#2
+add.f r2.z, r2.z, c8.z
+(ss)bary.f r3.y, 5, r1.x
+(sy)mad.f32 r1.z, r4.x, r7.z, r1.z
+bary.f r4.x, 12, r1.x
+mov.f32f32 r2.w, c8.z
+bary.f r5.x, 8, r1.x
+sam (f32)(xyzw)r8.x, r6.x, s#0, t#0
+(sy)mul.f r5.y, r8.z, r4.z
+(ss)mul.f r6.x, r4.x, c9.x
+mul.f r4.x, r8.y, r4.z
+mul.f r0.x, r8.x, r0.x
+mul.f r1.w, r1.w, r2.z
+mov.f32f32 r6.z, r6.x
+mul.f r2.y, r2.y, r2.z
+sam (f32)(w)r8.x, r3.x, s#1, t#1
+(sy)cmps.f.lt r2.z, r8.w, c11.x
+mad.f32 r1.z, r1.w, r7.w, r1.z
+sam (f32)(xyzw)r7.z, r5.w, s#0, t#0
+bary.f r1.w, 7, r1.x
+mad.f32 r1.z, r2.y, r7.y, r1.z
+cov.u32f32 r2.y, r2.z
(ss)nop
-sam (f32)(w)r5.z, r4.w, s#1, t#1
-(sy)cmps.f.lt r2.w, r6.y, c11.x
-mov.f32f32 r3.x, r6.y
-mov.f32f32 r5.z, r2.y
-mul.f r0.w, c10.w, r0.w
-cov.u32f32 r2.y, r2.w
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r3.z, r1.z
-mul.f r1.z, r2.x, c9.x
-mov.f32f32 r2.x, c8.z
-(ss)nop
-sam (f32)(xyzw)r4.w, r5.y, s#0, t#0
-(sy)mul.f r4.y, r5.y, r0.x
-mul.f r5.x, r5.x, r0.x
-mov.f32f32 r2.w, r1.z
-mul.f r0.x, r4.w, r0.x
-mov.f32f32 r0.w, r0.w
-cmps.f.ne r2.y, r2.y, c8.y
-mov.f32f32 r3.w, r2.w
-mov.f32f32 r4.w, c8.y
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r2.w, r2.x
-bary.f r2.x, 8, r1.x
-(ss)bary.f r5.y, 7, r1.x
+sam (f32)(xyzw)r5.z, r6.y, s#0, t#0
+(sy)mad.f32 r2.z, r6.x, r3.z, r5.y
+mad.f32 r3.x, r5.w, r3.z, r4.x
+mad.f32 r2.z, r8.x, r4.w, r2.z
+mad.f32 r3.x, r7.w, r4.w, r3.x
+mad.f32 r0.x, r5.z, r0.w, r0.x
+mul.f r0.w, c10.w, r1.z
+mov.f32f32 r1.z, r2.z
+mov.f32f32 r3.y, r3.x
+mad.f32 r0.x, r7.z, r0.z, r0.x
+mov.f32f32 r0.z, r0.w
+mul.f r1.z, r1.z, r5.x
+mul.f r1.w, r3.y, r1.w
+mov.f32f32 r3.y, r0.x
bary.f (ei)r1.x, 6, r1.x
-sam (f32)(xyzw)r5.z, r3.z, s#0, t#0
-(sy)mad.f32 r1.y, r6.x, r0.y, r4.y
-(ss)mov.f32f32 r3.z, r1.z
-mad.f32 r1.z, r5.w, r0.y, r5.x
-mad.f32 r0.x, r5.z, r0.y, r0.x
-sel.b32 r0.y, r4.w, r2.y, r3.x
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r2.y, r5.y
-mov.f32f32 r1.x, r1.x
-sam (f32)(xyzw)r3.x, r3.y, s#0, t#0
-(sy)mad.f32 r1.y, r3.z, r1.w, r1.y
-mad.f32 r1.z, r3.y, r1.w, r1.z
-mad.f32 r0.x, r3.x, r1.w, r0.x
-nop
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r0.x, r0.x
-nop
-mul.f r1.w, r1.y, r2.x
-mul.f r2.x, r1.z, r2.y
-mul.f r1.x, r0.x, r1.x
-nop
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r1.x, r1.x
-nop
-mul.f r1.w, r1.w, r0.w
-mul.f r2.x, r2.x, r0.w
+mul.f r1.y, r1.z, r0.z
+mul.f r0.z, r1.w, r0.z
+mad.f32 r1.y, c4.z, r2.z, r1.y
+mad.f32 r0.z, c4.y, r3.x, r0.z
+mul.f r1.x, r3.y, r1.x
+cmps.f.ne r1.z, r2.y, c8.y
+mul.f r1.y, r0.y, r1.y
+mul.f r0.z, r0.y, r0.z
mul.f r0.w, r1.x, r0.w
+mov.f32f32 r1.x, c8.y
+add.f r1.y, r1.y, r2.x
+add.f r0.z, r0.z, r3.w
nop
-mov.f32f32 r1.x, r1.w
-mov.f32f32 r1.w, r2.x
-mad.f32 r1.x, c4.z, r1.y, r1.x
-mad.f32 r1.y, c4.y, r1.z, r1.w
-mov.f32f32 r0.w, r0.w
-nop
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.y, r1.y
+sel.b32 r1.x, r1.x, r1.z, r8.w
mad.f32 r0.x, c4.x, r0.x, r0.w
-nop
-mul.f r0.w, r0.z, r1.x
-mul.f r1.x, r0.z, r1.y
-mov.f32f32 r0.x, r0.x
-nop
-add.f r0.w, r0.w, r2.z
-add.f r1.x, r1.x, r4.x
-mul.f r0.x, r0.z, r0.x
-nop
-mul.f r0.z, r0.w, r0.y
-mul.f r0.w, r1.x, r0.y
-add.f r0.x, r0.x, r4.z
-nop
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-mul.f r0.x, r0.x, r0.y
-nop
-mov.f32f32 r0.y, r0.z
-mov.f32f32 r0.z, r0.w
-mov.f32f32 r0.x, r0.x
-nop
-mov.f32f32 r2.z, r0.y
-mov.f32f32 r2.y, r0.z
-mov.f32f32 r0.x, r0.x
+(rpt1)nop
+mul.f r2.z, r1.y, r1.x
+mul.f r2.y, r0.z, r1.x
+mul.f r0.x, r0.y, r0.x
+(rpt2)nop
+add.f r0.x, r0.x, r4.y
(rpt2)nop
-mov.f32f32 r2.x, r0.x
+mul.f r2.x, r0.x, r1.x
end
-nop
-nop
-nop
; FRAG: outputs: r2.x (1:0)
-; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r4.z (5:10,cm=f,il=12,b=1) r2.w (5:11,cm=f,il=16,b=1) r4.x (5:12,cm=f,il=20,b=1)
-; FRAG: 278 instructions, 0 half, 7 full
+; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r1.x (5:10,cm=f,il=12,b=1) r2.x (5:11,cm=f,il=16,b=1) r4.x (5:12,cm=f,il=20,b=1)
+; FRAG: 177 instructions, 0 half, 9 full
diff --git a/reference/0ad-alpine-valley/0ad-51.asm b/reference/0ad-alpine-valley/0ad-51.asm
index 890976a..4dd0acb 100644
--- a/reference/0ad-alpine-valley/0ad-51.asm
+++ b/reference/0ad-alpine-valley/0ad-51.asm
@@ -1,14 +1,14 @@
; options:
; VERT: new compiler
-@in(r2.w) in0
-@in(r3.x) in1
-@in(r3.y) in2
-@in(r5.x) in4
-@in(r5.y) in5
-@in(r5.z) in6
-@in(r8.x) in8
-@in(r8.y) in9
-@in(r8.z) in10
+@in(r6.z) in0
+@in(r6.w) in1
+@in(r7.x) in2
+@in(r5.w) in4
+@in(r6.x) in5
+@in(r6.y) in6
+@in(r2.w) in8
+@in(r3.x) in9
+@in(r3.y) in10
@out(r0.x) out0
@out(r0.y) out1
@out(r0.z) out2
@@ -41,163 +41,105 @@
@out(r7.y) out29
@out(r7.z) out30
@out(r7.w) out31
-(sy)(ss)add.f r0.x, c4.x, (neg)r2.w
-mul.f r0.y, r5.x, r5.x
-mul.f r0.z, c8.w, r2.w
-mul.f r0.w, c8.z, r2.w
+@const(c13.x) 0x3f800000, 0x00000000, 0xbf800000, 0x00000000
+@const(c14.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
+(sy)(ss)add.f r0.x, c4.x, (neg)r6.z
+mul.f r0.y, r5.w, r5.w
+mul.f r0.z, c8.y, r6.z
+mul.f r0.w, c8.x, r6.z
mul.f r1.x, r0.x, r0.x
-add.f r1.y, c4.y, (neg)r3.x
+add.f r1.z, c4.y, (neg)r6.w
add.f r0.y, c13.x, (neg)r0.y
-mad.f32 r0.z, c9.w, r3.x, r0.z
-mad.f32 r0.w, c9.z, r3.x, r0.w
-mad.f32 r1.x, r1.y, r1.y, r1.x
-mov.f32f32 r0.y, r0.y
-mad.f32 r0.z, c10.w, r3.y, r0.z
-mad.f32 r0.w, c10.z, r3.y, r0.w
-mov.f32f32 r1.x, r1.x
-add.f r2.x, c4.z, (neg)r3.y
-mul.f r2.y, r0.y, r0.y
-mul.f r1.z, r5.y, r5.x
-add.f r0.z, r0.z, c11.w
-mad.f32 r1.x, r2.x, r2.x, r1.x
-add.f r0.w, r0.w, c11.z
-mul.f r2.z, c8.y, r2.w
-mul.f r3.z, c8.x, r2.w
-add.f r1.z, c13.y, (neg)r1.z
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
+mad.f32 r0.z, c9.y, r6.w, r0.z
+mad.f32 r0.w, c9.x, r6.w, r0.w
+mad.f32 r1.x, r1.z, r1.z, r1.x
+add.f r1.w, c4.z, (neg)r7.x
+mov.f32f32 r1.y, r0.y
+mad.f32 r0.z, c10.y, r7.x, r0.z
+mad.f32 r0.w, c10.x, r7.x, r0.w
+mad.f32 r1.x, r1.w, r1.w, r1.x
+mul.f r2.x, r1.y, r1.y
+mul.f r1.y, r6.x, r5.w
+add.f r0.z, r0.z, c11.y
+add.f r0.w, r0.w, c11.x
+mul.f r2.y, r6.y, r5.w
+mul.f r2.z, c8.w, r6.z
rsq r1.x, r1.x
-(ss)mov.f32f32 r1.x, r1.x
-mov.f32f32 r3.w, r1.z
-mov.f32f32 r1.w, r0.z
-mov.f32f32 r1.z, r0.w
+(ss)mov.f32f32 r3.z, r1.x
+add.f r3.w, c13.y, (neg)r1.y
mad.f32 r0.x, r0.x, r1.x, (neg)c5.x
-mad.f32 r0.z, r3.w, r3.w, r2.y
-mad.f32 r0.w, r1.y, r1.x, (neg)c5.y
-mad.f32 r1.x, r2.x, r1.x, (neg)c5.z
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.z, r0.z
-mul.f r1.y, r5.z, r5.x
-mov.f32f32 r0.w, r0.w
-mul.f r2.x, r0.x, r0.x
-mov.f32f32 r1.x, r1.x
-add.f r1.y, c13.y, (neg)r1.y
-mad.f32 r2.x, r0.w, r0.w, r2.x
-mad.f32 r2.y, c9.y, r3.x, r2.z
-mad.f32 r2.z, c9.x, r3.x, r3.z
-mad.f32 r2.y, c10.y, r3.y, r2.y
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r3.z, r1.y
-mad.f32 r1.y, r1.x, r1.x, r2.x
-add.f r2.x, r2.y, c11.y
-mad.f32 r2.y, c10.x, r3.y, r2.z
-mul.f r5.w, c0.w, r2.w
-mul.f r6.x, c0.z, r2.w
-mul.f r6.y, c0.y, r2.w
-mul.f r6.z, c0.x, r2.w
-rsq r1.y, r1.y
-(ss)mov.f32f32 r1.y, r1.y
-mad.f32 r0.z, r3.z, r3.z, r0.z
-mul.f r2.x, r2.x, c12.y
-add.f r2.y, r2.y, c11.x
-mul.f r1.x, r1.x, r1.y
-mul.f r0.w, r0.w, r1.y
-mul.f r0.x, r0.x, r1.y
-mov.f32f32 r1.y, r2.x
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.x, r0.x
+mul.f r1.y, r0.z, c12.y
+mad.f32 r0.z, r1.z, r3.z, (neg)c5.y
+mov.f32f32 r1.z, r3.w
+(ss)mov.f32f32 r1.x, r0.x
+mad.f32 r1.w, r1.w, r3.z, (neg)c5.z
+mov.f32f32 r3.z, r0.z
+mad.f32 r2.x, r3.w, r1.z, r2.x
+mul.f r1.x, r1.x, r1.x
+add.f r2.y, c13.y, (neg)r2.y
+mad.f32 r0.z, r0.z, r3.z, r1.x
+mov.f32f32 r3.w, r1.w
+mul.f r1.x, r0.w, c12.x
+mov.f32f32 r0.w, r2.y
+mad.f32 r2.z, c9.w, r6.w, r2.z
+mad.f32 r0.z, r1.w, r3.w, r0.z
+mad.f32 r1.w, c10.w, r7.x, r2.z
+mul.f r2.z, c8.z, r6.z
+mul.f r4.x, c0.w, r6.z
+mul.f r5.x, c0.z, r6.z
+mul.f r5.y, c0.y, r6.z
+mul.f r5.z, c0.x, r6.z
rsq r0.z, r0.z
-(ss)mov.f32f32 r0.z, r0.z
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.x, r0.x
+(ss)mov.f32f32 r4.z, r0.z
+mul.f r4.y, r0.x, r0.z
+mad.f32 r0.x, r2.y, r0.w, r2.x
+add.f r1.w, r1.w, c11.w
+mul.f r4.w, r3.w, r4.z
+mul.f r4.z, r3.z, r4.z
+(ss)mad.f32 r0.z, c9.z, r6.w, r2.z
+mad.f32 r2.y, c1.w, r6.w, r4.x
+mad.f32 r5.x, c1.z, r6.w, r5.x
+rsq r0.x, r0.x
+(ss)mov.f32f32 r3.z, r0.x
+mul.f r2.x, r0.y, r0.x
+(ss)mad.f32 r0.x, c10.z, r7.x, r0.z
+mad.f32 r0.y, c2.w, r7.x, r2.y
+mul.f r2.z, r0.w, r3.z
+mul.f r2.y, r1.z, r3.z
+mov.f32f32 r0.z, r2.x
+add.f r1.z, r0.x, c11.z
+mov.f32f32 r0.x, r2.z
+mov.f32f32 r4.x, r2.y
+mul.f r7.y, r6.x, r0.z
+add.f r0.w, r0.y, c3.w
+mul.f r0.y, r5.w, r0.x
+mul.f r3.z, r6.y, r4.x
+mad.f32 r3.w, r6.y, r0.z, (neg)r0.y
+mad.f32 r3.z, r6.x, r0.x, (neg)r3.z
+mad.f32 r4.x, r5.w, r4.x, (neg)r7.y
+mad.f32 r0.x, c2.z, r7.x, r5.x
+mad.f32 r0.y, c1.y, r6.w, r5.y
+mad.f32 r5.x, c1.x, r6.w, r5.z
+mad.f32 r0.y, c2.y, r7.x, r0.y
+add.f r0.z, r0.x, c3.z
+mad.f32 r0.x, c2.x, r7.x, r5.x
nop
-mov.f32f32 r4.w, r1.x
-mov.f32f32 r4.z, r0.w
-mov.f32f32 r4.y, r0.x
-mul.f r0.x, r0.y, r0.z
-mul.f r0.y, r3.z, r0.z
-mul.f r0.z, r3.w, r0.z
-mul.f r0.w, r2.y, c12.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r1.x, r0.w
-mul.f r0.w, r5.y, r0.x
-mul.f r2.x, r5.x, r0.y
-mad.f32 r0.w, r5.x, r0.z, (neg)r0.w
-mad.f32 r2.x, r5.z, r0.x, (neg)r2.x
-mul.f r2.y, r5.z, r0.z
-mov.f32f32 r2.z, r0.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.x, r2.x
-mad.f32 r0.y, r5.y, r0.y, (neg)r2.y
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r3.w, r2.x
-mov.f32f32 r0.y, r0.y
-nop
-mov.f32f32 r4.x, r0.w
-mov.f32f32 r2.y, r0.z
-mov.f32f32 r3.z, r0.y
-mov.f32f32 r0.x, r0.x
-mad.f32 r0.y, c1.w, r3.x, r5.w
-mad.f32 r0.z, c1.z, r3.x, r6.x
-mad.f32 r0.w, c1.y, r3.x, r6.y
-mov.f32f32 r2.x, r0.x
-mad.f32 r0.x, c2.w, r3.y, r0.y
-mad.f32 r0.y, c2.z, r3.y, r0.z
-mad.f32 r0.z, c2.y, r3.y, r0.w
-mad.f32 r0.w, c1.x, r3.x, r6.z
-add.f r0.x, r0.x, c3.w
-add.f r0.y, r0.y, c3.z
-add.f r5.w, r0.z, c3.y
-mad.f32 r6.x, c2.x, r3.y, r0.w
-mov.f32f32 r0.w, r0.x
-mov.f32f32 r0.z, r0.y
-mov.f32f32 r0.y, r5.w
-add.f r0.x, r6.x, c3.x
-mov.f32f32 r5.w, (0.000000)
-mov.f32f32 r6.x, (0.000000)
-mov.f32f32 r6.y, (0.000000)
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r7.w, r5.w
-mov.f32f32 r7.z, r6.x
-mov.f32f32 r7.y, r6.y
-mov.f32f32 r5.w, r3.y
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r6.x, r2.w
-mov.f32f32 r5.z, r5.z
-mov.f32f32 r7.x, r5.w
-mov.f32f32 r6.w, r3.x
-mov.f32f32 r6.z, r6.x
-mov.f32f32 r6.y, r5.z
-mov.f32f32 r3.x, r5.y
-mov.f32f32 r5.x, r5.x
-mul.f r5.y, r8.z, c6.z
-mul.f r8.y, r8.y, c6.y
-mov.f32f32 r6.x, r3.x
-mov.f32f32 r5.w, r5.x
-mov.f32f32 r5.z, r5.y
-mov.f32f32 r5.y, r8.y
-mul.f r3.x, r8.x, c6.x
-mad.f32 r3.y, c7.x, r3.y, c7.y
-mad.f32 r2.w, c7.x, r2.w, c7.y
-mov.f32f32 r8.x, c13.z
-mov.f32f32 r5.x, r3.x
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r3.x, r2.w
-mov.f32f32 r2.w, r8.x
+add.f r0.y, r0.y, c3.y
+mov.f32f32 r7.w, (0.000000)
+add.f r0.x, r0.x, c3.x
+mov.f32f32 r7.z, (0.000000)
+mov.f32f32 r7.y, (0.000000)
+mul.f r5.z, r3.y, c6.z
+mul.f r5.y, r3.x, c6.y
+mul.f r5.x, r2.w, c6.x
+mad.f32 r3.y, c7.x, r7.x, c7.y
+mad.f32 r3.x, c7.x, r6.z, c7.y
+mov.f32f32 r2.w, c13.z
end
nop
nop
nop
; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11) r4.x (5:12) r5.x (5:13) r6.x (5:14) r7.x (5:15)
-; VERT: inputs: r2.w (0:0,cm=7,il=8,b=0) r5.x (0:0,cm=7,il=12,b=0) r8.x (0:0,cm=7,il=16,b=0)
-; VERT: 153 instructions, 0 half, 9 full
+; VERT: inputs: r6.z (0:0,cm=7,il=8,b=0) r5.w (0:0,cm=7,il=12,b=0) r2.w (0:0,cm=7,il=16,b=0)
+; VERT: 93 instructions, 0 half, 8 full
diff --git a/reference/0ad-alpine-valley/0ad-52.asm b/reference/0ad-alpine-valley/0ad-52.asm
index 538f644..dc38031 100644
--- a/reference/0ad-alpine-valley/0ad-52.asm
+++ b/reference/0ad-alpine-valley/0ad-52.asm
@@ -8,539 +8,364 @@
@out(r2.y) out1
@out(r2.z) out2
@out(r2.w) out3
+@const(c13.x) 0x3f000000, 0x00000000, 0x3f800000, 0xbeef9097
+@const(c14.x) 0x3cff9724, 0x40000000, 0xbf800000, 0xba03126f
+@const(c15.x) 0xbf000000, 0x3f800000, 0x3fb8aa65, 0x3de38866
+@const(c16.x) 0x3cf5c28f, 0x00000000, 0x00000000, 0x00000000
+@const(c17.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.x, 19, r1.x
bary.f r0.y, 0, r1.x
add.f r0.w, r0.w, c13.y
bary.f r1.z, 1, r1.x
-mov.f32f32 r0.x, r0.x
-add.f r1.w, r0.y, c15.x
-bary.f r2.x, 23, r1.x
-bary.f r2.y, 24, r1.x
-mul.f r2.z, r0.x, r0.x
+mov.f32f32 r1.w, r0.x
+add.f r2.x, r0.y, c15.x
+bary.f r2.y, 8, r1.x
+bary.f r2.z, 23, r1.x
+mul.f r0.x, r0.x, r1.w
bary.f r2.w, 20, r1.x
-floor.f r3.x, r1.w
+floor.f r3.x, r2.x
rcp r0.w, r0.w
add.f r0.z, r0.z, c13.y
add.f r3.y, r1.z, c15.x
-mov.f32f32 r2.w, r2.w
-add.f r1.w, r1.w, (neg)r3.x
+mov.f32f32 r3.z, r2.w
+add.f r2.x, r2.x, (neg)r3.x
(ss)mul.f r0.z, r0.z, r0.w
-(ss)floor.f r0.w, r3.y
-mad.f32 r2.z, r2.w, r2.w, r2.z
-mov.f32f32 r1.w, r1.w
+(ss)absneg.f r0.w, (neg)c10.x
+mad.f32 r0.x, r2.w, r3.z, r0.x
+bary.f r2.w, 21, r1.x
+mov.f32f32 r3.x, r2.x
+mul.f r0.w, r0.w, c10.x
+floor.f r3.w, r3.y
+mov.f32f32 r4.x, r2.w
+mul.f r4.y, c13.x, r3.x
+mul.f r0.w, r0.w, r0.z
mov.f32f32 r0.z, r0.z
-absneg.f r3.x, (neg)c10.x
-mov.f32f32 r2.z, r2.z
-bary.f r3.z, 21, r1.x
-mul.f r3.w, c13.x, r1.w
-mul.f r3.x, r3.x, c10.x
-add.f r0.w, r3.y, (neg)r0.w
-mov.f32f32 r3.y, r3.z
-mov.f32f32 r3.z, r3.w
-mul.f r3.x, r3.x, r0.z
-mov.f32f32 r0.w, r0.w
-mad.f32 r2.z, r3.y, r3.y, r2.z
-add.f r0.y, r0.y, (neg)r3.z
-mov.f32f32 r3.x, r3.x
-mul.f r3.z, c13.x, r0.w
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r0.y, r0.y
-rsq r3.w, r2.z
-(ss)mul.f r4.x, r0.x, r3.w
-rsq r4.y, r2.z
-(ss)mov.f32f32 r4.y, r4.y
-(ss)rsq r2.z, r2.z
-(ss)mul.f r4.z, r0.x, r2.z
-add.f r4.w, c15.x, r0.y
-mov.f32f32 r4.x, r4.x
-mul.f r5.x, r0.x, r4.y
-mov.f32f32 r4.z, r4.z
-mov.f32f32 r4.w, r4.w
-absneg.f r4.x, (abs)r4.x
-mov.f32f32 r5.x, r5.x
+mad.f32 r0.x, r4.x, r4.x, r0.x
+add.f r0.y, r0.y, (neg)r4.y
+add.f r3.y, r3.y, (neg)r3.w
+mul.f r0.z, r0.w, r0.z
+add.f r0.w, c14.y, (neg)r3.x
+mov.f32f32 r3.x, r0.y
+mov.f32f32 r3.w, r3.y
+rsq r4.y, r0.x
+(ss)mul.f r4.z, r1.w, r4.y
+rsq r4.w, r0.x
+(ss)mov.f32f32 r5.x, r4.w
+(ss)rsq r0.x, r0.x
+(ss)mul.f r5.y, r1.w, r0.x
+add.f r3.x, c15.x, r3.x
absneg.f r4.z, (abs)r4.z
-mul.f r4.w, r4.w, c5.z
-mov.f32f32 r4.x, r4.x
-absneg.f r5.x, (abs)r5.x
-mov.f32f32 r4.z, r4.z
-mov.f32f32 r5.y, r4.w
-add.f r4.x, r4.x, c13.w
-mov.f32f32 r5.x, r5.x
+mul.f r5.z, r1.w, r5.x
+absneg.f r5.y, (abs)r5.y
+mul.f r5.w, r3.x, c5.z
+add.f r3.x, r4.z, c13.w
+absneg.f r4.z, (abs)r5.z
+add.f r5.y, r5.y, c13.w
+mov.f32f32 r6.x, r5.w
+max.f r3.x, r3.x, c13.y
+mul.f r5.z, r3.z, r4.y
add.f r4.z, r4.z, c13.w
-mov.f32f32 r5.y, r5.y
-mov.f32f32 r4.x, r4.x
-add.f r5.x, r5.x, c13.w
-mov.f32f32 r4.z, r4.z
-mov.f32f32 r3.z, r3.z
-max.f r4.x, r4.x, c13.y
-mov.f32f32 r5.x, r5.x
-max.f r4.z, r4.z, c13.y
-add.f r1.z, r1.z, (neg)r3.z
-mov.f32f32 r3.z, r4.x
-mul.f r4.x, r2.w, r3.w
-max.f r5.x, r5.x, c13.y
-mov.f32f32 r4.z, r4.z
-mul.f r5.z, r2.w, r2.z
-mov.f32f32 r4.x, r4.x
-mov.f32f32 r5.x, r5.x
-mul.f r5.w, r2.w, r4.y
-mov.f32f32 r5.z, r5.z
-absneg.f r4.x, (abs)r4.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r5.w, r5.w
+max.f r5.y, r5.y, c13.y
+mul.f r6.y, r3.z, r0.x
absneg.f r5.z, (abs)r5.z
-mov.f32f32 r4.x, r4.x
-add.f r6.x, c15.x, r1.z
-absneg.f r5.w, (abs)r5.w
-mov.f32f32 r5.z, r5.z
-add.f r4.x, r4.x, c13.w
-mov.f32f32 r6.x, r6.x
-mov.f32f32 r5.w, r5.w
+max.f r4.z, r4.z, c13.y
+mul.f r5.x, r3.z, r5.x
+absneg.f r6.y, (abs)r6.y
add.f r5.z, r5.z, c13.w
-mov.f32f32 r4.x, r4.x
-mul.f r6.x, r6.x, c5.w
-add.f r5.w, r5.w, c13.w
-mov.f32f32 r5.z, r5.z
-max.f r4.x, r4.x, c13.y
-mov.f32f32 r6.y, r6.x
-mov.f32f32 r5.w, r5.w
-max.f r6.z, r5.z, c13.y
-mov.f32f32 r4.x, r4.x
-mov.f32f32 r5.z, r6.y
-max.f r5.w, r5.w, c13.y
-mov.f32f32 r6.y, r6.z
-add.f r6.z, r3.z, r4.x
-mul.f r3.w, r3.y, r3.w
-mov.f32f32 r6.w, r5.w
-add.f r7.x, r4.z, r6.y
-(ss)mul.f r2.z, r3.y, r2.z
-mov.f32f32 r3.w, r3.w
-add.f r7.y, r5.x, r6.w
-mul.f r4.y, r3.y, r4.y
-mov.f32f32 r2.z, r2.z
-absneg.f r3.w, (abs)r3.w
-bary.f r5.w, 2, r1.x
-mov.f32f32 r4.y, r4.y
-absneg.f r2.z, (abs)r2.z
-mov.f32f32 r3.w, r3.w
-add.f r7.z, r5.w, c14.w
+mul.f r6.z, c13.x, r3.w
+mul.f r0.z, r0.z, c15.z
+absneg.f r5.x, (abs)r5.x
+max.f r5.z, r5.z, c13.y
+add.f r6.y, r6.y, c13.w
+add.f r1.z, r1.z, (neg)r6.z
+add.f r5.x, r5.x, c13.w
+add.f r6.z, r3.x, r5.z
+mul.f r4.y, r4.x, r4.y
+max.f r6.w, r6.y, c13.y
+max.f r5.x, r5.x, c13.y
+mov.f32f32 r6.y, r1.z
+absneg.f r4.y, (abs)r4.y
+add.f r7.x, r5.y, r6.w
+add.f r7.y, r4.z, r5.x
+(ss)mul.f r0.x, r2.w, r0.x
+add.f r2.w, r4.y, c13.w
+mul.f r4.y, r4.x, r4.w
+add.f r4.w, c15.x, r6.y
+absneg.f r0.x, (abs)r0.x
+max.f r2.w, r2.w, c13.y
absneg.f r4.y, (abs)r4.y
-mov.f32f32 r2.z, r2.z
-add.f r3.w, r3.w, c13.w
-mov.f32f32 r5.w, r7.z
-mov.f32f32 r4.y, r4.y
-add.f r2.z, r2.z, c13.w
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r5.w, r5.w
+mul.f r7.w, r4.w, c5.w
+add.f r0.x, r0.x, c13.w
+mov.f32f32 r4.w, r2.w
add.f r4.y, r4.y, c13.w
-mov.f32f32 r2.z, r2.z
-max.f r3.w, r3.w, c13.y
-mul.f r0.z, r3.x, r0.z
-mov.f32f32 r3.x, r4.y
-max.f r2.z, r2.z, c13.y
-mov.f32f32 r3.w, r3.w
-sam.s (f32)(x)r7.w, r5.y, s#4, t#4
-(sy)mov.f32f32 r4.y, r7.w
-max.f r3.x, r3.x, c13.y
-mov.f32f32 r2.z, r2.z
-(ss)add.f r5.y, r6.z, r3.w
-mov.f32f32 r4.y, r4.y
+mov.f32f32 r6.y, r7.w
+max.f r0.x, r0.x, c13.y
+add.f r4.w, r6.z, r4.w
+max.f r4.y, r4.y, c13.y
+bary.f r6.z, 2, r1.x
+mov.f32f32 r7.z, r0.x
+mov.f32f32 r8.x, r4.w
+mov.f32f32 r8.y, r4.y
+add.f r9.x, r6.z, c14.w
+add.f r7.x, r7.x, r7.z
+exp2 r0.z, r0.z
+(ss)mov.f32f32 r7.z, r0.z
+add.f r7.y, r7.y, r8.y
+mov.f32f32 r6.z, r9.x
+rcp r8.y, r8.x
mov.f32f32 r3.x, r3.x
-add.f r5.z, r7.x, r2.z
-mov.f32f32 r5.y, r5.y
-add.f r5.w, c14.y, (neg)r1.w
-add.f r6.z, r7.y, r3.x
-mov.f32f32 r5.z, r5.z
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r5.w, r5.w
-add.f r7.x, c14.y, (neg)r0.w
-rcp r7.y, r5.y
-(ss)mov.f32f32 r7.y, r7.y
-mov.f32f32 r6.z, r6.z
-mul.f r0.z, r0.z, c15.z
-rcp r7.w, r5.y
-nop
-rcp r8.x, r5.z
-mul.f r3.z, r3.z, r7.y
-(ss)mov.f32f32 r7.y, r8.x
-mov.f32f32 r7.x, r7.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r3.z, r3.z
-mul.f r2.x, r2.x, c14.x
-rcp r8.x, r6.z
-(ss)mov.f32f32 r8.x, r8.x
-mul.f r4.z, r4.z, r7.y
-mul.f r7.y, r5.w, r7.x
-mov.f32f32 r2.x, r2.x
-mul.f r5.x, r5.x, r8.x
+mov.f32f32 r8.z, r7.x
+mov.f32f32 r8.w, r7.y
+add.f r7.z, c17.y, (neg)r7.z
+(ss)mul.f r3.x, r3.x, r8.y
+mul.f r9.y, r2.z, c14.x
+sam.s (f32)(x)r9.w, r6.x, s#4, t#4
+rcp r2.z, r7.y
+(ss)mov.f32f32 r6.x, r0.w
+mul.f r6.y, r7.z, c10.y
+mov.f32f32 r6.z, r3.x
+mov.f32f32 r7.y, r9.y
+rcp r7.z, r8.w
mov.f32f32 r4.z, r4.z
-mul.f r4.y, r7.y, r4.y
-mov.f32f32 r7.y, r2.x
-mov.f32f32 r5.x, r5.x
-mov.f32f32 r8.x, r2.x
-mov.f32f32 r8.y, r2.x
-mov.f32f32 r8.z, r7.y
-mul.f r2.y, r2.y, c14.x
-mov.f32f32 r7.y, r8.x
-mov.f32f32 r8.x, r8.y
-add.f r0.y, c15.y, r0.y
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r9.x, r7.y
-rcp r7.y, r6.z
-nop
-(ss)rcp r6.z, r6.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r8.y, r2.y
-mov.f32f32 r9.y, r2.y
-mov.f32f32 r9.z, r2.y
-mul.f r0.y, r0.y, c5.z
-mov.f32f32 r8.w, r8.y
-mov.f32f32 r9.y, r9.y
-mov.f32f32 r8.y, r9.z
-mov.f32f32 r9.z, r0.y
-(ss)mov.f32f32 r7.y, r7.y
-(ss)mov.f32f32 r6.z, r6.z
-mov.f32f32 r9.y, r9.y
-sam (f32)(xyzw)r9.w, r8.z, s#2, t#2
-(sy)(ss)mov.f32f32 r8.z, r9.w
-add.f r8.w, c13.z, (neg)r10.x
-mov.f32f32 r9.w, r10.y
-sam (f32)(xyzw)r10.x, r8.x, s#0, t#0
-(sy)(ss)mul.f r8.x, r10.z, r4.z
-mul.f r8.y, r8.z, r3.z
-mov.f32f32 r7.w, r7.w
-sam (f32)(xyzw)r10.z, r9.x, s#3, t#3
-(sy)mul.f r8.z, r10.w, r5.x
-(ss)mul.f r9.x, r10.z, r5.x
-mul.f r5.x, r11.x, r5.x
-mul.f r4.x, r4.x, r7.w
-mul.f r6.w, r6.w, r7.y
-mov.f32f32 r7.y, r8.w
-mul.f r7.w, r9.w, r3.z
-mov.f32f32 r4.x, r4.x
-mov.f32f32 r8.w, r2.y
-mov.f32f32 r6.w, r6.w
-mov.f32f32 r9.y, r2.y
-mov.f32f32 r7.y, r7.y
-mov.f32f32 r10.z, r8.w
-bary.f r8.w, 22, r1.x
-mov.f32f32 r11.x, r9.y
-mul.f r3.z, r7.y, r3.z
-rcp r7.y, r5.z
-(ss)mov.f32f32 r7.y, r7.y
-mov.f32f32 r8.w, r8.w
-mov.f32f32 r9.y, r9.z
-mul.f r10.y, r10.y, r4.z
-mul.f r4.z, r10.x, r4.z
-mul.f r8.w, r8.w, c14.x
-mul.f r6.y, r6.y, r7.y
-mov.f32f32 r6.x, r6.x
-mul.f r3.x, r3.x, r6.z
-mov.f32f32 r6.z, r8.w
-mov.f32f32 r6.y, r6.y
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r9.z, r6.x
-mov.f32f32 r6.x, r6.z
-mov.f32f32 r7.y, r6.z
-mov.f32f32 r11.z, r2.y
-mov.f32f32 r2.y, r6.z
-mov.f32f32 r10.w, r6.x
-mov.f32f32 r11.y, r7.y
-mov.f32f32 r6.x, r7.z
-mov.f32f32 r11.w, r2.y
-mov.f32f32 r2.y, r3.x
-exp2 r0.z, r0.z
-(ss)mov.f32f32 r0.z, r0.z
-mov.f32f32 r9.w, r6.x
-sam (f32)(xyzw)r12.x, r10.z, s#2, t#2
-(sy)mov.f32f32 r3.x, r12.y
-(ss)nop
-sam (f32)(xyzw)r10.z, r11.x, s#3, t#3
-(sy)mad.f32 r6.x, r10.w, r6.w, r8.z
-mad.f32 r7.y, r10.z, r6.w, r9.x
-mad.f32 r5.x, r11.x, r6.w, r5.x
-mad.f32 r3.x, r3.x, r4.x, r8.y
-rcp r5.y, r5.y
-(ss)mov.f32f32 r5.y, r5.y
-mov.f32f32 r6.w, r2.x
-add.f r8.y, c13.z, (neg)r12.x
-mov.f32f32 r8.z, r12.z
-mul.f r3.w, r3.w, r5.y
-mov.f32f32 r8.w, r6.w
-mov.f32f32 r5.y, r6.z
-mov.f32f32 r6.w, r8.y
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r8.y, r2.x
-mov.f32f32 r9.x, r5.y
-mov.f32f32 r5.y, r6.w
-mad.f32 r6.w, r8.z, r4.x, r7.w
-mov.f32f32 r8.y, r8.y
-mov.f32f32 r7.w, r6.z
-mad.f32 r3.z, r5.y, r4.x, r3.z
-sam (f32)(xyzw)r10.z, r11.z, s#0, t#0
-(sy)mad.f32 r4.x, r10.w, r6.y, r10.y
-(ss)nop
-sam (f32)(xyzw)r11.y, r8.w, s#3, t#3
-(sy)mad.f32 r5.x, r11.w, r2.y, r5.x
-mov.f32f32 r8.z, r7.w
-mad.f32 r5.y, r11.z, r2.y, r6.x
-mad.f32 r2.y, r11.y, r2.y, r7.y
-mov.f32f32 r5.x, r5.x
-mad.f32 r6.x, r11.x, r6.y, r8.x
-rcp r5.z, r5.z
+rcp r7.w, r8.z
mov.f32f32 r5.y, r5.y
-mov.f32f32 r2.y, r2.y
-sam (f32)(xyzw)r7.w, r8.y, s#2, t#2
-(sy)mov.f32f32 r7.y, r8.x
-mul.f r5.x, c7.z, r5.x
-mul.f r5.y, c7.y, r5.y
-mul.f r2.y, c7.x, r2.y
-mad.f32 r3.x, r7.y, r3.w, r3.x
-add.f r7.y, c13.z, (neg)r7.w
-mov.f32f32 r7.w, r8.y
-(ss)mov.f32f32 r5.z, r5.z
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r7.y, r7.y
-mad.f32 r6.w, r7.w, r3.w, r6.w
-mul.f r2.z, r2.z, r5.z
-mad.f32 r3.x, c14.y, r3.x, c14.z
-mov.f32f32 r5.z, r7.y
-mov.f32f32 r6.w, r6.w
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r3.x, r3.x
-bary.f r7.y, 4, r1.x
-bary.f r7.w, 5, r1.x
-bary.f r8.x, 6, r1.x
-mad.f32 r3.z, r5.z, r3.w, r3.z
-mul.f r3.w, r7.y, r3.x
-mul.f r5.z, r7.w, r3.x
-mul.f r3.x, r8.x, r3.x
-mov.f32f32 r3.z, r3.z
-mad.f32 r6.w, c14.y, r6.w, c14.z
-mov.f32f32 r2.x, r2.x
-mad.f32 r4.z, r10.z, r6.y, r4.z
-mad.f32 r3.z, c14.y, r3.z, c14.z
-mov.f32f32 r6.y, r6.w
-mov.f32f32 r7.w, r2.x
-mov.f32f32 r2.x, r6.z
-mov.f32f32 r3.z, r3.z
-bary.f r6.z, 10, r1.x
-bary.f r6.w, 11, r1.x
-bary.f r7.y, 12, r1.x
-mov.f32f32 r8.x, r2.x
-mov.f32f32 r2.x, r6.z
-bary.f r6.z, 7, r1.x
+rcp r8.y, r8.w
+mov.f32f32 r5.x, r5.x
+mov.f32f32 r10.x, r7.y
+(ss)bary.f r8.w, 24, r1.x
+(ss)mul.f r4.z, r4.z, r7.z
+mul.f r5.y, r5.y, r7.w
+mov.f32f32 r10.z, r7.y
+mul.f r11.x, r8.w, c14.x
+mov.f32f32 r7.w, r4.z
+mov.f32f32 r8.w, r5.y
+mul.f r5.x, r5.x, r8.y
+mov.f32f32 r7.z, r11.x
+add.f r3.w, c14.y, (neg)r3.w
+mul.f r0.z, r0.z, c13.z
+mul.f r2.z, r4.y, r2.z
+mov.f32f32 r10.y, r7.z
+mov.f32f32 r10.w, r7.z
+mov.f32f32 r4.y, r5.x
+sam (f32)(xyzw)r11.y, r7.y, s#0, t#0
+(sy)mul.f r8.y, r11.w, r8.w
+rcp r8.z, r8.z
mov.f32f32 r6.w, r6.w
-mov.f32f32 r7.y, r7.y
-sam.s (f32)(x)r8.y, r9.y, s#4, t#4
-(sy)mov.f32f32 r8.y, r8.y
-mul.f r2.x, r2.x, (neg)r6.z
-mul.f r6.w, r6.w, (neg)r6.z
-mul.f r6.z, r7.y, (neg)r6.z
+mul.f r8.w, r11.z, r8.w
+mul.f r5.y, r11.y, r5.y
+sam (f32)(xyzw)r11.y, r10.x, s#2, t#2
+(sy)(ss)mul.f r10.x, r11.y, r6.z
+rcp r8.x, r8.x
+mov.f32f32 r5.z, r5.z
+sam (f32)(xyzw)r12.x, r10.z, s#3, t#3
+(sy)mul.f r10.y, r12.y, r7.w
+add.f r9.z, c13.z, (neg)r11.z
+(ss)mul.f r10.z, r12.z, r7.w
+(ss)mul.f r5.z, r5.z, r8.x
+mov.f32f32 r12.y, r7.z
+bary.f r7.w, 22, r1.x
+mul.f r6.z, r9.z, r6.z
+mov.f32f32 r8.x, r5.z
+mul.f r6.w, r6.w, r8.z
+mul.f r9.z, r7.w, c14.x
+mul.f r4.z, r12.x, r4.z
+mul.f r3.x, r11.w, r3.x
+mov.f32f32 r8.z, r6.w
+mov.f32f32 r11.y, r9.z
+mov.f32f32 r10.w, r3.w
+add.f r0.z, r0.z, r6.y
+mov.f32f32 r6.y, r2.z
+mov.f32f32 r7.w, r11.y
+mov.f32f32 r12.z, r11.y
+mul.f r6.x, r6.x, r10.w
+sam (f32)(xyzw)r12.w, r11.x, s#0, t#0
+(sy)mad.f32 r8.y, r13.y, r8.z, r8.y
+rcp r7.x, r7.x
+(ss)mul.f r0.x, r0.x, r7.x
+(ss)mad.f32 r7.x, r13.x, r8.z, r8.w
+mad.f32 r5.y, r12.w, r6.w, r5.y
+sam (f32)(xyzw)r12.w, r7.z, s#2, t#2
+(sy)mad.f32 r6.w, r13.x, r8.x, r10.x
+rcp r4.w, r4.w
+(ss)mul.f r2.w, r2.w, r4.w
+sam (f32)(xyzw)r11.z, r12.y, s#3, t#3
+(sy)(ss)mad.f32 r4.w, r11.w, r4.y, r10.y
+add.f r7.z, c13.z, (neg)r12.w
+mad.f32 r4.y, r12.x, r4.y, r10.z
+mov.f32f32 r8.z, r2.w
+mov.f32f32 r10.x, r7.y
+mov.f32f32 r10.y, r11.y
+mov.f32f32 r10.z, r7.y
+mov.f32f32 r10.w, r11.y
+mad.f32 r6.z, r7.z, r8.x, r6.z
+mov.f32f32 r7.y, r0.x
+sam (f32)(xyzw)r11.w, r9.y, s#0, t#0
+mad.f32 r4.z, r11.z, r5.x, r4.z
+mad.f32 r3.x, r13.y, r5.z, r3.x
+sam (f32)(xyzw)r12.z, r10.x, s#2, t#2
+(sy)mad.f32 r5.x, r12.w, r8.z, r6.w
+mad.f32 r5.z, r12.y, r7.y, r8.y
(ss)nop
-sam (f32)(xyzw)r8.z, r7.w, s#0, t#0
-(sy)mad.f32 r6.x, r9.x, r2.z, r6.x
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r6.w, r6.w
-mov.f32f32 r6.z, r6.z
-mov.f32f32 r6.x, r6.x
-mad.f32 r2.x, r2.x, r3.z, r3.w
-mad.f32 r3.w, r6.w, r3.z, r5.z
-mad.f32 r3.x, r6.z, r3.z, r3.x
-mad.f32 r3.z, r8.w, r2.z, r4.x
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r3.w, r3.w
-mad.f32 r0.x, r0.x, r6.y, r2.x
-mad.f32 r2.x, r2.w, r6.y, r3.w
-mov.f32f32 r2.w, r3.x
-mov.f32f32 r3.x, r3.z
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r2.x, r2.x
-mad.f32 r2.w, r3.y, r6.y, r2.w
-nop
-mul.f r3.y, r0.x, r0.x
-mad.f32 r2.z, r8.z, r2.z, r4.z
-mad.f32 r3.y, r2.x, r2.x, r3.y
-mov.f32f32 r2.w, r2.w
-add.f r1.w, r1.w, c13.z
-add.f r3.z, c17.y, (neg)r0.z
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r2.z, r2.z
-mad.f32 r3.y, r2.w, r2.w, r3.y
-mul.f r3.w, r1.w, r7.x
-mul.f r3.z, r3.z, c10.y
-mul.f r0.z, r0.z, c13.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r4.x, r4.w
-add.f r1.z, c15.y, r1.z
-rsq r3.y, r3.y
-(ss)mov.f32f32 r3.y, r3.y
-mad.f32 r3.w, r3.w, r8.y, r4.y
-add.f r0.z, r0.z, r3.z
-mov.f32f32 r0.y, r0.y
-mul.f r0.x, r0.x, r3.y
-mul.f r2.x, r2.x, r3.y
-mul.f r2.w, r2.w, r3.y
-nop
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r3.y, r3.w
-mul.f r3.z, r0.x, r0.x
-mul.f r3.w, (neg)c8.x, r0.x
-mad.f32 r3.z, r2.x, r2.x, r3.z
-mad.f32 r3.w, (neg)c8.y, r2.x, r3.w
-mov.f32f32 r4.x, r4.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r3.z, r3.z
-mov.f32f32 r3.w, r3.w
-mad.f32 r3.z, r2.w, r2.w, r3.z
-mad.f32 r3.w, (neg)c8.z, r2.w, r3.w
-mul.f r1.z, r1.z, c5.w
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r6.y, r0.y
-add.f r0.y, r0.w, c13.z
-bary.f r0.w, 8, r1.x
-rsq r3.z, r3.z
-(ss)mov.f32f32 r3.z, r3.z
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r4.y, r1.z
-mov.f32f32 r0.z, r0.z
-mul.f r0.x, r0.x, r3.z
-max.f r3.w, r3.w, c13.y
-mul.f r2.x, r2.x, r3.z
-mul.f r2.w, r2.w, r3.z
-mov.f32f32 r0.x, r0.x
-bary.f r3.z, 13, r1.x
-mov.f32f32 r3.w, r3.w
-bary.f r4.z, 17, r1.x
-bary.f r4.w, 16, r1.x
-mul.f r0.x, r0.x, r3.z
-mov.f32f32 r2.x, r2.x
-bary.f r3.z, 14, r1.x
-bary.f r5.z, 18, r1.x
-mad.f32 r6.z, c7.y, r3.w, (neg)r4.z
-mad.f32 r6.w, c7.x, r3.w, (neg)r4.w
-mad.f32 r0.x, r2.x, r3.z, r0.x
-mad.f32 r2.x, c7.z, r3.w, (neg)r5.z
-mov.f32f32 r3.z, r6.z
-mov.f32f32 r3.w, r6.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r2.w, r2.w
-bary.f r6.z, 15, r1.x
-mov.f32f32 r2.x, r2.x
-mad.f32 r3.z, c11.x, r3.z, r4.z
-mad.f32 r3.w, c11.x, r3.w, r4.w
-mad.f32 r0.x, r2.w, r6.z, r0.x
-mad.f32 r2.x, c11.x, r2.x, r5.z
-mov.f32f32 r2.w, r3.z
-mov.f32f32 r3.z, r3.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r4.y, r4.y
-mov.f32f32 r3.w, r7.z
-max.f r0.x, c13.y, r0.x
+sam (f32)(xyzw)r10.x, r10.z, s#3, t#3
+(sy)mad.f32 r4.y, r10.z, r6.y, r4.y
+mad.f32 r4.w, r10.y, r6.y, r4.w
+mad.f32 r5.x, c14.y, r5.x, c14.z
+bary.f r6.y, 6, r1.x
+mul.f r4.y, c7.z, r4.y
+mul.f r4.w, c7.y, r4.w
+mov.f32f32 r6.w, r5.x
+bary.f r7.z, 4, r1.x
+bary.f r8.x, 5, r1.x
+mul.f r5.x, r6.y, r5.x
+add.f r6.y, c13.z, (neg)r12.z
+mul.f r7.z, r7.z, r6.w
+mul.f r6.w, r8.x, r6.w
+mov.f32f32 r8.y, r5.z
+mad.f32 r6.y, r6.y, r8.z, r6.z
+mad.f32 r6.z, r12.x, r7.y, r7.x
+mad.f32 r2.z, r10.x, r2.z, r4.z
+mad.f32 r2.w, r13.x, r2.w, r3.x
+mad.f32 r3.x, c14.y, r6.y, c14.z
+bary.f r4.z, 12, r1.x
+bary.f r6.y, 7, r1.x
+mov.f32f32 r7.x, r6.z
+mov.f32f32 r7.y, r3.x
+bary.f r8.x, 10, r1.x
+bary.f r8.z, 11, r1.x
+mul.f r4.z, r4.z, (neg)r6.y
+mul.f r9.y, c7.x, r2.z
+mul.f r2.z, r8.x, (neg)r6.y
+mul.f r6.y, r8.z, (neg)r6.y
+mad.f32 r3.x, r4.z, r3.x, r5.x
+mad.f32 r2.w, c14.y, r2.w, c14.z
+mad.f32 r2.z, r2.z, r7.y, r7.z
+mad.f32 r4.z, r6.y, r7.y, r6.w
+mad.f32 r0.x, r11.w, r0.x, r5.y
+mov.f32f32 r5.x, r2.w
+mad.f32 r2.w, r4.x, r2.w, r3.x
+(rpt1)nop
+mad.f32 r1.w, r1.w, r5.x, r2.z
+mad.f32 r2.z, r3.z, r5.x, r4.z
+(rpt1)nop
+mov.f32f32 r3.x, r1.w
+mov.f32f32 r3.z, r2.z
+mov.f32f32 r4.x, r2.w
+mov.f32f32 r4.z, r0.x
+mul.f r1.w, r1.w, r3.x
+mul.f r5.x, r6.x, r9.w
+mad.f32 r1.w, r2.z, r3.z, r1.w
+add.f r0.y, c15.y, r0.y
+mad.f32 r1.w, r4.x, r4.x, r1.w
max.f r0.z, r0.z, c13.y
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r4.z, r3.w
-mov.f32f32 r0.x, r0.x
+add.f r1.z, c15.y, r1.z
+mul.f r8.z, r0.y, c5.z
+add.f r0.y, r2.x, c13.z
+add.f r2.x, r3.y, c13.z
+bary.f r2.z, 9, r1.x
+rsq r1.w, r1.w
+(ss)mov.f32f32 r3.y, r1.w
+(ss)mul.f r1.w, r2.w, r1.w
+mov.f32f32 r7.z, r8.z
min.f r0.z, r0.z, c13.z
-mov.f32f32 r1.z, r1.z
-mul.f r3.w, r5.w, r0.y
-mov.f32f32 r0.w, r0.w
-mul.f r0.y, r1.w, r0.y
-sam.s (f32)(x)r4.x, r4.x, s#4, t#4
-(sy)mov.f32f32 r1.w, r4.x
-log2 r0.x, r0.x
-(ss)mul.f r0.x, c11.y, r0.x
-add.f r4.x, c17.y, (neg)r0.z
-add.f r4.y, c17.y, (neg)r0.z
-add.f r4.z, c17.y, (neg)r0.z
-mov.f32f32 r0.x, r0.x
-mad.f32 r1.w, r3.w, r1.w, r3.y
-mul.f r3.y, r4.x, c9.z
-mul.f r3.w, r4.y, c9.y
-mul.f r4.x, r4.z, c9.x
-mov.f32f32 r1.w, r1.w
+mul.f r2.w, r3.x, r3.y
+mul.f r3.x, r3.z, r3.y
+mov.f32f32 r3.y, r1.w
+mov.f32f32 r8.x, r9.x
+mov.f32f32 r3.z, r2.w
+mul.f r2.w, (neg)c8.x, r2.w
+mov.f32f32 r4.x, r3.x
+mad.f32 r2.w, (neg)c8.y, r3.x, r2.w
+mul.f r3.x, r3.z, r3.z
+mad.f32 r1.w, (neg)c8.z, r1.w, r2.w
+mad.f32 r2.w, r4.x, r4.x, r3.x
+sam.s (f32)(x)r7.y, r7.z, s#4, t#4
+mul.f r3.x, r0.y, r3.w
+mad.f32 r2.w, r3.y, r3.y, r2.w
+max.f r1.w, r1.w, c13.y
+bary.f r3.w, 16, r1.x
+(sy)mad.f32 r3.x, r3.x, r7.y, r5.x
+add.f r5.x, c17.y, (neg)r0.z
+add.f r5.y, c17.y, (neg)r0.z
+add.f r6.x, c17.y, (neg)r0.z
+rsq r2.w, r2.w
+(ss)mov.f32f32 r6.y, r2.w
+mov.f32f32 r6.w, r1.w
+bary.f r7.y, 17, r1.x
+(ss)bary.f r7.z, 18, r1.x
+mul.f r3.z, r3.z, r6.y
+bary.f r7.w, 13, r1.x
+mad.f32 r8.x, c7.z, r6.w, (neg)r7.z
+mad.f32 r6.w, c7.y, r6.w, (neg)r7.y
+mul.f r4.x, r4.x, r6.y
+mul.f r3.z, r3.z, r7.w
+bary.f r6.y, 14, r1.x
+mad.f32 r7.z, c11.x, r8.x, r7.z
+mad.f32 r6.w, c11.x, r6.w, r7.y
+mul.f r2.w, r3.y, r2.w
+mad.f32 r3.y, r4.x, r6.y, r3.z
+bary.f (ei)r1.x, 15, r1.x
+mad.f32 r1.y, c7.x, r1.w, (neg)r3.w
+(rpt1)nop
+mad.f32 r1.x, r2.w, r1.x, r3.y
+mad.f32 r1.y, c11.x, r1.y, r3.w
+mul.f r8.w, r1.z, c5.w
+mul.f r1.z, r5.x, c9.z
+max.f r1.x, c13.y, r1.x
+mul.f r1.w, r5.y, c9.y
+mul.f r3.y, r6.x, c9.x
+mov.f32f32 r6.x, r8.w
+mov.f32f32 r6.y, r9.x
nop
-exp2 r0.x, r0.x
-(ss)mul.f r4.y, r5.x, r0.x
-mul.f r4.z, r5.y, r0.x
-mad.f32 r2.x, r6.x, r2.x, r4.y
-mad.f32 r2.w, r3.x, r2.w, r4.z
-(ss)mul.f r0.x, r2.y, r0.x
-mov.f32f32 r6.z, r1.z
-mov.f32f32 r1.z, r2.x
-mov.f32f32 r2.x, r7.z
-mov.f32f32 r2.y, r2.w
-mad.f32 r0.x, r2.z, r3.z, r0.x
-mov.f32f32 r4.y, r0.w
-mov.f32f32 r6.w, r2.x
-bary.f (ei)r0.w, 9, r1.x
-mov.f32f32 r1.x, c13.z
-mov.f32f32 r1.y, c13.y
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.w, r1.x
-sam.s (f32)(x)r4.z, r6.y, s#4, t#4
-(sy)mov.f32f32 r1.x, r4.z
-(rpt2)nop
-mad.f32 r0.y, r0.y, r1.x, r1.w
-mov.f32f32 r4.z, r0.w
+sam.s (f32)(x)r9.z, r8.z, s#4, t#4
+mul.f r0.w, r0.w, r2.x
+log2 r1.x, r1.x
+(ss)mul.f r1.x, c11.y, r1.x
+mul.f r0.y, r0.y, r2.x
+sam (f32)(w)r9.w, r2.y, s#1, t#1
+(sy)cmps.f.lt r2.x, r10.z, c16.x
+mov.f32f32 r2.w, c13.z
+(ss)mov.f32f32 r2.y, c13.y
(rpt1)nop
-mov.f32f32 r0.y, r0.y
-(rpt2)nop
-mul.f r0.y, c15.w, r0.y
-sam (f32)(w)r4.y, r4.y, s#1, t#1
-(sy)mov.f32f32 r0.w, r5.x
-cmps.f.lt r1.x, r5.x, c16.x
+exp2 r1.x, r1.x
+(ss)mul.f r2.z, r4.y, r1.x
+mul.f r3.z, r4.w, r1.x
+mad.f32 r2.z, r8.y, r7.z, r2.z
+sam.s (f32)(x)r7.y, r5.w, s#4, t#4
+(sy)mad.f32 r0.w, r0.w, r7.y, r3.x
+mad.f32 r3.x, r7.x, r6.w, r3.z
+mad.f32 r0.y, r0.y, r9.z, r0.w
+mul.f r0.w, r9.y, r1.x
+(ss)cov.u32f32 r1.x, r2.x
nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.w, r0.w
-cov.u32f32 r1.x, r1.x
+mul.f r0.y, c15.w, r0.y
+mad.f32 r0.w, r4.z, r1.y, r0.w
+cmps.f.ne r1.x, r1.x, c13.y
nop
-mul.f r1.z, r1.z, r0.y
-mul.f r1.w, r2.y, r0.y
-mul.f r0.x, r0.x, r0.y
-cmps.f.ne r0.y, r1.x, c13.y
-mov.f32f32 r1.x, r1.z
-mov.f32f32 r1.z, r1.w
-mad.f32 r1.x, c6.z, r6.x, r1.x
-mad.f32 r1.z, c6.y, r3.x, r1.z
-mov.f32f32 r0.x, r0.x
-sel.b32 r0.y, r1.y, r0.y, r0.w
-mov.f32f32 r0.w, r1.x
-mov.f32f32 r1.x, r1.z
-mad.f32 r0.x, c6.x, r2.z, r0.x
+mov.f32f32 r1.y, r0.y
+mul.f r0.y, r0.w, r0.y
+sel.b32 r0.w, r2.y, r1.x, r10.z
nop
-mul.f r0.w, r0.z, r0.w
-mul.f r1.x, r0.z, r1.x
-mov.f32f32 r0.x, r0.x
+mul.f r1.x, r2.z, r1.y
+mul.f r1.y, r3.x, r1.y
+mad.f32 r1.x, c6.z, r5.z, r1.x
+mad.f32 r1.y, c6.y, r6.z, r1.y
+mad.f32 r0.x, c6.x, r0.x, r0.y
nop
-add.f r0.w, r0.w, r3.y
-add.f r1.x, r1.x, r3.w
+mul.f r0.y, r0.z, r1.x
+mul.f r1.x, r0.z, r1.y
mul.f r0.x, r0.z, r0.x
nop
-mul.f r0.z, r0.w, r0.y
-mul.f r0.w, r1.x, r0.y
-add.f r0.x, r0.x, r4.x
+add.f r0.y, r0.y, r1.z
+add.f r0.z, r1.x, r1.w
+add.f r0.x, r0.x, r3.y
nop
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-mul.f r0.x, r0.x, r0.y
+mul.f r2.z, r0.y, r0.w
+mul.f r2.y, r0.z, r0.w
+mul.f r2.x, r0.x, r0.w
+end
nop
-mov.f32f32 r0.y, r0.z
-mov.f32f32 r0.z, r0.w
-mov.f32f32 r0.x, r0.x
nop
-mov.f32f32 r2.z, r0.y
-mov.f32f32 r2.y, r0.z
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
-mov.f32f32 r2.x, r0.x
-end
; FRAG: outputs: r2.x (1:0)
-; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r5.w (5:10,cm=f,il=12,b=1) r6.x (5:11,cm=f,il=16,b=1) r5.w (5:12,cm=f,il=20,b=1) r63.y (5:13,cm=f,il=24,b=1) r1.y (5:14,cm=f,il=28,b=1) r7.x (5:15,cm=f,il=32,b=1)
-; FRAG: 539 instructions, 0 half, 13 full
+; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r5.z (5:10,cm=f,il=12,b=1) r7.w (5:11,cm=f,il=16,b=1) r0.y (5:12,cm=f,il=20,b=1) r63.y (5:13,cm=f,il=24,b=1) r1.w (5:14,cm=f,il=28,b=1) r7.x (5:15,cm=f,il=32,b=1)
+; FRAG: 354 instructions, 0 half, 14 full
diff --git a/reference/0ad-alpine-valley/0ad-53.asm b/reference/0ad-alpine-valley/0ad-53.asm
index 890976a..4dd0acb 100644
--- a/reference/0ad-alpine-valley/0ad-53.asm
+++ b/reference/0ad-alpine-valley/0ad-53.asm
@@ -1,14 +1,14 @@
; options:
; VERT: new compiler
-@in(r2.w) in0
-@in(r3.x) in1
-@in(r3.y) in2
-@in(r5.x) in4
-@in(r5.y) in5
-@in(r5.z) in6
-@in(r8.x) in8
-@in(r8.y) in9
-@in(r8.z) in10
+@in(r6.z) in0
+@in(r6.w) in1
+@in(r7.x) in2
+@in(r5.w) in4
+@in(r6.x) in5
+@in(r6.y) in6
+@in(r2.w) in8
+@in(r3.x) in9
+@in(r3.y) in10
@out(r0.x) out0
@out(r0.y) out1
@out(r0.z) out2
@@ -41,163 +41,105 @@
@out(r7.y) out29
@out(r7.z) out30
@out(r7.w) out31
-(sy)(ss)add.f r0.x, c4.x, (neg)r2.w
-mul.f r0.y, r5.x, r5.x
-mul.f r0.z, c8.w, r2.w
-mul.f r0.w, c8.z, r2.w
+@const(c13.x) 0x3f800000, 0x00000000, 0xbf800000, 0x00000000
+@const(c14.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
+(sy)(ss)add.f r0.x, c4.x, (neg)r6.z
+mul.f r0.y, r5.w, r5.w
+mul.f r0.z, c8.y, r6.z
+mul.f r0.w, c8.x, r6.z
mul.f r1.x, r0.x, r0.x
-add.f r1.y, c4.y, (neg)r3.x
+add.f r1.z, c4.y, (neg)r6.w
add.f r0.y, c13.x, (neg)r0.y
-mad.f32 r0.z, c9.w, r3.x, r0.z
-mad.f32 r0.w, c9.z, r3.x, r0.w
-mad.f32 r1.x, r1.y, r1.y, r1.x
-mov.f32f32 r0.y, r0.y
-mad.f32 r0.z, c10.w, r3.y, r0.z
-mad.f32 r0.w, c10.z, r3.y, r0.w
-mov.f32f32 r1.x, r1.x
-add.f r2.x, c4.z, (neg)r3.y
-mul.f r2.y, r0.y, r0.y
-mul.f r1.z, r5.y, r5.x
-add.f r0.z, r0.z, c11.w
-mad.f32 r1.x, r2.x, r2.x, r1.x
-add.f r0.w, r0.w, c11.z
-mul.f r2.z, c8.y, r2.w
-mul.f r3.z, c8.x, r2.w
-add.f r1.z, c13.y, (neg)r1.z
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
+mad.f32 r0.z, c9.y, r6.w, r0.z
+mad.f32 r0.w, c9.x, r6.w, r0.w
+mad.f32 r1.x, r1.z, r1.z, r1.x
+add.f r1.w, c4.z, (neg)r7.x
+mov.f32f32 r1.y, r0.y
+mad.f32 r0.z, c10.y, r7.x, r0.z
+mad.f32 r0.w, c10.x, r7.x, r0.w
+mad.f32 r1.x, r1.w, r1.w, r1.x
+mul.f r2.x, r1.y, r1.y
+mul.f r1.y, r6.x, r5.w
+add.f r0.z, r0.z, c11.y
+add.f r0.w, r0.w, c11.x
+mul.f r2.y, r6.y, r5.w
+mul.f r2.z, c8.w, r6.z
rsq r1.x, r1.x
-(ss)mov.f32f32 r1.x, r1.x
-mov.f32f32 r3.w, r1.z
-mov.f32f32 r1.w, r0.z
-mov.f32f32 r1.z, r0.w
+(ss)mov.f32f32 r3.z, r1.x
+add.f r3.w, c13.y, (neg)r1.y
mad.f32 r0.x, r0.x, r1.x, (neg)c5.x
-mad.f32 r0.z, r3.w, r3.w, r2.y
-mad.f32 r0.w, r1.y, r1.x, (neg)c5.y
-mad.f32 r1.x, r2.x, r1.x, (neg)c5.z
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.z, r0.z
-mul.f r1.y, r5.z, r5.x
-mov.f32f32 r0.w, r0.w
-mul.f r2.x, r0.x, r0.x
-mov.f32f32 r1.x, r1.x
-add.f r1.y, c13.y, (neg)r1.y
-mad.f32 r2.x, r0.w, r0.w, r2.x
-mad.f32 r2.y, c9.y, r3.x, r2.z
-mad.f32 r2.z, c9.x, r3.x, r3.z
-mad.f32 r2.y, c10.y, r3.y, r2.y
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r3.z, r1.y
-mad.f32 r1.y, r1.x, r1.x, r2.x
-add.f r2.x, r2.y, c11.y
-mad.f32 r2.y, c10.x, r3.y, r2.z
-mul.f r5.w, c0.w, r2.w
-mul.f r6.x, c0.z, r2.w
-mul.f r6.y, c0.y, r2.w
-mul.f r6.z, c0.x, r2.w
-rsq r1.y, r1.y
-(ss)mov.f32f32 r1.y, r1.y
-mad.f32 r0.z, r3.z, r3.z, r0.z
-mul.f r2.x, r2.x, c12.y
-add.f r2.y, r2.y, c11.x
-mul.f r1.x, r1.x, r1.y
-mul.f r0.w, r0.w, r1.y
-mul.f r0.x, r0.x, r1.y
-mov.f32f32 r1.y, r2.x
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.x, r0.x
+mul.f r1.y, r0.z, c12.y
+mad.f32 r0.z, r1.z, r3.z, (neg)c5.y
+mov.f32f32 r1.z, r3.w
+(ss)mov.f32f32 r1.x, r0.x
+mad.f32 r1.w, r1.w, r3.z, (neg)c5.z
+mov.f32f32 r3.z, r0.z
+mad.f32 r2.x, r3.w, r1.z, r2.x
+mul.f r1.x, r1.x, r1.x
+add.f r2.y, c13.y, (neg)r2.y
+mad.f32 r0.z, r0.z, r3.z, r1.x
+mov.f32f32 r3.w, r1.w
+mul.f r1.x, r0.w, c12.x
+mov.f32f32 r0.w, r2.y
+mad.f32 r2.z, c9.w, r6.w, r2.z
+mad.f32 r0.z, r1.w, r3.w, r0.z
+mad.f32 r1.w, c10.w, r7.x, r2.z
+mul.f r2.z, c8.z, r6.z
+mul.f r4.x, c0.w, r6.z
+mul.f r5.x, c0.z, r6.z
+mul.f r5.y, c0.y, r6.z
+mul.f r5.z, c0.x, r6.z
rsq r0.z, r0.z
-(ss)mov.f32f32 r0.z, r0.z
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.x, r0.x
+(ss)mov.f32f32 r4.z, r0.z
+mul.f r4.y, r0.x, r0.z
+mad.f32 r0.x, r2.y, r0.w, r2.x
+add.f r1.w, r1.w, c11.w
+mul.f r4.w, r3.w, r4.z
+mul.f r4.z, r3.z, r4.z
+(ss)mad.f32 r0.z, c9.z, r6.w, r2.z
+mad.f32 r2.y, c1.w, r6.w, r4.x
+mad.f32 r5.x, c1.z, r6.w, r5.x
+rsq r0.x, r0.x
+(ss)mov.f32f32 r3.z, r0.x
+mul.f r2.x, r0.y, r0.x
+(ss)mad.f32 r0.x, c10.z, r7.x, r0.z
+mad.f32 r0.y, c2.w, r7.x, r2.y
+mul.f r2.z, r0.w, r3.z
+mul.f r2.y, r1.z, r3.z
+mov.f32f32 r0.z, r2.x
+add.f r1.z, r0.x, c11.z
+mov.f32f32 r0.x, r2.z
+mov.f32f32 r4.x, r2.y
+mul.f r7.y, r6.x, r0.z
+add.f r0.w, r0.y, c3.w
+mul.f r0.y, r5.w, r0.x
+mul.f r3.z, r6.y, r4.x
+mad.f32 r3.w, r6.y, r0.z, (neg)r0.y
+mad.f32 r3.z, r6.x, r0.x, (neg)r3.z
+mad.f32 r4.x, r5.w, r4.x, (neg)r7.y
+mad.f32 r0.x, c2.z, r7.x, r5.x
+mad.f32 r0.y, c1.y, r6.w, r5.y
+mad.f32 r5.x, c1.x, r6.w, r5.z
+mad.f32 r0.y, c2.y, r7.x, r0.y
+add.f r0.z, r0.x, c3.z
+mad.f32 r0.x, c2.x, r7.x, r5.x
nop
-mov.f32f32 r4.w, r1.x
-mov.f32f32 r4.z, r0.w
-mov.f32f32 r4.y, r0.x
-mul.f r0.x, r0.y, r0.z
-mul.f r0.y, r3.z, r0.z
-mul.f r0.z, r3.w, r0.z
-mul.f r0.w, r2.y, c12.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r1.x, r0.w
-mul.f r0.w, r5.y, r0.x
-mul.f r2.x, r5.x, r0.y
-mad.f32 r0.w, r5.x, r0.z, (neg)r0.w
-mad.f32 r2.x, r5.z, r0.x, (neg)r2.x
-mul.f r2.y, r5.z, r0.z
-mov.f32f32 r2.z, r0.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.x, r2.x
-mad.f32 r0.y, r5.y, r0.y, (neg)r2.y
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r3.w, r2.x
-mov.f32f32 r0.y, r0.y
-nop
-mov.f32f32 r4.x, r0.w
-mov.f32f32 r2.y, r0.z
-mov.f32f32 r3.z, r0.y
-mov.f32f32 r0.x, r0.x
-mad.f32 r0.y, c1.w, r3.x, r5.w
-mad.f32 r0.z, c1.z, r3.x, r6.x
-mad.f32 r0.w, c1.y, r3.x, r6.y
-mov.f32f32 r2.x, r0.x
-mad.f32 r0.x, c2.w, r3.y, r0.y
-mad.f32 r0.y, c2.z, r3.y, r0.z
-mad.f32 r0.z, c2.y, r3.y, r0.w
-mad.f32 r0.w, c1.x, r3.x, r6.z
-add.f r0.x, r0.x, c3.w
-add.f r0.y, r0.y, c3.z
-add.f r5.w, r0.z, c3.y
-mad.f32 r6.x, c2.x, r3.y, r0.w
-mov.f32f32 r0.w, r0.x
-mov.f32f32 r0.z, r0.y
-mov.f32f32 r0.y, r5.w
-add.f r0.x, r6.x, c3.x
-mov.f32f32 r5.w, (0.000000)
-mov.f32f32 r6.x, (0.000000)
-mov.f32f32 r6.y, (0.000000)
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r7.w, r5.w
-mov.f32f32 r7.z, r6.x
-mov.f32f32 r7.y, r6.y
-mov.f32f32 r5.w, r3.y
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r6.x, r2.w
-mov.f32f32 r5.z, r5.z
-mov.f32f32 r7.x, r5.w
-mov.f32f32 r6.w, r3.x
-mov.f32f32 r6.z, r6.x
-mov.f32f32 r6.y, r5.z
-mov.f32f32 r3.x, r5.y
-mov.f32f32 r5.x, r5.x
-mul.f r5.y, r8.z, c6.z
-mul.f r8.y, r8.y, c6.y
-mov.f32f32 r6.x, r3.x
-mov.f32f32 r5.w, r5.x
-mov.f32f32 r5.z, r5.y
-mov.f32f32 r5.y, r8.y
-mul.f r3.x, r8.x, c6.x
-mad.f32 r3.y, c7.x, r3.y, c7.y
-mad.f32 r2.w, c7.x, r2.w, c7.y
-mov.f32f32 r8.x, c13.z
-mov.f32f32 r5.x, r3.x
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r3.x, r2.w
-mov.f32f32 r2.w, r8.x
+add.f r0.y, r0.y, c3.y
+mov.f32f32 r7.w, (0.000000)
+add.f r0.x, r0.x, c3.x
+mov.f32f32 r7.z, (0.000000)
+mov.f32f32 r7.y, (0.000000)
+mul.f r5.z, r3.y, c6.z
+mul.f r5.y, r3.x, c6.y
+mul.f r5.x, r2.w, c6.x
+mad.f32 r3.y, c7.x, r7.x, c7.y
+mad.f32 r3.x, c7.x, r6.z, c7.y
+mov.f32f32 r2.w, c13.z
end
nop
nop
nop
; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11) r4.x (5:12) r5.x (5:13) r6.x (5:14) r7.x (5:15)
-; VERT: inputs: r2.w (0:0,cm=7,il=8,b=0) r5.x (0:0,cm=7,il=12,b=0) r8.x (0:0,cm=7,il=16,b=0)
-; VERT: 153 instructions, 0 half, 9 full
+; VERT: inputs: r6.z (0:0,cm=7,il=8,b=0) r5.w (0:0,cm=7,il=12,b=0) r2.w (0:0,cm=7,il=16,b=0)
+; VERT: 93 instructions, 0 half, 8 full
diff --git a/reference/0ad-alpine-valley/0ad-54.asm b/reference/0ad-alpine-valley/0ad-54.asm
index d4490e5..494f814 100644
--- a/reference/0ad-alpine-valley/0ad-54.asm
+++ b/reference/0ad-alpine-valley/0ad-54.asm
@@ -1,16 +1,16 @@
; options:
; VERT: new compiler
-@in(r2.w) in0
-@in(r3.x) in1
-@in(r3.y) in2
-@in(r8.x) in4
-@in(r8.y) in5
-@in(r8.z) in6
-@in(r6.y) in8
-@in(r6.z) in9
-@in(r6.w) in10
-@in(r8.w) in12
-@in(r9.x) in13
+@in(r7.x) in0
+@in(r7.y) in1
+@in(r7.z) in2
+@in(r6.y) in4
+@in(r6.z) in5
+@in(r6.w) in6
+@in(r8.x) in8
+@in(r8.y) in9
+@in(r8.z) in10
+@in(r3.x) in12
+@in(r3.y) in13
@out(r0.x) out0
@out(r0.y) out1
@out(r0.z) out2
@@ -43,167 +43,101 @@
@out(r7.y) out29
@out(r7.z) out30
@out(r7.w) out31
-(sy)(ss)add.f r0.x, c4.x, (neg)r2.w
-mul.f r0.y, r8.x, r8.x
-mul.f r0.z, c8.w, r2.w
-mul.f r0.w, c8.z, r2.w
+@const(c13.x) 0x3f800000, 0x00000000, 0xbf800000, 0x00000000
+@const(c14.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
+(sy)(ss)add.f r0.x, c4.x, (neg)r7.x
+mul.f r0.y, r6.y, r6.y
+mul.f r0.z, c8.y, r7.x
+mul.f r0.w, c8.x, r7.x
mul.f r1.x, r0.x, r0.x
-add.f r1.y, c4.y, (neg)r3.x
+add.f r1.z, c4.y, (neg)r7.y
add.f r0.y, c13.x, (neg)r0.y
-mad.f32 r0.z, c9.w, r3.x, r0.z
-mad.f32 r0.w, c9.z, r3.x, r0.w
-mad.f32 r1.x, r1.y, r1.y, r1.x
-mov.f32f32 r0.y, r0.y
-mad.f32 r0.z, c10.w, r3.y, r0.z
-mad.f32 r0.w, c10.z, r3.y, r0.w
-mov.f32f32 r1.x, r1.x
-add.f r1.z, c4.z, (neg)r3.y
-mul.f r1.w, r0.y, r0.y
-mul.f r2.x, r8.y, r8.x
-add.f r0.z, r0.z, c11.w
+mad.f32 r0.z, c9.y, r7.y, r0.z
+mad.f32 r0.w, c9.x, r7.y, r0.w
mad.f32 r1.x, r1.z, r1.z, r1.x
-add.f r0.w, r0.w, c11.z
-mul.f r2.y, c8.y, r2.w
-mul.f r2.z, c8.x, r2.w
-add.f r2.x, c13.y, (neg)r2.x
-mul.f r3.z, r6.w, c6.z
-mov.f32f32 r0.z, r0.z
+add.f r1.w, c4.z, (neg)r7.z
+mov.f32f32 r1.y, r0.y
+mad.f32 r0.z, c10.y, r7.z, r0.z
+mad.f32 r0.w, c10.x, r7.z, r0.w
+mad.f32 r1.x, r1.w, r1.w, r1.x
+mul.f r2.x, r1.y, r1.y
+mul.f r1.y, r6.z, r6.y
+add.f r0.z, r0.z, c11.y
+add.f r0.w, r0.w, c11.x
+mul.f r2.y, r6.w, r6.y
+mul.f r2.z, c8.w, r7.x
rsq r1.x, r1.x
-(ss)mov.f32f32 r1.x, r1.x
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r0.w, r0.w
-mad.f32 r2.y, c9.y, r3.x, r2.y
+(ss)mov.f32f32 r2.w, r1.x
+add.f r3.z, c13.y, (neg)r1.y
mad.f32 r0.x, r0.x, r1.x, (neg)c5.x
-mad.f32 r1.w, r2.x, r2.x, r1.w
-mad.f32 r1.y, r1.y, r1.x, (neg)c5.y
-mad.f32 r1.x, r1.z, r1.x, (neg)c5.z
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r3.w, r1.w
-mul.f r1.z, r8.z, r8.x
-mov.f32f32 r1.y, r1.y
-mul.f r1.w, r0.x, r0.x
-mov.f32f32 r1.x, r1.x
-add.f r4.x, c13.y, (neg)r1.z
-mad.f32 r4.y, r1.y, r1.y, r1.w
-mov.f32f32 r1.w, r0.z
-mov.f32f32 r1.z, r0.w
-mad.f32 r0.z, c10.y, r3.y, r2.y
-mov.f32f32 r0.w, r4.y
-mov.f32f32 r2.y, r4.x
-mad.f32 r0.w, r1.x, r1.x, r0.w
-add.f r0.z, r0.z, c11.y
-mad.f32 r2.z, c9.x, r3.x, r2.z
-mov.f32f32 r3.z, r3.z
-mad.f32 r2.z, c10.x, r3.y, r2.z
-mul.f r5.z, c0.w, r2.w
-mul.f r5.w, c0.z, r2.w
-rsq r0.w, r0.w
-(ss)mov.f32f32 r0.w, r0.w
-mad.f32 r3.w, r2.y, r2.y, r3.w
-mul.f r0.z, r0.z, c12.y
-add.f r2.z, r2.z, c11.x
-mul.f r1.x, r1.x, r0.w
-mul.f r4.x, r1.y, r0.w
-mul.f r0.x, r0.x, r0.w
-mov.f32f32 r1.y, r0.z
-mov.f32f32 r0.z, r1.x
-mov.f32f32 r0.w, r4.x
-mov.f32f32 r0.x, r0.x
-rsq r1.x, r3.w
-(ss)mov.f32f32 r1.x, r1.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.x, r0.x
+mul.f r1.y, r0.z, c12.y
+mad.f32 r0.z, r1.z, r2.w, (neg)c5.y
+mov.f32f32 r1.z, r3.z
+(ss)mov.f32f32 r1.x, r0.x
+mad.f32 r1.w, r1.w, r2.w, (neg)c5.z
+mov.f32f32 r2.w, r0.z
+mad.f32 r2.x, r3.z, r1.z, r2.x
+mul.f r1.x, r1.x, r1.x
+add.f r2.y, c13.y, (neg)r2.y
+mad.f32 r0.z, r0.z, r2.w, r1.x
+mov.f32f32 r3.z, r1.w
+mul.f r1.x, r0.w, c12.x
+mov.f32f32 r0.w, r2.y
+mad.f32 r2.z, c9.w, r7.y, r2.z
+mad.f32 r0.z, r1.w, r3.z, r0.z
+mad.f32 r1.w, c10.w, r7.z, r2.z
+mul.f r2.z, c8.z, r7.x
+mul.f r3.w, c0.w, r7.x
+mul.f r4.x, c0.z, r7.x
+mul.f r5.z, c0.y, r7.x
+mul.f r5.w, c0.x, r7.x
+rsq r0.z, r0.z
+(ss)mov.f32f32 r4.y, r0.z
+mul.f r4.w, r0.x, r0.z
+mad.f32 r0.x, r2.y, r0.w, r2.x
+add.f r1.w, r1.w, c11.w
+mul.f r5.y, r3.z, r4.y
+mul.f r5.x, r2.w, r4.y
+(ss)mad.f32 r0.z, c9.z, r7.y, r2.z
+mad.f32 r2.y, c1.w, r7.y, r3.w
+mad.f32 r2.w, c1.z, r7.y, r4.x
+rsq r0.x, r0.x
+(ss)mov.f32f32 r3.z, r0.x
+mul.f r2.x, r0.y, r0.x
+(ss)mad.f32 r0.x, c10.z, r7.z, r0.z
+mad.f32 r0.y, c2.w, r7.z, r2.y
+mul.f r2.z, r0.w, r3.z
+mul.f r2.y, r1.z, r3.z
+mov.f32f32 r0.z, r2.x
+add.f r1.z, r0.x, c11.z
+mov.f32f32 r0.x, r2.z
+mov.f32f32 r3.z, r2.y
+mul.f r3.w, r6.z, r0.z
+add.f r0.w, r0.y, c3.w
+mul.f r0.y, r6.y, r0.x
+mul.f r4.x, r6.w, r3.z
+mad.f32 r4.y, r6.w, r0.z, (neg)r0.y
+mad.f32 r4.x, r6.z, r0.x, (neg)r4.x
+mad.f32 r4.z, r6.y, r3.z, (neg)r3.w
+mad.f32 r0.x, c2.z, r7.z, r2.w
+mad.f32 r0.y, c1.y, r7.y, r5.z
+mad.f32 r2.w, c1.x, r7.y, r5.w
+mad.f32 r0.y, c2.y, r7.z, r0.y
+add.f r0.z, r0.x, c3.z
+mad.f32 r0.x, c2.x, r7.z, r2.w
nop
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-(rpt1)nop
-mov.f32f32 r5.y, r0.z
-mov.f32f32 r5.x, r0.w
-mov.f32f32 r4.w, r0.x
-mul.f r0.x, r0.y, r1.x
-mul.f r0.y, r2.y, r1.x
-mul.f r0.z, r2.x, r1.x
-mul.f r0.w, r2.z, c12.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r1.x, r0.w
-mul.f r0.w, r8.y, r0.x
-mul.f r2.x, r8.x, r0.y
-mad.f32 r0.w, r8.x, r0.z, (neg)r0.w
-mad.f32 r2.x, r8.z, r0.x, (neg)r2.x
-mul.f r2.y, r8.z, r0.z
-mov.f32f32 r2.z, r0.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.x, r2.x
-mad.f32 r0.y, r8.y, r0.y, (neg)r2.y
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r4.z, r0.w
-mov.f32f32 r4.y, r2.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.x, r0.x
-nop
-mov.f32f32 r4.x, r0.y
-mov.f32f32 r2.y, r0.z
-mov.f32f32 r2.x, r0.x
-mov.f32f32 r0.x, r3.z
-mad.f32 r0.y, c1.w, r3.x, r5.z
-mad.f32 r0.z, c1.z, r3.x, r5.w
-mul.f r0.w, c0.y, r2.w
-mov.f32f32 r6.x, r0.x
-mad.f32 r0.x, c2.w, r3.y, r0.y
-mad.f32 r0.y, c2.z, r3.y, r0.z
-mad.f32 r0.z, c1.y, r3.x, r0.w
-mul.f r0.w, c0.x, r2.w
-add.f r0.x, r0.x, c3.w
-add.f r0.y, r0.y, c3.z
-mad.f32 r3.z, c2.y, r3.y, r0.z
-(ss)mad.f32 r3.w, c1.x, r3.x, r0.w
-mov.f32f32 r0.w, r0.x
-mov.f32f32 r0.z, r0.y
-add.f r0.x, r3.z, c3.y
-mad.f32 r3.z, c2.x, r3.y, r3.w
-mul.f r3.w, r6.z, c6.y
-mul.f r5.z, r6.y, c6.x
-mov.f32f32 r0.y, r0.x
-add.f r0.x, r3.z, c3.x
-mov.f32f32 r3.z, r3.w
-mov.f32f32 r3.w, r5.z
-mad.f32 r6.y, c7.x, r3.y, c7.y
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r5.w, r3.z
-mov.f32f32 r5.z, r3.w
-mov.f32f32 r3.z, r6.y
-mad.f32 r6.y, c7.x, r2.w, c7.y
-mov.f32f32 r6.z, (0.000000)
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r3.w, r3.z
-mov.f32f32 r3.z, r6.y
-mov.f32f32 r7.w, r6.z
-mov.f32f32 r7.z, r3.y
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r3.z, r3.z
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r3.y, r8.z
-mov.f32f32 r7.y, r3.x
-mov.f32f32 r3.x, r8.y
-mov.f32f32 r7.x, r2.w
-mov.f32f32 r6.w, r3.y
-mov.f32f32 r2.w, r8.x
-mov.f32f32 r6.z, r3.x
-mov.f32f32 r3.x, r9.x
-mov.f32f32 r8.x, r8.w
-mov.f32f32 r6.y, r2.w
+add.f r0.y, r0.y, c3.y
+mov.f32f32 r7.w, (0.000000)
+add.f r0.x, r0.x, c3.x
+mul.f r6.x, r8.z, c6.z
+mul.f r5.w, r8.y, c6.y
+mul.f r5.z, r8.x, c6.x
+mad.f32 r3.w, c7.x, r7.z, c7.y
+mad.f32 r3.z, c7.x, r7.x, c7.y
mov.f32f32 r2.w, c13.z
-mov.f32f32 r3.y, r3.x
-mov.f32f32 r3.x, r8.x
-nop
-mov.f32f32 r2.w, r2.w
end
nop
; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11) r4.x (5:12) r5.x (5:13) r6.x (5:14) r7.x (5:15)
-; VERT: inputs: r2.w (0:0,cm=7,il=8,b=0) r8.x (0:0,cm=7,il=12,b=0) r6.y (0:0,cm=7,il=16,b=0) r8.w (0:0,cm=3,il=20,b=0)
-; VERT: 160 instructions, 0 half, 10 full
+; VERT: inputs: r7.x (0:0,cm=7,il=8,b=0) r6.y (0:0,cm=7,il=12,b=0) r8.x (0:0,cm=7,il=16,b=0) r3.x (0:0,cm=3,il=20,b=0)
+; VERT: 91 instructions, 0 half, 9 full
diff --git a/reference/0ad-alpine-valley/0ad-55.asm b/reference/0ad-alpine-valley/0ad-55.asm
index ece3ada..4760742 100644
--- a/reference/0ad-alpine-valley/0ad-55.asm
+++ b/reference/0ad-alpine-valley/0ad-55.asm
@@ -8,6 +8,11 @@
@out(r2.y) out1
@out(r2.z) out2
@out(r2.w) out3
+@const(c14.x) 0x3f000000, 0x00000000, 0x3f800000, 0xbeef9097
+@const(c15.x) 0x3cff9724, 0x40000000, 0xbf800000, 0xba03126f
+@const(c16.x) 0xbf000000, 0x3f800000, 0x3fb8aa65, 0x3de38866
+@const(c17.x) 0x3cf5c28f, 0x00000000, 0x00000000, 0x00000000
+@const(c18.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.x, 21, r1.x
bary.f r0.y, 0, r1.x
add.f r0.w, r0.w, c14.y
@@ -16,531 +21,347 @@ mul.f r1.w, r0.x, r0.x
bary.f r2.x, 22, r1.x
add.f r2.y, r0.y, c16.x
add.f r2.z, r1.z, c16.x
-bary.f r2.w, 8, r1.x
+bary.f r2.w, 10, r1.x
mad.f32 r1.w, r2.x, r2.x, r1.w
-floor.f r3.x, r2.y
+bary.f r3.x, 23, r1.x
+floor.f r3.y, r2.y
rcp r0.w, r0.w
add.f r0.z, r0.z, c14.y
-floor.f r3.y, r2.z
-mov.f32f32 r1.w, r1.w
-bary.f r3.z, 23, r1.x
-add.f r2.y, r2.y, (neg)r3.x
+floor.f r3.z, r2.z
+mad.f32 r1.w, r3.x, r3.x, r1.w
+add.f r2.y, r2.y, (neg)r3.y
(ss)mul.f r0.z, r0.z, r0.w
-(ss)add.f r0.w, r2.z, (neg)r3.y
-mad.f32 r1.w, r3.z, r3.z, r1.w
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r0.z, r0.z
-absneg.f r2.z, (neg)c11.x
-mov.f32f32 r0.w, r0.w
-mul.f r3.x, c14.x, r2.y
-mov.f32f32 r2.w, r2.w
-rsq r3.y, r1.w
-(ss)mul.f r3.w, r0.x, r3.y
+(ss)absneg.f r0.w, (neg)c11.x
+add.f r2.z, r2.z, (neg)r3.z
+mov.f32f32 r3.y, r2.y
+add.f r2.y, r2.y, c14.z
+rsq r3.z, r1.w
+(ss)mul.f r3.w, r0.x, r3.z
rsq r4.x, r1.w
-(ss)mov.f32f32 r4.x, r4.x
-mov.f32f32 r3.x, r3.x
+(ss)mov.f32f32 r4.y, r4.x
(ss)rsq r1.w, r1.w
-(ss)mul.f r4.y, r0.x, r1.w
-mov.f32f32 r3.w, r3.w
-mul.f r4.z, r0.x, r4.x
-add.f r0.y, r0.y, (neg)r3.x
-mov.f32f32 r3.x, r4.y
+(ss)mul.f r4.z, r0.x, r1.w
+mul.f r4.w, c14.x, r3.y
absneg.f r3.w, (abs)r3.w
-mov.f32f32 r4.y, r4.z
-mov.f32f32 r0.y, r0.y
-absneg.f r3.x, (abs)r3.x
-mov.f32f32 r3.w, r3.w
-absneg.f r4.y, (abs)r4.y
-add.f r4.z, c16.x, r0.y
-mov.f32f32 r3.x, r3.x
+mul.f r5.x, r0.x, r4.y
+absneg.f r4.z, (abs)r4.z
+add.f r0.y, r0.y, (neg)r4.w
add.f r3.w, r3.w, c14.w
-mov.f32f32 r4.y, r4.y
-mov.f32f32 r4.z, r4.z
-add.f r3.x, r3.x, c14.w
-mov.f32f32 r3.w, r3.w
-add.f r4.y, r4.y, c14.w
-mul.f r4.z, r4.z, c6.z
-mov.f32f32 r3.x, r3.x
+absneg.f r4.w, (abs)r5.x
+add.f r4.z, r4.z, c14.w
+mov.f32f32 r5.x, r0.y
max.f r3.w, r3.w, c14.y
-mov.f32f32 r4.y, r4.y
-mov.f32f32 r4.w, r4.z
-max.f r3.x, r3.x, c14.y
-mov.f32f32 r3.w, r3.w
-mul.f r5.x, r2.x, r3.y
-max.f r4.y, r4.y, c14.y
-mov.f32f32 r5.y, r4.w
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r4.w, r5.x
-mov.f32f32 r4.y, r4.y
-mul.f r5.x, r2.x, r4.x
-mul.f r5.z, c14.x, r0.w
-absneg.f r4.w, (abs)r4.w
-mul.f r5.w, r2.x, r1.w
-mov.f32f32 r5.x, r5.x
-mov.f32f32 r5.z, r5.z
-mov.f32f32 r4.w, r4.w
-mov.f32f32 r5.w, r5.w
-absneg.f r5.x, (abs)r5.x
-add.f r1.z, r1.z, (neg)r5.z
+mul.f r5.y, r2.x, r3.z
add.f r4.w, r4.w, c14.w
-absneg.f r5.z, (abs)r5.w
-mov.f32f32 r5.x, r5.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r4.w, r4.w
-mov.f32f32 r5.z, r5.z
-add.f r5.x, r5.x, c14.w
-add.f r5.w, c16.x, r1.z
+max.f r4.z, r4.z, c14.y
+mul.f r5.z, r2.x, r1.w
+absneg.f r5.y, (abs)r5.y
max.f r4.w, r4.w, c14.y
+mul.f r4.y, r2.x, r4.y
+absneg.f r5.z, (abs)r5.z
+add.f r5.y, r5.y, c14.w
+add.f r5.x, c16.x, r5.x
+absneg.f r4.y, (abs)r4.y
+mul.f r0.w, r0.w, c11.x
+max.f r5.y, r5.y, c14.y
add.f r5.z, r5.z, c14.w
-mov.f32f32 r5.x, r5.x
-mov.f32f32 r5.w, r5.w
-mov.f32f32 r4.w, r4.w
-mov.f32f32 r5.z, r5.z
-max.f r5.x, r5.x, c14.y
-mul.f r6.x, r5.w, c6.w
-add.f r6.y, r3.w, r4.w
-mul.f r3.y, r3.z, r3.y
-mov.f32f32 r5.x, r5.x
-mov.f32f32 r5.w, r6.x
-max.f r6.z, r5.z, c14.y
-mov.f32f32 r3.y, r3.y
-add.f r6.w, r4.y, r5.x
-mul.f r4.x, r3.z, r4.x
-mov.f32f32 r5.z, r5.w
-absneg.f r3.y, (abs)r3.y
-bary.f r5.w, 2, r1.x
-mov.f32f32 r4.x, r4.x
-mov.f32f32 r6.z, r6.z
-mov.f32f32 r3.y, r3.y
-add.f r7.x, r5.w, c15.w
+add.f r4.y, r4.y, c14.w
+mul.f r5.w, r5.x, c6.z
+add.f r5.x, r3.w, r5.y
+mul.f r3.z, r3.x, r3.z
+max.f r4.y, r4.y, c14.y
+max.f r5.z, r5.z, c14.y
+mov.f32f32 r6.x, r5.w
+absneg.f r3.z, (abs)r3.z
+add.f r6.y, r4.w, r4.y
+mul.f r4.x, r3.x, r4.x
+add.f r6.z, r4.z, r5.z
+add.f r3.z, r3.z, c14.w
+(ss)mul.f r1.w, r3.x, r1.w
absneg.f r4.x, (abs)r4.x
-add.f r7.y, r3.x, r6.z
-add.f r3.y, r3.y, c14.w
-mov.f32f32 r5.w, r7.x
-mov.f32f32 r4.x, r4.x
-(ss)mul.f r1.w, r3.z, r1.w
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r5.w, r5.w
-add.f r4.x, r4.x, c14.w
-mov.f32f32 r1.w, r1.w
-max.f r3.y, r3.y, c14.y
-mul.f r2.z, r2.z, c11.x
-mov.f32f32 r4.x, r4.x
+mov.f32f32 r6.w, r2.z
+max.f r3.z, r3.z, c14.y
absneg.f r1.w, (abs)r1.w
-mov.f32f32 r3.y, r3.y
-sam.s (f32)(x)r7.z, r5.y, s#5, t#5
-(sy)(ss)mov.f32f32 r5.y, r7.z
-max.f r4.x, r4.x, c14.y
-mov.f32f32 r1.w, r1.w
-add.f r5.z, r6.y, r3.y
-mov.f32f32 r5.y, r5.y
-mov.f32f32 r4.x, r4.x
+add.f r4.x, r4.x, c14.w
+mul.f r7.x, c14.x, r6.w
+mov.f32f32 r7.y, r3.z
add.f r1.w, r1.w, c14.w
-mov.f32f32 r5.z, r5.z
-add.f r5.w, c15.y, (neg)r2.y
-add.f r6.y, r6.w, r4.x
-mov.f32f32 r1.w, r1.w
-mul.f r2.z, r2.z, r0.z
-mov.f32f32 r5.w, r5.w
-add.f r6.w, c15.y, (neg)r0.w
-rcp r7.z, r5.z
-(ss)mov.f32f32 r7.z, r7.z
-mov.f32f32 r6.y, r6.y
+max.f r4.x, r4.x, c14.y
+add.f r1.z, r1.z, (neg)r7.x
+add.f r5.x, r5.x, r7.y
max.f r1.w, r1.w, c14.y
-mov.f32f32 r6.w, r6.w
-mul.f r3.w, r3.w, r7.z
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r1.w, r1.w
-mul.f r7.z, r5.w, r6.w
-mov.f32f32 r3.w, r3.w
-bary.f r7.w, 25, r1.x
-rcp r8.x, r6.y
-(ss)mov.f32f32 r8.x, r8.x
-mul.f r5.y, r7.z, r5.y
-add.f r0.y, c16.y, r0.y
-mul.f r7.z, r7.w, c15.x
-mul.f r4.y, r4.y, r8.x
-add.f r7.y, r7.y, r1.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r7.w, r7.z
-mov.f32f32 r4.y, r4.y
-mov.f32f32 r8.x, r7.z
-mul.f r0.y, r0.y, c6.z
-mov.f32f32 r8.y, r7.w
-bary.f r7.w, 26, r1.x
-mov.f32f32 r8.x, r8.x
-mov.f32f32 r8.z, r0.y
-mov.f32f32 r7.y, r7.y
-mul.f r7.w, r7.w, c15.x
-mov.f32f32 r8.w, r8.x
-mov.f32f32 r9.y, r8.z
-mov.f32f32 r6.x, r6.x
-mov.f32f32 r8.x, r7.w
-mov.f32f32 r9.x, r7.w
-rcp r8.z, r7.y
-(ss)mov.f32f32 r9.w, r8.z
-mov.f32f32 r9.z, r6.x
-mov.f32f32 r8.z, r8.x
-mov.f32f32 r6.x, r9.x
-mov.f32f32 r8.x, r7.x
-mul.f r3.x, r3.x, r9.w
-mul.f r0.z, r2.z, r0.z
-rcp r2.z, r6.y
-nop
-(ss)rcp r6.y, r6.y
-mov.f32f32 r9.x, r6.x
-sam (f32)(xyzw)r9.w, r8.y, s#3, t#3
-(sy)mov.f32f32 r6.x, r9.w
-(ss)add.f r8.y, c14.z, (neg)r10.x
-mov.f32f32 r8.z, r10.y
-mov.f32f32 r9.w, r8.x
-mul.f r6.x, r6.x, r3.w
-rcp r8.x, r5.z
-(ss)mov.f32f32 r8.x, r8.x
-sam (f32)(xyzw)r10.x, r8.w, s#4, t#4
-(sy)(ss)mul.f r8.w, r10.y, r4.y
-mul.f r9.x, r10.x, r4.y
-mul.f r4.y, r10.z, r4.y
-mul.f r4.w, r4.w, r8.x
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r8.x, r8.y
-mul.f r8.y, r8.z, r3.w
-mov.f32f32 r4.w, r4.w
-mov.f32f32 r8.z, r7.w
-mul.f r2.z, r5.x, r2.z
-mov.f32f32 r5.x, r8.x
-sam.s (f32)(x)r9.y, r9.y, s#5, t#5
-(sy)mov.f32f32 r8.x, r9.y
-(ss)mov.f32f32 r9.y, r8.z
-bary.f r8.z, 24, r1.x
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r9.z, r7.w
-mul.f r3.w, r5.x, r3.w
-mul.f r5.x, r8.z, c15.x
-add.f r2.y, r2.y, c14.z
-mov.f32f32 r9.w, r9.z
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r8.z, r5.x
-mov.f32f32 r10.x, r5.x
-mul.f r6.w, r2.y, r6.w
-mov.f32f32 r10.y, r7.z
-mov.f32f32 r9.z, r8.z
-mov.f32f32 r10.x, r10.x
-mad.f32 r5.y, r6.w, r8.x, r5.y
-mov.f32f32 r10.y, r10.y
-mov.f32f32 r6.w, r7.w
+mov.f32f32 r7.x, r4.x
+mov.f32f32 r7.y, r1.z
+mov.f32f32 r7.z, r5.x
+mov.f32f32 r7.w, r1.w
+add.f r7.x, r6.y, r7.x
+add.f r6.y, c16.x, r7.y
+mul.f r0.w, r0.w, r0.z
+add.f r7.y, r6.z, r7.w
mov.f32f32 r0.z, r0.z
-mov.f32f32 r5.y, r5.y
-sam (f32)(xyzw)r10.z, r9.y, s#3, t#3
-(sy)mov.f32f32 r8.x, r10.w
-(ss)nop
-sam (f32)(xyzw)r9.y, r9.w, s#4, t#4
-(sy)mad.f32 r8.z, r9.z, r2.z, r8.w
-mad.f32 r8.w, r9.y, r2.z, r9.x
-mad.f32 r2.z, r9.w, r2.z, r4.y
-mad.f32 r4.y, r8.x, r4.w, r6.x
-rcp r5.z, r5.z
-(ss)mov.f32f32 r5.z, r5.z
-mov.f32f32 r6.x, r6.y
-add.f r6.y, c14.z, (neg)r10.z
-mov.f32f32 r8.x, r11.x
-mul.f r3.y, r3.y, r5.z
-mul.f r4.x, r4.x, r6.x
-mov.f32f32 r5.z, r6.y
-mad.f32 r6.x, r8.x, r4.w, r8.y
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r6.y, r7.z
-mov.f32f32 r4.x, r4.x
-mov.f32f32 r8.x, r7.z
-mov.f32f32 r5.z, r5.z
-mov.f32f32 r9.x, r6.y
-mov.f32f32 r6.y, r5.x
-mov.f32f32 r8.x, r8.x
-mov.f32f32 r8.y, r5.x
-mad.f32 r3.w, r5.z, r4.w, r3.w
-mov.f32f32 r9.y, r6.y
-mov.f32f32 r4.z, r4.z
-mov.f32f32 r8.y, r8.y
-mov.f32f32 r10.z, r6.w
-mul.f r0.z, r0.z, c16.z
-mov.f32f32 r9.z, r4.z
-add.f r1.z, c16.y, r1.z
-sam (f32)(xyzw)r10.w, r9.x, s#3, t#3
-(sy)mov.f32f32 r4.z, r11.x
-add.f r4.w, c14.z, (neg)r10.w
-mov.f32f32 r5.z, r11.y
-sam (f32)(xyzw)r10.w, r8.x, s#4, t#4
-(sy)mad.f32 r2.z, r11.y, r4.x, r2.z
-mad.f32 r4.y, r4.z, r3.y, r4.y
-mad.f32 r4.z, r11.x, r4.x, r8.z
-mad.f32 r4.x, r10.w, r4.x, r8.w
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r4.y, r4.y
-mov.f32f32 r4.z, r4.z
-mov.f32f32 r4.x, r4.x
-mul.f r2.z, c8.z, r2.z
-mad.f32 r4.y, c15.y, r4.y, c15.z
-mul.f r4.z, c8.y, r4.z
-mul.f r4.x, c8.x, r4.x
-mov.f32f32 r4.w, r4.w
-mov.f32f32 r4.y, r4.y
-bary.f r6.y, 4, r1.x
-bary.f r6.w, 5, r1.x
-(ss)bary.f r8.x, 6, r1.x
-mov.f32f32 r2.z, r2.z
-mul.f r6.y, r6.y, r4.y
+rcp r6.z, r7.z
+mov.f32f32 r3.w, r3.w
+mov.f32f32 r7.w, r7.x
+mov.f32f32 r8.x, r7.y
+mul.f r8.z, r6.y, c6.w
+(ss)mul.f r3.w, r3.w, r6.z
+bary.f r6.z, 25, r1.x
+mul.f r0.z, r0.w, r0.z
+mov.f32f32 r6.y, r8.z
+mov.f32f32 r0.w, r3.w
+mul.f r8.w, r6.z, c15.x
+rcp r6.z, r7.w
mov.f32f32 r4.w, r4.w
-mul.f r6.w, r6.w, r4.y
-mul.f r4.y, r8.x, r4.y
+rcp r8.y, r8.x
mov.f32f32 r4.z, r4.z
-mad.f32 r3.w, r4.w, r3.y, r3.w
-mov.f32f32 r4.x, r4.x
-mad.f32 r3.y, r5.z, r3.y, r6.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r3.w, r3.w
-sam (f32)(xyzw)r8.x, r10.y, s#0, t#0
-(sy)mul.f r4.w, r8.x, r3.x
-mov.f32f32 r3.y, r3.y
-mul.f r1.z, r1.z, c6.w
-mad.f32 r3.w, c15.y, r3.w, c15.z
-mul.f r5.z, r8.z, r3.x
-mul.f r3.x, r8.y, r3.x
-rcp r6.x, r7.y
-mad.f32 r3.y, c15.y, r3.y, c15.z
-mov.f32f32 r3.w, r3.w
-bary.f r8.x, 12, r1.x
-bary.f r8.y, 7, r1.x
-bary.f r8.z, 13, r1.x
-bary.f r8.w, 14, r1.x
-mov.f32f32 r3.y, r3.y
-mul.f r8.x, r8.x, (neg)r8.y
-mul.f r8.z, r8.z, (neg)r8.y
-mul.f r8.y, r8.w, (neg)r8.y
-mov.f32f32 r8.w, r1.z
-mad.f32 r6.y, r8.x, r3.w, r6.y
-mad.f32 r6.w, r8.z, r3.w, r6.w
-mad.f32 r3.w, r8.y, r3.w, r4.y
+bary.f r9.x, 2, r1.x
+mov.f32f32 r9.y, r8.w
+bary.f r9.z, 26, r1.x
+(ss)mul.f r4.w, r4.w, r6.z
+mul.f r4.z, r4.z, r8.y
mov.f32f32 r9.w, r8.w
-mov.f32f32 r4.y, r6.y
-mov.f32f32 r6.y, r6.w
-mad.f32 r0.x, r0.x, r3.y, r4.y
-mad.f32 r2.x, r2.x, r3.y, r6.y
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r4.y, r7.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r2.x, r2.x
-mad.f32 r3.y, r3.z, r3.y, r3.w
-nop
-mul.f r3.z, r0.x, r0.x
-mov.f32f32 r10.x, r4.y
-mad.f32 r3.z, r2.x, r2.x, r3.z
-mov.f32f32 r3.y, r3.y
-(ss)mov.f32f32 r3.w, r6.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r3.z, r3.z
-rcp r4.y, r7.y
-(ss)mov.f32f32 r4.y, r4.y
-mad.f32 r3.z, r3.y, r3.y, r3.z
-sam.s (f32)(x)r8.x, r9.z, s#5, t#5
-mul.f r3.w, r6.z, r3.w
-(sy)mov.f32f32 r6.x, r8.x
-add.f r0.w, r0.w, c14.z
-exp2 r0.z, r0.z
-(ss)mov.f32f32 r0.z, r0.z
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r6.y, r7.w
-rsq r3.z, r3.z
-(ss)mov.f32f32 r3.z, r3.z
-mul.f r5.w, r5.w, r0.w
-add.f r6.z, c18.y, (neg)r0.z
-mul.f r1.w, r1.w, r4.y
-mul.f r0.x, r0.x, r3.z
-mul.f r2.x, r2.x, r3.z
-mul.f r3.y, r3.y, r3.z
+mul.f r10.z, r9.z, c15.x
+mov.f32f32 r6.z, r4.w
+mov.f32f32 r8.y, r4.z
+mov.f32f32 r10.y, r8.w
+mov.f32f32 r9.z, r10.z
+mov.f32f32 r10.x, r10.z
+add.f r11.z, r9.x, c15.w
+mul.f r0.z, r0.z, c16.z
+(ss)rcp r7.w, r7.w
nop
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r3.y, r3.y
-mad.f32 r3.z, r5.w, r6.x, r5.y
-mul.f r4.y, r0.x, r0.x
-mul.f r5.y, (neg)c9.x, r0.x
-mad.f32 r4.y, r2.x, r2.x, r4.y
-mad.f32 r5.y, (neg)c9.y, r2.x, r5.y
-mov.f32f32 r3.z, r3.z
-mov.f32f32 r5.w, r6.y
+rcp r7.x, r7.x
mov.f32f32 r4.y, r4.y
+sam (f32)(xyzw)r11.w, r10.y, s#0, t#0
+(sy)(ss)mul.f r10.y, r12.y, r8.y
+sam (f32)(xyzw)r12.y, r9.y, s#3, t#3
+(sy)(ss)mul.f r9.y, r12.y, r0.w
+rcp r7.z, r7.z
mov.f32f32 r5.y, r5.y
-mad.f32 r4.y, r3.y, r3.y, r4.y
-mad.f32 r5.y, (neg)c9.z, r3.y, r5.y
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r6.x, r5.x
-mul.f r6.y, r6.z, c11.y
+sam (f32)(xyzw)r13.x, r9.w, s#4, t#4
+(sy)mul.f r9.z, r13.y, r6.z
+add.f r9.x, c14.z, (neg)r12.z
+(ss)mul.f r9.w, r13.z, r6.z
+(ss)mul.f r5.y, r5.y, r7.z
+mul.f r4.y, r4.y, r7.w
+mul.f r0.w, r9.x, r0.w
+rcp r6.z, r8.x
+mov.f32f32 r5.z, r5.z
+mov.f32f32 r7.z, r5.y
+mov.f32f32 r7.w, r10.z
+(ss)bary.f r8.x, 24, r1.x
+mov.f32f32 r10.x, r4.y
+mov.f32f32 r11.x, r10.z
+(ss)mul.f r5.z, r5.z, r6.z
+mul.f r9.x, r8.x, c15.x
+mul.f r8.y, r12.x, r8.y
+mul.f r4.w, r13.x, r4.w
+mul.f r3.w, r12.w, r3.w
+mov.f32f32 r8.x, r9.x
+mov.f32f32 r11.y, r9.x
+mov.f32f32 r12.x, r5.z
+mov.f32f32 r10.w, r9.x
+mov.f32f32 r6.z, r11.z
+mul.f r4.z, r11.w, r4.z
+exp2 r0.z, r0.z
+(ss)mov.f32f32 r11.w, r0.z
+sam (f32)(xyzw)r12.y, r7.w, s#3, t#3
+(sy)(ss)mad.f32 r7.w, r12.z, r7.z, r9.y
+rcp r5.x, r5.x
+(ss)mul.f r3.z, r3.z, r5.x
+sam (f32)(xyzw)r13.x, r11.x, s#4, t#4
+(sy)(ss)mad.f32 r5.x, r13.y, r10.x, r9.z
+add.f r8.x, c14.z, (neg)r12.y
+mad.f32 r9.y, r13.z, r10.x, r9.w
+mov.f32f32 r9.z, r3.z
+mov.f32f32 r9.w, r8.w
+mov.f32f32 r10.x, r9.x
+mul.f r4.x, r4.x, r7.x
+mad.f32 r0.w, r8.x, r7.z, r0.w
+sam (f32)(xyzw)r10.z, r10.z, s#0, t#0
+(sy)mad.f32 r7.x, r11.x, r12.x, r10.y
+rcp r7.y, r7.y
+(ss)mul.f r1.w, r1.w, r7.y
+(ss)mov.f32f32 r7.y, r4.x
+mad.f32 r7.z, r10.w, r12.x, r8.y
+sam (f32)(xyzw)r13.y, r9.w, s#3, t#3
+(sy)mad.f32 r7.w, r13.z, r9.z, r7.w
+mov.f32f32 r8.x, r8.w
+mov.f32f32 r8.y, r9.x
+(ss)add.f r9.w, c14.z, (neg)r13.y
+mad.f32 r7.w, c15.y, r7.w, c15.z
+bary.f r10.x, 6, r1.x
+mov.f32f32 r10.y, r1.w
+mad.f32 r0.w, r9.w, r9.z, r0.w
+mov.f32f32 r9.z, r7.w
+bary.f r9.w, 4, r1.x
+bary.f r10.w, 5, r1.x
+mul.f r7.w, r10.x, r7.w
+mad.f32 r0.w, c15.y, r0.w, c15.z
+mul.f r9.w, r9.w, r9.z
+mul.f r9.z, r10.w, r9.z
+bary.f r10.x, 14, r1.x
+mov.f32f32 r10.w, r0.w
+bary.f r11.x, 12, r1.x
+bary.f r11.y, 7, r1.x
+bary.f r12.x, 13, r1.x
+sam (f32)(xyzw)r14.x, r8.x, s#4, t#4
+(sy)mad.f32 r5.x, r14.y, r7.y, r5.x
+mad.f32 r7.y, r14.z, r7.y, r9.y
+(ss)mul.f r8.x, r11.x, (neg)r11.y
+mul.f r8.y, r12.x, (neg)r11.y
+mul.f r9.y, r10.x, (neg)r11.y
+mul.f r7.y, c8.z, r7.y
+mad.f32 r8.x, r8.x, r10.w, r9.w
+mad.f32 r3.w, r12.w, r5.y, r3.w
+mad.f32 r5.y, r8.y, r10.w, r9.z
+mad.f32 r3.z, r13.w, r3.z, r3.w
+mad.f32 r0.w, r9.y, r0.w, r7.w
+mul.f r3.w, c8.y, r5.x
+sam (f32)(xyzw)r8.w, r8.w, s#0, t#0
+(sy)mad.f32 r5.x, r9.y, r10.y, r7.x
+mad.f32 r3.z, c15.y, r3.z, c15.z
+mad.f32 r4.y, r13.x, r4.y, r4.w
+mad.f32 r4.w, r9.x, r10.y, r7.z
+mov.f32f32 r7.x, r5.x
+mov.f32f32 r7.z, r3.z
+mad.f32 r0.w, r3.x, r3.z, r0.w
+(rpt1)nop
+mad.f32 r0.x, r0.x, r7.z, r8.x
+mad.f32 r2.x, r2.x, r7.z, r5.y
+(rpt1)nop
+mov.f32f32 r3.x, r0.x
+mov.f32f32 r3.z, r2.x
+mov.f32f32 r5.y, r0.w
+mov.f32f32 r7.z, r4.w
+mul.f r0.x, r0.x, r3.x
+mad.f32 r4.x, r14.x, r4.x, r4.y
+mad.f32 r0.x, r2.x, r3.z, r0.x
+mad.f32 r2.x, r10.z, r5.z, r4.z
+mad.f32 r0.x, r5.y, r5.y, r0.x
+mul.f r4.x, c8.x, r4.x
+add.f r4.y, c18.y, (neg)r11.w
+mad.f32 r1.w, r8.w, r1.w, r2.x
+(ss)nop
+sam.s (f32)(x)r8.w, r6.x, s#5, t#5
+add.f r2.x, c15.y, (neg)r3.y
+add.f r0.y, c16.y, r0.y
+add.f r1.z, c16.y, r1.z
+rsq r0.x, r0.x
+(ss)mov.f32f32 r3.y, r0.x
+(ss)mul.f r0.x, r0.w, r0.x
+mov.f32f32 r0.w, r1.w
+mov.f32f32 r4.z, r2.x
+mul.f r3.x, r3.x, r3.y
+mul.f r3.y, r3.z, r3.y
+mov.f32f32 r3.z, r0.x
+add.f r5.y, c15.y, (neg)r6.w
+mov.f32f32 r5.z, r3.x
+mul.f r3.x, (neg)c9.x, r3.x
+mov.f32f32 r6.x, r3.y
+mov.f32f32 r6.y, r5.y
+mul.f r6.z, r5.z, r5.z
+mad.f32 r3.x, (neg)c9.y, r3.y, r3.x
+mad.f32 r3.y, r6.x, r6.x, r6.z
+mad.f32 r0.x, (neg)c9.z, r0.x, r3.x
+mad.f32 r3.x, r3.z, r3.z, r3.y
+mul.f r3.y, r4.z, r6.y
+mul.f r4.y, r4.y, c11.y
mul.f r0.z, r0.z, c14.z
-mov.f32f32 r0.y, r0.y
-rsq r4.y, r4.y
-(ss)mov.f32f32 r4.y, r4.y
-max.f r5.y, r5.y, c14.y
-mov.f32f32 r6.x, r6.x
-mov.f32f32 r7.w, r0.y
-mul.f r0.x, r0.x, r4.y
-mov.f32f32 r0.y, r5.y
-bary.f r5.y, 19, r1.x
-bary.f r6.z, 18, r1.x
-mov.f32f32 r0.x, r0.x
-bary.f r6.w, 15, r1.x
-bary.f r7.y, 20, r1.x
-mov.f32f32 r5.y, r5.y
-mov.f32f32 r6.z, r6.z
-mov.f32f32 r6.w, r6.w
-mov.f32f32 r7.y, r7.y
-mad.f32 r8.x, c8.y, r0.y, (neg)r5.y
-mad.f32 r8.y, c8.x, r0.y, (neg)r6.z
-mul.f r0.x, r0.x, r6.w
-mul.f r2.x, r2.x, r4.y
-mad.f32 r0.y, c8.z, r0.y, (neg)r7.y
-mov.f32f32 r6.w, r8.x
-mov.f32f32 r8.x, r8.y
-mov.f32f32 r2.x, r2.x
-bary.f r8.y, 16, r1.x
-mov.f32f32 r0.y, r0.y
-mad.f32 r5.y, c12.x, r6.w, r5.y
-mad.f32 r6.z, c12.x, r8.x, r6.z
-mov.f32f32 r6.w, r8.y
-mad.f32 r0.y, c12.x, r0.y, r7.y
-mov.f32f32 r5.y, r5.y
-mov.f32f32 r6.z, r6.z
-mad.f32 r0.x, r2.x, r6.w, r0.x
-mov.f32f32 r0.y, r0.y
-mul.f r2.x, r3.y, r4.y
-nop
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r2.x, r2.x
-bary.f r3.y, 17, r1.x
-sam (f32)(xyzw)r8.x, r5.w, s#0, t#0
-(sy)mad.f32 r4.y, r8.x, r3.w, r4.w
-mov.f32f32 r1.z, r1.z
-mad.f32 r4.w, r8.z, r3.w, r5.z
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r1.w, r1.w
-mad.f32 r3.x, r8.y, r3.w, r3.x
-mov.f32f32 r3.w, r7.z
-mad.f32 r0.x, r2.x, r3.y, r0.x
-mov.f32f32 r8.x, r1.z
-mov.f32f32 r1.z, r7.x
-mov.f32f32 r5.z, r3.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r2.x, r5.x
-mov.f32f32 r8.y, r1.z
-add.f r0.z, r0.z, r6.y
-max.f r0.x, c14.y, r0.x
-(ss)mov.f32f32 r5.w, r2.x
-mov.f32f32 r6.x, r2.w
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.x, r0.x
-sam.s (f32)(x)r6.w, r7.w, s#5, t#5
-(sy)mov.f32f32 r1.z, r6.w
-mul.f r0.w, r2.y, r0.w
-mov.f32f32 r0.z, r0.z
-bary.f r2.x, 9, r1.x
-sam (f32)(xyzw)r6.w, r5.z, s#0, t#0
-(sy)mad.f32 r2.y, r7.x, r1.w, r3.x
-mad.f32 r0.w, r0.w, r1.z, r3.z
-log2 r0.x, r0.x
-(ss)mul.f r0.x, c12.y, r0.x
-mad.f32 r1.z, r7.y, r1.w, r4.w
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r1.z, r1.z
-mad.f32 r1.w, r6.w, r1.w, r4.y
-mul.f r0.w, c16.w, r0.w
+mul.f r11.x, r0.y, c6.z
+mul.f r11.y, r1.z, c6.w
+mul.f r0.y, r2.y, r5.y
+rsq r1.z, r3.x
+(ss)mov.f32f32 r3.x, r1.z
+max.f r0.x, r0.x, c14.y
+mul.f r1.z, r3.z, r1.z
+bary.f r3.z, 18, r1.x
+mul.f r4.z, r5.z, r3.x
+bary.f r5.y, 15, r1.x
+mov.f32f32 r5.z, r0.x
+bary.f r6.y, 19, r1.x
+mul.f r3.x, r6.x, r3.x
+mul.f r4.z, r4.z, r5.y
+bary.f r5.y, 16, r1.x
+bary.f r6.x, 20, r1.x
+mad.f32 r6.z, c8.y, r5.z, (neg)r6.y
+mad.f32 r5.z, c8.z, r5.z, (neg)r6.x
+mad.f32 r3.x, r3.x, r5.y, r4.z
+bary.f r4.z, 17, r1.x
+mov.f32f32 r5.y, r6.x
+mov.f32f32 r6.x, r6.y
+mad.f32 r0.x, c8.x, r0.x, (neg)r3.z
+mad.f32 r1.z, r1.z, r4.z, r3.x
+mad.f32 r3.x, c12.x, r5.z, r5.y
+mad.f32 r4.z, c12.x, r6.z, r6.x
+mov.f32f32 r3.z, r3.z
+max.f r1.z, c14.y, r1.z
+mad.f32 r0.x, c12.x, r0.x, r3.z
+(sy)mul.f r3.y, r3.y, r8.w
+add.f r0.z, r0.z, r4.y
+mov.f32f32 r8.y, r11.x
+mov.f32f32 r8.w, r11.z
+mov.f32f32 r6.x, r11.y
+log2 r1.z, r1.z
+(ss)mul.f r1.z, c12.y, r1.z
max.f r0.z, r0.z, c14.y
-mov.f32f32 r2.x, r2.x
-bary.f r2.w, 10, r1.x
-exp2 r0.x, r0.x
-(ss)mul.f r2.z, r2.z, r0.x
-mul.f r3.x, r4.z, r0.x
-mad.f32 r0.y, r1.z, r0.y, r2.z
-mad.f32 r2.z, r2.y, r5.y, r3.x
-(ss)mul.f r0.x, r4.x, r0.x
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.z, r2.z
-mad.f32 r0.x, r1.w, r6.z, r0.x
+mov.f32f32 r6.y, r11.z
+(rpt1)nop
+sam.s (f32)(x)r7.w, r8.y, s#5, t#5
min.f r0.z, r0.z, c14.z
-mul.f r0.y, r0.y, r0.w
-mul.f r2.z, r2.z, r0.w
-mov.f32f32 r0.x, r0.x
+(sy)mad.f32 r0.y, r0.y, r7.w, r3.y
+exp2 r1.z, r1.z
+(ss)mul.f r3.y, r7.y, r1.z
+mul.f r3.z, r3.w, r1.z
+mad.f32 r3.y, r7.x, r3.x, r3.y
+sam.s (f32)(x)r5.y, r5.w, s#5, t#5
+add.f r2.z, r2.z, c14.z
+mad.f32 r3.z, r7.z, r4.z, r3.z
+(ss)mul.f r1.z, r4.x, r1.z
add.f r3.x, c18.y, (neg)r0.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r2.z, r2.z
-mad.f32 r0.y, c7.z, r1.z, r0.y
-mad.f32 r1.z, c7.y, r2.y, r2.z
-mul.f r0.x, r0.x, r0.w
-mul.f r0.w, r3.x, c10.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r0.x, r0.x
-add.f r2.y, c18.y, (neg)r0.z
-mul.f r0.y, r0.z, r0.y
-mul.f r1.z, r0.z, r1.z
-mad.f32 r0.x, c7.x, r1.w, r0.x
-mul.f r1.w, r2.y, c10.y
-add.f r0.y, r0.y, r0.w
-mov.f32f32 r0.w, r2.w
-mov.f32f32 r0.x, r0.x
-add.f r1.z, r1.z, r1.w
-add.f r1.w, c18.y, (neg)r0.z
-mov.f32f32 r2.y, r0.w
-bary.f (ei)r0.w, 11, r1.x
-mul.f r0.x, r0.z, r0.x
-mul.f r0.z, r1.w, c10.x
-mov.f32f32 r6.y, r2.x
-mov.f32f32 r0.w, r0.w
+mul.f r2.x, r2.x, r2.z
+mad.f32 r0.x, r0.w, r0.x, r1.z
+add.f r0.w, c18.y, (neg)r0.z
+add.f r1.z, c18.y, (neg)r0.z
+(sy)mad.f32 r0.y, r2.x, r5.y, r0.y
+sam.s (f32)(x)r3.w, r11.x, s#5, t#5
+mul.f r2.x, r2.y, r2.z
+mul.f r2.y, r3.x, c10.z
+mul.f r0.w, r0.w, c10.y
+mul.f r1.z, r1.z, c10.x
+(sy)mad.f32 r0.y, r2.x, r3.w, r0.y
+bary.f r3.x, 11, r1.x
+bary.f r3.w, 8, r1.x
+bary.f (ei)r4.x, 9, r1.x
+mul.f r0.y, c16.w, r0.y
mov.f32f32 r1.x, c14.y
(rpt1)nop
-mov.f32f32 r2.z, r0.w
-add.f r0.x, r0.x, r0.z
-sam (f32)(w)r2.w, r6.x, s#1, t#1
-(sy)add.f r0.z, c14.z, (neg)r3.z
-(rpt3)nop
-sam (f32)(w)r1.w, r2.y, s#2, t#2
-(sy)cmps.f.lt r0.w, r2.z, c17.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r1.y, r2.z
-nop
-cov.u32f32 r0.w, r0.w
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r1.y, r1.y
-nop
-cmps.f.ne r0.w, r0.w, c14.y
-mov.f32f32 r0.z, r0.z
-(rpt1)nop
-sel.b32 r0.w, r1.x, r0.w, r1.y
-mov.f32f32 r2.w, r0.z
-(rpt1)nop
-mul.f r0.y, r0.y, r0.w
-mul.f r0.z, r1.z, r0.w
-mul.f r0.x, r0.x, r0.w
-nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.x, r0.x
-nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.x, r0.x
-nop
-(ss)mov.f32f32 r2.z, r0.y
-mov.f32f32 r2.y, r0.z
-mov.f32f32 r2.x, r0.x
+mov.f32f32 r1.y, r0.y
+mul.f r0.x, r0.x, r0.y
+sam (f32)(w)r5.y, r2.w, s#2, t#2
+(sy)cmps.f.lt r0.y, r6.x, c17.x
+sam (f32)(w)r3.w, r3.w, s#1, t#1
+mad.f32 r0.x, c7.x, r1.w, r0.x
+mul.f r1.w, r3.y, r1.y
+mul.f r1.y, r3.z, r1.y
+mad.f32 r1.w, c7.z, r5.x, r1.w
+mad.f32 r1.y, c7.y, r4.w, r1.y
+mul.f r0.x, r0.z, r0.x
+cov.u32f32 r0.y, r0.y
+mul.f r1.w, r0.z, r1.w
+mul.f r0.z, r0.z, r1.y
+add.f r0.x, r0.x, r1.z
+cmps.f.ne r0.y, r0.y, c14.y
+add.f r1.y, r1.w, r2.y
+add.f r0.z, r0.z, r0.w
+(sy)(ss)add.f r2.w, c14.z, (neg)r4.z
+sel.b32 r0.y, r1.x, r0.y, r6.x
+(rpt2)nop
+mul.f r2.z, r1.y, r0.y
+mul.f r2.y, r0.z, r0.y
+mul.f r2.x, r0.x, r0.y
end
-nop
-nop
-nop
; FRAG: outputs: r2.x (1:0)
-; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r7.z (5:10,cm=f,il=12,b=1) r0.x (5:11,cm=f,il=16,b=1) r6.x (5:12,cm=f,il=20,b=1) r4.z (5:13,cm=f,il=24,b=1) r2.w (5:14,cm=f,il=28,b=1) r7.x (5:15,cm=f,il=32,b=1)
-; FRAG: 535 instructions, 0 half, 12 full
+; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r10.z (5:10,cm=f,il=12,b=1) r2.y (5:11,cm=f,il=16,b=1) r4.z (5:12,cm=f,il=20,b=1) r5.z (5:13,cm=f,il=24,b=1) r2.y (5:14,cm=f,il=28,b=1) r7.x (5:15,cm=f,il=32,b=1)
+; FRAG: 354 instructions, 0 half, 15 full
diff --git a/reference/0ad-alpine-valley/0ad-56.asm b/reference/0ad-alpine-valley/0ad-56.asm
index d4490e5..494f814 100644
--- a/reference/0ad-alpine-valley/0ad-56.asm
+++ b/reference/0ad-alpine-valley/0ad-56.asm
@@ -1,16 +1,16 @@
; options:
; VERT: new compiler
-@in(r2.w) in0
-@in(r3.x) in1
-@in(r3.y) in2
-@in(r8.x) in4
-@in(r8.y) in5
-@in(r8.z) in6
-@in(r6.y) in8
-@in(r6.z) in9
-@in(r6.w) in10
-@in(r8.w) in12
-@in(r9.x) in13
+@in(r7.x) in0
+@in(r7.y) in1
+@in(r7.z) in2
+@in(r6.y) in4
+@in(r6.z) in5
+@in(r6.w) in6
+@in(r8.x) in8
+@in(r8.y) in9
+@in(r8.z) in10
+@in(r3.x) in12
+@in(r3.y) in13
@out(r0.x) out0
@out(r0.y) out1
@out(r0.z) out2
@@ -43,167 +43,101 @@
@out(r7.y) out29
@out(r7.z) out30
@out(r7.w) out31
-(sy)(ss)add.f r0.x, c4.x, (neg)r2.w
-mul.f r0.y, r8.x, r8.x
-mul.f r0.z, c8.w, r2.w
-mul.f r0.w, c8.z, r2.w
+@const(c13.x) 0x3f800000, 0x00000000, 0xbf800000, 0x00000000
+@const(c14.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
+(sy)(ss)add.f r0.x, c4.x, (neg)r7.x
+mul.f r0.y, r6.y, r6.y
+mul.f r0.z, c8.y, r7.x
+mul.f r0.w, c8.x, r7.x
mul.f r1.x, r0.x, r0.x
-add.f r1.y, c4.y, (neg)r3.x
+add.f r1.z, c4.y, (neg)r7.y
add.f r0.y, c13.x, (neg)r0.y
-mad.f32 r0.z, c9.w, r3.x, r0.z
-mad.f32 r0.w, c9.z, r3.x, r0.w
-mad.f32 r1.x, r1.y, r1.y, r1.x
-mov.f32f32 r0.y, r0.y
-mad.f32 r0.z, c10.w, r3.y, r0.z
-mad.f32 r0.w, c10.z, r3.y, r0.w
-mov.f32f32 r1.x, r1.x
-add.f r1.z, c4.z, (neg)r3.y
-mul.f r1.w, r0.y, r0.y
-mul.f r2.x, r8.y, r8.x
-add.f r0.z, r0.z, c11.w
+mad.f32 r0.z, c9.y, r7.y, r0.z
+mad.f32 r0.w, c9.x, r7.y, r0.w
mad.f32 r1.x, r1.z, r1.z, r1.x
-add.f r0.w, r0.w, c11.z
-mul.f r2.y, c8.y, r2.w
-mul.f r2.z, c8.x, r2.w
-add.f r2.x, c13.y, (neg)r2.x
-mul.f r3.z, r6.w, c6.z
-mov.f32f32 r0.z, r0.z
+add.f r1.w, c4.z, (neg)r7.z
+mov.f32f32 r1.y, r0.y
+mad.f32 r0.z, c10.y, r7.z, r0.z
+mad.f32 r0.w, c10.x, r7.z, r0.w
+mad.f32 r1.x, r1.w, r1.w, r1.x
+mul.f r2.x, r1.y, r1.y
+mul.f r1.y, r6.z, r6.y
+add.f r0.z, r0.z, c11.y
+add.f r0.w, r0.w, c11.x
+mul.f r2.y, r6.w, r6.y
+mul.f r2.z, c8.w, r7.x
rsq r1.x, r1.x
-(ss)mov.f32f32 r1.x, r1.x
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r0.w, r0.w
-mad.f32 r2.y, c9.y, r3.x, r2.y
+(ss)mov.f32f32 r2.w, r1.x
+add.f r3.z, c13.y, (neg)r1.y
mad.f32 r0.x, r0.x, r1.x, (neg)c5.x
-mad.f32 r1.w, r2.x, r2.x, r1.w
-mad.f32 r1.y, r1.y, r1.x, (neg)c5.y
-mad.f32 r1.x, r1.z, r1.x, (neg)c5.z
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r3.w, r1.w
-mul.f r1.z, r8.z, r8.x
-mov.f32f32 r1.y, r1.y
-mul.f r1.w, r0.x, r0.x
-mov.f32f32 r1.x, r1.x
-add.f r4.x, c13.y, (neg)r1.z
-mad.f32 r4.y, r1.y, r1.y, r1.w
-mov.f32f32 r1.w, r0.z
-mov.f32f32 r1.z, r0.w
-mad.f32 r0.z, c10.y, r3.y, r2.y
-mov.f32f32 r0.w, r4.y
-mov.f32f32 r2.y, r4.x
-mad.f32 r0.w, r1.x, r1.x, r0.w
-add.f r0.z, r0.z, c11.y
-mad.f32 r2.z, c9.x, r3.x, r2.z
-mov.f32f32 r3.z, r3.z
-mad.f32 r2.z, c10.x, r3.y, r2.z
-mul.f r5.z, c0.w, r2.w
-mul.f r5.w, c0.z, r2.w
-rsq r0.w, r0.w
-(ss)mov.f32f32 r0.w, r0.w
-mad.f32 r3.w, r2.y, r2.y, r3.w
-mul.f r0.z, r0.z, c12.y
-add.f r2.z, r2.z, c11.x
-mul.f r1.x, r1.x, r0.w
-mul.f r4.x, r1.y, r0.w
-mul.f r0.x, r0.x, r0.w
-mov.f32f32 r1.y, r0.z
-mov.f32f32 r0.z, r1.x
-mov.f32f32 r0.w, r4.x
-mov.f32f32 r0.x, r0.x
-rsq r1.x, r3.w
-(ss)mov.f32f32 r1.x, r1.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.x, r0.x
+mul.f r1.y, r0.z, c12.y
+mad.f32 r0.z, r1.z, r2.w, (neg)c5.y
+mov.f32f32 r1.z, r3.z
+(ss)mov.f32f32 r1.x, r0.x
+mad.f32 r1.w, r1.w, r2.w, (neg)c5.z
+mov.f32f32 r2.w, r0.z
+mad.f32 r2.x, r3.z, r1.z, r2.x
+mul.f r1.x, r1.x, r1.x
+add.f r2.y, c13.y, (neg)r2.y
+mad.f32 r0.z, r0.z, r2.w, r1.x
+mov.f32f32 r3.z, r1.w
+mul.f r1.x, r0.w, c12.x
+mov.f32f32 r0.w, r2.y
+mad.f32 r2.z, c9.w, r7.y, r2.z
+mad.f32 r0.z, r1.w, r3.z, r0.z
+mad.f32 r1.w, c10.w, r7.z, r2.z
+mul.f r2.z, c8.z, r7.x
+mul.f r3.w, c0.w, r7.x
+mul.f r4.x, c0.z, r7.x
+mul.f r5.z, c0.y, r7.x
+mul.f r5.w, c0.x, r7.x
+rsq r0.z, r0.z
+(ss)mov.f32f32 r4.y, r0.z
+mul.f r4.w, r0.x, r0.z
+mad.f32 r0.x, r2.y, r0.w, r2.x
+add.f r1.w, r1.w, c11.w
+mul.f r5.y, r3.z, r4.y
+mul.f r5.x, r2.w, r4.y
+(ss)mad.f32 r0.z, c9.z, r7.y, r2.z
+mad.f32 r2.y, c1.w, r7.y, r3.w
+mad.f32 r2.w, c1.z, r7.y, r4.x
+rsq r0.x, r0.x
+(ss)mov.f32f32 r3.z, r0.x
+mul.f r2.x, r0.y, r0.x
+(ss)mad.f32 r0.x, c10.z, r7.z, r0.z
+mad.f32 r0.y, c2.w, r7.z, r2.y
+mul.f r2.z, r0.w, r3.z
+mul.f r2.y, r1.z, r3.z
+mov.f32f32 r0.z, r2.x
+add.f r1.z, r0.x, c11.z
+mov.f32f32 r0.x, r2.z
+mov.f32f32 r3.z, r2.y
+mul.f r3.w, r6.z, r0.z
+add.f r0.w, r0.y, c3.w
+mul.f r0.y, r6.y, r0.x
+mul.f r4.x, r6.w, r3.z
+mad.f32 r4.y, r6.w, r0.z, (neg)r0.y
+mad.f32 r4.x, r6.z, r0.x, (neg)r4.x
+mad.f32 r4.z, r6.y, r3.z, (neg)r3.w
+mad.f32 r0.x, c2.z, r7.z, r2.w
+mad.f32 r0.y, c1.y, r7.y, r5.z
+mad.f32 r2.w, c1.x, r7.y, r5.w
+mad.f32 r0.y, c2.y, r7.z, r0.y
+add.f r0.z, r0.x, c3.z
+mad.f32 r0.x, c2.x, r7.z, r2.w
nop
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-(rpt1)nop
-mov.f32f32 r5.y, r0.z
-mov.f32f32 r5.x, r0.w
-mov.f32f32 r4.w, r0.x
-mul.f r0.x, r0.y, r1.x
-mul.f r0.y, r2.y, r1.x
-mul.f r0.z, r2.x, r1.x
-mul.f r0.w, r2.z, c12.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r1.x, r0.w
-mul.f r0.w, r8.y, r0.x
-mul.f r2.x, r8.x, r0.y
-mad.f32 r0.w, r8.x, r0.z, (neg)r0.w
-mad.f32 r2.x, r8.z, r0.x, (neg)r2.x
-mul.f r2.y, r8.z, r0.z
-mov.f32f32 r2.z, r0.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.x, r2.x
-mad.f32 r0.y, r8.y, r0.y, (neg)r2.y
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r4.z, r0.w
-mov.f32f32 r4.y, r2.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.x, r0.x
-nop
-mov.f32f32 r4.x, r0.y
-mov.f32f32 r2.y, r0.z
-mov.f32f32 r2.x, r0.x
-mov.f32f32 r0.x, r3.z
-mad.f32 r0.y, c1.w, r3.x, r5.z
-mad.f32 r0.z, c1.z, r3.x, r5.w
-mul.f r0.w, c0.y, r2.w
-mov.f32f32 r6.x, r0.x
-mad.f32 r0.x, c2.w, r3.y, r0.y
-mad.f32 r0.y, c2.z, r3.y, r0.z
-mad.f32 r0.z, c1.y, r3.x, r0.w
-mul.f r0.w, c0.x, r2.w
-add.f r0.x, r0.x, c3.w
-add.f r0.y, r0.y, c3.z
-mad.f32 r3.z, c2.y, r3.y, r0.z
-(ss)mad.f32 r3.w, c1.x, r3.x, r0.w
-mov.f32f32 r0.w, r0.x
-mov.f32f32 r0.z, r0.y
-add.f r0.x, r3.z, c3.y
-mad.f32 r3.z, c2.x, r3.y, r3.w
-mul.f r3.w, r6.z, c6.y
-mul.f r5.z, r6.y, c6.x
-mov.f32f32 r0.y, r0.x
-add.f r0.x, r3.z, c3.x
-mov.f32f32 r3.z, r3.w
-mov.f32f32 r3.w, r5.z
-mad.f32 r6.y, c7.x, r3.y, c7.y
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r5.w, r3.z
-mov.f32f32 r5.z, r3.w
-mov.f32f32 r3.z, r6.y
-mad.f32 r6.y, c7.x, r2.w, c7.y
-mov.f32f32 r6.z, (0.000000)
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r3.w, r3.z
-mov.f32f32 r3.z, r6.y
-mov.f32f32 r7.w, r6.z
-mov.f32f32 r7.z, r3.y
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r3.z, r3.z
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r3.y, r8.z
-mov.f32f32 r7.y, r3.x
-mov.f32f32 r3.x, r8.y
-mov.f32f32 r7.x, r2.w
-mov.f32f32 r6.w, r3.y
-mov.f32f32 r2.w, r8.x
-mov.f32f32 r6.z, r3.x
-mov.f32f32 r3.x, r9.x
-mov.f32f32 r8.x, r8.w
-mov.f32f32 r6.y, r2.w
+add.f r0.y, r0.y, c3.y
+mov.f32f32 r7.w, (0.000000)
+add.f r0.x, r0.x, c3.x
+mul.f r6.x, r8.z, c6.z
+mul.f r5.w, r8.y, c6.y
+mul.f r5.z, r8.x, c6.x
+mad.f32 r3.w, c7.x, r7.z, c7.y
+mad.f32 r3.z, c7.x, r7.x, c7.y
mov.f32f32 r2.w, c13.z
-mov.f32f32 r3.y, r3.x
-mov.f32f32 r3.x, r8.x
-nop
-mov.f32f32 r2.w, r2.w
end
nop
; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11) r4.x (5:12) r5.x (5:13) r6.x (5:14) r7.x (5:15)
-; VERT: inputs: r2.w (0:0,cm=7,il=8,b=0) r8.x (0:0,cm=7,il=12,b=0) r6.y (0:0,cm=7,il=16,b=0) r8.w (0:0,cm=3,il=20,b=0)
-; VERT: 160 instructions, 0 half, 10 full
+; VERT: inputs: r7.x (0:0,cm=7,il=8,b=0) r6.y (0:0,cm=7,il=12,b=0) r8.x (0:0,cm=7,il=16,b=0) r3.x (0:0,cm=3,il=20,b=0)
+; VERT: 91 instructions, 0 half, 9 full
diff --git a/reference/0ad-alpine-valley/0ad-58.asm b/reference/0ad-alpine-valley/0ad-58.asm
index 0f377f1..9adec78 100644
--- a/reference/0ad-alpine-valley/0ad-58.asm
+++ b/reference/0ad-alpine-valley/0ad-58.asm
@@ -8,299 +8,192 @@
@out(r2.y) out1
@out(r2.z) out2
@out(r2.w) out3
+@const(c9.x) 0x3f000000, 0x00000000, 0x3f800000, 0xbeef9097
+@const(c10.x) 0x3cff9724, 0xba03126f, 0xbf000000, 0x40000000
+@const(c11.x) 0x3f800000, 0xbf000000, 0x3fb8aa65, 0x3de38866
+@const(c12.x) 0x3cf5c28f, 0x00000000, 0x00000000, 0x00000000
+@const(c13.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.x, 11, r1.x
bary.f r0.y, 0, r1.x
add.f r0.w, r0.w, c9.y
bary.f r1.z, 1, r1.x
-mov.f32f32 r0.x, r0.x
-add.f r1.w, r0.y, c10.z
-bary.f r2.x, 4, r1.x
-bary.f r2.y, 15, r1.x
-mul.f r2.z, r0.x, r0.x
+mov.f32f32 r1.w, r0.x
+add.f r2.x, r0.y, c10.z
+bary.f r2.y, 6, r1.x
+add.f r2.z, r1.z, c10.z
+mul.f r0.x, r0.x, r1.w
bary.f r2.w, 12, r1.x
-floor.f r3.x, r1.w
+floor.f r3.x, r2.x
rcp r0.w, r0.w
add.f r0.z, r0.z, c9.y
-add.f r3.y, r1.z, c10.z
-mov.f32f32 r2.w, r2.w
-add.f r1.w, r1.w, (neg)r3.x
+floor.f r3.y, r2.z
+mov.f32f32 r3.z, r2.w
+add.f r2.x, r2.x, (neg)r3.x
(ss)mul.f r0.z, r0.z, r0.w
-(ss)floor.f r0.w, r3.y
-mad.f32 r2.z, r2.w, r2.w, r2.z
-mov.f32f32 r1.w, r1.w
+(ss)absneg.f r0.w, (neg)c7.x
+mad.f32 r0.x, r2.w, r3.z, r0.x
+bary.f r2.w, 13, r1.x
+mov.f32f32 r3.x, r2.x
+mul.f r0.w, r0.w, c7.x
+add.f r2.z, r2.z, (neg)r3.y
+mov.f32f32 r3.y, r2.w
+mul.f r3.w, c9.x, r3.x
+mul.f r0.w, r0.w, r0.z
mov.f32f32 r0.z, r0.z
-absneg.f r3.x, (neg)c7.x
-mov.f32f32 r2.z, r2.z
-bary.f r3.z, 13, r1.x
-mul.f r3.w, c9.x, r1.w
-mul.f r3.x, r3.x, c7.x
-add.f r0.w, r3.y, (neg)r0.w
-mov.f32f32 r3.y, r3.z
-mov.f32f32 r3.z, r3.w
-mul.f r3.x, r3.x, r0.z
-mov.f32f32 r0.w, r0.w
-mad.f32 r2.z, r3.y, r3.y, r2.z
-add.f r0.y, r0.y, (neg)r3.z
-mov.f32f32 r3.x, r3.x
-mul.f r3.z, c9.x, r0.w
-add.f r3.w, c10.w, (neg)r1.w
-add.f r4.x, c10.w, (neg)r0.w
-mov.f32f32 r0.y, r0.y
-rsq r2.z, r2.z
-(ss)mov.f32f32 r2.z, r2.z
-mul.f r0.z, r3.x, r0.z
-mov.f32f32 r3.x, r3.z
-add.f r3.z, c11.x, r0.y
-mul.f r0.x, r0.x, r2.z
-add.f r0.y, c11.y, r0.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r3.z, r3.z
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
+mad.f32 r0.x, r3.y, r3.y, r0.x
+add.f r0.y, r0.y, (neg)r3.w
+mov.f32f32 r3.y, r2.z
+mul.f r0.z, r0.w, r0.z
+add.f r0.w, c10.w, (neg)r3.x
+mov.f32f32 r3.x, r0.y
+mul.f r3.w, c9.x, r3.y
+rsq r0.x, r0.x
+(ss)mov.f32f32 r4.x, r0.x
mul.f r0.z, r0.z, c11.z
-mul.f r3.z, r3.z, c4.z
-absneg.f r0.x, (abs)r0.x
-mul.f r0.y, r0.y, c4.z
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r4.y, r3.z
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r4.z, r0.y
-mov.f32f32 r3.z, r3.z
-mov.f32f32 r0.y, r0.y
-add.f r0.x, r0.x, c9.w
-mov.f32f32 r4.z, r4.z
-add.f r1.z, r1.z, (neg)r3.x
+add.f r3.x, c11.y, r3.x
+add.f r1.z, r1.z, (neg)r3.w
+mul.f r1.w, r1.w, r4.x
+add.f r0.y, c11.x, r0.y
+mul.f r4.y, r3.x, c4.z
+mul.f r3.x, r3.z, r4.x
+absneg.f r1.w, (abs)r1.w
exp2 r0.z, r0.z
-(ss)mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r3.x, r4.y
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r5.y, r3.z
-max.f r0.x, r0.x, c9.y
-add.f r3.z, c13.y, (neg)r0.z
-add.f r4.y, c11.y, r1.z
-mov.f32f32 r6.x, r3.x
-mov.f32f32 r0.x, r0.x
-mul.f r2.w, r2.w, r2.z
-mov.f32f32 r3.x, r4.y
+(ss)mov.f32f32 r3.z, r0.z
+mov.f32f32 r4.w, r4.y
+mov.f32f32 r3.w, r1.z
+add.f r1.w, r1.w, c9.w
+add.f r3.z, c13.y, (neg)r3.z
+mul.f r5.z, r0.y, c4.z
+add.f r0.y, c11.x, r1.z
+max.f r1.z, r1.w, c9.y
+absneg.f r1.w, (abs)r3.x
+add.f r3.x, c11.y, r3.w
mul.f r3.z, r3.z, c7.y
-mul.f r0.z, r0.z, c9.z
-mov.f32f32 r2.w, r2.w
-mul.f r3.x, r3.x, c4.w
-add.f r1.z, c11.x, r1.z
+(ss)mul.f r0.z, r0.z, c9.z
+add.f r1.w, r1.w, c9.w
+mul.f r6.z, r3.x, c4.w
+mov.f32f32 r6.y, r5.z
+mul.f r5.w, r0.y, c4.w
+max.f r0.y, r1.w, c9.y
+mov.f32f32 r5.x, r6.z
add.f r0.z, r0.z, r3.z
-absneg.f r2.w, (abs)r2.w
-mov.f32f32 r3.z, r3.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r4.w, r3.z
-bary.f r3.z, 2, r1.x
-mov.f32f32 r0.z, r0.z
-add.f r2.w, r2.w, c9.w
-mul.f r1.z, r1.z, c4.w
-add.f r3.z, r3.z, c10.y
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r2.w, r2.w
+bary.f r1.w, 2, r1.x
+add.f r3.x, r1.z, r0.y
+mul.f r0.x, r2.w, r0.x
max.f r0.z, r0.z, c9.y
-mov.f32f32 r4.y, r3.z
-mov.f32f32 r5.z, r1.z
-max.f r2.w, r2.w, c9.y
+add.f r6.x, r1.w, c10.y
+mov.f32f32 r4.z, r5.w
+absneg.f r0.x, (abs)r0.x
min.f r0.z, r0.z, c9.z
-mov.f32f32 r5.x, r4.y
-mov.f32f32 r4.y, r5.z
-mov.f32f32 r2.w, r2.w
-add.f r5.z, c13.y, (neg)r0.z
-add.f r5.w, c13.y, (neg)r0.z
-add.f r6.y, c13.y, (neg)r0.z
-add.f r6.w, r0.x, r2.w
-mul.f r2.z, r3.y, r2.z
-sam.s (f32)(x)r7.x, r4.z, s#3, t#3
-(sy)mov.f32f32 r3.y, r7.x
-(ss)mul.f r4.z, r5.z, c6.z
-mul.f r4.w, r5.w, c6.y
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r4.x, r4.x
-absneg.f r2.z, (abs)r2.z
-mul.f r5.x, r6.y, c6.x
-mov.f32f32 r6.y, r4.y
-mov.f32f32 r4.y, r3.z
-mov.f32f32 r2.z, r2.z
-mul.f r5.w, r3.w, r4.x
-mov.f32f32 r5.z, r3.x
-mov.f32f32 r7.x, r0.y
-add.f r0.y, r2.z, c9.w
-mov.f32f32 r2.z, r5.w
-mov.f32f32 r6.z, r4.y
-mov.f32f32 r3.x, r3.z
-mov.f32f32 r0.y, r0.y
-mul.f r2.z, r2.z, r3.y
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r5.w, r3.x
-max.f r0.y, r0.y, c9.y
-sam.s (f32)(x)r7.y, r6.x, s#3, t#3
-(rpt1)nop
-(sy)mov.f32f32 r3.x, r7.y
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r7.y, r1.z
-sam.s (f32)(x)r5.y, r5.y, s#3, t#3
-mov.f32f32 r1.z, r3.z
-(sy)mov.f32f32 r3.y, r5.y
-add.f r3.z, r6.w, r0.y
-add.f r1.w, r1.w, c9.z
-mov.f32f32 r7.z, r1.z
-add.f r0.w, r0.w, c9.z
-mov.f32f32 r1.z, r3.z
-mul.f r3.z, r1.w, r4.x
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r2.y, r2.y
-mul.f r3.w, r3.w, r0.w
-mul.f r0.w, r1.w, r0.w
-mov.f32f32 r1.w, r3.z
-rcp r3.z, r1.z
-(ss)mov.f32f32 r3.z, r3.z
-rcp r4.x, r1.z
-nop
-(ss)rcp r1.z, r1.z
-(ss)mov.f32f32 r4.x, r4.x
-(ss)mov.f32f32 r1.z, r1.z
-mul.f r0.x, r0.x, r3.z
-mad.f32 r1.w, r1.w, r3.y, r2.z
-mul.f r2.z, r2.w, r4.x
-mul.f r0.y, r0.y, r1.z
-mov.f32f32 r0.x, r0.x
-mul.f r1.z, r2.y, c10.x
-mov.f32f32 r1.w, r1.w
-sam.s (f32)(x)r5.y, r7.x, s#3, t#3
-mov.f32f32 r2.y, r2.z
-(sy)mov.f32f32 r2.z, r5.y
+mov.f32f32 r5.y, r6.x
+mov.f32f32 r6.w, r6.x
+add.f r0.x, r0.x, c9.w
+add.f r1.w, c13.y, (neg)r0.z
+add.f r2.w, c13.y, (neg)r0.z
+add.f r3.z, c13.y, (neg)r0.z
+max.f r0.x, r0.x, c9.y
+mul.f r1.w, r1.w, c6.z
+mul.f r3.w, r2.w, c6.y
+sam.s (f32)(x)r7.x, r4.w, s#3, t#3
+mov.f32f32 r2.w, r0.w
+mov.f32f32 r4.x, r0.x
+add.f r3.y, c10.w, (neg)r3.y
+mul.f r3.z, r3.z, c6.x
+sam.s (f32)(x)r7.y, r6.y, s#3, t#3
+(ss)mov.f32f32 r4.w, r6.x
+add.f r3.x, r3.x, r4.x
+mov.f32f32 r4.x, r3.y
mov.f32f32 r1.z, r1.z
-mov.f32f32 r2.w, r3.w
mov.f32f32 r0.y, r0.y
-mov.f32f32 r3.y, r2.x
-mov.f32f32 r2.x, r1.z
-mad.f32 r1.w, r2.w, r2.z, r1.w
-bary.f r2.z, 5, r1.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r2.z, r2.z
-bary.f r2.w, 6, r1.x
-mov.f32f32 r3.w, r2.x
-bary.f r2.x, 16, r1.x
-mad.f32 r0.w, r0.w, r3.x, r1.w
-mov.f32f32 r3.z, r2.z
-mov.f32f32 r1.w, r2.w
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r5.y, r1.w
-mul.f r1.w, r2.x, c10.x
-mul.f r0.w, c11.w, r0.w
-sam (f32)(w)r2.z, r3.y, s#1, t#1
-(sy)add.f r2.x, c9.z, (neg)r3.y
+mov.f32f32 r5.x, r3.x
+mul.f r2.w, r2.w, r4.x
+(rpt3)nop
+(sy)mul.f r2.w, r2.w, r7.x
+rcp r4.x, r5.x
+(ss)mul.f r1.z, r1.z, r4.x
+bary.f r4.x, 15, r1.x
+(ss)rcp r5.x, r5.x
+add.f r2.x, r2.x, c9.z
+(ss)mul.f r0.y, r0.y, r5.x
+(ss)mov.f32f32 r5.x, r1.z
+mul.f r6.y, r4.x, c10.x
+mul.f r3.y, r2.x, r3.y
+rcp r3.x, r3.x
+(ss)mul.f r0.x, r0.x, r3.x
+(ss)mov.f32f32 r3.x, r0.y
+mov.f32f32 r6.z, r6.y
+bary.f r4.x, 16, r1.x
+mad.f32 r2.w, r3.y, r7.y, r2.w
+mov.f32f32 r3.y, r0.x
+sam.s (f32)(x)r7.x, r4.y, s#3, t#3
+add.f r2.z, r2.z, c9.z
+mul.f r4.x, r4.x, c10.x
+(rpt1)nop
+mul.f r0.w, r0.w, r2.z
+mov.f32f32 r6.w, r4.x
+sam.s (f32)(x)r5.y, r5.z, s#3, t#3
+mul.f r2.x, r2.x, r2.z
bary.f r2.z, 7, r1.x
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r2.w, r1.w
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r3.x, r1.z
-bary.f r1.z, 14, r1.x
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r5.z, r2.z
-mov.f32f32 r5.w, r1.w
-mov.f32f32 r4.x, r2.w
-mov.f32f32 r1.w, r2.x
-mov.f32f32 r1.z, r1.z
-bary.f r2.x, 10, r1.x
-bary.f r2.z, 9, r1.x
-(ss)mov.f32f32 r3.z, c9.y
+(sy)mad.f32 r0.w, r0.w, r7.x, r2.w
+bary.f r2.w, 14, r1.x
+(ss)bary.f r4.z, 4, r1.x
+mad.f32 r0.w, r2.x, r5.y, r0.w
+sam (f32)(xyzw)r5.y, r6.z, s#0, t#0
+(sy)mul.f r2.x, r5.w, r5.x
+(ss)mul.f r6.z, r2.w, c10.x
+mul.f r2.w, r5.z, r5.x
+mul.f r1.z, r5.y, r1.z
+mul.f r0.w, c11.w, r0.w
+mov.f32f32 r4.y, r6.z
+sam (f32)(w)r5.x, r2.y, s#2, t#2
+(sy)(ss)cmps.f.lt r2.y, r5.w, c12.x
+bary.f r4.w, 5, r1.x
+mov.f32f32 r2.z, r0.w
+sam (f32)(xyzw)r6.x, r6.y, s#0, t#0
+bary.f r5.x, 10, r1.x
+bary.f r5.y, 9, r1.x
+cov.u32f32 r2.y, r2.y
+sam (f32)(xyzw)r6.w, r4.x, s#0, t#0
+(sy)mad.f32 r2.x, r7.y, r3.x, r2.x
+mad.f32 r2.w, r7.x, r3.x, r2.w
+mad.f32 r2.x, r6.z, r3.y, r2.x
+mad.f32 r2.w, r6.y, r3.y, r2.w
+mad.f32 r0.y, r6.w, r0.y, r1.z
+cmps.f.ne r1.z, r2.y, c9.y
+mov.f32f32 r2.y, r2.x
+mov.f32f32 r3.x, r2.w
+mad.f32 r0.x, r6.x, r0.x, r0.y
+mov.f32f32 r0.y, c9.y
+mul.f r2.y, r2.y, r5.x
+mul.f r3.x, r3.x, r5.y
+mov.f32f32 r3.y, r0.x
bary.f (ei)r1.x, 8, r1.x
-sam (f32)(xyzw)r6.x, r3.w, s#0, t#0
-(sy)mul.f r1.y, r6.z, r0.x
-mul.f r1.z, r1.z, c10.x
-(ss)mul.f r3.w, r6.y, r0.x
-mul.f r0.x, r6.x, r0.x
-mov.f32f32 r2.w, r1.w
-mov.f32f32 r1.z, r1.z
-sam (f32)(w)r6.x, r5.y, s#2, t#2
-(sy)mov.f32f32 r1.w, r6.w
-cmps.f.lt r3.y, r6.w, c12.x
-nop
-mov.f32f32 r4.x, r1.z
-mov.f32f32 r1.z, r1.z
-cov.u32f32 r4.y, r3.y
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r6.x, r4.x
-mov.f32f32 r3.y, r1.z
-cmps.f.ne r1.z, r4.y, c9.y
-(rpt3)nop
-(ss)nop
-sam (f32)(xyzw)r5.y, r5.w, s#0, t#0
-(sy)mad.f32 r1.y, r5.w, r2.y, r1.y
-(ss)nop
-sam (f32)(xyzw)r5.w, r3.x, s#0, t#0
-(ss)mad.f32 r3.x, r5.z, r2.y, r3.w
-(sy)mad.f32 r1.y, r6.y, r0.y, r1.y
-mad.f32 r3.x, r6.x, r0.y, r3.x
-mad.f32 r0.x, r5.y, r2.y, r0.x
-sel.b32 r1.z, r3.z, r1.z, r1.w
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.w, r3.x
-mad.f32 r0.x, r5.w, r0.y, r0.x
-nop
-mul.f r0.y, r1.y, r2.x
-mul.f r2.x, r1.w, r2.z
-mov.f32f32 r0.x, r0.x
-nop
-mul.f r0.y, r0.y, r0.w
-mul.f r2.x, r2.x, r0.w
-mul.f r1.x, r0.x, r1.x
-nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r2.x, r2.x
-mad.f32 r0.y, c5.z, r1.y, r0.y
-mad.f32 r1.y, c5.y, r1.w, r2.x
+mul.f r1.y, r2.y, r2.z
+mul.f r2.y, r3.x, r2.z
+mad.f32 r1.y, c5.z, r2.x, r1.y
+mad.f32 r2.x, c5.y, r2.w, r2.y
+mul.f r1.x, r3.y, r1.x
+sel.b32 r0.y, r0.y, r1.z, r5.w
+mul.f r1.y, r0.z, r1.y
+mul.f r1.z, r0.z, r2.x
mul.f r0.w, r1.x, r0.w
-nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.x, r1.y
-mov.f32f32 r0.w, r0.w
-nop
-mul.f r0.y, r0.z, r0.y
-mul.f r1.x, r0.z, r1.x
+sam (f32)(w)r2.x, r4.z, s#1, t#1
+(sy)add.f r2.w, c9.z, (neg)r2.w
+add.f r1.x, r1.y, r1.w
+add.f r1.y, r1.z, r3.w
+(rpt1)nop
+mul.f r2.z, r1.x, r0.y
+mul.f r2.y, r1.y, r0.y
mad.f32 r0.x, c5.x, r0.x, r0.w
-nop
-add.f r0.y, r0.y, r4.z
-add.f r0.w, r1.x, r4.w
-mov.f32f32 r0.x, r0.x
-nop
-mul.f r0.y, r0.y, r1.z
-mul.f r0.w, r0.w, r1.z
+(rpt2)nop
mul.f r0.x, r0.z, r0.x
-nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.w
-add.f r0.x, r0.x, r5.x
-nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-mul.f r0.x, r0.x, r1.z
-nop
-mov.f32f32 r2.z, r0.y
-mov.f32f32 r2.y, r0.z
-mov.f32f32 r0.x, r0.x
(rpt2)nop
-mov.f32f32 r0.x, r0.x
+add.f r0.x, r0.x, r3.z
(rpt2)nop
-mov.f32f32 r2.x, r0.x
+mul.f r2.x, r0.x, r0.y
end
+nop
; FRAG: outputs: r2.x (1:0)
-; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r1.w (5:10,cm=f,il=12,b=1) r63.y (5:11,cm=f,il=16,b=1) r1.z (5:12,cm=f,il=20,b=1) r5.x (5:13,cm=f,il=24,b=1)
-; FRAG: 300 instructions, 0 half, 8 full
+; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r1.w (5:10,cm=f,il=12,b=1) r63.y (5:11,cm=f,il=16,b=1) r3.y (5:12,cm=f,il=20,b=1) r5.x (5:13,cm=f,il=24,b=1)
+; FRAG: 190 instructions, 0 half, 8 full
diff --git a/reference/0ad-alpine-valley/0ad-61.asm b/reference/0ad-alpine-valley/0ad-61.asm
index e890c72..95d5c75 100644
--- a/reference/0ad-alpine-valley/0ad-61.asm
+++ b/reference/0ad-alpine-valley/0ad-61.asm
@@ -8,211 +8,135 @@
@out(r2.y) out1
@out(r2.z) out2
@out(r2.w) out3
+@const(c9.x) 0x3f000000, 0x00000000, 0x3f800000, 0xba03126f
+@const(c10.x) 0xbf000000, 0x40000000, 0x3f800000, 0x3fb8aa65
+@const(c11.x) 0x3de38866, 0x3cf5c28f, 0x00000000, 0x00000000
+@const(c12.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.x, 0, r1.x
add.f r0.y, r0.w, c9.y
bary.f r0.w, 1, r1.x
-bary.f r1.z, 4, r1.x
-add.f r1.w, r0.x, c10.x
-bary.f r2.x, 6, r1.x
+bary.f r1.z, 6, r1.x
+add.f r2.x, r0.x, c10.x
+bary.f r1.w, 7, r1.x
add.f r2.y, r0.w, c10.x
-mov.f32f32 r1.z, r1.z
-floor.f r2.z, r1.w
+bary.f r2.z, 2, r1.x
+floor.f r2.w, r2.x
rcp r0.y, r0.y
add.f r0.z, r0.z, c9.y
-floor.f r2.w, r2.y
-mov.f32f32 r3.x, r1.z
-add.f r1.z, r1.w, (neg)r2.z
+floor.f r3.x, r2.y
+add.f r3.w, r2.z, c9.w
+add.f r2.x, r2.x, (neg)r2.w
(ss)mul.f r0.y, r0.z, r0.y
-add.f r0.z, r2.y, (neg)r2.w
-bary.f r1.w, 5, r1.x
-mov.f32f32 r1.z, r1.z
+absneg.f r0.z, (neg)c7.x
+add.f r2.y, r2.y, (neg)r3.x
+mov.f32f32 r2.z, r2.x
+add.f r2.x, r2.x, c9.z
+mul.f r0.z, r0.z, c7.x
+sam (f32)(w)r4.x, r1.z, s#2, t#2
+(ss)mov.f32f32 r1.z, r2.y
+mul.f r1.w, c9.x, r2.z
+add.f r2.z, c10.y, (neg)r2.z
+mul.f r0.z, r0.z, r0.y
mov.f32f32 r0.y, r0.y
-absneg.f r2.y, (neg)c7.x
-mov.f32f32 r0.z, r0.z
-mul.f r2.z, c9.x, r1.z
-mov.f32f32 r1.w, r1.w
-add.f r2.w, c10.y, (neg)r1.z
-add.f r3.z, c10.y, (neg)r0.z
-mov.f32f32 r2.z, r2.z
-mul.f r2.y, r2.y, c7.x
-mul.f r3.w, c9.x, r0.z
-mov.f32f32 r3.y, r1.w
-add.f r0.x, r0.x, (neg)r2.z
-mul.f r1.w, r2.y, r0.y
-mov.f32f32 r2.y, r3.w
-mov.f32f32 r2.z, r2.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r1.w, r1.w
-add.f r0.w, r0.w, (neg)r2.y
-sam (f32)(w)r3.w, r3.x, s#1, t#1
-(sy)add.f r2.y, c9.z, (neg)r4.z
-add.f r2.w, c10.x, r0.x
-mul.f r0.y, r1.w, r0.y
+add.f r0.x, r0.x, (neg)r1.w
+mul.f r1.w, c9.x, r1.z
+mov.f32f32 r2.w, r2.z
+mul.f r0.y, r0.z, r0.y
+mov.f32f32 r0.z, r0.x
+add.f r0.w, r0.w, (neg)r1.w
add.f r0.x, c10.z, r0.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.w, r2.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
-add.f r2.w, c10.z, r0.w
-mul.f r1.w, r1.w, c4.z
mul.f r0.y, r0.y, c10.w
-mul.f r0.x, r0.x, c4.z
-mov.f32f32 r2.w, r2.w
-(ss)mov.f32f32 r3.x, r1.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r3.y, r0.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r3.w, r3.x
-add.f r0.w, c10.x, r0.w
-mov.f32f32 r3.x, r3.y
-mov.f32f32 r4.z, r0.x
-mov.f32f32 r0.x, r1.w
-mov.f32f32 r0.w, r0.w
-exp2 r0.y, r0.y
-(ss)mov.f32f32 r0.y, r0.y
-mov.f32f32 r5.y, r3.x
-mul.f r1.w, r2.w, c4.w
-mul.f r0.w, r0.w, c4.w
-add.f r2.w, c12.y, (neg)r0.y
-mov.f32f32 r6.x, r0.x
-mov.f32f32 r0.x, r1.w
-mov.f32f32 r3.x, r0.w
-mul.f r2.w, r2.w, c7.y
-mul.f r0.y, r0.y, c9.z
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r4.x, r3.x
-bary.f r3.x, 2, r1.x
-add.f r0.y, r0.y, r2.w
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r0.w, r0.w
-add.f r2.w, r3.x, c9.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r5.z, r1.w
-mov.f32f32 r4.w, r0.w
-mov.f32f32 r0.w, r2.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.w, r2.w
-mov.f32f32 r3.x, r2.w
-mov.f32f32 r4.y, r0.w
-max.f r0.y, r0.y, c9.y
-mov.f32f32 r5.w, r1.w
-mov.f32f32 r5.x, r3.x
-mov.f32f32 r6.y, r0.x
-mov.f32f32 r0.x, r2.w
-mov.f32f32 r0.w, r2.y
-sam.s (f32)(x)r6.z, r3.w, s#3, t#3
-(sy)mov.f32f32 r1.w, r6.z
-min.f r0.y, r0.y, c9.z
-sam.s (f32)(x)r6.z, r5.y, s#3, t#3
-(sy)mov.f32f32 r2.y, r6.z
+add.f r0.z, c10.x, r0.z
+mov.f32f32 r1.w, r0.w
+mul.f r3.y, r0.x, c4.z
+add.f r0.x, c10.z, r0.w
+mul.f r4.x, r0.z, c4.z
+add.f r0.z, c10.x, r1.w
+mov.f32f32 r5.x, r3.y
+mul.f r3.z, r0.x, c4.w
+mov.f32f32 r5.w, r4.x
+mul.f r5.y, r0.z, c4.w
+exp2 r0.x, r0.y
+(ss)mov.f32f32 r0.y, r0.x
+mov.f32f32 r5.z, r3.w
+mov.f32f32 r4.y, r3.z
+mov.f32f32 r6.x, r5.y
+mov.f32f32 r6.y, r3.w
+add.f r0.y, c12.y, (neg)r0.y
+mov.f32f32 r4.z, r3.w
+sam.s (f32)(x)r3.x, r3.y, s#3, t#3
+add.f r0.z, c10.y, (neg)r1.z
+sam.s (f32)(x)r6.z, r5.x, s#3, t#3
+mul.f r0.x, r0.x, c9.z
+add.f r0.w, r2.y, c9.z
+mul.f r0.y, r0.y, c7.y
(ss)nop
-sam.s (f32)(x)r3.w, r4.z, s#3, t#3
-(sy)mov.f32f32 r2.w, r3.w
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r3.x, r3.z
-add.f r3.y, c12.y, (neg)r0.y
-add.f r3.z, c12.y, (neg)r0.y
-add.f r3.w, c12.y, (neg)r0.y
-mul.f r4.x, r2.z, r3.x
-mul.f r3.y, r3.y, c6.z
-mul.f r3.z, r3.z, c6.y
-mul.f r3.w, r3.w, c6.x
-mul.f r1.w, r4.x, r1.w
-add.f r1.z, r1.z, c9.z
-mov.f32f32 r6.z, r0.x
-mov.f32f32 r0.x, r0.w
-add.f r0.z, r0.z, c9.z
-mul.f r0.w, r1.z, r3.x
-mov.f32f32 r2.x, r2.x
-bary.f r3.x, 8, r1.x
-mov.f32f32 r0.x, r0.x
-mad.f32 r0.w, r0.w, r2.w, r1.w
-sam.s (f32)(x)r4.x, r6.x, s#3, t#3
+sam.s (f32)(x)r5.x, r5.w, s#3, t#3
+mov.f32f32 r1.z, r0.z
+mul.f r0.z, r2.x, r0.z
+mul.f r1.w, r2.z, r0.w
+add.f r0.x, r0.x, r0.y
+mul.f r0.y, r2.w, r1.z
+sam.s (f32)(x)r3.y, r4.x, s#3, t#3
+mul.f r0.w, r2.x, r0.w
+(sy)cmps.f.lt r1.z, r4.w, c11.y
+bary.f r3.z, 4, r1.x
+mul.f r0.y, r0.y, r5.x
+max.f r0.x, r0.x, c9.y
+mad.f32 r0.y, r0.z, r6.z, r0.y
+cov.u32f32 r0.z, r1.z
+mad.f32 r0.y, r1.w, r3.y, r0.y
+min.f r0.x, r0.x, c9.z
+mad.f32 r0.y, r0.w, r3.x, r0.y
+cmps.f.ne r0.z, r0.z, c9.y
(rpt1)nop
-(sy)mov.f32f32 r1.w, r4.x
-mov.f32f32 r0.w, r0.w
-mul.f r2.z, r2.z, r0.z
-mov.f32f32 r2.w, r0.x
-mov.f32f32 r4.x, r2.x
-mul.f r0.x, r1.z, r0.z
-mad.f32 r0.z, r2.z, r1.w, r0.w
-bary.f r0.w, 7, r1.x
-mov.f32f32 r1.z, r3.x
+mul.f r0.y, c11.x, r0.y
+bary.f r1.z, 8, r1.x
bary.f r1.w, 9, r1.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-mad.f32 r0.x, r0.x, r2.y, r0.z
-mov.f32f32 r2.x, r1.z
-mov.f32f32 r0.z, r1.w
-mov.f32f32 r4.y, r0.w
-mov.f32f32 r0.x, r0.x
-bary.f r0.w, 12, r1.x
-bary.f r1.z, 11, r1.x
-bary.f (ei)r1.x, 10, r1.x
-mul.f r0.x, c11.x, r0.x
-mov.f32f32 r2.y, r0.z
+add.f r0.w, c12.y, (neg)r0.x
+mov.f32f32 r2.x, r0.y
+add.f r2.y, c12.y, (neg)r0.x
+add.f r2.z, c12.y, (neg)r0.x
+(rpt1)nop
(ss)nop
-sam (f32)(w)r4.x, r4.x, s#2, t#2
-(sy)cmps.f.lt r0.z, r4.w, c11.y
-mov.f32f32 r1.y, r4.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r1.x, r1.x
-sam (f32)(xyz)r1.w, r2.x, s#0, t#0
-cov.u32f32 r0.z, r0.z
-(sy)mul.f r0.w, r2.y, r0.w
-mul.f r1.z, r2.x, r1.z
-mul.f r1.x, r1.w, r1.x
-cmps.f.ne r0.z, r0.z, c9.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.y, r1.y
-mul.f r0.w, r0.w, r0.x
-mul.f r1.z, r1.z, r0.x
-mul.f r0.x, r1.x, r0.x
-mov.f32f32 r1.x, c9.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.z, r1.z
-mad.f32 r0.w, c5.z, r2.y, r0.w
-mad.f32 r1.z, c5.y, r2.x, r1.z
-mov.f32f32 r0.x, r0.x
-sel.b32 r0.z, r1.x, r0.z, r1.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.x, r1.z
-mad.f32 r0.x, c5.x, r1.w, r0.x
-nop
-mul.f r0.w, r0.y, r0.w
-mul.f r1.x, r0.y, r1.x
-mov.f32f32 r0.x, r0.x
-nop
-add.f r0.w, r0.w, r3.y
-add.f r1.x, r1.x, r3.z
-mul.f r0.x, r0.y, r0.x
-nop
-mul.f r0.y, r0.w, r0.z
-mul.f r0.w, r1.x, r0.z
-add.f r0.x, r0.x, r3.w
-nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.w, r0.w
-mul.f r0.x, r0.x, r0.z
+sam (f32)(xyz)r3.w, r1.z, s#0, t#0
+(ss)bary.f r1.z, 12, r1.x
+bary.f r1.w, 11, r1.x
+bary.f r2.w, 10, r1.x
+mul.f r0.w, r0.w, c6.z
+(sy)mul.f r1.z, r4.y, r1.z
+mul.f r1.w, r4.x, r1.w
+mul.f r2.w, r3.w, r2.w
+mul.f r2.y, r2.y, c6.y
+mul.f r1.z, r1.z, r2.x
+mul.f r1.w, r1.w, r2.x
+mad.f32 r1.z, c5.z, r4.y, r1.z
+mad.f32 r1.w, c5.y, r4.x, r1.w
+mul.f r0.y, r2.w, r0.y
+mul.f r2.x, r2.z, c6.x
+mul.f r1.z, r0.x, r1.z
+mul.f r1.w, r0.x, r1.w
+mad.f32 r0.y, c5.x, r3.w, r0.y
+mov.f32f32 r2.z, c9.y
+add.f r0.w, r1.z, r0.w
+add.f r1.z, r1.w, r2.y
nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.w
-mov.f32f32 r0.x, r0.x
+sel.b32 r0.z, r2.z, r0.z, r4.w
+mul.f r0.x, r0.x, r0.y
+bary.f (ei)r3.w, 5, r1.x
nop
-mov.f32f32 r2.z, r0.y
-(ss)mov.f32f32 r2.y, r0.z
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
-mov.f32f32 r2.x, r0.x
+mul.f r2.z, r0.w, r0.z
+mul.f r2.y, r1.z, r0.z
+add.f r0.x, r0.x, r2.x
+(rpt1)nop
+sam (f32)(w)r0.w, r3.z, s#1, t#1
+(sy)add.f r2.w, c9.z, (neg)r1.z
+mul.f r2.x, r0.x, r0.z
end
nop
nop
nop
; FRAG: outputs: r2.x (1:0)
-; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r0.x (5:10,cm=f,il=12,b=1) r0.w (5:11,cm=f,il=16,b=1) r4.x (5:12,cm=f,il=20,b=1)
-; FRAG: 204 instructions, 0 half, 7 full
+; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r1.x (5:10,cm=f,il=12,b=1) r1.x (5:11,cm=f,il=16,b=1) r4.x (5:12,cm=f,il=20,b=1)
+; FRAG: 124 instructions, 0 half, 7 full
diff --git a/reference/0ad-alpine-valley/0ad-63.asm b/reference/0ad-alpine-valley/0ad-63.asm
index 3ac80f9..b4c4f88 100644
--- a/reference/0ad-alpine-valley/0ad-63.asm
+++ b/reference/0ad-alpine-valley/0ad-63.asm
@@ -1,14 +1,14 @@
; options:
; VERT: new compiler
-@in(r2.w) in0
-@in(r3.x) in1
-@in(r3.y) in2
-@in(r7.x) in4
-@in(r7.y) in5
-@in(r7.z) in6
-@in(r6.y) in8
-@in(r6.z) in9
-@in(r6.w) in10
+@in(r7.x) in0
+@in(r7.y) in1
+@in(r7.z) in2
+@in(r6.y) in4
+@in(r6.z) in5
+@in(r6.w) in6
+@in(r2.w) in8
+@in(r3.x) in9
+@in(r3.y) in10
@in(r3.z) in12
@in(r3.w) in13
@out(r0.x) out0
@@ -39,159 +39,101 @@
@out(r6.y) out25
@out(r6.z) out26
@out(r6.w) out27
-(sy)(ss)add.f r0.x, c4.x, (neg)r2.w
-mul.f r0.y, r7.x, r7.x
-mul.f r0.z, c8.w, r2.w
-mul.f r0.w, c8.z, r2.w
+@const(c13.x) 0x3f800000, 0x00000000, 0xbf800000, 0x00000000
+@const(c14.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
+(sy)(ss)add.f r0.x, c4.x, (neg)r7.x
+mul.f r0.y, r6.y, r6.y
+mul.f r0.z, c8.y, r7.x
+mul.f r0.w, c8.x, r7.x
mul.f r1.x, r0.x, r0.x
-add.f r1.y, c4.y, (neg)r3.x
+add.f r1.z, c4.y, (neg)r7.y
add.f r0.y, c13.x, (neg)r0.y
-mad.f32 r0.z, c9.w, r3.x, r0.z
-mad.f32 r0.w, c9.z, r3.x, r0.w
-mad.f32 r1.x, r1.y, r1.y, r1.x
-mov.f32f32 r0.y, r0.y
-mad.f32 r0.z, c10.w, r3.y, r0.z
-mad.f32 r0.w, c10.z, r3.y, r0.w
-mov.f32f32 r1.x, r1.x
-add.f r1.z, c4.z, (neg)r3.y
-mul.f r1.w, r0.y, r0.y
-mul.f r2.x, r7.y, r7.x
-add.f r0.z, r0.z, c11.w
+mad.f32 r0.z, c9.y, r7.y, r0.z
+mad.f32 r0.w, c9.x, r7.y, r0.w
mad.f32 r1.x, r1.z, r1.z, r1.x
-add.f r0.w, r0.w, c11.z
-mul.f r2.y, c8.y, r2.w
-mul.f r2.z, c8.x, r2.w
-add.f r2.x, c13.y, (neg)r2.x
-mul.f r4.x, r6.w, c6.z
-mov.f32f32 r0.z, r0.z
+add.f r1.w, c4.z, (neg)r7.z
+mov.f32f32 r1.y, r0.y
+mad.f32 r0.z, c10.y, r7.z, r0.z
+mad.f32 r0.w, c10.x, r7.z, r0.w
+mad.f32 r1.x, r1.w, r1.w, r1.x
+mul.f r2.x, r1.y, r1.y
+mul.f r1.y, r6.z, r6.y
+add.f r0.z, r0.z, c11.y
+add.f r0.w, r0.w, c11.x
+mul.f r2.y, r6.w, r6.y
+mul.f r2.z, c8.w, r7.x
rsq r1.x, r1.x
-(ss)mov.f32f32 r1.x, r1.x
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r0.w, r0.w
-mad.f32 r2.y, c9.y, r3.x, r2.y
+(ss)mov.f32f32 r4.x, r1.x
+add.f r4.y, c13.y, (neg)r1.y
mad.f32 r0.x, r0.x, r1.x, (neg)c5.x
-mad.f32 r1.w, r2.x, r2.x, r1.w
-mad.f32 r1.y, r1.y, r1.x, (neg)c5.y
-mad.f32 r1.x, r1.z, r1.x, (neg)c5.z
-mov.f32f32 r0.x, r0.x
+mul.f r1.y, r0.z, c12.y
+mad.f32 r0.z, r1.z, r4.x, (neg)c5.y
+mov.f32f32 r1.z, r4.y
+(ss)mov.f32f32 r1.x, r0.x
+mad.f32 r1.w, r1.w, r4.x, (neg)c5.z
+mov.f32f32 r4.x, r0.z
+mad.f32 r2.x, r4.y, r1.z, r2.x
+mul.f r1.x, r1.x, r1.x
+add.f r2.y, c13.y, (neg)r2.y
+mad.f32 r0.z, r0.z, r4.x, r1.x
mov.f32f32 r4.y, r1.w
-mul.f r1.z, r7.z, r7.x
-mov.f32f32 r1.y, r1.y
-mul.f r1.w, r0.x, r0.x
-mov.f32f32 r1.x, r1.x
-add.f r4.z, c13.y, (neg)r1.z
-mad.f32 r4.w, r1.y, r1.y, r1.w
-mov.f32f32 r1.w, r0.z
-mov.f32f32 r1.z, r0.w
-mad.f32 r0.z, c10.y, r3.y, r2.y
-mov.f32f32 r0.w, r4.w
-mov.f32f32 r2.y, r4.z
-mad.f32 r0.w, r1.x, r1.x, r0.w
-add.f r0.z, r0.z, c11.y
-mad.f32 r2.z, c9.x, r3.x, r2.z
-mov.f32f32 r5.z, r4.x
-mad.f32 r2.z, c10.x, r3.y, r2.z
-mul.f r5.w, c0.w, r2.w
-mul.f r6.x, c0.z, r2.w
-rsq r0.w, r0.w
-(ss)mov.f32f32 r0.w, r0.w
-mad.f32 r4.x, r2.y, r2.y, r4.y
-mul.f r0.z, r0.z, c12.y
-add.f r2.z, r2.z, c11.x
-mul.f r1.x, r1.x, r0.w
-mul.f r4.y, r1.y, r0.w
-mul.f r0.x, r0.x, r0.w
-mov.f32f32 r1.y, r0.z
-mov.f32f32 r0.z, r1.x
-mov.f32f32 r0.w, r4.y
-mov.f32f32 r0.x, r0.x
-rsq r1.x, r4.x
-(ss)mov.f32f32 r1.x, r1.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.x, r0.x
-nop
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-(rpt1)nop
-mov.f32f32 r5.y, r0.z
-mov.f32f32 r5.x, r0.w
-mov.f32f32 r4.w, r0.x
-mul.f r0.x, r0.y, r1.x
-mul.f r0.y, r2.y, r1.x
-mul.f r0.z, r2.x, r1.x
-mul.f r0.w, r2.z, c12.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r1.x, r0.w
-mul.f r0.w, r7.y, r0.x
-mul.f r2.x, r7.x, r0.y
-mad.f32 r0.w, r7.x, r0.z, (neg)r0.w
-mad.f32 r2.x, r7.z, r0.x, (neg)r2.x
-mul.f r2.y, r7.z, r0.z
-mov.f32f32 r2.z, r0.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.x, r2.x
-mad.f32 r0.y, r7.y, r0.y, (neg)r2.y
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r4.z, r0.w
-mov.f32f32 r4.y, r2.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.x, r0.x
+mul.f r1.x, r0.w, c12.x
+mov.f32f32 r0.w, r2.y
+mad.f32 r2.z, c9.w, r7.y, r2.z
+mad.f32 r0.z, r1.w, r4.y, r0.z
+mad.f32 r1.w, c10.w, r7.z, r2.z
+mul.f r2.z, c8.z, r7.x
+mul.f r4.z, c0.w, r7.x
+mul.f r5.z, c0.z, r7.x
+mul.f r5.w, c0.y, r7.x
+mul.f r6.x, c0.x, r7.x
+rsq r0.z, r0.z
+(ss)mov.f32f32 r5.x, r0.z
+mul.f r4.w, r0.x, r0.z
+mad.f32 r0.x, r2.y, r0.w, r2.x
+add.f r1.w, r1.w, c11.w
+mul.f r5.y, r4.y, r5.x
+mul.f r5.x, r4.x, r5.x
+(ss)mad.f32 r0.z, c9.z, r7.y, r2.z
+mad.f32 r2.y, c1.w, r7.y, r4.z
+mad.f32 r5.z, c1.z, r7.y, r5.z
+rsq r0.x, r0.x
+(ss)mov.f32f32 r4.x, r0.x
+mul.f r2.x, r0.y, r0.x
+(ss)mad.f32 r0.x, c10.z, r7.z, r0.z
+mad.f32 r0.y, c2.w, r7.z, r2.y
+mul.f r2.z, r0.w, r4.x
+mul.f r2.y, r1.z, r4.x
+mov.f32f32 r0.z, r2.x
+add.f r1.z, r0.x, c11.z
+mov.f32f32 r0.x, r2.z
+mov.f32f32 r4.z, r2.y
+mul.f r7.w, r6.z, r0.z
+add.f r0.w, r0.y, c3.w
+mul.f r0.y, r6.y, r0.x
+mul.f r4.x, r6.w, r4.z
+mad.f32 r4.y, r6.w, r0.z, (neg)r0.y
+mad.f32 r4.x, r6.z, r0.x, (neg)r4.x
+mad.f32 r4.z, r6.y, r4.z, (neg)r7.w
+mad.f32 r0.x, c2.z, r7.z, r5.z
+mad.f32 r0.y, c1.y, r7.y, r5.w
+mad.f32 r5.z, c1.x, r7.y, r6.x
+mad.f32 r0.y, c2.y, r7.z, r0.y
+add.f r0.z, r0.x, c3.z
+mad.f32 r0.x, c2.x, r7.z, r5.z
nop
-(ss)mov.f32f32 r4.x, r0.y
-mov.f32f32 r2.y, r0.z
-mov.f32f32 r2.x, r0.x
-mov.f32f32 r0.x, r5.z
-mad.f32 r0.y, c1.w, r3.x, r5.w
-mad.f32 r0.z, c1.z, r3.x, r6.x
-mul.f r0.w, c0.y, r2.w
-mov.f32f32 r6.x, r0.x
-mad.f32 r0.x, c2.w, r3.y, r0.y
-mad.f32 r0.y, c2.z, r3.y, r0.z
-mad.f32 r0.z, c1.y, r3.x, r0.w
-mul.f r0.w, c0.x, r2.w
-add.f r0.x, r0.x, c3.w
-add.f r0.y, r0.y, c3.z
-mad.f32 r5.z, c2.y, r3.y, r0.z
-mad.f32 r3.x, c1.x, r3.x, r0.w
-mov.f32f32 r0.w, r0.x
-mov.f32f32 r0.z, r0.y
-add.f r0.x, r5.z, c3.y
-mad.f32 r3.x, c2.x, r3.y, r3.x
-mul.f r5.z, r6.z, c6.y
-mul.f r5.w, r6.y, c6.x
-mov.f32f32 r0.y, r0.x
-add.f r0.x, r3.x, c3.x
-mov.f32f32 r3.x, r5.z
-mov.f32f32 r5.z, r5.w
-mov.f32f32 r6.y, r7.z
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r5.w, r3.x
-mov.f32f32 r5.z, r5.z
-mov.f32f32 r6.w, r6.y
-mov.f32f32 r3.x, r7.y
-mov.f32f32 r6.y, r7.x
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r3.z, r3.z
-mov.f32f32 r6.z, r3.x
-mov.f32f32 r6.y, r6.y
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r3.z, r3.z
-mad.f32 r3.x, c7.x, r3.y, c7.y
-mad.f32 r2.w, c7.x, r2.w, c7.y
-mov.f32f32 r7.x, c13.z
-nop
-mov.f32f32 r3.y, r3.x
-mov.f32f32 r3.x, r2.w
-mov.f32f32 r2.w, r7.x
+add.f r0.y, r0.y, c3.y
+mul.f r6.x, r3.y, c6.z
+add.f r0.x, r0.x, c3.x
+mul.f r5.w, r3.x, c6.y
+mul.f r5.z, r2.w, c6.x
+mad.f32 r3.y, c7.x, r7.z, c7.y
+mad.f32 r3.x, c7.x, r7.x, c7.y
+mov.f32f32 r2.w, c13.z
end
nop
nop
-nop
; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11) r4.x (5:12) r5.x (5:13) r6.x (5:14)
-; VERT: inputs: r2.w (0:0,cm=7,il=8,b=0) r7.x (0:0,cm=7,il=12,b=0) r6.y (0:0,cm=7,il=16,b=0) r3.z (0:0,cm=3,il=20,b=0)
-; VERT: 150 instructions, 0 half, 8 full
+; VERT: inputs: r7.x (0:0,cm=7,il=8,b=0) r6.y (0:0,cm=7,il=12,b=0) r2.w (0:0,cm=7,il=16,b=0) r3.z (0:0,cm=3,il=20,b=0)
+; VERT: 90 instructions, 0 half, 8 full
diff --git a/reference/0ad-alpine-valley/0ad-64.asm b/reference/0ad-alpine-valley/0ad-64.asm
index 9fd1082..78c7452 100644
--- a/reference/0ad-alpine-valley/0ad-64.asm
+++ b/reference/0ad-alpine-valley/0ad-64.asm
@@ -8,327 +8,223 @@
@out(r2.y) out1
@out(r2.z) out2
@out(r2.w) out3
+@const(c14.x) 0x3f000000, 0x00000000, 0x40000000, 0xbf800000
+@const(c15.x) 0xba03126f, 0xbf000000, 0x3f800000, 0x3fb8aa65
+@const(c16.x) 0x3de38866, 0x3cf5c28f, 0x00000000, 0x00000000
+@const(c17.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.x, 10, r1.x
-bary.f r0.y, 0, r1.x
+bary.f r1.z, 0, r1.x
add.f r0.w, r0.w, c14.y
-bary.f r1.z, 1, r1.x
-mov.f32f32 r1.w, r0.x
-add.f r2.x, r0.y, c15.y
-bary.f r2.y, 8, r1.x
+bary.f r1.w, 1, r1.x
+mov.f32f32 r2.x, r0.x
+bary.f r0.y, 11, r1.x
add.f r2.z, r1.z, c15.y
-mov.f32f32 r2.w, r1.w
-bary.f r1.w, 11, r1.x
-floor.f r3.x, r2.x
+add.f r2.w, r1.w, c15.y
+bary.f r3.x, 8, r1.x
+mov.f32f32 r2.y, r0.y
+floor.f r3.z, r2.z
rcp r0.w, r0.w
add.f r0.z, r0.z, c14.y
-floor.f r3.y, r2.z
-mov.f32f32 r3.z, r1.w
-add.f r2.x, r2.x, (neg)r3.x
+floor.f r3.w, r2.w
+bary.f r3.y, 9, r1.x
+add.f r2.z, r2.z, (neg)r3.z
(ss)mul.f r0.z, r0.z, r0.w
-(ss)add.f r0.w, r2.z, (neg)r3.y
-mov.f32f32 r3.x, r3.z
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r0.z, r0.z
-absneg.f r2.z, (neg)c11.x
-mov.f32f32 r0.w, r0.w
-mul.f r3.y, c14.x, r2.x
-add.f r3.z, c14.z, (neg)r2.x
-sam (f32)(xyz)r3.w, r2.w, s#2, t#2
-(sy)(ss)mad.f32 r2.w, c14.z, r3.w, c14.w
-mul.f r2.z, r2.z, c11.x
-mov.f32f32 r3.x, r3.y
-mul.f r3.y, c14.x, r0.w
-mov.f32f32 r2.w, r2.w
+sam (f32)(xyz)r4.x, r2.x, s#2, t#2
+(sy)(ss)mad.f32 r0.w, c14.z, r4.x, c14.w
+absneg.f r2.x, (neg)c11.x
+mov.f32f32 r2.y, r2.z
+add.f r2.w, r2.w, (neg)r3.w
+mov.f32f32 r3.z, r0.w
bary.f r3.w, 4, r1.x
-add.f r0.y, r0.y, (neg)r3.x
-mul.f r2.z, r2.z, r0.z
-mov.f32f32 r3.x, r3.y
-mul.f r3.y, r3.w, r2.w
-mad.f32 r3.w, c14.z, r4.x, c14.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r2.z, r2.z
-add.f r1.z, r1.z, (neg)r3.x
-mov.f32f32 r3.x, r3.w
-bary.f r3.w, 12, r1.x
-bary.f r4.x, 7, r1.x
-add.f r4.z, c15.y, r0.y
-mul.f r0.z, r2.z, r0.z
-add.f r0.y, c15.z, r0.y
-mul.f r2.z, r3.w, (neg)r4.x
-mov.f32f32 r3.w, r4.z
+mul.f r4.x, c14.x, r2.y
+mul.f r2.x, r2.x, c11.x
+mov.f32f32 r4.w, r2.w
+mul.f r3.w, r3.w, r3.z
+mad.f32 r4.y, c14.z, r4.y, c14.w
+add.f r1.z, r1.z, (neg)r4.x
+mul.f r2.x, r2.x, r0.z
mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.y, r0.y
-mad.f32 r2.z, r2.z, r3.x, r3.y
-mul.f r3.y, r3.w, c5.z
+mov.f32f32 r4.x, r4.y
+bary.f r5.x, 12, r1.x
+bary.f r5.y, 7, r1.x
+mov.f32f32 r5.z, r1.z
+mul.f r0.z, r2.x, r0.z
+mul.f r2.x, c14.x, r4.w
+mul.f r5.x, r5.x, (neg)r5.y
+add.f r5.z, c15.y, r5.z
mul.f r0.z, r0.z, c15.w
-mul.f r0.y, r0.y, c5.z
-mov.f32f32 r2.z, r2.z
-mad.f32 r3.w, c14.z, r4.y, c14.w
-mov.f32f32 r4.y, r3.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r4.z, r0.y
-mov.f32f32 r3.w, r3.w
-bary.f r4.w, 21, r1.x
-mov.f32f32 r5.x, r4.y
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r4.y, r4.z
-mad.f32 r2.z, r4.w, r3.w, r2.z
-exp2 r0.z, r0.z
-(ss)mov.f32f32 r0.z, r0.z
-add.f r4.z, c15.y, r1.z
-mov.f32f32 r5.w, r4.y
-mov.f32f32 r2.z, r2.z
-add.f r4.y, c17.y, (neg)r0.z
-mov.f32f32 r4.z, r4.z
+add.f r1.w, r1.w, (neg)r2.x
+mad.f32 r2.x, r5.x, r4.x, r3.w
+mad.f32 r3.w, c14.z, r4.z, c14.w
+mul.f r5.z, r5.z, c5.z
+mov.f32f32 r4.z, r1.w
add.f r1.z, c15.z, r1.z
-mul.f r4.w, r2.z, r2.z
-bary.f r5.y, 5, r1.x
-mul.f r4.z, r4.z, c5.w
-mul.f r4.y, r4.y, c11.y
-mul.f r0.z, r0.z, c15.z
-mul.f r5.z, r5.y, r2.w
-bary.f r5.y, 13, r1.x
-mov.f32f32 r6.x, r4.z
-add.f r0.z, r0.z, r4.y
-mov.f32f32 r1.z, r1.z
-mul.f r4.y, r5.y, (neg)r4.x
-mov.f32f32 r5.y, r6.x
-bary.f r6.x, 2, r1.x
-mov.f32f32 r0.z, r0.z
-mad.f32 r4.y, r4.y, r3.x, r5.z
-mul.f r1.z, r1.z, c5.w
-add.f r6.z, r6.x, c15.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r4.y, r4.y
-bary.f r5.z, 22, r1.x
-mov.f32f32 r6.x, r6.z
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r6.y, r1.z
-mad.f32 r4.y, r5.z, r3.w, r4.y
-mov.f32f32 r5.z, r6.x
+mov.f32f32 r5.x, r3.w
+bary.f r5.w, 21, r1.x
+mov.f32f32 r6.y, r5.z
+exp2 r0.z, r0.z
+(ss)mov.f32f32 r6.x, r0.z
+add.f r4.z, c15.y, r4.z
+mad.f32 r2.x, r5.w, r5.x, r2.x
+mul.f r7.x, r1.z, c5.z
+add.f r1.z, c15.z, r1.w
+add.f r1.w, c17.y, (neg)r6.x
+mov.f32f32 r7.w, r2.x
+mul.f r8.y, r4.z, c5.w
+mov.f32f32 r8.x, r7.x
+mul.f r7.y, r1.z, c5.w
+mul.f r1.z, r2.x, r7.w
+bary.f r2.x, 5, r1.x
+mov.f32f32 r6.z, r8.y
+mul.f r1.w, r1.w, c11.y
+(ss)mul.f r0.z, r0.z, c15.z
+mul.f r2.x, r2.x, r3.z
+bary.f r3.z, 13, r1.x
+bary.f r4.z, 2, r1.x
+add.f r0.z, r0.z, r1.w
+mov.f32f32 r5.w, r7.y
+mul.f r1.w, r3.z, (neg)r5.y
+add.f r7.z, r4.z, c15.x
max.f r0.z, r0.z, c14.y
-mov.f32f32 r6.x, r6.y
-mov.f32f32 r4.y, r4.y
-mov.f32f32 r6.w, r0.y
-mov.f32f32 r0.y, r3.y
+add.f r2.y, c14.z, (neg)r2.y
+mad.f32 r1.w, r1.w, r4.x, r2.x
+bary.f r2.x, 22, r1.x
+mov.f32f32 r6.w, r7.z
min.f r0.z, r0.z, c15.z
-mad.f32 r3.y, r4.y, r4.y, r4.w
-sam.s (f32)(x)r4.w, r5.x, s#4, t#4
-(sy)mov.f32f32 r4.w, r4.w
-mov.f32f32 r6.x, r6.x
-(ss)add.f r5.x, c17.y, (neg)r0.z
-mov.f32f32 r3.y, r3.y
-bary.f r5.y, 6, r1.x
-mov.f32f32 r4.w, r4.w
-mov.f32f32 r3.z, r3.z
-add.f r5.z, c14.z, (neg)r0.w
-mul.f r2.w, r5.y, r2.w
-bary.f r5.y, 14, r1.x
-mul.f r5.x, r5.x, c10.z
-add.f r6.y, c17.y, (neg)r0.z
-add.f r7.x, c17.y, (neg)r0.z
-mul.f r4.x, r5.y, (neg)r4.x
-mov.f32f32 r5.y, r5.z
-mul.f r5.z, r6.y, c10.y
-mul.f r7.z, r7.x, c10.x
-mad.f32 r2.w, r4.x, r3.x, r2.w
-mul.f r3.x, r3.z, r5.y
-mov.f32f32 r4.x, r6.z
-mov.f32f32 r4.z, r4.z
-mov.f32f32 r2.w, r2.w
-bary.f r6.y, 23, r1.x
-mul.f r3.x, r3.x, r4.w
-mov.f32f32 r7.x, r4.z
-mov.f32f32 r4.z, r6.z
-mad.f32 r2.w, r6.y, r3.w, r2.w
-mov.f32f32 r6.y, r4.x
-mov.f32f32 r7.w, r0.y
-mov.f32f32 r7.y, r4.z
-mov.f32f32 r0.y, r2.w
-mov.f32f32 r1.z, r1.z
-add.f r2.x, r2.x, c15.z
-add.f r0.w, r0.w, c15.z
-mad.f32 r2.w, r0.y, r0.y, r3.y
-sam.s (f32)(x)r8.y, r5.w, s#4, t#4
-mov.f32f32 r8.x, r1.z
-sam.s (f32)(x)r8.z, r6.w, s#4, t#4
-mov.f32f32 r1.z, r6.z
-mul.f r3.y, r2.x, r5.y
-mul.f r3.z, r3.z, r0.w
-(sy)mov.f32f32 r3.w, r8.z
-mov.f32f32 r4.x, r8.y
-rsq r2.w, r2.w
-(ss)mov.f32f32 r2.w, r2.w
-mov.f32f32 r8.y, r1.z
-mov.f32f32 r1.z, r2.y
-mul.f r0.w, r2.x, r0.w
-mul.f r2.x, r2.z, r2.w
-mad.f32 r2.y, r3.y, r3.w, r3.x
-mul.f r2.z, r4.y, r2.w
-mul.f r0.y, r0.y, r2.w
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r2.y, r2.y
-sam.s (f32)(x)r5.w, r7.w, s#4, t#4
-(sy)mov.f32f32 r2.w, r5.w
-mov.f32f32 r2.z, r2.z
-mul.f r3.x, r2.x, r2.x
-mul.f r3.y, (neg)c9.x, r2.x
-mad.f32 r2.y, r3.z, r2.w, r2.y
-mad.f32 r2.w, r2.z, r2.z, r3.x
-mad.f32 r3.x, (neg)c9.y, r2.z, r3.y
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r3.y, r1.z
-mov.f32f32 r1.z, r2.w
-mov.f32f32 r2.w, r3.x
-mad.f32 r1.z, r0.y, r0.y, r1.z
-mad.f32 r2.w, (neg)c9.z, r0.y, r2.w
-mov.f32f32 r2.y, r2.y
-bary.f r3.x, 9, r1.x
-mov.f32f32 r3.w, r0.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r4.y, r1.w
+mov.f32f32 r8.z, r7.z
+mad.f32 r1.w, r2.x, r5.x, r1.w
+mov.f32f32 r6.x, r7.z
+add.f r2.x, c17.y, (neg)r0.z
+add.f r3.z, c17.y, (neg)r0.z
+mov.f32f32 r4.x, r1.w
+sam.s (f32)(x)r8.w, r6.y, s#4, t#4
+add.f r4.z, c17.y, (neg)r0.z
+mov.f32f32 r5.x, r2.y
+add.f r4.w, c14.z, (neg)r4.w
+mad.f32 r1.z, r1.w, r4.x, r1.z
+bary.f r1.w, 6, r1.x
+(ss)mul.f r6.y, r2.x, c10.z
+mul.f r3.z, r3.z, c10.y
+mul.f r4.z, r4.z, c10.x
+mul.f r0.w, r1.w, r0.w
+bary.f r1.w, 14, r1.x
+mov.f32f32 r2.x, r4.w
+sam.s (f32)(x)r9.x, r8.x, s#4, t#4
+sam.s (f32)(x)r9.y, r5.z, s#4, t#4
+sam.s (f32)(x)r6.z, r7.x, s#4, t#4
+add.f r2.z, r2.z, c15.z
+add.f r2.w, r2.w, c15.z
+mul.f r1.w, r1.w, (neg)r5.y
+mul.f r2.x, r5.x, r2.x
+mul.f r4.w, r2.z, r4.w
+mul.f r2.y, r2.y, r2.w
+mad.f32 r0.w, r1.w, r4.y, r0.w
+bary.f r1.w, 23, r1.x
+(sy)mul.f r2.x, r2.x, r8.w
+mul.f r2.z, r2.z, r2.w
+mad.f32 r2.x, r4.w, r9.x, r2.x
+mad.f32 r0.w, r1.w, r3.w, r0.w
+mad.f32 r1.w, r2.y, r9.y, r2.x
+(ss)nop
+sam (f32)(w)r4.w, r3.x, s#1, t#1
+(sy)cmps.f.lt r2.x, r5.z, c16.y
+mov.f32f32 r2.w, r0.x
+mov.f32f32 r2.y, r0.w
+mad.f32 r1.w, r2.z, r6.z, r1.w
+cov.u32f32 r2.x, r2.x
+(ss)mov.f32f32 r3.x, r0.y
+mad.f32 r1.z, r2.y, r2.y, r1.z
+mul.f r1.w, c16.x, r1.w
+cmps.f.ne r2.x, r2.x, c14.y
+(rpt3)nop
rsq r1.z, r1.z
-(ss)mov.f32f32 r1.z, r1.z
-mov.f32f32 r2.w, r2.w
-mad.f32 r0.w, r0.w, r4.x, r2.y
-mov.f32f32 r2.y, r3.x
-mul.f r2.x, r2.x, r1.z
-max.f r2.w, r2.w, c14.y
-mov.f32f32 r0.w, r0.w
-mul.f r2.z, r2.z, r1.z
-mov.f32f32 r2.x, r2.x
-bary.f r3.x, 15, r1.x
-mov.f32f32 r2.w, r2.w
-bary.f r3.z, 19, r1.x
-bary.f r4.x, 18, r1.x
-mov.f32f32 r3.x, r3.x
-bary.f r4.z, 20, r1.x
-mov.f32f32 r3.z, r3.z
-mov.f32f32 r4.x, r4.x
-mul.f r2.x, r2.x, r3.x
-mov.f32f32 r2.z, r2.z
-bary.f r3.x, 16, r1.x
-mov.f32f32 r4.z, r4.z
-mad.f32 r4.w, c8.y, r2.w, (neg)r3.z
-mad.f32 r5.y, c8.x, r2.w, (neg)r4.x
-mov.f32f32 r3.x, r3.x
-mad.f32 r2.w, c8.z, r2.w, (neg)r4.z
-mov.f32f32 r4.w, r4.w
-mov.f32f32 r5.y, r5.y
-mad.f32 r2.x, r2.z, r3.x, r2.x
-mov.f32f32 r2.z, r2.w
-mad.f32 r2.w, c12.x, r4.w, r3.z
-mad.f32 r3.x, c12.x, r5.y, r4.x
-mov.f32f32 r2.x, r2.x
-mul.f r0.y, r0.y, r1.z
-mad.f32 r1.z, c12.x, r2.z, r4.z
-mov.f32f32 r2.z, r2.w
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r0.y, r0.y
+(ss)mov.f32f32 r2.y, r1.z
+mov.f32f32 r3.y, r1.w
+mul.f r0.w, r0.w, r1.z
+(ss)mov.f32f32 r1.z, c14.y
+mul.f r2.z, r7.w, r2.y
+mul.f r2.y, r4.x, r2.y
+mov.f32f32 r3.w, r0.w
+sel.b32 r1.z, r1.z, r2.x, r5.z
+mov.f32f32 r2.x, r2.z
+mul.f r2.z, (neg)c9.x, r2.z
+mov.f32f32 r4.x, r2.y
+mad.f32 r2.y, (neg)c9.y, r2.y, r2.z
+mul.f r2.z, r2.x, r2.x
+mad.f32 r0.w, (neg)c9.z, r0.w, r2.y
+mad.f32 r2.y, r4.x, r4.x, r2.z
+sam (f32)(xyz)r4.w, r2.w, s#3, t#3
+(sy)(ss)mul.f r3.x, c8.z, r5.y
+mad.f32 r2.y, r3.w, r3.w, r2.y
+max.f r0.w, r0.w, c14.y
+bary.f r2.z, 18, r1.x
+(rpt1)nop
+mov.f32f32 r2.w, r0.w
+bary.f r4.y, 19, r1.x
+rsq r2.y, r2.y
+(ss)mov.f32f32 r5.y, r2.y
+bary.f r5.z, 20, r1.x
+mad.f32 r5.w, c8.y, r2.w, (neg)r4.y
+mad.f32 r2.w, c8.z, r2.w, (neg)r5.z
+mul.f r2.x, r2.x, r5.y
+bary.f r6.x, 15, r1.x
+mov.f32f32 r5.z, r5.z
+mov.f32f32 r4.y, r4.y
+mul.f r4.x, r4.x, r5.y
+mul.f r2.x, r2.x, r6.x
+bary.f r5.y, 16, r1.x
+mad.f32 r5.z, c12.x, r2.w, r5.z
+mad.f32 r4.y, c12.x, r5.w, r4.y
+(ss)mul.f r2.y, r3.w, r2.y
+mad.f32 r2.x, r4.x, r5.y, r2.x
bary.f (ei)r1.x, 17, r1.x
-mov.f32f32 r1.y, r1.z
-mul.f r0.w, c16.x, r0.w
-nop
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r3.z, r2.y
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r4.z, r0.x
-mad.f32 r0.x, r0.y, r1.x, r2.x
-mov.f32f32 r0.y, r0.w
-mov.f32f32 r0.w, r1.w
-mov.f32f32 r4.w, r4.y
-mov.f32f32 r0.x, r0.x
-sam (f32)(w)r1.z, r3.y, s#1, t#1
-(sy)mov.f32f32 r1.x, r2.y
-cmps.f.lt r1.z, r2.y, c16.y
-mov.f32f32 r4.x, r0.w
-max.f r0.x, c14.y, r0.x
-mov.f32f32 r0.w, r1.x
-cov.u32f32 r1.x, r1.z
-sam (f32)(xyz)r1.z, r4.z, s#3, t#3
-(sy)mul.f r2.x, c8.z, r2.x
-mov.f32f32 r0.x, r0.x
-mul.f r1.w, c8.y, r1.w
-mul.f r1.z, c8.x, r1.z
-(ss)nop
-sam (f32)(xyzw)r3.y, r3.w, s#0, t#0
-cmps.f.ne r1.x, r1.x, c14.y
-(rpt2)nop
-log2 r0.x, r0.x
+mad.f32 r0.w, c8.x, r0.w, (neg)r2.z
+(rpt1)nop
+mad.f32 r1.x, r2.y, r1.x, r2.x
+mov.f32f32 r1.y, r2.z
+mul.f r3.w, c8.y, r5.x
+mul.f r4.x, c8.x, r4.w
+max.f r1.x, c14.y, r1.x
+mad.f32 r0.w, c12.x, r0.w, r1.y
+sam (f32)(xyzw)r2.x, r0.x, s#0, t#0
+(rpt4)nop
+(ss)log2 r0.x, r1.x
(ss)mul.f r0.x, c12.y, r0.x
-mov.f32f32 r2.y, c14.y
-mov.f32f32 r2.x, r2.x
-(sy)mov.f32f32 r2.w, r4.x
-mov.f32f32 r0.x, r0.x
-sel.b32 r0.w, r2.y, r1.x, r0.w
-mov.f32f32 r1.x, r1.w
-mov.f32f32 r1.z, r1.z
-nop
-mov.f32f32 r2.w, r2.w
-nop
+(rpt5)nop
exp2 r0.x, r0.x
-(ss)mul.f r1.w, r2.x, r0.x
-mul.f r1.x, r1.x, r0.x
-mad.f32 r1.y, r3.w, r1.y, r1.w
-mad.f32 r1.x, r3.z, r2.z, r1.x
-(ss)mul.f r0.x, r1.z, r0.x
-nop
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.x, r1.x
-mad.f32 r0.x, r3.y, r3.x, r0.x
-nop
-mul.f r1.y, r1.y, r0.y
-mul.f r1.x, r1.x, r0.y
-mov.f32f32 r0.x, r0.x
-nop
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.x, r1.x
-mad.f32 r1.y, c7.z, r3.w, r1.y
-mad.f32 r1.x, c7.y, r3.z, r1.x
-mul.f r0.x, r0.x, r0.y
-nop
-mov.f32f32 r0.y, r1.y
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r0.x, r0.x
+(ss)mul.f r0.y, r3.x, r0.x
+(ss)mul.f r1.x, r3.w, r0.x
+(sy)mad.f32 r0.y, r2.z, r5.z, r0.y
+mad.f32 r1.x, r2.y, r4.y, r1.x
+mul.f r0.x, r4.x, r0.x
+nop
+mul.f r0.y, r0.y, r3.y
+mul.f r1.x, r1.x, r3.y
+mad.f32 r0.y, c7.z, r2.z, r0.y
+mad.f32 r1.x, c7.y, r2.y, r1.x
+mad.f32 r0.x, r2.x, r0.w, r0.x
nop
mul.f r0.y, r0.z, r0.y
-mul.f r1.x, r0.z, r1.x
-mad.f32 r0.x, c7.x, r3.y, r0.x
+mul.f r0.w, r0.z, r1.x
+mul.f r0.x, r0.x, r1.w
nop
-add.f r0.y, r0.y, r5.x
-add.f r1.x, r1.x, r5.z
-mov.f32f32 r0.x, r0.x
+add.f r0.y, r0.y, r6.y
+add.f r0.w, r0.w, r3.z
+mad.f32 r0.x, c7.x, r2.x, r0.x
nop
-mul.f r0.y, r0.y, r0.w
-mul.f r1.x, r1.x, r0.w
+mul.f r0.y, r0.y, r1.z
+mul.f r0.w, r0.w, r1.z
mul.f r0.x, r0.z, r0.x
nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r1.x
-add.f r0.x, r0.x, r7.z
-nop
-mul.f r0.y, r0.y, c6.z
-mul.f r0.z, r0.z, c6.y
-mul.f r0.x, r0.x, r0.w
-nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.x, r0.x
-nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-mul.f r0.x, r0.x, c6.x
-nop
-mov.f32f32 r2.z, r0.y
-mov.f32f32 r2.y, r0.z
-mov.f32f32 r0.x, r0.x
+mul.f r2.z, r0.y, c6.z
+mul.f r2.y, r0.w, c6.y
+add.f r0.x, r0.x, r4.z
(rpt2)nop
-mov.f32f32 r0.x, r0.x
+mul.f r0.x, r0.x, r1.z
(rpt2)nop
-mov.f32f32 r2.x, r0.x
+mul.f r2.x, r0.x, c6.x
end
-nop
; FRAG: outputs: r2.x (1:0)
-; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r3.y (5:10,cm=f,il=12,b=1) r1.x (5:11,cm=f,il=16,b=1) r2.y (5:12,cm=f,il=20,b=1) r2.w (5:13,cm=f,il=24,b=1) r5.z (5:14,cm=f,il=28,b=1)
-; FRAG: 325 instructions, 0 half, 9 full
+; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r4.z (5:10,cm=f,il=12,b=1) r63.z (5:11,cm=f,il=16,b=1) r5.y (5:12,cm=f,il=20,b=1) r3.z (5:13,cm=f,il=24,b=1) r1.x (5:14,cm=f,il=28,b=1)
+; FRAG: 230 instructions, 0 half, 10 full
diff --git a/reference/0ad-alpine-valley/0ad-65.asm b/reference/0ad-alpine-valley/0ad-65.asm
index 3ac80f9..b4c4f88 100644
--- a/reference/0ad-alpine-valley/0ad-65.asm
+++ b/reference/0ad-alpine-valley/0ad-65.asm
@@ -1,14 +1,14 @@
; options:
; VERT: new compiler
-@in(r2.w) in0
-@in(r3.x) in1
-@in(r3.y) in2
-@in(r7.x) in4
-@in(r7.y) in5
-@in(r7.z) in6
-@in(r6.y) in8
-@in(r6.z) in9
-@in(r6.w) in10
+@in(r7.x) in0
+@in(r7.y) in1
+@in(r7.z) in2
+@in(r6.y) in4
+@in(r6.z) in5
+@in(r6.w) in6
+@in(r2.w) in8
+@in(r3.x) in9
+@in(r3.y) in10
@in(r3.z) in12
@in(r3.w) in13
@out(r0.x) out0
@@ -39,159 +39,101 @@
@out(r6.y) out25
@out(r6.z) out26
@out(r6.w) out27
-(sy)(ss)add.f r0.x, c4.x, (neg)r2.w
-mul.f r0.y, r7.x, r7.x
-mul.f r0.z, c8.w, r2.w
-mul.f r0.w, c8.z, r2.w
+@const(c13.x) 0x3f800000, 0x00000000, 0xbf800000, 0x00000000
+@const(c14.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
+(sy)(ss)add.f r0.x, c4.x, (neg)r7.x
+mul.f r0.y, r6.y, r6.y
+mul.f r0.z, c8.y, r7.x
+mul.f r0.w, c8.x, r7.x
mul.f r1.x, r0.x, r0.x
-add.f r1.y, c4.y, (neg)r3.x
+add.f r1.z, c4.y, (neg)r7.y
add.f r0.y, c13.x, (neg)r0.y
-mad.f32 r0.z, c9.w, r3.x, r0.z
-mad.f32 r0.w, c9.z, r3.x, r0.w
-mad.f32 r1.x, r1.y, r1.y, r1.x
-mov.f32f32 r0.y, r0.y
-mad.f32 r0.z, c10.w, r3.y, r0.z
-mad.f32 r0.w, c10.z, r3.y, r0.w
-mov.f32f32 r1.x, r1.x
-add.f r1.z, c4.z, (neg)r3.y
-mul.f r1.w, r0.y, r0.y
-mul.f r2.x, r7.y, r7.x
-add.f r0.z, r0.z, c11.w
+mad.f32 r0.z, c9.y, r7.y, r0.z
+mad.f32 r0.w, c9.x, r7.y, r0.w
mad.f32 r1.x, r1.z, r1.z, r1.x
-add.f r0.w, r0.w, c11.z
-mul.f r2.y, c8.y, r2.w
-mul.f r2.z, c8.x, r2.w
-add.f r2.x, c13.y, (neg)r2.x
-mul.f r4.x, r6.w, c6.z
-mov.f32f32 r0.z, r0.z
+add.f r1.w, c4.z, (neg)r7.z
+mov.f32f32 r1.y, r0.y
+mad.f32 r0.z, c10.y, r7.z, r0.z
+mad.f32 r0.w, c10.x, r7.z, r0.w
+mad.f32 r1.x, r1.w, r1.w, r1.x
+mul.f r2.x, r1.y, r1.y
+mul.f r1.y, r6.z, r6.y
+add.f r0.z, r0.z, c11.y
+add.f r0.w, r0.w, c11.x
+mul.f r2.y, r6.w, r6.y
+mul.f r2.z, c8.w, r7.x
rsq r1.x, r1.x
-(ss)mov.f32f32 r1.x, r1.x
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r0.w, r0.w
-mad.f32 r2.y, c9.y, r3.x, r2.y
+(ss)mov.f32f32 r4.x, r1.x
+add.f r4.y, c13.y, (neg)r1.y
mad.f32 r0.x, r0.x, r1.x, (neg)c5.x
-mad.f32 r1.w, r2.x, r2.x, r1.w
-mad.f32 r1.y, r1.y, r1.x, (neg)c5.y
-mad.f32 r1.x, r1.z, r1.x, (neg)c5.z
-mov.f32f32 r0.x, r0.x
+mul.f r1.y, r0.z, c12.y
+mad.f32 r0.z, r1.z, r4.x, (neg)c5.y
+mov.f32f32 r1.z, r4.y
+(ss)mov.f32f32 r1.x, r0.x
+mad.f32 r1.w, r1.w, r4.x, (neg)c5.z
+mov.f32f32 r4.x, r0.z
+mad.f32 r2.x, r4.y, r1.z, r2.x
+mul.f r1.x, r1.x, r1.x
+add.f r2.y, c13.y, (neg)r2.y
+mad.f32 r0.z, r0.z, r4.x, r1.x
mov.f32f32 r4.y, r1.w
-mul.f r1.z, r7.z, r7.x
-mov.f32f32 r1.y, r1.y
-mul.f r1.w, r0.x, r0.x
-mov.f32f32 r1.x, r1.x
-add.f r4.z, c13.y, (neg)r1.z
-mad.f32 r4.w, r1.y, r1.y, r1.w
-mov.f32f32 r1.w, r0.z
-mov.f32f32 r1.z, r0.w
-mad.f32 r0.z, c10.y, r3.y, r2.y
-mov.f32f32 r0.w, r4.w
-mov.f32f32 r2.y, r4.z
-mad.f32 r0.w, r1.x, r1.x, r0.w
-add.f r0.z, r0.z, c11.y
-mad.f32 r2.z, c9.x, r3.x, r2.z
-mov.f32f32 r5.z, r4.x
-mad.f32 r2.z, c10.x, r3.y, r2.z
-mul.f r5.w, c0.w, r2.w
-mul.f r6.x, c0.z, r2.w
-rsq r0.w, r0.w
-(ss)mov.f32f32 r0.w, r0.w
-mad.f32 r4.x, r2.y, r2.y, r4.y
-mul.f r0.z, r0.z, c12.y
-add.f r2.z, r2.z, c11.x
-mul.f r1.x, r1.x, r0.w
-mul.f r4.y, r1.y, r0.w
-mul.f r0.x, r0.x, r0.w
-mov.f32f32 r1.y, r0.z
-mov.f32f32 r0.z, r1.x
-mov.f32f32 r0.w, r4.y
-mov.f32f32 r0.x, r0.x
-rsq r1.x, r4.x
-(ss)mov.f32f32 r1.x, r1.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.x, r0.x
-nop
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-(rpt1)nop
-mov.f32f32 r5.y, r0.z
-mov.f32f32 r5.x, r0.w
-mov.f32f32 r4.w, r0.x
-mul.f r0.x, r0.y, r1.x
-mul.f r0.y, r2.y, r1.x
-mul.f r0.z, r2.x, r1.x
-mul.f r0.w, r2.z, c12.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r1.x, r0.w
-mul.f r0.w, r7.y, r0.x
-mul.f r2.x, r7.x, r0.y
-mad.f32 r0.w, r7.x, r0.z, (neg)r0.w
-mad.f32 r2.x, r7.z, r0.x, (neg)r2.x
-mul.f r2.y, r7.z, r0.z
-mov.f32f32 r2.z, r0.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.x, r2.x
-mad.f32 r0.y, r7.y, r0.y, (neg)r2.y
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r4.z, r0.w
-mov.f32f32 r4.y, r2.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.x, r0.x
+mul.f r1.x, r0.w, c12.x
+mov.f32f32 r0.w, r2.y
+mad.f32 r2.z, c9.w, r7.y, r2.z
+mad.f32 r0.z, r1.w, r4.y, r0.z
+mad.f32 r1.w, c10.w, r7.z, r2.z
+mul.f r2.z, c8.z, r7.x
+mul.f r4.z, c0.w, r7.x
+mul.f r5.z, c0.z, r7.x
+mul.f r5.w, c0.y, r7.x
+mul.f r6.x, c0.x, r7.x
+rsq r0.z, r0.z
+(ss)mov.f32f32 r5.x, r0.z
+mul.f r4.w, r0.x, r0.z
+mad.f32 r0.x, r2.y, r0.w, r2.x
+add.f r1.w, r1.w, c11.w
+mul.f r5.y, r4.y, r5.x
+mul.f r5.x, r4.x, r5.x
+(ss)mad.f32 r0.z, c9.z, r7.y, r2.z
+mad.f32 r2.y, c1.w, r7.y, r4.z
+mad.f32 r5.z, c1.z, r7.y, r5.z
+rsq r0.x, r0.x
+(ss)mov.f32f32 r4.x, r0.x
+mul.f r2.x, r0.y, r0.x
+(ss)mad.f32 r0.x, c10.z, r7.z, r0.z
+mad.f32 r0.y, c2.w, r7.z, r2.y
+mul.f r2.z, r0.w, r4.x
+mul.f r2.y, r1.z, r4.x
+mov.f32f32 r0.z, r2.x
+add.f r1.z, r0.x, c11.z
+mov.f32f32 r0.x, r2.z
+mov.f32f32 r4.z, r2.y
+mul.f r7.w, r6.z, r0.z
+add.f r0.w, r0.y, c3.w
+mul.f r0.y, r6.y, r0.x
+mul.f r4.x, r6.w, r4.z
+mad.f32 r4.y, r6.w, r0.z, (neg)r0.y
+mad.f32 r4.x, r6.z, r0.x, (neg)r4.x
+mad.f32 r4.z, r6.y, r4.z, (neg)r7.w
+mad.f32 r0.x, c2.z, r7.z, r5.z
+mad.f32 r0.y, c1.y, r7.y, r5.w
+mad.f32 r5.z, c1.x, r7.y, r6.x
+mad.f32 r0.y, c2.y, r7.z, r0.y
+add.f r0.z, r0.x, c3.z
+mad.f32 r0.x, c2.x, r7.z, r5.z
nop
-(ss)mov.f32f32 r4.x, r0.y
-mov.f32f32 r2.y, r0.z
-mov.f32f32 r2.x, r0.x
-mov.f32f32 r0.x, r5.z
-mad.f32 r0.y, c1.w, r3.x, r5.w
-mad.f32 r0.z, c1.z, r3.x, r6.x
-mul.f r0.w, c0.y, r2.w
-mov.f32f32 r6.x, r0.x
-mad.f32 r0.x, c2.w, r3.y, r0.y
-mad.f32 r0.y, c2.z, r3.y, r0.z
-mad.f32 r0.z, c1.y, r3.x, r0.w
-mul.f r0.w, c0.x, r2.w
-add.f r0.x, r0.x, c3.w
-add.f r0.y, r0.y, c3.z
-mad.f32 r5.z, c2.y, r3.y, r0.z
-mad.f32 r3.x, c1.x, r3.x, r0.w
-mov.f32f32 r0.w, r0.x
-mov.f32f32 r0.z, r0.y
-add.f r0.x, r5.z, c3.y
-mad.f32 r3.x, c2.x, r3.y, r3.x
-mul.f r5.z, r6.z, c6.y
-mul.f r5.w, r6.y, c6.x
-mov.f32f32 r0.y, r0.x
-add.f r0.x, r3.x, c3.x
-mov.f32f32 r3.x, r5.z
-mov.f32f32 r5.z, r5.w
-mov.f32f32 r6.y, r7.z
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r5.w, r3.x
-mov.f32f32 r5.z, r5.z
-mov.f32f32 r6.w, r6.y
-mov.f32f32 r3.x, r7.y
-mov.f32f32 r6.y, r7.x
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r3.z, r3.z
-mov.f32f32 r6.z, r3.x
-mov.f32f32 r6.y, r6.y
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r3.z, r3.z
-mad.f32 r3.x, c7.x, r3.y, c7.y
-mad.f32 r2.w, c7.x, r2.w, c7.y
-mov.f32f32 r7.x, c13.z
-nop
-mov.f32f32 r3.y, r3.x
-mov.f32f32 r3.x, r2.w
-mov.f32f32 r2.w, r7.x
+add.f r0.y, r0.y, c3.y
+mul.f r6.x, r3.y, c6.z
+add.f r0.x, r0.x, c3.x
+mul.f r5.w, r3.x, c6.y
+mul.f r5.z, r2.w, c6.x
+mad.f32 r3.y, c7.x, r7.z, c7.y
+mad.f32 r3.x, c7.x, r7.x, c7.y
+mov.f32f32 r2.w, c13.z
end
nop
nop
-nop
; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11) r4.x (5:12) r5.x (5:13) r6.x (5:14)
-; VERT: inputs: r2.w (0:0,cm=7,il=8,b=0) r7.x (0:0,cm=7,il=12,b=0) r6.y (0:0,cm=7,il=16,b=0) r3.z (0:0,cm=3,il=20,b=0)
-; VERT: 150 instructions, 0 half, 8 full
+; VERT: inputs: r7.x (0:0,cm=7,il=8,b=0) r6.y (0:0,cm=7,il=12,b=0) r2.w (0:0,cm=7,il=16,b=0) r3.z (0:0,cm=3,il=20,b=0)
+; VERT: 90 instructions, 0 half, 8 full
diff --git a/reference/0ad-alpine-valley/0ad-66.asm b/reference/0ad-alpine-valley/0ad-66.asm
index 035c379..5b14079 100644
--- a/reference/0ad-alpine-valley/0ad-66.asm
+++ b/reference/0ad-alpine-valley/0ad-66.asm
@@ -1,13 +1,13 @@
; options:
; VERT: new compiler
-@in(r3.x) in0
-@in(r3.y) in1
-@in(r3.z) in2
+@in(r4.x) in0
+@in(r4.y) in1
+@in(r4.z) in2
@in(r0.x) in4
@in(r0.y) in5
@in(r0.z) in6
-@in(r4.x) in8
-@in(r4.y) in9
+@in(r3.z) in8
+@in(r3.w) in9
@out(r0.x) out0
@out(r0.y) out1
@out(r0.z) out2
@@ -24,79 +24,57 @@
@out(r3.y) out13
@out(r3.z) out14
@out(r3.w) out15
+@const(c12.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
+@const(c13.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)mul.f r0.x, r0.x, (neg)c4.x
-mul.f r0.w, c7.w, r3.x
+mul.f r0.w, c7.y, r4.x
mad.f32 r0.x, (neg)c4.y, r0.y, r0.x
-mad.f32 r0.y, c8.w, r3.y, r0.w
-mul.f r0.w, c7.z, r3.x
-mul.f r1.x, c7.y, r3.x
-mov.f32f32 r0.x, r0.x
-mad.f32 r0.y, c9.w, r3.z, r0.y
+mad.f32 r0.y, c8.y, r4.y, r0.w
mad.f32 r0.x, (neg)c4.z, r0.z, r0.x
-mad.f32 r0.z, c8.z, r3.y, r0.w
-mad.f32 r0.w, c8.y, r3.y, r1.x
-mul.f r1.w, c7.x, r3.x
+mad.f32 r0.y, c9.y, r4.z, r0.y
+mul.f r0.z, c7.x, r4.x
+mul.f r0.w, c7.w, r4.x
max.f r0.x, c12.x, r0.x
-add.f r0.y, r0.y, c10.w
-mad.f32 r0.z, c9.z, r3.z, r0.z
-mad.f32 r0.w, c9.y, r3.z, r0.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
-add.f r0.z, r0.z, c10.z
-add.f r0.w, r0.w, c10.y
-mul.f r1.x, r0.x, c5.z
-mul.f r1.y, r0.x, c5.y
-mul.f r0.x, r0.x, c5.x
-mov.f32f32 r2.w, r0.y
-mov.f32f32 r0.y, r1.x
-mov.f32f32 r1.x, r1.y
-mov.f32f32 r0.x, r0.x
-nop
-mov.f32f32 r1.z, r0.y
-mov.f32f32 r1.y, r1.x
-mov.f32f32 r1.x, r0.x
-mov.f32f32 r0.x, r0.z
-mul.f r0.y, r0.w, c11.y
-mad.f32 r0.z, c8.x, r3.y, r1.w
-mul.f r0.w, c0.w, r3.x
-mov.f32f32 r2.z, r0.x
-mov.f32f32 r2.y, r0.y
-mad.f32 r0.x, c9.x, r3.z, r0.z
-mad.f32 r0.y, c1.w, r3.y, r0.w
-mul.f r0.z, c0.z, r3.x
-mul.f r0.w, c0.y, r3.x
-add.f r0.x, r0.x, c10.x
-mad.f32 r0.y, c2.w, r3.z, r0.y
-mad.f32 r0.z, c1.z, r3.y, r0.z
-mad.f32 r0.w, c1.y, r3.y, r0.w
-mul.f r0.x, r0.x, c11.x
-add.f r0.y, r0.y, c3.w
-mad.f32 r0.z, c2.z, r3.z, r0.z
-mad.f32 r1.w, c2.y, r3.z, r0.w
-mov.f32f32 r2.x, r0.x
-mov.f32f32 r0.w, r0.y
-add.f r0.x, r0.z, c3.z
-add.f r0.y, r1.w, c3.y
-mul.f r1.w, c0.x, r3.x
-mov.f32f32 r3.w, r4.y
-mov.f32f32 r0.z, r0.x
-mov.f32f32 r0.y, r0.y
-mad.f32 r0.x, c1.x, r3.y, r1.w
-mov.f32f32 r3.w, r3.w
-mad.f32 r0.x, c2.x, r3.z, r0.x
-mov.f32f32 r1.w, r4.x
-mad.f32 r3.y, c6.x, r3.z, c6.y
-mad.f32 r3.x, c6.x, r3.x, c6.y
-add.f r0.x, r0.x, c3.x
-mov.f32f32 r3.z, r1.w
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r0.x, r0.x
+add.f r0.y, r0.y, c10.y
+mad.f32 r0.z, c8.x, r4.y, r0.z
+mad.f32 r0.w, c8.w, r4.y, r0.w
+mov.f32f32 r1.y, r0.x
+mul.f r2.y, r0.y, c11.y
+mad.f32 r0.y, c9.x, r4.z, r0.z
+mul.f r1.x, r0.x, c5.x
+mul.f r1.z, r1.y, c5.z
+mul.f r1.y, r1.y, c5.y
+add.f r0.x, r0.y, c10.x
+mad.f32 r0.y, c9.w, r4.z, r0.w
+mul.f r0.z, c7.z, r4.x
+mul.f r0.w, c0.w, r4.x
+mul.f r2.x, r0.x, c11.x
+add.f r2.w, r0.y, c10.w
+mad.f32 r0.x, c8.z, r4.y, r0.z
+mad.f32 r0.y, c1.w, r4.y, r0.w
+mad.f32 r0.x, c9.z, r4.z, r0.x
+mad.f32 r0.y, c2.w, r4.z, r0.y
+mul.f r0.z, c0.z, r4.x
+mul.f r1.w, c0.y, r4.x
+add.f r2.z, r0.x, c10.z
+add.f r0.w, r0.y, c3.w
+mad.f32 r0.x, c1.z, r4.y, r0.z
+mad.f32 r0.y, c1.y, r4.y, r1.w
+mad.f32 r0.x, c2.z, r4.z, r0.x
+mad.f32 r0.y, c2.y, r4.z, r0.y
+mul.f r1.w, c0.x, r4.x
+mad.f32 r3.y, c6.x, r4.z, c6.y
+add.f r0.z, r0.x, c3.z
+add.f r0.y, r0.y, c3.y
+mad.f32 r0.x, c1.x, r4.y, r1.w
+mad.f32 r3.x, c6.x, r4.x, c6.y
+mad.f32 r0.x, c2.x, r4.z, r0.x
mov.f32f32 r1.w, (0.000000)
-(rpt2)nop
-mov.f32f32 r1.w, r1.w
+(rpt1)nop
+add.f r0.x, r0.x, c3.x
end
+nop
; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11)
-; VERT: inputs: r3.x (0:0,cm=7,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r4.x (0:0,cm=3,il=16,b=0)
-; VERT: 74 instructions, 0 half, 5 full
+; VERT: inputs: r4.x (0:0,cm=7,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r3.z (0:0,cm=3,il=16,b=0)
+; VERT: 48 instructions, 0 half, 5 full
diff --git a/reference/0ad-alpine-valley/0ad-67.asm b/reference/0ad-alpine-valley/0ad-67.asm
index 1d73264..f1c05da 100644
--- a/reference/0ad-alpine-valley/0ad-67.asm
+++ b/reference/0ad-alpine-valley/0ad-67.asm
@@ -8,215 +8,155 @@
@out(r2.y) out1
@out(r2.z) out2
@out(r2.w) out3
+@const(c10.x) 0x3f000000, 0x00000000, 0x3f800000, 0xbb449ba6
+@const(c11.x) 0xbf000000, 0x40000000, 0x3f800000, 0x3de38866
+@const(c12.x) 0x3fb8aa65, 0x3cf5c28f, 0x00000000, 0x00000000
+@const(c13.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.x, 4, r1.x
add.f r0.y, r0.w, c10.y
bary.f r0.w, 5, r1.x
bary.f r1.z, 10, r1.x
-add.f r1.w, r0.x, c11.x
-bary.f r2.x, 8, r1.x
+add.f r2.x, r0.x, c11.x
+bary.f r1.w, 11, r1.x
add.f r2.y, r0.w, c11.x
-mov.f32f32 r1.z, r1.z
-floor.f r2.z, r1.w
+bary.f r2.z, 8, r1.x
+floor.f r3.x, r2.x
rcp r0.y, r0.y
add.f r0.z, r0.z, c10.y
-floor.f r2.w, r2.y
-mov.f32f32 r3.x, r1.z
-add.f r1.z, r1.w, (neg)r2.z
+floor.f r3.y, r2.y
+bary.f r2.w, 9, r1.x
+add.f r2.x, r2.x, (neg)r3.x
(ss)mul.f r0.y, r0.z, r0.y
-add.f r0.z, r2.y, (neg)r2.w
-bary.f r1.w, 11, r1.x
-mov.f32f32 r1.z, r1.z
+absneg.f r0.z, (neg)c8.x
+add.f r2.y, r2.y, (neg)r3.y
+mov.f32f32 r3.x, r2.x
+sam (f32)(xyzw)r3.y, r1.z, s#0, t#0
+(sy)(ss)add.f r1.z, c13.y, (neg)r4.x
+mul.f r0.z, r0.z, c8.x
+mov.f32f32 r1.w, r2.y
+mul.f r4.y, c10.x, r3.x
+add.f r3.x, c11.y, (neg)r3.x
+mul.f r0.z, r0.z, r0.y
mov.f32f32 r0.y, r0.y
-absneg.f r2.y, (neg)c8.x
-mov.f32f32 r0.z, r0.z
-mul.f r2.z, c10.x, r1.z
-mov.f32f32 r1.w, r1.w
-mul.f r2.y, r2.y, c8.x
-add.f r2.w, c11.y, (neg)r1.z
-mov.f32f32 r2.z, r2.z
-mul.f r3.z, c10.x, r0.z
-mul.f r2.y, r2.y, r0.y
-mov.f32f32 r3.y, r1.w
-add.f r0.x, r0.x, (neg)r2.z
-mov.f32f32 r1.w, r3.z
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r2.z, r2.w
-mov.f32f32 r0.x, r0.x
-add.f r0.w, r0.w, (neg)r1.w
-mul.f r0.y, r2.y, r0.y
-sam (f32)(xyzw)r2.w, r3.x, s#0, t#0
-(sy)add.f r1.w, c13.y, (neg)r3.z
-add.f r2.y, c11.x, r0.x
+add.f r0.x, r0.x, (neg)r4.y
+mul.f r4.y, c10.x, r1.w
+mov.f32f32 r4.z, r3.x
+mul.f r0.y, r0.z, r0.y
+mov.f32f32 r0.z, r0.x
+add.f r0.w, r0.w, (neg)r4.y
add.f r0.x, c11.z, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r0.x, r0.x
mul.f r0.y, r0.y, c12.x
-add.f r3.w, c11.z, r0.w
-mul.f r2.y, r2.y, c3.z
-mul.f r0.x, r0.x, c3.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r4.x, r2.y
-mov.f32f32 r4.y, r0.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r4.z, r4.x
-add.f r0.w, c11.x, r0.w
-exp2 r0.y, r0.y
-(ss)mov.f32f32 r0.y, r0.y
-mov.f32f32 r4.x, r4.y
-mov.f32f32 r5.y, r0.x
-mov.f32f32 r0.x, r0.w
-add.f r0.w, c13.y, (neg)r0.y
-mov.f32f32 r6.x, r4.x
-mul.f r3.w, r3.w, c3.w
-mul.f r0.x, r0.x, c3.w
-mul.f r0.w, r0.w, c8.y
-mul.f r0.y, r0.y, c10.z
-mov.f32f32 r4.x, r3.w
-mov.f32f32 r4.y, r0.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r6.w, r2.y
-mov.f32f32 r2.y, r3.w
-mov.f32f32 r4.w, r4.y
-bary.f r3.w, 6, r1.x
-add.f r0.y, r0.y, r0.w
-mov.f32f32 r0.w, r4.x
-mov.f32f32 r5.z, r0.x
-add.f r0.x, r3.w, c10.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r6.y, r0.w
-mov.f32f32 r7.x, r2.y
-mov.f32f32 r0.w, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r2.y, r0.x
-mov.f32f32 r3.w, r0.x
-mov.f32f32 r5.x, r0.w
-max.f r0.y, r0.y, c10.y
-mov.f32f32 r6.z, r2.y
-mov.f32f32 r5.w, r3.w
-mov.f32f32 r0.x, r0.x
-mul.f r0.w, r1.w, c4.z
-add.f r1.w, c13.y, (neg)r3.z
-sam.s (f32)(x)r3.w, r4.z, s#2, t#2
-(sy)mov.f32f32 r2.y, r3.w
-min.f r0.y, r0.y, c10.z
-sam.s (f32)(x)r3.w, r6.x, s#2, t#2
-(sy)mov.f32f32 r3.w, r3.w
-sam.s (f32)(x)r4.x, r5.y, s#2, t#2
-(sy)mov.f32f32 r4.x, r4.x
-mov.f32f32 r2.y, r2.y
-add.f r4.y, c11.y, (neg)r0.z
-(ss)add.f r4.z, c13.y, (neg)r0.y
-add.f r4.w, c13.y, (neg)r0.y
-add.f r5.x, c13.y, (neg)r0.y
-mov.f32f32 r4.y, r4.y
-mul.f r4.z, r4.z, c7.z
-mul.f r4.w, r4.w, c7.y
-mul.f r5.x, r5.x, c7.x
-mul.f r5.y, r2.z, r4.y
-mov.f32f32 r7.y, r0.x
-mul.f r0.x, r3.z, c10.z
-mul.f r1.w, r1.w, c4.y
-mul.f r2.y, r5.y, r2.y
-add.f r1.z, r1.z, c10.z
-add.f r0.x, r0.x, r0.w
-mul.f r0.w, r3.z, c10.z
-sam.s (f32)(x)r5.y, r6.w, s#2, t#2
-(sy)mov.f32f32 r5.y, r5.y
-mul.f r4.y, r1.z, r4.y
-add.f r5.z, c13.y, (neg)r3.z
-mul.f r0.x, r3.y, r0.x
-add.f r0.w, r0.w, r1.w
-mad.f32 r1.w, r4.y, r4.x, r2.y
-mul.f r2.y, r5.z, c4.x
-mov.f32f32 r0.x, r0.x
-bary.f r3.y, 2, r1.x
-mov.f32f32 r1.w, r1.w
-add.f r0.z, r0.z, c10.z
-mul.f r0.w, r3.x, r0.w
-mul.f r3.x, r3.z, c10.z
-mov.f32f32 r2.x, r2.x
-mul.f r2.z, r2.z, r0.z
-mul.f r3.y, r0.x, r3.y
-mov.f32f32 r0.w, r0.w
-add.f r2.y, r3.x, r2.y
-mad.f32 r1.w, r2.z, r5.y, r1.w
-bary.f r2.z, 1, r1.x
-mov.f32f32 r4.x, r2.x
-mul.f r2.x, r2.w, r2.y
-mov.f32f32 r1.w, r1.w
-mul.f r0.z, r1.z, r0.z
-mul.f r1.z, r0.w, r2.z
-mov.f32f32 r2.x, r2.x
-nop
-mad.f32 r0.z, r0.z, r3.w, r1.w
-bary.f r1.w, 0, r1.x
-bary.f (ei)r1.x, 9, r1.x
-mov.f32f32 r1.y, c10.z
-mov.f32f32 r0.z, r0.z
-mul.f r1.w, r2.x, r1.w
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r2.w, r1.y
-mul.f r0.z, c11.w, r0.z
-mov.f32f32 r1.y, c10.y
-(rpt1)nop
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r4.y, r1.x
+add.f r0.z, c11.x, r0.z
+mov.f32f32 r4.y, r0.w
+mul.f r4.w, r0.x, c3.z
+add.f r0.x, c11.z, r0.w
+mul.f r5.z, r0.z, c3.z
+add.f r0.z, c11.x, r4.y
+mov.f32f32 r6.y, r4.w
+mul.f r5.x, r0.x, c3.w
+mov.f32f32 r7.x, r5.z
+mul.f r6.z, r0.z, c3.w
+exp2 r0.x, r0.y
+(ss)mov.f32f32 r0.y, r0.x
+bary.f r0.z, 6, r1.x
+mov.f32f32 r5.w, r5.x
+mov.f32f32 r7.y, r6.z
+add.f r0.y, c13.y, (neg)r0.y
+add.f r5.y, r0.z, c10.w
+add.f r0.z, c11.y, (neg)r1.w
+mul.f r0.x, r0.x, c10.z
+add.f r0.w, c13.y, (neg)r4.x
+mov.f32f32 r7.z, r5.y
+mul.f r0.y, r0.y, c8.y
+mov.f32f32 r6.w, r5.y
+mov.f32f32 r6.x, r5.y
(rpt1)nop
-mul.f r1.x, r3.y, r0.z
-mul.f r1.z, r1.z, r0.z
-mul.f r0.z, r1.w, r0.z
+add.f r0.x, r0.x, r0.y
+sam.s (f32)(x)r7.x, r7.x, s#2, t#2
+mov.f32f32 r0.y, r0.z
nop
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.z, r1.z
-mad.f32 r0.x, c6.z, r0.x, r1.x
-mad.f32 r0.w, c6.y, r0.w, r1.z
-mov.f32f32 r0.z, r0.z
-sam (f32)(w)r3.x, r4.x, s#1, t#1
-(sy)cmps.f.lt r1.x, r3.w, c12.y
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.w, r0.w
-mad.f32 r0.z, c6.x, r2.x, r0.z
-cov.u32f32 r1.x, r1.x
-mul.f r0.x, r0.y, r0.x
-mul.f r0.w, r0.y, r0.w
-mov.f32f32 r0.z, r0.z
-cmps.f.ne r1.x, r1.x, c10.y
-add.f r0.x, r0.x, r4.z
-mov.f32f32 r1.z, r3.w
-add.f r0.w, r0.w, r4.w
-mul.f r0.y, r0.y, r0.z
-nop
-mov.f32f32 r0.z, r1.z
-(rpt2)nop
-sel.b32 r0.z, r1.y, r1.x, r0.z
-add.f r0.y, r0.y, r5.x
-(rpt1)nop
-mul.f r0.x, r0.x, r0.z
-mul.f r0.w, r0.w, r0.z
-mul.f r0.y, r0.y, r0.z
+(ss)nop
+sam.s (f32)(x)r7.y, r6.y, s#2, t#2
+sam.s (f32)(x)r7.z, r4.w, s#2, t#2
+mul.f r0.w, r0.w, c4.z
+add.f r1.w, c13.y, (neg)r4.x
+mul.f r0.y, r4.z, r0.y
+max.f r0.x, r0.x, c10.y
+sam.s (f32)(x)r4.y, r5.z, s#2, t#2
+mul.f r4.z, r4.x, c10.z
+mul.f r1.w, r1.w, c4.y
+(sy)mul.f r0.y, r0.y, r7.x
+add.f r2.x, r2.x, c10.z
+min.f r0.x, r0.x, c10.z
+add.f r0.w, r4.z, r0.w
+mul.f r4.z, r4.x, c10.z
+mul.f r0.z, r2.x, r0.z
+(ss)add.f r4.w, c13.y, (neg)r0.x
+add.f r5.x, c13.y, (neg)r0.x
+add.f r5.y, c13.y, (neg)r0.x
+mad.f32 r0.y, r0.z, r7.y, r0.y
+add.f r0.z, r2.y, c10.z
+mul.f r2.y, r4.w, c7.z
+mul.f r4.w, r5.x, c7.y
+mul.f r5.x, r5.y, c7.x
+mul.f r3.x, r3.x, r0.z
+add.f r1.w, r4.z, r1.w
+mul.f r1.z, r1.z, c4.x
+mul.f r0.w, r3.w, r0.w
+mad.f32 r0.y, r3.x, r4.y, r0.y
+mul.f r0.z, r2.x, r0.z
+mul.f r2.x, r4.x, c10.z
+mov.f32f32 r3.x, r0.w
+bary.f r3.w, 2, r1.x
+mad.f32 r0.y, r0.z, r7.z, r0.y
+mul.f r0.z, r3.z, r1.w
+add.f r1.z, r2.x, r1.z
+mul.f r1.w, r3.x, r3.w
+mul.f r0.y, c11.w, r0.y
+mov.f32f32 r2.x, r0.z
+mul.f r1.z, r3.y, r1.z
+bary.f r3.x, 1, r1.x
+mov.f32f32 r3.y, r0.y
+sam (f32)(w)r3.z, r2.z, s#1, t#1
+(sy)(ss)cmps.f.lt r2.z, r4.y, c12.y
+mov.f32f32 r3.z, r1.z
+mul.f r2.x, r2.x, r3.x
+mul.f r1.w, r1.w, r3.y
+bary.f (ei)r1.x, 0, r1.x
+mad.f32 r0.w, c6.z, r0.w, r1.w
+mul.f r1.y, r2.x, r3.y
+cov.u32f32 r1.w, r2.z
+mov.f32f32 r2.w, c10.z
+mul.f r0.w, r0.x, r0.w
+mad.f32 r0.z, c6.y, r0.z, r1.y
+mul.f r1.x, r3.z, r1.x
+cmps.f.ne r1.y, r1.w, c10.y
+add.f r0.w, r0.w, r2.y
+mov.f32f32 r1.w, c10.y
+mul.f r0.z, r0.x, r0.z
+mul.f r0.y, r1.x, r0.y
nop
-mul.f r0.x, r0.x, c5.z
-mul.f r0.z, r0.w, c5.y
-mul.f r0.y, r0.y, c5.x
+sel.b32 r1.x, r1.w, r1.y, r4.y
+add.f r0.z, r0.z, r4.w
+mad.f32 r0.y, c6.x, r1.z, r0.y
nop
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.y, r0.y
+mul.f r0.w, r0.w, r1.x
+mul.f r0.z, r0.z, r1.x
+(rpt1)nop
+mul.f r2.z, r0.w, c5.z
+mul.f r2.y, r0.z, c5.y
+mul.f r0.x, r0.x, r0.y
+(rpt2)nop
+add.f r0.x, r0.x, r5.x
+(rpt2)nop
+mul.f r0.x, r0.x, r1.x
+(rpt2)nop
+mul.f r2.x, r0.x, c5.x
+end
nop
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.y, r0.y
nop
-mov.f32f32 r2.z, r0.x
-mov.f32f32 r2.y, r0.z
-mov.f32f32 r2.x, r0.y
-end
nop
; FRAG: outputs: r2.x (1:0)
; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r2.x (5:10,cm=f,il=12,b=1) r1.x (5:11,cm=f,il=16,b=1)
-; FRAG: 212 instructions, 0 half, 8 full
+; FRAG: 149 instructions, 0 half, 8 full
diff --git a/reference/0ad-alpine-valley/0ad-68.asm b/reference/0ad-alpine-valley/0ad-68.asm
index 035c379..5b14079 100644
--- a/reference/0ad-alpine-valley/0ad-68.asm
+++ b/reference/0ad-alpine-valley/0ad-68.asm
@@ -1,13 +1,13 @@
; options:
; VERT: new compiler
-@in(r3.x) in0
-@in(r3.y) in1
-@in(r3.z) in2
+@in(r4.x) in0
+@in(r4.y) in1
+@in(r4.z) in2
@in(r0.x) in4
@in(r0.y) in5
@in(r0.z) in6
-@in(r4.x) in8
-@in(r4.y) in9
+@in(r3.z) in8
+@in(r3.w) in9
@out(r0.x) out0
@out(r0.y) out1
@out(r0.z) out2
@@ -24,79 +24,57 @@
@out(r3.y) out13
@out(r3.z) out14
@out(r3.w) out15
+@const(c12.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
+@const(c13.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)mul.f r0.x, r0.x, (neg)c4.x
-mul.f r0.w, c7.w, r3.x
+mul.f r0.w, c7.y, r4.x
mad.f32 r0.x, (neg)c4.y, r0.y, r0.x
-mad.f32 r0.y, c8.w, r3.y, r0.w
-mul.f r0.w, c7.z, r3.x
-mul.f r1.x, c7.y, r3.x
-mov.f32f32 r0.x, r0.x
-mad.f32 r0.y, c9.w, r3.z, r0.y
+mad.f32 r0.y, c8.y, r4.y, r0.w
mad.f32 r0.x, (neg)c4.z, r0.z, r0.x
-mad.f32 r0.z, c8.z, r3.y, r0.w
-mad.f32 r0.w, c8.y, r3.y, r1.x
-mul.f r1.w, c7.x, r3.x
+mad.f32 r0.y, c9.y, r4.z, r0.y
+mul.f r0.z, c7.x, r4.x
+mul.f r0.w, c7.w, r4.x
max.f r0.x, c12.x, r0.x
-add.f r0.y, r0.y, c10.w
-mad.f32 r0.z, c9.z, r3.z, r0.z
-mad.f32 r0.w, c9.y, r3.z, r0.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
-add.f r0.z, r0.z, c10.z
-add.f r0.w, r0.w, c10.y
-mul.f r1.x, r0.x, c5.z
-mul.f r1.y, r0.x, c5.y
-mul.f r0.x, r0.x, c5.x
-mov.f32f32 r2.w, r0.y
-mov.f32f32 r0.y, r1.x
-mov.f32f32 r1.x, r1.y
-mov.f32f32 r0.x, r0.x
-nop
-mov.f32f32 r1.z, r0.y
-mov.f32f32 r1.y, r1.x
-mov.f32f32 r1.x, r0.x
-mov.f32f32 r0.x, r0.z
-mul.f r0.y, r0.w, c11.y
-mad.f32 r0.z, c8.x, r3.y, r1.w
-mul.f r0.w, c0.w, r3.x
-mov.f32f32 r2.z, r0.x
-mov.f32f32 r2.y, r0.y
-mad.f32 r0.x, c9.x, r3.z, r0.z
-mad.f32 r0.y, c1.w, r3.y, r0.w
-mul.f r0.z, c0.z, r3.x
-mul.f r0.w, c0.y, r3.x
-add.f r0.x, r0.x, c10.x
-mad.f32 r0.y, c2.w, r3.z, r0.y
-mad.f32 r0.z, c1.z, r3.y, r0.z
-mad.f32 r0.w, c1.y, r3.y, r0.w
-mul.f r0.x, r0.x, c11.x
-add.f r0.y, r0.y, c3.w
-mad.f32 r0.z, c2.z, r3.z, r0.z
-mad.f32 r1.w, c2.y, r3.z, r0.w
-mov.f32f32 r2.x, r0.x
-mov.f32f32 r0.w, r0.y
-add.f r0.x, r0.z, c3.z
-add.f r0.y, r1.w, c3.y
-mul.f r1.w, c0.x, r3.x
-mov.f32f32 r3.w, r4.y
-mov.f32f32 r0.z, r0.x
-mov.f32f32 r0.y, r0.y
-mad.f32 r0.x, c1.x, r3.y, r1.w
-mov.f32f32 r3.w, r3.w
-mad.f32 r0.x, c2.x, r3.z, r0.x
-mov.f32f32 r1.w, r4.x
-mad.f32 r3.y, c6.x, r3.z, c6.y
-mad.f32 r3.x, c6.x, r3.x, c6.y
-add.f r0.x, r0.x, c3.x
-mov.f32f32 r3.z, r1.w
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r0.x, r0.x
+add.f r0.y, r0.y, c10.y
+mad.f32 r0.z, c8.x, r4.y, r0.z
+mad.f32 r0.w, c8.w, r4.y, r0.w
+mov.f32f32 r1.y, r0.x
+mul.f r2.y, r0.y, c11.y
+mad.f32 r0.y, c9.x, r4.z, r0.z
+mul.f r1.x, r0.x, c5.x
+mul.f r1.z, r1.y, c5.z
+mul.f r1.y, r1.y, c5.y
+add.f r0.x, r0.y, c10.x
+mad.f32 r0.y, c9.w, r4.z, r0.w
+mul.f r0.z, c7.z, r4.x
+mul.f r0.w, c0.w, r4.x
+mul.f r2.x, r0.x, c11.x
+add.f r2.w, r0.y, c10.w
+mad.f32 r0.x, c8.z, r4.y, r0.z
+mad.f32 r0.y, c1.w, r4.y, r0.w
+mad.f32 r0.x, c9.z, r4.z, r0.x
+mad.f32 r0.y, c2.w, r4.z, r0.y
+mul.f r0.z, c0.z, r4.x
+mul.f r1.w, c0.y, r4.x
+add.f r2.z, r0.x, c10.z
+add.f r0.w, r0.y, c3.w
+mad.f32 r0.x, c1.z, r4.y, r0.z
+mad.f32 r0.y, c1.y, r4.y, r1.w
+mad.f32 r0.x, c2.z, r4.z, r0.x
+mad.f32 r0.y, c2.y, r4.z, r0.y
+mul.f r1.w, c0.x, r4.x
+mad.f32 r3.y, c6.x, r4.z, c6.y
+add.f r0.z, r0.x, c3.z
+add.f r0.y, r0.y, c3.y
+mad.f32 r0.x, c1.x, r4.y, r1.w
+mad.f32 r3.x, c6.x, r4.x, c6.y
+mad.f32 r0.x, c2.x, r4.z, r0.x
mov.f32f32 r1.w, (0.000000)
-(rpt2)nop
-mov.f32f32 r1.w, r1.w
+(rpt1)nop
+add.f r0.x, r0.x, c3.x
end
+nop
; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11)
-; VERT: inputs: r3.x (0:0,cm=7,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r4.x (0:0,cm=3,il=16,b=0)
-; VERT: 74 instructions, 0 half, 5 full
+; VERT: inputs: r4.x (0:0,cm=7,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r3.z (0:0,cm=3,il=16,b=0)
+; VERT: 48 instructions, 0 half, 5 full
diff --git a/reference/0ad-alpine-valley/0ad-69.asm b/reference/0ad-alpine-valley/0ad-69.asm
index 035c379..5b14079 100644
--- a/reference/0ad-alpine-valley/0ad-69.asm
+++ b/reference/0ad-alpine-valley/0ad-69.asm
@@ -1,13 +1,13 @@
; options:
; VERT: new compiler
-@in(r3.x) in0
-@in(r3.y) in1
-@in(r3.z) in2
+@in(r4.x) in0
+@in(r4.y) in1
+@in(r4.z) in2
@in(r0.x) in4
@in(r0.y) in5
@in(r0.z) in6
-@in(r4.x) in8
-@in(r4.y) in9
+@in(r3.z) in8
+@in(r3.w) in9
@out(r0.x) out0
@out(r0.y) out1
@out(r0.z) out2
@@ -24,79 +24,57 @@
@out(r3.y) out13
@out(r3.z) out14
@out(r3.w) out15
+@const(c12.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
+@const(c13.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)mul.f r0.x, r0.x, (neg)c4.x
-mul.f r0.w, c7.w, r3.x
+mul.f r0.w, c7.y, r4.x
mad.f32 r0.x, (neg)c4.y, r0.y, r0.x
-mad.f32 r0.y, c8.w, r3.y, r0.w
-mul.f r0.w, c7.z, r3.x
-mul.f r1.x, c7.y, r3.x
-mov.f32f32 r0.x, r0.x
-mad.f32 r0.y, c9.w, r3.z, r0.y
+mad.f32 r0.y, c8.y, r4.y, r0.w
mad.f32 r0.x, (neg)c4.z, r0.z, r0.x
-mad.f32 r0.z, c8.z, r3.y, r0.w
-mad.f32 r0.w, c8.y, r3.y, r1.x
-mul.f r1.w, c7.x, r3.x
+mad.f32 r0.y, c9.y, r4.z, r0.y
+mul.f r0.z, c7.x, r4.x
+mul.f r0.w, c7.w, r4.x
max.f r0.x, c12.x, r0.x
-add.f r0.y, r0.y, c10.w
-mad.f32 r0.z, c9.z, r3.z, r0.z
-mad.f32 r0.w, c9.y, r3.z, r0.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
-add.f r0.z, r0.z, c10.z
-add.f r0.w, r0.w, c10.y
-mul.f r1.x, r0.x, c5.z
-mul.f r1.y, r0.x, c5.y
-mul.f r0.x, r0.x, c5.x
-mov.f32f32 r2.w, r0.y
-mov.f32f32 r0.y, r1.x
-mov.f32f32 r1.x, r1.y
-mov.f32f32 r0.x, r0.x
-nop
-mov.f32f32 r1.z, r0.y
-mov.f32f32 r1.y, r1.x
-mov.f32f32 r1.x, r0.x
-mov.f32f32 r0.x, r0.z
-mul.f r0.y, r0.w, c11.y
-mad.f32 r0.z, c8.x, r3.y, r1.w
-mul.f r0.w, c0.w, r3.x
-mov.f32f32 r2.z, r0.x
-mov.f32f32 r2.y, r0.y
-mad.f32 r0.x, c9.x, r3.z, r0.z
-mad.f32 r0.y, c1.w, r3.y, r0.w
-mul.f r0.z, c0.z, r3.x
-mul.f r0.w, c0.y, r3.x
-add.f r0.x, r0.x, c10.x
-mad.f32 r0.y, c2.w, r3.z, r0.y
-mad.f32 r0.z, c1.z, r3.y, r0.z
-mad.f32 r0.w, c1.y, r3.y, r0.w
-mul.f r0.x, r0.x, c11.x
-add.f r0.y, r0.y, c3.w
-mad.f32 r0.z, c2.z, r3.z, r0.z
-mad.f32 r1.w, c2.y, r3.z, r0.w
-mov.f32f32 r2.x, r0.x
-mov.f32f32 r0.w, r0.y
-add.f r0.x, r0.z, c3.z
-add.f r0.y, r1.w, c3.y
-mul.f r1.w, c0.x, r3.x
-mov.f32f32 r3.w, r4.y
-mov.f32f32 r0.z, r0.x
-mov.f32f32 r0.y, r0.y
-mad.f32 r0.x, c1.x, r3.y, r1.w
-mov.f32f32 r3.w, r3.w
-mad.f32 r0.x, c2.x, r3.z, r0.x
-mov.f32f32 r1.w, r4.x
-mad.f32 r3.y, c6.x, r3.z, c6.y
-mad.f32 r3.x, c6.x, r3.x, c6.y
-add.f r0.x, r0.x, c3.x
-mov.f32f32 r3.z, r1.w
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r0.x, r0.x
+add.f r0.y, r0.y, c10.y
+mad.f32 r0.z, c8.x, r4.y, r0.z
+mad.f32 r0.w, c8.w, r4.y, r0.w
+mov.f32f32 r1.y, r0.x
+mul.f r2.y, r0.y, c11.y
+mad.f32 r0.y, c9.x, r4.z, r0.z
+mul.f r1.x, r0.x, c5.x
+mul.f r1.z, r1.y, c5.z
+mul.f r1.y, r1.y, c5.y
+add.f r0.x, r0.y, c10.x
+mad.f32 r0.y, c9.w, r4.z, r0.w
+mul.f r0.z, c7.z, r4.x
+mul.f r0.w, c0.w, r4.x
+mul.f r2.x, r0.x, c11.x
+add.f r2.w, r0.y, c10.w
+mad.f32 r0.x, c8.z, r4.y, r0.z
+mad.f32 r0.y, c1.w, r4.y, r0.w
+mad.f32 r0.x, c9.z, r4.z, r0.x
+mad.f32 r0.y, c2.w, r4.z, r0.y
+mul.f r0.z, c0.z, r4.x
+mul.f r1.w, c0.y, r4.x
+add.f r2.z, r0.x, c10.z
+add.f r0.w, r0.y, c3.w
+mad.f32 r0.x, c1.z, r4.y, r0.z
+mad.f32 r0.y, c1.y, r4.y, r1.w
+mad.f32 r0.x, c2.z, r4.z, r0.x
+mad.f32 r0.y, c2.y, r4.z, r0.y
+mul.f r1.w, c0.x, r4.x
+mad.f32 r3.y, c6.x, r4.z, c6.y
+add.f r0.z, r0.x, c3.z
+add.f r0.y, r0.y, c3.y
+mad.f32 r0.x, c1.x, r4.y, r1.w
+mad.f32 r3.x, c6.x, r4.x, c6.y
+mad.f32 r0.x, c2.x, r4.z, r0.x
mov.f32f32 r1.w, (0.000000)
-(rpt2)nop
-mov.f32f32 r1.w, r1.w
+(rpt1)nop
+add.f r0.x, r0.x, c3.x
end
+nop
; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11)
-; VERT: inputs: r3.x (0:0,cm=7,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r4.x (0:0,cm=3,il=16,b=0)
-; VERT: 74 instructions, 0 half, 5 full
+; VERT: inputs: r4.x (0:0,cm=7,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r3.z (0:0,cm=3,il=16,b=0)
+; VERT: 48 instructions, 0 half, 5 full
diff --git a/reference/0ad-alpine-valley/0ad-70.asm b/reference/0ad-alpine-valley/0ad-70.asm
index 4377ecf..227a081 100644
--- a/reference/0ad-alpine-valley/0ad-70.asm
+++ b/reference/0ad-alpine-valley/0ad-70.asm
@@ -8,199 +8,135 @@
@out(r2.y) out1
@out(r2.z) out2
@out(r2.w) out3
+@const(c9.x) 0x3f000000, 0x00000000, 0x3f800000, 0xbb449ba6
+@const(c10.x) 0xbf000000, 0x40000000, 0x3f800000, 0x3de38866
+@const(c11.x) 0x3fb8aa65, 0x3cf5c28f, 0x00000000, 0x00000000
+@const(c12.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.x, 4, r1.x
add.f r0.y, r0.w, c9.y
bary.f r0.w, 5, r1.x
bary.f r1.z, 8, r1.x
-add.f r1.w, r0.x, c10.x
-bary.f r2.x, 10, r1.x
-bary.f r2.y, 6, r1.x
-add.f r2.z, r0.w, c10.x
-floor.f r2.w, r1.w
+add.f r2.x, r0.x, c10.x
+bary.f r1.w, 9, r1.x
+add.f r2.y, r0.w, c10.x
+bary.f r2.z, 6, r1.x
+floor.f r2.w, r2.x
rcp r0.y, r0.y
add.f r0.z, r0.z, c9.y
-mov.f32f32 r1.z, r1.z
-floor.f r3.x, r2.z
-add.f r1.w, r1.w, (neg)r2.w
+floor.f r3.x, r2.y
+add.f r3.w, r2.z, c9.w
+add.f r2.x, r2.x, (neg)r2.w
(ss)mul.f r0.y, r0.z, r0.y
-mov.f32f32 r3.y, r1.z
-add.f r0.z, r2.z, (neg)r3.x
-mov.f32f32 r1.z, r1.w
+absneg.f r0.z, (neg)c7.x
+add.f r2.y, r2.y, (neg)r3.x
+mov.f32f32 r2.z, r2.x
+add.f r2.x, r2.x, c9.z
+mul.f r0.z, r0.z, c7.x
+sam (f32)(w)r4.x, r1.z, s#1, t#1
+(ss)mov.f32f32 r1.z, r2.y
+mul.f r1.w, c9.x, r2.z
+add.f r2.z, c10.y, (neg)r2.z
+mul.f r0.z, r0.z, r0.y
mov.f32f32 r0.y, r0.y
-absneg.f r1.w, (neg)c7.x
-mov.f32f32 r0.z, r0.z
-mul.f r2.z, c9.x, r1.z
-add.f r2.w, c10.y, (neg)r1.z
-mul.f r1.w, r1.w, c7.x
-bary.f r3.x, 9, r1.x
-mov.f32f32 r2.z, r2.z
-mul.f r3.z, c9.x, r0.z
-mul.f r1.w, r1.w, r0.y
-mov.f32f32 r2.w, r2.w
-add.f r0.x, r0.x, (neg)r2.z
-mov.f32f32 r2.z, r3.z
-mov.f32f32 r1.w, r1.w
-add.f r3.z, c10.y, (neg)r0.z
-mov.f32f32 r0.x, r0.x
-add.f r0.w, r0.w, (neg)r2.z
-mul.f r0.y, r1.w, r0.y
-mov.f32f32 r1.w, r3.z
-add.f r2.z, c10.x, r0.x
+add.f r0.x, r0.x, (neg)r1.w
+mul.f r1.w, c9.x, r1.z
+mov.f32f32 r2.w, r2.z
+mul.f r0.y, r0.z, r0.y
+mov.f32f32 r0.z, r0.x
+add.f r0.w, r0.w, (neg)r1.w
add.f r0.x, c10.z, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r0.x, r0.x
mul.f r0.y, r0.y, c11.x
-add.f r3.z, c10.z, r0.w
-mul.f r2.z, r2.z, c3.z
-mul.f r0.x, r0.x, c3.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r3.z, r3.z
-mov.f32f32 r3.w, r2.z
-mov.f32f32 r4.x, r0.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r4.y, r3.w
-add.f r0.w, c10.x, r0.w
-exp2 r0.y, r0.y
-(ss)mov.f32f32 r0.y, r0.y
-mov.f32f32 r3.w, r4.x
-mov.f32f32 r5.x, r0.x
-mov.f32f32 r0.x, r0.w
-add.f r0.w, c12.y, (neg)r0.y
-mov.f32f32 r5.w, r3.w
-mul.f r3.z, r3.z, c3.w
-mul.f r0.x, r0.x, c3.w
-mul.f r0.w, r0.w, c7.y
-mul.f r0.y, r0.y, c9.z
-mov.f32f32 r3.w, r3.z
-mov.f32f32 r4.x, r0.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r6.z, r2.z
-mov.f32f32 r2.z, r3.z
-mov.f32f32 r4.z, r4.x
-add.f r2.y, r2.y, c9.w
-add.f r0.y, r0.y, r0.w
-mov.f32f32 r0.w, r3.w
-mov.f32f32 r5.y, r0.x
-mov.f32f32 r0.x, r2.y
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r6.x, r0.w
-mov.f32f32 r0.w, r2.y
-mov.f32f32 r4.w, r0.x
-mov.f32f32 r0.x, r0.y
-mov.f32f32 r0.y, r2.y
-mov.f32f32 r6.w, r2.z
-mov.f32f32 r6.y, r0.w
-mov.f32f32 r0.w, r2.y
-mov.f32f32 r5.z, r0.y
-sam.s (f32)(x)r3.z, r4.y, s#2, t#2
-(sy)mov.f32f32 r0.y, r3.z
+add.f r0.z, c10.x, r0.z
+mov.f32f32 r1.w, r0.w
+mul.f r3.y, r0.x, c3.z
+add.f r0.x, c10.z, r0.w
+mul.f r4.x, r0.z, c3.z
+add.f r0.z, c10.x, r1.w
+mov.f32f32 r5.x, r3.y
+mul.f r3.z, r0.x, c3.w
+mov.f32f32 r5.w, r4.x
+mul.f r5.y, r0.z, c3.w
+exp2 r0.x, r0.y
+(ss)mov.f32f32 r0.y, r0.x
+mov.f32f32 r5.z, r3.w
+mov.f32f32 r4.y, r3.z
+mov.f32f32 r6.x, r5.y
+mov.f32f32 r6.y, r3.w
+add.f r0.y, c12.y, (neg)r0.y
+mov.f32f32 r4.z, r3.w
+sam.s (f32)(x)r3.x, r3.y, s#2, t#2
+add.f r0.z, c10.y, (neg)r1.z
+sam.s (f32)(x)r6.z, r5.x, s#2, t#2
+mul.f r0.x, r0.x, c9.z
+add.f r0.w, r2.y, c9.z
+mul.f r0.y, r0.y, c7.y
+(ss)nop
+sam.s (f32)(x)r5.x, r5.w, s#2, t#2
+mov.f32f32 r1.z, r0.z
+mul.f r0.z, r2.x, r0.z
+mul.f r1.w, r2.z, r0.w
+add.f r0.x, r0.x, r0.y
+mul.f r0.y, r2.w, r1.z
+sam.s (f32)(x)r3.y, r4.x, s#2, t#2
+mul.f r0.w, r2.x, r0.w
+(sy)cmps.f.lt r1.z, r4.w, c11.y
+bary.f r2.x, 10, r1.x
+mul.f r0.y, r0.y, r5.x
max.f r0.x, r0.x, c9.y
-mov.f32f32 r7.x, r0.w
-mul.f r0.w, r2.w, r1.w
-mov.f32f32 r0.y, r0.y
+mad.f32 r0.y, r0.z, r6.z, r0.y
+cov.u32f32 r0.z, r1.z
+mad.f32 r0.y, r1.w, r3.y, r0.y
min.f r0.x, r0.x, c9.z
-sam.s (f32)(x)r3.z, r5.w, s#2, t#2
-nop
-(sy)mov.f32f32 r2.y, r3.z
-mul.f r0.y, r0.w, r0.y
-sam.s (f32)(x)r3.z, r5.x, s#2, t#2
-(sy)mov.f32f32 r0.w, r3.z
-add.f r1.z, r1.z, c9.z
-add.f r2.z, c12.y, (neg)r0.x
-add.f r3.z, c12.y, (neg)r0.x
-add.f r3.w, c12.y, (neg)r0.x
-mul.f r1.w, r1.z, r1.w
-mul.f r2.z, r2.z, c6.z
-mul.f r4.x, r3.z, c6.y
-mul.f r3.w, r3.w, c6.x
-mad.f32 r0.y, r1.w, r0.w, r0.y
-(ss)nop
-sam.s (f32)(x)r4.y, r6.z, s#2, t#2
-add.f r0.z, r0.z, c9.z
-(sy)mov.f32f32 r0.w, r4.y
-mov.f32f32 r1.w, r3.x
-mov.f32f32 r0.y, r0.y
-mul.f r2.w, r2.w, r0.z
-mul.f r0.z, r1.z, r0.z
-mov.f32f32 r3.z, r1.w
-mov.f32f32 r1.z, r2.x
-mad.f32 r0.y, r2.w, r0.w, r0.y
-bary.f r0.w, 11, r1.x
-mov.f32f32 r1.w, c9.z
-bary.f r2.x, 2, r1.x
-mov.f32f32 r0.y, r0.y
-sam (f32)(w)r2.w, r3.y, s#1, t#1
-(sy)cmps.f.lt r2.w, r3.z, c11.y
-mad.f32 r0.y, r0.z, r2.y, r0.y
-mov.f32f32 r0.z, r3.z
-mov.f32f32 r3.x, r1.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.y, r0.y
-cov.u32f32 r1.z, r2.w
-mov.f32f32 r0.z, r0.z
-(ss)mov.f32f32 r3.y, r0.w
+mad.f32 r0.y, r0.w, r3.x, r0.y
+cmps.f.ne r0.z, r0.z, c9.y
+(rpt1)nop
mul.f r0.y, c10.w, r0.y
-cmps.f.ne r0.w, r1.z, c9.y
+bary.f r2.y, 11, r1.x
+add.f r0.w, c12.y, (neg)r0.x
+add.f r1.z, c12.y, (neg)r0.x
+mov.f32f32 r1.w, r0.y
+add.f r2.z, c12.y, (neg)r0.x
(rpt1)nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.z, c9.y
-sam (f32)(xyz)r4.y, r3.x, s#0, t#0
-(sy)mul.f r2.x, r4.w, r2.x
+sam (f32)(xyz)r2.w, r2.x, s#0, t#0
+(ss)bary.f r2.x, 2, r1.x
bary.f r2.y, 1, r1.x
bary.f (ei)r1.x, 0, r1.x
-sel.b32 r0.z, r1.z, r0.w, r0.z
-mul.f r0.w, r2.x, r0.y
-mul.f r1.y, r4.z, r2.y
-mul.f r1.x, r4.y, r1.x
-mov.f32f32 r2.w, r1.w
-mov.f32f32 r0.w, r0.w
-mul.f r1.y, r1.y, r0.y
-mad.f32 r0.w, c5.z, r4.w, r0.w
+mul.f r0.w, r0.w, c6.z
+(sy)mul.f r1.y, r3.y, r2.x
+mul.f r2.x, r3.x, r2.y
+mul.f r1.x, r2.w, r1.x
+mul.f r1.z, r1.z, c6.y
+mul.f r1.y, r1.y, r1.w
+mul.f r1.w, r2.x, r1.w
+mad.f32 r1.y, c5.z, r3.y, r1.y
+mad.f32 r1.w, c5.y, r3.x, r1.w
mul.f r0.y, r1.x, r0.y
-(rpt1)nop
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.x, r1.y
-mov.f32f32 r0.y, r0.y
-mad.f32 r1.x, c5.y, r4.z, r1.x
-mul.f r0.w, r0.x, r0.w
-mad.f32 r0.y, c5.x, r4.y, r0.y
-(rpt1)nop
-add.f r0.w, r0.w, r2.z
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r0.y, r0.y
-nop
-mul.f r0.w, r0.w, r0.z
-mul.f r1.x, r0.x, r1.x
+mul.f r1.x, r2.z, c6.x
+mul.f r1.y, r0.x, r1.y
+mul.f r1.w, r0.x, r1.w
+mad.f32 r0.y, c5.x, r2.w, r0.y
+mov.f32f32 r2.x, c9.y
+add.f r0.w, r1.y, r0.w
+add.f r1.y, r1.w, r1.z
mul.f r0.x, r0.x, r0.y
-nop
-mul.f r0.y, r0.w, c4.z
-add.f r0.w, r1.x, r4.x
-add.f r0.x, r0.x, r3.w
-nop
-mov.f32f32 r0.y, r0.y
-mul.f r0.w, r0.w, r0.z
-mul.f r0.x, r0.x, r0.z
-nop
-mov.f32f32 r0.y, r0.y
-mul.f r0.z, r0.w, c4.y
-mul.f r0.x, r0.x, c4.x
-nop
-mov.f32f32 r2.z, r0.y
-mov.f32f32 r0.y, r0.z
-mov.f32f32 r0.x, r0.x
+sel.b32 r0.y, r2.x, r0.z, r4.w
+mov.f32f32 r2.w, c9.z
(rpt1)nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
+mul.f r0.z, r0.w, r0.y
+mul.f r0.w, r1.y, r0.y
(rpt1)nop
-mov.f32f32 r2.y, r0.y
-mov.f32f32 r2.x, r0.x
+mul.f r2.z, r0.z, c4.z
+mul.f r2.y, r0.w, c4.y
+add.f r0.x, r0.x, r1.x
+(rpt2)nop
+mul.f r0.x, r0.x, r0.y
+(rpt2)nop
+mul.f r2.x, r0.x, c4.x
end
nop
nop
+nop
; FRAG: outputs: r2.x (1:0)
-; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r2.x (5:10,cm=f,il=12,b=1) r0.x (5:11,cm=f,il=16,b=1)
-; FRAG: 195 instructions, 0 half, 8 full
+; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r2.x (5:10,cm=f,il=12,b=1) r1.z (5:11,cm=f,il=16,b=1)
+; FRAG: 129 instructions, 0 half, 7 full
diff --git a/reference/0ad-alpine-valley/0ad-71.asm b/reference/0ad-alpine-valley/0ad-71.asm
index 035c379..5b14079 100644
--- a/reference/0ad-alpine-valley/0ad-71.asm
+++ b/reference/0ad-alpine-valley/0ad-71.asm
@@ -1,13 +1,13 @@
; options:
; VERT: new compiler
-@in(r3.x) in0
-@in(r3.y) in1
-@in(r3.z) in2
+@in(r4.x) in0
+@in(r4.y) in1
+@in(r4.z) in2
@in(r0.x) in4
@in(r0.y) in5
@in(r0.z) in6
-@in(r4.x) in8
-@in(r4.y) in9
+@in(r3.z) in8
+@in(r3.w) in9
@out(r0.x) out0
@out(r0.y) out1
@out(r0.z) out2
@@ -24,79 +24,57 @@
@out(r3.y) out13
@out(r3.z) out14
@out(r3.w) out15
+@const(c12.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
+@const(c13.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)mul.f r0.x, r0.x, (neg)c4.x
-mul.f r0.w, c7.w, r3.x
+mul.f r0.w, c7.y, r4.x
mad.f32 r0.x, (neg)c4.y, r0.y, r0.x
-mad.f32 r0.y, c8.w, r3.y, r0.w
-mul.f r0.w, c7.z, r3.x
-mul.f r1.x, c7.y, r3.x
-mov.f32f32 r0.x, r0.x
-mad.f32 r0.y, c9.w, r3.z, r0.y
+mad.f32 r0.y, c8.y, r4.y, r0.w
mad.f32 r0.x, (neg)c4.z, r0.z, r0.x
-mad.f32 r0.z, c8.z, r3.y, r0.w
-mad.f32 r0.w, c8.y, r3.y, r1.x
-mul.f r1.w, c7.x, r3.x
+mad.f32 r0.y, c9.y, r4.z, r0.y
+mul.f r0.z, c7.x, r4.x
+mul.f r0.w, c7.w, r4.x
max.f r0.x, c12.x, r0.x
-add.f r0.y, r0.y, c10.w
-mad.f32 r0.z, c9.z, r3.z, r0.z
-mad.f32 r0.w, c9.y, r3.z, r0.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
-add.f r0.z, r0.z, c10.z
-add.f r0.w, r0.w, c10.y
-mul.f r1.x, r0.x, c5.z
-mul.f r1.y, r0.x, c5.y
-mul.f r0.x, r0.x, c5.x
-mov.f32f32 r2.w, r0.y
-mov.f32f32 r0.y, r1.x
-mov.f32f32 r1.x, r1.y
-mov.f32f32 r0.x, r0.x
-nop
-mov.f32f32 r1.z, r0.y
-mov.f32f32 r1.y, r1.x
-mov.f32f32 r1.x, r0.x
-mov.f32f32 r0.x, r0.z
-mul.f r0.y, r0.w, c11.y
-mad.f32 r0.z, c8.x, r3.y, r1.w
-mul.f r0.w, c0.w, r3.x
-mov.f32f32 r2.z, r0.x
-mov.f32f32 r2.y, r0.y
-mad.f32 r0.x, c9.x, r3.z, r0.z
-mad.f32 r0.y, c1.w, r3.y, r0.w
-mul.f r0.z, c0.z, r3.x
-mul.f r0.w, c0.y, r3.x
-add.f r0.x, r0.x, c10.x
-mad.f32 r0.y, c2.w, r3.z, r0.y
-mad.f32 r0.z, c1.z, r3.y, r0.z
-mad.f32 r0.w, c1.y, r3.y, r0.w
-mul.f r0.x, r0.x, c11.x
-add.f r0.y, r0.y, c3.w
-mad.f32 r0.z, c2.z, r3.z, r0.z
-mad.f32 r1.w, c2.y, r3.z, r0.w
-mov.f32f32 r2.x, r0.x
-mov.f32f32 r0.w, r0.y
-add.f r0.x, r0.z, c3.z
-add.f r0.y, r1.w, c3.y
-mul.f r1.w, c0.x, r3.x
-mov.f32f32 r3.w, r4.y
-mov.f32f32 r0.z, r0.x
-mov.f32f32 r0.y, r0.y
-mad.f32 r0.x, c1.x, r3.y, r1.w
-mov.f32f32 r3.w, r3.w
-mad.f32 r0.x, c2.x, r3.z, r0.x
-mov.f32f32 r1.w, r4.x
-mad.f32 r3.y, c6.x, r3.z, c6.y
-mad.f32 r3.x, c6.x, r3.x, c6.y
-add.f r0.x, r0.x, c3.x
-mov.f32f32 r3.z, r1.w
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r0.x, r0.x
+add.f r0.y, r0.y, c10.y
+mad.f32 r0.z, c8.x, r4.y, r0.z
+mad.f32 r0.w, c8.w, r4.y, r0.w
+mov.f32f32 r1.y, r0.x
+mul.f r2.y, r0.y, c11.y
+mad.f32 r0.y, c9.x, r4.z, r0.z
+mul.f r1.x, r0.x, c5.x
+mul.f r1.z, r1.y, c5.z
+mul.f r1.y, r1.y, c5.y
+add.f r0.x, r0.y, c10.x
+mad.f32 r0.y, c9.w, r4.z, r0.w
+mul.f r0.z, c7.z, r4.x
+mul.f r0.w, c0.w, r4.x
+mul.f r2.x, r0.x, c11.x
+add.f r2.w, r0.y, c10.w
+mad.f32 r0.x, c8.z, r4.y, r0.z
+mad.f32 r0.y, c1.w, r4.y, r0.w
+mad.f32 r0.x, c9.z, r4.z, r0.x
+mad.f32 r0.y, c2.w, r4.z, r0.y
+mul.f r0.z, c0.z, r4.x
+mul.f r1.w, c0.y, r4.x
+add.f r2.z, r0.x, c10.z
+add.f r0.w, r0.y, c3.w
+mad.f32 r0.x, c1.z, r4.y, r0.z
+mad.f32 r0.y, c1.y, r4.y, r1.w
+mad.f32 r0.x, c2.z, r4.z, r0.x
+mad.f32 r0.y, c2.y, r4.z, r0.y
+mul.f r1.w, c0.x, r4.x
+mad.f32 r3.y, c6.x, r4.z, c6.y
+add.f r0.z, r0.x, c3.z
+add.f r0.y, r0.y, c3.y
+mad.f32 r0.x, c1.x, r4.y, r1.w
+mad.f32 r3.x, c6.x, r4.x, c6.y
+mad.f32 r0.x, c2.x, r4.z, r0.x
mov.f32f32 r1.w, (0.000000)
-(rpt2)nop
-mov.f32f32 r1.w, r1.w
+(rpt1)nop
+add.f r0.x, r0.x, c3.x
end
+nop
; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11)
-; VERT: inputs: r3.x (0:0,cm=7,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r4.x (0:0,cm=3,il=16,b=0)
-; VERT: 74 instructions, 0 half, 5 full
+; VERT: inputs: r4.x (0:0,cm=7,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r3.z (0:0,cm=3,il=16,b=0)
+; VERT: 48 instructions, 0 half, 5 full
diff --git a/reference/0ad-alpine-valley/0ad-72.asm b/reference/0ad-alpine-valley/0ad-72.asm
index 035c379..5b14079 100644
--- a/reference/0ad-alpine-valley/0ad-72.asm
+++ b/reference/0ad-alpine-valley/0ad-72.asm
@@ -1,13 +1,13 @@
; options:
; VERT: new compiler
-@in(r3.x) in0
-@in(r3.y) in1
-@in(r3.z) in2
+@in(r4.x) in0
+@in(r4.y) in1
+@in(r4.z) in2
@in(r0.x) in4
@in(r0.y) in5
@in(r0.z) in6
-@in(r4.x) in8
-@in(r4.y) in9
+@in(r3.z) in8
+@in(r3.w) in9
@out(r0.x) out0
@out(r0.y) out1
@out(r0.z) out2
@@ -24,79 +24,57 @@
@out(r3.y) out13
@out(r3.z) out14
@out(r3.w) out15
+@const(c12.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
+@const(c13.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)mul.f r0.x, r0.x, (neg)c4.x
-mul.f r0.w, c7.w, r3.x
+mul.f r0.w, c7.y, r4.x
mad.f32 r0.x, (neg)c4.y, r0.y, r0.x
-mad.f32 r0.y, c8.w, r3.y, r0.w
-mul.f r0.w, c7.z, r3.x
-mul.f r1.x, c7.y, r3.x
-mov.f32f32 r0.x, r0.x
-mad.f32 r0.y, c9.w, r3.z, r0.y
+mad.f32 r0.y, c8.y, r4.y, r0.w
mad.f32 r0.x, (neg)c4.z, r0.z, r0.x
-mad.f32 r0.z, c8.z, r3.y, r0.w
-mad.f32 r0.w, c8.y, r3.y, r1.x
-mul.f r1.w, c7.x, r3.x
+mad.f32 r0.y, c9.y, r4.z, r0.y
+mul.f r0.z, c7.x, r4.x
+mul.f r0.w, c7.w, r4.x
max.f r0.x, c12.x, r0.x
-add.f r0.y, r0.y, c10.w
-mad.f32 r0.z, c9.z, r3.z, r0.z
-mad.f32 r0.w, c9.y, r3.z, r0.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
-add.f r0.z, r0.z, c10.z
-add.f r0.w, r0.w, c10.y
-mul.f r1.x, r0.x, c5.z
-mul.f r1.y, r0.x, c5.y
-mul.f r0.x, r0.x, c5.x
-mov.f32f32 r2.w, r0.y
-mov.f32f32 r0.y, r1.x
-mov.f32f32 r1.x, r1.y
-mov.f32f32 r0.x, r0.x
-nop
-mov.f32f32 r1.z, r0.y
-mov.f32f32 r1.y, r1.x
-mov.f32f32 r1.x, r0.x
-mov.f32f32 r0.x, r0.z
-mul.f r0.y, r0.w, c11.y
-mad.f32 r0.z, c8.x, r3.y, r1.w
-mul.f r0.w, c0.w, r3.x
-mov.f32f32 r2.z, r0.x
-mov.f32f32 r2.y, r0.y
-mad.f32 r0.x, c9.x, r3.z, r0.z
-mad.f32 r0.y, c1.w, r3.y, r0.w
-mul.f r0.z, c0.z, r3.x
-mul.f r0.w, c0.y, r3.x
-add.f r0.x, r0.x, c10.x
-mad.f32 r0.y, c2.w, r3.z, r0.y
-mad.f32 r0.z, c1.z, r3.y, r0.z
-mad.f32 r0.w, c1.y, r3.y, r0.w
-mul.f r0.x, r0.x, c11.x
-add.f r0.y, r0.y, c3.w
-mad.f32 r0.z, c2.z, r3.z, r0.z
-mad.f32 r1.w, c2.y, r3.z, r0.w
-mov.f32f32 r2.x, r0.x
-mov.f32f32 r0.w, r0.y
-add.f r0.x, r0.z, c3.z
-add.f r0.y, r1.w, c3.y
-mul.f r1.w, c0.x, r3.x
-mov.f32f32 r3.w, r4.y
-mov.f32f32 r0.z, r0.x
-mov.f32f32 r0.y, r0.y
-mad.f32 r0.x, c1.x, r3.y, r1.w
-mov.f32f32 r3.w, r3.w
-mad.f32 r0.x, c2.x, r3.z, r0.x
-mov.f32f32 r1.w, r4.x
-mad.f32 r3.y, c6.x, r3.z, c6.y
-mad.f32 r3.x, c6.x, r3.x, c6.y
-add.f r0.x, r0.x, c3.x
-mov.f32f32 r3.z, r1.w
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r0.x, r0.x
+add.f r0.y, r0.y, c10.y
+mad.f32 r0.z, c8.x, r4.y, r0.z
+mad.f32 r0.w, c8.w, r4.y, r0.w
+mov.f32f32 r1.y, r0.x
+mul.f r2.y, r0.y, c11.y
+mad.f32 r0.y, c9.x, r4.z, r0.z
+mul.f r1.x, r0.x, c5.x
+mul.f r1.z, r1.y, c5.z
+mul.f r1.y, r1.y, c5.y
+add.f r0.x, r0.y, c10.x
+mad.f32 r0.y, c9.w, r4.z, r0.w
+mul.f r0.z, c7.z, r4.x
+mul.f r0.w, c0.w, r4.x
+mul.f r2.x, r0.x, c11.x
+add.f r2.w, r0.y, c10.w
+mad.f32 r0.x, c8.z, r4.y, r0.z
+mad.f32 r0.y, c1.w, r4.y, r0.w
+mad.f32 r0.x, c9.z, r4.z, r0.x
+mad.f32 r0.y, c2.w, r4.z, r0.y
+mul.f r0.z, c0.z, r4.x
+mul.f r1.w, c0.y, r4.x
+add.f r2.z, r0.x, c10.z
+add.f r0.w, r0.y, c3.w
+mad.f32 r0.x, c1.z, r4.y, r0.z
+mad.f32 r0.y, c1.y, r4.y, r1.w
+mad.f32 r0.x, c2.z, r4.z, r0.x
+mad.f32 r0.y, c2.y, r4.z, r0.y
+mul.f r1.w, c0.x, r4.x
+mad.f32 r3.y, c6.x, r4.z, c6.y
+add.f r0.z, r0.x, c3.z
+add.f r0.y, r0.y, c3.y
+mad.f32 r0.x, c1.x, r4.y, r1.w
+mad.f32 r3.x, c6.x, r4.x, c6.y
+mad.f32 r0.x, c2.x, r4.z, r0.x
mov.f32f32 r1.w, (0.000000)
-(rpt2)nop
-mov.f32f32 r1.w, r1.w
+(rpt1)nop
+add.f r0.x, r0.x, c3.x
end
+nop
; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11)
-; VERT: inputs: r3.x (0:0,cm=7,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r4.x (0:0,cm=3,il=16,b=0)
-; VERT: 74 instructions, 0 half, 5 full
+; VERT: inputs: r4.x (0:0,cm=7,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r3.z (0:0,cm=3,il=16,b=0)
+; VERT: 48 instructions, 0 half, 5 full
diff --git a/reference/0ad-alpine-valley/0ad-73.asm b/reference/0ad-alpine-valley/0ad-73.asm
index 1d73264..f1c05da 100644
--- a/reference/0ad-alpine-valley/0ad-73.asm
+++ b/reference/0ad-alpine-valley/0ad-73.asm
@@ -8,215 +8,155 @@
@out(r2.y) out1
@out(r2.z) out2
@out(r2.w) out3
+@const(c10.x) 0x3f000000, 0x00000000, 0x3f800000, 0xbb449ba6
+@const(c11.x) 0xbf000000, 0x40000000, 0x3f800000, 0x3de38866
+@const(c12.x) 0x3fb8aa65, 0x3cf5c28f, 0x00000000, 0x00000000
+@const(c13.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.x, 4, r1.x
add.f r0.y, r0.w, c10.y
bary.f r0.w, 5, r1.x
bary.f r1.z, 10, r1.x
-add.f r1.w, r0.x, c11.x
-bary.f r2.x, 8, r1.x
+add.f r2.x, r0.x, c11.x
+bary.f r1.w, 11, r1.x
add.f r2.y, r0.w, c11.x
-mov.f32f32 r1.z, r1.z
-floor.f r2.z, r1.w
+bary.f r2.z, 8, r1.x
+floor.f r3.x, r2.x
rcp r0.y, r0.y
add.f r0.z, r0.z, c10.y
-floor.f r2.w, r2.y
-mov.f32f32 r3.x, r1.z
-add.f r1.z, r1.w, (neg)r2.z
+floor.f r3.y, r2.y
+bary.f r2.w, 9, r1.x
+add.f r2.x, r2.x, (neg)r3.x
(ss)mul.f r0.y, r0.z, r0.y
-add.f r0.z, r2.y, (neg)r2.w
-bary.f r1.w, 11, r1.x
-mov.f32f32 r1.z, r1.z
+absneg.f r0.z, (neg)c8.x
+add.f r2.y, r2.y, (neg)r3.y
+mov.f32f32 r3.x, r2.x
+sam (f32)(xyzw)r3.y, r1.z, s#0, t#0
+(sy)(ss)add.f r1.z, c13.y, (neg)r4.x
+mul.f r0.z, r0.z, c8.x
+mov.f32f32 r1.w, r2.y
+mul.f r4.y, c10.x, r3.x
+add.f r3.x, c11.y, (neg)r3.x
+mul.f r0.z, r0.z, r0.y
mov.f32f32 r0.y, r0.y
-absneg.f r2.y, (neg)c8.x
-mov.f32f32 r0.z, r0.z
-mul.f r2.z, c10.x, r1.z
-mov.f32f32 r1.w, r1.w
-mul.f r2.y, r2.y, c8.x
-add.f r2.w, c11.y, (neg)r1.z
-mov.f32f32 r2.z, r2.z
-mul.f r3.z, c10.x, r0.z
-mul.f r2.y, r2.y, r0.y
-mov.f32f32 r3.y, r1.w
-add.f r0.x, r0.x, (neg)r2.z
-mov.f32f32 r1.w, r3.z
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r2.z, r2.w
-mov.f32f32 r0.x, r0.x
-add.f r0.w, r0.w, (neg)r1.w
-mul.f r0.y, r2.y, r0.y
-sam (f32)(xyzw)r2.w, r3.x, s#0, t#0
-(sy)add.f r1.w, c13.y, (neg)r3.z
-add.f r2.y, c11.x, r0.x
+add.f r0.x, r0.x, (neg)r4.y
+mul.f r4.y, c10.x, r1.w
+mov.f32f32 r4.z, r3.x
+mul.f r0.y, r0.z, r0.y
+mov.f32f32 r0.z, r0.x
+add.f r0.w, r0.w, (neg)r4.y
add.f r0.x, c11.z, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r0.x, r0.x
mul.f r0.y, r0.y, c12.x
-add.f r3.w, c11.z, r0.w
-mul.f r2.y, r2.y, c3.z
-mul.f r0.x, r0.x, c3.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r4.x, r2.y
-mov.f32f32 r4.y, r0.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r4.z, r4.x
-add.f r0.w, c11.x, r0.w
-exp2 r0.y, r0.y
-(ss)mov.f32f32 r0.y, r0.y
-mov.f32f32 r4.x, r4.y
-mov.f32f32 r5.y, r0.x
-mov.f32f32 r0.x, r0.w
-add.f r0.w, c13.y, (neg)r0.y
-mov.f32f32 r6.x, r4.x
-mul.f r3.w, r3.w, c3.w
-mul.f r0.x, r0.x, c3.w
-mul.f r0.w, r0.w, c8.y
-mul.f r0.y, r0.y, c10.z
-mov.f32f32 r4.x, r3.w
-mov.f32f32 r4.y, r0.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r6.w, r2.y
-mov.f32f32 r2.y, r3.w
-mov.f32f32 r4.w, r4.y
-bary.f r3.w, 6, r1.x
-add.f r0.y, r0.y, r0.w
-mov.f32f32 r0.w, r4.x
-mov.f32f32 r5.z, r0.x
-add.f r0.x, r3.w, c10.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r6.y, r0.w
-mov.f32f32 r7.x, r2.y
-mov.f32f32 r0.w, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r2.y, r0.x
-mov.f32f32 r3.w, r0.x
-mov.f32f32 r5.x, r0.w
-max.f r0.y, r0.y, c10.y
-mov.f32f32 r6.z, r2.y
-mov.f32f32 r5.w, r3.w
-mov.f32f32 r0.x, r0.x
-mul.f r0.w, r1.w, c4.z
-add.f r1.w, c13.y, (neg)r3.z
-sam.s (f32)(x)r3.w, r4.z, s#2, t#2
-(sy)mov.f32f32 r2.y, r3.w
-min.f r0.y, r0.y, c10.z
-sam.s (f32)(x)r3.w, r6.x, s#2, t#2
-(sy)mov.f32f32 r3.w, r3.w
-sam.s (f32)(x)r4.x, r5.y, s#2, t#2
-(sy)mov.f32f32 r4.x, r4.x
-mov.f32f32 r2.y, r2.y
-add.f r4.y, c11.y, (neg)r0.z
-(ss)add.f r4.z, c13.y, (neg)r0.y
-add.f r4.w, c13.y, (neg)r0.y
-add.f r5.x, c13.y, (neg)r0.y
-mov.f32f32 r4.y, r4.y
-mul.f r4.z, r4.z, c7.z
-mul.f r4.w, r4.w, c7.y
-mul.f r5.x, r5.x, c7.x
-mul.f r5.y, r2.z, r4.y
-mov.f32f32 r7.y, r0.x
-mul.f r0.x, r3.z, c10.z
-mul.f r1.w, r1.w, c4.y
-mul.f r2.y, r5.y, r2.y
-add.f r1.z, r1.z, c10.z
-add.f r0.x, r0.x, r0.w
-mul.f r0.w, r3.z, c10.z
-sam.s (f32)(x)r5.y, r6.w, s#2, t#2
-(sy)mov.f32f32 r5.y, r5.y
-mul.f r4.y, r1.z, r4.y
-add.f r5.z, c13.y, (neg)r3.z
-mul.f r0.x, r3.y, r0.x
-add.f r0.w, r0.w, r1.w
-mad.f32 r1.w, r4.y, r4.x, r2.y
-mul.f r2.y, r5.z, c4.x
-mov.f32f32 r0.x, r0.x
-bary.f r3.y, 2, r1.x
-mov.f32f32 r1.w, r1.w
-add.f r0.z, r0.z, c10.z
-mul.f r0.w, r3.x, r0.w
-mul.f r3.x, r3.z, c10.z
-mov.f32f32 r2.x, r2.x
-mul.f r2.z, r2.z, r0.z
-mul.f r3.y, r0.x, r3.y
-mov.f32f32 r0.w, r0.w
-add.f r2.y, r3.x, r2.y
-mad.f32 r1.w, r2.z, r5.y, r1.w
-bary.f r2.z, 1, r1.x
-mov.f32f32 r4.x, r2.x
-mul.f r2.x, r2.w, r2.y
-mov.f32f32 r1.w, r1.w
-mul.f r0.z, r1.z, r0.z
-mul.f r1.z, r0.w, r2.z
-mov.f32f32 r2.x, r2.x
-nop
-mad.f32 r0.z, r0.z, r3.w, r1.w
-bary.f r1.w, 0, r1.x
-bary.f (ei)r1.x, 9, r1.x
-mov.f32f32 r1.y, c10.z
-mov.f32f32 r0.z, r0.z
-mul.f r1.w, r2.x, r1.w
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r2.w, r1.y
-mul.f r0.z, c11.w, r0.z
-mov.f32f32 r1.y, c10.y
-(rpt1)nop
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r4.y, r1.x
+add.f r0.z, c11.x, r0.z
+mov.f32f32 r4.y, r0.w
+mul.f r4.w, r0.x, c3.z
+add.f r0.x, c11.z, r0.w
+mul.f r5.z, r0.z, c3.z
+add.f r0.z, c11.x, r4.y
+mov.f32f32 r6.y, r4.w
+mul.f r5.x, r0.x, c3.w
+mov.f32f32 r7.x, r5.z
+mul.f r6.z, r0.z, c3.w
+exp2 r0.x, r0.y
+(ss)mov.f32f32 r0.y, r0.x
+bary.f r0.z, 6, r1.x
+mov.f32f32 r5.w, r5.x
+mov.f32f32 r7.y, r6.z
+add.f r0.y, c13.y, (neg)r0.y
+add.f r5.y, r0.z, c10.w
+add.f r0.z, c11.y, (neg)r1.w
+mul.f r0.x, r0.x, c10.z
+add.f r0.w, c13.y, (neg)r4.x
+mov.f32f32 r7.z, r5.y
+mul.f r0.y, r0.y, c8.y
+mov.f32f32 r6.w, r5.y
+mov.f32f32 r6.x, r5.y
(rpt1)nop
-mul.f r1.x, r3.y, r0.z
-mul.f r1.z, r1.z, r0.z
-mul.f r0.z, r1.w, r0.z
+add.f r0.x, r0.x, r0.y
+sam.s (f32)(x)r7.x, r7.x, s#2, t#2
+mov.f32f32 r0.y, r0.z
nop
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.z, r1.z
-mad.f32 r0.x, c6.z, r0.x, r1.x
-mad.f32 r0.w, c6.y, r0.w, r1.z
-mov.f32f32 r0.z, r0.z
-sam (f32)(w)r3.x, r4.x, s#1, t#1
-(sy)cmps.f.lt r1.x, r3.w, c12.y
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.w, r0.w
-mad.f32 r0.z, c6.x, r2.x, r0.z
-cov.u32f32 r1.x, r1.x
-mul.f r0.x, r0.y, r0.x
-mul.f r0.w, r0.y, r0.w
-mov.f32f32 r0.z, r0.z
-cmps.f.ne r1.x, r1.x, c10.y
-add.f r0.x, r0.x, r4.z
-mov.f32f32 r1.z, r3.w
-add.f r0.w, r0.w, r4.w
-mul.f r0.y, r0.y, r0.z
-nop
-mov.f32f32 r0.z, r1.z
-(rpt2)nop
-sel.b32 r0.z, r1.y, r1.x, r0.z
-add.f r0.y, r0.y, r5.x
-(rpt1)nop
-mul.f r0.x, r0.x, r0.z
-mul.f r0.w, r0.w, r0.z
-mul.f r0.y, r0.y, r0.z
+(ss)nop
+sam.s (f32)(x)r7.y, r6.y, s#2, t#2
+sam.s (f32)(x)r7.z, r4.w, s#2, t#2
+mul.f r0.w, r0.w, c4.z
+add.f r1.w, c13.y, (neg)r4.x
+mul.f r0.y, r4.z, r0.y
+max.f r0.x, r0.x, c10.y
+sam.s (f32)(x)r4.y, r5.z, s#2, t#2
+mul.f r4.z, r4.x, c10.z
+mul.f r1.w, r1.w, c4.y
+(sy)mul.f r0.y, r0.y, r7.x
+add.f r2.x, r2.x, c10.z
+min.f r0.x, r0.x, c10.z
+add.f r0.w, r4.z, r0.w
+mul.f r4.z, r4.x, c10.z
+mul.f r0.z, r2.x, r0.z
+(ss)add.f r4.w, c13.y, (neg)r0.x
+add.f r5.x, c13.y, (neg)r0.x
+add.f r5.y, c13.y, (neg)r0.x
+mad.f32 r0.y, r0.z, r7.y, r0.y
+add.f r0.z, r2.y, c10.z
+mul.f r2.y, r4.w, c7.z
+mul.f r4.w, r5.x, c7.y
+mul.f r5.x, r5.y, c7.x
+mul.f r3.x, r3.x, r0.z
+add.f r1.w, r4.z, r1.w
+mul.f r1.z, r1.z, c4.x
+mul.f r0.w, r3.w, r0.w
+mad.f32 r0.y, r3.x, r4.y, r0.y
+mul.f r0.z, r2.x, r0.z
+mul.f r2.x, r4.x, c10.z
+mov.f32f32 r3.x, r0.w
+bary.f r3.w, 2, r1.x
+mad.f32 r0.y, r0.z, r7.z, r0.y
+mul.f r0.z, r3.z, r1.w
+add.f r1.z, r2.x, r1.z
+mul.f r1.w, r3.x, r3.w
+mul.f r0.y, c11.w, r0.y
+mov.f32f32 r2.x, r0.z
+mul.f r1.z, r3.y, r1.z
+bary.f r3.x, 1, r1.x
+mov.f32f32 r3.y, r0.y
+sam (f32)(w)r3.z, r2.z, s#1, t#1
+(sy)(ss)cmps.f.lt r2.z, r4.y, c12.y
+mov.f32f32 r3.z, r1.z
+mul.f r2.x, r2.x, r3.x
+mul.f r1.w, r1.w, r3.y
+bary.f (ei)r1.x, 0, r1.x
+mad.f32 r0.w, c6.z, r0.w, r1.w
+mul.f r1.y, r2.x, r3.y
+cov.u32f32 r1.w, r2.z
+mov.f32f32 r2.w, c10.z
+mul.f r0.w, r0.x, r0.w
+mad.f32 r0.z, c6.y, r0.z, r1.y
+mul.f r1.x, r3.z, r1.x
+cmps.f.ne r1.y, r1.w, c10.y
+add.f r0.w, r0.w, r2.y
+mov.f32f32 r1.w, c10.y
+mul.f r0.z, r0.x, r0.z
+mul.f r0.y, r1.x, r0.y
nop
-mul.f r0.x, r0.x, c5.z
-mul.f r0.z, r0.w, c5.y
-mul.f r0.y, r0.y, c5.x
+sel.b32 r1.x, r1.w, r1.y, r4.y
+add.f r0.z, r0.z, r4.w
+mad.f32 r0.y, c6.x, r1.z, r0.y
nop
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.y, r0.y
+mul.f r0.w, r0.w, r1.x
+mul.f r0.z, r0.z, r1.x
+(rpt1)nop
+mul.f r2.z, r0.w, c5.z
+mul.f r2.y, r0.z, c5.y
+mul.f r0.x, r0.x, r0.y
+(rpt2)nop
+add.f r0.x, r0.x, r5.x
+(rpt2)nop
+mul.f r0.x, r0.x, r1.x
+(rpt2)nop
+mul.f r2.x, r0.x, c5.x
+end
nop
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.y, r0.y
nop
-mov.f32f32 r2.z, r0.x
-mov.f32f32 r2.y, r0.z
-mov.f32f32 r2.x, r0.y
-end
nop
; FRAG: outputs: r2.x (1:0)
; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r2.x (5:10,cm=f,il=12,b=1) r1.x (5:11,cm=f,il=16,b=1)
-; FRAG: 212 instructions, 0 half, 8 full
+; FRAG: 149 instructions, 0 half, 8 full
diff --git a/reference/0ad-alpine-valley/0ad-74.asm b/reference/0ad-alpine-valley/0ad-74.asm
index 035c379..5b14079 100644
--- a/reference/0ad-alpine-valley/0ad-74.asm
+++ b/reference/0ad-alpine-valley/0ad-74.asm
@@ -1,13 +1,13 @@
; options:
; VERT: new compiler
-@in(r3.x) in0
-@in(r3.y) in1
-@in(r3.z) in2
+@in(r4.x) in0
+@in(r4.y) in1
+@in(r4.z) in2
@in(r0.x) in4
@in(r0.y) in5
@in(r0.z) in6
-@in(r4.x) in8
-@in(r4.y) in9
+@in(r3.z) in8
+@in(r3.w) in9
@out(r0.x) out0
@out(r0.y) out1
@out(r0.z) out2
@@ -24,79 +24,57 @@
@out(r3.y) out13
@out(r3.z) out14
@out(r3.w) out15
+@const(c12.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
+@const(c13.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)mul.f r0.x, r0.x, (neg)c4.x
-mul.f r0.w, c7.w, r3.x
+mul.f r0.w, c7.y, r4.x
mad.f32 r0.x, (neg)c4.y, r0.y, r0.x
-mad.f32 r0.y, c8.w, r3.y, r0.w
-mul.f r0.w, c7.z, r3.x
-mul.f r1.x, c7.y, r3.x
-mov.f32f32 r0.x, r0.x
-mad.f32 r0.y, c9.w, r3.z, r0.y
+mad.f32 r0.y, c8.y, r4.y, r0.w
mad.f32 r0.x, (neg)c4.z, r0.z, r0.x
-mad.f32 r0.z, c8.z, r3.y, r0.w
-mad.f32 r0.w, c8.y, r3.y, r1.x
-mul.f r1.w, c7.x, r3.x
+mad.f32 r0.y, c9.y, r4.z, r0.y
+mul.f r0.z, c7.x, r4.x
+mul.f r0.w, c7.w, r4.x
max.f r0.x, c12.x, r0.x
-add.f r0.y, r0.y, c10.w
-mad.f32 r0.z, c9.z, r3.z, r0.z
-mad.f32 r0.w, c9.y, r3.z, r0.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
-add.f r0.z, r0.z, c10.z
-add.f r0.w, r0.w, c10.y
-mul.f r1.x, r0.x, c5.z
-mul.f r1.y, r0.x, c5.y
-mul.f r0.x, r0.x, c5.x
-mov.f32f32 r2.w, r0.y
-mov.f32f32 r0.y, r1.x
-mov.f32f32 r1.x, r1.y
-mov.f32f32 r0.x, r0.x
-nop
-mov.f32f32 r1.z, r0.y
-mov.f32f32 r1.y, r1.x
-mov.f32f32 r1.x, r0.x
-mov.f32f32 r0.x, r0.z
-mul.f r0.y, r0.w, c11.y
-mad.f32 r0.z, c8.x, r3.y, r1.w
-mul.f r0.w, c0.w, r3.x
-mov.f32f32 r2.z, r0.x
-mov.f32f32 r2.y, r0.y
-mad.f32 r0.x, c9.x, r3.z, r0.z
-mad.f32 r0.y, c1.w, r3.y, r0.w
-mul.f r0.z, c0.z, r3.x
-mul.f r0.w, c0.y, r3.x
-add.f r0.x, r0.x, c10.x
-mad.f32 r0.y, c2.w, r3.z, r0.y
-mad.f32 r0.z, c1.z, r3.y, r0.z
-mad.f32 r0.w, c1.y, r3.y, r0.w
-mul.f r0.x, r0.x, c11.x
-add.f r0.y, r0.y, c3.w
-mad.f32 r0.z, c2.z, r3.z, r0.z
-mad.f32 r1.w, c2.y, r3.z, r0.w
-mov.f32f32 r2.x, r0.x
-mov.f32f32 r0.w, r0.y
-add.f r0.x, r0.z, c3.z
-add.f r0.y, r1.w, c3.y
-mul.f r1.w, c0.x, r3.x
-mov.f32f32 r3.w, r4.y
-mov.f32f32 r0.z, r0.x
-mov.f32f32 r0.y, r0.y
-mad.f32 r0.x, c1.x, r3.y, r1.w
-mov.f32f32 r3.w, r3.w
-mad.f32 r0.x, c2.x, r3.z, r0.x
-mov.f32f32 r1.w, r4.x
-mad.f32 r3.y, c6.x, r3.z, c6.y
-mad.f32 r3.x, c6.x, r3.x, c6.y
-add.f r0.x, r0.x, c3.x
-mov.f32f32 r3.z, r1.w
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r0.x, r0.x
+add.f r0.y, r0.y, c10.y
+mad.f32 r0.z, c8.x, r4.y, r0.z
+mad.f32 r0.w, c8.w, r4.y, r0.w
+mov.f32f32 r1.y, r0.x
+mul.f r2.y, r0.y, c11.y
+mad.f32 r0.y, c9.x, r4.z, r0.z
+mul.f r1.x, r0.x, c5.x
+mul.f r1.z, r1.y, c5.z
+mul.f r1.y, r1.y, c5.y
+add.f r0.x, r0.y, c10.x
+mad.f32 r0.y, c9.w, r4.z, r0.w
+mul.f r0.z, c7.z, r4.x
+mul.f r0.w, c0.w, r4.x
+mul.f r2.x, r0.x, c11.x
+add.f r2.w, r0.y, c10.w
+mad.f32 r0.x, c8.z, r4.y, r0.z
+mad.f32 r0.y, c1.w, r4.y, r0.w
+mad.f32 r0.x, c9.z, r4.z, r0.x
+mad.f32 r0.y, c2.w, r4.z, r0.y
+mul.f r0.z, c0.z, r4.x
+mul.f r1.w, c0.y, r4.x
+add.f r2.z, r0.x, c10.z
+add.f r0.w, r0.y, c3.w
+mad.f32 r0.x, c1.z, r4.y, r0.z
+mad.f32 r0.y, c1.y, r4.y, r1.w
+mad.f32 r0.x, c2.z, r4.z, r0.x
+mad.f32 r0.y, c2.y, r4.z, r0.y
+mul.f r1.w, c0.x, r4.x
+mad.f32 r3.y, c6.x, r4.z, c6.y
+add.f r0.z, r0.x, c3.z
+add.f r0.y, r0.y, c3.y
+mad.f32 r0.x, c1.x, r4.y, r1.w
+mad.f32 r3.x, c6.x, r4.x, c6.y
+mad.f32 r0.x, c2.x, r4.z, r0.x
mov.f32f32 r1.w, (0.000000)
-(rpt2)nop
-mov.f32f32 r1.w, r1.w
+(rpt1)nop
+add.f r0.x, r0.x, c3.x
end
+nop
; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11)
-; VERT: inputs: r3.x (0:0,cm=7,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r4.x (0:0,cm=3,il=16,b=0)
-; VERT: 74 instructions, 0 half, 5 full
+; VERT: inputs: r4.x (0:0,cm=7,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r3.z (0:0,cm=3,il=16,b=0)
+; VERT: 48 instructions, 0 half, 5 full
diff --git a/reference/0ad-alpine-valley/0ad-75.asm b/reference/0ad-alpine-valley/0ad-75.asm
index 67a2ad5..e7bcae3 100644
--- a/reference/0ad-alpine-valley/0ad-75.asm
+++ b/reference/0ad-alpine-valley/0ad-75.asm
@@ -32,159 +32,113 @@
@out(r5.y) out21
@out(r5.z) out22
@out(r5.w) out23
+@const(c17.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
+@const(c18.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)mul.f r1.z, c12.x, r0.x
mul.f r1.w, c12.x, r0.w
mad.f32 r1.z, c13.x, r0.y, r1.z
mad.f32 r1.w, c13.x, r1.x, r1.w
mad.f32 r1.z, c14.x, r0.z, r1.z
-mul.f r2.x, c12.z, r0.x
-mul.f r2.y, c12.z, r0.w
-mul.f r0.w, c12.y, r0.w
-add.f r1.z, r1.z, c15.x
-mov.f32f32 r1.w, r1.w
-mad.f32 r2.x, c13.z, r0.y, r2.x
-mad.f32 r2.y, c13.z, r1.x, r2.y
-add.f r2.z, c4.x, (neg)r1.z
mad.f32 r1.w, c14.x, r1.y, r1.w
-mul.f r2.w, c8.w, r1.z
-mul.f r3.x, c8.z, r1.z
-mul.f r3.y, r2.z, r2.z
-mul.f r3.z, c12.y, r0.x
-mov.f32f32 r1.w, r1.w
-mad.f32 r3.z, c13.y, r0.y, r3.z
-absneg.f r3.w, (neg)c5.x
-mad.f32 r3.z, c14.y, r0.z, r3.z
-mul.f r4.x, c8.y, r1.z
-mul.f r4.y, c8.x, r1.z
-mul.f r5.x, r1.w, r3.w
-add.f r5.w, r3.z, c15.y
-mad.f32 r0.w, c13.y, r1.x, r0.w
-mul.f r1.x, c0.w, r1.z
-mul.f r3.z, c0.z, r1.z
+absneg.f r2.x, (neg)c5.x
+mul.f r2.y, c12.y, r0.x
+add.f r2.z, r1.z, c15.x
+mad.f32 r1.z, c13.y, r0.y, r2.y
+mul.f r2.y, c12.z, r0.x
+mul.f r2.w, r1.w, r2.x
+add.f r3.x, c4.x, (neg)r2.z
+mul.f r3.y, c12.y, r0.w
+mul.f r3.z, c8.y, r2.z
+mul.f r3.w, c8.x, r2.z
+mul.f r4.x, r3.x, r3.x
+mad.f32 r1.z, c14.y, r0.z, r1.z
+mad.f32 r3.y, c13.y, r1.x, r3.y
+mul.f r4.y, c8.w, r2.z
+mul.f r5.x, c8.z, r2.z
+add.f r5.w, r1.z, c15.y
+mad.f32 r6.x, c14.y, r1.y, r3.y
+absneg.f r1.z, (neg)c5.y
+mul.f r3.y, c0.w, r2.z
add.f r5.y, c4.y, (neg)r5.w
-mov.f32f32 r0.w, r0.w
-mad.f32 r2.w, c9.w, r5.w, r2.w
-mad.f32 r3.x, c9.z, r5.w, r3.x
-mad.f32 r3.y, r5.y, r5.y, r3.y
-mad.f32 r0.w, c14.y, r1.y, r0.w
-mad.f32 r2.x, c14.z, r0.z, r2.x
-mad.f32 r4.x, c9.y, r5.w, r4.x
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r6.x, r0.w
-add.f r2.x, r2.x, c15.z
-absneg.f r0.w, (neg)c5.y
-mad.f32 r4.y, c9.x, r5.w, r4.y
-mad.f32 r1.x, c1.w, r5.w, r1.x
-add.f r5.z, c4.z, (neg)r2.x
-mad.f32 r5.x, r6.x, r0.w, r5.x
-mad.f32 r2.w, c10.w, r2.x, r2.w
-mad.f32 r3.x, c10.z, r2.x, r3.x
-mad.f32 r3.y, r5.z, r5.z, r3.y
-mov.f32f32 r5.x, r5.x
-mov.f32f32 r2.y, r2.y
+mad.f32 r3.z, c9.y, r5.w, r3.z
+mad.f32 r3.w, c9.x, r5.w, r3.w
+mad.f32 r2.w, r6.x, r1.z, r2.w
+mad.f32 r4.x, r5.y, r5.y, r4.x
+mad.f32 r2.y, c13.z, r0.y, r2.y
+mul.f r0.w, c12.z, r0.w
+mad.f32 r2.y, c14.z, r0.z, r2.y
+mad.f32 r0.w, c13.z, r1.x, r0.w
+mad.f32 r1.x, c9.w, r5.w, r4.y
+mad.f32 r4.y, c9.z, r5.w, r5.x
+add.f r2.y, r2.y, c15.z
+mad.f32 r6.y, c14.z, r1.y, r0.w
+absneg.f r0.w, (neg)c5.z
+mad.f32 r1.y, c1.w, r5.w, r3.y
+add.f r3.y, c4.z, (neg)r2.y
+mad.f32 r3.z, c10.y, r2.y, r3.z
+mad.f32 r3.w, c10.x, r2.y, r3.w
+mad.f32 r2.w, r6.y, r0.w, r2.w
+mad.f32 r4.x, r3.y, r3.y, r4.x
mul.f r0.x, c12.w, r0.x
-mad.f32 r4.x, c10.y, r2.x, r4.x
-mad.f32 r4.y, c10.x, r2.x, r4.y
-mad.f32 r1.y, c14.z, r1.y, r2.y
-rsq r2.y, r3.y
-(ss)mov.f32f32 r2.y, r2.y
-(ss)absneg.f r3.y, (neg)c5.z
+mad.f32 r5.x, c10.w, r2.y, r1.x
+mad.f32 r4.y, c10.z, r2.y, r4.y
mad.f32 r0.x, c13.w, r0.y, r0.x
-mov.f32f32 r6.y, r1.y
-mad.f32 r0.y, r2.z, r2.y, r3.w
-mad.f32 r0.w, r5.y, r2.y, r0.w
-mad.f32 r1.y, r5.z, r2.y, r3.y
-mad.f32 r2.y, r6.y, r3.y, r5.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r2.y, r2.y
-mul.f r2.z, r0.y, r0.y
+mad.f32 r0.y, c2.w, r2.y, r1.y
+mul.f r6.z, c0.z, r2.z
+rsq r1.x, r4.x
+(ss)mov.f32f32 r1.y, r1.x
+mad.f32 r2.x, r3.x, r1.x, r2.x
+max.f r1.x, c17.x, r2.w
mad.f32 r0.x, c14.w, r0.z, r0.x
-mad.f32 r0.z, r0.w, r0.w, r2.z
-max.f r2.y, c17.x, r2.y
-mad.f32 r2.z, c2.w, r2.x, r1.x
-mad.f32 r1.x, c1.z, r5.w, r3.z
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r2.y, r2.y
-mad.f32 r0.z, r1.y, r1.y, r0.z
+mad.f32 r0.z, r5.y, r1.y, r1.z
+mov.f32f32 r1.z, r2.x
+mad.f32 r0.w, r3.y, r1.y, r0.w
+nop
+mov.f32f32 r2.w, r0.z
+mul.f r1.y, r1.z, r1.z
+(ss)mov.f32f32 r4.x, r0.w
+mov.f32f32 r3.x, r1.x
+mad.f32 r0.z, r0.z, r2.w, r1.y
add.f r0.x, r0.x, c15.w
-mad.f32 r6.z, c2.z, r2.x, r1.x
-mul.f r6.w, c0.y, r1.z
-mul.f r7.x, c0.x, r1.z
-mad.f32 r7.y, c7.x, r2.x, c7.y
-mad.f32 r7.z, c7.x, r1.z, c7.y
+mad.f32 r0.z, r0.w, r4.x, r0.z
+mul.f r1.z, r3.x, c6.z
+mul.f r1.y, r3.x, c6.y
+mad.f32 r0.w, c11.y, r0.x, r3.z
+mad.f32 r3.x, c11.x, r0.x, r3.w
+mul.f r1.x, r1.x, c6.x
+mad.f32 r3.w, c11.w, r0.x, r5.x
rsq r0.z, r0.z
-(ss)mov.f32f32 r0.z, r0.z
-mul.f r1.x, r2.y, c6.z
-mul.f r3.y, r2.y, c6.y
-mul.f r2.y, r2.y, c6.x
-mul.f r1.y, r1.y, r0.z
-mul.f r0.w, r0.w, r0.z
-mul.f r0.y, r0.y, r0.z
-mov.f32f32 r0.z, r1.x
-mov.f32f32 r1.x, r1.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.y, r0.y
-nop
-mov.f32f32 r5.z, r1.x
-mov.f32f32 r5.y, r0.w
-mov.f32f32 r5.x, r0.y
-mov.f32f32 r1.z, r0.z
-mov.f32f32 r0.y, r3.y
-mov.f32f32 r0.z, r2.y
-(rpt1)nop
-mov.f32f32 r1.y, r0.y
-mov.f32f32 r1.x, r0.z
-mad.f32 r0.y, c11.w, r0.x, r2.w
-mad.f32 r0.z, c11.z, r0.x, r3.x
-mad.f32 r0.w, c11.y, r0.x, r4.x
-mad.f32 r2.y, c11.x, r0.x, r4.y
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-mul.f r0.w, r0.w, c16.y
-mul.f r2.y, r2.y, c16.x
-mov.f32f32 r3.w, r0.y
-mov.f32f32 r3.z, r0.z
-mov.f32f32 r3.y, r0.w
-mov.f32f32 r3.x, r2.y
-mad.f32 r0.y, c3.w, r0.x, r2.z
-mad.f32 r0.z, c3.z, r0.x, r6.z
-mad.f32 r2.y, c1.y, r5.w, r6.w
-mad.f32 r2.z, c1.x, r5.w, r7.x
-mov.f32f32 r0.w, r0.y
-mov.f32f32 r0.z, r0.z
-mad.f32 r0.y, c2.y, r2.x, r2.y
-mad.f32 r2.x, c2.x, r2.x, r2.z
+(ss)mov.f32f32 r3.z, r0.z
+mul.f r5.x, r2.x, r0.z
+mul.f r3.y, r0.w, c16.y
+mul.f r3.x, r3.x, c16.x
+mul.f r5.z, r4.x, r3.z
+mul.f r5.y, r2.w, r3.z
+mad.f32 r3.z, c11.z, r0.x, r4.y
+mad.f32 r0.w, c3.w, r0.x, r0.y
+mad.f32 r0.y, c1.z, r5.w, r6.z
+(ss)mul.f r0.z, c0.y, r2.z
+mad.f32 r0.y, c2.z, r2.y, r0.y
+mad.f32 r2.x, c1.y, r5.w, r0.z
+mad.f32 r0.z, c3.z, r0.x, r0.y
+mad.f32 r0.y, c2.y, r2.y, r2.x
+mul.f r2.x, c0.x, r2.z
mad.f32 r0.y, c3.y, r0.x, r0.y
+mad.f32 r2.x, c1.x, r5.w, r2.x
+mad.f32 r4.y, c7.x, r2.y, c7.y
+mad.f32 r2.x, c2.x, r2.y, r2.x
+mad.f32 r4.x, c7.x, r2.z, c7.y
mad.f32 r0.x, c3.x, r0.x, r2.x
-mov.f32f32 r2.x, r7.y
-mov.f32f32 r2.y, r7.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r4.y, r2.x
-mov.f32f32 r4.x, r2.y
-mov.f32f32 r2.x, r6.y
+mov.f32f32 r2.z, r6.y
mov.f32f32 r2.y, r6.x
-mov.f32f32 r1.w, r1.w
-nop
-mov.f32f32 r2.z, r2.x
-mov.f32f32 r2.y, r2.y
mov.f32f32 r2.x, r1.w
+mov.f32f32 r5.w, (0.000000)
+mov.f32f32 r2.w, (0.000000)
mov.f32f32 r1.w, (0.000000)
-mov.f32f32 r2.w, r4.w
-mov.f32f32 r4.z, r4.z
-mov.f32f32 r6.x, (0.000000)
-mov.f32f32 r5.w, r1.w
-mov.f32f32 r4.w, r2.w
-mov.f32f32 r4.z, r4.z
-mov.f32f32 r2.w, r6.x
-mov.f32f32 r1.w, (0.000000)
-(rpt2)nop
-mov.f32f32 r1.w, r1.w
end
nop
+nop
; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11) r4.x (5:12) r5.x (5:13)
; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r4.z (0:0,cm=3,il=16,b=0)
-; VERT: 154 instructions, 0 half, 8 full
+; VERT: 102 instructions, 0 half, 7 full
diff --git a/reference/0ad-alpine-valley/0ad-76.asm b/reference/0ad-alpine-valley/0ad-76.asm
index b853b0d..d0ec086 100644
--- a/reference/0ad-alpine-valley/0ad-76.asm
+++ b/reference/0ad-alpine-valley/0ad-76.asm
@@ -8,6 +8,10 @@
@out(r2.y) out1
@out(r2.z) out2
@out(r2.w) out3
+@const(c12.x) 0x3f000000, 0x00000000, 0x3f800000, 0xbb449ba6
+@const(c13.x) 0xbf000000, 0x40000000, 0x3f800000, 0x3de38866
+@const(c14.x) 0x3fb8aa65, 0x3cf5c28f, 0x00000000, 0x00000000
+@const(c15.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.x, 8, r1.x
add.f r0.y, r0.w, c12.y
bary.f r0.w, 4, r1.x
@@ -23,224 +27,144 @@ add.f r3.x, r1.z, c13.x
mad.f32 r2.y, r2.z, r2.z, r2.y
add.f r1.w, r1.w, (neg)r2.w
(ss)mul.f r0.y, r0.z, r0.y
-floor.f r0.z, r3.x
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r1.w, r1.w
+absneg.f r0.z, (neg)c9.x
+bary.f r2.w, 6, r1.x
+mov.f32f32 r3.y, r1.w
+floor.f r3.z, r3.x
+mul.f r0.z, r0.z, c9.x
+mad.f32 r2.y, r2.w, r2.w, r2.y
+mul.f r3.w, c12.x, r3.y
+add.f r3.x, r3.x, (neg)r3.z
+mul.f r0.z, r0.z, r0.y
mov.f32f32 r0.y, r0.y
-absneg.f r2.w, (neg)c9.x
-bary.f r3.y, 6, r1.x
-mul.f r3.z, c12.x, r1.w
-add.f r0.z, r3.x, (neg)r0.z
-mul.f r2.w, r2.w, c9.x
-mad.f32 r2.y, r3.y, r3.y, r2.y
-mov.f32f32 r3.x, r3.z
-mov.f32f32 r0.z, r0.z
-mul.f r2.w, r2.w, r0.y
-add.f r3.z, c13.y, (neg)r1.w
-add.f r0.x, r0.x, (neg)r3.x
-mul.f r3.x, c12.x, r0.z
-mov.f32f32 r2.w, r2.w
+add.f r0.x, r0.x, (neg)r3.w
+mov.f32f32 r3.z, r3.x
rsq r2.y, r2.y
-(ss)mov.f32f32 r2.y, r2.y
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r3.x, r3.x
-mul.f r0.y, r2.w, r0.y
-mul.f r0.w, r0.w, r2.y
-add.f r2.w, c13.x, r0.x
+(ss)mov.f32f32 r3.w, r2.y
+mul.f r0.y, r0.z, r0.y
+mov.f32f32 r0.z, r0.x
+mul.f r4.x, c12.x, r3.z
add.f r0.x, c13.z, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.w, r2.w
-bary.f r3.w, 16, r1.x
-mul.f r0.y, r0.y, c14.x
-mov.f32f32 r0.x, r0.x
-mul.f r2.w, r2.w, c4.z
mul.f r0.w, r0.w, r3.w
-mul.f r2.z, r2.z, r2.y
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r3.w, r2.w
-mul.f r0.x, r0.x, c4.z
-mov.f32f32 r2.z, r2.z
-bary.f r4.x, 17, r1.x
-mov.f32f32 r4.y, r3.w
-add.f r1.z, r1.z, (neg)r3.x
+add.f r0.z, c13.x, r0.z
+mul.f r0.y, r0.y, c14.x
+bary.f r4.y, 16, r1.x
+add.f r1.z, r1.z, (neg)r4.x
+mul.f r4.z, r0.z, c4.z
+mul.f r5.y, r0.x, c4.z
+mul.f r0.x, r0.w, r4.y
+add.f r0.z, c13.z, r1.z
+mov.f32f32 r6.x, r4.z
+mov.f32f32 r0.w, r1.z
exp2 r0.y, r0.y
-(ss)mov.f32f32 r0.y, r0.y
-mad.f32 r0.w, r2.z, r4.x, r0.w
-mov.f32f32 r2.z, r0.x
-mov.f32f32 r1.z, r1.z
-add.f r3.x, c15.y, (neg)r0.y
-mov.f32f32 r0.w, r0.w
-mul.f r2.y, r3.y, r2.y
-add.f r3.y, c13.x, r1.z
-mul.f r3.x, r3.x, c9.y
-mul.f r0.y, r0.y, c12.z
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r3.y, r3.y
-bary.f r3.w, 18, r1.x
-add.f r0.y, r0.y, r3.x
-mov.f32f32 r2.z, r2.z
-mul.f r3.x, r3.y, c4.w
-mad.f32 r0.w, r2.y, r3.w, r0.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r5.x, r2.z
-mov.f32f32 r2.y, r3.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.y, r0.y
-add.f r1.z, c13.z, r1.z
-mov.f32f32 r4.z, r2.y
-bary.f r2.y, 10, r1.x
+(ss)mov.f32f32 r1.z, r0.y
+mul.f r2.z, r2.z, r3.w
+bary.f r3.w, 17, r1.x
+add.f r0.w, c13.x, r0.w
+add.f r1.z, c15.y, (neg)r1.z
+mov.f32f32 r6.w, r5.y
+mul.f r5.z, r0.z, c4.w
+mul.f r7.x, r0.w, c4.w
+mul.f r0.z, r1.z, c9.y
+(ss)mul.f r0.y, r0.y, c12.z
+mad.f32 r0.x, r2.z, r3.w, r0.x
+mov.f32f32 r6.y, r7.x
+bary.f r0.w, 10, r1.x
+add.f r0.y, r0.y, r0.z
+mul.f r0.z, r2.w, r2.y
+bary.f r1.z, 18, r1.x
+add.f r5.w, r0.w, c12.w
max.f r0.y, r0.y, c12.y
-max.f r0.w, c12.y, r0.w
-mov.f32f32 r1.z, r1.z
-add.f r2.y, r2.y, c12.w
+mov.f32f32 r4.w, r5.z
+mad.f32 r0.x, r0.z, r1.z, r0.x
+mov.f32f32 r6.z, r5.w
min.f r0.y, r0.y, c12.z
-mov.f32f32 r0.w, r0.w
-mul.f r1.z, r1.z, c4.w
-mov.f32f32 r2.z, r2.y
-add.f r3.y, c15.y, (neg)r0.y
-add.f r3.w, c15.y, (neg)r0.y
-add.f r4.x, c15.y, (neg)r0.y
-mov.f32f32 r4.w, r2.z
-mul.f r2.z, r3.y, c8.z
-mul.f r3.y, r3.w, c8.y
-mul.f r3.w, r4.x, c8.x
-log2 r0.w, r0.w
-(ss)mul.f r0.w, c10.y, r0.w
-mov.f32f32 r4.x, r1.z
-mov.f32f32 r0.x, r0.x
-sam.s (f32)(x)r5.y, r4.y, s#3, t#3
-(sy)(ss)mov.f32f32 r4.y, r5.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r4.x, r4.x
-mov.f32f32 r5.w, r0.x
-mov.f32f32 r0.x, r4.y
-mov.f32f32 r3.z, r3.z
-add.f r4.y, c13.y, (neg)r0.z
-mov.f32f32 r5.y, r4.x
-mov.f32f32 r4.x, r2.y
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r4.y, r4.y
-exp2 r0.w, r0.w
-bary.f r4.z, 14, r1.x
-mov.f32f32 r5.z, r4.x
-mov.f32f32 r6.x, r3.x
-mul.f r3.x, r3.z, r4.y
-mov.f32f32 r4.x, r4.z
-mov.f32f32 r4.w, r2.y
-mov.f32f32 r2.w, r2.w
-mul.f r0.x, r3.x, r0.x
-mov.f32f32 r6.z, r4.x
-mov.f32f32 r6.y, r4.w
-bary.f r3.x, 15, r1.x
-sam.s (f32)(x)r4.w, r5.x, s#3, t#3
+mov.f32f32 r7.y, r5.w
+mov.f32f32 r5.x, r5.w
+max.f r0.x, c12.y, r0.x
+nop
+add.f r0.z, c15.y, (neg)r0.y
+sam.s (f32)(x)r7.z, r6.x, s#3, t#3
+add.f r0.w, c13.y, (neg)r3.y
+add.f r1.z, c15.y, (neg)r0.y
+add.f r2.y, c15.y, (neg)r0.y
+mul.f r0.z, r0.z, c8.z
+mov.f32f32 r2.z, r0.w
+add.f r2.w, c13.y, (neg)r3.z
+mul.f r1.z, r1.z, c8.y
+mul.f r3.y, r2.y, c8.x
+log2 r0.x, r0.x
+(ss)mul.f r0.x, c10.y, r0.x
+mov.f32f32 r2.y, r2.w
+sam.s (f32)(x)r3.z, r6.w, s#3, t#3
nop
-(sy)mov.f32f32 r4.x, r4.w
-mov.f32f32 r4.w, r2.w
-mov.f32f32 r1.z, r1.z
+sam.s (f32)(x)r3.w, r4.z, s#3, t#3
+sam.s (f32)(x)r4.x, r5.y, s#3, t#3
add.f r1.w, r1.w, c12.z
-(ss)nop
-sam.s (f32)(x)r5.x, r5.w, s#3, t#3
-(sy)mov.f32f32 r2.w, r5.x
-mov.f32f32 r5.y, r3.x
-mov.f32f32 r5.x, r1.z
-mul.f r1.z, r1.w, r4.y
-mov.f32f32 r2.y, r2.y
-add.f r0.z, r0.z, c12.z
-mov.f32f32 r2.x, r2.x
-mad.f32 r0.x, r1.z, r2.w, r0.x
-mov.f32f32 r6.w, r5.y
-mov.f32f32 r5.y, r2.y
-mul.f r1.z, r3.z, r0.z
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r2.x, r2.x
+add.f r3.x, r3.x, c12.z
+mul.f r2.z, r2.z, r2.y
bary.f r2.y, 13, r1.x
-mul.f r0.z, r1.w, r0.z
-(ss)nop
-sam (f32)(xyz)r5.z, r6.z, s#2, t#2
-(sy)mul.f r1.w, c7.z, r6.x
-sam.s (f32)(x)r6.x, r4.w, s#3, t#3
-(sy)mov.f32f32 r2.w, r6.x
-mul.f r3.z, c7.y, r5.w
-mul.f r4.y, c7.x, r5.z
-(ss)mul.f r1.w, r1.w, r0.w
-mad.f32 r0.x, r1.z, r2.w, r0.x
-mov.f32f32 r1.z, r4.z
-mul.f r3.z, r3.z, r0.w
-mul.f r0.w, r4.y, r0.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r4.y, r1.z
-mad.f32 r0.x, r0.z, r4.x, r0.x
-mov.f32f32 r0.z, r3.x
-mov.f32f32 r1.z, r2.y
+bary.f r4.y, 14, r1.x
+(ss)bary.f r4.z, 15, r1.x
+(sy)mul.f r2.z, r2.z, r7.z
+mul.f r2.w, r1.w, r2.w
+exp2 r0.x, r0.x
+mov.f32f32 r4.w, r4.y
+mov.f32f32 r5.x, r4.z
+mul.f r0.w, r0.w, r3.x
+mad.f32 r2.z, r2.w, r3.z, r2.z
+mul.f r1.w, r1.w, r3.x
+sam (f32)(w)r5.y, r2.x, s#1, t#1
+(sy)(ss)cmps.f.lt r2.x, r6.x, c14.y
+mad.f32 r0.w, r0.w, r3.w, r2.z
+sam (f32)(xyz)r6.y, r4.y, s#0, t#0
mov.f32f32 r2.w, c12.z
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r4.z, r0.z
-mov.f32f32 r2.y, r1.z
-mov.f32f32 r2.w, r2.w
-mul.f r0.x, c13.w, r0.x
-bary.f r0.z, 2, r1.x
-bary.f r1.z, 1, r1.x
-mov.f32f32 r3.x, c12.y
-mov.f32f32 r0.x, r0.x
-sam (f32)(xyz)r4.x, r4.y, s#0, t#0
-(sy)mad.f32 r0.z, r4.z, r0.z, r1.w
-mad.f32 r1.z, r4.y, r1.z, r3.z
+mad.f32 r0.w, r1.w, r4.x, r0.w
+sam (f32)(xyz)r3.z, r4.w, s#2, t#2
+(sy)mul.f r1.w, c7.y, r3.w
+mul.f r2.y, c7.x, r3.z
+mul.f r2.z, c7.z, r4.x
+mul.f r0.w, c13.w, r0.w
+(ss)mul.f r1.w, r1.w, r0.x
+mul.f r2.y, r2.y, r0.x
+mul.f r0.x, r2.z, r0.x
+mov.f32f32 r2.z, r0.w
+bary.f r3.x, 2, r1.x
+bary.f r3.z, 1, r1.x
bary.f (ei)r1.x, 0, r1.x
-sam (f32)(w)r4.w, r2.x, s#1, t#1
-(sy)cmps.f.lt r1.y, r5.z, c14.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r1.z, r1.z
-mad.f32 r0.w, r4.x, r1.x, r0.w
-cov.u32f32 r1.x, r1.y
-mul.f r0.z, r0.z, r0.x
-mul.f r1.y, r1.z, r0.x
-mov.f32f32 r0.w, r0.w
-cmps.f.ne r1.x, r1.x, c12.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r1.y, r1.y
-mad.f32 r0.z, c6.z, r4.z, r0.z
-mad.f32 r1.y, c6.y, r4.y, r1.y
-mul.f r0.x, r0.w, r0.x
-mov.f32f32 r0.w, r5.z
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.w, r0.w
-mul.f r0.z, r0.y, r0.z
-mul.f r1.y, r0.y, r1.y
-mad.f32 r0.x, c6.x, r4.x, r0.x
-sel.b32 r0.w, r3.x, r1.x, r0.w
-add.f r0.z, r0.z, r2.z
-add.f r1.x, r1.y, r3.y
-mov.f32f32 r0.x, r0.x
-nop
-mul.f r0.z, r0.z, r0.w
-mul.f r1.x, r1.x, r0.w
+cov.u32f32 r1.y, r2.x
+mad.f32 r0.x, r6.w, r3.x, r0.x
+mad.f32 r1.w, r6.z, r3.z, r1.w
+mad.f32 r1.x, r6.y, r1.x, r2.y
+cmps.f.ne r1.y, r1.y, c12.y
+mul.f r0.x, r0.x, r2.z
+mul.f r1.w, r1.w, r2.z
+mad.f32 r0.x, c6.z, r6.w, r0.x
+mad.f32 r1.w, c6.y, r6.z, r1.w
+mul.f r0.w, r1.x, r0.w
+mov.f32f32 r1.x, c12.y
mul.f r0.x, r0.y, r0.x
+mul.f r1.w, r0.y, r1.w
+mad.f32 r0.w, c6.x, r6.y, r0.w
+sel.b32 r1.x, r1.x, r1.y, r6.x
+add.f r0.x, r0.x, r0.z
+add.f r0.z, r1.w, r1.z
+mul.f r0.y, r0.y, r0.w
nop
-mul.f r0.y, r0.z, c5.z
-mul.f r0.z, r1.x, c5.y
-add.f r0.x, r0.x, r3.w
-nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-mul.f r0.x, r0.x, r0.w
-nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-mul.f r0.x, r0.x, c5.x
-nop
-mov.f32f32 r2.z, r0.y
-(ss)mov.f32f32 r2.y, r0.z
-mov.f32f32 r0.x, r0.x
+mul.f r0.x, r0.x, r1.x
+mul.f r0.z, r0.z, r1.x
+(rpt1)nop
+mul.f r2.z, r0.x, c5.z
+mul.f r2.y, r0.z, c5.y
+add.f r0.x, r0.y, r3.y
(rpt2)nop
-mov.f32f32 r0.x, r0.x
+mul.f r0.x, r0.x, r1.x
(rpt2)nop
-mov.f32f32 r2.x, r0.x
+mul.f r2.x, r0.x, c5.x
end
nop
+nop
; FRAG: outputs: r2.x (1:0)
-; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r2.x (5:10,cm=f,il=12,b=1) r3.x (5:11,cm=f,il=16,b=1) r2.y (5:12,cm=f,il=20,b=1) r5.x (5:13,cm=f,il=24,b=1)
-; FRAG: 235 instructions, 0 half, 7 full
+; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r2.x (5:10,cm=f,il=12,b=1) r3.x (5:11,cm=f,il=16,b=1) r3.w (5:12,cm=f,il=20,b=1) r5.x (5:13,cm=f,il=24,b=1)
+; FRAG: 155 instructions, 0 half, 8 full
diff --git a/reference/0ad-alpine-valley/0ad-77.asm b/reference/0ad-alpine-valley/0ad-77.asm
index 67a2ad5..e7bcae3 100644
--- a/reference/0ad-alpine-valley/0ad-77.asm
+++ b/reference/0ad-alpine-valley/0ad-77.asm
@@ -32,159 +32,113 @@
@out(r5.y) out21
@out(r5.z) out22
@out(r5.w) out23
+@const(c17.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
+@const(c18.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)mul.f r1.z, c12.x, r0.x
mul.f r1.w, c12.x, r0.w
mad.f32 r1.z, c13.x, r0.y, r1.z
mad.f32 r1.w, c13.x, r1.x, r1.w
mad.f32 r1.z, c14.x, r0.z, r1.z
-mul.f r2.x, c12.z, r0.x
-mul.f r2.y, c12.z, r0.w
-mul.f r0.w, c12.y, r0.w
-add.f r1.z, r1.z, c15.x
-mov.f32f32 r1.w, r1.w
-mad.f32 r2.x, c13.z, r0.y, r2.x
-mad.f32 r2.y, c13.z, r1.x, r2.y
-add.f r2.z, c4.x, (neg)r1.z
mad.f32 r1.w, c14.x, r1.y, r1.w
-mul.f r2.w, c8.w, r1.z
-mul.f r3.x, c8.z, r1.z
-mul.f r3.y, r2.z, r2.z
-mul.f r3.z, c12.y, r0.x
-mov.f32f32 r1.w, r1.w
-mad.f32 r3.z, c13.y, r0.y, r3.z
-absneg.f r3.w, (neg)c5.x
-mad.f32 r3.z, c14.y, r0.z, r3.z
-mul.f r4.x, c8.y, r1.z
-mul.f r4.y, c8.x, r1.z
-mul.f r5.x, r1.w, r3.w
-add.f r5.w, r3.z, c15.y
-mad.f32 r0.w, c13.y, r1.x, r0.w
-mul.f r1.x, c0.w, r1.z
-mul.f r3.z, c0.z, r1.z
+absneg.f r2.x, (neg)c5.x
+mul.f r2.y, c12.y, r0.x
+add.f r2.z, r1.z, c15.x
+mad.f32 r1.z, c13.y, r0.y, r2.y
+mul.f r2.y, c12.z, r0.x
+mul.f r2.w, r1.w, r2.x
+add.f r3.x, c4.x, (neg)r2.z
+mul.f r3.y, c12.y, r0.w
+mul.f r3.z, c8.y, r2.z
+mul.f r3.w, c8.x, r2.z
+mul.f r4.x, r3.x, r3.x
+mad.f32 r1.z, c14.y, r0.z, r1.z
+mad.f32 r3.y, c13.y, r1.x, r3.y
+mul.f r4.y, c8.w, r2.z
+mul.f r5.x, c8.z, r2.z
+add.f r5.w, r1.z, c15.y
+mad.f32 r6.x, c14.y, r1.y, r3.y
+absneg.f r1.z, (neg)c5.y
+mul.f r3.y, c0.w, r2.z
add.f r5.y, c4.y, (neg)r5.w
-mov.f32f32 r0.w, r0.w
-mad.f32 r2.w, c9.w, r5.w, r2.w
-mad.f32 r3.x, c9.z, r5.w, r3.x
-mad.f32 r3.y, r5.y, r5.y, r3.y
-mad.f32 r0.w, c14.y, r1.y, r0.w
-mad.f32 r2.x, c14.z, r0.z, r2.x
-mad.f32 r4.x, c9.y, r5.w, r4.x
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r6.x, r0.w
-add.f r2.x, r2.x, c15.z
-absneg.f r0.w, (neg)c5.y
-mad.f32 r4.y, c9.x, r5.w, r4.y
-mad.f32 r1.x, c1.w, r5.w, r1.x
-add.f r5.z, c4.z, (neg)r2.x
-mad.f32 r5.x, r6.x, r0.w, r5.x
-mad.f32 r2.w, c10.w, r2.x, r2.w
-mad.f32 r3.x, c10.z, r2.x, r3.x
-mad.f32 r3.y, r5.z, r5.z, r3.y
-mov.f32f32 r5.x, r5.x
-mov.f32f32 r2.y, r2.y
+mad.f32 r3.z, c9.y, r5.w, r3.z
+mad.f32 r3.w, c9.x, r5.w, r3.w
+mad.f32 r2.w, r6.x, r1.z, r2.w
+mad.f32 r4.x, r5.y, r5.y, r4.x
+mad.f32 r2.y, c13.z, r0.y, r2.y
+mul.f r0.w, c12.z, r0.w
+mad.f32 r2.y, c14.z, r0.z, r2.y
+mad.f32 r0.w, c13.z, r1.x, r0.w
+mad.f32 r1.x, c9.w, r5.w, r4.y
+mad.f32 r4.y, c9.z, r5.w, r5.x
+add.f r2.y, r2.y, c15.z
+mad.f32 r6.y, c14.z, r1.y, r0.w
+absneg.f r0.w, (neg)c5.z
+mad.f32 r1.y, c1.w, r5.w, r3.y
+add.f r3.y, c4.z, (neg)r2.y
+mad.f32 r3.z, c10.y, r2.y, r3.z
+mad.f32 r3.w, c10.x, r2.y, r3.w
+mad.f32 r2.w, r6.y, r0.w, r2.w
+mad.f32 r4.x, r3.y, r3.y, r4.x
mul.f r0.x, c12.w, r0.x
-mad.f32 r4.x, c10.y, r2.x, r4.x
-mad.f32 r4.y, c10.x, r2.x, r4.y
-mad.f32 r1.y, c14.z, r1.y, r2.y
-rsq r2.y, r3.y
-(ss)mov.f32f32 r2.y, r2.y
-(ss)absneg.f r3.y, (neg)c5.z
+mad.f32 r5.x, c10.w, r2.y, r1.x
+mad.f32 r4.y, c10.z, r2.y, r4.y
mad.f32 r0.x, c13.w, r0.y, r0.x
-mov.f32f32 r6.y, r1.y
-mad.f32 r0.y, r2.z, r2.y, r3.w
-mad.f32 r0.w, r5.y, r2.y, r0.w
-mad.f32 r1.y, r5.z, r2.y, r3.y
-mad.f32 r2.y, r6.y, r3.y, r5.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r2.y, r2.y
-mul.f r2.z, r0.y, r0.y
+mad.f32 r0.y, c2.w, r2.y, r1.y
+mul.f r6.z, c0.z, r2.z
+rsq r1.x, r4.x
+(ss)mov.f32f32 r1.y, r1.x
+mad.f32 r2.x, r3.x, r1.x, r2.x
+max.f r1.x, c17.x, r2.w
mad.f32 r0.x, c14.w, r0.z, r0.x
-mad.f32 r0.z, r0.w, r0.w, r2.z
-max.f r2.y, c17.x, r2.y
-mad.f32 r2.z, c2.w, r2.x, r1.x
-mad.f32 r1.x, c1.z, r5.w, r3.z
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r2.y, r2.y
-mad.f32 r0.z, r1.y, r1.y, r0.z
+mad.f32 r0.z, r5.y, r1.y, r1.z
+mov.f32f32 r1.z, r2.x
+mad.f32 r0.w, r3.y, r1.y, r0.w
+nop
+mov.f32f32 r2.w, r0.z
+mul.f r1.y, r1.z, r1.z
+(ss)mov.f32f32 r4.x, r0.w
+mov.f32f32 r3.x, r1.x
+mad.f32 r0.z, r0.z, r2.w, r1.y
add.f r0.x, r0.x, c15.w
-mad.f32 r6.z, c2.z, r2.x, r1.x
-mul.f r6.w, c0.y, r1.z
-mul.f r7.x, c0.x, r1.z
-mad.f32 r7.y, c7.x, r2.x, c7.y
-mad.f32 r7.z, c7.x, r1.z, c7.y
+mad.f32 r0.z, r0.w, r4.x, r0.z
+mul.f r1.z, r3.x, c6.z
+mul.f r1.y, r3.x, c6.y
+mad.f32 r0.w, c11.y, r0.x, r3.z
+mad.f32 r3.x, c11.x, r0.x, r3.w
+mul.f r1.x, r1.x, c6.x
+mad.f32 r3.w, c11.w, r0.x, r5.x
rsq r0.z, r0.z
-(ss)mov.f32f32 r0.z, r0.z
-mul.f r1.x, r2.y, c6.z
-mul.f r3.y, r2.y, c6.y
-mul.f r2.y, r2.y, c6.x
-mul.f r1.y, r1.y, r0.z
-mul.f r0.w, r0.w, r0.z
-mul.f r0.y, r0.y, r0.z
-mov.f32f32 r0.z, r1.x
-mov.f32f32 r1.x, r1.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.y, r0.y
-nop
-mov.f32f32 r5.z, r1.x
-mov.f32f32 r5.y, r0.w
-mov.f32f32 r5.x, r0.y
-mov.f32f32 r1.z, r0.z
-mov.f32f32 r0.y, r3.y
-mov.f32f32 r0.z, r2.y
-(rpt1)nop
-mov.f32f32 r1.y, r0.y
-mov.f32f32 r1.x, r0.z
-mad.f32 r0.y, c11.w, r0.x, r2.w
-mad.f32 r0.z, c11.z, r0.x, r3.x
-mad.f32 r0.w, c11.y, r0.x, r4.x
-mad.f32 r2.y, c11.x, r0.x, r4.y
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-mul.f r0.w, r0.w, c16.y
-mul.f r2.y, r2.y, c16.x
-mov.f32f32 r3.w, r0.y
-mov.f32f32 r3.z, r0.z
-mov.f32f32 r3.y, r0.w
-mov.f32f32 r3.x, r2.y
-mad.f32 r0.y, c3.w, r0.x, r2.z
-mad.f32 r0.z, c3.z, r0.x, r6.z
-mad.f32 r2.y, c1.y, r5.w, r6.w
-mad.f32 r2.z, c1.x, r5.w, r7.x
-mov.f32f32 r0.w, r0.y
-mov.f32f32 r0.z, r0.z
-mad.f32 r0.y, c2.y, r2.x, r2.y
-mad.f32 r2.x, c2.x, r2.x, r2.z
+(ss)mov.f32f32 r3.z, r0.z
+mul.f r5.x, r2.x, r0.z
+mul.f r3.y, r0.w, c16.y
+mul.f r3.x, r3.x, c16.x
+mul.f r5.z, r4.x, r3.z
+mul.f r5.y, r2.w, r3.z
+mad.f32 r3.z, c11.z, r0.x, r4.y
+mad.f32 r0.w, c3.w, r0.x, r0.y
+mad.f32 r0.y, c1.z, r5.w, r6.z
+(ss)mul.f r0.z, c0.y, r2.z
+mad.f32 r0.y, c2.z, r2.y, r0.y
+mad.f32 r2.x, c1.y, r5.w, r0.z
+mad.f32 r0.z, c3.z, r0.x, r0.y
+mad.f32 r0.y, c2.y, r2.y, r2.x
+mul.f r2.x, c0.x, r2.z
mad.f32 r0.y, c3.y, r0.x, r0.y
+mad.f32 r2.x, c1.x, r5.w, r2.x
+mad.f32 r4.y, c7.x, r2.y, c7.y
+mad.f32 r2.x, c2.x, r2.y, r2.x
+mad.f32 r4.x, c7.x, r2.z, c7.y
mad.f32 r0.x, c3.x, r0.x, r2.x
-mov.f32f32 r2.x, r7.y
-mov.f32f32 r2.y, r7.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r4.y, r2.x
-mov.f32f32 r4.x, r2.y
-mov.f32f32 r2.x, r6.y
+mov.f32f32 r2.z, r6.y
mov.f32f32 r2.y, r6.x
-mov.f32f32 r1.w, r1.w
-nop
-mov.f32f32 r2.z, r2.x
-mov.f32f32 r2.y, r2.y
mov.f32f32 r2.x, r1.w
+mov.f32f32 r5.w, (0.000000)
+mov.f32f32 r2.w, (0.000000)
mov.f32f32 r1.w, (0.000000)
-mov.f32f32 r2.w, r4.w
-mov.f32f32 r4.z, r4.z
-mov.f32f32 r6.x, (0.000000)
-mov.f32f32 r5.w, r1.w
-mov.f32f32 r4.w, r2.w
-mov.f32f32 r4.z, r4.z
-mov.f32f32 r2.w, r6.x
-mov.f32f32 r1.w, (0.000000)
-(rpt2)nop
-mov.f32f32 r1.w, r1.w
end
nop
+nop
; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11) r4.x (5:12) r5.x (5:13)
; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r4.z (0:0,cm=3,il=16,b=0)
-; VERT: 154 instructions, 0 half, 8 full
+; VERT: 102 instructions, 0 half, 7 full
diff --git a/reference/0ad-alpine-valley/0ad-78.asm b/reference/0ad-alpine-valley/0ad-78.asm
index 67a2ad5..e7bcae3 100644
--- a/reference/0ad-alpine-valley/0ad-78.asm
+++ b/reference/0ad-alpine-valley/0ad-78.asm
@@ -32,159 +32,113 @@
@out(r5.y) out21
@out(r5.z) out22
@out(r5.w) out23
+@const(c17.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
+@const(c18.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)mul.f r1.z, c12.x, r0.x
mul.f r1.w, c12.x, r0.w
mad.f32 r1.z, c13.x, r0.y, r1.z
mad.f32 r1.w, c13.x, r1.x, r1.w
mad.f32 r1.z, c14.x, r0.z, r1.z
-mul.f r2.x, c12.z, r0.x
-mul.f r2.y, c12.z, r0.w
-mul.f r0.w, c12.y, r0.w
-add.f r1.z, r1.z, c15.x
-mov.f32f32 r1.w, r1.w
-mad.f32 r2.x, c13.z, r0.y, r2.x
-mad.f32 r2.y, c13.z, r1.x, r2.y
-add.f r2.z, c4.x, (neg)r1.z
mad.f32 r1.w, c14.x, r1.y, r1.w
-mul.f r2.w, c8.w, r1.z
-mul.f r3.x, c8.z, r1.z
-mul.f r3.y, r2.z, r2.z
-mul.f r3.z, c12.y, r0.x
-mov.f32f32 r1.w, r1.w
-mad.f32 r3.z, c13.y, r0.y, r3.z
-absneg.f r3.w, (neg)c5.x
-mad.f32 r3.z, c14.y, r0.z, r3.z
-mul.f r4.x, c8.y, r1.z
-mul.f r4.y, c8.x, r1.z
-mul.f r5.x, r1.w, r3.w
-add.f r5.w, r3.z, c15.y
-mad.f32 r0.w, c13.y, r1.x, r0.w
-mul.f r1.x, c0.w, r1.z
-mul.f r3.z, c0.z, r1.z
+absneg.f r2.x, (neg)c5.x
+mul.f r2.y, c12.y, r0.x
+add.f r2.z, r1.z, c15.x
+mad.f32 r1.z, c13.y, r0.y, r2.y
+mul.f r2.y, c12.z, r0.x
+mul.f r2.w, r1.w, r2.x
+add.f r3.x, c4.x, (neg)r2.z
+mul.f r3.y, c12.y, r0.w
+mul.f r3.z, c8.y, r2.z
+mul.f r3.w, c8.x, r2.z
+mul.f r4.x, r3.x, r3.x
+mad.f32 r1.z, c14.y, r0.z, r1.z
+mad.f32 r3.y, c13.y, r1.x, r3.y
+mul.f r4.y, c8.w, r2.z
+mul.f r5.x, c8.z, r2.z
+add.f r5.w, r1.z, c15.y
+mad.f32 r6.x, c14.y, r1.y, r3.y
+absneg.f r1.z, (neg)c5.y
+mul.f r3.y, c0.w, r2.z
add.f r5.y, c4.y, (neg)r5.w
-mov.f32f32 r0.w, r0.w
-mad.f32 r2.w, c9.w, r5.w, r2.w
-mad.f32 r3.x, c9.z, r5.w, r3.x
-mad.f32 r3.y, r5.y, r5.y, r3.y
-mad.f32 r0.w, c14.y, r1.y, r0.w
-mad.f32 r2.x, c14.z, r0.z, r2.x
-mad.f32 r4.x, c9.y, r5.w, r4.x
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r6.x, r0.w
-add.f r2.x, r2.x, c15.z
-absneg.f r0.w, (neg)c5.y
-mad.f32 r4.y, c9.x, r5.w, r4.y
-mad.f32 r1.x, c1.w, r5.w, r1.x
-add.f r5.z, c4.z, (neg)r2.x
-mad.f32 r5.x, r6.x, r0.w, r5.x
-mad.f32 r2.w, c10.w, r2.x, r2.w
-mad.f32 r3.x, c10.z, r2.x, r3.x
-mad.f32 r3.y, r5.z, r5.z, r3.y
-mov.f32f32 r5.x, r5.x
-mov.f32f32 r2.y, r2.y
+mad.f32 r3.z, c9.y, r5.w, r3.z
+mad.f32 r3.w, c9.x, r5.w, r3.w
+mad.f32 r2.w, r6.x, r1.z, r2.w
+mad.f32 r4.x, r5.y, r5.y, r4.x
+mad.f32 r2.y, c13.z, r0.y, r2.y
+mul.f r0.w, c12.z, r0.w
+mad.f32 r2.y, c14.z, r0.z, r2.y
+mad.f32 r0.w, c13.z, r1.x, r0.w
+mad.f32 r1.x, c9.w, r5.w, r4.y
+mad.f32 r4.y, c9.z, r5.w, r5.x
+add.f r2.y, r2.y, c15.z
+mad.f32 r6.y, c14.z, r1.y, r0.w
+absneg.f r0.w, (neg)c5.z
+mad.f32 r1.y, c1.w, r5.w, r3.y
+add.f r3.y, c4.z, (neg)r2.y
+mad.f32 r3.z, c10.y, r2.y, r3.z
+mad.f32 r3.w, c10.x, r2.y, r3.w
+mad.f32 r2.w, r6.y, r0.w, r2.w
+mad.f32 r4.x, r3.y, r3.y, r4.x
mul.f r0.x, c12.w, r0.x
-mad.f32 r4.x, c10.y, r2.x, r4.x
-mad.f32 r4.y, c10.x, r2.x, r4.y
-mad.f32 r1.y, c14.z, r1.y, r2.y
-rsq r2.y, r3.y
-(ss)mov.f32f32 r2.y, r2.y
-(ss)absneg.f r3.y, (neg)c5.z
+mad.f32 r5.x, c10.w, r2.y, r1.x
+mad.f32 r4.y, c10.z, r2.y, r4.y
mad.f32 r0.x, c13.w, r0.y, r0.x
-mov.f32f32 r6.y, r1.y
-mad.f32 r0.y, r2.z, r2.y, r3.w
-mad.f32 r0.w, r5.y, r2.y, r0.w
-mad.f32 r1.y, r5.z, r2.y, r3.y
-mad.f32 r2.y, r6.y, r3.y, r5.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r2.y, r2.y
-mul.f r2.z, r0.y, r0.y
+mad.f32 r0.y, c2.w, r2.y, r1.y
+mul.f r6.z, c0.z, r2.z
+rsq r1.x, r4.x
+(ss)mov.f32f32 r1.y, r1.x
+mad.f32 r2.x, r3.x, r1.x, r2.x
+max.f r1.x, c17.x, r2.w
mad.f32 r0.x, c14.w, r0.z, r0.x
-mad.f32 r0.z, r0.w, r0.w, r2.z
-max.f r2.y, c17.x, r2.y
-mad.f32 r2.z, c2.w, r2.x, r1.x
-mad.f32 r1.x, c1.z, r5.w, r3.z
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r2.y, r2.y
-mad.f32 r0.z, r1.y, r1.y, r0.z
+mad.f32 r0.z, r5.y, r1.y, r1.z
+mov.f32f32 r1.z, r2.x
+mad.f32 r0.w, r3.y, r1.y, r0.w
+nop
+mov.f32f32 r2.w, r0.z
+mul.f r1.y, r1.z, r1.z
+(ss)mov.f32f32 r4.x, r0.w
+mov.f32f32 r3.x, r1.x
+mad.f32 r0.z, r0.z, r2.w, r1.y
add.f r0.x, r0.x, c15.w
-mad.f32 r6.z, c2.z, r2.x, r1.x
-mul.f r6.w, c0.y, r1.z
-mul.f r7.x, c0.x, r1.z
-mad.f32 r7.y, c7.x, r2.x, c7.y
-mad.f32 r7.z, c7.x, r1.z, c7.y
+mad.f32 r0.z, r0.w, r4.x, r0.z
+mul.f r1.z, r3.x, c6.z
+mul.f r1.y, r3.x, c6.y
+mad.f32 r0.w, c11.y, r0.x, r3.z
+mad.f32 r3.x, c11.x, r0.x, r3.w
+mul.f r1.x, r1.x, c6.x
+mad.f32 r3.w, c11.w, r0.x, r5.x
rsq r0.z, r0.z
-(ss)mov.f32f32 r0.z, r0.z
-mul.f r1.x, r2.y, c6.z
-mul.f r3.y, r2.y, c6.y
-mul.f r2.y, r2.y, c6.x
-mul.f r1.y, r1.y, r0.z
-mul.f r0.w, r0.w, r0.z
-mul.f r0.y, r0.y, r0.z
-mov.f32f32 r0.z, r1.x
-mov.f32f32 r1.x, r1.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.y, r0.y
-nop
-mov.f32f32 r5.z, r1.x
-mov.f32f32 r5.y, r0.w
-mov.f32f32 r5.x, r0.y
-mov.f32f32 r1.z, r0.z
-mov.f32f32 r0.y, r3.y
-mov.f32f32 r0.z, r2.y
-(rpt1)nop
-mov.f32f32 r1.y, r0.y
-mov.f32f32 r1.x, r0.z
-mad.f32 r0.y, c11.w, r0.x, r2.w
-mad.f32 r0.z, c11.z, r0.x, r3.x
-mad.f32 r0.w, c11.y, r0.x, r4.x
-mad.f32 r2.y, c11.x, r0.x, r4.y
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-mul.f r0.w, r0.w, c16.y
-mul.f r2.y, r2.y, c16.x
-mov.f32f32 r3.w, r0.y
-mov.f32f32 r3.z, r0.z
-mov.f32f32 r3.y, r0.w
-mov.f32f32 r3.x, r2.y
-mad.f32 r0.y, c3.w, r0.x, r2.z
-mad.f32 r0.z, c3.z, r0.x, r6.z
-mad.f32 r2.y, c1.y, r5.w, r6.w
-mad.f32 r2.z, c1.x, r5.w, r7.x
-mov.f32f32 r0.w, r0.y
-mov.f32f32 r0.z, r0.z
-mad.f32 r0.y, c2.y, r2.x, r2.y
-mad.f32 r2.x, c2.x, r2.x, r2.z
+(ss)mov.f32f32 r3.z, r0.z
+mul.f r5.x, r2.x, r0.z
+mul.f r3.y, r0.w, c16.y
+mul.f r3.x, r3.x, c16.x
+mul.f r5.z, r4.x, r3.z
+mul.f r5.y, r2.w, r3.z
+mad.f32 r3.z, c11.z, r0.x, r4.y
+mad.f32 r0.w, c3.w, r0.x, r0.y
+mad.f32 r0.y, c1.z, r5.w, r6.z
+(ss)mul.f r0.z, c0.y, r2.z
+mad.f32 r0.y, c2.z, r2.y, r0.y
+mad.f32 r2.x, c1.y, r5.w, r0.z
+mad.f32 r0.z, c3.z, r0.x, r0.y
+mad.f32 r0.y, c2.y, r2.y, r2.x
+mul.f r2.x, c0.x, r2.z
mad.f32 r0.y, c3.y, r0.x, r0.y
+mad.f32 r2.x, c1.x, r5.w, r2.x
+mad.f32 r4.y, c7.x, r2.y, c7.y
+mad.f32 r2.x, c2.x, r2.y, r2.x
+mad.f32 r4.x, c7.x, r2.z, c7.y
mad.f32 r0.x, c3.x, r0.x, r2.x
-mov.f32f32 r2.x, r7.y
-mov.f32f32 r2.y, r7.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r4.y, r2.x
-mov.f32f32 r4.x, r2.y
-mov.f32f32 r2.x, r6.y
+mov.f32f32 r2.z, r6.y
mov.f32f32 r2.y, r6.x
-mov.f32f32 r1.w, r1.w
-nop
-mov.f32f32 r2.z, r2.x
-mov.f32f32 r2.y, r2.y
mov.f32f32 r2.x, r1.w
+mov.f32f32 r5.w, (0.000000)
+mov.f32f32 r2.w, (0.000000)
mov.f32f32 r1.w, (0.000000)
-mov.f32f32 r2.w, r4.w
-mov.f32f32 r4.z, r4.z
-mov.f32f32 r6.x, (0.000000)
-mov.f32f32 r5.w, r1.w
-mov.f32f32 r4.w, r2.w
-mov.f32f32 r4.z, r4.z
-mov.f32f32 r2.w, r6.x
-mov.f32f32 r1.w, (0.000000)
-(rpt2)nop
-mov.f32f32 r1.w, r1.w
end
nop
+nop
; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11) r4.x (5:12) r5.x (5:13)
; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r4.z (0:0,cm=3,il=16,b=0)
-; VERT: 154 instructions, 0 half, 8 full
+; VERT: 102 instructions, 0 half, 7 full
diff --git a/reference/0ad-alpine-valley/0ad-79.asm b/reference/0ad-alpine-valley/0ad-79.asm
index f56593b..57bb137 100644
--- a/reference/0ad-alpine-valley/0ad-79.asm
+++ b/reference/0ad-alpine-valley/0ad-79.asm
@@ -8,251 +8,183 @@
@out(r2.y) out1
@out(r2.z) out2
@out(r2.w) out3
+@const(c13.x) 0x3f000000, 0x00000000, 0x3f800000, 0xbb449ba6
+@const(c14.x) 0xbf000000, 0x40000000, 0x3f800000, 0x3de38866
+@const(c15.x) 0x3fb8aa65, 0x3cf5c28f, 0x00000000, 0x00000000
+@const(c16.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.x, 8, r1.x
add.f r0.y, r0.w, c13.y
bary.f r0.w, 4, r1.x
bary.f r1.z, 9, r1.x
add.f r1.w, r0.x, c14.x
bary.f r2.x, 14, r1.x
-mul.f r2.y, r0.w, r0.w
-bary.f r2.z, 5, r1.x
+bary.f r2.y, 15, r1.x
+add.f r2.z, r1.z, c14.x
floor.f r2.w, r1.w
rcp r0.y, r0.y
add.f r0.z, r0.z, c13.y
-add.f r3.x, r1.z, c14.x
-mad.f32 r2.y, r2.z, r2.z, r2.y
+mul.f r3.x, r0.w, r0.w
+bary.f r3.y, 5, r1.x
add.f r1.w, r1.w, (neg)r2.w
(ss)mul.f r0.y, r0.z, r0.y
-floor.f r0.z, r3.x
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r1.w, r1.w
+absneg.f r0.z, (neg)c9.x
+mad.f32 r2.w, r3.y, r3.y, r3.x
+mov.f32f32 r3.x, r1.w
+bary.f r3.z, 6, r1.x
+mul.f r0.z, r0.z, c9.x
+floor.f r3.w, r2.z
+mul.f r4.x, c13.x, r3.x
+mad.f32 r2.w, r3.z, r3.z, r2.w
+mul.f r0.z, r0.z, r0.y
mov.f32f32 r0.y, r0.y
-absneg.f r2.w, (neg)c9.x
-bary.f r3.y, 6, r1.x
-mul.f r3.z, c13.x, r1.w
-add.f r0.z, r3.x, (neg)r0.z
-mul.f r2.w, r2.w, c9.x
-mad.f32 r2.y, r3.y, r3.y, r2.y
-mov.f32f32 r3.x, r3.z
-mov.f32f32 r0.z, r0.z
-mul.f r2.w, r2.w, r0.y
-mov.f32f32 r2.x, r2.x
-add.f r0.x, r0.x, (neg)r3.x
-mul.f r3.x, c13.x, r0.z
-mov.f32f32 r2.w, r2.w
-rsq r2.y, r2.y
-(ss)mov.f32f32 r2.y, r2.y
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r3.x, r3.x
-mul.f r0.y, r2.w, r0.y
-mul.f r0.w, r0.w, r2.y
-add.f r2.w, c14.x, r0.x
-add.f r0.x, c14.z, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.w, r2.w
-bary.f r3.z, 16, r1.x
+add.f r0.x, r0.x, (neg)r4.x
+add.f r2.z, r2.z, (neg)r3.w
+add.f r3.x, c14.y, (neg)r3.x
+mul.f r0.y, r0.z, r0.y
+mov.f32f32 r0.z, r0.x
+rsq r2.w, r2.w
+(ss)mov.f32f32 r3.w, r2.w
+mov.f32f32 r4.x, r2.z
mul.f r0.y, r0.y, c15.x
-mov.f32f32 r0.x, r0.x
-mul.f r2.w, r2.w, c3.z
-mul.f r0.w, r0.w, r3.z
-mul.f r2.z, r2.z, r2.y
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r3.z, r2.w
-mul.f r0.x, r0.x, c3.z
-mov.f32f32 r2.z, r2.z
-bary.f r3.w, 17, r1.x
-mov.f32f32 r4.x, r3.z
-add.f r1.z, r1.z, (neg)r3.x
+add.f r0.z, c14.x, r0.z
+mul.f r0.w, r0.w, r3.w
+bary.f r4.y, 16, r1.x
+mul.f r4.z, c13.x, r4.x
+mul.f r4.w, r0.z, c3.z
+add.f r0.x, c14.z, r0.x
+mul.f r0.z, r0.w, r4.y
exp2 r0.y, r0.y
-(ss)mov.f32f32 r0.y, r0.y
-mad.f32 r0.w, r2.z, r3.w, r0.w
-mov.f32f32 r2.z, r0.x
-mov.f32f32 r1.z, r1.z
-add.f r3.x, c16.y, (neg)r0.y
-mov.f32f32 r0.w, r0.w
-mul.f r2.y, r3.y, r2.y
-add.f r3.y, c14.x, r1.z
-mul.f r3.x, r3.x, c9.y
-mul.f r0.y, r0.y, c13.z
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r3.y, r3.y
-bary.f r3.z, 18, r1.x
-add.f r0.y, r0.y, r3.x
-mov.f32f32 r2.z, r2.z
-mul.f r3.x, r3.y, c3.w
-mad.f32 r0.w, r2.y, r3.z, r0.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r3.y, r2.z
-mov.f32f32 r2.y, r3.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.y, r0.y
-add.f r1.z, c14.z, r1.z
-mov.f32f32 r4.y, r2.y
-bary.f r2.y, 10, r1.x
-max.f r0.y, r0.y, c13.y
-max.f r0.w, c13.y, r0.w
-mov.f32f32 r1.z, r1.z
-add.f r2.y, r2.y, c13.w
-min.f r0.y, r0.y, c13.z
-mov.f32f32 r0.w, r0.w
-mul.f r1.z, r1.z, c3.w
-mov.f32f32 r2.z, r2.y
-add.f r3.z, c16.y, (neg)r0.y
-add.f r3.w, c16.y, (neg)r0.y
-add.f r4.w, c16.y, (neg)r0.y
-mov.f32f32 r4.z, r2.z
-mul.f r2.z, r3.z, c8.z
-mul.f r5.x, r3.w, c8.y
-mul.f r4.w, r4.w, c8.x
-log2 r0.w, r0.w
-(ss)mul.f r0.w, c10.x, r0.w
-mov.f32f32 r3.z, r1.z
-mov.f32f32 r0.x, r0.x
-sam.s (f32)(x)r3.w, r4.x, s#2, t#2
-(sy)mov.f32f32 r3.w, r3.w
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r3.z, r3.z
-(ss)mov.f32f32 r4.x, r0.x
-mov.f32f32 r0.x, r3.w
-add.f r3.w, c14.y, (neg)r1.w
-mov.f32f32 r3.z, r3.z
-mov.f32f32 r4.z, r2.y
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r5.y, r3.w
-add.f r3.w, c14.y, (neg)r0.z
-exp2 r0.w, r0.w
-mov.f32f32 r4.y, c7.y
-mov.f32f32 r5.z, c7.x
-mov.f32f32 r5.w, c7.z
-mov.f32f32 r6.x, r3.w
-mul.f r3.w, r4.y, c11.y
-mul.f r4.y, r5.z, c11.x
-mul.f r5.z, r5.w, c11.z
-mul.f r5.w, r5.y, r6.x
-(ss)mul.f r6.y, r3.w, r0.w
-mul.f r6.z, r4.y, r0.w
-(ss)mul.f r0.w, r5.z, r0.w
-mul.f r0.x, r5.w, r0.x
-mov.f32f32 r4.y, r3.x
-mov.f32f32 r3.x, r2.y
-mov.f32f32 r5.z, r2.x
-bary.f r2.x, 15, r1.x
-mov.f32f32 r3.w, r4.z
-mov.f32f32 r4.z, r3.x
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r2.x, r2.x
-add.f r1.w, r1.w, c13.z
-add.f r0.z, r0.z, c13.z
-sam.s (f32)(x)r3.x, r3.y, s#2, t#2
-(sy)mov.f32f32 r3.x, r3.x
-(ss)nop
-sam.s (f32)(x)r3.y, r4.x, s#2, t#2
-(sy)mov.f32f32 r3.y, r3.y
-mul.f r3.z, r1.w, r6.x
-mov.f32f32 r5.w, r2.x
-mov.f32f32 r3.w, r2.w
-(ss)mov.f32f32 r4.x, r1.z
-mad.f32 r0.x, r3.z, r3.y, r0.x
-mov.f32f32 r1.z, r2.y
-mul.f r2.x, r5.y, r0.z
-bary.f r2.y, 12, r1.x
-mov.f32f32 r0.x, r0.x
+(ss)mov.f32f32 r0.w, r0.y
+mov.f32f32 r5.z, r4.w
+add.f r1.z, r1.z, (neg)r4.z
+mul.f r3.y, r3.y, r3.w
+bary.f r3.w, 17, r1.x
+add.f r0.w, c16.y, (neg)r0.w
mov.f32f32 r4.y, r1.z
-sam (f32)(xyzw)r5.y, r5.z, s#0, t#0
-(sy)add.f r1.z, c16.y, (neg)r6.x
-add.f r2.w, c16.y, (neg)r6.x
-add.f r3.y, c16.y, (neg)r6.x
-mov.f32f32 r2.y, r2.y
-mul.f r1.z, r1.z, c4.x
-mul.f r0.z, r1.w, r0.z
-sam.s (f32)(x)r3.z, r3.w, s#2, t#2
-(sy)mov.f32f32 r1.w, r3.z
-mul.f r2.w, r2.w, c4.z
-mul.f r3.y, r3.y, c4.y
-mul.f r3.z, r6.x, c13.z
-mad.f32 r0.x, r2.x, r1.w, r0.x
-mul.f r1.w, r6.x, c13.z
-mul.f r2.x, r6.x, c13.z
-add.f r1.z, r3.z, r1.z
-mov.f32f32 r0.x, r0.x
-add.f r1.w, r1.w, r2.w
-mad.f32 r0.x, r0.z, r3.x, r0.x
-add.f r0.z, r2.x, r3.y
-mul.f r1.z, r5.y, r1.z
-mul.f r1.w, r5.w, r1.w
-mov.f32f32 r0.x, r0.x
-mul.f r0.z, r5.z, r0.z
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r1.w, r1.w
-mul.f r0.x, c14.w, r0.x
-bary.f r2.x, 2, r1.x
-mov.f32f32 r0.z, r0.z
-bary.f r2.w, 0, r1.x
-mov.f32f32 r0.x, r0.x
-mad.f32 r0.w, r1.w, r2.x, r0.w
-bary.f r2.x, 1, r1.x
-mad.f32 r2.w, r1.z, r2.w, r6.z
-mov.f32f32 r3.x, r2.y
-mov.f32f32 r0.w, r0.w
-mad.f32 r2.x, r0.z, r2.x, r6.y
-mov.f32f32 r2.y, r2.w
-bary.f (ei)r1.x, 13, r1.x
-mul.f r0.w, r0.w, r0.x
-mov.f32f32 r1.y, r2.x
-mul.f r2.x, r2.y, r0.x
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r0.w, r0.w
-mul.f r0.x, r1.y, r0.x
-mad.f32 r0.w, c6.z, r1.w, r0.w
-mov.f32f32 r1.y, r2.x
-mov.f32f32 r3.y, r1.x
-mov.f32f32 r1.x, c13.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.x, r0.x
-mad.f32 r1.y, c6.x, r1.z, r1.y
-mad.f32 r0.x, c6.y, r0.z, r0.x
-mul.f r0.z, r0.y, r0.w
-sam (f32)(w)r1.z, r3.x, s#1, t#1
-(sy)cmps.f.lt r0.w, r2.y, c15.y
-mov.f32f32 r1.z, r2.y
-mov.f32f32 r2.w, r1.x
-add.f r0.z, r0.z, r2.z
-cov.u32f32 r0.w, r0.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r1.x, r1.y
-mov.f32f32 r1.y, r1.z
-cmps.f.ne r0.w, r0.w, c13.y
-mov.f32f32 r1.z, c13.y
-mul.f r0.x, r0.y, r0.x
-mul.f r0.y, r0.y, r1.x
-nop
-sel.b32 r0.w, r1.z, r0.w, r1.y
-add.f r0.x, r0.x, r5.x
-add.f r0.y, r0.y, r4.w
+mul.f r6.y, r0.x, c3.z
+add.f r0.x, c14.z, r1.z
+mad.f32 r0.z, r3.y, r3.w, r0.z
+add.f r1.z, c14.x, r4.y
+mul.f r0.w, r0.w, c9.y
+(ss)mul.f r0.y, r0.y, c13.z
+mul.f r2.w, r3.z, r2.w
+mul.f r3.z, r1.z, c3.w
+bary.f r1.z, 18, r1.x
+add.f r0.y, r0.y, r0.w
+mov.f32f32 r3.y, r6.y
+mov.f32f32 r5.w, r3.z
+bary.f r0.w, 10, r1.x
+max.f r0.y, r0.y, c13.y
+mad.f32 r0.z, r2.w, r1.z, r0.z
+mul.f r6.z, r0.x, c3.w
+add.f r6.w, r0.w, c13.w
+min.f r0.x, r0.y, c13.z
+max.f r0.y, c13.y, r0.z
+mov.f32f32 r5.x, r6.z
+mov.f32f32 r6.x, r6.w
+add.f r0.z, c16.y, (neg)r0.x
+add.f r0.w, c16.y, (neg)r0.x
+add.f r1.z, c16.y, (neg)r0.x
+mov.f32f32 r3.w, r6.w
+mov.f32f32 r5.y, r6.w
+log2 r0.y, r0.y
+mul.f r0.z, r0.z, c8.z
+sam.s (f32)(x)r7.x, r5.z, s#2, t#2
+mov.f32f32 r2.w, r3.x
+add.f r4.x, c14.y, (neg)r4.x
+mul.f r0.w, r0.w, c8.y
+mul.f r1.z, r1.z, c8.x
+(ss)mul.f r0.y, c10.x, r0.y
+mov.f32f32 r4.y, r4.x
nop
-mul.f r0.z, r0.z, r0.w
-mul.f r0.x, r0.x, r0.w
-mul.f r0.y, r0.y, r0.w
+sam.s (f32)(x)r7.y, r3.y, s#2, t#2
+sam.s (f32)(x)r4.z, r4.w, s#2, t#2
nop
-mul.f r0.z, r0.z, c5.z
-mul.f r0.x, r0.x, c5.y
-mul.f r0.y, r0.y, c5.x
+(ss)nop
+sam.s (f32)(x)r4.w, r6.y, s#2, t#2
+add.f r1.w, r1.w, c13.z
+mul.f r2.w, r2.w, r4.y
+add.f r2.z, r2.z, c13.z
+sam (f32)(xyzw)r5.x, r2.x, s#0, t#0
+exp2 r0.y, r0.y
+(ss)mov.f32f32 r2.x, c7.y
+mov.f32f32 r2.y, c7.z
+(sy)mul.f r2.w, r2.w, r7.x
+mul.f r3.y, r1.w, r4.x
+mul.f r2.x, r2.x, c11.y
+mov.f32f32 r3.z, c7.x
+mul.f r2.y, r2.y, c11.z
+mad.f32 r2.w, r3.y, r7.y, r2.w
+mul.f r3.x, r3.x, r2.z
+(ss)mul.f r2.x, r2.x, r0.y
+mul.f r3.y, r3.z, c11.x
+mul.f r2.y, r2.y, r0.y
+mad.f32 r2.w, r3.x, r4.z, r2.w
+mul.f r1.w, r1.w, r2.z
+add.f r2.z, c16.y, (neg)r5.w
+mul.f r0.y, r3.y, r0.y
+add.f r3.x, c16.y, (neg)r5.w
+mad.f32 r1.w, r1.w, r4.w, r2.w
+mul.f r2.z, r2.z, c4.y
+add.f r2.w, c16.y, (neg)r5.w
+mul.f r3.x, r3.x, c4.z
+mul.f r1.w, c14.w, r1.w
+mul.f r3.y, r5.w, c13.z
+mul.f r3.z, r5.w, c13.z
+mul.f r2.w, r2.w, c4.x
+mov.f32f32 r3.w, r1.w
+add.f r3.x, r3.y, r3.x
+add.f r2.z, r3.z, r2.z
+mul.f r3.y, r5.w, c13.z
+bary.f r4.x, 12, r1.x
+mul.f r3.x, r5.z, r3.x
+mul.f r2.z, r5.y, r2.z
+add.f r2.w, r3.y, r2.w
+bary.f r4.y, 13, r1.x
+mov.f32f32 r3.y, r3.x
+bary.f r3.z, 2, r1.x
+mov.f32f32 r4.z, r2.z
+bary.f r4.w, 1, r1.x
+mul.f r5.x, r5.x, r2.w
+mad.f32 r2.y, r3.y, r3.z, r2.y
+sam (f32)(w)r5.y, r4.x, s#1, t#1
+(sy)cmps.f.lt r3.y, r6.x, c15.y
+mov.f32f32 r2.w, c13.z
+mov.f32f32 r3.z, c13.y
+mul.f r2.y, r2.y, r3.w
+mad.f32 r2.x, r4.z, r4.w, r2.x
+mad.f32 r2.y, c6.z, r3.x, r2.y
+mov.f32f32 r3.x, r5.x
+bary.f (ei)r1.x, 0, r1.x
+mul.f r1.y, r2.x, r3.w
+mul.f r2.x, r0.x, r2.y
+mad.f32 r1.y, c6.y, r2.z, r1.y
+mad.f32 r0.y, r3.x, r1.x, r0.y
+cov.u32f32 r1.x, r3.y
+add.f r0.z, r2.x, r0.z
+mul.f r1.y, r0.x, r1.y
+mul.f r0.y, r0.y, r1.w
+cmps.f.ne r1.x, r1.x, c13.y
+mad.f32 r0.y, c6.x, r5.x, r0.y
+add.f r0.w, r1.y, r0.w
nop
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
+sel.b32 r1.x, r3.z, r1.x, r6.x
+mul.f r0.x, r0.x, r0.y
+(rpt1)nop
+mul.f r0.y, r0.z, r1.x
+mul.f r0.z, r0.w, r1.x
+(rpt1)nop
+mul.f r2.z, r0.y, c5.z
+mul.f r2.y, r0.z, c5.y
+add.f r0.x, r0.x, r1.z
+(rpt2)nop
+mul.f r0.x, r0.x, r1.x
+(rpt2)nop
+mul.f r2.x, r0.x, c5.x
+end
nop
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
nop
-mov.f32f32 r2.z, r0.z
-mov.f32f32 r2.y, r0.x
-mov.f32f32 r2.x, r0.y
-end
; FRAG: outputs: r2.x (1:0)
-; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r2.x (5:10,cm=f,il=12,b=1) r3.x (5:11,cm=f,il=16,b=1) r1.y (5:12,cm=f,il=20,b=1) r5.x (5:13,cm=f,il=24,b=1)
-; FRAG: 244 instructions, 0 half, 7 full
+; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r2.x (5:10,cm=f,il=12,b=1) r3.x (5:11,cm=f,il=16,b=1) r1.z (5:12,cm=f,il=20,b=1) r5.x (5:13,cm=f,il=24,b=1)
+; FRAG: 176 instructions, 0 half, 8 full
diff --git a/reference/0ad-alpine-valley/0ad-80.asm b/reference/0ad-alpine-valley/0ad-80.asm
index 67a2ad5..e7bcae3 100644
--- a/reference/0ad-alpine-valley/0ad-80.asm
+++ b/reference/0ad-alpine-valley/0ad-80.asm
@@ -32,159 +32,113 @@
@out(r5.y) out21
@out(r5.z) out22
@out(r5.w) out23
+@const(c17.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
+@const(c18.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)mul.f r1.z, c12.x, r0.x
mul.f r1.w, c12.x, r0.w
mad.f32 r1.z, c13.x, r0.y, r1.z
mad.f32 r1.w, c13.x, r1.x, r1.w
mad.f32 r1.z, c14.x, r0.z, r1.z
-mul.f r2.x, c12.z, r0.x
-mul.f r2.y, c12.z, r0.w
-mul.f r0.w, c12.y, r0.w
-add.f r1.z, r1.z, c15.x
-mov.f32f32 r1.w, r1.w
-mad.f32 r2.x, c13.z, r0.y, r2.x
-mad.f32 r2.y, c13.z, r1.x, r2.y
-add.f r2.z, c4.x, (neg)r1.z
mad.f32 r1.w, c14.x, r1.y, r1.w
-mul.f r2.w, c8.w, r1.z
-mul.f r3.x, c8.z, r1.z
-mul.f r3.y, r2.z, r2.z
-mul.f r3.z, c12.y, r0.x
-mov.f32f32 r1.w, r1.w
-mad.f32 r3.z, c13.y, r0.y, r3.z
-absneg.f r3.w, (neg)c5.x
-mad.f32 r3.z, c14.y, r0.z, r3.z
-mul.f r4.x, c8.y, r1.z
-mul.f r4.y, c8.x, r1.z
-mul.f r5.x, r1.w, r3.w
-add.f r5.w, r3.z, c15.y
-mad.f32 r0.w, c13.y, r1.x, r0.w
-mul.f r1.x, c0.w, r1.z
-mul.f r3.z, c0.z, r1.z
+absneg.f r2.x, (neg)c5.x
+mul.f r2.y, c12.y, r0.x
+add.f r2.z, r1.z, c15.x
+mad.f32 r1.z, c13.y, r0.y, r2.y
+mul.f r2.y, c12.z, r0.x
+mul.f r2.w, r1.w, r2.x
+add.f r3.x, c4.x, (neg)r2.z
+mul.f r3.y, c12.y, r0.w
+mul.f r3.z, c8.y, r2.z
+mul.f r3.w, c8.x, r2.z
+mul.f r4.x, r3.x, r3.x
+mad.f32 r1.z, c14.y, r0.z, r1.z
+mad.f32 r3.y, c13.y, r1.x, r3.y
+mul.f r4.y, c8.w, r2.z
+mul.f r5.x, c8.z, r2.z
+add.f r5.w, r1.z, c15.y
+mad.f32 r6.x, c14.y, r1.y, r3.y
+absneg.f r1.z, (neg)c5.y
+mul.f r3.y, c0.w, r2.z
add.f r5.y, c4.y, (neg)r5.w
-mov.f32f32 r0.w, r0.w
-mad.f32 r2.w, c9.w, r5.w, r2.w
-mad.f32 r3.x, c9.z, r5.w, r3.x
-mad.f32 r3.y, r5.y, r5.y, r3.y
-mad.f32 r0.w, c14.y, r1.y, r0.w
-mad.f32 r2.x, c14.z, r0.z, r2.x
-mad.f32 r4.x, c9.y, r5.w, r4.x
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r6.x, r0.w
-add.f r2.x, r2.x, c15.z
-absneg.f r0.w, (neg)c5.y
-mad.f32 r4.y, c9.x, r5.w, r4.y
-mad.f32 r1.x, c1.w, r5.w, r1.x
-add.f r5.z, c4.z, (neg)r2.x
-mad.f32 r5.x, r6.x, r0.w, r5.x
-mad.f32 r2.w, c10.w, r2.x, r2.w
-mad.f32 r3.x, c10.z, r2.x, r3.x
-mad.f32 r3.y, r5.z, r5.z, r3.y
-mov.f32f32 r5.x, r5.x
-mov.f32f32 r2.y, r2.y
+mad.f32 r3.z, c9.y, r5.w, r3.z
+mad.f32 r3.w, c9.x, r5.w, r3.w
+mad.f32 r2.w, r6.x, r1.z, r2.w
+mad.f32 r4.x, r5.y, r5.y, r4.x
+mad.f32 r2.y, c13.z, r0.y, r2.y
+mul.f r0.w, c12.z, r0.w
+mad.f32 r2.y, c14.z, r0.z, r2.y
+mad.f32 r0.w, c13.z, r1.x, r0.w
+mad.f32 r1.x, c9.w, r5.w, r4.y
+mad.f32 r4.y, c9.z, r5.w, r5.x
+add.f r2.y, r2.y, c15.z
+mad.f32 r6.y, c14.z, r1.y, r0.w
+absneg.f r0.w, (neg)c5.z
+mad.f32 r1.y, c1.w, r5.w, r3.y
+add.f r3.y, c4.z, (neg)r2.y
+mad.f32 r3.z, c10.y, r2.y, r3.z
+mad.f32 r3.w, c10.x, r2.y, r3.w
+mad.f32 r2.w, r6.y, r0.w, r2.w
+mad.f32 r4.x, r3.y, r3.y, r4.x
mul.f r0.x, c12.w, r0.x
-mad.f32 r4.x, c10.y, r2.x, r4.x
-mad.f32 r4.y, c10.x, r2.x, r4.y
-mad.f32 r1.y, c14.z, r1.y, r2.y
-rsq r2.y, r3.y
-(ss)mov.f32f32 r2.y, r2.y
-(ss)absneg.f r3.y, (neg)c5.z
+mad.f32 r5.x, c10.w, r2.y, r1.x
+mad.f32 r4.y, c10.z, r2.y, r4.y
mad.f32 r0.x, c13.w, r0.y, r0.x
-mov.f32f32 r6.y, r1.y
-mad.f32 r0.y, r2.z, r2.y, r3.w
-mad.f32 r0.w, r5.y, r2.y, r0.w
-mad.f32 r1.y, r5.z, r2.y, r3.y
-mad.f32 r2.y, r6.y, r3.y, r5.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r2.y, r2.y
-mul.f r2.z, r0.y, r0.y
+mad.f32 r0.y, c2.w, r2.y, r1.y
+mul.f r6.z, c0.z, r2.z
+rsq r1.x, r4.x
+(ss)mov.f32f32 r1.y, r1.x
+mad.f32 r2.x, r3.x, r1.x, r2.x
+max.f r1.x, c17.x, r2.w
mad.f32 r0.x, c14.w, r0.z, r0.x
-mad.f32 r0.z, r0.w, r0.w, r2.z
-max.f r2.y, c17.x, r2.y
-mad.f32 r2.z, c2.w, r2.x, r1.x
-mad.f32 r1.x, c1.z, r5.w, r3.z
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r2.y, r2.y
-mad.f32 r0.z, r1.y, r1.y, r0.z
+mad.f32 r0.z, r5.y, r1.y, r1.z
+mov.f32f32 r1.z, r2.x
+mad.f32 r0.w, r3.y, r1.y, r0.w
+nop
+mov.f32f32 r2.w, r0.z
+mul.f r1.y, r1.z, r1.z
+(ss)mov.f32f32 r4.x, r0.w
+mov.f32f32 r3.x, r1.x
+mad.f32 r0.z, r0.z, r2.w, r1.y
add.f r0.x, r0.x, c15.w
-mad.f32 r6.z, c2.z, r2.x, r1.x
-mul.f r6.w, c0.y, r1.z
-mul.f r7.x, c0.x, r1.z
-mad.f32 r7.y, c7.x, r2.x, c7.y
-mad.f32 r7.z, c7.x, r1.z, c7.y
+mad.f32 r0.z, r0.w, r4.x, r0.z
+mul.f r1.z, r3.x, c6.z
+mul.f r1.y, r3.x, c6.y
+mad.f32 r0.w, c11.y, r0.x, r3.z
+mad.f32 r3.x, c11.x, r0.x, r3.w
+mul.f r1.x, r1.x, c6.x
+mad.f32 r3.w, c11.w, r0.x, r5.x
rsq r0.z, r0.z
-(ss)mov.f32f32 r0.z, r0.z
-mul.f r1.x, r2.y, c6.z
-mul.f r3.y, r2.y, c6.y
-mul.f r2.y, r2.y, c6.x
-mul.f r1.y, r1.y, r0.z
-mul.f r0.w, r0.w, r0.z
-mul.f r0.y, r0.y, r0.z
-mov.f32f32 r0.z, r1.x
-mov.f32f32 r1.x, r1.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.y, r0.y
-nop
-mov.f32f32 r5.z, r1.x
-mov.f32f32 r5.y, r0.w
-mov.f32f32 r5.x, r0.y
-mov.f32f32 r1.z, r0.z
-mov.f32f32 r0.y, r3.y
-mov.f32f32 r0.z, r2.y
-(rpt1)nop
-mov.f32f32 r1.y, r0.y
-mov.f32f32 r1.x, r0.z
-mad.f32 r0.y, c11.w, r0.x, r2.w
-mad.f32 r0.z, c11.z, r0.x, r3.x
-mad.f32 r0.w, c11.y, r0.x, r4.x
-mad.f32 r2.y, c11.x, r0.x, r4.y
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-mul.f r0.w, r0.w, c16.y
-mul.f r2.y, r2.y, c16.x
-mov.f32f32 r3.w, r0.y
-mov.f32f32 r3.z, r0.z
-mov.f32f32 r3.y, r0.w
-mov.f32f32 r3.x, r2.y
-mad.f32 r0.y, c3.w, r0.x, r2.z
-mad.f32 r0.z, c3.z, r0.x, r6.z
-mad.f32 r2.y, c1.y, r5.w, r6.w
-mad.f32 r2.z, c1.x, r5.w, r7.x
-mov.f32f32 r0.w, r0.y
-mov.f32f32 r0.z, r0.z
-mad.f32 r0.y, c2.y, r2.x, r2.y
-mad.f32 r2.x, c2.x, r2.x, r2.z
+(ss)mov.f32f32 r3.z, r0.z
+mul.f r5.x, r2.x, r0.z
+mul.f r3.y, r0.w, c16.y
+mul.f r3.x, r3.x, c16.x
+mul.f r5.z, r4.x, r3.z
+mul.f r5.y, r2.w, r3.z
+mad.f32 r3.z, c11.z, r0.x, r4.y
+mad.f32 r0.w, c3.w, r0.x, r0.y
+mad.f32 r0.y, c1.z, r5.w, r6.z
+(ss)mul.f r0.z, c0.y, r2.z
+mad.f32 r0.y, c2.z, r2.y, r0.y
+mad.f32 r2.x, c1.y, r5.w, r0.z
+mad.f32 r0.z, c3.z, r0.x, r0.y
+mad.f32 r0.y, c2.y, r2.y, r2.x
+mul.f r2.x, c0.x, r2.z
mad.f32 r0.y, c3.y, r0.x, r0.y
+mad.f32 r2.x, c1.x, r5.w, r2.x
+mad.f32 r4.y, c7.x, r2.y, c7.y
+mad.f32 r2.x, c2.x, r2.y, r2.x
+mad.f32 r4.x, c7.x, r2.z, c7.y
mad.f32 r0.x, c3.x, r0.x, r2.x
-mov.f32f32 r2.x, r7.y
-mov.f32f32 r2.y, r7.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r4.y, r2.x
-mov.f32f32 r4.x, r2.y
-mov.f32f32 r2.x, r6.y
+mov.f32f32 r2.z, r6.y
mov.f32f32 r2.y, r6.x
-mov.f32f32 r1.w, r1.w
-nop
-mov.f32f32 r2.z, r2.x
-mov.f32f32 r2.y, r2.y
mov.f32f32 r2.x, r1.w
+mov.f32f32 r5.w, (0.000000)
+mov.f32f32 r2.w, (0.000000)
mov.f32f32 r1.w, (0.000000)
-mov.f32f32 r2.w, r4.w
-mov.f32f32 r4.z, r4.z
-mov.f32f32 r6.x, (0.000000)
-mov.f32f32 r5.w, r1.w
-mov.f32f32 r4.w, r2.w
-mov.f32f32 r4.z, r4.z
-mov.f32f32 r2.w, r6.x
-mov.f32f32 r1.w, (0.000000)
-(rpt2)nop
-mov.f32f32 r1.w, r1.w
end
nop
+nop
; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11) r4.x (5:12) r5.x (5:13)
; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r4.z (0:0,cm=3,il=16,b=0)
-; VERT: 154 instructions, 0 half, 8 full
+; VERT: 102 instructions, 0 half, 7 full
diff --git a/reference/0ad-alpine-valley/0ad-81.asm b/reference/0ad-alpine-valley/0ad-81.asm
index 67a2ad5..e7bcae3 100644
--- a/reference/0ad-alpine-valley/0ad-81.asm
+++ b/reference/0ad-alpine-valley/0ad-81.asm
@@ -32,159 +32,113 @@
@out(r5.y) out21
@out(r5.z) out22
@out(r5.w) out23
+@const(c17.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
+@const(c18.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)mul.f r1.z, c12.x, r0.x
mul.f r1.w, c12.x, r0.w
mad.f32 r1.z, c13.x, r0.y, r1.z
mad.f32 r1.w, c13.x, r1.x, r1.w
mad.f32 r1.z, c14.x, r0.z, r1.z
-mul.f r2.x, c12.z, r0.x
-mul.f r2.y, c12.z, r0.w
-mul.f r0.w, c12.y, r0.w
-add.f r1.z, r1.z, c15.x
-mov.f32f32 r1.w, r1.w
-mad.f32 r2.x, c13.z, r0.y, r2.x
-mad.f32 r2.y, c13.z, r1.x, r2.y
-add.f r2.z, c4.x, (neg)r1.z
mad.f32 r1.w, c14.x, r1.y, r1.w
-mul.f r2.w, c8.w, r1.z
-mul.f r3.x, c8.z, r1.z
-mul.f r3.y, r2.z, r2.z
-mul.f r3.z, c12.y, r0.x
-mov.f32f32 r1.w, r1.w
-mad.f32 r3.z, c13.y, r0.y, r3.z
-absneg.f r3.w, (neg)c5.x
-mad.f32 r3.z, c14.y, r0.z, r3.z
-mul.f r4.x, c8.y, r1.z
-mul.f r4.y, c8.x, r1.z
-mul.f r5.x, r1.w, r3.w
-add.f r5.w, r3.z, c15.y
-mad.f32 r0.w, c13.y, r1.x, r0.w
-mul.f r1.x, c0.w, r1.z
-mul.f r3.z, c0.z, r1.z
+absneg.f r2.x, (neg)c5.x
+mul.f r2.y, c12.y, r0.x
+add.f r2.z, r1.z, c15.x
+mad.f32 r1.z, c13.y, r0.y, r2.y
+mul.f r2.y, c12.z, r0.x
+mul.f r2.w, r1.w, r2.x
+add.f r3.x, c4.x, (neg)r2.z
+mul.f r3.y, c12.y, r0.w
+mul.f r3.z, c8.y, r2.z
+mul.f r3.w, c8.x, r2.z
+mul.f r4.x, r3.x, r3.x
+mad.f32 r1.z, c14.y, r0.z, r1.z
+mad.f32 r3.y, c13.y, r1.x, r3.y
+mul.f r4.y, c8.w, r2.z
+mul.f r5.x, c8.z, r2.z
+add.f r5.w, r1.z, c15.y
+mad.f32 r6.x, c14.y, r1.y, r3.y
+absneg.f r1.z, (neg)c5.y
+mul.f r3.y, c0.w, r2.z
add.f r5.y, c4.y, (neg)r5.w
-mov.f32f32 r0.w, r0.w
-mad.f32 r2.w, c9.w, r5.w, r2.w
-mad.f32 r3.x, c9.z, r5.w, r3.x
-mad.f32 r3.y, r5.y, r5.y, r3.y
-mad.f32 r0.w, c14.y, r1.y, r0.w
-mad.f32 r2.x, c14.z, r0.z, r2.x
-mad.f32 r4.x, c9.y, r5.w, r4.x
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r6.x, r0.w
-add.f r2.x, r2.x, c15.z
-absneg.f r0.w, (neg)c5.y
-mad.f32 r4.y, c9.x, r5.w, r4.y
-mad.f32 r1.x, c1.w, r5.w, r1.x
-add.f r5.z, c4.z, (neg)r2.x
-mad.f32 r5.x, r6.x, r0.w, r5.x
-mad.f32 r2.w, c10.w, r2.x, r2.w
-mad.f32 r3.x, c10.z, r2.x, r3.x
-mad.f32 r3.y, r5.z, r5.z, r3.y
-mov.f32f32 r5.x, r5.x
-mov.f32f32 r2.y, r2.y
+mad.f32 r3.z, c9.y, r5.w, r3.z
+mad.f32 r3.w, c9.x, r5.w, r3.w
+mad.f32 r2.w, r6.x, r1.z, r2.w
+mad.f32 r4.x, r5.y, r5.y, r4.x
+mad.f32 r2.y, c13.z, r0.y, r2.y
+mul.f r0.w, c12.z, r0.w
+mad.f32 r2.y, c14.z, r0.z, r2.y
+mad.f32 r0.w, c13.z, r1.x, r0.w
+mad.f32 r1.x, c9.w, r5.w, r4.y
+mad.f32 r4.y, c9.z, r5.w, r5.x
+add.f r2.y, r2.y, c15.z
+mad.f32 r6.y, c14.z, r1.y, r0.w
+absneg.f r0.w, (neg)c5.z
+mad.f32 r1.y, c1.w, r5.w, r3.y
+add.f r3.y, c4.z, (neg)r2.y
+mad.f32 r3.z, c10.y, r2.y, r3.z
+mad.f32 r3.w, c10.x, r2.y, r3.w
+mad.f32 r2.w, r6.y, r0.w, r2.w
+mad.f32 r4.x, r3.y, r3.y, r4.x
mul.f r0.x, c12.w, r0.x
-mad.f32 r4.x, c10.y, r2.x, r4.x
-mad.f32 r4.y, c10.x, r2.x, r4.y
-mad.f32 r1.y, c14.z, r1.y, r2.y
-rsq r2.y, r3.y
-(ss)mov.f32f32 r2.y, r2.y
-(ss)absneg.f r3.y, (neg)c5.z
+mad.f32 r5.x, c10.w, r2.y, r1.x
+mad.f32 r4.y, c10.z, r2.y, r4.y
mad.f32 r0.x, c13.w, r0.y, r0.x
-mov.f32f32 r6.y, r1.y
-mad.f32 r0.y, r2.z, r2.y, r3.w
-mad.f32 r0.w, r5.y, r2.y, r0.w
-mad.f32 r1.y, r5.z, r2.y, r3.y
-mad.f32 r2.y, r6.y, r3.y, r5.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r2.y, r2.y
-mul.f r2.z, r0.y, r0.y
+mad.f32 r0.y, c2.w, r2.y, r1.y
+mul.f r6.z, c0.z, r2.z
+rsq r1.x, r4.x
+(ss)mov.f32f32 r1.y, r1.x
+mad.f32 r2.x, r3.x, r1.x, r2.x
+max.f r1.x, c17.x, r2.w
mad.f32 r0.x, c14.w, r0.z, r0.x
-mad.f32 r0.z, r0.w, r0.w, r2.z
-max.f r2.y, c17.x, r2.y
-mad.f32 r2.z, c2.w, r2.x, r1.x
-mad.f32 r1.x, c1.z, r5.w, r3.z
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r2.y, r2.y
-mad.f32 r0.z, r1.y, r1.y, r0.z
+mad.f32 r0.z, r5.y, r1.y, r1.z
+mov.f32f32 r1.z, r2.x
+mad.f32 r0.w, r3.y, r1.y, r0.w
+nop
+mov.f32f32 r2.w, r0.z
+mul.f r1.y, r1.z, r1.z
+(ss)mov.f32f32 r4.x, r0.w
+mov.f32f32 r3.x, r1.x
+mad.f32 r0.z, r0.z, r2.w, r1.y
add.f r0.x, r0.x, c15.w
-mad.f32 r6.z, c2.z, r2.x, r1.x
-mul.f r6.w, c0.y, r1.z
-mul.f r7.x, c0.x, r1.z
-mad.f32 r7.y, c7.x, r2.x, c7.y
-mad.f32 r7.z, c7.x, r1.z, c7.y
+mad.f32 r0.z, r0.w, r4.x, r0.z
+mul.f r1.z, r3.x, c6.z
+mul.f r1.y, r3.x, c6.y
+mad.f32 r0.w, c11.y, r0.x, r3.z
+mad.f32 r3.x, c11.x, r0.x, r3.w
+mul.f r1.x, r1.x, c6.x
+mad.f32 r3.w, c11.w, r0.x, r5.x
rsq r0.z, r0.z
-(ss)mov.f32f32 r0.z, r0.z
-mul.f r1.x, r2.y, c6.z
-mul.f r3.y, r2.y, c6.y
-mul.f r2.y, r2.y, c6.x
-mul.f r1.y, r1.y, r0.z
-mul.f r0.w, r0.w, r0.z
-mul.f r0.y, r0.y, r0.z
-mov.f32f32 r0.z, r1.x
-mov.f32f32 r1.x, r1.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.y, r0.y
-nop
-mov.f32f32 r5.z, r1.x
-mov.f32f32 r5.y, r0.w
-mov.f32f32 r5.x, r0.y
-mov.f32f32 r1.z, r0.z
-mov.f32f32 r0.y, r3.y
-mov.f32f32 r0.z, r2.y
-(rpt1)nop
-mov.f32f32 r1.y, r0.y
-mov.f32f32 r1.x, r0.z
-mad.f32 r0.y, c11.w, r0.x, r2.w
-mad.f32 r0.z, c11.z, r0.x, r3.x
-mad.f32 r0.w, c11.y, r0.x, r4.x
-mad.f32 r2.y, c11.x, r0.x, r4.y
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-mul.f r0.w, r0.w, c16.y
-mul.f r2.y, r2.y, c16.x
-mov.f32f32 r3.w, r0.y
-mov.f32f32 r3.z, r0.z
-mov.f32f32 r3.y, r0.w
-mov.f32f32 r3.x, r2.y
-mad.f32 r0.y, c3.w, r0.x, r2.z
-mad.f32 r0.z, c3.z, r0.x, r6.z
-mad.f32 r2.y, c1.y, r5.w, r6.w
-mad.f32 r2.z, c1.x, r5.w, r7.x
-mov.f32f32 r0.w, r0.y
-mov.f32f32 r0.z, r0.z
-mad.f32 r0.y, c2.y, r2.x, r2.y
-mad.f32 r2.x, c2.x, r2.x, r2.z
+(ss)mov.f32f32 r3.z, r0.z
+mul.f r5.x, r2.x, r0.z
+mul.f r3.y, r0.w, c16.y
+mul.f r3.x, r3.x, c16.x
+mul.f r5.z, r4.x, r3.z
+mul.f r5.y, r2.w, r3.z
+mad.f32 r3.z, c11.z, r0.x, r4.y
+mad.f32 r0.w, c3.w, r0.x, r0.y
+mad.f32 r0.y, c1.z, r5.w, r6.z
+(ss)mul.f r0.z, c0.y, r2.z
+mad.f32 r0.y, c2.z, r2.y, r0.y
+mad.f32 r2.x, c1.y, r5.w, r0.z
+mad.f32 r0.z, c3.z, r0.x, r0.y
+mad.f32 r0.y, c2.y, r2.y, r2.x
+mul.f r2.x, c0.x, r2.z
mad.f32 r0.y, c3.y, r0.x, r0.y
+mad.f32 r2.x, c1.x, r5.w, r2.x
+mad.f32 r4.y, c7.x, r2.y, c7.y
+mad.f32 r2.x, c2.x, r2.y, r2.x
+mad.f32 r4.x, c7.x, r2.z, c7.y
mad.f32 r0.x, c3.x, r0.x, r2.x
-mov.f32f32 r2.x, r7.y
-mov.f32f32 r2.y, r7.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r4.y, r2.x
-mov.f32f32 r4.x, r2.y
-mov.f32f32 r2.x, r6.y
+mov.f32f32 r2.z, r6.y
mov.f32f32 r2.y, r6.x
-mov.f32f32 r1.w, r1.w
-nop
-mov.f32f32 r2.z, r2.x
-mov.f32f32 r2.y, r2.y
mov.f32f32 r2.x, r1.w
+mov.f32f32 r5.w, (0.000000)
+mov.f32f32 r2.w, (0.000000)
mov.f32f32 r1.w, (0.000000)
-mov.f32f32 r2.w, r4.w
-mov.f32f32 r4.z, r4.z
-mov.f32f32 r6.x, (0.000000)
-mov.f32f32 r5.w, r1.w
-mov.f32f32 r4.w, r2.w
-mov.f32f32 r4.z, r4.z
-mov.f32f32 r2.w, r6.x
-mov.f32f32 r1.w, (0.000000)
-(rpt2)nop
-mov.f32f32 r1.w, r1.w
end
nop
+nop
; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11) r4.x (5:12) r5.x (5:13)
; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r4.z (0:0,cm=3,il=16,b=0)
-; VERT: 154 instructions, 0 half, 8 full
+; VERT: 102 instructions, 0 half, 7 full
diff --git a/reference/0ad-alpine-valley/0ad-82.asm b/reference/0ad-alpine-valley/0ad-82.asm
index 52f5266..593f290 100644
--- a/reference/0ad-alpine-valley/0ad-82.asm
+++ b/reference/0ad-alpine-valley/0ad-82.asm
@@ -8,239 +8,163 @@
@out(r2.y) out1
@out(r2.z) out2
@out(r2.w) out3
+@const(c12.x) 0x3f000000, 0x00000000, 0x3f800000, 0xbb449ba6
+@const(c13.x) 0xbf000000, 0x40000000, 0x3f800000, 0x3de38866
+@const(c14.x) 0x3fb8aa65, 0x3cf5c28f, 0x00000000, 0x00000000
+@const(c15.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.x, 8, r1.x
-bary.f r0.y, 4, r1.x
-add.f r0.w, r0.w, c12.y
+add.f r0.y, r0.w, c12.y
+bary.f r0.w, 4, r1.x
bary.f r1.z, 9, r1.x
add.f r1.w, r0.x, c13.x
-mul.f r2.x, r0.y, r0.y
-bary.f r2.y, 5, r1.x
-add.f r2.z, r1.z, c13.x
+bary.f r2.x, 12, r1.x
+mul.f r2.y, r0.w, r0.w
+bary.f r2.z, 5, r1.x
floor.f r2.w, r1.w
-rcp r0.w, r0.w
+rcp r0.y, r0.y
add.f r0.z, r0.z, c12.y
-mad.f32 r2.x, r2.y, r2.y, r2.x
-floor.f r3.x, r2.z
+add.f r3.x, r1.z, c13.x
+mad.f32 r2.y, r2.z, r2.z, r2.y
add.f r1.w, r1.w, (neg)r2.w
-(ss)mul.f r0.z, r0.z, r0.w
-(ss)mov.f32f32 r0.w, r2.x
-bary.f r2.x, 6, r1.x
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r0.z, r0.z
-absneg.f r2.w, (neg)c8.x
-mad.f32 r0.w, r2.x, r2.x, r0.w
-mul.f r3.y, c12.x, r1.w
-add.f r2.z, r2.z, (neg)r3.x
-mul.f r2.w, r2.w, c8.x
-add.f r3.x, c13.y, (neg)r1.w
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r2.z, r2.z
-mul.f r2.w, r2.w, r0.z
-rsq r0.w, r0.w
-(ss)mov.f32f32 r0.w, r0.w
-add.f r0.x, r0.x, (neg)r3.y
-mul.f r3.y, c12.x, r2.z
-mov.f32f32 r2.w, r2.w
-mul.f r0.y, r0.y, r0.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r3.y, r3.y
-mul.f r0.z, r2.w, r0.z
+(ss)mul.f r0.y, r0.z, r0.y
+absneg.f r0.z, (neg)c8.x
+bary.f r2.w, 6, r1.x
+mov.f32f32 r3.y, r1.w
+floor.f r3.z, r3.x
+mul.f r0.z, r0.z, c8.x
+mad.f32 r2.y, r2.w, r2.w, r2.y
+mul.f r3.w, c12.x, r3.y
+add.f r3.x, r3.x, (neg)r3.z
+mul.f r0.z, r0.z, r0.y
mov.f32f32 r0.y, r0.y
-add.f r2.w, c13.x, r0.x
-bary.f r3.z, 16, r1.x
-mov.f32f32 r0.z, r0.z
+add.f r0.x, r0.x, (neg)r3.w
+mov.f32f32 r3.z, r3.x
+rsq r2.y, r2.y
+(ss)mov.f32f32 r3.w, r2.y
+mul.f r0.y, r0.z, r0.y
+mov.f32f32 r0.z, r0.x
+mul.f r4.x, c12.x, r3.z
add.f r0.x, c13.z, r0.x
-mov.f32f32 r2.w, r2.w
-mul.f r0.y, r0.y, r3.z
-mul.f r2.y, r2.y, r0.w
-mul.f r0.z, r0.z, c14.x
-mul.f r2.w, r2.w, c3.z
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r3.z, r2.w
-bary.f r3.w, 17, r1.x
-mul.f r0.x, r0.x, c3.z
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r4.x, r3.z
-add.f r1.z, r1.z, (neg)r3.y
-mad.f32 r0.y, r2.y, r3.w, r0.y
-exp2 r0.z, r0.z
-(ss)mov.f32f32 r0.z, r0.z
-mov.f32f32 r2.y, r0.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r0.y, r0.y
-mul.f r0.w, r2.x, r0.w
-add.f r2.x, c15.y, (neg)r0.z
-add.f r3.y, c13.x, r1.z
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r0.w, r0.w
-mul.f r2.x, r2.x, c8.y
-mov.f32f32 r3.y, r3.y
-bary.f r3.z, 18, r1.x
-mul.f r0.z, r0.z, c12.z
-mov.f32f32 r4.w, r2.y
-mul.f r2.y, r3.y, c3.w
-mad.f32 r0.y, r0.w, r3.z, r0.y
-add.f r0.z, r0.z, r2.x
-add.f r0.w, c13.z, r1.z
-mov.f32f32 r1.z, r2.y
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r4.y, r1.z
-bary.f r1.z, 10, r1.x
-max.f r0.y, c12.y, r0.y
-mov.f32f32 r0.z, r0.z
-mul.f r0.w, r0.w, c3.w
-add.f r1.z, r1.z, c12.w
-mov.f32f32 r0.y, r0.y
-max.f r0.z, r0.z, c12.y
-mov.f32f32 r2.x, r0.w
-mov.f32f32 r3.y, r1.z
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r5.z, r2.w
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r4.z, r3.y
-log2 r0.y, r0.y
-(ss)mul.f r0.y, c9.x, r0.y
-min.f r0.z, r0.z, c12.z
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r3.y, r0.x
-mov.f32f32 r5.w, r0.w
-mov.f32f32 r0.x, r2.y
-sam.s (f32)(x)r3.z, r4.x, s#2, t#2
-(sy)mov.f32f32 r0.w, r3.z
-mov.f32f32 r0.y, r0.y
-add.f r2.y, c15.y, (neg)r0.z
-add.f r2.w, c15.y, (neg)r0.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r3.x, r3.x
-add.f r3.z, c13.y, (neg)r2.z
-mul.f r2.y, r2.y, c7.z
-(ss)mul.f r4.x, r2.w, c7.y
-add.f r2.w, c15.y, (neg)r0.z
-mov.f32f32 r4.y, r3.z
+mul.f r0.w, r0.w, r3.w
+add.f r0.z, c13.x, r0.z
+mul.f r0.y, r0.y, c14.x
+bary.f r4.y, 16, r1.x
+add.f r1.z, r1.z, (neg)r4.x
+mul.f r4.z, r0.z, c3.z
+mul.f r5.y, r0.x, c3.z
+mul.f r0.x, r0.w, r4.y
+add.f r0.z, c13.z, r1.z
+mov.f32f32 r6.x, r4.z
+mov.f32f32 r0.w, r1.z
exp2 r0.y, r0.y
-mov.f32f32 r3.z, c6.y
-mov.f32f32 r3.w, c6.x
-mov.f32f32 r4.z, c6.z
-mul.f r5.x, r3.x, r4.y
-mul.f r5.y, r3.z, c10.y
-mul.f r3.w, r3.w, c10.x
-mul.f r6.y, r2.w, c7.x
-mul.f r0.w, r5.x, r0.w
-mov.f32f32 r3.z, r0.x
-mov.f32f32 r0.x, r1.z
-mul.f r2.w, r4.z, c10.z
-(ss)mul.f r4.z, r5.y, r0.y
-mul.f r5.x, r3.w, r0.y
-mov.f32f32 r3.w, r0.x
-mul.f r0.x, r2.w, r0.y
-(ss)mov.f32f32 r0.y, r4.z
-mov.f32f32 r2.w, r5.x
-bary.f r4.z, 14, r1.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r5.x, r2.x
-sam.s (f32)(x)r6.z, r3.y, s#2, t#2
-(sy)mov.f32f32 r2.x, r6.z
-add.f r1.w, r1.w, c12.z
-(ss)mov.f32f32 r3.y, r4.z
-mov.f32f32 r3.z, r1.z
-mov.f32f32 r1.z, r1.z
-mul.f r3.w, r1.w, r4.y
-mov.f32f32 r4.y, r3.y
-bary.f r3.y, 15, r1.x
-mov.f32f32 r5.y, r3.z
-mad.f32 r0.w, r3.w, r2.x, r0.w
-mov.f32f32 r6.x, r1.z
-add.f r1.z, r2.z, c12.z
-bary.f r2.x, 12, r1.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.z, r3.y
-sam.s (f32)(x)r4.z, r4.w, s#2, t#2
-(sy)mov.f32f32 r3.y, r4.z
-mul.f r3.x, r3.x, r1.z
-sam.s (f32)(x)r4.z, r5.z, s#2, t#2
-(sy)mov.f32f32 r3.z, r4.z
-mov.f32f32 r4.z, r2.z
-mov.f32f32 r2.x, r2.x
-mul.f r1.z, r1.w, r1.z
-mad.f32 r0.w, r3.x, r3.z, r0.w
-bary.f r1.w, 13, r1.x
-mov.f32f32 r3.z, r2.x
-mov.f32f32 r2.x, c12.z
-mov.f32f32 r0.w, r0.w
-(ss)nop
-sam (f32)(xyz)r4.y, r4.y, s#0, t#0
-bary.f r2.z, 2, r1.x
-mad.f32 r0.w, r1.z, r3.y, r0.w
-bary.f r1.z, 1, r1.x
-bary.f (ei)r1.x, 0, r1.x
-(sy)mad.f32 r0.x, r4.w, r2.z, r0.x
-mov.f32f32 r0.w, r0.w
-mad.f32 r0.y, r4.z, r1.z, r0.y
-mad.f32 r1.x, r4.y, r1.x, r2.w
+(ss)mov.f32f32 r1.z, r0.y
+mul.f r2.z, r2.z, r3.w
+bary.f r3.w, 17, r1.x
+add.f r0.w, c13.x, r0.w
+add.f r1.z, c15.y, (neg)r1.z
+mov.f32f32 r6.w, r5.y
+mul.f r5.z, r0.z, c3.w
+mul.f r7.x, r0.w, c3.w
+mul.f r0.z, r1.z, c8.y
+(ss)mul.f r0.y, r0.y, c12.z
+mad.f32 r0.x, r2.z, r3.w, r0.x
+mov.f32f32 r6.y, r7.x
+bary.f r0.w, 10, r1.x
+add.f r0.y, r0.y, r0.z
+mul.f r0.z, r2.w, r2.y
+bary.f r1.z, 18, r1.x
+add.f r5.w, r0.w, c12.w
+max.f r0.y, r0.y, c12.y
+mov.f32f32 r4.w, r5.z
+mad.f32 r0.x, r0.z, r1.z, r0.x
+mov.f32f32 r6.z, r5.w
+min.f r0.y, r0.y, c12.z
+mov.f32f32 r7.y, r5.w
+mov.f32f32 r5.x, r5.w
+max.f r0.x, c12.y, r0.x
+nop
+add.f r0.z, c15.y, (neg)r0.y
+sam.s (f32)(x)r7.z, r6.x, s#2, t#2
+add.f r0.w, c13.y, (neg)r3.y
+add.f r1.z, c15.y, (neg)r0.y
+add.f r2.y, c15.y, (neg)r0.y
+mul.f r0.z, r0.z, c7.z
+mov.f32f32 r2.z, r0.w
+add.f r2.w, c13.y, (neg)r3.z
+mul.f r1.z, r1.z, c7.y
+mul.f r3.y, r2.y, c7.x
+log2 r0.x, r0.x
+(ss)mul.f r0.x, c9.x, r0.x
+mov.f32f32 r2.y, r2.w
+sam.s (f32)(x)r3.z, r6.w, s#2, t#2
nop
+sam.s (f32)(x)r3.w, r4.z, s#2, t#2
+sam.s (f32)(x)r4.x, r5.y, s#2, t#2
+add.f r1.w, r1.w, c12.z
+add.f r3.x, r3.x, c12.z
+mul.f r2.z, r2.z, r2.y
+bary.f r2.y, 13, r1.x
+mov.f32f32 r4.y, c6.z
+(ss)bary.f r4.z, 14, r1.x
+(sy)mul.f r2.z, r2.z, r7.z
+mul.f r2.w, r1.w, r2.w
+exp2 r0.x, r0.x
+mov.f32f32 r4.w, c6.y
+mov.f32f32 r5.x, c6.x
+mul.f r4.y, r4.y, c10.z
+mad.f32 r2.z, r2.w, r3.z, r2.z
+mul.f r0.w, r0.w, r3.x
+mul.f r2.w, r4.w, c10.y
+mul.f r3.z, r5.x, c10.x
+(ss)mul.f r4.y, r4.y, r0.x
+mad.f32 r0.w, r0.w, r3.w, r2.z
+mul.f r1.w, r1.w, r3.x
+bary.f r4.w, 15, r1.x
+mul.f r2.z, r2.w, r0.x
+(ss)mul.f r0.x, r3.z, r0.x
+mad.f32 r0.w, r1.w, r4.x, r0.w
+sam (f32)(w)r5.x, r2.x, s#1, t#1
+(sy)cmps.f.lt r1.w, r5.w, c14.y
+mov.f32f32 r2.w, c12.z
+(ss)bary.f r2.x, 2, r1.x
mul.f r0.w, c13.w, r0.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.y, r1.w
-mov.f32f32 r2.w, r2.x
-mov.f32f32 r1.z, c12.y
+sam (f32)(xyz)r4.z, r4.z, s#0, t#0
+bary.f r2.y, 0, r1.x
+bary.f (ei)r1.x, 1, r1.x
+cov.u32f32 r1.y, r1.w
+mov.f32f32 r1.w, r0.w
+(sy)mad.f32 r2.x, r5.x, r2.x, r4.y
+mad.f32 r1.x, r4.w, r1.x, r2.z
+mad.f32 r0.x, r4.z, r2.y, r0.x
+cmps.f.ne r1.y, r1.y, c12.y
+mul.f r2.x, r2.x, r1.w
+mul.f r1.x, r1.x, r1.w
+mad.f32 r1.w, c5.z, r5.x, r2.x
+mad.f32 r1.x, c5.y, r4.w, r1.x
mul.f r0.x, r0.x, r0.w
-mul.f r0.y, r0.y, r0.w
-mul.f r0.w, r1.x, r0.w
-mov.f32f32 r3.w, r1.y
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
-mad.f32 r0.x, c5.z, r4.w, r0.x
-mad.f32 r0.y, c5.y, r4.z, r0.y
-mov.f32f32 r0.w, r0.w
+mov.f32f32 r0.w, c12.y
+mul.f r1.w, r0.y, r1.w
+mul.f r1.x, r0.y, r1.x
+mad.f32 r0.x, c5.x, r4.z, r0.x
+sel.b32 r0.w, r0.w, r1.y, r5.w
+add.f r0.z, r1.w, r0.z
+add.f r1.x, r1.x, r1.z
+mul.f r0.x, r0.y, r0.x
nop
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
-mad.f32 r0.w, c5.x, r4.y, r0.w
-sam (f32)(w)r3.x, r3.z, s#1, t#1
-(sy)cmps.f.lt r1.x, r3.w, c14.y
-mul.f r0.x, r0.z, r0.x
-mul.f r0.y, r0.z, r0.y
-mov.f32f32 r0.w, r0.w
-cov.u32f32 r1.x, r1.x
-add.f r0.x, r0.x, r2.y
-add.f r0.y, r0.y, r4.x
-mul.f r0.z, r0.z, r0.w
-cmps.f.ne r0.w, r1.x, c12.y
-mov.f32f32 r1.x, r3.w
-(rpt2)nop
-mov.f32f32 r1.x, r1.x
-add.f r0.z, r0.z, r6.y
+mul.f r0.y, r0.z, r0.w
+mul.f r0.z, r1.x, r0.w
(rpt1)nop
-sel.b32 r0.w, r1.z, r0.w, r1.x
+mul.f r2.z, r0.y, c4.z
+mul.f r2.y, r0.z, c4.y
+add.f r0.x, r0.x, r3.y
(rpt2)nop
mul.f r0.x, r0.x, r0.w
-mul.f r0.y, r0.y, r0.w
-mul.f r0.z, r0.z, r0.w
-nop
-mul.f r0.x, r0.x, c4.z
-mul.f r0.y, r0.y, c4.y
-mul.f r0.z, r0.z, c4.x
-nop
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
+(rpt2)nop
+mul.f r2.x, r0.x, c4.x
+end
nop
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
nop
-mov.f32f32 r2.z, r0.x
-mov.f32f32 r2.y, r0.y
-mov.f32f32 r2.x, r0.z
-end
; FRAG: outputs: r2.x (1:0)
-; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r2.x (5:10,cm=f,il=12,b=1) r3.x (5:11,cm=f,il=16,b=1) r2.z (5:12,cm=f,il=20,b=1) r5.x (5:13,cm=f,il=24,b=1)
-; FRAG: 237 instructions, 0 half, 7 full
+; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r2.x (5:10,cm=f,il=12,b=1) r3.x (5:11,cm=f,il=16,b=1) r4.x (5:12,cm=f,il=20,b=1) r5.x (5:13,cm=f,il=24,b=1)
+; FRAG: 155 instructions, 0 half, 8 full
diff --git a/reference/0ad-alpine-valley/0ad-83.asm b/reference/0ad-alpine-valley/0ad-83.asm
index 67a2ad5..e7bcae3 100644
--- a/reference/0ad-alpine-valley/0ad-83.asm
+++ b/reference/0ad-alpine-valley/0ad-83.asm
@@ -32,159 +32,113 @@
@out(r5.y) out21
@out(r5.z) out22
@out(r5.w) out23
+@const(c17.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
+@const(c18.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)mul.f r1.z, c12.x, r0.x
mul.f r1.w, c12.x, r0.w
mad.f32 r1.z, c13.x, r0.y, r1.z
mad.f32 r1.w, c13.x, r1.x, r1.w
mad.f32 r1.z, c14.x, r0.z, r1.z
-mul.f r2.x, c12.z, r0.x
-mul.f r2.y, c12.z, r0.w
-mul.f r0.w, c12.y, r0.w
-add.f r1.z, r1.z, c15.x
-mov.f32f32 r1.w, r1.w
-mad.f32 r2.x, c13.z, r0.y, r2.x
-mad.f32 r2.y, c13.z, r1.x, r2.y
-add.f r2.z, c4.x, (neg)r1.z
mad.f32 r1.w, c14.x, r1.y, r1.w
-mul.f r2.w, c8.w, r1.z
-mul.f r3.x, c8.z, r1.z
-mul.f r3.y, r2.z, r2.z
-mul.f r3.z, c12.y, r0.x
-mov.f32f32 r1.w, r1.w
-mad.f32 r3.z, c13.y, r0.y, r3.z
-absneg.f r3.w, (neg)c5.x
-mad.f32 r3.z, c14.y, r0.z, r3.z
-mul.f r4.x, c8.y, r1.z
-mul.f r4.y, c8.x, r1.z
-mul.f r5.x, r1.w, r3.w
-add.f r5.w, r3.z, c15.y
-mad.f32 r0.w, c13.y, r1.x, r0.w
-mul.f r1.x, c0.w, r1.z
-mul.f r3.z, c0.z, r1.z
+absneg.f r2.x, (neg)c5.x
+mul.f r2.y, c12.y, r0.x
+add.f r2.z, r1.z, c15.x
+mad.f32 r1.z, c13.y, r0.y, r2.y
+mul.f r2.y, c12.z, r0.x
+mul.f r2.w, r1.w, r2.x
+add.f r3.x, c4.x, (neg)r2.z
+mul.f r3.y, c12.y, r0.w
+mul.f r3.z, c8.y, r2.z
+mul.f r3.w, c8.x, r2.z
+mul.f r4.x, r3.x, r3.x
+mad.f32 r1.z, c14.y, r0.z, r1.z
+mad.f32 r3.y, c13.y, r1.x, r3.y
+mul.f r4.y, c8.w, r2.z
+mul.f r5.x, c8.z, r2.z
+add.f r5.w, r1.z, c15.y
+mad.f32 r6.x, c14.y, r1.y, r3.y
+absneg.f r1.z, (neg)c5.y
+mul.f r3.y, c0.w, r2.z
add.f r5.y, c4.y, (neg)r5.w
-mov.f32f32 r0.w, r0.w
-mad.f32 r2.w, c9.w, r5.w, r2.w
-mad.f32 r3.x, c9.z, r5.w, r3.x
-mad.f32 r3.y, r5.y, r5.y, r3.y
-mad.f32 r0.w, c14.y, r1.y, r0.w
-mad.f32 r2.x, c14.z, r0.z, r2.x
-mad.f32 r4.x, c9.y, r5.w, r4.x
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r6.x, r0.w
-add.f r2.x, r2.x, c15.z
-absneg.f r0.w, (neg)c5.y
-mad.f32 r4.y, c9.x, r5.w, r4.y
-mad.f32 r1.x, c1.w, r5.w, r1.x
-add.f r5.z, c4.z, (neg)r2.x
-mad.f32 r5.x, r6.x, r0.w, r5.x
-mad.f32 r2.w, c10.w, r2.x, r2.w
-mad.f32 r3.x, c10.z, r2.x, r3.x
-mad.f32 r3.y, r5.z, r5.z, r3.y
-mov.f32f32 r5.x, r5.x
-mov.f32f32 r2.y, r2.y
+mad.f32 r3.z, c9.y, r5.w, r3.z
+mad.f32 r3.w, c9.x, r5.w, r3.w
+mad.f32 r2.w, r6.x, r1.z, r2.w
+mad.f32 r4.x, r5.y, r5.y, r4.x
+mad.f32 r2.y, c13.z, r0.y, r2.y
+mul.f r0.w, c12.z, r0.w
+mad.f32 r2.y, c14.z, r0.z, r2.y
+mad.f32 r0.w, c13.z, r1.x, r0.w
+mad.f32 r1.x, c9.w, r5.w, r4.y
+mad.f32 r4.y, c9.z, r5.w, r5.x
+add.f r2.y, r2.y, c15.z
+mad.f32 r6.y, c14.z, r1.y, r0.w
+absneg.f r0.w, (neg)c5.z
+mad.f32 r1.y, c1.w, r5.w, r3.y
+add.f r3.y, c4.z, (neg)r2.y
+mad.f32 r3.z, c10.y, r2.y, r3.z
+mad.f32 r3.w, c10.x, r2.y, r3.w
+mad.f32 r2.w, r6.y, r0.w, r2.w
+mad.f32 r4.x, r3.y, r3.y, r4.x
mul.f r0.x, c12.w, r0.x
-mad.f32 r4.x, c10.y, r2.x, r4.x
-mad.f32 r4.y, c10.x, r2.x, r4.y
-mad.f32 r1.y, c14.z, r1.y, r2.y
-rsq r2.y, r3.y
-(ss)mov.f32f32 r2.y, r2.y
-(ss)absneg.f r3.y, (neg)c5.z
+mad.f32 r5.x, c10.w, r2.y, r1.x
+mad.f32 r4.y, c10.z, r2.y, r4.y
mad.f32 r0.x, c13.w, r0.y, r0.x
-mov.f32f32 r6.y, r1.y
-mad.f32 r0.y, r2.z, r2.y, r3.w
-mad.f32 r0.w, r5.y, r2.y, r0.w
-mad.f32 r1.y, r5.z, r2.y, r3.y
-mad.f32 r2.y, r6.y, r3.y, r5.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r2.y, r2.y
-mul.f r2.z, r0.y, r0.y
+mad.f32 r0.y, c2.w, r2.y, r1.y
+mul.f r6.z, c0.z, r2.z
+rsq r1.x, r4.x
+(ss)mov.f32f32 r1.y, r1.x
+mad.f32 r2.x, r3.x, r1.x, r2.x
+max.f r1.x, c17.x, r2.w
mad.f32 r0.x, c14.w, r0.z, r0.x
-mad.f32 r0.z, r0.w, r0.w, r2.z
-max.f r2.y, c17.x, r2.y
-mad.f32 r2.z, c2.w, r2.x, r1.x
-mad.f32 r1.x, c1.z, r5.w, r3.z
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r2.y, r2.y
-mad.f32 r0.z, r1.y, r1.y, r0.z
+mad.f32 r0.z, r5.y, r1.y, r1.z
+mov.f32f32 r1.z, r2.x
+mad.f32 r0.w, r3.y, r1.y, r0.w
+nop
+mov.f32f32 r2.w, r0.z
+mul.f r1.y, r1.z, r1.z
+(ss)mov.f32f32 r4.x, r0.w
+mov.f32f32 r3.x, r1.x
+mad.f32 r0.z, r0.z, r2.w, r1.y
add.f r0.x, r0.x, c15.w
-mad.f32 r6.z, c2.z, r2.x, r1.x
-mul.f r6.w, c0.y, r1.z
-mul.f r7.x, c0.x, r1.z
-mad.f32 r7.y, c7.x, r2.x, c7.y
-mad.f32 r7.z, c7.x, r1.z, c7.y
+mad.f32 r0.z, r0.w, r4.x, r0.z
+mul.f r1.z, r3.x, c6.z
+mul.f r1.y, r3.x, c6.y
+mad.f32 r0.w, c11.y, r0.x, r3.z
+mad.f32 r3.x, c11.x, r0.x, r3.w
+mul.f r1.x, r1.x, c6.x
+mad.f32 r3.w, c11.w, r0.x, r5.x
rsq r0.z, r0.z
-(ss)mov.f32f32 r0.z, r0.z
-mul.f r1.x, r2.y, c6.z
-mul.f r3.y, r2.y, c6.y
-mul.f r2.y, r2.y, c6.x
-mul.f r1.y, r1.y, r0.z
-mul.f r0.w, r0.w, r0.z
-mul.f r0.y, r0.y, r0.z
-mov.f32f32 r0.z, r1.x
-mov.f32f32 r1.x, r1.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.y, r0.y
-nop
-mov.f32f32 r5.z, r1.x
-mov.f32f32 r5.y, r0.w
-mov.f32f32 r5.x, r0.y
-mov.f32f32 r1.z, r0.z
-mov.f32f32 r0.y, r3.y
-mov.f32f32 r0.z, r2.y
-(rpt1)nop
-mov.f32f32 r1.y, r0.y
-mov.f32f32 r1.x, r0.z
-mad.f32 r0.y, c11.w, r0.x, r2.w
-mad.f32 r0.z, c11.z, r0.x, r3.x
-mad.f32 r0.w, c11.y, r0.x, r4.x
-mad.f32 r2.y, c11.x, r0.x, r4.y
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-mul.f r0.w, r0.w, c16.y
-mul.f r2.y, r2.y, c16.x
-mov.f32f32 r3.w, r0.y
-mov.f32f32 r3.z, r0.z
-mov.f32f32 r3.y, r0.w
-mov.f32f32 r3.x, r2.y
-mad.f32 r0.y, c3.w, r0.x, r2.z
-mad.f32 r0.z, c3.z, r0.x, r6.z
-mad.f32 r2.y, c1.y, r5.w, r6.w
-mad.f32 r2.z, c1.x, r5.w, r7.x
-mov.f32f32 r0.w, r0.y
-mov.f32f32 r0.z, r0.z
-mad.f32 r0.y, c2.y, r2.x, r2.y
-mad.f32 r2.x, c2.x, r2.x, r2.z
+(ss)mov.f32f32 r3.z, r0.z
+mul.f r5.x, r2.x, r0.z
+mul.f r3.y, r0.w, c16.y
+mul.f r3.x, r3.x, c16.x
+mul.f r5.z, r4.x, r3.z
+mul.f r5.y, r2.w, r3.z
+mad.f32 r3.z, c11.z, r0.x, r4.y
+mad.f32 r0.w, c3.w, r0.x, r0.y
+mad.f32 r0.y, c1.z, r5.w, r6.z
+(ss)mul.f r0.z, c0.y, r2.z
+mad.f32 r0.y, c2.z, r2.y, r0.y
+mad.f32 r2.x, c1.y, r5.w, r0.z
+mad.f32 r0.z, c3.z, r0.x, r0.y
+mad.f32 r0.y, c2.y, r2.y, r2.x
+mul.f r2.x, c0.x, r2.z
mad.f32 r0.y, c3.y, r0.x, r0.y
+mad.f32 r2.x, c1.x, r5.w, r2.x
+mad.f32 r4.y, c7.x, r2.y, c7.y
+mad.f32 r2.x, c2.x, r2.y, r2.x
+mad.f32 r4.x, c7.x, r2.z, c7.y
mad.f32 r0.x, c3.x, r0.x, r2.x
-mov.f32f32 r2.x, r7.y
-mov.f32f32 r2.y, r7.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r4.y, r2.x
-mov.f32f32 r4.x, r2.y
-mov.f32f32 r2.x, r6.y
+mov.f32f32 r2.z, r6.y
mov.f32f32 r2.y, r6.x
-mov.f32f32 r1.w, r1.w
-nop
-mov.f32f32 r2.z, r2.x
-mov.f32f32 r2.y, r2.y
mov.f32f32 r2.x, r1.w
+mov.f32f32 r5.w, (0.000000)
+mov.f32f32 r2.w, (0.000000)
mov.f32f32 r1.w, (0.000000)
-mov.f32f32 r2.w, r4.w
-mov.f32f32 r4.z, r4.z
-mov.f32f32 r6.x, (0.000000)
-mov.f32f32 r5.w, r1.w
-mov.f32f32 r4.w, r2.w
-mov.f32f32 r4.z, r4.z
-mov.f32f32 r2.w, r6.x
-mov.f32f32 r1.w, (0.000000)
-(rpt2)nop
-mov.f32f32 r1.w, r1.w
end
nop
+nop
; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11) r4.x (5:12) r5.x (5:13)
; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r4.z (0:0,cm=3,il=16,b=0)
-; VERT: 154 instructions, 0 half, 8 full
+; VERT: 102 instructions, 0 half, 7 full
diff --git a/reference/0ad-alpine-valley/0ad-84.asm b/reference/0ad-alpine-valley/0ad-84.asm
index c6a3a6f..9c8ac11 100644
--- a/reference/0ad-alpine-valley/0ad-84.asm
+++ b/reference/0ad-alpine-valley/0ad-84.asm
@@ -24,115 +24,81 @@
@out(r3.y) out13
@out(r3.z) out14
@out(r3.w) out15
+@const(c16.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
+@const(c17.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)mul.f r1.z, c11.x, r0.w
mul.f r1.w, c11.x, r0.x
mad.f32 r1.z, c12.x, r1.x, r1.z
mad.f32 r1.w, c12.x, r0.y, r1.w
-mul.f r2.x, c11.z, r0.x
-mad.f32 r1.w, c13.x, r0.z, r1.w
-mov.f32f32 r1.z, r1.z
-mad.f32 r2.x, c12.z, r0.y, r2.x
mad.f32 r1.z, c13.x, r1.y, r1.z
-add.f r1.w, r1.w, c14.x
-mad.f32 r2.x, c13.z, r0.z, r2.x
-mul.f r2.y, c11.y, r0.w
-mov.f32f32 r1.z, r1.z
-mul.f r2.z, c7.w, r1.w
-mul.f r2.w, c7.z, r1.w
-mul.f r3.x, c7.y, r1.w
+mad.f32 r1.w, c13.x, r0.z, r1.w
+mul.f r2.x, c11.z, r0.x
+mul.f r2.y, c11.y, r0.x
mul.f r1.z, r1.z, (neg)c4.x
-mad.f32 r2.y, c12.y, r1.x, r2.y
-mul.f r3.y, c11.y, r0.x
-mul.f r4.x, c7.x, r1.w
-mad.f32 r3.y, c12.y, r0.y, r3.y
-mov.f32f32 r2.y, r2.y
-mad.f32 r3.y, c13.y, r0.z, r3.y
-mad.f32 r2.y, c13.y, r1.y, r2.y
-mul.f r4.y, c0.w, r1.w
-mul.f r4.z, c0.z, r1.w
-mul.f r4.w, c0.y, r1.w
-mov.f32f32 r2.y, r2.y
-add.f r3.y, r3.y, c14.y
-mul.f r5.x, c0.x, r1.w
-add.f r5.y, r2.x, c14.z
-mad.f32 r1.z, (neg)c4.y, r2.y, r1.z
-mad.f32 r2.x, c8.w, r3.y, r2.z
-mad.f32 r2.y, c8.z, r3.y, r2.w
-mad.f32 r2.z, c8.y, r3.y, r3.x
-mov.f32f32 r1.z, r1.z
+mul.f r2.z, c11.y, r0.w
+add.f r1.w, r1.w, c14.x
+mad.f32 r2.z, c12.y, r1.x, r2.z
+mad.f32 r2.x, c12.z, r0.y, r2.x
+mad.f32 r2.z, c13.y, r1.y, r2.z
+mul.f r2.w, c7.y, r1.w
+mul.f r3.x, c7.x, r1.w
+mul.f r3.y, c7.w, r1.w
+mad.f32 r1.z, (neg)c4.y, r2.z, r1.z
mul.f r0.w, c11.z, r0.w
-mad.f32 r2.x, c9.w, r5.y, r2.x
+mad.f32 r2.y, c12.y, r0.y, r2.y
mad.f32 r0.w, c12.z, r1.x, r0.w
+mad.f32 r1.x, c13.y, r0.z, r2.y
+mad.f32 r0.w, c13.z, r1.y, r0.w
+mul.f r1.y, c7.z, r1.w
+mul.f r2.y, c0.w, r1.w
+mul.f r2.z, c0.z, r1.w
+mad.f32 r0.w, (neg)c4.z, r0.w, r1.z
+add.f r4.x, r1.x, c14.y
+mul.f r4.y, c0.y, r1.w
+mul.f r4.z, c0.x, r1.w
+max.f r0.w, c16.x, r0.w
+mad.f32 r2.w, c8.y, r4.x, r2.w
+mad.f32 r3.x, c8.x, r4.x, r3.x
+mad.f32 r3.y, c8.w, r4.x, r3.y
+mov.f32f32 r4.w, r0.w
+mad.f32 r2.x, c13.z, r0.z, r2.x
+mul.f r1.x, r0.w, c5.x
+mad.f32 r0.w, c8.z, r4.x, r1.y
+mul.f r1.z, r4.w, c5.z
+mul.f r1.y, r4.w, c5.y
+add.f r4.w, r2.x, c14.z
+mad.f32 r2.x, c1.w, r4.x, r2.y
+mad.f32 r2.y, c1.z, r4.x, r2.z
+mad.f32 r2.z, c1.y, r4.x, r4.y
+mad.f32 r2.w, c9.y, r4.w, r2.w
mul.f r0.x, c11.w, r0.x
-mad.f32 r1.x, c9.z, r5.y, r2.y
-mad.f32 r2.y, c9.y, r5.y, r2.z
-mov.f32f32 r0.w, r0.w
+mad.f32 r3.x, c9.x, r4.w, r3.x
mad.f32 r0.x, c12.w, r0.y, r0.x
-mad.f32 r0.y, c13.z, r1.y, r0.w
+mad.f32 r0.y, c9.w, r4.w, r3.y
mad.f32 r0.x, c13.w, r0.z, r0.x
-mad.f32 r0.z, c8.x, r3.y, r4.x
-mad.f32 r0.w, c1.w, r3.y, r4.y
-mov.f32f32 r0.y, r0.y
+mad.f32 r0.z, c9.z, r4.w, r0.w
+mad.f32 r0.w, c2.w, r4.w, r2.x
+mad.f32 r4.y, c2.z, r4.w, r2.y
add.f r0.x, r0.x, c14.w
-mad.f32 r0.z, c9.x, r5.y, r0.z
-mad.f32 r0.w, c2.w, r5.y, r0.w
-mad.f32 r0.y, (neg)c4.z, r0.y, r1.z
-mad.f32 r1.y, c10.w, r0.x, r2.x
-mad.f32 r1.x, c10.z, r0.x, r1.x
-mad.f32 r1.z, c10.y, r0.x, r2.y
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.x, r1.x
-mul.f r1.z, r1.z, c15.y
-max.f r0.y, c16.x, r0.y
-mov.f32f32 r2.w, r1.y
-mov.f32f32 r2.z, r1.x
-mov.f32f32 r2.y, r1.z
-mov.f32f32 r0.y, r0.y
-mad.f32 r0.z, c10.x, r0.x, r0.z
+mad.f32 r5.x, c2.y, r4.w, r2.z
+mad.f32 r4.x, c1.x, r4.x, r4.z
+mad.f32 r3.y, c6.x, r4.w, c6.y
+mad.f32 r2.x, c10.y, r0.x, r2.w
+mad.f32 r3.x, c10.x, r0.x, r3.x
+mad.f32 r2.w, c10.w, r0.x, r0.y
+mad.f32 r2.z, c10.z, r0.x, r0.z
+mul.f r2.y, r2.x, c15.y
+mul.f r2.x, r3.x, c15.x
mad.f32 r0.w, c3.w, r0.x, r0.w
-mad.f32 r3.x, c1.z, r3.y, r4.z
-mul.f r1.x, r0.y, c5.z
-mul.f r1.y, r0.y, c5.y
-mul.f r0.y, r0.y, c5.x
-mul.f r0.z, r0.z, c15.x
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r0.y, r0.y
-nop
-mov.f32f32 r1.z, r1.x
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.x, r0.y
-mov.f32f32 r2.x, r0.z
-mov.f32f32 r0.w, r0.w
-mad.f32 r0.y, c2.z, r5.y, r3.x
-mad.f32 r0.z, c1.y, r3.y, r4.w
-mad.f32 r0.y, c3.z, r0.x, r0.y
-mad.f32 r3.x, c2.y, r5.y, r0.z
-mad.f32 r3.y, c1.x, r3.y, r5.x
-mad.f32 r4.x, c6.x, r5.y, c6.y
-mov.f32f32 r0.z, r0.y
-mad.f32 r0.y, c3.y, r0.x, r3.x
-mad.f32 r3.x, c2.x, r5.y, r3.y
-mov.f32f32 r3.y, r4.x
-mad.f32 r1.w, c6.x, r1.w, c6.y
-mov.f32f32 r0.y, r0.y
-mad.f32 r0.x, c3.x, r0.x, r3.x
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r1.w, r1.w
-nop
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r3.x, r1.w
-mov.f32f32 r1.w, r3.z
-mov.f32f32 r4.x, (0.000000)
-mov.f32f32 r3.w, r3.w
-nop
-mov.f32f32 r3.z, r1.w
-mov.f32f32 r1.w, r4.x
+mad.f32 r0.z, c3.z, r0.x, r4.y
+mad.f32 r0.y, c3.y, r0.x, r5.x
+mad.f32 r4.x, c2.x, r4.w, r4.x
+mad.f32 r3.x, c6.x, r1.w, c6.y
+mad.f32 r0.x, c3.x, r0.x, r4.x
+mov.f32f32 r1.w, (0.000000)
end
nop
; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11)
; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r3.z (0:0,cm=3,il=16,b=0)
-; VERT: 107 instructions, 0 half, 6 full
+; VERT: 71 instructions, 0 half, 6 full
diff --git a/reference/0ad-alpine-valley/0ad-85.asm b/reference/0ad-alpine-valley/0ad-85.asm
index 1d73264..f1c05da 100644
--- a/reference/0ad-alpine-valley/0ad-85.asm
+++ b/reference/0ad-alpine-valley/0ad-85.asm
@@ -8,215 +8,155 @@
@out(r2.y) out1
@out(r2.z) out2
@out(r2.w) out3
+@const(c10.x) 0x3f000000, 0x00000000, 0x3f800000, 0xbb449ba6
+@const(c11.x) 0xbf000000, 0x40000000, 0x3f800000, 0x3de38866
+@const(c12.x) 0x3fb8aa65, 0x3cf5c28f, 0x00000000, 0x00000000
+@const(c13.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.x, 4, r1.x
add.f r0.y, r0.w, c10.y
bary.f r0.w, 5, r1.x
bary.f r1.z, 10, r1.x
-add.f r1.w, r0.x, c11.x
-bary.f r2.x, 8, r1.x
+add.f r2.x, r0.x, c11.x
+bary.f r1.w, 11, r1.x
add.f r2.y, r0.w, c11.x
-mov.f32f32 r1.z, r1.z
-floor.f r2.z, r1.w
+bary.f r2.z, 8, r1.x
+floor.f r3.x, r2.x
rcp r0.y, r0.y
add.f r0.z, r0.z, c10.y
-floor.f r2.w, r2.y
-mov.f32f32 r3.x, r1.z
-add.f r1.z, r1.w, (neg)r2.z
+floor.f r3.y, r2.y
+bary.f r2.w, 9, r1.x
+add.f r2.x, r2.x, (neg)r3.x
(ss)mul.f r0.y, r0.z, r0.y
-add.f r0.z, r2.y, (neg)r2.w
-bary.f r1.w, 11, r1.x
-mov.f32f32 r1.z, r1.z
+absneg.f r0.z, (neg)c8.x
+add.f r2.y, r2.y, (neg)r3.y
+mov.f32f32 r3.x, r2.x
+sam (f32)(xyzw)r3.y, r1.z, s#0, t#0
+(sy)(ss)add.f r1.z, c13.y, (neg)r4.x
+mul.f r0.z, r0.z, c8.x
+mov.f32f32 r1.w, r2.y
+mul.f r4.y, c10.x, r3.x
+add.f r3.x, c11.y, (neg)r3.x
+mul.f r0.z, r0.z, r0.y
mov.f32f32 r0.y, r0.y
-absneg.f r2.y, (neg)c8.x
-mov.f32f32 r0.z, r0.z
-mul.f r2.z, c10.x, r1.z
-mov.f32f32 r1.w, r1.w
-mul.f r2.y, r2.y, c8.x
-add.f r2.w, c11.y, (neg)r1.z
-mov.f32f32 r2.z, r2.z
-mul.f r3.z, c10.x, r0.z
-mul.f r2.y, r2.y, r0.y
-mov.f32f32 r3.y, r1.w
-add.f r0.x, r0.x, (neg)r2.z
-mov.f32f32 r1.w, r3.z
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r2.z, r2.w
-mov.f32f32 r0.x, r0.x
-add.f r0.w, r0.w, (neg)r1.w
-mul.f r0.y, r2.y, r0.y
-sam (f32)(xyzw)r2.w, r3.x, s#0, t#0
-(sy)add.f r1.w, c13.y, (neg)r3.z
-add.f r2.y, c11.x, r0.x
+add.f r0.x, r0.x, (neg)r4.y
+mul.f r4.y, c10.x, r1.w
+mov.f32f32 r4.z, r3.x
+mul.f r0.y, r0.z, r0.y
+mov.f32f32 r0.z, r0.x
+add.f r0.w, r0.w, (neg)r4.y
add.f r0.x, c11.z, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r0.x, r0.x
mul.f r0.y, r0.y, c12.x
-add.f r3.w, c11.z, r0.w
-mul.f r2.y, r2.y, c3.z
-mul.f r0.x, r0.x, c3.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r4.x, r2.y
-mov.f32f32 r4.y, r0.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r4.z, r4.x
-add.f r0.w, c11.x, r0.w
-exp2 r0.y, r0.y
-(ss)mov.f32f32 r0.y, r0.y
-mov.f32f32 r4.x, r4.y
-mov.f32f32 r5.y, r0.x
-mov.f32f32 r0.x, r0.w
-add.f r0.w, c13.y, (neg)r0.y
-mov.f32f32 r6.x, r4.x
-mul.f r3.w, r3.w, c3.w
-mul.f r0.x, r0.x, c3.w
-mul.f r0.w, r0.w, c8.y
-mul.f r0.y, r0.y, c10.z
-mov.f32f32 r4.x, r3.w
-mov.f32f32 r4.y, r0.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r6.w, r2.y
-mov.f32f32 r2.y, r3.w
-mov.f32f32 r4.w, r4.y
-bary.f r3.w, 6, r1.x
-add.f r0.y, r0.y, r0.w
-mov.f32f32 r0.w, r4.x
-mov.f32f32 r5.z, r0.x
-add.f r0.x, r3.w, c10.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r6.y, r0.w
-mov.f32f32 r7.x, r2.y
-mov.f32f32 r0.w, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r2.y, r0.x
-mov.f32f32 r3.w, r0.x
-mov.f32f32 r5.x, r0.w
-max.f r0.y, r0.y, c10.y
-mov.f32f32 r6.z, r2.y
-mov.f32f32 r5.w, r3.w
-mov.f32f32 r0.x, r0.x
-mul.f r0.w, r1.w, c4.z
-add.f r1.w, c13.y, (neg)r3.z
-sam.s (f32)(x)r3.w, r4.z, s#2, t#2
-(sy)mov.f32f32 r2.y, r3.w
-min.f r0.y, r0.y, c10.z
-sam.s (f32)(x)r3.w, r6.x, s#2, t#2
-(sy)mov.f32f32 r3.w, r3.w
-sam.s (f32)(x)r4.x, r5.y, s#2, t#2
-(sy)mov.f32f32 r4.x, r4.x
-mov.f32f32 r2.y, r2.y
-add.f r4.y, c11.y, (neg)r0.z
-(ss)add.f r4.z, c13.y, (neg)r0.y
-add.f r4.w, c13.y, (neg)r0.y
-add.f r5.x, c13.y, (neg)r0.y
-mov.f32f32 r4.y, r4.y
-mul.f r4.z, r4.z, c7.z
-mul.f r4.w, r4.w, c7.y
-mul.f r5.x, r5.x, c7.x
-mul.f r5.y, r2.z, r4.y
-mov.f32f32 r7.y, r0.x
-mul.f r0.x, r3.z, c10.z
-mul.f r1.w, r1.w, c4.y
-mul.f r2.y, r5.y, r2.y
-add.f r1.z, r1.z, c10.z
-add.f r0.x, r0.x, r0.w
-mul.f r0.w, r3.z, c10.z
-sam.s (f32)(x)r5.y, r6.w, s#2, t#2
-(sy)mov.f32f32 r5.y, r5.y
-mul.f r4.y, r1.z, r4.y
-add.f r5.z, c13.y, (neg)r3.z
-mul.f r0.x, r3.y, r0.x
-add.f r0.w, r0.w, r1.w
-mad.f32 r1.w, r4.y, r4.x, r2.y
-mul.f r2.y, r5.z, c4.x
-mov.f32f32 r0.x, r0.x
-bary.f r3.y, 2, r1.x
-mov.f32f32 r1.w, r1.w
-add.f r0.z, r0.z, c10.z
-mul.f r0.w, r3.x, r0.w
-mul.f r3.x, r3.z, c10.z
-mov.f32f32 r2.x, r2.x
-mul.f r2.z, r2.z, r0.z
-mul.f r3.y, r0.x, r3.y
-mov.f32f32 r0.w, r0.w
-add.f r2.y, r3.x, r2.y
-mad.f32 r1.w, r2.z, r5.y, r1.w
-bary.f r2.z, 1, r1.x
-mov.f32f32 r4.x, r2.x
-mul.f r2.x, r2.w, r2.y
-mov.f32f32 r1.w, r1.w
-mul.f r0.z, r1.z, r0.z
-mul.f r1.z, r0.w, r2.z
-mov.f32f32 r2.x, r2.x
-nop
-mad.f32 r0.z, r0.z, r3.w, r1.w
-bary.f r1.w, 0, r1.x
-bary.f (ei)r1.x, 9, r1.x
-mov.f32f32 r1.y, c10.z
-mov.f32f32 r0.z, r0.z
-mul.f r1.w, r2.x, r1.w
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r2.w, r1.y
-mul.f r0.z, c11.w, r0.z
-mov.f32f32 r1.y, c10.y
-(rpt1)nop
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r4.y, r1.x
+add.f r0.z, c11.x, r0.z
+mov.f32f32 r4.y, r0.w
+mul.f r4.w, r0.x, c3.z
+add.f r0.x, c11.z, r0.w
+mul.f r5.z, r0.z, c3.z
+add.f r0.z, c11.x, r4.y
+mov.f32f32 r6.y, r4.w
+mul.f r5.x, r0.x, c3.w
+mov.f32f32 r7.x, r5.z
+mul.f r6.z, r0.z, c3.w
+exp2 r0.x, r0.y
+(ss)mov.f32f32 r0.y, r0.x
+bary.f r0.z, 6, r1.x
+mov.f32f32 r5.w, r5.x
+mov.f32f32 r7.y, r6.z
+add.f r0.y, c13.y, (neg)r0.y
+add.f r5.y, r0.z, c10.w
+add.f r0.z, c11.y, (neg)r1.w
+mul.f r0.x, r0.x, c10.z
+add.f r0.w, c13.y, (neg)r4.x
+mov.f32f32 r7.z, r5.y
+mul.f r0.y, r0.y, c8.y
+mov.f32f32 r6.w, r5.y
+mov.f32f32 r6.x, r5.y
(rpt1)nop
-mul.f r1.x, r3.y, r0.z
-mul.f r1.z, r1.z, r0.z
-mul.f r0.z, r1.w, r0.z
+add.f r0.x, r0.x, r0.y
+sam.s (f32)(x)r7.x, r7.x, s#2, t#2
+mov.f32f32 r0.y, r0.z
nop
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.z, r1.z
-mad.f32 r0.x, c6.z, r0.x, r1.x
-mad.f32 r0.w, c6.y, r0.w, r1.z
-mov.f32f32 r0.z, r0.z
-sam (f32)(w)r3.x, r4.x, s#1, t#1
-(sy)cmps.f.lt r1.x, r3.w, c12.y
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.w, r0.w
-mad.f32 r0.z, c6.x, r2.x, r0.z
-cov.u32f32 r1.x, r1.x
-mul.f r0.x, r0.y, r0.x
-mul.f r0.w, r0.y, r0.w
-mov.f32f32 r0.z, r0.z
-cmps.f.ne r1.x, r1.x, c10.y
-add.f r0.x, r0.x, r4.z
-mov.f32f32 r1.z, r3.w
-add.f r0.w, r0.w, r4.w
-mul.f r0.y, r0.y, r0.z
-nop
-mov.f32f32 r0.z, r1.z
-(rpt2)nop
-sel.b32 r0.z, r1.y, r1.x, r0.z
-add.f r0.y, r0.y, r5.x
-(rpt1)nop
-mul.f r0.x, r0.x, r0.z
-mul.f r0.w, r0.w, r0.z
-mul.f r0.y, r0.y, r0.z
+(ss)nop
+sam.s (f32)(x)r7.y, r6.y, s#2, t#2
+sam.s (f32)(x)r7.z, r4.w, s#2, t#2
+mul.f r0.w, r0.w, c4.z
+add.f r1.w, c13.y, (neg)r4.x
+mul.f r0.y, r4.z, r0.y
+max.f r0.x, r0.x, c10.y
+sam.s (f32)(x)r4.y, r5.z, s#2, t#2
+mul.f r4.z, r4.x, c10.z
+mul.f r1.w, r1.w, c4.y
+(sy)mul.f r0.y, r0.y, r7.x
+add.f r2.x, r2.x, c10.z
+min.f r0.x, r0.x, c10.z
+add.f r0.w, r4.z, r0.w
+mul.f r4.z, r4.x, c10.z
+mul.f r0.z, r2.x, r0.z
+(ss)add.f r4.w, c13.y, (neg)r0.x
+add.f r5.x, c13.y, (neg)r0.x
+add.f r5.y, c13.y, (neg)r0.x
+mad.f32 r0.y, r0.z, r7.y, r0.y
+add.f r0.z, r2.y, c10.z
+mul.f r2.y, r4.w, c7.z
+mul.f r4.w, r5.x, c7.y
+mul.f r5.x, r5.y, c7.x
+mul.f r3.x, r3.x, r0.z
+add.f r1.w, r4.z, r1.w
+mul.f r1.z, r1.z, c4.x
+mul.f r0.w, r3.w, r0.w
+mad.f32 r0.y, r3.x, r4.y, r0.y
+mul.f r0.z, r2.x, r0.z
+mul.f r2.x, r4.x, c10.z
+mov.f32f32 r3.x, r0.w
+bary.f r3.w, 2, r1.x
+mad.f32 r0.y, r0.z, r7.z, r0.y
+mul.f r0.z, r3.z, r1.w
+add.f r1.z, r2.x, r1.z
+mul.f r1.w, r3.x, r3.w
+mul.f r0.y, c11.w, r0.y
+mov.f32f32 r2.x, r0.z
+mul.f r1.z, r3.y, r1.z
+bary.f r3.x, 1, r1.x
+mov.f32f32 r3.y, r0.y
+sam (f32)(w)r3.z, r2.z, s#1, t#1
+(sy)(ss)cmps.f.lt r2.z, r4.y, c12.y
+mov.f32f32 r3.z, r1.z
+mul.f r2.x, r2.x, r3.x
+mul.f r1.w, r1.w, r3.y
+bary.f (ei)r1.x, 0, r1.x
+mad.f32 r0.w, c6.z, r0.w, r1.w
+mul.f r1.y, r2.x, r3.y
+cov.u32f32 r1.w, r2.z
+mov.f32f32 r2.w, c10.z
+mul.f r0.w, r0.x, r0.w
+mad.f32 r0.z, c6.y, r0.z, r1.y
+mul.f r1.x, r3.z, r1.x
+cmps.f.ne r1.y, r1.w, c10.y
+add.f r0.w, r0.w, r2.y
+mov.f32f32 r1.w, c10.y
+mul.f r0.z, r0.x, r0.z
+mul.f r0.y, r1.x, r0.y
nop
-mul.f r0.x, r0.x, c5.z
-mul.f r0.z, r0.w, c5.y
-mul.f r0.y, r0.y, c5.x
+sel.b32 r1.x, r1.w, r1.y, r4.y
+add.f r0.z, r0.z, r4.w
+mad.f32 r0.y, c6.x, r1.z, r0.y
nop
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.y, r0.y
+mul.f r0.w, r0.w, r1.x
+mul.f r0.z, r0.z, r1.x
+(rpt1)nop
+mul.f r2.z, r0.w, c5.z
+mul.f r2.y, r0.z, c5.y
+mul.f r0.x, r0.x, r0.y
+(rpt2)nop
+add.f r0.x, r0.x, r5.x
+(rpt2)nop
+mul.f r0.x, r0.x, r1.x
+(rpt2)nop
+mul.f r2.x, r0.x, c5.x
+end
nop
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.y, r0.y
nop
-mov.f32f32 r2.z, r0.x
-mov.f32f32 r2.y, r0.z
-mov.f32f32 r2.x, r0.y
-end
nop
; FRAG: outputs: r2.x (1:0)
; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r2.x (5:10,cm=f,il=12,b=1) r1.x (5:11,cm=f,il=16,b=1)
-; FRAG: 212 instructions, 0 half, 8 full
+; FRAG: 149 instructions, 0 half, 8 full
diff --git a/reference/0ad-alpine-valley/0ad-86.asm b/reference/0ad-alpine-valley/0ad-86.asm
index c6a3a6f..9c8ac11 100644
--- a/reference/0ad-alpine-valley/0ad-86.asm
+++ b/reference/0ad-alpine-valley/0ad-86.asm
@@ -24,115 +24,81 @@
@out(r3.y) out13
@out(r3.z) out14
@out(r3.w) out15
+@const(c16.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
+@const(c17.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)mul.f r1.z, c11.x, r0.w
mul.f r1.w, c11.x, r0.x
mad.f32 r1.z, c12.x, r1.x, r1.z
mad.f32 r1.w, c12.x, r0.y, r1.w
-mul.f r2.x, c11.z, r0.x
-mad.f32 r1.w, c13.x, r0.z, r1.w
-mov.f32f32 r1.z, r1.z
-mad.f32 r2.x, c12.z, r0.y, r2.x
mad.f32 r1.z, c13.x, r1.y, r1.z
-add.f r1.w, r1.w, c14.x
-mad.f32 r2.x, c13.z, r0.z, r2.x
-mul.f r2.y, c11.y, r0.w
-mov.f32f32 r1.z, r1.z
-mul.f r2.z, c7.w, r1.w
-mul.f r2.w, c7.z, r1.w
-mul.f r3.x, c7.y, r1.w
+mad.f32 r1.w, c13.x, r0.z, r1.w
+mul.f r2.x, c11.z, r0.x
+mul.f r2.y, c11.y, r0.x
mul.f r1.z, r1.z, (neg)c4.x
-mad.f32 r2.y, c12.y, r1.x, r2.y
-mul.f r3.y, c11.y, r0.x
-mul.f r4.x, c7.x, r1.w
-mad.f32 r3.y, c12.y, r0.y, r3.y
-mov.f32f32 r2.y, r2.y
-mad.f32 r3.y, c13.y, r0.z, r3.y
-mad.f32 r2.y, c13.y, r1.y, r2.y
-mul.f r4.y, c0.w, r1.w
-mul.f r4.z, c0.z, r1.w
-mul.f r4.w, c0.y, r1.w
-mov.f32f32 r2.y, r2.y
-add.f r3.y, r3.y, c14.y
-mul.f r5.x, c0.x, r1.w
-add.f r5.y, r2.x, c14.z
-mad.f32 r1.z, (neg)c4.y, r2.y, r1.z
-mad.f32 r2.x, c8.w, r3.y, r2.z
-mad.f32 r2.y, c8.z, r3.y, r2.w
-mad.f32 r2.z, c8.y, r3.y, r3.x
-mov.f32f32 r1.z, r1.z
+mul.f r2.z, c11.y, r0.w
+add.f r1.w, r1.w, c14.x
+mad.f32 r2.z, c12.y, r1.x, r2.z
+mad.f32 r2.x, c12.z, r0.y, r2.x
+mad.f32 r2.z, c13.y, r1.y, r2.z
+mul.f r2.w, c7.y, r1.w
+mul.f r3.x, c7.x, r1.w
+mul.f r3.y, c7.w, r1.w
+mad.f32 r1.z, (neg)c4.y, r2.z, r1.z
mul.f r0.w, c11.z, r0.w
-mad.f32 r2.x, c9.w, r5.y, r2.x
+mad.f32 r2.y, c12.y, r0.y, r2.y
mad.f32 r0.w, c12.z, r1.x, r0.w
+mad.f32 r1.x, c13.y, r0.z, r2.y
+mad.f32 r0.w, c13.z, r1.y, r0.w
+mul.f r1.y, c7.z, r1.w
+mul.f r2.y, c0.w, r1.w
+mul.f r2.z, c0.z, r1.w
+mad.f32 r0.w, (neg)c4.z, r0.w, r1.z
+add.f r4.x, r1.x, c14.y
+mul.f r4.y, c0.y, r1.w
+mul.f r4.z, c0.x, r1.w
+max.f r0.w, c16.x, r0.w
+mad.f32 r2.w, c8.y, r4.x, r2.w
+mad.f32 r3.x, c8.x, r4.x, r3.x
+mad.f32 r3.y, c8.w, r4.x, r3.y
+mov.f32f32 r4.w, r0.w
+mad.f32 r2.x, c13.z, r0.z, r2.x
+mul.f r1.x, r0.w, c5.x
+mad.f32 r0.w, c8.z, r4.x, r1.y
+mul.f r1.z, r4.w, c5.z
+mul.f r1.y, r4.w, c5.y
+add.f r4.w, r2.x, c14.z
+mad.f32 r2.x, c1.w, r4.x, r2.y
+mad.f32 r2.y, c1.z, r4.x, r2.z
+mad.f32 r2.z, c1.y, r4.x, r4.y
+mad.f32 r2.w, c9.y, r4.w, r2.w
mul.f r0.x, c11.w, r0.x
-mad.f32 r1.x, c9.z, r5.y, r2.y
-mad.f32 r2.y, c9.y, r5.y, r2.z
-mov.f32f32 r0.w, r0.w
+mad.f32 r3.x, c9.x, r4.w, r3.x
mad.f32 r0.x, c12.w, r0.y, r0.x
-mad.f32 r0.y, c13.z, r1.y, r0.w
+mad.f32 r0.y, c9.w, r4.w, r3.y
mad.f32 r0.x, c13.w, r0.z, r0.x
-mad.f32 r0.z, c8.x, r3.y, r4.x
-mad.f32 r0.w, c1.w, r3.y, r4.y
-mov.f32f32 r0.y, r0.y
+mad.f32 r0.z, c9.z, r4.w, r0.w
+mad.f32 r0.w, c2.w, r4.w, r2.x
+mad.f32 r4.y, c2.z, r4.w, r2.y
add.f r0.x, r0.x, c14.w
-mad.f32 r0.z, c9.x, r5.y, r0.z
-mad.f32 r0.w, c2.w, r5.y, r0.w
-mad.f32 r0.y, (neg)c4.z, r0.y, r1.z
-mad.f32 r1.y, c10.w, r0.x, r2.x
-mad.f32 r1.x, c10.z, r0.x, r1.x
-mad.f32 r1.z, c10.y, r0.x, r2.y
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.x, r1.x
-mul.f r1.z, r1.z, c15.y
-max.f r0.y, c16.x, r0.y
-mov.f32f32 r2.w, r1.y
-mov.f32f32 r2.z, r1.x
-mov.f32f32 r2.y, r1.z
-mov.f32f32 r0.y, r0.y
-mad.f32 r0.z, c10.x, r0.x, r0.z
+mad.f32 r5.x, c2.y, r4.w, r2.z
+mad.f32 r4.x, c1.x, r4.x, r4.z
+mad.f32 r3.y, c6.x, r4.w, c6.y
+mad.f32 r2.x, c10.y, r0.x, r2.w
+mad.f32 r3.x, c10.x, r0.x, r3.x
+mad.f32 r2.w, c10.w, r0.x, r0.y
+mad.f32 r2.z, c10.z, r0.x, r0.z
+mul.f r2.y, r2.x, c15.y
+mul.f r2.x, r3.x, c15.x
mad.f32 r0.w, c3.w, r0.x, r0.w
-mad.f32 r3.x, c1.z, r3.y, r4.z
-mul.f r1.x, r0.y, c5.z
-mul.f r1.y, r0.y, c5.y
-mul.f r0.y, r0.y, c5.x
-mul.f r0.z, r0.z, c15.x
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r0.y, r0.y
-nop
-mov.f32f32 r1.z, r1.x
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.x, r0.y
-mov.f32f32 r2.x, r0.z
-mov.f32f32 r0.w, r0.w
-mad.f32 r0.y, c2.z, r5.y, r3.x
-mad.f32 r0.z, c1.y, r3.y, r4.w
-mad.f32 r0.y, c3.z, r0.x, r0.y
-mad.f32 r3.x, c2.y, r5.y, r0.z
-mad.f32 r3.y, c1.x, r3.y, r5.x
-mad.f32 r4.x, c6.x, r5.y, c6.y
-mov.f32f32 r0.z, r0.y
-mad.f32 r0.y, c3.y, r0.x, r3.x
-mad.f32 r3.x, c2.x, r5.y, r3.y
-mov.f32f32 r3.y, r4.x
-mad.f32 r1.w, c6.x, r1.w, c6.y
-mov.f32f32 r0.y, r0.y
-mad.f32 r0.x, c3.x, r0.x, r3.x
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r1.w, r1.w
-nop
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r3.x, r1.w
-mov.f32f32 r1.w, r3.z
-mov.f32f32 r4.x, (0.000000)
-mov.f32f32 r3.w, r3.w
-nop
-mov.f32f32 r3.z, r1.w
-mov.f32f32 r1.w, r4.x
+mad.f32 r0.z, c3.z, r0.x, r4.y
+mad.f32 r0.y, c3.y, r0.x, r5.x
+mad.f32 r4.x, c2.x, r4.w, r4.x
+mad.f32 r3.x, c6.x, r1.w, c6.y
+mad.f32 r0.x, c3.x, r0.x, r4.x
+mov.f32f32 r1.w, (0.000000)
end
nop
; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11)
; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r3.z (0:0,cm=3,il=16,b=0)
-; VERT: 107 instructions, 0 half, 6 full
+; VERT: 71 instructions, 0 half, 6 full
diff --git a/reference/0ad-alpine-valley/0ad-87.asm b/reference/0ad-alpine-valley/0ad-87.asm
index c6a3a6f..9c8ac11 100644
--- a/reference/0ad-alpine-valley/0ad-87.asm
+++ b/reference/0ad-alpine-valley/0ad-87.asm
@@ -24,115 +24,81 @@
@out(r3.y) out13
@out(r3.z) out14
@out(r3.w) out15
+@const(c16.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
+@const(c17.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)mul.f r1.z, c11.x, r0.w
mul.f r1.w, c11.x, r0.x
mad.f32 r1.z, c12.x, r1.x, r1.z
mad.f32 r1.w, c12.x, r0.y, r1.w
-mul.f r2.x, c11.z, r0.x
-mad.f32 r1.w, c13.x, r0.z, r1.w
-mov.f32f32 r1.z, r1.z
-mad.f32 r2.x, c12.z, r0.y, r2.x
mad.f32 r1.z, c13.x, r1.y, r1.z
-add.f r1.w, r1.w, c14.x
-mad.f32 r2.x, c13.z, r0.z, r2.x
-mul.f r2.y, c11.y, r0.w
-mov.f32f32 r1.z, r1.z
-mul.f r2.z, c7.w, r1.w
-mul.f r2.w, c7.z, r1.w
-mul.f r3.x, c7.y, r1.w
+mad.f32 r1.w, c13.x, r0.z, r1.w
+mul.f r2.x, c11.z, r0.x
+mul.f r2.y, c11.y, r0.x
mul.f r1.z, r1.z, (neg)c4.x
-mad.f32 r2.y, c12.y, r1.x, r2.y
-mul.f r3.y, c11.y, r0.x
-mul.f r4.x, c7.x, r1.w
-mad.f32 r3.y, c12.y, r0.y, r3.y
-mov.f32f32 r2.y, r2.y
-mad.f32 r3.y, c13.y, r0.z, r3.y
-mad.f32 r2.y, c13.y, r1.y, r2.y
-mul.f r4.y, c0.w, r1.w
-mul.f r4.z, c0.z, r1.w
-mul.f r4.w, c0.y, r1.w
-mov.f32f32 r2.y, r2.y
-add.f r3.y, r3.y, c14.y
-mul.f r5.x, c0.x, r1.w
-add.f r5.y, r2.x, c14.z
-mad.f32 r1.z, (neg)c4.y, r2.y, r1.z
-mad.f32 r2.x, c8.w, r3.y, r2.z
-mad.f32 r2.y, c8.z, r3.y, r2.w
-mad.f32 r2.z, c8.y, r3.y, r3.x
-mov.f32f32 r1.z, r1.z
+mul.f r2.z, c11.y, r0.w
+add.f r1.w, r1.w, c14.x
+mad.f32 r2.z, c12.y, r1.x, r2.z
+mad.f32 r2.x, c12.z, r0.y, r2.x
+mad.f32 r2.z, c13.y, r1.y, r2.z
+mul.f r2.w, c7.y, r1.w
+mul.f r3.x, c7.x, r1.w
+mul.f r3.y, c7.w, r1.w
+mad.f32 r1.z, (neg)c4.y, r2.z, r1.z
mul.f r0.w, c11.z, r0.w
-mad.f32 r2.x, c9.w, r5.y, r2.x
+mad.f32 r2.y, c12.y, r0.y, r2.y
mad.f32 r0.w, c12.z, r1.x, r0.w
+mad.f32 r1.x, c13.y, r0.z, r2.y
+mad.f32 r0.w, c13.z, r1.y, r0.w
+mul.f r1.y, c7.z, r1.w
+mul.f r2.y, c0.w, r1.w
+mul.f r2.z, c0.z, r1.w
+mad.f32 r0.w, (neg)c4.z, r0.w, r1.z
+add.f r4.x, r1.x, c14.y
+mul.f r4.y, c0.y, r1.w
+mul.f r4.z, c0.x, r1.w
+max.f r0.w, c16.x, r0.w
+mad.f32 r2.w, c8.y, r4.x, r2.w
+mad.f32 r3.x, c8.x, r4.x, r3.x
+mad.f32 r3.y, c8.w, r4.x, r3.y
+mov.f32f32 r4.w, r0.w
+mad.f32 r2.x, c13.z, r0.z, r2.x
+mul.f r1.x, r0.w, c5.x
+mad.f32 r0.w, c8.z, r4.x, r1.y
+mul.f r1.z, r4.w, c5.z
+mul.f r1.y, r4.w, c5.y
+add.f r4.w, r2.x, c14.z
+mad.f32 r2.x, c1.w, r4.x, r2.y
+mad.f32 r2.y, c1.z, r4.x, r2.z
+mad.f32 r2.z, c1.y, r4.x, r4.y
+mad.f32 r2.w, c9.y, r4.w, r2.w
mul.f r0.x, c11.w, r0.x
-mad.f32 r1.x, c9.z, r5.y, r2.y
-mad.f32 r2.y, c9.y, r5.y, r2.z
-mov.f32f32 r0.w, r0.w
+mad.f32 r3.x, c9.x, r4.w, r3.x
mad.f32 r0.x, c12.w, r0.y, r0.x
-mad.f32 r0.y, c13.z, r1.y, r0.w
+mad.f32 r0.y, c9.w, r4.w, r3.y
mad.f32 r0.x, c13.w, r0.z, r0.x
-mad.f32 r0.z, c8.x, r3.y, r4.x
-mad.f32 r0.w, c1.w, r3.y, r4.y
-mov.f32f32 r0.y, r0.y
+mad.f32 r0.z, c9.z, r4.w, r0.w
+mad.f32 r0.w, c2.w, r4.w, r2.x
+mad.f32 r4.y, c2.z, r4.w, r2.y
add.f r0.x, r0.x, c14.w
-mad.f32 r0.z, c9.x, r5.y, r0.z
-mad.f32 r0.w, c2.w, r5.y, r0.w
-mad.f32 r0.y, (neg)c4.z, r0.y, r1.z
-mad.f32 r1.y, c10.w, r0.x, r2.x
-mad.f32 r1.x, c10.z, r0.x, r1.x
-mad.f32 r1.z, c10.y, r0.x, r2.y
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.x, r1.x
-mul.f r1.z, r1.z, c15.y
-max.f r0.y, c16.x, r0.y
-mov.f32f32 r2.w, r1.y
-mov.f32f32 r2.z, r1.x
-mov.f32f32 r2.y, r1.z
-mov.f32f32 r0.y, r0.y
-mad.f32 r0.z, c10.x, r0.x, r0.z
+mad.f32 r5.x, c2.y, r4.w, r2.z
+mad.f32 r4.x, c1.x, r4.x, r4.z
+mad.f32 r3.y, c6.x, r4.w, c6.y
+mad.f32 r2.x, c10.y, r0.x, r2.w
+mad.f32 r3.x, c10.x, r0.x, r3.x
+mad.f32 r2.w, c10.w, r0.x, r0.y
+mad.f32 r2.z, c10.z, r0.x, r0.z
+mul.f r2.y, r2.x, c15.y
+mul.f r2.x, r3.x, c15.x
mad.f32 r0.w, c3.w, r0.x, r0.w
-mad.f32 r3.x, c1.z, r3.y, r4.z
-mul.f r1.x, r0.y, c5.z
-mul.f r1.y, r0.y, c5.y
-mul.f r0.y, r0.y, c5.x
-mul.f r0.z, r0.z, c15.x
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r0.y, r0.y
-nop
-mov.f32f32 r1.z, r1.x
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.x, r0.y
-mov.f32f32 r2.x, r0.z
-mov.f32f32 r0.w, r0.w
-mad.f32 r0.y, c2.z, r5.y, r3.x
-mad.f32 r0.z, c1.y, r3.y, r4.w
-mad.f32 r0.y, c3.z, r0.x, r0.y
-mad.f32 r3.x, c2.y, r5.y, r0.z
-mad.f32 r3.y, c1.x, r3.y, r5.x
-mad.f32 r4.x, c6.x, r5.y, c6.y
-mov.f32f32 r0.z, r0.y
-mad.f32 r0.y, c3.y, r0.x, r3.x
-mad.f32 r3.x, c2.x, r5.y, r3.y
-mov.f32f32 r3.y, r4.x
-mad.f32 r1.w, c6.x, r1.w, c6.y
-mov.f32f32 r0.y, r0.y
-mad.f32 r0.x, c3.x, r0.x, r3.x
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r1.w, r1.w
-nop
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r3.x, r1.w
-mov.f32f32 r1.w, r3.z
-mov.f32f32 r4.x, (0.000000)
-mov.f32f32 r3.w, r3.w
-nop
-mov.f32f32 r3.z, r1.w
-mov.f32f32 r1.w, r4.x
+mad.f32 r0.z, c3.z, r0.x, r4.y
+mad.f32 r0.y, c3.y, r0.x, r5.x
+mad.f32 r4.x, c2.x, r4.w, r4.x
+mad.f32 r3.x, c6.x, r1.w, c6.y
+mad.f32 r0.x, c3.x, r0.x, r4.x
+mov.f32f32 r1.w, (0.000000)
end
nop
; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11)
; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r3.z (0:0,cm=3,il=16,b=0)
-; VERT: 107 instructions, 0 half, 6 full
+; VERT: 71 instructions, 0 half, 6 full
diff --git a/reference/0ad-alpine-valley/0ad-88.asm b/reference/0ad-alpine-valley/0ad-88.asm
index 4377ecf..227a081 100644
--- a/reference/0ad-alpine-valley/0ad-88.asm
+++ b/reference/0ad-alpine-valley/0ad-88.asm
@@ -8,199 +8,135 @@
@out(r2.y) out1
@out(r2.z) out2
@out(r2.w) out3
+@const(c9.x) 0x3f000000, 0x00000000, 0x3f800000, 0xbb449ba6
+@const(c10.x) 0xbf000000, 0x40000000, 0x3f800000, 0x3de38866
+@const(c11.x) 0x3fb8aa65, 0x3cf5c28f, 0x00000000, 0x00000000
+@const(c12.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.x, 4, r1.x
add.f r0.y, r0.w, c9.y
bary.f r0.w, 5, r1.x
bary.f r1.z, 8, r1.x
-add.f r1.w, r0.x, c10.x
-bary.f r2.x, 10, r1.x
-bary.f r2.y, 6, r1.x
-add.f r2.z, r0.w, c10.x
-floor.f r2.w, r1.w
+add.f r2.x, r0.x, c10.x
+bary.f r1.w, 9, r1.x
+add.f r2.y, r0.w, c10.x
+bary.f r2.z, 6, r1.x
+floor.f r2.w, r2.x
rcp r0.y, r0.y
add.f r0.z, r0.z, c9.y
-mov.f32f32 r1.z, r1.z
-floor.f r3.x, r2.z
-add.f r1.w, r1.w, (neg)r2.w
+floor.f r3.x, r2.y
+add.f r3.w, r2.z, c9.w
+add.f r2.x, r2.x, (neg)r2.w
(ss)mul.f r0.y, r0.z, r0.y
-mov.f32f32 r3.y, r1.z
-add.f r0.z, r2.z, (neg)r3.x
-mov.f32f32 r1.z, r1.w
+absneg.f r0.z, (neg)c7.x
+add.f r2.y, r2.y, (neg)r3.x
+mov.f32f32 r2.z, r2.x
+add.f r2.x, r2.x, c9.z
+mul.f r0.z, r0.z, c7.x
+sam (f32)(w)r4.x, r1.z, s#1, t#1
+(ss)mov.f32f32 r1.z, r2.y
+mul.f r1.w, c9.x, r2.z
+add.f r2.z, c10.y, (neg)r2.z
+mul.f r0.z, r0.z, r0.y
mov.f32f32 r0.y, r0.y
-absneg.f r1.w, (neg)c7.x
-mov.f32f32 r0.z, r0.z
-mul.f r2.z, c9.x, r1.z
-add.f r2.w, c10.y, (neg)r1.z
-mul.f r1.w, r1.w, c7.x
-bary.f r3.x, 9, r1.x
-mov.f32f32 r2.z, r2.z
-mul.f r3.z, c9.x, r0.z
-mul.f r1.w, r1.w, r0.y
-mov.f32f32 r2.w, r2.w
-add.f r0.x, r0.x, (neg)r2.z
-mov.f32f32 r2.z, r3.z
-mov.f32f32 r1.w, r1.w
-add.f r3.z, c10.y, (neg)r0.z
-mov.f32f32 r0.x, r0.x
-add.f r0.w, r0.w, (neg)r2.z
-mul.f r0.y, r1.w, r0.y
-mov.f32f32 r1.w, r3.z
-add.f r2.z, c10.x, r0.x
+add.f r0.x, r0.x, (neg)r1.w
+mul.f r1.w, c9.x, r1.z
+mov.f32f32 r2.w, r2.z
+mul.f r0.y, r0.z, r0.y
+mov.f32f32 r0.z, r0.x
+add.f r0.w, r0.w, (neg)r1.w
add.f r0.x, c10.z, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r0.x, r0.x
mul.f r0.y, r0.y, c11.x
-add.f r3.z, c10.z, r0.w
-mul.f r2.z, r2.z, c3.z
-mul.f r0.x, r0.x, c3.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r3.z, r3.z
-mov.f32f32 r3.w, r2.z
-mov.f32f32 r4.x, r0.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r4.y, r3.w
-add.f r0.w, c10.x, r0.w
-exp2 r0.y, r0.y
-(ss)mov.f32f32 r0.y, r0.y
-mov.f32f32 r3.w, r4.x
-mov.f32f32 r5.x, r0.x
-mov.f32f32 r0.x, r0.w
-add.f r0.w, c12.y, (neg)r0.y
-mov.f32f32 r5.w, r3.w
-mul.f r3.z, r3.z, c3.w
-mul.f r0.x, r0.x, c3.w
-mul.f r0.w, r0.w, c7.y
-mul.f r0.y, r0.y, c9.z
-mov.f32f32 r3.w, r3.z
-mov.f32f32 r4.x, r0.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r6.z, r2.z
-mov.f32f32 r2.z, r3.z
-mov.f32f32 r4.z, r4.x
-add.f r2.y, r2.y, c9.w
-add.f r0.y, r0.y, r0.w
-mov.f32f32 r0.w, r3.w
-mov.f32f32 r5.y, r0.x
-mov.f32f32 r0.x, r2.y
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r6.x, r0.w
-mov.f32f32 r0.w, r2.y
-mov.f32f32 r4.w, r0.x
-mov.f32f32 r0.x, r0.y
-mov.f32f32 r0.y, r2.y
-mov.f32f32 r6.w, r2.z
-mov.f32f32 r6.y, r0.w
-mov.f32f32 r0.w, r2.y
-mov.f32f32 r5.z, r0.y
-sam.s (f32)(x)r3.z, r4.y, s#2, t#2
-(sy)mov.f32f32 r0.y, r3.z
+add.f r0.z, c10.x, r0.z
+mov.f32f32 r1.w, r0.w
+mul.f r3.y, r0.x, c3.z
+add.f r0.x, c10.z, r0.w
+mul.f r4.x, r0.z, c3.z
+add.f r0.z, c10.x, r1.w
+mov.f32f32 r5.x, r3.y
+mul.f r3.z, r0.x, c3.w
+mov.f32f32 r5.w, r4.x
+mul.f r5.y, r0.z, c3.w
+exp2 r0.x, r0.y
+(ss)mov.f32f32 r0.y, r0.x
+mov.f32f32 r5.z, r3.w
+mov.f32f32 r4.y, r3.z
+mov.f32f32 r6.x, r5.y
+mov.f32f32 r6.y, r3.w
+add.f r0.y, c12.y, (neg)r0.y
+mov.f32f32 r4.z, r3.w
+sam.s (f32)(x)r3.x, r3.y, s#2, t#2
+add.f r0.z, c10.y, (neg)r1.z
+sam.s (f32)(x)r6.z, r5.x, s#2, t#2
+mul.f r0.x, r0.x, c9.z
+add.f r0.w, r2.y, c9.z
+mul.f r0.y, r0.y, c7.y
+(ss)nop
+sam.s (f32)(x)r5.x, r5.w, s#2, t#2
+mov.f32f32 r1.z, r0.z
+mul.f r0.z, r2.x, r0.z
+mul.f r1.w, r2.z, r0.w
+add.f r0.x, r0.x, r0.y
+mul.f r0.y, r2.w, r1.z
+sam.s (f32)(x)r3.y, r4.x, s#2, t#2
+mul.f r0.w, r2.x, r0.w
+(sy)cmps.f.lt r1.z, r4.w, c11.y
+bary.f r2.x, 10, r1.x
+mul.f r0.y, r0.y, r5.x
max.f r0.x, r0.x, c9.y
-mov.f32f32 r7.x, r0.w
-mul.f r0.w, r2.w, r1.w
-mov.f32f32 r0.y, r0.y
+mad.f32 r0.y, r0.z, r6.z, r0.y
+cov.u32f32 r0.z, r1.z
+mad.f32 r0.y, r1.w, r3.y, r0.y
min.f r0.x, r0.x, c9.z
-sam.s (f32)(x)r3.z, r5.w, s#2, t#2
-nop
-(sy)mov.f32f32 r2.y, r3.z
-mul.f r0.y, r0.w, r0.y
-sam.s (f32)(x)r3.z, r5.x, s#2, t#2
-(sy)mov.f32f32 r0.w, r3.z
-add.f r1.z, r1.z, c9.z
-add.f r2.z, c12.y, (neg)r0.x
-add.f r3.z, c12.y, (neg)r0.x
-add.f r3.w, c12.y, (neg)r0.x
-mul.f r1.w, r1.z, r1.w
-mul.f r2.z, r2.z, c6.z
-mul.f r4.x, r3.z, c6.y
-mul.f r3.w, r3.w, c6.x
-mad.f32 r0.y, r1.w, r0.w, r0.y
-(ss)nop
-sam.s (f32)(x)r4.y, r6.z, s#2, t#2
-add.f r0.z, r0.z, c9.z
-(sy)mov.f32f32 r0.w, r4.y
-mov.f32f32 r1.w, r3.x
-mov.f32f32 r0.y, r0.y
-mul.f r2.w, r2.w, r0.z
-mul.f r0.z, r1.z, r0.z
-mov.f32f32 r3.z, r1.w
-mov.f32f32 r1.z, r2.x
-mad.f32 r0.y, r2.w, r0.w, r0.y
-bary.f r0.w, 11, r1.x
-mov.f32f32 r1.w, c9.z
-bary.f r2.x, 2, r1.x
-mov.f32f32 r0.y, r0.y
-sam (f32)(w)r2.w, r3.y, s#1, t#1
-(sy)cmps.f.lt r2.w, r3.z, c11.y
-mad.f32 r0.y, r0.z, r2.y, r0.y
-mov.f32f32 r0.z, r3.z
-mov.f32f32 r3.x, r1.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.y, r0.y
-cov.u32f32 r1.z, r2.w
-mov.f32f32 r0.z, r0.z
-(ss)mov.f32f32 r3.y, r0.w
+mad.f32 r0.y, r0.w, r3.x, r0.y
+cmps.f.ne r0.z, r0.z, c9.y
+(rpt1)nop
mul.f r0.y, c10.w, r0.y
-cmps.f.ne r0.w, r1.z, c9.y
+bary.f r2.y, 11, r1.x
+add.f r0.w, c12.y, (neg)r0.x
+add.f r1.z, c12.y, (neg)r0.x
+mov.f32f32 r1.w, r0.y
+add.f r2.z, c12.y, (neg)r0.x
(rpt1)nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.z, c9.y
-sam (f32)(xyz)r4.y, r3.x, s#0, t#0
-(sy)mul.f r2.x, r4.w, r2.x
+sam (f32)(xyz)r2.w, r2.x, s#0, t#0
+(ss)bary.f r2.x, 2, r1.x
bary.f r2.y, 1, r1.x
bary.f (ei)r1.x, 0, r1.x
-sel.b32 r0.z, r1.z, r0.w, r0.z
-mul.f r0.w, r2.x, r0.y
-mul.f r1.y, r4.z, r2.y
-mul.f r1.x, r4.y, r1.x
-mov.f32f32 r2.w, r1.w
-mov.f32f32 r0.w, r0.w
-mul.f r1.y, r1.y, r0.y
-mad.f32 r0.w, c5.z, r4.w, r0.w
+mul.f r0.w, r0.w, c6.z
+(sy)mul.f r1.y, r3.y, r2.x
+mul.f r2.x, r3.x, r2.y
+mul.f r1.x, r2.w, r1.x
+mul.f r1.z, r1.z, c6.y
+mul.f r1.y, r1.y, r1.w
+mul.f r1.w, r2.x, r1.w
+mad.f32 r1.y, c5.z, r3.y, r1.y
+mad.f32 r1.w, c5.y, r3.x, r1.w
mul.f r0.y, r1.x, r0.y
-(rpt1)nop
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.x, r1.y
-mov.f32f32 r0.y, r0.y
-mad.f32 r1.x, c5.y, r4.z, r1.x
-mul.f r0.w, r0.x, r0.w
-mad.f32 r0.y, c5.x, r4.y, r0.y
-(rpt1)nop
-add.f r0.w, r0.w, r2.z
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r0.y, r0.y
-nop
-mul.f r0.w, r0.w, r0.z
-mul.f r1.x, r0.x, r1.x
+mul.f r1.x, r2.z, c6.x
+mul.f r1.y, r0.x, r1.y
+mul.f r1.w, r0.x, r1.w
+mad.f32 r0.y, c5.x, r2.w, r0.y
+mov.f32f32 r2.x, c9.y
+add.f r0.w, r1.y, r0.w
+add.f r1.y, r1.w, r1.z
mul.f r0.x, r0.x, r0.y
-nop
-mul.f r0.y, r0.w, c4.z
-add.f r0.w, r1.x, r4.x
-add.f r0.x, r0.x, r3.w
-nop
-mov.f32f32 r0.y, r0.y
-mul.f r0.w, r0.w, r0.z
-mul.f r0.x, r0.x, r0.z
-nop
-mov.f32f32 r0.y, r0.y
-mul.f r0.z, r0.w, c4.y
-mul.f r0.x, r0.x, c4.x
-nop
-mov.f32f32 r2.z, r0.y
-mov.f32f32 r0.y, r0.z
-mov.f32f32 r0.x, r0.x
+sel.b32 r0.y, r2.x, r0.z, r4.w
+mov.f32f32 r2.w, c9.z
(rpt1)nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
+mul.f r0.z, r0.w, r0.y
+mul.f r0.w, r1.y, r0.y
(rpt1)nop
-mov.f32f32 r2.y, r0.y
-mov.f32f32 r2.x, r0.x
+mul.f r2.z, r0.z, c4.z
+mul.f r2.y, r0.w, c4.y
+add.f r0.x, r0.x, r1.x
+(rpt2)nop
+mul.f r0.x, r0.x, r0.y
+(rpt2)nop
+mul.f r2.x, r0.x, c4.x
end
nop
nop
+nop
; FRAG: outputs: r2.x (1:0)
-; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r2.x (5:10,cm=f,il=12,b=1) r0.x (5:11,cm=f,il=16,b=1)
-; FRAG: 195 instructions, 0 half, 8 full
+; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r2.x (5:10,cm=f,il=12,b=1) r1.z (5:11,cm=f,il=16,b=1)
+; FRAG: 129 instructions, 0 half, 7 full
diff --git a/reference/0ad-alpine-valley/0ad-89.asm b/reference/0ad-alpine-valley/0ad-89.asm
index c6a3a6f..9c8ac11 100644
--- a/reference/0ad-alpine-valley/0ad-89.asm
+++ b/reference/0ad-alpine-valley/0ad-89.asm
@@ -24,115 +24,81 @@
@out(r3.y) out13
@out(r3.z) out14
@out(r3.w) out15
+@const(c16.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
+@const(c17.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)mul.f r1.z, c11.x, r0.w
mul.f r1.w, c11.x, r0.x
mad.f32 r1.z, c12.x, r1.x, r1.z
mad.f32 r1.w, c12.x, r0.y, r1.w
-mul.f r2.x, c11.z, r0.x
-mad.f32 r1.w, c13.x, r0.z, r1.w
-mov.f32f32 r1.z, r1.z
-mad.f32 r2.x, c12.z, r0.y, r2.x
mad.f32 r1.z, c13.x, r1.y, r1.z
-add.f r1.w, r1.w, c14.x
-mad.f32 r2.x, c13.z, r0.z, r2.x
-mul.f r2.y, c11.y, r0.w
-mov.f32f32 r1.z, r1.z
-mul.f r2.z, c7.w, r1.w
-mul.f r2.w, c7.z, r1.w
-mul.f r3.x, c7.y, r1.w
+mad.f32 r1.w, c13.x, r0.z, r1.w
+mul.f r2.x, c11.z, r0.x
+mul.f r2.y, c11.y, r0.x
mul.f r1.z, r1.z, (neg)c4.x
-mad.f32 r2.y, c12.y, r1.x, r2.y
-mul.f r3.y, c11.y, r0.x
-mul.f r4.x, c7.x, r1.w
-mad.f32 r3.y, c12.y, r0.y, r3.y
-mov.f32f32 r2.y, r2.y
-mad.f32 r3.y, c13.y, r0.z, r3.y
-mad.f32 r2.y, c13.y, r1.y, r2.y
-mul.f r4.y, c0.w, r1.w
-mul.f r4.z, c0.z, r1.w
-mul.f r4.w, c0.y, r1.w
-mov.f32f32 r2.y, r2.y
-add.f r3.y, r3.y, c14.y
-mul.f r5.x, c0.x, r1.w
-add.f r5.y, r2.x, c14.z
-mad.f32 r1.z, (neg)c4.y, r2.y, r1.z
-mad.f32 r2.x, c8.w, r3.y, r2.z
-mad.f32 r2.y, c8.z, r3.y, r2.w
-mad.f32 r2.z, c8.y, r3.y, r3.x
-mov.f32f32 r1.z, r1.z
+mul.f r2.z, c11.y, r0.w
+add.f r1.w, r1.w, c14.x
+mad.f32 r2.z, c12.y, r1.x, r2.z
+mad.f32 r2.x, c12.z, r0.y, r2.x
+mad.f32 r2.z, c13.y, r1.y, r2.z
+mul.f r2.w, c7.y, r1.w
+mul.f r3.x, c7.x, r1.w
+mul.f r3.y, c7.w, r1.w
+mad.f32 r1.z, (neg)c4.y, r2.z, r1.z
mul.f r0.w, c11.z, r0.w
-mad.f32 r2.x, c9.w, r5.y, r2.x
+mad.f32 r2.y, c12.y, r0.y, r2.y
mad.f32 r0.w, c12.z, r1.x, r0.w
+mad.f32 r1.x, c13.y, r0.z, r2.y
+mad.f32 r0.w, c13.z, r1.y, r0.w
+mul.f r1.y, c7.z, r1.w
+mul.f r2.y, c0.w, r1.w
+mul.f r2.z, c0.z, r1.w
+mad.f32 r0.w, (neg)c4.z, r0.w, r1.z
+add.f r4.x, r1.x, c14.y
+mul.f r4.y, c0.y, r1.w
+mul.f r4.z, c0.x, r1.w
+max.f r0.w, c16.x, r0.w
+mad.f32 r2.w, c8.y, r4.x, r2.w
+mad.f32 r3.x, c8.x, r4.x, r3.x
+mad.f32 r3.y, c8.w, r4.x, r3.y
+mov.f32f32 r4.w, r0.w
+mad.f32 r2.x, c13.z, r0.z, r2.x
+mul.f r1.x, r0.w, c5.x
+mad.f32 r0.w, c8.z, r4.x, r1.y
+mul.f r1.z, r4.w, c5.z
+mul.f r1.y, r4.w, c5.y
+add.f r4.w, r2.x, c14.z
+mad.f32 r2.x, c1.w, r4.x, r2.y
+mad.f32 r2.y, c1.z, r4.x, r2.z
+mad.f32 r2.z, c1.y, r4.x, r4.y
+mad.f32 r2.w, c9.y, r4.w, r2.w
mul.f r0.x, c11.w, r0.x
-mad.f32 r1.x, c9.z, r5.y, r2.y
-mad.f32 r2.y, c9.y, r5.y, r2.z
-mov.f32f32 r0.w, r0.w
+mad.f32 r3.x, c9.x, r4.w, r3.x
mad.f32 r0.x, c12.w, r0.y, r0.x
-mad.f32 r0.y, c13.z, r1.y, r0.w
+mad.f32 r0.y, c9.w, r4.w, r3.y
mad.f32 r0.x, c13.w, r0.z, r0.x
-mad.f32 r0.z, c8.x, r3.y, r4.x
-mad.f32 r0.w, c1.w, r3.y, r4.y
-mov.f32f32 r0.y, r0.y
+mad.f32 r0.z, c9.z, r4.w, r0.w
+mad.f32 r0.w, c2.w, r4.w, r2.x
+mad.f32 r4.y, c2.z, r4.w, r2.y
add.f r0.x, r0.x, c14.w
-mad.f32 r0.z, c9.x, r5.y, r0.z
-mad.f32 r0.w, c2.w, r5.y, r0.w
-mad.f32 r0.y, (neg)c4.z, r0.y, r1.z
-mad.f32 r1.y, c10.w, r0.x, r2.x
-mad.f32 r1.x, c10.z, r0.x, r1.x
-mad.f32 r1.z, c10.y, r0.x, r2.y
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.x, r1.x
-mul.f r1.z, r1.z, c15.y
-max.f r0.y, c16.x, r0.y
-mov.f32f32 r2.w, r1.y
-mov.f32f32 r2.z, r1.x
-mov.f32f32 r2.y, r1.z
-mov.f32f32 r0.y, r0.y
-mad.f32 r0.z, c10.x, r0.x, r0.z
+mad.f32 r5.x, c2.y, r4.w, r2.z
+mad.f32 r4.x, c1.x, r4.x, r4.z
+mad.f32 r3.y, c6.x, r4.w, c6.y
+mad.f32 r2.x, c10.y, r0.x, r2.w
+mad.f32 r3.x, c10.x, r0.x, r3.x
+mad.f32 r2.w, c10.w, r0.x, r0.y
+mad.f32 r2.z, c10.z, r0.x, r0.z
+mul.f r2.y, r2.x, c15.y
+mul.f r2.x, r3.x, c15.x
mad.f32 r0.w, c3.w, r0.x, r0.w
-mad.f32 r3.x, c1.z, r3.y, r4.z
-mul.f r1.x, r0.y, c5.z
-mul.f r1.y, r0.y, c5.y
-mul.f r0.y, r0.y, c5.x
-mul.f r0.z, r0.z, c15.x
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r0.y, r0.y
-nop
-mov.f32f32 r1.z, r1.x
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.x, r0.y
-mov.f32f32 r2.x, r0.z
-mov.f32f32 r0.w, r0.w
-mad.f32 r0.y, c2.z, r5.y, r3.x
-mad.f32 r0.z, c1.y, r3.y, r4.w
-mad.f32 r0.y, c3.z, r0.x, r0.y
-mad.f32 r3.x, c2.y, r5.y, r0.z
-mad.f32 r3.y, c1.x, r3.y, r5.x
-mad.f32 r4.x, c6.x, r5.y, c6.y
-mov.f32f32 r0.z, r0.y
-mad.f32 r0.y, c3.y, r0.x, r3.x
-mad.f32 r3.x, c2.x, r5.y, r3.y
-mov.f32f32 r3.y, r4.x
-mad.f32 r1.w, c6.x, r1.w, c6.y
-mov.f32f32 r0.y, r0.y
-mad.f32 r0.x, c3.x, r0.x, r3.x
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r1.w, r1.w
-nop
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r3.x, r1.w
-mov.f32f32 r1.w, r3.z
-mov.f32f32 r4.x, (0.000000)
-mov.f32f32 r3.w, r3.w
-nop
-mov.f32f32 r3.z, r1.w
-mov.f32f32 r1.w, r4.x
+mad.f32 r0.z, c3.z, r0.x, r4.y
+mad.f32 r0.y, c3.y, r0.x, r5.x
+mad.f32 r4.x, c2.x, r4.w, r4.x
+mad.f32 r3.x, c6.x, r1.w, c6.y
+mad.f32 r0.x, c3.x, r0.x, r4.x
+mov.f32f32 r1.w, (0.000000)
end
nop
; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11)
; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r3.z (0:0,cm=3,il=16,b=0)
-; VERT: 107 instructions, 0 half, 6 full
+; VERT: 71 instructions, 0 half, 6 full
diff --git a/reference/0ad-alpine-valley/0ad-90.asm b/reference/0ad-alpine-valley/0ad-90.asm
index c6a3a6f..9c8ac11 100644
--- a/reference/0ad-alpine-valley/0ad-90.asm
+++ b/reference/0ad-alpine-valley/0ad-90.asm
@@ -24,115 +24,81 @@
@out(r3.y) out13
@out(r3.z) out14
@out(r3.w) out15
+@const(c16.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
+@const(c17.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)mul.f r1.z, c11.x, r0.w
mul.f r1.w, c11.x, r0.x
mad.f32 r1.z, c12.x, r1.x, r1.z
mad.f32 r1.w, c12.x, r0.y, r1.w
-mul.f r2.x, c11.z, r0.x
-mad.f32 r1.w, c13.x, r0.z, r1.w
-mov.f32f32 r1.z, r1.z
-mad.f32 r2.x, c12.z, r0.y, r2.x
mad.f32 r1.z, c13.x, r1.y, r1.z
-add.f r1.w, r1.w, c14.x
-mad.f32 r2.x, c13.z, r0.z, r2.x
-mul.f r2.y, c11.y, r0.w
-mov.f32f32 r1.z, r1.z
-mul.f r2.z, c7.w, r1.w
-mul.f r2.w, c7.z, r1.w
-mul.f r3.x, c7.y, r1.w
+mad.f32 r1.w, c13.x, r0.z, r1.w
+mul.f r2.x, c11.z, r0.x
+mul.f r2.y, c11.y, r0.x
mul.f r1.z, r1.z, (neg)c4.x
-mad.f32 r2.y, c12.y, r1.x, r2.y
-mul.f r3.y, c11.y, r0.x
-mul.f r4.x, c7.x, r1.w
-mad.f32 r3.y, c12.y, r0.y, r3.y
-mov.f32f32 r2.y, r2.y
-mad.f32 r3.y, c13.y, r0.z, r3.y
-mad.f32 r2.y, c13.y, r1.y, r2.y
-mul.f r4.y, c0.w, r1.w
-mul.f r4.z, c0.z, r1.w
-mul.f r4.w, c0.y, r1.w
-mov.f32f32 r2.y, r2.y
-add.f r3.y, r3.y, c14.y
-mul.f r5.x, c0.x, r1.w
-add.f r5.y, r2.x, c14.z
-mad.f32 r1.z, (neg)c4.y, r2.y, r1.z
-mad.f32 r2.x, c8.w, r3.y, r2.z
-mad.f32 r2.y, c8.z, r3.y, r2.w
-mad.f32 r2.z, c8.y, r3.y, r3.x
-mov.f32f32 r1.z, r1.z
+mul.f r2.z, c11.y, r0.w
+add.f r1.w, r1.w, c14.x
+mad.f32 r2.z, c12.y, r1.x, r2.z
+mad.f32 r2.x, c12.z, r0.y, r2.x
+mad.f32 r2.z, c13.y, r1.y, r2.z
+mul.f r2.w, c7.y, r1.w
+mul.f r3.x, c7.x, r1.w
+mul.f r3.y, c7.w, r1.w
+mad.f32 r1.z, (neg)c4.y, r2.z, r1.z
mul.f r0.w, c11.z, r0.w
-mad.f32 r2.x, c9.w, r5.y, r2.x
+mad.f32 r2.y, c12.y, r0.y, r2.y
mad.f32 r0.w, c12.z, r1.x, r0.w
+mad.f32 r1.x, c13.y, r0.z, r2.y
+mad.f32 r0.w, c13.z, r1.y, r0.w
+mul.f r1.y, c7.z, r1.w
+mul.f r2.y, c0.w, r1.w
+mul.f r2.z, c0.z, r1.w
+mad.f32 r0.w, (neg)c4.z, r0.w, r1.z
+add.f r4.x, r1.x, c14.y
+mul.f r4.y, c0.y, r1.w
+mul.f r4.z, c0.x, r1.w
+max.f r0.w, c16.x, r0.w
+mad.f32 r2.w, c8.y, r4.x, r2.w
+mad.f32 r3.x, c8.x, r4.x, r3.x
+mad.f32 r3.y, c8.w, r4.x, r3.y
+mov.f32f32 r4.w, r0.w
+mad.f32 r2.x, c13.z, r0.z, r2.x
+mul.f r1.x, r0.w, c5.x
+mad.f32 r0.w, c8.z, r4.x, r1.y
+mul.f r1.z, r4.w, c5.z
+mul.f r1.y, r4.w, c5.y
+add.f r4.w, r2.x, c14.z
+mad.f32 r2.x, c1.w, r4.x, r2.y
+mad.f32 r2.y, c1.z, r4.x, r2.z
+mad.f32 r2.z, c1.y, r4.x, r4.y
+mad.f32 r2.w, c9.y, r4.w, r2.w
mul.f r0.x, c11.w, r0.x
-mad.f32 r1.x, c9.z, r5.y, r2.y
-mad.f32 r2.y, c9.y, r5.y, r2.z
-mov.f32f32 r0.w, r0.w
+mad.f32 r3.x, c9.x, r4.w, r3.x
mad.f32 r0.x, c12.w, r0.y, r0.x
-mad.f32 r0.y, c13.z, r1.y, r0.w
+mad.f32 r0.y, c9.w, r4.w, r3.y
mad.f32 r0.x, c13.w, r0.z, r0.x
-mad.f32 r0.z, c8.x, r3.y, r4.x
-mad.f32 r0.w, c1.w, r3.y, r4.y
-mov.f32f32 r0.y, r0.y
+mad.f32 r0.z, c9.z, r4.w, r0.w
+mad.f32 r0.w, c2.w, r4.w, r2.x
+mad.f32 r4.y, c2.z, r4.w, r2.y
add.f r0.x, r0.x, c14.w
-mad.f32 r0.z, c9.x, r5.y, r0.z
-mad.f32 r0.w, c2.w, r5.y, r0.w
-mad.f32 r0.y, (neg)c4.z, r0.y, r1.z
-mad.f32 r1.y, c10.w, r0.x, r2.x
-mad.f32 r1.x, c10.z, r0.x, r1.x
-mad.f32 r1.z, c10.y, r0.x, r2.y
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.x, r1.x
-mul.f r1.z, r1.z, c15.y
-max.f r0.y, c16.x, r0.y
-mov.f32f32 r2.w, r1.y
-mov.f32f32 r2.z, r1.x
-mov.f32f32 r2.y, r1.z
-mov.f32f32 r0.y, r0.y
-mad.f32 r0.z, c10.x, r0.x, r0.z
+mad.f32 r5.x, c2.y, r4.w, r2.z
+mad.f32 r4.x, c1.x, r4.x, r4.z
+mad.f32 r3.y, c6.x, r4.w, c6.y
+mad.f32 r2.x, c10.y, r0.x, r2.w
+mad.f32 r3.x, c10.x, r0.x, r3.x
+mad.f32 r2.w, c10.w, r0.x, r0.y
+mad.f32 r2.z, c10.z, r0.x, r0.z
+mul.f r2.y, r2.x, c15.y
+mul.f r2.x, r3.x, c15.x
mad.f32 r0.w, c3.w, r0.x, r0.w
-mad.f32 r3.x, c1.z, r3.y, r4.z
-mul.f r1.x, r0.y, c5.z
-mul.f r1.y, r0.y, c5.y
-mul.f r0.y, r0.y, c5.x
-mul.f r0.z, r0.z, c15.x
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r0.y, r0.y
-nop
-mov.f32f32 r1.z, r1.x
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.x, r0.y
-mov.f32f32 r2.x, r0.z
-mov.f32f32 r0.w, r0.w
-mad.f32 r0.y, c2.z, r5.y, r3.x
-mad.f32 r0.z, c1.y, r3.y, r4.w
-mad.f32 r0.y, c3.z, r0.x, r0.y
-mad.f32 r3.x, c2.y, r5.y, r0.z
-mad.f32 r3.y, c1.x, r3.y, r5.x
-mad.f32 r4.x, c6.x, r5.y, c6.y
-mov.f32f32 r0.z, r0.y
-mad.f32 r0.y, c3.y, r0.x, r3.x
-mad.f32 r3.x, c2.x, r5.y, r3.y
-mov.f32f32 r3.y, r4.x
-mad.f32 r1.w, c6.x, r1.w, c6.y
-mov.f32f32 r0.y, r0.y
-mad.f32 r0.x, c3.x, r0.x, r3.x
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r1.w, r1.w
-nop
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r3.x, r1.w
-mov.f32f32 r1.w, r3.z
-mov.f32f32 r4.x, (0.000000)
-mov.f32f32 r3.w, r3.w
-nop
-mov.f32f32 r3.z, r1.w
-mov.f32f32 r1.w, r4.x
+mad.f32 r0.z, c3.z, r0.x, r4.y
+mad.f32 r0.y, c3.y, r0.x, r5.x
+mad.f32 r4.x, c2.x, r4.w, r4.x
+mad.f32 r3.x, c6.x, r1.w, c6.y
+mad.f32 r0.x, c3.x, r0.x, r4.x
+mov.f32f32 r1.w, (0.000000)
end
nop
; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11)
; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r3.z (0:0,cm=3,il=16,b=0)
-; VERT: 107 instructions, 0 half, 6 full
+; VERT: 71 instructions, 0 half, 6 full
diff --git a/reference/0ad-alpine-valley/0ad-91.asm b/reference/0ad-alpine-valley/0ad-91.asm
index 1d73264..f1c05da 100644
--- a/reference/0ad-alpine-valley/0ad-91.asm
+++ b/reference/0ad-alpine-valley/0ad-91.asm
@@ -8,215 +8,155 @@
@out(r2.y) out1
@out(r2.z) out2
@out(r2.w) out3
+@const(c10.x) 0x3f000000, 0x00000000, 0x3f800000, 0xbb449ba6
+@const(c11.x) 0xbf000000, 0x40000000, 0x3f800000, 0x3de38866
+@const(c12.x) 0x3fb8aa65, 0x3cf5c28f, 0x00000000, 0x00000000
+@const(c13.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.x, 4, r1.x
add.f r0.y, r0.w, c10.y
bary.f r0.w, 5, r1.x
bary.f r1.z, 10, r1.x
-add.f r1.w, r0.x, c11.x
-bary.f r2.x, 8, r1.x
+add.f r2.x, r0.x, c11.x
+bary.f r1.w, 11, r1.x
add.f r2.y, r0.w, c11.x
-mov.f32f32 r1.z, r1.z
-floor.f r2.z, r1.w
+bary.f r2.z, 8, r1.x
+floor.f r3.x, r2.x
rcp r0.y, r0.y
add.f r0.z, r0.z, c10.y
-floor.f r2.w, r2.y
-mov.f32f32 r3.x, r1.z
-add.f r1.z, r1.w, (neg)r2.z
+floor.f r3.y, r2.y
+bary.f r2.w, 9, r1.x
+add.f r2.x, r2.x, (neg)r3.x
(ss)mul.f r0.y, r0.z, r0.y
-add.f r0.z, r2.y, (neg)r2.w
-bary.f r1.w, 11, r1.x
-mov.f32f32 r1.z, r1.z
+absneg.f r0.z, (neg)c8.x
+add.f r2.y, r2.y, (neg)r3.y
+mov.f32f32 r3.x, r2.x
+sam (f32)(xyzw)r3.y, r1.z, s#0, t#0
+(sy)(ss)add.f r1.z, c13.y, (neg)r4.x
+mul.f r0.z, r0.z, c8.x
+mov.f32f32 r1.w, r2.y
+mul.f r4.y, c10.x, r3.x
+add.f r3.x, c11.y, (neg)r3.x
+mul.f r0.z, r0.z, r0.y
mov.f32f32 r0.y, r0.y
-absneg.f r2.y, (neg)c8.x
-mov.f32f32 r0.z, r0.z
-mul.f r2.z, c10.x, r1.z
-mov.f32f32 r1.w, r1.w
-mul.f r2.y, r2.y, c8.x
-add.f r2.w, c11.y, (neg)r1.z
-mov.f32f32 r2.z, r2.z
-mul.f r3.z, c10.x, r0.z
-mul.f r2.y, r2.y, r0.y
-mov.f32f32 r3.y, r1.w
-add.f r0.x, r0.x, (neg)r2.z
-mov.f32f32 r1.w, r3.z
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r2.z, r2.w
-mov.f32f32 r0.x, r0.x
-add.f r0.w, r0.w, (neg)r1.w
-mul.f r0.y, r2.y, r0.y
-sam (f32)(xyzw)r2.w, r3.x, s#0, t#0
-(sy)add.f r1.w, c13.y, (neg)r3.z
-add.f r2.y, c11.x, r0.x
+add.f r0.x, r0.x, (neg)r4.y
+mul.f r4.y, c10.x, r1.w
+mov.f32f32 r4.z, r3.x
+mul.f r0.y, r0.z, r0.y
+mov.f32f32 r0.z, r0.x
+add.f r0.w, r0.w, (neg)r4.y
add.f r0.x, c11.z, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r0.x, r0.x
mul.f r0.y, r0.y, c12.x
-add.f r3.w, c11.z, r0.w
-mul.f r2.y, r2.y, c3.z
-mul.f r0.x, r0.x, c3.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r4.x, r2.y
-mov.f32f32 r4.y, r0.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r4.z, r4.x
-add.f r0.w, c11.x, r0.w
-exp2 r0.y, r0.y
-(ss)mov.f32f32 r0.y, r0.y
-mov.f32f32 r4.x, r4.y
-mov.f32f32 r5.y, r0.x
-mov.f32f32 r0.x, r0.w
-add.f r0.w, c13.y, (neg)r0.y
-mov.f32f32 r6.x, r4.x
-mul.f r3.w, r3.w, c3.w
-mul.f r0.x, r0.x, c3.w
-mul.f r0.w, r0.w, c8.y
-mul.f r0.y, r0.y, c10.z
-mov.f32f32 r4.x, r3.w
-mov.f32f32 r4.y, r0.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r6.w, r2.y
-mov.f32f32 r2.y, r3.w
-mov.f32f32 r4.w, r4.y
-bary.f r3.w, 6, r1.x
-add.f r0.y, r0.y, r0.w
-mov.f32f32 r0.w, r4.x
-mov.f32f32 r5.z, r0.x
-add.f r0.x, r3.w, c10.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r6.y, r0.w
-mov.f32f32 r7.x, r2.y
-mov.f32f32 r0.w, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r2.y, r0.x
-mov.f32f32 r3.w, r0.x
-mov.f32f32 r5.x, r0.w
-max.f r0.y, r0.y, c10.y
-mov.f32f32 r6.z, r2.y
-mov.f32f32 r5.w, r3.w
-mov.f32f32 r0.x, r0.x
-mul.f r0.w, r1.w, c4.z
-add.f r1.w, c13.y, (neg)r3.z
-sam.s (f32)(x)r3.w, r4.z, s#2, t#2
-(sy)mov.f32f32 r2.y, r3.w
-min.f r0.y, r0.y, c10.z
-sam.s (f32)(x)r3.w, r6.x, s#2, t#2
-(sy)mov.f32f32 r3.w, r3.w
-sam.s (f32)(x)r4.x, r5.y, s#2, t#2
-(sy)mov.f32f32 r4.x, r4.x
-mov.f32f32 r2.y, r2.y
-add.f r4.y, c11.y, (neg)r0.z
-(ss)add.f r4.z, c13.y, (neg)r0.y
-add.f r4.w, c13.y, (neg)r0.y
-add.f r5.x, c13.y, (neg)r0.y
-mov.f32f32 r4.y, r4.y
-mul.f r4.z, r4.z, c7.z
-mul.f r4.w, r4.w, c7.y
-mul.f r5.x, r5.x, c7.x
-mul.f r5.y, r2.z, r4.y
-mov.f32f32 r7.y, r0.x
-mul.f r0.x, r3.z, c10.z
-mul.f r1.w, r1.w, c4.y
-mul.f r2.y, r5.y, r2.y
-add.f r1.z, r1.z, c10.z
-add.f r0.x, r0.x, r0.w
-mul.f r0.w, r3.z, c10.z
-sam.s (f32)(x)r5.y, r6.w, s#2, t#2
-(sy)mov.f32f32 r5.y, r5.y
-mul.f r4.y, r1.z, r4.y
-add.f r5.z, c13.y, (neg)r3.z
-mul.f r0.x, r3.y, r0.x
-add.f r0.w, r0.w, r1.w
-mad.f32 r1.w, r4.y, r4.x, r2.y
-mul.f r2.y, r5.z, c4.x
-mov.f32f32 r0.x, r0.x
-bary.f r3.y, 2, r1.x
-mov.f32f32 r1.w, r1.w
-add.f r0.z, r0.z, c10.z
-mul.f r0.w, r3.x, r0.w
-mul.f r3.x, r3.z, c10.z
-mov.f32f32 r2.x, r2.x
-mul.f r2.z, r2.z, r0.z
-mul.f r3.y, r0.x, r3.y
-mov.f32f32 r0.w, r0.w
-add.f r2.y, r3.x, r2.y
-mad.f32 r1.w, r2.z, r5.y, r1.w
-bary.f r2.z, 1, r1.x
-mov.f32f32 r4.x, r2.x
-mul.f r2.x, r2.w, r2.y
-mov.f32f32 r1.w, r1.w
-mul.f r0.z, r1.z, r0.z
-mul.f r1.z, r0.w, r2.z
-mov.f32f32 r2.x, r2.x
-nop
-mad.f32 r0.z, r0.z, r3.w, r1.w
-bary.f r1.w, 0, r1.x
-bary.f (ei)r1.x, 9, r1.x
-mov.f32f32 r1.y, c10.z
-mov.f32f32 r0.z, r0.z
-mul.f r1.w, r2.x, r1.w
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r2.w, r1.y
-mul.f r0.z, c11.w, r0.z
-mov.f32f32 r1.y, c10.y
-(rpt1)nop
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r4.y, r1.x
+add.f r0.z, c11.x, r0.z
+mov.f32f32 r4.y, r0.w
+mul.f r4.w, r0.x, c3.z
+add.f r0.x, c11.z, r0.w
+mul.f r5.z, r0.z, c3.z
+add.f r0.z, c11.x, r4.y
+mov.f32f32 r6.y, r4.w
+mul.f r5.x, r0.x, c3.w
+mov.f32f32 r7.x, r5.z
+mul.f r6.z, r0.z, c3.w
+exp2 r0.x, r0.y
+(ss)mov.f32f32 r0.y, r0.x
+bary.f r0.z, 6, r1.x
+mov.f32f32 r5.w, r5.x
+mov.f32f32 r7.y, r6.z
+add.f r0.y, c13.y, (neg)r0.y
+add.f r5.y, r0.z, c10.w
+add.f r0.z, c11.y, (neg)r1.w
+mul.f r0.x, r0.x, c10.z
+add.f r0.w, c13.y, (neg)r4.x
+mov.f32f32 r7.z, r5.y
+mul.f r0.y, r0.y, c8.y
+mov.f32f32 r6.w, r5.y
+mov.f32f32 r6.x, r5.y
(rpt1)nop
-mul.f r1.x, r3.y, r0.z
-mul.f r1.z, r1.z, r0.z
-mul.f r0.z, r1.w, r0.z
+add.f r0.x, r0.x, r0.y
+sam.s (f32)(x)r7.x, r7.x, s#2, t#2
+mov.f32f32 r0.y, r0.z
nop
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.z, r1.z
-mad.f32 r0.x, c6.z, r0.x, r1.x
-mad.f32 r0.w, c6.y, r0.w, r1.z
-mov.f32f32 r0.z, r0.z
-sam (f32)(w)r3.x, r4.x, s#1, t#1
-(sy)cmps.f.lt r1.x, r3.w, c12.y
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.w, r0.w
-mad.f32 r0.z, c6.x, r2.x, r0.z
-cov.u32f32 r1.x, r1.x
-mul.f r0.x, r0.y, r0.x
-mul.f r0.w, r0.y, r0.w
-mov.f32f32 r0.z, r0.z
-cmps.f.ne r1.x, r1.x, c10.y
-add.f r0.x, r0.x, r4.z
-mov.f32f32 r1.z, r3.w
-add.f r0.w, r0.w, r4.w
-mul.f r0.y, r0.y, r0.z
-nop
-mov.f32f32 r0.z, r1.z
-(rpt2)nop
-sel.b32 r0.z, r1.y, r1.x, r0.z
-add.f r0.y, r0.y, r5.x
-(rpt1)nop
-mul.f r0.x, r0.x, r0.z
-mul.f r0.w, r0.w, r0.z
-mul.f r0.y, r0.y, r0.z
+(ss)nop
+sam.s (f32)(x)r7.y, r6.y, s#2, t#2
+sam.s (f32)(x)r7.z, r4.w, s#2, t#2
+mul.f r0.w, r0.w, c4.z
+add.f r1.w, c13.y, (neg)r4.x
+mul.f r0.y, r4.z, r0.y
+max.f r0.x, r0.x, c10.y
+sam.s (f32)(x)r4.y, r5.z, s#2, t#2
+mul.f r4.z, r4.x, c10.z
+mul.f r1.w, r1.w, c4.y
+(sy)mul.f r0.y, r0.y, r7.x
+add.f r2.x, r2.x, c10.z
+min.f r0.x, r0.x, c10.z
+add.f r0.w, r4.z, r0.w
+mul.f r4.z, r4.x, c10.z
+mul.f r0.z, r2.x, r0.z
+(ss)add.f r4.w, c13.y, (neg)r0.x
+add.f r5.x, c13.y, (neg)r0.x
+add.f r5.y, c13.y, (neg)r0.x
+mad.f32 r0.y, r0.z, r7.y, r0.y
+add.f r0.z, r2.y, c10.z
+mul.f r2.y, r4.w, c7.z
+mul.f r4.w, r5.x, c7.y
+mul.f r5.x, r5.y, c7.x
+mul.f r3.x, r3.x, r0.z
+add.f r1.w, r4.z, r1.w
+mul.f r1.z, r1.z, c4.x
+mul.f r0.w, r3.w, r0.w
+mad.f32 r0.y, r3.x, r4.y, r0.y
+mul.f r0.z, r2.x, r0.z
+mul.f r2.x, r4.x, c10.z
+mov.f32f32 r3.x, r0.w
+bary.f r3.w, 2, r1.x
+mad.f32 r0.y, r0.z, r7.z, r0.y
+mul.f r0.z, r3.z, r1.w
+add.f r1.z, r2.x, r1.z
+mul.f r1.w, r3.x, r3.w
+mul.f r0.y, c11.w, r0.y
+mov.f32f32 r2.x, r0.z
+mul.f r1.z, r3.y, r1.z
+bary.f r3.x, 1, r1.x
+mov.f32f32 r3.y, r0.y
+sam (f32)(w)r3.z, r2.z, s#1, t#1
+(sy)(ss)cmps.f.lt r2.z, r4.y, c12.y
+mov.f32f32 r3.z, r1.z
+mul.f r2.x, r2.x, r3.x
+mul.f r1.w, r1.w, r3.y
+bary.f (ei)r1.x, 0, r1.x
+mad.f32 r0.w, c6.z, r0.w, r1.w
+mul.f r1.y, r2.x, r3.y
+cov.u32f32 r1.w, r2.z
+mov.f32f32 r2.w, c10.z
+mul.f r0.w, r0.x, r0.w
+mad.f32 r0.z, c6.y, r0.z, r1.y
+mul.f r1.x, r3.z, r1.x
+cmps.f.ne r1.y, r1.w, c10.y
+add.f r0.w, r0.w, r2.y
+mov.f32f32 r1.w, c10.y
+mul.f r0.z, r0.x, r0.z
+mul.f r0.y, r1.x, r0.y
nop
-mul.f r0.x, r0.x, c5.z
-mul.f r0.z, r0.w, c5.y
-mul.f r0.y, r0.y, c5.x
+sel.b32 r1.x, r1.w, r1.y, r4.y
+add.f r0.z, r0.z, r4.w
+mad.f32 r0.y, c6.x, r1.z, r0.y
nop
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.y, r0.y
+mul.f r0.w, r0.w, r1.x
+mul.f r0.z, r0.z, r1.x
+(rpt1)nop
+mul.f r2.z, r0.w, c5.z
+mul.f r2.y, r0.z, c5.y
+mul.f r0.x, r0.x, r0.y
+(rpt2)nop
+add.f r0.x, r0.x, r5.x
+(rpt2)nop
+mul.f r0.x, r0.x, r1.x
+(rpt2)nop
+mul.f r2.x, r0.x, c5.x
+end
nop
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.y, r0.y
nop
-mov.f32f32 r2.z, r0.x
-mov.f32f32 r2.y, r0.z
-mov.f32f32 r2.x, r0.y
-end
nop
; FRAG: outputs: r2.x (1:0)
; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r2.x (5:10,cm=f,il=12,b=1) r1.x (5:11,cm=f,il=16,b=1)
-; FRAG: 212 instructions, 0 half, 8 full
+; FRAG: 149 instructions, 0 half, 8 full
diff --git a/reference/0ad-alpine-valley/0ad-92.asm b/reference/0ad-alpine-valley/0ad-92.asm
index c6a3a6f..9c8ac11 100644
--- a/reference/0ad-alpine-valley/0ad-92.asm
+++ b/reference/0ad-alpine-valley/0ad-92.asm
@@ -24,115 +24,81 @@
@out(r3.y) out13
@out(r3.z) out14
@out(r3.w) out15
+@const(c16.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
+@const(c17.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)mul.f r1.z, c11.x, r0.w
mul.f r1.w, c11.x, r0.x
mad.f32 r1.z, c12.x, r1.x, r1.z
mad.f32 r1.w, c12.x, r0.y, r1.w
-mul.f r2.x, c11.z, r0.x
-mad.f32 r1.w, c13.x, r0.z, r1.w
-mov.f32f32 r1.z, r1.z
-mad.f32 r2.x, c12.z, r0.y, r2.x
mad.f32 r1.z, c13.x, r1.y, r1.z
-add.f r1.w, r1.w, c14.x
-mad.f32 r2.x, c13.z, r0.z, r2.x
-mul.f r2.y, c11.y, r0.w
-mov.f32f32 r1.z, r1.z
-mul.f r2.z, c7.w, r1.w
-mul.f r2.w, c7.z, r1.w
-mul.f r3.x, c7.y, r1.w
+mad.f32 r1.w, c13.x, r0.z, r1.w
+mul.f r2.x, c11.z, r0.x
+mul.f r2.y, c11.y, r0.x
mul.f r1.z, r1.z, (neg)c4.x
-mad.f32 r2.y, c12.y, r1.x, r2.y
-mul.f r3.y, c11.y, r0.x
-mul.f r4.x, c7.x, r1.w
-mad.f32 r3.y, c12.y, r0.y, r3.y
-mov.f32f32 r2.y, r2.y
-mad.f32 r3.y, c13.y, r0.z, r3.y
-mad.f32 r2.y, c13.y, r1.y, r2.y
-mul.f r4.y, c0.w, r1.w
-mul.f r4.z, c0.z, r1.w
-mul.f r4.w, c0.y, r1.w
-mov.f32f32 r2.y, r2.y
-add.f r3.y, r3.y, c14.y
-mul.f r5.x, c0.x, r1.w
-add.f r5.y, r2.x, c14.z
-mad.f32 r1.z, (neg)c4.y, r2.y, r1.z
-mad.f32 r2.x, c8.w, r3.y, r2.z
-mad.f32 r2.y, c8.z, r3.y, r2.w
-mad.f32 r2.z, c8.y, r3.y, r3.x
-mov.f32f32 r1.z, r1.z
+mul.f r2.z, c11.y, r0.w
+add.f r1.w, r1.w, c14.x
+mad.f32 r2.z, c12.y, r1.x, r2.z
+mad.f32 r2.x, c12.z, r0.y, r2.x
+mad.f32 r2.z, c13.y, r1.y, r2.z
+mul.f r2.w, c7.y, r1.w
+mul.f r3.x, c7.x, r1.w
+mul.f r3.y, c7.w, r1.w
+mad.f32 r1.z, (neg)c4.y, r2.z, r1.z
mul.f r0.w, c11.z, r0.w
-mad.f32 r2.x, c9.w, r5.y, r2.x
+mad.f32 r2.y, c12.y, r0.y, r2.y
mad.f32 r0.w, c12.z, r1.x, r0.w
+mad.f32 r1.x, c13.y, r0.z, r2.y
+mad.f32 r0.w, c13.z, r1.y, r0.w
+mul.f r1.y, c7.z, r1.w
+mul.f r2.y, c0.w, r1.w
+mul.f r2.z, c0.z, r1.w
+mad.f32 r0.w, (neg)c4.z, r0.w, r1.z
+add.f r4.x, r1.x, c14.y
+mul.f r4.y, c0.y, r1.w
+mul.f r4.z, c0.x, r1.w
+max.f r0.w, c16.x, r0.w
+mad.f32 r2.w, c8.y, r4.x, r2.w
+mad.f32 r3.x, c8.x, r4.x, r3.x
+mad.f32 r3.y, c8.w, r4.x, r3.y
+mov.f32f32 r4.w, r0.w
+mad.f32 r2.x, c13.z, r0.z, r2.x
+mul.f r1.x, r0.w, c5.x
+mad.f32 r0.w, c8.z, r4.x, r1.y
+mul.f r1.z, r4.w, c5.z
+mul.f r1.y, r4.w, c5.y
+add.f r4.w, r2.x, c14.z
+mad.f32 r2.x, c1.w, r4.x, r2.y
+mad.f32 r2.y, c1.z, r4.x, r2.z
+mad.f32 r2.z, c1.y, r4.x, r4.y
+mad.f32 r2.w, c9.y, r4.w, r2.w
mul.f r0.x, c11.w, r0.x
-mad.f32 r1.x, c9.z, r5.y, r2.y
-mad.f32 r2.y, c9.y, r5.y, r2.z
-mov.f32f32 r0.w, r0.w
+mad.f32 r3.x, c9.x, r4.w, r3.x
mad.f32 r0.x, c12.w, r0.y, r0.x
-mad.f32 r0.y, c13.z, r1.y, r0.w
+mad.f32 r0.y, c9.w, r4.w, r3.y
mad.f32 r0.x, c13.w, r0.z, r0.x
-mad.f32 r0.z, c8.x, r3.y, r4.x
-mad.f32 r0.w, c1.w, r3.y, r4.y
-mov.f32f32 r0.y, r0.y
+mad.f32 r0.z, c9.z, r4.w, r0.w
+mad.f32 r0.w, c2.w, r4.w, r2.x
+mad.f32 r4.y, c2.z, r4.w, r2.y
add.f r0.x, r0.x, c14.w
-mad.f32 r0.z, c9.x, r5.y, r0.z
-mad.f32 r0.w, c2.w, r5.y, r0.w
-mad.f32 r0.y, (neg)c4.z, r0.y, r1.z
-mad.f32 r1.y, c10.w, r0.x, r2.x
-mad.f32 r1.x, c10.z, r0.x, r1.x
-mad.f32 r1.z, c10.y, r0.x, r2.y
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.x, r1.x
-mul.f r1.z, r1.z, c15.y
-max.f r0.y, c16.x, r0.y
-mov.f32f32 r2.w, r1.y
-mov.f32f32 r2.z, r1.x
-mov.f32f32 r2.y, r1.z
-mov.f32f32 r0.y, r0.y
-mad.f32 r0.z, c10.x, r0.x, r0.z
+mad.f32 r5.x, c2.y, r4.w, r2.z
+mad.f32 r4.x, c1.x, r4.x, r4.z
+mad.f32 r3.y, c6.x, r4.w, c6.y
+mad.f32 r2.x, c10.y, r0.x, r2.w
+mad.f32 r3.x, c10.x, r0.x, r3.x
+mad.f32 r2.w, c10.w, r0.x, r0.y
+mad.f32 r2.z, c10.z, r0.x, r0.z
+mul.f r2.y, r2.x, c15.y
+mul.f r2.x, r3.x, c15.x
mad.f32 r0.w, c3.w, r0.x, r0.w
-mad.f32 r3.x, c1.z, r3.y, r4.z
-mul.f r1.x, r0.y, c5.z
-mul.f r1.y, r0.y, c5.y
-mul.f r0.y, r0.y, c5.x
-mul.f r0.z, r0.z, c15.x
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r0.y, r0.y
-nop
-mov.f32f32 r1.z, r1.x
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.x, r0.y
-mov.f32f32 r2.x, r0.z
-mov.f32f32 r0.w, r0.w
-mad.f32 r0.y, c2.z, r5.y, r3.x
-mad.f32 r0.z, c1.y, r3.y, r4.w
-mad.f32 r0.y, c3.z, r0.x, r0.y
-mad.f32 r3.x, c2.y, r5.y, r0.z
-mad.f32 r3.y, c1.x, r3.y, r5.x
-mad.f32 r4.x, c6.x, r5.y, c6.y
-mov.f32f32 r0.z, r0.y
-mad.f32 r0.y, c3.y, r0.x, r3.x
-mad.f32 r3.x, c2.x, r5.y, r3.y
-mov.f32f32 r3.y, r4.x
-mad.f32 r1.w, c6.x, r1.w, c6.y
-mov.f32f32 r0.y, r0.y
-mad.f32 r0.x, c3.x, r0.x, r3.x
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r1.w, r1.w
-nop
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r3.x, r1.w
-mov.f32f32 r1.w, r3.z
-mov.f32f32 r4.x, (0.000000)
-mov.f32f32 r3.w, r3.w
-nop
-mov.f32f32 r3.z, r1.w
-mov.f32f32 r1.w, r4.x
+mad.f32 r0.z, c3.z, r0.x, r4.y
+mad.f32 r0.y, c3.y, r0.x, r5.x
+mad.f32 r4.x, c2.x, r4.w, r4.x
+mad.f32 r3.x, c6.x, r1.w, c6.y
+mad.f32 r0.x, c3.x, r0.x, r4.x
+mov.f32f32 r1.w, (0.000000)
end
nop
; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11)
; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r3.z (0:0,cm=3,il=16,b=0)
-; VERT: 107 instructions, 0 half, 6 full
+; VERT: 71 instructions, 0 half, 6 full
diff --git a/reference/0ad-alpine-valley/0ad-93.asm b/reference/0ad-alpine-valley/0ad-93.asm
index c6a3a6f..9c8ac11 100644
--- a/reference/0ad-alpine-valley/0ad-93.asm
+++ b/reference/0ad-alpine-valley/0ad-93.asm
@@ -24,115 +24,81 @@
@out(r3.y) out13
@out(r3.z) out14
@out(r3.w) out15
+@const(c16.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
+@const(c17.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)mul.f r1.z, c11.x, r0.w
mul.f r1.w, c11.x, r0.x
mad.f32 r1.z, c12.x, r1.x, r1.z
mad.f32 r1.w, c12.x, r0.y, r1.w
-mul.f r2.x, c11.z, r0.x
-mad.f32 r1.w, c13.x, r0.z, r1.w
-mov.f32f32 r1.z, r1.z
-mad.f32 r2.x, c12.z, r0.y, r2.x
mad.f32 r1.z, c13.x, r1.y, r1.z
-add.f r1.w, r1.w, c14.x
-mad.f32 r2.x, c13.z, r0.z, r2.x
-mul.f r2.y, c11.y, r0.w
-mov.f32f32 r1.z, r1.z
-mul.f r2.z, c7.w, r1.w
-mul.f r2.w, c7.z, r1.w
-mul.f r3.x, c7.y, r1.w
+mad.f32 r1.w, c13.x, r0.z, r1.w
+mul.f r2.x, c11.z, r0.x
+mul.f r2.y, c11.y, r0.x
mul.f r1.z, r1.z, (neg)c4.x
-mad.f32 r2.y, c12.y, r1.x, r2.y
-mul.f r3.y, c11.y, r0.x
-mul.f r4.x, c7.x, r1.w
-mad.f32 r3.y, c12.y, r0.y, r3.y
-mov.f32f32 r2.y, r2.y
-mad.f32 r3.y, c13.y, r0.z, r3.y
-mad.f32 r2.y, c13.y, r1.y, r2.y
-mul.f r4.y, c0.w, r1.w
-mul.f r4.z, c0.z, r1.w
-mul.f r4.w, c0.y, r1.w
-mov.f32f32 r2.y, r2.y
-add.f r3.y, r3.y, c14.y
-mul.f r5.x, c0.x, r1.w
-add.f r5.y, r2.x, c14.z
-mad.f32 r1.z, (neg)c4.y, r2.y, r1.z
-mad.f32 r2.x, c8.w, r3.y, r2.z
-mad.f32 r2.y, c8.z, r3.y, r2.w
-mad.f32 r2.z, c8.y, r3.y, r3.x
-mov.f32f32 r1.z, r1.z
+mul.f r2.z, c11.y, r0.w
+add.f r1.w, r1.w, c14.x
+mad.f32 r2.z, c12.y, r1.x, r2.z
+mad.f32 r2.x, c12.z, r0.y, r2.x
+mad.f32 r2.z, c13.y, r1.y, r2.z
+mul.f r2.w, c7.y, r1.w
+mul.f r3.x, c7.x, r1.w
+mul.f r3.y, c7.w, r1.w
+mad.f32 r1.z, (neg)c4.y, r2.z, r1.z
mul.f r0.w, c11.z, r0.w
-mad.f32 r2.x, c9.w, r5.y, r2.x
+mad.f32 r2.y, c12.y, r0.y, r2.y
mad.f32 r0.w, c12.z, r1.x, r0.w
+mad.f32 r1.x, c13.y, r0.z, r2.y
+mad.f32 r0.w, c13.z, r1.y, r0.w
+mul.f r1.y, c7.z, r1.w
+mul.f r2.y, c0.w, r1.w
+mul.f r2.z, c0.z, r1.w
+mad.f32 r0.w, (neg)c4.z, r0.w, r1.z
+add.f r4.x, r1.x, c14.y
+mul.f r4.y, c0.y, r1.w
+mul.f r4.z, c0.x, r1.w
+max.f r0.w, c16.x, r0.w
+mad.f32 r2.w, c8.y, r4.x, r2.w
+mad.f32 r3.x, c8.x, r4.x, r3.x
+mad.f32 r3.y, c8.w, r4.x, r3.y
+mov.f32f32 r4.w, r0.w
+mad.f32 r2.x, c13.z, r0.z, r2.x
+mul.f r1.x, r0.w, c5.x
+mad.f32 r0.w, c8.z, r4.x, r1.y
+mul.f r1.z, r4.w, c5.z
+mul.f r1.y, r4.w, c5.y
+add.f r4.w, r2.x, c14.z
+mad.f32 r2.x, c1.w, r4.x, r2.y
+mad.f32 r2.y, c1.z, r4.x, r2.z
+mad.f32 r2.z, c1.y, r4.x, r4.y
+mad.f32 r2.w, c9.y, r4.w, r2.w
mul.f r0.x, c11.w, r0.x
-mad.f32 r1.x, c9.z, r5.y, r2.y
-mad.f32 r2.y, c9.y, r5.y, r2.z
-mov.f32f32 r0.w, r0.w
+mad.f32 r3.x, c9.x, r4.w, r3.x
mad.f32 r0.x, c12.w, r0.y, r0.x
-mad.f32 r0.y, c13.z, r1.y, r0.w
+mad.f32 r0.y, c9.w, r4.w, r3.y
mad.f32 r0.x, c13.w, r0.z, r0.x
-mad.f32 r0.z, c8.x, r3.y, r4.x
-mad.f32 r0.w, c1.w, r3.y, r4.y
-mov.f32f32 r0.y, r0.y
+mad.f32 r0.z, c9.z, r4.w, r0.w
+mad.f32 r0.w, c2.w, r4.w, r2.x
+mad.f32 r4.y, c2.z, r4.w, r2.y
add.f r0.x, r0.x, c14.w
-mad.f32 r0.z, c9.x, r5.y, r0.z
-mad.f32 r0.w, c2.w, r5.y, r0.w
-mad.f32 r0.y, (neg)c4.z, r0.y, r1.z
-mad.f32 r1.y, c10.w, r0.x, r2.x
-mad.f32 r1.x, c10.z, r0.x, r1.x
-mad.f32 r1.z, c10.y, r0.x, r2.y
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.x, r1.x
-mul.f r1.z, r1.z, c15.y
-max.f r0.y, c16.x, r0.y
-mov.f32f32 r2.w, r1.y
-mov.f32f32 r2.z, r1.x
-mov.f32f32 r2.y, r1.z
-mov.f32f32 r0.y, r0.y
-mad.f32 r0.z, c10.x, r0.x, r0.z
+mad.f32 r5.x, c2.y, r4.w, r2.z
+mad.f32 r4.x, c1.x, r4.x, r4.z
+mad.f32 r3.y, c6.x, r4.w, c6.y
+mad.f32 r2.x, c10.y, r0.x, r2.w
+mad.f32 r3.x, c10.x, r0.x, r3.x
+mad.f32 r2.w, c10.w, r0.x, r0.y
+mad.f32 r2.z, c10.z, r0.x, r0.z
+mul.f r2.y, r2.x, c15.y
+mul.f r2.x, r3.x, c15.x
mad.f32 r0.w, c3.w, r0.x, r0.w
-mad.f32 r3.x, c1.z, r3.y, r4.z
-mul.f r1.x, r0.y, c5.z
-mul.f r1.y, r0.y, c5.y
-mul.f r0.y, r0.y, c5.x
-mul.f r0.z, r0.z, c15.x
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r0.y, r0.y
-nop
-mov.f32f32 r1.z, r1.x
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.x, r0.y
-mov.f32f32 r2.x, r0.z
-mov.f32f32 r0.w, r0.w
-mad.f32 r0.y, c2.z, r5.y, r3.x
-mad.f32 r0.z, c1.y, r3.y, r4.w
-mad.f32 r0.y, c3.z, r0.x, r0.y
-mad.f32 r3.x, c2.y, r5.y, r0.z
-mad.f32 r3.y, c1.x, r3.y, r5.x
-mad.f32 r4.x, c6.x, r5.y, c6.y
-mov.f32f32 r0.z, r0.y
-mad.f32 r0.y, c3.y, r0.x, r3.x
-mad.f32 r3.x, c2.x, r5.y, r3.y
-mov.f32f32 r3.y, r4.x
-mad.f32 r1.w, c6.x, r1.w, c6.y
-mov.f32f32 r0.y, r0.y
-mad.f32 r0.x, c3.x, r0.x, r3.x
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r1.w, r1.w
-nop
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r3.x, r1.w
-mov.f32f32 r1.w, r3.z
-mov.f32f32 r4.x, (0.000000)
-mov.f32f32 r3.w, r3.w
-nop
-mov.f32f32 r3.z, r1.w
-mov.f32f32 r1.w, r4.x
+mad.f32 r0.z, c3.z, r0.x, r4.y
+mad.f32 r0.y, c3.y, r0.x, r5.x
+mad.f32 r4.x, c2.x, r4.w, r4.x
+mad.f32 r3.x, c6.x, r1.w, c6.y
+mad.f32 r0.x, c3.x, r0.x, r4.x
+mov.f32f32 r1.w, (0.000000)
end
nop
; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11)
; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r3.z (0:0,cm=3,il=16,b=0)
-; VERT: 107 instructions, 0 half, 6 full
+; VERT: 71 instructions, 0 half, 6 full
diff --git a/reference/0ad-alpine-valley/0ad-94.asm b/reference/0ad-alpine-valley/0ad-94.asm
index 12fbb01..5630fc2 100644
--- a/reference/0ad-alpine-valley/0ad-94.asm
+++ b/reference/0ad-alpine-valley/0ad-94.asm
@@ -8,203 +8,139 @@
@out(r2.y) out1
@out(r2.z) out2
@out(r2.w) out3
+@const(c9.x) 0x3f000000, 0x00000000, 0x3f233333, 0xbb449ba6
+@const(c10.x) 0xbf000000, 0x3f800000, 0x40000000, 0x3de38866
+@const(c11.x) 0x3fb8aa65, 0x3cf5c28f, 0x00000000, 0x00000000
+@const(c12.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.x, 4, r1.x
add.f r0.y, r0.w, c9.y
bary.f r0.w, 5, r1.x
bary.f r1.z, 10, r1.x
-add.f r1.w, r0.x, c10.x
-bary.f r2.x, 8, r1.x
+add.f r2.x, r0.x, c10.x
+bary.f r1.w, 11, r1.x
add.f r2.y, r0.w, c10.x
-mov.f32f32 r1.z, r1.z
-floor.f r2.z, r1.w
+bary.f r3.y, 8, r1.x
+floor.f r2.z, r2.x
rcp r0.y, r0.y
add.f r0.z, r0.z, c9.y
floor.f r2.w, r2.y
-mov.f32f32 r3.y, r1.z
-add.f r1.z, r1.w, (neg)r2.z
+bary.f r3.z, 9, r1.x
+add.f r3.w, r2.x, (neg)r2.z
(ss)mul.f r0.y, r0.z, r0.y
-add.f r0.z, r2.y, (neg)r2.w
-bary.f r1.w, 11, r1.x
-mov.f32f32 r1.z, r1.z
+absneg.f r0.z, (neg)c7.x
+add.f r4.x, r2.y, (neg)r2.w
+mov.f32f32 r4.y, r3.w
+sam (f32)(xyzw)r2.x, r1.z, s#0, t#0
+(ss)add.f r1.z, r3.w, c10.y
+mul.f r0.z, r0.z, c7.x
+mov.f32f32 r1.w, r4.x
+mul.f r3.w, c9.x, r4.y
+add.f r4.y, c10.z, (neg)r4.y
+mul.f r0.z, r0.z, r0.y
mov.f32f32 r0.y, r0.y
-absneg.f r2.y, (neg)c7.x
-mov.f32f32 r0.z, r0.z
-mul.f r2.z, c9.x, r1.z
-mov.f32f32 r1.w, r1.w
-add.f r2.w, c10.z, (neg)r1.z
-add.f r3.w, c10.z, (neg)r0.z
-mov.f32f32 r2.z, r2.z
-mul.f r2.y, r2.y, c7.x
-mul.f r4.x, c9.x, r0.z
-mov.f32f32 r3.z, r1.w
-add.f r0.x, r0.x, (neg)r2.z
-mul.f r1.w, r2.y, r0.y
-mov.f32f32 r2.y, r4.x
-mov.f32f32 r2.z, r2.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r1.w, r1.w
-add.f r0.w, r0.w, (neg)r2.y
-sam (f32)(xyzw)r4.x, r3.y, s#0, t#0
-(sy)cmps.f.lt r2.y, r4.w, c9.z
-add.f r2.w, c10.x, r0.x
-mul.f r0.y, r1.w, r0.y
+add.f r0.x, r0.x, (neg)r3.w
+mul.f r3.w, c9.x, r1.w
+mov.f32f32 r4.z, r4.y
+mul.f r0.y, r0.z, r0.y
+mov.f32f32 r0.z, r0.x
+add.f r0.w, r0.w, (neg)r3.w
add.f r0.x, c10.y, r0.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.w, r2.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
-add.f r2.w, c10.y, r0.w
-mul.f r1.w, r1.w, c3.z
mul.f r0.y, r0.y, c11.x
-mul.f r0.x, r0.x, c3.z
-mov.f32f32 r2.w, r2.w
-(ss)mov.f32f32 r3.y, r1.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r3.z, r0.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r5.x, r3.y
-add.f r0.w, c10.x, r0.w
-mov.f32f32 r3.y, r3.z
-mov.f32f32 r5.w, r0.x
-mov.f32f32 r0.x, r1.w
-mov.f32f32 r0.w, r0.w
-exp2 r0.y, r0.y
-(ss)mov.f32f32 r0.y, r0.y
-mov.f32f32 r6.z, r3.y
-mul.f r1.w, r2.w, c3.w
-mul.f r0.w, r0.w, c3.w
-add.f r2.w, c12.y, (neg)r0.y
-mov.f32f32 r7.y, r0.x
-mov.f32f32 r0.x, r1.w
-mov.f32f32 r3.y, r0.w
-mul.f r2.w, r2.w, c7.y
-mul.f r0.y, r0.y, c10.y
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r5.y, r3.y
-bary.f r3.y, 6, r1.x
-add.f r0.y, r0.y, r2.w
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r0.w, r0.w
-add.f r2.w, r3.y, c9.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r6.w, r1.w
-mov.f32f32 r6.x, r0.w
-mov.f32f32 r0.w, r2.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.w, r2.w
-mov.f32f32 r3.y, r2.w
-mov.f32f32 r5.z, r0.w
-max.f r0.y, r0.y, c9.y
-mov.f32f32 r7.x, r1.w
-mov.f32f32 r6.y, r3.y
-mov.f32f32 r7.z, r0.x
-mov.f32f32 r0.x, r2.w
-cov.u32f32 r0.w, r2.y
-sam.s (f32)(x)r7.w, r5.x, s#2, t#2
-(sy)mov.f32f32 r1.w, r7.w
-min.f r0.y, r0.y, c10.y
-sam.s (f32)(x)r7.w, r6.z, s#2, t#2
-(sy)mov.f32f32 r2.y, r7.w
-(ss)nop
-sam.s (f32)(x)r5.x, r5.w, s#2, t#2
-(sy)mov.f32f32 r2.w, r5.x
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r3.y, r3.w
-add.f r3.z, c12.y, (neg)r0.y
-add.f r3.w, c12.y, (neg)r0.y
-add.f r5.x, c12.y, (neg)r0.y
-mul.f r5.y, r2.z, r3.y
-mul.f r3.z, r3.z, c6.z
-mul.f r3.w, r3.w, c6.y
-mul.f r5.x, r5.x, c6.x
-mul.f r1.w, r5.y, r1.w
-add.f r1.z, r1.z, c10.y
-mov.f32f32 r7.w, r0.x
-mov.f32f32 r0.x, (0.000000)
-add.f r0.z, r0.z, c10.y
-mul.f r3.y, r1.z, r3.y
-mov.f32f32 r2.x, r2.x
-cmps.f.ne p0.x, r0.w, r0.x
-mul.f r0.x, r2.z, r0.z
-mad.f32 r0.w, r3.y, r2.w, r1.w
-sam.s (f32)(x)r5.y, r7.y, s#2, t#2
+add.f r0.z, c10.x, r0.z
+mov.f32f32 r3.w, r0.w
+mul.f r4.w, r0.x, c3.z
+add.f r0.x, c10.y, r0.w
+mul.f r5.z, r0.z, c3.z
+add.f r0.z, c10.x, r3.w
+mov.f32f32 r6.y, r4.w
+mul.f r5.x, r0.x, c3.w
+mov.f32f32 r7.x, r5.z
+mul.f r6.z, r0.z, c3.w
+exp2 r0.x, r0.y
+(ss)mov.f32f32 r0.y, r0.x
+bary.f r0.z, 6, r1.x
+mov.f32f32 r5.w, r5.x
+mov.f32f32 r7.y, r6.z
+add.f r0.y, c12.y, (neg)r0.y
+add.f r5.y, r0.z, c9.w
+add.f r0.z, c10.z, (neg)r1.w
+mul.f r0.x, r0.x, c10.y
+add.f r0.w, r4.x, c10.y
+mov.f32f32 r7.z, r5.y
+mul.f r0.y, r0.y, c7.y
+mov.f32f32 r6.w, r5.y
+mov.f32f32 r6.x, r5.y
(rpt1)nop
-(sy)mov.f32f32 r1.w, r5.y
-mov.f32f32 r0.w, r0.w
-bary.f r2.z, 0, r1.x
-mov.f32f32 r5.y, r2.x
-mad.f32 r0.x, r0.x, r1.w, r0.w
-bary.f r0.w, 1, r1.x
-bary.f r1.w, 9, r1.x
+add.f r0.x, r0.x, r0.y
+sam.s (f32)(x)r7.x, r7.x, s#2, t#2
+mov.f32f32 r0.y, r0.z
+nop
+(ss)nop
+sam.s (f32)(x)r7.y, r6.y, s#2, t#2
+sam.s (f32)(x)r7.z, r4.w, s#2, t#2
mul.f r0.z, r1.z, r0.z
-mov.f32f32 r0.x, r0.x
-bary.f (ei)r1.x, 2, r1.x
-mov.f32f32 r1.y, r1.w
-mad.f32 r0.x, r0.z, r2.y, r0.x
+mul.f r1.w, r4.y, r0.w
+mul.f r0.y, r4.z, r0.y
+max.f r0.x, r0.x, c9.y
+sam.s (f32)(x)r3.w, r5.z, s#2, t#2
+(sy)cmps.f.lt r4.x, r2.w, c9.z
+mul.f r0.w, r1.z, r0.w
+mul.f r0.y, r0.y, r7.x
+min.f r0.x, r0.x, c10.y
+mad.f32 r0.y, r0.z, r7.y, r0.y
+cov.u32f32 r0.z, r4.x
+mad.f32 r0.y, r1.w, r3.w, r0.y
+add.f r1.z, c12.y, (neg)r0.x
+mad.f32 r0.y, r0.w, r7.z, r0.y
+add.f r0.w, c12.y, (neg)r0.x
+add.f r1.w, c12.y, (neg)r0.x
+mov.f32f32 r3.w, (0.000000)
+mul.f r0.y, c10.w, r0.y
+bary.f r4.x, 0, r1.x
+mul.f r1.z, r1.z, c6.z
+mul.f r0.w, r0.w, c6.y
+mov.f32f32 r4.y, r0.y
+bary.f r4.z, 2, r1.x
+bary.f (ei)r1.x, 1, r1.x
+mul.f r1.y, r2.x, r4.x
+mul.f r1.w, r1.w, c6.x
+mul.f r4.x, r2.z, r4.z
+mul.f r1.x, r2.y, r1.x
+mul.f r0.y, r1.y, r0.y
+cmps.f.ne p0.x, r0.z, r3.w
+mul.f r0.z, r4.x, r4.y
+mul.f r1.x, r1.x, r4.y
+mad.f32 r0.z, c5.z, r2.z, r0.z
+mad.f32 r1.x, c5.y, r2.y, r1.x
+mad.f32 r0.y, c5.x, r2.x, r0.y
+sam (f32)(w)r3.y, r3.y, s#1, t#1
+(sy)cmps.f.lt r1.y, r4.x, c11.y
+mul.f r0.z, r0.x, r0.z
+mul.f r1.x, r0.x, r1.x
+mul.f r0.x, r0.x, r0.y
kill p0.x
-mov.f32f32 r0.z, r4.w
-mov.f32f32 r5.z, r1.y
-mov.f32f32 r0.x, r0.x
-mul.f r1.x, r4.z, r1.x
-mul.f r0.w, r4.y, r0.w
-mov.f32f32 r2.w, r0.z
-mul.f r0.x, c10.w, r0.x
-mul.f r0.z, r4.x, r2.z
-sam (f32)(w)r1.y, r5.y, s#1, t#1
-nop
-(sy)cmps.f.lt r1.y, r2.x, c11.y
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r1.z, r2.x
-mov.f32f32 r1.w, c9.y
-nop
-mul.f r1.x, r1.x, r0.x
-mul.f r0.w, r0.w, r0.x
-mul.f r0.x, r0.z, r0.x
+add.f r0.y, r0.z, r1.z
cov.u32f32 r0.z, r1.y
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r0.w, r0.w
-mad.f32 r1.x, c5.z, r4.z, r1.x
-mad.f32 r0.w, c5.y, r4.y, r0.w
-mov.f32f32 r0.x, r0.x
+add.f r0.w, r1.x, r0.w
+add.f r0.x, r0.x, r1.w
+mov.f32f32 r1.x, c9.y
cmps.f.ne r0.z, r0.z, c9.y
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r0.w, r0.w
-mad.f32 r0.x, c5.x, r4.x, r0.x
-mov.f32f32 r1.y, r1.z
-mul.f r1.x, r0.y, r1.x
-mul.f r0.w, r0.y, r0.w
-mov.f32f32 r0.x, r0.x
-sel.b32 r0.z, r1.w, r0.z, r1.y
-add.f r1.x, r1.x, r3.z
-add.f r0.w, r0.w, r3.w
-mul.f r0.x, r0.y, r0.x
-nop
-mul.f r0.y, r1.x, r0.z
+(rpt2)nop
+sel.b32 r0.z, r1.x, r0.z, r4.x
+(rpt2)nop
+mul.f r0.y, r0.y, r0.z
mul.f r0.w, r0.w, r0.z
-add.f r0.x, r0.x, r5.x
-nop
-mul.f r0.y, r0.y, c4.z
-mul.f r0.w, r0.w, c4.y
+(rpt1)nop
+mul.f r2.z, r0.y, c4.z
+mul.f r2.y, r0.w, c4.y
mul.f r0.x, r0.x, r0.z
-nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.w
-mul.f r0.x, r0.x, c4.x
-nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.x, r0.x
-nop
-mov.f32f32 r2.z, r0.y
-mov.f32f32 r2.y, r0.z
-mov.f32f32 r0.x, r0.x
(rpt2)nop
-mov.f32f32 r2.x, r0.x
+mul.f r2.x, r0.x, c4.x
end
nop
nop
nop
; FRAG: outputs: r2.x (1:0)
; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r2.x (5:10,cm=f,il=12,b=1) r1.x (5:11,cm=f,il=16,b=1)
-; FRAG: 196 instructions, 0 half, 8 full
+; FRAG: 133 instructions, 0 half, 8 full
diff --git a/reference/0ad-alpine-valley/0ad-95.asm b/reference/0ad-alpine-valley/0ad-95.asm
index c6a3a6f..9c8ac11 100644
--- a/reference/0ad-alpine-valley/0ad-95.asm
+++ b/reference/0ad-alpine-valley/0ad-95.asm
@@ -24,115 +24,81 @@
@out(r3.y) out13
@out(r3.z) out14
@out(r3.w) out15
+@const(c16.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
+@const(c17.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)mul.f r1.z, c11.x, r0.w
mul.f r1.w, c11.x, r0.x
mad.f32 r1.z, c12.x, r1.x, r1.z
mad.f32 r1.w, c12.x, r0.y, r1.w
-mul.f r2.x, c11.z, r0.x
-mad.f32 r1.w, c13.x, r0.z, r1.w
-mov.f32f32 r1.z, r1.z
-mad.f32 r2.x, c12.z, r0.y, r2.x
mad.f32 r1.z, c13.x, r1.y, r1.z
-add.f r1.w, r1.w, c14.x
-mad.f32 r2.x, c13.z, r0.z, r2.x
-mul.f r2.y, c11.y, r0.w
-mov.f32f32 r1.z, r1.z
-mul.f r2.z, c7.w, r1.w
-mul.f r2.w, c7.z, r1.w
-mul.f r3.x, c7.y, r1.w
+mad.f32 r1.w, c13.x, r0.z, r1.w
+mul.f r2.x, c11.z, r0.x
+mul.f r2.y, c11.y, r0.x
mul.f r1.z, r1.z, (neg)c4.x
-mad.f32 r2.y, c12.y, r1.x, r2.y
-mul.f r3.y, c11.y, r0.x
-mul.f r4.x, c7.x, r1.w
-mad.f32 r3.y, c12.y, r0.y, r3.y
-mov.f32f32 r2.y, r2.y
-mad.f32 r3.y, c13.y, r0.z, r3.y
-mad.f32 r2.y, c13.y, r1.y, r2.y
-mul.f r4.y, c0.w, r1.w
-mul.f r4.z, c0.z, r1.w
-mul.f r4.w, c0.y, r1.w
-mov.f32f32 r2.y, r2.y
-add.f r3.y, r3.y, c14.y
-mul.f r5.x, c0.x, r1.w
-add.f r5.y, r2.x, c14.z
-mad.f32 r1.z, (neg)c4.y, r2.y, r1.z
-mad.f32 r2.x, c8.w, r3.y, r2.z
-mad.f32 r2.y, c8.z, r3.y, r2.w
-mad.f32 r2.z, c8.y, r3.y, r3.x
-mov.f32f32 r1.z, r1.z
+mul.f r2.z, c11.y, r0.w
+add.f r1.w, r1.w, c14.x
+mad.f32 r2.z, c12.y, r1.x, r2.z
+mad.f32 r2.x, c12.z, r0.y, r2.x
+mad.f32 r2.z, c13.y, r1.y, r2.z
+mul.f r2.w, c7.y, r1.w
+mul.f r3.x, c7.x, r1.w
+mul.f r3.y, c7.w, r1.w
+mad.f32 r1.z, (neg)c4.y, r2.z, r1.z
mul.f r0.w, c11.z, r0.w
-mad.f32 r2.x, c9.w, r5.y, r2.x
+mad.f32 r2.y, c12.y, r0.y, r2.y
mad.f32 r0.w, c12.z, r1.x, r0.w
+mad.f32 r1.x, c13.y, r0.z, r2.y
+mad.f32 r0.w, c13.z, r1.y, r0.w
+mul.f r1.y, c7.z, r1.w
+mul.f r2.y, c0.w, r1.w
+mul.f r2.z, c0.z, r1.w
+mad.f32 r0.w, (neg)c4.z, r0.w, r1.z
+add.f r4.x, r1.x, c14.y
+mul.f r4.y, c0.y, r1.w
+mul.f r4.z, c0.x, r1.w
+max.f r0.w, c16.x, r0.w
+mad.f32 r2.w, c8.y, r4.x, r2.w
+mad.f32 r3.x, c8.x, r4.x, r3.x
+mad.f32 r3.y, c8.w, r4.x, r3.y
+mov.f32f32 r4.w, r0.w
+mad.f32 r2.x, c13.z, r0.z, r2.x
+mul.f r1.x, r0.w, c5.x
+mad.f32 r0.w, c8.z, r4.x, r1.y
+mul.f r1.z, r4.w, c5.z
+mul.f r1.y, r4.w, c5.y
+add.f r4.w, r2.x, c14.z
+mad.f32 r2.x, c1.w, r4.x, r2.y
+mad.f32 r2.y, c1.z, r4.x, r2.z
+mad.f32 r2.z, c1.y, r4.x, r4.y
+mad.f32 r2.w, c9.y, r4.w, r2.w
mul.f r0.x, c11.w, r0.x
-mad.f32 r1.x, c9.z, r5.y, r2.y
-mad.f32 r2.y, c9.y, r5.y, r2.z
-mov.f32f32 r0.w, r0.w
+mad.f32 r3.x, c9.x, r4.w, r3.x
mad.f32 r0.x, c12.w, r0.y, r0.x
-mad.f32 r0.y, c13.z, r1.y, r0.w
+mad.f32 r0.y, c9.w, r4.w, r3.y
mad.f32 r0.x, c13.w, r0.z, r0.x
-mad.f32 r0.z, c8.x, r3.y, r4.x
-mad.f32 r0.w, c1.w, r3.y, r4.y
-mov.f32f32 r0.y, r0.y
+mad.f32 r0.z, c9.z, r4.w, r0.w
+mad.f32 r0.w, c2.w, r4.w, r2.x
+mad.f32 r4.y, c2.z, r4.w, r2.y
add.f r0.x, r0.x, c14.w
-mad.f32 r0.z, c9.x, r5.y, r0.z
-mad.f32 r0.w, c2.w, r5.y, r0.w
-mad.f32 r0.y, (neg)c4.z, r0.y, r1.z
-mad.f32 r1.y, c10.w, r0.x, r2.x
-mad.f32 r1.x, c10.z, r0.x, r1.x
-mad.f32 r1.z, c10.y, r0.x, r2.y
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.x, r1.x
-mul.f r1.z, r1.z, c15.y
-max.f r0.y, c16.x, r0.y
-mov.f32f32 r2.w, r1.y
-mov.f32f32 r2.z, r1.x
-mov.f32f32 r2.y, r1.z
-mov.f32f32 r0.y, r0.y
-mad.f32 r0.z, c10.x, r0.x, r0.z
+mad.f32 r5.x, c2.y, r4.w, r2.z
+mad.f32 r4.x, c1.x, r4.x, r4.z
+mad.f32 r3.y, c6.x, r4.w, c6.y
+mad.f32 r2.x, c10.y, r0.x, r2.w
+mad.f32 r3.x, c10.x, r0.x, r3.x
+mad.f32 r2.w, c10.w, r0.x, r0.y
+mad.f32 r2.z, c10.z, r0.x, r0.z
+mul.f r2.y, r2.x, c15.y
+mul.f r2.x, r3.x, c15.x
mad.f32 r0.w, c3.w, r0.x, r0.w
-mad.f32 r3.x, c1.z, r3.y, r4.z
-mul.f r1.x, r0.y, c5.z
-mul.f r1.y, r0.y, c5.y
-mul.f r0.y, r0.y, c5.x
-mul.f r0.z, r0.z, c15.x
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r0.y, r0.y
-nop
-mov.f32f32 r1.z, r1.x
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.x, r0.y
-mov.f32f32 r2.x, r0.z
-mov.f32f32 r0.w, r0.w
-mad.f32 r0.y, c2.z, r5.y, r3.x
-mad.f32 r0.z, c1.y, r3.y, r4.w
-mad.f32 r0.y, c3.z, r0.x, r0.y
-mad.f32 r3.x, c2.y, r5.y, r0.z
-mad.f32 r3.y, c1.x, r3.y, r5.x
-mad.f32 r4.x, c6.x, r5.y, c6.y
-mov.f32f32 r0.z, r0.y
-mad.f32 r0.y, c3.y, r0.x, r3.x
-mad.f32 r3.x, c2.x, r5.y, r3.y
-mov.f32f32 r3.y, r4.x
-mad.f32 r1.w, c6.x, r1.w, c6.y
-mov.f32f32 r0.y, r0.y
-mad.f32 r0.x, c3.x, r0.x, r3.x
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r1.w, r1.w
-nop
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r3.x, r1.w
-mov.f32f32 r1.w, r3.z
-mov.f32f32 r4.x, (0.000000)
-mov.f32f32 r3.w, r3.w
-nop
-mov.f32f32 r3.z, r1.w
-mov.f32f32 r1.w, r4.x
+mad.f32 r0.z, c3.z, r0.x, r4.y
+mad.f32 r0.y, c3.y, r0.x, r5.x
+mad.f32 r4.x, c2.x, r4.w, r4.x
+mad.f32 r3.x, c6.x, r1.w, c6.y
+mad.f32 r0.x, c3.x, r0.x, r4.x
+mov.f32f32 r1.w, (0.000000)
end
nop
; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11)
; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r3.z (0:0,cm=3,il=16,b=0)
-; VERT: 107 instructions, 0 half, 6 full
+; VERT: 71 instructions, 0 half, 6 full
diff --git a/reference/0ad-alpine-valley/0ad-96.asm b/reference/0ad-alpine-valley/0ad-96.asm
index 36d09e4..63ee7bc 100644
--- a/reference/0ad-alpine-valley/0ad-96.asm
+++ b/reference/0ad-alpine-valley/0ad-96.asm
@@ -6,8 +6,8 @@
@in(r0.w) in4
@in(r1.x) in5
@in(r1.y) in6
-@in(r4.x) in8
-@in(r4.y) in9
+@in(r3.z) in8
+@in(r3.w) in9
@out(r0.x) out0
@out(r0.y) out1
@out(r0.z) out2
@@ -24,223 +24,164 @@
@out(r3.y) out13
@out(r3.z) out14
@out(r3.w) out15
+@const(c18.x) 0x3ecccccd, 0x00000000, 0x3eaaa64c, 0x3e800000
+@const(c19.x) 0x3f000000, 0x40000000, 0xbf800000, 0x40400000
+@const(c20.x) 0x399d4952, 0x3e4ccccd, 0x3f99999a, 0x3fd9999a
+@const(c21.x) 0x3c88ce70, 0x00000000, 0x00000000, 0x00000000
+@const(c22.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)floor.f r1.z, c14.z
floor.f r1.w, c14.x
absneg.f r2.x, (abs)c17.x
absneg.f r2.y, (abs)c17.y
add.f r1.z, c14.z, (neg)r1.z
add.f r1.w, c14.x, (neg)r1.w
-mul.f r2.z, c11.x, r0.w
-add.f r2.x, r2.x, r2.y
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r1.w, r1.w
-mad.f32 r2.y, c12.x, r1.x, r2.z
-mov.f32f32 r2.x, r2.x
+mov.f32f32 r2.z, c18.y
+mul.f r2.w, c11.x, r0.w
max.f r1.z, r1.z, c18.y
max.f r1.w, r1.w, c18.y
-mov.f32f32 r2.y, r2.y
-mul.f r2.z, c16.x, r2.x
+add.f r2.x, r2.x, r2.y
+add.f r2.y, r2.z, c19.x
min.f r1.z, r1.z, c22.y
min.f r1.w, r1.w, c22.y
-mul.f r2.w, c11.z, r0.x
-mad.f32 r2.y, c13.x, r1.y, r2.y
+mul.f r2.z, c16.x, r2.x
+mul.f r3.x, c11.z, r0.x
max.f r1.z, r1.z, c18.x
max.f r1.w, r1.w, c18.x
-mad.f32 r2.w, c12.z, r0.y, r2.w
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r1.w, r1.w
-mad.f32 r2.w, c13.z, r0.z, r2.w
-mul.f r3.x, c11.x, r0.x
+mul.f r3.y, c11.x, r0.x
+mad.f32 r3.x, c12.z, r0.y, r3.x
mul.f r1.z, c16.x, r1.z
-mad.f32 r3.x, c12.x, r0.y, r3.x
-add.f r2.w, r2.w, c14.z
-mad.f32 r3.x, c13.x, r0.z, r3.x
-mov.f32f32 r1.z, r1.z
-mad.f32 r2.z, c18.w, r2.z, r2.w
-mul.f r2.y, r2.y, (neg)c4.x
-mul.f r3.y, c11.y, r0.w
+mad.f32 r3.y, c12.x, r0.y, r3.y
+mad.f32 r3.x, c13.z, r0.z, r3.x
+mad.f32 r3.y, c13.x, r0.z, r3.y
mad.f32 r1.z, c18.z, r1.z, c14.x
-add.f r3.x, r3.x, c14.x
-mov.f32f32 r2.z, r2.z
-mad.f32 r1.w, c16.x, r1.w, r3.x
-mov.f32f32 r1.z, r1.z
-mad.f32 r3.y, c12.y, r1.x, r3.y
-mov.f32f32 r2.z, r2.z
+floor.f r4.x, r2.y
+mad.f32 r2.w, c12.x, r1.x, r2.w
+add.f r3.y, r3.y, c14.x
+add.f r1.z, r1.z, c19.x
+mad.f32 r1.w, c16.x, r1.w, r3.y
+add.f r3.x, r3.x, c14.z
+add.f r2.y, r2.y, (neg)r4.x
+floor.f r4.x, r1.z
add.f r1.w, r1.w, c19.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r3.y, r3.y
+mad.f32 r2.z, c18.w, r2.z, r3.x
+mad.f32 r2.y, c19.y, r2.y, c19.z
+add.f r1.z, r1.z, (neg)r4.x
+floor.f r4.x, r1.w
add.f r2.z, r2.z, c19.x
-mad.f32 r3.y, c13.y, r1.y, r3.y
-add.f r1.z, r1.z, c19.x
-floor.f r3.z, r1.w
-floor.f r3.w, r2.z
-mov.f32f32 r3.y, r3.y
-floor.f r4.z, r1.z
-add.f r1.w, r1.w, (neg)r3.z
-add.f r2.z, r2.z, (neg)r3.w
-mad.f32 r2.y, (neg)c4.y, r3.y, r2.y
-add.f r1.z, r1.z, (neg)r4.z
-mad.f32 r1.w, c19.y, r1.w, c19.z
-mad.f32 r2.z, c19.y, r2.z, c19.z
-mov.f32f32 r2.y, r2.y
+absneg.f r2.y, (abs)r2.y
mad.f32 r1.z, c19.y, r1.z, c19.z
-absneg.f r1.w, (abs)r1.w
-absneg.f r2.z, (abs)r2.z
-mul.f r0.w, c11.z, r0.w
+add.f r4.x, r1.w, (neg)r4.x
+floor.f r4.y, r2.z
+mul.f r1.w, r2.y, r2.y
absneg.f r1.z, (abs)r1.z
-mul.f r3.y, c19.y, r1.w
-mul.f r3.z, c19.y, r2.z
-mul.f r1.w, r1.w, r1.w
-mul.f r3.w, c19.y, r1.z
-add.f r3.y, c19.w, (neg)r3.y
-add.f r3.z, c19.w, (neg)r3.z
-mul.f r2.z, r2.z, r2.z
-add.f r3.w, c19.w, (neg)r3.w
+mad.f32 r2.y, c19.y, r4.x, c19.z
+add.f r2.z, r2.z, (neg)r4.y
+mad.f32 r2.w, c13.x, r1.y, r2.w
+mul.f r4.x, c19.y, r1.z
+absneg.f r2.y, (abs)r2.y
+mad.f32 r2.z, c19.y, r2.z, c19.z
mul.f r1.z, r1.z, r1.z
-mul.f r1.w, r1.w, r3.y
-mul.f r2.z, r2.z, r3.z
+add.f r4.x, c19.w, (neg)r4.x
+mul.f r4.y, c19.y, r2.y
+absneg.f r2.z, (abs)r2.z
+mul.f r2.y, r2.y, r2.y
+mul.f r1.z, r1.z, r4.x
+mul.f r4.x, r0.y, c21.x
+add.f r4.y, c19.w, (neg)r4.y
+mul.f r4.z, c19.y, r2.z
+mul.f r2.z, r2.z, r2.z
+max.f r4.x, r4.x, c18.y
+mul.f r2.y, r2.y, r4.y
+mul.f r4.y, r0.x, r0.z
+add.f r4.z, c19.w, (neg)r4.z
+min.f r4.x, r4.x, c22.y
+mul.f r4.w, r0.y, c20.x
+mul.f r2.w, r2.w, (neg)c4.x
+mul.f r5.x, c11.y, r0.w
+min.f r4.x, r4.x, c18.w
+mul.f r4.y, r4.y, r4.w
+mul.f r2.z, r2.z, r4.z
+mad.f32 r4.z, c12.y, r1.x, r5.x
+mul.f r1.z, r1.z, r4.x
+max.f r4.x, r4.y, c18.y
+mad.f32 r4.y, c13.y, r1.y, r4.z
+mov.f32f32 r2.x, r2.x
+mov.f32f32 r4.z, r1.z
+min.f r4.x, r4.x, c22.y
+mad.f32 r2.w, (neg)c4.y, r4.y, r2.w
+mul.f r0.w, c11.z, r0.w
+max.f r2.x, r2.x, c20.z
+min.f r4.x, r4.x, c20.y
mad.f32 r0.w, c12.z, r1.x, r0.w
-mul.f r1.x, r1.z, r3.w
-mov.f32f32 r1.z, r1.w
-mov.f32f32 r1.w, r2.z
-mul.f r2.z, r0.x, r0.z
-mov.f32f32 r1.x, r1.x
-mul.f r3.y, r0.y, c21.x
-mul.f r3.z, r0.y, c20.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r3.w, c18.y
-mov.f32f32 r3.y, r3.y
-mul.f r2.z, r2.z, r3.z
-mad.f32 r0.w, c13.z, r1.y, r0.w
-add.f r1.y, r3.w, c19.x
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r0.w, r0.w
-floor.f r3.z, r1.y
-max.f r3.y, r3.y, c18.y
-mov.f32f32 r2.z, r2.z
-mad.f32 r0.w, (neg)c4.z, r0.w, r2.y
-add.f r1.y, r1.y, (neg)r3.z
-min.f r2.y, r3.y, c22.y
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r0.w, r0.w
-mad.f32 r1.y, c19.y, r1.y, c19.z
-min.f r2.y, r2.y, c18.w
-max.f r2.z, r2.z, c18.y
-max.f r0.w, c18.y, r0.w
-absneg.f r1.y, (abs)r1.y
-mov.f32f32 r2.y, r2.y
-min.f r2.z, r2.z, c22.y
-mov.f32f32 r0.w, r0.w
-mul.f r3.y, r1.y, r1.y
-mul.f r1.x, r1.x, r2.y
-min.f r1.y, r2.z, c20.y
-mul.f r2.y, r0.w, c5.z
-mul.f r2.z, r0.w, c5.y
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r2.z, r2.z
-mul.f r0.w, r0.w, c5.x
-mul.f r3.z, r1.z, r1.y
-mul.f r3.w, r1.w, r1.y
-max.f r1.w, r2.x, c20.z
-mov.f32f32 r1.z, r2.y
-mov.f32f32 r2.x, r3.z
-mov.f32f32 r1.y, r2.z
-mad.f32 r2.y, c17.x, r1.x, r2.x
-mad.f32 r1.x, c17.y, r1.x, r2.x
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.x, r2.y
-mov.f32f32 r2.y, r1.x
-min.f r2.z, r1.w, c20.w
-mov.f32f32 r1.x, r0.w
-mov.f32f32 r1.w, r3.y
-mul.f r0.w, c11.y, r0.x
-mov.f32f32 r2.z, r2.z
-mad.f32 r0.w, c12.y, r0.y, r0.w
+mul.f r1.x, c11.y, r0.x
mul.f r0.x, c11.w, r0.x
-mov.f32f32 r3.z, r4.y
-mad.f32 r3.x, r3.w, r2.z, r3.x
-mad.f32 r2.w, r3.w, r2.z, r2.w
-mad.f32 r0.w, c13.y, r0.z, r0.w
+mov.f32f32 r4.y, r4.x
+mul.f r2.z, r2.z, r4.x
+min.f r2.x, r2.x, c20.w
+mad.f32 r0.w, c13.z, r1.y, r0.w
+mul.f r1.y, r2.y, r4.y
+mad.f32 r1.x, c12.y, r0.y, r1.x
mad.f32 r0.x, c12.w, r0.y, r0.x
-add.f r0.y, r3.x, r2.x
-add.f r2.x, r2.w, r2.y
-add.f r0.w, r0.w, c14.y
-nop
-mov.f32f32 r3.x, r0.y
-mov.f32f32 r3.y, r2.x
-mad.f32 r0.y, r3.w, r2.z, r0.w
+mad.f32 r0.y, c13.y, r0.z, r1.x
+mov.f32f32 r1.x, r1.y
+mad.f32 r1.y, c17.y, r1.z, r1.y
+mad.f32 r1.x, c17.x, r4.z, r1.x
+mov.f32f32 r1.z, r2.x
+mad.f32 r2.x, r2.z, r2.x, r3.x
+add.f r0.y, r0.y, c14.y
+mad.f32 r0.w, (neg)c4.z, r0.w, r2.w
+mad.f32 r2.y, r2.z, r1.z, r3.y
+add.f r1.y, r2.x, r1.y
+mad.f32 r0.y, r2.z, r1.z, r0.y
+max.f r4.x, c18.y, r0.w
+add.f r0.w, r2.y, r1.x
+mov.f32f32 r1.x, r1.y
mad.f32 r0.x, c13.w, r0.z, r0.x
-mul.f r0.z, c7.w, r3.x
-mul.f r0.w, c7.z, r3.x
-mad.f32 r0.z, c8.w, r0.y, r0.z
-mad.f32 r0.w, c8.z, r0.y, r0.w
-mul.f r2.x, c7.y, r3.x
-mul.f r2.y, c7.x, r3.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-mad.f32 r0.z, c9.w, r3.y, r0.z
-add.f r0.x, r0.x, c14.w
-mad.f32 r0.w, c9.z, r3.y, r0.w
-mad.f32 r2.x, c8.y, r0.y, r2.x
-mad.f32 r2.y, c8.x, r0.y, r2.y
-mad.f32 r0.z, c10.w, r0.x, r0.z
-mad.f32 r0.w, c10.z, r0.x, r0.w
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-mad.f32 r2.x, c9.y, r3.y, r2.x
-mad.f32 r2.y, c9.x, r3.y, r2.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-mad.f32 r2.x, c10.y, r0.x, r2.x
-mad.f32 r2.y, c10.x, r0.x, r2.y
-mov.f32f32 r2.w, r0.z
-mov.f32f32 r2.z, r0.w
-mov.f32f32 r0.z, r2.x
-mov.f32f32 r0.w, r2.y
-mul.f r3.w, c0.w, r3.x
-mul.f r4.y, c0.z, r3.x
-mul.f r0.z, r0.z, c15.y
-mul.f r0.w, r0.w, c15.x
-(rpt1)nop
-mov.f32f32 r2.y, r0.z
-mov.f32f32 r2.x, r0.w
-mad.f32 r0.z, c1.w, r0.y, r3.w
-mad.f32 r0.w, c1.z, r0.y, r4.y
-mad.f32 r0.z, c2.w, r3.y, r0.z
-mad.f32 r0.w, c2.z, r3.y, r0.w
-mad.f32 r0.z, c3.w, r0.x, r0.z
-mad.f32 r3.w, c3.z, r0.x, r0.w
-mul.f r4.y, c0.y, r3.x
-mul.f r4.z, c0.x, r3.x
-mov.f32f32 r0.w, r0.z
-mov.f32f32 r0.z, r3.w
-mad.f32 r3.w, c1.y, r0.y, r4.y
-mad.f32 r0.y, c1.x, r0.y, r4.z
-mad.f32 r3.w, c2.y, r3.y, r3.w
-mad.f32 r0.y, c2.x, r3.y, r0.y
-mad.f32 r3.w, c3.y, r0.x, r3.w
-mad.f32 r0.x, c3.x, r0.x, r0.y
-(rpt1)nop
-mov.f32f32 r0.y, r3.w
-mov.f32f32 r0.x, r0.x
-mad.f32 r3.y, c6.x, r3.y, c6.y
-mad.f32 r3.x, c6.x, r3.x, c6.y
-(rpt1)nop
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r3.w, r3.z
-mov.f32f32 r3.z, r4.x
-(rpt2)nop
-mov.f32f32 r3.z, r3.z
+nop
+mov.f32f32 r1.z, r0.w
+mul.f r3.x, c0.x, r0.w
+mad.f32 r3.y, c6.x, r1.x, c6.y
+mov.f32f32 r4.y, r4.x
+mul.f r0.z, c7.y, r1.z
+mul.f r0.w, c7.x, r1.z
+mad.f32 r0.z, c8.y, r0.y, r0.z
+mad.f32 r0.w, c8.x, r0.y, r0.w
+mad.f32 r0.z, c9.y, r1.x, r0.z
+add.f r4.z, r0.x, c14.w
+mad.f32 r0.x, c9.x, r1.x, r0.w
+mul.f r0.w, c7.w, r1.z
+mul.f r2.x, c7.z, r1.z
+mad.f32 r0.z, c10.y, r4.z, r0.z
+mad.f32 r0.x, c10.x, r4.z, r0.x
+mad.f32 r0.w, c8.w, r0.y, r0.w
+mad.f32 r2.z, c8.z, r0.y, r2.x
+mul.f r2.y, r0.z, c15.y
+mul.f r2.x, r0.x, c15.x
+mad.f32 r0.x, c9.w, r1.x, r0.w
+mad.f32 r0.z, c9.z, r1.x, r2.z
+mad.f32 r2.w, c10.w, r4.z, r0.x
+mad.f32 r2.z, c10.z, r4.z, r0.z
+mul.f r0.x, c0.w, r1.z
+mul.f r0.z, c0.z, r1.z
+mad.f32 r0.x, c1.w, r0.y, r0.x
+mad.f32 r0.z, c1.z, r0.y, r0.z
+mad.f32 r0.x, c2.w, r1.x, r0.x
+mad.f32 r0.z, c2.z, r1.x, r0.z
+mad.f32 r0.w, c3.w, r4.z, r0.x
+mad.f32 r0.z, c3.z, r4.z, r0.z
+mul.f r0.x, c0.y, r1.z
+mad.f32 r3.x, c1.x, r0.y, r3.x
+mad.f32 r0.x, c1.y, r0.y, r0.x
+mad.f32 r0.y, c2.x, r1.y, r3.x
+mad.f32 r1.x, c2.y, r1.x, r0.x
+mad.f32 r0.x, c3.x, r4.z, r0.y
+mad.f32 r0.y, c3.y, r4.z, r1.x
+mad.f32 r3.x, c6.x, r1.z, c6.y
+mul.f r1.z, r4.y, c5.z
+mul.f r1.y, r4.y, c5.y
+mul.f r1.x, r4.x, c5.x
end
; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11)
-; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r4.x (0:0,cm=3,il=16,b=0)
-; VERT: 221 instructions, 0 half, 5 full
+; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r3.z (0:0,cm=3,il=16,b=0)
+; VERT: 152 instructions, 0 half, 6 full
diff --git a/reference/0ad-alpine-valley/0ad-97.asm b/reference/0ad-alpine-valley/0ad-97.asm
index 12fbb01..5630fc2 100644
--- a/reference/0ad-alpine-valley/0ad-97.asm
+++ b/reference/0ad-alpine-valley/0ad-97.asm
@@ -8,203 +8,139 @@
@out(r2.y) out1
@out(r2.z) out2
@out(r2.w) out3
+@const(c9.x) 0x3f000000, 0x00000000, 0x3f233333, 0xbb449ba6
+@const(c10.x) 0xbf000000, 0x3f800000, 0x40000000, 0x3de38866
+@const(c11.x) 0x3fb8aa65, 0x3cf5c28f, 0x00000000, 0x00000000
+@const(c12.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.x, 4, r1.x
add.f r0.y, r0.w, c9.y
bary.f r0.w, 5, r1.x
bary.f r1.z, 10, r1.x
-add.f r1.w, r0.x, c10.x
-bary.f r2.x, 8, r1.x
+add.f r2.x, r0.x, c10.x
+bary.f r1.w, 11, r1.x
add.f r2.y, r0.w, c10.x
-mov.f32f32 r1.z, r1.z
-floor.f r2.z, r1.w
+bary.f r3.y, 8, r1.x
+floor.f r2.z, r2.x
rcp r0.y, r0.y
add.f r0.z, r0.z, c9.y
floor.f r2.w, r2.y
-mov.f32f32 r3.y, r1.z
-add.f r1.z, r1.w, (neg)r2.z
+bary.f r3.z, 9, r1.x
+add.f r3.w, r2.x, (neg)r2.z
(ss)mul.f r0.y, r0.z, r0.y
-add.f r0.z, r2.y, (neg)r2.w
-bary.f r1.w, 11, r1.x
-mov.f32f32 r1.z, r1.z
+absneg.f r0.z, (neg)c7.x
+add.f r4.x, r2.y, (neg)r2.w
+mov.f32f32 r4.y, r3.w
+sam (f32)(xyzw)r2.x, r1.z, s#0, t#0
+(ss)add.f r1.z, r3.w, c10.y
+mul.f r0.z, r0.z, c7.x
+mov.f32f32 r1.w, r4.x
+mul.f r3.w, c9.x, r4.y
+add.f r4.y, c10.z, (neg)r4.y
+mul.f r0.z, r0.z, r0.y
mov.f32f32 r0.y, r0.y
-absneg.f r2.y, (neg)c7.x
-mov.f32f32 r0.z, r0.z
-mul.f r2.z, c9.x, r1.z
-mov.f32f32 r1.w, r1.w
-add.f r2.w, c10.z, (neg)r1.z
-add.f r3.w, c10.z, (neg)r0.z
-mov.f32f32 r2.z, r2.z
-mul.f r2.y, r2.y, c7.x
-mul.f r4.x, c9.x, r0.z
-mov.f32f32 r3.z, r1.w
-add.f r0.x, r0.x, (neg)r2.z
-mul.f r1.w, r2.y, r0.y
-mov.f32f32 r2.y, r4.x
-mov.f32f32 r2.z, r2.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r1.w, r1.w
-add.f r0.w, r0.w, (neg)r2.y
-sam (f32)(xyzw)r4.x, r3.y, s#0, t#0
-(sy)cmps.f.lt r2.y, r4.w, c9.z
-add.f r2.w, c10.x, r0.x
-mul.f r0.y, r1.w, r0.y
+add.f r0.x, r0.x, (neg)r3.w
+mul.f r3.w, c9.x, r1.w
+mov.f32f32 r4.z, r4.y
+mul.f r0.y, r0.z, r0.y
+mov.f32f32 r0.z, r0.x
+add.f r0.w, r0.w, (neg)r3.w
add.f r0.x, c10.y, r0.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.w, r2.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
-add.f r2.w, c10.y, r0.w
-mul.f r1.w, r1.w, c3.z
mul.f r0.y, r0.y, c11.x
-mul.f r0.x, r0.x, c3.z
-mov.f32f32 r2.w, r2.w
-(ss)mov.f32f32 r3.y, r1.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r3.z, r0.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r5.x, r3.y
-add.f r0.w, c10.x, r0.w
-mov.f32f32 r3.y, r3.z
-mov.f32f32 r5.w, r0.x
-mov.f32f32 r0.x, r1.w
-mov.f32f32 r0.w, r0.w
-exp2 r0.y, r0.y
-(ss)mov.f32f32 r0.y, r0.y
-mov.f32f32 r6.z, r3.y
-mul.f r1.w, r2.w, c3.w
-mul.f r0.w, r0.w, c3.w
-add.f r2.w, c12.y, (neg)r0.y
-mov.f32f32 r7.y, r0.x
-mov.f32f32 r0.x, r1.w
-mov.f32f32 r3.y, r0.w
-mul.f r2.w, r2.w, c7.y
-mul.f r0.y, r0.y, c10.y
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r5.y, r3.y
-bary.f r3.y, 6, r1.x
-add.f r0.y, r0.y, r2.w
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r0.w, r0.w
-add.f r2.w, r3.y, c9.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r6.w, r1.w
-mov.f32f32 r6.x, r0.w
-mov.f32f32 r0.w, r2.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.w, r2.w
-mov.f32f32 r3.y, r2.w
-mov.f32f32 r5.z, r0.w
-max.f r0.y, r0.y, c9.y
-mov.f32f32 r7.x, r1.w
-mov.f32f32 r6.y, r3.y
-mov.f32f32 r7.z, r0.x
-mov.f32f32 r0.x, r2.w
-cov.u32f32 r0.w, r2.y
-sam.s (f32)(x)r7.w, r5.x, s#2, t#2
-(sy)mov.f32f32 r1.w, r7.w
-min.f r0.y, r0.y, c10.y
-sam.s (f32)(x)r7.w, r6.z, s#2, t#2
-(sy)mov.f32f32 r2.y, r7.w
-(ss)nop
-sam.s (f32)(x)r5.x, r5.w, s#2, t#2
-(sy)mov.f32f32 r2.w, r5.x
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r3.y, r3.w
-add.f r3.z, c12.y, (neg)r0.y
-add.f r3.w, c12.y, (neg)r0.y
-add.f r5.x, c12.y, (neg)r0.y
-mul.f r5.y, r2.z, r3.y
-mul.f r3.z, r3.z, c6.z
-mul.f r3.w, r3.w, c6.y
-mul.f r5.x, r5.x, c6.x
-mul.f r1.w, r5.y, r1.w
-add.f r1.z, r1.z, c10.y
-mov.f32f32 r7.w, r0.x
-mov.f32f32 r0.x, (0.000000)
-add.f r0.z, r0.z, c10.y
-mul.f r3.y, r1.z, r3.y
-mov.f32f32 r2.x, r2.x
-cmps.f.ne p0.x, r0.w, r0.x
-mul.f r0.x, r2.z, r0.z
-mad.f32 r0.w, r3.y, r2.w, r1.w
-sam.s (f32)(x)r5.y, r7.y, s#2, t#2
+add.f r0.z, c10.x, r0.z
+mov.f32f32 r3.w, r0.w
+mul.f r4.w, r0.x, c3.z
+add.f r0.x, c10.y, r0.w
+mul.f r5.z, r0.z, c3.z
+add.f r0.z, c10.x, r3.w
+mov.f32f32 r6.y, r4.w
+mul.f r5.x, r0.x, c3.w
+mov.f32f32 r7.x, r5.z
+mul.f r6.z, r0.z, c3.w
+exp2 r0.x, r0.y
+(ss)mov.f32f32 r0.y, r0.x
+bary.f r0.z, 6, r1.x
+mov.f32f32 r5.w, r5.x
+mov.f32f32 r7.y, r6.z
+add.f r0.y, c12.y, (neg)r0.y
+add.f r5.y, r0.z, c9.w
+add.f r0.z, c10.z, (neg)r1.w
+mul.f r0.x, r0.x, c10.y
+add.f r0.w, r4.x, c10.y
+mov.f32f32 r7.z, r5.y
+mul.f r0.y, r0.y, c7.y
+mov.f32f32 r6.w, r5.y
+mov.f32f32 r6.x, r5.y
(rpt1)nop
-(sy)mov.f32f32 r1.w, r5.y
-mov.f32f32 r0.w, r0.w
-bary.f r2.z, 0, r1.x
-mov.f32f32 r5.y, r2.x
-mad.f32 r0.x, r0.x, r1.w, r0.w
-bary.f r0.w, 1, r1.x
-bary.f r1.w, 9, r1.x
+add.f r0.x, r0.x, r0.y
+sam.s (f32)(x)r7.x, r7.x, s#2, t#2
+mov.f32f32 r0.y, r0.z
+nop
+(ss)nop
+sam.s (f32)(x)r7.y, r6.y, s#2, t#2
+sam.s (f32)(x)r7.z, r4.w, s#2, t#2
mul.f r0.z, r1.z, r0.z
-mov.f32f32 r0.x, r0.x
-bary.f (ei)r1.x, 2, r1.x
-mov.f32f32 r1.y, r1.w
-mad.f32 r0.x, r0.z, r2.y, r0.x
+mul.f r1.w, r4.y, r0.w
+mul.f r0.y, r4.z, r0.y
+max.f r0.x, r0.x, c9.y
+sam.s (f32)(x)r3.w, r5.z, s#2, t#2
+(sy)cmps.f.lt r4.x, r2.w, c9.z
+mul.f r0.w, r1.z, r0.w
+mul.f r0.y, r0.y, r7.x
+min.f r0.x, r0.x, c10.y
+mad.f32 r0.y, r0.z, r7.y, r0.y
+cov.u32f32 r0.z, r4.x
+mad.f32 r0.y, r1.w, r3.w, r0.y
+add.f r1.z, c12.y, (neg)r0.x
+mad.f32 r0.y, r0.w, r7.z, r0.y
+add.f r0.w, c12.y, (neg)r0.x
+add.f r1.w, c12.y, (neg)r0.x
+mov.f32f32 r3.w, (0.000000)
+mul.f r0.y, c10.w, r0.y
+bary.f r4.x, 0, r1.x
+mul.f r1.z, r1.z, c6.z
+mul.f r0.w, r0.w, c6.y
+mov.f32f32 r4.y, r0.y
+bary.f r4.z, 2, r1.x
+bary.f (ei)r1.x, 1, r1.x
+mul.f r1.y, r2.x, r4.x
+mul.f r1.w, r1.w, c6.x
+mul.f r4.x, r2.z, r4.z
+mul.f r1.x, r2.y, r1.x
+mul.f r0.y, r1.y, r0.y
+cmps.f.ne p0.x, r0.z, r3.w
+mul.f r0.z, r4.x, r4.y
+mul.f r1.x, r1.x, r4.y
+mad.f32 r0.z, c5.z, r2.z, r0.z
+mad.f32 r1.x, c5.y, r2.y, r1.x
+mad.f32 r0.y, c5.x, r2.x, r0.y
+sam (f32)(w)r3.y, r3.y, s#1, t#1
+(sy)cmps.f.lt r1.y, r4.x, c11.y
+mul.f r0.z, r0.x, r0.z
+mul.f r1.x, r0.x, r1.x
+mul.f r0.x, r0.x, r0.y
kill p0.x
-mov.f32f32 r0.z, r4.w
-mov.f32f32 r5.z, r1.y
-mov.f32f32 r0.x, r0.x
-mul.f r1.x, r4.z, r1.x
-mul.f r0.w, r4.y, r0.w
-mov.f32f32 r2.w, r0.z
-mul.f r0.x, c10.w, r0.x
-mul.f r0.z, r4.x, r2.z
-sam (f32)(w)r1.y, r5.y, s#1, t#1
-nop
-(sy)cmps.f.lt r1.y, r2.x, c11.y
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r1.z, r2.x
-mov.f32f32 r1.w, c9.y
-nop
-mul.f r1.x, r1.x, r0.x
-mul.f r0.w, r0.w, r0.x
-mul.f r0.x, r0.z, r0.x
+add.f r0.y, r0.z, r1.z
cov.u32f32 r0.z, r1.y
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r0.w, r0.w
-mad.f32 r1.x, c5.z, r4.z, r1.x
-mad.f32 r0.w, c5.y, r4.y, r0.w
-mov.f32f32 r0.x, r0.x
+add.f r0.w, r1.x, r0.w
+add.f r0.x, r0.x, r1.w
+mov.f32f32 r1.x, c9.y
cmps.f.ne r0.z, r0.z, c9.y
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r0.w, r0.w
-mad.f32 r0.x, c5.x, r4.x, r0.x
-mov.f32f32 r1.y, r1.z
-mul.f r1.x, r0.y, r1.x
-mul.f r0.w, r0.y, r0.w
-mov.f32f32 r0.x, r0.x
-sel.b32 r0.z, r1.w, r0.z, r1.y
-add.f r1.x, r1.x, r3.z
-add.f r0.w, r0.w, r3.w
-mul.f r0.x, r0.y, r0.x
-nop
-mul.f r0.y, r1.x, r0.z
+(rpt2)nop
+sel.b32 r0.z, r1.x, r0.z, r4.x
+(rpt2)nop
+mul.f r0.y, r0.y, r0.z
mul.f r0.w, r0.w, r0.z
-add.f r0.x, r0.x, r5.x
-nop
-mul.f r0.y, r0.y, c4.z
-mul.f r0.w, r0.w, c4.y
+(rpt1)nop
+mul.f r2.z, r0.y, c4.z
+mul.f r2.y, r0.w, c4.y
mul.f r0.x, r0.x, r0.z
-nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.w
-mul.f r0.x, r0.x, c4.x
-nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.x, r0.x
-nop
-mov.f32f32 r2.z, r0.y
-mov.f32f32 r2.y, r0.z
-mov.f32f32 r0.x, r0.x
(rpt2)nop
-mov.f32f32 r2.x, r0.x
+mul.f r2.x, r0.x, c4.x
end
nop
nop
nop
; FRAG: outputs: r2.x (1:0)
; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r2.x (5:10,cm=f,il=12,b=1) r1.x (5:11,cm=f,il=16,b=1)
-; FRAG: 196 instructions, 0 half, 8 full
+; FRAG: 133 instructions, 0 half, 8 full
diff --git a/reference/0ad-alpine-valley/0ad-98.asm b/reference/0ad-alpine-valley/0ad-98.asm
index 36d09e4..63ee7bc 100644
--- a/reference/0ad-alpine-valley/0ad-98.asm
+++ b/reference/0ad-alpine-valley/0ad-98.asm
@@ -6,8 +6,8 @@
@in(r0.w) in4
@in(r1.x) in5
@in(r1.y) in6
-@in(r4.x) in8
-@in(r4.y) in9
+@in(r3.z) in8
+@in(r3.w) in9
@out(r0.x) out0
@out(r0.y) out1
@out(r0.z) out2
@@ -24,223 +24,164 @@
@out(r3.y) out13
@out(r3.z) out14
@out(r3.w) out15
+@const(c18.x) 0x3ecccccd, 0x00000000, 0x3eaaa64c, 0x3e800000
+@const(c19.x) 0x3f000000, 0x40000000, 0xbf800000, 0x40400000
+@const(c20.x) 0x399d4952, 0x3e4ccccd, 0x3f99999a, 0x3fd9999a
+@const(c21.x) 0x3c88ce70, 0x00000000, 0x00000000, 0x00000000
+@const(c22.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)floor.f r1.z, c14.z
floor.f r1.w, c14.x
absneg.f r2.x, (abs)c17.x
absneg.f r2.y, (abs)c17.y
add.f r1.z, c14.z, (neg)r1.z
add.f r1.w, c14.x, (neg)r1.w
-mul.f r2.z, c11.x, r0.w
-add.f r2.x, r2.x, r2.y
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r1.w, r1.w
-mad.f32 r2.y, c12.x, r1.x, r2.z
-mov.f32f32 r2.x, r2.x
+mov.f32f32 r2.z, c18.y
+mul.f r2.w, c11.x, r0.w
max.f r1.z, r1.z, c18.y
max.f r1.w, r1.w, c18.y
-mov.f32f32 r2.y, r2.y
-mul.f r2.z, c16.x, r2.x
+add.f r2.x, r2.x, r2.y
+add.f r2.y, r2.z, c19.x
min.f r1.z, r1.z, c22.y
min.f r1.w, r1.w, c22.y
-mul.f r2.w, c11.z, r0.x
-mad.f32 r2.y, c13.x, r1.y, r2.y
+mul.f r2.z, c16.x, r2.x
+mul.f r3.x, c11.z, r0.x
max.f r1.z, r1.z, c18.x
max.f r1.w, r1.w, c18.x
-mad.f32 r2.w, c12.z, r0.y, r2.w
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r1.w, r1.w
-mad.f32 r2.w, c13.z, r0.z, r2.w
-mul.f r3.x, c11.x, r0.x
+mul.f r3.y, c11.x, r0.x
+mad.f32 r3.x, c12.z, r0.y, r3.x
mul.f r1.z, c16.x, r1.z
-mad.f32 r3.x, c12.x, r0.y, r3.x
-add.f r2.w, r2.w, c14.z
-mad.f32 r3.x, c13.x, r0.z, r3.x
-mov.f32f32 r1.z, r1.z
-mad.f32 r2.z, c18.w, r2.z, r2.w
-mul.f r2.y, r2.y, (neg)c4.x
-mul.f r3.y, c11.y, r0.w
+mad.f32 r3.y, c12.x, r0.y, r3.y
+mad.f32 r3.x, c13.z, r0.z, r3.x
+mad.f32 r3.y, c13.x, r0.z, r3.y
mad.f32 r1.z, c18.z, r1.z, c14.x
-add.f r3.x, r3.x, c14.x
-mov.f32f32 r2.z, r2.z
-mad.f32 r1.w, c16.x, r1.w, r3.x
-mov.f32f32 r1.z, r1.z
-mad.f32 r3.y, c12.y, r1.x, r3.y
-mov.f32f32 r2.z, r2.z
+floor.f r4.x, r2.y
+mad.f32 r2.w, c12.x, r1.x, r2.w
+add.f r3.y, r3.y, c14.x
+add.f r1.z, r1.z, c19.x
+mad.f32 r1.w, c16.x, r1.w, r3.y
+add.f r3.x, r3.x, c14.z
+add.f r2.y, r2.y, (neg)r4.x
+floor.f r4.x, r1.z
add.f r1.w, r1.w, c19.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r3.y, r3.y
+mad.f32 r2.z, c18.w, r2.z, r3.x
+mad.f32 r2.y, c19.y, r2.y, c19.z
+add.f r1.z, r1.z, (neg)r4.x
+floor.f r4.x, r1.w
add.f r2.z, r2.z, c19.x
-mad.f32 r3.y, c13.y, r1.y, r3.y
-add.f r1.z, r1.z, c19.x
-floor.f r3.z, r1.w
-floor.f r3.w, r2.z
-mov.f32f32 r3.y, r3.y
-floor.f r4.z, r1.z
-add.f r1.w, r1.w, (neg)r3.z
-add.f r2.z, r2.z, (neg)r3.w
-mad.f32 r2.y, (neg)c4.y, r3.y, r2.y
-add.f r1.z, r1.z, (neg)r4.z
-mad.f32 r1.w, c19.y, r1.w, c19.z
-mad.f32 r2.z, c19.y, r2.z, c19.z
-mov.f32f32 r2.y, r2.y
+absneg.f r2.y, (abs)r2.y
mad.f32 r1.z, c19.y, r1.z, c19.z
-absneg.f r1.w, (abs)r1.w
-absneg.f r2.z, (abs)r2.z
-mul.f r0.w, c11.z, r0.w
+add.f r4.x, r1.w, (neg)r4.x
+floor.f r4.y, r2.z
+mul.f r1.w, r2.y, r2.y
absneg.f r1.z, (abs)r1.z
-mul.f r3.y, c19.y, r1.w
-mul.f r3.z, c19.y, r2.z
-mul.f r1.w, r1.w, r1.w
-mul.f r3.w, c19.y, r1.z
-add.f r3.y, c19.w, (neg)r3.y
-add.f r3.z, c19.w, (neg)r3.z
-mul.f r2.z, r2.z, r2.z
-add.f r3.w, c19.w, (neg)r3.w
+mad.f32 r2.y, c19.y, r4.x, c19.z
+add.f r2.z, r2.z, (neg)r4.y
+mad.f32 r2.w, c13.x, r1.y, r2.w
+mul.f r4.x, c19.y, r1.z
+absneg.f r2.y, (abs)r2.y
+mad.f32 r2.z, c19.y, r2.z, c19.z
mul.f r1.z, r1.z, r1.z
-mul.f r1.w, r1.w, r3.y
-mul.f r2.z, r2.z, r3.z
+add.f r4.x, c19.w, (neg)r4.x
+mul.f r4.y, c19.y, r2.y
+absneg.f r2.z, (abs)r2.z
+mul.f r2.y, r2.y, r2.y
+mul.f r1.z, r1.z, r4.x
+mul.f r4.x, r0.y, c21.x
+add.f r4.y, c19.w, (neg)r4.y
+mul.f r4.z, c19.y, r2.z
+mul.f r2.z, r2.z, r2.z
+max.f r4.x, r4.x, c18.y
+mul.f r2.y, r2.y, r4.y
+mul.f r4.y, r0.x, r0.z
+add.f r4.z, c19.w, (neg)r4.z
+min.f r4.x, r4.x, c22.y
+mul.f r4.w, r0.y, c20.x
+mul.f r2.w, r2.w, (neg)c4.x
+mul.f r5.x, c11.y, r0.w
+min.f r4.x, r4.x, c18.w
+mul.f r4.y, r4.y, r4.w
+mul.f r2.z, r2.z, r4.z
+mad.f32 r4.z, c12.y, r1.x, r5.x
+mul.f r1.z, r1.z, r4.x
+max.f r4.x, r4.y, c18.y
+mad.f32 r4.y, c13.y, r1.y, r4.z
+mov.f32f32 r2.x, r2.x
+mov.f32f32 r4.z, r1.z
+min.f r4.x, r4.x, c22.y
+mad.f32 r2.w, (neg)c4.y, r4.y, r2.w
+mul.f r0.w, c11.z, r0.w
+max.f r2.x, r2.x, c20.z
+min.f r4.x, r4.x, c20.y
mad.f32 r0.w, c12.z, r1.x, r0.w
-mul.f r1.x, r1.z, r3.w
-mov.f32f32 r1.z, r1.w
-mov.f32f32 r1.w, r2.z
-mul.f r2.z, r0.x, r0.z
-mov.f32f32 r1.x, r1.x
-mul.f r3.y, r0.y, c21.x
-mul.f r3.z, r0.y, c20.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r3.w, c18.y
-mov.f32f32 r3.y, r3.y
-mul.f r2.z, r2.z, r3.z
-mad.f32 r0.w, c13.z, r1.y, r0.w
-add.f r1.y, r3.w, c19.x
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r0.w, r0.w
-floor.f r3.z, r1.y
-max.f r3.y, r3.y, c18.y
-mov.f32f32 r2.z, r2.z
-mad.f32 r0.w, (neg)c4.z, r0.w, r2.y
-add.f r1.y, r1.y, (neg)r3.z
-min.f r2.y, r3.y, c22.y
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r0.w, r0.w
-mad.f32 r1.y, c19.y, r1.y, c19.z
-min.f r2.y, r2.y, c18.w
-max.f r2.z, r2.z, c18.y
-max.f r0.w, c18.y, r0.w
-absneg.f r1.y, (abs)r1.y
-mov.f32f32 r2.y, r2.y
-min.f r2.z, r2.z, c22.y
-mov.f32f32 r0.w, r0.w
-mul.f r3.y, r1.y, r1.y
-mul.f r1.x, r1.x, r2.y
-min.f r1.y, r2.z, c20.y
-mul.f r2.y, r0.w, c5.z
-mul.f r2.z, r0.w, c5.y
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r2.z, r2.z
-mul.f r0.w, r0.w, c5.x
-mul.f r3.z, r1.z, r1.y
-mul.f r3.w, r1.w, r1.y
-max.f r1.w, r2.x, c20.z
-mov.f32f32 r1.z, r2.y
-mov.f32f32 r2.x, r3.z
-mov.f32f32 r1.y, r2.z
-mad.f32 r2.y, c17.x, r1.x, r2.x
-mad.f32 r1.x, c17.y, r1.x, r2.x
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.x, r2.y
-mov.f32f32 r2.y, r1.x
-min.f r2.z, r1.w, c20.w
-mov.f32f32 r1.x, r0.w
-mov.f32f32 r1.w, r3.y
-mul.f r0.w, c11.y, r0.x
-mov.f32f32 r2.z, r2.z
-mad.f32 r0.w, c12.y, r0.y, r0.w
+mul.f r1.x, c11.y, r0.x
mul.f r0.x, c11.w, r0.x
-mov.f32f32 r3.z, r4.y
-mad.f32 r3.x, r3.w, r2.z, r3.x
-mad.f32 r2.w, r3.w, r2.z, r2.w
-mad.f32 r0.w, c13.y, r0.z, r0.w
+mov.f32f32 r4.y, r4.x
+mul.f r2.z, r2.z, r4.x
+min.f r2.x, r2.x, c20.w
+mad.f32 r0.w, c13.z, r1.y, r0.w
+mul.f r1.y, r2.y, r4.y
+mad.f32 r1.x, c12.y, r0.y, r1.x
mad.f32 r0.x, c12.w, r0.y, r0.x
-add.f r0.y, r3.x, r2.x
-add.f r2.x, r2.w, r2.y
-add.f r0.w, r0.w, c14.y
-nop
-mov.f32f32 r3.x, r0.y
-mov.f32f32 r3.y, r2.x
-mad.f32 r0.y, r3.w, r2.z, r0.w
+mad.f32 r0.y, c13.y, r0.z, r1.x
+mov.f32f32 r1.x, r1.y
+mad.f32 r1.y, c17.y, r1.z, r1.y
+mad.f32 r1.x, c17.x, r4.z, r1.x
+mov.f32f32 r1.z, r2.x
+mad.f32 r2.x, r2.z, r2.x, r3.x
+add.f r0.y, r0.y, c14.y
+mad.f32 r0.w, (neg)c4.z, r0.w, r2.w
+mad.f32 r2.y, r2.z, r1.z, r3.y
+add.f r1.y, r2.x, r1.y
+mad.f32 r0.y, r2.z, r1.z, r0.y
+max.f r4.x, c18.y, r0.w
+add.f r0.w, r2.y, r1.x
+mov.f32f32 r1.x, r1.y
mad.f32 r0.x, c13.w, r0.z, r0.x
-mul.f r0.z, c7.w, r3.x
-mul.f r0.w, c7.z, r3.x
-mad.f32 r0.z, c8.w, r0.y, r0.z
-mad.f32 r0.w, c8.z, r0.y, r0.w
-mul.f r2.x, c7.y, r3.x
-mul.f r2.y, c7.x, r3.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-mad.f32 r0.z, c9.w, r3.y, r0.z
-add.f r0.x, r0.x, c14.w
-mad.f32 r0.w, c9.z, r3.y, r0.w
-mad.f32 r2.x, c8.y, r0.y, r2.x
-mad.f32 r2.y, c8.x, r0.y, r2.y
-mad.f32 r0.z, c10.w, r0.x, r0.z
-mad.f32 r0.w, c10.z, r0.x, r0.w
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-mad.f32 r2.x, c9.y, r3.y, r2.x
-mad.f32 r2.y, c9.x, r3.y, r2.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-mad.f32 r2.x, c10.y, r0.x, r2.x
-mad.f32 r2.y, c10.x, r0.x, r2.y
-mov.f32f32 r2.w, r0.z
-mov.f32f32 r2.z, r0.w
-mov.f32f32 r0.z, r2.x
-mov.f32f32 r0.w, r2.y
-mul.f r3.w, c0.w, r3.x
-mul.f r4.y, c0.z, r3.x
-mul.f r0.z, r0.z, c15.y
-mul.f r0.w, r0.w, c15.x
-(rpt1)nop
-mov.f32f32 r2.y, r0.z
-mov.f32f32 r2.x, r0.w
-mad.f32 r0.z, c1.w, r0.y, r3.w
-mad.f32 r0.w, c1.z, r0.y, r4.y
-mad.f32 r0.z, c2.w, r3.y, r0.z
-mad.f32 r0.w, c2.z, r3.y, r0.w
-mad.f32 r0.z, c3.w, r0.x, r0.z
-mad.f32 r3.w, c3.z, r0.x, r0.w
-mul.f r4.y, c0.y, r3.x
-mul.f r4.z, c0.x, r3.x
-mov.f32f32 r0.w, r0.z
-mov.f32f32 r0.z, r3.w
-mad.f32 r3.w, c1.y, r0.y, r4.y
-mad.f32 r0.y, c1.x, r0.y, r4.z
-mad.f32 r3.w, c2.y, r3.y, r3.w
-mad.f32 r0.y, c2.x, r3.y, r0.y
-mad.f32 r3.w, c3.y, r0.x, r3.w
-mad.f32 r0.x, c3.x, r0.x, r0.y
-(rpt1)nop
-mov.f32f32 r0.y, r3.w
-mov.f32f32 r0.x, r0.x
-mad.f32 r3.y, c6.x, r3.y, c6.y
-mad.f32 r3.x, c6.x, r3.x, c6.y
-(rpt1)nop
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r3.w, r3.z
-mov.f32f32 r3.z, r4.x
-(rpt2)nop
-mov.f32f32 r3.z, r3.z
+nop
+mov.f32f32 r1.z, r0.w
+mul.f r3.x, c0.x, r0.w
+mad.f32 r3.y, c6.x, r1.x, c6.y
+mov.f32f32 r4.y, r4.x
+mul.f r0.z, c7.y, r1.z
+mul.f r0.w, c7.x, r1.z
+mad.f32 r0.z, c8.y, r0.y, r0.z
+mad.f32 r0.w, c8.x, r0.y, r0.w
+mad.f32 r0.z, c9.y, r1.x, r0.z
+add.f r4.z, r0.x, c14.w
+mad.f32 r0.x, c9.x, r1.x, r0.w
+mul.f r0.w, c7.w, r1.z
+mul.f r2.x, c7.z, r1.z
+mad.f32 r0.z, c10.y, r4.z, r0.z
+mad.f32 r0.x, c10.x, r4.z, r0.x
+mad.f32 r0.w, c8.w, r0.y, r0.w
+mad.f32 r2.z, c8.z, r0.y, r2.x
+mul.f r2.y, r0.z, c15.y
+mul.f r2.x, r0.x, c15.x
+mad.f32 r0.x, c9.w, r1.x, r0.w
+mad.f32 r0.z, c9.z, r1.x, r2.z
+mad.f32 r2.w, c10.w, r4.z, r0.x
+mad.f32 r2.z, c10.z, r4.z, r0.z
+mul.f r0.x, c0.w, r1.z
+mul.f r0.z, c0.z, r1.z
+mad.f32 r0.x, c1.w, r0.y, r0.x
+mad.f32 r0.z, c1.z, r0.y, r0.z
+mad.f32 r0.x, c2.w, r1.x, r0.x
+mad.f32 r0.z, c2.z, r1.x, r0.z
+mad.f32 r0.w, c3.w, r4.z, r0.x
+mad.f32 r0.z, c3.z, r4.z, r0.z
+mul.f r0.x, c0.y, r1.z
+mad.f32 r3.x, c1.x, r0.y, r3.x
+mad.f32 r0.x, c1.y, r0.y, r0.x
+mad.f32 r0.y, c2.x, r1.y, r3.x
+mad.f32 r1.x, c2.y, r1.x, r0.x
+mad.f32 r0.x, c3.x, r4.z, r0.y
+mad.f32 r0.y, c3.y, r4.z, r1.x
+mad.f32 r3.x, c6.x, r1.z, c6.y
+mul.f r1.z, r4.y, c5.z
+mul.f r1.y, r4.y, c5.y
+mul.f r1.x, r4.x, c5.x
end
; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11)
-; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r4.x (0:0,cm=3,il=16,b=0)
-; VERT: 221 instructions, 0 half, 5 full
+; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r3.z (0:0,cm=3,il=16,b=0)
+; VERT: 152 instructions, 0 half, 6 full
diff --git a/reference/0ad-alpine-valley/0ad-99.asm b/reference/0ad-alpine-valley/0ad-99.asm
index c6a3a6f..9c8ac11 100644
--- a/reference/0ad-alpine-valley/0ad-99.asm
+++ b/reference/0ad-alpine-valley/0ad-99.asm
@@ -24,115 +24,81 @@
@out(r3.y) out13
@out(r3.z) out14
@out(r3.w) out15
+@const(c16.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
+@const(c17.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)mul.f r1.z, c11.x, r0.w
mul.f r1.w, c11.x, r0.x
mad.f32 r1.z, c12.x, r1.x, r1.z
mad.f32 r1.w, c12.x, r0.y, r1.w
-mul.f r2.x, c11.z, r0.x
-mad.f32 r1.w, c13.x, r0.z, r1.w
-mov.f32f32 r1.z, r1.z
-mad.f32 r2.x, c12.z, r0.y, r2.x
mad.f32 r1.z, c13.x, r1.y, r1.z
-add.f r1.w, r1.w, c14.x
-mad.f32 r2.x, c13.z, r0.z, r2.x
-mul.f r2.y, c11.y, r0.w
-mov.f32f32 r1.z, r1.z
-mul.f r2.z, c7.w, r1.w
-mul.f r2.w, c7.z, r1.w
-mul.f r3.x, c7.y, r1.w
+mad.f32 r1.w, c13.x, r0.z, r1.w
+mul.f r2.x, c11.z, r0.x
+mul.f r2.y, c11.y, r0.x
mul.f r1.z, r1.z, (neg)c4.x
-mad.f32 r2.y, c12.y, r1.x, r2.y
-mul.f r3.y, c11.y, r0.x
-mul.f r4.x, c7.x, r1.w
-mad.f32 r3.y, c12.y, r0.y, r3.y
-mov.f32f32 r2.y, r2.y
-mad.f32 r3.y, c13.y, r0.z, r3.y
-mad.f32 r2.y, c13.y, r1.y, r2.y
-mul.f r4.y, c0.w, r1.w
-mul.f r4.z, c0.z, r1.w
-mul.f r4.w, c0.y, r1.w
-mov.f32f32 r2.y, r2.y
-add.f r3.y, r3.y, c14.y
-mul.f r5.x, c0.x, r1.w
-add.f r5.y, r2.x, c14.z
-mad.f32 r1.z, (neg)c4.y, r2.y, r1.z
-mad.f32 r2.x, c8.w, r3.y, r2.z
-mad.f32 r2.y, c8.z, r3.y, r2.w
-mad.f32 r2.z, c8.y, r3.y, r3.x
-mov.f32f32 r1.z, r1.z
+mul.f r2.z, c11.y, r0.w
+add.f r1.w, r1.w, c14.x
+mad.f32 r2.z, c12.y, r1.x, r2.z
+mad.f32 r2.x, c12.z, r0.y, r2.x
+mad.f32 r2.z, c13.y, r1.y, r2.z
+mul.f r2.w, c7.y, r1.w
+mul.f r3.x, c7.x, r1.w
+mul.f r3.y, c7.w, r1.w
+mad.f32 r1.z, (neg)c4.y, r2.z, r1.z
mul.f r0.w, c11.z, r0.w
-mad.f32 r2.x, c9.w, r5.y, r2.x
+mad.f32 r2.y, c12.y, r0.y, r2.y
mad.f32 r0.w, c12.z, r1.x, r0.w
+mad.f32 r1.x, c13.y, r0.z, r2.y
+mad.f32 r0.w, c13.z, r1.y, r0.w
+mul.f r1.y, c7.z, r1.w
+mul.f r2.y, c0.w, r1.w
+mul.f r2.z, c0.z, r1.w
+mad.f32 r0.w, (neg)c4.z, r0.w, r1.z
+add.f r4.x, r1.x, c14.y
+mul.f r4.y, c0.y, r1.w
+mul.f r4.z, c0.x, r1.w
+max.f r0.w, c16.x, r0.w
+mad.f32 r2.w, c8.y, r4.x, r2.w
+mad.f32 r3.x, c8.x, r4.x, r3.x
+mad.f32 r3.y, c8.w, r4.x, r3.y
+mov.f32f32 r4.w, r0.w
+mad.f32 r2.x, c13.z, r0.z, r2.x
+mul.f r1.x, r0.w, c5.x
+mad.f32 r0.w, c8.z, r4.x, r1.y
+mul.f r1.z, r4.w, c5.z
+mul.f r1.y, r4.w, c5.y
+add.f r4.w, r2.x, c14.z
+mad.f32 r2.x, c1.w, r4.x, r2.y
+mad.f32 r2.y, c1.z, r4.x, r2.z
+mad.f32 r2.z, c1.y, r4.x, r4.y
+mad.f32 r2.w, c9.y, r4.w, r2.w
mul.f r0.x, c11.w, r0.x
-mad.f32 r1.x, c9.z, r5.y, r2.y
-mad.f32 r2.y, c9.y, r5.y, r2.z
-mov.f32f32 r0.w, r0.w
+mad.f32 r3.x, c9.x, r4.w, r3.x
mad.f32 r0.x, c12.w, r0.y, r0.x
-mad.f32 r0.y, c13.z, r1.y, r0.w
+mad.f32 r0.y, c9.w, r4.w, r3.y
mad.f32 r0.x, c13.w, r0.z, r0.x
-mad.f32 r0.z, c8.x, r3.y, r4.x
-mad.f32 r0.w, c1.w, r3.y, r4.y
-mov.f32f32 r0.y, r0.y
+mad.f32 r0.z, c9.z, r4.w, r0.w
+mad.f32 r0.w, c2.w, r4.w, r2.x
+mad.f32 r4.y, c2.z, r4.w, r2.y
add.f r0.x, r0.x, c14.w
-mad.f32 r0.z, c9.x, r5.y, r0.z
-mad.f32 r0.w, c2.w, r5.y, r0.w
-mad.f32 r0.y, (neg)c4.z, r0.y, r1.z
-mad.f32 r1.y, c10.w, r0.x, r2.x
-mad.f32 r1.x, c10.z, r0.x, r1.x
-mad.f32 r1.z, c10.y, r0.x, r2.y
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.x, r1.x
-mul.f r1.z, r1.z, c15.y
-max.f r0.y, c16.x, r0.y
-mov.f32f32 r2.w, r1.y
-mov.f32f32 r2.z, r1.x
-mov.f32f32 r2.y, r1.z
-mov.f32f32 r0.y, r0.y
-mad.f32 r0.z, c10.x, r0.x, r0.z
+mad.f32 r5.x, c2.y, r4.w, r2.z
+mad.f32 r4.x, c1.x, r4.x, r4.z
+mad.f32 r3.y, c6.x, r4.w, c6.y
+mad.f32 r2.x, c10.y, r0.x, r2.w
+mad.f32 r3.x, c10.x, r0.x, r3.x
+mad.f32 r2.w, c10.w, r0.x, r0.y
+mad.f32 r2.z, c10.z, r0.x, r0.z
+mul.f r2.y, r2.x, c15.y
+mul.f r2.x, r3.x, c15.x
mad.f32 r0.w, c3.w, r0.x, r0.w
-mad.f32 r3.x, c1.z, r3.y, r4.z
-mul.f r1.x, r0.y, c5.z
-mul.f r1.y, r0.y, c5.y
-mul.f r0.y, r0.y, c5.x
-mul.f r0.z, r0.z, c15.x
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r0.y, r0.y
-nop
-mov.f32f32 r1.z, r1.x
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.x, r0.y
-mov.f32f32 r2.x, r0.z
-mov.f32f32 r0.w, r0.w
-mad.f32 r0.y, c2.z, r5.y, r3.x
-mad.f32 r0.z, c1.y, r3.y, r4.w
-mad.f32 r0.y, c3.z, r0.x, r0.y
-mad.f32 r3.x, c2.y, r5.y, r0.z
-mad.f32 r3.y, c1.x, r3.y, r5.x
-mad.f32 r4.x, c6.x, r5.y, c6.y
-mov.f32f32 r0.z, r0.y
-mad.f32 r0.y, c3.y, r0.x, r3.x
-mad.f32 r3.x, c2.x, r5.y, r3.y
-mov.f32f32 r3.y, r4.x
-mad.f32 r1.w, c6.x, r1.w, c6.y
-mov.f32f32 r0.y, r0.y
-mad.f32 r0.x, c3.x, r0.x, r3.x
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r1.w, r1.w
-nop
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r3.x, r1.w
-mov.f32f32 r1.w, r3.z
-mov.f32f32 r4.x, (0.000000)
-mov.f32f32 r3.w, r3.w
-nop
-mov.f32f32 r3.z, r1.w
-mov.f32f32 r1.w, r4.x
+mad.f32 r0.z, c3.z, r0.x, r4.y
+mad.f32 r0.y, c3.y, r0.x, r5.x
+mad.f32 r4.x, c2.x, r4.w, r4.x
+mad.f32 r3.x, c6.x, r1.w, c6.y
+mad.f32 r0.x, c3.x, r0.x, r4.x
+mov.f32f32 r1.w, (0.000000)
end
nop
; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11)
; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r3.z (0:0,cm=3,il=16,b=0)
-; VERT: 107 instructions, 0 half, 6 full
+; VERT: 71 instructions, 0 half, 6 full
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-00.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-00.asm
index 4829aaa..ca5a064 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-00.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-00.asm
@@ -2,49 +2,40 @@
; FRAG: new compiler
@in(r0.x) in0
@in(r0.y) in1
-@out(r2.x) out0
-@out(r2.y) out1
-@out(r2.z) out2
-@out(r2.w) out3
+@out(r1.x) out0
+@out(r1.y) out1
+@out(r1.z) out2
+@out(r1.w) out3
+@const(c3.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)mov.f32f32 r0.z, c2.x
-bary.f r0.w, 0, r0.x
-bary.f (ei)r0.x, 1, r0.x
-mov.f32f32 r0.y, (0.000000)
-max.f r0.z, r0.z, c3.x
-mov.f32f32 r1.x, r0.w
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.y, r0.x
-min.f r0.z, r0.z, c3.y
-mov.f32f32 r1.z, r1.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.x, r1.y
-add.f r1.w, c3.y, (neg)r0.z
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r2.y, r1.y
-mov.f32f32 r2.z, r0.y
+bary.f r1.w, 0, r0.x
+bary.f (ei)r2.x, 1, r0.x
+mov.f32f32 r1.z, (0.000000)
+max.f r0.x, r0.z, c3.x
+mov.f32f32 r1.x, r1.w
+mov.f32f32 r1.y, r2.x
+nop
+min.f r0.x, r0.x, c3.y
+sam (f32)(w)r1.w, r1.w, s#0, t#0
+(rpt2)nop
+add.f r0.y, c3.y, (neg)r0.x
+sam (f32)(w)r2.w, r1.x, s#1, t#1
+(sy)mul.f r0.x, r0.x, r2.z
(rpt1)nop
-sam (f32)(w)r3.x, r0.w, s#1, t#1
-(sy)mul.f r0.y, r1.w, r3.w
-mov.f32f32 r1.w, r0.x
-(rpt5)nop
-sam (f32)(w)r0.w, r1.z, s#0, t#0
-(sy)mul.f r0.x, r0.z, r1.z
+mul.f r0.y, r0.y, r3.z
(rpt2)nop
-add.f r0.x, r0.x, r0.y
+(ss)add.f r1.x, r0.x, r0.y
(rpt2)nop
-mov.f32f32 r0.y, r0.x
-mov.f32f32 r2.x, r0.x
-(rpt1)nop
-mov.f32f32 r0.x, r0.y
+mov.f32f32 r0.x, r1.x
+(rpt2)nop
+mov.f32f32 r0.x, r0.x
(rpt2)nop
-mov.f32f32 r2.w, r0.x
+mov.f32f32 r1.w, r0.x
end
nop
nop
nop
-; FRAG: outputs: r2.x (1:0)
+; FRAG: outputs: r1.x (1:0)
; FRAG: inputs: r0.x (5:9,cm=f,il=8,b=1)
-; FRAG: 46 instructions, 0 half, 4 full
-; pos (bary): r0.x
-; color: r2.x
+; FRAG: 36 instructions, 0 half, 4 full
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-01.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-01.asm
index f2dc1b6..5ad7d4d 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-01.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-01.asm
@@ -3,38 +3,22 @@
@in(r0.x) in0
@in(r0.y) in1
@in(r0.z) in2
-@in(r0.w) in4
-@in(r1.x) in5
-@out(r0.w) out0
-@out(r1.x) out1
-@out(r1.y) out2
-@out(r1.z) out3
-@out(r1.w) out4
-@out(r2.x) out5
-@out(r2.y) out6
-@out(r2.z) out7
-(sy)(ss)mov.f32f32 r1.y, (0.000000)
+@in(r1.x) in4
+@in(r1.y) in5
+@out(r0.x) out0
+@out(r0.y) out1
+@out(r0.z) out2
+@out(r0.w) out3
+@out(r1.x) out4
+@out(r1.y) out5
+@out(r1.z) out6
+@out(r1.w) out7
+@const(c0.x) 0x3f800000, 0x00000000, 0x00000000, 0x00000000
+(sy)(ss)mov.f32f32 r1.w, (0.000000)
mov.f32f32 r1.z, (0.000000)
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.z, r1.y
-mov.f32f32 r2.y, r1.z
-mov.f32f32 r2.x, r1.x
-mov.f32f32 r1.w, r0.w
mov.f32f32 r0.w, c0.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r1.z, r0.w
-mov.f32f32 r1.y, r0.z
-mov.f32f32 r1.x, r0.y
-mov.f32f32 r0.w, r0.x
end
-nop
-nop
-nop
-; VERT: outputs: r0.w (0:0) r1.w (5:9)
-; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=3,il=12,b=0)
-; VERT: 17 instructions, 0 half, 3 full
-; pos: r0.w
+; VERT: outputs: r0.x (0:0) r1.x (5:9)
+; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r1.x (0:0,cm=3,il=12,b=0)
+; VERT: 4 instructions, 0 half, 2 full
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-02.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-02.asm
index ab7ad75..809d0f3 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-02.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-02.asm
@@ -1,36 +1,23 @@
; options:
; FRAG: new compiler
-@out(r0.x) out0
-@out(r0.y) out1
-@out(r0.z) out2
-@out(r0.w) out3
+@out(r1.x) out0
+@out(r1.y) out1
+@out(r1.z) out2
+@out(r1.w) out3
+@const(c10.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)mov.f32f32 r0.x, c9.x
-mov.f32f32 r0.y, c9.w
-mov.f32f32 r0.z, c9.y
-mov.f32f32 r0.w, c4.z
-mov.f32f32 r1.x, r0.x
-mov.f32f32 r1.z, r0.y
-mov.f32f32 r1.y, r0.z
-mov.f32f32 r0.z, r0.w
-mov.f32f32 r1.w, c4.y
-mov.f32f32 r2.x, c4.x
+mov.f32f32 r0.y, c9.y
+mov.f32f32 r0.z, c9.w
+mov.f32f32 r1.z, c4.z
+mov.f32f32 r1.y, c4.y
+mov.f32f32 r1.x, c4.x
(rpt2)nop
-sam.p (f32)(w)r2.y, r1.x, s#0, t#0
-(sy)(ss)mul.f r1.x, r3.x, c4.w
-mov.f32f32 r0.y, r1.w
-mov.f32f32 r0.x, r2.x
-nop
-mov.f32f32 r1.x, r1.x
-(rpt2)nop
-mov.f32f32 r1.x, r1.x
-(rpt2)nop
-mov.f32f32 r0.w, r1.x
+sam.p (f32)(w)r0.x, r0.x, s#0, t#0
+(sy)mul.f r1.w, r0.w, c4.w
end
nop
nop
-; FRAG: outputs: r0.x (1:0)
+; FRAG: outputs: r1.x (1:0)
; FRAG: inputs:
-; FRAG: 28 instructions, 0 half, 4 full
-; pos (bary): r0.x
-; color: r0.x
+; FRAG: 12 instructions, 0 half, 2 full
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-101.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-101.asm
index 7fd0d8e..85a697c 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-101.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-101.asm
@@ -2,48 +2,38 @@
; FRAG: new compiler
@in(r0.z) in2
@in(r0.w) in3
-@out(r1.x) out0
-@out(r1.y) out1
-@out(r1.z) out2
-@out(r1.w) out3
+@out(r2.x) out0
+@out(r2.y) out1
+@out(r2.z) out2
+@out(r2.w) out3
+@const(c4.x) 0x3f000000, 0x00000000, 0x3f800000, 0x3fb8aa65
+@const(c5.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)add.f r0.x, r0.w, c4.y
-mov.f32f32 r0.y, c0.w
-absneg.f r0.w, (neg)c2.x
+absneg.f r0.y, (neg)c2.x
+mov.f32f32 r2.w, c0.w
add.f r0.z, r0.z, c4.y
(rpt2)nop
rcp r0.x, r0.x
(ss)mul.f r0.x, r0.z, r0.x
-mov.f32f32 r1.w, r0.y
-mul.f r0.y, r0.w, c2.x
-nop
-mov.f32f32 r0.x, r0.x
+mul.f r0.y, r0.y, c2.x
(rpt2)nop
mul.f r0.y, r0.y, r0.x
-(rpt2)nop
-mov.f32f32 r0.y, r0.y
+mov.f32f32 r0.x, r0.x
(rpt2)nop
mul.f r0.x, r0.y, r0.x
(rpt2)nop
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
mul.f r0.x, r0.x, c4.w
-(rpt2)nop
-mov.f32f32 r0.x, r0.x
(rpt5)nop
exp2 r0.x, r0.x
-(ss)mov.f32f32 r0.x, r0.x
-(rpt2)nop
-add.f r0.y, c5.y, (neg)r0.x
-mul.f r0.x, r0.x, c4.z
+(ss)mov.f32f32 r0.y, r0.x
+(ss)mul.f r0.x, r0.x, c4.z
(rpt1)nop
+add.f r0.y, c5.y, (neg)r0.y
+(rpt2)nop
mul.f r0.y, r0.y, c2.y
(rpt2)nop
add.f r0.x, r0.x, r0.y
(rpt2)nop
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
max.f r0.x, r0.x, c4.y
(rpt2)nop
min.f r0.x, r0.x, c4.z
@@ -51,25 +41,22 @@ min.f r0.x, r0.x, c4.z
add.f r0.y, c5.y, (neg)r0.x
add.f r0.z, c5.y, (neg)r0.x
add.f r0.w, c5.y, (neg)r0.x
-mul.f r2.x, r0.x, c0.z
+mul.f r1.x, r0.x, c0.z
mul.f r0.y, r0.y, c1.z
mul.f r0.z, r0.z, c1.y
mul.f r0.w, r0.w, c1.x
-mul.f r2.y, r0.x, c0.y
-add.f r0.y, r2.x, r0.y
+nop
+add.f r2.z, r1.x, r0.y
+mul.f r0.y, r0.x, c0.y
mul.f r0.x, r0.x, c0.x
(rpt1)nop
-mov.f32f32 r1.z, r0.y
-add.f r0.y, r2.y, r0.z
-add.f r0.x, r0.x, r0.w
-(rpt1)nop
-mov.f32f32 r1.y, r0.y
-mov.f32f32 r1.x, r0.x
+add.f r2.y, r0.y, r0.z
+add.f r2.x, r0.x, r0.w
end
-; FRAG: outputs: r1.x (1:0)
+nop
+nop
+nop
+; FRAG: outputs: r2.x (1:0)
; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0)
-; FRAG: 96 instructions, 0 half, 3 full
-; pos (bary): r1.x
-; color: r1.x
-; fragcoord: r0.x
+; FRAG: 70 instructions, 0 half, 3 full
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-103.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-103.asm
index 7fd0d8e..85a697c 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-103.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-103.asm
@@ -2,48 +2,38 @@
; FRAG: new compiler
@in(r0.z) in2
@in(r0.w) in3
-@out(r1.x) out0
-@out(r1.y) out1
-@out(r1.z) out2
-@out(r1.w) out3
+@out(r2.x) out0
+@out(r2.y) out1
+@out(r2.z) out2
+@out(r2.w) out3
+@const(c4.x) 0x3f000000, 0x00000000, 0x3f800000, 0x3fb8aa65
+@const(c5.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)add.f r0.x, r0.w, c4.y
-mov.f32f32 r0.y, c0.w
-absneg.f r0.w, (neg)c2.x
+absneg.f r0.y, (neg)c2.x
+mov.f32f32 r2.w, c0.w
add.f r0.z, r0.z, c4.y
(rpt2)nop
rcp r0.x, r0.x
(ss)mul.f r0.x, r0.z, r0.x
-mov.f32f32 r1.w, r0.y
-mul.f r0.y, r0.w, c2.x
-nop
-mov.f32f32 r0.x, r0.x
+mul.f r0.y, r0.y, c2.x
(rpt2)nop
mul.f r0.y, r0.y, r0.x
-(rpt2)nop
-mov.f32f32 r0.y, r0.y
+mov.f32f32 r0.x, r0.x
(rpt2)nop
mul.f r0.x, r0.y, r0.x
(rpt2)nop
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
mul.f r0.x, r0.x, c4.w
-(rpt2)nop
-mov.f32f32 r0.x, r0.x
(rpt5)nop
exp2 r0.x, r0.x
-(ss)mov.f32f32 r0.x, r0.x
-(rpt2)nop
-add.f r0.y, c5.y, (neg)r0.x
-mul.f r0.x, r0.x, c4.z
+(ss)mov.f32f32 r0.y, r0.x
+(ss)mul.f r0.x, r0.x, c4.z
(rpt1)nop
+add.f r0.y, c5.y, (neg)r0.y
+(rpt2)nop
mul.f r0.y, r0.y, c2.y
(rpt2)nop
add.f r0.x, r0.x, r0.y
(rpt2)nop
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
max.f r0.x, r0.x, c4.y
(rpt2)nop
min.f r0.x, r0.x, c4.z
@@ -51,25 +41,22 @@ min.f r0.x, r0.x, c4.z
add.f r0.y, c5.y, (neg)r0.x
add.f r0.z, c5.y, (neg)r0.x
add.f r0.w, c5.y, (neg)r0.x
-mul.f r2.x, r0.x, c0.z
+mul.f r1.x, r0.x, c0.z
mul.f r0.y, r0.y, c1.z
mul.f r0.z, r0.z, c1.y
mul.f r0.w, r0.w, c1.x
-mul.f r2.y, r0.x, c0.y
-add.f r0.y, r2.x, r0.y
+nop
+add.f r2.z, r1.x, r0.y
+mul.f r0.y, r0.x, c0.y
mul.f r0.x, r0.x, c0.x
(rpt1)nop
-mov.f32f32 r1.z, r0.y
-add.f r0.y, r2.y, r0.z
-add.f r0.x, r0.x, r0.w
-(rpt1)nop
-mov.f32f32 r1.y, r0.y
-mov.f32f32 r1.x, r0.x
+add.f r2.y, r0.y, r0.z
+add.f r2.x, r0.x, r0.w
end
-; FRAG: outputs: r1.x (1:0)
+nop
+nop
+nop
+; FRAG: outputs: r2.x (1:0)
; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0)
-; FRAG: 96 instructions, 0 half, 3 full
-; pos (bary): r1.x
-; color: r1.x
-; fragcoord: r0.x
+; FRAG: 70 instructions, 0 half, 3 full
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-105.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-105.asm
index 7fd0d8e..85a697c 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-105.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-105.asm
@@ -2,48 +2,38 @@
; FRAG: new compiler
@in(r0.z) in2
@in(r0.w) in3
-@out(r1.x) out0
-@out(r1.y) out1
-@out(r1.z) out2
-@out(r1.w) out3
+@out(r2.x) out0
+@out(r2.y) out1
+@out(r2.z) out2
+@out(r2.w) out3
+@const(c4.x) 0x3f000000, 0x00000000, 0x3f800000, 0x3fb8aa65
+@const(c5.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)add.f r0.x, r0.w, c4.y
-mov.f32f32 r0.y, c0.w
-absneg.f r0.w, (neg)c2.x
+absneg.f r0.y, (neg)c2.x
+mov.f32f32 r2.w, c0.w
add.f r0.z, r0.z, c4.y
(rpt2)nop
rcp r0.x, r0.x
(ss)mul.f r0.x, r0.z, r0.x
-mov.f32f32 r1.w, r0.y
-mul.f r0.y, r0.w, c2.x
-nop
-mov.f32f32 r0.x, r0.x
+mul.f r0.y, r0.y, c2.x
(rpt2)nop
mul.f r0.y, r0.y, r0.x
-(rpt2)nop
-mov.f32f32 r0.y, r0.y
+mov.f32f32 r0.x, r0.x
(rpt2)nop
mul.f r0.x, r0.y, r0.x
(rpt2)nop
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
mul.f r0.x, r0.x, c4.w
-(rpt2)nop
-mov.f32f32 r0.x, r0.x
(rpt5)nop
exp2 r0.x, r0.x
-(ss)mov.f32f32 r0.x, r0.x
-(rpt2)nop
-add.f r0.y, c5.y, (neg)r0.x
-mul.f r0.x, r0.x, c4.z
+(ss)mov.f32f32 r0.y, r0.x
+(ss)mul.f r0.x, r0.x, c4.z
(rpt1)nop
+add.f r0.y, c5.y, (neg)r0.y
+(rpt2)nop
mul.f r0.y, r0.y, c2.y
(rpt2)nop
add.f r0.x, r0.x, r0.y
(rpt2)nop
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
max.f r0.x, r0.x, c4.y
(rpt2)nop
min.f r0.x, r0.x, c4.z
@@ -51,25 +41,22 @@ min.f r0.x, r0.x, c4.z
add.f r0.y, c5.y, (neg)r0.x
add.f r0.z, c5.y, (neg)r0.x
add.f r0.w, c5.y, (neg)r0.x
-mul.f r2.x, r0.x, c0.z
+mul.f r1.x, r0.x, c0.z
mul.f r0.y, r0.y, c1.z
mul.f r0.z, r0.z, c1.y
mul.f r0.w, r0.w, c1.x
-mul.f r2.y, r0.x, c0.y
-add.f r0.y, r2.x, r0.y
+nop
+add.f r2.z, r1.x, r0.y
+mul.f r0.y, r0.x, c0.y
mul.f r0.x, r0.x, c0.x
(rpt1)nop
-mov.f32f32 r1.z, r0.y
-add.f r0.y, r2.y, r0.z
-add.f r0.x, r0.x, r0.w
-(rpt1)nop
-mov.f32f32 r1.y, r0.y
-mov.f32f32 r1.x, r0.x
+add.f r2.y, r0.y, r0.z
+add.f r2.x, r0.x, r0.w
end
-; FRAG: outputs: r1.x (1:0)
+nop
+nop
+nop
+; FRAG: outputs: r2.x (1:0)
; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0)
-; FRAG: 96 instructions, 0 half, 3 full
-; pos (bary): r1.x
-; color: r1.x
-; fragcoord: r0.x
+; FRAG: 70 instructions, 0 half, 3 full
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-107.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-107.asm
index 7fd0d8e..85a697c 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-107.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-107.asm
@@ -2,48 +2,38 @@
; FRAG: new compiler
@in(r0.z) in2
@in(r0.w) in3
-@out(r1.x) out0
-@out(r1.y) out1
-@out(r1.z) out2
-@out(r1.w) out3
+@out(r2.x) out0
+@out(r2.y) out1
+@out(r2.z) out2
+@out(r2.w) out3
+@const(c4.x) 0x3f000000, 0x00000000, 0x3f800000, 0x3fb8aa65
+@const(c5.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)add.f r0.x, r0.w, c4.y
-mov.f32f32 r0.y, c0.w
-absneg.f r0.w, (neg)c2.x
+absneg.f r0.y, (neg)c2.x
+mov.f32f32 r2.w, c0.w
add.f r0.z, r0.z, c4.y
(rpt2)nop
rcp r0.x, r0.x
(ss)mul.f r0.x, r0.z, r0.x
-mov.f32f32 r1.w, r0.y
-mul.f r0.y, r0.w, c2.x
-nop
-mov.f32f32 r0.x, r0.x
+mul.f r0.y, r0.y, c2.x
(rpt2)nop
mul.f r0.y, r0.y, r0.x
-(rpt2)nop
-mov.f32f32 r0.y, r0.y
+mov.f32f32 r0.x, r0.x
(rpt2)nop
mul.f r0.x, r0.y, r0.x
(rpt2)nop
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
mul.f r0.x, r0.x, c4.w
-(rpt2)nop
-mov.f32f32 r0.x, r0.x
(rpt5)nop
exp2 r0.x, r0.x
-(ss)mov.f32f32 r0.x, r0.x
-(rpt2)nop
-add.f r0.y, c5.y, (neg)r0.x
-mul.f r0.x, r0.x, c4.z
+(ss)mov.f32f32 r0.y, r0.x
+(ss)mul.f r0.x, r0.x, c4.z
(rpt1)nop
+add.f r0.y, c5.y, (neg)r0.y
+(rpt2)nop
mul.f r0.y, r0.y, c2.y
(rpt2)nop
add.f r0.x, r0.x, r0.y
(rpt2)nop
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
max.f r0.x, r0.x, c4.y
(rpt2)nop
min.f r0.x, r0.x, c4.z
@@ -51,25 +41,22 @@ min.f r0.x, r0.x, c4.z
add.f r0.y, c5.y, (neg)r0.x
add.f r0.z, c5.y, (neg)r0.x
add.f r0.w, c5.y, (neg)r0.x
-mul.f r2.x, r0.x, c0.z
+mul.f r1.x, r0.x, c0.z
mul.f r0.y, r0.y, c1.z
mul.f r0.z, r0.z, c1.y
mul.f r0.w, r0.w, c1.x
-mul.f r2.y, r0.x, c0.y
-add.f r0.y, r2.x, r0.y
+nop
+add.f r2.z, r1.x, r0.y
+mul.f r0.y, r0.x, c0.y
mul.f r0.x, r0.x, c0.x
(rpt1)nop
-mov.f32f32 r1.z, r0.y
-add.f r0.y, r2.y, r0.z
-add.f r0.x, r0.x, r0.w
-(rpt1)nop
-mov.f32f32 r1.y, r0.y
-mov.f32f32 r1.x, r0.x
+add.f r2.y, r0.y, r0.z
+add.f r2.x, r0.x, r0.w
end
-; FRAG: outputs: r1.x (1:0)
+nop
+nop
+nop
+; FRAG: outputs: r2.x (1:0)
; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0)
-; FRAG: 96 instructions, 0 half, 3 full
-; pos (bary): r1.x
-; color: r1.x
-; fragcoord: r0.x
+; FRAG: 70 instructions, 0 half, 3 full
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-111.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-111.asm
index 7eb689b..13ea129 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-111.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-111.asm
@@ -2,29 +2,20 @@
; FRAG: new compiler
@in(r0.x) in0
@in(r0.y) in1
-@out(r0.w) out0
-@out(r1.x) out1
-@out(r1.y) out2
-@out(r1.z) out3
+@out(r1.x) out0
+@out(r1.y) out1
+@out(r1.z) out2
+@out(r1.w) out3
+@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)bary.f r0.z, 0, r0.x
-bary.f (ei)r0.x, 1, r0.x
-(rpt1)nop
-mov.f32f32 r0.y, r0.z
-mov.f32f32 r0.x, r0.x
-(rpt1)nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.x
+bary.f (ei)r0.w, 1, r0.x
(rpt5)nop
-sam (f32)(xyzw)r0.x, r0.y, s#0, t#0
-(sy)mov.f32f32 r1.z, r0.w
-mov.f32f32 r1.y, r0.z
-mov.f32f32 r1.x, r0.y
-mov.f32f32 r0.w, r0.x
+sam (f32)(xyzw)r1.x, r0.z, s#0, t#0
end
nop
-; FRAG: outputs: r0.w (1:0)
+nop
+nop
+; FRAG: outputs: r1.x (1:0)
; FRAG: inputs: r0.x (5:9,cm=f,il=8,b=1)
-; FRAG: 22 instructions, 0 half, 2 full
-; pos (bary): r0.x
-; color: r0.w
+; FRAG: 10 instructions, 0 half, 2 full
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-114.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-114.asm
index 7eb689b..13ea129 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-114.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-114.asm
@@ -2,29 +2,20 @@
; FRAG: new compiler
@in(r0.x) in0
@in(r0.y) in1
-@out(r0.w) out0
-@out(r1.x) out1
-@out(r1.y) out2
-@out(r1.z) out3
+@out(r1.x) out0
+@out(r1.y) out1
+@out(r1.z) out2
+@out(r1.w) out3
+@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)bary.f r0.z, 0, r0.x
-bary.f (ei)r0.x, 1, r0.x
-(rpt1)nop
-mov.f32f32 r0.y, r0.z
-mov.f32f32 r0.x, r0.x
-(rpt1)nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.x
+bary.f (ei)r0.w, 1, r0.x
(rpt5)nop
-sam (f32)(xyzw)r0.x, r0.y, s#0, t#0
-(sy)mov.f32f32 r1.z, r0.w
-mov.f32f32 r1.y, r0.z
-mov.f32f32 r1.x, r0.y
-mov.f32f32 r0.w, r0.x
+sam (f32)(xyzw)r1.x, r0.z, s#0, t#0
end
nop
-; FRAG: outputs: r0.w (1:0)
+nop
+nop
+; FRAG: outputs: r1.x (1:0)
; FRAG: inputs: r0.x (5:9,cm=f,il=8,b=1)
-; FRAG: 22 instructions, 0 half, 2 full
-; pos (bary): r0.x
-; color: r0.w
+; FRAG: 10 instructions, 0 half, 2 full
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-116.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-116.asm
index 9e5b9a0..7495bc9 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-116.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-116.asm
@@ -2,37 +2,28 @@
; FRAG: new compiler
@in(r0.x) in0
@in(r0.y) in1
-@out(r0.w) out0
-@out(r1.x) out1
-@out(r1.y) out2
-@out(r1.z) out3
+@out(r1.x) out0
+@out(r1.y) out1
+@out(r1.z) out2
+@out(r1.w) out3
+@const(c2.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)bary.f r0.z, 0, r0.x
-bary.f (ei)r0.x, 1, r0.x
-(rpt1)nop
-mov.f32f32 r0.y, r0.z
-mov.f32f32 r0.x, r0.x
-(rpt1)nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.x
+bary.f (ei)r0.w, 1, r0.x
(rpt5)nop
-sam (f32)(xyzw)r0.x, r0.y, s#0, t#0
-(sy)add.f r0.w, r0.w, c0.w
-(ss)add.f r0.z, r0.z, c0.z
+sam (f32)(xyzw)r0.x, r0.z, s#0, t#0
+(sy)(ss)add.f r0.w, r0.w, c0.w
+add.f r0.z, r0.z, c0.z
add.f r0.y, r0.y, c0.y
add.f r0.x, r0.x, c0.x
-mul.f r0.w, r0.w, c1.w
-mul.f r0.z, r0.z, c1.z
-mul.f r0.y, r0.y, c1.y
-mul.f r0.x, r0.x, c1.x
-mov.f32f32 r1.z, r0.w
-mov.f32f32 r1.y, r0.z
-mov.f32f32 r1.x, r0.y
-mov.f32f32 r0.w, r0.x
+mul.f r1.w, r0.w, c1.w
+mul.f r1.z, r0.z, c1.z
+mul.f r1.y, r0.y, c1.y
+mul.f r1.x, r0.x, c1.x
end
nop
-; FRAG: outputs: r0.w (1:0)
+nop
+nop
+; FRAG: outputs: r1.x (1:0)
; FRAG: inputs: r0.x (5:9,cm=f,il=8,b=1)
-; FRAG: 30 instructions, 0 half, 2 full
-; pos (bary): r0.x
-; color: r0.w
+; FRAG: 18 instructions, 0 half, 2 full
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-118.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-118.asm
index 7eb689b..13ea129 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-118.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-118.asm
@@ -2,29 +2,20 @@
; FRAG: new compiler
@in(r0.x) in0
@in(r0.y) in1
-@out(r0.w) out0
-@out(r1.x) out1
-@out(r1.y) out2
-@out(r1.z) out3
+@out(r1.x) out0
+@out(r1.y) out1
+@out(r1.z) out2
+@out(r1.w) out3
+@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)bary.f r0.z, 0, r0.x
-bary.f (ei)r0.x, 1, r0.x
-(rpt1)nop
-mov.f32f32 r0.y, r0.z
-mov.f32f32 r0.x, r0.x
-(rpt1)nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.x
+bary.f (ei)r0.w, 1, r0.x
(rpt5)nop
-sam (f32)(xyzw)r0.x, r0.y, s#0, t#0
-(sy)mov.f32f32 r1.z, r0.w
-mov.f32f32 r1.y, r0.z
-mov.f32f32 r1.x, r0.y
-mov.f32f32 r0.w, r0.x
+sam (f32)(xyzw)r1.x, r0.z, s#0, t#0
end
nop
-; FRAG: outputs: r0.w (1:0)
+nop
+nop
+; FRAG: outputs: r1.x (1:0)
; FRAG: inputs: r0.x (5:9,cm=f,il=8,b=1)
-; FRAG: 22 instructions, 0 half, 2 full
-; pos (bary): r0.x
-; color: r0.w
+; FRAG: 10 instructions, 0 half, 2 full
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-120.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-120.asm
index 51a801a..a965927 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-120.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-120.asm
@@ -6,33 +6,20 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c0.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000
(sy)(ss)bary.f r0.z, 0, r0.x
-bary.f (ei)r0.x, 1, r0.x
-mov.f32f32 r0.y, c0.x
-mov.f32f32 r0.w, c0.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r1.z, r0.y
-mov.f32f32 r1.y, r0.w
-mov.f32f32 r0.y, r0.z
-mov.f32f32 r0.z, r0.x
-mov.f32f32 r0.x, c0.x
-(rpt4)nop
-sam (f32)(w)r2.x, r0.y, s#0, t#0
-(sy)(ss)add.f r0.y, c0.y, (neg)r2.w
-mov.f32f32 r1.x, r0.x
-(rpt1)nop
-mov.f32f32 r0.x, r0.y
+bary.f (ei)r0.w, 1, r0.x
+mov.f32f32 r1.z, c0.x
+mov.f32f32 r1.y, c0.x
+mov.f32f32 r1.x, c0.x
(rpt2)nop
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
-mov.f32f32 r1.w, r0.x
+sam (f32)(w)r0.x, r0.z, s#0, t#0
+(sy)add.f r1.w, c0.y, (neg)r0.w
end
nop
nop
+nop
; FRAG: outputs: r1.x (1:0)
; FRAG: inputs: r0.x (5:9,cm=f,il=8,b=1)
-; FRAG: 31 instructions, 0 half, 3 full
-; pos (bary): r0.x
-; color: r1.x
+; FRAG: 11 instructions, 0 half, 2 full
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-122.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-122.asm
index 4c2f276..1171e2e 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-122.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-122.asm
@@ -6,21 +6,16 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
-(sy)(ss)bary.f r0.z, 2, r0.x
-bary.f r0.w, 1, r0.x
-bary.f (ei)r0.x, 0, r0.x
-mov.f32f32 r0.y, c0.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r1.w, r0.y
-mov.f32f32 r1.z, r0.z
-mov.f32f32 r1.y, r0.w
-mov.f32f32 r1.x, r0.x
+@const(c0.x) 0x3f800000, 0x00000000, 0x00000000, 0x00000000
+(sy)(ss)mov.f32f32 r1.w, c0.x
+bary.f r1.z, 2, r0.x
+bary.f r1.y, 1, r0.x
+bary.f (ei)r1.x, 0, r0.x
end
+nop
+nop
+nop
; FRAG: outputs: r1.x (1:0)
; FRAG: inputs: r0.x (5:9,cm=f,il=8,b=1)
-; FRAG: 12 instructions, 0 half, 2 full
-; pos (bary): r0.x
-; color: r1.x
+; FRAG: 5 instructions, 0 half, 2 full
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-126.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-126.asm
index 6d04ab5..159d8e0 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-126.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-126.asm
@@ -1,44 +1,23 @@
; options:
; FRAG: new compiler
-@out(r1.w) out0
-@out(r2.x) out1
-@out(r2.y) out2
-@out(r2.z) out3
+@out(r1.x) out0
+@out(r1.y) out1
+@out(r1.z) out2
+@out(r1.w) out3
+@const(c10.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)mov.f32f32 r0.x, c9.x
-mov.f32f32 r0.y, c4.w
+mov.f32f32 r0.y, c9.y
mov.f32f32 r0.z, c9.w
-mov.f32f32 r0.w, c9.y
-mov.f32f32 r1.x, r0.x
-mov.f32f32 r2.z, r0.y
-mov.f32f32 r1.z, r0.z
-mov.f32f32 r1.y, r0.w
-(rpt5)nop
-sam.p (f32)(xyzw)r0.x, r1.x, s#0, t#0
-(sy)mul.f r0.z, r0.z, c4.z
-mul.f r0.y, r0.y, c4.y
-mul.f r0.x, r0.x, c4.x
-nop
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
-nop
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
-nop
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
-nop
-mov.f32f32 r2.y, r0.z
-mov.f32f32 r2.x, r0.y
-mov.f32f32 r1.w, r0.x
+mov.f32f32 r1.w, c4.w
+(rpt4)nop
+sam.p (f32)(xyzw)r0.x, r0.x, s#0, t#0
+(sy)mul.f r1.z, r0.z, c4.z
+mul.f r1.y, r0.y, c4.y
+mul.f r1.x, r0.x, c4.x
end
nop
nop
-; FRAG: outputs: r1.w (1:0)
+; FRAG: outputs: r1.x (1:0)
; FRAG: inputs:
-; FRAG: 35 instructions, 0 half, 3 full
-; pos (bary): r0.x
-; color: r1.w
+; FRAG: 14 instructions, 0 half, 2 full
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-127.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-127.asm
index 5de51e6..72bfca4 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-127.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-127.asm
@@ -2,53 +2,44 @@
; FRAG: new compiler
@in(r0.x) in0
@in(r0.y) in1
-@out(r4.x) out0
-@out(r4.y) out1
-@out(r4.z) out2
-@out(r4.w) out3
+@out(r1.x) out0
+@out(r1.y) out1
+@out(r1.z) out2
+@out(r1.w) out3
+@const(c0.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.z, 4, r0.x
-bary.f r0.w, 8, r0.x
-bary.f r1.x, 9, r0.x
-bary.f r1.y, 5, r0.x
+bary.f r0.w, 5, r0.x
+bary.f r1.x, 8, r0.x
+bary.f r1.y, 9, r0.x
mov.f32f32 r1.z, r0.z
-mov.f32f32 r2.x, r0.z
-mov.f32f32 r0.z, r0.w
-mov.f32f32 r1.w, r1.y
-mov.f32f32 r2.y, r1.y
-mov.f32f32 r0.w, r1.x
-bary.f r1.x, 3, r0.x
-bary.f r1.y, 2, r0.x
-bary.f r2.z, 1, r0.x
+mov.f32f32 r1.w, r0.w
+bary.f r2.x, 3, r0.x
+bary.f r2.y, 2, r0.x
+sam (f32)(xyz)r2.z, r0.z, s#1, t#1
+(sy)(ss)add.f r0.z, c0.y, (neg)r3.x
+bary.f r0.w, 1, r0.x
+add.f r3.y, c0.y, (neg)r2.w
+add.f r3.z, c0.y, (neg)r2.z
+sam (f32)(xyzw)r3.w, r1.z, s#0, t#0
+(sy)mul.f r0.z, r0.z, r4.y
+(ss)mul.f r1.z, r3.x, r2.y
+mul.f r1.w, r3.y, r4.x
+mul.f r0.w, r2.w, r0.w
+mul.f r2.y, r3.z, r3.w
+add.f r0.z, r1.z, r0.z
+sam (f32)(w)r2.w, r1.x, s#2, t#2
bary.f (ei)r0.x, 0, r0.x
-sam (f32)(xyz)r2.w, r1.z, s#1, t#1
-(sy)add.f r0.y, c0.y, (neg)r3.y
-(ss)nop
-sam (f32)(xyzw)r1.z, r2.x, s#0, t#0
-add.f r3.z, c0.y, (neg)r3.x
-add.f r3.w, c0.y, (neg)r2.w
-(sy)mul.f r4.w, r1.x, r2.y
-mul.f r0.y, r0.y, r2.x
-mul.f r1.x, r3.y, r1.y
-mul.f r1.y, r3.z, r1.w
-mul.f r1.z, r3.w, r1.z
-mul.f r1.w, r3.x, r2.z
-add.f r0.y, r1.x, r0.y
-sam (f32)(w)r3.x, r0.z, s#2, t#2
-mul.f r0.x, r2.w, r0.x
+add.f r0.y, r0.w, r1.w
+mul.f r1.w, r2.x, r4.z
+(sy)mul.f r1.z, r0.z, r3.z
+mul.f r0.x, r2.z, r0.x
+(ss)mul.f r1.y, r0.y, r3.z
(rpt1)nop
-(sy)mul.f r4.z, r0.y, r3.w
-add.f r0.y, r1.w, r1.y
-add.f r0.x, r0.x, r1.z
-(rpt1)nop
-mul.f r4.y, r0.y, r3.w
-mul.f r4.x, r0.x, r3.w
+add.f r0.x, r0.x, r2.y
+(rpt2)nop
+mul.f r1.x, r0.x, r3.z
end
-nop
-nop
-nop
-; FRAG: outputs: r4.x (1:0)
-; FRAG: inputs: r0.y (1:0,cm=f,il=8,b=1) r1.x (5:0,cm=f,il=12,b=1) r2.x (5:1,cm=f,il=16,b=1)
-; FRAG: 39 instructions, 0 half, 5 full
-; pos (bary): r0.x
-; color: r4.x
+; FRAG: outputs: r1.x (1:0)
+; FRAG: inputs: r1.y (1:0,cm=f,il=8,b=1) r1.x (5:0,cm=f,il=12,b=1) r2.x (5:1,cm=f,il=16,b=1)
+; FRAG: 35 instructions, 0 half, 5 full
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-128.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-128.asm
index 948d48e..270bdcf 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-128.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-128.asm
@@ -1,9 +1,9 @@
; options:
; VERT: new compiler
-@in(r0.x) in0
-@in(r0.y) in1
-@in(r0.z) in2
-@in(r0.w) in3
+@in(r4.x) in0
+@in(r4.y) in1
+@in(r4.z) in2
+@in(r4.w) in3
@in(r1.x) in4
@in(r1.y) in5
@in(r1.z) in6
@@ -12,72 +12,56 @@
@in(r2.y) in9
@in(r2.z) in10
@in(r2.w) in11
-@out(r3.w) out0
-@out(r4.x) out1
-@out(r4.y) out2
-@out(r4.z) out3
-@out(r4.w) out4
-@out(r5.x) out5
-@out(r5.y) out6
-@out(r5.z) out7
-@out(r2.w) out8
-@out(r3.x) out9
-@out(r3.y) out10
-@out(r3.z) out11
-@out(r0.w) out12
-@out(r1.x) out13
-@out(r1.y) out14
-@out(r1.z) out15
-(sy)(ss)mul.f r3.x, c3.x, r0.x
-mul.f r3.y, c2.x, r0.x
-mad.f32 r3.x, c3.y, r0.y, r3.x
-mad.f32 r3.y, c2.y, r0.y, r3.y
-mul.f r3.z, c1.x, r0.x
-mul.f r3.w, c0.x, r0.x
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r3.y, r3.y
-mad.f32 r3.x, c3.z, r0.z, r3.x
-mad.f32 r3.y, c2.z, r0.z, r3.y
-mad.f32 r3.z, c1.y, r0.y, r3.z
-mad.f32 r0.y, c0.y, r0.y, r3.w
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r3.y, r3.y
-mad.f32 r4.z, c3.w, r0.w, r3.x
-mad.f32 r4.y, c2.w, r0.w, r3.y
-mov.f32f32 r3.x, r3.z
-mov.f32f32 r0.y, r0.y
-mad.f32 r3.x, c1.z, r0.z, r3.x
-mad.f32 r0.y, c0.z, r0.z, r0.y
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r0.y, r0.y
-mad.f32 r4.x, c1.w, r0.w, r3.x
-mad.f32 r3.w, c0.w, r0.w, r0.y
-max.f r0.y, r1.w, c5.x
-max.f r0.w, r1.z, c5.x
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.x, r1.x
-min.f r5.z, r0.y, c5.y
-min.f r5.y, r0.w, c5.y
-max.f r0.y, r1.y, c5.x
-max.f r0.w, r1.x, c5.x
-(rpt1)nop
-min.f r5.x, r0.y, c5.y
-min.f r4.w, r0.w, c5.y
-mad.f32 r1.z, c4.x, r0.z, c4.y
-mad.f32 r1.y, c4.x, r0.z, c4.y
-mad.f32 r1.x, c4.x, r0.z, c4.y
-mad.f32 r0.w, c4.x, r0.x, c4.y
-mov.f32f32 r3.z, r2.w
-mov.f32f32 r3.y, r2.z
-mov.f32f32 r3.x, r2.y
-mov.f32f32 r2.w, r2.x
+@out(r0.x) out0
+@out(r0.y) out1
+@out(r0.z) out2
+@out(r0.w) out3
+@out(r1.x) out4
+@out(r1.y) out5
+@out(r1.z) out6
+@out(r1.w) out7
+@out(r2.x) out8
+@out(r2.y) out9
+@out(r2.z) out10
+@out(r2.w) out11
+@out(r3.x) out12
+@out(r3.y) out13
+@out(r3.z) out14
+@out(r3.w) out15
+@const(c5.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
+(sy)(ss)mul.f r0.x, c3.x, r4.x
+mul.f r0.y, c2.x, r4.x
+mad.f32 r0.x, c3.y, r4.y, r0.x
+mad.f32 r0.y, c2.y, r4.y, r0.y
+mad.f32 r0.x, c3.z, r4.z, r0.x
+mad.f32 r0.y, c2.z, r4.z, r0.y
+mad.f32 r0.w, c3.w, r4.w, r0.x
+mad.f32 r0.z, c2.w, r4.w, r0.y
+mul.f r0.x, c1.x, r4.x
+mul.f r0.y, c0.x, r4.x
+mad.f32 r0.x, c1.y, r4.y, r0.x
+mad.f32 r0.y, c0.y, r4.y, r0.y
+mad.f32 r0.x, c1.z, r4.z, r0.x
+mad.f32 r3.x, c0.z, r4.z, r0.y
+mad.f32 r0.y, c1.w, r4.w, r0.x
+mad.f32 r0.x, c0.w, r4.w, r3.x
+max.f r1.w, r1.w, c5.x
+max.f r1.z, r1.z, c5.x
+max.f r1.y, r1.y, c5.x
+max.f r1.x, r1.x, c5.x
+min.f r1.w, r1.w, c5.y
+min.f r1.z, r1.z, c5.y
+min.f r1.y, r1.y, c5.y
+min.f r1.x, r1.x, c5.y
+mad.f32 r3.w, c4.x, r4.z, c4.y
+mad.f32 r3.z, c4.x, r4.z, c4.y
+mad.f32 r3.y, c4.x, r4.z, c4.y
+mad.f32 r3.x, c4.x, r4.x, c4.y
end
nop
nop
-; VERT: outputs: r3.w (0:0) r4.w (1:0) r2.w (5:0) r0.w (5:1)
-; VERT: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (0:0,cm=f,il=12,b=0) r2.x (0:0,cm=f,il=16,b=0)
-; VERT: 47 instructions, 0 half, 6 full
-; pos: r3.w
+nop
+; VERT: outputs: r0.x (0:0) r1.x (1:0) r2.x (5:0) r3.x (5:1)
+; VERT: inputs: r4.x (0:0,cm=f,il=8,b=0) r1.x (0:0,cm=f,il=12,b=0) r2.x (0:0,cm=f,il=16,b=0)
+; VERT: 29 instructions, 0 half, 5 full
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-130.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-130.asm
index eedb778..284e180 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-130.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-130.asm
@@ -1,55 +1,51 @@
; options:
; VERT: new compiler
-@in(r0.x) in0
-@in(r0.y) in1
-@in(r0.z) in2
-@in(r0.w) in3
-@in(r1.x) in4
-@in(r1.y) in5
-@in(r1.z) in6
-@in(r1.w) in7
-@out(r2.w) out0
-@out(r3.x) out1
-@out(r3.y) out2
-@out(r3.z) out3
-@out(r1.w) out4
-@out(r2.x) out5
-@out(r2.y) out6
-@out(r2.z) out7
-(sy)(ss)mov.f32f32 r1.w, r1.w
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.x, r1.x
-max.f r1.w, r1.w, c4.x
-max.f r1.z, r1.z, c4.x
-max.f r1.y, r1.y, c4.x
-max.f r1.x, r1.x, c4.x
-min.f r2.z, r1.w, c4.y
-min.f r2.y, r1.z, c4.y
-min.f r2.x, r1.y, c4.y
+@in(r1.x) in0
+@in(r1.y) in1
+@in(r1.z) in2
+@in(r1.w) in3
+@in(r2.x) in4
+@in(r2.y) in5
+@in(r2.z) in6
+@in(r2.w) in7
+@out(r0.x) out0
+@out(r0.y) out1
+@out(r0.z) out2
+@out(r0.w) out3
+@out(r1.x) out4
+@out(r1.y) out5
+@out(r1.z) out6
+@out(r1.w) out7
+@const(c4.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000
+(sy)(ss)mul.f r0.x, r1.x, c0.w
+mul.f r0.y, r1.x, c0.z
+mad.f32 r0.x, c1.w, r1.y, r0.x
+mad.f32 r0.y, c1.z, r1.y, r0.y
+mad.f32 r0.x, c2.w, r1.z, r0.x
+mad.f32 r0.y, c2.z, r1.z, r0.y
+mad.f32 r0.w, c3.w, r1.w, r0.x
+mad.f32 r0.z, c3.z, r1.w, r0.y
+mul.f r0.x, r1.x, c0.y
+mul.f r0.y, r1.x, c0.x
+mad.f32 r0.x, c1.y, r1.y, r0.x
+mad.f32 r0.y, c1.x, r1.y, r0.y
+mad.f32 r0.x, c2.y, r1.z, r0.x
+mad.f32 r1.x, c2.x, r1.z, r0.y
+mad.f32 r0.y, c3.y, r1.w, r0.x
+mad.f32 r0.x, c3.x, r1.w, r1.x
+max.f r1.x, r2.w, c4.x
+max.f r1.y, r2.z, c4.x
+max.f r2.y, r2.y, c4.x
+max.f r2.x, r2.x, c4.x
min.f r1.w, r1.x, c4.y
-mul.f r1.x, r0.x, c0.w
-mul.f r1.y, r0.x, c0.z
-mad.f32 r1.x, c1.w, r0.y, r1.x
-mad.f32 r1.y, c1.z, r0.y, r1.y
-mad.f32 r1.x, c2.w, r0.z, r1.x
-mad.f32 r1.y, c2.z, r0.z, r1.y
-mad.f32 r3.z, c3.w, r0.w, r1.x
-mad.f32 r3.y, c3.z, r0.w, r1.y
-mul.f r1.x, r0.x, c0.y
-mul.f r0.x, r0.x, c0.x
-mad.f32 r1.x, c1.y, r0.y, r1.x
-mad.f32 r0.x, c1.x, r0.y, r0.x
-mad.f32 r0.y, c2.y, r0.z, r1.x
-mad.f32 r0.x, c2.x, r0.z, r0.x
-mad.f32 r3.x, c3.y, r0.w, r0.y
-mad.f32 r2.w, c3.x, r0.w, r0.x
+min.f r1.z, r1.y, c4.y
+min.f r1.y, r2.y, c4.y
+min.f r1.x, r2.x, c4.y
end
nop
nop
nop
-; VERT: outputs: r2.w (0:0) r1.w (1:0)
-; VERT: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (0:0,cm=f,il=12,b=0)
-; VERT: 29 instructions, 0 half, 4 full
-; pos: r2.w
+; VERT: outputs: r0.x (0:0) r1.x (1:0)
+; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) r2.x (0:0,cm=f,il=12,b=0)
+; VERT: 25 instructions, 0 half, 3 full
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-131.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-131.asm
index 2639ca3..3db9988 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-131.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-131.asm
@@ -2,45 +2,33 @@
; FRAG: new compiler
@in(r0.x) in0
@in(r0.y) in1
-@out(r0.w) out0
-@out(r1.x) out1
-@out(r1.y) out2
-@out(r1.z) out3
+@out(r1.x) out0
+@out(r1.y) out1
+@out(r1.z) out2
+@out(r1.w) out3
+@const(c0.x) 0x3e99999a, 0x3f170a3d, 0x3de147ae, 0x00000000
+@const(c1.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.z, 0, r0.x
-bary.f (ei)r0.x, 1, r0.x
-(rpt1)nop
-mov.f32f32 r0.y, r0.z
-mov.f32f32 r0.x, r0.x
-(rpt1)nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.x
+bary.f (ei)r0.w, 1, r0.x
(rpt5)nop
-sam (f32)(xyzw)r0.x, r0.y, s#0, t#0
-(sy)mul.f r0.x, r0.x, c0.x
-mov.f32f32 r0.w, r0.w
-mad.f32 r0.x, c0.y, r0.y, r0.x
-(rpt1)nop
-mov.f32f32 r1.z, r0.w
-mov.f32f32 r0.x, r0.x
+sam (f32)(xyzw)r1.x, r0.z, s#0, t#0
+(sy)mul.f r0.x, r1.x, c0.x
nop
-mad.f32 r0.x, c0.z, r0.z, r0.x
+mad.f32 r0.x, c0.y, r1.y, r0.x
+nop
+mad.f32 r1.x, c0.z, r1.z, r0.x
(rpt2)nop
-(ss)mov.f32f32 r0.y, r0.x
-mov.f32f32 r0.z, r0.x
+mov.f32f32 r0.x, r1.x
+mov.f32f32 r0.y, r1.x
+(rpt1)nop
mov.f32f32 r0.x, r0.x
-nop
mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.x, r0.x
-nop
+(rpt1)nop
+mov.f32f32 r1.z, r0.x
mov.f32f32 r1.y, r0.y
-mov.f32f32 r1.x, r0.z
-mov.f32f32 r0.w, r0.x
end
nop
-; FRAG: outputs: r0.w (1:0)
+; FRAG: outputs: r1.x (1:0)
; FRAG: inputs: r0.x (5:9,cm=f,il=8,b=1)
-; FRAG: 41 instructions, 0 half, 2 full
-; pos (bary): r0.x
-; color: r0.w
+; FRAG: 28 instructions, 0 half, 2 full
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-133.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-133.asm
index a7cfae4..22ca830 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-133.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-133.asm
@@ -2,37 +2,28 @@
; FRAG: new compiler
@in(r0.x) in0
@in(r0.y) in1
-@out(r1.y) out0
-@out(r1.z) out1
-@out(r1.w) out2
-@out(r2.x) out3
+@out(r1.x) out0
+@out(r1.y) out1
+@out(r1.z) out2
+@out(r1.w) out3
+@const(c0.x) 0x3ecccccd, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)bary.f r0.z, 0, r0.x
-bary.f (ei)r0.x, 1, r0.x
-mov.f32f32 r0.y, (0.000000)
-nop
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.x, r0.x
-(rpt1)nop
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.x
-(rpt5)nop
-sam (f32)(xyzw)r0.z, r0.z, s#0, t#0
-(sy)cmps.f.lt r0.x, r1.y, c0.x
-mov.f32f32 r2.x, r1.y
-mov.f32f32 r1.w, r1.x
-mov.f32f32 r1.z, r0.w
-cov.u32f32 r0.x, r0.x
-(rpt1)nop
-mov.f32f32 r1.y, r0.z
-cmps.f.ne p0.x, r0.x, r0.y
+bary.f (ei)r0.w, 1, r0.x
+mov.f32f32 r0.x, (0.000000)
+(rpt4)nop
+sam (f32)(xyzw)r1.x, r0.z, s#0, t#0
+(sy)cmps.f.lt r0.y, r1.w, c0.x
+(rpt2)nop
+cov.u32f32 r0.y, r0.y
+(rpt2)nop
+cmps.f.ne p0.x, r0.y, r0.x
(rpt5)nop
kill p0.x
end
nop
nop
-; FRAG: outputs: r1.y (1:0)
+nop
+; FRAG: outputs: r1.x (1:0)
; FRAG: inputs: r0.x (5:9,cm=f,il=8,b=1)
-; FRAG: 34 instructions, 0 half, 3 full
-; pos (bary): r0.x
-; color: r1.y
+; FRAG: 26 instructions, 0 half, 2 full
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-135.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-135.asm
index 7fd0d8e..85a697c 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-135.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-135.asm
@@ -2,48 +2,38 @@
; FRAG: new compiler
@in(r0.z) in2
@in(r0.w) in3
-@out(r1.x) out0
-@out(r1.y) out1
-@out(r1.z) out2
-@out(r1.w) out3
+@out(r2.x) out0
+@out(r2.y) out1
+@out(r2.z) out2
+@out(r2.w) out3
+@const(c4.x) 0x3f000000, 0x00000000, 0x3f800000, 0x3fb8aa65
+@const(c5.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)add.f r0.x, r0.w, c4.y
-mov.f32f32 r0.y, c0.w
-absneg.f r0.w, (neg)c2.x
+absneg.f r0.y, (neg)c2.x
+mov.f32f32 r2.w, c0.w
add.f r0.z, r0.z, c4.y
(rpt2)nop
rcp r0.x, r0.x
(ss)mul.f r0.x, r0.z, r0.x
-mov.f32f32 r1.w, r0.y
-mul.f r0.y, r0.w, c2.x
-nop
-mov.f32f32 r0.x, r0.x
+mul.f r0.y, r0.y, c2.x
(rpt2)nop
mul.f r0.y, r0.y, r0.x
-(rpt2)nop
-mov.f32f32 r0.y, r0.y
+mov.f32f32 r0.x, r0.x
(rpt2)nop
mul.f r0.x, r0.y, r0.x
(rpt2)nop
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
mul.f r0.x, r0.x, c4.w
-(rpt2)nop
-mov.f32f32 r0.x, r0.x
(rpt5)nop
exp2 r0.x, r0.x
-(ss)mov.f32f32 r0.x, r0.x
-(rpt2)nop
-add.f r0.y, c5.y, (neg)r0.x
-mul.f r0.x, r0.x, c4.z
+(ss)mov.f32f32 r0.y, r0.x
+(ss)mul.f r0.x, r0.x, c4.z
(rpt1)nop
+add.f r0.y, c5.y, (neg)r0.y
+(rpt2)nop
mul.f r0.y, r0.y, c2.y
(rpt2)nop
add.f r0.x, r0.x, r0.y
(rpt2)nop
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
max.f r0.x, r0.x, c4.y
(rpt2)nop
min.f r0.x, r0.x, c4.z
@@ -51,25 +41,22 @@ min.f r0.x, r0.x, c4.z
add.f r0.y, c5.y, (neg)r0.x
add.f r0.z, c5.y, (neg)r0.x
add.f r0.w, c5.y, (neg)r0.x
-mul.f r2.x, r0.x, c0.z
+mul.f r1.x, r0.x, c0.z
mul.f r0.y, r0.y, c1.z
mul.f r0.z, r0.z, c1.y
mul.f r0.w, r0.w, c1.x
-mul.f r2.y, r0.x, c0.y
-add.f r0.y, r2.x, r0.y
+nop
+add.f r2.z, r1.x, r0.y
+mul.f r0.y, r0.x, c0.y
mul.f r0.x, r0.x, c0.x
(rpt1)nop
-mov.f32f32 r1.z, r0.y
-add.f r0.y, r2.y, r0.z
-add.f r0.x, r0.x, r0.w
-(rpt1)nop
-mov.f32f32 r1.y, r0.y
-mov.f32f32 r1.x, r0.x
+add.f r2.y, r0.y, r0.z
+add.f r2.x, r0.x, r0.w
end
-; FRAG: outputs: r1.x (1:0)
+nop
+nop
+nop
+; FRAG: outputs: r2.x (1:0)
; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0)
-; FRAG: 96 instructions, 0 half, 3 full
-; pos (bary): r1.x
-; color: r1.x
-; fragcoord: r0.x
+; FRAG: 70 instructions, 0 half, 3 full
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-139.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-139.asm
index c6e09ad..593f290 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-139.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-139.asm
@@ -4,246 +4,167 @@
@in(r0.w) in3
@in(r1.x) in4
@in(r1.y) in5
-@out(r5.y) out0
-@out(r5.z) out1
-@out(r5.w) out2
-@out(r6.x) out3
+@out(r2.x) out0
+@out(r2.y) out1
+@out(r2.z) out2
+@out(r2.w) out3
+@const(c12.x) 0x3f000000, 0x00000000, 0x3f800000, 0xbb449ba6
+@const(c13.x) 0xbf000000, 0x40000000, 0x3f800000, 0x3de38866
+@const(c14.x) 0x3fb8aa65, 0x3cf5c28f, 0x00000000, 0x00000000
+@const(c15.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.x, 8, r1.x
-bary.f r0.y, 4, r1.x
-add.f r0.w, r0.w, c12.y
+add.f r0.y, r0.w, c12.y
+bary.f r0.w, 4, r1.x
bary.f r1.z, 9, r1.x
add.f r1.w, r0.x, c13.x
-mul.f r2.x, r0.y, r0.y
-bary.f r2.y, 5, r1.x
-add.f r2.z, r1.z, c13.x
+bary.f r2.x, 12, r1.x
+mul.f r2.y, r0.w, r0.w
+bary.f r2.z, 5, r1.x
floor.f r2.w, r1.w
-rcp r0.w, r0.w
+rcp r0.y, r0.y
add.f r0.z, r0.z, c12.y
-mad.f32 r2.x, r2.y, r2.y, r2.x
-floor.f r3.x, r2.z
+add.f r3.x, r1.z, c13.x
+mad.f32 r2.y, r2.z, r2.z, r2.y
add.f r1.w, r1.w, (neg)r2.w
-(ss)mul.f r0.z, r0.z, r0.w
-(ss)mov.f32f32 r0.w, r2.x
-bary.f r2.x, 6, r1.x
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r0.z, r0.z
-absneg.f r2.w, (neg)c8.x
-mad.f32 r0.w, r2.x, r2.x, r0.w
-mul.f r3.y, c12.x, r1.w
-add.f r2.z, r2.z, (neg)r3.x
-mul.f r2.w, r2.w, c8.x
-add.f r3.x, c13.y, (neg)r1.w
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r2.z, r2.z
-mul.f r2.w, r2.w, r0.z
-rsq r0.w, r0.w
-(ss)mov.f32f32 r0.w, r0.w
-add.f r0.x, r0.x, (neg)r3.y
-mul.f r3.y, c12.x, r2.z
-mov.f32f32 r2.w, r2.w
-mul.f r0.y, r0.y, r0.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r3.y, r3.y
-mul.f r0.z, r2.w, r0.z
+(ss)mul.f r0.y, r0.z, r0.y
+absneg.f r0.z, (neg)c8.x
+bary.f r2.w, 6, r1.x
+mov.f32f32 r3.y, r1.w
+floor.f r3.z, r3.x
+mul.f r0.z, r0.z, c8.x
+mad.f32 r2.y, r2.w, r2.w, r2.y
+mul.f r3.w, c12.x, r3.y
+add.f r3.x, r3.x, (neg)r3.z
+mul.f r0.z, r0.z, r0.y
mov.f32f32 r0.y, r0.y
-add.f r2.w, c13.x, r0.x
-bary.f r3.z, 16, r1.x
-mov.f32f32 r0.z, r0.z
+add.f r0.x, r0.x, (neg)r3.w
+mov.f32f32 r3.z, r3.x
+rsq r2.y, r2.y
+(ss)mov.f32f32 r3.w, r2.y
+mul.f r0.y, r0.z, r0.y
+mov.f32f32 r0.z, r0.x
+mul.f r4.x, c12.x, r3.z
add.f r0.x, c13.z, r0.x
-mov.f32f32 r2.w, r2.w
-mul.f r0.y, r0.y, r3.z
-mul.f r2.y, r2.y, r0.w
-mul.f r0.z, r0.z, c14.x
-mul.f r2.w, r2.w, c3.z
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r3.z, r2.w
-bary.f r3.w, 17, r1.x
-mul.f r0.x, r0.x, c3.z
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r4.x, r3.z
-add.f r1.z, r1.z, (neg)r3.y
-mad.f32 r0.y, r2.y, r3.w, r0.y
-exp2 r0.z, r0.z
-(ss)mov.f32f32 r0.z, r0.z
-mov.f32f32 r2.y, r0.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r0.y, r0.y
-mul.f r0.w, r2.x, r0.w
-add.f r2.x, c15.y, (neg)r0.z
-add.f r3.y, c13.x, r1.z
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r0.w, r0.w
-mul.f r2.x, r2.x, c8.y
-mov.f32f32 r3.y, r3.y
-bary.f r3.z, 18, r1.x
-mul.f r0.z, r0.z, c12.z
-mov.f32f32 r4.w, r2.y
-mul.f r2.y, r3.y, c3.w
-mad.f32 r0.y, r0.w, r3.z, r0.y
-add.f r0.z, r0.z, r2.x
-add.f r0.w, c13.z, r1.z
-mov.f32f32 r1.z, r2.y
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r4.y, r1.z
-bary.f r1.z, 10, r1.x
-max.f r0.y, c12.y, r0.y
-mov.f32f32 r0.z, r0.z
-mul.f r0.w, r0.w, c3.w
-add.f r1.z, r1.z, c12.w
-mov.f32f32 r0.y, r0.y
-max.f r0.z, r0.z, c12.y
-mov.f32f32 r2.x, r0.w
-mov.f32f32 r3.y, r1.z
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r5.z, r2.w
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r4.z, r3.y
-log2 r0.y, r0.y
-(ss)mul.f r0.y, c9.x, r0.y
-min.f r0.z, r0.z, c12.z
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r3.y, r0.x
-mov.f32f32 r5.w, r0.w
-mov.f32f32 r0.x, r2.y
-sam.s (f32)(x)r0.w, r4.x, s#2, t#2
-(sy)mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.y, r0.y
-add.f r2.y, c15.y, (neg)r0.z
-add.f r2.w, c15.y, (neg)r0.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r3.x, r3.x
-add.f r3.z, c13.y, (neg)r2.z
-mul.f r2.y, r2.y, c7.z
-mul.f r2.w, r2.w, c7.y
-add.f r3.w, c15.y, (neg)r0.z
-(ss)mov.f32f32 r4.x, r3.z
+mul.f r0.w, r0.w, r3.w
+add.f r0.z, c13.x, r0.z
+mul.f r0.y, r0.y, c14.x
+bary.f r4.y, 16, r1.x
+add.f r1.z, r1.z, (neg)r4.x
+mul.f r4.z, r0.z, c3.z
+mul.f r5.y, r0.x, c3.z
+mul.f r0.x, r0.w, r4.y
+add.f r0.z, c13.z, r1.z
+mov.f32f32 r6.x, r4.z
+mov.f32f32 r0.w, r1.z
exp2 r0.y, r0.y
-mov.f32f32 r3.z, c6.y
-mov.f32f32 r4.y, c6.x
-mov.f32f32 r4.z, c6.z
-mul.f r5.x, r3.x, r4.x
-mul.f r5.y, r3.z, c10.y
-mul.f r4.y, r4.y, c10.x
-mul.f r6.y, r3.w, c7.x
-mul.f r0.w, r5.x, r0.w
-mov.f32f32 r3.z, r0.x
-mov.f32f32 r0.x, r1.z
-mul.f r4.z, r4.z, c10.z
-(ss)mul.f r5.x, r5.y, r0.y
-mul.f r4.y, r4.y, r0.y
-mov.f32f32 r3.w, r0.x
-mul.f r0.x, r4.z, r0.y
-(ss)mov.f32f32 r0.y, r5.x
-mov.f32f32 r4.y, r4.y
-bary.f r4.z, 14, r1.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r5.x, r2.x
-sam.s (f32)(x)r2.x, r3.y, s#2, t#2
-(sy)mov.f32f32 r2.x, r2.x
-add.f r1.w, r1.w, c12.z
-(ss)mov.f32f32 r3.y, r4.z
-mov.f32f32 r3.z, r1.z
-mov.f32f32 r1.z, r1.z
-mul.f r3.w, r1.w, r4.x
-mov.f32f32 r6.z, r3.y
-bary.f r3.y, 15, r1.x
-mov.f32f32 r5.y, r3.z
-mad.f32 r0.w, r3.w, r2.x, r0.w
-mov.f32f32 r6.x, r1.z
-add.f r1.z, r2.z, c12.z
-bary.f r2.x, 12, r1.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.z, r3.y
-sam.s (f32)(x)r3.y, r4.w, s#2, t#2
-(sy)mov.f32f32 r3.y, r3.y
-mul.f r3.x, r3.x, r1.z
-sam.s (f32)(x)r3.z, r5.z, s#2, t#2
-(sy)mov.f32f32 r3.z, r3.z
-mov.f32f32 r6.w, r2.z
-mov.f32f32 r2.x, r2.x
-mul.f r1.z, r1.w, r1.z
-mad.f32 r0.w, r3.x, r3.z, r0.w
-bary.f r1.w, 13, r1.x
-mov.f32f32 r3.z, r2.x
-mov.f32f32 r2.x, c12.z
-mov.f32f32 r0.w, r0.w
-(ss)nop
-sam (f32)(xyz)r4.z, r6.z, s#0, t#0
-bary.f r2.z, 2, r1.x
-mad.f32 r0.w, r1.z, r3.y, r0.w
-bary.f r1.z, 1, r1.x
-bary.f (ei)r1.x, 0, r1.x
-(sy)mad.f32 r0.x, r5.x, r2.z, r0.x
-mov.f32f32 r0.w, r0.w
-mad.f32 r0.y, r4.w, r1.z, r0.y
-mad.f32 r1.x, r4.z, r1.x, r4.y
+(ss)mov.f32f32 r1.z, r0.y
+mul.f r2.z, r2.z, r3.w
+bary.f r3.w, 17, r1.x
+add.f r0.w, c13.x, r0.w
+add.f r1.z, c15.y, (neg)r1.z
+mov.f32f32 r6.w, r5.y
+mul.f r5.z, r0.z, c3.w
+mul.f r7.x, r0.w, c3.w
+mul.f r0.z, r1.z, c8.y
+(ss)mul.f r0.y, r0.y, c12.z
+mad.f32 r0.x, r2.z, r3.w, r0.x
+mov.f32f32 r6.y, r7.x
+bary.f r0.w, 10, r1.x
+add.f r0.y, r0.y, r0.z
+mul.f r0.z, r2.w, r2.y
+bary.f r1.z, 18, r1.x
+add.f r5.w, r0.w, c12.w
+max.f r0.y, r0.y, c12.y
+mov.f32f32 r4.w, r5.z
+mad.f32 r0.x, r0.z, r1.z, r0.x
+mov.f32f32 r6.z, r5.w
+min.f r0.y, r0.y, c12.z
+mov.f32f32 r7.y, r5.w
+mov.f32f32 r5.x, r5.w
+max.f r0.x, c12.y, r0.x
+nop
+add.f r0.z, c15.y, (neg)r0.y
+sam.s (f32)(x)r7.z, r6.x, s#2, t#2
+add.f r0.w, c13.y, (neg)r3.y
+add.f r1.z, c15.y, (neg)r0.y
+add.f r2.y, c15.y, (neg)r0.y
+mul.f r0.z, r0.z, c7.z
+mov.f32f32 r2.z, r0.w
+add.f r2.w, c13.y, (neg)r3.z
+mul.f r1.z, r1.z, c7.y
+mul.f r3.y, r2.y, c7.x
+log2 r0.x, r0.x
+(ss)mul.f r0.x, c9.x, r0.x
+mov.f32f32 r2.y, r2.w
+sam.s (f32)(x)r3.z, r6.w, s#2, t#2
nop
+sam.s (f32)(x)r3.w, r4.z, s#2, t#2
+sam.s (f32)(x)r4.x, r5.y, s#2, t#2
+add.f r1.w, r1.w, c12.z
+add.f r3.x, r3.x, c12.z
+mul.f r2.z, r2.z, r2.y
+bary.f r2.y, 13, r1.x
+mov.f32f32 r4.y, c6.z
+(ss)bary.f r4.z, 14, r1.x
+(sy)mul.f r2.z, r2.z, r7.z
+mul.f r2.w, r1.w, r2.w
+exp2 r0.x, r0.x
+mov.f32f32 r4.w, c6.y
+mov.f32f32 r5.x, c6.x
+mul.f r4.y, r4.y, c10.z
+mad.f32 r2.z, r2.w, r3.z, r2.z
+mul.f r0.w, r0.w, r3.x
+mul.f r2.w, r4.w, c10.y
+mul.f r3.z, r5.x, c10.x
+(ss)mul.f r4.y, r4.y, r0.x
+mad.f32 r0.w, r0.w, r3.w, r2.z
+mul.f r1.w, r1.w, r3.x
+bary.f r4.w, 15, r1.x
+mul.f r2.z, r2.w, r0.x
+(ss)mul.f r0.x, r3.z, r0.x
+mad.f32 r0.w, r1.w, r4.x, r0.w
+sam (f32)(w)r5.x, r2.x, s#1, t#1
+(sy)cmps.f.lt r1.w, r5.w, c14.y
+mov.f32f32 r2.w, c12.z
+(ss)bary.f r2.x, 2, r1.x
mul.f r0.w, c13.w, r0.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.y, r1.w
-mov.f32f32 r6.x, r2.x
-mov.f32f32 r1.z, c12.y
+sam (f32)(xyz)r4.z, r4.z, s#0, t#0
+bary.f r2.y, 0, r1.x
+bary.f (ei)r1.x, 1, r1.x
+cov.u32f32 r1.y, r1.w
+mov.f32f32 r1.w, r0.w
+(sy)mad.f32 r2.x, r5.x, r2.x, r4.y
+mad.f32 r1.x, r4.w, r1.x, r2.z
+mad.f32 r0.x, r4.z, r2.y, r0.x
+cmps.f.ne r1.y, r1.y, c12.y
+mul.f r2.x, r2.x, r1.w
+mul.f r1.x, r1.x, r1.w
+mad.f32 r1.w, c5.z, r5.x, r2.x
+mad.f32 r1.x, c5.y, r4.w, r1.x
mul.f r0.x, r0.x, r0.w
-mul.f r0.y, r0.y, r0.w
-mul.f r0.w, r1.x, r0.w
-mov.f32f32 r3.w, r1.y
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
-mad.f32 r0.x, c5.z, r5.x, r0.x
-mad.f32 r0.y, c5.y, r4.w, r0.y
-mov.f32f32 r0.w, r0.w
+mov.f32f32 r0.w, c12.y
+mul.f r1.w, r0.y, r1.w
+mul.f r1.x, r0.y, r1.x
+mad.f32 r0.x, c5.x, r4.z, r0.x
+sel.b32 r0.w, r0.w, r1.y, r5.w
+add.f r0.z, r1.w, r0.z
+add.f r1.x, r1.x, r1.z
+mul.f r0.x, r0.y, r0.x
nop
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
-mad.f32 r0.w, c5.x, r4.z, r0.w
-sam (f32)(w)r3.x, r3.z, s#1, t#1
-(sy)cmps.f.lt r1.x, r3.w, c14.y
-mul.f r0.x, r0.z, r0.x
-mul.f r0.y, r0.z, r0.y
-mov.f32f32 r0.w, r0.w
-cov.u32f32 r1.x, r1.x
-add.f r0.x, r0.x, r2.y
-add.f r0.y, r0.y, r2.w
-mul.f r0.z, r0.z, r0.w
-cmps.f.ne r0.w, r1.x, c12.y
-mov.f32f32 r1.x, r3.w
-(rpt2)nop
-mov.f32f32 r1.x, r1.x
-add.f r0.z, r0.z, r6.y
+mul.f r0.y, r0.z, r0.w
+mul.f r0.z, r1.x, r0.w
(rpt1)nop
-sel.b32 r0.w, r1.z, r0.w, r1.x
+mul.f r2.z, r0.y, c4.z
+mul.f r2.y, r0.z, c4.y
+add.f r0.x, r0.x, r3.y
(rpt2)nop
mul.f r0.x, r0.x, r0.w
-mul.f r0.y, r0.y, r0.w
-mul.f r0.z, r0.z, r0.w
-nop
-mul.f r0.x, r0.x, c4.z
-mul.f r0.y, r0.y, c4.y
-mul.f r0.z, r0.z, c4.x
-nop
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
+(rpt2)nop
+mul.f r2.x, r0.x, c4.x
+end
nop
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
nop
-mov.f32f32 r5.w, r0.x
-mov.f32f32 r5.z, r0.y
-mov.f32f32 r5.y, r0.z
-end
-; FRAG: outputs: r5.y (1:0)
-; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r2.x (5:10,cm=f,il=12,b=1) r3.x (5:11,cm=f,il=16,b=1) r2.z (5:12,cm=f,il=20,b=1) r5.x (5:13,cm=f,il=24,b=1)
-; FRAG: 237 instructions, 0 half, 7 full
-; pos (bary): r1.x
-; color: r5.y
-; fragcoord: r0.x
+; FRAG: outputs: r2.x (1:0)
+; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r2.x (5:10,cm=f,il=12,b=1) r3.x (5:11,cm=f,il=16,b=1) r4.x (5:12,cm=f,il=20,b=1) r5.x (5:13,cm=f,il=24,b=1)
+; FRAG: 155 instructions, 0 half, 8 full
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-140.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-140.asm
index 8e2237c..042d36d 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-140.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-140.asm
@@ -1,155 +1,116 @@
; options:
; VERT: new compiler
-@in(r0.x) in0
-@in(r0.y) in1
-@in(r0.z) in2
-@in(r0.w) in4
-@in(r1.x) in5
-@in(r1.y) in6
-@in(r1.z) in8
-@in(r1.w) in9
-@out(r3.x) out0
-@out(r3.y) out1
-@out(r3.z) out2
-@out(r3.w) out3
-@out(r6.x) out4
-@out(r6.y) out5
-@out(r6.z) out6
-@out(r6.w) out7
-@out(r4.x) out8
-@out(r4.y) out9
-@out(r4.z) out10
-@out(r4.w) out11
-@out(r5.x) out12
-@out(r5.y) out13
-@out(r5.z) out14
-@out(r5.w) out15
-@out(r1.w) out16
-@out(r2.x) out17
-@out(r2.y) out18
-@out(r2.z) out19
-@out(r7.x) out20
-@out(r7.y) out21
-@out(r7.z) out22
-@out(r7.w) out23
-(sy)(ss)add.f r2.x, c4.x, (neg)r0.x
-absneg.f r2.y, (neg)c5.x
-mul.f r2.z, c8.w, r0.x
-mul.f r2.w, c8.z, r0.x
-mul.f r3.x, r2.x, r2.x
-add.f r3.y, c4.y, (neg)r0.y
-mul.f r3.z, r0.w, r2.y
-absneg.f r3.w, (neg)c5.y
-mad.f32 r2.z, c9.w, r0.y, r2.z
-mad.f32 r3.x, r3.y, r3.y, r3.x
-mad.f32 r2.z, c10.w, r0.z, r2.z
-mad.f32 r2.w, c9.z, r0.y, r2.w
-mul.f r4.x, c8.y, r0.x
-mov.f32f32 r3.x, r3.x
-add.f r4.y, c4.z, (neg)r0.z
-mad.f32 r3.z, r1.x, r3.w, r3.z
-add.f r2.z, r2.z, c11.w
-mad.f32 r2.w, c10.z, r0.z, r2.w
-mad.f32 r3.x, r4.y, r4.y, r3.x
-mov.f32f32 r3.z, r3.z
-absneg.f r4.z, (neg)c5.z
-mov.f32f32 r2.z, r2.z
-add.f r2.w, r2.w, c11.z
-mad.f32 r4.x, c9.y, r0.y, r4.x
-mul.f r4.w, c8.x, r0.x
-rsq r3.x, r3.x
-(ss)mov.f32f32 r3.x, r3.x
-mad.f32 r3.z, r1.y, r4.z, r3.z
-mov.f32f32 r5.w, r2.z
-mov.f32f32 r2.z, r2.w
-mad.f32 r2.x, r2.x, r3.x, r2.y
-mov.f32f32 r2.y, r3.z
-mad.f32 r2.w, r3.y, r3.x, r3.w
-mad.f32 r3.x, r4.y, r3.x, r4.z
-mov.f32f32 r2.x, r2.x
-max.f r2.y, c13.x, r2.y
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r3.x, r3.x
-mul.f r3.y, r2.x, r2.x
-mov.f32f32 r2.y, r2.y
-mad.f32 r3.y, r2.w, r2.w, r3.y
-mov.f32f32 r5.z, r2.z
-mad.f32 r2.z, c10.y, r0.z, r4.x
-mad.f32 r3.z, c9.x, r0.y, r4.w
-mov.f32f32 r3.y, r3.y
-mul.f r3.w, r2.y, c6.z
-mad.f32 r3.y, r3.x, r3.x, r3.y
-mul.f r4.x, r2.y, c6.y
-mul.f r2.y, r2.y, c6.x
-mov.f32f32 r3.w, r3.w
-add.f r2.z, r2.z, c11.y
-mad.f32 r3.z, c10.x, r0.z, r3.z
-mul.f r4.y, c0.w, r0.x
-rsq r3.y, r3.y
-(ss)mov.f32f32 r3.y, r3.y
-mov.f32f32 r6.z, r3.w
-mov.f32f32 r3.w, r4.x
-mov.f32f32 r2.y, r2.y
-mul.f r3.x, r3.x, r3.y
-mul.f r2.w, r2.w, r3.y
-mul.f r2.x, r2.x, r3.y
-mov.f32f32 r6.y, r3.w
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r2.x, r2.x
+@in(r6.x) in0
+@in(r6.y) in1
+@in(r6.z) in2
+@in(r2.x) in4
+@in(r2.y) in5
+@in(r2.z) in6
+@in(r4.z) in8
+@in(r4.w) in9
+@out(r0.x) out0
+@out(r0.y) out1
+@out(r0.z) out2
+@out(r0.w) out3
+@out(r1.x) out4
+@out(r1.y) out5
+@out(r1.z) out6
+@out(r1.w) out7
+@out(r2.x) out8
+@out(r2.y) out9
+@out(r2.z) out10
+@out(r2.w) out11
+@out(r3.x) out12
+@out(r3.y) out13
+@out(r3.z) out14
+@out(r3.w) out15
+@out(r4.x) out16
+@out(r4.y) out17
+@out(r4.z) out18
+@out(r4.w) out19
+@out(r5.x) out20
+@out(r5.y) out21
+@out(r5.z) out22
+@out(r5.w) out23
+@const(c13.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
+@const(c14.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
+(sy)(ss)add.f r0.x, c4.x, (neg)r6.x
+absneg.f r0.y, (neg)c5.x
+mul.f r0.z, c8.y, r6.x
+mul.f r0.w, c8.x, r6.x
+mul.f r1.x, r0.x, r0.x
+add.f r1.y, c4.y, (neg)r6.y
+mul.f r1.z, r2.x, r0.y
+absneg.f r1.w, (neg)c5.y
+mad.f32 r0.z, c9.y, r6.y, r0.z
+mad.f32 r1.x, r1.y, r1.y, r1.x
+add.f r2.w, c4.z, (neg)r6.z
+mad.f32 r1.z, r2.y, r1.w, r1.z
+absneg.f r3.x, (neg)c5.z
+mad.f32 r0.z, c10.y, r6.z, r0.z
+mad.f32 r1.x, r2.w, r2.w, r1.x
+mad.f32 r0.w, c9.x, r6.y, r0.w
+mad.f32 r1.z, r2.z, r3.x, r1.z
+mul.f r3.z, c8.w, r6.x
+mul.f r3.w, c8.z, r6.x
+add.f r0.z, r0.z, c11.y
+mad.f32 r0.w, c10.x, r6.z, r0.w
+rsq r1.x, r1.x
+(ss)mov.f32f32 r4.x, r1.x
+mad.f32 r0.x, r0.x, r1.x, r0.y
+max.f r0.y, c13.x, r1.z
+mul.f r3.y, r0.z, c12.y
+mad.f32 r0.z, r1.y, r4.x, r1.w
+(ss)mov.f32f32 r1.x, r0.x
+mad.f32 r1.y, r2.w, r4.x, r3.x
nop
-mov.f32f32 r7.z, r3.x
-mov.f32f32 r7.y, r2.w
-mov.f32f32 r7.x, r2.x
-mov.f32f32 r6.x, r2.y
-mul.f r2.x, r2.z, c12.y
-add.f r2.y, r3.z, c11.x
-mad.f32 r2.z, c1.w, r0.y, r4.y
-mul.f r2.w, c0.z, r0.x
-mov.f32f32 r5.y, r2.x
-mul.f r2.x, r2.y, c12.x
-mad.f32 r2.y, c2.w, r0.z, r2.z
-mad.f32 r2.z, c1.z, r0.y, r2.w
-mul.f r2.w, c0.y, r0.x
-mov.f32f32 r5.x, r2.x
-add.f r2.x, r2.y, c3.w
-mad.f32 r2.y, c2.z, r0.z, r2.z
-mad.f32 r2.z, c1.y, r0.y, r2.w
-mul.f r2.w, c0.x, r0.x
-mov.f32f32 r3.w, r2.x
-add.f r2.x, r2.y, c3.z
-mad.f32 r2.y, c2.y, r0.z, r2.z
-mad.f32 r0.y, c1.x, r0.y, r2.w
-mov.f32f32 r2.z, (0.000000)
-mov.f32f32 r3.z, r2.x
-add.f r2.x, r2.y, c3.y
-mad.f32 r0.y, c2.x, r0.z, r0.y
-mov.f32f32 r7.w, r2.z
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r3.y, r2.x
-add.f r0.y, r0.y, c3.x
-mov.f32f32 r1.z, r1.z
-mad.f32 r0.z, c7.x, r0.z, c7.y
-mad.f32 r0.x, c7.x, r0.x, c7.y
+mov.f32f32 r1.w, r0.z
+mul.f r1.x, r1.x, r1.x
+mov.f32f32 r2.w, r1.y
mov.f32f32 r3.x, r0.y
-mov.f32f32 r2.z, r1.w
-mov.f32f32 r2.y, r1.z
-mov.f32f32 r2.x, r0.z
-mov.f32f32 r1.w, r0.x
-mov.f32f32 r0.x, (0.000000)
-mov.f32f32 r0.y, r1.y
-mov.f32f32 r0.z, r1.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r4.w, r0.x
-mov.f32f32 r4.z, r0.y
-mov.f32f32 r4.y, r0.z
-mov.f32f32 r4.x, r0.w
-mov.f32f32 r0.x, (0.000000)
-(rpt2)nop
-mov.f32f32 r6.w, r0.x
+mad.f32 r0.z, r0.z, r1.w, r1.x
+mul.f r1.x, r0.y, c6.x
+mad.f32 r0.y, r1.y, r2.w, r0.z
+mul.f r1.z, r3.x, c6.z
+mul.f r1.y, r3.x, c6.y
+add.f r0.z, r0.w, c11.x
+mad.f32 r0.w, c9.w, r6.y, r3.z
+mad.f32 r3.z, c9.z, r6.y, r3.w
+mul.f r4.x, c0.w, r6.x
+rsq r0.y, r0.y
+(ss)mov.f32f32 r3.w, r0.y
+mul.f r5.x, r0.x, r0.y
+mul.f r3.x, r0.z, c12.x
+mad.f32 r0.x, c10.w, r6.z, r0.w
+mul.f r5.z, r2.w, r3.w
+mul.f r5.y, r1.w, r3.w
+(ss)mad.f32 r0.y, c10.z, r6.z, r3.z
+add.f r3.w, r0.x, c11.w
+mad.f32 r0.x, c1.w, r6.y, r4.x
+mul.f r0.z, c0.z, r6.x
+add.f r3.z, r0.y, c11.z
+mad.f32 r0.x, c2.w, r6.z, r0.x
+mad.f32 r0.y, c1.z, r6.y, r0.z
+mul.f r0.z, c0.y, r6.x
+mul.f r1.w, c0.x, r6.x
+add.f r0.w, r0.x, c3.w
+mad.f32 r0.x, c2.z, r6.z, r0.y
+mad.f32 r0.y, c1.y, r6.y, r0.z
+mad.f32 r1.w, c1.x, r6.y, r1.w
+mad.f32 r0.y, c2.y, r6.z, r0.y
+add.f r0.z, r0.x, c3.z
+mad.f32 r0.x, c2.x, r6.z, r1.w
+nop
+add.f r0.y, r0.y, c3.y
+mov.f32f32 r5.w, (0.000000)
+add.f r0.x, r0.x, c3.x
+mad.f32 r4.y, c7.x, r6.z, c7.y
+mad.f32 r4.x, c7.x, r6.x, c7.y
+mov.f32f32 r2.w, (0.000000)
+mov.f32f32 r1.w, (0.000000)
end
-; VERT: outputs: r3.x (0:0) r6.x (5:9) r4.x (5:10) r5.x (5:11) r1.w (5:12) r7.x (5:13)
-; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r1.z (0:0,cm=3,il=16,b=0)
-; VERT: 118 instructions, 0 half, 8 full
-; pos: r3.x
+nop
+; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11) r4.x (5:12) r5.x (5:13)
+; VERT: inputs: r6.x (0:0,cm=7,il=8,b=0) r2.x (0:0,cm=7,il=12,b=0) r4.z (0:0,cm=3,il=16,b=0)
+; VERT: 75 instructions, 0 half, 7 full
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-21.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-21.asm
index a7cfae4..22ca830 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-21.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-21.asm
@@ -2,37 +2,28 @@
; FRAG: new compiler
@in(r0.x) in0
@in(r0.y) in1
-@out(r1.y) out0
-@out(r1.z) out1
-@out(r1.w) out2
-@out(r2.x) out3
+@out(r1.x) out0
+@out(r1.y) out1
+@out(r1.z) out2
+@out(r1.w) out3
+@const(c0.x) 0x3ecccccd, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)bary.f r0.z, 0, r0.x
-bary.f (ei)r0.x, 1, r0.x
-mov.f32f32 r0.y, (0.000000)
-nop
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.x, r0.x
-(rpt1)nop
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.x
-(rpt5)nop
-sam (f32)(xyzw)r0.z, r0.z, s#0, t#0
-(sy)cmps.f.lt r0.x, r1.y, c0.x
-mov.f32f32 r2.x, r1.y
-mov.f32f32 r1.w, r1.x
-mov.f32f32 r1.z, r0.w
-cov.u32f32 r0.x, r0.x
-(rpt1)nop
-mov.f32f32 r1.y, r0.z
-cmps.f.ne p0.x, r0.x, r0.y
+bary.f (ei)r0.w, 1, r0.x
+mov.f32f32 r0.x, (0.000000)
+(rpt4)nop
+sam (f32)(xyzw)r1.x, r0.z, s#0, t#0
+(sy)cmps.f.lt r0.y, r1.w, c0.x
+(rpt2)nop
+cov.u32f32 r0.y, r0.y
+(rpt2)nop
+cmps.f.ne p0.x, r0.y, r0.x
(rpt5)nop
kill p0.x
end
nop
nop
-; FRAG: outputs: r1.y (1:0)
+nop
+; FRAG: outputs: r1.x (1:0)
; FRAG: inputs: r0.x (5:9,cm=f,il=8,b=1)
-; FRAG: 34 instructions, 0 half, 3 full
-; pos (bary): r0.x
-; color: r1.y
+; FRAG: 26 instructions, 0 half, 2 full
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-22.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-22.asm
index 08421de..f362ccd 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-22.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-22.asm
@@ -3,170 +3,134 @@
@in(r0.x) in0
@in(r0.y) in1
@in(r0.z) in2
-@in(r0.w) in4
-@in(r1.x) in5
-@out(r3.x) out0
-@out(r3.y) out1
-@out(r3.z) out2
-@out(r3.w) out3
-@out(r2.x) out4
-@out(r2.y) out5
-@out(r2.z) out6
-@out(r2.w) out7
-(sy)(ss)floor.f r1.y, c11.z
+@in(r1.x) in4
+@in(r1.y) in5
+@out(r0.x) out0
+@out(r0.y) out1
+@out(r0.z) out2
+@out(r0.w) out3
+@out(r1.x) out4
+@out(r1.y) out5
+@out(r1.z) out6
+@out(r1.w) out7
+@const(c15.x) 0x3ecccccd, 0x00000000, 0x3eaaa64c, 0x3e800000
+@const(c16.x) 0x3f000000, 0x40000000, 0xbf800000, 0x40400000
+@const(c17.x) 0x399d4952, 0x3e4ccccd, 0x3f99999a, 0x3fd9999a
+@const(c18.x) 0x3c88ce70, 0x00000000, 0x00000000, 0x00000000
+@const(c19.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
+(sy)(ss)floor.f r0.w, c11.z
floor.f r1.z, c11.x
absneg.f r1.w, (abs)c14.x
absneg.f r2.x, (abs)c14.y
-add.f r1.y, c11.z, (neg)r1.y
+add.f r0.w, c11.z, (neg)r0.w
add.f r1.z, c11.x, (neg)r1.z
mul.f r2.y, r0.x, r0.z
add.f r1.w, r1.w, r2.x
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.z, r1.z
-mul.f r2.x, r0.y, c17.x
-mov.f32f32 r1.w, r1.w
-max.f r1.y, r1.y, c15.y
+max.f r0.w, r0.w, c15.y
max.f r1.z, r1.z, c15.y
-mul.f r2.x, r2.y, r2.x
-mul.f r2.y, c13.x, r1.w
-min.f r1.y, r1.y, c19.y
+mul.f r2.x, r0.y, c17.x
+mul.f r2.z, c13.x, r1.w
+min.f r0.w, r0.w, c19.y
min.f r1.z, r1.z, c19.y
-mul.f r2.z, c8.z, r0.x
-mov.f32f32 r2.x, r2.x
-max.f r1.y, r1.y, c15.x
+mul.f r2.w, c8.z, r0.x
+mov.f32f32 r1.w, r1.w
+max.f r0.w, r0.w, c15.x
max.f r1.z, r1.z, c15.x
-mad.f32 r2.z, c9.z, r0.y, r2.z
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.z, r1.z
-mad.f32 r2.z, c10.z, r0.z, r2.z
-mul.f r2.w, c8.x, r0.x
-mul.f r1.y, c13.x, r1.y
-mad.f32 r2.w, c9.x, r0.y, r2.w
-add.f r2.z, r2.z, c11.z
-mad.f32 r2.w, c10.x, r0.z, r2.w
-mov.f32f32 r1.y, r1.y
-mad.f32 r2.y, c15.w, r2.y, r2.z
-mov.f32f32 r2.x, r2.x
-add.f r2.w, r2.w, c11.x
-mad.f32 r1.y, c15.z, r1.y, c11.x
-mad.f32 r1.z, c13.x, r1.z, r2.w
-mov.f32f32 r2.y, r2.y
-max.f r2.x, r2.x, c15.y
-mov.f32f32 r1.y, r1.y
+mul.f r3.x, c8.x, r0.x
+mad.f32 r2.w, c9.z, r0.y, r2.w
+mul.f r0.w, c13.x, r0.w
+mad.f32 r3.x, c9.x, r0.y, r3.x
+mad.f32 r2.w, c10.z, r0.z, r2.w
+mad.f32 r3.x, c10.x, r0.z, r3.x
+mad.f32 r0.w, c15.z, r0.w, c11.x
+max.f r1.w, r1.w, c17.z
+mul.f r2.x, r2.y, r2.x
+add.f r2.y, r3.x, c11.x
+add.f r0.w, r0.w, c16.x
+mad.f32 r1.z, c13.x, r1.z, r2.y
+add.f r2.w, r2.w, c11.z
+min.f r1.w, r1.w, c17.w
+floor.f r3.x, r0.w
add.f r1.z, r1.z, c16.x
-mov.f32f32 r2.y, r2.y
-min.f r2.x, r2.x, c19.y
-mov.f32f32 r1.y, r1.y
+mad.f32 r2.z, c15.w, r2.z, r2.w
+mov.f32f32 r3.y, r1.w
+add.f r0.w, r0.w, (neg)r3.x
floor.f r3.x, r1.z
-add.f r2.y, r2.y, c16.x
-min.f r2.x, r2.x, c17.y
-add.f r1.y, r1.y, c16.x
+add.f r2.z, r2.z, c16.x
+max.f r2.x, r2.x, c15.y
+mad.f32 r0.w, c16.y, r0.w, c16.z
add.f r1.z, r1.z, (neg)r3.x
-floor.f r3.x, r2.y
-mov.f32f32 r2.x, r2.x
-floor.f r3.y, r1.y
+floor.f r3.x, r2.z
+min.f r2.x, r2.x, c19.y
+absneg.f r0.w, (abs)r0.w
mad.f32 r1.z, c16.y, r1.z, c16.z
-add.f r2.y, r2.y, (neg)r3.x
-mul.f r3.x, r0.y, c18.x
-add.f r1.y, r1.y, (neg)r3.y
+add.f r2.z, r2.z, (neg)r3.x
+min.f r2.x, r2.x, c17.y
+mul.f r3.x, c16.y, r0.w
absneg.f r1.z, (abs)r1.z
-mad.f32 r2.y, c16.y, r2.y, c16.z
-mov.f32f32 r3.x, r3.x
-mad.f32 r1.y, c16.y, r1.y, c16.z
-mul.f r3.y, c16.y, r1.z
-absneg.f r2.y, (abs)r2.y
+mad.f32 r2.z, c16.y, r2.z, c16.z
+mul.f r0.w, r0.w, r0.w
+add.f r3.x, c16.w, (neg)r3.x
+mul.f r3.z, c16.y, r1.z
+absneg.f r2.z, (abs)r2.z
mul.f r1.z, r1.z, r1.z
-absneg.f r1.y, (abs)r1.y
-add.f r3.y, c16.w, (neg)r3.y
-mul.f r3.z, c16.y, r2.y
-mul.f r2.y, r2.y, r2.y
-mul.f r3.w, c16.y, r1.y
-mul.f r1.z, r1.z, r3.y
-add.f r3.y, c16.w, (neg)r3.z
-mul.f r1.y, r1.y, r1.y
-add.f r3.z, c16.w, (neg)r3.w
-mov.f32f32 r1.z, r1.z
-mul.f r2.y, r2.y, r3.y
-mov.f32f32 r3.x, r3.x
-mul.f r1.y, r1.y, r3.z
-mul.f r1.z, r1.z, r2.x
-mov.f32f32 r2.y, r2.y
+mul.f r0.w, r0.w, r3.x
+mul.f r3.x, r0.y, c18.x
+add.f r3.z, c16.w, (neg)r3.z
+mul.f r3.w, c16.y, r2.z
+mul.f r2.z, r2.z, r2.z
max.f r3.x, r3.x, c15.y
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.z, r1.z
-mul.f r2.x, r2.y, r2.x
-min.f r2.y, r3.x, c19.y
-max.f r1.w, r1.w, c17.z
-mul.f r3.x, c8.y, r0.x
+mul.f r1.z, r1.z, r3.z
+mov.f32f32 r3.z, r2.x
+add.f r3.w, c16.w, (neg)r3.w
+min.f r3.x, r3.x, c19.y
+mul.f r4.x, c8.y, r0.x
mul.f r0.x, c8.w, r0.x
-min.f r2.y, r2.y, c15.w
-mov.f32f32 r1.w, r1.w
-mad.f32 r3.x, c9.y, r0.y, r3.x
+mad.f32 r4.x, c9.y, r0.y, r4.x
+min.f r3.x, r3.x, c15.w
+mul.f r1.z, r1.z, r3.z
+mul.f r2.z, r2.z, r3.w
+mad.f32 r3.z, c10.y, r0.z, r4.x
+mul.f r0.w, r0.w, r3.x
+mov.f32f32 r3.x, r1.z
+mul.f r2.x, r2.z, r2.x
+add.f r2.z, r3.z, c11.y
+mov.f32f32 r3.z, r0.w
+mad.f32 r0.w, c14.y, r0.w, r1.z
+mad.f32 r1.z, r2.x, r1.w, r2.w
+mad.f32 r1.w, r2.x, r3.y, r2.y
+mad.f32 r2.y, c14.x, r3.z, r3.x
+mad.f32 r2.x, r2.x, r3.y, r2.z
mad.f32 r0.x, c9.w, r0.y, r0.x
-mov.f32f32 r0.y, r2.y
-min.f r1.w, r1.w, c17.w
-mad.f32 r2.y, c10.y, r0.z, r3.x
-mad.f32 r0.x, c10.w, r0.z, r0.x
-mul.f r0.y, r1.y, r0.y
-mov.f32f32 r0.z, r1.w
-add.f r1.y, r2.y, c11.y
-add.f r0.x, r0.x, c11.w
-mov.f32f32 r0.y, r0.y
-mad.f32 r1.w, r2.x, r0.z, r2.w
-mad.f32 r1.y, r2.x, r0.z, r1.y
-mad.f32 r0.z, r2.x, r0.z, r2.z
-mad.f32 r2.x, c14.x, r0.y, r1.z
-mad.f32 r0.y, c14.y, r0.y, r1.z
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.z, r2.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r2.y, r1.x
-mov.f32f32 r2.x, r0.w
-add.f r0.w, r1.w, r1.z
-add.f r0.y, r0.z, r0.y
-(rpt1)nop
-mov.f32f32 r0.z, r0.w
-mov.f32f32 r0.y, r0.y
-(rpt1)nop
-mul.f r0.w, c0.w, r0.z
-mul.f r1.x, c0.z, r0.z
-mad.f32 r0.w, c1.w, r1.y, r0.w
-mad.f32 r1.x, c1.z, r1.y, r1.x
-mul.f r1.z, c0.y, r0.z
-mul.f r0.z, c0.x, r0.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.x, r1.x
-mad.f32 r0.w, c2.w, r0.y, r0.w
-mad.f32 r1.x, c2.z, r0.y, r1.x
-mad.f32 r1.w, c3.w, r0.x, r0.w
-mad.f32 r3.x, c3.z, r0.x, r1.x
-mad.f32 r1.z, c1.y, r1.y, r1.z
-mad.f32 r0.z, c1.x, r1.y, r0.z
-mov.f32f32 r1.y, r1.w
-mov.f32f32 r1.w, r3.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r3.w, r1.y
-mov.f32f32 r3.z, r1.w
-mad.f32 r1.y, c2.y, r0.y, r1.z
-mad.f32 r0.y, c2.x, r0.y, r0.z
-mad.f32 r0.z, c3.y, r0.x, r1.y
-mad.f32 r0.x, c3.x, r0.x, r0.y
-mov.f32f32 r2.w, r0.w
-mov.f32f32 r2.z, r1.x
-mov.f32f32 r0.y, r0.z
-mov.f32f32 r0.x, r0.x
+nop
+add.f r0.y, r1.w, r2.y
+add.f r2.y, r1.z, r0.w
(rpt1)nop
-mov.f32f32 r3.y, r0.y
-mov.f32f32 r3.x, r0.x
+mov.f32f32 r0.w, r0.y
+mul.f r0.y, c0.x, r0.y
+mov.f32f32 r2.z, r2.y
+mad.f32 r0.x, c10.w, r0.z, r0.x
+mul.f r0.z, c0.w, r0.w
+mul.f r1.z, c0.z, r0.w
+mad.f32 r0.z, c1.w, r2.x, r0.z
+mad.f32 r1.z, c1.z, r2.x, r1.z
+mad.f32 r1.w, c2.w, r2.z, r0.z
+add.f r2.w, r0.x, c11.w
+mad.f32 r1.z, c2.z, r2.z, r1.z
+mul.f r0.x, c0.y, r0.w
+mad.f32 r0.y, c1.x, r2.x, r0.y
+mad.f32 r0.w, c3.w, r2.w, r1.w
+mad.f32 r0.z, c3.z, r2.w, r1.z
+mad.f32 r0.x, c1.y, r2.x, r0.x
+mad.f32 r0.y, c2.x, r2.y, r0.y
+mad.f32 r2.x, c2.y, r2.z, r0.x
+mad.f32 r0.x, c3.x, r2.w, r0.y
+mad.f32 r0.y, c3.y, r2.w, r2.x
end
nop
nop
nop
-; VERT: outputs: r3.x (0:0) r2.x (5:9)
-; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=3,il=12,b=0)
-; VERT: 152 instructions, 0 half, 4 full
-; pos: r3.x
+; VERT: outputs: r0.x (0:0) r1.x (5:9)
+; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r1.x (0:0,cm=3,il=12,b=0)
+; VERT: 110 instructions, 0 half, 5 full
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-23.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-23.asm
index a7cfae4..22ca830 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-23.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-23.asm
@@ -2,37 +2,28 @@
; FRAG: new compiler
@in(r0.x) in0
@in(r0.y) in1
-@out(r1.y) out0
-@out(r1.z) out1
-@out(r1.w) out2
-@out(r2.x) out3
+@out(r1.x) out0
+@out(r1.y) out1
+@out(r1.z) out2
+@out(r1.w) out3
+@const(c0.x) 0x3ecccccd, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)bary.f r0.z, 0, r0.x
-bary.f (ei)r0.x, 1, r0.x
-mov.f32f32 r0.y, (0.000000)
-nop
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.x, r0.x
-(rpt1)nop
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.x
-(rpt5)nop
-sam (f32)(xyzw)r0.z, r0.z, s#0, t#0
-(sy)cmps.f.lt r0.x, r1.y, c0.x
-mov.f32f32 r2.x, r1.y
-mov.f32f32 r1.w, r1.x
-mov.f32f32 r1.z, r0.w
-cov.u32f32 r0.x, r0.x
-(rpt1)nop
-mov.f32f32 r1.y, r0.z
-cmps.f.ne p0.x, r0.x, r0.y
+bary.f (ei)r0.w, 1, r0.x
+mov.f32f32 r0.x, (0.000000)
+(rpt4)nop
+sam (f32)(xyzw)r1.x, r0.z, s#0, t#0
+(sy)cmps.f.lt r0.y, r1.w, c0.x
+(rpt2)nop
+cov.u32f32 r0.y, r0.y
+(rpt2)nop
+cmps.f.ne p0.x, r0.y, r0.x
(rpt5)nop
kill p0.x
end
nop
nop
-; FRAG: outputs: r1.y (1:0)
+nop
+; FRAG: outputs: r1.x (1:0)
; FRAG: inputs: r0.x (5:9,cm=f,il=8,b=1)
-; FRAG: 34 instructions, 0 half, 3 full
-; pos (bary): r0.x
-; color: r1.y
+; FRAG: 26 instructions, 0 half, 2 full
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-24.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-24.asm
index 08421de..f362ccd 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-24.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-24.asm
@@ -3,170 +3,134 @@
@in(r0.x) in0
@in(r0.y) in1
@in(r0.z) in2
-@in(r0.w) in4
-@in(r1.x) in5
-@out(r3.x) out0
-@out(r3.y) out1
-@out(r3.z) out2
-@out(r3.w) out3
-@out(r2.x) out4
-@out(r2.y) out5
-@out(r2.z) out6
-@out(r2.w) out7
-(sy)(ss)floor.f r1.y, c11.z
+@in(r1.x) in4
+@in(r1.y) in5
+@out(r0.x) out0
+@out(r0.y) out1
+@out(r0.z) out2
+@out(r0.w) out3
+@out(r1.x) out4
+@out(r1.y) out5
+@out(r1.z) out6
+@out(r1.w) out7
+@const(c15.x) 0x3ecccccd, 0x00000000, 0x3eaaa64c, 0x3e800000
+@const(c16.x) 0x3f000000, 0x40000000, 0xbf800000, 0x40400000
+@const(c17.x) 0x399d4952, 0x3e4ccccd, 0x3f99999a, 0x3fd9999a
+@const(c18.x) 0x3c88ce70, 0x00000000, 0x00000000, 0x00000000
+@const(c19.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
+(sy)(ss)floor.f r0.w, c11.z
floor.f r1.z, c11.x
absneg.f r1.w, (abs)c14.x
absneg.f r2.x, (abs)c14.y
-add.f r1.y, c11.z, (neg)r1.y
+add.f r0.w, c11.z, (neg)r0.w
add.f r1.z, c11.x, (neg)r1.z
mul.f r2.y, r0.x, r0.z
add.f r1.w, r1.w, r2.x
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.z, r1.z
-mul.f r2.x, r0.y, c17.x
-mov.f32f32 r1.w, r1.w
-max.f r1.y, r1.y, c15.y
+max.f r0.w, r0.w, c15.y
max.f r1.z, r1.z, c15.y
-mul.f r2.x, r2.y, r2.x
-mul.f r2.y, c13.x, r1.w
-min.f r1.y, r1.y, c19.y
+mul.f r2.x, r0.y, c17.x
+mul.f r2.z, c13.x, r1.w
+min.f r0.w, r0.w, c19.y
min.f r1.z, r1.z, c19.y
-mul.f r2.z, c8.z, r0.x
-mov.f32f32 r2.x, r2.x
-max.f r1.y, r1.y, c15.x
+mul.f r2.w, c8.z, r0.x
+mov.f32f32 r1.w, r1.w
+max.f r0.w, r0.w, c15.x
max.f r1.z, r1.z, c15.x
-mad.f32 r2.z, c9.z, r0.y, r2.z
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.z, r1.z
-mad.f32 r2.z, c10.z, r0.z, r2.z
-mul.f r2.w, c8.x, r0.x
-mul.f r1.y, c13.x, r1.y
-mad.f32 r2.w, c9.x, r0.y, r2.w
-add.f r2.z, r2.z, c11.z
-mad.f32 r2.w, c10.x, r0.z, r2.w
-mov.f32f32 r1.y, r1.y
-mad.f32 r2.y, c15.w, r2.y, r2.z
-mov.f32f32 r2.x, r2.x
-add.f r2.w, r2.w, c11.x
-mad.f32 r1.y, c15.z, r1.y, c11.x
-mad.f32 r1.z, c13.x, r1.z, r2.w
-mov.f32f32 r2.y, r2.y
-max.f r2.x, r2.x, c15.y
-mov.f32f32 r1.y, r1.y
+mul.f r3.x, c8.x, r0.x
+mad.f32 r2.w, c9.z, r0.y, r2.w
+mul.f r0.w, c13.x, r0.w
+mad.f32 r3.x, c9.x, r0.y, r3.x
+mad.f32 r2.w, c10.z, r0.z, r2.w
+mad.f32 r3.x, c10.x, r0.z, r3.x
+mad.f32 r0.w, c15.z, r0.w, c11.x
+max.f r1.w, r1.w, c17.z
+mul.f r2.x, r2.y, r2.x
+add.f r2.y, r3.x, c11.x
+add.f r0.w, r0.w, c16.x
+mad.f32 r1.z, c13.x, r1.z, r2.y
+add.f r2.w, r2.w, c11.z
+min.f r1.w, r1.w, c17.w
+floor.f r3.x, r0.w
add.f r1.z, r1.z, c16.x
-mov.f32f32 r2.y, r2.y
-min.f r2.x, r2.x, c19.y
-mov.f32f32 r1.y, r1.y
+mad.f32 r2.z, c15.w, r2.z, r2.w
+mov.f32f32 r3.y, r1.w
+add.f r0.w, r0.w, (neg)r3.x
floor.f r3.x, r1.z
-add.f r2.y, r2.y, c16.x
-min.f r2.x, r2.x, c17.y
-add.f r1.y, r1.y, c16.x
+add.f r2.z, r2.z, c16.x
+max.f r2.x, r2.x, c15.y
+mad.f32 r0.w, c16.y, r0.w, c16.z
add.f r1.z, r1.z, (neg)r3.x
-floor.f r3.x, r2.y
-mov.f32f32 r2.x, r2.x
-floor.f r3.y, r1.y
+floor.f r3.x, r2.z
+min.f r2.x, r2.x, c19.y
+absneg.f r0.w, (abs)r0.w
mad.f32 r1.z, c16.y, r1.z, c16.z
-add.f r2.y, r2.y, (neg)r3.x
-mul.f r3.x, r0.y, c18.x
-add.f r1.y, r1.y, (neg)r3.y
+add.f r2.z, r2.z, (neg)r3.x
+min.f r2.x, r2.x, c17.y
+mul.f r3.x, c16.y, r0.w
absneg.f r1.z, (abs)r1.z
-mad.f32 r2.y, c16.y, r2.y, c16.z
-mov.f32f32 r3.x, r3.x
-mad.f32 r1.y, c16.y, r1.y, c16.z
-mul.f r3.y, c16.y, r1.z
-absneg.f r2.y, (abs)r2.y
+mad.f32 r2.z, c16.y, r2.z, c16.z
+mul.f r0.w, r0.w, r0.w
+add.f r3.x, c16.w, (neg)r3.x
+mul.f r3.z, c16.y, r1.z
+absneg.f r2.z, (abs)r2.z
mul.f r1.z, r1.z, r1.z
-absneg.f r1.y, (abs)r1.y
-add.f r3.y, c16.w, (neg)r3.y
-mul.f r3.z, c16.y, r2.y
-mul.f r2.y, r2.y, r2.y
-mul.f r3.w, c16.y, r1.y
-mul.f r1.z, r1.z, r3.y
-add.f r3.y, c16.w, (neg)r3.z
-mul.f r1.y, r1.y, r1.y
-add.f r3.z, c16.w, (neg)r3.w
-mov.f32f32 r1.z, r1.z
-mul.f r2.y, r2.y, r3.y
-mov.f32f32 r3.x, r3.x
-mul.f r1.y, r1.y, r3.z
-mul.f r1.z, r1.z, r2.x
-mov.f32f32 r2.y, r2.y
+mul.f r0.w, r0.w, r3.x
+mul.f r3.x, r0.y, c18.x
+add.f r3.z, c16.w, (neg)r3.z
+mul.f r3.w, c16.y, r2.z
+mul.f r2.z, r2.z, r2.z
max.f r3.x, r3.x, c15.y
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.z, r1.z
-mul.f r2.x, r2.y, r2.x
-min.f r2.y, r3.x, c19.y
-max.f r1.w, r1.w, c17.z
-mul.f r3.x, c8.y, r0.x
+mul.f r1.z, r1.z, r3.z
+mov.f32f32 r3.z, r2.x
+add.f r3.w, c16.w, (neg)r3.w
+min.f r3.x, r3.x, c19.y
+mul.f r4.x, c8.y, r0.x
mul.f r0.x, c8.w, r0.x
-min.f r2.y, r2.y, c15.w
-mov.f32f32 r1.w, r1.w
-mad.f32 r3.x, c9.y, r0.y, r3.x
+mad.f32 r4.x, c9.y, r0.y, r4.x
+min.f r3.x, r3.x, c15.w
+mul.f r1.z, r1.z, r3.z
+mul.f r2.z, r2.z, r3.w
+mad.f32 r3.z, c10.y, r0.z, r4.x
+mul.f r0.w, r0.w, r3.x
+mov.f32f32 r3.x, r1.z
+mul.f r2.x, r2.z, r2.x
+add.f r2.z, r3.z, c11.y
+mov.f32f32 r3.z, r0.w
+mad.f32 r0.w, c14.y, r0.w, r1.z
+mad.f32 r1.z, r2.x, r1.w, r2.w
+mad.f32 r1.w, r2.x, r3.y, r2.y
+mad.f32 r2.y, c14.x, r3.z, r3.x
+mad.f32 r2.x, r2.x, r3.y, r2.z
mad.f32 r0.x, c9.w, r0.y, r0.x
-mov.f32f32 r0.y, r2.y
-min.f r1.w, r1.w, c17.w
-mad.f32 r2.y, c10.y, r0.z, r3.x
-mad.f32 r0.x, c10.w, r0.z, r0.x
-mul.f r0.y, r1.y, r0.y
-mov.f32f32 r0.z, r1.w
-add.f r1.y, r2.y, c11.y
-add.f r0.x, r0.x, c11.w
-mov.f32f32 r0.y, r0.y
-mad.f32 r1.w, r2.x, r0.z, r2.w
-mad.f32 r1.y, r2.x, r0.z, r1.y
-mad.f32 r0.z, r2.x, r0.z, r2.z
-mad.f32 r2.x, c14.x, r0.y, r1.z
-mad.f32 r0.y, c14.y, r0.y, r1.z
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.z, r2.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r2.y, r1.x
-mov.f32f32 r2.x, r0.w
-add.f r0.w, r1.w, r1.z
-add.f r0.y, r0.z, r0.y
-(rpt1)nop
-mov.f32f32 r0.z, r0.w
-mov.f32f32 r0.y, r0.y
-(rpt1)nop
-mul.f r0.w, c0.w, r0.z
-mul.f r1.x, c0.z, r0.z
-mad.f32 r0.w, c1.w, r1.y, r0.w
-mad.f32 r1.x, c1.z, r1.y, r1.x
-mul.f r1.z, c0.y, r0.z
-mul.f r0.z, c0.x, r0.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.x, r1.x
-mad.f32 r0.w, c2.w, r0.y, r0.w
-mad.f32 r1.x, c2.z, r0.y, r1.x
-mad.f32 r1.w, c3.w, r0.x, r0.w
-mad.f32 r3.x, c3.z, r0.x, r1.x
-mad.f32 r1.z, c1.y, r1.y, r1.z
-mad.f32 r0.z, c1.x, r1.y, r0.z
-mov.f32f32 r1.y, r1.w
-mov.f32f32 r1.w, r3.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r3.w, r1.y
-mov.f32f32 r3.z, r1.w
-mad.f32 r1.y, c2.y, r0.y, r1.z
-mad.f32 r0.y, c2.x, r0.y, r0.z
-mad.f32 r0.z, c3.y, r0.x, r1.y
-mad.f32 r0.x, c3.x, r0.x, r0.y
-mov.f32f32 r2.w, r0.w
-mov.f32f32 r2.z, r1.x
-mov.f32f32 r0.y, r0.z
-mov.f32f32 r0.x, r0.x
+nop
+add.f r0.y, r1.w, r2.y
+add.f r2.y, r1.z, r0.w
(rpt1)nop
-mov.f32f32 r3.y, r0.y
-mov.f32f32 r3.x, r0.x
+mov.f32f32 r0.w, r0.y
+mul.f r0.y, c0.x, r0.y
+mov.f32f32 r2.z, r2.y
+mad.f32 r0.x, c10.w, r0.z, r0.x
+mul.f r0.z, c0.w, r0.w
+mul.f r1.z, c0.z, r0.w
+mad.f32 r0.z, c1.w, r2.x, r0.z
+mad.f32 r1.z, c1.z, r2.x, r1.z
+mad.f32 r1.w, c2.w, r2.z, r0.z
+add.f r2.w, r0.x, c11.w
+mad.f32 r1.z, c2.z, r2.z, r1.z
+mul.f r0.x, c0.y, r0.w
+mad.f32 r0.y, c1.x, r2.x, r0.y
+mad.f32 r0.w, c3.w, r2.w, r1.w
+mad.f32 r0.z, c3.z, r2.w, r1.z
+mad.f32 r0.x, c1.y, r2.x, r0.x
+mad.f32 r0.y, c2.x, r2.y, r0.y
+mad.f32 r2.x, c2.y, r2.z, r0.x
+mad.f32 r0.x, c3.x, r2.w, r0.y
+mad.f32 r0.y, c3.y, r2.w, r2.x
end
nop
nop
nop
-; VERT: outputs: r3.x (0:0) r2.x (5:9)
-; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=3,il=12,b=0)
-; VERT: 152 instructions, 0 half, 4 full
-; pos: r3.x
+; VERT: outputs: r0.x (0:0) r1.x (5:9)
+; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r1.x (0:0,cm=3,il=12,b=0)
+; VERT: 110 instructions, 0 half, 5 full
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-25.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-25.asm
index a7cfae4..22ca830 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-25.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-25.asm
@@ -2,37 +2,28 @@
; FRAG: new compiler
@in(r0.x) in0
@in(r0.y) in1
-@out(r1.y) out0
-@out(r1.z) out1
-@out(r1.w) out2
-@out(r2.x) out3
+@out(r1.x) out0
+@out(r1.y) out1
+@out(r1.z) out2
+@out(r1.w) out3
+@const(c0.x) 0x3ecccccd, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)bary.f r0.z, 0, r0.x
-bary.f (ei)r0.x, 1, r0.x
-mov.f32f32 r0.y, (0.000000)
-nop
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.x, r0.x
-(rpt1)nop
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.x
-(rpt5)nop
-sam (f32)(xyzw)r0.z, r0.z, s#0, t#0
-(sy)cmps.f.lt r0.x, r1.y, c0.x
-mov.f32f32 r2.x, r1.y
-mov.f32f32 r1.w, r1.x
-mov.f32f32 r1.z, r0.w
-cov.u32f32 r0.x, r0.x
-(rpt1)nop
-mov.f32f32 r1.y, r0.z
-cmps.f.ne p0.x, r0.x, r0.y
+bary.f (ei)r0.w, 1, r0.x
+mov.f32f32 r0.x, (0.000000)
+(rpt4)nop
+sam (f32)(xyzw)r1.x, r0.z, s#0, t#0
+(sy)cmps.f.lt r0.y, r1.w, c0.x
+(rpt2)nop
+cov.u32f32 r0.y, r0.y
+(rpt2)nop
+cmps.f.ne p0.x, r0.y, r0.x
(rpt5)nop
kill p0.x
end
nop
nop
-; FRAG: outputs: r1.y (1:0)
+nop
+; FRAG: outputs: r1.x (1:0)
; FRAG: inputs: r0.x (5:9,cm=f,il=8,b=1)
-; FRAG: 34 instructions, 0 half, 3 full
-; pos (bary): r0.x
-; color: r1.y
+; FRAG: 26 instructions, 0 half, 2 full
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-27.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-27.asm
index a7cfae4..22ca830 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-27.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-27.asm
@@ -2,37 +2,28 @@
; FRAG: new compiler
@in(r0.x) in0
@in(r0.y) in1
-@out(r1.y) out0
-@out(r1.z) out1
-@out(r1.w) out2
-@out(r2.x) out3
+@out(r1.x) out0
+@out(r1.y) out1
+@out(r1.z) out2
+@out(r1.w) out3
+@const(c0.x) 0x3ecccccd, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)bary.f r0.z, 0, r0.x
-bary.f (ei)r0.x, 1, r0.x
-mov.f32f32 r0.y, (0.000000)
-nop
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.x, r0.x
-(rpt1)nop
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.x
-(rpt5)nop
-sam (f32)(xyzw)r0.z, r0.z, s#0, t#0
-(sy)cmps.f.lt r0.x, r1.y, c0.x
-mov.f32f32 r2.x, r1.y
-mov.f32f32 r1.w, r1.x
-mov.f32f32 r1.z, r0.w
-cov.u32f32 r0.x, r0.x
-(rpt1)nop
-mov.f32f32 r1.y, r0.z
-cmps.f.ne p0.x, r0.x, r0.y
+bary.f (ei)r0.w, 1, r0.x
+mov.f32f32 r0.x, (0.000000)
+(rpt4)nop
+sam (f32)(xyzw)r1.x, r0.z, s#0, t#0
+(sy)cmps.f.lt r0.y, r1.w, c0.x
+(rpt2)nop
+cov.u32f32 r0.y, r0.y
+(rpt2)nop
+cmps.f.ne p0.x, r0.y, r0.x
(rpt5)nop
kill p0.x
end
nop
nop
-; FRAG: outputs: r1.y (1:0)
+nop
+; FRAG: outputs: r1.x (1:0)
; FRAG: inputs: r0.x (5:9,cm=f,il=8,b=1)
-; FRAG: 34 instructions, 0 half, 3 full
-; pos (bary): r0.x
-; color: r1.y
+; FRAG: 26 instructions, 0 half, 2 full
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-28.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-28.asm
index 08421de..f362ccd 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-28.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-28.asm
@@ -3,170 +3,134 @@
@in(r0.x) in0
@in(r0.y) in1
@in(r0.z) in2
-@in(r0.w) in4
-@in(r1.x) in5
-@out(r3.x) out0
-@out(r3.y) out1
-@out(r3.z) out2
-@out(r3.w) out3
-@out(r2.x) out4
-@out(r2.y) out5
-@out(r2.z) out6
-@out(r2.w) out7
-(sy)(ss)floor.f r1.y, c11.z
+@in(r1.x) in4
+@in(r1.y) in5
+@out(r0.x) out0
+@out(r0.y) out1
+@out(r0.z) out2
+@out(r0.w) out3
+@out(r1.x) out4
+@out(r1.y) out5
+@out(r1.z) out6
+@out(r1.w) out7
+@const(c15.x) 0x3ecccccd, 0x00000000, 0x3eaaa64c, 0x3e800000
+@const(c16.x) 0x3f000000, 0x40000000, 0xbf800000, 0x40400000
+@const(c17.x) 0x399d4952, 0x3e4ccccd, 0x3f99999a, 0x3fd9999a
+@const(c18.x) 0x3c88ce70, 0x00000000, 0x00000000, 0x00000000
+@const(c19.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
+(sy)(ss)floor.f r0.w, c11.z
floor.f r1.z, c11.x
absneg.f r1.w, (abs)c14.x
absneg.f r2.x, (abs)c14.y
-add.f r1.y, c11.z, (neg)r1.y
+add.f r0.w, c11.z, (neg)r0.w
add.f r1.z, c11.x, (neg)r1.z
mul.f r2.y, r0.x, r0.z
add.f r1.w, r1.w, r2.x
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.z, r1.z
-mul.f r2.x, r0.y, c17.x
-mov.f32f32 r1.w, r1.w
-max.f r1.y, r1.y, c15.y
+max.f r0.w, r0.w, c15.y
max.f r1.z, r1.z, c15.y
-mul.f r2.x, r2.y, r2.x
-mul.f r2.y, c13.x, r1.w
-min.f r1.y, r1.y, c19.y
+mul.f r2.x, r0.y, c17.x
+mul.f r2.z, c13.x, r1.w
+min.f r0.w, r0.w, c19.y
min.f r1.z, r1.z, c19.y
-mul.f r2.z, c8.z, r0.x
-mov.f32f32 r2.x, r2.x
-max.f r1.y, r1.y, c15.x
+mul.f r2.w, c8.z, r0.x
+mov.f32f32 r1.w, r1.w
+max.f r0.w, r0.w, c15.x
max.f r1.z, r1.z, c15.x
-mad.f32 r2.z, c9.z, r0.y, r2.z
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.z, r1.z
-mad.f32 r2.z, c10.z, r0.z, r2.z
-mul.f r2.w, c8.x, r0.x
-mul.f r1.y, c13.x, r1.y
-mad.f32 r2.w, c9.x, r0.y, r2.w
-add.f r2.z, r2.z, c11.z
-mad.f32 r2.w, c10.x, r0.z, r2.w
-mov.f32f32 r1.y, r1.y
-mad.f32 r2.y, c15.w, r2.y, r2.z
-mov.f32f32 r2.x, r2.x
-add.f r2.w, r2.w, c11.x
-mad.f32 r1.y, c15.z, r1.y, c11.x
-mad.f32 r1.z, c13.x, r1.z, r2.w
-mov.f32f32 r2.y, r2.y
-max.f r2.x, r2.x, c15.y
-mov.f32f32 r1.y, r1.y
+mul.f r3.x, c8.x, r0.x
+mad.f32 r2.w, c9.z, r0.y, r2.w
+mul.f r0.w, c13.x, r0.w
+mad.f32 r3.x, c9.x, r0.y, r3.x
+mad.f32 r2.w, c10.z, r0.z, r2.w
+mad.f32 r3.x, c10.x, r0.z, r3.x
+mad.f32 r0.w, c15.z, r0.w, c11.x
+max.f r1.w, r1.w, c17.z
+mul.f r2.x, r2.y, r2.x
+add.f r2.y, r3.x, c11.x
+add.f r0.w, r0.w, c16.x
+mad.f32 r1.z, c13.x, r1.z, r2.y
+add.f r2.w, r2.w, c11.z
+min.f r1.w, r1.w, c17.w
+floor.f r3.x, r0.w
add.f r1.z, r1.z, c16.x
-mov.f32f32 r2.y, r2.y
-min.f r2.x, r2.x, c19.y
-mov.f32f32 r1.y, r1.y
+mad.f32 r2.z, c15.w, r2.z, r2.w
+mov.f32f32 r3.y, r1.w
+add.f r0.w, r0.w, (neg)r3.x
floor.f r3.x, r1.z
-add.f r2.y, r2.y, c16.x
-min.f r2.x, r2.x, c17.y
-add.f r1.y, r1.y, c16.x
+add.f r2.z, r2.z, c16.x
+max.f r2.x, r2.x, c15.y
+mad.f32 r0.w, c16.y, r0.w, c16.z
add.f r1.z, r1.z, (neg)r3.x
-floor.f r3.x, r2.y
-mov.f32f32 r2.x, r2.x
-floor.f r3.y, r1.y
+floor.f r3.x, r2.z
+min.f r2.x, r2.x, c19.y
+absneg.f r0.w, (abs)r0.w
mad.f32 r1.z, c16.y, r1.z, c16.z
-add.f r2.y, r2.y, (neg)r3.x
-mul.f r3.x, r0.y, c18.x
-add.f r1.y, r1.y, (neg)r3.y
+add.f r2.z, r2.z, (neg)r3.x
+min.f r2.x, r2.x, c17.y
+mul.f r3.x, c16.y, r0.w
absneg.f r1.z, (abs)r1.z
-mad.f32 r2.y, c16.y, r2.y, c16.z
-mov.f32f32 r3.x, r3.x
-mad.f32 r1.y, c16.y, r1.y, c16.z
-mul.f r3.y, c16.y, r1.z
-absneg.f r2.y, (abs)r2.y
+mad.f32 r2.z, c16.y, r2.z, c16.z
+mul.f r0.w, r0.w, r0.w
+add.f r3.x, c16.w, (neg)r3.x
+mul.f r3.z, c16.y, r1.z
+absneg.f r2.z, (abs)r2.z
mul.f r1.z, r1.z, r1.z
-absneg.f r1.y, (abs)r1.y
-add.f r3.y, c16.w, (neg)r3.y
-mul.f r3.z, c16.y, r2.y
-mul.f r2.y, r2.y, r2.y
-mul.f r3.w, c16.y, r1.y
-mul.f r1.z, r1.z, r3.y
-add.f r3.y, c16.w, (neg)r3.z
-mul.f r1.y, r1.y, r1.y
-add.f r3.z, c16.w, (neg)r3.w
-mov.f32f32 r1.z, r1.z
-mul.f r2.y, r2.y, r3.y
-mov.f32f32 r3.x, r3.x
-mul.f r1.y, r1.y, r3.z
-mul.f r1.z, r1.z, r2.x
-mov.f32f32 r2.y, r2.y
+mul.f r0.w, r0.w, r3.x
+mul.f r3.x, r0.y, c18.x
+add.f r3.z, c16.w, (neg)r3.z
+mul.f r3.w, c16.y, r2.z
+mul.f r2.z, r2.z, r2.z
max.f r3.x, r3.x, c15.y
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.z, r1.z
-mul.f r2.x, r2.y, r2.x
-min.f r2.y, r3.x, c19.y
-max.f r1.w, r1.w, c17.z
-mul.f r3.x, c8.y, r0.x
+mul.f r1.z, r1.z, r3.z
+mov.f32f32 r3.z, r2.x
+add.f r3.w, c16.w, (neg)r3.w
+min.f r3.x, r3.x, c19.y
+mul.f r4.x, c8.y, r0.x
mul.f r0.x, c8.w, r0.x
-min.f r2.y, r2.y, c15.w
-mov.f32f32 r1.w, r1.w
-mad.f32 r3.x, c9.y, r0.y, r3.x
+mad.f32 r4.x, c9.y, r0.y, r4.x
+min.f r3.x, r3.x, c15.w
+mul.f r1.z, r1.z, r3.z
+mul.f r2.z, r2.z, r3.w
+mad.f32 r3.z, c10.y, r0.z, r4.x
+mul.f r0.w, r0.w, r3.x
+mov.f32f32 r3.x, r1.z
+mul.f r2.x, r2.z, r2.x
+add.f r2.z, r3.z, c11.y
+mov.f32f32 r3.z, r0.w
+mad.f32 r0.w, c14.y, r0.w, r1.z
+mad.f32 r1.z, r2.x, r1.w, r2.w
+mad.f32 r1.w, r2.x, r3.y, r2.y
+mad.f32 r2.y, c14.x, r3.z, r3.x
+mad.f32 r2.x, r2.x, r3.y, r2.z
mad.f32 r0.x, c9.w, r0.y, r0.x
-mov.f32f32 r0.y, r2.y
-min.f r1.w, r1.w, c17.w
-mad.f32 r2.y, c10.y, r0.z, r3.x
-mad.f32 r0.x, c10.w, r0.z, r0.x
-mul.f r0.y, r1.y, r0.y
-mov.f32f32 r0.z, r1.w
-add.f r1.y, r2.y, c11.y
-add.f r0.x, r0.x, c11.w
-mov.f32f32 r0.y, r0.y
-mad.f32 r1.w, r2.x, r0.z, r2.w
-mad.f32 r1.y, r2.x, r0.z, r1.y
-mad.f32 r0.z, r2.x, r0.z, r2.z
-mad.f32 r2.x, c14.x, r0.y, r1.z
-mad.f32 r0.y, c14.y, r0.y, r1.z
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.z, r2.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r2.y, r1.x
-mov.f32f32 r2.x, r0.w
-add.f r0.w, r1.w, r1.z
-add.f r0.y, r0.z, r0.y
-(rpt1)nop
-mov.f32f32 r0.z, r0.w
-mov.f32f32 r0.y, r0.y
-(rpt1)nop
-mul.f r0.w, c0.w, r0.z
-mul.f r1.x, c0.z, r0.z
-mad.f32 r0.w, c1.w, r1.y, r0.w
-mad.f32 r1.x, c1.z, r1.y, r1.x
-mul.f r1.z, c0.y, r0.z
-mul.f r0.z, c0.x, r0.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.x, r1.x
-mad.f32 r0.w, c2.w, r0.y, r0.w
-mad.f32 r1.x, c2.z, r0.y, r1.x
-mad.f32 r1.w, c3.w, r0.x, r0.w
-mad.f32 r3.x, c3.z, r0.x, r1.x
-mad.f32 r1.z, c1.y, r1.y, r1.z
-mad.f32 r0.z, c1.x, r1.y, r0.z
-mov.f32f32 r1.y, r1.w
-mov.f32f32 r1.w, r3.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r3.w, r1.y
-mov.f32f32 r3.z, r1.w
-mad.f32 r1.y, c2.y, r0.y, r1.z
-mad.f32 r0.y, c2.x, r0.y, r0.z
-mad.f32 r0.z, c3.y, r0.x, r1.y
-mad.f32 r0.x, c3.x, r0.x, r0.y
-mov.f32f32 r2.w, r0.w
-mov.f32f32 r2.z, r1.x
-mov.f32f32 r0.y, r0.z
-mov.f32f32 r0.x, r0.x
+nop
+add.f r0.y, r1.w, r2.y
+add.f r2.y, r1.z, r0.w
(rpt1)nop
-mov.f32f32 r3.y, r0.y
-mov.f32f32 r3.x, r0.x
+mov.f32f32 r0.w, r0.y
+mul.f r0.y, c0.x, r0.y
+mov.f32f32 r2.z, r2.y
+mad.f32 r0.x, c10.w, r0.z, r0.x
+mul.f r0.z, c0.w, r0.w
+mul.f r1.z, c0.z, r0.w
+mad.f32 r0.z, c1.w, r2.x, r0.z
+mad.f32 r1.z, c1.z, r2.x, r1.z
+mad.f32 r1.w, c2.w, r2.z, r0.z
+add.f r2.w, r0.x, c11.w
+mad.f32 r1.z, c2.z, r2.z, r1.z
+mul.f r0.x, c0.y, r0.w
+mad.f32 r0.y, c1.x, r2.x, r0.y
+mad.f32 r0.w, c3.w, r2.w, r1.w
+mad.f32 r0.z, c3.z, r2.w, r1.z
+mad.f32 r0.x, c1.y, r2.x, r0.x
+mad.f32 r0.y, c2.x, r2.y, r0.y
+mad.f32 r2.x, c2.y, r2.z, r0.x
+mad.f32 r0.x, c3.x, r2.w, r0.y
+mad.f32 r0.y, c3.y, r2.w, r2.x
end
nop
nop
nop
-; VERT: outputs: r3.x (0:0) r2.x (5:9)
-; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=3,il=12,b=0)
-; VERT: 152 instructions, 0 half, 4 full
-; pos: r3.x
+; VERT: outputs: r0.x (0:0) r1.x (5:9)
+; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r1.x (0:0,cm=3,il=12,b=0)
+; VERT: 110 instructions, 0 half, 5 full
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-29.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-29.asm
index 6eeb0e0..e298c69 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-29.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-29.asm
@@ -1,32 +1,23 @@
; options:
; FRAG: new compiler
-@out(r0.w) out0
-@out(r1.x) out1
-@out(r1.y) out2
-@out(r1.z) out3
+@out(r1.x) out0
+@out(r1.y) out1
+@out(r1.z) out2
+@out(r1.w) out3
+@const(c10.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)mov.f32f32 r0.x, c9.x
-mov.f32f32 r0.y, c9.w
-mov.f32f32 r0.z, c9.y
-nop
-mov.f32f32 r0.w, r0.x
-mov.f32f32 r1.y, r0.y
-mov.f32f32 r1.x, r0.z
+mov.f32f32 r0.y, c9.y
+mov.f32f32 r0.z, c9.w
(rpt5)nop
-sam.p (f32)(xyzw)r0.x, r0.w, s#0, t#0
-(sy)(ss)mul.f r0.w, r0.w, c4.w
-mul.f r0.z, r0.z, c4.z
-mul.f r0.y, r0.y, c4.y
-mul.f r0.x, r0.x, c4.x
-mov.f32f32 r1.z, r0.w
-mov.f32f32 r1.y, r0.z
-mov.f32f32 r1.x, r0.y
-mov.f32f32 r0.w, r0.x
+sam.p (f32)(xyzw)r0.x, r0.x, s#0, t#0
+(sy)mul.f r1.w, r0.w, c4.w
+mul.f r1.z, r0.z, c4.z
+mul.f r1.y, r0.y, c4.y
+mul.f r1.x, r0.x, c4.x
end
nop
nop
-; FRAG: outputs: r0.w (1:0)
+; FRAG: outputs: r1.x (1:0)
; FRAG: inputs:
-; FRAG: 23 instructions, 0 half, 2 full
-; pos (bary): r0.x
-; color: r0.w
+; FRAG: 15 instructions, 0 half, 2 full
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-30.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-30.asm
index 1b081fa..eb8f852 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-30.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-30.asm
@@ -4,210 +4,135 @@
@in(r0.w) in3
@in(r1.x) in4
@in(r1.y) in5
-@out(r4.x) out0
-@out(r4.y) out1
-@out(r4.z) out2
-@out(r4.w) out3
+@out(r2.x) out0
+@out(r2.y) out1
+@out(r2.z) out2
+@out(r2.w) out3
+@const(c8.x) 0x3f000000, 0x00000000, 0x3f800000, 0xba03126f
+@const(c9.x) 0xbf000000, 0x40000000, 0x3f800000, 0x3fb8aa65
+@const(c10.x) 0x3de38866, 0x3cf5c28f, 0x00000000, 0x00000000
+@const(c11.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.x, 0, r1.x
add.f r0.y, r0.w, c8.y
bary.f r0.w, 1, r1.x
bary.f r1.z, 4, r1.x
-add.f r1.w, r0.x, c9.x
-bary.f r2.x, 6, r1.x
-bary.f r2.y, 2, r1.x
-add.f r2.z, r0.w, c9.x
-floor.f r2.w, r1.w
+add.f r2.x, r0.x, c9.x
+bary.f r1.w, 5, r1.x
+add.f r2.y, r0.w, c9.x
+bary.f r2.z, 2, r1.x
+floor.f r2.w, r2.x
rcp r0.y, r0.y
add.f r0.z, r0.z, c8.y
-mov.f32f32 r1.z, r1.z
-floor.f r3.x, r2.z
-add.f r1.w, r1.w, (neg)r2.w
+floor.f r3.x, r2.y
+add.f r3.w, r2.z, c8.w
+add.f r2.x, r2.x, (neg)r2.w
(ss)mul.f r0.y, r0.z, r0.y
-mov.f32f32 r3.y, r1.z
-add.f r0.z, r2.z, (neg)r3.x
-mov.f32f32 r1.z, r1.w
+absneg.f r0.z, (neg)c6.x
+add.f r2.y, r2.y, (neg)r3.x
+mov.f32f32 r2.z, r2.x
+add.f r2.x, r2.x, c8.z
+mul.f r0.z, r0.z, c6.x
+sam (f32)(w)r4.x, r1.z, s#1, t#1
+(ss)mov.f32f32 r1.z, r2.y
+mul.f r1.w, c8.x, r2.z
+add.f r2.z, c9.y, (neg)r2.z
+mul.f r0.z, r0.z, r0.y
mov.f32f32 r0.y, r0.y
-absneg.f r1.w, (neg)c6.x
-mov.f32f32 r0.z, r0.z
-mul.f r2.z, c8.x, r1.z
-add.f r2.w, c9.y, (neg)r1.z
-mul.f r1.w, r1.w, c6.x
-add.f r3.x, c9.y, (neg)r0.z
-mov.f32f32 r2.z, r2.z
-mul.f r3.z, c8.x, r0.z
-mul.f r1.w, r1.w, r0.y
-mov.f32f32 r2.w, r2.w
-add.f r0.x, r0.x, (neg)r2.z
-mov.f32f32 r2.z, r3.z
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r0.x, r0.x
-add.f r0.w, r0.w, (neg)r2.z
-mul.f r0.y, r1.w, r0.y
-mul.f r1.w, r2.w, r3.x
-add.f r2.z, c9.x, r0.x
+add.f r0.x, r0.x, (neg)r1.w
+mul.f r1.w, c8.x, r1.z
+mov.f32f32 r2.w, r2.z
+mul.f r0.y, r0.z, r0.y
+mov.f32f32 r0.z, r0.x
+add.f r0.w, r0.w, (neg)r1.w
add.f r0.x, c9.z, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r0.x, r0.x
mul.f r0.y, r0.y, c9.w
-add.f r3.z, c9.z, r0.w
-mul.f r2.z, r2.z, c3.z
-mul.f r0.x, r0.x, c3.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r3.z, r3.z
-mov.f32f32 r3.w, r2.z
-mov.f32f32 r4.x, r0.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r4.y, r3.w
-add.f r0.w, c9.x, r0.w
-exp2 r0.y, r0.y
-(ss)mov.f32f32 r0.y, r0.y
-mov.f32f32 r3.w, r4.x
-mov.f32f32 r5.x, r0.x
-mov.f32f32 r0.x, r0.w
-add.f r0.w, c11.y, (neg)r0.y
-mov.f32f32 r5.w, r3.w
-mul.f r3.z, r3.z, c3.w
-mul.f r0.x, r0.x, c3.w
-mul.f r0.w, r0.w, c6.y
-mul.f r0.y, r0.y, c8.z
-mov.f32f32 r3.w, r3.z
-mov.f32f32 r4.x, r0.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r6.z, r2.z
-mov.f32f32 r2.z, r3.z
-mov.f32f32 r4.z, r4.x
-add.f r2.y, r2.y, c8.w
-add.f r0.y, r0.y, r0.w
-mov.f32f32 r0.w, r3.w
-mov.f32f32 r5.y, r0.x
-mov.f32f32 r0.x, r2.y
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r6.x, r0.w
-mov.f32f32 r0.w, r2.y
-mov.f32f32 r4.w, r0.x
-mov.f32f32 r0.x, r0.y
-mov.f32f32 r0.y, r2.y
-mov.f32f32 r6.w, r2.z
-mov.f32f32 r6.y, r0.w
-mov.f32f32 r0.w, r2.y
-mov.f32f32 r5.z, r0.y
-sam.s (f32)(x)r0.y, r4.y, s#2, t#2
-(sy)mov.f32f32 r0.y, r0.y
+add.f r0.z, c9.x, r0.z
+mov.f32f32 r1.w, r0.w
+mul.f r3.y, r0.x, c3.z
+add.f r0.x, c9.z, r0.w
+mul.f r4.x, r0.z, c3.z
+add.f r0.z, c9.x, r1.w
+mov.f32f32 r5.x, r3.y
+mul.f r3.z, r0.x, c3.w
+mov.f32f32 r5.w, r4.x
+mul.f r5.y, r0.z, c3.w
+exp2 r0.x, r0.y
+(ss)mov.f32f32 r0.y, r0.x
+mov.f32f32 r5.z, r3.w
+mov.f32f32 r4.y, r3.z
+mov.f32f32 r6.x, r5.y
+mov.f32f32 r6.y, r3.w
+add.f r0.y, c11.y, (neg)r0.y
+mov.f32f32 r4.z, r3.w
+sam.s (f32)(x)r3.x, r3.y, s#2, t#2
+add.f r0.z, c9.y, (neg)r1.z
+sam.s (f32)(x)r6.z, r5.x, s#2, t#2
+mul.f r0.x, r0.x, c8.z
+add.f r0.w, r2.y, c8.z
+mul.f r0.y, r0.y, c6.y
+(ss)nop
+sam.s (f32)(x)r5.x, r5.w, s#2, t#2
+mov.f32f32 r1.z, r0.z
+mul.f r0.z, r2.x, r0.z
+mul.f r1.w, r2.z, r0.w
+add.f r0.x, r0.x, r0.y
+mul.f r0.y, r2.w, r1.z
+sam.s (f32)(x)r3.y, r4.x, s#2, t#2
+mul.f r0.w, r2.x, r0.w
+(sy)cmps.f.lt r1.z, r4.w, c10.y
+bary.f r2.x, 6, r1.x
+mul.f r0.y, r0.y, r5.x
max.f r0.x, r0.x, c8.y
-mov.f32f32 r7.x, r0.w
-mov.f32f32 r0.w, r1.w
-mov.f32f32 r0.y, r0.y
+mad.f32 r0.y, r0.z, r6.z, r0.y
+cov.u32f32 r0.z, r1.z
+mad.f32 r0.y, r1.w, r3.y, r0.y
min.f r0.x, r0.x, c8.z
-sam.s (f32)(x)r1.w, r5.w, s#2, t#2
-nop
-(sy)mov.f32f32 r1.w, r1.w
-mul.f r0.y, r0.w, r0.y
-sam.s (f32)(x)r0.w, r5.x, s#2, t#2
-(sy)mov.f32f32 r0.w, r0.w
-add.f r1.z, r1.z, c8.z
-add.f r2.y, c11.y, (neg)r0.x
-add.f r2.z, c11.y, (neg)r0.x
-add.f r3.z, c11.y, (neg)r0.x
-mul.f r3.x, r1.z, r3.x
-mul.f r2.y, r2.y, c5.z
-mul.f r2.z, r2.z, c5.y
-mul.f r3.w, r3.z, c5.x
-mov.f32f32 r3.x, r3.x
-sam.s (f32)(x)r3.z, r6.z, s#2, t#2
-add.f r0.z, r0.z, c8.z
-(sy)mov.f32f32 r3.z, r3.z
-bary.f r4.x, 5, r1.x
-mad.f32 r0.y, r3.x, r0.w, r0.y
-mul.f r0.w, r2.w, r0.z
+mad.f32 r0.y, r0.w, r3.x, r0.y
+cmps.f.ne r0.z, r0.z, c8.y
(rpt1)nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.w, r0.w
-mul.f r0.z, r1.z, r0.z
-mov.f32f32 r1.z, r4.x
-mov.f32f32 r2.x, r2.x
-mad.f32 r0.y, r0.w, r3.z, r0.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r3.z, r1.z
-mov.f32f32 r2.w, r2.x
-mov.f32f32 r0.y, r0.y
-bary.f r0.w, 7, r1.x
-mad.f32 r0.y, r0.z, r1.w, r0.y
-mov.f32f32 r0.z, c8.z
-bary.f r1.z, 10, r1.x
-(ss)nop
-sam (f32)(w)r4.x, r3.y, s#1, t#1
-(sy)cmps.f.lt r1.w, r4.w, c10.y
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r2.x, r4.w
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r4.w, r0.z
mul.f r0.y, c10.x, r0.y
-cov.u32f32 r0.z, r1.w
-mov.f32f32 r1.w, r2.x
-mov.f32f32 r3.x, r0.w
-mov.f32f32 r0.y, r0.y
-cmps.f.ne r0.z, r0.z, c8.y
-nop
-mov.f32f32 r0.w, c8.y
-bary.f r2.x, 9, r1.x
+bary.f r2.y, 7, r1.x
+add.f r0.w, c11.y, (neg)r0.x
+add.f r1.z, c11.y, (neg)r0.x
+mov.f32f32 r1.w, r0.y
+add.f r2.z, c11.y, (neg)r0.x
+(rpt1)nop
+sam (f32)(xyz)r2.w, r2.x, s#0, t#0
+(ss)bary.f r2.x, 10, r1.x
+bary.f r2.y, 9, r1.x
bary.f (ei)r1.x, 8, r1.x
-(ss)nop
-sam (f32)(xyz)r2.w, r2.w, s#0, t#0
-(sy)mul.f r1.y, r3.y, r1.z
-sel.b32 r0.z, r0.w, r0.z, r1.w
-mul.f r0.w, r3.x, r2.x
+mul.f r0.w, r0.w, c5.z
+(sy)mul.f r1.y, r3.y, r2.x
+mul.f r2.x, r3.x, r2.y
mul.f r1.x, r2.w, r1.x
-mul.f r1.y, r1.y, r0.y
-(rpt2)nop
-mov.f32f32 r1.y, r1.y
-mul.f r0.w, r0.w, r0.y
+mul.f r1.z, r1.z, c5.y
+mul.f r1.y, r1.y, r1.w
+mul.f r1.w, r2.x, r1.w
mad.f32 r1.y, c4.z, r3.y, r1.y
+mad.f32 r1.w, c4.y, r3.x, r1.w
mul.f r0.y, r1.x, r0.y
-(rpt1)nop
-mov.f32f32 r1.x, r1.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.y, r0.y
-mad.f32 r0.w, c4.y, r3.x, r0.w
-mul.f r1.x, r0.x, r1.x
+mul.f r1.x, r2.z, c5.x
+mul.f r1.y, r0.x, r1.y
+mul.f r1.w, r0.x, r1.w
mad.f32 r0.y, c4.x, r2.w, r0.y
-(rpt1)nop
-add.f r1.x, r1.x, r2.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.y, r0.y
+mov.f32f32 r2.x, c8.y
+add.f r0.w, r1.y, r0.w
+add.f r1.y, r1.w, r1.z
nop
-mul.f r1.x, r1.x, r0.z
-mul.f r0.w, r0.x, r0.w
+sel.b32 r0.z, r2.x, r0.z, r4.w
mul.f r0.x, r0.x, r0.y
+mov.f32f32 r2.w, c8.z
nop
-mov.f32f32 r0.y, r1.x
-add.f r0.w, r0.w, r2.z
-add.f r0.x, r0.x, r3.w
-nop
-mov.f32f32 r0.y, r0.y
-mul.f r0.w, r0.w, r0.z
-mul.f r0.x, r0.x, r0.z
-nop
-mov.f32f32 r4.z, r0.y
-mov.f32f32 r0.y, r0.w
-mov.f32f32 r0.x, r0.x
-(rpt1)nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
-(rpt1)nop
-mov.f32f32 r4.y, r0.y
-mov.f32f32 r4.x, r0.x
+mul.f r2.z, r0.w, r0.z
+mul.f r2.y, r1.y, r0.z
+add.f r0.x, r0.x, r1.x
+(rpt2)nop
+mul.f r2.x, r0.x, r0.z
end
nop
nop
nop
-; FRAG: outputs: r4.x (1:0)
-; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r0.x (5:10,cm=f,il=12,b=1) r3.x (5:11,cm=f,il=16,b=1)
-; FRAG: 200 instructions, 0 half, 8 full
-; pos (bary): r1.x
-; color: r4.x
-; fragcoord: r0.x
+; FRAG: outputs: r2.x (1:0)
+; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r1.z (5:10,cm=f,il=12,b=1) r3.x (5:11,cm=f,il=16,b=1)
+; FRAG: 121 instructions, 0 half, 7 full
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-32.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-32.asm
index a1623b7..dc38031 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-32.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-32.asm
@@ -4,546 +4,368 @@
@in(r0.w) in3
@in(r1.x) in4
@in(r1.y) in5
-@out(r4.w) out0
-@out(r5.x) out1
-@out(r5.y) out2
-@out(r5.z) out3
+@out(r2.x) out0
+@out(r2.y) out1
+@out(r2.z) out2
+@out(r2.w) out3
+@const(c13.x) 0x3f000000, 0x00000000, 0x3f800000, 0xbeef9097
+@const(c14.x) 0x3cff9724, 0x40000000, 0xbf800000, 0xba03126f
+@const(c15.x) 0xbf000000, 0x3f800000, 0x3fb8aa65, 0x3de38866
+@const(c16.x) 0x3cf5c28f, 0x00000000, 0x00000000, 0x00000000
+@const(c17.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.x, 19, r1.x
bary.f r0.y, 0, r1.x
add.f r0.w, r0.w, c13.y
bary.f r1.z, 1, r1.x
-mov.f32f32 r0.x, r0.x
-add.f r1.w, r0.y, c15.x
-bary.f r2.x, 23, r1.x
-bary.f r2.y, 24, r1.x
-mul.f r2.z, r0.x, r0.x
+mov.f32f32 r1.w, r0.x
+add.f r2.x, r0.y, c15.x
+bary.f r2.y, 8, r1.x
+bary.f r2.z, 23, r1.x
+mul.f r0.x, r0.x, r1.w
bary.f r2.w, 20, r1.x
-floor.f r3.x, r1.w
+floor.f r3.x, r2.x
rcp r0.w, r0.w
add.f r0.z, r0.z, c13.y
add.f r3.y, r1.z, c15.x
-mov.f32f32 r2.w, r2.w
-add.f r1.w, r1.w, (neg)r3.x
+mov.f32f32 r3.z, r2.w
+add.f r2.x, r2.x, (neg)r3.x
(ss)mul.f r0.z, r0.z, r0.w
-(ss)floor.f r0.w, r3.y
-mad.f32 r2.z, r2.w, r2.w, r2.z
-mov.f32f32 r1.w, r1.w
+(ss)absneg.f r0.w, (neg)c10.x
+mad.f32 r0.x, r2.w, r3.z, r0.x
+bary.f r2.w, 21, r1.x
+mov.f32f32 r3.x, r2.x
+mul.f r0.w, r0.w, c10.x
+floor.f r3.w, r3.y
+mov.f32f32 r4.x, r2.w
+mul.f r4.y, c13.x, r3.x
+mul.f r0.w, r0.w, r0.z
mov.f32f32 r0.z, r0.z
-absneg.f r3.x, (neg)c10.x
-mov.f32f32 r2.z, r2.z
-bary.f r3.z, 21, r1.x
-mul.f r3.w, c13.x, r1.w
-mul.f r3.x, r3.x, c10.x
-add.f r0.w, r3.y, (neg)r0.w
-mov.f32f32 r3.y, r3.z
-mov.f32f32 r3.z, r3.w
-mul.f r3.x, r3.x, r0.z
-mov.f32f32 r0.w, r0.w
-mad.f32 r2.z, r3.y, r3.y, r2.z
-add.f r0.y, r0.y, (neg)r3.z
-mov.f32f32 r3.x, r3.x
-mul.f r3.z, c13.x, r0.w
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r0.y, r0.y
-rsq r3.w, r2.z
-(ss)mul.f r4.x, r0.x, r3.w
-rsq r4.y, r2.z
-(ss)mov.f32f32 r4.y, r4.y
-(ss)rsq r2.z, r2.z
-(ss)mul.f r4.z, r0.x, r2.z
-add.f r4.w, c15.x, r0.y
-mov.f32f32 r4.x, r4.x
-mul.f r5.x, r0.x, r4.y
-mov.f32f32 r4.z, r4.z
-mov.f32f32 r4.w, r4.w
-absneg.f r4.x, (abs)r4.x
-mov.f32f32 r5.x, r5.x
+mad.f32 r0.x, r4.x, r4.x, r0.x
+add.f r0.y, r0.y, (neg)r4.y
+add.f r3.y, r3.y, (neg)r3.w
+mul.f r0.z, r0.w, r0.z
+add.f r0.w, c14.y, (neg)r3.x
+mov.f32f32 r3.x, r0.y
+mov.f32f32 r3.w, r3.y
+rsq r4.y, r0.x
+(ss)mul.f r4.z, r1.w, r4.y
+rsq r4.w, r0.x
+(ss)mov.f32f32 r5.x, r4.w
+(ss)rsq r0.x, r0.x
+(ss)mul.f r5.y, r1.w, r0.x
+add.f r3.x, c15.x, r3.x
absneg.f r4.z, (abs)r4.z
-mul.f r4.w, r4.w, c5.z
-mov.f32f32 r4.x, r4.x
-absneg.f r5.x, (abs)r5.x
-mov.f32f32 r4.z, r4.z
-mov.f32f32 r5.y, r4.w
-add.f r4.x, r4.x, c13.w
-mov.f32f32 r5.x, r5.x
+mul.f r5.z, r1.w, r5.x
+absneg.f r5.y, (abs)r5.y
+mul.f r5.w, r3.x, c5.z
+add.f r3.x, r4.z, c13.w
+absneg.f r4.z, (abs)r5.z
+add.f r5.y, r5.y, c13.w
+mov.f32f32 r6.x, r5.w
+max.f r3.x, r3.x, c13.y
+mul.f r5.z, r3.z, r4.y
add.f r4.z, r4.z, c13.w
-mov.f32f32 r5.y, r5.y
-mov.f32f32 r4.x, r4.x
-add.f r5.x, r5.x, c13.w
-mov.f32f32 r4.z, r4.z
-mov.f32f32 r3.z, r3.z
-max.f r4.x, r4.x, c13.y
-mov.f32f32 r5.x, r5.x
-max.f r4.z, r4.z, c13.y
-add.f r1.z, r1.z, (neg)r3.z
-mov.f32f32 r3.z, r4.x
-mul.f r4.x, r2.w, r3.w
-max.f r5.x, r5.x, c13.y
-mov.f32f32 r4.z, r4.z
-mul.f r5.z, r2.w, r2.z
-mov.f32f32 r4.x, r4.x
-mov.f32f32 r5.x, r5.x
-mul.f r5.w, r2.w, r4.y
-mov.f32f32 r5.z, r5.z
-absneg.f r4.x, (abs)r4.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r5.w, r5.w
+max.f r5.y, r5.y, c13.y
+mul.f r6.y, r3.z, r0.x
absneg.f r5.z, (abs)r5.z
-mov.f32f32 r4.x, r4.x
-add.f r6.x, c15.x, r1.z
-absneg.f r5.w, (abs)r5.w
-mov.f32f32 r5.z, r5.z
-add.f r4.x, r4.x, c13.w
-mov.f32f32 r6.x, r6.x
-mov.f32f32 r5.w, r5.w
+max.f r4.z, r4.z, c13.y
+mul.f r5.x, r3.z, r5.x
+absneg.f r6.y, (abs)r6.y
add.f r5.z, r5.z, c13.w
-mov.f32f32 r4.x, r4.x
-mul.f r6.x, r6.x, c5.w
-add.f r5.w, r5.w, c13.w
-mov.f32f32 r5.z, r5.z
-max.f r4.x, r4.x, c13.y
-mov.f32f32 r6.y, r6.x
-mov.f32f32 r5.w, r5.w
-max.f r6.z, r5.z, c13.y
-mov.f32f32 r4.x, r4.x
-mov.f32f32 r5.z, r6.y
-max.f r5.w, r5.w, c13.y
-mov.f32f32 r6.y, r6.z
-add.f r6.z, r3.z, r4.x
-mul.f r3.w, r3.y, r3.w
-mov.f32f32 r6.w, r5.w
-add.f r7.x, r4.z, r6.y
-(ss)mul.f r2.z, r3.y, r2.z
-mov.f32f32 r3.w, r3.w
-add.f r7.y, r5.x, r6.w
-mul.f r4.y, r3.y, r4.y
-mov.f32f32 r2.z, r2.z
-absneg.f r3.w, (abs)r3.w
-bary.f r5.w, 2, r1.x
-mov.f32f32 r4.y, r4.y
-absneg.f r2.z, (abs)r2.z
-mov.f32f32 r3.w, r3.w
-add.f r7.z, r5.w, c14.w
+mul.f r6.z, c13.x, r3.w
+mul.f r0.z, r0.z, c15.z
+absneg.f r5.x, (abs)r5.x
+max.f r5.z, r5.z, c13.y
+add.f r6.y, r6.y, c13.w
+add.f r1.z, r1.z, (neg)r6.z
+add.f r5.x, r5.x, c13.w
+add.f r6.z, r3.x, r5.z
+mul.f r4.y, r4.x, r4.y
+max.f r6.w, r6.y, c13.y
+max.f r5.x, r5.x, c13.y
+mov.f32f32 r6.y, r1.z
+absneg.f r4.y, (abs)r4.y
+add.f r7.x, r5.y, r6.w
+add.f r7.y, r4.z, r5.x
+(ss)mul.f r0.x, r2.w, r0.x
+add.f r2.w, r4.y, c13.w
+mul.f r4.y, r4.x, r4.w
+add.f r4.w, c15.x, r6.y
+absneg.f r0.x, (abs)r0.x
+max.f r2.w, r2.w, c13.y
absneg.f r4.y, (abs)r4.y
-mov.f32f32 r2.z, r2.z
-add.f r3.w, r3.w, c13.w
-mov.f32f32 r5.w, r7.z
-mov.f32f32 r4.y, r4.y
-add.f r2.z, r2.z, c13.w
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r5.w, r5.w
+mul.f r7.w, r4.w, c5.w
+add.f r0.x, r0.x, c13.w
+mov.f32f32 r4.w, r2.w
add.f r4.y, r4.y, c13.w
-mov.f32f32 r2.z, r2.z
-max.f r3.w, r3.w, c13.y
-mul.f r0.z, r3.x, r0.z
-mov.f32f32 r3.x, r4.y
-max.f r2.z, r2.z, c13.y
-mov.f32f32 r3.w, r3.w
-sam.s (f32)(x)r4.y, r5.y, s#4, t#4
-(sy)mov.f32f32 r4.y, r4.y
-max.f r3.x, r3.x, c13.y
-mov.f32f32 r2.z, r2.z
-(ss)add.f r5.y, r6.z, r3.w
-mov.f32f32 r4.y, r4.y
+mov.f32f32 r6.y, r7.w
+max.f r0.x, r0.x, c13.y
+add.f r4.w, r6.z, r4.w
+max.f r4.y, r4.y, c13.y
+bary.f r6.z, 2, r1.x
+mov.f32f32 r7.z, r0.x
+mov.f32f32 r8.x, r4.w
+mov.f32f32 r8.y, r4.y
+add.f r9.x, r6.z, c14.w
+add.f r7.x, r7.x, r7.z
+exp2 r0.z, r0.z
+(ss)mov.f32f32 r7.z, r0.z
+add.f r7.y, r7.y, r8.y
+mov.f32f32 r6.z, r9.x
+rcp r8.y, r8.x
mov.f32f32 r3.x, r3.x
-add.f r5.z, r7.x, r2.z
-mov.f32f32 r5.y, r5.y
-add.f r5.w, c14.y, (neg)r1.w
-add.f r6.z, r7.y, r3.x
-mov.f32f32 r5.z, r5.z
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r5.w, r5.w
-add.f r7.x, c14.y, (neg)r0.w
-rcp r7.y, r5.y
-(ss)mov.f32f32 r7.y, r7.y
-mov.f32f32 r6.z, r6.z
-mul.f r0.z, r0.z, c15.z
-rcp r7.w, r5.y
-nop
-rcp r8.x, r5.z
-mul.f r3.z, r3.z, r7.y
-(ss)mov.f32f32 r7.y, r8.x
-mov.f32f32 r7.x, r7.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r3.z, r3.z
-mul.f r2.x, r2.x, c14.x
-rcp r8.x, r6.z
-(ss)mov.f32f32 r8.x, r8.x
-mul.f r4.z, r4.z, r7.y
-mul.f r7.y, r5.w, r7.x
-mov.f32f32 r2.x, r2.x
-mul.f r5.x, r5.x, r8.x
+mov.f32f32 r8.z, r7.x
+mov.f32f32 r8.w, r7.y
+add.f r7.z, c17.y, (neg)r7.z
+(ss)mul.f r3.x, r3.x, r8.y
+mul.f r9.y, r2.z, c14.x
+sam.s (f32)(x)r9.w, r6.x, s#4, t#4
+rcp r2.z, r7.y
+(ss)mov.f32f32 r6.x, r0.w
+mul.f r6.y, r7.z, c10.y
+mov.f32f32 r6.z, r3.x
+mov.f32f32 r7.y, r9.y
+rcp r7.z, r8.w
mov.f32f32 r4.z, r4.z
-mul.f r4.y, r7.y, r4.y
-mov.f32f32 r7.y, r2.x
-mov.f32f32 r5.x, r5.x
-mov.f32f32 r8.x, r2.x
-mov.f32f32 r8.y, r2.x
-mov.f32f32 r8.z, r7.y
-mul.f r2.y, r2.y, c14.x
-mov.f32f32 r7.y, r8.x
-mov.f32f32 r8.x, r8.y
-add.f r0.y, c15.y, r0.y
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r9.x, r7.y
-rcp r7.y, r6.z
-nop
-(ss)rcp r6.z, r6.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r8.y, r2.y
-mov.f32f32 r9.y, r2.y
-mov.f32f32 r9.z, r2.y
-mul.f r0.y, r0.y, c5.z
-mov.f32f32 r8.w, r8.y
-mov.f32f32 r9.y, r9.y
-mov.f32f32 r8.y, r9.z
-mov.f32f32 r9.z, r0.y
-(ss)mov.f32f32 r7.y, r7.y
-(ss)mov.f32f32 r6.z, r6.z
-mov.f32f32 r9.y, r9.y
-sam (f32)(xyzw)r9.w, r8.z, s#2, t#2
-(sy)(ss)mov.f32f32 r8.z, r9.w
-add.f r8.w, c13.z, (neg)r10.x
-mov.f32f32 r9.w, r10.y
-sam (f32)(xyzw)r10.x, r8.x, s#0, t#0
-(sy)(ss)mul.f r8.x, r10.z, r4.z
-mul.f r8.y, r8.z, r3.z
-mov.f32f32 r7.w, r7.w
-sam (f32)(xyzw)r10.z, r9.x, s#3, t#3
-(sy)mul.f r8.z, r10.w, r5.x
-(ss)mul.f r9.x, r10.z, r5.x
-mul.f r5.x, r11.x, r5.x
-mul.f r4.x, r4.x, r7.w
-mul.f r6.w, r6.w, r7.y
-mov.f32f32 r7.y, r8.w
-mul.f r7.w, r9.w, r3.z
-mov.f32f32 r4.x, r4.x
-mov.f32f32 r8.w, r2.y
-mov.f32f32 r6.w, r6.w
-mov.f32f32 r9.y, r2.y
-mov.f32f32 r7.y, r7.y
-mov.f32f32 r10.z, r8.w
-bary.f r8.w, 22, r1.x
-mov.f32f32 r11.x, r9.y
-mul.f r3.z, r7.y, r3.z
-rcp r7.y, r5.z
-(ss)mov.f32f32 r7.y, r7.y
-mov.f32f32 r8.w, r8.w
-mov.f32f32 r9.y, r9.z
-mul.f r10.y, r10.y, r4.z
-mul.f r4.z, r10.x, r4.z
-mul.f r8.w, r8.w, c14.x
-mul.f r6.y, r6.y, r7.y
-mov.f32f32 r6.x, r6.x
-mul.f r3.x, r3.x, r6.z
-mov.f32f32 r6.z, r8.w
-mov.f32f32 r6.y, r6.y
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r9.z, r6.x
-mov.f32f32 r6.x, r6.z
-mov.f32f32 r7.y, r6.z
-mov.f32f32 r11.z, r2.y
-mov.f32f32 r2.y, r6.z
-mov.f32f32 r10.w, r6.x
-mov.f32f32 r11.y, r7.y
-mov.f32f32 r6.x, r7.z
-mov.f32f32 r11.w, r2.y
-mov.f32f32 r2.y, r3.x
-exp2 r0.z, r0.z
-(ss)mov.f32f32 r0.z, r0.z
-mov.f32f32 r9.w, r6.x
-sam (f32)(xyzw)r12.x, r10.z, s#2, t#2
-(sy)mov.f32f32 r3.x, r12.y
-(ss)nop
-sam (f32)(xyzw)r10.z, r11.x, s#3, t#3
-(sy)mad.f32 r6.x, r10.w, r6.w, r8.z
-mad.f32 r7.y, r10.z, r6.w, r9.x
-mad.f32 r5.x, r11.x, r6.w, r5.x
-mad.f32 r3.x, r3.x, r4.x, r8.y
-rcp r5.y, r5.y
-(ss)mov.f32f32 r5.y, r5.y
-mov.f32f32 r6.w, r2.x
-add.f r8.y, c13.z, (neg)r12.x
-mov.f32f32 r8.z, r12.z
-mul.f r3.w, r3.w, r5.y
-mov.f32f32 r8.w, r6.w
-mov.f32f32 r5.y, r6.z
-mov.f32f32 r6.w, r8.y
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r8.y, r2.x
-mov.f32f32 r9.x, r5.y
-mov.f32f32 r5.y, r6.w
-mad.f32 r6.w, r8.z, r4.x, r7.w
-mov.f32f32 r8.y, r8.y
-mov.f32f32 r7.w, r6.z
-mad.f32 r3.z, r5.y, r4.x, r3.z
-sam (f32)(xyzw)r10.z, r11.z, s#0, t#0
-(sy)mad.f32 r4.x, r10.w, r6.y, r10.y
-(ss)nop
-sam (f32)(xyzw)r11.y, r8.w, s#3, t#3
-(sy)mad.f32 r5.x, r11.w, r2.y, r5.x
-mov.f32f32 r8.z, r7.w
-mad.f32 r5.y, r11.z, r2.y, r6.x
-mad.f32 r2.y, r11.y, r2.y, r7.y
-mov.f32f32 r5.x, r5.x
-mad.f32 r6.x, r11.x, r6.y, r8.x
-rcp r5.z, r5.z
+rcp r7.w, r8.z
mov.f32f32 r5.y, r5.y
-mov.f32f32 r2.y, r2.y
-sam (f32)(xyzw)r7.w, r8.y, s#2, t#2
-(sy)mov.f32f32 r7.y, r8.x
-mul.f r5.x, c7.z, r5.x
-mul.f r5.y, c7.y, r5.y
-mul.f r2.y, c7.x, r2.y
-mad.f32 r3.x, r7.y, r3.w, r3.x
-add.f r7.y, c13.z, (neg)r7.w
-mov.f32f32 r7.w, r8.y
-(ss)mov.f32f32 r5.z, r5.z
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r7.y, r7.y
-mad.f32 r6.w, r7.w, r3.w, r6.w
-mul.f r2.z, r2.z, r5.z
-mad.f32 r3.x, c14.y, r3.x, c14.z
-mov.f32f32 r5.z, r7.y
-mov.f32f32 r6.w, r6.w
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r3.x, r3.x
-bary.f r7.y, 4, r1.x
-bary.f r7.w, 5, r1.x
-bary.f r8.x, 6, r1.x
-mad.f32 r3.z, r5.z, r3.w, r3.z
-mul.f r3.w, r7.y, r3.x
-mul.f r5.z, r7.w, r3.x
-mul.f r3.x, r8.x, r3.x
-mov.f32f32 r3.z, r3.z
-mad.f32 r6.w, c14.y, r6.w, c14.z
-mov.f32f32 r2.x, r2.x
-mad.f32 r4.z, r10.z, r6.y, r4.z
-mad.f32 r3.z, c14.y, r3.z, c14.z
-mov.f32f32 r6.y, r6.w
-mov.f32f32 r7.w, r2.x
-mov.f32f32 r2.x, r6.z
-mov.f32f32 r3.z, r3.z
-bary.f r6.z, 10, r1.x
-bary.f r6.w, 11, r1.x
-bary.f r7.y, 12, r1.x
-mov.f32f32 r8.x, r2.x
-mov.f32f32 r2.x, r6.z
-bary.f r6.z, 7, r1.x
+rcp r8.y, r8.w
+mov.f32f32 r5.x, r5.x
+mov.f32f32 r10.x, r7.y
+(ss)bary.f r8.w, 24, r1.x
+(ss)mul.f r4.z, r4.z, r7.z
+mul.f r5.y, r5.y, r7.w
+mov.f32f32 r10.z, r7.y
+mul.f r11.x, r8.w, c14.x
+mov.f32f32 r7.w, r4.z
+mov.f32f32 r8.w, r5.y
+mul.f r5.x, r5.x, r8.y
+mov.f32f32 r7.z, r11.x
+add.f r3.w, c14.y, (neg)r3.w
+mul.f r0.z, r0.z, c13.z
+mul.f r2.z, r4.y, r2.z
+mov.f32f32 r10.y, r7.z
+mov.f32f32 r10.w, r7.z
+mov.f32f32 r4.y, r5.x
+sam (f32)(xyzw)r11.y, r7.y, s#0, t#0
+(sy)mul.f r8.y, r11.w, r8.w
+rcp r8.z, r8.z
mov.f32f32 r6.w, r6.w
-mov.f32f32 r7.y, r7.y
-sam.s (f32)(x)r8.y, r9.y, s#4, t#4
-(sy)mov.f32f32 r8.y, r8.y
-mul.f r2.x, r2.x, (neg)r6.z
-mul.f r6.w, r6.w, (neg)r6.z
-mul.f r6.z, r7.y, (neg)r6.z
+mul.f r8.w, r11.z, r8.w
+mul.f r5.y, r11.y, r5.y
+sam (f32)(xyzw)r11.y, r10.x, s#2, t#2
+(sy)(ss)mul.f r10.x, r11.y, r6.z
+rcp r8.x, r8.x
+mov.f32f32 r5.z, r5.z
+sam (f32)(xyzw)r12.x, r10.z, s#3, t#3
+(sy)mul.f r10.y, r12.y, r7.w
+add.f r9.z, c13.z, (neg)r11.z
+(ss)mul.f r10.z, r12.z, r7.w
+(ss)mul.f r5.z, r5.z, r8.x
+mov.f32f32 r12.y, r7.z
+bary.f r7.w, 22, r1.x
+mul.f r6.z, r9.z, r6.z
+mov.f32f32 r8.x, r5.z
+mul.f r6.w, r6.w, r8.z
+mul.f r9.z, r7.w, c14.x
+mul.f r4.z, r12.x, r4.z
+mul.f r3.x, r11.w, r3.x
+mov.f32f32 r8.z, r6.w
+mov.f32f32 r11.y, r9.z
+mov.f32f32 r10.w, r3.w
+add.f r0.z, r0.z, r6.y
+mov.f32f32 r6.y, r2.z
+mov.f32f32 r7.w, r11.y
+mov.f32f32 r12.z, r11.y
+mul.f r6.x, r6.x, r10.w
+sam (f32)(xyzw)r12.w, r11.x, s#0, t#0
+(sy)mad.f32 r8.y, r13.y, r8.z, r8.y
+rcp r7.x, r7.x
+(ss)mul.f r0.x, r0.x, r7.x
+(ss)mad.f32 r7.x, r13.x, r8.z, r8.w
+mad.f32 r5.y, r12.w, r6.w, r5.y
+sam (f32)(xyzw)r12.w, r7.z, s#2, t#2
+(sy)mad.f32 r6.w, r13.x, r8.x, r10.x
+rcp r4.w, r4.w
+(ss)mul.f r2.w, r2.w, r4.w
+sam (f32)(xyzw)r11.z, r12.y, s#3, t#3
+(sy)(ss)mad.f32 r4.w, r11.w, r4.y, r10.y
+add.f r7.z, c13.z, (neg)r12.w
+mad.f32 r4.y, r12.x, r4.y, r10.z
+mov.f32f32 r8.z, r2.w
+mov.f32f32 r10.x, r7.y
+mov.f32f32 r10.y, r11.y
+mov.f32f32 r10.z, r7.y
+mov.f32f32 r10.w, r11.y
+mad.f32 r6.z, r7.z, r8.x, r6.z
+mov.f32f32 r7.y, r0.x
+sam (f32)(xyzw)r11.w, r9.y, s#0, t#0
+mad.f32 r4.z, r11.z, r5.x, r4.z
+mad.f32 r3.x, r13.y, r5.z, r3.x
+sam (f32)(xyzw)r12.z, r10.x, s#2, t#2
+(sy)mad.f32 r5.x, r12.w, r8.z, r6.w
+mad.f32 r5.z, r12.y, r7.y, r8.y
(ss)nop
-sam (f32)(xyzw)r8.z, r7.w, s#0, t#0
-(sy)mad.f32 r6.x, r9.x, r2.z, r6.x
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r6.w, r6.w
-mov.f32f32 r6.z, r6.z
-mov.f32f32 r6.x, r6.x
-mad.f32 r2.x, r2.x, r3.z, r3.w
-mad.f32 r3.w, r6.w, r3.z, r5.z
-mad.f32 r3.x, r6.z, r3.z, r3.x
-mad.f32 r3.z, r8.w, r2.z, r4.x
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r3.w, r3.w
-mad.f32 r0.x, r0.x, r6.y, r2.x
-mad.f32 r2.x, r2.w, r6.y, r3.w
-mov.f32f32 r2.w, r3.x
-mov.f32f32 r3.x, r3.z
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r2.x, r2.x
-mad.f32 r2.w, r3.y, r6.y, r2.w
-nop
-mul.f r3.y, r0.x, r0.x
-mad.f32 r2.z, r8.z, r2.z, r4.z
-mad.f32 r3.y, r2.x, r2.x, r3.y
-mov.f32f32 r2.w, r2.w
-add.f r1.w, r1.w, c13.z
-add.f r3.z, c17.y, (neg)r0.z
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r2.z, r2.z
-mad.f32 r3.y, r2.w, r2.w, r3.y
-mul.f r3.w, r1.w, r7.x
-mul.f r3.z, r3.z, c10.y
-mul.f r0.z, r0.z, c13.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r4.x, r4.w
-add.f r1.z, c15.y, r1.z
-rsq r3.y, r3.y
-(ss)mov.f32f32 r3.y, r3.y
-mad.f32 r3.w, r3.w, r8.y, r4.y
-add.f r0.z, r0.z, r3.z
-mov.f32f32 r0.y, r0.y
-mul.f r0.x, r0.x, r3.y
-mul.f r2.x, r2.x, r3.y
-mul.f r2.w, r2.w, r3.y
-nop
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r3.y, r3.w
-mul.f r3.z, r0.x, r0.x
-mul.f r3.w, (neg)c8.x, r0.x
-mad.f32 r3.z, r2.x, r2.x, r3.z
-mad.f32 r3.w, (neg)c8.y, r2.x, r3.w
-mov.f32f32 r4.x, r4.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r3.z, r3.z
-mov.f32f32 r3.w, r3.w
-mad.f32 r3.z, r2.w, r2.w, r3.z
-mad.f32 r3.w, (neg)c8.z, r2.w, r3.w
-mul.f r1.z, r1.z, c5.w
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r6.y, r0.y
-add.f r0.y, r0.w, c13.z
-bary.f r0.w, 8, r1.x
-rsq r3.z, r3.z
-(ss)mov.f32f32 r3.z, r3.z
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r4.y, r1.z
-mov.f32f32 r0.z, r0.z
-mul.f r0.x, r0.x, r3.z
-max.f r3.w, r3.w, c13.y
-mul.f r2.x, r2.x, r3.z
-mul.f r2.w, r2.w, r3.z
-mov.f32f32 r0.x, r0.x
-bary.f r3.z, 13, r1.x
-mov.f32f32 r3.w, r3.w
-bary.f r4.z, 17, r1.x
-bary.f r4.w, 16, r1.x
-mul.f r0.x, r0.x, r3.z
-mov.f32f32 r2.x, r2.x
-bary.f r3.z, 14, r1.x
-bary.f r5.z, 18, r1.x
-mad.f32 r6.z, c7.y, r3.w, (neg)r4.z
-mad.f32 r6.w, c7.x, r3.w, (neg)r4.w
-mad.f32 r0.x, r2.x, r3.z, r0.x
-mad.f32 r2.x, c7.z, r3.w, (neg)r5.z
-mov.f32f32 r3.z, r6.z
-mov.f32f32 r3.w, r6.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r2.w, r2.w
-bary.f r6.z, 15, r1.x
-mov.f32f32 r2.x, r2.x
-mad.f32 r3.z, c11.x, r3.z, r4.z
-mad.f32 r3.w, c11.x, r3.w, r4.w
-mad.f32 r0.x, r2.w, r6.z, r0.x
-mad.f32 r2.x, c11.x, r2.x, r5.z
-mov.f32f32 r2.w, r3.z
-mov.f32f32 r3.z, r3.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r4.y, r4.y
-mov.f32f32 r3.w, r7.z
-max.f r0.x, c13.y, r0.x
+sam (f32)(xyzw)r10.x, r10.z, s#3, t#3
+(sy)mad.f32 r4.y, r10.z, r6.y, r4.y
+mad.f32 r4.w, r10.y, r6.y, r4.w
+mad.f32 r5.x, c14.y, r5.x, c14.z
+bary.f r6.y, 6, r1.x
+mul.f r4.y, c7.z, r4.y
+mul.f r4.w, c7.y, r4.w
+mov.f32f32 r6.w, r5.x
+bary.f r7.z, 4, r1.x
+bary.f r8.x, 5, r1.x
+mul.f r5.x, r6.y, r5.x
+add.f r6.y, c13.z, (neg)r12.z
+mul.f r7.z, r7.z, r6.w
+mul.f r6.w, r8.x, r6.w
+mov.f32f32 r8.y, r5.z
+mad.f32 r6.y, r6.y, r8.z, r6.z
+mad.f32 r6.z, r12.x, r7.y, r7.x
+mad.f32 r2.z, r10.x, r2.z, r4.z
+mad.f32 r2.w, r13.x, r2.w, r3.x
+mad.f32 r3.x, c14.y, r6.y, c14.z
+bary.f r4.z, 12, r1.x
+bary.f r6.y, 7, r1.x
+mov.f32f32 r7.x, r6.z
+mov.f32f32 r7.y, r3.x
+bary.f r8.x, 10, r1.x
+bary.f r8.z, 11, r1.x
+mul.f r4.z, r4.z, (neg)r6.y
+mul.f r9.y, c7.x, r2.z
+mul.f r2.z, r8.x, (neg)r6.y
+mul.f r6.y, r8.z, (neg)r6.y
+mad.f32 r3.x, r4.z, r3.x, r5.x
+mad.f32 r2.w, c14.y, r2.w, c14.z
+mad.f32 r2.z, r2.z, r7.y, r7.z
+mad.f32 r4.z, r6.y, r7.y, r6.w
+mad.f32 r0.x, r11.w, r0.x, r5.y
+mov.f32f32 r5.x, r2.w
+mad.f32 r2.w, r4.x, r2.w, r3.x
+(rpt1)nop
+mad.f32 r1.w, r1.w, r5.x, r2.z
+mad.f32 r2.z, r3.z, r5.x, r4.z
+(rpt1)nop
+mov.f32f32 r3.x, r1.w
+mov.f32f32 r3.z, r2.z
+mov.f32f32 r4.x, r2.w
+mov.f32f32 r4.z, r0.x
+mul.f r1.w, r1.w, r3.x
+mul.f r5.x, r6.x, r9.w
+mad.f32 r1.w, r2.z, r3.z, r1.w
+add.f r0.y, c15.y, r0.y
+mad.f32 r1.w, r4.x, r4.x, r1.w
max.f r0.z, r0.z, c13.y
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r4.z, r3.w
-mov.f32f32 r0.x, r0.x
+add.f r1.z, c15.y, r1.z
+mul.f r8.z, r0.y, c5.z
+add.f r0.y, r2.x, c13.z
+add.f r2.x, r3.y, c13.z
+bary.f r2.z, 9, r1.x
+rsq r1.w, r1.w
+(ss)mov.f32f32 r3.y, r1.w
+(ss)mul.f r1.w, r2.w, r1.w
+mov.f32f32 r7.z, r8.z
min.f r0.z, r0.z, c13.z
-mov.f32f32 r1.z, r1.z
-mul.f r3.w, r5.w, r0.y
-mov.f32f32 r0.w, r0.w
-mul.f r0.y, r1.w, r0.y
-sam.s (f32)(x)r1.w, r4.x, s#4, t#4
-(sy)mov.f32f32 r1.w, r1.w
-log2 r0.x, r0.x
-(ss)mul.f r0.x, c11.y, r0.x
-add.f r4.x, c17.y, (neg)r0.z
-add.f r4.y, c17.y, (neg)r0.z
-add.f r4.z, c17.y, (neg)r0.z
-mov.f32f32 r0.x, r0.x
-mad.f32 r1.w, r3.w, r1.w, r3.y
-mul.f r3.y, r4.x, c9.z
-mul.f r3.w, r4.y, c9.y
-mul.f r4.x, r4.z, c9.x
-mov.f32f32 r1.w, r1.w
+mul.f r2.w, r3.x, r3.y
+mul.f r3.x, r3.z, r3.y
+mov.f32f32 r3.y, r1.w
+mov.f32f32 r8.x, r9.x
+mov.f32f32 r3.z, r2.w
+mul.f r2.w, (neg)c8.x, r2.w
+mov.f32f32 r4.x, r3.x
+mad.f32 r2.w, (neg)c8.y, r3.x, r2.w
+mul.f r3.x, r3.z, r3.z
+mad.f32 r1.w, (neg)c8.z, r1.w, r2.w
+mad.f32 r2.w, r4.x, r4.x, r3.x
+sam.s (f32)(x)r7.y, r7.z, s#4, t#4
+mul.f r3.x, r0.y, r3.w
+mad.f32 r2.w, r3.y, r3.y, r2.w
+max.f r1.w, r1.w, c13.y
+bary.f r3.w, 16, r1.x
+(sy)mad.f32 r3.x, r3.x, r7.y, r5.x
+add.f r5.x, c17.y, (neg)r0.z
+add.f r5.y, c17.y, (neg)r0.z
+add.f r6.x, c17.y, (neg)r0.z
+rsq r2.w, r2.w
+(ss)mov.f32f32 r6.y, r2.w
+mov.f32f32 r6.w, r1.w
+bary.f r7.y, 17, r1.x
+(ss)bary.f r7.z, 18, r1.x
+mul.f r3.z, r3.z, r6.y
+bary.f r7.w, 13, r1.x
+mad.f32 r8.x, c7.z, r6.w, (neg)r7.z
+mad.f32 r6.w, c7.y, r6.w, (neg)r7.y
+mul.f r4.x, r4.x, r6.y
+mul.f r3.z, r3.z, r7.w
+bary.f r6.y, 14, r1.x
+mad.f32 r7.z, c11.x, r8.x, r7.z
+mad.f32 r6.w, c11.x, r6.w, r7.y
+mul.f r2.w, r3.y, r2.w
+mad.f32 r3.y, r4.x, r6.y, r3.z
+bary.f (ei)r1.x, 15, r1.x
+mad.f32 r1.y, c7.x, r1.w, (neg)r3.w
+(rpt1)nop
+mad.f32 r1.x, r2.w, r1.x, r3.y
+mad.f32 r1.y, c11.x, r1.y, r3.w
+mul.f r8.w, r1.z, c5.w
+mul.f r1.z, r5.x, c9.z
+max.f r1.x, c13.y, r1.x
+mul.f r1.w, r5.y, c9.y
+mul.f r3.y, r6.x, c9.x
+mov.f32f32 r6.x, r8.w
+mov.f32f32 r6.y, r9.x
nop
-exp2 r0.x, r0.x
-(ss)mul.f r4.y, r5.x, r0.x
-mul.f r4.z, r5.y, r0.x
-mad.f32 r2.x, r6.x, r2.x, r4.y
-mad.f32 r2.w, r3.x, r2.w, r4.z
-(ss)mul.f r0.x, r2.y, r0.x
-mov.f32f32 r6.z, r1.z
-mov.f32f32 r1.z, r2.x
-mov.f32f32 r2.x, r7.z
-mov.f32f32 r2.y, r2.w
-mad.f32 r0.x, r2.z, r3.z, r0.x
-mov.f32f32 r4.y, r0.w
-mov.f32f32 r6.w, r2.x
-bary.f (ei)r0.w, 9, r1.x
-mov.f32f32 r1.x, c13.z
-mov.f32f32 r1.y, c13.y
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r5.z, r1.x
-sam.s (f32)(x)r1.x, r6.y, s#4, t#4
-(sy)mov.f32f32 r1.x, r1.x
-(rpt2)nop
-mad.f32 r0.y, r0.y, r1.x, r1.w
-mov.f32f32 r4.z, r0.w
+sam.s (f32)(x)r9.z, r8.z, s#4, t#4
+mul.f r0.w, r0.w, r2.x
+log2 r1.x, r1.x
+(ss)mul.f r1.x, c11.y, r1.x
+mul.f r0.y, r0.y, r2.x
+sam (f32)(w)r9.w, r2.y, s#1, t#1
+(sy)cmps.f.lt r2.x, r10.z, c16.x
+mov.f32f32 r2.w, c13.z
+(ss)mov.f32f32 r2.y, c13.y
(rpt1)nop
-mov.f32f32 r0.y, r0.y
-(rpt2)nop
-mul.f r0.y, c15.w, r0.y
-sam (f32)(w)r6.y, r4.y, s#1, t#1
-(sy)mov.f32f32 r0.w, r7.x
-cmps.f.lt r1.x, r7.x, c16.x
+exp2 r1.x, r1.x
+(ss)mul.f r2.z, r4.y, r1.x
+mul.f r3.z, r4.w, r1.x
+mad.f32 r2.z, r8.y, r7.z, r2.z
+sam.s (f32)(x)r7.y, r5.w, s#4, t#4
+(sy)mad.f32 r0.w, r0.w, r7.y, r3.x
+mad.f32 r3.x, r7.x, r6.w, r3.z
+mad.f32 r0.y, r0.y, r9.z, r0.w
+mul.f r0.w, r9.y, r1.x
+(ss)cov.u32f32 r1.x, r2.x
nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.w, r0.w
-cov.u32f32 r1.x, r1.x
+mul.f r0.y, c15.w, r0.y
+mad.f32 r0.w, r4.z, r1.y, r0.w
+cmps.f.ne r1.x, r1.x, c13.y
nop
-mul.f r1.z, r1.z, r0.y
-mul.f r1.w, r2.y, r0.y
-mul.f r0.x, r0.x, r0.y
-cmps.f.ne r0.y, r1.x, c13.y
-mov.f32f32 r1.x, r1.z
-mov.f32f32 r1.z, r1.w
-mad.f32 r1.x, c6.z, r6.x, r1.x
-mad.f32 r1.z, c6.y, r3.x, r1.z
-mov.f32f32 r0.x, r0.x
-sel.b32 r0.y, r1.y, r0.y, r0.w
-mov.f32f32 r0.w, r1.x
-mov.f32f32 r1.x, r1.z
-mad.f32 r0.x, c6.x, r2.z, r0.x
+mov.f32f32 r1.y, r0.y
+mul.f r0.y, r0.w, r0.y
+sel.b32 r0.w, r2.y, r1.x, r10.z
nop
-mul.f r0.w, r0.z, r0.w
-mul.f r1.x, r0.z, r1.x
-mov.f32f32 r0.x, r0.x
+mul.f r1.x, r2.z, r1.y
+mul.f r1.y, r3.x, r1.y
+mad.f32 r1.x, c6.z, r5.z, r1.x
+mad.f32 r1.y, c6.y, r6.z, r1.y
+mad.f32 r0.x, c6.x, r0.x, r0.y
nop
-add.f r0.w, r0.w, r3.y
-add.f r1.x, r1.x, r3.w
+mul.f r0.y, r0.z, r1.x
+mul.f r1.x, r0.z, r1.y
mul.f r0.x, r0.z, r0.x
nop
-mul.f r0.z, r0.w, r0.y
-mul.f r0.w, r1.x, r0.y
-add.f r0.x, r0.x, r4.x
+add.f r0.y, r0.y, r1.z
+add.f r0.z, r1.x, r1.w
+add.f r0.x, r0.x, r3.y
nop
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-mul.f r0.x, r0.x, r0.y
+mul.f r2.z, r0.y, r0.w
+mul.f r2.y, r0.z, r0.w
+mul.f r2.x, r0.x, r0.w
+end
nop
-mov.f32f32 r0.y, r0.z
-mov.f32f32 r0.z, r0.w
-mov.f32f32 r0.x, r0.x
nop
-mov.f32f32 r5.y, r0.y
-mov.f32f32 r5.x, r0.z
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
-mov.f32f32 r4.w, r0.x
-end
-; FRAG: outputs: r4.w (1:0)
-; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r5.w (5:10,cm=f,il=12,b=1) r6.x (5:11,cm=f,il=16,b=1) r5.w (5:12,cm=f,il=20,b=1) r63.y (5:13,cm=f,il=24,b=1) r1.y (5:14,cm=f,il=28,b=1) r7.x (5:15,cm=f,il=32,b=1)
-; FRAG: 539 instructions, 0 half, 65 full
-; pos (bary): r1.x
-; color: r4.w
-; fragcoord: r0.x
+; FRAG: outputs: r2.x (1:0)
+; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r5.z (5:10,cm=f,il=12,b=1) r7.w (5:11,cm=f,il=16,b=1) r0.y (5:12,cm=f,il=20,b=1) r63.y (5:13,cm=f,il=24,b=1) r1.w (5:14,cm=f,il=28,b=1) r7.x (5:15,cm=f,il=32,b=1)
+; FRAG: 354 instructions, 0 half, 14 full
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-33.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-33.asm
index 54e5b92..4dd0acb 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-33.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-33.asm
@@ -1,204 +1,145 @@
; options:
; VERT: new compiler
-@in(r0.x) in0
-@in(r0.y) in1
-@in(r0.z) in2
-@in(r0.w) in4
-@in(r1.x) in5
-@in(r1.y) in6
-@in(r1.z) in8
-@in(r1.w) in9
-@in(r2.x) in10
-@out(r8.w) out0
-@out(r9.x) out1
-@out(r9.y) out2
-@out(r9.z) out3
-@out(r5.x) out4
-@out(r5.y) out5
-@out(r5.z) out6
-@out(r5.w) out7
-@out(r3.z) out8
-@out(r3.w) out9
-@out(r4.x) out10
-@out(r4.y) out11
-@out(r7.w) out12
-@out(r8.x) out13
-@out(r8.y) out14
-@out(r8.z) out15
-@out(r6.w) out16
-@out(r7.x) out17
-@out(r7.y) out18
-@out(r7.z) out19
-@out(r1.w) out20
-@out(r2.x) out21
-@out(r2.y) out22
-@out(r2.z) out23
-@out(r10.w) out24
-@out(r11.x) out25
-@out(r11.y) out26
-@out(r11.z) out27
-@out(r9.w) out28
-@out(r10.x) out29
-@out(r10.y) out30
-@out(r10.z) out31
-(sy)(ss)add.f r2.y, c4.x, (neg)r0.x
-mul.f r2.z, r0.w, r0.w
-mul.f r2.w, c8.w, r0.x
-mul.f r3.x, c8.z, r0.x
-mul.f r3.y, r2.y, r2.y
-add.f r3.z, c4.y, (neg)r0.y
-add.f r2.z, c13.x, (neg)r2.z
-mad.f32 r2.w, c9.w, r0.y, r2.w
-mad.f32 r3.x, c9.z, r0.y, r3.x
-mad.f32 r3.y, r3.z, r3.z, r3.y
-mov.f32f32 r2.z, r2.z
-mad.f32 r2.w, c10.w, r0.z, r2.w
-mad.f32 r3.x, c10.z, r0.z, r3.x
-mov.f32f32 r3.y, r3.y
-add.f r3.w, c4.z, (neg)r0.z
-mul.f r4.x, r2.z, r2.z
-mul.f r4.y, r1.x, r0.w
-add.f r2.w, r2.w, c11.w
-mad.f32 r3.y, r3.w, r3.w, r3.y
-add.f r3.x, r3.x, c11.z
-mul.f r4.z, c8.y, r0.x
-mul.f r4.w, c8.x, r0.x
-add.f r4.y, c13.y, (neg)r4.y
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r3.x, r3.x
-rsq r3.y, r3.y
-(ss)mov.f32f32 r3.y, r3.y
-mov.f32f32 r4.y, r4.y
-mov.f32f32 r5.w, r2.w
-mov.f32f32 r5.z, r3.x
-mad.f32 r2.y, r2.y, r3.y, (neg)c5.x
-mad.f32 r2.w, r4.y, r4.y, r4.x
-mad.f32 r3.x, r3.z, r3.y, (neg)c5.y
-mad.f32 r3.y, r3.w, r3.y, (neg)c5.z
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r2.w, r2.w
-mul.f r3.z, r1.y, r0.w
-mov.f32f32 r3.x, r3.x
-mul.f r3.w, r2.y, r2.y
-mov.f32f32 r3.y, r3.y
-add.f r3.z, c13.y, (neg)r3.z
-mad.f32 r3.w, r3.x, r3.x, r3.w
-mad.f32 r4.x, c9.y, r0.y, r4.z
-mad.f32 r4.z, c9.x, r0.y, r4.w
-mad.f32 r4.x, c10.y, r0.z, r4.x
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r3.z, r3.z
-mad.f32 r3.w, r3.y, r3.y, r3.w
-add.f r4.x, r4.x, c11.y
-mad.f32 r4.z, c10.x, r0.z, r4.z
-mul.f r4.w, c0.w, r0.x
-mul.f r6.x, c0.z, r0.x
-mul.f r6.y, c0.y, r0.x
-mul.f r6.z, c0.x, r0.x
-rsq r3.w, r3.w
-(ss)mov.f32f32 r3.w, r3.w
-mad.f32 r2.w, r3.z, r3.z, r2.w
-mul.f r4.x, r4.x, c12.y
-add.f r4.z, r4.z, c11.x
-mul.f r3.y, r3.y, r3.w
-mul.f r3.x, r3.x, r3.w
-mul.f r2.y, r2.y, r3.w
-mov.f32f32 r5.y, r4.x
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r2.y, r2.y
-rsq r2.w, r2.w
-(ss)mov.f32f32 r2.w, r2.w
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r2.y, r2.y
+@in(r6.z) in0
+@in(r6.w) in1
+@in(r7.x) in2
+@in(r5.w) in4
+@in(r6.x) in5
+@in(r6.y) in6
+@in(r2.w) in8
+@in(r3.x) in9
+@in(r3.y) in10
+@out(r0.x) out0
+@out(r0.y) out1
+@out(r0.z) out2
+@out(r0.w) out3
+@out(r1.x) out4
+@out(r1.y) out5
+@out(r1.z) out6
+@out(r1.w) out7
+@out(r2.x) out8
+@out(r2.y) out9
+@out(r2.z) out10
+@out(r2.w) out11
+@out(r3.x) out12
+@out(r3.y) out13
+@out(r3.z) out14
+@out(r3.w) out15
+@out(r4.x) out16
+@out(r4.y) out17
+@out(r4.z) out18
+@out(r4.w) out19
+@out(r5.x) out20
+@out(r5.y) out21
+@out(r5.z) out22
+@out(r5.w) out23
+@out(r6.x) out24
+@out(r6.y) out25
+@out(r6.z) out26
+@out(r6.w) out27
+@out(r7.x) out28
+@out(r7.y) out29
+@out(r7.z) out30
+@out(r7.w) out31
+@const(c13.x) 0x3f800000, 0x00000000, 0xbf800000, 0x00000000
+@const(c14.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
+(sy)(ss)add.f r0.x, c4.x, (neg)r6.z
+mul.f r0.y, r5.w, r5.w
+mul.f r0.z, c8.y, r6.z
+mul.f r0.w, c8.x, r6.z
+mul.f r1.x, r0.x, r0.x
+add.f r1.z, c4.y, (neg)r6.w
+add.f r0.y, c13.x, (neg)r0.y
+mad.f32 r0.z, c9.y, r6.w, r0.z
+mad.f32 r0.w, c9.x, r6.w, r0.w
+mad.f32 r1.x, r1.z, r1.z, r1.x
+add.f r1.w, c4.z, (neg)r7.x
+mov.f32f32 r1.y, r0.y
+mad.f32 r0.z, c10.y, r7.x, r0.z
+mad.f32 r0.w, c10.x, r7.x, r0.w
+mad.f32 r1.x, r1.w, r1.w, r1.x
+mul.f r2.x, r1.y, r1.y
+mul.f r1.y, r6.x, r5.w
+add.f r0.z, r0.z, c11.y
+add.f r0.w, r0.w, c11.x
+mul.f r2.y, r6.y, r5.w
+mul.f r2.z, c8.w, r6.z
+rsq r1.x, r1.x
+(ss)mov.f32f32 r3.z, r1.x
+add.f r3.w, c13.y, (neg)r1.y
+mad.f32 r0.x, r0.x, r1.x, (neg)c5.x
+mul.f r1.y, r0.z, c12.y
+mad.f32 r0.z, r1.z, r3.z, (neg)c5.y
+mov.f32f32 r1.z, r3.w
+(ss)mov.f32f32 r1.x, r0.x
+mad.f32 r1.w, r1.w, r3.z, (neg)c5.z
+mov.f32f32 r3.z, r0.z
+mad.f32 r2.x, r3.w, r1.z, r2.x
+mul.f r1.x, r1.x, r1.x
+add.f r2.y, c13.y, (neg)r2.y
+mad.f32 r0.z, r0.z, r3.z, r1.x
+mov.f32f32 r3.w, r1.w
+mul.f r1.x, r0.w, c12.x
+mov.f32f32 r0.w, r2.y
+mad.f32 r2.z, c9.w, r6.w, r2.z
+mad.f32 r0.z, r1.w, r3.w, r0.z
+mad.f32 r1.w, c10.w, r7.x, r2.z
+mul.f r2.z, c8.z, r6.z
+mul.f r4.x, c0.w, r6.z
+mul.f r5.x, c0.z, r6.z
+mul.f r5.y, c0.y, r6.z
+mul.f r5.z, c0.x, r6.z
+rsq r0.z, r0.z
+(ss)mov.f32f32 r4.z, r0.z
+mul.f r4.y, r0.x, r0.z
+mad.f32 r0.x, r2.y, r0.w, r2.x
+add.f r1.w, r1.w, c11.w
+mul.f r4.w, r3.w, r4.z
+mul.f r4.z, r3.z, r4.z
+(ss)mad.f32 r0.z, c9.z, r6.w, r2.z
+mad.f32 r2.y, c1.w, r6.w, r4.x
+mad.f32 r5.x, c1.z, r6.w, r5.x
+rsq r0.x, r0.x
+(ss)mov.f32f32 r3.z, r0.x
+mul.f r2.x, r0.y, r0.x
+(ss)mad.f32 r0.x, c10.z, r7.x, r0.z
+mad.f32 r0.y, c2.w, r7.x, r2.y
+mul.f r2.z, r0.w, r3.z
+mul.f r2.y, r1.z, r3.z
+mov.f32f32 r0.z, r2.x
+add.f r1.z, r0.x, c11.z
+mov.f32f32 r0.x, r2.z
+mov.f32f32 r4.x, r2.y
+mul.f r7.y, r6.x, r0.z
+add.f r0.w, r0.y, c3.w
+mul.f r0.y, r5.w, r0.x
+mul.f r3.z, r6.y, r4.x
+mad.f32 r3.w, r6.y, r0.z, (neg)r0.y
+mad.f32 r3.z, r6.x, r0.x, (neg)r3.z
+mad.f32 r4.x, r5.w, r4.x, (neg)r7.y
+mad.f32 r0.x, c2.z, r7.x, r5.x
+mad.f32 r0.y, c1.y, r6.w, r5.y
+mad.f32 r5.x, c1.x, r6.w, r5.z
+mad.f32 r0.y, c2.y, r7.x, r0.y
+add.f r0.z, r0.x, c3.z
+mad.f32 r0.x, c2.x, r7.x, r5.x
nop
-mov.f32f32 r7.z, r3.y
-mov.f32f32 r7.y, r3.x
-mov.f32f32 r7.x, r2.y
-mul.f r2.y, r2.z, r2.w
-mul.f r2.z, r3.z, r2.w
-mul.f r2.w, r4.y, r2.w
-mul.f r3.x, r4.z, c12.x
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r5.x, r3.x
-mul.f r3.x, r1.x, r2.y
-mul.f r3.y, r0.w, r2.z
-mad.f32 r3.x, r0.w, r2.w, (neg)r3.x
-mad.f32 r3.y, r1.y, r2.y, (neg)r3.y
-mul.f r3.z, r1.y, r2.w
-mov.f32f32 r3.w, r2.z
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r3.y, r3.y
-mad.f32 r2.z, r1.x, r2.z, (neg)r3.z
-mov.f32f32 r4.x, r3.w
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r8.z, r3.y
-mov.f32f32 r2.z, r2.z
-nop
-mov.f32f32 r6.w, r3.x
-mov.f32f32 r3.w, r2.w
-mov.f32f32 r8.y, r2.z
-mov.f32f32 r2.y, r2.y
-mad.f32 r2.z, c1.w, r0.y, r4.w
-mad.f32 r2.w, c1.z, r0.y, r6.x
-mad.f32 r3.x, c1.y, r0.y, r6.y
-mov.f32f32 r3.z, r2.y
-mad.f32 r2.y, c2.w, r0.z, r2.z
-mad.f32 r2.z, c2.z, r0.z, r2.w
-mad.f32 r2.w, c2.y, r0.z, r3.x
-mad.f32 r3.x, c1.x, r0.y, r6.z
-add.f r2.y, r2.y, c3.w
-add.f r2.z, r2.z, c3.z
-add.f r2.w, r2.w, c3.y
-mad.f32 r3.x, c2.x, r0.z, r3.x
-mov.f32f32 r9.z, r2.y
-mov.f32f32 r9.y, r2.z
-mov.f32f32 r9.x, r2.w
-add.f r2.y, r3.x, c3.x
-mov.f32f32 r2.z, (0.000000)
-mov.f32f32 r2.w, (0.000000)
-mov.f32f32 r3.x, (0.000000)
-mov.f32f32 r8.w, r2.y
-mov.f32f32 r10.z, r2.z
-mov.f32f32 r10.y, r2.w
-mov.f32f32 r10.x, r3.x
-mov.f32f32 r2.y, r0.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r2.z, r0.x
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r9.w, r2.y
-mov.f32f32 r11.z, r0.y
-mov.f32f32 r11.y, r2.z
-mov.f32f32 r11.x, r1.y
-mov.f32f32 r0.y, r1.x
-mov.f32f32 r0.w, r0.w
-mul.f r1.x, r2.x, c6.z
-mul.f r1.y, r1.w, c6.y
-mov.f32f32 r10.w, r0.y
-mov.f32f32 r2.z, r0.w
-mov.f32f32 r2.y, r1.x
-mov.f32f32 r2.x, r1.y
-mul.f r0.y, r1.z, c6.x
-mad.f32 r0.z, c7.x, r0.z, c7.y
-mad.f32 r0.x, c7.x, r0.x, c7.y
-mov.f32f32 r0.w, c13.z
-mov.f32f32 r1.w, r0.y
-mov.f32f32 r8.x, r0.z
-mov.f32f32 r7.w, r0.x
-mov.f32f32 r4.y, r0.w
+add.f r0.y, r0.y, c3.y
+mov.f32f32 r7.w, (0.000000)
+add.f r0.x, r0.x, c3.x
+mov.f32f32 r7.z, (0.000000)
+mov.f32f32 r7.y, (0.000000)
+mul.f r5.z, r3.y, c6.z
+mul.f r5.y, r3.x, c6.y
+mul.f r5.x, r2.w, c6.x
+mad.f32 r3.y, c7.x, r7.x, c7.y
+mad.f32 r3.x, c7.x, r6.z, c7.y
+mov.f32f32 r2.w, c13.z
end
nop
nop
nop
-; VERT: outputs: r8.w (0:0) r5.x (5:9) r3.z (5:10) r7.w (5:11) r6.w (5:12) r1.w (5:13) r10.w (5:14) r9.w (5:15)
-; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r1.z (0:0,cm=7,il=16,b=0)
-; VERT: 153 instructions, 0 half, 12 full
-; pos: r8.w
+; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11) r4.x (5:12) r5.x (5:13) r6.x (5:14) r7.x (5:15)
+; VERT: inputs: r6.z (0:0,cm=7,il=8,b=0) r5.w (0:0,cm=7,il=12,b=0) r2.w (0:0,cm=7,il=16,b=0)
+; VERT: 93 instructions, 0 half, 8 full
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-34.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-34.asm
index 1058343..18b73b5 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-34.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-34.asm
@@ -4,326 +4,227 @@
@in(r0.w) in3
@in(r1.x) in4
@in(r1.y) in5
-@out(r5.w) out0
-@out(r6.x) out1
-@out(r6.y) out2
-@out(r6.z) out3
+@out(r2.x) out0
+@out(r2.y) out1
+@out(r2.z) out2
+@out(r2.w) out3
+@const(c13.x) 0x3f000000, 0x00000000, 0x3f800000, 0x40000000
+@const(c14.x) 0xbf800000, 0xba03126f, 0xbf000000, 0x3f800000
+@const(c15.x) 0x3fb8aa65, 0x3de38866, 0x3cf5c28f, 0x00000000
+@const(c16.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.x, 10, r1.x
-bary.f r0.y, 0, r1.x
+bary.f r1.z, 0, r1.x
add.f r0.w, r0.w, c13.y
-bary.f r1.z, 1, r1.x
-mov.f32f32 r1.w, r0.x
-add.f r2.x, r0.y, c14.z
-bary.f r2.y, 8, r1.x
+bary.f r1.w, 1, r1.x
+mov.f32f32 r2.x, r0.x
+bary.f r0.y, 11, r1.x
add.f r2.z, r1.z, c14.z
-mov.f32f32 r2.w, r1.w
-bary.f r1.w, 11, r1.x
-floor.f r3.x, r2.x
+add.f r2.w, r1.w, c14.z
+bary.f r3.x, 8, r1.x
+mov.f32f32 r2.y, r0.y
+floor.f r3.z, r2.z
rcp r0.w, r0.w
add.f r0.z, r0.z, c13.y
-floor.f r3.y, r2.z
-mov.f32f32 r3.z, r1.w
-add.f r2.x, r2.x, (neg)r3.x
+floor.f r3.w, r2.w
+bary.f r3.y, 9, r1.x
+add.f r2.z, r2.z, (neg)r3.z
(ss)mul.f r0.z, r0.z, r0.w
-(ss)add.f r0.w, r2.z, (neg)r3.y
-mov.f32f32 r3.x, r3.z
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r0.z, r0.z
-absneg.f r2.z, (neg)c10.x
-mov.f32f32 r0.w, r0.w
-mul.f r3.y, c13.x, r2.x
-add.f r3.z, c13.w, (neg)r2.x
-sam (f32)(xyz)r3.w, r2.w, s#2, t#2
-(sy)(ss)mad.f32 r2.w, c13.w, r3.w, c14.x
-mul.f r2.z, r2.z, c10.x
-mov.f32f32 r3.x, r3.y
-mul.f r3.y, c13.x, r0.w
-mov.f32f32 r2.w, r2.w
+sam (f32)(xyz)r4.x, r2.x, s#2, t#2
+(sy)(ss)mad.f32 r0.w, c13.w, r4.x, c14.x
+absneg.f r2.x, (neg)c10.x
+mov.f32f32 r2.y, r2.z
+add.f r2.w, r2.w, (neg)r3.w
+mov.f32f32 r3.z, r0.w
bary.f r3.w, 4, r1.x
-add.f r0.y, r0.y, (neg)r3.x
-mul.f r2.z, r2.z, r0.z
-mov.f32f32 r3.x, r3.y
-mul.f r3.y, r3.w, r2.w
-mad.f32 r3.w, c13.w, r4.x, c14.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r2.z, r2.z
-add.f r1.z, r1.z, (neg)r3.x
-mov.f32f32 r3.x, r3.w
-bary.f r3.w, 12, r1.x
-bary.f r4.x, 7, r1.x
-add.f r4.z, c14.z, r0.y
-mul.f r0.z, r2.z, r0.z
-add.f r0.y, c14.w, r0.y
-mul.f r2.z, r3.w, (neg)r4.x
-mov.f32f32 r3.w, r4.z
+mul.f r4.x, c13.x, r2.y
+mul.f r2.x, r2.x, c10.x
+mov.f32f32 r4.w, r2.w
+mul.f r3.w, r3.w, r3.z
+mad.f32 r4.y, c13.w, r4.y, c14.x
+add.f r1.z, r1.z, (neg)r4.x
+mul.f r2.x, r2.x, r0.z
mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.y, r0.y
-mad.f32 r2.z, r2.z, r3.x, r3.y
-mul.f r3.y, r3.w, c5.z
+mov.f32f32 r4.x, r4.y
+bary.f r5.x, 12, r1.x
+bary.f r5.y, 7, r1.x
+mov.f32f32 r5.z, r1.z
+mul.f r0.z, r2.x, r0.z
+mul.f r2.x, c13.x, r4.w
+mul.f r5.x, r5.x, (neg)r5.y
+add.f r5.z, c14.z, r5.z
mul.f r0.z, r0.z, c15.x
-mul.f r0.y, r0.y, c5.z
-mov.f32f32 r2.z, r2.z
-mad.f32 r3.w, c13.w, r4.y, c14.x
-mov.f32f32 r4.y, r3.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r4.z, r0.y
-mov.f32f32 r3.w, r3.w
-bary.f r4.w, 21, r1.x
-mov.f32f32 r5.x, r4.y
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r4.y, r4.z
-mad.f32 r2.z, r4.w, r3.w, r2.z
-exp2 r0.z, r0.z
-(ss)mov.f32f32 r0.z, r0.z
-add.f r4.z, c14.z, r1.z
-mov.f32f32 r5.w, r4.y
-mov.f32f32 r2.z, r2.z
-add.f r4.y, c16.y, (neg)r0.z
-mov.f32f32 r4.z, r4.z
+add.f r1.w, r1.w, (neg)r2.x
+mad.f32 r2.x, r5.x, r4.x, r3.w
+mad.f32 r3.w, c13.w, r4.z, c14.x
+mul.f r5.z, r5.z, c5.z
+mov.f32f32 r4.z, r1.w
add.f r1.z, c14.w, r1.z
-mul.f r4.w, r2.z, r2.z
-bary.f r5.y, 5, r1.x
-mul.f r4.z, r4.z, c5.w
-mul.f r4.y, r4.y, c10.y
-mul.f r0.z, r0.z, c13.z
-mul.f r5.z, r5.y, r2.w
-bary.f r5.y, 13, r1.x
-mov.f32f32 r6.x, r4.z
-add.f r0.z, r0.z, r4.y
-mov.f32f32 r1.z, r1.z
-mul.f r4.y, r5.y, (neg)r4.x
-mov.f32f32 r5.y, r6.x
-bary.f r6.x, 2, r1.x
-mov.f32f32 r0.z, r0.z
-mad.f32 r4.y, r4.y, r3.x, r5.z
-mul.f r1.z, r1.z, c5.w
-add.f r6.z, r6.x, c14.y
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r4.y, r4.y
-bary.f r5.z, 22, r1.x
-mov.f32f32 r6.x, r6.z
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r6.y, r1.z
-mad.f32 r4.y, r5.z, r3.w, r4.y
-mov.f32f32 r5.z, r6.x
+mov.f32f32 r5.x, r3.w
+bary.f r5.w, 21, r1.x
+mov.f32f32 r6.y, r5.z
+exp2 r0.z, r0.z
+(ss)mov.f32f32 r6.x, r0.z
+add.f r4.z, c14.z, r4.z
+mad.f32 r2.x, r5.w, r5.x, r2.x
+mul.f r7.x, r1.z, c5.z
+add.f r1.z, c14.w, r1.w
+add.f r1.w, c16.y, (neg)r6.x
+mov.f32f32 r7.w, r2.x
+mul.f r8.y, r4.z, c5.w
+mov.f32f32 r8.x, r7.x
+mul.f r7.y, r1.z, c5.w
+mul.f r1.z, r2.x, r7.w
+bary.f r2.x, 5, r1.x
+mov.f32f32 r6.z, r8.y
+mul.f r1.w, r1.w, c10.y
+(ss)mul.f r0.z, r0.z, c13.z
+mul.f r2.x, r2.x, r3.z
+bary.f r3.z, 13, r1.x
+bary.f r4.z, 2, r1.x
+add.f r0.z, r0.z, r1.w
+mov.f32f32 r5.w, r7.y
+mul.f r1.w, r3.z, (neg)r5.y
+add.f r7.z, r4.z, c14.y
max.f r0.z, r0.z, c13.y
-mov.f32f32 r6.x, r6.y
-mov.f32f32 r4.y, r4.y
-mov.f32f32 r6.w, r0.y
-mov.f32f32 r0.y, r3.y
+add.f r2.y, c13.w, (neg)r2.y
+mad.f32 r1.w, r1.w, r4.x, r2.x
+bary.f r2.x, 22, r1.x
+mov.f32f32 r6.w, r7.z
min.f r0.z, r0.z, c13.z
-mad.f32 r3.y, r4.y, r4.y, r4.w
-sam.s (f32)(x)r4.w, r5.x, s#4, t#4
-(sy)mov.f32f32 r4.w, r4.w
-mov.f32f32 r6.x, r6.x
-(ss)add.f r5.x, c16.y, (neg)r0.z
-mov.f32f32 r3.y, r3.y
-bary.f r5.y, 6, r1.x
-mov.f32f32 r4.w, r4.w
-mov.f32f32 r3.z, r3.z
-add.f r5.z, c13.w, (neg)r0.w
-mul.f r2.w, r5.y, r2.w
-bary.f r5.y, 14, r1.x
-mul.f r5.x, r5.x, c9.z
-add.f r6.y, c16.y, (neg)r0.z
-add.f r7.x, c16.y, (neg)r0.z
-mul.f r4.x, r5.y, (neg)r4.x
-mov.f32f32 r5.y, r5.z
-mul.f r5.z, r6.y, c9.y
-mul.f r7.z, r7.x, c9.x
-mad.f32 r2.w, r4.x, r3.x, r2.w
-mul.f r3.x, r3.z, r5.y
-mov.f32f32 r4.x, r6.z
-mov.f32f32 r4.z, r4.z
-mov.f32f32 r2.w, r2.w
-bary.f r6.y, 23, r1.x
-mul.f r3.x, r3.x, r4.w
-mov.f32f32 r7.x, r4.z
-mov.f32f32 r4.z, r6.z
-mad.f32 r2.w, r6.y, r3.w, r2.w
-mov.f32f32 r6.y, r4.x
-mov.f32f32 r7.w, r0.y
-mov.f32f32 r7.y, r4.z
-mov.f32f32 r0.y, r2.w
-mov.f32f32 r1.z, r1.z
-add.f r2.x, r2.x, c13.z
-add.f r0.w, r0.w, c13.z
-mad.f32 r2.w, r0.y, r0.y, r3.y
-sam.s (f32)(x)r3.y, r5.w, s#4, t#4
-mov.f32f32 r8.x, r1.z
-sam.s (f32)(x)r1.z, r6.w, s#4, t#4
-mov.f32f32 r3.w, r6.z
-mul.f r4.x, r2.x, r5.y
-mul.f r3.z, r3.z, r0.w
-(sy)mov.f32f32 r1.z, r1.z
-mov.f32f32 r3.y, r3.y
-rsq r2.w, r2.w
-(ss)mov.f32f32 r2.w, r2.w
-mov.f32f32 r8.y, r3.w
-mov.f32f32 r2.y, r2.y
-mul.f r0.w, r2.x, r0.w
-mul.f r2.x, r2.z, r2.w
-mad.f32 r1.z, r4.x, r1.z, r3.x
-mul.f r2.z, r4.y, r2.w
-mul.f r0.y, r0.y, r2.w
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r1.z, r1.z
-sam.s (f32)(x)r2.w, r7.w, s#4, t#4
-(sy)mov.f32f32 r2.w, r2.w
-mov.f32f32 r2.z, r2.z
-mul.f r3.x, r2.x, r2.x
-mul.f r3.w, (neg)c8.x, r2.x
-mad.f32 r1.z, r3.z, r2.w, r1.z
-mad.f32 r2.w, r2.z, r2.z, r3.x
-mad.f32 r3.x, (neg)c8.y, r2.z, r3.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r3.z, r2.y
-mov.f32f32 r2.y, r2.w
-mov.f32f32 r2.w, r3.x
-mad.f32 r2.y, r0.y, r0.y, r2.y
-mad.f32 r2.w, (neg)c8.z, r0.y, r2.w
-mov.f32f32 r1.z, r1.z
-bary.f r3.x, 9, r1.x
-mov.f32f32 r4.x, r0.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r4.y, r1.w
-rsq r2.y, r2.y
-(ss)mov.f32f32 r2.y, r2.y
-mov.f32f32 r2.w, r2.w
-mad.f32 r0.w, r0.w, r3.y, r1.z
-mov.f32f32 r1.z, r3.x
-mul.f r2.x, r2.x, r2.y
-max.f r2.w, r2.w, c13.y
-mov.f32f32 r0.w, r0.w
-mul.f r2.z, r2.z, r2.y
-mov.f32f32 r2.x, r2.x
-bary.f r3.x, 15, r1.x
-mov.f32f32 r2.w, r2.w
-bary.f r3.y, 19, r1.x
-bary.f r3.w, 18, r1.x
-mov.f32f32 r3.x, r3.x
-bary.f r4.z, 20, r1.x
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r3.w, r3.w
-mul.f r2.x, r2.x, r3.x
-mov.f32f32 r2.z, r2.z
-bary.f r3.x, 16, r1.x
-mov.f32f32 r4.z, r4.z
-mad.f32 r4.w, c7.y, r2.w, (neg)r3.y
-mad.f32 r5.y, c7.x, r2.w, (neg)r3.w
-mov.f32f32 r3.x, r3.x
-mad.f32 r2.w, c7.z, r2.w, (neg)r4.z
-mov.f32f32 r4.w, r4.w
+mov.f32f32 r8.z, r7.z
+mad.f32 r1.w, r2.x, r5.x, r1.w
+mov.f32f32 r6.x, r7.z
+add.f r2.x, c16.y, (neg)r0.z
+add.f r3.z, c16.y, (neg)r0.z
+mov.f32f32 r4.x, r1.w
+sam.s (f32)(x)r8.w, r6.y, s#4, t#4
+add.f r4.z, c16.y, (neg)r0.z
+mov.f32f32 r5.x, r2.y
+add.f r4.w, c13.w, (neg)r4.w
+mad.f32 r1.z, r1.w, r4.x, r1.z
+bary.f r1.w, 6, r1.x
+mul.f r2.x, r2.x, c9.z
+mul.f r3.z, r3.z, c9.y
+mul.f r4.z, r4.z, c9.x
+mul.f r0.w, r1.w, r0.w
+bary.f r1.w, 14, r1.x
+(ss)mov.f32f32 r6.y, r4.w
+sam.s (f32)(x)r9.x, r8.x, s#4, t#4
+sam.s (f32)(x)r9.y, r5.z, s#4, t#4
+sam.s (f32)(x)r6.z, r7.x, s#4, t#4
+add.f r2.z, r2.z, c13.z
+add.f r2.w, r2.w, c13.z
+mul.f r1.w, r1.w, (neg)r5.y
+mul.f r5.x, r5.x, r6.y
+mul.f r4.w, r2.z, r4.w
+mul.f r2.y, r2.y, r2.w
+mad.f32 r0.w, r1.w, r4.y, r0.w
+bary.f r1.w, 23, r1.x
+(sy)mul.f r4.y, r5.x, r8.w
+mul.f r2.z, r2.z, r2.w
+mad.f32 r2.w, r4.w, r9.x, r4.y
+mad.f32 r0.w, r1.w, r3.w, r0.w
+mad.f32 r1.w, r2.y, r9.y, r2.w
+(ss)nop
+sam (f32)(w)r4.w, r3.x, s#1, t#1
+(sy)cmps.f.lt r2.y, r5.z, c15.z
+mov.f32f32 r2.w, r0.x
+(ss)mov.f32f32 r3.y, r0.w
+mad.f32 r1.w, r2.z, r6.z, r1.w
+cov.u32f32 r2.y, r2.y
+mov.f32f32 r3.x, r0.y
+mad.f32 r1.z, r3.y, r3.y, r1.z
+mul.f r1.w, c15.y, r1.w
+cmps.f.ne r2.y, r2.y, c13.y
+(rpt3)nop
+rsq r1.z, r1.z
+(ss)mov.f32f32 r2.z, r1.z
+mov.f32f32 r3.y, r1.w
+mul.f r0.w, r0.w, r1.z
+(ss)mov.f32f32 r1.z, c13.y
+mul.f r3.w, r7.w, r2.z
+mul.f r2.z, r4.x, r2.z
+mov.f32f32 r4.x, r0.w
+sel.b32 r1.z, r1.z, r2.y, r5.z
+mov.f32f32 r2.y, r3.w
+mul.f r3.w, (neg)c8.x, r3.w
+mov.f32f32 r4.y, r2.z
+mad.f32 r2.z, (neg)c8.y, r2.z, r3.w
+mul.f r3.w, r2.y, r2.y
+mad.f32 r0.w, (neg)c8.z, r0.w, r2.z
+mad.f32 r2.z, r4.y, r4.y, r3.w
+sam (f32)(xyz)r4.w, r2.w, s#3, t#3
+(sy)(ss)mul.f r3.x, c7.z, r5.y
+mad.f32 r2.z, r4.x, r4.x, r2.z
+max.f r0.w, r0.w, c13.y
+bary.f r2.w, 18, r1.x
+(rpt1)nop
+mov.f32f32 r3.w, r0.w
+bary.f r5.y, 19, r1.x
+rsq r2.z, r2.z
+(ss)mov.f32f32 r5.z, r2.z
+bary.f r5.w, 20, r1.x
+mad.f32 r6.x, c7.y, r3.w, (neg)r5.y
+mad.f32 r3.w, c7.z, r3.w, (neg)r5.w
+mul.f r2.y, r2.y, r5.z
+bary.f r6.y, 15, r1.x
+mov.f32f32 r5.w, r5.w
mov.f32f32 r5.y, r5.y
-mad.f32 r2.x, r2.z, r3.x, r2.x
-mov.f32f32 r2.z, r2.w
-mad.f32 r2.w, c11.x, r4.w, r3.y
-mad.f32 r3.x, c11.x, r5.y, r3.w
-mov.f32f32 r2.x, r2.x
-mul.f r0.y, r0.y, r2.y
-mad.f32 r2.y, c11.x, r2.z, r4.z
-mov.f32f32 r2.z, r2.w
-mov.f32f32 r2.w, r3.x
-mov.f32f32 r0.y, r0.y
+mul.f r4.y, r4.y, r5.z
+mul.f r2.y, r2.y, r6.y
+bary.f r5.z, 16, r1.x
+mad.f32 r3.w, c11.x, r3.w, r5.w
+mad.f32 r5.y, c11.x, r6.x, r5.y
+(ss)mul.f r2.z, r4.x, r2.z
+mad.f32 r2.y, r4.y, r5.z, r2.y
bary.f (ei)r1.x, 17, r1.x
-mov.f32f32 r1.y, r2.y
-mul.f r0.w, c15.y, r0.w
-nop
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r3.w, r1.z
-mov.f32f32 r3.x, r4.x
-mov.f32f32 r3.y, r4.y
-mad.f32 r0.y, r0.y, r1.x, r2.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.x, r0.x
-mov.f32f32 r0.x, r1.w
-mov.f32f32 r0.y, r0.y
-sam (f32)(w)r3.z, r3.z, s#1, t#1
-(sy)mov.f32f32 r1.x, r4.y
-cmps.f.lt r1.z, r4.y, c15.z
-(ss)nop
-sam (f32)(xyz)r3.x, r3.x, s#3, t#3
-(sy)mul.f r1.w, c7.z, r3.z
-max.f r0.y, c13.y, r0.y
-mov.f32f32 r1.x, r1.x
-cov.u32f32 r1.z, r1.z
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r0.y, r0.y
-(ss)mul.f r3.y, c7.y, r3.y
-mul.f r3.x, c7.x, r3.x
-mov.f32f32 r2.y, r0.x
-mov.f32f32 r0.x, c13.z
-nop
-cmps.f.ne r1.z, r1.z, c13.y
-log2 r0.y, r0.y
-(ss)mul.f r0.y, c11.y, r0.y
-mov.f32f32 r3.z, c13.y
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r0.y, r0.y
-sel.b32 r1.x, r3.z, r1.z, r1.x
-sam (f32)(xyz)r3.z, r2.x, s#0, t#0
-mov.f32f32 r6.z, r0.x
+mad.f32 r0.w, c7.x, r0.w, (neg)r2.w
+(rpt1)nop
+mad.f32 r1.x, r2.z, r1.x, r2.y
+mov.f32f32 r1.y, r2.w
+mul.f r2.y, c7.y, r5.x
+mul.f r2.z, c7.x, r4.w
+max.f r1.x, c13.y, r1.x
+mad.f32 r0.w, c11.x, r0.w, r1.y
+sam (f32)(xyz)r5.z, r0.x, s#0, t#0
+mov.f32f32 r2.w, c13.z
(rpt3)nop
-exp2 r0.x, r0.y
-(ss)mul.f r0.y, r1.w, r0.x
-mul.f r1.z, r3.y, r0.x
-(sy)mad.f32 r0.y, r4.x, r1.y, r0.y
-mad.f32 r1.y, r3.w, r2.z, r1.z
-mul.f r0.x, r3.x, r0.x
-nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.y, r1.y
-mad.f32 r0.x, r3.z, r2.w, r0.x
-nop
-mul.f r0.y, r0.y, r0.w
-mul.f r1.y, r1.y, r0.w
-mov.f32f32 r0.x, r0.x
-nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.y, r1.y
-mad.f32 r0.y, c6.z, r4.x, r0.y
-mad.f32 r1.y, c6.y, r3.w, r1.y
-mul.f r0.x, r0.x, r0.w
+(ss)log2 r0.x, r1.x
+(ss)mul.f r0.x, c11.y, r0.x
+(rpt5)nop
+exp2 r0.x, r0.x
+(ss)mul.f r0.y, r3.x, r0.x
+(ss)mul.f r1.x, r2.y, r0.x
+(sy)mad.f32 r0.y, r6.x, r3.w, r0.y
+mad.f32 r1.x, r5.w, r5.y, r1.x
+mul.f r0.x, r2.z, r0.x
nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.w, r1.y
-mov.f32f32 r0.x, r0.x
+mul.f r0.y, r0.y, r3.y
+mul.f r1.x, r1.x, r3.y
+mad.f32 r0.y, c6.z, r6.x, r0.y
+mad.f32 r1.x, c6.y, r5.w, r1.x
+mad.f32 r0.x, r5.z, r0.w, r0.x
nop
mul.f r0.y, r0.z, r0.y
-mul.f r0.w, r0.z, r0.w
-mad.f32 r0.x, c6.x, r3.z, r0.x
+mul.f r0.w, r0.z, r1.x
+mul.f r0.x, r0.x, r1.w
nop
-add.f r0.y, r0.y, r5.x
-add.f r0.w, r0.w, r5.z
-mov.f32f32 r0.x, r0.x
+add.f r0.y, r0.y, r2.x
+add.f r0.w, r0.w, r3.z
+mad.f32 r0.x, c6.x, r5.z, r0.x
nop
-mul.f r0.y, r0.y, r1.x
-mul.f r0.w, r0.w, r1.x
+mul.f r2.z, r0.y, r1.z
+mul.f r2.y, r0.w, r1.z
mul.f r0.x, r0.z, r0.x
-nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.w
-add.f r0.x, r0.x, r7.z
-nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-mul.f r0.x, r0.x, r1.x
-nop
-mov.f32f32 r6.y, r0.y
-mov.f32f32 r6.x, r0.z
-mov.f32f32 r0.x, r0.x
(rpt2)nop
-mov.f32f32 r0.x, r0.x
+add.f r0.x, r0.x, r4.z
(rpt2)nop
-mov.f32f32 r5.w, r0.x
+mul.f r2.x, r0.x, r1.z
end
nop
nop
-; FRAG: outputs: r5.w (1:0)
-; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r3.y (5:10,cm=f,il=12,b=1) r1.x (5:11,cm=f,il=16,b=1) r2.y (5:12,cm=f,il=20,b=1) r2.z (5:13,cm=f,il=24,b=1) r5.z (5:14,cm=f,il=28,b=1)
-; FRAG: 317 instructions, 0 half, 9 full
-; pos (bary): r1.x
-; color: r5.w
-; fragcoord: r0.x
+nop
+; FRAG: outputs: r2.x (1:0)
+; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r4.z (5:10,cm=f,il=12,b=1) r63.z (5:11,cm=f,il=16,b=1) r5.z (5:12,cm=f,il=20,b=1) r4.z (5:13,cm=f,il=24,b=1) r1.x (5:14,cm=f,il=28,b=1)
+; FRAG: 226 instructions, 0 half, 10 full
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-35.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-35.asm
index 497186f..e6a0565 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-35.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-35.asm
@@ -1,204 +1,141 @@
; options:
; VERT: new compiler
-@in(r0.x) in0
-@in(r0.y) in1
-@in(r0.z) in2
-@in(r0.w) in4
-@in(r1.x) in5
-@in(r1.y) in6
-@in(r1.z) in8
-@in(r1.w) in9
-@in(r2.x) in10
-@out(r9.z) out0
-@out(r9.w) out1
-@out(r10.x) out2
-@out(r10.y) out3
-@out(r2.w) out4
-@out(r3.x) out5
-@out(r3.y) out6
-@out(r3.z) out7
-@out(r8.z) out8
-@out(r8.w) out9
-@out(r9.x) out10
-@out(r9.y) out11
-@out(r5.z) out12
-@out(r5.w) out13
-@out(r6.x) out14
-@out(r6.y) out15
-@out(r7.z) out16
-@out(r7.w) out17
-@out(r8.x) out18
-@out(r8.y) out19
-@out(r6.z) out20
-@out(r6.w) out21
-@out(r7.x) out22
-@out(r7.y) out23
-@out(r4.x) out24
-@out(r4.y) out25
-@out(r4.z) out26
-@out(r4.w) out27
-(sy)(ss)add.f r2.y, c4.x, (neg)r0.x
-mul.f r2.z, r0.w, r0.w
-absneg.f r2.w, (neg)c7.y
-mul.f r3.x, r0.x, c7.x
-mul.f r3.y, r2.y, r2.y
-add.f r3.z, c4.y, (neg)r0.y
-add.f r2.z, c14.x, (neg)r2.z
-mul.f r3.w, r0.x, (neg)r2.w
-mad.f32 r2.w, r0.z, (neg)r2.w, r3.x
-mad.f32 r3.x, r3.z, r3.z, r3.y
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r3.y, r3.w
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r3.x, r3.x
-add.f r3.w, c4.z, (neg)r0.z
-mul.f r4.x, r2.z, r2.z
-mul.f r4.y, r1.x, r0.w
-mad.f32 r3.y, (neg)c7.x, r0.z, r3.y
-mad.f32 r3.x, r3.w, r3.w, r3.x
-mov.f32f32 r2.w, r2.w
-mul.f r4.z, c9.w, r0.x
-mul.f r4.w, c9.z, r0.x
-mul.f r5.x, c9.y, r0.x
-mul.f r5.y, c9.x, r0.x
-mov.f32f32 r3.y, r3.y
-rsq r3.x, r3.x
-(ss)mov.f32f32 r3.x, r3.x
-add.f r4.y, c14.y, (neg)r4.y
-mov.f32f32 r6.x, r2.w
-mad.f32 r2.w, c10.w, r0.y, r4.z
-mad.f32 r2.y, r2.y, r3.x, (neg)c5.x
-mov.f32f32 r4.y, r4.y
-mad.f32 r3.z, r3.z, r3.x, (neg)c5.y
-mad.f32 r3.x, r3.w, r3.x, (neg)c5.z
-mov.f32f32 r2.y, r2.y
-mad.f32 r3.w, r4.y, r4.y, r4.x
-mov.f32f32 r3.z, r3.z
-mov.f32f32 r3.x, r3.x
-mul.f r4.x, r2.y, r2.y
-mov.f32f32 r3.w, r3.w
-mad.f32 r4.x, r3.z, r3.z, r4.x
-mul.f r4.z, r1.y, r0.w
-mov.f32f32 r3.y, r3.y
-mad.f32 r2.w, c11.w, r0.z, r2.w
-mov.f32f32 r4.x, r4.x
-add.f r4.z, c14.y, (neg)r4.z
-mad.f32 r4.x, r3.x, r3.x, r4.x
-mov.f32f32 r3.y, r3.y
-(rpt4)nop
-rsq r4.x, r4.x
-(ss)mov.f32f32 r4.x, r4.x
-mov.f32f32 r4.z, r4.z
-mov.f32f32 r6.y, r3.y
-add.f r2.w, r2.w, c12.w
-mul.f r3.x, r3.x, r4.x
-mul.f r3.y, r3.z, r4.x
-mul.f r2.y, r2.y, r4.x
-mad.f32 r3.z, r4.z, r4.z, r3.w
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r2.y, r2.y
+@in(r7.x) in0
+@in(r7.y) in1
+@in(r7.z) in2
+@in(r6.y) in4
+@in(r6.z) in5
+@in(r6.w) in6
+@in(r2.w) in8
+@in(r3.x) in9
+@in(r3.y) in10
+@out(r0.x) out0
+@out(r0.y) out1
+@out(r0.z) out2
+@out(r0.w) out3
+@out(r1.x) out4
+@out(r1.y) out5
+@out(r1.z) out6
+@out(r1.w) out7
+@out(r2.x) out8
+@out(r2.y) out9
+@out(r2.z) out10
+@out(r2.w) out11
+@out(r3.x) out12
+@out(r3.y) out13
+@out(r3.z) out14
+@out(r3.w) out15
+@out(r4.x) out16
+@out(r4.y) out17
+@out(r4.z) out18
+@out(r4.w) out19
+@out(r5.x) out20
+@out(r5.y) out21
+@out(r5.z) out22
+@out(r5.w) out23
+@out(r6.x) out24
+@out(r6.y) out25
+@out(r6.z) out26
+@out(r6.w) out27
+@const(c14.x) 0x3f800000, 0x00000000, 0xbf800000, 0x00000000
+@const(c15.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
+(sy)(ss)add.f r0.x, c4.x, (neg)r7.x
+mul.f r0.y, r6.y, r6.y
+mul.f r0.z, c9.y, r7.x
+mul.f r0.w, c9.x, r7.x
+mul.f r1.x, r0.x, r0.x
+add.f r1.z, c4.y, (neg)r7.y
+add.f r0.y, c14.x, (neg)r0.y
+mad.f32 r0.z, c10.y, r7.y, r0.z
+mad.f32 r0.w, c10.x, r7.y, r0.w
+mad.f32 r1.x, r1.z, r1.z, r1.x
+add.f r1.w, c4.z, (neg)r7.z
+mov.f32f32 r1.y, r0.y
+mad.f32 r0.z, c11.y, r7.z, r0.z
+mad.f32 r0.w, c11.x, r7.z, r0.w
+mad.f32 r1.x, r1.w, r1.w, r1.x
+mul.f r2.x, r1.y, r1.y
+mul.f r1.y, r6.z, r6.y
+add.f r0.z, r0.z, c12.y
+add.f r0.w, r0.w, c12.x
+mul.f r2.y, r6.w, r6.y
+mul.f r2.z, c9.w, r7.x
+rsq r1.x, r1.x
+(ss)mov.f32f32 r3.z, r1.x
+add.f r3.w, c14.y, (neg)r1.y
+mad.f32 r0.x, r0.x, r1.x, (neg)c5.x
+mul.f r1.y, r0.z, c13.y
+mad.f32 r0.z, r1.z, r3.z, (neg)c5.y
+mov.f32f32 r1.z, r3.w
+(ss)mov.f32f32 r1.x, r0.x
+mad.f32 r1.w, r1.w, r3.z, (neg)c5.z
+mov.f32f32 r3.z, r0.z
+mad.f32 r2.x, r3.w, r1.z, r2.x
+mul.f r1.x, r1.x, r1.x
+add.f r2.y, c14.y, (neg)r2.y
+mad.f32 r0.z, r0.z, r3.z, r1.x
+mov.f32f32 r3.w, r1.w
+mul.f r1.x, r0.w, c13.x
+mov.f32f32 r0.w, r2.y
+mad.f32 r2.z, c10.w, r7.y, r2.z
+mad.f32 r0.z, r1.w, r3.w, r0.z
+mad.f32 r1.w, c11.w, r7.z, r2.z
+mul.f r2.z, c9.z, r7.x
+mul.f r4.x, c0.w, r7.x
+mul.f r4.y, c0.z, r7.x
+mul.f r5.z, c0.y, r7.x
+mul.f r5.w, c0.x, r7.x
+rsq r0.z, r0.z
+(ss)mov.f32f32 r4.z, r0.z
+mul.f r4.w, r0.x, r0.z
+mad.f32 r0.x, r2.y, r0.w, r2.x
+add.f r1.w, r1.w, c12.w
+mul.f r5.y, r3.w, r4.z
+mul.f r5.x, r3.z, r4.z
+(ss)mad.f32 r0.z, c10.z, r7.y, r2.z
+mad.f32 r2.y, c1.w, r7.y, r4.x
+mad.f32 r3.z, c1.z, r7.y, r4.y
+rsq r0.x, r0.x
+(ss)mov.f32f32 r3.w, r0.x
+mul.f r2.x, r0.y, r0.x
+(ss)mad.f32 r0.x, c11.z, r7.z, r0.z
+mad.f32 r0.y, c2.w, r7.z, r2.y
+mul.f r2.z, r0.w, r3.w
+mul.f r2.y, r1.z, r3.w
+mov.f32f32 r0.z, r2.x
+add.f r1.z, r0.x, c12.z
+mov.f32f32 r0.x, r2.z
+mov.f32f32 r3.w, r2.y
+mul.f r4.z, r6.z, r0.z
+add.f r0.w, r0.y, c3.w
+mul.f r0.y, r6.y, r0.x
+mul.f r4.x, r6.w, r3.w
+mad.f32 r4.y, r6.w, r0.z, (neg)r0.y
+mad.f32 r4.x, r6.z, r0.x, (neg)r4.x
+mad.f32 r4.z, r6.y, r3.w, (neg)r4.z
+mad.f32 r0.x, c2.z, r7.z, r3.z
+mad.f32 r0.y, c1.y, r7.y, r5.z
+mad.f32 r3.z, c1.x, r7.y, r5.w
+absneg.f r5.z, (neg)c7.y
+add.f r0.z, r0.x, c3.z
+mad.f32 r0.x, c2.y, r7.z, r0.y
+mad.f32 r3.z, c2.x, r7.z, r3.z
+mul.f r3.w, r7.x, (neg)r5.z
nop
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r3.y, r3.y
-(rpt1)nop
-mov.f32f32 r6.w, r3.x
-mov.f32f32 r6.z, r3.y
-mov.f32f32 r8.y, r2.y
-rsq r2.y, r3.z
-(ss)mov.f32f32 r2.y, r2.y
-(ss)mov.f32f32 r3.z, r2.w
-mad.f32 r3.w, c10.z, r0.y, r4.w
-mad.f32 r4.x, c10.y, r0.y, r5.x
-mul.f r2.z, r2.z, r2.y
-mul.f r4.z, r4.z, r2.y
-mul.f r2.y, r4.y, r2.y
-mad.f32 r3.w, c11.z, r0.z, r3.w
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r4.y, r4.z
-mov.f32f32 r2.y, r2.y
-add.f r3.w, r3.w, c12.z
-mul.f r4.z, r1.x, r2.z
-mul.f r4.w, r0.w, r4.y
-mad.f32 r4.z, r0.w, r2.y, (neg)r4.z
-mad.f32 r4.w, r1.y, r2.z, (neg)r4.w
-mul.f r5.x, r1.y, r2.y
-mov.f32f32 r8.z, r4.y
-mov.f32f32 r4.z, r4.z
-mov.f32f32 r4.w, r4.w
-mad.f32 r4.y, r1.x, r4.y, (neg)r5.x
-mov.f32f32 r9.x, r8.z
-mov.f32f32 r8.x, r4.z
-mov.f32f32 r7.w, r4.w
-mov.f32f32 r4.y, r4.y
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r2.z, r2.z
-nop
-mov.f32f32 r7.z, r4.y
-mov.f32f32 r8.w, r2.y
-mov.f32f32 r8.z, r2.z
-mov.f32f32 r2.y, r3.w
-mad.f32 r2.z, c11.y, r0.z, r4.x
-mad.f32 r3.w, c10.x, r0.y, r5.y
-mul.f r2.x, r2.x, c6.z
-mov.f32f32 r3.y, r2.y
-add.f r2.y, r2.z, c12.y
-mad.f32 r2.z, c11.x, r0.z, r3.w
-mov.f32f32 r2.x, r2.x
-mul.f r3.w, c0.w, r0.x
-mul.f r2.y, r2.y, c13.y
-add.f r2.z, r2.z, c12.x
-mov.f32f32 r2.x, r2.x
-mad.f32 r3.w, c1.w, r0.y, r3.w
-mov.f32f32 r3.x, r2.y
-mul.f r2.y, r2.z, c13.x
-mov.f32f32 r4.x, r2.x
-mad.f32 r2.x, c2.w, r0.z, r3.w
-mul.f r2.z, c0.z, r0.x
-mov.f32f32 r2.w, r2.y
-mad.f32 r2.y, c1.z, r0.y, r2.z
-add.f r2.x, r2.x, c3.w
-mad.f32 r2.y, c2.z, r0.z, r2.y
-mul.f r2.z, c0.y, r0.x
-mul.f r3.w, c0.x, r0.x
-mov.f32f32 r10.y, r2.x
-add.f r2.x, r2.y, c3.z
-mad.f32 r2.y, c1.y, r0.y, r2.z
-mad.f32 r0.y, c1.x, r0.y, r3.w
-mul.f r1.w, r1.w, c6.y
-mov.f32f32 r10.x, r2.x
-mad.f32 r2.x, c2.y, r0.z, r2.y
-mad.f32 r0.y, c2.x, r0.z, r0.y
-mov.f32f32 r1.w, r1.w
-mul.f r1.z, r1.z, c6.x
-add.f r2.x, r2.x, c3.y
-add.f r0.y, r0.y, c3.x
-mov.f32f32 r7.y, r1.w
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r9.w, r2.x
-mov.f32f32 r9.z, r0.y
-mov.f32f32 r0.y, r1.y
-mov.f32f32 r7.x, r1.z
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r4.w, r0.y
-mad.f32 r0.y, c8.x, r0.z, c8.y
-mov.f32f32 r4.z, r1.x
-mov.f32f32 r4.y, r0.w
-mad.f32 r0.x, c8.x, r0.x, c8.y
-mov.f32f32 r5.w, r0.y
-mov.f32f32 r0.y, c14.z
-nop
-mov.f32f32 r5.z, r0.x
-nop
-mov.f32f32 r9.y, r0.y
+add.f r0.y, r0.x, c3.y
+add.f r0.x, r3.z, c3.x
+mad.f32 r3.w, (neg)c7.x, r7.z, r3.w
+mul.f r3.z, r7.x, c7.x
+mul.f r6.x, r3.y, c6.z
+mad.f32 r3.z, r7.z, (neg)r5.z, r3.z
+mul.f r5.w, r3.x, c6.y
+mul.f r5.z, r2.w, c6.x
+mad.f32 r3.y, c8.x, r7.z, c8.y
+mad.f32 r3.x, c8.x, r7.x, c8.y
+mov.f32f32 r2.w, c14.z
end
-; VERT: outputs: r9.z (0:0) r2.w (5:9) r8.z (5:10) r5.z (5:11) r7.z (5:12) r6.z (5:13) r4.x (5:14)
-; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r1.z (0:0,cm=7,il=16,b=0)
-; VERT: 165 instructions, 0 half, 11 full
-; pos: r9.z
+nop
+; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11) r4.x (5:12) r5.x (5:13) r6.x (5:14)
+; VERT: inputs: r7.x (0:0,cm=7,il=8,b=0) r6.y (0:0,cm=7,il=12,b=0) r2.w (0:0,cm=7,il=16,b=0)
+; VERT: 95 instructions, 0 half, 8 full
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-36.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-36.asm
index a72e66e..4760742 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-36.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-36.asm
@@ -4,10 +4,15 @@
@in(r0.w) in3
@in(r1.x) in4
@in(r1.y) in5
-@out(r1.w) out0
-@out(r2.x) out1
-@out(r2.y) out2
-@out(r2.z) out3
+@out(r2.x) out0
+@out(r2.y) out1
+@out(r2.z) out2
+@out(r2.w) out3
+@const(c14.x) 0x3f000000, 0x00000000, 0x3f800000, 0xbeef9097
+@const(c15.x) 0x3cff9724, 0x40000000, 0xbf800000, 0xba03126f
+@const(c16.x) 0xbf000000, 0x3f800000, 0x3fb8aa65, 0x3de38866
+@const(c17.x) 0x3cf5c28f, 0x00000000, 0x00000000, 0x00000000
+@const(c18.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.x, 21, r1.x
bary.f r0.y, 0, r1.x
add.f r0.w, r0.w, c14.y
@@ -16,534 +21,347 @@ mul.f r1.w, r0.x, r0.x
bary.f r2.x, 22, r1.x
add.f r2.y, r0.y, c16.x
add.f r2.z, r1.z, c16.x
-bary.f r2.w, 8, r1.x
+bary.f r2.w, 10, r1.x
mad.f32 r1.w, r2.x, r2.x, r1.w
-floor.f r3.x, r2.y
+bary.f r3.x, 23, r1.x
+floor.f r3.y, r2.y
rcp r0.w, r0.w
add.f r0.z, r0.z, c14.y
-floor.f r3.y, r2.z
-mov.f32f32 r1.w, r1.w
-bary.f r3.z, 23, r1.x
-add.f r2.y, r2.y, (neg)r3.x
+floor.f r3.z, r2.z
+mad.f32 r1.w, r3.x, r3.x, r1.w
+add.f r2.y, r2.y, (neg)r3.y
(ss)mul.f r0.z, r0.z, r0.w
-(ss)add.f r0.w, r2.z, (neg)r3.y
-mad.f32 r1.w, r3.z, r3.z, r1.w
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r0.z, r0.z
-absneg.f r2.z, (neg)c11.x
-mov.f32f32 r0.w, r0.w
-mul.f r3.x, c14.x, r2.y
-mov.f32f32 r2.w, r2.w
-rsq r3.y, r1.w
-(ss)mul.f r3.w, r0.x, r3.y
+(ss)absneg.f r0.w, (neg)c11.x
+add.f r2.z, r2.z, (neg)r3.z
+mov.f32f32 r3.y, r2.y
+add.f r2.y, r2.y, c14.z
+rsq r3.z, r1.w
+(ss)mul.f r3.w, r0.x, r3.z
rsq r4.x, r1.w
-(ss)mov.f32f32 r4.x, r4.x
-mov.f32f32 r3.x, r3.x
+(ss)mov.f32f32 r4.y, r4.x
(ss)rsq r1.w, r1.w
-(ss)mul.f r4.y, r0.x, r1.w
-mov.f32f32 r3.w, r3.w
-mul.f r4.z, r0.x, r4.x
-add.f r0.y, r0.y, (neg)r3.x
-mov.f32f32 r3.x, r4.y
+(ss)mul.f r4.z, r0.x, r1.w
+mul.f r4.w, c14.x, r3.y
absneg.f r3.w, (abs)r3.w
-mov.f32f32 r4.y, r4.z
-mov.f32f32 r0.y, r0.y
-absneg.f r3.x, (abs)r3.x
-mov.f32f32 r3.w, r3.w
-absneg.f r4.y, (abs)r4.y
-add.f r4.z, c16.x, r0.y
-mov.f32f32 r3.x, r3.x
+mul.f r5.x, r0.x, r4.y
+absneg.f r4.z, (abs)r4.z
+add.f r0.y, r0.y, (neg)r4.w
add.f r3.w, r3.w, c14.w
-mov.f32f32 r4.y, r4.y
-mov.f32f32 r4.z, r4.z
-add.f r3.x, r3.x, c14.w
-mov.f32f32 r3.w, r3.w
-add.f r4.y, r4.y, c14.w
-mul.f r4.z, r4.z, c6.z
-mov.f32f32 r3.x, r3.x
+absneg.f r4.w, (abs)r5.x
+add.f r4.z, r4.z, c14.w
+mov.f32f32 r5.x, r0.y
max.f r3.w, r3.w, c14.y
-mov.f32f32 r4.y, r4.y
-mov.f32f32 r4.w, r4.z
-max.f r3.x, r3.x, c14.y
-mov.f32f32 r3.w, r3.w
-mul.f r5.x, r2.x, r3.y
-max.f r4.y, r4.y, c14.y
-mov.f32f32 r5.y, r4.w
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r4.w, r5.x
-mov.f32f32 r4.y, r4.y
-mul.f r5.x, r2.x, r4.x
-mul.f r5.z, c14.x, r0.w
-absneg.f r4.w, (abs)r4.w
-mul.f r5.w, r2.x, r1.w
-mov.f32f32 r5.x, r5.x
-mov.f32f32 r5.z, r5.z
-mov.f32f32 r4.w, r4.w
-mov.f32f32 r5.w, r5.w
-absneg.f r5.x, (abs)r5.x
-add.f r1.z, r1.z, (neg)r5.z
+mul.f r5.y, r2.x, r3.z
add.f r4.w, r4.w, c14.w
-absneg.f r5.z, (abs)r5.w
-mov.f32f32 r5.x, r5.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r4.w, r4.w
-mov.f32f32 r5.z, r5.z
-add.f r5.x, r5.x, c14.w
-add.f r5.w, c16.x, r1.z
+max.f r4.z, r4.z, c14.y
+mul.f r5.z, r2.x, r1.w
+absneg.f r5.y, (abs)r5.y
max.f r4.w, r4.w, c14.y
+mul.f r4.y, r2.x, r4.y
+absneg.f r5.z, (abs)r5.z
+add.f r5.y, r5.y, c14.w
+add.f r5.x, c16.x, r5.x
+absneg.f r4.y, (abs)r4.y
+mul.f r0.w, r0.w, c11.x
+max.f r5.y, r5.y, c14.y
add.f r5.z, r5.z, c14.w
-mov.f32f32 r5.x, r5.x
-mov.f32f32 r5.w, r5.w
-mov.f32f32 r4.w, r4.w
-mov.f32f32 r5.z, r5.z
-max.f r5.x, r5.x, c14.y
-mul.f r6.x, r5.w, c6.w
-add.f r6.y, r3.w, r4.w
-mul.f r3.y, r3.z, r3.y
-mov.f32f32 r5.x, r5.x
-mov.f32f32 r5.w, r6.x
-max.f r6.z, r5.z, c14.y
-mov.f32f32 r3.y, r3.y
-add.f r6.w, r4.y, r5.x
-mul.f r4.x, r3.z, r4.x
-mov.f32f32 r5.z, r5.w
-absneg.f r3.y, (abs)r3.y
-bary.f r5.w, 2, r1.x
-mov.f32f32 r4.x, r4.x
-mov.f32f32 r6.z, r6.z
-mov.f32f32 r3.y, r3.y
-add.f r7.x, r5.w, c15.w
+add.f r4.y, r4.y, c14.w
+mul.f r5.w, r5.x, c6.z
+add.f r5.x, r3.w, r5.y
+mul.f r3.z, r3.x, r3.z
+max.f r4.y, r4.y, c14.y
+max.f r5.z, r5.z, c14.y
+mov.f32f32 r6.x, r5.w
+absneg.f r3.z, (abs)r3.z
+add.f r6.y, r4.w, r4.y
+mul.f r4.x, r3.x, r4.x
+add.f r6.z, r4.z, r5.z
+add.f r3.z, r3.z, c14.w
+(ss)mul.f r1.w, r3.x, r1.w
absneg.f r4.x, (abs)r4.x
-add.f r7.y, r3.x, r6.z
-add.f r3.y, r3.y, c14.w
-mov.f32f32 r5.w, r7.x
-mov.f32f32 r4.x, r4.x
-(ss)mul.f r1.w, r3.z, r1.w
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r5.w, r5.w
-add.f r4.x, r4.x, c14.w
-mov.f32f32 r1.w, r1.w
-max.f r3.y, r3.y, c14.y
-mul.f r2.z, r2.z, c11.x
-mov.f32f32 r4.x, r4.x
+mov.f32f32 r6.w, r2.z
+max.f r3.z, r3.z, c14.y
absneg.f r1.w, (abs)r1.w
-mov.f32f32 r3.y, r3.y
-sam.s (f32)(x)r5.y, r5.y, s#5, t#5
-(sy)(ss)mov.f32f32 r5.y, r5.y
-max.f r4.x, r4.x, c14.y
-mov.f32f32 r1.w, r1.w
-add.f r5.z, r6.y, r3.y
-mov.f32f32 r5.y, r5.y
-mov.f32f32 r4.x, r4.x
+add.f r4.x, r4.x, c14.w
+mul.f r7.x, c14.x, r6.w
+mov.f32f32 r7.y, r3.z
add.f r1.w, r1.w, c14.w
-mov.f32f32 r5.z, r5.z
-add.f r5.w, c15.y, (neg)r2.y
-add.f r6.y, r6.w, r4.x
-mov.f32f32 r1.w, r1.w
-mul.f r2.z, r2.z, r0.z
-mov.f32f32 r5.w, r5.w
-add.f r6.w, c15.y, (neg)r0.w
-rcp r7.z, r5.z
-(ss)mov.f32f32 r7.z, r7.z
-mov.f32f32 r6.y, r6.y
+max.f r4.x, r4.x, c14.y
+add.f r1.z, r1.z, (neg)r7.x
+add.f r5.x, r5.x, r7.y
max.f r1.w, r1.w, c14.y
-mov.f32f32 r6.w, r6.w
-mul.f r3.w, r3.w, r7.z
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r1.w, r1.w
-mul.f r7.z, r5.w, r6.w
-mov.f32f32 r3.w, r3.w
-bary.f r7.w, 25, r1.x
-rcp r8.x, r6.y
-(ss)mov.f32f32 r8.x, r8.x
-mul.f r5.y, r7.z, r5.y
-add.f r0.y, c16.y, r0.y
-mul.f r7.z, r7.w, c15.x
-mul.f r4.y, r4.y, r8.x
-add.f r7.y, r7.y, r1.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r7.w, r7.z
-mov.f32f32 r4.y, r4.y
-mov.f32f32 r8.x, r7.z
-mul.f r0.y, r0.y, c6.z
-mov.f32f32 r8.y, r7.w
-bary.f r7.w, 26, r1.x
-mov.f32f32 r8.x, r8.x
-mov.f32f32 r8.z, r0.y
-mov.f32f32 r7.y, r7.y
-mul.f r7.w, r7.w, c15.x
-mov.f32f32 r8.w, r8.x
-mov.f32f32 r9.y, r8.z
-mov.f32f32 r6.x, r6.x
-mov.f32f32 r8.x, r7.w
-mov.f32f32 r9.x, r7.w
-rcp r8.z, r7.y
-(ss)mov.f32f32 r9.w, r8.z
-mov.f32f32 r9.z, r6.x
-mov.f32f32 r8.z, r8.x
-mov.f32f32 r6.x, r9.x
-mov.f32f32 r8.x, r7.x
-mul.f r3.x, r3.x, r9.w
-mul.f r0.z, r2.z, r0.z
-rcp r2.z, r6.y
-nop
-(ss)rcp r6.y, r6.y
-mov.f32f32 r9.x, r6.x
-sam (f32)(xyzw)r9.w, r8.y, s#3, t#3
-(sy)mov.f32f32 r6.x, r9.w
-(ss)add.f r8.y, c14.z, (neg)r10.x
-mov.f32f32 r8.z, r10.y
-mov.f32f32 r9.w, r8.x
-mul.f r6.x, r6.x, r3.w
-rcp r8.x, r5.z
-(ss)mov.f32f32 r8.x, r8.x
-sam (f32)(xyzw)r10.x, r8.w, s#4, t#4
-(sy)(ss)mul.f r8.w, r10.y, r4.y
-mul.f r9.x, r10.x, r4.y
-mul.f r4.y, r10.z, r4.y
-mul.f r4.w, r4.w, r8.x
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r8.x, r8.y
-mul.f r8.y, r8.z, r3.w
-mov.f32f32 r4.w, r4.w
-mov.f32f32 r8.z, r7.w
-mul.f r2.z, r5.x, r2.z
-mov.f32f32 r5.x, r8.x
-sam.s (f32)(x)r8.x, r9.y, s#5, t#5
-(sy)mov.f32f32 r8.x, r8.x
-(ss)mov.f32f32 r9.y, r8.z
-bary.f r8.z, 24, r1.x
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r9.z, r7.w
-mul.f r3.w, r5.x, r3.w
-mul.f r5.x, r8.z, c15.x
-add.f r2.y, r2.y, c14.z
-mov.f32f32 r9.w, r9.z
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r8.z, r5.x
-mov.f32f32 r10.x, r5.x
-mul.f r6.w, r2.y, r6.w
-mov.f32f32 r10.y, r7.z
-mov.f32f32 r9.z, r8.z
-mov.f32f32 r10.x, r10.x
-mad.f32 r5.y, r6.w, r8.x, r5.y
-mov.f32f32 r10.y, r10.y
-mov.f32f32 r6.w, r7.w
+mov.f32f32 r7.x, r4.x
+mov.f32f32 r7.y, r1.z
+mov.f32f32 r7.z, r5.x
+mov.f32f32 r7.w, r1.w
+add.f r7.x, r6.y, r7.x
+add.f r6.y, c16.x, r7.y
+mul.f r0.w, r0.w, r0.z
+add.f r7.y, r6.z, r7.w
mov.f32f32 r0.z, r0.z
-mov.f32f32 r5.y, r5.y
-sam (f32)(xyzw)r10.z, r9.y, s#3, t#3
-(sy)mov.f32f32 r8.x, r10.w
-(ss)nop
-sam (f32)(xyzw)r9.y, r9.w, s#4, t#4
-(sy)mad.f32 r8.z, r9.z, r2.z, r8.w
-mad.f32 r8.w, r9.y, r2.z, r9.x
-mad.f32 r2.z, r9.w, r2.z, r4.y
-mad.f32 r4.y, r8.x, r4.w, r6.x
-rcp r5.z, r5.z
-(ss)mov.f32f32 r5.z, r5.z
-mov.f32f32 r6.x, r6.y
-add.f r6.y, c14.z, (neg)r10.z
-mov.f32f32 r8.x, r11.x
-mul.f r3.y, r3.y, r5.z
-mul.f r4.x, r4.x, r6.x
-mov.f32f32 r5.z, r6.y
-mad.f32 r6.x, r8.x, r4.w, r8.y
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r6.y, r7.z
-mov.f32f32 r4.x, r4.x
-mov.f32f32 r8.x, r7.z
-mov.f32f32 r5.z, r5.z
-mov.f32f32 r9.x, r6.y
-mov.f32f32 r6.y, r5.x
-mov.f32f32 r8.x, r8.x
-mov.f32f32 r8.y, r5.x
-mad.f32 r3.w, r5.z, r4.w, r3.w
-mov.f32f32 r9.y, r6.y
-mov.f32f32 r4.z, r4.z
-mov.f32f32 r8.y, r8.y
-mov.f32f32 r10.z, r6.w
-mul.f r0.z, r0.z, c16.z
-mov.f32f32 r9.z, r4.z
-add.f r1.z, c16.y, r1.z
-sam (f32)(xyzw)r10.w, r9.x, s#3, t#3
-(sy)mov.f32f32 r4.z, r11.x
-add.f r4.w, c14.z, (neg)r10.w
-mov.f32f32 r5.z, r11.y
-sam (f32)(xyzw)r10.w, r8.x, s#4, t#4
-(sy)mad.f32 r2.z, r11.y, r4.x, r2.z
-mad.f32 r4.y, r4.z, r3.y, r4.y
-mad.f32 r4.z, r11.x, r4.x, r8.z
-mad.f32 r4.x, r10.w, r4.x, r8.w
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r4.y, r4.y
-mov.f32f32 r4.z, r4.z
-mov.f32f32 r4.x, r4.x
-mul.f r2.z, c8.z, r2.z
-mad.f32 r4.y, c15.y, r4.y, c15.z
-mul.f r4.z, c8.y, r4.z
-mul.f r4.x, c8.x, r4.x
-mov.f32f32 r4.w, r4.w
-mov.f32f32 r4.y, r4.y
-bary.f r6.y, 4, r1.x
-bary.f r6.w, 5, r1.x
-(ss)bary.f r8.x, 6, r1.x
-mov.f32f32 r2.z, r2.z
-mul.f r6.y, r6.y, r4.y
+rcp r6.z, r7.z
+mov.f32f32 r3.w, r3.w
+mov.f32f32 r7.w, r7.x
+mov.f32f32 r8.x, r7.y
+mul.f r8.z, r6.y, c6.w
+(ss)mul.f r3.w, r3.w, r6.z
+bary.f r6.z, 25, r1.x
+mul.f r0.z, r0.w, r0.z
+mov.f32f32 r6.y, r8.z
+mov.f32f32 r0.w, r3.w
+mul.f r8.w, r6.z, c15.x
+rcp r6.z, r7.w
mov.f32f32 r4.w, r4.w
-mul.f r6.w, r6.w, r4.y
-mul.f r4.y, r8.x, r4.y
+rcp r8.y, r8.x
mov.f32f32 r4.z, r4.z
-mad.f32 r3.w, r4.w, r3.y, r3.w
-mov.f32f32 r4.x, r4.x
-mad.f32 r3.y, r5.z, r3.y, r6.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r3.w, r3.w
-sam (f32)(xyzw)r8.x, r10.y, s#0, t#0
-(sy)mul.f r4.w, r8.x, r3.x
-mov.f32f32 r3.y, r3.y
-mul.f r1.z, r1.z, c6.w
-mad.f32 r3.w, c15.y, r3.w, c15.z
-mul.f r5.z, r8.z, r3.x
-mul.f r3.x, r8.y, r3.x
-rcp r6.x, r7.y
-mad.f32 r3.y, c15.y, r3.y, c15.z
-mov.f32f32 r3.w, r3.w
-bary.f r8.x, 12, r1.x
-bary.f r8.y, 7, r1.x
-bary.f r8.z, 13, r1.x
-bary.f r8.w, 14, r1.x
-mov.f32f32 r3.y, r3.y
-mul.f r8.x, r8.x, (neg)r8.y
-mul.f r8.z, r8.z, (neg)r8.y
-mul.f r8.y, r8.w, (neg)r8.y
-mov.f32f32 r8.w, r1.z
-mad.f32 r6.y, r8.x, r3.w, r6.y
-mad.f32 r6.w, r8.z, r3.w, r6.w
-mad.f32 r3.w, r8.y, r3.w, r4.y
+bary.f r9.x, 2, r1.x
+mov.f32f32 r9.y, r8.w
+bary.f r9.z, 26, r1.x
+(ss)mul.f r4.w, r4.w, r6.z
+mul.f r4.z, r4.z, r8.y
mov.f32f32 r9.w, r8.w
-mov.f32f32 r4.y, r6.y
-mov.f32f32 r6.y, r6.w
-mad.f32 r0.x, r0.x, r3.y, r4.y
-mad.f32 r2.x, r2.x, r3.y, r6.y
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r4.y, r7.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r2.x, r2.x
-mad.f32 r3.y, r3.z, r3.y, r3.w
-nop
-mul.f r3.z, r0.x, r0.x
-mov.f32f32 r10.x, r4.y
-mad.f32 r3.z, r2.x, r2.x, r3.z
-mov.f32f32 r3.y, r3.y
-(ss)mov.f32f32 r3.w, r6.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r3.z, r3.z
-rcp r4.y, r7.y
-(ss)mov.f32f32 r4.y, r4.y
-mad.f32 r3.z, r3.y, r3.y, r3.z
-sam.s (f32)(x)r6.x, r9.z, s#5, t#5
-mul.f r3.w, r6.z, r3.w
-(sy)mov.f32f32 r6.x, r6.x
-add.f r0.w, r0.w, c14.z
-exp2 r0.z, r0.z
-(ss)mov.f32f32 r0.z, r0.z
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r6.y, r7.w
-rsq r3.z, r3.z
-(ss)mov.f32f32 r3.z, r3.z
-mul.f r5.w, r5.w, r0.w
-add.f r6.z, c18.y, (neg)r0.z
-mul.f r1.w, r1.w, r4.y
-mul.f r0.x, r0.x, r3.z
-mul.f r2.x, r2.x, r3.z
-mul.f r3.y, r3.y, r3.z
+mul.f r10.z, r9.z, c15.x
+mov.f32f32 r6.z, r4.w
+mov.f32f32 r8.y, r4.z
+mov.f32f32 r10.y, r8.w
+mov.f32f32 r9.z, r10.z
+mov.f32f32 r10.x, r10.z
+add.f r11.z, r9.x, c15.w
+mul.f r0.z, r0.z, c16.z
+(ss)rcp r7.w, r7.w
nop
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r3.y, r3.y
-mad.f32 r3.z, r5.w, r6.x, r5.y
-mul.f r4.y, r0.x, r0.x
-mul.f r5.y, (neg)c9.x, r0.x
-mad.f32 r4.y, r2.x, r2.x, r4.y
-mad.f32 r5.y, (neg)c9.y, r2.x, r5.y
-mov.f32f32 r3.z, r3.z
-mov.f32f32 r5.w, r6.y
+rcp r7.x, r7.x
mov.f32f32 r4.y, r4.y
+sam (f32)(xyzw)r11.w, r10.y, s#0, t#0
+(sy)(ss)mul.f r10.y, r12.y, r8.y
+sam (f32)(xyzw)r12.y, r9.y, s#3, t#3
+(sy)(ss)mul.f r9.y, r12.y, r0.w
+rcp r7.z, r7.z
mov.f32f32 r5.y, r5.y
-mad.f32 r4.y, r3.y, r3.y, r4.y
-mad.f32 r5.y, (neg)c9.z, r3.y, r5.y
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r6.x, r5.x
-mul.f r6.y, r6.z, c11.y
+sam (f32)(xyzw)r13.x, r9.w, s#4, t#4
+(sy)mul.f r9.z, r13.y, r6.z
+add.f r9.x, c14.z, (neg)r12.z
+(ss)mul.f r9.w, r13.z, r6.z
+(ss)mul.f r5.y, r5.y, r7.z
+mul.f r4.y, r4.y, r7.w
+mul.f r0.w, r9.x, r0.w
+rcp r6.z, r8.x
+mov.f32f32 r5.z, r5.z
+mov.f32f32 r7.z, r5.y
+mov.f32f32 r7.w, r10.z
+(ss)bary.f r8.x, 24, r1.x
+mov.f32f32 r10.x, r4.y
+mov.f32f32 r11.x, r10.z
+(ss)mul.f r5.z, r5.z, r6.z
+mul.f r9.x, r8.x, c15.x
+mul.f r8.y, r12.x, r8.y
+mul.f r4.w, r13.x, r4.w
+mul.f r3.w, r12.w, r3.w
+mov.f32f32 r8.x, r9.x
+mov.f32f32 r11.y, r9.x
+mov.f32f32 r12.x, r5.z
+mov.f32f32 r10.w, r9.x
+mov.f32f32 r6.z, r11.z
+mul.f r4.z, r11.w, r4.z
+exp2 r0.z, r0.z
+(ss)mov.f32f32 r11.w, r0.z
+sam (f32)(xyzw)r12.y, r7.w, s#3, t#3
+(sy)(ss)mad.f32 r7.w, r12.z, r7.z, r9.y
+rcp r5.x, r5.x
+(ss)mul.f r3.z, r3.z, r5.x
+sam (f32)(xyzw)r13.x, r11.x, s#4, t#4
+(sy)(ss)mad.f32 r5.x, r13.y, r10.x, r9.z
+add.f r8.x, c14.z, (neg)r12.y
+mad.f32 r9.y, r13.z, r10.x, r9.w
+mov.f32f32 r9.z, r3.z
+mov.f32f32 r9.w, r8.w
+mov.f32f32 r10.x, r9.x
+mul.f r4.x, r4.x, r7.x
+mad.f32 r0.w, r8.x, r7.z, r0.w
+sam (f32)(xyzw)r10.z, r10.z, s#0, t#0
+(sy)mad.f32 r7.x, r11.x, r12.x, r10.y
+rcp r7.y, r7.y
+(ss)mul.f r1.w, r1.w, r7.y
+(ss)mov.f32f32 r7.y, r4.x
+mad.f32 r7.z, r10.w, r12.x, r8.y
+sam (f32)(xyzw)r13.y, r9.w, s#3, t#3
+(sy)mad.f32 r7.w, r13.z, r9.z, r7.w
+mov.f32f32 r8.x, r8.w
+mov.f32f32 r8.y, r9.x
+(ss)add.f r9.w, c14.z, (neg)r13.y
+mad.f32 r7.w, c15.y, r7.w, c15.z
+bary.f r10.x, 6, r1.x
+mov.f32f32 r10.y, r1.w
+mad.f32 r0.w, r9.w, r9.z, r0.w
+mov.f32f32 r9.z, r7.w
+bary.f r9.w, 4, r1.x
+bary.f r10.w, 5, r1.x
+mul.f r7.w, r10.x, r7.w
+mad.f32 r0.w, c15.y, r0.w, c15.z
+mul.f r9.w, r9.w, r9.z
+mul.f r9.z, r10.w, r9.z
+bary.f r10.x, 14, r1.x
+mov.f32f32 r10.w, r0.w
+bary.f r11.x, 12, r1.x
+bary.f r11.y, 7, r1.x
+bary.f r12.x, 13, r1.x
+sam (f32)(xyzw)r14.x, r8.x, s#4, t#4
+(sy)mad.f32 r5.x, r14.y, r7.y, r5.x
+mad.f32 r7.y, r14.z, r7.y, r9.y
+(ss)mul.f r8.x, r11.x, (neg)r11.y
+mul.f r8.y, r12.x, (neg)r11.y
+mul.f r9.y, r10.x, (neg)r11.y
+mul.f r7.y, c8.z, r7.y
+mad.f32 r8.x, r8.x, r10.w, r9.w
+mad.f32 r3.w, r12.w, r5.y, r3.w
+mad.f32 r5.y, r8.y, r10.w, r9.z
+mad.f32 r3.z, r13.w, r3.z, r3.w
+mad.f32 r0.w, r9.y, r0.w, r7.w
+mul.f r3.w, c8.y, r5.x
+sam (f32)(xyzw)r8.w, r8.w, s#0, t#0
+(sy)mad.f32 r5.x, r9.y, r10.y, r7.x
+mad.f32 r3.z, c15.y, r3.z, c15.z
+mad.f32 r4.y, r13.x, r4.y, r4.w
+mad.f32 r4.w, r9.x, r10.y, r7.z
+mov.f32f32 r7.x, r5.x
+mov.f32f32 r7.z, r3.z
+mad.f32 r0.w, r3.x, r3.z, r0.w
+(rpt1)nop
+mad.f32 r0.x, r0.x, r7.z, r8.x
+mad.f32 r2.x, r2.x, r7.z, r5.y
+(rpt1)nop
+mov.f32f32 r3.x, r0.x
+mov.f32f32 r3.z, r2.x
+mov.f32f32 r5.y, r0.w
+mov.f32f32 r7.z, r4.w
+mul.f r0.x, r0.x, r3.x
+mad.f32 r4.x, r14.x, r4.x, r4.y
+mad.f32 r0.x, r2.x, r3.z, r0.x
+mad.f32 r2.x, r10.z, r5.z, r4.z
+mad.f32 r0.x, r5.y, r5.y, r0.x
+mul.f r4.x, c8.x, r4.x
+add.f r4.y, c18.y, (neg)r11.w
+mad.f32 r1.w, r8.w, r1.w, r2.x
+(ss)nop
+sam.s (f32)(x)r8.w, r6.x, s#5, t#5
+add.f r2.x, c15.y, (neg)r3.y
+add.f r0.y, c16.y, r0.y
+add.f r1.z, c16.y, r1.z
+rsq r0.x, r0.x
+(ss)mov.f32f32 r3.y, r0.x
+(ss)mul.f r0.x, r0.w, r0.x
+mov.f32f32 r0.w, r1.w
+mov.f32f32 r4.z, r2.x
+mul.f r3.x, r3.x, r3.y
+mul.f r3.y, r3.z, r3.y
+mov.f32f32 r3.z, r0.x
+add.f r5.y, c15.y, (neg)r6.w
+mov.f32f32 r5.z, r3.x
+mul.f r3.x, (neg)c9.x, r3.x
+mov.f32f32 r6.x, r3.y
+mov.f32f32 r6.y, r5.y
+mul.f r6.z, r5.z, r5.z
+mad.f32 r3.x, (neg)c9.y, r3.y, r3.x
+mad.f32 r3.y, r6.x, r6.x, r6.z
+mad.f32 r0.x, (neg)c9.z, r0.x, r3.x
+mad.f32 r3.x, r3.z, r3.z, r3.y
+mul.f r3.y, r4.z, r6.y
+mul.f r4.y, r4.y, c11.y
mul.f r0.z, r0.z, c14.z
-mov.f32f32 r0.y, r0.y
-rsq r4.y, r4.y
-(ss)mov.f32f32 r4.y, r4.y
-max.f r5.y, r5.y, c14.y
-mov.f32f32 r6.x, r6.x
-mov.f32f32 r7.w, r0.y
-mul.f r0.x, r0.x, r4.y
-mov.f32f32 r0.y, r5.y
-bary.f r5.y, 19, r1.x
-bary.f r6.z, 18, r1.x
-mov.f32f32 r0.x, r0.x
-bary.f r6.w, 15, r1.x
-bary.f r7.y, 20, r1.x
-mov.f32f32 r5.y, r5.y
-mov.f32f32 r6.z, r6.z
-mov.f32f32 r6.w, r6.w
-mov.f32f32 r7.y, r7.y
-mad.f32 r8.x, c8.y, r0.y, (neg)r5.y
-mad.f32 r8.y, c8.x, r0.y, (neg)r6.z
-mul.f r0.x, r0.x, r6.w
-mul.f r2.x, r2.x, r4.y
-mad.f32 r0.y, c8.z, r0.y, (neg)r7.y
-mov.f32f32 r6.w, r8.x
-mov.f32f32 r8.x, r8.y
-mov.f32f32 r2.x, r2.x
-bary.f r8.y, 16, r1.x
-mov.f32f32 r0.y, r0.y
-mad.f32 r5.y, c12.x, r6.w, r5.y
-mad.f32 r6.z, c12.x, r8.x, r6.z
-mov.f32f32 r6.w, r8.y
-mad.f32 r0.y, c12.x, r0.y, r7.y
-mov.f32f32 r5.y, r5.y
-mov.f32f32 r6.z, r6.z
-mad.f32 r0.x, r2.x, r6.w, r0.x
-mov.f32f32 r0.y, r0.y
-mul.f r2.x, r3.y, r4.y
-nop
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r2.x, r2.x
-bary.f r3.y, 17, r1.x
-sam (f32)(xyzw)r8.x, r5.w, s#0, t#0
-(sy)mad.f32 r4.y, r8.x, r3.w, r4.w
-mov.f32f32 r1.z, r1.z
-mad.f32 r4.w, r8.z, r3.w, r5.z
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r1.w, r1.w
-mad.f32 r3.x, r8.y, r3.w, r3.x
-mov.f32f32 r3.w, r7.z
-mad.f32 r0.x, r2.x, r3.y, r0.x
-mov.f32f32 r8.x, r1.z
-mov.f32f32 r1.z, r7.x
-mov.f32f32 r5.z, r3.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r2.x, r5.x
-mov.f32f32 r8.y, r1.z
-add.f r0.z, r0.z, r6.y
-max.f r0.x, c14.y, r0.x
-(ss)mov.f32f32 r5.w, r2.x
-mov.f32f32 r6.x, r2.w
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.x, r0.x
-sam.s (f32)(x)r1.z, r7.w, s#5, t#5
-(sy)mov.f32f32 r1.z, r1.z
-mul.f r0.w, r2.y, r0.w
-mov.f32f32 r0.z, r0.z
-bary.f r2.x, 9, r1.x
-sam (f32)(xyzw)r6.w, r5.z, s#0, t#0
-(sy)mad.f32 r2.y, r7.x, r1.w, r3.x
-mad.f32 r0.w, r0.w, r1.z, r3.z
-log2 r0.x, r0.x
-(ss)mul.f r0.x, c12.y, r0.x
-mad.f32 r1.z, r7.y, r1.w, r4.w
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r1.z, r1.z
-mad.f32 r1.w, r6.w, r1.w, r4.y
-mul.f r0.w, c16.w, r0.w
+mul.f r11.x, r0.y, c6.z
+mul.f r11.y, r1.z, c6.w
+mul.f r0.y, r2.y, r5.y
+rsq r1.z, r3.x
+(ss)mov.f32f32 r3.x, r1.z
+max.f r0.x, r0.x, c14.y
+mul.f r1.z, r3.z, r1.z
+bary.f r3.z, 18, r1.x
+mul.f r4.z, r5.z, r3.x
+bary.f r5.y, 15, r1.x
+mov.f32f32 r5.z, r0.x
+bary.f r6.y, 19, r1.x
+mul.f r3.x, r6.x, r3.x
+mul.f r4.z, r4.z, r5.y
+bary.f r5.y, 16, r1.x
+bary.f r6.x, 20, r1.x
+mad.f32 r6.z, c8.y, r5.z, (neg)r6.y
+mad.f32 r5.z, c8.z, r5.z, (neg)r6.x
+mad.f32 r3.x, r3.x, r5.y, r4.z
+bary.f r4.z, 17, r1.x
+mov.f32f32 r5.y, r6.x
+mov.f32f32 r6.x, r6.y
+mad.f32 r0.x, c8.x, r0.x, (neg)r3.z
+mad.f32 r1.z, r1.z, r4.z, r3.x
+mad.f32 r3.x, c12.x, r5.z, r5.y
+mad.f32 r4.z, c12.x, r6.z, r6.x
+mov.f32f32 r3.z, r3.z
+max.f r1.z, c14.y, r1.z
+mad.f32 r0.x, c12.x, r0.x, r3.z
+(sy)mul.f r3.y, r3.y, r8.w
+add.f r0.z, r0.z, r4.y
+mov.f32f32 r8.y, r11.x
+mov.f32f32 r8.w, r11.z
+mov.f32f32 r6.x, r11.y
+log2 r1.z, r1.z
+(ss)mul.f r1.z, c12.y, r1.z
max.f r0.z, r0.z, c14.y
-mov.f32f32 r2.x, r2.x
-bary.f r2.w, 10, r1.x
-exp2 r0.x, r0.x
-(ss)mul.f r2.z, r2.z, r0.x
-mul.f r3.x, r4.z, r0.x
-mad.f32 r0.y, r1.z, r0.y, r2.z
-mad.f32 r2.z, r2.y, r5.y, r3.x
-(ss)mul.f r0.x, r4.x, r0.x
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.z, r2.z
-mad.f32 r0.x, r1.w, r6.z, r0.x
+mov.f32f32 r6.y, r11.z
+(rpt1)nop
+sam.s (f32)(x)r7.w, r8.y, s#5, t#5
min.f r0.z, r0.z, c14.z
-mul.f r0.y, r0.y, r0.w
-mul.f r2.z, r2.z, r0.w
-mov.f32f32 r0.x, r0.x
+(sy)mad.f32 r0.y, r0.y, r7.w, r3.y
+exp2 r1.z, r1.z
+(ss)mul.f r3.y, r7.y, r1.z
+mul.f r3.z, r3.w, r1.z
+mad.f32 r3.y, r7.x, r3.x, r3.y
+sam.s (f32)(x)r5.y, r5.w, s#5, t#5
+add.f r2.z, r2.z, c14.z
+mad.f32 r3.z, r7.z, r4.z, r3.z
+(ss)mul.f r1.z, r4.x, r1.z
add.f r3.x, c18.y, (neg)r0.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r2.z, r2.z
-mad.f32 r0.y, c7.z, r1.z, r0.y
-mad.f32 r1.z, c7.y, r2.y, r2.z
-mul.f r0.x, r0.x, r0.w
-mul.f r0.w, r3.x, c10.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r0.x, r0.x
-add.f r2.y, c18.y, (neg)r0.z
-mul.f r0.y, r0.z, r0.y
-mul.f r1.z, r0.z, r1.z
-mad.f32 r0.x, c7.x, r1.w, r0.x
-mul.f r1.w, r2.y, c10.y
-add.f r0.y, r0.y, r0.w
-mov.f32f32 r0.w, r2.w
-mov.f32f32 r0.x, r0.x
-add.f r1.z, r1.z, r1.w
-add.f r1.w, c18.y, (neg)r0.z
-mov.f32f32 r2.y, r0.w
-bary.f (ei)r0.w, 11, r1.x
-mul.f r0.x, r0.z, r0.x
-mul.f r0.z, r1.w, c10.x
-mov.f32f32 r6.y, r2.x
-mov.f32f32 r0.w, r0.w
+mul.f r2.x, r2.x, r2.z
+mad.f32 r0.x, r0.w, r0.x, r1.z
+add.f r0.w, c18.y, (neg)r0.z
+add.f r1.z, c18.y, (neg)r0.z
+(sy)mad.f32 r0.y, r2.x, r5.y, r0.y
+sam.s (f32)(x)r3.w, r11.x, s#5, t#5
+mul.f r2.x, r2.y, r2.z
+mul.f r2.y, r3.x, c10.z
+mul.f r0.w, r0.w, c10.y
+mul.f r1.z, r1.z, c10.x
+(sy)mad.f32 r0.y, r2.x, r3.w, r0.y
+bary.f r3.x, 11, r1.x
+bary.f r3.w, 8, r1.x
+bary.f (ei)r4.x, 9, r1.x
+mul.f r0.y, c16.w, r0.y
mov.f32f32 r1.x, c14.y
(rpt1)nop
-mov.f32f32 r2.z, r0.w
-add.f r0.x, r0.x, r0.z
-sam (f32)(w)r2.w, r6.x, s#1, t#1
-(sy)add.f r0.z, c14.z, (neg)r3.z
-(rpt3)nop
-sam (f32)(w)r1.w, r2.y, s#2, t#2
-(sy)cmps.f.lt r0.w, r2.z, c17.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r1.y, r2.z
-nop
-cov.u32f32 r0.w, r0.w
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r1.y, r1.y
-nop
-cmps.f.ne r0.w, r0.w, c14.y
-mov.f32f32 r0.z, r0.z
-(rpt1)nop
-sel.b32 r0.w, r1.x, r0.w, r1.y
-(ss)mov.f32f32 r2.z, r0.z
-(rpt1)nop
-mul.f r0.y, r0.y, r0.w
-mul.f r0.z, r1.z, r0.w
-mul.f r0.x, r0.x, r0.w
-nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.x, r0.x
-nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.x, r0.x
-nop
-mov.f32f32 r2.y, r0.y
-mov.f32f32 r2.x, r0.z
-mov.f32f32 r1.w, r0.x
+mov.f32f32 r1.y, r0.y
+mul.f r0.x, r0.x, r0.y
+sam (f32)(w)r5.y, r2.w, s#2, t#2
+(sy)cmps.f.lt r0.y, r6.x, c17.x
+sam (f32)(w)r3.w, r3.w, s#1, t#1
+mad.f32 r0.x, c7.x, r1.w, r0.x
+mul.f r1.w, r3.y, r1.y
+mul.f r1.y, r3.z, r1.y
+mad.f32 r1.w, c7.z, r5.x, r1.w
+mad.f32 r1.y, c7.y, r4.w, r1.y
+mul.f r0.x, r0.z, r0.x
+cov.u32f32 r0.y, r0.y
+mul.f r1.w, r0.z, r1.w
+mul.f r0.z, r0.z, r1.y
+add.f r0.x, r0.x, r1.z
+cmps.f.ne r0.y, r0.y, c14.y
+add.f r1.y, r1.w, r2.y
+add.f r0.z, r0.z, r0.w
+(sy)(ss)add.f r2.w, c14.z, (neg)r4.z
+sel.b32 r0.y, r1.x, r0.y, r6.x
+(rpt2)nop
+mul.f r2.z, r1.y, r0.y
+mul.f r2.y, r0.z, r0.y
+mul.f r2.x, r0.x, r0.y
end
-nop
-nop
-nop
-; FRAG: outputs: r1.w (1:0)
-; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r7.z (5:10,cm=f,il=12,b=1) r0.x (5:11,cm=f,il=16,b=1) r6.x (5:12,cm=f,il=20,b=1) r4.z (5:13,cm=f,il=24,b=1) r2.w (5:14,cm=f,il=28,b=1) r7.x (5:15,cm=f,il=32,b=1)
-; FRAG: 535 instructions, 0 half, 12 full
-; pos (bary): r1.x
-; color: r1.w
-; fragcoord: r0.x
+; FRAG: outputs: r2.x (1:0)
+; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r10.z (5:10,cm=f,il=12,b=1) r2.y (5:11,cm=f,il=16,b=1) r4.z (5:12,cm=f,il=20,b=1) r5.z (5:13,cm=f,il=24,b=1) r2.y (5:14,cm=f,il=28,b=1) r7.x (5:15,cm=f,il=32,b=1)
+; FRAG: 354 instructions, 0 half, 15 full
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-37.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-37.asm
index 83ef8e1..494f814 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-37.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-37.asm
@@ -1,210 +1,143 @@
; options:
; VERT: new compiler
-@in(r0.x) in0
-@in(r0.y) in1
-@in(r0.z) in2
-@in(r0.w) in4
-@in(r1.x) in5
-@in(r1.y) in6
-@in(r1.z) in8
-@in(r1.w) in9
-@in(r2.x) in10
-@in(r2.y) in12
-@in(r2.z) in13
-@out(r9.w) out0
-@out(r10.x) out1
-@out(r10.y) out2
-@out(r10.z) out3
-@out(r5.w) out4
-@out(r6.x) out5
-@out(r6.y) out6
-@out(r6.z) out7
-@out(r4.x) out8
-@out(r4.y) out9
-@out(r4.z) out10
-@out(r4.w) out11
-@out(r2.w) out12
-@out(r3.x) out13
-@out(r3.y) out14
-@out(r3.z) out15
-@out(r7.w) out16
-@out(r8.x) out17
-@out(r8.y) out18
-@out(r8.z) out19
-@out(r6.w) out20
-@out(r7.x) out21
-@out(r7.y) out22
-@out(r7.z) out23
-@out(r8.w) out24
-@out(r9.x) out25
-@out(r9.y) out26
-@out(r9.z) out27
-@out(r10.w) out28
-@out(r11.x) out29
-@out(r11.y) out30
-@out(r11.z) out31
-(sy)(ss)add.f r2.w, c4.x, (neg)r0.x
-mul.f r3.x, r0.w, r0.w
-mul.f r3.y, c8.w, r0.x
-mul.f r3.z, c8.z, r0.x
-mul.f r3.w, r2.w, r2.w
-add.f r4.x, c4.y, (neg)r0.y
-add.f r3.x, c13.x, (neg)r3.x
-mad.f32 r3.y, c9.w, r0.y, r3.y
-mad.f32 r3.z, c9.z, r0.y, r3.z
-mad.f32 r3.w, r4.x, r4.x, r3.w
-mov.f32f32 r3.x, r3.x
-mad.f32 r3.y, c10.w, r0.z, r3.y
-mad.f32 r3.z, c10.z, r0.z, r3.z
-mov.f32f32 r3.w, r3.w
-add.f r4.y, c4.z, (neg)r0.z
-mul.f r4.z, r3.x, r3.x
-mul.f r4.w, r1.x, r0.w
-add.f r3.y, r3.y, c11.w
-mad.f32 r3.w, r4.y, r4.y, r3.w
-add.f r3.z, r3.z, c11.z
-mul.f r5.x, c8.y, r0.x
-mul.f r5.y, c8.x, r0.x
-add.f r4.w, c13.y, (neg)r4.w
-mul.f r2.x, r2.x, c6.z
-mov.f32f32 r3.y, r3.y
-rsq r3.w, r3.w
-(ss)mov.f32f32 r3.w, r3.w
-mov.f32f32 r4.w, r4.w
-mov.f32f32 r3.z, r3.z
-mad.f32 r5.x, c9.y, r0.y, r5.x
-mad.f32 r2.w, r2.w, r3.w, (neg)c5.x
-mad.f32 r4.z, r4.w, r4.w, r4.z
-mad.f32 r4.x, r4.x, r3.w, (neg)c5.y
-mad.f32 r3.w, r4.y, r3.w, (neg)c5.z
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r4.y, r4.z
-mul.f r4.z, r1.y, r0.w
-mov.f32f32 r4.x, r4.x
-mul.f r5.z, r2.w, r2.w
-mov.f32f32 r3.w, r3.w
-add.f r4.z, c13.y, (neg)r4.z
-mad.f32 r5.z, r4.x, r4.x, r5.z
-mov.f32f32 r6.z, r3.y
-mov.f32f32 r6.y, r3.z
-mad.f32 r3.y, c10.y, r0.z, r5.x
-mov.f32f32 r3.z, r5.z
-mov.f32f32 r4.z, r4.z
-mad.f32 r3.z, r3.w, r3.w, r3.z
-add.f r3.y, r3.y, c11.y
-mad.f32 r5.x, c9.x, r0.y, r5.y
-mov.f32f32 r2.x, r2.x
-mad.f32 r5.x, c10.x, r0.z, r5.x
-mul.f r5.y, c0.w, r0.x
-mul.f r5.z, c0.z, r0.x
-rsq r3.z, r3.z
-(ss)mov.f32f32 r3.z, r3.z
-mad.f32 r4.y, r4.z, r4.z, r4.y
-mul.f r3.y, r3.y, c12.y
-add.f r5.x, r5.x, c11.x
-mul.f r3.w, r3.w, r3.z
-mul.f r4.x, r4.x, r3.z
-mul.f r2.w, r2.w, r3.z
-mov.f32f32 r6.x, r3.y
-mov.f32f32 r3.y, r3.w
-mov.f32f32 r3.z, r4.x
-mov.f32f32 r2.w, r2.w
-rsq r3.w, r4.y
-(ss)mov.f32f32 r3.w, r3.w
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r3.z, r3.z
-mov.f32f32 r2.w, r2.w
-nop
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r3.z, r3.z
-(rpt1)nop
-mov.f32f32 r7.x, r3.y
-mov.f32f32 r6.w, r3.z
-mov.f32f32 r8.z, r2.w
-mul.f r2.w, r3.x, r3.w
-mul.f r3.x, r4.z, r3.w
-mul.f r3.y, r4.w, r3.w
-mul.f r3.z, r5.x, c12.x
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r5.w, r3.z
-mul.f r3.z, r1.x, r2.w
-mul.f r3.w, r0.w, r3.x
-mad.f32 r3.z, r0.w, r3.y, (neg)r3.z
-mad.f32 r3.w, r1.y, r2.w, (neg)r3.w
-mul.f r4.x, r1.y, r3.y
-(ss)mov.f32f32 r4.y, r3.x
-mov.f32f32 r3.z, r3.z
-mov.f32f32 r3.w, r3.w
-mad.f32 r3.x, r1.x, r3.x, (neg)r4.x
-mov.f32f32 r4.z, r4.y
-mov.f32f32 r8.y, r3.z
-mov.f32f32 r8.x, r3.w
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r2.w, r2.w
-nop
-mov.f32f32 r7.w, r3.x
-mov.f32f32 r4.y, r3.y
-mov.f32f32 r4.x, r2.w
-mov.f32f32 r2.x, r2.x
-mad.f32 r2.w, c1.w, r0.y, r5.y
-mad.f32 r3.x, c1.z, r0.y, r5.z
-mul.f r3.y, c0.y, r0.x
-mov.f32f32 r8.w, r2.x
-mad.f32 r2.x, c2.w, r0.z, r2.w
-mad.f32 r2.w, c2.z, r0.z, r3.x
-mad.f32 r3.x, c1.y, r0.y, r3.y
-mul.f r3.y, c0.x, r0.x
-add.f r2.x, r2.x, c3.w
-add.f r2.w, r2.w, c3.z
-mad.f32 r3.x, c2.y, r0.z, r3.x
-mad.f32 r3.y, c1.x, r0.y, r3.y
-mov.f32f32 r10.z, r2.x
-mov.f32f32 r10.y, r2.w
-add.f r2.x, r3.x, c3.y
-mad.f32 r2.w, c2.x, r0.z, r3.y
-mul.f r1.w, r1.w, c6.y
-mul.f r1.z, r1.z, c6.x
-mov.f32f32 r10.x, r2.x
-add.f r2.x, r2.w, c3.x
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r1.z, r1.z
-mad.f32 r2.w, c7.x, r0.z, c7.y
-mov.f32f32 r9.w, r2.x
-mov.f32f32 r7.z, r1.w
-mov.f32f32 r7.y, r1.z
-mov.f32f32 r1.z, r2.w
-mad.f32 r1.w, c7.x, r0.x, c7.y
-mov.f32f32 r2.x, (0.000000)
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r3.z, r1.z
-mov.f32f32 r1.z, r1.w
-mov.f32f32 r11.z, r2.x
-mov.f32f32 r11.y, r0.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r3.y, r1.z
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.z, r1.y
-mov.f32f32 r11.x, r0.y
-mov.f32f32 r0.y, r1.x
-mov.f32f32 r10.w, r0.x
-mov.f32f32 r9.z, r0.z
-mov.f32f32 r0.x, r0.w
-mov.f32f32 r9.y, r0.y
-mov.f32f32 r0.y, r2.z
-mov.f32f32 r0.z, r2.y
-mov.f32f32 r9.x, r0.x
-mov.f32f32 r0.x, c13.z
-mov.f32f32 r3.x, r0.y
+@in(r7.x) in0
+@in(r7.y) in1
+@in(r7.z) in2
+@in(r6.y) in4
+@in(r6.z) in5
+@in(r6.w) in6
+@in(r8.x) in8
+@in(r8.y) in9
+@in(r8.z) in10
+@in(r3.x) in12
+@in(r3.y) in13
+@out(r0.x) out0
+@out(r0.y) out1
+@out(r0.z) out2
+@out(r0.w) out3
+@out(r1.x) out4
+@out(r1.y) out5
+@out(r1.z) out6
+@out(r1.w) out7
+@out(r2.x) out8
+@out(r2.y) out9
+@out(r2.z) out10
+@out(r2.w) out11
+@out(r3.x) out12
+@out(r3.y) out13
+@out(r3.z) out14
+@out(r3.w) out15
+@out(r4.x) out16
+@out(r4.y) out17
+@out(r4.z) out18
+@out(r4.w) out19
+@out(r5.x) out20
+@out(r5.y) out21
+@out(r5.z) out22
+@out(r5.w) out23
+@out(r6.x) out24
+@out(r6.y) out25
+@out(r6.z) out26
+@out(r6.w) out27
+@out(r7.x) out28
+@out(r7.y) out29
+@out(r7.z) out30
+@out(r7.w) out31
+@const(c13.x) 0x3f800000, 0x00000000, 0xbf800000, 0x00000000
+@const(c14.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
+(sy)(ss)add.f r0.x, c4.x, (neg)r7.x
+mul.f r0.y, r6.y, r6.y
+mul.f r0.z, c8.y, r7.x
+mul.f r0.w, c8.x, r7.x
+mul.f r1.x, r0.x, r0.x
+add.f r1.z, c4.y, (neg)r7.y
+add.f r0.y, c13.x, (neg)r0.y
+mad.f32 r0.z, c9.y, r7.y, r0.z
+mad.f32 r0.w, c9.x, r7.y, r0.w
+mad.f32 r1.x, r1.z, r1.z, r1.x
+add.f r1.w, c4.z, (neg)r7.z
+mov.f32f32 r1.y, r0.y
+mad.f32 r0.z, c10.y, r7.z, r0.z
+mad.f32 r0.w, c10.x, r7.z, r0.w
+mad.f32 r1.x, r1.w, r1.w, r1.x
+mul.f r2.x, r1.y, r1.y
+mul.f r1.y, r6.z, r6.y
+add.f r0.z, r0.z, c11.y
+add.f r0.w, r0.w, c11.x
+mul.f r2.y, r6.w, r6.y
+mul.f r2.z, c8.w, r7.x
+rsq r1.x, r1.x
+(ss)mov.f32f32 r2.w, r1.x
+add.f r3.z, c13.y, (neg)r1.y
+mad.f32 r0.x, r0.x, r1.x, (neg)c5.x
+mul.f r1.y, r0.z, c12.y
+mad.f32 r0.z, r1.z, r2.w, (neg)c5.y
+mov.f32f32 r1.z, r3.z
+(ss)mov.f32f32 r1.x, r0.x
+mad.f32 r1.w, r1.w, r2.w, (neg)c5.z
mov.f32f32 r2.w, r0.z
+mad.f32 r2.x, r3.z, r1.z, r2.x
+mul.f r1.x, r1.x, r1.x
+add.f r2.y, c13.y, (neg)r2.y
+mad.f32 r0.z, r0.z, r2.w, r1.x
+mov.f32f32 r3.z, r1.w
+mul.f r1.x, r0.w, c12.x
+mov.f32f32 r0.w, r2.y
+mad.f32 r2.z, c9.w, r7.y, r2.z
+mad.f32 r0.z, r1.w, r3.z, r0.z
+mad.f32 r1.w, c10.w, r7.z, r2.z
+mul.f r2.z, c8.z, r7.x
+mul.f r3.w, c0.w, r7.x
+mul.f r4.x, c0.z, r7.x
+mul.f r5.z, c0.y, r7.x
+mul.f r5.w, c0.x, r7.x
+rsq r0.z, r0.z
+(ss)mov.f32f32 r4.y, r0.z
+mul.f r4.w, r0.x, r0.z
+mad.f32 r0.x, r2.y, r0.w, r2.x
+add.f r1.w, r1.w, c11.w
+mul.f r5.y, r3.z, r4.y
+mul.f r5.x, r2.w, r4.y
+(ss)mad.f32 r0.z, c9.z, r7.y, r2.z
+mad.f32 r2.y, c1.w, r7.y, r3.w
+mad.f32 r2.w, c1.z, r7.y, r4.x
+rsq r0.x, r0.x
+(ss)mov.f32f32 r3.z, r0.x
+mul.f r2.x, r0.y, r0.x
+(ss)mad.f32 r0.x, c10.z, r7.z, r0.z
+mad.f32 r0.y, c2.w, r7.z, r2.y
+mul.f r2.z, r0.w, r3.z
+mul.f r2.y, r1.z, r3.z
+mov.f32f32 r0.z, r2.x
+add.f r1.z, r0.x, c11.z
+mov.f32f32 r0.x, r2.z
+mov.f32f32 r3.z, r2.y
+mul.f r3.w, r6.z, r0.z
+add.f r0.w, r0.y, c3.w
+mul.f r0.y, r6.y, r0.x
+mul.f r4.x, r6.w, r3.z
+mad.f32 r4.y, r6.w, r0.z, (neg)r0.y
+mad.f32 r4.x, r6.z, r0.x, (neg)r4.x
+mad.f32 r4.z, r6.y, r3.z, (neg)r3.w
+mad.f32 r0.x, c2.z, r7.z, r2.w
+mad.f32 r0.y, c1.y, r7.y, r5.z
+mad.f32 r2.w, c1.x, r7.y, r5.w
+mad.f32 r0.y, c2.y, r7.z, r0.y
+add.f r0.z, r0.x, c3.z
+mad.f32 r0.x, c2.x, r7.z, r2.w
nop
-mov.f32f32 r4.w, r0.x
+add.f r0.y, r0.y, c3.y
+mov.f32f32 r7.w, (0.000000)
+add.f r0.x, r0.x, c3.x
+mul.f r6.x, r8.z, c6.z
+mul.f r5.w, r8.y, c6.y
+mul.f r5.z, r8.x, c6.x
+mad.f32 r3.w, c7.x, r7.z, c7.y
+mad.f32 r3.z, c7.x, r7.x, c7.y
+mov.f32f32 r2.w, c13.z
end
nop
-; VERT: outputs: r9.w (0:0) r5.w (5:9) r4.x (5:10) r2.w (5:11) r7.w (5:12) r6.w (5:13) r8.w (5:14) r10.w (5:15)
-; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r1.z (0:0,cm=7,il=16,b=0) r2.y (0:0,cm=3,il=20,b=0)
-; VERT: 160 instructions, 0 half, 12 full
-; pos: r9.w
+; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11) r4.x (5:12) r5.x (5:13) r6.x (5:14) r7.x (5:15)
+; VERT: inputs: r7.x (0:0,cm=7,il=8,b=0) r6.y (0:0,cm=7,il=12,b=0) r8.x (0:0,cm=7,il=16,b=0) r3.x (0:0,cm=3,il=20,b=0)
+; VERT: 91 instructions, 0 half, 9 full
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-38.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-38.asm
index bb354e6..95d5c75 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-38.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-38.asm
@@ -4,214 +4,139 @@
@in(r0.w) in3
@in(r1.x) in4
@in(r1.y) in5
-@out(r4.x) out0
-@out(r4.y) out1
-@out(r4.z) out2
-@out(r4.w) out3
+@out(r2.x) out0
+@out(r2.y) out1
+@out(r2.z) out2
+@out(r2.w) out3
+@const(c9.x) 0x3f000000, 0x00000000, 0x3f800000, 0xba03126f
+@const(c10.x) 0xbf000000, 0x40000000, 0x3f800000, 0x3fb8aa65
+@const(c11.x) 0x3de38866, 0x3cf5c28f, 0x00000000, 0x00000000
+@const(c12.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.x, 0, r1.x
add.f r0.y, r0.w, c9.y
bary.f r0.w, 1, r1.x
-bary.f r1.z, 4, r1.x
-add.f r1.w, r0.x, c10.x
-bary.f r2.x, 6, r1.x
+bary.f r1.z, 6, r1.x
+add.f r2.x, r0.x, c10.x
+bary.f r1.w, 7, r1.x
add.f r2.y, r0.w, c10.x
-mov.f32f32 r1.z, r1.z
-floor.f r2.z, r1.w
+bary.f r2.z, 2, r1.x
+floor.f r2.w, r2.x
rcp r0.y, r0.y
add.f r0.z, r0.z, c9.y
-floor.f r2.w, r2.y
-mov.f32f32 r3.x, r1.z
-add.f r1.z, r1.w, (neg)r2.z
+floor.f r3.x, r2.y
+add.f r3.w, r2.z, c9.w
+add.f r2.x, r2.x, (neg)r2.w
(ss)mul.f r0.y, r0.z, r0.y
-add.f r0.z, r2.y, (neg)r2.w
-bary.f r1.w, 5, r1.x
-mov.f32f32 r1.z, r1.z
+absneg.f r0.z, (neg)c7.x
+add.f r2.y, r2.y, (neg)r3.x
+mov.f32f32 r2.z, r2.x
+add.f r2.x, r2.x, c9.z
+mul.f r0.z, r0.z, c7.x
+sam (f32)(w)r4.x, r1.z, s#2, t#2
+(ss)mov.f32f32 r1.z, r2.y
+mul.f r1.w, c9.x, r2.z
+add.f r2.z, c10.y, (neg)r2.z
+mul.f r0.z, r0.z, r0.y
mov.f32f32 r0.y, r0.y
-absneg.f r2.y, (neg)c7.x
-mov.f32f32 r0.z, r0.z
-mul.f r2.z, c9.x, r1.z
-mov.f32f32 r1.w, r1.w
-add.f r2.w, c10.y, (neg)r1.z
-add.f r3.z, c10.y, (neg)r0.z
-mov.f32f32 r2.z, r2.z
-mul.f r2.y, r2.y, c7.x
-mul.f r3.w, c9.x, r0.z
-mov.f32f32 r3.y, r1.w
-add.f r0.x, r0.x, (neg)r2.z
-mul.f r1.w, r2.y, r0.y
-mov.f32f32 r2.y, r3.w
-mov.f32f32 r2.z, r2.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r1.w, r1.w
-add.f r0.w, r0.w, (neg)r2.y
-sam (f32)(w)r3.w, r3.x, s#1, t#1
-(sy)add.f r2.y, c9.z, (neg)r4.z
-add.f r2.w, c10.x, r0.x
-mul.f r0.y, r1.w, r0.y
+add.f r0.x, r0.x, (neg)r1.w
+mul.f r1.w, c9.x, r1.z
+mov.f32f32 r2.w, r2.z
+mul.f r0.y, r0.z, r0.y
+mov.f32f32 r0.z, r0.x
+add.f r0.w, r0.w, (neg)r1.w
add.f r0.x, c10.z, r0.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.w, r2.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
-add.f r2.w, c10.z, r0.w
-mul.f r1.w, r1.w, c4.z
mul.f r0.y, r0.y, c10.w
-mul.f r0.x, r0.x, c4.z
-mov.f32f32 r2.w, r2.w
-(ss)mov.f32f32 r3.x, r1.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r3.y, r0.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r3.w, r3.x
-add.f r0.w, c10.x, r0.w
-mov.f32f32 r3.x, r3.y
-mov.f32f32 r4.z, r0.x
-mov.f32f32 r0.x, r1.w
-mov.f32f32 r0.w, r0.w
-exp2 r0.y, r0.y
-(ss)mov.f32f32 r0.y, r0.y
-mov.f32f32 r5.y, r3.x
-mul.f r1.w, r2.w, c4.w
-mul.f r0.w, r0.w, c4.w
-add.f r2.w, c12.y, (neg)r0.y
-mov.f32f32 r6.x, r0.x
-mov.f32f32 r0.x, r1.w
-mov.f32f32 r3.x, r0.w
-mul.f r2.w, r2.w, c7.y
-mul.f r0.y, r0.y, c9.z
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r4.x, r3.x
-bary.f r3.x, 2, r1.x
-add.f r0.y, r0.y, r2.w
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r0.w, r0.w
-add.f r2.w, r3.x, c9.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r5.z, r1.w
-mov.f32f32 r4.w, r0.w
-mov.f32f32 r0.w, r2.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.w, r2.w
-mov.f32f32 r3.x, r2.w
-mov.f32f32 r4.y, r0.w
-max.f r0.y, r0.y, c9.y
-mov.f32f32 r5.w, r1.w
-mov.f32f32 r5.x, r3.x
-mov.f32f32 r6.y, r0.x
-mov.f32f32 r0.x, r2.w
-mov.f32f32 r0.w, r2.y
-sam.s (f32)(x)r1.w, r3.w, s#3, t#3
-(sy)mov.f32f32 r1.w, r1.w
-min.f r0.y, r0.y, c9.z
-sam.s (f32)(x)r2.y, r5.y, s#3, t#3
-(sy)mov.f32f32 r2.y, r2.y
-sam.s (f32)(x)r2.w, r4.z, s#3, t#3
-(sy)mov.f32f32 r2.w, r2.w
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r3.x, r3.z
-add.f r3.y, c12.y, (neg)r0.y
-add.f r3.z, c12.y, (neg)r0.y
-(ss)add.f r3.w, c12.y, (neg)r0.y
-mul.f r4.x, r2.z, r3.x
-mul.f r3.y, r3.y, c6.z
-mul.f r3.z, r3.z, c6.y
-mul.f r3.w, r3.w, c6.x
-mul.f r1.w, r4.x, r1.w
-add.f r1.z, r1.z, c9.z
-mov.f32f32 r6.z, r0.x
-mov.f32f32 r0.x, r0.w
-add.f r0.z, r0.z, c9.z
-mul.f r0.w, r1.z, r3.x
-mov.f32f32 r2.x, r2.x
-bary.f r3.x, 8, r1.x
-mov.f32f32 r0.x, r0.x
-mad.f32 r0.w, r0.w, r2.w, r1.w
-sam.s (f32)(x)r1.w, r6.x, s#3, t#3
+add.f r0.z, c10.x, r0.z
+mov.f32f32 r1.w, r0.w
+mul.f r3.y, r0.x, c4.z
+add.f r0.x, c10.z, r0.w
+mul.f r4.x, r0.z, c4.z
+add.f r0.z, c10.x, r1.w
+mov.f32f32 r5.x, r3.y
+mul.f r3.z, r0.x, c4.w
+mov.f32f32 r5.w, r4.x
+mul.f r5.y, r0.z, c4.w
+exp2 r0.x, r0.y
+(ss)mov.f32f32 r0.y, r0.x
+mov.f32f32 r5.z, r3.w
+mov.f32f32 r4.y, r3.z
+mov.f32f32 r6.x, r5.y
+mov.f32f32 r6.y, r3.w
+add.f r0.y, c12.y, (neg)r0.y
+mov.f32f32 r4.z, r3.w
+sam.s (f32)(x)r3.x, r3.y, s#3, t#3
+add.f r0.z, c10.y, (neg)r1.z
+sam.s (f32)(x)r6.z, r5.x, s#3, t#3
+mul.f r0.x, r0.x, c9.z
+add.f r0.w, r2.y, c9.z
+mul.f r0.y, r0.y, c7.y
+(ss)nop
+sam.s (f32)(x)r5.x, r5.w, s#3, t#3
+mov.f32f32 r1.z, r0.z
+mul.f r0.z, r2.x, r0.z
+mul.f r1.w, r2.z, r0.w
+add.f r0.x, r0.x, r0.y
+mul.f r0.y, r2.w, r1.z
+sam.s (f32)(x)r3.y, r4.x, s#3, t#3
+mul.f r0.w, r2.x, r0.w
+(sy)cmps.f.lt r1.z, r4.w, c11.y
+bary.f r3.z, 4, r1.x
+mul.f r0.y, r0.y, r5.x
+max.f r0.x, r0.x, c9.y
+mad.f32 r0.y, r0.z, r6.z, r0.y
+cov.u32f32 r0.z, r1.z
+mad.f32 r0.y, r1.w, r3.y, r0.y
+min.f r0.x, r0.x, c9.z
+mad.f32 r0.y, r0.w, r3.x, r0.y
+cmps.f.ne r0.z, r0.z, c9.y
(rpt1)nop
-(sy)mov.f32f32 r1.w, r1.w
-mov.f32f32 r0.w, r0.w
-mul.f r2.z, r2.z, r0.z
-mov.f32f32 r4.w, r0.x
-mov.f32f32 r5.x, r2.x
-mul.f r0.x, r1.z, r0.z
-mad.f32 r0.z, r2.z, r1.w, r0.w
-bary.f r0.w, 7, r1.x
-mov.f32f32 r1.z, r3.x
+mul.f r0.y, c11.x, r0.y
+bary.f r1.z, 8, r1.x
bary.f r1.w, 9, r1.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-mad.f32 r0.x, r0.x, r2.y, r0.z
-mov.f32f32 r2.x, r1.z
-mov.f32f32 r0.z, r1.w
-mov.f32f32 r5.y, r0.w
-mov.f32f32 r0.x, r0.x
-bary.f r0.w, 12, r1.x
-bary.f r1.z, 11, r1.x
-bary.f (ei)r1.x, 10, r1.x
-mul.f r0.x, c11.x, r0.x
-mov.f32f32 r2.y, r0.z
-sam (f32)(w)r5.x, r5.x, s#2, t#2
-(sy)cmps.f.lt r0.z, r5.w, c11.y
-mov.f32f32 r1.y, r5.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r1.x, r1.x
-sam (f32)(xyz)r1.w, r2.x, s#0, t#0
-cov.u32f32 r0.z, r0.z
-(sy)mul.f r0.w, r2.y, r0.w
-mul.f r1.z, r2.x, r1.z
-mul.f r1.x, r1.w, r1.x
-cmps.f.ne r0.z, r0.z, c9.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.y, r1.y
-mul.f r0.w, r0.w, r0.x
-mul.f r1.z, r1.z, r0.x
-mul.f r0.x, r1.x, r0.x
-mov.f32f32 r1.x, c9.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.z, r1.z
-mad.f32 r0.w, c5.z, r2.y, r0.w
-mad.f32 r1.z, c5.y, r2.x, r1.z
-mov.f32f32 r0.x, r0.x
-sel.b32 r0.z, r1.x, r0.z, r1.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.x, r1.z
-mad.f32 r0.x, c5.x, r1.w, r0.x
-nop
-mul.f r0.w, r0.y, r0.w
-mul.f r1.x, r0.y, r1.x
-mov.f32f32 r0.x, r0.x
-nop
-add.f r0.w, r0.w, r3.y
-add.f r1.x, r1.x, r3.z
-mul.f r0.x, r0.y, r0.x
+add.f r0.w, c12.y, (neg)r0.x
+mov.f32f32 r2.x, r0.y
+add.f r2.y, c12.y, (neg)r0.x
+add.f r2.z, c12.y, (neg)r0.x
+(rpt1)nop
+(ss)nop
+sam (f32)(xyz)r3.w, r1.z, s#0, t#0
+(ss)bary.f r1.z, 12, r1.x
+bary.f r1.w, 11, r1.x
+bary.f r2.w, 10, r1.x
+mul.f r0.w, r0.w, c6.z
+(sy)mul.f r1.z, r4.y, r1.z
+mul.f r1.w, r4.x, r1.w
+mul.f r2.w, r3.w, r2.w
+mul.f r2.y, r2.y, c6.y
+mul.f r1.z, r1.z, r2.x
+mul.f r1.w, r1.w, r2.x
+mad.f32 r1.z, c5.z, r4.y, r1.z
+mad.f32 r1.w, c5.y, r4.x, r1.w
+mul.f r0.y, r2.w, r0.y
+mul.f r2.x, r2.z, c6.x
+mul.f r1.z, r0.x, r1.z
+mul.f r1.w, r0.x, r1.w
+mad.f32 r0.y, c5.x, r3.w, r0.y
+mov.f32f32 r2.z, c9.y
+add.f r0.w, r1.z, r0.w
+add.f r1.z, r1.w, r2.y
nop
-mul.f r0.y, r0.w, r0.z
-mul.f r0.w, r1.x, r0.z
-add.f r0.x, r0.x, r3.w
+sel.b32 r0.z, r2.z, r0.z, r4.w
+mul.f r0.x, r0.x, r0.y
+bary.f (ei)r3.w, 5, r1.x
nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.w, r0.w
-mul.f r0.x, r0.x, r0.z
+mul.f r2.z, r0.w, r0.z
+mul.f r2.y, r1.z, r0.z
+add.f r0.x, r0.x, r2.x
+(rpt1)nop
+sam (f32)(w)r0.w, r3.z, s#1, t#1
+(sy)add.f r2.w, c9.z, (neg)r1.z
+mul.f r2.x, r0.x, r0.z
+end
nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.w
-mov.f32f32 r0.x, r0.x
nop
-mov.f32f32 r4.z, r0.y
-mov.f32f32 r4.y, r0.z
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
-mov.f32f32 r4.x, r0.x
-end
nop
-; FRAG: outputs: r4.x (1:0)
-; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r0.x (5:10,cm=f,il=12,b=1) r0.w (5:11,cm=f,il=16,b=1) r4.x (5:12,cm=f,il=20,b=1)
-; FRAG: 202 instructions, 0 half, 7 full
-; pos (bary): r1.x
-; color: r4.x
-; fragcoord: r0.x
+; FRAG: outputs: r2.x (1:0)
+; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r1.x (5:10,cm=f,il=12,b=1) r1.x (5:11,cm=f,il=16,b=1) r4.x (5:12,cm=f,il=20,b=1)
+; FRAG: 124 instructions, 0 half, 7 full
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-40.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-40.asm
index e895f8d..15beb9d 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-40.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-40.asm
@@ -4,334 +4,227 @@
@in(r0.w) in3
@in(r1.x) in4
@in(r1.y) in5
-@out(r3.z) out0
-@out(r3.w) out1
-@out(r4.x) out2
-@out(r4.y) out3
+@out(r2.x) out0
+@out(r2.y) out1
+@out(r2.z) out2
+@out(r2.w) out3
+@const(c14.x) 0x3f000000, 0x00000000, 0x3f800000, 0x40000000
+@const(c15.x) 0xbf800000, 0xba03126f, 0xbf000000, 0x3f800000
+@const(c16.x) 0x3fb8aa65, 0x3de38866, 0x3cf5c28f, 0x00000000
+@const(c17.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.x, 12, r1.x
-bary.f r0.y, 0, r1.x
+bary.f r1.z, 0, r1.x
add.f r0.w, r0.w, c14.y
-bary.f r1.z, 1, r1.x
-mov.f32f32 r1.w, r0.x
-add.f r2.x, r0.y, c15.z
-bary.f r2.y, 8, r1.x
+bary.f r1.w, 1, r1.x
+mov.f32f32 r2.x, r0.x
+bary.f r0.y, 13, r1.x
add.f r2.z, r1.z, c15.z
-mov.f32f32 r2.w, r1.w
-bary.f r1.w, 13, r1.x
-floor.f r3.x, r2.x
+add.f r2.w, r1.w, c15.z
+bary.f r3.x, 10, r1.x
+mov.f32f32 r2.y, r0.y
+floor.f r3.z, r2.z
rcp r0.w, r0.w
add.f r0.z, r0.z, c14.y
-floor.f r3.y, r2.z
-mov.f32f32 r3.z, r1.w
-add.f r2.x, r2.x, (neg)r3.x
+floor.f r3.w, r2.w
+bary.f r3.y, 11, r1.x
+add.f r2.z, r2.z, (neg)r3.z
(ss)mul.f r0.z, r0.z, r0.w
-(ss)add.f r0.w, r2.z, (neg)r3.y
-mov.f32f32 r3.x, r3.z
-mov.f32f32 r2.x, r2.x
+sam (f32)(xyz)r4.x, r2.x, s#3, t#3
+(sy)(ss)mad.f32 r0.w, c14.w, r4.x, c15.x
+absneg.f r2.x, (neg)c11.x
+mov.f32f32 r2.y, r2.z
+add.f r2.w, r2.w, (neg)r3.w
+mov.f32f32 r3.z, r0.w
+bary.f r3.w, 4, r1.x
+mul.f r4.x, c14.x, r2.y
+mul.f r2.x, r2.x, c11.x
+mov.f32f32 r4.w, r2.w
+mul.f r3.w, r3.w, r3.z
+mad.f32 r4.y, c14.w, r4.y, c15.x
+add.f r1.z, r1.z, (neg)r4.x
+mul.f r2.x, r2.x, r0.z
mov.f32f32 r0.z, r0.z
-absneg.f r2.z, (neg)c11.x
-mov.f32f32 r0.w, r0.w
-mul.f r3.y, c14.x, r2.x
-mov.f32f32 r2.y, r2.y
-sam (f32)(xyz)r3.z, r2.w, s#3, t#3
-(sy)(ss)mad.f32 r2.w, c14.w, r3.z, c15.x
-mul.f r2.z, r2.z, c11.x
-mov.f32f32 r3.x, r3.y
-mul.f r3.y, c14.x, r0.w
-mov.f32f32 r2.w, r2.w
-bary.f r3.z, 4, r1.x
-add.f r0.y, r0.y, (neg)r3.x
-mul.f r2.z, r2.z, r0.z
-mov.f32f32 r3.x, r3.y
-mul.f r3.y, r3.z, r2.w
-mad.f32 r3.z, c14.w, r3.w, c15.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r2.z, r2.z
-add.f r1.z, r1.z, (neg)r3.x
-mov.f32f32 r3.x, r3.z
-bary.f r3.z, 14, r1.x
-add.f r3.w, c15.z, r0.y
-mul.f r0.z, r2.z, r0.z
-add.f r0.y, c15.w, r0.y
-mov.f32f32 r2.z, r3.z
-bary.f r3.z, 7, r1.x
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.y, r0.y
-mul.f r2.z, r2.z, (neg)r3.z
-mul.f r3.w, r3.w, c6.z
+mov.f32f32 r4.x, r4.y
+bary.f r5.x, 14, r1.x
+bary.f r5.y, 7, r1.x
+mov.f32f32 r5.z, r1.z
+mul.f r0.z, r2.x, r0.z
+mul.f r2.x, c14.x, r4.w
+mul.f r5.x, r5.x, (neg)r5.y
+add.f r5.z, c15.z, r5.z
mul.f r0.z, r0.z, c16.x
-mul.f r0.y, r0.y, c6.z
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r4.y, r3.w
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r4.z, r0.y
-mad.f32 r2.z, r2.z, r3.x, r3.y
-mov.f32f32 r4.w, r4.y
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r3.y, r4.z
-mov.f32f32 r2.z, r2.z
-mad.f32 r4.x, c14.w, r4.x, c15.x
-add.f r4.y, c15.z, r1.z
-exp2 r0.z, r0.z
-(ss)mov.f32f32 r0.z, r0.z
-mov.f32f32 r5.z, r3.y
-mov.f32f32 r3.y, r4.x
-bary.f r4.x, 23, r1.x
-mov.f32f32 r4.y, r4.y
-add.f r4.z, c17.y, (neg)r0.z
+add.f r1.w, r1.w, (neg)r2.x
+mad.f32 r2.x, r5.x, r4.x, r3.w
+mad.f32 r3.w, c14.w, r4.z, c15.x
+mul.f r5.z, r5.z, c6.z
+mov.f32f32 r4.z, r1.w
add.f r1.z, c15.w, r1.z
-mov.f32f32 r4.x, r4.x
-mul.f r4.y, r4.y, c6.w
-mul.f r4.z, r4.z, c11.y
-mul.f r0.z, r0.z, c14.z
-mad.f32 r2.z, r4.x, r3.y, r2.z
-mov.f32f32 r4.x, r4.y
-mov.f32f32 r1.z, r1.z
-add.f r0.z, r0.z, r4.z
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r5.x, r4.x
-bary.f r4.x, 2, r1.x
-mov.f32f32 r0.z, r0.z
-mul.f r4.z, r2.z, r2.z
-bary.f r5.y, 5, r1.x
-add.f r4.x, r4.x, c15.y
-mov.f32f32 r0.z, r0.z
-mul.f r1.z, r1.z, c6.w
-mul.f r5.w, r5.y, r2.w
-bary.f r5.y, 15, r1.x
-mov.f32f32 r6.x, r4.x
+mov.f32f32 r5.x, r3.w
+bary.f r5.w, 23, r1.x
+mov.f32f32 r6.y, r5.z
+exp2 r0.z, r0.z
+(ss)mov.f32f32 r6.x, r0.z
+add.f r4.z, c15.z, r4.z
+mad.f32 r2.x, r5.w, r5.x, r2.x
+mul.f r7.x, r1.z, c6.z
+add.f r1.z, c15.w, r1.w
+add.f r1.w, c17.y, (neg)r6.x
+mov.f32f32 r7.w, r2.x
+mul.f r8.y, r4.z, c6.w
+mov.f32f32 r8.x, r7.x
+mul.f r7.y, r1.z, c6.w
+mul.f r1.z, r2.x, r7.w
+bary.f r2.x, 5, r1.x
+mov.f32f32 r6.z, r8.y
+mul.f r1.w, r1.w, c11.y
+(ss)mul.f r0.z, r0.z, c14.z
+mul.f r2.x, r2.x, r3.z
+bary.f r3.z, 15, r1.x
+bary.f r4.z, 2, r1.x
+add.f r0.z, r0.z, r1.w
+mov.f32f32 r5.w, r7.y
+mul.f r1.w, r3.z, (neg)r5.y
+add.f r7.z, r4.z, c15.y
max.f r0.z, r0.z, c14.y
-mov.f32f32 r6.y, r1.z
-mov.f32f32 r6.z, r5.y
-mov.f32f32 r5.y, r6.x
+add.f r2.y, c14.w, (neg)r2.y
+mad.f32 r1.w, r1.w, r4.x, r2.x
+bary.f r2.x, 24, r1.x
+mov.f32f32 r6.w, r7.z
min.f r0.z, r0.z, c14.z
-mov.f32f32 r6.x, r6.y
-mul.f r6.y, r6.z, (neg)r3.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r3.w, r3.w
-add.f r6.z, c17.y, (neg)r0.z
-mov.f32f32 r6.y, r6.y
-sam.s (f32)(x)r4.w, r4.w, s#5, t#5
-(sy)(ss)mov.f32f32 r4.w, r4.w
-add.f r5.x, c17.y, (neg)r0.z
-mul.f r5.y, r6.z, c10.z
-mad.f32 r5.w, r6.y, r3.x, r5.w
-mov.f32f32 r4.w, r4.w
-add.f r6.y, c14.w, (neg)r2.x
-mul.f r5.x, r5.x, c10.y
-mov.f32f32 r6.z, r5.w
-bary.f r5.w, 24, r1.x
-mov.f32f32 r6.y, r6.y
-add.f r6.w, c14.w, (neg)r0.w
-add.f r7.x, c17.y, (neg)r0.z
-mov.f32f32 r7.y, r5.w
-mov.f32f32 r5.w, r6.x
-mov.f32f32 r6.x, r4.x
-mov.f32f32 r7.z, r0.y
-mad.f32 r0.y, r7.y, r3.y, r6.z
-mov.f32f32 r6.z, r6.w
-mul.f r6.w, r7.x, c10.x
-mov.f32f32 r6.x, r6.x
-mov.f32f32 r0.y, r0.y
-mul.f r7.x, r6.y, r6.z
-mov.f32f32 r4.y, r4.y
-mov.f32f32 r8.y, r3.w
-mad.f32 r3.w, r0.y, r0.y, r4.z
-mul.f r4.z, r7.x, r4.w
-mov.f32f32 r7.w, r4.y
-mov.f32f32 r4.y, r4.x
-mov.f32f32 r3.w, r3.w
-bary.f r4.w, 6, r1.x
-sam.s (f32)(x)r5.z, r5.z, s#5, t#5
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r8.x, r4.y
-(sy)mov.f32f32 r4.y, r5.z
-mul.f r2.w, r4.w, r2.w
-bary.f r4.w, 16, r1.x
-mov.f32f32 r8.z, r1.z
-mov.f32f32 r1.z, r4.x
-(ss)mov.f32f32 r5.z, r2.y
-mov.f32f32 r2.y, r4.w
-sam.s (f32)(x)r4.x, r7.z, s#5, t#5
-(sy)mov.f32f32 r4.x, r4.x
-add.f r2.x, r2.x, c14.z
-mov.f32f32 r8.w, r1.z
-mul.f r1.z, r2.y, (neg)r3.z
-bary.f r2.y, 9, r1.x
-mul.f r3.z, r2.x, r6.z
-add.f r0.w, r0.w, c14.z
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r2.y, r2.y
-bary.f r4.w, 10, r1.x
-mad.f32 r3.z, r3.z, r4.x, r4.z
-mad.f32 r1.z, r1.z, r3.x, r2.w
-sam.s (f32)(x)r2.w, r8.y, s#5, t#5
-mov.f32f32 r5.w, r2.y
-mul.f r2.y, r6.y, r0.w
-mov.f32f32 r3.x, r3.z
-mov.f32f32 r1.z, r1.z
-bary.f r3.z, 25, r1.x
-(sy)mov.f32f32 r2.w, r2.w
-mov.f32f32 r4.x, r4.w
-sam (f32)(w)r5.z, r5.z, s#1, t#1
-(sy)add.f r4.z, c14.z, (neg)r6.y
-mov.f32f32 r3.z, r3.z
-mad.f32 r2.y, r2.y, r2.w, r3.x
-mov.f32f32 r2.w, r4.x
-mul.f r0.w, r2.x, r0.w
-mad.f32 r1.z, r3.z, r3.y, r1.z
-mov.f32f32 r2.x, r2.y
-mov.f32f32 r2.y, r4.z
-bary.f r3.x, 11, r1.x
-mov.f32f32 r1.z, r1.z
-mad.f32 r0.w, r0.w, r4.y, r2.x
-mov.f32f32 r2.x, r2.y
-mov.f32f32 r2.y, r3.x
-mad.f32 r3.x, r1.z, r1.z, r3.w
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.x, r2.x
+mov.f32f32 r8.z, r7.z
+mad.f32 r1.w, r2.x, r5.x, r1.w
+mov.f32f32 r6.x, r7.z
+add.f r2.x, c17.y, (neg)r0.z
+add.f r3.z, c17.y, (neg)r0.z
+mov.f32f32 r4.x, r1.w
+sam.s (f32)(x)r8.w, r6.y, s#5, t#5
+add.f r4.z, c17.y, (neg)r0.z
+mov.f32f32 r5.x, r2.y
+add.f r4.w, c14.w, (neg)r4.w
+mad.f32 r1.z, r1.w, r4.x, r1.z
+bary.f r1.w, 6, r1.x
+mul.f r2.x, r2.x, c10.z
+mul.f r3.z, r3.z, c10.y
+mul.f r4.z, r4.z, c10.x
+mul.f r0.w, r1.w, r0.w
+bary.f r1.w, 16, r1.x
+(ss)mov.f32f32 r6.y, r4.w
+sam.s (f32)(x)r9.x, r8.x, s#5, t#5
+sam.s (f32)(x)r9.y, r5.z, s#5, t#5
+sam.s (f32)(x)r6.z, r7.x, s#5, t#5
+add.f r2.z, r2.z, c14.z
+add.f r2.w, r2.w, c14.z
+mul.f r1.w, r1.w, (neg)r5.y
+mul.f r5.x, r5.x, r6.y
+mul.f r4.w, r2.z, r4.w
+mul.f r2.y, r2.y, r2.w
+mad.f32 r0.w, r1.w, r4.y, r0.w
+bary.f r1.w, 25, r1.x
+(sy)mul.f r4.y, r5.x, r8.w
+mul.f r2.z, r2.z, r2.w
+mad.f32 r2.w, r4.w, r9.x, r4.y
+mad.f32 r0.w, r1.w, r3.w, r0.w
+mad.f32 r1.w, r2.y, r9.y, r2.w
+(ss)nop
+sam (f32)(w)r4.w, r3.x, s#2, t#2
+(sy)cmps.f.lt r2.y, r5.z, c16.z
+mov.f32f32 r2.w, r0.x
+(ss)mov.f32f32 r3.y, r0.w
+mad.f32 r1.w, r2.z, r6.z, r1.w
+cov.u32f32 r2.y, r2.y
+mov.f32f32 r3.x, r0.y
+mad.f32 r1.z, r3.y, r3.y, r1.z
+mul.f r1.w, c16.y, r1.w
+cmps.f.ne r2.y, r2.y, c14.y
(rpt3)nop
-rsq r3.x, r3.x
-(ss)mov.f32f32 r3.y, r3.x
-mul.f r0.w, c16.y, r0.w
-mov.f32f32 r4.y, r2.x
-(ss)mov.f32f32 r3.x, r2.y
-mul.f r2.x, r2.z, r3.y
-mov.f32f32 r0.w, r0.w
-mul.f r0.y, r0.y, r3.y
-mul.f r1.z, r1.z, r3.y
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r2.y, r0.x
-sam (f32)(w)r5.z, r2.w, s#2, t#2
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.z, r1.z
-mul.f r2.z, r2.x, r2.x
-(ss)mul.f r2.w, (neg)c9.x, r2.x
-mad.f32 r2.z, r0.y, r0.y, r2.z
-mad.f32 r2.w, (neg)c9.y, r0.y, r2.w
-(sy)cmps.f.lt r3.x, r6.y, c16.z
-mov.f32f32 r3.y, r6.y
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r2.w, r2.w
-mad.f32 r2.z, r1.z, r1.z, r2.z
-mad.f32 r2.w, (neg)c9.z, r1.z, r2.w
-cov.u32f32 r3.x, r3.x
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r4.z, r2.y
-mov.f32f32 r2.y, r1.w
-mov.f32f32 r0.x, r0.x
+rsq r1.z, r1.z
+(ss)mov.f32f32 r2.z, r1.z
+mov.f32f32 r3.y, r1.w
+mul.f r0.w, r0.w, r1.z
+(ss)mov.f32f32 r1.z, c14.y
+mul.f r3.w, r7.w, r2.z
+mul.f r2.z, r4.x, r2.z
+mov.f32f32 r4.x, r0.w
+sel.b32 r1.z, r1.z, r2.y, r5.z
+mov.f32f32 r2.y, r3.w
+mul.f r3.w, (neg)c9.x, r3.w
+mov.f32f32 r4.y, r2.z
+mad.f32 r2.z, (neg)c9.y, r2.z, r3.w
+mul.f r3.w, r2.y, r2.y
+mad.f32 r0.w, (neg)c9.z, r0.w, r2.z
+mad.f32 r2.z, r4.y, r4.y, r3.w
+sam (f32)(xyz)r4.w, r2.w, s#4, t#4
+(sy)(ss)mul.f r3.x, c8.z, r5.y
+mad.f32 r2.z, r4.x, r4.x, r2.z
+max.f r0.w, r0.w, c14.y
+bary.f r2.w, 20, r1.x
+(rpt1)nop
+mov.f32f32 r3.w, r0.w
+bary.f r5.y, 21, r1.x
rsq r2.z, r2.z
-(ss)mov.f32f32 r2.z, r2.z
-mov.f32f32 r2.w, r2.w
-cmps.f.ne r3.x, r3.x, c14.y
-nop
-mul.f r2.x, r2.x, r2.z
-max.f r2.w, r2.w, c14.y
-mul.f r0.y, r0.y, r2.z
-mul.f r1.z, r1.z, r2.z
-mov.f32f32 r2.x, r2.x
-bary.f r2.z, 17, r1.x
-mov.f32f32 r2.w, r2.w
-bary.f r4.w, 21, r1.x
-bary.f r5.z, 20, r1.x
-mul.f r2.x, r2.x, r2.z
-mov.f32f32 r0.y, r0.y
-bary.f r2.z, 18, r1.x
+(ss)mov.f32f32 r5.z, r2.z
bary.f r5.w, 22, r1.x
-mad.f32 r6.x, c8.y, r2.w, (neg)r4.w
-mad.f32 r6.y, c8.x, r2.w, (neg)r5.z
-mad.f32 r0.y, r0.y, r2.z, r2.x
-mad.f32 r2.x, c8.z, r2.w, (neg)r5.w
-mov.f32f32 r2.z, r6.x
-mov.f32f32 r2.w, r6.y
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.z, r1.z
-bary.f (ei)r1.x, 19, r1.x
-mov.f32f32 r1.y, r2.x
-mad.f32 r2.x, c12.x, r2.z, r4.w
-mad.f32 r2.z, c12.x, r2.w, r5.z
-mad.f32 r0.y, r1.z, r1.x, r0.y
-mad.f32 r1.x, c12.x, r1.y, r5.w
-mov.f32f32 r1.y, r2.x
-mov.f32f32 r1.z, r2.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r2.x, c14.y
-mov.f32f32 r4.w, r2.y
-max.f r0.y, c14.y, r0.y
-mov.f32f32 r2.y, r0.x
-mov.f32f32 r0.x, r1.w
-sel.b32 r1.w, r2.x, r3.x, r3.y
-mov.f32f32 r0.y, r0.y
+mad.f32 r6.x, c8.y, r3.w, (neg)r5.y
+mad.f32 r3.w, c8.z, r3.w, (neg)r5.w
+mul.f r2.y, r2.y, r5.z
+bary.f r6.y, 17, r1.x
+mad.f32 r5.y, c12.x, r6.x, r5.y
+mul.f r4.y, r4.y, r5.z
+(ss)mul.f r2.z, r4.x, r2.z
+mul.f r2.y, r2.y, r6.y
+bary.f r4.x, 18, r1.x
+mad.f32 r3.w, c12.x, r3.w, r5.w
+mad.f32 r0.w, c8.x, r0.w, (neg)r2.w
+nop
+mad.f32 r2.y, r4.y, r4.x, r2.y
+bary.f r4.x, 19, r1.x
+mad.f32 r0.w, c12.x, r0.w, r2.w
+mul.f r4.y, c8.y, r5.x
+mul.f r4.w, c8.x, r4.w
+mad.f32 r2.y, r2.z, r4.x, r2.y
+bary.f r2.z, 8, r1.x
+bary.f (ei)r2.w, 9, r1.x
+sam (f32)(xyz)r5.z, r0.x, s#0, t#0
+nop
+(ss)max.f r0.x, c14.y, r2.y
(rpt5)nop
-log2 r0.y, r0.y
-(ss)mul.f r0.y, c12.y, r0.y
-sam (f32)(xyz)r2.z, r4.z, s#4, t#4
-(sy)mul.f r2.x, c8.y, r2.w
-mul.f r2.w, c8.x, r2.z
-mul.f r3.x, c8.z, r3.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r2.z, r0.x
+log2 r0.x, r0.x
+(ss)mul.f r0.x, c12.y, r0.x
+sam (f32)(w)r6.y, r2.z, s#1, t#1
(rpt4)nop
-exp2 r0.x, r0.y
+(sy)(ss)add.f r2.w, c14.z, (neg)r7.x
+exp2 r0.x, r0.x
(ss)mul.f r0.y, r3.x, r0.x
-sam (f32)(xyz)r5.z, r2.y, s#0, t#0
-mul.f r2.x, r2.x, r0.x
-(sy)mad.f32 r0.y, r6.x, r1.x, r0.y
-mul.f r0.x, r2.w, r0.x
-mad.f32 r1.x, r5.w, r1.y, r2.x
-mad.f32 r0.x, r5.z, r1.z, r0.x
-mov.f32f32 r0.y, r0.y
-(rpt2)nop
-mul.f r0.y, r0.y, r0.w
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r0.x, r0.x
+mul.f r1.x, r4.y, r0.x
+mad.f32 r0.y, r6.x, r3.w, r0.y
+mad.f32 r1.x, r5.w, r5.y, r1.x
+(ss)mul.f r0.x, r4.w, r0.x
nop
-mov.f32f32 r0.y, r0.y
-mul.f r1.x, r1.x, r0.w
+mul.f r0.y, r0.y, r3.y
+mul.f r1.x, r1.x, r3.y
mad.f32 r0.y, c7.z, r6.x, r0.y
-mul.f r0.x, r0.x, r0.w
-(rpt1)nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.w, r1.x
-mov.f32f32 r0.x, r0.x
-mad.f32 r0.w, c7.y, r5.w, r0.w
+mad.f32 r1.x, c7.y, r5.w, r1.x
+mad.f32 r0.x, r5.z, r0.w, r0.x
+nop
mul.f r0.y, r0.z, r0.y
+mul.f r0.w, r0.z, r1.x
+mul.f r0.x, r0.x, r1.w
+nop
+add.f r0.y, r0.y, r2.x
+add.f r0.w, r0.w, r3.z
mad.f32 r0.x, c7.x, r5.z, r0.x
-(rpt1)nop
-add.f r0.y, r0.y, r5.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.x, r0.x
nop
-mul.f r0.y, r0.y, r1.w
-mul.f r0.w, r0.z, r0.w
+mul.f r2.z, r0.y, r1.z
+mul.f r2.y, r0.w, r1.z
mul.f r0.x, r0.z, r0.x
-nop
-mov.f32f32 r0.y, r0.y
-add.f r0.z, r0.w, r5.x
-add.f r0.x, r0.x, r6.w
-nop
-mov.f32f32 r0.y, r0.y
-mul.f r0.z, r0.z, r1.w
-mul.f r0.x, r0.x, r1.w
-nop
-mov.f32f32 r4.x, r0.y
-mov.f32f32 r0.y, r0.z
-mov.f32f32 r0.x, r0.x
-(rpt1)nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
-(rpt1)nop
-mov.f32f32 r3.w, r0.y
-mov.f32f32 r3.z, r0.x
+(rpt2)nop
+add.f r0.x, r0.x, r4.z
+(rpt2)nop
+mul.f r2.x, r0.x, r1.z
end
nop
nop
-; FRAG: outputs: r3.z (1:0)
-; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r2.w (5:10,cm=f,il=12,b=1) r2.y (5:11,cm=f,il=16,b=1) r4.z (5:12,cm=f,il=20,b=1) r0.y (5:13,cm=f,il=24,b=1) r3.y (5:14,cm=f,il=28,b=1) r7.x (5:15,cm=f,il=32,b=1)
-; FRAG: 336 instructions, 0 half, 9 full
-; pos (bary): r1.x
-; color: r3.z
-; fragcoord: r0.x
+; FRAG: outputs: r2.x (1:0)
+; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r4.z (5:10,cm=f,il=12,b=1) r2.z (5:11,cm=f,il=16,b=1) r2.w (5:12,cm=f,il=20,b=1) r3.y (5:13,cm=f,il=24,b=1) r5.x (5:14,cm=f,il=28,b=1) r7.x (5:15,cm=f,il=32,b=1)
+; FRAG: 227 instructions, 0 half, 10 full
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-41.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-41.asm
index d73d764..5a712c6 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-41.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-41.asm
@@ -1,218 +1,151 @@
; options:
; VERT: new compiler
-@in(r0.x) in0
-@in(r0.y) in1
-@in(r0.z) in2
-@in(r0.w) in4
-@in(r1.x) in5
-@in(r1.y) in6
-@in(r1.z) in8
-@in(r1.w) in9
-@in(r2.x) in10
-@in(r2.y) in12
-@in(r2.z) in13
-@out(r4.w) out0
-@out(r5.x) out1
-@out(r5.y) out2
-@out(r5.z) out3
-@out(r3.y) out4
-@out(r3.z) out5
-@out(r3.w) out6
-@out(r4.x) out7
-@out(r8.x) out8
-@out(r8.y) out9
-@out(r8.z) out10
-@out(r8.w) out11
-@out(r9.x) out12
-@out(r9.y) out13
-@out(r9.z) out14
-@out(r9.w) out15
-@out(r6.x) out16
-@out(r6.y) out17
-@out(r6.z) out18
-@out(r6.w) out19
-@out(r7.x) out20
-@out(r7.y) out21
-@out(r7.z) out22
-@out(r7.w) out23
-@out(r11.x) out24
-@out(r11.y) out25
-@out(r11.z) out26
-@out(r11.w) out27
-@out(r10.x) out28
-@out(r10.y) out29
-@out(r10.z) out30
-@out(r10.w) out31
-(sy)(ss)add.f r2.w, c4.x, (neg)r0.x
-mul.f r3.x, r0.w, r0.w
-absneg.f r3.y, (neg)c7.y
-mul.f r3.z, r0.x, c7.x
-mul.f r3.w, r2.w, r2.w
-add.f r4.x, c4.y, (neg)r0.y
-add.f r3.x, c14.x, (neg)r3.x
-mul.f r4.y, r0.x, (neg)r3.y
-mad.f32 r3.y, r0.z, (neg)r3.y, r3.z
-mad.f32 r3.z, r4.x, r4.x, r3.w
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r3.w, r4.y
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r3.z, r3.z
-add.f r4.y, c4.z, (neg)r0.z
-mul.f r4.z, r3.x, r3.x
-mul.f r4.w, r1.x, r0.w
-mad.f32 r3.w, (neg)c7.x, r0.z, r3.w
-mad.f32 r3.z, r4.y, r4.y, r3.z
-mov.f32f32 r3.y, r3.y
-mul.f r5.x, c9.w, r0.x
-mul.f r5.y, c9.z, r0.x
-mul.f r5.z, c9.y, r0.x
-mul.f r5.w, c9.x, r0.x
-mov.f32f32 r3.w, r3.w
-rsq r3.z, r3.z
-(ss)mov.f32f32 r3.z, r3.z
-add.f r4.w, c14.y, (neg)r4.w
-mov.f32f32 r6.x, r3.y
-mad.f32 r3.y, c10.w, r0.y, r5.x
-mad.f32 r2.w, r2.w, r3.z, (neg)c5.x
-mov.f32f32 r4.w, r4.w
-mad.f32 r4.x, r4.x, r3.z, (neg)c5.y
-mad.f32 r3.z, r4.y, r3.z, (neg)c5.z
-mov.f32f32 r2.w, r2.w
-mad.f32 r4.y, r4.w, r4.w, r4.z
-mov.f32f32 r4.x, r4.x
-mov.f32f32 r3.z, r3.z
-mul.f r4.z, r2.w, r2.w
-mov.f32f32 r4.y, r4.y
-mad.f32 r4.z, r4.x, r4.x, r4.z
-mul.f r5.x, r1.y, r0.w
-mov.f32f32 r3.w, r3.w
-mad.f32 r3.y, c11.w, r0.z, r3.y
-mov.f32f32 r4.z, r4.z
-add.f r5.x, c14.y, (neg)r5.x
-mad.f32 r4.z, r3.z, r3.z, r4.z
-mov.f32f32 r3.w, r3.w
-(rpt4)nop
-rsq r4.z, r4.z
-(ss)mov.f32f32 r4.z, r4.z
-mov.f32f32 r5.x, r5.x
-mov.f32f32 r6.y, r3.w
-add.f r3.y, r3.y, c12.w
-mul.f r3.z, r3.z, r4.z
-mul.f r3.w, r4.x, r4.z
-mul.f r2.w, r2.w, r4.z
-mad.f32 r4.x, r5.x, r5.x, r4.y
-mov.f32f32 r3.z, r3.z
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r3.z, r3.z
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r2.w, r2.w
-nop
-mov.f32f32 r7.w, r3.z
-mov.f32f32 r7.z, r3.w
-mov.f32f32 r7.y, r2.w
-rsq r2.w, r4.x
-(ss)mov.f32f32 r2.w, r2.w
-(ss)mov.f32f32 r4.x, r3.y
-mad.f32 r4.y, c10.z, r0.y, r5.y
-mad.f32 r4.z, c10.y, r0.y, r5.z
-mul.f r3.x, r3.x, r2.w
-mul.f r5.x, r5.x, r2.w
-mul.f r2.w, r4.w, r2.w
-mad.f32 r4.y, c11.z, r0.z, r4.y
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r4.w, r5.x
-mov.f32f32 r2.w, r2.w
-add.f r4.y, r4.y, c12.z
-mul.f r5.x, r1.x, r3.x
-mul.f r5.y, r0.w, r4.w
-mad.f32 r5.x, r0.w, r2.w, (neg)r5.x
-mad.f32 r5.y, r1.y, r3.x, (neg)r5.y
-mul.f r5.z, r1.y, r2.w
-mov.f32f32 r8.x, r4.w
-mov.f32f32 r5.x, r5.x
-mov.f32f32 r5.y, r5.y
-mad.f32 r4.w, r1.x, r4.w, (neg)r5.z
-mov.f32f32 r8.z, r8.x
-mov.f32f32 r5.x, r5.x
-mov.f32f32 r5.y, r5.y
-mov.f32f32 r4.w, r4.w
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r5.x, r5.x
-mov.f32f32 r6.w, r5.y
-mov.f32f32 r4.w, r4.w
-nop
-mov.f32f32 r7.x, r5.x
-mov.f32f32 r8.y, r2.w
-mov.f32f32 r6.z, r4.w
-mov.f32f32 r2.w, r3.x
-mov.f32f32 r3.x, r4.y
-mad.f32 r4.y, c11.y, r0.z, r4.z
-mad.f32 r4.z, c10.x, r0.y, r5.w
-mov.f32f32 r8.x, r2.w
-mov.f32f32 r3.w, r3.x
-add.f r2.w, r4.y, c12.y
-mad.f32 r3.x, c11.x, r0.z, r4.z
-mul.f r4.y, c0.w, r0.x
-mul.f r4.z, c0.z, r0.x
-mul.f r2.w, r2.w, c13.y
-add.f r3.x, r3.x, c12.x
-mad.f32 r4.y, c1.w, r0.y, r4.y
-mad.f32 r4.z, c1.z, r0.y, r4.z
-mov.f32f32 r3.z, r2.w
-mul.f r2.w, r3.x, c13.x
-mad.f32 r3.x, c2.w, r0.z, r4.y
-mad.f32 r4.y, c2.z, r0.z, r4.z
-mul.f r4.z, c0.y, r0.x
-mov.f32f32 r3.y, r2.w
-add.f r2.w, r3.x, c3.w
-add.f r3.x, r4.y, c3.z
-mad.f32 r4.y, c1.y, r0.y, r4.z
-mul.f r4.z, c0.x, r0.x
-mov.f32f32 r5.z, r2.w
-mov.f32f32 r5.y, r3.x
-mad.f32 r2.w, c2.y, r0.z, r4.y
-mad.f32 r0.y, c1.x, r0.y, r4.z
-mad.f32 r3.x, c8.x, r0.z, c8.y
-mad.f32 r0.x, c8.x, r0.x, c8.y
-add.f r2.w, r2.w, c3.y
-mad.f32 r0.y, c2.x, r0.z, r0.y
-mov.f32f32 r0.z, r3.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r5.x, r2.w
-add.f r0.y, r0.y, c3.x
-mov.f32f32 r9.w, r0.z
-mov.f32f32 r9.z, r0.x
-mov.f32f32 r0.x, (0.000000)
-mov.f32f32 r4.w, r0.y
-mov.f32f32 r0.y, (0.000000)
-mov.f32f32 r0.z, r1.y
-mov.f32f32 r10.w, r0.x
-mov.f32f32 r0.x, r1.x
-mov.f32f32 r10.z, r0.y
-mov.f32f32 r10.y, r0.z
-mov.f32f32 r0.y, r0.w
-mov.f32f32 r10.x, r0.x
-mul.f r0.x, r2.x, c6.z
-mul.f r0.z, r1.w, c6.y
-mov.f32f32 r11.w, r0.y
-mul.f r0.y, r1.z, c6.x
-mov.f32f32 r11.z, r0.x
-mov.f32f32 r11.y, r0.z
+@in(r8.x) in0
+@in(r8.y) in1
+@in(r8.z) in2
+@in(r6.w) in4
+@in(r7.x) in5
+@in(r7.y) in6
+@in(r8.w) in8
+@in(r9.x) in9
+@in(r9.y) in10
+@in(r3.x) in12
+@in(r3.y) in13
+@out(r0.x) out0
+@out(r0.y) out1
+@out(r0.z) out2
+@out(r0.w) out3
+@out(r1.x) out4
+@out(r1.y) out5
+@out(r1.z) out6
+@out(r1.w) out7
+@out(r2.x) out8
+@out(r2.y) out9
+@out(r2.z) out10
+@out(r2.w) out11
+@out(r3.x) out12
+@out(r3.y) out13
+@out(r3.z) out14
+@out(r3.w) out15
+@out(r4.x) out16
+@out(r4.y) out17
+@out(r4.z) out18
+@out(r4.w) out19
+@out(r5.x) out20
+@out(r5.y) out21
+@out(r5.z) out22
+@out(r5.w) out23
+@out(r6.x) out24
+@out(r6.y) out25
+@out(r6.z) out26
+@out(r6.w) out27
+@out(r7.x) out28
+@out(r7.y) out29
+@out(r7.z) out30
+@out(r7.w) out31
+@const(c14.x) 0x3f800000, 0x00000000, 0xbf800000, 0x00000000
+@const(c15.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
+(sy)(ss)add.f r0.x, c4.x, (neg)r8.x
+mul.f r0.y, r6.w, r6.w
+mul.f r0.z, c9.y, r8.x
+mul.f r0.w, c9.x, r8.x
+mul.f r1.x, r0.x, r0.x
+add.f r1.z, c4.y, (neg)r8.y
+add.f r0.y, c14.x, (neg)r0.y
+mad.f32 r0.z, c10.y, r8.y, r0.z
+mad.f32 r0.w, c10.x, r8.y, r0.w
+mad.f32 r1.x, r1.z, r1.z, r1.x
+add.f r1.w, c4.z, (neg)r8.z
+mov.f32f32 r1.y, r0.y
+mad.f32 r0.z, c11.y, r8.z, r0.z
+mad.f32 r0.w, c11.x, r8.z, r0.w
+mad.f32 r1.x, r1.w, r1.w, r1.x
+mul.f r2.x, r1.y, r1.y
+mul.f r1.y, r7.x, r6.w
+add.f r0.z, r0.z, c12.y
+add.f r0.w, r0.w, c12.x
+mul.f r2.y, r7.y, r6.w
+mul.f r2.z, c9.w, r8.x
+rsq r1.x, r1.x
+(ss)mov.f32f32 r2.w, r1.x
+add.f r3.z, c14.y, (neg)r1.y
+mad.f32 r0.x, r0.x, r1.x, (neg)c5.x
+mul.f r1.y, r0.z, c13.y
+mad.f32 r0.z, r1.z, r2.w, (neg)c5.y
+mov.f32f32 r1.z, r3.z
+(ss)mov.f32f32 r1.x, r0.x
+mad.f32 r1.w, r1.w, r2.w, (neg)c5.z
+mov.f32f32 r2.w, r0.z
+mad.f32 r2.x, r3.z, r1.z, r2.x
+mul.f r1.x, r1.x, r1.x
+add.f r2.y, c14.y, (neg)r2.y
+mad.f32 r0.z, r0.z, r2.w, r1.x
+mov.f32f32 r3.z, r1.w
+mul.f r1.x, r0.w, c13.x
+mov.f32f32 r0.w, r2.y
+mad.f32 r2.z, c10.w, r8.y, r2.z
+mad.f32 r0.z, r1.w, r3.z, r0.z
+mad.f32 r1.w, c11.w, r8.z, r2.z
+mul.f r2.z, c9.z, r8.x
+mul.f r3.w, c0.w, r8.x
+mul.f r4.x, c0.z, r8.x
+mul.f r4.y, c0.y, r8.x
+mul.f r6.x, c0.x, r8.x
+rsq r0.z, r0.z
+(ss)mov.f32f32 r4.z, r0.z
+mul.f r5.y, r0.x, r0.z
+mad.f32 r0.x, r2.y, r0.w, r2.x
+add.f r1.w, r1.w, c12.w
+mul.f r5.w, r3.z, r4.z
+mul.f r5.z, r2.w, r4.z
+(ss)mad.f32 r0.z, c10.z, r8.y, r2.z
+mad.f32 r2.y, c1.w, r8.y, r3.w
+mad.f32 r2.w, c1.z, r8.y, r4.x
+rsq r0.x, r0.x
+(ss)mov.f32f32 r3.z, r0.x
+mul.f r2.x, r0.y, r0.x
+(ss)mad.f32 r0.x, c11.z, r8.z, r0.z
+mad.f32 r0.y, c2.w, r8.z, r2.y
+mul.f r2.z, r0.w, r3.z
+mul.f r2.y, r1.z, r3.z
+mov.f32f32 r0.z, r2.x
+add.f r1.z, r0.x, c12.z
mov.f32f32 r0.x, r2.z
-mov.f32f32 r11.x, r0.y
-mov.f32f32 r0.y, r2.y
-mov.f32f32 r0.z, c14.z
-mov.f32f32 r9.y, r0.x
+mov.f32f32 r3.z, r2.y
+mul.f r3.w, r7.x, r0.z
+add.f r0.w, r0.y, c3.w
+mul.f r0.y, r6.w, r0.x
+mul.f r4.x, r7.y, r3.z
+mad.f32 r4.w, r7.y, r0.z, (neg)r0.y
+mad.f32 r4.z, r7.x, r0.x, (neg)r4.x
+mad.f32 r5.x, r6.w, r3.z, (neg)r3.w
+mad.f32 r0.x, c2.z, r8.z, r2.w
+mad.f32 r0.y, c1.y, r8.y, r4.y
+mad.f32 r2.w, c1.x, r8.y, r6.x
+absneg.f r3.z, (neg)c7.y
+add.f r0.z, r0.x, c3.z
+mad.f32 r0.x, c2.y, r8.z, r0.y
+mad.f32 r2.w, c2.x, r8.z, r2.w
+mul.f r3.w, r8.x, (neg)r3.z
nop
-mov.f32f32 r9.x, r0.y
-mov.f32f32 r8.w, r0.z
+add.f r0.y, r0.x, c3.y
+add.f r0.x, r2.w, c3.x
+mad.f32 r4.y, (neg)c7.x, r8.z, r3.w
+mul.f r2.w, r8.x, c7.x
+mov.f32f32 r7.w, (0.000000)
+mad.f32 r4.x, r8.z, (neg)r3.z, r2.w
+mov.f32f32 r7.z, (0.000000)
+mul.f r6.z, r9.y, c6.z
+mul.f r6.y, r9.x, c6.y
+mul.f r6.x, r8.w, c6.x
+mad.f32 r3.w, c8.x, r8.z, c8.y
+mad.f32 r3.z, c8.x, r8.x, c8.y
+mov.f32f32 r2.w, c14.z
end
nop
nop
-; VERT: outputs: r4.w (0:0) r3.y (5:9) r8.x (5:10) r9.x (5:11) r6.x (5:12) r7.x (5:13) r11.x (5:14) r10.x (5:15)
-; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r1.z (0:0,cm=7,il=16,b=0) r2.y (0:0,cm=3,il=20,b=0)
-; VERT: 170 instructions, 0 half, 12 full
-; pos: r4.w
+nop
+; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11) r4.x (5:12) r5.x (5:13) r6.x (5:14) r7.x (5:15)
+; VERT: inputs: r8.x (0:0,cm=7,il=8,b=0) r6.w (0:0,cm=7,il=12,b=0) r8.w (0:0,cm=7,il=16,b=0) r3.x (0:0,cm=3,il=20,b=0)
+; VERT: 97 instructions, 0 half, 10 full
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-42.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-42.asm
index 2f17090..78c7452 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-42.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-42.asm
@@ -4,334 +4,227 @@
@in(r0.w) in3
@in(r1.x) in4
@in(r1.y) in5
-@out(r3.w) out0
-@out(r4.x) out1
-@out(r4.y) out2
-@out(r4.z) out3
+@out(r2.x) out0
+@out(r2.y) out1
+@out(r2.z) out2
+@out(r2.w) out3
+@const(c14.x) 0x3f000000, 0x00000000, 0x40000000, 0xbf800000
+@const(c15.x) 0xba03126f, 0xbf000000, 0x3f800000, 0x3fb8aa65
+@const(c16.x) 0x3de38866, 0x3cf5c28f, 0x00000000, 0x00000000
+@const(c17.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.x, 10, r1.x
-bary.f r0.y, 0, r1.x
+bary.f r1.z, 0, r1.x
add.f r0.w, r0.w, c14.y
-bary.f r1.z, 1, r1.x
-mov.f32f32 r1.w, r0.x
-add.f r2.x, r0.y, c15.y
-bary.f r2.y, 8, r1.x
+bary.f r1.w, 1, r1.x
+mov.f32f32 r2.x, r0.x
+bary.f r0.y, 11, r1.x
add.f r2.z, r1.z, c15.y
-mov.f32f32 r2.w, r1.w
-bary.f r1.w, 11, r1.x
-floor.f r3.x, r2.x
+add.f r2.w, r1.w, c15.y
+bary.f r3.x, 8, r1.x
+mov.f32f32 r2.y, r0.y
+floor.f r3.z, r2.z
rcp r0.w, r0.w
add.f r0.z, r0.z, c14.y
-floor.f r3.y, r2.z
-mov.f32f32 r3.z, r1.w
-add.f r2.x, r2.x, (neg)r3.x
+floor.f r3.w, r2.w
+bary.f r3.y, 9, r1.x
+add.f r2.z, r2.z, (neg)r3.z
(ss)mul.f r0.z, r0.z, r0.w
-(ss)add.f r0.w, r2.z, (neg)r3.y
-mov.f32f32 r3.x, r3.z
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r0.z, r0.z
-absneg.f r2.z, (neg)c11.x
-mov.f32f32 r0.w, r0.w
-mul.f r3.y, c14.x, r2.x
-add.f r3.z, c14.z, (neg)r2.x
-sam (f32)(xyz)r3.w, r2.w, s#2, t#2
-(sy)(ss)mad.f32 r2.w, c14.z, r3.w, c14.w
-mul.f r2.z, r2.z, c11.x
-mov.f32f32 r3.x, r3.y
-mul.f r3.y, c14.x, r0.w
-mov.f32f32 r2.w, r2.w
+sam (f32)(xyz)r4.x, r2.x, s#2, t#2
+(sy)(ss)mad.f32 r0.w, c14.z, r4.x, c14.w
+absneg.f r2.x, (neg)c11.x
+mov.f32f32 r2.y, r2.z
+add.f r2.w, r2.w, (neg)r3.w
+mov.f32f32 r3.z, r0.w
bary.f r3.w, 4, r1.x
-add.f r0.y, r0.y, (neg)r3.x
-mul.f r2.z, r2.z, r0.z
-mov.f32f32 r3.x, r3.y
-mul.f r3.y, r3.w, r2.w
-mad.f32 r3.w, c14.z, r4.x, c14.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r2.z, r2.z
-add.f r1.z, r1.z, (neg)r3.x
-mov.f32f32 r3.x, r3.w
-bary.f r3.w, 12, r1.x
-bary.f r4.x, 7, r1.x
-add.f r4.z, c15.y, r0.y
-mul.f r0.z, r2.z, r0.z
-add.f r0.y, c15.z, r0.y
-mul.f r2.z, r3.w, (neg)r4.x
-mov.f32f32 r3.w, r4.z
+mul.f r4.x, c14.x, r2.y
+mul.f r2.x, r2.x, c11.x
+mov.f32f32 r4.w, r2.w
+mul.f r3.w, r3.w, r3.z
+mad.f32 r4.y, c14.z, r4.y, c14.w
+add.f r1.z, r1.z, (neg)r4.x
+mul.f r2.x, r2.x, r0.z
mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.y, r0.y
-mad.f32 r2.z, r2.z, r3.x, r3.y
-mul.f r3.y, r3.w, c5.z
+mov.f32f32 r4.x, r4.y
+bary.f r5.x, 12, r1.x
+bary.f r5.y, 7, r1.x
+mov.f32f32 r5.z, r1.z
+mul.f r0.z, r2.x, r0.z
+mul.f r2.x, c14.x, r4.w
+mul.f r5.x, r5.x, (neg)r5.y
+add.f r5.z, c15.y, r5.z
mul.f r0.z, r0.z, c15.w
-mul.f r0.y, r0.y, c5.z
-mov.f32f32 r2.z, r2.z
-mad.f32 r3.w, c14.z, r4.y, c14.w
-mov.f32f32 r4.y, r3.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r4.z, r0.y
-mov.f32f32 r3.w, r3.w
-bary.f r4.w, 21, r1.x
-mov.f32f32 r5.x, r4.y
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r4.y, r4.z
-mad.f32 r2.z, r4.w, r3.w, r2.z
-exp2 r0.z, r0.z
-(ss)mov.f32f32 r0.z, r0.z
-add.f r4.z, c15.y, r1.z
-mov.f32f32 r5.w, r4.y
-mov.f32f32 r2.z, r2.z
-add.f r4.y, c17.y, (neg)r0.z
-mov.f32f32 r4.z, r4.z
+add.f r1.w, r1.w, (neg)r2.x
+mad.f32 r2.x, r5.x, r4.x, r3.w
+mad.f32 r3.w, c14.z, r4.z, c14.w
+mul.f r5.z, r5.z, c5.z
+mov.f32f32 r4.z, r1.w
add.f r1.z, c15.z, r1.z
-mul.f r4.w, r2.z, r2.z
-bary.f r5.y, 5, r1.x
-mul.f r4.z, r4.z, c5.w
-mul.f r4.y, r4.y, c11.y
-mul.f r0.z, r0.z, c15.z
-mul.f r5.z, r5.y, r2.w
-bary.f r5.y, 13, r1.x
-mov.f32f32 r6.x, r4.z
-add.f r0.z, r0.z, r4.y
-mov.f32f32 r1.z, r1.z
-mul.f r4.y, r5.y, (neg)r4.x
-mov.f32f32 r5.y, r6.x
-bary.f r6.x, 2, r1.x
-mov.f32f32 r0.z, r0.z
-mad.f32 r4.y, r4.y, r3.x, r5.z
-mul.f r1.z, r1.z, c5.w
-add.f r6.z, r6.x, c15.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r4.y, r4.y
-bary.f r5.z, 22, r1.x
-mov.f32f32 r6.x, r6.z
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r6.y, r1.z
-mad.f32 r4.y, r5.z, r3.w, r4.y
-mov.f32f32 r5.z, r6.x
+mov.f32f32 r5.x, r3.w
+bary.f r5.w, 21, r1.x
+mov.f32f32 r6.y, r5.z
+exp2 r0.z, r0.z
+(ss)mov.f32f32 r6.x, r0.z
+add.f r4.z, c15.y, r4.z
+mad.f32 r2.x, r5.w, r5.x, r2.x
+mul.f r7.x, r1.z, c5.z
+add.f r1.z, c15.z, r1.w
+add.f r1.w, c17.y, (neg)r6.x
+mov.f32f32 r7.w, r2.x
+mul.f r8.y, r4.z, c5.w
+mov.f32f32 r8.x, r7.x
+mul.f r7.y, r1.z, c5.w
+mul.f r1.z, r2.x, r7.w
+bary.f r2.x, 5, r1.x
+mov.f32f32 r6.z, r8.y
+mul.f r1.w, r1.w, c11.y
+(ss)mul.f r0.z, r0.z, c15.z
+mul.f r2.x, r2.x, r3.z
+bary.f r3.z, 13, r1.x
+bary.f r4.z, 2, r1.x
+add.f r0.z, r0.z, r1.w
+mov.f32f32 r5.w, r7.y
+mul.f r1.w, r3.z, (neg)r5.y
+add.f r7.z, r4.z, c15.x
max.f r0.z, r0.z, c14.y
-mov.f32f32 r6.x, r6.y
-mov.f32f32 r4.y, r4.y
-mov.f32f32 r6.w, r0.y
-mov.f32f32 r0.y, r3.y
+add.f r2.y, c14.z, (neg)r2.y
+mad.f32 r1.w, r1.w, r4.x, r2.x
+bary.f r2.x, 22, r1.x
+mov.f32f32 r6.w, r7.z
min.f r0.z, r0.z, c15.z
-mad.f32 r3.y, r4.y, r4.y, r4.w
-sam.s (f32)(x)r4.w, r5.x, s#4, t#4
-(sy)mov.f32f32 r4.w, r4.w
-mov.f32f32 r6.x, r6.x
-(ss)add.f r5.x, c17.y, (neg)r0.z
-mov.f32f32 r3.y, r3.y
-bary.f r5.y, 6, r1.x
-mov.f32f32 r4.w, r4.w
-mov.f32f32 r3.z, r3.z
-add.f r5.z, c14.z, (neg)r0.w
-mul.f r2.w, r5.y, r2.w
-bary.f r5.y, 14, r1.x
-mul.f r5.x, r5.x, c10.z
-add.f r6.y, c17.y, (neg)r0.z
-add.f r7.x, c17.y, (neg)r0.z
-mul.f r4.x, r5.y, (neg)r4.x
-mov.f32f32 r5.y, r5.z
-mul.f r5.z, r6.y, c10.y
-mul.f r7.z, r7.x, c10.x
-mad.f32 r2.w, r4.x, r3.x, r2.w
-mul.f r3.x, r3.z, r5.y
-mov.f32f32 r4.x, r6.z
-mov.f32f32 r4.z, r4.z
-mov.f32f32 r2.w, r2.w
-bary.f r6.y, 23, r1.x
-mul.f r3.x, r3.x, r4.w
-mov.f32f32 r7.x, r4.z
-mov.f32f32 r4.z, r6.z
-mad.f32 r2.w, r6.y, r3.w, r2.w
-mov.f32f32 r6.y, r4.x
-mov.f32f32 r7.w, r0.y
-mov.f32f32 r7.y, r4.z
-mov.f32f32 r0.y, r2.w
-mov.f32f32 r1.z, r1.z
-add.f r2.x, r2.x, c15.z
-add.f r0.w, r0.w, c15.z
-mad.f32 r2.w, r0.y, r0.y, r3.y
-sam.s (f32)(x)r3.y, r5.w, s#4, t#4
-mov.f32f32 r8.x, r1.z
-sam.s (f32)(x)r1.z, r6.w, s#4, t#4
-mov.f32f32 r3.w, r6.z
-mul.f r4.x, r2.x, r5.y
-mul.f r3.z, r3.z, r0.w
-(sy)mov.f32f32 r1.z, r1.z
-mov.f32f32 r3.y, r3.y
-rsq r2.w, r2.w
-(ss)mov.f32f32 r2.w, r2.w
-mov.f32f32 r8.y, r3.w
-mov.f32f32 r2.y, r2.y
-mul.f r0.w, r2.x, r0.w
-mul.f r2.x, r2.z, r2.w
-mad.f32 r1.z, r4.x, r1.z, r3.x
-mul.f r2.z, r4.y, r2.w
-mul.f r0.y, r0.y, r2.w
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r1.z, r1.z
-sam.s (f32)(x)r2.w, r7.w, s#4, t#4
-(sy)mov.f32f32 r2.w, r2.w
-mov.f32f32 r2.z, r2.z
-mul.f r3.x, r2.x, r2.x
-mul.f r3.w, (neg)c9.x, r2.x
-mad.f32 r1.z, r3.z, r2.w, r1.z
-mad.f32 r2.w, r2.z, r2.z, r3.x
-mad.f32 r3.x, (neg)c9.y, r2.z, r3.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r3.z, r2.y
-mov.f32f32 r2.y, r2.w
-mov.f32f32 r2.w, r3.x
-mad.f32 r2.y, r0.y, r0.y, r2.y
-mad.f32 r2.w, (neg)c9.z, r0.y, r2.w
-mov.f32f32 r1.z, r1.z
-bary.f r3.x, 9, r1.x
-mov.f32f32 r4.x, r0.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r4.y, r1.w
+mov.f32f32 r8.z, r7.z
+mad.f32 r1.w, r2.x, r5.x, r1.w
+mov.f32f32 r6.x, r7.z
+add.f r2.x, c17.y, (neg)r0.z
+add.f r3.z, c17.y, (neg)r0.z
+mov.f32f32 r4.x, r1.w
+sam.s (f32)(x)r8.w, r6.y, s#4, t#4
+add.f r4.z, c17.y, (neg)r0.z
+mov.f32f32 r5.x, r2.y
+add.f r4.w, c14.z, (neg)r4.w
+mad.f32 r1.z, r1.w, r4.x, r1.z
+bary.f r1.w, 6, r1.x
+(ss)mul.f r6.y, r2.x, c10.z
+mul.f r3.z, r3.z, c10.y
+mul.f r4.z, r4.z, c10.x
+mul.f r0.w, r1.w, r0.w
+bary.f r1.w, 14, r1.x
+mov.f32f32 r2.x, r4.w
+sam.s (f32)(x)r9.x, r8.x, s#4, t#4
+sam.s (f32)(x)r9.y, r5.z, s#4, t#4
+sam.s (f32)(x)r6.z, r7.x, s#4, t#4
+add.f r2.z, r2.z, c15.z
+add.f r2.w, r2.w, c15.z
+mul.f r1.w, r1.w, (neg)r5.y
+mul.f r2.x, r5.x, r2.x
+mul.f r4.w, r2.z, r4.w
+mul.f r2.y, r2.y, r2.w
+mad.f32 r0.w, r1.w, r4.y, r0.w
+bary.f r1.w, 23, r1.x
+(sy)mul.f r2.x, r2.x, r8.w
+mul.f r2.z, r2.z, r2.w
+mad.f32 r2.x, r4.w, r9.x, r2.x
+mad.f32 r0.w, r1.w, r3.w, r0.w
+mad.f32 r1.w, r2.y, r9.y, r2.x
+(ss)nop
+sam (f32)(w)r4.w, r3.x, s#1, t#1
+(sy)cmps.f.lt r2.x, r5.z, c16.y
+mov.f32f32 r2.w, r0.x
+mov.f32f32 r2.y, r0.w
+mad.f32 r1.w, r2.z, r6.z, r1.w
+cov.u32f32 r2.x, r2.x
+(ss)mov.f32f32 r3.x, r0.y
+mad.f32 r1.z, r2.y, r2.y, r1.z
+mul.f r1.w, c16.x, r1.w
+cmps.f.ne r2.x, r2.x, c14.y
+(rpt3)nop
+rsq r1.z, r1.z
+(ss)mov.f32f32 r2.y, r1.z
+mov.f32f32 r3.y, r1.w
+mul.f r0.w, r0.w, r1.z
+(ss)mov.f32f32 r1.z, c14.y
+mul.f r2.z, r7.w, r2.y
+mul.f r2.y, r4.x, r2.y
+mov.f32f32 r3.w, r0.w
+sel.b32 r1.z, r1.z, r2.x, r5.z
+mov.f32f32 r2.x, r2.z
+mul.f r2.z, (neg)c9.x, r2.z
+mov.f32f32 r4.x, r2.y
+mad.f32 r2.y, (neg)c9.y, r2.y, r2.z
+mul.f r2.z, r2.x, r2.x
+mad.f32 r0.w, (neg)c9.z, r0.w, r2.y
+mad.f32 r2.y, r4.x, r4.x, r2.z
+sam (f32)(xyz)r4.w, r2.w, s#3, t#3
+(sy)(ss)mul.f r3.x, c8.z, r5.y
+mad.f32 r2.y, r3.w, r3.w, r2.y
+max.f r0.w, r0.w, c14.y
+bary.f r2.z, 18, r1.x
+(rpt1)nop
+mov.f32f32 r2.w, r0.w
+bary.f r4.y, 19, r1.x
rsq r2.y, r2.y
-(ss)mov.f32f32 r2.y, r2.y
-mov.f32f32 r2.w, r2.w
-mad.f32 r0.w, r0.w, r3.y, r1.z
-mov.f32f32 r1.z, r3.x
-mul.f r2.x, r2.x, r2.y
-max.f r2.w, r2.w, c14.y
-mov.f32f32 r0.w, r0.w
-mul.f r2.z, r2.z, r2.y
-mov.f32f32 r2.x, r2.x
-bary.f r3.x, 15, r1.x
-mov.f32f32 r2.w, r2.w
-bary.f r3.y, 19, r1.x
-bary.f r3.w, 18, r1.x
-mov.f32f32 r3.x, r3.x
-bary.f r4.z, 20, r1.x
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r3.w, r3.w
-mul.f r2.x, r2.x, r3.x
-mov.f32f32 r2.z, r2.z
-bary.f r3.x, 16, r1.x
-mov.f32f32 r4.z, r4.z
-mad.f32 r4.w, c8.y, r2.w, (neg)r3.y
-mad.f32 r5.y, c8.x, r2.w, (neg)r3.w
-mov.f32f32 r3.x, r3.x
-mad.f32 r2.w, c8.z, r2.w, (neg)r4.z
-mov.f32f32 r4.w, r4.w
-mov.f32f32 r5.y, r5.y
-mad.f32 r2.x, r2.z, r3.x, r2.x
-mov.f32f32 r2.z, r2.w
-mad.f32 r2.w, c12.x, r4.w, r3.y
-mad.f32 r3.x, c12.x, r5.y, r3.w
-mov.f32f32 r2.x, r2.x
-mul.f r0.y, r0.y, r2.y
-mad.f32 r2.y, c12.x, r2.z, r4.z
-mov.f32f32 r2.z, r2.w
-mov.f32f32 r2.w, r3.x
-mov.f32f32 r0.y, r0.y
+(ss)mov.f32f32 r5.y, r2.y
+bary.f r5.z, 20, r1.x
+mad.f32 r5.w, c8.y, r2.w, (neg)r4.y
+mad.f32 r2.w, c8.z, r2.w, (neg)r5.z
+mul.f r2.x, r2.x, r5.y
+bary.f r6.x, 15, r1.x
+mov.f32f32 r5.z, r5.z
+mov.f32f32 r4.y, r4.y
+mul.f r4.x, r4.x, r5.y
+mul.f r2.x, r2.x, r6.x
+bary.f r5.y, 16, r1.x
+mad.f32 r5.z, c12.x, r2.w, r5.z
+mad.f32 r4.y, c12.x, r5.w, r4.y
+(ss)mul.f r2.y, r3.w, r2.y
+mad.f32 r2.x, r4.x, r5.y, r2.x
bary.f (ei)r1.x, 17, r1.x
-mov.f32f32 r1.y, r2.y
-mul.f r0.w, c16.x, r0.w
-nop
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r3.w, r1.z
-mov.f32f32 r3.x, r4.x
-mov.f32f32 r4.z, r0.x
-mad.f32 r0.x, r0.y, r1.x, r2.x
-mov.f32f32 r0.y, r0.w
-mov.f32f32 r0.w, r1.w
-mov.f32f32 r4.w, r4.y
-mov.f32f32 r0.x, r0.x
-sam (f32)(w)r1.z, r3.z, s#1, t#1
-(sy)mov.f32f32 r1.x, r2.y
-cmps.f.lt r1.z, r2.y, c16.y
-mov.f32f32 r3.y, r0.w
-max.f r0.x, c14.y, r0.x
-mov.f32f32 r0.w, r1.x
-cov.u32f32 r1.x, r1.z
-sam (f32)(xyz)r1.z, r4.z, s#3, t#3
-(sy)mul.f r2.x, c8.z, r2.x
-mov.f32f32 r0.x, r0.x
-mul.f r1.w, c8.y, r1.w
-mul.f r1.z, c8.x, r1.z
-(ss)nop
-sam (f32)(xyzw)r3.x, r3.x, s#0, t#0
-cmps.f.ne r1.x, r1.x, c14.y
-(rpt2)nop
-log2 r0.x, r0.x
+mad.f32 r0.w, c8.x, r0.w, (neg)r2.z
+(rpt1)nop
+mad.f32 r1.x, r2.y, r1.x, r2.x
+mov.f32f32 r1.y, r2.z
+mul.f r3.w, c8.y, r5.x
+mul.f r4.x, c8.x, r4.w
+max.f r1.x, c14.y, r1.x
+mad.f32 r0.w, c12.x, r0.w, r1.y
+sam (f32)(xyzw)r2.x, r0.x, s#0, t#0
+(rpt4)nop
+(ss)log2 r0.x, r1.x
(ss)mul.f r0.x, c12.y, r0.x
-mov.f32f32 r2.y, c14.y
-mov.f32f32 r2.x, r2.x
-(sy)mov.f32f32 r3.w, r3.w
-mov.f32f32 r0.x, r0.x
-sel.b32 r0.w, r2.y, r1.x, r0.w
-mov.f32f32 r1.x, r1.w
-mov.f32f32 r1.z, r1.z
-nop
-mov.f32f32 r4.z, r3.w
-nop
+(rpt5)nop
exp2 r0.x, r0.x
-(ss)mul.f r1.w, r2.x, r0.x
-mul.f r1.x, r1.x, r0.x
-mad.f32 r1.y, r3.z, r1.y, r1.w
-mad.f32 r1.x, r3.y, r2.z, r1.x
-(ss)mul.f r0.x, r1.z, r0.x
-nop
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.x, r1.x
-mad.f32 r0.x, r3.x, r2.w, r0.x
-nop
-mul.f r1.y, r1.y, r0.y
-mul.f r1.x, r1.x, r0.y
-mov.f32f32 r0.x, r0.x
-nop
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.x, r1.x
-mad.f32 r1.y, c7.z, r3.z, r1.y
-mad.f32 r1.x, c7.y, r3.y, r1.x
-mul.f r0.x, r0.x, r0.y
-nop
-mov.f32f32 r0.y, r1.y
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r0.x, r0.x
+(ss)mul.f r0.y, r3.x, r0.x
+(ss)mul.f r1.x, r3.w, r0.x
+(sy)mad.f32 r0.y, r2.z, r5.z, r0.y
+mad.f32 r1.x, r2.y, r4.y, r1.x
+mul.f r0.x, r4.x, r0.x
+nop
+mul.f r0.y, r0.y, r3.y
+mul.f r1.x, r1.x, r3.y
+mad.f32 r0.y, c7.z, r2.z, r0.y
+mad.f32 r1.x, c7.y, r2.y, r1.x
+mad.f32 r0.x, r2.x, r0.w, r0.x
nop
mul.f r0.y, r0.z, r0.y
-mul.f r1.x, r0.z, r1.x
-mad.f32 r0.x, c7.x, r3.x, r0.x
+mul.f r0.w, r0.z, r1.x
+mul.f r0.x, r0.x, r1.w
nop
-add.f r0.y, r0.y, r5.x
-add.f r1.x, r1.x, r5.z
-mov.f32f32 r0.x, r0.x
+add.f r0.y, r0.y, r6.y
+add.f r0.w, r0.w, r3.z
+mad.f32 r0.x, c7.x, r2.x, r0.x
nop
-mul.f r0.y, r0.y, r0.w
-mul.f r1.x, r1.x, r0.w
+mul.f r0.y, r0.y, r1.z
+mul.f r0.w, r0.w, r1.z
mul.f r0.x, r0.z, r0.x
nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r1.x
-add.f r0.x, r0.x, r7.z
-nop
-mul.f r0.y, r0.y, c6.z
-mul.f r0.z, r0.z, c6.y
-mul.f r0.x, r0.x, r0.w
-nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.x, r0.x
-nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-mul.f r0.x, r0.x, c6.x
-nop
-mov.f32f32 r4.y, r0.y
-mov.f32f32 r4.x, r0.z
-mov.f32f32 r0.x, r0.x
+mul.f r2.z, r0.y, c6.z
+mul.f r2.y, r0.w, c6.y
+add.f r0.x, r0.x, r4.z
(rpt2)nop
-mov.f32f32 r0.x, r0.x
+mul.f r0.x, r0.x, r1.z
(rpt2)nop
-mov.f32f32 r3.w, r0.x
+mul.f r2.x, r0.x, c6.x
end
-nop
-; FRAG: outputs: r3.w (1:0)
-; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r3.y (5:10,cm=f,il=12,b=1) r1.x (5:11,cm=f,il=16,b=1) r2.y (5:12,cm=f,il=20,b=1) r2.z (5:13,cm=f,il=24,b=1) r5.z (5:14,cm=f,il=28,b=1)
-; FRAG: 325 instructions, 0 half, 9 full
-; pos (bary): r1.x
-; color: r3.w
-; fragcoord: r0.x
+; FRAG: outputs: r2.x (1:0)
+; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r4.z (5:10,cm=f,il=12,b=1) r63.z (5:11,cm=f,il=16,b=1) r5.y (5:12,cm=f,il=20,b=1) r3.z (5:13,cm=f,il=24,b=1) r1.x (5:14,cm=f,il=28,b=1)
+; FRAG: 230 instructions, 0 half, 10 full
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-43.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-43.asm
index a5989e6..b4c4f88 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-43.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-43.asm
@@ -1,198 +1,139 @@
; options:
; VERT: new compiler
-@in(r0.x) in0
-@in(r0.y) in1
-@in(r0.z) in2
-@in(r0.w) in4
-@in(r1.x) in5
-@in(r1.y) in6
-@in(r1.z) in8
-@in(r1.w) in9
-@in(r2.x) in10
-@in(r2.y) in12
-@in(r2.z) in13
-@out(r9.w) out0
-@out(r10.x) out1
-@out(r10.y) out2
-@out(r10.z) out3
-@out(r5.w) out4
-@out(r6.x) out5
-@out(r6.y) out6
-@out(r6.z) out7
-@out(r4.x) out8
-@out(r4.y) out9
-@out(r4.z) out10
-@out(r4.w) out11
-@out(r1.z) out12
-@out(r1.w) out13
-@out(r2.x) out14
-@out(r2.y) out15
-@out(r7.w) out16
-@out(r8.x) out17
-@out(r8.y) out18
-@out(r8.z) out19
-@out(r6.w) out20
-@out(r7.x) out21
-@out(r7.y) out22
-@out(r7.z) out23
-@out(r8.w) out24
-@out(r9.x) out25
-@out(r9.y) out26
-@out(r9.z) out27
-(sy)(ss)add.f r2.w, c4.x, (neg)r0.x
-mul.f r3.x, r0.w, r0.w
-mul.f r3.y, c8.w, r0.x
-mul.f r3.z, c8.z, r0.x
-mul.f r3.w, r2.w, r2.w
-add.f r4.x, c4.y, (neg)r0.y
-add.f r3.x, c13.x, (neg)r3.x
-mad.f32 r3.y, c9.w, r0.y, r3.y
-mad.f32 r3.z, c9.z, r0.y, r3.z
-mad.f32 r3.w, r4.x, r4.x, r3.w
-mov.f32f32 r3.x, r3.x
-mad.f32 r3.y, c10.w, r0.z, r3.y
-mad.f32 r3.z, c10.z, r0.z, r3.z
-mov.f32f32 r3.w, r3.w
-add.f r4.y, c4.z, (neg)r0.z
-mul.f r4.z, r3.x, r3.x
-mul.f r4.w, r1.x, r0.w
-add.f r3.y, r3.y, c11.w
-mad.f32 r3.w, r4.y, r4.y, r3.w
-add.f r3.z, r3.z, c11.z
-mul.f r5.x, c8.y, r0.x
-mul.f r5.y, c8.x, r0.x
-add.f r4.w, c13.y, (neg)r4.w
-mul.f r2.x, r2.x, c6.z
-mov.f32f32 r3.y, r3.y
-rsq r3.w, r3.w
-(ss)mov.f32f32 r3.w, r3.w
-mov.f32f32 r4.w, r4.w
-mov.f32f32 r3.z, r3.z
-mad.f32 r5.x, c9.y, r0.y, r5.x
-mad.f32 r2.w, r2.w, r3.w, (neg)c5.x
-mad.f32 r4.z, r4.w, r4.w, r4.z
-mad.f32 r4.x, r4.x, r3.w, (neg)c5.y
-mad.f32 r3.w, r4.y, r3.w, (neg)c5.z
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r4.y, r4.z
-mul.f r4.z, r1.y, r0.w
-mov.f32f32 r4.x, r4.x
-mul.f r5.z, r2.w, r2.w
-mov.f32f32 r3.w, r3.w
-add.f r4.z, c13.y, (neg)r4.z
-mad.f32 r5.z, r4.x, r4.x, r5.z
-mov.f32f32 r6.z, r3.y
-mov.f32f32 r6.y, r3.z
-mad.f32 r3.y, c10.y, r0.z, r5.x
-mov.f32f32 r3.z, r5.z
-mov.f32f32 r4.z, r4.z
-mad.f32 r3.z, r3.w, r3.w, r3.z
-add.f r3.y, r3.y, c11.y
-mad.f32 r5.x, c9.x, r0.y, r5.y
-mov.f32f32 r2.x, r2.x
-mad.f32 r5.x, c10.x, r0.z, r5.x
-mul.f r5.y, c0.w, r0.x
-mul.f r5.z, c0.z, r0.x
-rsq r3.z, r3.z
-(ss)mov.f32f32 r3.z, r3.z
-mad.f32 r4.y, r4.z, r4.z, r4.y
-mul.f r3.y, r3.y, c12.y
-add.f r5.x, r5.x, c11.x
-mul.f r3.w, r3.w, r3.z
-mul.f r4.x, r4.x, r3.z
-mul.f r2.w, r2.w, r3.z
-mov.f32f32 r6.x, r3.y
-mov.f32f32 r3.y, r3.w
-mov.f32f32 r3.z, r4.x
-mov.f32f32 r2.w, r2.w
-rsq r3.w, r4.y
-(ss)mov.f32f32 r3.w, r3.w
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r3.z, r3.z
-mov.f32f32 r2.w, r2.w
+@in(r7.x) in0
+@in(r7.y) in1
+@in(r7.z) in2
+@in(r6.y) in4
+@in(r6.z) in5
+@in(r6.w) in6
+@in(r2.w) in8
+@in(r3.x) in9
+@in(r3.y) in10
+@in(r3.z) in12
+@in(r3.w) in13
+@out(r0.x) out0
+@out(r0.y) out1
+@out(r0.z) out2
+@out(r0.w) out3
+@out(r1.x) out4
+@out(r1.y) out5
+@out(r1.z) out6
+@out(r1.w) out7
+@out(r2.x) out8
+@out(r2.y) out9
+@out(r2.z) out10
+@out(r2.w) out11
+@out(r3.x) out12
+@out(r3.y) out13
+@out(r3.z) out14
+@out(r3.w) out15
+@out(r4.x) out16
+@out(r4.y) out17
+@out(r4.z) out18
+@out(r4.w) out19
+@out(r5.x) out20
+@out(r5.y) out21
+@out(r5.z) out22
+@out(r5.w) out23
+@out(r6.x) out24
+@out(r6.y) out25
+@out(r6.z) out26
+@out(r6.w) out27
+@const(c13.x) 0x3f800000, 0x00000000, 0xbf800000, 0x00000000
+@const(c14.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
+(sy)(ss)add.f r0.x, c4.x, (neg)r7.x
+mul.f r0.y, r6.y, r6.y
+mul.f r0.z, c8.y, r7.x
+mul.f r0.w, c8.x, r7.x
+mul.f r1.x, r0.x, r0.x
+add.f r1.z, c4.y, (neg)r7.y
+add.f r0.y, c13.x, (neg)r0.y
+mad.f32 r0.z, c9.y, r7.y, r0.z
+mad.f32 r0.w, c9.x, r7.y, r0.w
+mad.f32 r1.x, r1.z, r1.z, r1.x
+add.f r1.w, c4.z, (neg)r7.z
+mov.f32f32 r1.y, r0.y
+mad.f32 r0.z, c10.y, r7.z, r0.z
+mad.f32 r0.w, c10.x, r7.z, r0.w
+mad.f32 r1.x, r1.w, r1.w, r1.x
+mul.f r2.x, r1.y, r1.y
+mul.f r1.y, r6.z, r6.y
+add.f r0.z, r0.z, c11.y
+add.f r0.w, r0.w, c11.x
+mul.f r2.y, r6.w, r6.y
+mul.f r2.z, c8.w, r7.x
+rsq r1.x, r1.x
+(ss)mov.f32f32 r4.x, r1.x
+add.f r4.y, c13.y, (neg)r1.y
+mad.f32 r0.x, r0.x, r1.x, (neg)c5.x
+mul.f r1.y, r0.z, c12.y
+mad.f32 r0.z, r1.z, r4.x, (neg)c5.y
+mov.f32f32 r1.z, r4.y
+(ss)mov.f32f32 r1.x, r0.x
+mad.f32 r1.w, r1.w, r4.x, (neg)c5.z
+mov.f32f32 r4.x, r0.z
+mad.f32 r2.x, r4.y, r1.z, r2.x
+mul.f r1.x, r1.x, r1.x
+add.f r2.y, c13.y, (neg)r2.y
+mad.f32 r0.z, r0.z, r4.x, r1.x
+mov.f32f32 r4.y, r1.w
+mul.f r1.x, r0.w, c12.x
+mov.f32f32 r0.w, r2.y
+mad.f32 r2.z, c9.w, r7.y, r2.z
+mad.f32 r0.z, r1.w, r4.y, r0.z
+mad.f32 r1.w, c10.w, r7.z, r2.z
+mul.f r2.z, c8.z, r7.x
+mul.f r4.z, c0.w, r7.x
+mul.f r5.z, c0.z, r7.x
+mul.f r5.w, c0.y, r7.x
+mul.f r6.x, c0.x, r7.x
+rsq r0.z, r0.z
+(ss)mov.f32f32 r5.x, r0.z
+mul.f r4.w, r0.x, r0.z
+mad.f32 r0.x, r2.y, r0.w, r2.x
+add.f r1.w, r1.w, c11.w
+mul.f r5.y, r4.y, r5.x
+mul.f r5.x, r4.x, r5.x
+(ss)mad.f32 r0.z, c9.z, r7.y, r2.z
+mad.f32 r2.y, c1.w, r7.y, r4.z
+mad.f32 r5.z, c1.z, r7.y, r5.z
+rsq r0.x, r0.x
+(ss)mov.f32f32 r4.x, r0.x
+mul.f r2.x, r0.y, r0.x
+(ss)mad.f32 r0.x, c10.z, r7.z, r0.z
+mad.f32 r0.y, c2.w, r7.z, r2.y
+mul.f r2.z, r0.w, r4.x
+mul.f r2.y, r1.z, r4.x
+mov.f32f32 r0.z, r2.x
+add.f r1.z, r0.x, c11.z
+mov.f32f32 r0.x, r2.z
+mov.f32f32 r4.z, r2.y
+mul.f r7.w, r6.z, r0.z
+add.f r0.w, r0.y, c3.w
+mul.f r0.y, r6.y, r0.x
+mul.f r4.x, r6.w, r4.z
+mad.f32 r4.y, r6.w, r0.z, (neg)r0.y
+mad.f32 r4.x, r6.z, r0.x, (neg)r4.x
+mad.f32 r4.z, r6.y, r4.z, (neg)r7.w
+mad.f32 r0.x, c2.z, r7.z, r5.z
+mad.f32 r0.y, c1.y, r7.y, r5.w
+mad.f32 r5.z, c1.x, r7.y, r6.x
+mad.f32 r0.y, c2.y, r7.z, r0.y
+add.f r0.z, r0.x, c3.z
+mad.f32 r0.x, c2.x, r7.z, r5.z
nop
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r3.z, r3.z
-(rpt1)nop
-mov.f32f32 r7.x, r3.y
-mov.f32f32 r6.w, r3.z
-mov.f32f32 r8.z, r2.w
-mul.f r2.w, r3.x, r3.w
-mul.f r3.x, r4.z, r3.w
-mul.f r3.y, r4.w, r3.w
-mul.f r3.z, r5.x, c12.x
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r5.w, r3.z
-mul.f r3.z, r1.x, r2.w
-mul.f r3.w, r0.w, r3.x
-mad.f32 r3.z, r0.w, r3.y, (neg)r3.z
-mad.f32 r3.w, r1.y, r2.w, (neg)r3.w
-mul.f r4.x, r1.y, r3.y
-(ss)mov.f32f32 r4.y, r3.x
-mov.f32f32 r3.z, r3.z
-mov.f32f32 r3.w, r3.w
-mad.f32 r3.x, r1.x, r3.x, (neg)r4.x
-mov.f32f32 r4.z, r4.y
-mov.f32f32 r8.y, r3.z
-mov.f32f32 r8.x, r3.w
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r2.w, r2.w
-nop
-mov.f32f32 r7.w, r3.x
-mov.f32f32 r4.y, r3.y
-mov.f32f32 r4.x, r2.w
-mov.f32f32 r2.x, r2.x
-mad.f32 r2.w, c1.w, r0.y, r5.y
-mad.f32 r3.x, c1.z, r0.y, r5.z
-mul.f r3.y, c0.y, r0.x
-mov.f32f32 r8.w, r2.x
-mad.f32 r2.x, c2.w, r0.z, r2.w
-mad.f32 r2.w, c2.z, r0.z, r3.x
-mad.f32 r3.x, c1.y, r0.y, r3.y
-mul.f r3.y, c0.x, r0.x
-add.f r2.x, r2.x, c3.w
-add.f r2.w, r2.w, c3.z
-mad.f32 r3.x, c2.y, r0.z, r3.x
-mad.f32 r0.y, c1.x, r0.y, r3.y
-mov.f32f32 r10.z, r2.x
-mov.f32f32 r10.y, r2.w
-add.f r2.x, r3.x, c3.y
-mad.f32 r0.y, c2.x, r0.z, r0.y
-mul.f r1.w, r1.w, c6.y
-mul.f r1.z, r1.z, c6.x
-mov.f32f32 r10.x, r2.x
-add.f r0.y, r0.y, c3.x
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r9.w, r0.y
-mov.f32f32 r7.z, r1.w
-mov.f32f32 r7.y, r1.z
-mov.f32f32 r9.z, r1.y
-mov.f32f32 r0.y, r1.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.x, r2.z
-mov.f32f32 r1.y, r2.y
-mov.f32f32 r9.y, r0.y
-mov.f32f32 r9.x, r0.w
-mov.f32f32 r2.y, r1.x
-mov.f32f32 r2.x, r1.y
-mad.f32 r0.y, c7.x, r0.z, c7.y
-mad.f32 r0.x, c7.x, r0.x, c7.y
-mov.f32f32 r0.z, c13.z
-nop
-mov.f32f32 r1.w, r0.y
-mov.f32f32 r1.z, r0.x
-mov.f32f32 r4.w, r0.z
+add.f r0.y, r0.y, c3.y
+mul.f r6.x, r3.y, c6.z
+add.f r0.x, r0.x, c3.x
+mul.f r5.w, r3.x, c6.y
+mul.f r5.z, r2.w, c6.x
+mad.f32 r3.y, c7.x, r7.z, c7.y
+mad.f32 r3.x, c7.x, r7.x, c7.y
+mov.f32f32 r2.w, c13.z
end
nop
nop
-nop
-; VERT: outputs: r9.w (0:0) r5.w (5:9) r4.x (5:10) r1.z (5:11) r7.w (5:12) r6.w (5:13) r8.w (5:14)
-; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r1.z (0:0,cm=7,il=16,b=0) r2.y (0:0,cm=3,il=20,b=0)
-; VERT: 150 instructions, 0 half, 11 full
-; pos: r9.w
+; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11) r4.x (5:12) r5.x (5:13) r6.x (5:14)
+; VERT: inputs: r7.x (0:0,cm=7,il=8,b=0) r6.y (0:0,cm=7,il=12,b=0) r2.w (0:0,cm=7,il=16,b=0) r3.z (0:0,cm=3,il=20,b=0)
+; VERT: 90 instructions, 0 half, 8 full
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-44.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-44.asm
index f9f5af9..7a7273d 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-44.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-44.asm
@@ -4,214 +4,135 @@
@in(r0.w) in3
@in(r1.x) in4
@in(r1.y) in5
-@out(r2.y) out0
-@out(r2.z) out1
-@out(r2.w) out2
-@out(r3.x) out3
+@out(r2.x) out0
+@out(r2.y) out1
+@out(r2.z) out2
+@out(r2.w) out3
+@const(c9.x) 0x3f000000, 0x00000000, 0xba03126f, 0xbf000000
+@const(c10.x) 0x3f800000, 0x40000000, 0xbf000000, 0x3fb8aa65
+@const(c11.x) 0x3de38866, 0x3cf5c28f, 0x00000000, 0x00000000
+@const(c12.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.x, 0, r1.x
add.f r0.y, r0.w, c9.y
bary.f r0.w, 1, r1.x
bary.f r1.z, 4, r1.x
-add.f r1.w, r0.x, c9.w
-bary.f r2.x, 6, r1.x
+add.f r2.x, r0.x, c9.w
+bary.f r1.w, 5, r1.x
add.f r2.y, r0.w, c9.w
-mov.f32f32 r1.z, r1.z
-floor.f r2.z, r1.w
+bary.f r2.z, 2, r1.x
+floor.f r2.w, r2.x
rcp r0.y, r0.y
add.f r0.z, r0.z, c9.y
-floor.f r2.w, r2.y
-mov.f32f32 r3.x, r1.z
-add.f r1.z, r1.w, (neg)r2.z
+floor.f r3.x, r2.y
+add.f r3.w, r2.z, c9.z
+add.f r2.x, r2.x, (neg)r2.w
(ss)mul.f r0.y, r0.z, r0.y
-add.f r0.z, r2.y, (neg)r2.w
-bary.f r1.w, 5, r1.x
-mov.f32f32 r1.z, r1.z
+absneg.f r0.z, (neg)c7.x
+add.f r2.y, r2.y, (neg)r3.x
+mov.f32f32 r2.z, r2.x
+add.f r2.x, r2.x, c10.x
+mul.f r0.z, r0.z, c7.x
+sam (f32)(w)r4.x, r1.z, s#1, t#1
+(ss)mov.f32f32 r1.z, r2.y
+mul.f r1.w, c9.x, r2.z
+add.f r2.z, c10.y, (neg)r2.z
+mul.f r0.z, r0.z, r0.y
mov.f32f32 r0.y, r0.y
-absneg.f r2.y, (neg)c7.x
-mov.f32f32 r0.z, r0.z
-mul.f r2.z, c9.x, r1.z
-add.f r2.w, c10.y, (neg)r1.z
-mul.f r2.y, r2.y, c7.x
-add.f r3.y, c10.y, (neg)r0.z
-mov.f32f32 r2.z, r2.z
-mul.f r3.z, c9.x, r0.z
-mul.f r2.y, r2.y, r0.y
-mov.f32f32 r2.w, r2.w
-add.f r0.x, r0.x, (neg)r2.z
-mov.f32f32 r2.z, r3.z
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r0.x, r0.x
-add.f r0.w, r0.w, (neg)r2.z
-mul.f r0.y, r2.y, r0.y
-mul.f r2.y, r2.w, r3.y
-add.f r2.z, c10.z, r0.x
+add.f r0.x, r0.x, (neg)r1.w
+mul.f r1.w, c9.x, r1.z
+mov.f32f32 r2.w, r2.z
+mul.f r0.y, r0.z, r0.y
+mov.f32f32 r0.z, r0.x
+add.f r0.w, r0.w, (neg)r1.w
add.f r0.x, c10.x, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r0.x, r0.x
mul.f r0.y, r0.y, c10.w
-add.f r3.z, c10.x, r0.w
-mul.f r2.z, r2.z, c3.z
-mul.f r0.x, r0.x, c3.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r3.z, r3.z
-mov.f32f32 r3.w, r2.z
-mov.f32f32 r4.x, r0.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r4.y, r3.w
-add.f r0.w, c10.z, r0.w
-exp2 r0.y, r0.y
-(ss)mov.f32f32 r0.y, r0.y
-mov.f32f32 r3.w, r4.x
-mov.f32f32 r5.x, r0.x
-mov.f32f32 r0.x, r0.w
-add.f r0.w, c12.y, (neg)r0.y
-mov.f32f32 r5.w, r3.w
-mul.f r3.z, r3.z, c3.w
-mul.f r0.x, r0.x, c3.w
-mul.f r0.w, r0.w, c7.y
-mul.f r0.y, r0.y, c10.x
-mov.f32f32 r3.w, r3.z
-mov.f32f32 r4.x, r0.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r6.z, r2.z
-mov.f32f32 r2.z, r3.z
-mov.f32f32 r4.z, r4.x
-bary.f r3.z, 2, r1.x
-add.f r0.y, r0.y, r0.w
-mov.f32f32 r0.w, r3.w
-mov.f32f32 r5.y, r0.x
-add.f r0.x, r3.z, c9.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r6.x, r0.w
-mov.f32f32 r6.w, r2.z
-mov.f32f32 r0.w, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r2.z, r0.x
-mov.f32f32 r3.z, r0.x
-mov.f32f32 r4.w, r0.w
-max.f r0.y, r0.y, c9.y
-mov.f32f32 r6.y, r2.z
-mov.f32f32 r5.z, r3.z
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.w, r2.y
-add.f r1.z, r1.z, c10.x
-sam.s (f32)(x)r2.y, r4.y, s#2, t#2
-(sy)mov.f32f32 r2.y, r2.y
-min.f r0.y, r0.y, c10.x
-sam.s (f32)(x)r2.z, r5.w, s#2, t#2
-(sy)mov.f32f32 r2.z, r2.z
-sam.s (f32)(x)r3.z, r5.x, s#2, t#2
-(sy)mov.f32f32 r3.z, r3.z
-mov.f32f32 r2.y, r2.y
-add.f r3.w, c12.y, (neg)r0.y
-add.f r4.x, c12.y, (neg)r0.y
-(ss)add.f r4.y, c12.y, (neg)r0.y
-mul.f r0.w, r0.w, r2.y
-mul.f r2.y, r1.z, r3.y
-mul.f r3.w, r3.w, c6.z
-mul.f r4.x, r4.x, c6.y
-mul.f r4.y, r4.y, c6.x
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r7.x, r0.x
-add.f r0.x, r0.z, c10.x
-mov.f32f32 r0.z, r1.w
-mad.f32 r0.w, r2.y, r3.z, r0.w
-mov.f32f32 r1.w, r2.x
-bary.f r2.x, 7, r1.x
-mul.f r2.y, r2.w, r0.x
-mov.f32f32 r0.w, r0.w
-sam.s (f32)(x)r2.w, r6.z, s#2, t#2
-(sy)mov.f32f32 r2.w, r2.w
-mul.f r0.x, r1.z, r0.x
-mov.f32f32 r1.z, r2.y
-mov.f32f32 r3.y, r0.z
-mov.f32f32 r4.z, r1.w
-mov.f32f32 r0.z, r2.x
-mad.f32 r0.w, r1.z, r2.w, r0.w
-mov.f32f32 r0.x, r0.x
-bary.f r1.z, 10, r1.x
-mov.f32f32 r4.w, r0.z
-mov.f32f32 r0.z, r0.w
-sam (f32)(w)r2.w, r3.x, s#1, t#1
-(sy)cmps.f.lt r0.w, r3.z, c11.y
-mad.f32 r0.x, r0.x, r2.z, r0.z
-mov.f32f32 r0.z, r3.z
-bary.f r1.w, 9, r1.x
-cov.u32f32 r0.w, r0.w
-mov.f32f32 r0.x, r0.x
-sam (f32)(xyzw)r2.x, r4.z, s#0, t#0
-mov.f32f32 r0.z, r0.z
-(sy)mov.f32f32 r2.w, r2.w
-nop
-mul.f r0.x, c11.x, r0.x
-cmps.f.ne r0.w, r0.w, c9.y
+add.f r0.z, c10.z, r0.z
+mov.f32f32 r1.w, r0.w
+mul.f r3.y, r0.x, c3.z
+add.f r0.x, c10.x, r0.w
+mul.f r4.x, r0.z, c3.z
+add.f r0.z, c10.z, r1.w
+mov.f32f32 r5.x, r3.y
+mul.f r3.z, r0.x, c3.w
+mov.f32f32 r5.w, r4.x
+mul.f r5.y, r0.z, c3.w
+exp2 r0.x, r0.y
+(ss)mov.f32f32 r0.y, r0.x
+mov.f32f32 r5.z, r3.w
+mov.f32f32 r4.y, r3.z
+mov.f32f32 r6.x, r5.y
+mov.f32f32 r6.y, r3.w
+add.f r0.y, c12.y, (neg)r0.y
+mov.f32f32 r4.z, r3.w
+sam.s (f32)(x)r3.x, r3.y, s#2, t#2
+add.f r0.z, c10.y, (neg)r1.z
+sam.s (f32)(x)r6.z, r5.x, s#2, t#2
+mul.f r0.x, r0.x, c10.x
+add.f r0.w, r2.y, c10.x
+mul.f r0.y, r0.y, c7.y
+(ss)nop
+sam.s (f32)(x)r5.x, r5.w, s#2, t#2
+mov.f32f32 r1.z, r0.z
+mul.f r0.z, r2.x, r0.z
+mul.f r1.w, r2.z, r0.w
+add.f r0.x, r0.x, r0.y
+mul.f r0.y, r2.w, r1.z
+sam.s (f32)(x)r3.y, r4.x, s#2, t#2
+mul.f r0.w, r2.x, r0.w
+(sy)cmps.f.lt r1.z, r4.w, c11.y
+bary.f r2.x, 6, r1.x
+mul.f r0.y, r0.y, r5.x
+max.f r0.x, r0.x, c9.y
+mad.f32 r0.y, r0.z, r6.z, r0.y
+cov.u32f32 r0.z, r1.z
+mad.f32 r0.y, r1.w, r3.y, r0.y
+min.f r0.x, r0.x, c10.x
+mad.f32 r0.y, r0.w, r3.x, r0.y
+cmps.f.ne r0.z, r0.z, c9.y
+(rpt1)nop
+mul.f r0.y, c11.x, r0.y
+bary.f r2.y, 7, r1.x
+add.f r0.w, c12.y, (neg)r0.x
+add.f r1.z, c12.y, (neg)r0.x
+mov.f32f32 r1.w, r0.y
+add.f r3.x, c12.y, (neg)r0.x
(rpt1)nop
-mov.f32f32 r0.x, r0.x
-mul.f r1.z, r2.z, r1.z
-mul.f r1.w, r2.y, r1.w
+sam (f32)(xyzw)r2.x, r2.x, s#0, t#0
+bary.f r3.y, 10, r1.x
+bary.f r3.z, 9, r1.x
bary.f (ei)r1.x, 8, r1.x
-mov.f32f32 r1.y, c9.y
-mul.f r1.z, r1.z, r0.x
-mul.f r1.w, r1.w, r0.x
+mul.f r0.w, r0.w, c6.z
+(sy)mul.f r1.y, r2.z, r3.y
+mul.f r3.y, r2.y, r3.z
mul.f r1.x, r2.x, r1.x
-sel.b32 r0.z, r1.y, r0.w, r0.z
-mov.f32f32 r0.w, r1.z
-mov.f32f32 r1.y, r1.w
-mad.f32 r0.w, c5.z, r2.z, r0.w
-mad.f32 r1.y, c5.y, r2.y, r1.y
-mul.f r0.x, r1.x, r0.x
-(ss)mov.f32f32 r3.x, r2.w
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.x, r1.y
-mov.f32f32 r0.x, r0.x
-nop
-mul.f r0.w, r0.y, r0.w
-mul.f r1.x, r0.y, r1.x
-mad.f32 r0.x, c5.x, r2.x, r0.x
-nop
-add.f r0.w, r0.w, r3.w
-add.f r1.x, r1.x, r4.x
-mov.f32f32 r0.x, r0.x
-nop
-mul.f r0.w, r0.w, r0.z
-mul.f r1.x, r1.x, r0.z
-mul.f r0.x, r0.y, r0.x
-nop
-mov.f32f32 r0.y, r0.w
-mov.f32f32 r0.w, r1.x
-add.f r0.x, r0.x, r4.y
-nop
-mul.f r0.y, r0.y, c4.z
-mul.f r0.w, r0.w, c4.y
-mul.f r0.x, r0.x, r0.z
-nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.w
-mov.f32f32 r0.x, r0.x
-nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-mul.f r0.x, r0.x, c4.x
-nop
-mov.f32f32 r2.w, r0.y
-mov.f32f32 r2.z, r0.z
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
-mov.f32f32 r0.x, r0.x
+mul.f r1.z, r1.z, c6.y
+mul.f r1.y, r1.y, r1.w
+mul.f r1.w, r3.y, r1.w
+mad.f32 r1.y, c5.z, r2.z, r1.y
+mad.f32 r1.w, c5.y, r2.y, r1.w
+mul.f r0.y, r1.x, r0.y
+mul.f r1.x, r3.x, c6.x
+mul.f r1.y, r0.x, r1.y
+mul.f r1.w, r0.x, r1.w
+mad.f32 r0.y, c5.x, r2.x, r0.y
+(ss)mov.f32f32 r2.x, c9.y
+add.f r0.w, r1.y, r0.w
+add.f r1.y, r1.w, r1.z
+mul.f r0.x, r0.x, r0.y
+sel.b32 r0.y, r2.x, r0.z, r4.w
+(rpt1)nop
+add.f r0.x, r0.x, r1.x
+mul.f r0.z, r0.w, r0.y
+mul.f r0.w, r1.y, r0.y
+(rpt1)nop
+mul.f r2.z, r0.z, c4.z
+mul.f r2.y, r0.w, c4.y
+mul.f r0.x, r0.x, r0.y
(rpt2)nop
-mov.f32f32 r2.y, r0.x
+mul.f r2.x, r0.x, c4.x
end
nop
-nop
-nop
-; FRAG: outputs: r2.y (1:0)
-; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r1.y (5:10,cm=f,il=12,b=1) r3.x (5:11,cm=f,il=16,b=1)
-; FRAG: 202 instructions, 0 half, 8 full
-; pos (bary): r1.x
-; color: r2.y
-; fragcoord: r0.x
+; FRAG: outputs: r2.x (1:0)
+; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r1.z (5:10,cm=f,il=12,b=1) r3.x (5:11,cm=f,il=16,b=1)
+; FRAG: 125 instructions, 0 half, 7 full
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-46.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-46.asm
index 6e58225..f1c05da 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-46.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-46.asm
@@ -4,222 +4,159 @@
@in(r0.w) in3
@in(r1.x) in4
@in(r1.y) in5
-@out(r2.y) out0
-@out(r2.z) out1
-@out(r2.w) out2
-@out(r3.x) out3
+@out(r2.x) out0
+@out(r2.y) out1
+@out(r2.z) out2
+@out(r2.w) out3
+@const(c10.x) 0x3f000000, 0x00000000, 0x3f800000, 0xbb449ba6
+@const(c11.x) 0xbf000000, 0x40000000, 0x3f800000, 0x3de38866
+@const(c12.x) 0x3fb8aa65, 0x3cf5c28f, 0x00000000, 0x00000000
+@const(c13.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.x, 4, r1.x
add.f r0.y, r0.w, c10.y
bary.f r0.w, 5, r1.x
bary.f r1.z, 10, r1.x
-add.f r1.w, r0.x, c11.x
-bary.f r2.x, 8, r1.x
+add.f r2.x, r0.x, c11.x
+bary.f r1.w, 11, r1.x
add.f r2.y, r0.w, c11.x
-mov.f32f32 r1.z, r1.z
-floor.f r2.z, r1.w
+bary.f r2.z, 8, r1.x
+floor.f r3.x, r2.x
rcp r0.y, r0.y
add.f r0.z, r0.z, c10.y
-floor.f r2.w, r2.y
-mov.f32f32 r3.x, r1.z
-add.f r1.z, r1.w, (neg)r2.z
+floor.f r3.y, r2.y
+bary.f r2.w, 9, r1.x
+add.f r2.x, r2.x, (neg)r3.x
(ss)mul.f r0.y, r0.z, r0.y
-add.f r0.z, r2.y, (neg)r2.w
-bary.f r1.w, 11, r1.x
-mov.f32f32 r1.z, r1.z
+absneg.f r0.z, (neg)c8.x
+add.f r2.y, r2.y, (neg)r3.y
+mov.f32f32 r3.x, r2.x
+sam (f32)(xyzw)r3.y, r1.z, s#0, t#0
+(sy)(ss)add.f r1.z, c13.y, (neg)r4.x
+mul.f r0.z, r0.z, c8.x
+mov.f32f32 r1.w, r2.y
+mul.f r4.y, c10.x, r3.x
+add.f r3.x, c11.y, (neg)r3.x
+mul.f r0.z, r0.z, r0.y
mov.f32f32 r0.y, r0.y
-absneg.f r2.y, (neg)c8.x
-mov.f32f32 r0.z, r0.z
-mul.f r2.z, c10.x, r1.z
-mov.f32f32 r1.w, r1.w
-mul.f r2.y, r2.y, c8.x
-add.f r2.w, c11.y, (neg)r1.z
-mov.f32f32 r2.z, r2.z
-mul.f r3.z, c10.x, r0.z
-mul.f r2.y, r2.y, r0.y
-mov.f32f32 r3.y, r1.w
-add.f r0.x, r0.x, (neg)r2.z
-mov.f32f32 r1.w, r3.z
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r2.z, r2.w
-mov.f32f32 r0.x, r0.x
-add.f r0.w, r0.w, (neg)r1.w
-mul.f r0.y, r2.y, r0.y
-sam (f32)(xyzw)r2.w, r3.x, s#0, t#0
-(sy)add.f r1.w, c13.y, (neg)r3.z
-add.f r2.y, c11.x, r0.x
+add.f r0.x, r0.x, (neg)r4.y
+mul.f r4.y, c10.x, r1.w
+mov.f32f32 r4.z, r3.x
+mul.f r0.y, r0.z, r0.y
+mov.f32f32 r0.z, r0.x
+add.f r0.w, r0.w, (neg)r4.y
add.f r0.x, c11.z, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r0.x, r0.x
mul.f r0.y, r0.y, c12.x
-add.f r3.w, c11.z, r0.w
-mul.f r2.y, r2.y, c3.z
-mul.f r0.x, r0.x, c3.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r4.x, r2.y
-mov.f32f32 r4.y, r0.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r4.z, r4.x
-add.f r0.w, c11.x, r0.w
-exp2 r0.y, r0.y
-(ss)mov.f32f32 r0.y, r0.y
-mov.f32f32 r4.x, r4.y
-mov.f32f32 r5.y, r0.x
-mov.f32f32 r0.x, r0.w
-add.f r0.w, c13.y, (neg)r0.y
-mov.f32f32 r6.x, r4.x
-mul.f r3.w, r3.w, c3.w
-mul.f r0.x, r0.x, c3.w
-mul.f r0.w, r0.w, c8.y
-mul.f r0.y, r0.y, c10.z
-mov.f32f32 r4.x, r3.w
-mov.f32f32 r4.y, r0.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r6.w, r2.y
-mov.f32f32 r2.y, r3.w
-mov.f32f32 r4.w, r4.y
-bary.f r3.w, 6, r1.x
-add.f r0.y, r0.y, r0.w
-mov.f32f32 r0.w, r4.x
-mov.f32f32 r5.z, r0.x
-add.f r0.x, r3.w, c10.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r6.y, r0.w
-mov.f32f32 r7.x, r2.y
-mov.f32f32 r0.w, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r2.y, r0.x
-mov.f32f32 r3.w, r0.x
-mov.f32f32 r5.x, r0.w
-max.f r0.y, r0.y, c10.y
-mov.f32f32 r6.z, r2.y
-mov.f32f32 r5.w, r3.w
-mov.f32f32 r0.x, r0.x
-mul.f r0.w, r1.w, c4.z
-add.f r1.w, c13.y, (neg)r3.z
-sam.s (f32)(x)r2.y, r4.z, s#2, t#2
-(sy)mov.f32f32 r2.y, r2.y
-min.f r0.y, r0.y, c10.z
-sam.s (f32)(x)r3.w, r6.x, s#2, t#2
-(sy)mov.f32f32 r3.w, r3.w
-sam.s (f32)(x)r4.x, r5.y, s#2, t#2
-(sy)mov.f32f32 r4.x, r4.x
-mov.f32f32 r2.y, r2.y
-add.f r4.y, c11.y, (neg)r0.z
-(ss)add.f r4.z, c13.y, (neg)r0.y
-add.f r4.w, c13.y, (neg)r0.y
-add.f r5.x, c13.y, (neg)r0.y
-mov.f32f32 r4.y, r4.y
-mul.f r4.z, r4.z, c7.z
-mul.f r4.w, r4.w, c7.y
-mul.f r5.x, r5.x, c7.x
-mul.f r5.y, r2.z, r4.y
-mov.f32f32 r7.y, r0.x
-mul.f r0.x, r3.z, c10.z
-mul.f r1.w, r1.w, c4.y
-mul.f r2.y, r5.y, r2.y
-add.f r1.z, r1.z, c10.z
-add.f r0.x, r0.x, r0.w
-mul.f r0.w, r3.z, c10.z
-sam.s (f32)(x)r5.y, r6.w, s#2, t#2
-(sy)mov.f32f32 r5.y, r5.y
-mul.f r4.y, r1.z, r4.y
-add.f r5.z, c13.y, (neg)r3.z
-mul.f r0.x, r3.y, r0.x
-add.f r0.w, r0.w, r1.w
-mad.f32 r1.w, r4.y, r4.x, r2.y
-mul.f r2.y, r5.z, c4.x
-mov.f32f32 r0.x, r0.x
-bary.f r3.y, 2, r1.x
-mov.f32f32 r1.w, r1.w
-add.f r0.z, r0.z, c10.z
-mul.f r0.w, r3.x, r0.w
-mul.f r3.x, r3.z, c10.z
-mov.f32f32 r2.x, r2.x
-mul.f r2.z, r2.z, r0.z
-mul.f r3.y, r0.x, r3.y
-mov.f32f32 r0.w, r0.w
-add.f r2.y, r3.x, r2.y
-mad.f32 r1.w, r2.z, r5.y, r1.w
-bary.f r2.z, 1, r1.x
-mov.f32f32 r4.x, r2.x
-mul.f r2.x, r2.w, r2.y
-mov.f32f32 r1.w, r1.w
-mul.f r0.z, r1.z, r0.z
-mul.f r1.z, r0.w, r2.z
-mov.f32f32 r2.x, r2.x
-nop
-mad.f32 r0.z, r0.z, r3.w, r1.w
-bary.f r1.w, 0, r1.x
-bary.f (ei)r1.x, 9, r1.x
-mov.f32f32 r1.y, c10.z
-mov.f32f32 r0.z, r0.z
-mul.f r1.w, r2.x, r1.w
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r3.x, r1.y
-mul.f r0.z, c11.w, r0.z
-mov.f32f32 r1.y, c10.y
-(rpt1)nop
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r4.y, r1.x
+add.f r0.z, c11.x, r0.z
+mov.f32f32 r4.y, r0.w
+mul.f r4.w, r0.x, c3.z
+add.f r0.x, c11.z, r0.w
+mul.f r5.z, r0.z, c3.z
+add.f r0.z, c11.x, r4.y
+mov.f32f32 r6.y, r4.w
+mul.f r5.x, r0.x, c3.w
+mov.f32f32 r7.x, r5.z
+mul.f r6.z, r0.z, c3.w
+exp2 r0.x, r0.y
+(ss)mov.f32f32 r0.y, r0.x
+bary.f r0.z, 6, r1.x
+mov.f32f32 r5.w, r5.x
+mov.f32f32 r7.y, r6.z
+add.f r0.y, c13.y, (neg)r0.y
+add.f r5.y, r0.z, c10.w
+add.f r0.z, c11.y, (neg)r1.w
+mul.f r0.x, r0.x, c10.z
+add.f r0.w, c13.y, (neg)r4.x
+mov.f32f32 r7.z, r5.y
+mul.f r0.y, r0.y, c8.y
+mov.f32f32 r6.w, r5.y
+mov.f32f32 r6.x, r5.y
(rpt1)nop
-mul.f r1.x, r3.y, r0.z
-mul.f r1.z, r1.z, r0.z
-mul.f r0.z, r1.w, r0.z
+add.f r0.x, r0.x, r0.y
+sam.s (f32)(x)r7.x, r7.x, s#2, t#2
+mov.f32f32 r0.y, r0.z
nop
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.z, r1.z
-mad.f32 r0.x, c6.z, r0.x, r1.x
-mad.f32 r0.w, c6.y, r0.w, r1.z
-mov.f32f32 r0.z, r0.z
-sam (f32)(w)r3.y, r4.x, s#1, t#1
-(sy)cmps.f.lt r1.x, r4.x, c12.y
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.w, r0.w
-mad.f32 r0.z, c6.x, r2.x, r0.z
-cov.u32f32 r1.x, r1.x
-mul.f r0.x, r0.y, r0.x
-mul.f r0.w, r0.y, r0.w
-mov.f32f32 r0.z, r0.z
-cmps.f.ne r1.x, r1.x, c10.y
-add.f r0.x, r0.x, r4.z
-mov.f32f32 r1.z, r4.x
-add.f r0.w, r0.w, r4.w
-mul.f r0.y, r0.y, r0.z
-nop
-mov.f32f32 r0.z, r1.z
-(rpt2)nop
-sel.b32 r0.z, r1.y, r1.x, r0.z
-add.f r0.y, r0.y, r5.x
-(rpt1)nop
-mul.f r0.x, r0.x, r0.z
-mul.f r0.w, r0.w, r0.z
-mul.f r0.y, r0.y, r0.z
+(ss)nop
+sam.s (f32)(x)r7.y, r6.y, s#2, t#2
+sam.s (f32)(x)r7.z, r4.w, s#2, t#2
+mul.f r0.w, r0.w, c4.z
+add.f r1.w, c13.y, (neg)r4.x
+mul.f r0.y, r4.z, r0.y
+max.f r0.x, r0.x, c10.y
+sam.s (f32)(x)r4.y, r5.z, s#2, t#2
+mul.f r4.z, r4.x, c10.z
+mul.f r1.w, r1.w, c4.y
+(sy)mul.f r0.y, r0.y, r7.x
+add.f r2.x, r2.x, c10.z
+min.f r0.x, r0.x, c10.z
+add.f r0.w, r4.z, r0.w
+mul.f r4.z, r4.x, c10.z
+mul.f r0.z, r2.x, r0.z
+(ss)add.f r4.w, c13.y, (neg)r0.x
+add.f r5.x, c13.y, (neg)r0.x
+add.f r5.y, c13.y, (neg)r0.x
+mad.f32 r0.y, r0.z, r7.y, r0.y
+add.f r0.z, r2.y, c10.z
+mul.f r2.y, r4.w, c7.z
+mul.f r4.w, r5.x, c7.y
+mul.f r5.x, r5.y, c7.x
+mul.f r3.x, r3.x, r0.z
+add.f r1.w, r4.z, r1.w
+mul.f r1.z, r1.z, c4.x
+mul.f r0.w, r3.w, r0.w
+mad.f32 r0.y, r3.x, r4.y, r0.y
+mul.f r0.z, r2.x, r0.z
+mul.f r2.x, r4.x, c10.z
+mov.f32f32 r3.x, r0.w
+bary.f r3.w, 2, r1.x
+mad.f32 r0.y, r0.z, r7.z, r0.y
+mul.f r0.z, r3.z, r1.w
+add.f r1.z, r2.x, r1.z
+mul.f r1.w, r3.x, r3.w
+mul.f r0.y, c11.w, r0.y
+mov.f32f32 r2.x, r0.z
+mul.f r1.z, r3.y, r1.z
+bary.f r3.x, 1, r1.x
+mov.f32f32 r3.y, r0.y
+sam (f32)(w)r3.z, r2.z, s#1, t#1
+(sy)(ss)cmps.f.lt r2.z, r4.y, c12.y
+mov.f32f32 r3.z, r1.z
+mul.f r2.x, r2.x, r3.x
+mul.f r1.w, r1.w, r3.y
+bary.f (ei)r1.x, 0, r1.x
+mad.f32 r0.w, c6.z, r0.w, r1.w
+mul.f r1.y, r2.x, r3.y
+cov.u32f32 r1.w, r2.z
+mov.f32f32 r2.w, c10.z
+mul.f r0.w, r0.x, r0.w
+mad.f32 r0.z, c6.y, r0.z, r1.y
+mul.f r1.x, r3.z, r1.x
+cmps.f.ne r1.y, r1.w, c10.y
+add.f r0.w, r0.w, r2.y
+mov.f32f32 r1.w, c10.y
+mul.f r0.z, r0.x, r0.z
+mul.f r0.y, r1.x, r0.y
nop
-mul.f r0.x, r0.x, c5.z
-mul.f r0.z, r0.w, c5.y
-mul.f r0.y, r0.y, c5.x
+sel.b32 r1.x, r1.w, r1.y, r4.y
+add.f r0.z, r0.z, r4.w
+mad.f32 r0.y, c6.x, r1.z, r0.y
nop
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.y, r0.y
+mul.f r0.w, r0.w, r1.x
+mul.f r0.z, r0.z, r1.x
+(rpt1)nop
+mul.f r2.z, r0.w, c5.z
+mul.f r2.y, r0.z, c5.y
+mul.f r0.x, r0.x, r0.y
+(rpt2)nop
+add.f r0.x, r0.x, r5.x
+(rpt2)nop
+mul.f r0.x, r0.x, r1.x
+(rpt2)nop
+mul.f r2.x, r0.x, c5.x
+end
nop
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.y, r0.y
nop
-mov.f32f32 r2.w, r0.x
-mov.f32f32 r2.z, r0.z
-mov.f32f32 r2.y, r0.y
-end
nop
-; FRAG: outputs: r2.y (1:0)
+; FRAG: outputs: r2.x (1:0)
; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r2.x (5:10,cm=f,il=12,b=1) r1.x (5:11,cm=f,il=16,b=1)
-; FRAG: 212 instructions, 0 half, 8 full
-; pos (bary): r1.x
-; color: r2.y
-; fragcoord: r0.x
+; FRAG: 149 instructions, 0 half, 8 full
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-47.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-47.asm
index 78028ad..5b14079 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-47.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-47.asm
@@ -1,103 +1,80 @@
; options:
; VERT: new compiler
-@in(r0.x) in0
-@in(r0.y) in1
-@in(r0.z) in2
-@in(r0.w) in4
-@in(r1.x) in5
-@in(r1.y) in6
-@in(r1.z) in8
-@in(r1.w) in9
-@out(r5.x) out0
-@out(r5.y) out1
-@out(r5.z) out2
-@out(r5.w) out3
-@out(r4.x) out4
-@out(r4.y) out5
-@out(r4.z) out6
-@out(r4.w) out7
-@out(r3.x) out8
-@out(r3.y) out9
-@out(r3.z) out10
-@out(r3.w) out11
-@out(r1.w) out12
-@out(r2.x) out13
-@out(r2.y) out14
-@out(r2.z) out15
-(sy)(ss)mul.f r0.w, r0.w, (neg)c4.x
-mul.f r2.x, c7.w, r0.x
-mad.f32 r0.w, (neg)c4.y, r1.x, r0.w
-mad.f32 r1.x, c8.w, r0.y, r2.x
-mul.f r2.x, c7.z, r0.x
-mul.f r2.y, c7.y, r0.x
-mov.f32f32 r0.w, r0.w
-mad.f32 r1.x, c9.w, r0.z, r1.x
-mad.f32 r0.w, (neg)c4.z, r1.y, r0.w
-mad.f32 r1.y, c8.z, r0.y, r2.x
-mad.f32 r2.x, c8.y, r0.y, r2.y
-mul.f r2.y, c7.x, r0.x
-max.f r0.w, c12.x, r0.w
-add.f r1.x, r1.x, c10.w
-mad.f32 r1.y, c9.z, r0.z, r1.y
-mad.f32 r2.x, c9.y, r0.z, r2.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.x, r1.x
-add.f r1.y, r1.y, c10.z
-add.f r2.x, r2.x, c10.y
-mul.f r2.z, r0.w, c5.z
-mul.f r2.w, r0.w, c5.y
-mul.f r0.w, r0.w, c5.x
-mov.f32f32 r3.w, r1.x
-mov.f32f32 r1.x, r2.z
-mov.f32f32 r2.z, r2.w
-mov.f32f32 r0.w, r0.w
-nop
-mov.f32f32 r4.z, r1.x
-mov.f32f32 r4.y, r2.z
-mov.f32f32 r4.x, r0.w
-mov.f32f32 r0.w, r1.y
-mul.f r1.x, r2.x, c11.y
-mad.f32 r1.y, c8.x, r0.y, r2.y
-mul.f r2.x, c0.w, r0.x
-mov.f32f32 r3.z, r0.w
-mov.f32f32 r3.y, r1.x
-mad.f32 r0.w, c9.x, r0.z, r1.y
-mad.f32 r1.x, c1.w, r0.y, r2.x
-mul.f r1.y, c0.z, r0.x
-mul.f r2.x, c0.y, r0.x
-add.f r0.w, r0.w, c10.x
-mad.f32 r1.x, c2.w, r0.z, r1.x
-mad.f32 r1.y, c1.z, r0.y, r1.y
-mad.f32 r2.x, c1.y, r0.y, r2.x
-mul.f r0.w, r0.w, c11.x
-add.f r1.x, r1.x, c3.w
-mad.f32 r1.y, c2.z, r0.z, r1.y
-mad.f32 r2.x, c2.y, r0.z, r2.x
-mov.f32f32 r3.x, r0.w
-mov.f32f32 r5.w, r1.x
-add.f r0.w, r1.y, c3.z
-add.f r1.x, r2.x, c3.y
-mul.f r1.y, c0.x, r0.x
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r5.z, r0.w
-mov.f32f32 r5.y, r1.x
-mad.f32 r0.y, c1.x, r0.y, r1.y
-mov.f32f32 r2.z, r1.w
-mad.f32 r0.y, c2.x, r0.z, r0.y
-mov.f32f32 r0.w, r1.z
-mad.f32 r0.z, c6.x, r0.z, c6.y
-mad.f32 r0.x, c6.x, r0.x, c6.y
-add.f r0.y, r0.y, c3.x
-mov.f32f32 r2.y, r0.w
-mov.f32f32 r2.x, r0.z
-mov.f32f32 r1.w, r0.x
-mov.f32f32 r5.x, r0.y
-mov.f32f32 r0.x, (0.000000)
-(rpt2)nop
-mov.f32f32 r4.w, r0.x
+@in(r4.x) in0
+@in(r4.y) in1
+@in(r4.z) in2
+@in(r0.x) in4
+@in(r0.y) in5
+@in(r0.z) in6
+@in(r3.z) in8
+@in(r3.w) in9
+@out(r0.x) out0
+@out(r0.y) out1
+@out(r0.z) out2
+@out(r0.w) out3
+@out(r1.x) out4
+@out(r1.y) out5
+@out(r1.z) out6
+@out(r1.w) out7
+@out(r2.x) out8
+@out(r2.y) out9
+@out(r2.z) out10
+@out(r2.w) out11
+@out(r3.x) out12
+@out(r3.y) out13
+@out(r3.z) out14
+@out(r3.w) out15
+@const(c12.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
+@const(c13.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
+(sy)(ss)mul.f r0.x, r0.x, (neg)c4.x
+mul.f r0.w, c7.y, r4.x
+mad.f32 r0.x, (neg)c4.y, r0.y, r0.x
+mad.f32 r0.y, c8.y, r4.y, r0.w
+mad.f32 r0.x, (neg)c4.z, r0.z, r0.x
+mad.f32 r0.y, c9.y, r4.z, r0.y
+mul.f r0.z, c7.x, r4.x
+mul.f r0.w, c7.w, r4.x
+max.f r0.x, c12.x, r0.x
+add.f r0.y, r0.y, c10.y
+mad.f32 r0.z, c8.x, r4.y, r0.z
+mad.f32 r0.w, c8.w, r4.y, r0.w
+mov.f32f32 r1.y, r0.x
+mul.f r2.y, r0.y, c11.y
+mad.f32 r0.y, c9.x, r4.z, r0.z
+mul.f r1.x, r0.x, c5.x
+mul.f r1.z, r1.y, c5.z
+mul.f r1.y, r1.y, c5.y
+add.f r0.x, r0.y, c10.x
+mad.f32 r0.y, c9.w, r4.z, r0.w
+mul.f r0.z, c7.z, r4.x
+mul.f r0.w, c0.w, r4.x
+mul.f r2.x, r0.x, c11.x
+add.f r2.w, r0.y, c10.w
+mad.f32 r0.x, c8.z, r4.y, r0.z
+mad.f32 r0.y, c1.w, r4.y, r0.w
+mad.f32 r0.x, c9.z, r4.z, r0.x
+mad.f32 r0.y, c2.w, r4.z, r0.y
+mul.f r0.z, c0.z, r4.x
+mul.f r1.w, c0.y, r4.x
+add.f r2.z, r0.x, c10.z
+add.f r0.w, r0.y, c3.w
+mad.f32 r0.x, c1.z, r4.y, r0.z
+mad.f32 r0.y, c1.y, r4.y, r1.w
+mad.f32 r0.x, c2.z, r4.z, r0.x
+mad.f32 r0.y, c2.y, r4.z, r0.y
+mul.f r1.w, c0.x, r4.x
+mad.f32 r3.y, c6.x, r4.z, c6.y
+add.f r0.z, r0.x, c3.z
+add.f r0.y, r0.y, c3.y
+mad.f32 r0.x, c1.x, r4.y, r1.w
+mad.f32 r3.x, c6.x, r4.x, c6.y
+mad.f32 r0.x, c2.x, r4.z, r0.x
+mov.f32f32 r1.w, (0.000000)
+(rpt1)nop
+add.f r0.x, r0.x, c3.x
end
-; VERT: outputs: r5.x (0:0) r4.x (5:9) r3.x (5:10) r1.w (5:11)
-; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r1.z (0:0,cm=3,il=16,b=0)
-; VERT: 74 instructions, 0 half, 6 full
-; pos: r5.x
+nop
+; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11)
+; VERT: inputs: r4.x (0:0,cm=7,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r3.z (0:0,cm=3,il=16,b=0)
+; VERT: 48 instructions, 0 half, 5 full
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-48.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-48.asm
index 6e58225..f1c05da 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-48.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-48.asm
@@ -4,222 +4,159 @@
@in(r0.w) in3
@in(r1.x) in4
@in(r1.y) in5
-@out(r2.y) out0
-@out(r2.z) out1
-@out(r2.w) out2
-@out(r3.x) out3
+@out(r2.x) out0
+@out(r2.y) out1
+@out(r2.z) out2
+@out(r2.w) out3
+@const(c10.x) 0x3f000000, 0x00000000, 0x3f800000, 0xbb449ba6
+@const(c11.x) 0xbf000000, 0x40000000, 0x3f800000, 0x3de38866
+@const(c12.x) 0x3fb8aa65, 0x3cf5c28f, 0x00000000, 0x00000000
+@const(c13.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.x, 4, r1.x
add.f r0.y, r0.w, c10.y
bary.f r0.w, 5, r1.x
bary.f r1.z, 10, r1.x
-add.f r1.w, r0.x, c11.x
-bary.f r2.x, 8, r1.x
+add.f r2.x, r0.x, c11.x
+bary.f r1.w, 11, r1.x
add.f r2.y, r0.w, c11.x
-mov.f32f32 r1.z, r1.z
-floor.f r2.z, r1.w
+bary.f r2.z, 8, r1.x
+floor.f r3.x, r2.x
rcp r0.y, r0.y
add.f r0.z, r0.z, c10.y
-floor.f r2.w, r2.y
-mov.f32f32 r3.x, r1.z
-add.f r1.z, r1.w, (neg)r2.z
+floor.f r3.y, r2.y
+bary.f r2.w, 9, r1.x
+add.f r2.x, r2.x, (neg)r3.x
(ss)mul.f r0.y, r0.z, r0.y
-add.f r0.z, r2.y, (neg)r2.w
-bary.f r1.w, 11, r1.x
-mov.f32f32 r1.z, r1.z
+absneg.f r0.z, (neg)c8.x
+add.f r2.y, r2.y, (neg)r3.y
+mov.f32f32 r3.x, r2.x
+sam (f32)(xyzw)r3.y, r1.z, s#0, t#0
+(sy)(ss)add.f r1.z, c13.y, (neg)r4.x
+mul.f r0.z, r0.z, c8.x
+mov.f32f32 r1.w, r2.y
+mul.f r4.y, c10.x, r3.x
+add.f r3.x, c11.y, (neg)r3.x
+mul.f r0.z, r0.z, r0.y
mov.f32f32 r0.y, r0.y
-absneg.f r2.y, (neg)c8.x
-mov.f32f32 r0.z, r0.z
-mul.f r2.z, c10.x, r1.z
-mov.f32f32 r1.w, r1.w
-mul.f r2.y, r2.y, c8.x
-add.f r2.w, c11.y, (neg)r1.z
-mov.f32f32 r2.z, r2.z
-mul.f r3.z, c10.x, r0.z
-mul.f r2.y, r2.y, r0.y
-mov.f32f32 r3.y, r1.w
-add.f r0.x, r0.x, (neg)r2.z
-mov.f32f32 r1.w, r3.z
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r2.z, r2.w
-mov.f32f32 r0.x, r0.x
-add.f r0.w, r0.w, (neg)r1.w
-mul.f r0.y, r2.y, r0.y
-sam (f32)(xyzw)r2.w, r3.x, s#0, t#0
-(sy)add.f r1.w, c13.y, (neg)r3.z
-add.f r2.y, c11.x, r0.x
+add.f r0.x, r0.x, (neg)r4.y
+mul.f r4.y, c10.x, r1.w
+mov.f32f32 r4.z, r3.x
+mul.f r0.y, r0.z, r0.y
+mov.f32f32 r0.z, r0.x
+add.f r0.w, r0.w, (neg)r4.y
add.f r0.x, c11.z, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r0.x, r0.x
mul.f r0.y, r0.y, c12.x
-add.f r3.w, c11.z, r0.w
-mul.f r2.y, r2.y, c3.z
-mul.f r0.x, r0.x, c3.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r4.x, r2.y
-mov.f32f32 r4.y, r0.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r4.z, r4.x
-add.f r0.w, c11.x, r0.w
-exp2 r0.y, r0.y
-(ss)mov.f32f32 r0.y, r0.y
-mov.f32f32 r4.x, r4.y
-mov.f32f32 r5.y, r0.x
-mov.f32f32 r0.x, r0.w
-add.f r0.w, c13.y, (neg)r0.y
-mov.f32f32 r6.x, r4.x
-mul.f r3.w, r3.w, c3.w
-mul.f r0.x, r0.x, c3.w
-mul.f r0.w, r0.w, c8.y
-mul.f r0.y, r0.y, c10.z
-mov.f32f32 r4.x, r3.w
-mov.f32f32 r4.y, r0.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r6.w, r2.y
-mov.f32f32 r2.y, r3.w
-mov.f32f32 r4.w, r4.y
-bary.f r3.w, 6, r1.x
-add.f r0.y, r0.y, r0.w
-mov.f32f32 r0.w, r4.x
-mov.f32f32 r5.z, r0.x
-add.f r0.x, r3.w, c10.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r6.y, r0.w
-mov.f32f32 r7.x, r2.y
-mov.f32f32 r0.w, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r2.y, r0.x
-mov.f32f32 r3.w, r0.x
-mov.f32f32 r5.x, r0.w
-max.f r0.y, r0.y, c10.y
-mov.f32f32 r6.z, r2.y
-mov.f32f32 r5.w, r3.w
-mov.f32f32 r0.x, r0.x
-mul.f r0.w, r1.w, c4.z
-add.f r1.w, c13.y, (neg)r3.z
-sam.s (f32)(x)r2.y, r4.z, s#2, t#2
-(sy)mov.f32f32 r2.y, r2.y
-min.f r0.y, r0.y, c10.z
-sam.s (f32)(x)r3.w, r6.x, s#2, t#2
-(sy)mov.f32f32 r3.w, r3.w
-sam.s (f32)(x)r4.x, r5.y, s#2, t#2
-(sy)mov.f32f32 r4.x, r4.x
-mov.f32f32 r2.y, r2.y
-add.f r4.y, c11.y, (neg)r0.z
-(ss)add.f r4.z, c13.y, (neg)r0.y
-add.f r4.w, c13.y, (neg)r0.y
-add.f r5.x, c13.y, (neg)r0.y
-mov.f32f32 r4.y, r4.y
-mul.f r4.z, r4.z, c7.z
-mul.f r4.w, r4.w, c7.y
-mul.f r5.x, r5.x, c7.x
-mul.f r5.y, r2.z, r4.y
-mov.f32f32 r7.y, r0.x
-mul.f r0.x, r3.z, c10.z
-mul.f r1.w, r1.w, c4.y
-mul.f r2.y, r5.y, r2.y
-add.f r1.z, r1.z, c10.z
-add.f r0.x, r0.x, r0.w
-mul.f r0.w, r3.z, c10.z
-sam.s (f32)(x)r5.y, r6.w, s#2, t#2
-(sy)mov.f32f32 r5.y, r5.y
-mul.f r4.y, r1.z, r4.y
-add.f r5.z, c13.y, (neg)r3.z
-mul.f r0.x, r3.y, r0.x
-add.f r0.w, r0.w, r1.w
-mad.f32 r1.w, r4.y, r4.x, r2.y
-mul.f r2.y, r5.z, c4.x
-mov.f32f32 r0.x, r0.x
-bary.f r3.y, 2, r1.x
-mov.f32f32 r1.w, r1.w
-add.f r0.z, r0.z, c10.z
-mul.f r0.w, r3.x, r0.w
-mul.f r3.x, r3.z, c10.z
-mov.f32f32 r2.x, r2.x
-mul.f r2.z, r2.z, r0.z
-mul.f r3.y, r0.x, r3.y
-mov.f32f32 r0.w, r0.w
-add.f r2.y, r3.x, r2.y
-mad.f32 r1.w, r2.z, r5.y, r1.w
-bary.f r2.z, 1, r1.x
-mov.f32f32 r4.x, r2.x
-mul.f r2.x, r2.w, r2.y
-mov.f32f32 r1.w, r1.w
-mul.f r0.z, r1.z, r0.z
-mul.f r1.z, r0.w, r2.z
-mov.f32f32 r2.x, r2.x
-nop
-mad.f32 r0.z, r0.z, r3.w, r1.w
-bary.f r1.w, 0, r1.x
-bary.f (ei)r1.x, 9, r1.x
-mov.f32f32 r1.y, c10.z
-mov.f32f32 r0.z, r0.z
-mul.f r1.w, r2.x, r1.w
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r3.x, r1.y
-mul.f r0.z, c11.w, r0.z
-mov.f32f32 r1.y, c10.y
-(rpt1)nop
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r4.y, r1.x
+add.f r0.z, c11.x, r0.z
+mov.f32f32 r4.y, r0.w
+mul.f r4.w, r0.x, c3.z
+add.f r0.x, c11.z, r0.w
+mul.f r5.z, r0.z, c3.z
+add.f r0.z, c11.x, r4.y
+mov.f32f32 r6.y, r4.w
+mul.f r5.x, r0.x, c3.w
+mov.f32f32 r7.x, r5.z
+mul.f r6.z, r0.z, c3.w
+exp2 r0.x, r0.y
+(ss)mov.f32f32 r0.y, r0.x
+bary.f r0.z, 6, r1.x
+mov.f32f32 r5.w, r5.x
+mov.f32f32 r7.y, r6.z
+add.f r0.y, c13.y, (neg)r0.y
+add.f r5.y, r0.z, c10.w
+add.f r0.z, c11.y, (neg)r1.w
+mul.f r0.x, r0.x, c10.z
+add.f r0.w, c13.y, (neg)r4.x
+mov.f32f32 r7.z, r5.y
+mul.f r0.y, r0.y, c8.y
+mov.f32f32 r6.w, r5.y
+mov.f32f32 r6.x, r5.y
(rpt1)nop
-mul.f r1.x, r3.y, r0.z
-mul.f r1.z, r1.z, r0.z
-mul.f r0.z, r1.w, r0.z
+add.f r0.x, r0.x, r0.y
+sam.s (f32)(x)r7.x, r7.x, s#2, t#2
+mov.f32f32 r0.y, r0.z
nop
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.z, r1.z
-mad.f32 r0.x, c6.z, r0.x, r1.x
-mad.f32 r0.w, c6.y, r0.w, r1.z
-mov.f32f32 r0.z, r0.z
-sam (f32)(w)r3.y, r4.x, s#1, t#1
-(sy)cmps.f.lt r1.x, r4.x, c12.y
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.w, r0.w
-mad.f32 r0.z, c6.x, r2.x, r0.z
-cov.u32f32 r1.x, r1.x
-mul.f r0.x, r0.y, r0.x
-mul.f r0.w, r0.y, r0.w
-mov.f32f32 r0.z, r0.z
-cmps.f.ne r1.x, r1.x, c10.y
-add.f r0.x, r0.x, r4.z
-mov.f32f32 r1.z, r4.x
-add.f r0.w, r0.w, r4.w
-mul.f r0.y, r0.y, r0.z
-nop
-mov.f32f32 r0.z, r1.z
-(rpt2)nop
-sel.b32 r0.z, r1.y, r1.x, r0.z
-add.f r0.y, r0.y, r5.x
-(rpt1)nop
-mul.f r0.x, r0.x, r0.z
-mul.f r0.w, r0.w, r0.z
-mul.f r0.y, r0.y, r0.z
+(ss)nop
+sam.s (f32)(x)r7.y, r6.y, s#2, t#2
+sam.s (f32)(x)r7.z, r4.w, s#2, t#2
+mul.f r0.w, r0.w, c4.z
+add.f r1.w, c13.y, (neg)r4.x
+mul.f r0.y, r4.z, r0.y
+max.f r0.x, r0.x, c10.y
+sam.s (f32)(x)r4.y, r5.z, s#2, t#2
+mul.f r4.z, r4.x, c10.z
+mul.f r1.w, r1.w, c4.y
+(sy)mul.f r0.y, r0.y, r7.x
+add.f r2.x, r2.x, c10.z
+min.f r0.x, r0.x, c10.z
+add.f r0.w, r4.z, r0.w
+mul.f r4.z, r4.x, c10.z
+mul.f r0.z, r2.x, r0.z
+(ss)add.f r4.w, c13.y, (neg)r0.x
+add.f r5.x, c13.y, (neg)r0.x
+add.f r5.y, c13.y, (neg)r0.x
+mad.f32 r0.y, r0.z, r7.y, r0.y
+add.f r0.z, r2.y, c10.z
+mul.f r2.y, r4.w, c7.z
+mul.f r4.w, r5.x, c7.y
+mul.f r5.x, r5.y, c7.x
+mul.f r3.x, r3.x, r0.z
+add.f r1.w, r4.z, r1.w
+mul.f r1.z, r1.z, c4.x
+mul.f r0.w, r3.w, r0.w
+mad.f32 r0.y, r3.x, r4.y, r0.y
+mul.f r0.z, r2.x, r0.z
+mul.f r2.x, r4.x, c10.z
+mov.f32f32 r3.x, r0.w
+bary.f r3.w, 2, r1.x
+mad.f32 r0.y, r0.z, r7.z, r0.y
+mul.f r0.z, r3.z, r1.w
+add.f r1.z, r2.x, r1.z
+mul.f r1.w, r3.x, r3.w
+mul.f r0.y, c11.w, r0.y
+mov.f32f32 r2.x, r0.z
+mul.f r1.z, r3.y, r1.z
+bary.f r3.x, 1, r1.x
+mov.f32f32 r3.y, r0.y
+sam (f32)(w)r3.z, r2.z, s#1, t#1
+(sy)(ss)cmps.f.lt r2.z, r4.y, c12.y
+mov.f32f32 r3.z, r1.z
+mul.f r2.x, r2.x, r3.x
+mul.f r1.w, r1.w, r3.y
+bary.f (ei)r1.x, 0, r1.x
+mad.f32 r0.w, c6.z, r0.w, r1.w
+mul.f r1.y, r2.x, r3.y
+cov.u32f32 r1.w, r2.z
+mov.f32f32 r2.w, c10.z
+mul.f r0.w, r0.x, r0.w
+mad.f32 r0.z, c6.y, r0.z, r1.y
+mul.f r1.x, r3.z, r1.x
+cmps.f.ne r1.y, r1.w, c10.y
+add.f r0.w, r0.w, r2.y
+mov.f32f32 r1.w, c10.y
+mul.f r0.z, r0.x, r0.z
+mul.f r0.y, r1.x, r0.y
nop
-mul.f r0.x, r0.x, c5.z
-mul.f r0.z, r0.w, c5.y
-mul.f r0.y, r0.y, c5.x
+sel.b32 r1.x, r1.w, r1.y, r4.y
+add.f r0.z, r0.z, r4.w
+mad.f32 r0.y, c6.x, r1.z, r0.y
nop
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.y, r0.y
+mul.f r0.w, r0.w, r1.x
+mul.f r0.z, r0.z, r1.x
+(rpt1)nop
+mul.f r2.z, r0.w, c5.z
+mul.f r2.y, r0.z, c5.y
+mul.f r0.x, r0.x, r0.y
+(rpt2)nop
+add.f r0.x, r0.x, r5.x
+(rpt2)nop
+mul.f r0.x, r0.x, r1.x
+(rpt2)nop
+mul.f r2.x, r0.x, c5.x
+end
nop
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.y, r0.y
nop
-mov.f32f32 r2.w, r0.x
-mov.f32f32 r2.z, r0.z
-mov.f32f32 r2.y, r0.y
-end
nop
-; FRAG: outputs: r2.y (1:0)
+; FRAG: outputs: r2.x (1:0)
; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r2.x (5:10,cm=f,il=12,b=1) r1.x (5:11,cm=f,il=16,b=1)
-; FRAG: 212 instructions, 0 half, 8 full
-; pos (bary): r1.x
-; color: r2.y
-; fragcoord: r0.x
+; FRAG: 149 instructions, 0 half, 8 full
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-49.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-49.asm
index 78028ad..5b14079 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-49.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-49.asm
@@ -1,103 +1,80 @@
; options:
; VERT: new compiler
-@in(r0.x) in0
-@in(r0.y) in1
-@in(r0.z) in2
-@in(r0.w) in4
-@in(r1.x) in5
-@in(r1.y) in6
-@in(r1.z) in8
-@in(r1.w) in9
-@out(r5.x) out0
-@out(r5.y) out1
-@out(r5.z) out2
-@out(r5.w) out3
-@out(r4.x) out4
-@out(r4.y) out5
-@out(r4.z) out6
-@out(r4.w) out7
-@out(r3.x) out8
-@out(r3.y) out9
-@out(r3.z) out10
-@out(r3.w) out11
-@out(r1.w) out12
-@out(r2.x) out13
-@out(r2.y) out14
-@out(r2.z) out15
-(sy)(ss)mul.f r0.w, r0.w, (neg)c4.x
-mul.f r2.x, c7.w, r0.x
-mad.f32 r0.w, (neg)c4.y, r1.x, r0.w
-mad.f32 r1.x, c8.w, r0.y, r2.x
-mul.f r2.x, c7.z, r0.x
-mul.f r2.y, c7.y, r0.x
-mov.f32f32 r0.w, r0.w
-mad.f32 r1.x, c9.w, r0.z, r1.x
-mad.f32 r0.w, (neg)c4.z, r1.y, r0.w
-mad.f32 r1.y, c8.z, r0.y, r2.x
-mad.f32 r2.x, c8.y, r0.y, r2.y
-mul.f r2.y, c7.x, r0.x
-max.f r0.w, c12.x, r0.w
-add.f r1.x, r1.x, c10.w
-mad.f32 r1.y, c9.z, r0.z, r1.y
-mad.f32 r2.x, c9.y, r0.z, r2.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.x, r1.x
-add.f r1.y, r1.y, c10.z
-add.f r2.x, r2.x, c10.y
-mul.f r2.z, r0.w, c5.z
-mul.f r2.w, r0.w, c5.y
-mul.f r0.w, r0.w, c5.x
-mov.f32f32 r3.w, r1.x
-mov.f32f32 r1.x, r2.z
-mov.f32f32 r2.z, r2.w
-mov.f32f32 r0.w, r0.w
-nop
-mov.f32f32 r4.z, r1.x
-mov.f32f32 r4.y, r2.z
-mov.f32f32 r4.x, r0.w
-mov.f32f32 r0.w, r1.y
-mul.f r1.x, r2.x, c11.y
-mad.f32 r1.y, c8.x, r0.y, r2.y
-mul.f r2.x, c0.w, r0.x
-mov.f32f32 r3.z, r0.w
-mov.f32f32 r3.y, r1.x
-mad.f32 r0.w, c9.x, r0.z, r1.y
-mad.f32 r1.x, c1.w, r0.y, r2.x
-mul.f r1.y, c0.z, r0.x
-mul.f r2.x, c0.y, r0.x
-add.f r0.w, r0.w, c10.x
-mad.f32 r1.x, c2.w, r0.z, r1.x
-mad.f32 r1.y, c1.z, r0.y, r1.y
-mad.f32 r2.x, c1.y, r0.y, r2.x
-mul.f r0.w, r0.w, c11.x
-add.f r1.x, r1.x, c3.w
-mad.f32 r1.y, c2.z, r0.z, r1.y
-mad.f32 r2.x, c2.y, r0.z, r2.x
-mov.f32f32 r3.x, r0.w
-mov.f32f32 r5.w, r1.x
-add.f r0.w, r1.y, c3.z
-add.f r1.x, r2.x, c3.y
-mul.f r1.y, c0.x, r0.x
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r5.z, r0.w
-mov.f32f32 r5.y, r1.x
-mad.f32 r0.y, c1.x, r0.y, r1.y
-mov.f32f32 r2.z, r1.w
-mad.f32 r0.y, c2.x, r0.z, r0.y
-mov.f32f32 r0.w, r1.z
-mad.f32 r0.z, c6.x, r0.z, c6.y
-mad.f32 r0.x, c6.x, r0.x, c6.y
-add.f r0.y, r0.y, c3.x
-mov.f32f32 r2.y, r0.w
-mov.f32f32 r2.x, r0.z
-mov.f32f32 r1.w, r0.x
-mov.f32f32 r5.x, r0.y
-mov.f32f32 r0.x, (0.000000)
-(rpt2)nop
-mov.f32f32 r4.w, r0.x
+@in(r4.x) in0
+@in(r4.y) in1
+@in(r4.z) in2
+@in(r0.x) in4
+@in(r0.y) in5
+@in(r0.z) in6
+@in(r3.z) in8
+@in(r3.w) in9
+@out(r0.x) out0
+@out(r0.y) out1
+@out(r0.z) out2
+@out(r0.w) out3
+@out(r1.x) out4
+@out(r1.y) out5
+@out(r1.z) out6
+@out(r1.w) out7
+@out(r2.x) out8
+@out(r2.y) out9
+@out(r2.z) out10
+@out(r2.w) out11
+@out(r3.x) out12
+@out(r3.y) out13
+@out(r3.z) out14
+@out(r3.w) out15
+@const(c12.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
+@const(c13.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
+(sy)(ss)mul.f r0.x, r0.x, (neg)c4.x
+mul.f r0.w, c7.y, r4.x
+mad.f32 r0.x, (neg)c4.y, r0.y, r0.x
+mad.f32 r0.y, c8.y, r4.y, r0.w
+mad.f32 r0.x, (neg)c4.z, r0.z, r0.x
+mad.f32 r0.y, c9.y, r4.z, r0.y
+mul.f r0.z, c7.x, r4.x
+mul.f r0.w, c7.w, r4.x
+max.f r0.x, c12.x, r0.x
+add.f r0.y, r0.y, c10.y
+mad.f32 r0.z, c8.x, r4.y, r0.z
+mad.f32 r0.w, c8.w, r4.y, r0.w
+mov.f32f32 r1.y, r0.x
+mul.f r2.y, r0.y, c11.y
+mad.f32 r0.y, c9.x, r4.z, r0.z
+mul.f r1.x, r0.x, c5.x
+mul.f r1.z, r1.y, c5.z
+mul.f r1.y, r1.y, c5.y
+add.f r0.x, r0.y, c10.x
+mad.f32 r0.y, c9.w, r4.z, r0.w
+mul.f r0.z, c7.z, r4.x
+mul.f r0.w, c0.w, r4.x
+mul.f r2.x, r0.x, c11.x
+add.f r2.w, r0.y, c10.w
+mad.f32 r0.x, c8.z, r4.y, r0.z
+mad.f32 r0.y, c1.w, r4.y, r0.w
+mad.f32 r0.x, c9.z, r4.z, r0.x
+mad.f32 r0.y, c2.w, r4.z, r0.y
+mul.f r0.z, c0.z, r4.x
+mul.f r1.w, c0.y, r4.x
+add.f r2.z, r0.x, c10.z
+add.f r0.w, r0.y, c3.w
+mad.f32 r0.x, c1.z, r4.y, r0.z
+mad.f32 r0.y, c1.y, r4.y, r1.w
+mad.f32 r0.x, c2.z, r4.z, r0.x
+mad.f32 r0.y, c2.y, r4.z, r0.y
+mul.f r1.w, c0.x, r4.x
+mad.f32 r3.y, c6.x, r4.z, c6.y
+add.f r0.z, r0.x, c3.z
+add.f r0.y, r0.y, c3.y
+mad.f32 r0.x, c1.x, r4.y, r1.w
+mad.f32 r3.x, c6.x, r4.x, c6.y
+mad.f32 r0.x, c2.x, r4.z, r0.x
+mov.f32f32 r1.w, (0.000000)
+(rpt1)nop
+add.f r0.x, r0.x, c3.x
end
-; VERT: outputs: r5.x (0:0) r4.x (5:9) r3.x (5:10) r1.w (5:11)
-; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r1.z (0:0,cm=3,il=16,b=0)
-; VERT: 74 instructions, 0 half, 6 full
-; pos: r5.x
+nop
+; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11)
+; VERT: inputs: r4.x (0:0,cm=7,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r3.z (0:0,cm=3,il=16,b=0)
+; VERT: 48 instructions, 0 half, 5 full
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-50.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-50.asm
index 6e58225..f1c05da 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-50.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-50.asm
@@ -4,222 +4,159 @@
@in(r0.w) in3
@in(r1.x) in4
@in(r1.y) in5
-@out(r2.y) out0
-@out(r2.z) out1
-@out(r2.w) out2
-@out(r3.x) out3
+@out(r2.x) out0
+@out(r2.y) out1
+@out(r2.z) out2
+@out(r2.w) out3
+@const(c10.x) 0x3f000000, 0x00000000, 0x3f800000, 0xbb449ba6
+@const(c11.x) 0xbf000000, 0x40000000, 0x3f800000, 0x3de38866
+@const(c12.x) 0x3fb8aa65, 0x3cf5c28f, 0x00000000, 0x00000000
+@const(c13.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.x, 4, r1.x
add.f r0.y, r0.w, c10.y
bary.f r0.w, 5, r1.x
bary.f r1.z, 10, r1.x
-add.f r1.w, r0.x, c11.x
-bary.f r2.x, 8, r1.x
+add.f r2.x, r0.x, c11.x
+bary.f r1.w, 11, r1.x
add.f r2.y, r0.w, c11.x
-mov.f32f32 r1.z, r1.z
-floor.f r2.z, r1.w
+bary.f r2.z, 8, r1.x
+floor.f r3.x, r2.x
rcp r0.y, r0.y
add.f r0.z, r0.z, c10.y
-floor.f r2.w, r2.y
-mov.f32f32 r3.x, r1.z
-add.f r1.z, r1.w, (neg)r2.z
+floor.f r3.y, r2.y
+bary.f r2.w, 9, r1.x
+add.f r2.x, r2.x, (neg)r3.x
(ss)mul.f r0.y, r0.z, r0.y
-add.f r0.z, r2.y, (neg)r2.w
-bary.f r1.w, 11, r1.x
-mov.f32f32 r1.z, r1.z
+absneg.f r0.z, (neg)c8.x
+add.f r2.y, r2.y, (neg)r3.y
+mov.f32f32 r3.x, r2.x
+sam (f32)(xyzw)r3.y, r1.z, s#0, t#0
+(sy)(ss)add.f r1.z, c13.y, (neg)r4.x
+mul.f r0.z, r0.z, c8.x
+mov.f32f32 r1.w, r2.y
+mul.f r4.y, c10.x, r3.x
+add.f r3.x, c11.y, (neg)r3.x
+mul.f r0.z, r0.z, r0.y
mov.f32f32 r0.y, r0.y
-absneg.f r2.y, (neg)c8.x
-mov.f32f32 r0.z, r0.z
-mul.f r2.z, c10.x, r1.z
-mov.f32f32 r1.w, r1.w
-mul.f r2.y, r2.y, c8.x
-add.f r2.w, c11.y, (neg)r1.z
-mov.f32f32 r2.z, r2.z
-mul.f r3.z, c10.x, r0.z
-mul.f r2.y, r2.y, r0.y
-mov.f32f32 r3.y, r1.w
-add.f r0.x, r0.x, (neg)r2.z
-mov.f32f32 r1.w, r3.z
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r2.z, r2.w
-mov.f32f32 r0.x, r0.x
-add.f r0.w, r0.w, (neg)r1.w
-mul.f r0.y, r2.y, r0.y
-sam (f32)(xyzw)r2.w, r3.x, s#0, t#0
-(sy)add.f r1.w, c13.y, (neg)r3.z
-add.f r2.y, c11.x, r0.x
+add.f r0.x, r0.x, (neg)r4.y
+mul.f r4.y, c10.x, r1.w
+mov.f32f32 r4.z, r3.x
+mul.f r0.y, r0.z, r0.y
+mov.f32f32 r0.z, r0.x
+add.f r0.w, r0.w, (neg)r4.y
add.f r0.x, c11.z, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r0.x, r0.x
mul.f r0.y, r0.y, c12.x
-add.f r3.w, c11.z, r0.w
-mul.f r2.y, r2.y, c3.z
-mul.f r0.x, r0.x, c3.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r4.x, r2.y
-mov.f32f32 r4.y, r0.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r4.z, r4.x
-add.f r0.w, c11.x, r0.w
-exp2 r0.y, r0.y
-(ss)mov.f32f32 r0.y, r0.y
-mov.f32f32 r4.x, r4.y
-mov.f32f32 r5.y, r0.x
-mov.f32f32 r0.x, r0.w
-add.f r0.w, c13.y, (neg)r0.y
-mov.f32f32 r6.x, r4.x
-mul.f r3.w, r3.w, c3.w
-mul.f r0.x, r0.x, c3.w
-mul.f r0.w, r0.w, c8.y
-mul.f r0.y, r0.y, c10.z
-mov.f32f32 r4.x, r3.w
-mov.f32f32 r4.y, r0.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r6.w, r2.y
-mov.f32f32 r2.y, r3.w
-mov.f32f32 r4.w, r4.y
-bary.f r3.w, 6, r1.x
-add.f r0.y, r0.y, r0.w
-mov.f32f32 r0.w, r4.x
-mov.f32f32 r5.z, r0.x
-add.f r0.x, r3.w, c10.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r6.y, r0.w
-mov.f32f32 r7.x, r2.y
-mov.f32f32 r0.w, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r2.y, r0.x
-mov.f32f32 r3.w, r0.x
-mov.f32f32 r5.x, r0.w
-max.f r0.y, r0.y, c10.y
-mov.f32f32 r6.z, r2.y
-mov.f32f32 r5.w, r3.w
-mov.f32f32 r0.x, r0.x
-mul.f r0.w, r1.w, c4.z
-add.f r1.w, c13.y, (neg)r3.z
-sam.s (f32)(x)r2.y, r4.z, s#2, t#2
-(sy)mov.f32f32 r2.y, r2.y
-min.f r0.y, r0.y, c10.z
-sam.s (f32)(x)r3.w, r6.x, s#2, t#2
-(sy)mov.f32f32 r3.w, r3.w
-sam.s (f32)(x)r4.x, r5.y, s#2, t#2
-(sy)mov.f32f32 r4.x, r4.x
-mov.f32f32 r2.y, r2.y
-add.f r4.y, c11.y, (neg)r0.z
-(ss)add.f r4.z, c13.y, (neg)r0.y
-add.f r4.w, c13.y, (neg)r0.y
-add.f r5.x, c13.y, (neg)r0.y
-mov.f32f32 r4.y, r4.y
-mul.f r4.z, r4.z, c7.z
-mul.f r4.w, r4.w, c7.y
-mul.f r5.x, r5.x, c7.x
-mul.f r5.y, r2.z, r4.y
-mov.f32f32 r7.y, r0.x
-mul.f r0.x, r3.z, c10.z
-mul.f r1.w, r1.w, c4.y
-mul.f r2.y, r5.y, r2.y
-add.f r1.z, r1.z, c10.z
-add.f r0.x, r0.x, r0.w
-mul.f r0.w, r3.z, c10.z
-sam.s (f32)(x)r5.y, r6.w, s#2, t#2
-(sy)mov.f32f32 r5.y, r5.y
-mul.f r4.y, r1.z, r4.y
-add.f r5.z, c13.y, (neg)r3.z
-mul.f r0.x, r3.y, r0.x
-add.f r0.w, r0.w, r1.w
-mad.f32 r1.w, r4.y, r4.x, r2.y
-mul.f r2.y, r5.z, c4.x
-mov.f32f32 r0.x, r0.x
-bary.f r3.y, 2, r1.x
-mov.f32f32 r1.w, r1.w
-add.f r0.z, r0.z, c10.z
-mul.f r0.w, r3.x, r0.w
-mul.f r3.x, r3.z, c10.z
-mov.f32f32 r2.x, r2.x
-mul.f r2.z, r2.z, r0.z
-mul.f r3.y, r0.x, r3.y
-mov.f32f32 r0.w, r0.w
-add.f r2.y, r3.x, r2.y
-mad.f32 r1.w, r2.z, r5.y, r1.w
-bary.f r2.z, 1, r1.x
-mov.f32f32 r4.x, r2.x
-mul.f r2.x, r2.w, r2.y
-mov.f32f32 r1.w, r1.w
-mul.f r0.z, r1.z, r0.z
-mul.f r1.z, r0.w, r2.z
-mov.f32f32 r2.x, r2.x
-nop
-mad.f32 r0.z, r0.z, r3.w, r1.w
-bary.f r1.w, 0, r1.x
-bary.f (ei)r1.x, 9, r1.x
-mov.f32f32 r1.y, c10.z
-mov.f32f32 r0.z, r0.z
-mul.f r1.w, r2.x, r1.w
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r3.x, r1.y
-mul.f r0.z, c11.w, r0.z
-mov.f32f32 r1.y, c10.y
-(rpt1)nop
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r4.y, r1.x
+add.f r0.z, c11.x, r0.z
+mov.f32f32 r4.y, r0.w
+mul.f r4.w, r0.x, c3.z
+add.f r0.x, c11.z, r0.w
+mul.f r5.z, r0.z, c3.z
+add.f r0.z, c11.x, r4.y
+mov.f32f32 r6.y, r4.w
+mul.f r5.x, r0.x, c3.w
+mov.f32f32 r7.x, r5.z
+mul.f r6.z, r0.z, c3.w
+exp2 r0.x, r0.y
+(ss)mov.f32f32 r0.y, r0.x
+bary.f r0.z, 6, r1.x
+mov.f32f32 r5.w, r5.x
+mov.f32f32 r7.y, r6.z
+add.f r0.y, c13.y, (neg)r0.y
+add.f r5.y, r0.z, c10.w
+add.f r0.z, c11.y, (neg)r1.w
+mul.f r0.x, r0.x, c10.z
+add.f r0.w, c13.y, (neg)r4.x
+mov.f32f32 r7.z, r5.y
+mul.f r0.y, r0.y, c8.y
+mov.f32f32 r6.w, r5.y
+mov.f32f32 r6.x, r5.y
(rpt1)nop
-mul.f r1.x, r3.y, r0.z
-mul.f r1.z, r1.z, r0.z
-mul.f r0.z, r1.w, r0.z
+add.f r0.x, r0.x, r0.y
+sam.s (f32)(x)r7.x, r7.x, s#2, t#2
+mov.f32f32 r0.y, r0.z
nop
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.z, r1.z
-mad.f32 r0.x, c6.z, r0.x, r1.x
-mad.f32 r0.w, c6.y, r0.w, r1.z
-mov.f32f32 r0.z, r0.z
-sam (f32)(w)r3.y, r4.x, s#1, t#1
-(sy)cmps.f.lt r1.x, r4.x, c12.y
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.w, r0.w
-mad.f32 r0.z, c6.x, r2.x, r0.z
-cov.u32f32 r1.x, r1.x
-mul.f r0.x, r0.y, r0.x
-mul.f r0.w, r0.y, r0.w
-mov.f32f32 r0.z, r0.z
-cmps.f.ne r1.x, r1.x, c10.y
-add.f r0.x, r0.x, r4.z
-mov.f32f32 r1.z, r4.x
-add.f r0.w, r0.w, r4.w
-mul.f r0.y, r0.y, r0.z
-nop
-mov.f32f32 r0.z, r1.z
-(rpt2)nop
-sel.b32 r0.z, r1.y, r1.x, r0.z
-add.f r0.y, r0.y, r5.x
-(rpt1)nop
-mul.f r0.x, r0.x, r0.z
-mul.f r0.w, r0.w, r0.z
-mul.f r0.y, r0.y, r0.z
+(ss)nop
+sam.s (f32)(x)r7.y, r6.y, s#2, t#2
+sam.s (f32)(x)r7.z, r4.w, s#2, t#2
+mul.f r0.w, r0.w, c4.z
+add.f r1.w, c13.y, (neg)r4.x
+mul.f r0.y, r4.z, r0.y
+max.f r0.x, r0.x, c10.y
+sam.s (f32)(x)r4.y, r5.z, s#2, t#2
+mul.f r4.z, r4.x, c10.z
+mul.f r1.w, r1.w, c4.y
+(sy)mul.f r0.y, r0.y, r7.x
+add.f r2.x, r2.x, c10.z
+min.f r0.x, r0.x, c10.z
+add.f r0.w, r4.z, r0.w
+mul.f r4.z, r4.x, c10.z
+mul.f r0.z, r2.x, r0.z
+(ss)add.f r4.w, c13.y, (neg)r0.x
+add.f r5.x, c13.y, (neg)r0.x
+add.f r5.y, c13.y, (neg)r0.x
+mad.f32 r0.y, r0.z, r7.y, r0.y
+add.f r0.z, r2.y, c10.z
+mul.f r2.y, r4.w, c7.z
+mul.f r4.w, r5.x, c7.y
+mul.f r5.x, r5.y, c7.x
+mul.f r3.x, r3.x, r0.z
+add.f r1.w, r4.z, r1.w
+mul.f r1.z, r1.z, c4.x
+mul.f r0.w, r3.w, r0.w
+mad.f32 r0.y, r3.x, r4.y, r0.y
+mul.f r0.z, r2.x, r0.z
+mul.f r2.x, r4.x, c10.z
+mov.f32f32 r3.x, r0.w
+bary.f r3.w, 2, r1.x
+mad.f32 r0.y, r0.z, r7.z, r0.y
+mul.f r0.z, r3.z, r1.w
+add.f r1.z, r2.x, r1.z
+mul.f r1.w, r3.x, r3.w
+mul.f r0.y, c11.w, r0.y
+mov.f32f32 r2.x, r0.z
+mul.f r1.z, r3.y, r1.z
+bary.f r3.x, 1, r1.x
+mov.f32f32 r3.y, r0.y
+sam (f32)(w)r3.z, r2.z, s#1, t#1
+(sy)(ss)cmps.f.lt r2.z, r4.y, c12.y
+mov.f32f32 r3.z, r1.z
+mul.f r2.x, r2.x, r3.x
+mul.f r1.w, r1.w, r3.y
+bary.f (ei)r1.x, 0, r1.x
+mad.f32 r0.w, c6.z, r0.w, r1.w
+mul.f r1.y, r2.x, r3.y
+cov.u32f32 r1.w, r2.z
+mov.f32f32 r2.w, c10.z
+mul.f r0.w, r0.x, r0.w
+mad.f32 r0.z, c6.y, r0.z, r1.y
+mul.f r1.x, r3.z, r1.x
+cmps.f.ne r1.y, r1.w, c10.y
+add.f r0.w, r0.w, r2.y
+mov.f32f32 r1.w, c10.y
+mul.f r0.z, r0.x, r0.z
+mul.f r0.y, r1.x, r0.y
nop
-mul.f r0.x, r0.x, c5.z
-mul.f r0.z, r0.w, c5.y
-mul.f r0.y, r0.y, c5.x
+sel.b32 r1.x, r1.w, r1.y, r4.y
+add.f r0.z, r0.z, r4.w
+mad.f32 r0.y, c6.x, r1.z, r0.y
nop
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.y, r0.y
+mul.f r0.w, r0.w, r1.x
+mul.f r0.z, r0.z, r1.x
+(rpt1)nop
+mul.f r2.z, r0.w, c5.z
+mul.f r2.y, r0.z, c5.y
+mul.f r0.x, r0.x, r0.y
+(rpt2)nop
+add.f r0.x, r0.x, r5.x
+(rpt2)nop
+mul.f r0.x, r0.x, r1.x
+(rpt2)nop
+mul.f r2.x, r0.x, c5.x
+end
nop
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.y, r0.y
nop
-mov.f32f32 r2.w, r0.x
-mov.f32f32 r2.z, r0.z
-mov.f32f32 r2.y, r0.y
-end
nop
-; FRAG: outputs: r2.y (1:0)
+; FRAG: outputs: r2.x (1:0)
; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r2.x (5:10,cm=f,il=12,b=1) r1.x (5:11,cm=f,il=16,b=1)
-; FRAG: 212 instructions, 0 half, 8 full
-; pos (bary): r1.x
-; color: r2.y
-; fragcoord: r0.x
+; FRAG: 149 instructions, 0 half, 8 full
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-51.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-51.asm
index 0e4d5ee..9c8ac11 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-51.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-51.asm
@@ -6,134 +6,99 @@
@in(r0.w) in4
@in(r1.x) in5
@in(r1.y) in6
-@in(r1.z) in8
-@in(r1.w) in9
-@out(r6.y) out0
-@out(r6.z) out1
-@out(r6.w) out2
-@out(r7.x) out3
-@out(r5.y) out4
-@out(r5.z) out5
-@out(r5.w) out6
-@out(r6.x) out7
-@out(r2.w) out8
-@out(r3.x) out9
-@out(r3.y) out10
-@out(r3.z) out11
-@out(r3.w) out12
-@out(r4.x) out13
-@out(r4.y) out14
-@out(r4.z) out15
-(sy)(ss)mul.f r2.x, c11.x, r0.w
-mul.f r2.y, c11.x, r0.x
-mad.f32 r2.x, c12.x, r1.x, r2.x
-mad.f32 r2.y, c12.x, r0.y, r2.y
-mul.f r2.z, c11.z, r0.x
-mad.f32 r2.y, c13.x, r0.z, r2.y
-mov.f32f32 r2.x, r2.x
-mad.f32 r2.z, c12.z, r0.y, r2.z
-mad.f32 r2.x, c13.x, r1.y, r2.x
-add.f r2.y, r2.y, c14.x
-mad.f32 r2.z, c13.z, r0.z, r2.z
-mul.f r2.w, c11.y, r0.w
-mov.f32f32 r2.x, r2.x
-mul.f r3.x, c7.w, r2.y
-mul.f r3.y, c7.z, r2.y
-mul.f r3.z, c7.y, r2.y
-mul.f r2.x, r2.x, (neg)c4.x
-mad.f32 r2.w, c12.y, r1.x, r2.w
-mul.f r3.w, c11.y, r0.x
-mul.f r4.x, c7.x, r2.y
-mad.f32 r3.w, c12.y, r0.y, r3.w
-mov.f32f32 r2.w, r2.w
-mad.f32 r3.w, c13.y, r0.z, r3.w
-mad.f32 r2.w, c13.y, r1.y, r2.w
-mul.f r4.y, c0.w, r2.y
-mul.f r4.z, c0.z, r2.y
-mul.f r4.w, c0.y, r2.y
-mov.f32f32 r2.w, r2.w
-add.f r3.w, r3.w, c14.y
-mul.f r5.x, c0.x, r2.y
-add.f r2.z, r2.z, c14.z
-mad.f32 r2.x, (neg)c4.y, r2.w, r2.x
-mad.f32 r2.w, c8.w, r3.w, r3.x
-mad.f32 r3.x, c8.z, r3.w, r3.y
-mad.f32 r3.y, c8.y, r3.w, r3.z
-mov.f32f32 r2.x, r2.x
+@in(r3.z) in8
+@in(r3.w) in9
+@out(r0.x) out0
+@out(r0.y) out1
+@out(r0.z) out2
+@out(r0.w) out3
+@out(r1.x) out4
+@out(r1.y) out5
+@out(r1.z) out6
+@out(r1.w) out7
+@out(r2.x) out8
+@out(r2.y) out9
+@out(r2.z) out10
+@out(r2.w) out11
+@out(r3.x) out12
+@out(r3.y) out13
+@out(r3.z) out14
+@out(r3.w) out15
+@const(c16.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
+@const(c17.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
+(sy)(ss)mul.f r1.z, c11.x, r0.w
+mul.f r1.w, c11.x, r0.x
+mad.f32 r1.z, c12.x, r1.x, r1.z
+mad.f32 r1.w, c12.x, r0.y, r1.w
+mad.f32 r1.z, c13.x, r1.y, r1.z
+mad.f32 r1.w, c13.x, r0.z, r1.w
+mul.f r2.x, c11.z, r0.x
+mul.f r2.y, c11.y, r0.x
+mul.f r1.z, r1.z, (neg)c4.x
+mul.f r2.z, c11.y, r0.w
+add.f r1.w, r1.w, c14.x
+mad.f32 r2.z, c12.y, r1.x, r2.z
+mad.f32 r2.x, c12.z, r0.y, r2.x
+mad.f32 r2.z, c13.y, r1.y, r2.z
+mul.f r2.w, c7.y, r1.w
+mul.f r3.x, c7.x, r1.w
+mul.f r3.y, c7.w, r1.w
+mad.f32 r1.z, (neg)c4.y, r2.z, r1.z
mul.f r0.w, c11.z, r0.w
-mad.f32 r2.w, c9.w, r2.z, r2.w
+mad.f32 r2.y, c12.y, r0.y, r2.y
mad.f32 r0.w, c12.z, r1.x, r0.w
+mad.f32 r1.x, c13.y, r0.z, r2.y
+mad.f32 r0.w, c13.z, r1.y, r0.w
+mul.f r1.y, c7.z, r1.w
+mul.f r2.y, c0.w, r1.w
+mul.f r2.z, c0.z, r1.w
+mad.f32 r0.w, (neg)c4.z, r0.w, r1.z
+add.f r4.x, r1.x, c14.y
+mul.f r4.y, c0.y, r1.w
+mul.f r4.z, c0.x, r1.w
+max.f r0.w, c16.x, r0.w
+mad.f32 r2.w, c8.y, r4.x, r2.w
+mad.f32 r3.x, c8.x, r4.x, r3.x
+mad.f32 r3.y, c8.w, r4.x, r3.y
+mov.f32f32 r4.w, r0.w
+mad.f32 r2.x, c13.z, r0.z, r2.x
+mul.f r1.x, r0.w, c5.x
+mad.f32 r0.w, c8.z, r4.x, r1.y
+mul.f r1.z, r4.w, c5.z
+mul.f r1.y, r4.w, c5.y
+add.f r4.w, r2.x, c14.z
+mad.f32 r2.x, c1.w, r4.x, r2.y
+mad.f32 r2.y, c1.z, r4.x, r2.z
+mad.f32 r2.z, c1.y, r4.x, r4.y
+mad.f32 r2.w, c9.y, r4.w, r2.w
mul.f r0.x, c11.w, r0.x
-mad.f32 r1.x, c9.z, r2.z, r3.x
-mad.f32 r3.x, c9.y, r2.z, r3.y
-mov.f32f32 r0.w, r0.w
+mad.f32 r3.x, c9.x, r4.w, r3.x
mad.f32 r0.x, c12.w, r0.y, r0.x
-mad.f32 r0.y, c13.z, r1.y, r0.w
+mad.f32 r0.y, c9.w, r4.w, r3.y
mad.f32 r0.x, c13.w, r0.z, r0.x
-mad.f32 r0.z, c8.x, r3.w, r4.x
-mad.f32 r0.w, c1.w, r3.w, r4.y
-mov.f32f32 r0.y, r0.y
+mad.f32 r0.z, c9.z, r4.w, r0.w
+mad.f32 r0.w, c2.w, r4.w, r2.x
+mad.f32 r4.y, c2.z, r4.w, r2.y
add.f r0.x, r0.x, c14.w
-mad.f32 r0.z, c9.x, r2.z, r0.z
-mad.f32 r0.w, c2.w, r2.z, r0.w
-mad.f32 r0.y, (neg)c4.z, r0.y, r2.x
-mad.f32 r1.y, c10.w, r0.x, r2.w
-mad.f32 r1.x, c10.z, r0.x, r1.x
-mad.f32 r2.x, c10.y, r0.x, r3.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.x, r1.x
-mul.f r2.x, r2.x, c15.y
-max.f r0.y, c16.x, r0.y
-mov.f32f32 r3.z, r1.y
-mov.f32f32 r3.y, r1.x
-mov.f32f32 r3.x, r2.x
-mov.f32f32 r0.y, r0.y
-mad.f32 r0.z, c10.x, r0.x, r0.z
+mad.f32 r5.x, c2.y, r4.w, r2.z
+mad.f32 r4.x, c1.x, r4.x, r4.z
+mad.f32 r3.y, c6.x, r4.w, c6.y
+mad.f32 r2.x, c10.y, r0.x, r2.w
+mad.f32 r3.x, c10.x, r0.x, r3.x
+mad.f32 r2.w, c10.w, r0.x, r0.y
+mad.f32 r2.z, c10.z, r0.x, r0.z
+mul.f r2.y, r2.x, c15.y
+mul.f r2.x, r3.x, c15.x
mad.f32 r0.w, c3.w, r0.x, r0.w
-mad.f32 r1.x, c1.z, r3.w, r4.z
-mul.f r1.y, r0.y, c5.z
-mul.f r2.x, r0.y, c5.y
-mul.f r0.y, r0.y, c5.x
-mul.f r0.z, r0.z, c15.x
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r0.y, r0.y
-nop
-mov.f32f32 r5.w, r1.y
-mov.f32f32 r5.z, r2.x
-mov.f32f32 r5.y, r0.y
-mov.f32f32 r2.w, r0.z
-mov.f32f32 r7.x, r0.w
-mad.f32 r0.y, c2.z, r2.z, r1.x
-mad.f32 r0.z, c1.y, r3.w, r4.w
-mad.f32 r0.y, c3.z, r0.x, r0.y
-mad.f32 r0.z, c2.y, r2.z, r0.z
-mad.f32 r0.w, c1.x, r3.w, r5.x
-mad.f32 r1.x, c6.x, r2.z, c6.y
-mov.f32f32 r6.w, r0.y
-mad.f32 r0.y, c3.y, r0.x, r0.z
-mad.f32 r0.z, c2.x, r2.z, r0.w
-mov.f32f32 r0.w, r1.x
-mad.f32 r1.x, c6.x, r2.y, c6.y
-mov.f32f32 r6.z, r0.y
-mad.f32 r0.x, c3.x, r0.x, r0.z
-mov.f32f32 r4.x, r0.w
-mov.f32f32 r0.y, r1.x
-nop
-mov.f32f32 r6.y, r0.x
-mov.f32f32 r0.x, r1.w
-mov.f32f32 r3.w, r0.y
-mov.f32f32 r0.y, r1.z
-mov.f32f32 r0.z, (0.000000)
-mov.f32f32 r4.z, r0.x
-nop
-mov.f32f32 r4.y, r0.y
-mov.f32f32 r6.x, r0.z
+mad.f32 r0.z, c3.z, r0.x, r4.y
+mad.f32 r0.y, c3.y, r0.x, r5.x
+mad.f32 r4.x, c2.x, r4.w, r4.x
+mad.f32 r3.x, c6.x, r1.w, c6.y
+mad.f32 r0.x, c3.x, r0.x, r4.x
+mov.f32f32 r1.w, (0.000000)
end
nop
-; VERT: outputs: r6.y (0:0) r5.y (5:9) r2.w (5:10) r3.w (5:11)
-; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r1.z (0:0,cm=3,il=16,b=0)
-; VERT: 107 instructions, 0 half, 8 full
-; pos: r6.y
+; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11)
+; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r3.z (0:0,cm=3,il=16,b=0)
+; VERT: 71 instructions, 0 half, 6 full
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-52.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-52.asm
index c6e09ad..593f290 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-52.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-52.asm
@@ -4,246 +4,167 @@
@in(r0.w) in3
@in(r1.x) in4
@in(r1.y) in5
-@out(r5.y) out0
-@out(r5.z) out1
-@out(r5.w) out2
-@out(r6.x) out3
+@out(r2.x) out0
+@out(r2.y) out1
+@out(r2.z) out2
+@out(r2.w) out3
+@const(c12.x) 0x3f000000, 0x00000000, 0x3f800000, 0xbb449ba6
+@const(c13.x) 0xbf000000, 0x40000000, 0x3f800000, 0x3de38866
+@const(c14.x) 0x3fb8aa65, 0x3cf5c28f, 0x00000000, 0x00000000
+@const(c15.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.x, 8, r1.x
-bary.f r0.y, 4, r1.x
-add.f r0.w, r0.w, c12.y
+add.f r0.y, r0.w, c12.y
+bary.f r0.w, 4, r1.x
bary.f r1.z, 9, r1.x
add.f r1.w, r0.x, c13.x
-mul.f r2.x, r0.y, r0.y
-bary.f r2.y, 5, r1.x
-add.f r2.z, r1.z, c13.x
+bary.f r2.x, 12, r1.x
+mul.f r2.y, r0.w, r0.w
+bary.f r2.z, 5, r1.x
floor.f r2.w, r1.w
-rcp r0.w, r0.w
+rcp r0.y, r0.y
add.f r0.z, r0.z, c12.y
-mad.f32 r2.x, r2.y, r2.y, r2.x
-floor.f r3.x, r2.z
+add.f r3.x, r1.z, c13.x
+mad.f32 r2.y, r2.z, r2.z, r2.y
add.f r1.w, r1.w, (neg)r2.w
-(ss)mul.f r0.z, r0.z, r0.w
-(ss)mov.f32f32 r0.w, r2.x
-bary.f r2.x, 6, r1.x
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r0.z, r0.z
-absneg.f r2.w, (neg)c8.x
-mad.f32 r0.w, r2.x, r2.x, r0.w
-mul.f r3.y, c12.x, r1.w
-add.f r2.z, r2.z, (neg)r3.x
-mul.f r2.w, r2.w, c8.x
-add.f r3.x, c13.y, (neg)r1.w
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r2.z, r2.z
-mul.f r2.w, r2.w, r0.z
-rsq r0.w, r0.w
-(ss)mov.f32f32 r0.w, r0.w
-add.f r0.x, r0.x, (neg)r3.y
-mul.f r3.y, c12.x, r2.z
-mov.f32f32 r2.w, r2.w
-mul.f r0.y, r0.y, r0.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r3.y, r3.y
-mul.f r0.z, r2.w, r0.z
+(ss)mul.f r0.y, r0.z, r0.y
+absneg.f r0.z, (neg)c8.x
+bary.f r2.w, 6, r1.x
+mov.f32f32 r3.y, r1.w
+floor.f r3.z, r3.x
+mul.f r0.z, r0.z, c8.x
+mad.f32 r2.y, r2.w, r2.w, r2.y
+mul.f r3.w, c12.x, r3.y
+add.f r3.x, r3.x, (neg)r3.z
+mul.f r0.z, r0.z, r0.y
mov.f32f32 r0.y, r0.y
-add.f r2.w, c13.x, r0.x
-bary.f r3.z, 16, r1.x
-mov.f32f32 r0.z, r0.z
+add.f r0.x, r0.x, (neg)r3.w
+mov.f32f32 r3.z, r3.x
+rsq r2.y, r2.y
+(ss)mov.f32f32 r3.w, r2.y
+mul.f r0.y, r0.z, r0.y
+mov.f32f32 r0.z, r0.x
+mul.f r4.x, c12.x, r3.z
add.f r0.x, c13.z, r0.x
-mov.f32f32 r2.w, r2.w
-mul.f r0.y, r0.y, r3.z
-mul.f r2.y, r2.y, r0.w
-mul.f r0.z, r0.z, c14.x
-mul.f r2.w, r2.w, c3.z
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r3.z, r2.w
-bary.f r3.w, 17, r1.x
-mul.f r0.x, r0.x, c3.z
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r4.x, r3.z
-add.f r1.z, r1.z, (neg)r3.y
-mad.f32 r0.y, r2.y, r3.w, r0.y
-exp2 r0.z, r0.z
-(ss)mov.f32f32 r0.z, r0.z
-mov.f32f32 r2.y, r0.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r0.y, r0.y
-mul.f r0.w, r2.x, r0.w
-add.f r2.x, c15.y, (neg)r0.z
-add.f r3.y, c13.x, r1.z
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r0.w, r0.w
-mul.f r2.x, r2.x, c8.y
-mov.f32f32 r3.y, r3.y
-bary.f r3.z, 18, r1.x
-mul.f r0.z, r0.z, c12.z
-mov.f32f32 r4.w, r2.y
-mul.f r2.y, r3.y, c3.w
-mad.f32 r0.y, r0.w, r3.z, r0.y
-add.f r0.z, r0.z, r2.x
-add.f r0.w, c13.z, r1.z
-mov.f32f32 r1.z, r2.y
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r4.y, r1.z
-bary.f r1.z, 10, r1.x
-max.f r0.y, c12.y, r0.y
-mov.f32f32 r0.z, r0.z
-mul.f r0.w, r0.w, c3.w
-add.f r1.z, r1.z, c12.w
-mov.f32f32 r0.y, r0.y
-max.f r0.z, r0.z, c12.y
-mov.f32f32 r2.x, r0.w
-mov.f32f32 r3.y, r1.z
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r5.z, r2.w
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r4.z, r3.y
-log2 r0.y, r0.y
-(ss)mul.f r0.y, c9.x, r0.y
-min.f r0.z, r0.z, c12.z
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r3.y, r0.x
-mov.f32f32 r5.w, r0.w
-mov.f32f32 r0.x, r2.y
-sam.s (f32)(x)r0.w, r4.x, s#2, t#2
-(sy)mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.y, r0.y
-add.f r2.y, c15.y, (neg)r0.z
-add.f r2.w, c15.y, (neg)r0.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r3.x, r3.x
-add.f r3.z, c13.y, (neg)r2.z
-mul.f r2.y, r2.y, c7.z
-mul.f r2.w, r2.w, c7.y
-add.f r3.w, c15.y, (neg)r0.z
-(ss)mov.f32f32 r4.x, r3.z
+mul.f r0.w, r0.w, r3.w
+add.f r0.z, c13.x, r0.z
+mul.f r0.y, r0.y, c14.x
+bary.f r4.y, 16, r1.x
+add.f r1.z, r1.z, (neg)r4.x
+mul.f r4.z, r0.z, c3.z
+mul.f r5.y, r0.x, c3.z
+mul.f r0.x, r0.w, r4.y
+add.f r0.z, c13.z, r1.z
+mov.f32f32 r6.x, r4.z
+mov.f32f32 r0.w, r1.z
exp2 r0.y, r0.y
-mov.f32f32 r3.z, c6.y
-mov.f32f32 r4.y, c6.x
-mov.f32f32 r4.z, c6.z
-mul.f r5.x, r3.x, r4.x
-mul.f r5.y, r3.z, c10.y
-mul.f r4.y, r4.y, c10.x
-mul.f r6.y, r3.w, c7.x
-mul.f r0.w, r5.x, r0.w
-mov.f32f32 r3.z, r0.x
-mov.f32f32 r0.x, r1.z
-mul.f r4.z, r4.z, c10.z
-(ss)mul.f r5.x, r5.y, r0.y
-mul.f r4.y, r4.y, r0.y
-mov.f32f32 r3.w, r0.x
-mul.f r0.x, r4.z, r0.y
-(ss)mov.f32f32 r0.y, r5.x
-mov.f32f32 r4.y, r4.y
-bary.f r4.z, 14, r1.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r5.x, r2.x
-sam.s (f32)(x)r2.x, r3.y, s#2, t#2
-(sy)mov.f32f32 r2.x, r2.x
-add.f r1.w, r1.w, c12.z
-(ss)mov.f32f32 r3.y, r4.z
-mov.f32f32 r3.z, r1.z
-mov.f32f32 r1.z, r1.z
-mul.f r3.w, r1.w, r4.x
-mov.f32f32 r6.z, r3.y
-bary.f r3.y, 15, r1.x
-mov.f32f32 r5.y, r3.z
-mad.f32 r0.w, r3.w, r2.x, r0.w
-mov.f32f32 r6.x, r1.z
-add.f r1.z, r2.z, c12.z
-bary.f r2.x, 12, r1.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.z, r3.y
-sam.s (f32)(x)r3.y, r4.w, s#2, t#2
-(sy)mov.f32f32 r3.y, r3.y
-mul.f r3.x, r3.x, r1.z
-sam.s (f32)(x)r3.z, r5.z, s#2, t#2
-(sy)mov.f32f32 r3.z, r3.z
-mov.f32f32 r6.w, r2.z
-mov.f32f32 r2.x, r2.x
-mul.f r1.z, r1.w, r1.z
-mad.f32 r0.w, r3.x, r3.z, r0.w
-bary.f r1.w, 13, r1.x
-mov.f32f32 r3.z, r2.x
-mov.f32f32 r2.x, c12.z
-mov.f32f32 r0.w, r0.w
-(ss)nop
-sam (f32)(xyz)r4.z, r6.z, s#0, t#0
-bary.f r2.z, 2, r1.x
-mad.f32 r0.w, r1.z, r3.y, r0.w
-bary.f r1.z, 1, r1.x
-bary.f (ei)r1.x, 0, r1.x
-(sy)mad.f32 r0.x, r5.x, r2.z, r0.x
-mov.f32f32 r0.w, r0.w
-mad.f32 r0.y, r4.w, r1.z, r0.y
-mad.f32 r1.x, r4.z, r1.x, r4.y
+(ss)mov.f32f32 r1.z, r0.y
+mul.f r2.z, r2.z, r3.w
+bary.f r3.w, 17, r1.x
+add.f r0.w, c13.x, r0.w
+add.f r1.z, c15.y, (neg)r1.z
+mov.f32f32 r6.w, r5.y
+mul.f r5.z, r0.z, c3.w
+mul.f r7.x, r0.w, c3.w
+mul.f r0.z, r1.z, c8.y
+(ss)mul.f r0.y, r0.y, c12.z
+mad.f32 r0.x, r2.z, r3.w, r0.x
+mov.f32f32 r6.y, r7.x
+bary.f r0.w, 10, r1.x
+add.f r0.y, r0.y, r0.z
+mul.f r0.z, r2.w, r2.y
+bary.f r1.z, 18, r1.x
+add.f r5.w, r0.w, c12.w
+max.f r0.y, r0.y, c12.y
+mov.f32f32 r4.w, r5.z
+mad.f32 r0.x, r0.z, r1.z, r0.x
+mov.f32f32 r6.z, r5.w
+min.f r0.y, r0.y, c12.z
+mov.f32f32 r7.y, r5.w
+mov.f32f32 r5.x, r5.w
+max.f r0.x, c12.y, r0.x
+nop
+add.f r0.z, c15.y, (neg)r0.y
+sam.s (f32)(x)r7.z, r6.x, s#2, t#2
+add.f r0.w, c13.y, (neg)r3.y
+add.f r1.z, c15.y, (neg)r0.y
+add.f r2.y, c15.y, (neg)r0.y
+mul.f r0.z, r0.z, c7.z
+mov.f32f32 r2.z, r0.w
+add.f r2.w, c13.y, (neg)r3.z
+mul.f r1.z, r1.z, c7.y
+mul.f r3.y, r2.y, c7.x
+log2 r0.x, r0.x
+(ss)mul.f r0.x, c9.x, r0.x
+mov.f32f32 r2.y, r2.w
+sam.s (f32)(x)r3.z, r6.w, s#2, t#2
nop
+sam.s (f32)(x)r3.w, r4.z, s#2, t#2
+sam.s (f32)(x)r4.x, r5.y, s#2, t#2
+add.f r1.w, r1.w, c12.z
+add.f r3.x, r3.x, c12.z
+mul.f r2.z, r2.z, r2.y
+bary.f r2.y, 13, r1.x
+mov.f32f32 r4.y, c6.z
+(ss)bary.f r4.z, 14, r1.x
+(sy)mul.f r2.z, r2.z, r7.z
+mul.f r2.w, r1.w, r2.w
+exp2 r0.x, r0.x
+mov.f32f32 r4.w, c6.y
+mov.f32f32 r5.x, c6.x
+mul.f r4.y, r4.y, c10.z
+mad.f32 r2.z, r2.w, r3.z, r2.z
+mul.f r0.w, r0.w, r3.x
+mul.f r2.w, r4.w, c10.y
+mul.f r3.z, r5.x, c10.x
+(ss)mul.f r4.y, r4.y, r0.x
+mad.f32 r0.w, r0.w, r3.w, r2.z
+mul.f r1.w, r1.w, r3.x
+bary.f r4.w, 15, r1.x
+mul.f r2.z, r2.w, r0.x
+(ss)mul.f r0.x, r3.z, r0.x
+mad.f32 r0.w, r1.w, r4.x, r0.w
+sam (f32)(w)r5.x, r2.x, s#1, t#1
+(sy)cmps.f.lt r1.w, r5.w, c14.y
+mov.f32f32 r2.w, c12.z
+(ss)bary.f r2.x, 2, r1.x
mul.f r0.w, c13.w, r0.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.y, r1.w
-mov.f32f32 r6.x, r2.x
-mov.f32f32 r1.z, c12.y
+sam (f32)(xyz)r4.z, r4.z, s#0, t#0
+bary.f r2.y, 0, r1.x
+bary.f (ei)r1.x, 1, r1.x
+cov.u32f32 r1.y, r1.w
+mov.f32f32 r1.w, r0.w
+(sy)mad.f32 r2.x, r5.x, r2.x, r4.y
+mad.f32 r1.x, r4.w, r1.x, r2.z
+mad.f32 r0.x, r4.z, r2.y, r0.x
+cmps.f.ne r1.y, r1.y, c12.y
+mul.f r2.x, r2.x, r1.w
+mul.f r1.x, r1.x, r1.w
+mad.f32 r1.w, c5.z, r5.x, r2.x
+mad.f32 r1.x, c5.y, r4.w, r1.x
mul.f r0.x, r0.x, r0.w
-mul.f r0.y, r0.y, r0.w
-mul.f r0.w, r1.x, r0.w
-mov.f32f32 r3.w, r1.y
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
-mad.f32 r0.x, c5.z, r5.x, r0.x
-mad.f32 r0.y, c5.y, r4.w, r0.y
-mov.f32f32 r0.w, r0.w
+mov.f32f32 r0.w, c12.y
+mul.f r1.w, r0.y, r1.w
+mul.f r1.x, r0.y, r1.x
+mad.f32 r0.x, c5.x, r4.z, r0.x
+sel.b32 r0.w, r0.w, r1.y, r5.w
+add.f r0.z, r1.w, r0.z
+add.f r1.x, r1.x, r1.z
+mul.f r0.x, r0.y, r0.x
nop
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
-mad.f32 r0.w, c5.x, r4.z, r0.w
-sam (f32)(w)r3.x, r3.z, s#1, t#1
-(sy)cmps.f.lt r1.x, r3.w, c14.y
-mul.f r0.x, r0.z, r0.x
-mul.f r0.y, r0.z, r0.y
-mov.f32f32 r0.w, r0.w
-cov.u32f32 r1.x, r1.x
-add.f r0.x, r0.x, r2.y
-add.f r0.y, r0.y, r2.w
-mul.f r0.z, r0.z, r0.w
-cmps.f.ne r0.w, r1.x, c12.y
-mov.f32f32 r1.x, r3.w
-(rpt2)nop
-mov.f32f32 r1.x, r1.x
-add.f r0.z, r0.z, r6.y
+mul.f r0.y, r0.z, r0.w
+mul.f r0.z, r1.x, r0.w
(rpt1)nop
-sel.b32 r0.w, r1.z, r0.w, r1.x
+mul.f r2.z, r0.y, c4.z
+mul.f r2.y, r0.z, c4.y
+add.f r0.x, r0.x, r3.y
(rpt2)nop
mul.f r0.x, r0.x, r0.w
-mul.f r0.y, r0.y, r0.w
-mul.f r0.z, r0.z, r0.w
-nop
-mul.f r0.x, r0.x, c4.z
-mul.f r0.y, r0.y, c4.y
-mul.f r0.z, r0.z, c4.x
-nop
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
+(rpt2)nop
+mul.f r2.x, r0.x, c4.x
+end
nop
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
nop
-mov.f32f32 r5.w, r0.x
-mov.f32f32 r5.z, r0.y
-mov.f32f32 r5.y, r0.z
-end
-; FRAG: outputs: r5.y (1:0)
-; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r2.x (5:10,cm=f,il=12,b=1) r3.x (5:11,cm=f,il=16,b=1) r2.z (5:12,cm=f,il=20,b=1) r5.x (5:13,cm=f,il=24,b=1)
-; FRAG: 237 instructions, 0 half, 7 full
-; pos (bary): r1.x
-; color: r5.y
-; fragcoord: r0.x
+; FRAG: outputs: r2.x (1:0)
+; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r2.x (5:10,cm=f,il=12,b=1) r3.x (5:11,cm=f,il=16,b=1) r4.x (5:12,cm=f,il=20,b=1) r5.x (5:13,cm=f,il=24,b=1)
+; FRAG: 155 instructions, 0 half, 8 full
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-53.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-53.asm
index 9f1027e..e7bcae3 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-53.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-53.asm
@@ -6,186 +6,139 @@
@in(r0.w) in4
@in(r1.x) in5
@in(r1.y) in6
-@in(r1.z) in8
-@in(r1.w) in9
-@out(r3.x) out0
-@out(r3.y) out1
-@out(r3.z) out2
-@out(r3.w) out3
-@out(r7.y) out4
-@out(r7.z) out5
-@out(r7.w) out6
-@out(r8.x) out7
+@in(r4.z) in8
+@in(r4.w) in9
+@out(r0.x) out0
+@out(r0.y) out1
+@out(r0.z) out2
+@out(r0.w) out3
+@out(r1.x) out4
+@out(r1.y) out5
+@out(r1.z) out6
+@out(r1.w) out7
@out(r2.x) out8
@out(r2.y) out9
@out(r2.z) out10
@out(r2.w) out11
-@out(r8.y) out12
-@out(r8.z) out13
-@out(r8.w) out14
-@out(r9.x) out15
+@out(r3.x) out12
+@out(r3.y) out13
+@out(r3.z) out14
+@out(r3.w) out15
@out(r4.x) out16
@out(r4.y) out17
@out(r4.z) out18
@out(r4.w) out19
-@out(r6.y) out20
-@out(r6.z) out21
-@out(r6.w) out22
-@out(r7.x) out23
-(sy)(ss)mul.f r2.x, c12.x, r0.x
-mul.f r2.y, c12.x, r0.w
-mad.f32 r2.x, c13.x, r0.y, r2.x
-mad.f32 r2.y, c13.x, r1.x, r2.y
-mad.f32 r2.x, c14.x, r0.z, r2.x
-mul.f r2.z, c12.z, r0.x
-mul.f r2.w, c12.z, r0.w
-mul.f r0.w, c12.y, r0.w
-add.f r2.x, r2.x, c15.x
-mov.f32f32 r2.y, r2.y
-mad.f32 r2.z, c13.z, r0.y, r2.z
-mad.f32 r2.w, c13.z, r1.x, r2.w
-add.f r3.x, c4.x, (neg)r2.x
-mad.f32 r2.y, c14.x, r1.y, r2.y
-mul.f r3.y, c8.w, r2.x
-mul.f r3.z, c8.z, r2.x
-mul.f r3.w, r3.x, r3.x
-mul.f r4.x, c12.y, r0.x
-mov.f32f32 r2.y, r2.y
-mad.f32 r4.x, c13.y, r0.y, r4.x
-absneg.f r4.y, (neg)c5.x
-mad.f32 r4.x, c14.y, r0.z, r4.x
-mul.f r4.z, c8.y, r2.x
-mul.f r4.w, c8.x, r2.x
-mul.f r5.x, r2.y, r4.y
-add.f r4.x, r4.x, c15.y
-mad.f32 r0.w, c13.y, r1.x, r0.w
-mul.f r1.x, c0.w, r2.x
-mul.f r5.y, c0.z, r2.x
-add.f r5.z, c4.y, (neg)r4.x
-mov.f32f32 r0.w, r0.w
-mad.f32 r3.y, c9.w, r4.x, r3.y
-mad.f32 r3.z, c9.z, r4.x, r3.z
-mad.f32 r3.w, r5.z, r5.z, r3.w
-mad.f32 r0.w, c14.y, r1.y, r0.w
-mad.f32 r2.z, c14.z, r0.z, r2.z
-mad.f32 r4.z, c9.y, r4.x, r4.z
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r0.w, r0.w
-add.f r2.z, r2.z, c15.z
-absneg.f r5.w, (neg)c5.y
-mad.f32 r4.w, c9.x, r4.x, r4.w
-mad.f32 r1.x, c1.w, r4.x, r1.x
-add.f r6.x, c4.z, (neg)r2.z
-mad.f32 r5.x, r0.w, r5.w, r5.x
-mad.f32 r3.y, c10.w, r2.z, r3.y
-mad.f32 r3.z, c10.z, r2.z, r3.z
-mad.f32 r3.w, r6.x, r6.x, r3.w
-mov.f32f32 r5.x, r5.x
-mov.f32f32 r2.w, r2.w
+@out(r5.x) out20
+@out(r5.y) out21
+@out(r5.z) out22
+@out(r5.w) out23
+@const(c17.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
+@const(c18.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
+(sy)(ss)mul.f r1.z, c12.x, r0.x
+mul.f r1.w, c12.x, r0.w
+mad.f32 r1.z, c13.x, r0.y, r1.z
+mad.f32 r1.w, c13.x, r1.x, r1.w
+mad.f32 r1.z, c14.x, r0.z, r1.z
+mad.f32 r1.w, c14.x, r1.y, r1.w
+absneg.f r2.x, (neg)c5.x
+mul.f r2.y, c12.y, r0.x
+add.f r2.z, r1.z, c15.x
+mad.f32 r1.z, c13.y, r0.y, r2.y
+mul.f r2.y, c12.z, r0.x
+mul.f r2.w, r1.w, r2.x
+add.f r3.x, c4.x, (neg)r2.z
+mul.f r3.y, c12.y, r0.w
+mul.f r3.z, c8.y, r2.z
+mul.f r3.w, c8.x, r2.z
+mul.f r4.x, r3.x, r3.x
+mad.f32 r1.z, c14.y, r0.z, r1.z
+mad.f32 r3.y, c13.y, r1.x, r3.y
+mul.f r4.y, c8.w, r2.z
+mul.f r5.x, c8.z, r2.z
+add.f r5.w, r1.z, c15.y
+mad.f32 r6.x, c14.y, r1.y, r3.y
+absneg.f r1.z, (neg)c5.y
+mul.f r3.y, c0.w, r2.z
+add.f r5.y, c4.y, (neg)r5.w
+mad.f32 r3.z, c9.y, r5.w, r3.z
+mad.f32 r3.w, c9.x, r5.w, r3.w
+mad.f32 r2.w, r6.x, r1.z, r2.w
+mad.f32 r4.x, r5.y, r5.y, r4.x
+mad.f32 r2.y, c13.z, r0.y, r2.y
+mul.f r0.w, c12.z, r0.w
+mad.f32 r2.y, c14.z, r0.z, r2.y
+mad.f32 r0.w, c13.z, r1.x, r0.w
+mad.f32 r1.x, c9.w, r5.w, r4.y
+mad.f32 r4.y, c9.z, r5.w, r5.x
+add.f r2.y, r2.y, c15.z
+mad.f32 r6.y, c14.z, r1.y, r0.w
+absneg.f r0.w, (neg)c5.z
+mad.f32 r1.y, c1.w, r5.w, r3.y
+add.f r3.y, c4.z, (neg)r2.y
+mad.f32 r3.z, c10.y, r2.y, r3.z
+mad.f32 r3.w, c10.x, r2.y, r3.w
+mad.f32 r2.w, r6.y, r0.w, r2.w
+mad.f32 r4.x, r3.y, r3.y, r4.x
mul.f r0.x, c12.w, r0.x
-mad.f32 r4.z, c10.y, r2.z, r4.z
-mad.f32 r4.w, c10.x, r2.z, r4.w
-mad.f32 r1.y, c14.z, r1.y, r2.w
-rsq r2.w, r3.w
-(ss)mov.f32f32 r2.w, r2.w
-(ss)absneg.f r3.w, (neg)c5.z
+mad.f32 r5.x, c10.w, r2.y, r1.x
+mad.f32 r4.y, c10.z, r2.y, r4.y
mad.f32 r0.x, c13.w, r0.y, r0.x
-mov.f32f32 r0.y, r1.y
-mad.f32 r1.y, r3.x, r2.w, r4.y
-mad.f32 r3.x, r5.z, r2.w, r5.w
-mad.f32 r2.w, r6.x, r2.w, r3.w
-mad.f32 r3.w, r0.y, r3.w, r5.x
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r3.w, r3.w
-mul.f r4.y, r1.y, r1.y
+mad.f32 r0.y, c2.w, r2.y, r1.y
+mul.f r6.z, c0.z, r2.z
+rsq r1.x, r4.x
+(ss)mov.f32f32 r1.y, r1.x
+mad.f32 r2.x, r3.x, r1.x, r2.x
+max.f r1.x, c17.x, r2.w
mad.f32 r0.x, c14.w, r0.z, r0.x
-mad.f32 r0.z, r3.x, r3.x, r4.y
-max.f r3.w, c17.x, r3.w
-mad.f32 r1.x, c2.w, r2.z, r1.x
-mad.f32 r4.y, c1.z, r4.x, r5.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r3.w, r3.w
-mad.f32 r0.z, r2.w, r2.w, r0.z
+mad.f32 r0.z, r5.y, r1.y, r1.z
+mov.f32f32 r1.z, r2.x
+mad.f32 r0.w, r3.y, r1.y, r0.w
+nop
+mov.f32f32 r2.w, r0.z
+mul.f r1.y, r1.z, r1.z
+(ss)mov.f32f32 r4.x, r0.w
+mov.f32f32 r3.x, r1.x
+mad.f32 r0.z, r0.z, r2.w, r1.y
add.f r0.x, r0.x, c15.w
-mad.f32 r4.y, c2.z, r2.z, r4.y
-mul.f r5.x, c0.y, r2.x
-mul.f r5.y, c0.x, r2.x
-mad.f32 r5.z, c7.x, r2.z, c7.y
-mad.f32 r2.x, c7.x, r2.x, c7.y
+mad.f32 r0.z, r0.w, r4.x, r0.z
+mul.f r1.z, r3.x, c6.z
+mul.f r1.y, r3.x, c6.y
+mad.f32 r0.w, c11.y, r0.x, r3.z
+mad.f32 r3.x, c11.x, r0.x, r3.w
+mul.f r1.x, r1.x, c6.x
+mad.f32 r3.w, c11.w, r0.x, r5.x
rsq r0.z, r0.z
-(ss)mov.f32f32 r0.z, r0.z
-mul.f r5.w, r3.w, c6.z
-mul.f r6.x, r3.w, c6.y
-mul.f r3.w, r3.w, c6.x
-mul.f r2.w, r2.w, r0.z
-mul.f r3.x, r3.x, r0.z
-mul.f r0.z, r1.y, r0.z
-mov.f32f32 r1.y, r5.w
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r0.z, r0.z
-nop
-mov.f32f32 r6.w, r2.w
-mov.f32f32 r6.z, r3.x
-mov.f32f32 r6.y, r0.z
-mov.f32f32 r7.w, r1.y
-mov.f32f32 r0.z, r6.x
-mov.f32f32 r1.y, r3.w
-(rpt1)nop
-mov.f32f32 r7.z, r0.z
-mov.f32f32 r7.y, r1.y
-mad.f32 r0.z, c11.w, r0.x, r3.y
-mad.f32 r1.y, c11.z, r0.x, r3.z
-mad.f32 r2.w, c11.y, r0.x, r4.z
-mad.f32 r3.x, c11.x, r0.x, r4.w
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r1.y, r1.y
-mul.f r2.w, r2.w, c16.y
+(ss)mov.f32f32 r3.z, r0.z
+mul.f r5.x, r2.x, r0.z
+mul.f r3.y, r0.w, c16.y
mul.f r3.x, r3.x, c16.x
-mov.f32f32 r9.x, r0.z
-mov.f32f32 r8.w, r1.y
-mov.f32f32 r8.z, r2.w
-mov.f32f32 r8.y, r3.x
-mad.f32 r0.z, c3.w, r0.x, r1.x
-mad.f32 r1.x, c3.z, r0.x, r4.y
-mad.f32 r1.y, c1.y, r4.x, r5.x
-mad.f32 r2.w, c1.x, r4.x, r5.y
-mov.f32f32 r3.w, r0.z
-mov.f32f32 r3.z, r1.x
-mad.f32 r0.z, c2.y, r2.z, r1.y
-mad.f32 r1.x, c2.x, r2.z, r2.w
-mad.f32 r0.z, c3.y, r0.x, r0.z
-mad.f32 r0.x, c3.x, r0.x, r1.x
-mov.f32f32 r1.x, r5.z
-mov.f32f32 r1.y, r2.x
-mov.f32f32 r3.y, r0.z
-mov.f32f32 r3.x, r0.x
-mov.f32f32 r4.y, r1.x
-mov.f32f32 r4.x, r1.y
-mov.f32f32 r0.x, r0.y
-mov.f32f32 r0.y, r0.w
-mov.f32f32 r0.z, r2.y
-nop
-mov.f32f32 r2.z, r0.x
-mov.f32f32 r2.y, r0.y
-mov.f32f32 r2.x, r0.z
-mov.f32f32 r0.x, (0.000000)
-mov.f32f32 r0.y, r1.w
-mov.f32f32 r0.z, r1.z
-mov.f32f32 r0.w, (0.000000)
-mov.f32f32 r7.x, r0.x
-mov.f32f32 r4.w, r0.y
-mov.f32f32 r4.z, r0.z
-mov.f32f32 r2.w, r0.w
-mov.f32f32 r0.x, (0.000000)
-(rpt2)nop
-mov.f32f32 r8.x, r0.x
+mul.f r5.z, r4.x, r3.z
+mul.f r5.y, r2.w, r3.z
+mad.f32 r3.z, c11.z, r0.x, r4.y
+mad.f32 r0.w, c3.w, r0.x, r0.y
+mad.f32 r0.y, c1.z, r5.w, r6.z
+(ss)mul.f r0.z, c0.y, r2.z
+mad.f32 r0.y, c2.z, r2.y, r0.y
+mad.f32 r2.x, c1.y, r5.w, r0.z
+mad.f32 r0.z, c3.z, r0.x, r0.y
+mad.f32 r0.y, c2.y, r2.y, r2.x
+mul.f r2.x, c0.x, r2.z
+mad.f32 r0.y, c3.y, r0.x, r0.y
+mad.f32 r2.x, c1.x, r5.w, r2.x
+mad.f32 r4.y, c7.x, r2.y, c7.y
+mad.f32 r2.x, c2.x, r2.y, r2.x
+mad.f32 r4.x, c7.x, r2.z, c7.y
+mad.f32 r0.x, c3.x, r0.x, r2.x
+mov.f32f32 r2.z, r6.y
+mov.f32f32 r2.y, r6.x
+mov.f32f32 r2.x, r1.w
+mov.f32f32 r5.w, (0.000000)
+mov.f32f32 r2.w, (0.000000)
+mov.f32f32 r1.w, (0.000000)
end
nop
-; VERT: outputs: r3.x (0:0) r7.y (5:9) r2.x (5:10) r8.y (5:11) r4.x (5:12) r6.y (5:13)
-; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r1.z (0:0,cm=3,il=16,b=0)
-; VERT: 154 instructions, 0 half, 10 full
-; pos: r3.x
+nop
+; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11) r4.x (5:12) r5.x (5:13)
+; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r4.z (0:0,cm=3,il=16,b=0)
+; VERT: 102 instructions, 0 half, 7 full
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-54.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-54.asm
index 4bf6e1b..d0ec086 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-54.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-54.asm
@@ -4,10 +4,14 @@
@in(r0.w) in3
@in(r1.x) in4
@in(r1.y) in5
-@out(r4.w) out0
-@out(r5.x) out1
-@out(r5.y) out2
-@out(r5.z) out3
+@out(r2.x) out0
+@out(r2.y) out1
+@out(r2.z) out2
+@out(r2.w) out3
+@const(c12.x) 0x3f000000, 0x00000000, 0x3f800000, 0xbb449ba6
+@const(c13.x) 0xbf000000, 0x40000000, 0x3f800000, 0x3de38866
+@const(c14.x) 0x3fb8aa65, 0x3cf5c28f, 0x00000000, 0x00000000
+@const(c15.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.x, 8, r1.x
add.f r0.y, r0.w, c12.y
bary.f r0.w, 4, r1.x
@@ -23,227 +27,144 @@ add.f r3.x, r1.z, c13.x
mad.f32 r2.y, r2.z, r2.z, r2.y
add.f r1.w, r1.w, (neg)r2.w
(ss)mul.f r0.y, r0.z, r0.y
-floor.f r0.z, r3.x
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r1.w, r1.w
+absneg.f r0.z, (neg)c9.x
+bary.f r2.w, 6, r1.x
+mov.f32f32 r3.y, r1.w
+floor.f r3.z, r3.x
+mul.f r0.z, r0.z, c9.x
+mad.f32 r2.y, r2.w, r2.w, r2.y
+mul.f r3.w, c12.x, r3.y
+add.f r3.x, r3.x, (neg)r3.z
+mul.f r0.z, r0.z, r0.y
mov.f32f32 r0.y, r0.y
-absneg.f r2.w, (neg)c9.x
-bary.f r3.y, 6, r1.x
-mul.f r3.z, c12.x, r1.w
-add.f r0.z, r3.x, (neg)r0.z
-mul.f r2.w, r2.w, c9.x
-mad.f32 r2.y, r3.y, r3.y, r2.y
-mov.f32f32 r3.x, r3.z
-mov.f32f32 r0.z, r0.z
-mul.f r2.w, r2.w, r0.y
-add.f r3.z, c13.y, (neg)r1.w
-add.f r0.x, r0.x, (neg)r3.x
-mul.f r3.x, c12.x, r0.z
-mov.f32f32 r2.w, r2.w
+add.f r0.x, r0.x, (neg)r3.w
+mov.f32f32 r3.z, r3.x
rsq r2.y, r2.y
-(ss)mov.f32f32 r2.y, r2.y
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r3.x, r3.x
-mul.f r0.y, r2.w, r0.y
-mul.f r0.w, r0.w, r2.y
-add.f r2.w, c13.x, r0.x
+(ss)mov.f32f32 r3.w, r2.y
+mul.f r0.y, r0.z, r0.y
+mov.f32f32 r0.z, r0.x
+mul.f r4.x, c12.x, r3.z
add.f r0.x, c13.z, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.w, r2.w
-bary.f r3.w, 16, r1.x
-mul.f r0.y, r0.y, c14.x
-mov.f32f32 r0.x, r0.x
-mul.f r2.w, r2.w, c4.z
mul.f r0.w, r0.w, r3.w
-mul.f r2.z, r2.z, r2.y
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r3.w, r2.w
-mul.f r0.x, r0.x, c4.z
-mov.f32f32 r2.z, r2.z
-bary.f r4.x, 17, r1.x
-mov.f32f32 r4.y, r3.w
-add.f r1.z, r1.z, (neg)r3.x
+add.f r0.z, c13.x, r0.z
+mul.f r0.y, r0.y, c14.x
+bary.f r4.y, 16, r1.x
+add.f r1.z, r1.z, (neg)r4.x
+mul.f r4.z, r0.z, c4.z
+mul.f r5.y, r0.x, c4.z
+mul.f r0.x, r0.w, r4.y
+add.f r0.z, c13.z, r1.z
+mov.f32f32 r6.x, r4.z
+mov.f32f32 r0.w, r1.z
exp2 r0.y, r0.y
-(ss)mov.f32f32 r0.y, r0.y
-mad.f32 r0.w, r2.z, r4.x, r0.w
-mov.f32f32 r2.z, r0.x
-mov.f32f32 r1.z, r1.z
-add.f r3.x, c15.y, (neg)r0.y
-mov.f32f32 r0.w, r0.w
-mul.f r2.y, r3.y, r2.y
-add.f r3.y, c13.x, r1.z
-mul.f r3.x, r3.x, c9.y
-mul.f r0.y, r0.y, c12.z
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r3.y, r3.y
-bary.f r3.w, 18, r1.x
-add.f r0.y, r0.y, r3.x
-mov.f32f32 r2.z, r2.z
-mul.f r3.x, r3.y, c4.w
-mad.f32 r0.w, r2.y, r3.w, r0.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r5.x, r2.z
-mov.f32f32 r2.y, r3.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.y, r0.y
-add.f r1.z, c13.z, r1.z
-mov.f32f32 r4.z, r2.y
-bary.f r2.y, 10, r1.x
+(ss)mov.f32f32 r1.z, r0.y
+mul.f r2.z, r2.z, r3.w
+bary.f r3.w, 17, r1.x
+add.f r0.w, c13.x, r0.w
+add.f r1.z, c15.y, (neg)r1.z
+mov.f32f32 r6.w, r5.y
+mul.f r5.z, r0.z, c4.w
+mul.f r7.x, r0.w, c4.w
+mul.f r0.z, r1.z, c9.y
+(ss)mul.f r0.y, r0.y, c12.z
+mad.f32 r0.x, r2.z, r3.w, r0.x
+mov.f32f32 r6.y, r7.x
+bary.f r0.w, 10, r1.x
+add.f r0.y, r0.y, r0.z
+mul.f r0.z, r2.w, r2.y
+bary.f r1.z, 18, r1.x
+add.f r5.w, r0.w, c12.w
max.f r0.y, r0.y, c12.y
-max.f r0.w, c12.y, r0.w
-mov.f32f32 r1.z, r1.z
-add.f r2.y, r2.y, c12.w
+mov.f32f32 r4.w, r5.z
+mad.f32 r0.x, r0.z, r1.z, r0.x
+mov.f32f32 r6.z, r5.w
min.f r0.y, r0.y, c12.z
-mov.f32f32 r0.w, r0.w
-mul.f r1.z, r1.z, c4.w
-mov.f32f32 r2.z, r2.y
-add.f r3.y, c15.y, (neg)r0.y
-add.f r3.w, c15.y, (neg)r0.y
-add.f r4.x, c15.y, (neg)r0.y
-mov.f32f32 r4.w, r2.z
-mul.f r2.z, r3.y, c8.z
-mul.f r3.y, r3.w, c8.y
-mul.f r3.w, r4.x, c8.x
-log2 r0.w, r0.w
-(ss)mul.f r0.w, c10.y, r0.w
-mov.f32f32 r4.x, r1.z
-mov.f32f32 r0.x, r0.x
-sam.s (f32)(x)r4.y, r4.y, s#3, t#3
-(sy)(ss)mov.f32f32 r4.y, r4.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r4.x, r4.x
-mov.f32f32 r5.w, r0.x
-mov.f32f32 r0.x, r4.y
-mov.f32f32 r3.z, r3.z
-add.f r4.y, c13.y, (neg)r0.z
-mov.f32f32 r5.y, r4.x
-mov.f32f32 r4.x, r2.y
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r4.y, r4.y
-exp2 r0.w, r0.w
-bary.f r4.z, 14, r1.x
-mov.f32f32 r5.z, r4.x
-mov.f32f32 r6.x, r3.x
-mul.f r3.x, r3.z, r4.y
-mov.f32f32 r4.x, r4.z
-mov.f32f32 r4.w, r2.y
-mov.f32f32 r2.w, r2.w
-mul.f r0.x, r3.x, r0.x
-mov.f32f32 r6.z, r4.x
-mov.f32f32 r6.y, r4.w
-bary.f r3.x, 15, r1.x
-sam.s (f32)(x)r4.x, r5.x, s#3, t#3
+mov.f32f32 r7.y, r5.w
+mov.f32f32 r5.x, r5.w
+max.f r0.x, c12.y, r0.x
+nop
+add.f r0.z, c15.y, (neg)r0.y
+sam.s (f32)(x)r7.z, r6.x, s#3, t#3
+add.f r0.w, c13.y, (neg)r3.y
+add.f r1.z, c15.y, (neg)r0.y
+add.f r2.y, c15.y, (neg)r0.y
+mul.f r0.z, r0.z, c8.z
+mov.f32f32 r2.z, r0.w
+add.f r2.w, c13.y, (neg)r3.z
+mul.f r1.z, r1.z, c8.y
+mul.f r3.y, r2.y, c8.x
+log2 r0.x, r0.x
+(ss)mul.f r0.x, c10.y, r0.x
+mov.f32f32 r2.y, r2.w
+sam.s (f32)(x)r3.z, r6.w, s#3, t#3
nop
-(sy)mov.f32f32 r4.x, r4.x
-mov.f32f32 r4.w, r2.w
-mov.f32f32 r1.z, r1.z
+sam.s (f32)(x)r3.w, r4.z, s#3, t#3
+sam.s (f32)(x)r4.x, r5.y, s#3, t#3
add.f r1.w, r1.w, c12.z
-sam.s (f32)(x)r2.w, r5.w, s#3, t#3
-(sy)mov.f32f32 r2.w, r2.w
-(ss)mov.f32f32 r5.y, r3.x
-mov.f32f32 r5.x, r1.z
-mul.f r1.z, r1.w, r4.y
-mov.f32f32 r2.y, r2.y
-add.f r0.z, r0.z, c12.z
-mov.f32f32 r2.x, r2.x
-mad.f32 r0.x, r1.z, r2.w, r0.x
-mov.f32f32 r6.w, r5.y
-mov.f32f32 r5.y, r2.y
-mul.f r1.z, r3.z, r0.z
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r2.x, r2.x
+add.f r3.x, r3.x, c12.z
+mul.f r2.z, r2.z, r2.y
bary.f r2.y, 13, r1.x
-mul.f r0.z, r1.w, r0.z
-sam (f32)(xyz)r5.z, r6.z, s#2, t#2
-(sy)mul.f r1.w, c7.z, r6.x
-sam.s (f32)(x)r2.w, r4.w, s#3, t#3
-(sy)mov.f32f32 r2.w, r2.w
-mul.f r3.z, c7.y, r5.w
-mul.f r4.y, c7.x, r5.z
-(ss)mul.f r1.w, r1.w, r0.w
-mad.f32 r0.x, r1.z, r2.w, r0.x
-mov.f32f32 r1.z, r4.z
-mul.f r2.w, r3.z, r0.w
-mul.f r0.w, r4.y, r0.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r4.y, r1.z
-mad.f32 r0.x, r0.z, r4.x, r0.x
-mov.f32f32 r0.z, r3.x
-mov.f32f32 r1.z, r2.y
-mov.f32f32 r3.x, c12.z
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r4.z, r0.z
-mov.f32f32 r2.y, r1.z
-mov.f32f32 r5.z, r3.x
-mul.f r0.x, c13.w, r0.x
-bary.f r0.z, 2, r1.x
-bary.f r1.z, 1, r1.x
-mov.f32f32 r3.x, c12.y
-mov.f32f32 r0.x, r0.x
-sam (f32)(xyz)r5.w, r4.y, s#0, t#0
-(sy)mad.f32 r0.z, r6.y, r0.z, r1.w
-mad.f32 r1.z, r6.x, r1.z, r2.w
+bary.f r4.y, 14, r1.x
+(ss)bary.f r4.z, 15, r1.x
+(sy)mul.f r2.z, r2.z, r7.z
+mul.f r2.w, r1.w, r2.w
+exp2 r0.x, r0.x
+mov.f32f32 r4.w, r4.y
+mov.f32f32 r5.x, r4.z
+mul.f r0.w, r0.w, r3.x
+mad.f32 r2.z, r2.w, r3.z, r2.z
+mul.f r1.w, r1.w, r3.x
+sam (f32)(w)r5.y, r2.x, s#1, t#1
+(sy)(ss)cmps.f.lt r2.x, r6.x, c14.y
+mad.f32 r0.w, r0.w, r3.w, r2.z
+sam (f32)(xyz)r6.y, r4.y, s#0, t#0
+mov.f32f32 r2.w, c12.z
+mad.f32 r0.w, r1.w, r4.x, r0.w
+sam (f32)(xyz)r3.z, r4.w, s#2, t#2
+(sy)mul.f r1.w, c7.y, r3.w
+mul.f r2.y, c7.x, r3.z
+mul.f r2.z, c7.z, r4.x
+mul.f r0.w, c13.w, r0.w
+(ss)mul.f r1.w, r1.w, r0.x
+mul.f r2.y, r2.y, r0.x
+mul.f r0.x, r2.z, r0.x
+mov.f32f32 r2.z, r0.w
+bary.f r3.x, 2, r1.x
+bary.f r3.z, 1, r1.x
bary.f (ei)r1.x, 0, r1.x
-sam (f32)(w)r6.z, r2.x, s#1, t#1
-(sy)cmps.f.lt r1.y, r7.y, c14.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r1.z, r1.z
-mad.f32 r0.w, r5.w, r1.x, r0.w
-cov.u32f32 r1.x, r1.y
-mul.f r0.z, r0.z, r0.x
-mul.f r1.y, r1.z, r0.x
-mov.f32f32 r0.w, r0.w
-cmps.f.ne r1.x, r1.x, c12.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r1.y, r1.y
-mad.f32 r0.z, c6.z, r6.y, r0.z
-mad.f32 r1.y, c6.y, r6.x, r1.y
-mul.f r0.x, r0.w, r0.x
-mov.f32f32 r0.w, r7.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.w, r0.w
-mul.f r0.z, r0.y, r0.z
-mul.f r1.y, r0.y, r1.y
-mad.f32 r0.x, c6.x, r5.w, r0.x
-sel.b32 r0.w, r3.x, r1.x, r0.w
-add.f r0.z, r0.z, r2.z
-add.f r1.x, r1.y, r3.y
-mov.f32f32 r0.x, r0.x
-nop
-mul.f r0.z, r0.z, r0.w
-mul.f r1.x, r1.x, r0.w
+cov.u32f32 r1.y, r2.x
+mad.f32 r0.x, r6.w, r3.x, r0.x
+mad.f32 r1.w, r6.z, r3.z, r1.w
+mad.f32 r1.x, r6.y, r1.x, r2.y
+cmps.f.ne r1.y, r1.y, c12.y
+mul.f r0.x, r0.x, r2.z
+mul.f r1.w, r1.w, r2.z
+mad.f32 r0.x, c6.z, r6.w, r0.x
+mad.f32 r1.w, c6.y, r6.z, r1.w
+mul.f r0.w, r1.x, r0.w
+mov.f32f32 r1.x, c12.y
mul.f r0.x, r0.y, r0.x
+mul.f r1.w, r0.y, r1.w
+mad.f32 r0.w, c6.x, r6.y, r0.w
+sel.b32 r1.x, r1.x, r1.y, r6.x
+add.f r0.x, r0.x, r0.z
+add.f r0.z, r1.w, r1.z
+mul.f r0.y, r0.y, r0.w
nop
-mul.f r0.y, r0.z, c5.z
-mul.f r0.z, r1.x, c5.y
-add.f r0.x, r0.x, r3.w
-nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-mul.f r0.x, r0.x, r0.w
-nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-mul.f r0.x, r0.x, c5.x
-nop
-(ss)mov.f32f32 r5.y, r0.y
-mov.f32f32 r5.x, r0.z
-mov.f32f32 r0.x, r0.x
+mul.f r0.x, r0.x, r1.x
+mul.f r0.z, r0.z, r1.x
+(rpt1)nop
+mul.f r2.z, r0.x, c5.z
+mul.f r2.y, r0.z, c5.y
+add.f r0.x, r0.y, r3.y
(rpt2)nop
-mov.f32f32 r0.x, r0.x
+mul.f r0.x, r0.x, r1.x
(rpt2)nop
-mov.f32f32 r4.w, r0.x
+mul.f r2.x, r0.x, c5.x
end
nop
nop
-nop
-; FRAG: outputs: r4.w (1:0)
-; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r2.x (5:10,cm=f,il=12,b=1) r3.x (5:11,cm=f,il=16,b=1) r2.y (5:12,cm=f,il=20,b=1) r5.x (5:13,cm=f,il=24,b=1)
-; FRAG: 233 instructions, 0 half, 8 full
-; pos (bary): r1.x
-; color: r4.w
-; fragcoord: r0.x
+; FRAG: outputs: r2.x (1:0)
+; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r2.x (5:10,cm=f,il=12,b=1) r3.x (5:11,cm=f,il=16,b=1) r3.w (5:12,cm=f,il=20,b=1) r5.x (5:13,cm=f,il=24,b=1)
+; FRAG: 155 instructions, 0 half, 8 full
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-55.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-55.asm
index 9f1027e..e7bcae3 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-55.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-55.asm
@@ -6,186 +6,139 @@
@in(r0.w) in4
@in(r1.x) in5
@in(r1.y) in6
-@in(r1.z) in8
-@in(r1.w) in9
-@out(r3.x) out0
-@out(r3.y) out1
-@out(r3.z) out2
-@out(r3.w) out3
-@out(r7.y) out4
-@out(r7.z) out5
-@out(r7.w) out6
-@out(r8.x) out7
+@in(r4.z) in8
+@in(r4.w) in9
+@out(r0.x) out0
+@out(r0.y) out1
+@out(r0.z) out2
+@out(r0.w) out3
+@out(r1.x) out4
+@out(r1.y) out5
+@out(r1.z) out6
+@out(r1.w) out7
@out(r2.x) out8
@out(r2.y) out9
@out(r2.z) out10
@out(r2.w) out11
-@out(r8.y) out12
-@out(r8.z) out13
-@out(r8.w) out14
-@out(r9.x) out15
+@out(r3.x) out12
+@out(r3.y) out13
+@out(r3.z) out14
+@out(r3.w) out15
@out(r4.x) out16
@out(r4.y) out17
@out(r4.z) out18
@out(r4.w) out19
-@out(r6.y) out20
-@out(r6.z) out21
-@out(r6.w) out22
-@out(r7.x) out23
-(sy)(ss)mul.f r2.x, c12.x, r0.x
-mul.f r2.y, c12.x, r0.w
-mad.f32 r2.x, c13.x, r0.y, r2.x
-mad.f32 r2.y, c13.x, r1.x, r2.y
-mad.f32 r2.x, c14.x, r0.z, r2.x
-mul.f r2.z, c12.z, r0.x
-mul.f r2.w, c12.z, r0.w
-mul.f r0.w, c12.y, r0.w
-add.f r2.x, r2.x, c15.x
-mov.f32f32 r2.y, r2.y
-mad.f32 r2.z, c13.z, r0.y, r2.z
-mad.f32 r2.w, c13.z, r1.x, r2.w
-add.f r3.x, c4.x, (neg)r2.x
-mad.f32 r2.y, c14.x, r1.y, r2.y
-mul.f r3.y, c8.w, r2.x
-mul.f r3.z, c8.z, r2.x
-mul.f r3.w, r3.x, r3.x
-mul.f r4.x, c12.y, r0.x
-mov.f32f32 r2.y, r2.y
-mad.f32 r4.x, c13.y, r0.y, r4.x
-absneg.f r4.y, (neg)c5.x
-mad.f32 r4.x, c14.y, r0.z, r4.x
-mul.f r4.z, c8.y, r2.x
-mul.f r4.w, c8.x, r2.x
-mul.f r5.x, r2.y, r4.y
-add.f r4.x, r4.x, c15.y
-mad.f32 r0.w, c13.y, r1.x, r0.w
-mul.f r1.x, c0.w, r2.x
-mul.f r5.y, c0.z, r2.x
-add.f r5.z, c4.y, (neg)r4.x
-mov.f32f32 r0.w, r0.w
-mad.f32 r3.y, c9.w, r4.x, r3.y
-mad.f32 r3.z, c9.z, r4.x, r3.z
-mad.f32 r3.w, r5.z, r5.z, r3.w
-mad.f32 r0.w, c14.y, r1.y, r0.w
-mad.f32 r2.z, c14.z, r0.z, r2.z
-mad.f32 r4.z, c9.y, r4.x, r4.z
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r0.w, r0.w
-add.f r2.z, r2.z, c15.z
-absneg.f r5.w, (neg)c5.y
-mad.f32 r4.w, c9.x, r4.x, r4.w
-mad.f32 r1.x, c1.w, r4.x, r1.x
-add.f r6.x, c4.z, (neg)r2.z
-mad.f32 r5.x, r0.w, r5.w, r5.x
-mad.f32 r3.y, c10.w, r2.z, r3.y
-mad.f32 r3.z, c10.z, r2.z, r3.z
-mad.f32 r3.w, r6.x, r6.x, r3.w
-mov.f32f32 r5.x, r5.x
-mov.f32f32 r2.w, r2.w
+@out(r5.x) out20
+@out(r5.y) out21
+@out(r5.z) out22
+@out(r5.w) out23
+@const(c17.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
+@const(c18.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
+(sy)(ss)mul.f r1.z, c12.x, r0.x
+mul.f r1.w, c12.x, r0.w
+mad.f32 r1.z, c13.x, r0.y, r1.z
+mad.f32 r1.w, c13.x, r1.x, r1.w
+mad.f32 r1.z, c14.x, r0.z, r1.z
+mad.f32 r1.w, c14.x, r1.y, r1.w
+absneg.f r2.x, (neg)c5.x
+mul.f r2.y, c12.y, r0.x
+add.f r2.z, r1.z, c15.x
+mad.f32 r1.z, c13.y, r0.y, r2.y
+mul.f r2.y, c12.z, r0.x
+mul.f r2.w, r1.w, r2.x
+add.f r3.x, c4.x, (neg)r2.z
+mul.f r3.y, c12.y, r0.w
+mul.f r3.z, c8.y, r2.z
+mul.f r3.w, c8.x, r2.z
+mul.f r4.x, r3.x, r3.x
+mad.f32 r1.z, c14.y, r0.z, r1.z
+mad.f32 r3.y, c13.y, r1.x, r3.y
+mul.f r4.y, c8.w, r2.z
+mul.f r5.x, c8.z, r2.z
+add.f r5.w, r1.z, c15.y
+mad.f32 r6.x, c14.y, r1.y, r3.y
+absneg.f r1.z, (neg)c5.y
+mul.f r3.y, c0.w, r2.z
+add.f r5.y, c4.y, (neg)r5.w
+mad.f32 r3.z, c9.y, r5.w, r3.z
+mad.f32 r3.w, c9.x, r5.w, r3.w
+mad.f32 r2.w, r6.x, r1.z, r2.w
+mad.f32 r4.x, r5.y, r5.y, r4.x
+mad.f32 r2.y, c13.z, r0.y, r2.y
+mul.f r0.w, c12.z, r0.w
+mad.f32 r2.y, c14.z, r0.z, r2.y
+mad.f32 r0.w, c13.z, r1.x, r0.w
+mad.f32 r1.x, c9.w, r5.w, r4.y
+mad.f32 r4.y, c9.z, r5.w, r5.x
+add.f r2.y, r2.y, c15.z
+mad.f32 r6.y, c14.z, r1.y, r0.w
+absneg.f r0.w, (neg)c5.z
+mad.f32 r1.y, c1.w, r5.w, r3.y
+add.f r3.y, c4.z, (neg)r2.y
+mad.f32 r3.z, c10.y, r2.y, r3.z
+mad.f32 r3.w, c10.x, r2.y, r3.w
+mad.f32 r2.w, r6.y, r0.w, r2.w
+mad.f32 r4.x, r3.y, r3.y, r4.x
mul.f r0.x, c12.w, r0.x
-mad.f32 r4.z, c10.y, r2.z, r4.z
-mad.f32 r4.w, c10.x, r2.z, r4.w
-mad.f32 r1.y, c14.z, r1.y, r2.w
-rsq r2.w, r3.w
-(ss)mov.f32f32 r2.w, r2.w
-(ss)absneg.f r3.w, (neg)c5.z
+mad.f32 r5.x, c10.w, r2.y, r1.x
+mad.f32 r4.y, c10.z, r2.y, r4.y
mad.f32 r0.x, c13.w, r0.y, r0.x
-mov.f32f32 r0.y, r1.y
-mad.f32 r1.y, r3.x, r2.w, r4.y
-mad.f32 r3.x, r5.z, r2.w, r5.w
-mad.f32 r2.w, r6.x, r2.w, r3.w
-mad.f32 r3.w, r0.y, r3.w, r5.x
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r3.w, r3.w
-mul.f r4.y, r1.y, r1.y
+mad.f32 r0.y, c2.w, r2.y, r1.y
+mul.f r6.z, c0.z, r2.z
+rsq r1.x, r4.x
+(ss)mov.f32f32 r1.y, r1.x
+mad.f32 r2.x, r3.x, r1.x, r2.x
+max.f r1.x, c17.x, r2.w
mad.f32 r0.x, c14.w, r0.z, r0.x
-mad.f32 r0.z, r3.x, r3.x, r4.y
-max.f r3.w, c17.x, r3.w
-mad.f32 r1.x, c2.w, r2.z, r1.x
-mad.f32 r4.y, c1.z, r4.x, r5.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r3.w, r3.w
-mad.f32 r0.z, r2.w, r2.w, r0.z
+mad.f32 r0.z, r5.y, r1.y, r1.z
+mov.f32f32 r1.z, r2.x
+mad.f32 r0.w, r3.y, r1.y, r0.w
+nop
+mov.f32f32 r2.w, r0.z
+mul.f r1.y, r1.z, r1.z
+(ss)mov.f32f32 r4.x, r0.w
+mov.f32f32 r3.x, r1.x
+mad.f32 r0.z, r0.z, r2.w, r1.y
add.f r0.x, r0.x, c15.w
-mad.f32 r4.y, c2.z, r2.z, r4.y
-mul.f r5.x, c0.y, r2.x
-mul.f r5.y, c0.x, r2.x
-mad.f32 r5.z, c7.x, r2.z, c7.y
-mad.f32 r2.x, c7.x, r2.x, c7.y
+mad.f32 r0.z, r0.w, r4.x, r0.z
+mul.f r1.z, r3.x, c6.z
+mul.f r1.y, r3.x, c6.y
+mad.f32 r0.w, c11.y, r0.x, r3.z
+mad.f32 r3.x, c11.x, r0.x, r3.w
+mul.f r1.x, r1.x, c6.x
+mad.f32 r3.w, c11.w, r0.x, r5.x
rsq r0.z, r0.z
-(ss)mov.f32f32 r0.z, r0.z
-mul.f r5.w, r3.w, c6.z
-mul.f r6.x, r3.w, c6.y
-mul.f r3.w, r3.w, c6.x
-mul.f r2.w, r2.w, r0.z
-mul.f r3.x, r3.x, r0.z
-mul.f r0.z, r1.y, r0.z
-mov.f32f32 r1.y, r5.w
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r0.z, r0.z
-nop
-mov.f32f32 r6.w, r2.w
-mov.f32f32 r6.z, r3.x
-mov.f32f32 r6.y, r0.z
-mov.f32f32 r7.w, r1.y
-mov.f32f32 r0.z, r6.x
-mov.f32f32 r1.y, r3.w
-(rpt1)nop
-mov.f32f32 r7.z, r0.z
-mov.f32f32 r7.y, r1.y
-mad.f32 r0.z, c11.w, r0.x, r3.y
-mad.f32 r1.y, c11.z, r0.x, r3.z
-mad.f32 r2.w, c11.y, r0.x, r4.z
-mad.f32 r3.x, c11.x, r0.x, r4.w
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r1.y, r1.y
-mul.f r2.w, r2.w, c16.y
+(ss)mov.f32f32 r3.z, r0.z
+mul.f r5.x, r2.x, r0.z
+mul.f r3.y, r0.w, c16.y
mul.f r3.x, r3.x, c16.x
-mov.f32f32 r9.x, r0.z
-mov.f32f32 r8.w, r1.y
-mov.f32f32 r8.z, r2.w
-mov.f32f32 r8.y, r3.x
-mad.f32 r0.z, c3.w, r0.x, r1.x
-mad.f32 r1.x, c3.z, r0.x, r4.y
-mad.f32 r1.y, c1.y, r4.x, r5.x
-mad.f32 r2.w, c1.x, r4.x, r5.y
-mov.f32f32 r3.w, r0.z
-mov.f32f32 r3.z, r1.x
-mad.f32 r0.z, c2.y, r2.z, r1.y
-mad.f32 r1.x, c2.x, r2.z, r2.w
-mad.f32 r0.z, c3.y, r0.x, r0.z
-mad.f32 r0.x, c3.x, r0.x, r1.x
-mov.f32f32 r1.x, r5.z
-mov.f32f32 r1.y, r2.x
-mov.f32f32 r3.y, r0.z
-mov.f32f32 r3.x, r0.x
-mov.f32f32 r4.y, r1.x
-mov.f32f32 r4.x, r1.y
-mov.f32f32 r0.x, r0.y
-mov.f32f32 r0.y, r0.w
-mov.f32f32 r0.z, r2.y
-nop
-mov.f32f32 r2.z, r0.x
-mov.f32f32 r2.y, r0.y
-mov.f32f32 r2.x, r0.z
-mov.f32f32 r0.x, (0.000000)
-mov.f32f32 r0.y, r1.w
-mov.f32f32 r0.z, r1.z
-mov.f32f32 r0.w, (0.000000)
-mov.f32f32 r7.x, r0.x
-mov.f32f32 r4.w, r0.y
-mov.f32f32 r4.z, r0.z
-mov.f32f32 r2.w, r0.w
-mov.f32f32 r0.x, (0.000000)
-(rpt2)nop
-mov.f32f32 r8.x, r0.x
+mul.f r5.z, r4.x, r3.z
+mul.f r5.y, r2.w, r3.z
+mad.f32 r3.z, c11.z, r0.x, r4.y
+mad.f32 r0.w, c3.w, r0.x, r0.y
+mad.f32 r0.y, c1.z, r5.w, r6.z
+(ss)mul.f r0.z, c0.y, r2.z
+mad.f32 r0.y, c2.z, r2.y, r0.y
+mad.f32 r2.x, c1.y, r5.w, r0.z
+mad.f32 r0.z, c3.z, r0.x, r0.y
+mad.f32 r0.y, c2.y, r2.y, r2.x
+mul.f r2.x, c0.x, r2.z
+mad.f32 r0.y, c3.y, r0.x, r0.y
+mad.f32 r2.x, c1.x, r5.w, r2.x
+mad.f32 r4.y, c7.x, r2.y, c7.y
+mad.f32 r2.x, c2.x, r2.y, r2.x
+mad.f32 r4.x, c7.x, r2.z, c7.y
+mad.f32 r0.x, c3.x, r0.x, r2.x
+mov.f32f32 r2.z, r6.y
+mov.f32f32 r2.y, r6.x
+mov.f32f32 r2.x, r1.w
+mov.f32f32 r5.w, (0.000000)
+mov.f32f32 r2.w, (0.000000)
+mov.f32f32 r1.w, (0.000000)
end
nop
-; VERT: outputs: r3.x (0:0) r7.y (5:9) r2.x (5:10) r8.y (5:11) r4.x (5:12) r6.y (5:13)
-; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r1.z (0:0,cm=3,il=16,b=0)
-; VERT: 154 instructions, 0 half, 10 full
-; pos: r3.x
+nop
+; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11) r4.x (5:12) r5.x (5:13)
+; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r4.z (0:0,cm=3,il=16,b=0)
+; VERT: 102 instructions, 0 half, 7 full
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-56.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-56.asm
index 6e58225..f1c05da 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-56.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-56.asm
@@ -4,222 +4,159 @@
@in(r0.w) in3
@in(r1.x) in4
@in(r1.y) in5
-@out(r2.y) out0
-@out(r2.z) out1
-@out(r2.w) out2
-@out(r3.x) out3
+@out(r2.x) out0
+@out(r2.y) out1
+@out(r2.z) out2
+@out(r2.w) out3
+@const(c10.x) 0x3f000000, 0x00000000, 0x3f800000, 0xbb449ba6
+@const(c11.x) 0xbf000000, 0x40000000, 0x3f800000, 0x3de38866
+@const(c12.x) 0x3fb8aa65, 0x3cf5c28f, 0x00000000, 0x00000000
+@const(c13.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.x, 4, r1.x
add.f r0.y, r0.w, c10.y
bary.f r0.w, 5, r1.x
bary.f r1.z, 10, r1.x
-add.f r1.w, r0.x, c11.x
-bary.f r2.x, 8, r1.x
+add.f r2.x, r0.x, c11.x
+bary.f r1.w, 11, r1.x
add.f r2.y, r0.w, c11.x
-mov.f32f32 r1.z, r1.z
-floor.f r2.z, r1.w
+bary.f r2.z, 8, r1.x
+floor.f r3.x, r2.x
rcp r0.y, r0.y
add.f r0.z, r0.z, c10.y
-floor.f r2.w, r2.y
-mov.f32f32 r3.x, r1.z
-add.f r1.z, r1.w, (neg)r2.z
+floor.f r3.y, r2.y
+bary.f r2.w, 9, r1.x
+add.f r2.x, r2.x, (neg)r3.x
(ss)mul.f r0.y, r0.z, r0.y
-add.f r0.z, r2.y, (neg)r2.w
-bary.f r1.w, 11, r1.x
-mov.f32f32 r1.z, r1.z
+absneg.f r0.z, (neg)c8.x
+add.f r2.y, r2.y, (neg)r3.y
+mov.f32f32 r3.x, r2.x
+sam (f32)(xyzw)r3.y, r1.z, s#0, t#0
+(sy)(ss)add.f r1.z, c13.y, (neg)r4.x
+mul.f r0.z, r0.z, c8.x
+mov.f32f32 r1.w, r2.y
+mul.f r4.y, c10.x, r3.x
+add.f r3.x, c11.y, (neg)r3.x
+mul.f r0.z, r0.z, r0.y
mov.f32f32 r0.y, r0.y
-absneg.f r2.y, (neg)c8.x
-mov.f32f32 r0.z, r0.z
-mul.f r2.z, c10.x, r1.z
-mov.f32f32 r1.w, r1.w
-mul.f r2.y, r2.y, c8.x
-add.f r2.w, c11.y, (neg)r1.z
-mov.f32f32 r2.z, r2.z
-mul.f r3.z, c10.x, r0.z
-mul.f r2.y, r2.y, r0.y
-mov.f32f32 r3.y, r1.w
-add.f r0.x, r0.x, (neg)r2.z
-mov.f32f32 r1.w, r3.z
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r2.z, r2.w
-mov.f32f32 r0.x, r0.x
-add.f r0.w, r0.w, (neg)r1.w
-mul.f r0.y, r2.y, r0.y
-sam (f32)(xyzw)r2.w, r3.x, s#0, t#0
-(sy)add.f r1.w, c13.y, (neg)r3.z
-add.f r2.y, c11.x, r0.x
+add.f r0.x, r0.x, (neg)r4.y
+mul.f r4.y, c10.x, r1.w
+mov.f32f32 r4.z, r3.x
+mul.f r0.y, r0.z, r0.y
+mov.f32f32 r0.z, r0.x
+add.f r0.w, r0.w, (neg)r4.y
add.f r0.x, c11.z, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r0.x, r0.x
mul.f r0.y, r0.y, c12.x
-add.f r3.w, c11.z, r0.w
-mul.f r2.y, r2.y, c3.z
-mul.f r0.x, r0.x, c3.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r4.x, r2.y
-mov.f32f32 r4.y, r0.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r4.z, r4.x
-add.f r0.w, c11.x, r0.w
-exp2 r0.y, r0.y
-(ss)mov.f32f32 r0.y, r0.y
-mov.f32f32 r4.x, r4.y
-mov.f32f32 r5.y, r0.x
-mov.f32f32 r0.x, r0.w
-add.f r0.w, c13.y, (neg)r0.y
-mov.f32f32 r6.x, r4.x
-mul.f r3.w, r3.w, c3.w
-mul.f r0.x, r0.x, c3.w
-mul.f r0.w, r0.w, c8.y
-mul.f r0.y, r0.y, c10.z
-mov.f32f32 r4.x, r3.w
-mov.f32f32 r4.y, r0.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r6.w, r2.y
-mov.f32f32 r2.y, r3.w
-mov.f32f32 r4.w, r4.y
-bary.f r3.w, 6, r1.x
-add.f r0.y, r0.y, r0.w
-mov.f32f32 r0.w, r4.x
-mov.f32f32 r5.z, r0.x
-add.f r0.x, r3.w, c10.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r6.y, r0.w
-mov.f32f32 r7.x, r2.y
-mov.f32f32 r0.w, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r2.y, r0.x
-mov.f32f32 r3.w, r0.x
-mov.f32f32 r5.x, r0.w
-max.f r0.y, r0.y, c10.y
-mov.f32f32 r6.z, r2.y
-mov.f32f32 r5.w, r3.w
-mov.f32f32 r0.x, r0.x
-mul.f r0.w, r1.w, c4.z
-add.f r1.w, c13.y, (neg)r3.z
-sam.s (f32)(x)r2.y, r4.z, s#2, t#2
-(sy)mov.f32f32 r2.y, r2.y
-min.f r0.y, r0.y, c10.z
-sam.s (f32)(x)r3.w, r6.x, s#2, t#2
-(sy)mov.f32f32 r3.w, r3.w
-sam.s (f32)(x)r4.x, r5.y, s#2, t#2
-(sy)mov.f32f32 r4.x, r4.x
-mov.f32f32 r2.y, r2.y
-add.f r4.y, c11.y, (neg)r0.z
-(ss)add.f r4.z, c13.y, (neg)r0.y
-add.f r4.w, c13.y, (neg)r0.y
-add.f r5.x, c13.y, (neg)r0.y
-mov.f32f32 r4.y, r4.y
-mul.f r4.z, r4.z, c7.z
-mul.f r4.w, r4.w, c7.y
-mul.f r5.x, r5.x, c7.x
-mul.f r5.y, r2.z, r4.y
-mov.f32f32 r7.y, r0.x
-mul.f r0.x, r3.z, c10.z
-mul.f r1.w, r1.w, c4.y
-mul.f r2.y, r5.y, r2.y
-add.f r1.z, r1.z, c10.z
-add.f r0.x, r0.x, r0.w
-mul.f r0.w, r3.z, c10.z
-sam.s (f32)(x)r5.y, r6.w, s#2, t#2
-(sy)mov.f32f32 r5.y, r5.y
-mul.f r4.y, r1.z, r4.y
-add.f r5.z, c13.y, (neg)r3.z
-mul.f r0.x, r3.y, r0.x
-add.f r0.w, r0.w, r1.w
-mad.f32 r1.w, r4.y, r4.x, r2.y
-mul.f r2.y, r5.z, c4.x
-mov.f32f32 r0.x, r0.x
-bary.f r3.y, 2, r1.x
-mov.f32f32 r1.w, r1.w
-add.f r0.z, r0.z, c10.z
-mul.f r0.w, r3.x, r0.w
-mul.f r3.x, r3.z, c10.z
-mov.f32f32 r2.x, r2.x
-mul.f r2.z, r2.z, r0.z
-mul.f r3.y, r0.x, r3.y
-mov.f32f32 r0.w, r0.w
-add.f r2.y, r3.x, r2.y
-mad.f32 r1.w, r2.z, r5.y, r1.w
-bary.f r2.z, 1, r1.x
-mov.f32f32 r4.x, r2.x
-mul.f r2.x, r2.w, r2.y
-mov.f32f32 r1.w, r1.w
-mul.f r0.z, r1.z, r0.z
-mul.f r1.z, r0.w, r2.z
-mov.f32f32 r2.x, r2.x
-nop
-mad.f32 r0.z, r0.z, r3.w, r1.w
-bary.f r1.w, 0, r1.x
-bary.f (ei)r1.x, 9, r1.x
-mov.f32f32 r1.y, c10.z
-mov.f32f32 r0.z, r0.z
-mul.f r1.w, r2.x, r1.w
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r3.x, r1.y
-mul.f r0.z, c11.w, r0.z
-mov.f32f32 r1.y, c10.y
-(rpt1)nop
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r4.y, r1.x
+add.f r0.z, c11.x, r0.z
+mov.f32f32 r4.y, r0.w
+mul.f r4.w, r0.x, c3.z
+add.f r0.x, c11.z, r0.w
+mul.f r5.z, r0.z, c3.z
+add.f r0.z, c11.x, r4.y
+mov.f32f32 r6.y, r4.w
+mul.f r5.x, r0.x, c3.w
+mov.f32f32 r7.x, r5.z
+mul.f r6.z, r0.z, c3.w
+exp2 r0.x, r0.y
+(ss)mov.f32f32 r0.y, r0.x
+bary.f r0.z, 6, r1.x
+mov.f32f32 r5.w, r5.x
+mov.f32f32 r7.y, r6.z
+add.f r0.y, c13.y, (neg)r0.y
+add.f r5.y, r0.z, c10.w
+add.f r0.z, c11.y, (neg)r1.w
+mul.f r0.x, r0.x, c10.z
+add.f r0.w, c13.y, (neg)r4.x
+mov.f32f32 r7.z, r5.y
+mul.f r0.y, r0.y, c8.y
+mov.f32f32 r6.w, r5.y
+mov.f32f32 r6.x, r5.y
(rpt1)nop
-mul.f r1.x, r3.y, r0.z
-mul.f r1.z, r1.z, r0.z
-mul.f r0.z, r1.w, r0.z
+add.f r0.x, r0.x, r0.y
+sam.s (f32)(x)r7.x, r7.x, s#2, t#2
+mov.f32f32 r0.y, r0.z
nop
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.z, r1.z
-mad.f32 r0.x, c6.z, r0.x, r1.x
-mad.f32 r0.w, c6.y, r0.w, r1.z
-mov.f32f32 r0.z, r0.z
-sam (f32)(w)r3.y, r4.x, s#1, t#1
-(sy)cmps.f.lt r1.x, r4.x, c12.y
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.w, r0.w
-mad.f32 r0.z, c6.x, r2.x, r0.z
-cov.u32f32 r1.x, r1.x
-mul.f r0.x, r0.y, r0.x
-mul.f r0.w, r0.y, r0.w
-mov.f32f32 r0.z, r0.z
-cmps.f.ne r1.x, r1.x, c10.y
-add.f r0.x, r0.x, r4.z
-mov.f32f32 r1.z, r4.x
-add.f r0.w, r0.w, r4.w
-mul.f r0.y, r0.y, r0.z
-nop
-mov.f32f32 r0.z, r1.z
-(rpt2)nop
-sel.b32 r0.z, r1.y, r1.x, r0.z
-add.f r0.y, r0.y, r5.x
-(rpt1)nop
-mul.f r0.x, r0.x, r0.z
-mul.f r0.w, r0.w, r0.z
-mul.f r0.y, r0.y, r0.z
+(ss)nop
+sam.s (f32)(x)r7.y, r6.y, s#2, t#2
+sam.s (f32)(x)r7.z, r4.w, s#2, t#2
+mul.f r0.w, r0.w, c4.z
+add.f r1.w, c13.y, (neg)r4.x
+mul.f r0.y, r4.z, r0.y
+max.f r0.x, r0.x, c10.y
+sam.s (f32)(x)r4.y, r5.z, s#2, t#2
+mul.f r4.z, r4.x, c10.z
+mul.f r1.w, r1.w, c4.y
+(sy)mul.f r0.y, r0.y, r7.x
+add.f r2.x, r2.x, c10.z
+min.f r0.x, r0.x, c10.z
+add.f r0.w, r4.z, r0.w
+mul.f r4.z, r4.x, c10.z
+mul.f r0.z, r2.x, r0.z
+(ss)add.f r4.w, c13.y, (neg)r0.x
+add.f r5.x, c13.y, (neg)r0.x
+add.f r5.y, c13.y, (neg)r0.x
+mad.f32 r0.y, r0.z, r7.y, r0.y
+add.f r0.z, r2.y, c10.z
+mul.f r2.y, r4.w, c7.z
+mul.f r4.w, r5.x, c7.y
+mul.f r5.x, r5.y, c7.x
+mul.f r3.x, r3.x, r0.z
+add.f r1.w, r4.z, r1.w
+mul.f r1.z, r1.z, c4.x
+mul.f r0.w, r3.w, r0.w
+mad.f32 r0.y, r3.x, r4.y, r0.y
+mul.f r0.z, r2.x, r0.z
+mul.f r2.x, r4.x, c10.z
+mov.f32f32 r3.x, r0.w
+bary.f r3.w, 2, r1.x
+mad.f32 r0.y, r0.z, r7.z, r0.y
+mul.f r0.z, r3.z, r1.w
+add.f r1.z, r2.x, r1.z
+mul.f r1.w, r3.x, r3.w
+mul.f r0.y, c11.w, r0.y
+mov.f32f32 r2.x, r0.z
+mul.f r1.z, r3.y, r1.z
+bary.f r3.x, 1, r1.x
+mov.f32f32 r3.y, r0.y
+sam (f32)(w)r3.z, r2.z, s#1, t#1
+(sy)(ss)cmps.f.lt r2.z, r4.y, c12.y
+mov.f32f32 r3.z, r1.z
+mul.f r2.x, r2.x, r3.x
+mul.f r1.w, r1.w, r3.y
+bary.f (ei)r1.x, 0, r1.x
+mad.f32 r0.w, c6.z, r0.w, r1.w
+mul.f r1.y, r2.x, r3.y
+cov.u32f32 r1.w, r2.z
+mov.f32f32 r2.w, c10.z
+mul.f r0.w, r0.x, r0.w
+mad.f32 r0.z, c6.y, r0.z, r1.y
+mul.f r1.x, r3.z, r1.x
+cmps.f.ne r1.y, r1.w, c10.y
+add.f r0.w, r0.w, r2.y
+mov.f32f32 r1.w, c10.y
+mul.f r0.z, r0.x, r0.z
+mul.f r0.y, r1.x, r0.y
nop
-mul.f r0.x, r0.x, c5.z
-mul.f r0.z, r0.w, c5.y
-mul.f r0.y, r0.y, c5.x
+sel.b32 r1.x, r1.w, r1.y, r4.y
+add.f r0.z, r0.z, r4.w
+mad.f32 r0.y, c6.x, r1.z, r0.y
nop
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.y, r0.y
+mul.f r0.w, r0.w, r1.x
+mul.f r0.z, r0.z, r1.x
+(rpt1)nop
+mul.f r2.z, r0.w, c5.z
+mul.f r2.y, r0.z, c5.y
+mul.f r0.x, r0.x, r0.y
+(rpt2)nop
+add.f r0.x, r0.x, r5.x
+(rpt2)nop
+mul.f r0.x, r0.x, r1.x
+(rpt2)nop
+mul.f r2.x, r0.x, c5.x
+end
nop
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.y, r0.y
nop
-mov.f32f32 r2.w, r0.x
-mov.f32f32 r2.z, r0.z
-mov.f32f32 r2.y, r0.y
-end
nop
-; FRAG: outputs: r2.y (1:0)
+; FRAG: outputs: r2.x (1:0)
; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r2.x (5:10,cm=f,il=12,b=1) r1.x (5:11,cm=f,il=16,b=1)
-; FRAG: 212 instructions, 0 half, 8 full
-; pos (bary): r1.x
-; color: r2.y
-; fragcoord: r0.x
+; FRAG: 149 instructions, 0 half, 8 full
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-57.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-57.asm
index 0e4d5ee..9c8ac11 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-57.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-57.asm
@@ -6,134 +6,99 @@
@in(r0.w) in4
@in(r1.x) in5
@in(r1.y) in6
-@in(r1.z) in8
-@in(r1.w) in9
-@out(r6.y) out0
-@out(r6.z) out1
-@out(r6.w) out2
-@out(r7.x) out3
-@out(r5.y) out4
-@out(r5.z) out5
-@out(r5.w) out6
-@out(r6.x) out7
-@out(r2.w) out8
-@out(r3.x) out9
-@out(r3.y) out10
-@out(r3.z) out11
-@out(r3.w) out12
-@out(r4.x) out13
-@out(r4.y) out14
-@out(r4.z) out15
-(sy)(ss)mul.f r2.x, c11.x, r0.w
-mul.f r2.y, c11.x, r0.x
-mad.f32 r2.x, c12.x, r1.x, r2.x
-mad.f32 r2.y, c12.x, r0.y, r2.y
-mul.f r2.z, c11.z, r0.x
-mad.f32 r2.y, c13.x, r0.z, r2.y
-mov.f32f32 r2.x, r2.x
-mad.f32 r2.z, c12.z, r0.y, r2.z
-mad.f32 r2.x, c13.x, r1.y, r2.x
-add.f r2.y, r2.y, c14.x
-mad.f32 r2.z, c13.z, r0.z, r2.z
-mul.f r2.w, c11.y, r0.w
-mov.f32f32 r2.x, r2.x
-mul.f r3.x, c7.w, r2.y
-mul.f r3.y, c7.z, r2.y
-mul.f r3.z, c7.y, r2.y
-mul.f r2.x, r2.x, (neg)c4.x
-mad.f32 r2.w, c12.y, r1.x, r2.w
-mul.f r3.w, c11.y, r0.x
-mul.f r4.x, c7.x, r2.y
-mad.f32 r3.w, c12.y, r0.y, r3.w
-mov.f32f32 r2.w, r2.w
-mad.f32 r3.w, c13.y, r0.z, r3.w
-mad.f32 r2.w, c13.y, r1.y, r2.w
-mul.f r4.y, c0.w, r2.y
-mul.f r4.z, c0.z, r2.y
-mul.f r4.w, c0.y, r2.y
-mov.f32f32 r2.w, r2.w
-add.f r3.w, r3.w, c14.y
-mul.f r5.x, c0.x, r2.y
-add.f r2.z, r2.z, c14.z
-mad.f32 r2.x, (neg)c4.y, r2.w, r2.x
-mad.f32 r2.w, c8.w, r3.w, r3.x
-mad.f32 r3.x, c8.z, r3.w, r3.y
-mad.f32 r3.y, c8.y, r3.w, r3.z
-mov.f32f32 r2.x, r2.x
+@in(r3.z) in8
+@in(r3.w) in9
+@out(r0.x) out0
+@out(r0.y) out1
+@out(r0.z) out2
+@out(r0.w) out3
+@out(r1.x) out4
+@out(r1.y) out5
+@out(r1.z) out6
+@out(r1.w) out7
+@out(r2.x) out8
+@out(r2.y) out9
+@out(r2.z) out10
+@out(r2.w) out11
+@out(r3.x) out12
+@out(r3.y) out13
+@out(r3.z) out14
+@out(r3.w) out15
+@const(c16.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
+@const(c17.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
+(sy)(ss)mul.f r1.z, c11.x, r0.w
+mul.f r1.w, c11.x, r0.x
+mad.f32 r1.z, c12.x, r1.x, r1.z
+mad.f32 r1.w, c12.x, r0.y, r1.w
+mad.f32 r1.z, c13.x, r1.y, r1.z
+mad.f32 r1.w, c13.x, r0.z, r1.w
+mul.f r2.x, c11.z, r0.x
+mul.f r2.y, c11.y, r0.x
+mul.f r1.z, r1.z, (neg)c4.x
+mul.f r2.z, c11.y, r0.w
+add.f r1.w, r1.w, c14.x
+mad.f32 r2.z, c12.y, r1.x, r2.z
+mad.f32 r2.x, c12.z, r0.y, r2.x
+mad.f32 r2.z, c13.y, r1.y, r2.z
+mul.f r2.w, c7.y, r1.w
+mul.f r3.x, c7.x, r1.w
+mul.f r3.y, c7.w, r1.w
+mad.f32 r1.z, (neg)c4.y, r2.z, r1.z
mul.f r0.w, c11.z, r0.w
-mad.f32 r2.w, c9.w, r2.z, r2.w
+mad.f32 r2.y, c12.y, r0.y, r2.y
mad.f32 r0.w, c12.z, r1.x, r0.w
+mad.f32 r1.x, c13.y, r0.z, r2.y
+mad.f32 r0.w, c13.z, r1.y, r0.w
+mul.f r1.y, c7.z, r1.w
+mul.f r2.y, c0.w, r1.w
+mul.f r2.z, c0.z, r1.w
+mad.f32 r0.w, (neg)c4.z, r0.w, r1.z
+add.f r4.x, r1.x, c14.y
+mul.f r4.y, c0.y, r1.w
+mul.f r4.z, c0.x, r1.w
+max.f r0.w, c16.x, r0.w
+mad.f32 r2.w, c8.y, r4.x, r2.w
+mad.f32 r3.x, c8.x, r4.x, r3.x
+mad.f32 r3.y, c8.w, r4.x, r3.y
+mov.f32f32 r4.w, r0.w
+mad.f32 r2.x, c13.z, r0.z, r2.x
+mul.f r1.x, r0.w, c5.x
+mad.f32 r0.w, c8.z, r4.x, r1.y
+mul.f r1.z, r4.w, c5.z
+mul.f r1.y, r4.w, c5.y
+add.f r4.w, r2.x, c14.z
+mad.f32 r2.x, c1.w, r4.x, r2.y
+mad.f32 r2.y, c1.z, r4.x, r2.z
+mad.f32 r2.z, c1.y, r4.x, r4.y
+mad.f32 r2.w, c9.y, r4.w, r2.w
mul.f r0.x, c11.w, r0.x
-mad.f32 r1.x, c9.z, r2.z, r3.x
-mad.f32 r3.x, c9.y, r2.z, r3.y
-mov.f32f32 r0.w, r0.w
+mad.f32 r3.x, c9.x, r4.w, r3.x
mad.f32 r0.x, c12.w, r0.y, r0.x
-mad.f32 r0.y, c13.z, r1.y, r0.w
+mad.f32 r0.y, c9.w, r4.w, r3.y
mad.f32 r0.x, c13.w, r0.z, r0.x
-mad.f32 r0.z, c8.x, r3.w, r4.x
-mad.f32 r0.w, c1.w, r3.w, r4.y
-mov.f32f32 r0.y, r0.y
+mad.f32 r0.z, c9.z, r4.w, r0.w
+mad.f32 r0.w, c2.w, r4.w, r2.x
+mad.f32 r4.y, c2.z, r4.w, r2.y
add.f r0.x, r0.x, c14.w
-mad.f32 r0.z, c9.x, r2.z, r0.z
-mad.f32 r0.w, c2.w, r2.z, r0.w
-mad.f32 r0.y, (neg)c4.z, r0.y, r2.x
-mad.f32 r1.y, c10.w, r0.x, r2.w
-mad.f32 r1.x, c10.z, r0.x, r1.x
-mad.f32 r2.x, c10.y, r0.x, r3.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.x, r1.x
-mul.f r2.x, r2.x, c15.y
-max.f r0.y, c16.x, r0.y
-mov.f32f32 r3.z, r1.y
-mov.f32f32 r3.y, r1.x
-mov.f32f32 r3.x, r2.x
-mov.f32f32 r0.y, r0.y
-mad.f32 r0.z, c10.x, r0.x, r0.z
+mad.f32 r5.x, c2.y, r4.w, r2.z
+mad.f32 r4.x, c1.x, r4.x, r4.z
+mad.f32 r3.y, c6.x, r4.w, c6.y
+mad.f32 r2.x, c10.y, r0.x, r2.w
+mad.f32 r3.x, c10.x, r0.x, r3.x
+mad.f32 r2.w, c10.w, r0.x, r0.y
+mad.f32 r2.z, c10.z, r0.x, r0.z
+mul.f r2.y, r2.x, c15.y
+mul.f r2.x, r3.x, c15.x
mad.f32 r0.w, c3.w, r0.x, r0.w
-mad.f32 r1.x, c1.z, r3.w, r4.z
-mul.f r1.y, r0.y, c5.z
-mul.f r2.x, r0.y, c5.y
-mul.f r0.y, r0.y, c5.x
-mul.f r0.z, r0.z, c15.x
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r0.y, r0.y
-nop
-mov.f32f32 r5.w, r1.y
-mov.f32f32 r5.z, r2.x
-mov.f32f32 r5.y, r0.y
-mov.f32f32 r2.w, r0.z
-mov.f32f32 r7.x, r0.w
-mad.f32 r0.y, c2.z, r2.z, r1.x
-mad.f32 r0.z, c1.y, r3.w, r4.w
-mad.f32 r0.y, c3.z, r0.x, r0.y
-mad.f32 r0.z, c2.y, r2.z, r0.z
-mad.f32 r0.w, c1.x, r3.w, r5.x
-mad.f32 r1.x, c6.x, r2.z, c6.y
-mov.f32f32 r6.w, r0.y
-mad.f32 r0.y, c3.y, r0.x, r0.z
-mad.f32 r0.z, c2.x, r2.z, r0.w
-mov.f32f32 r0.w, r1.x
-mad.f32 r1.x, c6.x, r2.y, c6.y
-mov.f32f32 r6.z, r0.y
-mad.f32 r0.x, c3.x, r0.x, r0.z
-mov.f32f32 r4.x, r0.w
-mov.f32f32 r0.y, r1.x
-nop
-mov.f32f32 r6.y, r0.x
-mov.f32f32 r0.x, r1.w
-mov.f32f32 r3.w, r0.y
-mov.f32f32 r0.y, r1.z
-mov.f32f32 r0.z, (0.000000)
-mov.f32f32 r4.z, r0.x
-nop
-mov.f32f32 r4.y, r0.y
-mov.f32f32 r6.x, r0.z
+mad.f32 r0.z, c3.z, r0.x, r4.y
+mad.f32 r0.y, c3.y, r0.x, r5.x
+mad.f32 r4.x, c2.x, r4.w, r4.x
+mad.f32 r3.x, c6.x, r1.w, c6.y
+mad.f32 r0.x, c3.x, r0.x, r4.x
+mov.f32f32 r1.w, (0.000000)
end
nop
-; VERT: outputs: r6.y (0:0) r5.y (5:9) r2.w (5:10) r3.w (5:11)
-; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r1.z (0:0,cm=3,il=16,b=0)
-; VERT: 107 instructions, 0 half, 8 full
-; pos: r6.y
+; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11)
+; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r3.z (0:0,cm=3,il=16,b=0)
+; VERT: 71 instructions, 0 half, 6 full
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-58.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-58.asm
index 2316052..57bb137 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-58.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-58.asm
@@ -4,258 +4,187 @@
@in(r0.w) in3
@in(r1.x) in4
@in(r1.y) in5
-@out(r2.w) out0
-@out(r3.x) out1
-@out(r3.y) out2
-@out(r3.z) out3
+@out(r2.x) out0
+@out(r2.y) out1
+@out(r2.z) out2
+@out(r2.w) out3
+@const(c13.x) 0x3f000000, 0x00000000, 0x3f800000, 0xbb449ba6
+@const(c14.x) 0xbf000000, 0x40000000, 0x3f800000, 0x3de38866
+@const(c15.x) 0x3fb8aa65, 0x3cf5c28f, 0x00000000, 0x00000000
+@const(c16.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.x, 8, r1.x
add.f r0.y, r0.w, c13.y
bary.f r0.w, 4, r1.x
bary.f r1.z, 9, r1.x
add.f r1.w, r0.x, c14.x
bary.f r2.x, 14, r1.x
-mul.f r2.y, r0.w, r0.w
-bary.f r2.z, 5, r1.x
+bary.f r2.y, 15, r1.x
+add.f r2.z, r1.z, c14.x
floor.f r2.w, r1.w
rcp r0.y, r0.y
add.f r0.z, r0.z, c13.y
-add.f r3.x, r1.z, c14.x
-mad.f32 r2.y, r2.z, r2.z, r2.y
+mul.f r3.x, r0.w, r0.w
+bary.f r3.y, 5, r1.x
add.f r1.w, r1.w, (neg)r2.w
(ss)mul.f r0.y, r0.z, r0.y
-floor.f r0.z, r3.x
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r1.w, r1.w
+absneg.f r0.z, (neg)c9.x
+mad.f32 r2.w, r3.y, r3.y, r3.x
+mov.f32f32 r3.x, r1.w
+bary.f r3.z, 6, r1.x
+mul.f r0.z, r0.z, c9.x
+floor.f r3.w, r2.z
+mul.f r4.x, c13.x, r3.x
+mad.f32 r2.w, r3.z, r3.z, r2.w
+mul.f r0.z, r0.z, r0.y
mov.f32f32 r0.y, r0.y
-absneg.f r2.w, (neg)c9.x
-bary.f r3.y, 6, r1.x
-mul.f r3.z, c13.x, r1.w
-add.f r0.z, r3.x, (neg)r0.z
-mul.f r2.w, r2.w, c9.x
-mad.f32 r2.y, r3.y, r3.y, r2.y
-mov.f32f32 r3.x, r3.z
-mov.f32f32 r0.z, r0.z
-mul.f r2.w, r2.w, r0.y
-mov.f32f32 r2.x, r2.x
-add.f r0.x, r0.x, (neg)r3.x
-mul.f r3.x, c13.x, r0.z
-mov.f32f32 r2.w, r2.w
-rsq r2.y, r2.y
-(ss)mov.f32f32 r2.y, r2.y
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r3.x, r3.x
-mul.f r0.y, r2.w, r0.y
-mul.f r0.w, r0.w, r2.y
-add.f r2.w, c14.x, r0.x
-add.f r0.x, c14.z, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.w, r2.w
-bary.f r3.z, 16, r1.x
+add.f r0.x, r0.x, (neg)r4.x
+add.f r2.z, r2.z, (neg)r3.w
+add.f r3.x, c14.y, (neg)r3.x
+mul.f r0.y, r0.z, r0.y
+mov.f32f32 r0.z, r0.x
+rsq r2.w, r2.w
+(ss)mov.f32f32 r3.w, r2.w
+mov.f32f32 r4.x, r2.z
mul.f r0.y, r0.y, c15.x
-mov.f32f32 r0.x, r0.x
-mul.f r2.w, r2.w, c3.z
-mul.f r0.w, r0.w, r3.z
-mul.f r2.z, r2.z, r2.y
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r3.z, r2.w
-mul.f r0.x, r0.x, c3.z
-mov.f32f32 r2.z, r2.z
-bary.f r3.w, 17, r1.x
-mov.f32f32 r4.x, r3.z
-add.f r1.z, r1.z, (neg)r3.x
+add.f r0.z, c14.x, r0.z
+mul.f r0.w, r0.w, r3.w
+bary.f r4.y, 16, r1.x
+mul.f r4.z, c13.x, r4.x
+mul.f r4.w, r0.z, c3.z
+add.f r0.x, c14.z, r0.x
+mul.f r0.z, r0.w, r4.y
exp2 r0.y, r0.y
-(ss)mov.f32f32 r0.y, r0.y
-mad.f32 r0.w, r2.z, r3.w, r0.w
-mov.f32f32 r2.z, r0.x
-mov.f32f32 r1.z, r1.z
-add.f r3.x, c16.y, (neg)r0.y
-mov.f32f32 r0.w, r0.w
-mul.f r2.y, r3.y, r2.y
-add.f r3.y, c14.x, r1.z
-mul.f r3.x, r3.x, c9.y
-mul.f r0.y, r0.y, c13.z
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r3.y, r3.y
-bary.f r3.z, 18, r1.x
-add.f r0.y, r0.y, r3.x
-mov.f32f32 r2.z, r2.z
-mul.f r3.x, r3.y, c3.w
-mad.f32 r0.w, r2.y, r3.z, r0.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r3.y, r2.z
-mov.f32f32 r2.y, r3.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.y, r0.y
-add.f r1.z, c14.z, r1.z
-mov.f32f32 r4.y, r2.y
-bary.f r2.y, 10, r1.x
-max.f r0.y, r0.y, c13.y
-max.f r0.w, c13.y, r0.w
-mov.f32f32 r1.z, r1.z
-add.f r2.y, r2.y, c13.w
-min.f r0.y, r0.y, c13.z
-mov.f32f32 r0.w, r0.w
-mul.f r1.z, r1.z, c3.w
-mov.f32f32 r2.z, r2.y
-add.f r3.z, c16.y, (neg)r0.y
-add.f r3.w, c16.y, (neg)r0.y
-add.f r4.w, c16.y, (neg)r0.y
-mov.f32f32 r4.z, r2.z
-mul.f r2.z, r3.z, c8.z
-mul.f r5.x, r3.w, c8.y
-mul.f r4.w, r4.w, c8.x
-log2 r0.w, r0.w
-(ss)mul.f r0.w, c10.x, r0.w
-mov.f32f32 r3.z, r1.z
-mov.f32f32 r0.x, r0.x
-sam.s (f32)(x)r3.w, r4.x, s#2, t#2
-(sy)mov.f32f32 r3.w, r3.w
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r3.z, r3.z
-(ss)mov.f32f32 r4.x, r0.x
-mov.f32f32 r0.x, r3.w
-add.f r3.w, c14.y, (neg)r1.w
-mov.f32f32 r3.z, r3.z
-mov.f32f32 r4.z, r2.y
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r5.y, r3.w
-add.f r3.w, c14.y, (neg)r0.z
-exp2 r0.w, r0.w
-mov.f32f32 r4.y, c7.y
-mov.f32f32 r5.z, c7.x
-mov.f32f32 r5.w, c7.z
-mov.f32f32 r6.x, r3.w
-mul.f r3.w, r4.y, c11.y
-mul.f r4.y, r5.z, c11.x
-mul.f r5.z, r5.w, c11.z
-mul.f r5.w, r5.y, r6.x
-(ss)mul.f r6.y, r3.w, r0.w
-mul.f r6.z, r4.y, r0.w
-(ss)mul.f r0.w, r5.z, r0.w
-mul.f r0.x, r5.w, r0.x
-mov.f32f32 r4.y, r3.x
-mov.f32f32 r3.x, r2.y
-mov.f32f32 r5.z, r2.x
-bary.f r2.x, 15, r1.x
-mov.f32f32 r3.w, r4.z
-mov.f32f32 r4.z, r3.x
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r2.x, r2.x
-add.f r1.w, r1.w, c13.z
-add.f r0.z, r0.z, c13.z
-sam.s (f32)(x)r3.x, r3.y, s#2, t#2
-(sy)mov.f32f32 r3.x, r3.x
-(ss)nop
-sam.s (f32)(x)r3.y, r4.x, s#2, t#2
-(sy)mov.f32f32 r3.y, r3.y
-mul.f r3.z, r1.w, r6.x
-mov.f32f32 r5.w, r2.x
-mov.f32f32 r3.w, r2.w
-(ss)mov.f32f32 r4.x, r1.z
-mad.f32 r0.x, r3.z, r3.y, r0.x
-mov.f32f32 r1.z, r2.y
-mul.f r2.x, r5.y, r0.z
-bary.f r2.y, 12, r1.x
-mov.f32f32 r0.x, r0.x
+(ss)mov.f32f32 r0.w, r0.y
+mov.f32f32 r5.z, r4.w
+add.f r1.z, r1.z, (neg)r4.z
+mul.f r3.y, r3.y, r3.w
+bary.f r3.w, 17, r1.x
+add.f r0.w, c16.y, (neg)r0.w
mov.f32f32 r4.y, r1.z
-sam (f32)(xyzw)r5.y, r5.z, s#0, t#0
-(sy)add.f r1.z, c16.y, (neg)r6.x
-add.f r2.w, c16.y, (neg)r6.x
-add.f r3.y, c16.y, (neg)r6.x
-mov.f32f32 r2.y, r2.y
-mul.f r1.z, r1.z, c4.x
-mul.f r0.z, r1.w, r0.z
-sam.s (f32)(x)r1.w, r3.w, s#2, t#2
-(sy)mov.f32f32 r1.w, r1.w
-mul.f r2.w, r2.w, c4.z
-mul.f r3.y, r3.y, c4.y
-mul.f r3.z, r6.x, c13.z
-mad.f32 r0.x, r2.x, r1.w, r0.x
-mul.f r1.w, r6.x, c13.z
-mul.f r2.x, r6.x, c13.z
-add.f r1.z, r3.z, r1.z
-mov.f32f32 r0.x, r0.x
-add.f r1.w, r1.w, r2.w
-mad.f32 r0.x, r0.z, r3.x, r0.x
-add.f r0.z, r2.x, r3.y
-mul.f r1.z, r5.y, r1.z
-mul.f r1.w, r5.w, r1.w
-mov.f32f32 r0.x, r0.x
-mul.f r0.z, r5.z, r0.z
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r1.w, r1.w
-mul.f r0.x, c14.w, r0.x
-bary.f r2.x, 2, r1.x
-mov.f32f32 r0.z, r0.z
-bary.f r2.w, 0, r1.x
-mov.f32f32 r0.x, r0.x
-mad.f32 r0.w, r1.w, r2.x, r0.w
-bary.f r2.x, 1, r1.x
-mad.f32 r2.w, r1.z, r2.w, r6.z
-mov.f32f32 r3.x, r2.y
-mov.f32f32 r0.w, r0.w
-mad.f32 r2.x, r0.z, r2.x, r6.y
-mov.f32f32 r2.y, r2.w
-bary.f (ei)r1.x, 13, r1.x
-mul.f r0.w, r0.w, r0.x
-mov.f32f32 r1.y, r2.x
-mul.f r2.x, r2.y, r0.x
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r0.w, r0.w
-mul.f r0.x, r1.y, r0.x
-mad.f32 r0.w, c6.z, r1.w, r0.w
-mov.f32f32 r1.y, r2.x
-mov.f32f32 r3.y, r1.x
-mov.f32f32 r1.x, c13.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.x, r0.x
-mad.f32 r1.y, c6.x, r1.z, r1.y
-mad.f32 r0.x, c6.y, r0.z, r0.x
-mul.f r0.z, r0.y, r0.w
-sam (f32)(w)r1.z, r3.x, s#1, t#1
-(sy)cmps.f.lt r0.w, r2.y, c15.y
-mov.f32f32 r1.z, r2.y
-mov.f32f32 r3.z, r1.x
-add.f r0.z, r0.z, r2.z
-cov.u32f32 r0.w, r0.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r1.x, r1.y
-mov.f32f32 r1.y, r1.z
-cmps.f.ne r0.w, r0.w, c13.y
-mov.f32f32 r1.z, c13.y
-mul.f r0.x, r0.y, r0.x
-mul.f r0.y, r0.y, r1.x
-nop
-sel.b32 r0.w, r1.z, r0.w, r1.y
-add.f r0.x, r0.x, r5.x
-add.f r0.y, r0.y, r4.w
+mul.f r6.y, r0.x, c3.z
+add.f r0.x, c14.z, r1.z
+mad.f32 r0.z, r3.y, r3.w, r0.z
+add.f r1.z, c14.x, r4.y
+mul.f r0.w, r0.w, c9.y
+(ss)mul.f r0.y, r0.y, c13.z
+mul.f r2.w, r3.z, r2.w
+mul.f r3.z, r1.z, c3.w
+bary.f r1.z, 18, r1.x
+add.f r0.y, r0.y, r0.w
+mov.f32f32 r3.y, r6.y
+mov.f32f32 r5.w, r3.z
+bary.f r0.w, 10, r1.x
+max.f r0.y, r0.y, c13.y
+mad.f32 r0.z, r2.w, r1.z, r0.z
+mul.f r6.z, r0.x, c3.w
+add.f r6.w, r0.w, c13.w
+min.f r0.x, r0.y, c13.z
+max.f r0.y, c13.y, r0.z
+mov.f32f32 r5.x, r6.z
+mov.f32f32 r6.x, r6.w
+add.f r0.z, c16.y, (neg)r0.x
+add.f r0.w, c16.y, (neg)r0.x
+add.f r1.z, c16.y, (neg)r0.x
+mov.f32f32 r3.w, r6.w
+mov.f32f32 r5.y, r6.w
+log2 r0.y, r0.y
+mul.f r0.z, r0.z, c8.z
+sam.s (f32)(x)r7.x, r5.z, s#2, t#2
+mov.f32f32 r2.w, r3.x
+add.f r4.x, c14.y, (neg)r4.x
+mul.f r0.w, r0.w, c8.y
+mul.f r1.z, r1.z, c8.x
+(ss)mul.f r0.y, c10.x, r0.y
+mov.f32f32 r4.y, r4.x
nop
-mul.f r0.z, r0.z, r0.w
-mul.f r0.x, r0.x, r0.w
-mul.f r0.y, r0.y, r0.w
+sam.s (f32)(x)r7.y, r3.y, s#2, t#2
+sam.s (f32)(x)r4.z, r4.w, s#2, t#2
nop
-mul.f r0.z, r0.z, c5.z
-mul.f r0.x, r0.x, c5.y
-mul.f r0.y, r0.y, c5.x
+(ss)nop
+sam.s (f32)(x)r4.w, r6.y, s#2, t#2
+add.f r1.w, r1.w, c13.z
+mul.f r2.w, r2.w, r4.y
+add.f r2.z, r2.z, c13.z
+sam (f32)(xyzw)r5.x, r2.x, s#0, t#0
+exp2 r0.y, r0.y
+(ss)mov.f32f32 r2.x, c7.y
+mov.f32f32 r2.y, c7.z
+(sy)mul.f r2.w, r2.w, r7.x
+mul.f r3.y, r1.w, r4.x
+mul.f r2.x, r2.x, c11.y
+mov.f32f32 r3.z, c7.x
+mul.f r2.y, r2.y, c11.z
+mad.f32 r2.w, r3.y, r7.y, r2.w
+mul.f r3.x, r3.x, r2.z
+(ss)mul.f r2.x, r2.x, r0.y
+mul.f r3.y, r3.z, c11.x
+mul.f r2.y, r2.y, r0.y
+mad.f32 r2.w, r3.x, r4.z, r2.w
+mul.f r1.w, r1.w, r2.z
+add.f r2.z, c16.y, (neg)r5.w
+mul.f r0.y, r3.y, r0.y
+add.f r3.x, c16.y, (neg)r5.w
+mad.f32 r1.w, r1.w, r4.w, r2.w
+mul.f r2.z, r2.z, c4.y
+add.f r2.w, c16.y, (neg)r5.w
+mul.f r3.x, r3.x, c4.z
+mul.f r1.w, c14.w, r1.w
+mul.f r3.y, r5.w, c13.z
+mul.f r3.z, r5.w, c13.z
+mul.f r2.w, r2.w, c4.x
+mov.f32f32 r3.w, r1.w
+add.f r3.x, r3.y, r3.x
+add.f r2.z, r3.z, r2.z
+mul.f r3.y, r5.w, c13.z
+bary.f r4.x, 12, r1.x
+mul.f r3.x, r5.z, r3.x
+mul.f r2.z, r5.y, r2.z
+add.f r2.w, r3.y, r2.w
+bary.f r4.y, 13, r1.x
+mov.f32f32 r3.y, r3.x
+bary.f r3.z, 2, r1.x
+mov.f32f32 r4.z, r2.z
+bary.f r4.w, 1, r1.x
+mul.f r5.x, r5.x, r2.w
+mad.f32 r2.y, r3.y, r3.z, r2.y
+sam (f32)(w)r5.y, r4.x, s#1, t#1
+(sy)cmps.f.lt r3.y, r6.x, c15.y
+mov.f32f32 r2.w, c13.z
+mov.f32f32 r3.z, c13.y
+mul.f r2.y, r2.y, r3.w
+mad.f32 r2.x, r4.z, r4.w, r2.x
+mad.f32 r2.y, c6.z, r3.x, r2.y
+mov.f32f32 r3.x, r5.x
+bary.f (ei)r1.x, 0, r1.x
+mul.f r1.y, r2.x, r3.w
+mul.f r2.x, r0.x, r2.y
+mad.f32 r1.y, c6.y, r2.z, r1.y
+mad.f32 r0.y, r3.x, r1.x, r0.y
+cov.u32f32 r1.x, r3.y
+add.f r0.z, r2.x, r0.z
+mul.f r1.y, r0.x, r1.y
+mul.f r0.y, r0.y, r1.w
+cmps.f.ne r1.x, r1.x, c13.y
+mad.f32 r0.y, c6.x, r5.x, r0.y
+add.f r0.w, r1.y, r0.w
nop
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
+sel.b32 r1.x, r3.z, r1.x, r6.x
+mul.f r0.x, r0.x, r0.y
+(rpt1)nop
+mul.f r0.y, r0.z, r1.x
+mul.f r0.z, r0.w, r1.x
+(rpt1)nop
+mul.f r2.z, r0.y, c5.z
+mul.f r2.y, r0.z, c5.y
+add.f r0.x, r0.x, r1.z
+(rpt2)nop
+mul.f r0.x, r0.x, r1.x
+(rpt2)nop
+mul.f r2.x, r0.x, c5.x
+end
nop
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
nop
-(ss)mov.f32f32 r3.y, r0.z
-mov.f32f32 r3.x, r0.x
-mov.f32f32 r2.w, r0.y
-end
-; FRAG: outputs: r2.w (1:0)
-; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r2.x (5:10,cm=f,il=12,b=1) r3.x (5:11,cm=f,il=16,b=1) r1.y (5:12,cm=f,il=20,b=1) r5.x (5:13,cm=f,il=24,b=1)
-; FRAG: 244 instructions, 0 half, 7 full
-; pos (bary): r1.x
-; color: r2.w
-; fragcoord: r0.x
+; FRAG: outputs: r2.x (1:0)
+; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r2.x (5:10,cm=f,il=12,b=1) r3.x (5:11,cm=f,il=16,b=1) r1.z (5:12,cm=f,il=20,b=1) r5.x (5:13,cm=f,il=24,b=1)
+; FRAG: 176 instructions, 0 half, 8 full
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-59.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-59.asm
index 9f1027e..e7bcae3 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-59.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-59.asm
@@ -6,186 +6,139 @@
@in(r0.w) in4
@in(r1.x) in5
@in(r1.y) in6
-@in(r1.z) in8
-@in(r1.w) in9
-@out(r3.x) out0
-@out(r3.y) out1
-@out(r3.z) out2
-@out(r3.w) out3
-@out(r7.y) out4
-@out(r7.z) out5
-@out(r7.w) out6
-@out(r8.x) out7
+@in(r4.z) in8
+@in(r4.w) in9
+@out(r0.x) out0
+@out(r0.y) out1
+@out(r0.z) out2
+@out(r0.w) out3
+@out(r1.x) out4
+@out(r1.y) out5
+@out(r1.z) out6
+@out(r1.w) out7
@out(r2.x) out8
@out(r2.y) out9
@out(r2.z) out10
@out(r2.w) out11
-@out(r8.y) out12
-@out(r8.z) out13
-@out(r8.w) out14
-@out(r9.x) out15
+@out(r3.x) out12
+@out(r3.y) out13
+@out(r3.z) out14
+@out(r3.w) out15
@out(r4.x) out16
@out(r4.y) out17
@out(r4.z) out18
@out(r4.w) out19
-@out(r6.y) out20
-@out(r6.z) out21
-@out(r6.w) out22
-@out(r7.x) out23
-(sy)(ss)mul.f r2.x, c12.x, r0.x
-mul.f r2.y, c12.x, r0.w
-mad.f32 r2.x, c13.x, r0.y, r2.x
-mad.f32 r2.y, c13.x, r1.x, r2.y
-mad.f32 r2.x, c14.x, r0.z, r2.x
-mul.f r2.z, c12.z, r0.x
-mul.f r2.w, c12.z, r0.w
-mul.f r0.w, c12.y, r0.w
-add.f r2.x, r2.x, c15.x
-mov.f32f32 r2.y, r2.y
-mad.f32 r2.z, c13.z, r0.y, r2.z
-mad.f32 r2.w, c13.z, r1.x, r2.w
-add.f r3.x, c4.x, (neg)r2.x
-mad.f32 r2.y, c14.x, r1.y, r2.y
-mul.f r3.y, c8.w, r2.x
-mul.f r3.z, c8.z, r2.x
-mul.f r3.w, r3.x, r3.x
-mul.f r4.x, c12.y, r0.x
-mov.f32f32 r2.y, r2.y
-mad.f32 r4.x, c13.y, r0.y, r4.x
-absneg.f r4.y, (neg)c5.x
-mad.f32 r4.x, c14.y, r0.z, r4.x
-mul.f r4.z, c8.y, r2.x
-mul.f r4.w, c8.x, r2.x
-mul.f r5.x, r2.y, r4.y
-add.f r4.x, r4.x, c15.y
-mad.f32 r0.w, c13.y, r1.x, r0.w
-mul.f r1.x, c0.w, r2.x
-mul.f r5.y, c0.z, r2.x
-add.f r5.z, c4.y, (neg)r4.x
-mov.f32f32 r0.w, r0.w
-mad.f32 r3.y, c9.w, r4.x, r3.y
-mad.f32 r3.z, c9.z, r4.x, r3.z
-mad.f32 r3.w, r5.z, r5.z, r3.w
-mad.f32 r0.w, c14.y, r1.y, r0.w
-mad.f32 r2.z, c14.z, r0.z, r2.z
-mad.f32 r4.z, c9.y, r4.x, r4.z
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r0.w, r0.w
-add.f r2.z, r2.z, c15.z
-absneg.f r5.w, (neg)c5.y
-mad.f32 r4.w, c9.x, r4.x, r4.w
-mad.f32 r1.x, c1.w, r4.x, r1.x
-add.f r6.x, c4.z, (neg)r2.z
-mad.f32 r5.x, r0.w, r5.w, r5.x
-mad.f32 r3.y, c10.w, r2.z, r3.y
-mad.f32 r3.z, c10.z, r2.z, r3.z
-mad.f32 r3.w, r6.x, r6.x, r3.w
-mov.f32f32 r5.x, r5.x
-mov.f32f32 r2.w, r2.w
+@out(r5.x) out20
+@out(r5.y) out21
+@out(r5.z) out22
+@out(r5.w) out23
+@const(c17.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
+@const(c18.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
+(sy)(ss)mul.f r1.z, c12.x, r0.x
+mul.f r1.w, c12.x, r0.w
+mad.f32 r1.z, c13.x, r0.y, r1.z
+mad.f32 r1.w, c13.x, r1.x, r1.w
+mad.f32 r1.z, c14.x, r0.z, r1.z
+mad.f32 r1.w, c14.x, r1.y, r1.w
+absneg.f r2.x, (neg)c5.x
+mul.f r2.y, c12.y, r0.x
+add.f r2.z, r1.z, c15.x
+mad.f32 r1.z, c13.y, r0.y, r2.y
+mul.f r2.y, c12.z, r0.x
+mul.f r2.w, r1.w, r2.x
+add.f r3.x, c4.x, (neg)r2.z
+mul.f r3.y, c12.y, r0.w
+mul.f r3.z, c8.y, r2.z
+mul.f r3.w, c8.x, r2.z
+mul.f r4.x, r3.x, r3.x
+mad.f32 r1.z, c14.y, r0.z, r1.z
+mad.f32 r3.y, c13.y, r1.x, r3.y
+mul.f r4.y, c8.w, r2.z
+mul.f r5.x, c8.z, r2.z
+add.f r5.w, r1.z, c15.y
+mad.f32 r6.x, c14.y, r1.y, r3.y
+absneg.f r1.z, (neg)c5.y
+mul.f r3.y, c0.w, r2.z
+add.f r5.y, c4.y, (neg)r5.w
+mad.f32 r3.z, c9.y, r5.w, r3.z
+mad.f32 r3.w, c9.x, r5.w, r3.w
+mad.f32 r2.w, r6.x, r1.z, r2.w
+mad.f32 r4.x, r5.y, r5.y, r4.x
+mad.f32 r2.y, c13.z, r0.y, r2.y
+mul.f r0.w, c12.z, r0.w
+mad.f32 r2.y, c14.z, r0.z, r2.y
+mad.f32 r0.w, c13.z, r1.x, r0.w
+mad.f32 r1.x, c9.w, r5.w, r4.y
+mad.f32 r4.y, c9.z, r5.w, r5.x
+add.f r2.y, r2.y, c15.z
+mad.f32 r6.y, c14.z, r1.y, r0.w
+absneg.f r0.w, (neg)c5.z
+mad.f32 r1.y, c1.w, r5.w, r3.y
+add.f r3.y, c4.z, (neg)r2.y
+mad.f32 r3.z, c10.y, r2.y, r3.z
+mad.f32 r3.w, c10.x, r2.y, r3.w
+mad.f32 r2.w, r6.y, r0.w, r2.w
+mad.f32 r4.x, r3.y, r3.y, r4.x
mul.f r0.x, c12.w, r0.x
-mad.f32 r4.z, c10.y, r2.z, r4.z
-mad.f32 r4.w, c10.x, r2.z, r4.w
-mad.f32 r1.y, c14.z, r1.y, r2.w
-rsq r2.w, r3.w
-(ss)mov.f32f32 r2.w, r2.w
-(ss)absneg.f r3.w, (neg)c5.z
+mad.f32 r5.x, c10.w, r2.y, r1.x
+mad.f32 r4.y, c10.z, r2.y, r4.y
mad.f32 r0.x, c13.w, r0.y, r0.x
-mov.f32f32 r0.y, r1.y
-mad.f32 r1.y, r3.x, r2.w, r4.y
-mad.f32 r3.x, r5.z, r2.w, r5.w
-mad.f32 r2.w, r6.x, r2.w, r3.w
-mad.f32 r3.w, r0.y, r3.w, r5.x
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r3.w, r3.w
-mul.f r4.y, r1.y, r1.y
+mad.f32 r0.y, c2.w, r2.y, r1.y
+mul.f r6.z, c0.z, r2.z
+rsq r1.x, r4.x
+(ss)mov.f32f32 r1.y, r1.x
+mad.f32 r2.x, r3.x, r1.x, r2.x
+max.f r1.x, c17.x, r2.w
mad.f32 r0.x, c14.w, r0.z, r0.x
-mad.f32 r0.z, r3.x, r3.x, r4.y
-max.f r3.w, c17.x, r3.w
-mad.f32 r1.x, c2.w, r2.z, r1.x
-mad.f32 r4.y, c1.z, r4.x, r5.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r3.w, r3.w
-mad.f32 r0.z, r2.w, r2.w, r0.z
+mad.f32 r0.z, r5.y, r1.y, r1.z
+mov.f32f32 r1.z, r2.x
+mad.f32 r0.w, r3.y, r1.y, r0.w
+nop
+mov.f32f32 r2.w, r0.z
+mul.f r1.y, r1.z, r1.z
+(ss)mov.f32f32 r4.x, r0.w
+mov.f32f32 r3.x, r1.x
+mad.f32 r0.z, r0.z, r2.w, r1.y
add.f r0.x, r0.x, c15.w
-mad.f32 r4.y, c2.z, r2.z, r4.y
-mul.f r5.x, c0.y, r2.x
-mul.f r5.y, c0.x, r2.x
-mad.f32 r5.z, c7.x, r2.z, c7.y
-mad.f32 r2.x, c7.x, r2.x, c7.y
+mad.f32 r0.z, r0.w, r4.x, r0.z
+mul.f r1.z, r3.x, c6.z
+mul.f r1.y, r3.x, c6.y
+mad.f32 r0.w, c11.y, r0.x, r3.z
+mad.f32 r3.x, c11.x, r0.x, r3.w
+mul.f r1.x, r1.x, c6.x
+mad.f32 r3.w, c11.w, r0.x, r5.x
rsq r0.z, r0.z
-(ss)mov.f32f32 r0.z, r0.z
-mul.f r5.w, r3.w, c6.z
-mul.f r6.x, r3.w, c6.y
-mul.f r3.w, r3.w, c6.x
-mul.f r2.w, r2.w, r0.z
-mul.f r3.x, r3.x, r0.z
-mul.f r0.z, r1.y, r0.z
-mov.f32f32 r1.y, r5.w
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r0.z, r0.z
-nop
-mov.f32f32 r6.w, r2.w
-mov.f32f32 r6.z, r3.x
-mov.f32f32 r6.y, r0.z
-mov.f32f32 r7.w, r1.y
-mov.f32f32 r0.z, r6.x
-mov.f32f32 r1.y, r3.w
-(rpt1)nop
-mov.f32f32 r7.z, r0.z
-mov.f32f32 r7.y, r1.y
-mad.f32 r0.z, c11.w, r0.x, r3.y
-mad.f32 r1.y, c11.z, r0.x, r3.z
-mad.f32 r2.w, c11.y, r0.x, r4.z
-mad.f32 r3.x, c11.x, r0.x, r4.w
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r1.y, r1.y
-mul.f r2.w, r2.w, c16.y
+(ss)mov.f32f32 r3.z, r0.z
+mul.f r5.x, r2.x, r0.z
+mul.f r3.y, r0.w, c16.y
mul.f r3.x, r3.x, c16.x
-mov.f32f32 r9.x, r0.z
-mov.f32f32 r8.w, r1.y
-mov.f32f32 r8.z, r2.w
-mov.f32f32 r8.y, r3.x
-mad.f32 r0.z, c3.w, r0.x, r1.x
-mad.f32 r1.x, c3.z, r0.x, r4.y
-mad.f32 r1.y, c1.y, r4.x, r5.x
-mad.f32 r2.w, c1.x, r4.x, r5.y
-mov.f32f32 r3.w, r0.z
-mov.f32f32 r3.z, r1.x
-mad.f32 r0.z, c2.y, r2.z, r1.y
-mad.f32 r1.x, c2.x, r2.z, r2.w
-mad.f32 r0.z, c3.y, r0.x, r0.z
-mad.f32 r0.x, c3.x, r0.x, r1.x
-mov.f32f32 r1.x, r5.z
-mov.f32f32 r1.y, r2.x
-mov.f32f32 r3.y, r0.z
-mov.f32f32 r3.x, r0.x
-mov.f32f32 r4.y, r1.x
-mov.f32f32 r4.x, r1.y
-mov.f32f32 r0.x, r0.y
-mov.f32f32 r0.y, r0.w
-mov.f32f32 r0.z, r2.y
-nop
-mov.f32f32 r2.z, r0.x
-mov.f32f32 r2.y, r0.y
-mov.f32f32 r2.x, r0.z
-mov.f32f32 r0.x, (0.000000)
-mov.f32f32 r0.y, r1.w
-mov.f32f32 r0.z, r1.z
-mov.f32f32 r0.w, (0.000000)
-mov.f32f32 r7.x, r0.x
-mov.f32f32 r4.w, r0.y
-mov.f32f32 r4.z, r0.z
-mov.f32f32 r2.w, r0.w
-mov.f32f32 r0.x, (0.000000)
-(rpt2)nop
-mov.f32f32 r8.x, r0.x
+mul.f r5.z, r4.x, r3.z
+mul.f r5.y, r2.w, r3.z
+mad.f32 r3.z, c11.z, r0.x, r4.y
+mad.f32 r0.w, c3.w, r0.x, r0.y
+mad.f32 r0.y, c1.z, r5.w, r6.z
+(ss)mul.f r0.z, c0.y, r2.z
+mad.f32 r0.y, c2.z, r2.y, r0.y
+mad.f32 r2.x, c1.y, r5.w, r0.z
+mad.f32 r0.z, c3.z, r0.x, r0.y
+mad.f32 r0.y, c2.y, r2.y, r2.x
+mul.f r2.x, c0.x, r2.z
+mad.f32 r0.y, c3.y, r0.x, r0.y
+mad.f32 r2.x, c1.x, r5.w, r2.x
+mad.f32 r4.y, c7.x, r2.y, c7.y
+mad.f32 r2.x, c2.x, r2.y, r2.x
+mad.f32 r4.x, c7.x, r2.z, c7.y
+mad.f32 r0.x, c3.x, r0.x, r2.x
+mov.f32f32 r2.z, r6.y
+mov.f32f32 r2.y, r6.x
+mov.f32f32 r2.x, r1.w
+mov.f32f32 r5.w, (0.000000)
+mov.f32f32 r2.w, (0.000000)
+mov.f32f32 r1.w, (0.000000)
end
nop
-; VERT: outputs: r3.x (0:0) r7.y (5:9) r2.x (5:10) r8.y (5:11) r4.x (5:12) r6.y (5:13)
-; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r1.z (0:0,cm=3,il=16,b=0)
-; VERT: 154 instructions, 0 half, 10 full
-; pos: r3.x
+nop
+; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11) r4.x (5:12) r5.x (5:13)
+; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r4.z (0:0,cm=3,il=16,b=0)
+; VERT: 102 instructions, 0 half, 7 full
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-60.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-60.asm
index 9303ad4..227a081 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-60.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-60.asm
@@ -4,206 +4,139 @@
@in(r0.w) in3
@in(r1.x) in4
@in(r1.y) in5
-@out(r1.z) out0
-@out(r1.w) out1
-@out(r2.x) out2
-@out(r2.y) out3
+@out(r2.x) out0
+@out(r2.y) out1
+@out(r2.z) out2
+@out(r2.w) out3
+@const(c9.x) 0x3f000000, 0x00000000, 0x3f800000, 0xbb449ba6
+@const(c10.x) 0xbf000000, 0x40000000, 0x3f800000, 0x3de38866
+@const(c11.x) 0x3fb8aa65, 0x3cf5c28f, 0x00000000, 0x00000000
+@const(c12.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.x, 4, r1.x
add.f r0.y, r0.w, c9.y
bary.f r0.w, 5, r1.x
bary.f r1.z, 8, r1.x
-add.f r1.w, r0.x, c10.x
-bary.f r2.x, 10, r1.x
-bary.f r2.y, 6, r1.x
-add.f r2.z, r0.w, c10.x
-floor.f r2.w, r1.w
+add.f r2.x, r0.x, c10.x
+bary.f r1.w, 9, r1.x
+add.f r2.y, r0.w, c10.x
+bary.f r2.z, 6, r1.x
+floor.f r2.w, r2.x
rcp r0.y, r0.y
add.f r0.z, r0.z, c9.y
-mov.f32f32 r1.z, r1.z
-floor.f r3.x, r2.z
-add.f r1.w, r1.w, (neg)r2.w
+floor.f r3.x, r2.y
+add.f r3.w, r2.z, c9.w
+add.f r2.x, r2.x, (neg)r2.w
(ss)mul.f r0.y, r0.z, r0.y
-mov.f32f32 r3.y, r1.z
-add.f r0.z, r2.z, (neg)r3.x
-mov.f32f32 r1.z, r1.w
+absneg.f r0.z, (neg)c7.x
+add.f r2.y, r2.y, (neg)r3.x
+mov.f32f32 r2.z, r2.x
+add.f r2.x, r2.x, c9.z
+mul.f r0.z, r0.z, c7.x
+sam (f32)(w)r4.x, r1.z, s#1, t#1
+(ss)mov.f32f32 r1.z, r2.y
+mul.f r1.w, c9.x, r2.z
+add.f r2.z, c10.y, (neg)r2.z
+mul.f r0.z, r0.z, r0.y
mov.f32f32 r0.y, r0.y
-absneg.f r1.w, (neg)c7.x
-mov.f32f32 r0.z, r0.z
-mul.f r2.z, c9.x, r1.z
-add.f r2.w, c10.y, (neg)r1.z
-mul.f r1.w, r1.w, c7.x
-bary.f r3.x, 9, r1.x
-mov.f32f32 r2.z, r2.z
-mul.f r3.z, c9.x, r0.z
-mul.f r1.w, r1.w, r0.y
-mov.f32f32 r2.w, r2.w
-add.f r0.x, r0.x, (neg)r2.z
-mov.f32f32 r2.z, r3.z
-mov.f32f32 r1.w, r1.w
-add.f r3.z, c10.y, (neg)r0.z
-mov.f32f32 r0.x, r0.x
-add.f r0.w, r0.w, (neg)r2.z
-mul.f r0.y, r1.w, r0.y
-mov.f32f32 r1.w, r3.z
-add.f r2.z, c10.x, r0.x
+add.f r0.x, r0.x, (neg)r1.w
+mul.f r1.w, c9.x, r1.z
+mov.f32f32 r2.w, r2.z
+mul.f r0.y, r0.z, r0.y
+mov.f32f32 r0.z, r0.x
+add.f r0.w, r0.w, (neg)r1.w
add.f r0.x, c10.z, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r0.x, r0.x
mul.f r0.y, r0.y, c11.x
-add.f r3.z, c10.z, r0.w
-mul.f r2.z, r2.z, c3.z
-mul.f r0.x, r0.x, c3.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r3.z, r3.z
-mov.f32f32 r3.w, r2.z
-mov.f32f32 r4.x, r0.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r4.y, r3.w
-add.f r0.w, c10.x, r0.w
-exp2 r0.y, r0.y
-(ss)mov.f32f32 r0.y, r0.y
-mov.f32f32 r3.w, r4.x
-mov.f32f32 r5.x, r0.x
-mov.f32f32 r0.x, r0.w
-add.f r0.w, c12.y, (neg)r0.y
-mov.f32f32 r5.w, r3.w
-mul.f r3.z, r3.z, c3.w
-mul.f r0.x, r0.x, c3.w
-mul.f r0.w, r0.w, c7.y
-mul.f r0.y, r0.y, c9.z
-mov.f32f32 r3.w, r3.z
-mov.f32f32 r4.x, r0.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r6.z, r2.z
-mov.f32f32 r2.z, r3.z
-mov.f32f32 r4.z, r4.x
-add.f r2.y, r2.y, c9.w
-add.f r0.y, r0.y, r0.w
-mov.f32f32 r0.w, r3.w
-mov.f32f32 r5.y, r0.x
-mov.f32f32 r0.x, r2.y
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r6.x, r0.w
-mov.f32f32 r0.w, r2.y
-mov.f32f32 r4.w, r0.x
-mov.f32f32 r0.x, r0.y
-mov.f32f32 r0.y, r2.y
-mov.f32f32 r6.w, r2.z
-mov.f32f32 r6.y, r0.w
-mov.f32f32 r0.w, r2.y
-mov.f32f32 r5.z, r0.y
-sam.s (f32)(x)r0.y, r4.y, s#2, t#2
-(sy)mov.f32f32 r0.y, r0.y
+add.f r0.z, c10.x, r0.z
+mov.f32f32 r1.w, r0.w
+mul.f r3.y, r0.x, c3.z
+add.f r0.x, c10.z, r0.w
+mul.f r4.x, r0.z, c3.z
+add.f r0.z, c10.x, r1.w
+mov.f32f32 r5.x, r3.y
+mul.f r3.z, r0.x, c3.w
+mov.f32f32 r5.w, r4.x
+mul.f r5.y, r0.z, c3.w
+exp2 r0.x, r0.y
+(ss)mov.f32f32 r0.y, r0.x
+mov.f32f32 r5.z, r3.w
+mov.f32f32 r4.y, r3.z
+mov.f32f32 r6.x, r5.y
+mov.f32f32 r6.y, r3.w
+add.f r0.y, c12.y, (neg)r0.y
+mov.f32f32 r4.z, r3.w
+sam.s (f32)(x)r3.x, r3.y, s#2, t#2
+add.f r0.z, c10.y, (neg)r1.z
+sam.s (f32)(x)r6.z, r5.x, s#2, t#2
+mul.f r0.x, r0.x, c9.z
+add.f r0.w, r2.y, c9.z
+mul.f r0.y, r0.y, c7.y
+(ss)nop
+sam.s (f32)(x)r5.x, r5.w, s#2, t#2
+mov.f32f32 r1.z, r0.z
+mul.f r0.z, r2.x, r0.z
+mul.f r1.w, r2.z, r0.w
+add.f r0.x, r0.x, r0.y
+mul.f r0.y, r2.w, r1.z
+sam.s (f32)(x)r3.y, r4.x, s#2, t#2
+mul.f r0.w, r2.x, r0.w
+(sy)cmps.f.lt r1.z, r4.w, c11.y
+bary.f r2.x, 10, r1.x
+mul.f r0.y, r0.y, r5.x
max.f r0.x, r0.x, c9.y
-mov.f32f32 r7.x, r0.w
-mul.f r0.w, r2.w, r1.w
-mov.f32f32 r0.y, r0.y
+mad.f32 r0.y, r0.z, r6.z, r0.y
+cov.u32f32 r0.z, r1.z
+mad.f32 r0.y, r1.w, r3.y, r0.y
min.f r0.x, r0.x, c9.z
-sam.s (f32)(x)r2.y, r5.w, s#2, t#2
-nop
-(sy)mov.f32f32 r2.y, r2.y
-mul.f r0.y, r0.w, r0.y
-sam.s (f32)(x)r0.w, r5.x, s#2, t#2
-(sy)mov.f32f32 r0.w, r0.w
-add.f r1.z, r1.z, c9.z
-add.f r2.z, c12.y, (neg)r0.x
-add.f r3.z, c12.y, (neg)r0.x
-add.f r3.w, c12.y, (neg)r0.x
-mul.f r1.w, r1.z, r1.w
-mul.f r2.z, r2.z, c6.z
-mul.f r4.x, r3.z, c6.y
-mul.f r3.w, r3.w, c6.x
-mad.f32 r0.y, r1.w, r0.w, r0.y
-sam.s (f32)(x)r0.w, r6.z, s#2, t#2
-add.f r0.z, r0.z, c9.z
-(sy)mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.w, r3.x
-mov.f32f32 r0.y, r0.y
-mul.f r2.w, r2.w, r0.z
-mul.f r0.z, r1.z, r0.z
-mov.f32f32 r3.z, r1.w
-mov.f32f32 r1.z, r2.x
-mad.f32 r0.y, r2.w, r0.w, r0.y
-bary.f r0.w, 11, r1.x
-mov.f32f32 r1.w, c9.z
-bary.f r2.x, 2, r1.x
-mov.f32f32 r0.y, r0.y
-sam (f32)(w)r2.w, r3.y, s#1, t#1
-(sy)cmps.f.lt r2.w, r3.z, c11.y
-mad.f32 r0.y, r0.z, r2.y, r0.y
-mov.f32f32 r0.z, r3.z
-mov.f32f32 r3.x, r1.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.y, r0.y
-cov.u32f32 r1.z, r2.w
-mov.f32f32 r0.z, r0.z
-(ss)mov.f32f32 r3.y, r0.w
+mad.f32 r0.y, r0.w, r3.x, r0.y
+cmps.f.ne r0.z, r0.z, c9.y
+(rpt1)nop
mul.f r0.y, c10.w, r0.y
-cmps.f.ne r0.w, r1.z, c9.y
+bary.f r2.y, 11, r1.x
+add.f r0.w, c12.y, (neg)r0.x
+add.f r1.z, c12.y, (neg)r0.x
+mov.f32f32 r1.w, r0.y
+add.f r2.z, c12.y, (neg)r0.x
(rpt1)nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.z, c9.y
-sam (f32)(xyz)r2.w, r3.x, s#0, t#0
-(sy)mul.f r2.x, r3.y, r2.x
+sam (f32)(xyz)r2.w, r2.x, s#0, t#0
+(ss)bary.f r2.x, 2, r1.x
bary.f r2.y, 1, r1.x
bary.f (ei)r1.x, 0, r1.x
-sel.b32 r0.z, r1.z, r0.w, r0.z
-mul.f r0.w, r2.x, r0.y
-mul.f r1.y, r3.x, r2.y
+mul.f r0.w, r0.w, c6.z
+(sy)mul.f r1.y, r3.y, r2.x
+mul.f r2.x, r3.x, r2.y
mul.f r1.x, r2.w, r1.x
-mov.f32f32 r2.y, r1.w
-mov.f32f32 r0.w, r0.w
-mul.f r1.y, r1.y, r0.y
-mad.f32 r0.w, c5.z, r3.y, r0.w
+mul.f r1.z, r1.z, c6.y
+mul.f r1.y, r1.y, r1.w
+mul.f r1.w, r2.x, r1.w
+mad.f32 r1.y, c5.z, r3.y, r1.y
+mad.f32 r1.w, c5.y, r3.x, r1.w
mul.f r0.y, r1.x, r0.y
-(rpt1)nop
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.x, r1.y
-mov.f32f32 r0.y, r0.y
-mad.f32 r1.x, c5.y, r3.x, r1.x
-mul.f r0.w, r0.x, r0.w
+mul.f r1.x, r2.z, c6.x
+mul.f r1.y, r0.x, r1.y
+mul.f r1.w, r0.x, r1.w
mad.f32 r0.y, c5.x, r2.w, r0.y
-(rpt1)nop
-add.f r0.w, r0.w, r2.z
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r0.y, r0.y
-nop
-mul.f r0.w, r0.w, r0.z
-mul.f r1.x, r0.x, r1.x
+mov.f32f32 r2.x, c9.y
+add.f r0.w, r1.y, r0.w
+add.f r1.y, r1.w, r1.z
mul.f r0.x, r0.x, r0.y
-nop
-mul.f r0.y, r0.w, c4.z
-add.f r0.w, r1.x, r4.x
-add.f r0.x, r0.x, r3.w
-nop
-mov.f32f32 r0.y, r0.y
-mul.f r0.w, r0.w, r0.z
-mul.f r0.x, r0.x, r0.z
-nop
-mov.f32f32 r0.y, r0.y
-mul.f r0.z, r0.w, c4.y
-mul.f r0.x, r0.x, c4.x
-nop
-mov.f32f32 r2.x, r0.y
-mov.f32f32 r0.y, r0.z
-mov.f32f32 r0.x, r0.x
+sel.b32 r0.y, r2.x, r0.z, r4.w
+mov.f32f32 r2.w, c9.z
(rpt1)nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
+mul.f r0.z, r0.w, r0.y
+mul.f r0.w, r1.y, r0.y
(rpt1)nop
-mov.f32f32 r1.w, r0.y
-mov.f32f32 r1.z, r0.x
+mul.f r2.z, r0.z, c4.z
+mul.f r2.y, r0.w, c4.y
+add.f r0.x, r0.x, r1.x
+(rpt2)nop
+mul.f r0.x, r0.x, r0.y
+(rpt2)nop
+mul.f r2.x, r0.x, c4.x
end
nop
nop
nop
-; FRAG: outputs: r1.z (1:0)
-; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r2.x (5:10,cm=f,il=12,b=1) r0.x (5:11,cm=f,il=16,b=1)
-; FRAG: 194 instructions, 0 half, 8 full
-; pos (bary): r1.x
-; color: r1.z
-; fragcoord: r0.x
+; FRAG: outputs: r2.x (1:0)
+; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r2.x (5:10,cm=f,il=12,b=1) r1.z (5:11,cm=f,il=16,b=1)
+; FRAG: 129 instructions, 0 half, 7 full
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-61.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-61.asm
index 0e4d5ee..9c8ac11 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-61.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-61.asm
@@ -6,134 +6,99 @@
@in(r0.w) in4
@in(r1.x) in5
@in(r1.y) in6
-@in(r1.z) in8
-@in(r1.w) in9
-@out(r6.y) out0
-@out(r6.z) out1
-@out(r6.w) out2
-@out(r7.x) out3
-@out(r5.y) out4
-@out(r5.z) out5
-@out(r5.w) out6
-@out(r6.x) out7
-@out(r2.w) out8
-@out(r3.x) out9
-@out(r3.y) out10
-@out(r3.z) out11
-@out(r3.w) out12
-@out(r4.x) out13
-@out(r4.y) out14
-@out(r4.z) out15
-(sy)(ss)mul.f r2.x, c11.x, r0.w
-mul.f r2.y, c11.x, r0.x
-mad.f32 r2.x, c12.x, r1.x, r2.x
-mad.f32 r2.y, c12.x, r0.y, r2.y
-mul.f r2.z, c11.z, r0.x
-mad.f32 r2.y, c13.x, r0.z, r2.y
-mov.f32f32 r2.x, r2.x
-mad.f32 r2.z, c12.z, r0.y, r2.z
-mad.f32 r2.x, c13.x, r1.y, r2.x
-add.f r2.y, r2.y, c14.x
-mad.f32 r2.z, c13.z, r0.z, r2.z
-mul.f r2.w, c11.y, r0.w
-mov.f32f32 r2.x, r2.x
-mul.f r3.x, c7.w, r2.y
-mul.f r3.y, c7.z, r2.y
-mul.f r3.z, c7.y, r2.y
-mul.f r2.x, r2.x, (neg)c4.x
-mad.f32 r2.w, c12.y, r1.x, r2.w
-mul.f r3.w, c11.y, r0.x
-mul.f r4.x, c7.x, r2.y
-mad.f32 r3.w, c12.y, r0.y, r3.w
-mov.f32f32 r2.w, r2.w
-mad.f32 r3.w, c13.y, r0.z, r3.w
-mad.f32 r2.w, c13.y, r1.y, r2.w
-mul.f r4.y, c0.w, r2.y
-mul.f r4.z, c0.z, r2.y
-mul.f r4.w, c0.y, r2.y
-mov.f32f32 r2.w, r2.w
-add.f r3.w, r3.w, c14.y
-mul.f r5.x, c0.x, r2.y
-add.f r2.z, r2.z, c14.z
-mad.f32 r2.x, (neg)c4.y, r2.w, r2.x
-mad.f32 r2.w, c8.w, r3.w, r3.x
-mad.f32 r3.x, c8.z, r3.w, r3.y
-mad.f32 r3.y, c8.y, r3.w, r3.z
-mov.f32f32 r2.x, r2.x
+@in(r3.z) in8
+@in(r3.w) in9
+@out(r0.x) out0
+@out(r0.y) out1
+@out(r0.z) out2
+@out(r0.w) out3
+@out(r1.x) out4
+@out(r1.y) out5
+@out(r1.z) out6
+@out(r1.w) out7
+@out(r2.x) out8
+@out(r2.y) out9
+@out(r2.z) out10
+@out(r2.w) out11
+@out(r3.x) out12
+@out(r3.y) out13
+@out(r3.z) out14
+@out(r3.w) out15
+@const(c16.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
+@const(c17.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
+(sy)(ss)mul.f r1.z, c11.x, r0.w
+mul.f r1.w, c11.x, r0.x
+mad.f32 r1.z, c12.x, r1.x, r1.z
+mad.f32 r1.w, c12.x, r0.y, r1.w
+mad.f32 r1.z, c13.x, r1.y, r1.z
+mad.f32 r1.w, c13.x, r0.z, r1.w
+mul.f r2.x, c11.z, r0.x
+mul.f r2.y, c11.y, r0.x
+mul.f r1.z, r1.z, (neg)c4.x
+mul.f r2.z, c11.y, r0.w
+add.f r1.w, r1.w, c14.x
+mad.f32 r2.z, c12.y, r1.x, r2.z
+mad.f32 r2.x, c12.z, r0.y, r2.x
+mad.f32 r2.z, c13.y, r1.y, r2.z
+mul.f r2.w, c7.y, r1.w
+mul.f r3.x, c7.x, r1.w
+mul.f r3.y, c7.w, r1.w
+mad.f32 r1.z, (neg)c4.y, r2.z, r1.z
mul.f r0.w, c11.z, r0.w
-mad.f32 r2.w, c9.w, r2.z, r2.w
+mad.f32 r2.y, c12.y, r0.y, r2.y
mad.f32 r0.w, c12.z, r1.x, r0.w
+mad.f32 r1.x, c13.y, r0.z, r2.y
+mad.f32 r0.w, c13.z, r1.y, r0.w
+mul.f r1.y, c7.z, r1.w
+mul.f r2.y, c0.w, r1.w
+mul.f r2.z, c0.z, r1.w
+mad.f32 r0.w, (neg)c4.z, r0.w, r1.z
+add.f r4.x, r1.x, c14.y
+mul.f r4.y, c0.y, r1.w
+mul.f r4.z, c0.x, r1.w
+max.f r0.w, c16.x, r0.w
+mad.f32 r2.w, c8.y, r4.x, r2.w
+mad.f32 r3.x, c8.x, r4.x, r3.x
+mad.f32 r3.y, c8.w, r4.x, r3.y
+mov.f32f32 r4.w, r0.w
+mad.f32 r2.x, c13.z, r0.z, r2.x
+mul.f r1.x, r0.w, c5.x
+mad.f32 r0.w, c8.z, r4.x, r1.y
+mul.f r1.z, r4.w, c5.z
+mul.f r1.y, r4.w, c5.y
+add.f r4.w, r2.x, c14.z
+mad.f32 r2.x, c1.w, r4.x, r2.y
+mad.f32 r2.y, c1.z, r4.x, r2.z
+mad.f32 r2.z, c1.y, r4.x, r4.y
+mad.f32 r2.w, c9.y, r4.w, r2.w
mul.f r0.x, c11.w, r0.x
-mad.f32 r1.x, c9.z, r2.z, r3.x
-mad.f32 r3.x, c9.y, r2.z, r3.y
-mov.f32f32 r0.w, r0.w
+mad.f32 r3.x, c9.x, r4.w, r3.x
mad.f32 r0.x, c12.w, r0.y, r0.x
-mad.f32 r0.y, c13.z, r1.y, r0.w
+mad.f32 r0.y, c9.w, r4.w, r3.y
mad.f32 r0.x, c13.w, r0.z, r0.x
-mad.f32 r0.z, c8.x, r3.w, r4.x
-mad.f32 r0.w, c1.w, r3.w, r4.y
-mov.f32f32 r0.y, r0.y
+mad.f32 r0.z, c9.z, r4.w, r0.w
+mad.f32 r0.w, c2.w, r4.w, r2.x
+mad.f32 r4.y, c2.z, r4.w, r2.y
add.f r0.x, r0.x, c14.w
-mad.f32 r0.z, c9.x, r2.z, r0.z
-mad.f32 r0.w, c2.w, r2.z, r0.w
-mad.f32 r0.y, (neg)c4.z, r0.y, r2.x
-mad.f32 r1.y, c10.w, r0.x, r2.w
-mad.f32 r1.x, c10.z, r0.x, r1.x
-mad.f32 r2.x, c10.y, r0.x, r3.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.x, r1.x
-mul.f r2.x, r2.x, c15.y
-max.f r0.y, c16.x, r0.y
-mov.f32f32 r3.z, r1.y
-mov.f32f32 r3.y, r1.x
-mov.f32f32 r3.x, r2.x
-mov.f32f32 r0.y, r0.y
-mad.f32 r0.z, c10.x, r0.x, r0.z
+mad.f32 r5.x, c2.y, r4.w, r2.z
+mad.f32 r4.x, c1.x, r4.x, r4.z
+mad.f32 r3.y, c6.x, r4.w, c6.y
+mad.f32 r2.x, c10.y, r0.x, r2.w
+mad.f32 r3.x, c10.x, r0.x, r3.x
+mad.f32 r2.w, c10.w, r0.x, r0.y
+mad.f32 r2.z, c10.z, r0.x, r0.z
+mul.f r2.y, r2.x, c15.y
+mul.f r2.x, r3.x, c15.x
mad.f32 r0.w, c3.w, r0.x, r0.w
-mad.f32 r1.x, c1.z, r3.w, r4.z
-mul.f r1.y, r0.y, c5.z
-mul.f r2.x, r0.y, c5.y
-mul.f r0.y, r0.y, c5.x
-mul.f r0.z, r0.z, c15.x
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r0.y, r0.y
-nop
-mov.f32f32 r5.w, r1.y
-mov.f32f32 r5.z, r2.x
-mov.f32f32 r5.y, r0.y
-mov.f32f32 r2.w, r0.z
-mov.f32f32 r7.x, r0.w
-mad.f32 r0.y, c2.z, r2.z, r1.x
-mad.f32 r0.z, c1.y, r3.w, r4.w
-mad.f32 r0.y, c3.z, r0.x, r0.y
-mad.f32 r0.z, c2.y, r2.z, r0.z
-mad.f32 r0.w, c1.x, r3.w, r5.x
-mad.f32 r1.x, c6.x, r2.z, c6.y
-mov.f32f32 r6.w, r0.y
-mad.f32 r0.y, c3.y, r0.x, r0.z
-mad.f32 r0.z, c2.x, r2.z, r0.w
-mov.f32f32 r0.w, r1.x
-mad.f32 r1.x, c6.x, r2.y, c6.y
-mov.f32f32 r6.z, r0.y
-mad.f32 r0.x, c3.x, r0.x, r0.z
-mov.f32f32 r4.x, r0.w
-mov.f32f32 r0.y, r1.x
-nop
-mov.f32f32 r6.y, r0.x
-mov.f32f32 r0.x, r1.w
-mov.f32f32 r3.w, r0.y
-mov.f32f32 r0.y, r1.z
-mov.f32f32 r0.z, (0.000000)
-mov.f32f32 r4.z, r0.x
-nop
-mov.f32f32 r4.y, r0.y
-mov.f32f32 r6.x, r0.z
+mad.f32 r0.z, c3.z, r0.x, r4.y
+mad.f32 r0.y, c3.y, r0.x, r5.x
+mad.f32 r4.x, c2.x, r4.w, r4.x
+mad.f32 r3.x, c6.x, r1.w, c6.y
+mad.f32 r0.x, c3.x, r0.x, r4.x
+mov.f32f32 r1.w, (0.000000)
end
nop
-; VERT: outputs: r6.y (0:0) r5.y (5:9) r2.w (5:10) r3.w (5:11)
-; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r1.z (0:0,cm=3,il=16,b=0)
-; VERT: 107 instructions, 0 half, 8 full
-; pos: r6.y
+; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11)
+; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r3.z (0:0,cm=3,il=16,b=0)
+; VERT: 71 instructions, 0 half, 6 full
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-62.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-62.asm
index 60492a1..9e235d2 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-62.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-62.asm
@@ -4,710 +4,481 @@
@in(r0.w) in3
@in(r1.x) in4
@in(r1.y) in5
-@out(r3.x) out0
-@out(r3.y) out1
-@out(r3.z) out2
-@out(r3.w) out3
-(sy)(ss)bary.f r0.x, 7, r1.x
-bary.f r0.y, 8, r1.x
+@out(r2.x) out0
+@out(r2.y) out1
+@out(r2.z) out2
+@out(r2.w) out3
+@const(c14.x) 0x3f000000, 0x00000000, 0x43160000, 0x3bdb8bac
+@const(c15.x) 0x41000000, 0x3f600000, 0x3e000000, 0x3f233333
+@const(c16.x) 0x40000000, 0xbf800000, 0xbb449ba6, 0xbf000000
+@const(c17.x) 0x3f800000, 0xbf000000, 0x3de38866, 0x3fb8aa65
+@const(c18.x) 0x3cf5c28f, 0x00000000, 0x00000000, 0x00000000
+@const(c19.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
+(sy)(ss)bary.f r0.x, 20, r1.x
+bary.f r0.y, 7, r1.x
+bary.f r1.z, 8, r1.x
add.f r0.w, r0.w, c14.y
-bary.f r1.z, 9, r1.x
-mov.f32f32 r0.x, r0.x
-bary.f r1.w, 20, r1.x
-add.f r2.x, r0.y, c16.w
-add.f r2.y, r1.z, c16.w
-bary.f r2.z, 18, r1.x
-mul.f r2.w, r1.w, r0.x
-bary.f r3.x, 15, r1.x
-floor.f r3.y, r2.x
+bary.f r1.w, 9, r1.x
+mul.f r2.x, r0.x, r0.y
+bary.f r2.y, 21, r1.x
+bary.f r2.z, 15, r1.x
+add.f r2.w, r1.z, c16.w
+add.f r3.y, r1.w, c16.w
rcp r0.w, r0.w
add.f r0.z, r0.z, c14.y
-floor.f r3.z, r2.y
-mov.f32f32 r3.x, r3.x
-bary.f r3.w, 21, r1.x
-add.f r2.x, r2.x, (neg)r3.y
+mad.f32 r2.x, r2.y, r2.z, r2.x
+bary.f r3.z, 22, r1.x
+bary.f r3.w, 3, r1.x
+floor.f r4.x, r2.w
(ss)mul.f r0.z, r0.z, r0.w
-(ss)add.f r0.w, r2.y, (neg)r3.z
-mad.f32 r2.y, r3.w, r3.x, r2.w
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r0.z, r0.z
-absneg.f r2.w, (neg)c11.x
-mov.f32f32 r2.y, r2.y
-bary.f r3.y, 3, r1.x
-mul.f r3.z, c14.x, r2.x
-mul.f r2.w, r2.w, c11.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r3.y, r3.y
-bary.f r4.x, 22, r1.x
-mov.f32f32 r3.z, r3.z
-mul.f r2.w, r2.w, r0.z
-mul.f r4.y, c14.x, r0.w
-mad.f32 r2.y, r4.x, r3.y, r2.y
-add.f r0.y, r0.y, (neg)r3.z
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r3.z, r4.y
-mov.f32f32 r2.y, r2.y
-bary.f r4.y, 12, r1.x
-mov.f32f32 r0.y, r0.y
-mul.f r0.z, r2.w, r0.z
-add.f r1.z, r1.z, (neg)r3.z
-mul.f r2.w, r1.w, r4.y
-bary.f r3.z, 13, r1.x
-add.f r4.z, c17.y, r0.y
+(ss)absneg.f r0.w, (neg)c11.x
+mad.f32 r2.x, r3.z, r3.w, r2.x
+add.f r2.w, r2.w, (neg)r4.x
+floor.f r4.x, r3.y
+mul.f r0.w, r0.w, c11.x
+mov.f32f32 r4.y, r2.x
+bary.f r4.z, 12, r1.x
+mov.f32f32 r4.w, r2.w
+mul.f r0.w, r0.w, r0.z
mov.f32f32 r0.z, r0.z
-add.f r0.y, c17.x, r0.y
-mad.f32 r2.w, r3.w, r3.z, r2.w
-mov.f32f32 r4.z, r4.z
+mul.f r5.x, r0.x, r4.z
+bary.f r5.y, 13, r1.x
+mul.f r5.z, c14.x, r4.w
+mul.f r0.z, r0.w, r0.z
+add.f r0.w, r3.y, (neg)r4.x
+mad.f32 r3.y, r2.y, r5.y, r5.x
+bary.f r4.x, 14, r1.x
+add.f r1.z, r1.z, (neg)r5.z
mul.f r0.z, r0.z, c17.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r2.w, r2.w
-bary.f r4.w, 14, r1.x
-mul.f r4.z, r4.z, c5.z
-mov.f32f32 r0.z, r0.z
-mul.f r0.y, r0.y, c5.z
-mad.f32 r2.w, r4.x, r4.w, r2.w
-mov.f32f32 r5.x, r4.z
-mov.f32f32 r4.z, r4.z
-mov.f32f32 r5.y, r0.y
-mul.f r5.z, r2.w, r2.w
-mov.f32f32 r5.w, r5.x
-mad.f32 r5.x, r2.y, r2.y, r5.z
-mov.f32f32 r1.z, r1.z
+mov.f32f32 r5.x, r0.w
+mad.f32 r3.y, r3.z, r4.x, r3.y
+mov.f32f32 r5.z, r1.z
+add.f r1.z, c17.x, r1.z
+mul.f r5.w, c14.x, r5.x
+mul.f r6.x, r3.y, r3.y
+add.f r5.z, c17.y, r5.z
+mad.f32 r2.x, r2.x, r4.y, r6.x
+bary.f r6.x, 4, r1.x
exp2 r0.z, r0.z
-(ss)mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r5.x, r5.x
-bary.f r5.z, 4, r1.x
-add.f r6.x, c17.y, r1.z
-add.f r6.y, c19.y, (neg)r0.z
-mov.f32f32 r0.y, r0.y
-mul.f r6.z, r1.w, r5.z
-bary.f r6.w, 5, r1.x
-mov.f32f32 r6.x, r6.x
+(ss)mov.f32f32 r6.y, r0.z
+mul.f r6.z, r5.z, c5.z
+add.f r1.w, r1.w, (neg)r5.w
+mul.f r5.z, r0.x, r6.x
+bary.f r5.w, 5, r1.x
+mov.f32f32 r7.y, r6.z
+add.f r6.y, c19.y, (neg)r6.y
+mov.f32f32 r6.w, r1.w
+mad.f32 r5.z, r2.y, r5.w, r5.z
+bary.f r8.x, 6, r1.x
mul.f r6.y, r6.y, c11.y
-mul.f r0.z, r0.z, c17.x
-mad.f32 r6.z, r3.w, r6.w, r6.z
-mul.f r7.x, r6.x, c5.w
-mov.f32f32 r7.y, r0.y
-add.f r0.y, c17.x, r1.z
-mov.f32f32 r1.z, r6.z
-bary.f r6.z, 6, r1.x
-mov.f32f32 r6.x, r7.x
+(ss)mul.f r0.z, r0.z, c17.x
+add.f r6.w, c17.y, r6.w
+mad.f32 r5.z, r3.z, r8.x, r5.z
+mul.f r8.y, r1.z, c5.z
+add.f r1.z, c17.x, r1.w
add.f r0.z, r0.z, r6.y
-mov.f32f32 r0.y, r0.y
-mad.f32 r1.z, r4.x, r6.z, r1.z
-mov.f32f32 r6.x, r6.x
-bary.f r6.y, 10, r1.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r1.z, r1.z
-mul.f r0.y, r0.y, c5.w
-add.f r8.x, r6.y, c16.z
-mov.f32f32 r8.y, r5.y
-mad.f32 r5.x, r1.z, r1.z, r5.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r5.y, r8.x
-mov.f32f32 r7.z, r0.y
-mov.f32f32 r7.x, r7.x
-mov.f32f32 r9.x, r4.z
-mov.f32f32 r6.y, r5.y
-rsq r4.z, r5.x
-(ss)mov.f32f32 r4.z, r4.z
+mov.f32f32 r1.w, r5.z
+mul.f r9.y, r6.w, c5.w
+mov.f32f32 r9.x, r8.y
+mul.f r8.z, r1.z, c5.w
+mad.f32 r1.z, r5.z, r1.w, r2.x
+mov.f32f32 r7.z, r9.y
max.f r0.z, r0.z, c14.y
-(ss)mov.f32f32 r5.x, r7.z
-mov.f32f32 r8.z, r7.x
-mul.f r1.z, r1.z, r4.z
+bary.f r2.x, 10, r1.x
+mov.f32f32 r6.w, r8.z
+add.f r4.w, c16.x, (neg)r4.w
+mul.f r0.x, r0.x, r0.x
+rsq r1.z, r1.z
+(ss)mov.f32f32 r5.z, r1.z
+add.f r8.w, r2.x, c16.z
min.f r0.z, r0.z, c17.x
-sam.s (f32)(x)r5.y, r5.w, s#4, t#4
-(sy)mov.f32f32 r5.y, r5.y
-mov.f32f32 r7.z, r5.x
-mov.f32f32 r1.z, r1.z
-add.f r5.x, c19.y, (neg)r0.z
-(ss)add.f r5.w, c19.y, (neg)r0.z
-add.f r6.x, c19.y, (neg)r0.z
-mul.f r1.z, r1.z, c15.x
-mov.f32f32 r5.y, r5.y
-add.f r6.y, c16.x, (neg)r2.x
-mul.f r5.x, r5.x, c10.z
-mov.f32f32 r1.z, r1.z
-mul.f r5.w, r5.w, c10.y
-mul.f r6.x, r6.x, c10.x
-mov.f32f32 r6.y, r6.y
-add.f r7.x, c16.x, (neg)r0.w
-mov.f32f32 r7.w, r8.x
-mul.f r2.y, r2.y, r4.z
-rcp r1.z, r1.z
-(ss)mov.f32f32 r1.z, r1.z
-mul.f r2.w, r2.w, r4.z
-mov.f32f32 r4.z, r7.x
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r7.w, r7.w
-mov.f32f32 r2.w, r2.w
-mul.f r7.x, r6.y, r4.z
-mov.f32f32 r2.y, r2.y
-mul.f r1.w, r1.w, r1.w
-absneg.f r2.w, (neg)r2.w
-mad.f32 r1.w, r3.w, r3.w, r1.w
-mul.f r3.w, r7.x, r5.y
-mov.f32f32 r5.y, r8.x
-sam.s (f32)(x)r7.x, r7.y, s#4, t#4
-(sy)mov.f32f32 r7.x, r7.x
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r0.y, r0.y
-mad.f32 r1.w, r4.x, r4.x, r1.w
-mov.f32f32 r8.w, r5.y
-mov.f32f32 r4.x, r2.z
-add.f r2.x, r2.x, c17.x
+(ss)mul.f r1.z, r3.y, r1.z
+mul.f r1.w, r1.w, r5.z
+mov.f32f32 r7.w, r8.w
+add.f r2.x, c19.y, (neg)r0.z
+add.f r3.y, c19.y, (neg)r0.z
+mul.f r1.w, r1.w, c15.x
+add.f r6.y, c19.y, (neg)r0.z
+mov.f32f32 r9.z, r8.w
+mov.f32f32 r7.x, r8.w
+nop
+sam.s (f32)(x)r9.w, r7.y, s#4, t#4
+(ss)mul.f r7.y, r2.x, c10.z
+mov.f32f32 r2.x, r4.w
+rcp r1.w, r1.w
+(ss)mov.f32f32 r7.z, r1.w
+mul.f r4.y, r4.y, r5.z
+mad.f32 r0.x, r2.y, r2.y, r0.x
+add.f r2.y, c16.x, (neg)r5.x
+mad.f32 r0.x, r3.z, r3.z, r0.x
+mul.f r3.y, r3.y, c10.y
+mul.f r3.z, r6.y, c10.x
+mov.f32f32 r5.x, r2.y
+absneg.f r1.z, (neg)r1.z
+sam.s (f32)(x)r10.x, r9.x, s#4, t#4
+sam.s (f32)(x)r6.y, r6.z, s#4, t#4
+sam.s (f32)(x)r8.y, r8.y, s#4, t#4
+add.f r2.w, r2.w, c17.x
add.f r0.w, r0.w, c17.x
-bary.f r5.y, 16, r1.x
-(ss)mov.f32f32 r7.y, r4.x
-sqrt r1.w, r1.w
-(ss)mov.f32f32 r1.w, r1.w
-sam.s (f32)(x)r4.x, r8.y, s#4, t#4
-(sy)mov.f32f32 r4.x, r4.x
-mul.f r4.z, r2.x, r4.z
-mov.f32f32 r9.y, r0.y
-add.f r0.y, c14.z, (neg)r1.w
-mov.f32f32 r1.w, r8.x
-bary.f r7.w, 19, r1.x
-mul.f r6.y, r6.y, r0.w
-mov.f32f32 r0.y, r0.y
-mad.f32 r3.w, r4.z, r4.x, r3.w
-mov.f32f32 r9.z, r1.w
-mov.f32f32 r1.w, r7.w
-mul.f r0.y, c12.z, r0.y
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r4.x, r5.y
-mov.f32f32 r7.z, r1.w
-mov.f32f32 r0.y, r0.y
-sam.s (f32)(x)r1.w, r9.x, s#4, t#4
-(sy)mov.f32f32 r1.w, r1.w
-mov.f32f32 r8.x, r4.x
-bary.f r4.x, 17, r1.x
-mul.f r0.y, r0.y, c14.w
-mad.f32 r1.w, r6.y, r1.w, r3.w
-(ss)nop
-sam (f32)(w)r8.y, r7.y, s#2, t#2
-(sy)cmps.f.lt r3.w, r9.x, c15.y
-mov.f32f32 r4.x, r4.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.w, r1.w
-mul.f r0.w, r2.x, r0.w
-cov.u32f32 r2.x, r3.w
-mul.f r2.w, r2.w, r0.y
-mul.f r0.y, r2.y, r0.y
-mad.f32 r0.w, r0.w, r7.x, r1.w
-cmps.f.ne r1.w, r2.x, c14.y
-mov.f32f32 r2.x, r2.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.y, c15.z
-mul.f r2.x, r2.x, r1.z
-mul.f r0.y, r0.y, r1.z
+sqrt r0.x, r0.x
+(ss)add.f r0.x, c14.z, (neg)r0.x
+mul.f r2.x, r2.x, r5.x
+mul.f r2.y, r2.w, r2.y
+mul.f r4.w, r4.w, r0.w
+mul.f r0.x, c12.z, r0.x
+(sy)mul.f r2.x, r2.x, r9.w
+bary.f r6.z, 18, r1.x
+mul.f r0.w, r2.w, r0.w
+mul.f r0.x, r0.x, c14.w
+mad.f32 r2.x, r2.y, r10.x, r2.x
+bary.f r6.w, 19, r1.x
+bary.f r8.z, 16, r1.x
+mov.f32f32 r2.y, r0.x
+mad.f32 r2.x, r4.w, r6.y, r2.x
+mul.f r0.x, r1.z, r0.x
+mad.f32 r0.w, r0.w, r8.y, r2.x
+mul.f r1.z, r4.y, r2.y
+sam (f32)(w)r9.x, r6.z, s#2, t#2
+bary.f r8.w, 17, r1.x
+mul.f r0.x, r0.x, r1.w
+(sy)cmps.f.lt r1.w, r9.w, c15.y
+mul.f r1.z, r1.z, r7.z
mul.f r0.w, c17.z, r0.w
-mov.f32f32 r1.z, c14.y
-mov.f32f32 r2.x, r2.x
+mov.f32f32 r2.x, r0.x
+cov.u32f32 r1.w, r1.w
+mov.f32f32 r2.y, r1.z
+cmps.f.lt r2.w, r9.w, c15.y
+mov.f32f32 r4.y, r0.w
+cmps.f.ne r1.w, r1.w, c14.y
+mov.f32f32 r4.w, c14.y
+mov.f32f32 r5.x, c14.y
+cov.u32f32 r2.w, r2.w
+sam (f32)(w)r8.y, r8.z, s#1, t#1
+(sy)cmps.f.lt r5.z, r9.x, c18.x
+sel.b32 r1.z, r1.z, r1.w, r4.w
+sel.b32 r0.x, r0.x, r1.w, r5.x
+(rpt1)nop
+add.f r1.w, r6.w, r1.z
+add.f r1.z, r6.z, r0.x
+cmps.f.ne r0.x, r2.w, c14.y
+cov.u32f32 r2.w, r5.z
+mov.f32f32 r4.w, c15.z
+mov.f32f32 r5.x, c14.y
mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.w, r0.w
-sel.b32 r1.z, r2.y, r1.w, r1.z
-mov.f32f32 r1.w, r2.x
-mov.f32f32 r2.y, r0.y
-mov.f32f32 r2.w, r0.y
-mov.f32f32 r3.w, r0.y
-mov.f32f32 r1.w, r1.w
-cmps.f.lt r4.z, r9.x, c15.y
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r2.w, r2.w
+mov.f32f32 r5.z, r2.z
+sam (f32)(w)r6.y, r1.z, s#2, t#2
+cmps.f.ne r2.z, r2.w, c14.y
+sel.b32 r0.x, r4.w, r0.x, r5.x
+mov.f32f32 r2.w, c14.y
mov.f32f32 r3.w, r3.w
-cov.u32f32 r4.z, r4.z
-mov.f32f32 r5.y, r2.x
-mov.f32f32 r6.y, r0.y
-mov.f32f32 r7.x, r2.x
-cmps.f.ne r4.z, r4.z, c14.y
-(ss)mov.f32f32 r7.y, c14.y
-mov.f32f32 r7.z, c14.y
-mov.f32f32 r5.y, r5.y
-mov.f32f32 r6.y, r6.y
-sel.b32 r1.w, r1.w, r4.z, r7.y
-sel.b32 r2.y, r2.y, r4.z, r7.z
-mov.f32f32 r4.z, r7.x
-mov.f32f32 r7.x, r0.y
-add.f r1.w, r2.z, r1.w
-add.f r2.y, r7.w, r2.y
-mov.f32f32 r2.z, r2.x
-mov.f32f32 r7.y, r0.y
-mov.f32f32 r7.z, r1.w
-mov.f32f32 r7.w, r2.y
-mov.f32f32 r7.x, r7.x
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r8.y, r7.z
-mov.f32f32 r8.z, r7.w
-mov.f32f32 r7.y, r7.y
-mov.f32f32 r7.z, r2.x
-mov.f32f32 r7.w, r0.y
-mov.f32f32 r8.w, r2.x
-mov.f32f32 r9.x, r0.y
-mov.f32f32 r9.y, r2.x
-sam (f32)(w)r9.z, r8.y, s#2, t#2
-add.f r1.z, c15.y, (neg)r1.z
-mov.f32f32 r7.z, r7.z
-mov.f32f32 r7.w, r7.w
-(ss)mov.f32f32 r8.z, r8.w
-(sy)cmps.f.lt r8.y, r10.y, r1.z
-cmps.f.lt r8.w, r10.y, r1.z
-mov.f32f32 r9.x, r9.x
-mov.f32f32 r9.y, r9.y
+mov.f32f32 r4.w, (0.000000)
+add.f r0.x, c15.y, (neg)r0.x
+sel.b32 r5.x, r2.w, r2.z, r9.x
+bary.f r6.y, 2, r1.x
+(ss)bary.f r6.z, 1, r1.x
+(sy)cmps.f.lt r2.z, r7.x, r0.x
+cmps.f.lt r2.w, r7.x, r0.x
+bary.f r6.w, 23, r1.x
+bary.f r7.x, 24, r1.x
+cov.u32f32 r2.z, r2.z
+cov.u32f32 r2.w, r2.w
+bary.f r7.z, 25, r1.x
+bary.f (ei)r1.x, 0, r1.x
+cmps.f.ne r1.y, r2.z, c14.y
+mov.f32f32 r2.z, c14.y
+cmps.f.ne r2.w, r2.w, c14.y
+mov.f32f32 r7.w, c14.y
+mov.f32f32 r8.y, c15.z
+sel.b32 r2.z, r2.x, r1.y, r2.z
+mov.f32f32 r8.z, c14.y
+sel.b32 r1.y, r2.y, r1.y, r7.w
+mov.f32f32 r7.w, c14.y
+add.f r1.z, r1.z, r2.z
+sel.b32 r2.z, r8.y, r2.w, r8.z
+add.f r8.z, r1.w, r1.y
+mov.f32f32 r1.y, c14.y
+mov.f32f32 r8.y, r1.z
+add.f r0.x, r0.x, (neg)r2.z
+mov.f32f32 r1.w, r8.z
+mov.f32f32 r2.z, c14.y
+mov.f32f32 r2.w, c15.z
+mov.f32f32 r8.w, c14.y
+mov.f32f32 r9.x, c14.y
+sam (f32)(w)r9.y, r8.y, s#2, t#2
+(sy)(ss)cmps.f.lt r8.y, r10.x, r0.x
+mov.f32f32 r0.x, r0.x
+mov.f32f32 r8.z, c14.y
+mov.f32f32 r9.y, c15.z
cov.u32f32 r8.y, r8.y
-cov.u32f32 r8.w, r8.w
-mov.f32f32 r9.z, r2.x
-nop
-mov.f32f32 r8.y, r8.y
-cmps.f.ne r8.w, r8.w, c14.y
-mov.f32f32 r9.w, c15.z
+cmps.f.lt r9.z, r10.x, r0.x
+mov.f32f32 r9.w, c14.y
mov.f32f32 r10.x, c14.y
-cmps.f.ne r10.y, r8.y, c14.y
-mov.f32f32 r9.z, r9.z
-mov.f32f32 r10.z, c14.y
-mov.f32f32 r10.w, c14.y
-sel.b32 r8.w, r9.w, r8.w, r10.x
-mov.f32f32 r8.y, r4.x
-sel.b32 r4.x, r9.z, r10.y, r10.z
-sel.b32 r9.x, r9.x, r10.y, r10.w
-add.f r1.z, r1.z, (neg)r8.w
-bary.f r8.w, 23, r1.x
-add.f r1.w, r1.w, r4.x
-add.f r2.y, r2.y, r9.x
-mov.f32f32 r1.z, r1.z
-sam (f32)(w)r9.z, r8.x, s#1, t#1
-(sy)cmps.f.lt r4.x, r10.y, c18.x
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r2.y, r2.y
-(ss)mov.f32f32 r8.x, r10.y
-cov.u32f32 r4.x, r4.x
-mov.f32f32 r8.y, r1.w
-mov.f32f32 r9.x, r2.y
-mov.f32f32 r8.x, r8.x
-mov.f32f32 r8.w, r8.w
-mov.f32f32 r9.z, r8.y
-mov.f32f32 r9.w, r9.x
-cmps.f.ne r4.x, r4.x, c14.y
-(rpt1)nop
-mov.f32f32 r8.y, c14.y
-bary.f r9.x, 24, r1.x
-bary.f r10.x, 25, r1.x
-sam (f32)(w)r10.y, r9.z, s#2, t#2
-(sy)(ss)cmps.f.lt r9.z, r11.x, r1.z
-cmps.f.lt r9.w, r11.x, r1.z
-sel.b32 r4.x, r8.y, r4.x, r8.x
-mov.f32f32 r8.x, r9.x
-cov.u32f32 r8.y, r9.z
-cov.u32f32 r9.x, r9.w
-mov.f32f32 r9.z, r10.x
-mov.f32f32 r9.w, (0.000000)
-mov.f32f32 r8.y, r8.y
-cmps.f.ne r9.x, r9.x, c14.y
-mov.f32f32 r10.x, c15.z
-mov.f32f32 r10.y, c14.y
cmps.f.ne r8.y, r8.y, c14.y
+mov.f32f32 r10.y, c14.y
+cov.u32f32 r9.z, r9.z
mov.f32f32 r10.z, c14.y
mov.f32f32 r10.w, c14.y
-sel.b32 r9.x, r10.x, r9.x, r10.y
-bary.f r10.x, 2, r1.x
-sel.b32 r9.y, r9.y, r8.y, r10.z
-sel.b32 r7.w, r7.w, r8.y, r10.w
-add.f r1.z, r1.z, (neg)r9.x
-bary.f r8.y, 1, r1.x
-add.f r1.w, r1.w, r9.y
-add.f r2.y, r2.y, r7.w
-mov.f32f32 r1.z, r1.z
-bary.f (ei)r1.x, 0, r1.x
-mov.f32f32 r1.y, r1.w
-mov.f32f32 r1.w, r2.y
-mov.f32f32 r2.y, c14.y
-mov.f32f32 r7.w, c14.y
-mov.f32f32 r9.x, r1.y
-mov.f32f32 r9.y, r1.w
-mov.f32f32 r10.y, c14.y
+sel.b32 r10.y, r2.x, r8.y, r10.y
+cmps.f.ne r9.z, r9.z, c14.y
+sel.b32 r8.y, r2.y, r8.y, r10.z
mov.f32f32 r10.z, c15.z
-mov.f32f32 r10.w, r9.x
-mov.f32f32 r11.x, r9.y
-mov.f32f32 r9.x, c14.y
-mov.f32f32 r9.y, c14.y
-mov.f32f32 r11.y, c14.y
-mov.f32f32 r11.z, c15.z
-mov.f32f32 r11.w, c14.y
-mov.f32f32 r12.x, c14.y
-sam (f32)(w)r12.y, r10.w, s#2, t#2
-(sy)(ss)cmps.f.lt r10.w, r13.x, r1.z
-cmps.f.lt r11.x, r13.x, r1.z
-mov.f32f32 r12.y, c14.y
-mov.f32f32 r12.z, c15.z
-cov.u32f32 r10.w, r10.w
-cov.u32f32 r11.x, r11.x
-mov.f32f32 r12.w, c14.y
-mov.f32f32 r13.x, c14.y
-mov.f32f32 r10.w, r10.w
-cmps.f.ne r11.x, r11.x, c14.y
-mov.f32f32 r13.y, c15.z
-mov.f32f32 r13.z, c14.y
-cmps.f.ne r10.w, r10.w, c14.y
-mov.f32f32 r13.w, c14.y
-mov.f32f32 r14.x, c14.y
-sel.b32 r11.x, r13.y, r11.x, r13.z
-nop
-sel.b32 r8.z, r8.z, r10.w, r13.w
-sel.b32 r7.y, r7.y, r10.w, r14.x
-add.f r1.z, r1.z, (neg)r11.x
-nop
-add.f r1.y, r1.y, r8.z
-add.f r1.w, r1.w, r7.y
-mov.f32f32 r1.z, r1.z
+add.f r1.z, r1.z, r10.y
+mov.f32f32 r10.y, c14.y
+add.f r11.y, r1.w, r8.y
+mov.f32f32 r1.w, c15.z
+mov.f32f32 r11.x, r1.z
+sel.b32 r8.y, r10.z, r9.z, r10.y
+mov.f32f32 r9.z, r11.y
+mov.f32f32 r10.y, c14.y
+mov.f32f32 r10.z, c14.y
+mov.f32f32 r11.z, c14.y
+mov.f32f32 r11.w, c15.z
+sam (f32)(w)r12.x, r11.x, s#2, t#2
+add.f r0.x, r0.x, (neg)r8.y
+mov.f32f32 r8.y, c14.y
+(ss)mov.f32f32 r11.x, c14.y
nop
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.w, r1.w
-(rpt1)nop
-mov.f32f32 r7.y, r1.y
-mov.f32f32 r8.z, r1.w
-(rpt1)nop
-mov.f32f32 r10.w, r7.y
-mov.f32f32 r11.x, r8.z
-(rpt5)nop
-sam (f32)(w)r13.y, r10.w, s#2, t#2
-(sy)cmps.f.lt r7.y, r14.x, r1.z
-cmps.f.lt r8.z, r14.x, r1.z
+(sy)cmps.f.lt r11.y, r12.w, r0.x
+mov.f32f32 r0.x, r0.x
(rpt1)nop
-cov.u32f32 r7.y, r7.y
-cov.u32f32 r8.z, r8.z
+cov.u32f32 r11.y, r11.y
+cmps.f.lt r12.x, r12.w, r0.x
(rpt1)nop
-mov.f32f32 r7.y, r7.y
-cmps.f.ne r8.z, r8.z, c14.y
+cmps.f.ne r11.y, r11.y, c14.y
+cov.u32f32 r12.x, r12.x
(rpt1)nop
-cmps.f.ne r7.y, r7.y, c14.y
-sel.b32 r8.z, r12.z, r8.z, r12.y
-(rpt1)nop
-sel.b32 r7.z, r7.z, r7.y, r13.x
-sel.b32 r7.x, r7.x, r7.y, r12.w
-add.f r1.z, r1.z, (neg)r8.z
+sel.b32 r11.x, r2.x, r11.y, r11.x
+cmps.f.ne r12.x, r12.x, c14.y
+sel.b32 r8.y, r2.y, r11.y, r8.y
nop
-add.f r1.y, r1.y, r7.z
-add.f r1.w, r1.w, r7.x
-mov.f32f32 r1.z, r1.z
+add.f r1.z, r1.z, r11.x
+sel.b32 r11.x, r11.w, r12.x, r11.z
+add.f r11.z, r9.z, r8.y
nop
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.w, r1.w
-(rpt1)nop
-mov.f32f32 r7.x, r1.y
-mov.f32f32 r7.y, r1.w
-(rpt1)nop
-(ss)mov.f32f32 r10.w, r7.x
-mov.f32f32 r11.x, r7.y
-(rpt5)nop
-sam (f32)(w)r12.y, r10.w, s#2, t#2
-(sy)cmps.f.lt r7.x, r13.x, r1.z
-cmps.f.lt r7.y, r13.x, r1.z
-(rpt1)nop
-cov.u32f32 r7.x, r7.x
-cov.u32f32 r7.y, r7.y
+mov.f32f32 r11.y, r1.z
+add.f r0.x, r0.x, (neg)r11.x
+mov.f32f32 r8.y, r11.z
+(rpt3)nop
+sam (f32)(w)r11.x, r11.y, s#2, t#2
+(sy)cmps.f.lt r9.z, r11.w, r0.x
+mov.f32f32 r0.x, r0.x
(rpt1)nop
-mov.f32f32 r7.x, r7.x
-cmps.f.ne r7.y, r7.y, c14.y
+cov.u32f32 r9.z, r9.z
+cmps.f.lt r11.x, r11.w, r0.x
(rpt1)nop
-cmps.f.ne r7.x, r7.x, c14.y
-sel.b32 r7.y, r11.z, r7.y, r11.y
+cmps.f.ne r9.z, r9.z, c14.y
+cov.u32f32 r11.x, r11.x
(rpt1)nop
-sel.b32 r2.z, r2.z, r7.x, r12.x
-sel.b32 r6.y, r6.y, r7.x, r11.w
-add.f r1.z, r1.z, (neg)r7.y
+sel.b32 r10.z, r2.x, r9.z, r10.z
+cmps.f.ne r11.x, r11.x, c14.y
+sel.b32 r9.z, r2.y, r9.z, r10.y
nop
-add.f r1.y, r1.y, r2.z
-add.f r1.w, r1.w, r6.y
-mov.f32f32 r1.z, r1.z
+add.f r1.z, r1.z, r10.z
+sel.b32 r1.w, r1.w, r11.x, r10.w
+add.f r10.z, r8.y, r9.z
nop
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.w, r1.w
+mov.f32f32 r10.y, r1.z
+add.f r0.x, r0.x, (neg)r1.w
+mov.f32f32 r1.w, r10.z
+(rpt3)nop
+sam (f32)(w)r10.y, r10.y, s#2, t#2
+(sy)cmps.f.lt r8.y, r11.x, r0.x
+mov.f32f32 r0.x, r0.x
(rpt1)nop
-mov.f32f32 r2.z, r1.y
-mov.f32f32 r6.y, r1.w
+cov.u32f32 r8.y, r8.y
+cmps.f.lt r9.z, r11.x, r0.x
(rpt1)nop
-mov.f32f32 r7.x, r2.z
-mov.f32f32 r7.y, r6.y
-(rpt5)nop
-sam (f32)(w)r10.w, r7.x, s#2, t#2
-(sy)cmps.f.lt r2.z, r11.z, r1.z
-cmps.f.lt r6.y, r11.z, r1.z
+cmps.f.ne r8.y, r8.y, c14.y
+cov.u32f32 r9.z, r9.z
(rpt1)nop
-cov.u32f32 r2.z, r2.z
-cov.u32f32 r6.y, r6.y
+sel.b32 r10.x, r2.x, r8.y, r10.x
+cmps.f.ne r9.z, r9.z, c14.y
+sel.b32 r8.y, r2.y, r8.y, r9.w
+nop
+add.f r1.z, r1.z, r10.x
+sel.b32 r8.z, r9.y, r9.z, r8.z
+add.f r9.z, r1.w, r8.y
+nop
+mov.f32f32 r9.y, r1.z
+add.f r0.x, r0.x, (neg)r8.z
+mov.f32f32 r1.w, r9.z
+(rpt3)nop
+sam (f32)(w)r9.y, r9.y, s#2, t#2
+(sy)cmps.f.lt r8.y, r10.x, r0.x
+mov.f32f32 r0.x, r0.x
(rpt1)nop
-mov.f32f32 r2.z, r2.z
-cmps.f.ne r6.y, r6.y, c14.y
+cov.u32f32 r8.y, r8.y
+cmps.f.lt r8.z, r10.x, r0.x
(rpt1)nop
-cmps.f.ne r2.z, r2.z, c14.y
-sel.b32 r6.y, r10.z, r6.y, r10.y
+cmps.f.ne r8.y, r8.y, c14.y
+cov.u32f32 r8.z, r8.z
(rpt1)nop
-sel.b32 r4.z, r4.z, r2.z, r9.y
-sel.b32 r2.z, r3.w, r2.z, r9.x
-add.f r1.z, r1.z, (neg)r6.y
+sel.b32 r9.x, r2.x, r8.y, r9.x
+cmps.f.ne r8.z, r8.z, c14.y
+sel.b32 r8.y, r2.y, r8.y, r8.w
nop
-add.f r1.y, r1.y, r4.z
-add.f r1.w, r1.w, r2.z
-mov.f32f32 r1.z, r1.z
+add.f r1.z, r1.z, r9.x
+sel.b32 r2.z, r2.w, r8.z, r2.z
+add.f r8.z, r1.w, r8.y
nop
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.w, r1.w
-(rpt1)nop
-mov.f32f32 r2.z, r1.y
-mov.f32f32 r3.w, r1.w
+mov.f32f32 r8.y, r1.z
+add.f r0.x, r0.x, (neg)r2.z
+mov.f32f32 r1.w, r8.z
+(rpt3)nop
+sam (f32)(w)r8.y, r8.y, s#2, t#2
+(sy)cmps.f.lt r2.z, r9.x, r0.x
+mov.f32f32 r0.x, r0.x
(rpt1)nop
-(ss)mov.f32f32 r7.x, r2.z
-mov.f32f32 r7.y, r3.w
-(rpt5)nop
-sam (f32)(w)r10.y, r7.x, s#2, t#2
-(sy)cmps.f.lt r2.z, r11.x, r1.z
-(rpt2)nop
cov.u32f32 r2.z, r2.z
(rpt2)nop
cmps.f.ne r2.z, r2.z, c14.y
(rpt2)nop
-sel.b32 r3.w, r5.y, r2.z, r7.w
-sel.b32 r2.y, r2.w, r2.z, r2.y
+sel.b32 r1.y, r2.x, r2.z, r1.y
+sel.b32 r2.z, r2.y, r2.z, r7.w
(rpt1)nop
-add.f r1.y, r1.y, r3.w
-add.f r1.w, r1.w, r2.y
+add.f r1.y, r1.z, r1.y
+add.f r1.w, r1.w, r2.z
(rpt1)nop
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.w, r1.w
+mov.f32f32 r1.z, r1.y
+mov.f32f32 r2.z, r1.w
(rpt1)nop
-add.f r2.y, r1.y, (neg)r2.x
-mov.f32f32 r2.z, r1.y
-add.f r2.w, r1.w, (neg)r0.y
-mov.f32f32 r3.w, r1.w
-mov.f32f32 r2.y, r2.y
-(ss)mov.f32f32 r7.x, r2.z
-mov.f32f32 r2.z, r2.w
-mov.f32f32 r7.y, r3.w
-mov.f32f32 r2.y, r2.y
-(rpt2)nop
-mov.f32f32 r7.z, r2.y
-mov.f32f32 r2.y, r2.z
-sam (f32)(w)r10.y, r7.x, s#2, t#2
-(sy)add.f r2.z, c15.z, r11.x
-add.f r1.z, r11.x, (neg)r1.z
-nop
-mov.f32f32 r7.w, r2.y
-mov.f32f32 r2.y, r2.z
-mov.f32f32 r1.z, r1.z
-(rpt3)nop
-sam (f32)(w)r7.x, r7.z, s#2, t#2
-(sy)add.f r2.y, r2.y, (neg)r7.w
-(rpt2)nop
-mov.f32f32 r2.y, r2.y
+(ss)add.f r8.y, r1.z, (neg)r2.x
+add.f r8.z, r2.z, (neg)r2.y
+(rpt1)nop
+sam (f32)(w)r8.w, r1.z, s#2, t#2
+(sy)(ss)add.f r1.z, c15.z, r9.z
+add.f r0.x, r9.z, (neg)r0.x
+(rpt1)nop
+sam (f32)(w)r8.y, r8.y, s#2, t#2
+(sy)add.f r1.z, r1.z, (neg)r9.x
(rpt5)nop
-rcp r2.y, r2.y
-(ss)mov.f32f32 r2.y, r2.y
-(rpt2)nop
-mul.f r1.z, r1.z, r2.y
-(rpt2)nop
-mov.f32f32 r1.z, r1.z
+rcp r1.z, r1.z
+(ss)mul.f r0.x, r0.x, r1.z
(rpt2)nop
-mul.f r2.x, r2.x, r1.z
-mul.f r0.y, r0.y, r1.z
-(rpt1)nop
-mov.f32f32 r1.z, r2.x
-mov.f32f32 r0.y, r0.y
+(ss)mov.f32f32 r1.z, r0.x
+mul.f r0.x, r2.x, r0.x
(rpt1)nop
-add.f r1.y, r1.y, (neg)r1.z
-add.f r0.y, r1.w, (neg)r0.y
+mul.f r1.z, r2.y, r1.z
+add.f r1.w, r1.y, (neg)r0.x
(rpt1)nop
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r0.y, r0.y
+add.f r2.x, r2.z, (neg)r1.z
+mov.f32f32 r1.y, r1.w
(rpt1)nop
-mov.f32f32 r1.z, r1.y
-mov.f32f32 r1.w, r1.y
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r2.x, r0.y
-mov.f32f32 r2.y, r1.z
-mov.f32f32 r1.z, r0.y
-mov.f32f32 r7.x, r1.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r2.z, r1.z
mov.f32f32 r1.z, r2.x
-mov.f32f32 r7.y, r0.y
-nop
-mov.f32f32 r1.w, r1.y
-mov.f32f32 r2.x, r1.z
+mov.f32f32 r8.y, r1.y
nop
-sam (f32)(xyz)r10.y, r2.y, s#2, t#2
-(sy)mad.f32 r0.y, c16.x, r10.z, c16.y
-mad.f32 r1.y, c16.x, r10.y, c16.y
-sam (f32)(xyzw)r7.x, r7.x, s#0, t#0
-(sy)cmps.f.lt r1.z, r7.w, c15.w
-(ss)mov.f32f32 r2.y, r7.w
-mov.f32f32 r0.y, r0.y
-sam (f32)(xyz)r11.x, r1.w, s#3, t#3
-(sy)(ss)mul.f r1.w, c8.y, r11.y
-mul.f r2.x, c8.x, r11.x
-cov.u32f32 r1.z, r1.z
-absneg.f r0.y, (neg)r0.y
-mov.f32f32 r1.y, r1.y
-mul.f r2.z, c8.z, r11.z
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r2.x, r2.x
-mul.f r2.w, r4.y, r1.y
-mul.f r3.z, r3.z, r1.y
-mad.f32 r0.x, r0.x, r0.y, r2.w
-mad.f32 r2.w, r3.x, r0.y, r3.z
-mul.f r1.y, r4.w, r1.y
-cmps.f.ne p0.x, r1.z, r9.w
-mov.f32f32 r0.x, r0.x
-mad.f32 r1.z, c16.x, r10.w, c16.y
-mov.f32f32 r2.w, r2.w
-mad.f32 r0.y, r3.y, r0.y, r1.y
-mov.f32f32 r1.y, r2.z
-mov.f32f32 r1.z, r1.z
+sam (f32)(xyzw)r2.x, r1.w, s#0, t#0
+(sy)cmps.f.lt r0.x, r2.w, c15.w
+mov.f32f32 r8.z, r1.z
+(rpt1)nop
+sam (f32)(xyz)r8.w, r1.y, s#2, t#2
+(sy)(ss)mad.f32 r1.y, c16.x, r8.w, c16.y
+cov.u32f32 r0.x, r0.x
+mad.f32 r1.z, c16.x, r9.x, c16.y
+mad.f32 r1.w, c16.x, r9.y, c16.y
+mov.f32f32 r7.w, r1.y
+cmps.f.ne p0.x, r0.x, r4.w
+absneg.f r0.x, (neg)r1.z
+mul.f r1.y, r4.x, r1.y
+mul.f r1.z, r4.z, r7.w
+mul.f r4.x, r5.y, r7.w
+mad.f32 r0.y, r0.y, r0.x, r1.z
+mov.f32f32 r1.z, r1.w
+mad.f32 r4.x, r5.z, r0.x, r4.x
+mad.f32 r0.x, r3.w, r0.x, r1.y
kill p0.x
-mov.f32f32 r3.w, r2.y
-mov.f32f32 r0.y, r0.y
-mad.f32 r0.x, r5.z, r1.z, r0.x
-mad.f32 r2.y, r6.w, r1.z, r2.w
-mad.f32 r0.y, r6.z, r1.z, r0.y
+mad.f32 r0.y, r6.x, r1.z, r0.y
+mad.f32 r1.y, r5.w, r1.z, r4.x
+mad.f32 r0.x, r8.x, r1.w, r0.x
nop
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r1.z, r2.y
-mov.f32f32 r0.y, r0.y
-nop
-mul.f r2.y, r0.x, r0.x
-nop
-mad.f32 r2.y, r1.z, r1.z, r2.y
-(rpt2)nop
-mov.f32f32 r2.y, r2.y
-nop
-mad.f32 r2.y, r0.y, r0.y, r2.y
+mov.f32f32 r1.z, r0.y
+mov.f32f32 r1.w, r1.y
+mov.f32f32 r3.w, r0.x
+sam (f32)(xyz)r5.y, r8.y, s#3, t#3
+(sy)mul.f r4.x, c8.z, r5.w
+mul.f r0.y, r0.y, r1.z
+mul.f r4.z, c8.y, r5.z
+mad.f32 r0.y, r1.y, r1.w, r0.y
+mul.f r1.y, c8.x, r5.y
+mad.f32 r0.y, r3.w, r3.w, r0.y
(rpt5)nop
-rsq r2.y, r2.y
-(ss)mov.f32f32 r2.y, r2.y
-(rpt2)nop
-mul.f r0.x, r0.x, r2.y
-mul.f r1.z, r1.z, r2.y
-mul.f r0.y, r0.y, r2.y
-nop
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r0.y, r0.y
-nop
-mul.f r2.y, r0.x, r0.x
-mul.f r2.w, (neg)c9.x, r0.x
-mad.f32 r2.y, r1.z, r1.z, r2.y
-mad.f32 r2.w, (neg)c9.y, r1.z, r2.w
-(rpt1)nop
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r2.w, r2.w
-mad.f32 r2.y, r0.y, r0.y, r2.y
-mad.f32 r2.w, (neg)c9.z, r0.y, r2.w
-(rpt4)nop
-rsq r2.y, r2.y
-(ss)mov.f32f32 r2.y, r2.y
-max.f r2.w, r2.w, c14.y
+rsq r0.y, r0.y
+(ss)mov.f32f32 r3.w, r0.y
+mul.f r0.x, r0.x, r0.y
(rpt1)nop
-mul.f r0.x, r0.x, r2.y
-mov.f32f32 r2.w, r2.w
-mul.f r1.z, r1.z, r2.y
-mul.f r0.y, r0.y, r2.y
-mov.f32f32 r0.x, r0.x
-mad.f32 r2.y, c8.z, r2.w, (neg)r10.x
-mad.f32 r4.y, c8.y, r2.w, (neg)r8.y
-mad.f32 r2.w, c8.x, r2.w, (neg)r1.x
-mul.f r0.x, r0.x, r8.w
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r4.y, r4.y
-mov.f32f32 r2.w, r2.w
-mad.f32 r0.x, r1.z, r8.x, r0.x
-mad.f32 r1.z, c12.x, r2.y, r10.x
-mad.f32 r2.y, c12.x, r4.y, r8.y
-mad.f32 r1.x, c12.x, r2.w, r1.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r1.x, r1.x
-mad.f32 r0.x, r0.y, r9.z, r0.x
-(rpt2)nop
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
-max.f r0.x, c14.y, r0.x
-(rpt2)nop
-mov.f32f32 r0.x, r0.x
+(ss)mul.f r0.y, r1.z, r3.w
+mul.f r1.z, r1.w, r3.w
+mov.f32f32 r1.w, r0.x
+nop
+mov.f32f32 r3.w, r0.y
+mul.f r0.y, (neg)c9.x, r0.y
+mov.f32f32 r4.w, r1.z
+nop
+mul.f r5.y, r3.w, r3.w
+mad.f32 r0.y, (neg)c9.y, r1.z, r0.y
+mad.f32 r1.z, r4.w, r4.w, r5.y
+mad.f32 r0.x, (neg)c9.z, r0.x, r0.y
+mad.f32 r0.y, r1.w, r1.w, r1.z
(rpt5)nop
-log2 r0.x, r0.x
-(ss)mul.f r0.x, c12.y, r0.x
-(rpt2)nop
-mov.f32f32 r0.x, r0.x
+rsq r0.y, r0.y
+(ss)mov.f32f32 r1.z, r0.y
+max.f r0.x, r0.x, c14.y
+(ss)mul.f r0.y, r1.w, r0.y
+nop
+mul.f r1.w, r3.w, r1.z
+mov.f32f32 r3.w, r0.x
+mul.f r1.z, r4.w, r1.z
+mad.f32 r0.x, c8.x, r0.x, (neg)r1.x
+mul.f r1.w, r1.w, r6.w
+mad.f32 r4.w, c8.z, r3.w, (neg)r6.y
+mad.f32 r1.z, r1.z, r7.x, r1.w
+mad.f32 r1.w, c8.y, r3.w, (neg)r6.z
+mad.f32 r0.y, r0.y, r7.z, r1.z
+mad.f32 r1.z, c12.x, r4.w, r6.y
+mad.f32 r0.x, c12.x, r0.x, r1.x
+nop
+max.f r0.y, c14.y, r0.y
+mad.f32 r1.x, c12.x, r1.w, r6.z
+(rpt4)nop
+log2 r0.y, r0.y
+(ss)mul.f r0.y, c12.y, r0.y
(rpt5)nop
-exp2 r0.x, r0.x
-(ss)mul.f r0.y, r1.y, r0.x
-mul.f r1.y, r1.w, r0.x
-mad.f32 r0.y, r7.z, r1.z, r0.y
-mad.f32 r1.y, r7.y, r2.y, r1.y
-(ss)mul.f r0.x, r2.x, r0.x
-nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.y, r1.y
-mad.f32 r0.x, r7.x, r1.x, r0.x
-nop
-mul.f r0.y, r0.y, r0.w
-mul.f r1.x, r1.y, r0.w
-mov.f32f32 r0.x, r0.x
-nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.x, r1.x
-mad.f32 r0.y, c7.z, r7.z, r0.y
-mad.f32 r1.x, c7.y, r7.y, r1.x
+exp2 r0.y, r0.y
+(ss)mul.f r1.w, r4.x, r0.y
+mul.f r3.w, r4.z, r0.y
+mad.f32 r1.z, r2.z, r1.z, r1.w
+mad.f32 r1.x, r2.y, r1.x, r3.w
+(ss)mul.f r0.y, r1.y, r0.y
+nop
+mul.f r1.y, r1.z, r4.y
+mul.f r1.x, r1.x, r4.y
+mad.f32 r1.y, c7.z, r2.z, r1.y
+mad.f32 r1.x, c7.y, r2.y, r1.x
+mad.f32 r0.x, r2.x, r0.x, r0.y
+nop
+mul.f r0.y, r0.z, r1.y
+mul.f r1.x, r0.z, r1.x
mul.f r0.x, r0.x, r0.w
nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.w, r1.x
-mov.f32f32 r0.x, r0.x
-nop
-mul.f r0.y, r0.z, r0.y
-mul.f r0.w, r0.z, r0.w
-mad.f32 r0.x, c7.x, r7.x, r0.x
-nop
-add.f r0.y, r0.y, r5.x
-add.f r0.w, r0.w, r5.w
-mov.f32f32 r0.x, r0.x
+add.f r0.y, r0.y, r7.y
+add.f r0.w, r1.x, r3.y
+mad.f32 r0.x, c7.x, r2.x, r0.x
nop
-mul.f r0.y, r0.y, r4.x
-mul.f r0.w, r0.w, r4.x
+mul.f r0.y, r0.y, r5.x
+mul.f r0.w, r0.w, r5.x
mul.f r0.x, r0.z, r0.x
nop
-mul.f r0.y, r0.y, c6.z
-mul.f r0.z, r0.w, c6.y
-add.f r0.x, r0.x, r6.x
-nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-mul.f r0.x, r0.x, r4.x
-nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-mul.f r0.x, r0.x, c6.x
-nop
-mov.f32f32 r3.z, r0.y
-mov.f32f32 r3.y, r0.z
-mov.f32f32 r0.x, r0.x
+mul.f r2.z, r0.y, c6.z
+mul.f r2.y, r0.w, c6.y
+add.f r0.x, r0.x, r3.z
(rpt2)nop
-mov.f32f32 r0.x, r0.x
+mul.f r0.x, r0.x, r5.x
(rpt2)nop
-mov.f32f32 r3.x, r0.x
+mul.f r2.x, r0.x, c6.x
end
nop
nop
-nop
-; FRAG: outputs: r3.x (1:0)
-; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r2.z (5:9,cm=f,il=8,b=1) r63.y (5:10,cm=f,il=12,b=1) r3.x (5:11,cm=f,il=16,b=1) r2.y (5:12,cm=f,il=20,b=1) r7.x (5:13,cm=f,il=24,b=1) r8.x (5:14,cm=f,il=28,b=1) r7.x (5:15,cm=f,il=32,b=1)
-; FRAG: 802 instructions, 0 half, 65 full
-; pos (bary): r1.x
-; color: r3.x
-; fragcoord: r0.x
+; FRAG: outputs: r2.x (1:0)
+; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r3.x (5:9,cm=f,il=8,b=1) r63.z (5:10,cm=f,il=12,b=1) r3.x (5:11,cm=f,il=16,b=1) r1.w (5:12,cm=f,il=20,b=1) r6.x (5:13,cm=f,il=24,b=1) r6.x (5:14,cm=f,il=28,b=1) r7.x (5:15,cm=f,il=32,b=1)
+; FRAG: 532 instructions, 0 half, 13 full
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-63.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-63.asm
index ad4df45..b2e35b3 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-63.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-63.asm
@@ -6,346 +6,250 @@
@in(r0.w) in4
@in(r1.x) in5
@in(r1.y) in6
-@in(r1.z) in8
-@in(r1.w) in9
-@in(r2.x) in10
-@in(r2.y) in11
-@in(r2.z) in12
-@in(r2.w) in13
-@out(r11.z) out0
-@out(r11.w) out1
-@out(r12.x) out2
-@out(r12.y) out3
-@out(r7.x) out4
-@out(r7.y) out5
-@out(r7.z) out6
-@out(r7.w) out7
-@out(r3.x) out8
-@out(r3.y) out9
-@out(r3.z) out10
-@out(r3.w) out11
-@out(r9.z) out12
-@out(r9.w) out13
-@out(r10.x) out14
-@out(r10.y) out15
-@out(r1.y) out16
-@out(r1.z) out17
-@out(r1.w) out18
-@out(r2.x) out19
-@out(r12.z) out20
-@out(r12.w) out21
-@out(r13.x) out22
-@out(r13.y) out23
-@out(r8.z) out24
-@out(r8.w) out25
-@out(r9.x) out26
-@out(r9.y) out27
-@out(r10.z) out28
-@out(r10.w) out29
-@out(r11.x) out30
-@out(r11.y) out31
-(sy)(ss)floor.f r3.x, c15.z
-absneg.f r3.y, (abs)c18.x
-absneg.f r3.z, (abs)c18.y
-floor.f r3.w, c15.x
-add.f r3.x, c15.z, (neg)r3.x
-mul.f r4.x, c12.x, r1.z
-mul.f r4.y, c12.x, r0.w
-add.f r3.w, c15.x, (neg)r3.w
-mov.f32f32 r3.x, r3.x
-add.f r3.y, r3.y, r3.z
-mad.f32 r3.z, c13.x, r1.w, r4.x
-mad.f32 r4.x, c13.x, r1.x, r4.y
-max.f r3.x, r3.x, c19.y
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r3.z, r3.z
-min.f r3.x, r3.x, c23.y
-mul.f r4.y, c17.x, r3.y
-mul.f r4.z, c12.z, r0.x
-max.f r3.w, r3.w, c19.y
-max.f r3.x, r3.x, c19.x
-mad.f32 r4.z, c13.z, r0.y, r4.z
-mad.f32 r3.z, c14.x, r2.x, r3.z
-mad.f32 r4.x, c14.x, r1.y, r4.x
-mov.f32f32 r3.x, r3.x
-mad.f32 r4.z, c14.z, r0.z, r4.z
-min.f r3.w, r3.w, c23.y
-mov.f32f32 r3.z, r3.z
-mul.f r3.x, c17.x, r3.x
-add.f r4.z, r4.z, c15.z
-max.f r3.w, r3.w, c19.x
-mad.f32 r4.y, c19.w, r4.y, r4.z
-mov.f32f32 r3.x, r3.x
-mul.f r4.w, c12.y, r0.w
-mov.f32f32 r4.x, r4.x
-absneg.f r5.x, (neg)c5.x
-mad.f32 r3.x, c19.z, r3.x, c15.x
-mov.f32f32 r4.y, r4.y
-mov.f32f32 r3.w, r3.w
-mul.f r5.y, c12.x, r0.x
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r4.y, r4.y
-mad.f32 r5.y, c13.x, r0.y, r5.y
-mad.f32 r4.w, c13.y, r1.x, r4.w
-mov.f32f32 r3.x, r3.x
-add.f r4.y, r4.y, c20.x
-mad.f32 r5.y, c14.x, r0.z, r5.y
-mad.f32 r4.w, c14.y, r1.y, r4.w
-add.f r3.x, r3.x, c20.x
-floor.f r5.z, r4.y
-add.f r5.y, r5.y, c15.x
-mov.f32f32 r4.w, r4.w
-floor.f r5.w, r3.x
-add.f r4.y, r4.y, (neg)r5.z
-mad.f32 r3.w, c17.x, r3.w, r5.y
-mul.f r5.z, r4.w, r3.z
-add.f r3.x, r3.x, (neg)r5.w
-mad.f32 r4.y, c20.y, r4.y, c20.z
-add.f r3.w, r3.w, c20.x
-mul.f r5.w, c12.y, r1.z
-mad.f32 r3.x, c20.y, r3.x, c20.z
-absneg.f r4.y, (abs)r4.y
-floor.f r6.x, r3.w
-mad.f32 r5.w, c13.y, r1.w, r5.w
-absneg.f r3.x, (abs)r3.x
-mul.f r6.y, c20.y, r4.y
-add.f r3.w, r3.w, (neg)r6.x
-mul.f r4.y, r4.y, r4.y
-mul.f r6.x, c20.y, r3.x
-add.f r6.y, c20.w, (neg)r6.y
-mad.f32 r3.w, c20.y, r3.w, c20.z
-mul.f r3.x, r3.x, r3.x
-add.f r6.x, c20.w, (neg)r6.x
-mul.f r4.y, r4.y, r6.y
-absneg.f r3.w, (abs)r3.w
-mov.f32f32 r5.w, r5.w
-mul.f r3.x, r3.x, r6.x
-mov.f32f32 r4.y, r4.y
-mul.f r6.x, r0.x, r0.z
-mul.f r6.y, r0.y, c21.x
-mov.f32f32 r3.x, r3.x
-mul.f r6.z, r0.y, c22.x
-mul.f r6.w, c20.y, r3.w
-mul.f r6.x, r6.x, r6.y
-mul.f r3.w, r3.w, r3.w
-mov.f32f32 r6.y, r6.z
-add.f r6.z, c20.w, (neg)r6.w
-mov.f32f32 r6.x, r6.x
-mad.f32 r5.w, c14.y, r2.x, r5.w
-mov.f32f32 r6.y, r6.y
-mul.f r3.w, r3.w, r6.z
-mov.f32f32 r6.x, r6.x
-mov.f32f32 r5.w, r5.w
-max.f r6.y, r6.y, c19.y
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r6.x, r6.x
-mad.f32 r5.z, r4.x, r5.w, (neg)r5.z
-min.f r6.y, r6.y, c23.y
-mul.f r6.z, r4.x, r5.x
-absneg.f r6.w, (neg)c5.y
-mul.f r1.z, c12.z, r1.z
-min.f r6.y, r6.y, c19.w
-max.f r6.x, r6.x, c19.y
-mov.f32f32 r5.z, r5.z
-mad.f32 r6.z, r4.w, r6.w, r6.z
-mov.f32f32 r6.y, r6.y
-min.f r6.x, r6.x, c23.y
-mul.f r5.z, r5.z, r2.y
-mov.f32f32 r6.z, r6.z
-mul.f r3.x, r3.x, r6.y
-min.f r6.x, r6.x, c21.y
-mov.f32f32 r5.z, r5.z
+@in(r8.x) in8
+@in(r8.y) in9
+@in(r8.z) in10
+@in(r8.w) in11
+@in(r5.z) in12
+@in(r5.w) in13
+@out(r0.x) out0
+@out(r0.y) out1
+@out(r0.z) out2
+@out(r0.w) out3
+@out(r1.x) out4
+@out(r1.y) out5
+@out(r1.z) out6
+@out(r1.w) out7
+@out(r2.x) out8
+@out(r2.y) out9
+@out(r2.z) out10
+@out(r2.w) out11
+@out(r3.x) out12
+@out(r3.y) out13
+@out(r3.z) out14
+@out(r3.w) out15
+@out(r4.x) out16
+@out(r4.y) out17
+@out(r4.z) out18
+@out(r4.w) out19
+@out(r5.x) out20
+@out(r5.y) out21
+@out(r5.z) out22
+@out(r5.w) out23
+@out(r6.x) out24
+@out(r6.y) out25
+@out(r6.z) out26
+@out(r6.w) out27
+@out(r7.x) out28
+@out(r7.y) out29
+@out(r7.z) out30
+@out(r7.w) out31
+@const(c19.x) 0x3ecccccd, 0x00000000, 0x3eaaa64c, 0x3e800000
+@const(c20.x) 0x3f000000, 0x40000000, 0xbf800000, 0x40400000
+@const(c21.x) 0x399d4952, 0x3e4ccccd, 0x3f99999a, 0x3fd9999a
+@const(c22.x) 0x3c88ce70, 0x00000000, 0x00000000, 0x00000000
+@const(c23.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
+(sy)(ss)floor.f r1.z, c15.z
+floor.f r1.w, c15.x
+absneg.f r2.x, (abs)c18.x
+absneg.f r2.y, (abs)c18.y
+add.f r1.z, c15.z, (neg)r1.z
+add.f r1.w, c15.x, (neg)r1.w
+mul.f r2.z, c12.x, r0.w
+add.f r2.x, r2.x, r2.y
+max.f r1.z, r1.z, c19.y
+max.f r1.w, r1.w, c19.y
+mad.f32 r2.y, c13.x, r1.x, r2.z
+mul.f r2.z, c17.x, r2.x
+min.f r1.z, r1.z, c23.y
+min.f r1.w, r1.w, c23.y
+mul.f r2.w, c12.z, r0.x
+mad.f32 r2.y, c14.x, r1.y, r2.y
+max.f r1.z, r1.z, c19.x
+max.f r1.w, r1.w, c19.x
+mad.f32 r2.w, c13.z, r0.y, r2.w
+mul.f r3.x, c12.x, r0.x
+mul.f r1.z, c17.x, r1.z
+mad.f32 r3.x, c13.x, r0.y, r3.x
+mad.f32 r2.w, c14.z, r0.z, r2.w
+mad.f32 r3.x, c14.x, r0.z, r3.x
+mad.f32 r1.z, c19.z, r1.z, c15.x
+absneg.f r3.y, (neg)c5.x
+add.f r2.w, r2.w, c15.z
+mov.f32f32 r2.x, r2.x
+add.f r1.z, r1.z, c20.x
+add.f r3.x, r3.x, c15.x
+mad.f32 r2.z, c19.w, r2.z, r2.w
+mad.f32 r1.w, c17.x, r1.w, r3.x
+floor.f r3.z, r1.z
+mul.f r3.w, r2.y, r3.y
+mul.f r4.x, c12.y, r0.w
+add.f r1.w, r1.w, c20.x
+add.f r1.z, r1.z, (neg)r3.z
+add.f r2.z, r2.z, c20.x
+mad.f32 r3.z, c13.y, r1.x, r4.x
+max.f r2.x, r2.x, c21.z
+mad.f32 r1.z, c20.y, r1.z, c20.z
+floor.f r4.x, r1.w
+floor.f r4.y, r2.z
+mad.f32 r3.z, c14.y, r1.y, r3.z
+absneg.f r1.z, (abs)r1.z
+add.f r1.w, r1.w, (neg)r4.x
+add.f r2.z, r2.z, (neg)r4.y
+absneg.f r4.x, (neg)c5.y
+mul.f r4.y, c20.y, r1.z
+mad.f32 r1.w, c20.y, r1.w, c20.z
+mad.f32 r2.z, c20.y, r2.z, c20.z
+mul.f r1.z, r1.z, r1.z
+add.f r4.y, c20.w, (neg)r4.y
+absneg.f r1.w, (abs)r1.w
+absneg.f r2.z, (abs)r2.z
+mad.f32 r3.w, r3.z, r4.x, r3.w
+mul.f r1.z, r1.z, r4.y
+mul.f r4.y, r0.y, c22.x
+mul.f r4.z, c20.y, r1.w
+mul.f r4.w, c20.y, r2.z
+mul.f r1.w, r1.w, r1.w
+max.f r4.y, r4.y, c19.y
+add.f r4.z, c20.w, (neg)r4.z
+add.f r4.w, c20.w, (neg)r4.w
+mul.f r2.z, r2.z, r2.z
+min.f r4.y, r4.y, c23.y
+mul.f r1.w, r1.w, r4.z
+mul.f r4.z, r0.x, r0.z
+mul.f r4.w, r2.z, r4.w
+min.f r2.z, r4.y, c19.w
+mul.f r4.y, r0.y, c21.x
mul.f r0.w, c12.z, r0.w
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r6.x, r6.x
-mov.f32f32 r5.z, r5.z
+min.f r5.x, r2.x, c21.w
+mul.f r1.z, r1.z, r2.z
+mul.f r2.x, r4.z, r4.y
mad.f32 r0.w, c13.z, r1.x, r0.w
-mad.f32 r1.x, c13.z, r1.w, r1.z
-mul.f r1.z, r3.w, r6.x
-mul.f r1.w, r4.y, r6.x
-max.f r3.y, r3.y, c21.z
-mov.f32f32 r3.w, r5.z
-mov.f32f32 r1.z, r1.z
+mov.f32f32 r1.x, r5.x
+mov.f32f32 r4.y, r1.z
+max.f r2.z, r2.x, c19.y
mad.f32 r0.w, c14.z, r1.y, r0.w
-mad.f32 r1.y, c18.x, r3.x, r1.z
-mad.f32 r1.z, c18.y, r3.x, r1.z
-mov.f32f32 r3.x, r3.y
-mov.f32f32 r7.w, r3.w
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.z, r1.z
-min.f r3.x, r3.x, c21.w
-mov.f32f32 r0.w, r0.w
-absneg.f r3.y, (neg)c5.z
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r3.x, r3.x
-mul.f r3.w, c12.y, r0.x
-mad.f32 r4.y, r0.w, r3.y, r6.z
-mad.f32 r1.x, c14.z, r2.x, r1.x
-mad.f32 r2.x, r1.w, r3.x, r5.y
-mad.f32 r4.z, r1.w, r3.x, r4.z
-mad.f32 r3.w, c13.y, r0.y, r3.w
-mov.f32f32 r4.y, r4.y
-add.f r1.y, r2.x, r1.y
-add.f r1.z, r4.z, r1.z
-mad.f32 r2.x, c14.y, r0.z, r3.w
-max.f r3.w, c19.y, r4.y
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.z, r1.z
-add.f r2.x, r2.x, c15.y
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r4.y, r1.y
-mul.f r4.z, c8.w, r1.y
-mul.f r5.y, c8.z, r1.y
-mul.f r5.z, c8.y, r1.y
-add.f r4.y, c4.x, (neg)r4.y
-mad.f32 r1.w, r1.w, r3.x, r2.x
-mul.f r2.x, c8.x, r1.y
-mov.f32f32 r3.x, r1.z
-mul.f r6.x, r4.y, r4.y
-add.f r6.y, c4.y, (neg)r1.w
-mad.f32 r6.z, c9.w, r1.w, r4.z
-mad.f32 r8.x, c9.z, r1.w, r5.y
-mad.f32 r5.z, c9.y, r1.w, r5.z
-mad.f32 r6.x, r6.y, r6.y, r6.x
-mov.f32f32 r6.z, r6.z
-mov.f32f32 r8.x, r8.x
-mov.f32f32 r5.z, r5.z
-mov.f32f32 r6.x, r6.x
-add.f r3.x, c4.z, (neg)r3.x
-mad.f32 r6.z, c10.w, r1.z, r6.z
-mad.f32 r8.x, c10.z, r1.z, r8.x
-mad.f32 r5.z, c10.y, r1.z, r5.z
-mad.f32 r6.x, r3.x, r3.x, r6.x
+absneg.f r1.y, (neg)c5.z
+mov.f32f32 r2.x, r2.y
+min.f r4.z, r2.z, c23.y
+mul.f r5.y, c12.z, r8.x
+mov.f32f32 r2.z, r0.w
+mov.f32f32 r2.y, r3.z
+min.f r3.z, r4.z, c21.y
+mad.f32 r0.w, r0.w, r1.y, r3.w
+mad.f32 r4.z, c13.z, r8.y, r5.y
+mul.f r9.x, c12.y, r8.x
+mov.f32f32 r3.w, r3.z
+mul.f r3.z, r4.w, r3.z
+mul.f r4.w, c12.y, r0.x
+max.f r9.y, c19.y, r0.w
+mul.f r0.w, r1.w, r3.w
+mad.f32 r1.w, c13.y, r0.y, r4.w
+mad.f32 r3.x, r3.z, r1.x, r3.x
+mad.f32 r1.w, c14.y, r0.z, r1.w
+mov.f32f32 r3.w, r0.w
+mad.f32 r0.w, c18.y, r1.z, r0.w
+mad.f32 r1.z, c18.x, r4.y, r3.w
+mad.f32 r2.w, r3.z, r5.x, r2.w
+add.f r1.w, r1.w, c15.y
+mov.f32f32 r4.y, r9.y
+add.f r1.z, r3.x, r1.z
+add.f r2.w, r2.w, r0.w
+mad.f32 r1.x, r3.z, r1.x, r1.w
+nop
+mov.f32f32 r1.w, r1.z
+mul.f r1.z, c0.x, r1.z
+mov.f32f32 r4.w, r2.w
+add.f r6.y, c4.y, (neg)r1.x
+add.f r6.x, c4.x, (neg)r1.w
+mul.f r0.w, c8.y, r1.w
+mul.f r3.x, c8.x, r1.w
+mul.f r7.w, c8.w, r1.w
+mul.f r3.z, r6.x, r6.x
+mad.f32 r0.w, c9.y, r1.x, r0.w
+mad.f32 r3.z, r6.y, r6.y, r3.z
+add.f r6.z, c4.z, (neg)r4.w
+mad.f32 r0.w, c10.y, r4.w, r0.w
+mad.f32 r3.x, c9.x, r1.x, r3.x
+mad.f32 r3.w, c9.w, r1.x, r7.w
+mad.f32 r3.z, r6.z, r6.z, r3.z
mul.f r0.x, c12.w, r0.x
-mad.f32 r2.x, c9.x, r1.w, r2.x
+mad.f32 r3.x, c10.x, r4.w, r3.x
+mad.f32 r3.w, c10.w, r4.w, r3.w
+mul.f r7.z, c8.z, r1.w
mad.f32 r0.x, c13.w, r0.y, r0.x
-mov.f32f32 r0.y, r3.x
-mov.f32f32 r8.y, r4.y
+mul.f r0.y, c0.w, r1.w
+rsq r3.z, r3.z
+(ss)mov.f32f32 r5.x, r3.z
+mad.f32 r3.y, r6.x, r3.z, r3.y
mad.f32 r0.x, c14.w, r0.z, r0.x
-rsq r0.z, r6.x
-(ss)mov.f32f32 r0.z, r0.z
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r9.x, r0.y
-mov.f32f32 r8.z, r8.y
-mad.f32 r0.y, r4.y, r0.z, r5.x
-mad.f32 r4.y, r6.y, r0.z, r6.w
-mad.f32 r0.z, r3.x, r0.z, r3.y
-add.f r0.x, r0.x, c15.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r3.x, r4.y
-mov.f32f32 r0.z, r0.z
-mad.f32 r3.y, c11.w, r0.x, r6.z
-mul.f r4.y, r0.y, r0.y
-mad.f32 r5.x, c11.z, r0.x, r8.x
-mad.f32 r4.y, r3.x, r3.x, r4.y
-mov.f32f32 r3.y, r3.y
-mad.f32 r5.z, c11.y, r0.x, r5.z
-mad.f32 r2.x, c10.x, r1.z, r2.x
-mov.f32f32 r4.y, r4.y
-mov.f32f32 r3.y, r3.y
-mad.f32 r4.y, r0.z, r0.z, r4.y
-mov.f32f32 r5.x, r5.x
-mov.f32f32 r5.z, r5.z
-mad.f32 r2.x, c11.x, r0.x, r2.x
-(ss)mul.f r6.x, c0.w, r1.y
-mul.f r6.z, c0.z, r1.y
-mul.f r6.w, c0.y, r1.y
-rsq r4.y, r4.y
-(ss)mov.f32f32 r4.y, r4.y
-mov.f32f32 r10.y, r3.y
-mov.f32f32 r3.y, r5.x
-mul.f r5.x, r5.z, c16.y
-mul.f r0.z, r0.z, r4.y
-mul.f r3.x, r3.x, r4.y
-mul.f r0.y, r0.y, r4.y
-mov.f32f32 r10.x, r3.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r9.w, r5.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r0.y, r0.y
+mad.f32 r0.z, c9.z, r1.x, r7.z
+(ss)mad.f32 r3.z, r6.y, r5.x, r4.x
+mov.f32f32 r4.x, r3.y
+mad.f32 r1.y, r6.z, r5.x, r1.y
nop
-mov.f32f32 r10.w, r0.z
-mov.f32f32 r10.z, r3.x
-mov.f32f32 r9.y, r0.y
-mov.f32f32 r0.y, r2.x
-mad.f32 r0.z, c1.w, r1.w, r6.x
-mad.f32 r2.x, c1.z, r1.w, r6.z
-mad.f32 r3.x, c1.y, r1.w, r6.w
-mul.f r0.y, r0.y, c16.x
-mad.f32 r0.z, c2.w, r1.z, r0.z
-mad.f32 r2.x, c2.z, r1.z, r2.x
-mad.f32 r3.x, c2.y, r1.z, r3.x
-mov.f32f32 r9.z, r0.y
-mad.f32 r0.y, c3.w, r0.x, r0.z
-mad.f32 r0.z, c3.z, r0.x, r2.x
-mad.f32 r2.x, c3.y, r0.x, r3.x
-mul.f r3.x, c0.x, r1.y
-mov.f32f32 r12.y, r0.y
-mov.f32f32 r12.x, r0.z
-mov.f32f32 r11.w, r2.x
-mad.f32 r0.y, c1.x, r1.w, r3.x
-mad.f32 r0.z, c7.x, r1.z, c7.y
-mad.f32 r0.y, c2.x, r1.z, r0.y
-mad.f32 r1.y, c7.x, r1.y, c7.y
-mad.f32 r0.x, c3.x, r0.x, r0.y
-mov.f32f32 r0.y, r0.z
-mov.f32f32 r11.y, r4.z
-mov.f32f32 r11.x, r5.y
-mov.f32f32 r11.z, r0.x
-mov.f32f32 r12.w, r0.y
-mov.f32f32 r0.x, r1.y
-mov.f32f32 r0.y, r6.y
-(rpt1)nop
-mov.f32f32 r12.z, r0.x
-mov.f32f32 r8.w, r0.y
-mul.f r0.x, r3.w, c6.z
-mul.f r0.y, r3.w, c6.y
-mul.f r0.z, r3.w, c6.x
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r7.z, r0.x
-mov.f32f32 r7.y, r0.y
-mov.f32f32 r7.x, r0.z
-mul.f r0.x, r4.x, r1.x
-mul.f r0.y, r0.w, r5.w
-mad.f32 r0.x, r0.w, r3.z, (neg)r0.x
-mad.f32 r0.y, r4.w, r1.x, (neg)r0.y
-mov.f32f32 r0.z, r1.x
-mov.f32f32 r1.x, r5.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.w, r0.z
-mov.f32f32 r1.z, r1.x
-mul.f r0.x, r0.x, r2.y
-mul.f r0.y, r0.y, r2.y
-mov.f32f32 r0.z, r3.z
-nop
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.y, r0.z
-mov.f32f32 r0.z, r0.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
+mov.f32f32 r5.x, r3.z
+mul.f r4.x, r4.x, r4.x
+mov.f32f32 r5.y, r1.y
+add.f r0.x, r0.x, c15.w
+mad.f32 r3.z, r3.z, r5.x, r4.x
+mad.f32 r0.z, c10.z, r4.w, r0.z
+mad.f32 r1.y, r1.y, r5.y, r3.z
+mad.f32 r0.w, c11.y, r0.x, r0.w
+mad.f32 r3.x, c11.x, r0.x, r3.x
+mad.f32 r3.w, c11.w, r0.x, r3.w
+mad.f32 r3.z, c11.z, r0.x, r0.z
+mad.f32 r0.y, c1.w, r1.x, r0.y
+mul.f r0.z, c0.z, r1.w
+rsq r1.y, r1.y
+(ss)mov.f32f32 r4.x, r1.y
+mul.f r6.w, r3.y, r1.y
+mul.f r3.y, r0.w, c16.y
+mul.f r3.x, r3.x, c16.x
+mul.f r7.y, r5.y, r4.x
+mul.f r7.x, r5.x, r4.x
+mad.f32 r0.y, c2.w, r4.w, r0.y
+mad.f32 r0.z, c1.z, r1.x, r0.z
+mad.f32 r0.w, c3.w, r0.x, r0.y
+mad.f32 r0.y, c2.z, r4.w, r0.z
+(ss)mul.f r1.y, c0.y, r1.w
+mad.f32 r0.z, c3.z, r0.x, r0.y
+mad.f32 r0.y, c1.y, r1.x, r1.y
+mad.f32 r1.x, c1.x, r1.x, r1.z
+mad.f32 r0.y, c2.y, r4.w, r0.y
+mad.f32 r1.x, c2.x, r2.w, r1.x
+mad.f32 r0.y, c3.y, r0.x, r0.y
+mad.f32 r0.x, c3.x, r0.x, r1.x
+mad.f32 r5.x, c7.x, r1.w, c7.y
+mad.f32 r5.y, c7.x, r4.w, c7.y
+mul.f r1.z, r4.y, c6.z
+mul.f r1.y, r4.y, c6.y
+mul.f r1.x, r9.y, c6.x
+mad.f32 r1.w, c14.z, r8.z, r4.z
+mad.f32 r2.w, c13.y, r8.y, r9.x
+mul.f r4.x, c12.x, r8.x
+mad.f32 r2.w, c14.y, r8.z, r2.w
+mov.f32f32 r4.z, r1.w
+mad.f32 r4.x, c13.x, r8.y, r4.x
(rpt1)nop
-mov.f32f32 r2.x, r0.x
-mov.f32f32 r3.w, r0.y
-mov.f32f32 r3.z, r0.z
-mov.f32f32 r0.x, r4.w
-mov.f32f32 r0.y, r4.x
+mul.f r4.w, r2.x, r4.z
+mad.f32 r8.x, c14.x, r8.z, r4.x
+mov.f32f32 r4.y, r2.w
(rpt1)nop
-mov.f32f32 r3.y, r0.x
-mov.f32f32 r3.x, r0.y
-mov.f32f32 r0.x, r2.w
-mov.f32f32 r0.y, r2.z
+mov.f32f32 r4.x, r8.x
+mul.f r8.y, r2.z, r4.y
+mul.f r8.x, r2.y, r8.x
+mad.f32 r1.w, r2.y, r1.w, (neg)r8.y
+mad.f32 r4.w, r2.z, r4.x, (neg)r4.w
+mad.f32 r8.x, r2.x, r2.w, (neg)r8.x
(rpt1)nop
-mov.f32f32 r13.y, r0.x
-mov.f32f32 r13.x, r0.y
+mul.f r4.w, r4.w, r8.w
+mul.f r2.w, r1.w, r8.w
+mul.f r1.w, r8.x, r8.w
end
-; VERT: outputs: r11.z (0:0) r7.x (5:9) r3.x (5:10) r9.z (5:11) r1.y (5:12) r12.z (5:13) r8.z (5:14) r10.z (5:15)
-; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r1.z (0:0,cm=f,il=16,b=0) r2.z (0:0,cm=3,il=20,b=0)
-; VERT: 304 instructions, 0 half, 14 full
-; pos: r11.z
+nop
+nop
+; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11) r4.x (5:12) r5.x (5:13) r6.x (5:14) r7.x (5:15)
+; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r8.x (0:0,cm=f,il=16,b=0) r5.z (0:0,cm=3,il=20,b=0)
+; VERT: 201 instructions, 0 half, 10 full
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-64.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-64.asm
index 82c1168..8ea6dd3 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-64.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-64.asm
@@ -4,990 +4,677 @@
@in(r0.w) in3
@in(r1.x) in4
@in(r1.y) in5
-@out(r3.x) out0
-@out(r3.y) out1
-@out(r3.z) out2
-@out(r3.w) out3
-(sy)(ss)bary.f r0.x, 7, r1.x
-bary.f r0.y, 8, r1.x
+@out(r2.x) out0
+@out(r2.y) out1
+@out(r2.z) out2
+@out(r2.w) out3
+@const(c14.x) 0x3f000000, 0x00000000, 0x43160000, 0x3bdb8bac
+@const(c15.x) 0x41800000, 0x3f700000, 0x3d800000, 0x3f233333
+@const(c16.x) 0x40000000, 0xbf800000, 0xbb449ba6, 0xbf000000
+@const(c17.x) 0x3f800000, 0xbf000000, 0x3de38866, 0x3fb8aa65
+@const(c18.x) 0x3cf5c28f, 0x00000000, 0x00000000, 0x00000000
+@const(c19.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
+(sy)(ss)bary.f r0.x, 20, r1.x
+bary.f r0.y, 7, r1.x
+bary.f r1.z, 8, r1.x
add.f r0.w, r0.w, c14.y
-bary.f r1.z, 9, r1.x
-mov.f32f32 r0.x, r0.x
-bary.f r1.w, 20, r1.x
-add.f r2.x, r0.y, c16.w
-add.f r2.y, r1.z, c16.w
-bary.f r2.z, 18, r1.x
-mul.f r2.w, r1.w, r0.x
-bary.f r3.x, 15, r1.x
-floor.f r3.y, r2.x
+bary.f r1.w, 9, r1.x
+mul.f r2.x, r0.x, r0.y
+bary.f r2.y, 21, r1.x
+bary.f r2.z, 15, r1.x
+add.f r2.w, r1.z, c16.w
+add.f r3.y, r1.w, c16.w
rcp r0.w, r0.w
add.f r0.z, r0.z, c14.y
-floor.f r3.z, r2.y
-mov.f32f32 r3.x, r3.x
-bary.f r3.w, 21, r1.x
-add.f r2.x, r2.x, (neg)r3.y
+mad.f32 r2.x, r2.y, r2.z, r2.x
+bary.f r3.z, 22, r1.x
+bary.f r3.w, 3, r1.x
+floor.f r4.x, r2.w
(ss)mul.f r0.z, r0.z, r0.w
-(ss)add.f r0.w, r2.y, (neg)r3.z
-mad.f32 r2.y, r3.w, r3.x, r2.w
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r0.z, r0.z
-absneg.f r2.w, (neg)c11.x
-mov.f32f32 r2.y, r2.y
-bary.f r3.y, 3, r1.x
-mul.f r3.z, c14.x, r2.x
-mul.f r2.w, r2.w, c11.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r3.y, r3.y
-bary.f r4.x, 22, r1.x
-mov.f32f32 r3.z, r3.z
-mul.f r2.w, r2.w, r0.z
-mul.f r4.y, c14.x, r0.w
-mad.f32 r2.y, r4.x, r3.y, r2.y
-add.f r0.y, r0.y, (neg)r3.z
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r3.z, r4.y
-mov.f32f32 r2.y, r2.y
-bary.f r4.y, 12, r1.x
-mov.f32f32 r0.y, r0.y
-mul.f r0.z, r2.w, r0.z
-add.f r1.z, r1.z, (neg)r3.z
-mul.f r2.w, r1.w, r4.y
-bary.f r3.z, 13, r1.x
-add.f r4.z, c17.y, r0.y
+(ss)absneg.f r0.w, (neg)c11.x
+mad.f32 r2.x, r3.z, r3.w, r2.x
+add.f r2.w, r2.w, (neg)r4.x
+floor.f r4.x, r3.y
+mul.f r0.w, r0.w, c11.x
+mov.f32f32 r4.y, r2.x
+bary.f r4.z, 12, r1.x
+mov.f32f32 r4.w, r2.w
+mul.f r0.w, r0.w, r0.z
mov.f32f32 r0.z, r0.z
-add.f r0.y, c17.x, r0.y
-mad.f32 r2.w, r3.w, r3.z, r2.w
-mov.f32f32 r4.z, r4.z
+mul.f r5.x, r0.x, r4.z
+bary.f r5.y, 13, r1.x
+mul.f r5.z, c14.x, r4.w
+mul.f r0.z, r0.w, r0.z
+add.f r0.w, r3.y, (neg)r4.x
+mad.f32 r3.y, r2.y, r5.y, r5.x
+bary.f r4.x, 14, r1.x
+add.f r1.z, r1.z, (neg)r5.z
mul.f r0.z, r0.z, c17.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r2.w, r2.w
-bary.f r4.w, 14, r1.x
-mul.f r4.z, r4.z, c5.z
-mov.f32f32 r0.z, r0.z
-mul.f r0.y, r0.y, c5.z
-mad.f32 r2.w, r4.x, r4.w, r2.w
-mov.f32f32 r5.x, r4.z
-mov.f32f32 r4.z, r4.z
-mov.f32f32 r5.y, r0.y
-mul.f r5.z, r2.w, r2.w
-mov.f32f32 r5.w, r5.x
-mad.f32 r5.x, r2.y, r2.y, r5.z
-mov.f32f32 r1.z, r1.z
+mov.f32f32 r5.x, r0.w
+mad.f32 r3.y, r3.z, r4.x, r3.y
+mov.f32f32 r5.z, r1.z
+add.f r1.z, c17.x, r1.z
+mul.f r5.w, c14.x, r5.x
+mul.f r6.x, r3.y, r3.y
+add.f r5.z, c17.y, r5.z
+mad.f32 r2.x, r2.x, r4.y, r6.x
+bary.f r6.x, 4, r1.x
exp2 r0.z, r0.z
-(ss)mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r5.x, r5.x
-bary.f r5.z, 4, r1.x
-add.f r6.x, c17.y, r1.z
-add.f r6.y, c19.y, (neg)r0.z
-mov.f32f32 r0.y, r0.y
-mul.f r6.z, r1.w, r5.z
-bary.f r6.w, 5, r1.x
-mov.f32f32 r6.x, r6.x
+(ss)mov.f32f32 r6.y, r0.z
+mul.f r6.z, r5.z, c5.z
+add.f r1.w, r1.w, (neg)r5.w
+mul.f r5.z, r0.x, r6.x
+bary.f r5.w, 5, r1.x
+mov.f32f32 r7.y, r6.z
+add.f r6.y, c19.y, (neg)r6.y
+mov.f32f32 r6.w, r1.w
+mad.f32 r5.z, r2.y, r5.w, r5.z
+bary.f r8.x, 6, r1.x
mul.f r6.y, r6.y, c11.y
-mul.f r0.z, r0.z, c17.x
-mad.f32 r6.z, r3.w, r6.w, r6.z
-mul.f r7.x, r6.x, c5.w
-mov.f32f32 r7.y, r0.y
-add.f r0.y, c17.x, r1.z
-mov.f32f32 r1.z, r6.z
-bary.f r6.z, 6, r1.x
-mov.f32f32 r6.x, r7.x
+(ss)mul.f r0.z, r0.z, c17.x
+add.f r6.w, c17.y, r6.w
+mad.f32 r5.z, r3.z, r8.x, r5.z
+mul.f r8.y, r1.z, c5.z
+add.f r1.z, c17.x, r1.w
add.f r0.z, r0.z, r6.y
-mov.f32f32 r0.y, r0.y
-mad.f32 r1.z, r4.x, r6.z, r1.z
-mov.f32f32 r6.x, r6.x
-bary.f r6.y, 10, r1.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r1.z, r1.z
-mul.f r0.y, r0.y, c5.w
-add.f r8.x, r6.y, c16.z
-mov.f32f32 r8.y, r5.y
-mad.f32 r5.x, r1.z, r1.z, r5.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r5.y, r8.x
-mov.f32f32 r7.z, r0.y
-mov.f32f32 r7.x, r7.x
-mov.f32f32 r9.x, r4.z
-mov.f32f32 r6.y, r5.y
-rsq r4.z, r5.x
-(ss)mov.f32f32 r4.z, r4.z
+mov.f32f32 r1.w, r5.z
+mul.f r9.y, r6.w, c5.w
+mov.f32f32 r9.x, r8.y
+mul.f r8.z, r1.z, c5.w
+mad.f32 r1.z, r5.z, r1.w, r2.x
+mov.f32f32 r7.z, r9.y
max.f r0.z, r0.z, c14.y
-(ss)mov.f32f32 r5.x, r7.z
-mov.f32f32 r8.z, r7.x
-mul.f r1.z, r1.z, r4.z
+bary.f r2.x, 10, r1.x
+mov.f32f32 r6.w, r8.z
+add.f r4.w, c16.x, (neg)r4.w
+mul.f r0.x, r0.x, r0.x
+rsq r1.z, r1.z
+(ss)mov.f32f32 r5.z, r1.z
+add.f r8.w, r2.x, c16.z
min.f r0.z, r0.z, c17.x
-sam.s (f32)(x)r5.y, r5.w, s#4, t#4
-(sy)mov.f32f32 r5.y, r5.y
-mov.f32f32 r7.z, r5.x
-mov.f32f32 r1.z, r1.z
-add.f r5.x, c19.y, (neg)r0.z
-(ss)add.f r5.w, c19.y, (neg)r0.z
-add.f r6.x, c19.y, (neg)r0.z
-mul.f r1.z, r1.z, c15.x
-mov.f32f32 r5.y, r5.y
-add.f r6.y, c16.x, (neg)r2.x
-mul.f r5.x, r5.x, c10.z
-mov.f32f32 r1.z, r1.z
-mul.f r5.w, r5.w, c10.y
-mul.f r6.x, r6.x, c10.x
-mov.f32f32 r6.y, r6.y
-add.f r7.x, c16.x, (neg)r0.w
-mov.f32f32 r7.w, r8.x
-mul.f r2.y, r2.y, r4.z
-rcp r1.z, r1.z
-(ss)mov.f32f32 r1.z, r1.z
-mul.f r2.w, r2.w, r4.z
-mov.f32f32 r4.z, r7.x
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r7.w, r7.w
-mov.f32f32 r2.w, r2.w
-mul.f r7.x, r6.y, r4.z
-mov.f32f32 r2.y, r2.y
-mul.f r1.w, r1.w, r1.w
-absneg.f r2.w, (neg)r2.w
-mad.f32 r1.w, r3.w, r3.w, r1.w
-mul.f r3.w, r7.x, r5.y
-mov.f32f32 r5.y, r8.x
-sam.s (f32)(x)r7.x, r7.y, s#4, t#4
-(sy)mov.f32f32 r7.x, r7.x
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r0.y, r0.y
-mad.f32 r1.w, r4.x, r4.x, r1.w
-mov.f32f32 r8.w, r5.y
-mov.f32f32 r4.x, r2.z
-add.f r2.x, r2.x, c17.x
+(ss)mul.f r1.z, r3.y, r1.z
+mul.f r1.w, r1.w, r5.z
+mov.f32f32 r7.w, r8.w
+add.f r2.x, c19.y, (neg)r0.z
+add.f r3.y, c19.y, (neg)r0.z
+mul.f r1.w, r1.w, c15.x
+add.f r6.y, c19.y, (neg)r0.z
+mov.f32f32 r9.z, r8.w
+mov.f32f32 r7.x, r8.w
+nop
+sam.s (f32)(x)r9.w, r7.y, s#4, t#4
+(ss)mul.f r7.y, r2.x, c10.z
+mov.f32f32 r2.x, r4.w
+rcp r1.w, r1.w
+(ss)mov.f32f32 r7.z, r1.w
+mul.f r4.y, r4.y, r5.z
+mad.f32 r0.x, r2.y, r2.y, r0.x
+add.f r2.y, c16.x, (neg)r5.x
+mad.f32 r0.x, r3.z, r3.z, r0.x
+mul.f r3.y, r3.y, c10.y
+mul.f r3.z, r6.y, c10.x
+mov.f32f32 r5.x, r2.y
+absneg.f r1.z, (neg)r1.z
+sam.s (f32)(x)r10.x, r9.x, s#4, t#4
+sam.s (f32)(x)r6.y, r6.z, s#4, t#4
+sam.s (f32)(x)r8.y, r8.y, s#4, t#4
+add.f r2.w, r2.w, c17.x
add.f r0.w, r0.w, c17.x
-bary.f r5.y, 16, r1.x
-(ss)mov.f32f32 r7.y, r4.x
-sqrt r1.w, r1.w
-(ss)mov.f32f32 r1.w, r1.w
-sam.s (f32)(x)r4.x, r8.y, s#4, t#4
-(sy)mov.f32f32 r4.x, r4.x
-mul.f r4.z, r2.x, r4.z
-mov.f32f32 r9.y, r0.y
-add.f r0.y, c14.z, (neg)r1.w
-mov.f32f32 r1.w, r8.x
-bary.f r7.w, 19, r1.x
-mul.f r6.y, r6.y, r0.w
-mov.f32f32 r0.y, r0.y
-mad.f32 r3.w, r4.z, r4.x, r3.w
-mov.f32f32 r9.z, r1.w
-mov.f32f32 r1.w, r7.w
-mul.f r0.y, c12.z, r0.y
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r4.x, r5.y
-mov.f32f32 r7.z, r1.w
-mov.f32f32 r0.y, r0.y
-sam.s (f32)(x)r1.w, r9.x, s#4, t#4
-(sy)mov.f32f32 r1.w, r1.w
-mov.f32f32 r8.x, r4.x
-bary.f r4.x, 17, r1.x
-mul.f r0.y, r0.y, c14.w
-mad.f32 r1.w, r6.y, r1.w, r3.w
-(ss)nop
-sam (f32)(w)r8.y, r7.y, s#2, t#2
-(sy)cmps.f.lt r3.w, r9.x, c15.y
-mov.f32f32 r4.x, r4.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.w, r1.w
-mul.f r0.w, r2.x, r0.w
-cov.u32f32 r2.x, r3.w
-mul.f r2.w, r2.w, r0.y
-mul.f r0.y, r2.y, r0.y
-mad.f32 r0.w, r0.w, r7.x, r1.w
-cmps.f.ne r1.w, r2.x, c14.y
-mov.f32f32 r2.x, r2.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.y, c15.z
-mul.f r2.x, r2.x, r1.z
-mul.f r0.y, r0.y, r1.z
+sqrt r0.x, r0.x
+(ss)add.f r0.x, c14.z, (neg)r0.x
+mul.f r2.x, r2.x, r5.x
+mul.f r2.y, r2.w, r2.y
+mul.f r4.w, r4.w, r0.w
+mul.f r0.x, c12.z, r0.x
+(sy)mul.f r2.x, r2.x, r9.w
+bary.f r6.z, 18, r1.x
+mul.f r0.w, r2.w, r0.w
+mul.f r0.x, r0.x, c14.w
+mad.f32 r2.x, r2.y, r10.x, r2.x
+bary.f r6.w, 19, r1.x
+bary.f r8.z, 16, r1.x
+mov.f32f32 r2.y, r0.x
+mad.f32 r2.x, r4.w, r6.y, r2.x
+mul.f r0.x, r1.z, r0.x
+mad.f32 r0.w, r0.w, r8.y, r2.x
+mul.f r1.z, r4.y, r2.y
+sam (f32)(w)r9.x, r6.z, s#2, t#2
+bary.f r8.w, 17, r1.x
+mul.f r0.x, r0.x, r1.w
+(sy)cmps.f.lt r1.w, r9.w, c15.y
+mul.f r1.z, r1.z, r7.z
mul.f r0.w, c17.z, r0.w
-mov.f32f32 r1.z, c14.y
-mov.f32f32 r2.x, r2.x
+mov.f32f32 r2.x, r0.x
+cov.u32f32 r1.w, r1.w
+mov.f32f32 r2.y, r1.z
+cmps.f.lt r2.w, r9.w, c15.y
+mov.f32f32 r4.y, r0.w
+cmps.f.ne r1.w, r1.w, c14.y
+mov.f32f32 r4.w, c14.y
+mov.f32f32 r5.x, c14.y
+cov.u32f32 r2.w, r2.w
+sam (f32)(w)r8.y, r8.z, s#1, t#1
+(sy)cmps.f.lt r5.z, r9.x, c18.x
+sel.b32 r1.z, r1.z, r1.w, r4.w
+sel.b32 r0.x, r0.x, r1.w, r5.x
+(rpt1)nop
+add.f r1.w, r6.w, r1.z
+add.f r1.z, r6.z, r0.x
+cmps.f.ne r0.x, r2.w, c14.y
+cov.u32f32 r2.w, r5.z
+mov.f32f32 r4.w, c15.z
+mov.f32f32 r5.x, c14.y
mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.w, r0.w
-sel.b32 r1.z, r2.y, r1.w, r1.z
-mov.f32f32 r1.w, r2.x
-mov.f32f32 r2.y, r0.y
-mov.f32f32 r2.w, r0.y
-mov.f32f32 r3.w, r0.y
-mov.f32f32 r1.w, r1.w
-cmps.f.lt r4.z, r9.x, c15.y
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r2.w, r2.w
+mov.f32f32 r5.z, r2.z
+sam (f32)(w)r6.y, r1.z, s#2, t#2
+cmps.f.ne r2.z, r2.w, c14.y
+sel.b32 r0.x, r4.w, r0.x, r5.x
+mov.f32f32 r2.w, c14.y
mov.f32f32 r3.w, r3.w
-cov.u32f32 r4.z, r4.z
-mov.f32f32 r5.y, r2.x
-mov.f32f32 r6.y, r0.y
-mov.f32f32 r7.x, r2.x
-cmps.f.ne r4.z, r4.z, c14.y
-(ss)mov.f32f32 r7.y, c14.y
-mov.f32f32 r7.z, c14.y
-mov.f32f32 r5.y, r5.y
-mov.f32f32 r6.y, r6.y
-sel.b32 r1.w, r1.w, r4.z, r7.y
-sel.b32 r2.y, r2.y, r4.z, r7.z
-mov.f32f32 r4.z, r7.x
-mov.f32f32 r7.x, r0.y
-add.f r1.w, r2.z, r1.w
-add.f r2.y, r7.w, r2.y
-mov.f32f32 r2.z, r2.x
-mov.f32f32 r7.y, r0.y
-mov.f32f32 r7.z, r1.w
-mov.f32f32 r7.w, r2.y
-mov.f32f32 r7.x, r7.x
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r8.y, r7.z
-mov.f32f32 r8.z, r7.w
-mov.f32f32 r7.y, r7.y
-mov.f32f32 r7.z, r2.x
-mov.f32f32 r7.w, r0.y
-mov.f32f32 r8.w, r2.x
-mov.f32f32 r9.x, r0.y
-mov.f32f32 r9.y, r2.x
-sam (f32)(w)r9.z, r8.y, s#2, t#2
-add.f r1.z, c15.y, (neg)r1.z
-mov.f32f32 r7.z, r7.z
-mov.f32f32 r7.w, r7.w
-(ss)mov.f32f32 r8.z, r8.w
-(sy)cmps.f.lt r8.y, r10.y, r1.z
-cmps.f.lt r8.w, r10.y, r1.z
-mov.f32f32 r9.x, r9.x
-mov.f32f32 r9.y, r9.y
+mov.f32f32 r4.w, (0.000000)
+add.f r0.x, c15.y, (neg)r0.x
+sel.b32 r5.x, r2.w, r2.z, r9.x
+bary.f r6.y, 2, r1.x
+(ss)bary.f r6.z, 1, r1.x
+(sy)cmps.f.lt r2.z, r7.x, r0.x
+cmps.f.lt r2.w, r7.x, r0.x
+bary.f r6.w, 23, r1.x
+bary.f r7.x, 24, r1.x
+cov.u32f32 r2.z, r2.z
+cov.u32f32 r2.w, r2.w
+bary.f r7.z, 25, r1.x
+bary.f (ei)r1.x, 0, r1.x
+cmps.f.ne r1.y, r2.z, c14.y
+mov.f32f32 r2.z, c14.y
+cmps.f.ne r2.w, r2.w, c14.y
+mov.f32f32 r7.w, c14.y
+mov.f32f32 r8.y, c15.z
+sel.b32 r2.z, r2.x, r1.y, r2.z
+mov.f32f32 r8.z, c14.y
+sel.b32 r1.y, r2.y, r1.y, r7.w
+mov.f32f32 r7.w, c14.y
+add.f r1.z, r1.z, r2.z
+sel.b32 r2.z, r8.y, r2.w, r8.z
+add.f r8.z, r1.w, r1.y
+mov.f32f32 r1.y, c14.y
+mov.f32f32 r8.y, r1.z
+add.f r0.x, r0.x, (neg)r2.z
+mov.f32f32 r1.w, r8.z
+mov.f32f32 r2.z, c14.y
+mov.f32f32 r2.w, c15.z
+mov.f32f32 r8.w, c14.y
+mov.f32f32 r9.x, c14.y
+sam (f32)(w)r9.y, r8.y, s#2, t#2
+(sy)(ss)cmps.f.lt r8.y, r10.x, r0.x
+mov.f32f32 r0.x, r0.x
+mov.f32f32 r8.z, c14.y
+mov.f32f32 r9.y, c15.z
cov.u32f32 r8.y, r8.y
-cov.u32f32 r8.w, r8.w
-mov.f32f32 r9.z, r0.y
-mov.f32f32 r9.w, r2.x
-mov.f32f32 r8.y, r8.y
-cmps.f.ne r8.w, r8.w, c14.y
-mov.f32f32 r10.x, c15.z
-mov.f32f32 r10.y, c14.y
+cmps.f.lt r9.z, r10.x, r0.x
+mov.f32f32 r9.w, c14.y
+mov.f32f32 r10.x, c14.y
cmps.f.ne r8.y, r8.y, c14.y
-mov.f32f32 r10.z, r2.x
-mov.f32f32 r10.w, r0.y
-sel.b32 r8.w, r10.x, r8.w, r10.y
-mov.f32f32 r9.z, r9.z
-mov.f32f32 r10.x, r10.z
mov.f32f32 r10.y, c14.y
-mov.f32f32 r10.z, r10.w
+cov.u32f32 r9.z, r9.z
+mov.f32f32 r10.z, c14.y
mov.f32f32 r10.w, c14.y
-add.f r1.z, r1.z, (neg)r8.w
-sel.b32 r8.w, r10.x, r8.y, r10.y
-mov.f32f32 r9.w, r9.w
-mov.f32f32 r10.x, r0.y
-mov.f32f32 r10.y, r2.x
-add.f r1.w, r1.w, r8.w
-sel.b32 r8.y, r10.z, r8.y, r10.w
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r8.w, r10.x
-mov.f32f32 r1.w, r1.w
-add.f r2.y, r2.y, r8.y
-mov.f32f32 r10.x, r10.y
-mov.f32f32 r8.y, r0.y
-mov.f32f32 r10.y, r1.w
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r10.z, r2.x
-mov.f32f32 r10.w, r0.y
-mov.f32f32 r11.x, r10.y
-mov.f32f32 r10.y, r2.y
-mov.f32f32 r11.z, r8.y
-mov.f32f32 r10.z, r10.z
-mov.f32f32 r10.w, r10.w
-mov.f32f32 r11.y, r10.y
-mov.f32f32 r8.y, r2.x
-mov.f32f32 r10.y, r0.y
-mov.f32f32 r11.w, r2.x
-mov.f32f32 r12.x, r0.y
-mov.f32f32 r12.y, r2.x
-mov.f32f32 r12.z, r0.y
-sam (f32)(w)r12.w, r11.x, s#2, t#2
-(sy)(ss)cmps.f.lt r11.x, r13.z, r1.z
-cmps.f.lt r11.y, r13.z, r1.z
-mov.f32f32 r12.w, r8.y
-mov.f32f32 r10.y, r10.y
-cov.u32f32 r8.y, r11.x
-cov.u32f32 r11.x, r11.y
-mov.f32f32 r11.y, r11.w
-mov.f32f32 r11.w, r12.x
-mov.f32f32 r8.y, r8.y
-cmps.f.ne r11.x, r11.x, c14.y
-mov.f32f32 r12.x, c15.z
+sel.b32 r10.y, r2.x, r8.y, r10.y
+cmps.f.ne r9.z, r9.z, c14.y
+sel.b32 r8.y, r2.y, r8.y, r10.z
+mov.f32f32 r10.z, c15.z
+add.f r1.z, r1.z, r10.y
+mov.f32f32 r10.y, c14.y
+add.f r11.y, r1.w, r8.y
+mov.f32f32 r1.w, c15.z
+mov.f32f32 r11.x, r1.z
+sel.b32 r8.y, r10.z, r9.z, r10.y
+mov.f32f32 r9.z, r11.y
+mov.f32f32 r10.y, c14.y
+mov.f32f32 r10.z, c14.y
+mov.f32f32 r11.z, c14.y
+mov.f32f32 r11.w, c15.z
+sam (f32)(w)r12.x, r11.x, s#2, t#2
+add.f r0.x, r0.x, (neg)r8.y
+mov.f32f32 r8.y, c14.y
+(ss)mov.f32f32 r11.x, c14.y
+mov.f32f32 r11.y, c14.y
+(sy)cmps.f.lt r12.x, r12.w, r0.x
+mov.f32f32 r0.x, r0.x
+mov.f32f32 r12.y, c15.z
+mov.f32f32 r12.z, c14.y
+cov.u32f32 r12.x, r12.x
+cmps.f.lt r12.w, r12.w, r0.x
mov.f32f32 r13.x, c14.y
-cmps.f.ne r8.y, r8.y, c14.y
-mov.f32f32 r13.y, r2.x
-mov.f32f32 r12.z, r12.z
+mov.f32f32 r13.y, c14.y
+cmps.f.ne r12.x, r12.x, c14.y
mov.f32f32 r13.z, c14.y
-sel.b32 r11.x, r12.x, r11.x, r13.x
-mov.f32f32 r12.x, r13.y
-mov.f32f32 r13.x, c14.y
-sel.b32 r12.z, r12.z, r8.y, r13.z
-add.f r1.z, r1.z, (neg)r11.x
-mov.f32f32 r11.x, r12.y
-sel.b32 r8.y, r12.x, r8.y, r13.x
-add.f r2.y, r2.y, r12.z
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r12.x, r2.x
-add.f r1.w, r1.w, r8.y
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r8.y, r4.x
-bary.f r4.x, 23, r1.x
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r12.y, r2.y
-mov.f32f32 r12.x, r12.x
-mov.f32f32 r4.x, r4.x
-mov.f32f32 r12.z, r1.w
-mov.f32f32 r13.y, r12.y
-sam (f32)(w)r13.z, r8.x, s#1, t#1
-(sy)(ss)cmps.f.lt r8.x, r14.y, c18.x
-mov.f32f32 r8.y, r14.y
-mov.f32f32 r13.x, r12.z
-bary.f r12.y, 24, r1.x
-bary.f r12.z, 25, r1.x
-mov.f32f32 r13.z, (0.000000)
-cov.u32f32 r8.x, r8.x
-mov.f32f32 r8.y, r8.y
-mov.f32f32 r12.y, r12.y
-sam (f32)(w)r13.w, r13.x, s#2, t#2
-(sy)(ss)cmps.f.lt r13.x, r14.z, r1.z
-cmps.f.lt r13.y, r14.z, r1.z
-cmps.f.ne r8.x, r8.x, c14.y
-nop
-cov.u32f32 r13.x, r13.x
-cov.u32f32 r13.y, r13.y
+cov.u32f32 r12.w, r12.w
mov.f32f32 r13.w, c14.y
-mov.f32f32 r12.z, r12.z
-mov.f32f32 r13.x, r13.x
-cmps.f.ne r13.y, r13.y, c14.y
mov.f32f32 r14.x, c15.z
-mov.f32f32 r14.y, c14.y
-cmps.f.ne r13.x, r13.x, c14.y
-mov.f32f32 r14.z, c14.y
-mov.f32f32 r14.w, c14.y
-sel.b32 r13.y, r14.x, r13.y, r14.y
-sel.b32 r8.x, r13.w, r8.x, r8.y
-sel.b32 r8.y, r12.x, r13.x, r14.z
-sel.b32 r11.w, r11.w, r13.x, r14.w
-add.f r1.z, r1.z, (neg)r13.y
-bary.f r12.x, 2, r1.x
-add.f r1.w, r1.w, r8.y
-add.f r2.y, r2.y, r11.w
-mov.f32f32 r1.z, r1.z
-bary.f r8.y, 1, r1.x
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r2.y, r2.y
-bary.f (ei)r1.x, 0, r1.x
-mov.f32f32 r1.y, c14.y
-mov.f32f32 r11.w, r1.w
-mov.f32f32 r13.x, r2.y
-mov.f32f32 r13.y, c14.y
+sel.b32 r13.z, r2.x, r12.x, r13.z
+cmps.f.ne r12.w, r12.w, c14.y
+sel.b32 r12.x, r2.y, r12.x, r13.w
+mov.f32f32 r13.w, c15.z
+add.f r1.z, r1.z, r13.z
+mov.f32f32 r13.z, c14.y
+add.f r14.z, r9.z, r12.x
+mov.f32f32 r9.z, c14.y
+mov.f32f32 r14.y, r1.z
+sel.b32 r12.x, r13.w, r12.w, r13.z
+mov.f32f32 r12.w, r14.z
+mov.f32f32 r13.z, c14.y
mov.f32f32 r13.w, c14.y
-mov.f32f32 r14.x, r11.w
-mov.f32f32 r14.y, r13.x
-mov.f32f32 r11.w, c15.z
-mov.f32f32 r13.x, c14.y
-mov.f32f32 r14.z, c14.y
-mov.f32f32 r14.w, c14.y
-mov.f32f32 r15.x, c15.z
-mov.f32f32 r15.y, c14.y
-sam (f32)(w)r15.z, r14.x, s#2, t#2
-(sy)(ss)cmps.f.lt r14.x, r16.y, r1.z
-cmps.f.lt r14.y, r16.y, r1.z
+mov.f32f32 r14.w, c15.z
+mov.f32f32 r15.x, c14.y
+sam (f32)(w)r15.y, r14.y, s#2, t#2
+add.f r0.x, r0.x, (neg)r12.x
+mov.f32f32 r12.x, c14.y
+(ss)mov.f32f32 r14.y, c14.y
+mov.f32f32 r14.z, c15.z
+(sy)cmps.f.lt r15.y, r16.x, r0.x
+mov.f32f32 r0.x, r0.x
mov.f32f32 r15.z, c14.y
mov.f32f32 r15.w, c14.y
-cov.u32f32 r14.x, r14.x
-cov.u32f32 r14.y, r14.y
-mov.f32f32 r16.x, c15.z
+cov.u32f32 r15.y, r15.y
+cmps.f.lt r16.x, r16.x, r0.x
mov.f32f32 r16.y, c14.y
-mov.f32f32 r14.x, r14.x
-cmps.f.ne r14.y, r14.y, c14.y
mov.f32f32 r16.z, c15.z
+cmps.f.ne r15.y, r15.y, c14.y
mov.f32f32 r16.w, c14.y
-cmps.f.ne r14.x, r14.x, c14.y
+cov.u32f32 r16.x, r16.x
mov.f32f32 r17.x, c14.y
mov.f32f32 r17.y, c14.y
-sel.b32 r14.y, r16.z, r14.y, r16.w
-mov.f32f32 r16.z, c14.y
-sel.b32 r11.x, r11.x, r14.x, r17.x
-sel.b32 r10.y, r10.y, r14.x, r17.y
-add.f r1.z, r1.z, (neg)r14.y
-mov.f32f32 r14.x, c14.y
-add.f r1.w, r1.w, r11.x
-add.f r2.y, r2.y, r10.y
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r10.y, c15.z
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r11.x, c14.y
-mov.f32f32 r14.y, c14.y
-mov.f32f32 r16.w, r1.w
-mov.f32f32 r17.x, r2.y
-mov.f32f32 r17.y, c14.y
-mov.f32f32 r17.z, c15.z
-mov.f32f32 r17.w, r16.w
-mov.f32f32 r18.x, r17.x
+sel.b32 r16.w, r2.x, r15.y, r16.w
+cmps.f.ne r16.x, r16.x, c14.y
+sel.b32 r15.y, r2.y, r15.y, r17.x
+mov.f32f32 r17.x, c15.z
+add.f r1.z, r1.z, r16.w
mov.f32f32 r16.w, c14.y
-mov.f32f32 r17.x, c14.y
-mov.f32f32 r18.y, c14.y
-mov.f32f32 r18.z, c15.z
-mov.f32f32 r18.w, c14.y
-mov.f32f32 r19.x, c14.y
-sam (f32)(w)r19.y, r17.w, s#2, t#2
-(sy)(ss)cmps.f.lt r17.w, r20.x, r1.z
-cmps.f.lt r18.x, r20.x, r1.z
-mov.f32f32 r19.y, c14.y
-mov.f32f32 r19.z, c15.z
-cov.u32f32 r17.w, r17.w
-cov.u32f32 r18.x, r18.x
-mov.f32f32 r19.w, c14.y
-mov.f32f32 r20.x, c14.y
-mov.f32f32 r17.w, r17.w
-cmps.f.ne r18.x, r18.x, c14.y
-mov.f32f32 r20.y, c15.z
-mov.f32f32 r20.z, c14.y
-cmps.f.ne r17.w, r17.w, c14.y
-mov.f32f32 r20.w, c14.y
-mov.f32f32 r21.x, c14.y
-sel.b32 r18.x, r20.y, r18.x, r20.z
-mov.f32f32 r20.y, c14.y
-sel.b32 r11.y, r11.y, r17.w, r20.w
-sel.b32 r10.w, r10.w, r17.w, r21.x
-add.f r1.z, r1.z, (neg)r18.x
-mov.f32f32 r17.w, c15.z
-add.f r1.w, r1.w, r11.y
-add.f r2.y, r2.y, r10.w
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r10.w, c14.y
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r11.y, c14.y
+add.f r17.w, r12.w, r15.y
+mov.f32f32 r12.w, c14.y
+mov.f32f32 r17.z, r1.z
+sel.b32 r15.y, r17.x, r16.x, r16.w
+mov.f32f32 r16.x, r17.w
+mov.f32f32 r16.w, c14.y
+mov.f32f32 r17.x, c15.z
mov.f32f32 r18.x, c14.y
-mov.f32f32 r20.z, r1.w
-mov.f32f32 r20.w, r2.y
-mov.f32f32 r21.x, c15.z
-mov.f32f32 r21.y, c14.y
-mov.f32f32 r21.z, r20.z
-mov.f32f32 r21.w, r20.w
-mov.f32f32 r20.z, c14.y
-(rpt4)nop
-sam (f32)(w)r21.z, r21.z, s#2, t#2
-(sy)cmps.f.lt r20.w, r22.y, r1.z
-(ss)cmps.f.lt r21.z, r22.y, r1.z
-(rpt1)nop
-cov.u32f32 r20.w, r20.w
-cov.u32f32 r21.z, r21.z
+mov.f32f32 r18.y, c14.y
+sam (f32)(w)r18.z, r17.z, s#2, t#2
+add.f r0.x, r0.x, (neg)r15.y
+(rpt2)nop
+(sy)cmps.f.lt r15.y, r19.y, r0.x
+mov.f32f32 r0.x, r0.x
(rpt1)nop
-mov.f32f32 r20.w, r20.w
-cmps.f.ne r21.z, r21.z, c14.y
+cov.u32f32 r15.y, r15.y
+(ss)cmps.f.lt r17.z, r19.y, r0.x
(rpt1)nop
-cmps.f.ne r20.w, r20.w, c14.y
-sel.b32 r18.x, r21.x, r21.z, r18.x
+cmps.f.ne r15.y, r15.y, c14.y
+cov.u32f32 r17.z, r17.z
(rpt1)nop
-sel.b32 r12.w, r12.w, r20.w, r20.z
-sel.b32 r11.z, r11.z, r20.w, r21.y
-add.f r1.z, r1.z, (neg)r18.x
+sel.b32 r17.w, r2.x, r15.y, r18.y
+cmps.f.ne r17.z, r17.z, c14.y
+sel.b32 r15.y, r2.y, r15.y, r18.x
nop
-add.f r1.w, r1.w, r12.w
-add.f r2.y, r2.y, r11.z
-mov.f32f32 r1.z, r1.z
+add.f r1.z, r1.z, r17.w
+sel.b32 r16.w, r17.x, r17.z, r16.w
+add.f r17.w, r16.x, r15.y
nop
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r2.y, r2.y
-(rpt1)nop
-mov.f32f32 r11.z, r1.w
-mov.f32f32 r12.w, r2.y
-(rpt1)nop
-mov.f32f32 r20.z, r11.z
-mov.f32f32 r20.w, r12.w
-(rpt5)nop
-sam (f32)(w)r20.z, r20.z, s#2, t#2
-(sy)cmps.f.lt r11.z, r21.y, r1.z
-cmps.f.lt r12.w, r21.y, r1.z
+mov.f32f32 r17.z, r1.z
+add.f r0.x, r0.x, (neg)r16.w
+mov.f32f32 r15.y, r17.w
+(rpt3)nop
+sam (f32)(w)r17.z, r17.z, s#2, t#2
+(sy)cmps.f.lt r16.x, r18.y, r0.x
+mov.f32f32 r0.x, r0.x
(rpt1)nop
-cov.u32f32 r11.z, r11.z
-cov.u32f32 r12.w, r12.w
+cov.u32f32 r16.x, r16.x
+cmps.f.lt r16.w, r18.y, r0.x
(rpt1)nop
-mov.f32f32 r11.z, r11.z
-cmps.f.ne r12.w, r12.w, c14.y
+cmps.f.ne r16.x, r16.x, c14.y
+cov.u32f32 r16.w, r16.w
(rpt1)nop
-cmps.f.ne r11.z, r11.z, c14.y
-sel.b32 r12.w, r17.w, r12.w, r20.y
-(rpt1)nop
-sel.b32 r10.z, r10.z, r11.z, r11.y
-sel.b32 r8.w, r8.w, r11.z, r10.w
-add.f r1.z, r1.z, (neg)r12.w
+sel.b32 r12.w, r2.x, r16.x, r12.w
+cmps.f.ne r16.w, r16.w, c14.y
+sel.b32 r16.x, r2.y, r16.x, r17.y
nop
-add.f r1.w, r1.w, r10.z
-add.f r2.y, r2.y, r8.w
-mov.f32f32 r1.z, r1.z
+add.f r1.z, r1.z, r12.w
+sel.b32 r12.w, r16.z, r16.w, r16.y
+add.f r16.y, r15.y, r16.x
nop
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r2.y, r2.y
-(rpt1)nop
-mov.f32f32 r8.w, r1.w
-mov.f32f32 r10.z, r2.y
-(rpt1)nop
-mov.f32f32 r11.y, r8.w
-mov.f32f32 r11.z, r10.z
-(rpt5)nop
-sam (f32)(w)r20.y, r11.y, s#2, t#2
-(sy)cmps.f.lt r8.w, r21.x, r1.z
-cmps.f.lt r10.z, r21.x, r1.z
-(rpt1)nop
-cov.u32f32 r8.w, r8.w
-cov.u32f32 r10.z, r10.z
+mov.f32f32 r16.x, r1.z
+add.f r0.x, r0.x, (neg)r12.w
+mov.f32f32 r12.w, r16.y
+(rpt3)nop
+sam (f32)(w)r16.x, r16.x, s#2, t#2
+(sy)cmps.f.lt r15.y, r16.w, r0.x
+mov.f32f32 r0.x, r0.x
(rpt1)nop
-mov.f32f32 r8.w, r8.w
-cmps.f.ne r10.z, r10.z, c14.y
+cov.u32f32 r15.y, r15.y
+(ss)cmps.f.lt r16.x, r16.w, r0.x
(rpt1)nop
-cmps.f.ne r8.w, r8.w, c14.y
-sel.b32 r10.z, r19.z, r10.z, r19.y
+cmps.f.ne r15.y, r15.y, c14.y
+cov.u32f32 r16.x, r16.x
(rpt1)nop
-sel.b32 r10.x, r10.x, r8.w, r20.x
-sel.b32 r8.w, r9.z, r8.w, r19.w
-add.f r1.z, r1.z, (neg)r10.z
+sel.b32 r15.w, r2.x, r15.y, r15.w
+cmps.f.ne r16.x, r16.x, c14.y
+sel.b32 r15.y, r2.y, r15.y, r15.z
nop
-add.f r1.w, r1.w, r10.x
-add.f r2.y, r2.y, r8.w
-mov.f32f32 r1.z, r1.z
+add.f r1.z, r1.z, r15.w
+sel.b32 r14.y, r14.z, r16.x, r14.y
+add.f r15.z, r12.w, r15.y
nop
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r2.y, r2.y
-(rpt1)nop
-mov.f32f32 r8.w, r1.w
-mov.f32f32 r9.z, r2.y
-(rpt1)nop
-mov.f32f32 r10.z, r8.w
-mov.f32f32 r10.w, r9.z
-(rpt5)nop
-sam (f32)(w)r19.y, r10.z, s#2, t#2
-(sy)cmps.f.lt r8.w, r20.x, r1.z
-cmps.f.lt r9.z, r20.x, r1.z
-(rpt1)nop
-cov.u32f32 r8.w, r8.w
-cov.u32f32 r9.z, r9.z
+mov.f32f32 r15.y, r1.z
+add.f r0.x, r0.x, (neg)r14.y
+mov.f32f32 r12.w, r15.z
+(rpt3)nop
+sam (f32)(w)r15.y, r15.y, s#2, t#2
+(sy)cmps.f.lt r14.y, r16.x, r0.x
+mov.f32f32 r0.x, r0.x
(rpt1)nop
-mov.f32f32 r8.w, r8.w
-cmps.f.ne r9.z, r9.z, c14.y
+cov.u32f32 r14.y, r14.y
+cmps.f.lt r14.z, r16.x, r0.x
(rpt1)nop
-cmps.f.ne r8.w, r8.w, c14.y
-sel.b32 r9.z, r18.z, r9.z, r18.y
+cmps.f.ne r14.y, r14.y, c14.y
+cov.u32f32 r14.z, r14.z
(rpt1)nop
-sel.b32 r9.w, r9.w, r8.w, r19.x
-sel.b32 r8.w, r9.x, r8.w, r18.w
-add.f r1.z, r1.z, (neg)r9.z
+sel.b32 r12.x, r2.x, r14.y, r12.x
+cmps.f.ne r14.z, r14.z, c14.y
+sel.b32 r14.y, r2.y, r14.y, r15.x
nop
-add.f r1.w, r1.w, r9.w
-add.f r2.y, r2.y, r8.w
-mov.f32f32 r1.z, r1.z
+add.f r1.z, r1.z, r12.x
+sel.b32 r12.x, r14.w, r14.z, r13.w
+add.f r14.z, r12.w, r14.y
nop
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r2.y, r2.y
-(rpt1)nop
-mov.f32f32 r8.w, r1.w
-mov.f32f32 r9.x, r2.y
-(rpt1)nop
-mov.f32f32 r9.z, r8.w
-mov.f32f32 r9.w, r9.x
-(rpt5)nop
-sam (f32)(w)r17.w, r9.z, s#2, t#2
-(sy)cmps.f.lt r8.w, r18.z, r1.z
-cmps.f.lt r9.x, r18.z, r1.z
-(rpt1)nop
-cov.u32f32 r8.w, r8.w
-cov.u32f32 r9.x, r9.x
+mov.f32f32 r14.y, r1.z
+add.f r0.x, r0.x, (neg)r12.x
+mov.f32f32 r12.x, r14.z
+(rpt3)nop
+sam (f32)(w)r14.y, r14.y, s#2, t#2
+(sy)cmps.f.lt r12.w, r15.x, r0.x
+mov.f32f32 r0.x, r0.x
(rpt1)nop
-mov.f32f32 r8.w, r8.w
-cmps.f.ne r9.x, r9.x, c14.y
+cov.u32f32 r12.w, r12.w
+cmps.f.lt r13.w, r15.x, r0.x
(rpt1)nop
-cmps.f.ne r8.w, r8.w, c14.y
-sel.b32 r9.x, r17.z, r9.x, r17.y
+cmps.f.ne r12.w, r12.w, c14.y
+cov.u32f32 r13.w, r13.w
(rpt1)nop
-sel.b32 r9.y, r9.y, r8.w, r17.x
-sel.b32 r7.w, r7.w, r8.w, r16.w
-add.f r1.z, r1.z, (neg)r9.x
+sel.b32 r13.z, r2.x, r12.w, r13.z
+cmps.f.ne r13.w, r13.w, c14.y
+sel.b32 r9.z, r2.y, r12.w, r9.z
nop
-add.f r1.w, r1.w, r9.y
-add.f r2.y, r2.y, r7.w
-mov.f32f32 r1.z, r1.z
+add.f r1.z, r1.z, r13.z
+sel.b32 r12.w, r14.x, r13.w, r13.y
+add.f r13.z, r12.x, r9.z
nop
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r2.y, r2.y
-(rpt1)nop
-mov.f32f32 r7.w, r1.w
-mov.f32f32 r8.w, r2.y
-(rpt1)nop
-mov.f32f32 r9.x, r7.w
-mov.f32f32 r9.y, r8.w
-(rpt5)nop
-(ss)nop
-sam (f32)(w)r8.w, r9.x, s#2, t#2
-(sy)cmps.f.lt r7.w, r9.z, r1.z
-cmps.f.lt r8.w, r9.z, r1.z
-(rpt1)nop
-cov.u32f32 r7.w, r7.w
-cov.u32f32 r8.w, r8.w
+mov.f32f32 r13.y, r1.z
+add.f r0.x, r0.x, (neg)r12.w
+mov.f32f32 r9.z, r13.z
+(rpt3)nop
+sam (f32)(w)r13.y, r13.y, s#2, t#2
+(sy)cmps.f.lt r12.x, r14.x, r0.x
+mov.f32f32 r0.x, r0.x
(rpt1)nop
-mov.f32f32 r7.w, r7.w
-cmps.f.ne r8.w, r8.w, c14.y
+cov.u32f32 r12.x, r12.x
+cmps.f.lt r12.w, r14.x, r0.x
(rpt1)nop
-cmps.f.ne r7.w, r7.w, c14.y
-sel.b32 r8.w, r10.y, r8.w, r14.x
+cmps.f.ne r12.x, r12.x, c14.y
+cov.u32f32 r12.w, r12.w
(rpt1)nop
-sel.b32 r8.z, r8.z, r7.w, r14.y
-sel.b32 r7.y, r7.y, r7.w, r11.x
-add.f r1.z, r1.z, (neg)r8.w
+sel.b32 r13.x, r2.x, r12.x, r13.x
+cmps.f.ne r12.w, r12.w, c14.y
+sel.b32 r12.x, r2.y, r12.x, r12.z
nop
-add.f r1.w, r1.w, r8.z
-add.f r2.y, r2.y, r7.y
-mov.f32f32 r1.z, r1.z
+add.f r1.z, r1.z, r13.x
+sel.b32 r11.y, r12.y, r12.w, r11.y
+add.f r12.y, r9.z, r12.x
nop
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r2.y, r2.y
-(rpt1)nop
-mov.f32f32 r7.y, r1.w
-mov.f32f32 r7.w, r2.y
-(rpt1)nop
-mov.f32f32 r8.z, r7.y
-mov.f32f32 r8.w, r7.w
-(rpt5)nop
-(ss)nop
-sam (f32)(w)r8.z, r8.z, s#2, t#2
-(sy)cmps.f.lt r7.y, r9.y, r1.z
-cmps.f.lt r7.w, r9.y, r1.z
-(rpt1)nop
-cov.u32f32 r7.y, r7.y
-cov.u32f32 r7.w, r7.w
+mov.f32f32 r12.x, r1.z
+add.f r0.x, r0.x, (neg)r11.y
+mov.f32f32 r9.z, r12.y
+(rpt3)nop
+sam (f32)(w)r12.x, r12.x, s#2, t#2
+(sy)cmps.f.lt r11.y, r12.w, r0.x
+mov.f32f32 r0.x, r0.x
(rpt1)nop
-mov.f32f32 r7.y, r7.y
-cmps.f.ne r7.w, r7.w, c14.y
+cov.u32f32 r11.y, r11.y
+(ss)cmps.f.lt r12.x, r12.w, r0.x
(rpt1)nop
-cmps.f.ne r7.y, r7.y, c14.y
-sel.b32 r7.w, r16.x, r7.w, r15.w
+cmps.f.ne r11.y, r11.y, c14.y
+cov.u32f32 r12.x, r12.x
(rpt1)nop
-sel.b32 r7.z, r7.z, r7.y, r16.z
-sel.b32 r7.x, r7.x, r7.y, r16.y
-add.f r1.z, r1.z, (neg)r7.w
+sel.b32 r11.x, r2.x, r11.y, r11.x
+cmps.f.ne r12.x, r12.x, c14.y
+sel.b32 r8.y, r2.y, r11.y, r8.y
nop
-add.f r1.w, r1.w, r7.z
-add.f r2.y, r2.y, r7.x
-mov.f32f32 r1.z, r1.z
+add.f r1.z, r1.z, r11.x
+sel.b32 r11.x, r11.w, r12.x, r11.z
+add.f r11.z, r9.z, r8.y
nop
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r2.y, r2.y
-(rpt1)nop
-mov.f32f32 r7.x, r1.w
-mov.f32f32 r7.y, r2.y
-(rpt1)nop
-mov.f32f32 r7.z, r7.x
-mov.f32f32 r7.w, r7.y
-(rpt5)nop
-sam (f32)(w)r7.x, r7.z, s#2, t#2
-(sy)cmps.f.lt r7.x, r7.w, r1.z
-cmps.f.lt r7.y, r7.w, r1.z
-(rpt1)nop
-cov.u32f32 r7.x, r7.x
-cov.u32f32 r7.y, r7.y
+mov.f32f32 r11.y, r1.z
+add.f r0.x, r0.x, (neg)r11.x
+mov.f32f32 r8.y, r11.z
+(rpt3)nop
+sam (f32)(w)r11.x, r11.y, s#2, t#2
+(sy)cmps.f.lt r9.z, r11.w, r0.x
+mov.f32f32 r0.x, r0.x
(rpt1)nop
-mov.f32f32 r7.x, r7.x
-cmps.f.ne r7.y, r7.y, c14.y
+cov.u32f32 r9.z, r9.z
+cmps.f.lt r11.x, r11.w, r0.x
(rpt1)nop
-cmps.f.ne r7.x, r7.x, c14.y
-sel.b32 r7.y, r15.x, r7.y, r14.w
+cmps.f.ne r9.z, r9.z, c14.y
+cov.u32f32 r11.x, r11.x
(rpt1)nop
-sel.b32 r2.z, r2.z, r7.x, r15.z
-sel.b32 r6.y, r6.y, r7.x, r15.y
-add.f r1.z, r1.z, (neg)r7.y
+sel.b32 r10.z, r2.x, r9.z, r10.z
+cmps.f.ne r11.x, r11.x, c14.y
+sel.b32 r9.z, r2.y, r9.z, r10.y
nop
-add.f r1.w, r1.w, r2.z
-add.f r2.y, r2.y, r6.y
-mov.f32f32 r1.z, r1.z
+add.f r1.z, r1.z, r10.z
+sel.b32 r1.w, r1.w, r11.x, r10.w
+add.f r10.z, r8.y, r9.z
nop
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r2.y, r2.y
+mov.f32f32 r10.y, r1.z
+add.f r0.x, r0.x, (neg)r1.w
+mov.f32f32 r1.w, r10.z
+(rpt3)nop
+sam (f32)(w)r10.y, r10.y, s#2, t#2
+(sy)cmps.f.lt r8.y, r11.x, r0.x
+mov.f32f32 r0.x, r0.x
(rpt1)nop
-mov.f32f32 r2.z, r1.w
-mov.f32f32 r6.y, r2.y
+cov.u32f32 r8.y, r8.y
+cmps.f.lt r9.z, r11.x, r0.x
(rpt1)nop
-mov.f32f32 r7.x, r2.z
-mov.f32f32 r7.y, r6.y
-(rpt5)nop
-(ss)nop
-sam (f32)(w)r7.x, r7.x, s#2, t#2
-(sy)cmps.f.lt r2.z, r7.w, r1.z
-cmps.f.lt r6.y, r7.w, r1.z
+cmps.f.ne r8.y, r8.y, c14.y
+cov.u32f32 r9.z, r9.z
(rpt1)nop
-cov.u32f32 r2.z, r2.z
-cov.u32f32 r6.y, r6.y
+sel.b32 r10.x, r2.x, r8.y, r10.x
+cmps.f.ne r9.z, r9.z, c14.y
+sel.b32 r8.y, r2.y, r8.y, r9.w
+nop
+add.f r1.z, r1.z, r10.x
+sel.b32 r8.z, r9.y, r9.z, r8.z
+add.f r9.z, r1.w, r8.y
+nop
+mov.f32f32 r9.y, r1.z
+add.f r0.x, r0.x, (neg)r8.z
+mov.f32f32 r1.w, r9.z
+(rpt3)nop
+sam (f32)(w)r9.y, r9.y, s#2, t#2
+(sy)cmps.f.lt r8.y, r10.x, r0.x
+mov.f32f32 r0.x, r0.x
(rpt1)nop
-mov.f32f32 r2.z, r2.z
-cmps.f.ne r6.y, r6.y, c14.y
+cov.u32f32 r8.y, r8.y
+cmps.f.lt r8.z, r10.x, r0.x
(rpt1)nop
-cmps.f.ne r2.z, r2.z, c14.y
-sel.b32 r6.y, r11.w, r6.y, r13.w
+cmps.f.ne r8.y, r8.y, c14.y
+cov.u32f32 r8.z, r8.z
(rpt1)nop
-sel.b32 r4.z, r4.z, r2.z, r14.z
-sel.b32 r2.z, r3.w, r2.z, r13.x
-add.f r1.z, r1.z, (neg)r6.y
+sel.b32 r9.x, r2.x, r8.y, r9.x
+cmps.f.ne r8.z, r8.z, c14.y
+sel.b32 r8.y, r2.y, r8.y, r8.w
nop
-add.f r1.w, r1.w, r4.z
-add.f r2.y, r2.y, r2.z
-mov.f32f32 r1.z, r1.z
+add.f r1.z, r1.z, r9.x
+sel.b32 r2.z, r2.w, r8.z, r2.z
+add.f r8.z, r1.w, r8.y
nop
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r2.y, r2.y
-(rpt1)nop
-mov.f32f32 r2.z, r1.w
-mov.f32f32 r3.w, r2.y
+mov.f32f32 r8.y, r1.z
+add.f r0.x, r0.x, (neg)r2.z
+mov.f32f32 r1.w, r8.z
+(rpt3)nop
+sam (f32)(w)r8.y, r8.y, s#2, t#2
+(sy)cmps.f.lt r2.z, r9.x, r0.x
+mov.f32f32 r0.x, r0.x
(rpt1)nop
-(ss)mov.f32f32 r7.x, r2.z
-mov.f32f32 r7.y, r3.w
-(rpt5)nop
-sam (f32)(w)r7.x, r7.x, s#2, t#2
-(sy)cmps.f.lt r2.z, r7.w, r1.z
-(rpt2)nop
cov.u32f32 r2.z, r2.z
(rpt2)nop
cmps.f.ne r2.z, r2.z, c14.y
(rpt2)nop
-sel.b32 r3.w, r5.y, r2.z, r13.y
-sel.b32 r1.y, r2.w, r2.z, r1.y
+sel.b32 r1.y, r2.x, r2.z, r1.y
+sel.b32 r2.z, r2.y, r2.z, r7.w
(rpt1)nop
-add.f r1.w, r1.w, r3.w
-add.f r1.y, r2.y, r1.y
-(rpt1)nop
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r1.y, r1.y
+add.f r1.y, r1.z, r1.y
+add.f r1.w, r1.w, r2.z
(rpt1)nop
-add.f r2.y, r1.w, (neg)r2.x
+mov.f32f32 r1.z, r1.y
mov.f32f32 r2.z, r1.w
-add.f r2.w, r1.y, (neg)r0.y
-mov.f32f32 r3.w, r1.y
-mov.f32f32 r2.y, r2.y
-(ss)mov.f32f32 r7.x, r2.z
-mov.f32f32 r2.z, r2.w
-mov.f32f32 r7.y, r3.w
-mov.f32f32 r2.y, r2.y
-(rpt2)nop
-mov.f32f32 r7.z, r2.y
-mov.f32f32 r2.y, r2.z
-sam (f32)(w)r8.z, r7.x, s#2, t#2
-(sy)add.f r2.z, c15.z, r9.y
-add.f r1.z, r9.y, (neg)r1.z
-nop
-mov.f32f32 r7.w, r2.y
-mov.f32f32 r2.y, r2.z
-mov.f32f32 r1.z, r1.z
-(rpt3)nop
-sam (f32)(w)r7.x, r7.z, s#2, t#2
-(sy)add.f r2.y, r2.y, (neg)r7.w
-(rpt2)nop
-mov.f32f32 r2.y, r2.y
+(rpt1)nop
+(ss)add.f r8.y, r1.z, (neg)r2.x
+add.f r8.z, r2.z, (neg)r2.y
+(rpt1)nop
+sam (f32)(w)r8.w, r1.z, s#2, t#2
+(sy)(ss)add.f r1.z, c15.z, r9.z
+add.f r0.x, r9.z, (neg)r0.x
+(rpt1)nop
+sam (f32)(w)r8.y, r8.y, s#2, t#2
+(sy)add.f r1.z, r1.z, (neg)r9.x
(rpt5)nop
-rcp r2.y, r2.y
-(ss)mov.f32f32 r2.y, r2.y
-(rpt2)nop
-mul.f r1.z, r1.z, r2.y
-(rpt2)nop
-mov.f32f32 r1.z, r1.z
+rcp r1.z, r1.z
+(ss)mul.f r0.x, r0.x, r1.z
(rpt2)nop
-mul.f r2.x, r2.x, r1.z
-mul.f r0.y, r0.y, r1.z
+(ss)mov.f32f32 r1.z, r0.x
+mul.f r0.x, r2.x, r0.x
(rpt1)nop
-mov.f32f32 r1.z, r2.x
-mov.f32f32 r0.y, r0.y
-(rpt1)nop
-add.f r1.z, r1.w, (neg)r1.z
-add.f r0.y, r1.y, (neg)r0.y
+mul.f r1.z, r2.y, r1.z
+add.f r1.w, r1.y, (neg)r0.x
(rpt1)nop
-mov.f32f32 r1.y, r1.z
-mov.f32f32 r0.y, r0.y
+add.f r2.x, r2.z, (neg)r1.z
+mov.f32f32 r1.y, r1.w
(rpt1)nop
-mov.f32f32 r1.z, r1.y
-mov.f32f32 r1.w, r1.y
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r2.x, r0.y
-mov.f32f32 r2.y, r1.z
-mov.f32f32 r1.z, r0.y
-mov.f32f32 r7.x, r1.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r2.z, r1.z
mov.f32f32 r1.z, r2.x
-mov.f32f32 r7.y, r0.y
+mov.f32f32 r8.y, r1.y
nop
-mov.f32f32 r1.w, r1.y
-mov.f32f32 r2.x, r1.z
-nop
-sam (f32)(xyz)r8.z, r2.y, s#2, t#2
-(sy)mad.f32 r0.y, c16.x, r8.w, c16.y
-mad.f32 r1.y, c16.x, r8.z, c16.y
-sam (f32)(xyzw)r7.x, r7.x, s#0, t#0
-(sy)cmps.f.lt r1.z, r7.w, c15.w
-(ss)mov.f32f32 r2.y, r7.w
-mov.f32f32 r0.y, r0.y
-sam (f32)(xyz)r9.y, r1.w, s#3, t#3
-(sy)(ss)mul.f r1.w, c8.y, r9.z
-mul.f r2.x, c8.x, r9.y
-cov.u32f32 r1.z, r1.z
-absneg.f r0.y, (neg)r0.y
-mov.f32f32 r1.y, r1.y
-mul.f r2.z, c8.z, r9.w
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r2.x, r2.x
-mul.f r2.w, r4.y, r1.y
-mul.f r3.z, r3.z, r1.y
-mad.f32 r0.x, r0.x, r0.y, r2.w
-mad.f32 r2.w, r3.x, r0.y, r3.z
-mul.f r1.y, r4.w, r1.y
-cmps.f.ne p0.x, r1.z, r13.z
-mov.f32f32 r0.x, r0.x
+sam (f32)(xyzw)r2.x, r1.w, s#0, t#0
+(sy)cmps.f.lt r0.x, r2.w, c15.w
+mov.f32f32 r8.z, r1.z
+(rpt1)nop
+sam (f32)(xyz)r8.w, r1.y, s#2, t#2
+(sy)(ss)mad.f32 r1.y, c16.x, r8.w, c16.y
+cov.u32f32 r0.x, r0.x
mad.f32 r1.z, c16.x, r9.x, c16.y
-mov.f32f32 r2.w, r2.w
-mad.f32 r0.y, r3.y, r0.y, r1.y
-mov.f32f32 r1.y, r2.z
-mov.f32f32 r1.z, r1.z
+mad.f32 r1.w, c16.x, r9.y, c16.y
+mov.f32f32 r7.w, r1.y
+cmps.f.ne p0.x, r0.x, r4.w
+absneg.f r0.x, (neg)r1.z
+mul.f r1.y, r4.x, r1.y
+mul.f r1.z, r4.z, r7.w
+mul.f r4.x, r5.y, r7.w
+mad.f32 r0.y, r0.y, r0.x, r1.z
+mov.f32f32 r1.z, r1.w
+mad.f32 r4.x, r5.z, r0.x, r4.x
+mad.f32 r0.x, r3.w, r0.x, r1.y
kill p0.x
-mov.f32f32 r3.w, r2.y
-mov.f32f32 r0.y, r0.y
-mad.f32 r0.x, r5.z, r1.z, r0.x
-mad.f32 r2.y, r6.w, r1.z, r2.w
-mad.f32 r0.y, r6.z, r1.z, r0.y
-nop
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r1.z, r2.y
-mov.f32f32 r0.y, r0.y
+mad.f32 r0.y, r6.x, r1.z, r0.y
+mad.f32 r1.y, r5.w, r1.z, r4.x
+mad.f32 r0.x, r8.x, r1.w, r0.x
nop
-mul.f r2.y, r0.x, r0.x
-nop
-mad.f32 r2.y, r1.z, r1.z, r2.y
-(rpt2)nop
-mov.f32f32 r2.y, r2.y
-nop
-mad.f32 r2.y, r0.y, r0.y, r2.y
+mov.f32f32 r1.z, r0.y
+mov.f32f32 r1.w, r1.y
+mov.f32f32 r3.w, r0.x
+sam (f32)(xyz)r5.y, r8.y, s#3, t#3
+(sy)mul.f r4.x, c8.z, r5.w
+mul.f r0.y, r0.y, r1.z
+mul.f r4.z, c8.y, r5.z
+mad.f32 r0.y, r1.y, r1.w, r0.y
+mul.f r1.y, c8.x, r5.y
+mad.f32 r0.y, r3.w, r3.w, r0.y
(rpt5)nop
-rsq r2.y, r2.y
-(ss)mov.f32f32 r2.y, r2.y
-(rpt2)nop
-mul.f r0.x, r0.x, r2.y
-mul.f r1.z, r1.z, r2.y
-mul.f r0.y, r0.y, r2.y
-nop
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r0.y, r0.y
-nop
-mul.f r2.y, r0.x, r0.x
-mul.f r2.w, (neg)c9.x, r0.x
-mad.f32 r2.y, r1.z, r1.z, r2.y
-mad.f32 r2.w, (neg)c9.y, r1.z, r2.w
+rsq r0.y, r0.y
+(ss)mov.f32f32 r3.w, r0.y
+mul.f r0.x, r0.x, r0.y
(rpt1)nop
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r2.w, r2.w
-mad.f32 r2.y, r0.y, r0.y, r2.y
-mad.f32 r2.w, (neg)c9.z, r0.y, r2.w
-(rpt4)nop
-rsq r2.y, r2.y
-(ss)mov.f32f32 r2.y, r2.y
-max.f r2.w, r2.w, c14.y
-(rpt1)nop
-mul.f r0.x, r0.x, r2.y
-mov.f32f32 r2.w, r2.w
-mul.f r1.z, r1.z, r2.y
-mul.f r0.y, r0.y, r2.y
-mov.f32f32 r0.x, r0.x
-mad.f32 r2.y, c8.z, r2.w, (neg)r12.x
-mad.f32 r4.y, c8.y, r2.w, (neg)r8.y
-mad.f32 r2.w, c8.x, r2.w, (neg)r1.x
-mul.f r0.x, r0.x, r4.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r4.x, r4.y
-mov.f32f32 r2.w, r2.w
-mad.f32 r0.x, r1.z, r12.y, r0.x
-mad.f32 r1.z, c12.x, r2.y, r12.x
-mad.f32 r2.y, c12.x, r4.x, r8.y
-mad.f32 r1.x, c12.x, r2.w, r1.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r1.x, r1.x
-mad.f32 r0.x, r0.y, r12.z, r0.x
-(rpt2)nop
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
-max.f r0.x, c14.y, r0.x
-(rpt2)nop
-mov.f32f32 r0.x, r0.x
+(ss)mul.f r0.y, r1.z, r3.w
+mul.f r1.z, r1.w, r3.w
+mov.f32f32 r1.w, r0.x
+nop
+mov.f32f32 r3.w, r0.y
+mul.f r0.y, (neg)c9.x, r0.y
+mov.f32f32 r4.w, r1.z
+nop
+mul.f r5.y, r3.w, r3.w
+mad.f32 r0.y, (neg)c9.y, r1.z, r0.y
+mad.f32 r1.z, r4.w, r4.w, r5.y
+mad.f32 r0.x, (neg)c9.z, r0.x, r0.y
+mad.f32 r0.y, r1.w, r1.w, r1.z
(rpt5)nop
-log2 r0.x, r0.x
-(ss)mul.f r0.x, c12.y, r0.x
-(rpt2)nop
-mov.f32f32 r0.x, r0.x
+rsq r0.y, r0.y
+(ss)mov.f32f32 r1.z, r0.y
+max.f r0.x, r0.x, c14.y
+(ss)mul.f r0.y, r1.w, r0.y
+nop
+mul.f r1.w, r3.w, r1.z
+mov.f32f32 r3.w, r0.x
+mul.f r1.z, r4.w, r1.z
+mad.f32 r0.x, c8.x, r0.x, (neg)r1.x
+mul.f r1.w, r1.w, r6.w
+mad.f32 r4.w, c8.z, r3.w, (neg)r6.y
+mad.f32 r1.z, r1.z, r7.x, r1.w
+mad.f32 r1.w, c8.y, r3.w, (neg)r6.z
+mad.f32 r0.y, r0.y, r7.z, r1.z
+mad.f32 r1.z, c12.x, r4.w, r6.y
+mad.f32 r0.x, c12.x, r0.x, r1.x
+nop
+max.f r0.y, c14.y, r0.y
+mad.f32 r1.x, c12.x, r1.w, r6.z
+(rpt4)nop
+log2 r0.y, r0.y
+(ss)mul.f r0.y, c12.y, r0.y
(rpt5)nop
-exp2 r0.x, r0.x
-(ss)mul.f r0.y, r1.y, r0.x
-mul.f r1.y, r1.w, r0.x
-mad.f32 r0.y, r7.z, r1.z, r0.y
-mad.f32 r1.y, r7.y, r2.y, r1.y
-(ss)mul.f r0.x, r2.x, r0.x
-nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.y, r1.y
-mad.f32 r0.x, r7.x, r1.x, r0.x
-nop
-mul.f r0.y, r0.y, r0.w
-mul.f r1.x, r1.y, r0.w
-mov.f32f32 r0.x, r0.x
-nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.x, r1.x
-mad.f32 r0.y, c7.z, r7.z, r0.y
-mad.f32 r1.x, c7.y, r7.y, r1.x
+exp2 r0.y, r0.y
+(ss)mul.f r1.w, r4.x, r0.y
+mul.f r3.w, r4.z, r0.y
+mad.f32 r1.z, r2.z, r1.z, r1.w
+mad.f32 r1.x, r2.y, r1.x, r3.w
+(ss)mul.f r0.y, r1.y, r0.y
+nop
+mul.f r1.y, r1.z, r4.y
+mul.f r1.x, r1.x, r4.y
+mad.f32 r1.y, c7.z, r2.z, r1.y
+mad.f32 r1.x, c7.y, r2.y, r1.x
+mad.f32 r0.x, r2.x, r0.x, r0.y
+nop
+mul.f r0.y, r0.z, r1.y
+mul.f r1.x, r0.z, r1.x
mul.f r0.x, r0.x, r0.w
nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.w, r1.x
-mov.f32f32 r0.x, r0.x
-nop
-mul.f r0.y, r0.z, r0.y
-mul.f r0.w, r0.z, r0.w
-mad.f32 r0.x, c7.x, r7.x, r0.x
+add.f r0.y, r0.y, r7.y
+add.f r0.w, r1.x, r3.y
+mad.f32 r0.x, c7.x, r2.x, r0.x
nop
-add.f r0.y, r0.y, r5.x
-add.f r0.w, r0.w, r5.w
-mov.f32f32 r0.x, r0.x
-nop
-mul.f r0.y, r0.y, r8.x
-mul.f r0.w, r0.w, r8.x
+mul.f r0.y, r0.y, r5.x
+mul.f r0.w, r0.w, r5.x
mul.f r0.x, r0.z, r0.x
nop
-mul.f r0.y, r0.y, c6.z
-mul.f r0.z, r0.w, c6.y
-add.f r0.x, r0.x, r6.x
-nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-mul.f r0.x, r0.x, r8.x
-nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-mul.f r0.x, r0.x, c6.x
-nop
-mov.f32f32 r3.z, r0.y
-mov.f32f32 r3.y, r0.z
-mov.f32f32 r0.x, r0.x
+mul.f r2.z, r0.y, c6.z
+mul.f r2.y, r0.w, c6.y
+add.f r0.x, r0.x, r3.z
(rpt2)nop
-mov.f32f32 r0.x, r0.x
+mul.f r0.x, r0.x, r5.x
(rpt2)nop
-mov.f32f32 r3.x, r0.x
+mul.f r2.x, r0.x, c6.x
end
-; FRAG: outputs: r3.x (1:0)
-; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r2.z (5:9,cm=f,il=8,b=1) r63.y (5:10,cm=f,il=12,b=1) r3.x (5:11,cm=f,il=16,b=1) r2.y (5:12,cm=f,il=20,b=1) r7.x (5:13,cm=f,il=24,b=1) r3.y (5:14,cm=f,il=28,b=1) r7.x (5:15,cm=f,il=32,b=1)
-; FRAG: 1147 instructions, 0 half, 65 full
-; pos (bary): r1.x
-; color: r3.x
-; fragcoord: r0.x
+nop
+nop
+; FRAG: outputs: r2.x (1:0)
+; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r3.x (5:9,cm=f,il=8,b=1) r63.z (5:10,cm=f,il=12,b=1) r3.x (5:11,cm=f,il=16,b=1) r1.w (5:12,cm=f,il=20,b=1) r6.x (5:13,cm=f,il=24,b=1) r6.x (5:14,cm=f,il=28,b=1) r7.x (5:15,cm=f,il=32,b=1)
+; FRAG: 766 instructions, 0 half, 20 full
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-65.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-65.asm
index ad4df45..b2e35b3 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-65.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-65.asm
@@ -6,346 +6,250 @@
@in(r0.w) in4
@in(r1.x) in5
@in(r1.y) in6
-@in(r1.z) in8
-@in(r1.w) in9
-@in(r2.x) in10
-@in(r2.y) in11
-@in(r2.z) in12
-@in(r2.w) in13
-@out(r11.z) out0
-@out(r11.w) out1
-@out(r12.x) out2
-@out(r12.y) out3
-@out(r7.x) out4
-@out(r7.y) out5
-@out(r7.z) out6
-@out(r7.w) out7
-@out(r3.x) out8
-@out(r3.y) out9
-@out(r3.z) out10
-@out(r3.w) out11
-@out(r9.z) out12
-@out(r9.w) out13
-@out(r10.x) out14
-@out(r10.y) out15
-@out(r1.y) out16
-@out(r1.z) out17
-@out(r1.w) out18
-@out(r2.x) out19
-@out(r12.z) out20
-@out(r12.w) out21
-@out(r13.x) out22
-@out(r13.y) out23
-@out(r8.z) out24
-@out(r8.w) out25
-@out(r9.x) out26
-@out(r9.y) out27
-@out(r10.z) out28
-@out(r10.w) out29
-@out(r11.x) out30
-@out(r11.y) out31
-(sy)(ss)floor.f r3.x, c15.z
-absneg.f r3.y, (abs)c18.x
-absneg.f r3.z, (abs)c18.y
-floor.f r3.w, c15.x
-add.f r3.x, c15.z, (neg)r3.x
-mul.f r4.x, c12.x, r1.z
-mul.f r4.y, c12.x, r0.w
-add.f r3.w, c15.x, (neg)r3.w
-mov.f32f32 r3.x, r3.x
-add.f r3.y, r3.y, r3.z
-mad.f32 r3.z, c13.x, r1.w, r4.x
-mad.f32 r4.x, c13.x, r1.x, r4.y
-max.f r3.x, r3.x, c19.y
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r3.z, r3.z
-min.f r3.x, r3.x, c23.y
-mul.f r4.y, c17.x, r3.y
-mul.f r4.z, c12.z, r0.x
-max.f r3.w, r3.w, c19.y
-max.f r3.x, r3.x, c19.x
-mad.f32 r4.z, c13.z, r0.y, r4.z
-mad.f32 r3.z, c14.x, r2.x, r3.z
-mad.f32 r4.x, c14.x, r1.y, r4.x
-mov.f32f32 r3.x, r3.x
-mad.f32 r4.z, c14.z, r0.z, r4.z
-min.f r3.w, r3.w, c23.y
-mov.f32f32 r3.z, r3.z
-mul.f r3.x, c17.x, r3.x
-add.f r4.z, r4.z, c15.z
-max.f r3.w, r3.w, c19.x
-mad.f32 r4.y, c19.w, r4.y, r4.z
-mov.f32f32 r3.x, r3.x
-mul.f r4.w, c12.y, r0.w
-mov.f32f32 r4.x, r4.x
-absneg.f r5.x, (neg)c5.x
-mad.f32 r3.x, c19.z, r3.x, c15.x
-mov.f32f32 r4.y, r4.y
-mov.f32f32 r3.w, r3.w
-mul.f r5.y, c12.x, r0.x
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r4.y, r4.y
-mad.f32 r5.y, c13.x, r0.y, r5.y
-mad.f32 r4.w, c13.y, r1.x, r4.w
-mov.f32f32 r3.x, r3.x
-add.f r4.y, r4.y, c20.x
-mad.f32 r5.y, c14.x, r0.z, r5.y
-mad.f32 r4.w, c14.y, r1.y, r4.w
-add.f r3.x, r3.x, c20.x
-floor.f r5.z, r4.y
-add.f r5.y, r5.y, c15.x
-mov.f32f32 r4.w, r4.w
-floor.f r5.w, r3.x
-add.f r4.y, r4.y, (neg)r5.z
-mad.f32 r3.w, c17.x, r3.w, r5.y
-mul.f r5.z, r4.w, r3.z
-add.f r3.x, r3.x, (neg)r5.w
-mad.f32 r4.y, c20.y, r4.y, c20.z
-add.f r3.w, r3.w, c20.x
-mul.f r5.w, c12.y, r1.z
-mad.f32 r3.x, c20.y, r3.x, c20.z
-absneg.f r4.y, (abs)r4.y
-floor.f r6.x, r3.w
-mad.f32 r5.w, c13.y, r1.w, r5.w
-absneg.f r3.x, (abs)r3.x
-mul.f r6.y, c20.y, r4.y
-add.f r3.w, r3.w, (neg)r6.x
-mul.f r4.y, r4.y, r4.y
-mul.f r6.x, c20.y, r3.x
-add.f r6.y, c20.w, (neg)r6.y
-mad.f32 r3.w, c20.y, r3.w, c20.z
-mul.f r3.x, r3.x, r3.x
-add.f r6.x, c20.w, (neg)r6.x
-mul.f r4.y, r4.y, r6.y
-absneg.f r3.w, (abs)r3.w
-mov.f32f32 r5.w, r5.w
-mul.f r3.x, r3.x, r6.x
-mov.f32f32 r4.y, r4.y
-mul.f r6.x, r0.x, r0.z
-mul.f r6.y, r0.y, c21.x
-mov.f32f32 r3.x, r3.x
-mul.f r6.z, r0.y, c22.x
-mul.f r6.w, c20.y, r3.w
-mul.f r6.x, r6.x, r6.y
-mul.f r3.w, r3.w, r3.w
-mov.f32f32 r6.y, r6.z
-add.f r6.z, c20.w, (neg)r6.w
-mov.f32f32 r6.x, r6.x
-mad.f32 r5.w, c14.y, r2.x, r5.w
-mov.f32f32 r6.y, r6.y
-mul.f r3.w, r3.w, r6.z
-mov.f32f32 r6.x, r6.x
-mov.f32f32 r5.w, r5.w
-max.f r6.y, r6.y, c19.y
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r6.x, r6.x
-mad.f32 r5.z, r4.x, r5.w, (neg)r5.z
-min.f r6.y, r6.y, c23.y
-mul.f r6.z, r4.x, r5.x
-absneg.f r6.w, (neg)c5.y
-mul.f r1.z, c12.z, r1.z
-min.f r6.y, r6.y, c19.w
-max.f r6.x, r6.x, c19.y
-mov.f32f32 r5.z, r5.z
-mad.f32 r6.z, r4.w, r6.w, r6.z
-mov.f32f32 r6.y, r6.y
-min.f r6.x, r6.x, c23.y
-mul.f r5.z, r5.z, r2.y
-mov.f32f32 r6.z, r6.z
-mul.f r3.x, r3.x, r6.y
-min.f r6.x, r6.x, c21.y
-mov.f32f32 r5.z, r5.z
+@in(r8.x) in8
+@in(r8.y) in9
+@in(r8.z) in10
+@in(r8.w) in11
+@in(r5.z) in12
+@in(r5.w) in13
+@out(r0.x) out0
+@out(r0.y) out1
+@out(r0.z) out2
+@out(r0.w) out3
+@out(r1.x) out4
+@out(r1.y) out5
+@out(r1.z) out6
+@out(r1.w) out7
+@out(r2.x) out8
+@out(r2.y) out9
+@out(r2.z) out10
+@out(r2.w) out11
+@out(r3.x) out12
+@out(r3.y) out13
+@out(r3.z) out14
+@out(r3.w) out15
+@out(r4.x) out16
+@out(r4.y) out17
+@out(r4.z) out18
+@out(r4.w) out19
+@out(r5.x) out20
+@out(r5.y) out21
+@out(r5.z) out22
+@out(r5.w) out23
+@out(r6.x) out24
+@out(r6.y) out25
+@out(r6.z) out26
+@out(r6.w) out27
+@out(r7.x) out28
+@out(r7.y) out29
+@out(r7.z) out30
+@out(r7.w) out31
+@const(c19.x) 0x3ecccccd, 0x00000000, 0x3eaaa64c, 0x3e800000
+@const(c20.x) 0x3f000000, 0x40000000, 0xbf800000, 0x40400000
+@const(c21.x) 0x399d4952, 0x3e4ccccd, 0x3f99999a, 0x3fd9999a
+@const(c22.x) 0x3c88ce70, 0x00000000, 0x00000000, 0x00000000
+@const(c23.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
+(sy)(ss)floor.f r1.z, c15.z
+floor.f r1.w, c15.x
+absneg.f r2.x, (abs)c18.x
+absneg.f r2.y, (abs)c18.y
+add.f r1.z, c15.z, (neg)r1.z
+add.f r1.w, c15.x, (neg)r1.w
+mul.f r2.z, c12.x, r0.w
+add.f r2.x, r2.x, r2.y
+max.f r1.z, r1.z, c19.y
+max.f r1.w, r1.w, c19.y
+mad.f32 r2.y, c13.x, r1.x, r2.z
+mul.f r2.z, c17.x, r2.x
+min.f r1.z, r1.z, c23.y
+min.f r1.w, r1.w, c23.y
+mul.f r2.w, c12.z, r0.x
+mad.f32 r2.y, c14.x, r1.y, r2.y
+max.f r1.z, r1.z, c19.x
+max.f r1.w, r1.w, c19.x
+mad.f32 r2.w, c13.z, r0.y, r2.w
+mul.f r3.x, c12.x, r0.x
+mul.f r1.z, c17.x, r1.z
+mad.f32 r3.x, c13.x, r0.y, r3.x
+mad.f32 r2.w, c14.z, r0.z, r2.w
+mad.f32 r3.x, c14.x, r0.z, r3.x
+mad.f32 r1.z, c19.z, r1.z, c15.x
+absneg.f r3.y, (neg)c5.x
+add.f r2.w, r2.w, c15.z
+mov.f32f32 r2.x, r2.x
+add.f r1.z, r1.z, c20.x
+add.f r3.x, r3.x, c15.x
+mad.f32 r2.z, c19.w, r2.z, r2.w
+mad.f32 r1.w, c17.x, r1.w, r3.x
+floor.f r3.z, r1.z
+mul.f r3.w, r2.y, r3.y
+mul.f r4.x, c12.y, r0.w
+add.f r1.w, r1.w, c20.x
+add.f r1.z, r1.z, (neg)r3.z
+add.f r2.z, r2.z, c20.x
+mad.f32 r3.z, c13.y, r1.x, r4.x
+max.f r2.x, r2.x, c21.z
+mad.f32 r1.z, c20.y, r1.z, c20.z
+floor.f r4.x, r1.w
+floor.f r4.y, r2.z
+mad.f32 r3.z, c14.y, r1.y, r3.z
+absneg.f r1.z, (abs)r1.z
+add.f r1.w, r1.w, (neg)r4.x
+add.f r2.z, r2.z, (neg)r4.y
+absneg.f r4.x, (neg)c5.y
+mul.f r4.y, c20.y, r1.z
+mad.f32 r1.w, c20.y, r1.w, c20.z
+mad.f32 r2.z, c20.y, r2.z, c20.z
+mul.f r1.z, r1.z, r1.z
+add.f r4.y, c20.w, (neg)r4.y
+absneg.f r1.w, (abs)r1.w
+absneg.f r2.z, (abs)r2.z
+mad.f32 r3.w, r3.z, r4.x, r3.w
+mul.f r1.z, r1.z, r4.y
+mul.f r4.y, r0.y, c22.x
+mul.f r4.z, c20.y, r1.w
+mul.f r4.w, c20.y, r2.z
+mul.f r1.w, r1.w, r1.w
+max.f r4.y, r4.y, c19.y
+add.f r4.z, c20.w, (neg)r4.z
+add.f r4.w, c20.w, (neg)r4.w
+mul.f r2.z, r2.z, r2.z
+min.f r4.y, r4.y, c23.y
+mul.f r1.w, r1.w, r4.z
+mul.f r4.z, r0.x, r0.z
+mul.f r4.w, r2.z, r4.w
+min.f r2.z, r4.y, c19.w
+mul.f r4.y, r0.y, c21.x
mul.f r0.w, c12.z, r0.w
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r6.x, r6.x
-mov.f32f32 r5.z, r5.z
+min.f r5.x, r2.x, c21.w
+mul.f r1.z, r1.z, r2.z
+mul.f r2.x, r4.z, r4.y
mad.f32 r0.w, c13.z, r1.x, r0.w
-mad.f32 r1.x, c13.z, r1.w, r1.z
-mul.f r1.z, r3.w, r6.x
-mul.f r1.w, r4.y, r6.x
-max.f r3.y, r3.y, c21.z
-mov.f32f32 r3.w, r5.z
-mov.f32f32 r1.z, r1.z
+mov.f32f32 r1.x, r5.x
+mov.f32f32 r4.y, r1.z
+max.f r2.z, r2.x, c19.y
mad.f32 r0.w, c14.z, r1.y, r0.w
-mad.f32 r1.y, c18.x, r3.x, r1.z
-mad.f32 r1.z, c18.y, r3.x, r1.z
-mov.f32f32 r3.x, r3.y
-mov.f32f32 r7.w, r3.w
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.z, r1.z
-min.f r3.x, r3.x, c21.w
-mov.f32f32 r0.w, r0.w
-absneg.f r3.y, (neg)c5.z
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r3.x, r3.x
-mul.f r3.w, c12.y, r0.x
-mad.f32 r4.y, r0.w, r3.y, r6.z
-mad.f32 r1.x, c14.z, r2.x, r1.x
-mad.f32 r2.x, r1.w, r3.x, r5.y
-mad.f32 r4.z, r1.w, r3.x, r4.z
-mad.f32 r3.w, c13.y, r0.y, r3.w
-mov.f32f32 r4.y, r4.y
-add.f r1.y, r2.x, r1.y
-add.f r1.z, r4.z, r1.z
-mad.f32 r2.x, c14.y, r0.z, r3.w
-max.f r3.w, c19.y, r4.y
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.z, r1.z
-add.f r2.x, r2.x, c15.y
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r4.y, r1.y
-mul.f r4.z, c8.w, r1.y
-mul.f r5.y, c8.z, r1.y
-mul.f r5.z, c8.y, r1.y
-add.f r4.y, c4.x, (neg)r4.y
-mad.f32 r1.w, r1.w, r3.x, r2.x
-mul.f r2.x, c8.x, r1.y
-mov.f32f32 r3.x, r1.z
-mul.f r6.x, r4.y, r4.y
-add.f r6.y, c4.y, (neg)r1.w
-mad.f32 r6.z, c9.w, r1.w, r4.z
-mad.f32 r8.x, c9.z, r1.w, r5.y
-mad.f32 r5.z, c9.y, r1.w, r5.z
-mad.f32 r6.x, r6.y, r6.y, r6.x
-mov.f32f32 r6.z, r6.z
-mov.f32f32 r8.x, r8.x
-mov.f32f32 r5.z, r5.z
-mov.f32f32 r6.x, r6.x
-add.f r3.x, c4.z, (neg)r3.x
-mad.f32 r6.z, c10.w, r1.z, r6.z
-mad.f32 r8.x, c10.z, r1.z, r8.x
-mad.f32 r5.z, c10.y, r1.z, r5.z
-mad.f32 r6.x, r3.x, r3.x, r6.x
+absneg.f r1.y, (neg)c5.z
+mov.f32f32 r2.x, r2.y
+min.f r4.z, r2.z, c23.y
+mul.f r5.y, c12.z, r8.x
+mov.f32f32 r2.z, r0.w
+mov.f32f32 r2.y, r3.z
+min.f r3.z, r4.z, c21.y
+mad.f32 r0.w, r0.w, r1.y, r3.w
+mad.f32 r4.z, c13.z, r8.y, r5.y
+mul.f r9.x, c12.y, r8.x
+mov.f32f32 r3.w, r3.z
+mul.f r3.z, r4.w, r3.z
+mul.f r4.w, c12.y, r0.x
+max.f r9.y, c19.y, r0.w
+mul.f r0.w, r1.w, r3.w
+mad.f32 r1.w, c13.y, r0.y, r4.w
+mad.f32 r3.x, r3.z, r1.x, r3.x
+mad.f32 r1.w, c14.y, r0.z, r1.w
+mov.f32f32 r3.w, r0.w
+mad.f32 r0.w, c18.y, r1.z, r0.w
+mad.f32 r1.z, c18.x, r4.y, r3.w
+mad.f32 r2.w, r3.z, r5.x, r2.w
+add.f r1.w, r1.w, c15.y
+mov.f32f32 r4.y, r9.y
+add.f r1.z, r3.x, r1.z
+add.f r2.w, r2.w, r0.w
+mad.f32 r1.x, r3.z, r1.x, r1.w
+nop
+mov.f32f32 r1.w, r1.z
+mul.f r1.z, c0.x, r1.z
+mov.f32f32 r4.w, r2.w
+add.f r6.y, c4.y, (neg)r1.x
+add.f r6.x, c4.x, (neg)r1.w
+mul.f r0.w, c8.y, r1.w
+mul.f r3.x, c8.x, r1.w
+mul.f r7.w, c8.w, r1.w
+mul.f r3.z, r6.x, r6.x
+mad.f32 r0.w, c9.y, r1.x, r0.w
+mad.f32 r3.z, r6.y, r6.y, r3.z
+add.f r6.z, c4.z, (neg)r4.w
+mad.f32 r0.w, c10.y, r4.w, r0.w
+mad.f32 r3.x, c9.x, r1.x, r3.x
+mad.f32 r3.w, c9.w, r1.x, r7.w
+mad.f32 r3.z, r6.z, r6.z, r3.z
mul.f r0.x, c12.w, r0.x
-mad.f32 r2.x, c9.x, r1.w, r2.x
+mad.f32 r3.x, c10.x, r4.w, r3.x
+mad.f32 r3.w, c10.w, r4.w, r3.w
+mul.f r7.z, c8.z, r1.w
mad.f32 r0.x, c13.w, r0.y, r0.x
-mov.f32f32 r0.y, r3.x
-mov.f32f32 r8.y, r4.y
+mul.f r0.y, c0.w, r1.w
+rsq r3.z, r3.z
+(ss)mov.f32f32 r5.x, r3.z
+mad.f32 r3.y, r6.x, r3.z, r3.y
mad.f32 r0.x, c14.w, r0.z, r0.x
-rsq r0.z, r6.x
-(ss)mov.f32f32 r0.z, r0.z
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r9.x, r0.y
-mov.f32f32 r8.z, r8.y
-mad.f32 r0.y, r4.y, r0.z, r5.x
-mad.f32 r4.y, r6.y, r0.z, r6.w
-mad.f32 r0.z, r3.x, r0.z, r3.y
-add.f r0.x, r0.x, c15.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r3.x, r4.y
-mov.f32f32 r0.z, r0.z
-mad.f32 r3.y, c11.w, r0.x, r6.z
-mul.f r4.y, r0.y, r0.y
-mad.f32 r5.x, c11.z, r0.x, r8.x
-mad.f32 r4.y, r3.x, r3.x, r4.y
-mov.f32f32 r3.y, r3.y
-mad.f32 r5.z, c11.y, r0.x, r5.z
-mad.f32 r2.x, c10.x, r1.z, r2.x
-mov.f32f32 r4.y, r4.y
-mov.f32f32 r3.y, r3.y
-mad.f32 r4.y, r0.z, r0.z, r4.y
-mov.f32f32 r5.x, r5.x
-mov.f32f32 r5.z, r5.z
-mad.f32 r2.x, c11.x, r0.x, r2.x
-(ss)mul.f r6.x, c0.w, r1.y
-mul.f r6.z, c0.z, r1.y
-mul.f r6.w, c0.y, r1.y
-rsq r4.y, r4.y
-(ss)mov.f32f32 r4.y, r4.y
-mov.f32f32 r10.y, r3.y
-mov.f32f32 r3.y, r5.x
-mul.f r5.x, r5.z, c16.y
-mul.f r0.z, r0.z, r4.y
-mul.f r3.x, r3.x, r4.y
-mul.f r0.y, r0.y, r4.y
-mov.f32f32 r10.x, r3.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r9.w, r5.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r0.y, r0.y
+mad.f32 r0.z, c9.z, r1.x, r7.z
+(ss)mad.f32 r3.z, r6.y, r5.x, r4.x
+mov.f32f32 r4.x, r3.y
+mad.f32 r1.y, r6.z, r5.x, r1.y
nop
-mov.f32f32 r10.w, r0.z
-mov.f32f32 r10.z, r3.x
-mov.f32f32 r9.y, r0.y
-mov.f32f32 r0.y, r2.x
-mad.f32 r0.z, c1.w, r1.w, r6.x
-mad.f32 r2.x, c1.z, r1.w, r6.z
-mad.f32 r3.x, c1.y, r1.w, r6.w
-mul.f r0.y, r0.y, c16.x
-mad.f32 r0.z, c2.w, r1.z, r0.z
-mad.f32 r2.x, c2.z, r1.z, r2.x
-mad.f32 r3.x, c2.y, r1.z, r3.x
-mov.f32f32 r9.z, r0.y
-mad.f32 r0.y, c3.w, r0.x, r0.z
-mad.f32 r0.z, c3.z, r0.x, r2.x
-mad.f32 r2.x, c3.y, r0.x, r3.x
-mul.f r3.x, c0.x, r1.y
-mov.f32f32 r12.y, r0.y
-mov.f32f32 r12.x, r0.z
-mov.f32f32 r11.w, r2.x
-mad.f32 r0.y, c1.x, r1.w, r3.x
-mad.f32 r0.z, c7.x, r1.z, c7.y
-mad.f32 r0.y, c2.x, r1.z, r0.y
-mad.f32 r1.y, c7.x, r1.y, c7.y
-mad.f32 r0.x, c3.x, r0.x, r0.y
-mov.f32f32 r0.y, r0.z
-mov.f32f32 r11.y, r4.z
-mov.f32f32 r11.x, r5.y
-mov.f32f32 r11.z, r0.x
-mov.f32f32 r12.w, r0.y
-mov.f32f32 r0.x, r1.y
-mov.f32f32 r0.y, r6.y
-(rpt1)nop
-mov.f32f32 r12.z, r0.x
-mov.f32f32 r8.w, r0.y
-mul.f r0.x, r3.w, c6.z
-mul.f r0.y, r3.w, c6.y
-mul.f r0.z, r3.w, c6.x
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r7.z, r0.x
-mov.f32f32 r7.y, r0.y
-mov.f32f32 r7.x, r0.z
-mul.f r0.x, r4.x, r1.x
-mul.f r0.y, r0.w, r5.w
-mad.f32 r0.x, r0.w, r3.z, (neg)r0.x
-mad.f32 r0.y, r4.w, r1.x, (neg)r0.y
-mov.f32f32 r0.z, r1.x
-mov.f32f32 r1.x, r5.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.w, r0.z
-mov.f32f32 r1.z, r1.x
-mul.f r0.x, r0.x, r2.y
-mul.f r0.y, r0.y, r2.y
-mov.f32f32 r0.z, r3.z
-nop
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.y, r0.z
-mov.f32f32 r0.z, r0.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
+mov.f32f32 r5.x, r3.z
+mul.f r4.x, r4.x, r4.x
+mov.f32f32 r5.y, r1.y
+add.f r0.x, r0.x, c15.w
+mad.f32 r3.z, r3.z, r5.x, r4.x
+mad.f32 r0.z, c10.z, r4.w, r0.z
+mad.f32 r1.y, r1.y, r5.y, r3.z
+mad.f32 r0.w, c11.y, r0.x, r0.w
+mad.f32 r3.x, c11.x, r0.x, r3.x
+mad.f32 r3.w, c11.w, r0.x, r3.w
+mad.f32 r3.z, c11.z, r0.x, r0.z
+mad.f32 r0.y, c1.w, r1.x, r0.y
+mul.f r0.z, c0.z, r1.w
+rsq r1.y, r1.y
+(ss)mov.f32f32 r4.x, r1.y
+mul.f r6.w, r3.y, r1.y
+mul.f r3.y, r0.w, c16.y
+mul.f r3.x, r3.x, c16.x
+mul.f r7.y, r5.y, r4.x
+mul.f r7.x, r5.x, r4.x
+mad.f32 r0.y, c2.w, r4.w, r0.y
+mad.f32 r0.z, c1.z, r1.x, r0.z
+mad.f32 r0.w, c3.w, r0.x, r0.y
+mad.f32 r0.y, c2.z, r4.w, r0.z
+(ss)mul.f r1.y, c0.y, r1.w
+mad.f32 r0.z, c3.z, r0.x, r0.y
+mad.f32 r0.y, c1.y, r1.x, r1.y
+mad.f32 r1.x, c1.x, r1.x, r1.z
+mad.f32 r0.y, c2.y, r4.w, r0.y
+mad.f32 r1.x, c2.x, r2.w, r1.x
+mad.f32 r0.y, c3.y, r0.x, r0.y
+mad.f32 r0.x, c3.x, r0.x, r1.x
+mad.f32 r5.x, c7.x, r1.w, c7.y
+mad.f32 r5.y, c7.x, r4.w, c7.y
+mul.f r1.z, r4.y, c6.z
+mul.f r1.y, r4.y, c6.y
+mul.f r1.x, r9.y, c6.x
+mad.f32 r1.w, c14.z, r8.z, r4.z
+mad.f32 r2.w, c13.y, r8.y, r9.x
+mul.f r4.x, c12.x, r8.x
+mad.f32 r2.w, c14.y, r8.z, r2.w
+mov.f32f32 r4.z, r1.w
+mad.f32 r4.x, c13.x, r8.y, r4.x
(rpt1)nop
-mov.f32f32 r2.x, r0.x
-mov.f32f32 r3.w, r0.y
-mov.f32f32 r3.z, r0.z
-mov.f32f32 r0.x, r4.w
-mov.f32f32 r0.y, r4.x
+mul.f r4.w, r2.x, r4.z
+mad.f32 r8.x, c14.x, r8.z, r4.x
+mov.f32f32 r4.y, r2.w
(rpt1)nop
-mov.f32f32 r3.y, r0.x
-mov.f32f32 r3.x, r0.y
-mov.f32f32 r0.x, r2.w
-mov.f32f32 r0.y, r2.z
+mov.f32f32 r4.x, r8.x
+mul.f r8.y, r2.z, r4.y
+mul.f r8.x, r2.y, r8.x
+mad.f32 r1.w, r2.y, r1.w, (neg)r8.y
+mad.f32 r4.w, r2.z, r4.x, (neg)r4.w
+mad.f32 r8.x, r2.x, r2.w, (neg)r8.x
(rpt1)nop
-mov.f32f32 r13.y, r0.x
-mov.f32f32 r13.x, r0.y
+mul.f r4.w, r4.w, r8.w
+mul.f r2.w, r1.w, r8.w
+mul.f r1.w, r8.x, r8.w
end
-; VERT: outputs: r11.z (0:0) r7.x (5:9) r3.x (5:10) r9.z (5:11) r1.y (5:12) r12.z (5:13) r8.z (5:14) r10.z (5:15)
-; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r1.z (0:0,cm=f,il=16,b=0) r2.z (0:0,cm=3,il=20,b=0)
-; VERT: 304 instructions, 0 half, 14 full
-; pos: r11.z
+nop
+nop
+; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11) r4.x (5:12) r5.x (5:13) r6.x (5:14) r7.x (5:15)
+; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r8.x (0:0,cm=f,il=16,b=0) r5.z (0:0,cm=3,il=20,b=0)
+; VERT: 201 instructions, 0 half, 10 full
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-66.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-66.asm
index 0274fe9..5630fc2 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-66.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-66.asm
@@ -4,206 +4,143 @@
@in(r0.w) in3
@in(r1.x) in4
@in(r1.y) in5
-@out(r1.y) out0
-@out(r1.z) out1
-@out(r1.w) out2
-@out(r2.x) out3
+@out(r2.x) out0
+@out(r2.y) out1
+@out(r2.z) out2
+@out(r2.w) out3
+@const(c9.x) 0x3f000000, 0x00000000, 0x3f233333, 0xbb449ba6
+@const(c10.x) 0xbf000000, 0x3f800000, 0x40000000, 0x3de38866
+@const(c11.x) 0x3fb8aa65, 0x3cf5c28f, 0x00000000, 0x00000000
+@const(c12.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.x, 4, r1.x
add.f r0.y, r0.w, c9.y
bary.f r0.w, 5, r1.x
bary.f r1.z, 10, r1.x
-add.f r1.w, r0.x, c10.x
-bary.f r2.x, 8, r1.x
+add.f r2.x, r0.x, c10.x
+bary.f r1.w, 11, r1.x
add.f r2.y, r0.w, c10.x
-mov.f32f32 r1.z, r1.z
-floor.f r2.z, r1.w
+bary.f r3.y, 8, r1.x
+floor.f r2.z, r2.x
rcp r0.y, r0.y
add.f r0.z, r0.z, c9.y
floor.f r2.w, r2.y
-mov.f32f32 r3.x, r1.z
-add.f r1.z, r1.w, (neg)r2.z
+bary.f r3.z, 9, r1.x
+add.f r3.w, r2.x, (neg)r2.z
(ss)mul.f r0.y, r0.z, r0.y
-add.f r0.z, r2.y, (neg)r2.w
-bary.f r1.w, 11, r1.x
-mov.f32f32 r1.z, r1.z
+absneg.f r0.z, (neg)c7.x
+add.f r4.x, r2.y, (neg)r2.w
+mov.f32f32 r4.y, r3.w
+sam (f32)(xyzw)r2.x, r1.z, s#0, t#0
+(ss)add.f r1.z, r3.w, c10.y
+mul.f r0.z, r0.z, c7.x
+mov.f32f32 r1.w, r4.x
+mul.f r3.w, c9.x, r4.y
+add.f r4.y, c10.z, (neg)r4.y
+mul.f r0.z, r0.z, r0.y
mov.f32f32 r0.y, r0.y
-absneg.f r2.y, (neg)c7.x
-mov.f32f32 r0.z, r0.z
-mul.f r2.z, c9.x, r1.z
-mov.f32f32 r1.w, r1.w
-add.f r2.w, c10.z, (neg)r1.z
-add.f r3.z, c10.z, (neg)r0.z
-mov.f32f32 r2.z, r2.z
-mul.f r2.y, r2.y, c7.x
-mul.f r3.w, c9.x, r0.z
-mov.f32f32 r3.y, r1.w
-add.f r0.x, r0.x, (neg)r2.z
-mul.f r1.w, r2.y, r0.y
-mov.f32f32 r2.y, r3.w
-mov.f32f32 r2.z, r2.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r1.w, r1.w
-add.f r0.w, r0.w, (neg)r2.y
-sam (f32)(xyzw)r3.w, r3.x, s#0, t#0
-(sy)cmps.f.lt r2.y, r4.z, c9.z
-add.f r2.w, c10.x, r0.x
-mul.f r0.y, r1.w, r0.y
+add.f r0.x, r0.x, (neg)r3.w
+mul.f r3.w, c9.x, r1.w
+mov.f32f32 r4.z, r4.y
+mul.f r0.y, r0.z, r0.y
+mov.f32f32 r0.z, r0.x
+add.f r0.w, r0.w, (neg)r3.w
add.f r0.x, c10.y, r0.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.w, r2.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
-add.f r2.w, c10.y, r0.w
-mul.f r1.w, r1.w, c3.z
mul.f r0.y, r0.y, c11.x
-mul.f r0.x, r0.x, c3.z
-mov.f32f32 r2.w, r2.w
-(ss)mov.f32f32 r3.x, r1.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r3.y, r0.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r4.w, r3.x
-add.f r0.w, c10.x, r0.w
-mov.f32f32 r3.x, r3.y
-mov.f32f32 r5.z, r0.x
-mov.f32f32 r0.x, r1.w
-mov.f32f32 r0.w, r0.w
-exp2 r0.y, r0.y
-(ss)mov.f32f32 r0.y, r0.y
-mov.f32f32 r6.y, r3.x
-mul.f r1.w, r2.w, c3.w
-mul.f r0.w, r0.w, c3.w
-add.f r2.w, c12.y, (neg)r0.y
-mov.f32f32 r7.x, r0.x
-mov.f32f32 r0.x, r1.w
-mov.f32f32 r3.x, r0.w
-mul.f r2.w, r2.w, c7.y
-mul.f r0.y, r0.y, c10.y
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r5.x, r3.x
-bary.f r3.x, 6, r1.x
-add.f r0.y, r0.y, r2.w
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r0.w, r0.w
-add.f r2.w, r3.x, c9.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r6.z, r1.w
-mov.f32f32 r5.w, r0.w
-mov.f32f32 r0.w, r2.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.w, r2.w
-mov.f32f32 r3.x, r2.w
-mov.f32f32 r5.y, r0.w
-max.f r0.y, r0.y, c9.y
-mov.f32f32 r6.w, r1.w
-mov.f32f32 r6.x, r3.x
-mov.f32f32 r7.y, r0.x
-mov.f32f32 r0.x, r2.w
-cov.u32f32 r0.w, r2.y
-sam.s (f32)(x)r1.w, r4.w, s#2, t#2
-(sy)mov.f32f32 r1.w, r1.w
-min.f r0.y, r0.y, c10.y
-sam.s (f32)(x)r2.y, r6.y, s#2, t#2
-(sy)mov.f32f32 r2.y, r2.y
-sam.s (f32)(x)r2.w, r5.z, s#2, t#2
-(sy)mov.f32f32 r2.w, r2.w
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r3.x, r3.z
-add.f r3.y, c12.y, (neg)r0.y
-add.f r3.z, c12.y, (neg)r0.y
-(ss)add.f r4.w, c12.y, (neg)r0.y
-mul.f r5.x, r2.z, r3.x
-mul.f r3.y, r3.y, c6.z
-mul.f r3.z, r3.z, c6.y
-mul.f r4.w, r4.w, c6.x
-mul.f r1.w, r5.x, r1.w
-add.f r1.z, r1.z, c10.y
-mov.f32f32 r7.z, r0.x
-mov.f32f32 r0.x, (0.000000)
-add.f r0.z, r0.z, c10.y
-mul.f r3.x, r1.z, r3.x
-mov.f32f32 r2.x, r2.x
-cmps.f.ne p0.x, r0.w, r0.x
-mul.f r0.x, r2.z, r0.z
-mad.f32 r0.w, r3.x, r2.w, r1.w
-sam.s (f32)(x)r1.w, r7.x, s#2, t#2
+add.f r0.z, c10.x, r0.z
+mov.f32f32 r3.w, r0.w
+mul.f r4.w, r0.x, c3.z
+add.f r0.x, c10.y, r0.w
+mul.f r5.z, r0.z, c3.z
+add.f r0.z, c10.x, r3.w
+mov.f32f32 r6.y, r4.w
+mul.f r5.x, r0.x, c3.w
+mov.f32f32 r7.x, r5.z
+mul.f r6.z, r0.z, c3.w
+exp2 r0.x, r0.y
+(ss)mov.f32f32 r0.y, r0.x
+bary.f r0.z, 6, r1.x
+mov.f32f32 r5.w, r5.x
+mov.f32f32 r7.y, r6.z
+add.f r0.y, c12.y, (neg)r0.y
+add.f r5.y, r0.z, c9.w
+add.f r0.z, c10.z, (neg)r1.w
+mul.f r0.x, r0.x, c10.y
+add.f r0.w, r4.x, c10.y
+mov.f32f32 r7.z, r5.y
+mul.f r0.y, r0.y, c7.y
+mov.f32f32 r6.w, r5.y
+mov.f32f32 r6.x, r5.y
(rpt1)nop
-(sy)mov.f32f32 r1.w, r1.w
-mov.f32f32 r0.w, r0.w
-bary.f r2.z, 0, r1.x
-mov.f32f32 r2.w, r2.x
-mad.f32 r0.x, r0.x, r1.w, r0.w
-bary.f r0.w, 1, r1.x
-bary.f r1.w, 9, r1.x
+add.f r0.x, r0.x, r0.y
+sam.s (f32)(x)r7.x, r7.x, s#2, t#2
+mov.f32f32 r0.y, r0.z
+nop
+(ss)nop
+sam.s (f32)(x)r7.y, r6.y, s#2, t#2
+sam.s (f32)(x)r7.z, r4.w, s#2, t#2
mul.f r0.z, r1.z, r0.z
-mov.f32f32 r0.x, r0.x
-bary.f (ei)r1.x, 2, r1.x
-mov.f32f32 r1.y, r1.w
-mad.f32 r0.x, r0.z, r2.y, r0.x
+mul.f r1.w, r4.y, r0.w
+mul.f r0.y, r4.z, r0.y
+max.f r0.x, r0.x, c9.y
+sam.s (f32)(x)r3.w, r5.z, s#2, t#2
+(sy)cmps.f.lt r4.x, r2.w, c9.z
+mul.f r0.w, r1.z, r0.w
+mul.f r0.y, r0.y, r7.x
+min.f r0.x, r0.x, c10.y
+mad.f32 r0.y, r0.z, r7.y, r0.y
+cov.u32f32 r0.z, r4.x
+mad.f32 r0.y, r1.w, r3.w, r0.y
+add.f r1.z, c12.y, (neg)r0.x
+mad.f32 r0.y, r0.w, r7.z, r0.y
+add.f r0.w, c12.y, (neg)r0.x
+add.f r1.w, c12.y, (neg)r0.x
+mov.f32f32 r3.w, (0.000000)
+mul.f r0.y, c10.w, r0.y
+bary.f r4.x, 0, r1.x
+mul.f r1.z, r1.z, c6.z
+mul.f r0.w, r0.w, c6.y
+mov.f32f32 r4.y, r0.y
+bary.f r4.z, 2, r1.x
+bary.f (ei)r1.x, 1, r1.x
+mul.f r1.y, r2.x, r4.x
+mul.f r1.w, r1.w, c6.x
+mul.f r4.x, r2.z, r4.z
+mul.f r1.x, r2.y, r1.x
+mul.f r0.y, r1.y, r0.y
+cmps.f.ne p0.x, r0.z, r3.w
+mul.f r0.z, r4.x, r4.y
+mul.f r1.x, r1.x, r4.y
+mad.f32 r0.z, c5.z, r2.z, r0.z
+mad.f32 r1.x, c5.y, r2.y, r1.x
+mad.f32 r0.y, c5.x, r2.x, r0.y
+sam (f32)(w)r3.y, r3.y, s#1, t#1
+(sy)cmps.f.lt r1.y, r4.x, c11.y
+mul.f r0.z, r0.x, r0.z
+mul.f r1.x, r0.x, r1.x
+mul.f r0.x, r0.x, r0.y
kill p0.x
-mov.f32f32 r1.z, r4.z
-mov.f32f32 r3.x, r1.y
-mov.f32f32 r0.x, r0.x
-mul.f r1.x, r4.y, r1.x
-mul.f r0.w, r4.x, r0.w
-mov.f32f32 r2.x, r1.z
-mul.f r0.x, c10.w, r0.x
-mul.f r2.y, r3.w, r2.z
-sam (f32)(w)r5.x, r2.w, s#1, t#1
-nop
-(sy)cmps.f.lt r2.z, r5.w, c11.y
-mov.f32f32 r0.x, r0.x
-(ss)mov.f32f32 r2.w, r5.w
-mov.f32f32 r3.x, c9.y
-nop
-mul.f r1.x, r1.x, r0.x
-mul.f r0.w, r0.w, r0.x
-mul.f r0.x, r2.y, r0.x
-cov.u32f32 r2.y, r2.z
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r0.w, r0.w
-mad.f32 r1.x, c5.z, r4.y, r1.x
-mad.f32 r0.w, c5.y, r4.x, r0.w
-mov.f32f32 r0.x, r0.x
-cmps.f.ne r2.y, r2.y, c9.y
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r0.w, r0.w
-mad.f32 r0.x, c5.x, r3.w, r0.x
-mov.f32f32 r2.z, r2.w
-mul.f r1.x, r0.y, r1.x
-mul.f r0.w, r0.y, r0.w
-mov.f32f32 r0.x, r0.x
-sel.b32 r2.y, r3.x, r2.y, r2.z
-add.f r1.x, r1.x, r3.y
-add.f r0.w, r0.w, r3.z
-mul.f r0.x, r0.y, r0.x
-nop
-mul.f r0.y, r1.x, r2.y
-mul.f r0.w, r0.w, r2.y
-add.f r0.x, r0.x, r4.w
-nop
-mul.f r0.y, r0.y, c4.z
-mul.f r0.w, r0.w, c4.y
-mul.f r0.x, r0.x, r2.y
+add.f r0.y, r0.z, r1.z
+cov.u32f32 r0.z, r1.y
+add.f r0.w, r1.x, r0.w
+add.f r0.x, r0.x, r1.w
+mov.f32f32 r1.x, c9.y
+cmps.f.ne r0.z, r0.z, c9.y
+(rpt2)nop
+sel.b32 r0.z, r1.x, r0.z, r4.x
+(rpt2)nop
+mul.f r0.y, r0.y, r0.z
+mul.f r0.w, r0.w, r0.z
+(rpt1)nop
+mul.f r2.z, r0.y, c4.z
+mul.f r2.y, r0.w, c4.y
+mul.f r0.x, r0.x, r0.z
+(rpt2)nop
+mul.f r2.x, r0.x, c4.x
+end
nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.w, r0.w
-mul.f r0.x, r0.x, c4.x
nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.x, r0.x
nop
-mov.f32f32 r1.w, r0.y
-mov.f32f32 r1.z, r0.w
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
-mov.f32f32 r1.y, r0.x
-end
-; FRAG: outputs: r1.y (1:0)
+; FRAG: outputs: r2.x (1:0)
; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r2.x (5:10,cm=f,il=12,b=1) r1.x (5:11,cm=f,il=16,b=1)
-; FRAG: 195 instructions, 0 half, 8 full
-; pos (bary): r1.x
-; color: r1.y
-; fragcoord: r0.x
+; FRAG: 133 instructions, 0 half, 8 full
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-67.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-67.asm
index b65a363..63ee7bc 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-67.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-67.asm
@@ -6,242 +6,182 @@
@in(r0.w) in4
@in(r1.x) in5
@in(r1.y) in6
-@in(r1.z) in8
-@in(r1.w) in9
-@out(r5.x) out0
-@out(r5.y) out1
-@out(r5.z) out2
-@out(r5.w) out3
-@out(r4.x) out4
-@out(r4.y) out5
-@out(r4.z) out6
-@out(r4.w) out7
-@out(r2.z) out8
-@out(r2.w) out9
-@out(r3.x) out10
-@out(r3.y) out11
-@out(r0.z) out12
-@out(r0.w) out13
-@out(r1.x) out14
-@out(r1.y) out15
-(sy)(ss)floor.f r2.x, c14.z
-floor.f r2.y, c14.x
-absneg.f r2.z, (abs)c17.x
-absneg.f r2.w, (abs)c17.y
-add.f r2.x, c14.z, (neg)r2.x
-add.f r2.y, c14.x, (neg)r2.y
-mul.f r3.x, c11.x, r0.w
-add.f r2.z, r2.z, r2.w
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r2.y, r2.y
-mad.f32 r2.w, c12.x, r1.x, r3.x
-mov.f32f32 r2.z, r2.z
-max.f r2.x, r2.x, c18.y
-max.f r2.y, r2.y, c18.y
-mov.f32f32 r2.w, r2.w
-mul.f r3.x, c16.x, r2.z
-min.f r2.x, r2.x, c22.y
-min.f r2.y, r2.y, c22.y
-mul.f r3.y, c11.z, r0.x
-mad.f32 r2.w, c13.x, r1.y, r2.w
-max.f r2.x, r2.x, c18.x
-max.f r2.y, r2.y, c18.x
-mad.f32 r3.y, c12.z, r0.y, r3.y
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r2.y, r2.y
-mad.f32 r3.y, c13.z, r0.z, r3.y
-mul.f r3.z, c11.x, r0.x
-mul.f r2.x, c16.x, r2.x
-mad.f32 r3.z, c12.x, r0.y, r3.z
-add.f r3.y, r3.y, c14.z
-mad.f32 r3.z, c13.x, r0.z, r3.z
-mov.f32f32 r2.x, r2.x
-mad.f32 r3.x, c18.w, r3.x, r3.y
-mul.f r2.w, r2.w, (neg)c4.x
-mul.f r3.w, c11.y, r0.w
-mad.f32 r2.x, c18.z, r2.x, c14.x
-add.f r3.z, r3.z, c14.x
-mov.f32f32 r3.x, r3.x
-mad.f32 r2.y, c16.x, r2.y, r3.z
-mov.f32f32 r2.x, r2.x
-mad.f32 r3.w, c12.y, r1.x, r3.w
-mov.f32f32 r3.x, r3.x
-add.f r2.y, r2.y, c19.x
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r3.w, r3.w
-add.f r3.x, r3.x, c19.x
-mad.f32 r3.w, c13.y, r1.y, r3.w
-add.f r2.x, r2.x, c19.x
+@in(r3.z) in8
+@in(r3.w) in9
+@out(r0.x) out0
+@out(r0.y) out1
+@out(r0.z) out2
+@out(r0.w) out3
+@out(r1.x) out4
+@out(r1.y) out5
+@out(r1.z) out6
+@out(r1.w) out7
+@out(r2.x) out8
+@out(r2.y) out9
+@out(r2.z) out10
+@out(r2.w) out11
+@out(r3.x) out12
+@out(r3.y) out13
+@out(r3.z) out14
+@out(r3.w) out15
+@const(c18.x) 0x3ecccccd, 0x00000000, 0x3eaaa64c, 0x3e800000
+@const(c19.x) 0x3f000000, 0x40000000, 0xbf800000, 0x40400000
+@const(c20.x) 0x399d4952, 0x3e4ccccd, 0x3f99999a, 0x3fd9999a
+@const(c21.x) 0x3c88ce70, 0x00000000, 0x00000000, 0x00000000
+@const(c22.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
+(sy)(ss)floor.f r1.z, c14.z
+floor.f r1.w, c14.x
+absneg.f r2.x, (abs)c17.x
+absneg.f r2.y, (abs)c17.y
+add.f r1.z, c14.z, (neg)r1.z
+add.f r1.w, c14.x, (neg)r1.w
+mov.f32f32 r2.z, c18.y
+mul.f r2.w, c11.x, r0.w
+max.f r1.z, r1.z, c18.y
+max.f r1.w, r1.w, c18.y
+add.f r2.x, r2.x, r2.y
+add.f r2.y, r2.z, c19.x
+min.f r1.z, r1.z, c22.y
+min.f r1.w, r1.w, c22.y
+mul.f r2.z, c16.x, r2.x
+mul.f r3.x, c11.z, r0.x
+max.f r1.z, r1.z, c18.x
+max.f r1.w, r1.w, c18.x
+mul.f r3.y, c11.x, r0.x
+mad.f32 r3.x, c12.z, r0.y, r3.x
+mul.f r1.z, c16.x, r1.z
+mad.f32 r3.y, c12.x, r0.y, r3.y
+mad.f32 r3.x, c13.z, r0.z, r3.x
+mad.f32 r3.y, c13.x, r0.z, r3.y
+mad.f32 r1.z, c18.z, r1.z, c14.x
floor.f r4.x, r2.y
-floor.f r4.y, r3.x
-mov.f32f32 r3.w, r3.w
-floor.f r4.z, r2.x
+mad.f32 r2.w, c12.x, r1.x, r2.w
+add.f r3.y, r3.y, c14.x
+add.f r1.z, r1.z, c19.x
+mad.f32 r1.w, c16.x, r1.w, r3.y
+add.f r3.x, r3.x, c14.z
add.f r2.y, r2.y, (neg)r4.x
-add.f r3.x, r3.x, (neg)r4.y
-mad.f32 r2.w, (neg)c4.y, r3.w, r2.w
-add.f r2.x, r2.x, (neg)r4.z
+floor.f r4.x, r1.z
+add.f r1.w, r1.w, c19.x
+mad.f32 r2.z, c18.w, r2.z, r3.x
mad.f32 r2.y, c19.y, r2.y, c19.z
-mad.f32 r3.x, c19.y, r3.x, c19.z
-mov.f32f32 r2.w, r2.w
-mad.f32 r2.x, c19.y, r2.x, c19.z
+add.f r1.z, r1.z, (neg)r4.x
+floor.f r4.x, r1.w
+add.f r2.z, r2.z, c19.x
absneg.f r2.y, (abs)r2.y
-absneg.f r3.x, (abs)r3.x
-mul.f r0.w, c11.z, r0.w
-absneg.f r2.x, (abs)r2.x
-mul.f r3.w, c19.y, r2.y
-mul.f r4.x, c19.y, r3.x
-mul.f r2.y, r2.y, r2.y
-mul.f r4.y, c19.y, r2.x
-add.f r3.w, c19.w, (neg)r3.w
+mad.f32 r1.z, c19.y, r1.z, c19.z
+add.f r4.x, r1.w, (neg)r4.x
+floor.f r4.y, r2.z
+mul.f r1.w, r2.y, r2.y
+absneg.f r1.z, (abs)r1.z
+mad.f32 r2.y, c19.y, r4.x, c19.z
+add.f r2.z, r2.z, (neg)r4.y
+mad.f32 r2.w, c13.x, r1.y, r2.w
+mul.f r4.x, c19.y, r1.z
+absneg.f r2.y, (abs)r2.y
+mad.f32 r2.z, c19.y, r2.z, c19.z
+mul.f r1.z, r1.z, r1.z
add.f r4.x, c19.w, (neg)r4.x
-mul.f r3.x, r3.x, r3.x
+mul.f r4.y, c19.y, r2.y
+absneg.f r2.z, (abs)r2.z
+mul.f r2.y, r2.y, r2.y
+mul.f r1.z, r1.z, r4.x
+mul.f r4.x, r0.y, c21.x
add.f r4.y, c19.w, (neg)r4.y
-mul.f r2.x, r2.x, r2.x
-mul.f r2.y, r2.y, r3.w
-mul.f r3.x, r3.x, r4.x
-mad.f32 r0.w, c12.z, r1.x, r0.w
-mul.f r1.x, r2.x, r4.y
-mov.f32f32 r2.x, r2.y
-mov.f32f32 r2.y, r3.x
-mul.f r3.x, r0.x, r0.z
-mov.f32f32 r1.x, r1.x
-mul.f r3.w, r0.y, c21.x
-mul.f r4.x, r0.y, c20.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r4.y, c18.y
-mov.f32f32 r3.w, r3.w
-mul.f r3.x, r3.x, r4.x
-mad.f32 r0.w, c13.z, r1.y, r0.w
-add.f r1.y, r4.y, c19.x
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r0.w, r0.w
-floor.f r4.x, r1.y
-max.f r3.w, r3.w, c18.y
-mov.f32f32 r3.x, r3.x
-mad.f32 r0.w, (neg)c4.z, r0.w, r2.w
-add.f r1.y, r1.y, (neg)r4.x
-min.f r2.w, r3.w, c22.y
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r0.w, r0.w
-mad.f32 r1.y, c19.y, r1.y, c19.z
-min.f r2.w, r2.w, c18.w
-max.f r3.x, r3.x, c18.y
-max.f r0.w, c18.y, r0.w
-absneg.f r1.y, (abs)r1.y
-mov.f32f32 r2.w, r2.w
-min.f r3.x, r3.x, c22.y
-mov.f32f32 r0.w, r0.w
-mul.f r1.y, r1.y, r1.y
-mul.f r1.x, r1.x, r2.w
-min.f r2.w, r3.x, c20.y
-mul.f r3.x, r0.w, c5.z
-mul.f r3.w, r0.w, c5.y
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r3.w, r3.w
-mul.f r0.w, r0.w, c5.x
-mul.f r2.x, r2.x, r2.w
-mul.f r2.y, r2.y, r2.w
-max.f r2.z, r2.z, c20.z
-mov.f32f32 r4.z, r3.x
+mul.f r4.z, c19.y, r2.z
+mul.f r2.z, r2.z, r2.z
+max.f r4.x, r4.x, c18.y
+mul.f r2.y, r2.y, r4.y
+mul.f r4.y, r0.x, r0.z
+add.f r4.z, c19.w, (neg)r4.z
+min.f r4.x, r4.x, c22.y
+mul.f r4.w, r0.y, c20.x
+mul.f r2.w, r2.w, (neg)c4.x
+mul.f r5.x, c11.y, r0.w
+min.f r4.x, r4.x, c18.w
+mul.f r4.y, r4.y, r4.w
+mul.f r2.z, r2.z, r4.z
+mad.f32 r4.z, c12.y, r1.x, r5.x
+mul.f r1.z, r1.z, r4.x
+max.f r4.x, r4.y, c18.y
+mad.f32 r4.y, c13.y, r1.y, r4.z
mov.f32f32 r2.x, r2.x
-mov.f32f32 r4.y, r3.w
-mad.f32 r2.w, c17.x, r1.x, r2.x
-mad.f32 r1.x, c17.y, r1.x, r2.x
-mov.f32f32 r2.x, r2.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.z, r2.w
-mov.f32f32 r1.x, r1.x
-min.f r2.x, r2.x, c20.w
-mov.f32f32 r4.x, r0.w
-mov.f32f32 r4.w, r1.y
-mul.f r0.w, c11.y, r0.x
-mov.f32f32 r1.y, r2.x
-mad.f32 r0.w, c12.y, r0.y, r0.w
+mov.f32f32 r4.z, r1.z
+min.f r4.x, r4.x, c22.y
+mad.f32 r2.w, (neg)c4.y, r4.y, r2.w
+mul.f r0.w, c11.z, r0.w
+max.f r2.x, r2.x, c20.z
+min.f r4.x, r4.x, c20.y
+mad.f32 r0.w, c12.z, r1.x, r0.w
+mul.f r1.x, c11.y, r0.x
mul.f r0.x, c11.w, r0.x
-mov.f32f32 r1.w, r1.w
-mad.f32 r2.x, r2.y, r1.y, r3.z
-mad.f32 r2.w, r2.y, r1.y, r3.y
-mad.f32 r0.w, c13.y, r0.z, r0.w
+mov.f32f32 r4.y, r4.x
+mul.f r2.z, r2.z, r4.x
+min.f r2.x, r2.x, c20.w
+mad.f32 r0.w, c13.z, r1.y, r0.w
+mul.f r1.y, r2.y, r4.y
+mad.f32 r1.x, c12.y, r0.y, r1.x
mad.f32 r0.x, c12.w, r0.y, r0.x
-add.f r0.y, r2.x, r2.z
-add.f r1.x, r2.w, r1.x
-add.f r0.w, r0.w, c14.y
-nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.x, r1.x
-mad.f32 r0.w, r2.y, r1.y, r0.w
+mad.f32 r0.y, c13.y, r0.z, r1.x
+mov.f32f32 r1.x, r1.y
+mad.f32 r1.y, c17.y, r1.z, r1.y
+mad.f32 r1.x, c17.x, r4.z, r1.x
+mov.f32f32 r1.z, r2.x
+mad.f32 r2.x, r2.z, r2.x, r3.x
+add.f r0.y, r0.y, c14.y
+mad.f32 r0.w, (neg)c4.z, r0.w, r2.w
+mad.f32 r2.y, r2.z, r1.z, r3.y
+add.f r1.y, r2.x, r1.y
+mad.f32 r0.y, r2.z, r1.z, r0.y
+max.f r4.x, c18.y, r0.w
+add.f r0.w, r2.y, r1.x
+mov.f32f32 r1.x, r1.y
mad.f32 r0.x, c13.w, r0.z, r0.x
-mul.f r0.z, c7.w, r0.y
-mul.f r1.y, c7.z, r0.y
-mad.f32 r0.z, c8.w, r0.w, r0.z
-mad.f32 r1.y, c8.z, r0.w, r1.y
-mul.f r2.x, c7.y, r0.y
-mul.f r2.y, c7.x, r0.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r1.y, r1.y
-mad.f32 r0.z, c9.w, r1.x, r0.z
-add.f r0.x, r0.x, c14.w
-mad.f32 r1.y, c9.z, r1.x, r1.y
-mad.f32 r2.x, c8.y, r0.w, r2.x
-mad.f32 r2.y, c8.x, r0.w, r2.y
-mad.f32 r0.z, c10.w, r0.x, r0.z
-mad.f32 r1.y, c10.z, r0.x, r1.y
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r1.y, r1.y
-mad.f32 r2.x, c9.y, r1.x, r2.x
-mad.f32 r2.y, c9.x, r1.x, r2.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r1.y, r1.y
-mad.f32 r2.x, c10.y, r0.x, r2.x
-mad.f32 r2.y, c10.x, r0.x, r2.y
-mov.f32f32 r3.y, r0.z
-mov.f32f32 r3.x, r1.y
-mov.f32f32 r0.z, r2.x
-mov.f32f32 r1.y, r2.y
-mul.f r2.x, c0.w, r0.y
-mul.f r2.y, c0.z, r0.y
-mul.f r0.z, r0.z, c15.y
-mul.f r1.y, r1.y, c15.x
-(rpt1)nop
-mov.f32f32 r2.w, r0.z
-mov.f32f32 r2.z, r1.y
-mad.f32 r0.z, c1.w, r0.w, r2.x
-mad.f32 r1.y, c1.z, r0.w, r2.y
-mad.f32 r0.z, c2.w, r1.x, r0.z
-mad.f32 r1.y, c2.z, r1.x, r1.y
-mad.f32 r0.z, c3.w, r0.x, r0.z
-mad.f32 r1.y, c3.z, r0.x, r1.y
-mul.f r2.x, c0.y, r0.y
-mul.f r2.y, c0.x, r0.y
-mov.f32f32 r5.w, r0.z
-mov.f32f32 r5.z, r1.y
-mad.f32 r0.z, c1.y, r0.w, r2.x
-mad.f32 r0.w, c1.x, r0.w, r2.y
-mad.f32 r0.z, c2.y, r1.x, r0.z
-mad.f32 r0.w, c2.x, r1.x, r0.w
-mad.f32 r0.z, c3.y, r0.x, r0.z
-mad.f32 r0.x, c3.x, r0.x, r0.w
-(rpt1)nop
-mov.f32f32 r5.y, r0.z
-mov.f32f32 r5.x, r0.x
-mad.f32 r0.x, c6.x, r1.x, c6.y
-mad.f32 r0.y, c6.x, r0.y, c6.y
-(rpt1)nop
-mov.f32f32 r0.w, r0.x
-mov.f32f32 r0.z, r0.y
-mov.f32f32 r1.y, r1.w
-mov.f32f32 r0.x, r1.z
-(rpt2)nop
-mov.f32f32 r1.x, r0.x
+nop
+mov.f32f32 r1.z, r0.w
+mul.f r3.x, c0.x, r0.w
+mad.f32 r3.y, c6.x, r1.x, c6.y
+mov.f32f32 r4.y, r4.x
+mul.f r0.z, c7.y, r1.z
+mul.f r0.w, c7.x, r1.z
+mad.f32 r0.z, c8.y, r0.y, r0.z
+mad.f32 r0.w, c8.x, r0.y, r0.w
+mad.f32 r0.z, c9.y, r1.x, r0.z
+add.f r4.z, r0.x, c14.w
+mad.f32 r0.x, c9.x, r1.x, r0.w
+mul.f r0.w, c7.w, r1.z
+mul.f r2.x, c7.z, r1.z
+mad.f32 r0.z, c10.y, r4.z, r0.z
+mad.f32 r0.x, c10.x, r4.z, r0.x
+mad.f32 r0.w, c8.w, r0.y, r0.w
+mad.f32 r2.z, c8.z, r0.y, r2.x
+mul.f r2.y, r0.z, c15.y
+mul.f r2.x, r0.x, c15.x
+mad.f32 r0.x, c9.w, r1.x, r0.w
+mad.f32 r0.z, c9.z, r1.x, r2.z
+mad.f32 r2.w, c10.w, r4.z, r0.x
+mad.f32 r2.z, c10.z, r4.z, r0.z
+mul.f r0.x, c0.w, r1.z
+mul.f r0.z, c0.z, r1.z
+mad.f32 r0.x, c1.w, r0.y, r0.x
+mad.f32 r0.z, c1.z, r0.y, r0.z
+mad.f32 r0.x, c2.w, r1.x, r0.x
+mad.f32 r0.z, c2.z, r1.x, r0.z
+mad.f32 r0.w, c3.w, r4.z, r0.x
+mad.f32 r0.z, c3.z, r4.z, r0.z
+mul.f r0.x, c0.y, r1.z
+mad.f32 r3.x, c1.x, r0.y, r3.x
+mad.f32 r0.x, c1.y, r0.y, r0.x
+mad.f32 r0.y, c2.x, r1.y, r3.x
+mad.f32 r1.x, c2.y, r1.x, r0.x
+mad.f32 r0.x, c3.x, r4.z, r0.y
+mad.f32 r0.y, c3.y, r4.z, r1.x
+mad.f32 r3.x, c6.x, r1.z, c6.y
+mul.f r1.z, r4.y, c5.z
+mul.f r1.y, r4.y, c5.y
+mul.f r1.x, r4.x, c5.x
end
-; VERT: outputs: r5.x (0:0) r4.x (5:9) r2.z (5:10) r0.z (5:11)
-; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r1.z (0:0,cm=3,il=16,b=0)
-; VERT: 221 instructions, 0 half, 6 full
-; pos: r5.x
+; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11)
+; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r3.z (0:0,cm=3,il=16,b=0)
+; VERT: 152 instructions, 0 half, 6 full
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-68.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-68.asm
index 0274fe9..5630fc2 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-68.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-68.asm
@@ -4,206 +4,143 @@
@in(r0.w) in3
@in(r1.x) in4
@in(r1.y) in5
-@out(r1.y) out0
-@out(r1.z) out1
-@out(r1.w) out2
-@out(r2.x) out3
+@out(r2.x) out0
+@out(r2.y) out1
+@out(r2.z) out2
+@out(r2.w) out3
+@const(c9.x) 0x3f000000, 0x00000000, 0x3f233333, 0xbb449ba6
+@const(c10.x) 0xbf000000, 0x3f800000, 0x40000000, 0x3de38866
+@const(c11.x) 0x3fb8aa65, 0x3cf5c28f, 0x00000000, 0x00000000
+@const(c12.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.x, 4, r1.x
add.f r0.y, r0.w, c9.y
bary.f r0.w, 5, r1.x
bary.f r1.z, 10, r1.x
-add.f r1.w, r0.x, c10.x
-bary.f r2.x, 8, r1.x
+add.f r2.x, r0.x, c10.x
+bary.f r1.w, 11, r1.x
add.f r2.y, r0.w, c10.x
-mov.f32f32 r1.z, r1.z
-floor.f r2.z, r1.w
+bary.f r3.y, 8, r1.x
+floor.f r2.z, r2.x
rcp r0.y, r0.y
add.f r0.z, r0.z, c9.y
floor.f r2.w, r2.y
-mov.f32f32 r3.x, r1.z
-add.f r1.z, r1.w, (neg)r2.z
+bary.f r3.z, 9, r1.x
+add.f r3.w, r2.x, (neg)r2.z
(ss)mul.f r0.y, r0.z, r0.y
-add.f r0.z, r2.y, (neg)r2.w
-bary.f r1.w, 11, r1.x
-mov.f32f32 r1.z, r1.z
+absneg.f r0.z, (neg)c7.x
+add.f r4.x, r2.y, (neg)r2.w
+mov.f32f32 r4.y, r3.w
+sam (f32)(xyzw)r2.x, r1.z, s#0, t#0
+(ss)add.f r1.z, r3.w, c10.y
+mul.f r0.z, r0.z, c7.x
+mov.f32f32 r1.w, r4.x
+mul.f r3.w, c9.x, r4.y
+add.f r4.y, c10.z, (neg)r4.y
+mul.f r0.z, r0.z, r0.y
mov.f32f32 r0.y, r0.y
-absneg.f r2.y, (neg)c7.x
-mov.f32f32 r0.z, r0.z
-mul.f r2.z, c9.x, r1.z
-mov.f32f32 r1.w, r1.w
-add.f r2.w, c10.z, (neg)r1.z
-add.f r3.z, c10.z, (neg)r0.z
-mov.f32f32 r2.z, r2.z
-mul.f r2.y, r2.y, c7.x
-mul.f r3.w, c9.x, r0.z
-mov.f32f32 r3.y, r1.w
-add.f r0.x, r0.x, (neg)r2.z
-mul.f r1.w, r2.y, r0.y
-mov.f32f32 r2.y, r3.w
-mov.f32f32 r2.z, r2.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r1.w, r1.w
-add.f r0.w, r0.w, (neg)r2.y
-sam (f32)(xyzw)r3.w, r3.x, s#0, t#0
-(sy)cmps.f.lt r2.y, r4.z, c9.z
-add.f r2.w, c10.x, r0.x
-mul.f r0.y, r1.w, r0.y
+add.f r0.x, r0.x, (neg)r3.w
+mul.f r3.w, c9.x, r1.w
+mov.f32f32 r4.z, r4.y
+mul.f r0.y, r0.z, r0.y
+mov.f32f32 r0.z, r0.x
+add.f r0.w, r0.w, (neg)r3.w
add.f r0.x, c10.y, r0.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.w, r2.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
-add.f r2.w, c10.y, r0.w
-mul.f r1.w, r1.w, c3.z
mul.f r0.y, r0.y, c11.x
-mul.f r0.x, r0.x, c3.z
-mov.f32f32 r2.w, r2.w
-(ss)mov.f32f32 r3.x, r1.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r3.y, r0.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r4.w, r3.x
-add.f r0.w, c10.x, r0.w
-mov.f32f32 r3.x, r3.y
-mov.f32f32 r5.z, r0.x
-mov.f32f32 r0.x, r1.w
-mov.f32f32 r0.w, r0.w
-exp2 r0.y, r0.y
-(ss)mov.f32f32 r0.y, r0.y
-mov.f32f32 r6.y, r3.x
-mul.f r1.w, r2.w, c3.w
-mul.f r0.w, r0.w, c3.w
-add.f r2.w, c12.y, (neg)r0.y
-mov.f32f32 r7.x, r0.x
-mov.f32f32 r0.x, r1.w
-mov.f32f32 r3.x, r0.w
-mul.f r2.w, r2.w, c7.y
-mul.f r0.y, r0.y, c10.y
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r5.x, r3.x
-bary.f r3.x, 6, r1.x
-add.f r0.y, r0.y, r2.w
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r0.w, r0.w
-add.f r2.w, r3.x, c9.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r6.z, r1.w
-mov.f32f32 r5.w, r0.w
-mov.f32f32 r0.w, r2.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.w, r2.w
-mov.f32f32 r3.x, r2.w
-mov.f32f32 r5.y, r0.w
-max.f r0.y, r0.y, c9.y
-mov.f32f32 r6.w, r1.w
-mov.f32f32 r6.x, r3.x
-mov.f32f32 r7.y, r0.x
-mov.f32f32 r0.x, r2.w
-cov.u32f32 r0.w, r2.y
-sam.s (f32)(x)r1.w, r4.w, s#2, t#2
-(sy)mov.f32f32 r1.w, r1.w
-min.f r0.y, r0.y, c10.y
-sam.s (f32)(x)r2.y, r6.y, s#2, t#2
-(sy)mov.f32f32 r2.y, r2.y
-sam.s (f32)(x)r2.w, r5.z, s#2, t#2
-(sy)mov.f32f32 r2.w, r2.w
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r3.x, r3.z
-add.f r3.y, c12.y, (neg)r0.y
-add.f r3.z, c12.y, (neg)r0.y
-(ss)add.f r4.w, c12.y, (neg)r0.y
-mul.f r5.x, r2.z, r3.x
-mul.f r3.y, r3.y, c6.z
-mul.f r3.z, r3.z, c6.y
-mul.f r4.w, r4.w, c6.x
-mul.f r1.w, r5.x, r1.w
-add.f r1.z, r1.z, c10.y
-mov.f32f32 r7.z, r0.x
-mov.f32f32 r0.x, (0.000000)
-add.f r0.z, r0.z, c10.y
-mul.f r3.x, r1.z, r3.x
-mov.f32f32 r2.x, r2.x
-cmps.f.ne p0.x, r0.w, r0.x
-mul.f r0.x, r2.z, r0.z
-mad.f32 r0.w, r3.x, r2.w, r1.w
-sam.s (f32)(x)r1.w, r7.x, s#2, t#2
+add.f r0.z, c10.x, r0.z
+mov.f32f32 r3.w, r0.w
+mul.f r4.w, r0.x, c3.z
+add.f r0.x, c10.y, r0.w
+mul.f r5.z, r0.z, c3.z
+add.f r0.z, c10.x, r3.w
+mov.f32f32 r6.y, r4.w
+mul.f r5.x, r0.x, c3.w
+mov.f32f32 r7.x, r5.z
+mul.f r6.z, r0.z, c3.w
+exp2 r0.x, r0.y
+(ss)mov.f32f32 r0.y, r0.x
+bary.f r0.z, 6, r1.x
+mov.f32f32 r5.w, r5.x
+mov.f32f32 r7.y, r6.z
+add.f r0.y, c12.y, (neg)r0.y
+add.f r5.y, r0.z, c9.w
+add.f r0.z, c10.z, (neg)r1.w
+mul.f r0.x, r0.x, c10.y
+add.f r0.w, r4.x, c10.y
+mov.f32f32 r7.z, r5.y
+mul.f r0.y, r0.y, c7.y
+mov.f32f32 r6.w, r5.y
+mov.f32f32 r6.x, r5.y
(rpt1)nop
-(sy)mov.f32f32 r1.w, r1.w
-mov.f32f32 r0.w, r0.w
-bary.f r2.z, 0, r1.x
-mov.f32f32 r2.w, r2.x
-mad.f32 r0.x, r0.x, r1.w, r0.w
-bary.f r0.w, 1, r1.x
-bary.f r1.w, 9, r1.x
+add.f r0.x, r0.x, r0.y
+sam.s (f32)(x)r7.x, r7.x, s#2, t#2
+mov.f32f32 r0.y, r0.z
+nop
+(ss)nop
+sam.s (f32)(x)r7.y, r6.y, s#2, t#2
+sam.s (f32)(x)r7.z, r4.w, s#2, t#2
mul.f r0.z, r1.z, r0.z
-mov.f32f32 r0.x, r0.x
-bary.f (ei)r1.x, 2, r1.x
-mov.f32f32 r1.y, r1.w
-mad.f32 r0.x, r0.z, r2.y, r0.x
+mul.f r1.w, r4.y, r0.w
+mul.f r0.y, r4.z, r0.y
+max.f r0.x, r0.x, c9.y
+sam.s (f32)(x)r3.w, r5.z, s#2, t#2
+(sy)cmps.f.lt r4.x, r2.w, c9.z
+mul.f r0.w, r1.z, r0.w
+mul.f r0.y, r0.y, r7.x
+min.f r0.x, r0.x, c10.y
+mad.f32 r0.y, r0.z, r7.y, r0.y
+cov.u32f32 r0.z, r4.x
+mad.f32 r0.y, r1.w, r3.w, r0.y
+add.f r1.z, c12.y, (neg)r0.x
+mad.f32 r0.y, r0.w, r7.z, r0.y
+add.f r0.w, c12.y, (neg)r0.x
+add.f r1.w, c12.y, (neg)r0.x
+mov.f32f32 r3.w, (0.000000)
+mul.f r0.y, c10.w, r0.y
+bary.f r4.x, 0, r1.x
+mul.f r1.z, r1.z, c6.z
+mul.f r0.w, r0.w, c6.y
+mov.f32f32 r4.y, r0.y
+bary.f r4.z, 2, r1.x
+bary.f (ei)r1.x, 1, r1.x
+mul.f r1.y, r2.x, r4.x
+mul.f r1.w, r1.w, c6.x
+mul.f r4.x, r2.z, r4.z
+mul.f r1.x, r2.y, r1.x
+mul.f r0.y, r1.y, r0.y
+cmps.f.ne p0.x, r0.z, r3.w
+mul.f r0.z, r4.x, r4.y
+mul.f r1.x, r1.x, r4.y
+mad.f32 r0.z, c5.z, r2.z, r0.z
+mad.f32 r1.x, c5.y, r2.y, r1.x
+mad.f32 r0.y, c5.x, r2.x, r0.y
+sam (f32)(w)r3.y, r3.y, s#1, t#1
+(sy)cmps.f.lt r1.y, r4.x, c11.y
+mul.f r0.z, r0.x, r0.z
+mul.f r1.x, r0.x, r1.x
+mul.f r0.x, r0.x, r0.y
kill p0.x
-mov.f32f32 r1.z, r4.z
-mov.f32f32 r3.x, r1.y
-mov.f32f32 r0.x, r0.x
-mul.f r1.x, r4.y, r1.x
-mul.f r0.w, r4.x, r0.w
-mov.f32f32 r2.x, r1.z
-mul.f r0.x, c10.w, r0.x
-mul.f r2.y, r3.w, r2.z
-sam (f32)(w)r5.x, r2.w, s#1, t#1
-nop
-(sy)cmps.f.lt r2.z, r5.w, c11.y
-mov.f32f32 r0.x, r0.x
-(ss)mov.f32f32 r2.w, r5.w
-mov.f32f32 r3.x, c9.y
-nop
-mul.f r1.x, r1.x, r0.x
-mul.f r0.w, r0.w, r0.x
-mul.f r0.x, r2.y, r0.x
-cov.u32f32 r2.y, r2.z
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r0.w, r0.w
-mad.f32 r1.x, c5.z, r4.y, r1.x
-mad.f32 r0.w, c5.y, r4.x, r0.w
-mov.f32f32 r0.x, r0.x
-cmps.f.ne r2.y, r2.y, c9.y
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r0.w, r0.w
-mad.f32 r0.x, c5.x, r3.w, r0.x
-mov.f32f32 r2.z, r2.w
-mul.f r1.x, r0.y, r1.x
-mul.f r0.w, r0.y, r0.w
-mov.f32f32 r0.x, r0.x
-sel.b32 r2.y, r3.x, r2.y, r2.z
-add.f r1.x, r1.x, r3.y
-add.f r0.w, r0.w, r3.z
-mul.f r0.x, r0.y, r0.x
-nop
-mul.f r0.y, r1.x, r2.y
-mul.f r0.w, r0.w, r2.y
-add.f r0.x, r0.x, r4.w
-nop
-mul.f r0.y, r0.y, c4.z
-mul.f r0.w, r0.w, c4.y
-mul.f r0.x, r0.x, r2.y
+add.f r0.y, r0.z, r1.z
+cov.u32f32 r0.z, r1.y
+add.f r0.w, r1.x, r0.w
+add.f r0.x, r0.x, r1.w
+mov.f32f32 r1.x, c9.y
+cmps.f.ne r0.z, r0.z, c9.y
+(rpt2)nop
+sel.b32 r0.z, r1.x, r0.z, r4.x
+(rpt2)nop
+mul.f r0.y, r0.y, r0.z
+mul.f r0.w, r0.w, r0.z
+(rpt1)nop
+mul.f r2.z, r0.y, c4.z
+mul.f r2.y, r0.w, c4.y
+mul.f r0.x, r0.x, r0.z
+(rpt2)nop
+mul.f r2.x, r0.x, c4.x
+end
nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.w, r0.w
-mul.f r0.x, r0.x, c4.x
nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.x, r0.x
nop
-mov.f32f32 r1.w, r0.y
-mov.f32f32 r1.z, r0.w
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
-mov.f32f32 r1.y, r0.x
-end
-; FRAG: outputs: r1.y (1:0)
+; FRAG: outputs: r2.x (1:0)
; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r2.x (5:10,cm=f,il=12,b=1) r1.x (5:11,cm=f,il=16,b=1)
-; FRAG: 195 instructions, 0 half, 8 full
-; pos (bary): r1.x
-; color: r1.y
-; fragcoord: r0.x
+; FRAG: 133 instructions, 0 half, 8 full
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-69.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-69.asm
index 0e4d5ee..9c8ac11 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-69.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-69.asm
@@ -6,134 +6,99 @@
@in(r0.w) in4
@in(r1.x) in5
@in(r1.y) in6
-@in(r1.z) in8
-@in(r1.w) in9
-@out(r6.y) out0
-@out(r6.z) out1
-@out(r6.w) out2
-@out(r7.x) out3
-@out(r5.y) out4
-@out(r5.z) out5
-@out(r5.w) out6
-@out(r6.x) out7
-@out(r2.w) out8
-@out(r3.x) out9
-@out(r3.y) out10
-@out(r3.z) out11
-@out(r3.w) out12
-@out(r4.x) out13
-@out(r4.y) out14
-@out(r4.z) out15
-(sy)(ss)mul.f r2.x, c11.x, r0.w
-mul.f r2.y, c11.x, r0.x
-mad.f32 r2.x, c12.x, r1.x, r2.x
-mad.f32 r2.y, c12.x, r0.y, r2.y
-mul.f r2.z, c11.z, r0.x
-mad.f32 r2.y, c13.x, r0.z, r2.y
-mov.f32f32 r2.x, r2.x
-mad.f32 r2.z, c12.z, r0.y, r2.z
-mad.f32 r2.x, c13.x, r1.y, r2.x
-add.f r2.y, r2.y, c14.x
-mad.f32 r2.z, c13.z, r0.z, r2.z
-mul.f r2.w, c11.y, r0.w
-mov.f32f32 r2.x, r2.x
-mul.f r3.x, c7.w, r2.y
-mul.f r3.y, c7.z, r2.y
-mul.f r3.z, c7.y, r2.y
-mul.f r2.x, r2.x, (neg)c4.x
-mad.f32 r2.w, c12.y, r1.x, r2.w
-mul.f r3.w, c11.y, r0.x
-mul.f r4.x, c7.x, r2.y
-mad.f32 r3.w, c12.y, r0.y, r3.w
-mov.f32f32 r2.w, r2.w
-mad.f32 r3.w, c13.y, r0.z, r3.w
-mad.f32 r2.w, c13.y, r1.y, r2.w
-mul.f r4.y, c0.w, r2.y
-mul.f r4.z, c0.z, r2.y
-mul.f r4.w, c0.y, r2.y
-mov.f32f32 r2.w, r2.w
-add.f r3.w, r3.w, c14.y
-mul.f r5.x, c0.x, r2.y
-add.f r2.z, r2.z, c14.z
-mad.f32 r2.x, (neg)c4.y, r2.w, r2.x
-mad.f32 r2.w, c8.w, r3.w, r3.x
-mad.f32 r3.x, c8.z, r3.w, r3.y
-mad.f32 r3.y, c8.y, r3.w, r3.z
-mov.f32f32 r2.x, r2.x
+@in(r3.z) in8
+@in(r3.w) in9
+@out(r0.x) out0
+@out(r0.y) out1
+@out(r0.z) out2
+@out(r0.w) out3
+@out(r1.x) out4
+@out(r1.y) out5
+@out(r1.z) out6
+@out(r1.w) out7
+@out(r2.x) out8
+@out(r2.y) out9
+@out(r2.z) out10
+@out(r2.w) out11
+@out(r3.x) out12
+@out(r3.y) out13
+@out(r3.z) out14
+@out(r3.w) out15
+@const(c16.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
+@const(c17.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
+(sy)(ss)mul.f r1.z, c11.x, r0.w
+mul.f r1.w, c11.x, r0.x
+mad.f32 r1.z, c12.x, r1.x, r1.z
+mad.f32 r1.w, c12.x, r0.y, r1.w
+mad.f32 r1.z, c13.x, r1.y, r1.z
+mad.f32 r1.w, c13.x, r0.z, r1.w
+mul.f r2.x, c11.z, r0.x
+mul.f r2.y, c11.y, r0.x
+mul.f r1.z, r1.z, (neg)c4.x
+mul.f r2.z, c11.y, r0.w
+add.f r1.w, r1.w, c14.x
+mad.f32 r2.z, c12.y, r1.x, r2.z
+mad.f32 r2.x, c12.z, r0.y, r2.x
+mad.f32 r2.z, c13.y, r1.y, r2.z
+mul.f r2.w, c7.y, r1.w
+mul.f r3.x, c7.x, r1.w
+mul.f r3.y, c7.w, r1.w
+mad.f32 r1.z, (neg)c4.y, r2.z, r1.z
mul.f r0.w, c11.z, r0.w
-mad.f32 r2.w, c9.w, r2.z, r2.w
+mad.f32 r2.y, c12.y, r0.y, r2.y
mad.f32 r0.w, c12.z, r1.x, r0.w
+mad.f32 r1.x, c13.y, r0.z, r2.y
+mad.f32 r0.w, c13.z, r1.y, r0.w
+mul.f r1.y, c7.z, r1.w
+mul.f r2.y, c0.w, r1.w
+mul.f r2.z, c0.z, r1.w
+mad.f32 r0.w, (neg)c4.z, r0.w, r1.z
+add.f r4.x, r1.x, c14.y
+mul.f r4.y, c0.y, r1.w
+mul.f r4.z, c0.x, r1.w
+max.f r0.w, c16.x, r0.w
+mad.f32 r2.w, c8.y, r4.x, r2.w
+mad.f32 r3.x, c8.x, r4.x, r3.x
+mad.f32 r3.y, c8.w, r4.x, r3.y
+mov.f32f32 r4.w, r0.w
+mad.f32 r2.x, c13.z, r0.z, r2.x
+mul.f r1.x, r0.w, c5.x
+mad.f32 r0.w, c8.z, r4.x, r1.y
+mul.f r1.z, r4.w, c5.z
+mul.f r1.y, r4.w, c5.y
+add.f r4.w, r2.x, c14.z
+mad.f32 r2.x, c1.w, r4.x, r2.y
+mad.f32 r2.y, c1.z, r4.x, r2.z
+mad.f32 r2.z, c1.y, r4.x, r4.y
+mad.f32 r2.w, c9.y, r4.w, r2.w
mul.f r0.x, c11.w, r0.x
-mad.f32 r1.x, c9.z, r2.z, r3.x
-mad.f32 r3.x, c9.y, r2.z, r3.y
-mov.f32f32 r0.w, r0.w
+mad.f32 r3.x, c9.x, r4.w, r3.x
mad.f32 r0.x, c12.w, r0.y, r0.x
-mad.f32 r0.y, c13.z, r1.y, r0.w
+mad.f32 r0.y, c9.w, r4.w, r3.y
mad.f32 r0.x, c13.w, r0.z, r0.x
-mad.f32 r0.z, c8.x, r3.w, r4.x
-mad.f32 r0.w, c1.w, r3.w, r4.y
-mov.f32f32 r0.y, r0.y
+mad.f32 r0.z, c9.z, r4.w, r0.w
+mad.f32 r0.w, c2.w, r4.w, r2.x
+mad.f32 r4.y, c2.z, r4.w, r2.y
add.f r0.x, r0.x, c14.w
-mad.f32 r0.z, c9.x, r2.z, r0.z
-mad.f32 r0.w, c2.w, r2.z, r0.w
-mad.f32 r0.y, (neg)c4.z, r0.y, r2.x
-mad.f32 r1.y, c10.w, r0.x, r2.w
-mad.f32 r1.x, c10.z, r0.x, r1.x
-mad.f32 r2.x, c10.y, r0.x, r3.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.x, r1.x
-mul.f r2.x, r2.x, c15.y
-max.f r0.y, c16.x, r0.y
-mov.f32f32 r3.z, r1.y
-mov.f32f32 r3.y, r1.x
-mov.f32f32 r3.x, r2.x
-mov.f32f32 r0.y, r0.y
-mad.f32 r0.z, c10.x, r0.x, r0.z
+mad.f32 r5.x, c2.y, r4.w, r2.z
+mad.f32 r4.x, c1.x, r4.x, r4.z
+mad.f32 r3.y, c6.x, r4.w, c6.y
+mad.f32 r2.x, c10.y, r0.x, r2.w
+mad.f32 r3.x, c10.x, r0.x, r3.x
+mad.f32 r2.w, c10.w, r0.x, r0.y
+mad.f32 r2.z, c10.z, r0.x, r0.z
+mul.f r2.y, r2.x, c15.y
+mul.f r2.x, r3.x, c15.x
mad.f32 r0.w, c3.w, r0.x, r0.w
-mad.f32 r1.x, c1.z, r3.w, r4.z
-mul.f r1.y, r0.y, c5.z
-mul.f r2.x, r0.y, c5.y
-mul.f r0.y, r0.y, c5.x
-mul.f r0.z, r0.z, c15.x
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r0.y, r0.y
-nop
-mov.f32f32 r5.w, r1.y
-mov.f32f32 r5.z, r2.x
-mov.f32f32 r5.y, r0.y
-mov.f32f32 r2.w, r0.z
-mov.f32f32 r7.x, r0.w
-mad.f32 r0.y, c2.z, r2.z, r1.x
-mad.f32 r0.z, c1.y, r3.w, r4.w
-mad.f32 r0.y, c3.z, r0.x, r0.y
-mad.f32 r0.z, c2.y, r2.z, r0.z
-mad.f32 r0.w, c1.x, r3.w, r5.x
-mad.f32 r1.x, c6.x, r2.z, c6.y
-mov.f32f32 r6.w, r0.y
-mad.f32 r0.y, c3.y, r0.x, r0.z
-mad.f32 r0.z, c2.x, r2.z, r0.w
-mov.f32f32 r0.w, r1.x
-mad.f32 r1.x, c6.x, r2.y, c6.y
-mov.f32f32 r6.z, r0.y
-mad.f32 r0.x, c3.x, r0.x, r0.z
-mov.f32f32 r4.x, r0.w
-mov.f32f32 r0.y, r1.x
-nop
-mov.f32f32 r6.y, r0.x
-mov.f32f32 r0.x, r1.w
-mov.f32f32 r3.w, r0.y
-mov.f32f32 r0.y, r1.z
-mov.f32f32 r0.z, (0.000000)
-mov.f32f32 r4.z, r0.x
-nop
-mov.f32f32 r4.y, r0.y
-mov.f32f32 r6.x, r0.z
+mad.f32 r0.z, c3.z, r0.x, r4.y
+mad.f32 r0.y, c3.y, r0.x, r5.x
+mad.f32 r4.x, c2.x, r4.w, r4.x
+mad.f32 r3.x, c6.x, r1.w, c6.y
+mad.f32 r0.x, c3.x, r0.x, r4.x
+mov.f32f32 r1.w, (0.000000)
end
nop
-; VERT: outputs: r6.y (0:0) r5.y (5:9) r2.w (5:10) r3.w (5:11)
-; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r1.z (0:0,cm=3,il=16,b=0)
-; VERT: 107 instructions, 0 half, 8 full
-; pos: r6.y
+; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11)
+; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r3.z (0:0,cm=3,il=16,b=0)
+; VERT: 71 instructions, 0 half, 6 full
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-70.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-70.asm
index 60492a1..2d7ae09 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-70.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-70.asm
@@ -4,710 +4,481 @@
@in(r0.w) in3
@in(r1.x) in4
@in(r1.y) in5
-@out(r3.x) out0
-@out(r3.y) out1
-@out(r3.z) out2
-@out(r3.w) out3
-(sy)(ss)bary.f r0.x, 7, r1.x
-bary.f r0.y, 8, r1.x
+@out(r2.x) out0
+@out(r2.y) out1
+@out(r2.z) out2
+@out(r2.w) out3
+@const(c14.x) 0x3f000000, 0x00000000, 0x43160000, 0x3bdb8bac
+@const(c15.x) 0x41000000, 0x3f600000, 0x3e000000, 0x3d4ccccd
+@const(c16.x) 0x40000000, 0xbf800000, 0xbb449ba6, 0xbf000000
+@const(c17.x) 0x3f800000, 0xbf000000, 0x3de38866, 0x3fb8aa65
+@const(c18.x) 0x3cf5c28f, 0x00000000, 0x00000000, 0x00000000
+@const(c19.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
+(sy)(ss)bary.f r0.x, 20, r1.x
+bary.f r0.y, 7, r1.x
+bary.f r1.z, 8, r1.x
add.f r0.w, r0.w, c14.y
-bary.f r1.z, 9, r1.x
-mov.f32f32 r0.x, r0.x
-bary.f r1.w, 20, r1.x
-add.f r2.x, r0.y, c16.w
-add.f r2.y, r1.z, c16.w
-bary.f r2.z, 18, r1.x
-mul.f r2.w, r1.w, r0.x
-bary.f r3.x, 15, r1.x
-floor.f r3.y, r2.x
+bary.f r1.w, 9, r1.x
+mul.f r2.x, r0.x, r0.y
+bary.f r2.y, 21, r1.x
+bary.f r2.z, 15, r1.x
+add.f r2.w, r1.z, c16.w
+add.f r3.y, r1.w, c16.w
rcp r0.w, r0.w
add.f r0.z, r0.z, c14.y
-floor.f r3.z, r2.y
-mov.f32f32 r3.x, r3.x
-bary.f r3.w, 21, r1.x
-add.f r2.x, r2.x, (neg)r3.y
+mad.f32 r2.x, r2.y, r2.z, r2.x
+bary.f r3.z, 22, r1.x
+bary.f r3.w, 3, r1.x
+floor.f r4.x, r2.w
(ss)mul.f r0.z, r0.z, r0.w
-(ss)add.f r0.w, r2.y, (neg)r3.z
-mad.f32 r2.y, r3.w, r3.x, r2.w
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r0.z, r0.z
-absneg.f r2.w, (neg)c11.x
-mov.f32f32 r2.y, r2.y
-bary.f r3.y, 3, r1.x
-mul.f r3.z, c14.x, r2.x
-mul.f r2.w, r2.w, c11.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r3.y, r3.y
-bary.f r4.x, 22, r1.x
-mov.f32f32 r3.z, r3.z
-mul.f r2.w, r2.w, r0.z
-mul.f r4.y, c14.x, r0.w
-mad.f32 r2.y, r4.x, r3.y, r2.y
-add.f r0.y, r0.y, (neg)r3.z
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r3.z, r4.y
-mov.f32f32 r2.y, r2.y
-bary.f r4.y, 12, r1.x
-mov.f32f32 r0.y, r0.y
-mul.f r0.z, r2.w, r0.z
-add.f r1.z, r1.z, (neg)r3.z
-mul.f r2.w, r1.w, r4.y
-bary.f r3.z, 13, r1.x
-add.f r4.z, c17.y, r0.y
+(ss)absneg.f r0.w, (neg)c11.x
+mad.f32 r2.x, r3.z, r3.w, r2.x
+add.f r2.w, r2.w, (neg)r4.x
+floor.f r4.x, r3.y
+mul.f r0.w, r0.w, c11.x
+mov.f32f32 r4.y, r2.x
+bary.f r4.z, 12, r1.x
+mov.f32f32 r4.w, r2.w
+mul.f r0.w, r0.w, r0.z
mov.f32f32 r0.z, r0.z
-add.f r0.y, c17.x, r0.y
-mad.f32 r2.w, r3.w, r3.z, r2.w
-mov.f32f32 r4.z, r4.z
+mul.f r5.x, r0.x, r4.z
+bary.f r5.y, 13, r1.x
+mul.f r5.z, c14.x, r4.w
+mul.f r0.z, r0.w, r0.z
+add.f r0.w, r3.y, (neg)r4.x
+mad.f32 r3.y, r2.y, r5.y, r5.x
+bary.f r4.x, 14, r1.x
+add.f r1.z, r1.z, (neg)r5.z
mul.f r0.z, r0.z, c17.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r2.w, r2.w
-bary.f r4.w, 14, r1.x
-mul.f r4.z, r4.z, c5.z
-mov.f32f32 r0.z, r0.z
-mul.f r0.y, r0.y, c5.z
-mad.f32 r2.w, r4.x, r4.w, r2.w
-mov.f32f32 r5.x, r4.z
-mov.f32f32 r4.z, r4.z
-mov.f32f32 r5.y, r0.y
-mul.f r5.z, r2.w, r2.w
-mov.f32f32 r5.w, r5.x
-mad.f32 r5.x, r2.y, r2.y, r5.z
-mov.f32f32 r1.z, r1.z
+mov.f32f32 r5.x, r0.w
+mad.f32 r3.y, r3.z, r4.x, r3.y
+mov.f32f32 r5.z, r1.z
+add.f r1.z, c17.x, r1.z
+mul.f r5.w, c14.x, r5.x
+mul.f r6.x, r3.y, r3.y
+add.f r5.z, c17.y, r5.z
+mad.f32 r2.x, r2.x, r4.y, r6.x
+bary.f r6.x, 4, r1.x
exp2 r0.z, r0.z
-(ss)mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r5.x, r5.x
-bary.f r5.z, 4, r1.x
-add.f r6.x, c17.y, r1.z
-add.f r6.y, c19.y, (neg)r0.z
-mov.f32f32 r0.y, r0.y
-mul.f r6.z, r1.w, r5.z
-bary.f r6.w, 5, r1.x
-mov.f32f32 r6.x, r6.x
+(ss)mov.f32f32 r6.y, r0.z
+mul.f r6.z, r5.z, c5.z
+add.f r1.w, r1.w, (neg)r5.w
+mul.f r5.z, r0.x, r6.x
+bary.f r5.w, 5, r1.x
+mov.f32f32 r7.y, r6.z
+add.f r6.y, c19.y, (neg)r6.y
+mov.f32f32 r6.w, r1.w
+mad.f32 r5.z, r2.y, r5.w, r5.z
+bary.f r8.x, 6, r1.x
mul.f r6.y, r6.y, c11.y
-mul.f r0.z, r0.z, c17.x
-mad.f32 r6.z, r3.w, r6.w, r6.z
-mul.f r7.x, r6.x, c5.w
-mov.f32f32 r7.y, r0.y
-add.f r0.y, c17.x, r1.z
-mov.f32f32 r1.z, r6.z
-bary.f r6.z, 6, r1.x
-mov.f32f32 r6.x, r7.x
+(ss)mul.f r0.z, r0.z, c17.x
+add.f r6.w, c17.y, r6.w
+mad.f32 r5.z, r3.z, r8.x, r5.z
+mul.f r8.y, r1.z, c5.z
+add.f r1.z, c17.x, r1.w
add.f r0.z, r0.z, r6.y
-mov.f32f32 r0.y, r0.y
-mad.f32 r1.z, r4.x, r6.z, r1.z
-mov.f32f32 r6.x, r6.x
-bary.f r6.y, 10, r1.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r1.z, r1.z
-mul.f r0.y, r0.y, c5.w
-add.f r8.x, r6.y, c16.z
-mov.f32f32 r8.y, r5.y
-mad.f32 r5.x, r1.z, r1.z, r5.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r5.y, r8.x
-mov.f32f32 r7.z, r0.y
-mov.f32f32 r7.x, r7.x
-mov.f32f32 r9.x, r4.z
-mov.f32f32 r6.y, r5.y
-rsq r4.z, r5.x
-(ss)mov.f32f32 r4.z, r4.z
+mov.f32f32 r1.w, r5.z
+mul.f r9.y, r6.w, c5.w
+mov.f32f32 r9.x, r8.y
+mul.f r8.z, r1.z, c5.w
+mad.f32 r1.z, r5.z, r1.w, r2.x
+mov.f32f32 r7.z, r9.y
max.f r0.z, r0.z, c14.y
-(ss)mov.f32f32 r5.x, r7.z
-mov.f32f32 r8.z, r7.x
-mul.f r1.z, r1.z, r4.z
+bary.f r2.x, 10, r1.x
+mov.f32f32 r6.w, r8.z
+add.f r4.w, c16.x, (neg)r4.w
+mul.f r0.x, r0.x, r0.x
+rsq r1.z, r1.z
+(ss)mov.f32f32 r5.z, r1.z
+add.f r8.w, r2.x, c16.z
min.f r0.z, r0.z, c17.x
-sam.s (f32)(x)r5.y, r5.w, s#4, t#4
-(sy)mov.f32f32 r5.y, r5.y
-mov.f32f32 r7.z, r5.x
-mov.f32f32 r1.z, r1.z
-add.f r5.x, c19.y, (neg)r0.z
-(ss)add.f r5.w, c19.y, (neg)r0.z
-add.f r6.x, c19.y, (neg)r0.z
-mul.f r1.z, r1.z, c15.x
-mov.f32f32 r5.y, r5.y
-add.f r6.y, c16.x, (neg)r2.x
-mul.f r5.x, r5.x, c10.z
-mov.f32f32 r1.z, r1.z
-mul.f r5.w, r5.w, c10.y
-mul.f r6.x, r6.x, c10.x
-mov.f32f32 r6.y, r6.y
-add.f r7.x, c16.x, (neg)r0.w
-mov.f32f32 r7.w, r8.x
-mul.f r2.y, r2.y, r4.z
-rcp r1.z, r1.z
-(ss)mov.f32f32 r1.z, r1.z
-mul.f r2.w, r2.w, r4.z
-mov.f32f32 r4.z, r7.x
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r7.w, r7.w
-mov.f32f32 r2.w, r2.w
-mul.f r7.x, r6.y, r4.z
-mov.f32f32 r2.y, r2.y
-mul.f r1.w, r1.w, r1.w
-absneg.f r2.w, (neg)r2.w
-mad.f32 r1.w, r3.w, r3.w, r1.w
-mul.f r3.w, r7.x, r5.y
-mov.f32f32 r5.y, r8.x
-sam.s (f32)(x)r7.x, r7.y, s#4, t#4
-(sy)mov.f32f32 r7.x, r7.x
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r0.y, r0.y
-mad.f32 r1.w, r4.x, r4.x, r1.w
-mov.f32f32 r8.w, r5.y
-mov.f32f32 r4.x, r2.z
-add.f r2.x, r2.x, c17.x
+(ss)mul.f r1.z, r3.y, r1.z
+mul.f r1.w, r1.w, r5.z
+mov.f32f32 r7.w, r8.w
+add.f r2.x, c19.y, (neg)r0.z
+add.f r3.y, c19.y, (neg)r0.z
+mul.f r1.w, r1.w, c15.x
+add.f r6.y, c19.y, (neg)r0.z
+mov.f32f32 r9.z, r8.w
+mov.f32f32 r7.x, r8.w
+nop
+sam.s (f32)(x)r9.w, r7.y, s#4, t#4
+(ss)mul.f r7.y, r2.x, c10.z
+mov.f32f32 r2.x, r4.w
+rcp r1.w, r1.w
+(ss)mov.f32f32 r7.z, r1.w
+mul.f r4.y, r4.y, r5.z
+mad.f32 r0.x, r2.y, r2.y, r0.x
+add.f r2.y, c16.x, (neg)r5.x
+mad.f32 r0.x, r3.z, r3.z, r0.x
+mul.f r3.y, r3.y, c10.y
+mul.f r3.z, r6.y, c10.x
+mov.f32f32 r5.x, r2.y
+absneg.f r1.z, (neg)r1.z
+sam.s (f32)(x)r10.x, r9.x, s#4, t#4
+sam.s (f32)(x)r6.y, r6.z, s#4, t#4
+sam.s (f32)(x)r8.y, r8.y, s#4, t#4
+add.f r2.w, r2.w, c17.x
add.f r0.w, r0.w, c17.x
-bary.f r5.y, 16, r1.x
-(ss)mov.f32f32 r7.y, r4.x
-sqrt r1.w, r1.w
-(ss)mov.f32f32 r1.w, r1.w
-sam.s (f32)(x)r4.x, r8.y, s#4, t#4
-(sy)mov.f32f32 r4.x, r4.x
-mul.f r4.z, r2.x, r4.z
-mov.f32f32 r9.y, r0.y
-add.f r0.y, c14.z, (neg)r1.w
-mov.f32f32 r1.w, r8.x
-bary.f r7.w, 19, r1.x
-mul.f r6.y, r6.y, r0.w
-mov.f32f32 r0.y, r0.y
-mad.f32 r3.w, r4.z, r4.x, r3.w
-mov.f32f32 r9.z, r1.w
-mov.f32f32 r1.w, r7.w
-mul.f r0.y, c12.z, r0.y
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r4.x, r5.y
-mov.f32f32 r7.z, r1.w
-mov.f32f32 r0.y, r0.y
-sam.s (f32)(x)r1.w, r9.x, s#4, t#4
-(sy)mov.f32f32 r1.w, r1.w
-mov.f32f32 r8.x, r4.x
-bary.f r4.x, 17, r1.x
-mul.f r0.y, r0.y, c14.w
-mad.f32 r1.w, r6.y, r1.w, r3.w
-(ss)nop
-sam (f32)(w)r8.y, r7.y, s#2, t#2
-(sy)cmps.f.lt r3.w, r9.x, c15.y
-mov.f32f32 r4.x, r4.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.w, r1.w
-mul.f r0.w, r2.x, r0.w
-cov.u32f32 r2.x, r3.w
-mul.f r2.w, r2.w, r0.y
-mul.f r0.y, r2.y, r0.y
-mad.f32 r0.w, r0.w, r7.x, r1.w
-cmps.f.ne r1.w, r2.x, c14.y
-mov.f32f32 r2.x, r2.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.y, c15.z
-mul.f r2.x, r2.x, r1.z
-mul.f r0.y, r0.y, r1.z
+sqrt r0.x, r0.x
+(ss)add.f r0.x, c14.z, (neg)r0.x
+mul.f r2.x, r2.x, r5.x
+mul.f r2.y, r2.w, r2.y
+mul.f r4.w, r4.w, r0.w
+mul.f r0.x, c12.z, r0.x
+(sy)mul.f r2.x, r2.x, r9.w
+bary.f r6.z, 18, r1.x
+mul.f r0.w, r2.w, r0.w
+mul.f r0.x, r0.x, c14.w
+mad.f32 r2.x, r2.y, r10.x, r2.x
+bary.f r6.w, 19, r1.x
+bary.f r8.z, 16, r1.x
+mov.f32f32 r2.y, r0.x
+mad.f32 r2.x, r4.w, r6.y, r2.x
+mul.f r0.x, r1.z, r0.x
+mad.f32 r0.w, r0.w, r8.y, r2.x
+mul.f r1.z, r4.y, r2.y
+sam (f32)(w)r9.x, r6.z, s#2, t#2
+bary.f r8.w, 17, r1.x
+mul.f r0.x, r0.x, r1.w
+(sy)cmps.f.lt r1.w, r9.w, c15.y
+mul.f r1.z, r1.z, r7.z
mul.f r0.w, c17.z, r0.w
-mov.f32f32 r1.z, c14.y
-mov.f32f32 r2.x, r2.x
+mov.f32f32 r2.x, r0.x
+cov.u32f32 r1.w, r1.w
+mov.f32f32 r2.y, r1.z
+cmps.f.lt r2.w, r9.w, c15.y
+mov.f32f32 r4.y, r0.w
+cmps.f.ne r1.w, r1.w, c14.y
+mov.f32f32 r4.w, c14.y
+mov.f32f32 r5.x, c14.y
+cov.u32f32 r2.w, r2.w
+sam (f32)(w)r8.y, r8.z, s#1, t#1
+(sy)cmps.f.lt r5.z, r9.x, c18.x
+sel.b32 r1.z, r1.z, r1.w, r4.w
+sel.b32 r0.x, r0.x, r1.w, r5.x
+(rpt1)nop
+add.f r1.w, r6.w, r1.z
+add.f r1.z, r6.z, r0.x
+cmps.f.ne r0.x, r2.w, c14.y
+cov.u32f32 r2.w, r5.z
+mov.f32f32 r4.w, c15.z
+mov.f32f32 r5.x, c14.y
mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.w, r0.w
-sel.b32 r1.z, r2.y, r1.w, r1.z
-mov.f32f32 r1.w, r2.x
-mov.f32f32 r2.y, r0.y
-mov.f32f32 r2.w, r0.y
-mov.f32f32 r3.w, r0.y
-mov.f32f32 r1.w, r1.w
-cmps.f.lt r4.z, r9.x, c15.y
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r2.w, r2.w
+mov.f32f32 r5.z, r2.z
+sam (f32)(w)r6.y, r1.z, s#2, t#2
+cmps.f.ne r2.z, r2.w, c14.y
+sel.b32 r0.x, r4.w, r0.x, r5.x
+mov.f32f32 r2.w, c14.y
mov.f32f32 r3.w, r3.w
-cov.u32f32 r4.z, r4.z
-mov.f32f32 r5.y, r2.x
-mov.f32f32 r6.y, r0.y
-mov.f32f32 r7.x, r2.x
-cmps.f.ne r4.z, r4.z, c14.y
-(ss)mov.f32f32 r7.y, c14.y
-mov.f32f32 r7.z, c14.y
-mov.f32f32 r5.y, r5.y
-mov.f32f32 r6.y, r6.y
-sel.b32 r1.w, r1.w, r4.z, r7.y
-sel.b32 r2.y, r2.y, r4.z, r7.z
-mov.f32f32 r4.z, r7.x
-mov.f32f32 r7.x, r0.y
-add.f r1.w, r2.z, r1.w
-add.f r2.y, r7.w, r2.y
-mov.f32f32 r2.z, r2.x
-mov.f32f32 r7.y, r0.y
-mov.f32f32 r7.z, r1.w
-mov.f32f32 r7.w, r2.y
-mov.f32f32 r7.x, r7.x
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r8.y, r7.z
-mov.f32f32 r8.z, r7.w
-mov.f32f32 r7.y, r7.y
-mov.f32f32 r7.z, r2.x
-mov.f32f32 r7.w, r0.y
-mov.f32f32 r8.w, r2.x
-mov.f32f32 r9.x, r0.y
-mov.f32f32 r9.y, r2.x
-sam (f32)(w)r9.z, r8.y, s#2, t#2
-add.f r1.z, c15.y, (neg)r1.z
-mov.f32f32 r7.z, r7.z
-mov.f32f32 r7.w, r7.w
-(ss)mov.f32f32 r8.z, r8.w
-(sy)cmps.f.lt r8.y, r10.y, r1.z
-cmps.f.lt r8.w, r10.y, r1.z
-mov.f32f32 r9.x, r9.x
-mov.f32f32 r9.y, r9.y
+mov.f32f32 r4.w, (0.000000)
+add.f r0.x, c15.y, (neg)r0.x
+sel.b32 r5.x, r2.w, r2.z, r9.x
+bary.f r6.y, 2, r1.x
+(ss)bary.f r6.z, 1, r1.x
+(sy)cmps.f.lt r2.z, r7.x, r0.x
+cmps.f.lt r2.w, r7.x, r0.x
+bary.f r6.w, 23, r1.x
+bary.f r7.x, 24, r1.x
+cov.u32f32 r2.z, r2.z
+cov.u32f32 r2.w, r2.w
+bary.f r7.z, 25, r1.x
+bary.f (ei)r1.x, 0, r1.x
+cmps.f.ne r1.y, r2.z, c14.y
+mov.f32f32 r2.z, c14.y
+cmps.f.ne r2.w, r2.w, c14.y
+mov.f32f32 r7.w, c14.y
+mov.f32f32 r8.y, c15.z
+sel.b32 r2.z, r2.x, r1.y, r2.z
+mov.f32f32 r8.z, c14.y
+sel.b32 r1.y, r2.y, r1.y, r7.w
+mov.f32f32 r7.w, c14.y
+add.f r1.z, r1.z, r2.z
+sel.b32 r2.z, r8.y, r2.w, r8.z
+add.f r8.z, r1.w, r1.y
+mov.f32f32 r1.y, c14.y
+mov.f32f32 r8.y, r1.z
+add.f r0.x, r0.x, (neg)r2.z
+mov.f32f32 r1.w, r8.z
+mov.f32f32 r2.z, c14.y
+mov.f32f32 r2.w, c15.z
+mov.f32f32 r8.w, c14.y
+mov.f32f32 r9.x, c14.y
+sam (f32)(w)r9.y, r8.y, s#2, t#2
+(sy)(ss)cmps.f.lt r8.y, r10.x, r0.x
+mov.f32f32 r0.x, r0.x
+mov.f32f32 r8.z, c14.y
+mov.f32f32 r9.y, c15.z
cov.u32f32 r8.y, r8.y
-cov.u32f32 r8.w, r8.w
-mov.f32f32 r9.z, r2.x
-nop
-mov.f32f32 r8.y, r8.y
-cmps.f.ne r8.w, r8.w, c14.y
-mov.f32f32 r9.w, c15.z
+cmps.f.lt r9.z, r10.x, r0.x
+mov.f32f32 r9.w, c14.y
mov.f32f32 r10.x, c14.y
-cmps.f.ne r10.y, r8.y, c14.y
-mov.f32f32 r9.z, r9.z
-mov.f32f32 r10.z, c14.y
-mov.f32f32 r10.w, c14.y
-sel.b32 r8.w, r9.w, r8.w, r10.x
-mov.f32f32 r8.y, r4.x
-sel.b32 r4.x, r9.z, r10.y, r10.z
-sel.b32 r9.x, r9.x, r10.y, r10.w
-add.f r1.z, r1.z, (neg)r8.w
-bary.f r8.w, 23, r1.x
-add.f r1.w, r1.w, r4.x
-add.f r2.y, r2.y, r9.x
-mov.f32f32 r1.z, r1.z
-sam (f32)(w)r9.z, r8.x, s#1, t#1
-(sy)cmps.f.lt r4.x, r10.y, c18.x
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r2.y, r2.y
-(ss)mov.f32f32 r8.x, r10.y
-cov.u32f32 r4.x, r4.x
-mov.f32f32 r8.y, r1.w
-mov.f32f32 r9.x, r2.y
-mov.f32f32 r8.x, r8.x
-mov.f32f32 r8.w, r8.w
-mov.f32f32 r9.z, r8.y
-mov.f32f32 r9.w, r9.x
-cmps.f.ne r4.x, r4.x, c14.y
-(rpt1)nop
-mov.f32f32 r8.y, c14.y
-bary.f r9.x, 24, r1.x
-bary.f r10.x, 25, r1.x
-sam (f32)(w)r10.y, r9.z, s#2, t#2
-(sy)(ss)cmps.f.lt r9.z, r11.x, r1.z
-cmps.f.lt r9.w, r11.x, r1.z
-sel.b32 r4.x, r8.y, r4.x, r8.x
-mov.f32f32 r8.x, r9.x
-cov.u32f32 r8.y, r9.z
-cov.u32f32 r9.x, r9.w
-mov.f32f32 r9.z, r10.x
-mov.f32f32 r9.w, (0.000000)
-mov.f32f32 r8.y, r8.y
-cmps.f.ne r9.x, r9.x, c14.y
-mov.f32f32 r10.x, c15.z
-mov.f32f32 r10.y, c14.y
cmps.f.ne r8.y, r8.y, c14.y
+mov.f32f32 r10.y, c14.y
+cov.u32f32 r9.z, r9.z
mov.f32f32 r10.z, c14.y
mov.f32f32 r10.w, c14.y
-sel.b32 r9.x, r10.x, r9.x, r10.y
-bary.f r10.x, 2, r1.x
-sel.b32 r9.y, r9.y, r8.y, r10.z
-sel.b32 r7.w, r7.w, r8.y, r10.w
-add.f r1.z, r1.z, (neg)r9.x
-bary.f r8.y, 1, r1.x
-add.f r1.w, r1.w, r9.y
-add.f r2.y, r2.y, r7.w
-mov.f32f32 r1.z, r1.z
-bary.f (ei)r1.x, 0, r1.x
-mov.f32f32 r1.y, r1.w
-mov.f32f32 r1.w, r2.y
-mov.f32f32 r2.y, c14.y
-mov.f32f32 r7.w, c14.y
-mov.f32f32 r9.x, r1.y
-mov.f32f32 r9.y, r1.w
-mov.f32f32 r10.y, c14.y
+sel.b32 r10.y, r2.x, r8.y, r10.y
+cmps.f.ne r9.z, r9.z, c14.y
+sel.b32 r8.y, r2.y, r8.y, r10.z
mov.f32f32 r10.z, c15.z
-mov.f32f32 r10.w, r9.x
-mov.f32f32 r11.x, r9.y
-mov.f32f32 r9.x, c14.y
-mov.f32f32 r9.y, c14.y
-mov.f32f32 r11.y, c14.y
-mov.f32f32 r11.z, c15.z
-mov.f32f32 r11.w, c14.y
-mov.f32f32 r12.x, c14.y
-sam (f32)(w)r12.y, r10.w, s#2, t#2
-(sy)(ss)cmps.f.lt r10.w, r13.x, r1.z
-cmps.f.lt r11.x, r13.x, r1.z
-mov.f32f32 r12.y, c14.y
-mov.f32f32 r12.z, c15.z
-cov.u32f32 r10.w, r10.w
-cov.u32f32 r11.x, r11.x
-mov.f32f32 r12.w, c14.y
-mov.f32f32 r13.x, c14.y
-mov.f32f32 r10.w, r10.w
-cmps.f.ne r11.x, r11.x, c14.y
-mov.f32f32 r13.y, c15.z
-mov.f32f32 r13.z, c14.y
-cmps.f.ne r10.w, r10.w, c14.y
-mov.f32f32 r13.w, c14.y
-mov.f32f32 r14.x, c14.y
-sel.b32 r11.x, r13.y, r11.x, r13.z
-nop
-sel.b32 r8.z, r8.z, r10.w, r13.w
-sel.b32 r7.y, r7.y, r10.w, r14.x
-add.f r1.z, r1.z, (neg)r11.x
-nop
-add.f r1.y, r1.y, r8.z
-add.f r1.w, r1.w, r7.y
-mov.f32f32 r1.z, r1.z
+add.f r1.z, r1.z, r10.y
+mov.f32f32 r10.y, c14.y
+add.f r11.y, r1.w, r8.y
+mov.f32f32 r1.w, c15.z
+mov.f32f32 r11.x, r1.z
+sel.b32 r8.y, r10.z, r9.z, r10.y
+mov.f32f32 r9.z, r11.y
+mov.f32f32 r10.y, c14.y
+mov.f32f32 r10.z, c14.y
+mov.f32f32 r11.z, c14.y
+mov.f32f32 r11.w, c15.z
+sam (f32)(w)r12.x, r11.x, s#2, t#2
+add.f r0.x, r0.x, (neg)r8.y
+mov.f32f32 r8.y, c14.y
+(ss)mov.f32f32 r11.x, c14.y
nop
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.w, r1.w
-(rpt1)nop
-mov.f32f32 r7.y, r1.y
-mov.f32f32 r8.z, r1.w
-(rpt1)nop
-mov.f32f32 r10.w, r7.y
-mov.f32f32 r11.x, r8.z
-(rpt5)nop
-sam (f32)(w)r13.y, r10.w, s#2, t#2
-(sy)cmps.f.lt r7.y, r14.x, r1.z
-cmps.f.lt r8.z, r14.x, r1.z
+(sy)cmps.f.lt r11.y, r12.w, r0.x
+mov.f32f32 r0.x, r0.x
(rpt1)nop
-cov.u32f32 r7.y, r7.y
-cov.u32f32 r8.z, r8.z
+cov.u32f32 r11.y, r11.y
+cmps.f.lt r12.x, r12.w, r0.x
(rpt1)nop
-mov.f32f32 r7.y, r7.y
-cmps.f.ne r8.z, r8.z, c14.y
+cmps.f.ne r11.y, r11.y, c14.y
+cov.u32f32 r12.x, r12.x
(rpt1)nop
-cmps.f.ne r7.y, r7.y, c14.y
-sel.b32 r8.z, r12.z, r8.z, r12.y
-(rpt1)nop
-sel.b32 r7.z, r7.z, r7.y, r13.x
-sel.b32 r7.x, r7.x, r7.y, r12.w
-add.f r1.z, r1.z, (neg)r8.z
+sel.b32 r11.x, r2.x, r11.y, r11.x
+cmps.f.ne r12.x, r12.x, c14.y
+sel.b32 r8.y, r2.y, r11.y, r8.y
nop
-add.f r1.y, r1.y, r7.z
-add.f r1.w, r1.w, r7.x
-mov.f32f32 r1.z, r1.z
+add.f r1.z, r1.z, r11.x
+sel.b32 r11.x, r11.w, r12.x, r11.z
+add.f r11.z, r9.z, r8.y
nop
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.w, r1.w
-(rpt1)nop
-mov.f32f32 r7.x, r1.y
-mov.f32f32 r7.y, r1.w
-(rpt1)nop
-(ss)mov.f32f32 r10.w, r7.x
-mov.f32f32 r11.x, r7.y
-(rpt5)nop
-sam (f32)(w)r12.y, r10.w, s#2, t#2
-(sy)cmps.f.lt r7.x, r13.x, r1.z
-cmps.f.lt r7.y, r13.x, r1.z
-(rpt1)nop
-cov.u32f32 r7.x, r7.x
-cov.u32f32 r7.y, r7.y
+mov.f32f32 r11.y, r1.z
+add.f r0.x, r0.x, (neg)r11.x
+mov.f32f32 r8.y, r11.z
+(rpt3)nop
+sam (f32)(w)r11.x, r11.y, s#2, t#2
+(sy)cmps.f.lt r9.z, r11.w, r0.x
+mov.f32f32 r0.x, r0.x
(rpt1)nop
-mov.f32f32 r7.x, r7.x
-cmps.f.ne r7.y, r7.y, c14.y
+cov.u32f32 r9.z, r9.z
+cmps.f.lt r11.x, r11.w, r0.x
(rpt1)nop
-cmps.f.ne r7.x, r7.x, c14.y
-sel.b32 r7.y, r11.z, r7.y, r11.y
+cmps.f.ne r9.z, r9.z, c14.y
+cov.u32f32 r11.x, r11.x
(rpt1)nop
-sel.b32 r2.z, r2.z, r7.x, r12.x
-sel.b32 r6.y, r6.y, r7.x, r11.w
-add.f r1.z, r1.z, (neg)r7.y
+sel.b32 r10.z, r2.x, r9.z, r10.z
+cmps.f.ne r11.x, r11.x, c14.y
+sel.b32 r9.z, r2.y, r9.z, r10.y
nop
-add.f r1.y, r1.y, r2.z
-add.f r1.w, r1.w, r6.y
-mov.f32f32 r1.z, r1.z
+add.f r1.z, r1.z, r10.z
+sel.b32 r1.w, r1.w, r11.x, r10.w
+add.f r10.z, r8.y, r9.z
nop
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.w, r1.w
+mov.f32f32 r10.y, r1.z
+add.f r0.x, r0.x, (neg)r1.w
+mov.f32f32 r1.w, r10.z
+(rpt3)nop
+sam (f32)(w)r10.y, r10.y, s#2, t#2
+(sy)cmps.f.lt r8.y, r11.x, r0.x
+mov.f32f32 r0.x, r0.x
(rpt1)nop
-mov.f32f32 r2.z, r1.y
-mov.f32f32 r6.y, r1.w
+cov.u32f32 r8.y, r8.y
+cmps.f.lt r9.z, r11.x, r0.x
(rpt1)nop
-mov.f32f32 r7.x, r2.z
-mov.f32f32 r7.y, r6.y
-(rpt5)nop
-sam (f32)(w)r10.w, r7.x, s#2, t#2
-(sy)cmps.f.lt r2.z, r11.z, r1.z
-cmps.f.lt r6.y, r11.z, r1.z
+cmps.f.ne r8.y, r8.y, c14.y
+cov.u32f32 r9.z, r9.z
(rpt1)nop
-cov.u32f32 r2.z, r2.z
-cov.u32f32 r6.y, r6.y
+sel.b32 r10.x, r2.x, r8.y, r10.x
+cmps.f.ne r9.z, r9.z, c14.y
+sel.b32 r8.y, r2.y, r8.y, r9.w
+nop
+add.f r1.z, r1.z, r10.x
+sel.b32 r8.z, r9.y, r9.z, r8.z
+add.f r9.z, r1.w, r8.y
+nop
+mov.f32f32 r9.y, r1.z
+add.f r0.x, r0.x, (neg)r8.z
+mov.f32f32 r1.w, r9.z
+(rpt3)nop
+sam (f32)(w)r9.y, r9.y, s#2, t#2
+(sy)cmps.f.lt r8.y, r10.x, r0.x
+mov.f32f32 r0.x, r0.x
(rpt1)nop
-mov.f32f32 r2.z, r2.z
-cmps.f.ne r6.y, r6.y, c14.y
+cov.u32f32 r8.y, r8.y
+cmps.f.lt r8.z, r10.x, r0.x
(rpt1)nop
-cmps.f.ne r2.z, r2.z, c14.y
-sel.b32 r6.y, r10.z, r6.y, r10.y
+cmps.f.ne r8.y, r8.y, c14.y
+cov.u32f32 r8.z, r8.z
(rpt1)nop
-sel.b32 r4.z, r4.z, r2.z, r9.y
-sel.b32 r2.z, r3.w, r2.z, r9.x
-add.f r1.z, r1.z, (neg)r6.y
+sel.b32 r9.x, r2.x, r8.y, r9.x
+cmps.f.ne r8.z, r8.z, c14.y
+sel.b32 r8.y, r2.y, r8.y, r8.w
nop
-add.f r1.y, r1.y, r4.z
-add.f r1.w, r1.w, r2.z
-mov.f32f32 r1.z, r1.z
+add.f r1.z, r1.z, r9.x
+sel.b32 r2.z, r2.w, r8.z, r2.z
+add.f r8.z, r1.w, r8.y
nop
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.w, r1.w
-(rpt1)nop
-mov.f32f32 r2.z, r1.y
-mov.f32f32 r3.w, r1.w
+mov.f32f32 r8.y, r1.z
+add.f r0.x, r0.x, (neg)r2.z
+mov.f32f32 r1.w, r8.z
+(rpt3)nop
+sam (f32)(w)r8.y, r8.y, s#2, t#2
+(sy)cmps.f.lt r2.z, r9.x, r0.x
+mov.f32f32 r0.x, r0.x
(rpt1)nop
-(ss)mov.f32f32 r7.x, r2.z
-mov.f32f32 r7.y, r3.w
-(rpt5)nop
-sam (f32)(w)r10.y, r7.x, s#2, t#2
-(sy)cmps.f.lt r2.z, r11.x, r1.z
-(rpt2)nop
cov.u32f32 r2.z, r2.z
(rpt2)nop
cmps.f.ne r2.z, r2.z, c14.y
(rpt2)nop
-sel.b32 r3.w, r5.y, r2.z, r7.w
-sel.b32 r2.y, r2.w, r2.z, r2.y
+sel.b32 r1.y, r2.x, r2.z, r1.y
+sel.b32 r2.z, r2.y, r2.z, r7.w
(rpt1)nop
-add.f r1.y, r1.y, r3.w
-add.f r1.w, r1.w, r2.y
+add.f r1.y, r1.z, r1.y
+add.f r1.w, r1.w, r2.z
(rpt1)nop
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.w, r1.w
+mov.f32f32 r1.z, r1.y
+mov.f32f32 r2.z, r1.w
(rpt1)nop
-add.f r2.y, r1.y, (neg)r2.x
-mov.f32f32 r2.z, r1.y
-add.f r2.w, r1.w, (neg)r0.y
-mov.f32f32 r3.w, r1.w
-mov.f32f32 r2.y, r2.y
-(ss)mov.f32f32 r7.x, r2.z
-mov.f32f32 r2.z, r2.w
-mov.f32f32 r7.y, r3.w
-mov.f32f32 r2.y, r2.y
-(rpt2)nop
-mov.f32f32 r7.z, r2.y
-mov.f32f32 r2.y, r2.z
-sam (f32)(w)r10.y, r7.x, s#2, t#2
-(sy)add.f r2.z, c15.z, r11.x
-add.f r1.z, r11.x, (neg)r1.z
-nop
-mov.f32f32 r7.w, r2.y
-mov.f32f32 r2.y, r2.z
-mov.f32f32 r1.z, r1.z
-(rpt3)nop
-sam (f32)(w)r7.x, r7.z, s#2, t#2
-(sy)add.f r2.y, r2.y, (neg)r7.w
-(rpt2)nop
-mov.f32f32 r2.y, r2.y
+(ss)add.f r8.y, r1.z, (neg)r2.x
+add.f r8.z, r2.z, (neg)r2.y
+(rpt1)nop
+sam (f32)(w)r8.w, r1.z, s#2, t#2
+(sy)(ss)add.f r1.z, c15.z, r9.z
+add.f r0.x, r9.z, (neg)r0.x
+(rpt1)nop
+sam (f32)(w)r8.y, r8.y, s#2, t#2
+(sy)add.f r1.z, r1.z, (neg)r9.x
(rpt5)nop
-rcp r2.y, r2.y
-(ss)mov.f32f32 r2.y, r2.y
-(rpt2)nop
-mul.f r1.z, r1.z, r2.y
-(rpt2)nop
-mov.f32f32 r1.z, r1.z
+rcp r1.z, r1.z
+(ss)mul.f r0.x, r0.x, r1.z
(rpt2)nop
-mul.f r2.x, r2.x, r1.z
-mul.f r0.y, r0.y, r1.z
-(rpt1)nop
-mov.f32f32 r1.z, r2.x
-mov.f32f32 r0.y, r0.y
+(ss)mov.f32f32 r1.z, r0.x
+mul.f r0.x, r2.x, r0.x
(rpt1)nop
-add.f r1.y, r1.y, (neg)r1.z
-add.f r0.y, r1.w, (neg)r0.y
+mul.f r1.z, r2.y, r1.z
+add.f r1.w, r1.y, (neg)r0.x
(rpt1)nop
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r0.y, r0.y
+add.f r2.x, r2.z, (neg)r1.z
+mov.f32f32 r1.y, r1.w
(rpt1)nop
-mov.f32f32 r1.z, r1.y
-mov.f32f32 r1.w, r1.y
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r2.x, r0.y
-mov.f32f32 r2.y, r1.z
-mov.f32f32 r1.z, r0.y
-mov.f32f32 r7.x, r1.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r2.z, r1.z
mov.f32f32 r1.z, r2.x
-mov.f32f32 r7.y, r0.y
-nop
-mov.f32f32 r1.w, r1.y
-mov.f32f32 r2.x, r1.z
+mov.f32f32 r8.y, r1.y
nop
-sam (f32)(xyz)r10.y, r2.y, s#2, t#2
-(sy)mad.f32 r0.y, c16.x, r10.z, c16.y
-mad.f32 r1.y, c16.x, r10.y, c16.y
-sam (f32)(xyzw)r7.x, r7.x, s#0, t#0
-(sy)cmps.f.lt r1.z, r7.w, c15.w
-(ss)mov.f32f32 r2.y, r7.w
-mov.f32f32 r0.y, r0.y
-sam (f32)(xyz)r11.x, r1.w, s#3, t#3
-(sy)(ss)mul.f r1.w, c8.y, r11.y
-mul.f r2.x, c8.x, r11.x
-cov.u32f32 r1.z, r1.z
-absneg.f r0.y, (neg)r0.y
-mov.f32f32 r1.y, r1.y
-mul.f r2.z, c8.z, r11.z
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r2.x, r2.x
-mul.f r2.w, r4.y, r1.y
-mul.f r3.z, r3.z, r1.y
-mad.f32 r0.x, r0.x, r0.y, r2.w
-mad.f32 r2.w, r3.x, r0.y, r3.z
-mul.f r1.y, r4.w, r1.y
-cmps.f.ne p0.x, r1.z, r9.w
-mov.f32f32 r0.x, r0.x
-mad.f32 r1.z, c16.x, r10.w, c16.y
-mov.f32f32 r2.w, r2.w
-mad.f32 r0.y, r3.y, r0.y, r1.y
-mov.f32f32 r1.y, r2.z
-mov.f32f32 r1.z, r1.z
+sam (f32)(xyzw)r2.x, r1.w, s#0, t#0
+(sy)cmps.f.lt r0.x, r2.w, c15.w
+mov.f32f32 r8.z, r1.z
+(rpt1)nop
+sam (f32)(xyz)r8.w, r1.y, s#2, t#2
+(sy)(ss)mad.f32 r1.y, c16.x, r8.w, c16.y
+cov.u32f32 r0.x, r0.x
+mad.f32 r1.z, c16.x, r9.x, c16.y
+mad.f32 r1.w, c16.x, r9.y, c16.y
+mov.f32f32 r7.w, r1.y
+cmps.f.ne p0.x, r0.x, r4.w
+absneg.f r0.x, (neg)r1.z
+mul.f r1.y, r4.x, r1.y
+mul.f r1.z, r4.z, r7.w
+mul.f r4.x, r5.y, r7.w
+mad.f32 r0.y, r0.y, r0.x, r1.z
+mov.f32f32 r1.z, r1.w
+mad.f32 r4.x, r5.z, r0.x, r4.x
+mad.f32 r0.x, r3.w, r0.x, r1.y
kill p0.x
-mov.f32f32 r3.w, r2.y
-mov.f32f32 r0.y, r0.y
-mad.f32 r0.x, r5.z, r1.z, r0.x
-mad.f32 r2.y, r6.w, r1.z, r2.w
-mad.f32 r0.y, r6.z, r1.z, r0.y
+mad.f32 r0.y, r6.x, r1.z, r0.y
+mad.f32 r1.y, r5.w, r1.z, r4.x
+mad.f32 r0.x, r8.x, r1.w, r0.x
nop
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r1.z, r2.y
-mov.f32f32 r0.y, r0.y
-nop
-mul.f r2.y, r0.x, r0.x
-nop
-mad.f32 r2.y, r1.z, r1.z, r2.y
-(rpt2)nop
-mov.f32f32 r2.y, r2.y
-nop
-mad.f32 r2.y, r0.y, r0.y, r2.y
+mov.f32f32 r1.z, r0.y
+mov.f32f32 r1.w, r1.y
+mov.f32f32 r3.w, r0.x
+sam (f32)(xyz)r5.y, r8.y, s#3, t#3
+(sy)mul.f r4.x, c8.z, r5.w
+mul.f r0.y, r0.y, r1.z
+mul.f r4.z, c8.y, r5.z
+mad.f32 r0.y, r1.y, r1.w, r0.y
+mul.f r1.y, c8.x, r5.y
+mad.f32 r0.y, r3.w, r3.w, r0.y
(rpt5)nop
-rsq r2.y, r2.y
-(ss)mov.f32f32 r2.y, r2.y
-(rpt2)nop
-mul.f r0.x, r0.x, r2.y
-mul.f r1.z, r1.z, r2.y
-mul.f r0.y, r0.y, r2.y
-nop
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r0.y, r0.y
-nop
-mul.f r2.y, r0.x, r0.x
-mul.f r2.w, (neg)c9.x, r0.x
-mad.f32 r2.y, r1.z, r1.z, r2.y
-mad.f32 r2.w, (neg)c9.y, r1.z, r2.w
-(rpt1)nop
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r2.w, r2.w
-mad.f32 r2.y, r0.y, r0.y, r2.y
-mad.f32 r2.w, (neg)c9.z, r0.y, r2.w
-(rpt4)nop
-rsq r2.y, r2.y
-(ss)mov.f32f32 r2.y, r2.y
-max.f r2.w, r2.w, c14.y
+rsq r0.y, r0.y
+(ss)mov.f32f32 r3.w, r0.y
+mul.f r0.x, r0.x, r0.y
(rpt1)nop
-mul.f r0.x, r0.x, r2.y
-mov.f32f32 r2.w, r2.w
-mul.f r1.z, r1.z, r2.y
-mul.f r0.y, r0.y, r2.y
-mov.f32f32 r0.x, r0.x
-mad.f32 r2.y, c8.z, r2.w, (neg)r10.x
-mad.f32 r4.y, c8.y, r2.w, (neg)r8.y
-mad.f32 r2.w, c8.x, r2.w, (neg)r1.x
-mul.f r0.x, r0.x, r8.w
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r4.y, r4.y
-mov.f32f32 r2.w, r2.w
-mad.f32 r0.x, r1.z, r8.x, r0.x
-mad.f32 r1.z, c12.x, r2.y, r10.x
-mad.f32 r2.y, c12.x, r4.y, r8.y
-mad.f32 r1.x, c12.x, r2.w, r1.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r1.x, r1.x
-mad.f32 r0.x, r0.y, r9.z, r0.x
-(rpt2)nop
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
-max.f r0.x, c14.y, r0.x
-(rpt2)nop
-mov.f32f32 r0.x, r0.x
+(ss)mul.f r0.y, r1.z, r3.w
+mul.f r1.z, r1.w, r3.w
+mov.f32f32 r1.w, r0.x
+nop
+mov.f32f32 r3.w, r0.y
+mul.f r0.y, (neg)c9.x, r0.y
+mov.f32f32 r4.w, r1.z
+nop
+mul.f r5.y, r3.w, r3.w
+mad.f32 r0.y, (neg)c9.y, r1.z, r0.y
+mad.f32 r1.z, r4.w, r4.w, r5.y
+mad.f32 r0.x, (neg)c9.z, r0.x, r0.y
+mad.f32 r0.y, r1.w, r1.w, r1.z
(rpt5)nop
-log2 r0.x, r0.x
-(ss)mul.f r0.x, c12.y, r0.x
-(rpt2)nop
-mov.f32f32 r0.x, r0.x
+rsq r0.y, r0.y
+(ss)mov.f32f32 r1.z, r0.y
+max.f r0.x, r0.x, c14.y
+(ss)mul.f r0.y, r1.w, r0.y
+nop
+mul.f r1.w, r3.w, r1.z
+mov.f32f32 r3.w, r0.x
+mul.f r1.z, r4.w, r1.z
+mad.f32 r0.x, c8.x, r0.x, (neg)r1.x
+mul.f r1.w, r1.w, r6.w
+mad.f32 r4.w, c8.z, r3.w, (neg)r6.y
+mad.f32 r1.z, r1.z, r7.x, r1.w
+mad.f32 r1.w, c8.y, r3.w, (neg)r6.z
+mad.f32 r0.y, r0.y, r7.z, r1.z
+mad.f32 r1.z, c12.x, r4.w, r6.y
+mad.f32 r0.x, c12.x, r0.x, r1.x
+nop
+max.f r0.y, c14.y, r0.y
+mad.f32 r1.x, c12.x, r1.w, r6.z
+(rpt4)nop
+log2 r0.y, r0.y
+(ss)mul.f r0.y, c12.y, r0.y
(rpt5)nop
-exp2 r0.x, r0.x
-(ss)mul.f r0.y, r1.y, r0.x
-mul.f r1.y, r1.w, r0.x
-mad.f32 r0.y, r7.z, r1.z, r0.y
-mad.f32 r1.y, r7.y, r2.y, r1.y
-(ss)mul.f r0.x, r2.x, r0.x
-nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.y, r1.y
-mad.f32 r0.x, r7.x, r1.x, r0.x
-nop
-mul.f r0.y, r0.y, r0.w
-mul.f r1.x, r1.y, r0.w
-mov.f32f32 r0.x, r0.x
-nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.x, r1.x
-mad.f32 r0.y, c7.z, r7.z, r0.y
-mad.f32 r1.x, c7.y, r7.y, r1.x
+exp2 r0.y, r0.y
+(ss)mul.f r1.w, r4.x, r0.y
+mul.f r3.w, r4.z, r0.y
+mad.f32 r1.z, r2.z, r1.z, r1.w
+mad.f32 r1.x, r2.y, r1.x, r3.w
+(ss)mul.f r0.y, r1.y, r0.y
+nop
+mul.f r1.y, r1.z, r4.y
+mul.f r1.x, r1.x, r4.y
+mad.f32 r1.y, c7.z, r2.z, r1.y
+mad.f32 r1.x, c7.y, r2.y, r1.x
+mad.f32 r0.x, r2.x, r0.x, r0.y
+nop
+mul.f r0.y, r0.z, r1.y
+mul.f r1.x, r0.z, r1.x
mul.f r0.x, r0.x, r0.w
nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.w, r1.x
-mov.f32f32 r0.x, r0.x
-nop
-mul.f r0.y, r0.z, r0.y
-mul.f r0.w, r0.z, r0.w
-mad.f32 r0.x, c7.x, r7.x, r0.x
-nop
-add.f r0.y, r0.y, r5.x
-add.f r0.w, r0.w, r5.w
-mov.f32f32 r0.x, r0.x
+add.f r0.y, r0.y, r7.y
+add.f r0.w, r1.x, r3.y
+mad.f32 r0.x, c7.x, r2.x, r0.x
nop
-mul.f r0.y, r0.y, r4.x
-mul.f r0.w, r0.w, r4.x
+mul.f r0.y, r0.y, r5.x
+mul.f r0.w, r0.w, r5.x
mul.f r0.x, r0.z, r0.x
nop
-mul.f r0.y, r0.y, c6.z
-mul.f r0.z, r0.w, c6.y
-add.f r0.x, r0.x, r6.x
-nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-mul.f r0.x, r0.x, r4.x
-nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-mul.f r0.x, r0.x, c6.x
-nop
-mov.f32f32 r3.z, r0.y
-mov.f32f32 r3.y, r0.z
-mov.f32f32 r0.x, r0.x
+mul.f r2.z, r0.y, c6.z
+mul.f r2.y, r0.w, c6.y
+add.f r0.x, r0.x, r3.z
(rpt2)nop
-mov.f32f32 r0.x, r0.x
+mul.f r0.x, r0.x, r5.x
(rpt2)nop
-mov.f32f32 r3.x, r0.x
+mul.f r2.x, r0.x, c6.x
end
nop
nop
-nop
-; FRAG: outputs: r3.x (1:0)
-; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r2.z (5:9,cm=f,il=8,b=1) r63.y (5:10,cm=f,il=12,b=1) r3.x (5:11,cm=f,il=16,b=1) r2.y (5:12,cm=f,il=20,b=1) r7.x (5:13,cm=f,il=24,b=1) r8.x (5:14,cm=f,il=28,b=1) r7.x (5:15,cm=f,il=32,b=1)
-; FRAG: 802 instructions, 0 half, 65 full
-; pos (bary): r1.x
-; color: r3.x
-; fragcoord: r0.x
+; FRAG: outputs: r2.x (1:0)
+; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r3.x (5:9,cm=f,il=8,b=1) r63.z (5:10,cm=f,il=12,b=1) r3.x (5:11,cm=f,il=16,b=1) r1.w (5:12,cm=f,il=20,b=1) r6.x (5:13,cm=f,il=24,b=1) r6.x (5:14,cm=f,il=28,b=1) r7.x (5:15,cm=f,il=32,b=1)
+; FRAG: 532 instructions, 0 half, 13 full
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-71.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-71.asm
index ad4df45..b2e35b3 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-71.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-71.asm
@@ -6,346 +6,250 @@
@in(r0.w) in4
@in(r1.x) in5
@in(r1.y) in6
-@in(r1.z) in8
-@in(r1.w) in9
-@in(r2.x) in10
-@in(r2.y) in11
-@in(r2.z) in12
-@in(r2.w) in13
-@out(r11.z) out0
-@out(r11.w) out1
-@out(r12.x) out2
-@out(r12.y) out3
-@out(r7.x) out4
-@out(r7.y) out5
-@out(r7.z) out6
-@out(r7.w) out7
-@out(r3.x) out8
-@out(r3.y) out9
-@out(r3.z) out10
-@out(r3.w) out11
-@out(r9.z) out12
-@out(r9.w) out13
-@out(r10.x) out14
-@out(r10.y) out15
-@out(r1.y) out16
-@out(r1.z) out17
-@out(r1.w) out18
-@out(r2.x) out19
-@out(r12.z) out20
-@out(r12.w) out21
-@out(r13.x) out22
-@out(r13.y) out23
-@out(r8.z) out24
-@out(r8.w) out25
-@out(r9.x) out26
-@out(r9.y) out27
-@out(r10.z) out28
-@out(r10.w) out29
-@out(r11.x) out30
-@out(r11.y) out31
-(sy)(ss)floor.f r3.x, c15.z
-absneg.f r3.y, (abs)c18.x
-absneg.f r3.z, (abs)c18.y
-floor.f r3.w, c15.x
-add.f r3.x, c15.z, (neg)r3.x
-mul.f r4.x, c12.x, r1.z
-mul.f r4.y, c12.x, r0.w
-add.f r3.w, c15.x, (neg)r3.w
-mov.f32f32 r3.x, r3.x
-add.f r3.y, r3.y, r3.z
-mad.f32 r3.z, c13.x, r1.w, r4.x
-mad.f32 r4.x, c13.x, r1.x, r4.y
-max.f r3.x, r3.x, c19.y
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r3.z, r3.z
-min.f r3.x, r3.x, c23.y
-mul.f r4.y, c17.x, r3.y
-mul.f r4.z, c12.z, r0.x
-max.f r3.w, r3.w, c19.y
-max.f r3.x, r3.x, c19.x
-mad.f32 r4.z, c13.z, r0.y, r4.z
-mad.f32 r3.z, c14.x, r2.x, r3.z
-mad.f32 r4.x, c14.x, r1.y, r4.x
-mov.f32f32 r3.x, r3.x
-mad.f32 r4.z, c14.z, r0.z, r4.z
-min.f r3.w, r3.w, c23.y
-mov.f32f32 r3.z, r3.z
-mul.f r3.x, c17.x, r3.x
-add.f r4.z, r4.z, c15.z
-max.f r3.w, r3.w, c19.x
-mad.f32 r4.y, c19.w, r4.y, r4.z
-mov.f32f32 r3.x, r3.x
-mul.f r4.w, c12.y, r0.w
-mov.f32f32 r4.x, r4.x
-absneg.f r5.x, (neg)c5.x
-mad.f32 r3.x, c19.z, r3.x, c15.x
-mov.f32f32 r4.y, r4.y
-mov.f32f32 r3.w, r3.w
-mul.f r5.y, c12.x, r0.x
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r4.y, r4.y
-mad.f32 r5.y, c13.x, r0.y, r5.y
-mad.f32 r4.w, c13.y, r1.x, r4.w
-mov.f32f32 r3.x, r3.x
-add.f r4.y, r4.y, c20.x
-mad.f32 r5.y, c14.x, r0.z, r5.y
-mad.f32 r4.w, c14.y, r1.y, r4.w
-add.f r3.x, r3.x, c20.x
-floor.f r5.z, r4.y
-add.f r5.y, r5.y, c15.x
-mov.f32f32 r4.w, r4.w
-floor.f r5.w, r3.x
-add.f r4.y, r4.y, (neg)r5.z
-mad.f32 r3.w, c17.x, r3.w, r5.y
-mul.f r5.z, r4.w, r3.z
-add.f r3.x, r3.x, (neg)r5.w
-mad.f32 r4.y, c20.y, r4.y, c20.z
-add.f r3.w, r3.w, c20.x
-mul.f r5.w, c12.y, r1.z
-mad.f32 r3.x, c20.y, r3.x, c20.z
-absneg.f r4.y, (abs)r4.y
-floor.f r6.x, r3.w
-mad.f32 r5.w, c13.y, r1.w, r5.w
-absneg.f r3.x, (abs)r3.x
-mul.f r6.y, c20.y, r4.y
-add.f r3.w, r3.w, (neg)r6.x
-mul.f r4.y, r4.y, r4.y
-mul.f r6.x, c20.y, r3.x
-add.f r6.y, c20.w, (neg)r6.y
-mad.f32 r3.w, c20.y, r3.w, c20.z
-mul.f r3.x, r3.x, r3.x
-add.f r6.x, c20.w, (neg)r6.x
-mul.f r4.y, r4.y, r6.y
-absneg.f r3.w, (abs)r3.w
-mov.f32f32 r5.w, r5.w
-mul.f r3.x, r3.x, r6.x
-mov.f32f32 r4.y, r4.y
-mul.f r6.x, r0.x, r0.z
-mul.f r6.y, r0.y, c21.x
-mov.f32f32 r3.x, r3.x
-mul.f r6.z, r0.y, c22.x
-mul.f r6.w, c20.y, r3.w
-mul.f r6.x, r6.x, r6.y
-mul.f r3.w, r3.w, r3.w
-mov.f32f32 r6.y, r6.z
-add.f r6.z, c20.w, (neg)r6.w
-mov.f32f32 r6.x, r6.x
-mad.f32 r5.w, c14.y, r2.x, r5.w
-mov.f32f32 r6.y, r6.y
-mul.f r3.w, r3.w, r6.z
-mov.f32f32 r6.x, r6.x
-mov.f32f32 r5.w, r5.w
-max.f r6.y, r6.y, c19.y
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r6.x, r6.x
-mad.f32 r5.z, r4.x, r5.w, (neg)r5.z
-min.f r6.y, r6.y, c23.y
-mul.f r6.z, r4.x, r5.x
-absneg.f r6.w, (neg)c5.y
-mul.f r1.z, c12.z, r1.z
-min.f r6.y, r6.y, c19.w
-max.f r6.x, r6.x, c19.y
-mov.f32f32 r5.z, r5.z
-mad.f32 r6.z, r4.w, r6.w, r6.z
-mov.f32f32 r6.y, r6.y
-min.f r6.x, r6.x, c23.y
-mul.f r5.z, r5.z, r2.y
-mov.f32f32 r6.z, r6.z
-mul.f r3.x, r3.x, r6.y
-min.f r6.x, r6.x, c21.y
-mov.f32f32 r5.z, r5.z
+@in(r8.x) in8
+@in(r8.y) in9
+@in(r8.z) in10
+@in(r8.w) in11
+@in(r5.z) in12
+@in(r5.w) in13
+@out(r0.x) out0
+@out(r0.y) out1
+@out(r0.z) out2
+@out(r0.w) out3
+@out(r1.x) out4
+@out(r1.y) out5
+@out(r1.z) out6
+@out(r1.w) out7
+@out(r2.x) out8
+@out(r2.y) out9
+@out(r2.z) out10
+@out(r2.w) out11
+@out(r3.x) out12
+@out(r3.y) out13
+@out(r3.z) out14
+@out(r3.w) out15
+@out(r4.x) out16
+@out(r4.y) out17
+@out(r4.z) out18
+@out(r4.w) out19
+@out(r5.x) out20
+@out(r5.y) out21
+@out(r5.z) out22
+@out(r5.w) out23
+@out(r6.x) out24
+@out(r6.y) out25
+@out(r6.z) out26
+@out(r6.w) out27
+@out(r7.x) out28
+@out(r7.y) out29
+@out(r7.z) out30
+@out(r7.w) out31
+@const(c19.x) 0x3ecccccd, 0x00000000, 0x3eaaa64c, 0x3e800000
+@const(c20.x) 0x3f000000, 0x40000000, 0xbf800000, 0x40400000
+@const(c21.x) 0x399d4952, 0x3e4ccccd, 0x3f99999a, 0x3fd9999a
+@const(c22.x) 0x3c88ce70, 0x00000000, 0x00000000, 0x00000000
+@const(c23.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
+(sy)(ss)floor.f r1.z, c15.z
+floor.f r1.w, c15.x
+absneg.f r2.x, (abs)c18.x
+absneg.f r2.y, (abs)c18.y
+add.f r1.z, c15.z, (neg)r1.z
+add.f r1.w, c15.x, (neg)r1.w
+mul.f r2.z, c12.x, r0.w
+add.f r2.x, r2.x, r2.y
+max.f r1.z, r1.z, c19.y
+max.f r1.w, r1.w, c19.y
+mad.f32 r2.y, c13.x, r1.x, r2.z
+mul.f r2.z, c17.x, r2.x
+min.f r1.z, r1.z, c23.y
+min.f r1.w, r1.w, c23.y
+mul.f r2.w, c12.z, r0.x
+mad.f32 r2.y, c14.x, r1.y, r2.y
+max.f r1.z, r1.z, c19.x
+max.f r1.w, r1.w, c19.x
+mad.f32 r2.w, c13.z, r0.y, r2.w
+mul.f r3.x, c12.x, r0.x
+mul.f r1.z, c17.x, r1.z
+mad.f32 r3.x, c13.x, r0.y, r3.x
+mad.f32 r2.w, c14.z, r0.z, r2.w
+mad.f32 r3.x, c14.x, r0.z, r3.x
+mad.f32 r1.z, c19.z, r1.z, c15.x
+absneg.f r3.y, (neg)c5.x
+add.f r2.w, r2.w, c15.z
+mov.f32f32 r2.x, r2.x
+add.f r1.z, r1.z, c20.x
+add.f r3.x, r3.x, c15.x
+mad.f32 r2.z, c19.w, r2.z, r2.w
+mad.f32 r1.w, c17.x, r1.w, r3.x
+floor.f r3.z, r1.z
+mul.f r3.w, r2.y, r3.y
+mul.f r4.x, c12.y, r0.w
+add.f r1.w, r1.w, c20.x
+add.f r1.z, r1.z, (neg)r3.z
+add.f r2.z, r2.z, c20.x
+mad.f32 r3.z, c13.y, r1.x, r4.x
+max.f r2.x, r2.x, c21.z
+mad.f32 r1.z, c20.y, r1.z, c20.z
+floor.f r4.x, r1.w
+floor.f r4.y, r2.z
+mad.f32 r3.z, c14.y, r1.y, r3.z
+absneg.f r1.z, (abs)r1.z
+add.f r1.w, r1.w, (neg)r4.x
+add.f r2.z, r2.z, (neg)r4.y
+absneg.f r4.x, (neg)c5.y
+mul.f r4.y, c20.y, r1.z
+mad.f32 r1.w, c20.y, r1.w, c20.z
+mad.f32 r2.z, c20.y, r2.z, c20.z
+mul.f r1.z, r1.z, r1.z
+add.f r4.y, c20.w, (neg)r4.y
+absneg.f r1.w, (abs)r1.w
+absneg.f r2.z, (abs)r2.z
+mad.f32 r3.w, r3.z, r4.x, r3.w
+mul.f r1.z, r1.z, r4.y
+mul.f r4.y, r0.y, c22.x
+mul.f r4.z, c20.y, r1.w
+mul.f r4.w, c20.y, r2.z
+mul.f r1.w, r1.w, r1.w
+max.f r4.y, r4.y, c19.y
+add.f r4.z, c20.w, (neg)r4.z
+add.f r4.w, c20.w, (neg)r4.w
+mul.f r2.z, r2.z, r2.z
+min.f r4.y, r4.y, c23.y
+mul.f r1.w, r1.w, r4.z
+mul.f r4.z, r0.x, r0.z
+mul.f r4.w, r2.z, r4.w
+min.f r2.z, r4.y, c19.w
+mul.f r4.y, r0.y, c21.x
mul.f r0.w, c12.z, r0.w
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r6.x, r6.x
-mov.f32f32 r5.z, r5.z
+min.f r5.x, r2.x, c21.w
+mul.f r1.z, r1.z, r2.z
+mul.f r2.x, r4.z, r4.y
mad.f32 r0.w, c13.z, r1.x, r0.w
-mad.f32 r1.x, c13.z, r1.w, r1.z
-mul.f r1.z, r3.w, r6.x
-mul.f r1.w, r4.y, r6.x
-max.f r3.y, r3.y, c21.z
-mov.f32f32 r3.w, r5.z
-mov.f32f32 r1.z, r1.z
+mov.f32f32 r1.x, r5.x
+mov.f32f32 r4.y, r1.z
+max.f r2.z, r2.x, c19.y
mad.f32 r0.w, c14.z, r1.y, r0.w
-mad.f32 r1.y, c18.x, r3.x, r1.z
-mad.f32 r1.z, c18.y, r3.x, r1.z
-mov.f32f32 r3.x, r3.y
-mov.f32f32 r7.w, r3.w
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.z, r1.z
-min.f r3.x, r3.x, c21.w
-mov.f32f32 r0.w, r0.w
-absneg.f r3.y, (neg)c5.z
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r3.x, r3.x
-mul.f r3.w, c12.y, r0.x
-mad.f32 r4.y, r0.w, r3.y, r6.z
-mad.f32 r1.x, c14.z, r2.x, r1.x
-mad.f32 r2.x, r1.w, r3.x, r5.y
-mad.f32 r4.z, r1.w, r3.x, r4.z
-mad.f32 r3.w, c13.y, r0.y, r3.w
-mov.f32f32 r4.y, r4.y
-add.f r1.y, r2.x, r1.y
-add.f r1.z, r4.z, r1.z
-mad.f32 r2.x, c14.y, r0.z, r3.w
-max.f r3.w, c19.y, r4.y
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.z, r1.z
-add.f r2.x, r2.x, c15.y
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r4.y, r1.y
-mul.f r4.z, c8.w, r1.y
-mul.f r5.y, c8.z, r1.y
-mul.f r5.z, c8.y, r1.y
-add.f r4.y, c4.x, (neg)r4.y
-mad.f32 r1.w, r1.w, r3.x, r2.x
-mul.f r2.x, c8.x, r1.y
-mov.f32f32 r3.x, r1.z
-mul.f r6.x, r4.y, r4.y
-add.f r6.y, c4.y, (neg)r1.w
-mad.f32 r6.z, c9.w, r1.w, r4.z
-mad.f32 r8.x, c9.z, r1.w, r5.y
-mad.f32 r5.z, c9.y, r1.w, r5.z
-mad.f32 r6.x, r6.y, r6.y, r6.x
-mov.f32f32 r6.z, r6.z
-mov.f32f32 r8.x, r8.x
-mov.f32f32 r5.z, r5.z
-mov.f32f32 r6.x, r6.x
-add.f r3.x, c4.z, (neg)r3.x
-mad.f32 r6.z, c10.w, r1.z, r6.z
-mad.f32 r8.x, c10.z, r1.z, r8.x
-mad.f32 r5.z, c10.y, r1.z, r5.z
-mad.f32 r6.x, r3.x, r3.x, r6.x
+absneg.f r1.y, (neg)c5.z
+mov.f32f32 r2.x, r2.y
+min.f r4.z, r2.z, c23.y
+mul.f r5.y, c12.z, r8.x
+mov.f32f32 r2.z, r0.w
+mov.f32f32 r2.y, r3.z
+min.f r3.z, r4.z, c21.y
+mad.f32 r0.w, r0.w, r1.y, r3.w
+mad.f32 r4.z, c13.z, r8.y, r5.y
+mul.f r9.x, c12.y, r8.x
+mov.f32f32 r3.w, r3.z
+mul.f r3.z, r4.w, r3.z
+mul.f r4.w, c12.y, r0.x
+max.f r9.y, c19.y, r0.w
+mul.f r0.w, r1.w, r3.w
+mad.f32 r1.w, c13.y, r0.y, r4.w
+mad.f32 r3.x, r3.z, r1.x, r3.x
+mad.f32 r1.w, c14.y, r0.z, r1.w
+mov.f32f32 r3.w, r0.w
+mad.f32 r0.w, c18.y, r1.z, r0.w
+mad.f32 r1.z, c18.x, r4.y, r3.w
+mad.f32 r2.w, r3.z, r5.x, r2.w
+add.f r1.w, r1.w, c15.y
+mov.f32f32 r4.y, r9.y
+add.f r1.z, r3.x, r1.z
+add.f r2.w, r2.w, r0.w
+mad.f32 r1.x, r3.z, r1.x, r1.w
+nop
+mov.f32f32 r1.w, r1.z
+mul.f r1.z, c0.x, r1.z
+mov.f32f32 r4.w, r2.w
+add.f r6.y, c4.y, (neg)r1.x
+add.f r6.x, c4.x, (neg)r1.w
+mul.f r0.w, c8.y, r1.w
+mul.f r3.x, c8.x, r1.w
+mul.f r7.w, c8.w, r1.w
+mul.f r3.z, r6.x, r6.x
+mad.f32 r0.w, c9.y, r1.x, r0.w
+mad.f32 r3.z, r6.y, r6.y, r3.z
+add.f r6.z, c4.z, (neg)r4.w
+mad.f32 r0.w, c10.y, r4.w, r0.w
+mad.f32 r3.x, c9.x, r1.x, r3.x
+mad.f32 r3.w, c9.w, r1.x, r7.w
+mad.f32 r3.z, r6.z, r6.z, r3.z
mul.f r0.x, c12.w, r0.x
-mad.f32 r2.x, c9.x, r1.w, r2.x
+mad.f32 r3.x, c10.x, r4.w, r3.x
+mad.f32 r3.w, c10.w, r4.w, r3.w
+mul.f r7.z, c8.z, r1.w
mad.f32 r0.x, c13.w, r0.y, r0.x
-mov.f32f32 r0.y, r3.x
-mov.f32f32 r8.y, r4.y
+mul.f r0.y, c0.w, r1.w
+rsq r3.z, r3.z
+(ss)mov.f32f32 r5.x, r3.z
+mad.f32 r3.y, r6.x, r3.z, r3.y
mad.f32 r0.x, c14.w, r0.z, r0.x
-rsq r0.z, r6.x
-(ss)mov.f32f32 r0.z, r0.z
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r9.x, r0.y
-mov.f32f32 r8.z, r8.y
-mad.f32 r0.y, r4.y, r0.z, r5.x
-mad.f32 r4.y, r6.y, r0.z, r6.w
-mad.f32 r0.z, r3.x, r0.z, r3.y
-add.f r0.x, r0.x, c15.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r3.x, r4.y
-mov.f32f32 r0.z, r0.z
-mad.f32 r3.y, c11.w, r0.x, r6.z
-mul.f r4.y, r0.y, r0.y
-mad.f32 r5.x, c11.z, r0.x, r8.x
-mad.f32 r4.y, r3.x, r3.x, r4.y
-mov.f32f32 r3.y, r3.y
-mad.f32 r5.z, c11.y, r0.x, r5.z
-mad.f32 r2.x, c10.x, r1.z, r2.x
-mov.f32f32 r4.y, r4.y
-mov.f32f32 r3.y, r3.y
-mad.f32 r4.y, r0.z, r0.z, r4.y
-mov.f32f32 r5.x, r5.x
-mov.f32f32 r5.z, r5.z
-mad.f32 r2.x, c11.x, r0.x, r2.x
-(ss)mul.f r6.x, c0.w, r1.y
-mul.f r6.z, c0.z, r1.y
-mul.f r6.w, c0.y, r1.y
-rsq r4.y, r4.y
-(ss)mov.f32f32 r4.y, r4.y
-mov.f32f32 r10.y, r3.y
-mov.f32f32 r3.y, r5.x
-mul.f r5.x, r5.z, c16.y
-mul.f r0.z, r0.z, r4.y
-mul.f r3.x, r3.x, r4.y
-mul.f r0.y, r0.y, r4.y
-mov.f32f32 r10.x, r3.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r9.w, r5.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r0.y, r0.y
+mad.f32 r0.z, c9.z, r1.x, r7.z
+(ss)mad.f32 r3.z, r6.y, r5.x, r4.x
+mov.f32f32 r4.x, r3.y
+mad.f32 r1.y, r6.z, r5.x, r1.y
nop
-mov.f32f32 r10.w, r0.z
-mov.f32f32 r10.z, r3.x
-mov.f32f32 r9.y, r0.y
-mov.f32f32 r0.y, r2.x
-mad.f32 r0.z, c1.w, r1.w, r6.x
-mad.f32 r2.x, c1.z, r1.w, r6.z
-mad.f32 r3.x, c1.y, r1.w, r6.w
-mul.f r0.y, r0.y, c16.x
-mad.f32 r0.z, c2.w, r1.z, r0.z
-mad.f32 r2.x, c2.z, r1.z, r2.x
-mad.f32 r3.x, c2.y, r1.z, r3.x
-mov.f32f32 r9.z, r0.y
-mad.f32 r0.y, c3.w, r0.x, r0.z
-mad.f32 r0.z, c3.z, r0.x, r2.x
-mad.f32 r2.x, c3.y, r0.x, r3.x
-mul.f r3.x, c0.x, r1.y
-mov.f32f32 r12.y, r0.y
-mov.f32f32 r12.x, r0.z
-mov.f32f32 r11.w, r2.x
-mad.f32 r0.y, c1.x, r1.w, r3.x
-mad.f32 r0.z, c7.x, r1.z, c7.y
-mad.f32 r0.y, c2.x, r1.z, r0.y
-mad.f32 r1.y, c7.x, r1.y, c7.y
-mad.f32 r0.x, c3.x, r0.x, r0.y
-mov.f32f32 r0.y, r0.z
-mov.f32f32 r11.y, r4.z
-mov.f32f32 r11.x, r5.y
-mov.f32f32 r11.z, r0.x
-mov.f32f32 r12.w, r0.y
-mov.f32f32 r0.x, r1.y
-mov.f32f32 r0.y, r6.y
-(rpt1)nop
-mov.f32f32 r12.z, r0.x
-mov.f32f32 r8.w, r0.y
-mul.f r0.x, r3.w, c6.z
-mul.f r0.y, r3.w, c6.y
-mul.f r0.z, r3.w, c6.x
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r7.z, r0.x
-mov.f32f32 r7.y, r0.y
-mov.f32f32 r7.x, r0.z
-mul.f r0.x, r4.x, r1.x
-mul.f r0.y, r0.w, r5.w
-mad.f32 r0.x, r0.w, r3.z, (neg)r0.x
-mad.f32 r0.y, r4.w, r1.x, (neg)r0.y
-mov.f32f32 r0.z, r1.x
-mov.f32f32 r1.x, r5.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.w, r0.z
-mov.f32f32 r1.z, r1.x
-mul.f r0.x, r0.x, r2.y
-mul.f r0.y, r0.y, r2.y
-mov.f32f32 r0.z, r3.z
-nop
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.y, r0.z
-mov.f32f32 r0.z, r0.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
+mov.f32f32 r5.x, r3.z
+mul.f r4.x, r4.x, r4.x
+mov.f32f32 r5.y, r1.y
+add.f r0.x, r0.x, c15.w
+mad.f32 r3.z, r3.z, r5.x, r4.x
+mad.f32 r0.z, c10.z, r4.w, r0.z
+mad.f32 r1.y, r1.y, r5.y, r3.z
+mad.f32 r0.w, c11.y, r0.x, r0.w
+mad.f32 r3.x, c11.x, r0.x, r3.x
+mad.f32 r3.w, c11.w, r0.x, r3.w
+mad.f32 r3.z, c11.z, r0.x, r0.z
+mad.f32 r0.y, c1.w, r1.x, r0.y
+mul.f r0.z, c0.z, r1.w
+rsq r1.y, r1.y
+(ss)mov.f32f32 r4.x, r1.y
+mul.f r6.w, r3.y, r1.y
+mul.f r3.y, r0.w, c16.y
+mul.f r3.x, r3.x, c16.x
+mul.f r7.y, r5.y, r4.x
+mul.f r7.x, r5.x, r4.x
+mad.f32 r0.y, c2.w, r4.w, r0.y
+mad.f32 r0.z, c1.z, r1.x, r0.z
+mad.f32 r0.w, c3.w, r0.x, r0.y
+mad.f32 r0.y, c2.z, r4.w, r0.z
+(ss)mul.f r1.y, c0.y, r1.w
+mad.f32 r0.z, c3.z, r0.x, r0.y
+mad.f32 r0.y, c1.y, r1.x, r1.y
+mad.f32 r1.x, c1.x, r1.x, r1.z
+mad.f32 r0.y, c2.y, r4.w, r0.y
+mad.f32 r1.x, c2.x, r2.w, r1.x
+mad.f32 r0.y, c3.y, r0.x, r0.y
+mad.f32 r0.x, c3.x, r0.x, r1.x
+mad.f32 r5.x, c7.x, r1.w, c7.y
+mad.f32 r5.y, c7.x, r4.w, c7.y
+mul.f r1.z, r4.y, c6.z
+mul.f r1.y, r4.y, c6.y
+mul.f r1.x, r9.y, c6.x
+mad.f32 r1.w, c14.z, r8.z, r4.z
+mad.f32 r2.w, c13.y, r8.y, r9.x
+mul.f r4.x, c12.x, r8.x
+mad.f32 r2.w, c14.y, r8.z, r2.w
+mov.f32f32 r4.z, r1.w
+mad.f32 r4.x, c13.x, r8.y, r4.x
(rpt1)nop
-mov.f32f32 r2.x, r0.x
-mov.f32f32 r3.w, r0.y
-mov.f32f32 r3.z, r0.z
-mov.f32f32 r0.x, r4.w
-mov.f32f32 r0.y, r4.x
+mul.f r4.w, r2.x, r4.z
+mad.f32 r8.x, c14.x, r8.z, r4.x
+mov.f32f32 r4.y, r2.w
(rpt1)nop
-mov.f32f32 r3.y, r0.x
-mov.f32f32 r3.x, r0.y
-mov.f32f32 r0.x, r2.w
-mov.f32f32 r0.y, r2.z
+mov.f32f32 r4.x, r8.x
+mul.f r8.y, r2.z, r4.y
+mul.f r8.x, r2.y, r8.x
+mad.f32 r1.w, r2.y, r1.w, (neg)r8.y
+mad.f32 r4.w, r2.z, r4.x, (neg)r4.w
+mad.f32 r8.x, r2.x, r2.w, (neg)r8.x
(rpt1)nop
-mov.f32f32 r13.y, r0.x
-mov.f32f32 r13.x, r0.y
+mul.f r4.w, r4.w, r8.w
+mul.f r2.w, r1.w, r8.w
+mul.f r1.w, r8.x, r8.w
end
-; VERT: outputs: r11.z (0:0) r7.x (5:9) r3.x (5:10) r9.z (5:11) r1.y (5:12) r12.z (5:13) r8.z (5:14) r10.z (5:15)
-; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r1.z (0:0,cm=f,il=16,b=0) r2.z (0:0,cm=3,il=20,b=0)
-; VERT: 304 instructions, 0 half, 14 full
-; pos: r11.z
+nop
+nop
+; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11) r4.x (5:12) r5.x (5:13) r6.x (5:14) r7.x (5:15)
+; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r8.x (0:0,cm=f,il=16,b=0) r5.z (0:0,cm=3,il=20,b=0)
+; VERT: 201 instructions, 0 half, 10 full
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-72.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-72.asm
index 82c1168..55cb4f7 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-72.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-72.asm
@@ -4,990 +4,677 @@
@in(r0.w) in3
@in(r1.x) in4
@in(r1.y) in5
-@out(r3.x) out0
-@out(r3.y) out1
-@out(r3.z) out2
-@out(r3.w) out3
-(sy)(ss)bary.f r0.x, 7, r1.x
-bary.f r0.y, 8, r1.x
+@out(r2.x) out0
+@out(r2.y) out1
+@out(r2.z) out2
+@out(r2.w) out3
+@const(c14.x) 0x3f000000, 0x00000000, 0x43160000, 0x3bdb8bac
+@const(c15.x) 0x41800000, 0x3f700000, 0x3d800000, 0x3d4ccccd
+@const(c16.x) 0x40000000, 0xbf800000, 0xbb449ba6, 0xbf000000
+@const(c17.x) 0x3f800000, 0xbf000000, 0x3de38866, 0x3fb8aa65
+@const(c18.x) 0x3cf5c28f, 0x00000000, 0x00000000, 0x00000000
+@const(c19.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
+(sy)(ss)bary.f r0.x, 20, r1.x
+bary.f r0.y, 7, r1.x
+bary.f r1.z, 8, r1.x
add.f r0.w, r0.w, c14.y
-bary.f r1.z, 9, r1.x
-mov.f32f32 r0.x, r0.x
-bary.f r1.w, 20, r1.x
-add.f r2.x, r0.y, c16.w
-add.f r2.y, r1.z, c16.w
-bary.f r2.z, 18, r1.x
-mul.f r2.w, r1.w, r0.x
-bary.f r3.x, 15, r1.x
-floor.f r3.y, r2.x
+bary.f r1.w, 9, r1.x
+mul.f r2.x, r0.x, r0.y
+bary.f r2.y, 21, r1.x
+bary.f r2.z, 15, r1.x
+add.f r2.w, r1.z, c16.w
+add.f r3.y, r1.w, c16.w
rcp r0.w, r0.w
add.f r0.z, r0.z, c14.y
-floor.f r3.z, r2.y
-mov.f32f32 r3.x, r3.x
-bary.f r3.w, 21, r1.x
-add.f r2.x, r2.x, (neg)r3.y
+mad.f32 r2.x, r2.y, r2.z, r2.x
+bary.f r3.z, 22, r1.x
+bary.f r3.w, 3, r1.x
+floor.f r4.x, r2.w
(ss)mul.f r0.z, r0.z, r0.w
-(ss)add.f r0.w, r2.y, (neg)r3.z
-mad.f32 r2.y, r3.w, r3.x, r2.w
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r0.z, r0.z
-absneg.f r2.w, (neg)c11.x
-mov.f32f32 r2.y, r2.y
-bary.f r3.y, 3, r1.x
-mul.f r3.z, c14.x, r2.x
-mul.f r2.w, r2.w, c11.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r3.y, r3.y
-bary.f r4.x, 22, r1.x
-mov.f32f32 r3.z, r3.z
-mul.f r2.w, r2.w, r0.z
-mul.f r4.y, c14.x, r0.w
-mad.f32 r2.y, r4.x, r3.y, r2.y
-add.f r0.y, r0.y, (neg)r3.z
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r3.z, r4.y
-mov.f32f32 r2.y, r2.y
-bary.f r4.y, 12, r1.x
-mov.f32f32 r0.y, r0.y
-mul.f r0.z, r2.w, r0.z
-add.f r1.z, r1.z, (neg)r3.z
-mul.f r2.w, r1.w, r4.y
-bary.f r3.z, 13, r1.x
-add.f r4.z, c17.y, r0.y
+(ss)absneg.f r0.w, (neg)c11.x
+mad.f32 r2.x, r3.z, r3.w, r2.x
+add.f r2.w, r2.w, (neg)r4.x
+floor.f r4.x, r3.y
+mul.f r0.w, r0.w, c11.x
+mov.f32f32 r4.y, r2.x
+bary.f r4.z, 12, r1.x
+mov.f32f32 r4.w, r2.w
+mul.f r0.w, r0.w, r0.z
mov.f32f32 r0.z, r0.z
-add.f r0.y, c17.x, r0.y
-mad.f32 r2.w, r3.w, r3.z, r2.w
-mov.f32f32 r4.z, r4.z
+mul.f r5.x, r0.x, r4.z
+bary.f r5.y, 13, r1.x
+mul.f r5.z, c14.x, r4.w
+mul.f r0.z, r0.w, r0.z
+add.f r0.w, r3.y, (neg)r4.x
+mad.f32 r3.y, r2.y, r5.y, r5.x
+bary.f r4.x, 14, r1.x
+add.f r1.z, r1.z, (neg)r5.z
mul.f r0.z, r0.z, c17.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r2.w, r2.w
-bary.f r4.w, 14, r1.x
-mul.f r4.z, r4.z, c5.z
-mov.f32f32 r0.z, r0.z
-mul.f r0.y, r0.y, c5.z
-mad.f32 r2.w, r4.x, r4.w, r2.w
-mov.f32f32 r5.x, r4.z
-mov.f32f32 r4.z, r4.z
-mov.f32f32 r5.y, r0.y
-mul.f r5.z, r2.w, r2.w
-mov.f32f32 r5.w, r5.x
-mad.f32 r5.x, r2.y, r2.y, r5.z
-mov.f32f32 r1.z, r1.z
+mov.f32f32 r5.x, r0.w
+mad.f32 r3.y, r3.z, r4.x, r3.y
+mov.f32f32 r5.z, r1.z
+add.f r1.z, c17.x, r1.z
+mul.f r5.w, c14.x, r5.x
+mul.f r6.x, r3.y, r3.y
+add.f r5.z, c17.y, r5.z
+mad.f32 r2.x, r2.x, r4.y, r6.x
+bary.f r6.x, 4, r1.x
exp2 r0.z, r0.z
-(ss)mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r5.x, r5.x
-bary.f r5.z, 4, r1.x
-add.f r6.x, c17.y, r1.z
-add.f r6.y, c19.y, (neg)r0.z
-mov.f32f32 r0.y, r0.y
-mul.f r6.z, r1.w, r5.z
-bary.f r6.w, 5, r1.x
-mov.f32f32 r6.x, r6.x
+(ss)mov.f32f32 r6.y, r0.z
+mul.f r6.z, r5.z, c5.z
+add.f r1.w, r1.w, (neg)r5.w
+mul.f r5.z, r0.x, r6.x
+bary.f r5.w, 5, r1.x
+mov.f32f32 r7.y, r6.z
+add.f r6.y, c19.y, (neg)r6.y
+mov.f32f32 r6.w, r1.w
+mad.f32 r5.z, r2.y, r5.w, r5.z
+bary.f r8.x, 6, r1.x
mul.f r6.y, r6.y, c11.y
-mul.f r0.z, r0.z, c17.x
-mad.f32 r6.z, r3.w, r6.w, r6.z
-mul.f r7.x, r6.x, c5.w
-mov.f32f32 r7.y, r0.y
-add.f r0.y, c17.x, r1.z
-mov.f32f32 r1.z, r6.z
-bary.f r6.z, 6, r1.x
-mov.f32f32 r6.x, r7.x
+(ss)mul.f r0.z, r0.z, c17.x
+add.f r6.w, c17.y, r6.w
+mad.f32 r5.z, r3.z, r8.x, r5.z
+mul.f r8.y, r1.z, c5.z
+add.f r1.z, c17.x, r1.w
add.f r0.z, r0.z, r6.y
-mov.f32f32 r0.y, r0.y
-mad.f32 r1.z, r4.x, r6.z, r1.z
-mov.f32f32 r6.x, r6.x
-bary.f r6.y, 10, r1.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r1.z, r1.z
-mul.f r0.y, r0.y, c5.w
-add.f r8.x, r6.y, c16.z
-mov.f32f32 r8.y, r5.y
-mad.f32 r5.x, r1.z, r1.z, r5.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r5.y, r8.x
-mov.f32f32 r7.z, r0.y
-mov.f32f32 r7.x, r7.x
-mov.f32f32 r9.x, r4.z
-mov.f32f32 r6.y, r5.y
-rsq r4.z, r5.x
-(ss)mov.f32f32 r4.z, r4.z
+mov.f32f32 r1.w, r5.z
+mul.f r9.y, r6.w, c5.w
+mov.f32f32 r9.x, r8.y
+mul.f r8.z, r1.z, c5.w
+mad.f32 r1.z, r5.z, r1.w, r2.x
+mov.f32f32 r7.z, r9.y
max.f r0.z, r0.z, c14.y
-(ss)mov.f32f32 r5.x, r7.z
-mov.f32f32 r8.z, r7.x
-mul.f r1.z, r1.z, r4.z
+bary.f r2.x, 10, r1.x
+mov.f32f32 r6.w, r8.z
+add.f r4.w, c16.x, (neg)r4.w
+mul.f r0.x, r0.x, r0.x
+rsq r1.z, r1.z
+(ss)mov.f32f32 r5.z, r1.z
+add.f r8.w, r2.x, c16.z
min.f r0.z, r0.z, c17.x
-sam.s (f32)(x)r5.y, r5.w, s#4, t#4
-(sy)mov.f32f32 r5.y, r5.y
-mov.f32f32 r7.z, r5.x
-mov.f32f32 r1.z, r1.z
-add.f r5.x, c19.y, (neg)r0.z
-(ss)add.f r5.w, c19.y, (neg)r0.z
-add.f r6.x, c19.y, (neg)r0.z
-mul.f r1.z, r1.z, c15.x
-mov.f32f32 r5.y, r5.y
-add.f r6.y, c16.x, (neg)r2.x
-mul.f r5.x, r5.x, c10.z
-mov.f32f32 r1.z, r1.z
-mul.f r5.w, r5.w, c10.y
-mul.f r6.x, r6.x, c10.x
-mov.f32f32 r6.y, r6.y
-add.f r7.x, c16.x, (neg)r0.w
-mov.f32f32 r7.w, r8.x
-mul.f r2.y, r2.y, r4.z
-rcp r1.z, r1.z
-(ss)mov.f32f32 r1.z, r1.z
-mul.f r2.w, r2.w, r4.z
-mov.f32f32 r4.z, r7.x
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r7.w, r7.w
-mov.f32f32 r2.w, r2.w
-mul.f r7.x, r6.y, r4.z
-mov.f32f32 r2.y, r2.y
-mul.f r1.w, r1.w, r1.w
-absneg.f r2.w, (neg)r2.w
-mad.f32 r1.w, r3.w, r3.w, r1.w
-mul.f r3.w, r7.x, r5.y
-mov.f32f32 r5.y, r8.x
-sam.s (f32)(x)r7.x, r7.y, s#4, t#4
-(sy)mov.f32f32 r7.x, r7.x
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r0.y, r0.y
-mad.f32 r1.w, r4.x, r4.x, r1.w
-mov.f32f32 r8.w, r5.y
-mov.f32f32 r4.x, r2.z
-add.f r2.x, r2.x, c17.x
+(ss)mul.f r1.z, r3.y, r1.z
+mul.f r1.w, r1.w, r5.z
+mov.f32f32 r7.w, r8.w
+add.f r2.x, c19.y, (neg)r0.z
+add.f r3.y, c19.y, (neg)r0.z
+mul.f r1.w, r1.w, c15.x
+add.f r6.y, c19.y, (neg)r0.z
+mov.f32f32 r9.z, r8.w
+mov.f32f32 r7.x, r8.w
+nop
+sam.s (f32)(x)r9.w, r7.y, s#4, t#4
+(ss)mul.f r7.y, r2.x, c10.z
+mov.f32f32 r2.x, r4.w
+rcp r1.w, r1.w
+(ss)mov.f32f32 r7.z, r1.w
+mul.f r4.y, r4.y, r5.z
+mad.f32 r0.x, r2.y, r2.y, r0.x
+add.f r2.y, c16.x, (neg)r5.x
+mad.f32 r0.x, r3.z, r3.z, r0.x
+mul.f r3.y, r3.y, c10.y
+mul.f r3.z, r6.y, c10.x
+mov.f32f32 r5.x, r2.y
+absneg.f r1.z, (neg)r1.z
+sam.s (f32)(x)r10.x, r9.x, s#4, t#4
+sam.s (f32)(x)r6.y, r6.z, s#4, t#4
+sam.s (f32)(x)r8.y, r8.y, s#4, t#4
+add.f r2.w, r2.w, c17.x
add.f r0.w, r0.w, c17.x
-bary.f r5.y, 16, r1.x
-(ss)mov.f32f32 r7.y, r4.x
-sqrt r1.w, r1.w
-(ss)mov.f32f32 r1.w, r1.w
-sam.s (f32)(x)r4.x, r8.y, s#4, t#4
-(sy)mov.f32f32 r4.x, r4.x
-mul.f r4.z, r2.x, r4.z
-mov.f32f32 r9.y, r0.y
-add.f r0.y, c14.z, (neg)r1.w
-mov.f32f32 r1.w, r8.x
-bary.f r7.w, 19, r1.x
-mul.f r6.y, r6.y, r0.w
-mov.f32f32 r0.y, r0.y
-mad.f32 r3.w, r4.z, r4.x, r3.w
-mov.f32f32 r9.z, r1.w
-mov.f32f32 r1.w, r7.w
-mul.f r0.y, c12.z, r0.y
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r4.x, r5.y
-mov.f32f32 r7.z, r1.w
-mov.f32f32 r0.y, r0.y
-sam.s (f32)(x)r1.w, r9.x, s#4, t#4
-(sy)mov.f32f32 r1.w, r1.w
-mov.f32f32 r8.x, r4.x
-bary.f r4.x, 17, r1.x
-mul.f r0.y, r0.y, c14.w
-mad.f32 r1.w, r6.y, r1.w, r3.w
-(ss)nop
-sam (f32)(w)r8.y, r7.y, s#2, t#2
-(sy)cmps.f.lt r3.w, r9.x, c15.y
-mov.f32f32 r4.x, r4.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.w, r1.w
-mul.f r0.w, r2.x, r0.w
-cov.u32f32 r2.x, r3.w
-mul.f r2.w, r2.w, r0.y
-mul.f r0.y, r2.y, r0.y
-mad.f32 r0.w, r0.w, r7.x, r1.w
-cmps.f.ne r1.w, r2.x, c14.y
-mov.f32f32 r2.x, r2.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.y, c15.z
-mul.f r2.x, r2.x, r1.z
-mul.f r0.y, r0.y, r1.z
+sqrt r0.x, r0.x
+(ss)add.f r0.x, c14.z, (neg)r0.x
+mul.f r2.x, r2.x, r5.x
+mul.f r2.y, r2.w, r2.y
+mul.f r4.w, r4.w, r0.w
+mul.f r0.x, c12.z, r0.x
+(sy)mul.f r2.x, r2.x, r9.w
+bary.f r6.z, 18, r1.x
+mul.f r0.w, r2.w, r0.w
+mul.f r0.x, r0.x, c14.w
+mad.f32 r2.x, r2.y, r10.x, r2.x
+bary.f r6.w, 19, r1.x
+bary.f r8.z, 16, r1.x
+mov.f32f32 r2.y, r0.x
+mad.f32 r2.x, r4.w, r6.y, r2.x
+mul.f r0.x, r1.z, r0.x
+mad.f32 r0.w, r0.w, r8.y, r2.x
+mul.f r1.z, r4.y, r2.y
+sam (f32)(w)r9.x, r6.z, s#2, t#2
+bary.f r8.w, 17, r1.x
+mul.f r0.x, r0.x, r1.w
+(sy)cmps.f.lt r1.w, r9.w, c15.y
+mul.f r1.z, r1.z, r7.z
mul.f r0.w, c17.z, r0.w
-mov.f32f32 r1.z, c14.y
-mov.f32f32 r2.x, r2.x
+mov.f32f32 r2.x, r0.x
+cov.u32f32 r1.w, r1.w
+mov.f32f32 r2.y, r1.z
+cmps.f.lt r2.w, r9.w, c15.y
+mov.f32f32 r4.y, r0.w
+cmps.f.ne r1.w, r1.w, c14.y
+mov.f32f32 r4.w, c14.y
+mov.f32f32 r5.x, c14.y
+cov.u32f32 r2.w, r2.w
+sam (f32)(w)r8.y, r8.z, s#1, t#1
+(sy)cmps.f.lt r5.z, r9.x, c18.x
+sel.b32 r1.z, r1.z, r1.w, r4.w
+sel.b32 r0.x, r0.x, r1.w, r5.x
+(rpt1)nop
+add.f r1.w, r6.w, r1.z
+add.f r1.z, r6.z, r0.x
+cmps.f.ne r0.x, r2.w, c14.y
+cov.u32f32 r2.w, r5.z
+mov.f32f32 r4.w, c15.z
+mov.f32f32 r5.x, c14.y
mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.w, r0.w
-sel.b32 r1.z, r2.y, r1.w, r1.z
-mov.f32f32 r1.w, r2.x
-mov.f32f32 r2.y, r0.y
-mov.f32f32 r2.w, r0.y
-mov.f32f32 r3.w, r0.y
-mov.f32f32 r1.w, r1.w
-cmps.f.lt r4.z, r9.x, c15.y
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r2.w, r2.w
+mov.f32f32 r5.z, r2.z
+sam (f32)(w)r6.y, r1.z, s#2, t#2
+cmps.f.ne r2.z, r2.w, c14.y
+sel.b32 r0.x, r4.w, r0.x, r5.x
+mov.f32f32 r2.w, c14.y
mov.f32f32 r3.w, r3.w
-cov.u32f32 r4.z, r4.z
-mov.f32f32 r5.y, r2.x
-mov.f32f32 r6.y, r0.y
-mov.f32f32 r7.x, r2.x
-cmps.f.ne r4.z, r4.z, c14.y
-(ss)mov.f32f32 r7.y, c14.y
-mov.f32f32 r7.z, c14.y
-mov.f32f32 r5.y, r5.y
-mov.f32f32 r6.y, r6.y
-sel.b32 r1.w, r1.w, r4.z, r7.y
-sel.b32 r2.y, r2.y, r4.z, r7.z
-mov.f32f32 r4.z, r7.x
-mov.f32f32 r7.x, r0.y
-add.f r1.w, r2.z, r1.w
-add.f r2.y, r7.w, r2.y
-mov.f32f32 r2.z, r2.x
-mov.f32f32 r7.y, r0.y
-mov.f32f32 r7.z, r1.w
-mov.f32f32 r7.w, r2.y
-mov.f32f32 r7.x, r7.x
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r8.y, r7.z
-mov.f32f32 r8.z, r7.w
-mov.f32f32 r7.y, r7.y
-mov.f32f32 r7.z, r2.x
-mov.f32f32 r7.w, r0.y
-mov.f32f32 r8.w, r2.x
-mov.f32f32 r9.x, r0.y
-mov.f32f32 r9.y, r2.x
-sam (f32)(w)r9.z, r8.y, s#2, t#2
-add.f r1.z, c15.y, (neg)r1.z
-mov.f32f32 r7.z, r7.z
-mov.f32f32 r7.w, r7.w
-(ss)mov.f32f32 r8.z, r8.w
-(sy)cmps.f.lt r8.y, r10.y, r1.z
-cmps.f.lt r8.w, r10.y, r1.z
-mov.f32f32 r9.x, r9.x
-mov.f32f32 r9.y, r9.y
+mov.f32f32 r4.w, (0.000000)
+add.f r0.x, c15.y, (neg)r0.x
+sel.b32 r5.x, r2.w, r2.z, r9.x
+bary.f r6.y, 2, r1.x
+(ss)bary.f r6.z, 1, r1.x
+(sy)cmps.f.lt r2.z, r7.x, r0.x
+cmps.f.lt r2.w, r7.x, r0.x
+bary.f r6.w, 23, r1.x
+bary.f r7.x, 24, r1.x
+cov.u32f32 r2.z, r2.z
+cov.u32f32 r2.w, r2.w
+bary.f r7.z, 25, r1.x
+bary.f (ei)r1.x, 0, r1.x
+cmps.f.ne r1.y, r2.z, c14.y
+mov.f32f32 r2.z, c14.y
+cmps.f.ne r2.w, r2.w, c14.y
+mov.f32f32 r7.w, c14.y
+mov.f32f32 r8.y, c15.z
+sel.b32 r2.z, r2.x, r1.y, r2.z
+mov.f32f32 r8.z, c14.y
+sel.b32 r1.y, r2.y, r1.y, r7.w
+mov.f32f32 r7.w, c14.y
+add.f r1.z, r1.z, r2.z
+sel.b32 r2.z, r8.y, r2.w, r8.z
+add.f r8.z, r1.w, r1.y
+mov.f32f32 r1.y, c14.y
+mov.f32f32 r8.y, r1.z
+add.f r0.x, r0.x, (neg)r2.z
+mov.f32f32 r1.w, r8.z
+mov.f32f32 r2.z, c14.y
+mov.f32f32 r2.w, c15.z
+mov.f32f32 r8.w, c14.y
+mov.f32f32 r9.x, c14.y
+sam (f32)(w)r9.y, r8.y, s#2, t#2
+(sy)(ss)cmps.f.lt r8.y, r10.x, r0.x
+mov.f32f32 r0.x, r0.x
+mov.f32f32 r8.z, c14.y
+mov.f32f32 r9.y, c15.z
cov.u32f32 r8.y, r8.y
-cov.u32f32 r8.w, r8.w
-mov.f32f32 r9.z, r0.y
-mov.f32f32 r9.w, r2.x
-mov.f32f32 r8.y, r8.y
-cmps.f.ne r8.w, r8.w, c14.y
-mov.f32f32 r10.x, c15.z
-mov.f32f32 r10.y, c14.y
+cmps.f.lt r9.z, r10.x, r0.x
+mov.f32f32 r9.w, c14.y
+mov.f32f32 r10.x, c14.y
cmps.f.ne r8.y, r8.y, c14.y
-mov.f32f32 r10.z, r2.x
-mov.f32f32 r10.w, r0.y
-sel.b32 r8.w, r10.x, r8.w, r10.y
-mov.f32f32 r9.z, r9.z
-mov.f32f32 r10.x, r10.z
mov.f32f32 r10.y, c14.y
-mov.f32f32 r10.z, r10.w
+cov.u32f32 r9.z, r9.z
+mov.f32f32 r10.z, c14.y
mov.f32f32 r10.w, c14.y
-add.f r1.z, r1.z, (neg)r8.w
-sel.b32 r8.w, r10.x, r8.y, r10.y
-mov.f32f32 r9.w, r9.w
-mov.f32f32 r10.x, r0.y
-mov.f32f32 r10.y, r2.x
-add.f r1.w, r1.w, r8.w
-sel.b32 r8.y, r10.z, r8.y, r10.w
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r8.w, r10.x
-mov.f32f32 r1.w, r1.w
-add.f r2.y, r2.y, r8.y
-mov.f32f32 r10.x, r10.y
-mov.f32f32 r8.y, r0.y
-mov.f32f32 r10.y, r1.w
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r10.z, r2.x
-mov.f32f32 r10.w, r0.y
-mov.f32f32 r11.x, r10.y
-mov.f32f32 r10.y, r2.y
-mov.f32f32 r11.z, r8.y
-mov.f32f32 r10.z, r10.z
-mov.f32f32 r10.w, r10.w
-mov.f32f32 r11.y, r10.y
-mov.f32f32 r8.y, r2.x
-mov.f32f32 r10.y, r0.y
-mov.f32f32 r11.w, r2.x
-mov.f32f32 r12.x, r0.y
-mov.f32f32 r12.y, r2.x
-mov.f32f32 r12.z, r0.y
-sam (f32)(w)r12.w, r11.x, s#2, t#2
-(sy)(ss)cmps.f.lt r11.x, r13.z, r1.z
-cmps.f.lt r11.y, r13.z, r1.z
-mov.f32f32 r12.w, r8.y
-mov.f32f32 r10.y, r10.y
-cov.u32f32 r8.y, r11.x
-cov.u32f32 r11.x, r11.y
-mov.f32f32 r11.y, r11.w
-mov.f32f32 r11.w, r12.x
-mov.f32f32 r8.y, r8.y
-cmps.f.ne r11.x, r11.x, c14.y
-mov.f32f32 r12.x, c15.z
+sel.b32 r10.y, r2.x, r8.y, r10.y
+cmps.f.ne r9.z, r9.z, c14.y
+sel.b32 r8.y, r2.y, r8.y, r10.z
+mov.f32f32 r10.z, c15.z
+add.f r1.z, r1.z, r10.y
+mov.f32f32 r10.y, c14.y
+add.f r11.y, r1.w, r8.y
+mov.f32f32 r1.w, c15.z
+mov.f32f32 r11.x, r1.z
+sel.b32 r8.y, r10.z, r9.z, r10.y
+mov.f32f32 r9.z, r11.y
+mov.f32f32 r10.y, c14.y
+mov.f32f32 r10.z, c14.y
+mov.f32f32 r11.z, c14.y
+mov.f32f32 r11.w, c15.z
+sam (f32)(w)r12.x, r11.x, s#2, t#2
+add.f r0.x, r0.x, (neg)r8.y
+mov.f32f32 r8.y, c14.y
+(ss)mov.f32f32 r11.x, c14.y
+mov.f32f32 r11.y, c14.y
+(sy)cmps.f.lt r12.x, r12.w, r0.x
+mov.f32f32 r0.x, r0.x
+mov.f32f32 r12.y, c15.z
+mov.f32f32 r12.z, c14.y
+cov.u32f32 r12.x, r12.x
+cmps.f.lt r12.w, r12.w, r0.x
mov.f32f32 r13.x, c14.y
-cmps.f.ne r8.y, r8.y, c14.y
-mov.f32f32 r13.y, r2.x
-mov.f32f32 r12.z, r12.z
+mov.f32f32 r13.y, c14.y
+cmps.f.ne r12.x, r12.x, c14.y
mov.f32f32 r13.z, c14.y
-sel.b32 r11.x, r12.x, r11.x, r13.x
-mov.f32f32 r12.x, r13.y
-mov.f32f32 r13.x, c14.y
-sel.b32 r12.z, r12.z, r8.y, r13.z
-add.f r1.z, r1.z, (neg)r11.x
-mov.f32f32 r11.x, r12.y
-sel.b32 r8.y, r12.x, r8.y, r13.x
-add.f r2.y, r2.y, r12.z
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r12.x, r2.x
-add.f r1.w, r1.w, r8.y
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r8.y, r4.x
-bary.f r4.x, 23, r1.x
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r12.y, r2.y
-mov.f32f32 r12.x, r12.x
-mov.f32f32 r4.x, r4.x
-mov.f32f32 r12.z, r1.w
-mov.f32f32 r13.y, r12.y
-sam (f32)(w)r13.z, r8.x, s#1, t#1
-(sy)(ss)cmps.f.lt r8.x, r14.y, c18.x
-mov.f32f32 r8.y, r14.y
-mov.f32f32 r13.x, r12.z
-bary.f r12.y, 24, r1.x
-bary.f r12.z, 25, r1.x
-mov.f32f32 r13.z, (0.000000)
-cov.u32f32 r8.x, r8.x
-mov.f32f32 r8.y, r8.y
-mov.f32f32 r12.y, r12.y
-sam (f32)(w)r13.w, r13.x, s#2, t#2
-(sy)(ss)cmps.f.lt r13.x, r14.z, r1.z
-cmps.f.lt r13.y, r14.z, r1.z
-cmps.f.ne r8.x, r8.x, c14.y
-nop
-cov.u32f32 r13.x, r13.x
-cov.u32f32 r13.y, r13.y
+cov.u32f32 r12.w, r12.w
mov.f32f32 r13.w, c14.y
-mov.f32f32 r12.z, r12.z
-mov.f32f32 r13.x, r13.x
-cmps.f.ne r13.y, r13.y, c14.y
mov.f32f32 r14.x, c15.z
-mov.f32f32 r14.y, c14.y
-cmps.f.ne r13.x, r13.x, c14.y
-mov.f32f32 r14.z, c14.y
-mov.f32f32 r14.w, c14.y
-sel.b32 r13.y, r14.x, r13.y, r14.y
-sel.b32 r8.x, r13.w, r8.x, r8.y
-sel.b32 r8.y, r12.x, r13.x, r14.z
-sel.b32 r11.w, r11.w, r13.x, r14.w
-add.f r1.z, r1.z, (neg)r13.y
-bary.f r12.x, 2, r1.x
-add.f r1.w, r1.w, r8.y
-add.f r2.y, r2.y, r11.w
-mov.f32f32 r1.z, r1.z
-bary.f r8.y, 1, r1.x
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r2.y, r2.y
-bary.f (ei)r1.x, 0, r1.x
-mov.f32f32 r1.y, c14.y
-mov.f32f32 r11.w, r1.w
-mov.f32f32 r13.x, r2.y
-mov.f32f32 r13.y, c14.y
+sel.b32 r13.z, r2.x, r12.x, r13.z
+cmps.f.ne r12.w, r12.w, c14.y
+sel.b32 r12.x, r2.y, r12.x, r13.w
+mov.f32f32 r13.w, c15.z
+add.f r1.z, r1.z, r13.z
+mov.f32f32 r13.z, c14.y
+add.f r14.z, r9.z, r12.x
+mov.f32f32 r9.z, c14.y
+mov.f32f32 r14.y, r1.z
+sel.b32 r12.x, r13.w, r12.w, r13.z
+mov.f32f32 r12.w, r14.z
+mov.f32f32 r13.z, c14.y
mov.f32f32 r13.w, c14.y
-mov.f32f32 r14.x, r11.w
-mov.f32f32 r14.y, r13.x
-mov.f32f32 r11.w, c15.z
-mov.f32f32 r13.x, c14.y
-mov.f32f32 r14.z, c14.y
-mov.f32f32 r14.w, c14.y
-mov.f32f32 r15.x, c15.z
-mov.f32f32 r15.y, c14.y
-sam (f32)(w)r15.z, r14.x, s#2, t#2
-(sy)(ss)cmps.f.lt r14.x, r16.y, r1.z
-cmps.f.lt r14.y, r16.y, r1.z
+mov.f32f32 r14.w, c15.z
+mov.f32f32 r15.x, c14.y
+sam (f32)(w)r15.y, r14.y, s#2, t#2
+add.f r0.x, r0.x, (neg)r12.x
+mov.f32f32 r12.x, c14.y
+(ss)mov.f32f32 r14.y, c14.y
+mov.f32f32 r14.z, c15.z
+(sy)cmps.f.lt r15.y, r16.x, r0.x
+mov.f32f32 r0.x, r0.x
mov.f32f32 r15.z, c14.y
mov.f32f32 r15.w, c14.y
-cov.u32f32 r14.x, r14.x
-cov.u32f32 r14.y, r14.y
-mov.f32f32 r16.x, c15.z
+cov.u32f32 r15.y, r15.y
+cmps.f.lt r16.x, r16.x, r0.x
mov.f32f32 r16.y, c14.y
-mov.f32f32 r14.x, r14.x
-cmps.f.ne r14.y, r14.y, c14.y
mov.f32f32 r16.z, c15.z
+cmps.f.ne r15.y, r15.y, c14.y
mov.f32f32 r16.w, c14.y
-cmps.f.ne r14.x, r14.x, c14.y
+cov.u32f32 r16.x, r16.x
mov.f32f32 r17.x, c14.y
mov.f32f32 r17.y, c14.y
-sel.b32 r14.y, r16.z, r14.y, r16.w
-mov.f32f32 r16.z, c14.y
-sel.b32 r11.x, r11.x, r14.x, r17.x
-sel.b32 r10.y, r10.y, r14.x, r17.y
-add.f r1.z, r1.z, (neg)r14.y
-mov.f32f32 r14.x, c14.y
-add.f r1.w, r1.w, r11.x
-add.f r2.y, r2.y, r10.y
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r10.y, c15.z
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r11.x, c14.y
-mov.f32f32 r14.y, c14.y
-mov.f32f32 r16.w, r1.w
-mov.f32f32 r17.x, r2.y
-mov.f32f32 r17.y, c14.y
-mov.f32f32 r17.z, c15.z
-mov.f32f32 r17.w, r16.w
-mov.f32f32 r18.x, r17.x
+sel.b32 r16.w, r2.x, r15.y, r16.w
+cmps.f.ne r16.x, r16.x, c14.y
+sel.b32 r15.y, r2.y, r15.y, r17.x
+mov.f32f32 r17.x, c15.z
+add.f r1.z, r1.z, r16.w
mov.f32f32 r16.w, c14.y
-mov.f32f32 r17.x, c14.y
-mov.f32f32 r18.y, c14.y
-mov.f32f32 r18.z, c15.z
-mov.f32f32 r18.w, c14.y
-mov.f32f32 r19.x, c14.y
-sam (f32)(w)r19.y, r17.w, s#2, t#2
-(sy)(ss)cmps.f.lt r17.w, r20.x, r1.z
-cmps.f.lt r18.x, r20.x, r1.z
-mov.f32f32 r19.y, c14.y
-mov.f32f32 r19.z, c15.z
-cov.u32f32 r17.w, r17.w
-cov.u32f32 r18.x, r18.x
-mov.f32f32 r19.w, c14.y
-mov.f32f32 r20.x, c14.y
-mov.f32f32 r17.w, r17.w
-cmps.f.ne r18.x, r18.x, c14.y
-mov.f32f32 r20.y, c15.z
-mov.f32f32 r20.z, c14.y
-cmps.f.ne r17.w, r17.w, c14.y
-mov.f32f32 r20.w, c14.y
-mov.f32f32 r21.x, c14.y
-sel.b32 r18.x, r20.y, r18.x, r20.z
-mov.f32f32 r20.y, c14.y
-sel.b32 r11.y, r11.y, r17.w, r20.w
-sel.b32 r10.w, r10.w, r17.w, r21.x
-add.f r1.z, r1.z, (neg)r18.x
-mov.f32f32 r17.w, c15.z
-add.f r1.w, r1.w, r11.y
-add.f r2.y, r2.y, r10.w
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r10.w, c14.y
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r11.y, c14.y
+add.f r17.w, r12.w, r15.y
+mov.f32f32 r12.w, c14.y
+mov.f32f32 r17.z, r1.z
+sel.b32 r15.y, r17.x, r16.x, r16.w
+mov.f32f32 r16.x, r17.w
+mov.f32f32 r16.w, c14.y
+mov.f32f32 r17.x, c15.z
mov.f32f32 r18.x, c14.y
-mov.f32f32 r20.z, r1.w
-mov.f32f32 r20.w, r2.y
-mov.f32f32 r21.x, c15.z
-mov.f32f32 r21.y, c14.y
-mov.f32f32 r21.z, r20.z
-mov.f32f32 r21.w, r20.w
-mov.f32f32 r20.z, c14.y
-(rpt4)nop
-sam (f32)(w)r21.z, r21.z, s#2, t#2
-(sy)cmps.f.lt r20.w, r22.y, r1.z
-(ss)cmps.f.lt r21.z, r22.y, r1.z
-(rpt1)nop
-cov.u32f32 r20.w, r20.w
-cov.u32f32 r21.z, r21.z
+mov.f32f32 r18.y, c14.y
+sam (f32)(w)r18.z, r17.z, s#2, t#2
+add.f r0.x, r0.x, (neg)r15.y
+(rpt2)nop
+(sy)cmps.f.lt r15.y, r19.y, r0.x
+mov.f32f32 r0.x, r0.x
(rpt1)nop
-mov.f32f32 r20.w, r20.w
-cmps.f.ne r21.z, r21.z, c14.y
+cov.u32f32 r15.y, r15.y
+(ss)cmps.f.lt r17.z, r19.y, r0.x
(rpt1)nop
-cmps.f.ne r20.w, r20.w, c14.y
-sel.b32 r18.x, r21.x, r21.z, r18.x
+cmps.f.ne r15.y, r15.y, c14.y
+cov.u32f32 r17.z, r17.z
(rpt1)nop
-sel.b32 r12.w, r12.w, r20.w, r20.z
-sel.b32 r11.z, r11.z, r20.w, r21.y
-add.f r1.z, r1.z, (neg)r18.x
+sel.b32 r17.w, r2.x, r15.y, r18.y
+cmps.f.ne r17.z, r17.z, c14.y
+sel.b32 r15.y, r2.y, r15.y, r18.x
nop
-add.f r1.w, r1.w, r12.w
-add.f r2.y, r2.y, r11.z
-mov.f32f32 r1.z, r1.z
+add.f r1.z, r1.z, r17.w
+sel.b32 r16.w, r17.x, r17.z, r16.w
+add.f r17.w, r16.x, r15.y
nop
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r2.y, r2.y
-(rpt1)nop
-mov.f32f32 r11.z, r1.w
-mov.f32f32 r12.w, r2.y
-(rpt1)nop
-mov.f32f32 r20.z, r11.z
-mov.f32f32 r20.w, r12.w
-(rpt5)nop
-sam (f32)(w)r20.z, r20.z, s#2, t#2
-(sy)cmps.f.lt r11.z, r21.y, r1.z
-cmps.f.lt r12.w, r21.y, r1.z
+mov.f32f32 r17.z, r1.z
+add.f r0.x, r0.x, (neg)r16.w
+mov.f32f32 r15.y, r17.w
+(rpt3)nop
+sam (f32)(w)r17.z, r17.z, s#2, t#2
+(sy)cmps.f.lt r16.x, r18.y, r0.x
+mov.f32f32 r0.x, r0.x
(rpt1)nop
-cov.u32f32 r11.z, r11.z
-cov.u32f32 r12.w, r12.w
+cov.u32f32 r16.x, r16.x
+cmps.f.lt r16.w, r18.y, r0.x
(rpt1)nop
-mov.f32f32 r11.z, r11.z
-cmps.f.ne r12.w, r12.w, c14.y
+cmps.f.ne r16.x, r16.x, c14.y
+cov.u32f32 r16.w, r16.w
(rpt1)nop
-cmps.f.ne r11.z, r11.z, c14.y
-sel.b32 r12.w, r17.w, r12.w, r20.y
-(rpt1)nop
-sel.b32 r10.z, r10.z, r11.z, r11.y
-sel.b32 r8.w, r8.w, r11.z, r10.w
-add.f r1.z, r1.z, (neg)r12.w
+sel.b32 r12.w, r2.x, r16.x, r12.w
+cmps.f.ne r16.w, r16.w, c14.y
+sel.b32 r16.x, r2.y, r16.x, r17.y
nop
-add.f r1.w, r1.w, r10.z
-add.f r2.y, r2.y, r8.w
-mov.f32f32 r1.z, r1.z
+add.f r1.z, r1.z, r12.w
+sel.b32 r12.w, r16.z, r16.w, r16.y
+add.f r16.y, r15.y, r16.x
nop
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r2.y, r2.y
-(rpt1)nop
-mov.f32f32 r8.w, r1.w
-mov.f32f32 r10.z, r2.y
-(rpt1)nop
-mov.f32f32 r11.y, r8.w
-mov.f32f32 r11.z, r10.z
-(rpt5)nop
-sam (f32)(w)r20.y, r11.y, s#2, t#2
-(sy)cmps.f.lt r8.w, r21.x, r1.z
-cmps.f.lt r10.z, r21.x, r1.z
-(rpt1)nop
-cov.u32f32 r8.w, r8.w
-cov.u32f32 r10.z, r10.z
+mov.f32f32 r16.x, r1.z
+add.f r0.x, r0.x, (neg)r12.w
+mov.f32f32 r12.w, r16.y
+(rpt3)nop
+sam (f32)(w)r16.x, r16.x, s#2, t#2
+(sy)cmps.f.lt r15.y, r16.w, r0.x
+mov.f32f32 r0.x, r0.x
(rpt1)nop
-mov.f32f32 r8.w, r8.w
-cmps.f.ne r10.z, r10.z, c14.y
+cov.u32f32 r15.y, r15.y
+(ss)cmps.f.lt r16.x, r16.w, r0.x
(rpt1)nop
-cmps.f.ne r8.w, r8.w, c14.y
-sel.b32 r10.z, r19.z, r10.z, r19.y
+cmps.f.ne r15.y, r15.y, c14.y
+cov.u32f32 r16.x, r16.x
(rpt1)nop
-sel.b32 r10.x, r10.x, r8.w, r20.x
-sel.b32 r8.w, r9.z, r8.w, r19.w
-add.f r1.z, r1.z, (neg)r10.z
+sel.b32 r15.w, r2.x, r15.y, r15.w
+cmps.f.ne r16.x, r16.x, c14.y
+sel.b32 r15.y, r2.y, r15.y, r15.z
nop
-add.f r1.w, r1.w, r10.x
-add.f r2.y, r2.y, r8.w
-mov.f32f32 r1.z, r1.z
+add.f r1.z, r1.z, r15.w
+sel.b32 r14.y, r14.z, r16.x, r14.y
+add.f r15.z, r12.w, r15.y
nop
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r2.y, r2.y
-(rpt1)nop
-mov.f32f32 r8.w, r1.w
-mov.f32f32 r9.z, r2.y
-(rpt1)nop
-mov.f32f32 r10.z, r8.w
-mov.f32f32 r10.w, r9.z
-(rpt5)nop
-sam (f32)(w)r19.y, r10.z, s#2, t#2
-(sy)cmps.f.lt r8.w, r20.x, r1.z
-cmps.f.lt r9.z, r20.x, r1.z
-(rpt1)nop
-cov.u32f32 r8.w, r8.w
-cov.u32f32 r9.z, r9.z
+mov.f32f32 r15.y, r1.z
+add.f r0.x, r0.x, (neg)r14.y
+mov.f32f32 r12.w, r15.z
+(rpt3)nop
+sam (f32)(w)r15.y, r15.y, s#2, t#2
+(sy)cmps.f.lt r14.y, r16.x, r0.x
+mov.f32f32 r0.x, r0.x
(rpt1)nop
-mov.f32f32 r8.w, r8.w
-cmps.f.ne r9.z, r9.z, c14.y
+cov.u32f32 r14.y, r14.y
+cmps.f.lt r14.z, r16.x, r0.x
(rpt1)nop
-cmps.f.ne r8.w, r8.w, c14.y
-sel.b32 r9.z, r18.z, r9.z, r18.y
+cmps.f.ne r14.y, r14.y, c14.y
+cov.u32f32 r14.z, r14.z
(rpt1)nop
-sel.b32 r9.w, r9.w, r8.w, r19.x
-sel.b32 r8.w, r9.x, r8.w, r18.w
-add.f r1.z, r1.z, (neg)r9.z
+sel.b32 r12.x, r2.x, r14.y, r12.x
+cmps.f.ne r14.z, r14.z, c14.y
+sel.b32 r14.y, r2.y, r14.y, r15.x
nop
-add.f r1.w, r1.w, r9.w
-add.f r2.y, r2.y, r8.w
-mov.f32f32 r1.z, r1.z
+add.f r1.z, r1.z, r12.x
+sel.b32 r12.x, r14.w, r14.z, r13.w
+add.f r14.z, r12.w, r14.y
nop
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r2.y, r2.y
-(rpt1)nop
-mov.f32f32 r8.w, r1.w
-mov.f32f32 r9.x, r2.y
-(rpt1)nop
-mov.f32f32 r9.z, r8.w
-mov.f32f32 r9.w, r9.x
-(rpt5)nop
-sam (f32)(w)r17.w, r9.z, s#2, t#2
-(sy)cmps.f.lt r8.w, r18.z, r1.z
-cmps.f.lt r9.x, r18.z, r1.z
-(rpt1)nop
-cov.u32f32 r8.w, r8.w
-cov.u32f32 r9.x, r9.x
+mov.f32f32 r14.y, r1.z
+add.f r0.x, r0.x, (neg)r12.x
+mov.f32f32 r12.x, r14.z
+(rpt3)nop
+sam (f32)(w)r14.y, r14.y, s#2, t#2
+(sy)cmps.f.lt r12.w, r15.x, r0.x
+mov.f32f32 r0.x, r0.x
(rpt1)nop
-mov.f32f32 r8.w, r8.w
-cmps.f.ne r9.x, r9.x, c14.y
+cov.u32f32 r12.w, r12.w
+cmps.f.lt r13.w, r15.x, r0.x
(rpt1)nop
-cmps.f.ne r8.w, r8.w, c14.y
-sel.b32 r9.x, r17.z, r9.x, r17.y
+cmps.f.ne r12.w, r12.w, c14.y
+cov.u32f32 r13.w, r13.w
(rpt1)nop
-sel.b32 r9.y, r9.y, r8.w, r17.x
-sel.b32 r7.w, r7.w, r8.w, r16.w
-add.f r1.z, r1.z, (neg)r9.x
+sel.b32 r13.z, r2.x, r12.w, r13.z
+cmps.f.ne r13.w, r13.w, c14.y
+sel.b32 r9.z, r2.y, r12.w, r9.z
nop
-add.f r1.w, r1.w, r9.y
-add.f r2.y, r2.y, r7.w
-mov.f32f32 r1.z, r1.z
+add.f r1.z, r1.z, r13.z
+sel.b32 r12.w, r14.x, r13.w, r13.y
+add.f r13.z, r12.x, r9.z
nop
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r2.y, r2.y
-(rpt1)nop
-mov.f32f32 r7.w, r1.w
-mov.f32f32 r8.w, r2.y
-(rpt1)nop
-mov.f32f32 r9.x, r7.w
-mov.f32f32 r9.y, r8.w
-(rpt5)nop
-(ss)nop
-sam (f32)(w)r8.w, r9.x, s#2, t#2
-(sy)cmps.f.lt r7.w, r9.z, r1.z
-cmps.f.lt r8.w, r9.z, r1.z
-(rpt1)nop
-cov.u32f32 r7.w, r7.w
-cov.u32f32 r8.w, r8.w
+mov.f32f32 r13.y, r1.z
+add.f r0.x, r0.x, (neg)r12.w
+mov.f32f32 r9.z, r13.z
+(rpt3)nop
+sam (f32)(w)r13.y, r13.y, s#2, t#2
+(sy)cmps.f.lt r12.x, r14.x, r0.x
+mov.f32f32 r0.x, r0.x
(rpt1)nop
-mov.f32f32 r7.w, r7.w
-cmps.f.ne r8.w, r8.w, c14.y
+cov.u32f32 r12.x, r12.x
+cmps.f.lt r12.w, r14.x, r0.x
(rpt1)nop
-cmps.f.ne r7.w, r7.w, c14.y
-sel.b32 r8.w, r10.y, r8.w, r14.x
+cmps.f.ne r12.x, r12.x, c14.y
+cov.u32f32 r12.w, r12.w
(rpt1)nop
-sel.b32 r8.z, r8.z, r7.w, r14.y
-sel.b32 r7.y, r7.y, r7.w, r11.x
-add.f r1.z, r1.z, (neg)r8.w
+sel.b32 r13.x, r2.x, r12.x, r13.x
+cmps.f.ne r12.w, r12.w, c14.y
+sel.b32 r12.x, r2.y, r12.x, r12.z
nop
-add.f r1.w, r1.w, r8.z
-add.f r2.y, r2.y, r7.y
-mov.f32f32 r1.z, r1.z
+add.f r1.z, r1.z, r13.x
+sel.b32 r11.y, r12.y, r12.w, r11.y
+add.f r12.y, r9.z, r12.x
nop
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r2.y, r2.y
-(rpt1)nop
-mov.f32f32 r7.y, r1.w
-mov.f32f32 r7.w, r2.y
-(rpt1)nop
-mov.f32f32 r8.z, r7.y
-mov.f32f32 r8.w, r7.w
-(rpt5)nop
-(ss)nop
-sam (f32)(w)r8.z, r8.z, s#2, t#2
-(sy)cmps.f.lt r7.y, r9.y, r1.z
-cmps.f.lt r7.w, r9.y, r1.z
-(rpt1)nop
-cov.u32f32 r7.y, r7.y
-cov.u32f32 r7.w, r7.w
+mov.f32f32 r12.x, r1.z
+add.f r0.x, r0.x, (neg)r11.y
+mov.f32f32 r9.z, r12.y
+(rpt3)nop
+sam (f32)(w)r12.x, r12.x, s#2, t#2
+(sy)cmps.f.lt r11.y, r12.w, r0.x
+mov.f32f32 r0.x, r0.x
(rpt1)nop
-mov.f32f32 r7.y, r7.y
-cmps.f.ne r7.w, r7.w, c14.y
+cov.u32f32 r11.y, r11.y
+(ss)cmps.f.lt r12.x, r12.w, r0.x
(rpt1)nop
-cmps.f.ne r7.y, r7.y, c14.y
-sel.b32 r7.w, r16.x, r7.w, r15.w
+cmps.f.ne r11.y, r11.y, c14.y
+cov.u32f32 r12.x, r12.x
(rpt1)nop
-sel.b32 r7.z, r7.z, r7.y, r16.z
-sel.b32 r7.x, r7.x, r7.y, r16.y
-add.f r1.z, r1.z, (neg)r7.w
+sel.b32 r11.x, r2.x, r11.y, r11.x
+cmps.f.ne r12.x, r12.x, c14.y
+sel.b32 r8.y, r2.y, r11.y, r8.y
nop
-add.f r1.w, r1.w, r7.z
-add.f r2.y, r2.y, r7.x
-mov.f32f32 r1.z, r1.z
+add.f r1.z, r1.z, r11.x
+sel.b32 r11.x, r11.w, r12.x, r11.z
+add.f r11.z, r9.z, r8.y
nop
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r2.y, r2.y
-(rpt1)nop
-mov.f32f32 r7.x, r1.w
-mov.f32f32 r7.y, r2.y
-(rpt1)nop
-mov.f32f32 r7.z, r7.x
-mov.f32f32 r7.w, r7.y
-(rpt5)nop
-sam (f32)(w)r7.x, r7.z, s#2, t#2
-(sy)cmps.f.lt r7.x, r7.w, r1.z
-cmps.f.lt r7.y, r7.w, r1.z
-(rpt1)nop
-cov.u32f32 r7.x, r7.x
-cov.u32f32 r7.y, r7.y
+mov.f32f32 r11.y, r1.z
+add.f r0.x, r0.x, (neg)r11.x
+mov.f32f32 r8.y, r11.z
+(rpt3)nop
+sam (f32)(w)r11.x, r11.y, s#2, t#2
+(sy)cmps.f.lt r9.z, r11.w, r0.x
+mov.f32f32 r0.x, r0.x
(rpt1)nop
-mov.f32f32 r7.x, r7.x
-cmps.f.ne r7.y, r7.y, c14.y
+cov.u32f32 r9.z, r9.z
+cmps.f.lt r11.x, r11.w, r0.x
(rpt1)nop
-cmps.f.ne r7.x, r7.x, c14.y
-sel.b32 r7.y, r15.x, r7.y, r14.w
+cmps.f.ne r9.z, r9.z, c14.y
+cov.u32f32 r11.x, r11.x
(rpt1)nop
-sel.b32 r2.z, r2.z, r7.x, r15.z
-sel.b32 r6.y, r6.y, r7.x, r15.y
-add.f r1.z, r1.z, (neg)r7.y
+sel.b32 r10.z, r2.x, r9.z, r10.z
+cmps.f.ne r11.x, r11.x, c14.y
+sel.b32 r9.z, r2.y, r9.z, r10.y
nop
-add.f r1.w, r1.w, r2.z
-add.f r2.y, r2.y, r6.y
-mov.f32f32 r1.z, r1.z
+add.f r1.z, r1.z, r10.z
+sel.b32 r1.w, r1.w, r11.x, r10.w
+add.f r10.z, r8.y, r9.z
nop
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r2.y, r2.y
+mov.f32f32 r10.y, r1.z
+add.f r0.x, r0.x, (neg)r1.w
+mov.f32f32 r1.w, r10.z
+(rpt3)nop
+sam (f32)(w)r10.y, r10.y, s#2, t#2
+(sy)cmps.f.lt r8.y, r11.x, r0.x
+mov.f32f32 r0.x, r0.x
(rpt1)nop
-mov.f32f32 r2.z, r1.w
-mov.f32f32 r6.y, r2.y
+cov.u32f32 r8.y, r8.y
+cmps.f.lt r9.z, r11.x, r0.x
(rpt1)nop
-mov.f32f32 r7.x, r2.z
-mov.f32f32 r7.y, r6.y
-(rpt5)nop
-(ss)nop
-sam (f32)(w)r7.x, r7.x, s#2, t#2
-(sy)cmps.f.lt r2.z, r7.w, r1.z
-cmps.f.lt r6.y, r7.w, r1.z
+cmps.f.ne r8.y, r8.y, c14.y
+cov.u32f32 r9.z, r9.z
(rpt1)nop
-cov.u32f32 r2.z, r2.z
-cov.u32f32 r6.y, r6.y
+sel.b32 r10.x, r2.x, r8.y, r10.x
+cmps.f.ne r9.z, r9.z, c14.y
+sel.b32 r8.y, r2.y, r8.y, r9.w
+nop
+add.f r1.z, r1.z, r10.x
+sel.b32 r8.z, r9.y, r9.z, r8.z
+add.f r9.z, r1.w, r8.y
+nop
+mov.f32f32 r9.y, r1.z
+add.f r0.x, r0.x, (neg)r8.z
+mov.f32f32 r1.w, r9.z
+(rpt3)nop
+sam (f32)(w)r9.y, r9.y, s#2, t#2
+(sy)cmps.f.lt r8.y, r10.x, r0.x
+mov.f32f32 r0.x, r0.x
(rpt1)nop
-mov.f32f32 r2.z, r2.z
-cmps.f.ne r6.y, r6.y, c14.y
+cov.u32f32 r8.y, r8.y
+cmps.f.lt r8.z, r10.x, r0.x
(rpt1)nop
-cmps.f.ne r2.z, r2.z, c14.y
-sel.b32 r6.y, r11.w, r6.y, r13.w
+cmps.f.ne r8.y, r8.y, c14.y
+cov.u32f32 r8.z, r8.z
(rpt1)nop
-sel.b32 r4.z, r4.z, r2.z, r14.z
-sel.b32 r2.z, r3.w, r2.z, r13.x
-add.f r1.z, r1.z, (neg)r6.y
+sel.b32 r9.x, r2.x, r8.y, r9.x
+cmps.f.ne r8.z, r8.z, c14.y
+sel.b32 r8.y, r2.y, r8.y, r8.w
nop
-add.f r1.w, r1.w, r4.z
-add.f r2.y, r2.y, r2.z
-mov.f32f32 r1.z, r1.z
+add.f r1.z, r1.z, r9.x
+sel.b32 r2.z, r2.w, r8.z, r2.z
+add.f r8.z, r1.w, r8.y
nop
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r2.y, r2.y
-(rpt1)nop
-mov.f32f32 r2.z, r1.w
-mov.f32f32 r3.w, r2.y
+mov.f32f32 r8.y, r1.z
+add.f r0.x, r0.x, (neg)r2.z
+mov.f32f32 r1.w, r8.z
+(rpt3)nop
+sam (f32)(w)r8.y, r8.y, s#2, t#2
+(sy)cmps.f.lt r2.z, r9.x, r0.x
+mov.f32f32 r0.x, r0.x
(rpt1)nop
-(ss)mov.f32f32 r7.x, r2.z
-mov.f32f32 r7.y, r3.w
-(rpt5)nop
-sam (f32)(w)r7.x, r7.x, s#2, t#2
-(sy)cmps.f.lt r2.z, r7.w, r1.z
-(rpt2)nop
cov.u32f32 r2.z, r2.z
(rpt2)nop
cmps.f.ne r2.z, r2.z, c14.y
(rpt2)nop
-sel.b32 r3.w, r5.y, r2.z, r13.y
-sel.b32 r1.y, r2.w, r2.z, r1.y
+sel.b32 r1.y, r2.x, r2.z, r1.y
+sel.b32 r2.z, r2.y, r2.z, r7.w
(rpt1)nop
-add.f r1.w, r1.w, r3.w
-add.f r1.y, r2.y, r1.y
-(rpt1)nop
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r1.y, r1.y
+add.f r1.y, r1.z, r1.y
+add.f r1.w, r1.w, r2.z
(rpt1)nop
-add.f r2.y, r1.w, (neg)r2.x
+mov.f32f32 r1.z, r1.y
mov.f32f32 r2.z, r1.w
-add.f r2.w, r1.y, (neg)r0.y
-mov.f32f32 r3.w, r1.y
-mov.f32f32 r2.y, r2.y
-(ss)mov.f32f32 r7.x, r2.z
-mov.f32f32 r2.z, r2.w
-mov.f32f32 r7.y, r3.w
-mov.f32f32 r2.y, r2.y
-(rpt2)nop
-mov.f32f32 r7.z, r2.y
-mov.f32f32 r2.y, r2.z
-sam (f32)(w)r8.z, r7.x, s#2, t#2
-(sy)add.f r2.z, c15.z, r9.y
-add.f r1.z, r9.y, (neg)r1.z
-nop
-mov.f32f32 r7.w, r2.y
-mov.f32f32 r2.y, r2.z
-mov.f32f32 r1.z, r1.z
-(rpt3)nop
-sam (f32)(w)r7.x, r7.z, s#2, t#2
-(sy)add.f r2.y, r2.y, (neg)r7.w
-(rpt2)nop
-mov.f32f32 r2.y, r2.y
+(rpt1)nop
+(ss)add.f r8.y, r1.z, (neg)r2.x
+add.f r8.z, r2.z, (neg)r2.y
+(rpt1)nop
+sam (f32)(w)r8.w, r1.z, s#2, t#2
+(sy)(ss)add.f r1.z, c15.z, r9.z
+add.f r0.x, r9.z, (neg)r0.x
+(rpt1)nop
+sam (f32)(w)r8.y, r8.y, s#2, t#2
+(sy)add.f r1.z, r1.z, (neg)r9.x
(rpt5)nop
-rcp r2.y, r2.y
-(ss)mov.f32f32 r2.y, r2.y
-(rpt2)nop
-mul.f r1.z, r1.z, r2.y
-(rpt2)nop
-mov.f32f32 r1.z, r1.z
+rcp r1.z, r1.z
+(ss)mul.f r0.x, r0.x, r1.z
(rpt2)nop
-mul.f r2.x, r2.x, r1.z
-mul.f r0.y, r0.y, r1.z
+(ss)mov.f32f32 r1.z, r0.x
+mul.f r0.x, r2.x, r0.x
(rpt1)nop
-mov.f32f32 r1.z, r2.x
-mov.f32f32 r0.y, r0.y
-(rpt1)nop
-add.f r1.z, r1.w, (neg)r1.z
-add.f r0.y, r1.y, (neg)r0.y
+mul.f r1.z, r2.y, r1.z
+add.f r1.w, r1.y, (neg)r0.x
(rpt1)nop
-mov.f32f32 r1.y, r1.z
-mov.f32f32 r0.y, r0.y
+add.f r2.x, r2.z, (neg)r1.z
+mov.f32f32 r1.y, r1.w
(rpt1)nop
-mov.f32f32 r1.z, r1.y
-mov.f32f32 r1.w, r1.y
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r2.x, r0.y
-mov.f32f32 r2.y, r1.z
-mov.f32f32 r1.z, r0.y
-mov.f32f32 r7.x, r1.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r2.z, r1.z
mov.f32f32 r1.z, r2.x
-mov.f32f32 r7.y, r0.y
+mov.f32f32 r8.y, r1.y
nop
-mov.f32f32 r1.w, r1.y
-mov.f32f32 r2.x, r1.z
-nop
-sam (f32)(xyz)r8.z, r2.y, s#2, t#2
-(sy)mad.f32 r0.y, c16.x, r8.w, c16.y
-mad.f32 r1.y, c16.x, r8.z, c16.y
-sam (f32)(xyzw)r7.x, r7.x, s#0, t#0
-(sy)cmps.f.lt r1.z, r7.w, c15.w
-(ss)mov.f32f32 r2.y, r7.w
-mov.f32f32 r0.y, r0.y
-sam (f32)(xyz)r9.y, r1.w, s#3, t#3
-(sy)(ss)mul.f r1.w, c8.y, r9.z
-mul.f r2.x, c8.x, r9.y
-cov.u32f32 r1.z, r1.z
-absneg.f r0.y, (neg)r0.y
-mov.f32f32 r1.y, r1.y
-mul.f r2.z, c8.z, r9.w
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r2.x, r2.x
-mul.f r2.w, r4.y, r1.y
-mul.f r3.z, r3.z, r1.y
-mad.f32 r0.x, r0.x, r0.y, r2.w
-mad.f32 r2.w, r3.x, r0.y, r3.z
-mul.f r1.y, r4.w, r1.y
-cmps.f.ne p0.x, r1.z, r13.z
-mov.f32f32 r0.x, r0.x
+sam (f32)(xyzw)r2.x, r1.w, s#0, t#0
+(sy)cmps.f.lt r0.x, r2.w, c15.w
+mov.f32f32 r8.z, r1.z
+(rpt1)nop
+sam (f32)(xyz)r8.w, r1.y, s#2, t#2
+(sy)(ss)mad.f32 r1.y, c16.x, r8.w, c16.y
+cov.u32f32 r0.x, r0.x
mad.f32 r1.z, c16.x, r9.x, c16.y
-mov.f32f32 r2.w, r2.w
-mad.f32 r0.y, r3.y, r0.y, r1.y
-mov.f32f32 r1.y, r2.z
-mov.f32f32 r1.z, r1.z
+mad.f32 r1.w, c16.x, r9.y, c16.y
+mov.f32f32 r7.w, r1.y
+cmps.f.ne p0.x, r0.x, r4.w
+absneg.f r0.x, (neg)r1.z
+mul.f r1.y, r4.x, r1.y
+mul.f r1.z, r4.z, r7.w
+mul.f r4.x, r5.y, r7.w
+mad.f32 r0.y, r0.y, r0.x, r1.z
+mov.f32f32 r1.z, r1.w
+mad.f32 r4.x, r5.z, r0.x, r4.x
+mad.f32 r0.x, r3.w, r0.x, r1.y
kill p0.x
-mov.f32f32 r3.w, r2.y
-mov.f32f32 r0.y, r0.y
-mad.f32 r0.x, r5.z, r1.z, r0.x
-mad.f32 r2.y, r6.w, r1.z, r2.w
-mad.f32 r0.y, r6.z, r1.z, r0.y
-nop
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r1.z, r2.y
-mov.f32f32 r0.y, r0.y
+mad.f32 r0.y, r6.x, r1.z, r0.y
+mad.f32 r1.y, r5.w, r1.z, r4.x
+mad.f32 r0.x, r8.x, r1.w, r0.x
nop
-mul.f r2.y, r0.x, r0.x
-nop
-mad.f32 r2.y, r1.z, r1.z, r2.y
-(rpt2)nop
-mov.f32f32 r2.y, r2.y
-nop
-mad.f32 r2.y, r0.y, r0.y, r2.y
+mov.f32f32 r1.z, r0.y
+mov.f32f32 r1.w, r1.y
+mov.f32f32 r3.w, r0.x
+sam (f32)(xyz)r5.y, r8.y, s#3, t#3
+(sy)mul.f r4.x, c8.z, r5.w
+mul.f r0.y, r0.y, r1.z
+mul.f r4.z, c8.y, r5.z
+mad.f32 r0.y, r1.y, r1.w, r0.y
+mul.f r1.y, c8.x, r5.y
+mad.f32 r0.y, r3.w, r3.w, r0.y
(rpt5)nop
-rsq r2.y, r2.y
-(ss)mov.f32f32 r2.y, r2.y
-(rpt2)nop
-mul.f r0.x, r0.x, r2.y
-mul.f r1.z, r1.z, r2.y
-mul.f r0.y, r0.y, r2.y
-nop
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r0.y, r0.y
-nop
-mul.f r2.y, r0.x, r0.x
-mul.f r2.w, (neg)c9.x, r0.x
-mad.f32 r2.y, r1.z, r1.z, r2.y
-mad.f32 r2.w, (neg)c9.y, r1.z, r2.w
+rsq r0.y, r0.y
+(ss)mov.f32f32 r3.w, r0.y
+mul.f r0.x, r0.x, r0.y
(rpt1)nop
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r2.w, r2.w
-mad.f32 r2.y, r0.y, r0.y, r2.y
-mad.f32 r2.w, (neg)c9.z, r0.y, r2.w
-(rpt4)nop
-rsq r2.y, r2.y
-(ss)mov.f32f32 r2.y, r2.y
-max.f r2.w, r2.w, c14.y
-(rpt1)nop
-mul.f r0.x, r0.x, r2.y
-mov.f32f32 r2.w, r2.w
-mul.f r1.z, r1.z, r2.y
-mul.f r0.y, r0.y, r2.y
-mov.f32f32 r0.x, r0.x
-mad.f32 r2.y, c8.z, r2.w, (neg)r12.x
-mad.f32 r4.y, c8.y, r2.w, (neg)r8.y
-mad.f32 r2.w, c8.x, r2.w, (neg)r1.x
-mul.f r0.x, r0.x, r4.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r4.x, r4.y
-mov.f32f32 r2.w, r2.w
-mad.f32 r0.x, r1.z, r12.y, r0.x
-mad.f32 r1.z, c12.x, r2.y, r12.x
-mad.f32 r2.y, c12.x, r4.x, r8.y
-mad.f32 r1.x, c12.x, r2.w, r1.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r1.x, r1.x
-mad.f32 r0.x, r0.y, r12.z, r0.x
-(rpt2)nop
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
-max.f r0.x, c14.y, r0.x
-(rpt2)nop
-mov.f32f32 r0.x, r0.x
+(ss)mul.f r0.y, r1.z, r3.w
+mul.f r1.z, r1.w, r3.w
+mov.f32f32 r1.w, r0.x
+nop
+mov.f32f32 r3.w, r0.y
+mul.f r0.y, (neg)c9.x, r0.y
+mov.f32f32 r4.w, r1.z
+nop
+mul.f r5.y, r3.w, r3.w
+mad.f32 r0.y, (neg)c9.y, r1.z, r0.y
+mad.f32 r1.z, r4.w, r4.w, r5.y
+mad.f32 r0.x, (neg)c9.z, r0.x, r0.y
+mad.f32 r0.y, r1.w, r1.w, r1.z
(rpt5)nop
-log2 r0.x, r0.x
-(ss)mul.f r0.x, c12.y, r0.x
-(rpt2)nop
-mov.f32f32 r0.x, r0.x
+rsq r0.y, r0.y
+(ss)mov.f32f32 r1.z, r0.y
+max.f r0.x, r0.x, c14.y
+(ss)mul.f r0.y, r1.w, r0.y
+nop
+mul.f r1.w, r3.w, r1.z
+mov.f32f32 r3.w, r0.x
+mul.f r1.z, r4.w, r1.z
+mad.f32 r0.x, c8.x, r0.x, (neg)r1.x
+mul.f r1.w, r1.w, r6.w
+mad.f32 r4.w, c8.z, r3.w, (neg)r6.y
+mad.f32 r1.z, r1.z, r7.x, r1.w
+mad.f32 r1.w, c8.y, r3.w, (neg)r6.z
+mad.f32 r0.y, r0.y, r7.z, r1.z
+mad.f32 r1.z, c12.x, r4.w, r6.y
+mad.f32 r0.x, c12.x, r0.x, r1.x
+nop
+max.f r0.y, c14.y, r0.y
+mad.f32 r1.x, c12.x, r1.w, r6.z
+(rpt4)nop
+log2 r0.y, r0.y
+(ss)mul.f r0.y, c12.y, r0.y
(rpt5)nop
-exp2 r0.x, r0.x
-(ss)mul.f r0.y, r1.y, r0.x
-mul.f r1.y, r1.w, r0.x
-mad.f32 r0.y, r7.z, r1.z, r0.y
-mad.f32 r1.y, r7.y, r2.y, r1.y
-(ss)mul.f r0.x, r2.x, r0.x
-nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.y, r1.y
-mad.f32 r0.x, r7.x, r1.x, r0.x
-nop
-mul.f r0.y, r0.y, r0.w
-mul.f r1.x, r1.y, r0.w
-mov.f32f32 r0.x, r0.x
-nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.x, r1.x
-mad.f32 r0.y, c7.z, r7.z, r0.y
-mad.f32 r1.x, c7.y, r7.y, r1.x
+exp2 r0.y, r0.y
+(ss)mul.f r1.w, r4.x, r0.y
+mul.f r3.w, r4.z, r0.y
+mad.f32 r1.z, r2.z, r1.z, r1.w
+mad.f32 r1.x, r2.y, r1.x, r3.w
+(ss)mul.f r0.y, r1.y, r0.y
+nop
+mul.f r1.y, r1.z, r4.y
+mul.f r1.x, r1.x, r4.y
+mad.f32 r1.y, c7.z, r2.z, r1.y
+mad.f32 r1.x, c7.y, r2.y, r1.x
+mad.f32 r0.x, r2.x, r0.x, r0.y
+nop
+mul.f r0.y, r0.z, r1.y
+mul.f r1.x, r0.z, r1.x
mul.f r0.x, r0.x, r0.w
nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.w, r1.x
-mov.f32f32 r0.x, r0.x
-nop
-mul.f r0.y, r0.z, r0.y
-mul.f r0.w, r0.z, r0.w
-mad.f32 r0.x, c7.x, r7.x, r0.x
+add.f r0.y, r0.y, r7.y
+add.f r0.w, r1.x, r3.y
+mad.f32 r0.x, c7.x, r2.x, r0.x
nop
-add.f r0.y, r0.y, r5.x
-add.f r0.w, r0.w, r5.w
-mov.f32f32 r0.x, r0.x
-nop
-mul.f r0.y, r0.y, r8.x
-mul.f r0.w, r0.w, r8.x
+mul.f r0.y, r0.y, r5.x
+mul.f r0.w, r0.w, r5.x
mul.f r0.x, r0.z, r0.x
nop
-mul.f r0.y, r0.y, c6.z
-mul.f r0.z, r0.w, c6.y
-add.f r0.x, r0.x, r6.x
-nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-mul.f r0.x, r0.x, r8.x
-nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-mul.f r0.x, r0.x, c6.x
-nop
-mov.f32f32 r3.z, r0.y
-mov.f32f32 r3.y, r0.z
-mov.f32f32 r0.x, r0.x
+mul.f r2.z, r0.y, c6.z
+mul.f r2.y, r0.w, c6.y
+add.f r0.x, r0.x, r3.z
(rpt2)nop
-mov.f32f32 r0.x, r0.x
+mul.f r0.x, r0.x, r5.x
(rpt2)nop
-mov.f32f32 r3.x, r0.x
+mul.f r2.x, r0.x, c6.x
end
-; FRAG: outputs: r3.x (1:0)
-; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r2.z (5:9,cm=f,il=8,b=1) r63.y (5:10,cm=f,il=12,b=1) r3.x (5:11,cm=f,il=16,b=1) r2.y (5:12,cm=f,il=20,b=1) r7.x (5:13,cm=f,il=24,b=1) r3.y (5:14,cm=f,il=28,b=1) r7.x (5:15,cm=f,il=32,b=1)
-; FRAG: 1147 instructions, 0 half, 65 full
-; pos (bary): r1.x
-; color: r3.x
-; fragcoord: r0.x
+nop
+nop
+; FRAG: outputs: r2.x (1:0)
+; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r3.x (5:9,cm=f,il=8,b=1) r63.z (5:10,cm=f,il=12,b=1) r3.x (5:11,cm=f,il=16,b=1) r1.w (5:12,cm=f,il=20,b=1) r6.x (5:13,cm=f,il=24,b=1) r6.x (5:14,cm=f,il=28,b=1) r7.x (5:15,cm=f,il=32,b=1)
+; FRAG: 766 instructions, 0 half, 20 full
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-73.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-73.asm
index ad4df45..b2e35b3 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-73.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-73.asm
@@ -6,346 +6,250 @@
@in(r0.w) in4
@in(r1.x) in5
@in(r1.y) in6
-@in(r1.z) in8
-@in(r1.w) in9
-@in(r2.x) in10
-@in(r2.y) in11
-@in(r2.z) in12
-@in(r2.w) in13
-@out(r11.z) out0
-@out(r11.w) out1
-@out(r12.x) out2
-@out(r12.y) out3
-@out(r7.x) out4
-@out(r7.y) out5
-@out(r7.z) out6
-@out(r7.w) out7
-@out(r3.x) out8
-@out(r3.y) out9
-@out(r3.z) out10
-@out(r3.w) out11
-@out(r9.z) out12
-@out(r9.w) out13
-@out(r10.x) out14
-@out(r10.y) out15
-@out(r1.y) out16
-@out(r1.z) out17
-@out(r1.w) out18
-@out(r2.x) out19
-@out(r12.z) out20
-@out(r12.w) out21
-@out(r13.x) out22
-@out(r13.y) out23
-@out(r8.z) out24
-@out(r8.w) out25
-@out(r9.x) out26
-@out(r9.y) out27
-@out(r10.z) out28
-@out(r10.w) out29
-@out(r11.x) out30
-@out(r11.y) out31
-(sy)(ss)floor.f r3.x, c15.z
-absneg.f r3.y, (abs)c18.x
-absneg.f r3.z, (abs)c18.y
-floor.f r3.w, c15.x
-add.f r3.x, c15.z, (neg)r3.x
-mul.f r4.x, c12.x, r1.z
-mul.f r4.y, c12.x, r0.w
-add.f r3.w, c15.x, (neg)r3.w
-mov.f32f32 r3.x, r3.x
-add.f r3.y, r3.y, r3.z
-mad.f32 r3.z, c13.x, r1.w, r4.x
-mad.f32 r4.x, c13.x, r1.x, r4.y
-max.f r3.x, r3.x, c19.y
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r3.z, r3.z
-min.f r3.x, r3.x, c23.y
-mul.f r4.y, c17.x, r3.y
-mul.f r4.z, c12.z, r0.x
-max.f r3.w, r3.w, c19.y
-max.f r3.x, r3.x, c19.x
-mad.f32 r4.z, c13.z, r0.y, r4.z
-mad.f32 r3.z, c14.x, r2.x, r3.z
-mad.f32 r4.x, c14.x, r1.y, r4.x
-mov.f32f32 r3.x, r3.x
-mad.f32 r4.z, c14.z, r0.z, r4.z
-min.f r3.w, r3.w, c23.y
-mov.f32f32 r3.z, r3.z
-mul.f r3.x, c17.x, r3.x
-add.f r4.z, r4.z, c15.z
-max.f r3.w, r3.w, c19.x
-mad.f32 r4.y, c19.w, r4.y, r4.z
-mov.f32f32 r3.x, r3.x
-mul.f r4.w, c12.y, r0.w
-mov.f32f32 r4.x, r4.x
-absneg.f r5.x, (neg)c5.x
-mad.f32 r3.x, c19.z, r3.x, c15.x
-mov.f32f32 r4.y, r4.y
-mov.f32f32 r3.w, r3.w
-mul.f r5.y, c12.x, r0.x
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r4.y, r4.y
-mad.f32 r5.y, c13.x, r0.y, r5.y
-mad.f32 r4.w, c13.y, r1.x, r4.w
-mov.f32f32 r3.x, r3.x
-add.f r4.y, r4.y, c20.x
-mad.f32 r5.y, c14.x, r0.z, r5.y
-mad.f32 r4.w, c14.y, r1.y, r4.w
-add.f r3.x, r3.x, c20.x
-floor.f r5.z, r4.y
-add.f r5.y, r5.y, c15.x
-mov.f32f32 r4.w, r4.w
-floor.f r5.w, r3.x
-add.f r4.y, r4.y, (neg)r5.z
-mad.f32 r3.w, c17.x, r3.w, r5.y
-mul.f r5.z, r4.w, r3.z
-add.f r3.x, r3.x, (neg)r5.w
-mad.f32 r4.y, c20.y, r4.y, c20.z
-add.f r3.w, r3.w, c20.x
-mul.f r5.w, c12.y, r1.z
-mad.f32 r3.x, c20.y, r3.x, c20.z
-absneg.f r4.y, (abs)r4.y
-floor.f r6.x, r3.w
-mad.f32 r5.w, c13.y, r1.w, r5.w
-absneg.f r3.x, (abs)r3.x
-mul.f r6.y, c20.y, r4.y
-add.f r3.w, r3.w, (neg)r6.x
-mul.f r4.y, r4.y, r4.y
-mul.f r6.x, c20.y, r3.x
-add.f r6.y, c20.w, (neg)r6.y
-mad.f32 r3.w, c20.y, r3.w, c20.z
-mul.f r3.x, r3.x, r3.x
-add.f r6.x, c20.w, (neg)r6.x
-mul.f r4.y, r4.y, r6.y
-absneg.f r3.w, (abs)r3.w
-mov.f32f32 r5.w, r5.w
-mul.f r3.x, r3.x, r6.x
-mov.f32f32 r4.y, r4.y
-mul.f r6.x, r0.x, r0.z
-mul.f r6.y, r0.y, c21.x
-mov.f32f32 r3.x, r3.x
-mul.f r6.z, r0.y, c22.x
-mul.f r6.w, c20.y, r3.w
-mul.f r6.x, r6.x, r6.y
-mul.f r3.w, r3.w, r3.w
-mov.f32f32 r6.y, r6.z
-add.f r6.z, c20.w, (neg)r6.w
-mov.f32f32 r6.x, r6.x
-mad.f32 r5.w, c14.y, r2.x, r5.w
-mov.f32f32 r6.y, r6.y
-mul.f r3.w, r3.w, r6.z
-mov.f32f32 r6.x, r6.x
-mov.f32f32 r5.w, r5.w
-max.f r6.y, r6.y, c19.y
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r6.x, r6.x
-mad.f32 r5.z, r4.x, r5.w, (neg)r5.z
-min.f r6.y, r6.y, c23.y
-mul.f r6.z, r4.x, r5.x
-absneg.f r6.w, (neg)c5.y
-mul.f r1.z, c12.z, r1.z
-min.f r6.y, r6.y, c19.w
-max.f r6.x, r6.x, c19.y
-mov.f32f32 r5.z, r5.z
-mad.f32 r6.z, r4.w, r6.w, r6.z
-mov.f32f32 r6.y, r6.y
-min.f r6.x, r6.x, c23.y
-mul.f r5.z, r5.z, r2.y
-mov.f32f32 r6.z, r6.z
-mul.f r3.x, r3.x, r6.y
-min.f r6.x, r6.x, c21.y
-mov.f32f32 r5.z, r5.z
+@in(r8.x) in8
+@in(r8.y) in9
+@in(r8.z) in10
+@in(r8.w) in11
+@in(r5.z) in12
+@in(r5.w) in13
+@out(r0.x) out0
+@out(r0.y) out1
+@out(r0.z) out2
+@out(r0.w) out3
+@out(r1.x) out4
+@out(r1.y) out5
+@out(r1.z) out6
+@out(r1.w) out7
+@out(r2.x) out8
+@out(r2.y) out9
+@out(r2.z) out10
+@out(r2.w) out11
+@out(r3.x) out12
+@out(r3.y) out13
+@out(r3.z) out14
+@out(r3.w) out15
+@out(r4.x) out16
+@out(r4.y) out17
+@out(r4.z) out18
+@out(r4.w) out19
+@out(r5.x) out20
+@out(r5.y) out21
+@out(r5.z) out22
+@out(r5.w) out23
+@out(r6.x) out24
+@out(r6.y) out25
+@out(r6.z) out26
+@out(r6.w) out27
+@out(r7.x) out28
+@out(r7.y) out29
+@out(r7.z) out30
+@out(r7.w) out31
+@const(c19.x) 0x3ecccccd, 0x00000000, 0x3eaaa64c, 0x3e800000
+@const(c20.x) 0x3f000000, 0x40000000, 0xbf800000, 0x40400000
+@const(c21.x) 0x399d4952, 0x3e4ccccd, 0x3f99999a, 0x3fd9999a
+@const(c22.x) 0x3c88ce70, 0x00000000, 0x00000000, 0x00000000
+@const(c23.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
+(sy)(ss)floor.f r1.z, c15.z
+floor.f r1.w, c15.x
+absneg.f r2.x, (abs)c18.x
+absneg.f r2.y, (abs)c18.y
+add.f r1.z, c15.z, (neg)r1.z
+add.f r1.w, c15.x, (neg)r1.w
+mul.f r2.z, c12.x, r0.w
+add.f r2.x, r2.x, r2.y
+max.f r1.z, r1.z, c19.y
+max.f r1.w, r1.w, c19.y
+mad.f32 r2.y, c13.x, r1.x, r2.z
+mul.f r2.z, c17.x, r2.x
+min.f r1.z, r1.z, c23.y
+min.f r1.w, r1.w, c23.y
+mul.f r2.w, c12.z, r0.x
+mad.f32 r2.y, c14.x, r1.y, r2.y
+max.f r1.z, r1.z, c19.x
+max.f r1.w, r1.w, c19.x
+mad.f32 r2.w, c13.z, r0.y, r2.w
+mul.f r3.x, c12.x, r0.x
+mul.f r1.z, c17.x, r1.z
+mad.f32 r3.x, c13.x, r0.y, r3.x
+mad.f32 r2.w, c14.z, r0.z, r2.w
+mad.f32 r3.x, c14.x, r0.z, r3.x
+mad.f32 r1.z, c19.z, r1.z, c15.x
+absneg.f r3.y, (neg)c5.x
+add.f r2.w, r2.w, c15.z
+mov.f32f32 r2.x, r2.x
+add.f r1.z, r1.z, c20.x
+add.f r3.x, r3.x, c15.x
+mad.f32 r2.z, c19.w, r2.z, r2.w
+mad.f32 r1.w, c17.x, r1.w, r3.x
+floor.f r3.z, r1.z
+mul.f r3.w, r2.y, r3.y
+mul.f r4.x, c12.y, r0.w
+add.f r1.w, r1.w, c20.x
+add.f r1.z, r1.z, (neg)r3.z
+add.f r2.z, r2.z, c20.x
+mad.f32 r3.z, c13.y, r1.x, r4.x
+max.f r2.x, r2.x, c21.z
+mad.f32 r1.z, c20.y, r1.z, c20.z
+floor.f r4.x, r1.w
+floor.f r4.y, r2.z
+mad.f32 r3.z, c14.y, r1.y, r3.z
+absneg.f r1.z, (abs)r1.z
+add.f r1.w, r1.w, (neg)r4.x
+add.f r2.z, r2.z, (neg)r4.y
+absneg.f r4.x, (neg)c5.y
+mul.f r4.y, c20.y, r1.z
+mad.f32 r1.w, c20.y, r1.w, c20.z
+mad.f32 r2.z, c20.y, r2.z, c20.z
+mul.f r1.z, r1.z, r1.z
+add.f r4.y, c20.w, (neg)r4.y
+absneg.f r1.w, (abs)r1.w
+absneg.f r2.z, (abs)r2.z
+mad.f32 r3.w, r3.z, r4.x, r3.w
+mul.f r1.z, r1.z, r4.y
+mul.f r4.y, r0.y, c22.x
+mul.f r4.z, c20.y, r1.w
+mul.f r4.w, c20.y, r2.z
+mul.f r1.w, r1.w, r1.w
+max.f r4.y, r4.y, c19.y
+add.f r4.z, c20.w, (neg)r4.z
+add.f r4.w, c20.w, (neg)r4.w
+mul.f r2.z, r2.z, r2.z
+min.f r4.y, r4.y, c23.y
+mul.f r1.w, r1.w, r4.z
+mul.f r4.z, r0.x, r0.z
+mul.f r4.w, r2.z, r4.w
+min.f r2.z, r4.y, c19.w
+mul.f r4.y, r0.y, c21.x
mul.f r0.w, c12.z, r0.w
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r6.x, r6.x
-mov.f32f32 r5.z, r5.z
+min.f r5.x, r2.x, c21.w
+mul.f r1.z, r1.z, r2.z
+mul.f r2.x, r4.z, r4.y
mad.f32 r0.w, c13.z, r1.x, r0.w
-mad.f32 r1.x, c13.z, r1.w, r1.z
-mul.f r1.z, r3.w, r6.x
-mul.f r1.w, r4.y, r6.x
-max.f r3.y, r3.y, c21.z
-mov.f32f32 r3.w, r5.z
-mov.f32f32 r1.z, r1.z
+mov.f32f32 r1.x, r5.x
+mov.f32f32 r4.y, r1.z
+max.f r2.z, r2.x, c19.y
mad.f32 r0.w, c14.z, r1.y, r0.w
-mad.f32 r1.y, c18.x, r3.x, r1.z
-mad.f32 r1.z, c18.y, r3.x, r1.z
-mov.f32f32 r3.x, r3.y
-mov.f32f32 r7.w, r3.w
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.z, r1.z
-min.f r3.x, r3.x, c21.w
-mov.f32f32 r0.w, r0.w
-absneg.f r3.y, (neg)c5.z
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r3.x, r3.x
-mul.f r3.w, c12.y, r0.x
-mad.f32 r4.y, r0.w, r3.y, r6.z
-mad.f32 r1.x, c14.z, r2.x, r1.x
-mad.f32 r2.x, r1.w, r3.x, r5.y
-mad.f32 r4.z, r1.w, r3.x, r4.z
-mad.f32 r3.w, c13.y, r0.y, r3.w
-mov.f32f32 r4.y, r4.y
-add.f r1.y, r2.x, r1.y
-add.f r1.z, r4.z, r1.z
-mad.f32 r2.x, c14.y, r0.z, r3.w
-max.f r3.w, c19.y, r4.y
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.z, r1.z
-add.f r2.x, r2.x, c15.y
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r4.y, r1.y
-mul.f r4.z, c8.w, r1.y
-mul.f r5.y, c8.z, r1.y
-mul.f r5.z, c8.y, r1.y
-add.f r4.y, c4.x, (neg)r4.y
-mad.f32 r1.w, r1.w, r3.x, r2.x
-mul.f r2.x, c8.x, r1.y
-mov.f32f32 r3.x, r1.z
-mul.f r6.x, r4.y, r4.y
-add.f r6.y, c4.y, (neg)r1.w
-mad.f32 r6.z, c9.w, r1.w, r4.z
-mad.f32 r8.x, c9.z, r1.w, r5.y
-mad.f32 r5.z, c9.y, r1.w, r5.z
-mad.f32 r6.x, r6.y, r6.y, r6.x
-mov.f32f32 r6.z, r6.z
-mov.f32f32 r8.x, r8.x
-mov.f32f32 r5.z, r5.z
-mov.f32f32 r6.x, r6.x
-add.f r3.x, c4.z, (neg)r3.x
-mad.f32 r6.z, c10.w, r1.z, r6.z
-mad.f32 r8.x, c10.z, r1.z, r8.x
-mad.f32 r5.z, c10.y, r1.z, r5.z
-mad.f32 r6.x, r3.x, r3.x, r6.x
+absneg.f r1.y, (neg)c5.z
+mov.f32f32 r2.x, r2.y
+min.f r4.z, r2.z, c23.y
+mul.f r5.y, c12.z, r8.x
+mov.f32f32 r2.z, r0.w
+mov.f32f32 r2.y, r3.z
+min.f r3.z, r4.z, c21.y
+mad.f32 r0.w, r0.w, r1.y, r3.w
+mad.f32 r4.z, c13.z, r8.y, r5.y
+mul.f r9.x, c12.y, r8.x
+mov.f32f32 r3.w, r3.z
+mul.f r3.z, r4.w, r3.z
+mul.f r4.w, c12.y, r0.x
+max.f r9.y, c19.y, r0.w
+mul.f r0.w, r1.w, r3.w
+mad.f32 r1.w, c13.y, r0.y, r4.w
+mad.f32 r3.x, r3.z, r1.x, r3.x
+mad.f32 r1.w, c14.y, r0.z, r1.w
+mov.f32f32 r3.w, r0.w
+mad.f32 r0.w, c18.y, r1.z, r0.w
+mad.f32 r1.z, c18.x, r4.y, r3.w
+mad.f32 r2.w, r3.z, r5.x, r2.w
+add.f r1.w, r1.w, c15.y
+mov.f32f32 r4.y, r9.y
+add.f r1.z, r3.x, r1.z
+add.f r2.w, r2.w, r0.w
+mad.f32 r1.x, r3.z, r1.x, r1.w
+nop
+mov.f32f32 r1.w, r1.z
+mul.f r1.z, c0.x, r1.z
+mov.f32f32 r4.w, r2.w
+add.f r6.y, c4.y, (neg)r1.x
+add.f r6.x, c4.x, (neg)r1.w
+mul.f r0.w, c8.y, r1.w
+mul.f r3.x, c8.x, r1.w
+mul.f r7.w, c8.w, r1.w
+mul.f r3.z, r6.x, r6.x
+mad.f32 r0.w, c9.y, r1.x, r0.w
+mad.f32 r3.z, r6.y, r6.y, r3.z
+add.f r6.z, c4.z, (neg)r4.w
+mad.f32 r0.w, c10.y, r4.w, r0.w
+mad.f32 r3.x, c9.x, r1.x, r3.x
+mad.f32 r3.w, c9.w, r1.x, r7.w
+mad.f32 r3.z, r6.z, r6.z, r3.z
mul.f r0.x, c12.w, r0.x
-mad.f32 r2.x, c9.x, r1.w, r2.x
+mad.f32 r3.x, c10.x, r4.w, r3.x
+mad.f32 r3.w, c10.w, r4.w, r3.w
+mul.f r7.z, c8.z, r1.w
mad.f32 r0.x, c13.w, r0.y, r0.x
-mov.f32f32 r0.y, r3.x
-mov.f32f32 r8.y, r4.y
+mul.f r0.y, c0.w, r1.w
+rsq r3.z, r3.z
+(ss)mov.f32f32 r5.x, r3.z
+mad.f32 r3.y, r6.x, r3.z, r3.y
mad.f32 r0.x, c14.w, r0.z, r0.x
-rsq r0.z, r6.x
-(ss)mov.f32f32 r0.z, r0.z
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r9.x, r0.y
-mov.f32f32 r8.z, r8.y
-mad.f32 r0.y, r4.y, r0.z, r5.x
-mad.f32 r4.y, r6.y, r0.z, r6.w
-mad.f32 r0.z, r3.x, r0.z, r3.y
-add.f r0.x, r0.x, c15.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r3.x, r4.y
-mov.f32f32 r0.z, r0.z
-mad.f32 r3.y, c11.w, r0.x, r6.z
-mul.f r4.y, r0.y, r0.y
-mad.f32 r5.x, c11.z, r0.x, r8.x
-mad.f32 r4.y, r3.x, r3.x, r4.y
-mov.f32f32 r3.y, r3.y
-mad.f32 r5.z, c11.y, r0.x, r5.z
-mad.f32 r2.x, c10.x, r1.z, r2.x
-mov.f32f32 r4.y, r4.y
-mov.f32f32 r3.y, r3.y
-mad.f32 r4.y, r0.z, r0.z, r4.y
-mov.f32f32 r5.x, r5.x
-mov.f32f32 r5.z, r5.z
-mad.f32 r2.x, c11.x, r0.x, r2.x
-(ss)mul.f r6.x, c0.w, r1.y
-mul.f r6.z, c0.z, r1.y
-mul.f r6.w, c0.y, r1.y
-rsq r4.y, r4.y
-(ss)mov.f32f32 r4.y, r4.y
-mov.f32f32 r10.y, r3.y
-mov.f32f32 r3.y, r5.x
-mul.f r5.x, r5.z, c16.y
-mul.f r0.z, r0.z, r4.y
-mul.f r3.x, r3.x, r4.y
-mul.f r0.y, r0.y, r4.y
-mov.f32f32 r10.x, r3.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r9.w, r5.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r0.y, r0.y
+mad.f32 r0.z, c9.z, r1.x, r7.z
+(ss)mad.f32 r3.z, r6.y, r5.x, r4.x
+mov.f32f32 r4.x, r3.y
+mad.f32 r1.y, r6.z, r5.x, r1.y
nop
-mov.f32f32 r10.w, r0.z
-mov.f32f32 r10.z, r3.x
-mov.f32f32 r9.y, r0.y
-mov.f32f32 r0.y, r2.x
-mad.f32 r0.z, c1.w, r1.w, r6.x
-mad.f32 r2.x, c1.z, r1.w, r6.z
-mad.f32 r3.x, c1.y, r1.w, r6.w
-mul.f r0.y, r0.y, c16.x
-mad.f32 r0.z, c2.w, r1.z, r0.z
-mad.f32 r2.x, c2.z, r1.z, r2.x
-mad.f32 r3.x, c2.y, r1.z, r3.x
-mov.f32f32 r9.z, r0.y
-mad.f32 r0.y, c3.w, r0.x, r0.z
-mad.f32 r0.z, c3.z, r0.x, r2.x
-mad.f32 r2.x, c3.y, r0.x, r3.x
-mul.f r3.x, c0.x, r1.y
-mov.f32f32 r12.y, r0.y
-mov.f32f32 r12.x, r0.z
-mov.f32f32 r11.w, r2.x
-mad.f32 r0.y, c1.x, r1.w, r3.x
-mad.f32 r0.z, c7.x, r1.z, c7.y
-mad.f32 r0.y, c2.x, r1.z, r0.y
-mad.f32 r1.y, c7.x, r1.y, c7.y
-mad.f32 r0.x, c3.x, r0.x, r0.y
-mov.f32f32 r0.y, r0.z
-mov.f32f32 r11.y, r4.z
-mov.f32f32 r11.x, r5.y
-mov.f32f32 r11.z, r0.x
-mov.f32f32 r12.w, r0.y
-mov.f32f32 r0.x, r1.y
-mov.f32f32 r0.y, r6.y
-(rpt1)nop
-mov.f32f32 r12.z, r0.x
-mov.f32f32 r8.w, r0.y
-mul.f r0.x, r3.w, c6.z
-mul.f r0.y, r3.w, c6.y
-mul.f r0.z, r3.w, c6.x
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r7.z, r0.x
-mov.f32f32 r7.y, r0.y
-mov.f32f32 r7.x, r0.z
-mul.f r0.x, r4.x, r1.x
-mul.f r0.y, r0.w, r5.w
-mad.f32 r0.x, r0.w, r3.z, (neg)r0.x
-mad.f32 r0.y, r4.w, r1.x, (neg)r0.y
-mov.f32f32 r0.z, r1.x
-mov.f32f32 r1.x, r5.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.w, r0.z
-mov.f32f32 r1.z, r1.x
-mul.f r0.x, r0.x, r2.y
-mul.f r0.y, r0.y, r2.y
-mov.f32f32 r0.z, r3.z
-nop
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.y, r0.z
-mov.f32f32 r0.z, r0.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
+mov.f32f32 r5.x, r3.z
+mul.f r4.x, r4.x, r4.x
+mov.f32f32 r5.y, r1.y
+add.f r0.x, r0.x, c15.w
+mad.f32 r3.z, r3.z, r5.x, r4.x
+mad.f32 r0.z, c10.z, r4.w, r0.z
+mad.f32 r1.y, r1.y, r5.y, r3.z
+mad.f32 r0.w, c11.y, r0.x, r0.w
+mad.f32 r3.x, c11.x, r0.x, r3.x
+mad.f32 r3.w, c11.w, r0.x, r3.w
+mad.f32 r3.z, c11.z, r0.x, r0.z
+mad.f32 r0.y, c1.w, r1.x, r0.y
+mul.f r0.z, c0.z, r1.w
+rsq r1.y, r1.y
+(ss)mov.f32f32 r4.x, r1.y
+mul.f r6.w, r3.y, r1.y
+mul.f r3.y, r0.w, c16.y
+mul.f r3.x, r3.x, c16.x
+mul.f r7.y, r5.y, r4.x
+mul.f r7.x, r5.x, r4.x
+mad.f32 r0.y, c2.w, r4.w, r0.y
+mad.f32 r0.z, c1.z, r1.x, r0.z
+mad.f32 r0.w, c3.w, r0.x, r0.y
+mad.f32 r0.y, c2.z, r4.w, r0.z
+(ss)mul.f r1.y, c0.y, r1.w
+mad.f32 r0.z, c3.z, r0.x, r0.y
+mad.f32 r0.y, c1.y, r1.x, r1.y
+mad.f32 r1.x, c1.x, r1.x, r1.z
+mad.f32 r0.y, c2.y, r4.w, r0.y
+mad.f32 r1.x, c2.x, r2.w, r1.x
+mad.f32 r0.y, c3.y, r0.x, r0.y
+mad.f32 r0.x, c3.x, r0.x, r1.x
+mad.f32 r5.x, c7.x, r1.w, c7.y
+mad.f32 r5.y, c7.x, r4.w, c7.y
+mul.f r1.z, r4.y, c6.z
+mul.f r1.y, r4.y, c6.y
+mul.f r1.x, r9.y, c6.x
+mad.f32 r1.w, c14.z, r8.z, r4.z
+mad.f32 r2.w, c13.y, r8.y, r9.x
+mul.f r4.x, c12.x, r8.x
+mad.f32 r2.w, c14.y, r8.z, r2.w
+mov.f32f32 r4.z, r1.w
+mad.f32 r4.x, c13.x, r8.y, r4.x
(rpt1)nop
-mov.f32f32 r2.x, r0.x
-mov.f32f32 r3.w, r0.y
-mov.f32f32 r3.z, r0.z
-mov.f32f32 r0.x, r4.w
-mov.f32f32 r0.y, r4.x
+mul.f r4.w, r2.x, r4.z
+mad.f32 r8.x, c14.x, r8.z, r4.x
+mov.f32f32 r4.y, r2.w
(rpt1)nop
-mov.f32f32 r3.y, r0.x
-mov.f32f32 r3.x, r0.y
-mov.f32f32 r0.x, r2.w
-mov.f32f32 r0.y, r2.z
+mov.f32f32 r4.x, r8.x
+mul.f r8.y, r2.z, r4.y
+mul.f r8.x, r2.y, r8.x
+mad.f32 r1.w, r2.y, r1.w, (neg)r8.y
+mad.f32 r4.w, r2.z, r4.x, (neg)r4.w
+mad.f32 r8.x, r2.x, r2.w, (neg)r8.x
(rpt1)nop
-mov.f32f32 r13.y, r0.x
-mov.f32f32 r13.x, r0.y
+mul.f r4.w, r4.w, r8.w
+mul.f r2.w, r1.w, r8.w
+mul.f r1.w, r8.x, r8.w
end
-; VERT: outputs: r11.z (0:0) r7.x (5:9) r3.x (5:10) r9.z (5:11) r1.y (5:12) r12.z (5:13) r8.z (5:14) r10.z (5:15)
-; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r1.z (0:0,cm=f,il=16,b=0) r2.z (0:0,cm=3,il=20,b=0)
-; VERT: 304 instructions, 0 half, 14 full
-; pos: r11.z
+nop
+nop
+; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11) r4.x (5:12) r5.x (5:13) r6.x (5:14) r7.x (5:15)
+; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r8.x (0:0,cm=f,il=16,b=0) r5.z (0:0,cm=3,il=20,b=0)
+; VERT: 201 instructions, 0 half, 10 full
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-74.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-74.asm
index 0274fe9..be30c1a 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-74.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-74.asm
@@ -4,206 +4,143 @@
@in(r0.w) in3
@in(r1.x) in4
@in(r1.y) in5
-@out(r1.y) out0
-@out(r1.z) out1
-@out(r1.w) out2
-@out(r2.x) out3
+@out(r2.x) out0
+@out(r2.y) out1
+@out(r2.z) out2
+@out(r2.w) out3
+@const(c9.x) 0x3f000000, 0x00000000, 0x3d4ccccd, 0xbb449ba6
+@const(c10.x) 0xbf000000, 0x3f800000, 0x40000000, 0x3de38866
+@const(c11.x) 0x3fb8aa65, 0x3cf5c28f, 0x00000000, 0x00000000
+@const(c12.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.x, 4, r1.x
add.f r0.y, r0.w, c9.y
bary.f r0.w, 5, r1.x
bary.f r1.z, 10, r1.x
-add.f r1.w, r0.x, c10.x
-bary.f r2.x, 8, r1.x
+add.f r2.x, r0.x, c10.x
+bary.f r1.w, 11, r1.x
add.f r2.y, r0.w, c10.x
-mov.f32f32 r1.z, r1.z
-floor.f r2.z, r1.w
+bary.f r3.y, 8, r1.x
+floor.f r2.z, r2.x
rcp r0.y, r0.y
add.f r0.z, r0.z, c9.y
floor.f r2.w, r2.y
-mov.f32f32 r3.x, r1.z
-add.f r1.z, r1.w, (neg)r2.z
+bary.f r3.z, 9, r1.x
+add.f r3.w, r2.x, (neg)r2.z
(ss)mul.f r0.y, r0.z, r0.y
-add.f r0.z, r2.y, (neg)r2.w
-bary.f r1.w, 11, r1.x
-mov.f32f32 r1.z, r1.z
+absneg.f r0.z, (neg)c7.x
+add.f r4.x, r2.y, (neg)r2.w
+mov.f32f32 r4.y, r3.w
+sam (f32)(xyzw)r2.x, r1.z, s#0, t#0
+(ss)add.f r1.z, r3.w, c10.y
+mul.f r0.z, r0.z, c7.x
+mov.f32f32 r1.w, r4.x
+mul.f r3.w, c9.x, r4.y
+add.f r4.y, c10.z, (neg)r4.y
+mul.f r0.z, r0.z, r0.y
mov.f32f32 r0.y, r0.y
-absneg.f r2.y, (neg)c7.x
-mov.f32f32 r0.z, r0.z
-mul.f r2.z, c9.x, r1.z
-mov.f32f32 r1.w, r1.w
-add.f r2.w, c10.z, (neg)r1.z
-add.f r3.z, c10.z, (neg)r0.z
-mov.f32f32 r2.z, r2.z
-mul.f r2.y, r2.y, c7.x
-mul.f r3.w, c9.x, r0.z
-mov.f32f32 r3.y, r1.w
-add.f r0.x, r0.x, (neg)r2.z
-mul.f r1.w, r2.y, r0.y
-mov.f32f32 r2.y, r3.w
-mov.f32f32 r2.z, r2.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r1.w, r1.w
-add.f r0.w, r0.w, (neg)r2.y
-sam (f32)(xyzw)r3.w, r3.x, s#0, t#0
-(sy)cmps.f.lt r2.y, r4.z, c9.z
-add.f r2.w, c10.x, r0.x
-mul.f r0.y, r1.w, r0.y
+add.f r0.x, r0.x, (neg)r3.w
+mul.f r3.w, c9.x, r1.w
+mov.f32f32 r4.z, r4.y
+mul.f r0.y, r0.z, r0.y
+mov.f32f32 r0.z, r0.x
+add.f r0.w, r0.w, (neg)r3.w
add.f r0.x, c10.y, r0.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.w, r2.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
-add.f r2.w, c10.y, r0.w
-mul.f r1.w, r1.w, c3.z
mul.f r0.y, r0.y, c11.x
-mul.f r0.x, r0.x, c3.z
-mov.f32f32 r2.w, r2.w
-(ss)mov.f32f32 r3.x, r1.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r3.y, r0.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r4.w, r3.x
-add.f r0.w, c10.x, r0.w
-mov.f32f32 r3.x, r3.y
-mov.f32f32 r5.z, r0.x
-mov.f32f32 r0.x, r1.w
-mov.f32f32 r0.w, r0.w
-exp2 r0.y, r0.y
-(ss)mov.f32f32 r0.y, r0.y
-mov.f32f32 r6.y, r3.x
-mul.f r1.w, r2.w, c3.w
-mul.f r0.w, r0.w, c3.w
-add.f r2.w, c12.y, (neg)r0.y
-mov.f32f32 r7.x, r0.x
-mov.f32f32 r0.x, r1.w
-mov.f32f32 r3.x, r0.w
-mul.f r2.w, r2.w, c7.y
-mul.f r0.y, r0.y, c10.y
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r5.x, r3.x
-bary.f r3.x, 6, r1.x
-add.f r0.y, r0.y, r2.w
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r0.w, r0.w
-add.f r2.w, r3.x, c9.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r6.z, r1.w
-mov.f32f32 r5.w, r0.w
-mov.f32f32 r0.w, r2.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.w, r2.w
-mov.f32f32 r3.x, r2.w
-mov.f32f32 r5.y, r0.w
-max.f r0.y, r0.y, c9.y
-mov.f32f32 r6.w, r1.w
-mov.f32f32 r6.x, r3.x
-mov.f32f32 r7.y, r0.x
-mov.f32f32 r0.x, r2.w
-cov.u32f32 r0.w, r2.y
-sam.s (f32)(x)r1.w, r4.w, s#2, t#2
-(sy)mov.f32f32 r1.w, r1.w
-min.f r0.y, r0.y, c10.y
-sam.s (f32)(x)r2.y, r6.y, s#2, t#2
-(sy)mov.f32f32 r2.y, r2.y
-sam.s (f32)(x)r2.w, r5.z, s#2, t#2
-(sy)mov.f32f32 r2.w, r2.w
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r3.x, r3.z
-add.f r3.y, c12.y, (neg)r0.y
-add.f r3.z, c12.y, (neg)r0.y
-(ss)add.f r4.w, c12.y, (neg)r0.y
-mul.f r5.x, r2.z, r3.x
-mul.f r3.y, r3.y, c6.z
-mul.f r3.z, r3.z, c6.y
-mul.f r4.w, r4.w, c6.x
-mul.f r1.w, r5.x, r1.w
-add.f r1.z, r1.z, c10.y
-mov.f32f32 r7.z, r0.x
-mov.f32f32 r0.x, (0.000000)
-add.f r0.z, r0.z, c10.y
-mul.f r3.x, r1.z, r3.x
-mov.f32f32 r2.x, r2.x
-cmps.f.ne p0.x, r0.w, r0.x
-mul.f r0.x, r2.z, r0.z
-mad.f32 r0.w, r3.x, r2.w, r1.w
-sam.s (f32)(x)r1.w, r7.x, s#2, t#2
+add.f r0.z, c10.x, r0.z
+mov.f32f32 r3.w, r0.w
+mul.f r4.w, r0.x, c3.z
+add.f r0.x, c10.y, r0.w
+mul.f r5.z, r0.z, c3.z
+add.f r0.z, c10.x, r3.w
+mov.f32f32 r6.y, r4.w
+mul.f r5.x, r0.x, c3.w
+mov.f32f32 r7.x, r5.z
+mul.f r6.z, r0.z, c3.w
+exp2 r0.x, r0.y
+(ss)mov.f32f32 r0.y, r0.x
+bary.f r0.z, 6, r1.x
+mov.f32f32 r5.w, r5.x
+mov.f32f32 r7.y, r6.z
+add.f r0.y, c12.y, (neg)r0.y
+add.f r5.y, r0.z, c9.w
+add.f r0.z, c10.z, (neg)r1.w
+mul.f r0.x, r0.x, c10.y
+add.f r0.w, r4.x, c10.y
+mov.f32f32 r7.z, r5.y
+mul.f r0.y, r0.y, c7.y
+mov.f32f32 r6.w, r5.y
+mov.f32f32 r6.x, r5.y
(rpt1)nop
-(sy)mov.f32f32 r1.w, r1.w
-mov.f32f32 r0.w, r0.w
-bary.f r2.z, 0, r1.x
-mov.f32f32 r2.w, r2.x
-mad.f32 r0.x, r0.x, r1.w, r0.w
-bary.f r0.w, 1, r1.x
-bary.f r1.w, 9, r1.x
+add.f r0.x, r0.x, r0.y
+sam.s (f32)(x)r7.x, r7.x, s#2, t#2
+mov.f32f32 r0.y, r0.z
+nop
+(ss)nop
+sam.s (f32)(x)r7.y, r6.y, s#2, t#2
+sam.s (f32)(x)r7.z, r4.w, s#2, t#2
mul.f r0.z, r1.z, r0.z
-mov.f32f32 r0.x, r0.x
-bary.f (ei)r1.x, 2, r1.x
-mov.f32f32 r1.y, r1.w
-mad.f32 r0.x, r0.z, r2.y, r0.x
+mul.f r1.w, r4.y, r0.w
+mul.f r0.y, r4.z, r0.y
+max.f r0.x, r0.x, c9.y
+sam.s (f32)(x)r3.w, r5.z, s#2, t#2
+(sy)cmps.f.lt r4.x, r2.w, c9.z
+mul.f r0.w, r1.z, r0.w
+mul.f r0.y, r0.y, r7.x
+min.f r0.x, r0.x, c10.y
+mad.f32 r0.y, r0.z, r7.y, r0.y
+cov.u32f32 r0.z, r4.x
+mad.f32 r0.y, r1.w, r3.w, r0.y
+add.f r1.z, c12.y, (neg)r0.x
+mad.f32 r0.y, r0.w, r7.z, r0.y
+add.f r0.w, c12.y, (neg)r0.x
+add.f r1.w, c12.y, (neg)r0.x
+mov.f32f32 r3.w, (0.000000)
+mul.f r0.y, c10.w, r0.y
+bary.f r4.x, 0, r1.x
+mul.f r1.z, r1.z, c6.z
+mul.f r0.w, r0.w, c6.y
+mov.f32f32 r4.y, r0.y
+bary.f r4.z, 2, r1.x
+bary.f (ei)r1.x, 1, r1.x
+mul.f r1.y, r2.x, r4.x
+mul.f r1.w, r1.w, c6.x
+mul.f r4.x, r2.z, r4.z
+mul.f r1.x, r2.y, r1.x
+mul.f r0.y, r1.y, r0.y
+cmps.f.ne p0.x, r0.z, r3.w
+mul.f r0.z, r4.x, r4.y
+mul.f r1.x, r1.x, r4.y
+mad.f32 r0.z, c5.z, r2.z, r0.z
+mad.f32 r1.x, c5.y, r2.y, r1.x
+mad.f32 r0.y, c5.x, r2.x, r0.y
+sam (f32)(w)r3.y, r3.y, s#1, t#1
+(sy)cmps.f.lt r1.y, r4.x, c11.y
+mul.f r0.z, r0.x, r0.z
+mul.f r1.x, r0.x, r1.x
+mul.f r0.x, r0.x, r0.y
kill p0.x
-mov.f32f32 r1.z, r4.z
-mov.f32f32 r3.x, r1.y
-mov.f32f32 r0.x, r0.x
-mul.f r1.x, r4.y, r1.x
-mul.f r0.w, r4.x, r0.w
-mov.f32f32 r2.x, r1.z
-mul.f r0.x, c10.w, r0.x
-mul.f r2.y, r3.w, r2.z
-sam (f32)(w)r5.x, r2.w, s#1, t#1
-nop
-(sy)cmps.f.lt r2.z, r5.w, c11.y
-mov.f32f32 r0.x, r0.x
-(ss)mov.f32f32 r2.w, r5.w
-mov.f32f32 r3.x, c9.y
-nop
-mul.f r1.x, r1.x, r0.x
-mul.f r0.w, r0.w, r0.x
-mul.f r0.x, r2.y, r0.x
-cov.u32f32 r2.y, r2.z
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r0.w, r0.w
-mad.f32 r1.x, c5.z, r4.y, r1.x
-mad.f32 r0.w, c5.y, r4.x, r0.w
-mov.f32f32 r0.x, r0.x
-cmps.f.ne r2.y, r2.y, c9.y
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r0.w, r0.w
-mad.f32 r0.x, c5.x, r3.w, r0.x
-mov.f32f32 r2.z, r2.w
-mul.f r1.x, r0.y, r1.x
-mul.f r0.w, r0.y, r0.w
-mov.f32f32 r0.x, r0.x
-sel.b32 r2.y, r3.x, r2.y, r2.z
-add.f r1.x, r1.x, r3.y
-add.f r0.w, r0.w, r3.z
-mul.f r0.x, r0.y, r0.x
-nop
-mul.f r0.y, r1.x, r2.y
-mul.f r0.w, r0.w, r2.y
-add.f r0.x, r0.x, r4.w
-nop
-mul.f r0.y, r0.y, c4.z
-mul.f r0.w, r0.w, c4.y
-mul.f r0.x, r0.x, r2.y
+add.f r0.y, r0.z, r1.z
+cov.u32f32 r0.z, r1.y
+add.f r0.w, r1.x, r0.w
+add.f r0.x, r0.x, r1.w
+mov.f32f32 r1.x, c9.y
+cmps.f.ne r0.z, r0.z, c9.y
+(rpt2)nop
+sel.b32 r0.z, r1.x, r0.z, r4.x
+(rpt2)nop
+mul.f r0.y, r0.y, r0.z
+mul.f r0.w, r0.w, r0.z
+(rpt1)nop
+mul.f r2.z, r0.y, c4.z
+mul.f r2.y, r0.w, c4.y
+mul.f r0.x, r0.x, r0.z
+(rpt2)nop
+mul.f r2.x, r0.x, c4.x
+end
nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.w, r0.w
-mul.f r0.x, r0.x, c4.x
nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.x, r0.x
nop
-mov.f32f32 r1.w, r0.y
-mov.f32f32 r1.z, r0.w
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
-mov.f32f32 r1.y, r0.x
-end
-; FRAG: outputs: r1.y (1:0)
+; FRAG: outputs: r2.x (1:0)
; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r2.x (5:10,cm=f,il=12,b=1) r1.x (5:11,cm=f,il=16,b=1)
-; FRAG: 195 instructions, 0 half, 8 full
-; pos (bary): r1.x
-; color: r1.y
-; fragcoord: r0.x
+; FRAG: 133 instructions, 0 half, 8 full
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-75.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-75.asm
index b65a363..63ee7bc 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-75.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-75.asm
@@ -6,242 +6,182 @@
@in(r0.w) in4
@in(r1.x) in5
@in(r1.y) in6
-@in(r1.z) in8
-@in(r1.w) in9
-@out(r5.x) out0
-@out(r5.y) out1
-@out(r5.z) out2
-@out(r5.w) out3
-@out(r4.x) out4
-@out(r4.y) out5
-@out(r4.z) out6
-@out(r4.w) out7
-@out(r2.z) out8
-@out(r2.w) out9
-@out(r3.x) out10
-@out(r3.y) out11
-@out(r0.z) out12
-@out(r0.w) out13
-@out(r1.x) out14
-@out(r1.y) out15
-(sy)(ss)floor.f r2.x, c14.z
-floor.f r2.y, c14.x
-absneg.f r2.z, (abs)c17.x
-absneg.f r2.w, (abs)c17.y
-add.f r2.x, c14.z, (neg)r2.x
-add.f r2.y, c14.x, (neg)r2.y
-mul.f r3.x, c11.x, r0.w
-add.f r2.z, r2.z, r2.w
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r2.y, r2.y
-mad.f32 r2.w, c12.x, r1.x, r3.x
-mov.f32f32 r2.z, r2.z
-max.f r2.x, r2.x, c18.y
-max.f r2.y, r2.y, c18.y
-mov.f32f32 r2.w, r2.w
-mul.f r3.x, c16.x, r2.z
-min.f r2.x, r2.x, c22.y
-min.f r2.y, r2.y, c22.y
-mul.f r3.y, c11.z, r0.x
-mad.f32 r2.w, c13.x, r1.y, r2.w
-max.f r2.x, r2.x, c18.x
-max.f r2.y, r2.y, c18.x
-mad.f32 r3.y, c12.z, r0.y, r3.y
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r2.y, r2.y
-mad.f32 r3.y, c13.z, r0.z, r3.y
-mul.f r3.z, c11.x, r0.x
-mul.f r2.x, c16.x, r2.x
-mad.f32 r3.z, c12.x, r0.y, r3.z
-add.f r3.y, r3.y, c14.z
-mad.f32 r3.z, c13.x, r0.z, r3.z
-mov.f32f32 r2.x, r2.x
-mad.f32 r3.x, c18.w, r3.x, r3.y
-mul.f r2.w, r2.w, (neg)c4.x
-mul.f r3.w, c11.y, r0.w
-mad.f32 r2.x, c18.z, r2.x, c14.x
-add.f r3.z, r3.z, c14.x
-mov.f32f32 r3.x, r3.x
-mad.f32 r2.y, c16.x, r2.y, r3.z
-mov.f32f32 r2.x, r2.x
-mad.f32 r3.w, c12.y, r1.x, r3.w
-mov.f32f32 r3.x, r3.x
-add.f r2.y, r2.y, c19.x
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r3.w, r3.w
-add.f r3.x, r3.x, c19.x
-mad.f32 r3.w, c13.y, r1.y, r3.w
-add.f r2.x, r2.x, c19.x
+@in(r3.z) in8
+@in(r3.w) in9
+@out(r0.x) out0
+@out(r0.y) out1
+@out(r0.z) out2
+@out(r0.w) out3
+@out(r1.x) out4
+@out(r1.y) out5
+@out(r1.z) out6
+@out(r1.w) out7
+@out(r2.x) out8
+@out(r2.y) out9
+@out(r2.z) out10
+@out(r2.w) out11
+@out(r3.x) out12
+@out(r3.y) out13
+@out(r3.z) out14
+@out(r3.w) out15
+@const(c18.x) 0x3ecccccd, 0x00000000, 0x3eaaa64c, 0x3e800000
+@const(c19.x) 0x3f000000, 0x40000000, 0xbf800000, 0x40400000
+@const(c20.x) 0x399d4952, 0x3e4ccccd, 0x3f99999a, 0x3fd9999a
+@const(c21.x) 0x3c88ce70, 0x00000000, 0x00000000, 0x00000000
+@const(c22.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
+(sy)(ss)floor.f r1.z, c14.z
+floor.f r1.w, c14.x
+absneg.f r2.x, (abs)c17.x
+absneg.f r2.y, (abs)c17.y
+add.f r1.z, c14.z, (neg)r1.z
+add.f r1.w, c14.x, (neg)r1.w
+mov.f32f32 r2.z, c18.y
+mul.f r2.w, c11.x, r0.w
+max.f r1.z, r1.z, c18.y
+max.f r1.w, r1.w, c18.y
+add.f r2.x, r2.x, r2.y
+add.f r2.y, r2.z, c19.x
+min.f r1.z, r1.z, c22.y
+min.f r1.w, r1.w, c22.y
+mul.f r2.z, c16.x, r2.x
+mul.f r3.x, c11.z, r0.x
+max.f r1.z, r1.z, c18.x
+max.f r1.w, r1.w, c18.x
+mul.f r3.y, c11.x, r0.x
+mad.f32 r3.x, c12.z, r0.y, r3.x
+mul.f r1.z, c16.x, r1.z
+mad.f32 r3.y, c12.x, r0.y, r3.y
+mad.f32 r3.x, c13.z, r0.z, r3.x
+mad.f32 r3.y, c13.x, r0.z, r3.y
+mad.f32 r1.z, c18.z, r1.z, c14.x
floor.f r4.x, r2.y
-floor.f r4.y, r3.x
-mov.f32f32 r3.w, r3.w
-floor.f r4.z, r2.x
+mad.f32 r2.w, c12.x, r1.x, r2.w
+add.f r3.y, r3.y, c14.x
+add.f r1.z, r1.z, c19.x
+mad.f32 r1.w, c16.x, r1.w, r3.y
+add.f r3.x, r3.x, c14.z
add.f r2.y, r2.y, (neg)r4.x
-add.f r3.x, r3.x, (neg)r4.y
-mad.f32 r2.w, (neg)c4.y, r3.w, r2.w
-add.f r2.x, r2.x, (neg)r4.z
+floor.f r4.x, r1.z
+add.f r1.w, r1.w, c19.x
+mad.f32 r2.z, c18.w, r2.z, r3.x
mad.f32 r2.y, c19.y, r2.y, c19.z
-mad.f32 r3.x, c19.y, r3.x, c19.z
-mov.f32f32 r2.w, r2.w
-mad.f32 r2.x, c19.y, r2.x, c19.z
+add.f r1.z, r1.z, (neg)r4.x
+floor.f r4.x, r1.w
+add.f r2.z, r2.z, c19.x
absneg.f r2.y, (abs)r2.y
-absneg.f r3.x, (abs)r3.x
-mul.f r0.w, c11.z, r0.w
-absneg.f r2.x, (abs)r2.x
-mul.f r3.w, c19.y, r2.y
-mul.f r4.x, c19.y, r3.x
-mul.f r2.y, r2.y, r2.y
-mul.f r4.y, c19.y, r2.x
-add.f r3.w, c19.w, (neg)r3.w
+mad.f32 r1.z, c19.y, r1.z, c19.z
+add.f r4.x, r1.w, (neg)r4.x
+floor.f r4.y, r2.z
+mul.f r1.w, r2.y, r2.y
+absneg.f r1.z, (abs)r1.z
+mad.f32 r2.y, c19.y, r4.x, c19.z
+add.f r2.z, r2.z, (neg)r4.y
+mad.f32 r2.w, c13.x, r1.y, r2.w
+mul.f r4.x, c19.y, r1.z
+absneg.f r2.y, (abs)r2.y
+mad.f32 r2.z, c19.y, r2.z, c19.z
+mul.f r1.z, r1.z, r1.z
add.f r4.x, c19.w, (neg)r4.x
-mul.f r3.x, r3.x, r3.x
+mul.f r4.y, c19.y, r2.y
+absneg.f r2.z, (abs)r2.z
+mul.f r2.y, r2.y, r2.y
+mul.f r1.z, r1.z, r4.x
+mul.f r4.x, r0.y, c21.x
add.f r4.y, c19.w, (neg)r4.y
-mul.f r2.x, r2.x, r2.x
-mul.f r2.y, r2.y, r3.w
-mul.f r3.x, r3.x, r4.x
-mad.f32 r0.w, c12.z, r1.x, r0.w
-mul.f r1.x, r2.x, r4.y
-mov.f32f32 r2.x, r2.y
-mov.f32f32 r2.y, r3.x
-mul.f r3.x, r0.x, r0.z
-mov.f32f32 r1.x, r1.x
-mul.f r3.w, r0.y, c21.x
-mul.f r4.x, r0.y, c20.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r4.y, c18.y
-mov.f32f32 r3.w, r3.w
-mul.f r3.x, r3.x, r4.x
-mad.f32 r0.w, c13.z, r1.y, r0.w
-add.f r1.y, r4.y, c19.x
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r0.w, r0.w
-floor.f r4.x, r1.y
-max.f r3.w, r3.w, c18.y
-mov.f32f32 r3.x, r3.x
-mad.f32 r0.w, (neg)c4.z, r0.w, r2.w
-add.f r1.y, r1.y, (neg)r4.x
-min.f r2.w, r3.w, c22.y
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r0.w, r0.w
-mad.f32 r1.y, c19.y, r1.y, c19.z
-min.f r2.w, r2.w, c18.w
-max.f r3.x, r3.x, c18.y
-max.f r0.w, c18.y, r0.w
-absneg.f r1.y, (abs)r1.y
-mov.f32f32 r2.w, r2.w
-min.f r3.x, r3.x, c22.y
-mov.f32f32 r0.w, r0.w
-mul.f r1.y, r1.y, r1.y
-mul.f r1.x, r1.x, r2.w
-min.f r2.w, r3.x, c20.y
-mul.f r3.x, r0.w, c5.z
-mul.f r3.w, r0.w, c5.y
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r3.w, r3.w
-mul.f r0.w, r0.w, c5.x
-mul.f r2.x, r2.x, r2.w
-mul.f r2.y, r2.y, r2.w
-max.f r2.z, r2.z, c20.z
-mov.f32f32 r4.z, r3.x
+mul.f r4.z, c19.y, r2.z
+mul.f r2.z, r2.z, r2.z
+max.f r4.x, r4.x, c18.y
+mul.f r2.y, r2.y, r4.y
+mul.f r4.y, r0.x, r0.z
+add.f r4.z, c19.w, (neg)r4.z
+min.f r4.x, r4.x, c22.y
+mul.f r4.w, r0.y, c20.x
+mul.f r2.w, r2.w, (neg)c4.x
+mul.f r5.x, c11.y, r0.w
+min.f r4.x, r4.x, c18.w
+mul.f r4.y, r4.y, r4.w
+mul.f r2.z, r2.z, r4.z
+mad.f32 r4.z, c12.y, r1.x, r5.x
+mul.f r1.z, r1.z, r4.x
+max.f r4.x, r4.y, c18.y
+mad.f32 r4.y, c13.y, r1.y, r4.z
mov.f32f32 r2.x, r2.x
-mov.f32f32 r4.y, r3.w
-mad.f32 r2.w, c17.x, r1.x, r2.x
-mad.f32 r1.x, c17.y, r1.x, r2.x
-mov.f32f32 r2.x, r2.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.z, r2.w
-mov.f32f32 r1.x, r1.x
-min.f r2.x, r2.x, c20.w
-mov.f32f32 r4.x, r0.w
-mov.f32f32 r4.w, r1.y
-mul.f r0.w, c11.y, r0.x
-mov.f32f32 r1.y, r2.x
-mad.f32 r0.w, c12.y, r0.y, r0.w
+mov.f32f32 r4.z, r1.z
+min.f r4.x, r4.x, c22.y
+mad.f32 r2.w, (neg)c4.y, r4.y, r2.w
+mul.f r0.w, c11.z, r0.w
+max.f r2.x, r2.x, c20.z
+min.f r4.x, r4.x, c20.y
+mad.f32 r0.w, c12.z, r1.x, r0.w
+mul.f r1.x, c11.y, r0.x
mul.f r0.x, c11.w, r0.x
-mov.f32f32 r1.w, r1.w
-mad.f32 r2.x, r2.y, r1.y, r3.z
-mad.f32 r2.w, r2.y, r1.y, r3.y
-mad.f32 r0.w, c13.y, r0.z, r0.w
+mov.f32f32 r4.y, r4.x
+mul.f r2.z, r2.z, r4.x
+min.f r2.x, r2.x, c20.w
+mad.f32 r0.w, c13.z, r1.y, r0.w
+mul.f r1.y, r2.y, r4.y
+mad.f32 r1.x, c12.y, r0.y, r1.x
mad.f32 r0.x, c12.w, r0.y, r0.x
-add.f r0.y, r2.x, r2.z
-add.f r1.x, r2.w, r1.x
-add.f r0.w, r0.w, c14.y
-nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.x, r1.x
-mad.f32 r0.w, r2.y, r1.y, r0.w
+mad.f32 r0.y, c13.y, r0.z, r1.x
+mov.f32f32 r1.x, r1.y
+mad.f32 r1.y, c17.y, r1.z, r1.y
+mad.f32 r1.x, c17.x, r4.z, r1.x
+mov.f32f32 r1.z, r2.x
+mad.f32 r2.x, r2.z, r2.x, r3.x
+add.f r0.y, r0.y, c14.y
+mad.f32 r0.w, (neg)c4.z, r0.w, r2.w
+mad.f32 r2.y, r2.z, r1.z, r3.y
+add.f r1.y, r2.x, r1.y
+mad.f32 r0.y, r2.z, r1.z, r0.y
+max.f r4.x, c18.y, r0.w
+add.f r0.w, r2.y, r1.x
+mov.f32f32 r1.x, r1.y
mad.f32 r0.x, c13.w, r0.z, r0.x
-mul.f r0.z, c7.w, r0.y
-mul.f r1.y, c7.z, r0.y
-mad.f32 r0.z, c8.w, r0.w, r0.z
-mad.f32 r1.y, c8.z, r0.w, r1.y
-mul.f r2.x, c7.y, r0.y
-mul.f r2.y, c7.x, r0.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r1.y, r1.y
-mad.f32 r0.z, c9.w, r1.x, r0.z
-add.f r0.x, r0.x, c14.w
-mad.f32 r1.y, c9.z, r1.x, r1.y
-mad.f32 r2.x, c8.y, r0.w, r2.x
-mad.f32 r2.y, c8.x, r0.w, r2.y
-mad.f32 r0.z, c10.w, r0.x, r0.z
-mad.f32 r1.y, c10.z, r0.x, r1.y
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r1.y, r1.y
-mad.f32 r2.x, c9.y, r1.x, r2.x
-mad.f32 r2.y, c9.x, r1.x, r2.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r1.y, r1.y
-mad.f32 r2.x, c10.y, r0.x, r2.x
-mad.f32 r2.y, c10.x, r0.x, r2.y
-mov.f32f32 r3.y, r0.z
-mov.f32f32 r3.x, r1.y
-mov.f32f32 r0.z, r2.x
-mov.f32f32 r1.y, r2.y
-mul.f r2.x, c0.w, r0.y
-mul.f r2.y, c0.z, r0.y
-mul.f r0.z, r0.z, c15.y
-mul.f r1.y, r1.y, c15.x
-(rpt1)nop
-mov.f32f32 r2.w, r0.z
-mov.f32f32 r2.z, r1.y
-mad.f32 r0.z, c1.w, r0.w, r2.x
-mad.f32 r1.y, c1.z, r0.w, r2.y
-mad.f32 r0.z, c2.w, r1.x, r0.z
-mad.f32 r1.y, c2.z, r1.x, r1.y
-mad.f32 r0.z, c3.w, r0.x, r0.z
-mad.f32 r1.y, c3.z, r0.x, r1.y
-mul.f r2.x, c0.y, r0.y
-mul.f r2.y, c0.x, r0.y
-mov.f32f32 r5.w, r0.z
-mov.f32f32 r5.z, r1.y
-mad.f32 r0.z, c1.y, r0.w, r2.x
-mad.f32 r0.w, c1.x, r0.w, r2.y
-mad.f32 r0.z, c2.y, r1.x, r0.z
-mad.f32 r0.w, c2.x, r1.x, r0.w
-mad.f32 r0.z, c3.y, r0.x, r0.z
-mad.f32 r0.x, c3.x, r0.x, r0.w
-(rpt1)nop
-mov.f32f32 r5.y, r0.z
-mov.f32f32 r5.x, r0.x
-mad.f32 r0.x, c6.x, r1.x, c6.y
-mad.f32 r0.y, c6.x, r0.y, c6.y
-(rpt1)nop
-mov.f32f32 r0.w, r0.x
-mov.f32f32 r0.z, r0.y
-mov.f32f32 r1.y, r1.w
-mov.f32f32 r0.x, r1.z
-(rpt2)nop
-mov.f32f32 r1.x, r0.x
+nop
+mov.f32f32 r1.z, r0.w
+mul.f r3.x, c0.x, r0.w
+mad.f32 r3.y, c6.x, r1.x, c6.y
+mov.f32f32 r4.y, r4.x
+mul.f r0.z, c7.y, r1.z
+mul.f r0.w, c7.x, r1.z
+mad.f32 r0.z, c8.y, r0.y, r0.z
+mad.f32 r0.w, c8.x, r0.y, r0.w
+mad.f32 r0.z, c9.y, r1.x, r0.z
+add.f r4.z, r0.x, c14.w
+mad.f32 r0.x, c9.x, r1.x, r0.w
+mul.f r0.w, c7.w, r1.z
+mul.f r2.x, c7.z, r1.z
+mad.f32 r0.z, c10.y, r4.z, r0.z
+mad.f32 r0.x, c10.x, r4.z, r0.x
+mad.f32 r0.w, c8.w, r0.y, r0.w
+mad.f32 r2.z, c8.z, r0.y, r2.x
+mul.f r2.y, r0.z, c15.y
+mul.f r2.x, r0.x, c15.x
+mad.f32 r0.x, c9.w, r1.x, r0.w
+mad.f32 r0.z, c9.z, r1.x, r2.z
+mad.f32 r2.w, c10.w, r4.z, r0.x
+mad.f32 r2.z, c10.z, r4.z, r0.z
+mul.f r0.x, c0.w, r1.z
+mul.f r0.z, c0.z, r1.z
+mad.f32 r0.x, c1.w, r0.y, r0.x
+mad.f32 r0.z, c1.z, r0.y, r0.z
+mad.f32 r0.x, c2.w, r1.x, r0.x
+mad.f32 r0.z, c2.z, r1.x, r0.z
+mad.f32 r0.w, c3.w, r4.z, r0.x
+mad.f32 r0.z, c3.z, r4.z, r0.z
+mul.f r0.x, c0.y, r1.z
+mad.f32 r3.x, c1.x, r0.y, r3.x
+mad.f32 r0.x, c1.y, r0.y, r0.x
+mad.f32 r0.y, c2.x, r1.y, r3.x
+mad.f32 r1.x, c2.y, r1.x, r0.x
+mad.f32 r0.x, c3.x, r4.z, r0.y
+mad.f32 r0.y, c3.y, r4.z, r1.x
+mad.f32 r3.x, c6.x, r1.z, c6.y
+mul.f r1.z, r4.y, c5.z
+mul.f r1.y, r4.y, c5.y
+mul.f r1.x, r4.x, c5.x
end
-; VERT: outputs: r5.x (0:0) r4.x (5:9) r2.z (5:10) r0.z (5:11)
-; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r1.z (0:0,cm=3,il=16,b=0)
-; VERT: 221 instructions, 0 half, 6 full
-; pos: r5.x
+; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11)
+; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r3.z (0:0,cm=3,il=16,b=0)
+; VERT: 152 instructions, 0 half, 6 full
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-76.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-76.asm
index 0274fe9..be30c1a 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-76.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-76.asm
@@ -4,206 +4,143 @@
@in(r0.w) in3
@in(r1.x) in4
@in(r1.y) in5
-@out(r1.y) out0
-@out(r1.z) out1
-@out(r1.w) out2
-@out(r2.x) out3
+@out(r2.x) out0
+@out(r2.y) out1
+@out(r2.z) out2
+@out(r2.w) out3
+@const(c9.x) 0x3f000000, 0x00000000, 0x3d4ccccd, 0xbb449ba6
+@const(c10.x) 0xbf000000, 0x3f800000, 0x40000000, 0x3de38866
+@const(c11.x) 0x3fb8aa65, 0x3cf5c28f, 0x00000000, 0x00000000
+@const(c12.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.x, 4, r1.x
add.f r0.y, r0.w, c9.y
bary.f r0.w, 5, r1.x
bary.f r1.z, 10, r1.x
-add.f r1.w, r0.x, c10.x
-bary.f r2.x, 8, r1.x
+add.f r2.x, r0.x, c10.x
+bary.f r1.w, 11, r1.x
add.f r2.y, r0.w, c10.x
-mov.f32f32 r1.z, r1.z
-floor.f r2.z, r1.w
+bary.f r3.y, 8, r1.x
+floor.f r2.z, r2.x
rcp r0.y, r0.y
add.f r0.z, r0.z, c9.y
floor.f r2.w, r2.y
-mov.f32f32 r3.x, r1.z
-add.f r1.z, r1.w, (neg)r2.z
+bary.f r3.z, 9, r1.x
+add.f r3.w, r2.x, (neg)r2.z
(ss)mul.f r0.y, r0.z, r0.y
-add.f r0.z, r2.y, (neg)r2.w
-bary.f r1.w, 11, r1.x
-mov.f32f32 r1.z, r1.z
+absneg.f r0.z, (neg)c7.x
+add.f r4.x, r2.y, (neg)r2.w
+mov.f32f32 r4.y, r3.w
+sam (f32)(xyzw)r2.x, r1.z, s#0, t#0
+(ss)add.f r1.z, r3.w, c10.y
+mul.f r0.z, r0.z, c7.x
+mov.f32f32 r1.w, r4.x
+mul.f r3.w, c9.x, r4.y
+add.f r4.y, c10.z, (neg)r4.y
+mul.f r0.z, r0.z, r0.y
mov.f32f32 r0.y, r0.y
-absneg.f r2.y, (neg)c7.x
-mov.f32f32 r0.z, r0.z
-mul.f r2.z, c9.x, r1.z
-mov.f32f32 r1.w, r1.w
-add.f r2.w, c10.z, (neg)r1.z
-add.f r3.z, c10.z, (neg)r0.z
-mov.f32f32 r2.z, r2.z
-mul.f r2.y, r2.y, c7.x
-mul.f r3.w, c9.x, r0.z
-mov.f32f32 r3.y, r1.w
-add.f r0.x, r0.x, (neg)r2.z
-mul.f r1.w, r2.y, r0.y
-mov.f32f32 r2.y, r3.w
-mov.f32f32 r2.z, r2.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r1.w, r1.w
-add.f r0.w, r0.w, (neg)r2.y
-sam (f32)(xyzw)r3.w, r3.x, s#0, t#0
-(sy)cmps.f.lt r2.y, r4.z, c9.z
-add.f r2.w, c10.x, r0.x
-mul.f r0.y, r1.w, r0.y
+add.f r0.x, r0.x, (neg)r3.w
+mul.f r3.w, c9.x, r1.w
+mov.f32f32 r4.z, r4.y
+mul.f r0.y, r0.z, r0.y
+mov.f32f32 r0.z, r0.x
+add.f r0.w, r0.w, (neg)r3.w
add.f r0.x, c10.y, r0.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.w, r2.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
-add.f r2.w, c10.y, r0.w
-mul.f r1.w, r1.w, c3.z
mul.f r0.y, r0.y, c11.x
-mul.f r0.x, r0.x, c3.z
-mov.f32f32 r2.w, r2.w
-(ss)mov.f32f32 r3.x, r1.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r3.y, r0.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r4.w, r3.x
-add.f r0.w, c10.x, r0.w
-mov.f32f32 r3.x, r3.y
-mov.f32f32 r5.z, r0.x
-mov.f32f32 r0.x, r1.w
-mov.f32f32 r0.w, r0.w
-exp2 r0.y, r0.y
-(ss)mov.f32f32 r0.y, r0.y
-mov.f32f32 r6.y, r3.x
-mul.f r1.w, r2.w, c3.w
-mul.f r0.w, r0.w, c3.w
-add.f r2.w, c12.y, (neg)r0.y
-mov.f32f32 r7.x, r0.x
-mov.f32f32 r0.x, r1.w
-mov.f32f32 r3.x, r0.w
-mul.f r2.w, r2.w, c7.y
-mul.f r0.y, r0.y, c10.y
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r5.x, r3.x
-bary.f r3.x, 6, r1.x
-add.f r0.y, r0.y, r2.w
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r0.w, r0.w
-add.f r2.w, r3.x, c9.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r6.z, r1.w
-mov.f32f32 r5.w, r0.w
-mov.f32f32 r0.w, r2.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.w, r2.w
-mov.f32f32 r3.x, r2.w
-mov.f32f32 r5.y, r0.w
-max.f r0.y, r0.y, c9.y
-mov.f32f32 r6.w, r1.w
-mov.f32f32 r6.x, r3.x
-mov.f32f32 r7.y, r0.x
-mov.f32f32 r0.x, r2.w
-cov.u32f32 r0.w, r2.y
-sam.s (f32)(x)r1.w, r4.w, s#2, t#2
-(sy)mov.f32f32 r1.w, r1.w
-min.f r0.y, r0.y, c10.y
-sam.s (f32)(x)r2.y, r6.y, s#2, t#2
-(sy)mov.f32f32 r2.y, r2.y
-sam.s (f32)(x)r2.w, r5.z, s#2, t#2
-(sy)mov.f32f32 r2.w, r2.w
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r3.x, r3.z
-add.f r3.y, c12.y, (neg)r0.y
-add.f r3.z, c12.y, (neg)r0.y
-(ss)add.f r4.w, c12.y, (neg)r0.y
-mul.f r5.x, r2.z, r3.x
-mul.f r3.y, r3.y, c6.z
-mul.f r3.z, r3.z, c6.y
-mul.f r4.w, r4.w, c6.x
-mul.f r1.w, r5.x, r1.w
-add.f r1.z, r1.z, c10.y
-mov.f32f32 r7.z, r0.x
-mov.f32f32 r0.x, (0.000000)
-add.f r0.z, r0.z, c10.y
-mul.f r3.x, r1.z, r3.x
-mov.f32f32 r2.x, r2.x
-cmps.f.ne p0.x, r0.w, r0.x
-mul.f r0.x, r2.z, r0.z
-mad.f32 r0.w, r3.x, r2.w, r1.w
-sam.s (f32)(x)r1.w, r7.x, s#2, t#2
+add.f r0.z, c10.x, r0.z
+mov.f32f32 r3.w, r0.w
+mul.f r4.w, r0.x, c3.z
+add.f r0.x, c10.y, r0.w
+mul.f r5.z, r0.z, c3.z
+add.f r0.z, c10.x, r3.w
+mov.f32f32 r6.y, r4.w
+mul.f r5.x, r0.x, c3.w
+mov.f32f32 r7.x, r5.z
+mul.f r6.z, r0.z, c3.w
+exp2 r0.x, r0.y
+(ss)mov.f32f32 r0.y, r0.x
+bary.f r0.z, 6, r1.x
+mov.f32f32 r5.w, r5.x
+mov.f32f32 r7.y, r6.z
+add.f r0.y, c12.y, (neg)r0.y
+add.f r5.y, r0.z, c9.w
+add.f r0.z, c10.z, (neg)r1.w
+mul.f r0.x, r0.x, c10.y
+add.f r0.w, r4.x, c10.y
+mov.f32f32 r7.z, r5.y
+mul.f r0.y, r0.y, c7.y
+mov.f32f32 r6.w, r5.y
+mov.f32f32 r6.x, r5.y
(rpt1)nop
-(sy)mov.f32f32 r1.w, r1.w
-mov.f32f32 r0.w, r0.w
-bary.f r2.z, 0, r1.x
-mov.f32f32 r2.w, r2.x
-mad.f32 r0.x, r0.x, r1.w, r0.w
-bary.f r0.w, 1, r1.x
-bary.f r1.w, 9, r1.x
+add.f r0.x, r0.x, r0.y
+sam.s (f32)(x)r7.x, r7.x, s#2, t#2
+mov.f32f32 r0.y, r0.z
+nop
+(ss)nop
+sam.s (f32)(x)r7.y, r6.y, s#2, t#2
+sam.s (f32)(x)r7.z, r4.w, s#2, t#2
mul.f r0.z, r1.z, r0.z
-mov.f32f32 r0.x, r0.x
-bary.f (ei)r1.x, 2, r1.x
-mov.f32f32 r1.y, r1.w
-mad.f32 r0.x, r0.z, r2.y, r0.x
+mul.f r1.w, r4.y, r0.w
+mul.f r0.y, r4.z, r0.y
+max.f r0.x, r0.x, c9.y
+sam.s (f32)(x)r3.w, r5.z, s#2, t#2
+(sy)cmps.f.lt r4.x, r2.w, c9.z
+mul.f r0.w, r1.z, r0.w
+mul.f r0.y, r0.y, r7.x
+min.f r0.x, r0.x, c10.y
+mad.f32 r0.y, r0.z, r7.y, r0.y
+cov.u32f32 r0.z, r4.x
+mad.f32 r0.y, r1.w, r3.w, r0.y
+add.f r1.z, c12.y, (neg)r0.x
+mad.f32 r0.y, r0.w, r7.z, r0.y
+add.f r0.w, c12.y, (neg)r0.x
+add.f r1.w, c12.y, (neg)r0.x
+mov.f32f32 r3.w, (0.000000)
+mul.f r0.y, c10.w, r0.y
+bary.f r4.x, 0, r1.x
+mul.f r1.z, r1.z, c6.z
+mul.f r0.w, r0.w, c6.y
+mov.f32f32 r4.y, r0.y
+bary.f r4.z, 2, r1.x
+bary.f (ei)r1.x, 1, r1.x
+mul.f r1.y, r2.x, r4.x
+mul.f r1.w, r1.w, c6.x
+mul.f r4.x, r2.z, r4.z
+mul.f r1.x, r2.y, r1.x
+mul.f r0.y, r1.y, r0.y
+cmps.f.ne p0.x, r0.z, r3.w
+mul.f r0.z, r4.x, r4.y
+mul.f r1.x, r1.x, r4.y
+mad.f32 r0.z, c5.z, r2.z, r0.z
+mad.f32 r1.x, c5.y, r2.y, r1.x
+mad.f32 r0.y, c5.x, r2.x, r0.y
+sam (f32)(w)r3.y, r3.y, s#1, t#1
+(sy)cmps.f.lt r1.y, r4.x, c11.y
+mul.f r0.z, r0.x, r0.z
+mul.f r1.x, r0.x, r1.x
+mul.f r0.x, r0.x, r0.y
kill p0.x
-mov.f32f32 r1.z, r4.z
-mov.f32f32 r3.x, r1.y
-mov.f32f32 r0.x, r0.x
-mul.f r1.x, r4.y, r1.x
-mul.f r0.w, r4.x, r0.w
-mov.f32f32 r2.x, r1.z
-mul.f r0.x, c10.w, r0.x
-mul.f r2.y, r3.w, r2.z
-sam (f32)(w)r5.x, r2.w, s#1, t#1
-nop
-(sy)cmps.f.lt r2.z, r5.w, c11.y
-mov.f32f32 r0.x, r0.x
-(ss)mov.f32f32 r2.w, r5.w
-mov.f32f32 r3.x, c9.y
-nop
-mul.f r1.x, r1.x, r0.x
-mul.f r0.w, r0.w, r0.x
-mul.f r0.x, r2.y, r0.x
-cov.u32f32 r2.y, r2.z
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r0.w, r0.w
-mad.f32 r1.x, c5.z, r4.y, r1.x
-mad.f32 r0.w, c5.y, r4.x, r0.w
-mov.f32f32 r0.x, r0.x
-cmps.f.ne r2.y, r2.y, c9.y
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r0.w, r0.w
-mad.f32 r0.x, c5.x, r3.w, r0.x
-mov.f32f32 r2.z, r2.w
-mul.f r1.x, r0.y, r1.x
-mul.f r0.w, r0.y, r0.w
-mov.f32f32 r0.x, r0.x
-sel.b32 r2.y, r3.x, r2.y, r2.z
-add.f r1.x, r1.x, r3.y
-add.f r0.w, r0.w, r3.z
-mul.f r0.x, r0.y, r0.x
-nop
-mul.f r0.y, r1.x, r2.y
-mul.f r0.w, r0.w, r2.y
-add.f r0.x, r0.x, r4.w
-nop
-mul.f r0.y, r0.y, c4.z
-mul.f r0.w, r0.w, c4.y
-mul.f r0.x, r0.x, r2.y
+add.f r0.y, r0.z, r1.z
+cov.u32f32 r0.z, r1.y
+add.f r0.w, r1.x, r0.w
+add.f r0.x, r0.x, r1.w
+mov.f32f32 r1.x, c9.y
+cmps.f.ne r0.z, r0.z, c9.y
+(rpt2)nop
+sel.b32 r0.z, r1.x, r0.z, r4.x
+(rpt2)nop
+mul.f r0.y, r0.y, r0.z
+mul.f r0.w, r0.w, r0.z
+(rpt1)nop
+mul.f r2.z, r0.y, c4.z
+mul.f r2.y, r0.w, c4.y
+mul.f r0.x, r0.x, r0.z
+(rpt2)nop
+mul.f r2.x, r0.x, c4.x
+end
nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.w, r0.w
-mul.f r0.x, r0.x, c4.x
nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.x, r0.x
nop
-mov.f32f32 r1.w, r0.y
-mov.f32f32 r1.z, r0.w
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
-mov.f32f32 r1.y, r0.x
-end
-; FRAG: outputs: r1.y (1:0)
+; FRAG: outputs: r2.x (1:0)
; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r2.x (5:10,cm=f,il=12,b=1) r1.x (5:11,cm=f,il=16,b=1)
-; FRAG: 195 instructions, 0 half, 8 full
-; pos (bary): r1.x
-; color: r1.y
-; fragcoord: r0.x
+; FRAG: 133 instructions, 0 half, 8 full
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-77.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-77.asm
index 0e4d5ee..9c8ac11 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-77.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-77.asm
@@ -6,134 +6,99 @@
@in(r0.w) in4
@in(r1.x) in5
@in(r1.y) in6
-@in(r1.z) in8
-@in(r1.w) in9
-@out(r6.y) out0
-@out(r6.z) out1
-@out(r6.w) out2
-@out(r7.x) out3
-@out(r5.y) out4
-@out(r5.z) out5
-@out(r5.w) out6
-@out(r6.x) out7
-@out(r2.w) out8
-@out(r3.x) out9
-@out(r3.y) out10
-@out(r3.z) out11
-@out(r3.w) out12
-@out(r4.x) out13
-@out(r4.y) out14
-@out(r4.z) out15
-(sy)(ss)mul.f r2.x, c11.x, r0.w
-mul.f r2.y, c11.x, r0.x
-mad.f32 r2.x, c12.x, r1.x, r2.x
-mad.f32 r2.y, c12.x, r0.y, r2.y
-mul.f r2.z, c11.z, r0.x
-mad.f32 r2.y, c13.x, r0.z, r2.y
-mov.f32f32 r2.x, r2.x
-mad.f32 r2.z, c12.z, r0.y, r2.z
-mad.f32 r2.x, c13.x, r1.y, r2.x
-add.f r2.y, r2.y, c14.x
-mad.f32 r2.z, c13.z, r0.z, r2.z
-mul.f r2.w, c11.y, r0.w
-mov.f32f32 r2.x, r2.x
-mul.f r3.x, c7.w, r2.y
-mul.f r3.y, c7.z, r2.y
-mul.f r3.z, c7.y, r2.y
-mul.f r2.x, r2.x, (neg)c4.x
-mad.f32 r2.w, c12.y, r1.x, r2.w
-mul.f r3.w, c11.y, r0.x
-mul.f r4.x, c7.x, r2.y
-mad.f32 r3.w, c12.y, r0.y, r3.w
-mov.f32f32 r2.w, r2.w
-mad.f32 r3.w, c13.y, r0.z, r3.w
-mad.f32 r2.w, c13.y, r1.y, r2.w
-mul.f r4.y, c0.w, r2.y
-mul.f r4.z, c0.z, r2.y
-mul.f r4.w, c0.y, r2.y
-mov.f32f32 r2.w, r2.w
-add.f r3.w, r3.w, c14.y
-mul.f r5.x, c0.x, r2.y
-add.f r2.z, r2.z, c14.z
-mad.f32 r2.x, (neg)c4.y, r2.w, r2.x
-mad.f32 r2.w, c8.w, r3.w, r3.x
-mad.f32 r3.x, c8.z, r3.w, r3.y
-mad.f32 r3.y, c8.y, r3.w, r3.z
-mov.f32f32 r2.x, r2.x
+@in(r3.z) in8
+@in(r3.w) in9
+@out(r0.x) out0
+@out(r0.y) out1
+@out(r0.z) out2
+@out(r0.w) out3
+@out(r1.x) out4
+@out(r1.y) out5
+@out(r1.z) out6
+@out(r1.w) out7
+@out(r2.x) out8
+@out(r2.y) out9
+@out(r2.z) out10
+@out(r2.w) out11
+@out(r3.x) out12
+@out(r3.y) out13
+@out(r3.z) out14
+@out(r3.w) out15
+@const(c16.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
+@const(c17.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
+(sy)(ss)mul.f r1.z, c11.x, r0.w
+mul.f r1.w, c11.x, r0.x
+mad.f32 r1.z, c12.x, r1.x, r1.z
+mad.f32 r1.w, c12.x, r0.y, r1.w
+mad.f32 r1.z, c13.x, r1.y, r1.z
+mad.f32 r1.w, c13.x, r0.z, r1.w
+mul.f r2.x, c11.z, r0.x
+mul.f r2.y, c11.y, r0.x
+mul.f r1.z, r1.z, (neg)c4.x
+mul.f r2.z, c11.y, r0.w
+add.f r1.w, r1.w, c14.x
+mad.f32 r2.z, c12.y, r1.x, r2.z
+mad.f32 r2.x, c12.z, r0.y, r2.x
+mad.f32 r2.z, c13.y, r1.y, r2.z
+mul.f r2.w, c7.y, r1.w
+mul.f r3.x, c7.x, r1.w
+mul.f r3.y, c7.w, r1.w
+mad.f32 r1.z, (neg)c4.y, r2.z, r1.z
mul.f r0.w, c11.z, r0.w
-mad.f32 r2.w, c9.w, r2.z, r2.w
+mad.f32 r2.y, c12.y, r0.y, r2.y
mad.f32 r0.w, c12.z, r1.x, r0.w
+mad.f32 r1.x, c13.y, r0.z, r2.y
+mad.f32 r0.w, c13.z, r1.y, r0.w
+mul.f r1.y, c7.z, r1.w
+mul.f r2.y, c0.w, r1.w
+mul.f r2.z, c0.z, r1.w
+mad.f32 r0.w, (neg)c4.z, r0.w, r1.z
+add.f r4.x, r1.x, c14.y
+mul.f r4.y, c0.y, r1.w
+mul.f r4.z, c0.x, r1.w
+max.f r0.w, c16.x, r0.w
+mad.f32 r2.w, c8.y, r4.x, r2.w
+mad.f32 r3.x, c8.x, r4.x, r3.x
+mad.f32 r3.y, c8.w, r4.x, r3.y
+mov.f32f32 r4.w, r0.w
+mad.f32 r2.x, c13.z, r0.z, r2.x
+mul.f r1.x, r0.w, c5.x
+mad.f32 r0.w, c8.z, r4.x, r1.y
+mul.f r1.z, r4.w, c5.z
+mul.f r1.y, r4.w, c5.y
+add.f r4.w, r2.x, c14.z
+mad.f32 r2.x, c1.w, r4.x, r2.y
+mad.f32 r2.y, c1.z, r4.x, r2.z
+mad.f32 r2.z, c1.y, r4.x, r4.y
+mad.f32 r2.w, c9.y, r4.w, r2.w
mul.f r0.x, c11.w, r0.x
-mad.f32 r1.x, c9.z, r2.z, r3.x
-mad.f32 r3.x, c9.y, r2.z, r3.y
-mov.f32f32 r0.w, r0.w
+mad.f32 r3.x, c9.x, r4.w, r3.x
mad.f32 r0.x, c12.w, r0.y, r0.x
-mad.f32 r0.y, c13.z, r1.y, r0.w
+mad.f32 r0.y, c9.w, r4.w, r3.y
mad.f32 r0.x, c13.w, r0.z, r0.x
-mad.f32 r0.z, c8.x, r3.w, r4.x
-mad.f32 r0.w, c1.w, r3.w, r4.y
-mov.f32f32 r0.y, r0.y
+mad.f32 r0.z, c9.z, r4.w, r0.w
+mad.f32 r0.w, c2.w, r4.w, r2.x
+mad.f32 r4.y, c2.z, r4.w, r2.y
add.f r0.x, r0.x, c14.w
-mad.f32 r0.z, c9.x, r2.z, r0.z
-mad.f32 r0.w, c2.w, r2.z, r0.w
-mad.f32 r0.y, (neg)c4.z, r0.y, r2.x
-mad.f32 r1.y, c10.w, r0.x, r2.w
-mad.f32 r1.x, c10.z, r0.x, r1.x
-mad.f32 r2.x, c10.y, r0.x, r3.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.x, r1.x
-mul.f r2.x, r2.x, c15.y
-max.f r0.y, c16.x, r0.y
-mov.f32f32 r3.z, r1.y
-mov.f32f32 r3.y, r1.x
-mov.f32f32 r3.x, r2.x
-mov.f32f32 r0.y, r0.y
-mad.f32 r0.z, c10.x, r0.x, r0.z
+mad.f32 r5.x, c2.y, r4.w, r2.z
+mad.f32 r4.x, c1.x, r4.x, r4.z
+mad.f32 r3.y, c6.x, r4.w, c6.y
+mad.f32 r2.x, c10.y, r0.x, r2.w
+mad.f32 r3.x, c10.x, r0.x, r3.x
+mad.f32 r2.w, c10.w, r0.x, r0.y
+mad.f32 r2.z, c10.z, r0.x, r0.z
+mul.f r2.y, r2.x, c15.y
+mul.f r2.x, r3.x, c15.x
mad.f32 r0.w, c3.w, r0.x, r0.w
-mad.f32 r1.x, c1.z, r3.w, r4.z
-mul.f r1.y, r0.y, c5.z
-mul.f r2.x, r0.y, c5.y
-mul.f r0.y, r0.y, c5.x
-mul.f r0.z, r0.z, c15.x
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r0.y, r0.y
-nop
-mov.f32f32 r5.w, r1.y
-mov.f32f32 r5.z, r2.x
-mov.f32f32 r5.y, r0.y
-mov.f32f32 r2.w, r0.z
-mov.f32f32 r7.x, r0.w
-mad.f32 r0.y, c2.z, r2.z, r1.x
-mad.f32 r0.z, c1.y, r3.w, r4.w
-mad.f32 r0.y, c3.z, r0.x, r0.y
-mad.f32 r0.z, c2.y, r2.z, r0.z
-mad.f32 r0.w, c1.x, r3.w, r5.x
-mad.f32 r1.x, c6.x, r2.z, c6.y
-mov.f32f32 r6.w, r0.y
-mad.f32 r0.y, c3.y, r0.x, r0.z
-mad.f32 r0.z, c2.x, r2.z, r0.w
-mov.f32f32 r0.w, r1.x
-mad.f32 r1.x, c6.x, r2.y, c6.y
-mov.f32f32 r6.z, r0.y
-mad.f32 r0.x, c3.x, r0.x, r0.z
-mov.f32f32 r4.x, r0.w
-mov.f32f32 r0.y, r1.x
-nop
-mov.f32f32 r6.y, r0.x
-mov.f32f32 r0.x, r1.w
-mov.f32f32 r3.w, r0.y
-mov.f32f32 r0.y, r1.z
-mov.f32f32 r0.z, (0.000000)
-mov.f32f32 r4.z, r0.x
-nop
-mov.f32f32 r4.y, r0.y
-mov.f32f32 r6.x, r0.z
+mad.f32 r0.z, c3.z, r0.x, r4.y
+mad.f32 r0.y, c3.y, r0.x, r5.x
+mad.f32 r4.x, c2.x, r4.w, r4.x
+mad.f32 r3.x, c6.x, r1.w, c6.y
+mad.f32 r0.x, c3.x, r0.x, r4.x
+mov.f32f32 r1.w, (0.000000)
end
nop
-; VERT: outputs: r6.y (0:0) r5.y (5:9) r2.w (5:10) r3.w (5:11)
-; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r1.z (0:0,cm=3,il=16,b=0)
-; VERT: 107 instructions, 0 half, 8 full
-; pos: r6.y
+; VERT: outputs: r0.x (0:0) r1.x (5:9) r2.x (5:10) r3.x (5:11)
+; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r3.z (0:0,cm=3,il=16,b=0)
+; VERT: 71 instructions, 0 half, 6 full
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-81.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-81.asm
index f46066a..df4bfc0 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-81.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-81.asm
@@ -2,65 +2,36 @@
; FRAG: new compiler
@in(r0.x) in0
@in(r0.y) in1
-@out(r2.x) out0
-@out(r2.y) out1
-@out(r2.z) out2
-@out(r2.w) out3
-(sy)(ss)bary.f r0.z, 1, r0.x
-bary.f r0.w, 0, r0.x
-bary.f (ei)r0.x, 2, r0.x
+@out(r1.x) out0
+@out(r1.y) out1
+@out(r1.z) out2
+@out(r1.w) out3
+@const(c0.x) 0x3e800000, 0x40800000, 0x00000000, 0x00000000
+(sy)(ss)bary.f r0.w, 1, r0.x
+bary.f r0.z, 0, r0.x
+bary.f (ei)r1.x, 2, r0.x
nop
-add.f r0.y, c0.x, (neg)r0.z
-mov.f32f32 r0.w, r0.w
-cmps.f.lt r1.x, c0.z, r0.z
-mov.f32f32 r0.x, r0.x
-mul.f r0.y, r0.y, c0.y
-mov.f32f32 r1.y, r0.w
-mov.f32f32 r0.z, r0.z
-cov.u32f32 r0.w, r1.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.w, r0.x
-mov.f32f32 r1.z, r0.z
+add.f r0.x, c0.x, (neg)r0.w
+cmps.f.lt r0.y, c0.z, r0.w
+(rpt1)nop
+mul.f r0.x, r0.x, c0.y
+sam.3d (f32)(xyzw)r2.x, r0.z, s#0, t#0
+cov.u32f32 r0.y, r0.y
+(rpt1)nop
+(ss)mov.f32f32 r0.z, r0.x
+(sy)mul.f r0.x, r2.x, r0.x
+cmps.f.ne r0.y, r0.y, c0.z
nop
-mov.f32f32 r0.x, r0.y
-mov.f32f32 r0.z, r0.y
-mov.f32f32 r1.x, r0.y
-mov.f32f32 r0.y, r0.y
-cmps.f.ne r0.w, r0.w, c0.z
-sam.3d (f32)(xyzw)r1.y, r1.y, s#0, t#0
-(sy)mov.f32f32 r2.y, r2.x
-mov.f32f32 r2.z, r1.w
-mov.f32f32 r2.w, r1.z
-mov.f32f32 r3.x, r1.y
-mul.f r0.x, r2.y, r0.x
-mul.f r0.z, r2.z, r0.z
-mul.f r1.x, r2.w, r1.x
-mul.f r0.y, r3.x, r0.y
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r2.x, r2.x
-(ss)mov.f32f32 r1.w, r1.w
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r1.y, r1.y
-sel.b32 r0.x, r0.x, r0.w, r2.x
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r2.w, r0.x
-sel.b32 r0.x, r0.z, r0.w, r1.w
-sel.b32 r0.z, r1.x, r0.w, r1.z
-sel.b32 r0.y, r0.y, r0.w, r1.y
+mul.f r0.w, r2.w, r0.z
+mul.f r1.x, r2.z, r0.z
+mul.f r0.z, r2.y, r0.z
nop
-mov.f32f32 r2.z, r0.x
-mov.f32f32 r2.y, r0.z
-mov.f32f32 r2.x, r0.y
+sel.b32 r1.w, r0.w, r0.y, r2.w
+sel.b32 r1.z, r1.x, r0.y, r2.z
+sel.b32 r1.y, r0.z, r0.y, r2.y
+sel.b32 r1.x, r0.x, r0.y, r2.x
end
-; FRAG: outputs: r2.x (1:0)
+; FRAG: outputs: r1.x (1:0)
; FRAG: inputs: r0.x (5:9,cm=f,il=8,b=1)
-; FRAG: 52 instructions, 0 half, 4 full
-; pos (bary): r0.x
-; color: r2.x
+; FRAG: 26 instructions, 0 half, 3 full
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-85.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-85.asm
index 86cd6d2..46ab29e 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-85.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-85.asm
@@ -6,1030 +6,711 @@
@in(r0.w) in3
@in(r1.x) in4
@in(r1.y) in5
-@out(r10.y) out0
-@out(r10.z) out1
-@out(r10.w) out2
-@out(r11.x) out3
+@out(r2.x) out0
+@out(r2.y) out1
+@out(r2.z) out2
+@out(r2.w) out3
+@const(c28.x) 0x3f000000, 0x00000000, 0x40f00000, 0x3dcccccd
+@const(c29.x) 0xbf000000, 0x80000000, 0x40400000, 0x3f800000
+@const(c30.x) 0x00000000, 0x3f800000, 0x46800000, 0x45801000
+@const(c31.x) 0x40000000, 0xbf800000, 0x457fe000, 0x3b23d70a
+@const(c32.x) 0x3d4ccccd, 0x3a83126f, 0x3f7fbe77, 0x3f866666
+@const(c33.x) 0x3fa66666, 0x3f4ccccd, 0x40a00000, 0x3f316f00
+@const(c34.x) 0x43480000, 0x3e800000, 0x40e00000, 0xc2c80000
+@const(c35.x) 0xc4bb8000, 0x43fa0000, 0x44bb8000, 0x40800000
+@const(c36.x) 0x44fa0000, 0x3fc00000, 0x3cf5c28f, 0x3ca3d70a
+@const(c37.x) 0x3e2ab368, 0x3fb8aa65, 0x3ecccccd, 0x40266666
+@const(c38.x) 0x3e3a29c7, 0x00000000, 0x00000000, 0x00000000
+@const(c39.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r1.z, 2, r1.x
add.s r0.y, r0.y, -8
bary.f r1.w, 18, r1.x
-add.f r0.w, r0.w, c28.y
-mul.f r2.x, r1.z, c23.w
+mov.f32f32 r2.x, c22.x
+mul.f r2.y, r1.z, c23.w
shr.b r0.y, r0.y, 4
-mul.f r2.y, r1.z, c23.z
-mov.f32f32 r2.z, r1.w
-mul.f r2.w, r2.x, c8.x
+add.f r2.z, c6.x, (neg)r1.w
+add.f r0.w, r0.w, c28.y
+mul.f r2.w, r2.y, c8.x
+bary.f r3.x, 0, r1.x
cov.u32f32 r0.y, r0.y
-mul.f r3.x, r2.y, c8.x
-add.f r2.z, c6.x, (neg)r2.z
-mov.f32f32 r2.w, r2.w
-bary.f r3.y, 0, r1.x
+mov.f32f32 r3.y, r2.z
+mad.f32 r2.w, c28.w, r2.w, r3.x
+rcp r2.x, r2.x
+(ss)mul.f r2.x, c8.x, r2.x
add.f r0.y, r0.y, c28.x
-mov.f32f32 r3.x, r3.x
-mad.f32 r2.w, c28.w, r2.w, r3.y
-mov.f32f32 r2.z, r2.z
-mad.f32 r0.y, c27.x, r0.y, c27.y
-rcp r0.w, r0.w
-mad.f32 r3.x, c28.w, r3.x, r3.y
-mov.f32f32 r2.w, r2.w
-mul.f r3.z, r2.z, r2.z
-mov.f32f32 r0.y, r0.y
-add.f r0.z, r0.z, c28.y
+mul.f r2.z, r2.z, r3.y
mul.f r2.w, r2.w, c22.y
-mov.f32f32 r3.w, c14.y
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r4.x, c22.x
-mov.f32f32 r2.w, r2.w
-bary.f r4.y, 19, r1.x
-(ss)mul.f r0.w, r0.z, r0.w
-mul.f r4.z, r1.z, c23.z
-mul.f r2.w, r2.w, c29.z
-rcp r3.w, r3.w
-(ss)mul.f r3.w, r0.y, r3.w
-rcp r4.x, r4.x
-(ss)mul.f r4.x, c8.x, r4.x
-mov.f32f32 r4.w, r4.y
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r4.x, r4.x
-add.f r4.w, c6.y, (neg)r4.w
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r5.x, r3.w
-add.f r5.y, c22.y, (neg)r4.x
-mov.f32f32 r4.w, r4.w
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r5.w, r5.x
-add.s r0.x, r0.x, -8
-mov.f32f32 r5.x, r5.y
-mov.f32f32 r6.x, r2.w
+bary.f r3.z, 19, r1.x
+add.f r3.w, c22.y, (neg)r2.x
+mad.f32 r0.y, c27.x, r0.y, c27.y
+mul.f r4.x, r2.w, c29.z
bary.f r2.w, 3, r1.x
+add.f r4.y, c6.y, (neg)r3.z
+mov.f32f32 r4.z, r3.w
+mov.f32f32 r4.w, r0.y
+mul.f r5.x, r2.w, c23.w
+mov.f32f32 r5.y, c14.y
+mov.f32f32 r5.z, r4.y
+mul.f r5.w, r2.w, c23.z
+mul.f r6.x, r5.x, c8.x
+bary.f r6.y, 1, r1.x
+mad.f32 r2.z, r4.y, r5.z, r2.z
+mul.f r4.y, r5.w, c8.x
+mad.f32 r6.x, c28.w, r6.x, r6.y
+rcp r5.y, r5.y
+(ss)mul.f r6.w, r4.w, r5.y
+bary.f r4.w, 20, r1.x
+(ss)mad.f32 r5.y, c28.w, r4.y, r6.y
+mul.f r4.y, r6.x, c22.y
+mov.f32f32 r7.y, r6.w
+add.s r0.x, r0.x, -8
+add.f r6.x, c6.z, (neg)r4.w
+mul.f r4.y, r4.y, c29.z
+mul.f r7.w, r5.y, r4.z
+mul.f r4.z, r1.z, c23.z
shr.b r0.x, r0.x, 4
-mul.f r3.x, r3.x, r5.x
-mad.f32 r3.z, r4.w, r4.w, r3.z
-mul.f r5.y, r2.w, c23.w
+mov.f32f32 r5.y, r6.x
+rcp r0.w, r0.w
+add.f r0.z, r0.z, c28.y
+mul.f r6.z, r4.z, c8.x
+sam (f32)(xyz)r8.x, r4.x, s#3, t#3
+(ss)mov.f32f32 r4.x, c28.z
cov.u32f32 r0.x, r0.x
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r3.z, r3.z
-mul.f r5.z, r5.y, c8.x
+mad.f32 r2.z, r5.y, r5.y, r2.z
+mad.f32 r4.y, c28.w, r6.z, r3.x
+mul.f r4.x, r4.x, c15.x
add.f r0.x, r0.x, c28.x
-mov.f32f32 r6.y, c14.x
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r5.z, r5.z
-bary.f r6.z, 1, r1.x
-bary.f r6.w, 20, r1.x
-mov.f32f32 r3.x, r3.x
-mad.f32 r5.z, c28.w, r5.z, r6.z
-rcp r6.y, r6.y
-(ss)mul.f r6.y, r0.x, r6.y
-mov.f32f32 r7.x, r6.w
-mov.f32f32 r7.y, r3.x
-mov.f32f32 r3.x, r5.z
-mov.f32f32 r7.w, r6.y
-mul.f r8.x, r2.w, c23.z
-add.f r5.z, c6.z, (neg)r7.x
-mul.f r3.x, r3.x, c22.y
-mov.f32f32 r6.y, r7.w
-mul.f r7.x, r8.x, c8.x
-mov.f32f32 r8.y, r5.z
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r5.z, r6.y
-mov.f32f32 r6.y, r7.x
-mad.f32 r3.z, r8.y, r8.y, r3.z
-mul.f r3.x, r3.x, c29.z
-mov.f32f32 r0.w, r0.w
+mov.f32f32 r5.y, c14.x
+mul.f r7.z, r4.y, r3.w
+floor.f r3.w, r4.x
+rsq r2.z, r2.z
+(ss)mul.f r0.w, r0.z, r0.w
+mov.f32f32 r4.y, r2.z
absneg.f r7.x, (neg)c13.x
-mad.f32 r6.y, c28.w, r6.y, r6.z
-mov.f32f32 r3.x, r3.x
-sam (f32)(x)r5.z, r5.z, s#8, t#8
-(sy)(ss)mad.f32 r5.w, c31.x, r5.z, c31.y
-rsq r3.z, r3.z
-(ss)mov.f32f32 r3.z, r3.z
-mov.f32f32 r6.y, r6.y
-mov.f32f32 r3.x, r3.x
-mul.f r5.w, r5.w, c31.z
-mul.f r4.w, r4.w, r3.z
-mul.f r5.x, r6.y, r5.x
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r5.w, r5.w
-mov.f32f32 r4.w, r4.w
-mov.f32f32 r5.x, r5.x
-mov.f32f32 r6.y, r3.x
-add.f r3.x, c30.w, (neg)r5.w
-mul.f r5.w, r4.w, r4.w
-mov.f32f32 r5.x, r5.x
-mul.f r7.x, r7.x, c13.x
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r5.w, r5.w
-sam (f32)(xyz)r8.z, r6.x, s#3, t#3
-(ss)mov.f32f32 r6.x, c28.z
-mov.f32f32 r5.x, r5.x
-mul.f r6.y, r7.x, r0.w
-mul.f r5.w, r5.w, c31.x
-mul.f r6.x, r6.x, c15.x
-rcp r3.x, r3.x
-(ss)mov.f32f32 r3.x, r3.x
+add.f r3.w, r4.x, (neg)r3.w
+rcp r4.x, r5.y
+(ss)mul.f r6.z, r0.x, r4.x
+mul.f r4.x, r5.z, r4.y
+sam (f32)(w)r8.w, r7.z, s#3, t#3
+(ss)mul.f r5.y, r7.x, c13.x
+mov.f32f32 r5.z, r3.w
+mov.f32f32 r7.x, r6.z
+mov.f32f32 r7.z, r4.x
+mul.f r5.y, r5.y, r0.w
+(sy)mul.f r7.w, r5.z, r8.y
+mul.f r2.y, r2.y, c8.x
+mul.f r8.x, r5.z, r8.x
+mul.f r8.y, r7.z, r7.z
+sam (f32)(x)r9.w, r7.x, s#8, t#8
+(sy)mad.f32 r8.w, c31.x, r9.w, c31.y
+mad.f32 r2.y, c28.w, r2.y, r3.x
+mul.f r9.x, r5.z, r9.z
+mov.f32f32 r2.x, r2.x
+mul.f r8.w, r8.w, c31.z
+mul.f r2.y, r2.y, c22.y
+mul.f r8.y, r8.y, c31.x
+add.f r2.x, c22.y, (neg)r2.x
+mul.f r4.z, r4.z, c8.x
+mul.f r10.x, r2.y, c29.z
+mul.f r2.y, r5.x, c8.x
+add.f r5.x, c30.w, (neg)r8.w
+max.f r8.y, r8.y, c28.y
+mad.f32 r4.z, c28.w, r4.z, r3.x
+mad.f32 r2.y, c28.w, r2.y, r6.y
+mul.f r8.z, r5.z, r8.z
+mov.f32f32 r0.w, r0.w
+mul.f r3.y, r3.y, r4.y
+mul.f r2.y, r2.y, c22.y
+rcp r4.y, r5.x
mad.f32 r0.z, c31.x, r0.z, c31.y
-mov.f32f32 r7.z, r5.x
-floor.f r5.x, r6.x
-mov.f32f32 r5.w, r5.w
+(ss)min.f r5.x, r8.y, c29.w
+mul.f r10.z, r4.z, r2.x
+mul.f r10.y, r2.y, c29.z
mul.f r0.z, r0.z, c31.z
-mov.f32f32 r6.y, r6.y
-add.f r5.x, r6.x, (neg)r5.x
-mov.f32f32 r5.w, r5.w
-sam (f32)(w)r9.y, r7.y, s#3, t#3
-mov.f32f32 r0.z, r0.z
-mul.f r0.w, r6.y, r0.w
-mov.f32f32 r5.x, r5.x
-mov.f32f32 r5.w, r5.w
+add.f r2.y, c39.y, (neg)r5.x
+mul.f r4.z, r5.w, c8.x
+mul.f r0.w, r5.y, r0.w
+mov.f32f32 r5.y, r3.y
+absneg.f r5.w, (neg)c0.x
+sam (f32)(xyz)r11.x, r10.x, s#2, t#2
+add.f r8.y, c39.y, (neg)r5.z
add.f r0.z, c30.w, (neg)r0.z
-mov.f32f32 r0.w, r0.w
-(sy)mul.f r6.x, r5.x, r8.w
-mul.f r2.x, r2.x, c8.x
-mov.f32f32 r0.z, r0.z
-mul.f r6.y, r5.x, r8.z
-mul.f r7.x, r5.x, r10.x
-mad.f32 r2.x, c28.w, r2.x, r3.y
-mul.f r2.y, r2.y, c8.x
-(ss)mul.f r7.y, r5.x, r9.x
-max.f r5.w, r5.w, c28.y
-mov.f32f32 r2.x, r2.x
+mul.f r2.y, r2.y, c33.z
+add.f r8.w, c39.y, (neg)r5.z
+(sy)mul.f r8.y, r8.y, r11.y
+mul.f r5.x, r5.x, c36.x
+mad.f32 r4.z, c28.w, r4.z, r6.y
+mul.f r8.w, r8.w, r11.x
+add.f r7.w, r7.w, r8.y
rcp r0.z, r0.z
-(ss)mov.f32f32 r0.z, r0.z
-mad.f32 r2.y, c28.w, r2.y, r3.y
-min.f r5.w, r5.w, c29.w
-mul.f r2.x, r2.x, c22.y
-mul.f r0.z, c30.z, r0.z
-mov.f32f32 r2.y, r2.y
-add.f r4.x, c22.y, (neg)r4.x
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r0.z, r0.z
-add.f r7.z, c39.y, (neg)r5.w
-mul.f r2.y, r2.y, r4.x
-mul.f r2.x, r2.x, c29.z
-mad.f32 r3.x, c30.z, r3.x, (neg)r0.z
-mul.f r7.z, r7.z, c33.z
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r3.x, r3.x
-mul.f r5.w, r5.w, c36.x
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r2.x, r2.x
-mul.f r3.x, r3.x, c31.w
-add.f r5.w, r5.w, r7.z
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r3.x, r3.x
+(ss)mul.f r0.z, c30.z, r0.z
+add.f r2.y, r5.x, r2.y
+mad.f32 r4.y, c30.z, r4.y, (neg)r0.z
+add.f r5.x, r7.w, c29.x
+add.f r7.w, r8.x, r8.w
+mul.f r10.w, r4.z, r2.x
+mul.f r2.x, r4.y, c31.w
+mov.f32f32 r4.y, r5.x
+bary.f r4.z, 5, r1.x
+add.f r7.w, r7.w, c29.x
+max.f r2.x, r2.x, c28.y
+add.f r8.x, c39.y, (neg)r5.z
+mul.f r4.y, r4.y, r4.z
+mov.f32f32 r8.y, r7.w
+bary.f r8.w, 4, r1.x
+min.f r2.x, r2.x, c29.w
+mul.f r7.w, r7.w, r4.z
+sam (f32)(w)r10.x, r10.z, s#2, t#2
+add.f r9.y, c39.y, (neg)r5.z
+mad.f32 r4.y, r8.y, r8.w, (neg)r4.y
+mov.f32f32 r8.y, c22.x
+min.f r2.x, r2.x, c32.x
+mad.f32 r5.x, r5.x, r8.w, r7.w
+(sy)mul.f r7.w, r9.y, r10.w
+mul.f r8.x, r8.x, r11.z
mul.f r0.w, r0.w, c37.y
-mov.f32f32 r8.z, r2.y
-mov.f32f32 r2.x, r2.x
-mul.f r2.y, r5.y, c8.x
-mov.f32f32 r3.x, r3.x
-mul.f r5.y, r8.x, c8.x
-mov.f32f32 r0.w, r0.w
-mad.f32 r2.y, c28.w, r2.y, r6.z
-mov.f32f32 r3.x, r3.x
-mad.f32 r5.y, c28.w, r5.y, r6.z
-mul.f r2.z, r2.z, r3.z
-mov.f32f32 r2.y, r2.y
-max.f r3.x, r3.x, c28.y
-mov.f32f32 r5.y, r5.y
+add.f r9.y, r5.w, r5.y
+rcp r8.y, r8.y
+(ss)mul.f r8.y, c8.x, r8.y
+mov.f32f32 r10.x, r2.x
+mov.f32f32 r10.y, r5.x
+add.f r7.w, r9.x, r7.w
+add.f r9.x, c22.y, (neg)r8.y
+add.f r10.z, c39.y, (neg)r5.z
+mul.f r11.x, r3.x, c37.x
+add.f r8.x, r8.z, r8.x
+mov.f32f32 r8.z, r9.x
+mul.f r11.y, r2.w, c23.z
+mov.f32f32 r10.y, r10.y
+bary.f r11.z, 8, r1.x
+mul.f r11.w, r1.z, c36.w
+mul.f r12.x, r11.y, c8.x
+add.f r8.x, r8.x, c29.y
exp2 r0.w, r0.w
-(ss)mov.f32f32 r0.w, r0.w
-mul.f r2.y, r2.y, c22.y
-min.f r3.x, r3.x, c29.w
-mul.f r4.x, r5.y, r4.x
-add.f r5.y, c39.y, (neg)r0.w
-mov.f32f32 r2.y, r2.y
-min.f r3.x, r3.x, c32.x
-mov.f32f32 r4.x, r4.x
-mul.f r5.y, r5.y, c13.y
-mul.f r2.y, r2.y, c29.z
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r4.x, r4.x
-mul.f r0.w, r0.w, c29.w
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r2.z, r2.z
-absneg.f r7.z, (neg)c0.x
-add.f r0.w, r0.w, r5.y
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r4.x, r4.x
-add.f r5.y, r7.z, r2.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r8.w, r4.x
-mul.f r4.x, r5.y, r5.y
-absneg.f r8.x, (neg)c0.y
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r0.w, r0.w
-mul.f r9.x, r4.z, c8.x
-add.f r9.y, r8.x, r4.w
-sam (f32)(w)r10.y, r8.z, s#2, t#2
-(ss)add.f r8.z, c39.y, (neg)r5.x
+(ss)mov.f32f32 r12.y, r0.w
+add.f r12.z, c39.y, (neg)r11.z
+mad.f32 r12.x, c28.w, r12.x, r6.y
+add.f r12.w, r11.x, (neg)r11.w
+mul.f r13.y, r6.y, c37.x
+mov.f32f32 r13.x, r8.x
+mul.f r13.w, r12.x, r8.z
+mul.f r1.z, r1.z, c23.z
+mul.f r8.z, r12.z, r10.y
+mul.f r2.w, r2.w, c36.w
+mov.f32f32 r10.y, r13.x
+mul.f r12.x, r1.z, c8.x
+add.f r12.z, c39.y, (neg)r11.z
+add.f r12.y, c39.y, (neg)r12.y
+mul.f r14.x, r9.y, r9.y
+mad.f32 r12.x, c28.w, r12.x, r3.x
+add.f r13.x, r13.y, (neg)r2.w
+mul.f r10.y, r12.z, r10.y
+mul.f r12.y, r12.y, c13.y
+mul.f r13.z, r12.x, r9.x
+(ss)mul.f r0.w, r0.w, c29.w
+absneg.f r9.x, (neg)c0.y
+mov.f32f32 r12.x, r4.y
+sam (f32)(w)r14.y, r12.w, s#2, t#2
+add.f r12.z, c39.y, (neg)r11.z
+(sy)mul.f r10.z, r10.z, r15.x
+(ss)add.f r12.w, r11.x, (neg)r11.w
+sam (f32)(xyz)r15.y, r13.z, s#3, t#3
+(sy)mul.f r11.x, r5.z, r15.z
+mov.f32f32 r8.y, r8.y
+mul.f r11.w, r5.z, r15.y
+add.f r13.x, r13.y, (neg)r2.w
+mul.f r2.w, r5.z, r15.w
+add.f r8.y, c22.y, (neg)r8.y
+mul.f r1.z, r1.z, c8.x
+add.f r0.w, r0.w, r12.y
+add.f r12.y, r9.x, r7.z
+mul.f r12.x, r12.z, r12.x
+mad.f32 r1.z, c28.w, r1.z, r3.x
+sam (f32)(w)r12.z, r12.w, s#3, t#3
max.f r0.w, r0.w, c28.y
-mov.f32f32 r8.w, r9.x
-sam (f32)(xyz)r11.y, r2.x, s#2, t#2
-(ss)add.f r2.x, c39.y, (neg)r5.x
-add.f r2.y, c39.y, (neg)r5.x
-(sy)mul.f r8.z, r8.z, r11.x
-add.f r9.x, c39.y, (neg)r5.x
-mul.f r2.x, r2.x, r11.z
-mul.f r2.y, r2.y, r11.y
-add.f r7.x, r7.x, r8.z
-mul.f r8.z, r9.x, r11.w
-add.f r2.x, r6.x, r2.x
-add.f r2.y, r6.y, r2.y
-mul.f r6.x, r3.y, c37.x
-add.f r6.y, r7.y, r8.z
-add.f r2.x, r2.x, c29.x
-add.f r2.y, r2.y, c29.x
-mul.f r1.z, r1.z, c36.w
-add.f r6.y, r6.y, c29.y
-mov.f32f32 r2.x, r2.x
-bary.f r7.y, 5, r1.x
-mov.f32f32 r2.y, r2.y
-add.f r8.z, r6.x, (neg)r1.z
-mov.f32f32 r6.y, r6.y
-mul.f r9.x, r2.x, r7.y
-bary.f r9.z, 4, r1.x
-mul.f r9.w, r2.y, r7.y
-mov.f32f32 r8.z, r8.z
-mov.f32f32 r6.y, r6.y
-mad.f32 r2.y, r2.y, r9.z, (neg)r9.x
-mad.f32 r2.x, r2.x, r9.z, r9.w
-mov.f32f32 r8.z, r8.z
-mad.f32 r8.w, c28.w, r8.w, r3.y
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r10.y, r8.z
-mov.f32f32 r8.z, r8.w
-mov.f32f32 r8.w, c22.x
-mov.f32f32 r2.x, r2.x
-mul.f r9.x, r6.z, c37.x
-mul.f r9.w, r2.w, c36.w
-bary.f r10.w, 8, r1.x
+(sy)mul.f r3.x, r5.z, r13.y
+mad.f32 r12.z, r12.y, r12.y, r14.x
+(ss)mul.f r12.w, r1.z, r8.y
+mul.f r1.z, r11.y, c8.x
+add.f r3.x, r3.x, r10.z
min.f r0.w, r0.w, c29.w
-mad.f32 r4.x, r9.y, r9.y, r4.x
-rcp r8.w, r8.w
-(ss)mul.f r8.w, c8.x, r8.w
-add.f r10.z, r9.x, (neg)r9.w
-add.f r11.y, c39.y, (neg)r10.w
-add.f r11.z, c39.y, (neg)r0.w
-mov.f32f32 r8.w, r8.w
-mov.f32f32 r10.z, r10.z
-mul.f r11.y, r11.y, r2.x
-mul.f r11.z, r11.z, c12.z
-add.f r11.w, c22.y, (neg)r8.w
-mov.f32f32 r10.z, r10.z
-add.f r12.x, c39.y, (neg)r0.w
-add.f r12.y, c39.y, (neg)r0.w
-mov.f32f32 r11.w, r11.w
-mov.f32f32 r10.z, r10.z
-mul.f r12.x, r12.x, c12.y
-mul.f r12.y, r12.y, c12.x
-mul.f r8.z, r8.z, r11.w
-add.f r12.z, c39.y, (neg)r10.w
-add.f r12.w, c39.y, (neg)r10.w
-mov.f32f32 r4.x, r4.x
-mov.f32f32 r8.z, r8.z
-sam (f32)(w)r13.x, r10.y, s#3, t#3
-(sy)(ss)mul.f r10.y, r5.x, r13.w
-add.f r1.z, r6.x, (neg)r1.z
-mul.f r6.x, r12.z, r2.y
-mov.f32f32 r8.z, r8.z
-mul.f r10.z, r12.w, r6.y
-mul.f r3.z, r8.y, r3.z
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r8.y, r8.z
-mad.f32 r8.z, c33.z, r4.w, c29.w
-mov.f32f32 r3.z, r3.z
-absneg.f r12.z, (neg)c0.z
-mov.f32f32 r12.w, r8.y
-mul.f r2.w, r2.w, c23.z
-mov.f32f32 r1.z, r1.z
-add.f r8.y, r12.z, r3.z
-log2 r8.z, r8.z
-(ss)mov.f32f32 r8.z, r8.z
-mul.f r13.x, r2.w, c8.x
-mov.f32f32 r13.y, r1.z
-add.f r1.z, r9.x, (neg)r9.w
-mad.f32 r4.x, r8.y, r8.y, r4.x
-mov.f32f32 r9.x, r13.x
-mul.f r8.z, r8.z, c33.w
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r9.w, r3.w
-mad.f32 r9.x, c28.w, r9.x, r6.z
-mov.f32f32 r8.z, r8.z
-mov.f32f32 r1.z, r1.z
-rsq r4.x, r4.x
-(ss)mov.f32f32 r4.x, r4.x
-mov.f32f32 r9.x, r9.x
-min.f r8.z, c29.w, r8.z
-mov.f32f32 r13.z, r1.z
-mov.f32f32 r14.y, r9.w
-mul.f r1.z, r9.x, r11.w
-mul.f r5.y, r5.y, r4.x
-mul.f r9.x, r9.y, r4.x
-mul.f r4.x, r8.y, r4.x
-mov.f32f32 r1.z, r1.z
-sam (f32)(w)r14.z, r13.y, s#2, t#2
-add.f r8.y, c39.y, (neg)r5.x
-mov.f32f32 r5.y, r5.y
-mov.f32f32 r9.x, r9.x
-mov.f32f32 r1.z, r1.z
-(sy)mul.f r8.y, r8.y, r15.y
-mov.f32f32 r4.x, r4.x
-mov.f32f32 r8.z, r8.z
-mov.f32f32 r1.z, r1.z
-add.f r8.y, r10.y, r8.y
-mov.f32f32 r9.y, r7.w
-mul.f r9.w, r5.x, r10.x
-mov.f32f32 r13.x, r1.z
-mul.f r1.z, r7.x, r8.y
-mov.f32f32 r14.x, r9.y
-add.f r7.x, c39.y, (neg)r5.x
-mov.f32f32 r8.y, c14.y
-mov.f32f32 r9.y, r5.z
-mov.f32f32 r1.z, r1.z
-(ss)nop
-sam (f32)(xyz)r12.w, r12.w, s#3, t#3
-(sy)mul.f r10.x, r5.x, r13.x
-mul.f r4.z, r4.z, c8.x
-mul.f r10.y, r5.x, r12.w
-mul.f r1.z, r1.z, r7.y
-mul.f r7.x, r7.x, r11.x
-mad.f32 r3.y, c28.w, r4.z, r3.y
-mul.f r4.z, r5.x, r13.y
-(ss)nop
-sam (f32)(xzw)r12.w, r14.x, s#4, t#4
-(sy)cmps.f.lt r11.x, r13.z, c32.x
-add.f r7.x, r9.w, r7.x
-mov.f32f32 r3.y, r3.y
-add.f r8.w, c22.y, (neg)r8.w
-mul.f r9.w, r5.x, r13.w
-add.f r11.w, c39.y, (neg)r5.x
-cov.u32f32 r11.x, r11.x
-mul.f r3.y, r3.y, r8.w
-bary.f r13.x, 11, r1.x
-mul.f r11.w, r11.w, r15.y
-rcp r8.y, r8.y
-(ss)mul.f r0.y, r0.y, r8.y
-mov.f32f32 r3.y, r3.y
-(ss)cmps.f.lt r8.y, r13.x, c31.y
-add.f r9.w, r9.w, r11.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r3.y, r3.y
-cov.u32f32 r8.y, r8.y
-mul.f r7.x, r7.x, r9.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r3.y, r3.y
-mul.f r8.y, r11.x, r8.y
-mov.f32f32 r7.x, r7.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r13.w, r3.y
-mul.f r2.w, r2.w, c8.x
-mad.f32 r1.z, r7.x, r9.z, (neg)r1.z
-mov.f32f32 r3.y, c33.z
-mov.f32f32 r7.x, r8.y
-mad.f32 r2.w, c28.w, r2.w, r6.z
-mov.f32f32 r14.z, r0.y
-mov.f32f32 r0.y, r9.y
-mul.f r6.z, r13.z, c28.x
-mov.f32f32 r2.w, r2.w
-add.f r3.y, r3.y, c8.x
-cmps.f.ne r7.x, r7.x, c28.y
-mov.f32f32 r8.y, c14.x
-mul.f r2.w, r2.w, r8.w
-mul.f r1.z, r1.z, r3.y
-add.f r3.y, c39.y, (neg)r6.z
-mul.f r8.w, r6.z, r12.w
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r1.z, r1.z
-rcp r8.y, r8.y
-add.f r9.y, c39.y, (neg)r6.z
-(ss)mul.f r0.x, r0.x, r8.y
-mov.f32f32 r2.w, r2.w
-(ss)max.f r8.y, r4.w, c28.w
-add.f r9.w, c29.w, (neg)r13.z
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r2.w, r2.w
-log2 r1.z, r1.z
-mov.f32f32 r11.x, c8.x
-mul.f r6.z, r6.z, r13.y
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r14.x, r2.w
-mul.f r2.w, r11.x, c38.x
-mov.f32f32 r11.x, c11.x
-mov.f32f32 r11.w, c22.z
-mov.f32f32 r0.x, r0.x
-bary.f r12.w, 6, r1.x
-mov.f32f32 r13.x, c8.x
-sam (f32)(xyz)r14.w, r13.w, s#2, t#2
-add.f r13.y, c39.y, (neg)r5.x
-(ss)add.f r13.w, c39.y, (neg)r5.x
-add.f r2.w, c37.w, (neg)r2.w
-add.f r5.x, c39.y, (neg)r5.x
-(sy)mul.f r13.y, r13.y, r15.x
-mul.f r13.w, r13.w, r14.w
-mov.f32f32 r2.w, r2.w
-mul.f r5.x, r5.x, r15.y
-add.f r10.x, r10.x, r13.y
-add.f r10.y, r10.y, r13.w
-(ss)mul.f r1.z, r2.w, r1.z
-add.f r2.w, r4.z, r5.x
-add.f r4.z, r10.x, c29.x
-add.f r5.x, r10.y, c29.x
-mov.f32f32 r1.z, r1.z
-add.f r2.w, r2.w, c29.y
-mov.f32f32 r4.z, r4.z
-mov.f32f32 r5.x, r5.x
-mov.f32f32 r14.y, r0.x
-log2 r0.x, r11.x
-mov.f32f32 r2.w, r2.w
-mul.f r10.x, r4.z, r7.y
-mul.f r7.y, r5.x, r7.y
-mad.f32 r5.x, r5.x, r9.z, (neg)r10.x
-mad.f32 r4.z, r4.z, r9.z, r7.y
-exp2 r1.z, r1.z
-mov.f32f32 r2.w, r2.w
-sam (f32)(xw)r13.w, r14.y, s#5, t#5
-(sy)mul.f r7.y, r13.w, r14.z
-mov.f32f32 r5.x, r5.x
-bary.f r9.z, 10, r1.x
-mov.f32f32 r4.z, r4.z
-mov.f32f32 r7.y, r7.y
-(ss)mul.f r0.x, c34.y, r0.x
-mul.f r10.x, r9.z, c28.w
-mov.f32f32 r4.z, r4.z
-(ss)mad.f32 r1.z, c37.z, r7.y, r1.z
-mov.f32f32 r0.x, r0.x
-mad.f32 r7.y, c23.x, r10.x, c23.y
-rcp r10.x, r11.w
-(ss)mad.f32 r9.z, r9.z, r10.x, c22.w
-mov.f32f32 r10.x, r12.w
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r7.y, r7.y
-mov.f32f32 r9.z, r9.z
-exp2 r0.x, r0.x
-(ss)add.f r10.y, c39.y, (neg)r0.x
-mov.f32f32 r13.w, r10.x
-mad.f32 r2.y, r2.y, r7.y, r5.x
-mad.f32 r2.x, r2.x, r7.y, r4.z
-mad.f32 r2.w, r6.y, r7.y, r2.w
-mul.f r4.z, r10.y, c34.x
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r2.w, r2.w
-(ss)mul.f r0.x, r0.x, c28.w
-mul.f r2.y, r10.w, r2.y
-mul.f r2.x, r10.w, r2.x
-mul.f r2.w, r10.w, r2.w
-add.f r0.x, r0.x, r4.z
-add.f r2.y, r2.y, r6.x
-mov.f32f32 r4.z, r9.z
-add.f r2.x, r2.x, r11.y
+mul.f r2.z, r6.x, r2.z
+mad.f32 r1.z, c28.w, r1.z, r6.y
+mul.f r3.x, r7.w, r3.x
+add.f r6.x, c39.y, (neg)r0.w
+add.f r6.y, c39.y, (neg)r0.w
+mul.f r13.x, r1.z, r8.y
+mul.f r1.z, r3.x, r4.z
+mul.f r3.x, r3.w, r9.z
+mul.f r3.w, r6.x, c12.z
+add.f r6.x, c39.y, (neg)r5.z
+mul.f r6.y, r6.y, c12.y
+add.f r7.w, c39.y, (neg)r0.w
+sam (f32)(xyz)r13.z, r12.w, s#2, t#2
+add.f r8.y, c39.y, (neg)r5.z
+add.f r9.z, c39.y, (neg)r5.z
+mul.f r6.x, r6.x, r10.w
+add.f r10.z, c39.y, (neg)r5.z
+(sy)mul.f r8.y, r8.y, r13.w
+mul.f r9.z, r9.z, r13.z
+add.f r3.x, r3.x, r6.x
+add.f r6.x, c39.y, (neg)r5.z
+add.f r8.y, r11.x, r8.y
+add.f r9.z, r11.w, r9.z
+mul.f r10.z, r10.z, r14.x
+mul.f r7.w, r7.w, c12.x
+add.f r8.y, r8.y, c29.x
+add.f r9.z, r9.z, c29.x
+mul.f r6.x, r6.x, r15.x
+mul.f r5.z, r5.z, r13.y
+mov.f32f32 r10.w, r8.y
+mul.f r11.x, r9.z, r4.z
add.f r2.w, r2.w, r10.z
-bary.f r5.x, 7, r1.x
-mov.f32f32 r4.z, r4.z
-rcp r6.x, r0.x
-nop
-(ss)rcp r0.x, r0.x
-(ss)mov.f32f32 r6.x, r6.x
-mov.f32f32 r5.x, r5.x
+mov.f32f32 r10.z, r2.z
+mul.f r4.z, r10.w, r4.z
+mov.f32f32 r9.z, r9.z
+mad.f32 r8.y, r8.y, r8.w, r11.x
+bary.f r10.w, 10, r1.x
+add.f r5.z, r5.z, r6.x
+mad.f32 r4.z, r9.z, r8.w, (neg)r4.z
+add.f r2.w, r2.w, c29.y
+mul.f r6.x, r10.w, c28.w
+mul.f r3.x, r3.x, r5.z
+absneg.f r5.z, (neg)c0.z
+mad.f32 r9.z, c33.z, r7.z, c29.w
+mad.f32 r6.x, c23.x, r6.x, c23.y
+mad.f32 r1.z, r3.x, r8.w, (neg)r1.z
+mov.f32f32 r3.x, c33.z
+add.f r8.w, r5.z, r10.z
+mov.f32f32 r11.x, r6.x
+mad.f32 r5.x, r5.x, r6.x, r8.y
+add.f r3.x, r3.x, c8.x
+mad.f32 r6.x, r8.w, r8.w, r12.z
+mad.f32 r4.y, r4.y, r11.x, r4.z
+mul.f r4.z, r11.z, r5.x
+mul.f r1.z, r1.z, r3.x
+mad.f32 r2.w, r8.x, r11.x, r2.w
+mul.f r3.x, r11.z, r4.y
+add.f r4.y, r4.z, r8.z
+mov.f32f32 r4.z, c22.z
+mul.f r2.w, r11.z, r2.w
+add.f r3.x, r3.x, r12.x
+log2 r1.z, r1.z
+mov.f32f32 r5.x, c8.x
+rsq r6.x, r6.x
+(ss)mov.f32f32 r8.x, r6.x
+add.f r2.w, r2.w, r10.y
+(ss)mul.f r6.x, r8.w, r6.x
+rcp r4.z, r4.z
+(ss)mad.f32 r4.z, r10.w, r4.z, c22.w
+mul.f r5.x, r5.x, c38.x
+mul.f r8.y, r9.y, r8.x
+mul.f r8.x, r12.y, r8.x
max.f r4.z, r4.z, c28.y
-mul.f r6.y, r13.x, c28.w
-mov.f32f32 r7.y, c16.x
-mov.f32f32 r14.x, r5.x
+add.f r5.x, c37.w, (neg)r5.x
+log2 r8.z, r9.z
+(ss)mul.f r8.z, r8.z, c33.w
+sam (f32)(xzw)r11.w, r6.z, s#4, t#4
+(sy)(ss)cmps.f.lt r6.z, r12.z, c32.x
min.f r4.z, r4.z, c29.w
-add.f r5.x, c39.y, (neg)r6.y
-mul.f r7.y, r7.y, c28.x
-absneg.f r9.z, (neg)c16.x
-mul.f r2.y, r4.z, r2.y
-add.f r10.x, c39.y, (neg)r4.z
-mul.f r2.x, r4.z, r2.x
+mul.f r1.z, r5.x, r1.z
+min.f r5.x, c29.w, r8.z
+cov.u32f32 r6.z, r6.z
+mul.f r3.x, r4.z, r3.x
+add.f r6.w, c39.y, (neg)r4.z
+mul.f r4.y, r4.z, r4.y
+(rpt1)nop
+mul.f r6.w, r6.w, c30.x
+add.f r8.z, c39.y, (neg)r4.z
+exp2 r1.z, r1.z
+nop
+mov.f32f32 r8.w, c14.y
+add.f r3.x, r3.x, r6.w
+mul.f r6.w, r8.z, c30.x
mul.f r2.w, r4.z, r2.w
nop
-mul.f r10.x, r10.x, c30.x
-add.f r10.y, c39.y, (neg)r4.z
+mul.f r8.z, r3.x, r3.x
add.f r4.z, c39.y, (neg)r4.z
-sam (f32)(w)r13.w, r13.w, s#0, t#0
-(sy)cmps.f.lt r10.z, r14.z, c36.z
-add.f r2.y, r2.y, r10.x
-mul.f r10.x, r10.y, c30.x
-mul.f r4.z, r4.z, c30.y
-nop
-mul.f r10.y, r2.y, r2.y
-add.f r2.x, r2.x, r10.x
-add.f r2.w, r2.w, r4.z
-cov.u32f32 r4.z, r10.z
-mul.f r5.x, r5.x, c29.w
-mul.f r6.y, r6.y, c35.w
-mad.f32 r10.x, r2.w, r2.w, r10.y
+add.f r4.y, r4.y, r6.w
+rcp r6.w, r8.w
+(ss)bary.f r8.w, 11, r1.x
+(ss)mul.f r9.z, r0.y, r6.w
+mul.f r0.y, r4.z, c30.y
+mov.f32f32 r4.z, c14.x
+cmps.f.lt r6.w, r8.w, c31.y
+max.f r8.w, r7.z, c28.w
+add.f r0.y, r2.w, r0.y
+mul.f r2.w, r12.z, c28.x
+cov.u32f32 r6.w, r6.w
+mov.f32f32 r10.y, c11.x
+mad.f32 r8.z, r0.y, r0.y, r8.z
+rcp r4.z, r4.z
+(ss)mul.f r9.y, r0.x, r4.z
+mad.f32 r0.x, r4.y, r4.y, r8.z
+(ss)mul.f r4.z, r6.z, r6.w
+(rpt4)nop
+rsq r0.x, r0.x
+(ss)mov.f32f32 r6.z, r0.x
+(ss)mul.f r0.x, r4.y, r0.x
+mad.f32 r4.y, c28.x, r11.z, c28.x
+sam (f32)(xw)r10.w, r9.y, s#5, t#5
+(sy)mul.f r6.w, r10.w, r11.z
+mul.f r3.x, r3.x, r6.z
+mul.f r0.y, r0.y, r6.z
+mul.f r0.x, r4.y, r0.x
+add.f r6.z, c39.y, (neg)r4.y
+mul.f r3.x, r4.y, r3.x
+add.f r8.z, c39.y, (neg)r4.y
+mul.f r0.y, r4.y, r0.y
+mul.f r6.z, r6.z, c30.x
+add.f r4.y, c39.y, (neg)r4.y
+mul.f r8.z, r8.z, c30.x
+mad.f32 r1.z, c37.z, r6.w, r1.z
cmps.f.ne r4.z, r4.z, c28.y
-add.f r10.y, c35.x, r7.y
-add.f r10.z, c35.z, r7.y
-mov.f32f32 r10.x, r10.x
-mov.f32f32 r11.x, r14.z
-mad.f32 r10.x, r2.x, r2.x, r10.x
-add.f r5.x, r6.y, r5.x
-mov.f32f32 r6.y, r10.y
-mov.f32f32 r10.y, r10.z
-mov.f32f32 r10.z, r11.x
-mul.f r11.x, r6.w, c34.y
-add.f r11.y, c35.x, r7.y
-rsq r10.x, r10.x
-(ss)mov.f32f32 r10.x, r10.x
-mov.f32f32 r11.w, c28.y
-mul.f r5.x, c8.x, r5.x
+add.f r6.w, c39.y, (neg)r2.w
+add.f r3.x, r3.x, r8.z
+add.f r0.x, r0.x, r6.z
+add.f r6.z, c39.y, (neg)r2.w
+mul.f r4.y, r4.y, c30.y
+mul.f r3.x, r6.w, r3.x
+mul.f r6.w, r2.w, r11.w
+mul.f r0.x, r6.z, r0.x
+mul.f r2.w, r2.w, r12.y
+add.f r0.y, r0.y, r4.y
+add.f r3.x, r6.w, r3.x
+log2 r4.y, r10.y
+(ss)mul.f r4.y, c34.y, r4.y
+add.f r6.z, c29.w, (neg)r12.z
+add.f r0.x, r2.w, r0.x
+absneg.f r3.x, (neg)r3.x
+mov.f32f32 r6.w, c8.x
+bary.f r2.w, 15, r1.x
+absneg.f r0.x, (neg)r0.x
+mul.f r2.x, r3.x, r2.x
+mul.f r8.z, r3.x, r5.y
+(ss)mov.f32f32 r9.y, r0.y
+mul.f r3.y, r3.x, r3.y
+add.f r2.x, r7.x, (neg)r2.x
+mul.f r7.x, r3.x, r8.y
+mad.f32 r8.y, r9.y, r7.z, r8.z
+mad.f32 r3.y, r9.y, r4.x, r3.y
+max.f r2.x, r2.x, c32.y
+mad.f32 r4.x, r0.x, r10.z, r8.y
+mad.f32 r2.z, r0.x, r2.z, r3.y
+mad.f32 r3.y, r9.y, r8.x, r7.x
+min.f r8.x, r2.x, c32.z
+mul.f r2.x, r0.x, r10.x
+mul.f r7.x, r4.x, r9.y
+max.f r2.z, r2.z, c28.y
+mul.f r8.y, r4.x, r0.x
+add.f r2.x, r7.y, (neg)r2.x
+mul.f r7.x, c31.x, r7.x
+min.f r2.z, r2.z, c29.w
+mul.f r7.y, c31.x, r8.y
+max.f r2.x, r2.x, c32.y
+add.f r7.x, r7.z, (neg)r7.x
+add.f r2.z, c32.w, (neg)r2.z
+add.f r7.y, r10.z, (neg)r7.y
+min.f r8.y, r2.x, c32.z
+mov.f32f32 r2.x, r7.x
+mul.f r4.x, r4.x, r3.x
+mov.f32f32 r7.z, r7.y
+mad.f32 r3.y, r0.x, r6.x, r3.y
+rcp r6.x, r7.x
nop
-mul.f r2.y, r2.y, r10.x
-mul.f r2.x, r2.x, r10.x
-mul.f r2.w, r2.w, r10.x
-sel.b32 r4.z, r11.w, r4.z, r10.z
-mov.f32f32 r2.y, r2.y
-mad.f32 r10.x, c28.x, r10.w, c28.x
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r5.x, r5.x
-mul.f r2.y, r10.x, r2.y
-add.f r10.z, c39.y, (neg)r10.x
-mul.f r2.x, r10.x, r2.x
-add.f r10.w, c39.y, (neg)r10.x
-mul.f r2.w, r10.x, r2.w
-mul.f r10.z, r10.z, c30.x
-add.f r10.x, c39.y, (neg)r10.x
-mul.f r10.w, r10.w, c30.x
-mov.f32f32 r11.x, r11.x
-add.f r2.y, r2.y, r10.z
-mul.f r10.x, r10.x, c30.y
-add.f r2.x, r2.x, r10.w
-add.f r6.y, r6.y, (neg)r11.x
-mul.f r2.y, r3.y, r2.y
-add.f r2.w, r2.w, r10.x
-mul.f r2.x, r9.y, r2.x
-mov.f32f32 r3.y, r6.y
-add.f r2.y, r8.w, r2.y
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r6.y, r10.y
+log2 r2.z, r2.z
+(ss)mul.f r2.z, c33.x, r2.z
+sam (f32)(x)r10.x, r8.x, s#8, t#8
+(sy)cmps.f.lt r7.x, r10.x, r9.w
+(ss)rcp r8.x, r2.x
+mov.f32f32 r8.y, r3.z
+mov.f32f32 r8.z, c34.w
+rcp r9.y, r7.z
+mov.f32f32 r9.z, c16.x
+cov.u32f32 r7.x, r7.x
+mul.f r4.x, c31.x, r4.x
+exp2 r2.z, r2.z
nop
-absneg.f r2.y, (neg)r2.y
-add.f r2.x, r6.z, r2.x
-add.f r6.y, r6.y, (neg)r11.x
-mul.f r6.z, r1.w, c34.y
-mul.f r8.w, r2.y, r3.x
-mul.f r9.y, r2.y, r2.z
-mul.f r10.x, r2.y, r2.z
-mad.f32 r9.y, r2.w, r4.w, r9.y
-mov.f32f32 r8.w, r8.w
-mad.f32 r10.x, r2.w, r4.w, r10.x
-absneg.f r2.x, (neg)r2.x
-mov.f32f32 r9.y, r9.y
-add.f r7.w, r7.w, (neg)r8.w
-mov.f32f32 r8.w, r10.x
-mad.f32 r9.y, r2.x, r3.z, r9.y
-mad.f32 r8.w, r2.x, r3.z, r8.w
-mov.f32f32 r7.w, r7.w
-mul.f r10.x, r5.x, r2.x
-mul.f r10.y, r9.y, r2.x
-mul.f r10.z, r9.y, r2.w
-max.f r7.w, r7.w, c32.y
-mov.f32f32 r8.w, r8.w
-mov.f32f32 r10.x, r10.x
-bary.f r10.w, 12, r1.x
-mov.f32f32 r7.w, r7.w
-mov.f32f32 r10.z, r10.z
-mov.f32f32 r8.w, r8.w
-mad.f32 r10.x, c28.x, r10.w, (neg)r10.x
-min.f r7.w, r7.w, c32.z
-mul.f r10.z, c31.x, r10.z
-max.f r8.w, r8.w, c28.y
-mov.f32f32 r10.x, r10.x
-mov.f32f32 r7.w, r7.w
-mov.f32f32 r10.z, r10.z
-min.f r8.w, r8.w, c29.w
-bary.f r10.w, 14, r1.x
-mov.f32f32 r7.w, r7.w
-add.f r4.w, r4.w, (neg)r10.z
-add.f r8.w, c32.w, (neg)r8.w
-mov.f32f32 r10.y, r10.y
-mov.f32f32 r7.w, r7.w
-mov.f32f32 r4.w, r4.w
-mov.f32f32 r8.w, r8.w
-rcp r10.z, r10.w
-(ss)mad.f32 r10.x, r10.x, r10.z, c28.x
-(ss)mov.f32f32 r10.w, r7.w
-mul.f r3.x, r2.x, r3.x
-mul.f r7.w, c31.x, r10.y
-mul.f r9.y, r9.y, r2.y
-mov.f32f32 r10.x, r10.x
-mov.f32f32 r3.x, r3.x
-rcp r10.y, r4.w
-mov.f32f32 r11.x, r4.y
-mov.f32f32 r11.w, c35.y
-log2 r8.w, r8.w
-(ss)mul.f r8.w, c33.x, r8.w
-add.f r3.x, r3.w, (neg)r3.x
-mov.f32f32 r3.w, r10.x
-mov.f32f32 r7.w, r7.w
-add.f r10.x, r11.w, (neg)r11.x
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r8.w, r8.w
-mov.f32f32 r3.w, r3.w
-add.f r3.z, r3.z, (neg)r7.w
-max.f r3.x, r3.x, c32.y
-mul.f r7.w, r10.x, r10.y
-max.f r3.w, r3.w, c28.y
-mov.f32f32 r3.z, r3.z
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r7.w, r7.w
-rcp r10.x, r4.w
-mov.f32f32 r10.y, c34.w
-exp2 r8.w, r8.w
-(ss)mov.f32f32 r8.w, r8.w
-min.f r3.x, r3.x, c32.z
-min.f r3.w, r3.w, c29.w
-rcp r11.w, r3.z
-add.f r10.y, r10.y, (neg)r11.x
-mov.f32f32 r8.w, r8.w
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r3.w, r3.w
-(ss)mul.f r6.y, r6.y, r11.w
-mov.f32f32 r10.y, r10.y
-mov.f32f32 r3.x, r3.x
-max.f r8.w, r8.w, c28.y
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r6.y, r6.y
-mov.f32f32 r3.x, r3.x
-mul.f r10.x, r10.y, r10.x
-min.f r8.w, r8.w, c29.w
-mov.f32f32 r12.w, r3.w
-mov.f32f32 r11.x, r3.x
-mov.f32f32 r3.x, r10.x
-min.f r3.w, r8.w, c33.y
-mul.f r5.x, r5.x, r2.y
-rcp r8.w, r3.z
-mov.f32f32 r9.y, r9.y
-(ss)mul.f r3.y, r3.y, r8.w
-max.f r3.x, r7.w, r3.x
-sam (f32)(x)r7.w, r10.w, s#8, t#8
-(sy)cmps.f.lt r5.z, r7.w, r5.z
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r5.x, r5.x
-bary.f r8.w, 13, r1.x
-cov.u32f32 r5.z, r5.z
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r3.y, r3.y
-mul.f r3.w, r3.w, r8.z
-mov.f32f32 r5.z, r5.z
-mad.f32 r5.x, c28.x, r8.w, (neg)r5.x
-max.f r3.y, r6.y, r3.y
-mul.f r6.y, c31.x, r9.y
-cmps.f.ne r5.z, r5.z, c28.y
-mov.f32f32 r7.w, r7.w
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r5.x, r5.x
-sel.b32 r0.y, r0.y, r5.z, r7.w
-min.f r3.x, r3.x, r3.y
-mov.f32f32 r3.y, r6.y
-mad.f32 r3.w, c33.y, r3.w, c28.w
-mad.f32 r0.y, c31.x, r0.y, c31.y
-mad.f32 r5.x, r5.x, r10.z, c28.x
-add.f r2.z, r2.z, (neg)r3.y
-mov.f32f32 r3.y, r2.y
-mul.f r0.y, r0.y, c31.z
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r5.x, r5.x
-mov.f32f32 r0.y, r0.y
-bary.f r5.z, 15, r1.x
-mul.f r5.y, r2.y, r5.y
-mov.f32f32 r6.y, r2.x
-add.f r0.y, c30.w, (neg)r0.y
-rcp r7.w, r2.z
-add.f r7.y, c35.z, r7.y
-mov.f32f32 r5.x, r5.x
-mov.f32f32 r5.z, r5.z
-mov.f32f32 r0.y, r0.y
-add.f r7.y, r7.y, (neg)r6.z
-mov.f32f32 r5.x, r5.x
-mov.f32f32 r8.z, r5.z
-mad.f32 r5.y, r2.w, r9.x, r5.y
-rcp r8.w, r2.z
-(ss)mul.f r7.y, r7.y, r7.w
-max.f r5.x, r5.x, c28.y
-rcp r0.y, r0.y
-(ss)mov.f32f32 r0.y, r0.y
-mad.f32 r3.y, c28.x, r8.z, r3.y
-mov.f32f32 r5.y, r5.y
-add.f r6.z, r11.y, (neg)r6.z
-mad.f32 r0.y, c30.z, r0.y, (neg)r0.z
-mov.f32f32 r0.z, r7.y
-min.f r5.x, r5.x, c29.w
-mov.f32f32 r6.z, r6.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r3.y, r3.y
-bary.f r7.y, 17, r1.x
-mov.f32f32 r5.x, r5.x
-max.f r0.y, r0.y, r13.z
-mul.f r6.z, r6.z, r8.w
-mad.f32 r4.x, r2.x, r4.x, r5.y
-bary.f (ei)r1.x, 16, r1.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.y, r6.z
-mov.f32f32 r5.x, r5.x
-rcp r5.y, r7.y
-(ss)mad.f32 r3.y, r3.y, r5.y, c28.x
-mul.f r6.z, r0.y, c28.x
-mul.f r7.w, r0.y, r8.y
-mov.f32f32 r8.z, r0.y
-mul.f r8.w, r0.y, c33.z
-max.f r6.z, r6.z, c28.y
-mov.f32f32 r7.w, r7.w
-max.f r8.z, r8.z, c28.y
-mov.f32f32 r8.w, r8.w
-mov.f32f32 r6.z, r6.z
-mul.f r6.x, r7.w, r6.x
-min.f r7.w, r8.z, c29.w
-mov.f32f32 r8.z, r8.w
-min.f r6.z, r6.z, c34.z
-mov.f32f32 r6.x, r6.x
-add.f r8.w, c39.y, (neg)r7.w
-max.f r8.z, r8.z, c28.y
-mov.f32f32 r6.z, r6.z
-add.f r6.x, c29.w, (neg)r6.x
-mul.f r8.w, r8.w, r9.w
-mul.f r7.w, r7.w, c29.w
-mul.f r9.x, r2.y, r6.z
-mul.f r6.z, r2.x, r6.z
-mov.f32f32 r6.x, r6.x
-add.f r7.w, r7.w, r8.w
-mov.f32f32 r8.w, r9.x
-mov.f32f32 r6.z, r6.z
-max.f r6.x, c28.y, r6.x
-mul.f r3.w, r3.w, r7.w
-mul.f r7.w, r8.w, c34.z
-mul.f r6.z, r6.z, c34.z
-mov.f32f32 r6.x, r6.x
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r7.w, r7.w
-mov.f32f32 r6.z, r6.z
-mad.f32 r5.z, c28.x, r5.z, (neg)r7.w
-mov.f32f32 r1.x, r1.x
-add.f r7.w, c39.y, (neg)r6.x
-add.f r8.w, c39.y, (neg)r6.x
-mov.f32f32 r5.z, r5.z
-(ss)rcp r7.y, r7.y
-mad.f32 r6.z, c28.x, r1.x, (neg)r6.z
-mul.f r7.w, r7.w, c9.z
-mul.f r8.w, r8.w, c9.y
-(ss)mad.f32 r5.z, r5.z, r7.y, c28.x
-mov.f32f32 r6.z, r6.z
-add.f r9.x, c39.y, (neg)r6.x
-add.f r9.y, c39.y, (neg)r3.w
-mov.f32f32 r5.z, r5.z
-mad.f32 r6.z, r6.z, r7.y, c28.x
-(ss)mul.f r7.y, r9.x, c9.x
-max.f r0.z, r0.z, r1.y
-mov.f32f32 r1.y, r5.z
-mov.f32f32 r5.z, r6.z
-add.f r6.z, c39.y, (neg)r3.w
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r5.z, r5.z
-add.f r9.x, c39.y, (neg)r3.w
-min.f r0.z, r0.z, r3.x
-max.f r1.y, r1.y, c28.y
-mov.f32f32 r3.x, r5.z
-min.f r5.z, r8.z, c29.w
-mul.f r0.y, r0.y, r8.y
-min.f r1.y, r1.y, c29.w
-max.f r3.x, r3.x, c28.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r5.z, r5.z
-mov.f32f32 r1.y, r1.y
-min.f r3.x, r3.x, c29.w
-mul.f r2.z, r2.z, r0.z
-mul.f r1.w, (neg)r1.w, c34.y
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r8.y, c28.y
-mul.f r0.x, r0.y, r0.x
-mov.f32f32 r9.w, r1.y
-mov.f32f32 r0.y, r3.x
-add.f r1.y, r1.w, r2.z
-mul.f r1.w, r9.z, c28.x
-sel.b32 r2.z, r8.y, r7.x, r5.z
-mov.f32f32 r10.x, r0.y
-mov.f32f32 r0.x, r0.x
-add.f r0.y, r1.y, (neg)r1.w
-mul.f r1.y, r4.w, r0.z
-mov.f32f32 r11.x, r2.z
-add.f r0.x, c29.w, (neg)r0.x
-mul.f r1.w, r0.y, c3.x
-sam (f32)(xyz)r13.x, r9.w, s#7, t#7
-(sy)add.f r2.z, r13.y, r13.z
-mov.f32f32 r3.x, r13.z
-mov.f32f32 r4.w, r13.y
-mov.f32f32 r5.z, r13.x
-add.f r2.z, r2.z, c34.y
-mov.f32f32 r7.x, r4.y
-mov.f32f32 r0.x, r0.x
-mul.f r8.y, r0.y, c4.x
-mov.f32f32 r2.z, r2.z
-add.f r1.y, r7.x, r1.y
-mov.f32f32 r4.y, r4.y
-max.f r0.x, c28.y, r0.x
-cmps.f.lt r2.z, r2.z, r13.x
-mul.f r0.y, r0.y, c2.x
-add.f r1.y, r1.y, (neg)r4.y
-mul.f r0.z, r3.z, r0.z
-cov.u32f32 r2.z, r2.z
-mov.f32f32 r0.x, r0.x
-mad.f32 r1.w, c3.y, r1.y, r1.w
-mad.f32 r3.z, c4.y, r1.y, r8.y
-mov.f32f32 r2.z, r2.z
-add.f r4.y, c39.y, (neg)r0.x
-add.f r7.x, c39.y, (neg)r0.x
-add.f r8.y, c39.y, (neg)r0.x
-cmps.f.ne r2.z, r2.z, c28.y
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r1.w, r1.w
-mul.f r6.w, (neg)r6.w, c34.y
-mov.f32f32 r3.z, r3.z
-mov.f32f32 r3.y, r3.y
-mad.f32 r0.y, c2.y, r1.y, r0.y
-mov.f32f32 r1.y, r6.w
-mov.f32f32 r13.x, r5.x
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r0.y, r0.y
-add.f r0.z, r1.y, r0.z
-mul.f r1.y, r9.z, c28.x
-max.f r3.y, r3.y, c28.y
-mov.f32f32 r4.x, r4.x
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.y, r1.y
-min.f r3.y, r3.y, c29.w
-sam (f32)(xyzw)r12.w, r12.w, s#6, t#6
-(sy)mul.f r5.x, r12.w, r13.z
-mul.f r6.w, r13.y, r13.z
-mul.f r8.z, r13.x, r13.z
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r6.w, r6.w
-mov.f32f32 r8.z, r8.z
-mov.f32f32 r3.y, r3.y
-add.f r0.z, r0.z, (neg)r1.y
-mov.f32f32 r1.y, r5.x
-add.f r5.x, c29.w, (neg)r13.z
-mov.f32f32 r9.z, r3.y
-mad.f32 r1.x, c28.x, r1.x, r6.y
-mad.f32 r1.w, c3.z, r0.z, r1.w
-mad.f32 r3.y, c4.z, r0.z, r3.z
-mad.f32 r0.y, c2.z, r0.z, r0.y
-mov.f32f32 r0.z, r1.x
-mov.f32f32 r1.x, r1.w
-mov.f32f32 r1.w, c29.w
-mov.f32f32 r3.z, c28.y
-mad.f32 r0.z, r0.z, r5.y, c28.x
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r0.y, r0.y
-add.f r1.w, r1.w, r3.z
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r3.z, c28.y
-log2 r4.x, r4.x
-(ss)mul.f r4.x, r5.w, r4.x
-mul.f r2.y, r2.y, r7.z
-mov.f32f32 r0.z, r0.z
-add.f r1.w, r1.w, (neg)r3.z
-mov.f32f32 r3.z, r4.x
-mad.f32 r2.y, r2.w, r8.x, r2.y
+add.f r8.y, r8.z, (neg)r8.y
+cmps.f.ne r7.x, r7.x, c28.y
+(ss)max.f r2.z, r2.z, c28.y
+mul.f r8.z, r9.z, c28.x
+mul.f r8.x, r8.y, r8.x
+sel.b32 r7.x, r9.w, r7.x, r10.x
+mov.f32f32 r8.y, c35.y
+min.f r2.z, r2.z, c29.w
+add.f r9.z, c35.z, r8.z
+mad.f32 r7.x, c31.x, r7.x, c31.y
+add.f r8.y, r8.y, (neg)r3.z
+min.f r2.z, r2.z, c33.y
+mul.f r9.w, r4.w, c34.y
+mul.f r7.x, r7.x, c31.z
+mul.f r6.x, r8.y, r6.x
+mul.f r2.z, r2.z, r5.x
+add.f r5.x, r9.z, (neg)r9.w
+add.f r7.x, c30.w, (neg)r7.x
+max.f r6.x, r6.x, r8.x
+mad.f32 r2.z, c33.y, r2.z, c28.w
+mul.f r5.x, r5.x, r9.y
+rcp r7.z, r7.z
+add.f r8.x, c35.x, r8.z
+mov.f32f32 r8.y, r9.w
+add.f r4.x, r5.y, (neg)r4.x
+rcp r5.y, r7.x
mov.f32f32 r0.z, r0.z
-mad.f32 r1.x, c3.w, r1.w, r1.x
-mad.f32 r2.w, c4.w, r1.w, r3.y
-mad.f32 r0.y, c2.w, r1.w, r0.y
+log2 r3.y, r3.y
+(ss)mul.f r2.y, r2.y, r3.y
+mad.f32 r0.z, c30.z, r5.y, (neg)r0.z
+(ss)add.f r3.y, r8.x, (neg)r8.y
+mov.f32f32 r5.y, r4.x
+mul.f r5.w, r3.x, r5.w
+max.f r0.z, r0.z, r12.z
+mul.f r3.y, r3.y, r7.z
+exp2 r2.y, r2.y
+(ss)mul.f r7.x, r2.y, c1.y
+mul.f r7.z, r2.y, c1.z
+mov.f32f32 r8.x, r0.z
max.f r0.z, r0.z, c28.y
-mul.f r1.x, r1.x, c35.w
-mov.f32f32 r1.w, r2.w
-mov.f32f32 r0.y, r0.y
+max.f r3.y, r5.x, r3.y
+rcp r5.x, r5.y
+add.f r8.y, c35.x, r8.z
+mul.f r9.y, r8.x, c28.x
+mul.f r9.z, r8.x, r8.w
min.f r0.z, r0.z, c29.w
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r12.w, r0.y
-mov.f32f32 r9.w, r0.z
-mov.f32f32 r13.x, r1.x
-mov.f32f32 r13.y, r1.w
-exp2 r0.y, r3.z
-(ss)mul.f r0.z, r0.y, c1.z
-mul.f r1.x, r0.y, c1.y
-mul.f r0.y, r0.y, c1.x
-mov.f32f32 r1.w, r2.y
-sam (f32)(xyz)r13.z, r9.z, s#7, t#7
-(sy)mov.f32f32 r2.y, r14.x
-mov.f32f32 r2.w, r13.w
-mov.f32f32 r3.y, r13.z
-sam.3d (f32)(xyz)r12.w, r12.w, s#1, t#1
-(sy)(ss)mad.f32 r3.z, r13.y, r5.x, r6.w
-sel.b32 r2.y, r2.y, r2.z, r3.x
-sel.b32 r2.w, r2.w, r2.z, r4.w
-sel.b32 r2.z, r3.y, r2.z, r5.z
-mov.f32f32 r3.x, r3.z
-mul.f r3.y, r2.y, c10.z
-mul.f r3.z, r2.w, c10.y
-mul.f r4.x, r2.z, c10.x
-mul.f r2.y, r0.x, r2.y
-mul.f r3.y, r4.y, r3.y
-mul.f r3.z, r7.x, r3.z
-mul.f r4.x, r8.y, r4.x
-mul.f r2.w, r0.x, r2.w
-add.f r2.y, r2.y, r3.y
-mul.f r0.x, r0.x, r2.z
-mul.f r2.z, r3.w, r3.x
-add.f r2.w, r2.w, r3.z
-mul.f r2.y, r6.x, r2.y
-add.f r0.x, r0.x, r4.x
-mad.f32 r3.x, r13.x, r5.x, r8.z
-mul.f r2.w, r6.x, r2.w
-add.f r2.y, r2.y, r7.w
-mul.f r0.x, r6.x, r0.x
-mov.f32f32 r3.x, r3.x
-mad.f32 r1.y, r12.w, r5.x, r1.y
-mul.f r2.y, r9.y, r2.y
-add.f r2.w, r2.w, r8.w
-add.f r0.x, r0.x, r7.y
-mul.f r3.x, r3.w, r3.x
-add.f r2.y, r2.z, r2.y
-mov.f32f32 r0.z, r0.z
-mul.f r2.z, r6.z, r2.w
-mul.f r0.x, r9.x, r0.x
-mov.f32f32 r1.y, r1.y
-mul.f r0.z, r0.z, c36.y
-add.f r2.z, r3.x, r2.z
-mov.f32f32 r1.x, r1.x
-mul.f r1.y, r3.w, r1.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.y, r0.y
-mul.f r1.x, r1.x, c36.y
-mad.f32 r1.w, r2.x, r12.z, r1.w
-add.f r0.z, r2.y, r0.z
-add.f r0.x, r1.y, r0.x
-mov.f32f32 r1.x, r1.x
-mul.f r0.y, r0.y, c36.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r1.y, r1.w
+exp2 r4.y, r4.y
+(ss)add.f r9.w, c39.y, (neg)r4.y
+max.f r9.y, r9.y, c28.y
+mul.f r8.w, r8.x, r8.w
+add.f r10.x, c39.y, (neg)r0.z
+mul.f r9.w, r9.w, c34.x
+min.f r9.y, r9.y, c34.z
+(ss)mul.f r4.y, r4.y, c28.w
+mul.f r6.z, r10.x, r6.z
+mul.f r0.z, r0.z, c29.w
+mov.f32f32 r10.x, r9.y
+mul.f r9.y, r3.x, r9.y
+add.f r4.y, r4.y, r9.w
+add.f r0.z, r0.z, r6.z
+mul.f r6.z, r0.x, r10.x
+mul.f r9.y, r9.y, c34.z
+mov.f32f32 r9.w, r2.w
+mul.f r0.z, r2.z, r0.z
+mul.f r2.z, r6.z, c34.z
+bary.f r6.z, 16, r1.x
+mad.f32 r9.y, c28.x, r9.w, (neg)r9.y
+bary.f r9.w, 17, r1.x
+rcp r10.x, r4.y
+(ss)mul.f r9.z, r9.z, r10.x
+mov.f32f32 r10.x, r6.z
+mov.f32f32 r10.y, r0.z
+min.f r3.y, r6.x, r3.y
+add.f r6.x, c29.w, (neg)r9.z
+mad.f32 r2.z, c28.x, r10.x, (neg)r2.z
+rcp r9.z, r9.w
+(ss)mad.f32 r9.y, r9.y, r9.z, c28.x
+add.f r10.x, c39.y, (neg)r10.y
+mul.f r10.z, r1.w, c34.y
+mad.f32 r2.z, r2.z, r9.z, c28.x
+max.f r9.y, r9.y, c28.y
(rpt1)nop
-mul.f r0.z, r0.w, r0.z
-add.f r1.x, r2.z, r1.x
-mov.f32f32 r0.y, r0.y
-add.f r1.y, r1.y, c29.w
-add.f r0.z, r0.z, r11.z
-mov.f32f32 r1.x, r1.x
-add.f r0.x, r0.x, r0.y
-mov.f32f32 r0.y, r1.y
-mul.f r0.z, r0.z, r4.z
-mul.f r1.x, r0.w, r1.x
-mov.f32f32 r0.x, r0.x
+max.f r2.z, r2.z, c28.y
+min.f r9.y, r9.y, c29.w
+max.f r6.x, c28.y, r6.x
+add.f r8.y, r8.y, (neg)r10.z
+min.f r9.z, r2.z, c29.w
+add.f r2.z, c39.y, (neg)r10.y
+mov.f32f32 r10.w, r6.x
+add.f r11.x, c39.y, (neg)r10.y
+mul.f r5.x, r8.y, r5.x
+rcp r4.x, r4.x
+nop
+(ss)rcp r4.y, r4.y
+add.f r8.y, c39.y, (neg)r10.w
+sam (f32)(xyz)r11.y, r9.y, s#7, t#7
+(sy)(ss)add.f r9.y, r11.z, r11.w
+add.f r8.z, c35.z, r8.z
+add.f r9.z, c39.y, (neg)r10.w
+mul.f r8.y, r8.y, c9.z
+add.f r9.y, r9.y, c34.y
+add.f r8.z, r8.z, (neg)r10.z
+mul.f r9.z, r9.z, c9.y
+add.f r10.z, c39.y, (neg)r10.w
+cmps.f.lt r9.y, r9.y, r11.y
+(ss)mul.f r4.x, r8.z, r4.x
+mul.f r4.y, r8.w, r4.y
+mul.f r8.x, r8.x, c33.z
+cov.u32f32 r8.z, r9.y
+max.f r4.x, r4.x, r5.x
+mul.f r5.x, r10.z, c9.x
+add.f r4.y, c29.w, (neg)r4.y
+cmps.f.ne r8.z, r8.z, c28.y
+mad.f32 r2.w, c28.x, r2.w, r3.x
+rcp r8.w, r9.w
+min.f r3.y, r4.x, r3.y
+max.f r4.x, c28.y, r4.y
+max.f r4.y, r8.x, c28.y
+(ss)mad.f32 r2.w, r2.w, r8.w, c28.x
+mov.f32f32 r8.x, r3.y
+mov.f32f32 r9.y, r4.x
+min.f r4.y, r4.y, c29.w
+max.f r2.w, r2.w, c28.y
+mul.f r5.y, r5.y, r8.x
+mul.f r1.w, (neg)r1.w, c34.y
+(ss)add.f r9.w, c39.y, (neg)r9.y
+min.f r12.x, r2.w, c29.w
+mad.f32 r2.w, c28.x, r6.z, r0.x
+add.f r1.w, r1.w, r5.y
+absneg.f r5.y, (neg)c16.x
+add.f r6.z, c39.y, (neg)r9.y
+mad.f32 r2.w, r2.w, r8.w, c28.x
+add.f r8.w, c39.y, (neg)r9.y
+mul.f r10.z, r5.y, c28.x
+mov.f32f32 r12.z, c28.y
+max.f r2.w, r2.w, c28.y
+mul.f r2.x, r2.x, r8.x
+mul.f r3.y, r7.y, r3.y
+mul.f r4.w, (neg)r4.w, c34.y
+min.f r12.y, r2.w, c29.w
+add.f r1.w, r1.w, (neg)r10.z
+sel.b32 r2.w, r12.z, r4.z, r4.y
+add.f r2.x, r3.z, r2.x
+add.f r3.y, r4.w, r3.y
+(rpt1)nop
+sam (f32)(xyz)r12.x, r12.x, s#7, t#7
+(sy)sel.b32 r4.y, r12.z, r8.z, r11.w
+sel.b32 r4.z, r12.y, r8.z, r11.z
+sel.b32 r4.w, r12.x, r8.z, r11.y
+mul.f r7.y, r1.w, c3.x
+mul.f r8.x, r4.y, c10.z
+mul.f r8.z, r4.z, c10.y
+mul.f r10.z, r4.w, c10.x
+mul.f r4.y, r9.y, r4.y
+mul.f r8.x, r9.w, r8.x
+mul.f r6.z, r6.z, r8.z
+mul.f r8.z, r8.w, r10.z
+mul.f r4.z, r9.y, r4.z
+add.f r4.y, r4.y, r8.x
+mul.f r4.x, r4.x, r4.w
+add.f r2.x, r2.x, (neg)r3.z
+mul.f r3.z, r1.w, c4.x
+mul.f r4.y, r10.w, r4.y
+add.f r4.z, r4.z, r6.z
+add.f r4.x, r4.x, r8.z
+mad.f32 r4.w, c3.y, r2.x, r7.y
+add.f r4.y, r4.y, r8.y
+mul.f r4.z, r10.w, r4.z
+mul.f r4.x, r6.x, r4.x
+mul.f r5.y, r5.y, c28.x
+mul.f r4.y, r10.x, r4.y
+add.f r4.z, r4.z, r9.z
+add.f r4.x, r4.x, r5.x
+add.f r3.y, r3.y, (neg)r5.y
+mad.f32 r3.z, c4.y, r2.x, r3.z
+mul.f r1.w, r1.w, c2.x
+mul.f r5.x, r7.z, c36.y
+mad.f32 r4.w, c3.z, r3.y, r4.w
+mov.f32f32 r5.y, c29.w
+mov.f32f32 r6.x, c28.y
+mul.f r2.z, r2.z, r4.z
+mul.f r4.x, r11.x, r4.x
+mad.f32 r3.z, c4.z, r3.y, r3.z
+add.f r4.z, r5.y, r6.x
+mov.f32f32 r5.y, c28.y
+mad.f32 r1.w, c2.y, r2.x, r1.w
+mul.f r2.x, r7.x, c36.y
+mul.f r2.y, r2.y, c1.x
+add.f r4.z, r4.z, (neg)r5.y
+mad.f32 r1.w, c2.z, r3.y, r1.w
+(rpt1)nop
+mad.f32 r3.y, c3.w, r4.z, r4.w
+mad.f32 r7.z, c4.w, r4.z, r3.z
+mad.f32 r7.x, c2.w, r4.z, r1.w
+mul.f r1.w, r2.y, c36.y
+mul.f r7.y, r3.y, c35.w
+mad.f32 r0.y, r0.y, r9.x, r5.w
+mul.f r2.y, r6.w, c28.w
+mad.f32 r0.y, r0.x, r5.z, r0.y
+bary.f r3.y, 6, r1.x
+bary.f r3.z, 7, r1.x
+add.f r4.z, c39.y, (neg)r2.y
+sam.3d (f32)(xyz)r5.y, r7.x, s#1, t#1
+add.f r0.y, r0.y, c29.w
+mul.f r2.y, r2.y, c35.w
+bary.f r4.w, 14, r1.x
+mul.f r4.z, r4.z, c29.w
mul.f r0.y, r0.y, c28.x
-mov.f32f32 r0.z, r0.z
-add.f r1.x, r1.x, r12.x
-mul.f r0.x, r0.w, r0.x
-mov.f32f32 r0.y, r0.y
-(rpt2)nop
+(ss)nop
+sam (f32)(w)r6.z, r3.y, s#0, t#0
+(sy)(ss)cmps.f.lt r3.y, r7.y, c36.z
+bary.f r3.z, 13, r1.x
+add.f r2.y, r2.y, r4.z
mul.f r0.y, r1.z, r0.y
-mul.f r0.w, r1.x, r4.z
-add.f r0.x, r0.x, r12.y
+cov.u32f32 r1.z, r3.y
+rcp r3.y, r4.w
+bary.f (ei)r1.x, 12, r1.x
+mul.f r1.y, c8.x, r2.y
+mov.f32f32 r2.y, r0.y
+cmps.f.ne r1.z, r1.z, c28.y
nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.w, r0.w
-mul.f r0.x, r0.x, r4.z
+mov.f32f32 r4.z, r1.y
+mul.f r0.x, r1.y, r0.x
+mov.f32f32 r1.y, c28.y
+nop
+mul.f r3.x, r4.z, r3.x
+mad.f32 r0.x, c28.x, r1.x, (neg)r0.x
+mad.f32 r1.x, c28.x, r3.z, (neg)r3.x
+sel.b32 r1.y, r1.y, r1.z, r7.y
+(rpt1)nop
+(ss)mad.f32 r1.x, r1.x, r3.y, c28.x
+mad.f32 r0.x, r0.x, r3.y, c28.x
+(rpt1)nop
+max.f r1.x, r1.x, c28.y
+max.f r0.x, r0.x, c28.y
+(rpt1)nop
+min.f r3.y, r1.x, c29.w
+min.f r3.x, r0.x, c29.w
+(rpt5)nop
+sam (f32)(xyzw)r6.z, r3.x, s#6, t#6
+(sy)add.f r0.x, c29.w, (neg)r7.y
+mul.f r1.x, r7.x, r7.y
+mul.f r1.z, r6.w, r7.y
+(ss)mul.f r3.x, r6.z, r7.y
+mad.f32 r1.x, r5.w, r0.x, r1.x
+mad.f32 r1.z, r5.z, r0.x, r1.z
+mad.f32 r0.x, r5.y, r0.x, r3.x
nop
-mad.f32 r0.z, r0.y, r4.z, r0.z
-mad.f32 r0.w, r0.y, r4.z, r0.w
-mov.f32f32 r0.x, r0.x
+mul.f r1.x, r10.y, r1.x
+mul.f r1.z, r10.y, r1.z
+mul.f r0.x, r0.z, r0.x
nop
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-mad.f32 r0.x, r0.y, r4.z, r0.x
+add.f r0.z, r1.x, r4.y
+add.f r1.x, r1.z, r2.z
+add.f r0.x, r0.x, r4.x
+nop
+add.f r0.z, r0.z, r5.x
+add.f r1.x, r1.x, r2.x
+add.f r0.x, r0.x, r1.w
nop
-mov.f32f32 r10.w, r0.z
-mov.f32f32 r10.z, r0.w
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
-mov.f32f32 r10.y, r0.x
+mul.f r0.z, r0.w, r0.z
+mul.f r1.x, r0.w, r1.x
+mul.f r0.x, r0.w, r0.x
+nop
+add.f r0.z, r0.z, r3.w
+add.f r0.w, r1.x, r6.y
+add.f r0.x, r0.x, r7.w
+nop
+mul.f r0.z, r0.z, r1.y
+mul.f r0.w, r0.w, r1.y
+mad.f32 r2.z, r2.y, r1.y, r0.z
+mad.f32 r2.y, r2.y, r1.y, r0.w
+mul.f r0.x, r0.x, r1.y
+nop
+mad.f32 r2.x, r0.y, r1.y, r0.x
end
nop
nop
-; FRAG: outputs: r10.y (1:0)
-; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r2.x (5:9,cm=f,il=8,b=1) r4.y (5:10,cm=f,il=12,b=1) r12.y (5:11,cm=f,il=16,b=1) r4.w (5:12,cm=f,il=20,b=1) r3.z (5:13,cm=f,il=24,b=1) r6.x (5:14,cm=f,il=28,b=1)
-; FRAG: 1019 instructions, 0 half, 16 full
-; pos (bary): r1.x
-; color: r10.y
-; fragcoord: r0.x
+nop
+; FRAG: outputs: r2.x (1:0)
+; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r2.x (5:9,cm=f,il=8,b=1) r2.w (5:10,cm=f,il=12,b=1) r8.x (5:11,cm=f,il=16,b=1) r2.x (5:12,cm=f,il=20,b=1) r2.w (5:13,cm=f,il=24,b=1) r6.x (5:14,cm=f,il=28,b=1)
+; FRAG: 701 instructions, 0 half, 16 full
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-86.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-86.asm
index c62562e..a754b2c 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-86.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-86.asm
@@ -1,180 +1,116 @@
; options:
-; VERT: new compiler
+; VERT: TGSI compiler
@in(r0.x) in0
@in(r0.y) in1
@in(r0.z) in2
-@in(r0.w) in4
-@in(r1.x) in5
-@out(r2.x) out0
-@out(r2.y) out1
-@out(r2.z) out2
-@out(r2.w) out3
-@out(r4.y) out4
-@out(r4.z) out5
-@out(r4.w) out6
-@out(r5.x) out7
-@out(r6.y) out8
-@out(r6.z) out9
-@out(r6.w) out10
-@out(r7.x) out11
-@out(r7.y) out12
-@out(r7.z) out13
-@out(r7.w) out14
-@out(r8.x) out15
-@out(r5.y) out16
-@out(r5.z) out17
-@out(r5.w) out18
-@out(r6.x) out19
-@out(r3.y) out20
-@out(r3.z) out21
-@out(r3.w) out22
-@out(r4.x) out23
-@out(r8.y) out24
-@out(r8.z) out25
-@out(r8.w) out26
-@out(r9.x) out27
-(sy)(ss)absneg.f r1.y, (neg)c13.x
-mul.f r1.z, c4.w, r0.x
-mul.f r1.w, c4.y, r0.x
-mul.f r2.x, c0.w, r0.x
-mul.f r2.y, c0.y, r0.x
-mul.f r2.z, c0.x, r0.x
-mad.f32 r1.z, c5.w, r0.y, r1.z
-sin r2.w, r1.y
-(ss)mov.f32f32 r2.w, r2.w
-mad.f32 r1.z, c6.w, r0.z, r1.z
-mad.f32 r1.w, c5.y, r0.y, r1.w
-mad.f32 r2.x, c1.w, r0.y, r2.x
-mul.f r3.x, r0.x, r2.w
-add.f r1.z, r1.z, c7.w
-mad.f32 r1.w, c6.y, r0.z, r1.w
-mad.f32 r2.x, c2.w, r0.z, r2.x
-mov.f32f32 r3.x, r3.x
-(ss)cos r1.y, r1.y
-mov.f32f32 r1.z, r1.z
-(ss)mad.f32 r3.x, r0.z, r1.y, r3.x
-add.f r1.w, r1.w, c7.y
-add.f r2.x, r2.x, c3.w
-mad.f32 r2.y, c1.y, r0.y, r2.y
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r2.x, r2.x
-mul.f r3.x, r3.x, c12.x
-mov.f32f32 r3.z, r1.z
-mov.f32f32 r1.z, r1.w
-mov.f32f32 r1.w, r2.x
-mov.f32f32 r4.z, r3.x
-mad.f32 r2.x, c2.y, r0.z, r2.y
-mov.f32f32 r3.y, r1.z
-mov.f32f32 r5.w, r1.w
-mad.f32 r1.z, c1.x, r0.y, r2.z
-add.f r1.w, r2.x, c3.y
-mad.f32 r1.z, c2.x, r0.z, r1.z
-mov.f32f32 r2.x, r2.w
-mul.f r2.y, r0.z, r2.w
-mov.f32f32 r1.w, r1.w
-add.f r1.z, r1.z, c3.x
-mov.f32f32 r2.x, r2.x
-mad.f32 r2.y, r0.x, r1.y, (neg)r2.y
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r2.x, r2.x
-mul.f r2.y, r2.y, c12.x
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r6.z, r2.x
-mov.f32f32 r4.y, r2.y
-mov.f32f32 r5.z, r1.w
-mov.f32f32 r1.z, r1.z
-mul.f r1.w, c4.x, r0.x
-mad.f32 r2.x, c20.z, r0.w, c20.w
-mul.f r2.y, c8.y, r0.x
-mov.f32f32 r5.y, r1.z
-mad.f32 r1.z, c5.x, r0.y, r1.w
-mul.f r1.w, c14.x, r2.x
-mad.f32 r1.z, c6.x, r0.z, r1.z
-mad.f32 r2.x, c9.y, r0.y, r2.y
-mul.f r2.y, c8.x, r0.x
-(ss)mov.f32f32 r1.y, r1.y
-add.f r1.z, r1.z, c7.x
-mov.f32f32 r1.w, r1.w
+; in3 unused
+@in(r1.z) in4
+@in(r1.w) in5
+; in6 unused
+; in7 unused
+@out(r7.w) out0
+@out(r8.x) out1
+@out(r8.y) out2
+@out(r8.z) out3
+@out(r4.w) out4
+@out(r5.x) out5
+@out(r5.y) out6
+@out(r5.z) out7
+@out(r3.w) out8
+@out(r4.x) out9
+@out(r4.y) out10
+@out(r4.z) out11
+@out(r2.x) out12
+@out(r2.y) out13
+@out(r2.z) out14
+@out(r2.w) out15
+@out(r6.w) out16
+@out(r7.x) out17
+@out(r7.y) out18
+@out(r7.z) out19
+@out(r5.w) out20
+@out(r6.x) out21
+@out(r6.y) out22
+@out(r6.z) out23
+@out(r0.z) out24
+@out(r0.w) out25
+@out(r1.x) out26
+@out(r1.y) out27
+@const(c20.x) 0x41700000, 0x00000000, 0x3f5e9e1b, 0x3e19999a
+(sy)(ss)mov.f32f32 r2.x, c13.x
+mul.f r2.y, c4.w, r0.x
+mul.f r2.z, c4.y, r0.x
+mul.f r2.w, c4.x, r0.x
+mul.f r3.x, c0.w, r0.x
+mul.f r3.y, c0.y, r0.x
+mul.f r3.z, c0.x, r0.x
+sin r4.x, (neg)r2.x
+(ss)mul.f r4.w, r0.x, r4.x
+cos r3.w, (neg)r2.x
+(ss)mul.f r2.x, r0.z, r4.x
+(ss)mad.f32 r4.w, r0.z, r3.w, r4.w
+mad.f32 r2.x, r0.x, r3.w, (neg)r2.x
+(rpt1)nop
+mul.f r5.x, r4.w, c12.x
+mul.f r4.w, r2.x, c12.x
+mad.f32 r2.x, c5.w, r0.y, r2.y
+mad.f32 r2.y, c5.y, r0.y, r2.z
+mad.f32 r2.x, c6.w, r0.z, r2.x
+mad.f32 r2.y, c6.y, r0.z, r2.y
+mad.f32 r2.z, c5.x, r0.y, r2.w
+mad.f32 r2.w, c1.w, r0.y, r3.x
+add.f r6.x, r2.x, c7.w
+add.f r5.w, r2.y, c7.y
+mad.f32 r2.x, c6.x, r0.z, r2.z
+mad.f32 r2.y, c2.w, r0.z, r2.w
+mad.f32 r2.z, c1.y, r0.y, r3.y
+mad.f32 r2.w, c1.x, r0.y, r3.z
+add.f r7.z, r2.x, c7.x
+add.f r7.y, r2.y, c3.w
+mad.f32 r2.x, c2.y, r0.z, r2.z
+mad.f32 r2.y, c2.x, r0.z, r2.w
+mul.f r2.z, c8.y, r0.x
+mul.f r2.w, c8.x, r0.x
+add.f r7.x, r2.x, c3.y
+add.f r6.w, r2.y, c3.x
+mad.f32 r2.x, c9.y, r0.y, r2.z
+mad.f32 r2.y, c9.x, r0.y, r2.w
mad.f32 r2.x, c10.y, r0.z, r2.x
-mad.f32 r2.y, c9.x, r0.y, r2.y
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r1.w, r1.w
-add.f r2.x, r2.x, c11.y
mad.f32 r2.y, c10.x, r0.z, r2.y
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r2.x, r2.x
-add.f r2.y, r2.y, c11.x
-mov.f32f32 r6.x, r1.z
-mov.f32f32 r7.w, r1.w
-mov.f32f32 r1.z, r2.x
-mov.f32f32 r1.w, r2.y
-mov.f32f32 r1.y, r1.y
-mul.f r2.x, c16.w, r0.x
-mov.f32f32 r7.x, r1.z
-mov.f32f32 r1.z, r1.w
-mov.f32f32 r1.y, r1.y
-(rpt1)nop
-mov.f32f32 r6.w, r1.z
-mov.f32f32 r6.y, r1.y
-mad.f32 r1.y, c17.w, r0.y, r2.x
-mul.f r1.z, c16.z, r0.x
-mad.f32 r1.y, c18.w, r0.z, r1.y
-mad.f32 r1.z, c17.z, r0.y, r1.z
-mul.f r1.w, c16.y, r0.x
-mul.f r2.x, c16.x, r0.x
-add.f r1.y, r1.y, c19.w
-mad.f32 r1.z, c18.z, r0.z, r1.z
-mad.f32 r1.w, c17.y, r0.y, r1.w
-mad.f32 r0.y, c17.x, r0.y, r2.x
-mov.f32f32 r2.w, r1.y
-add.f r1.y, r1.z, c19.z
-mad.f32 r1.z, c18.y, r0.z, r1.w
+mul.f r2.z, c16.w, r0.x
+mul.f r2.w, c16.z, r0.x
+add.f r4.z, r2.x, c11.y
+add.f r4.y, r2.y, c11.x
+mad.f32 r2.x, c17.w, r0.y, r2.z
+mad.f32 r2.y, c17.z, r0.y, r2.w
+mad.f32 r2.x, c18.w, r0.z, r2.x
+mad.f32 r2.y, c18.z, r0.z, r2.y
+mul.f r2.z, c16.y, r0.x
+mul.f r2.w, c16.x, r0.x
+add.f r8.z, r2.x, c19.w
+add.f r8.y, r2.y, c19.z
+mad.f32 r2.x, c17.y, r0.y, r2.z
+mad.f32 r0.y, c17.x, r0.y, r2.w
+mad.f32 r2.x, c18.y, r0.z, r2.x
mad.f32 r0.y, c18.x, r0.z, r0.y
-mov.f32f32 r1.w, c20.x
-mov.f32f32 r2.z, r1.y
-add.f r1.y, r1.z, c19.y
-add.f r0.y, r0.y, c19.x
-mov.f32f32 r1.z, r1.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r2.y, r1.y
-mov.f32f32 r2.x, r0.y
-mov.f32f32 r4.x, r1.z
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, c20.y
-mov.f32f32 r1.y, c15.x
-mov.f32f32 r1.z, (0.000000)
-mov.f32f32 r3.w, r0.x
-mov.f32f32 r0.x, r0.y
-mov.f32f32 r0.y, r1.y
-mov.f32f32 r9.x, r1.z
+mov.f32f32 r6.z, c20.x
+mad.f32 r2.y, c20.z, r1.z, c20.w
+add.f r8.x, r2.x, c19.y
+add.f r7.w, r0.y, c19.x
+mov.f32f32 r0.w, r6.z
+mul.f r2.z, c14.x, r2.y
mov.f32f32 r1.y, (0.000000)
-mov.f32f32 r5.x, r0.x
-mov.f32f32 r4.w, r0.y
-mov.f32f32 r8.z, r1.w
-mov.f32f32 r8.w, r1.y
-mov.f32f32 r0.x, r0.z
-mov.f32f32 r0.y, r1.x
-mov.f32f32 r0.z, r1.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r8.y, r0.x
-mov.f32f32 r8.x, r0.y
-mov.f32f32 r7.z, r0.z
-mov.f32f32 r7.y, r0.w
+mov.f32f32 r1.x, (0.000000)
+mov.f32f32 r6.y, r0.x
+mov.f32f32 r2.w, r1.w
+mov.f32f32 r2.y, r1.w
+mov.f32f32 r2.x, r1.z
+mov.f32f32 r5.z, c20.y
+mov.f32f32 r5.y, c15.x
end
nop
-nop
-nop
-; VERT: outputs: r2.x (0:0) r4.y (5:9) r6.y (5:10) r7.y (5:11) r5.y (5:12) r3.y (5:13) r8.y (5:14)
-; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=3,il=12,b=0)
-; VERT: 138 instructions, 0 half, 10 full
-; pos: r2.x
+; VERT: outputs: r7.w (0:0) r4.w (5:9) r3.w (5:10) r2.x (5:11) r6.w (5:12) r5.w (5:13) r0.z (5:14)
+; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r1.z (0:0,cm=3,il=12,b=0)
+; VERT: 72 instructions, 0 half, 9 full
+; pos: r7.w
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-87.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-87.asm
index dc36d85..e8e697e 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-87.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-87.asm
@@ -2,49 +2,44 @@
; FRAG: new compiler
@in(r0.x) in0
@in(r0.y) in1
-@out(r3.x) out0
-@out(r3.y) out1
-@out(r3.z) out2
-@out(r3.w) out3
+@out(r1.x) out0
+@out(r1.y) out1
+@out(r1.z) out2
+@out(r1.w) out3
+@const(c1.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.z, 0, r0.x
-bary.f r0.w, 4, r0.x
-bary.f r1.x, 5, r0.x
-bary.f (ei)r0.x, 1, r0.x
-mov.f32f32 r1.y, r0.z
-mov.f32f32 r0.y, r0.z
-mov.f32f32 r1.w, r0.w
-mov.f32f32 r1.z, r0.x
-mov.f32f32 r0.z, r0.x
-mov.f32f32 r2.x, r1.x
-(rpt3)nop
-sam (f32)(xyz)r0.w, r1.y, s#1, t#1
-(sy)add.f r0.x, c1.y, (neg)r1.y
-sam (f32)(xyzw)r2.y, r0.y, s#0, t#0
-(ss)add.f r0.y, c1.y, (neg)r1.x
-add.f r0.z, c1.y, (neg)r0.w
-(sy)mul.f r3.w, c0.w, r3.x
-mul.f r0.x, r0.x, r2.w
-mul.f r1.y, r1.y, c0.z
-mul.f r0.y, r0.y, r2.z
-mul.f r0.z, r0.z, r2.y
-mul.f r1.x, r1.x, c0.y
-add.f r0.x, r1.y, r0.x
-sam (f32)(w)r1.y, r1.w, s#2, t#2
-mul.f r0.w, r0.w, c0.x
+bary.f r0.w, 1, r0.x
+bary.f r1.x, 4, r0.x
+bary.f (ei)r1.y, 5, r0.x
+mov.f32f32 r0.x, r0.z
+mov.f32f32 r0.y, r0.w
(rpt1)nop
-(sy)mul.f r3.z, r0.x, r2.x
-add.f r0.x, r1.x, r0.y
-add.f r0.y, r0.w, r0.z
+sam (f32)(xyz)r1.z, r0.z, s#1, t#1
+(sy)(ss)add.f r0.z, c1.y, (neg)r2.x
+mul.f r0.w, r2.x, c0.z
+mul.f r2.x, r1.w, c0.y
+add.f r1.w, c1.y, (neg)r1.w
+sam (f32)(xyzw)r2.y, r0.x, s#0, t#0
+(sy)(ss)mul.f r0.x, r0.z, r2.w
+add.f r0.y, c1.y, (neg)r1.z
+mul.f r0.z, r1.z, c0.x
+mul.f r2.z, r1.w, r2.z
+add.f r0.x, r0.w, r0.x
+sam (f32)(w)r3.y, r1.x, s#2, t#2
+mul.f r0.y, r0.y, r2.y
+mul.f r1.w, c0.w, r3.x
+nop
+(sy)mul.f r1.z, r0.x, r4.x
+add.f r0.x, r2.x, r2.z
+add.f r0.y, r0.z, r0.y
(rpt1)nop
-mul.f r3.y, r0.x, r2.x
-mul.f r3.x, r0.y, r2.x
+(ss)mul.f r1.y, r0.x, r4.x
+mul.f r1.x, r0.y, r4.x
end
nop
nop
nop
-; FRAG: outputs: r3.x (1:0)
+; FRAG: outputs: r1.x (1:0)
; FRAG: inputs: r0.x (5:0,cm=f,il=8,b=1) r1.x (5:1,cm=f,il=12,b=1)
-; FRAG: 38 instructions, 0 half, 4 full
-; pos (bary): r0.x
-; color: r3.x
+; FRAG: 31 instructions, 0 half, 5 full
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-88.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-88.asm
index 948d48e..270bdcf 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-88.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-88.asm
@@ -1,9 +1,9 @@
; options:
; VERT: new compiler
-@in(r0.x) in0
-@in(r0.y) in1
-@in(r0.z) in2
-@in(r0.w) in3
+@in(r4.x) in0
+@in(r4.y) in1
+@in(r4.z) in2
+@in(r4.w) in3
@in(r1.x) in4
@in(r1.y) in5
@in(r1.z) in6
@@ -12,72 +12,56 @@
@in(r2.y) in9
@in(r2.z) in10
@in(r2.w) in11
-@out(r3.w) out0
-@out(r4.x) out1
-@out(r4.y) out2
-@out(r4.z) out3
-@out(r4.w) out4
-@out(r5.x) out5
-@out(r5.y) out6
-@out(r5.z) out7
-@out(r2.w) out8
-@out(r3.x) out9
-@out(r3.y) out10
-@out(r3.z) out11
-@out(r0.w) out12
-@out(r1.x) out13
-@out(r1.y) out14
-@out(r1.z) out15
-(sy)(ss)mul.f r3.x, c3.x, r0.x
-mul.f r3.y, c2.x, r0.x
-mad.f32 r3.x, c3.y, r0.y, r3.x
-mad.f32 r3.y, c2.y, r0.y, r3.y
-mul.f r3.z, c1.x, r0.x
-mul.f r3.w, c0.x, r0.x
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r3.y, r3.y
-mad.f32 r3.x, c3.z, r0.z, r3.x
-mad.f32 r3.y, c2.z, r0.z, r3.y
-mad.f32 r3.z, c1.y, r0.y, r3.z
-mad.f32 r0.y, c0.y, r0.y, r3.w
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r3.y, r3.y
-mad.f32 r4.z, c3.w, r0.w, r3.x
-mad.f32 r4.y, c2.w, r0.w, r3.y
-mov.f32f32 r3.x, r3.z
-mov.f32f32 r0.y, r0.y
-mad.f32 r3.x, c1.z, r0.z, r3.x
-mad.f32 r0.y, c0.z, r0.z, r0.y
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r0.y, r0.y
-mad.f32 r4.x, c1.w, r0.w, r3.x
-mad.f32 r3.w, c0.w, r0.w, r0.y
-max.f r0.y, r1.w, c5.x
-max.f r0.w, r1.z, c5.x
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.x, r1.x
-min.f r5.z, r0.y, c5.y
-min.f r5.y, r0.w, c5.y
-max.f r0.y, r1.y, c5.x
-max.f r0.w, r1.x, c5.x
-(rpt1)nop
-min.f r5.x, r0.y, c5.y
-min.f r4.w, r0.w, c5.y
-mad.f32 r1.z, c4.x, r0.z, c4.y
-mad.f32 r1.y, c4.x, r0.z, c4.y
-mad.f32 r1.x, c4.x, r0.z, c4.y
-mad.f32 r0.w, c4.x, r0.x, c4.y
-mov.f32f32 r3.z, r2.w
-mov.f32f32 r3.y, r2.z
-mov.f32f32 r3.x, r2.y
-mov.f32f32 r2.w, r2.x
+@out(r0.x) out0
+@out(r0.y) out1
+@out(r0.z) out2
+@out(r0.w) out3
+@out(r1.x) out4
+@out(r1.y) out5
+@out(r1.z) out6
+@out(r1.w) out7
+@out(r2.x) out8
+@out(r2.y) out9
+@out(r2.z) out10
+@out(r2.w) out11
+@out(r3.x) out12
+@out(r3.y) out13
+@out(r3.z) out14
+@out(r3.w) out15
+@const(c5.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
+(sy)(ss)mul.f r0.x, c3.x, r4.x
+mul.f r0.y, c2.x, r4.x
+mad.f32 r0.x, c3.y, r4.y, r0.x
+mad.f32 r0.y, c2.y, r4.y, r0.y
+mad.f32 r0.x, c3.z, r4.z, r0.x
+mad.f32 r0.y, c2.z, r4.z, r0.y
+mad.f32 r0.w, c3.w, r4.w, r0.x
+mad.f32 r0.z, c2.w, r4.w, r0.y
+mul.f r0.x, c1.x, r4.x
+mul.f r0.y, c0.x, r4.x
+mad.f32 r0.x, c1.y, r4.y, r0.x
+mad.f32 r0.y, c0.y, r4.y, r0.y
+mad.f32 r0.x, c1.z, r4.z, r0.x
+mad.f32 r3.x, c0.z, r4.z, r0.y
+mad.f32 r0.y, c1.w, r4.w, r0.x
+mad.f32 r0.x, c0.w, r4.w, r3.x
+max.f r1.w, r1.w, c5.x
+max.f r1.z, r1.z, c5.x
+max.f r1.y, r1.y, c5.x
+max.f r1.x, r1.x, c5.x
+min.f r1.w, r1.w, c5.y
+min.f r1.z, r1.z, c5.y
+min.f r1.y, r1.y, c5.y
+min.f r1.x, r1.x, c5.y
+mad.f32 r3.w, c4.x, r4.z, c4.y
+mad.f32 r3.z, c4.x, r4.z, c4.y
+mad.f32 r3.y, c4.x, r4.z, c4.y
+mad.f32 r3.x, c4.x, r4.x, c4.y
end
nop
nop
-; VERT: outputs: r3.w (0:0) r4.w (1:0) r2.w (5:0) r0.w (5:1)
-; VERT: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (0:0,cm=f,il=12,b=0) r2.x (0:0,cm=f,il=16,b=0)
-; VERT: 47 instructions, 0 half, 6 full
-; pos: r3.w
+nop
+; VERT: outputs: r0.x (0:0) r1.x (1:0) r2.x (5:0) r3.x (5:1)
+; VERT: inputs: r4.x (0:0,cm=f,il=8,b=0) r1.x (0:0,cm=f,il=12,b=0) r2.x (0:0,cm=f,il=16,b=0)
+; VERT: 29 instructions, 0 half, 5 full
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-95.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-95.asm
index a7cfae4..22ca830 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-95.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-95.asm
@@ -2,37 +2,28 @@
; FRAG: new compiler
@in(r0.x) in0
@in(r0.y) in1
-@out(r1.y) out0
-@out(r1.z) out1
-@out(r1.w) out2
-@out(r2.x) out3
+@out(r1.x) out0
+@out(r1.y) out1
+@out(r1.z) out2
+@out(r1.w) out3
+@const(c0.x) 0x3ecccccd, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)bary.f r0.z, 0, r0.x
-bary.f (ei)r0.x, 1, r0.x
-mov.f32f32 r0.y, (0.000000)
-nop
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.x, r0.x
-(rpt1)nop
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.x
-(rpt5)nop
-sam (f32)(xyzw)r0.z, r0.z, s#0, t#0
-(sy)cmps.f.lt r0.x, r1.y, c0.x
-mov.f32f32 r2.x, r1.y
-mov.f32f32 r1.w, r1.x
-mov.f32f32 r1.z, r0.w
-cov.u32f32 r0.x, r0.x
-(rpt1)nop
-mov.f32f32 r1.y, r0.z
-cmps.f.ne p0.x, r0.x, r0.y
+bary.f (ei)r0.w, 1, r0.x
+mov.f32f32 r0.x, (0.000000)
+(rpt4)nop
+sam (f32)(xyzw)r1.x, r0.z, s#0, t#0
+(sy)cmps.f.lt r0.y, r1.w, c0.x
+(rpt2)nop
+cov.u32f32 r0.y, r0.y
+(rpt2)nop
+cmps.f.ne p0.x, r0.y, r0.x
(rpt5)nop
kill p0.x
end
nop
nop
-; FRAG: outputs: r1.y (1:0)
+nop
+; FRAG: outputs: r1.x (1:0)
; FRAG: inputs: r0.x (5:9,cm=f,il=8,b=1)
-; FRAG: 34 instructions, 0 half, 3 full
-; pos (bary): r0.x
-; color: r1.y
+; FRAG: 26 instructions, 0 half, 2 full
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-96.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-96.asm
index 08421de..f362ccd 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-96.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-96.asm
@@ -3,170 +3,134 @@
@in(r0.x) in0
@in(r0.y) in1
@in(r0.z) in2
-@in(r0.w) in4
-@in(r1.x) in5
-@out(r3.x) out0
-@out(r3.y) out1
-@out(r3.z) out2
-@out(r3.w) out3
-@out(r2.x) out4
-@out(r2.y) out5
-@out(r2.z) out6
-@out(r2.w) out7
-(sy)(ss)floor.f r1.y, c11.z
+@in(r1.x) in4
+@in(r1.y) in5
+@out(r0.x) out0
+@out(r0.y) out1
+@out(r0.z) out2
+@out(r0.w) out3
+@out(r1.x) out4
+@out(r1.y) out5
+@out(r1.z) out6
+@out(r1.w) out7
+@const(c15.x) 0x3ecccccd, 0x00000000, 0x3eaaa64c, 0x3e800000
+@const(c16.x) 0x3f000000, 0x40000000, 0xbf800000, 0x40400000
+@const(c17.x) 0x399d4952, 0x3e4ccccd, 0x3f99999a, 0x3fd9999a
+@const(c18.x) 0x3c88ce70, 0x00000000, 0x00000000, 0x00000000
+@const(c19.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
+(sy)(ss)floor.f r0.w, c11.z
floor.f r1.z, c11.x
absneg.f r1.w, (abs)c14.x
absneg.f r2.x, (abs)c14.y
-add.f r1.y, c11.z, (neg)r1.y
+add.f r0.w, c11.z, (neg)r0.w
add.f r1.z, c11.x, (neg)r1.z
mul.f r2.y, r0.x, r0.z
add.f r1.w, r1.w, r2.x
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.z, r1.z
-mul.f r2.x, r0.y, c17.x
-mov.f32f32 r1.w, r1.w
-max.f r1.y, r1.y, c15.y
+max.f r0.w, r0.w, c15.y
max.f r1.z, r1.z, c15.y
-mul.f r2.x, r2.y, r2.x
-mul.f r2.y, c13.x, r1.w
-min.f r1.y, r1.y, c19.y
+mul.f r2.x, r0.y, c17.x
+mul.f r2.z, c13.x, r1.w
+min.f r0.w, r0.w, c19.y
min.f r1.z, r1.z, c19.y
-mul.f r2.z, c8.z, r0.x
-mov.f32f32 r2.x, r2.x
-max.f r1.y, r1.y, c15.x
+mul.f r2.w, c8.z, r0.x
+mov.f32f32 r1.w, r1.w
+max.f r0.w, r0.w, c15.x
max.f r1.z, r1.z, c15.x
-mad.f32 r2.z, c9.z, r0.y, r2.z
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.z, r1.z
-mad.f32 r2.z, c10.z, r0.z, r2.z
-mul.f r2.w, c8.x, r0.x
-mul.f r1.y, c13.x, r1.y
-mad.f32 r2.w, c9.x, r0.y, r2.w
-add.f r2.z, r2.z, c11.z
-mad.f32 r2.w, c10.x, r0.z, r2.w
-mov.f32f32 r1.y, r1.y
-mad.f32 r2.y, c15.w, r2.y, r2.z
-mov.f32f32 r2.x, r2.x
-add.f r2.w, r2.w, c11.x
-mad.f32 r1.y, c15.z, r1.y, c11.x
-mad.f32 r1.z, c13.x, r1.z, r2.w
-mov.f32f32 r2.y, r2.y
-max.f r2.x, r2.x, c15.y
-mov.f32f32 r1.y, r1.y
+mul.f r3.x, c8.x, r0.x
+mad.f32 r2.w, c9.z, r0.y, r2.w
+mul.f r0.w, c13.x, r0.w
+mad.f32 r3.x, c9.x, r0.y, r3.x
+mad.f32 r2.w, c10.z, r0.z, r2.w
+mad.f32 r3.x, c10.x, r0.z, r3.x
+mad.f32 r0.w, c15.z, r0.w, c11.x
+max.f r1.w, r1.w, c17.z
+mul.f r2.x, r2.y, r2.x
+add.f r2.y, r3.x, c11.x
+add.f r0.w, r0.w, c16.x
+mad.f32 r1.z, c13.x, r1.z, r2.y
+add.f r2.w, r2.w, c11.z
+min.f r1.w, r1.w, c17.w
+floor.f r3.x, r0.w
add.f r1.z, r1.z, c16.x
-mov.f32f32 r2.y, r2.y
-min.f r2.x, r2.x, c19.y
-mov.f32f32 r1.y, r1.y
+mad.f32 r2.z, c15.w, r2.z, r2.w
+mov.f32f32 r3.y, r1.w
+add.f r0.w, r0.w, (neg)r3.x
floor.f r3.x, r1.z
-add.f r2.y, r2.y, c16.x
-min.f r2.x, r2.x, c17.y
-add.f r1.y, r1.y, c16.x
+add.f r2.z, r2.z, c16.x
+max.f r2.x, r2.x, c15.y
+mad.f32 r0.w, c16.y, r0.w, c16.z
add.f r1.z, r1.z, (neg)r3.x
-floor.f r3.x, r2.y
-mov.f32f32 r2.x, r2.x
-floor.f r3.y, r1.y
+floor.f r3.x, r2.z
+min.f r2.x, r2.x, c19.y
+absneg.f r0.w, (abs)r0.w
mad.f32 r1.z, c16.y, r1.z, c16.z
-add.f r2.y, r2.y, (neg)r3.x
-mul.f r3.x, r0.y, c18.x
-add.f r1.y, r1.y, (neg)r3.y
+add.f r2.z, r2.z, (neg)r3.x
+min.f r2.x, r2.x, c17.y
+mul.f r3.x, c16.y, r0.w
absneg.f r1.z, (abs)r1.z
-mad.f32 r2.y, c16.y, r2.y, c16.z
-mov.f32f32 r3.x, r3.x
-mad.f32 r1.y, c16.y, r1.y, c16.z
-mul.f r3.y, c16.y, r1.z
-absneg.f r2.y, (abs)r2.y
+mad.f32 r2.z, c16.y, r2.z, c16.z
+mul.f r0.w, r0.w, r0.w
+add.f r3.x, c16.w, (neg)r3.x
+mul.f r3.z, c16.y, r1.z
+absneg.f r2.z, (abs)r2.z
mul.f r1.z, r1.z, r1.z
-absneg.f r1.y, (abs)r1.y
-add.f r3.y, c16.w, (neg)r3.y
-mul.f r3.z, c16.y, r2.y
-mul.f r2.y, r2.y, r2.y
-mul.f r3.w, c16.y, r1.y
-mul.f r1.z, r1.z, r3.y
-add.f r3.y, c16.w, (neg)r3.z
-mul.f r1.y, r1.y, r1.y
-add.f r3.z, c16.w, (neg)r3.w
-mov.f32f32 r1.z, r1.z
-mul.f r2.y, r2.y, r3.y
-mov.f32f32 r3.x, r3.x
-mul.f r1.y, r1.y, r3.z
-mul.f r1.z, r1.z, r2.x
-mov.f32f32 r2.y, r2.y
+mul.f r0.w, r0.w, r3.x
+mul.f r3.x, r0.y, c18.x
+add.f r3.z, c16.w, (neg)r3.z
+mul.f r3.w, c16.y, r2.z
+mul.f r2.z, r2.z, r2.z
max.f r3.x, r3.x, c15.y
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.z, r1.z
-mul.f r2.x, r2.y, r2.x
-min.f r2.y, r3.x, c19.y
-max.f r1.w, r1.w, c17.z
-mul.f r3.x, c8.y, r0.x
+mul.f r1.z, r1.z, r3.z
+mov.f32f32 r3.z, r2.x
+add.f r3.w, c16.w, (neg)r3.w
+min.f r3.x, r3.x, c19.y
+mul.f r4.x, c8.y, r0.x
mul.f r0.x, c8.w, r0.x
-min.f r2.y, r2.y, c15.w
-mov.f32f32 r1.w, r1.w
-mad.f32 r3.x, c9.y, r0.y, r3.x
+mad.f32 r4.x, c9.y, r0.y, r4.x
+min.f r3.x, r3.x, c15.w
+mul.f r1.z, r1.z, r3.z
+mul.f r2.z, r2.z, r3.w
+mad.f32 r3.z, c10.y, r0.z, r4.x
+mul.f r0.w, r0.w, r3.x
+mov.f32f32 r3.x, r1.z
+mul.f r2.x, r2.z, r2.x
+add.f r2.z, r3.z, c11.y
+mov.f32f32 r3.z, r0.w
+mad.f32 r0.w, c14.y, r0.w, r1.z
+mad.f32 r1.z, r2.x, r1.w, r2.w
+mad.f32 r1.w, r2.x, r3.y, r2.y
+mad.f32 r2.y, c14.x, r3.z, r3.x
+mad.f32 r2.x, r2.x, r3.y, r2.z
mad.f32 r0.x, c9.w, r0.y, r0.x
-mov.f32f32 r0.y, r2.y
-min.f r1.w, r1.w, c17.w
-mad.f32 r2.y, c10.y, r0.z, r3.x
-mad.f32 r0.x, c10.w, r0.z, r0.x
-mul.f r0.y, r1.y, r0.y
-mov.f32f32 r0.z, r1.w
-add.f r1.y, r2.y, c11.y
-add.f r0.x, r0.x, c11.w
-mov.f32f32 r0.y, r0.y
-mad.f32 r1.w, r2.x, r0.z, r2.w
-mad.f32 r1.y, r2.x, r0.z, r1.y
-mad.f32 r0.z, r2.x, r0.z, r2.z
-mad.f32 r2.x, c14.x, r0.y, r1.z
-mad.f32 r0.y, c14.y, r0.y, r1.z
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.z, r2.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r2.y, r1.x
-mov.f32f32 r2.x, r0.w
-add.f r0.w, r1.w, r1.z
-add.f r0.y, r0.z, r0.y
-(rpt1)nop
-mov.f32f32 r0.z, r0.w
-mov.f32f32 r0.y, r0.y
-(rpt1)nop
-mul.f r0.w, c0.w, r0.z
-mul.f r1.x, c0.z, r0.z
-mad.f32 r0.w, c1.w, r1.y, r0.w
-mad.f32 r1.x, c1.z, r1.y, r1.x
-mul.f r1.z, c0.y, r0.z
-mul.f r0.z, c0.x, r0.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.x, r1.x
-mad.f32 r0.w, c2.w, r0.y, r0.w
-mad.f32 r1.x, c2.z, r0.y, r1.x
-mad.f32 r1.w, c3.w, r0.x, r0.w
-mad.f32 r3.x, c3.z, r0.x, r1.x
-mad.f32 r1.z, c1.y, r1.y, r1.z
-mad.f32 r0.z, c1.x, r1.y, r0.z
-mov.f32f32 r1.y, r1.w
-mov.f32f32 r1.w, r3.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r3.w, r1.y
-mov.f32f32 r3.z, r1.w
-mad.f32 r1.y, c2.y, r0.y, r1.z
-mad.f32 r0.y, c2.x, r0.y, r0.z
-mad.f32 r0.z, c3.y, r0.x, r1.y
-mad.f32 r0.x, c3.x, r0.x, r0.y
-mov.f32f32 r2.w, r0.w
-mov.f32f32 r2.z, r1.x
-mov.f32f32 r0.y, r0.z
-mov.f32f32 r0.x, r0.x
+nop
+add.f r0.y, r1.w, r2.y
+add.f r2.y, r1.z, r0.w
(rpt1)nop
-mov.f32f32 r3.y, r0.y
-mov.f32f32 r3.x, r0.x
+mov.f32f32 r0.w, r0.y
+mul.f r0.y, c0.x, r0.y
+mov.f32f32 r2.z, r2.y
+mad.f32 r0.x, c10.w, r0.z, r0.x
+mul.f r0.z, c0.w, r0.w
+mul.f r1.z, c0.z, r0.w
+mad.f32 r0.z, c1.w, r2.x, r0.z
+mad.f32 r1.z, c1.z, r2.x, r1.z
+mad.f32 r1.w, c2.w, r2.z, r0.z
+add.f r2.w, r0.x, c11.w
+mad.f32 r1.z, c2.z, r2.z, r1.z
+mul.f r0.x, c0.y, r0.w
+mad.f32 r0.y, c1.x, r2.x, r0.y
+mad.f32 r0.w, c3.w, r2.w, r1.w
+mad.f32 r0.z, c3.z, r2.w, r1.z
+mad.f32 r0.x, c1.y, r2.x, r0.x
+mad.f32 r0.y, c2.x, r2.y, r0.y
+mad.f32 r2.x, c2.y, r2.z, r0.x
+mad.f32 r0.x, c3.x, r2.w, r0.y
+mad.f32 r0.y, c3.y, r2.w, r2.x
end
nop
nop
nop
-; VERT: outputs: r3.x (0:0) r2.x (5:9)
-; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=3,il=12,b=0)
-; VERT: 152 instructions, 0 half, 4 full
-; pos: r3.x
+; VERT: outputs: r0.x (0:0) r1.x (5:9)
+; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r1.x (0:0,cm=3,il=12,b=0)
+; VERT: 110 instructions, 0 half, 5 full
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-97.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-97.asm
index 7fd0d8e..85a697c 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-97.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-97.asm
@@ -2,48 +2,38 @@
; FRAG: new compiler
@in(r0.z) in2
@in(r0.w) in3
-@out(r1.x) out0
-@out(r1.y) out1
-@out(r1.z) out2
-@out(r1.w) out3
+@out(r2.x) out0
+@out(r2.y) out1
+@out(r2.z) out2
+@out(r2.w) out3
+@const(c4.x) 0x3f000000, 0x00000000, 0x3f800000, 0x3fb8aa65
+@const(c5.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)add.f r0.x, r0.w, c4.y
-mov.f32f32 r0.y, c0.w
-absneg.f r0.w, (neg)c2.x
+absneg.f r0.y, (neg)c2.x
+mov.f32f32 r2.w, c0.w
add.f r0.z, r0.z, c4.y
(rpt2)nop
rcp r0.x, r0.x
(ss)mul.f r0.x, r0.z, r0.x
-mov.f32f32 r1.w, r0.y
-mul.f r0.y, r0.w, c2.x
-nop
-mov.f32f32 r0.x, r0.x
+mul.f r0.y, r0.y, c2.x
(rpt2)nop
mul.f r0.y, r0.y, r0.x
-(rpt2)nop
-mov.f32f32 r0.y, r0.y
+mov.f32f32 r0.x, r0.x
(rpt2)nop
mul.f r0.x, r0.y, r0.x
(rpt2)nop
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
mul.f r0.x, r0.x, c4.w
-(rpt2)nop
-mov.f32f32 r0.x, r0.x
(rpt5)nop
exp2 r0.x, r0.x
-(ss)mov.f32f32 r0.x, r0.x
-(rpt2)nop
-add.f r0.y, c5.y, (neg)r0.x
-mul.f r0.x, r0.x, c4.z
+(ss)mov.f32f32 r0.y, r0.x
+(ss)mul.f r0.x, r0.x, c4.z
(rpt1)nop
+add.f r0.y, c5.y, (neg)r0.y
+(rpt2)nop
mul.f r0.y, r0.y, c2.y
(rpt2)nop
add.f r0.x, r0.x, r0.y
(rpt2)nop
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
max.f r0.x, r0.x, c4.y
(rpt2)nop
min.f r0.x, r0.x, c4.z
@@ -51,25 +41,22 @@ min.f r0.x, r0.x, c4.z
add.f r0.y, c5.y, (neg)r0.x
add.f r0.z, c5.y, (neg)r0.x
add.f r0.w, c5.y, (neg)r0.x
-mul.f r2.x, r0.x, c0.z
+mul.f r1.x, r0.x, c0.z
mul.f r0.y, r0.y, c1.z
mul.f r0.z, r0.z, c1.y
mul.f r0.w, r0.w, c1.x
-mul.f r2.y, r0.x, c0.y
-add.f r0.y, r2.x, r0.y
+nop
+add.f r2.z, r1.x, r0.y
+mul.f r0.y, r0.x, c0.y
mul.f r0.x, r0.x, c0.x
(rpt1)nop
-mov.f32f32 r1.z, r0.y
-add.f r0.y, r2.y, r0.z
-add.f r0.x, r0.x, r0.w
-(rpt1)nop
-mov.f32f32 r1.y, r0.y
-mov.f32f32 r1.x, r0.x
+add.f r2.y, r0.y, r0.z
+add.f r2.x, r0.x, r0.w
end
-; FRAG: outputs: r1.x (1:0)
+nop
+nop
+nop
+; FRAG: outputs: r2.x (1:0)
; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0)
-; FRAG: 96 instructions, 0 half, 3 full
-; pos (bary): r1.x
-; color: r1.x
-; fragcoord: r0.x
+; FRAG: 70 instructions, 0 half, 3 full
diff --git a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-99.asm b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-99.asm
index 7fd0d8e..85a697c 100644
--- a/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-99.asm
+++ b/reference/0ad-cycladic-archipelago/0ad-cycladic-archipelago-99.asm
@@ -2,48 +2,38 @@
; FRAG: new compiler
@in(r0.z) in2
@in(r0.w) in3
-@out(r1.x) out0
-@out(r1.y) out1
-@out(r1.z) out2
-@out(r1.w) out3
+@out(r2.x) out0
+@out(r2.y) out1
+@out(r2.z) out2
+@out(r2.w) out3
+@const(c4.x) 0x3f000000, 0x00000000, 0x3f800000, 0x3fb8aa65
+@const(c5.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)add.f r0.x, r0.w, c4.y
-mov.f32f32 r0.y, c0.w
-absneg.f r0.w, (neg)c2.x
+absneg.f r0.y, (neg)c2.x
+mov.f32f32 r2.w, c0.w
add.f r0.z, r0.z, c4.y
(rpt2)nop
rcp r0.x, r0.x
(ss)mul.f r0.x, r0.z, r0.x
-mov.f32f32 r1.w, r0.y
-mul.f r0.y, r0.w, c2.x
-nop
-mov.f32f32 r0.x, r0.x
+mul.f r0.y, r0.y, c2.x
(rpt2)nop
mul.f r0.y, r0.y, r0.x
-(rpt2)nop
-mov.f32f32 r0.y, r0.y
+mov.f32f32 r0.x, r0.x
(rpt2)nop
mul.f r0.x, r0.y, r0.x
(rpt2)nop
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
mul.f r0.x, r0.x, c4.w
-(rpt2)nop
-mov.f32f32 r0.x, r0.x
(rpt5)nop
exp2 r0.x, r0.x
-(ss)mov.f32f32 r0.x, r0.x
-(rpt2)nop
-add.f r0.y, c5.y, (neg)r0.x
-mul.f r0.x, r0.x, c4.z
+(ss)mov.f32f32 r0.y, r0.x
+(ss)mul.f r0.x, r0.x, c4.z
(rpt1)nop
+add.f r0.y, c5.y, (neg)r0.y
+(rpt2)nop
mul.f r0.y, r0.y, c2.y
(rpt2)nop
add.f r0.x, r0.x, r0.y
(rpt2)nop
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
max.f r0.x, r0.x, c4.y
(rpt2)nop
min.f r0.x, r0.x, c4.z
@@ -51,25 +41,22 @@ min.f r0.x, r0.x, c4.z
add.f r0.y, c5.y, (neg)r0.x
add.f r0.z, c5.y, (neg)r0.x
add.f r0.w, c5.y, (neg)r0.x
-mul.f r2.x, r0.x, c0.z
+mul.f r1.x, r0.x, c0.z
mul.f r0.y, r0.y, c1.z
mul.f r0.z, r0.z, c1.y
mul.f r0.w, r0.w, c1.x
-mul.f r2.y, r0.x, c0.y
-add.f r0.y, r2.x, r0.y
+nop
+add.f r2.z, r1.x, r0.y
+mul.f r0.y, r0.x, c0.y
mul.f r0.x, r0.x, c0.x
(rpt1)nop
-mov.f32f32 r1.z, r0.y
-add.f r0.y, r2.y, r0.z
-add.f r0.x, r0.x, r0.w
-(rpt1)nop
-mov.f32f32 r1.y, r0.y
-mov.f32f32 r1.x, r0.x
+add.f r2.y, r0.y, r0.z
+add.f r2.x, r0.x, r0.w
end
-; FRAG: outputs: r1.x (1:0)
+nop
+nop
+nop
+; FRAG: outputs: r2.x (1:0)
; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0)
-; FRAG: 96 instructions, 0 half, 3 full
-; pos (bary): r1.x
-; color: r1.x
-; fragcoord: r0.x
+; FRAG: 70 instructions, 0 half, 3 full
diff --git a/reference/0ad-frag-1.asm b/reference/0ad-frag-1.asm
index 12fbb01..be30c1a 100644
--- a/reference/0ad-frag-1.asm
+++ b/reference/0ad-frag-1.asm
@@ -8,203 +8,139 @@
@out(r2.y) out1
@out(r2.z) out2
@out(r2.w) out3
+@const(c9.x) 0x3f000000, 0x00000000, 0x3d4ccccd, 0xbb449ba6
+@const(c10.x) 0xbf000000, 0x3f800000, 0x40000000, 0x3de38866
+@const(c11.x) 0x3fb8aa65, 0x3cf5c28f, 0x00000000, 0x00000000
+@const(c12.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.x, 4, r1.x
add.f r0.y, r0.w, c9.y
bary.f r0.w, 5, r1.x
bary.f r1.z, 10, r1.x
-add.f r1.w, r0.x, c10.x
-bary.f r2.x, 8, r1.x
+add.f r2.x, r0.x, c10.x
+bary.f r1.w, 11, r1.x
add.f r2.y, r0.w, c10.x
-mov.f32f32 r1.z, r1.z
-floor.f r2.z, r1.w
+bary.f r3.y, 8, r1.x
+floor.f r2.z, r2.x
rcp r0.y, r0.y
add.f r0.z, r0.z, c9.y
floor.f r2.w, r2.y
-mov.f32f32 r3.y, r1.z
-add.f r1.z, r1.w, (neg)r2.z
+bary.f r3.z, 9, r1.x
+add.f r3.w, r2.x, (neg)r2.z
(ss)mul.f r0.y, r0.z, r0.y
-add.f r0.z, r2.y, (neg)r2.w
-bary.f r1.w, 11, r1.x
-mov.f32f32 r1.z, r1.z
+absneg.f r0.z, (neg)c7.x
+add.f r4.x, r2.y, (neg)r2.w
+mov.f32f32 r4.y, r3.w
+sam (f32)(xyzw)r2.x, r1.z, s#0, t#0
+(ss)add.f r1.z, r3.w, c10.y
+mul.f r0.z, r0.z, c7.x
+mov.f32f32 r1.w, r4.x
+mul.f r3.w, c9.x, r4.y
+add.f r4.y, c10.z, (neg)r4.y
+mul.f r0.z, r0.z, r0.y
mov.f32f32 r0.y, r0.y
-absneg.f r2.y, (neg)c7.x
-mov.f32f32 r0.z, r0.z
-mul.f r2.z, c9.x, r1.z
-mov.f32f32 r1.w, r1.w
-add.f r2.w, c10.z, (neg)r1.z
-add.f r3.w, c10.z, (neg)r0.z
-mov.f32f32 r2.z, r2.z
-mul.f r2.y, r2.y, c7.x
-mul.f r4.x, c9.x, r0.z
-mov.f32f32 r3.z, r1.w
-add.f r0.x, r0.x, (neg)r2.z
-mul.f r1.w, r2.y, r0.y
-mov.f32f32 r2.y, r4.x
-mov.f32f32 r2.z, r2.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r1.w, r1.w
-add.f r0.w, r0.w, (neg)r2.y
-sam (f32)(xyzw)r4.x, r3.y, s#0, t#0
-(sy)cmps.f.lt r2.y, r4.w, c9.z
-add.f r2.w, c10.x, r0.x
-mul.f r0.y, r1.w, r0.y
+add.f r0.x, r0.x, (neg)r3.w
+mul.f r3.w, c9.x, r1.w
+mov.f32f32 r4.z, r4.y
+mul.f r0.y, r0.z, r0.y
+mov.f32f32 r0.z, r0.x
+add.f r0.w, r0.w, (neg)r3.w
add.f r0.x, c10.y, r0.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.w, r2.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
-add.f r2.w, c10.y, r0.w
-mul.f r1.w, r1.w, c3.z
mul.f r0.y, r0.y, c11.x
-mul.f r0.x, r0.x, c3.z
-mov.f32f32 r2.w, r2.w
-(ss)mov.f32f32 r3.y, r1.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r3.z, r0.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r5.x, r3.y
-add.f r0.w, c10.x, r0.w
-mov.f32f32 r3.y, r3.z
-mov.f32f32 r5.w, r0.x
-mov.f32f32 r0.x, r1.w
-mov.f32f32 r0.w, r0.w
-exp2 r0.y, r0.y
-(ss)mov.f32f32 r0.y, r0.y
-mov.f32f32 r6.z, r3.y
-mul.f r1.w, r2.w, c3.w
-mul.f r0.w, r0.w, c3.w
-add.f r2.w, c12.y, (neg)r0.y
-mov.f32f32 r7.y, r0.x
-mov.f32f32 r0.x, r1.w
-mov.f32f32 r3.y, r0.w
-mul.f r2.w, r2.w, c7.y
-mul.f r0.y, r0.y, c10.y
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r5.y, r3.y
-bary.f r3.y, 6, r1.x
-add.f r0.y, r0.y, r2.w
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r0.w, r0.w
-add.f r2.w, r3.y, c9.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r6.w, r1.w
-mov.f32f32 r6.x, r0.w
-mov.f32f32 r0.w, r2.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.w, r2.w
-mov.f32f32 r3.y, r2.w
-mov.f32f32 r5.z, r0.w
-max.f r0.y, r0.y, c9.y
-mov.f32f32 r7.x, r1.w
-mov.f32f32 r6.y, r3.y
-mov.f32f32 r7.z, r0.x
-mov.f32f32 r0.x, r2.w
-cov.u32f32 r0.w, r2.y
-sam.s (f32)(x)r7.w, r5.x, s#2, t#2
-(sy)mov.f32f32 r1.w, r7.w
-min.f r0.y, r0.y, c10.y
-sam.s (f32)(x)r7.w, r6.z, s#2, t#2
-(sy)mov.f32f32 r2.y, r7.w
-(ss)nop
-sam.s (f32)(x)r5.x, r5.w, s#2, t#2
-(sy)mov.f32f32 r2.w, r5.x
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r3.y, r3.w
-add.f r3.z, c12.y, (neg)r0.y
-add.f r3.w, c12.y, (neg)r0.y
-add.f r5.x, c12.y, (neg)r0.y
-mul.f r5.y, r2.z, r3.y
-mul.f r3.z, r3.z, c6.z
-mul.f r3.w, r3.w, c6.y
-mul.f r5.x, r5.x, c6.x
-mul.f r1.w, r5.y, r1.w
-add.f r1.z, r1.z, c10.y
-mov.f32f32 r7.w, r0.x
-mov.f32f32 r0.x, (0.000000)
-add.f r0.z, r0.z, c10.y
-mul.f r3.y, r1.z, r3.y
-mov.f32f32 r2.x, r2.x
-cmps.f.ne p0.x, r0.w, r0.x
-mul.f r0.x, r2.z, r0.z
-mad.f32 r0.w, r3.y, r2.w, r1.w
-sam.s (f32)(x)r5.y, r7.y, s#2, t#2
+add.f r0.z, c10.x, r0.z
+mov.f32f32 r3.w, r0.w
+mul.f r4.w, r0.x, c3.z
+add.f r0.x, c10.y, r0.w
+mul.f r5.z, r0.z, c3.z
+add.f r0.z, c10.x, r3.w
+mov.f32f32 r6.y, r4.w
+mul.f r5.x, r0.x, c3.w
+mov.f32f32 r7.x, r5.z
+mul.f r6.z, r0.z, c3.w
+exp2 r0.x, r0.y
+(ss)mov.f32f32 r0.y, r0.x
+bary.f r0.z, 6, r1.x
+mov.f32f32 r5.w, r5.x
+mov.f32f32 r7.y, r6.z
+add.f r0.y, c12.y, (neg)r0.y
+add.f r5.y, r0.z, c9.w
+add.f r0.z, c10.z, (neg)r1.w
+mul.f r0.x, r0.x, c10.y
+add.f r0.w, r4.x, c10.y
+mov.f32f32 r7.z, r5.y
+mul.f r0.y, r0.y, c7.y
+mov.f32f32 r6.w, r5.y
+mov.f32f32 r6.x, r5.y
(rpt1)nop
-(sy)mov.f32f32 r1.w, r5.y
-mov.f32f32 r0.w, r0.w
-bary.f r2.z, 0, r1.x
-mov.f32f32 r5.y, r2.x
-mad.f32 r0.x, r0.x, r1.w, r0.w
-bary.f r0.w, 1, r1.x
-bary.f r1.w, 9, r1.x
+add.f r0.x, r0.x, r0.y
+sam.s (f32)(x)r7.x, r7.x, s#2, t#2
+mov.f32f32 r0.y, r0.z
+nop
+(ss)nop
+sam.s (f32)(x)r7.y, r6.y, s#2, t#2
+sam.s (f32)(x)r7.z, r4.w, s#2, t#2
mul.f r0.z, r1.z, r0.z
-mov.f32f32 r0.x, r0.x
-bary.f (ei)r1.x, 2, r1.x
-mov.f32f32 r1.y, r1.w
-mad.f32 r0.x, r0.z, r2.y, r0.x
+mul.f r1.w, r4.y, r0.w
+mul.f r0.y, r4.z, r0.y
+max.f r0.x, r0.x, c9.y
+sam.s (f32)(x)r3.w, r5.z, s#2, t#2
+(sy)cmps.f.lt r4.x, r2.w, c9.z
+mul.f r0.w, r1.z, r0.w
+mul.f r0.y, r0.y, r7.x
+min.f r0.x, r0.x, c10.y
+mad.f32 r0.y, r0.z, r7.y, r0.y
+cov.u32f32 r0.z, r4.x
+mad.f32 r0.y, r1.w, r3.w, r0.y
+add.f r1.z, c12.y, (neg)r0.x
+mad.f32 r0.y, r0.w, r7.z, r0.y
+add.f r0.w, c12.y, (neg)r0.x
+add.f r1.w, c12.y, (neg)r0.x
+mov.f32f32 r3.w, (0.000000)
+mul.f r0.y, c10.w, r0.y
+bary.f r4.x, 0, r1.x
+mul.f r1.z, r1.z, c6.z
+mul.f r0.w, r0.w, c6.y
+mov.f32f32 r4.y, r0.y
+bary.f r4.z, 2, r1.x
+bary.f (ei)r1.x, 1, r1.x
+mul.f r1.y, r2.x, r4.x
+mul.f r1.w, r1.w, c6.x
+mul.f r4.x, r2.z, r4.z
+mul.f r1.x, r2.y, r1.x
+mul.f r0.y, r1.y, r0.y
+cmps.f.ne p0.x, r0.z, r3.w
+mul.f r0.z, r4.x, r4.y
+mul.f r1.x, r1.x, r4.y
+mad.f32 r0.z, c5.z, r2.z, r0.z
+mad.f32 r1.x, c5.y, r2.y, r1.x
+mad.f32 r0.y, c5.x, r2.x, r0.y
+sam (f32)(w)r3.y, r3.y, s#1, t#1
+(sy)cmps.f.lt r1.y, r4.x, c11.y
+mul.f r0.z, r0.x, r0.z
+mul.f r1.x, r0.x, r1.x
+mul.f r0.x, r0.x, r0.y
kill p0.x
-mov.f32f32 r0.z, r4.w
-mov.f32f32 r5.z, r1.y
-mov.f32f32 r0.x, r0.x
-mul.f r1.x, r4.z, r1.x
-mul.f r0.w, r4.y, r0.w
-mov.f32f32 r2.w, r0.z
-mul.f r0.x, c10.w, r0.x
-mul.f r0.z, r4.x, r2.z
-sam (f32)(w)r1.y, r5.y, s#1, t#1
-nop
-(sy)cmps.f.lt r1.y, r2.x, c11.y
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r1.z, r2.x
-mov.f32f32 r1.w, c9.y
-nop
-mul.f r1.x, r1.x, r0.x
-mul.f r0.w, r0.w, r0.x
-mul.f r0.x, r0.z, r0.x
+add.f r0.y, r0.z, r1.z
cov.u32f32 r0.z, r1.y
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r0.w, r0.w
-mad.f32 r1.x, c5.z, r4.z, r1.x
-mad.f32 r0.w, c5.y, r4.y, r0.w
-mov.f32f32 r0.x, r0.x
+add.f r0.w, r1.x, r0.w
+add.f r0.x, r0.x, r1.w
+mov.f32f32 r1.x, c9.y
cmps.f.ne r0.z, r0.z, c9.y
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r0.w, r0.w
-mad.f32 r0.x, c5.x, r4.x, r0.x
-mov.f32f32 r1.y, r1.z
-mul.f r1.x, r0.y, r1.x
-mul.f r0.w, r0.y, r0.w
-mov.f32f32 r0.x, r0.x
-sel.b32 r0.z, r1.w, r0.z, r1.y
-add.f r1.x, r1.x, r3.z
-add.f r0.w, r0.w, r3.w
-mul.f r0.x, r0.y, r0.x
-nop
-mul.f r0.y, r1.x, r0.z
+(rpt2)nop
+sel.b32 r0.z, r1.x, r0.z, r4.x
+(rpt2)nop
+mul.f r0.y, r0.y, r0.z
mul.f r0.w, r0.w, r0.z
-add.f r0.x, r0.x, r5.x
-nop
-mul.f r0.y, r0.y, c4.z
-mul.f r0.w, r0.w, c4.y
+(rpt1)nop
+mul.f r2.z, r0.y, c4.z
+mul.f r2.y, r0.w, c4.y
mul.f r0.x, r0.x, r0.z
-nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.w
-mul.f r0.x, r0.x, c4.x
-nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.x, r0.x
-nop
-mov.f32f32 r2.z, r0.y
-mov.f32f32 r2.y, r0.z
-mov.f32f32 r0.x, r0.x
(rpt2)nop
-mov.f32f32 r2.x, r0.x
+mul.f r2.x, r0.x, c4.x
end
nop
nop
nop
; FRAG: outputs: r2.x (1:0)
; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r2.x (5:10,cm=f,il=12,b=1) r1.x (5:11,cm=f,il=16,b=1)
-; FRAG: 196 instructions, 0 half, 8 full
+; FRAG: 133 instructions, 0 half, 8 full
diff --git a/reference/0ad-frag-2.asm b/reference/0ad-frag-2.asm
index 4377ecf..227a081 100644
--- a/reference/0ad-frag-2.asm
+++ b/reference/0ad-frag-2.asm
@@ -8,199 +8,135 @@
@out(r2.y) out1
@out(r2.z) out2
@out(r2.w) out3
+@const(c9.x) 0x3f000000, 0x00000000, 0x3f800000, 0xbb449ba6
+@const(c10.x) 0xbf000000, 0x40000000, 0x3f800000, 0x3de38866
+@const(c11.x) 0x3fb8aa65, 0x3cf5c28f, 0x00000000, 0x00000000
+@const(c12.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.x, 4, r1.x
add.f r0.y, r0.w, c9.y
bary.f r0.w, 5, r1.x
bary.f r1.z, 8, r1.x
-add.f r1.w, r0.x, c10.x
-bary.f r2.x, 10, r1.x
-bary.f r2.y, 6, r1.x
-add.f r2.z, r0.w, c10.x
-floor.f r2.w, r1.w
+add.f r2.x, r0.x, c10.x
+bary.f r1.w, 9, r1.x
+add.f r2.y, r0.w, c10.x
+bary.f r2.z, 6, r1.x
+floor.f r2.w, r2.x
rcp r0.y, r0.y
add.f r0.z, r0.z, c9.y
-mov.f32f32 r1.z, r1.z
-floor.f r3.x, r2.z
-add.f r1.w, r1.w, (neg)r2.w
+floor.f r3.x, r2.y
+add.f r3.w, r2.z, c9.w
+add.f r2.x, r2.x, (neg)r2.w
(ss)mul.f r0.y, r0.z, r0.y
-mov.f32f32 r3.y, r1.z
-add.f r0.z, r2.z, (neg)r3.x
-mov.f32f32 r1.z, r1.w
+absneg.f r0.z, (neg)c7.x
+add.f r2.y, r2.y, (neg)r3.x
+mov.f32f32 r2.z, r2.x
+add.f r2.x, r2.x, c9.z
+mul.f r0.z, r0.z, c7.x
+sam (f32)(w)r4.x, r1.z, s#1, t#1
+(ss)mov.f32f32 r1.z, r2.y
+mul.f r1.w, c9.x, r2.z
+add.f r2.z, c10.y, (neg)r2.z
+mul.f r0.z, r0.z, r0.y
mov.f32f32 r0.y, r0.y
-absneg.f r1.w, (neg)c7.x
-mov.f32f32 r0.z, r0.z
-mul.f r2.z, c9.x, r1.z
-add.f r2.w, c10.y, (neg)r1.z
-mul.f r1.w, r1.w, c7.x
-bary.f r3.x, 9, r1.x
-mov.f32f32 r2.z, r2.z
-mul.f r3.z, c9.x, r0.z
-mul.f r1.w, r1.w, r0.y
-mov.f32f32 r2.w, r2.w
-add.f r0.x, r0.x, (neg)r2.z
-mov.f32f32 r2.z, r3.z
-mov.f32f32 r1.w, r1.w
-add.f r3.z, c10.y, (neg)r0.z
-mov.f32f32 r0.x, r0.x
-add.f r0.w, r0.w, (neg)r2.z
-mul.f r0.y, r1.w, r0.y
-mov.f32f32 r1.w, r3.z
-add.f r2.z, c10.x, r0.x
+add.f r0.x, r0.x, (neg)r1.w
+mul.f r1.w, c9.x, r1.z
+mov.f32f32 r2.w, r2.z
+mul.f r0.y, r0.z, r0.y
+mov.f32f32 r0.z, r0.x
+add.f r0.w, r0.w, (neg)r1.w
add.f r0.x, c10.z, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r0.x, r0.x
mul.f r0.y, r0.y, c11.x
-add.f r3.z, c10.z, r0.w
-mul.f r2.z, r2.z, c3.z
-mul.f r0.x, r0.x, c3.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r3.z, r3.z
-mov.f32f32 r3.w, r2.z
-mov.f32f32 r4.x, r0.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r4.y, r3.w
-add.f r0.w, c10.x, r0.w
-exp2 r0.y, r0.y
-(ss)mov.f32f32 r0.y, r0.y
-mov.f32f32 r3.w, r4.x
-mov.f32f32 r5.x, r0.x
-mov.f32f32 r0.x, r0.w
-add.f r0.w, c12.y, (neg)r0.y
-mov.f32f32 r5.w, r3.w
-mul.f r3.z, r3.z, c3.w
-mul.f r0.x, r0.x, c3.w
-mul.f r0.w, r0.w, c7.y
-mul.f r0.y, r0.y, c9.z
-mov.f32f32 r3.w, r3.z
-mov.f32f32 r4.x, r0.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r6.z, r2.z
-mov.f32f32 r2.z, r3.z
-mov.f32f32 r4.z, r4.x
-add.f r2.y, r2.y, c9.w
-add.f r0.y, r0.y, r0.w
-mov.f32f32 r0.w, r3.w
-mov.f32f32 r5.y, r0.x
-mov.f32f32 r0.x, r2.y
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r6.x, r0.w
-mov.f32f32 r0.w, r2.y
-mov.f32f32 r4.w, r0.x
-mov.f32f32 r0.x, r0.y
-mov.f32f32 r0.y, r2.y
-mov.f32f32 r6.w, r2.z
-mov.f32f32 r6.y, r0.w
-mov.f32f32 r0.w, r2.y
-mov.f32f32 r5.z, r0.y
-sam.s (f32)(x)r3.z, r4.y, s#2, t#2
-(sy)mov.f32f32 r0.y, r3.z
+add.f r0.z, c10.x, r0.z
+mov.f32f32 r1.w, r0.w
+mul.f r3.y, r0.x, c3.z
+add.f r0.x, c10.z, r0.w
+mul.f r4.x, r0.z, c3.z
+add.f r0.z, c10.x, r1.w
+mov.f32f32 r5.x, r3.y
+mul.f r3.z, r0.x, c3.w
+mov.f32f32 r5.w, r4.x
+mul.f r5.y, r0.z, c3.w
+exp2 r0.x, r0.y
+(ss)mov.f32f32 r0.y, r0.x
+mov.f32f32 r5.z, r3.w
+mov.f32f32 r4.y, r3.z
+mov.f32f32 r6.x, r5.y
+mov.f32f32 r6.y, r3.w
+add.f r0.y, c12.y, (neg)r0.y
+mov.f32f32 r4.z, r3.w
+sam.s (f32)(x)r3.x, r3.y, s#2, t#2
+add.f r0.z, c10.y, (neg)r1.z
+sam.s (f32)(x)r6.z, r5.x, s#2, t#2
+mul.f r0.x, r0.x, c9.z
+add.f r0.w, r2.y, c9.z
+mul.f r0.y, r0.y, c7.y
+(ss)nop
+sam.s (f32)(x)r5.x, r5.w, s#2, t#2
+mov.f32f32 r1.z, r0.z
+mul.f r0.z, r2.x, r0.z
+mul.f r1.w, r2.z, r0.w
+add.f r0.x, r0.x, r0.y
+mul.f r0.y, r2.w, r1.z
+sam.s (f32)(x)r3.y, r4.x, s#2, t#2
+mul.f r0.w, r2.x, r0.w
+(sy)cmps.f.lt r1.z, r4.w, c11.y
+bary.f r2.x, 10, r1.x
+mul.f r0.y, r0.y, r5.x
max.f r0.x, r0.x, c9.y
-mov.f32f32 r7.x, r0.w
-mul.f r0.w, r2.w, r1.w
-mov.f32f32 r0.y, r0.y
+mad.f32 r0.y, r0.z, r6.z, r0.y
+cov.u32f32 r0.z, r1.z
+mad.f32 r0.y, r1.w, r3.y, r0.y
min.f r0.x, r0.x, c9.z
-sam.s (f32)(x)r3.z, r5.w, s#2, t#2
-nop
-(sy)mov.f32f32 r2.y, r3.z
-mul.f r0.y, r0.w, r0.y
-sam.s (f32)(x)r3.z, r5.x, s#2, t#2
-(sy)mov.f32f32 r0.w, r3.z
-add.f r1.z, r1.z, c9.z
-add.f r2.z, c12.y, (neg)r0.x
-add.f r3.z, c12.y, (neg)r0.x
-add.f r3.w, c12.y, (neg)r0.x
-mul.f r1.w, r1.z, r1.w
-mul.f r2.z, r2.z, c6.z
-mul.f r4.x, r3.z, c6.y
-mul.f r3.w, r3.w, c6.x
-mad.f32 r0.y, r1.w, r0.w, r0.y
-(ss)nop
-sam.s (f32)(x)r4.y, r6.z, s#2, t#2
-add.f r0.z, r0.z, c9.z
-(sy)mov.f32f32 r0.w, r4.y
-mov.f32f32 r1.w, r3.x
-mov.f32f32 r0.y, r0.y
-mul.f r2.w, r2.w, r0.z
-mul.f r0.z, r1.z, r0.z
-mov.f32f32 r3.z, r1.w
-mov.f32f32 r1.z, r2.x
-mad.f32 r0.y, r2.w, r0.w, r0.y
-bary.f r0.w, 11, r1.x
-mov.f32f32 r1.w, c9.z
-bary.f r2.x, 2, r1.x
-mov.f32f32 r0.y, r0.y
-sam (f32)(w)r2.w, r3.y, s#1, t#1
-(sy)cmps.f.lt r2.w, r3.z, c11.y
-mad.f32 r0.y, r0.z, r2.y, r0.y
-mov.f32f32 r0.z, r3.z
-mov.f32f32 r3.x, r1.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.y, r0.y
-cov.u32f32 r1.z, r2.w
-mov.f32f32 r0.z, r0.z
-(ss)mov.f32f32 r3.y, r0.w
+mad.f32 r0.y, r0.w, r3.x, r0.y
+cmps.f.ne r0.z, r0.z, c9.y
+(rpt1)nop
mul.f r0.y, c10.w, r0.y
-cmps.f.ne r0.w, r1.z, c9.y
+bary.f r2.y, 11, r1.x
+add.f r0.w, c12.y, (neg)r0.x
+add.f r1.z, c12.y, (neg)r0.x
+mov.f32f32 r1.w, r0.y
+add.f r2.z, c12.y, (neg)r0.x
(rpt1)nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.z, c9.y
-sam (f32)(xyz)r4.y, r3.x, s#0, t#0
-(sy)mul.f r2.x, r4.w, r2.x
+sam (f32)(xyz)r2.w, r2.x, s#0, t#0
+(ss)bary.f r2.x, 2, r1.x
bary.f r2.y, 1, r1.x
bary.f (ei)r1.x, 0, r1.x
-sel.b32 r0.z, r1.z, r0.w, r0.z
-mul.f r0.w, r2.x, r0.y
-mul.f r1.y, r4.z, r2.y
-mul.f r1.x, r4.y, r1.x
-mov.f32f32 r2.w, r1.w
-mov.f32f32 r0.w, r0.w
-mul.f r1.y, r1.y, r0.y
-mad.f32 r0.w, c5.z, r4.w, r0.w
+mul.f r0.w, r0.w, c6.z
+(sy)mul.f r1.y, r3.y, r2.x
+mul.f r2.x, r3.x, r2.y
+mul.f r1.x, r2.w, r1.x
+mul.f r1.z, r1.z, c6.y
+mul.f r1.y, r1.y, r1.w
+mul.f r1.w, r2.x, r1.w
+mad.f32 r1.y, c5.z, r3.y, r1.y
+mad.f32 r1.w, c5.y, r3.x, r1.w
mul.f r0.y, r1.x, r0.y
-(rpt1)nop
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.x, r1.y
-mov.f32f32 r0.y, r0.y
-mad.f32 r1.x, c5.y, r4.z, r1.x
-mul.f r0.w, r0.x, r0.w
-mad.f32 r0.y, c5.x, r4.y, r0.y
-(rpt1)nop
-add.f r0.w, r0.w, r2.z
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r0.y, r0.y
-nop
-mul.f r0.w, r0.w, r0.z
-mul.f r1.x, r0.x, r1.x
+mul.f r1.x, r2.z, c6.x
+mul.f r1.y, r0.x, r1.y
+mul.f r1.w, r0.x, r1.w
+mad.f32 r0.y, c5.x, r2.w, r0.y
+mov.f32f32 r2.x, c9.y
+add.f r0.w, r1.y, r0.w
+add.f r1.y, r1.w, r1.z
mul.f r0.x, r0.x, r0.y
-nop
-mul.f r0.y, r0.w, c4.z
-add.f r0.w, r1.x, r4.x
-add.f r0.x, r0.x, r3.w
-nop
-mov.f32f32 r0.y, r0.y
-mul.f r0.w, r0.w, r0.z
-mul.f r0.x, r0.x, r0.z
-nop
-mov.f32f32 r0.y, r0.y
-mul.f r0.z, r0.w, c4.y
-mul.f r0.x, r0.x, c4.x
-nop
-mov.f32f32 r2.z, r0.y
-mov.f32f32 r0.y, r0.z
-mov.f32f32 r0.x, r0.x
+sel.b32 r0.y, r2.x, r0.z, r4.w
+mov.f32f32 r2.w, c9.z
(rpt1)nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
+mul.f r0.z, r0.w, r0.y
+mul.f r0.w, r1.y, r0.y
(rpt1)nop
-mov.f32f32 r2.y, r0.y
-mov.f32f32 r2.x, r0.x
+mul.f r2.z, r0.z, c4.z
+mul.f r2.y, r0.w, c4.y
+add.f r0.x, r0.x, r1.x
+(rpt2)nop
+mul.f r0.x, r0.x, r0.y
+(rpt2)nop
+mul.f r2.x, r0.x, c4.x
end
nop
nop
+nop
; FRAG: outputs: r2.x (1:0)
-; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r2.x (5:10,cm=f,il=12,b=1) r0.x (5:11,cm=f,il=16,b=1)
-; FRAG: 195 instructions, 0 half, 8 full
+; FRAG: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (5:9,cm=f,il=8,b=1) r2.x (5:10,cm=f,il=12,b=1) r1.z (5:11,cm=f,il=16,b=1)
+; FRAG: 129 instructions, 0 half, 7 full
diff --git a/reference/0ad-frag.asm b/reference/0ad-frag.asm
index 26fc842..58b406d 100644
--- a/reference/0ad-frag.asm
+++ b/reference/0ad-frag.asm
@@ -6,131 +6,94 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c5.x) 0x3a03126f, 0x3f000000, 0x3f800000, 0x40000000
+@const(c6.x) 0x3f800000, 0xbf000000, 0x3de38866, 0x00000000
+@const(c7.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.z, 8, r0.x
bary.f r0.w, 9, r0.x
bary.f r1.x, 10, r0.x
mov.f32f32 r1.y, c5.x
add.f r1.z, r0.z, (neg)c5.y
add.f r1.w, r0.w, (neg)c5.y
-bary.f r2.x, 4, r0.x
-bary.f r2.y, 12, r0.x
+bary.f r2.x, 2, r0.x
+bary.f r2.y, 4, r0.x
floor.f r2.z, r1.z
floor.f r2.w, r1.w
-add.f r1.x, r1.x, (neg)r1.y
-mov.f32f32 r3.x, r2.x
+add.f r3.z, r1.x, (neg)r1.y
+mul.f r1.x, r2.x, c5.w
add.f r1.y, r1.z, (neg)r2.z
add.f r1.z, r1.w, (neg)r2.w
-mov.f32f32 r1.x, r1.x
-bary.f r1.w, 5, r0.x
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r3.w, r1.x
-mov.f32f32 r4.z, r1.x
+mov.f32f32 r4.y, r3.z
+bary.f r2.z, 5, r0.x
mad.f32 r0.z, (neg)c5.y, r1.y, r0.z
mad.f32 r0.w, (neg)c5.y, r1.z, r0.w
-add.f r2.x, c5.w, (neg)r1.y
-add.f r1.y, r1.y, c5.z
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-add.f r2.z, c5.w, (neg)r1.z
-add.f r1.z, r1.z, c5.z
-add.f r2.w, c6.y, r0.z
+mov.f32f32 r1.y, r1.y
+mov.f32f32 r1.z, r1.z
+mov.f32f32 r1.w, r0.z
+mov.f32f32 r2.x, r0.w
add.f r0.z, c6.x, r0.z
-add.f r3.y, c6.y, r0.w
add.f r0.w, c6.x, r0.w
-mul.f r2.w, r2.w, c2.z
-mul.f r0.z, r0.z, c2.z
-mul.f r3.z, r3.y, c2.w
-mul.f r0.w, r0.w, c2.w
-mov.f32f32 r3.y, r2.w
-mov.f32f32 r4.x, r0.z
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r4.y, r3.z
-mov.f32f32 r4.x, r4.x
-mov.f32f32 r4.w, r2.w
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r3.z, r4.y
-mov.f32f32 r4.y, r4.y
-mov.f32f32 r5.z, r0.z
-mov.f32f32 r5.w, r0.w
-mov.f32f32 r5.x, r0.w
-mov.f32f32 r5.y, r1.x
-mov.f32f32 r6.x, r1.x
-sam.s (f32)(x)r6.y, r3.y, s#2, t#2
-mul.f r0.z, r2.x, r2.z
-(ss)nop
-sam.s (f32)(y)r3.y, r4.x, s#2, t#2
-mul.f r0.w, r1.y, r2.z
-mul.f r1.x, r2.x, r1.z
-mul.f r1.y, r1.y, r1.z
-mov.f32f32 r0.z, r0.z
-(ss)nop
-sam.s (f32)(z)r3.w, r4.w, s#2, t#2
+add.f r1.w, c6.y, r1.w
+add.f r2.x, c6.y, r2.x
+mul.f r3.x, r0.z, c2.z
+mul.f r3.y, r0.w, c2.w
+mul.f r3.w, r1.w, c2.z
+mul.f r4.w, r2.x, c2.w
+mov.f32f32 r4.z, r3.x
+mov.f32f32 r5.x, r4.y
+mov.f32f32 r5.y, r3.w
+mov.f32f32 r5.z, r4.w
+mov.f32f32 r5.w, r4.y
+mov.f32f32 r4.x, r3.y
+sam.s (f32)(w)r2.w, r3.x, s#2, t#2
+nop
+add.f r0.z, c5.w, (neg)r1.y
+sam.s (f32)(y)r6.x, r4.z, s#2, t#2
(rpt1)nop
+add.f r0.w, c5.w, (neg)r1.z
(ss)nop
-sam.s (f32)(w)r4.z, r5.z, s#2, t#2
-mov.f32f32 r0.w, r0.w
-(sy)mul.f r0.z, r6.y, r0.z
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.y, r1.y
-mad.f32 r0.z, r3.z, r0.w, r0.z
-mov.f32f32 r3.y, r1.w
-mov.f32f32 r2.x, r2.y
-bary.f r0.w, 13, r0.x
-mov.f32f32 r0.z, r0.z
-bary.f r1.z, 2, r0.x
-mad.f32 r0.z, r4.y, r1.x, r0.z
-mov.f32f32 r2.y, r0.w
-sam (f32)(xyzw)r2.z, r3.x, s#0, t#0
-nop
-mul.f r0.w, r1.z, c5.w
-mov.f32f32 r0.z, r0.z
-bary.f r1.x, 1, r0.x
-mad.f32 r0.z, r5.y, r1.y, r0.z
+sam.s (f32)(x)r4.z, r5.y, s#2, t#2
+add.f r1.y, r1.y, c5.z
+add.f r1.z, r1.z, c5.z
+(ss)nop
+sam.s (f32)(z)r4.w, r3.w, s#2, t#2
+sam (f32)(xyzw)r2.x, r2.y, s#0, t#0
+bary.f r1.w, 1, r0.x
+mul.f r2.w, r0.z, r0.w
+mul.f r0.w, r1.y, r0.w
+mul.f r0.z, r0.z, r1.z
+mul.f r1.y, r1.y, r1.z
+(sy)mul.f r1.z, r4.z, r2.w
+mul.f r2.w, r1.w, c5.w
+mad.f32 r0.w, r6.y, r0.w, r1.z
+bary.f r3.x, 12, r0.x
+mad.f32 r0.z, r5.y, r0.z, r0.w
+bary.f r3.y, 13, r0.x
+mad.f32 r0.z, r3.z, r1.y, r0.z
bary.f (ei)r0.x, 0, r0.x
mov.f32f32 r1.w, (0.000000)
nop
-mov.f32f32 r0.y, r0.z
-sam (f32)(w)r3.y, r2.x, s#3, t#3
-(rpt1)nop
-mul.f r0.z, r1.x, c5.w
-mul.f r0.y, r0.y, c6.z
+mul.f r0.y, r0.z, c6.z
mul.f r0.x, r0.x, c5.w
+(ss)nop
+sam (f32)(w)r3.x, r3.x, s#3, t#3
(rpt1)nop
-mov.f32f32 r0.y, r0.y
-(rpt2)nop
-mad.f32 r0.w, r0.w, r0.y, c4.z
-mad.f32 r0.z, r0.z, r0.y, c4.y
+mov.f32f32 r0.z, r0.y
mad.f32 r0.x, r0.x, r0.y, c4.x
+(rpt1)nop
+mad.f32 r0.y, r1.x, r0.z, c4.z
+mad.f32 r0.z, r2.w, r0.z, c4.y
+mul.f r0.x, r2.x, r0.x
nop
-mov.f32f32 r0.y, r0.w
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.x, r0.x
-nop
-(sy)mul.f r0.y, r3.x, r0.y
-mul.f r0.z, r2.w, r0.z
-mul.f r0.x, r2.z, r0.x
-nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.x, r0.x
-nop
-mul.f r0.y, r0.y, r4.x
-mul.f r0.z, r0.z, r4.x
-mul.f r0.x, r0.x, r4.x
-nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.x, r0.x
-nop
-mov.f32f32 r1.z, r0.y
-mov.f32f32 r1.y, r0.z
-mov.f32f32 r1.x, r0.x
+mul.f r0.y, r2.z, r0.y
+mul.f r0.z, r2.y, r0.z
+(rpt1)nop
+(sy)mul.f r1.z, r0.y, r3.w
+mul.f r1.y, r0.z, r3.w
+mul.f r1.x, r0.x, r3.w
end
nop
; FRAG: outputs: r1.x (1:0)
; FRAG: inputs: r0.x (1:0,cm=f,il=8,b=1) r1.x (5:0,cm=f,il=12,b=1) r2.x (5:2,cm=f,il=16,b=1) r3.x (5:3,cm=f,il=20,b=1)
-; FRAG: 128 instructions, 0 half, 7 full
+; FRAG: 87 instructions, 0 half, 7 full
diff --git a/reference/2color-after.asm b/reference/2color-after.asm
index f44d151..e58817e 100644
--- a/reference/2color-after.asm
+++ b/reference/2color-after.asm
@@ -7,35 +7,32 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c0.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)mul.s r0.z, hr0.x, 2
bary.f r0.w, 3, r0.x
bary.f r1.x, 6, r0.x
bary.f r1.y, 7, r0.x
add.s r0.z, r0.z, 1
bary.f r1.z, 2, r0.x
-bary.f r1.w, 5, r0.x
-bary.f r2.x, 1, r0.x
+bary.f r2.x, 5, r0.x
+bary.f r2.y, 1, r0.x
cov.s32f32 r0.z, r0.z
-bary.f r2.y, 4, r0.x
+bary.f r2.z, 4, r0.x
bary.f (ei)r0.x, 0, r0.x
nop
cmps.f.lt r0.y, r0.z, c0.x
-cmps.f.lt r2.z, r0.z, c0.x
cmps.f.lt r2.w, r0.z, c0.x
+cmps.f.lt r3.x, r0.z, c0.x
cmps.f.lt r0.z, r0.z, c0.x
-sel.b32 r0.y, r0.w, r0.y, r1.y
-sel.b32 r0.w, r1.z, r2.z, r1.x
-sel.b32 r1.x, r2.x, r2.w, r1.w
-sel.b32 r0.x, r0.x, r0.z, r2.y
-mov.f32f32 r1.w, r0.y
-mov.f32f32 r1.z, r0.w
-mov.f32f32 r1.y, r1.x
-mov.f32f32 r1.x, r0.x
+sel.b32 r1.w, r0.w, r0.y, r1.y
+sel.b32 r1.z, r1.z, r2.w, r1.x
+sel.b32 r1.y, r2.y, r3.x, r2.x
+sel.b32 r1.x, r0.x, r0.z, r2.z
end
nop
nop
nop
; FRAG: outputs: r1.x (1:0)
; FRAG: inputs: r0.x (1:0,cm=f,il=8,b=1) r0.z (2:0,cm=f,il=12,b=1) r63.y (7:0,cm=f,il=16,b=0)
-; FRAG: 25 instructions, 1 half, 3 full
+; FRAG: 21 instructions, 1 half, 4 full
diff --git a/reference/ChameleonMan-vert.asm b/reference/ChameleonMan-vert.asm
index 2a8bdc3..c8ce259 100644
--- a/reference/ChameleonMan-vert.asm
+++ b/reference/ChameleonMan-vert.asm
@@ -3,24 +3,24 @@
@in(r0.x) in0
@in(r0.y) in1
@in(r0.z) in2
-@in(r0.w) in4
-@in(r1.x) in5
-@in(r1.y) in6
-@in(r1.z) in8
-@in(r1.w) in9
-@in(r2.x) in10
-@in(r2.y) in12
-@in(r2.z) in13
-@in(r2.w) in14
-@in(r3.x) in16
-@in(r3.y) in17
-@in(r3.z) in20
-@in(r3.w) in21
-@in(r4.x) in22
-@in(r4.y) in23
-@in(r4.z) in24
-@in(r4.w) in25
-@in(r5.x) in26
+@in(r1.z) in4
+@in(r1.w) in5
+@in(r2.x) in6
+@in(r2.y) in8
+@in(r2.z) in9
+@in(r2.w) in10
+@in(r3.x) in12
+@in(r3.y) in13
+@in(r3.z) in14
+@in(r1.x) in16
+@in(r1.y) in17
+@in(r3.w) in20
+@in(r4.x) in21
+@in(r4.y) in22
+@in(r4.z) in23
+@in(r4.w) in24
+@in(r5.x) in25
+@in(r5.y) in26
@out(r0.x) out0
@out(r0.y) out1
@out(r0.z) out2
@@ -33,783 +33,510 @@
@out(r2.y) out9
@out(r2.z) out10
@out(r2.w) out11
-(sy)(ss)trunc.f r4.z, r4.z
-mov.f32f32 r5.y, c63.w
+@const(c63.x) 0x00000000, 0x40800000, 0x40400000, 0x3f800000
+@const(c64.x) 0x40000000, 0x00000000, 0x00000000, 0x00000000
+@const(c65.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
+(sy)(ss)mov.f32f32 r0.w, c63.w
trunc.f r4.w, r4.w
+trunc.f r5.x, r5.x
mov.f32f32 r5.z, c63.x
-mul.f r5.w, r4.z, c63.z
-cmps.f.lt r5.y, r5.y, c5.x
-mul.f r4.z, r4.z, c63.y
-mov.f32f32 r6.x, r4.w
+cmps.f.lt r0.w, r0.w, c5.x
+mul.f r5.w, r4.w, c63.z
+mul.f r4.w, r4.w, c63.y
+mul.f r6.x, r5.x, c63.z
+cov.u32f32 r0.w, r0.w
cov.f32s16 hr0.x, r5.w
-cov.u32f32 r5.y, r5.y
-cov.f32s16 hr0.y, r4.z
-mul.f r6.x, r6.x, c63.z
+cov.f32s16 hr0.y, r4.w
+mov.f32f32 r6.y, r6.x
+cmps.f.ne r0.w, r0.w, c63.x
+trunc.f r5.y, r5.y
shl.b hr0.x, hr0.x, 2
-cmps.f.ne r5.y, r5.y, c63.x
-trunc.f r5.x, r5.x
shl.b hr0.y, hr0.y, 2
+cov.f32s16 hr0.z, r6.y
+sel.b32 r5.y, r5.y, r0.w, r5.x
mova a0.x, hr0.x
-mov.f32f32 r6.x, r6.x
-mov.f32f32 r5.x, r5.x
-mov.f32f32 r6.y, r4.w
-mov.f32f32 r6.z, r4.x
-mov.f32f32 r6.w, r3.w
-mov.f32f32 r5.x, r5.x
+mul.f r5.x, r5.x, c63.y
+shl.b hr0.x, hr0.z, 2
+mov.f32f32 r6.z, r5.y
+mul.f r5.y, r5.y, c63.y
+cov.f32s16 hr0.z, r5.x
+sel.b32 r6.w, r4.y, r0.w, r4.x
+mov.f32f32 r6.z, r6.z
+cov.f32s16 hr0.w, r5.y
mov.f32f32 r7.x, c<a0.x + 152>
-mov.f32f32 r4.w, r4.w
mov.f32f32 r7.y, c<a0.x + 153>
-(ul)mov.f32f32 r7.z, c<a0.x + 154>
-mov.f32f32 r7.w, r7.x
-mov.f32f32 r8.x, r7.x
-sel.b32 r4.w, r5.x, r5.y, r4.w
-mov.f32f32 r5.x, r7.y
-mul.f r7.w, r7.w, r2.y
-cov.f32s16 hr0.x, r5.w
-mul.f r8.x, r8.x, r1.z
-mov.f32f32 r4.w, r4.w
-mul.f r5.x, r5.x, r2.y
-shl.b hr0.x, hr0.x, 2
-mov.f32f32 r8.y, r7.z
-mov.f32f32 r4.w, r4.w
-mov.f32f32 r8.z, r7.y
-mova a0.x, hr0.x
-mul.f r8.y, r8.y, r2.y
-mov.f32f32 r4.w, r4.w
-mul.f r8.z, r8.z, r1.z
-mov.f32f32 r8.w, r7.z
-mul.f r7.x, r7.x, r0.w
-mul.f r7.y, r7.y, r0.w
-mov.f32f32 r9.x, c<a0.x + 156>
-mul.f r9.y, r4.w, c63.z
-mul.f r4.w, r4.w, c63.y
-mov.f32f32 r9.z, c<a0.x + 157>
-mov.f32f32 r9.w, r9.x
-mov.f32f32 r10.x, r9.x
-cov.f32s16 hr0.x, r9.y
-cov.f32s16 hr0.z, r4.w
-mad.f32 r7.w, r9.w, r2.z, r7.w
-mad.f32 r8.x, r10.x, r1.w, r8.x
-shl.b hr0.x, hr0.x, 2
-shl.b hr0.z, hr0.z, 2
-mov.f32f32 r7.w, r7.w
-cov.f32s16 hr0.w, r5.w
-mov.f32f32 r5.w, r8.x
-mov.f32f32 r8.x, r9.z
-(ul)mov.f32f32 r9.w, c<a0.x + 158>
+mov.f32f32 r6.z, r6.z
shl.b hr0.w, hr0.w, 2
-mov.f32f32 r10.x, r9.z
-mul.f r8.w, r8.w, r1.z
-mov.f32f32 r10.y, r9.w
-mova a0.x, hr0.w
-mad.f32 r5.x, r8.x, r2.z, r5.x
-mov.f32f32 r8.x, r9.w
-mad.f32 r8.z, r10.x, r1.w, r8.z
-mad.f32 r8.w, r10.y, r1.w, r8.w
-mov.f32f32 r5.x, r5.x
-mad.f32 r8.x, r8.x, r2.z, r8.y
-mov.f32f32 r8.y, c<a0.x + 160>
-mov.f32f32 r10.x, c<a0.x + 161>
-mov.f32f32 r8.z, r8.z
-mov.f32f32 r8.x, r8.x
-mov.f32f32 r10.y, r8.y
-mov.f32f32 r10.z, r8.y
-mov.f32f32 r10.w, r10.x
-(ul)mov.f32f32 r11.x, c<a0.x + 162>
-mad.f32 r7.w, r10.y, r2.w, r7.w
-mad.f32 r5.w, r10.z, r2.x, r5.w
-mad.f32 r5.x, r10.w, r2.w, r5.x
-mov.f32f32 r10.y, r11.x
-mov.f32f32 r7.w, r7.w
-mov.f32f32 r5.w, r5.w
-mov.f32f32 r5.x, r5.x
-mad.f32 r8.x, r10.y, r2.w, r8.x
-mul.f r7.w, r7.w, r3.z
-mul.f r5.w, r5.w, r3.z
-mul.f r5.x, r5.x, r3.z
-mov.f32f32 r8.x, r8.x
-mov.f32f32 r7.w, r7.w
-mov.f32f32 r10.y, c62.x
-mov.f32f32 r5.w, r5.w
-mov.f32f32 r5.x, r5.x
-mul.f r8.x, r8.x, r3.z
-cmps.f.ne r10.y, r10.y, c63.x
-mov.f32f32 r10.z, (0.000000)
-mov.f32f32 r10.w, (0.000000)
-mov.f32f32 r11.y, (0.000000)
-mov.f32f32 r8.x, r8.x
-sel.b32 r7.w, r7.w, r10.y, r10.z
-sel.b32 r5.w, r5.w, r10.y, r10.w
-sel.b32 r5.x, r5.x, r10.y, r11.y
-mov.f32f32 r10.z, (0.000000)
-mov.f32f32 r10.w, r7.w
-mov.f32f32 r11.y, r5.w
-mov.f32f32 r11.z, r5.x
-sel.b32 r8.x, r8.x, r10.y, r10.z
-mov.f32f32 r10.z, r10.w
-cov.f32s16 hr0.w, r6.x
-mov.f32f32 r10.w, r11.y
-mov.f32f32 r11.y, r11.z
-mov.f32f32 r11.z, r8.x
-shl.b hr0.w, hr0.w, 2
-mov.f32f32 r11.w, r10.x
-mov.f32f32 r8.w, r8.w
-mov.f32f32 r12.x, r11.x
-mova a0.x, hr0.w
-mov.f32f32 r11.z, r11.z
-mad.f32 r8.z, r11.w, r2.x, r8.z
-mad.f32 r8.w, r12.x, r2.x, r8.w
-mad.f32 r7.x, r9.x, r1.x, r7.x
-mad.f32 r7.y, r9.z, r1.x, r7.y
-mul.f r7.z, r7.z, r0.w
+mul.f r7.z, r7.x, r3.x
+mul.f r7.w, r7.y, r3.x
+mul.f r6.z, r6.z, c63.z
+cov.f32s16 hr1.x, r5.w
+mul.f r8.x, r7.x, r2.y
+(ul)mov.f32f32 r8.y, c<a0.x + 154>
+cov.f32s16 hr1.y, r6.z
+cov.f32s16 hr1.z, r6.z
+cov.f32s16 hr1.w, r6.z
+shl.b hr1.x, hr1.x, 2
+shl.b hr1.y, hr1.y, 2
+shl.b hr1.z, hr1.z, 2
+shl.b hr1.w, hr1.w, 2
+mul.f r6.z, r8.y, r3.x
+mova a0.x, hr1.y
+mul.f r8.z, r7.y, r2.y
+mul.f r8.w, r8.y, r2.y
+mul.f r7.x, r7.x, r1.z
+mul.f r7.y, r7.y, r1.z
+mul.f r8.y, r8.y, r1.z
+cov.f32s16 hr1.y, r5.y
mov.f32f32 r9.x, c<a0.x + 152>
-mov.f32f32 r9.z, c<a0.x + 153>
-(ul)mov.f32f32 r11.w, c<a0.x + 154>
-mov.f32f32 r8.z, r8.z
-mov.f32f32 r12.x, r9.x
-mov.f32f32 r12.y, r9.x
-mov.f32f32 r12.z, r9.z
-mov.f32f32 r12.w, r11.w
-mul.f r12.x, r12.x, r2.y
-cov.f32s16 hr0.w, r6.x
-mul.f r12.y, r12.y, r1.z
-mul.f r12.z, r12.z, r2.y
-mul.f r12.w, r12.w, r2.y
-shl.b hr0.w, hr0.w, 2
-mul.f r8.z, r8.z, r3.z
-mov.f32f32 r8.w, r8.w
-mov.f32f32 r7.x, r7.x
-mova a0.x, hr0.w
-mov.f32f32 r8.z, r8.z
-mov.f32f32 r13.x, (0.000000)
-mul.f r8.w, r8.w, r3.z
-mad.f32 r7.x, r8.y, r1.y, r7.x
-mov.f32f32 r7.y, r7.y
-mad.f32 r7.z, r9.w, r1.x, r7.z
-mov.f32f32 r8.y, c<a0.x + 156>
-mov.f32f32 r9.w, c<a0.x + 157>
-(ul)mov.f32f32 r13.y, c<a0.x + 158>
-sel.b32 r8.z, r8.z, r10.y, r13.x
-mov.f32f32 r13.x, r8.y
-mov.f32f32 r13.z, r8.y
-mov.f32f32 r13.w, r9.w
-mov.f32f32 r14.x, r13.y
-mad.f32 r12.x, r13.x, r2.z, r12.x
-mad.f32 r12.y, r13.z, r1.w, r12.y
-mad.f32 r12.z, r13.w, r2.z, r12.z
-mad.f32 r12.w, r14.x, r2.z, r12.w
-mov.f32f32 r12.x, r12.x
-cov.f32s16 hr0.w, r6.x
-mov.f32f32 r6.x, r12.y
-mov.f32f32 r12.y, r12.z
-mov.f32f32 r12.z, r12.w
-shl.b hr0.w, hr0.w, 2
-mov.f32f32 r12.w, r8.z
-mov.f32f32 r8.w, r8.w
-mov.f32f32 r13.x, (0.000000)
-mova a0.x, hr0.w
-mov.f32f32 r12.w, r12.w
-mov.f32f32 r13.z, r9.z
-sel.b32 r8.w, r8.w, r10.y, r13.x
-mov.f32f32 r7.x, r7.x
-mad.f32 r7.y, r10.x, r1.y, r7.y
-mov.f32f32 r7.z, r7.z
-mov.f32f32 r10.x, c<a0.x + 160>
-mov.f32f32 r10.y, c<a0.x + 161>
-(ul)mov.f32f32 r13.x, c<a0.x + 162>
-mul.f r13.z, r13.z, r1.z
-mov.f32f32 r13.w, r10.x
-mov.f32f32 r14.x, r10.x
-mov.f32f32 r14.y, r10.y
-mov.f32f32 r14.z, r13.x
-mad.f32 r12.x, r13.w, r2.w, r12.x
-mad.f32 r6.x, r14.x, r2.x, r6.x
-mad.f32 r12.y, r14.y, r2.w, r12.y
-mad.f32 r12.z, r14.z, r2.w, r12.z
-mov.f32f32 r12.x, r12.x
-mov.f32f32 r6.x, r6.x
-mov.f32f32 r12.y, r12.y
-mov.f32f32 r12.z, r12.z
-mad.f32 r10.z, r12.x, r3.w, r10.z
-mad.f32 r6.x, r6.x, r3.w, r10.w
-mad.f32 r10.w, r12.y, r3.w, r11.y
-mad.f32 r11.y, r12.z, r3.w, r11.z
-mov.f32f32 r10.z, r10.z
+mov.f32f32 r9.y, c<a0.x + 153>
+(ul)mov.f32f32 r9.z, c<a0.x + 154>
+shl.b hr1.y, hr1.y, 2
+mul.f r9.w, r9.x, r3.x
+mova a0.x, hr1.z
+mul.f r10.x, r9.x, r1.z
+mul.f r9.x, r9.x, r2.y
+mul.f r10.y, r9.y, r3.x
+mul.f r10.z, r9.z, r3.x
+mul.f r10.w, r9.y, r2.y
+mul.f r11.x, r9.z, r2.y
+mov.f32f32 r11.y, c<a0.x + 156>
+mov.f32f32 r11.z, c<a0.x + 157>
+(ul)mov.f32f32 r11.w, c<a0.x + 158>
+mul.f r9.y, r9.y, r1.z
+mad.f32 r9.w, r11.y, r3.y, r9.w
+mova a0.x, hr1.w
+mad.f32 r10.x, r11.y, r1.w, r10.x
+mad.f32 r9.x, r11.y, r2.z, r9.x
+mad.f32 r10.y, r11.z, r3.y, r10.y
+mad.f32 r10.z, r11.w, r3.y, r10.z
+mad.f32 r10.w, r11.z, r2.z, r10.w
+mad.f32 r11.x, r11.w, r2.z, r11.x
+mov.f32f32 r11.y, c<a0.x + 160>
+mov.f32f32 r12.x, c<a0.x + 161>
+(ul)mov.f32f32 r12.y, c<a0.x + 162>
+mad.f32 r9.y, r11.z, r1.w, r9.y
+mad.f32 r9.w, r11.y, r3.z, r9.w
+mova a0.x, hr1.x
+mad.f32 r10.x, r11.y, r2.x, r10.x
+mad.f32 r9.x, r11.y, r2.w, r9.x
+mad.f32 r10.y, r12.x, r3.z, r10.y
+mad.f32 r10.z, r12.y, r3.z, r10.z
+mad.f32 r10.w, r12.x, r2.w, r10.w
+mad.f32 r11.x, r12.y, r2.w, r11.x
+mov.f32f32 r11.y, c<a0.x + 156>
+mov.f32f32 r11.z, c<a0.x + 157>
+(ul)mov.f32f32 r12.z, c<a0.x + 158>
+mad.f32 r9.y, r12.x, r2.x, r9.y
+mad.f32 r7.z, r11.y, r3.y, r7.z
+cov.f32s16 hr1.x, r5.w
+mad.f32 r5.w, r11.y, r2.z, r8.x
+mad.f32 r7.w, r11.z, r3.y, r7.w
+mad.f32 r6.z, r12.z, r3.y, r6.z
+shl.b hr1.x, hr1.x, 2
+mad.f32 r8.x, r11.z, r2.z, r8.z
+mad.f32 r8.z, r12.z, r2.z, r8.w
+mul.f r8.w, r9.z, r1.z
+mova a0.x, hr1.x
+mad.f32 r8.w, r11.w, r1.w, r8.w
+mad.f32 r7.x, r11.y, r1.w, r7.x
+mad.f32 r7.y, r11.z, r1.w, r7.y
+mad.f32 r8.y, r12.z, r1.w, r8.y
+cov.f32s16 hr1.x, r5.y
+cov.f32s16 hr1.z, r5.y
+mov.f32f32 r5.y, c<a0.x + 160>
+mov.f32f32 r9.z, c<a0.x + 161>
+(ul)mov.f32f32 r11.y, c<a0.x + 162>
+mad.f32 r8.w, r12.y, r2.x, r8.w
+mad.f32 r7.z, r5.y, r3.z, r7.z
+mad.f32 r5.w, r5.y, r2.w, r5.w
+mad.f32 r7.w, r9.z, r3.z, r7.w
+mad.f32 r6.z, r11.y, r3.z, r6.z
+mul.f r7.z, r7.z, r3.w
mov.f32f32 r11.z, c62.x
-mov.f32f32 r6.x, r6.x
-mov.f32f32 r10.w, r10.w
-mov.f32f32 r11.y, r11.y
+mul.f r5.w, r5.w, r3.w
+mul.f r7.w, r7.w, r3.w
+mul.f r6.z, r6.z, r3.w
cmps.f.ne r11.z, r11.z, c63.x
-mov.f32f32 r12.x, r9.w
-mov.f32f32 r12.y, r8.w
-mul.f r7.x, r7.x, r3.z
-sel.b32 r10.z, r10.z, r11.z, r7.w
-sel.b32 r6.x, r6.x, r11.z, r5.w
-sel.b32 r10.w, r10.w, r11.z, r5.x
-sel.b32 r11.y, r11.y, r11.z, r8.x
-sel.b32 r7.w, r10.z, r5.y, r7.w
-sel.b32 r5.w, r6.x, r5.y, r5.w
-sel.b32 r5.x, r10.w, r5.y, r5.x
-sel.b32 r6.x, r11.y, r5.y, r8.x
-mov.f32f32 r8.x, r7.w
-mov.f32f32 r10.z, r5.w
-mov.f32f32 r10.w, r5.x
-mov.f32f32 r11.y, r6.x
-mov.f32f32 r8.x, r8.x
-mova a0.x, hr0.x
-mov.f32f32 r10.z, r10.z
-mov.f32f32 r10.w, r10.w
-mov.f32f32 r11.y, r11.y
-mad.f32 r12.x, r12.x, r1.w, r13.z
-mov.f32f32 r12.y, r12.y
-mov.f32f32 r12.z, r11.w
-mov.f32f32 r13.z, c<a0.x + 152>
-mov.f32f32 r13.w, c<a0.x + 153>
-(ul)mov.f32f32 r14.x, c<a0.x + 154>
-mov.f32f32 r12.x, r12.x
-mov.f32f32 r14.y, r13.z
-mov.f32f32 r14.z, r13.z
-mul.f r13.z, r13.z, r0.w
-cov.f32s16 hr0.x, r9.y
-mul.f r14.y, r14.y, r2.y
-mul.f r14.z, r14.z, r1.z
-mov.f32f32 r14.w, r13.w
-shl.b hr0.x, hr0.x, 2
-mov.f32f32 r15.x, r14.x
-mov.f32f32 r15.y, r10.y
-mul.f r12.z, r12.z, r1.z
-mova a0.x, hr0.x
-mul.f r14.w, r14.w, r2.y
-mul.f r2.y, r15.x, r2.y
-mad.f32 r12.x, r15.y, r2.x, r12.x
-mov.f32f32 r15.x, r13.y
-mul.f r15.y, r13.w, r0.w
-mul.f r15.z, r14.x, r0.w
-mov.f32f32 r15.w, c<a0.x + 156>
-mov.f32f32 r16.x, c<a0.x + 157>
-(ul)mov.f32f32 r16.y, c<a0.x + 158>
-mov.f32f32 r12.x, r12.x
-mov.f32f32 r16.z, r15.w
-mov.f32f32 r16.w, r15.w
-mad.f32 r13.z, r15.w, r1.x, r13.z
-mov.f32f32 r15.w, r16.x
-mad.f32 r14.y, r16.z, r2.z, r14.y
-mad.f32 r14.z, r16.w, r1.w, r14.z
-mov.f32f32 r13.z, r13.z
-cov.f32s16 hr0.x, r9.y
-mov.f32f32 r9.y, r14.y
-mov.f32f32 r14.y, r14.z
-mad.f32 r14.z, r15.w, r2.z, r14.w
-shl.b hr0.x, hr0.x, 2
-mov.f32f32 r14.w, r16.y
-mad.f32 r12.x, r12.x, r3.w, r12.w
-mad.f32 r12.z, r15.x, r1.w, r12.z
-mova a0.x, hr0.x
-mov.f32f32 r12.w, r14.z
-mad.f32 r2.y, r14.w, r2.z, r2.y
-mov.f32f32 r2.z, r12.x
-mov.f32f32 r12.x, r12.z
-mov.f32f32 r12.z, r13.x
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r14.z, c<a0.x + 160>
-mov.f32f32 r14.w, c<a0.x + 161>
-(ul)mov.f32f32 r15.x, c<a0.x + 162>
-sel.b32 r2.z, r2.z, r11.z, r8.z
-mov.f32f32 r15.w, r14.z
-mov.f32f32 r16.z, r14.z
-mad.f32 r13.z, r14.z, r1.y, r13.z
-mov.f32f32 r14.z, r14.w
-mad.f32 r9.y, r15.w, r2.w, r9.y
-mad.f32 r14.y, r16.z, r2.x, r14.y
-mov.f32f32 r13.z, r13.z
-mov.f32f32 r7.x, r7.x
-mov.f32f32 r9.y, r9.y
-sel.b32 r6.z, r6.z, r5.y, r6.w
-mov.f32f32 r6.w, r14.y
-mov.f32f32 r14.y, r7.x
-mul.f r9.x, r9.x, r0.w
-mov.f32f32 r15.w, r6.z
-mad.f32 r8.y, r8.y, r1.x, r9.x
-mad.f32 r9.x, r14.z, r2.w, r12.w
-mov.f32f32 r12.w, r15.x
-mov.f32f32 r14.z, r15.w
-mov.f32f32 r8.y, r8.y
-mov.f32f32 r9.x, r9.x
-mad.f32 r2.y, r12.w, r2.w, r2.y
-mov.f32f32 r2.w, r14.z
-mad.f32 r8.y, r10.x, r1.y, r8.y
-sel.b32 r2.z, r2.z, r5.y, r8.z
-mad.f32 r8.z, r12.z, r2.x, r12.x
-mov.f32f32 r10.x, r2.w
-mov.f32f32 r12.x, r2.w
-mov.f32f32 r8.y, r8.y
-mov.f32f32 r12.z, r2.w
-mad.f32 r8.x, r9.y, r10.x, r8.x
-mad.f32 r6.w, r6.w, r12.x, r10.z
-mad.f32 r8.y, r8.y, r3.w, r14.y
-mad.f32 r9.x, r9.x, r12.z, r10.w
-mov.f32f32 r8.x, r8.x
-mov.f32f32 r9.y, c62.x
-mov.f32f32 r6.w, r6.w
-mov.f32f32 r8.y, r8.y
-mov.f32f32 r9.x, r9.x
-cmps.f.ne r9.y, r9.y, c63.x
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r10.x, r2.w
-sel.b32 r7.x, r8.y, r5.y, r7.x
-sel.b32 r8.x, r8.x, r9.y, r7.w
-mov.f32f32 r8.y, c64.x
-sel.b32 r6.w, r6.w, r9.y, r5.w
-mov.f32f32 r10.z, r7.x
-sel.b32 r9.x, r9.x, r9.y, r5.x
-cmps.f.lt r8.y, r8.y, c5.x
-mad.f32 r10.z, r13.z, r2.w, r10.z
-mad.f32 r2.y, r2.y, r10.x, r11.y
-mov.f32f32 r10.x, r2.z
-cov.u32f32 r8.y, r8.y
-mov.f32f32 r10.z, r10.z
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r10.x, r10.x
-cmps.f.ne r8.y, r8.y, c63.x
-mov.f32f32 r10.w, r13.w
-mov.f32f32 r8.z, r8.z
-sel.b32 r2.y, r2.y, r9.y, r6.x
-sel.b32 r7.w, r8.x, r8.y, r7.w
-sel.b32 r5.w, r6.w, r8.y, r5.w
-sel.b32 r6.w, r10.z, r8.y, r7.x
-sel.b32 r5.x, r9.x, r8.y, r5.x
-mov.f32f32 r7.x, r7.w
-mov.f32f32 r8.x, r7.w
-mov.f32f32 r9.x, r5.w
-mov.f32f32 r10.z, r5.w
-mov.f32f32 r11.y, r6.w
-mul.f r7.x, r7.x, r8.x
-mov.f32f32 r8.x, r5.x
-mov.f32f32 r12.x, r5.x
-mul.f r9.x, r9.x, r10.z
-mul.f r10.z, r10.w, r1.z
-mov.f32f32 r10.w, r16.x
-mad.f32 r7.x, r8.x, r12.x, r7.x
-mov.f32f32 r8.x, r6.w
-mov.f32f32 r12.x, r6.w
-mad.f32 r10.z, r10.w, r1.w, r10.z
-mov.f32f32 r7.x, r7.x
-sel.b32 r2.y, r2.y, r8.y, r6.x
-mul.f r6.x, r11.y, r8.x
-mov.f32f32 r8.x, r10.z
-mad.f32 r10.z, r16.x, r1.x, r15.y
-mov.f32f32 r10.w, r2.y
-mov.f32f32 r11.y, r2.y
-mov.f32f32 r12.z, r14.w
-mov.f32f32 r10.z, r10.z
-mov.f32f32 r12.w, r6.w
-mad.f32 r7.x, r10.w, r11.y, r7.x
-mad.f32 r8.x, r12.z, r2.x, r8.x
-mad.f32 r10.z, r14.w, r1.y, r10.z
-mul.f r10.w, r12.x, r12.w
-mov.f32f32 r7.w, r7.w
-mov.f32f32 r5.x, r5.x
-mov.f32f32 r2.y, r2.y
-rsq r7.x, r7.x
-(ss)mov.f32f32 r7.x, r7.x
-mov.f32f32 r8.x, r8.x
-mov.f32f32 r11.y, r2.w
-mov.f32f32 r10.z, r10.z
-mul.f r7.w, r7.w, r7.x
-mov.f32f32 r7.y, r7.y
-mad.f32 r8.x, r8.x, r11.y, r10.x
-mul.f r5.x, r5.x, r7.x
-mov.f32f32 r7.w, r7.w
-mova a0.x, hr0.z
-mov.f32f32 r8.x, r8.x
-mul.f r7.y, r7.y, r3.z
-mov.f32f32 r5.x, r5.x
-mul.f r2.y, r2.y, r7.x
-sel.b32 r7.x, r8.x, r9.y, r2.z
-mov.f32f32 r7.y, r7.y
-mov.f32f32 r8.x, c<a0.x + 24>
-mov.f32f32 r2.y, r2.y
-sel.b32 r2.z, r7.x, r8.y, r2.z
-mov.f32f32 r7.x, r7.y
-mul.f r8.x, r8.x, r0.x
-cov.f32s16 hr0.x, r4.w
-mov.f32f32 r10.x, r2.z
-mov.f32f32 r11.y, r2.z
-mul.f r9.z, r9.z, r0.w
-shl.b hr0.x, hr0.x, 2
-mad.f32 r9.z, r9.w, r1.x, r9.z
-mad.f32 r9.x, r10.x, r11.y, r9.x
-mov.f32f32 r5.w, r5.w
-mov.f32f32 r9.w, c<a0.x + 25>
-mov.f32f32 r10.x, c<a0.x + 26>
-(ul)mov.f32f32 r11.y, c<a0.x + 27>
+mov.f32f32 r11.w, (0.000000)
+mov.f32f32 r12.x, (0.000000)
+mov.f32f32 r12.y, (0.000000)
+mov.f32f32 r12.z, (0.000000)
+sel.b32 r7.z, r7.z, r11.z, r11.w
mova a0.x, hr0.x
-mov.f32f32 r9.x, r9.x
-mad.f32 r8.z, r8.z, r3.w, r12.y
-mov.f32f32 r9.z, r9.z
-mov.f32f32 r2.z, r2.z
-mad.f32 r9.z, r10.y, r1.y, r9.z
-mov.f32f32 r8.z, r8.z
-mov.f32f32 r10.y, c<a0.x + 28>
-mov.f32f32 r12.x, r6.w
-mov.f32f32 r9.z, r9.z
-mad.f32 r12.y, r16.y, r1.x, r15.z
-mad.f32 r8.x, r10.y, r0.y, r8.x
-cov.f32s16 hr0.x, r4.w
-sel.b32 r8.z, r8.z, r11.z, r8.w
-mad.f32 r7.x, r9.z, r3.w, r7.x
-mov.f32f32 r9.z, r12.y
+sel.b32 r5.w, r5.w, r11.z, r12.x
+sel.b32 r7.w, r7.w, r11.z, r12.y
+sel.b32 r6.z, r6.z, r11.z, r12.z
+mad.f32 r8.x, r9.z, r2.w, r8.x
+mad.f32 r8.z, r11.y, r2.w, r8.z
+mad.f32 r5.y, r5.y, r2.x, r7.x
+mov.f32f32 r7.x, c<a0.x + 152>
+mul.f r8.x, r8.x, r3.w
+mov.f32f32 r11.w, c<a0.x + 153>
+(ul)mov.f32f32 r12.x, c<a0.x + 154>
+mul.f r12.y, r7.x, r3.x
+cov.f32s16 hr0.x, r6.y
+mul.f r6.y, r7.x, r1.z
+mul.f r7.x, r7.x, r2.y
+mul.f r12.z, r11.w, r3.x
shl.b hr0.x, hr0.x, 2
-sel.b32 r8.z, r8.z, r5.y, r8.w
-mov.f32f32 r7.x, r7.x
-mad.f32 r8.w, r15.x, r1.y, r9.z
-mov.f32f32 r9.z, c<a0.x + 29>
-mov.f32f32 r10.y, c<a0.x + 30>
-(ul)mov.f32f32 r11.z, c<a0.x + 31>
+mul.f r3.x, r12.x, r3.x
+mov.f32f32 r12.w, (0.000000)
+mul.f r8.z, r8.z, r3.w
mova a0.x, hr0.x
-mov.f32f32 r12.y, r8.z
-sel.b32 r7.x, r7.x, r5.y, r7.y
-mov.f32f32 r7.y, r8.w
-mad.f32 r7.z, r11.x, r1.y, r7.z
-mov.f32f32 r6.w, r6.w
-mov.f32f32 r8.w, r7.x
-mov.f32f32 r11.x, c<a0.x + 32>
-mov.f32f32 r12.y, r12.y
-mov.f32f32 r12.z, r14.x
-mad.f32 r8.w, r10.z, r2.w, r8.w
-mad.f32 r8.x, r11.x, r0.z, r8.x
-cov.f32s16 hr0.x, r4.w
-mul.f r1.z, r12.z, r1.z
-mov.f32f32 r4.w, r16.y
-mov.f32f32 r8.w, r8.w
+mov.f32f32 r13.x, (0.000000)
+mul.f r13.y, r11.w, r1.z
+mul.f r1.z, r12.x, r1.z
+sel.b32 r8.x, r8.x, r11.z, r12.w
+sel.b32 r8.z, r8.z, r11.z, r13.x
+mul.f r11.z, r11.w, r2.y
+mov.f32f32 r11.w, c<a0.x + 156>
+mov.f32f32 r12.w, c<a0.x + 157>
+(ul)mov.f32f32 r13.x, c<a0.x + 158>
+mul.f r2.y, r12.x, r2.y
+mad.f32 r12.x, r11.w, r3.y, r12.y
+cov.f32s16 hr0.x, r6.x
+mad.f32 r6.x, r11.w, r1.w, r6.y
+mad.f32 r6.y, r11.w, r2.z, r7.x
+mad.f32 r7.x, r12.w, r3.y, r12.z
shl.b hr0.x, hr0.x, 2
-mov.f32f32 r7.z, r7.z
-mul.f r9.w, r9.w, r0.x
-mul.f r10.x, r10.x, r0.x
-mov.f32f32 r10.z, c<a0.x + 33>
-mov.f32f32 r11.x, c<a0.x + 34>
-(ul)mov.f32f32 r12.z, c<a0.x + 35>
+mad.f32 r3.x, r13.x, r3.y, r3.x
+mad.f32 r3.y, r12.w, r2.z, r11.z
+mad.f32 r2.y, r13.x, r2.z, r2.y
mova a0.x, hr0.x
-mad.f32 r1.z, r4.w, r1.w, r1.z
-sel.b32 r1.w, r8.w, r8.y, r7.x
-mul.f r4.w, r7.z, r3.z
-mad.f32 r7.x, r9.z, r0.y, r9.w
-mad.f32 r7.z, r10.y, r0.y, r10.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r8.w, c<a0.x + 36>
-mov.f32f32 r9.z, r15.x
-mov.f32f32 r9.w, r1.w
-mov.f32f32 r10.x, r1.w
-add.f r8.x, r8.x, r8.w
-mov.f32f32 r8.w, c<a0.x + 37>
-mov.f32f32 r10.y, c<a0.x + 38>
-(ul)mov.f32f32 r12.w, c<a0.x + 39>
-mova a0.x, hr0.y
-mad.f32 r1.z, r9.z, r2.x, r1.z
-mad.f32 r2.x, r9.w, r10.x, r6.x
-mov.f32f32 r6.x, r1.w
-mov.f32f32 r9.z, r1.w
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r9.w, c<a0.x + 24>
-mov.f32f32 r10.x, r2.w
-mov.f32f32 r4.w, r4.w
-mad.f32 r6.x, r6.x, r9.z, r10.w
-mul.f r9.z, r9.w, r0.x
-cov.f32s16 hr0.x, r4.z
-mad.f32 r1.z, r1.z, r10.x, r12.y
-mov.f32f32 r9.w, r4.w
-mul.f r0.w, r11.w, r0.w
-shl.b hr0.x, hr0.x, 2
-mov.f32f32 r1.z, r1.z
-mad.f32 r0.w, r13.y, r1.x, r0.w
-mov.f32f32 r1.x, r6.x
-mov.f32f32 r6.x, c<a0.x + 25>
-mov.f32f32 r10.x, c<a0.x + 26>
-(ul)mov.f32f32 r10.w, c<a0.x + 27>
+mad.f32 r2.z, r12.w, r1.w, r13.y
+mad.f32 r1.z, r13.x, r1.w, r1.z
+mul.f r1.w, r5.y, r3.w
+mad.f32 r5.y, r9.z, r2.x, r7.y
+mad.f32 r7.y, r11.y, r2.x, r8.y
+shl.b hr0.x, hr1.x, 2
+mov.f32f32 r8.y, c<a0.x + 160>
+mov.f32f32 r9.z, c<a0.x + 161>
+(ul)mov.f32f32 r11.y, c<a0.x + 162>
+mov.f32f32 r11.z, r1.w
+mad.f32 r11.w, r8.y, r3.z, r12.x
+mad.f32 r6.x, r8.y, r2.x, r6.x
+mad.f32 r6.y, r8.y, r2.w, r6.y
+mad.f32 r7.x, r9.z, r3.z, r7.x
+mad.f32 r8.y, r11.w, r4.x, r7.z
+mov.f32f32 r11.w, c62.x
+mad.f32 r1.w, r6.x, r4.x, r1.w
+mad.f32 r6.x, r6.y, r4.x, r5.w
+mad.f32 r6.y, r7.x, r4.x, r7.w
+cmps.f.ne r7.x, r11.w, c63.x
+sel.b32 r1.w, r1.w, r0.w, r11.z
+mov.f32f32 r11.z, r6.w
+mad.f32 r3.x, r11.y, r3.z, r3.x
+sel.b32 r3.z, r8.y, r7.x, r7.z
+sel.b32 r6.x, r6.x, r7.x, r5.w
+mov.f32f32 r8.y, r11.z
+sel.b32 r6.y, r6.y, r7.x, r7.w
+sel.b32 r3.z, r3.z, r0.w, r7.z
+sel.b32 r5.w, r6.x, r0.w, r5.w
+mov.f32f32 r6.x, r8.y
+sel.b32 r6.y, r6.y, r0.w, r7.w
+mad.f32 r3.x, r3.x, r4.x, r6.z
+mad.f32 r3.y, r9.z, r2.w, r3.y
+mad.f32 r7.z, r9.w, r6.x, r3.z
+mov.f32f32 r7.w, c62.x
+mad.f32 r8.y, r10.x, r6.x, r1.w
+mad.f32 r9.x, r9.x, r6.x, r5.w
+mov.f32f32 r9.w, c64.x
+cmps.f.ne r7.w, r7.w, c63.x
+mad.f32 r10.x, r10.y, r6.x, r6.y
+sel.b32 r3.x, r3.x, r7.x, r6.z
+cmps.f.lt r9.w, r9.w, c5.x
+sel.b32 r7.z, r7.z, r7.w, r3.z
+sel.b32 r9.x, r9.x, r7.w, r5.w
+sel.b32 r10.x, r10.x, r7.w, r6.y
+cov.u32f32 r9.w, r9.w
+sel.b32 r3.x, r3.x, r0.w, r6.z
+mad.f32 r3.y, r3.y, r4.x, r8.x
+mad.f32 r2.y, r11.y, r2.w, r2.y
+cmps.f.ne r6.z, r9.w, c63.x
+mad.f32 r2.w, r10.z, r6.x, r3.x
+sel.b32 r3.y, r3.y, r7.x, r8.x
+mad.f32 r2.y, r2.y, r4.x, r8.z
+sel.b32 r3.z, r7.z, r6.z, r3.z
+sel.b32 r1.w, r8.y, r6.z, r1.w
+sel.b32 r5.w, r9.x, r6.z, r5.w
+sel.b32 r6.y, r10.x, r6.z, r6.y
+mul.f r7.z, r3.z, r3.z
+mul.f r8.y, r1.w, r1.w
+mul.f r9.x, r5.w, r5.w
+mad.f32 r7.z, r6.y, r6.y, r7.z
+sel.b32 r2.w, r2.w, r7.w, r3.x
+mad.f32 r2.z, r9.z, r2.x, r2.z
+sel.b32 r3.y, r3.y, r0.w, r8.x
+mul.f r5.y, r5.y, r3.w
+sel.b32 r2.w, r2.w, r6.z, r3.x
+mad.f32 r2.z, r2.z, r4.x, r5.y
+mad.f32 r3.x, r10.w, r6.x, r3.y
+mov.f32f32 r5.y, r5.y
+mad.f32 r7.z, r2.w, r2.w, r7.z
+mul.f r8.x, r1.w, r1.w
+sel.b32 r3.x, r3.x, r7.w, r3.y
+sel.b32 r2.z, r2.z, r0.w, r5.y
+sel.b32 r2.y, r2.y, r7.x, r8.z
+mad.f32 r5.y, r9.y, r6.x, r2.z
+mad.f32 r1.z, r11.y, r2.x, r1.z
+rsq r2.x, r7.z
+(ss)mov.f32f32 r7.x, r2.x
+sel.b32 r3.x, r3.x, r6.z, r3.y
+sel.b32 r2.z, r5.y, r6.z, r2.z
+mul.f r2.x, r2.w, r2.x
+mul.f r3.y, r3.z, r7.x
+mova a0.x, hr0.w
+mad.f32 r2.w, r2.z, r2.z, r8.y
+mad.f32 r3.z, r3.x, r3.x, r9.x
+mul.f r5.y, r7.y, r3.w
+sel.b32 r2.y, r2.y, r0.w, r8.z
+mad.f32 r1.z, r1.z, r4.x, r5.y
+mad.f32 r7.y, r11.x, r6.x, r2.y
+(ss)mov.f32f32 r7.z, c<a0.x + 24>
+mov.f32f32 r5.y, r5.y
+mad.f32 r8.x, r2.z, r2.z, r8.x
+sel.b32 r7.y, r7.y, r7.w, r2.y
+mul.f r7.z, r7.z, r0.x
+mov.f32f32 r7.w, c<a0.x + 25>
+mov.f32f32 r8.y, c<a0.x + 26>
+(ul)mov.f32f32 r8.z, c<a0.x + 27>
mova a0.x, hr0.x
-sel.b32 r1.z, r1.z, r9.y, r8.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r9.y, r1.w
-mad.f32 r0.w, r13.x, r1.y, r0.w
-mov.f32f32 r1.y, r1.w
-mad.f32 r1.w, r10.z, r0.z, r7.x
-mov.f32f32 r7.x, c<a0.x + 28>
-sel.b32 r1.z, r1.z, r8.y, r8.z
-mov.f32f32 r0.w, r0.w
-add.f r1.w, r1.w, r8.w
-mad.f32 r7.x, r7.x, r0.y, r9.z
-cov.f32s16 hr0.x, r4.z
-mov.f32f32 r8.z, r1.z
-mov.f32f32 r8.w, r1.z
-mad.f32 r0.w, r0.w, r3.w, r9.w
-shl.b hr0.x, hr0.x, 2
-mov.f32f32 r1.z, r1.z
-mul.f r6.x, r6.x, r0.x
-mad.f32 r7.z, r11.x, r0.z, r7.z
-mov.f32f32 r9.z, c<a0.x + 29>
-mov.f32f32 r9.w, c<a0.x + 30>
-(ul)mov.f32f32 r10.z, c<a0.x + 31>
+sel.b32 r1.z, r1.z, r0.w, r5.y
+sel.b32 r2.y, r7.y, r6.z, r2.y
+mad.f32 r5.y, r8.w, r6.x, r1.z
+mul.f r6.y, r6.y, r7.x
+mul.f r7.x, r7.w, r0.x
+mul.f r7.y, r8.y, r0.x
+mov.f32f32 r7.w, c<a0.x + 28>
+sel.b32 r1.z, r5.y, r6.z, r1.z
+mad.f32 r3.z, r2.y, r2.y, r3.z
+mov.f32f32 r5.y, c<a0.x + 29>
+mad.f32 r7.z, r7.w, r0.y, r7.z
+shl.b hr0.x, hr1.z, 2
+mad.f32 r2.w, r1.z, r1.z, r2.w
+mad.f32 r7.w, r1.z, r1.z, r8.x
+mad.f32 r5.y, r5.y, r0.y, r7.x
+mov.f32f32 r7.x, c<a0.x + 30>
+(ul)mov.f32f32 r8.x, c<a0.x + 31>
mova a0.x, hr0.x
-mad.f32 r8.z, r8.z, r8.w, r9.x
-mov.f32f32 r0.w, r0.w
-mad.f32 r6.x, r9.z, r0.y, r6.x
-add.f r7.z, r7.z, r10.y
-mul.f r8.w, r10.x, r0.x
-mul.f r9.x, r11.y, r0.x
-mov.f32f32 r9.z, c<a0.x + 32>
-rsq r8.z, r8.z
-(ss)mov.f32f32 r8.z, r8.z
-sel.b32 r0.w, r0.w, r5.y, r4.w
-mov.f32f32 r4.w, c<a0.x + 33>
-mad.f32 r7.x, r9.z, r0.z, r7.x
-cov.f32s16 hr0.x, r4.z
-mul.f r4.z, r5.w, r8.z
-mov.f32f32 r5.w, r0.w
-mul.f r2.z, r2.z, r8.z
-shl.b hr0.x, hr0.x, 2
-mov.f32f32 r4.z, r4.z
-mad.f32 r5.w, r7.y, r2.w, r5.w
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r7.y, c<a0.x + 34>
+rsq r3.z, r3.z
+(ss)mov.f32f32 r8.y, r3.z
+rsq r2.w, r2.w
+(ss)mov.f32f32 r8.w, r2.w
+rsq r7.w, r7.w
+(ss)mov.f32f32 r9.x, r7.w
+mul.f r2.y, r2.y, r3.z
+(ss)mul.f r3.z, r1.z, r2.w
+mul.f r1.z, r1.z, r7.w
+mov.f32f32 r2.w, c<a0.x + 32>
+mul.f r7.w, r1.w, r8.w
+mul.f r5.w, r5.w, r8.y
+mul.f r1.w, r1.w, r9.x
+mad.f32 r2.w, r2.w, r0.z, r7.z
+mov.f32f32 r7.z, c<a0.x + 33>
+mov.f32f32 r9.y, c<a0.x + 34>
(ul)mov.f32f32 r9.z, c<a0.x + 35>
+mova a0.x, hr1.y
+mul.f r3.x, r3.x, r8.y
+mul.f r8.y, r2.z, r8.w
+mul.f r2.z, r2.z, r9.x
+mad.f32 r5.y, r7.z, r0.z, r5.y
+mad.f32 r7.x, r7.x, r0.y, r7.y
+mul.f r7.y, r8.z, r0.x
+mov.f32f32 r7.z, c<a0.x + 36>
+mad.f32 r7.x, r9.y, r0.z, r7.x
+mov.f32f32 r8.z, c<a0.x + 37>
+mov.f32f32 r8.w, c<a0.x + 38>
+add.f r7.z, r2.w, r7.z
+(ul)mov.f32f32 r2.w, c<a0.x + 39>
+mova a0.x, hr0.y
+add.f r5.y, r5.y, r8.z
+add.f r7.x, r7.x, r8.w
+mad.f32 r7.y, r8.x, r0.y, r7.y
+shl.b hr0.x, hr0.z, 2
+cov.f32s16 hr0.y, r4.w
+cov.f32s16 hr0.z, r4.w
+mov.f32f32 r8.x, c<a0.x + 24>
+mov.f32f32 r8.z, c<a0.x + 25>
+mov.f32f32 r8.w, c<a0.x + 26>
+mad.f32 r7.y, r9.z, r0.z, r7.y
+mul.f r8.x, r8.x, r0.x
+shl.b hr0.z, hr0.z, 2
+mul.f r8.z, r8.z, r0.x
+mul.f r8.w, r8.w, r0.x
+add.f r7.y, r7.y, r2.w
+(ul)mov.f32f32 r2.w, c<a0.x + 27>
+mova a0.x, hr0.z
+shl.b hr0.y, hr0.y, 2
+cov.f32s16 hr0.z, r4.w
+cov.f32s16 hr0.w, r5.x
+cov.f32s16 hr1.x, r5.x
+cov.f32s16 hr1.y, r5.x
+shl.b hr0.z, hr0.z, 2
+mov.f32f32 r4.w, c<a0.x + 28>
+mov.f32f32 r5.x, c<a0.x + 29>
+mov.f32f32 r9.x, c<a0.x + 30>
+mul.f r2.w, r2.w, r0.x
+mad.f32 r4.w, r4.w, r0.y, r8.x
+(ul)mov.f32f32 r8.x, c<a0.x + 31>
+mova a0.x, hr0.z
+mad.f32 r5.x, r5.x, r0.y, r8.z
+mad.f32 r8.z, r9.x, r0.y, r8.w
+mad.f32 r2.w, r8.x, r0.y, r2.w
+shl.b hr0.z, hr0.w, 2
+shl.b hr0.w, hr1.x, 2
+shl.b hr1.x, hr1.y, 2
+mov.f32f32 r8.x, c<a0.x + 32>
+mov.f32f32 r8.w, c<a0.x + 33>
+mov.f32f32 r9.x, c<a0.x + 34>
+(ul)mov.f32f32 r9.y, c<a0.x + 35>
+mad.f32 r4.w, r8.x, r0.z, r4.w
+mova a0.x, hr0.y
+mad.f32 r5.x, r8.w, r0.z, r5.x
+mad.f32 r8.x, r9.x, r0.z, r8.z
+mad.f32 r2.w, r9.y, r0.z, r2.w
+sel.b32 r8.z, r3.w, r0.w, r4.z
+(rpt1)nop
+mov.f32f32 r8.w, c<a0.x + 36>
+mov.f32f32 r9.x, c<a0.x + 37>
+mov.f32f32 r9.y, c<a0.x + 38>
+(ul)mov.f32f32 r9.z, c<a0.x + 39>
+add.f r4.w, r4.w, r8.w
+add.f r5.x, r5.x, r9.x
+add.f r8.x, r8.x, r9.y
+add.f r2.w, r2.w, r9.z
+mul.f r4.w, r4.w, r3.w
mova a0.x, hr0.x
-mov.f32f32 r5.w, r5.w
-mul.f r1.z, r1.z, r8.z
-mad.f32 r4.w, r4.w, r0.z, r6.x
-mad.f32 r6.x, r9.w, r0.y, r8.w
-mad.f32 r8.z, r11.z, r0.y, r9.x
-mad.f32 r6.x, r7.y, r0.z, r6.x
-mov.f32f32 r7.y, c<a0.x + 36>
-sel.b32 r0.w, r5.w, r8.y, r0.w
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r5.w, c<a0.x + 37>
-add.f r7.x, r7.x, r7.y
-mov.f32f32 r7.y, r0.w
-mov.f32f32 r8.w, r0.w
-mov.f32f32 r9.x, r0.w
-mul.f r7.x, r7.x, r3.z
-mov.f32f32 r9.w, r0.w
-mad.f32 r2.x, r7.y, r8.w, r2.x
-mov.f32f32 r7.y, r0.w
-mov.f32f32 r8.w, r7.x
-mul.f r6.y, r6.y, c63.y
-mad.f32 r1.x, r9.x, r9.w, r1.x
-mov.f32f32 r0.w, r0.w
-add.f r4.w, r4.w, r5.w
-cov.f32s16 hr0.x, r6.y
-rsq r2.x, r2.x
-(ss)mov.f32f32 r2.x, r2.x
-mov.f32f32 r5.w, c<a0.x + 38>
-mul.f r4.w, r4.w, r3.z
-shl.b hr0.x, hr0.x, 2
-mul.f r9.x, r12.x, r2.x
-rsq r1.x, r1.x
-(ss)mov.f32f32 r1.x, r1.x
-mul.f r9.y, r9.y, r2.x
-(ul)mov.f32f32 r9.w, c<a0.x + 39>
-mova a0.x, hr0.x
-mov.f32f32 r9.x, r9.x
-mul.f r6.w, r6.w, r1.x
-mov.f32f32 r9.y, r9.y
-mul.f r2.x, r7.y, r2.x
-mul.f r1.y, r1.y, r1.x
-mul.f r0.w, r0.w, r1.x
-mov.f32f32 r1.x, c<a0.x + 24>
-mov.f32f32 r6.w, r6.w
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r1.y, r1.y
-mul.f r1.x, r1.x, r0.x
-cov.f32s16 hr0.x, r6.y
-mov.f32f32 r7.y, r0.w
-mov.f32f32 r0.w, r4.w
-add.f r5.w, r6.x, r5.w
-shl.b hr0.x, hr0.x, 2
-mov.f32f32 r6.x, c<a0.x + 25>
-mad.f32 r8.z, r12.z, r0.z, r8.z
-mul.f r5.w, r5.w, r3.z
-mov.f32f32 r10.x, c<a0.x + 26>
-(ul)mov.f32f32 r10.y, c<a0.x + 27>
-mova a0.x, hr0.x
-mul.f r6.x, r6.x, r0.x
-mov.f32f32 r11.x, r5.w
-mul.f r10.x, r10.x, r0.x
-add.f r8.z, r8.z, r12.w
-mul.f r10.w, r10.w, r0.x
-mul.f r0.x, r10.y, r0.x
-mov.f32f32 r10.y, c<a0.x + 28>
-mov.f32f32 r11.y, c<a0.x + 29>
-mov.f32f32 r11.z, c<a0.x + 30>
-mad.f32 r10.z, r10.z, r0.y, r10.w
-mad.f32 r1.x, r10.y, r0.y, r1.x
-cov.f32s16 hr0.x, r6.y
-mad.f32 r6.x, r11.y, r0.y, r6.x
-mad.f32 r10.x, r11.z, r0.y, r10.x
-mad.f32 r9.z, r9.z, r0.z, r10.z
-shl.b hr0.x, hr0.x, 2
+mul.f r5.x, r5.x, r3.w
+mul.f r8.x, r8.x, r3.w
+mul.f r3.w, r2.w, r3.w
+cmps.f.lt r2.w, r5.z, c5.x
+sel.b32 r4.y, r4.z, r0.w, r4.y
+mov.f32f32 r4.z, c62.x
+mov.f32f32 r5.z, c<a0.x + 24>
+mov.f32f32 r8.w, c<a0.x + 25>
+mov.f32f32 r9.x, c<a0.x + 26>
+(ul)mov.f32f32 r9.y, c<a0.x + 27>
+mul.f r5.z, r5.z, r0.x
+mova a0.x, hr0.w
+mul.f r8.w, r8.w, r0.x
+mul.f r9.x, r9.x, r0.x
+mul.f r0.x, r9.y, r0.x
+cov.u32f32 r9.y, r2.w
+cmps.f.ne r4.z, r4.z, c63.x
+mov.f32f32 r2.w, (0.000000)
+mov.f32f32 r9.z, c<a0.x + 28>
+mov.f32f32 r9.w, c<a0.x + 29>
+mov.f32f32 r10.x, c<a0.x + 30>
(ul)mov.f32f32 r10.y, c<a0.x + 31>
-cov.f32s16 hr0.y, r6.y
-add.f r6.y, r9.z, r9.w
-mova a0.x, hr0.x
+mad.f32 r5.z, r9.z, r0.y, r5.z
+mova a0.x, hr1.x
+mad.f32 r8.w, r9.w, r0.y, r8.w
+mad.f32 r9.x, r10.x, r0.y, r9.x
mad.f32 r0.x, r10.y, r0.y, r0.x
-shl.b hr0.x, hr0.y, 2
-mov.f32f32 r0.y, r3.z
-mov.f32f32 r9.z, r4.y
-mov.f32f32 r4.y, r4.y
-cmps.f.lt r5.z, r5.z, c5.x
-mov.f32f32 r9.w, c<a0.x + 32>
+cmps.f.ne r9.z, r9.y, c63.x
+mov.f32f32 r9.w, (0.000000)
+mov.f32f32 r10.x, (0.000000)
+mov.f32f32 r0.y, c<a0.x + 32>
mov.f32f32 r10.y, c<a0.x + 33>
mov.f32f32 r10.z, c<a0.x + 34>
-mul.f r3.z, r6.y, r3.z
-mad.f32 r1.x, r9.w, r0.z, r1.x
-(ul)mov.f32f32 r6.y, c<a0.x + 35>
-mova a0.x, hr0.x
-mad.f32 r6.x, r10.y, r0.z, r6.x
-mad.f32 r9.w, r10.z, r0.z, r10.x
-mov.f32f32 r10.x, r3.z
-mad.f32 r0.x, r6.y, r0.z, r0.x
-sel.b32 r6.y, r0.y, r5.y, r9.z
-cov.u32f32 r5.z, r5.z
-mov.f32f32 r0.y, c<a0.x + 36>
-mov.f32f32 r0.z, c<a0.x + 37>
-mov.f32f32 r9.z, c<a0.x + 38>
-(ul)mov.f32f32 r10.y, c<a0.x + 39>
-add.f r0.y, r1.x, r0.y
-add.f r0.z, r6.x, r0.z
-add.f r1.x, r9.w, r9.z
-add.f r0.x, r0.x, r10.y
-mad.f32 r0.y, r0.y, r3.w, r8.w
-mad.f32 r0.z, r0.z, r3.w, r0.w
-mad.f32 r0.w, r1.x, r3.w, r11.x
-mad.f32 r0.x, r0.x, r3.w, r10.x
-sel.b32 r0.y, r0.y, r5.y, r7.x
-sel.b32 r0.z, r0.z, r5.y, r4.w
-sel.b32 r0.w, r0.w, r5.y, r5.w
-sel.b32 r0.x, r0.x, r5.y, r3.z
-mov.f32f32 r1.x, r0.y
-mov.f32f32 r3.z, r0.z
-mad.f32 r1.x, r8.x, r2.w, r1.x
-mad.f32 r1.w, r1.w, r2.w, r3.z
-mov.f32f32 r3.z, r0.w
-mov.f32f32 r3.w, r0.x
-sel.b32 r0.y, r1.x, r8.y, r0.y
-sel.b32 r0.z, r1.w, r8.y, r0.z
-mad.f32 r1.x, r7.z, r2.w, r3.z
-mad.f32 r1.w, r8.z, r2.w, r3.w
-add.f r2.w, c4.x, (neg)r0.y
-mul.f r3.z, c0.w, r0.y
-mul.f r3.w, c0.z, r0.y
-mul.f r4.w, c0.y, r0.y
-mul.f r5.w, r2.w, r2.w
-add.f r6.x, c4.y, (neg)r0.z
-mad.f32 r3.z, c1.w, r0.z, r3.z
-mad.f32 r3.w, c1.z, r0.z, r3.w
-mad.f32 r4.w, c1.y, r0.z, r4.w
-mad.f32 r5.w, r6.x, r6.x, r5.w
-sel.b32 r0.w, r1.x, r8.y, r0.w
-mul.f r0.y, c0.x, r0.y
-sel.b32 r0.x, r1.w, r8.y, r0.x
-mov.f32f32 r1.x, r5.w
-add.f r1.w, c4.z, (neg)r0.w
-mad.f32 r3.z, c2.w, r0.w, r3.z
-mad.f32 r3.w, c2.z, r0.w, r3.w
-mad.f32 r4.w, c2.y, r0.w, r4.w
-mad.f32 r1.x, r1.w, r1.w, r1.x
-mad.f32 r3.z, c3.w, r0.x, r3.z
-mad.f32 r3.w, c3.z, r0.x, r3.w
-mad.f32 r4.w, c3.y, r0.x, r4.w
-mad.f32 r0.y, c1.x, r0.z, r0.y
-cmps.f.ne r5.w, r5.z, c63.x
+(ul)mov.f32f32 r10.w, c<a0.x + 35>
+mad.f32 r0.y, r0.y, r0.z, r5.z
+mova a0.x, hr0.z
+mad.f32 r5.z, r10.y, r0.z, r8.w
+mad.f32 r8.w, r10.z, r0.z, r9.x
+mad.f32 r0.x, r10.w, r0.z, r0.x
+sel.b32 r2.w, r8.z, r9.z, r2.w
+mov.f32f32 r8.z, (0.000000)
mov.f32f32 r0.z, (0.000000)
-rsq r1.x, r1.x
-(ss)mov.f32f32 r1.x, r1.x
-mov.f32f32 r7.x, (0.000000)
-mov.f32f32 r7.z, (0.000000)
-mad.f32 r8.x, c2.x, r0.w, r0.y
-mul.f r0.y, r2.w, r1.x
-mul.f r0.w, r6.x, r1.x
-mul.f r1.x, r1.w, r1.x
-sel.b32 r1.w, r3.z, r5.w, r7.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r2.w, r0.w
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r0.w, r1.w
-mov.f32f32 r1.w, r0.y
-mov.f32f32 r3.z, r0.y
-mov.f32f32 r6.x, r0.y
-mov.f32f32 r0.y, r0.y
-mul.f r1.w, r7.w, r1.w
-mov.f32f32 r7.x, r2.w
-mul.f r3.z, r4.z, r3.z
-mov.f32f32 r4.z, r2.w
-mul.f r6.x, r9.x, r6.x
-mad.f32 r1.w, r5.x, r7.x, r1.w
-mov.f32f32 r5.x, r2.w
-mad.f32 r2.z, r2.z, r4.z, r3.z
-mul.f r0.y, r6.w, r0.y
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r3.z, r1.x
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r4.z, r1.x
-mad.f32 r5.x, r9.y, r5.x, r6.x
-mad.f32 r1.w, r2.y, r3.z, r1.w
-mov.f32f32 r2.y, r2.w
-mov.f32f32 r2.w, r1.x
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.w, r1.w
-mad.f32 r1.z, r1.z, r4.z, r2.z
-mov.f32f32 r2.z, r5.x
-mad.f32 r0.y, r1.y, r2.y, r0.y
-mov.f32f32 r1.y, r1.w
-mov.f32f32 r1.w, c62.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r0.y, r0.y
-mad.f32 r2.x, r2.x, r2.w, r2.z
-cmps.f.ne r2.z, r1.w, c63.x
-mad.f32 r0.y, r7.y, r1.x, r0.y
-sel.b32 r0.z, r3.w, r5.w, r0.z
-sel.b32 r1.x, r4.w, r5.w, r7.z
-sel.b32 r1.y, r1.y, r2.z, r6.z
-mov.f32f32 r1.w, (0.000000)
-sel.b32 r1.z, r1.z, r2.z, r0.y
-mov.f32f32 r2.w, (0.000000)
-mov.f32f32 r2.x, r2.x
-sel.b32 r1.y, r1.y, r5.w, r1.w
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.y, r1.x
-mad.f32 r0.x, c3.x, r0.x, r8.x
-mov.f32f32 r1.x, r1.y
-mov.f32f32 r2.y, r1.y
-sel.b32 r1.y, r1.z, r5.w, r2.w
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r1.w, r1.x
-mov.f32f32 r1.x, r4.x
-mov.f32f32 r1.y, r1.y
-sel.b32 r0.x, r0.x, r5.w, r5.z
-(rpt1)nop
-mov.f32f32 r1.z, r1.y
-sel.b32 r1.x, r4.y, r5.y, r1.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r1.y, (0.000000)
-mov.f32f32 r3.y, r3.y
-sel.b32 r1.x, r2.x, r2.z, r1.x
-mov.f32f32 r2.x, (0.000000)
-sel.b32 r1.y, r6.y, r5.w, r1.y
-(rpt1)nop
-sel.b32 r1.x, r1.x, r5.w, r2.x
-mov.f32f32 r2.w, r1.y
-mov.f32f32 r1.y, r3.y
-mov.f32f32 r2.x, r3.x
-mov.f32f32 r3.x, r1.x
-mov.f32f32 r2.z, r1.x
-(rpt1)nop
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r1.x, r2.x
+mov.f32f32 r9.x, c<a0.x + 36>
+mov.f32f32 r10.y, c<a0.x + 37>
+mov.f32f32 r10.z, c<a0.x + 38>
+(ul)mov.f32f32 r10.w, c<a0.x + 39>
+add.f r0.y, r0.y, r9.x
+add.f r5.z, r5.z, r10.y
+add.f r8.w, r8.w, r10.z
+add.f r0.x, r0.x, r10.w
+mad.f32 r0.y, r0.y, r4.x, r4.w
+mad.f32 r5.z, r5.z, r4.x, r5.x
+mad.f32 r8.w, r8.w, r4.x, r8.x
+mad.f32 r0.x, r0.x, r4.x, r3.w
+sel.b32 r0.y, r0.y, r0.w, r4.w
+sel.b32 r4.x, r5.z, r0.w, r5.x
+mad.f32 r4.w, r7.z, r6.x, r0.y
+mad.f32 r5.x, r5.y, r6.x, r4.x
+sel.b32 r5.y, r8.w, r0.w, r8.x
+sel.b32 r0.x, r0.x, r0.w, r3.w
+sel.b32 r0.y, r4.w, r6.z, r0.y
+sel.b32 r0.w, r5.x, r6.z, r4.x
+mad.f32 r3.w, r7.x, r6.x, r5.y
+mad.f32 r4.x, r7.y, r6.w, r0.x
+add.f r4.w, c4.x, (neg)r0.y
+mul.f r5.x, c0.w, r0.y
+mul.f r5.z, c0.z, r0.y
+mul.f r6.x, c0.y, r0.y
+mul.f r7.x, r4.w, r4.w
+add.f r7.y, c4.y, (neg)r0.w
+mad.f32 r5.x, c1.w, r0.w, r5.x
+mad.f32 r5.z, c1.z, r0.w, r5.z
+mad.f32 r6.x, c1.y, r0.w, r6.x
+mad.f32 r7.x, r7.y, r7.y, r7.x
+sel.b32 r3.w, r3.w, r6.z, r5.y
+mul.f r0.y, c0.x, r0.y
+sel.b32 r0.x, r4.x, r6.z, r0.x
+mov.f32f32 r4.x, (0.000000)
+add.f r5.y, c4.z, (neg)r3.w
+mad.f32 r5.x, c2.w, r3.w, r5.x
+mad.f32 r5.z, c2.z, r3.w, r5.z
+mad.f32 r6.x, c2.y, r3.w, r6.x
+mad.f32 r6.z, r5.y, r5.y, r7.x
+mad.f32 r5.x, c3.w, r0.x, r5.x
+mad.f32 r5.z, c3.z, r0.x, r5.z
+mad.f32 r6.x, c3.y, r0.x, r6.x
+mad.f32 r7.x, c1.x, r0.w, r0.y
+mov.f32f32 r0.y, (0.000000)
+nop
+rsq r0.w, r6.z
+(ss)mov.f32f32 r6.z, r0.w
+mul.f r5.y, r5.y, r0.w
+sel.b32 r0.w, r5.x, r9.z, r0.z
+sel.b32 r0.z, r5.z, r9.z, r4.x
+mul.f r4.x, r4.w, r6.z
+mul.f r4.w, r7.y, r6.z
+mov.f32f32 r5.x, r5.y
+sel.b32 r0.y, r6.x, r9.z, r0.y
+mov.f32f32 r5.z, r4.x
+mul.f r4.x, r5.w, r4.x
+mov.f32f32 r5.w, r4.w
+mad.f32 r3.w, c2.x, r3.w, r7.x
+mul.f r3.y, r3.y, r5.z
+mul.f r6.x, r7.w, r5.z
+mad.f32 r3.y, r6.y, r5.w, r3.y
+mad.f32 r6.x, r8.y, r5.w, r6.x
+mad.f32 r2.x, r2.x, r5.x, r3.y
+mad.f32 r3.y, r3.z, r5.x, r6.x
+mad.f32 r3.x, r3.x, r4.w, r4.x
+mul.f r1.w, r1.w, r5.z
+sel.b32 r2.x, r2.x, r4.z, r6.w
+sel.b32 r3.y, r3.y, r4.z, r4.y
+mad.f32 r3.x, r2.y, r5.y, r3.x
+mad.f32 r2.y, r2.z, r5.w, r1.w
+sel.b32 r1.w, r2.x, r9.z, r10.x
+sel.b32 r2.x, r3.y, r9.z, r9.w
+mad.f32 r1.z, r1.z, r5.x, r2.y
+mad.f32 r0.x, c3.x, r0.x, r3.w
+mov.f32f32 r2.y, r1.w
+mov.f32f32 r2.z, r2.x
+sel.b32 r1.z, r3.x, r4.z, r1.z
+sel.b32 r0.x, r0.x, r9.z, r9.y
(rpt1)nop
-mov.f32f32 r2.x, r3.x
+sel.b32 r1.z, r1.z, r9.z, r8.z
end
nop
+nop
+nop
; VERT: outputs: r0.x (0:0) r1.x (5:20) r2.x (5:21)
-; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r1.z (0:0,cm=7,il=16,b=0) r2.y (0:0,cm=7,il=20,b=0) r3.x (0:0,cm=3,il=24,b=0) r3.z (0:0,cm=f,il=28,b=0) r4.z (0:0,cm=7,il=32,b=0)
-; VERT: 779 instructions, 1 half, 17 full
+; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r1.z (0:0,cm=7,il=12,b=0) r2.y (0:0,cm=7,il=16,b=0) r3.x (0:0,cm=7,il=20,b=0) r1.x (0:0,cm=3,il=24,b=0) r3.w (0:0,cm=f,il=28,b=0) r4.w (0:0,cm=7,il=32,b=0)
+; VERT: 499 instructions, 2 half, 14 full
diff --git a/reference/builtin2.asm b/reference/builtin2.asm
index 0731536..2949ecf 100644
--- a/reference/builtin2.asm
+++ b/reference/builtin2.asm
@@ -6,15 +6,16 @@
@out(hr1.x) out1
@out(hr1.y) out2
@out(hr1.z) out3
-(sy)(ss)bary.f r0.z, 0, r0.x
-bary.f (ei)r0.x, 1, r0.x
-(rpt1)nop
-mov.f32f32 r0.y, r0.z
-mov.f32f32 r0.z, r0.x
+@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
+(sy)(ss)bary.f r1.w, 0, r0.x
+bary.f (ei)r2.x, 1, r0.x
(rpt5)nop
-sam (f16)(xyzw)hr0.w, r0.y, s#0, t#0
+sam (f16)(xyzw)hr0.w, r1.w, s#0, t#0
end
+nop
+nop
+nop
; FRAG: outputs: r0.w (1:0)
; FRAG: inputs: r0.x (19:0,cm=f,il=8,b=1)
-; FRAG: 14 instructions, 2 half, 1 full
+; FRAG: 10 instructions, 2 half, 3 full
diff --git a/reference/bump/bump-12.asm b/reference/bump/bump-12.asm
index 0f8742e..9112ce0 100644
--- a/reference/bump/bump-12.asm
+++ b/reference/bump/bump-12.asm
@@ -1,16 +1,16 @@
; options:
; VERT: new compiler
-@in(r0.x) in0
-@in(r0.y) in1
-@in(r0.z) in2
+@in(r3.w) in0
+@in(r4.x) in1
+@in(r4.y) in2
@in(r1.x) in4
@in(r1.y) in5
-@in(r1.z) in8
-@in(r1.w) in9
-@in(r2.x) in10
-@in(r3.w) in12
-@in(r4.x) in13
-@in(r4.y) in14
+@in(r0.x) in8
+@in(r0.y) in9
+@in(r0.z) in10
+@in(r4.z) in12
+@in(r4.w) in13
+@in(r5.x) in14
@out(r0.x) out0
@out(r0.y) out1
@out(r0.z) out2
@@ -27,151 +27,109 @@
@out(r3.y) out13
@out(r3.z) out14
@out(r3.w) out15
-(sy)(ss)mul.f r0.w, r2.x, r4.x
-mul.f r2.y, c4.x, r1.z
-mad.f32 r0.w, r1.w, r4.y, (neg)r0.w
-mad.f32 r2.y, c5.x, r1.w, r2.y
-mul.f r2.z, c4.x, r3.w
-mad.f32 r2.y, c6.x, r2.x, r2.y
-mov.f32f32 r0.w, r0.w
-mad.f32 r2.z, c5.x, r4.x, r2.z
-mul.f r2.w, c4.y, r3.w
-mul.f r3.x, c4.z, r3.w
-mul.f r3.y, c4.x, r0.w
-mul.f r3.z, r1.z, r4.y
-add.f r2.y, r2.y, c7.x
-mad.f32 r3.z, r2.x, r3.w, (neg)r3.z
-mad.f32 r2.z, c6.x, r4.y, r2.z
-mul.f r4.z, c4.y, r0.w
-mul.f r0.w, c4.z, r0.w
-mov.f32f32 r3.z, r3.z
-mov.f32f32 r4.w, r2.y
-add.f r2.y, r2.z, c7.x
-mad.f32 r2.z, c5.y, r4.x, r2.w
-mad.f32 r2.w, c5.x, r3.z, r3.y
-mul.f r3.y, r1.w, r3.w
-mul.f r5.x, r4.w, r4.w
-mad.f32 r3.y, r1.z, r4.x, (neg)r3.y
-mul.f r5.y, c4.y, r1.z
-mov.f32f32 r2.y, r2.y
-mad.f32 r5.y, c5.y, r1.w, r5.y
-mov.f32f32 r3.y, r3.y
-mad.f32 r5.y, c6.y, r2.x, r5.y
-mul.f r5.z, r2.y, r2.y
-mad.f32 r2.z, c6.y, r4.y, r2.z
-mad.f32 r2.w, c6.x, r3.y, r2.w
-add.f r5.y, r5.y, c7.y
-mad.f32 r4.z, c5.y, r3.z, r4.z
-mad.f32 r0.w, c5.z, r3.z, r0.w
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r5.y, r5.y
-add.f r2.z, r2.z, c7.y
-mad.f32 r3.z, c6.y, r3.y, r4.z
-add.f r2.w, r2.w, c7.x
-mad.f32 r4.z, r5.y, r5.y, r5.x
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r3.z, r3.z
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r4.z, r4.z
-mad.f32 r5.x, r2.z, r2.z, r5.z
-mul.f r1.z, c4.z, r1.z
-mul.f r5.z, r2.w, r2.w
-add.f r3.z, r3.z, c7.y
-mad.f32 r1.z, c5.z, r1.w, r1.z
-mov.f32f32 r1.w, r5.x
-mad.f32 r3.x, c5.z, r4.x, r3.x
-mov.f32f32 r3.z, r3.z
-mad.f32 r1.z, c6.z, r2.x, r1.z
-mad.f32 r2.x, c6.z, r4.y, r3.x
-mad.f32 r0.w, c6.z, r3.y, r0.w
-mad.f32 r3.x, r3.z, r3.z, r5.z
-add.f r1.z, r1.z, c7.z
-add.f r2.x, r2.x, c7.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r2.x, r2.x
-add.f r0.w, r0.w, c7.z
-mul.f r3.y, c0.w, r0.x
-mul.f r5.x, c0.z, r0.x
-mul.f r5.z, c0.y, r0.x
-mov.f32f32 r0.w, r0.w
-mad.f32 r4.z, r1.z, r1.z, r4.z
-mad.f32 r1.w, r2.x, r2.x, r1.w
-mad.f32 r3.y, c1.w, r0.y, r3.y
-mad.f32 r3.x, r0.w, r0.w, r3.x
-mad.f32 r3.y, c2.w, r0.z, r3.y
-mad.f32 r5.x, c1.z, r0.y, r5.x
-mad.f32 r5.z, c1.y, r0.y, r5.z
-mul.f r0.x, c0.x, r0.x
-mul.f r3.w, c4.w, r3.w
-add.f r5.w, r3.y, c3.w
-rsq r3.x, r3.x
-(ss)mov.f32f32 r3.x, r3.x
-rsq r3.y, r4.z
-nop
-rsq r1.w, r1.w
-(ss)mov.f32f32 r4.z, r3.y
-mov.f32f32 r1.w, r1.w
-mul.f r0.w, r0.w, r3.x
-mul.f r3.y, r3.z, r3.x
-mul.f r2.w, r2.w, r3.x
-mul.f r1.z, r1.z, r4.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r3.x, r3.y
-mov.f32f32 r2.w, r2.w
-nop
-mov.f32f32 r3.z, r0.w
-mov.f32f32 r3.y, r3.x
-mov.f32f32 r3.x, r2.w
-mov.f32f32 r0.w, r1.z
-mul.f r1.z, r2.x, r1.w
-mul.f r2.x, r2.z, r1.w
-mul.f r1.w, r2.y, r1.w
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r2.y, r2.x
-mov.f32f32 r1.w, r1.w
+@const(c8.x) 0x3f800000, 0x00000000, 0x00000000, 0x00000000
+@const(c9.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
+(sy)(ss)mul.f r0.w, r0.z, r4.w
+mul.f r1.z, c4.x, r4.z
+mad.f32 r0.w, r0.y, r5.x, (neg)r0.w
+mad.f32 r1.z, c5.x, r4.w, r1.z
+mul.f r1.w, c4.x, r0.x
+mad.f32 r1.z, c6.x, r5.x, r1.z
mov.f32f32 r2.x, r0.w
+mad.f32 r1.w, c5.x, r0.y, r1.w
+mul.f r0.w, c4.x, r0.w
+add.f r1.z, r1.z, c7.x
+mul.f r2.y, c4.y, r2.x
+mul.f r2.z, r0.x, r5.x
+mad.f32 r1.w, c6.x, r0.z, r1.w
+mad.f32 r2.z, r0.z, r4.z, (neg)r2.z
mov.f32f32 r2.w, r1.z
-mov.f32f32 r2.z, r2.y
-mov.f32f32 r2.y, r1.w
-mul.f r1.z, r5.y, r4.z
-mul.f r1.w, r4.w, r4.z
-mov.f32f32 r0.w, r5.w
-mad.f32 r4.z, c2.z, r0.z, r5.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r1.w, r1.w
-mad.f32 r4.w, c2.y, r0.z, r5.z
-mad.f32 r0.x, c1.x, r0.y, r0.x
-mov.f32f32 r0.y, r1.z
-mov.f32f32 r1.z, r1.w
-(rpt1)nop
-mov.f32f32 r1.w, r0.y
-mov.f32f32 r1.z, r1.z
-add.f r0.y, r4.z, c3.z
-add.f r4.z, r4.w, c3.y
-mad.f32 r0.x, c2.x, r0.z, r0.x
-mad.f32 r3.w, c5.w, r4.x, r3.w
-mov.f32f32 r0.z, r0.y
-mov.f32f32 r0.y, r4.z
+mul.f r2.x, c4.z, r2.x
+add.f r1.w, r1.w, c7.x
+mov.f32f32 r3.x, r2.z
+mul.f r2.w, r2.w, r2.w
+mul.f r3.y, c4.y, r4.z
+mov.f32f32 r3.z, r1.w
+mad.f32 r2.y, c5.y, r3.x, r2.y
+mul.f r5.y, r0.y, r4.z
+mad.f32 r3.y, c5.y, r4.w, r3.y
+mad.f32 r5.y, r0.x, r4.w, (neg)r5.y
+mad.f32 r3.y, c6.y, r5.x, r3.y
+mul.f r3.z, r3.z, r3.z
+mul.f r5.z, c4.y, r0.x
+mov.f32f32 r5.w, r5.y
+add.f r3.y, r3.y, c7.y
+mad.f32 r5.z, c5.y, r0.y, r5.z
+mad.f32 r0.w, c5.x, r2.z, r0.w
+mad.f32 r2.y, c6.y, r5.w, r2.y
+mov.f32f32 r2.z, r3.y
+mad.f32 r5.z, c6.y, r0.z, r5.z
+mad.f32 r0.w, c6.x, r5.y, r0.w
+add.f r2.y, r2.y, c7.y
+mad.f32 r2.w, r3.y, r2.z, r2.w
+mul.f r3.y, c4.z, r4.z
+add.f r5.y, r5.z, c7.y
+mov.f32f32 r5.z, r2.y
+add.f r0.w, r0.w, c7.x
+mad.f32 r3.y, c5.z, r4.w, r3.y
+mov.f32f32 r6.x, r5.y
+mad.f32 r3.y, c6.z, r5.x, r3.y
+mov.f32f32 r6.y, r0.w
+mad.f32 r2.x, c5.z, r3.x, r2.x
+mad.f32 r5.y, r5.y, r6.x, r3.z
+mad.f32 r2.x, c6.z, r5.w, r2.x
+mul.f r3.x, r6.y, r6.y
+add.f r3.y, r3.y, c7.z
+mad.f32 r2.y, r2.y, r5.z, r3.x
+add.f r2.x, r2.x, c7.z
+mul.f r0.x, c4.z, r0.x
+mov.f32f32 r5.w, r3.y
+mad.f32 r0.x, c5.z, r0.y, r0.x
+mov.f32f32 r0.y, r2.x
+mad.f32 r0.x, c6.z, r0.z, r0.x
+mad.f32 r0.z, r3.y, r5.w, r2.w
+mul.f r2.w, c0.w, r3.w
+mad.f32 r2.x, r2.x, r0.y, r2.y
+add.f r0.x, r0.x, c7.z
+mad.f32 r6.y, c1.w, r4.x, r2.w
+mul.f r6.z, c0.z, r3.w
+mul.f r6.w, c0.y, r3.w
+mul.f r3.w, c0.x, r3.w
+mul.f r4.z, c4.w, r4.z
+rsq r2.x, r2.x
+(ss)mov.f32f32 r2.y, r2.x
+mul.f r3.x, r0.w, r2.x
+rsq r0.z, r0.z
+(ss)mov.f32f32 r0.w, r0.z
+(ss)mov.f32f32 r2.x, r0.x
+mul.f r3.z, r0.y, r2.y
+mul.f r3.y, r5.z, r2.y
+mul.f r2.w, r5.w, r0.w
+mul.f r2.z, r2.z, r0.w
+mad.f32 r0.x, r0.x, r2.x, r5.y
+mul.f r2.y, r1.z, r0.z
+mad.f32 r0.y, c2.w, r4.y, r6.y
+mad.f32 r0.z, c1.z, r4.x, r6.z
+mad.f32 r5.y, c1.y, r4.x, r6.w
+mad.f32 r3.w, c1.x, r4.x, r3.w
+mad.f32 r4.x, c5.w, r4.w, r4.z
+rsq r0.x, r0.x
+(ss)mov.f32f32 r4.z, r0.x
+mul.f r1.z, r1.w, r0.x
+add.f r0.w, r0.y, c3.w
+(ss)mad.f32 r0.x, c2.z, r4.y, r0.z
+mul.f r2.x, r2.x, r4.z
+mul.f r1.w, r6.x, r4.z
+mad.f32 r0.y, c2.y, r4.y, r5.y
+add.f r0.z, r0.x, c3.z
+mad.f32 r0.x, c2.x, r4.y, r3.w
+mad.f32 r3.w, c6.w, r5.x, r4.x
+add.f r0.y, r0.y, c3.y
+nop
add.f r0.x, r0.x, c3.x
-mad.f32 r3.w, c6.w, r4.y, r3.w
-(rpt1)nop
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.x, r1.x
-(rpt1)nop
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.x, r1.x
end
nop
; VERT: outputs: r0.x (0:0) r1.x (5:20) r2.x (5:21) r3.x (5:22)
-; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r1.x (0:0,cm=3,il=12,b=0) r1.z (0:0,cm=7,il=16,b=0) r3.w (0:0,cm=7,il=20,b=0)
-; VERT: 146 instructions, 0 half, 6 full
+; VERT: inputs: r3.w (0:0,cm=7,il=8,b=0) r1.x (0:0,cm=3,il=12,b=0) r0.x (0:0,cm=7,il=16,b=0) r4.z (0:0,cm=7,il=20,b=0)
+; VERT: 99 instructions, 0 half, 7 full
diff --git a/reference/bump/bump-13.asm b/reference/bump/bump-13.asm
index a946e9f..3ae555a 100644
--- a/reference/bump/bump-13.asm
+++ b/reference/bump/bump-13.asm
@@ -6,143 +6,100 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c0.x) 0x3a83126f, 0x00000000, 0x41500000, 0x42c80000
+@const(c1.x) 0x3f4ccccd, 0x3f800000, 0x3f2aacda, 0x3eaaa64c
+@const(c2.x) 0x3e23d70a, 0x3f800000, 0x3ed0ff97, 0x3f510625
+@const(c3.x) 0x3dcccccd, 0x3f800000, 0x00000000, 0x00000000
+@const(c4.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.z, 0, r0.x
bary.f r0.w, 1, r0.x
-bary.f r1.x, 2, r0.x
-bary.f r1.y, 3, r0.x
+bary.f r1.x, 8, r0.x
+bary.f r1.y, 5, r0.x
add.f r1.z, r0.z, c0.x
-add.f r1.w, r0.z, c0.y
-add.f r2.x, r0.w, c0.x
-add.f r2.y, r0.w, c0.y
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r2.z, r1.z
-mov.f32f32 r1.z, r1.w
-mov.f32f32 r1.w, r2.x
-mov.f32f32 r2.w, r2.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.y, r1.y
-bary.f r2.x, 4, r0.x
-sam (f32)(x)r3.x, r1.z, s#0, t#0
-(ss)mov.f32f32 r1.z, r0.z
-sam (f32)(x)r3.y, r2.z, s#0, t#0
-mov.f32f32 r1.w, r0.w
-bary.f r0.z, 8, r0.x
-bary.f r0.w, 5, r0.x
-bary.f r2.y, 9, r0.x
-mov.f32f32 r2.x, r2.x
-(ss)bary.f r2.z, 6, r0.x
-bary.f r2.w, 10, r0.x
-sam (f32)(x)r3.z, r1.z, s#0, t#0
-(sy)(ss)add.f r1.z, r3.y, (neg)r3.z
-add.f r1.w, r3.x, (neg)r3.z
-bary.f (ei)r0.x, 7, r0.x
-nop
-mov.f32f32 r0.y, r1.z
-mov.f32f32 r1.z, r1.w
-(rpt1)nop
-mul.f r0.y, c0.z, r0.y
+add.f r1.w, r0.w, c0.y
+add.f r2.x, r0.z, c0.y
+add.f r2.y, r0.w, c0.x
+sam (f32)(x)r2.z, r0.z, s#0, t#0
+(ss)bary.f r0.z, 2, r0.x
+bary.f r0.w, 9, r0.x
+bary.f r2.w, 6, r0.x
+bary.f r3.x, 3, r0.x
+sam (f32)(x)r3.y, r1.z, s#0, t#0
+(sy)(ss)add.f r1.z, r3.y, (neg)r2.z
+bary.f r1.w, 10, r0.x
+bary.f r3.y, 7, r0.x
+sam (f32)(x)r3.z, r2.x, s#0, t#0
+(sy)(ss)add.f r2.x, r3.z, (neg)r2.z
mul.f r1.z, c0.z, r1.z
+bary.f (ei)r0.x, 4, r0.x
(rpt1)nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.z, r1.z
-(rpt1)nop
-mul.f r0.w, r0.y, r0.w
-mul.f r1.w, r0.y, r2.z
-mul.f r0.x, r0.y, r0.x
-mul.f r0.y, r1.z, r0.z
-mov.f32f32 r0.z, r0.w
-mov.f32f32 r0.w, r1.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
-add.f r0.z, r1.x, (neg)r0.z
-add.f r0.w, r1.y, (neg)r0.w
-add.f r0.x, r2.x, (neg)r0.x
-mul.f r1.x, r1.z, r2.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r1.x, r1.x
-add.f r0.y, r0.z, (neg)r0.y
-mul.f r0.z, r1.z, r2.w
-(rpt1)nop
-mov.f32f32 r0.y, r0.y
-add.f r0.w, r0.w, (neg)r1.x
-mov.f32f32 r0.z, r0.z
+mov.f32f32 r0.y, r1.z
+mul.f r1.z, r1.z, r3.y
+mul.f r2.x, c0.z, r2.x
+nop
+mul.f r1.y, r0.y, r1.y
+mul.f r0.y, r0.y, r2.w
+add.f r0.x, r0.x, (neg)r1.z
+mul.f r1.z, r2.x, r1.w
+add.f r0.z, r0.z, (neg)r1.y
+mov.f32f32 r1.y, r2.x
+add.f r0.y, r3.x, (neg)r0.y
+add.f r0.x, r0.x, (neg)r1.z
+nop
+mul.f r1.x, r1.y, r1.x
+mul.f r0.w, r1.y, r0.w
+mov.f32f32 r1.y, r0.x
nop
-mul.f r1.x, r0.y, r0.y
-mov.f32f32 r0.w, r0.w
-add.f r0.x, r0.x, (neg)r0.z
+add.f r0.z, r0.z, (neg)r1.x
+add.f r0.y, r0.y, (neg)r0.w
(rpt1)nop
-mad.f32 r0.z, r0.w, r0.w, r1.x
-mov.f32f32 r0.x, r0.x
+mov.f32f32 r0.w, r0.z
+mov.f32f32 r1.x, r0.y
(rpt1)nop
-mov.f32f32 r0.z, r0.z
+mul.f r0.z, r0.z, r0.w
nop
-mad.f32 r0.z, r0.x, r0.x, r0.z
-(rpt5)nop
-rsq r0.z, r0.z
-(ss)mov.f32f32 r0.z, r0.z
-(rpt2)nop
-mul.f r0.y, r0.y, r0.z
-mul.f r0.w, r0.w, r0.z
-mul.f r0.x, r0.x, r0.z
+mad.f32 r0.y, r0.y, r1.x, r0.z
nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.w
-mov.f32f32 r0.x, r0.x
+mad.f32 r0.y, r1.y, r1.y, r0.y
+(rpt5)nop
+rsq r0.y, r0.y
+(ss)mov.f32f32 r0.z, r0.y
+mul.f r0.x, r0.x, r0.y
+(rpt1)nop
+(ss)mul.f r0.y, r0.w, r0.z
+mul.f r0.z, r1.x, r0.z
+mov.f32f32 r0.w, r0.x
nop
-mul.f r0.w, r0.y, c2.z
+mov.f32f32 r1.x, r0.y
mul.f r0.y, r0.y, c1.z
-mad.f32 r0.w, c2.z, r0.z, r0.w
+mov.f32f32 r1.y, r0.z
+nop
+mul.f r1.x, r1.x, c2.z
mad.f32 r0.y, c1.z, r0.z, r0.y
-(rpt1)nop
-mov.f32f32 r0.z, r0.w
-mov.f32f32 r0.y, r0.y
-mad.f32 r0.z, c2.w, r0.x, r0.z
+mad.f32 r0.z, c2.z, r1.y, r1.x
mad.f32 r0.x, c1.w, r0.x, r0.y
-(rpt1)nop
-mov.f32f32 r0.y, r0.z
-max.f r0.x, r0.x, c0.y
-(rpt1)nop
+mad.f32 r0.y, c2.w, r0.w, r0.z
+(rpt2)nop
max.f r0.y, r0.y, c0.y
-mov.f32f32 r0.x, r0.x
-(rpt1)nop
-mov.f32f32 r0.y, r0.y
-(rpt5)nop
+max.f r0.x, r0.x, c0.y
+(rpt4)nop
log2 r0.y, r0.y
(ss)mul.f r0.y, c0.w, r0.y
-(rpt2)nop
-mov.f32f32 r0.y, r0.y
-(rpt5)nop
+mov.f32f32 r0.z, r0.x
+(rpt4)nop
exp2 r0.y, r0.y
-(ss)mad.f32 r0.z, c2.y, r0.y, c3.y
-mad.f32 r0.w, c2.x, r0.y, c3.x
+(ss)mad.f32 r0.w, c2.y, r0.y, c3.y
mad.f32 r1.x, c2.x, r0.y, c3.x
+mad.f32 r1.w, c1.y, r0.z, r0.w
+mad.f32 r1.z, c1.x, r0.z, r1.x
+mad.f32 r0.w, c2.x, r0.y, c3.x
(ss)mad.f32 r0.y, c2.x, r0.y, c3.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-mad.f32 r0.z, c1.y, r0.x, r0.z
-mad.f32 r0.w, c1.x, r0.x, r0.w
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.w, r0.z
-mov.f32f32 r1.z, r0.w
-mad.f32 r0.z, c1.x, r0.x, r1.x
-mad.f32 r0.x, c1.x, r0.x, r0.y
-(rpt1)nop
-mov.f32f32 r1.y, r0.z
-mov.f32f32 r1.x, r0.x
+mad.f32 r1.y, c1.x, r0.z, r0.w
+mad.f32 r1.x, c1.x, r0.x, r0.y
end
+nop
+nop
; FRAG: outputs: r1.x (1:0)
-; FRAG: inputs: r0.z (5:20,cm=f,il=8,b=1) r63.y (5:21,cm=f,il=12,b=1) r2.x (5:22,cm=f,il=16,b=1)
-; FRAG: 166 instructions, 0 half, 4 full
+; FRAG: inputs: r2.y (5:20,cm=f,il=8,b=1) r2.z (5:21,cm=f,il=12,b=1) r2.x (5:22,cm=f,il=16,b=1)
+; FRAG: 105 instructions, 0 half, 4 full
diff --git a/reference/bump1.asm b/reference/bump1.asm
index a946e9f..3ae555a 100644
--- a/reference/bump1.asm
+++ b/reference/bump1.asm
@@ -6,143 +6,100 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c0.x) 0x3a83126f, 0x00000000, 0x41500000, 0x42c80000
+@const(c1.x) 0x3f4ccccd, 0x3f800000, 0x3f2aacda, 0x3eaaa64c
+@const(c2.x) 0x3e23d70a, 0x3f800000, 0x3ed0ff97, 0x3f510625
+@const(c3.x) 0x3dcccccd, 0x3f800000, 0x00000000, 0x00000000
+@const(c4.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.z, 0, r0.x
bary.f r0.w, 1, r0.x
-bary.f r1.x, 2, r0.x
-bary.f r1.y, 3, r0.x
+bary.f r1.x, 8, r0.x
+bary.f r1.y, 5, r0.x
add.f r1.z, r0.z, c0.x
-add.f r1.w, r0.z, c0.y
-add.f r2.x, r0.w, c0.x
-add.f r2.y, r0.w, c0.y
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r2.z, r1.z
-mov.f32f32 r1.z, r1.w
-mov.f32f32 r1.w, r2.x
-mov.f32f32 r2.w, r2.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.y, r1.y
-bary.f r2.x, 4, r0.x
-sam (f32)(x)r3.x, r1.z, s#0, t#0
-(ss)mov.f32f32 r1.z, r0.z
-sam (f32)(x)r3.y, r2.z, s#0, t#0
-mov.f32f32 r1.w, r0.w
-bary.f r0.z, 8, r0.x
-bary.f r0.w, 5, r0.x
-bary.f r2.y, 9, r0.x
-mov.f32f32 r2.x, r2.x
-(ss)bary.f r2.z, 6, r0.x
-bary.f r2.w, 10, r0.x
-sam (f32)(x)r3.z, r1.z, s#0, t#0
-(sy)(ss)add.f r1.z, r3.y, (neg)r3.z
-add.f r1.w, r3.x, (neg)r3.z
-bary.f (ei)r0.x, 7, r0.x
-nop
-mov.f32f32 r0.y, r1.z
-mov.f32f32 r1.z, r1.w
-(rpt1)nop
-mul.f r0.y, c0.z, r0.y
+add.f r1.w, r0.w, c0.y
+add.f r2.x, r0.z, c0.y
+add.f r2.y, r0.w, c0.x
+sam (f32)(x)r2.z, r0.z, s#0, t#0
+(ss)bary.f r0.z, 2, r0.x
+bary.f r0.w, 9, r0.x
+bary.f r2.w, 6, r0.x
+bary.f r3.x, 3, r0.x
+sam (f32)(x)r3.y, r1.z, s#0, t#0
+(sy)(ss)add.f r1.z, r3.y, (neg)r2.z
+bary.f r1.w, 10, r0.x
+bary.f r3.y, 7, r0.x
+sam (f32)(x)r3.z, r2.x, s#0, t#0
+(sy)(ss)add.f r2.x, r3.z, (neg)r2.z
mul.f r1.z, c0.z, r1.z
+bary.f (ei)r0.x, 4, r0.x
(rpt1)nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.z, r1.z
-(rpt1)nop
-mul.f r0.w, r0.y, r0.w
-mul.f r1.w, r0.y, r2.z
-mul.f r0.x, r0.y, r0.x
-mul.f r0.y, r1.z, r0.z
-mov.f32f32 r0.z, r0.w
-mov.f32f32 r0.w, r1.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
-add.f r0.z, r1.x, (neg)r0.z
-add.f r0.w, r1.y, (neg)r0.w
-add.f r0.x, r2.x, (neg)r0.x
-mul.f r1.x, r1.z, r2.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r1.x, r1.x
-add.f r0.y, r0.z, (neg)r0.y
-mul.f r0.z, r1.z, r2.w
-(rpt1)nop
-mov.f32f32 r0.y, r0.y
-add.f r0.w, r0.w, (neg)r1.x
-mov.f32f32 r0.z, r0.z
+mov.f32f32 r0.y, r1.z
+mul.f r1.z, r1.z, r3.y
+mul.f r2.x, c0.z, r2.x
+nop
+mul.f r1.y, r0.y, r1.y
+mul.f r0.y, r0.y, r2.w
+add.f r0.x, r0.x, (neg)r1.z
+mul.f r1.z, r2.x, r1.w
+add.f r0.z, r0.z, (neg)r1.y
+mov.f32f32 r1.y, r2.x
+add.f r0.y, r3.x, (neg)r0.y
+add.f r0.x, r0.x, (neg)r1.z
+nop
+mul.f r1.x, r1.y, r1.x
+mul.f r0.w, r1.y, r0.w
+mov.f32f32 r1.y, r0.x
nop
-mul.f r1.x, r0.y, r0.y
-mov.f32f32 r0.w, r0.w
-add.f r0.x, r0.x, (neg)r0.z
+add.f r0.z, r0.z, (neg)r1.x
+add.f r0.y, r0.y, (neg)r0.w
(rpt1)nop
-mad.f32 r0.z, r0.w, r0.w, r1.x
-mov.f32f32 r0.x, r0.x
+mov.f32f32 r0.w, r0.z
+mov.f32f32 r1.x, r0.y
(rpt1)nop
-mov.f32f32 r0.z, r0.z
+mul.f r0.z, r0.z, r0.w
nop
-mad.f32 r0.z, r0.x, r0.x, r0.z
-(rpt5)nop
-rsq r0.z, r0.z
-(ss)mov.f32f32 r0.z, r0.z
-(rpt2)nop
-mul.f r0.y, r0.y, r0.z
-mul.f r0.w, r0.w, r0.z
-mul.f r0.x, r0.x, r0.z
+mad.f32 r0.y, r0.y, r1.x, r0.z
nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.w
-mov.f32f32 r0.x, r0.x
+mad.f32 r0.y, r1.y, r1.y, r0.y
+(rpt5)nop
+rsq r0.y, r0.y
+(ss)mov.f32f32 r0.z, r0.y
+mul.f r0.x, r0.x, r0.y
+(rpt1)nop
+(ss)mul.f r0.y, r0.w, r0.z
+mul.f r0.z, r1.x, r0.z
+mov.f32f32 r0.w, r0.x
nop
-mul.f r0.w, r0.y, c2.z
+mov.f32f32 r1.x, r0.y
mul.f r0.y, r0.y, c1.z
-mad.f32 r0.w, c2.z, r0.z, r0.w
+mov.f32f32 r1.y, r0.z
+nop
+mul.f r1.x, r1.x, c2.z
mad.f32 r0.y, c1.z, r0.z, r0.y
-(rpt1)nop
-mov.f32f32 r0.z, r0.w
-mov.f32f32 r0.y, r0.y
-mad.f32 r0.z, c2.w, r0.x, r0.z
+mad.f32 r0.z, c2.z, r1.y, r1.x
mad.f32 r0.x, c1.w, r0.x, r0.y
-(rpt1)nop
-mov.f32f32 r0.y, r0.z
-max.f r0.x, r0.x, c0.y
-(rpt1)nop
+mad.f32 r0.y, c2.w, r0.w, r0.z
+(rpt2)nop
max.f r0.y, r0.y, c0.y
-mov.f32f32 r0.x, r0.x
-(rpt1)nop
-mov.f32f32 r0.y, r0.y
-(rpt5)nop
+max.f r0.x, r0.x, c0.y
+(rpt4)nop
log2 r0.y, r0.y
(ss)mul.f r0.y, c0.w, r0.y
-(rpt2)nop
-mov.f32f32 r0.y, r0.y
-(rpt5)nop
+mov.f32f32 r0.z, r0.x
+(rpt4)nop
exp2 r0.y, r0.y
-(ss)mad.f32 r0.z, c2.y, r0.y, c3.y
-mad.f32 r0.w, c2.x, r0.y, c3.x
+(ss)mad.f32 r0.w, c2.y, r0.y, c3.y
mad.f32 r1.x, c2.x, r0.y, c3.x
+mad.f32 r1.w, c1.y, r0.z, r0.w
+mad.f32 r1.z, c1.x, r0.z, r1.x
+mad.f32 r0.w, c2.x, r0.y, c3.x
(ss)mad.f32 r0.y, c2.x, r0.y, c3.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-mad.f32 r0.z, c1.y, r0.x, r0.z
-mad.f32 r0.w, c1.x, r0.x, r0.w
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.w, r0.z
-mov.f32f32 r1.z, r0.w
-mad.f32 r0.z, c1.x, r0.x, r1.x
-mad.f32 r0.x, c1.x, r0.x, r0.y
-(rpt1)nop
-mov.f32f32 r1.y, r0.z
-mov.f32f32 r1.x, r0.x
+mad.f32 r1.y, c1.x, r0.z, r0.w
+mad.f32 r1.x, c1.x, r0.x, r0.y
end
+nop
+nop
; FRAG: outputs: r1.x (1:0)
-; FRAG: inputs: r0.z (5:20,cm=f,il=8,b=1) r63.y (5:21,cm=f,il=12,b=1) r2.x (5:22,cm=f,il=16,b=1)
-; FRAG: 166 instructions, 0 half, 4 full
+; FRAG: inputs: r2.y (5:20,cm=f,il=8,b=1) r2.z (5:21,cm=f,il=12,b=1) r2.x (5:22,cm=f,il=16,b=1)
+; FRAG: 105 instructions, 0 half, 4 full
diff --git a/reference/bump2.asm b/reference/bump2.asm
index 0f8742e..9112ce0 100644
--- a/reference/bump2.asm
+++ b/reference/bump2.asm
@@ -1,16 +1,16 @@
; options:
; VERT: new compiler
-@in(r0.x) in0
-@in(r0.y) in1
-@in(r0.z) in2
+@in(r3.w) in0
+@in(r4.x) in1
+@in(r4.y) in2
@in(r1.x) in4
@in(r1.y) in5
-@in(r1.z) in8
-@in(r1.w) in9
-@in(r2.x) in10
-@in(r3.w) in12
-@in(r4.x) in13
-@in(r4.y) in14
+@in(r0.x) in8
+@in(r0.y) in9
+@in(r0.z) in10
+@in(r4.z) in12
+@in(r4.w) in13
+@in(r5.x) in14
@out(r0.x) out0
@out(r0.y) out1
@out(r0.z) out2
@@ -27,151 +27,109 @@
@out(r3.y) out13
@out(r3.z) out14
@out(r3.w) out15
-(sy)(ss)mul.f r0.w, r2.x, r4.x
-mul.f r2.y, c4.x, r1.z
-mad.f32 r0.w, r1.w, r4.y, (neg)r0.w
-mad.f32 r2.y, c5.x, r1.w, r2.y
-mul.f r2.z, c4.x, r3.w
-mad.f32 r2.y, c6.x, r2.x, r2.y
-mov.f32f32 r0.w, r0.w
-mad.f32 r2.z, c5.x, r4.x, r2.z
-mul.f r2.w, c4.y, r3.w
-mul.f r3.x, c4.z, r3.w
-mul.f r3.y, c4.x, r0.w
-mul.f r3.z, r1.z, r4.y
-add.f r2.y, r2.y, c7.x
-mad.f32 r3.z, r2.x, r3.w, (neg)r3.z
-mad.f32 r2.z, c6.x, r4.y, r2.z
-mul.f r4.z, c4.y, r0.w
-mul.f r0.w, c4.z, r0.w
-mov.f32f32 r3.z, r3.z
-mov.f32f32 r4.w, r2.y
-add.f r2.y, r2.z, c7.x
-mad.f32 r2.z, c5.y, r4.x, r2.w
-mad.f32 r2.w, c5.x, r3.z, r3.y
-mul.f r3.y, r1.w, r3.w
-mul.f r5.x, r4.w, r4.w
-mad.f32 r3.y, r1.z, r4.x, (neg)r3.y
-mul.f r5.y, c4.y, r1.z
-mov.f32f32 r2.y, r2.y
-mad.f32 r5.y, c5.y, r1.w, r5.y
-mov.f32f32 r3.y, r3.y
-mad.f32 r5.y, c6.y, r2.x, r5.y
-mul.f r5.z, r2.y, r2.y
-mad.f32 r2.z, c6.y, r4.y, r2.z
-mad.f32 r2.w, c6.x, r3.y, r2.w
-add.f r5.y, r5.y, c7.y
-mad.f32 r4.z, c5.y, r3.z, r4.z
-mad.f32 r0.w, c5.z, r3.z, r0.w
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r5.y, r5.y
-add.f r2.z, r2.z, c7.y
-mad.f32 r3.z, c6.y, r3.y, r4.z
-add.f r2.w, r2.w, c7.x
-mad.f32 r4.z, r5.y, r5.y, r5.x
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r3.z, r3.z
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r4.z, r4.z
-mad.f32 r5.x, r2.z, r2.z, r5.z
-mul.f r1.z, c4.z, r1.z
-mul.f r5.z, r2.w, r2.w
-add.f r3.z, r3.z, c7.y
-mad.f32 r1.z, c5.z, r1.w, r1.z
-mov.f32f32 r1.w, r5.x
-mad.f32 r3.x, c5.z, r4.x, r3.x
-mov.f32f32 r3.z, r3.z
-mad.f32 r1.z, c6.z, r2.x, r1.z
-mad.f32 r2.x, c6.z, r4.y, r3.x
-mad.f32 r0.w, c6.z, r3.y, r0.w
-mad.f32 r3.x, r3.z, r3.z, r5.z
-add.f r1.z, r1.z, c7.z
-add.f r2.x, r2.x, c7.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r2.x, r2.x
-add.f r0.w, r0.w, c7.z
-mul.f r3.y, c0.w, r0.x
-mul.f r5.x, c0.z, r0.x
-mul.f r5.z, c0.y, r0.x
-mov.f32f32 r0.w, r0.w
-mad.f32 r4.z, r1.z, r1.z, r4.z
-mad.f32 r1.w, r2.x, r2.x, r1.w
-mad.f32 r3.y, c1.w, r0.y, r3.y
-mad.f32 r3.x, r0.w, r0.w, r3.x
-mad.f32 r3.y, c2.w, r0.z, r3.y
-mad.f32 r5.x, c1.z, r0.y, r5.x
-mad.f32 r5.z, c1.y, r0.y, r5.z
-mul.f r0.x, c0.x, r0.x
-mul.f r3.w, c4.w, r3.w
-add.f r5.w, r3.y, c3.w
-rsq r3.x, r3.x
-(ss)mov.f32f32 r3.x, r3.x
-rsq r3.y, r4.z
-nop
-rsq r1.w, r1.w
-(ss)mov.f32f32 r4.z, r3.y
-mov.f32f32 r1.w, r1.w
-mul.f r0.w, r0.w, r3.x
-mul.f r3.y, r3.z, r3.x
-mul.f r2.w, r2.w, r3.x
-mul.f r1.z, r1.z, r4.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r3.x, r3.y
-mov.f32f32 r2.w, r2.w
-nop
-mov.f32f32 r3.z, r0.w
-mov.f32f32 r3.y, r3.x
-mov.f32f32 r3.x, r2.w
-mov.f32f32 r0.w, r1.z
-mul.f r1.z, r2.x, r1.w
-mul.f r2.x, r2.z, r1.w
-mul.f r1.w, r2.y, r1.w
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r2.y, r2.x
-mov.f32f32 r1.w, r1.w
+@const(c8.x) 0x3f800000, 0x00000000, 0x00000000, 0x00000000
+@const(c9.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
+(sy)(ss)mul.f r0.w, r0.z, r4.w
+mul.f r1.z, c4.x, r4.z
+mad.f32 r0.w, r0.y, r5.x, (neg)r0.w
+mad.f32 r1.z, c5.x, r4.w, r1.z
+mul.f r1.w, c4.x, r0.x
+mad.f32 r1.z, c6.x, r5.x, r1.z
mov.f32f32 r2.x, r0.w
+mad.f32 r1.w, c5.x, r0.y, r1.w
+mul.f r0.w, c4.x, r0.w
+add.f r1.z, r1.z, c7.x
+mul.f r2.y, c4.y, r2.x
+mul.f r2.z, r0.x, r5.x
+mad.f32 r1.w, c6.x, r0.z, r1.w
+mad.f32 r2.z, r0.z, r4.z, (neg)r2.z
mov.f32f32 r2.w, r1.z
-mov.f32f32 r2.z, r2.y
-mov.f32f32 r2.y, r1.w
-mul.f r1.z, r5.y, r4.z
-mul.f r1.w, r4.w, r4.z
-mov.f32f32 r0.w, r5.w
-mad.f32 r4.z, c2.z, r0.z, r5.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r1.w, r1.w
-mad.f32 r4.w, c2.y, r0.z, r5.z
-mad.f32 r0.x, c1.x, r0.y, r0.x
-mov.f32f32 r0.y, r1.z
-mov.f32f32 r1.z, r1.w
-(rpt1)nop
-mov.f32f32 r1.w, r0.y
-mov.f32f32 r1.z, r1.z
-add.f r0.y, r4.z, c3.z
-add.f r4.z, r4.w, c3.y
-mad.f32 r0.x, c2.x, r0.z, r0.x
-mad.f32 r3.w, c5.w, r4.x, r3.w
-mov.f32f32 r0.z, r0.y
-mov.f32f32 r0.y, r4.z
+mul.f r2.x, c4.z, r2.x
+add.f r1.w, r1.w, c7.x
+mov.f32f32 r3.x, r2.z
+mul.f r2.w, r2.w, r2.w
+mul.f r3.y, c4.y, r4.z
+mov.f32f32 r3.z, r1.w
+mad.f32 r2.y, c5.y, r3.x, r2.y
+mul.f r5.y, r0.y, r4.z
+mad.f32 r3.y, c5.y, r4.w, r3.y
+mad.f32 r5.y, r0.x, r4.w, (neg)r5.y
+mad.f32 r3.y, c6.y, r5.x, r3.y
+mul.f r3.z, r3.z, r3.z
+mul.f r5.z, c4.y, r0.x
+mov.f32f32 r5.w, r5.y
+add.f r3.y, r3.y, c7.y
+mad.f32 r5.z, c5.y, r0.y, r5.z
+mad.f32 r0.w, c5.x, r2.z, r0.w
+mad.f32 r2.y, c6.y, r5.w, r2.y
+mov.f32f32 r2.z, r3.y
+mad.f32 r5.z, c6.y, r0.z, r5.z
+mad.f32 r0.w, c6.x, r5.y, r0.w
+add.f r2.y, r2.y, c7.y
+mad.f32 r2.w, r3.y, r2.z, r2.w
+mul.f r3.y, c4.z, r4.z
+add.f r5.y, r5.z, c7.y
+mov.f32f32 r5.z, r2.y
+add.f r0.w, r0.w, c7.x
+mad.f32 r3.y, c5.z, r4.w, r3.y
+mov.f32f32 r6.x, r5.y
+mad.f32 r3.y, c6.z, r5.x, r3.y
+mov.f32f32 r6.y, r0.w
+mad.f32 r2.x, c5.z, r3.x, r2.x
+mad.f32 r5.y, r5.y, r6.x, r3.z
+mad.f32 r2.x, c6.z, r5.w, r2.x
+mul.f r3.x, r6.y, r6.y
+add.f r3.y, r3.y, c7.z
+mad.f32 r2.y, r2.y, r5.z, r3.x
+add.f r2.x, r2.x, c7.z
+mul.f r0.x, c4.z, r0.x
+mov.f32f32 r5.w, r3.y
+mad.f32 r0.x, c5.z, r0.y, r0.x
+mov.f32f32 r0.y, r2.x
+mad.f32 r0.x, c6.z, r0.z, r0.x
+mad.f32 r0.z, r3.y, r5.w, r2.w
+mul.f r2.w, c0.w, r3.w
+mad.f32 r2.x, r2.x, r0.y, r2.y
+add.f r0.x, r0.x, c7.z
+mad.f32 r6.y, c1.w, r4.x, r2.w
+mul.f r6.z, c0.z, r3.w
+mul.f r6.w, c0.y, r3.w
+mul.f r3.w, c0.x, r3.w
+mul.f r4.z, c4.w, r4.z
+rsq r2.x, r2.x
+(ss)mov.f32f32 r2.y, r2.x
+mul.f r3.x, r0.w, r2.x
+rsq r0.z, r0.z
+(ss)mov.f32f32 r0.w, r0.z
+(ss)mov.f32f32 r2.x, r0.x
+mul.f r3.z, r0.y, r2.y
+mul.f r3.y, r5.z, r2.y
+mul.f r2.w, r5.w, r0.w
+mul.f r2.z, r2.z, r0.w
+mad.f32 r0.x, r0.x, r2.x, r5.y
+mul.f r2.y, r1.z, r0.z
+mad.f32 r0.y, c2.w, r4.y, r6.y
+mad.f32 r0.z, c1.z, r4.x, r6.z
+mad.f32 r5.y, c1.y, r4.x, r6.w
+mad.f32 r3.w, c1.x, r4.x, r3.w
+mad.f32 r4.x, c5.w, r4.w, r4.z
+rsq r0.x, r0.x
+(ss)mov.f32f32 r4.z, r0.x
+mul.f r1.z, r1.w, r0.x
+add.f r0.w, r0.y, c3.w
+(ss)mad.f32 r0.x, c2.z, r4.y, r0.z
+mul.f r2.x, r2.x, r4.z
+mul.f r1.w, r6.x, r4.z
+mad.f32 r0.y, c2.y, r4.y, r5.y
+add.f r0.z, r0.x, c3.z
+mad.f32 r0.x, c2.x, r4.y, r3.w
+mad.f32 r3.w, c6.w, r5.x, r4.x
+add.f r0.y, r0.y, c3.y
+nop
add.f r0.x, r0.x, c3.x
-mad.f32 r3.w, c6.w, r4.y, r3.w
-(rpt1)nop
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.x, r1.x
-(rpt1)nop
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.x, r1.x
end
nop
; VERT: outputs: r0.x (0:0) r1.x (5:20) r2.x (5:21) r3.x (5:22)
-; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r1.x (0:0,cm=3,il=12,b=0) r1.z (0:0,cm=7,il=16,b=0) r3.w (0:0,cm=7,il=20,b=0)
-; VERT: 146 instructions, 0 half, 6 full
+; VERT: inputs: r3.w (0:0,cm=7,il=8,b=0) r1.x (0:0,cm=3,il=12,b=0) r0.x (0:0,cm=7,il=16,b=0) r4.z (0:0,cm=7,il=20,b=0)
+; VERT: 99 instructions, 0 half, 7 full
diff --git a/reference/chrome/bad.asm b/reference/chrome/bad.asm
index 386d1c8..9cccfa5 100644
--- a/reference/chrome/bad.asm
+++ b/reference/chrome/bad.asm
@@ -1,12 +1,12 @@
; options:
; VERT: new compiler
-@in(r0.x) in0
-@in(r0.y) in1
-@in(r0.z) in2
-@in(r0.w) in3
-@in(r1.x) in4
-@in(r1.y) in5
-@in(r1.z) in8
+@in(r2.x) in0
+@in(r2.y) in1
+@in(r2.z) in2
+@in(r2.w) in3
+@in(r0.x) in4
+@in(r0.y) in5
+@in(r0.z) in8
@out(r0.x) out0
@out(r0.y) out1
@out(r0.z) out2
@@ -15,107 +15,76 @@
@out(r1.y) out5
@out(r1.z) out6
@out(r1.w) out7
-(sy)(ss)mul.f r1.z, r1.z, c40.x
+@const(c40.x) 0x3e800000, 0x00000000, 0x40e00000, 0x40800000
+(sy)(ss)mul.f r0.z, r0.z, c40.x
(rpt2)nop
-trunc.f r1.z, r1.z
-(rpt2)nop
-mov.f32f32 r1.z, r1.z
+trunc.f r0.z, r0.z
(rpt2)nop
-max.f r1.w, r1.z, c40.y
-max.f r2.x, r1.z, c40.y
-max.f r2.y, r1.z, c40.y
-max.f r2.z, r1.z, c40.y
-min.f r1.w, r1.w, c40.z
-mov.f32f32 r2.x, r2.x
-min.f r2.y, r2.y, c40.z
-min.f r2.z, r2.z, c40.z
-mov.f32f32 r1.w, r1.w
-min.f r2.x, r2.x, c40.z
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r2.z, r2.z
-trunc.f r1.w, r1.w
-mov.f32f32 r2.x, r2.x
-trunc.f r2.y, r2.y
-trunc.f r2.z, r2.z
-mov.f32f32 r1.w, r1.w
-trunc.f r2.x, r2.x
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r2.z, r2.z
-mul.f r1.w, r1.w, c40.w
-mov.f32f32 r2.x, r2.x
-mul.f r2.y, r2.y, c40.w
-mul.f r2.z, r2.z, c40.w
-mov.f32f32 r1.w, r1.w
-cov.f32s16 hr0.x, r2.x
-mov.f32f32 r2.x, r2.y
-mov.f32f32 r2.y, r2.z
-cov.f32s16 hr0.y, r1.w
+mov.f32f32 r0.w, r0.z
+max.f r0.z, r0.z, c40.y
+(rpt1)nop
+max.f r1.x, r0.w, c40.y
+max.f r1.y, r0.w, c40.y
+max.f r1.z, r0.w, c40.y
+max.f r0.w, r0.w, c40.y
+min.f r1.x, r1.x, c40.z
+min.f r1.y, r1.y, c40.z
+min.f r1.z, r1.z, c40.z
+min.f r0.w, r0.w, c40.z
+trunc.f r1.x, r1.x
+trunc.f r1.y, r1.y
+trunc.f r1.z, r1.z
+trunc.f r0.w, r0.w
+mul.f r1.x, r1.x, c40.w
+cov.f32s16 hr0.x, r1.y
+mul.f r1.y, r1.z, c40.w
+mul.f r0.w, r0.w, c40.w
+cov.f32s16 hr0.y, r1.x
shl.b hr0.x, hr0.x, 2
-cov.f32s16 hr0.z, r2.x
-cov.f32s16 hr0.w, r2.y
+cov.f32s16 hr0.z, r1.y
+cov.f32s16 hr0.w, r0.w
shl.b hr0.y, hr0.y, 2
mova a0.x, hr0.x
shl.b hr0.x, hr0.z, 2
shl.b hr0.z, hr0.w, 2
-max.f r2.x, r1.z, c40.y
+min.f r0.z, r0.z, c40.z
(rpt2)nop
-mov.f32f32 r1.z, c<a0.x + 129>
+mov.f32f32 r0.w, c<a0.x + 129>
mov.f32f32 r1.w, c<a0.x + 131>
-mov.f32f32 r2.y, c<a0.x + 128>
-(ul)mov.f32f32 r2.z, c<a0.x + 130>
+mov.f32f32 r1.x, c<a0.x + 128>
+(ul)mov.f32f32 r1.z, c<a0.x + 130>
mova a0.x, hr0.y
-mad.f32 r1.y, r1.y, r1.w, r1.z
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r1.z, r2.z
-mad.f32 r1.x, r1.x, r2.z, r2.y
-min.f r2.x, r2.x, c40.z
-nop
-mul.f r2.y, c<a0.x>, r0.x
-mul.f r2.z, c<a0.x>, r0.x
-mul.f r2.w, c<a0.x>, r0.x
-(ul)mul.f r0.x, c<a0.x>, r0.x
+mad.f32 r1.y, r0.y, r1.w, r0.w
+trunc.f r0.y, r0.z
+(rpt3)nop
+mul.f r0.z, c<a0.x>, r2.x
+mul.f r0.w, c<a0.x>, r2.x
+mul.f r3.x, c<a0.x>, r2.x
+(ul)mul.f r2.x, c<a0.x>, r2.x
mova a0.x, hr0.x
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r2.x, r2.x
-(rpt2)nop
-mad.f32 r2.y, c<a0.x + 4>, r0.y, r2.y
-mad.f32 r2.z, c<a0.x + 4>, r0.y, r2.z
-mad.f32 r2.w, c<a0.x + 4>, r0.y, r2.w
-(ul)mad.f32 r0.x, c<a0.x + 4>, r0.y, r0.x
+mad.f32 r1.x, r0.x, r1.z, r1.x
+mul.f r0.x, r0.y, c40.w
+(rpt3)nop
+mad.f32 r0.y, c<a0.x + 4>, r2.y, r0.z
+mad.f32 r0.z, c<a0.x + 4>, r2.y, r0.w
+mad.f32 r0.w, c<a0.x + 4>, r2.y, r3.x
+(ul)mad.f32 r2.x, c<a0.x + 4>, r2.y, r2.x
mova a0.x, hr0.z
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.x, r1.x
-trunc.f r0.y, r2.x
-(rpt2)nop
-mad.f32 r2.x, c<a0.x + 8>, r0.z, r2.y
-mov.f32f32 r0.y, r0.y
-mad.f32 r2.y, c<a0.x + 8>, r0.z, r2.z
-mad.f32 r2.z, c<a0.x + 8>, r0.z, r2.w
-(ul)mad.f32 r0.x, c<a0.x + 8>, r0.z, r0.x
-mul.f r0.y, r0.y, c40.w
-(rpt2)nop
-mov.f32f32 r0.y, r0.y
-(rpt2)nop
-cov.f32s16 hr0.x, r0.y
-(rpt2)nop
+cov.f32s16 hr0.x, r0.x
+(rpt4)nop
+mad.f32 r0.x, c<a0.x + 8>, r2.z, r0.y
shl.b hr0.x, hr0.x, 2
-(rpt2)nop
+mad.f32 r0.y, c<a0.x + 8>, r2.z, r0.z
+mad.f32 r2.y, c<a0.x + 8>, r2.z, r0.w
+(ul)mad.f32 r2.x, c<a0.x + 8>, r2.z, r2.x
mova a0.x, hr0.x
(rpt5)nop
-mad.f32 r0.y, c<a0.x + 12>, r0.w, r2.x
-mad.f32 r0.z, c<a0.x + 12>, r0.w, r2.y
-mad.f32 r2.x, c<a0.x + 12>, r0.w, r2.z
-(ul)mad.f32 r0.x, c<a0.x + 12>, r0.w, r0.x
-mov.f32f32 r0.w, r0.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.y, r2.x
-mov.f32f32 r0.x, r0.x
+mad.f32 r0.w, c<a0.x + 12>, r2.w, r0.x
+mad.f32 r0.z, c<a0.x + 12>, r2.w, r0.y
+mad.f32 r0.y, c<a0.x + 12>, r2.w, r2.y
+(ul)mad.f32 r0.x, c<a0.x + 12>, r2.w, r2.x
end
-nop
-nop
-nop
; VERT: outputs: r0.x (0:0) r1.x (5:20)
-; VERT: inputs: r0.x (0:0,cm=f,il=8,b=0) r1.x (0:0,cm=3,il=12,b=0) r1.z (0:0,cm=1,il=16,b=0)
-; VERT: 122 instructions, 1 half, 3 full
+; VERT: inputs: r2.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=3,il=12,b=0) r0.z (0:0,cm=1,il=16,b=0)
+; VERT: 90 instructions, 1 half, 4 full
diff --git a/reference/crazy-frag-conflict.asm b/reference/crazy-frag-conflict.asm
index 7544805..aeb5150 100644
--- a/reference/crazy-frag-conflict.asm
+++ b/reference/crazy-frag-conflict.asm
@@ -6,39 +6,32 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)bary.f r0.z, 4, r0.x
-bary.f r0.w, 7, r0.x
-bary.f r1.x, 5, r0.x
+bary.f r0.w, 5, r0.x
+bary.f r1.x, 7, r0.x
bary.f r1.y, 3, r0.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.x, r1.x
bary.f r1.z, 2, r0.x
-mov.f32f32 r2.x, r0.z
-mul.f r0.z, r0.w, r1.y
-mov.f32f32 r2.y, r1.x
-mov.f32f32 r2.z, r0.w
-bary.f r0.w, 1, r0.x
+bary.f r2.x, 1, r0.x
+mov.f32f32 r1.w, r1.x
bary.f (ei)r0.x, 0, r0.x
nop
-mov.f32f32 r1.w, r0.z
-(rpt1)nop
-sam.p (f32)(xyz)r2.x, r2.x, s#0, t#0
-(sy)(ss)mov.f32f32 r2.z, r2.x
-mov.f32f32 r2.w, r2.y
-mov.f32f32 r3.x, r2.x
-(rpt5)nop
-sam.p (f32)(xyz)r2.x, r2.z, s#0, t#0
-(sy)mul.f r0.y, r2.z, r1.z
-mul.f r0.z, r2.y, r0.w
-mul.f r0.x, r2.x, r0.x
-nop
-mov.f32f32 r1.z, r0.y
-mov.f32f32 r1.y, r0.z
-mov.f32f32 r1.x, r0.x
+sam.p (f32)(xyz)r0.y, r0.z, s#0, t#0
+(sy)mov.f32f32 r2.y, r0.y
+mov.f32f32 r2.z, r0.z
+mov.f32f32 r2.w, r0.y
+mul.f r1.w, r1.w, r1.y
+(rpt4)nop
+(ss)nop
+sam.p (f32)(xyz)r0.y, r2.y, s#0, t#0
+(sy)mul.f r1.z, r0.w, r1.z
+mul.f r1.y, r0.z, r2.x
+mul.f r1.x, r0.y, r0.x
end
nop
+nop
+nop
; FRAG: outputs: r1.x (1:0)
-; FRAG: inputs: r0.z (1:0,cm=f,il=8,b=1) r0.x (5:0,cm=f,il=12,b=1)
-; FRAG: 37 instructions, 0 half, 4 full
+; FRAG: inputs: r0.z (1:0,cm=f,il=8,b=1) r0.y (5:0,cm=f,il=12,b=1)
+; FRAG: 25 instructions, 0 half, 3 full
diff --git a/reference/crazy-frag.asm b/reference/crazy-frag.asm
index 81c6d71..5ef0248 100644
--- a/reference/crazy-frag.asm
+++ b/reference/crazy-frag.asm
@@ -6,39 +6,32 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)bary.f r0.z, 4, r0.x
-bary.f r0.w, 7, r0.x
-bary.f r1.x, 5, r0.x
+bary.f r0.w, 5, r0.x
+bary.f r1.x, 7, r0.x
bary.f r1.y, 3, r0.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.x, r1.x
bary.f r1.z, 2, r0.x
-mov.f32f32 r2.x, r0.z
-mul.f r0.z, r0.w, r1.y
-mov.f32f32 r2.y, r1.x
-mov.f32f32 r2.z, r0.w
-bary.f r0.w, 1, r0.x
+bary.f r2.x, 1, r0.x
+mov.f32f32 r1.w, r1.x
bary.f (ei)r0.x, 0, r0.x
nop
-mov.f32f32 r1.w, r0.z
-(rpt1)nop
-sam.p (f32)(xyz)r2.x, r2.x, s#0, t#0
-(sy)mov.f32f32 r2.w, r2.x
-mov.f32f32 r3.x, r2.y
-mov.f32f32 r3.y, r2.z
-(rpt5)nop
+sam.p (f32)(xyz)r0.y, r0.z, s#0, t#0
+(sy)mov.f32f32 r2.y, r0.y
+mov.f32f32 r2.z, r0.z
+mov.f32f32 r2.w, r0.w
+mul.f r1.w, r1.w, r1.y
+(rpt4)nop
(ss)nop
-sam.p (f32)(xyz)r2.x, r2.w, s#0, t#0
-(sy)mul.f r0.y, r2.z, r1.z
-mul.f r0.z, r2.y, r0.w
-mul.f r0.x, r2.x, r0.x
-nop
-mov.f32f32 r1.z, r0.y
-mov.f32f32 r1.y, r0.z
-mov.f32f32 r1.x, r0.x
+sam.p (f32)(xyz)r0.y, r2.y, s#0, t#0
+(sy)mul.f r1.z, r0.w, r1.z
+mul.f r1.y, r0.z, r2.x
+mul.f r1.x, r0.y, r0.x
end
+nop
+nop
+nop
; FRAG: outputs: r1.x (1:0)
-; FRAG: inputs: r0.z (1:0,cm=f,il=8,b=1) r0.x (5:0,cm=f,il=12,b=1)
-; FRAG: 38 instructions, 0 half, 4 full
+; FRAG: inputs: r0.z (1:0,cm=f,il=8,b=1) r0.y (5:0,cm=f,il=12,b=1)
+; FRAG: 25 instructions, 0 half, 3 full
diff --git a/reference/dd.asm b/reference/dd.asm
index 3008314..332831f 100644
--- a/reference/dd.asm
+++ b/reference/dd.asm
@@ -6,263 +6,196 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c4.x) 0x3f800000, 0x00000000, 0x3e4ccccd, 0x3f1013a9
+@const(c5.x) 0x3f400d1b, 0xbf800000, 0x3ccccccd, 0x3d4ccccd
+@const(c6.x) 0x3fb8aa65, 0x40000000, 0x3f800000, 0xc39044fe
+@const(c7.x) 0xbe2ab368, 0x41200000, 0x00000000, 0x00000000
+@const(c8.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)absneg.f r0.z, (neg)c0.y
-bary.f r0.w, 3, r0.x
-bary.f r1.x, 0, r0.x
+bary.f r0.w, 5, r0.x
bary.f r1.y, 4, r0.x
+bary.f r1.x, 3, r0.x
mul.f r1.z, r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.w, r1.x
-bary.f r2.y, 1, r0.x
+mov.f32f32 r1.w, r0.w
+mov.f32f32 r2.x, r1.y
+mov.f32f32 r2.y, r1.x
add.f r1.z, c4.x, (neg)r1.z
-mov.f32f32 r2.z, r0.w
-mov.f32f32 r3.x, r1.y
-mov.f32f32 r2.x, r2.y
-mov.f32f32 r1.z, r1.z
-mul.f r3.y, c3.x, r0.w
-mov.f32f32 r2.w, r3.x
-mul.f r3.w, c3.x, r1.x
-mul.f r1.x, r1.z, c4.w
-mul.f r3.z, c3.x, r3.x
-dsx (f32)(xy)r4.y, r1.w
-(sy)mul.f r1.z, r4.y, r4.y
-mul.f r4.x, c3.x, r2.y
-mov.f32f32 r1.x, r1.x
-(ss)nop
-dsx (f32)(xy)r1.w, r2.z
-(sy)mul.f r1.w, r1.w, r1.w
-mad.f32 r1.z, r4.z, r4.z, r1.z
-mad.f32 r1.w, r2.x, r2.x, r1.w
-add.f r1.x, c4.x, (neg)r1.x
-dsy (f32)(xy)r2.x, r3.y
-(sy)mul.f r2.x, r2.x, r2.x
-(ss)nop
-dsy (f32)(xy)r3.y, r3.w
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r1.x, r1.x
-bary.f r2.z, 5, r0.x
-bary.f (ei)r0.x, 2, r0.x
-mad.f32 r0.y, r2.y, r2.y, r2.x
-mov.f32f32 r2.x, r1.x
-mov.f32f32 r2.y, r2.z
-cmps.f.lt r1.x, r1.x, c4.y
-mov.f32f32 r2.z, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r2.w, r2.y
-cov.u32f32 r1.x, r1.x
-sqrt r2.x, r2.x
-(ss)mov.f32f32 r2.x, r2.x
-mov.f32f32 r3.w, r0.z
-mul.f r4.x, c3.x, r2.y
-(sy)mul.f r3.y, r3.y, r3.y
-dsx (f32)(x)r4.y, r2.z
-cmps.f.ne r1.x, r1.x, c4.y
-mad.f32 r2.x, c5.x, r3.w, r2.x
-dsx (f32)(x)r4.z, r2.w
-(sy)mad.f32 r1.w, r4.z, r4.z, r1.w
-mad.f32 r1.z, r4.y, r4.y, r1.z
-(ss)mad.f32 r2.z, r3.z, r3.z, r3.y
-mov.f32f32 r2.x, r2.x
-dsy (f32)(x)r3.y, r4.x
-(sy)mad.f32 r0.y, r3.y, r3.y, r0.y
+mul.f r2.z, c3.x, r1.w
+absneg.f r1.w, (neg)r1.w
+absneg.f r2.w, (neg)r2.x
+mul.f r1.z, r1.z, c4.w
+absneg.f r3.x, (neg)r2.y
+bary.f r3.y, 2, r0.x
+mov.f32f32 r3.z, r2.w
+add.f r1.z, c4.x, (neg)r1.z
+dsy (f32)(x)r3.w, r2.z
+mul.f r2.y, c3.x, r2.y
+(ss)mul.f r2.z, c3.x, r2.x
+mov.f32f32 r2.x, r1.w
+mov.f32f32 r4.x, r1.z
+cmps.f.lt r1.z, r1.z, c4.y
+add.f r4.y, c1.y, r3.z
+mov.f32f32 r4.z, r3.x
+mul.f r4.w, c3.x, r3.y
+mov.f32f32 r5.x, c2.x
+dsy (f32)(xy)r5.y, r2.y
+(sy)(ss)mul.f r2.y, r5.y, r5.y
+sqrt r2.z, r4.x
+(ss)mad.f32 r2.z, c5.x, r0.z, r2.z
+mad.f32 r2.y, r5.z, r5.z, r2.y
+cov.u32f32 r1.z, r1.z
+mad.f32 r2.y, r3.w, r3.w, r2.y
+mov.f32f32 r3.w, r2.z
+mul.f r2.z, r2.z, c4.x
+cmps.f.ne r1.z, r1.z, c4.y
+(ss)add.f r4.x, c1.z, r2.x
+mul.f r5.y, r3.w, c4.y
+absneg.f r5.z, (neg)c0.x
+mul.f r3.w, r3.w, c4.y
+absneg.f r5.w, (neg)c0.z
+mad.f32 r0.z, c5.x, r0.z, (neg)r2.z
+mad.f32 r2.z, c5.x, r5.z, (neg)r5.y
+mov.f32f32 r5.y, c4.y
+mad.f32 r3.w, c5.x, r5.w, (neg)r3.w
+mov.f32f32 r5.z, c4.y
+mov.f32f32 r5.w, c4.y
+sel.b32 r2.z, r5.y, r1.z, r2.z
+add.f r5.y, c1.x, r4.z
+rcp r5.x, r5.x
+sel.b32 r3.w, r5.z, r1.z, r3.w
+sel.b32 r0.z, r5.w, r1.z, r0.z
+absneg.f r1.z, (neg)r2.z
+(ss)mul.f r5.y, r5.y, r5.x
+mul.f r4.x, r4.x, r5.x
+absneg.f r5.z, (neg)r3.w
+absneg.f r5.w, (neg)r0.z
+mul.f r6.x, r5.y, (neg)r2.z
+mov.f32f32 r6.y, r4.x
+rcp r6.z, r1.z
+add.f r3.x, c5.y, r3.x
+mul.f r4.y, r4.y, r5.x
+(ss)rcp r1.z, r1.z
+add.f r4.z, c6.z, r4.z
+mul.f r5.x, r6.y, r0.z
+(ss)mul.f r3.x, r3.x, r6.z
+mad.f32 r6.x, r4.y, (neg)r0.z, r6.x
+(ss)mul.f r1.z, r4.z, r1.z
+mad.f32 r4.x, r4.x, (neg)r3.w, r6.x
+mov.f32f32 r4.z, r3.x
+mov.f32f32 r4.y, r4.y
+max.f r3.x, r3.x, r1.z
+rcp r6.x, r5.w
+add.f r2.w, c5.y, r2.w
+mov.f32f32 r6.z, r4.x
+mul.f r4.x, r4.x, c6.y
mov.f32f32 r1.z, r1.z
-mov.f32f32 r2.z, r2.z
-mul.f r2.w, r2.x, c4.x
-mul.f r3.y, r2.x, c4.y
-mul.f r2.x, r2.x, c4.y
-sqrt r1.w, r1.w
-(ss)mov.f32f32 r1.w, r1.w
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r3.y, r3.y
-absneg.f r3.z, (neg)c0.x
-mov.f32f32 r2.x, r2.x
-mad.f32 r0.z, c5.x, r0.z, (neg)r2.w
-mov.f32f32 r2.w, c4.y
+(ss)mul.f r2.w, r2.w, r6.x
+(ss)rcp r5.w, r5.w
+add.f r3.z, c6.y, r3.z
+mad.f32 r6.x, c5.z, r6.z, c5.w
+max.f r4.x, r4.x, c4.y
+min.f r1.z, r4.z, r1.z
+(ss)mul.f r3.z, r3.z, r5.w
+mov.f32f32 r4.z, r2.w
+min.f r4.x, r4.x, c4.x
+mad.f32 r5.x, r4.y, r3.w, (neg)r5.x
+max.f r2.w, r2.w, r3.z
+(ss)rcp r5.w, r6.x
mov.f32f32 r3.z, r3.z
-absneg.f r3.w, (neg)c0.z
-sqrt r0.y, r0.y
-(ss)mov.f32f32 r0.y, r0.y
-sel.b32 r0.z, r2.w, r1.x, r0.z
-absneg.f r2.y, (neg)r2.y
-mad.f32 r2.w, c5.x, r3.z, (neg)r3.y
-mov.f32f32 r3.y, c4.y
-mov.f32f32 r3.z, r3.w
-mov.f32f32 r2.y, r2.y
-absneg.f r3.w, (neg)r0.z
-sel.b32 r2.w, r3.y, r1.x, r2.w
-absneg.f r0.w, (neg)r0.w
-add.f r3.y, c1.z, r2.y
-mov.f32f32 r4.x, c2.x
-absneg.f r4.y, (neg)r2.w
-mov.f32f32 r0.w, r0.w
-mad.f32 r2.x, c5.x, r3.z, (neg)r2.x
-rcp r3.z, r3.w
-mov.f32f32 r4.z, c4.y
-absneg.f r3.x, (neg)r3.x
-add.f r4.w, c1.x, r0.w
-rcp r4.x, r4.x
-(ss)mul.f r3.y, r3.y, r4.x
-rcp r5.x, r4.y
-add.f r5.y, c5.y, r0.w
-sel.b32 r1.x, r4.z, r1.x, r2.x
-mul.f r2.x, r4.w, r4.x
-mov.f32f32 r3.y, r3.y
-(ss)mul.f r4.z, r5.y, r5.x
-absneg.f r4.w, (neg)r1.x
-mov.f32f32 r3.x, r3.x
-mul.f r5.x, r3.y, r0.z
-mov.f32f32 r4.z, r4.z
-(ss)rcp r4.y, r4.y
-add.f r0.w, c6.z, r0.w
-add.f r5.y, c1.y, r3.x
+(ss)add.f r6.x, c8.y, (neg)r4.x
+mov.f32f32 r6.z, r5.x
+min.f r2.w, r3.x, r2.w
+rcp r3.x, r5.z
+add.f r1.w, c5.y, r1.w
+mul.f r6.x, r6.x, c4.x
+mul.f r5.x, r5.x, r6.z
+min.f r3.z, r4.z, r3.z
+(ss)mul.f r1.w, r1.w, r3.x
+rcp r3.x, r5.z
+add.f r2.x, c6.z, r2.x
+mov.f32f32 r4.z, r5.y
+max.f r1.z, r1.z, r3.z
+mov.f32f32 r3.z, r1.w
+(ss)mul.f r2.x, r2.x, r3.x
+mul.f r3.x, r4.z, r3.w
+mul.f r3.w, r4.y, r2.z
+mad.f32 r2.z, r6.y, r2.z, (neg)r3.x
+max.f r1.w, r1.w, r2.x
mov.f32f32 r2.x, r2.x
-rcp r5.z, r4.w
-add.f r5.w, c5.y, r3.x
-mov.f32f32 r0.w, r0.w
-mul.f r4.x, r5.y, r4.x
-mul.f r5.y, r2.x, (neg)r2.w
-add.f r6.x, c5.y, r2.y
-mul.f r3.z, r5.w, r3.z
-mov.f32f32 r4.x, r4.x
-(ss)mul.f r0.w, r0.w, r4.y
-(ss)mul.f r4.y, r6.x, r5.z
-mov.f32f32 r3.z, r3.z
-mad.f32 r5.x, r4.x, r1.x, (neg)r5.x
-mov.f32f32 r0.w, r0.w
-mad.f32 r5.y, r4.x, (neg)r0.z, r5.y
-mov.f32f32 r4.y, r4.y
-mov.f32f32 r5.x, r5.x
-max.f r5.z, r4.z, r0.w
-mov.f32f32 r5.y, r5.y
-min.f r0.w, r4.z, r0.w
-mul.f r4.z, r5.x, r5.x
-mul.f r5.x, r2.x, r1.x
-mov.f32f32 r5.z, r5.z
-mad.f32 r5.x, r3.y, r2.w, (neg)r5.x
-rcp r3.w, r3.w
-add.f r3.x, c6.y, r3.x
-mad.f32 r1.x, r3.y, (neg)r1.x, r5.y
-rcp r3.y, r4.w
-add.f r2.y, c6.z, r2.y
-(ss)mov.f32f32 r4.w, r5.x
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r2.y, r2.y
-mad.f32 r4.z, r4.w, r4.w, r4.z
-(ss)mul.f r3.x, r3.x, r3.w
-mul.f r3.w, r1.x, c6.y
-mad.f32 r1.x, c5.z, r1.x, c5.w
-mov.f32f32 r4.z, r4.z
-mul.f r2.w, r4.x, r2.w
-mov.f32f32 r3.x, r3.x
-mad.f32 r2.x, r2.x, r0.z, (neg)r2.w
-mov.f32f32 r2.w, r3.w
-mul.f r2.y, r2.y, r3.y
-max.f r3.y, r3.z, r3.x
-mov.f32f32 r2.x, r2.x
-max.f r2.w, r2.w, c4.y
-min.f r3.x, r3.z, r3.x
-mov.f32f32 r3.y, r3.y
-mad.f32 r2.x, r2.x, r2.x, r4.z
-min.f r2.w, r2.w, c4.x
-max.f r0.w, r0.w, r3.x
-min.f r3.x, r5.z, r3.y
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r2.y, r2.y
-add.f r3.y, c8.y, (neg)r2.w
-rcp r1.x, r1.x
-(ss)mov.f32f32 r1.x, r1.x
-add.f r2.x, r2.x, c5.y
-max.f r3.z, r4.y, r2.y
-mul.f r3.y, r3.y, c4.x
-min.f r2.y, r4.y, r2.y
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r3.z, r3.z
-mul.f r0.y, r1.w, r0.y
-sqrt r1.z, r1.z
-max.f r0.w, r0.w, r2.y
-mad.f32 r1.x, r2.x, r1.x, c4.x
-min.f r1.w, r3.x, r3.z
-mov.f32f32 r0.y, r0.y
-(ss)mov.f32f32 r1.z, r1.z
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r0.w, r0.w
-mul.f r0.x, c3.x, r0.x
-mul.f r1.x, (neg)r1.x, c6.x
+mad.f32 r3.x, r4.z, r0.z, (neg)r3.w
+nop
+min.f r1.w, r2.w, r1.w
+mov.f32f32 r2.w, r2.z
+min.f r2.x, r3.z, r2.x
+mov.f32f32 r3.z, r3.x
+mov.f32f32 r3.w, r1.w
+mad.f32 r2.z, r2.z, r2.w, r5.x
+max.f r1.z, r1.z, r2.x
+mad.f32 r2.x, r3.x, r3.z, r2.z
mul.f r0.z, r0.z, r1.w
-add.f r0.w, r1.w, (neg)r0.w
-rcp r0.y, r0.y
-(ss)mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.x, r1.x
+sqrt r2.y, r2.y
+dsx (f32)(x)r6.y, r0.w
+dsx (f32)(xy)r6.z, r1.x
+(sy)(ss)mul.f r0.w, r6.z, r6.z
+add.f r1.x, r3.w, (neg)r1.z
+add.f r1.z, r2.x, c5.y
add.f r0.z, r1.y, (neg)r0.z
-mov.f32f32 r0.w, r0.w
-dsy (f32)(x)r3.z, r0.x
-(sy)(ss)mad.f32 r0.x, r3.z, r3.z, r2.z
-mov.f32f32 r1.y, c4.y
-mov.f32f32 r2.x, c4.y
-mov.f32f32 r0.z, r0.z
-exp2 r1.x, r1.x
-(ss)mov.f32f32 r1.x, r1.x
-mad.f32 r0.w, c7.y, r0.w, c4.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r1.y, r1.y
-add.f r1.x, c4.x, r1.x
+mad.f32 r0.w, r6.w, r6.w, r0.w
+mad.f32 r1.x, c7.y, r1.x, c4.x
+mad.f32 r1.y, r1.z, r5.w, c4.x
add.f r0.z, r0.z, c7.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.w, r1.y
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r0.z, r0.z
-sqrt r0.x, r0.x
-(ss)mov.f32f32 r0.x, r0.x
-mov.f32f32 r1.y, r2.x
-(rpt1)nop
+mad.f32 r0.w, r6.y, r6.y, r0.w
+dsy (f32)(x)r4.y, r4.w
+bary.f r2.z, 0, r0.x
+dsx (f32)(x)r3.x, r3.y
+mul.f r1.y, (neg)r1.y, c6.x
mul.f r0.z, c6.w, r0.z
rcp r1.x, r1.x
-(ss)mov.f32f32 r1.x, r1.x
-rcp r0.w, r0.w
-(rpt1)nop
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r1.x, r1.x
-(ss)mov.f32f32 r0.w, r0.w
-mul.f r0.x, r1.z, r0.x
-mov.f32f32 r1.z, r1.y
-mov.f32f32 r1.x, r1.x
-mul.f r0.z, r0.z, r0.w
-mov.f32f32 r0.x, r0.x
+bary.f (ei)r2.w, 1, r0.x
+mul.f r0.x, c3.x, r2.z
+mov.f32f32 r1.w, c4.y
+(ss)mul.f r0.z, r0.z, r1.x
+sqrt r0.y, r0.w
+(ss)mul.f r0.w, r0.y, r2.y
+mul.f r0.y, c3.x, r2.w
+mov.f32f32 r1.z, c4.y
nop
-max.f r0.w, r1.x, c4.y
-mov.f32f32 r0.z, r0.z
-mul.f r0.x, r0.x, r0.y
-nop
-min.f r0.y, r0.w, c4.x
-(rpt2)nop
-mul.f r0.y, r2.w, r0.y
+exp2 r1.x, r1.y
+(ss)add.f r1.x, c4.x, r1.x
+dsx (f32)(xy)r2.x, r2.z
+(sy)(ss)mul.f r1.y, r2.x, r2.x
exp2 r0.z, r0.z
-(ss)mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.x, r0.x
+(ss)add.f r0.z, c4.x, r0.z
+rcp r0.w, r0.w
+mad.f32 r1.y, r2.y, r2.y, r1.y
+dsy (f32)(xy)r2.x, r0.x
nop
-add.f r0.y, r0.y, r3.y
-add.f r0.z, c4.x, r0.z
-mul.f r0.x, r0.x, c4.z
+(sy)(ss)mul.f r0.x, r2.x, r2.x
+mad.f32 r0.y, r3.x, r3.x, r1.y
+mad.f32 r0.x, r2.y, r2.y, r0.x
+rcp r1.x, r1.x
+(ss)max.f r1.x, r1.x, c4.y
+rcp r0.z, r0.z
+mad.f32 r0.x, r4.y, r4.y, r0.x
+(rpt5)nop
+sqrt r0.x, r0.x
nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
+sqrt r0.y, r0.y
+(ss)mul.f r0.x, r0.y, r0.x
+min.f r0.y, r1.x, c4.x
+(rpt1)nop
+mul.f r0.x, r0.x, r0.w
+mul.f r0.y, r4.x, r0.y
(rpt1)nop
-mov.f32f32 r1.y, r0.y
-(rpt2)nop
-rcp r0.y, r0.z
-(ss)mov.f32f32 r0.y, r0.y
-(rpt2)nop
-mul.f r0.x, r0.x, r0.y
-(rpt2)nop
-mov.f32f32 r1.x, r0.x
+mul.f r0.x, r0.x, c4.z
+add.f r1.y, r0.y, r6.x
+(rpt1)nop
+mul.f r1.x, r0.x, r0.z
end
nop
nop
+nop
; FRAG: outputs: r1.x (1:0)
-; FRAG: inputs: r0.x (5:20,cm=f,il=8,b=1) r1.x (5:21,cm=f,il=12,b=1)
-; FRAG: 265 instructions, 0 half, 7 full
+; FRAG: inputs: r0.y (5:20,cm=f,il=8,b=1) r1.x (5:21,cm=f,il=12,b=1)
+; FRAG: 189 instructions, 0 half, 7 full
diff --git a/reference/es2gears-vert.asm b/reference/es2gears-vert.asm
index bc04597..b3981b3 100644
--- a/reference/es2gears-vert.asm
+++ b/reference/es2gears-vert.asm
@@ -14,6 +14,8 @@
@out(r1.y) out5
@out(r1.z) out6
@out(r1.w) out7
+@const(c10.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
+@const(c11.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)mul.f r0.w, c4.x, r0.x
mov.f32f32 r1.w, c8.x
mad.f32 r0.w, c5.x, r0.y, r0.w
@@ -25,88 +27,60 @@ mad.f32 r2.x, c5.y, r0.y, r2.x
add.f r0.w, r0.w, c7.x
mad.f32 r2.x, c6.y, r0.z, r2.x
mul.f r0.x, c4.z, r0.x
-mul.f r2.z, c0.w, r1.x
-mov.f32f32 r2.w, r0.w
-mad.f32 r0.w, c8.y, r2.y, r1.w
-add.f r1.w, r2.x, c7.y
+mad.f32 r1.w, c8.y, r2.y, r1.w
+mov.f32f32 r2.y, r0.w
+mov.f32f32 r2.z, c8.z
+add.f r2.x, r2.x, c7.y
mad.f32 r0.x, c5.z, r0.y, r0.x
-mul.f r0.y, r2.w, r2.w
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r2.x, c8.z
+mul.f r0.y, r0.w, r2.y
+mad.f32 r0.w, c8.z, r2.z, r1.w
+mov.f32f32 r1.w, r2.x
mad.f32 r0.x, c6.z, r0.z, r0.x
-mad.f32 r0.z, c1.w, r1.y, r2.z
-mad.f32 r0.y, r1.w, r1.w, r0.y
-mad.f32 r0.w, c8.z, r2.x, r0.w
+mul.f r0.z, c0.w, r1.x
+mul.f r2.z, c0.z, r1.x
+mad.f32 r0.y, r2.x, r1.w, r0.y
add.f r0.x, r0.x, c7.z
-mad.f32 r0.z, c2.w, r1.z, r0.z
-mov.f32f32 r0.y, r0.y
-mul.f r2.x, c0.z, r1.x
-mov.f32f32 r0.x, r0.x
-add.f r0.z, r0.z, c3.w
-mad.f32 r2.x, c1.z, r1.y, r2.x
-mul.f r2.y, c0.y, r1.x
-mad.f32 r0.y, r0.x, r0.x, r0.y
rsq r0.w, r0.w
-(ss)mov.f32f32 r2.z, r0.w
-(ss)mov.f32f32 r0.w, r0.z
-mad.f32 r0.z, c2.z, r1.z, r2.x
-mad.f32 r2.x, c1.y, r1.y, r2.y
+(ss)mov.f32f32 r2.x, r0.w
+mul.f r2.w, c8.z, r0.w
+mad.f32 r0.z, c1.w, r1.y, r0.z
+(ss)mov.f32f32 r0.w, r0.x
+mul.f r3.x, c8.x, r2.x
+mul.f r2.x, c8.y, r2.x
+mad.f32 r0.z, c2.w, r1.z, r0.z
+mad.f32 r0.y, r0.w, r0.w, r0.y
+mad.f32 r0.w, c1.z, r1.y, r2.z
+mul.f r2.z, c0.y, r1.x
mul.f r1.x, c0.x, r1.x
-mul.f r2.y, c8.x, r2.z
+mad.f32 r3.y, c2.z, r1.z, r0.w
+mad.f32 r2.z, c1.y, r1.y, r2.z
+mad.f32 r1.x, c1.x, r1.y, r1.x
rsq r0.y, r0.y
-(ss)mov.f32f32 r0.y, r0.y
-mul.f r3.x, c8.y, r2.z
-mul.f r2.z, c8.z, r2.z
-nop
-mul.f r2.w, r2.w, r0.y
-mul.f r1.w, r1.w, r0.y
+(ss)mov.f32f32 r1.y, r0.y
mul.f r0.x, r0.x, r0.y
-nop
-mov.f32f32 r0.y, r2.w
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r2.w, r3.x
-mul.f r0.y, r0.y, r2.y
-mov.f32f32 r2.y, r2.z
-add.f r0.z, r0.z, c3.z
-mad.f32 r0.y, r1.w, r2.w, r0.y
-mad.f32 r1.w, c2.y, r1.z, r2.x
-mad.f32 r1.x, c1.x, r1.y, r1.x
-nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-mad.f32 r0.x, r0.x, r2.y, r0.y
-add.f r0.y, r1.w, c3.y
+add.f r0.w, r0.z, c3.w
+add.f r0.z, r3.y, c3.z
+(ss)mul.f r0.y, r2.y, r1.y
+mul.f r1.y, r1.w, r1.y
+mad.f32 r1.w, c2.y, r1.z, r2.z
mad.f32 r1.x, c2.x, r1.z, r1.x
+mul.f r0.y, r0.y, r3.x
nop
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
-add.f r1.x, r1.x, c3.x
-nop
-max.f r0.x, r0.x, c10.x
+mad.f32 r1.y, r1.y, r2.x, r0.y
+add.f r0.y, r1.w, c3.y
+mad.f32 r1.y, r0.x, r2.w, r1.y
+add.f r0.x, r1.x, c3.x
+(rpt1)nop
+max.f r1.x, r1.y, c10.x
(rpt2)nop
-mov.f32f32 r1.y, r0.x
-mov.f32f32 r0.x, r1.x
+mov.f32f32 r1.y, r1.x
+mul.f r1.x, r1.x, c9.x
(rpt1)nop
-mul.f r1.x, r1.y, c9.w
+mul.f r1.w, r1.y, c9.w
mul.f r1.z, r1.y, c9.z
-mul.f r1.w, r1.y, c9.y
-mul.f r1.y, r1.y, c9.x
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r2.x, r1.w
-mov.f32f32 r2.y, r1.y
-mov.f32f32 r1.w, r1.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r1.y, r2.x
-mov.f32f32 r1.x, r2.y
+mul.f r1.y, r1.y, c9.y
end
-nop
-nop
-nop
; VERT: outputs: r0.x (0:0) r1.x (5:19)
; VERT: inputs: r1.x (0:0,cm=7,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0)
-; VERT: 92 instructions, 0 half, 4 full
+; VERT: 68 instructions, 0 half, 4 full
diff --git a/reference/face.asm b/reference/face.asm
index 0e81ac5..d7e2e1c 100644
--- a/reference/face.asm
+++ b/reference/face.asm
@@ -5,27 +5,25 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c0.x) 0x3f000000, 0x00000000, 0x00000000, 0x00000000
+@const(c1.x) 0x3f800000, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)mul.s r0.x, hr0.x, 2
(rpt2)nop
add.s r0.x, r0.x, 1
(rpt2)nop
cov.s32f32 r0.x, r0.x
(rpt2)nop
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
max.f r0.x, r0.x, c0.y
(rpt2)nop
min.f r0.x, r0.x, c1.x
(rpt2)nop
-mul.f r0.x, c0.x, r0.x
+mul.f r1.x, c0.x, r0.x
(rpt2)nop
-mov.f32f32 r1.w, r0.x
-mov.f32f32 r1.z, r0.x
-mov.f32f32 r1.y, r0.x
-mov.f32f32 r1.x, r0.x
+mov.f32f32 r1.w, r1.x
+mov.f32f32 r1.z, r1.x
+mov.f32f32 r1.y, r1.x
end
-nop
; FRAG: outputs: r1.x (1:0)
; FRAG: inputs: r63.y (7:0,cm=f,il=8,b=0)
-; FRAG: 33 instructions, 1 half, 2 full
+; FRAG: 28 instructions, 1 half, 2 full
diff --git a/reference/ffox-otmc/ffox-otmc-03.asm b/reference/ffox-otmc/ffox-otmc-03.asm
index 72ac097..be2ee4c 100644
--- a/reference/ffox-otmc/ffox-otmc-03.asm
+++ b/reference/ffox-otmc/ffox-otmc-03.asm
@@ -11,6 +11,7 @@
@out(r1.y) out5
@out(r1.z) out6
@out(r1.w) out7
+@const(c21.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000
(sy)(ss)trunc.f r0.x, r1.w
(rpt2)nop
cov.f32s16 hr0.x, r0.x
diff --git a/reference/ffox-otmc/ffox-otmc-04.asm b/reference/ffox-otmc/ffox-otmc-04.asm
index 4afc961..8eae3b4 100644
--- a/reference/ffox-otmc/ffox-otmc-04.asm
+++ b/reference/ffox-otmc/ffox-otmc-04.asm
@@ -6,6 +6,7 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c0.x) 0x3f800000, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)bary.f r0.z, 0, r0.x
bary.f (ei)r0.w, 1, r0.x
mov.f32f32 r1.w, c0.x
diff --git a/reference/ffox-otmc/ffox-otmc-05.asm b/reference/ffox-otmc/ffox-otmc-05.asm
index 72ac097..be2ee4c 100644
--- a/reference/ffox-otmc/ffox-otmc-05.asm
+++ b/reference/ffox-otmc/ffox-otmc-05.asm
@@ -11,6 +11,7 @@
@out(r1.y) out5
@out(r1.z) out6
@out(r1.w) out7
+@const(c21.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000
(sy)(ss)trunc.f r0.x, r1.w
(rpt2)nop
cov.f32s16 hr0.x, r0.x
diff --git a/reference/ffox-otmc/ffox-otmc-06.asm b/reference/ffox-otmc/ffox-otmc-06.asm
index c5ef2b9..34cd110 100644
--- a/reference/ffox-otmc/ffox-otmc-06.asm
+++ b/reference/ffox-otmc/ffox-otmc-06.asm
@@ -7,6 +7,7 @@
@out(r0.y) out1
@out(r0.z) out2
@out(r0.w) out3
+@const(c13.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000
(sy)(ss)trunc.f r0.z, r0.w
(rpt2)nop
cov.f32s16 hr0.x, r0.z
diff --git a/reference/ffox-otmc/ffox-otmc-08.asm b/reference/ffox-otmc/ffox-otmc-08.asm
index c5ef2b9..34cd110 100644
--- a/reference/ffox-otmc/ffox-otmc-08.asm
+++ b/reference/ffox-otmc/ffox-otmc-08.asm
@@ -7,6 +7,7 @@
@out(r0.y) out1
@out(r0.z) out2
@out(r0.w) out3
+@const(c13.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000
(sy)(ss)trunc.f r0.z, r0.w
(rpt2)nop
cov.f32s16 hr0.x, r0.z
diff --git a/reference/ffox-otmc/ffox-otmc-13.asm b/reference/ffox-otmc/ffox-otmc-13.asm
index a023928..36f82a6 100644
--- a/reference/ffox-otmc/ffox-otmc-13.asm
+++ b/reference/ffox-otmc/ffox-otmc-13.asm
@@ -6,6 +6,7 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)bary.f r0.z, 0, r0.x
bary.f (ei)r0.w, 1, r0.x
(rpt5)nop
diff --git a/reference/ffox-otmc/ffox-otmc-14.asm b/reference/ffox-otmc/ffox-otmc-14.asm
index 72ac097..be2ee4c 100644
--- a/reference/ffox-otmc/ffox-otmc-14.asm
+++ b/reference/ffox-otmc/ffox-otmc-14.asm
@@ -11,6 +11,7 @@
@out(r1.y) out5
@out(r1.z) out6
@out(r1.w) out7
+@const(c21.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000
(sy)(ss)trunc.f r0.x, r1.w
(rpt2)nop
cov.f32s16 hr0.x, r0.x
diff --git a/reference/ffox-otmc/ffox-otmc-15.asm b/reference/ffox-otmc/ffox-otmc-15.asm
index c99bf54..13ea129 100644
--- a/reference/ffox-otmc/ffox-otmc-15.asm
+++ b/reference/ffox-otmc/ffox-otmc-15.asm
@@ -6,6 +6,7 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)bary.f r0.z, 0, r0.x
bary.f (ei)r0.w, 1, r0.x
(rpt5)nop
diff --git a/reference/ffox-otmc/ffox-otmc-16.asm b/reference/ffox-otmc/ffox-otmc-16.asm
index 72ac097..be2ee4c 100644
--- a/reference/ffox-otmc/ffox-otmc-16.asm
+++ b/reference/ffox-otmc/ffox-otmc-16.asm
@@ -11,6 +11,7 @@
@out(r1.y) out5
@out(r1.z) out6
@out(r1.w) out7
+@const(c21.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000
(sy)(ss)trunc.f r0.x, r1.w
(rpt2)nop
cov.f32s16 hr0.x, r0.x
diff --git a/reference/ffox-otmc/ffox-otmc-17.asm b/reference/ffox-otmc/ffox-otmc-17.asm
index 72ac097..be2ee4c 100644
--- a/reference/ffox-otmc/ffox-otmc-17.asm
+++ b/reference/ffox-otmc/ffox-otmc-17.asm
@@ -11,6 +11,7 @@
@out(r1.y) out5
@out(r1.z) out6
@out(r1.w) out7
+@const(c21.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000
(sy)(ss)trunc.f r0.x, r1.w
(rpt2)nop
cov.f32s16 hr0.x, r0.x
diff --git a/reference/ffox-otmc/ffox-otmc-18.asm b/reference/ffox-otmc/ffox-otmc-18.asm
index 7e1ef51..78e569c 100644
--- a/reference/ffox-otmc/ffox-otmc-18.asm
+++ b/reference/ffox-otmc/ffox-otmc-18.asm
@@ -6,6 +6,7 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c1.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)bary.f r0.z, 0, r0.x
bary.f (ei)r0.w, 1, r0.x
(rpt5)nop
diff --git a/reference/ffox-otmc/ffox-otmc-19.asm b/reference/ffox-otmc/ffox-otmc-19.asm
index 72ac097..be2ee4c 100644
--- a/reference/ffox-otmc/ffox-otmc-19.asm
+++ b/reference/ffox-otmc/ffox-otmc-19.asm
@@ -11,6 +11,7 @@
@out(r1.y) out5
@out(r1.z) out6
@out(r1.w) out7
+@const(c21.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000
(sy)(ss)trunc.f r0.x, r1.w
(rpt2)nop
cov.f32s16 hr0.x, r0.x
diff --git a/reference/ffox-otmc/ffox-otmc-24.asm b/reference/ffox-otmc/ffox-otmc-24.asm
index 33986ec..938e6a5 100644
--- a/reference/ffox-otmc/ffox-otmc-24.asm
+++ b/reference/ffox-otmc/ffox-otmc-24.asm
@@ -6,6 +6,7 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c1.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)bary.f r0.z, 0, r0.x
bary.f (ei)r0.w, 1, r0.x
(rpt5)nop
diff --git a/reference/ffox-otmc/ffox-otmc-26.asm b/reference/ffox-otmc/ffox-otmc-26.asm
index a28588b..f65517a 100644
--- a/reference/ffox-otmc/ffox-otmc-26.asm
+++ b/reference/ffox-otmc/ffox-otmc-26.asm
@@ -34,6 +34,8 @@
@out(r2.y) out9
@out(r2.z) out10
@out(r2.w) out11
+@const(c19.x) 0x3f000000, 0x00000000, 0x3f800000, 0x00000000
+@const(c20.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)mov.f32f32 r2.x, c15.x
mul.f r2.y, c4.x, r3.w
mul.f r2.z, c4.x, r4.w
diff --git a/reference/ffox-otmc/ffox-otmc-27.asm b/reference/ffox-otmc/ffox-otmc-27.asm
index 4307641..39b56be 100644
--- a/reference/ffox-otmc/ffox-otmc-27.asm
+++ b/reference/ffox-otmc/ffox-otmc-27.asm
@@ -6,6 +6,7 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c0.x) 0x3f000000, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)bary.f r0.z, 6, r0.x
mov.f32f32 r0.w, c0.x
bary.f r1.x, 4, r0.x
diff --git a/reference/ffox-otmc/ffox-otmc-28.asm b/reference/ffox-otmc/ffox-otmc-28.asm
index a28588b..f65517a 100644
--- a/reference/ffox-otmc/ffox-otmc-28.asm
+++ b/reference/ffox-otmc/ffox-otmc-28.asm
@@ -34,6 +34,8 @@
@out(r2.y) out9
@out(r2.z) out10
@out(r2.w) out11
+@const(c19.x) 0x3f000000, 0x00000000, 0x3f800000, 0x00000000
+@const(c20.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)mov.f32f32 r2.x, c15.x
mul.f r2.y, c4.x, r3.w
mul.f r2.z, c4.x, r4.w
diff --git a/reference/ffox-otmc/ffox-otmc-29.asm b/reference/ffox-otmc/ffox-otmc-29.asm
index 72ac097..be2ee4c 100644
--- a/reference/ffox-otmc/ffox-otmc-29.asm
+++ b/reference/ffox-otmc/ffox-otmc-29.asm
@@ -11,6 +11,7 @@
@out(r1.y) out5
@out(r1.z) out6
@out(r1.w) out7
+@const(c21.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000
(sy)(ss)trunc.f r0.x, r1.w
(rpt2)nop
cov.f32s16 hr0.x, r0.x
diff --git a/reference/ffox-otmc/ffox-otmc-30.asm b/reference/ffox-otmc/ffox-otmc-30.asm
index ca29128..8f87710 100644
--- a/reference/ffox-otmc/ffox-otmc-30.asm
+++ b/reference/ffox-otmc/ffox-otmc-30.asm
@@ -1,34 +1,27 @@
; options:
-; FRAG: new compiler
+; FRAG: TGSI compiler
@in(r0.x) in0
@in(r0.y) in1
-@out(r0.w) out0
-@out(r1.x) out1
-@out(r1.y) out2
-@out(r1.z) out3
+@out(r1.x) out0
+@out(r1.y) out1
+@out(r1.z) out2
+@out(r1.w) out3
+@const(c0.x) 0x3f800000, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)bary.f r0.z, 0, r0.x
-bary.f (ei)r0.x, 1, r0.x
-mov.f32f32 r0.y, c0.x
+bary.f (ei)r0.w, 1, r0.x
+mov.f32f32 r1.w, c0.x
+(rpt4)nop
+sam (f32)(xyz)r0.x, r0.z, s#0, t#0
+(sy)mov.f32f32 r1.z, r0.x
+mov.f32f32 r1.y, r0.y
+mov.f32f32 r1.x, r0.z
+end
nop
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r1.z, r0.y
nop
-mov.f32f32 r0.y, r0.z
-mov.f32f32 r0.z, r0.x
-(rpt5)nop
-sam (f32)(xyz)r1.w, r0.y, s#0, t#0
-(sy)mov.f32f32 r0.x, r1.w
-(ss)mov.f32f32 r0.y, r2.x
-mov.f32f32 r0.z, r2.y
nop
-mov.f32f32 r1.y, r0.x
-mov.f32f32 r1.x, r0.y
-mov.f32f32 r0.w, r0.z
-end
-; FRAG: outputs: r0.w (1:0)
+; FRAG: outputs: r1.x (1:0)
; FRAG: inputs: r0.x (5:9,cm=f,il=8,b=1)
-; FRAG: 25 instructions, 0 half, 3 full
+; FRAG: 13 instructions, 0 half, 2 full
; pos (bary): r0.x
-; color: r0.w
+; color: r1.x
diff --git a/reference/ffox-otmc/ffox-otmc-31.asm b/reference/ffox-otmc/ffox-otmc-31.asm
index 72ac097..be2ee4c 100644
--- a/reference/ffox-otmc/ffox-otmc-31.asm
+++ b/reference/ffox-otmc/ffox-otmc-31.asm
@@ -11,6 +11,7 @@
@out(r1.y) out5
@out(r1.z) out6
@out(r1.w) out7
+@const(c21.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000
(sy)(ss)trunc.f r0.x, r1.w
(rpt2)nop
cov.f32s16 hr0.x, r0.x
diff --git a/reference/ffox-otmc/ffox-otmc-32.asm b/reference/ffox-otmc/ffox-otmc-32.asm
index 72ac097..be2ee4c 100644
--- a/reference/ffox-otmc/ffox-otmc-32.asm
+++ b/reference/ffox-otmc/ffox-otmc-32.asm
@@ -11,6 +11,7 @@
@out(r1.y) out5
@out(r1.z) out6
@out(r1.w) out7
+@const(c21.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000
(sy)(ss)trunc.f r0.x, r1.w
(rpt2)nop
cov.f32s16 hr0.x, r0.x
diff --git a/reference/ffox-otmc/ffox-otmc-33.asm b/reference/ffox-otmc/ffox-otmc-33.asm
index 1664e6b..abba978 100644
--- a/reference/ffox-otmc/ffox-otmc-33.asm
+++ b/reference/ffox-otmc/ffox-otmc-33.asm
@@ -6,6 +6,7 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c3.x) 0x3f800000, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)bary.f r0.z, 0, r0.x
bary.f (ei)r0.w, 1, r0.x
mov.f32f32 r0.x, c2.x
diff --git a/reference/ffox-otmc/ffox-otmc-34.asm b/reference/ffox-otmc/ffox-otmc-34.asm
index 72ac097..be2ee4c 100644
--- a/reference/ffox-otmc/ffox-otmc-34.asm
+++ b/reference/ffox-otmc/ffox-otmc-34.asm
@@ -11,6 +11,7 @@
@out(r1.y) out5
@out(r1.z) out6
@out(r1.w) out7
+@const(c21.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000
(sy)(ss)trunc.f r0.x, r1.w
(rpt2)nop
cov.f32s16 hr0.x, r0.x
diff --git a/reference/ffox-otmc/ffox-otmc-36.asm b/reference/ffox-otmc/ffox-otmc-36.asm
index 50f2536..8619b6b 100644
--- a/reference/ffox-otmc/ffox-otmc-36.asm
+++ b/reference/ffox-otmc/ffox-otmc-36.asm
@@ -6,6 +6,8 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c7.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
+@const(c8.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.z, 6, r0.x
bary.f r0.w, 3, r0.x
bary.f r1.x, 9, r0.x
diff --git a/reference/ffox-otmc/ffox-otmc-39.asm b/reference/ffox-otmc/ffox-otmc-39.asm
index e07af58..8450922 100644
--- a/reference/ffox-otmc/ffox-otmc-39.asm
+++ b/reference/ffox-otmc/ffox-otmc-39.asm
@@ -6,6 +6,8 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c6.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
+@const(c7.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.z, 2, r0.x
bary.f r0.w, 5, r0.x
bary.f r2.x, 8, r0.x
diff --git a/reference/ffox-otmc/ffox-otmc-41.asm b/reference/ffox-otmc/ffox-otmc-41.asm
index 1cd75ff..5c6db50 100644
--- a/reference/ffox-otmc/ffox-otmc-41.asm
+++ b/reference/ffox-otmc/ffox-otmc-41.asm
@@ -25,6 +25,8 @@
@out(r3.y) out13
@out(r3.z) out14
@out(r3.w) out15
+@const(c12.x) 0x3f800000, 0x00000000, 0x40000000, 0x00000000
+@const(c13.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)mov.f32f32 r0.x, c5.x
mov.f32f32 r0.y, c9.x
mov.f32f32 r0.z, c1.w
diff --git a/reference/ffox-otmc/ffox-otmc-42.asm b/reference/ffox-otmc/ffox-otmc-42.asm
index 50f2536..8619b6b 100644
--- a/reference/ffox-otmc/ffox-otmc-42.asm
+++ b/reference/ffox-otmc/ffox-otmc-42.asm
@@ -6,6 +6,8 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c7.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
+@const(c8.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.z, 6, r0.x
bary.f r0.w, 3, r0.x
bary.f r1.x, 9, r0.x
diff --git a/reference/ffox-otmc/ffox-otmc-43.asm b/reference/ffox-otmc/ffox-otmc-43.asm
index 1cd75ff..5c6db50 100644
--- a/reference/ffox-otmc/ffox-otmc-43.asm
+++ b/reference/ffox-otmc/ffox-otmc-43.asm
@@ -25,6 +25,8 @@
@out(r3.y) out13
@out(r3.z) out14
@out(r3.w) out15
+@const(c12.x) 0x3f800000, 0x00000000, 0x40000000, 0x00000000
+@const(c13.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)mov.f32f32 r0.x, c5.x
mov.f32f32 r0.y, c9.x
mov.f32f32 r0.z, c1.w
diff --git a/reference/ffox-otmc/ffox-otmc-44.asm b/reference/ffox-otmc/ffox-otmc-44.asm
index 1cd75ff..5c6db50 100644
--- a/reference/ffox-otmc/ffox-otmc-44.asm
+++ b/reference/ffox-otmc/ffox-otmc-44.asm
@@ -25,6 +25,8 @@
@out(r3.y) out13
@out(r3.z) out14
@out(r3.w) out15
+@const(c12.x) 0x3f800000, 0x00000000, 0x40000000, 0x00000000
+@const(c13.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)mov.f32f32 r0.x, c5.x
mov.f32f32 r0.y, c9.x
mov.f32f32 r0.z, c1.w
diff --git a/reference/ffox-otmc/ffox-otmc-45.asm b/reference/ffox-otmc/ffox-otmc-45.asm
index 50f2536..8619b6b 100644
--- a/reference/ffox-otmc/ffox-otmc-45.asm
+++ b/reference/ffox-otmc/ffox-otmc-45.asm
@@ -6,6 +6,8 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c7.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
+@const(c8.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.z, 6, r0.x
bary.f r0.w, 3, r0.x
bary.f r1.x, 9, r0.x
diff --git a/reference/ffox-otmc/ffox-otmc-46.asm b/reference/ffox-otmc/ffox-otmc-46.asm
index 1cd75ff..5c6db50 100644
--- a/reference/ffox-otmc/ffox-otmc-46.asm
+++ b/reference/ffox-otmc/ffox-otmc-46.asm
@@ -25,6 +25,8 @@
@out(r3.y) out13
@out(r3.z) out14
@out(r3.w) out15
+@const(c12.x) 0x3f800000, 0x00000000, 0x40000000, 0x00000000
+@const(c13.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)mov.f32f32 r0.x, c5.x
mov.f32f32 r0.y, c9.x
mov.f32f32 r0.z, c1.w
diff --git a/reference/ffox-otmc/ffox-otmc-48.asm b/reference/ffox-otmc/ffox-otmc-48.asm
index 8cc8ce8..d73ae0a 100644
--- a/reference/ffox-otmc/ffox-otmc-48.asm
+++ b/reference/ffox-otmc/ffox-otmc-48.asm
@@ -6,6 +6,8 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c9.x) 0x3f800000, 0x00000000, 0x00000000, 0x00000000
+@const(c10.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.z, 6, r0.x
bary.f r0.w, 3, r0.x
bary.f r1.x, 9, r0.x
diff --git a/reference/ffox-otmc/ffox-otmc-50.asm b/reference/ffox-otmc/ffox-otmc-50.asm
index b25f88e..fb6c094 100644
--- a/reference/ffox-otmc/ffox-otmc-50.asm
+++ b/reference/ffox-otmc/ffox-otmc-50.asm
@@ -29,6 +29,9 @@
@out(r4.y) out17
@out(r4.z) out18
@out(r4.w) out19
+@const(c14.x) 0x3f800000, 0x00000000, 0xc0800000, 0x3d8f5c29
+@const(c15.x) 0x3f000000, 0x00000000, 0x00000000, 0x00000000
+@const(c16.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)mov.f32f32 r1.x, c12.x
mov.f32f32 r1.y, c13.x
mul.f r1.z, c0.z, r0.x
diff --git a/reference/ffox-otmc/ffox-otmc-51.asm b/reference/ffox-otmc/ffox-otmc-51.asm
index 0b2b2d2..bc785ef 100644
--- a/reference/ffox-otmc/ffox-otmc-51.asm
+++ b/reference/ffox-otmc/ffox-otmc-51.asm
@@ -6,6 +6,8 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c10.x) 0x3e99999a, 0x00000000, 0x00000000, 0x00000000
+@const(c11.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.z, 6, r0.x
bary.f r0.w, 3, r0.x
bary.f r1.x, 9, r0.x
diff --git a/reference/ffox-otmc/ffox-otmc-52.asm b/reference/ffox-otmc/ffox-otmc-52.asm
index b25f88e..fb6c094 100644
--- a/reference/ffox-otmc/ffox-otmc-52.asm
+++ b/reference/ffox-otmc/ffox-otmc-52.asm
@@ -29,6 +29,9 @@
@out(r4.y) out17
@out(r4.z) out18
@out(r4.w) out19
+@const(c14.x) 0x3f800000, 0x00000000, 0xc0800000, 0x3d8f5c29
+@const(c15.x) 0x3f000000, 0x00000000, 0x00000000, 0x00000000
+@const(c16.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)mov.f32f32 r1.x, c12.x
mov.f32f32 r1.y, c13.x
mul.f r1.z, c0.z, r0.x
diff --git a/reference/ffox-otmc/ffox-otmc-54.asm b/reference/ffox-otmc/ffox-otmc-54.asm
index a8a0af6..85e3640 100644
--- a/reference/ffox-otmc/ffox-otmc-54.asm
+++ b/reference/ffox-otmc/ffox-otmc-54.asm
@@ -6,6 +6,8 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c0.x) 0x40000000, 0x3f8ccccd, 0x3f800000, 0x3e99999a
+@const(c1.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.z, 2, r0.x
bary.f r0.w, 5, r0.x
bary.f r1.x, 0, r0.x
diff --git a/reference/ffox-otmc/ffox-otmc-57.asm b/reference/ffox-otmc/ffox-otmc-57.asm
index 937bced..6d3a071 100644
--- a/reference/ffox-otmc/ffox-otmc-57.asm
+++ b/reference/ffox-otmc/ffox-otmc-57.asm
@@ -6,6 +6,7 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)bary.f r0.z, 0, r0.x
bary.f r0.w, 1, r0.x
bary.f (ei)r1.x, 2, r0.x
diff --git a/reference/ffox-otmc/ffox-otmc-59.asm b/reference/ffox-otmc/ffox-otmc-59.asm
index 46a2696..ccc345b 100644
--- a/reference/ffox-otmc/ffox-otmc-59.asm
+++ b/reference/ffox-otmc/ffox-otmc-59.asm
@@ -6,6 +6,7 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)bary.f r0.z, 0, r0.x
bary.f (ei)r0.w, 1, r0.x
(rpt5)nop
diff --git a/reference/ffox-vert.asm b/reference/ffox-vert.asm
index 0a2d6cc..631a17f 100644
--- a/reference/ffox-vert.asm
+++ b/reference/ffox-vert.asm
@@ -11,6 +11,7 @@
@out(r1.y) out5
@out(r1.z) out6
@out(r1.w) out7
+@const(c21.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000
(sy)(ss)trunc.f r0.x, r1.w
(rpt2)nop
cov.f32s16 hr0.x, r0.x
@@ -27,66 +28,57 @@ mov.f32f32 r0.z, c<a0.x + 19>
(ul)mov.f32f32 r0.w, c<a0.x + 17>
mad.f32 r0.x, r1.x, r0.x, r0.y
(rpt2)nop
+mul.f r0.y, c8.w, r0.x
+mad.f32 r0.z, r1.y, r0.z, r0.w
mov.f32f32 r0.x, r0.x
-mad.f32 r0.y, r1.y, r0.z, r0.w
(rpt1)nop
-mul.f r0.z, c8.w, r0.x
-mov.f32f32 r0.y, r0.y
+mad.f32 r0.y, c9.w, r0.z, r0.y
mul.f r0.w, c8.x, r0.x
mul.f r1.z, c8.y, r0.x
mul.f r0.x, c8.z, r0.x
-mad.f32 r0.z, c9.w, r0.y, r0.z
-mad.f32 r0.w, c9.x, r0.y, r0.w
-mad.f32 r1.z, c9.y, r0.y, r1.z
-mad.f32 r0.x, c9.z, r0.y, r0.x
-mov.f32f32 r0.y, r0.z
-mov.f32f32 r0.z, r0.w
-mov.f32f32 r0.w, r1.z
-mov.f32f32 r0.x, r0.x
add.f r0.y, r0.y, c11.w
-add.f r0.z, r0.z, c11.x
-add.f r0.w, r0.w, c11.y
+mov.f32f32 r0.z, r0.z
+(rpt4)nop
+rcp r1.w, r0.y
+mad.f32 r0.w, c9.x, r0.z, r0.w
+mad.f32 r1.z, c9.y, r0.z, r1.z
+mad.f32 r0.x, c9.z, r0.z, r0.x
+(ss)add.f r0.y, r0.y, (neg)c12.w
+add.f r0.z, r0.w, c11.x
+add.f r0.w, r1.z, c11.y
add.f r0.x, r0.x, c11.z
-(rpt2)nop
-rcp r1.z, r0.y
-(ss)mul.f r0.z, r0.z, r1.z
-mul.f r0.w, r0.w, r1.z
-mul.f r0.x, r0.x, r1.z
-(ss)mov.f32f32 r0.y, r0.y
+nop
+(ss)mul.f r0.z, r0.z, r1.w
+mul.f r0.w, r0.w, r1.w
+mul.f r0.x, r0.x, r1.w
+nop
add.f r0.z, r0.z, (neg)c12.x
add.f r0.w, r0.w, (neg)c12.y
add.f r0.x, r0.x, (neg)c12.z
-add.f r0.y, r0.y, (neg)c12.w
-(rpt2)nop
+nop
mul.f r0.z, r0.z, r0.y
-mul.f r0.w, r0.w, r0.y
+mul.f r1.z, r0.w, r0.y
mul.f r0.x, r0.x, r0.y
nop
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r1.z, r0.w
-mov.f32f32 r0.x, r0.x
-nop
-mul.f r0.w, c0.w, r0.z
-mul.f r1.w, c0.z, r0.z
-mad.f32 r0.w, c1.w, r1.z, r0.w
-mad.f32 r1.w, c1.z, r1.z, r1.w
-mad.f32 r0.w, c2.w, r0.x, r0.w
-mad.f32 r1.w, c2.z, r0.x, r1.w
-mad.f32 r0.w, c3.w, r0.y, r0.w
-mad.f32 r1.w, c3.z, r0.y, r1.w
-mul.f r2.x, c0.y, r0.z
-mul.f r2.y, c0.x, r0.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.z, r1.w
-mad.f32 r1.w, c1.y, r1.z, r2.x
-mad.f32 r1.z, c1.x, r1.z, r2.y
-mad.f32 r1.w, c2.y, r0.x, r1.w
+mov.f32f32 r1.w, r0.z
+mul.f r2.x, c0.x, r0.z
+mov.f32f32 r2.y, r1.z
+mov.f32f32 r2.z, r0.x
+mul.f r0.z, c0.w, r1.w
+mul.f r0.w, c0.z, r1.w
+mad.f32 r0.z, c1.w, r2.y, r0.z
+mad.f32 r0.w, c1.z, r2.y, r0.w
+mad.f32 r0.z, c2.w, r2.z, r0.z
+mad.f32 r2.w, c2.z, r2.z, r0.w
+mad.f32 r0.w, c3.w, r0.y, r0.z
+mad.f32 r0.z, c3.z, r0.y, r2.w
+mul.f r1.w, c0.y, r1.w
+mad.f32 r1.z, c1.x, r1.z, r2.x
+mad.f32 r1.w, c1.y, r2.y, r1.w
mad.f32 r0.x, c2.x, r0.x, r1.z
-mad.f32 r1.z, c3.y, r0.y, r1.w
+mad.f32 r1.z, c2.y, r2.z, r1.w
mad.f32 r0.x, c3.x, r0.y, r0.x
-(rpt1)nop
-mov.f32f32 r0.y, r1.z
-mov.f32f32 r0.x, r0.x
+mad.f32 r0.y, c3.y, r0.y, r1.z
mova a0.x, hr0.y
(rpt5)nop
mov.f32f32 r1.z, c<a0.x + 70>
@@ -95,39 +87,24 @@ mov.f32f32 r2.x, c<a0.x + 71>
(ul)mov.f32f32 r2.y, c<a0.x + 69>
mad.f32 r1.x, r1.x, r1.z, r1.w
(rpt2)nop
-mov.f32f32 r1.x, r1.x
+mov.f32f32 r1.z, r1.x
+mul.f r1.x, c13.x, r1.x
mad.f32 r1.y, r1.y, r2.x, r2.y
-(rpt1)nop
-mul.f r1.z, c13.y, r1.x
-mov.f32f32 r1.y, r1.y
-mul.f r1.w, c13.x, r1.x
-mul.f r2.x, c13.w, r1.x
-mul.f r1.x, c13.z, r1.x
-mad.f32 r1.z, c14.y, r1.y, r1.z
-mad.f32 r1.w, c14.x, r1.y, r1.w
-mad.f32 r2.x, c14.w, r1.y, r2.x
-mad.f32 r1.x, c14.z, r1.y, r1.x
-mov.f32f32 r1.y, r1.z
-mov.f32f32 r1.z, r1.w
-mov.f32f32 r1.w, r2.x
-mov.f32f32 r1.x, r1.x
+nop
+mul.f r1.w, c13.y, r1.z
+mul.f r2.x, c13.w, r1.z
+mov.f32f32 r2.y, r1.y
+mul.f r1.z, c13.z, r1.z
+mad.f32 r1.x, c14.x, r1.y, r1.x
+nop
+mad.f32 r1.y, c14.y, r2.y, r1.w
+mad.f32 r1.w, c14.w, r2.y, r2.x
+mad.f32 r1.z, c14.z, r2.y, r1.z
+add.f r1.x, r1.x, c16.x
add.f r1.y, r1.y, c16.y
-add.f r2.x, r1.z, c16.x
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r1.z, r1.x
-mov.f32f32 r1.x, r1.y
-mov.f32f32 r1.y, r2.x
-(rpt1)nop
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.y, r1.y
-(rpt1)nop
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r2.x, r1.y
-(rpt1)nop
-mov.f32f32 r1.y, r1.x
-mov.f32f32 r1.x, r2.x
end
+nop
; VERT: outputs: r0.x (0:0) r1.x (5:20)
; VERT: inputs: r1.x (0:0,cm=b,il=8,b=0)
-; VERT: 144 instructions, 1 half, 3 full
+; VERT: 114 instructions, 1 half, 3 full
diff --git a/reference/flow.asm b/reference/flow.asm
index e9b7b37..c61e896 100644
--- a/reference/flow.asm
+++ b/reference/flow.asm
@@ -6,227 +6,154 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c5.x) 0x40000000, 0xbf800000, 0x40800000, 0x3f000000
+@const(c6.x) 0x00000000, 0x3f333333, 0x3e99999a, 0x3d4ccccd
+@const(c7.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.z, 0, r0.x
mov.f32f32 r0.w, c3.x
bary.f r1.x, 1, r0.x
mov.f32f32 r1.y, c6.x
-add.f r1.z, r0.z, c3.x
-add.f r1.w, r0.z, c4.x
+add.f r1.z, r0.z, c4.x
+add.f r2.x, r0.z, c3.x
+add.f r1.w, r1.x, c4.y
+add.f r2.y, r1.x, c3.y
add.f r0.z, r0.z, r0.w
add.f r0.w, r1.x, r1.y
-mov.f32f32 r1.y, r1.z
-mov.f32f32 r1.z, r1.w
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.w, r1.y
-add.f r1.y, r1.x, c3.y
-mov.f32f32 r2.y, r1.z
-add.f r1.x, r1.x, c4.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.x, r1.x
-bary.f r1.z, 7, r0.x
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r2.w, r0.z
-mov.f32f32 r3.x, r0.w
-mov.f32f32 r0.z, r1.x
-mov.f32f32 r2.x, r1.y
-mov.f32f32 r0.w, r1.z
-bary.f r1.x, 8, r0.x
-bary.f r1.y, 9, r0.x
-mov.f32f32 r2.z, r0.z
-sam (f32)(xyzw)r2.w, r2.w, s#2, t#2
-mov.f32f32 r0.z, c6.x
-mov.f32f32 r1.z, c6.x
-sam (f32)(xyz)r3.w, r1.w, s#0, t#0
-(sy)(ss)mad.f32 r1.w, c5.x, r4.x, c5.y
-mad.f32 r2.x, c5.x, r3.w, c5.y
-mad.f32 r3.w, c5.x, r4.y, c5.y
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.w, r1.w
-sam (f32)(xyz)r4.x, r2.y, s#1, t#1
-(sy)(ss)mad.f32 r2.y, c5.x, r4.y, c5.y
-mov.f32f32 r2.x, r2.x
-mad.f32 r2.z, c5.x, r4.x, c5.y
+mov.f32f32 r1.x, c6.x
+mov.f32f32 r1.y, c6.x
+bary.f r2.z, 7, r0.x
+sam (f32)(xyz)r2.w, r1.z, s#1, t#1
+(sy)(ss)mad.f32 r1.z, c5.x, r3.x, c5.y
+sam (f32)(xyz)r3.z, r2.x, s#0, t#0
+(sy)mad.f32 r1.w, c5.x, r3.w, c5.y
+(ss)mad.f32 r2.x, c5.x, r2.w, c5.y
+mad.f32 r2.y, c5.x, r3.z, c5.y
+mul.f r1.z, c5.z, r1.z
mul.f r1.w, c5.z, r1.w
-mov.f32f32 r2.y, r2.y
-mul.f r4.x, r2.x, r2.x
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r1.w, r1.w
-mul.f r2.y, c5.z, r2.y
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r2.z, r2.z
-mad.f32 r4.x, r1.w, r1.w, r4.x
-mov.f32f32 r2.y, r2.y
-mad.f32 r4.y, c5.x, r4.z, c5.y
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r4.x, r4.x
-mov.f32f32 r2.y, r2.y
-mad.f32 r4.x, r3.w, r3.w, r4.x
-mul.f r4.z, r2.z, r2.z
-mov.f32f32 r4.y, r4.y
-(rpt3)nop
-rsq r4.x, r4.x
-(ss)mov.f32f32 r4.x, r4.x
-mad.f32 r4.z, r2.y, r2.y, r4.z
-mov.f32f32 r4.y, r4.y
-bary.f r4.w, 4, r0.x
-mul.f r2.x, r2.x, r4.x
-mul.f r1.w, r1.w, r4.x
-mul.f r3.w, r3.w, r4.x
-mov.f32f32 r4.x, r4.z
-mov.f32f32 r2.x, r2.x
-mad.f32 r4.x, r4.y, r4.y, r4.x
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r3.w, r3.w
-bary.f r4.z, 5, r0.x
+mov.f32f32 r2.w, r2.x
+mov.f32f32 r3.x, r2.y
+mov.f32f32 r3.z, r1.z
+mov.f32f32 r3.w, r1.w
+mov.f32f32 r2.w, r2.w
+mul.f r2.y, r2.y, r3.x
+mov.f32f32 r3.z, r3.z
+mad.f32 r1.w, r1.w, r3.w, r2.y
+mul.f r2.x, r2.x, r2.w
+mad.f32 r2.y, c5.x, r4.x, c5.y
+mad.f32 r1.z, r1.z, r3.z, r2.x
+mad.f32 r2.x, c5.x, r3.y, c5.y
+sam (f32)(xyzw)r4.x, r0.z, s#2, t#2
+(ss)bary.f r0.z, 8, r0.x
+bary.f r0.w, 9, r0.x
+bary.f r3.y, 4, r0.x
+mov.f32f32 r5.x, r2.x
+mov.f32f32 r5.y, r2.y
+bary.f r5.z, 5, r0.x
bary.f (ei)r0.x, 6, r0.x
+mov.f32f32 r0.y, r5.x
+mad.f32 r1.w, r5.y, r5.y, r1.w
(rpt1)nop
-rsq r0.y, r4.x
-(ss)mov.f32f32 r0.y, r0.y
-(rpt2)nop
-mad.f32 r2.x, r2.z, r0.y, r2.x
-mad.f32 r1.w, r2.y, r0.y, r1.w
-mad.f32 r0.y, r4.y, r0.y, r3.w
-nop
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r0.y, r0.y
+mad.f32 r0.y, r0.y, r0.y, r1.z
+(rpt5)nop
+rsq r0.y, r0.y
+(ss)mov.f32f32 r1.z, r0.y
+rsq r1.w, r1.w
+(ss)mov.f32f32 r5.x, r1.w
+(ss)mul.f r1.w, r2.y, r1.w
+(rpt1)nop
+mul.f r2.y, r3.x, r5.x
+mul.f r3.x, r3.w, r5.x
+mad.f32 r2.y, r2.w, r1.z, r2.y
+mad.f32 r1.z, r3.z, r1.z, r3.x
+mad.f32 r0.y, r2.x, r0.y, r1.w
nop
-mul.f r2.x, r2.x, c5.w
-mul.f r1.w, r1.w, c5.w
+mul.f r1.w, r2.y, c5.w
+mul.f r1.z, r1.z, c5.w
mul.f r0.y, r0.y, c5.w
nop
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r0.y, r0.y
-nop
-mul.f r2.y, r2.x, r4.w
-mul.f r2.z, r4.w, r2.x
-mad.f32 r2.y, r1.w, r4.z, r2.y
-mad.f32 r2.z, r4.z, r1.w, r2.z
-(rpt1)nop
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r2.z, r2.z
-mad.f32 r2.y, r0.y, r0.x, r2.y
-mad.f32 r2.z, r0.x, r0.y, r2.z
-(rpt1)nop
-mul.f r2.x, r2.y, r2.x
-max.f r2.z, r2.z, c6.x
-mul.f r1.w, r2.y, r1.w
-mul.f r0.y, r2.y, r0.y
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r2.y, r2.z
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r0.y, r0.y
-mul.f r2.x, c5.x, r2.x
-mad.f32 r2.y, c6.y, r2.y, c6.z
-mul.f r1.w, c5.x, r1.w
-mul.f r0.y, c5.x, r0.y
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r0.y, r0.y
-add.f r2.x, r4.w, (neg)r2.x
-mul.f r2.z, r3.z, r2.y
-mul.f r3.y, r3.y, r2.y
-mul.f r3.x, r3.x, r2.y
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r3.z, r2.z
-mov.f32f32 r3.w, r3.y
-(ss)mov.f32f32 r4.x, r3.x
-mul.f r4.y, r2.x, r2.x
-add.f r1.w, r4.z, (neg)r1.w
-add.f r1.z, r3.z, r1.z
-add.f r0.x, r0.x, (neg)r0.y
-mul.f r0.y, r2.w, r2.y
-mov.f32f32 r1.w, r1.w
-add.f r0.z, r1.z, r0.z
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r1.z, r0.y
-mad.f32 r2.y, r1.w, r1.w, r4.y
+mov.f32f32 r2.x, r1.w
+mul.f r1.w, r3.y, r1.w
+mov.f32f32 r2.y, r1.z
+mov.f32f32 r2.w, r0.y
+mul.f r3.x, r2.x, r3.y
+mad.f32 r1.z, r5.z, r1.z, r1.w
+mad.f32 r1.w, r2.y, r5.z, r3.x
+mad.f32 r0.y, r0.x, r0.y, r1.z
+mad.f32 r1.z, r2.w, r0.x, r1.w
(rpt2)nop
-mov.f32f32 r2.y, r2.y
-nop
-mad.f32 r2.y, r0.x, r0.x, r2.y
-(rpt5)nop
-rsq r2.y, r2.y
-(ss)mov.f32f32 r2.y, r2.y
+mul.f r1.w, r1.z, r2.x
+max.f r0.y, r0.y, c6.x
+mul.f r2.x, r1.z, r2.y
+mul.f r1.z, r1.z, r2.w
+mul.f r1.w, c5.x, r1.w
+mad.f32 r0.y, c6.y, r0.y, c6.z
+mul.f r2.x, c5.x, r2.x
+mul.f r1.z, c5.x, r1.z
+add.f r1.w, r3.y, (neg)r1.w
+mov.f32f32 r2.y, r0.y
+add.f r2.x, r5.z, (neg)r2.x
+add.f r0.x, r0.x, (neg)r1.z
+mov.f32f32 r1.z, r1.w
+(sy)mul.f r2.w, r4.w, r2.y
+mov.f32f32 r3.x, r2.x
+mov.f32f32 r3.y, r0.x
+mul.f r1.w, r1.w, r1.z
+add.f r1.y, r2.w, r1.y
+mad.f32 r1.w, r2.x, r3.x, r1.w
+mul.f r2.x, r4.z, r2.y
+mad.f32 r1.w, r3.y, r3.y, r1.w
+add.f r1.x, r1.y, r1.x
+mul.f r1.y, r4.y, r2.y
+mul.f r0.y, r4.x, r0.y
(rpt2)nop
-mul.f r2.x, r2.x, r2.y
-mul.f r1.w, r1.w, r2.y
-mul.f r0.x, r0.x, r2.y
-nop
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r0.x, r0.x
+rsq r1.w, r1.w
+(ss)mov.f32f32 r2.y, r1.w
+mul.f r0.x, r0.x, r1.w
+(rpt1)nop
+mul.f r1.z, r1.z, r2.y
+(ss)mul.f r1.w, r3.x, r2.y
+(rpt1)nop
+mul.f r1.z, r1.z, r2.z
nop
-mul.f r0.w, r2.x, r0.w
+mad.f32 r0.z, r1.w, r0.z, r1.z
nop
-mad.f32 r0.w, r1.w, r1.x, r0.w
-(rpt2)nop
-mov.f32f32 r0.w, r0.w
-nop
-mad.f32 r0.x, r0.x, r1.y, r0.w
-(rpt2)nop
-mov.f32f32 r0.x, r0.x
+mad.f32 r0.x, r0.x, r0.w, r0.z
(rpt2)nop
max.f r0.x, r0.x, c6.x
(rpt2)nop
-mov.f32f32 r0.x, r0.x
+mov.f32f32 r0.z, r0.x
(rpt2)nop
-mov.f32f32 r0.w, r0.x
-mov.f32f32 r1.x, r0.x
-cmps.f.lt r0.x, c6.x, r0.x
+mul.f r0.x, r0.x, r0.z
+cmps.f.lt r0.z, c6.x, r0.z
(rpt1)nop
-mul.f r0.w, r0.w, r1.x
-cov.u32f32 r0.x, r0.x
+mov.f32f32 r0.w, r0.x
+cov.u32f32 r0.z, r0.z
(rpt1)nop
-mov.f32f32 r0.w, r0.w
-cmps.f.ne r0.x, r0.x, c6.x
+mul.f r0.x, r0.x, r0.w
+cmps.f.ne r0.z, r0.z, c6.x
(rpt1)nop
+mov.f32f32 r0.w, r0.x
+mul.f r0.x, r0.x, c6.w
+sel.b32 r1.w, r1.x, r0.z, r2.w
+nop
mul.f r0.w, r0.w, r0.w
-sel.b32 r0.z, r0.z, r0.x, r2.z
-(rpt1)nop
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.w, r0.z
+add.f r1.x, r2.x, r0.x
+add.f r1.z, r1.y, r0.x
+add.f r0.x, r0.y, r0.x
+mul.f r0.w, r0.w, c6.z
+(rpt2)nop
+mov.f32f32 r2.y, r0.w
+add.f r0.x, r0.x, r0.w
(rpt1)nop
-mul.f r0.z, r0.w, r0.w
-mul.f r0.w, r0.w, c6.w
+add.f r0.w, r1.x, r2.y
+add.f r1.x, r1.z, r2.y
(rpt1)nop
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r1.x, r0.w
-mov.f32f32 r1.y, r0.w
-mov.f32f32 r0.w, r0.w
-mul.f r0.z, r0.z, c6.z
-add.f r1.x, r3.w, r1.x
-add.f r1.y, r4.x, r1.y
-add.f r0.w, r1.z, r0.w
-mov.f32f32 r0.z, r0.z
-(rpt2)nop
-mov.f32f32 r1.z, r0.z
-mov.f32f32 r2.x, r0.z
-mov.f32f32 r0.z, r0.z
-nop
-add.f r1.x, r1.x, r1.z
-add.f r1.y, r1.y, r2.x
-add.f r0.z, r0.w, r0.z
-nop
-sel.b32 r0.w, r1.x, r0.x, r3.y
-sel.b32 r1.x, r1.y, r0.x, r3.x
-sel.b32 r0.x, r0.z, r0.x, r0.y
-nop
-mov.f32f32 r1.z, r0.w
-mov.f32f32 r1.y, r1.x
-mov.f32f32 r1.x, r0.x
+sel.b32 r1.z, r0.w, r0.z, r2.x
+sel.b32 r1.y, r1.x, r0.z, r1.y
+sel.b32 r1.x, r0.x, r0.z, r0.y
end
nop
; FRAG: outputs: r1.x (1:0)
-; FRAG: inputs: r0.x (5:0,cm=f,il=8,b=1) r0.w (5:19,cm=f,il=12,b=1) r2.x (5:20,cm=f,il=16,b=1)
-; FRAG: 254 instructions, 0 half, 5 full
+; FRAG: inputs: r0.x (5:0,cm=f,il=8,b=1) r1.w (5:19,cm=f,il=12,b=1) r2.x (5:20,cm=f,il=16,b=1)
+; FRAG: 169 instructions, 0 half, 6 full
diff --git a/reference/foo.asm b/reference/foo.asm
index 60e36ef..f984014 100644
--- a/reference/foo.asm
+++ b/reference/foo.asm
@@ -1,24 +1,27 @@
; options:
-; FRAG: new compiler
+; FRAG: TGSI compiler
@in(r0.x) in0
@in(r0.y) in1
@out(r1.x) out0
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c0.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000
(sy)(ss)bary.f r0.z, 0, r0.x
-bary.f (ei)r0.x, 1, r0.x
+bary.f (ei)r0.w, 1, r0.x
mov.f32f32 r1.w, c0.y
-nop
-mov.f32f32 r0.y, r0.z
-mov.f32f32 r0.z, r0.x
-(rpt5)nop
-sam (f32)(xyzw)r0.x, r0.y, s#0, t#0
+(rpt4)nop
+sam (f32)(xyzw)r0.x, r0.z, s#0, t#0
(sy)mov.f32f32 r1.z, r0.x
mov.f32f32 r1.y, r0.y
mov.f32f32 r1.x, r0.z
end
+nop
+nop
+nop
; FRAG: outputs: r1.x (1:0)
; FRAG: inputs: r0.x (5:0,cm=f,il=8,b=1)
-; FRAG: 17 instructions, 0 half, 2 full
+; FRAG: 13 instructions, 0 half, 2 full
+; pos (bary): r0.x
+; color: r1.x
diff --git a/reference/fragProg1/fragProg1-08.asm b/reference/fragProg1/fragProg1-08.asm
index 2c30bd8..6c590d1 100644
--- a/reference/fragProg1/fragProg1-08.asm
+++ b/reference/fragProg1/fragProg1-08.asm
@@ -12,6 +12,7 @@
@out(r1.y) out5
@out(r1.z) out6
@out(r1.w) out7
+@const(c5.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000
(sy)(ss)mov.f32f32 r0.x, c4.w
mov.f32f32 r0.y, c4.z
mov.f32f32 r0.z, c4.y
diff --git a/reference/fragProg1/fragProg1-09.asm b/reference/fragProg1/fragProg1-09.asm
index 505da11..2e6f7f9 100644
--- a/reference/fragProg1/fragProg1-09.asm
+++ b/reference/fragProg1/fragProg1-09.asm
@@ -4,6 +4,7 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c1.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000
(sy)(ss)mov.f32f32 r0.x, c0.w
mov.f32f32 r0.y, c0.z
mov.f32f32 r0.z, c0.y
diff --git a/reference/fragProg1/fragProg1-10.asm b/reference/fragProg1/fragProg1-10.asm
index 2c95a4d..a2f25c9 100644
--- a/reference/fragProg1/fragProg1-10.asm
+++ b/reference/fragProg1/fragProg1-10.asm
@@ -4,6 +4,7 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c1.x) 0x3e800000, 0x00000000, 0x3f000000, 0x00000000
(sy)(ss)mov.f32f32 r0.x, c0.w
mov.f32f32 r0.y, c0.z
mov.f32f32 r0.z, c0.y
diff --git a/reference/fragProg1/fragProg1-11.asm b/reference/fragProg1/fragProg1-11.asm
index 2c95a4d..3683d38 100644
--- a/reference/fragProg1/fragProg1-11.asm
+++ b/reference/fragProg1/fragProg1-11.asm
@@ -4,6 +4,7 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c1.x) 0xbe800000, 0xbe4ccccd, 0x00000000, 0x00000000
(sy)(ss)mov.f32f32 r0.x, c0.w
mov.f32f32 r0.y, c0.z
mov.f32f32 r0.z, c0.y
diff --git a/reference/fragProg1/fragProg1-12.asm b/reference/fragProg1/fragProg1-12.asm
index c6c895a..620d464 100644
--- a/reference/fragProg1/fragProg1-12.asm
+++ b/reference/fragProg1/fragProg1-12.asm
@@ -4,31 +4,20 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c1.x) 0x40000000, 0x3f800000, 0x00000000, 0x00000000
(sy)(ss)mov.f32f32 r0.x, c0.z
mov.f32f32 r0.y, c0.y
mov.f32f32 r0.z, c0.x
-mov.f32f32 r0.w, c0.w
+mov.f32f32 r1.w, c0.w
mul.f r0.x, r0.x, c1.x
mul.f r0.y, r0.y, c1.x
mul.f r0.z, r0.z, c1.x
-mov.f32f32 r1.w, r0.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
nop
-add.f r0.x, r0.x, (neg)c1.y
-add.f r0.y, r0.y, (neg)c1.y
-add.f r0.z, r0.z, (neg)c1.y
-nop
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-nop
-mov.f32f32 r1.z, r0.x
-mov.f32f32 r1.y, r0.y
-mov.f32f32 r1.x, r0.z
+add.f r1.z, r0.x, (neg)c1.y
+add.f r1.y, r0.y, (neg)c1.y
+add.f r1.x, r0.z, (neg)c1.y
end
; FRAG: outputs: r1.x (1:0)
; FRAG: inputs:
-; FRAG: 24 instructions, 0 half, 2 full
+; FRAG: 12 instructions, 0 half, 2 full
diff --git a/reference/fragProg1/fragProg1-13.asm b/reference/fragProg1/fragProg1-13.asm
index 568337d..a2bf346 100644
--- a/reference/fragProg1/fragProg1-13.asm
+++ b/reference/fragProg1/fragProg1-13.asm
@@ -4,6 +4,7 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c3.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)mov.f32f32 r0.x, c0.w
mov.f32f32 r0.y, c0.z
mov.f32f32 r0.z, c0.y
diff --git a/reference/fragProg1/fragProg1-14.asm b/reference/fragProg1/fragProg1-14.asm
index 1ee49dc..6b8d0ba 100644
--- a/reference/fragProg1/fragProg1-14.asm
+++ b/reference/fragProg1/fragProg1-14.asm
@@ -4,6 +4,7 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c0.x) 0x00000000, 0x40490ff9, 0x3f000000, 0x3f800000
(sy)(ss)mov.f32f32 r0.x, c0.w
mov.f32f32 r0.y, c0.z
mov.f32f32 r0.z, c0.y
diff --git a/reference/fragProg1/fragProg1-15.asm b/reference/fragProg1/fragProg1-15.asm
index 1ee49dc..88369af 100644
--- a/reference/fragProg1/fragProg1-15.asm
+++ b/reference/fragProg1/fragProg1-15.asm
@@ -4,6 +4,7 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c0.x) 0x40d90ff9, 0x40e90ff9, 0x40c90ff9, 0xc0b90ff9
(sy)(ss)mov.f32f32 r0.x, c0.w
mov.f32f32 r0.y, c0.z
mov.f32f32 r0.z, c0.y
diff --git a/reference/fragProg1/fragProg1-16.asm b/reference/fragProg1/fragProg1-16.asm
index d3ba6ce..9bf4264 100644
--- a/reference/fragProg1/fragProg1-16.asm
+++ b/reference/fragProg1/fragProg1-16.asm
@@ -6,6 +6,7 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c1.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.z, 0, r0.x
bary.f r0.w, 1, r0.x
bary.f (ei)r0.x, 2, r0.x
@@ -14,15 +15,15 @@ mul.f r0.y, c0.x, r0.z
nop
mad.f32 r0.y, c0.y, r0.w, r0.y
(rpt2)nop
-mov.f32f32 r0.y, r0.y
-nop
-mad.f32 r1.w, c0.z, r0.x, r0.y
-mad.f32 r1.z, c0.z, r0.x, r0.y
-mad.f32 r1.y, c0.z, r0.x, r0.y
+mov.f32f32 r0.z, r0.y
mad.f32 r1.x, c0.z, r0.x, r0.y
+mad.f32 r1.w, c0.z, r0.x, r0.z
+mad.f32 r1.z, c0.z, r0.x, r0.z
+mad.f32 r1.y, c0.z, r0.x, r0.z
end
nop
+nop
; FRAG: outputs: r1.x (1:0)
; FRAG: inputs: r0.x (1:0,cm=f,il=8,b=1)
-; FRAG: 17 instructions, 0 half, 2 full
+; FRAG: 16 instructions, 0 half, 2 full
diff --git a/reference/fragProg1/fragProg1-17.asm b/reference/fragProg1/fragProg1-17.asm
index c07ebd3..ee3ea36 100644
--- a/reference/fragProg1/fragProg1-17.asm
+++ b/reference/fragProg1/fragProg1-17.asm
@@ -4,6 +4,8 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c1.x) 0x3f000000, 0x00000000, 0x00000000, 0x00000000
+@const(c2.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)mov.f32f32 r0.x, c0.x
mov.f32f32 r0.y, c0.y
mov.f32f32 r0.z, c0.z
@@ -11,8 +13,6 @@ nop
mul.f r0.x, r0.x, r0.x
nop
mad.f32 r0.x, r0.y, r0.y, r0.x
-(rpt2)nop
-mov.f32f32 r0.x, r0.x
nop
mad.f32 r0.x, r0.z, r0.z, r0.x
(rpt2)nop
@@ -22,9 +22,7 @@ mul.f r1.y, r0.x, c1.x
mul.f r1.x, r0.x, c1.x
end
nop
-nop
-nop
; FRAG: outputs: r1.x (1:0)
; FRAG: inputs:
-; FRAG: 21 instructions, 0 half, 2 full
+; FRAG: 17 instructions, 0 half, 2 full
diff --git a/reference/fragProg1/fragProg1-18.asm b/reference/fragProg1/fragProg1-18.asm
index 47b9fb9..23a8db4 100644
--- a/reference/fragProg1/fragProg1-18.asm
+++ b/reference/fragProg1/fragProg1-18.asm
@@ -6,6 +6,7 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c1.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.z, 0, r0.x
bary.f r0.w, 1, r0.x
bary.f r1.x, 2, r0.x
@@ -13,20 +14,16 @@ bary.f (ei)r0.x, 3, r0.x
mul.f r0.y, c0.x, r0.z
nop
mad.f32 r0.y, c0.y, r0.w, r0.y
-(rpt2)nop
-mov.f32f32 r0.y, r0.y
nop
mad.f32 r0.y, c0.z, r1.x, r0.y
(rpt2)nop
-mov.f32f32 r0.y, r0.y
-nop
-mad.f32 r1.w, c0.w, r0.x, r0.y
-mad.f32 r1.z, c0.w, r0.x, r0.y
-mad.f32 r1.y, c0.w, r0.x, r0.y
+mov.f32f32 r0.z, r0.y
mad.f32 r1.x, c0.w, r0.x, r0.y
+mad.f32 r1.w, c0.w, r0.x, r0.z
+mad.f32 r1.z, c0.w, r0.x, r0.z
+mad.f32 r1.y, c0.w, r0.x, r0.z
end
-nop
; FRAG: outputs: r1.x (1:0)
; FRAG: inputs: r63.y (1:0,cm=f,il=8,b=1)
-; FRAG: 23 instructions, 0 half, 2 full
+; FRAG: 18 instructions, 0 half, 2 full
diff --git a/reference/fragProg1/fragProg1-19.asm b/reference/fragProg1/fragProg1-19.asm
index 4939cbc..9e18be0 100644
--- a/reference/fragProg1/fragProg1-19.asm
+++ b/reference/fragProg1/fragProg1-19.asm
@@ -6,6 +6,8 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c1.x) 0x3dcccccd, 0x00000000, 0x00000000, 0x00000000
+@const(c2.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.z, 0, r0.x
bary.f r0.w, 1, r0.x
bary.f r1.x, 2, r0.x
@@ -13,24 +15,24 @@ bary.f (ei)r0.x, 3, r0.x
mul.f r0.y, c0.x, r0.z
nop
mad.f32 r0.y, c0.y, r0.w, r0.y
-(rpt2)nop
-mov.f32f32 r0.y, r0.y
nop
mad.f32 r0.y, c0.z, r1.x, r0.y
(rpt2)nop
-mov.f32f32 r0.y, r0.y
-(rpt2)nop
-add.f r0.z, r0.x, r0.y
-add.f r0.w, r0.x, r0.y
-add.f r1.x, r0.x, r0.y
-add.f r0.x, r0.x, r0.y
-mul.f r1.w, r0.z, c1.x
-mul.f r1.z, r0.w, c1.x
-mul.f r1.y, r1.x, c1.x
-mul.f r1.x, r0.x, c1.x
+mov.f32f32 r0.z, r0.y
+add.f r0.y, r0.x, r0.y
+(rpt1)nop
+add.f r0.w, r0.x, r0.z
+add.f r1.x, r0.x, r0.z
+add.f r0.x, r0.x, r0.z
+nop
+mul.f r1.w, r0.w, c1.x
+mul.f r1.z, r1.x, c1.x
+mul.f r1.y, r0.x, c1.x
+mul.f r1.x, r0.y, c1.x
end
nop
+nop
; FRAG: outputs: r1.x (1:0)
; FRAG: inputs: r63.y (1:0,cm=f,il=8,b=1)
-; FRAG: 29 instructions, 0 half, 2 full
+; FRAG: 25 instructions, 0 half, 2 full
diff --git a/reference/fragProg1/fragProg1-20.asm b/reference/fragProg1/fragProg1-20.asm
index 1c607f5..22b4ba3 100644
--- a/reference/fragProg1/fragProg1-20.asm
+++ b/reference/fragProg1/fragProg1-20.asm
@@ -4,6 +4,8 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c0.x) 0x411e6666, 0x3e23d70a, 0x40200000, 0x00000000
+@const(c1.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)mov.f32f32 r0.x, c0.y
mov.f32f32 r1.w, c0.z
mov.f32f32 r1.z, c0.y
diff --git a/reference/fragProg1/fragProg1-21.asm b/reference/fragProg1/fragProg1-21.asm
index bddc018..f1b7f67 100644
--- a/reference/fragProg1/fragProg1-21.asm
+++ b/reference/fragProg1/fragProg1-21.asm
@@ -4,6 +4,8 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c0.x) 0x00000000, 0x3f800000, 0x40800000, 0xc0000000
+@const(c1.x) 0x3c23d70a, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)mov.f32f32 r0.x, c0.w
mov.f32f32 r0.y, c0.z
mov.f32f32 r0.z, c0.y
diff --git a/reference/fragProg1/fragProg1-22.asm b/reference/fragProg1/fragProg1-22.asm
index 07fde5f..3dca99e 100644
--- a/reference/fragProg1/fragProg1-22.asm
+++ b/reference/fragProg1/fragProg1-22.asm
@@ -4,6 +4,8 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c0.x) 0x4099999a, 0x3e99999a, 0xbe4ccccd, 0x3f99999a
+@const(c1.x) 0x3dcccccd, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)floor.f r0.x, c0.w
floor.f r0.y, c0.z
floor.f r0.z, c0.y
diff --git a/reference/fragProg1/fragProg1-23.asm b/reference/fragProg1/fragProg1-23.asm
index df9ee98..6b41ab8 100644
--- a/reference/fragProg1/fragProg1-23.asm
+++ b/reference/fragProg1/fragProg1-23.asm
@@ -4,6 +4,8 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c0.x) 0xbf8ccccd, 0x3dcccccd, 0xc00ccccd, 0x4019999a
+@const(c1.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)floor.f r0.x, c0.w
floor.f r0.y, c0.z
floor.f r0.z, c0.y
diff --git a/reference/fragProg1/fragProg1-24.asm b/reference/fragProg1/fragProg1-24.asm
index 5b6302b..f37f50f 100644
--- a/reference/fragProg1/fragProg1-24.asm
+++ b/reference/fragProg1/fragProg1-24.asm
@@ -4,6 +4,8 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c0.x) 0x42800000, 0x3f800000, 0x41f00000, 0x40800000
+@const(c1.x) 0x3dcccccd, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)mov.f32f32 r0.x, c0.w
mov.f32f32 r0.y, c0.z
mov.f32f32 r0.z, c0.y
diff --git a/reference/fragProg1/fragProg1-25.asm b/reference/fragProg1/fragProg1-25.asm
index 8235a7f..82ed199 100644
--- a/reference/fragProg1/fragProg1-25.asm
+++ b/reference/fragProg1/fragProg1-25.asm
@@ -4,35 +4,25 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c0.x) 0x3f266666, 0x3f666666, 0x00000000, 0x41000000
+@const(c1.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)mov.f32f32 r0.x, c0.y
-mov.f32f32 r0.y, c0.x
-absneg.f r0.z, (neg)c0.x
+absneg.f r0.y, (neg)c0.x
+mov.f32f32 r0.z, c0.x
mov.f32f32 r1.w, c1.y
max.f r0.x, r0.x, c1.x
-max.f r0.y, r0.y, c1.x
-cmps.f.lt r0.z, r0.z, c0.z
-min.f r0.w, c0.w, c1.z
+cmps.f.lt r0.y, r0.y, c0.z
+max.f r1.y, r0.z, c1.x
+min.f r0.z, c0.w, c1.z
mov.f32f32 r1.x, c1.y
(rpt1)nop
log2 r0.x, r0.x
-(ss)mov.f32f32 r0.x, r0.x
-mov.f32f32 r1.y, r0.y
-(rpt1)nop
-mul.f r0.x, r0.w, r0.x
-(rpt2)nop
-mov.f32f32 r0.x, r0.x
+(ss)mul.f r0.x, r0.z, r0.x
(rpt5)nop
exp2 r0.x, r0.x
-(ss)mov.f32f32 r0.x, r0.x
-(rpt2)nop
-sel.b32 r0.x, r0.x, r0.z, c1.x
-(rpt2)nop
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
-mov.f32f32 r1.z, r0.x
+(ss)sel.b32 r1.z, r0.x, r0.y, c1.x
end
-nop
; FRAG: outputs: r1.x (1:0)
; FRAG: inputs:
-; FRAG: 42 instructions, 0 half, 2 full
+; FRAG: 22 instructions, 0 half, 2 full
diff --git a/reference/fragProg1/fragProg1-26.asm b/reference/fragProg1/fragProg1-26.asm
index 5eacda2..66a7344 100644
--- a/reference/fragProg1/fragProg1-26.asm
+++ b/reference/fragProg1/fragProg1-26.asm
@@ -4,35 +4,25 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c0.x) 0x3f266666, 0x00000000, 0x00000000, 0x00000000
+@const(c1.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)mov.f32f32 r0.x, c0.y
-mov.f32f32 r0.y, c0.x
-absneg.f r0.z, (neg)c0.x
+absneg.f r0.y, (neg)c0.x
+mov.f32f32 r0.z, c0.x
mov.f32f32 r1.w, c1.y
max.f r0.x, r0.x, c1.x
-max.f r0.y, r0.y, c1.x
-cmps.f.lt r0.z, r0.z, c0.y
-min.f r0.w, c0.y, c1.z
+cmps.f.lt r0.y, r0.y, c0.y
+max.f r1.y, r0.z, c1.x
+min.f r0.z, c0.y, c1.z
mov.f32f32 r1.x, c1.y
(rpt1)nop
log2 r0.x, r0.x
-(ss)mov.f32f32 r0.x, r0.x
-mov.f32f32 r1.y, r0.y
-(rpt1)nop
-mul.f r0.x, r0.w, r0.x
-(rpt2)nop
-mov.f32f32 r0.x, r0.x
+(ss)mul.f r0.x, r0.z, r0.x
(rpt5)nop
exp2 r0.x, r0.x
-(ss)mov.f32f32 r0.x, r0.x
-(rpt2)nop
-sel.b32 r0.x, r0.x, r0.z, c1.x
-(rpt2)nop
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
-mov.f32f32 r1.z, r0.x
+(ss)sel.b32 r1.z, r0.x, r0.y, c1.x
end
-nop
; FRAG: outputs: r1.x (1:0)
; FRAG: inputs:
-; FRAG: 42 instructions, 0 half, 2 full
+; FRAG: 22 instructions, 0 half, 2 full
diff --git a/reference/fragProg1/fragProg1-27.asm b/reference/fragProg1/fragProg1-27.asm
index 5eacda2..a009294 100644
--- a/reference/fragProg1/fragProg1-27.asm
+++ b/reference/fragProg1/fragProg1-27.asm
@@ -4,35 +4,25 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c0.x) 0xbf000000, 0x00000000, 0x00000000, 0x00000000
+@const(c1.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)mov.f32f32 r0.x, c0.y
-mov.f32f32 r0.y, c0.x
-absneg.f r0.z, (neg)c0.x
+absneg.f r0.y, (neg)c0.x
+mov.f32f32 r0.z, c0.x
mov.f32f32 r1.w, c1.y
max.f r0.x, r0.x, c1.x
-max.f r0.y, r0.y, c1.x
-cmps.f.lt r0.z, r0.z, c0.y
-min.f r0.w, c0.y, c1.z
+cmps.f.lt r0.y, r0.y, c0.y
+max.f r1.y, r0.z, c1.x
+min.f r0.z, c0.y, c1.z
mov.f32f32 r1.x, c1.y
(rpt1)nop
log2 r0.x, r0.x
-(ss)mov.f32f32 r0.x, r0.x
-mov.f32f32 r1.y, r0.y
-(rpt1)nop
-mul.f r0.x, r0.w, r0.x
-(rpt2)nop
-mov.f32f32 r0.x, r0.x
+(ss)mul.f r0.x, r0.z, r0.x
(rpt5)nop
exp2 r0.x, r0.x
-(ss)mov.f32f32 r0.x, r0.x
-(rpt2)nop
-sel.b32 r0.x, r0.x, r0.z, c1.x
-(rpt2)nop
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
-mov.f32f32 r1.z, r0.x
+(ss)sel.b32 r1.z, r0.x, r0.y, c1.x
end
-nop
; FRAG: outputs: r1.x (1:0)
; FRAG: inputs:
-; FRAG: 42 instructions, 0 half, 2 full
+; FRAG: 22 instructions, 0 half, 2 full
diff --git a/reference/fragProg1/fragProg1-28.asm b/reference/fragProg1/fragProg1-28.asm
index bd4ce12..87d7d7b 100644
--- a/reference/fragProg1/fragProg1-28.asm
+++ b/reference/fragProg1/fragProg1-28.asm
@@ -6,6 +6,8 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c2.x) 0x3e4ccccd, 0x3f000000, 0x3f800000, 0x00000000
+@const(c3.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)mov.f32f32 r0.z, c3.y
mov.f32f32 r0.w, c3.y
mov.f32f32 r1.x, c3.y
diff --git a/reference/fragProg1/fragProg1-35.asm b/reference/fragProg1/fragProg1-35.asm
index 09de760..8c6badb 100644
--- a/reference/fragProg1/fragProg1-35.asm
+++ b/reference/fragProg1/fragProg1-35.asm
@@ -4,6 +4,8 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c0.x) 0x3f000000, 0x40000000, 0x40400000, 0x40800000
+@const(c1.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)mov.f32f32 r0.x, c0.w
mov.f32f32 r0.y, c0.x
mov.f32f32 r0.z, c0.x
@@ -14,18 +16,14 @@ log2 r0.x, r0.x
log2 r0.y, r0.y
(ss)mul.f r0.y, c0.w, r0.y
log2 r0.z, r0.z
+(rpt2)nop
(ss)mul.f r0.z, c0.z, r0.z
log2 r0.w, r0.w
(ss)mul.f r0.w, c0.y, r0.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-(rpt2)nop
exp2 r1.w, r0.x
nop
exp2 r1.z, r0.y
-nop
+(rpt3)nop
exp2 r1.y, r0.z
nop
exp2 r1.x, r0.w
@@ -34,5 +32,5 @@ nop
nop
; FRAG: outputs: r1.x (1:0)
; FRAG: inputs:
-; FRAG: 30 instructions, 0 half, 2 full
+; FRAG: 29 instructions, 0 half, 2 full
diff --git a/reference/fragProg1/fragProg1-36.asm b/reference/fragProg1/fragProg1-36.asm
index 6a0d9e3..c5a3b7d 100644
--- a/reference/fragProg1/fragProg1-36.asm
+++ b/reference/fragProg1/fragProg1-36.asm
@@ -4,6 +4,7 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c0.x) 0x41000000, 0xc1200000, 0x3f800000, 0x41400000
(sy)(ss)mov.f32f32 r0.x, c0.w
mov.f32f32 r0.y, c0.z
mov.f32f32 r0.z, c0.y
diff --git a/reference/fragProg1/fragProg1-37.asm b/reference/fragProg1/fragProg1-37.asm
index 69c0cfe..53c389c 100644
--- a/reference/fragProg1/fragProg1-37.asm
+++ b/reference/fragProg1/fragProg1-37.asm
@@ -4,6 +4,7 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c0.x) 0x41000000, 0xc1200000, 0x3f800000, 0x41400000
(sy)(ss)mov.f32f32 r0.x, c0.x
(rpt5)nop
rcp r1.w, r0.x
diff --git a/reference/fragProg1/fragProg1-38.asm b/reference/fragProg1/fragProg1-38.asm
index 74debfe..b32fb5d 100644
--- a/reference/fragProg1/fragProg1-38.asm
+++ b/reference/fragProg1/fragProg1-38.asm
@@ -4,6 +4,7 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c0.x) 0x3f800000, 0x40800000, 0x41100000, 0x42c80000
(sy)(ss)absneg.f r0.x, (abs)c0.w
absneg.f r0.y, (abs)c0.z
absneg.f r0.z, (abs)c0.y
diff --git a/reference/fragProg1/fragProg1-39.asm b/reference/fragProg1/fragProg1-39.asm
index 74debfe..970892b 100644
--- a/reference/fragProg1/fragProg1-39.asm
+++ b/reference/fragProg1/fragProg1-39.asm
@@ -4,6 +4,7 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c0.x) 0x00000000, 0xc2c80000, 0xc0a00000, 0xbf800000
(sy)(ss)absneg.f r0.x, (abs)c0.w
absneg.f r0.y, (abs)c0.z
absneg.f r0.z, (abs)c0.y
diff --git a/reference/fragProg1/fragProg1-40.asm b/reference/fragProg1/fragProg1-40.asm
index 3d9646b..c1fcc94 100644
--- a/reference/fragProg1/fragProg1-40.asm
+++ b/reference/fragProg1/fragProg1-40.asm
@@ -4,6 +4,8 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c0.x) 0x3f000000, 0x00000000, 0x00000000, 0x00000000
+@const(c1.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)mov.f32f32 r0.x, c0.x
mov.f32f32 r0.y, c0.x
mov.f32f32 r1.w, (0.000000)
diff --git a/reference/fragProg1/fragProg1-42.asm b/reference/fragProg1/fragProg1-42.asm
index c611cf9..cc6bad8 100644
--- a/reference/fragProg1/fragProg1-42.asm
+++ b/reference/fragProg1/fragProg1-42.asm
@@ -4,6 +4,7 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c0.x) 0x3fc90ff9, 0xbfc90ff9, 0x3f000000, 0x3f800000
(sy)(ss)mov.f32f32 r0.x, c0.w
mov.f32f32 r0.y, c0.z
mov.f32f32 r0.z, c0.y
diff --git a/reference/fragProg1/fragProg1-43.asm b/reference/fragProg1/fragProg1-43.asm
index c611cf9..119976b 100644
--- a/reference/fragProg1/fragProg1-43.asm
+++ b/reference/fragProg1/fragProg1-43.asm
@@ -4,6 +4,7 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c0.x) 0x40490ff9, 0xc0490ff9, 0x40d90ff9, 0xc0b90ff9
(sy)(ss)mov.f32f32 r0.x, c0.w
mov.f32f32 r0.y, c0.z
mov.f32f32 r0.z, c0.y
diff --git a/reference/fragProg1/fragProg1-46.asm b/reference/fragProg1/fragProg1-46.asm
index b4e00f3..d3a01c4 100644
--- a/reference/fragProg1/fragProg1-46.asm
+++ b/reference/fragProg1/fragProg1-46.asm
@@ -6,6 +6,8 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c1.x) 0x3dcccccd, 0x00000000, 0x00000000, 0x00000000
+@const(c2.x) 0x3f800000, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)bary.f r0.z, 3, r0.x
bary.f r0.w, 2, r0.x
bary.f r1.x, 1, r0.x
diff --git a/reference/fragProg1/fragProg1-47.asm b/reference/fragProg1/fragProg1-47.asm
index ceaaf94..5b982b1 100644
--- a/reference/fragProg1/fragProg1-47.asm
+++ b/reference/fragProg1/fragProg1-47.asm
@@ -4,6 +4,7 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c1.x) 0x00000000, 0x3f800000, 0xbf800000, 0x00000000
(sy)(ss)mov.f32f32 r0.x, c1.x
mov.f32f32 r0.y, c1.y
mov.f32f32 r0.z, c1.z
diff --git a/reference/fragProg1/fragProg1-50.asm b/reference/fragProg1/fragProg1-50.asm
index a547737..a0d3e20 100644
--- a/reference/fragProg1/fragProg1-50.asm
+++ b/reference/fragProg1/fragProg1-50.asm
@@ -4,6 +4,7 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c2.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)mov.f32f32 r0.x, c0.x
mov.f32f32 r0.y, c0.z
mov.f32f32 r0.z, c0.y
diff --git a/reference/fragProg1/fragProg1-54.asm b/reference/fragProg1/fragProg1-54.asm
index f6fe50e..f739db5 100644
--- a/reference/fragProg1/fragProg1-54.asm
+++ b/reference/fragProg1/fragProg1-54.asm
@@ -6,47 +6,44 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c2.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.z, 4, r0.x
bary.f r0.w, 3, r0.x
bary.f r1.x, 2, r0.x
bary.f r1.y, 1, r0.x
mad.f32 r0.z, c0.x, r0.z, c0.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.y, r1.y
-max.f r0.z, r0.z, c2.x
max.f r0.w, r0.w, c2.x
max.f r1.x, r1.x, c2.x
max.f r1.y, r1.y, c2.x
+max.f r0.z, r0.z, c2.x
+min.f r1.w, r0.w, c2.y
+min.f r0.w, r1.x, c2.y
+min.f r1.x, r1.y, c2.y
min.f r0.z, r0.z, c2.y
-min.f r0.w, r0.w, c2.y
-min.f r1.x, r1.x, c2.y
-min.f r1.y, r1.y, c2.y
-add.f r1.z, c2.y, (neg)r0.z
-add.f r2.x, c2.y, (neg)r0.z
-add.f r2.y, c2.y, (neg)r0.z
-mov.f32f32 r1.w, r0.w
-mul.f r0.w, r1.z, c1.z
-mul.f r1.x, r0.z, r1.x
-mul.f r2.x, r2.x, c1.y
-mul.f r2.y, r2.y, c1.x
-nop
-add.f r1.z, r1.x, r0.w
-mul.f r0.w, r0.z, r1.y
bary.f (ei)r0.x, 0, r0.x
(rpt1)nop
-add.f r1.y, r0.w, r2.x
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
+add.f r0.y, c2.y, (neg)r0.z
+add.f r1.y, c2.y, (neg)r0.z
+add.f r1.z, c2.y, (neg)r0.z
+mul.f r0.w, r0.z, r0.w
+mul.f r0.y, r0.y, c1.z
+mul.f r1.y, r1.y, c1.y
+mul.f r2.x, r1.z, c1.x
+nop
+add.f r1.z, r0.w, r0.y
+mul.f r0.y, r0.z, r1.x
max.f r0.x, r0.x, c2.x
-(rpt2)nop
+(rpt1)nop
+add.f r1.y, r0.y, r1.y
min.f r0.x, r0.x, c2.y
(rpt2)nop
mul.f r0.x, r0.z, r0.x
(rpt2)nop
-add.f r1.x, r0.x, r2.y
+add.f r1.x, r0.x, r2.x
end
+nop
+nop
; FRAG: outputs: r1.x (1:0)
; FRAG: inputs: r0.x (1:0,cm=f,il=8,b=1) r1.x (3:0,cm=f,il=12,b=1)
-; FRAG: 49 instructions, 0 half, 3 full
+; FRAG: 40 instructions, 0 half, 3 full
diff --git a/reference/fragProg1/fragProg1-55.asm b/reference/fragProg1/fragProg1-55.asm
index e06354b..fd73990 100644
--- a/reference/fragProg1/fragProg1-55.asm
+++ b/reference/fragProg1/fragProg1-55.asm
@@ -16,6 +16,7 @@
@out(r2.y) out9
@out(r2.z) out10
@out(r2.w) out11
+@const(c6.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000
(sy)(ss)mov.f32f32 r0.x, c4.w
mov.f32f32 r0.y, c4.z
mov.f32f32 r0.z, c4.y
diff --git a/reference/fragProg1/fragProg1-56.asm b/reference/fragProg1/fragProg1-56.asm
index d82d6b8..dbb2e48 100644
--- a/reference/fragProg1/fragProg1-56.asm
+++ b/reference/fragProg1/fragProg1-56.asm
@@ -6,36 +6,37 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c2.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.z, 4, r0.x
-bary.f r0.w, 3, r0.x
-bary.f r1.x, 2, r0.x
-bary.f r1.y, 1, r0.x
+bary.f r1.w, 3, r0.x
+bary.f r0.w, 2, r0.x
+bary.f r1.x, 1, r0.x
+add.f r1.y, c0.z, (neg)r0.z
add.f r1.z, c0.z, (neg)r0.z
-add.f r2.x, c0.z, (neg)r0.z
add.f r0.z, c0.z, (neg)r0.z
-mov.f32f32 r1.w, r0.w
-mul.f r0.w, r1.z, c0.w
-mul.f r1.z, r2.x, c0.w
-mul.f r0.z, r0.z, c0.w
bary.f (ei)r0.x, 0, r0.x
-max.f r0.y, r0.w, c2.x
-max.f r0.w, r1.z, c2.x
+mul.f r0.y, r1.y, c0.w
+mul.f r1.y, r1.z, c0.w
+mul.f r0.z, r0.z, c0.w
+nop
+max.f r0.y, r0.y, c2.x
+max.f r1.y, r1.y, c2.x
max.f r0.z, r0.z, c2.x
nop
min.f r0.y, r0.y, c2.y
-min.f r0.w, r0.w, c2.y
+min.f r1.y, r1.y, c2.y
min.f r0.z, r0.z, c2.y
nop
add.f r1.z, c2.y, (neg)r0.y
-add.f r2.x, c2.y, (neg)r0.w
+add.f r2.x, c2.y, (neg)r1.y
add.f r2.y, c2.y, (neg)r0.z
-mul.f r0.y, r0.y, r1.x
-mul.f r1.x, r1.z, c1.z
+mul.f r0.y, r0.y, r0.w
+mul.f r0.w, r1.z, c1.z
mul.f r2.x, r2.x, c1.y
mul.f r2.y, r2.y, c1.x
nop
-add.f r1.z, r0.y, r1.x
-mul.f r0.y, r0.w, r1.y
+add.f r1.z, r0.y, r0.w
+mul.f r0.y, r1.y, r1.x
mul.f r0.x, r0.z, r0.x
(rpt1)nop
add.f r1.y, r0.y, r2.x
@@ -43,6 +44,6 @@ add.f r1.x, r0.x, r2.y
end
nop
; FRAG: outputs: r1.x (1:0)
-; FRAG: inputs: r0.x (1:0,cm=f,il=8,b=1) r1.x (3:0,cm=f,il=12,b=1)
+; FRAG: inputs: r1.x (1:0,cm=f,il=8,b=1) r1.x (3:0,cm=f,il=12,b=1)
; FRAG: 36 instructions, 0 half, 3 full
diff --git a/reference/fragProg1/fragProg1-57.asm b/reference/fragProg1/fragProg1-57.asm
index 75a9b0c..de526fe 100644
--- a/reference/fragProg1/fragProg1-57.asm
+++ b/reference/fragProg1/fragProg1-57.asm
@@ -6,51 +6,44 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c2.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.z, 4, r0.x
bary.f r0.w, 3, r0.x
bary.f r1.x, 2, r0.x
bary.f r1.y, 1, r0.x
mul.f r0.z, c0.z, r0.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.y, r1.y
+max.f r0.w, r0.w, c2.x
+max.f r1.x, r1.x, c2.x
+max.f r1.y, r1.y, c2.x
bary.f (ei)r0.x, 0, r0.x
-max.f r0.y, r0.w, c2.x
-max.f r0.w, r1.x, c2.x
-exp2 r0.z, (neg)r0.z
-(ss)mov.f32f32 r0.z, r0.z
-max.f r1.x, r1.y, c2.x
-mov.f32f32 r0.x, r0.x
-nop
-max.f r0.z, r0.z, c2.x
-min.f r0.y, r0.y, c2.y
(rpt1)nop
-min.f r0.z, r0.z, c2.y
-min.f r0.w, r0.w, c2.y
-min.f r1.x, r1.x, c2.y
+exp2 r0.y, (neg)r0.z
+(ss)max.f r0.y, r0.y, c2.x
+min.f r1.w, r0.w, c2.y
+(ss)min.f r0.z, r1.x, c2.y
+min.f r0.w, r1.y, c2.y
+min.f r0.y, r0.y, c2.y
max.f r0.x, r0.x, c2.x
-add.f r1.y, c2.y, (neg)r0.z
-add.f r1.z, c2.y, (neg)r0.z
-add.f r1.w, c2.y, (neg)r0.z
-mul.f r0.w, r0.z, r0.w
-mul.f r1.y, r1.y, c1.z
-mul.f r2.x, r1.z, c1.y
-mul.f r2.y, r1.w, c1.x
+(rpt1)nop
+add.f r1.x, c2.y, (neg)r0.y
+add.f r1.y, c2.y, (neg)r0.y
+add.f r1.z, c2.y, (neg)r0.y
+mul.f r0.z, r0.y, r0.z
+mul.f r1.x, r1.x, c1.z
+mul.f r1.y, r1.y, c1.y
+mul.f r2.x, r1.z, c1.x
nop
-add.f r1.z, r0.w, r1.y
-mul.f r0.w, r0.z, r1.x
+add.f r1.z, r0.z, r1.x
+mul.f r0.z, r0.y, r0.w
min.f r0.x, r0.x, c2.y
-mov.f32f32 r1.w, r0.y
-nop
-add.f r1.y, r0.w, r2.x
-mul.f r0.x, r0.z, r0.x
+(rpt1)nop
+add.f r1.y, r0.z, r1.y
+mul.f r0.x, r0.y, r0.x
(rpt2)nop
-add.f r1.x, r0.x, r2.y
+add.f r1.x, r0.x, r2.x
end
nop
-nop
-nop
; FRAG: outputs: r1.x (1:0)
; FRAG: inputs: r0.x (1:0,cm=f,il=8,b=1) r1.x (3:0,cm=f,il=12,b=1)
-; FRAG: 44 instructions, 0 half, 3 full
+; FRAG: 40 instructions, 0 half, 3 full
diff --git a/reference/fragProg1/fragProg1-58.asm b/reference/fragProg1/fragProg1-58.asm
index d7dd979..af3f774 100644
--- a/reference/fragProg1/fragProg1-58.asm
+++ b/reference/fragProg1/fragProg1-58.asm
@@ -6,43 +6,44 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c2.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.z, 4, r0.x
-bary.f r0.w, 3, r0.x
-bary.f r1.x, 2, r0.x
-bary.f r1.y, 1, r0.x
+bary.f r1.w, 3, r0.x
+bary.f r0.w, 2, r0.x
+bary.f r1.x, 1, r0.x
mul.f r0.z, c0.x, r0.z
-mov.f32f32 r1.w, r0.w
bary.f (ei)r0.x, 0, r0.x
-(rpt3)nop
+(rpt4)nop
exp2 r0.y, (neg)r0.z
(ss)max.f r0.y, r0.y, c2.x
-exp2 r0.w, (neg)r0.z
+exp2 r1.y, (neg)r0.z
nop
(ss)exp2 r0.z, (neg)r0.z
-(ss)max.f r0.w, r0.w, c2.x
+(ss)max.f r1.y, r1.y, c2.x
(ss)max.f r0.z, r0.z, c2.x
min.f r0.y, r0.y, c2.y
(rpt2)nop
add.f r1.z, c2.y, (neg)r0.y
-min.f r0.w, r0.w, c2.y
+min.f r1.y, r1.y, c2.y
min.f r0.z, r0.z, c2.y
-mul.f r0.y, r0.y, r1.x
-mul.f r1.x, r1.z, c1.z
-add.f r2.x, c2.y, (neg)r0.w
+mul.f r0.y, r0.y, r0.w
+mul.f r0.w, r1.z, c1.z
+add.f r2.x, c2.y, (neg)r1.y
add.f r2.y, c2.y, (neg)r0.z
-mul.f r0.w, r0.w, r1.y
-add.f r1.z, r0.y, r1.x
+mul.f r1.x, r1.y, r1.x
+add.f r1.z, r0.y, r0.w
mul.f r0.y, r2.x, c1.y
-mul.f r1.x, r2.y, c1.x
+mul.f r0.w, r2.y, c1.x
(rpt1)nop
-add.f r1.y, r0.w, r0.y
+add.f r1.y, r1.x, r0.y
mul.f r0.x, r0.z, r0.x
(rpt2)nop
-add.f r1.x, r0.x, r1.x
+add.f r1.x, r0.x, r0.w
end
nop
nop
+nop
; FRAG: outputs: r1.x (1:0)
-; FRAG: inputs: r0.x (1:0,cm=f,il=8,b=1) r1.x (3:0,cm=f,il=12,b=1)
+; FRAG: inputs: r1.x (1:0,cm=f,il=8,b=1) r1.x (3:0,cm=f,il=12,b=1)
; FRAG: 42 instructions, 0 half, 3 full
diff --git a/reference/fragProg1/fragProg1-59.asm b/reference/fragProg1/fragProg1-59.asm
index bf15c36..3774aca 100644
--- a/reference/fragProg1/fragProg1-59.asm
+++ b/reference/fragProg1/fragProg1-59.asm
@@ -6,33 +6,29 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c2.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.z, 4, r0.x
bary.f r0.w, 3, r0.x
bary.f r1.x, 2, r0.x
bary.f r1.y, 1, r0.x
mul.f r0.z, c0.w, r0.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.y, r1.y
-mul.f r0.z, r0.z, r0.z
max.f r0.w, r0.w, c2.x
max.f r1.x, r1.x, c2.x
max.f r1.y, r1.y, c2.x
-mov.f32f32 r0.z, r0.z
-min.f r0.w, r0.w, c2.y
-(rpt4)nop
-exp2 r0.z, (neg)r0.z
-(ss)mov.f32f32 r0.z, r0.z
-mov.f32f32 r1.w, r0.w
+mul.f r0.z, r0.z, r0.z
+min.f r1.w, r0.w, c2.y
min.f r0.w, r1.x, c2.y
min.f r1.x, r1.y, c2.y
-max.f r0.z, r0.z, c2.x
bary.f (ei)r0.x, 0, r0.x
(rpt1)nop
-min.f r0.y, r0.z, c2.y
-mov.f32f32 r0.x, r0.x
+exp2 r0.y, (neg)r0.z
+(ss)max.f r0.y, r0.y, c2.x
+max.f r0.x, r0.x, c2.x
+(rpt1)nop
+min.f r0.y, r0.y, c2.y
+min.f r0.x, r0.x, c2.y
(rpt1)nop
-add.f r0.z, c2.y, (neg)r0.y
+(ss)add.f r0.z, c2.y, (neg)r0.y
add.f r1.y, c2.y, (neg)r0.y
add.f r1.z, c2.y, (neg)r0.y
mul.f r0.w, r0.y, r0.w
@@ -42,19 +38,12 @@ mul.f r2.x, r1.z, c1.x
nop
add.f r1.z, r0.w, r0.z
mul.f r0.z, r0.y, r1.x
-max.f r0.x, r0.x, c2.x
+mul.f r0.x, r0.y, r0.x
(rpt1)nop
add.f r1.y, r0.z, r1.y
-min.f r0.x, r0.x, c2.y
-(rpt2)nop
-mul.f r0.x, r0.y, r0.x
-(rpt2)nop
add.f r1.x, r0.x, r2.x
end
-nop
-nop
-nop
; FRAG: outputs: r1.x (1:0)
; FRAG: inputs: r0.x (1:0,cm=f,il=8,b=1) r1.x (3:0,cm=f,il=12,b=1)
-; FRAG: 56 instructions, 0 half, 3 full
+; FRAG: 40 instructions, 0 half, 3 full
diff --git a/reference/fragProg1/fragProg1-60.asm b/reference/fragProg1/fragProg1-60.asm
index da7a509..46a5a61 100644
--- a/reference/fragProg1/fragProg1-60.asm
+++ b/reference/fragProg1/fragProg1-60.asm
@@ -6,47 +6,48 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c2.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.z, 4, r0.x
-bary.f r0.w, 3, r0.x
-bary.f r1.x, 2, r0.x
-bary.f r1.y, 1, r0.x
+bary.f r1.w, 3, r0.x
+bary.f r0.w, 2, r0.x
+bary.f r1.x, 1, r0.x
mul.f r0.z, c0.x, r0.z
-mov.f32f32 r1.w, r0.w
bary.f (ei)r0.x, 0, r0.x
-nop
+(rpt1)nop
mul.f r0.y, r0.z, r0.z
(rpt2)nop
-mov.f32f32 r0.y, r0.y
+mov.f32f32 r0.z, r0.y
(rpt5)nop
-exp2 r0.z, (neg)r0.y
+exp2 r1.y, (neg)r0.z
+(ss)max.f r1.y, r1.y, c2.x
+(ss)exp2 r0.z, (neg)r0.z
(ss)max.f r0.z, r0.z, c2.x
-exp2 r0.w, (neg)r0.y
-nop
-(ss)exp2 r0.y, (neg)r0.y
-(ss)max.f r0.w, r0.w, c2.x
+exp2 r0.y, (neg)r0.y
(ss)max.f r0.y, r0.y, c2.x
+nop
+min.f r1.y, r1.y, c2.y
min.f r0.z, r0.z, c2.y
-(rpt2)nop
-add.f r1.z, c2.y, (neg)r0.z
-min.f r0.w, r0.w, c2.y
min.f r0.y, r0.y, c2.y
-mul.f r0.z, r0.z, r1.x
-mul.f r1.x, r1.z, c1.z
-add.f r2.x, c2.y, (neg)r0.w
+nop
+add.f r1.z, c2.y, (neg)r1.y
+add.f r2.x, c2.y, (neg)r0.z
add.f r2.y, c2.y, (neg)r0.y
-mul.f r0.w, r0.w, r1.y
-add.f r1.z, r0.z, r1.x
-mul.f r0.z, r2.x, c1.y
-mul.f r1.x, r2.y, c1.x
+mul.f r0.w, r1.y, r0.w
+mul.f r1.y, r1.z, c1.z
+mul.f r2.x, r2.x, c1.y
+(rpt1)nop
+add.f r1.z, r0.w, r1.y
+mul.f r0.z, r0.z, r1.x
+mul.f r0.w, r2.y, c1.x
(rpt1)nop
-add.f r1.y, r0.w, r0.z
+add.f r1.y, r0.z, r2.x
mul.f r0.x, r0.y, r0.x
(rpt2)nop
-add.f r1.x, r0.x, r1.x
+add.f r1.x, r0.x, r0.w
end
nop
nop
; FRAG: outputs: r1.x (1:0)
-; FRAG: inputs: r0.x (1:0,cm=f,il=8,b=1) r1.x (3:0,cm=f,il=12,b=1)
+; FRAG: inputs: r1.x (1:0,cm=f,il=8,b=1) r1.x (3:0,cm=f,il=12,b=1)
; FRAG: 50 instructions, 0 half, 3 full
diff --git a/reference/glmark1.asm b/reference/glmark1.asm
index 88f3652..2e188d9 100644
--- a/reference/glmark1.asm
+++ b/reference/glmark1.asm
@@ -6,13 +6,14 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c2.x) 0x3f800000, 0x00000000, 0x00000000, 0x00000000
+@const(c3.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.z, 0, r0.x
bary.f (ei)r0.x, 1, r0.x
-mov.f32f32 r0.y, c2.x
+mov.f32f32 r1.w, c2.x
(rpt1)nop
mul.f r0.x, r0.z, r0.x
-mov.f32f32 r1.w, r0.y
-(rpt1)nop
+(rpt2)nop
add.f r0.y, c3.y, (neg)r0.x
add.f r0.z, c3.y, (neg)r0.x
add.f r0.w, c3.y, (neg)r0.x
@@ -20,21 +21,18 @@ mul.f r1.x, r0.x, c1.z
mul.f r0.y, r0.y, c0.z
mul.f r0.z, r0.z, c0.y
mul.f r0.w, r0.w, c0.x
-mul.f r1.y, r0.x, c1.y
-add.f r0.y, r1.x, r0.y
+nop
+add.f r1.z, r1.x, r0.y
+mul.f r0.y, r0.x, c1.y
mul.f r0.x, r0.x, c1.x
(rpt1)nop
-mov.f32f32 r1.z, r0.y
-add.f r0.y, r1.y, r0.z
-add.f r0.x, r0.x, r0.w
-(rpt1)nop
-mov.f32f32 r1.y, r0.y
-mov.f32f32 r1.x, r0.x
+add.f r1.y, r0.y, r0.z
+add.f r1.x, r0.x, r0.w
end
nop
nop
nop
; FRAG: outputs: r1.x (1:0)
; FRAG: inputs: r0.x (5:20,cm=f,il=8,b=1)
-; FRAG: 29 instructions, 0 half, 2 full
+; FRAG: 25 instructions, 0 half, 2 full
diff --git a/reference/glmark2.asm b/reference/glmark2.asm
index b3885a6..6f1a27b 100644
--- a/reference/glmark2.asm
+++ b/reference/glmark2.asm
@@ -6,83 +6,50 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
-(sy)(ss)bary.f r0.z, 2, r0.x
-bary.f r0.w, 1, r0.x
-bary.f r1.x, 12, r0.x
+@const(c3.x) 0x3d4ccccd, 0x3d2acd9f, 0x3ccccccd, 0x3caa64c3
+@const(c4.x) 0x40000000, 0xbe99999a, 0x00000000, 0x00000000
+@const(c5.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
+(sy)(ss)bary.f r0.z, 12, r0.x
+bary.f r0.w, 2, r0.x
+bary.f r1.x, 1, r0.x
bary.f r1.y, 0, r0.x
bary.f r1.z, 4, r0.x
-add.f r0.z, r0.z, (neg)r0.w
-bary.f r0.w, 8, r0.x
-bary.f r1.w, 7, r0.x
-mov.f32f32 r1.z, r1.z
-mul.f r0.z, r0.z, c3.w
-log2 r1.x, r1.x
-(ss)mul.f r1.x, c4.x, r1.x
-mul.f r1.y, r1.y, c3.y
-mov.f32f32 r2.x, c3.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r2.y, c3.z
-mov.f32f32 r1.x, r1.x
-mad.f32 r1.y, c2.x, r2.x, r1.y
-mov.f32f32 r2.z, r1.z
-mad.f32 r0.z, c2.x, r2.y, r0.z
-bary.f r1.z, 5, r0.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r0.z, r0.z
-exp2 r1.x, r1.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r1.y, r1.y
+bary.f r1.w, 5, r0.x
bary.f r2.x, 11, r0.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r2.w, r1.z
-mov.f32f32 r1.y, r1.y
-add.f r0.w, r2.x, r0.w
-mov.f32f32 r0.z, r0.z
-bary.f r1.z, 10, r0.x
-bary.f r2.x, 6, r0.x
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r0.z, r0.z
-sam (f32)(xyzw)r2.y, r2.z, s#0, t#0
-(sy)(ss)add.f r1.x, r3.x, r1.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r3.x, r1.y
-mov.f32f32 r3.y, r0.z
-mov.f32f32 r0.z, r1.x
-add.f r1.x, r1.z, r1.w
-mov.f32f32 r1.y, r2.x
-bary.f (ei)r0.x, 9, r0.x
-(rpt1)nop
-sam (f32)(xyz)r3.x, r3.x, s#1, t#1
-(sy)add.f r0.y, r0.w, r3.z
-mov.f32f32 r0.w, r1.x
-add.f r0.x, r0.x, r1.y
-add.f r0.z, r0.z, c4.y
-mov.f32f32 r0.y, r0.y
-add.f r0.w, r0.w, r3.y
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.z, r0.z
-mul.f r0.y, r0.y, r2.w
-mov.f32f32 r0.w, r0.w
-add.f r0.x, r0.x, r3.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.y, r0.y
-mul.f r0.w, r0.w, r2.z
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r1.w, r0.z
-mov.f32f32 r1.z, r0.y
-mov.f32f32 r0.y, r0.w
-mul.f r0.x, r0.x, r2.y
-(rpt1)nop
-mov.f32f32 r1.y, r0.y
-mov.f32f32 r0.x, r0.x
+log2 r0.z, r0.z
+(ss)mul.f r0.z, c4.x, r0.z
+add.f r0.w, r0.w, (neg)r1.x
+mul.f r1.x, r1.y, c3.y
+bary.f r1.y, 8, r0.x
+bary.f r2.y, 10, r0.x
+sam (f32)(xyzw)r2.z, r1.z, s#0, t#0
+mul.f r0.w, r0.w, c3.w
+(ss)mov.f32f32 r1.z, c3.z
+exp2 r0.z, r0.z
+(sy)(ss)add.f r0.z, r3.y, r0.z
+mov.f32f32 r3.y, c3.x
+add.f r1.y, r2.x, r1.y
+mad.f32 r3.w, c2.x, r1.z, r0.w
+add.f r1.w, r0.z, c4.y
+mad.f32 r3.z, c2.x, r3.y, r1.x
+bary.f r0.z, 7, r0.x
+bary.f r0.w, 9, r0.x
+bary.f (ei)r0.x, 6, r0.x
(rpt2)nop
-mov.f32f32 r1.x, r0.x
-end
-nop
+sam (f32)(xyz)r3.y, r3.z, s#1, t#1
+(sy)add.f r0.y, r1.y, r3.w
+add.f r0.z, r2.y, r0.z
+add.f r0.x, r0.w, r0.x
nop
+mul.f r1.z, r0.y, r3.x
+add.f r0.y, r0.z, r3.z
+add.f r0.x, r0.x, r3.y
+(rpt1)nop
+mul.f r1.y, r0.y, r2.w
+mul.f r1.x, r0.x, r2.z
+end
nop
; FRAG: outputs: r1.x (1:0)
-; FRAG: inputs: r0.x (5:20,cm=f,il=8,b=1) r1.x (5:21,cm=f,il=12,b=1) r1.y (5:22,cm=f,il=16,b=1) r3.x (5:23,cm=f,il=20,b=1)
-; FRAG: 77 instructions, 0 half, 4 full
+; FRAG: inputs: r0.x (5:20,cm=f,il=8,b=1) r63.w (5:21,cm=f,il=12,b=1) r1.y (5:22,cm=f,il=16,b=1) r3.x (5:23,cm=f,il=20,b=1)
+; FRAG: 42 instructions, 0 half, 4 full
diff --git a/reference/glmark3.asm b/reference/glmark3.asm
index 52cd74b..818e982 100644
--- a/reference/glmark3.asm
+++ b/reference/glmark3.asm
@@ -6,11 +6,11 @@
@in(r0.w) in4
@in(r1.x) in5
@in(r1.y) in6
-@in(r1.z) in8
-@in(r1.w) in9
-@in(r2.x) in10
-@in(r2.y) in12
-@in(r2.z) in13
+@in(r2.z) in8
+@in(r2.w) in9
+@in(r3.x) in10
+@in(r2.x) in12
+@in(r2.y) in13
@out(r0.x) out0
@out(r0.y) out1
@out(r0.z) out2
@@ -31,303 +31,223 @@
@out(r4.y) out17
@out(r4.z) out18
@out(r4.w) out19
-(sy)(ss)add.f r2.w, (neg)r0.y, c19.z
-mul.f r3.x, c8.x, r0.w
+@const(c19.x) 0x3d889a02, 0x00000000, 0xbf4ccccd, 0x3dcccccd
+@const(c20.x) 0x40400000, 0x40000000, 0x3daa9931, 0x3f000000
+@const(c21.x) 0x41700000, 0x3f800000, 0x3e000000, 0x41f00000
+@const(c22.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
+(sy)(ss)add.f r1.z, (neg)r0.y, c19.z
+mul.f r1.w, c8.x, r0.w
mov.f32f32 r3.y, c18.x
-mad.f32 r3.x, c9.x, r1.x, r3.x
-max.f r2.w, c19.y, r2.w
-mad.f32 r3.x, c10.x, r1.y, r3.x
+mad.f32 r1.w, c9.x, r1.x, r1.w
+max.f r1.z, c19.y, r1.z
+mad.f32 r1.w, c10.x, r1.y, r1.w
mul.f r3.y, r3.y, c19.x
mul.f r3.z, c8.y, r0.w
-mov.f32f32 r2.w, r2.w
-add.f r3.x, r3.x, c11.x
-mul.f r3.w, r3.y, c21.w
-mul.f r3.y, r3.y, c21.x
-mul.f r2.w, r2.w, c19.w
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r3.w, r3.w
-mad.f32 r3.y, c20.w, r0.y, r3.y
-mov.f32f32 r2.w, r2.w
-mul.f r4.x, r3.x, r3.x
+mul.f r1.z, r1.z, c19.w
+add.f r1.w, r1.w, c11.x
+mul.f r3.w, r3.y, c21.x
+mul.f r3.y, r3.y, c21.w
+max.f r1.z, r1.z, c19.y
+mov.f32f32 r4.x, r1.w
+mad.f32 r3.w, c20.w, r0.y, r3.w
+mad.f32 r3.y, c20.y, r0.y, r3.y
+min.f r1.z, r1.z, c21.y
+mul.f r1.w, r1.w, r4.x
mad.f32 r3.z, c9.y, r1.x, r3.z
-mad.f32 r3.w, c20.y, r0.y, r3.w
-max.f r2.w, r2.w, c19.y
-mad.f32 r3.z, c10.y, r1.y, r3.z
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r3.w, r3.w
-min.f r2.w, r2.w, c21.y
-add.f r3.z, r3.z, c11.y
mul.f r0.w, c8.z, r0.w
-mov.f32f32 r4.y, c13.x
-mul.f r4.z, c20.y, r2.w
-mov.f32f32 r3.z, r3.z
+mul.f r4.y, c20.y, r1.z
+mad.f32 r3.z, c10.y, r1.y, r3.z
sin r3.w, r3.w
-(ss)mov.f32f32 r3.w, r3.w
-mul.f r4.w, r1.z, c21.z
-add.f r4.z, c20.x, (neg)r4.z
-mad.f32 r4.x, r3.z, r3.z, r4.x
-mul.f r5.x, r1.w, c21.z
-mul.f r5.y, r2.x, c21.z
-mov.f32f32 r4.z, r4.z
-mov.f32f32 r4.x, r4.x
+nop
+sin r3.y, r3.y
+(ss)mov.f32f32 r4.z, r3.w
+add.f r4.y, c20.x, (neg)r4.y
+add.f r3.z, r3.z, c11.y
+mul.f r4.w, r2.z, c20.z
+mov.f32f32 r5.x, r3.y
+mul.f r4.y, r1.z, r4.y
+mov.f32f32 r5.y, r3.z
+mul.f r4.w, r4.w, r4.z
+mul.f r2.z, r2.z, c21.z
+mul.f r1.z, r1.z, r4.y
+mad.f32 r1.w, r3.z, r5.y, r1.w
mad.f32 r0.w, c9.z, r1.x, r0.w
-mul.f r1.x, r4.w, r3.w
-mul.f r4.z, r2.w, r4.z
+mul.f r1.x, r2.z, r5.x
+mov.f32f32 r2.z, r1.z
mad.f32 r0.w, c10.z, r1.y, r0.w
-mul.f r1.y, r5.x, r3.w
-mul.f r3.w, r5.y, r3.w
-mov.f32f32 r4.z, r4.z
-add.f r0.w, r0.w, c11.z
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.y, r1.y
-mul.f r2.w, r2.w, r4.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r3.w, r3.w
-sin r3.y, r3.y
-(ss)mov.f32f32 r3.y, r3.y
-mov.f32f32 r2.w, r2.w
-mad.f32 r4.x, r0.w, r0.w, r4.x
-mul.f r1.z, r1.z, c20.z
-mul.f r1.w, r1.w, c20.z
-add.f r4.z, c21.y, (neg)r2.w
-mul.f r2.x, r2.x, c20.z
-mul.f r1.z, r1.z, r3.y
-mul.f r1.w, r1.w, r3.y
-rsq r4.x, r4.x
-(ss)mov.f32f32 r4.x, r4.x
-mul.f r2.x, r2.x, r3.y
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r1.w, r1.w
-mul.f r3.x, r3.x, r4.x
-mov.f32f32 r2.x, r2.x
-mad.f32 r0.x, r1.z, r4.z, r0.x
-mad.f32 r0.y, r1.w, r4.z, r0.y
-mov.f32f32 r1.w, r3.x
-mad.f32 r0.z, r2.x, r4.z, r0.z
-mov.f32f32 r0.x, r0.x
-add.f r1.z, c21.y, (neg)r2.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-mul.f r2.x, r1.w, r1.w
-mov.f32f32 r1.z, r1.z
-mul.f r2.w, r3.z, r4.x
-mul.f r0.w, r0.w, r4.x
-nop
-mad.f32 r0.x, r1.x, r1.z, r0.x
-mad.f32 r0.y, r1.y, r1.z, r0.y
-mad.f32 r0.z, r3.w, r1.z, r0.z
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r3.x, r0.z
-mad.f32 r0.z, r2.w, r2.w, r2.x
-mul.f r1.x, c0.w, r0.x
-mul.f r1.y, c0.x, r0.x
-mad.f32 r1.x, c1.w, r0.y, r1.x
-mad.f32 r1.y, c1.x, r0.y, r1.y
-mad.f32 r1.x, c2.w, r3.x, r1.x
-mad.f32 r1.y, c2.x, r3.x, r1.y
-mul.f r1.z, c0.x, r0.x
-mul.f r2.x, c0.y, r0.x
-mov.f32f32 r1.x, r1.x
-add.f r3.y, r1.y, c3.x
-mad.f32 r1.y, c1.x, r0.y, r1.z
-mad.f32 r1.z, c1.y, r0.y, r2.x
-add.f r1.x, r1.x, c3.w
-add.f r2.x, c12.x, (neg)r3.y
-add.f r3.z, c12.x, (neg)r3.y
-mad.f32 r1.y, c2.x, r3.x, r1.y
-mad.f32 r1.z, c2.y, r3.x, r1.z
-mul.f r3.w, c0.z, r0.x
-mul.f r4.x, c0.y, r0.x
-rcp r4.z, r1.x
-mov.f32f32 r1.y, r1.y
-mul.f r2.x, r2.x, r2.x
-mad.f32 r4.x, c1.y, r0.y, r4.x
-mul.f r4.w, r3.z, r3.z
-add.f r1.y, r1.y, c3.x
-mad.f32 r4.x, c2.y, r3.x, r4.x
-mov.f32f32 r1.z, r1.z
-mad.f32 r3.w, c1.z, r0.y, r3.w
-(ss)mul.f r1.y, r1.y, r4.z
-add.f r4.x, r4.x, c3.y
-add.f r1.z, r1.z, c3.y
-mad.f32 r3.w, c2.z, r3.x, r3.w
-mov.f32f32 r5.x, r1.y
-add.f r1.y, c12.y, (neg)r4.x
-add.f r5.y, c12.y, (neg)r4.x
-mul.f r1.z, r1.z, r4.z
-mul.f r5.z, r5.x, r5.x
-mad.f32 r1.y, r1.y, r1.y, r2.x
-mad.f32 r2.x, r5.y, r5.y, r4.w
-mov.f32f32 r5.w, r1.z
-mov.f32f32 r1.z, r3.w
-mov.f32f32 r3.w, r1.y
-mov.f32f32 r2.x, r2.x
-mad.f32 r1.y, r5.w, r5.w, r5.z
-mul.f r4.w, c0.z, r0.x
-add.f r1.z, r1.z, c3.z
-mad.f32 r4.w, c1.z, r0.y, r4.w
-mov.f32f32 r5.z, r1.y
-mad.f32 r1.y, c2.z, r3.x, r4.w
+add.f r1.y, c21.y, (neg)r1.z
+mul.f r1.z, r2.w, c20.z
+add.f r2.z, c21.y, (neg)r2.z
+add.f r3.z, r0.w, c11.z
+mul.f r0.w, r3.x, c20.z
mul.f r1.z, r1.z, r4.z
-mov.f32f32 r4.w, r1.x
-(ss)mul.f r1.x, c0.w, r0.x
-add.f r4.z, r1.y, c3.z
-mov.f32f32 r6.x, r1.z
-mad.f32 r1.z, c1.w, r0.y, r1.x
-mov.f32f32 r1.y, r4.x
-mov.f32f32 r1.x, r3.y
-mad.f32 r3.y, r6.x, r6.x, r5.z
-add.f r4.x, c12.z, (neg)r4.z
-add.f r5.z, c12.z, (neg)r4.z
-mad.f32 r6.y, c2.w, r3.x, r1.z
-mov.f32f32 r1.z, r4.z
-mul.f r4.z, c4.w, r0.x
-mul.f r6.z, c4.z, r0.x
-rsq r3.y, r3.y
-(ss)mov.f32f32 r3.y, r3.y
-mad.f32 r3.w, r4.x, r4.x, r3.w
-mad.f32 r2.x, r5.z, r5.z, r2.x
-add.f r4.x, r6.y, c3.w
-mul.f r5.x, r5.x, r3.y
-mov.f32f32 r3.w, r3.w
-mul.f r5.w, r5.w, r3.y
-mul.f r3.y, r6.x, r3.y
-mov.f32f32 r5.x, r5.x
-rsq r2.x, r2.x
-(ss)mov.f32f32 r2.x, r2.x
-mov.f32f32 r5.w, r5.w
-mov.f32f32 r3.y, r3.y
-mul.f r5.x, r1.w, (neg)r5.x
-sqrt r3.w, r3.w
-(ss)mov.f32f32 r3.w, r3.w
-mad.f32 r5.x, r2.w, (neg)r5.w, r5.x
-rcp r4.y, r4.y
-mul.f r3.z, r3.z, r2.x
-mul.f r5.y, r5.y, r2.x
-(ss)mul.f r3.w, r3.w, r4.y
-(ss)mov.f32f32 r4.y, r5.x
-mov.f32f32 r5.x, r0.w
-mov.f32f32 r0.w, r3.z
-mov.f32f32 r3.z, r3.w
-mov.f32f32 r0.z, r0.z
-mad.f32 r3.y, r5.x, (neg)r3.y, r4.y
-mad.f32 r0.z, r5.x, r5.x, r0.z
-add.f r3.z, c21.y, (neg)r3.z
-mov.f32f32 r3.w, r5.y
-mov.f32f32 r3.y, r3.y
-mul.f r2.x, r5.z, r2.x
-(rpt1)nop
-absneg.f r3.y, (abs)r3.y
-mov.f32f32 r3.z, r3.z
-rsq r0.z, r0.z
-(ss)mov.f32f32 r0.z, r0.z
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r3.y, r3.y
-max.f r3.z, r3.z, c19.y
-mul.f r4.y, r1.w, r0.z
-mov.f32f32 r1.w, r4.x
-max.f r3.y, r3.y, c19.y
-mov.f32f32 r3.z, r3.z
-mov.f32f32 r4.x, r4.y
-mad.f32 r4.y, c5.w, r0.y, r4.z
-mov.f32f32 r3.y, r3.y
-mad.f32 r4.y, c6.w, r3.x, r4.y
-mad.f32 r4.z, c5.z, r0.y, r6.z
-mul.f r5.y, c4.y, r0.x
-add.f r3.y, c21.y, (neg)r3.y
-log2 r3.z, r3.z
-(ss)mul.f r3.z, c20.y, r3.z
-mul.f r4.x, r4.x, r0.w
-mul.f r0.w, r2.w, r0.z
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r3.z, r3.z
-add.f r4.y, r4.y, c7.w
-mad.f32 r4.z, c6.z, r3.x, r4.z
-mad.f32 r5.y, c5.y, r0.y, r5.y
+mad.f32 r0.x, r4.w, r2.z, r0.x
+mov.f32f32 r4.y, r1.y
+mov.f32f32 r4.z, r3.z
+mad.f32 r0.y, r1.z, r2.z, r0.y
+mul.f r0.w, r0.w, r3.w
+mad.f32 r0.x, r1.x, r4.y, r0.x
+(ss)mad.f32 r3.w, r4.z, r4.z, r1.w
+mul.f r1.x, r2.w, c21.z
+mad.f32 r0.z, r0.w, r2.z, r0.z
+mov.f32f32 r0.w, r0.x
mul.f r0.x, c4.x, r0.x
-mad.f32 r5.y, c6.y, r3.x, r5.y
-log2 r3.y, r3.y
-(ss)mul.f r3.y, c17.x, r3.y
-exp2 r3.z, r3.z
-mov.f32f32 r5.z, r0.w
-mov.f32f32 r0.w, r4.y
-add.f r4.y, r4.z, c7.z
-mov.f32f32 r3.y, r3.y
-mad.f32 r3.w, r5.z, r3.w, r4.x
-add.f r4.x, r5.y, c7.y
+mul.f r1.x, r1.x, r5.x
+mul.f r1.z, r3.x, c21.z
+mul.f r1.w, c0.w, r0.w
+mul.f r2.z, c0.x, r0.w
+mad.f32 r0.y, r1.x, r4.y, r0.y
+mul.f r1.x, c0.y, r0.w
+mul.f r2.w, c0.z, r0.w
+mul.f r3.x, c0.x, r0.w
+mov.f32f32 r4.y, r0.y
+mul.f r4.z, c0.y, r0.w
+mul.f r4.w, c0.z, r0.w
+mul.f r5.x, c0.w, r0.w
+mad.f32 r1.w, c1.w, r4.y, r1.w
+mul.f r1.z, r1.z, r3.y
+mad.f32 r2.z, c1.x, r4.y, r2.z
+mad.f32 r1.x, c1.y, r4.y, r1.x
+mad.f32 r2.w, c1.z, r4.y, r2.w
+mad.f32 r3.y, r1.z, r1.y, r0.z
+mad.f32 r0.z, c1.x, r4.y, r3.x
+mad.f32 r3.x, c1.y, r4.y, r4.z
+mad.f32 r4.z, c1.z, r4.y, r4.w
+mov.f32f32 r5.z, r3.y
+mad.f32 r5.x, c1.w, r4.y, r5.x
+mul.f r5.w, c4.w, r0.w
+mul.f r6.x, c4.z, r0.w
+mad.f32 r1.y, c2.w, r5.z, r1.w
+mad.f32 r1.z, c2.x, r5.z, r2.z
+mad.f32 r1.w, c2.y, r5.z, r1.x
+mad.f32 r2.z, c2.z, r5.z, r2.w
+add.f r4.w, r1.y, c3.w
+add.f r1.x, r1.z, c3.x
+add.f r1.y, r1.w, c3.y
+add.f r1.z, r2.z, c3.z
+mad.f32 r0.z, c2.x, r5.z, r0.z
+mad.f32 r1.w, c2.y, r5.z, r3.x
+mad.f32 r2.z, c2.z, r5.z, r4.z
+rcp r2.w, r4.w
+add.f r3.x, c12.x, (neg)r1.x
+add.f r0.z, r0.z, c3.x
+add.f r4.z, c12.x, (neg)r1.x
+add.f r1.w, r1.w, c3.y
+add.f r2.z, r2.z, c3.z
+(ss)mul.f r0.z, r0.z, r2.w
+mul.f r3.x, r3.x, r3.x
+add.f r6.y, c12.y, (neg)r1.y
+mul.f r6.z, r4.z, r4.z
+mov.f32f32 r6.w, r0.z
+add.f r7.x, c12.y, (neg)r1.y
+mad.f32 r3.x, r6.y, r6.y, r3.x
+add.f r6.y, c12.z, (neg)r1.z
+mul.f r0.z, r0.z, r6.w
+mul.f r1.w, r1.w, r2.w
+mad.f32 r6.z, r7.x, r7.x, r6.z
+mad.f32 r3.x, r6.y, r6.y, r3.x
+add.f r6.y, c12.z, (neg)r1.z
+mov.f32f32 r7.y, r1.w
+mul.f r2.z, r2.z, r2.w
+mad.f32 r2.w, c2.w, r5.z, r5.x
+mad.f32 r5.x, c5.w, r4.y, r5.w
+mad.f32 r0.z, r1.w, r7.y, r0.z
+mov.f32f32 r5.w, r2.z
+sqrt r3.x, r3.x
+mov.f32f32 r7.z, c13.x
+mad.f32 r6.z, r6.y, r6.y, r6.z
+add.f r1.w, r2.w, c3.w
+mad.f32 r0.z, r5.w, r5.w, r0.z
+mad.f32 r2.w, c6.w, r5.z, r5.x
+mad.f32 r5.x, c5.z, r4.y, r6.x
+mul.f r0.w, c4.y, r0.w
+mad.f32 r5.x, c6.z, r5.z, r5.x
+mad.f32 r4.y, c5.y, r4.y, r0.w
mad.f32 r0.x, c5.x, r0.y, r0.x
-mul.f r0.y, r5.x, r0.z
-(rpt1)nop
-exp2 r0.z, r3.y
-(ss)mul.f r3.y, c16.w, r0.z
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r4.z, r0.y
-mov.f32f32 r0.z, r4.y
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r0.y, r4.x
-mad.f32 r0.x, c6.x, r3.x, r0.x
-mov.f32f32 r3.x, c15.z
-mul.f r4.x, c16.z, r3.y
-mul.f r4.y, c16.y, r3.y
-mul.f r3.y, c16.x, r3.y
-mad.f32 r2.x, r4.z, r2.x, r3.w
-mov.f32f32 r3.w, r4.x
-mov.f32f32 r4.x, r4.y
-mov.f32f32 r3.y, r3.y
-nop
-mov.f32f32 r4.z, r3.w
-mov.f32f32 r4.y, r4.x
-mov.f32f32 r4.x, r3.y
-mov.f32f32 r2.x, r2.x
+rsq r0.y, r0.z
+(ss)mov.f32f32 r0.z, r0.y
+rcp r0.w, r7.z
+(ss)mul.f r0.w, r3.x, r0.w
+rsq r3.x, r6.z
+(ss)mov.f32f32 r5.w, r3.x
+mul.f r2.z, r2.z, r0.y
+mul.f r0.y, r6.w, r0.z
+rsq r3.w, r3.w
+(ss)mov.f32f32 r6.x, r3.w
+add.f r0.w, c21.y, (neg)r0.w
+mul.f r4.z, r4.z, r5.w
+(ss)mul.f r6.z, r7.y, r0.z
+mul.f r4.x, r4.x, r6.x
+max.f r6.w, r0.w, c19.y
+mul.f r5.w, r7.x, r5.w
+mul.f r3.x, r6.y, r3.x
+mov.f32f32 r6.y, r4.x
+add.f r0.w, r2.w, c7.w
+add.f r0.z, r5.x, c7.z
+mad.f32 r2.w, c6.y, r5.z, r4.y
+mul.f r4.y, r6.y, (neg)r0.y
+mul.f r5.x, r5.y, r6.x
+log2 r0.y, r6.w
+(ss)mul.f r5.y, c20.y, r0.y
+mul.f r4.x, r4.x, r6.y
+add.f r0.y, r2.w, c7.y
+mov.f32f32 r2.w, r5.x
+mad.f32 r0.x, c6.x, r3.y, r0.x
+mov.f32f32 r3.y, c15.z
+mov.f32f32 r5.z, c15.y
+mad.f32 r4.y, r2.w, (neg)r6.z, r4.y
+mul.f r6.x, r3.z, r3.w
+exp2 r5.y, r5.y
+mad.f32 r3.z, r5.x, r2.w, r4.x
add.f r0.x, r0.x, c7.x
-(rpt1)nop
-max.f r2.x, r2.x, c19.y
-mov.f32f32 r0.x, r0.x
-mul.f r3.x, r3.x, c15.w
-mov.f32f32 r3.y, c15.y
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r3.w, c15.x
-mul.f r3.x, r3.x, r2.w
mul.f r3.y, r3.y, c15.w
-mul.f r2.x, r2.x, r3.z
-mul.f r3.z, r3.w, c15.w
-mov.f32f32 r3.x, r3.x
+mov.f32f32 r3.w, r6.x
+mul.f r4.x, r5.z, c15.w
+mov.f32f32 r5.x, c15.x
+nop
+mad.f32 r2.z, r3.w, (neg)r2.z, r4.y
+mad.f32 r4.y, r3.w, r3.w, r3.z
+mul.f r3.w, r3.y, r2.w
+mul.f r3.z, r4.x, r2.w
+absneg.f r2.z, (abs)r2.z
+mul.f r3.y, r5.x, c15.w
+(rpt1)nop
+max.f r2.z, r2.z, c19.y
+rsq r4.x, r4.y
+(ss)mov.f32f32 r4.y, r4.x
+mul.f r5.x, r6.x, r4.x
mul.f r3.y, r3.y, r2.w
-mov.f32f32 r2.x, r2.x
-mul.f r2.w, r3.z, r2.w
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r3.y, r3.y
-mul.f r2.x, r2.x, c14.w
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r5.x, r2.z
-mov.f32f32 r5.y, r2.y
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r3.w, r3.x
-mov.f32f32 r2.y, r3.y
-mov.f32f32 r2.z, r2.w
-mul.f r2.w, c14.z, r2.x
-mul.f r3.x, c14.y, r2.x
-mul.f r2.x, c14.x, r2.x
-mov.f32f32 r3.z, r2.y
-mov.f32f32 r2.y, r2.w
-mov.f32f32 r2.w, r3.x
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r3.y, r2.z
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r2.z, r2.w
-mov.f32f32 r2.x, r2.x
+add.f r2.z, c21.y, (neg)r2.z
+mul.f r4.x, r6.y, r4.y
+mul.f r2.w, r2.w, r4.y
+(rpt3)nop
+log2 r2.z, r2.z
+(ss)mul.f r2.z, c17.x, r2.z
+mul.f r4.x, r4.x, r4.z
+(rpt4)nop
+exp2 r2.z, r2.z
+(ss)mul.f r2.z, c16.w, r2.z
+mad.f32 r2.w, r2.w, r5.w, r4.x
+(rpt1)nop
+mov.f32f32 r4.y, r2.z
+mul.f r4.x, c16.x, r2.z
+mad.f32 r2.z, r5.x, r3.x, r2.w
nop
-mov.f32f32 r3.x, r2.y
+mul.f r4.z, c16.z, r4.y
+mul.f r4.y, c16.y, r4.y
+max.f r2.z, r2.z, c19.y
+(rpt2)nop
+mul.f r2.z, r2.z, r5.y
+(rpt2)nop
+mul.f r2.z, r2.z, c14.w
+(rpt2)nop
mov.f32f32 r2.w, r2.z
-mov.f32f32 r2.z, r2.x
-mov.f32f32 r2.y, r5.x
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r2.x, r5.y
+mul.f r2.z, c14.x, r2.z
+(rpt1)nop
+mul.f r3.x, c14.z, r2.w
+mul.f r2.w, c14.y, r2.w
end
-nop
; VERT: outputs: r0.x (0:0) r1.x (5:20) r2.x (5:21) r3.x (5:22) r4.x (5:23)
-; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r1.z (0:0,cm=7,il=16,b=0) r2.y (0:0,cm=3,il=20,b=0)
-; VERT: 298 instructions, 0 half, 7 full
+; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=7,il=12,b=0) r2.z (0:0,cm=7,il=16,b=0) r2.x (0:0,cm=3,il=20,b=0)
+; VERT: 228 instructions, 0 half, 8 full
diff --git a/reference/glsl-fs-raytrace-bug27060.asm b/reference/glsl-fs-raytrace-bug27060.asm
index f3551bf..d126427 100644
--- a/reference/glsl-fs-raytrace-bug27060.asm
+++ b/reference/glsl-fs-raytrace-bug27060.asm
@@ -6,1335 +6,953 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c3.x) 0xbf000000, 0xbf800000, 0x461c3f9a, 0x3f000000
+@const(c4.x) 0x00000000, 0xc0800000, 0x40800000, 0x3e800000
+@const(c5.x) 0x00000000, 0x00000000, 0x40400000, 0x40000000
+@const(c6.x) 0x80000000, 0x3f800000, 0x3fc00000, 0x40100000
+@const(c7.x) 0xc0400000, 0x00000000, 0xc0800000, 0x3f2aacda
+@const(c8.x) 0x40400000, 0x00000000, 0x40800000, 0xc0800000
+@const(c9.x) 0x40400000, 0x80000000, 0x3f800000, 0xc0400000
+@const(c10.x) 0x00000000, 0xc0400000, 0x40800000, 0xc0000000
+@const(c11.x) 0x40000000, 0x00000000, 0xc0800000, 0xbf800000
+@const(c12.x) 0xc0000000, 0x80000000, 0x3f800000, 0x00000000
+@const(c13.x) 0x3e4ccccd, 0x3e99999a, 0x3ecccccd, 0x3f800000
+@const(c14.x) 0xc0400000, 0x00000000, 0xbf800000, 0x40400000
+@const(c15.x) 0x00000000, 0x41000000, 0x3f800000, 0x41800000
+@const(c16.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.z, 0, r0.x
mov.f32f32 r0.w, (0.000000)
mov.f32f32 r1.x, (0.000000)
mov.f32f32 r1.y, (0.000000)
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.y, r1.y
-mul.f r1.z, r0.z, c1.x
+mov.f32f32 r1.z, r0.z
+mul.f r0.z, r0.z, c2.x
bary.f (ei)r0.x, 1, r0.x
-mul.f r0.y, r0.z, c2.x
-mul.f r0.z, r0.z, c0.x
-mov.f32f32 r1.w, (0.000000)
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r2.x, (0.000000)
-mov.f32f32 r2.y, (0.000000)
-mov.f32f32 r2.z, c13.w
-mad.f32 r1.z, c1.y, r0.x, r1.z
-mad.f32 r0.y, c2.y, r0.x, r0.y
-mad.f32 r0.x, c0.y, r0.x, r0.z
-mov.f32f32 r0.z, r1.w
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r1.w, c3.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r2.x, r2.x
-mad.f32 r1.z, c1.z, r1.w, r1.z
-mad.f32 r0.y, c2.z, r1.w, r0.y
-mad.f32 r0.x, c0.z, r1.w, r0.x
-mov.f32f32 r1.w, r2.y
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r0.y, r0.y
-mul.f r2.y, r0.x, r0.x
-mov.f32f32 r2.w, (0.000000)
-mad.f32 r2.y, r1.z, r1.z, r2.y
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r3.x, c12.w
-mov.f32f32 r3.y, c12.w
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r3.z, c12.w
-mad.f32 r2.y, r0.y, r0.y, r2.y
-mov.f32f32 r3.w, c12.w
-mov.f32f32 r4.x, c12.w
-mov.f32f32 r4.y, c12.w
+mov.f32f32 r0.y, r0.w
+mul.f r1.w, r1.z, c1.x
+mul.f r1.z, r1.z, c0.x
+mov.f32f32 r2.x, r0.x
+mad.f32 r0.x, c2.y, r0.x, r0.z
+mov.f32f32 r0.z, c3.x
+mov.f32f32 r2.y, r1.x
+mad.f32 r1.w, c1.y, r2.x, r1.w
+mad.f32 r1.z, c0.y, r2.x, r1.z
+mad.f32 r1.w, c1.z, r0.z, r1.w
+mad.f32 r1.z, c0.z, r0.z, r1.z
+mad.f32 r0.x, c2.z, r0.z, r0.x
+mov.f32f32 r0.z, r1.y
+mov.f32f32 r2.x, r1.w
+mul.f r2.z, r1.z, r1.z
+mov.f32f32 r2.w, r0.x
+nop
+mad.f32 r1.w, r1.w, r2.x, r2.z
+mov.f32f32 r2.z, (0.000000)
+mov.f32f32 r2.w, r2.w
+mov.f32f32 r3.x, (0.000000)
+mov.f32f32 r3.y, (0.000000)
+mov.f32f32 r3.z, c13.w
+mad.f32 r1.w, r2.w, r2.w, r1.w
+mov.f32f32 r2.w, r2.z
+mov.f32f32 r3.w, r3.x
+mov.f32f32 r4.x, r3.y
+mov.f32f32 r4.y, (0.000000)
mov.f32f32 r4.z, c12.w
mov.f32f32 r4.w, c12.w
-mov.f32f32 r5.x, c13.w
-rsq r2.y, r2.y
-(ss)mov.f32f32 r2.y, r2.y
-mov.f32f32 r5.y, c13.z
-mov.f32f32 r5.z, c12.w
-mov.f32f32 r5.w, c12.z
-mul.f r0.x, r0.x, r2.y
-mul.f r1.z, r1.z, r2.y
-mul.f r0.y, r0.y, r2.y
-mov.f32f32 r2.y, c12.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r6.x, c12.w
-absneg.f r6.y, (neg)r0.x
-absneg.f r6.z, (neg)r0.x
-absneg.f r6.w, (neg)r0.x
-absneg.f r7.x, (neg)r0.x
-mul.f r7.y, r6.y, r0.x
-absneg.f r7.z, (neg)r1.z
-mul.f r7.w, r6.z, r0.x
-mul.f r8.x, r6.w, r0.x
-absneg.f r8.y, (neg)r1.z
-mad.f32 r7.y, r7.z, r1.z, r7.y
-absneg.f r8.z, (neg)r1.z
-mul.f r8.w, r7.x, r0.x
-mad.f32 r7.w, r8.y, r1.z, r7.w
-mov.f32f32 r7.y, r7.y
-absneg.f r9.x, (neg)r0.y
-mad.f32 r8.x, r8.z, r1.z, r8.x
-mov.f32f32 r7.w, r7.w
-absneg.f r9.y, (neg)r0.y
-mad.f32 r7.y, r9.x, r0.y, r7.y
-mov.f32f32 r8.x, r8.x
-absneg.f r9.z, (neg)r0.y
-mad.f32 r7.w, r9.y, r0.y, r7.w
-mov.f32f32 r7.y, r7.y
-absneg.f r9.w, (neg)r1.z
-mad.f32 r8.x, r9.z, r0.y, r8.x
-mov.f32f32 r7.w, r7.w
+rsq r1.w, r1.w
+(ss)mov.f32f32 r5.x, r1.w
+mul.f r0.x, r0.x, r1.w
+(ss)mov.f32f32 r1.w, c12.w
+mov.f32f32 r5.y, c12.w
+mul.f r1.z, r1.z, r5.x
+mul.f r2.x, r2.x, r5.x
+mov.f32f32 r5.x, r0.x
+absneg.f r0.x, (neg)r0.x
+mov.f32f32 r5.z, r1.z
+absneg.f r1.z, (neg)r1.z
+mov.f32f32 r5.w, r2.x
+absneg.f r6.x, (neg)r5.x
+absneg.f r6.y, (neg)r5.z
+absneg.f r6.z, (neg)r5.z
+absneg.f r6.w, (neg)r5.z
+mul.f r7.x, r1.z, r5.z
+mul.f r7.y, r6.y, r5.z
+absneg.f r7.z, (neg)r5.w
+mul.f r7.w, r6.z, r5.z
+mul.f r8.x, r6.w, r5.z
+absneg.f r8.y, (neg)r5.w
+mad.f32 r7.y, r7.z, r5.w, r7.y
+absneg.f r8.z, (neg)r5.w
+mad.f32 r7.y, r6.x, r5.x, r7.y
+mad.f32 r7.w, r8.y, r5.w, r7.w
+absneg.f r8.w, (neg)r5.x
+mad.f32 r8.x, r8.z, r5.w, r8.x
+absneg.f r9.x, (neg)r5.x
+absneg.f r2.x, (neg)r2.x
+mad.f32 r7.w, r8.w, r5.x, r7.w
+rcp r7.y, r7.y
mul.f r6.y, c4.x, r6.y
+mad.f32 r8.x, r9.x, r5.x, r8.x
+mad.f32 r6.y, c4.x, r7.z, r6.y
+mad.f32 r7.x, r2.x, r5.w, r7.x
+mad.f32 r6.x, c4.y, r6.x, r6.y
+mad.f32 r6.y, r0.x, r5.x, r7.x
+rcp r7.x, r7.w
mul.f r6.z, c7.x, r6.z
mul.f r6.w, c8.y, r6.w
-rcp r7.y, r7.y
-(ss)mov.f32f32 r7.y, r7.y
-mad.f32 r6.y, c4.x, r7.z, r6.y
-mov.f32f32 r7.z, r8.x
-rcp r7.w, r7.w
-(ss)mov.f32f32 r7.w, r7.w
+(ss)mul.f r6.x, r6.x, r7.y
mad.f32 r6.z, c7.y, r8.y, r6.z
-mov.f32f32 r6.y, r6.y
-mad.f32 r8.x, r9.w, r1.z, r8.w
-mad.f32 r6.y, c4.y, r9.x, r6.y
-mov.f32f32 r6.z, r6.z
-rcp r7.z, r7.z
-(ss)mov.f32f32 r7.z, r7.z
-mad.f32 r6.z, c7.z, r9.y, r6.z
-mul.f r6.y, r6.y, r7.y
+(ss)rcp r7.y, r8.x
mad.f32 r6.w, c8.x, r8.z, r6.w
-mov.f32f32 r7.y, r8.x
-mul.f r6.z, r6.z, r7.w
-mov.f32f32 r6.y, r6.y
-mov.f32f32 r6.w, r6.w
-absneg.f r7.w, (neg)r0.y
-mov.f32f32 r6.z, r6.z
-mad.f32 r8.x, r0.x, r6.y, c4.x
-mad.f32 r6.w, c8.w, r9.z, r6.w
-mad.f32 r7.y, r7.w, r0.y, r7.y
-mad.f32 r8.y, r0.x, r6.z, c8.x
-mul.f r8.x, r8.x, r8.x
-mad.f32 r8.z, r1.z, r6.y, c4.x
-mul.f r6.w, r6.w, r7.z
-mul.f r7.z, r8.y, r8.y
-mad.f32 r8.y, r1.z, r6.z, c8.y
-mad.f32 r8.x, r8.z, r8.z, r8.x
-mov.f32f32 r6.w, r6.w
-mov.f32f32 r7.y, r7.y
-mad.f32 r7.z, r8.y, r8.y, r7.z
-mov.f32f32 r8.x, r8.x
-mad.f32 r8.y, r0.y, r6.y, c4.z
-mad.f32 r8.z, r0.x, r6.w, c10.x
-mov.f32f32 r7.z, r7.z
-mad.f32 r8.w, r0.y, r6.z, c8.z
-mad.f32 r8.x, r8.y, r8.y, r8.x
-mul.f r8.y, r8.z, r8.z
-mad.f32 r8.z, r1.z, r6.w, c10.y
-mad.f32 r7.z, r8.w, r8.w, r7.z
-mov.f32f32 r8.x, r8.x
-rcp r7.y, r7.y
-(ss)mov.f32f32 r7.y, r7.y
-mad.f32 r8.y, r8.z, r8.z, r8.y
-mul.f r7.x, c11.x, r7.x
-mov.f32f32 r7.z, r7.z
-mad.f32 r7.x, c11.y, r9.w, r7.x
-mov.f32f32 r8.y, r8.y
-sqrt r8.x, r8.x
-(ss)mov.f32f32 r8.x, r8.x
-mad.f32 r8.z, r0.y, r6.w, c10.z
-mov.f32f32 r7.x, r7.x
-mov.f32f32 r6.y, r6.y
-mov.f32f32 r8.w, r8.x
-mov.f32f32 r9.x, r8.x
-sqrt r7.z, r7.z
-mad.f32 r8.y, r8.z, r8.z, r8.y
-(ss)mov.f32f32 r7.z, r7.z
-mad.f32 r7.x, c11.z, r7.w, r7.x
-mul.f r7.w, r8.w, r9.x
-mov.f32f32 r8.y, r8.y
-mov.f32f32 r8.z, r7.z
-mov.f32f32 r8.w, r7.z
-mov.f32f32 r7.w, r7.w
-mul.f r7.x, r7.x, r7.y
-cmps.f.ge r7.y, c6.z, r7.z
-cmps.f.ge r7.z, c3.w, r8.x
-add.f r7.w, c4.w, (neg)r7.w
-mul.f r8.x, r8.z, r8.w
-sqrt r8.y, r8.y
-(ss)mov.f32f32 r8.y, r8.y
-mov.f32f32 r7.x, r7.x
-mov.f32f32 r7.w, r7.w
-mov.f32f32 r8.x, r8.x
-mov.f32f32 r8.z, r8.y
-mov.f32f32 r8.w, r8.y
-mad.f32 r9.x, r0.x, r7.x, c10.w
-add.f r8.x, c6.w, (neg)r8.x
-cmps.f.ge r8.y, c3.w, r8.y
-sqrt r7.w, r7.w
-(ss)mov.f32f32 r7.w, r7.w
-mul.f r8.z, r8.z, r8.w
-mov.f32f32 r8.x, r8.x
-mul.f r8.w, r9.x, r9.x
-add.f r6.y, r6.y, (neg)r7.w
-mov.f32f32 r7.w, r8.z
-mad.f32 r8.z, r1.z, r7.x, c10.x
-cov.u32f32 r8.y, r8.y
-mov.f32f32 r6.y, r6.y
-sqrt r8.x, r8.x
-add.f r7.w, c4.w, (neg)r7.w
-(ss)mov.f32f32 r8.x, r8.x
-mov.f32f32 r6.z, r6.z
-cmps.f.ge r9.x, r6.y, c4.x
-mov.f32f32 r9.y, r6.y
-mov.f32f32 r7.w, r7.w
-mov.f32f32 r9.z, r6.y
-cov.u32f32 r9.x, r9.x
-cmps.f.ge r9.w, r6.y, c5.x
-mov.f32f32 r10.x, r0.x
-add.f r6.z, r6.z, (neg)r8.x
-mov.f32f32 r8.x, r1.z
-cov.u32f32 r9.w, r9.w
-mov.f32f32 r10.x, r10.x
-mov.f32f32 r6.z, r6.z
-sqrt r7.w, r7.w
-(ss)mov.f32f32 r7.w, r7.w
-mul.f r9.x, r9.x, r9.w
-mad.f32 r9.y, r10.x, r9.y, c5.y
-mov.f32f32 r9.w, r6.z
-mov.f32f32 r6.w, r6.w
-mov.f32f32 r9.x, r9.x
-cmps.f.ge r10.x, c3.z, r6.y
-add.f r10.y, r9.y, c6.x
-mov.f32f32 r10.z, r0.x
-add.f r6.w, r6.w, (neg)r7.w
-cov.u32f32 r7.w, r10.x
-mul.f r10.x, r10.y, c5.w
-mov.f32f32 r10.y, r10.z
-mov.f32f32 r6.w, r6.w
-mul.f r7.w, r9.x, r7.w
-mov.f32f32 r8.x, r8.x
-mov.f32f32 r9.x, r6.z
-mov.f32f32 r10.z, r6.w
-mov.f32f32 r7.w, r7.w
-mad.f32 r9.w, r10.y, r9.w, c5.y
-mov.f32f32 r10.y, r6.w
-mad.f32 r8.x, r8.x, r9.z, c5.y
-cmps.f.ne r7.w, r7.w, c4.x
-mov.f32f32 r9.z, r6.y
-mov.f32f32 r6.y, r6.y
-mov.f32f32 r10.w, r0.y
-mov.f32f32 r11.x, c4.x
-mov.f32f32 r9.z, r9.z
-mov.f32f32 r11.y, c3.z
-sel.b32 r10.x, r10.x, r7.w, r0.w
-add.f r11.z, r8.x, c6.x
-sel.b32 r9.y, r9.y, r7.w, r1.y
-sel.b32 r9.z, r9.z, r7.w, r11.y
-cov.u32f32 r7.z, r7.z
-mul.f r11.z, r11.z, c5.w
-sel.b32 r8.x, r8.x, r7.w, r0.z
-mov.f32f32 r10.w, r10.w
-cmps.f.ne r7.z, r7.z, c4.x
-sel.b32 r11.z, r11.z, r7.w, r1.x
-mov.f32f32 r11.w, c3.y
-mad.f32 r6.y, r10.w, r6.y, c5.z
-sel.b32 r9.z, r9.z, r7.z, r11.y
-sel.b32 r0.w, r10.x, r7.z, r0.w
-sel.b32 r1.x, r11.z, r7.z, r1.x
-sel.b32 r1.y, r9.y, r7.z, r1.y
-mov.f32f32 r9.y, r9.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.y, r1.y
-cmps.f.ge r9.y, r9.y, r6.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.y, r1.y
-cov.u32f32 r9.y, r9.y
-sel.b32 r0.z, r8.x, r7.z, r0.z
-sel.b32 r8.x, r6.y, r7.w, r2.x
-add.f r6.y, r6.y, c6.y
-mov.f32f32 r9.y, r9.y
-cmps.f.ge r10.x, r6.z, c4.x
-mov.f32f32 r0.z, r0.z
-sel.b32 r2.x, r8.x, r7.z, r2.x
-mul.f r6.y, r6.y, c5.w
-cov.u32f32 r8.x, r10.x
-cmps.f.ge r10.x, r6.z, c5.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r2.x, r2.x
-sel.b32 r6.y, r6.y, r7.w, r1.w
-cov.u32f32 r10.x, r10.x
-sel.b32 r7.w, r11.x, r7.w, r11.w
-mov.f32f32 r9.z, r9.z
-sel.b32 r1.w, r6.y, r7.z, r1.w
-mul.f r6.y, r8.x, r10.x
-mov.f32f32 r2.x, r2.x
-sel.b32 r7.z, r7.w, r7.z, r11.w
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r6.y, r6.y
-add.f r7.w, r9.w, c9.x
-mov.f32f32 r8.x, r0.x
-mov.f32f32 r10.x, r1.z
-mul.f r6.y, r6.y, r9.y
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r7.z, r7.z
-nop
-mov.f32f32 r6.y, r6.y
-mul.f r7.w, r7.w, c7.w
-mov.f32f32 r8.x, r8.x
-mov.f32f32 r9.y, r10.x
-cmps.f.ne r6.y, r6.y, c4.x
-mov.f32f32 r10.x, r6.z
-mov.f32f32 r6.z, r6.z
-mov.f32f32 r10.w, r0.y
-mov.f32f32 r7.z, r7.z
-mov.f32f32 r10.x, r10.x
-sel.b32 r7.w, r7.w, r6.y, r0.w
-mad.f32 r9.x, r9.y, r9.x, c5.y
-sel.b32 r9.y, r9.w, r6.y, r1.y
-sel.b32 r9.w, r10.x, r6.y, r9.z
-cov.u32f32 r7.y, r7.y
-add.f r10.x, r9.x, c9.y
-sel.b32 r9.x, r9.x, r6.y, r0.z
-mov.f32f32 r10.w, r10.w
-cmps.f.ne r7.y, r7.y, c4.x
-mul.f r10.x, r10.x, c7.w
-mov.f32f32 r11.x, c6.y
-mad.f32 r6.z, r10.w, r6.z, c5.z
-sel.b32 r9.z, r9.w, r7.y, r9.z
-sel.b32 r0.w, r7.w, r7.y, r0.w
-sel.b32 r7.w, r10.x, r6.y, r1.x
-sel.b32 r1.y, r9.y, r7.y, r1.y
-mov.f32f32 r9.y, r9.z
-mov.f32f32 r0.w, r0.w
-sel.b32 r1.x, r7.w, r7.y, r1.x
-mov.f32f32 r1.y, r1.y
-cmps.f.ge r7.w, r9.y, r6.w
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.y, r1.y
+mad.f32 r6.z, c7.z, r8.w, r6.z
+mov.f32f32 r7.z, r6.x
+mad.f32 r6.w, c8.w, r9.x, r6.w
+rcp r6.y, r6.y
+mul.f r1.z, c11.x, r1.z
+mul.f r6.z, r6.z, r7.x
+mad.f32 r7.x, r5.z, r7.z, c4.x
+(ss)mul.f r6.w, r6.w, r7.y
+mad.f32 r1.z, c11.y, r2.x, r1.z
+mov.f32f32 r2.x, r6.z
+mul.f r7.x, r7.x, r7.x
+mad.f32 r7.y, r5.w, r7.z, c4.x
+mov.f32f32 r7.w, r6.w
+(ss)mad.f32 r8.x, r5.z, r2.x, c8.x
+mad.f32 r0.x, c11.z, r0.x, r1.z
+mad.f32 r1.z, r7.y, r7.y, r7.x
+mad.f32 r7.x, r5.x, r7.z, c4.z
+mul.f r7.y, r8.x, r8.x
+mad.f32 r7.z, r5.z, r7.w, c10.x
+mad.f32 r8.x, r5.w, r2.x, c8.y
+mad.f32 r1.z, r7.x, r7.x, r1.z
+mul.f r0.x, r0.x, r6.y
+mul.f r6.y, r7.z, r7.z
+mad.f32 r7.x, r8.x, r8.x, r7.y
+mad.f32 r7.y, r5.w, r7.w, c10.y
+mad.f32 r2.x, r5.x, r2.x, c8.z
+mov.f32f32 r7.z, r0.x
+sqrt r1.z, r1.z
+(ss)mov.f32f32 r8.x, r1.z
+mad.f32 r6.y, r7.y, r7.y, r6.y
+mad.f32 r2.x, r2.x, r2.x, r7.x
+mad.f32 r7.x, r5.x, r7.w, c10.z
+(ss)mul.f r1.z, r1.z, r8.x
+mad.f32 r7.y, r5.z, r7.z, c10.w
+cmps.f.ge r7.w, c3.w, r8.x
+mad.f32 r6.y, r7.x, r7.x, r6.y
+add.f r1.z, c4.w, (neg)r1.z
+sqrt r2.x, r2.x
+(ss)mov.f32f32 r7.x, r2.x
+mul.f r7.y, r7.y, r7.y
+mad.f32 r8.x, r5.w, r7.z, c10.x
cov.u32f32 r7.w, r7.w
-sel.b32 r0.z, r9.x, r7.y, r0.z
-sel.b32 r9.x, r6.z, r6.y, r2.x
-add.f r6.z, r6.z, c9.z
-mov.f32f32 r7.w, r7.w
-cmps.f.ge r9.y, r6.w, c4.x
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r0.z, r0.z
-sel.b32 r2.x, r9.x, r7.y, r2.x
-cov.u32f32 r9.x, r9.y
-cmps.f.ge r9.y, r6.w, c5.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r2.x, r2.x
-mul.f r6.z, r6.z, c7.w
-cov.u32f32 r9.y, r9.y
-sel.b32 r9.w, r11.x, r6.y, r7.z
-mov.f32f32 r9.z, r9.z
-sel.b32 r6.y, r6.z, r6.y, r1.w
-mul.f r6.z, r9.x, r9.y
-mov.f32f32 r2.x, r2.x
-sel.b32 r7.z, r9.w, r7.y, r7.z
-sel.b32 r1.w, r6.y, r7.y, r1.w
-mov.f32f32 r6.y, r6.z
-mad.f32 r6.z, r8.x, r10.y, c5.y
+cmps.f.ge r8.y, c6.z, r7.x
+(ss)mul.f r2.x, r2.x, r7.x
+sqrt r1.z, r1.z
+(ss)add.f r1.z, r6.x, (neg)r1.z
+sqrt r6.x, r6.y
+(ss)mov.f32f32 r6.y, r6.x
+mad.f32 r7.x, r8.x, r8.x, r7.y
+add.f r2.x, c6.w, (neg)r2.x
mov.f32f32 r7.y, r1.z
-mov.f32f32 r8.x, r6.w
-mul.f r6.y, r6.y, r7.w
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r7.z, r7.z
-nop
-mov.f32f32 r6.y, r6.y
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r7.z, r7.z
-add.f r7.w, r6.z, c9.y
-cmps.f.ne r6.y, r6.y, c4.x
-mov.f32f32 r6.w, r6.w
-mov.f32f32 r7.y, r7.y
-mov.f32f32 r9.x, r0.y
-mov.f32f32 r9.y, c5.w
-mov.f32f32 r6.w, r6.w
-mul.f r7.w, r7.w, c5.w
-mad.f32 r7.y, r7.y, r10.z, c5.y
-sel.b32 r6.z, r6.z, r6.y, r1.y
-sel.b32 r6.w, r6.w, r6.y, r9.z
-cmps.f.ne r8.y, r8.y, c4.x
-sel.b32 r7.w, r7.w, r6.y, r0.w
-add.f r9.w, r7.y, c9.w
-sel.b32 r7.y, r7.y, r6.y, r0.z
-sel.b32 r6.w, r6.w, r8.y, r9.z
-sel.b32 r0.w, r7.w, r8.y, r0.w
-mul.f r7.w, r9.w, c5.w
-sel.b32 r1.y, r6.z, r8.y, r1.y
-mov.f32f32 r6.z, r6.w
-mad.f32 r6.w, r8.z, r8.z, r8.w
-mov.f32f32 r0.w, r0.w
-sel.b32 r7.w, r7.w, r6.y, r1.x
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r6.w, r6.w
-mad.f32 r8.z, r0.y, r7.x, c10.z
-mov.f32f32 r0.w, r0.w
-sel.b32 r1.x, r7.w, r8.y, r1.x
+mul.f r6.x, r6.x, r6.y
+mad.f32 r7.z, r5.x, r7.z, c10.z
+cmps.f.ge r6.y, c3.w, r6.y
+cmps.f.ge r8.x, r7.y, c4.x
+mad.f32 r8.z, r5.z, r7.y, c5.y
+sqrt r2.x, r2.x
+add.f r6.x, c4.w, (neg)r6.x
+(ss)add.f r2.x, r6.z, (neg)r2.x
+cov.u32f32 r6.z, r8.x
+cmps.f.ge r8.x, r7.y, c5.x
+add.f r8.w, r8.z, c6.x
+mov.f32f32 r9.x, r2.x
+mad.f32 r9.y, r5.w, r7.y, c5.y
+cov.u32f32 r8.x, r8.x
+mul.f r8.w, r8.w, c5.w
+mad.f32 r9.z, r5.z, r9.x, c5.y
+sqrt r6.x, r6.x
+(ss)add.f r6.x, r6.w, (neg)r6.x
+mul.f r6.z, r6.z, r8.x
+cmps.f.ge r6.w, c3.z, r7.y
+add.f r8.x, r9.z, c9.x
+mov.f32f32 r9.w, r6.x
+add.f r10.x, r9.y, c6.x
+cov.u32f32 r6.w, r6.w
+mul.f r8.x, r8.x, c7.w
+mad.f32 r10.y, r5.z, r9.w, c5.y
+mul.f r10.x, r10.x, c5.w
+mul.f r6.z, r6.z, r6.w
+mad.f32 r6.w, r5.w, r9.x, c5.y
+mad.f32 r10.z, r5.w, r9.w, c5.y
+mad.f32 r7.y, r5.x, r7.y, c5.z
+cmps.f.ne r6.z, r6.z, c4.x
+mov.f32f32 r10.w, c3.z
+mov.f32f32 r11.x, c4.x
+add.f r11.y, r7.y, c6.y
+mov.f32f32 r11.z, c3.y
+sel.b32 r1.z, r1.z, r6.z, r10.w
+cmps.f.ne r7.w, r7.w, c4.x
+sel.b32 r0.w, r8.w, r6.z, r0.w
+sel.b32 r1.x, r10.x, r6.z, r1.x
+sel.b32 r1.y, r8.z, r6.z, r1.y
+sel.b32 r1.z, r1.z, r7.w, r10.w
+sel.b32 r0.y, r0.w, r7.w, r0.y
+sel.b32 r0.w, r1.x, r7.w, r2.y
+sel.b32 r0.z, r1.y, r7.w, r0.z
+cmps.f.ge r1.x, r1.z, r9.x
+mov.f32f32 r1.y, r0.y
+mov.f32f32 r2.y, r0.w
+mov.f32f32 r8.z, r0.z
+cov.u32f32 r1.x, r1.x
+cmps.f.ge r8.w, r9.x, c4.x
mov.f32f32 r1.y, r1.y
-mad.f32 r6.w, r8.z, r8.z, r6.w
-sel.b32 r0.z, r7.y, r8.y, r0.z
-mov.f32f32 r7.y, r9.x
-sel.b32 r7.w, r9.y, r6.y, r7.z
-mov.f32f32 r6.w, r6.w
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r0.z, r0.z
-mad.f32 r7.y, r7.y, r8.x, c5.z
-sel.b32 r7.z, r7.w, r8.y, r7.z
-mov.f32f32 r7.x, r7.x
-mov.f32f32 r7.w, r0.x
-sqrt r6.w, r6.w
-(ss)mov.f32f32 r6.w, r6.w
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r0.z, r0.z
-sel.b32 r8.x, r7.y, r6.y, r2.x
-mov.f32f32 r8.z, r6.w
-mov.f32f32 r8.w, r6.w
-add.f r7.y, r7.y, c9.z
-mov.f32f32 r7.z, r7.z
-sel.b32 r2.x, r8.x, r8.y, r2.x
-mul.f r8.x, r8.z, r8.w
-mul.f r7.y, r7.y, c5.w
-mov.f32f32 r7.z, r7.z
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r8.x, r8.x
-sel.b32 r6.y, r7.y, r6.y, r1.w
-cmps.f.ge r6.w, c6.y, r6.w
-mov.f32f32 r7.y, r7.w
-add.f r7.w, c6.y, (neg)r8.x
+mov.f32f32 r2.y, r2.y
+mov.f32f32 r8.z, r8.z
+cov.u32f32 r8.w, r8.w
+cmps.f.ge r10.x, r9.x, c5.x
+sel.b32 r2.z, r9.y, r6.z, r2.z
+sel.b32 r3.x, r7.y, r6.z, r3.x
+mul.f r7.y, r11.y, c5.w
+cov.u32f32 r9.y, r10.x
+sel.b32 r2.z, r2.z, r7.w, r2.w
+sel.b32 r2.w, r3.x, r7.w, r3.w
+sel.b32 r3.x, r7.y, r6.z, r3.y
+mul.f r3.y, r8.w, r9.y
+mov.f32f32 r3.w, r2.z
+mov.f32f32 r7.y, r2.w
+sel.b32 r3.x, r3.x, r7.w, r4.x
+mul.f r1.x, r3.y, r1.x
+mov.f32f32 r3.y, r3.w
+mov.f32f32 r3.w, r7.y
+mov.f32f32 r4.x, r3.x
+cmps.f.ne r1.x, r1.x, c4.x
+add.f r7.y, r6.w, c9.y
+mad.f32 r8.w, r5.x, r9.x, c5.z
+sel.b32 r6.z, r11.x, r6.z, r11.z
+sel.b32 r2.x, r2.x, r1.x, r1.z
+mov.f32f32 r1.z, r1.z
+cov.u32f32 r8.y, r8.y
+sel.b32 r0.y, r8.x, r1.x, r0.y
+mul.f r7.y, r7.y, c7.w
+sel.b32 r0.z, r9.z, r1.x, r0.z
+cmps.f.ne r8.x, r8.y, c4.x
+sel.b32 r2.z, r6.w, r1.x, r2.z
+sel.b32 r2.w, r8.w, r1.x, r2.w
+add.f r6.w, r8.w, c9.z
+sel.b32 r1.z, r2.x, r8.x, r1.z
+sel.b32 r0.y, r0.y, r8.x, r1.y
+sel.b32 r0.w, r7.y, r1.x, r0.w
+sel.b32 r0.z, r0.z, r8.x, r8.z
+cmps.f.ge r1.y, r1.z, r9.w
+mov.f32f32 r2.x, r0.y
+sel.b32 r0.w, r0.w, r8.x, r2.y
+mov.f32f32 r2.y, r0.z
+cov.u32f32 r1.y, r1.y
+cmps.f.ge r7.y, r9.w, c4.x
mov.f32f32 r2.x, r2.x
-sel.b32 r1.w, r6.y, r8.y, r1.w
-cov.u32f32 r6.y, r6.w
-mov.f32f32 r6.w, r7.w
-mov.f32f32 r7.w, r1.z
-mov.f32f32 r8.x, r0.y
-mov.f32f32 r8.y, r0.x
-mov.f32f32 r8.z, r1.z
-mov.f32f32 r0.x, r0.x
+mov.f32f32 r8.y, r0.w
+mov.f32f32 r2.y, r2.y
+cov.u32f32 r7.y, r7.y
+cmps.f.ge r8.z, r9.w, c5.x
+mov.f32f32 r8.y, r8.y
+sel.b32 r2.z, r2.z, r8.x, r3.y
+sel.b32 r2.w, r2.w, r8.x, r3.w
+cov.u32f32 r3.y, r8.z
+mul.f r3.w, r6.w, c7.w
+sel.b32 r6.z, r6.z, r7.w, r11.z
+mov.f32f32 r6.w, r2.z
+mul.f r3.y, r7.y, r3.y
+mov.f32f32 r7.y, r2.w
+sel.b32 r3.x, r3.w, r1.x, r3.x
+mov.f32f32 r3.w, c6.y
+mul.f r1.y, r3.y, r1.y
+mov.f32f32 r3.y, r6.w
+mov.f32f32 r6.w, r7.y
+mov.f32f32 r4.x, r4.x
+cmps.f.ne r1.y, r1.y, c4.x
+add.f r7.y, r10.y, c9.y
+add.f r7.w, r10.z, c9.w
+mad.f32 r8.z, r5.x, r9.w, c5.z
+sel.b32 r6.x, r6.x, r1.y, r1.z
mov.f32f32 r1.z, r1.z
-sqrt r6.w, r6.w
-(ss)mov.f32f32 r6.w, r6.w
-mov.f32f32 r1.w, r1.w
+cov.u32f32 r6.y, r6.y
+mul.f r7.y, r7.y, c5.w
+mul.f r7.w, r7.w, c5.w
+sel.b32 r0.z, r10.y, r1.y, r0.z
cmps.f.ne r6.y, r6.y, c4.x
-mov.f32f32 r7.w, r7.w
-add.f r6.w, r7.x, (neg)r6.w
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r7.x, r8.x
-mov.f32f32 r8.x, r0.y
-mov.f32f32 r6.w, r6.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r8.w, c12.w
-mov.f32f32 r9.x, c12.z
-cmps.f.ge r6.z, r6.z, r6.w
-cmps.f.ge r9.y, r6.w, c4.x
-mov.f32f32 r9.z, r6.w
-mov.f32f32 r9.w, r6.w
-cov.u32f32 r6.z, r6.z
-cov.u32f32 r9.y, r9.y
-mad.f32 r7.y, r7.y, r9.z, c5.y
-mad.f32 r7.w, r7.w, r9.w, c5.y
+sel.b32 r0.y, r7.y, r1.y, r0.y
+sel.b32 r0.w, r7.w, r1.y, r0.w
+sel.b32 r2.z, r10.z, r1.y, r2.z
+sel.b32 r1.z, r6.x, r6.y, r1.z
+mad.f32 r6.x, r7.z, r7.z, r7.x
+sel.b32 r0.y, r0.y, r6.y, r2.x
+sel.b32 r0.w, r0.w, r6.y, r8.y
+sel.b32 r0.z, r0.z, r6.y, r2.y
+sel.b32 r2.x, r2.z, r6.y, r3.y
+sel.b32 r2.y, r8.z, r1.y, r2.w
+sel.b32 r2.z, r3.x, r8.x, r4.x
+sqrt r2.w, r6.x
+(ss)mov.f32f32 r3.x, r2.w
+mov.f32f32 r3.y, r0.y
+mov.f32f32 r4.x, r0.w
+(ss)mov.f32f32 r6.x, r0.z
+mul.f r2.w, r2.w, r3.x
+mov.f32f32 r3.y, r3.y
+mov.f32f32 r4.x, r4.x
+mov.f32f32 r6.x, r6.x
+add.f r2.w, c6.y, (neg)r2.w
+mov.f32f32 r7.x, r2.x
+sel.b32 r2.y, r2.y, r6.y, r6.w
+add.f r6.w, r8.z, c9.z
+sel.b32 r1.x, r3.w, r1.x, r6.z
+mov.f32f32 r3.w, r6.z
+mov.f32f32 r6.z, r2.z
+sqrt r2.w, r2.w
+(ss)add.f r0.x, r0.x, (neg)r2.w
+(ss)mov.f32f32 r2.w, r7.x
+mov.f32f32 r7.x, r2.y
+mul.f r6.w, r6.w, c5.w
+mov.f32f32 r7.y, r0.x
+mov.f32f32 r3.w, r3.w
mov.f32f32 r6.z, r6.z
-cmps.f.ge r9.z, r6.w, c5.x
-add.f r9.w, r7.y, c12.x
-add.f r10.x, r7.w, c12.y
-mov.f32f32 r6.w, r6.w
+cmps.f.ge r0.x, r0.x, c4.x
+cmps.f.ge r1.z, r1.z, r7.y
+mov.f32f32 r7.x, r7.x
+sel.b32 r2.z, r6.w, r1.y, r2.z
+sel.b32 r1.x, r1.x, r8.x, r3.w
+cov.u32f32 r1.z, r1.z
+cmps.f.ge r3.w, r7.y, c5.x
+sel.b32 r2.z, r2.z, r6.y, r6.z
+mov.f32f32 r6.z, c5.w
+mov.f32f32 r6.w, r1.x
+cov.u32f32 r3.w, r3.w
+cov.u32f32 r0.x, r0.x
+mov.f32f32 r7.z, r2.z
+sel.b32 r1.x, r6.z, r1.y, r1.x
+mov.f32f32 r1.y, r6.w
+mul.f r0.x, r0.x, r3.w
+mov.f32f32 r3.w, r7.z
+mad.f32 r6.z, r5.z, r7.y, c5.y
+mad.f32 r6.w, r5.w, r7.y, c5.y
+mul.f r0.x, r0.x, r1.z
+sel.b32 r1.x, r1.x, r6.y, r1.y
+add.f r1.y, r6.z, c12.x
+add.f r1.z, r6.w, c12.y
+cmps.f.ne r0.x, r0.x, c4.x
+mov.f32f32 r6.y, c5.z
+mad.f32 r7.y, r5.x, r7.y, c5.z
+mov.f32f32 r7.z, r1.x
+sel.b32 r0.y, r1.y, r0.x, r0.y
+cmps.f.ge r1.y, c6.y, r3.x
+sel.b32 r0.z, r6.z, r0.x, r0.z
+sel.b32 r1.x, r6.y, r0.x, r1.x
+mov.f32f32 r3.x, r7.z
+cov.u32f32 r1.y, r1.y
+sel.b32 r2.x, r6.w, r0.x, r2.x
+sel.b32 r2.y, r7.y, r0.x, r2.y
+sel.b32 r0.w, r1.z, r0.x, r0.w
+cmps.f.ne r1.y, r1.y, c4.x
+add.f r1.z, r7.y, c12.z
+mov.f32f32 r6.y, c12.w
+mov.f32f32 r6.z, c12.w
+sel.b32 r0.y, r0.y, r1.y, r3.y
+sel.b32 r0.z, r0.z, r1.y, r6.x
+sel.b32 r1.x, r1.x, r1.y, r3.x
+sel.b32 r2.x, r2.x, r1.y, r2.w
+mul.f r2.w, r0.y, r5.z
+sel.b32 r0.w, r0.w, r1.y, r4.x
+add.f r3.x, c15.x, (neg)r0.z
+mov.f32f32 r3.y, r1.x
+add.f r4.x, c5.y, (neg)r0.z
+mad.f32 r2.w, r0.w, r5.w, r2.w
+sel.b32 r0.x, r1.z, r0.x, r2.z
+mul.f r1.z, r3.x, r3.x
+add.f r2.z, c15.y, (neg)r2.x
+mov.f32f32 r6.x, r3.y
+sel.b32 r0.x, r0.x, r1.y, r3.w
+mov.f32f32 r3.w, r4.x
+mad.f32 r1.z, r2.z, r2.z, r1.z
+sel.b32 r1.y, r2.y, r1.y, r7.x
+mad.f32 r2.y, r0.x, r5.x, r2.w
+cmps.f.eq r2.w, r6.x, c5.z
+mul.f r4.x, r4.x, r3.w
+add.f r6.x, c5.y, (neg)r2.x
+mul.f r6.w, r2.y, r0.y
+add.f r7.x, c15.z, (neg)r1.y
+cov.u32f32 r2.w, r2.w
+mov.f32f32 r7.y, r6.x
+mul.f r6.w, c5.w, r6.w
+mad.f32 r1.z, r7.x, r7.x, r1.z
+cmps.f.ne r2.w, r2.w, c4.x
+cmps.f.eq r7.z, r1.x, c4.x
+add.f r5.z, r5.z, (neg)r6.w
+mad.f32 r4.x, r6.x, r7.y, r4.x
+add.f r6.x, c5.z, (neg)r1.y
+mul.f r6.w, r2.y, r0.w
+mov.f32f32 r7.w, r5.z
+absneg.f r5.z, (neg)r5.z
+rsq r1.z, r1.z
+(ss)mov.f32f32 r8.x, r1.z
+cov.u32f32 r7.z, r7.z
+absneg.f r8.y, (neg)r7.w
+absneg.f r8.z, (neg)r7.w
+absneg.f r8.w, (neg)r7.w
+mul.f r9.x, r5.z, r7.w
+mul.f r9.y, r8.y, r7.w
+mul.f r6.w, c5.w, r6.w
+mul.f r9.z, r8.z, r7.w
+mul.f r9.w, r8.w, r7.w
+mul.f r3.x, r3.x, r8.x
+add.f r5.w, r5.w, (neg)r6.w
+add.f r10.x, c11.y, (neg)r0.z
+add.f r10.y, c14.x, (neg)r0.z
+add.f r10.z, c14.y, (neg)r0.z
+mov.f32f32 r10.w, r5.w
+absneg.f r5.w, (neg)r5.w
+mov.f32f32 r11.x, r3.x
+mul.f r3.x, r0.y, r3.x
+absneg.f r11.y, (neg)r10.w
+absneg.f r11.z, (neg)r10.w
+absneg.f r11.w, (neg)r10.w
+mad.f32 r9.x, r5.w, r10.w, r9.x
+mad.f32 r9.y, r11.y, r10.w, r9.y
+mul.f r2.y, r2.y, r0.x
+mad.f32 r9.z, r11.z, r10.w, r9.z
+mad.f32 r9.w, r11.w, r10.w, r9.w
+absneg.f r11.x, (neg)r11.x
+mul.f r2.y, c5.w, r2.y
+mul.f r2.z, r2.z, r8.x
+mul.f r8.x, r10.x, r8.y
+mul.f r8.y, r10.y, r8.z
+add.f r5.x, r5.x, (neg)r2.y
+mul.f r8.z, r0.y, r11.x
+mov.f32f32 r10.x, r2.z
+mad.f32 r2.z, r0.w, r2.z, r3.x
+mov.f32f32 r3.x, r5.x
+absneg.f r5.x, (neg)r5.x
+absneg.f r10.x, (neg)r10.x
+(ss)mul.f r1.z, r7.x, r1.z
+absneg.f r7.x, (neg)r3.x
+absneg.f r10.y, (neg)r3.x
+absneg.f r12.x, (neg)r3.x
+mad.f32 r9.x, r5.x, r3.x, r9.x
+mad.f32 r9.y, r7.x, r3.x, r9.y
+mad.f32 r9.z, r10.y, r3.x, r9.z
+mad.f32 r9.w, r12.x, r3.x, r9.w
+mad.f32 r8.z, r0.w, r10.x, r8.z
+mov.f32f32 r12.y, r1.z
+mad.f32 r1.z, r0.x, r1.z, r2.z
+add.f r2.z, c11.y, (neg)r2.x
+rcp r9.y, r9.y
+nop
+rcp r9.x, r9.x
+add.f r12.z, c11.x, (neg)r0.z
+rcp r9.z, r9.z
+add.f r12.w, c14.y, (neg)r2.x
+mad.f32 r2.z, r2.z, r11.y, r8.x
+add.f r8.x, c11.w, (neg)r1.y
+rcp r9.w, r9.w
+mul.f r8.w, r10.z, r8.w
+mad.f32 r8.y, r12.w, r11.z, r8.y
+add.f r10.z, c14.z, (neg)r1.y
+mad.f32 r2.z, r8.x, r7.x, r2.z
+add.f r7.x, c14.w, (neg)r2.x
+mul.f r5.z, r12.z, r5.z
+add.f r8.x, c11.y, (neg)r2.x
+(ss)mul.f r2.z, r2.z, r9.y
+mad.f32 r8.y, r10.z, r10.y, r8.y
+mad.f32 r7.x, r7.x, r11.w, r8.w
+add.f r8.w, c14.z, (neg)r1.y
+(ss)mov.f32f32 r9.y, r2.z
+mul.f r8.y, r8.y, r9.z
+mad.f32 r5.z, r8.x, r5.w, r5.z
+add.f r5.w, c11.w, (neg)r1.y
+mad.f32 r8.x, r7.w, r9.y, r0.z
+mov.f32f32 r9.z, r8.y
+mad.f32 r7.x, r8.w, r12.x, r7.x
+mad.f32 r5.x, r5.w, r5.x, r5.z
+add.f r5.z, r8.x, c6.x
+mad.f32 r5.w, r7.w, r9.z, r0.z
+mul.f r7.x, r7.x, r9.w
+mul.f r5.x, r5.x, r9.x
+mov.f32f32 r8.x, r5.z
+add.f r5.w, r5.w, c9.x
+mov.f32f32 r8.w, r7.x
+mov.f32f32 r9.x, r5.x
+mul.f r5.z, r5.z, r8.x
+mad.f32 r8.x, r10.w, r9.y, r2.x
+mov.f32f32 r9.w, r5.w
+mad.f32 r10.y, r7.w, r8.w, r0.z
+mad.f32 r10.z, r7.w, r9.x, r0.z
+add.f r8.x, r8.x, c6.x
+mul.f r5.w, r5.w, r9.w
+add.f r9.w, r10.y, c9.y
+mad.f32 r10.y, r10.w, r9.z, r2.x
+mov.f32f32 r11.y, r8.x
+add.f r10.z, r10.z, c12.x
+mov.f32f32 r11.z, r9.w
+add.f r10.y, r10.y, c9.y
+mad.f32 r5.z, r8.x, r11.y, r5.z
+mad.f32 r8.x, r3.x, r9.y, r1.y
+mul.f r9.y, r9.w, r11.z
+mov.f32f32 r9.w, r10.y
+mad.f32 r11.y, r10.w, r8.w, r2.x
+add.f r8.x, r8.x, c6.y
+mov.f32f32 r11.z, r10.z
+mad.f32 r5.w, r10.y, r9.w, r5.w
+add.f r9.w, r11.y, c9.w
+mov.f32f32 r10.y, r8.x
+mad.f32 r9.z, r3.x, r9.z, r1.y
+mul.f r10.z, r10.z, r11.z
+mad.f32 r11.y, r10.w, r9.x, r2.x
+mad.f32 r5.z, r8.x, r10.y, r5.z
+add.f r8.x, r9.z, c9.z
+mov.f32f32 r9.z, r9.w
+add.f r10.y, r11.y, c12.y
+absneg.f r11.y, (neg)r12.y
+mov.f32f32 r11.z, r8.x
+mad.f32 r9.y, r9.w, r9.z, r9.y
+sqrt r5.z, r5.z
+(ss)mov.f32f32 r9.z, r5.z
+mad.f32 r8.w, r3.x, r8.w, r1.y
+mad.f32 r5.w, r8.x, r11.z, r5.w
+mov.f32f32 r8.x, r10.y
+(ss)mul.f r5.z, r5.z, r9.z
+add.f r8.w, r8.w, c9.z
+cmps.f.ge r9.z, c3.w, r9.z
+mad.f32 r8.x, r10.y, r8.x, r10.z
+add.f r5.z, c4.w, (neg)r5.z
+sqrt r5.w, r5.w
+mov.f32f32 r9.w, r8.w
+(ss)mov.f32f32 r10.y, r5.w
+mad.f32 r9.x, r3.x, r9.x, r1.y
cov.u32f32 r9.z, r9.z
-mov.f32f32 r10.y, c12.w
-mov.f32f32 r10.z, c12.w
-mov.f32f32 r10.w, c13.z
-mul.f r9.y, r9.y, r9.z
-mad.f32 r6.w, r7.x, r6.w, c5.z
-(rpt1)nop
-mov.f32f32 r7.x, r9.y
-add.f r9.y, r6.w, c12.z
-mov.f32f32 r9.z, c13.y
-mov.f32f32 r11.x, c12.z
-mul.f r6.z, r7.x, r6.z
-mov.f32f32 r7.x, c12.w
-mov.f32f32 r11.y, c12.z
-mov.f32f32 r11.z, c12.w
-mov.f32f32 r6.z, r6.z
-mov.f32f32 r11.w, c12.z
-mov.f32f32 r12.x, c12.w
-mov.f32f32 r12.y, c12.z
-cmps.f.ne r6.z, r6.z, c4.x
-mov.f32f32 r12.z, c5.z
-mov.f32f32 r12.w, c12.w
-mov.f32f32 r13.x, c13.y
-sel.b32 r9.w, r9.w, r6.z, r0.w
-sel.b32 r7.y, r7.y, r6.z, r1.y
-sel.b32 r12.z, r12.z, r6.z, r7.z
-sel.b32 r7.w, r7.w, r6.z, r0.z
-sel.b32 r0.w, r9.w, r6.y, r0.w
-sel.b32 r1.y, r7.y, r6.y, r1.y
-sel.b32 r7.y, r12.z, r6.y, r7.z
-sel.b32 r0.z, r7.w, r6.y, r0.z
-mov.f32f32 r7.z, r0.w
-mov.f32f32 r7.w, r1.y
-mov.f32f32 r9.w, r7.y
-mov.f32f32 r12.z, r1.y
-mul.f r0.x, r7.z, r0.x
-sel.b32 r7.z, r10.x, r6.z, r1.x
-add.f r7.w, c15.x, (neg)r7.w
-mov.f32f32 r10.x, r9.w
-add.f r12.z, c5.y, (neg)r12.z
-sel.b32 r1.x, r7.z, r6.y, r1.x
-mul.f r7.z, r7.w, r7.w
-mov.f32f32 r13.y, r0.z
-mov.f32f32 r13.z, r10.x
-mov.f32f32 r13.w, r1.x
-mov.f32f32 r12.z, r12.z
-add.f r13.y, c15.y, (neg)r13.y
-mov.f32f32 r13.z, r13.z
-mad.f32 r0.x, r13.w, r1.z, r0.x
-mul.f r1.z, r12.z, r12.z
-mov.f32f32 r13.w, r0.z
-mad.f32 r7.z, r13.y, r13.y, r7.z
-mov.f32f32 r0.x, r0.x
-sel.b32 r9.y, r9.y, r6.z, r1.w
-cmps.f.eq r13.z, r13.z, c5.z
-mov.f32f32 r7.z, r7.z
-sel.b32 r6.z, r6.w, r6.z, r2.x
-sel.b32 r1.w, r9.y, r6.y, r1.w
-cov.u32f32 r6.w, r13.z
-add.f r9.y, c5.y, (neg)r13.w
-sel.b32 r2.x, r6.z, r6.y, r2.x
-mov.f32f32 r6.y, r1.w
-mov.f32f32 r6.z, r6.w
-mov.f32f32 r6.w, r9.y
-mov.f32f32 r9.y, r2.x
-mad.f32 r0.x, r6.y, r8.x, r0.x
-mov.f32f32 r6.y, r0.w
-mov.f32f32 r8.x, r1.x
-mov.f32f32 r13.z, r1.w
-cmps.f.ne r6.z, r6.z, c4.x
-mul.f r6.y, r0.x, r6.y
-add.f r9.y, c15.z, (neg)r9.y
-mul.f r8.x, r0.x, r8.x
-mul.f r0.x, r0.x, r13.z
-mov.f32f32 r6.y, r6.y
-mad.f32 r7.z, r9.y, r9.y, r7.z
-mov.f32f32 r8.x, r8.x
-mov.f32f32 r0.x, r0.x
-mul.f r6.y, c5.w, r6.y
-mov.f32f32 r13.z, r7.y
-mul.f r8.x, c5.w, r8.x
-mul.f r0.x, c5.w, r0.x
-mov.f32f32 r6.y, r6.y
-rsq r7.z, r7.z
-(ss)mov.f32f32 r7.z, r7.z
-mov.f32f32 r8.x, r8.x
-mov.f32f32 r0.x, r0.x
-add.f r6.y, r8.y, (neg)r6.y
-mul.f r7.w, r7.w, r7.z
-add.f r8.y, r8.z, (neg)r8.x
-add.f r0.y, r0.y, (neg)r0.x
-mov.f32f32 r6.y, r6.y
-mov.f32f32 r7.w, r7.w
-mov.f32f32 r8.z, r0.w
-mov.f32f32 r8.y, r8.y
-absneg.f r13.w, (neg)r6.y
-absneg.f r14.x, (neg)r6.y
-absneg.f r14.y, (neg)r6.y
-absneg.f r14.z, (neg)r6.y
-mul.f r14.w, r13.w, r6.y
-absneg.f r15.x, (neg)r8.y
-mul.f r15.y, r14.x, r6.y
-mul.f r15.z, r14.y, r6.y
-absneg.f r15.w, (neg)r8.y
-mad.f32 r14.w, r15.x, r8.y, r14.w
-absneg.f r16.x, (neg)r8.y
-mul.f r16.y, r14.z, r6.y
-mad.f32 r15.y, r15.w, r8.y, r15.y
-mov.f32f32 r14.w, r14.w
-mov.f32f32 r0.y, r0.y
-mad.f32 r15.z, r16.x, r8.y, r15.z
-mov.f32f32 r15.y, r15.y
-absneg.f r16.z, (neg)r8.y
-absneg.f r16.w, (neg)r0.y
-absneg.f r17.x, (neg)r0.y
-mov.f32f32 r15.z, r15.z
-absneg.f r17.y, (neg)r0.y
-mad.f32 r14.w, r16.w, r0.y, r14.w
-mad.f32 r15.y, r17.x, r0.y, r15.y
-mad.f32 r16.y, r16.z, r8.y, r16.y
-mad.f32 r15.z, r17.y, r0.y, r15.z
-mov.f32f32 r14.w, r14.w
-mov.f32f32 r15.y, r15.y
-mov.f32f32 r16.y, r16.y
-mov.f32f32 r15.z, r15.z
-absneg.f r17.z, (neg)r0.y
-absneg.f r17.w, (neg)r7.w
-mov.f32f32 r18.x, r0.w
-rcp r14.w, r14.w
-(ss)mov.f32f32 r14.w, r14.w
-mov.f32f32 r18.y, r1.y
-rcp r15.y, r15.y
-(ss)mov.f32f32 r15.y, r15.y
-rcp r15.z, r15.z
-mov.f32f32 r18.z, r1.y
-(ss)mov.f32f32 r15.z, r15.z
-add.f r18.y, c11.y, (neg)r18.y
-mov.f32f32 r18.w, r1.y
-add.f r18.z, c14.x, (neg)r18.z
-mad.f32 r16.y, r17.z, r0.y, r16.y
-mul.f r13.w, r18.y, r13.w
-mov.f32f32 r18.y, r0.z
-mul.f r14.x, r18.z, r14.x
-add.f r18.z, c14.y, (neg)r18.w
-mov.f32f32 r18.w, r0.z
-add.f r18.y, c11.y, (neg)r18.y
-mov.f32f32 r16.y, r16.y
-mul.f r14.y, r18.z, r14.y
-add.f r18.z, c14.y, (neg)r18.w
-mad.f32 r13.w, r18.y, r15.x, r13.w
-mov.f32f32 r15.x, r0.z
-mul.f r18.x, r18.x, r17.w
-mad.f32 r14.x, r18.z, r15.w, r14.x
-mov.f32f32 r13.w, r13.w
-mov.f32f32 r15.w, r2.x
-add.f r15.x, c14.w, (neg)r15.x
-mov.f32f32 r14.x, r14.x
-mov.f32f32 r18.y, r2.x
-add.f r15.w, c11.w, (neg)r15.w
-mad.f32 r14.y, r15.x, r16.x, r14.y
-rcp r15.x, r16.y
-(ss)mov.f32f32 r15.x, r15.x
-add.f r16.x, c14.z, (neg)r18.y
-mad.f32 r13.w, r15.w, r16.w, r13.w
-mov.f32f32 r14.y, r14.y
-mov.f32f32 r15.w, r2.x
-mad.f32 r14.x, r16.x, r17.x, r14.x
-mov.f32f32 r13.w, r13.w
-mov.f32f32 r16.x, r1.y
-add.f r15.w, c14.z, (neg)r15.w
-mov.f32f32 r14.x, r14.x
-mul.f r13.w, r13.w, r14.w
-add.f r14.w, c11.x, (neg)r16.x
-mad.f32 r14.y, r15.w, r17.y, r14.y
-mul.f r14.x, r14.x, r15.y
-mov.f32f32 r13.w, r13.w
-mov.f32f32 r15.y, r1.y
-mov.f32f32 r14.y, r14.y
+mad.f32 r8.w, r8.w, r9.w, r9.y
+(ss)mul.f r5.w, r5.w, r10.y
+sqrt r5.z, r5.z
+(ss)add.f r2.z, r2.z, (neg)r5.z
+(ss)add.f r5.z, r9.x, c12.z
+cmps.f.ge r9.x, c6.z, r10.y
+cmps.f.ne r9.y, r9.z, c4.x
+mov.f32f32 r9.z, r2.z
+add.f r5.w, c6.w, (neg)r5.w
+sqrt r8.w, r8.w
+(ss)mov.f32f32 r9.w, r8.w
+mov.f32f32 r10.y, r5.z
+cmps.f.ge r10.z, r9.z, c4.x
+mad.f32 r11.z, r7.w, r9.z, r0.z
+(ss)mul.f r8.w, r8.w, r9.w
+mad.f32 r11.w, r10.w, r9.z, r2.x
+cov.u32f32 r10.z, r10.z
+cmps.f.ge r12.x, r9.z, c5.x
+add.f r12.y, r11.z, c6.x
+sqrt r5.w, r5.w
+add.f r8.w, c4.w, (neg)r8.w
+(ss)add.f r5.w, r8.y, (neg)r5.w
+cov.u32f32 r8.y, r12.x
+mul.f r12.x, r12.y, c5.w
+add.f r12.y, r11.w, c6.x
+mov.f32f32 r12.z, r5.w
+mul.f r8.y, r10.z, r8.y
+cmps.f.ge r10.z, c3.z, r9.z
+sqrt r8.w, r8.w
+mul.f r12.y, r12.y, c5.w
+mad.f32 r12.w, r10.w, r12.z, r2.x
+mad.f32 r13.x, r7.w, r12.z, r0.z
+cov.u32f32 r10.z, r10.z
+(ss)add.f r7.x, r7.x, (neg)r8.w
+(ss)add.f r8.w, r12.w, c9.y
+mad.f32 r9.z, r3.x, r9.z, r1.y
+mul.f r8.y, r8.y, r10.z
+add.f r10.z, r13.x, c9.x
+mov.f32f32 r13.y, r7.x
+mul.f r13.z, r8.w, c7.w
+cmps.f.ne r8.y, r8.y, c4.x
+mov.f32f32 r13.w, c3.z
+add.f r14.w, r9.z, c6.y
+mov.f32f32 r15.z, c4.x
+sel.b32 r12.x, r12.x, r8.y, r14.x
+sel.b32 r2.z, r2.z, r8.y, r13.w
mov.f32f32 r14.x, r14.x
-mad.f32 r15.y, r6.y, r13.w, r15.y
-mov.f32f32 r15.w, r1.y
-mul.f r14.y, r14.y, r15.z
-mad.f32 r15.z, r6.y, r14.x, r15.w
-add.f r15.y, r15.y, c6.x
-mul.f r14.z, r14.w, r14.z
-mov.f32f32 r14.y, r14.y
-mov.f32f32 r14.w, r0.z
-mov.f32f32 r15.y, r15.y
-add.f r15.z, r15.z, c9.x
-mov.f32f32 r15.w, r1.y
-add.f r14.w, c11.y, (neg)r14.w
-mul.f r15.y, r15.y, r15.y
-mov.f32f32 r16.x, r0.z
-mov.f32f32 r15.z, r15.z
-mad.f32 r16.x, r8.y, r13.w, r16.x
-mad.f32 r15.w, r6.y, r14.y, r15.w
-mad.f32 r14.z, r14.w, r16.z, r14.z
-mul.f r14.w, r15.z, r15.z
-add.f r15.z, r16.x, c6.x
-mov.f32f32 r16.x, r0.z
-add.f r15.w, r15.w, c9.y
-mad.f32 r16.x, r8.y, r14.x, r16.x
-mov.f32f32 r15.z, r15.z
+sel.b32 r12.y, r12.y, r8.y, r14.z
+sel.b32 r11.z, r11.z, r8.y, r14.y
+sel.b32 r2.z, r2.z, r9.y, r13.w
+sel.b32 r12.x, r12.x, r9.y, r14.x
+mov.f32f32 r13.w, r14.z
+mov.f32f32 r14.x, r14.y
+cmps.f.ge r14.y, r2.z, r12.z
+mov.f32f32 r14.z, r12.x
+sel.b32 r12.y, r12.y, r9.y, r13.w
+sel.b32 r11.z, r11.z, r9.y, r14.x
+cov.u32f32 r13.w, r14.y
+cmps.f.ge r14.x, r12.z, c4.x
+mov.f32f32 r14.y, r14.z
+mov.f32f32 r14.z, r12.y
+mov.f32f32 r15.w, r11.z
+cov.u32f32 r14.x, r14.x
+cmps.f.ge r16.x, r12.z, c5.x
mov.f32f32 r14.z, r14.z
mov.f32f32 r15.w, r15.w
-(ss)mov.f32f32 r16.y, r2.x
-mad.f32 r15.y, r15.z, r15.z, r15.y
-add.f r15.z, r16.x, c9.y
-mul.f r15.w, r15.w, r15.w
-mov.f32f32 r16.x, r0.z
-mov.f32f32 r15.y, r15.y
-mov.f32f32 r16.z, r2.x
-mov.f32f32 r15.z, r15.z
-mad.f32 r16.z, r0.y, r13.w, r16.z
-mad.f32 r16.x, r8.y, r14.y, r16.x
-add.f r16.y, c11.w, (neg)r16.y
-mad.f32 r14.w, r15.z, r15.z, r14.w
-add.f r15.z, r16.z, c6.y
-add.f r16.x, r16.x, c9.w
-mad.f32 r14.z, r16.y, r17.z, r14.z
-mov.f32f32 r14.w, r14.w
-mov.f32f32 r15.z, r15.z
-mov.f32f32 r16.y, r2.x
-mov.f32f32 r16.x, r16.x
-mad.f32 r16.y, r0.y, r14.x, r16.y
-mad.f32 r15.y, r15.z, r15.z, r15.y
-mov.f32f32 r14.z, r14.z
-mad.f32 r15.z, r16.x, r16.x, r15.w
-add.f r15.w, r16.y, c9.z
-mov.f32f32 r15.y, r15.y
-mul.f r14.z, r14.z, r15.x
-mov.f32f32 r15.x, r15.z
-mov.f32f32 r15.z, r15.w
-mov.f32f32 r15.w, r2.x
-mov.f32f32 r14.z, r14.z
-mad.f32 r15.w, r0.y, r14.y, r15.w
-sqrt r15.y, r15.y
-(ss)mov.f32f32 r15.y, r15.y
-mad.f32 r14.w, r15.z, r15.z, r14.w
-mov.f32f32 r15.z, r1.y
-add.f r15.w, r15.w, c9.z
-mov.f32f32 r16.x, r15.y
-mov.f32f32 r16.y, r15.y
-mov.f32f32 r14.w, r14.w
-mov.f32f32 r15.w, r15.w
-mad.f32 r15.z, r6.y, r14.z, r15.z
-mul.f r16.x, r16.x, r16.y
-cmps.f.ge r15.y, c3.w, r15.y
-mad.f32 r15.x, r15.w, r15.w, r15.x
-add.f r15.z, r15.z, c12.x
-mov.f32f32 r15.w, r16.x
-sqrt r14.w, r14.w
-(ss)mov.f32f32 r14.w, r14.w
+sel.b32 r11.w, r11.w, r8.y, r15.x
+cov.u32f32 r16.x, r16.x
mov.f32f32 r15.x, r15.x
-mov.f32f32 r15.z, r15.z
-add.f r15.w, c4.w, (neg)r15.w
-mov.f32f32 r16.x, r14.w
-mov.f32f32 r16.y, r14.w
-mul.f r15.z, r15.z, r15.z
-mov.f32f32 r15.w, r15.w
-sqrt r15.x, r15.x
-(ss)mov.f32f32 r15.x, r15.x
-mul.f r16.x, r16.x, r16.y
-mov.f32f32 r16.y, r0.z
-cmps.f.ge r14.w, c6.z, r14.w
-cov.u32f32 r15.y, r15.y
-mov.f32f32 r16.x, r16.x
-sqrt r15.w, r15.w
-(ss)mov.f32f32 r15.w, r15.w
-mov.f32f32 r13.w, r13.w
-mov.f32f32 r16.z, r15.x
-add.f r16.x, c6.w, (neg)r16.x
-mov.f32f32 r16.w, r15.x
-add.f r13.w, r13.w, (neg)r15.w
-mad.f32 r15.w, r8.y, r14.z, r16.y
-mov.f32f32 r16.x, r16.x
-mul.f r16.y, r16.z, r16.w
-mov.f32f32 r13.w, r13.w
-add.f r15.w, r15.w, c12.y
-cmps.f.ge r15.x, c3.w, r15.x
-cov.u32f32 r14.w, r14.w
-cmps.f.ge r16.z, r13.w, c4.x
-mov.f32f32 r16.w, r13.w
-sqrt r16.x, r16.x
-mov.f32f32 r16.y, r16.y
-mov.f32f32 r17.x, r6.y
-cov.u32f32 r16.z, r16.z
-cmps.f.ge r17.y, r13.w, c5.x
-(ss)mov.f32f32 r16.x, r16.x
-add.f r16.y, c4.w, (neg)r16.y
-mov.f32f32 r17.z, r13.w
-cov.u32f32 r17.y, r17.y
-mov.f32f32 r17.x, r17.x
-mov.f32f32 r14.x, r14.x
-mov.f32f32 r16.y, r16.y
-mul.f r16.z, r16.z, r17.y
-mov.f32f32 r17.y, r1.y
-add.f r14.x, r14.x, (neg)r16.x
-mov.f32f32 r16.x, r8.y
-mov.f32f32 r16.z, r16.z
-cmps.f.ge r18.y, c3.z, r13.w
-mov.f32f32 r17.y, r17.y
-mov.f32f32 r14.x, r14.x
-sqrt r16.y, r16.y
-(ss)mov.f32f32 r16.y, r16.y
-cov.u32f32 r18.y, r18.y
-mov.f32f32 r17.y, r17.y
-mov.f32f32 r18.z, r14.x
-mov.f32f32 r14.y, r14.y
-mul.f r16.z, r16.z, r18.y
-mad.f32 r16.w, r17.x, r16.w, r17.y
-mov.f32f32 r17.x, r6.y
-add.f r14.y, r14.y, (neg)r16.y
-mov.f32f32 r16.y, r16.z
-add.f r16.z, r16.w, c6.x
-mov.f32f32 r17.x, r17.x
-mov.f32f32 r14.y, r14.y
-cmps.f.ne r16.y, r16.y, c4.x
-mov.f32f32 r17.y, r13.w
-mul.f r16.z, r16.z, c5.w
-mov.f32f32 r16.x, r16.x
-mov.f32f32 r18.w, r18.w
-mov.f32f32 r17.y, r17.y
-mov.f32f32 r19.x, c3.z
-mov.f32f32 r19.y, r0.z
-mov.f32f32 r18.y, r18.y
-mov.f32f32 r13.w, r13.w
-sel.b32 r17.y, r17.y, r16.y, r19.x
-cmps.f.ne r15.y, r15.y, c4.x
-sel.b32 r16.z, r16.z, r16.y, r18.w
-mov.f32f32 r19.y, r19.y
-sel.b32 r16.w, r16.w, r16.y, r18.y
-sel.b32 r17.y, r17.y, r15.y, r19.x
-sel.b32 r16.z, r16.z, r15.y, r18.w
-mov.f32f32 r18.w, r19.y
-sel.b32 r16.w, r16.w, r15.y, r18.y
-mov.f32f32 r18.y, r17.y
-mov.f32f32 r16.z, r16.z
-mad.f32 r16.x, r16.x, r17.z, r18.w
-mov.f32f32 r16.w, r16.w
-cmps.f.ge r17.z, r18.y, r14.x
-mov.f32f32 r16.z, r16.z
-add.f r18.y, r16.x, c6.x
-mov.f32f32 r16.w, r16.w
-cov.u32f32 r17.z, r17.z
-mov.f32f32 r18.w, r0.y
-mov.f32f32 r19.y, c4.x
-mul.f r18.y, r18.y, c5.w
-mov.f32f32 r17.z, r17.z
-cmps.f.ge r19.z, r14.x, c4.x
-mov.f32f32 r19.x, r19.x
-mov.f32f32 r18.w, r18.w
-mov.f32f32 r19.w, r20.x
-cov.u32f32 r19.z, r19.z
-cmps.f.ge r20.x, r14.x, c5.x
-sel.b32 r16.x, r16.x, r16.y, r19.x
-mov.f32f32 r20.y, r2.x
-mov.f32f32 r20.z, c3.y
-cov.u32f32 r20.x, r20.x
-sel.b32 r18.y, r18.y, r16.y, r19.w
-sel.b32 r16.x, r16.x, r15.y, r19.x
-mov.f32f32 r19.x, r20.y
-mul.f r19.z, r19.z, r20.x
-sel.b32 r18.y, r18.y, r15.y, r19.w
-mov.f32f32 r16.x, r16.x
-mov.f32f32 r19.x, r19.x
-mov.f32f32 r19.z, r19.z
-mov.f32f32 r18.y, r18.y
-mov.f32f32 r16.x, r16.x
-mad.f32 r13.w, r18.w, r13.w, r19.x
-mul.f r17.z, r19.z, r17.z
-mov.f32f32 r18.y, r18.y
-sel.b32 r18.w, r19.y, r16.y, r20.z
-mov.f32f32 r19.x, r19.y
-mov.f32f32 r17.z, r17.z
-add.f r19.y, r13.w, c6.y
-sel.b32 r18.w, r18.w, r15.y, r20.z
-sel.b32 r13.w, r13.w, r16.y, r19.x
-cmps.f.ne r17.z, r17.z, c4.x
-mov.f32f32 r17.y, r17.y
-mov.f32f32 r19.z, r14.x
-mov.f32f32 r19.w, r1.y
-mov.f32f32 r20.x, r14.x
-mov.f32f32 r20.y, r8.y
-mov.f32f32 r19.z, r19.z
-mov.f32f32 r19.w, r19.w
-sel.b32 r13.w, r13.w, r15.y, r19.x
-mul.f r19.x, r19.y, c5.w
-sel.b32 r19.y, r19.z, r17.z, r17.y
-cmps.f.ne r14.w, r14.w, c4.x
-mov.f32f32 r19.z, r19.w
-mov.f32f32 r19.w, r20.y
-mad.f32 r17.x, r17.x, r18.z, r19.z
-sel.b32 r17.y, r19.y, r14.w, r17.y
-mov.f32f32 r18.z, r0.z
-mov.f32f32 r13.w, r13.w
-add.f r19.y, r17.x, c9.x
-mov.f32f32 r19.z, r17.y
-mov.f32f32 r18.z, r18.z
-sel.b32 r17.x, r17.x, r17.z, r16.w
-mov.f32f32 r13.w, r13.w
-cmps.f.ge r19.z, r19.z, r14.y
-mul.f r19.y, r19.y, c7.w
-mov.f32f32 r18.z, r18.z
-sel.b32 r16.w, r17.x, r14.w, r16.w
-cov.u32f32 r17.x, r19.z
-sel.b32 r19.y, r19.y, r17.z, r16.z
-mad.f32 r18.z, r19.w, r20.x, r18.z
-mov.f32f32 r16.w, r16.w
-mov.f32f32 r17.x, r17.x
-cmps.f.ge r19.z, r14.y, c4.x
-sel.b32 r16.z, r19.y, r14.w, r16.z
-add.f r19.y, r18.z, c9.y
-mov.f32f32 r16.w, r16.w
-cov.u32f32 r19.z, r19.z
-cmps.f.ge r20.x, r14.y, c5.x
-mov.f32f32 r16.z, r16.z
-mul.f r20.y, r19.y, c7.w
-sel.b32 r18.z, r18.z, r17.z, r16.x
-cov.u32f32 r20.x, r20.x
-mov.f32f32 r16.z, r16.z
-sel.b32 r20.y, r20.y, r17.z, r18.y
-sel.b32 r16.x, r18.z, r14.w, r16.x
-mul.f r18.z, r19.z, r20.x
-mov.f32f32 r14.x, r14.x
-mov.f32f32 r19.z, r19.w
-mov.f32f32 r18.w, r18.w
-mov.f32f32 r18.z, r18.z
-sel.b32 r18.y, r20.y, r14.w, r18.y
-mov.f32f32 r16.x, r16.x
-mov.f32f32 r19.w, r0.y
-mul.f r17.x, r18.z, r17.x
-mov.f32f32 r18.y, r18.y
-mov.f32f32 r16.x, r16.x
-mov.f32f32 r18.z, r19.w
-mov.f32f32 r17.x, r17.x
-mov.f32f32 r18.y, r18.y
-mov.f32f32 r19.w, r2.x
-sel.b32 r16.y, r19.x, r16.y, r19.z
-cmps.f.ne r17.x, r17.x, c4.x
-mov.f32f32 r17.y, r17.y
-mov.f32f32 r19.x, r14.y
-mov.f32f32 r20.x, r14.y
-mov.f32f32 r20.y, r14.y
-mov.f32f32 r20.z, r6.y
-mov.f32f32 r19.x, r19.x
-mov.f32f32 r20.w, r8.y
-mov.f32f32 r19.w, r19.w
-sel.b32 r15.y, r16.y, r15.y, r19.z
-sel.b32 r16.y, r19.x, r17.x, r17.y
-cov.u32f32 r15.x, r15.x
-mov.f32f32 r19.x, r20.z
-mov.f32f32 r19.z, r20.w
-mov.f32f32 r20.z, r1.y
-cmps.f.ne r15.x, r15.x, c4.x
-mov.f32f32 r20.w, r0.z
-mov.f32f32 r19.w, r19.w
-mov.f32f32 r15.y, r15.y
-sel.b32 r16.y, r16.y, r15.x, r17.y
-mov.f32f32 r17.y, r20.z
-mov.f32f32 r20.z, r20.w
-mad.f32 r14.x, r18.z, r14.x, r19.w
-mov.f32f32 r16.y, r16.y
-mov.f32f32 r15.w, r15.w
-mov.f32f32 r17.y, r17.y
-mov.f32f32 r18.z, r20.z
-mad.f32 r17.y, r19.x, r20.x, r17.y
-mad.f32 r15.z, r15.w, r15.w, r15.z
-mad.f32 r15.w, r19.z, r20.y, r18.z
-sel.b32 r18.z, r14.x, r17.z, r13.w
-mov.f32f32 r15.y, r15.y
-mov.f32f32 r15.z, r15.z
-mov.f32f32 r19.x, r2.x
-add.f r19.z, r17.y, c9.y
-mad.f32 r19.x, r0.y, r14.z, r19.x
-add.f r19.w, r15.w, c9.w
-sel.b32 r17.y, r17.y, r17.x, r16.w
-sel.b32 r15.w, r15.w, r17.x, r16.x
-add.f r19.x, r19.x, c12.z
-mul.f r19.z, r19.z, c5.w
-mul.f r19.w, r19.w, c5.w
-sel.b32 r16.w, r17.y, r15.x, r16.w
-mov.f32f32 r17.y, r19.x
-sel.b32 r19.x, r19.z, r17.x, r16.z
-sel.b32 r19.z, r19.w, r17.x, r18.y
-mov.f32f32 r16.w, r16.w
-mad.f32 r15.z, r17.y, r17.y, r15.z
-sel.b32 r16.z, r19.x, r15.x, r16.z
-sel.b32 r17.y, r19.z, r15.x, r18.y
-mov.f32f32 r16.w, r16.w
-mov.f32f32 r15.z, r15.z
-mov.f32f32 r16.z, r16.z
-mov.f32f32 r17.y, r17.y
-sel.b32 r15.w, r15.w, r15.x, r16.x
-sel.b32 r13.w, r18.z, r14.w, r13.w
-add.f r14.x, r14.x, c9.z
-mov.f32f32 r16.x, r18.w
-sqrt r15.z, r15.z
-(ss)mov.f32f32 r15.z, r15.z
-mov.f32f32 r16.z, r16.z
-mov.f32f32 r17.y, r17.y
-mov.f32f32 r15.w, r15.w
-mov.f32f32 r18.y, r15.z
-mov.f32f32 r18.z, r15.z
-mov.f32f32 r13.w, r13.w
-mul.f r18.w, r14.x, c7.w
-mov.f32f32 r19.x, c6.y
-mul.f r18.y, r18.y, r18.z
-mov.f32f32 r15.w, r15.w
-mov.f32f32 r13.w, r13.w
-sel.b32 r18.z, r18.w, r17.z, r15.y
-mov.f32f32 r18.y, r18.y
-mov.f32f32 r14.y, r14.y
-sel.b32 r18.w, r19.x, r17.z, r16.x
-mov.f32f32 r19.x, r0.y
-add.f r18.y, c6.y, (neg)r18.y
-sel.b32 r15.y, r18.z, r14.w, r15.y
-sel.b32 r16.x, r18.w, r14.w, r16.x
-mov.f32f32 r18.z, r19.x
-mov.f32f32 r18.y, r18.y
-mov.f32f32 r18.w, r2.x
-mov.f32f32 r15.y, r15.y
-mov.f32f32 r16.x, r16.x
-sel.b32 r14.x, r14.x, r17.z, r0.x
-sel.b32 r17.z, r19.y, r17.z, r8.x
-nop
-sqrt r18.y, r18.y
-(ss)mov.f32f32 r18.y, r18.y
-mov.f32f32 r14.z, r14.z
-mov.f32f32 r18.w, r18.w
-mov.f32f32 r15.y, r15.y
-mov.f32f32 r16.x, r16.x
-add.f r14.z, r14.z, (neg)r18.y
-mov.f32f32 r18.y, r18.w
-mov.f32f32 r18.w, c5.w
-mad.f32 r14.y, r18.z, r14.y, r18.y
-mov.f32f32 r14.z, r14.z
-sel.b32 r0.x, r14.x, r14.w, r0.x
-sel.b32 r8.x, r17.z, r14.w, r8.x
-add.f r14.x, r14.y, c9.z
-cmps.f.ge r14.w, r16.y, r14.z
-sel.b32 r14.y, r14.y, r17.x, r13.w
-sel.b32 r16.y, r18.w, r17.x, r16.x
-mul.f r14.x, r14.x, c5.w
-cov.u32f32 r14.w, r14.w
-sel.b32 r13.w, r14.y, r15.x, r13.w
-sel.b32 r14.y, r16.y, r15.x, r16.x
-sel.b32 r14.x, r14.x, r17.x, r15.y
-mov.f32f32 r14.w, r14.w
-cmps.f.ge r16.x, r14.z, c4.x
-mov.f32f32 r13.w, r13.w
-sel.b32 r14.x, r14.x, r15.x, r15.y
-mov.f32f32 r14.y, r14.y
-cov.u32f32 r15.x, r16.x
-cmps.f.ge r15.y, r14.z, c5.x
-mov.f32f32 r13.w, r13.w
+sel.b32 r9.z, r9.z, r8.y, r15.y
+mul.f r14.w, r14.w, c5.w
+mul.f r14.x, r14.x, r16.x
+sel.b32 r11.w, r11.w, r9.y, r15.x
+mov.f32f32 r15.x, r15.y
+sel.b32 r14.w, r14.w, r8.y, r15.w
+mul.f r13.w, r14.x, r13.w
+mov.f32f32 r14.x, r11.w
+sel.b32 r9.z, r9.z, r9.y, r15.x
+mov.f32f32 r15.x, r15.w
+cmps.f.ne r13.w, r13.w, c4.x
+mul.f r10.z, r10.z, c7.w
+mad.f32 r12.z, r3.x, r12.z, r1.y
+sel.b32 r14.w, r14.w, r9.y, r15.x
+sel.b32 r5.w, r5.w, r13.w, r2.z
+mov.f32f32 r2.z, r2.z
+cov.u32f32 r9.x, r9.x
+sel.b32 r10.z, r10.z, r13.w, r12.x
+sel.b32 r12.x, r13.z, r13.w, r12.y
+sel.b32 r11.z, r13.x, r13.w, r11.z
+cmps.f.ne r9.x, r9.x, c4.x
+sel.b32 r11.w, r12.w, r13.w, r11.w
+sel.b32 r12.y, r12.z, r13.w, r9.z
+add.f r12.z, r12.z, c9.z
+sel.b32 r2.z, r5.w, r9.x, r2.z
+sel.b32 r5.w, r10.z, r9.x, r14.y
+sel.b32 r10.z, r12.x, r9.x, r14.z
+sel.b32 r11.z, r11.z, r9.x, r15.w
+cmps.f.ge r12.x, r2.z, r13.y
+mov.f32f32 r12.w, r5.w
+mov.f32f32 r13.x, r10.z
+mov.f32f32 r13.z, r11.z
+cov.u32f32 r12.x, r12.x
+cmps.f.ge r14.y, r13.y, c4.x
+mov.f32f32 r12.w, r12.w
+mov.f32f32 r13.x, r13.x
+mov.f32f32 r13.z, r13.z
+cov.u32f32 r14.y, r14.y
+cmps.f.ge r14.z, r13.y, c5.x
mov.f32f32 r14.x, r14.x
-mov.f32f32 r14.y, r14.y
-cov.u32f32 r15.y, r15.y
-mov.f32f32 r16.x, r14.z
-mov.f32f32 r16.y, r14.z
-mov.f32f32 r14.z, r14.z
-mul.f r15.x, r15.x, r15.y
+mov.f32f32 r9.z, r9.z
+mul.f r15.x, r12.z, c7.w
+cov.u32f32 r14.z, r14.z
+sel.b32 r11.w, r11.w, r9.x, r14.x
+mov.f32f32 r9.z, r9.z
+sel.b32 r14.x, r15.x, r13.w, r14.w
+mul.f r14.y, r14.y, r14.z
+mov.f32f32 r14.z, r11.w
+sel.b32 r9.z, r12.y, r9.x, r9.z
+mov.f32f32 r12.y, r14.w
+mul.f r12.x, r14.y, r12.x
+mov.f32f32 r14.y, r14.z
+mov.f32f32 r14.z, r9.z
+mov.f32f32 r12.y, r12.y
+cmps.f.ne r12.x, r12.x, c4.x
+mad.f32 r14.w, r7.w, r13.y, r0.z
+mad.f32 r15.x, r10.w, r13.y, r2.x
+mad.f32 r13.y, r3.x, r13.y, r1.y
+sel.b32 r7.x, r7.x, r12.x, r2.z
+mov.f32f32 r2.z, r2.z
+cmps.f.ge r9.w, c3.w, r9.w
+add.f r15.y, r14.w, c9.y
+add.f r15.w, r15.x, c9.w
+sel.b32 r11.z, r14.w, r12.x, r11.z
+cov.u32f32 r9.w, r9.w
+mul.f r14.w, r15.y, c5.w
+mul.f r15.y, r15.w, c5.w
+sel.b32 r11.w, r15.x, r12.x, r11.w
+cmps.f.ne r9.w, r9.w, c4.x
+sel.b32 r5.w, r14.w, r12.x, r5.w
+sel.b32 r10.z, r15.y, r12.x, r10.z
+sel.b32 r9.z, r13.y, r12.x, r9.z
+sel.b32 r2.z, r7.x, r9.w, r2.z
+mad.f32 r5.z, r5.z, r10.y, r8.x
+sel.b32 r5.w, r5.w, r9.w, r12.w
+sel.b32 r7.x, r10.z, r9.w, r13.x
+sel.b32 r8.x, r11.z, r9.w, r13.z
+sel.b32 r10.y, r11.w, r9.w, r14.y
+mov.f32f32 r10.z, r14.z
+sel.b32 r11.z, r14.x, r9.x, r12.y
+sqrt r5.z, r5.z
+(ss)mov.f32f32 r11.w, r5.z
+mov.f32f32 r12.y, r5.w
+mov.f32f32 r12.w, r7.x
+mov.f32f32 r13.x, r8.x
+(ss)mul.f r5.z, r5.z, r11.w
+mov.f32f32 r12.y, r12.y
+mov.f32f32 r12.w, r12.w
+mov.f32f32 r13.x, r13.x
+add.f r5.z, c6.y, (neg)r5.z
+mov.f32f32 r13.z, r10.y
+sel.b32 r9.z, r9.z, r9.w, r10.z
+add.f r10.z, r13.y, c9.z
+mov.f32f32 r13.y, c3.y
+mov.f32f32 r14.x, r11.z
+mov.f32f32 r2.y, r2.y
+sqrt r5.z, r5.z
+(ss)add.f r5.x, r5.x, (neg)r5.z
+(ss)mov.f32f32 r5.z, r13.z
+mov.f32f32 r13.z, r9.z
+mul.f r10.z, r10.z, c5.w
+mov.f32f32 r14.y, r5.x
+sel.b32 r8.y, r15.z, r8.y, r13.y
mov.f32f32 r14.x, r14.x
-mov.f32f32 r6.y, r6.y
+sel.b32 r12.z, r12.z, r13.w, r2.y
+cmps.f.ge r2.z, r2.z, r14.y
+mov.f32f32 r13.z, r13.z
+sel.b32 r10.z, r10.z, r12.x, r11.z
+sel.b32 r8.y, r8.y, r9.y, r13.y
+cov.u32f32 r2.z, r2.z
+cmps.f.ge r9.y, r14.y, c5.x
+sel.b32 r10.z, r10.z, r9.w, r14.x
+mov.f32f32 r11.z, c6.y
+sel.b32 r2.y, r12.z, r9.x, r2.y
+cov.u32f32 r9.y, r9.y
+cmps.f.ge r5.x, r5.x, c4.x
+mov.f32f32 r12.z, r10.z
+sel.b32 r11.z, r11.z, r13.w, r8.y
mov.f32f32 r8.y, r8.y
-mov.f32f32 r15.x, r15.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r6.y, r6.y
+cov.u32f32 r5.x, r5.x
+mov.f32f32 r12.z, r12.z
+mov.f32f32 r6.w, r6.w
mov.f32f32 r8.y, r8.y
-mul.f r14.w, r15.x, r14.w
-mov.f32f32 r15.x, r1.y
-mov.f32f32 r15.y, r0.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r14.w, r14.w
-mov.f32f32 r15.x, r15.x
-mov.f32f32 r15.y, r15.y
-mov.f32f32 r17.x, r2.x
-cmps.f.ne r14.w, r14.w, c4.x
-mov.f32f32 r15.x, r15.x
-mov.f32f32 r17.z, c5.z
-mad.f32 r6.y, r6.y, r16.x, r15.x
-mov.f32f32 r15.x, r15.y
-mov.f32f32 r15.y, r17.x
-sel.b32 r16.x, r17.z, r14.w, r14.y
-sel.b32 r17.x, r6.y, r14.w, r16.w
-cmps.f.ge r15.z, c6.y, r15.z
-mad.f32 r8.y, r8.y, r16.y, r15.x
-mov.f32f32 r15.x, r15.y
-add.f r6.y, r6.y, c12.x
-cov.u32f32 r15.y, r15.z
-sel.b32 r15.z, r8.y, r14.w, r15.w
-mad.f32 r0.y, r0.y, r14.z, r15.x
-sel.b32 r6.y, r6.y, r14.w, r16.z
-cmps.f.ne r14.z, r15.y, c4.x
-add.f r8.y, r8.y, c12.y
-sel.b32 r15.x, r0.y, r14.w, r13.w
-add.f r0.y, r0.y, c12.z
-sel.b32 r15.y, r17.x, r14.z, r16.w
-sel.b32 r14.y, r16.x, r14.z, r14.y
-sel.b32 r15.z, r15.z, r14.z, r15.w
-sel.b32 r13.w, r15.x, r14.z, r13.w
-mov.f32f32 r15.x, r15.y
-mov.f32f32 r15.w, r14.y
-mov.f32f32 r15.y, r15.y
-mov.f32f32 r1.y, r1.y
-add.f r15.x, c15.x, (neg)r15.x
-mov.f32f32 r16.x, r15.w
-mov.f32f32 r15.w, r15.w
-mov.f32f32 r1.y, r1.y
-mul.f r16.y, r15.x, r15.x
-mov.f32f32 r16.w, r15.z
-mov.f32f32 r17.x, r16.x
-add.f r1.y, r1.y, (neg)r15.y
-mov.f32f32 r15.y, r16.x
-add.f r16.x, c15.y, (neg)r16.w
-mov.f32f32 r16.w, r17.x
-mov.f32f32 r1.y, r1.y
-cmps.f.eq r15.y, r15.y, c5.w
-mad.f32 r16.y, r16.x, r16.x, r16.y
-cmps.f.eq r16.w, r16.w, c5.z
-mul.f r17.x, r1.y, r1.y
-mov.f32f32 r15.z, r15.z
-mov.f32f32 r16.y, r16.y
-mov.f32f32 r17.z, r13.w
-cov.u32f32 r16.w, r16.w
-mov.f32f32 r0.z, r0.z
-cov.u32f32 r15.y, r15.y
-add.f r17.z, c15.z, (neg)r17.z
-mov.f32f32 r16.w, r16.w
-mov.f32f32 r0.z, r0.z
-cmps.f.ne r15.y, r15.y, c4.x
-mad.f32 r16.y, r17.z, r17.z, r16.y
-cmps.f.ne r16.w, r16.w, c4.x
-mov.f32f32 r18.y, r14.y
-add.f r0.z, r0.z, (neg)r15.z
-cmps.f.eq r15.z, r15.w, c6.y
-mov.f32f32 r13.w, r13.w
-cmps.f.eq r15.w, r18.y, c4.x
-rsq r16.y, r16.y
-(ss)mov.f32f32 r16.y, r16.y
-sel.b32 r2.w, r3.x, r16.w, r2.w
-sel.b32 r3.x, r5.z, r16.w, r0.x
-sel.b32 r5.z, r11.x, r16.w, r8.x
-mul.f r11.x, r15.x, r16.y
-sel.b32 r2.w, r3.y, r15.y, r2.w
-sel.b32 r3.x, r5.w, r15.y, r3.x
-sel.b32 r3.y, r7.x, r15.y, r5.z
-mov.f32f32 r5.z, r11.x
-cov.u32f32 r5.w, r15.z
-sel.b32 r6.y, r6.y, r14.z, r16.z
-cov.u32f32 r7.x, r15.w
-absneg.f r11.x, (neg)r5.z
-cmps.f.ne r5.w, r5.w, c4.x
-mov.f32f32 r15.x, r6.y
-mov.f32f32 r15.z, r6.y
-mov.f32f32 r15.w, c12.z
-sel.b32 r2.w, r3.z, r5.w, r2.w
-mul.f r3.z, r15.x, r11.x
-mul.f r15.x, r16.x, r16.y
-cmps.f.ne r16.x, r7.x, c4.x
-sel.b32 r2.y, r2.y, r5.w, r3.x
-sel.b32 r3.x, r11.y, r5.w, r3.y
-mov.f32f32 r3.y, r15.x
-sel.b32 r2.w, r3.w, r16.x, r2.w
-sel.b32 r2.y, r6.x, r16.x, r2.y
-sel.b32 r3.x, r11.z, r16.x, r3.x
-absneg.f r3.w, (neg)r3.y
-sel.b32 r6.x, r8.y, r14.w, r17.y
-mov.f32f32 r8.y, r2.w
-cmps.f.eq r11.y, r14.y, c3.y
-mul.f r5.z, r15.z, r5.z
-sel.b32 r6.x, r6.x, r14.z, r17.y
-sel.b32 r4.x, r4.x, r6.z, r8.y
-cov.u32f32 r8.y, r11.y
-mov.f32f32 r10.x, r10.x
-mov.f32f32 r11.y, r6.x
-mov.f32f32 r11.z, r6.x
-sel.b32 r7.x, r15.w, r16.w, r7.x
-mov.f32f32 r14.y, c12.w
-mad.f32 r3.z, r11.y, r3.w, r3.z
-cmps.f.eq r10.x, r10.x, c5.w
-cmps.f.ne r8.y, r8.y, c4.x
-mad.f32 r3.y, r11.z, r3.y, r5.z
-mov.f32f32 r3.z, r3.z
-mul.f r5.z, r17.z, r16.y
-cov.u32f32 r10.x, r10.x
-sel.b32 r0.x, r0.x, r8.y, r2.y
-sel.b32 r8.x, r8.x, r8.y, r3.x
-mov.f32f32 r5.z, r5.z
-cmps.f.ne r10.x, r10.x, c4.x
-sel.b32 r0.x, r8.w, r6.z, r0.x
-sel.b32 r8.x, r11.w, r6.z, r8.x
-absneg.f r8.w, (neg)r5.z
-sel.b32 r0.y, r0.y, r14.w, r14.x
-sel.b32 r4.x, r4.y, r10.x, r4.x
-sel.b32 r0.x, r9.x, r10.x, r0.x
-sel.b32 r4.y, r12.x, r10.x, r8.x
-sel.b32 r0.y, r0.y, r14.z, r14.x
-mov.f32f32 r8.x, r9.w
-mov.f32f32 r3.y, r3.y
-sel.b32 r7.x, r14.y, r15.y, r7.x
-mov.f32f32 r9.x, r0.y
-cmps.f.eq r8.x, r8.x, c6.y
-mov.f32f32 r9.w, r0.y
-mov.f32f32 r11.y, c12.w
-mad.f32 r3.z, r9.x, r8.w, r3.z
-mov.f32f32 r6.y, r6.y
-mov.f32f32 r6.x, r6.x
-mov.f32f32 r0.y, r0.y
+mul.f r5.x, r5.x, r9.y
+mad.f32 r7.w, r7.w, r14.y, r0.z
+mad.f32 r9.y, r10.w, r14.y, r2.x
+mad.f32 r3.x, r3.x, r14.y, r1.y
+mul.f r2.z, r5.x, r2.z
+sel.b32 r5.x, r11.z, r9.x, r8.y
+mov.f32f32 r8.y, c5.w
+sel.b32 r8.w, r8.w, r13.w, r6.w
+cmps.f.ne r2.z, r2.z, c4.x
+add.f r10.w, r7.w, c12.x
+add.f r11.z, r9.y, c12.y
+add.f r13.y, r3.x, c12.z
+sel.b32 r7.w, r7.w, r2.z, r8.x
+cmps.f.ge r8.x, c6.y, r11.w
+sel.b32 r8.y, r8.y, r12.x, r5.x
+mov.f32f32 r5.x, r5.x
+sel.b32 r9.y, r9.y, r2.z, r10.y
cov.u32f32 r8.x, r8.x
-mul.f r6.y, r3.z, r6.y
-mul.f r6.x, r3.z, r6.x
-mul.f r0.y, r3.z, r0.y
-cmps.f.ne r3.z, r8.x, c4.x
-mov.f32f32 r6.y, r6.y
-mov.f32f32 r6.x, r6.x
-mov.f32f32 r0.y, r0.y
-sel.b32 r4.x, r4.z, r3.z, r4.x
-mul.f r4.z, c5.w, r6.y
-mul.f r6.x, c5.w, r6.x
-mul.f r0.y, c5.w, r0.y
-cmps.f.eq r6.y, r13.z, c4.x
-mov.f32f32 r4.z, r4.z
-mov.f32f32 r6.x, r6.x
-mov.f32f32 r0.y, r0.y
-cov.u32f32 r6.y, r6.y
-add.f r4.z, r11.x, (neg)r4.z
-add.f r3.w, r3.w, (neg)r6.x
-add.f r0.y, r8.w, (neg)r0.y
-nop
-mov.f32f32 r4.z, r4.z
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r0.y, r0.y
-cmps.f.ne r6.x, r6.y, c4.x
-mad.f32 r8.x, r0.z, r0.z, r17.x
-sel.b32 r0.x, r10.y, r3.z, r0.x
-sel.b32 r4.y, r12.y, r3.z, r4.y
-nop
-mov.f32f32 r8.x, r8.x
-mov.f32f32 r8.w, r2.x
-sel.b32 r4.x, r4.w, r6.x, r4.x
-sel.b32 r0.x, r10.z, r6.x, r0.x
-sel.b32 r4.y, r12.w, r6.x, r4.y
-mov.f32f32 r4.w, r8.w
-mad.f32 r3.y, r9.w, r5.z, r3.y
-sel.b32 r5.z, r11.y, r5.w, r7.x
-nop
-add.f r4.w, r4.w, (neg)r13.w
-mov.f32f32 r3.y, r3.y
+sel.b32 r3.x, r3.x, r2.z, r9.z
+mov.f32f32 r5.x, r5.x
+sel.b32 r5.w, r10.w, r2.z, r5.w
+cmps.f.ne r8.x, r8.x, c4.x
+sel.b32 r7.x, r11.z, r2.z, r7.x
+sel.b32 r9.z, r13.y, r2.z, r10.z
+sel.b32 r5.x, r8.y, r9.w, r5.x
+sel.b32 r7.w, r7.w, r8.x, r13.x
+mov.f32f32 r8.y, c5.z
+sel.b32 r5.z, r9.y, r8.x, r5.z
+sel.b32 r3.x, r3.x, r8.x, r13.z
+add.f r9.y, c15.x, (neg)r7.w
+sel.b32 r2.z, r8.y, r2.z, r5.x
+mov.f32f32 r5.x, r5.x
+add.f r0.z, r0.z, (neg)r7.w
+mul.f r7.w, r9.y, r9.y
+add.f r8.y, c15.y, (neg)r5.z
+mov.f32f32 r5.x, r5.x
+mov.f32f32 r9.w, r0.z
+add.f r2.x, r2.x, (neg)r5.z
+mad.f32 r5.z, r8.y, r8.y, r7.w
+add.f r7.w, c15.z, (neg)r3.x
+sel.b32 r2.z, r2.z, r8.x, r5.x
+mul.f r0.z, r0.z, r9.w
+mov.f32f32 r5.x, r2.x
+mad.f32 r5.z, r7.w, r7.w, r5.z
+mov.f32f32 r10.y, r2.z
+cmps.f.eq r10.z, r2.z, c6.y
+cmps.f.eq r10.w, r2.z, c4.x
+cmps.f.eq r2.z, r2.z, c3.y
+add.f r1.y, r1.y, (neg)r3.x
+mov.f32f32 r3.x, r10.y
+rsq r5.z, r5.z
+(ss)mov.f32f32 r11.z, r5.z
+mad.f32 r0.z, r2.x, r5.x, r0.z
+mov.f32f32 r2.x, r1.y
+(ss)mul.f r5.z, r7.w, r5.z
+mul.f r7.w, r9.y, r11.z
+cmps.f.eq r3.x, r3.x, c5.z
+sel.b32 r5.w, r5.w, r8.x, r12.y
+mul.f r8.y, r8.y, r11.z
+mov.f32f32 r9.y, r7.w
+cov.u32f32 r3.x, r3.x
+mul.f r7.w, r5.w, r7.w
+mov.f32f32 r11.z, r8.y
+absneg.f r9.y, (neg)r9.y
+cmps.f.ne r3.x, r3.x, c4.x
+sel.b32 r6.w, r8.w, r9.x, r6.w
+mov.f32f32 r8.w, c12.w
+mul.f r9.x, r5.w, r9.y
+absneg.f r11.z, (neg)r11.z
+sel.b32 r7.x, r7.x, r8.x, r12.w
+sel.b32 r8.w, r8.w, r3.x, r2.y
+mov.f32f32 r11.w, c12.z
+sel.b32 r4.y, r4.z, r3.x, r4.y
+mad.f32 r4.z, r7.x, r11.z, r9.x
+mov.f32f32 r9.x, r5.z
+cmps.f.eq r10.y, r10.y, c5.w
+sel.b32 r11.w, r11.w, r3.x, r6.w
+mad.f32 r7.w, r7.x, r8.y, r7.w
+absneg.f r8.y, (neg)r9.x
+sel.b32 r8.x, r9.z, r8.x, r12.z
+cov.u32f32 r9.x, r10.y
+cov.u32f32 r9.z, r10.w
+mov.f32f32 r10.y, c12.z
+mad.f32 r4.z, r8.x, r8.y, r4.z
+cmps.f.ne r9.x, r9.x, c4.x
+mov.f32f32 r10.w, c12.w
+mov.f32f32 r12.x, c12.z
+mul.f r5.w, r4.z, r5.w
+sel.b32 r4.y, r4.w, r9.x, r4.y
+sel.b32 r4.w, r10.w, r9.x, r11.w
+mul.f r7.x, r4.z, r7.x
+mul.f r5.w, c5.w, r5.w
+sel.b32 r8.w, r12.x, r9.x, r8.w
+cov.u32f32 r10.z, r10.z
+mul.f r7.x, c5.w, r7.x
+add.f r5.w, r9.y, (neg)r5.w
+mad.f32 r0.z, r2.x, r2.x, r0.z
+cmps.f.ne r2.x, r10.z, c4.x
+mov.f32f32 r9.y, c12.z
+mov.f32f32 r10.z, c12.w
+add.f r7.x, r11.z, (neg)r7.x
+mul.f r4.z, r4.z, r8.x
+sel.b32 r1.w, r1.w, r2.x, r4.y
+rsq r0.z, r0.z
+(ss)mov.f32f32 r4.y, r0.z
+sel.b32 r8.w, r10.z, r2.x, r8.w
+sel.b32 r4.w, r9.y, r2.x, r4.w
+cmps.f.ne r9.y, r9.z, c4.x
+mul.f r9.w, r9.w, r4.y
+mov.f32f32 r10.z, c12.w
+mov.f32f32 r10.w, c12.w
+sel.b32 r1.w, r5.y, r9.y, r1.w
+mul.f r5.y, r5.w, r9.w
+mul.f r4.y, r5.x, r4.y
+sel.b32 r5.x, r10.z, r9.y, r8.w
+sel.b32 r4.w, r10.w, r9.y, r4.w
+sel.b32 r5.w, r6.y, r2.w, r1.w
+mad.f32 r4.y, r7.x, r4.y, r5.y
+mul.f r4.z, c5.w, r4.z
+cov.u32f32 r2.z, r2.z
+cmps.f.eq r3.y, r3.y, c5.w
+mad.f32 r5.y, r8.x, r5.z, r7.w
+add.f r4.z, r8.y, (neg)r4.z
+(ss)mul.f r0.z, r1.y, r0.z
+cmps.f.ne r1.y, r2.z, c4.x
+cov.u32f32 r2.z, r3.y
+mov.f32f32 r3.y, r5.y
+mad.f32 r0.z, r4.z, r0.z, r4.y
+sel.b32 r2.y, r2.y, r1.y, r5.x
+sel.b32 r4.y, r6.w, r1.y, r4.w
+cmps.f.ne r2.z, r2.z, c4.x
+mov.f32f32 r4.z, c12.w
+mov.f32f32 r5.z, c12.z
+sel.b32 r3.x, r10.y, r3.x, r9.z
+log2 r0.z, r0.z
+(ss)mul.f r0.z, c15.w, r0.z
+sel.b32 r2.y, r4.z, r2.w, r2.y
+sel.b32 r4.y, r5.z, r2.w, r4.y
+sel.b32 r4.z, r6.z, r2.z, r5.w
+mov.f32f32 r5.z, c12.z
+mov.f32f32 r5.w, c12.w
+cmps.f.eq r6.y, r1.x, c6.y
+exp2 r0.z, r0.z
+(ss)mad.f32 r1.w, r3.y, r1.w, r0.z
+mad.f32 r5.x, r3.y, r5.x, r0.z
+mad.f32 r3.y, r3.y, r4.w, r0.z
+mov.f32f32 r4.w, c12.w
+sel.b32 r1.w, r3.z, r1.y, r1.w
+cov.u32f32 r3.z, r6.y
+mov.f32f32 r6.y, c13.z
+mov.f32f32 r6.z, c13.y
+sel.b32 r3.x, r4.w, r9.x, r3.x
+cmps.f.ne r3.z, r3.z, c4.x
+mov.f32f32 r4.w, c12.w
+sel.b32 r5.x, r6.y, r1.y, r5.x
+sel.b32 r3.y, r6.z, r1.y, r3.y
+mov.f32f32 r6.y, c12.w
+sel.b32 r4.z, r4.w, r3.z, r4.z
+cmps.f.ne r4.w, r7.z, c4.x
+mov.f32f32 r6.z, c12.w
+sel.b32 r2.y, r5.z, r2.z, r2.y
+sel.b32 r4.y, r5.w, r2.z, r4.y
+sel.b32 r2.x, r6.y, r2.x, r3.x
+sel.b32 r3.x, r6.z, r4.w, r4.z
+mov.f32f32 r4.z, c12.w
+mov.f32f32 r5.z, c12.z
mov.f32f32 r5.w, c12.z
-mul.f r7.x, r13.y, r7.z
-mov.f32f32 r4.w, r4.w
-mul.f r7.w, r8.z, r7.w
-mov.f32f32 r8.z, c12.z
-mov.f32f32 r7.x, r7.x
-mad.f32 r8.x, r4.w, r4.w, r8.x
-sel.b32 r5.z, r5.w, r16.x, r5.z
-mov.f32f32 r5.w, r1.x
-absneg.f r8.w, (neg)r7.x
-sel.b32 r6.y, r8.z, r6.z, r6.y
-mov.f32f32 r6.z, r1.x
-mad.f32 r5.w, r5.w, r7.x, r7.w
-rsq r7.x, r8.x
-(ss)mov.f32f32 r7.x, r7.x
-mov.f32f32 r7.w, c12.w
-mad.f32 r6.z, r6.z, r8.w, r18.x
-mov.f32f32 r5.w, r5.w
-mul.f r1.y, r1.y, r7.x
-mul.f r0.z, r0.z, r7.x
-mul.f r4.w, r4.w, r7.x
-nop
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r4.w, r4.w
-mov.f32f32 r6.z, r6.z
-mul.f r1.y, r4.z, r1.y
-mul.f r4.z, r9.y, r7.z
-mad.f32 r0.z, r3.w, r0.z, r1.y
-sel.b32 r1.y, r7.w, r10.x, r6.y
-mov.f32f32 r3.w, c12.w
-mad.f32 r1.z, r6.w, r6.w, r1.z
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r4.z, r4.z
-mad.f32 r0.y, r0.y, r4.w, r0.z
-mov.f32f32 r0.z, r1.w
-sel.b32 r1.y, r3.w, r3.z, r1.y
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r0.y, r0.y
-absneg.f r3.z, (neg)r4.z
-mov.f32f32 r3.w, r1.w
-mad.f32 r0.z, r0.z, r4.z, r5.w
-mov.f32f32 r4.z, c12.z
-mov.f32f32 r2.x, r2.x
-cmps.f.eq r4.w, r7.y, c3.y
-log2 r0.y, r0.y
-(ss)mul.f r0.y, c15.w, r0.y
-mad.f32 r3.w, r3.w, r3.z, r6.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.w, r1.w
-mul.f r0.w, r3.w, r0.w
-mul.f r1.x, r3.w, r1.x
-mov.f32f32 r0.z, r0.z
-sel.b32 r1.y, r4.z, r6.x, r1.y
-add.f r2.x, c5.z, (neg)r2.x
-exp2 r0.y, r0.y
-(ss)mad.f32 r2.w, r3.y, r2.w, r0.y
-mad.f32 r2.y, r3.y, r2.y, r0.y
-mad.f32 r3.x, r3.y, r3.x, r0.y
-(ss)mad.f32 r0.y, r3.y, r5.z, r0.y
-sel.b32 r2.z, r2.z, r8.y, r2.w
-sel.b32 r2.y, r5.y, r8.y, r2.y
-sel.b32 r2.w, r9.z, r8.y, r3.x
-mov.f32f32 r3.x, c13.x
-add.f r2.z, r4.x, r2.z
-mov.f32f32 r0.w, r0.w
-add.f r0.x, r0.x, r2.y
-add.f r2.y, r4.y, r2.w
-sel.b32 r0.y, r3.x, r8.y, r0.y
+add.f r1.w, r3.x, r1.w
+mad.f32 r3.x, r0.x, r11.y, r8.z
+sel.b32 r2.y, r4.z, r3.z, r2.y
+sel.b32 r4.y, r5.z, r3.z, r4.y
+sel.b32 r2.x, r5.w, r9.y, r2.x
+mul.f r0.y, r3.x, r0.y
+mov.f32f32 r4.z, c12.w
+mov.f32f32 r5.z, c12.w
+(ss)mad.f32 r0.z, r5.y, r2.x, r0.z
+mul.f r0.y, c5.w, r0.y
+sel.b32 r2.x, r4.z, r4.w, r2.y
+sel.b32 r2.y, r5.z, r4.w, r4.y
+mov.f32f32 r4.y, c13.x
+add.f r0.y, r11.x, (neg)r0.y
+mov.f32f32 r4.z, r6.x
+add.f r2.x, r2.x, r5.x
+add.f r2.y, r2.y, r3.y
+sel.b32 r0.z, r4.y, r1.y, r0.z
+mad.f32 r1.y, r4.z, r4.z, r4.x
+mov.f32f32 r3.y, c12.z
+mul.f r0.w, r3.x, r0.w
+mul.f r0.x, r3.x, r0.x
+mov.f32f32 r3.x, r1.z
+sel.b32 r2.w, r3.y, r2.w, r7.z
+mov.f32f32 r3.y, c12.w
+rsq r1.y, r1.y
+(ss)mov.f32f32 r4.x, r1.y
mul.f r0.w, c5.w, r0.w
-mov.f32f32 r1.x, r1.x
-mul.f r1.w, r3.w, r1.w
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r0.w, r0.w
-add.f r0.y, r1.y, r0.y
-mul.f r1.x, c5.w, r1.x
-mov.f32f32 r1.y, r1.w
-add.f r0.w, r17.w, (neg)r0.w
-mad.f32 r1.z, r2.x, r2.x, r1.z
-cov.u32f32 r1.w, r4.w
+mul.f r0.x, c5.w, r0.x
nop
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.x, r1.x
-mul.f r1.y, c5.w, r1.y
-cmps.f.ne r1.w, r1.w, c4.x
-rsq r1.z, r1.z
-(ss)mov.f32f32 r1.z, r1.z
-add.f r1.x, r8.w, (neg)r1.x
-mov.f32f32 r1.y, r1.y
+mul.f r3.w, r3.w, r4.x
+sel.b32 r2.z, r3.y, r2.z, r2.w
+mov.f32f32 r2.w, c12.w
+add.f r0.w, r10.x, (neg)r0.w
+mul.f r0.y, r0.y, r3.w
+mul.f r3.y, r7.y, r4.x
+sel.b32 r2.z, r2.w, r3.z, r2.z
+mov.f32f32 r2.w, c12.z
+add.f r0.x, r11.y, (neg)r0.x
+mad.f32 r0.y, r0.w, r3.y, r0.y
+mul.f r0.w, r6.x, r1.y
+(ss)sel.b32 r1.y, r2.w, r4.w, r2.z
+cmps.f.eq r1.x, r1.x, c3.y
+mov.f32f32 r2.z, c13.w
+mad.f32 r0.x, r0.x, r0.w, r0.y
+add.f r0.y, r1.y, r0.z
+cov.u32f32 r0.z, r1.x
+(rpt3)nop
+log2 r0.x, r0.x
+(ss)mul.f r0.x, c15.w, r0.x
+cmps.f.ne r0.z, r0.z, c4.x
+mov.f32f32 r0.w, c13.z
+mov.f32f32 r1.x, c13.y
mov.f32f32 r2.w, c13.x
-mul.f r3.x, r12.z, r1.z
-mov.f32f32 r1.x, r1.x
-add.f r1.y, r3.z, (neg)r1.y
-nop
-mov.f32f32 r3.x, r3.x
-mul.f r3.y, r6.w, r1.z
-mul.f r1.z, r2.x, r1.z
-nop
-mul.f r0.w, r0.w, r3.x
-mov.f32f32 r2.x, r3.y
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.z, r1.z
-nop
-mad.f32 r0.w, r1.x, r2.x, r0.w
-(rpt2)nop
-mov.f32f32 r0.w, r0.w
-nop
-mad.f32 r0.w, r1.y, r1.z, r0.w
-(rpt2)nop
-mov.f32f32 r0.w, r0.w
-(rpt5)nop
-log2 r0.w, r0.w
-(ss)mul.f r0.w, c15.w, r0.w
-(rpt2)nop
-mov.f32f32 r0.w, r0.w
-(rpt5)nop
-exp2 r0.w, r0.w
-(ss)mad.f32 r1.x, r0.z, r2.z, r0.w
-mad.f32 r0.x, r0.z, r0.x, r0.w
-mad.f32 r1.y, r0.z, r2.y, r0.w
-mad.f32 r0.y, r0.z, r0.y, r0.w
-sel.b32 r0.z, r5.x, r1.w, r1.x
-sel.b32 r0.x, r10.w, r1.w, r0.x
-(ss)sel.b32 r0.w, r13.x, r1.w, r1.y
-sel.b32 r0.y, r2.w, r1.w, r0.y
-mov.f32f32 r1.w, r0.z
-mov.f32f32 r1.z, r0.x
-mov.f32f32 r1.y, r0.w
-mov.f32f32 r1.x, r0.y
+(rpt1)nop
+exp2 r0.x, r0.x
+(ss)mad.f32 r1.y, r3.x, r1.w, r0.x
+mad.f32 r2.x, r3.x, r2.x, r0.x
+mad.f32 r2.y, r3.x, r2.y, r0.x
+(ss)mad.f32 r0.x, r1.z, r0.y, r0.x
+sel.b32 r1.w, r2.z, r0.z, r1.y
+sel.b32 r1.z, r0.w, r0.z, r2.x
+sel.b32 r1.y, r1.x, r0.z, r2.y
+sel.b32 r1.x, r2.w, r0.z, r0.x
end
nop
+nop
; FRAG: outputs: r1.x (1:0)
; FRAG: inputs: r0.x (5:20,cm=f,il=8,b=1)
-; FRAG: 1344 instructions, 0 half, 21 full
+; FRAG: 934 instructions, 0 half, 17 full
diff --git a/reference/gmaps-frag.asm b/reference/gmaps-frag.asm
index a658d94..b3a1bb0 100644
--- a/reference/gmaps-frag.asm
+++ b/reference/gmaps-frag.asm
@@ -6,79 +6,57 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c1.x) 0x00000000, 0x3f000000, 0x3f800000, 0x40000000
+@const(c2.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.z, 4, r0.x
-bary.f r0.w, 6, r0.x
-bary.f r1.x, 7, r0.x
-bary.f r1.y, 5, r0.x
+bary.f r1.x, 6, r0.x
+bary.f r1.y, 7, r0.x
+bary.f r0.w, 5, r0.x
mul.f r1.z, c0.x, r0.z
-mov.f32f32 r2.x, r0.z
-cmps.f.lt r2.z, c1.x, r0.w
-mul.f r1.w, c0.x, r1.y
-mov.f32f32 r2.y, r1.y
-min.f r0.w, r0.w, r1.x
-cov.u32f32 r2.z, r2.z
-cmps.f.lt r1.x, c1.x, r1.x
-mul.f r2.w, c1.w, r0.z
-add.f r0.w, r0.w, c1.y
-dsy (f32)(xy)r3.x, r1.z
-(sy)(ss)mov.f32f32 r1.z, r3.x
-mov.f32f32 r1.w, r3.y
-dsx (f32)(xy)r3.x, r2.x
-cov.u32f32 r1.x, r1.x
-(sy)(ss)mov.f32f32 r2.x, r3.x
-mad.f32 r1.z, r2.w, r1.z, (neg)r1.w
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.w, r3.y
-mul.f r1.x, r2.z, r1.x
-mov.f32f32 r1.z, r1.z
-mad.f32 r1.w, r2.w, r2.x, (neg)r1.w
-min.f r0.w, r0.w, c1.z
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r1.w, r1.w
+mul.f r2.x, c1.w, r0.z
+cmps.f.lt r2.y, c1.x, r1.x
+mul.f r1.w, c0.x, r0.w
+min.f r1.x, r1.x, r1.y
+cmps.f.lt r1.y, c1.x, r1.y
+dsx (f32)(xy)r2.z, r0.z
+(sy)mad.f32 r2.z, r2.x, r2.z, (neg)r2.w
+cov.u32f32 r2.y, r2.y
+add.f r1.x, r1.x, c1.y
+cov.u32f32 r1.y, r1.y
+dsy (f32)(xy)r2.w, r1.z
+(sy)(ss)mad.f32 r1.z, r2.x, r2.w, (neg)r3.x
+mov.f32f32 r1.w, r2.z
+min.f r1.x, r1.x, c1.z
+mad.f32 r0.z, r0.z, r0.z, (neg)r0.w
+mov.f32f32 r0.w, r1.z
+mul.f r1.w, r2.z, r1.w
+mul.f r1.y, r2.y, r1.y
+nop
mov.f32f32 r0.w, r0.w
-mad.f32 r0.z, r0.z, r0.z, (neg)r1.y
-bary.f r1.y, 3, r0.x
-mul.f r1.w, r1.w, r1.w
-cmps.f.ne r1.x, r1.x, c1.x
-mad.f32 r1.z, r1.z, r1.z, r1.w
-bary.f r1.w, 2, r0.x
-bary.f r2.x, 1, r0.x
+bary.f r2.x, 3, r0.x
+bary.f r2.y, 2, r0.x
+bary.f r2.z, 1, r0.x
+mad.f32 r0.w, r1.z, r0.w, r1.w
+cmps.f.ne r1.y, r1.y, c1.x
bary.f (ei)r0.x, 0, r0.x
-mov.f32f32 r0.y, r1.z
-(rpt5)nop
-rsq r0.y, r0.y
-(ss)mov.f32f32 r0.y, r0.y
-(rpt2)nop
-mul.f r0.y, r0.z, r0.y
-(rpt2)nop
-mov.f32f32 r0.y, r0.y
+(rpt3)nop
+rsq r0.y, r0.w
+(ss)mul.f r0.y, r0.z, r0.y
(rpt2)nop
add.f r0.y, c1.y, (neg)r0.y
(rpt2)nop
-mov.f32f32 r0.y, r0.y
-(rpt2)nop
max.f r0.y, r0.y, c1.x
(rpt2)nop
min.f r0.y, r0.y, c1.z
(rpt2)nop
-mov.f32f32 r0.y, r0.y
+sel.b32 r0.y, r1.x, r1.y, r0.y
(rpt2)nop
-sel.b32 r0.y, r0.w, r1.x, r0.y
-(rpt2)nop
-mul.f r0.z, r1.y, r0.y
-mul.f r0.w, r1.w, r0.y
-mul.f r1.x, r2.x, r0.y
-mul.f r0.x, r0.x, r0.y
-mov.f32f32 r1.w, r0.z
-mov.f32f32 r1.z, r0.w
-mov.f32f32 r1.y, r1.x
-mov.f32f32 r1.x, r0.x
+mul.f r1.w, r2.x, r0.y
+mul.f r1.z, r2.y, r0.y
+mul.f r1.y, r2.z, r0.y
+mul.f r1.x, r0.x, r0.y
end
-nop
-nop
-nop
; FRAG: outputs: r1.x (1:0)
-; FRAG: inputs: r0.z (5:20,cm=f,il=8,b=1) r0.y (5:21,cm=f,il=12,b=1)
-; FRAG: 92 instructions, 0 half, 4 full
+; FRAG: inputs: r1.y (5:20,cm=f,il=8,b=1) r0.z (5:21,cm=f,il=12,b=1)
+; FRAG: 61 instructions, 0 half, 4 full
diff --git a/reference/idiv-vert.asm b/reference/idiv-vert.asm
index 292cf79..bb5adce 100644
--- a/reference/idiv-vert.asm
+++ b/reference/idiv-vert.asm
@@ -1,9 +1,9 @@
; options:
; VERT: new compiler
-@in(r1.x) in0
-@in(r1.y) in1
-@in(r1.z) in2
-@in(r1.w) in3
+@in(r0.x) in0
+@in(r0.y) in1
+@in(r0.z) in2
+@in(r0.w) in3
@out(r0.x) out0
@out(r0.y) out1
@out(r0.z) out2
@@ -12,90 +12,89 @@
@out(r1.y) out5
@out(r1.z) out6
@out(r1.w) out7
-(sy)(ss)cov.s32f32 r0.x, c1.x
-cov.s32f32 r0.y, c1.y
-mov.f32f32 r0.z, c0.y
-mov.f32f32 r0.w, c0.x
-absneg.f r0.x, (abs)r0.x
-absneg.f r0.y, (abs)r0.y
-xor.b r0.z, r0.z, c1.y
-xor.b r0.w, r0.w, c1.x
+@const(c3.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000
+@const(c4.x) 0xfffffffe, 0x0000001f, 0x00000000, 0x00000000
+(sy)(ss)cov.s32f32 r1.x, c1.x
+cov.s32f32 r1.y, c1.y
+mov.f32f32 r1.z, c0.y
+mov.f32f32 r1.w, c0.x
+absneg.f r1.x, (abs)r1.x
+absneg.f r1.y, (abs)r1.y
+xor.b r1.z, r1.z, c1.y
+xor.b r1.w, r1.w, c1.x
cov.s32f32 r2.x, c0.y
cov.s32f32 r2.y, c0.x
mov.f32f32 r2.z, c3.y
-rcp r0.x, r0.x
-(ss)add.u r0.x, r0.x, c4.x
-rcp r0.y, r0.y
-(ss)add.u r2.w, r0.y, c4.x
-(ss)absneg.f r0.y, (abs)r2.y
+rcp r1.x, r1.x
+(ss)add.u r1.x, r1.x, c4.x
+rcp r1.y, r1.y
+(ss)add.u r1.y, r1.y, c4.x
+absneg.f r2.y, (abs)r2.y
absneg.f r2.x, (abs)r2.x
-shr.b r2.y, r0.z, c4.y
-shr.b r3.x, r0.w, c4.y
-mul.f r0.y, r0.y, r0.x
-mul.f r0.z, r2.x, r2.w
-mov.f32f32 r2.x, c3.y
-mov.f32f32 r3.y, c3.x
-cov.f32s32 r3.z, r0.y
-absneg.s r3.w, (abs)c1.x
-cov.f32s32 r4.x, r0.z
-absneg.s r4.y, (abs)c1.y
-mov.f32f32 r4.z, c3.x
-mull.u r0.y, r3.z, r3.w
-mov.f32f32 r4.w, c3.x
-madsh.m16 r0.y, r3.z, r3.w, r0.y
-mull.u r0.z, r4.x, r4.y
-madsh.m16 r0.y, r3.w, r3.z, r0.y
-absneg.s r5.x, (abs)c0.x
-madsh.m16 r0.z, r4.x, r4.y, r0.z
+shr.b r1.z, r1.z, c4.y
+shr.b r1.w, r1.w, c4.y
+mul.f r2.y, r2.y, r1.x
+mul.f r2.x, r2.x, r1.y
+mov.f32f32 r2.w, c3.y
+mov.f32f32 r3.x, c3.x
+cov.f32s32 r2.y, r2.y
+absneg.s r3.y, (abs)c1.x
+cov.f32s32 r2.x, r2.x
+absneg.s r3.z, (abs)c1.y
+mov.f32f32 r3.w, c3.x
+mull.u r4.x, r2.y, r3.y
+mov.f32f32 r4.y, c3.x
+madsh.m16 r4.x, r2.y, r3.y, r4.x
+mull.u r4.z, r2.x, r3.z
+madsh.m16 r4.x, r3.y, r2.y, r4.x
+absneg.s r4.w, (abs)c0.x
+madsh.m16 r4.z, r2.x, r3.z, r4.z
+mov.f32f32 r5.x, c3.y
mov.f32f32 r5.y, c3.y
-mov.f32f32 r5.z, c3.y
-sub.u r0.y, r5.x, r0.y
-madsh.m16 r5.w, r4.y, r4.x, r0.z
-absneg.s r6.x, (abs)c0.y
-mov.f32f32 r6.y, c3.x
-cov.u32f32 r6.z, r0.y
-mov.f32f32 r0.w, r1.w
-mov.f32f32 r0.z, r1.z
-mov.f32f32 r0.y, r1.y
-mul.f r1.y, r6.z, r0.x
-sub.u r1.z, r6.x, r5.w
-mov.f32f32 r0.x, r1.x
-nop
-cov.f32u32 r1.x, r1.y
-cov.u32f32 r1.y, r1.z
+sub.u r4.x, r4.w, r4.x
+madsh.m16 r4.z, r3.z, r2.x, r4.z
+absneg.s r5.z, (abs)c0.y
+mov.f32f32 r5.w, c3.x
+cov.u32f32 r4.x, r4.x
+(rpt2)nop
+mul.f r1.x, r4.x, r1.x
+sub.u r4.x, r5.z, r4.z
(rpt1)nop
-add.u r1.x, r3.z, r1.x
-mul.f r1.y, r1.y, r2.w
+cov.f32u32 r1.x, r1.x
+cov.u32f32 r4.x, r4.x
(rpt1)nop
-mull.u r1.z, r1.x, r3.w
+add.u r1.x, r2.y, r1.x
+mul.f r1.y, r4.x, r1.y
+(rpt1)nop
+mull.u r2.y, r1.x, r3.y
cov.f32u32 r1.y, r1.y
-madsh.m16 r1.z, r1.x, r3.w, r1.z
+madsh.m16 r2.y, r1.x, r3.y, r2.y
nop
-madsh.m16 r1.z, r3.w, r1.x, r1.z
-add.u r1.y, r4.x, r1.y
+madsh.m16 r2.y, r3.y, r1.x, r2.y
+add.u r1.y, r2.x, r1.y
(rpt1)nop
-sub.u r1.z, r5.x, r1.z
-mull.u r1.w, r1.y, r4.y
+sub.u r2.x, r4.w, r2.y
+mull.u r2.y, r1.y, r3.z
(rpt1)nop
-cmps.u.ge r1.z, r1.z, r3.w
-madsh.m16 r1.w, r1.y, r4.y, r1.w
+cmps.u.ge r2.x, r2.x, r3.y
+madsh.m16 r2.y, r1.y, r3.z, r2.y
(rpt1)nop
-add.u r1.x, r1.x, r1.z
-madsh.m16 r1.z, r4.y, r1.y, r1.w
+add.u r1.x, r1.x, r2.x
+madsh.m16 r2.x, r3.z, r1.y, r2.y
(rpt1)nop
-absneg.s r1.w, (neg)r1.x
-sub.u r1.z, r6.x, r1.z
+absneg.s r2.y, (neg)r1.x
+sub.u r2.x, r5.z, r2.x
(rpt1)nop
-sel.b32 r1.x, r1.w, r3.x, r1.x
-cmps.u.ge r1.z, r1.z, r4.y
+sel.b32 r1.x, r2.y, r1.w, r1.x
+cmps.u.ge r1.w, r2.x, r3.z
(rpt1)nop
cmps.u.eq r1.x, r1.x, c2.x
-add.u r1.y, r1.y, r1.z
+add.u r1.y, r1.y, r1.w
(rpt1)nop
absneg.s r1.x, (neg)r1.x
-absneg.s r1.z, (neg)r1.y
+absneg.s r1.w, (neg)r1.y
(rpt2)nop
-sel.b32 r1.y, r1.z, r2.y, r1.y
+sel.b32 r1.y, r1.w, r1.z, r1.y
(rpt2)nop
cmps.u.eq r1.y, r1.y, c2.y
(rpt2)nop
@@ -105,18 +104,13 @@ and.b r1.x, r1.x, r1.y
(rpt2)nop
cmps.u.ne r1.x, r1.x, c3.x
(rpt2)nop
-sel.b32 r1.y, r2.x, r1.x, r2.z
-sel.b32 r1.z, r4.z, r1.x, r3.y
-sel.b32 r2.x, r5.y, r1.x, r4.w
-sel.b32 r1.x, r6.y, r1.x, r5.z
-mov.f32f32 r1.w, r1.y
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r1.y, r2.x
-mov.f32f32 r1.x, r1.x
+sel.b32 r1.w, r2.w, r1.x, r2.z
+sel.b32 r1.z, r3.w, r1.x, r3.x
+sel.b32 r1.y, r5.x, r1.x, r4.y
+sel.b32 r1.x, r5.w, r1.x, r5.y
end
nop
-nop
; VERT: outputs: r0.x (0:0) r1.x (5:20)
-; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0)
-; VERT: 123 instructions, 0 half, 7 full
+; VERT: inputs: r0.x (0:0,cm=f,il=8,b=0)
+; VERT: 119 instructions, 0 half, 6 full
diff --git a/reference/jellyfish-frag.asm b/reference/jellyfish-frag.asm
index 38d65d6..e29f320 100644
--- a/reference/jellyfish-frag.asm
+++ b/reference/jellyfish-frag.asm
@@ -6,411 +6,226 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c8.x) 0x3daa9931, 0x3caa64c3, 0x3c2c0831, 0xbf800000
+@const(c9.x) 0x3e4ccccd, 0x3e99999a, 0x3f000000, 0xbf333333
+@const(c10.x) 0x41200000, 0xbdcccccd, 0xc0200000, 0x3fcccccd
+@const(c11.x) 0x40000000, 0x00000000, 0x3f800000, 0x3eaaa64c
+@const(c12.x) 0x40400000, 0x40800000, 0x40a00000, 0x40c00000
+@const(c13.x) 0x40e00000, 0x41000000, 0x00000000, 0x00000000
+@const(c14.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.z, 8, r0.x
-bary.f r0.w, 2, r0.x
-bary.f r1.x, 1, r0.x
-bary.f r1.y, 11, r0.x
-mov.f32f32 r1.z, r0.z
-mov.f32f32 r0.z, r0.z
-add.f r0.w, r0.w, (neg)r1.x
-mov.f32f32 r1.x, r1.y
-mov.f32f32 r1.y, r1.z
-bary.f r1.z, 9, r0.x
-mul.f r0.w, r0.w, c8.z
-mov.f32f32 r1.w, c2.x
+mov.f32f32 r1.x, c7.x
+bary.f r1.w, 11, r0.x
+bary.f r1.y, 2, r0.x
mov.f32f32 r2.x, r0.z
-mov.f32f32 r0.z, r1.z
-mov.f32f32 r0.w, r0.w
-mul.f r1.w, r1.w, c2.w
-mov.f32f32 r2.y, r1.z
-mov.f32f32 r1.z, r0.z
-mov.f32f32 r0.z, r0.w
-add.f r0.w, r1.x, r1.w
-bary.f r2.z, 12, r0.x
-mov.f32f32 r2.w, c8.w
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r3.x, c2.y
-mov.f32f32 r0.z, r0.z
+bary.f r0.w, 9, r0.x
+cmps.f.eq r1.x, r1.x, c13.y
+mov.f32f32 r1.z, r1.w
+mov.f32f32 r2.z, c2.x
+mov.f32f32 r2.y, r0.w
+cov.u32f32 r1.x, r1.x
+bary.f r2.w, 1, r0.x
+mul.f r2.z, r2.z, c2.w
+mov.f32f32 r3.x, c8.w
+cmps.f.ne r1.x, r1.x, c11.y
mov.f32f32 r3.y, c8.w
-samb (f32)(xyzw)r3.z, r1.y, r2.w, s#2, t#2
-(sy)(ss)add.f r1.y, (neg)r4.y, c10.y
-mov.f32f32 r1.z, c6.x
-mov.f32f32 r4.w, r0.z
-bary.f r0.z, 0, r0.x
-mul.f r2.w, r3.x, c2.w
-mul.f r1.z, r1.z, c9.x
-samb (f32)(xyzw)r5.x, r2.x, r3.y, s#0, t#0
-(ss)add.f r2.x, r4.y, c6.x
-mul.f r0.z, r0.z, c8.y
-mov.f32f32 r2.y, c8.x
-add.f r1.y, r1.y, (neg)r1.z
-add.f r3.x, r2.z, r2.w
-(sy)mov.f32f32 r3.y, r5.w
-mad.f32 r0.z, c0.x, r2.y, r0.z
-mov.f32f32 r1.y, r1.y
-add.f r0.w, r0.w, r3.x
-bary.f r2.y, 13, r0.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r3.x, c7.x
-mul.f r2.x, r2.x, c9.y
-mov.f32f32 r5.w, r5.w
+add.f r3.z, r1.z, r2.z
+mov.f32f32 r3.w, c2.y
+add.f r1.y, r1.y, (neg)r2.w
+mov.f32f32 r2.w, c6.x
+samb (f32)(xyzw)r4.x, r2.x, r3.x, s#2, t#2
+(sy)(ss)add.f r2.x, (neg)r4.w, c10.y
+mul.f r2.y, r3.w, c2.w
+bary.f r3.x, 12, r0.x
+mul.f r2.w, r2.w, c9.x
+samb (f32)(xyzw)r5.x, r0.z, r3.y, s#0, t#0
+(ss)mov.f32f32 r0.z, c11.z
+mul.f r6.y, r1.y, c8.z
+add.f r0.w, r4.w, c6.x
+add.f r1.y, r2.x, (neg)r2.w
+(sy)sel.b32 r0.z, r0.z, r1.x, r5.w
+add.f r2.x, r3.x, r2.y
+mov.f32f32 r3.y, c7.x
+bary.f r3.w, 0, r0.x
+mul.f r0.w, r0.w, c9.y
+mov.f32f32 r6.z, r5.w
sin r1.y, r1.y
-(ss)mov.f32f32 r1.y, r1.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r4.z, c2.z
-mul.f r1.y, r1.y, r3.z
-mov.f32f32 r0.z, r0.z
-cmps.f.eq r3.x, r3.x, c13.y
-mul.f r3.z, r4.z, c2.w
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r4.z, r0.z
-cov.u32f32 r0.z, r3.x
-add.f r3.x, r2.y, r3.z
+(ss)mul.f r1.y, r1.y, r4.x
+cmps.f.eq r3.y, r3.y, c13.x
+add.f r2.x, r3.z, r2.x
+mov.f32f32 r3.z, c2.z
mul.f r1.y, r1.y, c10.x
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r5.w, r5.w
-add.f r0.w, r0.w, r3.x
-mov.f32f32 r1.y, r1.y
-add.f r3.x, (neg)r4.y, c9.w
-sam (f32)(xyz)r6.x, r4.z, s#1, t#1
-(sy)(ss)mad.f32 r4.z, c1.w, r6.y, r2.z
-mad.f32 r4.w, c1.w, r6.x, r1.x
-mad.f32 r6.w, c1.w, r6.z, r2.y
-add.f r3.x, r3.x, (neg)r1.z
-add.f r4.z, r4.z, r2.w
-add.f r4.w, r4.w, r1.w
-add.f r6.w, r6.w, r3.z
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r4.z, r4.z
-mov.f32f32 r4.w, r4.w
-mov.f32f32 r6.w, r6.w
-mov.f32f32 r6.z, r6.z
-mov.f32f32 r6.y, r6.y
-bary.f r7.x, 16, r0.x
-sin r3.x, r3.x
-(ss)mov.f32f32 r3.x, r3.x
-bary.f r7.y, 15, r0.x
-mov.f32f32 r6.z, r6.z
-mov.f32f32 r7.z, r7.x
-mul.f r3.x, r3.x, r3.w
-mad.f32 r3.w, r6.w, r5.z, r7.z
-mov.f32f32 r6.w, r7.y
-bary.f r7.w, 14, r0.x
+add.f r4.x, (neg)r4.w, c9.w
+cov.u32f32 r3.y, r3.y
+mul.f r3.z, r3.z, c2.w
+bary.f r6.w, 13, r0.x
+add.f r4.x, r4.x, (neg)r2.w
+cmps.f.ne r3.y, r3.y, c11.y
+mov.f32f32 r6.x, c11.z
+add.f r7.x, r6.w, r3.z
+mul.f r3.w, r3.w, c8.y
+sin r0.w, r0.w
+mov.f32f32 r7.y, c8.x
+sel.b32 r0.z, r6.x, r3.y, r0.z
+sin r4.x, r4.x
+(ss)mul.f r4.x, r4.x, r4.y
+mov.f32f32 r4.y, c7.x
+add.f r2.x, r2.x, r7.x
+mad.f32 r6.x, c0.x, r7.y, r3.w
+mad.f32 r1.y, c10.x, r4.x, r1.y
+add.f r3.w, (neg)r4.w, c10.z
+cmps.f.eq r4.x, r4.y, c12.w
+mul.f r2.x, r2.x, c11.w
+mul.f r0.w, r0.w, c9.z
+add.f r3.w, r3.w, (neg)r2.w
+cov.u32f32 r4.x, r4.x
+add.f r2.x, c11.z, (neg)r2.x
+sam (f32)(xyz)r7.x, r6.x, s#1, t#1
+mov.f32f32 r4.y, r0.w
mov.f32f32 r3.x, r3.x
-mov.f32f32 r3.w, r3.w
-mad.f32 r4.z, r4.z, r5.y, r6.w
-mov.f32f32 r8.x, r7.w
-mad.f32 r1.y, c10.x, r3.x, r1.y
-bary.f r3.x, 7, r0.x
-mov.f32f32 r4.z, r4.z
-mad.f32 r4.w, r4.w, r5.x, r8.x
-mov.f32f32 r1.y, r1.y
-add.f r4.y, (neg)r4.y, c10.z
-add.f r8.y, c14.y, (neg)r3.x
-add.f r8.z, c14.y, (neg)r3.x
-mov.f32f32 r4.w, r4.w
-add.f r4.y, r4.y, (neg)r1.z
-mul.f r3.w, r8.y, r3.w
-mul.f r4.z, r8.z, r4.z
-add.f r8.y, c14.y, (neg)r3.x
-mov.f32f32 r4.y, r4.y
-bary.f r8.z, 6, r0.x
-bary.f r8.w, 5, r0.x
-mul.f r4.w, r8.y, r4.w
-mov.f32f32 r6.z, r6.z
-mov.f32f32 r6.y, r6.y
-bary.f r8.y, 4, r0.x
-sin r4.y, r4.y
-(ss)mov.f32f32 r4.y, r4.y
-mul.f r8.z, r3.x, r8.z
-mul.f r8.w, r3.x, r8.w
-mul.f r3.x, r3.x, r8.y
-mul.f r4.x, r4.y, r4.x
-add.f r3.w, r8.z, r3.w
-add.f r4.y, r8.w, r4.z
-add.f r3.x, r3.x, r4.w
-mov.f32f32 r4.x, r4.x
-add.f r1.z, c10.w, r1.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r4.z, r6.z
-mov.f32f32 r4.w, r6.y
-mov.f32f32 r1.z, r1.z
-mul.f r0.w, r0.w, c11.w
-mov.f32f32 r4.z, r4.z
-mov.f32f32 r4.w, r4.w
-mov.f32f32 r6.x, r6.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r4.z, r4.z
-cos r1.z, r1.z
-(ss)mov.f32f32 r1.z, r1.z
-mov.f32f32 r4.w, r4.w
-mov.f32f32 r6.x, r6.x
-add.f r0.w, c11.z, (neg)r0.w
-mul.f r1.z, r7.x, r1.z
-mov.f32f32 r4.z, r4.z
-mov.f32f32 r4.w, r4.w
-mov.f32f32 r6.x, r6.x
-mov.f32f32 r1.z, r1.z
-add.f r6.y, r7.w, r7.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r4.w, r4.w
-mad.f32 r1.z, c11.x, r1.z, r6.y
-mov.f32f32 r6.x, r6.x
-cmps.f.ne r0.z, r0.z, c11.y
-mov.f32f32 r6.y, c11.z
-mov.f32f32 r1.z, r1.z
-log2 r0.w, r0.w
-(ss)mul.f r0.w, c12.x, r0.w
-mad.f32 r1.z, c10.x, r4.x, r1.z
-mov.f32f32 r4.x, r6.x
-mov.f32f32 r6.x, r6.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r4.x, r4.x
-sel.b32 r3.y, r6.x, r0.z, r3.y
-mov.f32f32 r6.x, c7.x
-add.f r1.y, r1.y, r1.z
-mov.f32f32 r1.z, r4.x
-exp2 r0.w, r0.w
-(ss)add.f r4.x, c14.y, (neg)r0.w
-add.f r6.y, c14.y, (neg)r0.w
-mov.f32f32 r1.y, r1.y
-add.f r6.z, c14.y, (neg)r0.w
-(rpt1)nop
+cmps.f.ne r4.x, r4.x, c11.y
+mov.f32f32 r4.w, c11.z
+sin r3.w, r3.w
+(ss)mul.f r3.w, r3.w, r4.z
+add.f r2.w, c10.w, r2.w
+log2 r2.x, r2.x
+(ss)mul.f r2.x, c12.x, r2.x
+sel.b32 r0.z, r4.w, r4.x, r0.z
+mov.f32f32 r4.z, c7.x
+mov.f32f32 r4.w, r6.w
+mul.f r6.x, r5.z, r4.y
+(sy)mad.f32 r6.y, c1.w, r7.z, r4.w
+cos r2.w, r2.w
+bary.f r6.w, 16, r0.x
+cmps.f.eq r4.z, r4.z, c12.z
+exp2 r2.x, r2.x
+(ss)add.f r7.w, c14.y, (neg)r2.x
+add.f r8.x, c14.y, (neg)r2.x
+(ss)mul.f r2.w, r6.w, r2.w
+bary.f r8.y, 14, r0.x
+bary.f r8.z, 15, r0.x
+cov.u32f32 r4.z, r4.z
+add.f r6.y, r6.y, r3.z
+mad.f32 r8.w, c1.w, r7.y, r3.x
+add.f r9.x, r8.y, r8.z
+cmps.f.ne r4.z, r4.z, c11.y
+mad.f32 r2.w, c11.x, r2.w, r9.x
+mov.f32f32 r9.x, c11.z
+mad.f32 r2.w, c10.x, r3.w, r2.w
+mov.f32f32 r3.w, r6.w
+add.f r8.w, r8.w, r2.y
+add.f r9.y, c14.y, (neg)r2.x
+add.f r1.y, r1.y, r2.w
+sel.b32 r0.z, r9.x, r4.z, r0.z
+mad.f32 r2.w, r6.y, r5.z, r3.w
+mov.f32f32 r3.w, r8.z
max.f r1.y, r1.y, c11.y
-mul.f r6.y, r6.y, r3.w
-mul.f r4.x, r4.x, r4.y
-mul.f r6.z, r6.z, r3.x
-mov.f32f32 r1.y, r1.y
-sin r2.x, r2.x
-(ss)mov.f32f32 r2.x, r2.x
-cmps.f.eq r6.x, r6.x, c13.x
-mov.f32f32 r5.w, r5.w
-mov.f32f32 r7.x, r5.z
-mul.f r2.x, r2.x, c9.z
-cov.u32f32 r6.x, r6.x
-mov.f32f32 r5.w, r5.w
-mov.f32f32 r7.x, r7.x
-mov.f32f32 r2.x, r2.x
-cmps.f.ne r6.x, r6.x, c11.y
-mov.f32f32 r7.y, c11.z
-mov.f32f32 r5.w, r5.w
-mul.f r5.z, r5.z, r2.x
-mul.f r7.w, r5.y, r2.x
-mul.f r2.x, r5.x, r2.x
-mov.f32f32 r7.y, r7.y
-mov.f32f32 r5.z, r5.z
-mov.f32f32 r7.w, r7.w
-mov.f32f32 r2.x, r2.x
-sel.b32 r3.y, r7.y, r6.x, r3.y
-mul.f r5.z, r5.z, r1.y
-mul.f r7.y, r7.w, r1.y
-mul.f r1.y, r2.x, r1.y
-mov.f32f32 r2.x, c7.x
-mov.f32f32 r5.z, r5.z
-mov.f32f32 r7.y, r7.y
-mov.f32f32 r1.y, r1.y
-cmps.f.eq r2.x, r2.x, c12.w
-mov.f32f32 r7.w, r5.z
-mov.f32f32 r8.y, r7.y
-mov.f32f32 r8.z, r1.y
-add.f r3.w, r3.w, r5.z
-mov.f32f32 r5.z, r7.w
-mov.f32f32 r7.w, r8.y
-mov.f32f32 r8.y, r8.z
-mul.f r3.w, r0.w, r3.w
-mov.f32f32 r5.z, r5.z
-mov.f32f32 r7.w, r7.w
-mov.f32f32 r8.y, r8.y
-add.f r3.w, r3.w, r6.y
-mov.f32f32 r5.z, r5.z
-mov.f32f32 r6.y, r7.w
-mov.f32f32 r7.w, r8.y
-add.f r4.y, r4.y, r7.y
-mov.f32f32 r5.z, r5.z
-mov.f32f32 r6.y, r6.y
-mov.f32f32 r7.y, r7.w
-mul.f r4.y, r0.w, r4.y
-mov.f32f32 r5.z, r5.z
-mov.f32f32 r6.y, r6.y
-mov.f32f32 r7.y, r7.y
-add.f r4.x, r4.y, r4.x
-mov.f32f32 r4.y, r5.z
-mov.f32f32 r5.z, r6.y
-mov.f32f32 r6.y, r7.y
-add.f r1.y, r3.x, r1.y
-mov.f32f32 r3.x, r4.y
-mov.f32f32 r4.y, r5.z
-mov.f32f32 r5.z, r6.y
+mul.f r0.w, r5.x, r0.w
+mov.f32f32 r6.y, c7.x
+bary.f r9.x, 7, r0.x
+mov.f32f32 r9.z, r1.y
+mul.f r4.y, r5.y, r4.y
mul.f r0.w, r0.w, r1.y
-mov.f32f32 r1.y, r3.x
-mov.f32f32 r3.x, r4.y
-mov.f32f32 r4.y, r5.z
-add.f r0.w, r0.w, r6.z
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r3.x, r3.x
+cmps.f.eq r1.y, r6.y, c12.y
+mul.f r6.x, r6.x, r9.z
+mul.f r4.y, r4.y, r9.z
+mov.f32f32 r6.y, r0.w
+mad.f32 r1.z, c1.w, r7.x, r1.z
+mov.f32f32 r9.z, r6.x
+add.f r9.w, c14.y, (neg)r9.x
+mov.f32f32 r10.x, r4.y
+mad.f32 r3.w, r8.w, r5.y, r3.w
+add.f r1.z, r1.z, r2.z
+mul.f r2.w, r9.w, r2.w
+bary.f r8.w, 6, r0.x
+add.f r9.w, c14.y, (neg)r9.x
+mov.f32f32 r10.y, r8.y
+cov.u32f32 r1.y, r1.y
+mul.f r8.w, r9.x, r8.w
+mul.f r3.w, r9.w, r3.w
+bary.f r9.w, 5, r0.x
+mad.f32 r1.z, r1.z, r5.x, r10.y
+add.f r2.w, r8.w, r2.w
+add.f r8.w, c14.y, (neg)r9.x
+mul.f r9.w, r9.x, r9.w
+cmps.f.ne r1.y, r1.y, c11.y
+add.f r9.z, r2.w, r9.z
+mul.f r1.z, r8.w, r1.z
+add.f r3.w, r9.w, r3.w
+bary.f r8.w, 4, r0.x
+mul.f r9.z, r2.x, r9.z
+mul.f r2.w, r8.x, r2.w
+add.f r8.x, r3.w, r10.x
+mul.f r8.w, r9.x, r8.w
+mov.f32f32 r9.x, c11.z
+add.f r2.w, r9.z, r2.w
+mul.f r8.x, r2.x, r8.x
+mul.f r3.w, r7.w, r3.w
+add.f r1.z, r8.w, r1.z
+sel.b32 r2.w, r6.x, r1.x, r2.w
+mov.f32f32 r6.x, r6.z
+add.f r3.w, r8.x, r3.w
+add.f r6.y, r1.z, r6.y
+sel.b32 r0.z, r9.x, r1.y, r0.z
+mov.f32f32 r6.x, r6.x
+sel.b32 r3.w, r4.y, r1.x, r3.w
+mul.f r2.x, r2.x, r6.y
+mul.f r1.z, r9.y, r1.z
+mov.f32f32 r4.y, r6.x
+mov.f32f32 r6.x, c7.x
+mov.f32f32 r6.y, c7.x
+mov.f32f32 r6.z, c7.x
mov.f32f32 r4.y, r4.y
-cov.u32f32 r2.x, r2.x
-sel.b32 r1.y, r1.y, r0.z, r3.w
-mov.f32f32 r3.w, r5.w
-sel.b32 r3.x, r3.x, r0.z, r4.x
-sel.b32 r0.z, r4.y, r0.z, r0.w
-cmps.f.ne r0.w, r2.x, c11.y
-mov.f32f32 r2.x, r3.w
-mov.f32f32 r4.x, r3.w
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r4.y, c11.z
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r4.x, r4.x
-mov.f32f32 r3.w, r3.w
+add.f r1.z, r2.x, r1.z
+cmps.f.eq r2.x, r6.x, c12.x
+cmps.f.eq r6.x, r6.y, c11.x
mov.f32f32 r4.y, r4.y
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r4.x, r4.x
-mov.f32f32 r3.w, r3.w
-sel.b32 r3.y, r4.y, r0.w, r3.y
-sel.b32 r1.y, r2.x, r6.x, r1.y
-mov.f32f32 r2.x, r7.x
-sel.b32 r3.x, r4.x, r6.x, r3.x
-sel.b32 r0.z, r3.w, r6.x, r0.z
-mov.f32f32 r3.w, r5.y
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r4.x, r5.x
-mov.f32f32 r4.y, c7.x
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r4.x, r4.x
-cmps.f.eq r4.y, r4.y, c12.z
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r4.x, r4.x
-cov.u32f32 r4.y, r4.y
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r4.x, r4.x
-cmps.f.ne r4.y, r4.y, c11.y
-mov.f32f32 r5.x, c11.z
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r4.x, r4.x
-mov.f32f32 r5.x, r5.x
-sel.b32 r1.y, r2.x, r0.w, r1.y
-mov.f32f32 r2.x, r3.w
-mov.f32f32 r3.w, r4.x
-sel.b32 r3.y, r5.x, r4.y, r3.y
-sel.b32 r1.y, r4.z, r4.y, r1.y
-mov.f32f32 r3.z, r3.z
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r4.x, c7.x
-mov.f32f32 r3.z, r3.z
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r3.w, r3.w
-cmps.f.eq r4.x, r4.x, c12.y
-mov.f32f32 r3.z, r3.z
-sel.b32 r2.x, r2.x, r0.w, r3.x
-sel.b32 r0.z, r3.w, r0.w, r0.z
-cov.u32f32 r0.w, r4.x
-mov.f32f32 r3.x, r3.z
-sel.b32 r2.x, r4.w, r4.y, r2.x
-sel.b32 r0.z, r1.z, r4.y, r0.z
-mov.f32f32 r1.z, r2.w
-mov.f32f32 r2.w, r3.x
-mov.f32f32 r1.w, r1.w
-cmps.f.ne r0.w, r0.w, c11.y
-mov.f32f32 r3.x, c11.z
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r3.x, r3.x
-sel.b32 r1.y, r2.w, r0.w, r1.y
-mov.f32f32 r2.w, r7.z
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r1.w, r1.w
-sel.b32 r3.x, r3.x, r0.w, r3.y
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r3.y, c7.x
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r1.w, r1.w
-cmps.f.eq r3.y, r3.y, c12.x
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r1.w, r1.w
-cov.u32f32 r3.y, r3.y
-mov.f32f32 r2.w, r2.w
-sel.b32 r1.z, r1.z, r0.w, r2.x
-sel.b32 r0.z, r1.w, r0.w, r0.z
-cmps.f.ne r0.w, r3.y, c11.y
-mov.f32f32 r1.w, r6.w
-mov.f32f32 r2.x, r8.x
+sel.b32 r0.w, r0.w, r1.x, r1.z
+cov.u32f32 r1.x, r2.x
+cov.u32f32 r1.z, r6.x
+sel.b32 r2.x, r4.y, r3.y, r2.w
+sel.b32 r2.w, r4.y, r3.y, r3.w
+sel.b32 r0.w, r5.w, r3.y, r0.w
+cmps.f.ne r1.x, r1.x, c11.y
+sel.b32 r2.x, r5.z, r4.x, r2.x
+sel.b32 r2.w, r5.y, r4.x, r2.w
+sel.b32 r0.w, r5.x, r4.x, r0.w
mov.f32f32 r3.y, c11.z
-sel.b32 r1.y, r2.w, r0.w, r1.y
-mov.f32f32 r2.w, c7.x
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r3.y, r3.y
-cmps.f.eq r2.w, r2.w, c11.x
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r2.x, r2.x
-sel.b32 r3.x, r3.y, r0.w, r3.x
-cov.u32f32 r2.w, r2.w
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r2.y, r2.y
-cmps.f.ne r2.w, r2.w, c11.y
-bary.f r3.y, 19, r0.x
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r3.z, c11.z
-mov.f32f32 r3.y, r3.y
-sel.b32 r1.z, r1.w, r0.w, r1.z
-sel.b32 r0.z, r2.x, r0.w, r0.z
-bary.f r0.w, 18, r0.x
-mov.f32f32 r1.w, r3.y
+sel.b32 r2.x, r7.z, r4.z, r2.x
+sel.b32 r2.w, r7.y, r4.z, r2.w
+sel.b32 r0.w, r7.x, r4.z, r0.w
+sel.b32 r0.z, r3.y, r1.x, r0.z
+sel.b32 r2.x, r3.z, r1.y, r2.x
+sel.b32 r2.y, r2.y, r1.y, r2.w
+sel.b32 r0.w, r2.z, r1.y, r0.w
+cmps.f.ne r2.z, r1.z, c11.y
+sel.b32 r1.y, r6.w, r1.x, r2.x
+bary.f r1.z, 19, r0.x
+sel.b32 r2.x, r8.z, r1.x, r2.y
+bary.f r2.y, 18, r0.x
+sel.b32 r0.w, r8.y, r1.x, r0.w
+sel.b32 r1.x, r1.z, r2.z, r1.y
+cmps.f.eq r1.y, r6.z, c11.z
+sel.b32 r2.x, r2.y, r2.z, r2.x
+(rpt1)nop
+cov.u32f32 r1.y, r1.y
bary.f (ei)r0.x, 17, r0.x
-mov.f32f32 r0.y, r3.z
-mov.f32f32 r2.x, r2.y
-sel.b32 r1.y, r1.w, r2.w, r1.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r1.w, r2.x
-mov.f32f32 r2.x, c7.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.x, r0.x
-sel.b32 r0.y, r0.y, r2.w, r3.x
-cmps.f.eq r2.x, r2.x, c11.z
-sel.b32 r0.w, r0.w, r2.w, r1.z
-sel.b32 r0.x, r0.x, r2.w, r0.z
-mov.f32f32 r0.z, r2.z
-cov.u32f32 r1.z, r2.x
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r2.x, c11.z
-nop
-cmps.f.ne r2.y, r1.z, c11.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.z, r2.x
-sel.b32 r1.y, r1.w, r2.y, r1.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r1.x, r1.x
-sel.b32 r0.y, r1.z, r2.y, r0.y
-mov.f32f32 r1.z, r1.y
-sel.b32 r0.z, r0.z, r2.y, r0.w
-sel.b32 r0.x, r1.x, r2.y, r0.x
+mov.f32f32 r0.y, c11.z
+mov.f32f32 r2.y, c11.z
+cmps.f.ne r2.w, r1.y, c11.y
+sel.b32 r0.x, r0.x, r2.z, r0.w
(rpt1)nop
-mov.f32f32 r1.y, r0.z
-mov.f32f32 r1.x, r0.x
-mov.f32f32 r1.w, r0.y
+sel.b32 r1.z, r4.w, r2.w, r1.x
+sel.b32 r1.y, r3.x, r2.w, r2.x
+sel.b32 r1.x, r1.w, r2.w, r0.x
+sel.b32 r0.x, r0.y, r2.z, r0.z
+(rpt2)nop
+sel.b32 r1.w, r2.y, r2.w, r0.x
end
nop
nop
nop
; FRAG: outputs: r1.x (1:0)
-; FRAG: inputs: r0.x (5:20,cm=f,il=8,b=1) r2.y (5:21,cm=f,il=12,b=1) r0.z (5:22,cm=f,il=16,b=1) r6.z (5:23,cm=f,il=20,b=1) r2.z (5:24,cm=f,il=24,b=1)
-; FRAG: 403 instructions, 0 half, 9 full
+; FRAG: inputs: r0.x (5:20,cm=f,il=8,b=1) r8.y (5:21,cm=f,il=12,b=1) r1.x (5:22,cm=f,il=16,b=1) r7.w (5:23,cm=f,il=20,b=1) r0.w (5:24,cm=f,il=24,b=1)
+; FRAG: 213 instructions, 0 half, 11 full
diff --git a/reference/maniadrive/maniadrive-01.asm b/reference/maniadrive/maniadrive-01.asm
index 2c03e4f..284e180 100644
--- a/reference/maniadrive/maniadrive-01.asm
+++ b/reference/maniadrive/maniadrive-01.asm
@@ -1,13 +1,13 @@
; options:
; VERT: new compiler
-@in(r2.x) in0
-@in(r2.y) in1
-@in(r2.z) in2
-@in(r2.w) in3
-@in(r0.x) in4
-@in(r0.y) in5
-@in(r0.z) in6
-@in(r0.w) in7
+@in(r1.x) in0
+@in(r1.y) in1
+@in(r1.z) in2
+@in(r1.w) in3
+@in(r2.x) in4
+@in(r2.y) in5
+@in(r2.z) in6
+@in(r2.w) in7
@out(r0.x) out0
@out(r0.y) out1
@out(r0.z) out2
@@ -16,39 +16,36 @@
@out(r1.y) out5
@out(r1.z) out6
@out(r1.w) out7
-(sy)(ss)mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
-max.f r0.w, r0.w, c4.x
-max.f r0.z, r0.z, c4.x
-max.f r0.y, r0.y, c4.x
-max.f r0.x, r0.x, c4.x
-min.f r1.w, r0.w, c4.y
-min.f r1.z, r0.z, c4.y
-min.f r1.y, r0.y, c4.y
-min.f r1.x, r0.x, c4.y
-mul.f r0.x, r2.x, c0.w
-mul.f r0.y, r2.x, c0.z
-mad.f32 r0.x, c1.w, r2.y, r0.x
-mad.f32 r0.y, c1.z, r2.y, r0.y
-mad.f32 r0.x, c2.w, r2.z, r0.x
-mad.f32 r0.y, c2.z, r2.z, r0.y
-mad.f32 r0.w, c3.w, r2.w, r0.x
-mad.f32 r0.z, c3.z, r2.w, r0.y
-mul.f r0.x, r2.x, c0.y
-mul.f r0.y, r2.x, c0.x
-mad.f32 r0.x, c1.y, r2.y, r0.x
-mad.f32 r0.y, c1.x, r2.y, r0.y
-mad.f32 r0.x, c2.y, r2.z, r0.x
-mad.f32 r2.x, c2.x, r2.z, r0.y
-mad.f32 r0.y, c3.y, r2.w, r0.x
-mad.f32 r0.x, c3.x, r2.w, r2.x
+@const(c4.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000
+(sy)(ss)mul.f r0.x, r1.x, c0.w
+mul.f r0.y, r1.x, c0.z
+mad.f32 r0.x, c1.w, r1.y, r0.x
+mad.f32 r0.y, c1.z, r1.y, r0.y
+mad.f32 r0.x, c2.w, r1.z, r0.x
+mad.f32 r0.y, c2.z, r1.z, r0.y
+mad.f32 r0.w, c3.w, r1.w, r0.x
+mad.f32 r0.z, c3.z, r1.w, r0.y
+mul.f r0.x, r1.x, c0.y
+mul.f r0.y, r1.x, c0.x
+mad.f32 r0.x, c1.y, r1.y, r0.x
+mad.f32 r0.y, c1.x, r1.y, r0.y
+mad.f32 r0.x, c2.y, r1.z, r0.x
+mad.f32 r1.x, c2.x, r1.z, r0.y
+mad.f32 r0.y, c3.y, r1.w, r0.x
+mad.f32 r0.x, c3.x, r1.w, r1.x
+max.f r1.x, r2.w, c4.x
+max.f r1.y, r2.z, c4.x
+max.f r2.y, r2.y, c4.x
+max.f r2.x, r2.x, c4.x
+min.f r1.w, r1.x, c4.y
+min.f r1.z, r1.y, c4.y
+min.f r1.y, r2.y, c4.y
+min.f r1.x, r2.x, c4.y
end
nop
nop
nop
; VERT: outputs: r0.x (0:0) r1.x (1:0)
-; VERT: inputs: r2.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=f,il=12,b=0)
-; VERT: 29 instructions, 0 half, 3 full
+; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) r2.x (0:0,cm=f,il=12,b=0)
+; VERT: 25 instructions, 0 half, 3 full
diff --git a/reference/maniadrive/maniadrive-02.asm b/reference/maniadrive/maniadrive-02.asm
index 31895e9..ac30adb 100644
--- a/reference/maniadrive/maniadrive-02.asm
+++ b/reference/maniadrive/maniadrive-02.asm
@@ -6,43 +6,24 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)bary.f r0.z, 8, r0.x
-bary.f r0.w, 11, r0.x
-bary.f r1.x, 9, r0.x
+bary.f r0.w, 9, r0.x
+bary.f r1.x, 11, r0.x
bary.f r1.y, 2, r0.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.x, r1.x
+bary.f r1.w, 1, r0.x
+bary.f r2.x, 0, r0.x
bary.f r1.z, 6, r0.x
-mov.f32f32 r1.w, r0.z
-mov.f32f32 r2.y, r0.w
-mov.f32f32 r2.x, r1.x
-add.f r0.z, r1.y, r1.z
-bary.f r0.w, 1, r0.x
-bary.f r1.x, 0, r0.x
-bary.f r1.y, 5, r0.x
+bary.f r2.y, 5, r0.x
bary.f r2.z, 4, r0.x
+sam.p (f32)(w)r2.w, r0.z, s#0, t#0
bary.f (ei)r0.x, 3, r0.x
-sam.p (f32)(w)r2.w, r1.w, s#0, t#0
-mov.f32f32 r1.z, r0.z
-add.f r0.y, r0.w, r1.y
-add.f r0.z, r1.x, r2.z
-(sy)mul.f r0.x, r3.z, r0.x
-(rpt2)nop
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r1.y, r0.y
-mov.f32f32 r1.x, r0.z
-nop
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
-(ss)mov.f32f32 r1.w, r0.x
+add.f r1.z, r1.y, r1.z
+add.f r1.y, r1.w, r2.y
+(ss)add.f r1.x, r2.x, r2.z
+(sy)mul.f r1.w, r3.z, r0.x
end
-nop
-nop
-nop
; FRAG: outputs: r1.x (1:0)
-; FRAG: inputs: r63.y (1:0,cm=f,il=8,b=1) r1.x (1:1,cm=f,il=12,b=1) r0.x (5:0,cm=f,il=16,b=1)
-; FRAG: 39 instructions, 0 half, 4 full
+; FRAG: inputs: r63.y (1:0,cm=f,il=8,b=1) r1.x (1:1,cm=f,il=12,b=1) r0.y (5:0,cm=f,il=16,b=1)
+; FRAG: 16 instructions, 0 half, 4 full
diff --git a/reference/maniadrive/maniadrive-03.asm b/reference/maniadrive/maniadrive-03.asm
index 90cb336..cb658a2 100644
--- a/reference/maniadrive/maniadrive-03.asm
+++ b/reference/maniadrive/maniadrive-03.asm
@@ -1,34 +1,34 @@
; options:
; VERT: new compiler
-@in(r3.x) in0
-@in(r3.y) in1
-@in(r3.z) in2
-@in(r3.w) in3
+@in(r6.x) in0
+@in(r6.y) in1
+@in(r6.z) in2
+@in(r6.w) in3
@in(r0.x) in4
@in(r0.y) in5
@in(r0.z) in6
-@in(r6.x) in8
-@in(r6.y) in9
-@in(r6.z) in10
-@in(r6.w) in11
-@in(r7.x) in12
-@in(r7.y) in13
-@in(r7.z) in14
-@in(r7.w) in15
-@in(r0.w) in16
-@in(r1.x) in17
-@in(r1.y) in18
-@in(r2.x) in20
-@in(r2.y) in21
-@in(r2.z) in22
-@in(r5.x) in24
-@in(r5.y) in25
-@in(r5.z) in26
+@in(r7.x) in8
+@in(r7.y) in9
+@in(r7.z) in10
+@in(r7.w) in11
+@in(r3.x) in12
+@in(r3.y) in13
+@in(r3.z) in14
+@in(r3.w) in15
+@in(r1.x) in16
+@in(r1.y) in17
+@in(r1.z) in18
+@in(r1.w) in20
+@in(r2.x) in21
+@in(r2.y) in22
+@in(r8.x) in24
+@in(r8.y) in25
+@in(r8.z) in26
@in(r4.x) in28
@in(r4.y) in29
@in(r4.z) in30
-@in(r1.z) in32
-@in(r8.x) in36
+@in(r2.z) in32
+@in(r0.w) in36
@out(r0.x) out0
@out(r0.y) out1
@out(r0.z) out2
@@ -53,231 +53,197 @@
@out(r5.y) out21
@out(r5.z) out22
@out(r5.w) out23
-(sy)(ss)mul.f r1.w, r3.x, c10.x
-mov.f32f32 r2.w, c18.y
-mad.f32 r1.w, c11.x, r3.y, r1.w
-mov.f32f32 r4.w, r6.w
-mad.f32 r1.w, c12.x, r3.z, r1.w
-mov.f32f32 r2.w, r2.w
-mad.f32 r1.w, c13.x, r3.w, r1.w
-mov.f32f32 r4.w, r4.w
-mov.f32f32 r5.w, c18.y
-mov.f32f32 r6.w, r6.w
-add.f r8.y, c9.x, (neg)r1.w
-max.f r1.w, r2.w, c18.x
-max.f r2.w, r4.w, c18.x
-mov.f32f32 r8.z, r5.w
-mul.f r8.w, r8.y, r8.y
-mul.f r4.w, r3.x, c10.y
-min.f r5.w, r1.w, c18.y
-mad.f32 r1.w, c11.y, r3.y, r4.w
-min.f r4.w, r2.w, c18.y
-mad.f32 r1.w, c12.y, r3.z, r1.w
-max.f r2.w, r8.z, c18.x
-mad.f32 r1.w, c13.y, r3.w, r1.w
-mov.f32f32 r6.w, r6.w
-mul.f r8.z, r3.x, c10.z
-mul.f r9.x, r0.x, c4.x
-add.f r9.y, c9.y, (neg)r1.w
-min.f r2.w, r2.w, c18.y
-max.f r1.w, r6.w, c18.x
-mad.f32 r6.w, c11.z, r3.y, r8.z
-mad.f32 r8.z, r9.y, r9.y, r8.w
-mad.f32 r6.w, c12.z, r3.z, r6.w
-mad.f32 r8.w, c4.y, r0.y, r9.x
-mul.f r9.x, r0.x, c5.x
-mov.f32f32 r8.z, r8.z
-mad.f32 r6.w, c13.z, r3.w, r6.w
-min.f r1.w, r1.w, c18.y
-mov.f32f32 r8.w, r8.w
-mad.f32 r9.x, c5.y, r0.y, r9.x
-add.f r6.w, c9.z, (neg)r6.w
-mad.f32 r8.w, c4.z, r0.z, r8.w
+@const(c18.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000
+@const(c19.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
+(sy)(ss)mul.f r2.w, r6.x, c10.x
+mul.f r4.w, r6.x, c10.y
+mad.f32 r2.w, c11.x, r6.y, r2.w
+mad.f32 r4.w, c11.y, r6.y, r4.w
+mad.f32 r2.w, c12.x, r6.z, r2.w
+mad.f32 r4.w, c12.y, r6.z, r4.w
+mad.f32 r2.w, c13.x, r6.w, r2.w
+mad.f32 r4.w, c13.y, r6.w, r4.w
+mul.f r5.x, r6.x, c10.z
+mov.f32f32 r5.y, c18.y
+add.f r2.w, c9.x, (neg)r2.w
+add.f r4.w, c9.y, (neg)r4.w
+mad.f32 r5.x, c11.z, r6.y, r5.x
+max.f r5.y, r5.y, c18.x
+mul.f r5.z, r2.w, r2.w
+mad.f32 r5.x, c12.z, r6.z, r5.x
+mad.f32 r5.z, r4.w, r4.w, r5.z
+mad.f32 r5.x, c13.z, r6.w, r5.x
+min.f r5.w, r5.y, c18.y
+mov.f32f32 r5.y, c18.y
+mul.f r8.w, r6.x, c0.w
+add.f r5.x, c9.z, (neg)r5.x
+mad.f32 r8.w, c1.w, r6.y, r8.w
+mul.f r9.x, r6.x, c0.z
+mul.f r9.y, r6.x, c0.y
+mad.f32 r5.z, r5.x, r5.x, r5.z
+max.f r5.y, r5.y, c18.x
+mad.f32 r8.w, c2.w, r6.z, r8.w
+mad.f32 r9.x, c1.z, r6.y, r9.x
+mad.f32 r9.y, c1.y, r6.y, r9.y
+mul.f r6.x, r6.x, c0.x
+absneg.f r9.z, (neg)r0.w
+rsq r0.w, (abs)r5.z
+(ss)mul.f r2.w, r2.w, r0.w
+(ss)rcp r5.z, r0.w
+(ss)mov.f32f32 r9.w, r5.z
+mul.f r10.x, r0.x, c4.x
+rsq r10.y, (abs)r0.w
+(ss)mul.f r4.w, r4.w, r10.y
+add.f r10.y, r2.w, c18.x
+mul.f r10.z, r0.w, r9.w
+mad.f32 r10.x, c4.y, r0.y, r10.x
+add.f r10.w, r4.w, c18.x
+mul.f r11.x, r10.y, r10.y
+mul.f r10.z, c14.x, r10.z
+mad.f32 r10.x, c4.z, r0.z, r10.x
+mad.f32 r11.x, r10.w, r10.w, r11.x
+rsq r11.y, (abs)r0.w
+(ss)mul.f r5.x, r5.x, r11.y
+mad.f32 r9.w, c14.y, r9.w, r10.z
+(ss)rcp r0.w, r0.w
+(ss)mul.f r5.z, r0.w, r5.z
+mul.f r10.z, r10.x, r2.w
+add.f r11.y, r5.x, c18.y
+mul.f r11.z, r0.x, c5.x
+min.f r2.w, r5.y, c18.y
+(ss)mad.f32 r0.w, c3.w, r6.w, r8.w
+mad.f32 r5.y, r11.y, r11.y, r11.x
+mad.f32 r5.z, c14.z, r5.z, r9.w
+mad.f32 r8.w, c5.y, r0.y, r11.z
+mad.f32 r9.x, c2.z, r6.z, r9.x
+mad.f32 r9.y, c2.y, r6.z, r9.y
+mad.f32 r6.x, c1.x, r6.y, r6.x
+min.f r6.y, (neg)r9.z, c19.z
+rsq r5.y, (abs)r5.y
+(ss)mov.f32f32 r9.z, r5.y
+mov.f32f32 r9.w, r5.z
+rcp r11.x, r5.z
+(ss)mad.f32 r5.z, c5.z, r0.z, r8.w
+mul.f r5.y, r11.y, r5.y
+mul.f r8.w, r10.y, r9.z
+mul.f r9.z, r10.w, r9.z
+mad.f32 r4.w, r5.z, r4.w, r10.z
mul.f r0.x, r0.x, c6.x
-mov.f32f32 r9.x, r9.x
-mad.f32 r8.z, r6.w, r6.w, r8.z
-mad.f32 r9.x, c5.z, r0.z, r9.x
+mul.f r8.w, r10.x, r8.w
+rcp r10.x, r9.w
+mov.f32f32 r10.y, c19.y
+mad.f32 r5.z, r5.z, r9.z, r8.w
mad.f32 r0.x, c6.y, r0.y, r0.x
-mul.f r0.y, r3.x, c0.w
-mul.f r9.z, r3.x, c0.z
-mul.f r9.w, r3.x, c0.y
-mul.f r3.x, r3.x, c0.x
-rsq r8.z, (abs)r8.z
-(ss)mul.f r8.y, r8.y, r8.z
-rcp r10.x, r8.z
-(ss)mov.f32f32 r10.x, r10.x
-rcp r10.y, r8.z
-(ss)mov.f32f32 r10.y, r10.y
-rsq r10.z, (abs)r8.z
-(ss)mul.f r9.y, r9.y, r10.z
-add.f r10.z, r8.y, c18.x
-mul.f r10.w, r8.z, r10.x
-mul.f r8.y, r8.w, r8.y
-mul.f r10.y, r10.y, r10.x
-mul.f r11.x, r10.z, r10.z
-add.f r11.y, r9.y, c18.x
-mov.f32f32 r10.w, r10.w
-mad.f32 r8.y, r9.x, r9.y, r8.y
-mov.f32f32 r9.y, r10.y
-mad.f32 r10.y, r11.y, r11.y, r11.x
-mul.f r10.w, c14.x, r10.w
-mov.f32f32 r8.y, r8.y
-(ss)rsq r8.z, (abs)r8.z
-mad.f32 r10.x, c14.y, r10.x, r10.w
-mov.f32f32 r10.y, r10.y
-(ss)mul.f r6.w, r6.w, r8.z
-mov.f32f32 r0.x, r0.x
-(ss)mov.f32f32 r8.z, r10.x
+mov.f32f32 r0.y, c19.y
mad.f32 r0.x, c6.z, r0.z, r0.x
-add.f r0.z, r6.w, c18.y
-mad.f32 r8.z, c14.z, r9.y, r8.z
-mad.f32 r0.y, c1.w, r3.y, r0.y
-mad.f32 r9.y, c1.z, r3.y, r9.z
-mad.f32 r9.z, r0.z, r0.z, r10.y
-mov.f32f32 r8.z, r8.z
-mad.f32 r6.w, r0.x, r6.w, r8.y
-mad.f32 r0.y, c2.w, r3.z, r0.y
-mad.f32 r8.y, c2.z, r3.z, r9.y
-mad.f32 r9.y, c1.y, r3.y, r9.w
-mad.f32 r3.x, c1.x, r3.y, r3.x
-rsq r3.y, (abs)r9.z
-(ss)mov.f32f32 r3.y, r3.y
-(ss)rcp r9.z, r8.z
-mov.f32f32 r9.w, c19.y
-mov.f32f32 r10.x, c19.y
-rcp r10.y, r8.z
-max.f r10.w, (neg)r6.w, c19.x
-mul.f r10.z, r10.z, r3.y
-(ss)mul.f r9.w, r9.w, r9.z
-mul.f r11.x, c15.y, r6.y
-mul.f r11.z, c15.x, r6.x
-mul.f r8.w, r8.w, r10.z
-mul.f r10.z, r11.y, r3.y
-mul.f r11.y, c15.z, r6.z
-mad.f32 r4.y, c7.y, r6.y, r4.y
-mad.f32 r4.x, c7.x, r6.x, r4.x
-mad.f32 r8.w, r9.x, r10.z, r8.w
-mad.f32 r4.z, c7.z, r6.z, r4.z
-mad.f32 r4.y, r9.w, r11.x, r4.y
-mad.f32 r4.x, r9.w, r11.z, r4.x
-mov.f32f32 r8.w, r8.w
-mul.f r0.z, r0.z, r3.y
-mad.f32 r3.y, r9.w, r11.y, r4.z
-mov.f32f32 r4.z, r10.w
-mul.f r9.x, r10.x, r9.z
-mad.f32 r0.x, r0.x, r0.z, r8.w
-mul.f r0.z, c15.z, r6.z
-mul.f r8.w, c15.y, r6.y
-mul.f r9.z, c15.x, r6.x
-max.f r9.w, (neg)r0.x, c19.x
-max.f r0.x, r0.x, c19.x
-mul.f r4.z, r4.z, r10.y
-mul.f r10.x, c16.y, r6.y
-mul.f r10.z, c16.x, r6.x
-mad.f32 r5.z, c7.z, r6.z, r5.z
-mad.f32 r5.y, c7.y, r6.y, r5.y
-log2 r9.w, r9.w
-(ss)mov.f32f32 r9.w, r9.w
-absneg.f r8.x, (neg)r8.x
-log2 r0.x, r0.x
-(ss)mov.f32f32 r0.x, r0.x
-mov.f32f32 r1.z, r1.z
-mul.f r10.w, c16.z, r6.z
-min.f r8.x, (neg)r8.x, c19.z
-mad.f32 r4.y, r4.z, r10.x, r4.y
-mad.f32 r4.x, r4.z, r10.z, r4.x
-mad.f32 r0.z, r9.x, r0.z, r5.z
-mul.f r5.z, r8.x, r9.w
-min.f r1.z, r1.z, c19.z
-mad.f32 r3.y, r4.z, r10.w, r3.y
-max.f r4.y, r4.y, c18.x
-mov.f32f32 r4.z, r5.z
-mul.f r0.x, r1.z, r0.x
-max.f r1.z, r3.y, c18.x
-min.f r4.y, r4.y, c18.y
-max.f r3.y, r4.x, c18.x
-max.f r5.z, r6.w, c19.x
-mad.f32 r8.x, r9.x, r8.w, r5.y
-exp2 r4.x, r4.z
-(ss)mov.f32f32 r5.y, r4.x
-cmps.f.lt r8.w, (neg)r6.w, c18.x
-mov.f32f32 r0.x, r0.x
-(ss)min.f r4.z, r1.z, c18.y
-min.f r4.x, r3.y, c18.y
-sel.b32 r1.z, r5.y, r8.w, c19.x
-mov.f32f32 r3.y, r5.z
-mad.f32 r5.x, c7.x, r6.x, r5.x
-rcp r5.y, r8.z
-cmps.f.lt r5.z, (neg)r6.w, c18.x
-mov.f32f32 r1.z, r1.z
-exp2 r0.x, r0.x
-(ss)mov.f32f32 r0.x, r0.x
-mul.f r3.y, r3.y, r10.y
-mul.f r6.z, c16.z, r6.z
-mov.f32f32 r1.z, r1.z
-sel.b32 r0.x, r0.x, r5.z, c19.x
-mul.f r6.y, c16.y, r6.y
-mad.f32 r6.w, r9.x, r9.z, r5.x
-mul.f r1.z, r1.z, r5.y
-mul.f r2.z, c17.z, r2.z
-mov.f32f32 r5.x, c18.x
-mul.f r2.y, c17.y, r2.y
-mul.f r2.x, c17.x, r2.x
-mad.f32 r2.z, r1.z, r2.z, r5.x
-mov.f32f32 r5.x, c18.x
+(ss)mul.f r0.z, r10.y, r10.x
+mul.f r8.w, c15.y, r7.y
+mul.f r9.z, c15.x, r7.x
+mad.f32 r5.y, r0.x, r5.y, r5.z
+mul.f r5.z, c15.z, r7.z
+mad.f32 r4.y, c7.y, r7.y, r4.y
+mad.f32 r4.x, c7.x, r7.x, r4.x
+max.f r10.y, (neg)r5.y, c19.x
+max.f r5.y, r5.y, c19.x
+mad.f32 r4.z, c7.z, r7.z, r4.z
+mad.f32 r4.y, r0.z, r8.w, r4.y
+mad.f32 r4.x, r0.z, r9.z, r4.x
+mul.f r0.y, r0.y, r10.x
+mul.f r8.w, c15.y, r7.y
+log2 r9.z, r10.y
+(ss)mul.f r6.y, r6.y, r9.z
+log2 r5.y, r5.y
+min.f r2.z, r2.z, c19.z
+mad.f32 r0.z, r0.z, r5.z, r4.z
+mad.f32 r0.x, r0.x, r5.x, r4.w
+mul.f r4.z, c15.z, r7.z
+mad.f32 r4.w, c7.y, r7.y, r8.y
+(ss)mul.f r2.z, r2.z, r5.y
+exp2 r5.x, r6.y
+(ss)cmps.f.lt r5.y, (neg)r0.x, c18.x
+max.f r5.z, (neg)r0.x, c19.x
+mad.f32 r6.y, c7.z, r7.z, r8.z
+mad.f32 r4.w, r0.y, r8.w, r4.w
+(ss)sel.b32 r5.x, r5.x, r5.y, c19.x
+rcp r8.y, r9.w
+mul.f r8.z, r5.z, r11.x
+mul.f r8.w, c16.y, r7.y
+mul.f r9.z, c16.x, r7.x
+(ss)mul.f r5.x, r5.x, r8.y
+mul.f r2.y, c17.z, r2.y
+mov.f32f32 r5.y, c18.x
+mul.f r2.x, c17.y, r2.x
+mul.f r1.w, c17.x, r1.w
+mad.f32 r2.y, r5.x, r2.y, r5.y
+mov.f32f32 r5.y, c18.x
mov.f32f32 r5.z, c18.x
-mov.f32f32 r0.x, r0.x
-max.f r2.z, r2.z, c18.x
-mad.f32 r2.y, r1.z, r2.y, r5.x
-mad.f32 r1.z, r1.z, r2.x, r5.z
-mov.f32f32 r0.x, r0.x
-min.f r5.z, r2.z, c18.y
-max.f r2.x, r2.y, c18.x
-max.f r1.z, r1.z, c18.x
-mul.f r0.x, r0.x, r5.y
-mul.f r1.x, c17.y, r1.x
+exp2 r2.z, r2.z
+mad.f32 r2.x, r5.x, r2.x, r5.y
+max.f r2.y, r2.y, c18.x
+mad.f32 r1.w, r5.x, r1.w, r5.z
+cmps.f.lt r5.x, (neg)r0.x, c18.x
+max.f r2.x, r2.x, c18.x
+min.f r5.z, r2.y, c18.y
+max.f r1.w, r1.w, c18.x
+(ss)sel.b32 r2.y, r2.z, r5.x, c19.x
min.f r5.y, r2.x, c18.y
-min.f r5.x, r1.z, c18.y
-mul.f r1.y, c17.z, r1.y
-mov.f32f32 r1.z, c18.x
-mov.f32f32 r2.x, c18.x
-mul.f r0.w, c17.x, r0.w
-mad.f32 r1.y, r0.x, r1.y, r1.z
-mad.f32 r1.x, r0.x, r1.x, r2.x
-mov.f32f32 r1.z, c18.x
-mad.f32 r0.z, r3.y, r6.z, r0.z
+mul.f r2.x, c16.z, r7.z
+min.f r5.x, r1.w, c18.y
+mul.f r1.w, r2.y, r8.y
+mul.f r1.z, c17.z, r1.z
+mov.f32f32 r2.y, c18.x
+mul.f r1.y, c17.y, r1.y
+mul.f r1.x, c17.x, r1.x
+mad.f32 r1.z, r1.w, r1.z, r2.y
+mov.f32f32 r2.y, c18.x
+(ss)mov.f32f32 r2.z, c18.x
+mad.f32 r1.y, r1.w, r1.y, r2.y
+max.f r1.z, r1.z, c18.x
+mad.f32 r1.x, r1.w, r1.x, r2.z
+mad.f32 r0.z, r8.z, r2.x, r0.z
+mad.f32 r1.w, r8.z, r8.w, r4.y
+min.f r2.z, r1.z, c18.y
max.f r1.y, r1.y, c18.x
max.f r1.x, r1.x, c18.x
-mad.f32 r0.x, r0.x, r0.w, r1.z
+(rpt1)nop
+min.f r2.y, r1.y, c18.y
+min.f r2.x, r1.x, c18.y
max.f r0.z, r0.z, c18.x
-min.f r2.z, r1.y, c18.y
-min.f r2.y, r1.x, c18.y
-max.f r0.x, r0.x, c18.x
-min.f r1.z, r0.z, c18.y
-mad.f32 r0.z, r3.y, r6.y, r8.x
-mul.f r1.x, c16.x, r6.x
-min.f r2.x, r0.x, c18.y
-mad.f32 r0.w, c3.w, r3.w, r0.y
-max.f r0.x, r0.z, c18.x
-mad.f32 r0.y, r3.y, r1.x, r6.w
-mad.f32 r0.z, c3.z, r3.w, r8.y
-mad.f32 r1.x, c2.y, r3.z, r9.y
-min.f r1.y, r0.x, c18.y
-max.f r0.x, r0.y, c18.x
-mad.f32 r0.y, c3.y, r3.w, r1.x
-mad.f32 r3.x, c2.x, r3.z, r3.x
+max.f r1.x, r1.w, c18.x
+mad.f32 r1.y, r8.z, r9.z, r4.x
+mad.f32 r1.z, r0.y, r4.z, r6.y
+min.f r4.z, r0.z, c18.y
+min.f r4.y, r1.x, c18.y
+max.f r0.z, r1.y, c18.x
+max.f r0.x, r0.x, c19.x
+mul.f r1.x, c15.x, r7.x
+mad.f32 r1.y, c7.x, r7.x, r8.x
+min.f r4.x, r0.z, c18.y
+mul.f r0.x, r0.x, r11.x
+mul.f r0.z, c16.z, r7.z
+mul.f r1.w, c16.y, r7.y
+mad.f32 r1.x, r0.y, r1.x, r1.y
+mul.f r1.y, c16.x, r7.x
+mad.f32 r1.z, r0.x, r0.z, r1.z
+mad.f32 r1.w, r0.x, r1.w, r4.w
+mad.f32 r0.z, c3.z, r6.w, r9.x
+mad.f32 r0.y, c3.y, r6.w, r9.y
+max.f r1.z, r1.z, c18.x
+max.f r1.w, r1.w, c18.x
+mad.f32 r0.x, r0.x, r1.y, r1.x
+mad.f32 r1.x, c2.x, r6.z, r6.x
+min.f r1.z, r1.z, c18.y
+min.f r1.y, r1.w, c18.y
+max.f r1.w, r0.x, c18.x
+mad.f32 r0.x, c3.x, r6.w, r1.x
+max.f r4.w, r7.w, c18.x
+max.f r6.x, r7.w, c18.x
+min.f r1.x, r1.w, c18.y
nop
-min.f r1.x, r0.x, c18.y
-mad.f32 r0.x, c3.x, r3.w, r3.x
-mov.f32f32 r3.w, r7.w
-mov.f32f32 r3.z, r7.z
-mov.f32f32 r3.y, r7.y
-mov.f32f32 r3.x, r7.x
+min.f r4.w, r4.w, c18.y
+min.f r1.w, r6.x, c18.y
end
-nop
-nop
-nop
; VERT: outputs: r0.x (0:0) r1.x (1:0) r2.x (1:1) r3.x (5:0) r4.x (2:0) r5.x (2:1)
-; VERT: inputs: r3.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r6.x (0:0,cm=f,il=16,b=0) r7.x (0:0,cm=f,il=20,b=0) r0.w (0:0,cm=7,il=24,b=0) r2.x (0:0,cm=7,il=28,b=0) r5.x (0:0,cm=7,il=32,b=0) r4.x (0:0,cm=7,il=36,b=0) r1.z (0:0,cm=1,il=40,b=0) r8.x (0:0,cm=1,il=44,b=0)
-; VERT: 221 instructions, 0 half, 12 full
+; VERT: inputs: r6.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r7.x (0:0,cm=f,il=16,b=0) r3.x (0:0,cm=f,il=20,b=0) r1.x (0:0,cm=7,il=24,b=0) r1.w (0:0,cm=7,il=28,b=0) r8.x (0:0,cm=7,il=32,b=0) r4.x (0:0,cm=7,il=36,b=0) r2.z (0:0,cm=1,il=40,b=0) r0.w (0:0,cm=1,il=44,b=0)
+; VERT: 189 instructions, 0 half, 12 full
diff --git a/reference/maniadrive/maniadrive-04.asm b/reference/maniadrive/maniadrive-04.asm
index 694d656..59b8c44 100644
--- a/reference/maniadrive/maniadrive-04.asm
+++ b/reference/maniadrive/maniadrive-04.asm
@@ -6,31 +6,20 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c5.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)bary.f r0.z, 0, r0.x
-bary.f r0.w, 3, r0.x
-bary.f (ei)r0.x, 1, r0.x
-nop
-mov.f32f32 r0.y, r0.z
-mov.f32f32 r0.z, r0.w
-mov.f32f32 r0.x, r0.x
-nop
-mov.f32f32 r0.w, r0.y
-mov.f32f32 r1.y, r0.z
-mov.f32f32 r1.x, r0.x
+bary.f r0.w, 1, r0.x
+bary.f (ei)r1.x, 3, r0.x
(rpt5)nop
-sam.p (f32)(xyzw)r0.x, r0.w, s#0, t#0
-(sy)(ss)mul.f r0.w, r0.w, c4.w
-mul.f r0.z, r0.z, c4.z
-mul.f r0.y, r0.y, c4.y
-mul.f r0.x, r0.x, c4.x
-mov.f32f32 r1.w, r0.w
-mov.f32f32 r1.z, r0.z
-mov.f32f32 r1.y, r0.y
-mov.f32f32 r1.x, r0.x
+sam.p (f32)(xyzw)r0.x, r0.z, s#0, t#0
+(sy)mul.f r1.w, r0.w, c4.w
+mul.f r1.z, r0.z, c4.z
+mul.f r1.y, r0.y, c4.y
+(ss)mul.f r1.x, r0.x, c4.x
end
nop
nop
; FRAG: outputs: r1.x (1:0)
-; FRAG: inputs: r0.x (5:0,cm=f,il=8,b=1)
-; FRAG: 27 instructions, 0 half, 2 full
+; FRAG: inputs: r0.y (5:0,cm=f,il=8,b=1)
+; FRAG: 15 instructions, 0 half, 2 full
diff --git a/reference/maniadrive/maniadrive-06.asm b/reference/maniadrive/maniadrive-06.asm
index 2e8a6fe..0251eb7 100644
--- a/reference/maniadrive/maniadrive-06.asm
+++ b/reference/maniadrive/maniadrive-06.asm
@@ -6,35 +6,20 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
-(sy)(ss)bary.f r0.z, 0, r0.x
-bary.f r0.w, 3, r0.x
-bary.f (ei)r0.x, 1, r0.x
-mov.f32f32 r0.y, c4.z
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r1.z, r0.y
-mov.f32f32 r1.w, r0.z
-mov.f32f32 r2.y, r0.w
-mov.f32f32 r2.x, r0.x
-mov.f32f32 r0.x, c4.y
-mov.f32f32 r0.y, c4.x
-(rpt3)nop
-sam.p (f32)(w)r0.z, r1.w, s#0, t#0
-(sy)mul.f r0.z, r1.y, c4.w
-mov.f32f32 r1.y, r0.x
-mov.f32f32 r1.x, r0.y
-nop
-mov.f32f32 r0.x, r0.z
-(rpt2)nop
-mov.f32f32 r0.x, r0.x
+@const(c5.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
+(sy)(ss)bary.f r1.w, 0, r0.x
+bary.f r2.x, 1, r0.x
+bary.f (ei)r2.y, 3, r0.x
+mov.f32f32 r1.z, c4.z
+mov.f32f32 r1.y, c4.y
+mov.f32f32 r1.x, c4.x
(rpt2)nop
-(ss)mov.f32f32 r1.w, r0.x
+sam.p (f32)(w)r0.x, r1.w, s#0, t#0
+(sy)(ss)mul.f r1.w, r0.w, c4.w
end
nop
nop
-nop
; FRAG: outputs: r1.x (1:0)
-; FRAG: inputs: r0.x (5:0,cm=f,il=8,b=1)
-; FRAG: 32 instructions, 0 half, 3 full
+; FRAG: inputs: r1.z (5:0,cm=f,il=8,b=1)
+; FRAG: 12 instructions, 0 half, 3 full
diff --git a/reference/maniadrive/maniadrive-07.asm b/reference/maniadrive/maniadrive-07.asm
index 0a1e555..e61b89b 100644
--- a/reference/maniadrive/maniadrive-07.asm
+++ b/reference/maniadrive/maniadrive-07.asm
@@ -6,39 +6,20 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
-(sy)(ss)bary.f r0.z, 4, r0.x
-bary.f r0.w, 7, r0.x
-bary.f r1.x, 5, r0.x
-bary.f r1.y, 2, r0.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.w, r0.z
-mov.f32f32 r2.y, r0.w
-mov.f32f32 r2.x, r1.x
-mov.f32f32 r1.z, r1.y
-bary.f r0.z, 1, r0.x
-bary.f r0.w, 0, r0.x
-bary.f (ei)r0.x, 3, r0.x
+@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
+(sy)(ss)bary.f r1.w, 4, r0.x
+bary.f r2.x, 5, r0.x
+bary.f r2.y, 7, r0.x
+bary.f r0.z, 3, r0.x
+bary.f r1.z, 2, r0.x
+bary.f r1.y, 1, r0.x
+bary.f (ei)r1.x, 0, r0.x
(rpt1)nop
sam.p (f32)(w)r1.w, r1.w, s#0, t#0
-mov.f32f32 r0.y, r0.z
-(sy)mul.f r0.x, r2.z, r0.x
-mov.f32f32 r0.z, r0.w
-(rpt1)nop
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r1.y, r0.y
-mov.f32f32 r1.x, r0.z
-nop
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
-(ss)mov.f32f32 r1.w, r0.x
+(sy)(ss)mul.f r1.w, r2.z, r0.z
end
nop
-nop
-nop
; FRAG: outputs: r1.x (1:0)
-; FRAG: inputs: r63.y (1:0,cm=f,il=8,b=1) r0.x (5:0,cm=f,il=12,b=1)
-; FRAG: 33 instructions, 0 half, 3 full
+; FRAG: inputs: r63.w (1:0,cm=f,il=8,b=1) r1.z (5:0,cm=f,il=12,b=1)
+; FRAG: 12 instructions, 0 half, 3 full
diff --git a/reference/maniadrive/maniadrive-08.asm b/reference/maniadrive/maniadrive-08.asm
index cca09e5..0583b5d 100644
--- a/reference/maniadrive/maniadrive-08.asm
+++ b/reference/maniadrive/maniadrive-08.asm
@@ -1,17 +1,17 @@
; options:
; VERT: new compiler
-@in(r2.x) in0
-@in(r2.y) in1
-@in(r2.z) in2
-@in(r2.w) in3
-@in(r0.x) in4
-@in(r0.y) in5
-@in(r0.z) in6
-@in(r0.w) in7
-@in(r3.x) in8
-@in(r3.y) in9
-@in(r3.z) in10
-@in(r3.w) in11
+@in(r1.x) in0
+@in(r1.y) in1
+@in(r1.z) in2
+@in(r1.w) in3
+@in(r3.x) in4
+@in(r3.y) in5
+@in(r3.z) in6
+@in(r3.w) in7
+@in(r2.x) in8
+@in(r2.y) in9
+@in(r2.z) in10
+@in(r2.w) in11
@out(r0.x) out0
@out(r0.y) out1
@out(r0.z) out2
@@ -24,43 +24,36 @@
@out(r2.y) out9
@out(r2.z) out10
@out(r2.w) out11
-(sy)(ss)mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
-max.f r0.w, r0.w, c4.x
-max.f r0.z, r0.z, c4.x
-max.f r0.y, r0.y, c4.x
-max.f r0.x, r0.x, c4.x
-min.f r1.w, r0.w, c4.y
-min.f r1.z, r0.z, c4.y
-min.f r1.y, r0.y, c4.y
-min.f r1.x, r0.x, c4.y
-mul.f r0.x, r2.x, c0.w
-mul.f r0.y, r2.x, c0.z
-mad.f32 r0.x, c1.w, r2.y, r0.x
-mad.f32 r0.y, c1.z, r2.y, r0.y
-mad.f32 r0.x, c2.w, r2.z, r0.x
-mad.f32 r0.y, c2.z, r2.z, r0.y
-mad.f32 r0.w, c3.w, r2.w, r0.x
-mad.f32 r0.z, c3.z, r2.w, r0.y
-mul.f r0.x, r2.x, c0.y
-mul.f r0.y, r2.x, c0.x
-mad.f32 r0.x, c1.y, r2.y, r0.x
-mad.f32 r0.y, c1.x, r2.y, r0.y
-mad.f32 r0.x, c2.y, r2.z, r0.x
-mad.f32 r2.x, c2.x, r2.z, r0.y
-mad.f32 r0.y, c3.y, r2.w, r0.x
-mad.f32 r0.x, c3.x, r2.w, r2.x
-mov.f32f32 r2.w, r3.w
-mov.f32f32 r2.z, r3.z
-mov.f32f32 r2.y, r3.y
-mov.f32f32 r2.x, r3.x
+@const(c4.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000
+(sy)(ss)mul.f r0.x, r1.x, c0.w
+mul.f r0.y, r1.x, c0.z
+mad.f32 r0.x, c1.w, r1.y, r0.x
+mad.f32 r0.y, c1.z, r1.y, r0.y
+mad.f32 r0.x, c2.w, r1.z, r0.x
+mad.f32 r0.y, c2.z, r1.z, r0.y
+mad.f32 r0.w, c3.w, r1.w, r0.x
+mad.f32 r0.z, c3.z, r1.w, r0.y
+mul.f r0.x, r1.x, c0.y
+mul.f r0.y, r1.x, c0.x
+mad.f32 r0.x, c1.y, r1.y, r0.x
+mad.f32 r0.y, c1.x, r1.y, r0.y
+mad.f32 r0.x, c2.y, r1.z, r0.x
+mad.f32 r1.x, c2.x, r1.z, r0.y
+mad.f32 r0.y, c3.y, r1.w, r0.x
+mad.f32 r0.x, c3.x, r1.w, r1.x
+max.f r1.x, r3.w, c4.x
+max.f r1.y, r3.z, c4.x
+max.f r3.y, r3.y, c4.x
+max.f r3.x, r3.x, c4.x
+min.f r1.w, r1.x, c4.y
+min.f r1.z, r1.y, c4.y
+min.f r1.y, r3.y, c4.y
+min.f r1.x, r3.x, c4.y
end
nop
nop
nop
; VERT: outputs: r0.x (0:0) r1.x (1:0) r2.x (5:0)
-; VERT: inputs: r2.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=f,il=12,b=0) r3.x (0:0,cm=f,il=16,b=0)
-; VERT: 33 instructions, 0 half, 4 full
+; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) r3.x (0:0,cm=f,il=12,b=0) r2.x (0:0,cm=f,il=16,b=0)
+; VERT: 25 instructions, 0 half, 4 full
diff --git a/reference/maniadrive/maniadrive-09.asm b/reference/maniadrive/maniadrive-09.asm
index 6a4268b..cc94e0f 100644
--- a/reference/maniadrive/maniadrive-09.asm
+++ b/reference/maniadrive/maniadrive-09.asm
@@ -6,39 +6,32 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)bary.f r0.z, 8, r0.x
-bary.f r0.w, 11, r0.x
-bary.f r1.x, 9, r0.x
+bary.f r0.w, 9, r0.x
+bary.f r1.x, 11, r0.x
bary.f r1.y, 3, r0.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.x, r1.x
bary.f r1.z, 6, r0.x
-mov.f32f32 r1.w, r0.z
-mov.f32f32 r2.y, r0.w
-mov.f32f32 r2.x, r1.x
-bary.f r0.z, 2, r0.x
-bary.f r0.w, 5, r0.x
-bary.f r1.x, 1, r0.x
+bary.f r1.w, 2, r0.x
+bary.f r2.x, 5, r0.x
+bary.f r2.y, 1, r0.x
bary.f r2.z, 4, r0.x
+sam.p (f32)(xyzw)r2.w, r0.z, s#0, t#0
+(sy)(ss)mul.f r0.z, r3.y, r1.w
bary.f (ei)r0.x, 0, r0.x
-nop
-sam.p (f32)(xyzw)r2.w, r1.w, s#0, t#0
-(sy)mul.f r0.y, r3.z, r1.y
-mul.f r0.z, r3.y, r0.z
-mul.f r1.x, r3.x, r1.x
+mul.f r1.w, r3.z, r1.y
+mul.f r0.y, r3.x, r2.y
+add.f r1.z, r0.z, r1.z
mul.f r0.x, r2.w, r0.x
-mov.f32f32 r0.y, r0.y
-add.f r0.z, r0.z, r1.z
-add.f r0.w, r1.x, r0.w
-add.f r0.x, r0.x, r2.z
-(ss)mov.f32f32 r1.w, r0.y
-mov.f32f32 r1.z, r0.z
-mov.f32f32 r1.y, r0.w
-mov.f32f32 r1.x, r0.x
+nop
+add.f r1.y, r0.y, r2.x
+nop
+add.f r1.x, r0.x, r2.z
end
nop
+nop
+nop
; FRAG: outputs: r1.x (1:0)
-; FRAG: inputs: r0.z (1:0,cm=f,il=8,b=1) r1.x (1:1,cm=f,il=12,b=1) r0.x (5:0,cm=f,il=16,b=1)
-; FRAG: 31 instructions, 0 half, 4 full
+; FRAG: inputs: r0.z (1:0,cm=f,il=8,b=1) r1.x (1:1,cm=f,il=12,b=1) r0.y (5:0,cm=f,il=16,b=1)
+; FRAG: 21 instructions, 0 half, 4 full
diff --git a/reference/maniadrive/maniadrive-10.asm b/reference/maniadrive/maniadrive-10.asm
index cdcbf04..0babf82 100644
--- a/reference/maniadrive/maniadrive-10.asm
+++ b/reference/maniadrive/maniadrive-10.asm
@@ -4,10 +4,10 @@
@in(r6.y) in1
@in(r6.z) in2
@in(r6.w) in3
-@in(r7.x) in4
-@in(r7.y) in5
-@in(r7.z) in6
-@in(r7.w) in7
+@in(r3.x) in4
+@in(r3.y) in5
+@in(r3.z) in6
+@in(r3.w) in7
@out(r0.x) out0
@out(r0.y) out1
@out(r0.z) out2
@@ -32,219 +32,193 @@
@out(r5.y) out21
@out(r5.z) out22
@out(r5.w) out23
+@const(c25.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000
+@const(c26.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)mul.f r0.x, r6.x, c14.x
-mov.f32f32 r0.y, c25.y
+mul.f r0.y, r6.x, c14.y
mad.f32 r0.x, c15.x, r6.y, r0.x
-mov.f32f32 r0.z, c12.w
+mad.f32 r0.y, c15.y, r6.y, r0.y
mad.f32 r0.x, c16.x, r6.z, r0.x
-mov.f32f32 r0.y, r0.y
+mad.f32 r0.y, c16.y, r6.z, r0.y
mad.f32 r0.x, c17.x, r6.w, r0.x
-mov.f32f32 r0.z, r0.z
+mad.f32 r0.y, c17.y, r6.w, r0.y
+mul.f r0.z, r6.x, c14.z
mov.f32f32 r0.w, c25.y
-mov.f32f32 r1.x, c9.w
add.f r0.x, c13.x, (neg)r0.x
-max.f r0.y, r0.y, c25.x
-max.f r0.z, r0.z, c25.x
-mov.f32f32 r0.w, r0.w
-mul.f r1.y, r0.x, r0.x
-mul.f r1.z, r6.x, c14.y
-min.f r5.w, r0.y, c25.y
-mad.f32 r0.y, c15.y, r6.y, r1.z
-min.f r4.w, r0.z, c25.y
-mad.f32 r0.y, c16.y, r6.z, r0.y
-max.f r0.z, r0.w, c25.x
-mad.f32 r0.y, c17.y, r6.w, r0.y
-mov.f32f32 r0.w, r1.x
-mov.f32f32 r1.x, c4.x
-mov.f32f32 r1.z, c4.x
add.f r0.y, c13.y, (neg)r0.y
-min.f r2.w, r0.z, c25.y
-max.f r0.z, r0.w, c25.x
-mul.f r0.w, r1.x, c5.x
-mad.f32 r1.x, r0.y, r0.y, r1.y
-mov.f32f32 r1.y, c5.y
-mul.f r1.z, r1.z, c6.x
-mov.f32f32 r2.x, c4.x
-mov.f32f32 r1.x, r1.x
-mul.f r2.y, r6.x, c14.z
-min.f r1.w, r0.z, c25.y
-mad.f32 r0.z, c15.z, r6.y, r2.y
-mad.f32 r0.w, c4.y, r1.y, r0.w
+mad.f32 r0.z, c15.z, r6.y, r0.z
+max.f r0.w, r0.w, c25.x
+mul.f r1.x, r0.x, r0.x
mad.f32 r0.z, c16.z, r6.z, r0.z
-mov.f32f32 r1.y, c6.y
+mad.f32 r1.x, r0.y, r0.y, r1.x
mad.f32 r0.z, c17.z, r6.w, r0.z
-mov.f32f32 r0.w, r0.w
-mul.f r2.x, r2.x, c7.x
-mad.f32 r1.y, c4.y, r1.y, r1.z
+min.f r5.w, r0.w, c25.y
+mov.f32f32 r0.w, c12.w
+mov.f32f32 r1.y, c25.y
add.f r0.z, c13.z, (neg)r0.z
-mov.f32f32 r1.z, c5.z
-mov.f32f32 r2.y, c7.y
-mov.f32f32 r1.y, r1.y
+mov.f32f32 r1.z, c9.w
+mov.f32f32 r1.w, c4.x
+mov.f32f32 r2.x, c4.x
mad.f32 r1.x, r0.z, r0.z, r1.x
-mad.f32 r0.w, c4.z, r1.z, r0.w
-mov.f32f32 r1.z, c6.z
-mad.f32 r2.x, c4.y, r2.y, r2.x
-mul.f r2.y, r6.x, c0.w
-mul.f r2.z, r6.x, c0.z
-mul.f r3.x, r6.x, c0.y
+max.f r0.w, r0.w, c25.x
+max.f r1.y, r1.y, c25.x
+max.f r1.z, r1.z, c25.x
+mul.f r1.w, r1.w, c5.x
+mul.f r2.x, r2.x, c6.x
+mov.f32f32 r2.y, c4.x
rsq r1.x, (abs)r1.x
(ss)mul.f r0.x, r0.x, r1.x
-rcp r3.y, r1.x
-(ss)mov.f32f32 r3.y, r3.y
-rcp r3.z, r1.x
-(ss)mov.f32f32 r3.z, r3.z
-rsq r3.w, (abs)r1.x
-(ss)mul.f r0.y, r0.y, r3.w
-add.f r3.w, r0.x, c25.x
-mul.f r4.x, r1.x, r3.y
-mul.f r0.x, r0.w, r0.x
-mad.f32 r1.y, c4.z, r1.z, r1.y
-mul.f r1.z, r3.w, r3.w
-add.f r4.y, r0.y, c25.x
-mov.f32f32 r4.x, r4.x
-mad.f32 r0.x, r1.y, r0.y, r0.x
-mul.f r0.y, r3.z, r3.y
-mad.f32 r1.z, r4.y, r4.y, r1.z
-mul.f r3.z, c18.x, r4.x
-mov.f32f32 r0.x, r0.x
-(ss)rsq r1.x, (abs)r1.x
-mad.f32 r3.y, c18.y, r3.y, r3.z
-mov.f32f32 r1.z, r1.z
-(ss)mul.f r0.z, r0.z, r1.x
-(ss)mov.f32f32 r1.x, r2.x
-mov.f32f32 r2.x, c7.z
-mov.f32f32 r3.y, r3.y
-add.f r3.z, r0.z, c25.y
-mov.f32f32 r0.y, r0.y
-mad.f32 r1.x, c4.z, r2.x, r1.x
-mad.f32 r2.x, c1.w, r6.y, r2.y
-mad.f32 r1.z, r3.z, r3.z, r1.z
-mad.f32 r0.y, c18.z, r0.y, r3.y
-mad.f32 r0.x, r1.x, r0.z, r0.x
-mad.f32 r0.z, c2.w, r6.z, r2.x
-mad.f32 r3.y, c1.z, r6.y, r2.z
-mad.f32 r3.x, c1.y, r6.y, r3.x
-mul.f r6.x, r6.x, c0.x
-rsq r1.z, (abs)r1.z
-(ss)mov.f32f32 r1.z, r1.z
-mov.f32f32 r0.y, r0.y
-max.f r2.x, (neg)r0.x, c26.x
-max.f r2.y, r0.x, c26.x
-mul.f r2.z, r3.w, r1.z
-mul.f r3.w, r4.y, r1.z
-mul.f r1.z, r3.z, r1.z
-mov.f32f32 r2.x, r2.x
-mul.f r0.w, r0.w, r2.z
-rcp r2.z, r0.y
-mov.f32f32 r3.z, c26.y
-mad.f32 r0.w, r1.y, r3.w, r0.w
-mov.f32f32 r1.y, c26.y
-rcp r3.w, r0.y
-mov.f32f32 r2.y, r2.y
-(ss)mul.f r3.z, r3.z, r2.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r4.x, c12.z
-mad.f32 r0.w, r1.x, r1.z, r0.w
-mad.f32 r1.x, c22.z, r3.z, r4.x
-mov.f32f32 r1.z, c12.y
-mov.f32f32 r4.x, c12.x
-max.f r4.y, (neg)r0.w, c26.x
+rcp r2.z, r1.x
+(ss)mov.f32f32 r2.w, r2.z
+mov.f32f32 r4.x, c5.y
+rsq r4.y, (abs)r1.x
+(ss)mul.f r0.y, r0.y, r4.y
+add.f r4.y, r0.x, c25.x
+mul.f r4.z, r1.x, r2.w
+mad.f32 r1.w, c4.y, r4.x, r1.w
+mov.f32f32 r4.x, c5.z
+mul.f r4.w, r4.y, r4.y
+add.f r5.x, r0.y, c25.x
+mul.f r4.z, c18.x, r4.z
+mad.f32 r4.x, c4.z, r4.x, r1.w
+mad.f32 r1.w, c18.y, r2.w, r4.z
+mad.f32 r4.z, r5.x, r5.x, r4.w
+rsq r2.w, (abs)r1.x
+(ss)mul.f r0.z, r0.z, r2.w
+(ss)rcp r1.x, r1.x
+(ss)mul.f r1.x, r1.x, r2.z
+mul.f r0.x, r4.x, r0.x
+mov.f32f32 r2.z, c6.y
+add.f r5.y, r0.z, c25.y
+mad.f32 r1.x, c18.z, r1.x, r1.w
+min.f r4.w, r0.w, c25.y
+min.f r2.w, r1.y, c25.y
+mad.f32 r0.w, r5.y, r5.y, r4.z
+mov.f32f32 r1.y, r1.x
+mad.f32 r2.x, c4.y, r2.z, r2.x
+mov.f32f32 r2.z, c6.z
+min.f r1.w, r1.z, c25.y
+mul.f r1.z, r2.y, c7.x
+mov.f32f32 r2.y, c7.y
+rsq r0.w, (abs)r0.w
+(ss)mov.f32f32 r4.z, r0.w
+rcp r5.z, r1.y
+mov.f32f32 r7.x, c26.y
+mov.f32f32 r7.y, c26.y
+(ss)rcp r1.y, r1.y
+nop
+rcp r1.x, r1.x
+mul.f r4.y, r4.y, r4.z
+(ss)mul.f r7.x, r7.x, r5.z
+mov.f32f32 r7.z, c12.y
+mov.f32f32 r7.w, c12.x
+mul.f r4.x, r4.x, r4.y
+mul.f r4.y, r5.x, r4.z
+mad.f32 r2.x, c4.z, r2.z, r2.x
+mov.f32f32 r2.z, c12.z
+mad.f32 r4.z, c22.y, r7.x, r7.z
+mad.f32 r5.x, c22.x, r7.x, r7.w
+mad.f32 r4.x, r2.x, r4.y, r4.x
+mul.f r0.w, r5.y, r0.w
+mad.f32 r1.z, c4.y, r2.y, r1.z
+mov.f32f32 r2.y, c7.z
+mad.f32 r2.z, c22.z, r7.x, r2.z
+mad.f32 r0.x, r2.x, r0.y, r0.x
+mul.f r0.y, r7.y, r5.z
+mad.f32 r1.z, c4.z, r2.y, r1.z
+mov.f32f32 r2.x, c9.z
+mov.f32f32 r2.y, c9.y
+mov.f32f32 r4.y, c9.x
+mad.f32 r0.w, r1.z, r0.w, r4.x
+mad.f32 r0.x, r1.z, r0.z, r0.x
+mad.f32 r0.z, c19.z, r0.y, r2.x
+mad.f32 r1.z, c19.y, r0.y, r2.y
+max.f r2.x, (neg)r0.w, c26.x
max.f r0.w, r0.w, c26.x
-mul.f r2.x, r2.x, r3.w
-mad.f32 r1.z, c22.y, r3.z, r1.z
-mad.f32 r3.z, c22.x, r3.z, r4.x
-mul.f r1.y, r1.y, r2.z
-mov.f32f32 r2.z, c9.y
-log2 r4.x, r4.y
-(ss)mov.f32f32 r4.x, r4.x
-(ss)absneg.f r4.y, (neg)c11.x
-log2 r0.w, r0.w
-(ss)mov.f32f32 r0.w, r0.w
-mov.f32f32 r4.z, c8.x
-mad.f32 r1.x, c23.z, r2.x, r1.x
-min.f r4.y, (neg)r4.y, c26.z
-mad.f32 r1.z, c23.y, r2.x, r1.z
-mad.f32 r2.x, c23.x, r2.x, r3.z
-mov.f32f32 r3.z, c9.z
-mul.f r4.x, r4.y, r4.x
-min.f r4.y, r4.z, c26.z
-max.f r1.x, r1.x, c25.x
-max.f r1.z, r1.z, c25.x
-mov.f32f32 r4.x, r4.x
-mul.f r0.w, r4.y, r0.w
-min.f r4.z, r1.x, c25.y
-min.f r4.y, r1.z, c25.y
-max.f r1.x, r2.x, c25.x
-mad.f32 r1.z, c19.z, r1.y, r3.z
-mad.f32 r2.x, c19.y, r1.y, r2.z
-exp2 r2.z, r4.x
-(ss)mov.f32f32 r2.z, r2.z
-cmps.f.lt r3.z, (neg)r0.x, c25.x
-mov.f32f32 r0.w, r0.w
-(ss)min.f r4.x, r1.x, c25.y
-mul.f r1.x, r2.y, r3.w
-sel.b32 r2.y, r2.z, r3.z, c26.x
-mov.f32f32 r2.z, c9.x
-rcp r0.y, r0.y
+max.f r2.y, (neg)r0.x, c26.x
+max.f r4.x, r0.x, c26.x
+mad.f32 r0.y, c19.x, r0.y, r4.y
+cmps.f.lt r4.y, (neg)r0.x, c25.x
cmps.f.lt r0.x, (neg)r0.x, c25.x
-mad.f32 r1.z, c20.z, r1.x, r1.z
-mov.f32f32 r2.y, r2.y
+log2 r2.x, r2.x
+absneg.f r5.y, (neg)c11.x
+log2 r0.w, r0.w
+mov.f32f32 r5.z, c8.x
+mul.f r2.y, r2.y, r1.x
+(ss)mul.f r1.x, r4.x, r1.x
+min.f r4.x, (neg)r5.y, c26.z
+min.f r5.y, r5.z, c26.z
+mad.f32 r2.z, c23.z, r2.y, r2.z
+mad.f32 r4.z, c23.y, r2.y, r4.z
+(ss)mul.f r2.x, r4.x, r2.x
+mul.f r0.w, r5.y, r0.w
+max.f r2.z, r2.z, c25.x
+max.f r4.x, r4.z, c25.x
+mad.f32 r2.y, c23.x, r2.y, r5.x
+mad.f32 r0.z, c20.z, r1.x, r0.z
+mad.f32 r7.x, c20.y, r1.x, r1.z
+exp2 r1.z, r2.x
+(ss)sel.b32 r1.z, r1.z, r4.y, c26.x
exp2 r0.w, r0.w
-(ss)mov.f32f32 r0.w, r0.w
-mad.f32 r2.x, c20.y, r1.x, r2.x
-max.f r1.z, r1.z, c25.x
-mov.f32f32 r2.y, r2.y
-sel.b32 r0.x, r0.w, r0.x, c26.x
-max.f r0.w, r2.x, c25.x
-mad.f32 r3.z, c19.x, r1.y, r2.z
-mul.f r1.y, r2.y, r0.y
+(ss)sel.b32 r0.x, r0.w, r0.x, c26.x
+min.f r4.z, r2.z, c25.y
+min.f r4.y, r4.x, c25.y
+(ss)mul.f r0.w, r1.z, r1.y
+mov.f32f32 r1.z, c25.x
mov.f32f32 r2.x, c25.x
-mov.f32f32 r2.y, c25.x
mov.f32f32 r2.z, c25.x
-mad.f32 r2.x, c24.z, r1.y, r2.x
-mad.f32 r2.y, c24.y, r1.y, r2.y
-mad.f32 r1.y, c24.x, r1.y, r2.z
-mov.f32f32 r0.x, r0.x
-max.f r2.x, r2.x, c25.x
-max.f r2.y, r2.y, c25.x
-max.f r1.y, r1.y, c25.x
-mov.f32f32 r0.x, r0.x
-min.f r5.z, r2.x, c25.y
-min.f r5.y, r2.y, c25.y
-min.f r5.x, r1.y, c25.y
-mul.f r0.x, r0.x, r0.y
-mov.f32f32 r0.y, c25.x
-mov.f32f32 r1.y, c25.x
+mad.f32 r1.z, c24.z, r0.w, r1.z
+mad.f32 r2.x, c24.y, r0.w, r2.x
+mad.f32 r0.w, c24.x, r0.w, r2.z
+mul.f r0.x, r0.x, r1.y
+max.f r1.y, r1.z, c25.x
+max.f r1.z, r2.x, c25.x
+max.f r0.w, r0.w, c25.x
mov.f32f32 r2.x, c25.x
-mad.f32 r0.y, c21.z, r0.x, r0.y
+min.f r5.z, r1.y, c25.y
+min.f r5.y, r1.z, c25.y
+min.f r5.x, r0.w, c25.y
+mad.f32 r0.w, c21.z, r0.x, r2.x
+mov.f32f32 r1.y, c25.x
+mov.f32f32 r1.z, c25.x
mad.f32 r1.y, c21.y, r0.x, r1.y
-mad.f32 r0.x, c21.x, r0.x, r2.x
-min.f r1.z, r1.z, c25.y
-max.f r0.y, r0.y, c25.x
-max.f r1.y, r1.y, c25.x
+max.f r0.w, r0.w, c25.x
+mad.f32 r0.x, c21.x, r0.x, r1.z
+max.f r1.z, r2.y, c25.x
+max.f r0.z, r0.z, c25.x
+min.f r2.z, r0.w, c25.y
+max.f r0.w, r1.y, c25.x
max.f r0.x, r0.x, c25.x
-nop
-min.f r2.z, r0.y, c25.y
-min.f r2.y, r1.y, c25.y
+(rpt1)nop
+min.f r2.y, r0.w, c25.y
min.f r2.x, r0.x, c25.y
-min.f r1.y, r0.w, c25.y
-mad.f32 r0.x, c20.x, r1.x, r3.z
-mad.f32 r0.w, c3.w, r6.w, r0.z
-mad.f32 r0.y, c2.z, r6.z, r3.y
-mad.f32 r1.x, c2.y, r6.z, r3.x
-max.f r0.x, r0.x, c25.x
-mad.f32 r0.z, c3.z, r6.w, r0.y
-mad.f32 r0.y, c3.y, r6.w, r1.x
-mad.f32 r3.x, c1.x, r6.y, r6.x
+min.f r4.x, r1.z, c25.y
+min.f r1.z, r0.z, c25.y
+max.f r0.x, r7.x, c25.x
+mad.f32 r0.y, c20.x, r1.x, r0.y
+mul.f r0.z, r6.x, c0.w
+mul.f r0.w, r6.x, c0.z
+min.f r1.y, r0.x, c25.y
+max.f r0.x, r0.y, c25.x
+mad.f32 r0.y, c1.w, r6.y, r0.z
+mad.f32 r0.z, c1.z, r6.y, r0.w
+mul.f r7.x, r6.x, c0.y
min.f r1.x, r0.x, c25.y
-mad.f32 r0.x, c2.x, r6.z, r3.x
-mov.f32f32 r3.w, r7.w
+mad.f32 r0.x, c2.w, r6.z, r0.y
+mad.f32 r0.y, c2.z, r6.z, r0.z
+mad.f32 r0.w, c3.w, r6.w, r0.x
+mad.f32 r0.z, c3.z, r6.w, r0.y
+mad.f32 r0.x, c1.y, r6.y, r7.x
+mul.f r0.y, r6.x, c0.x
+mad.f32 r0.x, c2.y, r6.z, r0.x
+mad.f32 r6.x, c1.x, r6.y, r0.y
+mad.f32 r0.y, c3.y, r6.w, r0.x
+mad.f32 r0.x, c2.x, r6.z, r6.x
+nop
mad.f32 r0.x, c3.x, r6.w, r0.x
-mov.f32f32 r3.z, r7.z
-mov.f32f32 r3.y, r7.y
-mov.f32f32 r3.x, r7.x
end
+nop
; VERT: outputs: r0.x (0:0) r1.x (1:0) r2.x (1:1) r3.x (5:0) r4.x (2:0) r5.x (2:1)
-; VERT: inputs: r6.x (0:0,cm=f,il=8,b=0) r7.x (0:0,cm=f,il=12,b=0)
-; VERT: 212 instructions, 0 half, 8 full
+; VERT: inputs: r6.x (0:0,cm=f,il=8,b=0) r3.x (0:0,cm=f,il=12,b=0)
+; VERT: 184 instructions, 0 half, 8 full
diff --git a/reference/maniadrive/maniadrive-13.asm b/reference/maniadrive/maniadrive-13.asm
index 0c29dd4..4fe07cd 100644
--- a/reference/maniadrive/maniadrive-13.asm
+++ b/reference/maniadrive/maniadrive-13.asm
@@ -6,35 +6,32 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c10.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)mov.f32f32 r0.z, c9.x
-mov.f32f32 r0.w, c9.w
-mov.f32f32 r1.x, c9.y
-bary.f r1.y, 3, r0.x
-mov.f32f32 r1.z, r0.z
-mov.f32f32 r2.x, r0.w
-mov.f32f32 r1.w, r1.x
-bary.f r0.z, 6, r0.x
-bary.f r0.w, 2, r0.x
-bary.f r1.x, 5, r0.x
+mov.f32f32 r0.w, c9.y
+mov.f32f32 r1.x, c9.w
+bary.f r1.y, 6, r0.x
+bary.f r1.z, 2, r0.x
+bary.f r1.w, 3, r0.x
+bary.f r2.x, 5, r0.x
bary.f r2.y, 1, r0.x
bary.f r2.z, 4, r0.x
+sam.p (f32)(xyzw)r2.w, r0.z, s#0, t#0
+(sy)(ss)mul.f r0.z, r3.y, r1.z
bary.f (ei)r0.x, 0, r0.x
-sam.p (f32)(xyzw)r2.w, r1.z, s#0, t#0
-(sy)mul.f r0.y, r3.z, r1.y
-mul.f r0.w, r3.y, r0.w
-mul.f r1.y, r3.x, r2.y
+mul.f r1.w, r3.z, r1.w
+mul.f r0.y, r3.x, r2.y
+add.f r1.z, r0.z, r1.y
mul.f r0.x, r2.w, r0.x
-mov.f32f32 r0.y, r0.y
-add.f r0.z, r0.w, r0.z
-add.f r0.w, r1.y, r1.x
-add.f r0.x, r0.x, r2.z
-(ss)mov.f32f32 r1.w, r0.y
-mov.f32f32 r1.z, r0.z
-mov.f32f32 r1.y, r0.w
-mov.f32f32 r1.x, r0.x
+nop
+add.f r1.y, r0.y, r2.x
+nop
+add.f r1.x, r0.x, r2.z
end
nop
+nop
+nop
; FRAG: outputs: r1.x (1:0)
-; FRAG: inputs: r0.z (1:0,cm=f,il=8,b=1) r1.x (1:1,cm=f,il=12,b=1)
-; FRAG: 27 instructions, 0 half, 4 full
+; FRAG: inputs: r1.x (1:0,cm=f,il=8,b=1) r1.x (1:1,cm=f,il=12,b=1)
+; FRAG: 21 instructions, 0 half, 4 full
diff --git a/reference/maniadrive/maniadrive-14.asm b/reference/maniadrive/maniadrive-14.asm
index 888c361..cd3f2f7 100644
--- a/reference/maniadrive/maniadrive-14.asm
+++ b/reference/maniadrive/maniadrive-14.asm
@@ -24,219 +24,193 @@
@out(r4.y) out17
@out(r4.z) out18
@out(r4.w) out19
+@const(c25.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000
+@const(c26.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)mul.f r0.x, r5.x, c14.x
-mov.f32f32 r0.y, c25.y
+mul.f r0.y, r5.x, c14.y
mad.f32 r0.x, c15.x, r5.y, r0.x
-mov.f32f32 r0.z, c12.w
+mad.f32 r0.y, c15.y, r5.y, r0.y
mad.f32 r0.x, c16.x, r5.z, r0.x
-mov.f32f32 r0.y, r0.y
+mad.f32 r0.y, c16.y, r5.z, r0.y
mad.f32 r0.x, c17.x, r5.w, r0.x
-mov.f32f32 r0.z, r0.z
+mad.f32 r0.y, c17.y, r5.w, r0.y
+mul.f r0.z, r5.x, c14.z
mov.f32f32 r0.w, c25.y
-mov.f32f32 r1.x, c9.w
add.f r0.x, c13.x, (neg)r0.x
-max.f r0.y, r0.y, c25.x
-max.f r0.z, r0.z, c25.x
-mov.f32f32 r0.w, r0.w
-mul.f r1.y, r0.x, r0.x
-mul.f r1.z, r5.x, c14.y
-min.f r4.w, r0.y, c25.y
-mad.f32 r0.y, c15.y, r5.y, r1.z
-min.f r3.w, r0.z, c25.y
-mad.f32 r0.y, c16.y, r5.z, r0.y
-max.f r0.z, r0.w, c25.x
-mad.f32 r0.y, c17.y, r5.w, r0.y
-mov.f32f32 r0.w, r1.x
-mov.f32f32 r1.x, c4.x
-mov.f32f32 r1.z, c4.x
add.f r0.y, c13.y, (neg)r0.y
-min.f r2.w, r0.z, c25.y
-max.f r0.z, r0.w, c25.x
-mul.f r0.w, r1.x, c5.x
-mad.f32 r1.x, r0.y, r0.y, r1.y
-mov.f32f32 r1.y, c5.y
-mul.f r1.z, r1.z, c6.x
-mov.f32f32 r2.x, c4.x
-mov.f32f32 r1.x, r1.x
-mul.f r2.y, r5.x, c14.z
-min.f r1.w, r0.z, c25.y
-mad.f32 r0.z, c15.z, r5.y, r2.y
-mad.f32 r0.w, c4.y, r1.y, r0.w
+mad.f32 r0.z, c15.z, r5.y, r0.z
+max.f r0.w, r0.w, c25.x
+mul.f r1.x, r0.x, r0.x
mad.f32 r0.z, c16.z, r5.z, r0.z
-mov.f32f32 r1.y, c6.y
+mad.f32 r1.x, r0.y, r0.y, r1.x
mad.f32 r0.z, c17.z, r5.w, r0.z
-mov.f32f32 r0.w, r0.w
-mul.f r2.x, r2.x, c7.x
-mad.f32 r1.y, c4.y, r1.y, r1.z
+min.f r4.w, r0.w, c25.y
+mov.f32f32 r0.w, c12.w
+mov.f32f32 r1.y, c25.y
add.f r0.z, c13.z, (neg)r0.z
-mov.f32f32 r1.z, c5.z
-mov.f32f32 r2.y, c7.y
-mov.f32f32 r1.y, r1.y
+mov.f32f32 r1.z, c9.w
+mov.f32f32 r1.w, c4.x
+mov.f32f32 r2.x, c4.x
mad.f32 r1.x, r0.z, r0.z, r1.x
-mad.f32 r0.w, c4.z, r1.z, r0.w
-mov.f32f32 r1.z, c6.z
-mad.f32 r2.x, c4.y, r2.y, r2.x
-mul.f r2.y, r5.x, c0.w
-mul.f r2.z, r5.x, c0.z
-mul.f r3.x, r5.x, c0.y
+max.f r0.w, r0.w, c25.x
+max.f r1.y, r1.y, c25.x
+max.f r1.z, r1.z, c25.x
+mul.f r1.w, r1.w, c5.x
+mul.f r2.x, r2.x, c6.x
+mov.f32f32 r2.y, c4.x
rsq r1.x, (abs)r1.x
(ss)mul.f r0.x, r0.x, r1.x
-rcp r3.y, r1.x
-(ss)mov.f32f32 r3.y, r3.y
-rcp r3.z, r1.x
-(ss)mov.f32f32 r3.z, r3.z
-rsq r4.x, (abs)r1.x
-(ss)mul.f r0.y, r0.y, r4.x
-add.f r4.x, r0.x, c25.x
-mul.f r4.y, r1.x, r3.y
-mul.f r0.x, r0.w, r0.x
-mad.f32 r1.y, c4.z, r1.z, r1.y
-mul.f r1.z, r4.x, r4.x
-add.f r4.z, r0.y, c25.x
-mov.f32f32 r4.y, r4.y
-mad.f32 r0.x, r1.y, r0.y, r0.x
-mul.f r0.y, r3.z, r3.y
-mad.f32 r1.z, r4.z, r4.z, r1.z
-mul.f r3.z, c18.x, r4.y
-mov.f32f32 r0.x, r0.x
-(ss)rsq r1.x, (abs)r1.x
-mad.f32 r3.y, c18.y, r3.y, r3.z
-mov.f32f32 r1.z, r1.z
-(ss)mul.f r0.z, r0.z, r1.x
-(ss)mov.f32f32 r1.x, r2.x
-mov.f32f32 r2.x, c7.z
-mov.f32f32 r3.y, r3.y
-add.f r3.z, r0.z, c25.y
-mov.f32f32 r0.y, r0.y
-mad.f32 r1.x, c4.z, r2.x, r1.x
-mad.f32 r2.x, c1.w, r5.y, r2.y
-mad.f32 r1.z, r3.z, r3.z, r1.z
-mad.f32 r0.y, c18.z, r0.y, r3.y
-mad.f32 r0.x, r1.x, r0.z, r0.x
-mad.f32 r0.z, c2.w, r5.z, r2.x
-mad.f32 r6.x, c1.z, r5.y, r2.z
-mad.f32 r6.y, c1.y, r5.y, r3.x
-mul.f r5.x, r5.x, c0.x
-rsq r1.z, (abs)r1.z
-(ss)mov.f32f32 r1.z, r1.z
-mov.f32f32 r0.y, r0.y
-max.f r2.x, (neg)r0.x, c26.x
-max.f r2.y, r0.x, c26.x
-mul.f r2.z, r4.x, r1.z
-mul.f r3.x, r4.z, r1.z
-mul.f r1.z, r3.z, r1.z
-mov.f32f32 r2.x, r2.x
-mul.f r0.w, r0.w, r2.z
-rcp r2.z, r0.y
-mov.f32f32 r3.y, c26.y
-mad.f32 r0.w, r1.y, r3.x, r0.w
-mov.f32f32 r1.y, c26.y
-rcp r4.x, r0.y
-mov.f32f32 r2.y, r2.y
-(ss)mul.f r3.x, r3.y, r2.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r3.y, c12.z
-mad.f32 r0.w, r1.x, r1.z, r0.w
-mad.f32 r1.x, c22.z, r3.x, r3.y
-mov.f32f32 r1.z, c12.y
-mov.f32f32 r3.y, c12.x
-max.f r3.z, (neg)r0.w, c26.x
+rcp r2.z, r1.x
+(ss)mov.f32f32 r2.w, r2.z
+mov.f32f32 r3.x, c5.y
+rsq r3.y, (abs)r1.x
+(ss)mul.f r0.y, r0.y, r3.y
+add.f r3.y, r0.x, c25.x
+mul.f r3.z, r1.x, r2.w
+mad.f32 r1.w, c4.y, r3.x, r1.w
+mov.f32f32 r3.x, c5.z
+mul.f r3.w, r3.y, r3.y
+add.f r4.x, r0.y, c25.x
+mul.f r3.z, c18.x, r3.z
+mad.f32 r3.x, c4.z, r3.x, r1.w
+mad.f32 r1.w, c18.y, r2.w, r3.z
+mad.f32 r3.z, r4.x, r4.x, r3.w
+rsq r2.w, (abs)r1.x
+(ss)mul.f r0.z, r0.z, r2.w
+(ss)rcp r1.x, r1.x
+(ss)mul.f r1.x, r1.x, r2.z
+mul.f r0.x, r3.x, r0.x
+mov.f32f32 r2.z, c6.y
+add.f r4.y, r0.z, c25.y
+mad.f32 r1.x, c18.z, r1.x, r1.w
+min.f r3.w, r0.w, c25.y
+min.f r2.w, r1.y, c25.y
+mad.f32 r0.w, r4.y, r4.y, r3.z
+mov.f32f32 r1.y, r1.x
+mad.f32 r2.x, c4.y, r2.z, r2.x
+mov.f32f32 r2.z, c6.z
+min.f r1.w, r1.z, c25.y
+mul.f r1.z, r2.y, c7.x
+mov.f32f32 r2.y, c7.y
+rsq r0.w, (abs)r0.w
+(ss)mov.f32f32 r3.z, r0.w
+rcp r4.z, r1.y
+mov.f32f32 r6.x, c26.y
+mov.f32f32 r6.y, c26.y
+(ss)rcp r1.y, r1.y
+nop
+rcp r1.x, r1.x
+mul.f r3.y, r3.y, r3.z
+(ss)mul.f r6.x, r6.x, r4.z
+mov.f32f32 r6.z, c12.y
+mov.f32f32 r6.w, c12.x
+mul.f r3.x, r3.x, r3.y
+mul.f r3.y, r4.x, r3.z
+mad.f32 r2.x, c4.z, r2.z, r2.x
+mov.f32f32 r2.z, c12.z
+mad.f32 r3.z, c22.y, r6.x, r6.z
+mad.f32 r4.x, c22.x, r6.x, r6.w
+mad.f32 r3.x, r2.x, r3.y, r3.x
+mul.f r0.w, r4.y, r0.w
+mad.f32 r1.z, c4.y, r2.y, r1.z
+mov.f32f32 r2.y, c7.z
+mad.f32 r2.z, c22.z, r6.x, r2.z
+mad.f32 r0.x, r2.x, r0.y, r0.x
+mul.f r0.y, r6.y, r4.z
+mad.f32 r1.z, c4.z, r2.y, r1.z
+mov.f32f32 r2.x, c9.z
+mov.f32f32 r2.y, c9.y
+mov.f32f32 r3.y, c9.x
+mad.f32 r0.w, r1.z, r0.w, r3.x
+mad.f32 r0.x, r1.z, r0.z, r0.x
+mad.f32 r0.z, c19.z, r0.y, r2.x
+mad.f32 r1.z, c19.y, r0.y, r2.y
+max.f r2.x, (neg)r0.w, c26.x
max.f r0.w, r0.w, c26.x
-mul.f r2.x, r2.x, r4.x
-mad.f32 r1.z, c22.y, r3.x, r1.z
-mad.f32 r3.x, c22.x, r3.x, r3.y
-mul.f r1.y, r1.y, r2.z
-mov.f32f32 r2.z, c9.y
-log2 r3.y, r3.z
-(ss)mov.f32f32 r3.y, r3.y
-(ss)absneg.f r3.z, (neg)c11.x
-log2 r0.w, r0.w
-(ss)mov.f32f32 r0.w, r0.w
-mov.f32f32 r4.y, c8.x
-mad.f32 r1.x, c23.z, r2.x, r1.x
-min.f r3.z, (neg)r3.z, c26.z
-mad.f32 r1.z, c23.y, r2.x, r1.z
-mad.f32 r2.x, c23.x, r2.x, r3.x
-mov.f32f32 r3.x, c9.z
-mul.f r3.y, r3.z, r3.y
-min.f r3.z, r4.y, c26.z
-max.f r1.x, r1.x, c25.x
-max.f r1.z, r1.z, c25.x
-mov.f32f32 r4.y, r3.y
-mul.f r0.w, r3.z, r0.w
-min.f r3.z, r1.x, c25.y
-min.f r3.y, r1.z, c25.y
-max.f r1.x, r2.x, c25.x
-mad.f32 r1.z, c19.z, r1.y, r3.x
-mad.f32 r2.x, c19.y, r1.y, r2.z
-exp2 r2.z, r4.y
-(ss)mov.f32f32 r2.z, r2.z
-(ss)cmps.f.lt r4.y, (neg)r0.x, c25.x
-mov.f32f32 r0.w, r0.w
-min.f r3.x, r1.x, c25.y
-mul.f r1.x, r2.y, r4.x
-sel.b32 r2.y, r2.z, r4.y, c26.x
-mov.f32f32 r2.z, c9.x
-rcp r0.y, r0.y
+max.f r2.y, (neg)r0.x, c26.x
+max.f r3.x, r0.x, c26.x
+mad.f32 r0.y, c19.x, r0.y, r3.y
+cmps.f.lt r3.y, (neg)r0.x, c25.x
cmps.f.lt r0.x, (neg)r0.x, c25.x
-mad.f32 r1.z, c20.z, r1.x, r1.z
-mov.f32f32 r2.y, r2.y
+log2 r2.x, r2.x
+absneg.f r4.y, (neg)c11.x
+log2 r0.w, r0.w
+mov.f32f32 r4.z, c8.x
+mul.f r2.y, r2.y, r1.x
+(ss)mul.f r1.x, r3.x, r1.x
+min.f r3.x, (neg)r4.y, c26.z
+min.f r4.y, r4.z, c26.z
+mad.f32 r2.z, c23.z, r2.y, r2.z
+mad.f32 r3.z, c23.y, r2.y, r3.z
+(ss)mul.f r2.x, r3.x, r2.x
+mul.f r0.w, r4.y, r0.w
+max.f r2.z, r2.z, c25.x
+max.f r3.x, r3.z, c25.x
+mad.f32 r2.y, c23.x, r2.y, r4.x
+mad.f32 r0.z, c20.z, r1.x, r0.z
+mad.f32 r6.x, c20.y, r1.x, r1.z
+exp2 r1.z, r2.x
+(ss)sel.b32 r1.z, r1.z, r3.y, c26.x
exp2 r0.w, r0.w
-(ss)mov.f32f32 r0.w, r0.w
-mad.f32 r2.x, c20.y, r1.x, r2.x
-max.f r1.z, r1.z, c25.x
-mov.f32f32 r2.y, r2.y
-sel.b32 r0.x, r0.w, r0.x, c26.x
-max.f r0.w, r2.x, c25.x
-mad.f32 r6.z, c19.x, r1.y, r2.z
-mul.f r1.y, r2.y, r0.y
+(ss)sel.b32 r0.x, r0.w, r0.x, c26.x
+min.f r3.z, r2.z, c25.y
+min.f r3.y, r3.x, c25.y
+(ss)mul.f r0.w, r1.z, r1.y
+mov.f32f32 r1.z, c25.x
mov.f32f32 r2.x, c25.x
-mov.f32f32 r2.y, c25.x
mov.f32f32 r2.z, c25.x
-mad.f32 r2.x, c24.z, r1.y, r2.x
-mad.f32 r2.y, c24.y, r1.y, r2.y
-mad.f32 r1.y, c24.x, r1.y, r2.z
-mov.f32f32 r0.x, r0.x
-max.f r2.x, r2.x, c25.x
-max.f r2.y, r2.y, c25.x
-max.f r1.y, r1.y, c25.x
-mov.f32f32 r0.x, r0.x
-min.f r4.z, r2.x, c25.y
-min.f r4.y, r2.y, c25.y
-min.f r4.x, r1.y, c25.y
-mul.f r0.x, r0.x, r0.y
-mov.f32f32 r0.y, c25.x
-mov.f32f32 r1.y, c25.x
+mad.f32 r1.z, c24.z, r0.w, r1.z
+mad.f32 r2.x, c24.y, r0.w, r2.x
+mad.f32 r0.w, c24.x, r0.w, r2.z
+mul.f r0.x, r0.x, r1.y
+max.f r1.y, r1.z, c25.x
+max.f r1.z, r2.x, c25.x
+max.f r0.w, r0.w, c25.x
mov.f32f32 r2.x, c25.x
-mad.f32 r0.y, c21.z, r0.x, r0.y
+min.f r4.z, r1.y, c25.y
+min.f r4.y, r1.z, c25.y
+min.f r4.x, r0.w, c25.y
+mad.f32 r0.w, c21.z, r0.x, r2.x
+mov.f32f32 r1.y, c25.x
+mov.f32f32 r1.z, c25.x
mad.f32 r1.y, c21.y, r0.x, r1.y
-mad.f32 r0.x, c21.x, r0.x, r2.x
-min.f r1.z, r1.z, c25.y
-max.f r0.y, r0.y, c25.x
-max.f r1.y, r1.y, c25.x
+max.f r0.w, r0.w, c25.x
+mad.f32 r0.x, c21.x, r0.x, r1.z
+max.f r1.z, r2.y, c25.x
+max.f r0.z, r0.z, c25.x
+min.f r2.z, r0.w, c25.y
+max.f r0.w, r1.y, c25.x
max.f r0.x, r0.x, c25.x
-nop
-min.f r2.z, r0.y, c25.y
-min.f r2.y, r1.y, c25.y
+(rpt1)nop
+min.f r2.y, r0.w, c25.y
min.f r2.x, r0.x, c25.y
-min.f r1.y, r0.w, c25.y
-mad.f32 r0.x, c20.x, r1.x, r6.z
-mad.f32 r0.w, c3.w, r5.w, r0.z
-mad.f32 r0.y, c2.z, r5.z, r6.x
-mad.f32 r1.x, c2.y, r5.z, r6.y
-max.f r0.x, r0.x, c25.x
-mad.f32 r0.z, c3.z, r5.w, r0.y
-mad.f32 r0.y, c3.y, r5.w, r1.x
-mad.f32 r5.x, c1.x, r5.y, r5.x
+min.f r3.x, r1.z, c25.y
+min.f r1.z, r0.z, c25.y
+max.f r0.x, r6.x, c25.x
+mad.f32 r0.y, c20.x, r1.x, r0.y
+mul.f r0.z, r5.x, c0.w
+mul.f r0.w, r5.x, c0.z
+min.f r1.y, r0.x, c25.y
+max.f r0.x, r0.y, c25.x
+mad.f32 r0.y, c1.w, r5.y, r0.z
+mad.f32 r0.z, c1.z, r5.y, r0.w
+mul.f r6.x, r5.x, c0.y
min.f r1.x, r0.x, c25.y
+mad.f32 r0.x, c2.w, r5.z, r0.y
+mad.f32 r0.y, c2.z, r5.z, r0.z
+mad.f32 r0.w, c3.w, r5.w, r0.x
+mad.f32 r0.z, c3.z, r5.w, r0.y
+mad.f32 r0.x, c1.y, r5.y, r6.x
+mul.f r0.y, r5.x, c0.x
+mad.f32 r0.x, c2.y, r5.z, r0.x
+mad.f32 r5.x, c1.x, r5.y, r0.y
+mad.f32 r0.y, c3.y, r5.w, r0.x
mad.f32 r0.x, c2.x, r5.z, r5.x
nop
mad.f32 r0.x, c3.x, r5.w, r0.x
end
nop
-nop
-nop
; VERT: outputs: r0.x (0:0) r1.x (1:0) r2.x (1:1) r3.x (2:0) r4.x (2:1)
; VERT: inputs: r5.x (0:0,cm=f,il=8,b=0)
-; VERT: 209 instructions, 0 half, 7 full
+; VERT: 184 instructions, 0 half, 7 full
diff --git a/reference/maniadrive/maniadrive-15.asm b/reference/maniadrive/maniadrive-15.asm
index 7441bc0..a69b40d 100644
--- a/reference/maniadrive/maniadrive-15.asm
+++ b/reference/maniadrive/maniadrive-15.asm
@@ -1,16 +1,16 @@
; options:
; VERT: new compiler
-@in(r3.x) in0
-@in(r3.y) in1
-@in(r3.z) in2
-@in(r3.w) in3
+@in(r6.x) in0
+@in(r6.y) in1
+@in(r6.z) in2
+@in(r6.w) in3
@in(r0.x) in4
@in(r0.y) in5
@in(r0.z) in6
-@in(r6.x) in8
-@in(r6.y) in9
-@in(r6.z) in10
-@in(r6.w) in11
+@in(r3.x) in8
+@in(r3.y) in9
+@in(r3.z) in10
+@in(r3.w) in11
@out(r0.x) out0
@out(r0.y) out1
@out(r0.z) out2
@@ -35,211 +35,185 @@
@out(r5.y) out21
@out(r5.z) out22
@out(r5.w) out23
-(sy)(ss)mul.f r0.w, r3.x, c13.x
-mov.f32f32 r1.x, c24.y
-mad.f32 r0.w, c14.x, r3.y, r0.w
-mov.f32f32 r1.y, c11.w
-mad.f32 r0.w, c15.x, r3.z, r0.w
-mov.f32f32 r1.x, r1.x
-mad.f32 r0.w, c16.x, r3.w, r0.w
-mov.f32f32 r1.y, r1.y
+@const(c24.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000
+@const(c25.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
+(sy)(ss)mul.f r0.w, r6.x, c13.x
+mul.f r1.x, r6.x, c13.y
+mad.f32 r0.w, c14.x, r6.y, r0.w
+mad.f32 r1.x, c14.y, r6.y, r1.x
+mad.f32 r0.w, c15.x, r6.z, r0.w
+mad.f32 r1.x, c15.y, r6.z, r1.x
+mad.f32 r0.w, c16.x, r6.w, r0.w
+mad.f32 r1.x, c16.y, r6.w, r1.x
+mul.f r1.y, r6.x, c13.z
mov.f32f32 r1.z, c24.y
-mov.f32f32 r1.w, c8.w
add.f r0.w, c12.x, (neg)r0.w
-max.f r1.x, r1.x, c24.x
-max.f r1.y, r1.y, c24.x
-mov.f32f32 r1.z, r1.z
-mul.f r2.x, r0.w, r0.w
-mul.f r2.y, r3.x, c13.y
-min.f r5.w, r1.x, c24.y
-mad.f32 r1.x, c14.y, r3.y, r2.y
-min.f r4.w, r1.y, c24.y
-mad.f32 r1.x, c15.y, r3.z, r1.x
-max.f r1.y, r1.z, c24.x
-mad.f32 r1.x, c16.y, r3.w, r1.x
-mov.f32f32 r1.z, r1.w
-mul.f r1.w, r3.x, c13.z
-mul.f r2.y, r0.x, c4.x
add.f r1.x, c12.y, (neg)r1.x
-min.f r2.w, r1.y, c24.y
-max.f r1.y, r1.z, c24.x
-mad.f32 r1.z, c14.z, r3.y, r1.w
-mad.f32 r1.w, r1.x, r1.x, r2.x
-mad.f32 r1.z, c15.z, r3.z, r1.z
-mad.f32 r2.x, c4.y, r0.y, r2.y
-mul.f r2.y, r0.x, c5.x
-mov.f32f32 r2.z, r1.w
-mad.f32 r1.z, c16.z, r3.w, r1.z
-min.f r1.w, r1.y, c24.y
-mov.f32f32 r1.y, r2.x
-mad.f32 r2.x, c5.y, r0.y, r2.y
-add.f r1.z, c12.z, (neg)r1.z
-mad.f32 r1.y, c4.z, r0.z, r1.y
-mul.f r0.x, r0.x, c6.x
-mov.f32f32 r2.x, r2.x
-mad.f32 r2.y, r1.z, r1.z, r2.z
-mad.f32 r2.x, c5.z, r0.z, r2.x
-mad.f32 r0.x, c6.y, r0.y, r0.x
-mul.f r0.y, r3.x, c0.w
-mul.f r2.z, r3.x, c0.z
-mul.f r4.x, r3.x, c0.y
-mul.f r3.x, r3.x, c0.x
-rsq r2.y, (abs)r2.y
-(ss)mul.f r0.w, r0.w, r2.y
-rcp r4.y, r2.y
-(ss)mov.f32f32 r4.y, r4.y
-rcp r4.z, r2.y
-(ss)mov.f32f32 r4.z, r4.z
-rsq r5.x, (abs)r2.y
+mad.f32 r1.y, c14.z, r6.y, r1.y
+max.f r1.z, r1.z, c24.x
+mul.f r1.w, r0.w, r0.w
+mad.f32 r1.y, c15.z, r6.z, r1.y
+mad.f32 r1.w, r1.x, r1.x, r1.w
+mad.f32 r1.y, c16.z, r6.w, r1.y
+min.f r5.w, r1.z, c24.y
+mov.f32f32 r1.z, c11.w
+mov.f32f32 r2.x, c24.y
+add.f r1.y, c12.z, (neg)r1.y
+mov.f32f32 r2.y, c8.w
+mul.f r2.z, r6.x, c0.w
+mul.f r2.w, r6.x, c0.z
+mad.f32 r1.w, r1.y, r1.y, r1.w
+max.f r1.z, r1.z, c24.x
+max.f r2.x, r2.x, c24.x
+max.f r2.y, r2.y, c24.x
+mad.f32 r2.z, c1.w, r6.y, r2.z
+mad.f32 r4.x, c1.z, r6.y, r2.w
+mul.f r4.y, r6.x, c0.y
+rsq r1.w, (abs)r1.w
+(ss)mul.f r0.w, r0.w, r1.w
+rcp r2.w, r1.w
+(ss)mov.f32f32 r4.z, r2.w
+mul.f r4.w, r0.x, c4.x
+rsq r5.x, (abs)r1.w
(ss)mul.f r1.x, r1.x, r5.x
add.f r5.x, r0.w, c24.x
-mul.f r5.y, r2.y, r4.y
-mul.f r0.w, r1.y, r0.w
-mul.f r4.z, r4.z, r4.y
-mul.f r5.z, r5.x, r5.x
-add.f r7.x, r1.x, c24.x
-mov.f32f32 r5.y, r5.y
-mad.f32 r0.w, r2.x, r1.x, r0.w
-mov.f32f32 r1.x, r4.z
-mad.f32 r4.z, r7.x, r7.x, r5.z
+mul.f r5.y, r1.w, r4.z
+mad.f32 r4.w, c4.y, r0.y, r4.w
+add.f r5.z, r1.x, c24.x
+mul.f r7.x, r5.x, r5.x
mul.f r5.y, c17.x, r5.y
-mov.f32f32 r0.w, r0.w
-(ss)rsq r2.y, (abs)r2.y
-mad.f32 r4.y, c17.y, r4.y, r5.y
-mov.f32f32 r4.z, r4.z
-(ss)mul.f r1.z, r1.z, r2.y
-mov.f32f32 r0.x, r0.x
-(ss)mov.f32f32 r2.y, r4.y
+mad.f32 r7.y, c4.z, r0.z, r4.w
+mad.f32 r7.x, r5.z, r5.z, r7.x
+rsq r4.w, (abs)r1.w
+(ss)mul.f r1.y, r1.y, r4.w
+mad.f32 r4.z, c17.y, r4.z, r5.y
+(ss)rcp r1.w, r1.w
+(ss)mul.f r1.w, r1.w, r2.w
+mul.f r0.w, r7.y, r0.w
+add.f r5.y, r1.y, c24.y
+mul.f r7.z, r0.x, c5.x
+min.f r4.w, r1.z, c24.y
+min.f r2.w, r2.x, c24.y
+mad.f32 r1.z, r5.y, r5.y, r7.x
+mad.f32 r2.x, c17.z, r1.w, r4.z
+mad.f32 r4.z, c5.y, r0.y, r7.z
+min.f r1.w, r2.y, c24.y
+mad.f32 r7.x, c2.w, r6.z, r2.z
+mad.f32 r7.z, c2.z, r6.z, r4.x
+mad.f32 r7.w, c1.y, r6.y, r4.y
+rsq r1.z, (abs)r1.z
+(ss)mov.f32f32 r2.y, r1.z
+mov.f32f32 r2.z, r2.x
+rcp r2.x, r2.x
+mad.f32 r4.x, c5.z, r0.z, r4.z
+(ss)mul.f r1.z, r5.y, r1.z
+mul.f r4.y, r5.x, r2.y
+mul.f r2.y, r5.z, r2.y
+mad.f32 r0.w, r4.x, r1.x, r0.w
+mul.f r0.x, r0.x, c6.x
+mul.f r1.x, r7.y, r4.y
+rcp r4.y, r2.z
+mov.f32f32 r4.z, c25.y
+mad.f32 r1.x, r4.x, r2.y, r1.x
+mad.f32 r0.x, c6.y, r0.y, r0.x
+mov.f32f32 r0.y, c25.y
mad.f32 r0.x, c6.z, r0.z, r0.x
-add.f r0.z, r1.z, c24.y
-mad.f32 r1.x, c17.z, r1.x, r2.y
-mad.f32 r0.y, c1.w, r3.y, r0.y
-mad.f32 r2.y, c1.z, r3.y, r2.z
-mad.f32 r2.z, r0.z, r0.z, r4.z
-mov.f32f32 r1.x, r1.x
-mad.f32 r0.w, r0.x, r1.z, r0.w
-mad.f32 r0.y, c2.w, r3.z, r0.y
-mad.f32 r2.y, c2.z, r3.z, r2.y
-mad.f32 r7.y, c1.y, r3.y, r4.x
-mad.f32 r3.x, c1.x, r3.y, r3.x
-rsq r1.z, (abs)r2.z
-(ss)mov.f32f32 r1.z, r1.z
-(ss)rcp r2.z, r1.x
-mov.f32f32 r3.y, c25.y
-mov.f32f32 r4.x, c25.y
-rcp r5.y, r1.x
-max.f r4.y, (neg)r0.w, c25.x
-mul.f r4.z, r5.x, r1.z
-(ss)mul.f r3.y, r3.y, r2.z
-mov.f32f32 r5.x, c11.y
-mov.f32f32 r5.z, c11.x
-mul.f r1.y, r1.y, r4.z
-mul.f r4.z, r7.x, r1.z
-mov.f32f32 r7.x, c11.z
-mad.f32 r5.x, c21.y, r3.y, r5.x
-mad.f32 r5.z, c21.x, r3.y, r5.z
-mad.f32 r1.y, r2.x, r4.z, r1.y
-mad.f32 r2.x, c21.z, r3.y, r7.x
-mov.f32f32 r3.y, r4.y
-mul.f r2.z, r4.x, r2.z
-mov.f32f32 r1.y, r1.y
-mul.f r0.z, r0.z, r1.z
-mul.f r1.z, r3.y, r5.y
-mov.f32f32 r3.y, c8.z
-mov.f32f32 r4.x, c8.y
-mad.f32 r0.x, r0.x, r0.z, r1.y
-mad.f32 r0.z, c22.z, r1.z, r2.x
-mad.f32 r1.y, c22.y, r1.z, r5.x
-mad.f32 r1.z, c22.x, r1.z, r5.z
-max.f r2.x, (neg)r0.x, c25.x
-max.f r0.x, r0.x, c25.x
+(ss)mul.f r0.z, r4.z, r4.y
+mov.f32f32 r2.y, c11.y
+mov.f32f32 r4.x, c11.x
+mad.f32 r1.x, r0.x, r1.z, r1.x
+mov.f32f32 r1.z, c11.z
+mad.f32 r2.y, c21.y, r0.z, r2.y
+mad.f32 r4.x, c21.x, r0.z, r4.x
+max.f r4.z, (neg)r1.x, c25.x
+max.f r1.x, r1.x, c25.x
+mad.f32 r0.z, c21.z, r0.z, r1.z
+mad.f32 r0.x, r0.x, r1.y, r0.w
+mul.f r0.y, r0.y, r4.y
+mov.f32f32 r0.w, c8.y
+mov.f32f32 r1.y, c8.x
+log2 r1.z, r4.z
+absneg.f r4.y, (neg)c10.x
+log2 r1.x, r1.x
+(ss)mov.f32f32 r4.z, c7.x
+max.f r5.x, (neg)r0.x, c25.x
+mov.f32f32 r5.y, c8.z
+min.f r4.y, (neg)r4.y, c25.z
+min.f r4.z, r4.z, c25.z
+mul.f r5.x, r5.x, r2.x
+mad.f32 r7.y, c18.z, r0.y, r5.y
+(ss)mul.f r1.z, r4.y, r1.z
+mul.f r1.x, r4.z, r1.x
+mad.f32 r0.z, c22.z, r5.x, r0.z
+mad.f32 r2.y, c22.y, r5.x, r2.y
+mad.f32 r4.x, c22.x, r5.x, r4.x
+max.f r8.x, r0.x, c25.x
+mad.f32 r0.w, c18.y, r0.y, r0.w
+exp2 r1.z, r1.z
+cmps.f.lt r4.y, (neg)r0.x, c24.x
+exp2 r1.x, r1.x
+cmps.f.lt r0.x, (neg)r0.x, c24.x
max.f r0.z, r0.z, c24.x
-max.f r1.y, r1.y, c24.x
-max.f r1.z, r1.z, c24.x
-mad.f32 r3.y, c18.z, r2.z, r3.y
-mad.f32 r5.x, c18.y, r2.z, r4.x
-log2 r2.x, r2.x
-(ss)mov.f32f32 r2.x, r2.x
-absneg.f r4.x, (neg)c10.x
-log2 r0.x, r0.x
-(ss)mov.f32f32 r0.x, r0.x
-mov.f32f32 r5.z, c7.x
+max.f r2.y, r2.y, c24.x
+(ss)sel.b32 r1.z, r1.z, r4.y, c25.x
+rcp r2.z, r2.z
+sel.b32 r0.x, r1.x, r0.x, c25.x
min.f r4.z, r0.z, c24.y
-min.f r0.z, (neg)r4.x, c25.z
-min.f r4.y, r1.y, c24.y
-min.f r4.x, r1.z, c24.y
-max.f r1.y, r0.w, c25.x
-mul.f r0.z, r0.z, r2.x
-min.f r1.z, r5.z, c25.z
-mov.f32f32 r2.x, c8.x
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r0.z, r0.z
-mul.f r0.x, r1.z, r0.x
-mad.f32 r1.z, c18.x, r2.z, r2.x
-mul.f r1.y, r1.y, r5.y
-rcp r2.x, r1.x
-(ss)cmps.f.lt r1.x, (neg)r0.w, c24.x
-cmps.f.lt r0.w, (neg)r0.w, c24.x
-mov.f32f32 r0.x, r0.x
-exp2 r0.z, r0.z
-(ss)mov.f32f32 r0.z, r0.z
-mad.f32 r2.z, c19.z, r1.y, r3.y
-mad.f32 r3.y, c19.y, r1.y, r5.x
-mad.f32 r1.y, c19.x, r1.y, r1.z
-sel.b32 r0.z, r0.z, r1.x, c25.x
-max.f r1.x, r2.z, c24.x
-max.f r2.z, r3.y, c24.x
-max.f r3.y, r1.y, c24.x
-mov.f32f32 r0.z, r0.z
-exp2 r0.x, r0.x
-(ss)mov.f32f32 r0.x, r0.x
-min.f r1.z, r1.x, c24.y
-min.f r1.y, r2.z, c24.y
-mov.f32f32 r0.z, r0.z
-sel.b32 r0.x, r0.x, r0.w, c25.x
-min.f r1.x, r3.y, c24.y
-mad.f32 r0.w, c3.w, r3.w, r0.y
-mul.f r0.y, r0.z, r2.x
-mov.f32f32 r0.z, c24.x
-mov.f32f32 r2.z, c24.x
-mov.f32f32 r3.y, c24.x
-mad.f32 r0.z, c23.z, r0.y, r0.z
-mad.f32 r2.z, c23.y, r0.y, r2.z
-mad.f32 r0.y, c23.x, r0.y, r3.y
-mov.f32f32 r0.x, r0.x
+min.f r4.y, r2.y, c24.y
+(ss)mul.f r0.z, r1.z, r2.z
+mov.f32f32 r1.x, c24.x
+mov.f32f32 r1.z, c24.x
+mov.f32f32 r2.y, c24.x
+mad.f32 r1.x, c23.z, r0.z, r1.x
+mad.f32 r1.z, c23.y, r0.z, r1.z
+mad.f32 r0.z, c23.x, r0.z, r2.y
+mul.f r0.x, r0.x, r2.z
+max.f r1.x, r1.x, c24.x
+max.f r1.z, r1.z, c24.x
+max.f r0.z, r0.z, c24.x
+mov.f32f32 r2.y, c24.x
+min.f r5.z, r1.x, c24.y
+min.f r5.y, r1.z, c24.y
+min.f r5.x, r0.z, c24.y
+mad.f32 r0.z, c20.z, r0.x, r2.y
+mov.f32f32 r1.x, c24.x
+mov.f32f32 r1.z, c24.x
+mad.f32 r1.x, c20.y, r0.x, r1.x
max.f r0.z, r0.z, c24.x
-max.f r2.z, r2.z, c24.x
-max.f r0.y, r0.y, c24.x
-mov.f32f32 r0.x, r0.x
-min.f r5.z, r0.z, c24.y
-min.f r5.y, r2.z, c24.y
-min.f r5.x, r0.y, c24.y
-mul.f r0.x, r0.x, r2.x
-mov.f32f32 r0.y, c24.x
-mov.f32f32 r0.z, c24.x
-mov.f32f32 r2.x, c24.x
-mad.f32 r0.y, c20.z, r0.x, r0.y
-mad.f32 r2.z, c20.y, r0.x, r0.z
-mad.f32 r0.x, c20.x, r0.x, r2.x
-mad.f32 r0.z, c3.z, r3.w, r2.y
-max.f r0.y, r0.y, c24.x
-max.f r2.x, r2.z, c24.x
+mad.f32 r0.x, c20.x, r0.x, r1.z
+max.f r1.z, r4.x, c24.x
+mul.f r8.x, r8.x, r2.x
+(ss)min.f r2.z, r0.z, c24.y
+max.f r0.z, r1.x, c24.x
max.f r0.x, r0.x, c24.x
-nop
-min.f r2.z, r0.y, c24.y
-min.f r2.y, r2.x, c24.y
+(rpt1)nop
+min.f r2.y, r0.z, c24.y
min.f r2.x, r0.x, c24.y
-mad.f32 r0.x, c2.y, r3.z, r7.y
-mad.f32 r3.x, c2.x, r3.z, r3.x
-mad.f32 r0.y, c3.y, r3.w, r0.x
-mad.f32 r0.x, c3.x, r3.w, r3.x
-mov.f32f32 r3.w, r6.w
-mov.f32f32 r3.z, r6.z
-mov.f32f32 r3.y, r6.y
-mov.f32f32 r3.x, r6.x
+min.f r4.x, r1.z, c24.y
+mad.f32 r0.x, c19.z, r8.x, r7.y
+mad.f32 r0.z, c19.y, r8.x, r0.w
+mad.f32 r0.y, c18.x, r0.y, r1.y
+mad.f32 r0.w, c3.w, r6.w, r7.x
+max.f r0.x, r0.x, c24.x
+max.f r1.x, r0.z, c24.x
+mad.f32 r0.y, c19.x, r8.x, r0.y
+mad.f32 r0.z, c3.z, r6.w, r7.z
+min.f r1.z, r0.x, c24.y
+min.f r1.y, r1.x, c24.y
+max.f r0.x, r0.y, c24.x
+mad.f32 r0.y, c2.y, r6.z, r7.w
+mul.f r6.x, r6.x, c0.x
+nop
+min.f r1.x, r0.x, c24.y
+mad.f32 r0.y, c3.y, r6.w, r0.y
+mad.f32 r0.x, c1.x, r6.y, r6.x
+nop
+mad.f32 r0.x, c2.x, r6.z, r0.x
+nop
+mad.f32 r0.x, c3.x, r6.w, r0.x
end
nop
; VERT: outputs: r0.x (0:0) r1.x (1:0) r2.x (1:1) r3.x (5:0) r4.x (2:0) r5.x (2:1)
-; VERT: inputs: r3.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r6.x (0:0,cm=f,il=16,b=0)
-; VERT: 203 instructions, 0 half, 8 full
+; VERT: inputs: r6.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r3.x (0:0,cm=f,il=16,b=0)
+; VERT: 176 instructions, 0 half, 9 full
diff --git a/reference/maniadrive/maniadrive-17.asm b/reference/maniadrive/maniadrive-17.asm
index 7d03d2c..5f47c68 100644
--- a/reference/maniadrive/maniadrive-17.asm
+++ b/reference/maniadrive/maniadrive-17.asm
@@ -27,207 +27,185 @@
@out(r4.y) out17
@out(r4.z) out18
@out(r4.w) out19
+@const(c24.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000
+@const(c25.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)mul.f r0.w, r5.x, c13.x
-mov.f32f32 r1.x, c24.y
+mul.f r1.x, r5.x, c13.y
mad.f32 r0.w, c14.x, r5.y, r0.w
-mov.f32f32 r1.y, c11.w
+mad.f32 r1.x, c14.y, r5.y, r1.x
mad.f32 r0.w, c15.x, r5.z, r0.w
-mov.f32f32 r1.x, r1.x
+mad.f32 r1.x, c15.y, r5.z, r1.x
mad.f32 r0.w, c16.x, r5.w, r0.w
-mov.f32f32 r1.y, r1.y
+mad.f32 r1.x, c16.y, r5.w, r1.x
+mul.f r1.y, r5.x, c13.z
mov.f32f32 r1.z, c24.y
-mov.f32f32 r1.w, c8.w
add.f r0.w, c12.x, (neg)r0.w
-max.f r1.x, r1.x, c24.x
-max.f r1.y, r1.y, c24.x
-mov.f32f32 r1.z, r1.z
-mul.f r2.x, r0.w, r0.w
-mul.f r2.y, r5.x, c13.y
-min.f r4.w, r1.x, c24.y
-mad.f32 r1.x, c14.y, r5.y, r2.y
-min.f r3.w, r1.y, c24.y
-mad.f32 r1.x, c15.y, r5.z, r1.x
-max.f r1.y, r1.z, c24.x
-mad.f32 r1.x, c16.y, r5.w, r1.x
-mov.f32f32 r1.z, r1.w
-mul.f r1.w, r5.x, c13.z
-mul.f r2.y, r0.x, c4.x
add.f r1.x, c12.y, (neg)r1.x
-min.f r2.w, r1.y, c24.y
-max.f r1.y, r1.z, c24.x
-mad.f32 r1.z, c14.z, r5.y, r1.w
-mad.f32 r1.w, r1.x, r1.x, r2.x
-mad.f32 r1.z, c15.z, r5.z, r1.z
-mad.f32 r2.x, c4.y, r0.y, r2.y
-mul.f r2.y, r0.x, c5.x
-mov.f32f32 r2.z, r1.w
-mad.f32 r1.z, c16.z, r5.w, r1.z
-min.f r1.w, r1.y, c24.y
-mov.f32f32 r1.y, r2.x
-mad.f32 r2.x, c5.y, r0.y, r2.y
-add.f r1.z, c12.z, (neg)r1.z
-mad.f32 r1.y, c4.z, r0.z, r1.y
+mad.f32 r1.y, c14.z, r5.y, r1.y
+max.f r1.z, r1.z, c24.x
+mul.f r1.w, r0.w, r0.w
+mad.f32 r1.y, c15.z, r5.z, r1.y
+mad.f32 r1.w, r1.x, r1.x, r1.w
+mad.f32 r1.y, c16.z, r5.w, r1.y
+min.f r4.w, r1.z, c24.y
+mov.f32f32 r1.z, c11.w
+mov.f32f32 r2.x, c24.y
+add.f r1.y, c12.z, (neg)r1.y
+mov.f32f32 r2.y, c8.w
+mul.f r2.z, r5.x, c0.w
+mul.f r2.w, r5.x, c0.z
+mad.f32 r1.w, r1.y, r1.y, r1.w
+max.f r1.z, r1.z, c24.x
+max.f r2.x, r2.x, c24.x
+max.f r2.y, r2.y, c24.x
+mad.f32 r2.z, c1.w, r5.y, r2.z
+mad.f32 r3.x, c1.z, r5.y, r2.w
+mul.f r3.y, r5.x, c0.y
+rsq r1.w, (abs)r1.w
+(ss)mul.f r0.w, r0.w, r1.w
+rcp r2.w, r1.w
+(ss)mov.f32f32 r3.z, r2.w
+mul.f r3.w, r0.x, c4.x
+rsq r4.x, (abs)r1.w
+(ss)mul.f r1.x, r1.x, r4.x
+add.f r4.x, r0.w, c24.x
+mul.f r4.y, r1.w, r3.z
+mad.f32 r3.w, c4.y, r0.y, r3.w
+add.f r4.z, r1.x, c24.x
+mul.f r6.x, r4.x, r4.x
+mul.f r4.y, c17.x, r4.y
+mad.f32 r6.y, c4.z, r0.z, r3.w
+mad.f32 r6.x, r4.z, r4.z, r6.x
+rsq r3.w, (abs)r1.w
+(ss)mul.f r1.y, r1.y, r3.w
+mad.f32 r3.z, c17.y, r3.z, r4.y
+(ss)rcp r1.w, r1.w
+(ss)mul.f r1.w, r1.w, r2.w
+mul.f r0.w, r6.y, r0.w
+add.f r4.y, r1.y, c24.y
+mul.f r6.z, r0.x, c5.x
+min.f r3.w, r1.z, c24.y
+min.f r2.w, r2.x, c24.y
+mad.f32 r1.z, r4.y, r4.y, r6.x
+mad.f32 r2.x, c17.z, r1.w, r3.z
+mad.f32 r3.z, c5.y, r0.y, r6.z
+min.f r1.w, r2.y, c24.y
+mad.f32 r6.x, c2.w, r5.z, r2.z
+mad.f32 r6.z, c2.z, r5.z, r3.x
+mad.f32 r6.w, c1.y, r5.y, r3.y
+rsq r1.z, (abs)r1.z
+(ss)mov.f32f32 r2.y, r1.z
+mov.f32f32 r2.z, r2.x
+rcp r2.x, r2.x
+mad.f32 r3.x, c5.z, r0.z, r3.z
+(ss)mul.f r1.z, r4.y, r1.z
+mul.f r3.y, r4.x, r2.y
+mul.f r2.y, r4.z, r2.y
+mad.f32 r0.w, r3.x, r1.x, r0.w
mul.f r0.x, r0.x, c6.x
-mov.f32f32 r2.x, r2.x
-mad.f32 r2.y, r1.z, r1.z, r2.z
-mad.f32 r2.x, c5.z, r0.z, r2.x
+mul.f r1.x, r6.y, r3.y
+rcp r3.y, r2.z
+mov.f32f32 r3.z, c25.y
+mad.f32 r1.x, r3.x, r2.y, r1.x
mad.f32 r0.x, c6.y, r0.y, r0.x
-mul.f r0.y, r5.x, c0.w
-mul.f r2.z, r5.x, c0.z
-mul.f r3.x, r5.x, c0.y
-mul.f r3.y, r5.x, c0.x
-rsq r2.y, (abs)r2.y
-(ss)mul.f r0.w, r0.w, r2.y
-rcp r3.z, r2.y
-(ss)mov.f32f32 r3.z, r3.z
-rcp r4.x, r2.y
-(ss)mov.f32f32 r4.x, r4.x
-rsq r4.y, (abs)r2.y
-(ss)mul.f r1.x, r1.x, r4.y
-add.f r4.y, r0.w, c24.x
-mul.f r4.z, r2.y, r3.z
-mul.f r0.w, r1.y, r0.w
-mul.f r4.x, r4.x, r3.z
-mul.f r5.x, r4.y, r4.y
-add.f r6.x, r1.x, c24.x
-mov.f32f32 r4.z, r4.z
-mad.f32 r0.w, r2.x, r1.x, r0.w
-mov.f32f32 r1.x, r4.x
-mad.f32 r4.x, r6.x, r6.x, r5.x
-mul.f r4.z, c17.x, r4.z
-mov.f32f32 r0.w, r0.w
-(ss)rsq r2.y, (abs)r2.y
-mad.f32 r3.z, c17.y, r3.z, r4.z
-mov.f32f32 r4.x, r4.x
-(ss)mul.f r1.z, r1.z, r2.y
-mov.f32f32 r0.x, r0.x
-(ss)mov.f32f32 r2.y, r3.z
+mov.f32f32 r0.y, c25.y
mad.f32 r0.x, c6.z, r0.z, r0.x
-add.f r0.z, r1.z, c24.y
-mad.f32 r1.x, c17.z, r1.x, r2.y
-mad.f32 r0.y, c1.w, r5.y, r0.y
-mad.f32 r2.y, c1.z, r5.y, r2.z
-mad.f32 r2.z, r0.z, r0.z, r4.x
-mov.f32f32 r1.x, r1.x
-mad.f32 r0.w, r0.x, r1.z, r0.w
-mad.f32 r0.y, c2.w, r5.z, r0.y
-mad.f32 r2.y, c2.z, r5.z, r2.y
-mad.f32 r5.x, c1.y, r5.y, r3.x
-mad.f32 r5.y, c1.x, r5.y, r3.y
-rsq r1.z, (abs)r2.z
-(ss)mov.f32f32 r1.z, r1.z
-(ss)rcp r2.z, r1.x
-mov.f32f32 r3.x, c25.y
-mov.f32f32 r3.y, c25.y
-rcp r4.x, r1.x
-max.f r3.z, (neg)r0.w, c25.x
-mul.f r4.y, r4.y, r1.z
-(ss)mul.f r3.x, r3.x, r2.z
-mov.f32f32 r4.z, c11.y
-mov.f32f32 r6.y, c11.x
-mul.f r1.y, r1.y, r4.y
-mul.f r4.y, r6.x, r1.z
-mov.f32f32 r6.x, c11.z
-mad.f32 r4.z, c21.y, r3.x, r4.z
-mad.f32 r6.y, c21.x, r3.x, r6.y
-mad.f32 r1.y, r2.x, r4.y, r1.y
-mad.f32 r2.x, c21.z, r3.x, r6.x
-mov.f32f32 r3.x, r3.z
-mul.f r2.z, r3.y, r2.z
-mov.f32f32 r1.y, r1.y
-mul.f r0.z, r0.z, r1.z
-mul.f r1.z, r3.x, r4.x
-mov.f32f32 r3.x, c8.z
-mov.f32f32 r3.y, c8.y
-mad.f32 r0.x, r0.x, r0.z, r1.y
-mad.f32 r0.z, c22.z, r1.z, r2.x
-mad.f32 r1.y, c22.y, r1.z, r4.z
-mad.f32 r1.z, c22.x, r1.z, r6.y
-max.f r2.x, (neg)r0.x, c25.x
-max.f r0.x, r0.x, c25.x
+(ss)mul.f r0.z, r3.z, r3.y
+mov.f32f32 r2.y, c11.y
+mov.f32f32 r3.x, c11.x
+mad.f32 r1.x, r0.x, r1.z, r1.x
+mov.f32f32 r1.z, c11.z
+mad.f32 r2.y, c21.y, r0.z, r2.y
+mad.f32 r3.x, c21.x, r0.z, r3.x
+max.f r3.z, (neg)r1.x, c25.x
+max.f r1.x, r1.x, c25.x
+mad.f32 r0.z, c21.z, r0.z, r1.z
+mad.f32 r0.x, r0.x, r1.y, r0.w
+mul.f r0.y, r0.y, r3.y
+mov.f32f32 r0.w, c8.y
+mov.f32f32 r1.y, c8.x
+log2 r1.z, r3.z
+absneg.f r3.y, (neg)c10.x
+log2 r1.x, r1.x
+(ss)mov.f32f32 r3.z, c7.x
+max.f r4.x, (neg)r0.x, c25.x
+mov.f32f32 r4.y, c8.z
+min.f r3.y, (neg)r3.y, c25.z
+min.f r3.z, r3.z, c25.z
+mul.f r4.x, r4.x, r2.x
+mad.f32 r6.y, c18.z, r0.y, r4.y
+(ss)mul.f r1.z, r3.y, r1.z
+mul.f r1.x, r3.z, r1.x
+mad.f32 r0.z, c22.z, r4.x, r0.z
+mad.f32 r2.y, c22.y, r4.x, r2.y
+mad.f32 r3.x, c22.x, r4.x, r3.x
+max.f r7.x, r0.x, c25.x
+mad.f32 r0.w, c18.y, r0.y, r0.w
+exp2 r1.z, r1.z
+cmps.f.lt r3.y, (neg)r0.x, c24.x
+exp2 r1.x, r1.x
+cmps.f.lt r0.x, (neg)r0.x, c24.x
max.f r0.z, r0.z, c24.x
-max.f r1.y, r1.y, c24.x
-max.f r1.z, r1.z, c24.x
-mad.f32 r4.y, c18.z, r2.z, r3.x
-mad.f32 r4.z, c18.y, r2.z, r3.y
-log2 r2.x, r2.x
-(ss)mov.f32f32 r2.x, r2.x
-absneg.f r3.x, (neg)c10.x
-log2 r0.x, r0.x
-(ss)mov.f32f32 r0.x, r0.x
-mov.f32f32 r6.x, c7.x
+max.f r2.y, r2.y, c24.x
+(ss)sel.b32 r1.z, r1.z, r3.y, c25.x
+rcp r2.z, r2.z
+sel.b32 r0.x, r1.x, r0.x, c25.x
min.f r3.z, r0.z, c24.y
-min.f r0.z, (neg)r3.x, c25.z
-min.f r3.y, r1.y, c24.y
-min.f r3.x, r1.z, c24.y
-max.f r1.y, r0.w, c25.x
-mul.f r0.z, r0.z, r2.x
-min.f r1.z, r6.x, c25.z
-mov.f32f32 r2.x, c8.x
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r0.z, r0.z
-mul.f r0.x, r1.z, r0.x
-mad.f32 r1.z, c18.x, r2.z, r2.x
-mul.f r1.y, r1.y, r4.x
-rcp r2.x, r1.x
-(ss)cmps.f.lt r1.x, (neg)r0.w, c24.x
-cmps.f.lt r0.w, (neg)r0.w, c24.x
-mov.f32f32 r0.x, r0.x
-exp2 r0.z, r0.z
-(ss)mov.f32f32 r0.z, r0.z
-mad.f32 r2.z, c19.z, r1.y, r4.y
-mad.f32 r4.x, c19.y, r1.y, r4.z
-mad.f32 r1.y, c19.x, r1.y, r1.z
-sel.b32 r0.z, r0.z, r1.x, c25.x
-max.f r1.x, r2.z, c24.x
-max.f r2.z, r4.x, c24.x
-max.f r4.x, r1.y, c24.x
-mov.f32f32 r0.z, r0.z
-exp2 r0.x, r0.x
-(ss)mov.f32f32 r0.x, r0.x
-min.f r1.z, r1.x, c24.y
-min.f r1.y, r2.z, c24.y
-mov.f32f32 r0.z, r0.z
-sel.b32 r0.x, r0.x, r0.w, c25.x
-min.f r1.x, r4.x, c24.y
-mad.f32 r0.w, c3.w, r5.w, r0.y
-mul.f r0.y, r0.z, r2.x
-mov.f32f32 r0.z, c24.x
-mov.f32f32 r2.z, c24.x
-mov.f32f32 r4.x, c24.x
-mad.f32 r0.z, c23.z, r0.y, r0.z
-mad.f32 r2.z, c23.y, r0.y, r2.z
-mad.f32 r0.y, c23.x, r0.y, r4.x
-mov.f32f32 r0.x, r0.x
+min.f r3.y, r2.y, c24.y
+(ss)mul.f r0.z, r1.z, r2.z
+mov.f32f32 r1.x, c24.x
+mov.f32f32 r1.z, c24.x
+mov.f32f32 r2.y, c24.x
+mad.f32 r1.x, c23.z, r0.z, r1.x
+mad.f32 r1.z, c23.y, r0.z, r1.z
+mad.f32 r0.z, c23.x, r0.z, r2.y
+mul.f r0.x, r0.x, r2.z
+max.f r1.x, r1.x, c24.x
+max.f r1.z, r1.z, c24.x
+max.f r0.z, r0.z, c24.x
+mov.f32f32 r2.y, c24.x
+min.f r4.z, r1.x, c24.y
+min.f r4.y, r1.z, c24.y
+min.f r4.x, r0.z, c24.y
+mad.f32 r0.z, c20.z, r0.x, r2.y
+mov.f32f32 r1.x, c24.x
+mov.f32f32 r1.z, c24.x
+mad.f32 r1.x, c20.y, r0.x, r1.x
max.f r0.z, r0.z, c24.x
-max.f r2.z, r2.z, c24.x
-max.f r0.y, r0.y, c24.x
-mov.f32f32 r0.x, r0.x
-min.f r4.z, r0.z, c24.y
-min.f r4.y, r2.z, c24.y
-min.f r4.x, r0.y, c24.y
-mul.f r0.x, r0.x, r2.x
-mov.f32f32 r0.y, c24.x
-mov.f32f32 r0.z, c24.x
-mov.f32f32 r2.x, c24.x
-mad.f32 r0.y, c20.z, r0.x, r0.y
-mad.f32 r2.z, c20.y, r0.x, r0.z
-mad.f32 r0.x, c20.x, r0.x, r2.x
-mad.f32 r0.z, c3.z, r5.w, r2.y
-max.f r0.y, r0.y, c24.x
-max.f r2.x, r2.z, c24.x
+mad.f32 r0.x, c20.x, r0.x, r1.z
+max.f r1.z, r3.x, c24.x
+mul.f r7.x, r7.x, r2.x
+(ss)min.f r2.z, r0.z, c24.y
+max.f r0.z, r1.x, c24.x
max.f r0.x, r0.x, c24.x
-nop
-min.f r2.z, r0.y, c24.y
-min.f r2.y, r2.x, c24.y
+(rpt1)nop
+min.f r2.y, r0.z, c24.y
min.f r2.x, r0.x, c24.y
-mad.f32 r0.x, c2.y, r5.z, r5.x
-mad.f32 r5.x, c2.x, r5.z, r5.y
-mad.f32 r0.y, c3.y, r5.w, r0.x
-mad.f32 r0.x, c3.x, r5.w, r5.x
+min.f r3.x, r1.z, c24.y
+mad.f32 r0.x, c19.z, r7.x, r6.y
+mad.f32 r0.z, c19.y, r7.x, r0.w
+mad.f32 r0.y, c18.x, r0.y, r1.y
+mad.f32 r0.w, c3.w, r5.w, r6.x
+max.f r0.x, r0.x, c24.x
+max.f r1.x, r0.z, c24.x
+mad.f32 r0.y, c19.x, r7.x, r0.y
+mad.f32 r0.z, c3.z, r5.w, r6.z
+min.f r1.z, r0.x, c24.y
+min.f r1.y, r1.x, c24.y
+max.f r0.x, r0.y, c24.x
+mad.f32 r0.y, c2.y, r5.z, r6.w
+mul.f r5.x, r5.x, c0.x
+nop
+min.f r1.x, r0.x, c24.y
+mad.f32 r0.y, c3.y, r5.w, r0.y
+mad.f32 r0.x, c1.x, r5.y, r5.x
+nop
+mad.f32 r0.x, c2.x, r5.z, r0.x
+nop
+mad.f32 r0.x, c3.x, r5.w, r0.x
end
nop
; VERT: outputs: r0.x (0:0) r1.x (1:0) r2.x (1:1) r3.x (2:0) r4.x (2:1)
; VERT: inputs: r5.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0)
-; VERT: 199 instructions, 0 half, 7 full
+; VERT: 176 instructions, 0 half, 8 full
diff --git a/reference/maniadrive/maniadrive-18.asm b/reference/maniadrive/maniadrive-18.asm
index 94e8c0b..2f37b32 100644
--- a/reference/maniadrive/maniadrive-18.asm
+++ b/reference/maniadrive/maniadrive-18.asm
@@ -1,17 +1,17 @@
; options:
; VERT: new compiler
-@in(r3.x) in0
-@in(r3.y) in1
-@in(r3.z) in2
-@in(r3.w) in3
-@in(r0.x) in4
-@in(r0.y) in5
-@in(r0.z) in6
-@in(r0.w) in7
-@in(r6.x) in8
-@in(r6.y) in9
-@in(r6.z) in10
-@in(r6.w) in11
+@in(r6.x) in0
+@in(r6.y) in1
+@in(r6.z) in2
+@in(r6.w) in3
+@in(r7.x) in4
+@in(r7.y) in5
+@in(r7.z) in6
+@in(r7.w) in7
+@in(r3.x) in8
+@in(r3.y) in9
+@in(r3.z) in10
+@in(r3.w) in11
@out(r0.x) out0
@out(r0.y) out1
@out(r0.z) out2
@@ -36,235 +36,205 @@
@out(r5.y) out21
@out(r5.z) out22
@out(r5.w) out23
-(sy)(ss)mul.f r1.x, r3.x, c15.x
-mov.f32f32 r1.y, c24.y
-mad.f32 r1.x, c16.x, r3.y, r1.x
-mov.f32f32 r1.z, r0.w
-mad.f32 r1.x, c17.x, r3.z, r1.x
-mov.f32f32 r1.y, r1.y
-mad.f32 r1.x, c18.x, r3.w, r1.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r1.w, c24.y
-mov.f32f32 r0.w, r0.w
-add.f r1.x, c14.x, (neg)r1.x
-max.f r1.y, r1.y, c24.x
-max.f r1.z, r1.z, c24.x
-mov.f32f32 r1.w, r1.w
-mul.f r2.x, r1.x, r1.x
-mul.f r2.y, r3.x, c15.y
-min.f r5.w, r1.y, c24.y
-mad.f32 r1.y, c16.y, r3.y, r2.y
-min.f r4.w, r1.z, c24.y
-mad.f32 r1.y, c17.y, r3.z, r1.y
-max.f r1.z, r1.w, c24.x
-mad.f32 r1.y, c18.y, r3.w, r1.y
-mov.f32f32 r0.w, r0.w
+@const(c24.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000
+@const(c25.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
+(sy)(ss)mul.f r0.x, r6.x, c15.x
+mul.f r0.y, r6.x, c15.y
+mad.f32 r0.x, c16.x, r6.y, r0.x
+mad.f32 r0.y, c16.y, r6.y, r0.y
+mad.f32 r0.x, c17.x, r6.z, r0.x
+mad.f32 r0.y, c17.y, r6.z, r0.y
+mad.f32 r0.x, c18.x, r6.w, r0.x
+mad.f32 r0.y, c18.y, r6.w, r0.y
+mul.f r0.z, r6.x, c15.z
+mov.f32f32 r0.w, c24.y
+add.f r0.x, c14.x, (neg)r0.x
+add.f r0.y, c14.y, (neg)r0.y
+mad.f32 r0.z, c16.z, r6.y, r0.z
+max.f r0.w, r0.w, c24.x
+mul.f r1.x, r0.x, r0.x
+mad.f32 r0.z, c17.z, r6.z, r0.z
+mad.f32 r1.x, r0.y, r0.y, r1.x
+mad.f32 r0.z, c18.z, r6.w, r0.z
+min.f r5.w, r0.w, c24.y
+mov.f32f32 r0.w, c24.y
+mov.f32f32 r1.y, c4.x
+add.f r0.z, c14.z, (neg)r0.z
+mov.f32f32 r1.z, c4.x
mov.f32f32 r1.w, c4.x
-mov.f32f32 r2.y, c4.x
-add.f r1.y, c14.y, (neg)r1.y
-min.f r2.w, r1.z, c24.y
+mul.f r1.y, r1.y, c5.x
+mad.f32 r1.x, r0.z, r0.z, r1.x
max.f r0.w, r0.w, c24.x
-mul.f r1.z, r1.w, c5.x
-mad.f32 r1.w, r1.y, r1.y, r2.x
mov.f32f32 r2.x, c5.y
-mul.f r2.y, r2.y, c6.x
-mov.f32f32 r2.z, c4.x
-mov.f32f32 r4.x, r1.w
-mul.f r4.y, r3.x, c15.z
-min.f r1.w, r0.w, c24.y
-mad.f32 r0.w, c16.z, r3.y, r4.y
-mad.f32 r1.z, c4.y, r2.x, r1.z
-mad.f32 r0.w, c17.z, r3.z, r0.w
-mov.f32f32 r2.x, c6.y
-mad.f32 r0.w, c18.z, r3.w, r0.w
-mov.f32f32 r1.z, r1.z
-mul.f r2.z, r2.z, c7.x
-mad.f32 r2.x, c4.y, r2.x, r2.y
-add.f r0.w, c14.z, (neg)r0.w
-mov.f32f32 r2.y, c5.z
-mov.f32f32 r4.y, c7.y
-mov.f32f32 r2.x, r2.x
-mad.f32 r4.x, r0.w, r0.w, r4.x
-mad.f32 r1.z, c4.z, r2.y, r1.z
-mov.f32f32 r2.y, c6.z
-mad.f32 r2.z, c4.y, r4.y, r2.z
-mul.f r4.y, r3.x, c0.w
-mul.f r4.z, r3.x, c0.z
-mul.f r5.x, r3.x, c0.y
-rsq r4.x, (abs)r4.x
-(ss)mul.f r1.x, r1.x, r4.x
-rcp r5.y, r4.x
-(ss)mov.f32f32 r5.y, r5.y
-rcp r5.z, r4.x
-(ss)mov.f32f32 r5.z, r5.z
-rsq r7.x, (abs)r4.x
-(ss)mul.f r1.y, r1.y, r7.x
-add.f r7.x, r1.x, c24.x
-mul.f r7.y, r4.x, r5.y
-mul.f r1.x, r1.z, r1.x
-mad.f32 r2.x, c4.z, r2.y, r2.x
-mul.f r2.y, r7.x, r7.x
-add.f r7.z, r1.y, c24.x
-mov.f32f32 r7.y, r7.y
-mad.f32 r1.x, r2.x, r1.y, r1.x
-mul.f r1.y, r5.z, r5.y
-mad.f32 r2.y, r7.z, r7.z, r2.y
-mul.f r5.z, c19.x, r7.y
-mov.f32f32 r1.x, r1.x
-(ss)rsq r4.x, (abs)r4.x
-mad.f32 r5.y, c19.y, r5.y, r5.z
-mov.f32f32 r2.y, r2.y
-(ss)mul.f r0.w, r0.w, r4.x
-mov.f32f32 r2.z, r2.z
-(ss)mov.f32f32 r4.x, c7.z
-mov.f32f32 r5.y, r5.y
-add.f r5.z, r0.w, c24.y
-mov.f32f32 r1.y, r1.y
-mad.f32 r2.z, c4.z, r4.x, r2.z
-mad.f32 r4.x, c1.w, r3.y, r4.y
-mad.f32 r2.y, r5.z, r5.z, r2.y
-mad.f32 r1.y, c19.z, r1.y, r5.y
-mad.f32 r0.w, r2.z, r0.w, r1.x
-mad.f32 r1.x, c2.w, r3.z, r4.x
-mad.f32 r7.y, c1.z, r3.y, r4.z
-mad.f32 r7.w, c1.y, r3.y, r5.x
-mul.f r3.x, r3.x, c0.x
-rsq r2.y, (abs)r2.y
-(ss)mov.f32f32 r2.y, r2.y
-mov.f32f32 r1.y, r1.y
-max.f r4.x, (neg)r0.w, c25.x
-max.f r4.y, r0.w, c25.x
-mul.f r4.z, r7.x, r2.y
-mul.f r5.x, r7.z, r2.y
-mul.f r2.y, r5.z, r2.y
-mov.f32f32 r4.x, r4.x
-mul.f r1.z, r1.z, r4.z
-rcp r4.z, r1.y
+mul.f r1.z, r1.z, c6.x
+mul.f r1.w, r1.w, c7.x
+mov.f32f32 r2.y, c6.y
+mov.f32f32 r2.z, c7.y
+rsq r1.x, (abs)r1.x
+(ss)mul.f r0.x, r0.x, r1.x
+rcp r2.w, r1.x
+(ss)mov.f32f32 r4.x, r2.w
+mad.f32 r1.y, c4.y, r2.x, r1.y
+mov.f32f32 r2.x, c5.z
+add.f r4.y, r0.x, c24.x
+mul.f r4.z, r1.x, r4.x
+rsq r4.w, (abs)r1.x
+nop
+rsq r5.x, (abs)r1.x
+mad.f32 r1.y, c4.z, r2.x, r1.y
+mul.f r2.x, r4.y, r4.y
+(ss)mul.f r0.y, r0.y, r4.w
+mul.f r4.z, c19.x, r4.z
+mul.f r0.x, r1.y, r0.x
+mad.f32 r1.z, c4.y, r2.y, r1.z
+add.f r2.y, r0.y, c24.x
+mad.f32 r4.x, c19.y, r4.x, r4.z
+(ss)rcp r1.x, r1.x
+(ss)mul.f r1.x, r1.x, r2.w
+mov.f32f32 r2.w, c6.z
+mad.f32 r2.x, r2.y, r2.y, r2.x
+mul.f r0.z, r0.z, r5.x
+mad.f32 r1.x, c19.z, r1.x, r4.x
+mad.f32 r1.z, c4.z, r2.w, r1.z
+min.f r2.w, r0.w, c24.y
+add.f r0.w, r0.z, c24.y
+mov.f32f32 r4.x, r1.x
+mad.f32 r0.x, r1.z, r0.y, r0.x
+mad.f32 r0.y, c4.y, r2.z, r1.w
+mad.f32 r1.w, r0.w, r0.w, r2.x
+rcp r1.x, r1.x
+mov.f32f32 r2.x, c7.z
+mul.f r4.w, r6.x, c0.w
+mul.f r8.x, r6.x, c0.z
+mul.f r8.y, r6.x, c0.y
+mul.f r6.x, r6.x, c0.x
+rcp r2.z, r4.x
+mov.f32f32 r4.z, c25.y
+rsq r1.w, (abs)r1.w
+(ss)mov.f32f32 r5.x, r1.w
mov.f32f32 r5.y, c25.y
-mad.f32 r1.z, r2.x, r5.x, r1.z
-mov.f32f32 r2.x, c25.y
-rcp r5.x, r1.y
-mov.f32f32 r5.z, r4.y
-(ss)mul.f r4.y, r5.y, r4.z
-mov.f32f32 r1.z, r1.z
-mul.f r5.y, c20.z, r0.z
-mad.f32 r1.z, r2.z, r2.y, r1.z
-mad.f32 r2.y, c9.z, r0.z, c13.z
-mul.f r2.z, c20.y, r0.y
-mul.f r7.x, c20.x, r0.x
-max.f r7.z, (neg)r1.z, c25.x
-max.f r1.z, r1.z, c25.x
-mad.f32 r2.y, r4.y, r5.y, r2.y
-mad.f32 r5.y, c9.y, r0.y, c13.y
-mad.f32 r8.x, c9.x, r0.x, c13.x
-mul.f r8.y, r2.x, r4.z
-mul.f r2.x, c20.y, r0.y
-log2 r4.z, r7.z
-(ss)mov.f32f32 r4.z, r4.z
-(ss)absneg.f r7.z, (neg)c12.x
-log2 r1.z, r1.z
-(ss)mov.f32f32 r1.z, r1.z
-mov.f32f32 r8.z, c8.x
-mul.f r4.x, r4.x, r5.x
-min.f r7.z, (neg)r7.z, c25.z
-mul.f r8.w, c21.z, r0.z
-mad.f32 r2.z, r4.y, r2.z, r5.y
-mad.f32 r4.y, r4.y, r7.x, r8.x
-mul.f r4.z, r7.z, r4.z
-min.f r5.y, r8.z, c25.z
-mad.f32 r2.y, r4.x, r8.w, r2.y
-mul.f r7.x, c21.y, r0.y
-mov.f32f32 r4.z, r4.z
-mul.f r1.z, r5.y, r1.z
-max.f r2.y, r2.y, c24.x
-mad.f32 r2.z, r4.x, r7.x, r2.z
-mul.f r5.y, c21.x, r0.x
-mul.f r7.x, c20.z, r0.z
-mad.f32 r7.z, c9.y, r0.y, c10.y
-exp2 r4.z, r4.z
-(ss)mov.f32f32 r8.x, r4.z
-cmps.f.lt r8.z, (neg)r0.w, c24.x
-mov.f32f32 r1.z, r1.z
-(ss)min.f r4.z, r2.y, c24.y
-max.f r2.y, r2.z, c24.x
-sel.b32 r2.z, r8.x, r8.z, c25.x
-mad.f32 r4.x, r4.x, r5.y, r4.y
-mad.f32 r5.y, c9.z, r0.z, c10.z
-mad.f32 r7.z, r8.y, r2.x, r7.z
-mov.f32f32 r2.x, r2.z
-exp2 r1.z, r1.z
-(ss)mov.f32f32 r1.z, r1.z
-cmps.f.lt r0.w, (neg)r0.w, c24.x
-min.f r4.y, r2.y, c24.y
-mov.f32f32 r2.x, r2.x
-rcp r1.y, r1.y
-max.f r2.y, r4.x, c24.x
-mad.f32 r2.z, r8.y, r7.x, r5.y
-mul.f r7.x, r5.z, r5.x
-(ss)mul.f r2.x, r2.x, r1.y
-mov.f32f32 r4.x, c24.x
-mov.f32f32 r5.x, c24.x
-mov.f32f32 r5.y, c24.x
-mad.f32 r4.x, c23.z, r2.x, r4.x
-mad.f32 r5.x, c23.y, r2.x, r5.x
-mad.f32 r2.x, c23.x, r2.x, r5.y
-sel.b32 r0.w, r1.z, r0.w, c25.x
-max.f r1.z, r4.x, c24.x
-max.f r4.x, r5.x, c24.x
+(ss)rcp r4.x, r4.x
+mad.f32 r0.y, c4.z, r2.x, r0.y
+mul.f r2.x, r4.z, r2.z
+mul.f r4.y, r4.y, r5.x
+mul.f r4.z, c20.z, r7.z
+mul.f r5.z, c20.y, r7.y
+mul.f r8.z, c20.x, r7.x
+mul.f r1.y, r1.y, r4.y
+mul.f r2.y, r2.y, r5.x
+mad.f32 r4.y, c9.z, r7.z, c13.z
+mad.f32 r5.x, c9.y, r7.y, c13.y
+mad.f32 r8.w, c9.x, r7.x, c13.x
+mad.f32 r1.y, r1.z, r2.y, r1.y
+mul.f r0.w, r0.w, r1.w
+mad.f32 r1.z, r2.x, r4.z, r4.y
+mad.f32 r1.w, r2.x, r5.z, r5.x
+mad.f32 r2.x, r2.x, r8.z, r8.w
+mad.f32 r0.w, r0.y, r0.w, r1.y
+mad.f32 r0.x, r0.y, r0.z, r0.x
+mul.f r0.y, r5.y, r2.z
+mul.f r0.z, c20.y, r7.y
+max.f r1.y, (neg)r0.w, c25.x
+max.f r0.w, r0.w, c25.x
+max.f r2.y, (neg)r0.x, c25.x
+mul.f r2.z, c20.z, r7.z
+mad.f32 r4.y, c9.y, r7.y, c10.y
+mul.f r8.z, c20.x, r7.x
+mul.f r2.y, r2.y, r1.x
+log2 r1.y, r1.y
+absneg.f r4.z, (neg)c12.x
+log2 r0.w, r0.w
+mov.f32f32 r5.x, c8.x
+mul.f r5.y, c21.z, r7.z
+mul.f r5.z, c21.y, r7.y
+min.f r4.z, (neg)r4.z, c25.z
+min.f r5.x, r5.x, c25.z
+mad.f32 r1.z, r2.y, r5.y, r1.z
+mad.f32 r1.w, r2.y, r5.z, r1.w
+(ss)mul.f r1.y, r4.z, r1.y
+mul.f r0.w, r5.x, r0.w
+max.f r1.z, r1.z, c24.x
+max.f r1.w, r1.w, c24.x
+mul.f r5.x, c21.x, r7.x
+mad.f32 r5.y, c9.z, r7.z, c10.z
+mad.f32 r0.z, r0.y, r0.z, r4.y
+exp2 r1.y, r1.y
+cmps.f.lt r5.z, (neg)r0.x, c24.x
+exp2 r0.w, r0.w
+cmps.f.lt r8.w, (neg)r0.x, c24.x
+min.f r4.z, r1.z, c24.y
+min.f r4.y, r1.w, c24.y
+(ss)sel.b32 r1.y, r1.y, r5.z, c25.x
+sel.b32 r0.w, r0.w, r8.w, c25.x
+mad.f32 r1.z, r2.y, r5.x, r2.x
+mad.f32 r1.w, r0.y, r2.z, r5.y
+mul.f r1.y, r1.y, r4.x
+mov.f32f32 r2.x, c24.x
+mov.f32f32 r2.y, c24.x
+mov.f32f32 r2.z, c24.x
+mad.f32 r2.x, c23.z, r1.y, r2.x
+mad.f32 r2.y, c23.y, r1.y, r2.y
+mad.f32 r1.y, c23.x, r1.y, r2.z
+mul.f r0.w, r0.w, r4.x
max.f r2.x, r2.x, c24.x
-mov.f32f32 r0.w, r0.w
-min.f r5.z, r1.z, c24.y
-min.f r5.y, r4.x, c24.y
-min.f r5.x, r2.x, c24.y
-mov.f32f32 r0.w, r0.w
-min.f r4.x, r2.y, c24.y
-mul.f r0.z, c21.z, r0.z
-mul.f r0.y, c21.y, r0.y
-mul.f r0.w, r0.w, r1.y
-(ss)mov.f32f32 r1.y, c24.x
-mov.f32f32 r1.z, c24.x
+max.f r2.y, r2.y, c24.x
+max.f r1.y, r1.y, c24.x
+mov.f32f32 r2.z, c24.x
+min.f r5.z, r2.x, c24.y
+min.f r5.y, r2.y, c24.y
+min.f r5.x, r1.y, c24.y
+mad.f32 r1.y, c22.z, r0.w, r2.z
mov.f32f32 r2.x, c24.x
-mad.f32 r1.y, c22.z, r0.w, r1.y
-mad.f32 r1.z, c22.y, r0.w, r1.z
-mad.f32 r0.w, c22.x, r0.w, r2.x
-mad.f32 r0.z, r7.x, r0.z, r2.z
+mov.f32f32 r2.y, c24.x
+mad.f32 r2.x, c22.y, r0.w, r2.x
max.f r1.y, r1.y, c24.x
+mad.f32 r0.w, c22.x, r0.w, r2.y
max.f r1.z, r1.z, c24.x
-max.f r0.w, r0.w, c24.x
-nop
+max.f r0.x, r0.x, c25.x
min.f r2.z, r1.y, c24.y
-min.f r2.y, r1.z, c24.y
+max.f r1.y, r2.x, c24.x
+max.f r0.w, r0.w, c24.x
+(rpt1)nop
+min.f r2.y, r1.y, c24.y
min.f r2.x, r0.w, c24.y
+min.f r4.x, r1.z, c24.y
+mul.f r0.x, r0.x, r1.x
+mul.f r0.w, c21.z, r7.z
+mul.f r1.x, c21.y, r7.y
+mad.f32 r1.y, c9.x, r7.x, c10.x
+mad.f32 r1.z, c1.w, r6.y, r4.w
+mad.f32 r0.w, r0.x, r0.w, r1.w
+mad.f32 r0.z, r0.x, r1.x, r0.z
+mad.f32 r0.y, r0.y, r8.z, r1.y
+mul.f r1.x, c21.x, r7.x
+max.f r0.w, r0.w, c24.x
max.f r0.z, r0.z, c24.x
-mad.f32 r0.y, r7.x, r0.y, r7.z
-mul.f r0.w, c20.x, r0.x
-mad.f32 r1.y, c9.x, r0.x, c10.x
-min.f r1.z, r0.z, c24.y
-max.f r0.y, r0.y, c24.x
-mad.f32 r0.z, r8.y, r0.w, r1.y
-mul.f r0.x, c21.x, r0.x
-mad.f32 r0.w, c3.w, r3.w, r1.x
-min.f r1.y, r0.y, c24.y
-mad.f32 r0.y, c2.z, r3.z, r7.y
-mad.f32 r0.x, r7.x, r0.x, r0.z
-mad.f32 r0.z, c3.z, r3.w, r0.y
-mad.f32 r0.y, c2.y, r3.z, r7.w
-mad.f32 r1.x, c1.x, r3.y, r3.x
+mad.f32 r1.w, c2.w, r6.z, r1.z
+mad.f32 r4.w, c1.z, r6.y, r8.x
+min.f r1.z, r0.w, c24.y
+min.f r1.y, r0.z, c24.y
+mad.f32 r0.x, r0.x, r1.x, r0.y
+mad.f32 r0.w, c3.w, r6.w, r1.w
+mad.f32 r0.y, c2.z, r6.z, r4.w
+mad.f32 r1.x, c1.y, r6.y, r8.y
max.f r0.x, r0.x, c24.x
-mad.f32 r0.y, c3.y, r3.w, r0.y
-mad.f32 r3.x, c2.x, r3.z, r1.x
-nop
+mad.f32 r0.z, c3.z, r6.w, r0.y
+mad.f32 r0.y, c2.y, r6.z, r1.x
+mad.f32 r1.w, c1.x, r6.y, r6.x
min.f r1.x, r0.x, c24.y
-mad.f32 r0.x, c3.x, r3.w, r3.x
-mov.f32f32 r3.w, r6.w
-mov.f32f32 r3.z, r6.z
-mov.f32f32 r3.y, r6.y
-mov.f32f32 r3.x, r6.x
-end
+mad.f32 r0.y, c3.y, r6.w, r0.y
+mad.f32 r0.x, c2.x, r6.z, r1.w
+max.f r1.w, r7.w, c24.x
+mad.f32 r0.x, c3.x, r6.w, r0.x
+max.f r6.x, r7.w, c24.x
nop
+min.f r4.w, r1.w, c24.y
+nop
+min.f r1.w, r6.x, c24.y
+end
nop
nop
; VERT: outputs: r0.x (0:0) r1.x (1:0) r2.x (1:1) r3.x (5:0) r4.x (2:0) r5.x (2:1)
-; VERT: inputs: r3.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=f,il=12,b=0) r6.x (0:0,cm=f,il=16,b=0)
-; VERT: 225 instructions, 0 half, 9 full
+; VERT: inputs: r6.x (0:0,cm=f,il=8,b=0) r7.x (0:0,cm=f,il=12,b=0) r3.x (0:0,cm=f,il=16,b=0)
+; VERT: 195 instructions, 0 half, 9 full
diff --git a/reference/maniadrive/maniadrive-19.asm b/reference/maniadrive/maniadrive-19.asm
index 4a651b1..16ecc3c 100644
--- a/reference/maniadrive/maniadrive-19.asm
+++ b/reference/maniadrive/maniadrive-19.asm
@@ -6,31 +6,24 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)bary.f r0.z, 4, r0.x
-bary.f r0.w, 7, r0.x
-bary.f r1.x, 5, r0.x
+bary.f r0.w, 5, r0.x
+bary.f r1.x, 7, r0.x
bary.f r1.y, 3, r0.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.x, r1.x
bary.f r1.z, 2, r0.x
-mov.f32f32 r1.w, r0.z
-mov.f32f32 r2.y, r0.w
-mov.f32f32 r2.x, r1.x
-bary.f r0.z, 1, r0.x
+bary.f r2.x, 1, r0.x
bary.f (ei)r0.x, 0, r0.x
-(rpt3)nop
-sam.p (f32)(xyzw)r1.w, r1.w, s#0, t#0
-(sy)mul.f r0.y, r2.z, r1.y
-mul.f r0.w, r2.y, r1.z
-mul.f r0.z, r2.x, r0.z
-mul.f r0.x, r1.w, r0.x
-(ss)mov.f32f32 r1.w, r0.y
-mov.f32f32 r1.z, r0.w
-mov.f32f32 r1.y, r0.z
-mov.f32f32 r1.x, r0.x
+(rpt1)nop
+sam.p (f32)(xyzw)r0.y, r0.z, s#0, t#0
+(sy)mul.f r1.w, r1.x, r1.y
+mul.f r1.z, r0.w, r1.z
+mul.f r1.y, r0.z, r2.x
+(ss)mul.f r1.x, r0.y, r0.x
end
+nop
+nop
; FRAG: outputs: r1.x (1:0)
-; FRAG: inputs: r0.z (1:0,cm=f,il=8,b=1) r0.x (5:0,cm=f,il=12,b=1)
-; FRAG: 27 instructions, 0 half, 3 full
+; FRAG: inputs: r0.z (1:0,cm=f,il=8,b=1) r0.y (5:0,cm=f,il=12,b=1)
+; FRAG: 15 instructions, 0 half, 3 full
diff --git a/reference/multi-kill.asm b/reference/multi-kill.asm
index 8667b58..50cbc8c 100644
--- a/reference/multi-kill.asm
+++ b/reference/multi-kill.asm
@@ -6,63 +6,44 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)bary.f r0.z, 3, r0.x
bary.f r0.w, 2, r0.x
bary.f r2.z, 1, r0.x
bary.f (ei)r0.x, 0, r0.x
mov.f32f32 r0.y, r0.z
-mov.f32f32 r0.z, r0.w
-mov.f32f32 r0.w, r2.z
-mov.f32f32 r1.x, r0.x
-mov.f32f32 r1.y, r0.y
-mov.f32f32 r1.z, r0.y
-mov.f32f32 r1.w, r0.z
-mov.f32f32 r2.w, r0.w
-mov.f32f32 r3.x, r1.x
-add.f r1.y, r1.y, r1.z
-cmps.f.ne r1.z, r0.x, c0.x
-mov.f32f32 r3.y, r0.z
-mov.f32f32 r3.z, r0.w
-mov.f32f32 r3.w, r1.x
-sel.b32 r0.y, r1.y, r1.z, r0.y
-add.f r1.y, r1.w, r3.y
-add.f r1.w, r2.w, r3.z
-add.f r2.w, r3.x, r3.w
-mov.f32f32 r3.x, r0.y
-mov.f32f32 r3.y, r0.y
-sel.b32 r0.z, r1.y, r1.z, r0.z
-sel.b32 r0.w, r1.w, r1.z, r0.w
-sel.b32 r1.x, r2.w, r1.z, r1.x
-mul.f r1.y, r3.x, r3.y
-cmps.f.ne r1.z, r2.z, c0.x
-mov.f32f32 r2.w, r0.z
-mov.f32f32 r3.x, r0.w
-mov.f32f32 r3.y, r1.x
-sel.b32 r0.y, r1.y, r1.z, r0.y
-mov.f32f32 r1.y, r0.z
-mov.f32f32 r3.z, r0.w
-mov.f32f32 r3.w, r1.x
-mov.f32f32 r1.w, r0.y
-mul.f r0.y, r2.w, r1.y
-mul.f r1.y, r3.x, r3.z
-mul.f r2.w, r3.y, r3.w
-mov.f32f32 r3.x, (0.000000)
-sel.b32 r0.y, r0.y, r1.z, r0.z
-sel.b32 r0.z, r1.y, r1.z, r0.w
-sel.b32 r0.w, r2.w, r1.z, r1.x
-nop
-mov.f32f32 r1.z, r0.y
-mov.f32f32 r1.y, r0.z
mov.f32f32 r1.x, r0.w
-cmps.f.ne p0.x, r2.z, r3.x
+mov.f32f32 r1.y, r2.z
+mov.f32f32 r1.z, r0.x
+add.f r0.z, r0.z, r0.y
+cmps.f.ne r1.w, r0.x, c0.x
+add.f r0.w, r0.w, r1.x
+add.f r2.w, r2.z, r1.y
+add.f r3.x, r0.x, r1.z
+sel.b32 r0.y, r0.z, r1.w, r0.y
+sel.b32 r0.z, r0.w, r1.w, r1.x
+sel.b32 r0.w, r2.w, r1.w, r1.y
+sel.b32 r1.x, r3.x, r1.w, r1.z
+mul.f r1.y, r0.y, r0.y
+cmps.f.ne r2.w, r2.z, c0.x
+mul.f r1.z, r0.z, r0.z
+mul.f r3.x, r0.w, r0.w
+mul.f r3.y, r1.x, r1.x
+sel.b32 r1.w, r1.y, r2.w, r0.y
+sel.b32 r1.z, r1.z, r2.w, r0.z
+sel.b32 r1.y, r3.x, r2.w, r0.w
+sel.b32 r1.x, r3.y, r2.w, r1.x
mov.f32f32 r0.y, (0.000000)
-(rpt4)nop
+mov.f32f32 r0.z, (0.000000)
+(rpt1)nop
+cmps.f.ne p0.x, r2.z, r0.y
+(rpt5)nop
kill p0.x
-cmps.f.ne p0.x, r0.x, r0.y
+cmps.f.ne p0.x, r0.x, r0.z
(rpt5)nop
kill p0.x
end
; FRAG: outputs: r1.x (1:0)
; FRAG: inputs: r63.w (5:0,cm=f,il=8,b=1)
-; FRAG: 65 instructions, 0 half, 4 full
+; FRAG: 47 instructions, 0 half, 4 full
diff --git a/reference/piglit-arb_framebuffer_srgb-blit-frag1.asm b/reference/piglit-arb_framebuffer_srgb-blit-frag1.asm
index 8c430d0..92a8207 100644
--- a/reference/piglit-arb_framebuffer_srgb-blit-frag1.asm
+++ b/reference/piglit-arb_framebuffer_srgb-blit-frag1.asm
@@ -5,6 +5,7 @@
@out(r2.y) out1
@out(r2.z) out2
@out(r2.w) out3
+@const(c1.x) 0x3f000000, 0x00000000, 0xbf000000, 0x3b7f9724
(sy)(ss)add.s r0.x, r0.x, -8
(rpt2)nop
shr.b r0.x, r0.x, 4
diff --git a/reference/piglit-arb_framebuffer_srgb-blit-frag2.asm b/reference/piglit-arb_framebuffer_srgb-blit-frag2.asm
index 46a2696..ccc345b 100644
--- a/reference/piglit-arb_framebuffer_srgb-blit-frag2.asm
+++ b/reference/piglit-arb_framebuffer_srgb-blit-frag2.asm
@@ -6,6 +6,7 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)bary.f r0.z, 0, r0.x
bary.f (ei)r0.w, 1, r0.x
(rpt5)nop
diff --git a/reference/piglit-fs-uniform-array-mat2-index-rd.asm b/reference/piglit-fs-uniform-array-mat2-index-rd.asm
index 6deef36..ec0bb12 100644
--- a/reference/piglit-fs-uniform-array-mat2-index-rd.asm
+++ b/reference/piglit-fs-uniform-array-mat2-index-rd.asm
@@ -4,6 +4,8 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c8.x) 0x00000002, 0x00000000, 0x00000000, 0x00000000
+@const(c9.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000
(sy)(ss)mov.f32f32 r0.x, c0.x
mov.f32f32 r0.y, c8.x
mov.f32f32 r0.z, c9.y
@@ -12,11 +14,11 @@ mov.f32f32 r1.x, c9.x
mull.u r1.y, r0.x, r0.y
mov.f32f32 r1.z, c9.x
madsh.m16 r1.y, r0.x, r0.y, r1.y
-mov.f32f32 r1.w, c9.x
+mov.f32f32 r2.x, c9.x
madsh.m16 r0.x, r0.y, r0.x, r1.y
mov.f32f32 r0.y, c9.y
-mov.f32f32 r1.y, c9.y
-mov.f32f32 r2.x, c9.x
+mov.f32f32 r2.y, c9.y
+mov.f32f32 r2.z, c9.x
cov.u32s16 hr0.x, r0.x
(rpt2)nop
shl.b hr0.x, hr0.x, 2
@@ -24,31 +26,27 @@ shl.b hr0.x, hr0.x, 2
mova a0.x, hr0.x
(rpt5)nop
mov.f32f32 r0.x, c<a0.x + 12>
-(ul)mov.f32f32 r2.y, c<a0.x + 13>
+(ul)mov.f32f32 r1.y, c<a0.x + 13>
(rpt1)nop
cmps.f.eq r0.x, r0.x, c1.x
-cmps.f.eq r2.y, r2.y, c1.y
+cmps.f.eq r1.y, r1.y, c1.y
(rpt1)nop
absneg.s r0.x, (neg)r0.x
-absneg.s r2.y, (neg)r2.y
+absneg.s r1.y, (neg)r1.y
(rpt2)nop
-and.b r0.x, r0.x, r2.y
+and.b r0.x, r0.x, r1.y
(rpt2)nop
cmps.u.ne r0.x, r0.x, c8.y
(rpt2)nop
-sel.b32 r0.z, r0.w, r0.x, r0.z
-sel.b32 r0.w, r1.z, r0.x, r1.x
-sel.b32 r0.y, r0.y, r0.x, r1.w
-sel.b32 r0.x, r2.x, r0.x, r1.y
-mov.f32f32 r1.w, r0.z
-mov.f32f32 r1.z, r0.w
-mov.f32f32 r1.y, r0.y
-mov.f32f32 r1.x, r0.x
+sel.b32 r1.w, r0.w, r0.x, r0.z
+sel.b32 r1.z, r1.z, r0.x, r1.x
+sel.b32 r1.y, r0.y, r0.x, r2.x
+sel.b32 r1.x, r2.z, r0.x, r2.y
end
nop
nop
nop
; FRAG: outputs: r1.x (1:0)
; FRAG: inputs:
-; FRAG: 58 instructions, 1 half, 3 full
+; FRAG: 54 instructions, 1 half, 3 full
diff --git a/reference/piglit-glsl-fs-varying-array.asm b/reference/piglit-glsl-fs-varying-array.asm
index a9862ea..8f2f395 100644
--- a/reference/piglit-glsl-fs-varying-array.asm
+++ b/reference/piglit-glsl-fs-varying-array.asm
@@ -1,7 +1,4 @@
; options:
-ir3/ir3_cmdline.c:168:reset_variant: error: new compiler failed, trying without copy propagation!
-ir3/ir3_cmdline.c:168:reset_variant: error: new compiler failed, trying fallback!
-
; FRAG: old compiler
@in(r1.x) in0
@in(r1.y) in0
diff --git a/reference/piglit-tex-miplevel-selection-1d-shadow.asm b/reference/piglit-tex-miplevel-selection-1d-shadow.asm
index 73d2804..536cb14 100644
--- a/reference/piglit-tex-miplevel-selection-1d-shadow.asm
+++ b/reference/piglit-tex-miplevel-selection-1d-shadow.asm
@@ -6,6 +6,7 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c4.x) 0x80000000, 0xbd4ccccd, 0x00000000, 0x3d4ccccd
(sy)(ss)bary.f r0.z, 0, r0.x
bary.f (ei)r0.x, 2, r0.x
mov.f32f32 r2.z, (0.000000)
diff --git a/reference/piglit-vs-temp-mat3-row-rd.asm b/reference/piglit-vs-temp-mat3-row-rd.asm
index f837f5f..89d1bbe 100644
--- a/reference/piglit-vs-temp-mat3-row-rd.asm
+++ b/reference/piglit-vs-temp-mat3-row-rd.asm
@@ -12,6 +12,9 @@
@out(r1.y) out5
@out(r1.z) out6
@out(r1.w) out7
+@const(c6.x) 0x00000000, 0x00000001, 0x00000002, 0x00000000
+@const(c7.x) 0x40800000, 0x40a00000, 0x40c00000, 0x3f800000
+@const(c8.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000
(sy)(ss)mov.f32f32 r0.x, c0.x
mov.f32f32 r0.y, c0.x
mul.f r0.z, c2.w, r1.x
@@ -23,56 +26,44 @@ mad.f32 r0.w, c3.z, r1.y, r0.w
absneg.s r0.x, (neg)r0.x
absneg.s r0.y, (neg)r0.y
mad.f32 r0.z, c4.w, r1.z, r0.z
-mad.f32 r0.w, c4.z, r1.z, r0.w
+mad.f32 r2.x, c4.z, r1.z, r0.w
cov.s32f32 r0.x, r0.x
cov.s32f32 r0.y, r0.y
-mad.f32 r0.z, c5.w, r1.w, r0.z
-mad.f32 r2.x, c5.z, r1.w, r0.w
-cmps.f.lt r0.x, r0.x, c6.x
-mov.f32f32 r0.y, r0.y
(rpt1)nop
-sel.b32 r0.x, c7.y, r0.x, c7.x
+cmps.f.lt r0.x, r0.x, c6.x
cmps.f.lt r0.y, r0.y, c6.x
-mov.f32f32 r0.w, r0.z
-mov.f32f32 r0.z, r2.x
-mov.f32f32 r0.x, r0.x
+mad.f32 r0.w, c5.w, r1.w, r0.z
+mad.f32 r0.z, c5.z, r1.w, r2.x
+sel.b32 r0.x, c7.y, r0.x, c7.x
mul.f r2.x, c2.y, r1.x
mul.f r1.x, c2.x, r1.x
mad.f32 r2.x, c3.y, r1.y, r2.x
sel.b32 r0.x, c7.z, r0.y, r0.x
mad.f32 r0.y, c4.y, r1.z, r2.x
mad.f32 r1.x, c3.x, r1.y, r1.x
-mad.f32 r0.y, c5.y, r1.w, r0.y
-mov.f32f32 r0.x, r0.x
-mad.f32 r1.x, c4.x, r1.z, r1.x
mov.f32f32 r1.y, c8.y
-mov.f32f32 r1.z, c8.y
cmps.f.eq r0.x, r0.x, c1.x
-mov.f32f32 r0.y, r0.y
-mad.f32 r2.x, c5.x, r1.w, r1.x
+mad.f32 r0.y, c5.y, r1.w, r0.y
+mad.f32 r1.x, c4.x, r1.z, r1.x
nop
-absneg.s r0.x, (neg)r0.x
-mov.f32f32 r1.x, c8.x
-mov.f32f32 r1.w, c8.x
-mov.f32f32 r2.y, c8.x
-cmps.u.ne r0.x, r0.x, c6.x
+absneg.s r1.z, (neg)r0.x
+mad.f32 r0.x, c5.x, r1.w, r1.x
+mov.f32f32 r1.x, c8.y
+mov.f32f32 r2.x, c8.x
+cmps.u.ne r2.y, r1.z, c6.x
+mov.f32f32 r1.z, c8.x
mov.f32f32 r2.z, c8.y
mov.f32f32 r2.w, c8.x
-mov.f32f32 r3.x, c8.y
-sel.b32 r1.y, r1.z, r0.x, r1.y
-sel.b32 r1.x, r1.w, r0.x, r1.x
-sel.b32 r2.y, r2.z, r0.x, r2.y
-sel.b32 r0.x, r2.w, r0.x, r3.x
-mov.f32f32 r1.w, r1.y
-mov.f32f32 r1.z, r1.x
-mov.f32f32 r1.y, r2.y
-mov.f32f32 r1.x, r0.x
-mov.f32f32 r0.x, r2.x
+sel.b32 r1.w, r1.x, r2.y, r1.y
+sel.b32 r1.z, r1.z, r2.y, r2.x
+mov.f32f32 r1.x, c8.x
+mov.f32f32 r2.x, c8.y
+(rpt1)nop
+sel.b32 r1.y, r2.z, r2.y, r1.x
+sel.b32 r1.x, r2.w, r2.y, r2.x
end
nop
-nop
-nop
; VERT: outputs: r0.x (0:0) r1.x (5:20)
; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0)
-; VERT: 58 instructions, 0 half, 4 full
+; VERT: 49 instructions, 0 half, 3 full
diff --git a/reference/piglit-vs-varying-array-mat2-index-rd.asm b/reference/piglit-vs-varying-array-mat2-index-rd.asm
index 46ff3a2..e649199 100644
--- a/reference/piglit-vs-varying-array-mat2-index-rd.asm
+++ b/reference/piglit-vs-varying-array-mat2-index-rd.asm
@@ -1,7 +1,4 @@
; options:
-ir3/ir3_cmdline.c:168:reset_variant: error: new compiler failed, trying without copy propagation!
-ir3/ir3_cmdline.c:168:reset_variant: error: new compiler failed, trying fallback!
-
; VERT: old compiler
@in(r0.x) in0
@in(r0.y) in0
diff --git a/reference/piglit-vs-varying-array-mat4-index-rd.asm b/reference/piglit-vs-varying-array-mat4-index-rd.asm
index 0242f7f..0d301e0 100644
--- a/reference/piglit-vs-varying-array-mat4-index-rd.asm
+++ b/reference/piglit-vs-varying-array-mat4-index-rd.asm
@@ -1,7 +1,4 @@
; options:
-ir3/ir3_cmdline.c:168:reset_variant: error: new compiler failed, trying without copy propagation!
-ir3/ir3_cmdline.c:168:reset_variant: error: new compiler failed, trying fallback!
-
; VERT: old compiler
@in(r0.x) in0
@in(r0.y) in0
diff --git a/reference/problem/0ad-frag.asm b/reference/problem/0ad-frag.asm
index 59da729..588e4b2 100644
--- a/reference/problem/0ad-frag.asm
+++ b/reference/problem/0ad-frag.asm
@@ -6,47 +6,37 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
-(sy)(ss)bary.f r0.z, 8, r0.x
-bary.f r0.w, 10, r0.x
-bary.f r1.x, 9, r0.x
-bary.f (ei)r0.x, 11, r0.x
-mov.f32f32 r0.y, r0.z
-mov.f32f32 r0.z, r0.w
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.y, r0.y
-mov.f32f32 r1.z, r0.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r1.w, r1.y
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r2.z, r1.z
-mov.f32f32 r3.y, r0.y
-mov.f32f32 r4.x, r0.z
-mov.f32f32 r2.x, r1.x
+@const(c5.x) 0x3a03126f, 0x3f000000, 0x3f800000, 0x40000000
+@const(c6.x) 0x3f800000, 0xbf000000, 0x3de38866, 0x00000000
+(sy)(ss)bary.f r0.z, 10, r0.x
+bary.f r0.w, 11, r0.x
+bary.f r2.y, 9, r0.x
+bary.f (ei)r1.y, 8, r0.x
+mov.f32f32 r0.x, r0.z
mov.f32f32 r0.y, r0.w
-mov.f32f32 r2.w, r1.x
-mov.f32f32 r0.x, r0.x
+mov.f32f32 r1.x, r2.y
+mov.f32f32 r2.w, r1.y
+mov.f32f32 r1.w, r0.x
+mov.f32f32 r1.z, r0.y
+mov.f32f32 r3.x, r1.x
+mov.f32f32 r2.x, r0.z
+mov.f32f32 r3.y, r1.w
+mov.f32f32 r2.z, r1.w
+mov.f32f32 r1.x, r1.w
nop
-mov.f32f32 r2.y, r0.y
-mov.f32f32 r3.x, r0.y
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r3.w, r0.y
-mov.f32f32 r4.z, r0.y
-(rpt1)nop
-sam.s (f32)(w)r0.y, r1.w, s#2, t#2
-(sy)(ss)mov.f32f32 r1.w, r1.x
-sam.s (f32)(z)r0.y, r2.z, s#2, t#2
-(sy)mov.f32f32 r1.z, r0.w
-mov.f32f32 r3.z, r0.x
-mov.f32f32 r4.y, r0.x
-(rpt4)nop
-sam.s (f32)(y)r0.x, r3.y, s#2, t#2
-(sy)mov.f32f32 r1.y, r0.y
-sam.s (f32)(x)r0.x, r4.x, s#2, t#2
-(sy)mov.f32f32 r1.x, r0.x
+sam.s (f32)(y)r1.x, r1.y, s#2, t#2
+(rpt2)nop
+(ss)nop
+sam.s (f32)(w)r1.x, r2.w, s#2, t#2
+nop
+sam.s (f32)(z)r1.x, r2.x, s#2, t#2
+nop
+sam.s (f32)(x)r1.x, r0.z, s#2, t#2
end
+nop
+nop
+nop
; FRAG: outputs: r1.x (1:0)
-; FRAG: inputs: r0.x (1:0,cm=f,il=8,b=1) r1.x (5:0,cm=f,il=12,b=1) r63.y (5:2,cm=f,il=16,b=1) r3.x (5:3,cm=f,il=20,b=1)
-; FRAG: 45 instructions, 0 half, 5 full
+; FRAG: inputs: r0.x (1:0,cm=f,il=8,b=1) r1.x (5:0,cm=f,il=12,b=1) r0.x (5:2,cm=f,il=16,b=1) r3.x (5:3,cm=f,il=20,b=1)
+; FRAG: 27 instructions, 0 half, 4 full
diff --git a/reference/problem/frag-conflict-1.asm b/reference/problem/frag-conflict-1.asm
index cea918a..5f6c550 100644
--- a/reference/problem/frag-conflict-1.asm
+++ b/reference/problem/frag-conflict-1.asm
@@ -1,24 +1,27 @@
; options:
-; FRAG: new compiler
+; FRAG: TGSI compiler
@in(r0.x) in0
@in(r0.y) in1
@out(r1.x) out0
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)bary.f r0.z, 0, r0.x
-bary.f (ei)r0.x, 1, r0.x
-(rpt1)nop
-mov.f32f32 r0.y, r0.z
-mov.f32f32 r0.z, r0.x
+bary.f (ei)r0.w, 1, r0.x
(rpt5)nop
-sam (f32)(xyzw)r0.x, r0.y, s#0, t#0
+sam (f32)(xyzw)r0.x, r0.z, s#0, t#0
(sy)mov.f32f32 r1.w, r0.x
mov.f32f32 r1.z, r0.y
mov.f32f32 r1.y, r0.z
mov.f32f32 r1.x, r0.w
end
+nop
+nop
+nop
; FRAG: outputs: r1.x (1:0)
; FRAG: inputs: r0.x (1:0,cm=f,il=8,b=1) r1.x (5:0,cm=f,il=12,b=1)
-; FRAG: 18 instructions, 0 half, 2 full
+; FRAG: 14 instructions, 0 half, 2 full
+; pos (bary): r0.x
+; color: r1.x
diff --git a/reference/problem/frag-conflict-2.asm b/reference/problem/frag-conflict-2.asm
index f69882b..b9e1c2d 100644
--- a/reference/problem/frag-conflict-2.asm
+++ b/reference/problem/frag-conflict-2.asm
@@ -6,31 +6,20 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
-(sy)(ss)bary.f r0.z, 1, r0.x
-bary.f r0.w, 0, r0.x
-bary.f (ei)r0.x, 2, r0.x
+@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
+(sy)(ss)bary.f r0.w, 1, r0.x
+bary.f r1.x, 2, r0.x
+bary.f (ei)r0.z, 0, r0.x
nop
-mov.f32f32 r0.y, r0.z
-mov.f32f32 r1.x, r0.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.y, r0.y
-mov.f32f32 r2.x, r1.x
-mov.f32f32 r0.y, r0.z
mov.f32f32 r1.z, r0.w
-mov.f32f32 r1.w, r0.x
-mov.f32f32 r2.z, r0.x
-mov.f32f32 r2.y, r0.y
-(rpt3)nop
-sam.s (f32)(zw)r1.x, r1.y, s#0, t#0
+mov.f32f32 r2.x, r1.x
+mov.f32f32 r1.w, r0.z
(rpt1)nop
-(ss)nop
-sam.s (f32)(xy)r1.x, r2.x, s#0, t#0
+sam.s (f32)(xy)r1.x, r0.z, s#0, t#0
+(rpt3)nop
+sam.s (f32)(zw)r1.x, r1.z, s#0, t#0
end
-nop
-nop
-nop
; FRAG: outputs: r1.x (1:0)
; FRAG: inputs: r0.x (1:0,cm=f,il=8,b=1) r1.x (5:0,cm=f,il=12,b=1)
-; FRAG: 25 instructions, 0 half, 3 full
+; FRAG: 16 instructions, 0 half, 3 full
diff --git a/reference/relative-lowered.asm b/reference/relative-lowered.asm
index 32ba282..bc41c3e 100644
--- a/reference/relative-lowered.asm
+++ b/reference/relative-lowered.asm
@@ -4,6 +4,7 @@
@out(r0.y) out1
@out(r0.z) out2
@out(r0.w) out3
+@const(c5.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)cov.f32s16 hr0.x, c4.x
mov.f32f32 r0.w, c5.y
mov.f32f32 r0.x, c5.y
@@ -13,34 +14,22 @@ shl.b hr0.x, hr0.x, 2
mova a0.x, hr0.x
(rpt5)nop
mov.f32f32 r0.y, c<a0.x + 5>
-mov.f32f32 r0.z, c<a0.x + 4>
-absneg.f r1.x, (neg)c<a0.x + 4>
+absneg.f r0.z, (neg)c<a0.x + 4>
+mov.f32f32 r1.x, c<a0.x + 4>
(ul)min.f r1.y, c<a0.x + 4>, c5.z
-max.f r0.y, r0.y, c5.x
-max.f r0.z, r0.z, c5.x
-cmps.f.lt r1.x, r1.x, c5.x
+max.f r1.z, r0.y, c5.x
+cmps.f.lt r0.z, r0.z, c5.x
+max.f r0.y, r1.x, c5.x
(rpt3)nop
-log2 r0.y, r0.y
-(ss)mov.f32f32 r1.z, r0.y
-(ss)mov.f32f32 r0.y, r0.z
-(rpt1)nop
-mul.f r0.z, r1.y, r1.z
-(rpt2)nop
-mov.f32f32 r0.z, r0.z
+log2 r1.x, r1.z
+(ss)mul.f r1.x, r1.y, r1.x
(rpt5)nop
-exp2 r0.z, r0.z
-(ss)mov.f32f32 r0.z, r0.z
-(rpt2)nop
-sel.b32 r0.z, r0.z, r1.x, c5.x
-(rpt2)nop
-mov.f32f32 r0.z, r0.z
-(rpt2)nop
-mov.f32f32 r0.z, r0.z
+exp2 r1.x, r1.x
+(ss)sel.b32 r0.z, r1.x, r0.z, c5.x
end
nop
nop
-nop
; VERT: outputs: r0.x (0:0)
; VERT: inputs:
-; VERT: 57 instructions, 1 half, 2 full
+; VERT: 37 instructions, 1 half, 2 full
diff --git a/reference/relative-med.asm b/reference/relative-med.asm
index 19eed4d..26d7832 100644
--- a/reference/relative-med.asm
+++ b/reference/relative-med.asm
@@ -1,9 +1,9 @@
; options:
; VERT: new compiler
-@in(r1.x) in0
-@in(r1.y) in1
-@in(r1.z) in2
-@in(r1.w) in3
+@in(r0.x) in0
+@in(r0.y) in1
+@in(r0.z) in2
+@in(r0.w) in3
@out(r0.x) out0
@out(r0.y) out1
@out(r0.z) out2
@@ -12,6 +12,7 @@
@out(r1.y) out5
@out(r1.z) out6
@out(r1.w) out7
+@const(c5.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000
(sy)(ss)cov.f32s16 hr0.x, c4.w
cov.f32s16 hr0.y, c4.z
cov.f32s16 hr0.z, c4.y
@@ -21,11 +22,7 @@ shl.b hr0.y, hr0.y, 2
shl.b hr0.z, hr0.z, 2
shl.b hr0.w, hr0.w, 2
mova a0.x, hr0.x
-mov.f32f32 r0.w, r1.w
-mov.f32f32 r0.z, r1.z
-mov.f32f32 r0.y, r1.y
-mov.f32f32 r0.x, r1.x
-(rpt1)nop
+(rpt5)nop
(ul)mov.f32f32 r1.x, c<a0.x + 7>
(rpt2)nop
max.f r1.x, r1.x, c5.x
@@ -57,6 +54,6 @@ nop
nop
nop
; VERT: outputs: r0.x (0:0) r1.x (1:0)
-; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0)
+; VERT: inputs: r0.x (0:0,cm=f,il=8,b=0)
; VERT: 73 instructions, 1 half, 2 full
diff --git a/reference/relative-piglit-bad.asm b/reference/relative-piglit-bad.asm
index 07e242f..becdd49 100644
--- a/reference/relative-piglit-bad.asm
+++ b/reference/relative-piglit-bad.asm
@@ -12,6 +12,8 @@
@out(r1.y) out5
@out(r1.z) out6
@out(r1.w) out7
+@const(c20.x) 0x40800000, 0x00000000, 0x3f800000, 0x40000000
+@const(c21.x) 0x00000000, 0x3f800000, 0x40000000, 0x40400000
(sy)(ss)mov.f32f32 r0.x, c0.x
mov.f32f32 r0.y, c2.x
mov.f32f32 r0.z, c2.x
@@ -24,67 +26,55 @@ add.f r0.x, c1.x, r0.x
cov.u32f32 r0.y, r0.y
cov.u32f32 r0.z, r0.z
cov.u32f32 r0.w, r0.w
-mov.f32f32 r0.x, r0.x
-cmps.f.lt r0.y, (neg)r0.y, c20.y
-cmps.f.lt r2.x, (neg)r0.z, c20.y
-cmps.f.lt r2.y, (neg)r0.w, c20.y
cov.f32s16 hr0.x, r0.x
-mul.f r0.x, c16.w, r1.x
-mul.f r0.z, c16.z, r1.x
-mul.f r0.w, c16.y, r1.x
+cmps.f.lt r0.x, (neg)r0.y, c20.y
+cmps.f.lt r0.y, (neg)r0.z, c20.y
+cmps.f.lt r2.x, (neg)r0.w, c20.y
shl.b hr0.x, hr0.x, 2
-mad.f32 r0.x, c17.w, r1.y, r0.x
-mad.f32 r0.z, c17.z, r1.y, r0.z
-mad.f32 r0.w, c17.y, r1.y, r0.w
+mul.f r0.z, c16.w, r1.x
+mul.f r0.w, c16.z, r1.x
+mul.f r2.y, c16.y, r1.x
mova a0.x, hr0.x
-mad.f32 r0.x, c18.w, r1.z, r0.x
-mad.f32 r0.z, c18.z, r1.z, r0.z
-mad.f32 r2.z, c18.y, r1.z, r0.w
+mad.f32 r0.z, c17.w, r1.y, r0.z
+mad.f32 r0.w, c17.z, r1.y, r0.w
+mad.f32 r2.y, c17.y, r1.y, r2.y
mul.f r1.x, c16.x, r1.x
-mad.f32 r0.x, c19.w, r1.w, r0.x
-mad.f32 r0.z, c19.z, r1.w, r0.z
+mad.f32 r0.z, c18.w, r1.z, r0.z
+mad.f32 r2.z, c18.z, r1.z, r0.w
mov.f32f32 r0.w, c<a0.x + 17>
mov.f32f32 r2.w, c<a0.x + 16>
nop
mov.f32f32 r3.x, c<a0.x + 18>
(ul)mov.f32f32 r3.y, c<a0.x + 19>
-sel.b32 r0.y, r0.w, r0.y, r2.w
-mov.f32f32 r0.w, r0.x
-mov.f32f32 r0.z, r0.z
-mad.f32 r0.x, c19.y, r1.w, r2.z
-sel.b32 r0.y, r3.x, r2.x, r0.y
+sel.b32 r0.x, r0.w, r0.x, r2.w
+mad.f32 r0.w, c19.w, r1.w, r0.z
+mad.f32 r0.z, c19.z, r1.w, r2.z
+mad.f32 r2.y, c18.y, r1.z, r2.y
+sel.b32 r0.x, r3.x, r0.y, r0.x
+mad.f32 r0.y, c19.y, r1.w, r2.y
mad.f32 r1.x, c17.x, r1.y, r1.x
mov.f32f32 r1.y, c20.z
-mov.f32f32 r2.x, c20.z
-mov.f32f32 r2.z, r0.y
-mov.f32f32 r0.y, r0.x
-mad.f32 r0.x, c18.x, r1.z, r1.x
-mov.f32f32 r1.x, c20.y
-sel.b32 r1.z, r3.y, r2.y, r2.z
-mad.f32 r0.x, c19.x, r1.w, r0.x
+sel.b32 r0.x, r3.y, r2.x, r0.x
+mad.f32 r1.x, c18.x, r1.z, r1.x
(rpt1)nop
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r1.w, c20.y
+cmps.f.eq r1.z, r0.x, c3.x
+mad.f32 r0.x, c19.x, r1.w, r1.x
+mov.f32f32 r1.x, c20.z
+mov.f32f32 r2.x, c20.y
+cov.u32f32 r1.z, r1.z
mov.f32f32 r2.y, c20.y
-cmps.f.eq r1.z, r1.z, c3.x
-mov.f32f32 r2.z, c20.z
+mov.f32f32 r2.z, c20.y
mov.f32f32 r2.w, c20.z
-mov.f32f32 r3.x, c20.y
-cov.u32f32 r1.z, r1.z
-(rpt2)nop
-cmps.f.ne r1.z, r1.z, c20.y
-(rpt2)nop
-sel.b32 r1.y, r2.x, r1.z, r1.y
-sel.b32 r1.x, r1.w, r1.z, r1.x
-sel.b32 r2.x, r2.z, r1.z, r2.y
-sel.b32 r2.y, r3.x, r1.z, r2.w
-mov.f32f32 r1.w, r1.y
-mov.f32f32 r1.z, r1.x
-mov.f32f32 r1.y, r2.x
-mov.f32f32 r1.x, r2.y
+cmps.f.ne r3.x, r1.z, c20.y
+mov.f32f32 r3.y, c20.y
+mov.f32f32 r3.z, c20.z
+nop
+sel.b32 r1.w, r1.x, r3.x, r1.y
+sel.b32 r1.z, r2.y, r3.x, r2.x
+sel.b32 r1.y, r2.w, r3.x, r2.z
+sel.b32 r1.x, r3.y, r3.x, r3.z
end
; VERT: outputs: r0.x (0:0) r1.x (5:20)
; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0)
-; VERT: 77 instructions, 1 half, 4 full
+; VERT: 61 instructions, 1 half, 4 full
diff --git a/reference/relative-temp/fs-temp-mat3-col-row-wr.asm b/reference/relative-temp/fs-temp-mat3-col-row-wr.asm
index 9486eec..c6d75b9 100644
--- a/reference/relative-temp/fs-temp-mat3-col-row-wr.asm
+++ b/reference/relative-temp/fs-temp-mat3-col-row-wr.asm
@@ -1,7 +1,4 @@
; options:
-ir3/ir3_cmdline.c:213:reset_variant: error: new compiler failed, trying without copy propagation!
-ir3/ir3_cmdline.c:213:reset_variant: error: new compiler failed, trying fallback!
-
; FRAG: old compiler
@out(r1.x) out0
@out(r1.y) out1
diff --git a/reference/sad-frag.asm b/reference/sad-frag.asm
index 3b6fffc..3116ce3 100644
--- a/reference/sad-frag.asm
+++ b/reference/sad-frag.asm
@@ -4,6 +4,7 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c0.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000
(sy)(ss)mov.f32f32 r1.w, c0.x
mov.f32f32 r1.z, c0.x
mov.f32f32 r1.y, c0.y
diff --git a/reference/simple-frag.asm b/reference/simple-frag.asm
index 2863317..abd34aa 100644
--- a/reference/simple-frag.asm
+++ b/reference/simple-frag.asm
@@ -6,35 +6,24 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
-(sy)(ss)bary.f r0.z, 4, r0.x
-bary.f r0.w, 7, r0.x
-bary.f r1.x, 5, r0.x
+@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
+(sy)(ss)bary.f r1.x, 7, r0.x
+bary.f r0.z, 4, r0.x
+bary.f r0.w, 5, r0.x
bary.f r1.y, 3, r0.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.x, r1.x
-bary.f r1.z, 2, r0.x
-mov.f32f32 r2.x, r0.z
-mul.f r0.z, r0.w, r1.y
-mov.f32f32 r2.y, r1.x
-mov.f32f32 r2.z, r0.w
-bary.f r0.w, 1, r0.x
+mov.f32f32 r1.z, r1.x
+bary.f r2.x, 2, r0.x
+bary.f r2.y, 1, r0.x
bary.f (ei)r0.x, 0, r0.x
-nop
-mov.f32f32 r1.w, r0.z
-(rpt1)nop
-sam.p (f32)(xyz)r2.x, r2.x, s#0, t#0
-(sy)mul.f r0.y, r2.z, r1.z
-mul.f r0.z, r2.y, r0.w
-mul.f r0.x, r2.x, r0.x
-nop
-mov.f32f32 r1.z, r0.y
-mov.f32f32 r1.y, r0.z
-mov.f32f32 r1.x, r0.x
+mul.f r1.w, r1.z, r1.y
+sam.p (f32)(xyz)r0.y, r0.z, s#0, t#0
+(sy)mul.f r1.z, r0.w, r2.x
+mul.f r1.y, r0.z, r2.y
+(ss)mul.f r1.x, r0.y, r0.x
end
nop
nop
; FRAG: outputs: r1.x (1:0)
-; FRAG: inputs: r0.z (1:0,cm=f,il=8,b=1) r0.x (5:0,cm=f,il=12,b=1)
-; FRAG: 27 instructions, 0 half, 3 full
+; FRAG: inputs: r0.z (1:0,cm=f,il=8,b=1) r0.y (5:0,cm=f,il=12,b=1)
+; FRAG: 14 instructions, 0 half, 3 full
diff --git a/reference/simple-if-else.asm b/reference/simple-if-else.asm
index 7153aca..abdc8da 100644
--- a/reference/simple-if-else.asm
+++ b/reference/simple-if-else.asm
@@ -15,39 +15,28 @@
@out(r0.y) out1
@out(r0.z) out2
@out(r0.w) out3
+@const(c10.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)mul.f r2.w, c4.z, r0.z
mul.f r0.z, c4.z, r0.z
mul.f r0.y, c4.y, r0.y
mul.f r0.x, c4.x, r0.x
-mov.f32f32 r3.x, r2.w
-mov.f32f32 r3.y, r0.z
-mov.f32f32 r3.z, r0.y
-mov.f32f32 r3.w, r0.x
-add.f r1.z, r3.x, r1.z
-mov.f32f32 r2.w, r2.w
-add.f r1.y, r3.y, r1.y
-add.f r1.x, r3.z, r1.x
-add.f r3.x, r3.w, r0.w
-add.f r0.w, r2.w, (neg)r2.z
-cmps.f.ne r2.z, r0.x, c10.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
-sel.b32 r0.w, r1.z, r2.z, r0.w
+add.f r1.z, r2.w, r1.z
+add.f r1.y, r0.z, r1.y
+add.f r1.x, r0.y, r1.x
+cmps.f.ne r3.x, r0.x, c10.x
+add.f r2.z, r2.w, (neg)r2.z
add.f r0.z, r0.z, (neg)r2.y
add.f r0.y, r0.y, (neg)r2.x
+add.f r2.x, r0.x, r0.w
+sel.b32 r0.w, r1.z, r3.x, r2.z
+sel.b32 r0.z, r1.y, r3.x, r0.z
+sel.b32 r0.y, r1.x, r3.x, r0.y
add.f r0.x, r0.x, (neg)r1.w
-mov.f32f32 r0.w, r0.w
-sel.b32 r0.z, r1.y, r2.z, r0.z
-sel.b32 r0.y, r1.x, r2.z, r0.y
-sel.b32 r0.x, r3.x, r2.z, r0.x
-nop
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
+(rpt2)nop
+sel.b32 r0.x, r2.x, r3.x, r0.x
end
nop
; VERT: outputs: r0.x (0:0)
; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=f,il=12,b=0) r1.w (0:0,cm=f,il=16,b=0)
-; VERT: 31 instructions, 0 half, 4 full
+; VERT: 21 instructions, 0 half, 4 full
diff --git a/reference/simple-if.asm b/reference/simple-if.asm
index d9f08ae..eb97ce7 100644
--- a/reference/simple-if.asm
+++ b/reference/simple-if.asm
@@ -11,31 +11,24 @@
@out(r0.y) out1
@out(r0.z) out2
@out(r0.w) out3
+@const(c10.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)mul.f r1.w, c4.z, r0.z
mul.f r0.z, c4.z, r0.z
mul.f r0.y, c4.y, r0.y
mul.f r0.x, c4.x, r0.x
-mov.f32f32 r2.x, r1.w
-mov.f32f32 r2.y, r0.z
-mov.f32f32 r2.z, r0.y
-mov.f32f32 r2.w, r0.x
-add.f r1.z, r2.x, r1.z
+add.f r1.z, r1.w, r1.z
+add.f r1.y, r0.z, r1.y
+add.f r1.x, r0.y, r1.x
cmps.f.ne r2.x, r0.x, c10.x
-add.f r1.y, r2.y, r1.y
-add.f r1.x, r2.z, r1.x
-add.f r0.w, r2.w, r0.w
-sel.b32 r1.z, r1.z, r2.x, r1.w
+add.f r2.y, r0.x, r0.w
+(rpt1)nop
+sel.b32 r0.w, r1.z, r2.x, r1.w
sel.b32 r0.z, r1.y, r2.x, r0.z
sel.b32 r0.y, r1.x, r2.x, r0.y
-sel.b32 r0.x, r0.w, r2.x, r0.x
-mov.f32f32 r0.w, r1.z
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
+sel.b32 r0.x, r2.y, r2.x, r0.x
end
nop
-nop
; VERT: outputs: r0.x (0:0)
; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0) r0.w (0:0,cm=f,il=12,b=0)
-; VERT: 22 instructions, 0 half, 3 full
+; VERT: 16 instructions, 0 half, 3 full
diff --git a/reference/simple-vert.asm b/reference/simple-vert.asm
index 261b6f2..a222610 100644
--- a/reference/simple-vert.asm
+++ b/reference/simple-vert.asm
@@ -4,6 +4,8 @@
@out(r0.y) out1
@out(r0.z) out2
@out(r0.w) out3
+@const(c10.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
+@const(c11.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)mov.f32f32 r0.x, c8.x
mov.f32f32 r0.y, c8.y
mov.f32f32 r0.z, c8.z
@@ -11,20 +13,16 @@ nop
mul.f r0.x, r0.x, c8.x
nop
mad.f32 r0.x, c8.y, r0.y, r0.x
-(rpt2)nop
-mov.f32f32 r0.x, r0.x
nop
mad.f32 r0.x, c8.z, r0.z, r0.x
(rpt2)nop
mov.f32f32 r0.w, r0.x
mov.f32f32 r0.z, r0.x
mov.f32f32 r0.y, r0.x
-mov.f32f32 r0.x, r0.x
end
nop
nop
-nop
; VERT: outputs: r0.x (0:0)
; VERT: inputs:
-; VERT: 21 instructions, 0 half, 1 full
+; VERT: 16 instructions, 0 half, 1 full
diff --git a/reference/simple.asm b/reference/simple.asm
index 5533fa5..b3fdbdb 100644
--- a/reference/simple.asm
+++ b/reference/simple.asm
@@ -6,31 +6,24 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)bary.f r0.z, 4, r0.x
bary.f r0.w, 5, r0.x
bary.f r1.x, 3, r0.x
bary.f r1.y, 2, r0.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-bary.f r1.z, 1, r0.x
+bary.f r2.x, 1, r0.x
bary.f (ei)r0.x, 0, r0.x
-mov.f32f32 r0.y, r0.z
-mov.f32f32 r0.z, r0.w
-(rpt5)nop
-sam (f32)(xyzw)r1.w, r0.y, s#0, t#0
-(sy)(ss)mul.f r0.y, r2.z, r1.x
-mul.f r0.z, r2.y, r1.y
-mul.f r0.w, r2.x, r1.z
-mul.f r0.x, r1.w, r0.x
-mov.f32f32 r1.w, r0.y
-mov.f32f32 r1.z, r0.z
-mov.f32f32 r1.y, r0.w
-mov.f32f32 r1.x, r0.x
+(rpt1)nop
+sam (f32)(xyzw)r2.y, r0.z, s#0, t#0
+(sy)mul.f r1.w, r3.x, r1.x
+mul.f r1.z, r2.w, r1.y
+mul.f r1.y, r2.z, r2.x
+mul.f r1.x, r2.y, r0.x
end
nop
nop
nop
; FRAG: outputs: r1.x (1:0)
; FRAG: inputs: r0.y (5:20,cm=f,il=8,b=1) r1.x (5:21,cm=f,il=12,b=1)
-; FRAG: 26 instructions, 0 half, 3 full
+; FRAG: 14 instructions, 0 half, 4 full
diff --git a/reference/simpletest.asm b/reference/simpletest.asm
index b755903..819ed60 100644
--- a/reference/simpletest.asm
+++ b/reference/simpletest.asm
@@ -6,19 +6,16 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)bary.f r0.z, 0, r0.x
-bary.f (ei)r0.x, 1, r0.x
-(rpt1)nop
-mov.f32f32 r0.y, r0.z
-mov.f32f32 r0.z, r0.x
+bary.f (ei)r0.w, 1, r0.x
(rpt5)nop
-sam (f32)(xyzw)r0.x, r0.y, s#0, t#0
-(sy)mov.f32f32 r1.w, r0.w
-mov.f32f32 r1.z, r0.w
-mov.f32f32 r1.y, r0.w
-mov.f32f32 r1.x, r0.w
+sam (f32)(xyzw)r0.y, r0.z, s#0, t#0
+(sy)mov.f32f32 r1.w, r1.x
+mov.f32f32 r1.z, r1.x
+mov.f32f32 r1.y, r1.x
end
; FRAG: outputs: r1.x (1:0)
; FRAG: inputs: r0.x (5:0,cm=f,il=8,b=1)
-; FRAG: 18 instructions, 0 half, 2 full
+; FRAG: 13 instructions, 0 half, 2 full
diff --git a/reference/stk-mines/stk-mines-00.asm b/reference/stk-mines/stk-mines-00.asm
index 4a651b1..16ecc3c 100644
--- a/reference/stk-mines/stk-mines-00.asm
+++ b/reference/stk-mines/stk-mines-00.asm
@@ -6,31 +6,24 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)bary.f r0.z, 4, r0.x
-bary.f r0.w, 7, r0.x
-bary.f r1.x, 5, r0.x
+bary.f r0.w, 5, r0.x
+bary.f r1.x, 7, r0.x
bary.f r1.y, 3, r0.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.x, r1.x
bary.f r1.z, 2, r0.x
-mov.f32f32 r1.w, r0.z
-mov.f32f32 r2.y, r0.w
-mov.f32f32 r2.x, r1.x
-bary.f r0.z, 1, r0.x
+bary.f r2.x, 1, r0.x
bary.f (ei)r0.x, 0, r0.x
-(rpt3)nop
-sam.p (f32)(xyzw)r1.w, r1.w, s#0, t#0
-(sy)mul.f r0.y, r2.z, r1.y
-mul.f r0.w, r2.y, r1.z
-mul.f r0.z, r2.x, r0.z
-mul.f r0.x, r1.w, r0.x
-(ss)mov.f32f32 r1.w, r0.y
-mov.f32f32 r1.z, r0.w
-mov.f32f32 r1.y, r0.z
-mov.f32f32 r1.x, r0.x
+(rpt1)nop
+sam.p (f32)(xyzw)r0.y, r0.z, s#0, t#0
+(sy)mul.f r1.w, r1.x, r1.y
+mul.f r1.z, r0.w, r1.z
+mul.f r1.y, r0.z, r2.x
+(ss)mul.f r1.x, r0.y, r0.x
end
+nop
+nop
; FRAG: outputs: r1.x (1:0)
-; FRAG: inputs: r0.z (1:0,cm=f,il=8,b=1) r0.x (5:0,cm=f,il=12,b=1)
-; FRAG: 27 instructions, 0 half, 3 full
+; FRAG: inputs: r0.z (1:0,cm=f,il=8,b=1) r0.y (5:0,cm=f,il=12,b=1)
+; FRAG: 15 instructions, 0 half, 3 full
diff --git a/reference/stk-mines/stk-mines-01.asm b/reference/stk-mines/stk-mines-01.asm
index c16817f..530b9e3 100644
--- a/reference/stk-mines/stk-mines-01.asm
+++ b/reference/stk-mines/stk-mines-01.asm
@@ -1,20 +1,20 @@
; options:
; VERT: new compiler
-@in(r2.x) in0
-@in(r2.y) in1
-@in(r2.z) in2
-@in(r2.w) in3
-@in(r0.w) in11
-@in(r3.x) in12
-@in(r3.y) in13
-@in(r3.z) in14
-@in(r3.w) in15
+@in(r3.x) in0
+@in(r3.y) in1
+@in(r3.z) in2
+@in(r3.w) in3
+@in(r4.w) in11
+@in(r2.x) in12
+@in(r2.y) in13
+@in(r2.z) in14
+@in(r2.w) in15
@in(r0.x) in16
@in(r0.y) in17
@in(r0.z) in18
-@in(r1.x) in20
-@in(r1.y) in21
-@in(r1.z) in22
+@in(r0.w) in20
+@in(r1.x) in21
+@in(r1.y) in22
@out(r0.x) out0
@out(r0.y) out1
@out(r0.z) out2
@@ -27,47 +27,40 @@
@out(r2.y) out9
@out(r2.z) out10
@out(r2.w) out11
-(sy)(ss)mov.f32f32 r0.w, r0.w
-mad.f32 r0.z, c7.z, r0.z, r1.z
-mad.f32 r0.y, c7.y, r0.y, r1.y
-mad.f32 r0.x, c7.x, r0.x, r1.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
-max.f r0.w, r0.w, c8.x
+@const(c8.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
+(sy)(ss)mad.f32 r0.z, c7.z, r0.z, r1.y
+mad.f32 r0.y, c7.y, r0.y, r1.x
+mad.f32 r0.x, c7.x, r0.x, r0.w
+mul.f r0.w, r3.x, c0.w
max.f r0.z, r0.z, c8.x
max.f r0.y, r0.y, c8.x
max.f r0.x, r0.x, c8.x
-min.f r1.w, r0.w, c8.y
+mad.f32 r0.w, c1.w, r3.y, r0.w
min.f r1.z, r0.z, c8.y
min.f r1.y, r0.y, c8.y
min.f r1.x, r0.x, c8.y
-mul.f r0.x, r2.x, c0.w
-mul.f r0.y, r2.x, c0.z
-mad.f32 r0.x, c1.w, r2.y, r0.x
-mad.f32 r0.y, c1.z, r2.y, r0.y
-mad.f32 r0.x, c2.w, r2.z, r0.x
-mad.f32 r0.y, c2.z, r2.z, r0.y
-mad.f32 r0.w, c3.w, r2.w, r0.x
-mad.f32 r0.z, c3.z, r2.w, r0.y
-mul.f r0.x, r2.x, c0.y
-mul.f r0.y, r2.x, c0.x
-mad.f32 r0.x, c1.y, r2.y, r0.x
-mad.f32 r0.y, c1.x, r2.y, r0.y
-mad.f32 r0.x, c2.y, r2.z, r0.x
-mad.f32 r2.x, c2.x, r2.z, r0.y
-mad.f32 r0.y, c3.y, r2.w, r0.x
-mad.f32 r0.x, c3.x, r2.w, r2.x
-mov.f32f32 r2.w, r3.w
-mov.f32f32 r2.z, r3.z
-mov.f32f32 r2.y, r3.y
-mov.f32f32 r2.x, r3.x
+mad.f32 r0.x, c2.w, r3.z, r0.w
+mul.f r0.y, r3.x, c0.z
+mad.f32 r0.w, c3.w, r3.w, r0.x
+mad.f32 r0.x, c1.z, r3.y, r0.y
+mul.f r0.y, r3.x, c0.y
+mad.f32 r0.x, c2.z, r3.z, r0.x
+mad.f32 r0.y, c1.y, r3.y, r0.y
+mad.f32 r0.z, c3.z, r3.w, r0.x
+mad.f32 r0.x, c2.y, r3.z, r0.y
+mul.f r1.w, r3.x, c0.x
+mad.f32 r0.y, c3.y, r3.w, r0.x
+mad.f32 r0.x, c1.x, r3.y, r1.w
+max.f r1.w, r4.w, c8.x
+mad.f32 r0.x, c2.x, r3.z, r0.x
+nop
+mad.f32 r0.x, c3.x, r3.w, r0.x
+min.f r1.w, r1.w, c8.y
end
nop
nop
nop
; VERT: outputs: r0.x (0:0) r1.x (1:0) r2.x (5:0)
-; VERT: inputs: r2.x (0:0,cm=f,il=8,b=0) r63.w (0:0,cm=0,il=12,b=0) r0.x (0:0,cm=8,il=16,b=0) r3.x (0:0,cm=f,il=20,b=0) r0.x (0:0,cm=7,il=24,b=0) r1.x (0:0,cm=7,il=28,b=0) r63.w (0:0,cm=0,il=32,b=0)
-; VERT: 37 instructions, 0 half, 4 full
+; VERT: inputs: r3.x (0:0,cm=f,il=8,b=0) r63.w (0:0,cm=0,il=12,b=0) r4.x (0:0,cm=8,il=16,b=0) r2.x (0:0,cm=f,il=20,b=0) r0.x (0:0,cm=7,il=24,b=0) r0.w (0:0,cm=7,il=28,b=0) r63.w (0:0,cm=0,il=32,b=0)
+; VERT: 29 instructions, 0 half, 5 full
diff --git a/reference/stk-mines/stk-mines-02.asm b/reference/stk-mines/stk-mines-02.asm
index cca09e5..0583b5d 100644
--- a/reference/stk-mines/stk-mines-02.asm
+++ b/reference/stk-mines/stk-mines-02.asm
@@ -1,17 +1,17 @@
; options:
; VERT: new compiler
-@in(r2.x) in0
-@in(r2.y) in1
-@in(r2.z) in2
-@in(r2.w) in3
-@in(r0.x) in4
-@in(r0.y) in5
-@in(r0.z) in6
-@in(r0.w) in7
-@in(r3.x) in8
-@in(r3.y) in9
-@in(r3.z) in10
-@in(r3.w) in11
+@in(r1.x) in0
+@in(r1.y) in1
+@in(r1.z) in2
+@in(r1.w) in3
+@in(r3.x) in4
+@in(r3.y) in5
+@in(r3.z) in6
+@in(r3.w) in7
+@in(r2.x) in8
+@in(r2.y) in9
+@in(r2.z) in10
+@in(r2.w) in11
@out(r0.x) out0
@out(r0.y) out1
@out(r0.z) out2
@@ -24,43 +24,36 @@
@out(r2.y) out9
@out(r2.z) out10
@out(r2.w) out11
-(sy)(ss)mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
-max.f r0.w, r0.w, c4.x
-max.f r0.z, r0.z, c4.x
-max.f r0.y, r0.y, c4.x
-max.f r0.x, r0.x, c4.x
-min.f r1.w, r0.w, c4.y
-min.f r1.z, r0.z, c4.y
-min.f r1.y, r0.y, c4.y
-min.f r1.x, r0.x, c4.y
-mul.f r0.x, r2.x, c0.w
-mul.f r0.y, r2.x, c0.z
-mad.f32 r0.x, c1.w, r2.y, r0.x
-mad.f32 r0.y, c1.z, r2.y, r0.y
-mad.f32 r0.x, c2.w, r2.z, r0.x
-mad.f32 r0.y, c2.z, r2.z, r0.y
-mad.f32 r0.w, c3.w, r2.w, r0.x
-mad.f32 r0.z, c3.z, r2.w, r0.y
-mul.f r0.x, r2.x, c0.y
-mul.f r0.y, r2.x, c0.x
-mad.f32 r0.x, c1.y, r2.y, r0.x
-mad.f32 r0.y, c1.x, r2.y, r0.y
-mad.f32 r0.x, c2.y, r2.z, r0.x
-mad.f32 r2.x, c2.x, r2.z, r0.y
-mad.f32 r0.y, c3.y, r2.w, r0.x
-mad.f32 r0.x, c3.x, r2.w, r2.x
-mov.f32f32 r2.w, r3.w
-mov.f32f32 r2.z, r3.z
-mov.f32f32 r2.y, r3.y
-mov.f32f32 r2.x, r3.x
+@const(c4.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000
+(sy)(ss)mul.f r0.x, r1.x, c0.w
+mul.f r0.y, r1.x, c0.z
+mad.f32 r0.x, c1.w, r1.y, r0.x
+mad.f32 r0.y, c1.z, r1.y, r0.y
+mad.f32 r0.x, c2.w, r1.z, r0.x
+mad.f32 r0.y, c2.z, r1.z, r0.y
+mad.f32 r0.w, c3.w, r1.w, r0.x
+mad.f32 r0.z, c3.z, r1.w, r0.y
+mul.f r0.x, r1.x, c0.y
+mul.f r0.y, r1.x, c0.x
+mad.f32 r0.x, c1.y, r1.y, r0.x
+mad.f32 r0.y, c1.x, r1.y, r0.y
+mad.f32 r0.x, c2.y, r1.z, r0.x
+mad.f32 r1.x, c2.x, r1.z, r0.y
+mad.f32 r0.y, c3.y, r1.w, r0.x
+mad.f32 r0.x, c3.x, r1.w, r1.x
+max.f r1.x, r3.w, c4.x
+max.f r1.y, r3.z, c4.x
+max.f r3.y, r3.y, c4.x
+max.f r3.x, r3.x, c4.x
+min.f r1.w, r1.x, c4.y
+min.f r1.z, r1.y, c4.y
+min.f r1.y, r3.y, c4.y
+min.f r1.x, r3.x, c4.y
end
nop
nop
nop
; VERT: outputs: r0.x (0:0) r1.x (1:0) r2.x (5:0)
-; VERT: inputs: r2.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=f,il=12,b=0) r3.x (0:0,cm=f,il=16,b=0)
-; VERT: 33 instructions, 0 half, 4 full
+; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) r3.x (0:0,cm=f,il=12,b=0) r2.x (0:0,cm=f,il=16,b=0)
+; VERT: 25 instructions, 0 half, 4 full
diff --git a/reference/stk-mines/stk-mines-03.asm b/reference/stk-mines/stk-mines-03.asm
index ff486a7..d610a0c 100644
--- a/reference/stk-mines/stk-mines-03.asm
+++ b/reference/stk-mines/stk-mines-03.asm
@@ -6,39 +6,20 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)bary.f r0.z, 4, r0.x
-bary.f r0.w, 7, r0.x
-bary.f r1.x, 5, r0.x
-bary.f r1.y, 2, r0.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.x, r1.x
-bary.f r1.z, 1, r0.x
-mov.f32f32 r1.w, r0.z
-mov.f32f32 r2.y, r0.w
-mov.f32f32 r2.x, r1.x
+bary.f r0.w, 5, r0.x
+bary.f r1.x, 7, r0.x
+bary.f r2.x, 2, r0.x
+bary.f r2.y, 1, r0.x
bary.f (ei)r0.x, 0, r0.x
-(rpt4)nop
-sam.p (f32)(xyzw)r0.y, r1.w, s#0, t#0
-(sy)mul.f r0.w, r0.w, r1.y
-mul.f r0.z, r0.z, r1.z
-mul.f r0.x, r0.y, r0.x
-mov.f32f32 r0.y, r1.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.x, r0.x
-(ss)mov.f32f32 r1.w, r0.y
-mov.f32f32 r0.y, r0.w
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.x, r0.x
-nop
-mov.f32f32 r1.z, r0.y
-mov.f32f32 r1.y, r0.z
-mov.f32f32 r1.x, r0.x
+(rpt2)nop
+sam.p (f32)(xyzw)r1.x, r0.z, s#0, t#0
+(sy)mul.f r1.z, r1.z, r2.x
+mul.f r1.y, r1.y, r2.y
+(ss)mul.f r1.x, r1.x, r0.x
end
-nop
-nop
; FRAG: outputs: r1.x (1:0)
-; FRAG: inputs: r0.x (1:0,cm=f,il=8,b=1) r0.x (5:0,cm=f,il=12,b=1)
-; FRAG: 34 instructions, 0 half, 3 full
+; FRAG: inputs: r0.x (1:0,cm=f,il=8,b=1) r0.y (5:0,cm=f,il=12,b=1)
+; FRAG: 14 instructions, 0 half, 3 full
diff --git a/reference/stk-mines/stk-mines-05.asm b/reference/stk-mines/stk-mines-05.asm
index 2c03e4f..284e180 100644
--- a/reference/stk-mines/stk-mines-05.asm
+++ b/reference/stk-mines/stk-mines-05.asm
@@ -1,13 +1,13 @@
; options:
; VERT: new compiler
-@in(r2.x) in0
-@in(r2.y) in1
-@in(r2.z) in2
-@in(r2.w) in3
-@in(r0.x) in4
-@in(r0.y) in5
-@in(r0.z) in6
-@in(r0.w) in7
+@in(r1.x) in0
+@in(r1.y) in1
+@in(r1.z) in2
+@in(r1.w) in3
+@in(r2.x) in4
+@in(r2.y) in5
+@in(r2.z) in6
+@in(r2.w) in7
@out(r0.x) out0
@out(r0.y) out1
@out(r0.z) out2
@@ -16,39 +16,36 @@
@out(r1.y) out5
@out(r1.z) out6
@out(r1.w) out7
-(sy)(ss)mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
-max.f r0.w, r0.w, c4.x
-max.f r0.z, r0.z, c4.x
-max.f r0.y, r0.y, c4.x
-max.f r0.x, r0.x, c4.x
-min.f r1.w, r0.w, c4.y
-min.f r1.z, r0.z, c4.y
-min.f r1.y, r0.y, c4.y
-min.f r1.x, r0.x, c4.y
-mul.f r0.x, r2.x, c0.w
-mul.f r0.y, r2.x, c0.z
-mad.f32 r0.x, c1.w, r2.y, r0.x
-mad.f32 r0.y, c1.z, r2.y, r0.y
-mad.f32 r0.x, c2.w, r2.z, r0.x
-mad.f32 r0.y, c2.z, r2.z, r0.y
-mad.f32 r0.w, c3.w, r2.w, r0.x
-mad.f32 r0.z, c3.z, r2.w, r0.y
-mul.f r0.x, r2.x, c0.y
-mul.f r0.y, r2.x, c0.x
-mad.f32 r0.x, c1.y, r2.y, r0.x
-mad.f32 r0.y, c1.x, r2.y, r0.y
-mad.f32 r0.x, c2.y, r2.z, r0.x
-mad.f32 r2.x, c2.x, r2.z, r0.y
-mad.f32 r0.y, c3.y, r2.w, r0.x
-mad.f32 r0.x, c3.x, r2.w, r2.x
+@const(c4.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000
+(sy)(ss)mul.f r0.x, r1.x, c0.w
+mul.f r0.y, r1.x, c0.z
+mad.f32 r0.x, c1.w, r1.y, r0.x
+mad.f32 r0.y, c1.z, r1.y, r0.y
+mad.f32 r0.x, c2.w, r1.z, r0.x
+mad.f32 r0.y, c2.z, r1.z, r0.y
+mad.f32 r0.w, c3.w, r1.w, r0.x
+mad.f32 r0.z, c3.z, r1.w, r0.y
+mul.f r0.x, r1.x, c0.y
+mul.f r0.y, r1.x, c0.x
+mad.f32 r0.x, c1.y, r1.y, r0.x
+mad.f32 r0.y, c1.x, r1.y, r0.y
+mad.f32 r0.x, c2.y, r1.z, r0.x
+mad.f32 r1.x, c2.x, r1.z, r0.y
+mad.f32 r0.y, c3.y, r1.w, r0.x
+mad.f32 r0.x, c3.x, r1.w, r1.x
+max.f r1.x, r2.w, c4.x
+max.f r1.y, r2.z, c4.x
+max.f r2.y, r2.y, c4.x
+max.f r2.x, r2.x, c4.x
+min.f r1.w, r1.x, c4.y
+min.f r1.z, r1.y, c4.y
+min.f r1.y, r2.y, c4.y
+min.f r1.x, r2.x, c4.y
end
nop
nop
nop
; VERT: outputs: r0.x (0:0) r1.x (1:0)
-; VERT: inputs: r2.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=f,il=12,b=0)
-; VERT: 29 instructions, 0 half, 3 full
+; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) r2.x (0:0,cm=f,il=12,b=0)
+; VERT: 25 instructions, 0 half, 3 full
diff --git a/reference/stk-mines/stk-mines-06.asm b/reference/stk-mines/stk-mines-06.asm
index 2c03e4f..284e180 100644
--- a/reference/stk-mines/stk-mines-06.asm
+++ b/reference/stk-mines/stk-mines-06.asm
@@ -1,13 +1,13 @@
; options:
; VERT: new compiler
-@in(r2.x) in0
-@in(r2.y) in1
-@in(r2.z) in2
-@in(r2.w) in3
-@in(r0.x) in4
-@in(r0.y) in5
-@in(r0.z) in6
-@in(r0.w) in7
+@in(r1.x) in0
+@in(r1.y) in1
+@in(r1.z) in2
+@in(r1.w) in3
+@in(r2.x) in4
+@in(r2.y) in5
+@in(r2.z) in6
+@in(r2.w) in7
@out(r0.x) out0
@out(r0.y) out1
@out(r0.z) out2
@@ -16,39 +16,36 @@
@out(r1.y) out5
@out(r1.z) out6
@out(r1.w) out7
-(sy)(ss)mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
-max.f r0.w, r0.w, c4.x
-max.f r0.z, r0.z, c4.x
-max.f r0.y, r0.y, c4.x
-max.f r0.x, r0.x, c4.x
-min.f r1.w, r0.w, c4.y
-min.f r1.z, r0.z, c4.y
-min.f r1.y, r0.y, c4.y
-min.f r1.x, r0.x, c4.y
-mul.f r0.x, r2.x, c0.w
-mul.f r0.y, r2.x, c0.z
-mad.f32 r0.x, c1.w, r2.y, r0.x
-mad.f32 r0.y, c1.z, r2.y, r0.y
-mad.f32 r0.x, c2.w, r2.z, r0.x
-mad.f32 r0.y, c2.z, r2.z, r0.y
-mad.f32 r0.w, c3.w, r2.w, r0.x
-mad.f32 r0.z, c3.z, r2.w, r0.y
-mul.f r0.x, r2.x, c0.y
-mul.f r0.y, r2.x, c0.x
-mad.f32 r0.x, c1.y, r2.y, r0.x
-mad.f32 r0.y, c1.x, r2.y, r0.y
-mad.f32 r0.x, c2.y, r2.z, r0.x
-mad.f32 r2.x, c2.x, r2.z, r0.y
-mad.f32 r0.y, c3.y, r2.w, r0.x
-mad.f32 r0.x, c3.x, r2.w, r2.x
+@const(c4.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000
+(sy)(ss)mul.f r0.x, r1.x, c0.w
+mul.f r0.y, r1.x, c0.z
+mad.f32 r0.x, c1.w, r1.y, r0.x
+mad.f32 r0.y, c1.z, r1.y, r0.y
+mad.f32 r0.x, c2.w, r1.z, r0.x
+mad.f32 r0.y, c2.z, r1.z, r0.y
+mad.f32 r0.w, c3.w, r1.w, r0.x
+mad.f32 r0.z, c3.z, r1.w, r0.y
+mul.f r0.x, r1.x, c0.y
+mul.f r0.y, r1.x, c0.x
+mad.f32 r0.x, c1.y, r1.y, r0.x
+mad.f32 r0.y, c1.x, r1.y, r0.y
+mad.f32 r0.x, c2.y, r1.z, r0.x
+mad.f32 r1.x, c2.x, r1.z, r0.y
+mad.f32 r0.y, c3.y, r1.w, r0.x
+mad.f32 r0.x, c3.x, r1.w, r1.x
+max.f r1.x, r2.w, c4.x
+max.f r1.y, r2.z, c4.x
+max.f r2.y, r2.y, c4.x
+max.f r2.x, r2.x, c4.x
+min.f r1.w, r1.x, c4.y
+min.f r1.z, r1.y, c4.y
+min.f r1.y, r2.y, c4.y
+min.f r1.x, r2.x, c4.y
end
nop
nop
nop
; VERT: outputs: r0.x (0:0) r1.x (1:0)
-; VERT: inputs: r2.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=f,il=12,b=0)
-; VERT: 29 instructions, 0 half, 3 full
+; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) r2.x (0:0,cm=f,il=12,b=0)
+; VERT: 25 instructions, 0 half, 3 full
diff --git a/reference/stk-mines/stk-mines-07.asm b/reference/stk-mines/stk-mines-07.asm
index cca09e5..0583b5d 100644
--- a/reference/stk-mines/stk-mines-07.asm
+++ b/reference/stk-mines/stk-mines-07.asm
@@ -1,17 +1,17 @@
; options:
; VERT: new compiler
-@in(r2.x) in0
-@in(r2.y) in1
-@in(r2.z) in2
-@in(r2.w) in3
-@in(r0.x) in4
-@in(r0.y) in5
-@in(r0.z) in6
-@in(r0.w) in7
-@in(r3.x) in8
-@in(r3.y) in9
-@in(r3.z) in10
-@in(r3.w) in11
+@in(r1.x) in0
+@in(r1.y) in1
+@in(r1.z) in2
+@in(r1.w) in3
+@in(r3.x) in4
+@in(r3.y) in5
+@in(r3.z) in6
+@in(r3.w) in7
+@in(r2.x) in8
+@in(r2.y) in9
+@in(r2.z) in10
+@in(r2.w) in11
@out(r0.x) out0
@out(r0.y) out1
@out(r0.z) out2
@@ -24,43 +24,36 @@
@out(r2.y) out9
@out(r2.z) out10
@out(r2.w) out11
-(sy)(ss)mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
-max.f r0.w, r0.w, c4.x
-max.f r0.z, r0.z, c4.x
-max.f r0.y, r0.y, c4.x
-max.f r0.x, r0.x, c4.x
-min.f r1.w, r0.w, c4.y
-min.f r1.z, r0.z, c4.y
-min.f r1.y, r0.y, c4.y
-min.f r1.x, r0.x, c4.y
-mul.f r0.x, r2.x, c0.w
-mul.f r0.y, r2.x, c0.z
-mad.f32 r0.x, c1.w, r2.y, r0.x
-mad.f32 r0.y, c1.z, r2.y, r0.y
-mad.f32 r0.x, c2.w, r2.z, r0.x
-mad.f32 r0.y, c2.z, r2.z, r0.y
-mad.f32 r0.w, c3.w, r2.w, r0.x
-mad.f32 r0.z, c3.z, r2.w, r0.y
-mul.f r0.x, r2.x, c0.y
-mul.f r0.y, r2.x, c0.x
-mad.f32 r0.x, c1.y, r2.y, r0.x
-mad.f32 r0.y, c1.x, r2.y, r0.y
-mad.f32 r0.x, c2.y, r2.z, r0.x
-mad.f32 r2.x, c2.x, r2.z, r0.y
-mad.f32 r0.y, c3.y, r2.w, r0.x
-mad.f32 r0.x, c3.x, r2.w, r2.x
-mov.f32f32 r2.w, r3.w
-mov.f32f32 r2.z, r3.z
-mov.f32f32 r2.y, r3.y
-mov.f32f32 r2.x, r3.x
+@const(c4.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000
+(sy)(ss)mul.f r0.x, r1.x, c0.w
+mul.f r0.y, r1.x, c0.z
+mad.f32 r0.x, c1.w, r1.y, r0.x
+mad.f32 r0.y, c1.z, r1.y, r0.y
+mad.f32 r0.x, c2.w, r1.z, r0.x
+mad.f32 r0.y, c2.z, r1.z, r0.y
+mad.f32 r0.w, c3.w, r1.w, r0.x
+mad.f32 r0.z, c3.z, r1.w, r0.y
+mul.f r0.x, r1.x, c0.y
+mul.f r0.y, r1.x, c0.x
+mad.f32 r0.x, c1.y, r1.y, r0.x
+mad.f32 r0.y, c1.x, r1.y, r0.y
+mad.f32 r0.x, c2.y, r1.z, r0.x
+mad.f32 r1.x, c2.x, r1.z, r0.y
+mad.f32 r0.y, c3.y, r1.w, r0.x
+mad.f32 r0.x, c3.x, r1.w, r1.x
+max.f r1.x, r3.w, c4.x
+max.f r1.y, r3.z, c4.x
+max.f r3.y, r3.y, c4.x
+max.f r3.x, r3.x, c4.x
+min.f r1.w, r1.x, c4.y
+min.f r1.z, r1.y, c4.y
+min.f r1.y, r3.y, c4.y
+min.f r1.x, r3.x, c4.y
end
nop
nop
nop
; VERT: outputs: r0.x (0:0) r1.x (1:0) r2.x (5:0)
-; VERT: inputs: r2.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=f,il=12,b=0) r3.x (0:0,cm=f,il=16,b=0)
-; VERT: 33 instructions, 0 half, 4 full
+; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) r3.x (0:0,cm=f,il=12,b=0) r2.x (0:0,cm=f,il=16,b=0)
+; VERT: 25 instructions, 0 half, 4 full
diff --git a/reference/stk-mines/stk-mines-08.asm b/reference/stk-mines/stk-mines-08.asm
index 6763f4d..c381d21 100644
--- a/reference/stk-mines/stk-mines-08.asm
+++ b/reference/stk-mines/stk-mines-08.asm
@@ -1,16 +1,16 @@
; options:
; VERT: new compiler
-@in(r2.x) in0
-@in(r2.y) in1
-@in(r2.z) in2
-@in(r2.w) in3
+@in(r1.x) in0
+@in(r1.y) in1
+@in(r1.z) in2
+@in(r1.w) in3
@in(r0.x) in4
@in(r0.y) in5
@in(r0.z) in6
-@in(r3.x) in8
-@in(r3.y) in9
-@in(r3.z) in10
-@in(r3.w) in11
+@in(r2.x) in8
+@in(r2.y) in9
+@in(r2.z) in10
+@in(r2.w) in11
@out(r0.x) out0
@out(r0.y) out1
@out(r0.z) out2
@@ -19,131 +19,120 @@
@out(r1.y) out5
@out(r1.z) out6
@out(r1.w) out7
-(sy)(ss)mul.f r0.w, r2.x, c12.x
-mul.f r1.x, r0.x, c4.x
-mad.f32 r0.w, c13.x, r2.y, r0.w
-mad.f32 r1.x, c4.y, r0.y, r1.x
-mad.f32 r0.w, c14.x, r2.z, r0.w
-mov.f32f32 r1.y, r3.w
-mad.f32 r0.w, c15.x, r2.w, r0.w
-mov.f32f32 r1.x, r1.x
-mul.f r1.z, r0.x, c5.x
+@const(c19.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
+(sy)(ss)mul.f r0.w, r1.x, c12.x
+mul.f r3.x, r0.x, c4.x
+mad.f32 r0.w, c13.x, r1.y, r0.w
+mad.f32 r3.x, c4.y, r0.y, r3.x
+mad.f32 r0.w, c14.x, r1.z, r0.w
+mad.f32 r3.x, c4.z, r0.z, r3.x
+mad.f32 r3.y, c15.x, r1.w, r0.w
+mul.f r0.w, r1.x, c12.y
+mul.f r3.z, r1.x, c12.z
+mul.f r3.w, r1.x, c0.w
+mul.f r4.x, r3.y, r3.y
+mad.f32 r0.w, c13.y, r1.y, r0.w
+mul.f r4.y, r3.x, c10.x
+mad.f32 r0.w, c14.y, r1.z, r0.w
+mul.f r4.z, r0.x, c5.x
+mad.f32 r4.w, c15.y, r1.w, r0.w
+mad.f32 r0.w, c5.y, r0.y, r4.z
+mad.f32 r3.z, c13.z, r1.y, r3.z
+mad.f32 r3.w, c1.w, r1.y, r3.w
+mad.f32 r4.x, r4.w, r4.w, r4.x
+mad.f32 r3.z, c14.z, r1.z, r3.z
+mad.f32 r4.z, c5.z, r0.z, r0.w
+mad.f32 r3.z, c15.z, r1.w, r3.z
+mad.f32 r0.w, c2.w, r1.z, r3.w
+mul.f r3.w, r1.x, c0.z
+mul.f r5.x, r1.x, c0.y
+mad.f32 r4.x, r3.z, r3.z, r4.x
+mad.f32 r4.y, c10.y, r4.z, r4.y
mul.f r0.x, r0.x, c6.x
-mul.f r1.w, r0.w, r0.w
-mul.f r3.w, r2.x, c12.y
-mad.f32 r1.x, c4.z, r0.z, r1.x
-mad.f32 r3.w, c13.y, r2.y, r3.w
-mov.f32f32 r1.y, r1.y
-mad.f32 r3.w, c14.y, r2.z, r3.w
-mul.f r4.x, r1.x, c10.x
-mad.f32 r3.w, c15.y, r2.w, r3.w
-mad.f32 r1.z, c5.y, r0.y, r1.z
-max.f r1.y, r1.y, c19.x
+mad.f32 r0.w, c3.w, r1.w, r0.w
+mad.f32 r3.w, c1.z, r1.y, r3.w
+mad.f32 r5.x, c1.y, r1.y, r5.x
+mul.f r1.x, r1.x, c0.x
+rsq r4.x, (abs)r4.x
+(ss)mov.f32f32 r5.y, r4.x
+mul.f r3.z, r3.z, r4.x
mad.f32 r0.x, c6.y, r0.y, r0.x
-mad.f32 r0.y, r3.w, r3.w, r1.w
-mov.f32f32 r1.z, r1.z
-min.f r1.w, r1.y, c19.y
-mad.f32 r1.y, c5.z, r0.z, r1.z
-mov.f32f32 r0.y, r0.y
-mul.f r1.z, r2.x, c12.z
-mov.f32f32 r0.x, r0.x
-mad.f32 r1.z, c13.z, r2.y, r1.z
-mad.f32 r4.x, c10.y, r1.y, r4.x
-mad.f32 r1.z, c14.z, r2.z, r1.z
+mad.f32 r0.y, c2.z, r1.z, r3.w
+mul.f r3.y, r3.y, r5.y
+mul.f r3.w, r4.w, r5.y
+(rpt1)nop
+add.f r3.y, c10.x, (neg)r3.y
+add.f r3.w, c10.y, (neg)r3.w
+add.f r3.z, c10.z, (neg)r3.z
mad.f32 r0.x, c6.z, r0.z, r0.x
-mad.f32 r0.z, c15.z, r2.w, r1.z
-mov.f32f32 r1.z, r4.x
-mul.f r4.x, r2.x, c0.w
-mul.f r4.y, r2.x, c0.z
-mad.f32 r0.y, r0.z, r0.z, r0.y
-mad.f32 r1.z, c10.z, r0.x, r1.z
-mad.f32 r4.x, c1.w, r2.y, r4.x
-mad.f32 r4.y, c1.z, r2.y, r4.y
-mul.f r4.z, r2.x, c0.y
-mul.f r2.x, r2.x, c0.x
-mul.f r4.w, c16.z, r3.z
+(ss)mul.f r4.x, r3.y, r3.y
+mad.f32 r0.z, c3.z, r1.w, r0.y
+mad.f32 r0.y, r3.w, r3.w, r4.x
+mad.f32 r4.x, c10.z, r0.x, r4.y
+mad.f32 r0.y, r3.z, r3.z, r0.y
+mad.f32 r4.y, c2.y, r1.z, r5.x
+mad.f32 r1.x, c1.x, r1.y, r1.x
+max.f r1.y, r2.w, c19.x
+mul.f r2.w, c16.z, r2.z
+mul.f r4.w, c16.y, r2.y
+mul.f r5.x, c16.x, r2.x
rsq r0.y, (abs)r0.y
-(ss)mov.f32f32 r0.y, r0.y
-max.f r5.x, r1.z, c19.x
-cmps.f.lt r1.z, (neg)r1.z, c19.x
-mad.f32 r4.x, c2.w, r2.z, r4.x
-mul.f r0.w, r0.w, r0.y
-mul.f r3.w, r3.w, r0.y
-mul.f r0.y, r0.z, r0.y
-mov.f32f32 r5.x, r5.x
-add.f r0.z, c10.x, (neg)r0.w
-add.f r3.w, c10.y, (neg)r3.w
-add.f r0.y, c10.z, (neg)r0.y
-mad.f32 r0.w, c8.z, r3.z, c9.z
-mul.f r5.y, r0.z, r0.z
-mul.f r5.z, c16.y, r3.y
-mad.f32 r5.y, r3.w, r3.w, r5.y
-add.f r4.w, r4.w, r0.w
-mad.f32 r0.w, c8.y, r3.y, c9.y
-mul.f r5.w, c16.x, r3.x
-mov.f32f32 r5.y, r5.y
-mul.f r3.z, c17.z, r3.z
-mad.f32 r5.y, r0.y, r0.y, r5.y
-add.f r5.z, r5.z, r0.w
-mad.f32 r6.x, c8.x, r3.x, c9.x
-mad.f32 r0.w, c3.w, r2.w, r4.x
-mad.f32 r4.x, c2.z, r2.z, r4.y
-mad.f32 r4.y, c1.y, r2.y, r4.z
-mad.f32 r2.x, c1.x, r2.y, r2.x
-rsq r2.y, (abs)r5.y
-(ss)mov.f32f32 r2.y, r2.y
-mad.f32 r3.z, r5.x, r3.z, r4.w
-mul.f r3.y, c17.y, r3.y
-add.f r4.z, r5.w, r6.x
-mul.f r0.z, r0.z, r2.y
-mul.f r3.w, r3.w, r2.y
-mul.f r0.y, r0.y, r2.y
-mad.f32 r2.y, r5.x, r3.y, r5.z
-mul.f r0.z, r1.x, r0.z
-mul.f r1.x, c17.x, r3.x
-mad.f32 r1.y, r1.y, r3.w, r0.z
-mad.f32 r0.z, c3.z, r2.w, r4.x
-mad.f32 r3.x, c2.y, r2.z, r4.y
-mad.f32 r2.x, c2.x, r2.z, r2.x
-mov.f32f32 r1.y, r1.y
-mad.f32 r1.x, r5.x, r1.x, r4.z
-mad.f32 r1.y, r0.x, r0.y, r1.y
-mad.f32 r0.y, c3.y, r2.w, r3.x
-mad.f32 r0.x, c3.x, r2.w, r2.x
-mov.f32f32 r2.x, c7.x
-max.f r1.y, r1.y, c19.x
-(rpt5)nop
-log2 r1.y, r1.y
-(ss)mov.f32f32 r1.y, r1.y
-min.f r2.x, r2.x, c19.z
-(rpt2)nop
-mul.f r1.y, r2.x, r1.y
-(rpt2)nop
-mov.f32f32 r1.y, r1.y
+(ss)mov.f32f32 r5.y, r0.y
+(ss)mul.f r0.y, r3.z, r0.y
+max.f r3.z, r4.x, c19.x
+mad.f32 r5.z, c8.x, r2.x, c9.x
+mul.f r3.y, r3.y, r5.y
+mul.f r3.w, r3.w, r5.y
+mov.f32f32 r5.y, r3.z
+mad.f32 r5.w, c8.y, r2.y, c9.y
+mul.f r3.x, r3.x, r3.y
+mad.f32 r3.y, c8.z, r2.z, c9.z
+mad.f32 r3.x, r4.z, r3.w, r3.x
+add.f r3.w, r4.w, r5.w
+mad.f32 r0.x, r0.x, r0.y, r3.x
+add.f r2.w, r2.w, r3.y
+mul.f r0.y, c17.y, r2.y
+add.f r2.y, r5.x, r5.z
+max.f r0.x, r0.x, c19.x
+mul.f r2.z, c17.z, r2.z
+mad.f32 r3.x, r5.y, r0.y, r3.w
+mul.f r2.x, c17.x, r2.x
+cmps.f.lt r3.y, (neg)r4.x, c19.x
+mad.f32 r0.y, c3.y, r1.w, r4.y
+mad.f32 r1.x, c2.x, r1.z, r1.x
+log2 r1.z, r0.x
+mov.f32f32 r3.w, c7.x
+mad.f32 r2.z, r5.y, r2.z, r2.w
+mad.f32 r2.x, r3.z, r2.x, r2.y
+(ss)mad.f32 r0.x, c3.x, r1.w, r1.x
+min.f r1.x, r3.w, c19.z
+min.f r1.w, r1.y, c19.y
+(rpt1)nop
+(ss)mul.f r1.x, r1.x, r1.z
(rpt5)nop
-exp2 r1.y, r1.y
-(ss)mov.f32f32 r1.y, r1.y
-(rpt2)nop
-sel.b32 r1.y, r1.y, r1.z, c19.x
-(rpt2)nop
-mov.f32f32 r1.y, r1.y
+exp2 r1.x, r1.x
+(ss)sel.b32 r1.x, r1.x, r3.y, c19.x
(rpt2)nop
+mov.f32f32 r1.y, r1.x
+mad.f32 r1.x, c18.x, r1.x, r2.x
+(rpt1)nop
mov.f32f32 r1.y, r1.y
-(rpt2)nop
-mad.f32 r1.z, c18.z, r1.y, r3.z
-mad.f32 r2.x, c18.y, r1.y, r2.y
-mad.f32 r1.x, c18.x, r1.y, r1.x
-nop
-max.f r1.y, r1.z, c19.x
-max.f r2.x, r2.x, c19.x
max.f r1.x, r1.x, c19.x
-nop
-min.f r1.z, r1.y, c19.y
-min.f r1.y, r2.x, c19.y
+(rpt1)nop
+mad.f32 r1.z, c18.z, r1.y, r2.z
+mad.f32 r1.y, c18.y, r1.y, r3.x
min.f r1.x, r1.x, c19.y
+nop
+max.f r1.z, r1.z, c19.x
+max.f r1.y, r1.y, c19.x
+(rpt1)nop
+min.f r1.z, r1.z, c19.y
+min.f r1.y, r1.y, c19.y
end
nop
+nop
+nop
; VERT: outputs: r0.x (0:0) r1.x (1:0)
-; VERT: inputs: r2.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r3.x (0:0,cm=f,il=16,b=0)
-; VERT: 145 instructions, 0 half, 7 full
+; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r2.x (0:0,cm=f,il=16,b=0)
+; VERT: 121 instructions, 0 half, 6 full
diff --git a/reference/stk-mines/stk-mines-09.asm b/reference/stk-mines/stk-mines-09.asm
index 242def2..693d844 100644
--- a/reference/stk-mines/stk-mines-09.asm
+++ b/reference/stk-mines/stk-mines-09.asm
@@ -1,9 +1,9 @@
; options:
; VERT: new compiler
-@in(r2.x) in0
-@in(r2.y) in1
-@in(r2.z) in2
-@in(r2.w) in3
+@in(r1.x) in0
+@in(r1.y) in1
+@in(r1.z) in2
+@in(r1.w) in3
@in(r0.x) in4
@in(r0.y) in5
@in(r0.z) in6
@@ -11,10 +11,10 @@
@in(r3.y) in9
@in(r3.z) in10
@in(r3.w) in11
-@in(r4.x) in12
-@in(r4.y) in13
-@in(r4.z) in14
-@in(r4.w) in15
+@in(r2.x) in12
+@in(r2.y) in13
+@in(r2.z) in14
+@in(r2.w) in15
@out(r0.x) out0
@out(r0.y) out1
@out(r0.z) out2
@@ -27,135 +27,120 @@
@out(r2.y) out9
@out(r2.z) out10
@out(r2.w) out11
-(sy)(ss)mul.f r0.w, r2.x, c12.x
-mul.f r1.x, r0.x, c4.x
-mad.f32 r0.w, c13.x, r2.y, r0.w
-mad.f32 r1.x, c4.y, r0.y, r1.x
-mad.f32 r0.w, c14.x, r2.z, r0.w
-mov.f32f32 r1.y, r3.w
-mad.f32 r0.w, c15.x, r2.w, r0.w
-mov.f32f32 r1.x, r1.x
-mul.f r1.z, r0.x, c5.x
+@const(c19.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
+(sy)(ss)mul.f r0.w, r1.x, c12.x
+mul.f r4.x, r0.x, c4.x
+mad.f32 r0.w, c13.x, r1.y, r0.w
+mad.f32 r4.x, c4.y, r0.y, r4.x
+mad.f32 r0.w, c14.x, r1.z, r0.w
+mad.f32 r4.x, c4.z, r0.z, r4.x
+mad.f32 r4.y, c15.x, r1.w, r0.w
+mul.f r0.w, r1.x, c12.y
+mul.f r4.z, r1.x, c12.z
+mul.f r4.w, r1.x, c0.w
+mul.f r5.x, r4.y, r4.y
+mad.f32 r0.w, c13.y, r1.y, r0.w
+mul.f r5.y, r4.x, c10.x
+mad.f32 r0.w, c14.y, r1.z, r0.w
+mul.f r5.z, r0.x, c5.x
+mad.f32 r5.w, c15.y, r1.w, r0.w
+mad.f32 r0.w, c5.y, r0.y, r5.z
+mad.f32 r4.z, c13.z, r1.y, r4.z
+mad.f32 r4.w, c1.w, r1.y, r4.w
+mad.f32 r5.x, r5.w, r5.w, r5.x
+mad.f32 r4.z, c14.z, r1.z, r4.z
+mad.f32 r5.z, c5.z, r0.z, r0.w
+mad.f32 r4.z, c15.z, r1.w, r4.z
+mad.f32 r0.w, c2.w, r1.z, r4.w
+mul.f r4.w, r1.x, c0.z
+mul.f r6.x, r1.x, c0.y
+mad.f32 r5.x, r4.z, r4.z, r5.x
+mad.f32 r5.y, c10.y, r5.z, r5.y
mul.f r0.x, r0.x, c6.x
-mul.f r1.w, r0.w, r0.w
-mul.f r3.w, r2.x, c12.y
-mad.f32 r1.x, c4.z, r0.z, r1.x
-mad.f32 r3.w, c13.y, r2.y, r3.w
-mov.f32f32 r1.y, r1.y
-mad.f32 r3.w, c14.y, r2.z, r3.w
-mul.f r5.x, r1.x, c10.x
-mad.f32 r3.w, c15.y, r2.w, r3.w
-mad.f32 r1.z, c5.y, r0.y, r1.z
-max.f r1.y, r1.y, c19.x
+mad.f32 r0.w, c3.w, r1.w, r0.w
+mad.f32 r4.w, c1.z, r1.y, r4.w
+mad.f32 r6.x, c1.y, r1.y, r6.x
+mul.f r1.x, r1.x, c0.x
+rsq r5.x, (abs)r5.x
+(ss)mov.f32f32 r6.y, r5.x
+mul.f r4.z, r4.z, r5.x
mad.f32 r0.x, c6.y, r0.y, r0.x
-mad.f32 r0.y, r3.w, r3.w, r1.w
-mov.f32f32 r1.z, r1.z
-min.f r1.w, r1.y, c19.y
-mad.f32 r1.y, c5.z, r0.z, r1.z
-mov.f32f32 r0.y, r0.y
-mul.f r1.z, r2.x, c12.z
-mov.f32f32 r0.x, r0.x
-mad.f32 r1.z, c13.z, r2.y, r1.z
-mad.f32 r5.x, c10.y, r1.y, r5.x
-mad.f32 r1.z, c14.z, r2.z, r1.z
+mad.f32 r0.y, c2.z, r1.z, r4.w
+mul.f r4.y, r4.y, r6.y
+mul.f r4.w, r5.w, r6.y
+(rpt1)nop
+add.f r4.y, c10.x, (neg)r4.y
+add.f r4.w, c10.y, (neg)r4.w
+add.f r4.z, c10.z, (neg)r4.z
mad.f32 r0.x, c6.z, r0.z, r0.x
-mad.f32 r0.z, c15.z, r2.w, r1.z
-mov.f32f32 r1.z, r5.x
-mul.f r5.x, r2.x, c0.w
-mul.f r5.y, r2.x, c0.z
-mad.f32 r0.y, r0.z, r0.z, r0.y
-mad.f32 r1.z, c10.z, r0.x, r1.z
-mad.f32 r5.x, c1.w, r2.y, r5.x
-mad.f32 r5.y, c1.z, r2.y, r5.y
-mul.f r5.z, r2.x, c0.y
-mul.f r2.x, r2.x, c0.x
-mul.f r5.w, c16.z, r3.z
+(ss)mul.f r5.x, r4.y, r4.y
+mad.f32 r0.z, c3.z, r1.w, r0.y
+mad.f32 r0.y, r4.w, r4.w, r5.x
+mad.f32 r5.x, c10.z, r0.x, r5.y
+mad.f32 r0.y, r4.z, r4.z, r0.y
+mad.f32 r5.y, c2.y, r1.z, r6.x
+mad.f32 r1.x, c1.x, r1.y, r1.x
+max.f r1.y, r3.w, c19.x
+mul.f r3.w, c16.z, r3.z
+mul.f r5.w, c16.y, r3.y
+mul.f r6.x, c16.x, r3.x
rsq r0.y, (abs)r0.y
-(ss)mov.f32f32 r0.y, r0.y
-max.f r6.x, r1.z, c19.x
-cmps.f.lt r1.z, (neg)r1.z, c19.x
-mad.f32 r5.x, c2.w, r2.z, r5.x
-mul.f r0.w, r0.w, r0.y
-mul.f r3.w, r3.w, r0.y
-mul.f r0.y, r0.z, r0.y
-mov.f32f32 r6.x, r6.x
-add.f r0.z, c10.x, (neg)r0.w
-add.f r3.w, c10.y, (neg)r3.w
-add.f r0.y, c10.z, (neg)r0.y
-mad.f32 r0.w, c8.z, r3.z, c9.z
-mul.f r6.y, r0.z, r0.z
-mul.f r6.z, c16.y, r3.y
-mad.f32 r6.y, r3.w, r3.w, r6.y
-add.f r5.w, r5.w, r0.w
-mad.f32 r0.w, c8.y, r3.y, c9.y
-mul.f r6.w, c16.x, r3.x
-mov.f32f32 r6.y, r6.y
+(ss)mov.f32f32 r6.y, r0.y
+(ss)mul.f r0.y, r4.z, r0.y
+max.f r4.z, r5.x, c19.x
+mad.f32 r6.z, c8.x, r3.x, c9.x
+mul.f r4.y, r4.y, r6.y
+mul.f r4.w, r4.w, r6.y
+mov.f32f32 r6.y, r4.z
+mad.f32 r6.w, c8.y, r3.y, c9.y
+mul.f r4.x, r4.x, r4.y
+mad.f32 r4.y, c8.z, r3.z, c9.z
+mad.f32 r4.x, r5.z, r4.w, r4.x
+add.f r4.w, r5.w, r6.w
+mad.f32 r0.x, r0.x, r0.y, r4.x
+add.f r3.w, r3.w, r4.y
+mul.f r0.y, c17.y, r3.y
+add.f r3.y, r6.x, r6.z
+max.f r0.x, r0.x, c19.x
mul.f r3.z, c17.z, r3.z
-mad.f32 r6.y, r0.y, r0.y, r6.y
-add.f r6.z, r6.z, r0.w
-mad.f32 r7.x, c8.x, r3.x, c9.x
-mad.f32 r0.w, c3.w, r2.w, r5.x
-mad.f32 r5.x, c2.z, r2.z, r5.y
-mad.f32 r5.y, c1.y, r2.y, r5.z
-mad.f32 r2.x, c1.x, r2.y, r2.x
-rsq r2.y, (abs)r6.y
-(ss)mov.f32f32 r2.y, r2.y
-mad.f32 r3.z, r6.x, r3.z, r5.w
-mul.f r3.y, c17.y, r3.y
-add.f r5.z, r6.w, r7.x
-mul.f r0.z, r0.z, r2.y
-mul.f r3.w, r3.w, r2.y
-mul.f r0.y, r0.y, r2.y
-mad.f32 r3.y, r6.x, r3.y, r6.z
-mul.f r0.z, r1.x, r0.z
-mul.f r1.x, c17.x, r3.x
-mad.f32 r1.y, r1.y, r3.w, r0.z
-mad.f32 r0.z, c3.z, r2.w, r5.x
-mad.f32 r2.y, c2.y, r2.z, r5.y
-mad.f32 r2.x, c2.x, r2.z, r2.x
-mov.f32f32 r1.y, r1.y
-mad.f32 r1.x, r6.x, r1.x, r5.z
-mad.f32 r1.y, r0.x, r0.y, r1.y
-mad.f32 r0.y, c3.y, r2.w, r2.y
-mad.f32 r0.x, c3.x, r2.w, r2.x
-mov.f32f32 r3.x, c7.x
-max.f r1.y, r1.y, c19.x
-mov.f32f32 r2.w, r4.w
-mov.f32f32 r2.z, r4.z
-mov.f32f32 r2.y, r4.y
-mov.f32f32 r2.x, r4.x
+mad.f32 r4.x, r6.y, r0.y, r4.w
+mul.f r3.x, c17.x, r3.x
+cmps.f.lt r4.y, (neg)r5.x, c19.x
+mad.f32 r0.y, c3.y, r1.w, r5.y
+mad.f32 r1.x, c2.x, r1.z, r1.x
+log2 r1.z, r0.x
+mov.f32f32 r4.w, c7.x
+mad.f32 r3.z, r6.y, r3.z, r3.w
+mad.f32 r3.x, r4.z, r3.x, r3.y
+(ss)mad.f32 r0.x, c3.x, r1.w, r1.x
+min.f r1.x, r4.w, c19.z
+min.f r1.w, r1.y, c19.y
(rpt1)nop
-log2 r1.y, r1.y
-(ss)mov.f32f32 r1.y, r1.y
-min.f r3.x, r3.x, c19.z
-(rpt2)nop
-mul.f r1.y, r3.x, r1.y
-(rpt2)nop
-mov.f32f32 r1.y, r1.y
+(ss)mul.f r1.x, r1.x, r1.z
(rpt5)nop
-exp2 r1.y, r1.y
-(ss)mov.f32f32 r1.y, r1.y
-(rpt2)nop
-sel.b32 r1.y, r1.y, r1.z, c19.x
-(rpt2)nop
-mov.f32f32 r1.y, r1.y
+exp2 r1.x, r1.x
+(ss)sel.b32 r1.x, r1.x, r4.y, c19.x
(rpt2)nop
+mov.f32f32 r1.y, r1.x
+mad.f32 r1.x, c18.x, r1.x, r3.x
+(rpt1)nop
mov.f32f32 r1.y, r1.y
-(rpt2)nop
-mad.f32 r1.z, c18.z, r1.y, r3.z
-mad.f32 r3.x, c18.y, r1.y, r3.y
-mad.f32 r1.x, c18.x, r1.y, r1.x
-nop
-max.f r1.y, r1.z, c19.x
-max.f r3.x, r3.x, c19.x
max.f r1.x, r1.x, c19.x
-nop
-min.f r1.z, r1.y, c19.y
-min.f r1.y, r3.x, c19.y
+(rpt1)nop
+mad.f32 r1.z, c18.z, r1.y, r3.z
+mad.f32 r1.y, c18.y, r1.y, r4.x
min.f r1.x, r1.x, c19.y
+nop
+max.f r1.z, r1.z, c19.x
+max.f r1.y, r1.y, c19.x
+(rpt1)nop
+min.f r1.z, r1.z, c19.y
+min.f r1.y, r1.y, c19.y
end
nop
+nop
+nop
; VERT: outputs: r0.x (0:0) r1.x (1:0) r2.x (5:0)
-; VERT: inputs: r2.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r3.x (0:0,cm=f,il=16,b=0) r4.x (0:0,cm=f,il=20,b=0)
-; VERT: 145 instructions, 0 half, 8 full
+; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r3.x (0:0,cm=f,il=16,b=0) r2.x (0:0,cm=f,il=20,b=0)
+; VERT: 121 instructions, 0 half, 7 full
diff --git a/reference/stk-mines/stk-mines-10.asm b/reference/stk-mines/stk-mines-10.asm
index 508a351..c42aaa4 100644
--- a/reference/stk-mines/stk-mines-10.asm
+++ b/reference/stk-mines/stk-mines-10.asm
@@ -6,51 +6,36 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c4.x) 0x3f800000, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)bary.f r0.z, 4, r0.x
bary.f r0.w, 8, r0.x
-bary.f r1.x, 11, r0.x
-bary.f r1.y, 9, r0.x
+bary.f r1.x, 9, r0.x
+bary.f r1.y, 11, r0.x
mad.f32 r0.z, c1.x, r0.z, c1.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.y, r1.y
+bary.f r1.z, 3, r0.x
+bary.f r2.x, 2, r0.x
+bary.f r2.y, 1, r0.x
max.f r0.z, r0.z, c4.y
-mov.f32f32 r1.z, r0.w
-mov.f32f32 r2.x, r1.x
-mov.f32f32 r1.w, r1.y
-min.f r0.z, r0.z, c4.x
-bary.f r0.w, 3, r0.x
-bary.f r1.x, 2, r0.x
-bary.f r1.y, 1, r0.x
-add.f r2.y, c4.x, (neg)r0.z
bary.f (ei)r0.x, 0, r0.x
nop
-sam.p (f32)(xyzw)r2.z, r1.z, s#0, t#0
-(sy)mul.f r0.y, r3.y, r0.w
-mul.f r0.w, c2.z, r2.y
-(ss)mul.f r1.z, c2.y, r2.y
-mul.f r1.w, c2.x, r2.y
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.w, r0.w
-mul.f r1.x, r3.x, r1.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r2.x, r1.w
-mul.f r1.y, r2.w, r1.y
-mad.f32 r0.w, r1.x, r0.z, r0.w
+sam.p (f32)(xyzw)r2.z, r0.w, s#0, t#0
+(sy)mul.f r1.w, r3.y, r1.z
+min.f r0.y, r0.z, c4.x
+mul.f r0.z, r3.x, r2.x
+(ss)mul.f r0.w, r2.w, r2.y
mul.f r0.x, r2.z, r0.x
-mov.f32f32 r1.w, r0.y
-mad.f32 r0.y, r1.y, r0.z, r1.z
-mov.f32f32 r1.z, r0.w
-mad.f32 r0.x, r0.x, r0.z, r2.x
+add.f r1.x, c4.x, (neg)r0.y
+(rpt2)nop
+mul.f r1.y, c2.z, r1.x
+mul.f r2.x, c2.y, r1.x
+mad.f32 r1.z, r0.z, r0.y, r1.y
+mad.f32 r1.y, r0.w, r0.y, r2.x
+mul.f r0.z, c2.x, r1.x
nop
-mov.f32f32 r1.y, r0.y
-nop
-mov.f32f32 r1.x, r0.x
+mad.f32 r1.x, r0.x, r0.y, r0.z
end
nop
-nop
-nop
; FRAG: outputs: r1.x (1:0)
-; FRAG: inputs: r0.x (1:0,cm=f,il=8,b=1) r1.x (3:0,cm=f,il=12,b=1) r0.y (5:0,cm=f,il=16,b=1)
-; FRAG: 41 instructions, 0 half, 4 full
+; FRAG: inputs: r0.w (1:0,cm=f,il=8,b=1) r1.x (3:0,cm=f,il=12,b=1) r0.z (5:0,cm=f,il=16,b=1)
+; FRAG: 29 instructions, 0 half, 4 full
diff --git a/reference/stk-mines/stk-mines-11.asm b/reference/stk-mines/stk-mines-11.asm
index e1b8ffe..27f5178 100644
--- a/reference/stk-mines/stk-mines-11.asm
+++ b/reference/stk-mines/stk-mines-11.asm
@@ -1,20 +1,20 @@
; options:
; VERT: new compiler
-@in(r2.y) in0
-@in(r2.z) in1
-@in(r2.w) in2
-@in(r3.x) in3
+@in(r4.x) in0
+@in(r4.y) in1
+@in(r4.z) in2
+@in(r4.w) in3
@in(r0.x) in4
@in(r0.y) in5
@in(r0.z) in6
-@in(r3.y) in8
-@in(r3.z) in9
-@in(r3.w) in10
-@in(r4.x) in11
-@in(r4.y) in12
-@in(r4.z) in13
-@in(r4.w) in14
-@in(r5.x) in15
+@in(r1.x) in8
+@in(r1.y) in9
+@in(r1.z) in10
+@in(r1.w) in11
+@in(r3.x) in12
+@in(r3.y) in13
+@in(r3.z) in14
+@in(r3.w) in15
@out(r0.x) out0
@out(r0.y) out1
@out(r0.z) out2
@@ -31,139 +31,121 @@
@out(r3.y) out13
@out(r3.z) out14
@out(r3.w) out15
-(sy)(ss)mul.f r0.w, r2.y, c12.x
-mul.f r1.x, r0.x, c4.x
-mad.f32 r0.w, c13.x, r2.z, r0.w
-mad.f32 r1.x, c4.y, r0.y, r1.x
-mad.f32 r0.w, c14.x, r2.w, r0.w
-mov.f32f32 r1.y, r4.x
-mad.f32 r0.w, c15.x, r3.x, r0.w
-mov.f32f32 r1.x, r1.x
-mul.f r1.z, r2.y, c12.z
-mov.f32f32 r1.y, r1.y
-mul.f r2.x, r0.w, r0.w
-mul.f r1.w, r2.y, c12.y
-mad.f32 r1.x, c4.z, r0.z, r1.x
-mad.f32 r1.w, c13.y, r2.z, r1.w
-max.f r1.y, r1.y, c19.x
-mad.f32 r1.w, c14.y, r2.w, r1.w
-mul.f r4.x, r1.x, c10.x
-mad.f32 r5.y, c15.y, r3.x, r1.w
-mul.f r5.z, r0.x, c5.x
-min.f r1.w, r1.y, c19.y
-mad.f32 r1.y, c13.z, r2.z, r1.z
-mad.f32 r1.z, r5.y, r5.y, r2.x
-mad.f32 r2.x, c5.y, r0.y, r5.z
-mad.f32 r1.y, c14.z, r2.w, r1.y
+@const(c19.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000
+@const(c20.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
+(sy)(ss)mul.f r0.w, r4.x, c12.x
+mul.f r2.x, r0.x, c4.x
+mad.f32 r0.w, c13.x, r4.y, r0.w
+mad.f32 r2.x, c4.y, r0.y, r2.x
+mad.f32 r0.w, c14.x, r4.z, r0.w
+mad.f32 r2.y, c4.z, r0.z, r2.x
+mad.f32 r0.w, c15.x, r4.w, r0.w
+mul.f r2.x, r4.x, c12.z
+mul.f r2.z, r4.x, c12.y
+mad.f32 r2.x, c13.z, r4.y, r2.x
+mul.f r2.w, r0.w, r0.w
+mad.f32 r2.z, c13.y, r4.y, r2.z
+mul.f r5.x, r2.y, c10.x
+mad.f32 r2.z, c14.y, r4.z, r2.z
+mul.f r5.y, r0.x, c5.x
+mad.f32 r2.z, c15.y, r4.w, r2.z
+mad.f32 r5.y, c5.y, r0.y, r5.y
+mad.f32 r2.x, c14.z, r4.z, r2.x
+mad.f32 r5.y, c5.z, r0.z, r5.y
+mad.f32 r2.w, r2.z, r2.z, r2.w
+mad.f32 r5.z, c15.z, r4.w, r2.x
+mul.f r5.w, r4.x, c0.w
+mul.f r6.x, r4.x, c0.z
+mul.f r6.y, r4.x, c0.y
+mad.f32 r2.w, r5.z, r5.z, r2.w
+mad.f32 r5.x, c10.y, r5.y, r5.x
mul.f r0.x, r0.x, c6.x
-mov.f32f32 r1.z, r1.z
-mad.f32 r1.y, c15.z, r3.x, r1.y
-mov.f32f32 r2.x, r2.x
+absneg.f r2.x, (abs)r5.z
+mad.f32 r5.w, c1.w, r4.y, r5.w
+mad.f32 r6.x, c1.z, r4.y, r6.x
+mad.f32 r6.y, c1.y, r4.y, r6.y
+rsq r2.w, (abs)r2.w
+(ss)mov.f32f32 r6.z, r2.w
+(ss)mul.f r2.w, r5.z, r2.w
mad.f32 r0.x, c6.y, r0.y, r0.x
-mul.f r0.y, r2.y, c0.w
-mad.f32 r1.z, r1.y, r1.y, r1.z
-mad.f32 r5.z, c5.z, r0.z, r2.x
-absneg.f r2.x, (abs)r1.y
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
-rsq r1.z, (abs)r1.z
-(ss)mov.f32f32 r1.z, r1.z
-mad.f32 r4.x, c10.y, r5.z, r4.x
+mad.f32 r0.y, c2.w, r4.z, r5.w
+mul.f r0.w, r0.w, r6.z
+mul.f r2.z, r2.z, r6.z
+(rpt1)nop
+add.f r5.z, c10.x, (neg)r0.w
+add.f r2.z, c10.y, (neg)r2.z
+add.f r2.w, c10.z, (neg)r2.w
mad.f32 r0.x, c6.z, r0.z, r0.x
-mad.f32 r0.y, c1.w, r2.z, r0.y
-mul.f r0.z, r0.w, r1.z
-mul.f r0.w, r5.y, r1.z
-mul.f r1.y, r1.y, r1.z
-mov.f32f32 r1.z, r4.x
-add.f r0.z, c10.x, (neg)r0.z
-add.f r4.x, c10.y, (neg)r0.w
-add.f r1.y, c10.z, (neg)r1.y
-mad.f32 r0.w, c10.z, r0.x, r1.z
-mul.f r1.z, r0.z, r0.z
-mad.f32 r0.y, c2.w, r2.w, r0.y
-mad.f32 r1.z, r4.x, r4.x, r1.z
-max.f r5.y, r0.w, c20.x
-cmps.f.lt r5.w, (neg)r0.w, c19.x
-mad.f32 r0.w, c3.w, r3.x, r0.y
-mov.f32f32 r0.y, r1.z
-mov.f32f32 r1.z, r5.y
-mad.f32 r0.y, r1.y, r1.y, r0.y
-mul.f r5.y, c16.z, r3.w
-mul.f r6.x, c16.y, r3.z
-mul.f r6.y, c16.x, r3.y
-mul.f r6.z, r2.y, c0.z
-mul.f r6.w, r2.y, c0.y
-mul.f r2.y, r2.y, c0.x
+mul.f r0.z, r5.z, r5.z
+mad.f32 r0.w, c3.w, r4.w, r0.y
+mad.f32 r0.y, r2.z, r2.z, r0.z
+mad.f32 r0.z, c10.z, r0.x, r5.x
+mad.f32 r0.y, r2.w, r2.w, r0.y
+mad.f32 r5.x, c2.z, r4.z, r6.x
+mad.f32 r5.w, c2.y, r4.z, r6.y
+mul.f r4.x, r4.x, c0.x
+max.f r1.w, r1.w, c19.x
+mul.f r6.x, c16.z, r1.z
+mul.f r6.y, c16.y, r1.y
rsq r0.y, (abs)r0.y
-(ss)mov.f32f32 r0.y, r0.y
-mad.f32 r7.x, c8.z, r3.w, c9.z
-mad.f32 r7.y, c8.y, r3.z, c9.y
-mad.f32 r7.z, c8.x, r3.y, c9.x
-mul.f r0.z, r0.z, r0.y
-mul.f r4.x, r4.x, r0.y
-mul.f r0.y, r1.y, r0.y
-add.f r1.y, r5.y, r7.x
-mul.f r0.z, r1.x, r0.z
-mul.f r1.x, c17.z, r3.w
-mad.f32 r0.z, r5.z, r4.x, r0.z
-add.f r3.w, r6.x, r7.y
-add.f r4.x, r6.y, r7.z
-mad.f32 r5.y, c1.z, r2.z, r6.z
-mov.f32f32 r0.z, r0.z
-mad.f32 r1.x, r1.z, r1.x, r1.y
-mad.f32 r0.x, r0.x, r0.y, r0.z
-mul.f r0.y, c17.y, r3.z
-mul.f r0.z, c17.x, r3.y
-mad.f32 r1.y, c2.z, r2.w, r5.y
+(ss)mov.f32f32 r6.z, r0.y
+(ss)mul.f r0.y, r2.w, r0.y
+max.f r6.w, r0.z, c20.x
+mul.f r2.w, c16.x, r1.x
+mul.f r5.z, r5.z, r6.z
+mul.f r2.z, r2.z, r6.z
+mov.f32f32 r6.z, r6.w
+mad.f32 r7.x, c8.y, r1.y, c9.y
+mul.f r2.y, r2.y, r5.z
+mad.f32 r5.z, c8.z, r1.z, c9.z
+mad.f32 r2.y, r5.y, r2.z, r2.y
+add.f r2.z, r6.y, r7.x
+mad.f32 r0.x, r0.x, r0.y, r2.y
+add.f r2.y, r6.x, r5.z
+mul.f r0.y, c17.y, r1.y
+mad.f32 r1.y, c8.x, r1.x, c9.x
max.f r0.x, r0.x, c20.x
-mad.f32 r5.y, r1.z, r0.y, r3.w
-mad.f32 r1.z, r1.z, r0.z, r4.x
-mad.f32 r0.z, c3.z, r3.x, r1.y
-mad.f32 r0.y, c1.y, r2.z, r6.w
-mad.f32 r1.y, c1.x, r2.z, r2.y
-mov.f32f32 r2.y, c7.x
+mul.f r1.z, c17.z, r1.z
+mad.f32 r5.y, r6.z, r0.y, r2.z
+add.f r1.y, r2.w, r1.y
+cmps.f.lt r5.z, (neg)r0.z, c19.x
+mad.f32 r0.z, c3.z, r4.w, r5.x
+mad.f32 r0.y, c3.y, r4.w, r5.w
log2 r0.x, r0.x
-(ss)mov.f32f32 r2.z, r0.x
-(ss)mad.f32 r0.x, c2.y, r2.w, r0.y
-mad.f32 r1.y, c2.x, r2.w, r1.y
-min.f r2.y, r2.y, c20.z
-mad.f32 r0.y, c3.y, r3.x, r0.x
-mad.f32 r0.x, c3.x, r3.x, r1.y
-mov.f32f32 r3.w, r5.x
-mul.f r1.y, r2.y, r2.z
-mov.f32f32 r3.z, r4.w
-mov.f32f32 r3.y, r4.z
-mov.f32f32 r3.x, r4.y
-mov.f32f32 r1.y, r1.y
+mov.f32f32 r2.z, c7.x
+mad.f32 r1.z, r6.z, r1.z, r2.y
+mul.f r1.x, c17.x, r1.x
+mad.f32 r2.y, c1.x, r4.y, r4.x
+min.f r2.z, r2.z, c20.z
+mad.f32 r2.y, c2.x, r4.z, r2.y
+min.f r1.w, r1.w, c19.y
mov.f32f32 r2.w, c19.y
+(ss)mul.f r4.x, r2.z, r0.x
+mad.f32 r1.x, r6.w, r1.x, r1.y
+(ss)mad.f32 r0.x, c3.x, r4.w, r2.y
mov.f32f32 r2.z, c19.x
mov.f32f32 r2.y, c19.x
+(rpt1)nop
+exp2 r1.y, r4.x
+(ss)sel.b32 r1.y, r1.y, r5.z, c20.x
(rpt2)nop
-exp2 r1.y, r1.y
-(ss)mov.f32f32 r1.y, r1.y
-(rpt2)nop
-sel.b32 r1.y, r1.y, r5.w, c20.x
-(rpt2)nop
-mov.f32f32 r1.y, r1.y
-(rpt2)nop
-mov.f32f32 r1.y, r1.y
-(rpt2)nop
-mad.f32 r1.x, c18.z, r1.y, r1.x
-mad.f32 r4.x, c18.y, r1.y, r5.y
-mad.f32 r1.y, c18.x, r1.y, r1.z
-nop
+(ss)mov.f32f32 r4.x, r1.y
+mad.f32 r1.x, c18.x, r1.y, r1.x
+(rpt1)nop
+mov.f32f32 r1.y, r4.x
max.f r1.x, r1.x, c19.x
-max.f r4.x, r4.x, c19.x
-max.f r4.y, r1.y, c19.x
+(rpt1)nop
+mad.f32 r1.z, c18.z, r1.y, r1.z
+mad.f32 r1.y, c18.y, r1.y, r5.y
+min.f r1.x, r1.x, c19.y
nop
-min.f r1.z, r1.x, c19.y
-min.f r1.y, r4.x, c19.y
-min.f r1.x, r4.y, c19.y
+max.f r1.z, r1.z, c19.x
+max.f r1.y, r1.y, c19.x
+(rpt1)nop
+min.f r1.z, r1.z, c19.y
+min.f r1.y, r1.y, c19.y
end
-nop
-nop
-nop
; VERT: outputs: r0.x (0:0) r1.x (1:0) r2.x (3:0) r3.x (5:0)
-; VERT: inputs: r2.y (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r3.y (0:0,cm=f,il=16,b=0) r4.y (0:0,cm=f,il=20,b=0)
-; VERT: 141 instructions, 0 half, 8 full
+; VERT: inputs: r4.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r1.x (0:0,cm=f,il=16,b=0) r3.x (0:0,cm=f,il=20,b=0)
+; VERT: 119 instructions, 0 half, 8 full
diff --git a/reference/stk-mines/stk-mines-12.asm b/reference/stk-mines/stk-mines-12.asm
index cf606e2..2205708 100644
--- a/reference/stk-mines/stk-mines-12.asm
+++ b/reference/stk-mines/stk-mines-12.asm
@@ -6,39 +6,32 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c3.x) 0x3f800000, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)bary.f r0.z, 4, r0.x
-bary.f r0.w, 3, r0.x
-bary.f r1.x, 2, r0.x
-bary.f r1.y, 1, r0.x
+bary.f r1.w, 3, r0.x
+bary.f r0.w, 2, r0.x
+bary.f r1.x, 1, r0.x
mad.f32 r0.z, c0.x, r0.z, c0.y
-mov.f32f32 r0.w, r0.w
bary.f (ei)r0.x, 0, r0.x
-nop
-max.f r0.y, r0.z, c3.y
-mov.f32f32 r1.w, r0.w
(rpt1)nop
+max.f r0.y, r0.z, c3.y
+(rpt2)nop
min.f r0.y, r0.y, c3.x
(rpt2)nop
add.f r0.z, c3.x, (neg)r0.y
(rpt2)nop
-mul.f r0.w, c1.z, r0.z
-mul.f r1.z, c1.y, r0.z
+mul.f r1.y, c1.z, r0.z
+mul.f r2.x, c1.y, r0.z
+mad.f32 r1.z, r0.w, r0.y, r1.y
+mad.f32 r1.y, r1.x, r0.y, r2.x
mul.f r0.z, c1.x, r0.z
nop
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.z, r1.z
-mad.f32 r0.w, r1.x, r0.y, r0.w
-mad.f32 r1.x, r1.y, r0.y, r1.z
-mov.f32f32 r0.z, r0.z
-nop
-mov.f32f32 r1.z, r0.w
-mov.f32f32 r1.y, r1.x
-mad.f32 r0.x, r0.x, r0.y, r0.z
-(rpt2)nop
-mov.f32f32 r1.x, r0.x
+mad.f32 r1.x, r0.x, r0.y, r0.z
end
nop
+nop
+nop
; FRAG: outputs: r1.x (1:0)
-; FRAG: inputs: r0.x (1:0,cm=f,il=8,b=1) r1.x (3:0,cm=f,il=12,b=1)
-; FRAG: 38 instructions, 0 half, 2 full
+; FRAG: inputs: r1.x (1:0,cm=f,il=8,b=1) r1.x (3:0,cm=f,il=12,b=1)
+; FRAG: 28 instructions, 0 half, 3 full
diff --git a/reference/stk-mines/stk-mines-13.asm b/reference/stk-mines/stk-mines-13.asm
index f952885..bca028b 100644
--- a/reference/stk-mines/stk-mines-13.asm
+++ b/reference/stk-mines/stk-mines-13.asm
@@ -1,16 +1,16 @@
; options:
; VERT: new compiler
-@in(r2.y) in0
-@in(r2.z) in1
-@in(r2.w) in2
-@in(r3.x) in3
+@in(r3.x) in0
+@in(r3.y) in1
+@in(r3.z) in2
+@in(r3.w) in3
@in(r0.x) in4
@in(r0.y) in5
@in(r0.z) in6
-@in(r3.y) in8
-@in(r3.z) in9
-@in(r3.w) in10
-@in(r4.x) in11
+@in(r1.x) in8
+@in(r1.y) in9
+@in(r1.z) in10
+@in(r1.w) in11
@out(r0.x) out0
@out(r0.y) out1
@out(r0.z) out2
@@ -23,135 +23,121 @@
@out(r2.y) out9
@out(r2.z) out10
@out(r2.w) out11
-(sy)(ss)mul.f r0.w, r2.y, c12.x
-mul.f r1.x, r0.x, c4.x
-mad.f32 r0.w, c13.x, r2.z, r0.w
-mad.f32 r1.x, c4.y, r0.y, r1.x
-mad.f32 r0.w, c14.x, r2.w, r0.w
-mov.f32f32 r1.y, r4.x
-mad.f32 r0.w, c15.x, r3.x, r0.w
-mov.f32f32 r1.x, r1.x
-mul.f r1.z, r2.y, c12.z
-mov.f32f32 r1.y, r1.y
-mul.f r2.x, r0.w, r0.w
-mul.f r1.w, r2.y, c12.y
-mad.f32 r1.x, c4.z, r0.z, r1.x
-mad.f32 r1.w, c13.y, r2.z, r1.w
-max.f r1.y, r1.y, c19.x
-mad.f32 r1.w, c14.y, r2.w, r1.w
-mul.f r4.x, r1.x, c10.x
-mad.f32 r4.y, c15.y, r3.x, r1.w
-mul.f r4.z, r0.x, c5.x
-min.f r1.w, r1.y, c19.y
-mad.f32 r1.y, c13.z, r2.z, r1.z
-mad.f32 r1.z, r4.y, r4.y, r2.x
-mad.f32 r2.x, c5.y, r0.y, r4.z
-mad.f32 r1.y, c14.z, r2.w, r1.y
+@const(c19.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000
+@const(c20.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
+(sy)(ss)mul.f r0.w, r3.x, c12.x
+mul.f r2.x, r0.x, c4.x
+mad.f32 r0.w, c13.x, r3.y, r0.w
+mad.f32 r2.x, c4.y, r0.y, r2.x
+mad.f32 r0.w, c14.x, r3.z, r0.w
+mad.f32 r2.y, c4.z, r0.z, r2.x
+mad.f32 r0.w, c15.x, r3.w, r0.w
+mul.f r2.x, r3.x, c12.z
+mul.f r2.z, r3.x, c12.y
+mad.f32 r2.x, c13.z, r3.y, r2.x
+mul.f r2.w, r0.w, r0.w
+mad.f32 r2.z, c13.y, r3.y, r2.z
+mul.f r4.x, r2.y, c10.x
+mad.f32 r2.z, c14.y, r3.z, r2.z
+mul.f r4.y, r0.x, c5.x
+mad.f32 r2.z, c15.y, r3.w, r2.z
+mad.f32 r4.y, c5.y, r0.y, r4.y
+mad.f32 r2.x, c14.z, r3.z, r2.x
+mad.f32 r4.y, c5.z, r0.z, r4.y
+mad.f32 r2.w, r2.z, r2.z, r2.w
+mad.f32 r4.z, c15.z, r3.w, r2.x
+mul.f r4.w, r3.x, c0.w
+mul.f r5.x, r3.x, c0.z
+mul.f r5.y, r3.x, c0.y
+mad.f32 r2.w, r4.z, r4.z, r2.w
+mad.f32 r4.x, c10.y, r4.y, r4.x
mul.f r0.x, r0.x, c6.x
-mov.f32f32 r1.z, r1.z
-mad.f32 r1.y, c15.z, r3.x, r1.y
-mov.f32f32 r2.x, r2.x
+absneg.f r2.x, (abs)r4.z
+mad.f32 r4.w, c1.w, r3.y, r4.w
+mad.f32 r5.x, c1.z, r3.y, r5.x
+mad.f32 r5.y, c1.y, r3.y, r5.y
+rsq r2.w, (abs)r2.w
+(ss)mov.f32f32 r5.z, r2.w
+(ss)mul.f r2.w, r4.z, r2.w
mad.f32 r0.x, c6.y, r0.y, r0.x
-mul.f r0.y, r2.y, c0.w
-mad.f32 r1.z, r1.y, r1.y, r1.z
-mad.f32 r4.z, c5.z, r0.z, r2.x
-absneg.f r2.x, (abs)r1.y
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
-rsq r1.z, (abs)r1.z
-(ss)mov.f32f32 r1.z, r1.z
-mad.f32 r4.x, c10.y, r4.z, r4.x
+mad.f32 r0.y, c2.w, r3.z, r4.w
+mul.f r0.w, r0.w, r5.z
+mul.f r2.z, r2.z, r5.z
+(rpt1)nop
+add.f r4.z, c10.x, (neg)r0.w
+add.f r2.z, c10.y, (neg)r2.z
+add.f r2.w, c10.z, (neg)r2.w
mad.f32 r0.x, c6.z, r0.z, r0.x
-mad.f32 r0.y, c1.w, r2.z, r0.y
-mul.f r0.z, r0.w, r1.z
-mul.f r0.w, r4.y, r1.z
-mul.f r1.y, r1.y, r1.z
-mov.f32f32 r1.z, r4.x
-add.f r0.z, c10.x, (neg)r0.z
-add.f r4.x, c10.y, (neg)r0.w
-add.f r1.y, c10.z, (neg)r1.y
-mad.f32 r0.w, c10.z, r0.x, r1.z
-mul.f r1.z, r0.z, r0.z
-mad.f32 r0.y, c2.w, r2.w, r0.y
-mad.f32 r1.z, r4.x, r4.x, r1.z
-max.f r4.y, r0.w, c20.x
-cmps.f.lt r4.w, (neg)r0.w, c19.x
-mad.f32 r0.w, c3.w, r3.x, r0.y
-mov.f32f32 r0.y, r1.z
-mov.f32f32 r1.z, r4.y
-mad.f32 r0.y, r1.y, r1.y, r0.y
-mul.f r4.y, c16.z, r3.w
-mul.f r5.x, c16.y, r3.z
-mul.f r5.y, c16.x, r3.y
-mul.f r5.z, r2.y, c0.z
-mul.f r5.w, r2.y, c0.y
-mul.f r2.y, r2.y, c0.x
+mul.f r0.z, r4.z, r4.z
+mad.f32 r0.w, c3.w, r3.w, r0.y
+mad.f32 r0.y, r2.z, r2.z, r0.z
+mad.f32 r0.z, c10.z, r0.x, r4.x
+mad.f32 r0.y, r2.w, r2.w, r0.y
+mad.f32 r4.x, c2.z, r3.z, r5.x
+mad.f32 r4.w, c2.y, r3.z, r5.y
+mul.f r3.x, r3.x, c0.x
+max.f r1.w, r1.w, c19.x
+mul.f r5.x, c16.z, r1.z
+mul.f r5.y, c16.y, r1.y
rsq r0.y, (abs)r0.y
-(ss)mov.f32f32 r0.y, r0.y
-mad.f32 r6.x, c8.z, r3.w, c9.z
-mad.f32 r6.y, c8.y, r3.z, c9.y
-mad.f32 r6.z, c8.x, r3.y, c9.x
-mul.f r0.z, r0.z, r0.y
-mul.f r4.x, r4.x, r0.y
-mul.f r0.y, r1.y, r0.y
-add.f r1.y, r4.y, r6.x
-mul.f r0.z, r1.x, r0.z
-mul.f r1.x, c17.z, r3.w
-mad.f32 r0.z, r4.z, r4.x, r0.z
-add.f r3.w, r5.x, r6.y
-add.f r4.x, r5.y, r6.z
-mad.f32 r4.y, c1.z, r2.z, r5.z
-mov.f32f32 r0.z, r0.z
-mad.f32 r1.x, r1.z, r1.x, r1.y
-mad.f32 r0.x, r0.x, r0.y, r0.z
-mul.f r0.y, c17.y, r3.z
-mul.f r0.z, c17.x, r3.y
-mad.f32 r1.y, c2.z, r2.w, r4.y
+(ss)mov.f32f32 r5.z, r0.y
+(ss)mul.f r0.y, r2.w, r0.y
+max.f r5.w, r0.z, c20.x
+mul.f r2.w, c16.x, r1.x
+mul.f r4.z, r4.z, r5.z
+mul.f r2.z, r2.z, r5.z
+mov.f32f32 r5.z, r5.w
+mad.f32 r6.x, c8.y, r1.y, c9.y
+mul.f r2.y, r2.y, r4.z
+mad.f32 r4.z, c8.z, r1.z, c9.z
+mad.f32 r2.y, r4.y, r2.z, r2.y
+add.f r2.z, r5.y, r6.x
+mad.f32 r0.x, r0.x, r0.y, r2.y
+add.f r2.y, r5.x, r4.z
+mul.f r0.y, c17.y, r1.y
+mad.f32 r1.y, c8.x, r1.x, c9.x
max.f r0.x, r0.x, c20.x
-mad.f32 r3.y, r1.z, r0.y, r3.w
-mad.f32 r1.z, r1.z, r0.z, r4.x
-mad.f32 r0.z, c3.z, r3.x, r1.y
-mad.f32 r0.y, c1.y, r2.z, r5.w
-mad.f32 r1.y, c1.x, r2.z, r2.y
-mov.f32f32 r2.y, c7.x
+mul.f r1.z, c17.z, r1.z
+mad.f32 r4.y, r5.z, r0.y, r2.z
+add.f r1.y, r2.w, r1.y
+cmps.f.lt r4.z, (neg)r0.z, c19.x
+mad.f32 r0.z, c3.z, r3.w, r4.x
+mad.f32 r0.y, c3.y, r3.w, r4.w
log2 r0.x, r0.x
-(ss)mov.f32f32 r2.z, r0.x
-(ss)mad.f32 r0.x, c2.y, r2.w, r0.y
-mad.f32 r1.y, c2.x, r2.w, r1.y
-min.f r2.y, r2.y, c20.z
-mad.f32 r0.y, c3.y, r3.x, r0.x
-mad.f32 r0.x, c3.x, r3.x, r1.y
+mov.f32f32 r2.z, c7.x
+mad.f32 r1.z, r5.z, r1.z, r2.y
+mul.f r1.x, c17.x, r1.x
+mad.f32 r2.y, c1.x, r3.y, r3.x
+min.f r2.z, r2.z, c20.z
+mad.f32 r2.y, c2.x, r3.z, r2.y
+min.f r1.w, r1.w, c19.y
mov.f32f32 r2.w, c19.y
-mul.f r1.y, r2.y, r2.z
+(ss)mul.f r3.x, r2.z, r0.x
+mad.f32 r1.x, r5.w, r1.x, r1.y
+(ss)mad.f32 r0.x, c3.x, r3.w, r2.y
mov.f32f32 r2.z, c19.x
mov.f32f32 r2.y, c19.x
-nop
-mov.f32f32 r1.y, r1.y
-(rpt5)nop
-exp2 r1.y, r1.y
-(ss)mov.f32f32 r1.y, r1.y
-(rpt2)nop
-sel.b32 r1.y, r1.y, r4.w, c20.x
-(rpt2)nop
-mov.f32f32 r1.y, r1.y
+(rpt1)nop
+exp2 r1.y, r3.x
+(ss)sel.b32 r1.y, r1.y, r4.z, c20.x
(rpt2)nop
-mov.f32f32 r1.y, r1.y
-(rpt2)nop
-mad.f32 r1.x, c18.z, r1.y, r1.x
-mad.f32 r3.x, c18.y, r1.y, r3.y
-mad.f32 r1.y, c18.x, r1.y, r1.z
-nop
+(ss)mov.f32f32 r3.x, r1.y
+mad.f32 r1.x, c18.x, r1.y, r1.x
+(rpt1)nop
+mov.f32f32 r1.y, r3.x
max.f r1.x, r1.x, c19.x
-max.f r3.x, r3.x, c19.x
-max.f r3.y, r1.y, c19.x
+(rpt1)nop
+mad.f32 r1.z, c18.z, r1.y, r1.z
+mad.f32 r1.y, c18.y, r1.y, r4.y
+min.f r1.x, r1.x, c19.y
nop
-min.f r1.z, r1.x, c19.y
-min.f r1.y, r3.x, c19.y
-min.f r1.x, r3.y, c19.y
+max.f r1.z, r1.z, c19.x
+max.f r1.y, r1.y, c19.x
+(rpt1)nop
+min.f r1.z, r1.z, c19.y
+min.f r1.y, r1.y, c19.y
end
-nop
-nop
; VERT: outputs: r0.x (0:0) r1.x (1:0) r2.x (3:0)
-; VERT: inputs: r2.y (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r3.y (0:0,cm=f,il=16,b=0)
-; VERT: 141 instructions, 0 half, 7 full
+; VERT: inputs: r3.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r1.x (0:0,cm=f,il=16,b=0)
+; VERT: 119 instructions, 0 half, 7 full
diff --git a/reference/stk-mines/stk-mines-14.asm b/reference/stk-mines/stk-mines-14.asm
index e16d8e5..69665d0 100644
--- a/reference/stk-mines/stk-mines-14.asm
+++ b/reference/stk-mines/stk-mines-14.asm
@@ -29,171 +29,161 @@
@out(r3.y) out13
@out(r3.z) out14
@out(r3.w) out15
+@const(c25.x) 0x00000000, 0x3f800000, 0x3f000000, 0x00000000
+@const(c26.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)mul.f r0.w, r4.x, c12.x
mul.f r1.x, r0.x, c4.x
mad.f32 r0.w, c13.x, r4.y, r0.w
mad.f32 r1.x, c4.y, r0.y, r1.x
mad.f32 r0.w, c14.x, r4.z, r0.w
-mov.f32f32 r1.y, r3.x
-mad.f32 r0.w, c15.x, r4.w, r0.w
-mov.f32f32 r1.x, r1.x
-mul.f r1.z, r4.x, c12.z
-mov.f32f32 r1.y, r1.y
-mul.f r2.x, r0.w, r0.w
-mul.f r1.w, r4.x, c12.y
mad.f32 r1.x, c4.z, r0.z, r1.x
-mad.f32 r1.w, c13.y, r4.y, r1.w
-max.f r1.y, r1.y, c25.x
-mad.f32 r1.w, c14.y, r4.z, r1.w
-mul.f r3.x, r1.x, c10.x
-mad.f32 r3.y, c15.y, r4.w, r1.w
-mul.f r3.z, r0.x, c5.x
-min.f r1.w, r1.y, c25.y
-mad.f32 r1.y, c13.z, r4.y, r1.z
-mad.f32 r1.z, r3.y, r3.y, r2.x
-mad.f32 r2.x, c5.y, r0.y, r3.z
+mad.f32 r0.w, c15.x, r4.w, r0.w
+mul.f r1.y, r4.x, c12.z
+mul.f r1.z, r4.x, c12.y
+mad.f32 r1.y, c13.z, r4.y, r1.y
+mul.f r1.w, r0.w, r0.w
+mad.f32 r1.z, c13.y, r4.y, r1.z
+mul.f r2.x, r1.x, c10.x
+mad.f32 r1.z, c14.y, r4.z, r1.z
+mul.f r3.y, r0.x, c5.x
+mad.f32 r1.z, c15.y, r4.w, r1.z
+mad.f32 r3.y, c5.y, r0.y, r3.y
mad.f32 r1.y, c14.z, r4.z, r1.y
-mul.f r0.x, r0.x, c6.x
-mov.f32f32 r1.z, r1.z
+mad.f32 r3.y, c5.z, r0.z, r3.y
+mad.f32 r1.w, r1.z, r1.z, r1.w
mad.f32 r1.y, c15.z, r4.w, r1.y
-mov.f32f32 r2.x, r2.x
-mad.f32 r0.x, c6.y, r0.y, r0.x
-mul.f r0.y, r4.x, c0.w
-mad.f32 r1.z, r1.y, r1.y, r1.z
-mad.f32 r3.z, c5.z, r0.z, r2.x
+mul.f r3.z, r4.x, c0.w
+mul.f r3.w, r4.x, c0.z
+mul.f r5.x, r4.x, c0.y
+mad.f32 r1.w, r1.y, r1.y, r1.w
+mad.f32 r5.y, c10.y, r3.y, r2.x
+mul.f r0.x, r0.x, c6.x
absneg.f r2.x, (abs)r1.y
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
-rsq r1.z, (abs)r1.z
-(ss)mov.f32f32 r1.z, r1.z
-mad.f32 r3.x, c10.y, r3.z, r3.x
+mad.f32 r3.z, c1.w, r4.y, r3.z
+mad.f32 r3.w, c1.z, r4.y, r3.w
+mad.f32 r5.x, c1.y, r4.y, r5.x
+rsq r1.w, (abs)r1.w
+(ss)mov.f32f32 r6.x, r1.w
+mul.f r1.y, r1.y, r1.w
+mad.f32 r0.x, c6.y, r0.y, r0.x
+mad.f32 r0.y, c2.w, r4.z, r3.z
+(ss)mul.f r1.w, r0.w, r6.x
+mul.f r1.z, r1.z, r6.x
+(rpt1)nop
+add.f r3.z, c10.x, (neg)r1.w
+mul.f r0.w, r1.x, r1.w
+add.f r6.x, c10.y, (neg)r1.z
+add.f r6.y, c10.z, (neg)r1.y
+mul.f r6.z, r3.z, r3.z
+mad.f32 r6.w, r3.y, r1.z, r0.w
+mad.f32 r0.w, r6.x, r6.x, r6.z
mad.f32 r0.x, c6.z, r0.z, r0.x
-mad.f32 r0.y, c1.w, r4.y, r0.y
-mul.f r0.z, r0.w, r1.z
-mul.f r3.y, r3.y, r1.z
-mul.f r1.y, r1.y, r1.z
-mov.f32f32 r0.w, r3.x
-add.f r1.z, c10.x, (neg)r0.z
-mul.f r3.x, r1.x, r0.z
-add.f r3.w, c10.y, (neg)r3.y
-add.f r5.x, c10.z, (neg)r1.y
-mul.f r5.y, r1.z, r1.z
-mad.f32 r3.x, r3.z, r3.y, r3.x
-mad.f32 r5.y, r3.w, r3.w, r5.y
-mad.f32 r0.w, c10.z, r0.x, r0.w
-mad.f32 r0.y, c2.w, r4.z, r0.y
-mul.f r6.x, r4.x, c0.z
-mov.f32f32 r5.y, r5.y
-mov.f32f32 r3.x, r3.x
-mad.f32 r5.y, r5.x, r5.x, r5.y
-mad.f32 r6.y, r0.x, r1.y, r3.x
-mad.f32 r6.z, r0.x, r1.y, r3.x
-mad.f32 r3.x, r0.x, r1.y, r3.x
-max.f r6.w, r0.w, c26.x
-cmps.f.lt r7.x, (neg)r0.w, c25.x
+mad.f32 r0.z, r6.y, r6.y, r0.w
+mov.f32f32 r6.z, r6.w
mad.f32 r0.w, c3.w, r4.w, r0.y
-rsq r0.y, (abs)r5.y
-(ss)mov.f32f32 r0.y, r0.y
-(ss)add.f r5.y, r6.y, r6.y
-add.f r6.y, r6.z, r6.z
-add.f r3.x, r3.x, r3.x
-mul.f r1.z, r1.z, r0.y
-mad.f32 r0.z, (neg)r5.y, r1.x, r0.z
-mul.f r3.w, r3.w, r0.y
-mul.f r0.y, r5.x, r0.y
-mul.f r1.x, r1.x, r1.z
-add.f r1.z, r0.z, c25.x
-mad.f32 r1.x, r3.z, r3.w, r1.x
-mad.f32 r3.y, (neg)r6.y, r3.z, r3.y
-mad.f32 r1.y, (neg)r3.x, r0.x, r1.y
-nop
-mov.f32f32 r1.x, r1.x
-mul.f r1.z, r1.z, r1.z
-mad.f32 r0.x, r0.x, r0.y, r1.x
-add.f r0.y, r3.y, c25.x
-add.f r1.x, r1.y, c25.y
-mov.f32f32 r1.y, r6.w
-max.f r0.x, r0.x, c26.x
-mad.f32 r0.y, r0.y, r0.y, r1.z
-mul.f r1.z, c16.z, r2.w
-mul.f r3.x, c16.y, r2.z
-mul.f r3.z, c16.x, r2.y
-mad.f32 r3.w, c1.z, r4.y, r6.x
-mul.f r5.x, r4.x, c0.y
-log2 r0.x, r0.x
-(ss)mov.f32f32 r0.x, r0.x
-mov.f32f32 r5.y, c7.x
-mov.f32f32 r0.y, r0.y
-mad.f32 r6.x, c8.z, r2.w, c9.z
+mad.f32 r0.y, c2.z, r4.z, r3.w
+mad.f32 r3.w, c2.y, r4.z, r5.x
+mul.f r4.x, r4.x, c0.x
+max.f r3.x, r3.x, c25.x
+rsq r0.z, (abs)r0.z
+(ss)mov.f32f32 r5.x, r0.z
+mad.f32 r7.x, r0.x, r1.y, r6.z
+mad.f32 r6.z, r0.x, r1.y, r6.z
+mad.f32 r6.w, r0.x, r1.y, r6.w
+mul.f r3.z, r3.z, r5.x
+add.f r7.x, r7.x, r7.x
+add.f r6.z, r6.z, r6.z
+mul.f r5.x, r6.x, r5.x
+mul.f r3.z, r1.x, r3.z
+mad.f32 r1.x, (neg)r7.x, r1.x, r1.w
+mad.f32 r1.y, (neg)r6.z, r0.x, r1.y
+mad.f32 r1.w, r3.y, r5.x, r3.z
+(ss)mul.f r0.z, r6.y, r0.z
+add.f r3.z, r1.x, c25.x
+add.f r1.y, r1.y, c25.y
+add.f r5.x, r6.w, r6.w
+mad.f32 r1.w, r0.x, r0.z, r1.w
+mul.f r3.z, r3.z, r3.z
+mad.f32 r0.x, c10.z, r0.x, r5.y
+mad.f32 r0.z, c3.z, r4.w, r0.y
+max.f r1.w, r1.w, c26.x
+mad.f32 r3.y, (neg)r5.x, r3.y, r1.z
+max.f r1.z, r0.x, c26.x
+mul.f r5.x, c16.x, r2.y
+cmps.f.lt r0.x, (neg)r0.x, c25.x
+mad.f32 r0.y, c3.y, r4.w, r3.w
+mad.f32 r3.w, c1.x, r4.y, r4.x
+log2 r1.w, r1.w
+mov.f32f32 r4.x, c7.x
+add.f r4.y, r3.y, c25.x
+mov.f32f32 r5.y, r1.z
+mul.f r6.x, c16.y, r2.z
+min.f r4.x, r4.x, c26.z
+mad.f32 r3.z, r4.y, r4.y, r3.z
+mul.f r4.y, c16.z, r2.w
mad.f32 r6.y, c8.y, r2.z, c9.y
-min.f r5.y, r5.y, c26.z
-mad.f32 r0.y, r1.x, r1.x, r0.y
-add.f r1.x, r1.z, r6.x
-add.f r1.z, r3.x, r6.y
-mul.f r0.x, r5.y, r0.x
-mov.f32f32 r0.y, r0.y
+(ss)mul.f r4.x, r4.x, r1.w
+mad.f32 r1.y, r1.y, r1.y, r3.z
+mad.f32 r3.z, c8.z, r2.w, c9.z
+add.f r6.x, r6.x, r6.y
+mad.f32 r6.y, c8.x, r2.y, c9.x
+mad.f32 r4.z, c2.x, r4.z, r3.w
+(ss)min.f r1.w, r3.x, c25.y
+exp2 r3.x, r4.x
+(ss)sel.b32 r0.x, r3.x, r0.x, c26.x
+add.f r3.x, r5.x, r6.y
+mul.f r2.y, c17.x, r2.y
+rsq r1.y, (abs)r1.y
+(ss)mul.f r3.w, r1.y, c25.z
+(ss)mov.f32f32 r4.x, r0.x
+rsq r1.y, (abs)r1.y
+(ss)mul.f r5.x, r1.y, c25.z
+(ss)add.f r1.y, r4.y, r3.z
+mad.f32 r1.z, r1.z, r2.y, r3.x
+mov.f32f32 r2.y, r4.x
mul.f r2.w, c17.z, r2.w
mul.f r2.z, c17.y, r2.z
-mov.f32f32 r0.x, r0.x
-mad.f32 r3.x, c8.x, r2.y, c9.x
-mad.f32 r5.y, c2.z, r4.z, r3.w
-mad.f32 r3.w, c1.y, r4.y, r5.x
-mul.f r4.x, r4.x, c0.x
-add.f r3.x, r3.z, r3.x
-mad.f32 r5.x, c2.y, r4.z, r3.w
-exp2 r0.x, r0.x
-(ss)mov.f32f32 r0.x, r0.x
-rsq r0.y, (abs)r0.y
-(ss)mul.f r3.z, r0.y, c25.z
-(ss)rsq r0.y, (abs)r0.y
-(ss)mul.f r0.y, r0.y, c25.z
-mad.f32 r1.x, r1.y, r2.w, r1.x
-sel.b32 r0.x, r0.x, r7.x, c26.x
-mad.f32 r0.z, r0.z, r3.z, c25.z
-mad.f32 r0.y, r3.y, r0.y, c25.z
-mad.f32 r1.z, r1.y, r2.z, r1.z
-mov.f32f32 r0.x, r0.x
-mul.f r2.z, r0.z, c21.w
-mul.f r2.w, r0.z, c21.z
-mul.f r3.y, r0.z, c21.y
-mov.f32f32 r0.x, r0.x
-mul.f r2.y, c17.x, r2.y
-mad.f32 r2.z, c22.w, r0.y, r2.z
-mad.f32 r2.w, c22.z, r0.y, r2.w
-mad.f32 r1.x, c18.z, r0.x, r1.x
-mad.f32 r1.z, c18.y, r0.x, r1.z
-mad.f32 r1.y, r1.y, r2.y, r3.x
-mov.f32f32 r2.y, r5.z
+mad.f32 r0.x, c18.x, r0.x, r1.z
+mad.f32 r3.x, r1.x, r3.w, c25.z
+mad.f32 r1.x, r5.y, r2.w, r1.y
+mad.f32 r1.y, r5.y, r2.z, r6.x
+mad.f32 r1.x, c18.z, r2.y, r1.x
+mad.f32 r1.y, c18.y, r2.y, r1.y
+max.f r0.x, r0.x, c25.x
+nop
max.f r1.x, r1.x, c25.x
-max.f r3.x, r1.z, c25.x
-mad.f32 r0.x, c18.x, r0.x, r1.y
-mad.f32 r2.z, c23.w, r2.y, r2.z
+max.f r1.y, r1.y, c25.x
+(rpt1)nop
min.f r1.z, r1.x, c25.y
-min.f r1.y, r3.x, c25.y
-max.f r0.x, r0.x, c25.x
-mov.f32f32 r3.x, r5.w
-mad.f32 r2.w, c23.z, r2.y, r2.w
-mad.f32 r3.y, c22.y, r0.y, r3.y
+min.f r1.y, r1.y, c25.y
min.f r1.x, r0.x, c25.y
-mad.f32 r3.w, c24.w, r3.x, r2.z
-mad.f32 r3.z, c24.z, r3.x, r2.w
-mad.f32 r0.x, c23.y, r2.y, r3.y
-mul.f r0.z, r0.z, c21.x
-mad.f32 r3.y, c24.y, r3.x, r0.x
-mad.f32 r0.x, c22.x, r0.y, r0.z
-mad.f32 r0.z, c3.z, r4.w, r5.y
-mad.f32 r0.x, c23.x, r2.y, r0.x
-mad.f32 r0.y, c3.y, r4.w, r5.x
-mad.f32 r3.x, c24.x, r3.x, r0.x
-mad.f32 r0.x, c1.x, r4.y, r4.x
+mul.f r0.x, r3.x, c21.w
+mad.f32 r2.y, r3.y, r5.x, c25.z
+mul.f r2.z, r3.x, c21.z
+mul.f r2.w, r3.x, c21.y
+mul.f r3.x, r3.x, c21.x
+mad.f32 r0.x, c22.w, r2.y, r0.x
+mov.f32f32 r3.y, r5.z
+mad.f32 r2.z, c22.z, r2.y, r2.z
+mad.f32 r2.w, c22.y, r2.y, r2.w
+mad.f32 r2.y, c22.x, r2.y, r3.x
+mad.f32 r0.x, c23.w, r3.y, r0.x
+mov.f32f32 r3.x, r5.w
+mad.f32 r2.z, c23.z, r3.y, r2.z
+mad.f32 r2.w, c23.y, r3.y, r2.w
+mad.f32 r2.y, c23.x, r5.z, r2.y
+mad.f32 r3.w, c24.w, r3.x, r0.x
+mad.f32 r3.z, c24.z, r3.x, r2.z
+mad.f32 r3.y, c24.y, r3.x, r2.w
+mad.f32 r3.x, c24.x, r5.w, r2.y
+mad.f32 r0.x, c3.x, r4.w, r4.z
mov.f32f32 r2.w, c25.y
-mad.f32 r0.x, c2.x, r4.z, r0.x
mov.f32f32 r2.z, c25.x
-mad.f32 r0.x, c3.x, r4.w, r0.x
mov.f32f32 r2.y, c25.x
end
nop
+nop
; VERT: outputs: r0.x (0:0) r1.x (1:0) r2.x (3:0) r3.x (5:0)
; VERT: inputs: r4.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r2.y (0:0,cm=f,il=16,b=0) r5.x (0:0,cm=c,il=20,b=0)
-; VERT: 165 instructions, 0 half, 8 full
+; VERT: 152 instructions, 0 half, 8 full
diff --git a/reference/stk-mines/stk-mines-15.asm b/reference/stk-mines/stk-mines-15.asm
index e1b8ffe..27f5178 100644
--- a/reference/stk-mines/stk-mines-15.asm
+++ b/reference/stk-mines/stk-mines-15.asm
@@ -1,20 +1,20 @@
; options:
; VERT: new compiler
-@in(r2.y) in0
-@in(r2.z) in1
-@in(r2.w) in2
-@in(r3.x) in3
+@in(r4.x) in0
+@in(r4.y) in1
+@in(r4.z) in2
+@in(r4.w) in3
@in(r0.x) in4
@in(r0.y) in5
@in(r0.z) in6
-@in(r3.y) in8
-@in(r3.z) in9
-@in(r3.w) in10
-@in(r4.x) in11
-@in(r4.y) in12
-@in(r4.z) in13
-@in(r4.w) in14
-@in(r5.x) in15
+@in(r1.x) in8
+@in(r1.y) in9
+@in(r1.z) in10
+@in(r1.w) in11
+@in(r3.x) in12
+@in(r3.y) in13
+@in(r3.z) in14
+@in(r3.w) in15
@out(r0.x) out0
@out(r0.y) out1
@out(r0.z) out2
@@ -31,139 +31,121 @@
@out(r3.y) out13
@out(r3.z) out14
@out(r3.w) out15
-(sy)(ss)mul.f r0.w, r2.y, c12.x
-mul.f r1.x, r0.x, c4.x
-mad.f32 r0.w, c13.x, r2.z, r0.w
-mad.f32 r1.x, c4.y, r0.y, r1.x
-mad.f32 r0.w, c14.x, r2.w, r0.w
-mov.f32f32 r1.y, r4.x
-mad.f32 r0.w, c15.x, r3.x, r0.w
-mov.f32f32 r1.x, r1.x
-mul.f r1.z, r2.y, c12.z
-mov.f32f32 r1.y, r1.y
-mul.f r2.x, r0.w, r0.w
-mul.f r1.w, r2.y, c12.y
-mad.f32 r1.x, c4.z, r0.z, r1.x
-mad.f32 r1.w, c13.y, r2.z, r1.w
-max.f r1.y, r1.y, c19.x
-mad.f32 r1.w, c14.y, r2.w, r1.w
-mul.f r4.x, r1.x, c10.x
-mad.f32 r5.y, c15.y, r3.x, r1.w
-mul.f r5.z, r0.x, c5.x
-min.f r1.w, r1.y, c19.y
-mad.f32 r1.y, c13.z, r2.z, r1.z
-mad.f32 r1.z, r5.y, r5.y, r2.x
-mad.f32 r2.x, c5.y, r0.y, r5.z
-mad.f32 r1.y, c14.z, r2.w, r1.y
+@const(c19.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000
+@const(c20.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
+(sy)(ss)mul.f r0.w, r4.x, c12.x
+mul.f r2.x, r0.x, c4.x
+mad.f32 r0.w, c13.x, r4.y, r0.w
+mad.f32 r2.x, c4.y, r0.y, r2.x
+mad.f32 r0.w, c14.x, r4.z, r0.w
+mad.f32 r2.y, c4.z, r0.z, r2.x
+mad.f32 r0.w, c15.x, r4.w, r0.w
+mul.f r2.x, r4.x, c12.z
+mul.f r2.z, r4.x, c12.y
+mad.f32 r2.x, c13.z, r4.y, r2.x
+mul.f r2.w, r0.w, r0.w
+mad.f32 r2.z, c13.y, r4.y, r2.z
+mul.f r5.x, r2.y, c10.x
+mad.f32 r2.z, c14.y, r4.z, r2.z
+mul.f r5.y, r0.x, c5.x
+mad.f32 r2.z, c15.y, r4.w, r2.z
+mad.f32 r5.y, c5.y, r0.y, r5.y
+mad.f32 r2.x, c14.z, r4.z, r2.x
+mad.f32 r5.y, c5.z, r0.z, r5.y
+mad.f32 r2.w, r2.z, r2.z, r2.w
+mad.f32 r5.z, c15.z, r4.w, r2.x
+mul.f r5.w, r4.x, c0.w
+mul.f r6.x, r4.x, c0.z
+mul.f r6.y, r4.x, c0.y
+mad.f32 r2.w, r5.z, r5.z, r2.w
+mad.f32 r5.x, c10.y, r5.y, r5.x
mul.f r0.x, r0.x, c6.x
-mov.f32f32 r1.z, r1.z
-mad.f32 r1.y, c15.z, r3.x, r1.y
-mov.f32f32 r2.x, r2.x
+absneg.f r2.x, (abs)r5.z
+mad.f32 r5.w, c1.w, r4.y, r5.w
+mad.f32 r6.x, c1.z, r4.y, r6.x
+mad.f32 r6.y, c1.y, r4.y, r6.y
+rsq r2.w, (abs)r2.w
+(ss)mov.f32f32 r6.z, r2.w
+(ss)mul.f r2.w, r5.z, r2.w
mad.f32 r0.x, c6.y, r0.y, r0.x
-mul.f r0.y, r2.y, c0.w
-mad.f32 r1.z, r1.y, r1.y, r1.z
-mad.f32 r5.z, c5.z, r0.z, r2.x
-absneg.f r2.x, (abs)r1.y
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
-rsq r1.z, (abs)r1.z
-(ss)mov.f32f32 r1.z, r1.z
-mad.f32 r4.x, c10.y, r5.z, r4.x
+mad.f32 r0.y, c2.w, r4.z, r5.w
+mul.f r0.w, r0.w, r6.z
+mul.f r2.z, r2.z, r6.z
+(rpt1)nop
+add.f r5.z, c10.x, (neg)r0.w
+add.f r2.z, c10.y, (neg)r2.z
+add.f r2.w, c10.z, (neg)r2.w
mad.f32 r0.x, c6.z, r0.z, r0.x
-mad.f32 r0.y, c1.w, r2.z, r0.y
-mul.f r0.z, r0.w, r1.z
-mul.f r0.w, r5.y, r1.z
-mul.f r1.y, r1.y, r1.z
-mov.f32f32 r1.z, r4.x
-add.f r0.z, c10.x, (neg)r0.z
-add.f r4.x, c10.y, (neg)r0.w
-add.f r1.y, c10.z, (neg)r1.y
-mad.f32 r0.w, c10.z, r0.x, r1.z
-mul.f r1.z, r0.z, r0.z
-mad.f32 r0.y, c2.w, r2.w, r0.y
-mad.f32 r1.z, r4.x, r4.x, r1.z
-max.f r5.y, r0.w, c20.x
-cmps.f.lt r5.w, (neg)r0.w, c19.x
-mad.f32 r0.w, c3.w, r3.x, r0.y
-mov.f32f32 r0.y, r1.z
-mov.f32f32 r1.z, r5.y
-mad.f32 r0.y, r1.y, r1.y, r0.y
-mul.f r5.y, c16.z, r3.w
-mul.f r6.x, c16.y, r3.z
-mul.f r6.y, c16.x, r3.y
-mul.f r6.z, r2.y, c0.z
-mul.f r6.w, r2.y, c0.y
-mul.f r2.y, r2.y, c0.x
+mul.f r0.z, r5.z, r5.z
+mad.f32 r0.w, c3.w, r4.w, r0.y
+mad.f32 r0.y, r2.z, r2.z, r0.z
+mad.f32 r0.z, c10.z, r0.x, r5.x
+mad.f32 r0.y, r2.w, r2.w, r0.y
+mad.f32 r5.x, c2.z, r4.z, r6.x
+mad.f32 r5.w, c2.y, r4.z, r6.y
+mul.f r4.x, r4.x, c0.x
+max.f r1.w, r1.w, c19.x
+mul.f r6.x, c16.z, r1.z
+mul.f r6.y, c16.y, r1.y
rsq r0.y, (abs)r0.y
-(ss)mov.f32f32 r0.y, r0.y
-mad.f32 r7.x, c8.z, r3.w, c9.z
-mad.f32 r7.y, c8.y, r3.z, c9.y
-mad.f32 r7.z, c8.x, r3.y, c9.x
-mul.f r0.z, r0.z, r0.y
-mul.f r4.x, r4.x, r0.y
-mul.f r0.y, r1.y, r0.y
-add.f r1.y, r5.y, r7.x
-mul.f r0.z, r1.x, r0.z
-mul.f r1.x, c17.z, r3.w
-mad.f32 r0.z, r5.z, r4.x, r0.z
-add.f r3.w, r6.x, r7.y
-add.f r4.x, r6.y, r7.z
-mad.f32 r5.y, c1.z, r2.z, r6.z
-mov.f32f32 r0.z, r0.z
-mad.f32 r1.x, r1.z, r1.x, r1.y
-mad.f32 r0.x, r0.x, r0.y, r0.z
-mul.f r0.y, c17.y, r3.z
-mul.f r0.z, c17.x, r3.y
-mad.f32 r1.y, c2.z, r2.w, r5.y
+(ss)mov.f32f32 r6.z, r0.y
+(ss)mul.f r0.y, r2.w, r0.y
+max.f r6.w, r0.z, c20.x
+mul.f r2.w, c16.x, r1.x
+mul.f r5.z, r5.z, r6.z
+mul.f r2.z, r2.z, r6.z
+mov.f32f32 r6.z, r6.w
+mad.f32 r7.x, c8.y, r1.y, c9.y
+mul.f r2.y, r2.y, r5.z
+mad.f32 r5.z, c8.z, r1.z, c9.z
+mad.f32 r2.y, r5.y, r2.z, r2.y
+add.f r2.z, r6.y, r7.x
+mad.f32 r0.x, r0.x, r0.y, r2.y
+add.f r2.y, r6.x, r5.z
+mul.f r0.y, c17.y, r1.y
+mad.f32 r1.y, c8.x, r1.x, c9.x
max.f r0.x, r0.x, c20.x
-mad.f32 r5.y, r1.z, r0.y, r3.w
-mad.f32 r1.z, r1.z, r0.z, r4.x
-mad.f32 r0.z, c3.z, r3.x, r1.y
-mad.f32 r0.y, c1.y, r2.z, r6.w
-mad.f32 r1.y, c1.x, r2.z, r2.y
-mov.f32f32 r2.y, c7.x
+mul.f r1.z, c17.z, r1.z
+mad.f32 r5.y, r6.z, r0.y, r2.z
+add.f r1.y, r2.w, r1.y
+cmps.f.lt r5.z, (neg)r0.z, c19.x
+mad.f32 r0.z, c3.z, r4.w, r5.x
+mad.f32 r0.y, c3.y, r4.w, r5.w
log2 r0.x, r0.x
-(ss)mov.f32f32 r2.z, r0.x
-(ss)mad.f32 r0.x, c2.y, r2.w, r0.y
-mad.f32 r1.y, c2.x, r2.w, r1.y
-min.f r2.y, r2.y, c20.z
-mad.f32 r0.y, c3.y, r3.x, r0.x
-mad.f32 r0.x, c3.x, r3.x, r1.y
-mov.f32f32 r3.w, r5.x
-mul.f r1.y, r2.y, r2.z
-mov.f32f32 r3.z, r4.w
-mov.f32f32 r3.y, r4.z
-mov.f32f32 r3.x, r4.y
-mov.f32f32 r1.y, r1.y
+mov.f32f32 r2.z, c7.x
+mad.f32 r1.z, r6.z, r1.z, r2.y
+mul.f r1.x, c17.x, r1.x
+mad.f32 r2.y, c1.x, r4.y, r4.x
+min.f r2.z, r2.z, c20.z
+mad.f32 r2.y, c2.x, r4.z, r2.y
+min.f r1.w, r1.w, c19.y
mov.f32f32 r2.w, c19.y
+(ss)mul.f r4.x, r2.z, r0.x
+mad.f32 r1.x, r6.w, r1.x, r1.y
+(ss)mad.f32 r0.x, c3.x, r4.w, r2.y
mov.f32f32 r2.z, c19.x
mov.f32f32 r2.y, c19.x
+(rpt1)nop
+exp2 r1.y, r4.x
+(ss)sel.b32 r1.y, r1.y, r5.z, c20.x
(rpt2)nop
-exp2 r1.y, r1.y
-(ss)mov.f32f32 r1.y, r1.y
-(rpt2)nop
-sel.b32 r1.y, r1.y, r5.w, c20.x
-(rpt2)nop
-mov.f32f32 r1.y, r1.y
-(rpt2)nop
-mov.f32f32 r1.y, r1.y
-(rpt2)nop
-mad.f32 r1.x, c18.z, r1.y, r1.x
-mad.f32 r4.x, c18.y, r1.y, r5.y
-mad.f32 r1.y, c18.x, r1.y, r1.z
-nop
+(ss)mov.f32f32 r4.x, r1.y
+mad.f32 r1.x, c18.x, r1.y, r1.x
+(rpt1)nop
+mov.f32f32 r1.y, r4.x
max.f r1.x, r1.x, c19.x
-max.f r4.x, r4.x, c19.x
-max.f r4.y, r1.y, c19.x
+(rpt1)nop
+mad.f32 r1.z, c18.z, r1.y, r1.z
+mad.f32 r1.y, c18.y, r1.y, r5.y
+min.f r1.x, r1.x, c19.y
nop
-min.f r1.z, r1.x, c19.y
-min.f r1.y, r4.x, c19.y
-min.f r1.x, r4.y, c19.y
+max.f r1.z, r1.z, c19.x
+max.f r1.y, r1.y, c19.x
+(rpt1)nop
+min.f r1.z, r1.z, c19.y
+min.f r1.y, r1.y, c19.y
end
-nop
-nop
-nop
; VERT: outputs: r0.x (0:0) r1.x (1:0) r2.x (3:0) r3.x (5:0)
-; VERT: inputs: r2.y (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r3.y (0:0,cm=f,il=16,b=0) r4.y (0:0,cm=f,il=20,b=0)
-; VERT: 141 instructions, 0 half, 8 full
+; VERT: inputs: r4.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r1.x (0:0,cm=f,il=16,b=0) r3.x (0:0,cm=f,il=20,b=0)
+; VERT: 119 instructions, 0 half, 8 full
diff --git a/reference/stk-mines/stk-mines-16.asm b/reference/stk-mines/stk-mines-16.asm
index b80c94e..601a921 100644
--- a/reference/stk-mines/stk-mines-16.asm
+++ b/reference/stk-mines/stk-mines-16.asm
@@ -6,63 +6,44 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c5.x) 0x3f800000, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)bary.f r0.z, 4, r0.x
bary.f r0.w, 8, r0.x
-bary.f r1.x, 12, r0.x
+bary.f r1.x, 9, r0.x
bary.f r1.y, 11, r0.x
mad.f32 r0.z, c2.x, r0.z, c2.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.y, r1.y
+bary.f r1.z, 12, r0.x
+bary.f r1.w, 13, r0.x
+bary.f r2.x, 15, r0.x
max.f r0.z, r0.z, c5.y
-mov.f32f32 r1.z, r0.w
-bary.f r0.w, 9, r0.x
-mov.f32f32 r2.y, r1.x
+bary.f r2.y, 3, r0.x
+bary.f r2.z, 2, r0.x
+bary.f r2.w, 1, r0.x
min.f r0.z, r0.z, c5.x
-bary.f r1.x, 13, r0.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.x, r1.y
-add.f r1.y, c5.x, (neg)r0.z
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.w, r0.w
-bary.f r0.w, 15, r0.x
-mul.f r2.w, c3.z, r1.y
-mul.f r3.x, c3.y, r1.y
-mul.f r1.y, c3.x, r1.y
-mov.f32f32 r2.z, r1.x
-mov.f32f32 r1.x, r2.w
-sam.p (f32)(xyzw)r3.y, r1.z, s#1, t#1
-(ss)bary.f r1.z, 2, r0.x
-mov.f32f32 r1.w, r3.x
-mov.f32f32 r1.y, r1.y
-bary.f r2.x, 1, r0.x
-(sy)mul.f r1.z, r3.w, r1.z
-mov.f32f32 r0.w, r0.w
-bary.f r3.x, 0, r0.x
-mul.f r2.x, r3.z, r2.x
-bary.f (ei)r0.x, 3, r0.x
-mov.f32f32 r2.w, r0.w
-mul.f r0.y, r3.y, r3.x
-(rpt1)nop
-mul.f r0.x, r4.x, r0.x
-(rpt1)nop
-sam.p (f32)(xyzw)r2.y, r2.y, s#0, t#0
-(sy)mul.f r0.w, r2.w, r1.z
-mul.f r1.z, r2.z, r2.x
-mul.f r0.y, r2.y, r0.y
+sam.p (f32)(xyzw)r3.x, r0.w, s#1, t#1
+(sy)(ss)mul.f r0.w, r3.w, r2.y
+mul.f r1.x, r3.z, r2.z
+sam.p (f32)(xyzw)r3.z, r1.z, s#0, t#0
+mul.f r1.y, r3.y, r2.w
+(ss)add.f r2.x, c5.x, (neg)r0.z
+(sy)mul.f r1.w, r4.y, r0.w
+mul.f r0.w, r4.x, r1.x
+mul.f r1.x, r3.w, r1.y
+mul.f r1.y, c3.z, r2.x
+mul.f r2.y, c3.y, r2.x
+mad.f32 r1.z, r0.w, r0.z, r1.y
+mad.f32 r1.y, r1.x, r0.z, r2.y
+mul.f r0.w, c3.x, r2.x
+bary.f (ei)r0.x, 0, r0.x
+(rpt2)nop
mul.f r0.x, r3.x, r0.x
-mad.f32 r0.w, r0.w, r0.z, r1.x
-mad.f32 r1.x, r1.z, r0.z, r1.w
-mad.f32 r0.y, r0.y, r0.z, r1.y
-nop
-mov.f32f32 r1.z, r0.w
-mov.f32f32 r1.y, r1.x
-mov.f32f32 r1.x, r0.y
-mov.f32f32 r0.x, r0.x
(rpt2)nop
-mov.f32f32 r1.w, r0.x
+mul.f r0.x, r3.z, r0.x
+(rpt2)nop
+mad.f32 r1.x, r0.x, r0.z, r0.w
end
+nop
; FRAG: outputs: r1.x (1:0)
-; FRAG: inputs: r63.y (1:0,cm=f,il=8,b=1) r1.x (3:0,cm=f,il=12,b=1) r0.z (5:0,cm=f,il=16,b=1) r0.x (5:1,cm=f,il=20,b=1)
-; FRAG: 60 instructions, 0 half, 5 full
+; FRAG: inputs: r1.z (1:0,cm=f,il=8,b=1) r1.x (3:0,cm=f,il=12,b=1) r0.z (5:0,cm=f,il=16,b=1) r1.y (5:1,cm=f,il=20,b=1)
+; FRAG: 41 instructions, 0 half, 5 full
diff --git a/reference/stk-mines/stk-mines-17.asm b/reference/stk-mines/stk-mines-17.asm
index 93bdddf..37655fe 100644
--- a/reference/stk-mines/stk-mines-17.asm
+++ b/reference/stk-mines/stk-mines-17.asm
@@ -1,24 +1,24 @@
; options:
; VERT: new compiler
-@in(r2.y) in0
-@in(r2.z) in1
-@in(r2.w) in2
-@in(r3.x) in3
+@in(r5.x) in0
+@in(r5.y) in1
+@in(r5.z) in2
+@in(r5.w) in3
@in(r0.x) in4
@in(r0.y) in5
@in(r0.z) in6
-@in(r3.y) in8
-@in(r3.z) in9
-@in(r3.w) in10
-@in(r4.x) in11
-@in(r5.x) in12
-@in(r5.y) in13
-@in(r5.z) in14
-@in(r5.w) in15
-@in(r6.x) in16
-@in(r6.y) in17
-@in(r6.z) in18
-@in(r6.w) in19
+@in(r1.x) in8
+@in(r1.y) in9
+@in(r1.z) in10
+@in(r1.w) in11
+@in(r3.x) in12
+@in(r3.y) in13
+@in(r3.z) in14
+@in(r3.w) in15
+@in(r4.x) in16
+@in(r4.y) in17
+@in(r4.z) in18
+@in(r4.w) in19
@out(r0.x) out0
@out(r0.y) out1
@out(r0.z) out2
@@ -39,139 +39,121 @@
@out(r4.y) out17
@out(r4.z) out18
@out(r4.w) out19
-(sy)(ss)mul.f r0.w, r2.y, c12.x
-mul.f r1.x, r0.x, c4.x
-mad.f32 r0.w, c13.x, r2.z, r0.w
-mad.f32 r1.x, c4.y, r0.y, r1.x
-mad.f32 r0.w, c14.x, r2.w, r0.w
-mov.f32f32 r1.y, r4.x
-mad.f32 r0.w, c15.x, r3.x, r0.w
-mov.f32f32 r1.x, r1.x
-mul.f r1.z, r2.y, c12.z
-mov.f32f32 r1.y, r1.y
-mul.f r2.x, r0.w, r0.w
-mul.f r1.w, r2.y, c12.y
-mad.f32 r1.x, c4.z, r0.z, r1.x
-mad.f32 r1.w, c13.y, r2.z, r1.w
-max.f r1.y, r1.y, c19.x
-mad.f32 r1.w, c14.y, r2.w, r1.w
-mul.f r4.x, r1.x, c10.x
-mad.f32 r4.y, c15.y, r3.x, r1.w
-mul.f r4.z, r0.x, c5.x
-min.f r1.w, r1.y, c19.y
-mad.f32 r1.y, c13.z, r2.z, r1.z
-mad.f32 r1.z, r4.y, r4.y, r2.x
-mad.f32 r2.x, c5.y, r0.y, r4.z
-mad.f32 r1.y, c14.z, r2.w, r1.y
+@const(c19.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000
+@const(c20.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
+(sy)(ss)mul.f r0.w, r5.x, c12.x
+mul.f r2.x, r0.x, c4.x
+mad.f32 r0.w, c13.x, r5.y, r0.w
+mad.f32 r2.x, c4.y, r0.y, r2.x
+mad.f32 r0.w, c14.x, r5.z, r0.w
+mad.f32 r2.y, c4.z, r0.z, r2.x
+mad.f32 r0.w, c15.x, r5.w, r0.w
+mul.f r2.x, r5.x, c12.z
+mul.f r2.z, r5.x, c12.y
+mad.f32 r2.x, c13.z, r5.y, r2.x
+mul.f r2.w, r0.w, r0.w
+mad.f32 r2.z, c13.y, r5.y, r2.z
+mul.f r6.x, r2.y, c10.x
+mad.f32 r2.z, c14.y, r5.z, r2.z
+mul.f r6.y, r0.x, c5.x
+mad.f32 r2.z, c15.y, r5.w, r2.z
+mad.f32 r6.y, c5.y, r0.y, r6.y
+mad.f32 r2.x, c14.z, r5.z, r2.x
+mad.f32 r6.y, c5.z, r0.z, r6.y
+mad.f32 r2.w, r2.z, r2.z, r2.w
+mad.f32 r6.z, c15.z, r5.w, r2.x
+mul.f r6.w, r5.x, c0.w
+mul.f r7.x, r5.x, c0.z
+mul.f r7.y, r5.x, c0.y
+mad.f32 r2.w, r6.z, r6.z, r2.w
+mad.f32 r6.x, c10.y, r6.y, r6.x
mul.f r0.x, r0.x, c6.x
-mov.f32f32 r1.z, r1.z
-mad.f32 r1.y, c15.z, r3.x, r1.y
-mov.f32f32 r2.x, r2.x
+absneg.f r2.x, (abs)r6.z
+mad.f32 r6.w, c1.w, r5.y, r6.w
+mad.f32 r7.x, c1.z, r5.y, r7.x
+mad.f32 r7.y, c1.y, r5.y, r7.y
+rsq r2.w, (abs)r2.w
+(ss)mov.f32f32 r7.z, r2.w
+(ss)mul.f r2.w, r6.z, r2.w
mad.f32 r0.x, c6.y, r0.y, r0.x
-mul.f r0.y, r2.y, c0.w
-mad.f32 r1.z, r1.y, r1.y, r1.z
-mad.f32 r4.z, c5.z, r0.z, r2.x
-absneg.f r2.x, (abs)r1.y
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
-rsq r1.z, (abs)r1.z
-(ss)mov.f32f32 r1.z, r1.z
-mad.f32 r4.x, c10.y, r4.z, r4.x
+mad.f32 r0.y, c2.w, r5.z, r6.w
+mul.f r0.w, r0.w, r7.z
+mul.f r2.z, r2.z, r7.z
+(rpt1)nop
+add.f r6.z, c10.x, (neg)r0.w
+add.f r2.z, c10.y, (neg)r2.z
+add.f r2.w, c10.z, (neg)r2.w
mad.f32 r0.x, c6.z, r0.z, r0.x
-mad.f32 r0.y, c1.w, r2.z, r0.y
-mul.f r0.z, r0.w, r1.z
-mul.f r0.w, r4.y, r1.z
-mul.f r1.y, r1.y, r1.z
-mov.f32f32 r1.z, r4.x
-add.f r0.z, c10.x, (neg)r0.z
-add.f r4.x, c10.y, (neg)r0.w
-add.f r1.y, c10.z, (neg)r1.y
-mad.f32 r0.w, c10.z, r0.x, r1.z
-mul.f r1.z, r0.z, r0.z
-mad.f32 r0.y, c2.w, r2.w, r0.y
-mad.f32 r1.z, r4.x, r4.x, r1.z
-max.f r4.y, r0.w, c20.x
-cmps.f.lt r7.x, (neg)r0.w, c19.x
-mad.f32 r0.w, c3.w, r3.x, r0.y
-mov.f32f32 r0.y, r1.z
-mov.f32f32 r1.z, r4.y
-mad.f32 r0.y, r1.y, r1.y, r0.y
-mul.f r4.y, c16.z, r3.w
-mul.f r4.w, c16.y, r3.z
-mul.f r7.y, c16.x, r3.y
-mul.f r7.z, r2.y, c0.z
-mul.f r7.w, r2.y, c0.y
-mul.f r2.y, r2.y, c0.x
+mul.f r0.z, r6.z, r6.z
+mad.f32 r0.w, c3.w, r5.w, r0.y
+mad.f32 r0.y, r2.z, r2.z, r0.z
+mad.f32 r0.z, c10.z, r0.x, r6.x
+mad.f32 r0.y, r2.w, r2.w, r0.y
+mad.f32 r6.x, c2.z, r5.z, r7.x
+mad.f32 r6.w, c2.y, r5.z, r7.y
+mul.f r5.x, r5.x, c0.x
+max.f r1.w, r1.w, c19.x
+mul.f r7.x, c16.z, r1.z
+mul.f r7.y, c16.y, r1.y
rsq r0.y, (abs)r0.y
-(ss)mov.f32f32 r0.y, r0.y
-mad.f32 r8.x, c8.z, r3.w, c9.z
-mad.f32 r8.y, c8.y, r3.z, c9.y
-mad.f32 r8.z, c8.x, r3.y, c9.x
-mul.f r0.z, r0.z, r0.y
-mul.f r4.x, r4.x, r0.y
-mul.f r0.y, r1.y, r0.y
-add.f r1.y, r4.y, r8.x
-mul.f r0.z, r1.x, r0.z
-mul.f r1.x, c17.z, r3.w
-mad.f32 r0.z, r4.z, r4.x, r0.z
-add.f r3.w, r4.w, r8.y
-add.f r4.x, r7.y, r8.z
-mad.f32 r4.y, c1.z, r2.z, r7.z
-mov.f32f32 r0.z, r0.z
-mad.f32 r1.x, r1.z, r1.x, r1.y
-mad.f32 r0.x, r0.x, r0.y, r0.z
-mul.f r0.y, c17.y, r3.z
-mul.f r0.z, c17.x, r3.y
-mad.f32 r1.y, c2.z, r2.w, r4.y
+(ss)mov.f32f32 r7.z, r0.y
+(ss)mul.f r0.y, r2.w, r0.y
+max.f r7.w, r0.z, c20.x
+mul.f r2.w, c16.x, r1.x
+mul.f r6.z, r6.z, r7.z
+mul.f r2.z, r2.z, r7.z
+mov.f32f32 r7.z, r7.w
+mad.f32 r8.x, c8.y, r1.y, c9.y
+mul.f r2.y, r2.y, r6.z
+mad.f32 r6.z, c8.z, r1.z, c9.z
+mad.f32 r2.y, r6.y, r2.z, r2.y
+add.f r2.z, r7.y, r8.x
+mad.f32 r0.x, r0.x, r0.y, r2.y
+add.f r2.y, r7.x, r6.z
+mul.f r0.y, c17.y, r1.y
+mad.f32 r1.y, c8.x, r1.x, c9.x
max.f r0.x, r0.x, c20.x
-mad.f32 r7.y, r1.z, r0.y, r3.w
-mad.f32 r1.z, r1.z, r0.z, r4.x
-mad.f32 r0.z, c3.z, r3.x, r1.y
-mad.f32 r0.y, c1.y, r2.z, r7.w
-mad.f32 r1.y, c1.x, r2.z, r2.y
-mov.f32f32 r2.y, c7.x
+mul.f r1.z, c17.z, r1.z
+mad.f32 r6.y, r7.z, r0.y, r2.z
+add.f r1.y, r2.w, r1.y
+cmps.f.lt r6.z, (neg)r0.z, c19.x
+mad.f32 r0.z, c3.z, r5.w, r6.x
+mad.f32 r0.y, c3.y, r5.w, r6.w
log2 r0.x, r0.x
-(ss)mov.f32f32 r2.z, r0.x
-(ss)mad.f32 r0.x, c2.y, r2.w, r0.y
-mad.f32 r1.y, c2.x, r2.w, r1.y
-min.f r2.y, r2.y, c20.z
-mad.f32 r0.y, c3.y, r3.x, r0.x
-mad.f32 r0.x, c3.x, r3.x, r1.y
-mov.f32f32 r4.w, r6.w
-mul.f r1.y, r2.y, r2.z
-mov.f32f32 r4.z, r6.z
-mov.f32f32 r4.y, r6.y
-mov.f32f32 r4.x, r6.x
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r3.w, r5.w
-mov.f32f32 r3.z, r5.z
-mov.f32f32 r3.y, r5.y
-mov.f32f32 r3.x, r5.x
+mov.f32f32 r2.z, c7.x
+mad.f32 r1.z, r7.z, r1.z, r2.y
+mul.f r1.x, c17.x, r1.x
+mad.f32 r2.y, c1.x, r5.y, r5.x
+min.f r2.z, r2.z, c20.z
+mad.f32 r2.y, c2.x, r5.z, r2.y
+min.f r1.w, r1.w, c19.y
mov.f32f32 r2.w, c19.y
+(ss)mul.f r5.x, r2.z, r0.x
+mad.f32 r1.x, r7.w, r1.x, r1.y
+(ss)mad.f32 r0.x, c3.x, r5.w, r2.y
mov.f32f32 r2.z, c19.x
-exp2 r1.y, r1.y
-(ss)mov.f32f32 r1.y, r1.y
mov.f32f32 r2.y, c19.x
(rpt1)nop
-sel.b32 r1.y, r1.y, r7.x, c20.x
-(rpt2)nop
-mov.f32f32 r1.y, r1.y
-(rpt2)nop
-mov.f32f32 r1.y, r1.y
+exp2 r1.y, r5.x
+(ss)sel.b32 r1.y, r1.y, r6.z, c20.x
(rpt2)nop
-mad.f32 r1.x, c18.z, r1.y, r1.x
-mad.f32 r5.x, c18.y, r1.y, r7.y
-mad.f32 r1.y, c18.x, r1.y, r1.z
-nop
+(ss)mov.f32f32 r5.x, r1.y
+mad.f32 r1.x, c18.x, r1.y, r1.x
+(rpt1)nop
+mov.f32f32 r1.y, r5.x
max.f r1.x, r1.x, c19.x
-max.f r5.x, r5.x, c19.x
-max.f r5.y, r1.y, c19.x
+(rpt1)nop
+mad.f32 r1.z, c18.z, r1.y, r1.z
+mad.f32 r1.y, c18.y, r1.y, r6.y
+min.f r1.x, r1.x, c19.y
nop
-min.f r1.z, r1.x, c19.y
-min.f r1.y, r5.x, c19.y
-min.f r1.x, r5.y, c19.y
+max.f r1.z, r1.z, c19.x
+max.f r1.y, r1.y, c19.x
+(rpt1)nop
+min.f r1.z, r1.z, c19.y
+min.f r1.y, r1.y, c19.y
end
; VERT: outputs: r0.x (0:0) r1.x (1:0) r2.x (3:0) r3.x (5:0) r4.x (5:1)
-; VERT: inputs: r2.y (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r3.y (0:0,cm=f,il=16,b=0) r5.x (0:0,cm=f,il=20,b=0) r6.x (0:0,cm=f,il=24,b=0)
-; VERT: 141 instructions, 0 half, 9 full
+; VERT: inputs: r5.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r1.x (0:0,cm=f,il=16,b=0) r3.x (0:0,cm=f,il=20,b=0) r4.x (0:0,cm=f,il=24,b=0)
+; VERT: 119 instructions, 0 half, 9 full
diff --git a/reference/stk-mines/stk-mines-18.asm b/reference/stk-mines/stk-mines-18.asm
index f978c20..5b1d566 100644
--- a/reference/stk-mines/stk-mines-18.asm
+++ b/reference/stk-mines/stk-mines-18.asm
@@ -1,16 +1,16 @@
; options:
; VERT: new compiler
-@in(r3.x) in0
-@in(r3.y) in1
-@in(r3.z) in2
-@in(r3.w) in3
+@in(r2.x) in0
+@in(r2.y) in1
+@in(r2.z) in2
+@in(r2.w) in3
@in(r0.x) in4
@in(r0.y) in5
@in(r0.z) in6
-@in(r2.x) in8
-@in(r2.y) in9
-@in(r2.z) in10
-@in(r2.w) in11
+@in(r3.x) in8
+@in(r3.y) in9
+@in(r3.z) in10
+@in(r3.w) in11
@in(r4.z) in14
@in(r4.w) in15
@out(r0.x) out0
@@ -25,171 +25,157 @@
@out(r2.y) out9
@out(r2.z) out10
@out(r2.w) out11
-(sy)(ss)mul.f r0.w, r3.x, c12.x
+@const(c25.x) 0x3f000000, 0x00000000, 0x3f800000, 0x00000000
+@const(c26.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
+(sy)(ss)mul.f r0.w, r2.x, c12.x
mul.f r1.x, r0.x, c4.x
-mad.f32 r0.w, c13.x, r3.y, r0.w
+mad.f32 r0.w, c13.x, r2.y, r0.w
mad.f32 r1.x, c4.y, r0.y, r1.x
-mad.f32 r0.w, c14.x, r3.z, r0.w
-mov.f32f32 r1.y, r2.w
-mad.f32 r0.w, c15.x, r3.w, r0.w
-mov.f32f32 r1.x, r1.x
-mul.f r1.z, r0.x, c5.x
-mul.f r0.x, r0.x, c6.x
-mul.f r1.w, r0.w, r0.w
-mul.f r2.w, r3.x, c12.y
+mad.f32 r0.w, c14.x, r2.z, r0.w
mad.f32 r1.x, c4.z, r0.z, r1.x
-mad.f32 r2.w, c13.y, r3.y, r2.w
-mov.f32f32 r1.y, r1.y
-mad.f32 r2.w, c14.y, r3.z, r2.w
-mul.f r4.x, r1.x, c10.x
-mad.f32 r2.w, c15.y, r3.w, r2.w
-mad.f32 r1.z, c5.y, r0.y, r1.z
-max.f r1.y, r1.y, c25.y
+mad.f32 r1.y, c15.x, r2.w, r0.w
+mul.f r0.w, r2.x, c12.y
+mul.f r1.z, r2.x, c12.z
+mul.f r1.w, r2.x, c0.w
+mul.f r4.x, r1.y, r1.y
+mad.f32 r0.w, c13.y, r2.y, r0.w
+mul.f r4.y, r1.x, c10.x
+mad.f32 r0.w, c14.y, r2.z, r0.w
+mul.f r5.x, r0.x, c5.x
+mad.f32 r5.y, c15.y, r2.w, r0.w
+mad.f32 r0.w, c5.y, r0.y, r5.x
+mad.f32 r1.z, c13.z, r2.y, r1.z
+mad.f32 r1.w, c1.w, r2.y, r1.w
+mad.f32 r4.x, r5.y, r5.y, r4.x
+mad.f32 r1.z, c14.z, r2.z, r1.z
+mad.f32 r5.x, c5.z, r0.z, r0.w
+mad.f32 r1.z, c15.z, r2.w, r1.z
+mad.f32 r0.w, c2.w, r2.z, r1.w
+mul.f r1.w, r2.x, c0.z
+mul.f r5.z, r2.x, c0.y
+mad.f32 r4.x, r1.z, r1.z, r4.x
+mad.f32 r4.y, c10.y, r5.x, r4.y
+mul.f r0.x, r0.x, c6.x
+mad.f32 r0.w, c3.w, r2.w, r0.w
+mad.f32 r1.w, c1.z, r2.y, r1.w
+mad.f32 r5.z, c1.y, r2.y, r5.z
+mul.f r2.x, r2.x, c0.x
+rsq r4.x, (abs)r4.x
+(ss)mov.f32f32 r5.w, r4.x
+mul.f r1.z, r1.z, r4.x
mad.f32 r0.x, c6.y, r0.y, r0.x
-mad.f32 r0.y, r2.w, r2.w, r1.w
-mov.f32f32 r1.z, r1.z
-min.f r1.w, r1.y, c25.z
-mad.f32 r1.y, c5.z, r0.z, r1.z
-mov.f32f32 r0.y, r0.y
-mul.f r1.z, r3.x, c12.z
-mov.f32f32 r0.x, r0.x
-mad.f32 r1.z, c13.z, r3.y, r1.z
-mad.f32 r4.x, c10.y, r1.y, r4.x
-mad.f32 r1.z, c14.z, r3.z, r1.z
-mad.f32 r0.x, c6.z, r0.z, r0.x
-mad.f32 r0.z, c15.z, r3.w, r1.z
-mov.f32f32 r1.z, r4.x
-mul.f r4.x, r3.x, c0.w
-mul.f r4.y, r3.x, c0.z
-mad.f32 r0.y, r0.z, r0.z, r0.y
-mad.f32 r1.z, c10.z, r0.x, r1.z
-mad.f32 r4.x, c1.w, r3.y, r4.x
-mad.f32 r4.y, c1.z, r3.y, r4.y
-mul.f r5.x, r3.x, c0.y
-mul.f r3.x, r3.x, c0.x
-mul.f r5.y, c16.z, r2.z
-rsq r0.y, (abs)r0.y
-(ss)mov.f32f32 r0.y, r0.y
-max.f r5.z, r1.z, c26.x
-cmps.f.lt r1.z, (neg)r1.z, c25.y
-mad.f32 r4.x, c2.w, r3.z, r4.x
-mul.f r0.w, r0.w, r0.y
-mul.f r2.w, r2.w, r0.y
-mul.f r0.y, r0.z, r0.y
-mov.f32f32 r5.z, r5.z
-add.f r0.z, c10.x, (neg)r0.w
-mul.f r5.w, r1.x, r0.w
-add.f r6.x, c10.y, (neg)r2.w
-add.f r6.y, c10.z, (neg)r0.y
-mul.f r6.z, r0.z, r0.z
-mad.f32 r5.w, r1.y, r2.w, r5.w
-mad.f32 r6.z, r6.x, r6.x, r6.z
-mad.f32 r6.w, c8.z, r2.z, c9.z
-mul.f r7.x, c16.y, r2.y
-mul.f r7.y, c16.x, r2.x
-mov.f32f32 r6.z, r6.z
-mov.f32f32 r5.w, r5.w
-mad.f32 r6.z, r6.y, r6.y, r6.z
-mad.f32 r7.z, r0.x, r0.y, r5.w
-mad.f32 r7.w, r0.x, r0.y, r5.w
-mad.f32 r5.w, r0.x, r0.y, r5.w
-add.f r5.y, r5.y, r6.w
-mad.f32 r6.w, c8.y, r2.y, c9.y
-mad.f32 r8.x, c8.x, r2.x, c9.x
-rsq r6.z, (abs)r6.z
-(ss)mov.f32f32 r6.z, r6.z
-add.f r7.z, r7.z, r7.z
-add.f r7.w, r7.w, r7.w
-add.f r5.w, r5.w, r5.w
-mul.f r0.z, r0.z, r6.z
-mad.f32 r7.z, (neg)r7.z, r1.x, r0.w
-mul.f r0.w, r6.x, r6.z
-mul.f r6.x, r6.y, r6.z
-mul.f r0.z, r1.x, r0.z
-add.f r1.x, r7.z, c25.y
-mad.f32 r0.z, r1.y, r0.w, r0.z
-mad.f32 r1.y, (neg)r7.w, r1.y, r2.w
-mad.f32 r0.y, (neg)r5.w, r0.x, r0.y
-nop
-mov.f32f32 r0.z, r0.z
-mul.f r0.w, r1.x, r1.x
-mad.f32 r0.x, r0.x, r6.x, r0.z
-add.f r0.z, r1.y, c25.y
-add.f r0.y, r0.y, c25.z
-mul.f r1.x, c17.z, r2.z
-max.f r0.x, r0.x, c26.x
-mad.f32 r0.z, r0.z, r0.z, r0.w
-add.f r2.z, r7.x, r6.w
-add.f r2.w, r7.y, r8.x
-mad.f32 r0.w, c3.w, r3.w, r4.x
-mad.f32 r4.x, c2.z, r3.z, r4.y
-mad.f32 r4.y, c1.y, r3.y, r5.x
-log2 r0.x, r0.x
-(ss)mov.f32f32 r0.x, r0.x
-mov.f32f32 r5.x, c7.x
-mov.f32f32 r0.z, r0.z
-mad.f32 r1.x, r5.z, r1.x, r5.y
-mul.f r2.y, c17.y, r2.y
-min.f r5.x, r5.x, c26.z
-mad.f32 r0.y, r0.y, r0.y, r0.z
-mul.f r2.x, c17.x, r2.x
-mad.f32 r0.z, c3.z, r3.w, r4.x
-mul.f r0.x, r5.x, r0.x
-mov.f32f32 r0.y, r0.y
-mad.f32 r2.y, r5.z, r2.y, r2.z
-mad.f32 r2.x, r5.z, r2.x, r2.w
-mov.f32f32 r0.x, r0.x
-mad.f32 r2.z, c2.y, r3.z, r4.y
-mad.f32 r2.w, c1.x, r3.y, r3.x
-mov.f32f32 r3.x, r4.z
-mov.f32f32 r3.y, r4.w
+mad.f32 r0.y, c2.z, r2.z, r1.w
+mul.f r1.y, r1.y, r5.w
+mul.f r1.w, r5.y, r5.w
(rpt1)nop
-exp2 r0.x, r0.x
-(ss)mov.f32f32 r0.x, r0.x
-rsq r0.y, (abs)r0.y
-(ss)mul.f r4.x, r0.y, c25.x
-(ss)rsq r0.y, (abs)r0.y
-(ss)mul.f r4.y, r0.y, c25.x
-(ss)mad.f32 r0.y, c3.y, r3.w, r2.z
-sel.b32 r0.x, r0.x, r1.z, c26.x
-mad.f32 r4.x, r7.z, r4.x, c25.x
-mad.f32 r4.y, r1.y, r4.y, c25.x
-mad.f32 r3.z, c2.x, r3.z, r2.w
-mov.f32f32 r0.x, r0.x
-mul.f r1.y, r4.x, c21.w
-mul.f r1.z, r4.x, c21.z
-mul.f r2.z, r4.x, c21.y
-mov.f32f32 r0.x, r0.x
-mad.f32 r1.y, c22.w, r4.y, r1.y
-mad.f32 r4.z, c22.z, r4.y, r1.z
-mad.f32 r2.z, c22.y, r4.y, r2.z
-mad.f32 r1.x, c18.z, r0.x, r1.x
-mad.f32 r1.z, c18.y, r0.x, r2.y
-mad.f32 r0.x, c18.x, r0.x, r2.x
-mad.f32 r2.x, c23.w, r3.x, r1.y
-max.f r1.x, r1.x, c25.y
-max.f r1.y, r1.z, c25.y
-max.f r0.x, r0.x, c25.y
+(ss)add.f r4.x, c10.x, (neg)r1.y
+mul.f r5.y, r1.x, r1.y
+add.f r5.w, c10.y, (neg)r1.w
+add.f r6.x, c10.z, (neg)r1.z
+mul.f r6.y, r4.x, r4.x
+mad.f32 r5.y, r5.x, r1.w, r5.y
+mad.f32 r6.y, r5.w, r5.w, r6.y
+mad.f32 r0.x, c6.z, r0.z, r0.x
+mad.f32 r6.y, r6.x, r6.x, r6.y
+mov.f32f32 r6.z, r5.y
+mad.f32 r0.z, c3.z, r2.w, r0.y
+mad.f32 r0.y, c2.y, r2.z, r5.z
+mad.f32 r2.x, c1.x, r2.y, r2.x
+max.f r2.y, r3.w, c25.y
+mul.f r3.w, c16.z, r3.z
+rsq r5.z, (abs)r6.y
+(ss)mov.f32f32 r6.y, r5.z
+mad.f32 r6.w, r0.x, r1.z, r6.z
+mad.f32 r6.z, r0.x, r1.z, r6.z
+mad.f32 r5.y, r0.x, r1.z, r5.y
+mul.f r4.x, r4.x, r6.y
+add.f r6.w, r6.w, r6.w
+add.f r6.z, r6.z, r6.z
+mul.f r5.w, r5.w, r6.y
+mul.f r4.x, r1.x, r4.x
+mad.f32 r1.x, (neg)r6.w, r1.x, r1.y
+mad.f32 r1.y, (neg)r6.z, r0.x, r1.z
+mad.f32 r1.z, r5.x, r5.w, r4.x
+mul.f r4.x, r6.x, r5.z
+add.f r5.z, r1.x, c25.y
+add.f r1.y, r1.y, c25.z
+add.f r5.y, r5.y, r5.y
+mad.f32 r1.z, r0.x, r4.x, r1.z
+mul.f r4.x, r5.z, r5.z
+mad.f32 r0.x, c10.z, r0.x, r4.y
+mad.f32 r0.y, c3.y, r2.w, r0.y
+max.f r1.z, r1.z, c26.x
+mad.f32 r4.y, (neg)r5.y, r5.x, r1.w
+max.f r5.x, r0.x, c26.x
+mul.f r5.y, c16.x, r3.x
+cmps.f.lt r5.z, (neg)r0.x, c25.y
+mad.f32 r0.x, c2.x, r2.z, r2.x
+min.f r1.w, r2.y, c25.z
+log2 r1.z, r1.z
+mov.f32f32 r2.x, c7.x
+add.f r2.y, r4.y, c25.y
+mov.f32f32 r2.z, r5.x
+mul.f r5.w, c16.y, r3.y
+min.f r2.x, r2.x, c26.z
+mad.f32 r2.y, r2.y, r2.y, r4.x
+mad.f32 r4.x, c8.z, r3.z, c9.z
+mad.f32 r6.x, c8.y, r3.y, c9.y
+(ss)mul.f r1.z, r2.x, r1.z
+mad.f32 r1.y, r1.y, r1.y, r2.y
+add.f r2.x, r3.w, r4.x
+add.f r2.y, r5.w, r6.x
+mad.f32 r3.w, c8.x, r3.x, c9.x
+mad.f32 r0.x, c3.x, r2.w, r0.x
+mov.f32f32 r4.x, r4.z
+exp2 r1.z, r1.z
+(ss)sel.b32 r1.z, r1.z, r5.z, c26.x
+add.f r2.w, r5.y, r3.w
+mul.f r3.x, c17.x, r3.x
+rsq r1.y, (abs)r1.y
+(ss)mul.f r3.w, r1.y, c25.x
+mov.f32f32 r5.y, r1.z
+(ss)rsq r1.y, (abs)r1.y
+(ss)mul.f r1.y, r1.y, c25.x
+mul.f r3.z, c17.z, r3.z
+mad.f32 r2.w, r5.x, r3.x, r2.w
+mov.f32f32 r3.x, r5.y
+mul.f r3.y, c17.y, r3.y
+mad.f32 r2.x, r2.z, r3.z, r2.x
+mad.f32 r1.z, c18.x, r1.z, r2.w
+mad.f32 r2.x, c18.z, r3.x, r2.x
+mad.f32 r2.y, r2.z, r3.y, r2.y
+mad.f32 r2.z, r1.x, r3.w, c25.x
+mad.f32 r3.y, r4.y, r1.y, c25.x
+max.f r1.x, r2.x, c25.y
+mad.f32 r1.y, c18.y, r3.x, r2.y
+max.f r2.x, r1.z, c25.y
nop
min.f r1.z, r1.x, c25.z
+max.f r1.y, r1.y, c25.y
+min.f r1.x, r2.x, c25.z
+mul.f r2.x, r2.z, c21.w
+mul.f r2.y, r2.z, c21.z
min.f r1.y, r1.y, c25.z
-min.f r1.x, r0.x, c25.z
-mad.f32 r2.w, c24.w, r3.y, r2.x
-mad.f32 r0.x, c23.z, r3.x, r4.z
-mad.f32 r2.x, c23.y, r3.x, r2.z
-mad.f32 r2.z, c24.z, r3.y, r0.x
-mad.f32 r2.y, c24.y, r3.y, r2.x
-mul.f r2.x, r4.x, c21.x
-mad.f32 r0.x, c3.x, r3.w, r3.z
-mad.f32 r2.x, c22.x, r4.y, r2.x
-nop
-mad.f32 r2.x, c23.x, r3.x, r2.x
-nop
-mad.f32 r2.x, c24.x, r3.y, r2.x
+mad.f32 r2.x, c22.w, r3.y, r2.x
+mad.f32 r2.y, c22.z, r3.y, r2.y
+mad.f32 r2.x, c23.w, r4.x, r2.x
+mov.f32f32 r3.x, r4.w
+mad.f32 r2.y, c23.z, r4.x, r2.y
+mul.f r3.z, r2.z, c21.y
+mul.f r3.w, r2.z, c21.x
+mad.f32 r2.w, c24.w, r3.x, r2.x
+mad.f32 r2.z, c24.z, r3.x, r2.y
+mad.f32 r2.x, c22.y, r3.y, r3.z
+mad.f32 r2.y, c22.x, r3.y, r3.w
+mad.f32 r2.x, c23.y, r4.x, r2.x
+mad.f32 r3.y, c23.x, r4.z, r2.y
+mad.f32 r2.y, c24.y, r3.x, r2.x
+mad.f32 r2.x, c24.x, r4.w, r3.y
end
nop
nop
+nop
; VERT: outputs: r0.x (0:0) r1.x (1:0) r2.x (5:0)
-; VERT: inputs: r3.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r2.x (0:0,cm=f,il=16,b=0) r4.x (0:0,cm=c,il=20,b=0)
-; VERT: 163 instructions, 0 half, 9 full
+; VERT: inputs: r2.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r3.x (0:0,cm=f,il=16,b=0) r4.x (0:0,cm=c,il=20,b=0)
+; VERT: 146 instructions, 0 half, 7 full
diff --git a/reference/stk-mines/stk-mines-19.asm b/reference/stk-mines/stk-mines-19.asm
index 6763f4d..c381d21 100644
--- a/reference/stk-mines/stk-mines-19.asm
+++ b/reference/stk-mines/stk-mines-19.asm
@@ -1,16 +1,16 @@
; options:
; VERT: new compiler
-@in(r2.x) in0
-@in(r2.y) in1
-@in(r2.z) in2
-@in(r2.w) in3
+@in(r1.x) in0
+@in(r1.y) in1
+@in(r1.z) in2
+@in(r1.w) in3
@in(r0.x) in4
@in(r0.y) in5
@in(r0.z) in6
-@in(r3.x) in8
-@in(r3.y) in9
-@in(r3.z) in10
-@in(r3.w) in11
+@in(r2.x) in8
+@in(r2.y) in9
+@in(r2.z) in10
+@in(r2.w) in11
@out(r0.x) out0
@out(r0.y) out1
@out(r0.z) out2
@@ -19,131 +19,120 @@
@out(r1.y) out5
@out(r1.z) out6
@out(r1.w) out7
-(sy)(ss)mul.f r0.w, r2.x, c12.x
-mul.f r1.x, r0.x, c4.x
-mad.f32 r0.w, c13.x, r2.y, r0.w
-mad.f32 r1.x, c4.y, r0.y, r1.x
-mad.f32 r0.w, c14.x, r2.z, r0.w
-mov.f32f32 r1.y, r3.w
-mad.f32 r0.w, c15.x, r2.w, r0.w
-mov.f32f32 r1.x, r1.x
-mul.f r1.z, r0.x, c5.x
+@const(c19.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
+(sy)(ss)mul.f r0.w, r1.x, c12.x
+mul.f r3.x, r0.x, c4.x
+mad.f32 r0.w, c13.x, r1.y, r0.w
+mad.f32 r3.x, c4.y, r0.y, r3.x
+mad.f32 r0.w, c14.x, r1.z, r0.w
+mad.f32 r3.x, c4.z, r0.z, r3.x
+mad.f32 r3.y, c15.x, r1.w, r0.w
+mul.f r0.w, r1.x, c12.y
+mul.f r3.z, r1.x, c12.z
+mul.f r3.w, r1.x, c0.w
+mul.f r4.x, r3.y, r3.y
+mad.f32 r0.w, c13.y, r1.y, r0.w
+mul.f r4.y, r3.x, c10.x
+mad.f32 r0.w, c14.y, r1.z, r0.w
+mul.f r4.z, r0.x, c5.x
+mad.f32 r4.w, c15.y, r1.w, r0.w
+mad.f32 r0.w, c5.y, r0.y, r4.z
+mad.f32 r3.z, c13.z, r1.y, r3.z
+mad.f32 r3.w, c1.w, r1.y, r3.w
+mad.f32 r4.x, r4.w, r4.w, r4.x
+mad.f32 r3.z, c14.z, r1.z, r3.z
+mad.f32 r4.z, c5.z, r0.z, r0.w
+mad.f32 r3.z, c15.z, r1.w, r3.z
+mad.f32 r0.w, c2.w, r1.z, r3.w
+mul.f r3.w, r1.x, c0.z
+mul.f r5.x, r1.x, c0.y
+mad.f32 r4.x, r3.z, r3.z, r4.x
+mad.f32 r4.y, c10.y, r4.z, r4.y
mul.f r0.x, r0.x, c6.x
-mul.f r1.w, r0.w, r0.w
-mul.f r3.w, r2.x, c12.y
-mad.f32 r1.x, c4.z, r0.z, r1.x
-mad.f32 r3.w, c13.y, r2.y, r3.w
-mov.f32f32 r1.y, r1.y
-mad.f32 r3.w, c14.y, r2.z, r3.w
-mul.f r4.x, r1.x, c10.x
-mad.f32 r3.w, c15.y, r2.w, r3.w
-mad.f32 r1.z, c5.y, r0.y, r1.z
-max.f r1.y, r1.y, c19.x
+mad.f32 r0.w, c3.w, r1.w, r0.w
+mad.f32 r3.w, c1.z, r1.y, r3.w
+mad.f32 r5.x, c1.y, r1.y, r5.x
+mul.f r1.x, r1.x, c0.x
+rsq r4.x, (abs)r4.x
+(ss)mov.f32f32 r5.y, r4.x
+mul.f r3.z, r3.z, r4.x
mad.f32 r0.x, c6.y, r0.y, r0.x
-mad.f32 r0.y, r3.w, r3.w, r1.w
-mov.f32f32 r1.z, r1.z
-min.f r1.w, r1.y, c19.y
-mad.f32 r1.y, c5.z, r0.z, r1.z
-mov.f32f32 r0.y, r0.y
-mul.f r1.z, r2.x, c12.z
-mov.f32f32 r0.x, r0.x
-mad.f32 r1.z, c13.z, r2.y, r1.z
-mad.f32 r4.x, c10.y, r1.y, r4.x
-mad.f32 r1.z, c14.z, r2.z, r1.z
+mad.f32 r0.y, c2.z, r1.z, r3.w
+mul.f r3.y, r3.y, r5.y
+mul.f r3.w, r4.w, r5.y
+(rpt1)nop
+add.f r3.y, c10.x, (neg)r3.y
+add.f r3.w, c10.y, (neg)r3.w
+add.f r3.z, c10.z, (neg)r3.z
mad.f32 r0.x, c6.z, r0.z, r0.x
-mad.f32 r0.z, c15.z, r2.w, r1.z
-mov.f32f32 r1.z, r4.x
-mul.f r4.x, r2.x, c0.w
-mul.f r4.y, r2.x, c0.z
-mad.f32 r0.y, r0.z, r0.z, r0.y
-mad.f32 r1.z, c10.z, r0.x, r1.z
-mad.f32 r4.x, c1.w, r2.y, r4.x
-mad.f32 r4.y, c1.z, r2.y, r4.y
-mul.f r4.z, r2.x, c0.y
-mul.f r2.x, r2.x, c0.x
-mul.f r4.w, c16.z, r3.z
+(ss)mul.f r4.x, r3.y, r3.y
+mad.f32 r0.z, c3.z, r1.w, r0.y
+mad.f32 r0.y, r3.w, r3.w, r4.x
+mad.f32 r4.x, c10.z, r0.x, r4.y
+mad.f32 r0.y, r3.z, r3.z, r0.y
+mad.f32 r4.y, c2.y, r1.z, r5.x
+mad.f32 r1.x, c1.x, r1.y, r1.x
+max.f r1.y, r2.w, c19.x
+mul.f r2.w, c16.z, r2.z
+mul.f r4.w, c16.y, r2.y
+mul.f r5.x, c16.x, r2.x
rsq r0.y, (abs)r0.y
-(ss)mov.f32f32 r0.y, r0.y
-max.f r5.x, r1.z, c19.x
-cmps.f.lt r1.z, (neg)r1.z, c19.x
-mad.f32 r4.x, c2.w, r2.z, r4.x
-mul.f r0.w, r0.w, r0.y
-mul.f r3.w, r3.w, r0.y
-mul.f r0.y, r0.z, r0.y
-mov.f32f32 r5.x, r5.x
-add.f r0.z, c10.x, (neg)r0.w
-add.f r3.w, c10.y, (neg)r3.w
-add.f r0.y, c10.z, (neg)r0.y
-mad.f32 r0.w, c8.z, r3.z, c9.z
-mul.f r5.y, r0.z, r0.z
-mul.f r5.z, c16.y, r3.y
-mad.f32 r5.y, r3.w, r3.w, r5.y
-add.f r4.w, r4.w, r0.w
-mad.f32 r0.w, c8.y, r3.y, c9.y
-mul.f r5.w, c16.x, r3.x
-mov.f32f32 r5.y, r5.y
-mul.f r3.z, c17.z, r3.z
-mad.f32 r5.y, r0.y, r0.y, r5.y
-add.f r5.z, r5.z, r0.w
-mad.f32 r6.x, c8.x, r3.x, c9.x
-mad.f32 r0.w, c3.w, r2.w, r4.x
-mad.f32 r4.x, c2.z, r2.z, r4.y
-mad.f32 r4.y, c1.y, r2.y, r4.z
-mad.f32 r2.x, c1.x, r2.y, r2.x
-rsq r2.y, (abs)r5.y
-(ss)mov.f32f32 r2.y, r2.y
-mad.f32 r3.z, r5.x, r3.z, r4.w
-mul.f r3.y, c17.y, r3.y
-add.f r4.z, r5.w, r6.x
-mul.f r0.z, r0.z, r2.y
-mul.f r3.w, r3.w, r2.y
-mul.f r0.y, r0.y, r2.y
-mad.f32 r2.y, r5.x, r3.y, r5.z
-mul.f r0.z, r1.x, r0.z
-mul.f r1.x, c17.x, r3.x
-mad.f32 r1.y, r1.y, r3.w, r0.z
-mad.f32 r0.z, c3.z, r2.w, r4.x
-mad.f32 r3.x, c2.y, r2.z, r4.y
-mad.f32 r2.x, c2.x, r2.z, r2.x
-mov.f32f32 r1.y, r1.y
-mad.f32 r1.x, r5.x, r1.x, r4.z
-mad.f32 r1.y, r0.x, r0.y, r1.y
-mad.f32 r0.y, c3.y, r2.w, r3.x
-mad.f32 r0.x, c3.x, r2.w, r2.x
-mov.f32f32 r2.x, c7.x
-max.f r1.y, r1.y, c19.x
-(rpt5)nop
-log2 r1.y, r1.y
-(ss)mov.f32f32 r1.y, r1.y
-min.f r2.x, r2.x, c19.z
-(rpt2)nop
-mul.f r1.y, r2.x, r1.y
-(rpt2)nop
-mov.f32f32 r1.y, r1.y
+(ss)mov.f32f32 r5.y, r0.y
+(ss)mul.f r0.y, r3.z, r0.y
+max.f r3.z, r4.x, c19.x
+mad.f32 r5.z, c8.x, r2.x, c9.x
+mul.f r3.y, r3.y, r5.y
+mul.f r3.w, r3.w, r5.y
+mov.f32f32 r5.y, r3.z
+mad.f32 r5.w, c8.y, r2.y, c9.y
+mul.f r3.x, r3.x, r3.y
+mad.f32 r3.y, c8.z, r2.z, c9.z
+mad.f32 r3.x, r4.z, r3.w, r3.x
+add.f r3.w, r4.w, r5.w
+mad.f32 r0.x, r0.x, r0.y, r3.x
+add.f r2.w, r2.w, r3.y
+mul.f r0.y, c17.y, r2.y
+add.f r2.y, r5.x, r5.z
+max.f r0.x, r0.x, c19.x
+mul.f r2.z, c17.z, r2.z
+mad.f32 r3.x, r5.y, r0.y, r3.w
+mul.f r2.x, c17.x, r2.x
+cmps.f.lt r3.y, (neg)r4.x, c19.x
+mad.f32 r0.y, c3.y, r1.w, r4.y
+mad.f32 r1.x, c2.x, r1.z, r1.x
+log2 r1.z, r0.x
+mov.f32f32 r3.w, c7.x
+mad.f32 r2.z, r5.y, r2.z, r2.w
+mad.f32 r2.x, r3.z, r2.x, r2.y
+(ss)mad.f32 r0.x, c3.x, r1.w, r1.x
+min.f r1.x, r3.w, c19.z
+min.f r1.w, r1.y, c19.y
+(rpt1)nop
+(ss)mul.f r1.x, r1.x, r1.z
(rpt5)nop
-exp2 r1.y, r1.y
-(ss)mov.f32f32 r1.y, r1.y
-(rpt2)nop
-sel.b32 r1.y, r1.y, r1.z, c19.x
-(rpt2)nop
-mov.f32f32 r1.y, r1.y
+exp2 r1.x, r1.x
+(ss)sel.b32 r1.x, r1.x, r3.y, c19.x
(rpt2)nop
+mov.f32f32 r1.y, r1.x
+mad.f32 r1.x, c18.x, r1.x, r2.x
+(rpt1)nop
mov.f32f32 r1.y, r1.y
-(rpt2)nop
-mad.f32 r1.z, c18.z, r1.y, r3.z
-mad.f32 r2.x, c18.y, r1.y, r2.y
-mad.f32 r1.x, c18.x, r1.y, r1.x
-nop
-max.f r1.y, r1.z, c19.x
-max.f r2.x, r2.x, c19.x
max.f r1.x, r1.x, c19.x
-nop
-min.f r1.z, r1.y, c19.y
-min.f r1.y, r2.x, c19.y
+(rpt1)nop
+mad.f32 r1.z, c18.z, r1.y, r2.z
+mad.f32 r1.y, c18.y, r1.y, r3.x
min.f r1.x, r1.x, c19.y
+nop
+max.f r1.z, r1.z, c19.x
+max.f r1.y, r1.y, c19.x
+(rpt1)nop
+min.f r1.z, r1.z, c19.y
+min.f r1.y, r1.y, c19.y
end
nop
+nop
+nop
; VERT: outputs: r0.x (0:0) r1.x (1:0)
-; VERT: inputs: r2.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r3.x (0:0,cm=f,il=16,b=0)
-; VERT: 145 instructions, 0 half, 7 full
+; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r2.x (0:0,cm=f,il=16,b=0)
+; VERT: 121 instructions, 0 half, 6 full
diff --git a/reference/stk-mines/stk-mines-20.asm b/reference/stk-mines/stk-mines-20.asm
index 6846c40..10cfae3 100644
--- a/reference/stk-mines/stk-mines-20.asm
+++ b/reference/stk-mines/stk-mines-20.asm
@@ -7,10 +7,10 @@
@in(r0.x) in4
@in(r0.y) in5
@in(r0.z) in6
-@in(r0.w) in8
-@in(r1.x) in9
-@in(r1.y) in10
-@in(r1.z) in11
+@in(r1.x) in8
+@in(r1.y) in9
+@in(r1.z) in10
+@in(r1.w) in11
@in(r2.y) in12
@in(r2.z) in13
@in(r2.w) in14
@@ -31,147 +31,133 @@
@out(r3.y) out13
@out(r3.z) out14
@out(r3.w) out15
-(sy)(ss)mul.f r1.w, r4.x, c12.x
+@const(c24.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000
+@const(c25.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
+(sy)(ss)mul.f r0.w, r4.x, c12.x
mul.f r2.x, r0.x, c4.x
-mad.f32 r1.w, c13.x, r4.y, r1.w
+mad.f32 r0.w, c13.x, r4.y, r0.w
mad.f32 r2.x, c4.y, r0.y, r2.x
-mad.f32 r1.w, c14.x, r4.z, r1.w
-mov.f32f32 r1.z, r1.z
-mad.f32 r3.y, c15.x, r4.w, r1.w
-mov.f32f32 r1.w, r2.x
+mad.f32 r0.w, c14.x, r4.z, r0.w
+mad.f32 r3.y, c4.z, r0.z, r2.x
+mad.f32 r0.w, c15.x, r4.w, r0.w
mul.f r2.x, r4.x, c12.z
-mov.f32f32 r1.z, r1.z
-mul.f r3.z, r3.y, r3.y
-mul.f r3.w, r4.x, c12.y
-mad.f32 r5.x, c4.z, r0.z, r1.w
-mad.f32 r1.w, c13.y, r4.y, r3.w
-max.f r1.z, r1.z, c24.x
-mad.f32 r1.w, c14.y, r4.z, r1.w
-mul.f r3.w, r5.x, c10.x
-mad.f32 r5.y, c15.y, r4.w, r1.w
-mul.f r5.z, r0.x, c5.x
-min.f r1.w, r1.z, c24.y
-mad.f32 r1.z, c13.z, r4.y, r2.x
-mad.f32 r2.x, r5.y, r5.y, r3.z
-mad.f32 r3.z, c5.y, r0.y, r5.z
-mad.f32 r1.z, c14.z, r4.z, r1.z
+mul.f r3.z, r2.y, c20.w
+mul.f r3.w, r2.y, c20.z
+mul.f r5.x, r0.w, r0.w
+mul.f r5.y, r4.x, c12.y
+mul.f r5.z, r3.y, c10.x
+mad.f32 r5.y, c13.y, r4.y, r5.y
+mul.f r5.w, r0.x, c5.x
+mad.f32 r5.y, c14.y, r4.z, r5.y
+mad.f32 r5.w, c5.y, r0.y, r5.w
+mad.f32 r5.y, c15.y, r4.w, r5.y
+mad.f32 r5.w, c5.z, r0.z, r5.w
+mad.f32 r2.x, c13.z, r4.y, r2.x
+mad.f32 r3.z, c21.w, r2.z, r3.z
+mad.f32 r5.x, r5.y, r5.y, r5.x
+mad.f32 r2.x, c14.z, r4.z, r2.x
+mad.f32 r5.z, c10.y, r5.w, r5.z
+mad.f32 r6.x, c15.z, r4.w, r2.x
mul.f r0.x, r0.x, c6.x
-mov.f32f32 r2.x, r2.x
-mad.f32 r1.z, c15.z, r4.w, r1.z
-mov.f32f32 r3.z, r3.z
+mad.f32 r3.z, c22.w, r2.w, r3.z
+mad.f32 r6.y, c21.z, r2.z, r3.w
+mad.f32 r5.x, r6.x, r6.x, r5.x
mad.f32 r0.x, c6.y, r0.y, r0.x
-mul.f r0.y, r2.y, c20.w
-mad.f32 r5.z, r1.z, r1.z, r2.x
-mad.f32 r3.z, c5.z, r0.z, r3.z
-absneg.f r2.x, (abs)r1.z
-mov.f32f32 r0.x, r0.x
-mad.f32 r0.y, c21.w, r2.z, r0.y
-mul.f r5.w, r2.y, c20.z
-mul.f r6.x, r2.y, c20.y
-rsq r5.z, (abs)r5.z
-(ss)mov.f32f32 r5.z, r5.z
-mad.f32 r3.w, c10.y, r3.z, r3.w
-mad.f32 r0.x, c6.z, r0.z, r0.x
-mad.f32 r0.y, c22.w, r2.w, r0.y
-mul.f r0.z, r3.y, r5.z
-mul.f r3.y, r5.y, r5.z
-mul.f r1.z, r1.z, r5.z
-mov.f32f32 r3.w, r3.w
-add.f r0.z, c10.x, (neg)r0.z
-add.f r3.y, c10.y, (neg)r3.y
-add.f r1.z, c10.z, (neg)r1.z
-mad.f32 r5.y, c10.z, r0.x, r3.w
-mul.f r5.z, r0.z, r0.z
-mad.f32 r3.w, c23.w, r3.x, r0.y
-mad.f32 r0.y, r3.y, r3.y, r5.z
-max.f r5.z, r5.y, c25.x
-cmps.f.lt r5.y, (neg)r5.y, c24.x
-mad.f32 r5.w, c21.z, r2.z, r5.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r5.z, r5.z
-mad.f32 r0.y, r1.z, r1.z, r0.y
-mul.f r6.y, c16.z, r1.y
-mul.f r6.z, c16.y, r1.x
-mul.f r6.w, c16.x, r0.w
-mad.f32 r5.w, c22.z, r2.w, r5.w
-mad.f32 r6.x, c21.y, r2.z, r6.x
+absneg.f r2.x, (abs)r6.x
+mad.f32 r3.w, c23.w, r3.x, r3.z
+mad.f32 r0.y, c22.z, r2.w, r6.y
+mul.f r6.y, r2.y, c20.y
mul.f r2.y, r2.y, c20.x
-rsq r0.y, (abs)r0.y
-(ss)mov.f32f32 r0.y, r0.y
-mad.f32 r7.x, c8.z, r1.y, c9.z
-mad.f32 r7.y, c8.y, r1.x, c9.y
-mad.f32 r7.z, c8.x, r0.w, c9.x
-mul.f r0.z, r0.z, r0.y
-mul.f r3.y, r3.y, r0.y
-mul.f r0.y, r1.z, r0.y
-add.f r1.z, r6.y, r7.x
-mul.f r0.z, r5.x, r0.z
-mul.f r1.y, c17.z, r1.y
-mad.f32 r0.z, r3.z, r3.y, r0.z
-add.f r3.y, r6.z, r7.y
-add.f r5.x, r6.w, r7.z
-mad.f32 r3.z, c23.z, r3.x, r5.w
-mov.f32f32 r0.z, r0.z
-mad.f32 r1.y, r5.z, r1.y, r1.z
-mad.f32 r0.x, r0.x, r0.y, r0.z
-mul.f r0.y, c17.y, r1.x
-mul.f r0.z, c17.x, r0.w
-mad.f32 r0.w, c22.y, r2.w, r6.x
+rsq r3.z, (abs)r5.x
+(ss)mov.f32f32 r5.x, r3.z
+mul.f r6.x, r6.x, r3.z
+mad.f32 r0.x, c6.z, r0.z, r0.x
+mad.f32 r3.z, c23.z, r3.x, r0.y
+mul.f r0.y, r0.w, r5.x
+mul.f r0.z, r5.y, r5.x
+(rpt1)nop
+add.f r0.y, c10.x, (neg)r0.y
+add.f r0.z, c10.y, (neg)r0.z
+add.f r0.w, c10.z, (neg)r6.x
+mad.f32 r5.x, c10.z, r0.x, r5.z
+mul.f r5.y, r0.y, r0.y
+mad.f32 r5.z, c21.y, r2.z, r6.y
+mad.f32 r5.y, r0.z, r0.z, r5.y
+max.f r6.x, r5.x, c25.x
+mad.f32 r5.y, r0.w, r0.w, r5.y
+mul.f r6.y, c16.x, r1.x
+cmps.f.lt r5.x, (neg)r5.x, c24.x
+mad.f32 r5.z, c22.y, r2.w, r5.z
+mad.f32 r2.y, c21.x, r2.z, r2.y
+mul.f r2.z, r4.x, c0.w
+mul.f r6.z, r4.x, c0.z
+rsq r5.y, (abs)r5.y
+(ss)mov.f32f32 r6.w, r5.y
+mul.f r0.w, r0.w, r5.y
+(ss)mov.f32f32 r5.y, r6.x
+mul.f r7.x, c16.y, r1.y
+mul.f r0.y, r0.y, r6.w
+mul.f r0.z, r0.z, r6.w
+mul.f r6.w, c16.z, r1.z
+mad.f32 r7.y, c8.y, r1.y, c9.y
+mul.f r0.y, r3.y, r0.y
+mad.f32 r3.y, c8.z, r1.z, c9.z
+mad.f32 r0.y, r5.w, r0.z, r0.y
+add.f r0.z, r7.x, r7.y
+mad.f32 r0.x, r0.x, r0.w, r0.y
+add.f r0.y, r6.w, r3.y
+mul.f r0.w, c17.y, r1.y
+mad.f32 r1.y, c8.x, r1.x, c9.x
max.f r0.x, r0.x, c25.x
-mad.f32 r1.x, r5.z, r0.y, r3.y
-mad.f32 r1.z, r5.z, r0.z, r5.x
-mad.f32 r3.y, c23.y, r3.x, r0.w
-mad.f32 r0.y, c21.x, r2.z, r2.y
-mul.f r0.z, r4.x, c0.w
-mul.f r0.w, r4.x, c0.z
+mul.f r1.z, c17.z, r1.z
+mad.f32 r5.w, r5.y, r0.w, r0.z
+add.f r0.z, r6.y, r1.y
+mad.f32 r3.y, c23.y, r3.x, r5.z
+mad.f32 r0.w, c22.x, r2.w, r2.y
+mad.f32 r1.y, c1.w, r4.y, r2.z
log2 r0.x, r0.x
-(ss)mov.f32f32 r0.x, r0.x
mov.f32f32 r2.y, c7.x
-mad.f32 r0.y, c22.x, r2.w, r0.y
-mad.f32 r0.z, c1.w, r4.y, r0.z
-mad.f32 r0.w, c1.z, r4.y, r0.w
-min.f r2.y, r2.y, c25.z
-mad.f32 r3.x, c23.x, r3.x, r0.y
-mad.f32 r0.y, c2.w, r4.z, r0.z
-mad.f32 r0.z, c2.z, r4.z, r0.w
-mul.f r0.x, r2.y, r0.x
-mad.f32 r0.w, c3.w, r4.w, r0.y
+mad.f32 r1.z, r5.y, r1.z, r0.y
+mul.f r0.y, c17.x, r1.x
+mad.f32 r3.x, c23.x, r3.x, r0.w
+min.f r0.w, r2.y, c25.z
+mad.f32 r1.x, c2.w, r4.z, r1.y
+mad.f32 r1.y, c1.z, r4.y, r6.z
+mul.f r2.y, r4.x, c0.y
+(ss)mul.f r0.x, r0.w, r0.x
+mad.f32 r0.y, r6.x, r0.y, r0.z
+mad.f32 r0.w, c3.w, r4.w, r1.x
+mad.f32 r0.z, c2.z, r4.z, r1.y
+mad.f32 r1.x, c1.y, r4.y, r2.y
+mul.f r1.y, r4.x, c0.x
+max.f r1.w, r1.w, c24.x
+exp2 r0.x, r0.x
+(ss)sel.b32 r0.x, r0.x, r5.x, c25.x
mad.f32 r0.z, c3.z, r4.w, r0.z
-mul.f r0.y, r4.x, c0.y
-mov.f32f32 r0.x, r0.x
-mad.f32 r0.y, c1.y, r4.y, r0.y
-mul.f r2.y, r4.x, c0.x
-mad.f32 r0.y, c2.y, r4.z, r0.y
-mad.f32 r2.y, c1.x, r4.y, r2.y
+mad.f32 r1.x, c2.y, r4.z, r1.x
+mad.f32 r1.y, c1.x, r4.y, r1.y
+mov.f32f32 r2.y, r0.x
+mad.f32 r0.x, c18.x, r0.x, r0.y
+mad.f32 r0.y, c3.y, r4.w, r1.x
+mad.f32 r1.y, c2.x, r4.z, r1.y
+mov.f32f32 r1.x, r2.y
+max.f r0.x, r0.x, c24.x
(rpt1)nop
-exp2 r0.x, r0.x
-(ss)mov.f32f32 r0.x, r0.x
-mad.f32 r0.y, c3.y, r4.w, r0.y
-mad.f32 r2.y, c2.x, r4.z, r2.y
-nop
-sel.b32 r4.x, r0.x, r5.y, c25.x
-mad.f32 r0.x, c3.x, r4.w, r2.y
+mad.f32 r1.z, c18.z, r1.x, r1.z
+mad.f32 r2.y, c18.y, r1.x, r5.w
+min.f r1.x, r0.x, c24.y
+mad.f32 r0.x, c3.x, r4.w, r1.y
+max.f r1.y, r1.z, c24.x
+max.f r2.y, r2.y, c24.x
+(rpt1)nop
+min.f r1.z, r1.y, c24.y
+min.f r1.y, r2.y, c24.y
+min.f r1.w, r1.w, c24.y
mov.f32f32 r2.w, c24.y
mov.f32f32 r2.z, c24.x
-mov.f32f32 r4.x, r4.x
mov.f32f32 r2.y, c24.x
-(rpt1)nop
-mov.f32f32 r4.x, r4.x
-(rpt2)nop
-mad.f32 r1.y, c18.z, r4.x, r1.y
-mad.f32 r1.x, c18.y, r4.x, r1.x
-mad.f32 r1.z, c18.x, r4.x, r1.z
-nop
-max.f r1.y, r1.y, c24.x
-max.f r1.x, r1.x, c24.x
-max.f r4.x, r1.z, c24.x
-nop
-min.f r1.z, r1.y, c24.y
-min.f r1.y, r1.x, c24.y
-min.f r1.x, r4.x, c24.y
end
-nop
; VERT: outputs: r0.x (0:0) r1.x (1:0) r2.x (3:0) r3.x (5:0)
-; VERT: inputs: r4.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r0.w (0:0,cm=f,il=16,b=0) r2.y (0:0,cm=f,il=20,b=0)
-; VERT: 143 instructions, 0 half, 8 full
+; VERT: inputs: r4.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r1.x (0:0,cm=f,il=16,b=0) r2.y (0:0,cm=f,il=20,b=0)
+; VERT: 127 instructions, 0 half, 8 full
diff --git a/reference/stk-mines/stk-mines-21.asm b/reference/stk-mines/stk-mines-21.asm
index 74062e3..d96960a 100644
--- a/reference/stk-mines/stk-mines-21.asm
+++ b/reference/stk-mines/stk-mines-21.asm
@@ -6,39 +6,24 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)bary.f r0.z, 4, r0.x
-bary.f r0.w, 7, r0.x
-bary.f r1.x, 5, r0.x
+bary.f r0.w, 5, r0.x
+bary.f r1.x, 7, r0.x
bary.f r1.y, 3, r0.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.x, r1.x
bary.f r1.z, 2, r0.x
-mov.f32f32 r1.w, r0.z
-mov.f32f32 r2.y, r0.w
-mov.f32f32 r2.x, r1.x
-bary.f r0.z, 1, r0.x
+bary.f r2.x, 1, r0.x
bary.f (ei)r0.x, 0, r0.x
-(rpt3)nop
-sam.p (f32)(xyzw)r1.w, r1.w, s#0, t#0
-(sy)mul.f r0.y, r1.y, r2.z
-mul.f r0.w, r2.y, r1.z
-mul.f r0.z, r2.x, r0.z
-mul.f r0.x, r1.w, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.x, r0.x
-(ss)mov.f32f32 r1.w, r0.y
-mov.f32f32 r1.z, r0.w
-mov.f32f32 r1.y, r0.z
-mov.f32f32 r1.x, r0.x
+(rpt1)nop
+sam.p (f32)(xyzw)r0.y, r0.z, s#0, t#0
+(sy)mul.f r1.w, r1.y, r1.x
+mul.f r1.z, r0.w, r1.z
+mul.f r1.y, r0.z, r2.x
+(ss)mul.f r1.x, r0.y, r0.x
end
+nop
+nop
; FRAG: outputs: r1.x (1:0)
-; FRAG: inputs: r0.z (1:0,cm=f,il=8,b=1) r0.x (5:0,cm=f,il=12,b=1)
-; FRAG: 35 instructions, 0 half, 3 full
+; FRAG: inputs: r0.z (1:0,cm=f,il=8,b=1) r0.y (5:0,cm=f,il=12,b=1)
+; FRAG: 15 instructions, 0 half, 3 full
diff --git a/reference/stk-mines/stk-mines-22.asm b/reference/stk-mines/stk-mines-22.asm
index 1435d60..141701a 100644
--- a/reference/stk-mines/stk-mines-22.asm
+++ b/reference/stk-mines/stk-mines-22.asm
@@ -6,275 +6,191 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c8.x) 0x40000000, 0xbf800000, 0x40800000, 0x3f000000
+@const(c9.x) 0x00000000, 0x3f333333, 0x3e99999a, 0x3d4ccccd
+@const(c10.x) 0x3f800000, 0x00000000, 0x00000000, 0x00000000
+@const(c11.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.z, 0, r0.x
absneg.f r0.w, (neg)c6.x
-mov.f32f32 r1.x, c3.x
-bary.f r1.y, 1, r0.x
-add.f r1.z, r0.z, c3.x
-add.f r1.w, r0.z, c4.x
-add.f r2.x, c7.x, r0.w
-add.f r0.z, r0.z, r1.x
-mov.f32f32 r1.x, r1.z
-mov.f32f32 r1.z, r1.w
-mov.f32f32 r1.w, r2.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r2.x, c9.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r2.y, r1.x
-add.f r1.x, r1.y, c3.y
-mov.f32f32 r2.w, r1.z
-add.f r1.z, r1.y, c4.y
-rcp r1.w, r1.w
-(ss)mov.f32f32 r1.w, r1.w
-mov.f32f32 r1.x, r1.x
-bary.f r3.y, 6, r0.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r1.x, r1.x
-add.f r0.w, r3.y, r0.w
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r3.z, r0.z
-mov.f32f32 r2.z, r1.x
-mul.f r0.z, r0.w, r1.w
-mov.f32f32 r3.x, r1.z
-add.f r0.w, r1.y, r2.x
-cmps.f.lt r1.x, c6.x, r3.y
-cmps.f.lt r1.y, c7.x, r3.y
-bary.f r1.z, 11, r0.x
-sam (f32)(xyz)r1.w, r2.y, s#0, t#0
-(sy)mad.f32 r2.x, c8.x, r2.x, c8.y
-mov.f32f32 r0.z, r0.z
-(ss)nop
-sam (f32)(xyz)r2.z, r2.w, s#1, t#1
-(sy)(ss)mad.f32 r2.w, c8.x, r2.w, c8.y
-mad.f32 r2.z, c8.x, r2.z, c8.y
-mov.f32f32 r2.x, r2.x
-mul.f r3.y, r0.z, c5.w
-mul.f r3.w, r0.z, c5.z
-mul.f r4.x, r0.z, c5.y
-mul.f r2.x, c8.z, r2.x
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r2.x, r2.x
-mad.f32 r1.w, c8.x, r1.w, c8.y
-mul.f r2.w, c8.z, r2.w
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r4.y, r3.w
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r2.w, r2.w
-mul.f r3.w, r2.z, r2.z
-mov.f32f32 r4.x, r4.x
-mul.f r4.z, r1.w, r1.w
-mov.f32f32 r2.w, r2.w
-mad.f32 r4.z, r2.x, r2.x, r4.z
-mul.f r4.w, r0.z, c5.x
-add.f r0.z, c10.x, (neg)r0.z
-mad.f32 r3.w, r2.w, r2.w, r3.w
-mov.f32f32 r4.z, r4.z
+bary.f r1.x, 6, r0.x
+mov.f32f32 r1.y, c3.x
+add.f r1.z, r0.z, c4.x
+bary.f r2.x, 1, r0.x
+add.f r2.y, r0.z, c3.x
+add.f r2.w, c7.x, r0.w
+cmps.f.lt r3.x, c6.x, r1.x
+add.f r1.w, r2.x, c4.y
+add.f r2.z, r2.x, c3.y
+cmps.f.lt r3.y, c7.x, r1.x
+add.f r3.z, r0.z, r1.y
+cov.u32f32 r0.z, r3.x
+rcp r1.y, r2.w
+add.f r0.w, r1.x, r0.w
+cov.u32f32 r1.x, r3.y
+sam (f32)(xyz)r3.w, r1.z, s#1, t#1
+(sy)(ss)mad.f32 r1.z, c8.x, r4.x, c8.y
+sam (f32)(xyz)r2.y, r2.y, s#0, t#0
+(sy)mad.f32 r1.w, c8.x, r2.z, c8.y
+(ss)mul.f r0.w, r0.w, r1.y
+mad.f32 r1.y, c8.x, r3.w, c8.y
+mul.f r1.z, c8.z, r1.z
+mul.f r1.w, c8.z, r1.w
+(ss)mov.f32f32 r2.z, r0.w
+mov.f32f32 r3.x, r1.y
+mov.f32f32 r3.y, r1.z
+mov.f32f32 r4.x, r1.w
mad.f32 r2.y, c8.x, r2.y, c8.y
-mov.f32f32 r4.w, r4.w
-mad.f32 r3.x, c8.x, r3.x, c8.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r3.w, r3.w
mov.f32f32 r3.x, r3.x
-mov.f32f32 r0.w, r0.w
-mad.f32 r4.z, r2.y, r2.y, r4.z
-cov.u32f32 r1.x, r1.x
-cov.u32f32 r1.y, r1.y
-(rpt3)nop
-rsq r4.z, r4.z
-(ss)mov.f32f32 r4.z, r4.z
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r0.w, r0.w
+mov.f32f32 r3.y, r3.y
+mul.f r4.z, r2.z, c5.w
+mul.f r4.w, r2.z, c5.z
+mul.f r1.y, r1.y, r3.x
+mov.f32f32 r5.x, r2.y
+mad.f32 r1.y, r1.z, r3.y, r1.y
+mad.f32 r1.z, c8.x, r4.y, c8.y
+mul.f r4.y, r2.z, c5.y
+mul.f r2.z, r2.z, c5.x
+mul.f r2.y, r2.y, r5.x
+mov.f32f32 r3.w, r1.z
+mad.f32 r1.w, r1.w, r4.x, r2.y
+mad.f32 r2.y, c8.x, r2.w, c8.y
+add.f r0.w, c10.x, (neg)r0.w
+mov.f32f32 r2.w, r3.w
+cmps.f.ne r0.z, r0.z, c9.x
cmps.f.ne r1.x, r1.x, c9.x
-mul.f r1.w, r1.w, r4.z
-mul.f r2.x, r2.x, r4.z
-mul.f r2.y, r2.y, r4.z
-mad.f32 r4.z, r3.x, r3.x, r3.w
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r3.w, r0.w
-cmps.f.ne r0.w, r1.y, c9.x
-mov.f32f32 r1.y, r1.z
-rsq r1.z, r4.z
-(ss)mov.f32f32 r1.z, r1.z
-(ss)bary.f r4.z, 12, r0.x
-bary.f r5.x, 13, r0.x
+mov.f32f32 r3.w, c9.x
+mad.f32 r1.y, r2.w, r2.w, r1.y
+mov.f32f32 r2.w, r2.y
mov.f32f32 r5.y, c9.x
-mad.f32 r1.w, r2.z, r1.z, r1.w
-mad.f32 r2.x, r2.w, r1.z, r2.x
-mad.f32 r1.z, r3.x, r1.z, r2.y
-sam (f32)(xyzw)r2.y, r3.z, s#2, t#2
-(ss)mov.f32f32 r3.z, r4.z
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r3.w, r5.x
-mul.f r1.w, r1.w, c8.w
-mul.f r2.x, r2.x, c8.w
-mul.f r1.z, r1.z, c8.w
-nop
-mov.f32f32 r1.w, r1.w
-bary.f r4.z, 8, r0.x
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r5.x, c9.x
-mul.f r5.z, r1.w, r4.z
-bary.f r5.w, 9, r0.x
-mul.f r6.x, r4.z, r1.w
-mov.f32f32 r6.y, c5.w
-mov.f32f32 r6.z, c5.z
-mad.f32 r5.z, r2.x, r5.w, r5.z
-mad.f32 r6.x, r5.w, r2.x, r6.x
-mov.f32f32 r6.w, c5.y
+mov.f32f32 r5.z, c9.x
+mov.f32f32 r5.w, c5.w
+mov.f32f32 r6.x, c5.z
+mov.f32f32 r6.y, c5.y
+rsq r1.y, r1.y
+(ss)mov.f32f32 r6.z, r1.y
+mad.f32 r1.w, r2.w, r2.w, r1.w
+add.f r3.w, r2.x, r3.w
+bary.f r2.x, 11, r0.x
+bary.f r2.w, 12, r0.x
+bary.f r6.w, 13, r0.x
+bary.f r7.x, 8, r0.x
+bary.f r7.y, 9, r0.x
+rsq r1.w, r1.w
+(ss)mov.f32f32 r7.z, r1.w
+(ss)mul.f r1.w, r2.y, r1.w
+sam (f32)(xyzw)r7.w, r3.z, s#2, t#2
bary.f (ei)r0.x, 10, r0.x
-mov.f32f32 r0.y, r5.z
-mov.f32f32 r5.z, r6.x
-mov.f32f32 r6.x, c5.x
-mad.f32 r0.y, r1.z, r0.x, r0.y
-mad.f32 r5.z, r0.x, r1.z, r5.z
-(rpt1)nop
-mul.f r1.w, r0.y, r1.w
-max.f r5.z, r5.z, c9.x
-mul.f r2.x, r0.y, r2.x
-mul.f r0.y, r0.y, r1.z
-mov.f32f32 r1.z, r1.w
-mov.f32f32 r1.w, r5.z
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r0.y, r0.y
-mul.f r1.z, c8.x, r1.z
-mad.f32 r1.w, c9.y, r1.w, c9.z
-mul.f r2.x, c8.x, r2.x
-mul.f r0.y, c8.x, r0.y
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r0.y, r0.y
-add.f r1.z, r4.z, (neg)r1.z
-(sy)mul.f r3.x, r3.x, r1.w
-mul.f r2.w, r2.w, r1.w
-mul.f r2.z, r2.z, r1.w
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r4.z, r3.x
-mov.f32f32 r5.z, r2.w
-mov.f32f32 r7.x, r2.z
-mul.f r7.y, r1.z, r1.z
-add.f r2.x, r5.w, (neg)r2.x
-add.f r4.z, r4.z, r5.x
-add.f r0.x, r0.x, (neg)r0.y
-mul.f r0.y, r2.y, r1.w
-mov.f32f32 r1.w, r2.x
-add.f r2.x, r4.z, r5.y
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r2.y, r0.y
-mad.f32 r4.z, r1.w, r1.w, r7.y
+mov.f32f32 r0.y, c5.x
+mul.f r2.y, r5.x, r7.z
+(ss)mul.f r3.z, r4.x, r7.z
+mad.f32 r2.y, r3.x, r6.z, r2.y
+mad.f32 r3.x, r3.y, r6.z, r3.z
+mad.f32 r1.y, r1.z, r1.y, r1.w
+nop
+mul.f r1.z, r2.y, c8.w
+mul.f r1.w, r3.x, c8.w
+mul.f r1.y, r1.y, c8.w
+nop
+mov.f32f32 r2.y, r1.z
+mul.f r1.z, r7.x, r1.z
+mov.f32f32 r3.x, r1.w
+mov.f32f32 r3.y, r1.y
+mul.f r3.z, r2.y, r7.x
+mad.f32 r1.z, r7.y, r1.w, r1.z
+mad.f32 r1.w, r3.x, r7.y, r3.z
+mad.f32 r1.y, r0.x, r1.y, r1.z
+mad.f32 r1.z, r3.y, r0.x, r1.w
(rpt2)nop
-mov.f32f32 r4.z, r4.z
-nop
-mad.f32 r4.z, r0.x, r0.x, r4.z
-(rpt5)nop
-rsq r4.z, r4.z
-(ss)mov.f32f32 r4.z, r4.z
+mul.f r1.w, r1.z, r2.y
+max.f r1.y, r1.y, c9.x
+mul.f r2.y, r1.z, r3.x
+mul.f r1.z, r1.z, r3.y
+mul.f r1.w, c8.x, r1.w
+mad.f32 r1.y, c9.y, r1.y, c9.z
+mul.f r2.y, c8.x, r2.y
+mul.f r1.z, c8.x, r1.z
+add.f r1.w, r7.x, (neg)r1.w
+mov.f32f32 r3.x, r1.y
+add.f r2.y, r7.y, (neg)r2.y
+add.f r0.x, r0.x, (neg)r1.z
+mov.f32f32 r1.z, r1.w
+(sy)mul.f r3.y, r8.z, r3.x
+mov.f32f32 r3.z, r2.y
+mov.f32f32 r3.w, r0.x
+mul.f r1.w, r1.w, r1.z
+add.f r4.x, r3.y, r5.z
+mad.f32 r1.w, r2.y, r3.z, r1.w
+mul.f r2.y, r8.y, r3.x
+mad.f32 r1.w, r3.w, r3.w, r1.w
+add.f r3.w, r4.x, r5.y
+mul.f r3.x, r8.x, r3.x
+mul.f r1.y, r7.w, r1.y
(rpt2)nop
-mul.f r1.z, r1.z, r4.z
-mul.f r1.w, r1.w, r4.z
-mul.f r0.x, r0.x, r4.z
-nop
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r0.x, r0.x
+rsq r1.w, r1.w
+(ss)mov.f32f32 r4.x, r1.w
+mul.f r0.x, r0.x, r1.w
+(rpt1)nop
+mul.f r1.z, r1.z, r4.x
+(ss)mul.f r1.w, r3.z, r4.x
+(rpt1)nop
+mul.f r1.z, r1.z, r2.x
nop
-mul.f r1.y, r1.z, r1.y
+mad.f32 r1.z, r1.w, r2.w, r1.z
nop
-mad.f32 r1.y, r1.w, r3.z, r1.y
-(rpt2)nop
-mov.f32f32 r1.y, r1.y
-nop
-mad.f32 r0.x, r0.x, r3.w, r1.y
-(rpt2)nop
-mov.f32f32 r0.x, r0.x
+mad.f32 r0.x, r0.x, r6.w, r1.z
(rpt2)nop
max.f r0.x, r0.x, c9.x
(rpt2)nop
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
-mov.f32f32 r1.y, r0.x
mov.f32f32 r1.z, r0.x
-cmps.f.lt r0.x, c9.x, r0.x
-(rpt1)nop
-mul.f r1.y, r1.y, r1.z
-cov.u32f32 r0.x, r0.x
+(rpt2)nop
+mul.f r0.x, r0.x, r1.z
+cmps.f.lt r1.z, c9.x, r1.z
(rpt1)nop
-mov.f32f32 r1.y, r1.y
-cmps.f.ne r0.x, r0.x, c9.x
+mov.f32f32 r1.w, r0.x
+cov.u32f32 r1.z, r1.z
(rpt1)nop
-mul.f r1.y, r1.y, r1.y
-sel.b32 r1.z, r2.x, r0.x, r3.x
+mul.f r0.x, r0.x, r1.w
+cmps.f.ne r1.z, r1.z, c9.x
(rpt1)nop
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.w, r1.z
+mov.f32f32 r1.w, r0.x
+sel.b32 r2.x, r3.w, r1.z, r3.y
+mul.f r0.x, r0.x, c9.w
+nop
+mul.f r1.w, r1.w, r1.w
+mad.f32 r2.w, r0.w, r2.x, r4.z
+add.f r3.y, r2.y, r0.x
+add.f r3.z, r3.x, r0.x
+mul.f r1.w, r1.w, c9.z
+add.f r0.x, r1.y, r0.x
+sel.b32 r2.x, r2.w, r0.z, r2.x
+nop
+mov.f32f32 r2.w, r1.w
+add.f r0.x, r0.x, r1.w
+sel.b32 r1.w, r5.w, r1.x, r2.x
+nop
+add.f r2.x, r3.y, r2.w
+add.f r2.w, r3.z, r2.w
+sel.b32 r0.x, r0.x, r1.z, r1.y
+nop
+sel.b32 r1.y, r2.x, r1.z, r2.y
+sel.b32 r1.z, r2.w, r1.z, r3.x
+mad.f32 r2.x, r0.w, r0.x, r2.z
+nop
+mad.f32 r2.y, r0.w, r1.y, r4.w
+mad.f32 r0.w, r0.w, r1.z, r4.y
+sel.b32 r0.x, r2.x, r0.z, r0.x
+nop
+sel.b32 r1.y, r2.y, r0.z, r1.y
+sel.b32 r0.z, r0.w, r0.z, r1.z
(rpt1)nop
-mul.f r2.x, r1.y, r1.y
-mov.f32f32 r1.w, r1.w
-mul.f r1.y, r1.y, c9.w
-nop
-mov.f32f32 r2.x, r2.x
-mad.f32 r1.w, r0.z, r1.w, r3.y
-mov.f32f32 r3.x, r1.y
-mov.f32f32 r3.y, r1.y
-mul.f r2.x, r2.x, c9.z
-sel.b32 r1.z, r1.w, r1.x, r1.z
-add.f r1.w, r5.z, r3.x
-add.f r3.x, r7.x, r3.y
-mov.f32f32 r2.x, r2.x
-sel.b32 r1.z, r6.y, r0.w, r1.z
-mov.f32f32 r1.y, r1.y
-nop
-mov.f32f32 r3.y, r2.x
-mov.f32f32 r3.z, r2.x
-mov.f32f32 r2.x, r2.x
-add.f r1.y, r2.y, r1.y
-add.f r2.y, r1.w, r3.y
-add.f r3.x, r3.x, r3.z
-mov.f32f32 r1.w, r1.z
-nop
-sel.b32 r1.z, r2.y, r0.x, r2.w
-sel.b32 r2.y, r3.x, r0.x, r2.z
-add.f r1.y, r1.y, r2.x
-nop
-mov.f32f32 r2.x, r1.z
-mov.f32f32 r2.z, r2.y
-sel.b32 r0.x, r1.y, r0.x, r0.y
-nop
-mov.f32f32 r0.y, r2.x
-mov.f32f32 r1.y, r2.z
-mov.f32f32 r2.x, r0.x
-nop
-mad.f32 r0.y, r0.z, r0.y, r4.y
-mad.f32 r1.y, r0.z, r1.y, r4.x
-mov.f32f32 r2.x, r2.x
-nop
-sel.b32 r0.y, r0.y, r1.x, r1.z
-sel.b32 r1.y, r1.y, r1.x, r2.y
-mad.f32 r0.z, r0.z, r2.x, r4.w
+sel.b32 r1.z, r6.x, r1.x, r1.y
+sel.b32 r1.y, r6.y, r1.x, r0.z
+sel.b32 r1.x, r0.y, r1.x, r0.x
+end
nop
-sel.b32 r0.y, r6.z, r0.w, r0.y
-sel.b32 r1.y, r6.w, r0.w, r1.y
-sel.b32 r0.x, r0.z, r1.x, r0.x
nop
-mov.f32f32 r1.z, r0.y
-mov.f32f32 r1.y, r1.y
-sel.b32 r0.x, r6.x, r0.w, r0.x
-(rpt2)nop
-mov.f32f32 r1.x, r0.x
-end
nop
; FRAG: outputs: r1.x (1:0)
-; FRAG: inputs: r0.x (5:0,cm=f,il=8,b=1) r1.x (5:20,cm=f,il=12,b=1) r0.w (5:21,cm=f,il=16,b=1) r3.x (5:22,cm=f,il=20,b=1)
-; FRAG: 297 instructions, 0 half, 8 full
+; FRAG: inputs: r0.x (5:0,cm=f,il=8,b=1) r1.x (5:20,cm=f,il=12,b=1) r1.y (5:21,cm=f,il=16,b=1) r3.x (5:22,cm=f,il=20,b=1)
+; FRAG: 193 instructions, 0 half, 9 full
diff --git a/reference/stk-mines/stk-mines-23.asm b/reference/stk-mines/stk-mines-23.asm
index 2d84780..726bef5 100644
--- a/reference/stk-mines/stk-mines-23.asm
+++ b/reference/stk-mines/stk-mines-23.asm
@@ -1,16 +1,16 @@
; options:
; VERT: new compiler
-@in(r1.x) in0
-@in(r1.y) in1
-@in(r1.z) in2
-@in(r1.w) in3
+@in(r4.z) in0
+@in(r4.w) in1
+@in(r5.x) in2
+@in(r5.y) in3
@in(r0.x) in4
@in(r0.y) in5
@in(r0.z) in6
-@in(r5.x) in8
-@in(r5.y) in9
-@in(r5.z) in10
-@in(r5.w) in11
+@in(r1.x) in8
+@in(r1.y) in9
+@in(r1.z) in10
+@in(r1.w) in11
@out(r0.x) out0
@out(r0.y) out1
@out(r0.z) out2
@@ -31,191 +31,144 @@
@out(r4.y) out17
@out(r4.z) out18
@out(r4.w) out19
+@const(c12.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)mov.f32f32 r0.w, c1.x
-mul.f r2.x, c8.x, r1.x
+mul.f r2.x, c8.x, r4.z
mov.f32f32 r2.y, c1.y
mov.f32f32 r2.z, c1.z
mul.f r2.w, r0.w, r0.x
mov.f32f32 r3.x, c2.x
-mad.f32 r2.x, c9.x, r1.y, r2.x
+mad.f32 r2.x, c9.x, r4.w, r2.x
mul.f r3.y, r0.w, r0.w
-mad.f32 r2.x, c10.x, r1.z, r2.x
+mad.f32 r2.x, c10.x, r5.x, r2.x
mad.f32 r2.w, r3.x, r0.y, r2.w
-mad.f32 r2.x, c11.x, r1.w, r2.x
-mad.f32 r3.x, r2.y, r2.y, r3.y
-mul.f r3.y, r2.y, r0.x
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r3.z, c3.x
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r3.x, r3.x
+mov.f32f32 r3.x, c3.x
+mad.f32 r2.x, c11.x, r5.y, r2.x
+mad.f32 r3.y, r2.y, r2.y, r3.y
+mul.f r3.z, r2.y, r0.x
+mad.f32 r2.w, r3.x, r0.z, r2.w
+mov.f32f32 r3.x, r2.x
+mad.f32 r3.y, r2.z, r2.z, r3.y
mov.f32f32 r3.w, c2.y
-mad.f32 r2.w, r3.z, r0.z, r2.w
-mul.f r3.z, r2.x, r2.x
-mul.f r4.x, c8.y, r1.x
-mad.f32 r3.x, r2.z, r2.z, r3.x
-mov.f32f32 r4.y, r2.w
-mad.f32 r2.w, c9.y, r1.y, r4.x
-mad.f32 r3.y, r3.w, r0.y, r3.y
-mul.f r0.x, r2.z, r0.x
-mul.f r3.w, r4.y, r4.y
-mad.f32 r2.w, c10.y, r1.z, r2.w
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r4.x, c3.y
-mad.f32 r2.w, c11.y, r1.w, r2.w
-rsq r3.x, r3.x
-(ss)mov.f32f32 r3.x, r3.x
-mov.f32f32 r4.z, c2.z
-mad.f32 r3.y, r4.x, r0.z, r3.y
mov.f32f32 r4.x, r2.w
-mul.f r0.w, r0.w, r3.x
-mul.f r2.y, r2.y, r3.x
-mov.f32f32 r3.y, r3.y
-mad.f32 r2.w, r4.x, r4.x, r3.z
+mul.f r3.x, r3.x, r3.x
+mul.f r4.y, c8.y, r4.z
+mad.f32 r3.z, r3.w, r0.y, r3.z
+mul.f r2.w, r2.w, r4.x
+mov.f32f32 r3.w, c3.y
+mad.f32 r4.y, c9.y, r4.w, r4.y
+rsq r3.y, r3.y
+(ss)mov.f32f32 r5.z, r3.y
+mad.f32 r4.y, c10.y, r5.x, r4.y
+mad.f32 r3.z, r3.w, r0.z, r3.z
+mad.f32 r3.w, c11.y, r5.y, r4.y
+mul.f r0.w, r0.w, r5.z
+mul.f r2.y, r2.y, r5.z
+mov.f32f32 r4.y, r3.z
+mov.f32f32 r5.z, r3.w
+mul.f r5.w, c0.x, r0.w
mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.y, r2.y
-mad.f32 r3.z, r3.y, r3.y, r3.w
-mov.f32f32 r2.w, r2.w
-mul.f r3.w, c8.z, r1.x
-mul.f r4.w, c0.x, r0.w
-mov.f32f32 r3.z, r3.z
-mad.f32 r0.x, r4.z, r0.y, r0.x
-mad.f32 r0.y, c9.z, r1.y, r3.w
-mad.f32 r3.w, c0.y, r2.y, r4.w
-mad.f32 r0.y, c10.z, r1.z, r0.y
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r4.z, c3.z
-mad.f32 r0.y, c11.z, r1.w, r0.y
-mov.f32f32 r3.w, r3.w
-mul.f r2.z, r2.z, r3.x
-mad.f32 r0.x, r4.z, r0.z, r0.x
-mov.f32f32 r0.y, r0.y
-mul.f r0.z, c4.w, r1.x
-mul.f r3.x, c4.z, r1.x
-mov.f32f32 r0.x, r0.x
-mad.f32 r2.w, r0.y, r0.y, r2.w
+mad.f32 r2.w, r3.z, r4.y, r2.w
+mul.f r0.x, r2.z, r0.x
+mov.f32f32 r3.z, c2.z
+mad.f32 r3.x, r3.w, r5.z, r3.x
+mul.f r3.w, c8.z, r4.z
+mad.f32 r5.w, c0.y, r2.y, r5.w
+mad.f32 r0.x, r3.z, r0.y, r0.x
+mov.f32f32 r0.y, c3.z
+mad.f32 r3.z, c9.z, r4.w, r3.w
+mul.f r2.z, r2.z, r3.y
+(ss)mad.f32 r3.y, c10.z, r5.x, r3.z
+mad.f32 r0.x, r0.y, r0.z, r0.x
+mad.f32 r0.y, c11.z, r5.y, r3.y
+mad.f32 r3.y, c0.z, r2.z, r5.w
+mov.f32f32 r0.z, r2.y
+mov.f32f32 r2.y, r0.x
+mov.f32f32 r3.z, r0.y
+mov.f32f32 r3.w, r3.y
mov.f32f32 r2.z, r2.z
-mad.f32 r0.z, c5.w, r1.y, r0.z
-mad.f32 r3.z, r0.x, r0.x, r3.z
-mad.f32 r0.z, c6.w, r1.z, r0.z
-mad.f32 r3.x, c5.z, r1.y, r3.x
-mul.f r4.z, c4.y, r1.x
-mul.f r1.x, c4.x, r1.x
-mad.f32 r0.z, c7.w, r1.w, r0.z
-mad.f32 r3.x, c6.z, r1.z, r3.x
-rsq r3.z, r3.z
-(ss)mov.f32f32 r3.z, r3.z
-rsq r2.w, r2.w
-(ss)mov.f32f32 r4.w, r2.w
-mad.f32 r3.w, c0.z, r2.z, r3.w
-(ss)mov.f32f32 r2.w, r0.z
-mul.f r0.x, r0.x, r3.z
-mul.f r2.x, r2.x, r4.w
-mul.f r4.y, r4.y, r3.z
-mul.f r3.y, r3.y, r3.z
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r3.z, r4.y
-mov.f32f32 r3.y, r3.y
-mul.f r4.y, r0.x, r2.y
-absneg.f r2.x, (neg)r2.x
-mul.f r6.x, c0.x, r3.z
-mad.f32 r4.y, r3.y, r2.z, (neg)r4.y
-mad.f32 r6.x, c0.y, r3.y, r6.x
-mov.f32f32 r6.y, r2.x
-mul.f r2.x, r3.z, r2.z
-mov.f32f32 r2.z, r4.y
-mov.f32f32 r4.y, r6.x
-mul.f r6.x, r6.y, r6.y
-mul.f r4.x, r4.x, r4.w
-mul.f r2.z, c0.x, r2.z
-mad.f32 r2.x, r0.x, r0.w, (neg)r2.x
-mad.f32 r0.x, c0.z, r0.x, r4.y
+mad.f32 r2.y, r2.y, r2.y, r2.w
+mad.f32 r0.y, r0.y, r3.z, r3.x
+mul.f r3.x, r3.w, r3.w
+mul.f r2.w, c4.w, r4.z
+mul.f r3.w, c4.z, r4.z
+mul.f r5.w, c4.y, r4.z
+mul.f r4.z, c4.x, r4.z
+rsq r2.y, r2.y
+(ss)mov.f32f32 r6.x, r2.y
+rsq r0.y, r0.y
+(ss)mov.f32f32 r6.y, r0.y
+(ss)mul.f r0.y, r2.x, r0.y
+mul.f r0.x, r0.x, r2.y
+mul.f r2.x, r4.y, r6.x
+mul.f r2.y, r5.z, r6.y
+mul.f r4.x, r4.x, r6.x
+absneg.f r4.y, (neg)r0.y
+mov.f32f32 r0.y, r2.x
+mov.f32f32 r5.z, r0.x
+absneg.f r2.y, (neg)r2.y
+mul.f r6.x, c0.x, r4.x
mov.f32f32 r4.x, r4.x
-mul.f r0.w, r3.y, r0.w
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r0.x, r0.x
-absneg.f r3.y, (neg)r4.x
-mad.f32 r0.w, r3.z, r2.y, (neg)r0.w
-mad.f32 r2.x, c0.y, r2.x, r2.z
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r4.x, r3.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r3.y, r3.w
-mul.f r0.y, r0.y, r4.w
-mad.f32 r0.w, c0.z, r0.w, r2.x
-mad.f32 r2.x, r4.x, r4.x, r6.x
-mul.f r2.y, r3.y, r3.y
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.x, r2.x
-mad.f32 r3.x, c7.z, r1.w, r3.x
-mad.f32 r3.z, c5.y, r1.y, r4.z
-mov.f32f32 r3.w, r0.w
-absneg.f r0.y, (neg)r0.y
-mov.f32f32 r2.z, r3.x
-mad.f32 r0.w, c6.y, r1.z, r3.z
-mad.f32 r2.y, r3.w, r3.w, r2.y
-mov.f32f32 r3.z, r0.y
-mad.f32 r0.y, c7.y, r1.w, r0.w
-mad.f32 r0.w, c5.x, r1.y, r1.x
-mov.f32f32 r1.x, r2.y
-mad.f32 r1.y, r3.z, r3.z, r2.x
-mad.f32 r1.x, r0.x, r0.x, r1.x
+mul.f r6.z, r5.z, r0.z
+mov.f32f32 r6.w, r2.y
+mad.f32 r6.z, r0.y, r2.z, (neg)r6.z
+mov.f32f32 r7.x, r4.y
+mad.f32 r2.x, c0.y, r2.x, r6.x
+mul.f r2.z, r4.x, r2.z
+mul.f r6.x, c0.x, r6.z
+mad.f32 r2.z, r5.z, r0.w, (neg)r2.z
+mul.f r5.z, r7.x, r7.x
+mad.f32 r0.x, c0.z, r0.x, r2.x
+mad.f32 r2.x, r2.y, r6.w, r5.z
+mad.f32 r2.y, c0.y, r2.z, r6.x
+mul.f r0.y, r0.y, r0.w
+mul.f r0.w, r3.z, r6.y
+mad.f32 r0.y, r4.x, r0.z, (neg)r0.y
+mov.f32f32 r0.z, r0.x
+mad.f32 r2.z, c5.w, r4.w, r2.w
+mad.f32 r2.w, c5.z, r4.w, r3.w
+mad.f32 r0.y, c0.z, r0.y, r2.y
+absneg.f r2.y, (neg)r0.w
+mov.f32f32 r3.z, r0.z
+mad.f32 r0.z, c6.w, r5.x, r2.z
+mov.f32f32 r2.z, r0.y
+mov.f32f32 r4.x, r2.y
+mad.f32 r0.w, c7.w, r5.y, r0.z
+mad.f32 r0.z, c6.z, r5.x, r2.w
+mov.f32f32 r5.z, r2.z
+mad.f32 r2.x, r2.y, r4.x, r2.x
+mov.f32f32 r2.w, r0.w
+mad.f32 r0.z, c7.z, r5.y, r0.z
+mad.f32 r0.y, r0.y, r5.z, r3.x
+mad.f32 r2.y, c5.y, r4.w, r5.w
+mad.f32 r0.x, r0.x, r3.z, r0.y
+mov.f32f32 r2.z, r0.z
+mad.f32 r0.y, c6.y, r5.x, r2.y
+mad.f32 r2.y, c5.x, r4.w, r4.z
+mad.f32 r0.y, c7.y, r5.y, r0.y
+mad.f32 r4.z, c6.x, r5.x, r2.y
+nop
+rsq r0.x, r0.x
+(ss)mov.f32f32 r2.y, r0.x
+mul.f r3.x, r3.y, r0.x
+(ss)rsq r0.x, r2.x
+(ss)mov.f32f32 r2.x, r0.x
+mul.f r3.w, r4.y, r0.x
+mul.f r3.z, r3.z, r2.y
+mul.f r3.y, r5.z, r2.y
+mul.f r4.y, r4.x, r2.x
+mul.f r4.x, r6.w, r2.x
mov.f32f32 r2.y, r0.y
-mad.f32 r1.z, c6.x, r1.z, r0.w
-mov.f32f32 r0.w, r0.z
-mov.f32f32 r0.z, r3.x
-mov.f32f32 r0.y, r0.y
-mad.f32 r1.z, c7.x, r1.w, r1.z
-rsq r1.x, r1.x
-(ss)mov.f32f32 r1.x, r1.x
-rsq r1.y, r1.y
-(ss)mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.w, (0.000000)
-mov.f32f32 r2.x, r1.z
-mul.f r0.x, r0.x, r1.x
-mul.f r3.x, r3.w, r1.x
-mul.f r1.x, r3.y, r1.x
-mul.f r3.y, r3.z, r1.y
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r3.w, r3.y
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r4.y, r3.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r1.x, r1.x
+mad.f32 r0.x, c7.x, r5.y, r4.z
+mov.f32f32 r4.z, r4.y
+mov.f32f32 r4.w, (0.000000)
nop
-mov.f32f32 r3.z, r0.x
-mov.f32f32 r3.y, r3.x
-mov.f32f32 r3.x, r1.x
-mov.f32f32 r0.x, r4.y
-mul.f r1.x, r4.x, r1.y
-mul.f r1.y, r6.y, r1.y
-mov.f32f32 r4.z, r3.w
-mov.f32f32 r4.y, r0.x
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r0.x, r1.z
-mov.f32f32 r4.w, r1.w
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.y, r1.y
-(rpt1)nop
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r3.w, r1.y
-mov.f32f32 r1.w, r5.w
-mov.f32f32 r1.z, r5.z
-mov.f32f32 r4.x, r1.x
-mov.f32f32 r1.y, r5.y
-mov.f32f32 r1.x, r5.x
+mov.f32f32 r2.x, r0.x
end
nop
nop
+nop
; VERT: outputs: r0.x (0:0) r1.x (5:0) r2.x (5:20) r3.x (5:21) r4.x (5:22)
-; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r5.x (0:0,cm=f,il=16,b=0)
-; VERT: 183 instructions, 0 half, 7 full
+; VERT: inputs: r4.z (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r1.x (0:0,cm=f,il=16,b=0)
+; VERT: 133 instructions, 0 half, 8 full
diff --git a/reference/stk-mines/stk-mines-24.asm b/reference/stk-mines/stk-mines-24.asm
index 68f0c23..3d55b19 100644
--- a/reference/stk-mines/stk-mines-24.asm
+++ b/reference/stk-mines/stk-mines-24.asm
@@ -6,51 +6,36 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c4.x) 0x3f800000, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)bary.f r0.z, 4, r0.x
bary.f r0.w, 8, r0.x
-bary.f r1.x, 11, r0.x
-bary.f r1.y, 9, r0.x
+bary.f r1.x, 9, r0.x
+bary.f r1.y, 11, r0.x
mad.f32 r0.z, c1.x, r0.z, c1.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.y, r1.y
+bary.f r1.z, 3, r0.x
+bary.f r2.x, 2, r0.x
+bary.f r2.y, 1, r0.x
max.f r0.z, r0.z, c4.y
-mov.f32f32 r1.z, r0.w
-mov.f32f32 r2.x, r1.x
-mov.f32f32 r1.w, r1.y
-min.f r0.z, r0.z, c4.x
-bary.f r0.w, 3, r0.x
-bary.f r1.x, 2, r0.x
-bary.f r1.y, 1, r0.x
-add.f r2.y, c4.x, (neg)r0.z
bary.f (ei)r0.x, 0, r0.x
nop
-sam.p (f32)(xyzw)r2.z, r1.z, s#0, t#0
-(sy)mul.f r0.y, r0.w, r3.y
-mul.f r0.w, c2.z, r2.y
-(ss)mul.f r1.z, c2.y, r2.y
-mul.f r1.w, c2.x, r2.y
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.w, r0.w
-mul.f r1.x, r3.x, r1.x
-mov.f32f32 r2.x, r1.z
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r0.y, r0.y
-mad.f32 r0.w, r1.x, r0.z, r0.w
-mul.f r1.x, r2.w, r1.y
+sam.p (f32)(xyzw)r2.z, r0.w, s#0, t#0
+(sy)mul.f r1.w, r1.z, r3.y
+min.f r0.y, r0.z, c4.x
+mul.f r0.z, r3.x, r2.x
+(ss)mul.f r0.w, r2.w, r2.y
mul.f r0.x, r2.z, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.z, r0.w
-mad.f32 r0.w, r1.x, r0.z, r2.x
-mad.f32 r0.x, r0.x, r0.z, r1.w
-mov.f32f32 r1.w, r0.y
+add.f r1.x, c4.x, (neg)r0.y
+(rpt2)nop
+mul.f r1.y, c2.z, r1.x
+mul.f r2.x, c2.y, r1.x
+mad.f32 r1.z, r0.z, r0.y, r1.y
+mad.f32 r1.y, r0.w, r0.y, r2.x
+mul.f r0.z, c2.x, r1.x
nop
-mov.f32f32 r1.y, r0.w
-mov.f32f32 r1.x, r0.x
+mad.f32 r1.x, r0.x, r0.y, r0.z
end
nop
-nop
; FRAG: outputs: r1.x (1:0)
-; FRAG: inputs: r0.x (1:0,cm=f,il=8,b=1) r1.x (3:0,cm=f,il=12,b=1) r0.y (5:0,cm=f,il=16,b=1)
-; FRAG: 42 instructions, 0 half, 4 full
+; FRAG: inputs: r0.w (1:0,cm=f,il=8,b=1) r1.x (3:0,cm=f,il=12,b=1) r0.z (5:0,cm=f,il=16,b=1)
+; FRAG: 29 instructions, 0 half, 4 full
diff --git a/reference/stk-mines/stk-mines-25.asm b/reference/stk-mines/stk-mines-25.asm
index ff486a7..d610a0c 100644
--- a/reference/stk-mines/stk-mines-25.asm
+++ b/reference/stk-mines/stk-mines-25.asm
@@ -6,39 +6,20 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)bary.f r0.z, 4, r0.x
-bary.f r0.w, 7, r0.x
-bary.f r1.x, 5, r0.x
-bary.f r1.y, 2, r0.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.x, r1.x
-bary.f r1.z, 1, r0.x
-mov.f32f32 r1.w, r0.z
-mov.f32f32 r2.y, r0.w
-mov.f32f32 r2.x, r1.x
+bary.f r0.w, 5, r0.x
+bary.f r1.x, 7, r0.x
+bary.f r2.x, 2, r0.x
+bary.f r2.y, 1, r0.x
bary.f (ei)r0.x, 0, r0.x
-(rpt4)nop
-sam.p (f32)(xyzw)r0.y, r1.w, s#0, t#0
-(sy)mul.f r0.w, r0.w, r1.y
-mul.f r0.z, r0.z, r1.z
-mul.f r0.x, r0.y, r0.x
-mov.f32f32 r0.y, r1.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.x, r0.x
-(ss)mov.f32f32 r1.w, r0.y
-mov.f32f32 r0.y, r0.w
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.x, r0.x
-nop
-mov.f32f32 r1.z, r0.y
-mov.f32f32 r1.y, r0.z
-mov.f32f32 r1.x, r0.x
+(rpt2)nop
+sam.p (f32)(xyzw)r1.x, r0.z, s#0, t#0
+(sy)mul.f r1.z, r1.z, r2.x
+mul.f r1.y, r1.y, r2.y
+(ss)mul.f r1.x, r1.x, r0.x
end
-nop
-nop
; FRAG: outputs: r1.x (1:0)
-; FRAG: inputs: r0.x (1:0,cm=f,il=8,b=1) r0.x (5:0,cm=f,il=12,b=1)
-; FRAG: 34 instructions, 0 half, 3 full
+; FRAG: inputs: r0.x (1:0,cm=f,il=8,b=1) r0.y (5:0,cm=f,il=12,b=1)
+; FRAG: 14 instructions, 0 half, 3 full
diff --git a/reference/stk-mines/stk-mines-26.asm b/reference/stk-mines/stk-mines-26.asm
index aadaff7..5f20141 100644
--- a/reference/stk-mines/stk-mines-26.asm
+++ b/reference/stk-mines/stk-mines-26.asm
@@ -1,20 +1,20 @@
; options:
; VERT: new compiler
-@in(r2.x) in0
-@in(r2.y) in1
-@in(r2.z) in2
-@in(r2.w) in3
+@in(r3.x) in0
+@in(r3.y) in1
+@in(r3.z) in2
+@in(r3.w) in3
@in(r0.x) in4
@in(r0.y) in5
@in(r0.z) in6
-@in(r0.w) in8
-@in(r1.x) in9
-@in(r1.y) in10
-@in(r1.z) in11
-@in(r3.x) in12
-@in(r3.y) in13
-@in(r3.z) in14
-@in(r3.w) in15
+@in(r1.x) in8
+@in(r1.y) in9
+@in(r1.z) in10
+@in(r1.w) in11
+@in(r2.x) in12
+@in(r2.y) in13
+@in(r2.z) in14
+@in(r2.w) in15
@out(r0.x) out0
@out(r0.y) out1
@out(r0.z) out2
@@ -27,135 +27,120 @@
@out(r2.y) out9
@out(r2.z) out10
@out(r2.w) out11
-(sy)(ss)mul.f r1.w, r2.x, c13.x
+@const(c20.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
+(sy)(ss)mul.f r0.w, r3.x, c13.x
mul.f r4.x, r0.x, c4.x
-mad.f32 r1.w, c14.x, r2.y, r1.w
+mad.f32 r0.w, c14.x, r3.y, r0.w
mad.f32 r4.x, c4.y, r0.y, r4.x
-mad.f32 r1.w, c15.x, r2.z, r1.w
-mov.f32f32 r1.z, r1.z
-mad.f32 r4.y, c16.x, r2.w, r1.w
-mov.f32f32 r1.w, r4.x
-mov.f32f32 r4.x, c10.z
+mad.f32 r0.w, c15.x, r3.z, r0.w
+mad.f32 r4.x, c4.z, r0.z, r4.x
+mad.f32 r0.w, c16.x, r3.w, r0.w
+mov.f32f32 r4.y, c10.z
mov.f32f32 r4.z, c10.y
-mul.f r4.w, r4.y, r4.y
-mul.f r5.x, r2.x, c13.y
-mad.f32 r5.y, c4.z, r0.z, r1.w
-mad.f32 r1.w, c14.y, r2.y, r5.x
-mov.f32f32 r1.z, r1.z
-mad.f32 r1.w, c15.y, r2.z, r1.w
-mul.f r5.x, r5.y, c11.x
-mad.f32 r5.z, c16.y, r2.w, r1.w
-mul.f r1.w, r0.x, c5.x
-max.f r1.z, r1.z, c20.x
-mad.f32 r4.x, c8.z, r4.x, c9.z
-mad.f32 r4.w, r5.z, r5.z, r4.w
-mad.f32 r5.w, c5.y, r0.y, r1.w
-min.f r1.w, r1.z, c20.y
-add.f r1.z, c17.z, r4.x
-mov.f32f32 r4.x, r4.w
-mul.f r4.w, r2.x, c13.z
-mov.f32f32 r5.w, r5.w
-mad.f32 r4.w, c14.z, r2.y, r4.w
+mov.f32f32 r4.w, c10.x
+mul.f r5.x, r0.w, r0.w
+mul.f r5.y, r3.x, c13.y
+mul.f r5.z, r4.x, c11.x
+mad.f32 r5.y, c14.y, r3.y, r5.y
+mul.f r5.w, r0.x, c5.x
+mad.f32 r5.y, c15.y, r3.z, r5.y
+mad.f32 r5.w, c5.y, r0.y, r5.w
+mad.f32 r5.y, c16.y, r3.w, r5.y
mad.f32 r5.w, c5.z, r0.z, r5.w
-mad.f32 r4.w, c15.z, r2.z, r4.w
+mad.f32 r4.y, c8.z, r4.y, c9.z
mad.f32 r4.z, c8.y, r4.z, c9.y
-mad.f32 r4.w, c16.z, r2.w, r4.w
-mad.f32 r5.x, c11.y, r5.w, r5.x
-mov.f32f32 r6.x, c10.x
+mad.f32 r5.x, r5.y, r5.y, r5.x
+mul.f r6.x, r3.x, c13.z
+mad.f32 r5.z, c11.y, r5.w, r5.z
+mad.f32 r6.x, c14.z, r3.y, r6.x
mul.f r0.x, r0.x, c6.x
-mad.f32 r4.x, r4.w, r4.w, r4.x
-mov.f32f32 r5.x, r5.x
+mad.f32 r6.x, c15.z, r3.z, r6.x
mad.f32 r0.x, c6.y, r0.y, r0.x
-add.f r0.y, c17.y, r4.z
-mad.f32 r4.z, c8.x, r6.x, c9.x
-mul.f r6.x, r2.x, c0.w
-mul.f r6.y, r2.x, c0.z
-rsq r4.x, (abs)r4.x
-(ss)mov.f32f32 r4.x, r4.x
-mov.f32f32 r0.x, r0.x
-add.f r4.z, c17.x, r4.z
+mad.f32 r0.y, c16.z, r3.w, r6.x
mad.f32 r0.x, c6.z, r0.z, r0.x
-mul.f r0.z, r4.y, r4.x
-mul.f r4.y, r5.z, r4.x
-mul.f r4.x, r4.w, r4.x
-mad.f32 r4.w, c11.z, r0.x, r5.x
-add.f r0.z, c11.x, (neg)r0.z
-add.f r4.y, c11.y, (neg)r4.y
-add.f r4.x, c11.z, (neg)r4.x
-max.f r5.x, r4.w, c20.x
-mul.f r5.z, r0.z, r0.z
-cmps.f.lt r4.w, (neg)r4.w, c20.x
-mad.f32 r5.z, r4.y, r4.y, r5.z
-mov.f32f32 r5.x, r5.x
-mul.f r1.x, c18.y, r1.x
-mul.f r0.w, c18.x, r0.w
-mov.f32f32 r5.z, r5.z
-mul.f r1.y, c18.z, r1.y
-mad.f32 r5.z, r4.x, r4.x, r5.z
-mad.f32 r1.x, r5.x, r1.x, r0.y
-mad.f32 r4.z, r5.x, r0.w, r4.z
-mad.f32 r0.y, c1.w, r2.y, r6.x
-mad.f32 r0.w, c1.z, r2.y, r6.y
-mul.f r6.x, r2.x, c0.y
-mul.f r2.x, r2.x, c0.x
-rsq r5.z, (abs)r5.z
-(ss)mov.f32f32 r5.z, r5.z
-mad.f32 r1.y, r5.x, r1.y, r1.z
-mad.f32 r0.y, c2.w, r2.z, r0.y
-mad.f32 r1.z, c2.z, r2.z, r0.w
-mul.f r0.z, r0.z, r5.z
-mul.f r4.y, r4.y, r5.z
-mul.f r4.x, r4.x, r5.z
-mad.f32 r0.w, c3.w, r2.w, r0.y
-mul.f r0.y, r5.y, r0.z
-mad.f32 r0.z, c3.z, r2.w, r1.z
-mad.f32 r0.y, r5.w, r4.y, r0.y
-mad.f32 r1.z, c1.y, r2.y, r6.x
-mad.f32 r2.x, c1.x, r2.y, r2.x
-mov.f32f32 r2.y, c7.x
-mov.f32f32 r0.y, r0.y
-mad.f32 r1.z, c2.y, r2.z, r1.z
-mad.f32 r0.x, r0.x, r4.x, r0.y
-mad.f32 r0.y, c3.y, r2.w, r1.z
-mad.f32 r1.z, c2.x, r2.z, r2.x
-min.f r4.x, r2.y, c20.z
-max.f r4.y, r0.x, c20.x
-mad.f32 r0.x, c3.x, r2.w, r1.z
-mov.f32f32 r2.w, r3.w
-mov.f32f32 r2.z, r3.z
-mov.f32f32 r2.y, r3.y
-mov.f32f32 r2.x, r3.x
-nop
-log2 r1.z, r4.y
-(ss)mov.f32f32 r1.z, r1.z
-(rpt2)nop
-mul.f r1.z, r4.x, r1.z
+add.f r0.z, c17.z, r4.y
+add.f r4.y, c17.y, r4.z
+mad.f32 r4.z, r0.y, r0.y, r5.x
+mad.f32 r5.x, c11.z, r0.x, r5.z
+mad.f32 r4.w, c8.x, r4.w, c9.x
+mul.f r5.z, r3.x, c0.w
+mul.f r6.x, r3.x, c0.z
+mul.f r6.y, r3.x, c0.y
+mul.f r3.x, r3.x, c0.x
+rsq r4.z, (abs)r4.z
+(ss)mov.f32f32 r6.z, r4.z
+mul.f r0.y, r0.y, r4.z
+(ss)max.f r4.z, r5.x, c20.x
+add.f r4.w, c17.x, r4.w
+mul.f r0.w, r0.w, r6.z
+mul.f r5.y, r5.y, r6.z
+(rpt1)nop
+add.f r0.w, c11.x, (neg)r0.w
+add.f r5.y, c11.y, (neg)r5.y
+add.f r0.y, c11.z, (neg)r0.y
+mov.f32f32 r6.z, r4.z
+mul.f r6.w, r0.w, r0.w
+mul.f r1.z, c18.z, r1.z
+mad.f32 r6.w, r5.y, r5.y, r6.w
+mul.f r1.y, c18.y, r1.y
+mad.f32 r6.w, r0.y, r0.y, r6.w
+mad.f32 r1.z, r6.z, r1.z, r0.z
+mul.f r0.z, c18.x, r1.x
+cmps.f.lt r1.x, (neg)r5.x, c20.x
+mad.f32 r5.x, c1.w, r3.y, r5.z
+mad.f32 r5.z, c1.z, r3.y, r6.x
+mad.f32 r6.x, c1.y, r3.y, r6.y
+rsq r6.y, (abs)r6.w
+(ss)mov.f32f32 r6.w, r6.y
+mul.f r0.y, r0.y, r6.y
+mad.f32 r1.y, r6.z, r1.y, r4.y
+mad.f32 r4.y, r4.z, r0.z, r4.w
+mul.f r0.z, r0.w, r6.w
+mul.f r4.z, r5.y, r6.w
+mad.f32 r0.w, c2.w, r3.z, r5.x
+mad.f32 r4.w, c2.z, r3.z, r5.z
+mul.f r0.z, r4.x, r0.z
+mad.f32 r0.w, c3.w, r3.w, r0.w
+mad.f32 r4.x, r5.w, r4.z, r0.z
+mad.f32 r0.z, c3.z, r3.w, r4.w
+mad.f32 r0.x, r0.x, r0.y, r4.x
+mad.f32 r0.y, c2.y, r3.z, r6.x
+mad.f32 r3.x, c1.x, r3.y, r3.x
+max.f r1.w, r1.w, c20.x
+max.f r0.x, r0.x, c20.x
+mad.f32 r0.y, c3.y, r3.w, r0.y
+mad.f32 r3.x, c2.x, r3.z, r3.x
+min.f r1.w, r1.w, c20.y
+mov.f32f32 r3.y, c7.x
+(rpt1)nop
+log2 r3.z, r0.x
+(ss)mad.f32 r0.x, c3.x, r3.w, r3.x
+min.f r3.x, r3.y, c20.z
(rpt2)nop
-mov.f32f32 r1.z, r1.z
+(ss)mul.f r3.x, r3.x, r3.z
(rpt5)nop
-exp2 r1.z, r1.z
-(ss)mov.f32f32 r1.z, r1.z
-(rpt2)nop
-sel.b32 r1.z, r1.z, r4.w, c20.x
-(rpt2)nop
-mov.f32f32 r1.z, r1.z
-(rpt2)nop
-mov.f32f32 r1.z, r1.z
+exp2 r3.x, r3.x
+(ss)sel.b32 r1.x, r3.x, r1.x, c20.x
(rpt2)nop
-mad.f32 r1.y, c19.z, r1.z, r1.y
-mad.f32 r1.x, c19.y, r1.z, r1.x
-mad.f32 r1.z, c19.x, r1.z, r4.z
-nop
-max.f r1.y, r1.y, c20.x
+(ss)mov.f32f32 r3.x, r1.x
+mad.f32 r1.x, c19.x, r1.x, r4.y
+(rpt1)nop
+mov.f32f32 r3.x, r3.x
max.f r1.x, r1.x, c20.x
-max.f r3.x, r1.z, c20.x
+(rpt1)nop
+mad.f32 r1.z, c19.z, r3.x, r1.z
+mad.f32 r1.y, c19.y, r3.x, r1.y
+min.f r1.x, r1.x, c20.y
nop
-min.f r1.z, r1.y, c20.y
-min.f r1.y, r1.x, c20.y
-min.f r1.x, r3.x, c20.y
+max.f r1.z, r1.z, c20.x
+max.f r1.y, r1.y, c20.x
+(rpt1)nop
+min.f r1.z, r1.z, c20.y
+min.f r1.y, r1.y, c20.y
end
nop
+nop
; VERT: outputs: r0.x (0:0) r1.x (1:0) r2.x (5:0)
-; VERT: inputs: r2.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r0.w (0:0,cm=f,il=16,b=0) r3.x (0:0,cm=f,il=20,b=0)
-; VERT: 144 instructions, 0 half, 7 full
+; VERT: inputs: r3.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r1.x (0:0,cm=f,il=16,b=0) r2.x (0:0,cm=f,il=20,b=0)
+; VERT: 124 instructions, 0 half, 7 full
diff --git a/reference/stk-mines/stk-mines-27.asm b/reference/stk-mines/stk-mines-27.asm
index 1435d60..141701a 100644
--- a/reference/stk-mines/stk-mines-27.asm
+++ b/reference/stk-mines/stk-mines-27.asm
@@ -6,275 +6,191 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c8.x) 0x40000000, 0xbf800000, 0x40800000, 0x3f000000
+@const(c9.x) 0x00000000, 0x3f333333, 0x3e99999a, 0x3d4ccccd
+@const(c10.x) 0x3f800000, 0x00000000, 0x00000000, 0x00000000
+@const(c11.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.z, 0, r0.x
absneg.f r0.w, (neg)c6.x
-mov.f32f32 r1.x, c3.x
-bary.f r1.y, 1, r0.x
-add.f r1.z, r0.z, c3.x
-add.f r1.w, r0.z, c4.x
-add.f r2.x, c7.x, r0.w
-add.f r0.z, r0.z, r1.x
-mov.f32f32 r1.x, r1.z
-mov.f32f32 r1.z, r1.w
-mov.f32f32 r1.w, r2.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r2.x, c9.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r2.y, r1.x
-add.f r1.x, r1.y, c3.y
-mov.f32f32 r2.w, r1.z
-add.f r1.z, r1.y, c4.y
-rcp r1.w, r1.w
-(ss)mov.f32f32 r1.w, r1.w
-mov.f32f32 r1.x, r1.x
-bary.f r3.y, 6, r0.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r1.x, r1.x
-add.f r0.w, r3.y, r0.w
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r3.z, r0.z
-mov.f32f32 r2.z, r1.x
-mul.f r0.z, r0.w, r1.w
-mov.f32f32 r3.x, r1.z
-add.f r0.w, r1.y, r2.x
-cmps.f.lt r1.x, c6.x, r3.y
-cmps.f.lt r1.y, c7.x, r3.y
-bary.f r1.z, 11, r0.x
-sam (f32)(xyz)r1.w, r2.y, s#0, t#0
-(sy)mad.f32 r2.x, c8.x, r2.x, c8.y
-mov.f32f32 r0.z, r0.z
-(ss)nop
-sam (f32)(xyz)r2.z, r2.w, s#1, t#1
-(sy)(ss)mad.f32 r2.w, c8.x, r2.w, c8.y
-mad.f32 r2.z, c8.x, r2.z, c8.y
-mov.f32f32 r2.x, r2.x
-mul.f r3.y, r0.z, c5.w
-mul.f r3.w, r0.z, c5.z
-mul.f r4.x, r0.z, c5.y
-mul.f r2.x, c8.z, r2.x
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r2.x, r2.x
-mad.f32 r1.w, c8.x, r1.w, c8.y
-mul.f r2.w, c8.z, r2.w
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r4.y, r3.w
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r2.w, r2.w
-mul.f r3.w, r2.z, r2.z
-mov.f32f32 r4.x, r4.x
-mul.f r4.z, r1.w, r1.w
-mov.f32f32 r2.w, r2.w
-mad.f32 r4.z, r2.x, r2.x, r4.z
-mul.f r4.w, r0.z, c5.x
-add.f r0.z, c10.x, (neg)r0.z
-mad.f32 r3.w, r2.w, r2.w, r3.w
-mov.f32f32 r4.z, r4.z
+bary.f r1.x, 6, r0.x
+mov.f32f32 r1.y, c3.x
+add.f r1.z, r0.z, c4.x
+bary.f r2.x, 1, r0.x
+add.f r2.y, r0.z, c3.x
+add.f r2.w, c7.x, r0.w
+cmps.f.lt r3.x, c6.x, r1.x
+add.f r1.w, r2.x, c4.y
+add.f r2.z, r2.x, c3.y
+cmps.f.lt r3.y, c7.x, r1.x
+add.f r3.z, r0.z, r1.y
+cov.u32f32 r0.z, r3.x
+rcp r1.y, r2.w
+add.f r0.w, r1.x, r0.w
+cov.u32f32 r1.x, r3.y
+sam (f32)(xyz)r3.w, r1.z, s#1, t#1
+(sy)(ss)mad.f32 r1.z, c8.x, r4.x, c8.y
+sam (f32)(xyz)r2.y, r2.y, s#0, t#0
+(sy)mad.f32 r1.w, c8.x, r2.z, c8.y
+(ss)mul.f r0.w, r0.w, r1.y
+mad.f32 r1.y, c8.x, r3.w, c8.y
+mul.f r1.z, c8.z, r1.z
+mul.f r1.w, c8.z, r1.w
+(ss)mov.f32f32 r2.z, r0.w
+mov.f32f32 r3.x, r1.y
+mov.f32f32 r3.y, r1.z
+mov.f32f32 r4.x, r1.w
mad.f32 r2.y, c8.x, r2.y, c8.y
-mov.f32f32 r4.w, r4.w
-mad.f32 r3.x, c8.x, r3.x, c8.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r3.w, r3.w
mov.f32f32 r3.x, r3.x
-mov.f32f32 r0.w, r0.w
-mad.f32 r4.z, r2.y, r2.y, r4.z
-cov.u32f32 r1.x, r1.x
-cov.u32f32 r1.y, r1.y
-(rpt3)nop
-rsq r4.z, r4.z
-(ss)mov.f32f32 r4.z, r4.z
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r0.w, r0.w
+mov.f32f32 r3.y, r3.y
+mul.f r4.z, r2.z, c5.w
+mul.f r4.w, r2.z, c5.z
+mul.f r1.y, r1.y, r3.x
+mov.f32f32 r5.x, r2.y
+mad.f32 r1.y, r1.z, r3.y, r1.y
+mad.f32 r1.z, c8.x, r4.y, c8.y
+mul.f r4.y, r2.z, c5.y
+mul.f r2.z, r2.z, c5.x
+mul.f r2.y, r2.y, r5.x
+mov.f32f32 r3.w, r1.z
+mad.f32 r1.w, r1.w, r4.x, r2.y
+mad.f32 r2.y, c8.x, r2.w, c8.y
+add.f r0.w, c10.x, (neg)r0.w
+mov.f32f32 r2.w, r3.w
+cmps.f.ne r0.z, r0.z, c9.x
cmps.f.ne r1.x, r1.x, c9.x
-mul.f r1.w, r1.w, r4.z
-mul.f r2.x, r2.x, r4.z
-mul.f r2.y, r2.y, r4.z
-mad.f32 r4.z, r3.x, r3.x, r3.w
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r3.w, r0.w
-cmps.f.ne r0.w, r1.y, c9.x
-mov.f32f32 r1.y, r1.z
-rsq r1.z, r4.z
-(ss)mov.f32f32 r1.z, r1.z
-(ss)bary.f r4.z, 12, r0.x
-bary.f r5.x, 13, r0.x
+mov.f32f32 r3.w, c9.x
+mad.f32 r1.y, r2.w, r2.w, r1.y
+mov.f32f32 r2.w, r2.y
mov.f32f32 r5.y, c9.x
-mad.f32 r1.w, r2.z, r1.z, r1.w
-mad.f32 r2.x, r2.w, r1.z, r2.x
-mad.f32 r1.z, r3.x, r1.z, r2.y
-sam (f32)(xyzw)r2.y, r3.z, s#2, t#2
-(ss)mov.f32f32 r3.z, r4.z
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r3.w, r5.x
-mul.f r1.w, r1.w, c8.w
-mul.f r2.x, r2.x, c8.w
-mul.f r1.z, r1.z, c8.w
-nop
-mov.f32f32 r1.w, r1.w
-bary.f r4.z, 8, r0.x
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r5.x, c9.x
-mul.f r5.z, r1.w, r4.z
-bary.f r5.w, 9, r0.x
-mul.f r6.x, r4.z, r1.w
-mov.f32f32 r6.y, c5.w
-mov.f32f32 r6.z, c5.z
-mad.f32 r5.z, r2.x, r5.w, r5.z
-mad.f32 r6.x, r5.w, r2.x, r6.x
-mov.f32f32 r6.w, c5.y
+mov.f32f32 r5.z, c9.x
+mov.f32f32 r5.w, c5.w
+mov.f32f32 r6.x, c5.z
+mov.f32f32 r6.y, c5.y
+rsq r1.y, r1.y
+(ss)mov.f32f32 r6.z, r1.y
+mad.f32 r1.w, r2.w, r2.w, r1.w
+add.f r3.w, r2.x, r3.w
+bary.f r2.x, 11, r0.x
+bary.f r2.w, 12, r0.x
+bary.f r6.w, 13, r0.x
+bary.f r7.x, 8, r0.x
+bary.f r7.y, 9, r0.x
+rsq r1.w, r1.w
+(ss)mov.f32f32 r7.z, r1.w
+(ss)mul.f r1.w, r2.y, r1.w
+sam (f32)(xyzw)r7.w, r3.z, s#2, t#2
bary.f (ei)r0.x, 10, r0.x
-mov.f32f32 r0.y, r5.z
-mov.f32f32 r5.z, r6.x
-mov.f32f32 r6.x, c5.x
-mad.f32 r0.y, r1.z, r0.x, r0.y
-mad.f32 r5.z, r0.x, r1.z, r5.z
-(rpt1)nop
-mul.f r1.w, r0.y, r1.w
-max.f r5.z, r5.z, c9.x
-mul.f r2.x, r0.y, r2.x
-mul.f r0.y, r0.y, r1.z
-mov.f32f32 r1.z, r1.w
-mov.f32f32 r1.w, r5.z
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r0.y, r0.y
-mul.f r1.z, c8.x, r1.z
-mad.f32 r1.w, c9.y, r1.w, c9.z
-mul.f r2.x, c8.x, r2.x
-mul.f r0.y, c8.x, r0.y
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r0.y, r0.y
-add.f r1.z, r4.z, (neg)r1.z
-(sy)mul.f r3.x, r3.x, r1.w
-mul.f r2.w, r2.w, r1.w
-mul.f r2.z, r2.z, r1.w
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r4.z, r3.x
-mov.f32f32 r5.z, r2.w
-mov.f32f32 r7.x, r2.z
-mul.f r7.y, r1.z, r1.z
-add.f r2.x, r5.w, (neg)r2.x
-add.f r4.z, r4.z, r5.x
-add.f r0.x, r0.x, (neg)r0.y
-mul.f r0.y, r2.y, r1.w
-mov.f32f32 r1.w, r2.x
-add.f r2.x, r4.z, r5.y
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r2.y, r0.y
-mad.f32 r4.z, r1.w, r1.w, r7.y
+mov.f32f32 r0.y, c5.x
+mul.f r2.y, r5.x, r7.z
+(ss)mul.f r3.z, r4.x, r7.z
+mad.f32 r2.y, r3.x, r6.z, r2.y
+mad.f32 r3.x, r3.y, r6.z, r3.z
+mad.f32 r1.y, r1.z, r1.y, r1.w
+nop
+mul.f r1.z, r2.y, c8.w
+mul.f r1.w, r3.x, c8.w
+mul.f r1.y, r1.y, c8.w
+nop
+mov.f32f32 r2.y, r1.z
+mul.f r1.z, r7.x, r1.z
+mov.f32f32 r3.x, r1.w
+mov.f32f32 r3.y, r1.y
+mul.f r3.z, r2.y, r7.x
+mad.f32 r1.z, r7.y, r1.w, r1.z
+mad.f32 r1.w, r3.x, r7.y, r3.z
+mad.f32 r1.y, r0.x, r1.y, r1.z
+mad.f32 r1.z, r3.y, r0.x, r1.w
(rpt2)nop
-mov.f32f32 r4.z, r4.z
-nop
-mad.f32 r4.z, r0.x, r0.x, r4.z
-(rpt5)nop
-rsq r4.z, r4.z
-(ss)mov.f32f32 r4.z, r4.z
+mul.f r1.w, r1.z, r2.y
+max.f r1.y, r1.y, c9.x
+mul.f r2.y, r1.z, r3.x
+mul.f r1.z, r1.z, r3.y
+mul.f r1.w, c8.x, r1.w
+mad.f32 r1.y, c9.y, r1.y, c9.z
+mul.f r2.y, c8.x, r2.y
+mul.f r1.z, c8.x, r1.z
+add.f r1.w, r7.x, (neg)r1.w
+mov.f32f32 r3.x, r1.y
+add.f r2.y, r7.y, (neg)r2.y
+add.f r0.x, r0.x, (neg)r1.z
+mov.f32f32 r1.z, r1.w
+(sy)mul.f r3.y, r8.z, r3.x
+mov.f32f32 r3.z, r2.y
+mov.f32f32 r3.w, r0.x
+mul.f r1.w, r1.w, r1.z
+add.f r4.x, r3.y, r5.z
+mad.f32 r1.w, r2.y, r3.z, r1.w
+mul.f r2.y, r8.y, r3.x
+mad.f32 r1.w, r3.w, r3.w, r1.w
+add.f r3.w, r4.x, r5.y
+mul.f r3.x, r8.x, r3.x
+mul.f r1.y, r7.w, r1.y
(rpt2)nop
-mul.f r1.z, r1.z, r4.z
-mul.f r1.w, r1.w, r4.z
-mul.f r0.x, r0.x, r4.z
-nop
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r0.x, r0.x
+rsq r1.w, r1.w
+(ss)mov.f32f32 r4.x, r1.w
+mul.f r0.x, r0.x, r1.w
+(rpt1)nop
+mul.f r1.z, r1.z, r4.x
+(ss)mul.f r1.w, r3.z, r4.x
+(rpt1)nop
+mul.f r1.z, r1.z, r2.x
nop
-mul.f r1.y, r1.z, r1.y
+mad.f32 r1.z, r1.w, r2.w, r1.z
nop
-mad.f32 r1.y, r1.w, r3.z, r1.y
-(rpt2)nop
-mov.f32f32 r1.y, r1.y
-nop
-mad.f32 r0.x, r0.x, r3.w, r1.y
-(rpt2)nop
-mov.f32f32 r0.x, r0.x
+mad.f32 r0.x, r0.x, r6.w, r1.z
(rpt2)nop
max.f r0.x, r0.x, c9.x
(rpt2)nop
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
-mov.f32f32 r1.y, r0.x
mov.f32f32 r1.z, r0.x
-cmps.f.lt r0.x, c9.x, r0.x
-(rpt1)nop
-mul.f r1.y, r1.y, r1.z
-cov.u32f32 r0.x, r0.x
+(rpt2)nop
+mul.f r0.x, r0.x, r1.z
+cmps.f.lt r1.z, c9.x, r1.z
(rpt1)nop
-mov.f32f32 r1.y, r1.y
-cmps.f.ne r0.x, r0.x, c9.x
+mov.f32f32 r1.w, r0.x
+cov.u32f32 r1.z, r1.z
(rpt1)nop
-mul.f r1.y, r1.y, r1.y
-sel.b32 r1.z, r2.x, r0.x, r3.x
+mul.f r0.x, r0.x, r1.w
+cmps.f.ne r1.z, r1.z, c9.x
(rpt1)nop
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.w, r1.z
+mov.f32f32 r1.w, r0.x
+sel.b32 r2.x, r3.w, r1.z, r3.y
+mul.f r0.x, r0.x, c9.w
+nop
+mul.f r1.w, r1.w, r1.w
+mad.f32 r2.w, r0.w, r2.x, r4.z
+add.f r3.y, r2.y, r0.x
+add.f r3.z, r3.x, r0.x
+mul.f r1.w, r1.w, c9.z
+add.f r0.x, r1.y, r0.x
+sel.b32 r2.x, r2.w, r0.z, r2.x
+nop
+mov.f32f32 r2.w, r1.w
+add.f r0.x, r0.x, r1.w
+sel.b32 r1.w, r5.w, r1.x, r2.x
+nop
+add.f r2.x, r3.y, r2.w
+add.f r2.w, r3.z, r2.w
+sel.b32 r0.x, r0.x, r1.z, r1.y
+nop
+sel.b32 r1.y, r2.x, r1.z, r2.y
+sel.b32 r1.z, r2.w, r1.z, r3.x
+mad.f32 r2.x, r0.w, r0.x, r2.z
+nop
+mad.f32 r2.y, r0.w, r1.y, r4.w
+mad.f32 r0.w, r0.w, r1.z, r4.y
+sel.b32 r0.x, r2.x, r0.z, r0.x
+nop
+sel.b32 r1.y, r2.y, r0.z, r1.y
+sel.b32 r0.z, r0.w, r0.z, r1.z
(rpt1)nop
-mul.f r2.x, r1.y, r1.y
-mov.f32f32 r1.w, r1.w
-mul.f r1.y, r1.y, c9.w
-nop
-mov.f32f32 r2.x, r2.x
-mad.f32 r1.w, r0.z, r1.w, r3.y
-mov.f32f32 r3.x, r1.y
-mov.f32f32 r3.y, r1.y
-mul.f r2.x, r2.x, c9.z
-sel.b32 r1.z, r1.w, r1.x, r1.z
-add.f r1.w, r5.z, r3.x
-add.f r3.x, r7.x, r3.y
-mov.f32f32 r2.x, r2.x
-sel.b32 r1.z, r6.y, r0.w, r1.z
-mov.f32f32 r1.y, r1.y
-nop
-mov.f32f32 r3.y, r2.x
-mov.f32f32 r3.z, r2.x
-mov.f32f32 r2.x, r2.x
-add.f r1.y, r2.y, r1.y
-add.f r2.y, r1.w, r3.y
-add.f r3.x, r3.x, r3.z
-mov.f32f32 r1.w, r1.z
-nop
-sel.b32 r1.z, r2.y, r0.x, r2.w
-sel.b32 r2.y, r3.x, r0.x, r2.z
-add.f r1.y, r1.y, r2.x
-nop
-mov.f32f32 r2.x, r1.z
-mov.f32f32 r2.z, r2.y
-sel.b32 r0.x, r1.y, r0.x, r0.y
-nop
-mov.f32f32 r0.y, r2.x
-mov.f32f32 r1.y, r2.z
-mov.f32f32 r2.x, r0.x
-nop
-mad.f32 r0.y, r0.z, r0.y, r4.y
-mad.f32 r1.y, r0.z, r1.y, r4.x
-mov.f32f32 r2.x, r2.x
-nop
-sel.b32 r0.y, r0.y, r1.x, r1.z
-sel.b32 r1.y, r1.y, r1.x, r2.y
-mad.f32 r0.z, r0.z, r2.x, r4.w
+sel.b32 r1.z, r6.x, r1.x, r1.y
+sel.b32 r1.y, r6.y, r1.x, r0.z
+sel.b32 r1.x, r0.y, r1.x, r0.x
+end
nop
-sel.b32 r0.y, r6.z, r0.w, r0.y
-sel.b32 r1.y, r6.w, r0.w, r1.y
-sel.b32 r0.x, r0.z, r1.x, r0.x
nop
-mov.f32f32 r1.z, r0.y
-mov.f32f32 r1.y, r1.y
-sel.b32 r0.x, r6.x, r0.w, r0.x
-(rpt2)nop
-mov.f32f32 r1.x, r0.x
-end
nop
; FRAG: outputs: r1.x (1:0)
-; FRAG: inputs: r0.x (5:0,cm=f,il=8,b=1) r1.x (5:20,cm=f,il=12,b=1) r0.w (5:21,cm=f,il=16,b=1) r3.x (5:22,cm=f,il=20,b=1)
-; FRAG: 297 instructions, 0 half, 8 full
+; FRAG: inputs: r0.x (5:0,cm=f,il=8,b=1) r1.x (5:20,cm=f,il=12,b=1) r1.y (5:21,cm=f,il=16,b=1) r3.x (5:22,cm=f,il=20,b=1)
+; FRAG: 193 instructions, 0 half, 9 full
diff --git a/reference/stk-mines/stk-mines-28.asm b/reference/stk-mines/stk-mines-28.asm
index 2d84780..726bef5 100644
--- a/reference/stk-mines/stk-mines-28.asm
+++ b/reference/stk-mines/stk-mines-28.asm
@@ -1,16 +1,16 @@
; options:
; VERT: new compiler
-@in(r1.x) in0
-@in(r1.y) in1
-@in(r1.z) in2
-@in(r1.w) in3
+@in(r4.z) in0
+@in(r4.w) in1
+@in(r5.x) in2
+@in(r5.y) in3
@in(r0.x) in4
@in(r0.y) in5
@in(r0.z) in6
-@in(r5.x) in8
-@in(r5.y) in9
-@in(r5.z) in10
-@in(r5.w) in11
+@in(r1.x) in8
+@in(r1.y) in9
+@in(r1.z) in10
+@in(r1.w) in11
@out(r0.x) out0
@out(r0.y) out1
@out(r0.z) out2
@@ -31,191 +31,144 @@
@out(r4.y) out17
@out(r4.z) out18
@out(r4.w) out19
+@const(c12.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)mov.f32f32 r0.w, c1.x
-mul.f r2.x, c8.x, r1.x
+mul.f r2.x, c8.x, r4.z
mov.f32f32 r2.y, c1.y
mov.f32f32 r2.z, c1.z
mul.f r2.w, r0.w, r0.x
mov.f32f32 r3.x, c2.x
-mad.f32 r2.x, c9.x, r1.y, r2.x
+mad.f32 r2.x, c9.x, r4.w, r2.x
mul.f r3.y, r0.w, r0.w
-mad.f32 r2.x, c10.x, r1.z, r2.x
+mad.f32 r2.x, c10.x, r5.x, r2.x
mad.f32 r2.w, r3.x, r0.y, r2.w
-mad.f32 r2.x, c11.x, r1.w, r2.x
-mad.f32 r3.x, r2.y, r2.y, r3.y
-mul.f r3.y, r2.y, r0.x
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r3.z, c3.x
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r3.x, r3.x
+mov.f32f32 r3.x, c3.x
+mad.f32 r2.x, c11.x, r5.y, r2.x
+mad.f32 r3.y, r2.y, r2.y, r3.y
+mul.f r3.z, r2.y, r0.x
+mad.f32 r2.w, r3.x, r0.z, r2.w
+mov.f32f32 r3.x, r2.x
+mad.f32 r3.y, r2.z, r2.z, r3.y
mov.f32f32 r3.w, c2.y
-mad.f32 r2.w, r3.z, r0.z, r2.w
-mul.f r3.z, r2.x, r2.x
-mul.f r4.x, c8.y, r1.x
-mad.f32 r3.x, r2.z, r2.z, r3.x
-mov.f32f32 r4.y, r2.w
-mad.f32 r2.w, c9.y, r1.y, r4.x
-mad.f32 r3.y, r3.w, r0.y, r3.y
-mul.f r0.x, r2.z, r0.x
-mul.f r3.w, r4.y, r4.y
-mad.f32 r2.w, c10.y, r1.z, r2.w
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r4.x, c3.y
-mad.f32 r2.w, c11.y, r1.w, r2.w
-rsq r3.x, r3.x
-(ss)mov.f32f32 r3.x, r3.x
-mov.f32f32 r4.z, c2.z
-mad.f32 r3.y, r4.x, r0.z, r3.y
mov.f32f32 r4.x, r2.w
-mul.f r0.w, r0.w, r3.x
-mul.f r2.y, r2.y, r3.x
-mov.f32f32 r3.y, r3.y
-mad.f32 r2.w, r4.x, r4.x, r3.z
+mul.f r3.x, r3.x, r3.x
+mul.f r4.y, c8.y, r4.z
+mad.f32 r3.z, r3.w, r0.y, r3.z
+mul.f r2.w, r2.w, r4.x
+mov.f32f32 r3.w, c3.y
+mad.f32 r4.y, c9.y, r4.w, r4.y
+rsq r3.y, r3.y
+(ss)mov.f32f32 r5.z, r3.y
+mad.f32 r4.y, c10.y, r5.x, r4.y
+mad.f32 r3.z, r3.w, r0.z, r3.z
+mad.f32 r3.w, c11.y, r5.y, r4.y
+mul.f r0.w, r0.w, r5.z
+mul.f r2.y, r2.y, r5.z
+mov.f32f32 r4.y, r3.z
+mov.f32f32 r5.z, r3.w
+mul.f r5.w, c0.x, r0.w
mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.y, r2.y
-mad.f32 r3.z, r3.y, r3.y, r3.w
-mov.f32f32 r2.w, r2.w
-mul.f r3.w, c8.z, r1.x
-mul.f r4.w, c0.x, r0.w
-mov.f32f32 r3.z, r3.z
-mad.f32 r0.x, r4.z, r0.y, r0.x
-mad.f32 r0.y, c9.z, r1.y, r3.w
-mad.f32 r3.w, c0.y, r2.y, r4.w
-mad.f32 r0.y, c10.z, r1.z, r0.y
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r4.z, c3.z
-mad.f32 r0.y, c11.z, r1.w, r0.y
-mov.f32f32 r3.w, r3.w
-mul.f r2.z, r2.z, r3.x
-mad.f32 r0.x, r4.z, r0.z, r0.x
-mov.f32f32 r0.y, r0.y
-mul.f r0.z, c4.w, r1.x
-mul.f r3.x, c4.z, r1.x
-mov.f32f32 r0.x, r0.x
-mad.f32 r2.w, r0.y, r0.y, r2.w
+mad.f32 r2.w, r3.z, r4.y, r2.w
+mul.f r0.x, r2.z, r0.x
+mov.f32f32 r3.z, c2.z
+mad.f32 r3.x, r3.w, r5.z, r3.x
+mul.f r3.w, c8.z, r4.z
+mad.f32 r5.w, c0.y, r2.y, r5.w
+mad.f32 r0.x, r3.z, r0.y, r0.x
+mov.f32f32 r0.y, c3.z
+mad.f32 r3.z, c9.z, r4.w, r3.w
+mul.f r2.z, r2.z, r3.y
+(ss)mad.f32 r3.y, c10.z, r5.x, r3.z
+mad.f32 r0.x, r0.y, r0.z, r0.x
+mad.f32 r0.y, c11.z, r5.y, r3.y
+mad.f32 r3.y, c0.z, r2.z, r5.w
+mov.f32f32 r0.z, r2.y
+mov.f32f32 r2.y, r0.x
+mov.f32f32 r3.z, r0.y
+mov.f32f32 r3.w, r3.y
mov.f32f32 r2.z, r2.z
-mad.f32 r0.z, c5.w, r1.y, r0.z
-mad.f32 r3.z, r0.x, r0.x, r3.z
-mad.f32 r0.z, c6.w, r1.z, r0.z
-mad.f32 r3.x, c5.z, r1.y, r3.x
-mul.f r4.z, c4.y, r1.x
-mul.f r1.x, c4.x, r1.x
-mad.f32 r0.z, c7.w, r1.w, r0.z
-mad.f32 r3.x, c6.z, r1.z, r3.x
-rsq r3.z, r3.z
-(ss)mov.f32f32 r3.z, r3.z
-rsq r2.w, r2.w
-(ss)mov.f32f32 r4.w, r2.w
-mad.f32 r3.w, c0.z, r2.z, r3.w
-(ss)mov.f32f32 r2.w, r0.z
-mul.f r0.x, r0.x, r3.z
-mul.f r2.x, r2.x, r4.w
-mul.f r4.y, r4.y, r3.z
-mul.f r3.y, r3.y, r3.z
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r3.z, r4.y
-mov.f32f32 r3.y, r3.y
-mul.f r4.y, r0.x, r2.y
-absneg.f r2.x, (neg)r2.x
-mul.f r6.x, c0.x, r3.z
-mad.f32 r4.y, r3.y, r2.z, (neg)r4.y
-mad.f32 r6.x, c0.y, r3.y, r6.x
-mov.f32f32 r6.y, r2.x
-mul.f r2.x, r3.z, r2.z
-mov.f32f32 r2.z, r4.y
-mov.f32f32 r4.y, r6.x
-mul.f r6.x, r6.y, r6.y
-mul.f r4.x, r4.x, r4.w
-mul.f r2.z, c0.x, r2.z
-mad.f32 r2.x, r0.x, r0.w, (neg)r2.x
-mad.f32 r0.x, c0.z, r0.x, r4.y
+mad.f32 r2.y, r2.y, r2.y, r2.w
+mad.f32 r0.y, r0.y, r3.z, r3.x
+mul.f r3.x, r3.w, r3.w
+mul.f r2.w, c4.w, r4.z
+mul.f r3.w, c4.z, r4.z
+mul.f r5.w, c4.y, r4.z
+mul.f r4.z, c4.x, r4.z
+rsq r2.y, r2.y
+(ss)mov.f32f32 r6.x, r2.y
+rsq r0.y, r0.y
+(ss)mov.f32f32 r6.y, r0.y
+(ss)mul.f r0.y, r2.x, r0.y
+mul.f r0.x, r0.x, r2.y
+mul.f r2.x, r4.y, r6.x
+mul.f r2.y, r5.z, r6.y
+mul.f r4.x, r4.x, r6.x
+absneg.f r4.y, (neg)r0.y
+mov.f32f32 r0.y, r2.x
+mov.f32f32 r5.z, r0.x
+absneg.f r2.y, (neg)r2.y
+mul.f r6.x, c0.x, r4.x
mov.f32f32 r4.x, r4.x
-mul.f r0.w, r3.y, r0.w
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r0.x, r0.x
-absneg.f r3.y, (neg)r4.x
-mad.f32 r0.w, r3.z, r2.y, (neg)r0.w
-mad.f32 r2.x, c0.y, r2.x, r2.z
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r4.x, r3.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r3.y, r3.w
-mul.f r0.y, r0.y, r4.w
-mad.f32 r0.w, c0.z, r0.w, r2.x
-mad.f32 r2.x, r4.x, r4.x, r6.x
-mul.f r2.y, r3.y, r3.y
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.x, r2.x
-mad.f32 r3.x, c7.z, r1.w, r3.x
-mad.f32 r3.z, c5.y, r1.y, r4.z
-mov.f32f32 r3.w, r0.w
-absneg.f r0.y, (neg)r0.y
-mov.f32f32 r2.z, r3.x
-mad.f32 r0.w, c6.y, r1.z, r3.z
-mad.f32 r2.y, r3.w, r3.w, r2.y
-mov.f32f32 r3.z, r0.y
-mad.f32 r0.y, c7.y, r1.w, r0.w
-mad.f32 r0.w, c5.x, r1.y, r1.x
-mov.f32f32 r1.x, r2.y
-mad.f32 r1.y, r3.z, r3.z, r2.x
-mad.f32 r1.x, r0.x, r0.x, r1.x
+mul.f r6.z, r5.z, r0.z
+mov.f32f32 r6.w, r2.y
+mad.f32 r6.z, r0.y, r2.z, (neg)r6.z
+mov.f32f32 r7.x, r4.y
+mad.f32 r2.x, c0.y, r2.x, r6.x
+mul.f r2.z, r4.x, r2.z
+mul.f r6.x, c0.x, r6.z
+mad.f32 r2.z, r5.z, r0.w, (neg)r2.z
+mul.f r5.z, r7.x, r7.x
+mad.f32 r0.x, c0.z, r0.x, r2.x
+mad.f32 r2.x, r2.y, r6.w, r5.z
+mad.f32 r2.y, c0.y, r2.z, r6.x
+mul.f r0.y, r0.y, r0.w
+mul.f r0.w, r3.z, r6.y
+mad.f32 r0.y, r4.x, r0.z, (neg)r0.y
+mov.f32f32 r0.z, r0.x
+mad.f32 r2.z, c5.w, r4.w, r2.w
+mad.f32 r2.w, c5.z, r4.w, r3.w
+mad.f32 r0.y, c0.z, r0.y, r2.y
+absneg.f r2.y, (neg)r0.w
+mov.f32f32 r3.z, r0.z
+mad.f32 r0.z, c6.w, r5.x, r2.z
+mov.f32f32 r2.z, r0.y
+mov.f32f32 r4.x, r2.y
+mad.f32 r0.w, c7.w, r5.y, r0.z
+mad.f32 r0.z, c6.z, r5.x, r2.w
+mov.f32f32 r5.z, r2.z
+mad.f32 r2.x, r2.y, r4.x, r2.x
+mov.f32f32 r2.w, r0.w
+mad.f32 r0.z, c7.z, r5.y, r0.z
+mad.f32 r0.y, r0.y, r5.z, r3.x
+mad.f32 r2.y, c5.y, r4.w, r5.w
+mad.f32 r0.x, r0.x, r3.z, r0.y
+mov.f32f32 r2.z, r0.z
+mad.f32 r0.y, c6.y, r5.x, r2.y
+mad.f32 r2.y, c5.x, r4.w, r4.z
+mad.f32 r0.y, c7.y, r5.y, r0.y
+mad.f32 r4.z, c6.x, r5.x, r2.y
+nop
+rsq r0.x, r0.x
+(ss)mov.f32f32 r2.y, r0.x
+mul.f r3.x, r3.y, r0.x
+(ss)rsq r0.x, r2.x
+(ss)mov.f32f32 r2.x, r0.x
+mul.f r3.w, r4.y, r0.x
+mul.f r3.z, r3.z, r2.y
+mul.f r3.y, r5.z, r2.y
+mul.f r4.y, r4.x, r2.x
+mul.f r4.x, r6.w, r2.x
mov.f32f32 r2.y, r0.y
-mad.f32 r1.z, c6.x, r1.z, r0.w
-mov.f32f32 r0.w, r0.z
-mov.f32f32 r0.z, r3.x
-mov.f32f32 r0.y, r0.y
-mad.f32 r1.z, c7.x, r1.w, r1.z
-rsq r1.x, r1.x
-(ss)mov.f32f32 r1.x, r1.x
-rsq r1.y, r1.y
-(ss)mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.w, (0.000000)
-mov.f32f32 r2.x, r1.z
-mul.f r0.x, r0.x, r1.x
-mul.f r3.x, r3.w, r1.x
-mul.f r1.x, r3.y, r1.x
-mul.f r3.y, r3.z, r1.y
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r3.w, r3.y
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r4.y, r3.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r1.x, r1.x
+mad.f32 r0.x, c7.x, r5.y, r4.z
+mov.f32f32 r4.z, r4.y
+mov.f32f32 r4.w, (0.000000)
nop
-mov.f32f32 r3.z, r0.x
-mov.f32f32 r3.y, r3.x
-mov.f32f32 r3.x, r1.x
-mov.f32f32 r0.x, r4.y
-mul.f r1.x, r4.x, r1.y
-mul.f r1.y, r6.y, r1.y
-mov.f32f32 r4.z, r3.w
-mov.f32f32 r4.y, r0.x
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r0.x, r1.z
-mov.f32f32 r4.w, r1.w
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.y, r1.y
-(rpt1)nop
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r3.w, r1.y
-mov.f32f32 r1.w, r5.w
-mov.f32f32 r1.z, r5.z
-mov.f32f32 r4.x, r1.x
-mov.f32f32 r1.y, r5.y
-mov.f32f32 r1.x, r5.x
+mov.f32f32 r2.x, r0.x
end
nop
nop
+nop
; VERT: outputs: r0.x (0:0) r1.x (5:0) r2.x (5:20) r3.x (5:21) r4.x (5:22)
-; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r5.x (0:0,cm=f,il=16,b=0)
-; VERT: 183 instructions, 0 half, 7 full
+; VERT: inputs: r4.z (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r1.x (0:0,cm=f,il=16,b=0)
+; VERT: 133 instructions, 0 half, 8 full
diff --git a/reference/stk-mines/stk-mines-29.asm b/reference/stk-mines/stk-mines-29.asm
index 1435d60..141701a 100644
--- a/reference/stk-mines/stk-mines-29.asm
+++ b/reference/stk-mines/stk-mines-29.asm
@@ -6,275 +6,191 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c8.x) 0x40000000, 0xbf800000, 0x40800000, 0x3f000000
+@const(c9.x) 0x00000000, 0x3f333333, 0x3e99999a, 0x3d4ccccd
+@const(c10.x) 0x3f800000, 0x00000000, 0x00000000, 0x00000000
+@const(c11.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.z, 0, r0.x
absneg.f r0.w, (neg)c6.x
-mov.f32f32 r1.x, c3.x
-bary.f r1.y, 1, r0.x
-add.f r1.z, r0.z, c3.x
-add.f r1.w, r0.z, c4.x
-add.f r2.x, c7.x, r0.w
-add.f r0.z, r0.z, r1.x
-mov.f32f32 r1.x, r1.z
-mov.f32f32 r1.z, r1.w
-mov.f32f32 r1.w, r2.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r2.x, c9.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r2.y, r1.x
-add.f r1.x, r1.y, c3.y
-mov.f32f32 r2.w, r1.z
-add.f r1.z, r1.y, c4.y
-rcp r1.w, r1.w
-(ss)mov.f32f32 r1.w, r1.w
-mov.f32f32 r1.x, r1.x
-bary.f r3.y, 6, r0.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r1.x, r1.x
-add.f r0.w, r3.y, r0.w
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r3.z, r0.z
-mov.f32f32 r2.z, r1.x
-mul.f r0.z, r0.w, r1.w
-mov.f32f32 r3.x, r1.z
-add.f r0.w, r1.y, r2.x
-cmps.f.lt r1.x, c6.x, r3.y
-cmps.f.lt r1.y, c7.x, r3.y
-bary.f r1.z, 11, r0.x
-sam (f32)(xyz)r1.w, r2.y, s#0, t#0
-(sy)mad.f32 r2.x, c8.x, r2.x, c8.y
-mov.f32f32 r0.z, r0.z
-(ss)nop
-sam (f32)(xyz)r2.z, r2.w, s#1, t#1
-(sy)(ss)mad.f32 r2.w, c8.x, r2.w, c8.y
-mad.f32 r2.z, c8.x, r2.z, c8.y
-mov.f32f32 r2.x, r2.x
-mul.f r3.y, r0.z, c5.w
-mul.f r3.w, r0.z, c5.z
-mul.f r4.x, r0.z, c5.y
-mul.f r2.x, c8.z, r2.x
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r2.x, r2.x
-mad.f32 r1.w, c8.x, r1.w, c8.y
-mul.f r2.w, c8.z, r2.w
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r4.y, r3.w
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r2.w, r2.w
-mul.f r3.w, r2.z, r2.z
-mov.f32f32 r4.x, r4.x
-mul.f r4.z, r1.w, r1.w
-mov.f32f32 r2.w, r2.w
-mad.f32 r4.z, r2.x, r2.x, r4.z
-mul.f r4.w, r0.z, c5.x
-add.f r0.z, c10.x, (neg)r0.z
-mad.f32 r3.w, r2.w, r2.w, r3.w
-mov.f32f32 r4.z, r4.z
+bary.f r1.x, 6, r0.x
+mov.f32f32 r1.y, c3.x
+add.f r1.z, r0.z, c4.x
+bary.f r2.x, 1, r0.x
+add.f r2.y, r0.z, c3.x
+add.f r2.w, c7.x, r0.w
+cmps.f.lt r3.x, c6.x, r1.x
+add.f r1.w, r2.x, c4.y
+add.f r2.z, r2.x, c3.y
+cmps.f.lt r3.y, c7.x, r1.x
+add.f r3.z, r0.z, r1.y
+cov.u32f32 r0.z, r3.x
+rcp r1.y, r2.w
+add.f r0.w, r1.x, r0.w
+cov.u32f32 r1.x, r3.y
+sam (f32)(xyz)r3.w, r1.z, s#1, t#1
+(sy)(ss)mad.f32 r1.z, c8.x, r4.x, c8.y
+sam (f32)(xyz)r2.y, r2.y, s#0, t#0
+(sy)mad.f32 r1.w, c8.x, r2.z, c8.y
+(ss)mul.f r0.w, r0.w, r1.y
+mad.f32 r1.y, c8.x, r3.w, c8.y
+mul.f r1.z, c8.z, r1.z
+mul.f r1.w, c8.z, r1.w
+(ss)mov.f32f32 r2.z, r0.w
+mov.f32f32 r3.x, r1.y
+mov.f32f32 r3.y, r1.z
+mov.f32f32 r4.x, r1.w
mad.f32 r2.y, c8.x, r2.y, c8.y
-mov.f32f32 r4.w, r4.w
-mad.f32 r3.x, c8.x, r3.x, c8.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r3.w, r3.w
mov.f32f32 r3.x, r3.x
-mov.f32f32 r0.w, r0.w
-mad.f32 r4.z, r2.y, r2.y, r4.z
-cov.u32f32 r1.x, r1.x
-cov.u32f32 r1.y, r1.y
-(rpt3)nop
-rsq r4.z, r4.z
-(ss)mov.f32f32 r4.z, r4.z
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r0.w, r0.w
+mov.f32f32 r3.y, r3.y
+mul.f r4.z, r2.z, c5.w
+mul.f r4.w, r2.z, c5.z
+mul.f r1.y, r1.y, r3.x
+mov.f32f32 r5.x, r2.y
+mad.f32 r1.y, r1.z, r3.y, r1.y
+mad.f32 r1.z, c8.x, r4.y, c8.y
+mul.f r4.y, r2.z, c5.y
+mul.f r2.z, r2.z, c5.x
+mul.f r2.y, r2.y, r5.x
+mov.f32f32 r3.w, r1.z
+mad.f32 r1.w, r1.w, r4.x, r2.y
+mad.f32 r2.y, c8.x, r2.w, c8.y
+add.f r0.w, c10.x, (neg)r0.w
+mov.f32f32 r2.w, r3.w
+cmps.f.ne r0.z, r0.z, c9.x
cmps.f.ne r1.x, r1.x, c9.x
-mul.f r1.w, r1.w, r4.z
-mul.f r2.x, r2.x, r4.z
-mul.f r2.y, r2.y, r4.z
-mad.f32 r4.z, r3.x, r3.x, r3.w
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r3.w, r0.w
-cmps.f.ne r0.w, r1.y, c9.x
-mov.f32f32 r1.y, r1.z
-rsq r1.z, r4.z
-(ss)mov.f32f32 r1.z, r1.z
-(ss)bary.f r4.z, 12, r0.x
-bary.f r5.x, 13, r0.x
+mov.f32f32 r3.w, c9.x
+mad.f32 r1.y, r2.w, r2.w, r1.y
+mov.f32f32 r2.w, r2.y
mov.f32f32 r5.y, c9.x
-mad.f32 r1.w, r2.z, r1.z, r1.w
-mad.f32 r2.x, r2.w, r1.z, r2.x
-mad.f32 r1.z, r3.x, r1.z, r2.y
-sam (f32)(xyzw)r2.y, r3.z, s#2, t#2
-(ss)mov.f32f32 r3.z, r4.z
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r3.w, r5.x
-mul.f r1.w, r1.w, c8.w
-mul.f r2.x, r2.x, c8.w
-mul.f r1.z, r1.z, c8.w
-nop
-mov.f32f32 r1.w, r1.w
-bary.f r4.z, 8, r0.x
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r5.x, c9.x
-mul.f r5.z, r1.w, r4.z
-bary.f r5.w, 9, r0.x
-mul.f r6.x, r4.z, r1.w
-mov.f32f32 r6.y, c5.w
-mov.f32f32 r6.z, c5.z
-mad.f32 r5.z, r2.x, r5.w, r5.z
-mad.f32 r6.x, r5.w, r2.x, r6.x
-mov.f32f32 r6.w, c5.y
+mov.f32f32 r5.z, c9.x
+mov.f32f32 r5.w, c5.w
+mov.f32f32 r6.x, c5.z
+mov.f32f32 r6.y, c5.y
+rsq r1.y, r1.y
+(ss)mov.f32f32 r6.z, r1.y
+mad.f32 r1.w, r2.w, r2.w, r1.w
+add.f r3.w, r2.x, r3.w
+bary.f r2.x, 11, r0.x
+bary.f r2.w, 12, r0.x
+bary.f r6.w, 13, r0.x
+bary.f r7.x, 8, r0.x
+bary.f r7.y, 9, r0.x
+rsq r1.w, r1.w
+(ss)mov.f32f32 r7.z, r1.w
+(ss)mul.f r1.w, r2.y, r1.w
+sam (f32)(xyzw)r7.w, r3.z, s#2, t#2
bary.f (ei)r0.x, 10, r0.x
-mov.f32f32 r0.y, r5.z
-mov.f32f32 r5.z, r6.x
-mov.f32f32 r6.x, c5.x
-mad.f32 r0.y, r1.z, r0.x, r0.y
-mad.f32 r5.z, r0.x, r1.z, r5.z
-(rpt1)nop
-mul.f r1.w, r0.y, r1.w
-max.f r5.z, r5.z, c9.x
-mul.f r2.x, r0.y, r2.x
-mul.f r0.y, r0.y, r1.z
-mov.f32f32 r1.z, r1.w
-mov.f32f32 r1.w, r5.z
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r0.y, r0.y
-mul.f r1.z, c8.x, r1.z
-mad.f32 r1.w, c9.y, r1.w, c9.z
-mul.f r2.x, c8.x, r2.x
-mul.f r0.y, c8.x, r0.y
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r0.y, r0.y
-add.f r1.z, r4.z, (neg)r1.z
-(sy)mul.f r3.x, r3.x, r1.w
-mul.f r2.w, r2.w, r1.w
-mul.f r2.z, r2.z, r1.w
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r4.z, r3.x
-mov.f32f32 r5.z, r2.w
-mov.f32f32 r7.x, r2.z
-mul.f r7.y, r1.z, r1.z
-add.f r2.x, r5.w, (neg)r2.x
-add.f r4.z, r4.z, r5.x
-add.f r0.x, r0.x, (neg)r0.y
-mul.f r0.y, r2.y, r1.w
-mov.f32f32 r1.w, r2.x
-add.f r2.x, r4.z, r5.y
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r2.y, r0.y
-mad.f32 r4.z, r1.w, r1.w, r7.y
+mov.f32f32 r0.y, c5.x
+mul.f r2.y, r5.x, r7.z
+(ss)mul.f r3.z, r4.x, r7.z
+mad.f32 r2.y, r3.x, r6.z, r2.y
+mad.f32 r3.x, r3.y, r6.z, r3.z
+mad.f32 r1.y, r1.z, r1.y, r1.w
+nop
+mul.f r1.z, r2.y, c8.w
+mul.f r1.w, r3.x, c8.w
+mul.f r1.y, r1.y, c8.w
+nop
+mov.f32f32 r2.y, r1.z
+mul.f r1.z, r7.x, r1.z
+mov.f32f32 r3.x, r1.w
+mov.f32f32 r3.y, r1.y
+mul.f r3.z, r2.y, r7.x
+mad.f32 r1.z, r7.y, r1.w, r1.z
+mad.f32 r1.w, r3.x, r7.y, r3.z
+mad.f32 r1.y, r0.x, r1.y, r1.z
+mad.f32 r1.z, r3.y, r0.x, r1.w
(rpt2)nop
-mov.f32f32 r4.z, r4.z
-nop
-mad.f32 r4.z, r0.x, r0.x, r4.z
-(rpt5)nop
-rsq r4.z, r4.z
-(ss)mov.f32f32 r4.z, r4.z
+mul.f r1.w, r1.z, r2.y
+max.f r1.y, r1.y, c9.x
+mul.f r2.y, r1.z, r3.x
+mul.f r1.z, r1.z, r3.y
+mul.f r1.w, c8.x, r1.w
+mad.f32 r1.y, c9.y, r1.y, c9.z
+mul.f r2.y, c8.x, r2.y
+mul.f r1.z, c8.x, r1.z
+add.f r1.w, r7.x, (neg)r1.w
+mov.f32f32 r3.x, r1.y
+add.f r2.y, r7.y, (neg)r2.y
+add.f r0.x, r0.x, (neg)r1.z
+mov.f32f32 r1.z, r1.w
+(sy)mul.f r3.y, r8.z, r3.x
+mov.f32f32 r3.z, r2.y
+mov.f32f32 r3.w, r0.x
+mul.f r1.w, r1.w, r1.z
+add.f r4.x, r3.y, r5.z
+mad.f32 r1.w, r2.y, r3.z, r1.w
+mul.f r2.y, r8.y, r3.x
+mad.f32 r1.w, r3.w, r3.w, r1.w
+add.f r3.w, r4.x, r5.y
+mul.f r3.x, r8.x, r3.x
+mul.f r1.y, r7.w, r1.y
(rpt2)nop
-mul.f r1.z, r1.z, r4.z
-mul.f r1.w, r1.w, r4.z
-mul.f r0.x, r0.x, r4.z
-nop
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r0.x, r0.x
+rsq r1.w, r1.w
+(ss)mov.f32f32 r4.x, r1.w
+mul.f r0.x, r0.x, r1.w
+(rpt1)nop
+mul.f r1.z, r1.z, r4.x
+(ss)mul.f r1.w, r3.z, r4.x
+(rpt1)nop
+mul.f r1.z, r1.z, r2.x
nop
-mul.f r1.y, r1.z, r1.y
+mad.f32 r1.z, r1.w, r2.w, r1.z
nop
-mad.f32 r1.y, r1.w, r3.z, r1.y
-(rpt2)nop
-mov.f32f32 r1.y, r1.y
-nop
-mad.f32 r0.x, r0.x, r3.w, r1.y
-(rpt2)nop
-mov.f32f32 r0.x, r0.x
+mad.f32 r0.x, r0.x, r6.w, r1.z
(rpt2)nop
max.f r0.x, r0.x, c9.x
(rpt2)nop
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
-mov.f32f32 r1.y, r0.x
mov.f32f32 r1.z, r0.x
-cmps.f.lt r0.x, c9.x, r0.x
-(rpt1)nop
-mul.f r1.y, r1.y, r1.z
-cov.u32f32 r0.x, r0.x
+(rpt2)nop
+mul.f r0.x, r0.x, r1.z
+cmps.f.lt r1.z, c9.x, r1.z
(rpt1)nop
-mov.f32f32 r1.y, r1.y
-cmps.f.ne r0.x, r0.x, c9.x
+mov.f32f32 r1.w, r0.x
+cov.u32f32 r1.z, r1.z
(rpt1)nop
-mul.f r1.y, r1.y, r1.y
-sel.b32 r1.z, r2.x, r0.x, r3.x
+mul.f r0.x, r0.x, r1.w
+cmps.f.ne r1.z, r1.z, c9.x
(rpt1)nop
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.w, r1.z
+mov.f32f32 r1.w, r0.x
+sel.b32 r2.x, r3.w, r1.z, r3.y
+mul.f r0.x, r0.x, c9.w
+nop
+mul.f r1.w, r1.w, r1.w
+mad.f32 r2.w, r0.w, r2.x, r4.z
+add.f r3.y, r2.y, r0.x
+add.f r3.z, r3.x, r0.x
+mul.f r1.w, r1.w, c9.z
+add.f r0.x, r1.y, r0.x
+sel.b32 r2.x, r2.w, r0.z, r2.x
+nop
+mov.f32f32 r2.w, r1.w
+add.f r0.x, r0.x, r1.w
+sel.b32 r1.w, r5.w, r1.x, r2.x
+nop
+add.f r2.x, r3.y, r2.w
+add.f r2.w, r3.z, r2.w
+sel.b32 r0.x, r0.x, r1.z, r1.y
+nop
+sel.b32 r1.y, r2.x, r1.z, r2.y
+sel.b32 r1.z, r2.w, r1.z, r3.x
+mad.f32 r2.x, r0.w, r0.x, r2.z
+nop
+mad.f32 r2.y, r0.w, r1.y, r4.w
+mad.f32 r0.w, r0.w, r1.z, r4.y
+sel.b32 r0.x, r2.x, r0.z, r0.x
+nop
+sel.b32 r1.y, r2.y, r0.z, r1.y
+sel.b32 r0.z, r0.w, r0.z, r1.z
(rpt1)nop
-mul.f r2.x, r1.y, r1.y
-mov.f32f32 r1.w, r1.w
-mul.f r1.y, r1.y, c9.w
-nop
-mov.f32f32 r2.x, r2.x
-mad.f32 r1.w, r0.z, r1.w, r3.y
-mov.f32f32 r3.x, r1.y
-mov.f32f32 r3.y, r1.y
-mul.f r2.x, r2.x, c9.z
-sel.b32 r1.z, r1.w, r1.x, r1.z
-add.f r1.w, r5.z, r3.x
-add.f r3.x, r7.x, r3.y
-mov.f32f32 r2.x, r2.x
-sel.b32 r1.z, r6.y, r0.w, r1.z
-mov.f32f32 r1.y, r1.y
-nop
-mov.f32f32 r3.y, r2.x
-mov.f32f32 r3.z, r2.x
-mov.f32f32 r2.x, r2.x
-add.f r1.y, r2.y, r1.y
-add.f r2.y, r1.w, r3.y
-add.f r3.x, r3.x, r3.z
-mov.f32f32 r1.w, r1.z
-nop
-sel.b32 r1.z, r2.y, r0.x, r2.w
-sel.b32 r2.y, r3.x, r0.x, r2.z
-add.f r1.y, r1.y, r2.x
-nop
-mov.f32f32 r2.x, r1.z
-mov.f32f32 r2.z, r2.y
-sel.b32 r0.x, r1.y, r0.x, r0.y
-nop
-mov.f32f32 r0.y, r2.x
-mov.f32f32 r1.y, r2.z
-mov.f32f32 r2.x, r0.x
-nop
-mad.f32 r0.y, r0.z, r0.y, r4.y
-mad.f32 r1.y, r0.z, r1.y, r4.x
-mov.f32f32 r2.x, r2.x
-nop
-sel.b32 r0.y, r0.y, r1.x, r1.z
-sel.b32 r1.y, r1.y, r1.x, r2.y
-mad.f32 r0.z, r0.z, r2.x, r4.w
+sel.b32 r1.z, r6.x, r1.x, r1.y
+sel.b32 r1.y, r6.y, r1.x, r0.z
+sel.b32 r1.x, r0.y, r1.x, r0.x
+end
nop
-sel.b32 r0.y, r6.z, r0.w, r0.y
-sel.b32 r1.y, r6.w, r0.w, r1.y
-sel.b32 r0.x, r0.z, r1.x, r0.x
nop
-mov.f32f32 r1.z, r0.y
-mov.f32f32 r1.y, r1.y
-sel.b32 r0.x, r6.x, r0.w, r0.x
-(rpt2)nop
-mov.f32f32 r1.x, r0.x
-end
nop
; FRAG: outputs: r1.x (1:0)
-; FRAG: inputs: r0.x (5:0,cm=f,il=8,b=1) r1.x (5:20,cm=f,il=12,b=1) r0.w (5:21,cm=f,il=16,b=1) r3.x (5:22,cm=f,il=20,b=1)
-; FRAG: 297 instructions, 0 half, 8 full
+; FRAG: inputs: r0.x (5:0,cm=f,il=8,b=1) r1.x (5:20,cm=f,il=12,b=1) r1.y (5:21,cm=f,il=16,b=1) r3.x (5:22,cm=f,il=20,b=1)
+; FRAG: 193 instructions, 0 half, 9 full
diff --git a/reference/stk-mines/stk-mines-30.asm b/reference/stk-mines/stk-mines-30.asm
index 2d84780..726bef5 100644
--- a/reference/stk-mines/stk-mines-30.asm
+++ b/reference/stk-mines/stk-mines-30.asm
@@ -1,16 +1,16 @@
; options:
; VERT: new compiler
-@in(r1.x) in0
-@in(r1.y) in1
-@in(r1.z) in2
-@in(r1.w) in3
+@in(r4.z) in0
+@in(r4.w) in1
+@in(r5.x) in2
+@in(r5.y) in3
@in(r0.x) in4
@in(r0.y) in5
@in(r0.z) in6
-@in(r5.x) in8
-@in(r5.y) in9
-@in(r5.z) in10
-@in(r5.w) in11
+@in(r1.x) in8
+@in(r1.y) in9
+@in(r1.z) in10
+@in(r1.w) in11
@out(r0.x) out0
@out(r0.y) out1
@out(r0.z) out2
@@ -31,191 +31,144 @@
@out(r4.y) out17
@out(r4.z) out18
@out(r4.w) out19
+@const(c12.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)mov.f32f32 r0.w, c1.x
-mul.f r2.x, c8.x, r1.x
+mul.f r2.x, c8.x, r4.z
mov.f32f32 r2.y, c1.y
mov.f32f32 r2.z, c1.z
mul.f r2.w, r0.w, r0.x
mov.f32f32 r3.x, c2.x
-mad.f32 r2.x, c9.x, r1.y, r2.x
+mad.f32 r2.x, c9.x, r4.w, r2.x
mul.f r3.y, r0.w, r0.w
-mad.f32 r2.x, c10.x, r1.z, r2.x
+mad.f32 r2.x, c10.x, r5.x, r2.x
mad.f32 r2.w, r3.x, r0.y, r2.w
-mad.f32 r2.x, c11.x, r1.w, r2.x
-mad.f32 r3.x, r2.y, r2.y, r3.y
-mul.f r3.y, r2.y, r0.x
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r3.z, c3.x
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r3.x, r3.x
+mov.f32f32 r3.x, c3.x
+mad.f32 r2.x, c11.x, r5.y, r2.x
+mad.f32 r3.y, r2.y, r2.y, r3.y
+mul.f r3.z, r2.y, r0.x
+mad.f32 r2.w, r3.x, r0.z, r2.w
+mov.f32f32 r3.x, r2.x
+mad.f32 r3.y, r2.z, r2.z, r3.y
mov.f32f32 r3.w, c2.y
-mad.f32 r2.w, r3.z, r0.z, r2.w
-mul.f r3.z, r2.x, r2.x
-mul.f r4.x, c8.y, r1.x
-mad.f32 r3.x, r2.z, r2.z, r3.x
-mov.f32f32 r4.y, r2.w
-mad.f32 r2.w, c9.y, r1.y, r4.x
-mad.f32 r3.y, r3.w, r0.y, r3.y
-mul.f r0.x, r2.z, r0.x
-mul.f r3.w, r4.y, r4.y
-mad.f32 r2.w, c10.y, r1.z, r2.w
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r4.x, c3.y
-mad.f32 r2.w, c11.y, r1.w, r2.w
-rsq r3.x, r3.x
-(ss)mov.f32f32 r3.x, r3.x
-mov.f32f32 r4.z, c2.z
-mad.f32 r3.y, r4.x, r0.z, r3.y
mov.f32f32 r4.x, r2.w
-mul.f r0.w, r0.w, r3.x
-mul.f r2.y, r2.y, r3.x
-mov.f32f32 r3.y, r3.y
-mad.f32 r2.w, r4.x, r4.x, r3.z
+mul.f r3.x, r3.x, r3.x
+mul.f r4.y, c8.y, r4.z
+mad.f32 r3.z, r3.w, r0.y, r3.z
+mul.f r2.w, r2.w, r4.x
+mov.f32f32 r3.w, c3.y
+mad.f32 r4.y, c9.y, r4.w, r4.y
+rsq r3.y, r3.y
+(ss)mov.f32f32 r5.z, r3.y
+mad.f32 r4.y, c10.y, r5.x, r4.y
+mad.f32 r3.z, r3.w, r0.z, r3.z
+mad.f32 r3.w, c11.y, r5.y, r4.y
+mul.f r0.w, r0.w, r5.z
+mul.f r2.y, r2.y, r5.z
+mov.f32f32 r4.y, r3.z
+mov.f32f32 r5.z, r3.w
+mul.f r5.w, c0.x, r0.w
mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.y, r2.y
-mad.f32 r3.z, r3.y, r3.y, r3.w
-mov.f32f32 r2.w, r2.w
-mul.f r3.w, c8.z, r1.x
-mul.f r4.w, c0.x, r0.w
-mov.f32f32 r3.z, r3.z
-mad.f32 r0.x, r4.z, r0.y, r0.x
-mad.f32 r0.y, c9.z, r1.y, r3.w
-mad.f32 r3.w, c0.y, r2.y, r4.w
-mad.f32 r0.y, c10.z, r1.z, r0.y
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r4.z, c3.z
-mad.f32 r0.y, c11.z, r1.w, r0.y
-mov.f32f32 r3.w, r3.w
-mul.f r2.z, r2.z, r3.x
-mad.f32 r0.x, r4.z, r0.z, r0.x
-mov.f32f32 r0.y, r0.y
-mul.f r0.z, c4.w, r1.x
-mul.f r3.x, c4.z, r1.x
-mov.f32f32 r0.x, r0.x
-mad.f32 r2.w, r0.y, r0.y, r2.w
+mad.f32 r2.w, r3.z, r4.y, r2.w
+mul.f r0.x, r2.z, r0.x
+mov.f32f32 r3.z, c2.z
+mad.f32 r3.x, r3.w, r5.z, r3.x
+mul.f r3.w, c8.z, r4.z
+mad.f32 r5.w, c0.y, r2.y, r5.w
+mad.f32 r0.x, r3.z, r0.y, r0.x
+mov.f32f32 r0.y, c3.z
+mad.f32 r3.z, c9.z, r4.w, r3.w
+mul.f r2.z, r2.z, r3.y
+(ss)mad.f32 r3.y, c10.z, r5.x, r3.z
+mad.f32 r0.x, r0.y, r0.z, r0.x
+mad.f32 r0.y, c11.z, r5.y, r3.y
+mad.f32 r3.y, c0.z, r2.z, r5.w
+mov.f32f32 r0.z, r2.y
+mov.f32f32 r2.y, r0.x
+mov.f32f32 r3.z, r0.y
+mov.f32f32 r3.w, r3.y
mov.f32f32 r2.z, r2.z
-mad.f32 r0.z, c5.w, r1.y, r0.z
-mad.f32 r3.z, r0.x, r0.x, r3.z
-mad.f32 r0.z, c6.w, r1.z, r0.z
-mad.f32 r3.x, c5.z, r1.y, r3.x
-mul.f r4.z, c4.y, r1.x
-mul.f r1.x, c4.x, r1.x
-mad.f32 r0.z, c7.w, r1.w, r0.z
-mad.f32 r3.x, c6.z, r1.z, r3.x
-rsq r3.z, r3.z
-(ss)mov.f32f32 r3.z, r3.z
-rsq r2.w, r2.w
-(ss)mov.f32f32 r4.w, r2.w
-mad.f32 r3.w, c0.z, r2.z, r3.w
-(ss)mov.f32f32 r2.w, r0.z
-mul.f r0.x, r0.x, r3.z
-mul.f r2.x, r2.x, r4.w
-mul.f r4.y, r4.y, r3.z
-mul.f r3.y, r3.y, r3.z
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r3.z, r4.y
-mov.f32f32 r3.y, r3.y
-mul.f r4.y, r0.x, r2.y
-absneg.f r2.x, (neg)r2.x
-mul.f r6.x, c0.x, r3.z
-mad.f32 r4.y, r3.y, r2.z, (neg)r4.y
-mad.f32 r6.x, c0.y, r3.y, r6.x
-mov.f32f32 r6.y, r2.x
-mul.f r2.x, r3.z, r2.z
-mov.f32f32 r2.z, r4.y
-mov.f32f32 r4.y, r6.x
-mul.f r6.x, r6.y, r6.y
-mul.f r4.x, r4.x, r4.w
-mul.f r2.z, c0.x, r2.z
-mad.f32 r2.x, r0.x, r0.w, (neg)r2.x
-mad.f32 r0.x, c0.z, r0.x, r4.y
+mad.f32 r2.y, r2.y, r2.y, r2.w
+mad.f32 r0.y, r0.y, r3.z, r3.x
+mul.f r3.x, r3.w, r3.w
+mul.f r2.w, c4.w, r4.z
+mul.f r3.w, c4.z, r4.z
+mul.f r5.w, c4.y, r4.z
+mul.f r4.z, c4.x, r4.z
+rsq r2.y, r2.y
+(ss)mov.f32f32 r6.x, r2.y
+rsq r0.y, r0.y
+(ss)mov.f32f32 r6.y, r0.y
+(ss)mul.f r0.y, r2.x, r0.y
+mul.f r0.x, r0.x, r2.y
+mul.f r2.x, r4.y, r6.x
+mul.f r2.y, r5.z, r6.y
+mul.f r4.x, r4.x, r6.x
+absneg.f r4.y, (neg)r0.y
+mov.f32f32 r0.y, r2.x
+mov.f32f32 r5.z, r0.x
+absneg.f r2.y, (neg)r2.y
+mul.f r6.x, c0.x, r4.x
mov.f32f32 r4.x, r4.x
-mul.f r0.w, r3.y, r0.w
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r0.x, r0.x
-absneg.f r3.y, (neg)r4.x
-mad.f32 r0.w, r3.z, r2.y, (neg)r0.w
-mad.f32 r2.x, c0.y, r2.x, r2.z
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r4.x, r3.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r3.y, r3.w
-mul.f r0.y, r0.y, r4.w
-mad.f32 r0.w, c0.z, r0.w, r2.x
-mad.f32 r2.x, r4.x, r4.x, r6.x
-mul.f r2.y, r3.y, r3.y
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.x, r2.x
-mad.f32 r3.x, c7.z, r1.w, r3.x
-mad.f32 r3.z, c5.y, r1.y, r4.z
-mov.f32f32 r3.w, r0.w
-absneg.f r0.y, (neg)r0.y
-mov.f32f32 r2.z, r3.x
-mad.f32 r0.w, c6.y, r1.z, r3.z
-mad.f32 r2.y, r3.w, r3.w, r2.y
-mov.f32f32 r3.z, r0.y
-mad.f32 r0.y, c7.y, r1.w, r0.w
-mad.f32 r0.w, c5.x, r1.y, r1.x
-mov.f32f32 r1.x, r2.y
-mad.f32 r1.y, r3.z, r3.z, r2.x
-mad.f32 r1.x, r0.x, r0.x, r1.x
+mul.f r6.z, r5.z, r0.z
+mov.f32f32 r6.w, r2.y
+mad.f32 r6.z, r0.y, r2.z, (neg)r6.z
+mov.f32f32 r7.x, r4.y
+mad.f32 r2.x, c0.y, r2.x, r6.x
+mul.f r2.z, r4.x, r2.z
+mul.f r6.x, c0.x, r6.z
+mad.f32 r2.z, r5.z, r0.w, (neg)r2.z
+mul.f r5.z, r7.x, r7.x
+mad.f32 r0.x, c0.z, r0.x, r2.x
+mad.f32 r2.x, r2.y, r6.w, r5.z
+mad.f32 r2.y, c0.y, r2.z, r6.x
+mul.f r0.y, r0.y, r0.w
+mul.f r0.w, r3.z, r6.y
+mad.f32 r0.y, r4.x, r0.z, (neg)r0.y
+mov.f32f32 r0.z, r0.x
+mad.f32 r2.z, c5.w, r4.w, r2.w
+mad.f32 r2.w, c5.z, r4.w, r3.w
+mad.f32 r0.y, c0.z, r0.y, r2.y
+absneg.f r2.y, (neg)r0.w
+mov.f32f32 r3.z, r0.z
+mad.f32 r0.z, c6.w, r5.x, r2.z
+mov.f32f32 r2.z, r0.y
+mov.f32f32 r4.x, r2.y
+mad.f32 r0.w, c7.w, r5.y, r0.z
+mad.f32 r0.z, c6.z, r5.x, r2.w
+mov.f32f32 r5.z, r2.z
+mad.f32 r2.x, r2.y, r4.x, r2.x
+mov.f32f32 r2.w, r0.w
+mad.f32 r0.z, c7.z, r5.y, r0.z
+mad.f32 r0.y, r0.y, r5.z, r3.x
+mad.f32 r2.y, c5.y, r4.w, r5.w
+mad.f32 r0.x, r0.x, r3.z, r0.y
+mov.f32f32 r2.z, r0.z
+mad.f32 r0.y, c6.y, r5.x, r2.y
+mad.f32 r2.y, c5.x, r4.w, r4.z
+mad.f32 r0.y, c7.y, r5.y, r0.y
+mad.f32 r4.z, c6.x, r5.x, r2.y
+nop
+rsq r0.x, r0.x
+(ss)mov.f32f32 r2.y, r0.x
+mul.f r3.x, r3.y, r0.x
+(ss)rsq r0.x, r2.x
+(ss)mov.f32f32 r2.x, r0.x
+mul.f r3.w, r4.y, r0.x
+mul.f r3.z, r3.z, r2.y
+mul.f r3.y, r5.z, r2.y
+mul.f r4.y, r4.x, r2.x
+mul.f r4.x, r6.w, r2.x
mov.f32f32 r2.y, r0.y
-mad.f32 r1.z, c6.x, r1.z, r0.w
-mov.f32f32 r0.w, r0.z
-mov.f32f32 r0.z, r3.x
-mov.f32f32 r0.y, r0.y
-mad.f32 r1.z, c7.x, r1.w, r1.z
-rsq r1.x, r1.x
-(ss)mov.f32f32 r1.x, r1.x
-rsq r1.y, r1.y
-(ss)mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.w, (0.000000)
-mov.f32f32 r2.x, r1.z
-mul.f r0.x, r0.x, r1.x
-mul.f r3.x, r3.w, r1.x
-mul.f r1.x, r3.y, r1.x
-mul.f r3.y, r3.z, r1.y
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r3.w, r3.y
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r4.y, r3.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r1.x, r1.x
+mad.f32 r0.x, c7.x, r5.y, r4.z
+mov.f32f32 r4.z, r4.y
+mov.f32f32 r4.w, (0.000000)
nop
-mov.f32f32 r3.z, r0.x
-mov.f32f32 r3.y, r3.x
-mov.f32f32 r3.x, r1.x
-mov.f32f32 r0.x, r4.y
-mul.f r1.x, r4.x, r1.y
-mul.f r1.y, r6.y, r1.y
-mov.f32f32 r4.z, r3.w
-mov.f32f32 r4.y, r0.x
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r0.x, r1.z
-mov.f32f32 r4.w, r1.w
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.y, r1.y
-(rpt1)nop
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r3.w, r1.y
-mov.f32f32 r1.w, r5.w
-mov.f32f32 r1.z, r5.z
-mov.f32f32 r4.x, r1.x
-mov.f32f32 r1.y, r5.y
-mov.f32f32 r1.x, r5.x
+mov.f32f32 r2.x, r0.x
end
nop
nop
+nop
; VERT: outputs: r0.x (0:0) r1.x (5:0) r2.x (5:20) r3.x (5:21) r4.x (5:22)
-; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r5.x (0:0,cm=f,il=16,b=0)
-; VERT: 183 instructions, 0 half, 7 full
+; VERT: inputs: r4.z (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r1.x (0:0,cm=f,il=16,b=0)
+; VERT: 133 instructions, 0 half, 8 full
diff --git a/reference/stk-mines/stk-mines-31.asm b/reference/stk-mines/stk-mines-31.asm
index 1435d60..141701a 100644
--- a/reference/stk-mines/stk-mines-31.asm
+++ b/reference/stk-mines/stk-mines-31.asm
@@ -6,275 +6,191 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c8.x) 0x40000000, 0xbf800000, 0x40800000, 0x3f000000
+@const(c9.x) 0x00000000, 0x3f333333, 0x3e99999a, 0x3d4ccccd
+@const(c10.x) 0x3f800000, 0x00000000, 0x00000000, 0x00000000
+@const(c11.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.z, 0, r0.x
absneg.f r0.w, (neg)c6.x
-mov.f32f32 r1.x, c3.x
-bary.f r1.y, 1, r0.x
-add.f r1.z, r0.z, c3.x
-add.f r1.w, r0.z, c4.x
-add.f r2.x, c7.x, r0.w
-add.f r0.z, r0.z, r1.x
-mov.f32f32 r1.x, r1.z
-mov.f32f32 r1.z, r1.w
-mov.f32f32 r1.w, r2.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r2.x, c9.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r2.y, r1.x
-add.f r1.x, r1.y, c3.y
-mov.f32f32 r2.w, r1.z
-add.f r1.z, r1.y, c4.y
-rcp r1.w, r1.w
-(ss)mov.f32f32 r1.w, r1.w
-mov.f32f32 r1.x, r1.x
-bary.f r3.y, 6, r0.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r1.x, r1.x
-add.f r0.w, r3.y, r0.w
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r3.z, r0.z
-mov.f32f32 r2.z, r1.x
-mul.f r0.z, r0.w, r1.w
-mov.f32f32 r3.x, r1.z
-add.f r0.w, r1.y, r2.x
-cmps.f.lt r1.x, c6.x, r3.y
-cmps.f.lt r1.y, c7.x, r3.y
-bary.f r1.z, 11, r0.x
-sam (f32)(xyz)r1.w, r2.y, s#0, t#0
-(sy)mad.f32 r2.x, c8.x, r2.x, c8.y
-mov.f32f32 r0.z, r0.z
-(ss)nop
-sam (f32)(xyz)r2.z, r2.w, s#1, t#1
-(sy)(ss)mad.f32 r2.w, c8.x, r2.w, c8.y
-mad.f32 r2.z, c8.x, r2.z, c8.y
-mov.f32f32 r2.x, r2.x
-mul.f r3.y, r0.z, c5.w
-mul.f r3.w, r0.z, c5.z
-mul.f r4.x, r0.z, c5.y
-mul.f r2.x, c8.z, r2.x
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r2.x, r2.x
-mad.f32 r1.w, c8.x, r1.w, c8.y
-mul.f r2.w, c8.z, r2.w
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r4.y, r3.w
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r2.w, r2.w
-mul.f r3.w, r2.z, r2.z
-mov.f32f32 r4.x, r4.x
-mul.f r4.z, r1.w, r1.w
-mov.f32f32 r2.w, r2.w
-mad.f32 r4.z, r2.x, r2.x, r4.z
-mul.f r4.w, r0.z, c5.x
-add.f r0.z, c10.x, (neg)r0.z
-mad.f32 r3.w, r2.w, r2.w, r3.w
-mov.f32f32 r4.z, r4.z
+bary.f r1.x, 6, r0.x
+mov.f32f32 r1.y, c3.x
+add.f r1.z, r0.z, c4.x
+bary.f r2.x, 1, r0.x
+add.f r2.y, r0.z, c3.x
+add.f r2.w, c7.x, r0.w
+cmps.f.lt r3.x, c6.x, r1.x
+add.f r1.w, r2.x, c4.y
+add.f r2.z, r2.x, c3.y
+cmps.f.lt r3.y, c7.x, r1.x
+add.f r3.z, r0.z, r1.y
+cov.u32f32 r0.z, r3.x
+rcp r1.y, r2.w
+add.f r0.w, r1.x, r0.w
+cov.u32f32 r1.x, r3.y
+sam (f32)(xyz)r3.w, r1.z, s#1, t#1
+(sy)(ss)mad.f32 r1.z, c8.x, r4.x, c8.y
+sam (f32)(xyz)r2.y, r2.y, s#0, t#0
+(sy)mad.f32 r1.w, c8.x, r2.z, c8.y
+(ss)mul.f r0.w, r0.w, r1.y
+mad.f32 r1.y, c8.x, r3.w, c8.y
+mul.f r1.z, c8.z, r1.z
+mul.f r1.w, c8.z, r1.w
+(ss)mov.f32f32 r2.z, r0.w
+mov.f32f32 r3.x, r1.y
+mov.f32f32 r3.y, r1.z
+mov.f32f32 r4.x, r1.w
mad.f32 r2.y, c8.x, r2.y, c8.y
-mov.f32f32 r4.w, r4.w
-mad.f32 r3.x, c8.x, r3.x, c8.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r3.w, r3.w
mov.f32f32 r3.x, r3.x
-mov.f32f32 r0.w, r0.w
-mad.f32 r4.z, r2.y, r2.y, r4.z
-cov.u32f32 r1.x, r1.x
-cov.u32f32 r1.y, r1.y
-(rpt3)nop
-rsq r4.z, r4.z
-(ss)mov.f32f32 r4.z, r4.z
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r0.w, r0.w
+mov.f32f32 r3.y, r3.y
+mul.f r4.z, r2.z, c5.w
+mul.f r4.w, r2.z, c5.z
+mul.f r1.y, r1.y, r3.x
+mov.f32f32 r5.x, r2.y
+mad.f32 r1.y, r1.z, r3.y, r1.y
+mad.f32 r1.z, c8.x, r4.y, c8.y
+mul.f r4.y, r2.z, c5.y
+mul.f r2.z, r2.z, c5.x
+mul.f r2.y, r2.y, r5.x
+mov.f32f32 r3.w, r1.z
+mad.f32 r1.w, r1.w, r4.x, r2.y
+mad.f32 r2.y, c8.x, r2.w, c8.y
+add.f r0.w, c10.x, (neg)r0.w
+mov.f32f32 r2.w, r3.w
+cmps.f.ne r0.z, r0.z, c9.x
cmps.f.ne r1.x, r1.x, c9.x
-mul.f r1.w, r1.w, r4.z
-mul.f r2.x, r2.x, r4.z
-mul.f r2.y, r2.y, r4.z
-mad.f32 r4.z, r3.x, r3.x, r3.w
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r3.w, r0.w
-cmps.f.ne r0.w, r1.y, c9.x
-mov.f32f32 r1.y, r1.z
-rsq r1.z, r4.z
-(ss)mov.f32f32 r1.z, r1.z
-(ss)bary.f r4.z, 12, r0.x
-bary.f r5.x, 13, r0.x
+mov.f32f32 r3.w, c9.x
+mad.f32 r1.y, r2.w, r2.w, r1.y
+mov.f32f32 r2.w, r2.y
mov.f32f32 r5.y, c9.x
-mad.f32 r1.w, r2.z, r1.z, r1.w
-mad.f32 r2.x, r2.w, r1.z, r2.x
-mad.f32 r1.z, r3.x, r1.z, r2.y
-sam (f32)(xyzw)r2.y, r3.z, s#2, t#2
-(ss)mov.f32f32 r3.z, r4.z
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r3.w, r5.x
-mul.f r1.w, r1.w, c8.w
-mul.f r2.x, r2.x, c8.w
-mul.f r1.z, r1.z, c8.w
-nop
-mov.f32f32 r1.w, r1.w
-bary.f r4.z, 8, r0.x
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r5.x, c9.x
-mul.f r5.z, r1.w, r4.z
-bary.f r5.w, 9, r0.x
-mul.f r6.x, r4.z, r1.w
-mov.f32f32 r6.y, c5.w
-mov.f32f32 r6.z, c5.z
-mad.f32 r5.z, r2.x, r5.w, r5.z
-mad.f32 r6.x, r5.w, r2.x, r6.x
-mov.f32f32 r6.w, c5.y
+mov.f32f32 r5.z, c9.x
+mov.f32f32 r5.w, c5.w
+mov.f32f32 r6.x, c5.z
+mov.f32f32 r6.y, c5.y
+rsq r1.y, r1.y
+(ss)mov.f32f32 r6.z, r1.y
+mad.f32 r1.w, r2.w, r2.w, r1.w
+add.f r3.w, r2.x, r3.w
+bary.f r2.x, 11, r0.x
+bary.f r2.w, 12, r0.x
+bary.f r6.w, 13, r0.x
+bary.f r7.x, 8, r0.x
+bary.f r7.y, 9, r0.x
+rsq r1.w, r1.w
+(ss)mov.f32f32 r7.z, r1.w
+(ss)mul.f r1.w, r2.y, r1.w
+sam (f32)(xyzw)r7.w, r3.z, s#2, t#2
bary.f (ei)r0.x, 10, r0.x
-mov.f32f32 r0.y, r5.z
-mov.f32f32 r5.z, r6.x
-mov.f32f32 r6.x, c5.x
-mad.f32 r0.y, r1.z, r0.x, r0.y
-mad.f32 r5.z, r0.x, r1.z, r5.z
-(rpt1)nop
-mul.f r1.w, r0.y, r1.w
-max.f r5.z, r5.z, c9.x
-mul.f r2.x, r0.y, r2.x
-mul.f r0.y, r0.y, r1.z
-mov.f32f32 r1.z, r1.w
-mov.f32f32 r1.w, r5.z
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r0.y, r0.y
-mul.f r1.z, c8.x, r1.z
-mad.f32 r1.w, c9.y, r1.w, c9.z
-mul.f r2.x, c8.x, r2.x
-mul.f r0.y, c8.x, r0.y
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r0.y, r0.y
-add.f r1.z, r4.z, (neg)r1.z
-(sy)mul.f r3.x, r3.x, r1.w
-mul.f r2.w, r2.w, r1.w
-mul.f r2.z, r2.z, r1.w
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r4.z, r3.x
-mov.f32f32 r5.z, r2.w
-mov.f32f32 r7.x, r2.z
-mul.f r7.y, r1.z, r1.z
-add.f r2.x, r5.w, (neg)r2.x
-add.f r4.z, r4.z, r5.x
-add.f r0.x, r0.x, (neg)r0.y
-mul.f r0.y, r2.y, r1.w
-mov.f32f32 r1.w, r2.x
-add.f r2.x, r4.z, r5.y
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r2.y, r0.y
-mad.f32 r4.z, r1.w, r1.w, r7.y
+mov.f32f32 r0.y, c5.x
+mul.f r2.y, r5.x, r7.z
+(ss)mul.f r3.z, r4.x, r7.z
+mad.f32 r2.y, r3.x, r6.z, r2.y
+mad.f32 r3.x, r3.y, r6.z, r3.z
+mad.f32 r1.y, r1.z, r1.y, r1.w
+nop
+mul.f r1.z, r2.y, c8.w
+mul.f r1.w, r3.x, c8.w
+mul.f r1.y, r1.y, c8.w
+nop
+mov.f32f32 r2.y, r1.z
+mul.f r1.z, r7.x, r1.z
+mov.f32f32 r3.x, r1.w
+mov.f32f32 r3.y, r1.y
+mul.f r3.z, r2.y, r7.x
+mad.f32 r1.z, r7.y, r1.w, r1.z
+mad.f32 r1.w, r3.x, r7.y, r3.z
+mad.f32 r1.y, r0.x, r1.y, r1.z
+mad.f32 r1.z, r3.y, r0.x, r1.w
(rpt2)nop
-mov.f32f32 r4.z, r4.z
-nop
-mad.f32 r4.z, r0.x, r0.x, r4.z
-(rpt5)nop
-rsq r4.z, r4.z
-(ss)mov.f32f32 r4.z, r4.z
+mul.f r1.w, r1.z, r2.y
+max.f r1.y, r1.y, c9.x
+mul.f r2.y, r1.z, r3.x
+mul.f r1.z, r1.z, r3.y
+mul.f r1.w, c8.x, r1.w
+mad.f32 r1.y, c9.y, r1.y, c9.z
+mul.f r2.y, c8.x, r2.y
+mul.f r1.z, c8.x, r1.z
+add.f r1.w, r7.x, (neg)r1.w
+mov.f32f32 r3.x, r1.y
+add.f r2.y, r7.y, (neg)r2.y
+add.f r0.x, r0.x, (neg)r1.z
+mov.f32f32 r1.z, r1.w
+(sy)mul.f r3.y, r8.z, r3.x
+mov.f32f32 r3.z, r2.y
+mov.f32f32 r3.w, r0.x
+mul.f r1.w, r1.w, r1.z
+add.f r4.x, r3.y, r5.z
+mad.f32 r1.w, r2.y, r3.z, r1.w
+mul.f r2.y, r8.y, r3.x
+mad.f32 r1.w, r3.w, r3.w, r1.w
+add.f r3.w, r4.x, r5.y
+mul.f r3.x, r8.x, r3.x
+mul.f r1.y, r7.w, r1.y
(rpt2)nop
-mul.f r1.z, r1.z, r4.z
-mul.f r1.w, r1.w, r4.z
-mul.f r0.x, r0.x, r4.z
-nop
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r0.x, r0.x
+rsq r1.w, r1.w
+(ss)mov.f32f32 r4.x, r1.w
+mul.f r0.x, r0.x, r1.w
+(rpt1)nop
+mul.f r1.z, r1.z, r4.x
+(ss)mul.f r1.w, r3.z, r4.x
+(rpt1)nop
+mul.f r1.z, r1.z, r2.x
nop
-mul.f r1.y, r1.z, r1.y
+mad.f32 r1.z, r1.w, r2.w, r1.z
nop
-mad.f32 r1.y, r1.w, r3.z, r1.y
-(rpt2)nop
-mov.f32f32 r1.y, r1.y
-nop
-mad.f32 r0.x, r0.x, r3.w, r1.y
-(rpt2)nop
-mov.f32f32 r0.x, r0.x
+mad.f32 r0.x, r0.x, r6.w, r1.z
(rpt2)nop
max.f r0.x, r0.x, c9.x
(rpt2)nop
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
-mov.f32f32 r1.y, r0.x
mov.f32f32 r1.z, r0.x
-cmps.f.lt r0.x, c9.x, r0.x
-(rpt1)nop
-mul.f r1.y, r1.y, r1.z
-cov.u32f32 r0.x, r0.x
+(rpt2)nop
+mul.f r0.x, r0.x, r1.z
+cmps.f.lt r1.z, c9.x, r1.z
(rpt1)nop
-mov.f32f32 r1.y, r1.y
-cmps.f.ne r0.x, r0.x, c9.x
+mov.f32f32 r1.w, r0.x
+cov.u32f32 r1.z, r1.z
(rpt1)nop
-mul.f r1.y, r1.y, r1.y
-sel.b32 r1.z, r2.x, r0.x, r3.x
+mul.f r0.x, r0.x, r1.w
+cmps.f.ne r1.z, r1.z, c9.x
(rpt1)nop
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.w, r1.z
+mov.f32f32 r1.w, r0.x
+sel.b32 r2.x, r3.w, r1.z, r3.y
+mul.f r0.x, r0.x, c9.w
+nop
+mul.f r1.w, r1.w, r1.w
+mad.f32 r2.w, r0.w, r2.x, r4.z
+add.f r3.y, r2.y, r0.x
+add.f r3.z, r3.x, r0.x
+mul.f r1.w, r1.w, c9.z
+add.f r0.x, r1.y, r0.x
+sel.b32 r2.x, r2.w, r0.z, r2.x
+nop
+mov.f32f32 r2.w, r1.w
+add.f r0.x, r0.x, r1.w
+sel.b32 r1.w, r5.w, r1.x, r2.x
+nop
+add.f r2.x, r3.y, r2.w
+add.f r2.w, r3.z, r2.w
+sel.b32 r0.x, r0.x, r1.z, r1.y
+nop
+sel.b32 r1.y, r2.x, r1.z, r2.y
+sel.b32 r1.z, r2.w, r1.z, r3.x
+mad.f32 r2.x, r0.w, r0.x, r2.z
+nop
+mad.f32 r2.y, r0.w, r1.y, r4.w
+mad.f32 r0.w, r0.w, r1.z, r4.y
+sel.b32 r0.x, r2.x, r0.z, r0.x
+nop
+sel.b32 r1.y, r2.y, r0.z, r1.y
+sel.b32 r0.z, r0.w, r0.z, r1.z
(rpt1)nop
-mul.f r2.x, r1.y, r1.y
-mov.f32f32 r1.w, r1.w
-mul.f r1.y, r1.y, c9.w
-nop
-mov.f32f32 r2.x, r2.x
-mad.f32 r1.w, r0.z, r1.w, r3.y
-mov.f32f32 r3.x, r1.y
-mov.f32f32 r3.y, r1.y
-mul.f r2.x, r2.x, c9.z
-sel.b32 r1.z, r1.w, r1.x, r1.z
-add.f r1.w, r5.z, r3.x
-add.f r3.x, r7.x, r3.y
-mov.f32f32 r2.x, r2.x
-sel.b32 r1.z, r6.y, r0.w, r1.z
-mov.f32f32 r1.y, r1.y
-nop
-mov.f32f32 r3.y, r2.x
-mov.f32f32 r3.z, r2.x
-mov.f32f32 r2.x, r2.x
-add.f r1.y, r2.y, r1.y
-add.f r2.y, r1.w, r3.y
-add.f r3.x, r3.x, r3.z
-mov.f32f32 r1.w, r1.z
-nop
-sel.b32 r1.z, r2.y, r0.x, r2.w
-sel.b32 r2.y, r3.x, r0.x, r2.z
-add.f r1.y, r1.y, r2.x
-nop
-mov.f32f32 r2.x, r1.z
-mov.f32f32 r2.z, r2.y
-sel.b32 r0.x, r1.y, r0.x, r0.y
-nop
-mov.f32f32 r0.y, r2.x
-mov.f32f32 r1.y, r2.z
-mov.f32f32 r2.x, r0.x
-nop
-mad.f32 r0.y, r0.z, r0.y, r4.y
-mad.f32 r1.y, r0.z, r1.y, r4.x
-mov.f32f32 r2.x, r2.x
-nop
-sel.b32 r0.y, r0.y, r1.x, r1.z
-sel.b32 r1.y, r1.y, r1.x, r2.y
-mad.f32 r0.z, r0.z, r2.x, r4.w
+sel.b32 r1.z, r6.x, r1.x, r1.y
+sel.b32 r1.y, r6.y, r1.x, r0.z
+sel.b32 r1.x, r0.y, r1.x, r0.x
+end
nop
-sel.b32 r0.y, r6.z, r0.w, r0.y
-sel.b32 r1.y, r6.w, r0.w, r1.y
-sel.b32 r0.x, r0.z, r1.x, r0.x
nop
-mov.f32f32 r1.z, r0.y
-mov.f32f32 r1.y, r1.y
-sel.b32 r0.x, r6.x, r0.w, r0.x
-(rpt2)nop
-mov.f32f32 r1.x, r0.x
-end
nop
; FRAG: outputs: r1.x (1:0)
-; FRAG: inputs: r0.x (5:0,cm=f,il=8,b=1) r1.x (5:20,cm=f,il=12,b=1) r0.w (5:21,cm=f,il=16,b=1) r3.x (5:22,cm=f,il=20,b=1)
-; FRAG: 297 instructions, 0 half, 8 full
+; FRAG: inputs: r0.x (5:0,cm=f,il=8,b=1) r1.x (5:20,cm=f,il=12,b=1) r1.y (5:21,cm=f,il=16,b=1) r3.x (5:22,cm=f,il=20,b=1)
+; FRAG: 193 instructions, 0 half, 9 full
diff --git a/reference/stk-mines/stk-mines-32.asm b/reference/stk-mines/stk-mines-32.asm
index 2d84780..726bef5 100644
--- a/reference/stk-mines/stk-mines-32.asm
+++ b/reference/stk-mines/stk-mines-32.asm
@@ -1,16 +1,16 @@
; options:
; VERT: new compiler
-@in(r1.x) in0
-@in(r1.y) in1
-@in(r1.z) in2
-@in(r1.w) in3
+@in(r4.z) in0
+@in(r4.w) in1
+@in(r5.x) in2
+@in(r5.y) in3
@in(r0.x) in4
@in(r0.y) in5
@in(r0.z) in6
-@in(r5.x) in8
-@in(r5.y) in9
-@in(r5.z) in10
-@in(r5.w) in11
+@in(r1.x) in8
+@in(r1.y) in9
+@in(r1.z) in10
+@in(r1.w) in11
@out(r0.x) out0
@out(r0.y) out1
@out(r0.z) out2
@@ -31,191 +31,144 @@
@out(r4.y) out17
@out(r4.z) out18
@out(r4.w) out19
+@const(c12.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)mov.f32f32 r0.w, c1.x
-mul.f r2.x, c8.x, r1.x
+mul.f r2.x, c8.x, r4.z
mov.f32f32 r2.y, c1.y
mov.f32f32 r2.z, c1.z
mul.f r2.w, r0.w, r0.x
mov.f32f32 r3.x, c2.x
-mad.f32 r2.x, c9.x, r1.y, r2.x
+mad.f32 r2.x, c9.x, r4.w, r2.x
mul.f r3.y, r0.w, r0.w
-mad.f32 r2.x, c10.x, r1.z, r2.x
+mad.f32 r2.x, c10.x, r5.x, r2.x
mad.f32 r2.w, r3.x, r0.y, r2.w
-mad.f32 r2.x, c11.x, r1.w, r2.x
-mad.f32 r3.x, r2.y, r2.y, r3.y
-mul.f r3.y, r2.y, r0.x
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r3.z, c3.x
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r3.x, r3.x
+mov.f32f32 r3.x, c3.x
+mad.f32 r2.x, c11.x, r5.y, r2.x
+mad.f32 r3.y, r2.y, r2.y, r3.y
+mul.f r3.z, r2.y, r0.x
+mad.f32 r2.w, r3.x, r0.z, r2.w
+mov.f32f32 r3.x, r2.x
+mad.f32 r3.y, r2.z, r2.z, r3.y
mov.f32f32 r3.w, c2.y
-mad.f32 r2.w, r3.z, r0.z, r2.w
-mul.f r3.z, r2.x, r2.x
-mul.f r4.x, c8.y, r1.x
-mad.f32 r3.x, r2.z, r2.z, r3.x
-mov.f32f32 r4.y, r2.w
-mad.f32 r2.w, c9.y, r1.y, r4.x
-mad.f32 r3.y, r3.w, r0.y, r3.y
-mul.f r0.x, r2.z, r0.x
-mul.f r3.w, r4.y, r4.y
-mad.f32 r2.w, c10.y, r1.z, r2.w
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r4.x, c3.y
-mad.f32 r2.w, c11.y, r1.w, r2.w
-rsq r3.x, r3.x
-(ss)mov.f32f32 r3.x, r3.x
-mov.f32f32 r4.z, c2.z
-mad.f32 r3.y, r4.x, r0.z, r3.y
mov.f32f32 r4.x, r2.w
-mul.f r0.w, r0.w, r3.x
-mul.f r2.y, r2.y, r3.x
-mov.f32f32 r3.y, r3.y
-mad.f32 r2.w, r4.x, r4.x, r3.z
+mul.f r3.x, r3.x, r3.x
+mul.f r4.y, c8.y, r4.z
+mad.f32 r3.z, r3.w, r0.y, r3.z
+mul.f r2.w, r2.w, r4.x
+mov.f32f32 r3.w, c3.y
+mad.f32 r4.y, c9.y, r4.w, r4.y
+rsq r3.y, r3.y
+(ss)mov.f32f32 r5.z, r3.y
+mad.f32 r4.y, c10.y, r5.x, r4.y
+mad.f32 r3.z, r3.w, r0.z, r3.z
+mad.f32 r3.w, c11.y, r5.y, r4.y
+mul.f r0.w, r0.w, r5.z
+mul.f r2.y, r2.y, r5.z
+mov.f32f32 r4.y, r3.z
+mov.f32f32 r5.z, r3.w
+mul.f r5.w, c0.x, r0.w
mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.y, r2.y
-mad.f32 r3.z, r3.y, r3.y, r3.w
-mov.f32f32 r2.w, r2.w
-mul.f r3.w, c8.z, r1.x
-mul.f r4.w, c0.x, r0.w
-mov.f32f32 r3.z, r3.z
-mad.f32 r0.x, r4.z, r0.y, r0.x
-mad.f32 r0.y, c9.z, r1.y, r3.w
-mad.f32 r3.w, c0.y, r2.y, r4.w
-mad.f32 r0.y, c10.z, r1.z, r0.y
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r4.z, c3.z
-mad.f32 r0.y, c11.z, r1.w, r0.y
-mov.f32f32 r3.w, r3.w
-mul.f r2.z, r2.z, r3.x
-mad.f32 r0.x, r4.z, r0.z, r0.x
-mov.f32f32 r0.y, r0.y
-mul.f r0.z, c4.w, r1.x
-mul.f r3.x, c4.z, r1.x
-mov.f32f32 r0.x, r0.x
-mad.f32 r2.w, r0.y, r0.y, r2.w
+mad.f32 r2.w, r3.z, r4.y, r2.w
+mul.f r0.x, r2.z, r0.x
+mov.f32f32 r3.z, c2.z
+mad.f32 r3.x, r3.w, r5.z, r3.x
+mul.f r3.w, c8.z, r4.z
+mad.f32 r5.w, c0.y, r2.y, r5.w
+mad.f32 r0.x, r3.z, r0.y, r0.x
+mov.f32f32 r0.y, c3.z
+mad.f32 r3.z, c9.z, r4.w, r3.w
+mul.f r2.z, r2.z, r3.y
+(ss)mad.f32 r3.y, c10.z, r5.x, r3.z
+mad.f32 r0.x, r0.y, r0.z, r0.x
+mad.f32 r0.y, c11.z, r5.y, r3.y
+mad.f32 r3.y, c0.z, r2.z, r5.w
+mov.f32f32 r0.z, r2.y
+mov.f32f32 r2.y, r0.x
+mov.f32f32 r3.z, r0.y
+mov.f32f32 r3.w, r3.y
mov.f32f32 r2.z, r2.z
-mad.f32 r0.z, c5.w, r1.y, r0.z
-mad.f32 r3.z, r0.x, r0.x, r3.z
-mad.f32 r0.z, c6.w, r1.z, r0.z
-mad.f32 r3.x, c5.z, r1.y, r3.x
-mul.f r4.z, c4.y, r1.x
-mul.f r1.x, c4.x, r1.x
-mad.f32 r0.z, c7.w, r1.w, r0.z
-mad.f32 r3.x, c6.z, r1.z, r3.x
-rsq r3.z, r3.z
-(ss)mov.f32f32 r3.z, r3.z
-rsq r2.w, r2.w
-(ss)mov.f32f32 r4.w, r2.w
-mad.f32 r3.w, c0.z, r2.z, r3.w
-(ss)mov.f32f32 r2.w, r0.z
-mul.f r0.x, r0.x, r3.z
-mul.f r2.x, r2.x, r4.w
-mul.f r4.y, r4.y, r3.z
-mul.f r3.y, r3.y, r3.z
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r3.z, r4.y
-mov.f32f32 r3.y, r3.y
-mul.f r4.y, r0.x, r2.y
-absneg.f r2.x, (neg)r2.x
-mul.f r6.x, c0.x, r3.z
-mad.f32 r4.y, r3.y, r2.z, (neg)r4.y
-mad.f32 r6.x, c0.y, r3.y, r6.x
-mov.f32f32 r6.y, r2.x
-mul.f r2.x, r3.z, r2.z
-mov.f32f32 r2.z, r4.y
-mov.f32f32 r4.y, r6.x
-mul.f r6.x, r6.y, r6.y
-mul.f r4.x, r4.x, r4.w
-mul.f r2.z, c0.x, r2.z
-mad.f32 r2.x, r0.x, r0.w, (neg)r2.x
-mad.f32 r0.x, c0.z, r0.x, r4.y
+mad.f32 r2.y, r2.y, r2.y, r2.w
+mad.f32 r0.y, r0.y, r3.z, r3.x
+mul.f r3.x, r3.w, r3.w
+mul.f r2.w, c4.w, r4.z
+mul.f r3.w, c4.z, r4.z
+mul.f r5.w, c4.y, r4.z
+mul.f r4.z, c4.x, r4.z
+rsq r2.y, r2.y
+(ss)mov.f32f32 r6.x, r2.y
+rsq r0.y, r0.y
+(ss)mov.f32f32 r6.y, r0.y
+(ss)mul.f r0.y, r2.x, r0.y
+mul.f r0.x, r0.x, r2.y
+mul.f r2.x, r4.y, r6.x
+mul.f r2.y, r5.z, r6.y
+mul.f r4.x, r4.x, r6.x
+absneg.f r4.y, (neg)r0.y
+mov.f32f32 r0.y, r2.x
+mov.f32f32 r5.z, r0.x
+absneg.f r2.y, (neg)r2.y
+mul.f r6.x, c0.x, r4.x
mov.f32f32 r4.x, r4.x
-mul.f r0.w, r3.y, r0.w
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r0.x, r0.x
-absneg.f r3.y, (neg)r4.x
-mad.f32 r0.w, r3.z, r2.y, (neg)r0.w
-mad.f32 r2.x, c0.y, r2.x, r2.z
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r4.x, r3.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r3.y, r3.w
-mul.f r0.y, r0.y, r4.w
-mad.f32 r0.w, c0.z, r0.w, r2.x
-mad.f32 r2.x, r4.x, r4.x, r6.x
-mul.f r2.y, r3.y, r3.y
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.x, r2.x
-mad.f32 r3.x, c7.z, r1.w, r3.x
-mad.f32 r3.z, c5.y, r1.y, r4.z
-mov.f32f32 r3.w, r0.w
-absneg.f r0.y, (neg)r0.y
-mov.f32f32 r2.z, r3.x
-mad.f32 r0.w, c6.y, r1.z, r3.z
-mad.f32 r2.y, r3.w, r3.w, r2.y
-mov.f32f32 r3.z, r0.y
-mad.f32 r0.y, c7.y, r1.w, r0.w
-mad.f32 r0.w, c5.x, r1.y, r1.x
-mov.f32f32 r1.x, r2.y
-mad.f32 r1.y, r3.z, r3.z, r2.x
-mad.f32 r1.x, r0.x, r0.x, r1.x
+mul.f r6.z, r5.z, r0.z
+mov.f32f32 r6.w, r2.y
+mad.f32 r6.z, r0.y, r2.z, (neg)r6.z
+mov.f32f32 r7.x, r4.y
+mad.f32 r2.x, c0.y, r2.x, r6.x
+mul.f r2.z, r4.x, r2.z
+mul.f r6.x, c0.x, r6.z
+mad.f32 r2.z, r5.z, r0.w, (neg)r2.z
+mul.f r5.z, r7.x, r7.x
+mad.f32 r0.x, c0.z, r0.x, r2.x
+mad.f32 r2.x, r2.y, r6.w, r5.z
+mad.f32 r2.y, c0.y, r2.z, r6.x
+mul.f r0.y, r0.y, r0.w
+mul.f r0.w, r3.z, r6.y
+mad.f32 r0.y, r4.x, r0.z, (neg)r0.y
+mov.f32f32 r0.z, r0.x
+mad.f32 r2.z, c5.w, r4.w, r2.w
+mad.f32 r2.w, c5.z, r4.w, r3.w
+mad.f32 r0.y, c0.z, r0.y, r2.y
+absneg.f r2.y, (neg)r0.w
+mov.f32f32 r3.z, r0.z
+mad.f32 r0.z, c6.w, r5.x, r2.z
+mov.f32f32 r2.z, r0.y
+mov.f32f32 r4.x, r2.y
+mad.f32 r0.w, c7.w, r5.y, r0.z
+mad.f32 r0.z, c6.z, r5.x, r2.w
+mov.f32f32 r5.z, r2.z
+mad.f32 r2.x, r2.y, r4.x, r2.x
+mov.f32f32 r2.w, r0.w
+mad.f32 r0.z, c7.z, r5.y, r0.z
+mad.f32 r0.y, r0.y, r5.z, r3.x
+mad.f32 r2.y, c5.y, r4.w, r5.w
+mad.f32 r0.x, r0.x, r3.z, r0.y
+mov.f32f32 r2.z, r0.z
+mad.f32 r0.y, c6.y, r5.x, r2.y
+mad.f32 r2.y, c5.x, r4.w, r4.z
+mad.f32 r0.y, c7.y, r5.y, r0.y
+mad.f32 r4.z, c6.x, r5.x, r2.y
+nop
+rsq r0.x, r0.x
+(ss)mov.f32f32 r2.y, r0.x
+mul.f r3.x, r3.y, r0.x
+(ss)rsq r0.x, r2.x
+(ss)mov.f32f32 r2.x, r0.x
+mul.f r3.w, r4.y, r0.x
+mul.f r3.z, r3.z, r2.y
+mul.f r3.y, r5.z, r2.y
+mul.f r4.y, r4.x, r2.x
+mul.f r4.x, r6.w, r2.x
mov.f32f32 r2.y, r0.y
-mad.f32 r1.z, c6.x, r1.z, r0.w
-mov.f32f32 r0.w, r0.z
-mov.f32f32 r0.z, r3.x
-mov.f32f32 r0.y, r0.y
-mad.f32 r1.z, c7.x, r1.w, r1.z
-rsq r1.x, r1.x
-(ss)mov.f32f32 r1.x, r1.x
-rsq r1.y, r1.y
-(ss)mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.w, (0.000000)
-mov.f32f32 r2.x, r1.z
-mul.f r0.x, r0.x, r1.x
-mul.f r3.x, r3.w, r1.x
-mul.f r1.x, r3.y, r1.x
-mul.f r3.y, r3.z, r1.y
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r3.w, r3.y
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r4.y, r3.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r1.x, r1.x
+mad.f32 r0.x, c7.x, r5.y, r4.z
+mov.f32f32 r4.z, r4.y
+mov.f32f32 r4.w, (0.000000)
nop
-mov.f32f32 r3.z, r0.x
-mov.f32f32 r3.y, r3.x
-mov.f32f32 r3.x, r1.x
-mov.f32f32 r0.x, r4.y
-mul.f r1.x, r4.x, r1.y
-mul.f r1.y, r6.y, r1.y
-mov.f32f32 r4.z, r3.w
-mov.f32f32 r4.y, r0.x
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r0.x, r1.z
-mov.f32f32 r4.w, r1.w
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.y, r1.y
-(rpt1)nop
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r3.w, r1.y
-mov.f32f32 r1.w, r5.w
-mov.f32f32 r1.z, r5.z
-mov.f32f32 r4.x, r1.x
-mov.f32f32 r1.y, r5.y
-mov.f32f32 r1.x, r5.x
+mov.f32f32 r2.x, r0.x
end
nop
nop
+nop
; VERT: outputs: r0.x (0:0) r1.x (5:0) r2.x (5:20) r3.x (5:21) r4.x (5:22)
-; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r5.x (0:0,cm=f,il=16,b=0)
-; VERT: 183 instructions, 0 half, 7 full
+; VERT: inputs: r4.z (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r1.x (0:0,cm=f,il=16,b=0)
+; VERT: 133 instructions, 0 half, 8 full
diff --git a/reference/stk-mines/stk-mines-33.asm b/reference/stk-mines/stk-mines-33.asm
index 1435d60..141701a 100644
--- a/reference/stk-mines/stk-mines-33.asm
+++ b/reference/stk-mines/stk-mines-33.asm
@@ -6,275 +6,191 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c8.x) 0x40000000, 0xbf800000, 0x40800000, 0x3f000000
+@const(c9.x) 0x00000000, 0x3f333333, 0x3e99999a, 0x3d4ccccd
+@const(c10.x) 0x3f800000, 0x00000000, 0x00000000, 0x00000000
+@const(c11.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.z, 0, r0.x
absneg.f r0.w, (neg)c6.x
-mov.f32f32 r1.x, c3.x
-bary.f r1.y, 1, r0.x
-add.f r1.z, r0.z, c3.x
-add.f r1.w, r0.z, c4.x
-add.f r2.x, c7.x, r0.w
-add.f r0.z, r0.z, r1.x
-mov.f32f32 r1.x, r1.z
-mov.f32f32 r1.z, r1.w
-mov.f32f32 r1.w, r2.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r2.x, c9.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r2.y, r1.x
-add.f r1.x, r1.y, c3.y
-mov.f32f32 r2.w, r1.z
-add.f r1.z, r1.y, c4.y
-rcp r1.w, r1.w
-(ss)mov.f32f32 r1.w, r1.w
-mov.f32f32 r1.x, r1.x
-bary.f r3.y, 6, r0.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r1.x, r1.x
-add.f r0.w, r3.y, r0.w
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r3.z, r0.z
-mov.f32f32 r2.z, r1.x
-mul.f r0.z, r0.w, r1.w
-mov.f32f32 r3.x, r1.z
-add.f r0.w, r1.y, r2.x
-cmps.f.lt r1.x, c6.x, r3.y
-cmps.f.lt r1.y, c7.x, r3.y
-bary.f r1.z, 11, r0.x
-sam (f32)(xyz)r1.w, r2.y, s#0, t#0
-(sy)mad.f32 r2.x, c8.x, r2.x, c8.y
-mov.f32f32 r0.z, r0.z
-(ss)nop
-sam (f32)(xyz)r2.z, r2.w, s#1, t#1
-(sy)(ss)mad.f32 r2.w, c8.x, r2.w, c8.y
-mad.f32 r2.z, c8.x, r2.z, c8.y
-mov.f32f32 r2.x, r2.x
-mul.f r3.y, r0.z, c5.w
-mul.f r3.w, r0.z, c5.z
-mul.f r4.x, r0.z, c5.y
-mul.f r2.x, c8.z, r2.x
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r2.x, r2.x
-mad.f32 r1.w, c8.x, r1.w, c8.y
-mul.f r2.w, c8.z, r2.w
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r4.y, r3.w
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r2.w, r2.w
-mul.f r3.w, r2.z, r2.z
-mov.f32f32 r4.x, r4.x
-mul.f r4.z, r1.w, r1.w
-mov.f32f32 r2.w, r2.w
-mad.f32 r4.z, r2.x, r2.x, r4.z
-mul.f r4.w, r0.z, c5.x
-add.f r0.z, c10.x, (neg)r0.z
-mad.f32 r3.w, r2.w, r2.w, r3.w
-mov.f32f32 r4.z, r4.z
+bary.f r1.x, 6, r0.x
+mov.f32f32 r1.y, c3.x
+add.f r1.z, r0.z, c4.x
+bary.f r2.x, 1, r0.x
+add.f r2.y, r0.z, c3.x
+add.f r2.w, c7.x, r0.w
+cmps.f.lt r3.x, c6.x, r1.x
+add.f r1.w, r2.x, c4.y
+add.f r2.z, r2.x, c3.y
+cmps.f.lt r3.y, c7.x, r1.x
+add.f r3.z, r0.z, r1.y
+cov.u32f32 r0.z, r3.x
+rcp r1.y, r2.w
+add.f r0.w, r1.x, r0.w
+cov.u32f32 r1.x, r3.y
+sam (f32)(xyz)r3.w, r1.z, s#1, t#1
+(sy)(ss)mad.f32 r1.z, c8.x, r4.x, c8.y
+sam (f32)(xyz)r2.y, r2.y, s#0, t#0
+(sy)mad.f32 r1.w, c8.x, r2.z, c8.y
+(ss)mul.f r0.w, r0.w, r1.y
+mad.f32 r1.y, c8.x, r3.w, c8.y
+mul.f r1.z, c8.z, r1.z
+mul.f r1.w, c8.z, r1.w
+(ss)mov.f32f32 r2.z, r0.w
+mov.f32f32 r3.x, r1.y
+mov.f32f32 r3.y, r1.z
+mov.f32f32 r4.x, r1.w
mad.f32 r2.y, c8.x, r2.y, c8.y
-mov.f32f32 r4.w, r4.w
-mad.f32 r3.x, c8.x, r3.x, c8.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r3.w, r3.w
mov.f32f32 r3.x, r3.x
-mov.f32f32 r0.w, r0.w
-mad.f32 r4.z, r2.y, r2.y, r4.z
-cov.u32f32 r1.x, r1.x
-cov.u32f32 r1.y, r1.y
-(rpt3)nop
-rsq r4.z, r4.z
-(ss)mov.f32f32 r4.z, r4.z
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r0.w, r0.w
+mov.f32f32 r3.y, r3.y
+mul.f r4.z, r2.z, c5.w
+mul.f r4.w, r2.z, c5.z
+mul.f r1.y, r1.y, r3.x
+mov.f32f32 r5.x, r2.y
+mad.f32 r1.y, r1.z, r3.y, r1.y
+mad.f32 r1.z, c8.x, r4.y, c8.y
+mul.f r4.y, r2.z, c5.y
+mul.f r2.z, r2.z, c5.x
+mul.f r2.y, r2.y, r5.x
+mov.f32f32 r3.w, r1.z
+mad.f32 r1.w, r1.w, r4.x, r2.y
+mad.f32 r2.y, c8.x, r2.w, c8.y
+add.f r0.w, c10.x, (neg)r0.w
+mov.f32f32 r2.w, r3.w
+cmps.f.ne r0.z, r0.z, c9.x
cmps.f.ne r1.x, r1.x, c9.x
-mul.f r1.w, r1.w, r4.z
-mul.f r2.x, r2.x, r4.z
-mul.f r2.y, r2.y, r4.z
-mad.f32 r4.z, r3.x, r3.x, r3.w
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r3.w, r0.w
-cmps.f.ne r0.w, r1.y, c9.x
-mov.f32f32 r1.y, r1.z
-rsq r1.z, r4.z
-(ss)mov.f32f32 r1.z, r1.z
-(ss)bary.f r4.z, 12, r0.x
-bary.f r5.x, 13, r0.x
+mov.f32f32 r3.w, c9.x
+mad.f32 r1.y, r2.w, r2.w, r1.y
+mov.f32f32 r2.w, r2.y
mov.f32f32 r5.y, c9.x
-mad.f32 r1.w, r2.z, r1.z, r1.w
-mad.f32 r2.x, r2.w, r1.z, r2.x
-mad.f32 r1.z, r3.x, r1.z, r2.y
-sam (f32)(xyzw)r2.y, r3.z, s#2, t#2
-(ss)mov.f32f32 r3.z, r4.z
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r3.w, r5.x
-mul.f r1.w, r1.w, c8.w
-mul.f r2.x, r2.x, c8.w
-mul.f r1.z, r1.z, c8.w
-nop
-mov.f32f32 r1.w, r1.w
-bary.f r4.z, 8, r0.x
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r5.x, c9.x
-mul.f r5.z, r1.w, r4.z
-bary.f r5.w, 9, r0.x
-mul.f r6.x, r4.z, r1.w
-mov.f32f32 r6.y, c5.w
-mov.f32f32 r6.z, c5.z
-mad.f32 r5.z, r2.x, r5.w, r5.z
-mad.f32 r6.x, r5.w, r2.x, r6.x
-mov.f32f32 r6.w, c5.y
+mov.f32f32 r5.z, c9.x
+mov.f32f32 r5.w, c5.w
+mov.f32f32 r6.x, c5.z
+mov.f32f32 r6.y, c5.y
+rsq r1.y, r1.y
+(ss)mov.f32f32 r6.z, r1.y
+mad.f32 r1.w, r2.w, r2.w, r1.w
+add.f r3.w, r2.x, r3.w
+bary.f r2.x, 11, r0.x
+bary.f r2.w, 12, r0.x
+bary.f r6.w, 13, r0.x
+bary.f r7.x, 8, r0.x
+bary.f r7.y, 9, r0.x
+rsq r1.w, r1.w
+(ss)mov.f32f32 r7.z, r1.w
+(ss)mul.f r1.w, r2.y, r1.w
+sam (f32)(xyzw)r7.w, r3.z, s#2, t#2
bary.f (ei)r0.x, 10, r0.x
-mov.f32f32 r0.y, r5.z
-mov.f32f32 r5.z, r6.x
-mov.f32f32 r6.x, c5.x
-mad.f32 r0.y, r1.z, r0.x, r0.y
-mad.f32 r5.z, r0.x, r1.z, r5.z
-(rpt1)nop
-mul.f r1.w, r0.y, r1.w
-max.f r5.z, r5.z, c9.x
-mul.f r2.x, r0.y, r2.x
-mul.f r0.y, r0.y, r1.z
-mov.f32f32 r1.z, r1.w
-mov.f32f32 r1.w, r5.z
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r0.y, r0.y
-mul.f r1.z, c8.x, r1.z
-mad.f32 r1.w, c9.y, r1.w, c9.z
-mul.f r2.x, c8.x, r2.x
-mul.f r0.y, c8.x, r0.y
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r0.y, r0.y
-add.f r1.z, r4.z, (neg)r1.z
-(sy)mul.f r3.x, r3.x, r1.w
-mul.f r2.w, r2.w, r1.w
-mul.f r2.z, r2.z, r1.w
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r4.z, r3.x
-mov.f32f32 r5.z, r2.w
-mov.f32f32 r7.x, r2.z
-mul.f r7.y, r1.z, r1.z
-add.f r2.x, r5.w, (neg)r2.x
-add.f r4.z, r4.z, r5.x
-add.f r0.x, r0.x, (neg)r0.y
-mul.f r0.y, r2.y, r1.w
-mov.f32f32 r1.w, r2.x
-add.f r2.x, r4.z, r5.y
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r2.y, r0.y
-mad.f32 r4.z, r1.w, r1.w, r7.y
+mov.f32f32 r0.y, c5.x
+mul.f r2.y, r5.x, r7.z
+(ss)mul.f r3.z, r4.x, r7.z
+mad.f32 r2.y, r3.x, r6.z, r2.y
+mad.f32 r3.x, r3.y, r6.z, r3.z
+mad.f32 r1.y, r1.z, r1.y, r1.w
+nop
+mul.f r1.z, r2.y, c8.w
+mul.f r1.w, r3.x, c8.w
+mul.f r1.y, r1.y, c8.w
+nop
+mov.f32f32 r2.y, r1.z
+mul.f r1.z, r7.x, r1.z
+mov.f32f32 r3.x, r1.w
+mov.f32f32 r3.y, r1.y
+mul.f r3.z, r2.y, r7.x
+mad.f32 r1.z, r7.y, r1.w, r1.z
+mad.f32 r1.w, r3.x, r7.y, r3.z
+mad.f32 r1.y, r0.x, r1.y, r1.z
+mad.f32 r1.z, r3.y, r0.x, r1.w
(rpt2)nop
-mov.f32f32 r4.z, r4.z
-nop
-mad.f32 r4.z, r0.x, r0.x, r4.z
-(rpt5)nop
-rsq r4.z, r4.z
-(ss)mov.f32f32 r4.z, r4.z
+mul.f r1.w, r1.z, r2.y
+max.f r1.y, r1.y, c9.x
+mul.f r2.y, r1.z, r3.x
+mul.f r1.z, r1.z, r3.y
+mul.f r1.w, c8.x, r1.w
+mad.f32 r1.y, c9.y, r1.y, c9.z
+mul.f r2.y, c8.x, r2.y
+mul.f r1.z, c8.x, r1.z
+add.f r1.w, r7.x, (neg)r1.w
+mov.f32f32 r3.x, r1.y
+add.f r2.y, r7.y, (neg)r2.y
+add.f r0.x, r0.x, (neg)r1.z
+mov.f32f32 r1.z, r1.w
+(sy)mul.f r3.y, r8.z, r3.x
+mov.f32f32 r3.z, r2.y
+mov.f32f32 r3.w, r0.x
+mul.f r1.w, r1.w, r1.z
+add.f r4.x, r3.y, r5.z
+mad.f32 r1.w, r2.y, r3.z, r1.w
+mul.f r2.y, r8.y, r3.x
+mad.f32 r1.w, r3.w, r3.w, r1.w
+add.f r3.w, r4.x, r5.y
+mul.f r3.x, r8.x, r3.x
+mul.f r1.y, r7.w, r1.y
(rpt2)nop
-mul.f r1.z, r1.z, r4.z
-mul.f r1.w, r1.w, r4.z
-mul.f r0.x, r0.x, r4.z
-nop
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r0.x, r0.x
+rsq r1.w, r1.w
+(ss)mov.f32f32 r4.x, r1.w
+mul.f r0.x, r0.x, r1.w
+(rpt1)nop
+mul.f r1.z, r1.z, r4.x
+(ss)mul.f r1.w, r3.z, r4.x
+(rpt1)nop
+mul.f r1.z, r1.z, r2.x
nop
-mul.f r1.y, r1.z, r1.y
+mad.f32 r1.z, r1.w, r2.w, r1.z
nop
-mad.f32 r1.y, r1.w, r3.z, r1.y
-(rpt2)nop
-mov.f32f32 r1.y, r1.y
-nop
-mad.f32 r0.x, r0.x, r3.w, r1.y
-(rpt2)nop
-mov.f32f32 r0.x, r0.x
+mad.f32 r0.x, r0.x, r6.w, r1.z
(rpt2)nop
max.f r0.x, r0.x, c9.x
(rpt2)nop
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
-mov.f32f32 r1.y, r0.x
mov.f32f32 r1.z, r0.x
-cmps.f.lt r0.x, c9.x, r0.x
-(rpt1)nop
-mul.f r1.y, r1.y, r1.z
-cov.u32f32 r0.x, r0.x
+(rpt2)nop
+mul.f r0.x, r0.x, r1.z
+cmps.f.lt r1.z, c9.x, r1.z
(rpt1)nop
-mov.f32f32 r1.y, r1.y
-cmps.f.ne r0.x, r0.x, c9.x
+mov.f32f32 r1.w, r0.x
+cov.u32f32 r1.z, r1.z
(rpt1)nop
-mul.f r1.y, r1.y, r1.y
-sel.b32 r1.z, r2.x, r0.x, r3.x
+mul.f r0.x, r0.x, r1.w
+cmps.f.ne r1.z, r1.z, c9.x
(rpt1)nop
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.w, r1.z
+mov.f32f32 r1.w, r0.x
+sel.b32 r2.x, r3.w, r1.z, r3.y
+mul.f r0.x, r0.x, c9.w
+nop
+mul.f r1.w, r1.w, r1.w
+mad.f32 r2.w, r0.w, r2.x, r4.z
+add.f r3.y, r2.y, r0.x
+add.f r3.z, r3.x, r0.x
+mul.f r1.w, r1.w, c9.z
+add.f r0.x, r1.y, r0.x
+sel.b32 r2.x, r2.w, r0.z, r2.x
+nop
+mov.f32f32 r2.w, r1.w
+add.f r0.x, r0.x, r1.w
+sel.b32 r1.w, r5.w, r1.x, r2.x
+nop
+add.f r2.x, r3.y, r2.w
+add.f r2.w, r3.z, r2.w
+sel.b32 r0.x, r0.x, r1.z, r1.y
+nop
+sel.b32 r1.y, r2.x, r1.z, r2.y
+sel.b32 r1.z, r2.w, r1.z, r3.x
+mad.f32 r2.x, r0.w, r0.x, r2.z
+nop
+mad.f32 r2.y, r0.w, r1.y, r4.w
+mad.f32 r0.w, r0.w, r1.z, r4.y
+sel.b32 r0.x, r2.x, r0.z, r0.x
+nop
+sel.b32 r1.y, r2.y, r0.z, r1.y
+sel.b32 r0.z, r0.w, r0.z, r1.z
(rpt1)nop
-mul.f r2.x, r1.y, r1.y
-mov.f32f32 r1.w, r1.w
-mul.f r1.y, r1.y, c9.w
-nop
-mov.f32f32 r2.x, r2.x
-mad.f32 r1.w, r0.z, r1.w, r3.y
-mov.f32f32 r3.x, r1.y
-mov.f32f32 r3.y, r1.y
-mul.f r2.x, r2.x, c9.z
-sel.b32 r1.z, r1.w, r1.x, r1.z
-add.f r1.w, r5.z, r3.x
-add.f r3.x, r7.x, r3.y
-mov.f32f32 r2.x, r2.x
-sel.b32 r1.z, r6.y, r0.w, r1.z
-mov.f32f32 r1.y, r1.y
-nop
-mov.f32f32 r3.y, r2.x
-mov.f32f32 r3.z, r2.x
-mov.f32f32 r2.x, r2.x
-add.f r1.y, r2.y, r1.y
-add.f r2.y, r1.w, r3.y
-add.f r3.x, r3.x, r3.z
-mov.f32f32 r1.w, r1.z
-nop
-sel.b32 r1.z, r2.y, r0.x, r2.w
-sel.b32 r2.y, r3.x, r0.x, r2.z
-add.f r1.y, r1.y, r2.x
-nop
-mov.f32f32 r2.x, r1.z
-mov.f32f32 r2.z, r2.y
-sel.b32 r0.x, r1.y, r0.x, r0.y
-nop
-mov.f32f32 r0.y, r2.x
-mov.f32f32 r1.y, r2.z
-mov.f32f32 r2.x, r0.x
-nop
-mad.f32 r0.y, r0.z, r0.y, r4.y
-mad.f32 r1.y, r0.z, r1.y, r4.x
-mov.f32f32 r2.x, r2.x
-nop
-sel.b32 r0.y, r0.y, r1.x, r1.z
-sel.b32 r1.y, r1.y, r1.x, r2.y
-mad.f32 r0.z, r0.z, r2.x, r4.w
+sel.b32 r1.z, r6.x, r1.x, r1.y
+sel.b32 r1.y, r6.y, r1.x, r0.z
+sel.b32 r1.x, r0.y, r1.x, r0.x
+end
nop
-sel.b32 r0.y, r6.z, r0.w, r0.y
-sel.b32 r1.y, r6.w, r0.w, r1.y
-sel.b32 r0.x, r0.z, r1.x, r0.x
nop
-mov.f32f32 r1.z, r0.y
-mov.f32f32 r1.y, r1.y
-sel.b32 r0.x, r6.x, r0.w, r0.x
-(rpt2)nop
-mov.f32f32 r1.x, r0.x
-end
nop
; FRAG: outputs: r1.x (1:0)
-; FRAG: inputs: r0.x (5:0,cm=f,il=8,b=1) r1.x (5:20,cm=f,il=12,b=1) r0.w (5:21,cm=f,il=16,b=1) r3.x (5:22,cm=f,il=20,b=1)
-; FRAG: 297 instructions, 0 half, 8 full
+; FRAG: inputs: r0.x (5:0,cm=f,il=8,b=1) r1.x (5:20,cm=f,il=12,b=1) r1.y (5:21,cm=f,il=16,b=1) r3.x (5:22,cm=f,il=20,b=1)
+; FRAG: 193 instructions, 0 half, 9 full
diff --git a/reference/stk-mines/stk-mines-34.asm b/reference/stk-mines/stk-mines-34.asm
index 2d84780..726bef5 100644
--- a/reference/stk-mines/stk-mines-34.asm
+++ b/reference/stk-mines/stk-mines-34.asm
@@ -1,16 +1,16 @@
; options:
; VERT: new compiler
-@in(r1.x) in0
-@in(r1.y) in1
-@in(r1.z) in2
-@in(r1.w) in3
+@in(r4.z) in0
+@in(r4.w) in1
+@in(r5.x) in2
+@in(r5.y) in3
@in(r0.x) in4
@in(r0.y) in5
@in(r0.z) in6
-@in(r5.x) in8
-@in(r5.y) in9
-@in(r5.z) in10
-@in(r5.w) in11
+@in(r1.x) in8
+@in(r1.y) in9
+@in(r1.z) in10
+@in(r1.w) in11
@out(r0.x) out0
@out(r0.y) out1
@out(r0.z) out2
@@ -31,191 +31,144 @@
@out(r4.y) out17
@out(r4.z) out18
@out(r4.w) out19
+@const(c12.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)mov.f32f32 r0.w, c1.x
-mul.f r2.x, c8.x, r1.x
+mul.f r2.x, c8.x, r4.z
mov.f32f32 r2.y, c1.y
mov.f32f32 r2.z, c1.z
mul.f r2.w, r0.w, r0.x
mov.f32f32 r3.x, c2.x
-mad.f32 r2.x, c9.x, r1.y, r2.x
+mad.f32 r2.x, c9.x, r4.w, r2.x
mul.f r3.y, r0.w, r0.w
-mad.f32 r2.x, c10.x, r1.z, r2.x
+mad.f32 r2.x, c10.x, r5.x, r2.x
mad.f32 r2.w, r3.x, r0.y, r2.w
-mad.f32 r2.x, c11.x, r1.w, r2.x
-mad.f32 r3.x, r2.y, r2.y, r3.y
-mul.f r3.y, r2.y, r0.x
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r3.z, c3.x
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r3.x, r3.x
+mov.f32f32 r3.x, c3.x
+mad.f32 r2.x, c11.x, r5.y, r2.x
+mad.f32 r3.y, r2.y, r2.y, r3.y
+mul.f r3.z, r2.y, r0.x
+mad.f32 r2.w, r3.x, r0.z, r2.w
+mov.f32f32 r3.x, r2.x
+mad.f32 r3.y, r2.z, r2.z, r3.y
mov.f32f32 r3.w, c2.y
-mad.f32 r2.w, r3.z, r0.z, r2.w
-mul.f r3.z, r2.x, r2.x
-mul.f r4.x, c8.y, r1.x
-mad.f32 r3.x, r2.z, r2.z, r3.x
-mov.f32f32 r4.y, r2.w
-mad.f32 r2.w, c9.y, r1.y, r4.x
-mad.f32 r3.y, r3.w, r0.y, r3.y
-mul.f r0.x, r2.z, r0.x
-mul.f r3.w, r4.y, r4.y
-mad.f32 r2.w, c10.y, r1.z, r2.w
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r4.x, c3.y
-mad.f32 r2.w, c11.y, r1.w, r2.w
-rsq r3.x, r3.x
-(ss)mov.f32f32 r3.x, r3.x
-mov.f32f32 r4.z, c2.z
-mad.f32 r3.y, r4.x, r0.z, r3.y
mov.f32f32 r4.x, r2.w
-mul.f r0.w, r0.w, r3.x
-mul.f r2.y, r2.y, r3.x
-mov.f32f32 r3.y, r3.y
-mad.f32 r2.w, r4.x, r4.x, r3.z
+mul.f r3.x, r3.x, r3.x
+mul.f r4.y, c8.y, r4.z
+mad.f32 r3.z, r3.w, r0.y, r3.z
+mul.f r2.w, r2.w, r4.x
+mov.f32f32 r3.w, c3.y
+mad.f32 r4.y, c9.y, r4.w, r4.y
+rsq r3.y, r3.y
+(ss)mov.f32f32 r5.z, r3.y
+mad.f32 r4.y, c10.y, r5.x, r4.y
+mad.f32 r3.z, r3.w, r0.z, r3.z
+mad.f32 r3.w, c11.y, r5.y, r4.y
+mul.f r0.w, r0.w, r5.z
+mul.f r2.y, r2.y, r5.z
+mov.f32f32 r4.y, r3.z
+mov.f32f32 r5.z, r3.w
+mul.f r5.w, c0.x, r0.w
mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.y, r2.y
-mad.f32 r3.z, r3.y, r3.y, r3.w
-mov.f32f32 r2.w, r2.w
-mul.f r3.w, c8.z, r1.x
-mul.f r4.w, c0.x, r0.w
-mov.f32f32 r3.z, r3.z
-mad.f32 r0.x, r4.z, r0.y, r0.x
-mad.f32 r0.y, c9.z, r1.y, r3.w
-mad.f32 r3.w, c0.y, r2.y, r4.w
-mad.f32 r0.y, c10.z, r1.z, r0.y
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r4.z, c3.z
-mad.f32 r0.y, c11.z, r1.w, r0.y
-mov.f32f32 r3.w, r3.w
-mul.f r2.z, r2.z, r3.x
-mad.f32 r0.x, r4.z, r0.z, r0.x
-mov.f32f32 r0.y, r0.y
-mul.f r0.z, c4.w, r1.x
-mul.f r3.x, c4.z, r1.x
-mov.f32f32 r0.x, r0.x
-mad.f32 r2.w, r0.y, r0.y, r2.w
+mad.f32 r2.w, r3.z, r4.y, r2.w
+mul.f r0.x, r2.z, r0.x
+mov.f32f32 r3.z, c2.z
+mad.f32 r3.x, r3.w, r5.z, r3.x
+mul.f r3.w, c8.z, r4.z
+mad.f32 r5.w, c0.y, r2.y, r5.w
+mad.f32 r0.x, r3.z, r0.y, r0.x
+mov.f32f32 r0.y, c3.z
+mad.f32 r3.z, c9.z, r4.w, r3.w
+mul.f r2.z, r2.z, r3.y
+(ss)mad.f32 r3.y, c10.z, r5.x, r3.z
+mad.f32 r0.x, r0.y, r0.z, r0.x
+mad.f32 r0.y, c11.z, r5.y, r3.y
+mad.f32 r3.y, c0.z, r2.z, r5.w
+mov.f32f32 r0.z, r2.y
+mov.f32f32 r2.y, r0.x
+mov.f32f32 r3.z, r0.y
+mov.f32f32 r3.w, r3.y
mov.f32f32 r2.z, r2.z
-mad.f32 r0.z, c5.w, r1.y, r0.z
-mad.f32 r3.z, r0.x, r0.x, r3.z
-mad.f32 r0.z, c6.w, r1.z, r0.z
-mad.f32 r3.x, c5.z, r1.y, r3.x
-mul.f r4.z, c4.y, r1.x
-mul.f r1.x, c4.x, r1.x
-mad.f32 r0.z, c7.w, r1.w, r0.z
-mad.f32 r3.x, c6.z, r1.z, r3.x
-rsq r3.z, r3.z
-(ss)mov.f32f32 r3.z, r3.z
-rsq r2.w, r2.w
-(ss)mov.f32f32 r4.w, r2.w
-mad.f32 r3.w, c0.z, r2.z, r3.w
-(ss)mov.f32f32 r2.w, r0.z
-mul.f r0.x, r0.x, r3.z
-mul.f r2.x, r2.x, r4.w
-mul.f r4.y, r4.y, r3.z
-mul.f r3.y, r3.y, r3.z
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r3.z, r4.y
-mov.f32f32 r3.y, r3.y
-mul.f r4.y, r0.x, r2.y
-absneg.f r2.x, (neg)r2.x
-mul.f r6.x, c0.x, r3.z
-mad.f32 r4.y, r3.y, r2.z, (neg)r4.y
-mad.f32 r6.x, c0.y, r3.y, r6.x
-mov.f32f32 r6.y, r2.x
-mul.f r2.x, r3.z, r2.z
-mov.f32f32 r2.z, r4.y
-mov.f32f32 r4.y, r6.x
-mul.f r6.x, r6.y, r6.y
-mul.f r4.x, r4.x, r4.w
-mul.f r2.z, c0.x, r2.z
-mad.f32 r2.x, r0.x, r0.w, (neg)r2.x
-mad.f32 r0.x, c0.z, r0.x, r4.y
+mad.f32 r2.y, r2.y, r2.y, r2.w
+mad.f32 r0.y, r0.y, r3.z, r3.x
+mul.f r3.x, r3.w, r3.w
+mul.f r2.w, c4.w, r4.z
+mul.f r3.w, c4.z, r4.z
+mul.f r5.w, c4.y, r4.z
+mul.f r4.z, c4.x, r4.z
+rsq r2.y, r2.y
+(ss)mov.f32f32 r6.x, r2.y
+rsq r0.y, r0.y
+(ss)mov.f32f32 r6.y, r0.y
+(ss)mul.f r0.y, r2.x, r0.y
+mul.f r0.x, r0.x, r2.y
+mul.f r2.x, r4.y, r6.x
+mul.f r2.y, r5.z, r6.y
+mul.f r4.x, r4.x, r6.x
+absneg.f r4.y, (neg)r0.y
+mov.f32f32 r0.y, r2.x
+mov.f32f32 r5.z, r0.x
+absneg.f r2.y, (neg)r2.y
+mul.f r6.x, c0.x, r4.x
mov.f32f32 r4.x, r4.x
-mul.f r0.w, r3.y, r0.w
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r0.x, r0.x
-absneg.f r3.y, (neg)r4.x
-mad.f32 r0.w, r3.z, r2.y, (neg)r0.w
-mad.f32 r2.x, c0.y, r2.x, r2.z
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r4.x, r3.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r3.y, r3.w
-mul.f r0.y, r0.y, r4.w
-mad.f32 r0.w, c0.z, r0.w, r2.x
-mad.f32 r2.x, r4.x, r4.x, r6.x
-mul.f r2.y, r3.y, r3.y
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.x, r2.x
-mad.f32 r3.x, c7.z, r1.w, r3.x
-mad.f32 r3.z, c5.y, r1.y, r4.z
-mov.f32f32 r3.w, r0.w
-absneg.f r0.y, (neg)r0.y
-mov.f32f32 r2.z, r3.x
-mad.f32 r0.w, c6.y, r1.z, r3.z
-mad.f32 r2.y, r3.w, r3.w, r2.y
-mov.f32f32 r3.z, r0.y
-mad.f32 r0.y, c7.y, r1.w, r0.w
-mad.f32 r0.w, c5.x, r1.y, r1.x
-mov.f32f32 r1.x, r2.y
-mad.f32 r1.y, r3.z, r3.z, r2.x
-mad.f32 r1.x, r0.x, r0.x, r1.x
+mul.f r6.z, r5.z, r0.z
+mov.f32f32 r6.w, r2.y
+mad.f32 r6.z, r0.y, r2.z, (neg)r6.z
+mov.f32f32 r7.x, r4.y
+mad.f32 r2.x, c0.y, r2.x, r6.x
+mul.f r2.z, r4.x, r2.z
+mul.f r6.x, c0.x, r6.z
+mad.f32 r2.z, r5.z, r0.w, (neg)r2.z
+mul.f r5.z, r7.x, r7.x
+mad.f32 r0.x, c0.z, r0.x, r2.x
+mad.f32 r2.x, r2.y, r6.w, r5.z
+mad.f32 r2.y, c0.y, r2.z, r6.x
+mul.f r0.y, r0.y, r0.w
+mul.f r0.w, r3.z, r6.y
+mad.f32 r0.y, r4.x, r0.z, (neg)r0.y
+mov.f32f32 r0.z, r0.x
+mad.f32 r2.z, c5.w, r4.w, r2.w
+mad.f32 r2.w, c5.z, r4.w, r3.w
+mad.f32 r0.y, c0.z, r0.y, r2.y
+absneg.f r2.y, (neg)r0.w
+mov.f32f32 r3.z, r0.z
+mad.f32 r0.z, c6.w, r5.x, r2.z
+mov.f32f32 r2.z, r0.y
+mov.f32f32 r4.x, r2.y
+mad.f32 r0.w, c7.w, r5.y, r0.z
+mad.f32 r0.z, c6.z, r5.x, r2.w
+mov.f32f32 r5.z, r2.z
+mad.f32 r2.x, r2.y, r4.x, r2.x
+mov.f32f32 r2.w, r0.w
+mad.f32 r0.z, c7.z, r5.y, r0.z
+mad.f32 r0.y, r0.y, r5.z, r3.x
+mad.f32 r2.y, c5.y, r4.w, r5.w
+mad.f32 r0.x, r0.x, r3.z, r0.y
+mov.f32f32 r2.z, r0.z
+mad.f32 r0.y, c6.y, r5.x, r2.y
+mad.f32 r2.y, c5.x, r4.w, r4.z
+mad.f32 r0.y, c7.y, r5.y, r0.y
+mad.f32 r4.z, c6.x, r5.x, r2.y
+nop
+rsq r0.x, r0.x
+(ss)mov.f32f32 r2.y, r0.x
+mul.f r3.x, r3.y, r0.x
+(ss)rsq r0.x, r2.x
+(ss)mov.f32f32 r2.x, r0.x
+mul.f r3.w, r4.y, r0.x
+mul.f r3.z, r3.z, r2.y
+mul.f r3.y, r5.z, r2.y
+mul.f r4.y, r4.x, r2.x
+mul.f r4.x, r6.w, r2.x
mov.f32f32 r2.y, r0.y
-mad.f32 r1.z, c6.x, r1.z, r0.w
-mov.f32f32 r0.w, r0.z
-mov.f32f32 r0.z, r3.x
-mov.f32f32 r0.y, r0.y
-mad.f32 r1.z, c7.x, r1.w, r1.z
-rsq r1.x, r1.x
-(ss)mov.f32f32 r1.x, r1.x
-rsq r1.y, r1.y
-(ss)mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.w, (0.000000)
-mov.f32f32 r2.x, r1.z
-mul.f r0.x, r0.x, r1.x
-mul.f r3.x, r3.w, r1.x
-mul.f r1.x, r3.y, r1.x
-mul.f r3.y, r3.z, r1.y
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r3.w, r3.y
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r4.y, r3.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r1.x, r1.x
+mad.f32 r0.x, c7.x, r5.y, r4.z
+mov.f32f32 r4.z, r4.y
+mov.f32f32 r4.w, (0.000000)
nop
-mov.f32f32 r3.z, r0.x
-mov.f32f32 r3.y, r3.x
-mov.f32f32 r3.x, r1.x
-mov.f32f32 r0.x, r4.y
-mul.f r1.x, r4.x, r1.y
-mul.f r1.y, r6.y, r1.y
-mov.f32f32 r4.z, r3.w
-mov.f32f32 r4.y, r0.x
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r0.x, r1.z
-mov.f32f32 r4.w, r1.w
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.y, r1.y
-(rpt1)nop
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r3.w, r1.y
-mov.f32f32 r1.w, r5.w
-mov.f32f32 r1.z, r5.z
-mov.f32f32 r4.x, r1.x
-mov.f32f32 r1.y, r5.y
-mov.f32f32 r1.x, r5.x
+mov.f32f32 r2.x, r0.x
end
nop
nop
+nop
; VERT: outputs: r0.x (0:0) r1.x (5:0) r2.x (5:20) r3.x (5:21) r4.x (5:22)
-; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r5.x (0:0,cm=f,il=16,b=0)
-; VERT: 183 instructions, 0 half, 7 full
+; VERT: inputs: r4.z (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r1.x (0:0,cm=f,il=16,b=0)
+; VERT: 133 instructions, 0 half, 8 full
diff --git a/reference/stk-mines/stk-mines-35.asm b/reference/stk-mines/stk-mines-35.asm
index 242def2..693d844 100644
--- a/reference/stk-mines/stk-mines-35.asm
+++ b/reference/stk-mines/stk-mines-35.asm
@@ -1,9 +1,9 @@
; options:
; VERT: new compiler
-@in(r2.x) in0
-@in(r2.y) in1
-@in(r2.z) in2
-@in(r2.w) in3
+@in(r1.x) in0
+@in(r1.y) in1
+@in(r1.z) in2
+@in(r1.w) in3
@in(r0.x) in4
@in(r0.y) in5
@in(r0.z) in6
@@ -11,10 +11,10 @@
@in(r3.y) in9
@in(r3.z) in10
@in(r3.w) in11
-@in(r4.x) in12
-@in(r4.y) in13
-@in(r4.z) in14
-@in(r4.w) in15
+@in(r2.x) in12
+@in(r2.y) in13
+@in(r2.z) in14
+@in(r2.w) in15
@out(r0.x) out0
@out(r0.y) out1
@out(r0.z) out2
@@ -27,135 +27,120 @@
@out(r2.y) out9
@out(r2.z) out10
@out(r2.w) out11
-(sy)(ss)mul.f r0.w, r2.x, c12.x
-mul.f r1.x, r0.x, c4.x
-mad.f32 r0.w, c13.x, r2.y, r0.w
-mad.f32 r1.x, c4.y, r0.y, r1.x
-mad.f32 r0.w, c14.x, r2.z, r0.w
-mov.f32f32 r1.y, r3.w
-mad.f32 r0.w, c15.x, r2.w, r0.w
-mov.f32f32 r1.x, r1.x
-mul.f r1.z, r0.x, c5.x
+@const(c19.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
+(sy)(ss)mul.f r0.w, r1.x, c12.x
+mul.f r4.x, r0.x, c4.x
+mad.f32 r0.w, c13.x, r1.y, r0.w
+mad.f32 r4.x, c4.y, r0.y, r4.x
+mad.f32 r0.w, c14.x, r1.z, r0.w
+mad.f32 r4.x, c4.z, r0.z, r4.x
+mad.f32 r4.y, c15.x, r1.w, r0.w
+mul.f r0.w, r1.x, c12.y
+mul.f r4.z, r1.x, c12.z
+mul.f r4.w, r1.x, c0.w
+mul.f r5.x, r4.y, r4.y
+mad.f32 r0.w, c13.y, r1.y, r0.w
+mul.f r5.y, r4.x, c10.x
+mad.f32 r0.w, c14.y, r1.z, r0.w
+mul.f r5.z, r0.x, c5.x
+mad.f32 r5.w, c15.y, r1.w, r0.w
+mad.f32 r0.w, c5.y, r0.y, r5.z
+mad.f32 r4.z, c13.z, r1.y, r4.z
+mad.f32 r4.w, c1.w, r1.y, r4.w
+mad.f32 r5.x, r5.w, r5.w, r5.x
+mad.f32 r4.z, c14.z, r1.z, r4.z
+mad.f32 r5.z, c5.z, r0.z, r0.w
+mad.f32 r4.z, c15.z, r1.w, r4.z
+mad.f32 r0.w, c2.w, r1.z, r4.w
+mul.f r4.w, r1.x, c0.z
+mul.f r6.x, r1.x, c0.y
+mad.f32 r5.x, r4.z, r4.z, r5.x
+mad.f32 r5.y, c10.y, r5.z, r5.y
mul.f r0.x, r0.x, c6.x
-mul.f r1.w, r0.w, r0.w
-mul.f r3.w, r2.x, c12.y
-mad.f32 r1.x, c4.z, r0.z, r1.x
-mad.f32 r3.w, c13.y, r2.y, r3.w
-mov.f32f32 r1.y, r1.y
-mad.f32 r3.w, c14.y, r2.z, r3.w
-mul.f r5.x, r1.x, c10.x
-mad.f32 r3.w, c15.y, r2.w, r3.w
-mad.f32 r1.z, c5.y, r0.y, r1.z
-max.f r1.y, r1.y, c19.x
+mad.f32 r0.w, c3.w, r1.w, r0.w
+mad.f32 r4.w, c1.z, r1.y, r4.w
+mad.f32 r6.x, c1.y, r1.y, r6.x
+mul.f r1.x, r1.x, c0.x
+rsq r5.x, (abs)r5.x
+(ss)mov.f32f32 r6.y, r5.x
+mul.f r4.z, r4.z, r5.x
mad.f32 r0.x, c6.y, r0.y, r0.x
-mad.f32 r0.y, r3.w, r3.w, r1.w
-mov.f32f32 r1.z, r1.z
-min.f r1.w, r1.y, c19.y
-mad.f32 r1.y, c5.z, r0.z, r1.z
-mov.f32f32 r0.y, r0.y
-mul.f r1.z, r2.x, c12.z
-mov.f32f32 r0.x, r0.x
-mad.f32 r1.z, c13.z, r2.y, r1.z
-mad.f32 r5.x, c10.y, r1.y, r5.x
-mad.f32 r1.z, c14.z, r2.z, r1.z
+mad.f32 r0.y, c2.z, r1.z, r4.w
+mul.f r4.y, r4.y, r6.y
+mul.f r4.w, r5.w, r6.y
+(rpt1)nop
+add.f r4.y, c10.x, (neg)r4.y
+add.f r4.w, c10.y, (neg)r4.w
+add.f r4.z, c10.z, (neg)r4.z
mad.f32 r0.x, c6.z, r0.z, r0.x
-mad.f32 r0.z, c15.z, r2.w, r1.z
-mov.f32f32 r1.z, r5.x
-mul.f r5.x, r2.x, c0.w
-mul.f r5.y, r2.x, c0.z
-mad.f32 r0.y, r0.z, r0.z, r0.y
-mad.f32 r1.z, c10.z, r0.x, r1.z
-mad.f32 r5.x, c1.w, r2.y, r5.x
-mad.f32 r5.y, c1.z, r2.y, r5.y
-mul.f r5.z, r2.x, c0.y
-mul.f r2.x, r2.x, c0.x
-mul.f r5.w, c16.z, r3.z
+(ss)mul.f r5.x, r4.y, r4.y
+mad.f32 r0.z, c3.z, r1.w, r0.y
+mad.f32 r0.y, r4.w, r4.w, r5.x
+mad.f32 r5.x, c10.z, r0.x, r5.y
+mad.f32 r0.y, r4.z, r4.z, r0.y
+mad.f32 r5.y, c2.y, r1.z, r6.x
+mad.f32 r1.x, c1.x, r1.y, r1.x
+max.f r1.y, r3.w, c19.x
+mul.f r3.w, c16.z, r3.z
+mul.f r5.w, c16.y, r3.y
+mul.f r6.x, c16.x, r3.x
rsq r0.y, (abs)r0.y
-(ss)mov.f32f32 r0.y, r0.y
-max.f r6.x, r1.z, c19.x
-cmps.f.lt r1.z, (neg)r1.z, c19.x
-mad.f32 r5.x, c2.w, r2.z, r5.x
-mul.f r0.w, r0.w, r0.y
-mul.f r3.w, r3.w, r0.y
-mul.f r0.y, r0.z, r0.y
-mov.f32f32 r6.x, r6.x
-add.f r0.z, c10.x, (neg)r0.w
-add.f r3.w, c10.y, (neg)r3.w
-add.f r0.y, c10.z, (neg)r0.y
-mad.f32 r0.w, c8.z, r3.z, c9.z
-mul.f r6.y, r0.z, r0.z
-mul.f r6.z, c16.y, r3.y
-mad.f32 r6.y, r3.w, r3.w, r6.y
-add.f r5.w, r5.w, r0.w
-mad.f32 r0.w, c8.y, r3.y, c9.y
-mul.f r6.w, c16.x, r3.x
-mov.f32f32 r6.y, r6.y
+(ss)mov.f32f32 r6.y, r0.y
+(ss)mul.f r0.y, r4.z, r0.y
+max.f r4.z, r5.x, c19.x
+mad.f32 r6.z, c8.x, r3.x, c9.x
+mul.f r4.y, r4.y, r6.y
+mul.f r4.w, r4.w, r6.y
+mov.f32f32 r6.y, r4.z
+mad.f32 r6.w, c8.y, r3.y, c9.y
+mul.f r4.x, r4.x, r4.y
+mad.f32 r4.y, c8.z, r3.z, c9.z
+mad.f32 r4.x, r5.z, r4.w, r4.x
+add.f r4.w, r5.w, r6.w
+mad.f32 r0.x, r0.x, r0.y, r4.x
+add.f r3.w, r3.w, r4.y
+mul.f r0.y, c17.y, r3.y
+add.f r3.y, r6.x, r6.z
+max.f r0.x, r0.x, c19.x
mul.f r3.z, c17.z, r3.z
-mad.f32 r6.y, r0.y, r0.y, r6.y
-add.f r6.z, r6.z, r0.w
-mad.f32 r7.x, c8.x, r3.x, c9.x
-mad.f32 r0.w, c3.w, r2.w, r5.x
-mad.f32 r5.x, c2.z, r2.z, r5.y
-mad.f32 r5.y, c1.y, r2.y, r5.z
-mad.f32 r2.x, c1.x, r2.y, r2.x
-rsq r2.y, (abs)r6.y
-(ss)mov.f32f32 r2.y, r2.y
-mad.f32 r3.z, r6.x, r3.z, r5.w
-mul.f r3.y, c17.y, r3.y
-add.f r5.z, r6.w, r7.x
-mul.f r0.z, r0.z, r2.y
-mul.f r3.w, r3.w, r2.y
-mul.f r0.y, r0.y, r2.y
-mad.f32 r3.y, r6.x, r3.y, r6.z
-mul.f r0.z, r1.x, r0.z
-mul.f r1.x, c17.x, r3.x
-mad.f32 r1.y, r1.y, r3.w, r0.z
-mad.f32 r0.z, c3.z, r2.w, r5.x
-mad.f32 r2.y, c2.y, r2.z, r5.y
-mad.f32 r2.x, c2.x, r2.z, r2.x
-mov.f32f32 r1.y, r1.y
-mad.f32 r1.x, r6.x, r1.x, r5.z
-mad.f32 r1.y, r0.x, r0.y, r1.y
-mad.f32 r0.y, c3.y, r2.w, r2.y
-mad.f32 r0.x, c3.x, r2.w, r2.x
-mov.f32f32 r3.x, c7.x
-max.f r1.y, r1.y, c19.x
-mov.f32f32 r2.w, r4.w
-mov.f32f32 r2.z, r4.z
-mov.f32f32 r2.y, r4.y
-mov.f32f32 r2.x, r4.x
+mad.f32 r4.x, r6.y, r0.y, r4.w
+mul.f r3.x, c17.x, r3.x
+cmps.f.lt r4.y, (neg)r5.x, c19.x
+mad.f32 r0.y, c3.y, r1.w, r5.y
+mad.f32 r1.x, c2.x, r1.z, r1.x
+log2 r1.z, r0.x
+mov.f32f32 r4.w, c7.x
+mad.f32 r3.z, r6.y, r3.z, r3.w
+mad.f32 r3.x, r4.z, r3.x, r3.y
+(ss)mad.f32 r0.x, c3.x, r1.w, r1.x
+min.f r1.x, r4.w, c19.z
+min.f r1.w, r1.y, c19.y
(rpt1)nop
-log2 r1.y, r1.y
-(ss)mov.f32f32 r1.y, r1.y
-min.f r3.x, r3.x, c19.z
-(rpt2)nop
-mul.f r1.y, r3.x, r1.y
-(rpt2)nop
-mov.f32f32 r1.y, r1.y
+(ss)mul.f r1.x, r1.x, r1.z
(rpt5)nop
-exp2 r1.y, r1.y
-(ss)mov.f32f32 r1.y, r1.y
-(rpt2)nop
-sel.b32 r1.y, r1.y, r1.z, c19.x
-(rpt2)nop
-mov.f32f32 r1.y, r1.y
+exp2 r1.x, r1.x
+(ss)sel.b32 r1.x, r1.x, r4.y, c19.x
(rpt2)nop
+mov.f32f32 r1.y, r1.x
+mad.f32 r1.x, c18.x, r1.x, r3.x
+(rpt1)nop
mov.f32f32 r1.y, r1.y
-(rpt2)nop
-mad.f32 r1.z, c18.z, r1.y, r3.z
-mad.f32 r3.x, c18.y, r1.y, r3.y
-mad.f32 r1.x, c18.x, r1.y, r1.x
-nop
-max.f r1.y, r1.z, c19.x
-max.f r3.x, r3.x, c19.x
max.f r1.x, r1.x, c19.x
-nop
-min.f r1.z, r1.y, c19.y
-min.f r1.y, r3.x, c19.y
+(rpt1)nop
+mad.f32 r1.z, c18.z, r1.y, r3.z
+mad.f32 r1.y, c18.y, r1.y, r4.x
min.f r1.x, r1.x, c19.y
+nop
+max.f r1.z, r1.z, c19.x
+max.f r1.y, r1.y, c19.x
+(rpt1)nop
+min.f r1.z, r1.z, c19.y
+min.f r1.y, r1.y, c19.y
end
nop
+nop
+nop
; VERT: outputs: r0.x (0:0) r1.x (1:0) r2.x (5:0)
-; VERT: inputs: r2.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r3.x (0:0,cm=f,il=16,b=0) r4.x (0:0,cm=f,il=20,b=0)
-; VERT: 145 instructions, 0 half, 8 full
+; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r3.x (0:0,cm=f,il=16,b=0) r2.x (0:0,cm=f,il=20,b=0)
+; VERT: 121 instructions, 0 half, 7 full
diff --git a/reference/stk-mines/stk-mines-36.asm b/reference/stk-mines/stk-mines-36.asm
index 1435d60..141701a 100644
--- a/reference/stk-mines/stk-mines-36.asm
+++ b/reference/stk-mines/stk-mines-36.asm
@@ -6,275 +6,191 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c8.x) 0x40000000, 0xbf800000, 0x40800000, 0x3f000000
+@const(c9.x) 0x00000000, 0x3f333333, 0x3e99999a, 0x3d4ccccd
+@const(c10.x) 0x3f800000, 0x00000000, 0x00000000, 0x00000000
+@const(c11.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.z, 0, r0.x
absneg.f r0.w, (neg)c6.x
-mov.f32f32 r1.x, c3.x
-bary.f r1.y, 1, r0.x
-add.f r1.z, r0.z, c3.x
-add.f r1.w, r0.z, c4.x
-add.f r2.x, c7.x, r0.w
-add.f r0.z, r0.z, r1.x
-mov.f32f32 r1.x, r1.z
-mov.f32f32 r1.z, r1.w
-mov.f32f32 r1.w, r2.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r2.x, c9.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r2.y, r1.x
-add.f r1.x, r1.y, c3.y
-mov.f32f32 r2.w, r1.z
-add.f r1.z, r1.y, c4.y
-rcp r1.w, r1.w
-(ss)mov.f32f32 r1.w, r1.w
-mov.f32f32 r1.x, r1.x
-bary.f r3.y, 6, r0.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r1.x, r1.x
-add.f r0.w, r3.y, r0.w
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r3.z, r0.z
-mov.f32f32 r2.z, r1.x
-mul.f r0.z, r0.w, r1.w
-mov.f32f32 r3.x, r1.z
-add.f r0.w, r1.y, r2.x
-cmps.f.lt r1.x, c6.x, r3.y
-cmps.f.lt r1.y, c7.x, r3.y
-bary.f r1.z, 11, r0.x
-sam (f32)(xyz)r1.w, r2.y, s#0, t#0
-(sy)mad.f32 r2.x, c8.x, r2.x, c8.y
-mov.f32f32 r0.z, r0.z
-(ss)nop
-sam (f32)(xyz)r2.z, r2.w, s#1, t#1
-(sy)(ss)mad.f32 r2.w, c8.x, r2.w, c8.y
-mad.f32 r2.z, c8.x, r2.z, c8.y
-mov.f32f32 r2.x, r2.x
-mul.f r3.y, r0.z, c5.w
-mul.f r3.w, r0.z, c5.z
-mul.f r4.x, r0.z, c5.y
-mul.f r2.x, c8.z, r2.x
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r2.x, r2.x
-mad.f32 r1.w, c8.x, r1.w, c8.y
-mul.f r2.w, c8.z, r2.w
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r4.y, r3.w
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r2.w, r2.w
-mul.f r3.w, r2.z, r2.z
-mov.f32f32 r4.x, r4.x
-mul.f r4.z, r1.w, r1.w
-mov.f32f32 r2.w, r2.w
-mad.f32 r4.z, r2.x, r2.x, r4.z
-mul.f r4.w, r0.z, c5.x
-add.f r0.z, c10.x, (neg)r0.z
-mad.f32 r3.w, r2.w, r2.w, r3.w
-mov.f32f32 r4.z, r4.z
+bary.f r1.x, 6, r0.x
+mov.f32f32 r1.y, c3.x
+add.f r1.z, r0.z, c4.x
+bary.f r2.x, 1, r0.x
+add.f r2.y, r0.z, c3.x
+add.f r2.w, c7.x, r0.w
+cmps.f.lt r3.x, c6.x, r1.x
+add.f r1.w, r2.x, c4.y
+add.f r2.z, r2.x, c3.y
+cmps.f.lt r3.y, c7.x, r1.x
+add.f r3.z, r0.z, r1.y
+cov.u32f32 r0.z, r3.x
+rcp r1.y, r2.w
+add.f r0.w, r1.x, r0.w
+cov.u32f32 r1.x, r3.y
+sam (f32)(xyz)r3.w, r1.z, s#1, t#1
+(sy)(ss)mad.f32 r1.z, c8.x, r4.x, c8.y
+sam (f32)(xyz)r2.y, r2.y, s#0, t#0
+(sy)mad.f32 r1.w, c8.x, r2.z, c8.y
+(ss)mul.f r0.w, r0.w, r1.y
+mad.f32 r1.y, c8.x, r3.w, c8.y
+mul.f r1.z, c8.z, r1.z
+mul.f r1.w, c8.z, r1.w
+(ss)mov.f32f32 r2.z, r0.w
+mov.f32f32 r3.x, r1.y
+mov.f32f32 r3.y, r1.z
+mov.f32f32 r4.x, r1.w
mad.f32 r2.y, c8.x, r2.y, c8.y
-mov.f32f32 r4.w, r4.w
-mad.f32 r3.x, c8.x, r3.x, c8.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r3.w, r3.w
mov.f32f32 r3.x, r3.x
-mov.f32f32 r0.w, r0.w
-mad.f32 r4.z, r2.y, r2.y, r4.z
-cov.u32f32 r1.x, r1.x
-cov.u32f32 r1.y, r1.y
-(rpt3)nop
-rsq r4.z, r4.z
-(ss)mov.f32f32 r4.z, r4.z
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r0.w, r0.w
+mov.f32f32 r3.y, r3.y
+mul.f r4.z, r2.z, c5.w
+mul.f r4.w, r2.z, c5.z
+mul.f r1.y, r1.y, r3.x
+mov.f32f32 r5.x, r2.y
+mad.f32 r1.y, r1.z, r3.y, r1.y
+mad.f32 r1.z, c8.x, r4.y, c8.y
+mul.f r4.y, r2.z, c5.y
+mul.f r2.z, r2.z, c5.x
+mul.f r2.y, r2.y, r5.x
+mov.f32f32 r3.w, r1.z
+mad.f32 r1.w, r1.w, r4.x, r2.y
+mad.f32 r2.y, c8.x, r2.w, c8.y
+add.f r0.w, c10.x, (neg)r0.w
+mov.f32f32 r2.w, r3.w
+cmps.f.ne r0.z, r0.z, c9.x
cmps.f.ne r1.x, r1.x, c9.x
-mul.f r1.w, r1.w, r4.z
-mul.f r2.x, r2.x, r4.z
-mul.f r2.y, r2.y, r4.z
-mad.f32 r4.z, r3.x, r3.x, r3.w
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r3.w, r0.w
-cmps.f.ne r0.w, r1.y, c9.x
-mov.f32f32 r1.y, r1.z
-rsq r1.z, r4.z
-(ss)mov.f32f32 r1.z, r1.z
-(ss)bary.f r4.z, 12, r0.x
-bary.f r5.x, 13, r0.x
+mov.f32f32 r3.w, c9.x
+mad.f32 r1.y, r2.w, r2.w, r1.y
+mov.f32f32 r2.w, r2.y
mov.f32f32 r5.y, c9.x
-mad.f32 r1.w, r2.z, r1.z, r1.w
-mad.f32 r2.x, r2.w, r1.z, r2.x
-mad.f32 r1.z, r3.x, r1.z, r2.y
-sam (f32)(xyzw)r2.y, r3.z, s#2, t#2
-(ss)mov.f32f32 r3.z, r4.z
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r3.w, r5.x
-mul.f r1.w, r1.w, c8.w
-mul.f r2.x, r2.x, c8.w
-mul.f r1.z, r1.z, c8.w
-nop
-mov.f32f32 r1.w, r1.w
-bary.f r4.z, 8, r0.x
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r5.x, c9.x
-mul.f r5.z, r1.w, r4.z
-bary.f r5.w, 9, r0.x
-mul.f r6.x, r4.z, r1.w
-mov.f32f32 r6.y, c5.w
-mov.f32f32 r6.z, c5.z
-mad.f32 r5.z, r2.x, r5.w, r5.z
-mad.f32 r6.x, r5.w, r2.x, r6.x
-mov.f32f32 r6.w, c5.y
+mov.f32f32 r5.z, c9.x
+mov.f32f32 r5.w, c5.w
+mov.f32f32 r6.x, c5.z
+mov.f32f32 r6.y, c5.y
+rsq r1.y, r1.y
+(ss)mov.f32f32 r6.z, r1.y
+mad.f32 r1.w, r2.w, r2.w, r1.w
+add.f r3.w, r2.x, r3.w
+bary.f r2.x, 11, r0.x
+bary.f r2.w, 12, r0.x
+bary.f r6.w, 13, r0.x
+bary.f r7.x, 8, r0.x
+bary.f r7.y, 9, r0.x
+rsq r1.w, r1.w
+(ss)mov.f32f32 r7.z, r1.w
+(ss)mul.f r1.w, r2.y, r1.w
+sam (f32)(xyzw)r7.w, r3.z, s#2, t#2
bary.f (ei)r0.x, 10, r0.x
-mov.f32f32 r0.y, r5.z
-mov.f32f32 r5.z, r6.x
-mov.f32f32 r6.x, c5.x
-mad.f32 r0.y, r1.z, r0.x, r0.y
-mad.f32 r5.z, r0.x, r1.z, r5.z
-(rpt1)nop
-mul.f r1.w, r0.y, r1.w
-max.f r5.z, r5.z, c9.x
-mul.f r2.x, r0.y, r2.x
-mul.f r0.y, r0.y, r1.z
-mov.f32f32 r1.z, r1.w
-mov.f32f32 r1.w, r5.z
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r0.y, r0.y
-mul.f r1.z, c8.x, r1.z
-mad.f32 r1.w, c9.y, r1.w, c9.z
-mul.f r2.x, c8.x, r2.x
-mul.f r0.y, c8.x, r0.y
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r0.y, r0.y
-add.f r1.z, r4.z, (neg)r1.z
-(sy)mul.f r3.x, r3.x, r1.w
-mul.f r2.w, r2.w, r1.w
-mul.f r2.z, r2.z, r1.w
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r4.z, r3.x
-mov.f32f32 r5.z, r2.w
-mov.f32f32 r7.x, r2.z
-mul.f r7.y, r1.z, r1.z
-add.f r2.x, r5.w, (neg)r2.x
-add.f r4.z, r4.z, r5.x
-add.f r0.x, r0.x, (neg)r0.y
-mul.f r0.y, r2.y, r1.w
-mov.f32f32 r1.w, r2.x
-add.f r2.x, r4.z, r5.y
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r2.y, r0.y
-mad.f32 r4.z, r1.w, r1.w, r7.y
+mov.f32f32 r0.y, c5.x
+mul.f r2.y, r5.x, r7.z
+(ss)mul.f r3.z, r4.x, r7.z
+mad.f32 r2.y, r3.x, r6.z, r2.y
+mad.f32 r3.x, r3.y, r6.z, r3.z
+mad.f32 r1.y, r1.z, r1.y, r1.w
+nop
+mul.f r1.z, r2.y, c8.w
+mul.f r1.w, r3.x, c8.w
+mul.f r1.y, r1.y, c8.w
+nop
+mov.f32f32 r2.y, r1.z
+mul.f r1.z, r7.x, r1.z
+mov.f32f32 r3.x, r1.w
+mov.f32f32 r3.y, r1.y
+mul.f r3.z, r2.y, r7.x
+mad.f32 r1.z, r7.y, r1.w, r1.z
+mad.f32 r1.w, r3.x, r7.y, r3.z
+mad.f32 r1.y, r0.x, r1.y, r1.z
+mad.f32 r1.z, r3.y, r0.x, r1.w
(rpt2)nop
-mov.f32f32 r4.z, r4.z
-nop
-mad.f32 r4.z, r0.x, r0.x, r4.z
-(rpt5)nop
-rsq r4.z, r4.z
-(ss)mov.f32f32 r4.z, r4.z
+mul.f r1.w, r1.z, r2.y
+max.f r1.y, r1.y, c9.x
+mul.f r2.y, r1.z, r3.x
+mul.f r1.z, r1.z, r3.y
+mul.f r1.w, c8.x, r1.w
+mad.f32 r1.y, c9.y, r1.y, c9.z
+mul.f r2.y, c8.x, r2.y
+mul.f r1.z, c8.x, r1.z
+add.f r1.w, r7.x, (neg)r1.w
+mov.f32f32 r3.x, r1.y
+add.f r2.y, r7.y, (neg)r2.y
+add.f r0.x, r0.x, (neg)r1.z
+mov.f32f32 r1.z, r1.w
+(sy)mul.f r3.y, r8.z, r3.x
+mov.f32f32 r3.z, r2.y
+mov.f32f32 r3.w, r0.x
+mul.f r1.w, r1.w, r1.z
+add.f r4.x, r3.y, r5.z
+mad.f32 r1.w, r2.y, r3.z, r1.w
+mul.f r2.y, r8.y, r3.x
+mad.f32 r1.w, r3.w, r3.w, r1.w
+add.f r3.w, r4.x, r5.y
+mul.f r3.x, r8.x, r3.x
+mul.f r1.y, r7.w, r1.y
(rpt2)nop
-mul.f r1.z, r1.z, r4.z
-mul.f r1.w, r1.w, r4.z
-mul.f r0.x, r0.x, r4.z
-nop
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r0.x, r0.x
+rsq r1.w, r1.w
+(ss)mov.f32f32 r4.x, r1.w
+mul.f r0.x, r0.x, r1.w
+(rpt1)nop
+mul.f r1.z, r1.z, r4.x
+(ss)mul.f r1.w, r3.z, r4.x
+(rpt1)nop
+mul.f r1.z, r1.z, r2.x
nop
-mul.f r1.y, r1.z, r1.y
+mad.f32 r1.z, r1.w, r2.w, r1.z
nop
-mad.f32 r1.y, r1.w, r3.z, r1.y
-(rpt2)nop
-mov.f32f32 r1.y, r1.y
-nop
-mad.f32 r0.x, r0.x, r3.w, r1.y
-(rpt2)nop
-mov.f32f32 r0.x, r0.x
+mad.f32 r0.x, r0.x, r6.w, r1.z
(rpt2)nop
max.f r0.x, r0.x, c9.x
(rpt2)nop
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
-mov.f32f32 r1.y, r0.x
mov.f32f32 r1.z, r0.x
-cmps.f.lt r0.x, c9.x, r0.x
-(rpt1)nop
-mul.f r1.y, r1.y, r1.z
-cov.u32f32 r0.x, r0.x
+(rpt2)nop
+mul.f r0.x, r0.x, r1.z
+cmps.f.lt r1.z, c9.x, r1.z
(rpt1)nop
-mov.f32f32 r1.y, r1.y
-cmps.f.ne r0.x, r0.x, c9.x
+mov.f32f32 r1.w, r0.x
+cov.u32f32 r1.z, r1.z
(rpt1)nop
-mul.f r1.y, r1.y, r1.y
-sel.b32 r1.z, r2.x, r0.x, r3.x
+mul.f r0.x, r0.x, r1.w
+cmps.f.ne r1.z, r1.z, c9.x
(rpt1)nop
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.w, r1.z
+mov.f32f32 r1.w, r0.x
+sel.b32 r2.x, r3.w, r1.z, r3.y
+mul.f r0.x, r0.x, c9.w
+nop
+mul.f r1.w, r1.w, r1.w
+mad.f32 r2.w, r0.w, r2.x, r4.z
+add.f r3.y, r2.y, r0.x
+add.f r3.z, r3.x, r0.x
+mul.f r1.w, r1.w, c9.z
+add.f r0.x, r1.y, r0.x
+sel.b32 r2.x, r2.w, r0.z, r2.x
+nop
+mov.f32f32 r2.w, r1.w
+add.f r0.x, r0.x, r1.w
+sel.b32 r1.w, r5.w, r1.x, r2.x
+nop
+add.f r2.x, r3.y, r2.w
+add.f r2.w, r3.z, r2.w
+sel.b32 r0.x, r0.x, r1.z, r1.y
+nop
+sel.b32 r1.y, r2.x, r1.z, r2.y
+sel.b32 r1.z, r2.w, r1.z, r3.x
+mad.f32 r2.x, r0.w, r0.x, r2.z
+nop
+mad.f32 r2.y, r0.w, r1.y, r4.w
+mad.f32 r0.w, r0.w, r1.z, r4.y
+sel.b32 r0.x, r2.x, r0.z, r0.x
+nop
+sel.b32 r1.y, r2.y, r0.z, r1.y
+sel.b32 r0.z, r0.w, r0.z, r1.z
(rpt1)nop
-mul.f r2.x, r1.y, r1.y
-mov.f32f32 r1.w, r1.w
-mul.f r1.y, r1.y, c9.w
-nop
-mov.f32f32 r2.x, r2.x
-mad.f32 r1.w, r0.z, r1.w, r3.y
-mov.f32f32 r3.x, r1.y
-mov.f32f32 r3.y, r1.y
-mul.f r2.x, r2.x, c9.z
-sel.b32 r1.z, r1.w, r1.x, r1.z
-add.f r1.w, r5.z, r3.x
-add.f r3.x, r7.x, r3.y
-mov.f32f32 r2.x, r2.x
-sel.b32 r1.z, r6.y, r0.w, r1.z
-mov.f32f32 r1.y, r1.y
-nop
-mov.f32f32 r3.y, r2.x
-mov.f32f32 r3.z, r2.x
-mov.f32f32 r2.x, r2.x
-add.f r1.y, r2.y, r1.y
-add.f r2.y, r1.w, r3.y
-add.f r3.x, r3.x, r3.z
-mov.f32f32 r1.w, r1.z
-nop
-sel.b32 r1.z, r2.y, r0.x, r2.w
-sel.b32 r2.y, r3.x, r0.x, r2.z
-add.f r1.y, r1.y, r2.x
-nop
-mov.f32f32 r2.x, r1.z
-mov.f32f32 r2.z, r2.y
-sel.b32 r0.x, r1.y, r0.x, r0.y
-nop
-mov.f32f32 r0.y, r2.x
-mov.f32f32 r1.y, r2.z
-mov.f32f32 r2.x, r0.x
-nop
-mad.f32 r0.y, r0.z, r0.y, r4.y
-mad.f32 r1.y, r0.z, r1.y, r4.x
-mov.f32f32 r2.x, r2.x
-nop
-sel.b32 r0.y, r0.y, r1.x, r1.z
-sel.b32 r1.y, r1.y, r1.x, r2.y
-mad.f32 r0.z, r0.z, r2.x, r4.w
+sel.b32 r1.z, r6.x, r1.x, r1.y
+sel.b32 r1.y, r6.y, r1.x, r0.z
+sel.b32 r1.x, r0.y, r1.x, r0.x
+end
nop
-sel.b32 r0.y, r6.z, r0.w, r0.y
-sel.b32 r1.y, r6.w, r0.w, r1.y
-sel.b32 r0.x, r0.z, r1.x, r0.x
nop
-mov.f32f32 r1.z, r0.y
-mov.f32f32 r1.y, r1.y
-sel.b32 r0.x, r6.x, r0.w, r0.x
-(rpt2)nop
-mov.f32f32 r1.x, r0.x
-end
nop
; FRAG: outputs: r1.x (1:0)
-; FRAG: inputs: r0.x (5:0,cm=f,il=8,b=1) r1.x (5:20,cm=f,il=12,b=1) r0.w (5:21,cm=f,il=16,b=1) r3.x (5:22,cm=f,il=20,b=1)
-; FRAG: 297 instructions, 0 half, 8 full
+; FRAG: inputs: r0.x (5:0,cm=f,il=8,b=1) r1.x (5:20,cm=f,il=12,b=1) r1.y (5:21,cm=f,il=16,b=1) r3.x (5:22,cm=f,il=20,b=1)
+; FRAG: 193 instructions, 0 half, 9 full
diff --git a/reference/stk-mines/stk-mines-37.asm b/reference/stk-mines/stk-mines-37.asm
index 2d84780..726bef5 100644
--- a/reference/stk-mines/stk-mines-37.asm
+++ b/reference/stk-mines/stk-mines-37.asm
@@ -1,16 +1,16 @@
; options:
; VERT: new compiler
-@in(r1.x) in0
-@in(r1.y) in1
-@in(r1.z) in2
-@in(r1.w) in3
+@in(r4.z) in0
+@in(r4.w) in1
+@in(r5.x) in2
+@in(r5.y) in3
@in(r0.x) in4
@in(r0.y) in5
@in(r0.z) in6
-@in(r5.x) in8
-@in(r5.y) in9
-@in(r5.z) in10
-@in(r5.w) in11
+@in(r1.x) in8
+@in(r1.y) in9
+@in(r1.z) in10
+@in(r1.w) in11
@out(r0.x) out0
@out(r0.y) out1
@out(r0.z) out2
@@ -31,191 +31,144 @@
@out(r4.y) out17
@out(r4.z) out18
@out(r4.w) out19
+@const(c12.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)mov.f32f32 r0.w, c1.x
-mul.f r2.x, c8.x, r1.x
+mul.f r2.x, c8.x, r4.z
mov.f32f32 r2.y, c1.y
mov.f32f32 r2.z, c1.z
mul.f r2.w, r0.w, r0.x
mov.f32f32 r3.x, c2.x
-mad.f32 r2.x, c9.x, r1.y, r2.x
+mad.f32 r2.x, c9.x, r4.w, r2.x
mul.f r3.y, r0.w, r0.w
-mad.f32 r2.x, c10.x, r1.z, r2.x
+mad.f32 r2.x, c10.x, r5.x, r2.x
mad.f32 r2.w, r3.x, r0.y, r2.w
-mad.f32 r2.x, c11.x, r1.w, r2.x
-mad.f32 r3.x, r2.y, r2.y, r3.y
-mul.f r3.y, r2.y, r0.x
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r3.z, c3.x
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r3.x, r3.x
+mov.f32f32 r3.x, c3.x
+mad.f32 r2.x, c11.x, r5.y, r2.x
+mad.f32 r3.y, r2.y, r2.y, r3.y
+mul.f r3.z, r2.y, r0.x
+mad.f32 r2.w, r3.x, r0.z, r2.w
+mov.f32f32 r3.x, r2.x
+mad.f32 r3.y, r2.z, r2.z, r3.y
mov.f32f32 r3.w, c2.y
-mad.f32 r2.w, r3.z, r0.z, r2.w
-mul.f r3.z, r2.x, r2.x
-mul.f r4.x, c8.y, r1.x
-mad.f32 r3.x, r2.z, r2.z, r3.x
-mov.f32f32 r4.y, r2.w
-mad.f32 r2.w, c9.y, r1.y, r4.x
-mad.f32 r3.y, r3.w, r0.y, r3.y
-mul.f r0.x, r2.z, r0.x
-mul.f r3.w, r4.y, r4.y
-mad.f32 r2.w, c10.y, r1.z, r2.w
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r4.x, c3.y
-mad.f32 r2.w, c11.y, r1.w, r2.w
-rsq r3.x, r3.x
-(ss)mov.f32f32 r3.x, r3.x
-mov.f32f32 r4.z, c2.z
-mad.f32 r3.y, r4.x, r0.z, r3.y
mov.f32f32 r4.x, r2.w
-mul.f r0.w, r0.w, r3.x
-mul.f r2.y, r2.y, r3.x
-mov.f32f32 r3.y, r3.y
-mad.f32 r2.w, r4.x, r4.x, r3.z
+mul.f r3.x, r3.x, r3.x
+mul.f r4.y, c8.y, r4.z
+mad.f32 r3.z, r3.w, r0.y, r3.z
+mul.f r2.w, r2.w, r4.x
+mov.f32f32 r3.w, c3.y
+mad.f32 r4.y, c9.y, r4.w, r4.y
+rsq r3.y, r3.y
+(ss)mov.f32f32 r5.z, r3.y
+mad.f32 r4.y, c10.y, r5.x, r4.y
+mad.f32 r3.z, r3.w, r0.z, r3.z
+mad.f32 r3.w, c11.y, r5.y, r4.y
+mul.f r0.w, r0.w, r5.z
+mul.f r2.y, r2.y, r5.z
+mov.f32f32 r4.y, r3.z
+mov.f32f32 r5.z, r3.w
+mul.f r5.w, c0.x, r0.w
mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.y, r2.y
-mad.f32 r3.z, r3.y, r3.y, r3.w
-mov.f32f32 r2.w, r2.w
-mul.f r3.w, c8.z, r1.x
-mul.f r4.w, c0.x, r0.w
-mov.f32f32 r3.z, r3.z
-mad.f32 r0.x, r4.z, r0.y, r0.x
-mad.f32 r0.y, c9.z, r1.y, r3.w
-mad.f32 r3.w, c0.y, r2.y, r4.w
-mad.f32 r0.y, c10.z, r1.z, r0.y
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r4.z, c3.z
-mad.f32 r0.y, c11.z, r1.w, r0.y
-mov.f32f32 r3.w, r3.w
-mul.f r2.z, r2.z, r3.x
-mad.f32 r0.x, r4.z, r0.z, r0.x
-mov.f32f32 r0.y, r0.y
-mul.f r0.z, c4.w, r1.x
-mul.f r3.x, c4.z, r1.x
-mov.f32f32 r0.x, r0.x
-mad.f32 r2.w, r0.y, r0.y, r2.w
+mad.f32 r2.w, r3.z, r4.y, r2.w
+mul.f r0.x, r2.z, r0.x
+mov.f32f32 r3.z, c2.z
+mad.f32 r3.x, r3.w, r5.z, r3.x
+mul.f r3.w, c8.z, r4.z
+mad.f32 r5.w, c0.y, r2.y, r5.w
+mad.f32 r0.x, r3.z, r0.y, r0.x
+mov.f32f32 r0.y, c3.z
+mad.f32 r3.z, c9.z, r4.w, r3.w
+mul.f r2.z, r2.z, r3.y
+(ss)mad.f32 r3.y, c10.z, r5.x, r3.z
+mad.f32 r0.x, r0.y, r0.z, r0.x
+mad.f32 r0.y, c11.z, r5.y, r3.y
+mad.f32 r3.y, c0.z, r2.z, r5.w
+mov.f32f32 r0.z, r2.y
+mov.f32f32 r2.y, r0.x
+mov.f32f32 r3.z, r0.y
+mov.f32f32 r3.w, r3.y
mov.f32f32 r2.z, r2.z
-mad.f32 r0.z, c5.w, r1.y, r0.z
-mad.f32 r3.z, r0.x, r0.x, r3.z
-mad.f32 r0.z, c6.w, r1.z, r0.z
-mad.f32 r3.x, c5.z, r1.y, r3.x
-mul.f r4.z, c4.y, r1.x
-mul.f r1.x, c4.x, r1.x
-mad.f32 r0.z, c7.w, r1.w, r0.z
-mad.f32 r3.x, c6.z, r1.z, r3.x
-rsq r3.z, r3.z
-(ss)mov.f32f32 r3.z, r3.z
-rsq r2.w, r2.w
-(ss)mov.f32f32 r4.w, r2.w
-mad.f32 r3.w, c0.z, r2.z, r3.w
-(ss)mov.f32f32 r2.w, r0.z
-mul.f r0.x, r0.x, r3.z
-mul.f r2.x, r2.x, r4.w
-mul.f r4.y, r4.y, r3.z
-mul.f r3.y, r3.y, r3.z
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r3.z, r4.y
-mov.f32f32 r3.y, r3.y
-mul.f r4.y, r0.x, r2.y
-absneg.f r2.x, (neg)r2.x
-mul.f r6.x, c0.x, r3.z
-mad.f32 r4.y, r3.y, r2.z, (neg)r4.y
-mad.f32 r6.x, c0.y, r3.y, r6.x
-mov.f32f32 r6.y, r2.x
-mul.f r2.x, r3.z, r2.z
-mov.f32f32 r2.z, r4.y
-mov.f32f32 r4.y, r6.x
-mul.f r6.x, r6.y, r6.y
-mul.f r4.x, r4.x, r4.w
-mul.f r2.z, c0.x, r2.z
-mad.f32 r2.x, r0.x, r0.w, (neg)r2.x
-mad.f32 r0.x, c0.z, r0.x, r4.y
+mad.f32 r2.y, r2.y, r2.y, r2.w
+mad.f32 r0.y, r0.y, r3.z, r3.x
+mul.f r3.x, r3.w, r3.w
+mul.f r2.w, c4.w, r4.z
+mul.f r3.w, c4.z, r4.z
+mul.f r5.w, c4.y, r4.z
+mul.f r4.z, c4.x, r4.z
+rsq r2.y, r2.y
+(ss)mov.f32f32 r6.x, r2.y
+rsq r0.y, r0.y
+(ss)mov.f32f32 r6.y, r0.y
+(ss)mul.f r0.y, r2.x, r0.y
+mul.f r0.x, r0.x, r2.y
+mul.f r2.x, r4.y, r6.x
+mul.f r2.y, r5.z, r6.y
+mul.f r4.x, r4.x, r6.x
+absneg.f r4.y, (neg)r0.y
+mov.f32f32 r0.y, r2.x
+mov.f32f32 r5.z, r0.x
+absneg.f r2.y, (neg)r2.y
+mul.f r6.x, c0.x, r4.x
mov.f32f32 r4.x, r4.x
-mul.f r0.w, r3.y, r0.w
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r0.x, r0.x
-absneg.f r3.y, (neg)r4.x
-mad.f32 r0.w, r3.z, r2.y, (neg)r0.w
-mad.f32 r2.x, c0.y, r2.x, r2.z
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r4.x, r3.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r3.y, r3.w
-mul.f r0.y, r0.y, r4.w
-mad.f32 r0.w, c0.z, r0.w, r2.x
-mad.f32 r2.x, r4.x, r4.x, r6.x
-mul.f r2.y, r3.y, r3.y
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.x, r2.x
-mad.f32 r3.x, c7.z, r1.w, r3.x
-mad.f32 r3.z, c5.y, r1.y, r4.z
-mov.f32f32 r3.w, r0.w
-absneg.f r0.y, (neg)r0.y
-mov.f32f32 r2.z, r3.x
-mad.f32 r0.w, c6.y, r1.z, r3.z
-mad.f32 r2.y, r3.w, r3.w, r2.y
-mov.f32f32 r3.z, r0.y
-mad.f32 r0.y, c7.y, r1.w, r0.w
-mad.f32 r0.w, c5.x, r1.y, r1.x
-mov.f32f32 r1.x, r2.y
-mad.f32 r1.y, r3.z, r3.z, r2.x
-mad.f32 r1.x, r0.x, r0.x, r1.x
+mul.f r6.z, r5.z, r0.z
+mov.f32f32 r6.w, r2.y
+mad.f32 r6.z, r0.y, r2.z, (neg)r6.z
+mov.f32f32 r7.x, r4.y
+mad.f32 r2.x, c0.y, r2.x, r6.x
+mul.f r2.z, r4.x, r2.z
+mul.f r6.x, c0.x, r6.z
+mad.f32 r2.z, r5.z, r0.w, (neg)r2.z
+mul.f r5.z, r7.x, r7.x
+mad.f32 r0.x, c0.z, r0.x, r2.x
+mad.f32 r2.x, r2.y, r6.w, r5.z
+mad.f32 r2.y, c0.y, r2.z, r6.x
+mul.f r0.y, r0.y, r0.w
+mul.f r0.w, r3.z, r6.y
+mad.f32 r0.y, r4.x, r0.z, (neg)r0.y
+mov.f32f32 r0.z, r0.x
+mad.f32 r2.z, c5.w, r4.w, r2.w
+mad.f32 r2.w, c5.z, r4.w, r3.w
+mad.f32 r0.y, c0.z, r0.y, r2.y
+absneg.f r2.y, (neg)r0.w
+mov.f32f32 r3.z, r0.z
+mad.f32 r0.z, c6.w, r5.x, r2.z
+mov.f32f32 r2.z, r0.y
+mov.f32f32 r4.x, r2.y
+mad.f32 r0.w, c7.w, r5.y, r0.z
+mad.f32 r0.z, c6.z, r5.x, r2.w
+mov.f32f32 r5.z, r2.z
+mad.f32 r2.x, r2.y, r4.x, r2.x
+mov.f32f32 r2.w, r0.w
+mad.f32 r0.z, c7.z, r5.y, r0.z
+mad.f32 r0.y, r0.y, r5.z, r3.x
+mad.f32 r2.y, c5.y, r4.w, r5.w
+mad.f32 r0.x, r0.x, r3.z, r0.y
+mov.f32f32 r2.z, r0.z
+mad.f32 r0.y, c6.y, r5.x, r2.y
+mad.f32 r2.y, c5.x, r4.w, r4.z
+mad.f32 r0.y, c7.y, r5.y, r0.y
+mad.f32 r4.z, c6.x, r5.x, r2.y
+nop
+rsq r0.x, r0.x
+(ss)mov.f32f32 r2.y, r0.x
+mul.f r3.x, r3.y, r0.x
+(ss)rsq r0.x, r2.x
+(ss)mov.f32f32 r2.x, r0.x
+mul.f r3.w, r4.y, r0.x
+mul.f r3.z, r3.z, r2.y
+mul.f r3.y, r5.z, r2.y
+mul.f r4.y, r4.x, r2.x
+mul.f r4.x, r6.w, r2.x
mov.f32f32 r2.y, r0.y
-mad.f32 r1.z, c6.x, r1.z, r0.w
-mov.f32f32 r0.w, r0.z
-mov.f32f32 r0.z, r3.x
-mov.f32f32 r0.y, r0.y
-mad.f32 r1.z, c7.x, r1.w, r1.z
-rsq r1.x, r1.x
-(ss)mov.f32f32 r1.x, r1.x
-rsq r1.y, r1.y
-(ss)mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.w, (0.000000)
-mov.f32f32 r2.x, r1.z
-mul.f r0.x, r0.x, r1.x
-mul.f r3.x, r3.w, r1.x
-mul.f r1.x, r3.y, r1.x
-mul.f r3.y, r3.z, r1.y
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r3.w, r3.y
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r4.y, r3.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r1.x, r1.x
+mad.f32 r0.x, c7.x, r5.y, r4.z
+mov.f32f32 r4.z, r4.y
+mov.f32f32 r4.w, (0.000000)
nop
-mov.f32f32 r3.z, r0.x
-mov.f32f32 r3.y, r3.x
-mov.f32f32 r3.x, r1.x
-mov.f32f32 r0.x, r4.y
-mul.f r1.x, r4.x, r1.y
-mul.f r1.y, r6.y, r1.y
-mov.f32f32 r4.z, r3.w
-mov.f32f32 r4.y, r0.x
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r0.x, r1.z
-mov.f32f32 r4.w, r1.w
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.y, r1.y
-(rpt1)nop
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r3.w, r1.y
-mov.f32f32 r1.w, r5.w
-mov.f32f32 r1.z, r5.z
-mov.f32f32 r4.x, r1.x
-mov.f32f32 r1.y, r5.y
-mov.f32f32 r1.x, r5.x
+mov.f32f32 r2.x, r0.x
end
nop
nop
+nop
; VERT: outputs: r0.x (0:0) r1.x (5:0) r2.x (5:20) r3.x (5:21) r4.x (5:22)
-; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r5.x (0:0,cm=f,il=16,b=0)
-; VERT: 183 instructions, 0 half, 7 full
+; VERT: inputs: r4.z (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r1.x (0:0,cm=f,il=16,b=0)
+; VERT: 133 instructions, 0 half, 8 full
diff --git a/reference/stk-mines/stk-mines-38.asm b/reference/stk-mines/stk-mines-38.asm
index ff486a7..d610a0c 100644
--- a/reference/stk-mines/stk-mines-38.asm
+++ b/reference/stk-mines/stk-mines-38.asm
@@ -6,39 +6,20 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)bary.f r0.z, 4, r0.x
-bary.f r0.w, 7, r0.x
-bary.f r1.x, 5, r0.x
-bary.f r1.y, 2, r0.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.x, r1.x
-bary.f r1.z, 1, r0.x
-mov.f32f32 r1.w, r0.z
-mov.f32f32 r2.y, r0.w
-mov.f32f32 r2.x, r1.x
+bary.f r0.w, 5, r0.x
+bary.f r1.x, 7, r0.x
+bary.f r2.x, 2, r0.x
+bary.f r2.y, 1, r0.x
bary.f (ei)r0.x, 0, r0.x
-(rpt4)nop
-sam.p (f32)(xyzw)r0.y, r1.w, s#0, t#0
-(sy)mul.f r0.w, r0.w, r1.y
-mul.f r0.z, r0.z, r1.z
-mul.f r0.x, r0.y, r0.x
-mov.f32f32 r0.y, r1.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.x, r0.x
-(ss)mov.f32f32 r1.w, r0.y
-mov.f32f32 r0.y, r0.w
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.x, r0.x
-nop
-mov.f32f32 r1.z, r0.y
-mov.f32f32 r1.y, r0.z
-mov.f32f32 r1.x, r0.x
+(rpt2)nop
+sam.p (f32)(xyzw)r1.x, r0.z, s#0, t#0
+(sy)mul.f r1.z, r1.z, r2.x
+mul.f r1.y, r1.y, r2.y
+(ss)mul.f r1.x, r1.x, r0.x
end
-nop
-nop
; FRAG: outputs: r1.x (1:0)
-; FRAG: inputs: r0.x (1:0,cm=f,il=8,b=1) r0.x (5:0,cm=f,il=12,b=1)
-; FRAG: 34 instructions, 0 half, 3 full
+; FRAG: inputs: r0.x (1:0,cm=f,il=8,b=1) r0.y (5:0,cm=f,il=12,b=1)
+; FRAG: 14 instructions, 0 half, 3 full
diff --git a/reference/stk/stk0100.asm b/reference/stk/stk0100.asm
index 4a651b1..16ecc3c 100644
--- a/reference/stk/stk0100.asm
+++ b/reference/stk/stk0100.asm
@@ -6,31 +6,24 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)bary.f r0.z, 4, r0.x
-bary.f r0.w, 7, r0.x
-bary.f r1.x, 5, r0.x
+bary.f r0.w, 5, r0.x
+bary.f r1.x, 7, r0.x
bary.f r1.y, 3, r0.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.x, r1.x
bary.f r1.z, 2, r0.x
-mov.f32f32 r1.w, r0.z
-mov.f32f32 r2.y, r0.w
-mov.f32f32 r2.x, r1.x
-bary.f r0.z, 1, r0.x
+bary.f r2.x, 1, r0.x
bary.f (ei)r0.x, 0, r0.x
-(rpt3)nop
-sam.p (f32)(xyzw)r1.w, r1.w, s#0, t#0
-(sy)mul.f r0.y, r2.z, r1.y
-mul.f r0.w, r2.y, r1.z
-mul.f r0.z, r2.x, r0.z
-mul.f r0.x, r1.w, r0.x
-(ss)mov.f32f32 r1.w, r0.y
-mov.f32f32 r1.z, r0.w
-mov.f32f32 r1.y, r0.z
-mov.f32f32 r1.x, r0.x
+(rpt1)nop
+sam.p (f32)(xyzw)r0.y, r0.z, s#0, t#0
+(sy)mul.f r1.w, r1.x, r1.y
+mul.f r1.z, r0.w, r1.z
+mul.f r1.y, r0.z, r2.x
+(ss)mul.f r1.x, r0.y, r0.x
end
+nop
+nop
; FRAG: outputs: r1.x (1:0)
-; FRAG: inputs: r0.z (1:0,cm=f,il=8,b=1) r0.x (5:0,cm=f,il=12,b=1)
-; FRAG: 27 instructions, 0 half, 3 full
+; FRAG: inputs: r0.z (1:0,cm=f,il=8,b=1) r0.y (5:0,cm=f,il=12,b=1)
+; FRAG: 15 instructions, 0 half, 3 full
diff --git a/reference/stk/stk0101.asm b/reference/stk/stk0101.asm
index c16817f..530b9e3 100644
--- a/reference/stk/stk0101.asm
+++ b/reference/stk/stk0101.asm
@@ -1,20 +1,20 @@
; options:
; VERT: new compiler
-@in(r2.x) in0
-@in(r2.y) in1
-@in(r2.z) in2
-@in(r2.w) in3
-@in(r0.w) in11
-@in(r3.x) in12
-@in(r3.y) in13
-@in(r3.z) in14
-@in(r3.w) in15
+@in(r3.x) in0
+@in(r3.y) in1
+@in(r3.z) in2
+@in(r3.w) in3
+@in(r4.w) in11
+@in(r2.x) in12
+@in(r2.y) in13
+@in(r2.z) in14
+@in(r2.w) in15
@in(r0.x) in16
@in(r0.y) in17
@in(r0.z) in18
-@in(r1.x) in20
-@in(r1.y) in21
-@in(r1.z) in22
+@in(r0.w) in20
+@in(r1.x) in21
+@in(r1.y) in22
@out(r0.x) out0
@out(r0.y) out1
@out(r0.z) out2
@@ -27,47 +27,40 @@
@out(r2.y) out9
@out(r2.z) out10
@out(r2.w) out11
-(sy)(ss)mov.f32f32 r0.w, r0.w
-mad.f32 r0.z, c7.z, r0.z, r1.z
-mad.f32 r0.y, c7.y, r0.y, r1.y
-mad.f32 r0.x, c7.x, r0.x, r1.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
-max.f r0.w, r0.w, c8.x
+@const(c8.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
+(sy)(ss)mad.f32 r0.z, c7.z, r0.z, r1.y
+mad.f32 r0.y, c7.y, r0.y, r1.x
+mad.f32 r0.x, c7.x, r0.x, r0.w
+mul.f r0.w, r3.x, c0.w
max.f r0.z, r0.z, c8.x
max.f r0.y, r0.y, c8.x
max.f r0.x, r0.x, c8.x
-min.f r1.w, r0.w, c8.y
+mad.f32 r0.w, c1.w, r3.y, r0.w
min.f r1.z, r0.z, c8.y
min.f r1.y, r0.y, c8.y
min.f r1.x, r0.x, c8.y
-mul.f r0.x, r2.x, c0.w
-mul.f r0.y, r2.x, c0.z
-mad.f32 r0.x, c1.w, r2.y, r0.x
-mad.f32 r0.y, c1.z, r2.y, r0.y
-mad.f32 r0.x, c2.w, r2.z, r0.x
-mad.f32 r0.y, c2.z, r2.z, r0.y
-mad.f32 r0.w, c3.w, r2.w, r0.x
-mad.f32 r0.z, c3.z, r2.w, r0.y
-mul.f r0.x, r2.x, c0.y
-mul.f r0.y, r2.x, c0.x
-mad.f32 r0.x, c1.y, r2.y, r0.x
-mad.f32 r0.y, c1.x, r2.y, r0.y
-mad.f32 r0.x, c2.y, r2.z, r0.x
-mad.f32 r2.x, c2.x, r2.z, r0.y
-mad.f32 r0.y, c3.y, r2.w, r0.x
-mad.f32 r0.x, c3.x, r2.w, r2.x
-mov.f32f32 r2.w, r3.w
-mov.f32f32 r2.z, r3.z
-mov.f32f32 r2.y, r3.y
-mov.f32f32 r2.x, r3.x
+mad.f32 r0.x, c2.w, r3.z, r0.w
+mul.f r0.y, r3.x, c0.z
+mad.f32 r0.w, c3.w, r3.w, r0.x
+mad.f32 r0.x, c1.z, r3.y, r0.y
+mul.f r0.y, r3.x, c0.y
+mad.f32 r0.x, c2.z, r3.z, r0.x
+mad.f32 r0.y, c1.y, r3.y, r0.y
+mad.f32 r0.z, c3.z, r3.w, r0.x
+mad.f32 r0.x, c2.y, r3.z, r0.y
+mul.f r1.w, r3.x, c0.x
+mad.f32 r0.y, c3.y, r3.w, r0.x
+mad.f32 r0.x, c1.x, r3.y, r1.w
+max.f r1.w, r4.w, c8.x
+mad.f32 r0.x, c2.x, r3.z, r0.x
+nop
+mad.f32 r0.x, c3.x, r3.w, r0.x
+min.f r1.w, r1.w, c8.y
end
nop
nop
nop
; VERT: outputs: r0.x (0:0) r1.x (1:0) r2.x (5:0)
-; VERT: inputs: r2.x (0:0,cm=f,il=8,b=0) r63.w (0:0,cm=0,il=12,b=0) r0.x (0:0,cm=8,il=16,b=0) r3.x (0:0,cm=f,il=20,b=0) r0.x (0:0,cm=7,il=24,b=0) r1.x (0:0,cm=7,il=28,b=0) r63.w (0:0,cm=0,il=32,b=0)
-; VERT: 37 instructions, 0 half, 4 full
+; VERT: inputs: r3.x (0:0,cm=f,il=8,b=0) r63.w (0:0,cm=0,il=12,b=0) r4.x (0:0,cm=8,il=16,b=0) r2.x (0:0,cm=f,il=20,b=0) r0.x (0:0,cm=7,il=24,b=0) r0.w (0:0,cm=7,il=28,b=0) r63.w (0:0,cm=0,il=32,b=0)
+; VERT: 29 instructions, 0 half, 5 full
diff --git a/reference/stk/stk0102.asm b/reference/stk/stk0102.asm
index cca09e5..0583b5d 100644
--- a/reference/stk/stk0102.asm
+++ b/reference/stk/stk0102.asm
@@ -1,17 +1,17 @@
; options:
; VERT: new compiler
-@in(r2.x) in0
-@in(r2.y) in1
-@in(r2.z) in2
-@in(r2.w) in3
-@in(r0.x) in4
-@in(r0.y) in5
-@in(r0.z) in6
-@in(r0.w) in7
-@in(r3.x) in8
-@in(r3.y) in9
-@in(r3.z) in10
-@in(r3.w) in11
+@in(r1.x) in0
+@in(r1.y) in1
+@in(r1.z) in2
+@in(r1.w) in3
+@in(r3.x) in4
+@in(r3.y) in5
+@in(r3.z) in6
+@in(r3.w) in7
+@in(r2.x) in8
+@in(r2.y) in9
+@in(r2.z) in10
+@in(r2.w) in11
@out(r0.x) out0
@out(r0.y) out1
@out(r0.z) out2
@@ -24,43 +24,36 @@
@out(r2.y) out9
@out(r2.z) out10
@out(r2.w) out11
-(sy)(ss)mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
-max.f r0.w, r0.w, c4.x
-max.f r0.z, r0.z, c4.x
-max.f r0.y, r0.y, c4.x
-max.f r0.x, r0.x, c4.x
-min.f r1.w, r0.w, c4.y
-min.f r1.z, r0.z, c4.y
-min.f r1.y, r0.y, c4.y
-min.f r1.x, r0.x, c4.y
-mul.f r0.x, r2.x, c0.w
-mul.f r0.y, r2.x, c0.z
-mad.f32 r0.x, c1.w, r2.y, r0.x
-mad.f32 r0.y, c1.z, r2.y, r0.y
-mad.f32 r0.x, c2.w, r2.z, r0.x
-mad.f32 r0.y, c2.z, r2.z, r0.y
-mad.f32 r0.w, c3.w, r2.w, r0.x
-mad.f32 r0.z, c3.z, r2.w, r0.y
-mul.f r0.x, r2.x, c0.y
-mul.f r0.y, r2.x, c0.x
-mad.f32 r0.x, c1.y, r2.y, r0.x
-mad.f32 r0.y, c1.x, r2.y, r0.y
-mad.f32 r0.x, c2.y, r2.z, r0.x
-mad.f32 r2.x, c2.x, r2.z, r0.y
-mad.f32 r0.y, c3.y, r2.w, r0.x
-mad.f32 r0.x, c3.x, r2.w, r2.x
-mov.f32f32 r2.w, r3.w
-mov.f32f32 r2.z, r3.z
-mov.f32f32 r2.y, r3.y
-mov.f32f32 r2.x, r3.x
+@const(c4.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000
+(sy)(ss)mul.f r0.x, r1.x, c0.w
+mul.f r0.y, r1.x, c0.z
+mad.f32 r0.x, c1.w, r1.y, r0.x
+mad.f32 r0.y, c1.z, r1.y, r0.y
+mad.f32 r0.x, c2.w, r1.z, r0.x
+mad.f32 r0.y, c2.z, r1.z, r0.y
+mad.f32 r0.w, c3.w, r1.w, r0.x
+mad.f32 r0.z, c3.z, r1.w, r0.y
+mul.f r0.x, r1.x, c0.y
+mul.f r0.y, r1.x, c0.x
+mad.f32 r0.x, c1.y, r1.y, r0.x
+mad.f32 r0.y, c1.x, r1.y, r0.y
+mad.f32 r0.x, c2.y, r1.z, r0.x
+mad.f32 r1.x, c2.x, r1.z, r0.y
+mad.f32 r0.y, c3.y, r1.w, r0.x
+mad.f32 r0.x, c3.x, r1.w, r1.x
+max.f r1.x, r3.w, c4.x
+max.f r1.y, r3.z, c4.x
+max.f r3.y, r3.y, c4.x
+max.f r3.x, r3.x, c4.x
+min.f r1.w, r1.x, c4.y
+min.f r1.z, r1.y, c4.y
+min.f r1.y, r3.y, c4.y
+min.f r1.x, r3.x, c4.y
end
nop
nop
nop
; VERT: outputs: r0.x (0:0) r1.x (1:0) r2.x (5:0)
-; VERT: inputs: r2.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=f,il=12,b=0) r3.x (0:0,cm=f,il=16,b=0)
-; VERT: 33 instructions, 0 half, 4 full
+; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) r3.x (0:0,cm=f,il=12,b=0) r2.x (0:0,cm=f,il=16,b=0)
+; VERT: 25 instructions, 0 half, 4 full
diff --git a/reference/stk/stk0200.asm b/reference/stk/stk0200.asm
index ff486a7..d610a0c 100644
--- a/reference/stk/stk0200.asm
+++ b/reference/stk/stk0200.asm
@@ -6,39 +6,20 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)bary.f r0.z, 4, r0.x
-bary.f r0.w, 7, r0.x
-bary.f r1.x, 5, r0.x
-bary.f r1.y, 2, r0.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.x, r1.x
-bary.f r1.z, 1, r0.x
-mov.f32f32 r1.w, r0.z
-mov.f32f32 r2.y, r0.w
-mov.f32f32 r2.x, r1.x
+bary.f r0.w, 5, r0.x
+bary.f r1.x, 7, r0.x
+bary.f r2.x, 2, r0.x
+bary.f r2.y, 1, r0.x
bary.f (ei)r0.x, 0, r0.x
-(rpt4)nop
-sam.p (f32)(xyzw)r0.y, r1.w, s#0, t#0
-(sy)mul.f r0.w, r0.w, r1.y
-mul.f r0.z, r0.z, r1.z
-mul.f r0.x, r0.y, r0.x
-mov.f32f32 r0.y, r1.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.x, r0.x
-(ss)mov.f32f32 r1.w, r0.y
-mov.f32f32 r0.y, r0.w
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.x, r0.x
-nop
-mov.f32f32 r1.z, r0.y
-mov.f32f32 r1.y, r0.z
-mov.f32f32 r1.x, r0.x
+(rpt2)nop
+sam.p (f32)(xyzw)r1.x, r0.z, s#0, t#0
+(sy)mul.f r1.z, r1.z, r2.x
+mul.f r1.y, r1.y, r2.y
+(ss)mul.f r1.x, r1.x, r0.x
end
-nop
-nop
; FRAG: outputs: r1.x (1:0)
-; FRAG: inputs: r0.x (1:0,cm=f,il=8,b=1) r0.x (5:0,cm=f,il=12,b=1)
-; FRAG: 34 instructions, 0 half, 3 full
+; FRAG: inputs: r0.x (1:0,cm=f,il=8,b=1) r0.y (5:0,cm=f,il=12,b=1)
+; FRAG: 14 instructions, 0 half, 3 full
diff --git a/reference/stk/stk0301.asm b/reference/stk/stk0301.asm
index 2c03e4f..284e180 100644
--- a/reference/stk/stk0301.asm
+++ b/reference/stk/stk0301.asm
@@ -1,13 +1,13 @@
; options:
; VERT: new compiler
-@in(r2.x) in0
-@in(r2.y) in1
-@in(r2.z) in2
-@in(r2.w) in3
-@in(r0.x) in4
-@in(r0.y) in5
-@in(r0.z) in6
-@in(r0.w) in7
+@in(r1.x) in0
+@in(r1.y) in1
+@in(r1.z) in2
+@in(r1.w) in3
+@in(r2.x) in4
+@in(r2.y) in5
+@in(r2.z) in6
+@in(r2.w) in7
@out(r0.x) out0
@out(r0.y) out1
@out(r0.z) out2
@@ -16,39 +16,36 @@
@out(r1.y) out5
@out(r1.z) out6
@out(r1.w) out7
-(sy)(ss)mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
-max.f r0.w, r0.w, c4.x
-max.f r0.z, r0.z, c4.x
-max.f r0.y, r0.y, c4.x
-max.f r0.x, r0.x, c4.x
-min.f r1.w, r0.w, c4.y
-min.f r1.z, r0.z, c4.y
-min.f r1.y, r0.y, c4.y
-min.f r1.x, r0.x, c4.y
-mul.f r0.x, r2.x, c0.w
-mul.f r0.y, r2.x, c0.z
-mad.f32 r0.x, c1.w, r2.y, r0.x
-mad.f32 r0.y, c1.z, r2.y, r0.y
-mad.f32 r0.x, c2.w, r2.z, r0.x
-mad.f32 r0.y, c2.z, r2.z, r0.y
-mad.f32 r0.w, c3.w, r2.w, r0.x
-mad.f32 r0.z, c3.z, r2.w, r0.y
-mul.f r0.x, r2.x, c0.y
-mul.f r0.y, r2.x, c0.x
-mad.f32 r0.x, c1.y, r2.y, r0.x
-mad.f32 r0.y, c1.x, r2.y, r0.y
-mad.f32 r0.x, c2.y, r2.z, r0.x
-mad.f32 r2.x, c2.x, r2.z, r0.y
-mad.f32 r0.y, c3.y, r2.w, r0.x
-mad.f32 r0.x, c3.x, r2.w, r2.x
+@const(c4.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000
+(sy)(ss)mul.f r0.x, r1.x, c0.w
+mul.f r0.y, r1.x, c0.z
+mad.f32 r0.x, c1.w, r1.y, r0.x
+mad.f32 r0.y, c1.z, r1.y, r0.y
+mad.f32 r0.x, c2.w, r1.z, r0.x
+mad.f32 r0.y, c2.z, r1.z, r0.y
+mad.f32 r0.w, c3.w, r1.w, r0.x
+mad.f32 r0.z, c3.z, r1.w, r0.y
+mul.f r0.x, r1.x, c0.y
+mul.f r0.y, r1.x, c0.x
+mad.f32 r0.x, c1.y, r1.y, r0.x
+mad.f32 r0.y, c1.x, r1.y, r0.y
+mad.f32 r0.x, c2.y, r1.z, r0.x
+mad.f32 r1.x, c2.x, r1.z, r0.y
+mad.f32 r0.y, c3.y, r1.w, r0.x
+mad.f32 r0.x, c3.x, r1.w, r1.x
+max.f r1.x, r2.w, c4.x
+max.f r1.y, r2.z, c4.x
+max.f r2.y, r2.y, c4.x
+max.f r2.x, r2.x, c4.x
+min.f r1.w, r1.x, c4.y
+min.f r1.z, r1.y, c4.y
+min.f r1.y, r2.y, c4.y
+min.f r1.x, r2.x, c4.y
end
nop
nop
nop
; VERT: outputs: r0.x (0:0) r1.x (1:0)
-; VERT: inputs: r2.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=f,il=12,b=0)
-; VERT: 29 instructions, 0 half, 3 full
+; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) r2.x (0:0,cm=f,il=12,b=0)
+; VERT: 25 instructions, 0 half, 3 full
diff --git a/reference/stk/stk0302.asm b/reference/stk/stk0302.asm
index 2c03e4f..284e180 100644
--- a/reference/stk/stk0302.asm
+++ b/reference/stk/stk0302.asm
@@ -1,13 +1,13 @@
; options:
; VERT: new compiler
-@in(r2.x) in0
-@in(r2.y) in1
-@in(r2.z) in2
-@in(r2.w) in3
-@in(r0.x) in4
-@in(r0.y) in5
-@in(r0.z) in6
-@in(r0.w) in7
+@in(r1.x) in0
+@in(r1.y) in1
+@in(r1.z) in2
+@in(r1.w) in3
+@in(r2.x) in4
+@in(r2.y) in5
+@in(r2.z) in6
+@in(r2.w) in7
@out(r0.x) out0
@out(r0.y) out1
@out(r0.z) out2
@@ -16,39 +16,36 @@
@out(r1.y) out5
@out(r1.z) out6
@out(r1.w) out7
-(sy)(ss)mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
-max.f r0.w, r0.w, c4.x
-max.f r0.z, r0.z, c4.x
-max.f r0.y, r0.y, c4.x
-max.f r0.x, r0.x, c4.x
-min.f r1.w, r0.w, c4.y
-min.f r1.z, r0.z, c4.y
-min.f r1.y, r0.y, c4.y
-min.f r1.x, r0.x, c4.y
-mul.f r0.x, r2.x, c0.w
-mul.f r0.y, r2.x, c0.z
-mad.f32 r0.x, c1.w, r2.y, r0.x
-mad.f32 r0.y, c1.z, r2.y, r0.y
-mad.f32 r0.x, c2.w, r2.z, r0.x
-mad.f32 r0.y, c2.z, r2.z, r0.y
-mad.f32 r0.w, c3.w, r2.w, r0.x
-mad.f32 r0.z, c3.z, r2.w, r0.y
-mul.f r0.x, r2.x, c0.y
-mul.f r0.y, r2.x, c0.x
-mad.f32 r0.x, c1.y, r2.y, r0.x
-mad.f32 r0.y, c1.x, r2.y, r0.y
-mad.f32 r0.x, c2.y, r2.z, r0.x
-mad.f32 r2.x, c2.x, r2.z, r0.y
-mad.f32 r0.y, c3.y, r2.w, r0.x
-mad.f32 r0.x, c3.x, r2.w, r2.x
+@const(c4.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000
+(sy)(ss)mul.f r0.x, r1.x, c0.w
+mul.f r0.y, r1.x, c0.z
+mad.f32 r0.x, c1.w, r1.y, r0.x
+mad.f32 r0.y, c1.z, r1.y, r0.y
+mad.f32 r0.x, c2.w, r1.z, r0.x
+mad.f32 r0.y, c2.z, r1.z, r0.y
+mad.f32 r0.w, c3.w, r1.w, r0.x
+mad.f32 r0.z, c3.z, r1.w, r0.y
+mul.f r0.x, r1.x, c0.y
+mul.f r0.y, r1.x, c0.x
+mad.f32 r0.x, c1.y, r1.y, r0.x
+mad.f32 r0.y, c1.x, r1.y, r0.y
+mad.f32 r0.x, c2.y, r1.z, r0.x
+mad.f32 r1.x, c2.x, r1.z, r0.y
+mad.f32 r0.y, c3.y, r1.w, r0.x
+mad.f32 r0.x, c3.x, r1.w, r1.x
+max.f r1.x, r2.w, c4.x
+max.f r1.y, r2.z, c4.x
+max.f r2.y, r2.y, c4.x
+max.f r2.x, r2.x, c4.x
+min.f r1.w, r1.x, c4.y
+min.f r1.z, r1.y, c4.y
+min.f r1.y, r2.y, c4.y
+min.f r1.x, r2.x, c4.y
end
nop
nop
nop
; VERT: outputs: r0.x (0:0) r1.x (1:0)
-; VERT: inputs: r2.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=f,il=12,b=0)
-; VERT: 29 instructions, 0 half, 3 full
+; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) r2.x (0:0,cm=f,il=12,b=0)
+; VERT: 25 instructions, 0 half, 3 full
diff --git a/reference/stk/stk0303.asm b/reference/stk/stk0303.asm
index cca09e5..0583b5d 100644
--- a/reference/stk/stk0303.asm
+++ b/reference/stk/stk0303.asm
@@ -1,17 +1,17 @@
; options:
; VERT: new compiler
-@in(r2.x) in0
-@in(r2.y) in1
-@in(r2.z) in2
-@in(r2.w) in3
-@in(r0.x) in4
-@in(r0.y) in5
-@in(r0.z) in6
-@in(r0.w) in7
-@in(r3.x) in8
-@in(r3.y) in9
-@in(r3.z) in10
-@in(r3.w) in11
+@in(r1.x) in0
+@in(r1.y) in1
+@in(r1.z) in2
+@in(r1.w) in3
+@in(r3.x) in4
+@in(r3.y) in5
+@in(r3.z) in6
+@in(r3.w) in7
+@in(r2.x) in8
+@in(r2.y) in9
+@in(r2.z) in10
+@in(r2.w) in11
@out(r0.x) out0
@out(r0.y) out1
@out(r0.z) out2
@@ -24,43 +24,36 @@
@out(r2.y) out9
@out(r2.z) out10
@out(r2.w) out11
-(sy)(ss)mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
-max.f r0.w, r0.w, c4.x
-max.f r0.z, r0.z, c4.x
-max.f r0.y, r0.y, c4.x
-max.f r0.x, r0.x, c4.x
-min.f r1.w, r0.w, c4.y
-min.f r1.z, r0.z, c4.y
-min.f r1.y, r0.y, c4.y
-min.f r1.x, r0.x, c4.y
-mul.f r0.x, r2.x, c0.w
-mul.f r0.y, r2.x, c0.z
-mad.f32 r0.x, c1.w, r2.y, r0.x
-mad.f32 r0.y, c1.z, r2.y, r0.y
-mad.f32 r0.x, c2.w, r2.z, r0.x
-mad.f32 r0.y, c2.z, r2.z, r0.y
-mad.f32 r0.w, c3.w, r2.w, r0.x
-mad.f32 r0.z, c3.z, r2.w, r0.y
-mul.f r0.x, r2.x, c0.y
-mul.f r0.y, r2.x, c0.x
-mad.f32 r0.x, c1.y, r2.y, r0.x
-mad.f32 r0.y, c1.x, r2.y, r0.y
-mad.f32 r0.x, c2.y, r2.z, r0.x
-mad.f32 r2.x, c2.x, r2.z, r0.y
-mad.f32 r0.y, c3.y, r2.w, r0.x
-mad.f32 r0.x, c3.x, r2.w, r2.x
-mov.f32f32 r2.w, r3.w
-mov.f32f32 r2.z, r3.z
-mov.f32f32 r2.y, r3.y
-mov.f32f32 r2.x, r3.x
+@const(c4.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000
+(sy)(ss)mul.f r0.x, r1.x, c0.w
+mul.f r0.y, r1.x, c0.z
+mad.f32 r0.x, c1.w, r1.y, r0.x
+mad.f32 r0.y, c1.z, r1.y, r0.y
+mad.f32 r0.x, c2.w, r1.z, r0.x
+mad.f32 r0.y, c2.z, r1.z, r0.y
+mad.f32 r0.w, c3.w, r1.w, r0.x
+mad.f32 r0.z, c3.z, r1.w, r0.y
+mul.f r0.x, r1.x, c0.y
+mul.f r0.y, r1.x, c0.x
+mad.f32 r0.x, c1.y, r1.y, r0.x
+mad.f32 r0.y, c1.x, r1.y, r0.y
+mad.f32 r0.x, c2.y, r1.z, r0.x
+mad.f32 r1.x, c2.x, r1.z, r0.y
+mad.f32 r0.y, c3.y, r1.w, r0.x
+mad.f32 r0.x, c3.x, r1.w, r1.x
+max.f r1.x, r3.w, c4.x
+max.f r1.y, r3.z, c4.x
+max.f r3.y, r3.y, c4.x
+max.f r3.x, r3.x, c4.x
+min.f r1.w, r1.x, c4.y
+min.f r1.z, r1.y, c4.y
+min.f r1.y, r3.y, c4.y
+min.f r1.x, r3.x, c4.y
end
nop
nop
nop
; VERT: outputs: r0.x (0:0) r1.x (1:0) r2.x (5:0)
-; VERT: inputs: r2.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=f,il=12,b=0) r3.x (0:0,cm=f,il=16,b=0)
-; VERT: 33 instructions, 0 half, 4 full
+; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) r3.x (0:0,cm=f,il=12,b=0) r2.x (0:0,cm=f,il=16,b=0)
+; VERT: 25 instructions, 0 half, 4 full
diff --git a/reference/stk/stk0304.asm b/reference/stk/stk0304.asm
index 6763f4d..c381d21 100644
--- a/reference/stk/stk0304.asm
+++ b/reference/stk/stk0304.asm
@@ -1,16 +1,16 @@
; options:
; VERT: new compiler
-@in(r2.x) in0
-@in(r2.y) in1
-@in(r2.z) in2
-@in(r2.w) in3
+@in(r1.x) in0
+@in(r1.y) in1
+@in(r1.z) in2
+@in(r1.w) in3
@in(r0.x) in4
@in(r0.y) in5
@in(r0.z) in6
-@in(r3.x) in8
-@in(r3.y) in9
-@in(r3.z) in10
-@in(r3.w) in11
+@in(r2.x) in8
+@in(r2.y) in9
+@in(r2.z) in10
+@in(r2.w) in11
@out(r0.x) out0
@out(r0.y) out1
@out(r0.z) out2
@@ -19,131 +19,120 @@
@out(r1.y) out5
@out(r1.z) out6
@out(r1.w) out7
-(sy)(ss)mul.f r0.w, r2.x, c12.x
-mul.f r1.x, r0.x, c4.x
-mad.f32 r0.w, c13.x, r2.y, r0.w
-mad.f32 r1.x, c4.y, r0.y, r1.x
-mad.f32 r0.w, c14.x, r2.z, r0.w
-mov.f32f32 r1.y, r3.w
-mad.f32 r0.w, c15.x, r2.w, r0.w
-mov.f32f32 r1.x, r1.x
-mul.f r1.z, r0.x, c5.x
+@const(c19.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
+(sy)(ss)mul.f r0.w, r1.x, c12.x
+mul.f r3.x, r0.x, c4.x
+mad.f32 r0.w, c13.x, r1.y, r0.w
+mad.f32 r3.x, c4.y, r0.y, r3.x
+mad.f32 r0.w, c14.x, r1.z, r0.w
+mad.f32 r3.x, c4.z, r0.z, r3.x
+mad.f32 r3.y, c15.x, r1.w, r0.w
+mul.f r0.w, r1.x, c12.y
+mul.f r3.z, r1.x, c12.z
+mul.f r3.w, r1.x, c0.w
+mul.f r4.x, r3.y, r3.y
+mad.f32 r0.w, c13.y, r1.y, r0.w
+mul.f r4.y, r3.x, c10.x
+mad.f32 r0.w, c14.y, r1.z, r0.w
+mul.f r4.z, r0.x, c5.x
+mad.f32 r4.w, c15.y, r1.w, r0.w
+mad.f32 r0.w, c5.y, r0.y, r4.z
+mad.f32 r3.z, c13.z, r1.y, r3.z
+mad.f32 r3.w, c1.w, r1.y, r3.w
+mad.f32 r4.x, r4.w, r4.w, r4.x
+mad.f32 r3.z, c14.z, r1.z, r3.z
+mad.f32 r4.z, c5.z, r0.z, r0.w
+mad.f32 r3.z, c15.z, r1.w, r3.z
+mad.f32 r0.w, c2.w, r1.z, r3.w
+mul.f r3.w, r1.x, c0.z
+mul.f r5.x, r1.x, c0.y
+mad.f32 r4.x, r3.z, r3.z, r4.x
+mad.f32 r4.y, c10.y, r4.z, r4.y
mul.f r0.x, r0.x, c6.x
-mul.f r1.w, r0.w, r0.w
-mul.f r3.w, r2.x, c12.y
-mad.f32 r1.x, c4.z, r0.z, r1.x
-mad.f32 r3.w, c13.y, r2.y, r3.w
-mov.f32f32 r1.y, r1.y
-mad.f32 r3.w, c14.y, r2.z, r3.w
-mul.f r4.x, r1.x, c10.x
-mad.f32 r3.w, c15.y, r2.w, r3.w
-mad.f32 r1.z, c5.y, r0.y, r1.z
-max.f r1.y, r1.y, c19.x
+mad.f32 r0.w, c3.w, r1.w, r0.w
+mad.f32 r3.w, c1.z, r1.y, r3.w
+mad.f32 r5.x, c1.y, r1.y, r5.x
+mul.f r1.x, r1.x, c0.x
+rsq r4.x, (abs)r4.x
+(ss)mov.f32f32 r5.y, r4.x
+mul.f r3.z, r3.z, r4.x
mad.f32 r0.x, c6.y, r0.y, r0.x
-mad.f32 r0.y, r3.w, r3.w, r1.w
-mov.f32f32 r1.z, r1.z
-min.f r1.w, r1.y, c19.y
-mad.f32 r1.y, c5.z, r0.z, r1.z
-mov.f32f32 r0.y, r0.y
-mul.f r1.z, r2.x, c12.z
-mov.f32f32 r0.x, r0.x
-mad.f32 r1.z, c13.z, r2.y, r1.z
-mad.f32 r4.x, c10.y, r1.y, r4.x
-mad.f32 r1.z, c14.z, r2.z, r1.z
+mad.f32 r0.y, c2.z, r1.z, r3.w
+mul.f r3.y, r3.y, r5.y
+mul.f r3.w, r4.w, r5.y
+(rpt1)nop
+add.f r3.y, c10.x, (neg)r3.y
+add.f r3.w, c10.y, (neg)r3.w
+add.f r3.z, c10.z, (neg)r3.z
mad.f32 r0.x, c6.z, r0.z, r0.x
-mad.f32 r0.z, c15.z, r2.w, r1.z
-mov.f32f32 r1.z, r4.x
-mul.f r4.x, r2.x, c0.w
-mul.f r4.y, r2.x, c0.z
-mad.f32 r0.y, r0.z, r0.z, r0.y
-mad.f32 r1.z, c10.z, r0.x, r1.z
-mad.f32 r4.x, c1.w, r2.y, r4.x
-mad.f32 r4.y, c1.z, r2.y, r4.y
-mul.f r4.z, r2.x, c0.y
-mul.f r2.x, r2.x, c0.x
-mul.f r4.w, c16.z, r3.z
+(ss)mul.f r4.x, r3.y, r3.y
+mad.f32 r0.z, c3.z, r1.w, r0.y
+mad.f32 r0.y, r3.w, r3.w, r4.x
+mad.f32 r4.x, c10.z, r0.x, r4.y
+mad.f32 r0.y, r3.z, r3.z, r0.y
+mad.f32 r4.y, c2.y, r1.z, r5.x
+mad.f32 r1.x, c1.x, r1.y, r1.x
+max.f r1.y, r2.w, c19.x
+mul.f r2.w, c16.z, r2.z
+mul.f r4.w, c16.y, r2.y
+mul.f r5.x, c16.x, r2.x
rsq r0.y, (abs)r0.y
-(ss)mov.f32f32 r0.y, r0.y
-max.f r5.x, r1.z, c19.x
-cmps.f.lt r1.z, (neg)r1.z, c19.x
-mad.f32 r4.x, c2.w, r2.z, r4.x
-mul.f r0.w, r0.w, r0.y
-mul.f r3.w, r3.w, r0.y
-mul.f r0.y, r0.z, r0.y
-mov.f32f32 r5.x, r5.x
-add.f r0.z, c10.x, (neg)r0.w
-add.f r3.w, c10.y, (neg)r3.w
-add.f r0.y, c10.z, (neg)r0.y
-mad.f32 r0.w, c8.z, r3.z, c9.z
-mul.f r5.y, r0.z, r0.z
-mul.f r5.z, c16.y, r3.y
-mad.f32 r5.y, r3.w, r3.w, r5.y
-add.f r4.w, r4.w, r0.w
-mad.f32 r0.w, c8.y, r3.y, c9.y
-mul.f r5.w, c16.x, r3.x
-mov.f32f32 r5.y, r5.y
-mul.f r3.z, c17.z, r3.z
-mad.f32 r5.y, r0.y, r0.y, r5.y
-add.f r5.z, r5.z, r0.w
-mad.f32 r6.x, c8.x, r3.x, c9.x
-mad.f32 r0.w, c3.w, r2.w, r4.x
-mad.f32 r4.x, c2.z, r2.z, r4.y
-mad.f32 r4.y, c1.y, r2.y, r4.z
-mad.f32 r2.x, c1.x, r2.y, r2.x
-rsq r2.y, (abs)r5.y
-(ss)mov.f32f32 r2.y, r2.y
-mad.f32 r3.z, r5.x, r3.z, r4.w
-mul.f r3.y, c17.y, r3.y
-add.f r4.z, r5.w, r6.x
-mul.f r0.z, r0.z, r2.y
-mul.f r3.w, r3.w, r2.y
-mul.f r0.y, r0.y, r2.y
-mad.f32 r2.y, r5.x, r3.y, r5.z
-mul.f r0.z, r1.x, r0.z
-mul.f r1.x, c17.x, r3.x
-mad.f32 r1.y, r1.y, r3.w, r0.z
-mad.f32 r0.z, c3.z, r2.w, r4.x
-mad.f32 r3.x, c2.y, r2.z, r4.y
-mad.f32 r2.x, c2.x, r2.z, r2.x
-mov.f32f32 r1.y, r1.y
-mad.f32 r1.x, r5.x, r1.x, r4.z
-mad.f32 r1.y, r0.x, r0.y, r1.y
-mad.f32 r0.y, c3.y, r2.w, r3.x
-mad.f32 r0.x, c3.x, r2.w, r2.x
-mov.f32f32 r2.x, c7.x
-max.f r1.y, r1.y, c19.x
-(rpt5)nop
-log2 r1.y, r1.y
-(ss)mov.f32f32 r1.y, r1.y
-min.f r2.x, r2.x, c19.z
-(rpt2)nop
-mul.f r1.y, r2.x, r1.y
-(rpt2)nop
-mov.f32f32 r1.y, r1.y
+(ss)mov.f32f32 r5.y, r0.y
+(ss)mul.f r0.y, r3.z, r0.y
+max.f r3.z, r4.x, c19.x
+mad.f32 r5.z, c8.x, r2.x, c9.x
+mul.f r3.y, r3.y, r5.y
+mul.f r3.w, r3.w, r5.y
+mov.f32f32 r5.y, r3.z
+mad.f32 r5.w, c8.y, r2.y, c9.y
+mul.f r3.x, r3.x, r3.y
+mad.f32 r3.y, c8.z, r2.z, c9.z
+mad.f32 r3.x, r4.z, r3.w, r3.x
+add.f r3.w, r4.w, r5.w
+mad.f32 r0.x, r0.x, r0.y, r3.x
+add.f r2.w, r2.w, r3.y
+mul.f r0.y, c17.y, r2.y
+add.f r2.y, r5.x, r5.z
+max.f r0.x, r0.x, c19.x
+mul.f r2.z, c17.z, r2.z
+mad.f32 r3.x, r5.y, r0.y, r3.w
+mul.f r2.x, c17.x, r2.x
+cmps.f.lt r3.y, (neg)r4.x, c19.x
+mad.f32 r0.y, c3.y, r1.w, r4.y
+mad.f32 r1.x, c2.x, r1.z, r1.x
+log2 r1.z, r0.x
+mov.f32f32 r3.w, c7.x
+mad.f32 r2.z, r5.y, r2.z, r2.w
+mad.f32 r2.x, r3.z, r2.x, r2.y
+(ss)mad.f32 r0.x, c3.x, r1.w, r1.x
+min.f r1.x, r3.w, c19.z
+min.f r1.w, r1.y, c19.y
+(rpt1)nop
+(ss)mul.f r1.x, r1.x, r1.z
(rpt5)nop
-exp2 r1.y, r1.y
-(ss)mov.f32f32 r1.y, r1.y
-(rpt2)nop
-sel.b32 r1.y, r1.y, r1.z, c19.x
-(rpt2)nop
-mov.f32f32 r1.y, r1.y
+exp2 r1.x, r1.x
+(ss)sel.b32 r1.x, r1.x, r3.y, c19.x
(rpt2)nop
+mov.f32f32 r1.y, r1.x
+mad.f32 r1.x, c18.x, r1.x, r2.x
+(rpt1)nop
mov.f32f32 r1.y, r1.y
-(rpt2)nop
-mad.f32 r1.z, c18.z, r1.y, r3.z
-mad.f32 r2.x, c18.y, r1.y, r2.y
-mad.f32 r1.x, c18.x, r1.y, r1.x
-nop
-max.f r1.y, r1.z, c19.x
-max.f r2.x, r2.x, c19.x
max.f r1.x, r1.x, c19.x
-nop
-min.f r1.z, r1.y, c19.y
-min.f r1.y, r2.x, c19.y
+(rpt1)nop
+mad.f32 r1.z, c18.z, r1.y, r2.z
+mad.f32 r1.y, c18.y, r1.y, r3.x
min.f r1.x, r1.x, c19.y
+nop
+max.f r1.z, r1.z, c19.x
+max.f r1.y, r1.y, c19.x
+(rpt1)nop
+min.f r1.z, r1.z, c19.y
+min.f r1.y, r1.y, c19.y
end
nop
+nop
+nop
; VERT: outputs: r0.x (0:0) r1.x (1:0)
-; VERT: inputs: r2.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r3.x (0:0,cm=f,il=16,b=0)
-; VERT: 145 instructions, 0 half, 7 full
+; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r2.x (0:0,cm=f,il=16,b=0)
+; VERT: 121 instructions, 0 half, 6 full
diff --git a/reference/stk/stk0305.asm b/reference/stk/stk0305.asm
index 242def2..693d844 100644
--- a/reference/stk/stk0305.asm
+++ b/reference/stk/stk0305.asm
@@ -1,9 +1,9 @@
; options:
; VERT: new compiler
-@in(r2.x) in0
-@in(r2.y) in1
-@in(r2.z) in2
-@in(r2.w) in3
+@in(r1.x) in0
+@in(r1.y) in1
+@in(r1.z) in2
+@in(r1.w) in3
@in(r0.x) in4
@in(r0.y) in5
@in(r0.z) in6
@@ -11,10 +11,10 @@
@in(r3.y) in9
@in(r3.z) in10
@in(r3.w) in11
-@in(r4.x) in12
-@in(r4.y) in13
-@in(r4.z) in14
-@in(r4.w) in15
+@in(r2.x) in12
+@in(r2.y) in13
+@in(r2.z) in14
+@in(r2.w) in15
@out(r0.x) out0
@out(r0.y) out1
@out(r0.z) out2
@@ -27,135 +27,120 @@
@out(r2.y) out9
@out(r2.z) out10
@out(r2.w) out11
-(sy)(ss)mul.f r0.w, r2.x, c12.x
-mul.f r1.x, r0.x, c4.x
-mad.f32 r0.w, c13.x, r2.y, r0.w
-mad.f32 r1.x, c4.y, r0.y, r1.x
-mad.f32 r0.w, c14.x, r2.z, r0.w
-mov.f32f32 r1.y, r3.w
-mad.f32 r0.w, c15.x, r2.w, r0.w
-mov.f32f32 r1.x, r1.x
-mul.f r1.z, r0.x, c5.x
+@const(c19.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
+(sy)(ss)mul.f r0.w, r1.x, c12.x
+mul.f r4.x, r0.x, c4.x
+mad.f32 r0.w, c13.x, r1.y, r0.w
+mad.f32 r4.x, c4.y, r0.y, r4.x
+mad.f32 r0.w, c14.x, r1.z, r0.w
+mad.f32 r4.x, c4.z, r0.z, r4.x
+mad.f32 r4.y, c15.x, r1.w, r0.w
+mul.f r0.w, r1.x, c12.y
+mul.f r4.z, r1.x, c12.z
+mul.f r4.w, r1.x, c0.w
+mul.f r5.x, r4.y, r4.y
+mad.f32 r0.w, c13.y, r1.y, r0.w
+mul.f r5.y, r4.x, c10.x
+mad.f32 r0.w, c14.y, r1.z, r0.w
+mul.f r5.z, r0.x, c5.x
+mad.f32 r5.w, c15.y, r1.w, r0.w
+mad.f32 r0.w, c5.y, r0.y, r5.z
+mad.f32 r4.z, c13.z, r1.y, r4.z
+mad.f32 r4.w, c1.w, r1.y, r4.w
+mad.f32 r5.x, r5.w, r5.w, r5.x
+mad.f32 r4.z, c14.z, r1.z, r4.z
+mad.f32 r5.z, c5.z, r0.z, r0.w
+mad.f32 r4.z, c15.z, r1.w, r4.z
+mad.f32 r0.w, c2.w, r1.z, r4.w
+mul.f r4.w, r1.x, c0.z
+mul.f r6.x, r1.x, c0.y
+mad.f32 r5.x, r4.z, r4.z, r5.x
+mad.f32 r5.y, c10.y, r5.z, r5.y
mul.f r0.x, r0.x, c6.x
-mul.f r1.w, r0.w, r0.w
-mul.f r3.w, r2.x, c12.y
-mad.f32 r1.x, c4.z, r0.z, r1.x
-mad.f32 r3.w, c13.y, r2.y, r3.w
-mov.f32f32 r1.y, r1.y
-mad.f32 r3.w, c14.y, r2.z, r3.w
-mul.f r5.x, r1.x, c10.x
-mad.f32 r3.w, c15.y, r2.w, r3.w
-mad.f32 r1.z, c5.y, r0.y, r1.z
-max.f r1.y, r1.y, c19.x
+mad.f32 r0.w, c3.w, r1.w, r0.w
+mad.f32 r4.w, c1.z, r1.y, r4.w
+mad.f32 r6.x, c1.y, r1.y, r6.x
+mul.f r1.x, r1.x, c0.x
+rsq r5.x, (abs)r5.x
+(ss)mov.f32f32 r6.y, r5.x
+mul.f r4.z, r4.z, r5.x
mad.f32 r0.x, c6.y, r0.y, r0.x
-mad.f32 r0.y, r3.w, r3.w, r1.w
-mov.f32f32 r1.z, r1.z
-min.f r1.w, r1.y, c19.y
-mad.f32 r1.y, c5.z, r0.z, r1.z
-mov.f32f32 r0.y, r0.y
-mul.f r1.z, r2.x, c12.z
-mov.f32f32 r0.x, r0.x
-mad.f32 r1.z, c13.z, r2.y, r1.z
-mad.f32 r5.x, c10.y, r1.y, r5.x
-mad.f32 r1.z, c14.z, r2.z, r1.z
+mad.f32 r0.y, c2.z, r1.z, r4.w
+mul.f r4.y, r4.y, r6.y
+mul.f r4.w, r5.w, r6.y
+(rpt1)nop
+add.f r4.y, c10.x, (neg)r4.y
+add.f r4.w, c10.y, (neg)r4.w
+add.f r4.z, c10.z, (neg)r4.z
mad.f32 r0.x, c6.z, r0.z, r0.x
-mad.f32 r0.z, c15.z, r2.w, r1.z
-mov.f32f32 r1.z, r5.x
-mul.f r5.x, r2.x, c0.w
-mul.f r5.y, r2.x, c0.z
-mad.f32 r0.y, r0.z, r0.z, r0.y
-mad.f32 r1.z, c10.z, r0.x, r1.z
-mad.f32 r5.x, c1.w, r2.y, r5.x
-mad.f32 r5.y, c1.z, r2.y, r5.y
-mul.f r5.z, r2.x, c0.y
-mul.f r2.x, r2.x, c0.x
-mul.f r5.w, c16.z, r3.z
+(ss)mul.f r5.x, r4.y, r4.y
+mad.f32 r0.z, c3.z, r1.w, r0.y
+mad.f32 r0.y, r4.w, r4.w, r5.x
+mad.f32 r5.x, c10.z, r0.x, r5.y
+mad.f32 r0.y, r4.z, r4.z, r0.y
+mad.f32 r5.y, c2.y, r1.z, r6.x
+mad.f32 r1.x, c1.x, r1.y, r1.x
+max.f r1.y, r3.w, c19.x
+mul.f r3.w, c16.z, r3.z
+mul.f r5.w, c16.y, r3.y
+mul.f r6.x, c16.x, r3.x
rsq r0.y, (abs)r0.y
-(ss)mov.f32f32 r0.y, r0.y
-max.f r6.x, r1.z, c19.x
-cmps.f.lt r1.z, (neg)r1.z, c19.x
-mad.f32 r5.x, c2.w, r2.z, r5.x
-mul.f r0.w, r0.w, r0.y
-mul.f r3.w, r3.w, r0.y
-mul.f r0.y, r0.z, r0.y
-mov.f32f32 r6.x, r6.x
-add.f r0.z, c10.x, (neg)r0.w
-add.f r3.w, c10.y, (neg)r3.w
-add.f r0.y, c10.z, (neg)r0.y
-mad.f32 r0.w, c8.z, r3.z, c9.z
-mul.f r6.y, r0.z, r0.z
-mul.f r6.z, c16.y, r3.y
-mad.f32 r6.y, r3.w, r3.w, r6.y
-add.f r5.w, r5.w, r0.w
-mad.f32 r0.w, c8.y, r3.y, c9.y
-mul.f r6.w, c16.x, r3.x
-mov.f32f32 r6.y, r6.y
+(ss)mov.f32f32 r6.y, r0.y
+(ss)mul.f r0.y, r4.z, r0.y
+max.f r4.z, r5.x, c19.x
+mad.f32 r6.z, c8.x, r3.x, c9.x
+mul.f r4.y, r4.y, r6.y
+mul.f r4.w, r4.w, r6.y
+mov.f32f32 r6.y, r4.z
+mad.f32 r6.w, c8.y, r3.y, c9.y
+mul.f r4.x, r4.x, r4.y
+mad.f32 r4.y, c8.z, r3.z, c9.z
+mad.f32 r4.x, r5.z, r4.w, r4.x
+add.f r4.w, r5.w, r6.w
+mad.f32 r0.x, r0.x, r0.y, r4.x
+add.f r3.w, r3.w, r4.y
+mul.f r0.y, c17.y, r3.y
+add.f r3.y, r6.x, r6.z
+max.f r0.x, r0.x, c19.x
mul.f r3.z, c17.z, r3.z
-mad.f32 r6.y, r0.y, r0.y, r6.y
-add.f r6.z, r6.z, r0.w
-mad.f32 r7.x, c8.x, r3.x, c9.x
-mad.f32 r0.w, c3.w, r2.w, r5.x
-mad.f32 r5.x, c2.z, r2.z, r5.y
-mad.f32 r5.y, c1.y, r2.y, r5.z
-mad.f32 r2.x, c1.x, r2.y, r2.x
-rsq r2.y, (abs)r6.y
-(ss)mov.f32f32 r2.y, r2.y
-mad.f32 r3.z, r6.x, r3.z, r5.w
-mul.f r3.y, c17.y, r3.y
-add.f r5.z, r6.w, r7.x
-mul.f r0.z, r0.z, r2.y
-mul.f r3.w, r3.w, r2.y
-mul.f r0.y, r0.y, r2.y
-mad.f32 r3.y, r6.x, r3.y, r6.z
-mul.f r0.z, r1.x, r0.z
-mul.f r1.x, c17.x, r3.x
-mad.f32 r1.y, r1.y, r3.w, r0.z
-mad.f32 r0.z, c3.z, r2.w, r5.x
-mad.f32 r2.y, c2.y, r2.z, r5.y
-mad.f32 r2.x, c2.x, r2.z, r2.x
-mov.f32f32 r1.y, r1.y
-mad.f32 r1.x, r6.x, r1.x, r5.z
-mad.f32 r1.y, r0.x, r0.y, r1.y
-mad.f32 r0.y, c3.y, r2.w, r2.y
-mad.f32 r0.x, c3.x, r2.w, r2.x
-mov.f32f32 r3.x, c7.x
-max.f r1.y, r1.y, c19.x
-mov.f32f32 r2.w, r4.w
-mov.f32f32 r2.z, r4.z
-mov.f32f32 r2.y, r4.y
-mov.f32f32 r2.x, r4.x
+mad.f32 r4.x, r6.y, r0.y, r4.w
+mul.f r3.x, c17.x, r3.x
+cmps.f.lt r4.y, (neg)r5.x, c19.x
+mad.f32 r0.y, c3.y, r1.w, r5.y
+mad.f32 r1.x, c2.x, r1.z, r1.x
+log2 r1.z, r0.x
+mov.f32f32 r4.w, c7.x
+mad.f32 r3.z, r6.y, r3.z, r3.w
+mad.f32 r3.x, r4.z, r3.x, r3.y
+(ss)mad.f32 r0.x, c3.x, r1.w, r1.x
+min.f r1.x, r4.w, c19.z
+min.f r1.w, r1.y, c19.y
(rpt1)nop
-log2 r1.y, r1.y
-(ss)mov.f32f32 r1.y, r1.y
-min.f r3.x, r3.x, c19.z
-(rpt2)nop
-mul.f r1.y, r3.x, r1.y
-(rpt2)nop
-mov.f32f32 r1.y, r1.y
+(ss)mul.f r1.x, r1.x, r1.z
(rpt5)nop
-exp2 r1.y, r1.y
-(ss)mov.f32f32 r1.y, r1.y
-(rpt2)nop
-sel.b32 r1.y, r1.y, r1.z, c19.x
-(rpt2)nop
-mov.f32f32 r1.y, r1.y
+exp2 r1.x, r1.x
+(ss)sel.b32 r1.x, r1.x, r4.y, c19.x
(rpt2)nop
+mov.f32f32 r1.y, r1.x
+mad.f32 r1.x, c18.x, r1.x, r3.x
+(rpt1)nop
mov.f32f32 r1.y, r1.y
-(rpt2)nop
-mad.f32 r1.z, c18.z, r1.y, r3.z
-mad.f32 r3.x, c18.y, r1.y, r3.y
-mad.f32 r1.x, c18.x, r1.y, r1.x
-nop
-max.f r1.y, r1.z, c19.x
-max.f r3.x, r3.x, c19.x
max.f r1.x, r1.x, c19.x
-nop
-min.f r1.z, r1.y, c19.y
-min.f r1.y, r3.x, c19.y
+(rpt1)nop
+mad.f32 r1.z, c18.z, r1.y, r3.z
+mad.f32 r1.y, c18.y, r1.y, r4.x
min.f r1.x, r1.x, c19.y
+nop
+max.f r1.z, r1.z, c19.x
+max.f r1.y, r1.y, c19.x
+(rpt1)nop
+min.f r1.z, r1.z, c19.y
+min.f r1.y, r1.y, c19.y
end
nop
+nop
+nop
; VERT: outputs: r0.x (0:0) r1.x (1:0) r2.x (5:0)
-; VERT: inputs: r2.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r3.x (0:0,cm=f,il=16,b=0) r4.x (0:0,cm=f,il=20,b=0)
-; VERT: 145 instructions, 0 half, 8 full
+; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r3.x (0:0,cm=f,il=16,b=0) r2.x (0:0,cm=f,il=20,b=0)
+; VERT: 121 instructions, 0 half, 7 full
diff --git a/reference/stk/stk0306.asm b/reference/stk/stk0306.asm
index 324cf3b..55ee480 100644
--- a/reference/stk/stk0306.asm
+++ b/reference/stk/stk0306.asm
@@ -1,16 +1,16 @@
; options:
; VERT: new compiler
-@in(r3.x) in0
-@in(r3.y) in1
-@in(r3.z) in2
-@in(r3.w) in3
+@in(r1.x) in0
+@in(r1.y) in1
+@in(r1.z) in2
+@in(r1.w) in3
@in(r0.x) in4
@in(r0.y) in5
@in(r0.z) in6
-@in(r0.w) in8
-@in(r1.x) in9
-@in(r1.y) in10
-@in(r1.z) in11
+@in(r3.x) in8
+@in(r3.y) in9
+@in(r3.z) in10
+@in(r3.w) in11
@in(r4.x) in12
@in(r4.y) in13
@in(r4.z) in14
@@ -27,143 +27,132 @@
@out(r2.y) out9
@out(r2.z) out10
@out(r2.w) out11
-(sy)(ss)mul.f r1.w, r3.x, c12.x
+@const(c23.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
+(sy)(ss)mul.f r0.w, r1.x, c12.x
mul.f r2.x, r0.x, c4.x
-mad.f32 r1.w, c13.x, r3.y, r1.w
+mad.f32 r0.w, c13.x, r1.y, r0.w
mad.f32 r2.x, c4.y, r0.y, r2.x
-mad.f32 r1.w, c14.x, r3.z, r1.w
-mov.f32f32 r1.z, r1.z
-mad.f32 r2.y, c15.x, r3.w, r1.w
-mov.f32f32 r1.w, r2.x
-mul.f r2.x, r0.x, c5.x
+mad.f32 r0.w, c14.x, r1.z, r0.w
+mad.f32 r2.x, c4.z, r0.z, r2.x
+mad.f32 r0.w, c15.x, r1.w, r0.w
+mul.f r2.y, r4.x, c19.w
+mul.f r2.z, r4.x, c19.z
+mul.f r5.x, r4.x, c19.y
+mul.f r2.w, r0.w, r0.w
+mul.f r5.y, r1.x, c12.y
+mul.f r5.z, r2.x, c10.x
+mad.f32 r5.y, c13.y, r1.y, r5.y
+mul.f r5.w, r0.x, c5.x
+mad.f32 r5.y, c14.y, r1.z, r5.y
+mad.f32 r5.w, c5.y, r0.y, r5.w
+mad.f32 r5.y, c15.y, r1.w, r5.y
+mad.f32 r5.w, c5.z, r0.z, r5.w
+mad.f32 r2.y, c20.w, r4.y, r2.y
+mad.f32 r2.z, c20.z, r4.y, r2.z
+mad.f32 r2.w, r5.y, r5.y, r2.w
+mul.f r6.x, r1.x, c12.z
+mad.f32 r5.z, c10.y, r5.w, r5.z
+mad.f32 r6.x, c13.z, r1.y, r6.x
mul.f r0.x, r0.x, c6.x
-mul.f r2.z, r2.y, r2.y
-mul.f r2.w, r3.x, c12.y
-mad.f32 r5.x, c4.z, r0.z, r1.w
-mad.f32 r1.w, c13.y, r3.y, r2.w
-mov.f32f32 r1.z, r1.z
-mad.f32 r1.w, c14.y, r3.z, r1.w
-mul.f r2.w, r5.x, c10.x
-mad.f32 r5.y, c15.y, r3.w, r1.w
-mad.f32 r1.w, c5.y, r0.y, r2.x
-max.f r1.z, r1.z, c23.x
+mad.f32 r6.x, c14.z, r1.z, r6.x
mad.f32 r0.x, c6.y, r0.y, r0.x
-mad.f32 r0.y, r5.y, r5.y, r2.z
-mov.f32f32 r2.x, r1.w
-min.f r1.w, r1.z, c23.y
-mad.f32 r1.z, c5.z, r0.z, r2.x
-mov.f32f32 r0.y, r0.y
-mul.f r2.x, r3.x, c12.z
-mov.f32f32 r0.x, r0.x
-mad.f32 r2.x, c13.z, r3.y, r2.x
-mad.f32 r2.z, c10.y, r1.z, r2.w
-mad.f32 r2.x, c14.z, r3.z, r2.x
+mad.f32 r0.y, c15.z, r1.w, r6.x
mad.f32 r0.x, c6.z, r0.z, r0.x
-mad.f32 r0.z, c15.z, r3.w, r2.x
-mov.f32f32 r2.x, r2.z
-mul.f r2.z, r4.x, c19.w
-mul.f r2.w, r4.x, c19.z
-mad.f32 r0.y, r0.z, r0.z, r0.y
-mad.f32 r2.x, c10.z, r0.x, r2.x
-mad.f32 r2.z, c20.w, r4.y, r2.z
-mad.f32 r5.z, c20.z, r4.y, r2.w
-mul.f r5.w, r4.x, c19.y
-mul.f r4.x, r4.x, c19.x
-mul.f r6.x, r3.x, c0.w
-rsq r0.y, (abs)r0.y
-(ss)mov.f32f32 r0.y, r0.y
-max.f r2.w, r2.x, c23.x
-cmps.f.lt r6.y, (neg)r2.x, c23.x
-mad.f32 r2.x, c21.w, r4.z, r2.z
-mul.f r2.y, r2.y, r0.y
-mul.f r2.z, r5.y, r0.y
-mul.f r0.y, r0.z, r0.y
-mov.f32f32 r0.z, r2.w
-add.f r2.y, c10.x, (neg)r2.y
-add.f r2.z, c10.y, (neg)r2.z
+mad.f32 r0.z, c21.w, r4.z, r2.y
+mad.f32 r2.y, c21.z, r4.z, r2.z
+mad.f32 r6.x, r0.y, r0.y, r2.w
+mad.f32 r5.z, c10.z, r0.x, r5.z
+mad.f32 r2.w, c22.w, r4.w, r0.z
+mad.f32 r2.z, c22.z, r4.w, r2.y
+mad.f32 r0.z, c20.y, r4.y, r5.x
+mul.f r2.y, r4.x, c19.x
+mul.f r4.x, r1.x, c0.w
+rsq r5.x, (abs)r6.x
+(ss)mov.f32f32 r6.x, r5.x
+mul.f r0.y, r0.y, r5.x
+max.f r5.x, r5.z, c23.x
+mul.f r6.y, c16.x, r3.x
+mul.f r0.w, r0.w, r6.x
+mul.f r5.y, r5.y, r6.x
+(rpt1)nop
+add.f r0.w, c10.x, (neg)r0.w
+add.f r5.y, c10.y, (neg)r5.y
add.f r0.y, c10.z, (neg)r0.y
-mul.f r5.y, c16.z, r1.y
-mul.f r2.w, r2.y, r2.y
-mad.f32 r6.z, c8.z, r1.y, c9.z
-mad.f32 r6.w, r2.z, r2.z, r2.w
-mul.f r7.x, c16.y, r1.x
-mul.f r7.y, c16.x, r0.w
-mad.f32 r2.w, c22.w, r4.w, r2.x
-mov.f32f32 r2.x, r6.w
-add.f r5.y, r5.y, r6.z
-mad.f32 r2.x, r0.y, r0.y, r2.x
-mul.f r1.y, c17.z, r1.y
-mad.f32 r6.z, c8.y, r1.x, c9.y
-mad.f32 r6.w, c8.x, r0.w, c9.x
-mad.f32 r5.z, c21.z, r4.z, r5.z
-mad.f32 r5.w, c20.y, r4.y, r5.w
-mad.f32 r4.x, c20.x, r4.y, r4.x
-rsq r2.x, (abs)r2.x
-(ss)mov.f32f32 r2.x, r2.x
-mad.f32 r1.y, r0.z, r1.y, r5.y
-add.f r4.y, r7.x, r6.z
-add.f r5.y, r7.y, r6.w
-mul.f r2.y, r2.y, r2.x
-mul.f r2.z, r2.z, r2.x
-mul.f r0.y, r0.y, r2.x
-mul.f r1.x, c17.y, r1.x
-mul.f r2.x, r5.x, r2.y
-mul.f r0.w, c17.x, r0.w
-mad.f32 r1.z, r1.z, r2.z, r2.x
-mad.f32 r1.x, r0.z, r1.x, r4.y
-mad.f32 r2.z, c22.z, r4.w, r5.z
-mad.f32 r2.x, c21.y, r4.z, r5.w
-mov.f32f32 r1.z, r1.z
-mad.f32 r4.y, r0.z, r0.w, r5.y
-mad.f32 r0.x, r0.x, r0.y, r1.z
-mad.f32 r2.y, c22.y, r4.w, r2.x
-mad.f32 r0.y, c21.x, r4.z, r4.x
-mad.f32 r0.z, c1.w, r3.y, r6.x
+mov.f32f32 r6.x, r5.x
+mul.f r6.z, r0.w, r0.w
+mul.f r6.w, c16.z, r3.z
+mad.f32 r6.z, r5.y, r5.y, r6.z
+mad.f32 r7.x, c8.z, r3.z, c9.z
+mad.f32 r6.z, r0.y, r0.y, r6.z
+mul.f r7.y, c16.y, r3.y
+mad.f32 r7.z, c8.x, r3.x, c9.x
+cmps.f.lt r5.z, (neg)r5.z, c23.x
+mad.f32 r0.z, c21.y, r4.z, r0.z
+mad.f32 r4.y, c20.x, r4.y, r2.y
+mad.f32 r4.x, c1.w, r1.y, r4.x
+rsq r2.y, (abs)r6.z
+(ss)mov.f32f32 r6.z, r2.y
+mul.f r0.y, r0.y, r2.y
+add.f r2.y, r6.w, r7.x
+mad.f32 r6.w, c8.y, r3.y, c9.y
+mul.f r0.w, r0.w, r6.z
+mul.f r5.y, r5.y, r6.z
+mul.f r3.z, c17.z, r3.z
+add.f r6.z, r7.y, r6.w
+mul.f r0.w, r2.x, r0.w
+mul.f r2.x, c17.y, r3.y
+mad.f32 r0.w, r5.w, r5.y, r0.w
+mad.f32 r3.y, r6.x, r3.z, r2.y
+mad.f32 r0.x, r0.x, r0.y, r0.w
+mad.f32 r3.z, r6.x, r2.x, r6.z
+add.f r0.y, r6.y, r7.z
+mad.f32 r2.y, c22.y, r4.w, r0.z
max.f r0.x, r0.x, c23.x
-mad.f32 r2.x, c22.x, r4.w, r0.y
-mad.f32 r0.y, c2.w, r3.z, r0.z
-mul.f r0.z, r3.x, c0.z
-mul.f r1.z, r3.x, c0.y
-mul.f r3.x, r3.x, c0.x
-mov.f32f32 r4.x, c7.x
+mul.f r0.z, c17.x, r3.x
+mad.f32 r0.w, c21.x, r4.z, r4.y
+mad.f32 r3.x, c2.w, r1.z, r4.x
+mul.f r4.x, r1.x, c0.z
+mul.f r4.y, r1.x, c0.y
+mul.f r1.x, r1.x, c0.x
log2 r0.x, r0.x
-(ss)mov.f32f32 r0.x, r0.x
-mad.f32 r0.w, c3.w, r3.w, r0.y
-mad.f32 r0.y, c1.z, r3.y, r0.z
-min.f r0.z, r4.x, c23.z
-mad.f32 r0.y, c2.z, r3.z, r0.y
-mad.f32 r1.z, c1.y, r3.y, r1.z
-mad.f32 r3.x, c1.x, r3.y, r3.x
-mul.f r0.x, r0.z, r0.x
-mad.f32 r0.z, c3.z, r3.w, r0.y
-mad.f32 r0.y, c2.y, r3.z, r1.z
-mad.f32 r1.z, c2.x, r3.z, r3.x
-mov.f32f32 r3.x, r0.x
-mad.f32 r0.y, c3.y, r3.w, r0.y
-mad.f32 r0.x, c3.x, r3.w, r1.z
-(rpt3)nop
-exp2 r1.z, r3.x
-(ss)mov.f32f32 r1.z, r1.z
-(rpt2)nop
-sel.b32 r1.z, r1.z, r6.y, c23.x
-(rpt2)nop
-mov.f32f32 r1.z, r1.z
-(rpt2)nop
-mov.f32f32 r1.z, r1.z
-(rpt2)nop
-mad.f32 r1.y, c18.z, r1.z, r1.y
-mad.f32 r1.x, c18.y, r1.z, r1.x
-mad.f32 r1.z, c18.x, r1.z, r4.y
+mov.f32f32 r4.z, c7.x
+mad.f32 r5.x, r5.x, r0.z, r0.y
+mad.f32 r2.x, c22.x, r4.w, r0.w
+mad.f32 r0.w, c3.w, r1.w, r3.x
+min.f r0.y, r4.z, c23.z
+mad.f32 r0.z, c1.z, r1.y, r4.x
+mad.f32 r3.x, c1.y, r1.y, r4.y
+mad.f32 r1.x, c1.x, r1.y, r1.x
+(ss)mul.f r0.x, r0.y, r0.x
+mad.f32 r0.y, c2.z, r1.z, r0.z
+mad.f32 r1.y, c2.y, r1.z, r3.x
+mad.f32 r1.x, c2.x, r1.z, r1.x
+max.f r1.z, r3.w, c23.x
+(rpt1)nop
+exp2 r0.x, r0.x
+(ss)sel.b32 r3.x, r0.x, r5.z, c23.x
+mad.f32 r0.z, c3.z, r1.w, r0.y
+mad.f32 r0.y, c3.y, r1.w, r1.y
+(ss)mad.f32 r0.x, c3.x, r1.w, r1.x
+mov.f32f32 r1.x, r3.x
+mad.f32 r1.y, c18.x, r3.x, r5.x
+min.f r1.w, r1.z, c23.y
nop
+mov.f32f32 r1.x, r1.x
max.f r1.y, r1.y, c23.x
-max.f r1.x, r1.x, c23.x
-(ss)max.f r3.x, r1.z, c23.x
+(rpt1)nop
+mad.f32 r1.z, c18.z, r1.x, r3.y
+mad.f32 r3.x, c18.y, r1.x, r3.z
+min.f r1.x, r1.y, c23.y
nop
+max.f r1.y, r1.z, c23.x
+max.f r3.x, r3.x, c23.x
+(rpt1)nop
min.f r1.z, r1.y, c23.y
-min.f r1.y, r1.x, c23.y
-min.f r1.x, r3.x, c23.y
+min.f r1.y, r3.x, c23.y
end
+nop
; VERT: outputs: r0.x (0:0) r1.x (1:0) r2.x (5:0)
-; VERT: inputs: r3.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r0.w (0:0,cm=f,il=16,b=0) r4.x (0:0,cm=f,il=20,b=0)
-; VERT: 147 instructions, 0 half, 8 full
+; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r3.x (0:0,cm=f,il=16,b=0) r4.x (0:0,cm=f,il=20,b=0)
+; VERT: 127 instructions, 0 half, 8 full
diff --git a/reference/stk/stk0307.asm b/reference/stk/stk0307.asm
index f978c20..5b1d566 100644
--- a/reference/stk/stk0307.asm
+++ b/reference/stk/stk0307.asm
@@ -1,16 +1,16 @@
; options:
; VERT: new compiler
-@in(r3.x) in0
-@in(r3.y) in1
-@in(r3.z) in2
-@in(r3.w) in3
+@in(r2.x) in0
+@in(r2.y) in1
+@in(r2.z) in2
+@in(r2.w) in3
@in(r0.x) in4
@in(r0.y) in5
@in(r0.z) in6
-@in(r2.x) in8
-@in(r2.y) in9
-@in(r2.z) in10
-@in(r2.w) in11
+@in(r3.x) in8
+@in(r3.y) in9
+@in(r3.z) in10
+@in(r3.w) in11
@in(r4.z) in14
@in(r4.w) in15
@out(r0.x) out0
@@ -25,171 +25,157 @@
@out(r2.y) out9
@out(r2.z) out10
@out(r2.w) out11
-(sy)(ss)mul.f r0.w, r3.x, c12.x
+@const(c25.x) 0x3f000000, 0x00000000, 0x3f800000, 0x00000000
+@const(c26.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
+(sy)(ss)mul.f r0.w, r2.x, c12.x
mul.f r1.x, r0.x, c4.x
-mad.f32 r0.w, c13.x, r3.y, r0.w
+mad.f32 r0.w, c13.x, r2.y, r0.w
mad.f32 r1.x, c4.y, r0.y, r1.x
-mad.f32 r0.w, c14.x, r3.z, r0.w
-mov.f32f32 r1.y, r2.w
-mad.f32 r0.w, c15.x, r3.w, r0.w
-mov.f32f32 r1.x, r1.x
-mul.f r1.z, r0.x, c5.x
-mul.f r0.x, r0.x, c6.x
-mul.f r1.w, r0.w, r0.w
-mul.f r2.w, r3.x, c12.y
+mad.f32 r0.w, c14.x, r2.z, r0.w
mad.f32 r1.x, c4.z, r0.z, r1.x
-mad.f32 r2.w, c13.y, r3.y, r2.w
-mov.f32f32 r1.y, r1.y
-mad.f32 r2.w, c14.y, r3.z, r2.w
-mul.f r4.x, r1.x, c10.x
-mad.f32 r2.w, c15.y, r3.w, r2.w
-mad.f32 r1.z, c5.y, r0.y, r1.z
-max.f r1.y, r1.y, c25.y
+mad.f32 r1.y, c15.x, r2.w, r0.w
+mul.f r0.w, r2.x, c12.y
+mul.f r1.z, r2.x, c12.z
+mul.f r1.w, r2.x, c0.w
+mul.f r4.x, r1.y, r1.y
+mad.f32 r0.w, c13.y, r2.y, r0.w
+mul.f r4.y, r1.x, c10.x
+mad.f32 r0.w, c14.y, r2.z, r0.w
+mul.f r5.x, r0.x, c5.x
+mad.f32 r5.y, c15.y, r2.w, r0.w
+mad.f32 r0.w, c5.y, r0.y, r5.x
+mad.f32 r1.z, c13.z, r2.y, r1.z
+mad.f32 r1.w, c1.w, r2.y, r1.w
+mad.f32 r4.x, r5.y, r5.y, r4.x
+mad.f32 r1.z, c14.z, r2.z, r1.z
+mad.f32 r5.x, c5.z, r0.z, r0.w
+mad.f32 r1.z, c15.z, r2.w, r1.z
+mad.f32 r0.w, c2.w, r2.z, r1.w
+mul.f r1.w, r2.x, c0.z
+mul.f r5.z, r2.x, c0.y
+mad.f32 r4.x, r1.z, r1.z, r4.x
+mad.f32 r4.y, c10.y, r5.x, r4.y
+mul.f r0.x, r0.x, c6.x
+mad.f32 r0.w, c3.w, r2.w, r0.w
+mad.f32 r1.w, c1.z, r2.y, r1.w
+mad.f32 r5.z, c1.y, r2.y, r5.z
+mul.f r2.x, r2.x, c0.x
+rsq r4.x, (abs)r4.x
+(ss)mov.f32f32 r5.w, r4.x
+mul.f r1.z, r1.z, r4.x
mad.f32 r0.x, c6.y, r0.y, r0.x
-mad.f32 r0.y, r2.w, r2.w, r1.w
-mov.f32f32 r1.z, r1.z
-min.f r1.w, r1.y, c25.z
-mad.f32 r1.y, c5.z, r0.z, r1.z
-mov.f32f32 r0.y, r0.y
-mul.f r1.z, r3.x, c12.z
-mov.f32f32 r0.x, r0.x
-mad.f32 r1.z, c13.z, r3.y, r1.z
-mad.f32 r4.x, c10.y, r1.y, r4.x
-mad.f32 r1.z, c14.z, r3.z, r1.z
-mad.f32 r0.x, c6.z, r0.z, r0.x
-mad.f32 r0.z, c15.z, r3.w, r1.z
-mov.f32f32 r1.z, r4.x
-mul.f r4.x, r3.x, c0.w
-mul.f r4.y, r3.x, c0.z
-mad.f32 r0.y, r0.z, r0.z, r0.y
-mad.f32 r1.z, c10.z, r0.x, r1.z
-mad.f32 r4.x, c1.w, r3.y, r4.x
-mad.f32 r4.y, c1.z, r3.y, r4.y
-mul.f r5.x, r3.x, c0.y
-mul.f r3.x, r3.x, c0.x
-mul.f r5.y, c16.z, r2.z
-rsq r0.y, (abs)r0.y
-(ss)mov.f32f32 r0.y, r0.y
-max.f r5.z, r1.z, c26.x
-cmps.f.lt r1.z, (neg)r1.z, c25.y
-mad.f32 r4.x, c2.w, r3.z, r4.x
-mul.f r0.w, r0.w, r0.y
-mul.f r2.w, r2.w, r0.y
-mul.f r0.y, r0.z, r0.y
-mov.f32f32 r5.z, r5.z
-add.f r0.z, c10.x, (neg)r0.w
-mul.f r5.w, r1.x, r0.w
-add.f r6.x, c10.y, (neg)r2.w
-add.f r6.y, c10.z, (neg)r0.y
-mul.f r6.z, r0.z, r0.z
-mad.f32 r5.w, r1.y, r2.w, r5.w
-mad.f32 r6.z, r6.x, r6.x, r6.z
-mad.f32 r6.w, c8.z, r2.z, c9.z
-mul.f r7.x, c16.y, r2.y
-mul.f r7.y, c16.x, r2.x
-mov.f32f32 r6.z, r6.z
-mov.f32f32 r5.w, r5.w
-mad.f32 r6.z, r6.y, r6.y, r6.z
-mad.f32 r7.z, r0.x, r0.y, r5.w
-mad.f32 r7.w, r0.x, r0.y, r5.w
-mad.f32 r5.w, r0.x, r0.y, r5.w
-add.f r5.y, r5.y, r6.w
-mad.f32 r6.w, c8.y, r2.y, c9.y
-mad.f32 r8.x, c8.x, r2.x, c9.x
-rsq r6.z, (abs)r6.z
-(ss)mov.f32f32 r6.z, r6.z
-add.f r7.z, r7.z, r7.z
-add.f r7.w, r7.w, r7.w
-add.f r5.w, r5.w, r5.w
-mul.f r0.z, r0.z, r6.z
-mad.f32 r7.z, (neg)r7.z, r1.x, r0.w
-mul.f r0.w, r6.x, r6.z
-mul.f r6.x, r6.y, r6.z
-mul.f r0.z, r1.x, r0.z
-add.f r1.x, r7.z, c25.y
-mad.f32 r0.z, r1.y, r0.w, r0.z
-mad.f32 r1.y, (neg)r7.w, r1.y, r2.w
-mad.f32 r0.y, (neg)r5.w, r0.x, r0.y
-nop
-mov.f32f32 r0.z, r0.z
-mul.f r0.w, r1.x, r1.x
-mad.f32 r0.x, r0.x, r6.x, r0.z
-add.f r0.z, r1.y, c25.y
-add.f r0.y, r0.y, c25.z
-mul.f r1.x, c17.z, r2.z
-max.f r0.x, r0.x, c26.x
-mad.f32 r0.z, r0.z, r0.z, r0.w
-add.f r2.z, r7.x, r6.w
-add.f r2.w, r7.y, r8.x
-mad.f32 r0.w, c3.w, r3.w, r4.x
-mad.f32 r4.x, c2.z, r3.z, r4.y
-mad.f32 r4.y, c1.y, r3.y, r5.x
-log2 r0.x, r0.x
-(ss)mov.f32f32 r0.x, r0.x
-mov.f32f32 r5.x, c7.x
-mov.f32f32 r0.z, r0.z
-mad.f32 r1.x, r5.z, r1.x, r5.y
-mul.f r2.y, c17.y, r2.y
-min.f r5.x, r5.x, c26.z
-mad.f32 r0.y, r0.y, r0.y, r0.z
-mul.f r2.x, c17.x, r2.x
-mad.f32 r0.z, c3.z, r3.w, r4.x
-mul.f r0.x, r5.x, r0.x
-mov.f32f32 r0.y, r0.y
-mad.f32 r2.y, r5.z, r2.y, r2.z
-mad.f32 r2.x, r5.z, r2.x, r2.w
-mov.f32f32 r0.x, r0.x
-mad.f32 r2.z, c2.y, r3.z, r4.y
-mad.f32 r2.w, c1.x, r3.y, r3.x
-mov.f32f32 r3.x, r4.z
-mov.f32f32 r3.y, r4.w
+mad.f32 r0.y, c2.z, r2.z, r1.w
+mul.f r1.y, r1.y, r5.w
+mul.f r1.w, r5.y, r5.w
(rpt1)nop
-exp2 r0.x, r0.x
-(ss)mov.f32f32 r0.x, r0.x
-rsq r0.y, (abs)r0.y
-(ss)mul.f r4.x, r0.y, c25.x
-(ss)rsq r0.y, (abs)r0.y
-(ss)mul.f r4.y, r0.y, c25.x
-(ss)mad.f32 r0.y, c3.y, r3.w, r2.z
-sel.b32 r0.x, r0.x, r1.z, c26.x
-mad.f32 r4.x, r7.z, r4.x, c25.x
-mad.f32 r4.y, r1.y, r4.y, c25.x
-mad.f32 r3.z, c2.x, r3.z, r2.w
-mov.f32f32 r0.x, r0.x
-mul.f r1.y, r4.x, c21.w
-mul.f r1.z, r4.x, c21.z
-mul.f r2.z, r4.x, c21.y
-mov.f32f32 r0.x, r0.x
-mad.f32 r1.y, c22.w, r4.y, r1.y
-mad.f32 r4.z, c22.z, r4.y, r1.z
-mad.f32 r2.z, c22.y, r4.y, r2.z
-mad.f32 r1.x, c18.z, r0.x, r1.x
-mad.f32 r1.z, c18.y, r0.x, r2.y
-mad.f32 r0.x, c18.x, r0.x, r2.x
-mad.f32 r2.x, c23.w, r3.x, r1.y
-max.f r1.x, r1.x, c25.y
-max.f r1.y, r1.z, c25.y
-max.f r0.x, r0.x, c25.y
+(ss)add.f r4.x, c10.x, (neg)r1.y
+mul.f r5.y, r1.x, r1.y
+add.f r5.w, c10.y, (neg)r1.w
+add.f r6.x, c10.z, (neg)r1.z
+mul.f r6.y, r4.x, r4.x
+mad.f32 r5.y, r5.x, r1.w, r5.y
+mad.f32 r6.y, r5.w, r5.w, r6.y
+mad.f32 r0.x, c6.z, r0.z, r0.x
+mad.f32 r6.y, r6.x, r6.x, r6.y
+mov.f32f32 r6.z, r5.y
+mad.f32 r0.z, c3.z, r2.w, r0.y
+mad.f32 r0.y, c2.y, r2.z, r5.z
+mad.f32 r2.x, c1.x, r2.y, r2.x
+max.f r2.y, r3.w, c25.y
+mul.f r3.w, c16.z, r3.z
+rsq r5.z, (abs)r6.y
+(ss)mov.f32f32 r6.y, r5.z
+mad.f32 r6.w, r0.x, r1.z, r6.z
+mad.f32 r6.z, r0.x, r1.z, r6.z
+mad.f32 r5.y, r0.x, r1.z, r5.y
+mul.f r4.x, r4.x, r6.y
+add.f r6.w, r6.w, r6.w
+add.f r6.z, r6.z, r6.z
+mul.f r5.w, r5.w, r6.y
+mul.f r4.x, r1.x, r4.x
+mad.f32 r1.x, (neg)r6.w, r1.x, r1.y
+mad.f32 r1.y, (neg)r6.z, r0.x, r1.z
+mad.f32 r1.z, r5.x, r5.w, r4.x
+mul.f r4.x, r6.x, r5.z
+add.f r5.z, r1.x, c25.y
+add.f r1.y, r1.y, c25.z
+add.f r5.y, r5.y, r5.y
+mad.f32 r1.z, r0.x, r4.x, r1.z
+mul.f r4.x, r5.z, r5.z
+mad.f32 r0.x, c10.z, r0.x, r4.y
+mad.f32 r0.y, c3.y, r2.w, r0.y
+max.f r1.z, r1.z, c26.x
+mad.f32 r4.y, (neg)r5.y, r5.x, r1.w
+max.f r5.x, r0.x, c26.x
+mul.f r5.y, c16.x, r3.x
+cmps.f.lt r5.z, (neg)r0.x, c25.y
+mad.f32 r0.x, c2.x, r2.z, r2.x
+min.f r1.w, r2.y, c25.z
+log2 r1.z, r1.z
+mov.f32f32 r2.x, c7.x
+add.f r2.y, r4.y, c25.y
+mov.f32f32 r2.z, r5.x
+mul.f r5.w, c16.y, r3.y
+min.f r2.x, r2.x, c26.z
+mad.f32 r2.y, r2.y, r2.y, r4.x
+mad.f32 r4.x, c8.z, r3.z, c9.z
+mad.f32 r6.x, c8.y, r3.y, c9.y
+(ss)mul.f r1.z, r2.x, r1.z
+mad.f32 r1.y, r1.y, r1.y, r2.y
+add.f r2.x, r3.w, r4.x
+add.f r2.y, r5.w, r6.x
+mad.f32 r3.w, c8.x, r3.x, c9.x
+mad.f32 r0.x, c3.x, r2.w, r0.x
+mov.f32f32 r4.x, r4.z
+exp2 r1.z, r1.z
+(ss)sel.b32 r1.z, r1.z, r5.z, c26.x
+add.f r2.w, r5.y, r3.w
+mul.f r3.x, c17.x, r3.x
+rsq r1.y, (abs)r1.y
+(ss)mul.f r3.w, r1.y, c25.x
+mov.f32f32 r5.y, r1.z
+(ss)rsq r1.y, (abs)r1.y
+(ss)mul.f r1.y, r1.y, c25.x
+mul.f r3.z, c17.z, r3.z
+mad.f32 r2.w, r5.x, r3.x, r2.w
+mov.f32f32 r3.x, r5.y
+mul.f r3.y, c17.y, r3.y
+mad.f32 r2.x, r2.z, r3.z, r2.x
+mad.f32 r1.z, c18.x, r1.z, r2.w
+mad.f32 r2.x, c18.z, r3.x, r2.x
+mad.f32 r2.y, r2.z, r3.y, r2.y
+mad.f32 r2.z, r1.x, r3.w, c25.x
+mad.f32 r3.y, r4.y, r1.y, c25.x
+max.f r1.x, r2.x, c25.y
+mad.f32 r1.y, c18.y, r3.x, r2.y
+max.f r2.x, r1.z, c25.y
nop
min.f r1.z, r1.x, c25.z
+max.f r1.y, r1.y, c25.y
+min.f r1.x, r2.x, c25.z
+mul.f r2.x, r2.z, c21.w
+mul.f r2.y, r2.z, c21.z
min.f r1.y, r1.y, c25.z
-min.f r1.x, r0.x, c25.z
-mad.f32 r2.w, c24.w, r3.y, r2.x
-mad.f32 r0.x, c23.z, r3.x, r4.z
-mad.f32 r2.x, c23.y, r3.x, r2.z
-mad.f32 r2.z, c24.z, r3.y, r0.x
-mad.f32 r2.y, c24.y, r3.y, r2.x
-mul.f r2.x, r4.x, c21.x
-mad.f32 r0.x, c3.x, r3.w, r3.z
-mad.f32 r2.x, c22.x, r4.y, r2.x
-nop
-mad.f32 r2.x, c23.x, r3.x, r2.x
-nop
-mad.f32 r2.x, c24.x, r3.y, r2.x
+mad.f32 r2.x, c22.w, r3.y, r2.x
+mad.f32 r2.y, c22.z, r3.y, r2.y
+mad.f32 r2.x, c23.w, r4.x, r2.x
+mov.f32f32 r3.x, r4.w
+mad.f32 r2.y, c23.z, r4.x, r2.y
+mul.f r3.z, r2.z, c21.y
+mul.f r3.w, r2.z, c21.x
+mad.f32 r2.w, c24.w, r3.x, r2.x
+mad.f32 r2.z, c24.z, r3.x, r2.y
+mad.f32 r2.x, c22.y, r3.y, r3.z
+mad.f32 r2.y, c22.x, r3.y, r3.w
+mad.f32 r2.x, c23.y, r4.x, r2.x
+mad.f32 r3.y, c23.x, r4.z, r2.y
+mad.f32 r2.y, c24.y, r3.x, r2.x
+mad.f32 r2.x, c24.x, r4.w, r3.y
end
nop
nop
+nop
; VERT: outputs: r0.x (0:0) r1.x (1:0) r2.x (5:0)
-; VERT: inputs: r3.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r2.x (0:0,cm=f,il=16,b=0) r4.x (0:0,cm=c,il=20,b=0)
-; VERT: 163 instructions, 0 half, 9 full
+; VERT: inputs: r2.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r3.x (0:0,cm=f,il=16,b=0) r4.x (0:0,cm=c,il=20,b=0)
+; VERT: 146 instructions, 0 half, 7 full
diff --git a/reference/stk/stk0400.asm b/reference/stk/stk0400.asm
index 74062e3..d96960a 100644
--- a/reference/stk/stk0400.asm
+++ b/reference/stk/stk0400.asm
@@ -6,39 +6,24 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)bary.f r0.z, 4, r0.x
-bary.f r0.w, 7, r0.x
-bary.f r1.x, 5, r0.x
+bary.f r0.w, 5, r0.x
+bary.f r1.x, 7, r0.x
bary.f r1.y, 3, r0.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.x, r1.x
bary.f r1.z, 2, r0.x
-mov.f32f32 r1.w, r0.z
-mov.f32f32 r2.y, r0.w
-mov.f32f32 r2.x, r1.x
-bary.f r0.z, 1, r0.x
+bary.f r2.x, 1, r0.x
bary.f (ei)r0.x, 0, r0.x
-(rpt3)nop
-sam.p (f32)(xyzw)r1.w, r1.w, s#0, t#0
-(sy)mul.f r0.y, r1.y, r2.z
-mul.f r0.w, r2.y, r1.z
-mul.f r0.z, r2.x, r0.z
-mul.f r0.x, r1.w, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.x, r0.x
-(ss)mov.f32f32 r1.w, r0.y
-mov.f32f32 r1.z, r0.w
-mov.f32f32 r1.y, r0.z
-mov.f32f32 r1.x, r0.x
+(rpt1)nop
+sam.p (f32)(xyzw)r0.y, r0.z, s#0, t#0
+(sy)mul.f r1.w, r1.y, r1.x
+mul.f r1.z, r0.w, r1.z
+mul.f r1.y, r0.z, r2.x
+(ss)mul.f r1.x, r0.y, r0.x
end
+nop
+nop
; FRAG: outputs: r1.x (1:0)
-; FRAG: inputs: r0.z (1:0,cm=f,il=8,b=1) r0.x (5:0,cm=f,il=12,b=1)
-; FRAG: 35 instructions, 0 half, 3 full
+; FRAG: inputs: r0.z (1:0,cm=f,il=8,b=1) r0.y (5:0,cm=f,il=12,b=1)
+; FRAG: 15 instructions, 0 half, 3 full
diff --git a/reference/stk/stk0500.asm b/reference/stk/stk0500.asm
index e9b7b37..c61e896 100644
--- a/reference/stk/stk0500.asm
+++ b/reference/stk/stk0500.asm
@@ -6,227 +6,154 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c5.x) 0x40000000, 0xbf800000, 0x40800000, 0x3f000000
+@const(c6.x) 0x00000000, 0x3f333333, 0x3e99999a, 0x3d4ccccd
+@const(c7.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.z, 0, r0.x
mov.f32f32 r0.w, c3.x
bary.f r1.x, 1, r0.x
mov.f32f32 r1.y, c6.x
-add.f r1.z, r0.z, c3.x
-add.f r1.w, r0.z, c4.x
+add.f r1.z, r0.z, c4.x
+add.f r2.x, r0.z, c3.x
+add.f r1.w, r1.x, c4.y
+add.f r2.y, r1.x, c3.y
add.f r0.z, r0.z, r0.w
add.f r0.w, r1.x, r1.y
-mov.f32f32 r1.y, r1.z
-mov.f32f32 r1.z, r1.w
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.w, r1.y
-add.f r1.y, r1.x, c3.y
-mov.f32f32 r2.y, r1.z
-add.f r1.x, r1.x, c4.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.x, r1.x
-bary.f r1.z, 7, r0.x
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r2.w, r0.z
-mov.f32f32 r3.x, r0.w
-mov.f32f32 r0.z, r1.x
-mov.f32f32 r2.x, r1.y
-mov.f32f32 r0.w, r1.z
-bary.f r1.x, 8, r0.x
-bary.f r1.y, 9, r0.x
-mov.f32f32 r2.z, r0.z
-sam (f32)(xyzw)r2.w, r2.w, s#2, t#2
-mov.f32f32 r0.z, c6.x
-mov.f32f32 r1.z, c6.x
-sam (f32)(xyz)r3.w, r1.w, s#0, t#0
-(sy)(ss)mad.f32 r1.w, c5.x, r4.x, c5.y
-mad.f32 r2.x, c5.x, r3.w, c5.y
-mad.f32 r3.w, c5.x, r4.y, c5.y
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.w, r1.w
-sam (f32)(xyz)r4.x, r2.y, s#1, t#1
-(sy)(ss)mad.f32 r2.y, c5.x, r4.y, c5.y
-mov.f32f32 r2.x, r2.x
-mad.f32 r2.z, c5.x, r4.x, c5.y
+mov.f32f32 r1.x, c6.x
+mov.f32f32 r1.y, c6.x
+bary.f r2.z, 7, r0.x
+sam (f32)(xyz)r2.w, r1.z, s#1, t#1
+(sy)(ss)mad.f32 r1.z, c5.x, r3.x, c5.y
+sam (f32)(xyz)r3.z, r2.x, s#0, t#0
+(sy)mad.f32 r1.w, c5.x, r3.w, c5.y
+(ss)mad.f32 r2.x, c5.x, r2.w, c5.y
+mad.f32 r2.y, c5.x, r3.z, c5.y
+mul.f r1.z, c5.z, r1.z
mul.f r1.w, c5.z, r1.w
-mov.f32f32 r2.y, r2.y
-mul.f r4.x, r2.x, r2.x
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r1.w, r1.w
-mul.f r2.y, c5.z, r2.y
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r2.z, r2.z
-mad.f32 r4.x, r1.w, r1.w, r4.x
-mov.f32f32 r2.y, r2.y
-mad.f32 r4.y, c5.x, r4.z, c5.y
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r4.x, r4.x
-mov.f32f32 r2.y, r2.y
-mad.f32 r4.x, r3.w, r3.w, r4.x
-mul.f r4.z, r2.z, r2.z
-mov.f32f32 r4.y, r4.y
-(rpt3)nop
-rsq r4.x, r4.x
-(ss)mov.f32f32 r4.x, r4.x
-mad.f32 r4.z, r2.y, r2.y, r4.z
-mov.f32f32 r4.y, r4.y
-bary.f r4.w, 4, r0.x
-mul.f r2.x, r2.x, r4.x
-mul.f r1.w, r1.w, r4.x
-mul.f r3.w, r3.w, r4.x
-mov.f32f32 r4.x, r4.z
-mov.f32f32 r2.x, r2.x
-mad.f32 r4.x, r4.y, r4.y, r4.x
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r3.w, r3.w
-bary.f r4.z, 5, r0.x
+mov.f32f32 r2.w, r2.x
+mov.f32f32 r3.x, r2.y
+mov.f32f32 r3.z, r1.z
+mov.f32f32 r3.w, r1.w
+mov.f32f32 r2.w, r2.w
+mul.f r2.y, r2.y, r3.x
+mov.f32f32 r3.z, r3.z
+mad.f32 r1.w, r1.w, r3.w, r2.y
+mul.f r2.x, r2.x, r2.w
+mad.f32 r2.y, c5.x, r4.x, c5.y
+mad.f32 r1.z, r1.z, r3.z, r2.x
+mad.f32 r2.x, c5.x, r3.y, c5.y
+sam (f32)(xyzw)r4.x, r0.z, s#2, t#2
+(ss)bary.f r0.z, 8, r0.x
+bary.f r0.w, 9, r0.x
+bary.f r3.y, 4, r0.x
+mov.f32f32 r5.x, r2.x
+mov.f32f32 r5.y, r2.y
+bary.f r5.z, 5, r0.x
bary.f (ei)r0.x, 6, r0.x
+mov.f32f32 r0.y, r5.x
+mad.f32 r1.w, r5.y, r5.y, r1.w
(rpt1)nop
-rsq r0.y, r4.x
-(ss)mov.f32f32 r0.y, r0.y
-(rpt2)nop
-mad.f32 r2.x, r2.z, r0.y, r2.x
-mad.f32 r1.w, r2.y, r0.y, r1.w
-mad.f32 r0.y, r4.y, r0.y, r3.w
-nop
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r0.y, r0.y
+mad.f32 r0.y, r0.y, r0.y, r1.z
+(rpt5)nop
+rsq r0.y, r0.y
+(ss)mov.f32f32 r1.z, r0.y
+rsq r1.w, r1.w
+(ss)mov.f32f32 r5.x, r1.w
+(ss)mul.f r1.w, r2.y, r1.w
+(rpt1)nop
+mul.f r2.y, r3.x, r5.x
+mul.f r3.x, r3.w, r5.x
+mad.f32 r2.y, r2.w, r1.z, r2.y
+mad.f32 r1.z, r3.z, r1.z, r3.x
+mad.f32 r0.y, r2.x, r0.y, r1.w
nop
-mul.f r2.x, r2.x, c5.w
-mul.f r1.w, r1.w, c5.w
+mul.f r1.w, r2.y, c5.w
+mul.f r1.z, r1.z, c5.w
mul.f r0.y, r0.y, c5.w
nop
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r0.y, r0.y
-nop
-mul.f r2.y, r2.x, r4.w
-mul.f r2.z, r4.w, r2.x
-mad.f32 r2.y, r1.w, r4.z, r2.y
-mad.f32 r2.z, r4.z, r1.w, r2.z
-(rpt1)nop
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r2.z, r2.z
-mad.f32 r2.y, r0.y, r0.x, r2.y
-mad.f32 r2.z, r0.x, r0.y, r2.z
-(rpt1)nop
-mul.f r2.x, r2.y, r2.x
-max.f r2.z, r2.z, c6.x
-mul.f r1.w, r2.y, r1.w
-mul.f r0.y, r2.y, r0.y
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r2.y, r2.z
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r0.y, r0.y
-mul.f r2.x, c5.x, r2.x
-mad.f32 r2.y, c6.y, r2.y, c6.z
-mul.f r1.w, c5.x, r1.w
-mul.f r0.y, c5.x, r0.y
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r0.y, r0.y
-add.f r2.x, r4.w, (neg)r2.x
-mul.f r2.z, r3.z, r2.y
-mul.f r3.y, r3.y, r2.y
-mul.f r3.x, r3.x, r2.y
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r3.z, r2.z
-mov.f32f32 r3.w, r3.y
-(ss)mov.f32f32 r4.x, r3.x
-mul.f r4.y, r2.x, r2.x
-add.f r1.w, r4.z, (neg)r1.w
-add.f r1.z, r3.z, r1.z
-add.f r0.x, r0.x, (neg)r0.y
-mul.f r0.y, r2.w, r2.y
-mov.f32f32 r1.w, r1.w
-add.f r0.z, r1.z, r0.z
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r1.z, r0.y
-mad.f32 r2.y, r1.w, r1.w, r4.y
+mov.f32f32 r2.x, r1.w
+mul.f r1.w, r3.y, r1.w
+mov.f32f32 r2.y, r1.z
+mov.f32f32 r2.w, r0.y
+mul.f r3.x, r2.x, r3.y
+mad.f32 r1.z, r5.z, r1.z, r1.w
+mad.f32 r1.w, r2.y, r5.z, r3.x
+mad.f32 r0.y, r0.x, r0.y, r1.z
+mad.f32 r1.z, r2.w, r0.x, r1.w
(rpt2)nop
-mov.f32f32 r2.y, r2.y
-nop
-mad.f32 r2.y, r0.x, r0.x, r2.y
-(rpt5)nop
-rsq r2.y, r2.y
-(ss)mov.f32f32 r2.y, r2.y
+mul.f r1.w, r1.z, r2.x
+max.f r0.y, r0.y, c6.x
+mul.f r2.x, r1.z, r2.y
+mul.f r1.z, r1.z, r2.w
+mul.f r1.w, c5.x, r1.w
+mad.f32 r0.y, c6.y, r0.y, c6.z
+mul.f r2.x, c5.x, r2.x
+mul.f r1.z, c5.x, r1.z
+add.f r1.w, r3.y, (neg)r1.w
+mov.f32f32 r2.y, r0.y
+add.f r2.x, r5.z, (neg)r2.x
+add.f r0.x, r0.x, (neg)r1.z
+mov.f32f32 r1.z, r1.w
+(sy)mul.f r2.w, r4.w, r2.y
+mov.f32f32 r3.x, r2.x
+mov.f32f32 r3.y, r0.x
+mul.f r1.w, r1.w, r1.z
+add.f r1.y, r2.w, r1.y
+mad.f32 r1.w, r2.x, r3.x, r1.w
+mul.f r2.x, r4.z, r2.y
+mad.f32 r1.w, r3.y, r3.y, r1.w
+add.f r1.x, r1.y, r1.x
+mul.f r1.y, r4.y, r2.y
+mul.f r0.y, r4.x, r0.y
(rpt2)nop
-mul.f r2.x, r2.x, r2.y
-mul.f r1.w, r1.w, r2.y
-mul.f r0.x, r0.x, r2.y
-nop
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r0.x, r0.x
+rsq r1.w, r1.w
+(ss)mov.f32f32 r2.y, r1.w
+mul.f r0.x, r0.x, r1.w
+(rpt1)nop
+mul.f r1.z, r1.z, r2.y
+(ss)mul.f r1.w, r3.x, r2.y
+(rpt1)nop
+mul.f r1.z, r1.z, r2.z
nop
-mul.f r0.w, r2.x, r0.w
+mad.f32 r0.z, r1.w, r0.z, r1.z
nop
-mad.f32 r0.w, r1.w, r1.x, r0.w
-(rpt2)nop
-mov.f32f32 r0.w, r0.w
-nop
-mad.f32 r0.x, r0.x, r1.y, r0.w
-(rpt2)nop
-mov.f32f32 r0.x, r0.x
+mad.f32 r0.x, r0.x, r0.w, r0.z
(rpt2)nop
max.f r0.x, r0.x, c6.x
(rpt2)nop
-mov.f32f32 r0.x, r0.x
+mov.f32f32 r0.z, r0.x
(rpt2)nop
-mov.f32f32 r0.w, r0.x
-mov.f32f32 r1.x, r0.x
-cmps.f.lt r0.x, c6.x, r0.x
+mul.f r0.x, r0.x, r0.z
+cmps.f.lt r0.z, c6.x, r0.z
(rpt1)nop
-mul.f r0.w, r0.w, r1.x
-cov.u32f32 r0.x, r0.x
+mov.f32f32 r0.w, r0.x
+cov.u32f32 r0.z, r0.z
(rpt1)nop
-mov.f32f32 r0.w, r0.w
-cmps.f.ne r0.x, r0.x, c6.x
+mul.f r0.x, r0.x, r0.w
+cmps.f.ne r0.z, r0.z, c6.x
(rpt1)nop
+mov.f32f32 r0.w, r0.x
+mul.f r0.x, r0.x, c6.w
+sel.b32 r1.w, r1.x, r0.z, r2.w
+nop
mul.f r0.w, r0.w, r0.w
-sel.b32 r0.z, r0.z, r0.x, r2.z
-(rpt1)nop
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.w, r0.z
+add.f r1.x, r2.x, r0.x
+add.f r1.z, r1.y, r0.x
+add.f r0.x, r0.y, r0.x
+mul.f r0.w, r0.w, c6.z
+(rpt2)nop
+mov.f32f32 r2.y, r0.w
+add.f r0.x, r0.x, r0.w
(rpt1)nop
-mul.f r0.z, r0.w, r0.w
-mul.f r0.w, r0.w, c6.w
+add.f r0.w, r1.x, r2.y
+add.f r1.x, r1.z, r2.y
(rpt1)nop
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r1.x, r0.w
-mov.f32f32 r1.y, r0.w
-mov.f32f32 r0.w, r0.w
-mul.f r0.z, r0.z, c6.z
-add.f r1.x, r3.w, r1.x
-add.f r1.y, r4.x, r1.y
-add.f r0.w, r1.z, r0.w
-mov.f32f32 r0.z, r0.z
-(rpt2)nop
-mov.f32f32 r1.z, r0.z
-mov.f32f32 r2.x, r0.z
-mov.f32f32 r0.z, r0.z
-nop
-add.f r1.x, r1.x, r1.z
-add.f r1.y, r1.y, r2.x
-add.f r0.z, r0.w, r0.z
-nop
-sel.b32 r0.w, r1.x, r0.x, r3.y
-sel.b32 r1.x, r1.y, r0.x, r3.x
-sel.b32 r0.x, r0.z, r0.x, r0.y
-nop
-mov.f32f32 r1.z, r0.w
-mov.f32f32 r1.y, r1.x
-mov.f32f32 r1.x, r0.x
+sel.b32 r1.z, r0.w, r0.z, r2.x
+sel.b32 r1.y, r1.x, r0.z, r1.y
+sel.b32 r1.x, r0.x, r0.z, r0.y
end
nop
; FRAG: outputs: r1.x (1:0)
-; FRAG: inputs: r0.x (5:0,cm=f,il=8,b=1) r0.w (5:19,cm=f,il=12,b=1) r2.x (5:20,cm=f,il=16,b=1)
-; FRAG: 254 instructions, 0 half, 5 full
+; FRAG: inputs: r0.x (5:0,cm=f,il=8,b=1) r1.w (5:19,cm=f,il=12,b=1) r2.x (5:20,cm=f,il=16,b=1)
+; FRAG: 169 instructions, 0 half, 6 full
diff --git a/reference/stk/stk0501.asm b/reference/stk/stk0501.asm
index cec698a..4e67c3f 100644
--- a/reference/stk/stk0501.asm
+++ b/reference/stk/stk0501.asm
@@ -1,16 +1,16 @@
; options:
; VERT: new compiler
-@in(r1.x) in0
-@in(r1.y) in1
-@in(r1.z) in2
-@in(r1.w) in3
+@in(r4.x) in0
+@in(r4.y) in1
+@in(r4.z) in2
+@in(r4.w) in3
@in(r0.x) in4
@in(r0.y) in5
@in(r0.z) in6
-@in(r4.x) in8
-@in(r4.y) in9
-@in(r4.z) in10
-@in(r4.w) in11
+@in(r1.x) in8
+@in(r1.y) in9
+@in(r1.z) in10
+@in(r1.w) in11
@out(r0.x) out0
@out(r0.y) out1
@out(r0.z) out2
@@ -27,187 +27,140 @@
@out(r3.y) out13
@out(r3.z) out14
@out(r3.w) out15
+@const(c12.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)mov.f32f32 r0.w, c1.x
-mul.f r2.x, c8.x, r1.x
+mul.f r2.x, c8.x, r4.x
mov.f32f32 r2.y, c1.y
mov.f32f32 r2.z, c1.z
mul.f r2.w, r0.w, r0.x
mov.f32f32 r3.x, c2.x
-mad.f32 r2.x, c9.x, r1.y, r2.x
+mad.f32 r2.x, c9.x, r4.y, r2.x
mul.f r3.y, r0.w, r0.w
-mad.f32 r2.x, c10.x, r1.z, r2.x
+mad.f32 r2.x, c10.x, r4.z, r2.x
mad.f32 r2.w, r3.x, r0.y, r2.w
-mad.f32 r2.x, c11.x, r1.w, r2.x
-mad.f32 r3.x, r2.y, r2.y, r3.y
-mul.f r3.y, r2.y, r0.x
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r3.z, c3.x
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r3.x, r3.x
+mov.f32f32 r3.x, c3.x
+mad.f32 r2.x, c11.x, r4.w, r2.x
+mad.f32 r3.y, r2.y, r2.y, r3.y
+mul.f r3.z, r2.y, r0.x
+mad.f32 r2.w, r3.x, r0.z, r2.w
+mov.f32f32 r3.x, r2.x
+mad.f32 r3.y, r2.z, r2.z, r3.y
mov.f32f32 r3.w, c2.y
-mad.f32 r2.w, r3.z, r0.z, r2.w
-mul.f r3.z, r2.x, r2.x
-mul.f r5.x, c8.y, r1.x
-mad.f32 r3.x, r2.z, r2.z, r3.x
-mov.f32f32 r2.w, r2.w
-mad.f32 r5.x, c9.y, r1.y, r5.x
-mad.f32 r3.y, r3.w, r0.y, r3.y
-mul.f r0.x, r2.z, r0.x
-mul.f r3.w, r2.w, r2.w
-mad.f32 r5.x, c10.y, r1.z, r5.x
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r5.y, c3.y
-mad.f32 r5.x, c11.y, r1.w, r5.x
-rsq r3.x, r3.x
-(ss)mov.f32f32 r3.x, r3.x
-mov.f32f32 r5.z, c2.z
-mad.f32 r3.y, r5.y, r0.z, r3.y
-mov.f32f32 r5.x, r5.x
-mul.f r0.w, r0.w, r3.x
-mul.f r2.y, r2.y, r3.x
-mov.f32f32 r3.y, r3.y
-mad.f32 r3.z, r5.x, r5.x, r3.z
-mov.f32f32 r5.y, r0.w
-mov.f32f32 r2.y, r2.y
-mad.f32 r0.w, r3.y, r3.y, r3.w
-mov.f32f32 r3.z, r3.z
-mul.f r3.w, c8.z, r1.x
-mul.f r5.w, c0.x, r5.y
+mov.f32f32 r5.x, r2.w
+mul.f r3.x, r3.x, r3.x
+mul.f r5.y, c8.y, r4.x
+mad.f32 r3.z, r3.w, r0.y, r3.z
+mul.f r2.w, r2.w, r5.x
+mov.f32f32 r3.w, c3.y
+mad.f32 r5.y, c9.y, r4.y, r5.y
+rsq r3.y, r3.y
+(ss)mov.f32f32 r5.z, r3.y
+mad.f32 r5.y, c10.y, r4.z, r5.y
+mad.f32 r3.z, r3.w, r0.z, r3.z
+mad.f32 r3.w, c11.y, r4.w, r5.y
+mul.f r0.w, r0.w, r5.z
+mul.f r2.y, r2.y, r5.z
+mov.f32f32 r5.y, r3.z
+mov.f32f32 r5.z, r3.w
+mul.f r5.w, c0.x, r0.w
mov.f32f32 r0.w, r0.w
-mad.f32 r0.x, r5.z, r0.y, r0.x
-mad.f32 r0.y, c9.z, r1.y, r3.w
-mad.f32 r3.w, c0.y, r2.y, r5.w
-mad.f32 r0.y, c10.z, r1.z, r0.y
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r5.z, c3.z
-mad.f32 r0.y, c11.z, r1.w, r0.y
-mov.f32f32 r3.w, r3.w
-mul.f r2.z, r2.z, r3.x
-mad.f32 r0.x, r5.z, r0.z, r0.x
-mov.f32f32 r0.y, r0.y
-mul.f r0.z, c4.w, r1.x
-mul.f r3.x, c4.z, r1.x
-mov.f32f32 r0.x, r0.x
-mad.f32 r3.z, r0.y, r0.y, r3.z
+mad.f32 r2.w, r3.z, r5.y, r2.w
+mul.f r0.x, r2.z, r0.x
+mov.f32f32 r3.z, c2.z
+mad.f32 r3.x, r3.w, r5.z, r3.x
+mul.f r3.w, c8.z, r4.x
+mad.f32 r5.w, c0.y, r2.y, r5.w
+mad.f32 r0.x, r3.z, r0.y, r0.x
+mov.f32f32 r0.y, c3.z
+mad.f32 r3.z, c9.z, r4.y, r3.w
+mul.f r2.z, r2.z, r3.y
+(ss)mad.f32 r3.y, c10.z, r4.z, r3.z
+mad.f32 r0.x, r0.y, r0.z, r0.x
+mad.f32 r0.y, c11.z, r4.w, r3.y
+mad.f32 r3.y, c0.z, r2.z, r5.w
+mov.f32f32 r0.z, r2.y
+mov.f32f32 r2.y, r0.x
+mov.f32f32 r3.z, r0.y
+mov.f32f32 r3.w, r3.y
mov.f32f32 r2.z, r2.z
-mad.f32 r0.z, c5.w, r1.y, r0.z
-mad.f32 r0.w, r0.x, r0.x, r0.w
-mad.f32 r0.z, c6.w, r1.z, r0.z
-mad.f32 r3.x, c5.z, r1.y, r3.x
-mul.f r5.z, c4.y, r1.x
-mul.f r1.x, c4.x, r1.x
-mad.f32 r0.z, c7.w, r1.w, r0.z
-mad.f32 r3.x, c6.z, r1.z, r3.x
-rsq r0.w, r0.w
-(ss)mov.f32f32 r5.w, r0.w
-(ss)rsq r0.w, r3.z
-(ss)mov.f32f32 r3.z, r0.w
-mad.f32 r3.w, c0.z, r2.z, r3.w
-mov.f32f32 r0.w, r0.z
-mul.f r0.x, r0.x, r5.w
-mul.f r0.z, r2.x, r3.z
-mul.f r2.x, r2.w, r5.w
-mul.f r2.w, r3.y, r5.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r2.w, r2.w
-mul.f r3.y, r0.x, r2.y
-absneg.f r0.z, (neg)r0.z
-mul.f r5.w, c0.x, r2.x
-mad.f32 r3.y, r2.w, r2.z, (neg)r3.y
-mad.f32 r5.w, c0.y, r2.w, r5.w
-mov.f32f32 r6.x, r0.z
-mul.f r0.z, r2.x, r2.z
-mov.f32f32 r2.z, r3.y
-mov.f32f32 r3.y, r5.w
-mul.f r5.w, r6.x, r6.x
-mul.f r5.x, r5.x, r3.z
-mul.f r2.z, c0.x, r2.z
-mad.f32 r0.z, r0.x, r5.y, (neg)r0.z
-mad.f32 r0.x, c0.z, r0.x, r3.y
-mov.f32f32 r3.y, r5.x
-mul.f r2.w, r2.w, r5.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.x, r0.x
-absneg.f r3.y, (neg)r3.y
-mad.f32 r2.x, r2.x, r2.y, (neg)r2.w
-mad.f32 r0.z, c0.y, r0.z, r2.z
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r2.w, r3.y
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r2.y, r3.w
-mul.f r0.y, r0.y, r3.z
-mad.f32 r0.z, c0.z, r2.x, r0.z
-mad.f32 r2.x, r2.w, r2.w, r5.w
-mul.f r2.z, r2.y, r2.y
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r2.x, r2.x
-mad.f32 r3.x, c7.z, r1.w, r3.x
-mad.f32 r3.y, c5.y, r1.y, r5.z
+mad.f32 r2.y, r2.y, r2.y, r2.w
+mad.f32 r0.y, r0.y, r3.z, r3.x
+mul.f r2.w, r3.w, r3.w
+mul.f r3.x, c4.w, r4.x
+mul.f r3.w, c4.z, r4.x
+mul.f r5.w, c4.y, r4.x
+mul.f r4.x, c4.x, r4.x
+rsq r2.y, r2.y
+(ss)mov.f32f32 r6.x, r2.y
+rsq r0.y, r0.y
+(ss)mov.f32f32 r6.y, r0.y
+(ss)mul.f r0.y, r2.x, r0.y
+mul.f r0.x, r0.x, r2.y
+mul.f r2.x, r5.y, r6.x
+mul.f r2.y, r5.z, r6.y
+mul.f r5.x, r5.x, r6.x
+absneg.f r5.y, (neg)r0.y
+mov.f32f32 r0.y, r2.x
+mov.f32f32 r5.z, r0.x
+absneg.f r2.y, (neg)r2.y
+mul.f r6.x, c0.x, r5.x
+mov.f32f32 r5.x, r5.x
+mul.f r6.z, r5.z, r0.z
+mov.f32f32 r6.w, r2.y
+mad.f32 r6.z, r0.y, r2.z, (neg)r6.z
+mov.f32f32 r7.x, r5.y
+mad.f32 r2.x, c0.y, r2.x, r6.x
+mul.f r2.z, r5.x, r2.z
+mul.f r6.x, c0.x, r6.z
+mad.f32 r2.z, r5.z, r0.w, (neg)r2.z
+mul.f r5.z, r7.x, r7.x
+mad.f32 r0.x, c0.z, r0.x, r2.x
+mad.f32 r2.x, r2.y, r6.w, r5.z
+mad.f32 r2.y, c0.y, r2.z, r6.x
+mul.f r0.y, r0.y, r0.w
+mul.f r0.w, r3.z, r6.y
+mad.f32 r0.y, r5.x, r0.z, (neg)r0.y
+mov.f32f32 r0.z, r0.x
+mad.f32 r2.z, c5.w, r4.y, r3.x
+mad.f32 r3.x, c5.z, r4.y, r3.w
+mad.f32 r0.y, c0.z, r0.y, r2.y
+absneg.f r2.y, (neg)r0.w
mov.f32f32 r3.z, r0.z
-absneg.f r0.y, (neg)r0.y
-mov.f32f32 r0.z, r3.x
-mad.f32 r3.x, c6.y, r1.z, r3.y
-mad.f32 r2.z, r3.z, r3.z, r2.z
-mov.f32f32 r3.y, r0.y
-mad.f32 r0.y, c7.y, r1.w, r3.x
-mad.f32 r1.x, c5.x, r1.y, r1.x
-mov.f32f32 r1.y, r2.z
-mad.f32 r2.x, r3.y, r3.y, r2.x
-mad.f32 r1.y, r0.x, r0.x, r1.y
-mov.f32f32 r0.y, r0.y
-mad.f32 r1.x, c6.x, r1.z, r1.x
-mov.f32f32 r1.z, (0.000000)
+mad.f32 r0.z, c6.w, r4.z, r2.z
+mov.f32f32 r2.z, r0.y
+mov.f32f32 r5.x, r2.y
+mad.f32 r0.w, c7.w, r4.w, r0.z
+mad.f32 r0.z, c6.z, r4.z, r3.x
+mov.f32f32 r3.x, r2.z
+mad.f32 r2.y, r2.y, r5.x, r2.x
+mad.f32 r0.z, c7.z, r4.w, r0.z
+mad.f32 r2.x, c5.y, r4.y, r5.w
+mad.f32 r0.y, r0.y, r3.x, r2.w
+mad.f32 r2.x, c6.y, r4.z, r2.x
+mad.f32 r0.x, r0.x, r3.z, r0.y
+mad.f32 r0.y, c7.y, r4.w, r2.x
+mad.f32 r4.x, c5.x, r4.y, r4.x
+mov.f32f32 r3.w, (0.000000)
(rpt2)nop
-rsq r1.y, r1.y
-(ss)mov.f32f32 r1.y, r1.y
-rsq r2.x, r2.x
-(ss)mov.f32f32 r3.x, r2.x
-mad.f32 r1.x, c7.x, r1.w, r1.x
-mov.f32f32 r3.w, r1.z
-mul.f r0.x, r0.x, r1.y
-mul.f r1.z, r3.z, r1.y
-mul.f r1.y, r2.y, r1.y
-mul.f r1.w, r3.y, r3.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r3.y, r1.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r1.y, r1.y
+rsq r0.x, r0.x
+(ss)mov.f32f32 r4.y, r0.x
+mul.f r2.x, r3.y, r0.x
+(ss)rsq r0.x, r2.y
+(ss)mov.f32f32 r5.z, r0.x
+mul.f r2.w, r5.y, r0.x
+mul.f r2.z, r3.z, r4.y
+(ss)mul.f r2.y, r3.x, r4.y
+mul.f r3.y, r5.x, r5.z
+mul.f r3.x, r6.w, r5.z
+mad.f32 r0.x, c6.x, r4.z, r4.x
nop
-mov.f32f32 r2.z, r0.x
-mov.f32f32 r2.y, r1.z
-(ss)mov.f32f32 r2.x, r1.y
-mov.f32f32 r0.x, r3.y
-mul.f r1.y, r2.w, r3.x
-mul.f r1.z, r6.x, r3.x
-mov.f32f32 r3.z, r1.w
-mov.f32f32 r3.y, r0.x
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r0.x, r1.x
-mov.f32f32 r1.w, r4.w
-mov.f32f32 r1.x, r1.y
-mov.f32f32 r1.y, r1.z
-(rpt1)nop
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r2.w, r1.y
-mov.f32f32 r1.z, r4.z
-mov.f32f32 r1.y, r4.y
-mov.f32f32 r3.x, r1.x
-mov.f32f32 r1.x, r4.x
+mov.f32f32 r3.z, r3.y
+mad.f32 r0.x, c7.x, r4.w, r0.x
end
nop
+nop
+nop
; VERT: outputs: r0.x (0:0) r1.x (5:0) r2.x (5:19) r3.x (5:20)
-; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r4.x (0:0,cm=f,il=16,b=0)
-; VERT: 182 instructions, 0 half, 7 full
+; VERT: inputs: r4.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r1.x (0:0,cm=f,il=16,b=0)
+; VERT: 131 instructions, 0 half, 8 full
diff --git a/reference/stk/stk0600.asm b/reference/stk/stk0600.asm
index ff486a7..d610a0c 100644
--- a/reference/stk/stk0600.asm
+++ b/reference/stk/stk0600.asm
@@ -6,39 +6,20 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)bary.f r0.z, 4, r0.x
-bary.f r0.w, 7, r0.x
-bary.f r1.x, 5, r0.x
-bary.f r1.y, 2, r0.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.x, r1.x
-bary.f r1.z, 1, r0.x
-mov.f32f32 r1.w, r0.z
-mov.f32f32 r2.y, r0.w
-mov.f32f32 r2.x, r1.x
+bary.f r0.w, 5, r0.x
+bary.f r1.x, 7, r0.x
+bary.f r2.x, 2, r0.x
+bary.f r2.y, 1, r0.x
bary.f (ei)r0.x, 0, r0.x
-(rpt4)nop
-sam.p (f32)(xyzw)r0.y, r1.w, s#0, t#0
-(sy)mul.f r0.w, r0.w, r1.y
-mul.f r0.z, r0.z, r1.z
-mul.f r0.x, r0.y, r0.x
-mov.f32f32 r0.y, r1.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.x, r0.x
-(ss)mov.f32f32 r1.w, r0.y
-mov.f32f32 r0.y, r0.w
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.x, r0.x
-nop
-mov.f32f32 r1.z, r0.y
-mov.f32f32 r1.y, r0.z
-mov.f32f32 r1.x, r0.x
+(rpt2)nop
+sam.p (f32)(xyzw)r1.x, r0.z, s#0, t#0
+(sy)mul.f r1.z, r1.z, r2.x
+mul.f r1.y, r1.y, r2.y
+(ss)mul.f r1.x, r1.x, r0.x
end
-nop
-nop
; FRAG: outputs: r1.x (1:0)
-; FRAG: inputs: r0.x (1:0,cm=f,il=8,b=1) r0.x (5:0,cm=f,il=12,b=1)
-; FRAG: 34 instructions, 0 half, 3 full
+; FRAG: inputs: r0.x (1:0,cm=f,il=8,b=1) r0.y (5:0,cm=f,il=12,b=1)
+; FRAG: 14 instructions, 0 half, 3 full
diff --git a/reference/stk/stk0601.asm b/reference/stk/stk0601.asm
index aadaff7..5f20141 100644
--- a/reference/stk/stk0601.asm
+++ b/reference/stk/stk0601.asm
@@ -1,20 +1,20 @@
; options:
; VERT: new compiler
-@in(r2.x) in0
-@in(r2.y) in1
-@in(r2.z) in2
-@in(r2.w) in3
+@in(r3.x) in0
+@in(r3.y) in1
+@in(r3.z) in2
+@in(r3.w) in3
@in(r0.x) in4
@in(r0.y) in5
@in(r0.z) in6
-@in(r0.w) in8
-@in(r1.x) in9
-@in(r1.y) in10
-@in(r1.z) in11
-@in(r3.x) in12
-@in(r3.y) in13
-@in(r3.z) in14
-@in(r3.w) in15
+@in(r1.x) in8
+@in(r1.y) in9
+@in(r1.z) in10
+@in(r1.w) in11
+@in(r2.x) in12
+@in(r2.y) in13
+@in(r2.z) in14
+@in(r2.w) in15
@out(r0.x) out0
@out(r0.y) out1
@out(r0.z) out2
@@ -27,135 +27,120 @@
@out(r2.y) out9
@out(r2.z) out10
@out(r2.w) out11
-(sy)(ss)mul.f r1.w, r2.x, c13.x
+@const(c20.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
+(sy)(ss)mul.f r0.w, r3.x, c13.x
mul.f r4.x, r0.x, c4.x
-mad.f32 r1.w, c14.x, r2.y, r1.w
+mad.f32 r0.w, c14.x, r3.y, r0.w
mad.f32 r4.x, c4.y, r0.y, r4.x
-mad.f32 r1.w, c15.x, r2.z, r1.w
-mov.f32f32 r1.z, r1.z
-mad.f32 r4.y, c16.x, r2.w, r1.w
-mov.f32f32 r1.w, r4.x
-mov.f32f32 r4.x, c10.z
+mad.f32 r0.w, c15.x, r3.z, r0.w
+mad.f32 r4.x, c4.z, r0.z, r4.x
+mad.f32 r0.w, c16.x, r3.w, r0.w
+mov.f32f32 r4.y, c10.z
mov.f32f32 r4.z, c10.y
-mul.f r4.w, r4.y, r4.y
-mul.f r5.x, r2.x, c13.y
-mad.f32 r5.y, c4.z, r0.z, r1.w
-mad.f32 r1.w, c14.y, r2.y, r5.x
-mov.f32f32 r1.z, r1.z
-mad.f32 r1.w, c15.y, r2.z, r1.w
-mul.f r5.x, r5.y, c11.x
-mad.f32 r5.z, c16.y, r2.w, r1.w
-mul.f r1.w, r0.x, c5.x
-max.f r1.z, r1.z, c20.x
-mad.f32 r4.x, c8.z, r4.x, c9.z
-mad.f32 r4.w, r5.z, r5.z, r4.w
-mad.f32 r5.w, c5.y, r0.y, r1.w
-min.f r1.w, r1.z, c20.y
-add.f r1.z, c17.z, r4.x
-mov.f32f32 r4.x, r4.w
-mul.f r4.w, r2.x, c13.z
-mov.f32f32 r5.w, r5.w
-mad.f32 r4.w, c14.z, r2.y, r4.w
+mov.f32f32 r4.w, c10.x
+mul.f r5.x, r0.w, r0.w
+mul.f r5.y, r3.x, c13.y
+mul.f r5.z, r4.x, c11.x
+mad.f32 r5.y, c14.y, r3.y, r5.y
+mul.f r5.w, r0.x, c5.x
+mad.f32 r5.y, c15.y, r3.z, r5.y
+mad.f32 r5.w, c5.y, r0.y, r5.w
+mad.f32 r5.y, c16.y, r3.w, r5.y
mad.f32 r5.w, c5.z, r0.z, r5.w
-mad.f32 r4.w, c15.z, r2.z, r4.w
+mad.f32 r4.y, c8.z, r4.y, c9.z
mad.f32 r4.z, c8.y, r4.z, c9.y
-mad.f32 r4.w, c16.z, r2.w, r4.w
-mad.f32 r5.x, c11.y, r5.w, r5.x
-mov.f32f32 r6.x, c10.x
+mad.f32 r5.x, r5.y, r5.y, r5.x
+mul.f r6.x, r3.x, c13.z
+mad.f32 r5.z, c11.y, r5.w, r5.z
+mad.f32 r6.x, c14.z, r3.y, r6.x
mul.f r0.x, r0.x, c6.x
-mad.f32 r4.x, r4.w, r4.w, r4.x
-mov.f32f32 r5.x, r5.x
+mad.f32 r6.x, c15.z, r3.z, r6.x
mad.f32 r0.x, c6.y, r0.y, r0.x
-add.f r0.y, c17.y, r4.z
-mad.f32 r4.z, c8.x, r6.x, c9.x
-mul.f r6.x, r2.x, c0.w
-mul.f r6.y, r2.x, c0.z
-rsq r4.x, (abs)r4.x
-(ss)mov.f32f32 r4.x, r4.x
-mov.f32f32 r0.x, r0.x
-add.f r4.z, c17.x, r4.z
+mad.f32 r0.y, c16.z, r3.w, r6.x
mad.f32 r0.x, c6.z, r0.z, r0.x
-mul.f r0.z, r4.y, r4.x
-mul.f r4.y, r5.z, r4.x
-mul.f r4.x, r4.w, r4.x
-mad.f32 r4.w, c11.z, r0.x, r5.x
-add.f r0.z, c11.x, (neg)r0.z
-add.f r4.y, c11.y, (neg)r4.y
-add.f r4.x, c11.z, (neg)r4.x
-max.f r5.x, r4.w, c20.x
-mul.f r5.z, r0.z, r0.z
-cmps.f.lt r4.w, (neg)r4.w, c20.x
-mad.f32 r5.z, r4.y, r4.y, r5.z
-mov.f32f32 r5.x, r5.x
-mul.f r1.x, c18.y, r1.x
-mul.f r0.w, c18.x, r0.w
-mov.f32f32 r5.z, r5.z
-mul.f r1.y, c18.z, r1.y
-mad.f32 r5.z, r4.x, r4.x, r5.z
-mad.f32 r1.x, r5.x, r1.x, r0.y
-mad.f32 r4.z, r5.x, r0.w, r4.z
-mad.f32 r0.y, c1.w, r2.y, r6.x
-mad.f32 r0.w, c1.z, r2.y, r6.y
-mul.f r6.x, r2.x, c0.y
-mul.f r2.x, r2.x, c0.x
-rsq r5.z, (abs)r5.z
-(ss)mov.f32f32 r5.z, r5.z
-mad.f32 r1.y, r5.x, r1.y, r1.z
-mad.f32 r0.y, c2.w, r2.z, r0.y
-mad.f32 r1.z, c2.z, r2.z, r0.w
-mul.f r0.z, r0.z, r5.z
-mul.f r4.y, r4.y, r5.z
-mul.f r4.x, r4.x, r5.z
-mad.f32 r0.w, c3.w, r2.w, r0.y
-mul.f r0.y, r5.y, r0.z
-mad.f32 r0.z, c3.z, r2.w, r1.z
-mad.f32 r0.y, r5.w, r4.y, r0.y
-mad.f32 r1.z, c1.y, r2.y, r6.x
-mad.f32 r2.x, c1.x, r2.y, r2.x
-mov.f32f32 r2.y, c7.x
-mov.f32f32 r0.y, r0.y
-mad.f32 r1.z, c2.y, r2.z, r1.z
-mad.f32 r0.x, r0.x, r4.x, r0.y
-mad.f32 r0.y, c3.y, r2.w, r1.z
-mad.f32 r1.z, c2.x, r2.z, r2.x
-min.f r4.x, r2.y, c20.z
-max.f r4.y, r0.x, c20.x
-mad.f32 r0.x, c3.x, r2.w, r1.z
-mov.f32f32 r2.w, r3.w
-mov.f32f32 r2.z, r3.z
-mov.f32f32 r2.y, r3.y
-mov.f32f32 r2.x, r3.x
-nop
-log2 r1.z, r4.y
-(ss)mov.f32f32 r1.z, r1.z
-(rpt2)nop
-mul.f r1.z, r4.x, r1.z
+add.f r0.z, c17.z, r4.y
+add.f r4.y, c17.y, r4.z
+mad.f32 r4.z, r0.y, r0.y, r5.x
+mad.f32 r5.x, c11.z, r0.x, r5.z
+mad.f32 r4.w, c8.x, r4.w, c9.x
+mul.f r5.z, r3.x, c0.w
+mul.f r6.x, r3.x, c0.z
+mul.f r6.y, r3.x, c0.y
+mul.f r3.x, r3.x, c0.x
+rsq r4.z, (abs)r4.z
+(ss)mov.f32f32 r6.z, r4.z
+mul.f r0.y, r0.y, r4.z
+(ss)max.f r4.z, r5.x, c20.x
+add.f r4.w, c17.x, r4.w
+mul.f r0.w, r0.w, r6.z
+mul.f r5.y, r5.y, r6.z
+(rpt1)nop
+add.f r0.w, c11.x, (neg)r0.w
+add.f r5.y, c11.y, (neg)r5.y
+add.f r0.y, c11.z, (neg)r0.y
+mov.f32f32 r6.z, r4.z
+mul.f r6.w, r0.w, r0.w
+mul.f r1.z, c18.z, r1.z
+mad.f32 r6.w, r5.y, r5.y, r6.w
+mul.f r1.y, c18.y, r1.y
+mad.f32 r6.w, r0.y, r0.y, r6.w
+mad.f32 r1.z, r6.z, r1.z, r0.z
+mul.f r0.z, c18.x, r1.x
+cmps.f.lt r1.x, (neg)r5.x, c20.x
+mad.f32 r5.x, c1.w, r3.y, r5.z
+mad.f32 r5.z, c1.z, r3.y, r6.x
+mad.f32 r6.x, c1.y, r3.y, r6.y
+rsq r6.y, (abs)r6.w
+(ss)mov.f32f32 r6.w, r6.y
+mul.f r0.y, r0.y, r6.y
+mad.f32 r1.y, r6.z, r1.y, r4.y
+mad.f32 r4.y, r4.z, r0.z, r4.w
+mul.f r0.z, r0.w, r6.w
+mul.f r4.z, r5.y, r6.w
+mad.f32 r0.w, c2.w, r3.z, r5.x
+mad.f32 r4.w, c2.z, r3.z, r5.z
+mul.f r0.z, r4.x, r0.z
+mad.f32 r0.w, c3.w, r3.w, r0.w
+mad.f32 r4.x, r5.w, r4.z, r0.z
+mad.f32 r0.z, c3.z, r3.w, r4.w
+mad.f32 r0.x, r0.x, r0.y, r4.x
+mad.f32 r0.y, c2.y, r3.z, r6.x
+mad.f32 r3.x, c1.x, r3.y, r3.x
+max.f r1.w, r1.w, c20.x
+max.f r0.x, r0.x, c20.x
+mad.f32 r0.y, c3.y, r3.w, r0.y
+mad.f32 r3.x, c2.x, r3.z, r3.x
+min.f r1.w, r1.w, c20.y
+mov.f32f32 r3.y, c7.x
+(rpt1)nop
+log2 r3.z, r0.x
+(ss)mad.f32 r0.x, c3.x, r3.w, r3.x
+min.f r3.x, r3.y, c20.z
(rpt2)nop
-mov.f32f32 r1.z, r1.z
+(ss)mul.f r3.x, r3.x, r3.z
(rpt5)nop
-exp2 r1.z, r1.z
-(ss)mov.f32f32 r1.z, r1.z
-(rpt2)nop
-sel.b32 r1.z, r1.z, r4.w, c20.x
-(rpt2)nop
-mov.f32f32 r1.z, r1.z
-(rpt2)nop
-mov.f32f32 r1.z, r1.z
+exp2 r3.x, r3.x
+(ss)sel.b32 r1.x, r3.x, r1.x, c20.x
(rpt2)nop
-mad.f32 r1.y, c19.z, r1.z, r1.y
-mad.f32 r1.x, c19.y, r1.z, r1.x
-mad.f32 r1.z, c19.x, r1.z, r4.z
-nop
-max.f r1.y, r1.y, c20.x
+(ss)mov.f32f32 r3.x, r1.x
+mad.f32 r1.x, c19.x, r1.x, r4.y
+(rpt1)nop
+mov.f32f32 r3.x, r3.x
max.f r1.x, r1.x, c20.x
-max.f r3.x, r1.z, c20.x
+(rpt1)nop
+mad.f32 r1.z, c19.z, r3.x, r1.z
+mad.f32 r1.y, c19.y, r3.x, r1.y
+min.f r1.x, r1.x, c20.y
nop
-min.f r1.z, r1.y, c20.y
-min.f r1.y, r1.x, c20.y
-min.f r1.x, r3.x, c20.y
+max.f r1.z, r1.z, c20.x
+max.f r1.y, r1.y, c20.x
+(rpt1)nop
+min.f r1.z, r1.z, c20.y
+min.f r1.y, r1.y, c20.y
end
nop
+nop
; VERT: outputs: r0.x (0:0) r1.x (1:0) r2.x (5:0)
-; VERT: inputs: r2.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r0.w (0:0,cm=f,il=16,b=0) r3.x (0:0,cm=f,il=20,b=0)
-; VERT: 144 instructions, 0 half, 7 full
+; VERT: inputs: r3.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r1.x (0:0,cm=f,il=16,b=0) r2.x (0:0,cm=f,il=20,b=0)
+; VERT: 124 instructions, 0 half, 7 full
diff --git a/reference/stk/stk0700.asm b/reference/stk/stk0700.asm
index ff486a7..d610a0c 100644
--- a/reference/stk/stk0700.asm
+++ b/reference/stk/stk0700.asm
@@ -6,39 +6,20 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)bary.f r0.z, 4, r0.x
-bary.f r0.w, 7, r0.x
-bary.f r1.x, 5, r0.x
-bary.f r1.y, 2, r0.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.x, r1.x
-bary.f r1.z, 1, r0.x
-mov.f32f32 r1.w, r0.z
-mov.f32f32 r2.y, r0.w
-mov.f32f32 r2.x, r1.x
+bary.f r0.w, 5, r0.x
+bary.f r1.x, 7, r0.x
+bary.f r2.x, 2, r0.x
+bary.f r2.y, 1, r0.x
bary.f (ei)r0.x, 0, r0.x
-(rpt4)nop
-sam.p (f32)(xyzw)r0.y, r1.w, s#0, t#0
-(sy)mul.f r0.w, r0.w, r1.y
-mul.f r0.z, r0.z, r1.z
-mul.f r0.x, r0.y, r0.x
-mov.f32f32 r0.y, r1.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.x, r0.x
-(ss)mov.f32f32 r1.w, r0.y
-mov.f32f32 r0.y, r0.w
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.x, r0.x
-nop
-mov.f32f32 r1.z, r0.y
-mov.f32f32 r1.y, r0.z
-mov.f32f32 r1.x, r0.x
+(rpt2)nop
+sam.p (f32)(xyzw)r1.x, r0.z, s#0, t#0
+(sy)mul.f r1.z, r1.z, r2.x
+mul.f r1.y, r1.y, r2.y
+(ss)mul.f r1.x, r1.x, r0.x
end
-nop
-nop
; FRAG: outputs: r1.x (1:0)
-; FRAG: inputs: r0.x (1:0,cm=f,il=8,b=1) r0.x (5:0,cm=f,il=12,b=1)
-; FRAG: 34 instructions, 0 half, 3 full
+; FRAG: inputs: r0.x (1:0,cm=f,il=8,b=1) r0.y (5:0,cm=f,il=12,b=1)
+; FRAG: 14 instructions, 0 half, 3 full
diff --git a/reference/test.asm b/reference/test.asm
index bb0a0f9..9242510 100644
--- a/reference/test.asm
+++ b/reference/test.asm
@@ -7,31 +7,25 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c0.x) 0x3f000000, 0x00000000, 0x00000000, 0x00000000
+@const(c1.x) 0x3f800000, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)mul.s r0.z, hr0.x, 2
-bary.f r0.w, 2, r0.x
-bary.f r1.x, 1, r0.x
-bary.f (ei)r0.x, 3, r0.x
-add.s r0.y, r0.z, 1
-mov.f32f32 r1.z, r0.w
-mov.f32f32 r1.y, r1.x
-nop
-cov.s32f32 r0.y, r0.y
-mov.f32f32 r1.w, r0.x
-(rpt1)nop
-mov.f32f32 r0.x, r0.y
+bary.f r1.z, 2, r0.x
+bary.f r1.y, 1, r0.x
+bary.f (ei)r1.w, 3, r0.x
+add.s r0.x, r0.z, 1
+(rpt2)nop
+cov.s32f32 r0.x, r0.x
(rpt2)nop
max.f r0.x, r0.x, c0.y
(rpt2)nop
min.f r0.x, r0.x, c1.x
(rpt2)nop
-mul.f r0.x, c0.x, r0.x
-(rpt2)nop
-mov.f32f32 r1.x, r0.x
+mul.f r1.x, c0.x, r0.x
end
nop
nop
-nop
; FRAG: outputs: r1.x (1:0)
-; FRAG: inputs: r63.y (7:0,cm=f,il=8,b=0) r63.y (1:0,cm=f,il=8,b=1)
-; FRAG: 30 instructions, 1 half, 2 full
+; FRAG: inputs: r63.y (7:0,cm=f,il=8,b=0) r1.x (1:0,cm=f,il=8,b=1)
+; FRAG: 22 instructions, 1 half, 2 full
diff --git a/reference/test0.asm b/reference/test0.asm
index 74062e3..d96960a 100644
--- a/reference/test0.asm
+++ b/reference/test0.asm
@@ -6,39 +6,24 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)bary.f r0.z, 4, r0.x
-bary.f r0.w, 7, r0.x
-bary.f r1.x, 5, r0.x
+bary.f r0.w, 5, r0.x
+bary.f r1.x, 7, r0.x
bary.f r1.y, 3, r0.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.x, r1.x
bary.f r1.z, 2, r0.x
-mov.f32f32 r1.w, r0.z
-mov.f32f32 r2.y, r0.w
-mov.f32f32 r2.x, r1.x
-bary.f r0.z, 1, r0.x
+bary.f r2.x, 1, r0.x
bary.f (ei)r0.x, 0, r0.x
-(rpt3)nop
-sam.p (f32)(xyzw)r1.w, r1.w, s#0, t#0
-(sy)mul.f r0.y, r1.y, r2.z
-mul.f r0.w, r2.y, r1.z
-mul.f r0.z, r2.x, r0.z
-mul.f r0.x, r1.w, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.x, r0.x
-(ss)mov.f32f32 r1.w, r0.y
-mov.f32f32 r1.z, r0.w
-mov.f32f32 r1.y, r0.z
-mov.f32f32 r1.x, r0.x
+(rpt1)nop
+sam.p (f32)(xyzw)r0.y, r0.z, s#0, t#0
+(sy)mul.f r1.w, r1.y, r1.x
+mul.f r1.z, r0.w, r1.z
+mul.f r1.y, r0.z, r2.x
+(ss)mul.f r1.x, r0.y, r0.x
end
+nop
+nop
; FRAG: outputs: r1.x (1:0)
-; FRAG: inputs: r0.z (1:0,cm=f,il=8,b=1) r0.x (5:0,cm=f,il=12,b=1)
-; FRAG: 35 instructions, 0 half, 3 full
+; FRAG: inputs: r0.z (1:0,cm=f,il=8,b=1) r0.y (5:0,cm=f,il=12,b=1)
+; FRAG: 15 instructions, 0 half, 3 full
diff --git a/reference/test1.asm b/reference/test1.asm
index 242def2..693d844 100644
--- a/reference/test1.asm
+++ b/reference/test1.asm
@@ -1,9 +1,9 @@
; options:
; VERT: new compiler
-@in(r2.x) in0
-@in(r2.y) in1
-@in(r2.z) in2
-@in(r2.w) in3
+@in(r1.x) in0
+@in(r1.y) in1
+@in(r1.z) in2
+@in(r1.w) in3
@in(r0.x) in4
@in(r0.y) in5
@in(r0.z) in6
@@ -11,10 +11,10 @@
@in(r3.y) in9
@in(r3.z) in10
@in(r3.w) in11
-@in(r4.x) in12
-@in(r4.y) in13
-@in(r4.z) in14
-@in(r4.w) in15
+@in(r2.x) in12
+@in(r2.y) in13
+@in(r2.z) in14
+@in(r2.w) in15
@out(r0.x) out0
@out(r0.y) out1
@out(r0.z) out2
@@ -27,135 +27,120 @@
@out(r2.y) out9
@out(r2.z) out10
@out(r2.w) out11
-(sy)(ss)mul.f r0.w, r2.x, c12.x
-mul.f r1.x, r0.x, c4.x
-mad.f32 r0.w, c13.x, r2.y, r0.w
-mad.f32 r1.x, c4.y, r0.y, r1.x
-mad.f32 r0.w, c14.x, r2.z, r0.w
-mov.f32f32 r1.y, r3.w
-mad.f32 r0.w, c15.x, r2.w, r0.w
-mov.f32f32 r1.x, r1.x
-mul.f r1.z, r0.x, c5.x
+@const(c19.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
+(sy)(ss)mul.f r0.w, r1.x, c12.x
+mul.f r4.x, r0.x, c4.x
+mad.f32 r0.w, c13.x, r1.y, r0.w
+mad.f32 r4.x, c4.y, r0.y, r4.x
+mad.f32 r0.w, c14.x, r1.z, r0.w
+mad.f32 r4.x, c4.z, r0.z, r4.x
+mad.f32 r4.y, c15.x, r1.w, r0.w
+mul.f r0.w, r1.x, c12.y
+mul.f r4.z, r1.x, c12.z
+mul.f r4.w, r1.x, c0.w
+mul.f r5.x, r4.y, r4.y
+mad.f32 r0.w, c13.y, r1.y, r0.w
+mul.f r5.y, r4.x, c10.x
+mad.f32 r0.w, c14.y, r1.z, r0.w
+mul.f r5.z, r0.x, c5.x
+mad.f32 r5.w, c15.y, r1.w, r0.w
+mad.f32 r0.w, c5.y, r0.y, r5.z
+mad.f32 r4.z, c13.z, r1.y, r4.z
+mad.f32 r4.w, c1.w, r1.y, r4.w
+mad.f32 r5.x, r5.w, r5.w, r5.x
+mad.f32 r4.z, c14.z, r1.z, r4.z
+mad.f32 r5.z, c5.z, r0.z, r0.w
+mad.f32 r4.z, c15.z, r1.w, r4.z
+mad.f32 r0.w, c2.w, r1.z, r4.w
+mul.f r4.w, r1.x, c0.z
+mul.f r6.x, r1.x, c0.y
+mad.f32 r5.x, r4.z, r4.z, r5.x
+mad.f32 r5.y, c10.y, r5.z, r5.y
mul.f r0.x, r0.x, c6.x
-mul.f r1.w, r0.w, r0.w
-mul.f r3.w, r2.x, c12.y
-mad.f32 r1.x, c4.z, r0.z, r1.x
-mad.f32 r3.w, c13.y, r2.y, r3.w
-mov.f32f32 r1.y, r1.y
-mad.f32 r3.w, c14.y, r2.z, r3.w
-mul.f r5.x, r1.x, c10.x
-mad.f32 r3.w, c15.y, r2.w, r3.w
-mad.f32 r1.z, c5.y, r0.y, r1.z
-max.f r1.y, r1.y, c19.x
+mad.f32 r0.w, c3.w, r1.w, r0.w
+mad.f32 r4.w, c1.z, r1.y, r4.w
+mad.f32 r6.x, c1.y, r1.y, r6.x
+mul.f r1.x, r1.x, c0.x
+rsq r5.x, (abs)r5.x
+(ss)mov.f32f32 r6.y, r5.x
+mul.f r4.z, r4.z, r5.x
mad.f32 r0.x, c6.y, r0.y, r0.x
-mad.f32 r0.y, r3.w, r3.w, r1.w
-mov.f32f32 r1.z, r1.z
-min.f r1.w, r1.y, c19.y
-mad.f32 r1.y, c5.z, r0.z, r1.z
-mov.f32f32 r0.y, r0.y
-mul.f r1.z, r2.x, c12.z
-mov.f32f32 r0.x, r0.x
-mad.f32 r1.z, c13.z, r2.y, r1.z
-mad.f32 r5.x, c10.y, r1.y, r5.x
-mad.f32 r1.z, c14.z, r2.z, r1.z
+mad.f32 r0.y, c2.z, r1.z, r4.w
+mul.f r4.y, r4.y, r6.y
+mul.f r4.w, r5.w, r6.y
+(rpt1)nop
+add.f r4.y, c10.x, (neg)r4.y
+add.f r4.w, c10.y, (neg)r4.w
+add.f r4.z, c10.z, (neg)r4.z
mad.f32 r0.x, c6.z, r0.z, r0.x
-mad.f32 r0.z, c15.z, r2.w, r1.z
-mov.f32f32 r1.z, r5.x
-mul.f r5.x, r2.x, c0.w
-mul.f r5.y, r2.x, c0.z
-mad.f32 r0.y, r0.z, r0.z, r0.y
-mad.f32 r1.z, c10.z, r0.x, r1.z
-mad.f32 r5.x, c1.w, r2.y, r5.x
-mad.f32 r5.y, c1.z, r2.y, r5.y
-mul.f r5.z, r2.x, c0.y
-mul.f r2.x, r2.x, c0.x
-mul.f r5.w, c16.z, r3.z
+(ss)mul.f r5.x, r4.y, r4.y
+mad.f32 r0.z, c3.z, r1.w, r0.y
+mad.f32 r0.y, r4.w, r4.w, r5.x
+mad.f32 r5.x, c10.z, r0.x, r5.y
+mad.f32 r0.y, r4.z, r4.z, r0.y
+mad.f32 r5.y, c2.y, r1.z, r6.x
+mad.f32 r1.x, c1.x, r1.y, r1.x
+max.f r1.y, r3.w, c19.x
+mul.f r3.w, c16.z, r3.z
+mul.f r5.w, c16.y, r3.y
+mul.f r6.x, c16.x, r3.x
rsq r0.y, (abs)r0.y
-(ss)mov.f32f32 r0.y, r0.y
-max.f r6.x, r1.z, c19.x
-cmps.f.lt r1.z, (neg)r1.z, c19.x
-mad.f32 r5.x, c2.w, r2.z, r5.x
-mul.f r0.w, r0.w, r0.y
-mul.f r3.w, r3.w, r0.y
-mul.f r0.y, r0.z, r0.y
-mov.f32f32 r6.x, r6.x
-add.f r0.z, c10.x, (neg)r0.w
-add.f r3.w, c10.y, (neg)r3.w
-add.f r0.y, c10.z, (neg)r0.y
-mad.f32 r0.w, c8.z, r3.z, c9.z
-mul.f r6.y, r0.z, r0.z
-mul.f r6.z, c16.y, r3.y
-mad.f32 r6.y, r3.w, r3.w, r6.y
-add.f r5.w, r5.w, r0.w
-mad.f32 r0.w, c8.y, r3.y, c9.y
-mul.f r6.w, c16.x, r3.x
-mov.f32f32 r6.y, r6.y
+(ss)mov.f32f32 r6.y, r0.y
+(ss)mul.f r0.y, r4.z, r0.y
+max.f r4.z, r5.x, c19.x
+mad.f32 r6.z, c8.x, r3.x, c9.x
+mul.f r4.y, r4.y, r6.y
+mul.f r4.w, r4.w, r6.y
+mov.f32f32 r6.y, r4.z
+mad.f32 r6.w, c8.y, r3.y, c9.y
+mul.f r4.x, r4.x, r4.y
+mad.f32 r4.y, c8.z, r3.z, c9.z
+mad.f32 r4.x, r5.z, r4.w, r4.x
+add.f r4.w, r5.w, r6.w
+mad.f32 r0.x, r0.x, r0.y, r4.x
+add.f r3.w, r3.w, r4.y
+mul.f r0.y, c17.y, r3.y
+add.f r3.y, r6.x, r6.z
+max.f r0.x, r0.x, c19.x
mul.f r3.z, c17.z, r3.z
-mad.f32 r6.y, r0.y, r0.y, r6.y
-add.f r6.z, r6.z, r0.w
-mad.f32 r7.x, c8.x, r3.x, c9.x
-mad.f32 r0.w, c3.w, r2.w, r5.x
-mad.f32 r5.x, c2.z, r2.z, r5.y
-mad.f32 r5.y, c1.y, r2.y, r5.z
-mad.f32 r2.x, c1.x, r2.y, r2.x
-rsq r2.y, (abs)r6.y
-(ss)mov.f32f32 r2.y, r2.y
-mad.f32 r3.z, r6.x, r3.z, r5.w
-mul.f r3.y, c17.y, r3.y
-add.f r5.z, r6.w, r7.x
-mul.f r0.z, r0.z, r2.y
-mul.f r3.w, r3.w, r2.y
-mul.f r0.y, r0.y, r2.y
-mad.f32 r3.y, r6.x, r3.y, r6.z
-mul.f r0.z, r1.x, r0.z
-mul.f r1.x, c17.x, r3.x
-mad.f32 r1.y, r1.y, r3.w, r0.z
-mad.f32 r0.z, c3.z, r2.w, r5.x
-mad.f32 r2.y, c2.y, r2.z, r5.y
-mad.f32 r2.x, c2.x, r2.z, r2.x
-mov.f32f32 r1.y, r1.y
-mad.f32 r1.x, r6.x, r1.x, r5.z
-mad.f32 r1.y, r0.x, r0.y, r1.y
-mad.f32 r0.y, c3.y, r2.w, r2.y
-mad.f32 r0.x, c3.x, r2.w, r2.x
-mov.f32f32 r3.x, c7.x
-max.f r1.y, r1.y, c19.x
-mov.f32f32 r2.w, r4.w
-mov.f32f32 r2.z, r4.z
-mov.f32f32 r2.y, r4.y
-mov.f32f32 r2.x, r4.x
+mad.f32 r4.x, r6.y, r0.y, r4.w
+mul.f r3.x, c17.x, r3.x
+cmps.f.lt r4.y, (neg)r5.x, c19.x
+mad.f32 r0.y, c3.y, r1.w, r5.y
+mad.f32 r1.x, c2.x, r1.z, r1.x
+log2 r1.z, r0.x
+mov.f32f32 r4.w, c7.x
+mad.f32 r3.z, r6.y, r3.z, r3.w
+mad.f32 r3.x, r4.z, r3.x, r3.y
+(ss)mad.f32 r0.x, c3.x, r1.w, r1.x
+min.f r1.x, r4.w, c19.z
+min.f r1.w, r1.y, c19.y
(rpt1)nop
-log2 r1.y, r1.y
-(ss)mov.f32f32 r1.y, r1.y
-min.f r3.x, r3.x, c19.z
-(rpt2)nop
-mul.f r1.y, r3.x, r1.y
-(rpt2)nop
-mov.f32f32 r1.y, r1.y
+(ss)mul.f r1.x, r1.x, r1.z
(rpt5)nop
-exp2 r1.y, r1.y
-(ss)mov.f32f32 r1.y, r1.y
-(rpt2)nop
-sel.b32 r1.y, r1.y, r1.z, c19.x
-(rpt2)nop
-mov.f32f32 r1.y, r1.y
+exp2 r1.x, r1.x
+(ss)sel.b32 r1.x, r1.x, r4.y, c19.x
(rpt2)nop
+mov.f32f32 r1.y, r1.x
+mad.f32 r1.x, c18.x, r1.x, r3.x
+(rpt1)nop
mov.f32f32 r1.y, r1.y
-(rpt2)nop
-mad.f32 r1.z, c18.z, r1.y, r3.z
-mad.f32 r3.x, c18.y, r1.y, r3.y
-mad.f32 r1.x, c18.x, r1.y, r1.x
-nop
-max.f r1.y, r1.z, c19.x
-max.f r3.x, r3.x, c19.x
max.f r1.x, r1.x, c19.x
-nop
-min.f r1.z, r1.y, c19.y
-min.f r1.y, r3.x, c19.y
+(rpt1)nop
+mad.f32 r1.z, c18.z, r1.y, r3.z
+mad.f32 r1.y, c18.y, r1.y, r4.x
min.f r1.x, r1.x, c19.y
+nop
+max.f r1.z, r1.z, c19.x
+max.f r1.y, r1.y, c19.x
+(rpt1)nop
+min.f r1.z, r1.z, c19.y
+min.f r1.y, r1.y, c19.y
end
nop
+nop
+nop
; VERT: outputs: r0.x (0:0) r1.x (1:0) r2.x (5:0)
-; VERT: inputs: r2.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r3.x (0:0,cm=f,il=16,b=0) r4.x (0:0,cm=f,il=20,b=0)
-; VERT: 145 instructions, 0 half, 8 full
+; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=7,il=12,b=0) r3.x (0:0,cm=f,il=16,b=0) r2.x (0:0,cm=f,il=20,b=0)
+; VERT: 121 instructions, 0 half, 7 full
diff --git a/reference/test2.asm b/reference/test2.asm
index d1549f6..ce47110 100644
--- a/reference/test2.asm
+++ b/reference/test2.asm
@@ -32,99 +32,97 @@
@out(r3.y) out13
@out(r3.z) out14
@out(r3.w) out15
+@const(c25.x) 0x3f000000, 0x00000000, 0x3f800000, 0x00000000
+@const(c26.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)mul.f r0.x, r1.x, c12.x
-mul.f r0.y, r4.x, c4.x
+mul.f r0.y, r1.x, c12.y
mad.f32 r0.x, c13.x, r1.y, r0.x
-mad.f32 r0.y, c4.y, r4.y, r0.y
+mad.f32 r0.y, c13.y, r1.y, r0.y
mad.f32 r0.x, c14.x, r1.z, r0.x
-mul.f r0.z, r4.x, c5.x
-mad.f32 r0.x, c15.x, r1.w, r0.x
-mov.f32f32 r0.y, r0.y
-mad.f32 r0.z, c5.y, r4.y, r0.z
-mul.f r0.w, r4.x, c6.x
-mul.f r3.x, r0.x, r0.x
-mul.f r3.y, r1.x, c12.y
-mad.f32 r6.x, c4.z, r4.z, r0.y
-mad.f32 r0.y, c13.y, r1.y, r3.y
-mov.f32f32 r0.z, r0.z
mad.f32 r0.y, c14.y, r1.z, r0.y
-mad.f32 r6.y, c5.z, r4.z, r0.z
-mad.f32 r3.y, c15.y, r1.w, r0.y
-mad.f32 r0.y, c6.y, r4.y, r0.w
+mad.f32 r0.x, c15.x, r1.w, r0.x
+mad.f32 r0.y, c15.y, r1.w, r0.y
mul.f r0.z, r1.x, c12.z
mul.f r0.w, r1.x, c0.w
-mad.f32 r3.x, r3.y, r3.y, r3.x
-mov.f32f32 r0.y, r0.y
+mul.f r3.x, r0.x, r0.x
mad.f32 r0.z, c13.z, r1.y, r0.z
-mad.f32 r0.w, c1.w, r1.y, r0.w
-mov.f32f32 r3.x, r3.x
+mad.f32 r3.x, r0.y, r0.y, r3.x
mad.f32 r0.z, c14.z, r1.z, r0.z
-mad.f32 r6.z, c6.z, r4.z, r0.y
-mad.f32 r3.z, c15.z, r1.w, r0.z
-mad.f32 r0.y, c2.w, r1.z, r0.w
-mul.f r0.z, r1.x, c0.z
-mul.f r3.w, r1.x, c0.y
-mad.f32 r3.x, r3.z, r3.z, r3.x
-mad.f32 r0.w, c3.w, r1.w, r0.y
-mad.f32 r0.y, c1.z, r1.y, r0.z
-mad.f32 r0.z, c1.y, r1.y, r3.w
+mad.f32 r0.w, c1.w, r1.y, r0.w
+mad.f32 r0.z, c15.z, r1.w, r0.z
+mad.f32 r0.w, c2.w, r1.z, r0.w
+mul.f r3.y, r1.x, c0.z
+mul.f r3.z, r1.x, c0.y
+mad.f32 r3.x, r0.z, r0.z, r3.x
+mad.f32 r0.w, c3.w, r1.w, r0.w
+mad.f32 r3.y, c1.z, r1.y, r3.y
+mad.f32 r3.z, c1.y, r1.y, r3.z
mul.f r3.w, r1.x, c0.x
-mad.f32 r0.y, c2.z, r1.z, r0.y
-mad.f32 r6.w, c2.y, r1.z, r0.z
-rsq r0.z, (abs)r3.x
-(ss)mov.f32f32 r3.x, r0.z
-mad.f32 r0.z, c3.z, r1.w, r0.y
-mad.f32 r0.y, c3.y, r1.w, r6.w
-mad.f32 r3.w, c1.x, r1.y, r3.w
-mul.f r6.w, r0.x, r3.x
-mul.f r7.x, r3.y, r3.x
-mul.f r7.y, r3.z, r3.x
-mad.f32 r0.x, c2.x, r1.z, r3.w
-mul.f r3.x, r6.x, r6.w
+mul.f r6.x, r4.x, c4.x
+mul.f r6.y, r4.x, c5.x
+rsq r3.x, (abs)r3.x
+(ss)mov.f32f32 r6.z, r3.x
+mul.f r6.w, r0.z, r3.x
+mad.f32 r0.z, c2.z, r1.z, r3.y
+(ss)mad.f32 r3.x, c2.y, r1.z, r3.z
+mul.f r7.x, r0.x, r6.z
+mad.f32 r0.x, c4.y, r4.y, r6.x
+mul.f r6.x, r0.y, r6.z
+mad.f32 r6.z, c4.z, r4.z, r0.x
+mad.f32 r0.z, c3.z, r1.w, r0.z
+mad.f32 r0.y, c3.y, r1.w, r3.x
+mad.f32 r0.x, c1.x, r1.y, r3.w
+mul.f r3.x, r6.z, r7.x
+mad.f32 r3.y, c5.y, r4.y, r6.y
+mad.f32 r0.x, c2.x, r1.z, r0.x
+mad.f32 r6.y, c5.z, r4.z, r3.y
mad.f32 r0.x, c3.x, r1.w, r0.x
-mad.f32 r3.x, r6.y, r7.x, r3.x
+mul.f r3.y, r4.x, c6.x
add.f r3.w, r2.w, r5.w
+mad.f32 r5.w, r6.y, r6.x, r3.x
+mad.f32 r2.w, c6.y, r4.y, r3.y
add.f r3.z, r2.z, r5.z
add.f r3.y, r2.y, r5.y
-mov.f32f32 r2.y, r3.x
+mov.f32f32 r2.y, r5.w
+mad.f32 r5.y, c6.z, r4.z, r2.w
add.f r3.x, r2.x, r5.x
-mad.f32 r2.x, r6.z, r7.y, r2.y
-mad.f32 r2.y, r6.z, r7.y, r2.y
add.f r2.w, r1.w, r4.w
add.f r2.z, r1.z, r4.z
-mad.f32 r1.z, (neg)r2.x, r6.x, r6.w
-mad.f32 r1.w, (neg)r2.y, r6.y, r7.x
+mad.f32 r1.z, r5.y, r6.w, r2.y
+mad.f32 r1.w, r5.y, r6.w, r5.w
add.f r2.y, r1.y, r4.y
add.f r2.x, r1.x, r4.x
-add.f r1.x, r1.z, c25.y
-add.f r1.y, r1.w, c25.y
+mad.f32 r1.x, (neg)r1.z, r6.z, r7.x
+mad.f32 r1.y, (neg)r1.w, r6.y, r6.x
mov.f32f32 r4.x, r4.z
mov.f32f32 r4.y, r4.w
-mul.f r1.x, r1.x, c25.x
-mul.f r1.y, r1.y, c25.x
+add.f r1.z, r1.x, c25.y
+add.f r1.w, r1.y, c25.y
+(rpt1)nop
+mul.f r1.z, r1.z, c25.x
+mul.f r1.w, r1.w, c25.x
(rpt1)nop
-mad.f32 r1.x, r1.z, r1.x, c25.x
-mad.f32 r1.y, r1.w, r1.y, c25.x
+mad.f32 r1.x, r1.x, r1.z, c25.x
+mad.f32 r1.y, r1.y, r1.w, c25.x
(rpt1)nop
mul.f r1.z, r1.x, c21.w
mul.f r1.w, r1.x, c21.z
mad.f32 r1.z, c22.w, r1.y, r1.z
mad.f32 r1.w, c22.z, r1.y, r1.w
mad.f32 r1.z, c23.w, r4.x, r1.z
-mad.f32 r4.z, c23.z, r4.x, r1.w
+mad.f32 r5.x, c23.z, r4.x, r1.w
mad.f32 r1.w, c24.w, r4.y, r1.z
-mad.f32 r1.z, c24.z, r4.y, r4.z
-mul.f r4.z, r1.x, c21.y
+mad.f32 r1.z, c24.z, r4.y, r5.x
+mul.f r5.x, r1.x, c21.y
mul.f r1.x, r1.x, c21.x
-mad.f32 r4.z, c22.y, r1.y, r4.z
+mad.f32 r5.x, c22.y, r1.y, r5.x
mad.f32 r1.x, c22.x, r1.y, r1.x
-mad.f32 r1.y, c23.y, r4.x, r4.z
-mad.f32 r1.x, c23.x, r4.x, r1.x
+mad.f32 r1.y, c23.y, r4.x, r5.x
+mad.f32 r1.x, c23.x, r4.z, r1.x
mad.f32 r1.y, c24.y, r4.y, r1.y
-mad.f32 r1.x, c24.x, r4.y, r1.x
+mad.f32 r1.x, c24.x, r4.w, r1.x
end
-nop
; VERT: outputs: r0.x (0:0) r1.x (5:0) r2.x (5:0) r3.x (5:0)
; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) r4.x (0:0,cm=f,il=12,b=0) r2.x (0:0,cm=f,il=16,b=0) r5.x (0:0,cm=f,il=20,b=0)
-; VERT: 93 instructions, 0 half, 8 full
+; VERT: 91 instructions, 0 half, 8 full
diff --git a/reference/test3.asm b/reference/test3.asm
index f75c5c1..fee7419 100644
--- a/reference/test3.asm
+++ b/reference/test3.asm
@@ -9,29 +9,26 @@
@out(r0.y) out1
@out(r0.z) out2
@out(r0.w) out3
-(sy)(ss)mul.f r1.w, r0.x, c12.x
-mul.f r2.x, r0.x, c0.x
-mul.f r0.y, r0.w, c6.x
+@const(c25.x) 0x3f000000, 0x00000000, 0x3f800000, 0x00000000
+@const(c26.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
+(sy)(ss)mul.f r0.y, r0.x, c12.x
+mul.f r1.w, r0.x, c0.x
+mul.f r0.z, r0.w, c6.x
mul.f r0.x, r0.x, c0.y
-mul.f r0.w, r1.w, r1.w
-mul.f r0.z, r2.x, c10.x
-mad.f32 r1.x, c6.y, r1.x, r0.y
-mad.f32 r0.x, c10.y, r0.x, r0.z
-mov.f32f32 r1.z, r1.z
+mul.f r0.w, r0.y, r0.y
+mul.f r2.x, r1.w, c10.x
+mad.f32 r0.z, c6.y, r1.x, r0.z
+(rpt3)nop
+rsq r0.w, (abs)r0.w
+(ss)mul.f r0.w, r0.y, r0.w
+mad.f32 r1.x, c10.y, r0.x, r2.x
+mad.f32 r1.y, c6.z, r1.y, r0.z
mov.f32f32 r0.z, c25.x
mov.f32f32 r0.y, c25.x
-rsq r0.w, (abs)r0.w
-(ss)mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.y, r0.x
-(rpt1)nop
-mul.f r0.w, r1.w, r0.w
-mov.f32f32 r1.x, r1.x
mov.f32f32 r0.x, c25.x
-mad.f32 r1.x, c6.z, r1.y, r1.x
+mad.f32 r1.x, c10.z, r1.y, r1.x
(rpt2)nop
-mad.f32 r1.x, c10.z, r1.x, r2.y
-(rpt2)nop
-mad.f32 r0.w, (neg)r1.x, r2.x, r0.w
+mad.f32 r0.w, (neg)r1.x, r1.w, r0.w
(rpt2)nop
add.f r1.x, r0.w, c25.y
(rpt2)nop
@@ -45,7 +42,8 @@ mad.f32 r0.w, c24.w, r1.z, r0.w
end
nop
nop
+nop
; VERT: outputs: r0.x (0:0)
; VERT: inputs: r0.x (0:0,cm=1,il=8,b=0) r0.w (0:0,cm=f,il=12,b=0) r63.w (0:0,cm=0,il=16,b=0) r63.w (0:0,cm=0,il=20,b=0)
-; VERT: 47 instructions, 0 half, 3 full
+; VERT: 42 instructions, 0 half, 3 full
diff --git a/reference/testN.asm b/reference/testN.asm
index 4241e5c..5c49d2b 100644
--- a/reference/testN.asm
+++ b/reference/testN.asm
@@ -1,73 +1,48 @@
; options:
-; VERT: new compiler
+; VERT: TGSI compiler
@in(r0.x) in0
@in(r0.y) in1
@in(r0.z) in2
-@out(r0.x) out0
-@out(r0.y) out1
-@out(r0.z) out2
-@out(r0.w) out3
-@out(r1.x) out4
-@out(r1.y) out5
-@out(r1.z) out6
-@out(r1.w) out7
+; in3 unused
+@out(r2.y) out0
+@out(r2.z) out1
+@out(r2.w) out2
+@out(r3.x) out3
+@out(r1.y) out4
+@out(r1.z) out5
+@out(r1.w) out6
+@out(r2.x) out7
+@const(c4.x) 0x3f000000, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)mad.f32 r0.w, c4.x, r0.x, c4.x
-mov.f32f32 r1.x, r0.y
+mad.f32 r1.x, c4.x, r0.y, c4.x
+mov.f32f32 r2.x, (0.000000)
+mov.f32f32 r1.w, r0.y
mov.f32f32 r1.y, r0.x
-mad.f32 r1.z, c4.x, r0.y, c4.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.w, r1.z
-mov.f32f32 r2.x, r0.w
-mov.f32f32 r1.z, r1.x
-mov.f32f32 r1.x, r1.y
-mov.f32f32 r2.y, r1.w
-mov.f32f32 r0.w, (0.000000)
-(rpt1)nop
-mul.f r1.y, c0.w, r0.x
+mul.f r2.y, c0.w, r0.x
mul.f r2.z, c0.z, r0.x
mul.f r2.w, c0.y, r0.x
-sam (f32)(x)r3.x, r2.x, s#0, t#0
-(sy)add.f r0.z, r0.z, r3.x
-mov.f32f32 r1.w, r0.w
+sam (f32)(x)r0.w, r0.w, s#0, t#0
+(sy)add.f r1.z, r0.z, r0.w
mul.f r0.x, c0.x, r0.x
-nop
-(ss)mov.f32f32 r2.x, r0.z
-(rpt2)nop
-mad.f32 r0.z, c1.w, r2.x, r1.y
-mad.f32 r0.w, c1.z, r2.x, r2.z
-mad.f32 r1.y, c1.y, r2.x, r2.w
-mad.f32 r0.x, c1.x, r2.x, r0.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
+(rpt1)nop
+mad.f32 r0.z, c1.w, r1.z, r2.y
+(ss)mad.f32 r0.w, c1.z, r1.z, r2.z
mad.f32 r0.z, c2.w, r0.y, r0.z
mad.f32 r0.w, c2.z, r0.y, r0.w
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r0.x, r0.x
-add.f r0.z, r0.z, c3.w
-add.f r2.y, r0.w, c3.z
-mad.f32 r1.y, c2.y, r0.y, r1.y
+mad.f32 r1.x, c1.y, r1.z, r2.w
+mad.f32 r0.x, c1.x, r1.z, r0.x
+add.f r3.x, r0.z, c3.w
+add.f r2.w, r0.w, c3.z
+mad.f32 r0.z, c2.y, r0.y, r1.x
mad.f32 r0.x, c2.x, r0.y, r0.x
-mov.f32f32 r0.w, r0.z
-mov.f32f32 r0.z, r2.y
-add.f r0.y, r1.y, c3.y
-add.f r0.x, r0.x, c3.x
-mov.f32f32 r1.y, r2.x
-nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r1.y, r1.y
-(rpt2)nop
-mov.f32f32 r1.y, r1.y
-(rpt2)nop
-mov.f32f32 r1.y, r1.y
+(rpt1)nop
+add.f r2.z, r0.z, c3.y
+add.f r2.y, r0.x, c3.x
end
-; VERT: outputs: r0.x (0:0) r1.x (5:20)
+nop
+nop
+; VERT: outputs: r2.y (0:0) r1.y (5:20)
; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0)
-; VERT: 63 instructions, 0 half, 4 full
+; VERT: 28 instructions, 0 half, 4 full
+; pos: r2.y
diff --git a/reference/tex-clamp0.asm b/reference/tex-clamp0.asm
index 74062e3..d96960a 100644
--- a/reference/tex-clamp0.asm
+++ b/reference/tex-clamp0.asm
@@ -6,39 +6,24 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)bary.f r0.z, 4, r0.x
-bary.f r0.w, 7, r0.x
-bary.f r1.x, 5, r0.x
+bary.f r0.w, 5, r0.x
+bary.f r1.x, 7, r0.x
bary.f r1.y, 3, r0.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.x, r1.x
bary.f r1.z, 2, r0.x
-mov.f32f32 r1.w, r0.z
-mov.f32f32 r2.y, r0.w
-mov.f32f32 r2.x, r1.x
-bary.f r0.z, 1, r0.x
+bary.f r2.x, 1, r0.x
bary.f (ei)r0.x, 0, r0.x
-(rpt3)nop
-sam.p (f32)(xyzw)r1.w, r1.w, s#0, t#0
-(sy)mul.f r0.y, r1.y, r2.z
-mul.f r0.w, r2.y, r1.z
-mul.f r0.z, r2.x, r0.z
-mul.f r0.x, r1.w, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.x, r0.x
-(ss)mov.f32f32 r1.w, r0.y
-mov.f32f32 r1.z, r0.w
-mov.f32f32 r1.y, r0.z
-mov.f32f32 r1.x, r0.x
+(rpt1)nop
+sam.p (f32)(xyzw)r0.y, r0.z, s#0, t#0
+(sy)mul.f r1.w, r1.y, r1.x
+mul.f r1.z, r0.w, r1.z
+mul.f r1.y, r0.z, r2.x
+(ss)mul.f r1.x, r0.y, r0.x
end
+nop
+nop
; FRAG: outputs: r1.x (1:0)
-; FRAG: inputs: r0.z (1:0,cm=f,il=8,b=1) r0.x (5:0,cm=f,il=12,b=1)
-; FRAG: 35 instructions, 0 half, 3 full
+; FRAG: inputs: r0.z (1:0,cm=f,il=8,b=1) r0.y (5:0,cm=f,il=12,b=1)
+; FRAG: 15 instructions, 0 half, 3 full
diff --git a/reference/tex-clamp1.asm b/reference/tex-clamp1.asm
index 91aaa13..a0c4c2c 100644
--- a/reference/tex-clamp1.asm
+++ b/reference/tex-clamp1.asm
@@ -6,39 +6,24 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)bary.f r0.z, 7, r0.x
bary.f r0.w, 4, r0.x
mov.f32f32 r1.x, (0.000000)
bary.f r1.y, 3, r0.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.x, r1.x
-bary.f r1.x, 2, r0.x
-mov.f32f32 r1.z, r0.z
-mov.f32f32 r1.w, r0.w
-bary.f r0.z, 1, r0.x
+bary.f r1.z, 2, r0.x
+bary.f r2.x, 1, r0.x
bary.f (ei)r0.x, 0, r0.x
-(rpt3)nop
-sam.p (f32)(xyzw)r1.z, r1.z, s#0, t#0
-(sy)mul.f r0.y, r1.y, r2.y
-mul.f r0.w, r2.x, r1.x
-mul.f r0.z, r1.w, r0.z
-mul.f r0.x, r1.z, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.x, r0.x
-(ss)mov.f32f32 r1.w, r0.y
-mov.f32f32 r1.z, r0.w
-mov.f32f32 r1.y, r0.z
-mov.f32f32 r1.x, r0.x
+(rpt1)nop
+sam.p (f32)(xyzw)r0.y, r0.z, s#0, t#0
+(sy)mul.f r1.w, r1.y, r1.x
+mul.f r1.z, r0.w, r1.z
+mul.f r1.y, r0.z, r2.x
+(ss)mul.f r1.x, r0.y, r0.x
end
nop
+nop
; FRAG: outputs: r1.x (1:0)
; FRAG: inputs: r0.z (1:0,cm=f,il=8,b=1) r63.w (5:0,cm=f,il=12,b=1)
-; FRAG: 34 instructions, 0 half, 3 full
+; FRAG: 15 instructions, 0 half, 3 full
diff --git a/reference/twoside-frag.asm b/reference/twoside-frag.asm
index 0272f05..19cf964 100644
--- a/reference/twoside-frag.asm
+++ b/reference/twoside-frag.asm
@@ -7,6 +7,8 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c1.x) 0x3f800000, 0x3f4ccccd, 0x00000000, 0x00000000
+@const(c2.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)mul.s r0.z, hr0.x, 2
absneg.f r0.w, (neg)c0.x
bary.f r1.x, 15, r0.x
@@ -19,71 +21,51 @@ cov.s32f32 r0.z, r0.z
bary.f r2.x, 6, r0.x
bary.f r2.y, 14, r0.x
bary.f r2.z, 2, r0.x
-mov.f32f32 r2.w, r0.z
+max.f r2.w, r0.z, c1.z
cmps.f.lt r3.x, r0.z, c1.z
cmps.f.lt r3.y, r0.z, c1.z
cmps.f.lt r3.z, r0.z, c1.z
-max.f r2.w, r2.w, c1.z
+min.f r2.w, r2.w, c1.x
sel.b32 r1.x, r1.x, r3.x, r1.y
-sel.b32 r1.y, r1.w, r3.y, r1.z
-sel.b32 r1.z, r2.y, r3.z, r2.x
-min.f r1.w, r2.w, c1.x
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.z, r1.z
-add.f r0.w, r0.w, r1.w
+sel.b32 r1.y, r2.y, r3.y, r2.x
+bary.f r2.x, 13, r0.x
+add.f r0.w, r0.w, r2.w
mul.f r1.x, c1.y, r1.x
-mov.f32f32 r1.y, r1.y
-mul.f r1.z, c1.y, r1.z
-mov.f32f32 r0.w, r0.w
-cmps.f.lt r1.w, r0.z, c1.z
-cmps.f.lt r2.x, r0.z, c1.z
-cmps.f.lt r2.y, r0.z, c1.z
+mul.f r1.y, c1.y, r1.y
+bary.f r2.y, 5, r0.x
max.f r0.w, r0.w, c1.z
-bary.f r2.w, 10, r0.x
-bary.f r3.x, 13, r0.x
-bary.f r3.y, 9, r0.x
+cmps.f.lt r2.w, r0.z, c1.z
+cmps.f.lt r3.x, r0.z, c1.z
+cmps.f.lt r3.y, r0.z, c1.z
min.f r0.w, r0.w, c1.x
-sel.b32 r1.w, r2.w, r1.w, r2.z
-bary.f r2.z, 5, r0.x
-bary.f r2.w, 1, r0.x
+sel.b32 r2.x, r2.x, r3.z, r2.y
+bary.f r2.y, 12, r0.x
+sel.b32 r1.z, r1.w, r3.x, r1.z
cmps.f.ne r0.w, r0.w, c1.z
-mov.f32f32 r1.w, r1.w
-cmps.f.lt r3.z, r0.z, c1.z
-sel.b32 r2.y, r3.y, r2.y, r2.w
-sel.b32 r1.x, r1.y, r0.w, r1.x
-mov.f32f32 r1.y, r1.w
-bary.f r2.w, 8, r0.x
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r1.w, r1.x
-sel.b32 r1.x, r1.y, r0.w, r1.z
-bary.f r1.y, 0, r0.x
-mov.f32f32 r2.y, r2.y
-sel.b32 r2.x, r3.x, r2.x, r2.z
-mov.f32f32 r1.z, r1.x
-sel.b32 r1.x, r2.w, r3.z, r1.y
+bary.f r3.x, 10, r0.x
+mul.f r2.x, c1.y, r2.x
+bary.f r3.z, 4, r0.x
+sel.b32 r1.w, r1.z, r0.w, r1.x
+sel.b32 r1.x, r3.x, r3.y, r2.z
+cmps.f.lt r2.z, r0.z, c1.z
+sel.b32 r2.y, r2.y, r2.w, r3.z
+bary.f r2.w, 9, r0.x
+sel.b32 r1.z, r1.x, r0.w, r1.y
+bary.f r1.x, 1, r0.x
+mul.f r2.y, c1.y, r2.y
+(rpt1)nop
+sel.b32 r1.x, r2.w, r2.z, r1.x
cmps.f.lt r0.z, r0.z, c1.z
-mov.f32f32 r1.y, r2.x
-bary.f r2.x, 12, r0.x
-mov.f32f32 r1.x, r1.x
-bary.f (ei)r0.x, 4, r0.x
-mul.f r0.y, c1.y, r1.y
+bary.f r2.z, 8, r0.x
+bary.f (ei)r0.x, 0, r0.x
+sel.b32 r1.y, r1.x, r0.w, r2.x
+(rpt1)nop
+sel.b32 r0.x, r2.z, r0.z, r0.x
(rpt2)nop
-sel.b32 r0.y, r2.y, r0.w, r0.y
-mov.f32f32 r1.x, r1.x
-sel.b32 r0.x, r2.x, r0.z, r0.x
-nop
-mov.f32f32 r1.y, r0.y
-nop
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
-mul.f r0.x, c1.y, r0.x
-(rpt2)nop
-sel.b32 r0.x, r1.x, r0.w, r0.x
-(rpt2)nop
-mov.f32f32 r1.x, r0.x
+sel.b32 r1.x, r0.x, r0.w, r2.y
end
+nop
; FRAG: outputs: r1.x (1:0)
; FRAG: inputs: r0.w (1:0,cm=f,il=8,b=1) r0.z (1:1,cm=f,il=12,b=1) r63.y (7:0,cm=f,il=16,b=0) r1.x (2:0,cm=f,il=16,b=1) r0.y (2:1,cm=f,il=20,b=1)
-; FRAG: 84 instructions, 1 half, 4 full
+; FRAG: 59 instructions, 1 half, 4 full
diff --git a/reference/twoside-vert.asm b/reference/twoside-vert.asm
index a3e6d93..9eebb41 100644
--- a/reference/twoside-vert.asm
+++ b/reference/twoside-vert.asm
@@ -8,10 +8,10 @@
@in(r4.y) in5
@in(r4.z) in6
@in(r4.w) in7
-@in(r0.x) in8
-@in(r0.y) in9
-@in(r0.z) in10
-@in(r0.w) in11
+@in(r2.x) in8
+@in(r2.y) in9
+@in(r2.z) in10
+@in(r2.w) in11
@out(r0.x) out0
@out(r0.y) out1
@out(r0.z) out2
@@ -28,83 +28,52 @@
@out(r3.y) out13
@out(r3.z) out14
@out(r3.w) out15
-(sy)(ss)mov.f32f32 r2.x, r0.w
-mov.f32f32 r2.y, r0.z
-mov.f32f32 r2.z, r0.y
-mov.f32f32 r2.w, r0.x
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r2.w, r2.w
-max.f r2.x, r2.x, c5.x
-max.f r2.y, r2.y, c5.x
-max.f r2.z, r2.z, c5.x
-max.f r2.w, r2.w, c5.x
-min.f r3.w, r2.x, c5.y
-min.f r3.z, r2.y, c5.y
-min.f r3.y, r2.z, c5.y
-min.f r3.x, r2.w, c5.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
-max.f r0.w, r0.w, c5.x
-max.f r0.z, r0.z, c5.x
-max.f r0.y, r0.y, c5.x
-max.f r0.x, r0.x, c5.x
-min.f r2.w, r0.w, c5.y
-min.f r2.z, r0.z, c5.y
-min.f r2.y, r0.y, c5.y
-min.f r2.x, r0.x, c5.y
-mul.f r0.x, c1.w, r1.x
+@const(c5.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000
+(sy)(ss)mul.f r0.x, c1.w, r1.x
mul.f r0.y, c1.z, r1.x
mad.f32 r0.x, c2.w, r1.y, r0.x
mad.f32 r0.y, c2.z, r1.y, r0.y
mad.f32 r0.x, c3.w, r1.z, r0.x
mad.f32 r0.y, c3.z, r1.z, r0.y
-mad.f32 r0.x, c4.w, r1.w, r0.x
-mad.f32 r0.y, c4.z, r1.w, r0.y
-mul.f r5.x, c1.y, r1.x
-mul.f r1.x, c1.x, r1.x
-mov.f32f32 r0.w, r0.x
-mov.f32f32 r0.z, r0.y
-mad.f32 r0.x, c2.y, r1.y, r5.x
-mad.f32 r0.y, c2.x, r1.y, r1.x
+mad.f32 r0.w, c4.w, r1.w, r0.x
+mad.f32 r0.z, c4.z, r1.w, r0.y
+mul.f r0.x, c1.y, r1.x
+mul.f r0.y, c1.x, r1.x
+mad.f32 r0.x, c2.y, r1.y, r0.x
+mad.f32 r0.y, c2.x, r1.y, r0.y
mad.f32 r0.x, c3.y, r1.z, r0.x
-mad.f32 r0.y, c3.x, r1.z, r0.y
-mad.f32 r0.x, c4.y, r1.w, r0.x
-mad.f32 r1.x, c4.x, r1.w, r0.y
-mov.f32f32 r1.y, r4.w
-mov.f32f32 r1.z, r4.z
-mov.f32f32 r0.y, r0.x
-mov.f32f32 r0.x, r1.x
-max.f r1.x, r1.y, c5.x
-max.f r1.y, r1.z, c5.x
-mov.f32f32 r4.y, r4.y
-mov.f32f32 r4.x, r4.x
+mad.f32 r1.x, c3.x, r1.z, r0.y
+mad.f32 r0.y, c4.y, r1.w, r0.x
+mad.f32 r0.x, c4.x, r1.w, r1.x
+max.f r1.x, r2.w, c5.x
+max.f r1.y, r2.z, c5.x
+max.f r1.z, r2.y, c5.x
+max.f r1.w, r2.x, c5.x
+min.f r3.w, r1.x, c5.y
+min.f r3.z, r1.y, c5.y
+min.f r3.y, r1.z, c5.y
+min.f r3.x, r1.w, c5.y
+max.f r1.x, r2.w, c5.x
+max.f r1.y, r2.z, c5.x
+max.f r1.z, r2.y, c5.x
+max.f r1.w, r2.x, c5.x
+min.f r2.w, r1.x, c5.y
+min.f r2.z, r1.y, c5.y
+min.f r2.y, r1.z, c5.y
+min.f r2.x, r1.w, c5.y
+max.f r1.x, r4.w, c5.x
+max.f r1.y, r4.z, c5.x
+max.f r4.y, r4.y, c5.x
+max.f r4.x, r4.x, c5.x
min.f r1.w, r1.x, c5.y
min.f r1.z, r1.y, c5.y
-max.f r1.x, r4.y, c5.x
-max.f r4.x, r4.x, c5.x
-(rpt1)nop
-min.f r1.y, r1.x, c5.y
+min.f r1.y, r4.y, c5.y
min.f r1.x, r4.x, c5.y
end
nop
nop
+nop
; VERT: outputs: r0.x (0:0) r1.x (1:0) r2.x (1:1) r3.x (2:0)
-; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) r4.x (0:0,cm=f,il=12,b=0) r0.x (0:0,cm=f,il=16,b=0)
-; VERT: 75 instructions, 0 half, 6 full
+; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) r4.x (0:0,cm=f,il=12,b=0) r2.x (0:0,cm=f,il=16,b=0)
+; VERT: 41 instructions, 0 half, 5 full
diff --git a/reference/vs-op-neg-int.asm b/reference/vs-op-neg-int.asm
index 001aeed..b677421 100644
--- a/reference/vs-op-neg-int.asm
+++ b/reference/vs-op-neg-int.asm
@@ -1,9 +1,9 @@
; options:
; VERT: new compiler
-@in(r1.x) in0
-@in(r1.y) in1
-@in(r1.z) in2
-@in(r1.w) in3
+@in(r0.x) in0
+@in(r0.y) in1
+@in(r0.z) in2
+@in(r0.w) in3
@out(r0.x) out0
@out(r0.y) out1
@out(r0.z) out2
@@ -12,35 +12,28 @@
@out(r1.y) out5
@out(r1.z) out6
@out(r1.w) out7
-(sy)(ss)absneg.f r0.x, (neg)c0.x
-mov.f32f32 r2.x, c2.y
-mov.f32f32 r2.y, c2.y
+@const(c2.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000
+(sy)(ss)absneg.f r1.x, (neg)c0.x
+mov.f32f32 r1.y, c2.y
+mov.f32f32 r1.z, c2.y
+mov.f32f32 r2.x, c2.x
+cmps.f.eq r1.x, r1.x, c1.x
+mov.f32f32 r2.y, c2.x
mov.f32f32 r2.z, c2.x
-cmps.f.eq r0.x, r0.x, c1.x
-mov.f32f32 r2.w, c2.x
-mov.f32f32 r3.x, c2.x
-mov.f32f32 r3.y, c2.y
-cov.u32f32 r0.x, r0.x
-mov.f32f32 r3.z, c2.y
-mov.f32f32 r3.w, c2.x
-mov.f32f32 r0.w, r1.w
-mov.f32f32 r1.w, r0.x
-mov.f32f32 r0.z, r1.z
-mov.f32f32 r0.y, r1.y
-mov.f32f32 r0.x, r1.x
-cmps.f.ne r1.x, r1.w, c2.x
+mov.f32f32 r2.w, c2.y
+cov.u32f32 r1.x, r1.x
+mov.f32f32 r3.x, c2.y
+mov.f32f32 r3.y, c2.x
+nop
+cmps.f.ne r1.x, r1.x, c2.x
(rpt2)nop
-sel.b32 r1.y, r2.y, r1.x, r2.x
-sel.b32 r1.z, r2.w, r1.x, r2.z
-sel.b32 r2.x, r3.y, r1.x, r3.x
-sel.b32 r1.x, r3.w, r1.x, r3.z
-mov.f32f32 r1.w, r1.y
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r1.y, r2.x
-mov.f32f32 r1.x, r1.x
+sel.b32 r1.w, r1.z, r1.x, r1.y
+sel.b32 r1.z, r2.y, r1.x, r2.x
+sel.b32 r1.y, r2.w, r1.x, r2.z
+sel.b32 r1.x, r3.y, r1.x, r3.x
end
nop
; VERT: outputs: r0.x (0:0) r1.x (5:20)
-; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0)
-; VERT: 29 instructions, 0 half, 4 full
+; VERT: inputs: r0.x (0:0,cm=f,il=8,b=0)
+; VERT: 21 instructions, 0 half, 4 full
diff --git a/reference/webgl-blob-frag.asm b/reference/webgl-blob-frag.asm
index 453a8e7..aba1ca9 100644
--- a/reference/webgl-blob-frag.asm
+++ b/reference/webgl-blob-frag.asm
@@ -1,32 +1,27 @@
; options:
-; FRAG: new compiler
+; FRAG: TGSI compiler
@in(r0.x) in0
@in(r0.y) in1
@out(r1.x) out0
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c0.x) 0x3f800000, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)bary.f r0.z, 0, r0.x
-bary.f (ei)r0.x, 1, r0.x
-mov.f32f32 r0.y, c0.x
-nop
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r1.w, r0.y
-nop
-mov.f32f32 r0.y, r0.z
-mov.f32f32 r0.z, r0.x
-(rpt5)nop
-sam (f32)(xyz)r0.x, r0.y, s#0, t#0
-(sy)mov.f32f32 r0.x, r0.x
-(ss)mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-nop
-mov.f32f32 r1.z, r0.x
+bary.f (ei)r0.w, 1, r0.x
+mov.f32f32 r1.w, c0.x
+(rpt4)nop
+sam (f32)(xyz)r0.x, r0.z, s#0, t#0
+(sy)mov.f32f32 r1.z, r0.x
mov.f32f32 r1.y, r0.y
mov.f32f32 r1.x, r0.z
end
+nop
+nop
+nop
; FRAG: outputs: r1.x (1:0)
; FRAG: inputs: r0.x (5:20,cm=f,il=8,b=1)
-; FRAG: 25 instructions, 0 half, 2 full
+; FRAG: 13 instructions, 0 half, 2 full
+; pos (bary): r0.x
+; color: r1.x
diff --git a/reference/webgl-water/webgl-water-13.asm b/reference/webgl-water/webgl-water-13.asm
index 2c03e4f..284e180 100644
--- a/reference/webgl-water/webgl-water-13.asm
+++ b/reference/webgl-water/webgl-water-13.asm
@@ -1,13 +1,13 @@
; options:
; VERT: new compiler
-@in(r2.x) in0
-@in(r2.y) in1
-@in(r2.z) in2
-@in(r2.w) in3
-@in(r0.x) in4
-@in(r0.y) in5
-@in(r0.z) in6
-@in(r0.w) in7
+@in(r1.x) in0
+@in(r1.y) in1
+@in(r1.z) in2
+@in(r1.w) in3
+@in(r2.x) in4
+@in(r2.y) in5
+@in(r2.z) in6
+@in(r2.w) in7
@out(r0.x) out0
@out(r0.y) out1
@out(r0.z) out2
@@ -16,39 +16,36 @@
@out(r1.y) out5
@out(r1.z) out6
@out(r1.w) out7
-(sy)(ss)mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
-max.f r0.w, r0.w, c4.x
-max.f r0.z, r0.z, c4.x
-max.f r0.y, r0.y, c4.x
-max.f r0.x, r0.x, c4.x
-min.f r1.w, r0.w, c4.y
-min.f r1.z, r0.z, c4.y
-min.f r1.y, r0.y, c4.y
-min.f r1.x, r0.x, c4.y
-mul.f r0.x, r2.x, c0.w
-mul.f r0.y, r2.x, c0.z
-mad.f32 r0.x, c1.w, r2.y, r0.x
-mad.f32 r0.y, c1.z, r2.y, r0.y
-mad.f32 r0.x, c2.w, r2.z, r0.x
-mad.f32 r0.y, c2.z, r2.z, r0.y
-mad.f32 r0.w, c3.w, r2.w, r0.x
-mad.f32 r0.z, c3.z, r2.w, r0.y
-mul.f r0.x, r2.x, c0.y
-mul.f r0.y, r2.x, c0.x
-mad.f32 r0.x, c1.y, r2.y, r0.x
-mad.f32 r0.y, c1.x, r2.y, r0.y
-mad.f32 r0.x, c2.y, r2.z, r0.x
-mad.f32 r2.x, c2.x, r2.z, r0.y
-mad.f32 r0.y, c3.y, r2.w, r0.x
-mad.f32 r0.x, c3.x, r2.w, r2.x
+@const(c4.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000
+(sy)(ss)mul.f r0.x, r1.x, c0.w
+mul.f r0.y, r1.x, c0.z
+mad.f32 r0.x, c1.w, r1.y, r0.x
+mad.f32 r0.y, c1.z, r1.y, r0.y
+mad.f32 r0.x, c2.w, r1.z, r0.x
+mad.f32 r0.y, c2.z, r1.z, r0.y
+mad.f32 r0.w, c3.w, r1.w, r0.x
+mad.f32 r0.z, c3.z, r1.w, r0.y
+mul.f r0.x, r1.x, c0.y
+mul.f r0.y, r1.x, c0.x
+mad.f32 r0.x, c1.y, r1.y, r0.x
+mad.f32 r0.y, c1.x, r1.y, r0.y
+mad.f32 r0.x, c2.y, r1.z, r0.x
+mad.f32 r1.x, c2.x, r1.z, r0.y
+mad.f32 r0.y, c3.y, r1.w, r0.x
+mad.f32 r0.x, c3.x, r1.w, r1.x
+max.f r1.x, r2.w, c4.x
+max.f r1.y, r2.z, c4.x
+max.f r2.y, r2.y, c4.x
+max.f r2.x, r2.x, c4.x
+min.f r1.w, r1.x, c4.y
+min.f r1.z, r1.y, c4.y
+min.f r1.y, r2.y, c4.y
+min.f r1.x, r2.x, c4.y
end
nop
nop
nop
; VERT: outputs: r0.x (0:0) r1.x (1:0)
-; VERT: inputs: r2.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=f,il=12,b=0)
-; VERT: 29 instructions, 0 half, 3 full
+; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) r2.x (0:0,cm=f,il=12,b=0)
+; VERT: 25 instructions, 0 half, 3 full
diff --git a/reference/webgl-water/webgl-water-14.asm b/reference/webgl-water/webgl-water-14.asm
index 9bf6d4e..1c04ab7 100644
--- a/reference/webgl-water/webgl-water-14.asm
+++ b/reference/webgl-water/webgl-water-14.asm
@@ -6,35 +6,20 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c1.x) 0x3f800000, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)bary.f r0.z, 0, r0.x
-mov.f32f32 r0.w, c1.x
-bary.f (ei)r0.x, 1, r0.x
-nop
-mov.f32f32 r0.y, r0.z
-mul.f r0.z, r0.w, c0.x
-mov.f32f32 r0.x, r0.x
-nop
-mov.f32f32 r0.w, r0.y
-mov.f32f32 r1.w, r0.z
-mov.f32f32 r1.x, r0.x
-(rpt5)nop
-sam (f32)(xyz)r0.x, r0.w, s#0, t#0
-(sy)mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
-nop
-mul.f r0.z, r0.z, c0.x
-mul.f r0.y, r0.y, c0.x
-mul.f r0.x, r0.x, c0.x
-nop
-mov.f32f32 r1.z, r0.z
-mov.f32f32 r1.y, r0.y
-(ss)mov.f32f32 r1.x, r0.x
+bary.f (ei)r0.w, 1, r0.x
+mov.f32f32 r0.x, c1.x
+(rpt4)nop
+sam (f32)(xyz)r0.y, r0.z, s#0, t#0
+(sy)mul.f r1.z, r0.w, c0.x
+mul.f r1.y, r0.z, c0.x
+mul.f r1.x, r0.y, c0.x
+mul.f r1.w, r0.x, c0.x
end
nop
nop
-nop
; FRAG: outputs: r1.x (1:0)
; FRAG: inputs: r0.x (5:20,cm=f,il=8,b=1)
-; FRAG: 30 instructions, 0 half, 2 full
+; FRAG: 14 instructions, 0 half, 2 full
diff --git a/reference/webgl-water/webgl-water-18.asm b/reference/webgl-water/webgl-water-18.asm
index d3161d5..4538b69 100644
--- a/reference/webgl-water/webgl-water-18.asm
+++ b/reference/webgl-water/webgl-water-18.asm
@@ -6,31 +6,20 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c1.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)bary.f r0.z, 0, r0.x
-bary.f (ei)r0.x, 1, r0.x
-(rpt1)nop
-mov.f32f32 r0.y, r0.z
-mov.f32f32 r0.x, r0.x
-(rpt1)nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.x
+bary.f (ei)r0.w, 1, r0.x
(rpt5)nop
-sam (f32)(xyzw)r0.x, r0.y, s#0, t#0
-(sy)mul.f r0.w, r0.w, c0.x
-mul.f r0.x, r0.x, c0.x
-(ss)mul.f r0.y, r0.y, c0.x
-mul.f r0.z, r0.z, c0.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r1.w, r0.w
-mov.f32f32 r1.z, r0.x
-mov.f32f32 r1.y, r0.y
-mov.f32f32 r1.x, r0.z
+sam (f32)(xyzw)r0.x, r0.z, s#0, t#0
+(sy)mul.f r1.w, r0.w, c0.x
+mul.f r1.z, r0.x, c0.x
+mul.f r1.y, r0.y, c0.x
+mul.f r1.x, r0.z, c0.x
end
nop
+nop
+nop
; FRAG: outputs: r1.x (1:0)
; FRAG: inputs: r0.x (5:20,cm=f,il=8,b=1)
-; FRAG: 30 instructions, 0 half, 2 full
+; FRAG: 14 instructions, 0 half, 2 full
diff --git a/reference/webgl-water/webgl-water-20.asm b/reference/webgl-water/webgl-water-20.asm
index a2747d4..52b9c56 100644
--- a/reference/webgl-water/webgl-water-20.asm
+++ b/reference/webgl-water/webgl-water-20.asm
@@ -6,27 +6,20 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c1.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)bary.f r0.z, 0, r0.x
-bary.f (ei)r0.x, 1, r0.x
-(rpt1)nop
-mov.f32f32 r0.y, r0.z
-mov.f32f32 r0.x, r0.x
-(rpt1)nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.x
+bary.f (ei)r0.w, 1, r0.x
(rpt5)nop
-sam (f32)(xyzw)r0.x, r0.y, s#0, t#0
-(sy)mul.f r0.w, r0.w, c0.x
-(ss)mul.f r0.z, r0.z, c0.x
-mul.f r0.y, r0.y, c0.x
-mul.f r0.x, r0.x, c0.x
-mov.f32f32 r1.w, r0.w
-mov.f32f32 r1.z, r0.z
-mov.f32f32 r1.y, r0.y
-mov.f32f32 r1.x, r0.x
+sam (f32)(xyzw)r0.x, r0.z, s#0, t#0
+(sy)mul.f r1.w, r0.w, c0.x
+mul.f r1.z, r0.z, c0.x
+mul.f r1.y, r0.y, c0.x
+mul.f r1.x, r0.x, c0.x
end
nop
+nop
+nop
; FRAG: outputs: r1.x (1:0)
; FRAG: inputs: r0.x (5:20,cm=f,il=8,b=1)
-; FRAG: 26 instructions, 0 half, 2 full
+; FRAG: 14 instructions, 0 half, 2 full
diff --git a/reference/webgl-water/webgl-water-27.asm b/reference/webgl-water/webgl-water-27.asm
index 2c03e4f..284e180 100644
--- a/reference/webgl-water/webgl-water-27.asm
+++ b/reference/webgl-water/webgl-water-27.asm
@@ -1,13 +1,13 @@
; options:
; VERT: new compiler
-@in(r2.x) in0
-@in(r2.y) in1
-@in(r2.z) in2
-@in(r2.w) in3
-@in(r0.x) in4
-@in(r0.y) in5
-@in(r0.z) in6
-@in(r0.w) in7
+@in(r1.x) in0
+@in(r1.y) in1
+@in(r1.z) in2
+@in(r1.w) in3
+@in(r2.x) in4
+@in(r2.y) in5
+@in(r2.z) in6
+@in(r2.w) in7
@out(r0.x) out0
@out(r0.y) out1
@out(r0.z) out2
@@ -16,39 +16,36 @@
@out(r1.y) out5
@out(r1.z) out6
@out(r1.w) out7
-(sy)(ss)mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
-max.f r0.w, r0.w, c4.x
-max.f r0.z, r0.z, c4.x
-max.f r0.y, r0.y, c4.x
-max.f r0.x, r0.x, c4.x
-min.f r1.w, r0.w, c4.y
-min.f r1.z, r0.z, c4.y
-min.f r1.y, r0.y, c4.y
-min.f r1.x, r0.x, c4.y
-mul.f r0.x, r2.x, c0.w
-mul.f r0.y, r2.x, c0.z
-mad.f32 r0.x, c1.w, r2.y, r0.x
-mad.f32 r0.y, c1.z, r2.y, r0.y
-mad.f32 r0.x, c2.w, r2.z, r0.x
-mad.f32 r0.y, c2.z, r2.z, r0.y
-mad.f32 r0.w, c3.w, r2.w, r0.x
-mad.f32 r0.z, c3.z, r2.w, r0.y
-mul.f r0.x, r2.x, c0.y
-mul.f r0.y, r2.x, c0.x
-mad.f32 r0.x, c1.y, r2.y, r0.x
-mad.f32 r0.y, c1.x, r2.y, r0.y
-mad.f32 r0.x, c2.y, r2.z, r0.x
-mad.f32 r2.x, c2.x, r2.z, r0.y
-mad.f32 r0.y, c3.y, r2.w, r0.x
-mad.f32 r0.x, c3.x, r2.w, r2.x
+@const(c4.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000
+(sy)(ss)mul.f r0.x, r1.x, c0.w
+mul.f r0.y, r1.x, c0.z
+mad.f32 r0.x, c1.w, r1.y, r0.x
+mad.f32 r0.y, c1.z, r1.y, r0.y
+mad.f32 r0.x, c2.w, r1.z, r0.x
+mad.f32 r0.y, c2.z, r1.z, r0.y
+mad.f32 r0.w, c3.w, r1.w, r0.x
+mad.f32 r0.z, c3.z, r1.w, r0.y
+mul.f r0.x, r1.x, c0.y
+mul.f r0.y, r1.x, c0.x
+mad.f32 r0.x, c1.y, r1.y, r0.x
+mad.f32 r0.y, c1.x, r1.y, r0.y
+mad.f32 r0.x, c2.y, r1.z, r0.x
+mad.f32 r1.x, c2.x, r1.z, r0.y
+mad.f32 r0.y, c3.y, r1.w, r0.x
+mad.f32 r0.x, c3.x, r1.w, r1.x
+max.f r1.x, r2.w, c4.x
+max.f r1.y, r2.z, c4.x
+max.f r2.y, r2.y, c4.x
+max.f r2.x, r2.x, c4.x
+min.f r1.w, r1.x, c4.y
+min.f r1.z, r1.y, c4.y
+min.f r1.y, r2.y, c4.y
+min.f r1.x, r2.x, c4.y
end
nop
nop
nop
; VERT: outputs: r0.x (0:0) r1.x (1:0)
-; VERT: inputs: r2.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=f,il=12,b=0)
-; VERT: 29 instructions, 0 half, 3 full
+; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) r2.x (0:0,cm=f,il=12,b=0)
+; VERT: 25 instructions, 0 half, 3 full
diff --git a/reference/webgl-water/webgl-water-33.asm b/reference/webgl-water/webgl-water-33.asm
index 2c03e4f..284e180 100644
--- a/reference/webgl-water/webgl-water-33.asm
+++ b/reference/webgl-water/webgl-water-33.asm
@@ -1,13 +1,13 @@
; options:
; VERT: new compiler
-@in(r2.x) in0
-@in(r2.y) in1
-@in(r2.z) in2
-@in(r2.w) in3
-@in(r0.x) in4
-@in(r0.y) in5
-@in(r0.z) in6
-@in(r0.w) in7
+@in(r1.x) in0
+@in(r1.y) in1
+@in(r1.z) in2
+@in(r1.w) in3
+@in(r2.x) in4
+@in(r2.y) in5
+@in(r2.z) in6
+@in(r2.w) in7
@out(r0.x) out0
@out(r0.y) out1
@out(r0.z) out2
@@ -16,39 +16,36 @@
@out(r1.y) out5
@out(r1.z) out6
@out(r1.w) out7
-(sy)(ss)mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
-max.f r0.w, r0.w, c4.x
-max.f r0.z, r0.z, c4.x
-max.f r0.y, r0.y, c4.x
-max.f r0.x, r0.x, c4.x
-min.f r1.w, r0.w, c4.y
-min.f r1.z, r0.z, c4.y
-min.f r1.y, r0.y, c4.y
-min.f r1.x, r0.x, c4.y
-mul.f r0.x, r2.x, c0.w
-mul.f r0.y, r2.x, c0.z
-mad.f32 r0.x, c1.w, r2.y, r0.x
-mad.f32 r0.y, c1.z, r2.y, r0.y
-mad.f32 r0.x, c2.w, r2.z, r0.x
-mad.f32 r0.y, c2.z, r2.z, r0.y
-mad.f32 r0.w, c3.w, r2.w, r0.x
-mad.f32 r0.z, c3.z, r2.w, r0.y
-mul.f r0.x, r2.x, c0.y
-mul.f r0.y, r2.x, c0.x
-mad.f32 r0.x, c1.y, r2.y, r0.x
-mad.f32 r0.y, c1.x, r2.y, r0.y
-mad.f32 r0.x, c2.y, r2.z, r0.x
-mad.f32 r2.x, c2.x, r2.z, r0.y
-mad.f32 r0.y, c3.y, r2.w, r0.x
-mad.f32 r0.x, c3.x, r2.w, r2.x
+@const(c4.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000
+(sy)(ss)mul.f r0.x, r1.x, c0.w
+mul.f r0.y, r1.x, c0.z
+mad.f32 r0.x, c1.w, r1.y, r0.x
+mad.f32 r0.y, c1.z, r1.y, r0.y
+mad.f32 r0.x, c2.w, r1.z, r0.x
+mad.f32 r0.y, c2.z, r1.z, r0.y
+mad.f32 r0.w, c3.w, r1.w, r0.x
+mad.f32 r0.z, c3.z, r1.w, r0.y
+mul.f r0.x, r1.x, c0.y
+mul.f r0.y, r1.x, c0.x
+mad.f32 r0.x, c1.y, r1.y, r0.x
+mad.f32 r0.y, c1.x, r1.y, r0.y
+mad.f32 r0.x, c2.y, r1.z, r0.x
+mad.f32 r1.x, c2.x, r1.z, r0.y
+mad.f32 r0.y, c3.y, r1.w, r0.x
+mad.f32 r0.x, c3.x, r1.w, r1.x
+max.f r1.x, r2.w, c4.x
+max.f r1.y, r2.z, c4.x
+max.f r2.y, r2.y, c4.x
+max.f r2.x, r2.x, c4.x
+min.f r1.w, r1.x, c4.y
+min.f r1.z, r1.y, c4.y
+min.f r1.y, r2.y, c4.y
+min.f r1.x, r2.x, c4.y
end
nop
nop
nop
; VERT: outputs: r0.x (0:0) r1.x (1:0)
-; VERT: inputs: r2.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=f,il=12,b=0)
-; VERT: 29 instructions, 0 half, 3 full
+; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) r2.x (0:0,cm=f,il=12,b=0)
+; VERT: 25 instructions, 0 half, 3 full
diff --git a/reference/webgl-water/webgl-water-34.asm b/reference/webgl-water/webgl-water-34.asm
index d3d8a07..ccc345b 100644
--- a/reference/webgl-water/webgl-water-34.asm
+++ b/reference/webgl-water/webgl-water-34.asm
@@ -6,15 +6,16 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)bary.f r0.z, 0, r0.x
-bary.f (ei)r0.x, 1, r0.x
-(rpt1)nop
-mov.f32f32 r0.y, r0.z
-mov.f32f32 r0.z, r0.x
+bary.f (ei)r0.w, 1, r0.x
(rpt5)nop
-sam (f32)(xyzw)r1.x, r0.y, s#0, t#0
+sam (f32)(xyzw)r1.x, r0.z, s#0, t#0
end
+nop
+nop
+nop
; FRAG: outputs: r1.x (1:0)
; FRAG: inputs: r0.x (5:0,cm=f,il=8,b=1)
-; FRAG: 14 instructions, 0 half, 2 full
+; FRAG: 10 instructions, 0 half, 2 full
diff --git a/reference/webgl-water/webgl-water-36.asm b/reference/webgl-water/webgl-water-36.asm
index fc933f4..1c768fc 100644
--- a/reference/webgl-water/webgl-water-36.asm
+++ b/reference/webgl-water/webgl-water-36.asm
@@ -6,71 +6,49 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c4.x) 0x3f000000, 0x00000000, 0x3f800000, 0x40490ff9
+@const(c5.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)mov.f32f32 r0.z, c4.x
-bary.f r0.w, 0, r0.x
-mov.f32f32 r1.x, c4.x
-bary.f (ei)r0.x, 1, r0.x
-mad.f32 r0.y, c1.x, r0.z, c4.x
-mov.f32f32 r0.z, r0.w
-mad.f32 r1.x, c1.y, r1.x, c4.x
-mov.f32f32 r1.y, r0.x
-add.f r0.y, r0.y, (neg)r0.w
-mov.f32f32 r0.z, r0.z
-add.f r0.x, r1.x, (neg)r0.x
-mov.f32f32 r0.w, r1.y
-mov.f32f32 r0.y, r0.y
+mov.f32f32 r0.w, c4.x
mov.f32f32 r1.x, c2.x
-(rpt1)nop
-mul.f r0.y, r0.y, r0.y
-mov.f32f32 r0.x, r0.x
-sam (f32)(xyzw)r2.x, r0.z, s#0, t#0
-(sy)(ss)mov.f32f32 r0.z, r2.y
-mov.f32f32 r0.w, r2.w
-mov.f32f32 r1.z, r2.z
-mad.f32 r0.x, r0.x, r0.x, r0.y
-mov.f32f32 r1.y, r0.z
+bary.f r1.y, 0, r0.x
+mad.f32 r0.z, c1.x, r0.z, c4.x
+mad.f32 r0.w, c1.y, r0.w, c4.x
+bary.f (ei)r1.z, 1, r0.x
+nop
+add.f r0.x, r0.z, (neg)r1.y
rcp r0.y, r1.x
-mov.f32f32 r1.w, r0.w
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r0.x, r0.x
-(rpt5)nop
-sqrt r0.x, r0.x
-(ss)mov.f32f32 r0.x, r0.x
+(rpt1)nop
+add.f r0.z, r0.w, (neg)r1.z
+mov.f32f32 r0.w, r0.x
+(ss)nop
+sam (f32)(xyzw)r1.x, r1.y, s#0, t#0
(rpt2)nop
-mul.f r0.x, r0.x, r0.y
+mul.f r0.x, r0.x, r0.w
+mov.f32f32 r0.w, r0.z
(rpt2)nop
-mov.f32f32 r0.x, r0.x
+mad.f32 r0.x, r0.z, r0.w, r0.x
+(rpt5)nop
+sqrt r0.x, r0.x
+(ss)mul.f r0.x, r0.x, r0.y
(rpt2)nop
add.f r0.x, c4.z, (neg)r0.x
(rpt2)nop
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
max.f r0.x, c4.y, r0.x
(rpt2)nop
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
mul.f r0.x, r0.x, c4.w
-(rpt2)nop
-mov.f32f32 r0.x, r0.x
(rpt5)nop
cos r0.x, r0.x
-(ss)mov.f32f32 r0.x, r0.x
-(rpt2)nop
-mul.f r0.x, r0.x, c4.x
-(rpt2)nop
-mov.f32f32 r0.x, r0.x
+(ss)mul.f r0.x, r0.x, c4.x
(rpt2)nop
add.f r0.x, c4.x, (neg)r0.x
(rpt2)nop
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
-mad.f32 r0.x, c3.x, r0.x, r2.x
-(rpt2)nop
-mov.f32f32 r1.x, r0.x
+(sy)mad.f32 r1.x, c3.x, r0.x, r1.x
end
nop
nop
+nop
; FRAG: outputs: r1.x (1:0)
; FRAG: inputs: r0.x (5:20,cm=f,il=8,b=1)
-; FRAG: 101 instructions, 0 half, 3 full
+; FRAG: 62 instructions, 0 half, 2 full
diff --git a/reference/webgl-water/webgl-water-37.asm b/reference/webgl-water/webgl-water-37.asm
index b31dd2c..b548823 100644
--- a/reference/webgl-water/webgl-water-37.asm
+++ b/reference/webgl-water/webgl-water-37.asm
@@ -11,27 +11,16 @@
@out(r1.y) out5
@out(r1.z) out6
@out(r1.w) out7
-(sy)(ss)mov.f32f32 r0.w, (0.000000)
-mov.f32f32 r1.x, (0.000000)
+@const(c0.x) 0x3f800000, 0x3f000000, 0x00000000, 0x00000000
+(sy)(ss)mov.f32f32 r1.w, (0.000000)
+mov.f32f32 r1.z, (0.000000)
mad.f32 r1.y, c0.y, r0.y, c0.y
-mad.f32 r2.x, c0.y, r0.x, c0.y
-mov.f32f32 r1.w, r0.w
-mov.f32f32 r1.z, r1.x
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.x, r2.x
+mad.f32 r1.x, c0.y, r0.x, c0.y
mov.f32f32 r0.w, c0.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
end
nop
nop
-nop
; VERT: outputs: r0.x (0:0) r1.x (5:20)
; VERT: inputs: r0.x (0:0,cm=7,il=8,b=0)
-; VERT: 17 instructions, 0 half, 3 full
+; VERT: 6 instructions, 0 half, 2 full
diff --git a/reference/webgl-water/webgl-water-38.asm b/reference/webgl-water/webgl-water-38.asm
index 015bad5..a32e656 100644
--- a/reference/webgl-water/webgl-water-38.asm
+++ b/reference/webgl-water/webgl-water-38.asm
@@ -6,351 +6,213 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c3.x) 0x3f000000, 0x3f7fbe77, 0x3f800000, 0x00000000
+@const(c4.x) 0x3f666666, 0x40800000, 0x3f1013a9, 0x3f400d1b
+@const(c5.x) 0x3ec00000, 0x40000000, 0xbf800000, 0x3f800000
+@const(c6.x) 0xc39044fe, 0xbe2ab368, 0x41200000, 0x3ef5c28f
+@const(c7.x) 0x3ef5c28f, 0x3f8a3d71, 0x3f99999a, 0x00000000
+@const(c8.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)absneg.f r0.z, (neg)c0.y
bary.f r0.w, 0, r0.x
-bary.f r1.x, 1, r0.x
-bary.f (ei)r0.x, 2, r0.x
+bary.f r1.x, 2, r0.x
+bary.f (ei)r0.x, 1, r0.x
mul.f r0.y, r0.z, r0.z
add.f r1.y, r0.w, (neg)c1.x
-mad.f32 r1.z, c3.x, r0.w, c3.x
+absneg.f r1.z, (abs)r1.x
mad.f32 r1.w, c3.x, r0.w, c3.x
add.f r0.y, c3.z, (neg)r0.y
mul.f r1.y, r1.y, r1.y
-add.f r2.x, r1.x, (neg)c1.y
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.w, r1.w
-mad.f32 r1.y, r2.x, r2.x, r1.y
-mad.f32 r2.x, c3.x, r1.x, c3.z
+add.f r2.y, r0.x, (neg)c1.y
+cmps.f.lt r1.z, c3.y, r1.z
mul.f r0.y, r0.y, c4.z
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r0.y, r0.y
-add.f r2.y, r0.x, (neg)c1.z
-mov.f32f32 r2.z, r1.z
-mov.f32f32 r1.z, r1.w
+mad.f32 r2.x, c3.x, r1.x, c3.x
+mad.f32 r2.z, c3.x, r0.w, c3.x
+mul.f r2.w, r0.w, r0.w
add.f r0.y, c3.z, (neg)r0.y
mad.f32 r1.y, r2.y, r2.y, r1.y
-mad.f32 r1.w, c3.x, r0.x, c3.x
-mad.f32 r2.y, c3.x, r0.x, c3.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r2.w, r0.y
+add.f r2.y, r1.x, (neg)c1.z
+cov.u32f32 r1.z, r1.z
+mov.f32f32 r3.x, r0.y
cmps.f.lt r0.y, r0.y, c3.w
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r2.y, r2.y
-sqrt r1.y, r1.y
-mov.f32f32 r2.x, r2.x
-(ss)mov.f32f32 r1.y, r1.y
-mov.f32f32 r3.x, c2.x
-sqrt r2.w, r2.w
-(ss)mov.f32f32 r3.y, r2.w
-mov.f32f32 r3.z, r0.z
+mad.f32 r1.y, r2.y, r2.y, r1.y
+cmps.f.ne r1.z, r1.z, c3.w
+mad.f32 r3.y, c3.x, r0.x, c3.z
+mov.f32f32 r2.y, c3.w
+mov.f32f32 r3.w, c3.w
+sqrt r3.x, r3.x
+(ss)mad.f32 r3.x, c4.w, r0.z, r3.x
cov.u32f32 r0.y, r0.y
-(ss)mov.f32f32 r2.w, r1.w
-mov.f32f32 r1.w, r2.y
-mad.f32 r2.y, c4.w, r3.z, r3.y
-rcp r3.x, r3.x
-(ss)mul.f r1.y, r1.y, r3.x
+sqrt r1.y, r1.y
+mad.f32 r3.z, c3.x, r0.w, c3.x
+mov.f32f32 r4.x, c2.x
+mov.f32f32 r4.y, r3.x
+mul.f r3.x, r3.x, c3.w
+absneg.f r4.z, (neg)c0.z
cmps.f.ne r0.y, r0.y, c3.w
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r1.y, r1.y
-sam (f32)(x)r2.z, r2.z, s#2, t#2
-(ss)nop
-sam (f32)(x)r2.w, r1.z, s#2, t#2
-(sy)(ss)cmps.f.lt r1.z, r1.x, r2.z
-cmps.f.lt r1.w, r1.x, r2.w
-mul.f r2.z, r2.y, c3.w
-mul.f r2.w, r2.y, c3.z
-mul.f r2.y, r2.y, c3.w
-cov.u32f32 r1.z, r1.z
-mov.f32f32 r2.z, r2.z
-absneg.f r3.x, (neg)c0.x
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r3.x, r3.x
-absneg.f r3.y, (neg)c0.z
-mad.f32 r0.z, c4.w, r0.z, (neg)r2.w
-mov.f32f32 r2.w, c3.w
-mad.f32 r2.z, c4.w, r3.x, (neg)r2.z
-mov.f32f32 r3.x, c3.w
-mov.f32f32 r3.y, r3.y
-sel.b32 r0.z, r2.w, r0.y, r0.z
+mul.f r4.w, r4.y, c3.w
+absneg.f r5.x, (neg)c0.x
+mul.f r4.y, r4.y, c3.z
+mad.f32 r3.x, c4.w, r4.z, (neg)r3.x
+mad.f32 r0.z, c4.w, r0.z, (neg)r4.y
+mad.f32 r4.y, c4.w, r5.x, (neg)r4.w
+mov.f32f32 r4.z, c3.w
+mov.f32f32 r4.w, c3.w
+mov.f32f32 r5.x, c3.w
+rcp r4.x, r4.x
+(ss)mul.f r1.y, r1.y, r4.x
+sel.b32 r4.x, r4.z, r0.y, r4.y
+sel.b32 r0.z, r4.w, r0.y, r0.z
+sel.b32 r0.y, r5.x, r0.y, r3.x
+sam (f32)(xyz)r4.y, r3.y, s#0, t#0
+mad.f32 r3.x, c3.x, r0.w, c3.x
+(ss)absneg.f r3.y, (neg)r4.x
+absneg.f r0.z, (neg)r0.z
+mov.f32f32 r3.z, c3.w
+absneg.f r0.y, (neg)r0.y
+mul.f r4.x, r0.x, r3.y
log2 r1.y, r1.y
(ss)mul.f r1.y, c4.y, r1.y
-sel.b32 r2.z, r3.x, r0.y, r2.z
-mad.f32 r2.y, c4.w, r3.y, (neg)r2.y
-absneg.f r0.z, (neg)r0.z
-mov.f32f32 r1.y, r1.y
-absneg.f r2.z, (neg)r2.z
-absneg.f r2.w, (abs)r0.x
-mov.f32f32 r3.x, r0.z
-mov.f32f32 r3.y, c3.w
-mov.f32f32 r3.z, r2.z
-cmps.f.lt r2.w, c3.y, r2.w
-mov.f32f32 r3.w, r0.z
+sel.b32 r2.y, r2.y, r1.z, r3.z
+rcp r3.z, r3.y
+absneg.f r5.x, (neg)r0.w
+rcp r5.y, r0.z
+(ss)mul.f r4.x, r4.x, r5.y
+rcp r5.z, r0.z
+absneg.f r5.w, (neg)r0.x
+absneg.f r6.x, (abs)r0.w
+add.f r6.y, c5.z, r5.x
+add.f r4.x, r0.w, (neg)r4.x
+add.f r6.z, c5.z, r5.w
+cmps.f.lt r6.x, c3.y, r6.x
+mul.f r3.z, r6.y, r3.z
+rcp r6.y, r3.y
+add.f r5.x, c5.w, r5.x
+mad.f32 r6.w, c5.x, r4.x, c3.x
+mul.f r4.x, r0.x, r0.y
+mov.f32f32 r7.x, r3.z
+(ss)mul.f r5.x, r5.x, r6.y
+mul.f r5.z, r6.z, r5.z
+cov.u32f32 r6.x, r6.x
+mul.f r4.x, r4.x, r5.y
+max.f r3.z, r3.z, r5.x
+rcp r5.y, r0.z
+add.f r5.w, c5.y, r5.w
+mov.f32f32 r5.x, r5.x
+add.f r4.x, r1.x, (neg)r4.x
+mov.f32f32 r6.y, r5.z
+(ss)mul.f r5.y, r5.w, r5.y
+min.f r5.x, r7.x, r5.x
+mad.f32 r7.x, c5.x, r4.x, c3.x
+cmps.f.ne r4.x, r6.x, c3.w
+max.f r5.z, r5.z, r5.y
+mov.f32f32 r5.y, r5.y
+absneg.f r0.w, (neg)r0.w
+rcp r5.w, r0.y
+absneg.f r6.x, (neg)r1.x
+min.f r3.z, r3.z, r5.z
+sam (f32)(xy)r6.z, r6.w, s#1, t#1
+min.f r5.y, r6.y, r5.y
+sel.b32 r0.w, r0.w, r4.x, r2.y
+add.f r2.y, c5.z, r6.x
+rcp r5.z, r0.y
+add.f r6.x, c5.w, r6.x
+max.f r5.x, r5.x, r5.y
+mul.f r0.w, r3.y, r0.w
+(ss)mul.f r2.y, r2.y, r5.w
+(ss)mul.f r3.y, r6.x, r5.z
+mov.f32f32 r5.y, c3.z
exp2 r1.y, r1.y
nop
(ss)rcp r1.y, r1.y
-sel.b32 r0.y, r3.y, r0.y, r2.y
-rcp r2.y, r3.x
-(ss)mov.f32f32 r3.x, r2.z
-cov.u32f32 r2.w, r2.w
-rcp r3.y, r3.z
-(ss)absneg.f r3.z, (neg)r0.w
-absneg.f r0.y, (neg)r0.y
-mul.f r3.x, r1.x, r3.x
-mov.f32f32 r2.w, r2.w
-add.f r4.x, c5.z, r3.z
-mov.f32f32 r4.y, r0.y
-(ss)mul.f r3.x, r3.x, r2.y
-cmps.f.ne r2.w, r2.w, c3.w
-mul.f r3.y, r4.x, r3.y
-mul.f r4.x, r1.x, r4.y
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r4.y, c3.w
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r4.z, r2.z
-add.f r3.x, r0.w, (neg)r3.x
-mul.f r2.y, r4.x, r2.y
-mov.f32f32 r4.x, r4.y
-mov.f32f32 r4.y, c3.w
-mov.f32f32 r4.w, r0.y
-rcp r3.w, r3.w
-mov.f32f32 r3.x, r3.x
-rcp r4.z, r4.z
-add.f r3.z, c5.w, r3.z
-mov.f32f32 r2.y, r2.y
-sel.b32 r4.x, r4.x, r2.w, r4.y
-mad.f32 r3.x, c5.x, r3.x, c3.x
-mov.f32f32 r3.z, r3.z
-add.f r2.y, r0.x, (neg)r2.y
-absneg.f r4.y, (abs)r0.w
-rcp r4.w, r4.w
-mov.f32f32 r3.x, r3.x
-(ss)mul.f r3.z, r3.z, r4.z
-mov.f32f32 r2.y, r2.y
-cmps.f.lt r4.y, c3.y, r4.y
-(ss)absneg.f r4.z, (neg)r0.x
-mov.f32f32 r3.z, r3.z
-mov.f32f32 r3.x, r3.x
-mad.f32 r2.y, c5.x, r2.y, c3.x
-cov.u32f32 r4.y, r4.y
-max.f r5.x, r3.y, r3.z
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r2.y, r2.y
-min.f r3.y, r3.y, r3.z
-mov.f32f32 r3.z, r5.x
-absneg.f r5.x, (neg)r1.x
-mov.f32f32 r5.y, r3.x
-mov.f32f32 r2.y, r2.y
-cmps.f.ne r3.x, r4.y, c3.w
-add.f r4.y, c5.z, r5.x
-absneg.f r5.z, (neg)r0.w
-add.f r5.w, c5.z, r4.z
-mov.f32f32 r1.y, r1.y
-mul.f r3.w, r4.y, r3.w
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r4.y, r5.z
-mul.f r4.w, r5.w, r4.w
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r5.w, r0.z
mov.f32f32 r5.z, r2.y
-mov.f32f32 r2.y, r4.y
-mov.f32f32 r4.y, r4.w
-mul.f r1.y, c4.x, r1.y
-mov.f32f32 r4.w, r0.y
-sel.b32 r2.y, r2.y, r3.x, r4.x
-rcp r4.x, r5.w
-add.f r5.x, c5.y, r5.x
-sam (f32)(xy)r5.y, r5.y, s#1, t#1
-mov.f32f32 r1.y, r1.y
-(ss)mov.f32f32 r5.w, r0.z
-mul.f r2.y, r2.z, r2.y
-mov.f32f32 r2.z, r5.x
+max.f r2.y, r2.y, r3.y
+sel.b32 r3.w, r3.w, r1.z, r5.y
+mov.f32f32 r5.y, c3.w
+mov.f32f32 r5.w, r3.y
+min.f r2.y, r3.z, r2.y
+(ss)mul.f r1.y, c4.x, r1.y
+mad.f32 r3.y, c3.x, r1.x, c3.x
+sel.b32 r3.z, r5.y, r4.x, r3.w
+mov.f32f32 r3.w, r2.y
+min.f r5.y, r5.z, r5.w
+mad.f32 r2.y, r0.z, r2.y, r0.x
+mad.f32 r0.z, r0.z, r3.z, r0.w
+absneg.f r0.w, (neg)r1.x
+max.f r3.z, r5.x, r5.y
+add.f r2.y, r2.y, c6.y
mov.f32f32 r5.x, c3.w
-rcp r4.w, r4.w
add.f r1.y, c3.z, (neg)r1.y
-add.f r4.z, c5.w, r4.z
-(ss)mul.f r2.z, r2.z, r4.x
-mov.f32f32 r4.x, r5.x
-mov.f32f32 r5.x, c3.z
-mov.f32f32 r4.z, r4.z
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r1.y, r1.y
-mul.f r6.x, r0.w, r0.w
-mul.f r4.z, r4.z, r4.w
-(ss)max.f r4.w, r3.w, r2.z
-sel.b32 r4.x, r4.x, r2.w, r5.x
-mov.f32f32 r5.x, c3.w
-min.f r2.z, r3.w, r2.z
-mov.f32f32 r3.w, r4.w
-mov.f32f32 r4.z, r4.z
-mov.f32f32 r4.w, r5.x
-mad.f32 r5.x, r1.x, r1.x, r6.x
-min.f r3.z, r3.z, r3.w
-max.f r3.w, r4.y, r4.z
-mov.f32f32 r4.w, r4.w
-max.f r2.z, r3.y, r2.z
-min.f r3.y, r4.y, r4.z
-mov.f32f32 r3.w, r3.w
-sel.b32 r4.x, r4.w, r3.x, r4.x
-mov.f32f32 r4.y, r5.x
-mov.f32f32 r1.z, r1.z
-min.f r3.z, r3.z, r3.w
-mad.f32 r0.z, r0.z, r4.x, r2.y
-max.f r2.y, r2.z, r3.y
-nop
-mov.f32f32 r2.z, r3.z
-mov.f32f32 r0.z, r0.z
-absneg.f r3.y, (neg)r0.x
-mov.f32f32 r2.y, r2.y
-mad.f32 r3.z, r5.w, r2.z, r1.x
-mad.f32 r3.w, r0.x, r0.x, r4.y
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r4.x, c3.w
-mov.f32f32 r3.z, r3.z
-add.f r2.y, r2.z, (neg)r2.y
-cmps.f.ne r2.z, r1.z, c3.w
-cov.u32f32 r1.z, r1.w
-add.f r1.w, r3.z, c6.y
-sel.b32 r3.y, r3.y, r2.w, r4.x
-mov.f32f32 r3.z, c3.w
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r1.w, r1.w
-rsq r3.w, r3.w
-(ss)mov.f32f32 r3.w, r3.w
-mov.f32f32 r3.z, r3.z
-mad.f32 r2.y, c6.z, r2.y, c3.z
-mul.f r1.w, c6.x, r1.w
-mul.f r3.w, c3.x, r3.w
-mov.f32f32 r3.z, r3.z
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r2.y, r2.y
-sel.b32 r3.y, r3.z, r3.x, r3.y
-mul.f r1.y, r3.w, r1.y
-cmps.f.ne r1.z, r1.z, c3.w
-mov.f32f32 r3.z, r2.x
-mad.f32 r2.x, c3.x, r0.x, c3.x
-mad.f32 r1.x, c3.x, r1.x, c3.z
-rcp r2.y, r2.y
-(ss)mov.f32f32 r2.y, r2.y
-mad.f32 r0.y, r0.y, r3.y, r0.z
-mov.f32f32 r0.z, r1.y
-mov.f32f32 r1.y, r2.x
-mul.f r1.w, r1.w, r2.y
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r2.x, r0.z
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r1.w, r1.w
+add.f r3.z, r3.w, (neg)r3.z
+mul.f r2.y, c6.x, r2.y
+sel.b32 r0.w, r0.w, r1.z, r5.x
+mov.f32f32 r3.w, c3.w
+mad.f32 r3.z, c6.z, r3.z, c3.z
+mad.f32 r2.w, r0.x, r0.x, r2.w
+sam (f32)(xyz)r5.x, r3.x, s#0, t#0
+(sy)(ss)sel.b32 r3.x, r4.w, r1.z, r5.z
+sel.b32 r3.y, r4.z, r1.z, r5.y
+sel.b32 r0.w, r3.w, r4.x, r0.w
+sel.b32 r1.z, r4.y, r1.z, r5.x
+mad.f32 r3.w, r1.x, r1.x, r2.w
+rcp r2.w, r3.z
+(ss)mul.f r2.y, r2.y, r2.w
+mad.f32 r0.y, r0.y, r0.w, r0.z
+mad.f32 r0.z, c3.x, r0.x, c3.z
+mad.f32 r0.w, c3.x, r1.x, c3.x
+mad.f32 r2.w, c3.x, r1.x, c3.x
+sam (f32)(x)r4.y, r1.w, s#2, t#2
max.f r0.y, c3.w, r0.y
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.x, r1.x
-mad.f32 r2.y, c3.x, r0.w, c3.x
-mad.f32 r0.x, c3.x, r0.x, c3.x
-mad.f32 r0.w, c3.x, r0.w, c3.x
-exp2 r1.w, r1.w
-(ss)mov.f32f32 r1.w, r1.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r3.w, r1.y
-mov.f32f32 r1.x, r1.x
-add.f r1.y, c3.z, r1.w
-mov.f32f32 r1.w, r0.y
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r4.x, r1.x
-mov.f32f32 r1.x, r1.y
-(sy)mul.f r1.y, r1.w, r5.y
-sam (f32)(xyz)r4.y, r3.z, s#0, t#0
-mov.f32f32 r0.w, r0.w
-(sy)mov.f32f32 r1.w, r4.w
-mov.f32f32 r3.y, r4.z
-(ss)mov.f32f32 r3.z, r4.y
-mov.f32f32 r0.w, r0.w
-rcp r1.x, r1.x
-(ss)mov.f32f32 r1.x, r1.x
-mul.f r1.y, r1.y, c5.y
-mov.f32f32 r3.w, r1.w
-mov.f32f32 r3.y, r3.y
-mul.f r0.y, r0.y, r1.x
-mov.f32f32 r1.x, r1.y
-mov.f32f32 r4.y, r0.w
-mov.f32f32 r0.w, r3.z
-mov.f32f32 r0.y, r0.y
-mad.f32 r0.z, r1.x, r5.z, r0.z
+rsq r1.x, r3.w
+(ss)mul.f r1.x, c3.x, r1.x
+(ss)exp2 r1.w, r2.y
+(ss)add.f r2.x, c3.z, r1.w
+(sy)(ss)cmps.f.lt r2.y, r0.x, r4.y
+mov.f32f32 r1.w, c3.z
(rpt1)nop
-mad.f32 r0.y, c3.x, r0.y, r2.x
-mov.f32f32 r0.z, r0.z
-sam (f32)(xyz)r4.x, r4.x, s#0, t#0
-(sy)mov.f32f32 r1.x, r4.z
-mov.f32f32 r1.y, r4.y
+mul.f r3.z, r0.y, r6.z
mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.w, r2.y
-mov.f32f32 r2.x, r4.x
-mov.f32f32 r0.x, r0.x
-sel.b32 r0.y, r0.z, r1.z, r0.y
-mov.f32f32 r0.z, r1.w
-mov.f32f32 r1.z, c3.z
+rcp r2.x, r2.x
+mul.f r1.x, r1.x, r1.y
(rpt1)nop
-(ss)mov.f32f32 r4.x, r0.z
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.z, r1.z
-(rpt1)nop
-mov.f32f32 r4.y, r0.x
-mov.f32f32 r1.w, r0.z
-(rpt4)nop
-sam (f32)(xyz)r4.x, r4.x, s#0, t#0
-(sy)mov.f32f32 r0.x, r4.z
-mov.f32f32 r0.z, r4.y
-mov.f32f32 r1.z, r4.x
-nop
-sel.b32 r0.x, r1.x, r2.w, r0.x
-sel.b32 r0.z, r1.y, r2.w, r0.z
-sel.b32 r1.x, r2.x, r2.w, r1.z
+(ss)mul.f r0.y, r0.y, r2.x
+mov.f32f32 r1.y, r1.x
+(ss)mul.f r2.x, r3.z, c5.y
nop
-sel.b32 r0.x, r3.w, r3.x, r0.x
-sel.b32 r0.z, r3.y, r3.x, r0.z
-sel.b32 r0.w, r0.w, r3.x, r1.x
-nop
-mul.f r0.x, r0.x, r0.y
-mul.f r0.z, r0.z, r0.y
-mul.f r0.y, r0.w, r0.y
+mad.f32 r0.y, c3.x, r0.y, r1.y
+sam (f32)(xyz)r4.y, r0.z, s#0, t#0
+(sy)(ss)sel.b32 r0.z, r4.w, r4.x, r3.x
+mad.f32 r0.w, r2.x, r6.w, r1.x
+sam (f32)(x)r4.w, r2.z, s#2, t#2
+(sy)cmps.f.lt r0.x, r0.x, r4.w
+cov.u32f32 r1.x, r2.y
+sel.b32 r1.y, r4.z, r4.x, r3.y
+sel.b32 r1.z, r4.y, r4.x, r1.z
+cov.u32f32 r0.x, r0.x
+cmps.f.ne r1.x, r1.x, c3.w
+(rpt1)nop
+cmps.f.ne r0.x, r0.x, c3.w
+(rpt2)nop
+sel.b32 r0.x, r0.w, r0.x, r0.y
+(rpt2)nop
+mul.f r0.y, r0.z, r0.x
+mul.f r0.z, r1.y, r0.x
+mul.f r0.x, r1.z, r0.x
nop
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.z, r0.z
+mul.f r0.w, r0.y, c7.z
mov.f32f32 r0.y, r0.y
-nop
-mov.f32f32 r0.w, r0.x
-mov.f32f32 r1.x, r0.z
-mov.f32f32 r1.y, r0.y
-mov.f32f32 r0.x, r0.x
-mul.f r0.w, r0.w, c7.z
-mul.f r1.x, r1.x, c7.y
-mul.f r1.y, r1.y, c7.x
-mov.f32f32 r0.x, r0.x
+mul.f r1.y, r0.z, c7.y
+mul.f r2.x, r0.x, c7.x
mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.y, r0.y
+sel.b32 r1.z, r0.w, r1.x, r0.y
+mov.f32f32 r0.x, r0.x
nop
-sel.b32 r0.x, r0.w, r2.z, r0.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.y, r0.y
+sel.b32 r1.y, r1.y, r1.x, r0.z
nop
-mov.f32f32 r1.z, r0.x
-sel.b32 r0.x, r1.x, r2.z, r0.z
-sel.b32 r0.y, r1.y, r2.z, r0.y
-(rpt1)nop
-mov.f32f32 r1.y, r0.x
-mov.f32f32 r1.x, r0.y
+sel.b32 r1.x, r2.x, r1.x, r0.x
end
nop
nop
-nop
; FRAG: outputs: r1.x (1:0)
; FRAG: inputs: r0.x (5:20,cm=f,il=8,b=1)
-; FRAG: 349 instructions, 0 half, 7 full
+; FRAG: 205 instructions, 0 half, 8 full
diff --git a/reference/webgl-water/webgl-water-39.asm b/reference/webgl-water/webgl-water-39.asm
index 660019f..73f0f60 100644
--- a/reference/webgl-water/webgl-water-39.asm
+++ b/reference/webgl-water/webgl-water-39.asm
@@ -11,51 +11,32 @@
@out(r1.y) out5
@out(r1.z) out6
@out(r1.w) out7
+@const(c4.x) 0x3f800000, 0x3f155326, 0xbf800000, 0x00000000
(sy)(ss)add.f r0.x, c4.x, (neg)r1.y
-mov.f32f32 r0.y, r1.z
-mov.f32f32 r0.z, r1.x
-mov.f32f32 r0.w, (0.000000)
+mov.f32f32 r1.w, (0.000000)
+mul.f r0.y, c0.w, r1.x
+mul.f r0.z, c0.z, r1.x
mad.f32 r0.x, c4.y, r0.x, c4.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.y, r0.z
-mov.f32f32 r1.w, r0.w
-mov.f32f32 r0.x, r0.x
-mul.f r0.z, c0.w, r1.x
-mul.f r0.w, c0.z, r1.x
+mul.f r0.w, c0.x, r1.x
mul.f r2.x, c0.y, r1.x
-mad.f32 r0.z, c1.w, r0.x, r0.z
-mad.f32 r0.w, c1.z, r0.x, r0.w
-mad.f32 r2.x, c1.y, r0.x, r2.x
-mul.f r1.x, c0.x, r1.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-mad.f32 r0.z, c2.w, r1.z, r0.z
-mad.f32 r0.w, c2.z, r1.z, r0.w
-mov.f32f32 r2.x, r2.x
-mad.f32 r1.x, c1.x, r0.x, r1.x
-add.f r0.z, r0.z, c3.w
-add.f r2.y, r0.w, c3.z
-mad.f32 r2.x, c2.y, r1.z, r2.x
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r0.w, r0.z
-mov.f32f32 r0.z, r2.y
-add.f r2.x, r2.x, c3.y
-mad.f32 r1.x, c2.x, r1.z, r1.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r1.z, r0.y
-mov.f32f32 r0.y, r2.x
-add.f r1.x, r1.x, c3.x
-mov.f32f32 r2.x, r0.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r2.y, r1.y
-mov.f32f32 r0.x, r1.x
-mov.f32f32 r1.x, r2.x
-(rpt2)nop
-mov.f32f32 r1.y, r1.x
-mov.f32f32 r1.x, r2.y
-end
nop
+mov.f32f32 r1.y, r0.x
+mad.f32 r0.x, c1.x, r0.x, r0.w
+(rpt1)nop
+mad.f32 r0.y, c1.w, r1.y, r0.y
+mad.f32 r0.z, c1.z, r1.y, r0.z
+mad.f32 r0.y, c2.w, r1.z, r0.y
+mad.f32 r0.z, c2.z, r1.z, r0.z
+mad.f32 r2.x, c1.y, r1.y, r2.x
+mad.f32 r0.x, c2.x, r1.z, r0.x
+add.f r0.w, r0.y, c3.w
+add.f r0.z, r0.z, c3.z
+mad.f32 r0.y, c2.y, r1.z, r2.x
+add.f r0.x, r0.x, c3.x
+(rpt1)nop
+add.f r0.y, r0.y, c3.y
+end
; VERT: outputs: r0.x (0:0) r1.x (5:20)
; VERT: inputs: r1.x (0:0,cm=7,il=8,b=0)
-; VERT: 45 instructions, 0 half, 3 full
+; VERT: 26 instructions, 0 half, 3 full
diff --git a/reference/webgl-water/webgl-water-40.asm b/reference/webgl-water/webgl-water-40.asm
index 1a0f09e..323714f 100644
--- a/reference/webgl-water/webgl-water-40.asm
+++ b/reference/webgl-water/webgl-water-40.asm
@@ -6,2331 +6,1360 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c7.x) 0x3f000000, 0x3ba3d70a, 0x3f800000, 0x40000000
+@const(c8.x) 0x3f1013a9, 0x00000000, 0x3f400d1b, 0x3e800000
+@const(c9.x) 0x40400000, 0x40800000, 0x49742400, 0x3f666666
+@const(c10.x) 0x00000000, 0x3f800000, 0x3ec00000, 0xbf800000
+@const(c11.x) 0x3f7fbe77, 0xc39044fe, 0xbe2ab368, 0x41200000
+@const(c12.x) 0x3e2ab368, 0x459c4000, 0x3e800000, 0x3f800000
+@const(c13.x) 0x41200000, 0x41000000, 0x40c00000, 0x3e800000
+@const(c14.x) 0x3e800000, 0x3f800000, 0x3fa00000, 0x00000000
+@const(c15.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.z, 0, r0.x
absneg.f r0.w, (neg)c0.y
absneg.f r1.x, (neg)c0.y
absneg.f r1.y, (neg)c0.y
mad.f32 r1.z, c7.x, r0.z, c7.x
-mul.f r1.w, r0.w, r0.w
-mul.f r2.x, r1.x, r1.x
-mul.f r2.y, r1.y, r1.y
-mov.f32f32 r2.z, r1.z
-add.f r1.w, c7.z, (neg)r1.w
-add.f r2.x, c7.z, (neg)r2.x
+bary.f r2.x, 2, r0.x
+mul.f r2.y, r0.w, r0.w
+mul.f r2.z, r1.x, r1.x
+mul.f r2.w, r1.y, r1.y
+mad.f32 r1.w, c7.x, r2.x, c7.x
add.f r2.y, c7.z, (neg)r2.y
-mov.f32f32 r2.z, r2.z
-bary.f r3.x, 2, r0.x
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r2.y, r2.y
-mad.f32 r3.y, c7.x, r3.x, c7.x
-mul.f r1.w, r1.w, c8.x
-mul.f r2.x, r2.x, c8.x
+add.f r2.z, c7.z, (neg)r2.z
+add.f r2.w, c7.z, (neg)r2.w
+absneg.f r3.x, (neg)c0.y
mul.f r2.y, r2.y, c8.x
-mov.f32f32 r2.w, r3.y
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r2.w, r2.w
-add.f r1.w, c7.z, (neg)r1.w
-add.f r2.x, c7.z, (neg)r2.x
+mul.f r2.z, r2.z, c8.x
+sam (f32)(zw)r3.y, r1.z, s#2, t#2
+(sy)mad.f32 r3.y, c7.y, r3.w, r1.z
+mad.f32 r3.z, c7.y, r4.x, r1.w
+(ss)add.f r1.z, c7.z, (neg)r2.y
+add.f r1.w, c7.z, (neg)r2.z
+mul.f r2.y, r2.w, c8.x
+mul.f r2.z, r3.x, r3.x
+mov.f32f32 r2.w, r1.z
+mov.f32f32 r3.w, r1.w
+sam (f32)(zw)r4.x, r3.y, s#2, t#2
+(ss)mov.f32f32 r3.y, r3.y
add.f r2.y, c7.z, (neg)r2.y
-absneg.f r3.z, (neg)c0.y
-absneg.f r3.w, (neg)c0.y
-absneg.f r4.x, (neg)c0.y
-sam (f32)(zw)r4.y, r2.z, s#2, t#2
-(sy)mad.f32 r1.z, c7.y, r4.w, r1.z
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r1.z, r1.z
-(ss)mov.f32f32 r2.z, r1.w
-mov.f32f32 r2.w, r2.x
-mov.f32f32 r4.y, r2.y
-mov.f32f32 r4.z, r1.z
-mul.f r4.w, r3.z, r3.z
-mul.f r5.y, r3.w, r3.w
-mul.f r5.z, r4.x, r4.x
-mov.f32f32 r5.w, r4.z
-mad.f32 r3.y, c7.y, r5.x, r3.y
-sqrt r2.z, r2.z
-(ss)mov.f32f32 r2.z, r2.z
-mov.f32f32 r4.z, r0.w
+(sy)mad.f32 r4.x, c7.y, r4.z, r3.y
+mov.f32f32 r3.y, r3.z
+add.f r2.z, c7.z, (neg)r2.z
+mad.f32 r4.y, c7.y, r4.w, r3.y
sqrt r2.w, r2.w
-(ss)mov.f32f32 r2.w, r2.w
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r5.x, r1.x
-sqrt r4.y, r4.y
-(ss)mov.f32f32 r4.y, r4.y
-mad.f32 r2.z, c8.z, r4.z, r2.z
-mov.f32f32 r4.z, r3.y
-mad.f32 r2.w, c8.z, r5.x, r2.w
-mov.f32f32 r5.x, r1.y
-add.f r4.w, c7.z, (neg)r4.w
-mov.f32f32 r6.x, r4.z
-mov.f32f32 r2.z, r2.z
+(ss)mad.f32 r2.w, c8.z, r0.w, r2.w
+sqrt r3.y, r3.w
+(ss)mad.f32 r3.y, c8.z, r1.x, r3.y
+mov.f32f32 r3.z, r2.y
+mul.f r2.z, r2.z, c8.x
+(ss)mul.f r3.w, r2.w, c10.x
+absneg.f r4.z, (neg)c0.z
+sam (f32)(zw)r4.w, r4.x, s#2, t#2
+(ss)mov.f32f32 r4.x, r4.x
mov.f32f32 r2.w, r2.w
-mad.f32 r4.y, c8.z, r5.x, r4.y
-mov.f32f32 r4.z, r4.w
-add.f r4.w, c7.z, (neg)r5.y
-add.f r5.x, c7.z, (neg)r5.z
-sam (f32)(zw)r5.y, r5.w, s#2, t#2
-(sy)mad.f32 r1.z, c7.y, r5.w, r1.z
-mul.f r5.y, r2.z, c10.x
-mul.f r5.z, r2.z, c10.y
-mul.f r2.z, r2.z, c10.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r5.y, r5.y
-(ss)absneg.f r5.w, (neg)c0.z
-mov.f32f32 r5.z, r5.z
-mov.f32f32 r6.y, r1.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.z, r2.z
-absneg.f r6.z, (neg)c0.x
-mov.f32f32 r6.w, r6.y
-mad.f32 r3.y, c7.y, r6.x, r3.y
-mov.f32f32 r5.w, r5.w
-mad.f32 r0.w, c8.z, r0.w, (neg)r5.z
+(sy)mad.f32 r4.w, c7.y, r5.y, r4.x
+mov.f32f32 r4.x, r4.y
+mov.f32f32 r4.y, r3.y
+mad.f32 r5.x, c7.y, r5.z, r4.x
+mul.f r4.x, r2.w, c10.y
+mul.f r2.w, r2.w, c10.x
+absneg.f r5.y, (neg)c0.x
+mad.f32 r0.w, c8.z, r0.w, (neg)r4.x
+cmps.f.lt r1.z, r1.z, c8.y
+mul.f r4.x, r4.y, c10.y
+sam (f32)(zw)r5.z, r4.w, s#2, t#2
+(ss)mov.f32f32 r4.w, r4.w
+mad.f32 r2.w, c8.z, r5.y, (neg)r2.w
+(sy)mad.f32 r5.y, c7.y, r6.x, r4.w
+mov.f32f32 r4.w, r5.x
+cov.u32f32 r1.z, r1.z
+mad.f32 r5.z, c7.y, r6.y, r4.w
+mad.f32 r1.x, c8.z, r1.x, (neg)r4.x
cmps.f.lt r1.w, r1.w, c8.y
-mov.f32f32 r3.y, r3.y
-mad.f32 r5.y, c8.z, r5.w, (neg)r5.y
-mov.f32f32 r5.z, r6.z
+mul.f r4.x, r4.y, c10.x
+cmps.f.ne r1.z, r1.z, c8.y
+mov.f32f32 r4.y, c8.y
+mov.f32f32 r4.w, c8.y
+sam (f32)(zw)r5.w, r5.y, s#2, t#2
+mov.f32f32 r5.x, r5.y
cov.u32f32 r1.w, r1.w
-mov.f32f32 r5.w, r3.y
-mul.f r6.x, r2.w, c10.x
-mul.f r6.y, r2.w, c10.y
-mul.f r2.w, r2.w, c10.x
-mov.f32f32 r7.x, r5.w
+(sy)mad.f32 r5.x, c7.y, r6.y, r5.x
+(ss)mov.f32f32 r5.y, r5.z
+sel.b32 r0.w, r4.y, r1.z, r0.w
+mad.f32 r5.y, c7.y, r6.z, r5.y
+sel.b32 r2.w, r4.w, r1.z, r2.w
cmps.f.ne r1.w, r1.w, c8.y
-mov.f32f32 r5.w, c8.y
-mov.f32f32 r6.z, c8.y
-mad.f32 r2.z, c8.z, r5.z, (neg)r2.z
-mov.f32f32 r5.z, c8.y
-mov.f32f32 r6.x, r6.x
-sam (f32)(zw)r6.w, r6.w, s#2, t#2
-(sy)mad.f32 r1.z, c7.y, r7.y, r1.z
-sel.b32 r5.y, r5.w, r1.w, r5.y
-sel.b32 r0.w, r6.z, r1.w, r0.w
-sel.b32 r1.w, r5.z, r1.w, r2.z
-mov.f32f32 r1.z, r1.z
-absneg.f r2.z, (neg)r5.y
+mov.f32f32 r4.y, c8.y
absneg.f r0.w, (neg)r0.w
-absneg.f r1.w, (neg)r1.w
-mov.f32f32 r5.y, r1.z
-mov.f32f32 r5.z, r2.z
-mov.f32f32 r5.w, r2.z
-mov.f32f32 r6.z, r0.w
-(ss)mov.f32f32 r6.w, r5.y
-mad.f32 r3.y, c7.y, r7.z, r3.y
-mov.f32f32 r5.y, r0.w
-mov.f32f32 r7.x, r1.w
-mov.f32f32 r7.y, r1.w
-mov.f32f32 r3.y, r3.y
-rcp r5.z, r5.z
+absneg.f r2.w, (neg)r2.w
+absneg.f r4.w, (neg)c0.x
+sam (f32)(zw)r5.x, r5.x, s#2, t#2
+(sy)(ss)mul.f r5.x, r5.z, r5.z
+sel.b32 r1.x, r4.y, r1.w, r1.x
+mad.f32 r4.y, r5.w, r5.w, r5.x
+mad.f32 r4.x, c8.z, r4.w, (neg)r4.x
+mov.f32f32 r4.w, c8.y
+absneg.f r1.x, (neg)r1.x
+add.f r4.y, c7.z, (neg)r4.y
+rcp r5.x, r0.w
nop
-rcp r5.w, r5.w
+rcp r5.y, r0.w
nop
-rcp r6.z, r6.z
-mov.f32f32 r7.z, r0.w
-mov.f32f32 r7.w, r3.y
-rcp r5.y, r5.y
+rcp r6.x, r2.w
nop
-rcp r8.x, r7.x
+rcp r6.y, r2.w
nop
-rcp r7.y, r7.y
-absneg.f r8.y, (neg)c0.z
-(ss)mov.f32f32 r7.x, r7.w
-mov.f32f32 r6.y, r6.y
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r2.w, r2.w
-rcp r7.z, r7.z
-mov.f32f32 r7.w, r8.y
-absneg.f r8.y, (neg)c0.x
-mad.f32 r1.x, c8.z, r1.x, (neg)r6.y
-sam (f32)(zw)r8.z, r6.w, s#2, t#2
-(sy)mad.f32 r1.z, c7.y, r9.x, r1.z
-mad.f32 r3.y, c7.y, r9.y, r3.y
-mad.f32 r6.x, c8.z, r7.w, (neg)r6.x
-cmps.f.lt r2.x, r2.x, c8.y
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r6.y, r8.y
-cov.u32f32 r2.x, r2.x
-(ss)mov.f32f32 r6.w, r1.z
-mov.f32f32 r7.x, r3.y
-mad.f32 r2.w, c8.z, r6.y, (neg)r2.w
-cmps.f.ne r2.x, r2.x, c8.y
-mov.f32f32 r8.y, r6.w
-mov.f32f32 r8.z, r7.x
-mov.f32f32 r6.y, c8.y
-mov.f32f32 r6.w, c8.y
-mov.f32f32 r7.x, c8.y
-mov.f32f32 r4.y, r4.y
-mul.f r4.z, r4.z, c8.x
-mov.f32f32 r4.w, r4.w
-sam (f32)(zw)r8.y, r8.y, s#2, t#2
-(sy)mad.f32 r1.z, c7.y, r8.w, r1.z
-mad.f32 r3.y, c7.y, r9.x, r3.y
-sel.b32 r6.x, r6.y, r2.x, r6.x
-sel.b32 r1.x, r6.w, r2.x, r1.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r3.y, r3.y
-absneg.f r6.x, (neg)r6.x
-absneg.f r1.x, (neg)r1.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r6.y, r6.x
-mov.f32f32 r6.w, r6.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r7.w, r1.x
-(ss)mov.f32f32 r8.y, r1.x
-mov.f32f32 r8.z, r1.z
-mov.f32f32 r8.w, r3.y
-rcp r1.z, r6.y
+rcp r6.z, r0.w
+sel.b32 r4.x, r4.w, r1.w, r4.x
+rcp r4.w, r1.x
nop
-rcp r3.y, r6.w
-sel.b32 r2.x, r7.x, r2.x, r2.w
-mov.f32f32 r2.w, r1.x
-(ss)rcp r6.y, r7.w
-mul.f r6.w, r4.y, c10.x
-mul.f r7.x, r4.y, c10.y
-(ss)rcp r7.w, r8.y
-absneg.f r2.x, (neg)r2.x
-sam (f32)(zw)r8.y, r8.z, s#2, t#2
-(sy)(ss)mul.f r8.y, r8.w, r8.w
-mov.f32f32 r8.z, r9.x
-mad.f32 r8.y, r9.x, r9.x, r8.y
-mov.f32f32 r8.w, r8.w
-add.f r9.x, r0.z, (neg)c6.x
-mov.f32f32 r9.y, r2.x
-add.f r8.y, c7.z, (neg)r8.y
-mov.f32f32 r9.z, r8.z
-mov.f32f32 r9.w, r8.w
-mul.f r10.x, r9.x, r9.x
-mov.f32f32 r8.y, r8.y
-bary.f (ei)r0.x, 1, r0.x
-rcp r0.y, r9.y
-(ss)mov.f32f32 r9.y, r2.x
-rcp r2.w, r2.w
-mov.f32f32 r6.w, r6.w
-absneg.f r10.y, (neg)c0.z
-add.f r10.z, r0.x, (neg)c6.y
+rcp r6.w, r1.x
+nop
+sqrt r4.y, r4.y
+(ss)mov.f32f32 r7.x, r4.y
+add.f r7.y, r0.z, (neg)c6.x
+absneg.f r4.x, (neg)r4.x
+rcp r7.z, r1.x
+nop
+sqrt r3.z, r3.z
mov.f32f32 r7.x, r7.x
-sqrt r8.y, r8.y
-(ss)mov.f32f32 r8.y, r8.y
-mov.f32f32 r10.y, r10.y
-mov.f32f32 r1.y, r1.y
-mul.f r4.y, r4.y, c10.x
-mov.f32f32 r8.y, r8.y
-mad.f32 r10.x, r10.z, r10.z, r10.x
-rcp r9.y, r9.y
-mad.f32 r6.w, c8.z, r10.y, (neg)r6.w
+mov.f32f32 r7.w, r5.z
+mul.f r8.x, r7.y, r7.y
+bary.f (ei)r0.x, 1, r0.x
+(ss)mad.f32 r0.y, c8.z, r1.y, r3.z
+add.f r2.z, c7.z, (neg)r2.z
+(ss)mad.f32 r3.z, c8.z, r4.z, (neg)r3.w
+add.f r3.w, r0.x, (neg)c6.y
+rcp r4.z, r4.x
+nop
+rcp r8.y, r4.x
+mov.f32f32 r8.z, r0.y
+mov.f32f32 r8.w, r2.z
+mad.f32 r8.x, r3.w, r3.w, r8.x
+add.f r9.x, r2.x, (neg)c6.z
+mul.f r9.y, r8.z, c10.y
+mul.f r8.z, r8.z, c10.x
+absneg.f r9.z, (neg)c0.x
+mad.f32 r8.x, r9.x, r9.x, r8.x
+mad.f32 r1.y, c8.z, r1.y, (neg)r9.y
cmps.f.lt r2.y, r2.y, c8.y
-mov.f32f32 r10.y, r8.y
-mov.f32f32 r10.x, r10.x
-add.f r10.w, r3.x, (neg)c6.z
+mad.f32 r8.z, c8.z, r9.z, (neg)r8.z
+sqrt r8.w, r8.w
+(ss)mad.f32 r8.w, c8.z, r3.x, r8.w
+mov.f32f32 r9.y, c8.y
cov.u32f32 r2.y, r2.y
-mad.f32 r1.y, c8.z, r1.y, (neg)r7.x
-mov.f32f32 r4.y, r4.y
-mad.f32 r7.x, r10.w, r10.w, r10.x
+rsq r8.x, r8.x
+(ss)mov.f32f32 r9.z, r8.x
+mov.f32f32 r9.w, r8.w
+sel.b32 r1.z, r9.y, r1.z, r3.z
+mul.f r3.y, r3.y, c10.x
+mul.f r3.z, r7.y, r9.z
cmps.f.ne r2.y, r2.y, c8.y
-mov.f32f32 r10.x, c8.y
-mov.f32f32 r11.x, c8.y
-absneg.f r11.y, (neg)c0.x
-mov.f32f32 r4.z, r4.z
-mul.f r4.w, r4.w, c8.x
-rsq r7.x, r7.x
-(ss)mov.f32f32 r7.x, r7.x
-sel.b32 r6.w, r10.x, r2.y, r6.w
-sel.b32 r1.y, r11.x, r2.y, r1.y
-mov.f32f32 r10.x, r11.y
-mul.f r9.x, r9.x, r7.x
-absneg.f r6.w, (neg)r6.w
+mov.f32f32 r7.y, c8.y
+mov.f32f32 r9.y, c8.y
+mov.f32f32 r10.x, r3.z
+mul.f r3.z, r5.z, (neg)r3.z
+mul.f r3.w, r3.w, r9.z
+sel.b32 r1.y, r7.y, r2.y, r1.y
+mul.f r5.z, r7.w, r10.x
+mul.f r7.y, r7.w, r10.x
+mov.f32f32 r9.z, r3.w
+mad.f32 r3.z, r4.y, (neg)r3.w, r3.z
+mul.f r3.w, r9.x, r8.x
absneg.f r1.y, (neg)r1.y
+mad.f32 r4.y, r7.x, r9.z, r5.z
+mov.f32f32 r5.z, r5.w
+(ss)mov.f32f32 r8.x, r3.w
+mad.f32 r7.y, r7.x, r9.z, r7.y
+mad.f32 r3.z, r5.w, (neg)r3.w, r3.z
+sel.b32 r3.w, r9.y, r2.y, r8.z
+mad.f32 r4.y, r5.z, r8.x, r4.y
+mad.f32 r5.w, r5.z, r8.x, r7.y
+add.f r3.z, c7.z, (neg)r3.z
+rcp r7.y, r1.y
+nop
+rcp r8.z, r1.y
+mul.f r9.x, r4.y, r4.y
+mul.f r9.y, r5.w, r7.w
+mul.f r10.y, r5.w, r5.z
+mul.f r5.w, r5.w, r7.x
+add.f r9.x, c7.z, (neg)r9.x
+mul.f r9.y, c7.w, r9.y
+mul.f r10.y, c7.w, r10.y
+mul.f r5.w, c7.w, r5.w
+mul.f r9.x, r9.x, c8.x
+add.f r9.y, r10.x, (neg)r9.y
+add.f r10.y, r8.x, (neg)r10.y
+add.f r5.w, r9.z, (neg)r5.w
+add.f r9.x, c7.z, (neg)r9.x
+mov.f32f32 r10.z, r9.y
+add.f r10.w, r0.z, (neg)c1.x
+mov.f32f32 r11.x, r10.y
+mov.f32f32 r11.y, r9.x
+mov.f32f32 r11.z, r10.z
+mul.f r11.w, r10.w, r10.z
+add.f r12.x, r0.x, (neg)c1.y
+mov.f32f32 r12.y, r10.z
+mul.f r12.z, c0.x, r11.z
+mov.f32f32 r12.w, r5.w
+sqrt r11.y, r11.y
+(ss)mad.f32 r4.y, c8.z, r4.y, r11.y
+mad.f32 r5.w, r12.x, r5.w, r11.w
+(ss)add.f r11.y, r2.x, (neg)c1.z
+rcp r11.w, r11.z
+absneg.f r13.x, (neg)r0.z
+mov.f32f32 r13.y, r4.y
+rcp r13.z, r12.y
+absneg.f r13.w, (neg)r0.z
+mad.f32 r5.w, r11.y, r10.y, r5.w
+add.f r10.y, c7.z, r13.x
+mul.f r7.x, r13.y, r7.x
+add.f r14.x, c7.z, r13.w
+mad.f32 r7.x, c8.z, r9.z, (neg)r7.x
+cmps.f.lt r9.x, r9.x, c8.y
+mul.f r5.w, c7.w, r5.w
+(ss)mul.f r9.z, r10.y, r11.w
+rcp r10.y, r10.z
+add.f r11.w, c10.w, r13.x
+cov.u32f32 r9.x, r9.x
+mov.f32f32 r13.x, r5.w
+mul.f r14.y, r10.z, r10.z
+(ss)mul.f r10.y, r11.w, r10.y
+cmps.f.ne r9.x, r9.x, c8.y
+mov.f32f32 r11.w, c8.y
+mad.f32 r14.y, r12.w, r12.w, r14.y
+max.f r9.z, r10.y, r9.z
+mad.f32 r10.y, r11.x, r11.x, r14.y
+sel.b32 r7.x, r11.w, r9.x, r7.x
+mul.f r4.y, r4.y, r7.w
+mov.f32f32 r7.w, r12.w
mad.f32 r4.y, c8.z, r10.x, (neg)r4.y
-mov.f32f32 r9.x, r9.x
-mov.f32f32 r10.x, r6.w
-mov.f32f32 r11.x, r6.w
-mov.f32f32 r11.y, r1.y
-mul.f r11.z, r8.w, r9.x
-mul.f r10.z, r10.z, r7.x
-mul.f r11.w, r8.w, r9.x
-mul.f r12.x, r8.w, (neg)r9.x
-rcp r10.x, r10.x
-mov.f32f32 r12.y, r1.y
-mov.f32f32 r10.z, r10.z
-rcp r11.x, r11.x
+mov.f32f32 r10.x, r7.x
+mov.f32f32 r11.w, c8.y
+mul.f r14.y, c9.y, r10.y
+mul.f r10.w, r10.w, r10.w
+mov.f32f32 r14.z, r7.x
+sel.b32 r4.y, r11.w, r9.x, r4.y
+mad.f32 r10.w, r12.x, r12.x, r10.w
+rcp r11.w, r10.x
+absneg.f r12.x, (neg)r0.x
+add.f r14.w, r0.z, (neg)c1.x
+mad.f32 r10.w, r11.y, r11.y, r10.w
+mov.f32f32 r11.y, c2.x
+add.f r15.x, c10.w, r12.x
+mul.f r15.y, r14.w, r4.y
+add.f r15.z, r0.x, (neg)c1.y
+mul.f r11.y, r11.y, c2.x
+(ss)mul.f r11.w, r15.x, r11.w
+rcp r15.x, r10.x
+add.f r12.x, c7.w, r12.x
+mad.f32 r15.y, r15.z, r7.x, r15.y
+mul.f r5.z, r13.y, r5.z
+add.f r10.w, r10.w, (neg)r11.y
+(ss)mul.f r11.y, r12.x, r15.x
+mad.f32 r5.z, c8.z, r8.x, (neg)r5.z
+mov.f32f32 r8.x, c8.y
+mul.f r10.w, r14.y, r10.w
+max.f r11.y, r11.w, r11.y
+mov.f32f32 r11.w, r4.y
+sel.b32 r5.z, r8.x, r9.x, r5.z
+add.f r8.x, r2.x, (neg)c1.z
+mad.f32 r9.x, r13.x, r13.x, (neg)r10.w
+rcp r10.w, r7.w
nop
-rcp r11.y, r11.y
-mov.f32f32 r12.z, c8.y
-mov.f32f32 r12.w, r1.y
-mad.f32 r11.z, r8.y, r10.z, r11.z
-mad.f32 r11.w, r8.y, r10.z, r11.w
-mad.f32 r12.x, r8.y, (neg)r10.z, r12.x
-rcp r12.y, r12.y
-sel.b32 r2.y, r12.z, r2.y, r4.y
-mov.f32f32 r4.y, r11.z
-mul.f r7.x, r10.w, r7.x
-mov.f32f32 r10.w, r11.w
-mov.f32f32 r11.z, r12.x
-absneg.f r2.y, (neg)r2.y
-mov.f32f32 r7.x, r7.x
-rcp r11.w, r12.w
-add.f r4.z, c7.z, (neg)r4.z
-mov.f32f32 r4.w, r4.w
-mov.f32f32 r12.x, r2.y
-mad.f32 r4.y, r8.z, r7.x, r4.y
-mad.f32 r10.w, r8.z, r7.x, r10.w
-mad.f32 r11.z, r8.z, (neg)r7.x, r11.z
-mov.f32f32 r12.z, r2.y
-(ss)mul.f r12.w, r4.y, r4.y
-mul.f r8.w, r10.w, r8.w
-mul.f r8.z, r10.w, r8.z
-mul.f r8.y, r10.w, r8.y
-add.f r10.w, c7.z, (neg)r12.w
-mov.f32f32 r8.w, r8.w
-mov.f32f32 r8.z, r8.z
-mov.f32f32 r8.y, r8.y
-mov.f32f32 r10.w, r10.w
-mul.f r8.w, c7.w, r8.w
-mul.f r8.z, c7.w, r8.z
-mul.f r8.y, c7.w, r8.y
-mul.f r10.w, r10.w, c8.x
-mov.f32f32 r8.w, r8.w
-mov.f32f32 r8.z, r8.z
-mov.f32f32 r8.y, r8.y
-mov.f32f32 r10.w, r10.w
-add.f r8.w, r9.x, (neg)r8.w
-add.f r8.z, r7.x, (neg)r8.z
-add.f r8.y, r10.z, (neg)r8.y
-add.f r10.w, c7.z, (neg)r10.w
-mov.f32f32 r8.w, r8.w
-mov.f32f32 r8.z, r8.z
-mov.f32f32 r8.y, r8.y
-mov.f32f32 r10.w, r10.w
-mul.f r12.w, r8.w, r8.w
-mov.f32f32 r13.x, r8.w
-mad.f32 r12.w, r8.y, r8.y, r12.w
-mov.f32f32 r13.y, r10.w
-mov.f32f32 r13.z, r8.w
-mov.f32f32 r13.w, r13.x
-mov.f32f32 r12.w, r12.w
-mov.f32f32 r14.x, r13.x
-mov.f32f32 r14.y, r13.z
-mad.f32 r12.w, r8.z, r8.z, r12.w
-sqrt r13.y, r13.y
-(ss)mov.f32f32 r13.y, r13.y
-mov.f32f32 r4.y, r4.y
-rcp r13.w, r13.w
-absneg.f r14.z, (neg)r0.z
-mul.f r14.w, c9.y, r12.w
-add.f r15.x, r0.z, (neg)c1.x
-mad.f32 r4.y, c8.z, r4.y, r13.y
-add.f r13.y, c10.w, r14.z
-rcp r14.y, r14.y
-absneg.f r15.y, (neg)r0.z
-mul.f r15.z, r15.x, r15.x
-mov.f32f32 r4.y, r4.y
-add.f r15.w, r0.x, (neg)c1.y
-(ss)mul.f r13.y, r13.y, r13.w
-(ss)add.f r13.w, c10.w, r15.y
-mul.f r9.w, r4.y, r9.w
-mad.f32 r15.z, r15.w, r15.w, r15.z
-mov.f32f32 r13.y, r13.y
-mov.f32f32 r16.x, r13.x
-mov.f32f32 r9.w, r9.w
-mov.f32f32 r9.x, r9.x
-mov.f32f32 r15.z, r15.z
-add.f r16.y, r3.x, (neg)c1.z
-mul.f r13.w, r13.w, r14.y
-mad.f32 r9.x, c8.z, r9.x, (neg)r9.w
-cmps.f.lt r9.w, r10.w, c8.y
-mad.f32 r10.w, r16.y, r16.y, r15.z
-rcp r14.y, r16.x
-add.f r14.z, c7.z, r14.z
-mov.f32f32 r13.w, r13.w
-cov.u32f32 r9.w, r9.w
-mov.f32f32 r10.w, r10.w
-mov.f32f32 r15.z, c2.x
-mov.f32f32 r14.z, r14.z
-cmps.f.ne r9.w, r9.w, c8.y
-(ss)mov.f32f32 r16.x, c8.y
-mul.f r15.z, r15.z, c2.x
-(ss)mul.f r14.y, r14.z, r14.y
-mov.f32f32 r14.z, r13.z
-sel.b32 r9.x, r16.x, r9.w, r9.x
-add.f r10.w, r10.w, (neg)r15.z
-mov.f32f32 r14.y, r14.y
-mul.f r9.z, r4.y, r9.z
-mul.f r15.z, r9.x, r9.x
-mul.f r4.y, r4.y, r10.y
-mov.f32f32 r10.y, r9.x
-mov.f32f32 r10.w, r10.w
-mov.f32f32 r16.x, r9.x
-mov.f32f32 r4.y, r4.y
-mov.f32f32 r10.z, r10.z
-mov.f32f32 r16.z, r10.y
-mul.f r10.w, r14.w, r10.w
-mov.f32f32 r14.w, r16.x
-mad.f32 r4.y, c8.z, r10.z, (neg)r4.y
-mov.f32f32 r10.z, c8.y
-mov.f32f32 r10.w, r10.w
-mul.f r15.x, r15.x, r8.w
-rcp r16.z, r16.z
-absneg.f r16.w, (neg)r0.z
-sel.b32 r4.y, r10.z, r9.w, r4.y
-mad.f32 r10.z, r15.w, r8.y, r15.x
-rcp r14.w, r14.w
+rcp r12.x, r14.z
+absneg.f r13.x, (neg)r0.x
+absneg.f r13.y, (neg)r0.x
+rcp r14.y, r11.w
absneg.f r15.x, (neg)r0.z
-add.f r15.w, c10.w, r16.w
-mad.f32 r15.z, r4.y, r4.y, r15.z
-mov.f32f32 r10.z, r10.z
-add.f r17.x, c10.w, r15.x
-(ss)mul.f r15.w, r15.w, r16.z
-mov.f32f32 r15.z, r15.z
-mov.f32f32 r9.z, r9.z
-mov.f32f32 r7.x, r7.x
-mov.f32f32 r15.w, r15.w
-(ss)mov.f32f32 r16.z, r10.y
-mad.f32 r10.z, r16.y, r8.z, r10.z
-mad.f32 r7.x, c8.z, r7.x, (neg)r9.z
-mov.f32f32 r9.z, c8.y
+mad.f32 r15.y, r8.x, r5.z, r15.y
+add.f r15.w, c10.w, r13.x
+add.f r16.x, c10.w, r13.y
+add.f r16.y, c7.z, r15.x
+mul.f r15.y, c7.w, r15.y
+sqrt r16.z, r9.x
+(ss)add.f r5.w, (neg)r5.w, (neg)r16.z
+mul.f r10.y, c7.w, r10.y
+mul.f r14.y, r16.y, r14.y
+rcp r16.y, r4.y
+add.f r15.x, c10.w, r15.x
+mov.f32f32 r16.z, r15.y
+mul.f r16.w, r4.y, r4.y
+mul.f r10.w, r15.w, r10.w
+(ss)mul.f r15.x, r15.x, r16.y
+mad.f32 r15.w, r7.x, r7.x, r16.w
+rcp r10.y, r10.y
+(ss)mul.f r5.w, r5.w, r10.y
+(ss)mad.f32 r10.y, r5.z, r5.z, r15.w
+max.f r14.y, r15.x, r14.y
+rcp r15.x, r7.w
+mul.f r12.x, r16.x, r12.x
+rcp r14.z, r14.z
+cmps.f.lt r15.w, c8.y, r5.w
+mul.f r16.x, c9.y, r10.y
+min.f r11.y, r14.y, r11.y
+mov.f32f32 r14.y, r5.z
+mul.f r14.w, r14.w, r14.w
+cov.u32f32 r15.w, r15.w
+mad.f32 r14.w, r15.z, r15.z, r14.w
+add.f r13.x, c7.w, r13.x
+add.f r13.y, c7.w, r13.y
+mad.f32 r8.x, r8.x, r8.x, r14.w
+rcp r14.w, r14.y
+absneg.f r15.z, (neg)r2.x
+mov.f32f32 r16.y, c2.x
+cmps.f.ne r15.w, r15.w, c8.y
+mov.f32f32 r16.w, c8.y
+add.f r17.x, c7.z, r15.z
+mul.f r16.y, r16.y, c2.x
+mov.f32f32 r17.y, c7.z
+(ss)mul.f r13.x, r13.x, r15.x
mul.f r14.w, r17.x, r14.w
-mul.f r10.z, c7.w, r10.z
-max.f r13.y, r13.y, r14.y
-sel.b32 r7.x, r9.z, r9.w, r7.x
-rcp r9.z, r16.z
-add.f r9.w, c7.z, r16.w
-mov.f32f32 r10.z, r10.z
-mov.f32f32 r14.y, r14.w
-mad.f32 r14.w, r7.x, r7.x, r15.z
-mov.f32f32 r9.w, r9.w
-mad.f32 r10.w, r10.z, r10.z, (neg)r10.w
-mov.f32f32 r15.z, r16.x
-mul.f r16.y, c9.y, r14.w
-(ss)add.f r16.z, r0.z, (neg)c1.x
-(ss)mul.f r9.z, r9.w, r9.z
-mov.f32f32 r9.w, r10.w
-mov.f32f32 r10.w, r13.y
-mul.f r13.y, r16.z, r16.z
-add.f r16.w, r0.x, (neg)c1.y
-mov.f32f32 r9.z, r9.z
-mov.f32f32 r17.x, r9.w
-rcp r15.z, r15.z
-add.f r15.x, c7.z, r15.x
-mad.f32 r13.y, r16.w, r16.w, r13.y
-max.f r9.z, r15.w, r9.z
-mov.f32f32 r15.w, r8.y
-mov.f32f32 r15.x, r15.x
-mov.f32f32 r13.y, r13.y
-add.f r17.y, r3.x, (neg)c1.z
-mov.f32f32 r9.z, r9.z
-mov.f32f32 r17.z, r4.y
-sqrt r17.x, r17.x
-(ss)mov.f32f32 r17.x, r17.x
-mad.f32 r13.y, r17.y, r17.y, r13.y
-mov.f32f32 r10.z, r10.z
-mov.f32f32 r17.w, r17.z
-mul.f r15.x, r15.x, r15.z
-mov.f32f32 r13.y, r13.y
-mov.f32f32 r15.z, c2.x
-add.f r10.z, (neg)r10.z, (neg)r17.x
-mov.f32f32 r15.x, r15.x
-mov.f32f32 r17.x, r15.w
-mul.f r15.z, r15.z, c2.x
-rcp r17.w, r17.w
-absneg.f r18.x, (neg)r0.x
-mov.f32f32 r10.z, r10.z
-mov.f32f32 r12.w, r12.w
-add.f r13.y, r13.y, (neg)r15.z
-add.f r15.z, c10.w, r18.x
-max.f r14.y, r14.y, r15.x
-mul.f r12.w, c7.w, r12.w
-mov.f32f32 r13.y, r13.y
-(ss)mul.f r15.x, r15.z, r17.w
-mov.f32f32 r14.y, r14.y
-mov.f32f32 r15.z, r4.y
-mul.f r13.y, r16.y, r13.y
-mov.f32f32 r15.x, r15.x
-mov.f32f32 r16.y, r17.z
-mov.f32f32 r12.w, r12.w
-mov.f32f32 r13.y, r13.y
-mul.f r16.z, r16.z, r9.x
-(ss)mov.f32f32 r17.w, r15.z
-mad.f32 r16.z, r16.w, r4.y, r16.z
-rcp r16.w, r17.x
-(ss)absneg.f r17.x, (neg)r0.x
-rcp r16.y, r16.y
-add.f r18.x, c7.w, r18.x
-rcp r12.w, r12.w
-(ss)mov.f32f32 r12.w, r12.w
-mov.f32f32 r16.z, r16.z
-add.f r18.y, c10.w, r17.x
-mad.f32 r16.z, r17.y, r7.x, r16.z
-mov.f32f32 r17.y, r18.x
-mul.f r10.z, r10.z, r12.w
-rcp r12.w, r17.w
-(ss)absneg.f r17.w, (neg)r0.x
-mul.f r16.z, c7.w, r16.z
-mul.f r16.y, r17.y, r16.y
-mov.f32f32 r10.z, r10.z
-add.f r17.y, c10.w, r17.w
-mov.f32f32 r16.z, r16.z
-mov.f32f32 r16.y, r16.y
-cmps.f.lt r18.x, c8.y, r10.z
-(ss)mul.f r12.w, r17.y, r12.w
-mad.f32 r13.y, r16.z, r16.z, (neg)r13.y
-max.f r15.x, r15.x, r16.y
-cov.u32f32 r16.y, r18.x
-mov.f32f32 r12.w, r12.w
-mov.f32f32 r13.y, r13.y
-mov.f32f32 r15.x, r15.x
+rcp r15.x, r5.z
+add.f r15.z, c10.w, r15.z
+add.f r8.x, r8.x, (neg)r16.y
+sel.b32 r16.y, r16.w, r15.w, r17.y
+mov.f32f32 r9.x, r9.x
+(ss)mul.f r15.x, r15.z, r15.x
+mul.f r8.x, r16.x, r8.x
+max.f r10.w, r10.w, r13.x
+mul.f r13.x, r13.y, r14.z
+max.f r13.y, r15.x, r14.w
+mad.f32 r8.x, r16.z, r16.z, (neg)r8.x
+(ss)cmps.f.lt r14.z, c8.y, r9.x
+min.f r9.z, r9.z, r10.w
+min.f r10.w, r11.y, r13.y
+mov.f32f32 r11.y, r11.x
+max.f r12.x, r12.x, r13.x
+mov.f32f32 r13.x, r4.y
+mov.f32f32 r13.y, r10.w
+sqrt r14.w, r8.x
+(ss)add.f r14.w, (neg)r15.y, (neg)r14.w
+mul.f r10.y, c7.w, r10.y
+cov.u32f32 r14.z, r14.z
+mad.f32 r15.x, r11.w, r13.y, r0.z
+rcp r15.y, r11.y
+absneg.f r15.z, (neg)r2.x
+rcp r16.x, r13.x
+absneg.f r16.z, (neg)r0.z
+cmps.f.ne r14.z, r14.z, c8.y
+mov.f32f32 r16.w, r15.x
+rcp r10.y, r10.y
+(ss)mul.f r10.y, r14.w, r10.y
+add.f r14.w, c7.z, r15.z
+sel.b32 r16.y, r16.y, r14.z, r17.y
+mov.f32f32 r17.x, r16.w
+cmps.f.lt r17.y, c8.y, r10.y
+mul.f r14.w, r14.w, r15.y
+add.f r15.y, c7.z, r16.z
+add.f r17.x, c7.z, (neg)r17.x
+cov.u32f32 r17.y, r17.y
cmps.f.ne r16.y, r16.y, c8.y
-mov.f32f32 r17.y, c8.y
-mov.f32f32 r18.x, r13.y
-min.f r9.z, r9.z, r15.x
-mov.f32f32 r15.x, r7.x
-mov.f32f32 r18.z, c7.z
-mov.f32f32 r18.w, r15.z
-mul.f r16.w, r18.y, r16.w
-mov.f32f32 r18.y, r15.x
-sqrt r18.x, r18.x
-(ss)mov.f32f32 r18.x, r18.x
-mov.f32f32 r16.z, r16.z
-sel.b32 r17.y, r17.y, r16.y, r18.z
-cmps.f.lt r19.x, c8.y, r9.w
-rcp r18.w, r18.w
-add.f r17.w, c7.w, r17.w
-add.f r16.z, (neg)r16.z, (neg)r18.x
-rcp r18.x, r18.y
-(ss)absneg.f r18.y, (neg)r3.x
-cov.u32f32 r19.x, r19.x
-mov.f32f32 r17.w, r17.w
-mov.f32f32 r16.z, r16.z
-mov.f32f32 r14.w, r14.w
-add.f r19.y, c10.w, r18.y
-cmps.f.ne r19.x, r19.x, c8.y
-(ss)mul.f r17.w, r17.w, r18.w
-mul.f r14.w, c7.w, r14.w
-mul.f r18.x, r19.y, r18.x
-sel.b32 r17.y, r17.y, r19.x, r18.z
-mov.f32f32 r17.w, r17.w
-mov.f32f32 r14.w, r14.w
-mov.f32f32 r18.x, r18.x
-mov.f32f32 r18.z, r15.x
+mov.f32f32 r5.w, r5.w
+mul.f r6.x, r17.x, r6.x
+add.f r17.x, c10.w, (neg)r16.w
cmps.f.ne r17.y, r17.y, c8.y
-mov.f32f32 r18.w, r10.z
-max.f r12.w, r12.w, r17.w
-mov.f32f32 r16.w, r16.w
-rcp r14.w, r14.w
-(ss)mov.f32f32 r14.w, r14.w
-mov.f32f32 r17.w, r18.w
-rcp r18.z, r18.z
-add.f r18.y, c7.z, r18.y
-mov.f32f32 r12.w, r12.w
-mul.f r14.w, r16.z, r14.w
-mov.f32f32 r16.z, r17.w
-mov.f32f32 r17.w, r18.y
-min.f r12.w, r14.y, r12.w
-mov.f32f32 r14.y, r14.w
-sel.b32 r10.z, r16.z, r16.y, r10.z
-(ss)mul.f r14.w, r17.w, r18.z
-mov.f32f32 r16.y, r7.x
-cmps.f.lt r16.z, c8.y, r14.y
-sel.b32 r9.w, r10.z, r19.x, r9.w
-mov.f32f32 r10.z, c9.z
-mov.f32f32 r14.w, r14.w
-cov.u32f32 r16.z, r16.z
-mov.f32f32 r17.w, r16.y
-sel.b32 r9.w, r10.z, r17.y, r9.w
-max.f r10.z, r18.x, r14.w
-cmps.f.ne r14.w, r16.z, c8.y
-mov.f32f32 r16.z, c8.y
-mov.f32f32 r17.y, c7.z
-mov.f32f32 r10.z, r10.z
-mov.f32f32 r18.x, r9.w
-mov.f32f32 r8.w, r8.w
-sel.b32 r16.z, r16.z, r14.w, r17.y
-cmps.f.lt r18.y, c8.y, r13.y
-min.f r9.z, r9.z, r10.z
-mad.f32 r8.w, r8.w, r18.x, r0.z
-rcp r10.z, r17.w
-(ss)absneg.f r17.w, (neg)r3.x
-cov.u32f32 r18.x, r18.y
-mov.f32f32 r9.z, r9.z
-mov.f32f32 r18.y, r10.y
-absneg.f r18.z, (abs)r8.w
-cmps.f.ne r18.x, r18.x, c8.y
-mov.f32f32 r18.w, c7.z
-mad.f32 r18.y, r18.y, r9.z, r0.z
-add.f r19.x, c10.w, r17.w
-sel.b32 r16.z, r16.z, r18.x, r17.y
-add.f r17.y, r18.w, c2.x
-mov.f32f32 r18.y, r18.y
-(ss)mul.f r10.z, r19.x, r10.z
-cmps.f.ne r16.z, r16.z, c8.y
-mov.f32f32 r18.w, r14.y
-mov.f32f32 r19.x, r18.y
-add.f r18.z, r17.y, (neg)r18.z
-mov.f32f32 r10.z, r10.z
-mov.f32f32 r18.w, r18.w
-mov.f32f32 r19.y, r19.x
-mov.f32f32 r18.z, r18.z
-mov.f32f32 r19.z, c2.x
-mov.f32f32 r18.w, r18.w
-add.f r19.y, c10.w, (neg)r19.y
-mov.f32f32 r19.w, r16.y
-mov.f32f32 r20.x, r18.y
-sel.b32 r14.y, r18.w, r14.w, r14.y
-mul.f r7.y, r19.y, r7.y
-rcp r14.w, r19.z
-(ss)mul.f r18.z, r18.z, r14.w
-add.f r18.w, r20.x, (neg)c1.x
-sel.b32 r13.y, r14.y, r18.x, r13.y
-mov.f32f32 r14.y, c9.z
-mov.f32f32 r7.y, r7.y
-mov.f32f32 r18.x, r19.x
-mov.f32f32 r18.z, r18.z
-sel.b32 r13.y, r14.y, r16.z, r13.y
-rcp r14.y, r19.w
-add.f r16.z, c7.z, r17.w
-add.f r17.w, c7.z, (neg)r18.x
-mul.f r18.x, r18.w, r18.w
-mov.f32f32 r18.w, r13.y
-mov.f32f32 r9.x, r9.x
-mul.f r8.x, r17.w, r8.x
-log2 r17.w, r18.z
-(ss)mul.f r17.w, c9.x, r17.w
-mov.f32f32 r16.z, r16.z
-mad.f32 r9.x, r9.x, r18.w, r0.z
+mov.f32f32 r17.z, c8.y
+mov.f32f32 r17.w, c7.z
+mul.f r6.y, r17.x, r6.y
+sel.b32 r5.w, r5.w, r15.w, r5.w
+rcp r15.w, r11.x
+mul.f r15.y, r15.y, r16.x
+sel.b32 r16.x, r17.z, r17.y, r17.w
+max.f r17.x, r6.y, r6.x
+mad.f32 r10.w, r7.x, r10.w, r0.x
mov.f32f32 r8.x, r8.x
-mov.f32f32 r17.w, r17.w
-mul.f r14.y, r16.z, r14.y
-absneg.f r16.z, (abs)r9.x
-(ss)mov.f32f32 r18.z, c7.z
-max.f r18.w, r7.y, r8.x
-mov.f32f32 r14.y, r14.y
-mov.f32f32 r19.x, r13.y
-add.f r18.z, r18.z, c2.x
-mov.f32f32 r18.w, r18.w
-mov.f32f32 r19.y, r17.z
-exp2 r17.w, r17.w
+sel.b32 r5.w, r5.w, r14.z, r9.x
+mov.f32f32 r9.x, c9.z
+mov.f32f32 r14.z, r10.w
+cmps.f.lt r17.z, c8.y, r8.x
+add.f r15.z, c10.w, r15.z
+rcp r18.x, r4.y
+sel.b32 r5.w, r9.x, r16.y, r5.w
+mov.f32f32 r9.x, r14.z
+cov.u32f32 r16.y, r17.z
+(ss)mul.f r15.z, r15.z, r15.w
+add.f r15.w, c10.w, r16.z
+add.f r16.z, c7.w, (neg)r9.x
+cmps.f.ne r16.y, r16.y, c8.y
+mad.f32 r9.y, r9.y, r5.w, r0.z
+max.f r14.w, r15.z, r14.w
+mul.f r5.x, r16.z, r5.x
+add.f r15.z, c10.w, (neg)r14.z
+sel.b32 r16.x, r16.x, r16.y, r17.w
+absneg.f r16.z, (abs)r9.y
+mov.f32f32 r17.z, c7.z
+mul.f r5.y, r15.z, r5.y
+cmps.f.ne r15.z, r16.x, c8.y
+mov.f32f32 r10.y, r10.y
+add.f r16.x, r17.z, c2.x
+max.f r17.z, r5.y, r5.x
+min.f r9.z, r9.z, r14.w
+mul.f r14.w, r15.w, r18.x
+sel.b32 r10.y, r10.y, r17.y, r10.y
+min.f r15.w, r17.x, r17.z
+mad.f32 r13.y, r14.y, r13.y, r2.x
+add.f r16.z, r16.x, (neg)r16.z
+sel.b32 r8.x, r10.y, r16.y, r8.x
+mov.f32f32 r10.y, c2.x
+mov.f32f32 r16.y, r13.y
+mov.f32f32 r17.x, c9.z
+mov.f32f32 r17.y, r9.z
+max.f r14.w, r14.w, r15.y
+mov.f32f32 r15.y, r16.y
+sel.b32 r8.x, r17.x, r15.z, r8.x
+rcp r10.y, r10.y
+(ss)mul.f r15.z, r16.z, r10.y
+mad.f32 r16.z, r11.z, r17.y, r0.z
+add.f r15.y, c7.z, (neg)r15.y
+absneg.f r1.z, (neg)r1.z
+(ss)mad.f32 r4.y, r4.y, r8.x, r0.z
+mov.f32f32 r17.x, r16.z
+min.f r12.x, r14.w, r12.x
+mov.f32f32 r14.w, r5.z
+absneg.f r17.z, (abs)r4.y
+log2 r15.z, r15.z
+mov.f32f32 r17.w, c7.z
+rcp r18.x, r1.z
+(ss)mul.f r15.y, r15.y, r18.x
+add.f r18.x, c10.w, (neg)r16.y
+rcp r18.y, r1.z
+(ss)mul.f r15.z, c9.x, r15.z
+add.f r17.w, r17.w, c2.x
+mov.f32f32 r18.z, r17.x
+(ss)mul.f r18.x, r18.x, r18.y
+rcp r18.y, r14.w
+absneg.f r18.w, (neg)r2.x
+add.f r17.z, r17.w, (neg)r17.z
+mov.f32f32 r19.x, c2.x
+max.f r19.y, r18.x, r15.y
+exp2 r15.z, r15.z
nop
-(ss)rcp r17.w, r17.w
-add.f r16.z, r18.z, (neg)r16.z
-(ss)mov.f32f32 r17.w, r17.w
-mad.f32 r19.y, r19.y, r9.z, r0.x
-max.f r10.z, r10.z, r14.y
-mov.f32f32 r14.y, r16.z
-mov.f32f32 r16.z, c2.x
-mov.f32f32 r19.y, r19.y
-mul.f r17.w, c9.w, r17.w
-mov.f32f32 r10.z, r10.z
-mov.f32f32 r19.z, r4.y
-mov.f32f32 r19.w, r19.y
-mov.f32f32 r17.w, r17.w
-rcp r16.z, r16.z
-(ss)mul.f r14.y, r14.y, r16.z
-min.f r10.z, r12.w, r10.z
-mov.f32f32 r12.w, r19.w
-add.f r17.w, c7.z, (neg)r17.w
-mov.f32f32 r14.y, r14.y
-mov.f32f32 r10.z, r10.z
-mov.f32f32 r16.x, r16.x
-add.f r12.w, c10.w, (neg)r12.w
-mov.f32f32 r17.w, r17.w
-mad.f32 r19.x, r19.z, r19.x, r0.x
-mad.f32 r16.x, r16.x, r10.z, r0.z
-log2 r14.y, r14.y
-(ss)mul.f r14.y, c9.x, r14.y
-mul.f r5.y, r12.w, r5.y
-mul.f r12.w, c7.x, r17.w
-mul.f r19.z, c7.x, r17.w
-mov.f32f32 r14.y, r14.y
+(ss)rcp r15.z, r15.z
+(ss)mul.f r15.z, c9.w, r15.z
+add.f r18.z, c7.z, (neg)r18.z
+min.f r15.w, r15.w, r19.y
+absneg.f r3.w, (neg)r3.w
+add.f r19.y, c7.z, r18.w
+rcp r19.x, r19.x
+(ss)mul.f r17.z, r17.z, r19.x
+mov.f32f32 r19.z, r15.w
+mov.f32f32 r6.x, r6.x
+mov.f32f32 r6.y, r6.y
+add.f r15.z, c7.z, (neg)r15.z
+rcp r19.w, r3.w
+mul.f r18.y, r19.y, r18.y
+(ss)mul.f r18.z, r18.z, r19.w
+min.f r6.x, r6.y, r6.x
+mov.f32f32 r5.x, r5.x
mov.f32f32 r5.y, r5.y
-mov.f32f32 r20.x, r19.w
-mov.f32f32 r12.w, r12.w
-mov.f32f32 r19.z, r19.z
-mul.f r17.w, c7.x, r17.w
-add.f r20.x, c7.w, (neg)r20.x
-exp2 r14.y, r14.y
+log2 r6.y, r17.z
+(ss)mul.f r6.y, c9.x, r6.y
+(ss)mov.f32f32 r17.z, r15.z
+add.f r19.y, c10.w, (neg)r17.x
+min.f r5.x, r5.y, r5.x
+rcp r5.y, r3.w
nop
-(ss)rcp r14.y, r14.y
-(ss)mov.f32f32 r14.y, r14.y
-mov.f32f32 r20.y, r9.w
-mov.f32f32 r20.z, r8.z
-mov.f32f32 r17.w, r17.w
-mul.f r14.y, c9.w, r14.y
-mul.f r6.z, r20.x, r6.z
-mad.f32 r20.x, r20.z, r20.y, r3.x
-mov.f32f32 r16.x, r16.x
-mov.f32f32 r14.y, r14.y
-mov.f32f32 r6.z, r6.z
-absneg.f r20.y, (abs)r20.x
-mov.f32f32 r20.z, r16.x
-add.f r14.y, c7.z, (neg)r14.y
-max.f r20.w, r5.y, r6.z
-add.f r17.y, r17.y, (neg)r20.y
-add.f r20.y, c10.w, (neg)r20.z
-mov.f32f32 r14.y, r14.y
-mov.f32f32 r20.z, r20.w
-mov.f32f32 r17.y, r17.y
-mul.f r9.y, r20.y, r9.y
-mul.f r20.y, c7.x, r14.y
-mul.f r20.w, c7.x, r14.y
-mul.f r14.y, c7.x, r14.y
-min.f r18.w, r18.w, r20.z
-mov.f32f32 r20.y, r20.y
-mov.f32f32 r20.z, r13.y
-mov.f32f32 r7.x, r7.x
-mov.f32f32 r20.w, r20.w
-mov.f32f32 r14.y, r14.y
-mov.f32f32 r21.x, r15.x
-mad.f32 r7.x, r7.x, r20.z, r3.x
-mul.f r17.y, r17.y, r14.w
-mov.f32f32 r9.y, r9.y
-mov.f32f32 r20.z, r16.x
-absneg.f r21.y, (abs)r7.x
-mad.f32 r9.z, r21.x, r9.z, r3.x
-mov.f32f32 r17.y, r17.y
-add.f r20.z, c7.z, (neg)r20.z
-add.f r18.z, r18.z, (neg)r21.y
-mov.f32f32 r9.z, r9.z
-mov.f32f32 r21.x, r19.x
-mul.f r0.y, r20.z, r0.y
-mov.f32f32 r18.z, r18.z
-mov.f32f32 r20.z, r9.z
-log2 r17.y, r17.y
-(ss)mul.f r17.y, c9.x, r17.y
-mov.f32f32 r0.y, r0.y
-mul.f r18.z, r18.z, r16.z
-mov.f32f32 r21.y, r20.z
-mov.f32f32 r17.y, r17.y
-max.f r21.z, r9.y, r0.y
-mov.f32f32 r18.z, r18.z
-add.f r21.y, c10.w, (neg)r21.y
-add.f r4.w, c7.z, (neg)r4.w
-mov.f32f32 r21.z, r21.z
-mov.f32f32 r15.z, r15.z
-exp2 r17.y, r17.y
+rcp r19.w, r5.z
+mul.f r20.x, c7.x, r17.z
+mul.f r17.z, c7.x, r17.z
+max.f r5.x, r6.x, r5.x
+mov.f32f32 r6.x, r15.y
+mov.f32f32 r15.y, r18.x
+exp2 r6.y, r6.y
nop
-(ss)rcp r17.y, r17.y
-mul.f r5.w, r21.y, r5.w
-log2 r18.z, r18.z
-(ss)mul.f r18.z, c9.x, r18.z
-mov.f32f32 r17.y, r17.y
-mad.f32 r15.z, r15.z, r10.z, r0.x
-mov.f32f32 r5.w, r5.w
-mov.f32f32 r18.z, r18.z
-mov.f32f32 r20.z, r20.z
-mul.f r17.y, c9.w, r17.y
-mov.f32f32 r15.z, r15.z
-mov.f32f32 r4.w, r4.w
-add.f r20.z, c7.z, (neg)r20.z
-mov.f32f32 r17.y, r17.y
-exp2 r18.z, r18.z
+(ss)rcp r6.y, r6.y
+(ss)mul.f r6.y, c9.w, r6.y
+mad.f32 r18.x, r11.x, r5.w, r2.x
+min.f r6.x, r15.y, r6.x
+mul.f r5.y, r19.y, r5.y
+add.f r15.y, c10.w, r18.w
+add.f r6.y, c7.z, (neg)r6.y
+max.f r5.x, r5.x, r6.x
+absneg.f r6.x, (abs)r18.x
+max.f r18.w, r5.y, r18.z
+mul.f r15.y, r15.y, r19.w
+add.f r5.x, r19.z, (neg)r5.x
+mov.f32f32 r19.y, r6.y
+mul.f r6.y, c7.x, r6.y
+add.f r6.x, r16.x, (neg)r6.x
+mad.f32 r5.x, c11.w, r5.x, c7.z
+mul.f r16.x, c7.x, r19.y
+mul.f r19.y, c7.x, r19.y
+mad.f32 r5.z, r5.z, r8.x, r2.x
+mul.f r6.x, r6.x, r10.y
+mad.f32 r9.z, r12.w, r9.z, r0.x
+max.f r15.y, r15.y, r18.y
+rcp r5.x, r5.x
+mad.f32 r9.x, r0.w, r15.w, r9.x
+absneg.f r15.w, (abs)r5.z
+mov.f32f32 r18.y, r9.z
+min.f r12.x, r12.x, r15.y
+add.f r9.x, r9.x, c11.z
+add.f r15.y, r17.w, (neg)r15.w
+log2 r6.x, r6.x
+(ss)mul.f r6.x, c9.x, r6.x
+mov.f32f32 r15.w, r18.y
+mul.f r9.x, c11.y, r9.x
+mul.f r15.y, r15.y, r19.x
+mov.f32f32 r17.w, r12.x
+add.f r19.z, c7.w, (neg)r15.w
+mul.f r5.x, r9.x, r5.x
+exp2 r6.x, r6.x
nop
-(ss)rcp r18.z, r18.z
-(ss)mov.f32f32 r18.z, r18.z
-mul.f r5.z, r20.z, r5.z
-add.f r17.y, c7.z, (neg)r17.y
-mov.f32f32 r20.z, r15.z
-mul.f r18.z, c9.w, r18.z
-mov.f32f32 r5.z, r5.z
-mov.f32f32 r17.y, r17.y
-add.f r20.z, c10.w, (neg)r20.z
-mov.f32f32 r18.z, r18.z
-max.f r21.y, r5.w, r5.z
-mul.f r12.w, r12.w, r17.y
-mul.f r19.z, r19.z, r17.y
-add.f r18.z, c7.z, (neg)r18.z
-mov.f32f32 r21.y, r21.y
-mov.f32f32 r12.w, r12.w
-mov.f32f32 r19.z, r19.z
-mov.f32f32 r18.z, r18.z
+(ss)rcp r6.x, r6.x
+mad.f32 r9.x, r13.x, r17.w, r0.z
+(ss)mul.f r6.x, c9.w, r6.x
+log2 r13.x, r15.y
+(ss)mul.f r13.x, c9.x, r13.x
+mul.f r7.y, r19.z, r7.y
+(ss)mov.f32f32 r15.y, r9.x
+exp2 r5.x, r5.x
+(ss)add.f r5.x, c7.z, r5.x
+add.f r6.x, c7.z, (neg)r6.x
+add.f r19.z, c10.w, (neg)r18.y
+add.f r19.w, c10.w, (neg)r15.y
+exp2 r13.x, r13.x
+mad.f32 r20.y, r7.x, r8.x, r0.x
+(ss)rcp r13.x, r13.x
+(ss)mul.f r13.x, c9.w, r13.x
+mov.f32f32 r20.z, r6.x
+rcp r5.x, r5.x
+absneg.f r20.w, (abs)r16.y
+mul.f r8.z, r19.z, r8.z
+mul.f r8.y, r19.w, r8.y
+add.f r19.z, c7.z, (neg)r15.y
+cmps.f.lt r19.w, c11.x, r20.w
+add.f r13.x, c7.z, (neg)r13.x
+mul.f r20.x, r20.x, r20.z
+mul.f r17.z, r17.z, r20.z
+cov.u32f32 r19.w, r19.w
+mov.f32f32 r20.z, r13.x
+mul.f r6.y, r6.y, r13.x
+mad.f32 r13.x, r12.w, r5.w, r0.x
+cmps.f.ne r19.w, r19.w, c8.y
+mov.f32f32 r20.w, c8.y
+mov.f32f32 r21.x, c10.x
+mul.f r16.x, r16.x, r20.z
+mul.f r19.y, r19.y, r20.z
+add.f r20.z, r20.y, c7.z
+sel.b32 r20.w, r20.w, r19.w, r21.x
+absneg.f r15.x, (abs)r15.x
+add.f r21.x, r13.x, c7.z
+add.f r20.z, r20.z, c2.x
+max.f r21.y, r8.z, r7.y
+cmps.f.lt r15.x, c11.x, r15.x
+add.f r21.x, r21.x, c2.x
+mul.f r20.z, r20.z, r19.x
min.f r18.w, r18.w, r21.y
-mov.f32f32 r21.y, r9.w
-mov.f32f32 r21.w, r8.y
-mul.f r20.y, r20.y, r18.z
-mul.f r20.w, r20.w, r18.z
-mul.f r14.y, r14.y, r18.z
-mov.f32f32 r18.z, r18.w
-mov.f32f32 r18.w, r20.y
-add.f r20.y, r19.x, c7.z
-mov.f32f32 r20.w, r20.w
-mov.f32f32 r14.y, r14.y
-mov.f32f32 r19.w, r19.w
-add.f r20.y, r20.y, c2.x
-mov.f32f32 r22.x, r0.w
-mad.f32 r21.y, r21.w, r21.y, r0.x
-mul.f r17.y, r17.w, r17.y
-mov.f32f32 r17.w, r20.y
-mad.f32 r19.w, r22.x, r18.z, r19.w
-add.f r20.y, r21.y, c7.z
-mov.f32f32 r17.y, r17.y
-mul.f r17.w, r17.w, r16.z
-mov.f32f32 r19.w, r19.w
-add.f r20.y, r20.y, c2.x
-mul.f r7.w, r20.z, r7.w
-mov.f32f32 r17.w, r17.w
-add.f r19.w, r19.w, c11.z
-mov.f32f32 r20.y, r20.y
-mov.f32f32 r7.w, r7.w
-mov.f32f32 r20.z, r15.z
-mov.f32f32 r19.w, r19.w
-mul.f r20.y, r20.y, r14.w
-log2 r17.w, r17.w
-(ss)mul.f r17.w, c9.x, r17.w
-add.f r20.z, c7.w, (neg)r20.z
-mul.f r19.w, c11.y, r19.w
-mov.f32f32 r20.y, r20.y
-mov.f32f32 r17.w, r17.w
-mul.f r6.y, r20.z, r6.y
-mov.f32f32 r19.w, r19.w
-min.f r7.y, r7.y, r8.x
-mov.f32f32 r8.x, r4.w
-min.f r5.y, r5.y, r6.z
-mov.f32f32 r6.y, r6.y
-exp2 r6.z, r17.w
+cov.u32f32 r15.x, r15.x
+mul.f r21.x, r21.x, r10.y
+mad.f32 r17.y, r11.y, r17.y, r2.x
+mul.f r4.z, r19.z, r4.z
+cmps.f.ne r15.x, r15.x, c8.y
+absneg.f r19.z, (neg)r16.w
+log2 r20.z, r20.z
+(ss)mul.f r20.z, c9.x, r20.z
+mov.f32f32 r21.y, r17.y
+log2 r21.x, r21.x
+(ss)mul.f r21.x, c9.x, r21.x
+sel.b32 r19.z, r19.z, r15.x, r20.w
+max.f r20.w, r8.y, r4.z
+mad.f32 r12.x, r7.x, r12.x, r0.x
+mov.f32f32 r21.z, r21.y
+mul.f r19.z, r2.w, r19.z
+mov.f32f32 r21.w, c8.y
+mov.f32f32 r22.x, c10.y
+exp2 r20.z, r20.z
nop
-(ss)rcp r6.z, r6.z
-(ss)mov.f32f32 r6.z, r6.z
-max.f r5.y, r7.y, r5.y
-min.f r5.z, r5.w, r5.z
-log2 r5.w, r20.y
-(ss)mul.f r5.w, c9.x, r5.w
-mul.f r6.z, c9.w, r6.z
-max.f r7.y, r7.w, r6.y
-max.f r5.y, r5.y, r5.z
-mov.f32f32 r5.z, r5.w
-mov.f32f32 r5.w, r6.z
-mov.f32f32 r6.z, r7.y
-mov.f32f32 r5.y, r5.y
-sqrt r7.y, r8.x
-(ss)mov.f32f32 r7.y, r7.y
-add.f r5.w, c7.z, (neg)r5.w
-min.f r6.z, r21.z, r6.z
-(ss)mov.f32f32 r8.x, r16.y
-add.f r5.y, r18.z, (neg)r5.y
-mov.f32f32 r5.w, r5.w
-exp2 r5.z, r5.z
+(ss)rcp r20.z, r20.z
+(ss)mul.f r20.z, c9.w, r20.z
+exp2 r21.x, r21.x
nop
-(ss)rcp r5.z, r5.z
-(ss)mov.f32f32 r5.z, r5.z
-mov.f32f32 r5.y, r5.y
-mul.f r16.y, r18.w, r5.w
-mul.f r17.w, r20.w, r5.w
-mul.f r5.w, r14.y, r5.w
-mad.f32 r5.y, c11.w, r5.y, c7.z
-mov.f32f32 r14.y, r16.y
-mov.f32f32 r16.y, r3.w
-mov.f32f32 r17.w, r17.w
-mov.f32f32 r5.w, r5.w
-mov.f32f32 r5.y, r5.y
-mad.f32 r7.y, c8.z, r16.y, r7.y
-mul.f r5.z, c9.w, r5.z
-mad.f32 r8.x, r8.x, r10.z, r3.x
-mov.f32f32 r10.z, r19.y
-mov.f32f32 r7.y, r7.y
-mov.f32f32 r5.z, r5.z
-rcp r5.y, r5.y
-(ss)mov.f32f32 r5.y, r5.y
-mov.f32f32 r8.x, r8.x
-mul.f r16.y, r7.y, c10.x
-add.f r5.z, c7.z, (neg)r5.z
-mul.f r5.y, r19.w, r5.y
-mov.f32f32 r18.z, r8.x
-mov.f32f32 r16.y, r16.y
-absneg.f r18.w, (neg)c0.x
-mov.f32f32 r5.y, r5.y
-mov.f32f32 r5.z, r5.z
-add.f r18.z, c10.w, (neg)r18.z
-mov.f32f32 r18.w, r18.w
-add.f r10.z, r10.z, (neg)c1.y
-mul.f r17.y, r17.y, r5.z
-add.f r19.w, r9.x, (neg)c1.x
-mad.f32 r16.y, c8.z, r18.w, (neg)r16.y
-cmps.f.lt r4.w, r4.w, c8.y
-exp2 r5.y, r5.y
-(ss)mov.f32f32 r5.y, r5.y
-mul.f r12.w, r12.w, r5.z
-mul.f r5.z, r19.z, r5.z
-cov.u32f32 r4.w, r4.w
-add.f r5.y, c7.z, r5.y
-mov.f32f32 r12.w, r12.w
-mov.f32f32 r5.z, r5.z
-cmps.f.ne r4.w, r4.w, c8.y
-mov.f32f32 r18.w, c8.y
-mov.f32f32 r5.y, r5.y
-mov.f32f32 r19.z, r21.y
-mov.f32f32 r5.x, r5.x
-sel.b32 r16.y, r18.w, r4.w, r16.y
-mov.f32f32 r17.y, r17.y
-mul.f r3.y, r18.z, r3.y
-mul.f r5.x, r5.x, c8.x
-mov.f32f32 r18.z, r16.y
-rcp r5.y, r5.y
-(ss)mov.f32f32 r5.y, r5.y
-mov.f32f32 r18.w, r9.z
-mov.f32f32 r5.x, r5.x
-mul.f r18.z, r21.x, r18.z
-mul.f r20.y, r7.y, c10.y
-mov.f32f32 r20.z, r18.w
-add.f r5.x, c7.z, (neg)r5.x
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r20.y, r20.y
-mov.f32f32 r3.w, r3.w
-absneg.f r20.z, (abs)r20.z
-mov.f32f32 r5.x, r5.x
-mov.f32f32 r20.w, r8.x
-mad.f32 r3.w, c8.z, r3.w, (neg)r20.y
-mov.f32f32 r20.y, c8.y
-cmps.f.lt r20.z, c11.x, r20.z
-mov.f32f32 r21.x, r5.x
-add.f r20.w, c7.z, (neg)r20.w
-sel.b32 r3.w, r20.y, r4.w, r3.w
-cov.u32f32 r20.y, r20.z
-mad.f32 r10.z, r10.z, r10.z, r18.x
-mul.f r18.x, r19.w, r16.z
-mov.f32f32 r19.w, r3.w
-mov.f32f32 r20.y, r20.y
-sqrt r20.z, r21.x
-(ss)mov.f32f32 r20.z, r20.z
-(ss)mov.f32f32 r21.x, r4.x
-mul.f r1.z, r20.w, r1.z
-cmps.f.ne r20.y, r20.y, c8.y
-mov.f32f32 r20.w, c8.y
-rcp r19.w, r19.w
-(ss)mul.f r18.z, r18.z, r19.w
-mad.f32 r20.z, c8.z, r21.x, r20.z
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r20.w, r20.w
-mov.f32f32 r18.z, r18.z
-mov.f32f32 r21.x, r9.x
-mov.f32f32 r21.z, c10.x
-mov.f32f32 r20.z, r20.z
-max.f r21.w, r3.y, r1.z
-add.f r18.z, r21.x, (neg)r18.z
-sel.b32 r20.w, r20.w, r20.y, r21.z
-mov.f32f32 r21.x, r18.y
-mul.f r21.z, r20.z, c10.x
-mov.f32f32 r18.z, r18.z
-mov.f32f32 r21.w, r21.w
-absneg.f r21.x, (abs)r21.x
-mov.f32f32 r21.z, r21.z
-mad.f32 r18.z, c10.z, r18.z, c7.x
-absneg.f r22.x, (neg)c0.x
-cmps.f.lt r21.x, c11.x, r21.x
-min.f r6.z, r6.z, r21.w
-mov.f32f32 r18.z, r18.z
-mov.f32f32 r21.w, r22.x
-cov.u32f32 r21.x, r21.x
-mov.f32f32 r6.z, r6.z
-mov.f32f32 r18.z, r18.z
-mad.f32 r21.z, c8.z, r21.w, (neg)r21.z
-cmps.f.lt r5.x, r5.x, c8.y
-mov.f32f32 r21.x, r21.x
-mov.f32f32 r18.z, r18.z
-mov.f32f32 r21.w, r15.z
-mov.f32f32 r22.x, r1.x
-cmps.f.ne r21.x, r21.x, c8.y
-mov.f32f32 r22.y, r18.z
-mov.f32f32 r18.z, r19.x
-mul.f r7.y, r7.y, c10.x
-mov.f32f32 r22.z, r18.y
-cov.u32f32 r5.x, r5.x
-mad.f32 r21.w, r22.x, r6.z, r21.w
-mov.f32f32 r7.y, r7.y
-absneg.f r22.x, (neg)c0.z
-mov.f32f32 r22.z, r22.z
-cmps.f.ne r5.x, r5.x, c8.y
-mov.f32f32 r22.w, c8.y
-mov.f32f32 r22.x, r22.x
-absneg.f r22.z, (neg)r22.z
-mov.f32f32 r21.w, r21.w
-sel.b32 r21.z, r22.w, r5.x, r21.z
-mad.f32 r7.y, c8.z, r22.x, (neg)r7.y
+(ss)rcp r21.x, r21.x
+sel.b32 r21.w, r21.w, r19.w, r22.x
mov.f32f32 r22.x, c8.y
-mov.f32f32 r22.z, r22.z
-mov.f32f32 r22.w, r21.z
-add.f r21.w, r21.w, c11.z
-sel.b32 r4.w, r22.x, r4.w, r7.y
-mov.f32f32 r7.y, r22.z
-mul.f r19.z, r19.z, r22.w
-mul.f r22.x, r20.z, c10.y
-mov.f32f32 r22.z, r4.w
-sel.b32 r7.y, r7.y, r21.x, r20.w
-mov.f32f32 r20.w, r21.w
-mov.f32f32 r21.w, r22.x
-mul.f r18.z, r18.z, r22.z
-mul.f r7.y, r1.w, r7.y
+add.f r20.z, c7.z, (neg)r20.z
+(ss)mul.f r21.x, c9.w, r21.x
+add.f r21.z, c7.z, (neg)r21.z
+sel.b32 r21.w, r22.x, r15.x, r21.w
+mov.f32f32 r22.x, r20.z
+mul.f r6.y, r6.y, r20.z
+add.f r20.z, c7.z, (neg)r21.x
+mad.f32 r0.w, r0.w, r21.w, r19.z
+mov.f32f32 r19.z, r16.y
+mul.f r16.x, r16.x, r22.x
+mul.f r19.y, r19.y, r22.x
+add.f r21.x, r4.y, (neg)c1.x
+absneg.f r21.w, (neg)r19.z
+mov.f32f32 r22.x, c10.x
+mov.f32f32 r22.y, r20.z
+mul.f r0.y, r0.y, c10.x
+mov.f32f32 r22.z, r12.x
+sel.b32 r21.w, r21.w, r19.w, r22.x
mov.f32f32 r22.x, c8.y
-mov.f32f32 r4.x, r4.x
-mul.f r18.z, r18.z, r19.w
-(ss)mul.f r19.w, c11.y, r20.w
-mov.f32f32 r20.w, r22.x
-mad.f32 r4.x, c8.z, r4.x, (neg)r21.w
-mov.f32f32 r18.z, r18.z
-mov.f32f32 r21.w, r7.x
-mov.f32f32 r22.x, c10.y
-mov.f32f32 r22.z, c8.y
-mov.f32f32 r19.w, r19.w
-add.f r18.z, r21.w, (neg)r18.z
-sel.b32 r20.w, r20.w, r20.y, r22.x
-mov.f32f32 r21.w, c8.y
-sel.b32 r4.x, r22.z, r5.x, r4.x
-mov.f32f32 r18.z, r18.z
-min.f r0.y, r9.y, r0.y
-min.f r6.y, r7.w, r6.y
-mov.f32f32 r7.w, r21.w
-mad.f32 r9.y, c10.z, r18.z, c7.x
-mov.f32f32 r18.z, r4.x
-max.f r0.y, r0.y, r6.y
-min.f r1.z, r3.y, r1.z
-mov.f32f32 r3.y, r9.y
-mov.f32f32 r6.y, r7.w
-mov.f32f32 r7.w, r10.z
-mov.f32f32 r9.y, r18.x
-mov.f32f32 r3.y, r3.y
-sel.b32 r6.y, r6.y, r21.x, r20.w
-rcp r10.z, r18.z
-(ss)mul.f r18.x, r19.z, r10.z
-max.f r0.y, r0.y, r1.z
-mov.f32f32 r1.z, r3.y
-mad.f32 r0.w, r0.w, r6.y, r7.y
-mov.f32f32 r3.y, r18.x
-mov.f32f32 r6.y, r8.w
-mov.f32f32 r22.z, r1.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.z, r18.w
-add.f r3.y, r6.y, (neg)r3.y
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r6.y, r9.z
-mul.f r7.y, (neg)r16.y, r9.y
-sam (f32)(x)r21.w, r22.y, s#1, t#1
-(sy)mul.f r9.y, r21.w, c9.y
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r3.y, r3.y
-add.f r0.y, r6.z, (neg)r0.y
-mov.f32f32 r6.z, r9.y
-add.f r9.y, r19.x, (neg)c1.y
-absneg.f r1.z, (neg)r1.z
-mad.f32 r3.y, c10.z, r3.y, c7.x
-mov.f32f32 r0.y, r0.y
-mul.f r9.y, r9.y, r16.z
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r16.y, c10.x
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r9.y, r9.y
-mad.f32 r0.y, c11.w, r0.y, c7.z
-sel.b32 r1.z, r1.z, r20.y, r16.y
-mov.f32f32 r3.y, r3.y
-mad.f32 r3.w, (neg)r3.w, r9.y, r7.y
-mov.f32f32 r7.y, c8.y
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r3.w, r3.w
-add.f r9.y, r7.x, (neg)c1.z
-mov.f32f32 r7.y, r7.y
-mov.f32f32 r21.w, r3.y
-mov.f32f32 r3.y, r21.y
-mul.f r9.y, r9.y, r16.z
-mov.f32f32 r7.y, r7.y
-mul.f r16.y, r20.z, c10.x
-rcp r0.y, r0.y
-(ss)mov.f32f32 r0.y, r0.y
-mov.f32f32 r9.y, r9.y
-sel.b32 r1.z, r7.y, r21.x, r1.z
-mov.f32f32 r7.y, r16.y
-absneg.f r16.y, (neg)c0.z
-mad.f32 r3.w, (neg)r4.w, r9.y, r3.w
-mad.f32 r0.w, r2.z, r1.z, r0.w
-mul.f r0.y, r19.w, r0.y
-mov.f32f32 r1.z, r16.y
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.y, r0.y
-mad.f32 r1.z, c8.z, r1.z, (neg)r7.y
-max.f r3.w, c8.y, r3.w
+mul.f r21.x, r21.x, r19.x
+absneg.f r22.w, (neg)c0.y
+mul.f r20.x, r20.x, r22.y
+sel.b32 r21.w, r22.x, r15.x, r21.w
+add.f r22.x, r9.y, (neg)c1.x
+mul.f r17.z, r17.z, r22.y
+absneg.f r22.y, (neg)c0.z
+mad.f32 r0.w, r1.z, r21.w, r0.w
+mul.f r21.w, r22.w, r22.w
+mul.f r22.x, r22.x, r10.y
+absneg.f r23.x, (neg)c0.y
max.f r0.w, c8.y, r0.w
-mov.f32f32 r4.w, c8.y
-add.f r6.y, r6.y, (neg)c1.z
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r0.w, r0.w
-sel.b32 r1.z, r4.w, r5.x, r1.z
-exp2 r0.y, r0.y
-(ss)mov.f32f32 r0.y, r0.y
-mul.f r3.w, r3.w, c7.x
-mov.f32f32 r4.w, r0.w
-mov.f32f32 r5.x, r1.z
-add.f r0.y, c7.z, r0.y
-mov.f32f32 r3.w, r3.w
-mul.f r4.w, r4.w, r5.y
-mul.f r3.y, r3.y, r5.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r5.x, r3.w
-mov.f32f32 r4.w, r4.w
-mad.f32 r5.y, r6.y, r6.y, r7.w
-mul.f r3.y, r3.y, r10.z
-mul.f r5.x, r5.x, r6.z
-mov.f32f32 r6.y, r15.w
-rcp r0.y, r0.y
-mov.f32f32 r5.y, r5.y
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r5.x, r5.x
-mad.f32 r6.z, c7.x, r9.x, c7.x
-mov.f32f32 r7.y, r20.x
-(ss)mov.f32f32 r0.y, r0.y
-mov.f32f32 r7.w, r8.x
-mov.f32f32 r6.z, r6.z
-sqrt r5.y, r5.y
-(ss)mov.f32f32 r5.y, r5.y
-mov.f32f32 r9.x, c2.x
-add.f r3.y, r7.y, (neg)r3.y
-mov.f32f32 r6.z, r6.z
-absneg.f r7.y, (abs)r7.w
-rcp r6.y, r6.y
-add.f r7.w, c7.w, r17.x
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r16.y, r6.z
-mad.f32 r6.z, c7.x, r7.x, c7.x
-rcp r7.x, r9.x
-(ss)mul.f r5.y, r5.y, r7.x
-mad.f32 r3.y, c10.z, r3.y, c7.x
-cmps.f.lt r7.x, c11.x, r7.y
-mov.f32f32 r6.z, r6.z
-mov.f32f32 r5.y, r5.y
-mov.f32f32 r3.y, r3.y
-cov.u32f32 r7.x, r7.x
-mov.f32f32 r6.z, r6.z
-mov.f32f32 r7.y, r7.w
-add.f r7.w, r16.x, (neg)c1.x
-(ss)rcp r9.x, r14.z
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r16.z, r6.z
-log2 r5.y, r5.y
-(ss)mul.f r5.y, c9.y, r5.y
-mov.f32f32 r6.z, r7.x
-mov.f32f32 r3.y, r3.y
-mul.f r6.y, r7.y, r6.y
-mov.f32f32 r5.y, r5.y
-mul.f r7.x, r7.w, r7.w
-sam (f32)(x)r22.x, r16.y, s#2, t#2
-(sy)cmps.f.lt r7.y, r19.x, r22.x
-mov.f32f32 r22.x, r3.y
-cmps.f.ne r3.y, r6.z, c8.y
-mov.f32f32 r6.z, c8.y
-cov.u32f32 r7.y, r7.y
-exp2 r5.y, r5.y
+add.f r21.w, c7.z, (neg)r21.w
+mad.f32 r0.y, c8.z, r22.y, (neg)r0.y
+add.f r22.y, c10.w, (neg)r22.z
+mov.f32f32 r23.y, r0.w
+mul.f r21.w, r21.w, c8.x
+mul.f r23.z, r23.x, r23.x
+mov.f32f32 r23.w, c8.y
+mul.f r5.x, r23.y, r5.x
+add.f r23.y, r16.w, (neg)c1.x
+add.f r21.w, c7.z, (neg)r21.w
+add.f r23.z, c7.z, (neg)r23.z
+sel.b32 r0.y, r23.w, r2.y, r0.y
+mul.f r2.y, r23.y, r23.y
+add.f r23.y, r14.z, (neg)c1.y
+mov.f32f32 r23.w, r21.w
+mul.f r23.z, r23.z, c8.x
+absneg.f r0.y, (neg)r0.y
+mad.f32 r2.y, r23.y, r23.y, r2.y
+add.f r23.y, r16.y, (neg)c1.z
+add.f r23.z, c7.z, (neg)r23.z
+mul.f r6.w, r22.y, r6.w
+add.f r22.y, c7.w, (neg)r22.z
+mad.f32 r2.y, r23.y, r23.y, r2.y
+sqrt r23.y, r23.w
+(ss)mad.f32 r23.y, c8.z, r22.w, r23.y
+(ss)mov.f32f32 r23.w, r23.z
+rcp r24.x, r0.y
+mul.f r4.w, r22.y, r4.w
+(ss)mul.f r21.z, r21.z, r24.x
+mov.f32f32 r22.y, r23.y
+add.f r24.x, c10.w, (neg)r21.y
+sqrt r2.y, r2.y
+mov.f32f32 r24.y, c2.x
+rcp r24.z, r0.y
+max.f r24.w, r6.w, r4.w
+sqrt r23.w, r23.w
+(ss)mad.f32 r23.w, c8.z, r23.x, r23.w
+mul.f r24.x, r24.x, r24.z
+mul.f r24.z, r22.y, c10.x
+absneg.f r25.x, (neg)c0.x
+mov.f32f32 r25.y, r23.w
+rcp r24.y, r24.y
+(ss)mul.f r2.y, r2.y, r24.y
+(ss)max.f r24.y, r24.x, r21.z
+min.f r20.w, r20.w, r24.w
+mad.f32 r14.w, r14.w, r17.w, r2.x
+mul.f r17.w, r25.y, c10.x
+absneg.f r24.w, (neg)c0.x
+min.f r18.w, r18.w, r24.y
+log2 r2.y, r2.y
+(ss)mul.f r2.y, c9.y, r2.y
+mad.f32 r24.y, c8.z, r25.x, (neg)r24.z
+cmps.f.lt r21.w, r21.w, c8.y
+mad.f32 r17.w, c8.z, r24.w, (neg)r17.w
+cmps.f.lt r23.z, r23.z, c8.y
+mov.f32f32 r24.z, r18.w
+mov.f32f32 r24.w, r14.w
+exp2 r2.y, r2.y
nop
-(ss)rcp r5.y, r5.y
-(ss)mov.f32f32 r5.y, r5.y
-mov.f32f32 r6.z, r6.z
-mov.f32f32 r7.y, r7.y
-sam (f32)(x)r21.w, r21.w, s#1, t#1
-(sy)mul.f r7.w, r21.w, c9.y
-mul.f r5.y, c9.w, r5.y
-mov.f32f32 r9.y, c10.x
-cmps.f.ne r7.y, r7.y, c8.y
-mov.f32f32 r7.w, r7.w
-add.f r10.z, r8.w, (neg)c1.x
-mov.f32f32 r5.y, r5.y
-sel.b32 r3.w, r5.x, r7.y, r3.w
-sel.b32 r5.x, r6.z, r3.y, r9.y
-absneg.f r6.z, (abs)r16.x
-add.f r5.y, c7.z, (neg)r5.y
-add.f r7.y, r14.y, r3.w
-add.f r9.y, r17.w, r3.w
-add.f r3.w, r5.w, r3.w
+(ss)rcp r2.y, r2.y
+(ss)mul.f r2.y, c9.w, r2.y
+cov.u32f32 r21.w, r21.w
+cov.u32f32 r23.z, r23.z
+mov.f32f32 r18.z, r18.z
+add.f r2.y, c7.z, (neg)r2.y
+mul.f r25.x, r16.w, r16.w
+cmps.f.ne r21.w, r21.w, c8.y
+mad.f32 r25.x, r14.z, r14.z, r25.x
+mov.f32f32 r25.z, c8.y
+mad.f32 r25.x, r16.y, r16.y, r25.x
+cmps.f.ne r23.z, r23.z, c8.y
+mov.f32f32 r25.w, c8.y
mov.f32f32 r5.y, r5.y
-mov.f32f32 r5.w, r7.y
-mov.f32f32 r7.y, r9.y
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r9.y, r18.y
-mov.f32f32 r5.w, r5.w
-mov.f32f32 r14.y, r18.y
+add.f r26.x, c10.w, (neg)r24.w
+absneg.f r26.y, (neg)c0.z
+sel.b32 r17.w, r25.w, r23.z, r17.w
+rsq r25.x, r25.x
+(ss)mul.f r25.x, c7.x, r25.x
+sel.b32 r24.y, r25.z, r21.w, r24.y
+min.f r5.y, r5.y, r18.z
+mad.f32 r3.y, c8.z, r26.y, (neg)r3.y
+mul.f r2.y, r25.x, r2.y
+mul.f r18.z, (neg)r24.y, r21.x
+add.f r21.x, r20.y, (neg)c1.y
+mul.f r22.x, (neg)r17.w, r22.x
+mov.f32f32 r25.x, r2.y
+add.f r25.z, r13.x, (neg)c1.y
+mad.f32 r5.x, c7.x, r5.x, r25.x
+mov.f32f32 r25.x, r14.z
+mul.f r21.x, r21.x, r19.x
+mul.f r22.y, r22.y, c10.y
+mul.f r25.z, r25.z, r10.y
+mul.f r1.z, r25.x, r1.z
+mad.f32 r22.y, c8.z, r22.w, (neg)r22.y
+mov.f32f32 r22.w, c8.y
+mul.f r25.x, r25.y, c10.y
+mul.f r1.z, r1.z, r6.z
+mad.f32 r23.x, c8.z, r23.x, (neg)r25.x
+mov.f32f32 r25.x, c8.y
mov.f32f32 r7.y, r7.y
-mov.f32f32 r3.w, r3.w
-mul.f r10.z, r10.z, r14.w
-mul.f r9.y, r9.y, r14.y
-mov.f32f32 r14.y, r19.y
-mov.f32f32 r14.z, r19.y
-mov.f32f32 r10.z, r10.z
-cmps.f.lt r6.z, c11.x, r6.z
-mov.f32f32 r6.y, r6.y
-mad.f32 r9.y, r14.y, r14.z, r9.y
-mul.f r10.z, (neg)r21.z, r10.z
-add.f r14.y, r21.y, (neg)c1.y
-cov.u32f32 r6.z, r6.z
-mov.f32f32 r9.y, r9.y
-mov.f32f32 r14.z, r9.z
-mov.f32f32 r16.y, r9.z
-mul.f r14.y, r14.y, r14.w
-mov.f32f32 r6.z, r6.z
-max.f r6.y, r16.w, r6.y
-mad.f32 r9.y, r14.z, r16.y, r9.y
-mov.f32f32 r14.y, r14.y
-cmps.f.ne r6.z, r6.z, c8.y
-mov.f32f32 r14.z, r16.x
-mov.f32f32 r6.y, r6.y
-add.f r16.y, r15.z, (neg)c1.y
-add.f r15.y, c7.z, r15.y
-rsq r9.y, r9.y
-(ss)mov.f32f32 r9.y, r9.y
-mad.f32 r4.x, (neg)r4.x, r14.y, r10.z
-absneg.f r10.z, (neg)r14.z
-min.f r6.y, r10.w, r6.y
-mul.f r9.y, c7.x, r9.y
-mov.f32f32 r4.x, r4.x
-add.f r10.w, r20.x, (neg)c1.z
-mov.f32f32 r10.z, r10.z
-mul.f r5.y, r9.y, r5.y
-mov.f32f32 r9.y, r8.z
-mad.f32 r7.x, r16.y, r16.y, r7.x
-mov.f32f32 r14.y, r15.y
-mov.f32f32 r5.y, r5.y
-mul.f r10.w, r10.w, r14.w
-mov.f32f32 r10.z, r10.z
-mov.f32f32 r14.z, r9.y
-mov.f32f32 r14.w, r5.y
-mov.f32f32 r10.w, r10.w
-mad.f32 r4.w, c7.x, r4.w, r14.w
-sel.b32 r5.x, r10.z, r6.z, r5.x
-mov.f32f32 r5.y, r5.y
-mad.f32 r1.z, (neg)r1.z, r10.w, r4.x
-mov.f32f32 r4.x, r4.w
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r4.w, r19.y
-mov.f32f32 r1.z, r1.z
-mul.f r5.x, r2.x, r5.x
-mov.f32f32 r10.z, c8.y
-mov.f32f32 r10.w, r4.w
-mov.f32f32 r1.w, r1.w
+add.f r1.z, r16.y, (neg)r1.z
+sel.b32 r22.y, r22.w, r21.w, r22.y
+sel.b32 r22.w, r25.x, r23.z, r23.x
+mov.f32f32 r8.z, r8.z
+mad.f32 r25.y, c10.z, r1.z, c7.x
+mul.f r1.z, r14.z, r2.w
+mad.f32 r2.w, (neg)r22.y, r21.x, r18.z
+add.f r18.z, r5.z, (neg)c1.z
+mad.f32 r21.x, (neg)r22.w, r25.z, r22.x
+mul.f r1.z, r1.z, r6.z
+add.f r6.z, r18.x, (neg)c1.z
+min.f r7.y, r8.z, r7.y
+mov.f32f32 r8.z, c8.y
+add.f r1.z, r16.w, (neg)r1.z
+mul.f r18.z, r18.z, r19.x
+mul.f r19.x, r23.y, c10.x
+absneg.f r22.x, (neg)c0.z
+mad.f32 r25.x, c10.z, r1.z, c7.x
+mul.f r1.z, r6.z, r10.y
+mul.f r6.z, r23.w, c10.x
+max.f r5.y, r5.y, r7.y
+mad.f32 r7.y, c8.z, r22.x, (neg)r19.x
+mov.f32f32 r10.y, c8.y
+absneg.f r19.x, (neg)c0.z
+sam (f32)(xy)r25.x, r25.x, s#1, t#1
+(sy)mul.f r0.w, r0.w, r25.x
+mov.f32f32 r21.z, r21.z
+sel.b32 r1.w, r8.z, r1.w, r3.y
+mad.f32 r3.y, c8.z, r19.x, (neg)r6.z
+mul.f r0.w, r0.w, c7.w
+sel.b32 r6.z, r10.y, r21.w, r7.y
+mov.f32f32 r7.y, c8.y
+mov.f32f32 r8.z, r24.x
+mad.f32 r0.w, r0.w, r25.y, r2.y
+mad.f32 r21.w, c7.x, r16.w, c7.x
+mad.f32 r22.x, c7.x, r16.y, c7.x
+mad.f32 r2.y, (neg)r6.z, r18.z, r2.w
+sel.b32 r2.w, r7.y, r23.z, r3.y
+min.f r3.y, r8.z, r21.z
+absneg.f r1.w, (neg)r1.w
+max.f r2.y, c8.y, r2.y
+mad.f32 r1.z, (neg)r2.w, r1.z, r21.x
+sam (f32)(x)r23.x, r21.w, s#2, t#2
+(sy)cmps.f.lt r7.y, r14.z, r23.x
+max.f r3.y, r5.y, r3.y
+mul.f r2.y, r2.y, c7.x
max.f r1.z, c8.y, r1.z
-mov.f32f32 r10.z, r10.z
-mov.f32f32 r14.w, c10.y
-mul.f r1.w, r10.w, r1.w
-mov.f32f32 r1.z, r1.z
-rcp r10.w, r14.z
-(ss)absneg.f r14.z, (neg)r3.x
-sel.b32 r10.z, r10.z, r3.y, r14.w
-mul.f r1.w, r1.w, r7.z
+cov.u32f32 r5.y, r7.y
+mul.f r7.y, r20.y, r24.y
+rcp r8.z, r22.y
+add.f r3.y, r24.z, (neg)r3.y
mul.f r1.z, r1.z, c7.x
-mov.f32f32 r14.w, c8.y
-add.f r15.y, c10.w, r14.z
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r16.y, r18.y
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r14.w, r14.w
-(ss)mul.f r10.w, r15.y, r10.w
-mov.f32f32 r15.y, r16.y
-mov.f32f32 r16.y, r1.z
-mov.f32f32 r14.w, r14.w
-mov.f32f32 r10.w, r10.w
-add.f r1.w, r15.y, (neg)r1.w
-mul.f r7.w, r16.y, r7.w
-sel.b32 r10.z, r14.w, r6.z, r10.z
-mov.f32f32 r14.w, r9.y
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r7.w, r7.w
-mad.f32 r8.w, c7.x, r8.w, c7.x
-mad.f32 r1.x, r1.x, r10.z, r5.x
-mad.f32 r1.w, c10.z, r1.w, c7.x
-mov.f32f32 r5.x, r7.x
-rcp r7.x, r14.w
-add.f r10.z, c7.z, r14.z
-mov.f32f32 r8.w, r8.w
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r14.z, r8.x
-mov.f32f32 r8.w, r8.w
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r10.z, r10.z
-(ss)add.f r14.w, r8.x, (neg)c1.z
-mul.f r9.x, r14.y, r9.x
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r16.y, r8.w
-mad.f32 r8.w, c7.x, r20.x, c7.x
-mov.f32f32 r14.y, r14.z
-mov.f32f32 r16.w, r1.w
-mov.f32f32 r1.w, r4.w
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r4.w, r8.w
-absneg.f r8.w, (neg)r14.y
-(ss)mul.f r7.x, r10.z, r7.x
-mul.f r1.w, r1.w, r2.z
-mov.f32f32 r2.z, r4.w
-mov.f32f32 r4.w, r8.w
-mov.f32f32 r8.w, c10.x
-mul.f r1.w, r1.w, r7.z
-mov.f32f32 r16.z, r2.z
-mov.f32f32 r2.z, r7.x
-mad.f32 r5.x, r14.w, r14.w, r5.x
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r7.x, r9.z
-sel.b32 r4.w, r4.w, r3.y, r8.w
-mov.f32f32 r7.z, c8.y
-sam (f32)(x)r21.z, r16.y, s#2, t#2
-(sy)cmps.f.lt r8.w, r21.y, r21.z
-mov.f32f32 r7.x, r7.x
-max.f r2.z, r10.w, r2.z
-mov.f32f32 r5.x, r5.x
-mov.f32f32 r9.x, r9.x
-add.f r1.w, r7.x, (neg)r1.w
-cov.u32f32 r7.x, r8.w
-mov.f32f32 r7.z, r7.z
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r7.x, r7.x
-mov.f32f32 r7.z, r7.z
-min.f r2.z, r6.y, r2.z
-mad.f32 r1.w, c10.z, r1.w, c7.x
-cmps.f.ne r6.y, r7.x, c8.y
-sel.b32 r4.w, r7.z, r6.z, r4.w
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r1.w, r1.w
-sel.b32 r1.z, r7.w, r6.y, r1.z
-mad.f32 r1.x, r6.x, r4.w, r1.x
-mov.f32f32 r4.w, r13.x
-mov.f32f32 r1.w, r1.w
-add.f r6.y, r12.w, r1.z
-add.f r5.z, r5.z, r1.z
-add.f r1.z, r17.y, r1.z
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r6.y, r6.y
-mov.f32f32 r5.z, r5.z
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r17.x, r1.w
-mov.f32f32 r6.y, r6.y
-mov.f32f32 r5.z, r5.z
+cmps.f.ne r5.y, r5.y, c8.y
+(ss)mul.f r7.y, r7.y, r8.z
+mul.f r10.y, r13.x, r17.w
+rcp r16.y, r22.w
+mad.f32 r3.y, c11.w, r3.y, c7.z
+sel.b32 r0.w, r0.w, r5.y, r5.x
+mov.f32f32 r5.x, r16.w
+add.f r5.y, r4.y, (neg)r7.y
+(ss)mul.f r7.y, r10.y, r16.y
+rcp r10.y, r1.w
+(ss)mul.f r10.y, r26.x, r10.y
+mad.f32 r21.z, c7.x, r5.x, c7.x
+(ss)mad.f32 r21.w, c7.x, r19.z, c7.x
+mad.f32 r5.x, c10.z, r5.y, c7.x
+mul.f r5.y, r20.y, r6.z
+add.f r6.z, r9.y, (neg)r7.y
+rcp r3.y, r3.y
+add.f r7.y, c7.z, (neg)r24.w
+rcp r17.w, r1.w
+mad.f32 r15.w, r1.y, r18.w, r15.w
+mul.f r5.y, r5.y, r8.z
+sam (f32)(xyz)r21.z, r21.z, s#0, t#0
+mad.f32 r18.w, c7.x, r16.w, c7.x
+mad.f32 r18.z, c7.x, r14.z, c7.z
+mad.f32 r22.w, c10.z, r6.z, c7.x
+add.f r5.y, r5.z, (neg)r5.y
+mul.f r2.w, r13.x, r2.w
+add.f r6.z, r15.w, c11.z
+(ss)mul.f r7.y, r7.y, r17.w
+mad.f32 r5.y, c10.z, r5.y, c7.x
+sam (f32)(xyz)r23.x, r18.z, s#0, t#0
+(sy)sel.b32 r8.z, r23.z, r19.w, r22.x
+(ss)mad.f32 r18.w, c7.x, r13.y, c7.x
+mad.f32 r18.z, c7.x, r10.w, c7.z
+sel.b32 r10.w, r23.y, r19.w, r21.w
+sel.b32 r13.y, r23.x, r19.w, r21.z
+mul.f r2.w, r2.w, r16.y
+sam (f32)(x)r21.z, r5.x, s#1, t#1
+(sy)(ss)mul.f r5.x, r21.z, c9.y
+mul.f r5.y, c11.y, r6.z
+max.f r6.z, r10.y, r7.y
+sam (f32)(xyz)r21.z, r18.z, s#0, t#0
+(sy)sel.b32 r8.z, r22.x, r15.x, r8.z
+sel.b32 r10.w, r21.w, r15.x, r10.w
+sel.b32 r13.y, r21.z, r15.x, r13.y
+mul.f r5.x, r2.y, r5.x
+mul.f r8.z, r8.z, r0.w
+cmps.f.lt r14.z, r14.z, c12.x
+mul.f r10.w, r10.w, r0.w
+mul.f r0.w, r13.y, r0.w
+mov.f32f32 r2.y, r2.y
+cov.u32f32 r13.y, r14.z
+(ss)mad.f32 r18.z, c7.x, r4.y, c7.x
+mad.f32 r18.w, c7.x, r5.z, c7.x
+add.f r2.w, r18.x, (neg)r2.w
+cmps.f.ne r4.y, r13.y, c8.y
+mul.f r5.z, c0.x, r11.w
+mul.f r3.y, r5.y, r3.y
+mad.f32 r5.y, c0.y, r10.x, r5.z
+mad.f32 r23.x, c10.z, r2.w, c7.x
+mad.f32 r2.w, c0.z, r14.y, r5.y
+sam (f32)(x)r21.z, r18.z, s#2, t#2
+(sy)cmps.f.lt r5.y, r20.y, r21.z
+min.f r5.z, r20.w, r6.z
+mul.f r6.z, c7.x, r15.z
+max.f r2.w, c8.y, r2.w
+cov.u32f32 r5.y, r5.y
+sam (f32)(x)r21.z, r22.w, s#1, t#1
+(sy)mul.f r13.y, r21.z, c9.y
+exp2 r3.y, r3.y
+mov.f32f32 r14.z, r5.z
+mov.f32f32 r8.y, r8.y
+cmps.f.ne r5.y, r5.y, c8.y
+mul.f r13.y, r1.z, r13.y
+log2 r2.w, r2.w
+(ss)mul.f r2.w, c12.y, r2.w
mov.f32f32 r1.z, r1.z
-mad.f32 r1.w, r4.w, r2.z, r0.z
-mov.f32f32 r1.x, r1.x
-sqrt r4.w, r5.x
-(ss)mov.f32f32 r4.w, r4.w
+mad.f32 r15.z, c7.x, r9.y, c7.x
+add.f r3.y, c7.z, r3.y
+mov.f32f32 r4.z, r4.z
+mad.f32 r15.w, c7.x, r18.x, c7.x
+sel.b32 r2.y, r5.x, r5.y, r2.y
+exp2 r2.w, r2.w
+mov.f32f32 r18.w, r10.x
+mov.f32f32 r19.x, r14.y
+mov.f32f32 r18.z, r11.w
+add.f r5.x, r16.x, r2.y
+add.f r5.y, r19.y, r2.y
+add.f r2.y, r6.y, r2.y
+sam (f32)(x)r15.z, r15.z, s#2, t#2
+rcp r3.y, r3.y
+(sy)cmps.f.lt r6.y, r13.x, r15.z
+absneg.f r9.y, (abs)r21.y
+min.f r4.z, r8.y, r4.z
(ss)nop
-sam (f32)(xy)r16.y, r16.w, s#1, t#1
-(sy)mul.f r0.w, r0.w, r16.y
-mov.f32f32 r1.w, r1.w
-max.f r1.x, c8.y, r1.x
-mov.f32f32 r5.x, c2.x
-mul.f r0.w, r0.w, c7.w
-mov.f32f32 r7.x, r1.w
-mov.f32f32 r1.x, r1.x
-max.f r7.z, r13.w, r9.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r7.w, r7.x
-mov.f32f32 r8.w, r1.x
-rcp r5.x, r5.x
-(ss)mul.f r4.w, r4.w, r5.x
-mad.f32 r0.w, r0.w, r16.z, r5.y
-(ss)add.f r5.x, c10.w, (neg)r7.w
-rcp r5.y, r12.z
-mul.f r0.y, r8.w, r0.y
+sam.3d (f32)(xyz)r15.z, r18.z, s#3, t#3
+(sy)(ss)mad.f32 r8.y, c13.z, r2.w, r16.x
+mad.f32 r10.x, c13.y, r2.w, r15.w
+mad.f32 r2.w, c13.x, r2.w, r15.z
+cov.u32f32 r6.y, r6.y
+sel.b32 r8.y, r8.z, r4.y, r8.y
+mov.f32f32 r6.w, r6.w
mov.f32f32 r4.w, r4.w
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r7.w, r18.y
-(ss)mul.f r5.x, r5.x, r5.y
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r5.y, r7.z
-mad.f32 r7.z, c7.x, r7.w, c7.x
-mov.f32f32 r5.x, r5.x
-mov.f32f32 r7.x, r7.x
+sel.b32 r8.z, r10.w, r4.y, r10.x
+sel.b32 r0.w, r0.w, r4.y, r2.w
+cmps.f.ne r2.w, r6.y, c8.y
+min.f r4.y, r6.w, r4.w
+cmps.f.lt r4.w, c11.x, r9.y
+mul.f r6.x, r6.z, r6.x
+add.f r6.y, r15.y, (neg)c1.x
+max.f r4.y, r4.z, r4.y
+mov.f32f32 r4.z, r10.y
+mov.f32f32 r6.z, r7.y
+sel.b32 r1.z, r13.y, r2.w, r1.z
+cov.u32f32 r2.w, r4.w
+mul.f r4.w, r6.x, r20.z
+min.f r4.z, r4.z, r6.z
+add.f r6.x, r20.x, r1.z
+add.f r6.z, r17.z, r1.z
+cmps.f.ne r2.w, r2.w, c8.y
+max.f r4.y, r4.y, r4.z
+mov.f32f32 r4.z, c8.y
+mov.f32f32 r6.w, c10.x
+add.f r1.z, r4.w, r1.z
+add.f r4.y, r14.z, (neg)r4.y
+mul.f r4.w, r6.y, r6.y
+add.f r6.y, r22.z, (neg)c1.y
+sel.b32 r4.z, r4.z, r2.w, r6.w
+mad.f32 r4.y, c11.w, r4.y, c7.z
+absneg.f r6.w, (abs)r16.z
+mad.f32 r4.w, r6.y, r6.y, r4.w
+add.f r6.y, r24.w, (neg)c1.z
+absneg.f r7.y, (abs)r24.w
+mul.f r9.y, r14.x, r13.z
+rcp r10.x, r10.z
+cmps.f.lt r6.w, c11.x, r6.w
+rcp r4.y, r4.y
+mad.f32 r5.z, r1.x, r5.z, r22.z
+mad.f32 r4.w, r6.y, r6.y, r4.w
+cmps.f.lt r6.y, c11.x, r7.y
+add.f r7.y, c10.w, r13.w
+add.f r5.z, r5.z, c11.z
+cov.u32f32 r6.w, r6.w
+cov.u32f32 r6.y, r6.y
+(ss)mul.f r7.y, r7.y, r10.x
+mul.f r5.z, c11.y, r5.z
+cmps.f.ne r6.w, r6.w, c8.y
+absneg.f r10.x, (neg)r17.x
+sqrt r4.w, r4.w
+mov.f32f32 r10.y, c2.x
+(ss)mul.f r4.y, r5.z, r4.y
+cmps.f.ne r5.z, r6.y, c8.y
+max.f r6.y, r7.y, r9.y
+mov.f32f32 r7.y, r12.w
+sel.b32 r4.z, r10.x, r6.w, r4.z
+mov.f32f32 r9.y, c8.y
+mov.f32f32 r10.x, c10.x
+exp2 r4.y, r4.y
+(ss)add.f r4.y, c7.z, r4.y
+mul.f r4.z, r3.w, r4.z
+mov.f32f32 r10.z, c8.y
+mov.f32f32 r10.w, c10.y
+rcp r10.y, r10.y
+(ss)mul.f r4.w, r4.w, r10.y
+sel.b32 r9.y, r9.y, r5.z, r10.x
+absneg.f r9.x, (abs)r9.x
+rcp r4.y, r4.y
+sel.b32 r10.x, r10.z, r2.w, r10.w
+(ss)mov.f32f32 r10.y, c8.y
+rcp r10.z, r7.y
+absneg.f r10.w, (neg)r0.x
+cmps.f.lt r9.x, c11.x, r9.x
log2 r4.w, r4.w
(ss)mul.f r4.w, c9.y, r4.w
-mov.f32f32 r7.z, r7.z
-mov.f32f32 r7.w, r8.y
-add.f r7.x, c7.z, (neg)r7.x
-mov.f32f32 r4.w, r4.w
-mov.f32f32 r7.z, r7.z
-rcp r8.w, r12.x
-mov.f32f32 r9.x, r7.w
-(ss)mul.f r7.x, r7.x, r8.w
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r10.z, r7.z
-mov.f32f32 r7.z, r9.z
-mov.f32f32 r7.x, r7.x
+sel.b32 r10.x, r10.y, r6.w, r10.x
+add.f r10.y, c10.w, r10.w
+cov.u32f32 r9.x, r9.x
+cmps.f.lt r8.x, r8.x, c9.z
+mad.f32 r4.z, r1.y, r10.x, r4.z
+mul.f r10.x, r10.y, r10.z
+cmps.f.ne r9.x, r9.x, c8.y
+absneg.f r10.y, (neg)r15.y
+mov.f32f32 r10.z, r21.y
exp2 r4.w, r4.w
nop
(ss)rcp r4.w, r4.w
-(ss)mov.f32f32 r4.w, r4.w
-mad.f32 r7.z, c7.x, r7.z, c7.x
-max.f r8.w, r5.x, r7.x
-rcp r9.x, r9.x
-absneg.f r12.x, (neg)r0.x
-mul.f r4.w, c9.w, r4.w
-mov.f32f32 r7.z, r7.z
-mov.f32f32 r8.w, r8.w
-mov.f32f32 r10.w, r15.w
-mov.f32f32 r4.w, r4.w
-mov.f32f32 r7.z, r7.z
-add.f r12.z, c10.w, r12.x
-mad.f32 r12.w, r10.w, r2.z, r0.x
+(ss)mul.f r4.w, c9.w, r4.w
+sel.b32 r9.y, r10.y, r9.x, r9.y
+absneg.f r10.y, (neg)r10.z
+mov.f32f32 r11.w, c10.x
add.f r4.w, c7.z, (neg)r4.w
-mov.f32f32 r10.w, r7.z
-(ss)mul.f r7.z, r12.z, r9.x
-(ss)mov.f32f32 r9.x, r12.w
-mov.f32f32 r4.w, r4.w
-mul.f r12.z, r16.x, r16.x
-mov.f32f32 r7.z, r7.z
-mov.f32f32 r12.w, r7.w
-sam (f32)(x)r16.y, r10.z, s#2, t#2
-(ss)mov.f32f32 r10.z, r19.y
-mov.f32f32 r10.w, r9.x
-mad.f32 r12.z, r15.z, r15.z, r12.z
-mov.f32f32 r13.x, r15.z
-(sy)cmps.f.lt r10.z, r10.z, r16.y
-mov.f32f32 r13.w, r10.w
-mov.f32f32 r12.z, r12.z
-rcp r12.w, r12.w
-add.f r12.x, c7.w, r12.x
-cov.u32f32 r10.z, r10.z
-add.f r13.w, c10.w, (neg)r13.w
-mad.f32 r12.z, r8.x, r8.x, r12.z
-mov.f32f32 r12.x, r12.x
-mov.f32f32 r10.z, r10.z
-mul.f r12.y, r13.w, r12.y
-mov.f32f32 r2.x, r2.x
-(ss)mul.f r12.x, r12.x, r12.w
-cmps.f.ne r10.z, r10.z, c8.y
-mov.f32f32 r12.y, r12.y
-(ss)mov.f32f32 r12.w, r10.w
-rsq r12.z, r12.z
-(ss)mov.f32f32 r12.z, r12.z
-sel.b32 r0.w, r0.w, r10.z, r4.x
-mov.f32f32 r4.x, r19.y
-add.f r10.z, c7.w, (neg)r12.w
-mul.f r12.z, c7.x, r12.z
-mov.f32f32 r12.x, r12.x
-mov.f32f32 r4.x, r4.x
-mul.f r10.z, r10.z, r11.y
-mul.f r4.w, r12.z, r4.w
-max.f r7.z, r7.z, r12.x
-mov.f32f32 r4.x, r4.x
-mov.f32f32 r10.z, r10.z
-mov.f32f32 r4.w, r4.w
-mov.f32f32 r7.z, r7.z
-mad.f32 r4.x, c7.x, r4.x, c7.z
-max.f r11.y, r12.y, r10.z
-mov.f32f32 r12.x, r4.w
-mov.f32f32 r4.w, r4.w
-mov.f32f32 r4.x, r4.x
-mov.f32f32 r11.y, r11.y
-mad.f32 r0.y, c7.x, r0.y, r12.x
-mul.f r2.x, r13.x, r2.x
-mov.f32f32 r4.x, r4.x
-min.f r8.w, r8.w, r11.y
-mov.f32f32 r11.y, r9.y
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r12.z, r4.x
-mov.f32f32 r4.x, r18.y
-mad.f32 r2.z, r11.y, r2.z, r3.x
-mul.f r2.x, r2.x, r2.w
-min.f r5.y, r5.y, r7.z
-mov.f32f32 r7.z, r4.x
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r11.y, r16.x
-mov.f32f32 r7.z, r7.z
-mov.f32f32 r12.x, r2.z
-mov.f32f32 r8.z, r8.z
-add.f r2.x, r11.y, (neg)r2.x
-mad.f32 r7.z, c7.x, r7.z, c7.x
-mov.f32f32 r11.y, r12.x
-mov.f32f32 r12.w, r8.z
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r7.z, r7.z
-add.f r11.y, c10.w, (neg)r11.y
-mov.f32f32 r13.x, r1.w
-cmps.f.lt r13.y, r13.y, c9.z
-mov.f32f32 r7.z, r7.z
-mul.f r11.x, r11.y, r11.x
-mad.f32 r2.x, c10.z, r2.x, c7.x
-rcp r11.y, r12.w
-absneg.f r13.w, (neg)r3.x
-(ss)mov.f32f32 r12.w, r7.z
-mov.f32f32 r7.z, r11.x
-mov.f32f32 r11.x, r12.x
-mov.f32f32 r2.x, r2.x
-add.f r12.x, c10.w, r13.w
-add.f r13.x, r13.x, (neg)c1.x
-add.f r11.x, c7.z, (neg)r11.x
-sam (f32)(xyz)r16.y, r12.z, s#0, t#0
-(sy)(ss)mov.f32f32 r12.z, r16.w
-mov.f32f32 r4.x, r4.x
-mov.f32f32 r12.w, r16.z
-mov.f32f32 r14.y, r16.y
-mul.f r10.x, r11.x, r10.x
-mov.f32f32 r4.x, r4.x
-mov.f32f32 r2.x, r2.x
-(ss)mul.f r11.x, r12.x, r11.y
-mov.f32f32 r10.x, r10.x
-mad.f32 r4.x, c7.x, r4.x, c7.x
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r11.x, r11.x
-mov.f32f32 r11.y, r8.z
-mov.f32f32 r4.x, r4.x
-max.f r12.x, r7.z, r10.x
-mov.f32f32 r14.z, r2.x
-mov.f32f32 r2.x, r15.z
-mov.f32f32 r4.x, r4.x
-mov.f32f32 r12.x, r12.x
-mov.f32f32 r6.x, r6.x
-rcp r11.y, r11.y
-add.f r13.w, c7.z, r13.w
-mov.f32f32 r16.y, r4.x
-mov.f32f32 r4.x, r18.w
-min.f r8.w, r8.w, r12.x
-mul.f r2.x, r2.x, r6.x
-mov.f32f32 r6.x, r13.w
-mov.f32f32 r4.x, r4.x
-mov.f32f32 r8.w, r8.w
-mov.f32f32 r10.w, r10.w
-mov.f32f32 r12.x, r1.y
-mad.f32 r4.x, c7.x, r4.x, c7.x
-mul.f r2.x, r2.x, r2.w
-(ss)mul.f r2.w, r6.x, r11.y
-mad.f32 r6.x, r12.x, r8.w, r10.w
-mov.f32f32 r4.x, r4.x
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r10.w, r8.x
-mov.f32f32 r6.x, r6.x
-mov.f32f32 r4.x, r4.x
-mov.f32f32 r2.w, r2.w
-add.f r2.x, r10.w, (neg)r2.x
-add.f r6.x, r6.x, c11.z
-mov.f32f32 r16.z, r4.x
-max.f r2.w, r11.x, r2.w
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r4.x, r6.x
-mul.f r6.x, r13.x, r13.x
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r10.w, r9.x
-sam (f32)(xyz)r16.y, r16.y, s#0, t#0
-(sy)mov.f32f32 r11.x, r16.w
-(ss)mov.f32f32 r11.y, r16.z
-mov.f32f32 r12.x, r16.y
-mul.f r4.x, c11.y, r4.x
-sel.b32 r11.x, r12.z, r20.y, r11.x
-mov.f32f32 r12.z, r19.y
-sel.b32 r11.y, r12.w, r20.y, r11.y
-sel.b32 r12.x, r14.y, r20.y, r12.x
-mov.f32f32 r4.x, r4.x
-mov.f32f32 r12.z, r12.z
-min.f r5.x, r5.x, r7.x
-min.f r7.x, r12.y, r10.z
-mad.f32 r2.x, c10.z, r2.x, c7.x
-mad.f32 r10.z, c7.x, r12.z, c7.z
-min.f r2.w, r5.y, r2.w
-max.f r5.x, r5.x, r7.x
-min.f r5.y, r7.z, r10.x
-mov.f32f32 r7.x, r10.z
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r7.z, r13.z
-mov.f32f32 r7.x, r7.x
-max.f r5.x, r5.x, r5.y
-mov.f32f32 r2.x, r2.x
-mad.f32 r0.z, r7.z, r2.w, r0.z
-mov.f32f32 r12.y, r7.x
-mov.f32f32 r5.y, r9.z
-mov.f32f32 r5.x, r5.x
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r5.y, r5.y
-add.f r5.x, r8.w, (neg)r5.x
-mov.f32f32 r14.w, r2.x
-mov.f32f32 r2.x, r0.z
-mad.f32 r5.y, c7.x, r5.y, c7.x
-mov.f32f32 r5.x, r5.x
-add.f r7.x, r10.w, (neg)c1.y
-add.f r2.x, c10.w, (neg)r2.x
-mov.f32f32 r5.y, r5.y
-mad.f32 r5.x, c11.w, r5.x, c7.z
-sam (f32)(xy)r16.y, r14.z, s#1, t#1
-(sy)mul.f r1.x, r1.x, r16.y
-mov.f32f32 r4.z, r4.z
-mov.f32f32 r5.y, r5.y
-mov.f32f32 r5.x, r5.x
+mul.f r9.y, r4.x, r9.y
+mov.f32f32 r13.x, c8.y
+mov.f32f32 r13.y, c10.y
+sel.b32 r10.y, r10.y, r2.w, r11.w
+mov.f32f32 r11.w, c8.y
+mul.f r13.z, r15.y, r15.y
+sel.b32 r13.x, r13.x, r5.z, r13.y
+mov.f32f32 r13.y, c8.y
+sel.b32 r10.y, r11.w, r6.w, r10.y
+mad.f32 r11.w, r22.z, r22.z, r13.z
+rcp r7.y, r7.y
+add.f r10.w, c7.w, r10.w
+sel.b32 r13.x, r13.y, r9.x, r13.x
+mad.f32 r4.z, r0.y, r10.y, r4.z
+mad.f32 r10.y, r24.w, r24.w, r11.w
+(ss)mul.f r7.y, r10.w, r7.y
+mad.f32 r1.x, r1.x, r13.x, r9.y
+absneg.f r9.y, (neg)r24.w
+mov.f32f32 r10.w, c10.x
+max.f r4.z, c8.y, r4.z
+max.f r7.y, r10.x, r7.y
+cov.u32f32 r8.x, r8.x
+sel.b32 r9.y, r9.y, r5.z, r10.w
+mov.f32f32 r10.x, c8.y
+mov.f32f32 r10.w, r4.z
+rsq r10.y, r10.y
+(ss)mul.f r10.y, c7.x, r10.y
+min.f r6.y, r6.y, r7.y
+sel.b32 r7.y, r10.x, r9.x, r9.y
+mul.f r3.y, r10.w, r3.y
+add.f r9.y, r17.x, (neg)c1.x
+mul.f r4.w, r10.y, r4.w
+mad.f32 r1.x, r1.w, r7.y, r1.x
+mov.f32f32 r7.y, r11.x
+mul.f r9.y, r9.y, r9.y
+add.f r10.x, r18.y, (neg)c1.y
+max.f r1.x, c8.y, r1.x
+mov.f32f32 r10.y, r4.w
+mul.f r4.x, r22.z, r4.x
+mad.f32 r9.y, r10.x, r10.x, r9.y
+mov.f32f32 r10.x, r1.x
+add.f r10.w, r21.y, (neg)c1.z
+mul.f r4.x, r4.x, r7.z
+rcp r11.w, r7.y
+absneg.f r13.x, (neg)r2.x
+mul.f r4.y, r10.x, r4.y
+mad.f32 r9.y, r10.w, r10.w, r9.y
+add.f r4.x, r15.y, (neg)r4.x
+add.f r10.x, c7.z, r13.x
+mad.f32 r4.y, c7.x, r4.y, r10.y
+cmps.f.ne r8.x, r8.x, c8.y
+mad.f32 r13.y, c10.z, r4.x, c7.x
+mul.f r1.w, r22.z, r1.w
+sqrt r4.x, r9.y
+(ss)mov.f32f32 r9.y, c2.x
+(ss)mul.f r10.x, r10.x, r11.w
+rcp r10.y, r11.x
+add.f r10.w, c10.w, r13.x
+mul.f r1.w, r1.w, r7.z
+mad.f32 r14.x, c7.x, r15.y, c7.x
+mad.f32 r13.w, c7.x, r22.z, c7.z
+(ss)mul.f r7.z, r10.w, r10.y
+add.f r1.w, r24.w, (neg)r1.w
+rcp r9.y, r9.y
+(ss)mul.f r4.x, r4.x, r9.y
+(ss)mov.f32f32 r9.y, r18.y
+max.f r7.z, r7.z, r10.x
+mad.f32 r13.z, c10.z, r1.w, c7.x
+sam (f32)(xyz)r13.w, r13.w, s#0, t#0
+mad.f32 r10.x, c7.x, r15.y, c7.x
+mad.f32 r10.y, c7.x, r24.w, c7.x
+min.f r1.w, r6.y, r7.z
+log2 r4.x, r4.x
+(ss)mul.f r4.x, c9.y, r4.x
+mul.f r0.y, r9.y, r0.y
+rcp r1.y, r1.y
+mad.f32 r10.w, c7.x, r15.y, c7.x
+sam (f32)(xy)r15.x, r13.y, s#1, t#1
+(sy)mul.f r1.x, r1.x, r15.x
+mov.f32f32 r6.y, r1.w
+sam (f32)(xyz)r15.z, r10.x, s#0, t#0
+(sy)sel.b32 r7.z, r14.x, r5.z, r15.w
+sel.b32 r9.y, r14.y, r5.z, r16.x
mul.f r1.x, r1.x, c7.w
-mov.f32f32 r7.z, r4.z
-mov.f32f32 r12.z, r5.y
-mad.f32 r5.y, r7.x, r7.x, r6.x
-add.f r6.x, r0.z, (neg)c1.x
-cov.u32f32 r7.x, r13.y
-rcp r5.x, r5.x
-(ss)mov.f32f32 r5.x, r5.x
-mov.f32f32 r1.x, r1.x
-sqrt r7.z, r7.z
-(ss)mov.f32f32 r7.z, r7.z
-sam (f32)(xyz)r12.y, r12.y, s#0, t#0
-(sy)mov.f32f32 r8.w, r12.w
-mov.f32f32 r9.z, r12.z
-mov.f32f32 r10.x, r12.y
-mul.f r4.x, r4.x, r5.x
-mov.f32f32 r5.x, r8.w
-mov.f32f32 r8.w, r9.z
-mov.f32f32 r9.z, r10.x
-mov.f32f32 r4.x, r4.x
-sel.b32 r5.x, r5.x, r21.x, r11.x
-sel.b32 r8.w, r8.w, r21.x, r11.y
-sel.b32 r9.z, r9.z, r21.x, r12.x
-mad.f32 r1.x, r1.x, r16.z, r4.w
-mul.f r4.w, r5.x, r0.w
-cmps.f.lt r5.x, r19.y, c12.x
-mul.f r8.w, r8.w, r0.w
-mul.f r0.w, r9.z, r0.w
exp2 r4.x, r4.x
-(ss)mov.f32f32 r4.x, r4.x
+nop
+(ss)rcp r4.x, r4.x
+(ss)mul.f r4.x, c9.w, r4.x
+mad.f32 r0.z, r12.y, r6.y, r0.z
+mad.f32 r1.x, r1.x, r15.y, r4.w
+mad.f32 r11.x, c7.x, r24.w, c7.x
+add.f r4.x, c7.z, (neg)r4.x
+mul.f r4.w, r17.x, r17.x
+mov.f32f32 r10.x, r0.z
+mad.f32 r4.w, r18.y, r18.y, r4.w
+mad.f32 r13.y, c7.x, r14.w, c7.x
+mad.f32 r13.x, c7.x, r12.x, c7.z
+sam (f32)(x)r14.x, r10.w, s#2, t#2
+(sy)cmps.f.lt r10.y, r22.z, r14.x
+mad.f32 r4.w, r21.y, r21.y, r4.w
+(ss)add.f r10.w, c10.w, (neg)r10.x
+mul.f r11.x, r9.w, c10.x
+cov.u32f32 r10.y, r10.y
+absneg.f r11.w, (neg)c0.x
+sam (f32)(xyz)r14.x, r13.x, s#0, t#0
+(sy)sel.b32 r9.y, r14.z, r9.x, r9.y
+sel.b32 r7.z, r14.y, r9.x, r7.z
+cmps.f.ne r10.y, r10.y, c8.y
+rsq r4.w, r4.w
+(ss)mul.f r4.w, c7.x, r4.w
+mad.f32 r11.x, c8.z, r11.w, (neg)r11.x
+cmps.f.lt r2.z, r2.z, c8.y
+sel.b32 r1.x, r1.x, r10.y, r4.y
+sel.b32 r4.y, r13.w, r5.z, r15.z
+mul.f r4.x, r4.w, r4.x
+cov.u32f32 r2.z, r2.z
+mul.f r4.w, r9.y, r1.x
+cmps.f.lt r5.z, r7.x, c8.y
+mul.f r7.z, r7.z, r1.x
+sel.b32 r4.y, r14.x, r9.x, r4.y
+mov.f32f32 r9.x, r4.x
+cov.u32f32 r5.z, r5.z
+mad.f32 r3.y, c7.x, r3.y, r9.x
+mul.f r0.y, r0.y, r1.y
+mul.f r1.x, r4.y, r1.x
+cmps.f.ne r4.y, r5.z, c8.y
+cmps.f.ne r2.z, r2.z, c8.y
+mov.f32f32 r5.z, c8.y
+add.f r0.y, r21.y, (neg)r0.y
+sel.b32 r4.w, r4.w, r4.y, r8.y
+sel.b32 r7.z, r7.z, r4.y, r8.z
+sel.b32 r0.w, r1.x, r4.y, r0.w
+mad.f32 r8.z, c10.z, r0.y, c7.x
+sel.b32 r0.y, r5.x, r8.x, r4.w
+sel.b32 r1.x, r5.y, r8.x, r7.z
+sel.b32 r0.w, r2.y, r8.x, r0.w
+mul.f r2.y, r18.y, r3.w
+mul.f r3.w, r0.y, c14.z
+cmps.f.lt r4.y, r7.x, c8.y
+mul.f r4.w, r1.x, c14.y
+mul.f r5.x, r0.w, c14.x
+mul.f r1.y, r2.y, r1.y
+cov.u32f32 r2.y, r4.y
+sel.b32 r4.y, r5.z, r2.z, r11.x
+add.f r5.y, r10.x, (neg)c1.x
+add.f r1.y, r17.x, (neg)r1.y
+cmps.f.ne r2.y, r2.y, c8.y
+absneg.f r4.y, (neg)r4.y
+mul.f r5.y, r5.y, r5.y
+mad.f32 r8.y, c10.z, r1.y, c7.x
+sel.b32 r0.y, r3.w, r2.y, r0.y
+log2 r1.y, r3.z
+(ss)mul.f r1.y, c9.x, r1.y
+sel.b32 r1.x, r4.w, r2.y, r1.x
+sel.b32 r0.w, r5.x, r2.y, r0.w
+rcp r2.y, r4.y
+(ss)mul.f r2.y, r10.w, r2.y
+(ss)add.f r3.z, c7.z, (neg)r10.x
+sam (f32)(xy)r13.x, r8.y, s#1, t#1
+(sy)mul.f r3.w, r4.z, r13.x
+rcp r4.z, r4.y
+mad.f32 r0.x, r12.w, r1.w, r0.x
+exp2 r1.y, r1.y
+(ss)add.f r1.w, c15.y, (neg)r1.y
+mul.f r3.z, r3.z, r4.z
+mul.f r3.w, r3.w, c7.w
+mov.f32f32 r4.z, r0.x
+mul.f r1.w, r1.w, c8.w
+(ss)mul.f r1.y, r1.y, c7.z
+mad.f32 r3.w, r3.w, r13.y, r4.x
+mad.f32 r4.w, c7.x, r17.x, c7.x
+mad.f32 r5.x, c7.x, r21.y, c7.x
+add.f r1.y, r1.y, r1.w
+max.f r1.w, r2.y, r3.z
+add.f r4.x, c10.w, (neg)r4.z
+mul.f r5.z, r9.w, c10.y
+add.f r7.x, c15.y, (neg)r1.y
+add.f r7.z, c15.y, (neg)r1.y
+add.f r8.x, c15.y, (neg)r1.y
+sam (f32)(x)r13.x, r4.w, s#2, t#2
+(sy)(ss)cmps.f.lt r4.w, r18.y, r13.x
+mul.f r0.y, r7.x, r0.y
+mul.f r1.x, r7.z, r1.x
+mul.f r0.w, r8.x, r0.w
+cov.u32f32 r4.w, r4.w
+mad.f32 r3.x, c8.z, r3.x, (neg)r5.z
+mov.f32f32 r5.x, c8.y
+add.f r5.z, r4.z, (neg)c1.y
+cmps.f.ne r4.w, r4.w, c8.y
+mad.f32 r2.x, r7.y, r6.y, r2.x
+sel.b32 r3.x, r5.x, r2.z, r3.x
+mad.f32 r5.x, r5.z, r5.z, r5.y
+sel.b32 r3.y, r3.w, r4.w, r3.y
+mov.f32f32 r3.w, r17.x
+absneg.f r3.x, (neg)r3.x
+mov.f32f32 r4.w, r2.x
+cmps.f.lt r5.y, r5.w, c9.z
+mad.f32 r5.z, c7.x, r3.w, c7.x
+mad.f32 r5.w, c7.x, r10.z, c7.x
+add.f r3.w, r4.w, (neg)c1.z
+absneg.f r6.y, (abs)r4.w
+rcp r7.x, r3.x
+(ss)mul.f r4.x, r4.x, r7.x
+add.f r7.x, c7.w, (neg)r4.z
+rcp r7.y, r3.x
+mad.f32 r3.w, r3.w, r3.w, r5.x
+cmps.f.lt r5.x, c11.x, r6.y
+sam (f32)(xyz)r10.y, r5.z, s#0, t#0
+(ss)mad.f32 r5.w, c7.x, r17.x, c7.x
+mad.f32 r5.z, c7.x, r18.y, c7.z
+(ss)mul.f r6.y, r7.x, r7.y
cov.u32f32 r5.x, r5.x
-mov.f32f32 r1.x, r1.x
-mad.f32 r9.z, c7.x, r16.x, c7.x
-add.f r4.x, c7.z, r4.x
+cov.u32f32 r5.y, r5.y
+sqrt r3.w, r3.w
+mov.f32f32 r7.x, c2.x
+max.f r7.y, r4.x, r6.y
cmps.f.ne r5.x, r5.x, c8.y
-mov.f32f32 r10.x, r10.y
-mov.f32f32 r9.z, r9.z
-mov.f32f32 r4.x, r4.x
-mov.f32f32 r10.y, r3.z
-mov.f32f32 r10.z, r10.x
-mov.f32f32 r9.z, r9.z
-mov.f32f32 r5.y, r5.y
-mad.f32 r7.z, c8.z, r10.y, r7.z
-mul.f r10.y, c0.x, r10.z
-mov.f32f32 r10.z, r17.z
-rcp r4.x, r4.x
-(ss)mov.f32f32 r4.x, r4.x
-mov.f32f32 r10.w, r2.z
-mov.f32f32 r11.x, r9.z
-mov.f32f32 r9.z, r10.z
-mad.f32 r11.y, c7.x, r8.x, c7.x
-mov.f32f32 r12.x, r10.w
-mov.f32f32 r7.z, r7.z
-mad.f32 r9.z, c0.y, r9.z, r10.y
-mov.f32f32 r10.y, r11.y
-absneg.f r11.y, (abs)r12.x
-mul.f r12.x, r7.z, c10.x
-mov.f32f32 r9.z, r9.z
-mov.f32f32 r12.y, r15.x
-cmps.f.lt r11.y, c11.x, r11.y
-mov.f32f32 r10.y, r10.y
-mov.f32f32 r12.x, r12.x
-mov.f32f32 r12.z, r12.y
-cov.u32f32 r12.w, r11.y
-mov.f32f32 r11.y, r10.y
-absneg.f r10.y, (neg)c0.x
-mad.f32 r9.z, c0.z, r12.z, r9.z
-mov.f32f32 r12.z, r12.w
-mov.f32f32 r12.w, r2.z
-mov.f32f32 r10.y, r10.y
-max.f r9.z, c8.y, r9.z
-cmps.f.ne r12.z, r12.z, c8.y
-mov.f32f32 r13.x, c8.y
-sam (f32)(x)r14.y, r11.x, s#2, t#2
-(sy)(ss)cmps.f.lt r11.x, r15.z, r14.y
-mov.f32f32 r9.z, r9.z
-mad.f32 r10.y, c8.z, r10.y, (neg)r12.x
-cmps.f.lt r4.z, r4.z, c8.y
-mov.f32f32 r11.y, r13.x
-mov.f32f32 r12.x, c10.x
-add.f r12.w, r12.w, (neg)c1.z
-cov.u32f32 r4.z, r4.z
-log2 r9.z, r9.z
-(ss)mul.f r9.z, c12.y, r9.z
-sel.b32 r11.y, r11.y, r12.z, r12.x
-mov.f32f32 r12.x, r1.w
-cov.u32f32 r11.x, r11.x
-mov.f32f32 r9.z, r9.z
-cmps.f.ne r4.z, r4.z, c8.y
-mov.f32f32 r13.x, c8.y
-absneg.f r12.x, (abs)r12.x
-mad.f32 r5.y, r12.w, r12.w, r5.y
-mul.f r6.x, r6.x, r6.x
-mov.f32f32 r7.w, r7.w
-exp2 r9.z, r9.z
-mov.f32f32 r10.x, r10.x
-cmps.f.lt r12.x, c11.x, r12.x
-mov.f32f32 r11.x, r11.x
-sel.b32 r10.y, r13.x, r4.z, r10.y
-mov.f32f32 r10.x, r10.x
-cov.u32f32 r12.x, r12.x
-cmps.f.ne r11.x, r11.x, c8.y
-absneg.f r10.y, (neg)r10.y
-mov.f32f32 r12.w, r10.x
-mov.f32f32 r10.x, r10.z
-mov.f32f32 r10.z, r12.x
-sel.b32 r0.y, r1.x, r11.x, r0.y
-mov.f32f32 r1.x, r15.z
-mov.f32f32 r10.x, r10.x
-cmps.f.ne r10.z, r10.z, c8.y
-mov.f32f32 r11.x, r1.w
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r13.x, r10.x
-mov.f32f32 r10.x, r12.y
-mov.f32f32 r11.x, r11.x
-mad.f32 r1.x, c7.x, r1.x, c7.z
-mov.f32f32 r12.x, r10.y
-mov.f32f32 r10.x, r10.x
-absneg.f r11.x, (neg)r11.x
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r5.y, r5.y
-mov.f32f32 r13.y, r10.x
-mov.f32f32 r10.x, r11.x
-mov.f32f32 r1.x, r1.x
-rcp r11.x, r12.x
-(ss)mul.f r2.x, r2.x, r11.x
-mad.f32 r0.x, r7.w, r2.w, r0.x
-cmps.f.ne r7.x, r7.x, c8.y
-sqrt r5.y, r5.y
-mov.f32f32 r7.w, r10.x
-sam.3d (f32)(xyz)r12.w, r12.w, s#3, t#3
-(sy)mad.f32 r10.x, c13.z, r9.z, r13.y
-mad.f32 r11.x, c13.y, r9.z, r13.x
-(ss)mad.f32 r9.z, c13.x, r9.z, r12.w
-sel.b32 r7.w, r7.w, r10.z, r11.y
-sel.b32 r4.w, r4.w, r5.x, r10.x
-mov.f32f32 r12.x, r1.x
-mov.f32f32 r1.x, r16.x
-sel.b32 r8.w, r8.w, r5.x, r11.x
-sel.b32 r0.w, r0.w, r5.x, r9.z
-mul.f r5.x, r2.y, r7.w
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r7.w, c8.y
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r9.z, r0.z
-mad.f32 r1.x, c7.x, r1.x, c7.x
-mov.f32f32 r7.w, r7.w
-mov.f32f32 r10.x, c10.y
-add.f r9.z, c7.z, (neg)r9.z
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r11.x, r10.y
-sel.b32 r7.w, r7.w, r12.z, r10.x
-mov.f32f32 r10.x, c8.y
-mov.f32f32 r1.x, r1.x
-(ss)mov.f32f32 r5.y, r5.y
-mov.f32f32 r11.y, c2.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r12.y, r1.x
-mov.f32f32 r1.x, r10.x
-rcp r10.x, r11.x
-(ss)mul.f r9.z, r9.z, r10.x
-add.f r10.x, r0.x, (neg)c1.y
-mov.f32f32 r8.z, r8.z
-(ss)rcp r11.x, r11.y
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r9.z, r9.z
-sam (f32)(xyz)r12.w, r12.x, s#0, t#0
-(sy)(ss)mov.f32f32 r11.y, r13.y
-mov.f32f32 r12.x, r16.x
-mov.f32f32 r12.y, r13.x
-mov.f32f32 r12.w, r12.w
-sel.b32 r1.x, r1.x, r10.z, r7.w
-mov.f32f32 r7.w, r12.x
-max.f r12.x, r2.x, r9.z
-(ss)mul.f r5.y, r5.y, r11.x
-mad.f32 r6.x, r10.x, r10.x, r6.x
-mad.f32 r7.w, c7.x, r7.w, c7.x
-mad.f32 r1.x, r1.y, r1.x, r5.x
-mov.f32f32 r1.y, r12.x
-mov.f32f32 r5.x, r0.x
-mov.f32f32 r7.w, r7.w
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r10.x, r10.w
-add.f r5.x, c10.w, (neg)r5.x
-mov.f32f32 r7.w, r7.w
-mul.f r11.x, r7.z, c10.y
-mov.f32f32 r10.x, r10.x
-mov.f32f32 r5.y, r5.y
-mov.f32f32 r13.x, r7.w
-mov.f32f32 r7.w, r8.x
-absneg.f r10.x, (neg)r10.x
-mov.f32f32 r11.x, r11.x
-mov.f32f32 r3.z, r3.z
-mov.f32f32 r7.w, r7.w
-mov.f32f32 r10.x, r10.x
-mov.f32f32 r12.x, c10.x
-mad.f32 r3.z, c8.z, r3.z, (neg)r11.x
-mad.f32 r7.w, c7.x, r7.w, c7.x
-mov.f32f32 r11.x, c8.y
-sel.b32 r10.x, r10.x, r12.z, r12.x
-mov.f32f32 r12.x, c8.y
-mov.f32f32 r7.w, r7.w
-sel.b32 r3.z, r11.x, r4.z, r3.z
-log2 r5.y, r5.y
-(ss)mul.f r5.y, c9.y, r5.y
-mov.f32f32 r11.x, r12.x
-mov.f32f32 r7.w, r7.w
-absneg.f r3.z, (neg)r3.z
-mov.f32f32 r5.y, r5.y
-mov.f32f32 r6.x, r6.x
-mov.f32f32 r13.y, r7.w
-mov.f32f32 r7.w, r11.x
-mov.f32f32 r11.x, r3.z
-mad.f32 r2.w, r8.z, r2.w, r3.x
-mov.f32f32 r3.x, r15.z
-exp2 r5.y, r5.y
-sel.b32 r7.w, r7.w, r10.z, r10.x
-(ss)rcp r5.y, r5.y
-(ss)mov.f32f32 r5.y, r5.y
-sam (f32)(xyz)r13.x, r13.x, s#0, t#0
-(sy)mov.f32f32 r8.z, r13.z
-mov.f32f32 r10.x, r13.y
-mov.f32f32 r12.x, r13.x
-mad.f32 r1.x, r6.w, r7.w, r1.x
-sel.b32 r7.w, r11.y, r3.y, r8.z
-mad.f32 r3.x, c7.x, r3.x, c7.z
-sel.b32 r8.z, r12.y, r3.y, r10.x
-sel.b32 r3.y, r12.w, r3.y, r12.x
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r3.x, r3.x
-rcp r10.x, r11.x
-(ss)mul.f r5.x, r5.x, r10.x
-mul.f r5.y, c9.w, r5.y
-max.f r1.x, c8.y, r1.x
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r5.x, r5.x
-mov.f32f32 r10.x, r0.x
-mov.f32f32 r1.x, r1.x
-(ss)mov.f32f32 r11.x, r3.x
-mov.f32f32 r3.x, r8.x
-add.f r8.x, c7.w, (neg)r10.x
-mov.f32f32 r10.x, r3.z
-mov.f32f32 r11.y, r1.x
-mad.f32 r3.x, c7.x, r3.x, c7.x
-mov.f32f32 r5.y, r5.y
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r3.x, r3.x
-mul.f r4.x, r11.y, r4.x
-rcp r10.x, r10.x
-(ss)mul.f r8.x, r8.x, r10.x
-add.f r5.y, c7.z, (neg)r5.y
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r4.x, r4.x
-mov.f32f32 r8.x, r8.x
-mov.f32f32 r5.y, r5.y
-mov.f32f32 r11.y, r3.x
-mov.f32f32 r3.x, r1.w
-(ss)mov.f32f32 r10.x, r1.w
-max.f r12.x, r5.x, r8.x
-add.f r12.y, r2.w, (neg)c1.z
-mov.f32f32 r12.w, r9.x
-mov.f32f32 r13.x, r2.w
-sam (f32)(xyz)r14.y, r11.x, s#0, t#0
-(sy)(ss)mov.f32f32 r11.x, r14.w
-mov.f32f32 r11.y, r14.z
-mov.f32f32 r13.y, r14.y
-mul.f r3.x, r3.x, r10.x
-mov.f32f32 r10.x, r11.x
-mov.f32f32 r11.x, r11.y
-mov.f32f32 r11.y, r13.y
-mov.f32f32 r13.y, r9.x
-sel.b32 r7.w, r10.x, r6.z, r7.w
-sel.b32 r8.z, r11.x, r6.z, r8.z
-sel.b32 r3.y, r11.y, r6.z, r3.y
-mov.f32f32 r6.z, r9.x
-mul.f r7.w, r7.w, r0.y
-mov.f32f32 r10.x, r4.y
-mul.f r8.z, r8.z, r0.y
-mul.f r0.y, r3.y, r0.y
-mad.f32 r3.x, r13.y, r6.z, r3.x
-cmps.f.lt r3.y, r10.x, c8.y
-mov.f32f32 r6.z, r12.x
-mad.f32 r6.x, r12.y, r12.y, r6.x
-mov.f32f32 r3.x, r3.x
-cov.u32f32 r3.y, r3.y
-mov.f32f32 r10.x, r2.z
-mov.f32f32 r11.x, r2.z
-min.f r1.y, r1.y, r6.z
-cmps.f.ne r3.y, r3.y, c8.y
-mov.f32f32 r6.z, r2.w
-mad.f32 r3.x, r10.x, r11.x, r3.x
-mov.f32f32 r6.x, r6.x
-sel.b32 r4.w, r7.w, r3.y, r4.w
-sel.b32 r7.w, r8.z, r3.y, r8.w
-sel.b32 r0.y, r0.y, r3.y, r0.w
-add.f r0.w, c10.w, (neg)r6.z
-sel.b32 r3.y, r5.w, r7.x, r4.w
-sel.b32 r4.w, r7.y, r7.x, r7.w
-sel.b32 r0.y, r3.w, r7.x, r0.y
-rsq r3.x, r3.x
-(ss)mov.f32f32 r3.x, r3.x
-mov.f32f32 r3.w, r3.y
-mov.f32f32 r5.w, r4.w
-mov.f32f32 r6.z, r0.y
-mul.f r3.x, c7.x, r3.x
-mul.f r3.w, r3.w, c14.z
-mul.f r5.w, r5.w, c14.y
-mul.f r6.z, r6.z, c14.x
-mul.f r3.x, r3.x, r5.y
-mov.f32f32 r3.w, r3.w
-cmps.f.lt r4.y, r4.y, c8.y
-mov.f32f32 r5.y, r5.w
-mov.f32f32 r5.w, r6.z
-mov.f32f32 r3.x, r3.x
-cov.u32f32 r4.y, r4.y
-mul.f r6.z, r7.z, c10.x
-sqrt r6.x, r6.x
-(ss)mov.f32f32 r6.x, r6.x
-mov.f32f32 r7.x, r3.x
-cmps.f.ne r4.y, r4.y, c8.y
-mad.f32 r4.x, c7.x, r4.x, r7.x
-mov.f32f32 r6.z, r6.z
-absneg.f r7.x, (neg)c0.z
-sel.b32 r3.y, r3.w, r4.y, r3.y
-mov.f32f32 r3.w, r11.z
-sel.b32 r4.w, r5.y, r4.y, r4.w
-sel.b32 r0.y, r5.w, r4.y, r0.y
-mov.f32f32 r4.x, r4.x
-add.f r3.w, c7.z, (neg)r3.w
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r4.y, r12.w
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r5.y, r7.x
-mov.f32f32 r5.w, c2.x
-mul.f r2.y, r4.y, r2.y
-absneg.f r4.y, (abs)r13.x
-mad.f32 r5.y, c8.z, r5.y, (neg)r6.z
-mov.f32f32 r6.z, c8.y
+sam (f32)(xyz)r13.x, r5.z, s#0, t#0
+(sy)(ss)sel.b32 r5.z, r13.z, r2.w, r10.w
+mad.f32 r8.y, c7.x, r17.y, c7.x
+mad.f32 r8.x, c7.x, r9.z, c7.z
+sel.b32 r5.w, r13.y, r2.w, r10.z
+sel.b32 r2.w, r13.x, r2.w, r10.y
+min.f r1.w, r1.w, r7.y
+add.f r7.y, c10.w, (neg)r4.w
+mul.f r7.z, r8.w, c10.x
+absneg.f r8.z, (neg)c0.z
+sam (f32)(xyz)r8.w, r8.x, s#0, t#0
+(sy)sel.b32 r5.z, r9.y, r6.w, r5.z
+sel.b32 r5.w, r9.x, r6.w, r5.w
+sel.b32 r2.w, r8.w, r6.w, r2.w
+mad.f32 r6.w, c8.z, r8.z, (neg)r7.z
+mul.f r5.z, r5.z, r3.y
+cmps.f.lt r7.z, r18.y, c12.x
+mul.f r5.w, r5.w, r3.y
+mul.f r2.w, r2.w, r3.y
+mov.f32f32 r3.y, c8.y
+cov.u32f32 r7.z, r7.z
+rcp r7.x, r7.x
+(ss)mul.f r3.w, r3.w, r7.x
+(ss)mov.f32f32 r7.x, c8.y
+sel.b32 r2.z, r3.y, r2.z, r6.w
+cmps.f.ne r3.y, r7.z, c8.y
+mad.f32 r6.w, c0.y, r7.w, r12.z
+mov.f32f32 r7.z, c10.x
+mad.f32 r6.w, c0.z, r11.y, r6.w
+absneg.f r2.z, (neg)r2.z
log2 r3.w, r3.w
-(ss)mul.f r3.w, c9.x, r3.w
-mul.f r2.y, r2.y, r11.w
-rcp r5.w, r5.w
-(ss)mul.f r5.w, r6.x, r5.w
-sel.b32 r4.z, r6.z, r4.z, r5.y
-mov.f32f32 r3.w, r3.w
+(ss)mul.f r3.w, c9.y, r3.w
+sel.b32 r7.x, r7.x, r5.x, r7.z
+max.f r6.w, c8.y, r6.w
+absneg.f r0.z, (abs)r0.z
+cmps.f.ne r5.y, r5.y, c8.y
+mad.f32 r8.y, c7.x, r10.x, c7.x
+mad.f32 r8.x, c7.x, r4.z, c7.z
mov.f32f32 r2.y, r2.y
-mov.f32f32 r5.y, r1.w
-absneg.f r4.z, (neg)r4.z
-mov.f32f32 r5.w, r5.w
-cmps.f.lt r4.y, c11.x, r4.y
-mov.f32f32 r5.y, r5.y
+rcp r7.z, r2.z
+(ss)mul.f r7.y, r7.y, r7.z
+log2 r6.w, r6.w
+(ss)mul.f r6.w, c12.y, r6.w
+add.f r7.z, c7.z, (neg)r4.w
+rcp r8.z, r2.z
+nop
exp2 r3.w, r3.w
-(ss)add.f r6.x, c15.y, (neg)r3.w
-mov.f32f32 r6.z, r4.z
-cov.u32f32 r4.y, r4.y
-add.f r2.y, r5.y, (neg)r2.y
-mul.f r5.y, r6.x, c8.w
-(ss)mul.f r3.w, r3.w, c7.z
-log2 r5.w, r5.w
-(ss)mul.f r5.w, c9.y, r5.w
-mov.f32f32 r2.y, r2.y
-rcp r6.x, r6.z
-(ss)mul.f r0.w, r0.w, r6.x
-add.f r3.w, r3.w, r5.y
-mov.f32f32 r5.y, r5.w
-mad.f32 r2.y, c10.z, r2.y, c7.x
-mov.f32f32 r0.w, r0.w
-add.f r5.w, c15.y, (neg)r3.w
-add.f r6.x, c15.y, (neg)r3.w
-(ss)add.f r6.z, c15.y, (neg)r3.w
-mov.f32f32 r2.y, r2.y
-mul.f r3.y, r5.w, r3.y
-mul.f r4.w, r6.x, r4.w
-mul.f r0.y, r6.z, r0.y
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r5.w, r2.w
-exp2 r5.y, r5.y
nop
-(ss)rcp r5.y, r5.y
-(ss)mov.f32f32 r5.y, r5.y
-mov.f32f32 r2.y, r2.y
-add.f r5.w, c7.z, (neg)r5.w
-mov.f32f32 r6.x, r4.z
-mul.f r5.y, c9.w, r5.y
-mov.f32f32 r7.x, r2.y
-mov.f32f32 r2.y, r12.w
-mov.f32f32 r6.z, r6.w
-mov.f32f32 r5.y, r5.y
-mov.f32f32 r4.y, r4.y
-rcp r6.x, r6.x
-(ss)mul.f r5.w, r5.w, r6.x
-mul.f r2.y, r2.y, r6.z
-add.f r5.y, c7.z, (neg)r5.y
-cmps.f.ne r4.y, r4.y, c8.y
-mov.f32f32 r5.w, r5.w
-mul.f r2.y, r2.y, r11.w
-mov.f32f32 r5.y, r5.y
-(ss)mul.f r6.x, r0.z, r0.z
-max.f r6.z, r0.w, r5.w
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r6.w, r2.z
-mad.f32 r6.x, r0.x, r0.x, r6.x
-mov.f32f32 r6.z, r6.z
-mov.f32f32 r7.y, c8.y
-mov.f32f32 r6.w, r6.w
-mov.f32f32 r6.x, r6.x
-min.f r1.y, r1.y, r6.z
-mad.f32 r6.x, r2.w, r2.w, r6.x
-add.f r2.y, r6.w, (neg)r2.y
-mov.f32f32 r6.z, r7.y
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r6.w, c10.x
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r7.y, r0.x
-mov.f32f32 r7.z, r3.z
-rsq r6.x, r6.x
-(ss)mov.f32f32 r6.x, r6.x
-mad.f32 r2.y, c10.z, r2.y, c7.x
-sel.b32 r6.z, r6.z, r4.y, r6.w
-mad.f32 r6.w, r7.z, r1.y, r7.y
-absneg.f r7.y, (abs)r0.z
-mov.f32f32 r2.y, r2.y
-mul.f r6.x, c7.x, r6.x
-mov.f32f32 r6.w, r6.w
-cmps.f.lt r7.y, c11.x, r7.y
-mov.f32f32 r2.y, r2.y
-mul.f r5.y, r6.x, r5.y
-add.f r6.x, r6.w, c11.z
-cov.u32f32 r6.w, r7.y
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r5.y, r5.y
-mov.f32f32 r6.x, r6.x
-mov.f32f32 r6.w, r6.w
-mov.f32f32 r7.y, r2.y
-mov.f32f32 r2.y, r5.y
-mov.f32f32 r5.y, r5.y
-mul.f r6.x, c11.y, r6.x
-cmps.f.ne r6.w, r6.w, c8.y
-mov.f32f32 r7.z, r0.z
-min.f r2.x, r2.x, r9.z
-sam (f32)(xy)r11.x, r7.x, s#1, t#1
-(sy)mul.f r1.x, r1.x, r11.x
-mov.f32f32 r6.x, r6.x
-min.f r5.x, r5.x, r8.x
-(ss)absneg.f r7.x, (neg)r7.z
-mul.f r1.x, r1.x, c7.w
-cmps.f.lt r7.y, r9.w, c9.z
-max.f r2.x, r2.x, r5.x
-min.f r0.w, r0.w, r5.w
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r5.x, r7.x
-cov.u32f32 r5.w, r7.y
-max.f r0.w, r2.x, r0.w
-mad.f32 r1.x, r1.x, r11.y, r3.x
-mov.f32f32 r2.x, r5.x
-cmps.f.ne r3.x, r5.w, c8.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r5.x, r1.w
-sel.b32 r2.x, r2.x, r6.w, r6.z
-add.f r0.w, r1.y, (neg)r0.w
-mov.f32f32 r1.y, r0.x
-mad.f32 r5.x, c7.x, r5.x, c7.x
-mul.f r2.x, r10.y, r2.x
-mov.f32f32 r5.w, c8.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r5.x, r5.x
-mov.f32f32 r6.z, r10.y
-mov.f32f32 r5.w, r5.w
-mov.f32f32 r7.x, r0.x
-mov.f32f32 r5.x, r5.x
-mad.f32 r0.w, c11.w, r0.w, c7.z
-mov.f32f32 r7.y, c10.y
-mul.f r1.y, r1.y, r6.z
-mov.f32f32 r7.z, r5.x
-mov.f32f32 r5.x, r2.z
-mov.f32f32 r0.w, r0.w
-sel.b32 r5.w, r5.w, r4.y, r7.y
-mov.f32f32 r6.z, c8.y
-mad.f32 r5.x, c7.x, r5.x, c7.x
-mov.f32f32 r7.y, r3.z
-mov.f32f32 r7.w, r4.z
-mov.f32f32 r6.z, r6.z
-mov.f32f32 r5.x, r5.x
-rcp r0.w, r0.w
-(ss)mov.f32f32 r0.w, r0.w
-mul.f r7.x, r7.x, r7.w
-mov.f32f32 r6.z, r6.z
-mov.f32f32 r5.x, r5.x
-mul.f r0.w, r6.x, r0.w
-rcp r6.x, r7.y
-(ss)mul.f r1.y, r1.y, r6.x
-sel.b32 r5.w, r6.z, r6.w, r5.w
-mov.f32f32 r7.w, r5.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r5.x, r0.z
-mad.f32 r2.x, r3.z, r5.w, r2.x
-mul.f r3.z, r7.x, r6.x
-mov.f32f32 r5.w, r9.x
-sam (f32)(x)r7.x, r7.z, s#2, t#2
-mov.f32f32 r6.x, r9.x
-exp2 r0.w, r0.w
-(ss)mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r6.z, r2.w
-(sy)cmps.f.lt r6.x, r6.x, r7.x
-add.f r0.w, c7.z, r0.w
-add.f r1.y, r5.x, (neg)r1.y
-mov.f32f32 r5.x, r6.z
-cov.u32f32 r6.x, r6.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.y, r1.y
-absneg.f r5.x, (neg)r5.x
-mov.f32f32 r6.x, r6.x
-mov.f32f32 r3.z, r3.z
-mad.f32 r1.y, c10.z, r1.y, c7.x
-mov.f32f32 r5.x, r5.x
-cmps.f.ne r6.x, r6.x, c8.y
-rcp r0.w, r0.w
-(ss)mov.f32f32 r0.w, r0.w
-mov.f32f32 r6.z, c10.x
-mov.f32f32 r1.y, r1.y
-sel.b32 r1.x, r1.x, r6.x, r4.x
-mov.f32f32 r4.x, r5.w
-sel.b32 r5.x, r5.x, r4.y, r6.z
-mov.f32f32 r5.w, c8.y
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r4.x, r4.x
-mov.f32f32 r6.x, r2.w
-mov.f32f32 r5.w, r5.w
-mov.f32f32 r1.y, r1.y
-mad.f32 r4.x, c7.x, r4.x, c7.z
-add.f r3.z, r6.x, (neg)r3.z
-mov.f32f32 r5.w, r5.w
-mov.f32f32 r6.x, r0.x
-mov.f32f32 r4.x, r4.x
-mov.f32f32 r7.x, r1.y
-mov.f32f32 r1.y, r3.z
-sel.b32 r3.z, r5.w, r6.w, r5.x
-mov.f32f32 r4.x, r4.x
-mov.f32f32 r5.x, r6.x
-mad.f32 r1.y, c10.z, r1.y, c7.x
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r5.w, r4.x
-mad.f32 r2.x, r4.z, r3.z, r2.x
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r3.z, r1.w
-mad.f32 r4.x, c7.x, r5.x, c7.z
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r4.z, r9.x
-mov.f32f32 r3.z, r3.z
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r4.x, r4.x
-mad.f32 r3.z, c7.x, r3.z, c7.x
-max.f r2.x, c8.y, r2.x
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r4.x, r4.x
+(ss)rcp r3.w, r3.w
+(ss)mul.f r3.w, c9.w, r3.w
+mul.f r7.z, r7.z, r8.z
+cmps.f.lt r0.z, c11.x, r0.z
+exp2 r6.w, r6.w
+mov.f32f32 r8.z, r11.z
+mov.f32f32 r8.w, r7.w
+mov.f32f32 r9.x, r11.y
+max.f r7.w, r7.y, r7.z
+add.f r3.w, c7.z, (neg)r3.w
+mul.f r9.y, r10.x, r10.x
+cov.u32f32 r0.z, r0.z
+min.f r1.w, r1.w, r7.w
+mad.f32 r7.w, r4.z, r4.z, r9.y
+sam.3d (f32)(xyz)r8.z, r8.z, s#3, t#3
+(sy)(ss)mad.f32 r9.x, c13.z, r6.w, r9.x
+mad.f32 r8.w, c13.y, r6.w, r8.w
+mad.f32 r6.w, c13.x, r6.w, r8.z
+mov.f32f32 r8.z, r1.w
+sel.b32 r5.z, r5.z, r3.y, r9.x
mov.f32f32 r3.z, r3.z
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r7.y, r1.y
-mov.f32f32 r7.z, r4.x
-mov.f32f32 r1.y, r3.z
-mov.f32f32 r3.z, r2.x
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r4.x, r0.z
-mov.f32f32 r6.x, r1.y
-mul.f r0.w, r3.z, r0.w
-sam (f32)(xy)r9.z, r7.x, s#1, t#1
-(sy)mul.f r1.y, r2.x, r9.z
-mov.f32f32 r2.x, r4.x
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r3.z, r4.z
-mad.f32 r4.x, c7.x, r0.z, c7.x
-sam (f32)(xyz)r11.x, r5.w, s#0, t#0
-(sy)mov.f32f32 r4.z, r11.z
-mad.f32 r1.w, c7.x, r1.w, c7.x
-mov.f32f32 r5.x, r11.y
-(ss)mov.f32f32 r5.w, r11.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.w, r1.w
-mul.f r1.y, r1.y, c7.w
-mad.f32 r2.x, c7.x, r2.x, c7.x
-mad.f32 r3.z, c7.x, r3.z, c7.z
-mov.f32f32 r1.w, r1.w
-mad.f32 r0.w, c7.x, r0.w, r5.y
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r7.x, r1.w
-mov.f32f32 r1.w, r10.w
-mov.f32f32 r0.w, r0.w
-mad.f32 r1.y, r1.y, r9.w, r2.y
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r2.y, r3.z
+sel.b32 r5.w, r5.w, r3.y, r8.w
+sel.b32 r2.w, r2.w, r3.y, r6.w
+mad.f32 r3.y, r4.w, r4.w, r7.w
+min.f r2.y, r2.y, r3.z
mov.f32f32 r3.z, r4.x
-mov.f32f32 r7.w, r2.x
-mad.f32 r1.w, c7.x, r1.w, c7.x
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r2.x, r3.z
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r0.z, r0.z
-sam (f32)(xyz)r9.z, r7.z, s#0, t#0
-mov.f32f32 r3.z, r0.x
-(ss)mov.f32f32 r7.z, r2.x
-mov.f32f32 r1.w, r1.w
-mad.f32 r2.x, c7.x, r2.w, c7.x
-(sy)mov.f32f32 r4.x, r10.x
-mov.f32f32 r5.y, r9.w
-mov.f32f32 r7.y, r1.w
-mov.f32f32 r1.w, r2.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r2.x, r9.z
-mov.f32f32 r8.z, r2.y
-mov.f32f32 r2.y, r2.z
-mad.f32 r0.z, c7.x, r0.z, c7.x
-sam (f32)(xyz)r9.z, r7.x, s#0, t#0
-(sy)mov.f32f32 r2.z, r10.x
-mov.f32f32 r6.x, r9.w
-mov.f32f32 r6.z, r9.z
-mov.f32f32 r1.w, r1.w
-sel.b32 r2.z, r4.z, r12.z, r2.z
-mov.f32f32 r2.y, r2.y
-sel.b32 r4.z, r5.x, r12.z, r6.x
-sel.b32 r5.x, r5.w, r12.z, r6.z
-mov.f32f32 r7.w, r1.w
-mad.f32 r1.w, c7.x, r2.y, c7.x
-mov.f32f32 r0.z, r0.z
-mad.f32 r2.y, c7.x, r3.z, c7.z
-mov.f32f32 r3.z, r2.w
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r0.z, r0.z
-(ss)nop
-sam (f32)(x)r7.x, r7.z, s#2, t#2
-mov.f32f32 r2.y, r2.y
-(sy)cmps.f.lt r0.x, r0.x, r7.x
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r5.w, r0.z
-mov.f32f32 r0.z, r3.z
-cov.u32f32 r0.x, r0.x
-mov.f32f32 r8.w, r1.w
-mov.f32f32 r1.w, r2.y
-mad.f32 r0.z, c7.x, r0.z, c7.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r2.y, r2.w
-mov.f32f32 r7.x, r1.w
-mov.f32f32 r0.z, r0.z
-(ss)nop
-sam (f32)(xyz)r7.y, r8.z, s#0, t#0
-(sy)mov.f32f32 r1.w, r7.w
-mov.f32f32 r2.w, r7.z
-mov.f32f32 r3.z, r7.y
-cmps.f.ne r0.x, r0.x, c8.y
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r3.z, r3.z
-sel.b32 r0.x, r1.y, r0.x, r0.w
-sel.b32 r0.w, r1.w, r10.z, r2.z
-sel.b32 r1.y, r2.w, r10.z, r4.z
-sel.b32 r1.w, r3.z, r10.z, r5.x
-mov.f32f32 r0.z, r0.z
-mul.f r0.w, r0.w, r1.x
-cmps.f.lt r2.z, r9.x, c12.x
-mul.f r1.y, r1.y, r1.x
-mul.f r1.x, r1.w, r1.x
-mov.f32f32 r6.x, r0.z
-cov.u32f32 r0.z, r2.z
-mad.f32 r1.w, c7.x, r2.y, c7.x
-mov.f32f32 r2.y, r14.x
-mov.f32f32 r2.z, r14.x
+mov.f32f32 r4.x, r6.y
cmps.f.ne r0.z, r0.z, c8.y
-mov.f32f32 r1.w, r1.w
-mul.f r2.y, c0.x, r2.y
-mov.f32f32 r2.w, r15.w
-sam (f32)(xyz)r7.y, r5.w, s#0, t#0
-(sy)mov.f32f32 r3.z, r7.z
-mov.f32f32 r4.z, r7.y
-mov.f32f32 r5.x, r7.w
-(ss)mov.f32f32 r5.w, r2.w
-sel.b32 r3.z, r5.y, r4.y, r3.z
-sel.b32 r2.x, r2.x, r4.y, r4.z
-sel.b32 r4.x, r4.x, r4.y, r5.x
-mad.f32 r2.y, c0.y, r5.w, r2.y
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r4.y, r9.y
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r7.y, r1.w
-mov.f32f32 r7.z, r2.z
-mov.f32f32 r1.w, r4.y
-mov.f32f32 r2.z, r2.w
-mov.f32f32 r2.w, r4.y
-cmps.f.lt r4.y, r8.y, c8.y
-mad.f32 r1.w, c0.z, r1.w, r2.y
-sam (f32)(xyz)r8.z, r7.x, s#0, t#0
-(sy)mov.f32f32 r2.y, r8.z
-mov.f32f32 r4.z, r9.x
-mov.f32f32 r5.x, r8.w
-max.f r1.w, c8.y, r1.w
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r4.z, r4.z
-mov.f32f32 r5.x, r5.x
-mov.f32f32 r1.w, r1.w
-sel.b32 r2.x, r2.y, r6.w, r2.x
-sel.b32 r2.y, r4.z, r6.w, r4.x
-sel.b32 r3.z, r5.x, r6.w, r3.z
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r2.w, r2.w
-cov.u32f32 r4.x, r4.y
-log2 r1.w, r1.w
-(ss)mul.f r1.w, c12.y, r1.w
-mul.f r2.y, r2.y, r0.x
-mul.f r3.z, r3.z, r0.x
-mul.f r0.x, r2.x, r0.x
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r7.w, r2.z
-mov.f32f32 r8.x, r2.w
-mov.f32f32 r2.x, r4.x
-mov.f32f32 r2.z, r8.y
-mov.f32f32 r2.w, c7.z
-nop
+absneg.f r6.y, (neg)r10.x
+mad.f32 r1.w, r3.x, r1.w, r4.z
+min.f r3.z, r3.z, r4.x
+rsq r3.y, r3.y
+(ss)mul.f r3.y, c7.x, r3.y
+sel.b32 r4.x, r6.y, r0.z, r7.x
+add.f r1.w, r1.w, c11.z
+max.f r2.y, r2.y, r3.z
+mov.f32f32 r3.z, r7.y
+mov.f32f32 r6.y, r7.z
+mul.f r3.y, r3.y, r3.w
+mul.f r3.w, r4.y, r4.x
+mov.f32f32 r4.x, c8.y
+min.f r3.z, r3.z, r6.y
+mov.f32f32 r6.y, r3.y
+mov.f32f32 r6.w, c10.y
+mul.f r1.w, c11.y, r1.w
+max.f r2.y, r2.y, r3.z
+sam (f32)(xyz)r7.x, r8.x, s#0, t#0
+absneg.f r3.z, (neg)r4.w
+sel.b32 r4.x, r4.x, r5.x, r6.w
+mov.f32f32 r6.w, c8.y
+add.f r2.y, r8.z, (neg)r2.y
+mad.f32 r7.w, c7.x, r10.x, c7.x
+(ss)mov.f32f32 r8.y, c10.x
+sel.b32 r4.x, r6.w, r0.z, r4.x
+mad.f32 r2.y, c11.w, r2.y, c7.z
+mad.f32 r8.x, c7.x, r4.w, c7.x
+sel.b32 r3.z, r3.z, r5.x, r8.y
+mad.f32 r8.y, c7.x, r10.x, c7.x
+mul.f r4.y, r4.z, r4.y
+rcp r6.w, r3.x
+(ss)mad.f32 r3.x, r3.x, r4.x, r3.w
+mov.f32f32 r3.w, c8.y
+rcp r2.y, r2.y
+(ss)mul.f r1.w, r1.w, r2.y
+sam (f32)(xyz)r8.z, r7.w, s#0, t#0
+(sy)(ss)sel.b32 r2.y, r7.x, r5.x, r8.z
+sel.b32 r4.x, r7.z, r5.x, r9.x
+sel.b32 r5.x, r7.y, r5.x, r8.w
+sel.b32 r3.z, r3.w, r0.z, r3.z
+mad.f32 r7.y, c7.x, r2.x, c7.x
+mad.f32 r8.z, c7.x, r4.w, c7.x
exp2 r1.w, r1.w
-cmps.f.ne r2.x, r2.x, c8.y
-cmps.f.lt r2.z, r2.z, c8.y
-sam.3d (f32)(xyz)r6.z, r7.z, s#3, t#3
-(sy)(ss)mad.f32 r4.x, c13.z, r1.w, r7.x
-mad.f32 r4.y, c13.y, r1.w, r6.w
-(ss)mad.f32 r1.w, c13.x, r1.w, r6.z
-cov.u32f32 r2.z, r2.z
-sel.b32 r0.w, r0.w, r0.z, r4.x
-sel.b32 r1.y, r1.y, r0.z, r4.y
-sel.b32 r0.z, r1.x, r0.z, r1.w
-cmps.f.ne r1.x, r2.z, c8.y
-mov.f32f32 r1.w, r2.w
-(rpt1)nop
-sel.b32 r0.w, r2.y, r1.x, r0.w
-sel.b32 r1.y, r3.z, r1.x, r1.y
-sel.b32 r0.x, r0.x, r1.x, r0.z
+(ss)add.f r1.w, c7.z, r1.w
+mad.f32 r2.x, r2.z, r3.z, r3.x
+mad.f32 r7.x, c7.x, r0.x, c7.z
+mul.f r0.x, r4.y, r6.w
+mul.f r2.z, r4.z, r2.z
+cmps.f.lt r3.x, r12.w, c8.y
+max.f r2.x, c8.y, r2.x
+rcp r1.w, r1.w
+add.f r0.x, r10.x, (neg)r0.x
+sam (f32)(x)r7.z, r8.y, s#2, t#2
+(sy)cmps.f.lt r3.z, r4.z, r7.z
+mul.f r2.z, r2.z, r6.w
+mov.f32f32 r3.w, r2.x
+mad.f32 r4.y, c10.z, r0.x, c7.x
+sam (f32)(xyz)r6.w, r7.x, s#0, t#0
+(sy)sel.b32 r0.x, r7.x, r0.z, r5.x
+add.f r2.z, r4.w, (neg)r2.z
+(ss)mul.f r1.w, r3.w, r1.w
+sel.b32 r3.w, r7.y, r0.z, r4.x
+cov.u32f32 r3.z, r3.z
+mad.f32 r4.z, c10.z, r2.z, c7.x
+mad.f32 r2.z, c7.x, r1.w, r6.y
+sel.b32 r0.z, r6.w, r0.z, r2.y
+cmps.f.ne r2.y, r3.z, c8.y
+cov.u32f32 r3.x, r3.x
+cmps.f.lt r3.z, r12.w, c8.y
+mov.f32f32 r1.w, c7.z
+sam (f32)(xy)r4.x, r4.y, s#1, t#1
+(sy)mul.f r2.x, r2.x, r4.x
+cmps.f.ne r3.x, r3.x, c8.y
+cov.u32f32 r3.z, r3.z
nop
-sel.b32 r0.z, r6.y, r3.x, r0.w
-sel.b32 r0.w, r5.z, r3.x, r1.y
-sel.b32 r0.x, r1.z, r3.x, r0.x
+mul.f r2.x, r2.x, c7.w
+(rpt2)nop
+mad.f32 r2.x, r2.x, r4.y, r3.y
+cmps.f.ne r3.y, r3.z, c8.y
+(rpt1)nop
+sel.b32 r2.x, r2.x, r2.y, r2.z
+(rpt2)nop
+mul.f r2.y, r3.w, r2.x
+mul.f r0.x, r0.x, r2.x
+mul.f r0.z, r0.z, r2.x
nop
-mov.f32f32 r1.x, r0.z
-mov.f32f32 r1.y, r0.w
-mov.f32f32 r1.z, r0.x
+sel.b32 r2.x, r2.y, r3.y, r5.z
+sel.b32 r0.x, r0.x, r3.y, r5.w
+sel.b32 r0.z, r0.z, r3.y, r2.w
nop
-mul.f r1.x, r1.x, c14.z
-mul.f r1.y, r1.y, c14.y
-mul.f r1.z, r1.z, c14.x
+sel.b32 r2.x, r6.x, r5.y, r2.x
+sel.b32 r0.x, r6.z, r5.y, r0.x
+sel.b32 r0.z, r1.z, r5.y, r0.z
nop
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.z, r1.z
+mul.f r1.z, r2.x, c14.z
+mul.f r2.y, r0.x, c14.y
+mul.f r2.z, r0.z, c14.x
nop
-sel.b32 r0.z, r1.x, r2.x, r0.z
-sel.b32 r0.w, r1.y, r2.x, r0.w
-sel.b32 r0.x, r1.z, r2.x, r0.x
+sel.b32 r1.z, r1.z, r3.x, r2.x
+sel.b32 r0.x, r2.y, r3.x, r0.x
+sel.b32 r0.z, r2.z, r3.x, r0.z
nop
-mul.f r0.z, r3.w, r0.z
-mul.f r0.w, r3.w, r0.w
-mul.f r0.x, r3.w, r0.x
+mul.f r1.z, r1.y, r1.z
+mul.f r0.x, r1.y, r0.x
+mul.f r0.z, r1.y, r0.z
nop
-add.f r0.z, r0.z, r3.y
-add.f r0.w, r0.w, r4.w
-add.f r0.x, r0.x, r0.y
-nop
-mov.f32f32 r1.z, r0.z
-mov.f32f32 r1.y, r0.w
-mov.f32f32 r1.x, r0.x
+add.f r1.z, r1.z, r0.y
+add.f r1.y, r0.x, r1.x
+add.f r1.x, r0.z, r0.w
end
nop
-nop
; FRAG: outputs: r1.x (1:0)
; FRAG: inputs: r0.x (5:20,cm=f,il=8,b=1)
-; FRAG: 2323 instructions, 0 half, 23 full
+; FRAG: 1348 instructions, 0 half, 27 full
diff --git a/reference/xa-composite-fs.asm b/reference/xa-composite-fs.asm
index 4d9b7bc..99d1410 100644
--- a/reference/xa-composite-fs.asm
+++ b/reference/xa-composite-fs.asm
@@ -6,47 +6,40 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c0.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000
(sy)(ss)bary.f r0.z, 0, r0.x
bary.f r0.w, 1, r0.x
bary.f (ei)r0.x, 7, r0.x
nop
cmps.f.lt r0.y, r0.z, c0.y
cmps.f.lt r1.x, r0.w, c0.y
-mov.f32f32 r1.y, r0.z
-mov.f32f32 r1.z, r0.w
+cmps.f.gt r1.y, r0.w, c0.x
+cmps.f.gt r1.z, r0.z, c0.x
cov.u32f32 r0.y, r0.y
-cmps.f.gt r0.z, r0.z, c0.x
cov.u32f32 r1.x, r1.x
-cmps.f.gt r0.w, r0.w, c0.x
-nop
-cov.u32f32 r0.z, r0.z
-sam (f32)(xyzw)r1.y, r1.y, s#0, t#0
-nop
-cov.u32f32 r0.w, r0.w
-nop
-min.f r0.y, r0.y, r0.z
-nop
-min.f r0.z, r1.x, r0.w
+cov.u32f32 r1.y, r1.y
+cov.u32f32 r1.z, r1.z
+sam (f32)(xyzw)r1.w, r0.z, s#0, t#0
+(rpt2)nop
+min.f r0.y, r0.y, r1.z
+(ss)min.f r0.z, r1.x, r1.y
(rpt2)nop
min.f r0.w, r0.y, r0.z
min.f r1.x, r0.y, r0.z
-min.f r2.y, r0.y, r0.z
+min.f r1.y, r0.y, r0.z
min.f r0.y, r0.y, r0.z
-mov.f32f32 r0.z, r0.w
-mov.f32f32 r0.w, r1.x
-mov.f32f32 r1.x, r2.y
-mov.f32f32 r0.y, r0.y
-(sy)mul.f r0.z, r2.x, r0.z
-mul.f r0.w, r1.w, r0.w
-mul.f r1.x, r1.z, r1.x
-mul.f r0.y, r1.y, r0.y
+(sy)mul.f r0.z, r2.z, r0.w
+mul.f r0.w, r2.y, r1.x
+mul.f r1.x, r2.x, r1.y
+mul.f r0.y, r1.w, r0.y
mul.f r1.w, r0.z, r0.x
-(ss)mul.f r1.z, r0.w, r0.x
+mul.f r1.z, r0.w, r0.x
mul.f r1.y, r1.x, r0.x
mul.f r1.x, r0.y, r0.x
end
nop
+nop
; FRAG: outputs: r1.x (1:0)
; FRAG: inputs: r0.x (5:0,cm=f,il=8,b=1) r63.y (1:0,cm=f,il=12,b=1)
-; FRAG: 41 instructions, 0 half, 3 full
+; FRAG: 34 instructions, 0 half, 3 full
diff --git a/reference/xon1.asm b/reference/xon1.asm
index b4aedcd..1867490 100644
--- a/reference/xon1.asm
+++ b/reference/xon1.asm
@@ -6,211 +6,146 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c10.x) 0xbf000000, 0x40000000, 0xbf800000, 0x00000000
+@const(c11.x) 0x3f800000, 0x3e800000, 0x00000000, 0x00000000
+@const(c12.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.z, 2, r0.x
-bary.f r0.w, 0, r0.x
-bary.f r1.x, 4, r0.x
-bary.f r1.y, 3, r0.x
-mov.f32f32 r1.z, r0.z
-mov.f32f32 r1.w, r0.w
-mul.f r2.x, r1.x, r1.x
-bary.f r2.y, 5, r0.x
-mov.f32f32 r2.z, r1.z
-mov.f32f32 r1.z, r1.y
-mov.f32f32 r3.x, r1.w
-bary.f r1.w, 1, r0.x
-mad.f32 r2.x, r2.y, r2.y, r2.x
-mov.f32f32 r2.w, r1.z
-mov.f32f32 r1.z, r0.w
-mov.f32f32 r3.y, r1.w
-mov.f32f32 r2.x, r2.x
-bary.f r3.z, 6, r0.x
-mov.f32f32 r3.w, r1.z
-mov.f32f32 r3.y, r3.y
-sam (f32)(xyz)r4.x, r2.z, s#4, t#4
-(sy)mad.f32 r1.z, c10.y, r4.x, c10.z
-mad.f32 r2.x, r3.z, r3.z, r2.x
-(ss)mov.f32f32 r2.z, r1.w
-mad.f32 r2.w, c10.y, r4.y, c10.z
-mov.f32f32 r1.z, r1.z
+bary.f r0.w, 3, r0.x
+bary.f r1.x, 0, r0.x
+bary.f r1.z, 4, r0.x
+mov.f32f32 r1.w, r0.z
+mov.f32f32 r2.x, r0.w
+mov.f32f32 r2.y, r1.x
+bary.f r1.y, 1, r0.x
+sam (f32)(xyz)r2.z, r0.z, s#4, t#4
+(sy)(ss)mad.f32 r0.z, c10.y, r2.z, c10.z
+mov.f32f32 r3.y, r1.x
+bary.f r0.w, 16, r0.x
+mul.f r2.z, r1.z, r1.z
+mov.f32f32 r3.w, r0.z
bary.f r4.x, 12, r0.x
-sam (f32)(xyz)r4.w, r3.x, s#0, t#0
-(sy)(ss)add.f r3.x, r4.w, c10.x
-bary.f r3.y, 16, r0.x
+mov.f32f32 r3.z, r1.y
bary.f r4.y, 8, r0.x
-mul.f r4.x, r1.z, r4.x
-mov.f32f32 r2.w, r2.w
-bary.f r4.w, 13, r0.x
-mov.f32f32 r3.x, r3.x
-mul.f r3.y, r1.z, r3.y
-mul.f r1.z, r1.z, r4.y
-mad.f32 r4.x, r2.w, r4.w, r4.x
-mul.f r4.y, r3.x, r3.x
-add.f r4.w, r5.x, c10.x
-bary.f r5.x, 17, r0.x
-mov.f32f32 r4.x, r4.x
-mad.f32 r4.z, c10.y, r4.z, c10.z
-mov.f32f32 r4.w, r4.w
-mad.f32 r3.y, r2.w, r5.x, r3.y
+mul.f r0.z, r0.z, r0.w
+mul.f r0.w, r3.w, r4.x
+mad.f32 r2.w, c10.y, r2.w, c10.z
+mul.f r3.w, r3.w, r4.y
+bary.f r4.x, 17, r0.x
+sam (f32)(xyz)r4.y, r3.y, s#0, t#0
+(sy)(ss)add.f r3.y, r4.y, c10.x
+mov.f32f32 r3.z, r2.w
+bary.f r4.y, 13, r0.x
bary.f r5.x, 9, r0.x
-mov.f32f32 r4.z, r4.z
-bary.f r5.z, 14, r0.x
-mad.f32 r4.y, r4.w, r4.w, r4.y
-mov.f32f32 r3.y, r3.y
-mad.f32 r1.z, r2.w, r5.x, r1.z
-mad.f32 r2.w, r4.z, r5.z, r4.x
-mov.f32f32 r4.x, r4.y
-add.f r4.y, r5.y, c10.x
-bary.f r5.x, 18, r0.x
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r1.z, r1.z
-bary.f (ei)r0.x, 10, r0.x
-mov.f32f32 r0.y, r4.y
-mad.f32 r3.y, r4.z, r5.x, r3.y
-rsq r2.x, r2.x
-(ss)mov.f32f32 r2.x, r2.x
-mad.f32 r0.x, r4.z, r0.x, r1.z
-mad.f32 r1.z, r0.y, r0.y, r4.x
-mov.f32f32 r3.y, r3.y
-mul.f r1.x, r1.x, r2.x
-mul.f r4.x, r0.x, r0.x
-mul.f r2.y, r2.y, r2.x
-mad.f32 r4.x, r2.w, r2.w, r4.x
-mov.f32f32 r3.y, r3.y
-rsq r1.z, r1.z
-mov.f32f32 r1.x, r1.x
-(ss)mov.f32f32 r1.z, r1.z
-mov.f32f32 r4.x, r4.x
-mov.f32f32 r2.y, r2.y
-mad.f32 r4.x, r3.y, r3.y, r4.x
-mul.f r3.x, r3.x, r1.z
-mul.f r4.y, r4.w, r1.z
-mul.f r0.y, r0.y, r1.z
-(rpt2)nop
-rsq r1.z, r4.x
-(ss)mov.f32f32 r1.z, r1.z
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r4.y, r4.y
-mov.f32f32 r0.y, r0.y
-mul.f r0.x, r0.x, r1.z
-mul.f r3.y, r3.y, r1.z
-mul.f r1.z, r2.w, r1.z
-mul.f r2.x, r3.z, r2.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r2.w, r3.y
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r2.x, r2.x
-mul.f r3.y, r3.x, r0.x
-mul.f r3.z, r3.x, r0.x
-mad.f32 r3.y, r4.y, r1.z, r3.y
-mad.f32 r3.z, r4.y, r1.z, r3.z
-max.f r4.z, c11.y, r2.w
-(ss)mov.f32f32 r4.x, r2.z
-mov.f32f32 r2.z, r3.y
-mov.f32f32 r3.y, r3.z
-mad.f32 r2.z, r0.y, r2.w, r2.z
-mad.f32 r3.y, r0.y, r2.w, r3.y
+mad.f32 r0.z, r2.w, r4.x, r0.z
+bary.f r2.w, 5, r0.x
+mad.f32 r0.w, r3.z, r4.y, r0.w
+mad.f32 r3.x, c10.y, r3.x, c10.z
+mov.f32f32 r4.x, r3.y
+mad.f32 r3.z, r3.z, r5.x, r3.w
+bary.f r3.w, 18, r0.x
+mov.f32f32 r4.y, r3.x
+bary.f r5.x, 14, r0.x
+mul.f r3.y, r3.y, r4.x
+add.f r4.z, r4.z, c10.x
+bary.f r5.y, 10, r0.x
+mad.f32 r0.w, r4.y, r5.x, r0.w
+mad.f32 r0.z, r3.x, r3.w, r0.z
+mad.f32 r2.z, r2.w, r2.w, r2.z
+bary.f (ei)r0.x, 6, r0.x
+mov.f32f32 r0.y, r0.w
+mad.f32 r3.x, r4.y, r5.y, r3.z
mov.f32f32 r3.z, r4.z
-mov.f32f32 r0.w, r0.w
+mov.f32f32 r3.w, r0.z
+mad.f32 r4.y, r0.x, r0.x, r2.z
+mul.f r2.z, r3.x, r3.x
+mad.f32 r3.y, r4.z, r3.z, r3.y
+mad.f32 r0.w, r0.w, r0.y, r2.z
+mov.f32f32 r3.w, r3.w
+add.f r4.z, r4.w, c10.x
+mov.f32f32 r2.z, r1.y
+rsq r4.y, r4.y
+(ss)mov.f32f32 r4.w, r4.y
+mad.f32 r0.w, r3.w, r3.w, r0.w
+mov.f32f32 r3.w, r4.z
+mul.f r0.x, r0.x, r4.y
+sam (f32)(xyz)r5.x, r1.w, s#3, t#3
+mul.f r1.z, r1.z, r4.w
+(ss)mul.f r2.x, r2.w, r4.w
+sam (f32)(xyzw)r5.w, r2.y, s#1, t#1
+(ss)mad.f32 r2.y, r3.w, r3.w, r3.y
+(sy)mul.f r1.w, r6.z, c9.x
+rsq r0.w, r0.w
+(ss)mov.f32f32 r2.z, r0.w
+mul.f r0.z, r0.z, r0.w
+(ss)mul.f r0.w, r6.y, c5.z
+mul.f r2.w, r6.y, c6.z
+mul.f r3.x, r3.x, r2.z
+rsq r2.y, r2.y
+(ss)mov.f32f32 r3.y, r2.y
+mov.f32f32 r3.w, r0.z
+mul.f r0.y, r0.y, r2.z
+(ss)mul.f r2.y, r4.z, r2.y
+mul.f r2.z, r4.x, r3.y
+mov.f32f32 r4.x, r3.x
+max.f r4.y, c11.y, r3.w
+mov.f32f32 r4.z, r0.y
mul.f r3.x, r2.z, r3.x
-mul.f r4.y, r2.z, r4.y
-mul.f r0.y, r2.z, r0.y
-mov.f32f32 r2.z, r3.y
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r3.y, r4.y
-mov.f32f32 r0.y, r0.y
-max.f r2.z, r2.z, c10.w
-mul.f r3.x, c10.y, r3.x
-mul.f r3.y, c10.y, r3.y
-mul.f r0.y, c10.y, r0.y
+mul.f r3.y, r3.z, r3.y
mov.f32f32 r2.z, r2.z
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r0.y, r0.y
-sam (f32)(xyzw)r3.w, r3.w, s#1, t#1
-(sy)mul.f r4.w, r4.y, c6.z
-add.f r0.x, r0.x, (neg)r3.x
-add.f r1.z, r1.z, (neg)r3.y
-add.f r0.y, r2.w, (neg)r0.y
-mul.f r2.w, r4.w, r2.z
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r2.w, r2.w
-mul.f r0.x, r0.x, r1.x
-mul.f r1.x, r4.x, c6.y
-mad.f32 r0.x, r1.z, r2.y, r0.x
-mul.f r1.z, r3.w, c6.x
-rcp r2.y, r3.z
-(ss)mov.f32f32 r2.y, r2.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.x, r0.x
-mul.f r1.x, r1.x, r2.z
-mad.f32 r0.x, r0.y, r2.x, r0.x
-mul.f r0.y, r1.z, r2.z
-(rpt1)nop
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r3.x, r0.z
+mov.f32f32 r3.z, r2.y
+mul.f r4.w, r6.x, c5.y
+mad.f32 r0.y, r3.y, r0.y, r3.x
+mul.f r3.x, r2.z, r4.x
+mad.f32 r0.y, r2.y, r3.w, r0.y
+mov.f32f32 r2.y, r3.y
+rcp r3.y, r4.y
+(ss)mov.f32f32 r4.y, r3.y
+mul.f r3.y, r5.x, r3.y
+mul.f r2.z, r0.y, r2.z
+mad.f32 r3.x, r2.y, r4.z, r3.x
+mul.f r2.y, r0.y, r2.y
+mul.f r0.y, r0.y, r3.z
+mul.f r2.z, c10.y, r2.z
+mad.f32 r3.x, r3.z, r3.w, r3.x
+mul.f r2.y, c10.y, r2.y
+mul.f r0.y, c10.y, r0.y
+add.f r2.z, r4.x, (neg)r2.z
+max.f r3.x, r3.x, c10.w
+add.f r2.y, r4.z, (neg)r2.y
+mul.f r3.z, r5.w, c6.x
+mul.f r1.z, r2.z, r1.z
+mov.f32f32 r2.z, r3.x
+mad.f32 r1.z, r2.y, r2.x, r1.z
+add.f r0.y, r0.z, (neg)r0.y
+mul.f r0.z, r6.x, c6.y
+mul.f r2.x, r3.z, r3.x
+mul.f r2.y, r5.z, r4.y
+mad.f32 r0.x, r0.y, r0.x, r1.z
+mul.f r0.y, r2.w, r2.z
+mul.f r0.z, r0.z, r2.z
+mul.f r2.z, r5.y, r4.y
max.f r0.x, (neg)r0.x, c10.w
-mov.f32f32 r0.z, r1.y
-mul.f r1.y, r4.z, c9.x
-mul.f r1.z, r4.y, c5.z
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r3.y, r0.z
-mov.f32f32 r0.z, r1.y
-mov.f32f32 r1.y, r1.z
-mul.f r1.z, r4.x, c5.y
-mul.f r2.x, r3.w, c5.x
-(ss)mov.f32f32 r3.z, r0.w
-log2 r0.x, r0.x
-mov.f32f32 r0.w, r1.w
-sam (f32)(xyz)r3.w, r3.x, s#3, t#3
-(sy)mul.f r1.w, r3.w, r2.y
-mul.f r2.z, r4.y, r2.y
-mul.f r2.y, r4.x, r2.y
-mov.f32f32 r3.w, r0.w
-mov.f32f32 r0.w, r1.w
-mov.f32f32 r0.z, r0.z
-nop
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r1.w, r0.z
-(ss)nop
-sam (f32)(xyzw)r3.x, r3.z, s#2, t#2
-(sy)mad.f32 r0.z, c8.x, r3.w, c11.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r2.x, r2.x
-(ss)mul.f r3.z, r3.z, c7.z
-mov.f32f32 r0.z, r0.z
+mul.f r2.w, r5.w, c5.x
+sam (f32)(xyzw)r3.z, r1.x, s#2, t#2
+(sy)(ss)mul.f r1.x, r4.x, c7.z
+mad.f32 r1.y, c8.x, r4.y, c11.x
+mul.f r1.z, r3.z, c7.x
nop
-mul.f r3.y, r3.y, c7.y
-mul.f r3.x, r3.x, c7.x
-(ss)mul.f r0.x, r0.z, r0.x
-(rpt2)nop
-mov.f32f32 r0.x, r0.x
+mul.f r3.x, r3.w, c7.y
+log2 r0.x, r0.x
+(ss)mul.f r0.x, r1.y, r0.x
(rpt5)nop
exp2 r0.x, r0.x
-(ss)mad.f32 r0.z, r3.z, r0.x, r2.w
-mad.f32 r1.x, r3.y, r0.x, r1.x
-(ss)mad.f32 r0.x, r3.x, r0.x, r0.y
-nop
-mov.f32f32 r0.y, r0.z
-mov.f32f32 r0.z, r1.x
-mov.f32f32 r0.x, r0.x
+(ss)mad.f32 r0.y, r1.x, r0.x, r0.y
+mad.f32 r0.z, r3.x, r0.x, r0.z
+(ss)mad.f32 r0.x, r1.z, r0.x, r2.x
nop
-mad.f32 r0.y, r0.y, r2.z, r1.y
-mad.f32 r0.z, r0.z, r2.y, r1.z
-mad.f32 r0.x, r0.x, r0.w, r2.x
-nop
-mov.f32f32 r1.z, r0.y
-mov.f32f32 r1.y, r0.z
-mov.f32f32 r1.x, r0.x
+mad.f32 r1.z, r0.y, r2.y, r0.w
+mad.f32 r1.y, r0.z, r2.z, r4.w
+mad.f32 r1.x, r0.x, r3.y, r2.w
end
nop
-nop
-nop
; FRAG: outputs: r1.x (1:0)
-; FRAG: inputs: r0.z (5:20,cm=f,il=8,b=1) r1.x (5:21,cm=f,il=12,b=1) r2.x (5:22,cm=f,il=16,b=1) r3.x (5:23,cm=f,il=20,b=1) r4.x (5:24,cm=f,il=24,b=1)
-; FRAG: 211 instructions, 0 half, 6 full
+; FRAG: inputs: r0.x (5:20,cm=f,il=8,b=1) r1.x (5:21,cm=f,il=12,b=1) r2.x (5:22,cm=f,il=16,b=1) r3.x (5:23,cm=f,il=20,b=1) r4.x (5:24,cm=f,il=24,b=1)
+; FRAG: 140 instructions, 0 half, 7 full
diff --git a/reference/xon2.asm b/reference/xon2.asm
index 9a1b31d..26f774e 100644
--- a/reference/xon2.asm
+++ b/reference/xon2.asm
@@ -1,22 +1,22 @@
; options:
; VERT: new compiler
-@in(r2.w) in0
-@in(r3.x) in1
-@in(r3.y) in2
-@in(r3.z) in3
+@in(r6.x) in0
+@in(r6.y) in1
+@in(r6.z) in2
+@in(r6.w) in3
@in(r0.x) in4
@in(r0.y) in5
@in(r0.z) in6
@in(r0.w) in7
-@in(r6.x) in8
-@in(r6.y) in9
-@in(r6.z) in10
-@in(r3.w) in12
-@in(r4.x) in13
-@in(r4.y) in14
-@in(r4.z) in16
-@in(r4.w) in17
-@in(r5.x) in18
+@in(r3.x) in8
+@in(r3.y) in9
+@in(r3.z) in10
+@in(r4.x) in12
+@in(r4.y) in13
+@in(r4.z) in14
+@in(r5.x) in16
+@in(r5.y) in17
+@in(r5.z) in18
@in(r1.z) in20
@in(r1.w) in21
@out(r0.x) out0
@@ -43,99 +43,53 @@
@out(r5.y) out21
@out(r5.z) out22
@out(r5.w) out23
-(sy)(ss)add.f r1.x, c4.x, (neg)r2.w
+@const(c9.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
+@const(c10.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
+(sy)(ss)add.f r1.x, c4.x, (neg)r6.x
mul.f r1.y, c5.y, r0.x
mul.f r0.x, c5.x, r0.x
+mul.f r2.w, c0.w, r6.x
+mul.f r2.x, r1.x, r5.x
+add.f r2.y, c4.y, (neg)r6.y
+mul.f r2.z, r1.x, r4.x
+mul.f r1.x, r1.x, r3.x
mad.f32 r1.y, c6.y, r0.y, r1.y
-mul.f r2.x, r1.x, r4.z
-add.f r5.y, c4.y, (neg)r3.x
-mad.f32 r1.y, c7.y, r0.z, r1.y
+mad.f32 r2.x, r2.y, r5.y, r2.x
+add.f r3.w, c4.z, (neg)r6.z
+mad.f32 r4.w, r2.y, r4.y, r2.z
+mad.f32 r1.x, r2.y, r3.y, r1.x
+nop
+mad.f32 r2.z, r3.w, r5.z, r2.x
+mad.f32 r2.y, r3.w, r4.z, r4.w
+mad.f32 r2.x, r3.w, r3.z, r1.x
+mad.f32 r1.x, c7.y, r0.z, r1.y
mad.f32 r0.x, c6.x, r0.y, r0.x
-mul.f r0.y, r1.x, r3.w
-mad.f32 r2.x, r5.y, r4.w, r2.x
-mad.f32 r1.y, c8.y, r0.w, r1.y
+mad.f32 r1.y, c8.y, r0.w, r1.x
mad.f32 r0.x, c7.x, r0.z, r0.x
-mad.f32 r0.y, r5.y, r4.x, r0.y
-mov.f32f32 r0.z, r2.x
-add.f r2.x, c4.z, (neg)r3.y
-mov.f32f32 r1.y, r1.y
-mad.f32 r0.x, c8.x, r0.w, r0.x
-mov.f32f32 r0.y, r0.y
-mad.f32 r0.z, r2.x, r5.x, r0.z
-mov.f32f32 r0.w, r1.y
-mov.f32f32 r0.x, r0.x
-mad.f32 r0.y, r2.x, r4.y, r0.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r1.y, r0.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r2.y, r0.y
-mov.f32f32 r2.z, r0.z
-mul.f r0.y, r1.x, r6.x
-mov.f32f32 r1.x, r0.x
-mad.f32 r0.x, r5.y, r6.y, r0.y
-mul.f r0.y, c0.w, r2.w
-mul.f r0.z, c0.z, r2.w
-mul.f r0.w, c0.y, r2.w
-mov.f32f32 r0.x, r0.x
-mad.f32 r0.y, c1.w, r3.x, r0.y
-mad.f32 r0.x, r2.x, r6.z, r0.x
-mad.f32 r0.y, c2.w, r3.y, r0.y
-mad.f32 r0.z, c1.z, r3.x, r0.z
-mad.f32 r0.w, c1.y, r3.x, r0.w
-mov.f32f32 r2.x, r0.x
-mad.f32 r0.x, c3.w, r3.z, r0.y
-mad.f32 r0.y, c2.z, r3.y, r0.z
-mad.f32 r0.z, c2.y, r3.y, r0.w
-mul.f r2.w, c0.x, r2.w
-mov.f32f32 r0.w, r0.x
-mad.f32 r0.x, c3.z, r3.z, r0.y
-mad.f32 r0.y, c3.y, r3.z, r0.z
-mad.f32 r2.w, c1.x, r3.x, r2.w
-mov.f32f32 r3.x, c9.x
-mov.f32f32 r0.z, r0.x
-mov.f32f32 r0.y, r0.y
-mad.f32 r0.x, c2.x, r3.y, r2.w
-mov.f32f32 r5.w, r3.x
-mad.f32 r0.x, c3.x, r3.z, r0.x
-mov.f32f32 r2.w, r5.x
-mov.f32f32 r3.x, r4.w
-mov.f32f32 r3.y, r4.z
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r5.z, r2.w
-mov.f32f32 r5.y, r3.x
-mov.f32f32 r5.x, r3.y
-mov.f32f32 r2.w, c9.x
-mov.f32f32 r3.x, r4.y
-mov.f32f32 r3.y, r4.x
-mov.f32f32 r3.z, r3.w
-mov.f32f32 r4.w, r2.w
-mov.f32f32 r4.z, r3.x
-mov.f32f32 r4.y, r3.y
-mov.f32f32 r4.x, r3.z
-mov.f32f32 r2.w, c9.x
-mov.f32f32 r3.x, r6.z
-mov.f32f32 r3.y, r6.y
-mov.f32f32 r6.x, r6.x
-mov.f32f32 r3.w, r2.w
-mov.f32f32 r3.z, r3.x
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r3.x, r6.x
+mad.f32 r0.y, c1.w, r6.y, r2.w
+mad.f32 r1.x, c8.x, r0.w, r0.x
+mad.f32 r0.x, c2.w, r6.z, r0.y
+mul.f r0.y, c0.z, r6.x
+mad.f32 r0.w, c3.w, r6.w, r0.x
+mad.f32 r0.x, c1.z, r6.y, r0.y
+mul.f r0.y, c0.y, r6.x
+mad.f32 r0.x, c2.z, r6.z, r0.x
+mad.f32 r0.y, c1.y, r6.y, r0.y
+mad.f32 r0.z, c3.z, r6.w, r0.x
+mad.f32 r0.x, c2.y, r6.z, r0.y
+mul.f r2.w, c0.x, r6.x
+mad.f32 r0.y, c3.y, r6.w, r0.x
+mad.f32 r0.x, c1.x, r6.y, r2.w
+mov.f32f32 r5.w, c9.x
+mad.f32 r0.x, c2.x, r6.z, r0.x
+mov.f32f32 r4.w, c9.x
+mad.f32 r0.x, c3.x, r6.w, r0.x
+mov.f32f32 r3.w, c9.x
mov.f32f32 r2.w, c9.x
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r1.z, r1.z
-nop
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r1.z, r1.z
end
nop
nop
-nop
; VERT: outputs: r0.x (0:0) r1.x (5:20) r2.x (5:21) r3.x (5:22) r4.x (5:23) r5.x (5:24)
-; VERT: inputs: r2.w (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=f,il=12,b=0) r6.x (0:0,cm=7,il=16,b=0) r3.w (0:0,cm=7,il=20,b=0) r4.z (0:0,cm=7,il=24,b=0) r1.z (0:0,cm=3,il=28,b=0)
-; VERT: 89 instructions, 0 half, 7 full
+; VERT: inputs: r6.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=f,il=12,b=0) r3.x (0:0,cm=7,il=16,b=0) r4.x (0:0,cm=7,il=20,b=0) r5.x (0:0,cm=7,il=24,b=0) r1.z (0:0,cm=3,il=28,b=0)
+; VERT: 42 instructions, 0 half, 7 full
diff --git a/reference/xon3.asm b/reference/xon3.asm
index 0c5831e..fc88e42 100644
--- a/reference/xon3.asm
+++ b/reference/xon3.asm
@@ -6,219 +6,158 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c12.x) 0xbf000000, 0x40000000, 0xbf800000, 0x00000000
+@const(c13.x) 0x3f800000, 0x3e800000, 0x00000000, 0x00000000
+@const(c14.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.z, 2, r0.x
-bary.f r0.w, 0, r0.x
-bary.f r1.x, 4, r0.x
-bary.f r1.y, 3, r0.x
-mov.f32f32 r1.z, r0.z
-mov.f32f32 r1.w, r0.w
-mul.f r2.x, r1.x, r1.x
-bary.f r2.y, 5, r0.x
-mov.f32f32 r2.z, r1.z
-mov.f32f32 r1.z, r1.y
-mov.f32f32 r3.x, r1.w
-bary.f r3.z, 1, r0.x
-mad.f32 r1.w, r2.y, r2.y, r2.x
-mov.f32f32 r2.w, r1.z
-mov.f32f32 r1.z, r0.w
-mov.f32f32 r2.x, r3.z
-mov.f32f32 r1.w, r1.w
-bary.f r3.w, 6, r0.x
-mov.f32f32 r4.x, r1.z
-mov.f32f32 r3.y, r2.x
-sam (f32)(xyz)r4.y, r2.z, s#5, t#5
-(sy)mad.f32 r1.z, c12.y, r4.y, c12.z
-mad.f32 r1.w, r3.w, r3.w, r1.w
-mov.f32f32 r2.x, r3.z
-(ss)mad.f32 r2.z, c12.y, r4.z, c12.z
-mov.f32f32 r1.z, r1.z
-bary.f r2.w, 12, r0.x
-sam (f32)(xyz)r5.x, r3.x, s#0, t#0
-(sy)(ss)add.f r3.x, r5.x, c12.x
-bary.f r3.y, 16, r0.x
+bary.f r0.w, 3, r0.x
+bary.f r1.x, 0, r0.x
+bary.f r1.z, 4, r0.x
+mov.f32f32 r1.w, r0.z
+mov.f32f32 r2.x, r0.w
+mov.f32f32 r2.y, r1.x
+mov.f32f32 r2.w, r1.x
+sam (f32)(xyz)r3.x, r0.z, s#5, t#5
+(sy)(ss)mad.f32 r0.z, c12.y, r3.x, c12.z
+mov.f32f32 r3.w, r1.x
+bary.f r1.y, 1, r0.x
+bary.f r0.w, 16, r0.x
+mov.f32f32 r2.z, r0.z
+bary.f r3.x, 12, r0.x
+mov.f32f32 r4.x, r1.y
bary.f r4.y, 8, r0.x
-mul.f r2.w, r1.z, r2.w
-mov.f32f32 r2.z, r2.z
-bary.f r4.z, 13, r0.x
-mov.f32f32 r3.x, r3.x
-mul.f r3.y, r1.z, r3.y
-mul.f r1.z, r1.z, r4.y
-mad.f32 r2.w, r2.z, r4.z, r2.w
-mul.f r4.y, r3.x, r3.x
-add.f r4.z, r5.y, c12.x
-bary.f r5.x, 17, r0.x
-mov.f32f32 r2.w, r2.w
-mad.f32 r4.w, c12.y, r4.w, c12.z
-mov.f32f32 r4.z, r4.z
-mad.f32 r3.y, r2.z, r5.x, r3.y
+mul.f r0.z, r0.z, r0.w
+mul.f r0.w, r2.z, r3.x
+mad.f32 r3.x, c12.y, r3.y, c12.z
+mul.f r2.z, r2.z, r4.y
+bary.f r3.y, 17, r0.x
+sam (f32)(xyz)r3.w, r3.w, s#0, t#0
+(sy)(ss)add.f r3.w, r3.w, c12.x
+mov.f32f32 r4.z, r3.x
+bary.f r4.w, 13, r0.x
bary.f r5.x, 9, r0.x
-mov.f32f32 r4.w, r4.w
-bary.f r5.y, 14, r0.x
-mad.f32 r4.y, r4.z, r4.z, r4.y
-mov.f32f32 r3.y, r3.y
-mad.f32 r1.z, r2.z, r5.x, r1.z
-mad.f32 r2.z, r4.w, r5.y, r2.w
-mov.f32f32 r2.w, r4.y
-add.f r4.y, r5.z, c12.x
-bary.f r5.x, 18, r0.x
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r1.z, r1.z
-bary.f (ei)r0.x, 10, r0.x
-mov.f32f32 r0.y, r4.y
-mad.f32 r3.y, r4.w, r5.x, r3.y
-rsq r1.w, r1.w
-(ss)mov.f32f32 r1.w, r1.w
-mad.f32 r0.x, r4.w, r0.x, r1.z
-mad.f32 r1.z, r0.y, r0.y, r2.w
-mov.f32f32 r2.w, r3.y
-mul.f r1.x, r1.x, r1.w
-mul.f r3.y, r0.x, r0.x
-mul.f r2.y, r2.y, r1.w
-mad.f32 r3.y, r2.z, r2.z, r3.y
-mov.f32f32 r2.w, r2.w
-rsq r1.z, r1.z
-mov.f32f32 r1.x, r1.x
-(ss)mov.f32f32 r1.z, r1.z
+mad.f32 r0.z, r3.x, r3.y, r0.z
+mul.f r3.x, r1.z, r1.z
+mad.f32 r0.w, r4.z, r4.w, r0.w
+mad.f32 r3.y, c12.y, r3.z, c12.z
+mov.f32f32 r3.z, r3.w
+mad.f32 r2.z, r4.z, r5.x, r2.z
+bary.f r4.z, 18, r0.x
+mov.f32f32 r4.w, r3.y
+bary.f r5.x, 14, r0.x
+mul.f r3.w, r3.w, r3.z
+add.f r4.x, r4.x, c12.x
+bary.f r5.y, 10, r0.x
+mad.f32 r0.w, r4.w, r5.x, r0.w
+mad.f32 r0.z, r3.y, r4.z, r0.z
+bary.f r3.y, 5, r0.x
+mov.f32f32 r4.z, r4.x
+mov.f32f32 r5.x, r0.w
+mad.f32 r4.w, r4.w, r5.y, r2.z
+mov.f32f32 r2.z, r0.z
+mad.f32 r3.x, r3.y, r3.y, r3.x
+bary.f (ei)r0.x, 6, r0.x
+mul.f r0.y, r4.w, r4.w
+mad.f32 r3.w, r4.x, r4.z, r3.w
+mad.f32 r0.y, r0.w, r5.x, r0.y
+mov.f32f32 r0.w, r2.z
+add.f r4.x, r4.y, c12.x
+mad.f32 r4.y, r0.x, r0.x, r3.x
+mov.f32f32 r2.z, r1.y
+mad.f32 r0.y, r0.w, r0.w, r0.y
+mov.f32f32 r0.w, r4.x
+mov.f32f32 r3.x, r1.y
+sam (f32)(xyz)r5.y, r1.w, s#4, t#4
+sam (f32)(xyz)r6.x, r1.x, s#3, t#3
+(rpt3)nop
+rsq r0.y, r0.y
+(ss)mov.f32f32 r1.x, r0.y
+mul.f r0.y, r0.z, r0.y
+mad.f32 r0.z, r0.w, r0.w, r3.w
+rsq r0.w, r4.y
+(ss)mov.f32f32 r1.y, r0.w
+mul.f r1.w, r4.w, r1.x
+mov.f32f32 r2.x, r0.y
+mul.f r1.x, r5.x, r1.x
+mul.f r1.z, r1.z, r1.y
+mul.f r1.y, r3.y, r1.y
+rsq r0.z, r0.z
+(ss)mov.f32f32 r3.y, r0.z
+mov.f32f32 r3.w, r1.w
+(ss)max.f r4.y, c13.y, r2.x
+mov.f32f32 r4.w, r1.x
+mul.f r3.z, r3.z, r3.y
+mul.f r3.y, r4.z, r3.y
+mul.f r0.z, r4.x, r0.z
+nop
+mul.f r1.w, r3.z, r1.w
+mov.f32f32 r3.z, r3.z
+mad.f32 r1.x, r3.y, r1.x, r1.w
+rcp r1.w, r4.y
+(ss)mov.f32f32 r4.x, r1.w
+mad.f32 r1.x, r0.z, r2.x, r1.x
+(ss)mul.f r4.y, r3.z, r3.w
mov.f32f32 r3.y, r3.y
-mov.f32f32 r2.y, r2.y
-mad.f32 r3.y, r2.w, r2.w, r3.y
-mul.f r3.x, r3.x, r1.z
-mul.f r4.y, r4.z, r1.z
-mul.f r0.y, r0.y, r1.z
-(rpt2)nop
-rsq r1.z, r3.y
-(ss)mov.f32f32 r1.z, r1.z
-mov.f32f32 r3.x, r3.x
-(ss)mov.f32f32 r3.y, r4.y
-mov.f32f32 r0.y, r0.y
-mul.f r0.x, r0.x, r1.z
-mul.f r2.w, r2.w, r1.z
-mul.f r1.z, r2.z, r1.z
-mul.f r1.w, r3.w, r1.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r2.z, r2.w
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r1.w, r1.w
-mul.f r2.w, r3.x, r0.x
-mul.f r3.w, r3.x, r0.x
-mad.f32 r2.w, r3.y, r1.z, r2.w
-mad.f32 r3.w, r3.y, r1.z, r3.w
-max.f r4.z, c13.y, r2.z
-mov.f32f32 r4.y, r2.x
-mov.f32f32 r2.x, r2.w
-mov.f32f32 r2.w, r3.w
-mad.f32 r2.x, r0.y, r2.z, r2.x
-mad.f32 r2.w, r0.y, r2.z, r2.w
-mov.f32f32 r3.w, r4.z
-mov.f32f32 r4.z, r0.w
-mul.f r3.x, r2.x, r3.x
-mul.f r3.y, r2.x, r3.y
-mul.f r0.y, r2.x, r0.y
-mov.f32f32 r2.x, r2.w
-mov.f32f32 r2.w, r3.x
-mov.f32f32 r3.x, r3.y
-mov.f32f32 r0.y, r0.y
-max.f r2.x, r2.x, c12.w
-mul.f r2.w, c12.y, r2.w
-mul.f r3.x, c12.y, r3.x
-mul.f r0.y, c12.y, r0.y
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r0.y, r0.y
-sam (f32)(xyzw)r4.w, r4.x, s#1, t#1
-(sy)mul.f r3.y, r5.y, c7.z
-add.f r0.x, r0.x, (neg)r2.w
-add.f r1.z, r1.z, (neg)r3.x
-add.f r0.y, r2.z, (neg)r0.y
-mul.f r2.z, r3.y, r2.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r2.z, r2.z
-mul.f r0.x, r0.x, r1.x
-mul.f r1.x, r5.x, c7.y
-mad.f32 r0.x, r1.z, r2.y, r0.x
-mul.f r1.z, r4.w, c7.x
-rcp r2.y, r3.w
-(ss)mov.f32f32 r2.y, r2.y
mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.x, r0.x
-mul.f r1.x, r1.x, r2.x
-mad.f32 r0.x, r0.y, r1.w, r0.x
-mul.f r0.y, r1.z, r2.x
-(rpt1)nop
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.z, r0.z
+mul.f r3.z, r1.x, r3.z
+(sy)mul.f r4.z, r5.w, r4.x
+mul.f r4.x, r5.z, r4.x
+mad.f32 r4.y, r3.y, r4.w, r4.y
+mul.f r3.z, c12.y, r3.z
+mad.f32 r2.x, r0.z, r2.x, r4.y
+mul.f r3.y, r1.x, r3.y
+mul.f r0.z, r1.x, r0.z
+add.f r1.x, r3.w, (neg)r3.z
+max.f r2.x, r2.x, c12.w
+sam (f32)(xyzw)r6.w, r2.y, s#1, t#1
+(ss)mul.f r2.y, c12.y, r3.y
+(sy)mul.f r2.z, r6.w, c7.x
+mul.f r1.x, r1.x, r1.z
+mov.f32f32 r1.z, r2.x
+add.f r2.y, r4.w, (neg)r2.y
+mul.f r2.x, r2.z, r2.x
+mul.f r2.z, r7.y, c7.z
+mul.f r3.y, r7.x, c7.y
+mad.f32 r1.x, r2.y, r1.y, r1.x
+mul.f r0.z, c12.y, r0.z
+mul.f r1.y, r2.z, r1.z
+mul.f r1.z, r3.y, r1.z
+mul.f r2.y, r5.y, r1.w
+add.f r0.y, r0.y, (neg)r0.z
+mul.f r0.x, r0.x, r0.w
+mul.f r0.z, r7.y, c6.z
+mul.f r1.w, r7.z, c11.x
+sam (f32)(xyzw)r2.z, r2.w, s#2, t#2
+(sy)mul.f r0.w, r3.x, c8.z
+mad.f32 r0.x, r0.y, r0.x, r1.x
+mul.f r0.y, r7.x, c6.y
+mul.f r1.x, r6.w, c6.x
+(ss)mul.f r2.w, r2.w, c8.y
max.f r0.x, (neg)r0.x, c12.w
-mov.f32f32 r0.z, r1.y
-mul.f r1.y, r5.z, c11.x
-mul.f r2.x, r5.y, c6.z
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r1.w, r0.z
-mov.f32f32 r0.z, r1.y
-mov.f32f32 r1.y, r2.x
-mul.f r2.x, r5.x, c6.y
-mul.f r2.w, r4.w, c6.x
-mov.f32f32 r3.x, r4.z
+(rpt3)nop
+mad.f32 r3.x, c9.x, r3.y, c13.x
+mul.f r2.z, r2.z, c8.x
log2 r0.x, r0.x
-mov.f32f32 r3.y, r3.z
-(ss)nop
-sam (f32)(xyz)r3.w, r1.z, s#4, t#4
-(sy)(ss)mul.f r1.z, r3.w, r2.y
-mul.f r1.w, r4.y, r2.y
-mul.f r2.y, r4.x, r2.y
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r0.z, r0.z
-nop
-mov.f32f32 r3.w, r1.w
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r1.w, r0.z
-sam (f32)(xyzw)r4.x, r3.x, s#2, t#2
-(sy)mad.f32 r0.z, c9.x, r4.w, c13.x
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r2.w, r2.w
-(ss)mul.f r3.x, r4.z, c8.z
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-mul.f r3.y, r4.y, c8.y
-mul.f r4.x, r4.x, c8.x
-(ss)mul.f r0.x, r0.z, r0.x
-mov.f32f32 r0.z, r0.w
-mov.f32f32 r0.w, r3.z
-nop
-mov.f32f32 r0.x, r0.x
+(rpt1)nop
+(ss)mul.f r0.x, r3.x, r0.x
(rpt5)nop
exp2 r0.x, r0.x
-(ss)mad.f32 r2.z, r3.x, r0.x, r2.z
-mad.f32 r1.x, r3.y, r0.x, r1.x
-(ss)mad.f32 r0.x, r4.x, r0.x, r0.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.y, r2.z
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r0.x, r0.x
+(ss)mad.f32 r0.w, r0.w, r0.x, r1.y
+mad.f32 r1.y, r2.w, r0.x, r1.z
+(ss)mad.f32 r0.x, r2.z, r0.x, r2.x
nop
-mad.f32 r0.y, r0.y, r3.w, r1.y
-mad.f32 r1.x, r1.x, r2.y, r2.x
-sam (f32)(xyz)r3.x, r0.z, s#3, t#3
-(sy)mad.f32 r0.y, c10.z, r3.z, r0.y
-mad.f32 r0.x, r0.x, r1.z, r2.w
-(ss)mad.f32 r0.z, c10.y, r3.y, r1.x
-mad.f32 r0.x, c10.x, r3.x, r0.x
-mov.f32f32 r0.y, r0.y
-(rpt2)nop
-mov.f32f32 r1.z, r0.y
-mov.f32f32 r0.y, r0.z
-mov.f32f32 r0.x, r0.x
-(rpt1)nop
-mov.f32f32 r1.y, r0.y
-mov.f32f32 r1.x, r0.x
+mad.f32 r0.z, r0.w, r4.z, r0.z
+mad.f32 r0.y, r1.y, r4.x, r0.y
+mad.f32 r1.z, c10.z, r6.z, r0.z
+mad.f32 r1.y, c10.y, r6.y, r0.y
+mad.f32 r0.x, r0.x, r2.y, r1.x
+nop
+mad.f32 r1.x, c10.x, r6.x, r0.x
end
nop
+nop
+nop
; FRAG: outputs: r1.x (1:0)
-; FRAG: inputs: r0.z (5:20,cm=f,il=8,b=1) r1.x (5:21,cm=f,il=12,b=1) r2.x (5:22,cm=f,il=16,b=1) r3.x (5:23,cm=f,il=20,b=1) r4.x (5:24,cm=f,il=24,b=1)
-; FRAG: 222 instructions, 0 half, 6 full
+; FRAG: inputs: r0.x (5:20,cm=f,il=8,b=1) r1.x (5:21,cm=f,il=12,b=1) r2.x (5:22,cm=f,il=16,b=1) r3.x (5:23,cm=f,il=20,b=1) r4.x (5:24,cm=f,il=24,b=1)
+; FRAG: 157 instructions, 0 half, 8 full
diff --git a/reference/xon4.asm b/reference/xon4.asm
index 9a1b31d..26f774e 100644
--- a/reference/xon4.asm
+++ b/reference/xon4.asm
@@ -1,22 +1,22 @@
; options:
; VERT: new compiler
-@in(r2.w) in0
-@in(r3.x) in1
-@in(r3.y) in2
-@in(r3.z) in3
+@in(r6.x) in0
+@in(r6.y) in1
+@in(r6.z) in2
+@in(r6.w) in3
@in(r0.x) in4
@in(r0.y) in5
@in(r0.z) in6
@in(r0.w) in7
-@in(r6.x) in8
-@in(r6.y) in9
-@in(r6.z) in10
-@in(r3.w) in12
-@in(r4.x) in13
-@in(r4.y) in14
-@in(r4.z) in16
-@in(r4.w) in17
-@in(r5.x) in18
+@in(r3.x) in8
+@in(r3.y) in9
+@in(r3.z) in10
+@in(r4.x) in12
+@in(r4.y) in13
+@in(r4.z) in14
+@in(r5.x) in16
+@in(r5.y) in17
+@in(r5.z) in18
@in(r1.z) in20
@in(r1.w) in21
@out(r0.x) out0
@@ -43,99 +43,53 @@
@out(r5.y) out21
@out(r5.z) out22
@out(r5.w) out23
-(sy)(ss)add.f r1.x, c4.x, (neg)r2.w
+@const(c9.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
+@const(c10.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
+(sy)(ss)add.f r1.x, c4.x, (neg)r6.x
mul.f r1.y, c5.y, r0.x
mul.f r0.x, c5.x, r0.x
+mul.f r2.w, c0.w, r6.x
+mul.f r2.x, r1.x, r5.x
+add.f r2.y, c4.y, (neg)r6.y
+mul.f r2.z, r1.x, r4.x
+mul.f r1.x, r1.x, r3.x
mad.f32 r1.y, c6.y, r0.y, r1.y
-mul.f r2.x, r1.x, r4.z
-add.f r5.y, c4.y, (neg)r3.x
-mad.f32 r1.y, c7.y, r0.z, r1.y
+mad.f32 r2.x, r2.y, r5.y, r2.x
+add.f r3.w, c4.z, (neg)r6.z
+mad.f32 r4.w, r2.y, r4.y, r2.z
+mad.f32 r1.x, r2.y, r3.y, r1.x
+nop
+mad.f32 r2.z, r3.w, r5.z, r2.x
+mad.f32 r2.y, r3.w, r4.z, r4.w
+mad.f32 r2.x, r3.w, r3.z, r1.x
+mad.f32 r1.x, c7.y, r0.z, r1.y
mad.f32 r0.x, c6.x, r0.y, r0.x
-mul.f r0.y, r1.x, r3.w
-mad.f32 r2.x, r5.y, r4.w, r2.x
-mad.f32 r1.y, c8.y, r0.w, r1.y
+mad.f32 r1.y, c8.y, r0.w, r1.x
mad.f32 r0.x, c7.x, r0.z, r0.x
-mad.f32 r0.y, r5.y, r4.x, r0.y
-mov.f32f32 r0.z, r2.x
-add.f r2.x, c4.z, (neg)r3.y
-mov.f32f32 r1.y, r1.y
-mad.f32 r0.x, c8.x, r0.w, r0.x
-mov.f32f32 r0.y, r0.y
-mad.f32 r0.z, r2.x, r5.x, r0.z
-mov.f32f32 r0.w, r1.y
-mov.f32f32 r0.x, r0.x
-mad.f32 r0.y, r2.x, r4.y, r0.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r1.y, r0.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r2.y, r0.y
-mov.f32f32 r2.z, r0.z
-mul.f r0.y, r1.x, r6.x
-mov.f32f32 r1.x, r0.x
-mad.f32 r0.x, r5.y, r6.y, r0.y
-mul.f r0.y, c0.w, r2.w
-mul.f r0.z, c0.z, r2.w
-mul.f r0.w, c0.y, r2.w
-mov.f32f32 r0.x, r0.x
-mad.f32 r0.y, c1.w, r3.x, r0.y
-mad.f32 r0.x, r2.x, r6.z, r0.x
-mad.f32 r0.y, c2.w, r3.y, r0.y
-mad.f32 r0.z, c1.z, r3.x, r0.z
-mad.f32 r0.w, c1.y, r3.x, r0.w
-mov.f32f32 r2.x, r0.x
-mad.f32 r0.x, c3.w, r3.z, r0.y
-mad.f32 r0.y, c2.z, r3.y, r0.z
-mad.f32 r0.z, c2.y, r3.y, r0.w
-mul.f r2.w, c0.x, r2.w
-mov.f32f32 r0.w, r0.x
-mad.f32 r0.x, c3.z, r3.z, r0.y
-mad.f32 r0.y, c3.y, r3.z, r0.z
-mad.f32 r2.w, c1.x, r3.x, r2.w
-mov.f32f32 r3.x, c9.x
-mov.f32f32 r0.z, r0.x
-mov.f32f32 r0.y, r0.y
-mad.f32 r0.x, c2.x, r3.y, r2.w
-mov.f32f32 r5.w, r3.x
-mad.f32 r0.x, c3.x, r3.z, r0.x
-mov.f32f32 r2.w, r5.x
-mov.f32f32 r3.x, r4.w
-mov.f32f32 r3.y, r4.z
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r5.z, r2.w
-mov.f32f32 r5.y, r3.x
-mov.f32f32 r5.x, r3.y
-mov.f32f32 r2.w, c9.x
-mov.f32f32 r3.x, r4.y
-mov.f32f32 r3.y, r4.x
-mov.f32f32 r3.z, r3.w
-mov.f32f32 r4.w, r2.w
-mov.f32f32 r4.z, r3.x
-mov.f32f32 r4.y, r3.y
-mov.f32f32 r4.x, r3.z
-mov.f32f32 r2.w, c9.x
-mov.f32f32 r3.x, r6.z
-mov.f32f32 r3.y, r6.y
-mov.f32f32 r6.x, r6.x
-mov.f32f32 r3.w, r2.w
-mov.f32f32 r3.z, r3.x
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r3.x, r6.x
+mad.f32 r0.y, c1.w, r6.y, r2.w
+mad.f32 r1.x, c8.x, r0.w, r0.x
+mad.f32 r0.x, c2.w, r6.z, r0.y
+mul.f r0.y, c0.z, r6.x
+mad.f32 r0.w, c3.w, r6.w, r0.x
+mad.f32 r0.x, c1.z, r6.y, r0.y
+mul.f r0.y, c0.y, r6.x
+mad.f32 r0.x, c2.z, r6.z, r0.x
+mad.f32 r0.y, c1.y, r6.y, r0.y
+mad.f32 r0.z, c3.z, r6.w, r0.x
+mad.f32 r0.x, c2.y, r6.z, r0.y
+mul.f r2.w, c0.x, r6.x
+mad.f32 r0.y, c3.y, r6.w, r0.x
+mad.f32 r0.x, c1.x, r6.y, r2.w
+mov.f32f32 r5.w, c9.x
+mad.f32 r0.x, c2.x, r6.z, r0.x
+mov.f32f32 r4.w, c9.x
+mad.f32 r0.x, c3.x, r6.w, r0.x
+mov.f32f32 r3.w, c9.x
mov.f32f32 r2.w, c9.x
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r1.z, r1.z
-nop
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r1.z, r1.z
end
nop
nop
-nop
; VERT: outputs: r0.x (0:0) r1.x (5:20) r2.x (5:21) r3.x (5:22) r4.x (5:23) r5.x (5:24)
-; VERT: inputs: r2.w (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=f,il=12,b=0) r6.x (0:0,cm=7,il=16,b=0) r3.w (0:0,cm=7,il=20,b=0) r4.z (0:0,cm=7,il=24,b=0) r1.z (0:0,cm=3,il=28,b=0)
-; VERT: 89 instructions, 0 half, 7 full
+; VERT: inputs: r6.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=f,il=12,b=0) r3.x (0:0,cm=7,il=16,b=0) r4.x (0:0,cm=7,il=20,b=0) r5.x (0:0,cm=7,il=24,b=0) r1.z (0:0,cm=3,il=28,b=0)
+; VERT: 42 instructions, 0 half, 7 full
diff --git a/reference/xon5.asm b/reference/xon5.asm
index be51a42..20eca61 100644
--- a/reference/xon5.asm
+++ b/reference/xon5.asm
@@ -6,143 +6,106 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c7.x) 0xbf000000, 0x40000000, 0xbf800000, 0x00000000
+@const(c8.x) 0x3e800000, 0x00000000, 0x00000000, 0x00000000
+@const(c9.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.z, 2, r0.x
-bary.f r0.w, 0, r0.x
-bary.f r1.x, 3, r0.x
+bary.f r0.w, 3, r0.x
+bary.f r1.x, 0, r0.x
bary.f r1.y, 1, r0.x
mov.f32f32 r1.z, r0.z
mov.f32f32 r1.w, r0.w
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r2.x, r1.z
-mov.f32f32 r1.z, r1.x
-mov.f32f32 r2.z, r1.w
-mov.f32f32 r1.w, r1.y
-mov.f32f32 r3.x, r0.w
-mov.f32f32 r2.y, r1.z
-mov.f32f32 r0.w, r1.y
-mov.f32f32 r2.w, r1.w
-mov.f32f32 r1.y, r0.z
-mov.f32f32 r0.z, r1.x
-mov.f32f32 r3.y, r0.w
-bary.f r0.w, 4, r0.x
-sam (f32)(xyz)r1.z, r2.x, s#3, t#3
-(sy)mad.f32 r1.x, c7.y, r1.z, c7.z
-mad.f32 r1.z, c7.y, r1.w, c7.z
-sam (f32)(xyz)r3.z, r2.z, s#0, t#0
-(sy)add.f r1.w, r3.z, c7.x
-(ss)mad.f32 r2.x, c7.y, r2.x, c7.z
-mov.f32f32 r1.x, r1.x
-bary.f r2.y, 8, r0.x
-mov.f32f32 r1.w, r1.w
-bary.f r2.z, 12, r0.x
-mul.f r0.w, r1.x, r0.w
-mul.f r2.y, r1.x, r2.y
-mov.f32f32 r1.z, r1.z
-bary.f r2.w, 9, r0.x
-mul.f r3.z, r1.w, r1.w
+mov.f32f32 r2.x, r1.x
+mov.f32f32 r2.y, r1.y
+sam (f32)(xyz)r2.z, r0.z, s#3, t#3
+(sy)(ss)mad.f32 r0.z, c7.y, r2.z, c7.z
+bary.f r0.w, 12, r0.x
+mad.f32 r2.z, c7.y, r2.w, c7.z
+mad.f32 r2.w, c7.y, r3.x, c7.z
+mov.f32f32 r3.x, r0.z
+bary.f r3.y, 8, r0.x
+sam (f32)(xyz)r3.z, r2.x, s#0, t#0
+(sy)(ss)add.f r2.x, r3.z, c7.x
+bary.f r2.y, 4, r0.x
+mul.f r0.z, r0.z, r0.w
+mul.f r0.w, r3.x, r3.y
+mov.f32f32 r3.y, r2.z
+bary.f r3.z, 9, r0.x
+mov.f32f32 r4.y, r2.x
+mul.f r2.y, r3.x, r2.y
+bary.f r3.x, 13, r0.x
+mad.f32 r0.w, r3.y, r3.z, r0.w
+mov.f32f32 r3.z, r2.w
+bary.f r4.z, 10, r0.x
+mul.f r2.x, r2.x, r4.y
add.f r3.w, r3.w, c7.x
-mul.f r1.x, r1.x, r2.z
-mad.f32 r2.y, r1.z, r2.w, r2.y
-bary.f r2.z, 13, r0.x
-mov.f32f32 r2.w, r3.w
-bary.f r3.w, 5, r0.x
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r2.x, r2.x
-bary.f r4.y, 10, r0.x
-mad.f32 r3.z, r2.w, r2.w, r3.z
-mad.f32 r1.x, r1.z, r2.z, r1.x
-mad.f32 r0.w, r1.z, r3.w, r0.w
-mad.f32 r1.z, r2.x, r4.y, r2.y
-mov.f32f32 r2.y, r3.z
-add.f r2.z, r4.x, c7.x
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r3.z, r1.z
-mov.f32f32 r0.w, r0.w
-bary.f r1.z, 6, r0.x
-mov.f32f32 r2.z, r2.z
+bary.f r4.w, 5, r0.x
+mad.f32 r0.w, r3.z, r4.z, r0.w
+mad.f32 r0.z, r2.z, r3.x, r0.z
+mov.f32f32 r2.z, r3.w
+mad.f32 r2.y, r3.y, r4.w, r2.y
+mov.f32f32 r3.x, r0.w
+bary.f r3.y, 6, r0.x
+mad.f32 r2.x, r3.w, r2.z, r2.x
+add.f r3.w, r4.x, c7.x
bary.f (ei)r0.x, 14, r0.x
-sam (f32)(xyzw)r3.w, r3.x, s#1, t#1
-(sy)mul.f r0.y, r4.z, c6.x
-mad.f32 r0.w, r2.x, r1.z, r0.w
-mad.f32 r2.y, r2.z, r2.z, r2.y
-mad.f32 r0.x, r2.x, r0.x, r1.x
-mov.f32f32 r0.y, r0.y
-mul.f r1.x, r0.w, r0.w
-mov.f32f32 r1.z, r0.z
-mad.f32 r0.z, r3.z, r3.z, r1.x
-mov.f32f32 r0.x, r0.x
-rsq r1.x, r2.y
+mad.f32 r0.y, r3.z, r3.y, r2.y
+sam (f32)(xyz)r4.z, r1.z, s#2, t#2
+sam (f32)(xyzw)r5.y, r1.x, s#1, t#1
+(sy)(ss)mul.f r1.w, r6.x, c6.x
+mov.f32f32 r1.x, r3.w
+mad.f32 r0.x, r2.w, r0.x, r0.z
+mul.f r0.z, r0.y, r0.y
nop
-(ss)mov.f32f32 r1.x, r1.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
+mad.f32 r0.z, r0.w, r3.x, r0.z
+mov.f32f32 r0.w, r0.x
+mad.f32 r1.x, r1.x, r1.x, r2.x
(rpt1)nop
-mad.f32 r0.z, r0.x, r0.x, r0.z
-mul.f r1.w, r1.w, r1.x
-mul.f r2.x, r2.w, r1.x
-mul.f r1.x, r2.z, r1.x
+mov.f32f32 r0.w, r0.w
(rpt2)nop
+mad.f32 r0.z, r0.w, r0.w, r0.z
+rsq r0.w, r1.x
+(ss)mov.f32f32 r1.x, r0.w
+(rpt3)nop
+mul.f r1.y, r4.y, r1.x
rsq r0.z, r0.z
-(ss)mov.f32f32 r0.z, r0.z
-mov.f32f32 r2.y, r1.w
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r1.x, r1.x
-mul.f r0.w, r0.w, r0.z
+(ss)mov.f32f32 r1.z, r0.z
mul.f r0.x, r0.x, r0.z
-mul.f r0.z, r3.z, r0.z
-mov.f32f32 r1.w, r0.y
-mov.f32f32 r0.y, r0.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.z, r0.z
-sam (f32)(xyz)r2.z, r1.y, s#2, t#2
+(ss)mul.f r0.z, r2.z, r1.x
+mul.f r0.w, r3.w, r0.w
+mul.f r0.y, r0.y, r1.z
+mov.f32f32 r1.x, r0.x
+mul.f r1.z, r3.x, r1.z
nop
-mul.f r0.y, r2.y, r0.y
-max.f r0.w, c8.x, r0.x
-mad.f32 r0.y, r2.x, r0.z, r0.y
-(rpt2)nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.w
-mad.f32 r0.x, r1.x, r0.x, r0.y
-(rpt2)nop
-mov.f32f32 r0.x, r0.x
+mul.f r0.y, r1.y, r0.y
+max.f r1.x, c8.x, r1.x
+mad.f32 r0.y, r0.z, r1.z, r0.y
+nop
+mad.f32 r0.x, r0.w, r0.x, r0.y
(rpt2)nop
max.f r0.x, r0.x, c7.w
-rcp r0.y, r0.z
-(ss)mov.f32f32 r0.y, r0.y
-(rpt1)nop
-mov.f32f32 r0.x, r0.x
-(sy)(ss)mul.f r0.z, r3.x, r0.y
-mul.f r0.w, r2.w, r0.y
-mul.f r0.y, r2.z, r0.y
-mul.f r1.x, c5.z, r0.x
-mul.f r1.y, c5.y, r0.x
+rcp r0.y, r1.x
+(ss)mov.f32f32 r0.z, r0.y
+mul.f r0.y, r4.z, r0.y
+nop
+mov.f32f32 r0.w, r0.x
mul.f r0.x, c5.x, r0.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.w, r0.w
-mad.f32 r0.z, r1.x, r0.z, c4.z
-mov.f32f32 r0.y, r0.y
-(rpt1)nop
-mov.f32f32 r0.z, r0.z
-mad.f32 r0.w, r1.y, r0.w, c4.y
+(ss)mul.f r1.x, r5.x, r0.z
+mul.f r0.z, r4.w, r0.z
+mul.f r1.y, c5.z, r0.w
+mul.f r0.w, c5.y, r0.w
mad.f32 r0.x, r0.x, r0.y, c4.x
nop
-mul.f r0.y, r4.y, r0.z
-mov.f32f32 r0.z, r0.w
-mov.f32f32 r0.x, r0.x
-nop
-mov.f32f32 r1.z, r0.y
-mul.f r0.y, r4.x, r0.z
-mul.f r0.x, r3.w, r0.x
+mad.f32 r0.y, r1.y, r1.x, c4.z
+mad.f32 r0.z, r0.w, r0.z, c4.y
(rpt1)nop
-mov.f32f32 r1.y, r0.y
-mov.f32f32 r1.x, r0.x
+mul.f r1.z, r5.w, r0.y
+mul.f r1.y, r5.z, r0.z
+mul.f r1.x, r5.y, r0.x
end
+nop
+nop
; FRAG: outputs: r1.x (1:0)
-; FRAG: inputs: r0.y (5:20,cm=f,il=8,b=1) r1.x (5:21,cm=f,il=12,b=1) r2.x (5:22,cm=f,il=16,b=1) r3.x (5:23,cm=f,il=20,b=1)
-; FRAG: 148 instructions, 0 half, 5 full
+; FRAG: inputs: r0.x (5:20,cm=f,il=8,b=1) r1.x (5:21,cm=f,il=12,b=1) r2.x (5:22,cm=f,il=16,b=1) r3.x (5:23,cm=f,il=20,b=1)
+; FRAG: 103 instructions, 0 half, 7 full
diff --git a/reference/xon6.asm b/reference/xon6.asm
index 7603f89..d468c8e 100644
--- a/reference/xon6.asm
+++ b/reference/xon6.asm
@@ -1,24 +1,24 @@
; options:
; VERT: new compiler
-@in(r1.z) in0
-@in(r1.w) in1
-@in(r2.x) in2
-@in(r2.y) in3
+@in(r4.w) in0
+@in(r5.x) in1
+@in(r5.y) in2
+@in(r5.z) in3
@in(r0.x) in4
@in(r0.y) in5
@in(r0.z) in6
@in(r0.w) in7
-@in(r5.x) in8
-@in(r5.y) in9
-@in(r5.z) in10
-@in(r2.z) in12
-@in(r2.w) in13
-@in(r3.x) in14
-@in(r3.y) in16
-@in(r3.z) in17
-@in(r3.w) in18
-@in(r5.w) in20
-@in(r6.x) in21
+@in(r2.x) in8
+@in(r2.y) in9
+@in(r2.z) in10
+@in(r3.x) in12
+@in(r3.y) in13
+@in(r3.z) in14
+@in(r4.x) in16
+@in(r4.y) in17
+@in(r4.z) in18
+@in(r1.z) in20
+@in(r1.w) in21
@out(r0.x) out0
@out(r0.y) out1
@out(r0.z) out2
@@ -39,75 +39,36 @@
@out(r4.y) out17
@out(r4.z) out18
@out(r4.w) out19
+@const(c8.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)mul.f r1.x, c4.y, r0.x
mul.f r0.x, c4.x, r0.x
mad.f32 r1.x, c5.y, r0.y, r1.x
mad.f32 r0.x, c5.x, r0.y, r0.x
mad.f32 r0.y, c6.y, r0.z, r1.x
mad.f32 r0.x, c6.x, r0.z, r0.x
-mad.f32 r0.y, c7.y, r0.w, r0.y
-mad.f32 r0.x, c7.x, r0.w, r0.x
-mul.f r0.z, c0.w, r1.z
-mul.f r0.w, c0.z, r1.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
-mad.f32 r0.z, c1.w, r1.w, r0.z
-mad.f32 r0.w, c1.z, r1.w, r0.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
-mad.f32 r0.z, c2.w, r2.x, r0.z
-mad.f32 r0.w, c2.z, r2.x, r0.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
-(rpt1)nop
-mov.f32f32 r1.y, r0.y
-mov.f32f32 r1.x, r0.x
-mad.f32 r0.x, c3.w, r2.y, r0.z
-mad.f32 r0.y, c3.z, r2.y, r0.w
-mul.f r4.x, c0.y, r1.z
-mul.f r1.z, c0.x, r1.z
-mov.f32f32 r0.w, r0.x
-mov.f32f32 r0.z, r0.y
-mad.f32 r0.x, c1.y, r1.w, r4.x
-mad.f32 r0.y, c1.x, r1.w, r1.z
-mad.f32 r0.x, c2.y, r2.x, r0.x
-mad.f32 r0.y, c2.x, r2.x, r0.y
-mad.f32 r0.x, c3.y, r2.y, r0.x
-mad.f32 r1.z, c3.x, r2.y, r0.y
-(rpt1)nop
-mov.f32f32 r0.y, r0.x
-mov.f32f32 r0.x, r1.z
-mov.f32f32 r1.z, c8.x
-mov.f32f32 r1.w, r3.w
-mov.f32f32 r2.x, r3.z
-mov.f32f32 r2.y, r3.y
-mov.f32f32 r4.w, r1.z
-mov.f32f32 r4.z, r1.w
-mov.f32f32 r4.y, r2.x
-mov.f32f32 r4.x, r2.y
-mov.f32f32 r1.z, c8.x
-mov.f32f32 r1.w, r3.x
-mov.f32f32 r2.x, r2.w
-mov.f32f32 r2.y, r2.z
-mov.f32f32 r3.w, r1.z
-mov.f32f32 r3.z, r1.w
-mov.f32f32 r3.y, r2.x
-mov.f32f32 r3.x, r2.y
-mov.f32f32 r1.z, c8.x
-mov.f32f32 r1.w, r5.z
-mov.f32f32 r2.x, r5.y
-mov.f32f32 r5.x, r5.x
-mov.f32f32 r2.w, r1.z
-mov.f32f32 r2.z, r1.w
-mov.f32f32 r2.y, r2.x
-mov.f32f32 r2.x, r5.x
-mov.f32f32 r1.z, r6.x
-mov.f32f32 r5.x, r5.w
-(rpt1)nop
-mov.f32f32 r1.w, r1.z
-mov.f32f32 r1.z, r5.x
+mad.f32 r1.y, c7.y, r0.w, r0.y
+mad.f32 r1.x, c7.x, r0.w, r0.x
+mul.f r0.x, c0.w, r4.w
+mul.f r0.y, c0.z, r4.w
+mad.f32 r0.x, c1.w, r5.x, r0.x
+mad.f32 r0.y, c1.z, r5.x, r0.y
+mad.f32 r0.x, c2.w, r5.y, r0.x
+mad.f32 r0.y, c2.z, r5.y, r0.y
+mad.f32 r0.w, c3.w, r5.z, r0.x
+mad.f32 r0.z, c3.z, r5.z, r0.y
+mul.f r0.x, c0.y, r4.w
+mul.f r0.y, c0.x, r4.w
+mad.f32 r0.x, c1.y, r5.x, r0.x
+mad.f32 r0.y, c1.x, r5.x, r0.y
+mad.f32 r0.x, c2.y, r5.y, r0.x
+mad.f32 r2.w, c2.x, r5.y, r0.y
+mad.f32 r0.y, c3.y, r5.z, r0.x
+mad.f32 r0.x, c3.x, r5.z, r2.w
+mov.f32f32 r4.w, c8.x
+mov.f32f32 r3.w, c8.x
+mov.f32f32 r2.w, c8.x
end
; VERT: outputs: r0.x (0:0) r1.x (5:20) r2.x (5:21) r3.x (5:22) r4.x (5:23)
-; VERT: inputs: r1.z (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=f,il=12,b=0) r5.x (0:0,cm=7,il=16,b=0) r2.z (0:0,cm=7,il=20,b=0) r3.y (0:0,cm=7,il=24,b=0) r5.w (0:0,cm=3,il=28,b=0)
-; VERT: 71 instructions, 0 half, 7 full
+; VERT: inputs: r4.w (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=f,il=12,b=0) r2.x (0:0,cm=7,il=16,b=0) r3.x (0:0,cm=7,il=20,b=0) r4.x (0:0,cm=7,il=24,b=0) r1.z (0:0,cm=3,il=28,b=0)
+; VERT: 28 instructions, 0 half, 6 full
diff --git a/reference/xon7.asm b/reference/xon7.asm
index 3846190..faa1d33 100644
--- a/reference/xon7.asm
+++ b/reference/xon7.asm
@@ -6,215 +6,150 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c10.x) 0x3f000000, 0xbf000000, 0x40000000, 0xbf800000
+@const(c11.x) 0x00000000, 0x3f800000, 0x3e800000, 0x00000000
+@const(c12.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.z, 2, r0.x
-bary.f r0.w, 0, r0.x
-bary.f r1.x, 4, r0.x
-bary.f r1.y, 3, r0.x
-mov.f32f32 r1.z, r0.z
-mov.f32f32 r1.w, r0.w
-mov.f32f32 r2.y, r0.w
-mul.f r2.z, r1.x, r1.x
-mov.f32f32 r2.w, r1.z
-mov.f32f32 r1.z, r1.y
-mov.f32f32 r3.y, r1.w
-bary.f r3.w, 1, r0.x
-mov.f32f32 r4.x, r2.y
-mov.f32f32 r3.x, r1.z
-bary.f r1.z, 5, r0.x
-mov.f32f32 r1.w, r3.w
-mov.f32f32 r2.y, r3.w
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.z, r0.z
-mad.f32 r2.z, r1.z, r1.z, r2.z
-sam (f32)(xyz)r4.z, r2.w, s#4, t#4
-(sy)(ss)mad.f32 r2.w, c10.z, r4.z, c10.w
-mov.f32f32 r3.z, r1.w
-mov.f32f32 r4.y, r2.y
-mov.f32f32 r1.w, r2.z
-mov.f32f32 r2.y, r2.w
-bary.f r2.z, 12, r0.x
-bary.f r2.w, 16, r0.x
-bary.f r3.x, 8, r0.x
-sam (f32)(xyz)r5.y, r3.y, s#0, t#0
-(sy)(ss)add.f r3.y, r5.y, c10.y
-mul.f r2.z, r2.y, r2.z
-mad.f32 r3.z, c10.z, r4.w, c10.w
-mul.f r2.w, r2.y, r2.w
-mov.f32f32 r3.y, r3.y
-mul.f r2.y, r2.y, r3.x
-mov.f32f32 r3.x, r3.z
-bary.f r3.z, 13, r0.x
-mul.f r4.z, r3.y, r3.y
-add.f r4.w, r5.z, c10.y
-bary.f r5.y, 17, r0.x
-mad.f32 r2.z, r3.x, r3.z, r2.z
-bary.f r3.z, 9, r0.x
-mov.f32f32 r4.w, r4.w
-mad.f32 r2.w, r3.x, r5.y, r2.w
-mov.f32f32 r2.z, r2.z
-mad.f32 r5.x, c10.z, r5.x, c10.w
-mad.f32 r4.z, r4.w, r4.w, r4.z
-mov.f32f32 r2.w, r2.w
-mad.f32 r2.y, r3.x, r3.z, r2.y
-mov.f32f32 r3.x, r5.x
-bary.f r3.z, 14, r0.x
-mov.f32f32 r4.z, r4.z
-add.f r5.x, r5.w, c10.y
-bary.f r5.y, 18, r0.x
-mad.f32 r2.z, r3.x, r3.z, r2.z
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r3.z, r5.x
-bary.f r5.x, 10, r0.x
-mov.f32f32 r2.z, r2.z
-mad.f32 r2.w, r3.x, r5.y, r2.w
-mad.f32 r4.z, r3.z, r3.z, r4.z
-mad.f32 r2.y, r3.x, r5.x, r2.y
+bary.f r0.w, 3, r0.x
+bary.f r1.x, 0, r0.x
+bary.f r1.z, 4, r0.x
+mov.f32f32 r2.y, r0.z
+mov.f32f32 r2.z, r0.w
+mov.f32f32 r2.w, r1.x
+bary.f r1.y, 1, r0.x
+sam (f32)(xyz)r3.y, r0.z, s#4, t#4
+(sy)(ss)mad.f32 r0.z, c10.z, r3.y, c10.w
+mov.f32f32 r4.x, r1.x
+bary.f r0.w, 16, r0.x
+mul.f r1.w, r1.z, r1.z
+mov.f32f32 r3.y, r0.z
+bary.f r4.z, 12, r0.x
+mov.f32f32 r4.y, r1.y
+mov.f32f32 r3.x, r1.y
+bary.f r4.w, 8, r0.x
+mul.f r4.z, r3.y, r4.z
+mad.f32 r3.z, c10.z, r3.z, c10.w
+mul.f r0.z, r0.z, r0.w
+mul.f r0.w, r3.y, r4.w
+bary.f r3.y, 17, r0.x
+mov.f32f32 r4.w, r3.z
+bary.f r5.x, 13, r0.x
+sam (f32)(xyz)r5.y, r4.x, s#0, t#0
+(sy)(ss)add.f r4.x, r5.y, c10.y
+sam (f32)(xyzw)r6.x, r2.w, s#1, t#1
+(ss)bary.f r2.w, 9, r0.x
+(sy)cmps.f.lt r3.x, r6.w, c10.x
+mad.f32 r4.y, r4.w, r5.x, r4.z
+mad.f32 r3.w, c10.z, r3.w, c10.w
+mov.f32f32 r4.z, r4.x
+cov.u32f32 r3.x, r3.x
+mad.f32 r0.w, r4.w, r2.w, r0.w
+mov.f32f32 r2.w, r3.w
+bary.f r4.w, 14, r0.x
+mul.f r4.x, r4.x, r4.z
+add.f r5.x, r5.z, c10.y
+mov.f32f32 r5.y, (0.000000)
+mad.f32 r4.y, r2.w, r4.w, r4.y
+bary.f r4.w, 10, r0.x
+mad.f32 r0.z, r3.z, r3.y, r0.z
+bary.f r3.y, 5, r0.x
+mov.f32f32 r3.z, r4.y
+mad.f32 r0.w, r2.w, r4.w, r0.w
+mov.f32f32 r2.w, r5.x
+cmps.f.ne p0.x, r3.x, r5.y
+bary.f r3.x, 18, r0.x
+mul.f r4.w, r0.w, r0.w
+mad.f32 r4.x, r5.x, r2.w, r4.x
+mad.f32 r4.y, r4.y, r3.z, r4.w
+mad.f32 r0.z, r3.w, r3.x, r0.z
+add.f r3.x, r5.w, c10.y
bary.f (ei)r0.x, 6, r0.x
-sam (f32)(xyzw)r5.x, r4.x, s#1, t#1
-mov.f32f32 r0.y, r2.w
-(sy)cmps.f.lt r2.w, r5.w, c10.x
-mul.f r3.x, r2.y, r2.y
-mad.f32 r1.w, r0.x, r0.x, r1.w
-mad.f32 r3.x, r2.z, r2.z, r3.x
-(ss)rsq r4.x, r4.z
-(ss)mov.f32f32 r4.x, r4.x
+kill p0.x
+mov.f32f32 r0.y, r0.z
+mov.f32f32 r3.w, r3.x
+mad.f32 r4.w, r3.y, r3.y, r1.w
+mul.f r1.w, r6.w, c9.x
mov.f32f32 r0.y, r0.y
-cov.u32f32 r2.w, r2.w
-mov.f32f32 r3.x, r3.x
-mul.f r3.y, r3.y, r4.x
-mad.f32 r3.x, r0.y, r0.y, r3.x
-mul.f r4.y, r4.w, r4.x
-mul.f r3.z, r3.z, r4.x
-(rpt3)nop
-rsq r3.x, r3.x
-(ss)mov.f32f32 r3.x, r3.x
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r4.x, r4.y
+mad.f32 r3.w, r3.w, r3.w, r4.x
+mad.f32 r4.x, r0.x, r0.x, r4.w
+mul.f r4.w, r6.z, c5.z
+mad.f32 r0.y, r0.y, r0.y, r4.y
+mul.f r4.y, r6.z, c6.z
+mul.f r5.x, r6.y, c6.y
+mul.f r5.y, r6.x, c6.x
+rsq r3.w, r3.w
+(ss)mov.f32f32 r5.z, r3.w
+mul.f r3.x, r3.x, r3.w
+(ss)rsq r3.w, r4.x
+(ss)mov.f32f32 r4.x, r3.w
+rsq r0.y, r0.y
+(ss)mov.f32f32 r5.w, r0.y
+(ss)mul.f r0.y, r0.z, r0.y
+mul.f r0.z, r4.z, r5.z
+mul.f r2.w, r2.w, r5.z
+mul.f r0.w, r0.w, r5.w
+mov.f32f32 r4.z, r0.y
+mul.f r3.z, r3.z, r5.w
+mov.f32f32 r5.z, r0.z
+mul.f r0.z, r0.z, r0.w
+mov.f32f32 r0.w, r0.w
+mad.f32 r0.z, r2.w, r3.z, r0.z
+max.f r5.w, c11.z, r4.z
+mad.f32 r0.z, r3.x, r4.z, r0.z
+mul.f r6.z, r5.z, r0.w
mov.f32f32 r3.z, r3.z
-mul.f r2.y, r2.y, r3.x
-mul.f r0.y, r0.y, r3.x
-mul.f r2.z, r2.z, r3.x
-mov.f32f32 r3.x, (0.000000)
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r2.z, r2.z
-cmps.f.ne p0.x, r2.w, r3.x
-mul.f r2.w, r3.y, r2.y
-mul.f r3.x, r3.y, r2.y
-mad.f32 r2.w, r4.x, r2.z, r2.w
-mad.f32 r3.x, r4.x, r2.z, r3.x
-max.f r4.y, c11.z, r0.y
-rsq r1.w, r1.w
-(ss)mov.f32f32 r1.w, r1.w
mov.f32f32 r2.w, r2.w
+mul.f r5.z, r0.z, r5.z
mov.f32f32 r3.x, r3.x
-mad.f32 r2.w, r3.z, r0.y, r2.w
-mad.f32 r3.x, r3.z, r0.y, r3.x
-mov.f32f32 r4.y, r4.y
-kill p0.x
-mul.f r3.y, r2.w, r3.y
-mul.f r4.x, r2.w, r4.x
-mul.f r2.w, r2.w, r3.z
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r3.z, r4.x
-mov.f32f32 r2.w, r2.w
-max.f r3.x, r3.x, c11.x
-mul.f r3.y, c10.z, r3.y
-mul.f r3.z, c10.z, r3.z
+rcp r5.w, r5.w
+(ss)mov.f32f32 r6.w, r5.w
+mad.f32 r6.z, r2.w, r3.z, r6.z
+mul.f r5.z, c10.z, r5.z
+mad.f32 r4.z, r3.x, r4.z, r6.z
+mul.f r2.w, r0.z, r2.w
+mul.f r0.z, r0.z, r3.x
+add.f r0.w, r0.w, (neg)r5.z
+mul.f r1.z, r1.z, r4.x
+max.f r3.x, r4.z, c11.x
mul.f r2.w, c10.z, r2.w
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r3.z, r3.z
-mov.f32f32 r2.w, r2.w
-mul.f r4.x, r5.z, c6.z
-add.f r2.y, r2.y, (neg)r3.y
-add.f r2.z, r2.z, (neg)r3.z
-add.f r0.y, r0.y, (neg)r2.w
-mul.f r2.w, r4.x, r3.x
-mov.f32f32 r2.y, r2.y
-mul.f r1.x, r1.x, r1.w
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r1.x, r1.x
-mul.f r3.y, r5.y, c6.y
-mul.f r3.z, r5.x, c6.x
-rcp r4.x, r4.y
-(ss)mov.f32f32 r4.x, r4.x
-mul.f r1.x, r2.y, r1.x
-mul.f r1.z, r1.z, r1.w
-mul.f r2.y, r3.y, r3.x
-mul.f r3.x, r3.z, r3.x
-nop
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r3.y, r0.z
-mad.f32 r0.z, r2.z, r1.z, r1.x
-mov.f32f32 r1.x, r1.y
-mul.f r0.x, r0.x, r1.w
-nop
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r3.z, r1.x
-mov.f32f32 r0.x, r0.x
-mul.f r1.x, r5.w, c9.x
-mul.f r1.y, r5.z, c5.z
-mul.f r1.z, r5.y, c5.y
-mad.f32 r0.x, r0.y, r0.x, r0.z
-mov.f32f32 r0.y, r1.x
-nop
-sam (f32)(xyz)r5.y, r3.y, s#3, t#3
-(sy)mul.f r0.z, r5.w, r4.x
-mov.f32f32 r0.x, r0.x
-mul.f r1.x, r5.z, r4.x
-mul.f r1.w, r5.y, r4.x
-nop
+mul.f r0.z, c10.z, r0.z
+mul.f r0.w, r0.w, r1.z
+mov.f32f32 r1.z, r3.x
+add.f r2.w, r3.z, (neg)r2.w
+mul.f r3.y, r3.y, r4.x
+mul.f r3.x, r5.y, r3.x
+add.f r0.y, r0.y, (neg)r0.z
+sam (f32)(xyz)r7.x, r2.y, s#3, t#3
+mul.f r0.z, r4.y, r1.z
+mad.f32 r0.w, r2.w, r3.y, r0.w
+mul.f r0.x, r0.x, r3.w
+mul.f r1.z, r5.x, r1.z
+(sy)(ss)mul.f r2.y, r7.z, r6.w
+mul.f r2.z, r7.y, r6.w
+mad.f32 r0.x, r0.y, r0.x, r0.w
+mul.f r0.y, r6.y, c5.y
+mul.f r0.w, r6.x, c5.x
+mul.f r2.w, r7.x, r5.w
max.f r0.x, (neg)r0.x, c11.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r2.z, r1.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r1.w, r0.y
-mov.f32f32 r0.y, r1.y
-mov.f32f32 r1.y, r1.z
-mul.f r1.z, r5.x, c5.x
-(ss)mov.f32f32 r3.y, r0.w
-mov.f32f32 r0.w, r3.w
-log2 r0.x, r0.x
-(rpt2)nop
-mov.f32f32 r3.z, r0.w
-mov.f32f32 r0.w, r1.z
-(rpt4)nop
-sam (f32)(xyzw)r3.y, r3.y, s#2, t#2
-(sy)mad.f32 r1.z, c8.x, r4.x, c11.y
-mul.f r3.w, r3.w, c7.z
-(ss)mul.f r3.z, r3.z, c7.y
+sam (f32)(xyzw)r3.y, r1.x, s#2, t#2
+(sy)(ss)mul.f r1.x, r3.w, c7.z
+mul.f r1.y, r3.z, c7.y
+mad.f32 r3.z, c8.x, r4.x, c11.y
mul.f r3.y, r3.y, c7.x
-mov.f32f32 r1.z, r1.z
-(rpt2)nop
-(ss)mul.f r0.x, r1.z, r0.x
-(rpt2)nop
-mov.f32f32 r0.x, r0.x
+(rpt1)nop
+log2 r0.x, r0.x
+(ss)mul.f r0.x, r3.z, r0.x
(rpt5)nop
exp2 r0.x, r0.x
-(ss)mad.f32 r1.z, r3.w, r0.x, r2.w
-mad.f32 r2.y, r3.z, r0.x, r2.y
+(ss)mad.f32 r0.z, r1.x, r0.x, r0.z
+mad.f32 r1.x, r1.y, r0.x, r1.z
(ss)mad.f32 r0.x, r3.y, r0.x, r3.x
nop
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r0.x, r0.x
-nop
-mad.f32 r0.y, r1.z, r0.z, r0.y
-mad.f32 r0.z, r2.y, r1.x, r1.y
-mad.f32 r0.x, r0.x, r2.z, r0.w
-nop
-mov.f32f32 r1.z, r0.y
-mov.f32f32 r1.y, r0.z
-mov.f32f32 r1.x, r0.x
+mad.f32 r1.z, r0.z, r2.y, r4.w
+mad.f32 r1.y, r1.x, r2.z, r0.y
+mad.f32 r1.x, r0.x, r2.w, r0.w
end
; FRAG: outputs: r1.x (1:0)
-; FRAG: inputs: r0.z (5:20,cm=f,il=8,b=1) r1.x (5:21,cm=f,il=12,b=1) r2.x (5:22,cm=f,il=16,b=1) r3.x (5:23,cm=f,il=20,b=1) r4.x (5:24,cm=f,il=24,b=1)
-; FRAG: 226 instructions, 0 half, 6 full
+; FRAG: inputs: r0.x (5:20,cm=f,il=8,b=1) r1.x (5:21,cm=f,il=12,b=1) r2.x (5:22,cm=f,il=16,b=1) r3.x (5:23,cm=f,il=20,b=1) r4.x (5:24,cm=f,il=24,b=1)
+; FRAG: 146 instructions, 0 half, 8 full
diff --git a/reference/xon8.asm b/reference/xon8.asm
index 9a1b31d..26f774e 100644
--- a/reference/xon8.asm
+++ b/reference/xon8.asm
@@ -1,22 +1,22 @@
; options:
; VERT: new compiler
-@in(r2.w) in0
-@in(r3.x) in1
-@in(r3.y) in2
-@in(r3.z) in3
+@in(r6.x) in0
+@in(r6.y) in1
+@in(r6.z) in2
+@in(r6.w) in3
@in(r0.x) in4
@in(r0.y) in5
@in(r0.z) in6
@in(r0.w) in7
-@in(r6.x) in8
-@in(r6.y) in9
-@in(r6.z) in10
-@in(r3.w) in12
-@in(r4.x) in13
-@in(r4.y) in14
-@in(r4.z) in16
-@in(r4.w) in17
-@in(r5.x) in18
+@in(r3.x) in8
+@in(r3.y) in9
+@in(r3.z) in10
+@in(r4.x) in12
+@in(r4.y) in13
+@in(r4.z) in14
+@in(r5.x) in16
+@in(r5.y) in17
+@in(r5.z) in18
@in(r1.z) in20
@in(r1.w) in21
@out(r0.x) out0
@@ -43,99 +43,53 @@
@out(r5.y) out21
@out(r5.z) out22
@out(r5.w) out23
-(sy)(ss)add.f r1.x, c4.x, (neg)r2.w
+@const(c9.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
+@const(c10.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
+(sy)(ss)add.f r1.x, c4.x, (neg)r6.x
mul.f r1.y, c5.y, r0.x
mul.f r0.x, c5.x, r0.x
+mul.f r2.w, c0.w, r6.x
+mul.f r2.x, r1.x, r5.x
+add.f r2.y, c4.y, (neg)r6.y
+mul.f r2.z, r1.x, r4.x
+mul.f r1.x, r1.x, r3.x
mad.f32 r1.y, c6.y, r0.y, r1.y
-mul.f r2.x, r1.x, r4.z
-add.f r5.y, c4.y, (neg)r3.x
-mad.f32 r1.y, c7.y, r0.z, r1.y
+mad.f32 r2.x, r2.y, r5.y, r2.x
+add.f r3.w, c4.z, (neg)r6.z
+mad.f32 r4.w, r2.y, r4.y, r2.z
+mad.f32 r1.x, r2.y, r3.y, r1.x
+nop
+mad.f32 r2.z, r3.w, r5.z, r2.x
+mad.f32 r2.y, r3.w, r4.z, r4.w
+mad.f32 r2.x, r3.w, r3.z, r1.x
+mad.f32 r1.x, c7.y, r0.z, r1.y
mad.f32 r0.x, c6.x, r0.y, r0.x
-mul.f r0.y, r1.x, r3.w
-mad.f32 r2.x, r5.y, r4.w, r2.x
-mad.f32 r1.y, c8.y, r0.w, r1.y
+mad.f32 r1.y, c8.y, r0.w, r1.x
mad.f32 r0.x, c7.x, r0.z, r0.x
-mad.f32 r0.y, r5.y, r4.x, r0.y
-mov.f32f32 r0.z, r2.x
-add.f r2.x, c4.z, (neg)r3.y
-mov.f32f32 r1.y, r1.y
-mad.f32 r0.x, c8.x, r0.w, r0.x
-mov.f32f32 r0.y, r0.y
-mad.f32 r0.z, r2.x, r5.x, r0.z
-mov.f32f32 r0.w, r1.y
-mov.f32f32 r0.x, r0.x
-mad.f32 r0.y, r2.x, r4.y, r0.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r1.y, r0.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r2.y, r0.y
-mov.f32f32 r2.z, r0.z
-mul.f r0.y, r1.x, r6.x
-mov.f32f32 r1.x, r0.x
-mad.f32 r0.x, r5.y, r6.y, r0.y
-mul.f r0.y, c0.w, r2.w
-mul.f r0.z, c0.z, r2.w
-mul.f r0.w, c0.y, r2.w
-mov.f32f32 r0.x, r0.x
-mad.f32 r0.y, c1.w, r3.x, r0.y
-mad.f32 r0.x, r2.x, r6.z, r0.x
-mad.f32 r0.y, c2.w, r3.y, r0.y
-mad.f32 r0.z, c1.z, r3.x, r0.z
-mad.f32 r0.w, c1.y, r3.x, r0.w
-mov.f32f32 r2.x, r0.x
-mad.f32 r0.x, c3.w, r3.z, r0.y
-mad.f32 r0.y, c2.z, r3.y, r0.z
-mad.f32 r0.z, c2.y, r3.y, r0.w
-mul.f r2.w, c0.x, r2.w
-mov.f32f32 r0.w, r0.x
-mad.f32 r0.x, c3.z, r3.z, r0.y
-mad.f32 r0.y, c3.y, r3.z, r0.z
-mad.f32 r2.w, c1.x, r3.x, r2.w
-mov.f32f32 r3.x, c9.x
-mov.f32f32 r0.z, r0.x
-mov.f32f32 r0.y, r0.y
-mad.f32 r0.x, c2.x, r3.y, r2.w
-mov.f32f32 r5.w, r3.x
-mad.f32 r0.x, c3.x, r3.z, r0.x
-mov.f32f32 r2.w, r5.x
-mov.f32f32 r3.x, r4.w
-mov.f32f32 r3.y, r4.z
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r5.z, r2.w
-mov.f32f32 r5.y, r3.x
-mov.f32f32 r5.x, r3.y
-mov.f32f32 r2.w, c9.x
-mov.f32f32 r3.x, r4.y
-mov.f32f32 r3.y, r4.x
-mov.f32f32 r3.z, r3.w
-mov.f32f32 r4.w, r2.w
-mov.f32f32 r4.z, r3.x
-mov.f32f32 r4.y, r3.y
-mov.f32f32 r4.x, r3.z
-mov.f32f32 r2.w, c9.x
-mov.f32f32 r3.x, r6.z
-mov.f32f32 r3.y, r6.y
-mov.f32f32 r6.x, r6.x
-mov.f32f32 r3.w, r2.w
-mov.f32f32 r3.z, r3.x
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r3.x, r6.x
+mad.f32 r0.y, c1.w, r6.y, r2.w
+mad.f32 r1.x, c8.x, r0.w, r0.x
+mad.f32 r0.x, c2.w, r6.z, r0.y
+mul.f r0.y, c0.z, r6.x
+mad.f32 r0.w, c3.w, r6.w, r0.x
+mad.f32 r0.x, c1.z, r6.y, r0.y
+mul.f r0.y, c0.y, r6.x
+mad.f32 r0.x, c2.z, r6.z, r0.x
+mad.f32 r0.y, c1.y, r6.y, r0.y
+mad.f32 r0.z, c3.z, r6.w, r0.x
+mad.f32 r0.x, c2.y, r6.z, r0.y
+mul.f r2.w, c0.x, r6.x
+mad.f32 r0.y, c3.y, r6.w, r0.x
+mad.f32 r0.x, c1.x, r6.y, r2.w
+mov.f32f32 r5.w, c9.x
+mad.f32 r0.x, c2.x, r6.z, r0.x
+mov.f32f32 r4.w, c9.x
+mad.f32 r0.x, c3.x, r6.w, r0.x
+mov.f32f32 r3.w, c9.x
mov.f32f32 r2.w, c9.x
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r1.z, r1.z
-nop
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r1.z, r1.z
end
nop
nop
-nop
; VERT: outputs: r0.x (0:0) r1.x (5:20) r2.x (5:21) r3.x (5:22) r4.x (5:23) r5.x (5:24)
-; VERT: inputs: r2.w (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=f,il=12,b=0) r6.x (0:0,cm=7,il=16,b=0) r3.w (0:0,cm=7,il=20,b=0) r4.z (0:0,cm=7,il=24,b=0) r1.z (0:0,cm=3,il=28,b=0)
-; VERT: 89 instructions, 0 half, 7 full
+; VERT: inputs: r6.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=f,il=12,b=0) r3.x (0:0,cm=7,il=16,b=0) r4.x (0:0,cm=7,il=20,b=0) r5.x (0:0,cm=7,il=24,b=0) r1.z (0:0,cm=3,il=28,b=0)
+; VERT: 42 instructions, 0 half, 7 full
diff --git a/reference/xon9.asm b/reference/xon9.asm
index 47cf0de..6189f11 100644
--- a/reference/xon9.asm
+++ b/reference/xon9.asm
@@ -6,91 +6,57 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c9.x) 0x3f000000, 0x00000000, 0x00000000, 0x00000000
+@const(c10.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.z, 0, r0.x
-bary.f r0.w, 4, r0.x
-mov.f32f32 r1.x, c9.y
-bary.f r1.y, 1, r0.x
+bary.f r0.w, 1, r0.x
+bary.f r1.x, 4, r0.x
+mov.f32f32 r1.y, (0.000000)
mov.f32f32 r1.z, r0.z
-mul.f r0.w, r0.w, r0.w
-bary.f r1.w, 5, r0.x
-mov.f32f32 r2.y, r0.z
-mov.f32f32 r2.z, r1.z
-mov.f32f32 r1.z, r1.y
-mad.f32 r0.w, r1.w, r1.w, r0.w
-mov.f32f32 r3.x, r2.y
-mov.f32f32 r1.w, r1.y
-mov.f32f32 r2.w, r1.z
-mov.f32f32 r0.w, r0.w
+mov.f32f32 r1.w, r0.w
+mul.f r1.x, r1.x, r1.x
+bary.f r2.y, 5, r0.x
+sam (f32)(xyzw)r2.z, r0.z, s#0, t#0
+(sy)cmps.f.lt r3.z, r3.y, c9.x
+mov.f32f32 r3.w, r0.z
+mov.f32f32 r4.x, r0.w
+(ss)mad.f32 r0.z, r2.y, r2.y, r1.x
+sam (f32)(xyz)r4.y, r1.z, s#1, t#1
+(sy)mul.f r0.w, r4.w, c3.z
+mul.f r1.x, r4.z, c3.y
+(ss)mul.f r1.z, r4.y, c3.x
+cov.u32f32 r1.w, r3.z
bary.f (ei)r0.x, 6, r0.x
-mov.f32f32 r3.y, r1.w
-mov.f32f32 r0.y, r0.z
-mov.f32f32 r0.z, r1.x
-mad.f32 r0.x, r0.x, r0.x, r0.w
-sam (f32)(xyz)r3.z, r2.z, s#1, t#1
-(sy)mul.f r0.w, r4.x, c3.z
-mul.f r1.x, r3.w, c3.y
-mul.f r1.z, r3.z, c3.x
-(ss)nop
-sam (f32)(xyzw)r2.y, r3.x, s#0, t#0
-(sy)cmps.f.lt r1.w, r3.x, c9.x
-mov.f32f32 r0.w, r0.w
-(ss)mov.f32f32 r3.y, r0.y
-mov.f32f32 r0.y, r1.y
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.y, r1.z
-sqrt r0.x, r0.x
-(ss)mov.f32f32 r0.x, r0.x
-mov.f32f32 r3.z, r0.y
-cov.u32f32 r0.y, r1.w
-mov.f32f32 r1.z, (0.000000)
-mul.f r1.w, r3.x, c7.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.z, r0.z
-cmps.f.ne p0.x, r0.y, r1.z
-sam (f32)(xyz)r3.x, r3.y, s#2, t#2
-(sy)mad.f32 r0.y, c4.z, r3.z, r0.w
-mad.f32 r0.w, c4.y, r3.y, r1.x
-mad.f32 r1.x, c4.x, r3.x, r1.y
-mov.f32f32 r1.y, r0.x
-mov.f32f32 r0.x, r0.y
-mov.f32f32 r0.y, r0.w
-mov.f32f32 r0.w, r1.x
-mov.f32f32 r1.z, r0.z
-add.f r0.x, r2.w, r0.x
-add.f r0.y, r2.z, r0.y
-add.f r0.z, r2.y, r0.w
+sam (f32)(xyz)r3.z, r3.w, s#2, t#2
+(sy)mad.f32 r0.y, c4.z, r4.x, r0.w
+mad.f32 r0.w, c4.y, r3.w, r1.x
+mad.f32 r1.x, c4.x, r3.z, r1.z
+cmps.f.ne p0.x, r1.w, r1.y
+add.f r0.y, r3.x, r0.y
+add.f r0.w, r2.w, r0.w
+add.f r1.x, r2.z, r1.x
+mad.f32 r0.x, r0.x, r0.x, r0.z
+mul.f r0.y, r0.y, c6.z
+mul.f r0.z, r0.w, c6.y
+mul.f r0.w, r1.x, c6.x
kill p0.x
-mul.f r0.x, r0.x, c6.z
-mul.f r0.y, r0.y, c6.y
-mul.f r0.z, r0.z, c6.x
-sam (f32)(x)r0.w, r1.y, s#3, t#3
-mov.f32f32 r1.x, r1.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r1.w, r1.x
-mul.f r0.x, r0.x, c8.z
-mul.f r0.y, r0.y, c8.y
-mul.f r0.z, r0.z, c8.x
-nop
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-nop
-(sy)mul.f r0.x, r0.x, r0.w
-mul.f r0.y, r0.y, r0.w
-mul.f r0.z, r0.z, r0.w
-nop
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-nop
-(ss)mov.f32f32 r1.z, r0.x
-mov.f32f32 r1.y, r0.y
-mov.f32f32 r1.x, r0.z
+mul.f r0.y, r0.y, c8.z
+mul.f r0.z, r0.z, c8.y
+sqrt r1.x, r0.x
+mov.f32f32 r1.y, c9.y
+(ss)mul.f r0.x, r0.w, c8.x
+(rpt1)nop
+mul.f r1.w, r3.y, c7.x
+(rpt1)nop
+(ss)nop
+sam (f32)(x)r2.y, r1.x, s#3, t#3
+(sy)mul.f r1.z, r0.y, r2.y
+(ss)mul.f r1.y, r0.z, r2.y
+mul.f r1.x, r0.x, r2.y
end
nop
+nop
; FRAG: outputs: r1.x (1:0)
; FRAG: inputs: r0.x (5:20,cm=f,il=8,b=1) r1.x (5:21,cm=f,il=12,b=1)
-; FRAG: 83 instructions, 0 half, 5 full
+; FRAG: 48 instructions, 0 half, 5 full
diff --git a/reference/xonotic-gl2/xonotic-glx-gl2-01.asm b/reference/xonotic-gl2/xonotic-glx-gl2-01.asm
index 2c03e4f..284e180 100644
--- a/reference/xonotic-gl2/xonotic-glx-gl2-01.asm
+++ b/reference/xonotic-gl2/xonotic-glx-gl2-01.asm
@@ -1,13 +1,13 @@
; options:
; VERT: new compiler
-@in(r2.x) in0
-@in(r2.y) in1
-@in(r2.z) in2
-@in(r2.w) in3
-@in(r0.x) in4
-@in(r0.y) in5
-@in(r0.z) in6
-@in(r0.w) in7
+@in(r1.x) in0
+@in(r1.y) in1
+@in(r1.z) in2
+@in(r1.w) in3
+@in(r2.x) in4
+@in(r2.y) in5
+@in(r2.z) in6
+@in(r2.w) in7
@out(r0.x) out0
@out(r0.y) out1
@out(r0.z) out2
@@ -16,39 +16,36 @@
@out(r1.y) out5
@out(r1.z) out6
@out(r1.w) out7
-(sy)(ss)mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
-max.f r0.w, r0.w, c4.x
-max.f r0.z, r0.z, c4.x
-max.f r0.y, r0.y, c4.x
-max.f r0.x, r0.x, c4.x
-min.f r1.w, r0.w, c4.y
-min.f r1.z, r0.z, c4.y
-min.f r1.y, r0.y, c4.y
-min.f r1.x, r0.x, c4.y
-mul.f r0.x, r2.x, c0.w
-mul.f r0.y, r2.x, c0.z
-mad.f32 r0.x, c1.w, r2.y, r0.x
-mad.f32 r0.y, c1.z, r2.y, r0.y
-mad.f32 r0.x, c2.w, r2.z, r0.x
-mad.f32 r0.y, c2.z, r2.z, r0.y
-mad.f32 r0.w, c3.w, r2.w, r0.x
-mad.f32 r0.z, c3.z, r2.w, r0.y
-mul.f r0.x, r2.x, c0.y
-mul.f r0.y, r2.x, c0.x
-mad.f32 r0.x, c1.y, r2.y, r0.x
-mad.f32 r0.y, c1.x, r2.y, r0.y
-mad.f32 r0.x, c2.y, r2.z, r0.x
-mad.f32 r2.x, c2.x, r2.z, r0.y
-mad.f32 r0.y, c3.y, r2.w, r0.x
-mad.f32 r0.x, c3.x, r2.w, r2.x
+@const(c4.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000
+(sy)(ss)mul.f r0.x, r1.x, c0.w
+mul.f r0.y, r1.x, c0.z
+mad.f32 r0.x, c1.w, r1.y, r0.x
+mad.f32 r0.y, c1.z, r1.y, r0.y
+mad.f32 r0.x, c2.w, r1.z, r0.x
+mad.f32 r0.y, c2.z, r1.z, r0.y
+mad.f32 r0.w, c3.w, r1.w, r0.x
+mad.f32 r0.z, c3.z, r1.w, r0.y
+mul.f r0.x, r1.x, c0.y
+mul.f r0.y, r1.x, c0.x
+mad.f32 r0.x, c1.y, r1.y, r0.x
+mad.f32 r0.y, c1.x, r1.y, r0.y
+mad.f32 r0.x, c2.y, r1.z, r0.x
+mad.f32 r1.x, c2.x, r1.z, r0.y
+mad.f32 r0.y, c3.y, r1.w, r0.x
+mad.f32 r0.x, c3.x, r1.w, r1.x
+max.f r1.x, r2.w, c4.x
+max.f r1.y, r2.z, c4.x
+max.f r2.y, r2.y, c4.x
+max.f r2.x, r2.x, c4.x
+min.f r1.w, r1.x, c4.y
+min.f r1.z, r1.y, c4.y
+min.f r1.y, r2.y, c4.y
+min.f r1.x, r2.x, c4.y
end
nop
nop
nop
; VERT: outputs: r0.x (0:0) r1.x (1:0)
-; VERT: inputs: r2.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=f,il=12,b=0)
-; VERT: 29 instructions, 0 half, 3 full
+; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) r2.x (0:0,cm=f,il=12,b=0)
+; VERT: 25 instructions, 0 half, 3 full
diff --git a/reference/xonotic-gl2/xonotic-glx-gl2-02.asm b/reference/xonotic-gl2/xonotic-glx-gl2-02.asm
index 71537bc..45a3f44 100644
--- a/reference/xonotic-gl2/xonotic-glx-gl2-02.asm
+++ b/reference/xonotic-gl2/xonotic-glx-gl2-02.asm
@@ -6,31 +6,24 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)bary.f r0.z, 4, r0.x
bary.f r0.w, 5, r0.x
bary.f r1.x, 3, r0.x
bary.f r1.y, 2, r0.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-bary.f r1.z, 1, r0.x
+bary.f r2.x, 1, r0.x
bary.f (ei)r0.x, 0, r0.x
-mov.f32f32 r0.y, r0.z
-mov.f32f32 r0.z, r0.w
-(rpt5)nop
-sam (f32)(xyzw)r1.w, r0.y, s#0, t#0
-(sy)(ss)mul.f r0.y, r1.x, r2.z
-mul.f r0.z, r1.y, r2.y
-mul.f r0.w, r1.z, r2.x
-mul.f r0.x, r0.x, r1.w
-mov.f32f32 r1.w, r0.y
-mov.f32f32 r1.z, r0.z
-mov.f32f32 r1.y, r0.w
-mov.f32f32 r1.x, r0.x
+(rpt1)nop
+sam (f32)(xyzw)r2.y, r0.z, s#0, t#0
+(sy)mul.f r1.w, r1.x, r3.x
+mul.f r1.z, r1.y, r2.w
+mul.f r1.y, r2.x, r2.z
+mul.f r1.x, r0.x, r2.y
end
nop
nop
nop
; FRAG: outputs: r1.x (1:0)
; FRAG: inputs: r0.y (5:20,cm=f,il=8,b=1) r1.x (5:21,cm=f,il=12,b=1)
-; FRAG: 26 instructions, 0 half, 3 full
+; FRAG: 14 instructions, 0 half, 4 full
diff --git a/reference/xonotic-gl2/xonotic-glx-gl2-04.asm b/reference/xonotic-gl2/xonotic-glx-gl2-04.asm
index 71537bc..45a3f44 100644
--- a/reference/xonotic-gl2/xonotic-glx-gl2-04.asm
+++ b/reference/xonotic-gl2/xonotic-glx-gl2-04.asm
@@ -6,31 +6,24 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)bary.f r0.z, 4, r0.x
bary.f r0.w, 5, r0.x
bary.f r1.x, 3, r0.x
bary.f r1.y, 2, r0.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-bary.f r1.z, 1, r0.x
+bary.f r2.x, 1, r0.x
bary.f (ei)r0.x, 0, r0.x
-mov.f32f32 r0.y, r0.z
-mov.f32f32 r0.z, r0.w
-(rpt5)nop
-sam (f32)(xyzw)r1.w, r0.y, s#0, t#0
-(sy)(ss)mul.f r0.y, r1.x, r2.z
-mul.f r0.z, r1.y, r2.y
-mul.f r0.w, r1.z, r2.x
-mul.f r0.x, r0.x, r1.w
-mov.f32f32 r1.w, r0.y
-mov.f32f32 r1.z, r0.z
-mov.f32f32 r1.y, r0.w
-mov.f32f32 r1.x, r0.x
+(rpt1)nop
+sam (f32)(xyzw)r2.y, r0.z, s#0, t#0
+(sy)mul.f r1.w, r1.x, r3.x
+mul.f r1.z, r1.y, r2.w
+mul.f r1.y, r2.x, r2.z
+mul.f r1.x, r0.x, r2.y
end
nop
nop
nop
; FRAG: outputs: r1.x (1:0)
; FRAG: inputs: r0.y (5:20,cm=f,il=8,b=1) r1.x (5:21,cm=f,il=12,b=1)
-; FRAG: 26 instructions, 0 half, 3 full
+; FRAG: 14 instructions, 0 half, 4 full
diff --git a/reference/xonotic-gl2/xonotic-glx-gl2-06.asm b/reference/xonotic-gl2/xonotic-glx-gl2-06.asm
index 71537bc..45a3f44 100644
--- a/reference/xonotic-gl2/xonotic-glx-gl2-06.asm
+++ b/reference/xonotic-gl2/xonotic-glx-gl2-06.asm
@@ -6,31 +6,24 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)bary.f r0.z, 4, r0.x
bary.f r0.w, 5, r0.x
bary.f r1.x, 3, r0.x
bary.f r1.y, 2, r0.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-bary.f r1.z, 1, r0.x
+bary.f r2.x, 1, r0.x
bary.f (ei)r0.x, 0, r0.x
-mov.f32f32 r0.y, r0.z
-mov.f32f32 r0.z, r0.w
-(rpt5)nop
-sam (f32)(xyzw)r1.w, r0.y, s#0, t#0
-(sy)(ss)mul.f r0.y, r1.x, r2.z
-mul.f r0.z, r1.y, r2.y
-mul.f r0.w, r1.z, r2.x
-mul.f r0.x, r0.x, r1.w
-mov.f32f32 r1.w, r0.y
-mov.f32f32 r1.z, r0.z
-mov.f32f32 r1.y, r0.w
-mov.f32f32 r1.x, r0.x
+(rpt1)nop
+sam (f32)(xyzw)r2.y, r0.z, s#0, t#0
+(sy)mul.f r1.w, r1.x, r3.x
+mul.f r1.z, r1.y, r2.w
+mul.f r1.y, r2.x, r2.z
+mul.f r1.x, r0.x, r2.y
end
nop
nop
nop
; FRAG: outputs: r1.x (1:0)
; FRAG: inputs: r0.y (5:20,cm=f,il=8,b=1) r1.x (5:21,cm=f,il=12,b=1)
-; FRAG: 26 instructions, 0 half, 3 full
+; FRAG: 14 instructions, 0 half, 4 full
diff --git a/reference/xonotic-gl2/xonotic-glx-gl2-10.asm b/reference/xonotic-gl2/xonotic-glx-gl2-10.asm
index ea7d51f..d26b519 100644
--- a/reference/xonotic-gl2/xonotic-glx-gl2-10.asm
+++ b/reference/xonotic-gl2/xonotic-glx-gl2-10.asm
@@ -4,6 +4,7 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c0.x) 0x3f800000, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)mov.f32f32 r1.w, c0.x
mov.f32f32 r1.z, c0.x
mov.f32f32 r1.y, c0.x
diff --git a/reference/xonotic-gl2/xonotic-glx-gl2-12.asm b/reference/xonotic-gl2/xonotic-glx-gl2-12.asm
index b4aedcd..1867490 100644
--- a/reference/xonotic-gl2/xonotic-glx-gl2-12.asm
+++ b/reference/xonotic-gl2/xonotic-glx-gl2-12.asm
@@ -6,211 +6,146 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c10.x) 0xbf000000, 0x40000000, 0xbf800000, 0x00000000
+@const(c11.x) 0x3f800000, 0x3e800000, 0x00000000, 0x00000000
+@const(c12.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.z, 2, r0.x
-bary.f r0.w, 0, r0.x
-bary.f r1.x, 4, r0.x
-bary.f r1.y, 3, r0.x
-mov.f32f32 r1.z, r0.z
-mov.f32f32 r1.w, r0.w
-mul.f r2.x, r1.x, r1.x
-bary.f r2.y, 5, r0.x
-mov.f32f32 r2.z, r1.z
-mov.f32f32 r1.z, r1.y
-mov.f32f32 r3.x, r1.w
-bary.f r1.w, 1, r0.x
-mad.f32 r2.x, r2.y, r2.y, r2.x
-mov.f32f32 r2.w, r1.z
-mov.f32f32 r1.z, r0.w
-mov.f32f32 r3.y, r1.w
-mov.f32f32 r2.x, r2.x
-bary.f r3.z, 6, r0.x
-mov.f32f32 r3.w, r1.z
-mov.f32f32 r3.y, r3.y
-sam (f32)(xyz)r4.x, r2.z, s#4, t#4
-(sy)mad.f32 r1.z, c10.y, r4.x, c10.z
-mad.f32 r2.x, r3.z, r3.z, r2.x
-(ss)mov.f32f32 r2.z, r1.w
-mad.f32 r2.w, c10.y, r4.y, c10.z
-mov.f32f32 r1.z, r1.z
+bary.f r0.w, 3, r0.x
+bary.f r1.x, 0, r0.x
+bary.f r1.z, 4, r0.x
+mov.f32f32 r1.w, r0.z
+mov.f32f32 r2.x, r0.w
+mov.f32f32 r2.y, r1.x
+bary.f r1.y, 1, r0.x
+sam (f32)(xyz)r2.z, r0.z, s#4, t#4
+(sy)(ss)mad.f32 r0.z, c10.y, r2.z, c10.z
+mov.f32f32 r3.y, r1.x
+bary.f r0.w, 16, r0.x
+mul.f r2.z, r1.z, r1.z
+mov.f32f32 r3.w, r0.z
bary.f r4.x, 12, r0.x
-sam (f32)(xyz)r4.w, r3.x, s#0, t#0
-(sy)(ss)add.f r3.x, r4.w, c10.x
-bary.f r3.y, 16, r0.x
+mov.f32f32 r3.z, r1.y
bary.f r4.y, 8, r0.x
-mul.f r4.x, r1.z, r4.x
-mov.f32f32 r2.w, r2.w
-bary.f r4.w, 13, r0.x
-mov.f32f32 r3.x, r3.x
-mul.f r3.y, r1.z, r3.y
-mul.f r1.z, r1.z, r4.y
-mad.f32 r4.x, r2.w, r4.w, r4.x
-mul.f r4.y, r3.x, r3.x
-add.f r4.w, r5.x, c10.x
-bary.f r5.x, 17, r0.x
-mov.f32f32 r4.x, r4.x
-mad.f32 r4.z, c10.y, r4.z, c10.z
-mov.f32f32 r4.w, r4.w
-mad.f32 r3.y, r2.w, r5.x, r3.y
+mul.f r0.z, r0.z, r0.w
+mul.f r0.w, r3.w, r4.x
+mad.f32 r2.w, c10.y, r2.w, c10.z
+mul.f r3.w, r3.w, r4.y
+bary.f r4.x, 17, r0.x
+sam (f32)(xyz)r4.y, r3.y, s#0, t#0
+(sy)(ss)add.f r3.y, r4.y, c10.x
+mov.f32f32 r3.z, r2.w
+bary.f r4.y, 13, r0.x
bary.f r5.x, 9, r0.x
-mov.f32f32 r4.z, r4.z
-bary.f r5.z, 14, r0.x
-mad.f32 r4.y, r4.w, r4.w, r4.y
-mov.f32f32 r3.y, r3.y
-mad.f32 r1.z, r2.w, r5.x, r1.z
-mad.f32 r2.w, r4.z, r5.z, r4.x
-mov.f32f32 r4.x, r4.y
-add.f r4.y, r5.y, c10.x
-bary.f r5.x, 18, r0.x
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r1.z, r1.z
-bary.f (ei)r0.x, 10, r0.x
-mov.f32f32 r0.y, r4.y
-mad.f32 r3.y, r4.z, r5.x, r3.y
-rsq r2.x, r2.x
-(ss)mov.f32f32 r2.x, r2.x
-mad.f32 r0.x, r4.z, r0.x, r1.z
-mad.f32 r1.z, r0.y, r0.y, r4.x
-mov.f32f32 r3.y, r3.y
-mul.f r1.x, r1.x, r2.x
-mul.f r4.x, r0.x, r0.x
-mul.f r2.y, r2.y, r2.x
-mad.f32 r4.x, r2.w, r2.w, r4.x
-mov.f32f32 r3.y, r3.y
-rsq r1.z, r1.z
-mov.f32f32 r1.x, r1.x
-(ss)mov.f32f32 r1.z, r1.z
-mov.f32f32 r4.x, r4.x
-mov.f32f32 r2.y, r2.y
-mad.f32 r4.x, r3.y, r3.y, r4.x
-mul.f r3.x, r3.x, r1.z
-mul.f r4.y, r4.w, r1.z
-mul.f r0.y, r0.y, r1.z
-(rpt2)nop
-rsq r1.z, r4.x
-(ss)mov.f32f32 r1.z, r1.z
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r4.y, r4.y
-mov.f32f32 r0.y, r0.y
-mul.f r0.x, r0.x, r1.z
-mul.f r3.y, r3.y, r1.z
-mul.f r1.z, r2.w, r1.z
-mul.f r2.x, r3.z, r2.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r2.w, r3.y
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r2.x, r2.x
-mul.f r3.y, r3.x, r0.x
-mul.f r3.z, r3.x, r0.x
-mad.f32 r3.y, r4.y, r1.z, r3.y
-mad.f32 r3.z, r4.y, r1.z, r3.z
-max.f r4.z, c11.y, r2.w
-(ss)mov.f32f32 r4.x, r2.z
-mov.f32f32 r2.z, r3.y
-mov.f32f32 r3.y, r3.z
-mad.f32 r2.z, r0.y, r2.w, r2.z
-mad.f32 r3.y, r0.y, r2.w, r3.y
+mad.f32 r0.z, r2.w, r4.x, r0.z
+bary.f r2.w, 5, r0.x
+mad.f32 r0.w, r3.z, r4.y, r0.w
+mad.f32 r3.x, c10.y, r3.x, c10.z
+mov.f32f32 r4.x, r3.y
+mad.f32 r3.z, r3.z, r5.x, r3.w
+bary.f r3.w, 18, r0.x
+mov.f32f32 r4.y, r3.x
+bary.f r5.x, 14, r0.x
+mul.f r3.y, r3.y, r4.x
+add.f r4.z, r4.z, c10.x
+bary.f r5.y, 10, r0.x
+mad.f32 r0.w, r4.y, r5.x, r0.w
+mad.f32 r0.z, r3.x, r3.w, r0.z
+mad.f32 r2.z, r2.w, r2.w, r2.z
+bary.f (ei)r0.x, 6, r0.x
+mov.f32f32 r0.y, r0.w
+mad.f32 r3.x, r4.y, r5.y, r3.z
mov.f32f32 r3.z, r4.z
-mov.f32f32 r0.w, r0.w
+mov.f32f32 r3.w, r0.z
+mad.f32 r4.y, r0.x, r0.x, r2.z
+mul.f r2.z, r3.x, r3.x
+mad.f32 r3.y, r4.z, r3.z, r3.y
+mad.f32 r0.w, r0.w, r0.y, r2.z
+mov.f32f32 r3.w, r3.w
+add.f r4.z, r4.w, c10.x
+mov.f32f32 r2.z, r1.y
+rsq r4.y, r4.y
+(ss)mov.f32f32 r4.w, r4.y
+mad.f32 r0.w, r3.w, r3.w, r0.w
+mov.f32f32 r3.w, r4.z
+mul.f r0.x, r0.x, r4.y
+sam (f32)(xyz)r5.x, r1.w, s#3, t#3
+mul.f r1.z, r1.z, r4.w
+(ss)mul.f r2.x, r2.w, r4.w
+sam (f32)(xyzw)r5.w, r2.y, s#1, t#1
+(ss)mad.f32 r2.y, r3.w, r3.w, r3.y
+(sy)mul.f r1.w, r6.z, c9.x
+rsq r0.w, r0.w
+(ss)mov.f32f32 r2.z, r0.w
+mul.f r0.z, r0.z, r0.w
+(ss)mul.f r0.w, r6.y, c5.z
+mul.f r2.w, r6.y, c6.z
+mul.f r3.x, r3.x, r2.z
+rsq r2.y, r2.y
+(ss)mov.f32f32 r3.y, r2.y
+mov.f32f32 r3.w, r0.z
+mul.f r0.y, r0.y, r2.z
+(ss)mul.f r2.y, r4.z, r2.y
+mul.f r2.z, r4.x, r3.y
+mov.f32f32 r4.x, r3.x
+max.f r4.y, c11.y, r3.w
+mov.f32f32 r4.z, r0.y
mul.f r3.x, r2.z, r3.x
-mul.f r4.y, r2.z, r4.y
-mul.f r0.y, r2.z, r0.y
-mov.f32f32 r2.z, r3.y
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r3.y, r4.y
-mov.f32f32 r0.y, r0.y
-max.f r2.z, r2.z, c10.w
-mul.f r3.x, c10.y, r3.x
-mul.f r3.y, c10.y, r3.y
-mul.f r0.y, c10.y, r0.y
+mul.f r3.y, r3.z, r3.y
mov.f32f32 r2.z, r2.z
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r0.y, r0.y
-sam (f32)(xyzw)r3.w, r3.w, s#1, t#1
-(sy)mul.f r4.w, r4.y, c6.z
-add.f r0.x, r0.x, (neg)r3.x
-add.f r1.z, r1.z, (neg)r3.y
-add.f r0.y, r2.w, (neg)r0.y
-mul.f r2.w, r4.w, r2.z
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r2.w, r2.w
-mul.f r0.x, r0.x, r1.x
-mul.f r1.x, r4.x, c6.y
-mad.f32 r0.x, r1.z, r2.y, r0.x
-mul.f r1.z, r3.w, c6.x
-rcp r2.y, r3.z
-(ss)mov.f32f32 r2.y, r2.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.x, r0.x
-mul.f r1.x, r1.x, r2.z
-mad.f32 r0.x, r0.y, r2.x, r0.x
-mul.f r0.y, r1.z, r2.z
-(rpt1)nop
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r3.x, r0.z
+mov.f32f32 r3.z, r2.y
+mul.f r4.w, r6.x, c5.y
+mad.f32 r0.y, r3.y, r0.y, r3.x
+mul.f r3.x, r2.z, r4.x
+mad.f32 r0.y, r2.y, r3.w, r0.y
+mov.f32f32 r2.y, r3.y
+rcp r3.y, r4.y
+(ss)mov.f32f32 r4.y, r3.y
+mul.f r3.y, r5.x, r3.y
+mul.f r2.z, r0.y, r2.z
+mad.f32 r3.x, r2.y, r4.z, r3.x
+mul.f r2.y, r0.y, r2.y
+mul.f r0.y, r0.y, r3.z
+mul.f r2.z, c10.y, r2.z
+mad.f32 r3.x, r3.z, r3.w, r3.x
+mul.f r2.y, c10.y, r2.y
+mul.f r0.y, c10.y, r0.y
+add.f r2.z, r4.x, (neg)r2.z
+max.f r3.x, r3.x, c10.w
+add.f r2.y, r4.z, (neg)r2.y
+mul.f r3.z, r5.w, c6.x
+mul.f r1.z, r2.z, r1.z
+mov.f32f32 r2.z, r3.x
+mad.f32 r1.z, r2.y, r2.x, r1.z
+add.f r0.y, r0.z, (neg)r0.y
+mul.f r0.z, r6.x, c6.y
+mul.f r2.x, r3.z, r3.x
+mul.f r2.y, r5.z, r4.y
+mad.f32 r0.x, r0.y, r0.x, r1.z
+mul.f r0.y, r2.w, r2.z
+mul.f r0.z, r0.z, r2.z
+mul.f r2.z, r5.y, r4.y
max.f r0.x, (neg)r0.x, c10.w
-mov.f32f32 r0.z, r1.y
-mul.f r1.y, r4.z, c9.x
-mul.f r1.z, r4.y, c5.z
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r3.y, r0.z
-mov.f32f32 r0.z, r1.y
-mov.f32f32 r1.y, r1.z
-mul.f r1.z, r4.x, c5.y
-mul.f r2.x, r3.w, c5.x
-(ss)mov.f32f32 r3.z, r0.w
-log2 r0.x, r0.x
-mov.f32f32 r0.w, r1.w
-sam (f32)(xyz)r3.w, r3.x, s#3, t#3
-(sy)mul.f r1.w, r3.w, r2.y
-mul.f r2.z, r4.y, r2.y
-mul.f r2.y, r4.x, r2.y
-mov.f32f32 r3.w, r0.w
-mov.f32f32 r0.w, r1.w
-mov.f32f32 r0.z, r0.z
-nop
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r1.w, r0.z
-(ss)nop
-sam (f32)(xyzw)r3.x, r3.z, s#2, t#2
-(sy)mad.f32 r0.z, c8.x, r3.w, c11.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r2.x, r2.x
-(ss)mul.f r3.z, r3.z, c7.z
-mov.f32f32 r0.z, r0.z
+mul.f r2.w, r5.w, c5.x
+sam (f32)(xyzw)r3.z, r1.x, s#2, t#2
+(sy)(ss)mul.f r1.x, r4.x, c7.z
+mad.f32 r1.y, c8.x, r4.y, c11.x
+mul.f r1.z, r3.z, c7.x
nop
-mul.f r3.y, r3.y, c7.y
-mul.f r3.x, r3.x, c7.x
-(ss)mul.f r0.x, r0.z, r0.x
-(rpt2)nop
-mov.f32f32 r0.x, r0.x
+mul.f r3.x, r3.w, c7.y
+log2 r0.x, r0.x
+(ss)mul.f r0.x, r1.y, r0.x
(rpt5)nop
exp2 r0.x, r0.x
-(ss)mad.f32 r0.z, r3.z, r0.x, r2.w
-mad.f32 r1.x, r3.y, r0.x, r1.x
-(ss)mad.f32 r0.x, r3.x, r0.x, r0.y
-nop
-mov.f32f32 r0.y, r0.z
-mov.f32f32 r0.z, r1.x
-mov.f32f32 r0.x, r0.x
+(ss)mad.f32 r0.y, r1.x, r0.x, r0.y
+mad.f32 r0.z, r3.x, r0.x, r0.z
+(ss)mad.f32 r0.x, r1.z, r0.x, r2.x
nop
-mad.f32 r0.y, r0.y, r2.z, r1.y
-mad.f32 r0.z, r0.z, r2.y, r1.z
-mad.f32 r0.x, r0.x, r0.w, r2.x
-nop
-mov.f32f32 r1.z, r0.y
-mov.f32f32 r1.y, r0.z
-mov.f32f32 r1.x, r0.x
+mad.f32 r1.z, r0.y, r2.y, r0.w
+mad.f32 r1.y, r0.z, r2.z, r4.w
+mad.f32 r1.x, r0.x, r3.y, r2.w
end
nop
-nop
-nop
; FRAG: outputs: r1.x (1:0)
-; FRAG: inputs: r0.z (5:20,cm=f,il=8,b=1) r1.x (5:21,cm=f,il=12,b=1) r2.x (5:22,cm=f,il=16,b=1) r3.x (5:23,cm=f,il=20,b=1) r4.x (5:24,cm=f,il=24,b=1)
-; FRAG: 211 instructions, 0 half, 6 full
+; FRAG: inputs: r0.x (5:20,cm=f,il=8,b=1) r1.x (5:21,cm=f,il=12,b=1) r2.x (5:22,cm=f,il=16,b=1) r3.x (5:23,cm=f,il=20,b=1) r4.x (5:24,cm=f,il=24,b=1)
+; FRAG: 140 instructions, 0 half, 7 full
diff --git a/reference/xonotic-gl2/xonotic-glx-gl2-13.asm b/reference/xonotic-gl2/xonotic-glx-gl2-13.asm
index 9a1b31d..26f774e 100644
--- a/reference/xonotic-gl2/xonotic-glx-gl2-13.asm
+++ b/reference/xonotic-gl2/xonotic-glx-gl2-13.asm
@@ -1,22 +1,22 @@
; options:
; VERT: new compiler
-@in(r2.w) in0
-@in(r3.x) in1
-@in(r3.y) in2
-@in(r3.z) in3
+@in(r6.x) in0
+@in(r6.y) in1
+@in(r6.z) in2
+@in(r6.w) in3
@in(r0.x) in4
@in(r0.y) in5
@in(r0.z) in6
@in(r0.w) in7
-@in(r6.x) in8
-@in(r6.y) in9
-@in(r6.z) in10
-@in(r3.w) in12
-@in(r4.x) in13
-@in(r4.y) in14
-@in(r4.z) in16
-@in(r4.w) in17
-@in(r5.x) in18
+@in(r3.x) in8
+@in(r3.y) in9
+@in(r3.z) in10
+@in(r4.x) in12
+@in(r4.y) in13
+@in(r4.z) in14
+@in(r5.x) in16
+@in(r5.y) in17
+@in(r5.z) in18
@in(r1.z) in20
@in(r1.w) in21
@out(r0.x) out0
@@ -43,99 +43,53 @@
@out(r5.y) out21
@out(r5.z) out22
@out(r5.w) out23
-(sy)(ss)add.f r1.x, c4.x, (neg)r2.w
+@const(c9.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
+@const(c10.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
+(sy)(ss)add.f r1.x, c4.x, (neg)r6.x
mul.f r1.y, c5.y, r0.x
mul.f r0.x, c5.x, r0.x
+mul.f r2.w, c0.w, r6.x
+mul.f r2.x, r1.x, r5.x
+add.f r2.y, c4.y, (neg)r6.y
+mul.f r2.z, r1.x, r4.x
+mul.f r1.x, r1.x, r3.x
mad.f32 r1.y, c6.y, r0.y, r1.y
-mul.f r2.x, r1.x, r4.z
-add.f r5.y, c4.y, (neg)r3.x
-mad.f32 r1.y, c7.y, r0.z, r1.y
+mad.f32 r2.x, r2.y, r5.y, r2.x
+add.f r3.w, c4.z, (neg)r6.z
+mad.f32 r4.w, r2.y, r4.y, r2.z
+mad.f32 r1.x, r2.y, r3.y, r1.x
+nop
+mad.f32 r2.z, r3.w, r5.z, r2.x
+mad.f32 r2.y, r3.w, r4.z, r4.w
+mad.f32 r2.x, r3.w, r3.z, r1.x
+mad.f32 r1.x, c7.y, r0.z, r1.y
mad.f32 r0.x, c6.x, r0.y, r0.x
-mul.f r0.y, r1.x, r3.w
-mad.f32 r2.x, r5.y, r4.w, r2.x
-mad.f32 r1.y, c8.y, r0.w, r1.y
+mad.f32 r1.y, c8.y, r0.w, r1.x
mad.f32 r0.x, c7.x, r0.z, r0.x
-mad.f32 r0.y, r5.y, r4.x, r0.y
-mov.f32f32 r0.z, r2.x
-add.f r2.x, c4.z, (neg)r3.y
-mov.f32f32 r1.y, r1.y
-mad.f32 r0.x, c8.x, r0.w, r0.x
-mov.f32f32 r0.y, r0.y
-mad.f32 r0.z, r2.x, r5.x, r0.z
-mov.f32f32 r0.w, r1.y
-mov.f32f32 r0.x, r0.x
-mad.f32 r0.y, r2.x, r4.y, r0.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r1.y, r0.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r2.y, r0.y
-mov.f32f32 r2.z, r0.z
-mul.f r0.y, r1.x, r6.x
-mov.f32f32 r1.x, r0.x
-mad.f32 r0.x, r5.y, r6.y, r0.y
-mul.f r0.y, c0.w, r2.w
-mul.f r0.z, c0.z, r2.w
-mul.f r0.w, c0.y, r2.w
-mov.f32f32 r0.x, r0.x
-mad.f32 r0.y, c1.w, r3.x, r0.y
-mad.f32 r0.x, r2.x, r6.z, r0.x
-mad.f32 r0.y, c2.w, r3.y, r0.y
-mad.f32 r0.z, c1.z, r3.x, r0.z
-mad.f32 r0.w, c1.y, r3.x, r0.w
-mov.f32f32 r2.x, r0.x
-mad.f32 r0.x, c3.w, r3.z, r0.y
-mad.f32 r0.y, c2.z, r3.y, r0.z
-mad.f32 r0.z, c2.y, r3.y, r0.w
-mul.f r2.w, c0.x, r2.w
-mov.f32f32 r0.w, r0.x
-mad.f32 r0.x, c3.z, r3.z, r0.y
-mad.f32 r0.y, c3.y, r3.z, r0.z
-mad.f32 r2.w, c1.x, r3.x, r2.w
-mov.f32f32 r3.x, c9.x
-mov.f32f32 r0.z, r0.x
-mov.f32f32 r0.y, r0.y
-mad.f32 r0.x, c2.x, r3.y, r2.w
-mov.f32f32 r5.w, r3.x
-mad.f32 r0.x, c3.x, r3.z, r0.x
-mov.f32f32 r2.w, r5.x
-mov.f32f32 r3.x, r4.w
-mov.f32f32 r3.y, r4.z
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r5.z, r2.w
-mov.f32f32 r5.y, r3.x
-mov.f32f32 r5.x, r3.y
-mov.f32f32 r2.w, c9.x
-mov.f32f32 r3.x, r4.y
-mov.f32f32 r3.y, r4.x
-mov.f32f32 r3.z, r3.w
-mov.f32f32 r4.w, r2.w
-mov.f32f32 r4.z, r3.x
-mov.f32f32 r4.y, r3.y
-mov.f32f32 r4.x, r3.z
-mov.f32f32 r2.w, c9.x
-mov.f32f32 r3.x, r6.z
-mov.f32f32 r3.y, r6.y
-mov.f32f32 r6.x, r6.x
-mov.f32f32 r3.w, r2.w
-mov.f32f32 r3.z, r3.x
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r3.x, r6.x
+mad.f32 r0.y, c1.w, r6.y, r2.w
+mad.f32 r1.x, c8.x, r0.w, r0.x
+mad.f32 r0.x, c2.w, r6.z, r0.y
+mul.f r0.y, c0.z, r6.x
+mad.f32 r0.w, c3.w, r6.w, r0.x
+mad.f32 r0.x, c1.z, r6.y, r0.y
+mul.f r0.y, c0.y, r6.x
+mad.f32 r0.x, c2.z, r6.z, r0.x
+mad.f32 r0.y, c1.y, r6.y, r0.y
+mad.f32 r0.z, c3.z, r6.w, r0.x
+mad.f32 r0.x, c2.y, r6.z, r0.y
+mul.f r2.w, c0.x, r6.x
+mad.f32 r0.y, c3.y, r6.w, r0.x
+mad.f32 r0.x, c1.x, r6.y, r2.w
+mov.f32f32 r5.w, c9.x
+mad.f32 r0.x, c2.x, r6.z, r0.x
+mov.f32f32 r4.w, c9.x
+mad.f32 r0.x, c3.x, r6.w, r0.x
+mov.f32f32 r3.w, c9.x
mov.f32f32 r2.w, c9.x
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r1.z, r1.z
-nop
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r1.z, r1.z
end
nop
nop
-nop
; VERT: outputs: r0.x (0:0) r1.x (5:20) r2.x (5:21) r3.x (5:22) r4.x (5:23) r5.x (5:24)
-; VERT: inputs: r2.w (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=f,il=12,b=0) r6.x (0:0,cm=7,il=16,b=0) r3.w (0:0,cm=7,il=20,b=0) r4.z (0:0,cm=7,il=24,b=0) r1.z (0:0,cm=3,il=28,b=0)
-; VERT: 89 instructions, 0 half, 7 full
+; VERT: inputs: r6.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=f,il=12,b=0) r3.x (0:0,cm=7,il=16,b=0) r4.x (0:0,cm=7,il=20,b=0) r5.x (0:0,cm=7,il=24,b=0) r1.z (0:0,cm=3,il=28,b=0)
+; VERT: 42 instructions, 0 half, 7 full
diff --git a/reference/xonotic-gl2/xonotic-glx-gl2-14.asm b/reference/xonotic-gl2/xonotic-glx-gl2-14.asm
index 0c5831e..fc88e42 100644
--- a/reference/xonotic-gl2/xonotic-glx-gl2-14.asm
+++ b/reference/xonotic-gl2/xonotic-glx-gl2-14.asm
@@ -6,219 +6,158 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c12.x) 0xbf000000, 0x40000000, 0xbf800000, 0x00000000
+@const(c13.x) 0x3f800000, 0x3e800000, 0x00000000, 0x00000000
+@const(c14.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.z, 2, r0.x
-bary.f r0.w, 0, r0.x
-bary.f r1.x, 4, r0.x
-bary.f r1.y, 3, r0.x
-mov.f32f32 r1.z, r0.z
-mov.f32f32 r1.w, r0.w
-mul.f r2.x, r1.x, r1.x
-bary.f r2.y, 5, r0.x
-mov.f32f32 r2.z, r1.z
-mov.f32f32 r1.z, r1.y
-mov.f32f32 r3.x, r1.w
-bary.f r3.z, 1, r0.x
-mad.f32 r1.w, r2.y, r2.y, r2.x
-mov.f32f32 r2.w, r1.z
-mov.f32f32 r1.z, r0.w
-mov.f32f32 r2.x, r3.z
-mov.f32f32 r1.w, r1.w
-bary.f r3.w, 6, r0.x
-mov.f32f32 r4.x, r1.z
-mov.f32f32 r3.y, r2.x
-sam (f32)(xyz)r4.y, r2.z, s#5, t#5
-(sy)mad.f32 r1.z, c12.y, r4.y, c12.z
-mad.f32 r1.w, r3.w, r3.w, r1.w
-mov.f32f32 r2.x, r3.z
-(ss)mad.f32 r2.z, c12.y, r4.z, c12.z
-mov.f32f32 r1.z, r1.z
-bary.f r2.w, 12, r0.x
-sam (f32)(xyz)r5.x, r3.x, s#0, t#0
-(sy)(ss)add.f r3.x, r5.x, c12.x
-bary.f r3.y, 16, r0.x
+bary.f r0.w, 3, r0.x
+bary.f r1.x, 0, r0.x
+bary.f r1.z, 4, r0.x
+mov.f32f32 r1.w, r0.z
+mov.f32f32 r2.x, r0.w
+mov.f32f32 r2.y, r1.x
+mov.f32f32 r2.w, r1.x
+sam (f32)(xyz)r3.x, r0.z, s#5, t#5
+(sy)(ss)mad.f32 r0.z, c12.y, r3.x, c12.z
+mov.f32f32 r3.w, r1.x
+bary.f r1.y, 1, r0.x
+bary.f r0.w, 16, r0.x
+mov.f32f32 r2.z, r0.z
+bary.f r3.x, 12, r0.x
+mov.f32f32 r4.x, r1.y
bary.f r4.y, 8, r0.x
-mul.f r2.w, r1.z, r2.w
-mov.f32f32 r2.z, r2.z
-bary.f r4.z, 13, r0.x
-mov.f32f32 r3.x, r3.x
-mul.f r3.y, r1.z, r3.y
-mul.f r1.z, r1.z, r4.y
-mad.f32 r2.w, r2.z, r4.z, r2.w
-mul.f r4.y, r3.x, r3.x
-add.f r4.z, r5.y, c12.x
-bary.f r5.x, 17, r0.x
-mov.f32f32 r2.w, r2.w
-mad.f32 r4.w, c12.y, r4.w, c12.z
-mov.f32f32 r4.z, r4.z
-mad.f32 r3.y, r2.z, r5.x, r3.y
+mul.f r0.z, r0.z, r0.w
+mul.f r0.w, r2.z, r3.x
+mad.f32 r3.x, c12.y, r3.y, c12.z
+mul.f r2.z, r2.z, r4.y
+bary.f r3.y, 17, r0.x
+sam (f32)(xyz)r3.w, r3.w, s#0, t#0
+(sy)(ss)add.f r3.w, r3.w, c12.x
+mov.f32f32 r4.z, r3.x
+bary.f r4.w, 13, r0.x
bary.f r5.x, 9, r0.x
-mov.f32f32 r4.w, r4.w
-bary.f r5.y, 14, r0.x
-mad.f32 r4.y, r4.z, r4.z, r4.y
-mov.f32f32 r3.y, r3.y
-mad.f32 r1.z, r2.z, r5.x, r1.z
-mad.f32 r2.z, r4.w, r5.y, r2.w
-mov.f32f32 r2.w, r4.y
-add.f r4.y, r5.z, c12.x
-bary.f r5.x, 18, r0.x
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r1.z, r1.z
-bary.f (ei)r0.x, 10, r0.x
-mov.f32f32 r0.y, r4.y
-mad.f32 r3.y, r4.w, r5.x, r3.y
-rsq r1.w, r1.w
-(ss)mov.f32f32 r1.w, r1.w
-mad.f32 r0.x, r4.w, r0.x, r1.z
-mad.f32 r1.z, r0.y, r0.y, r2.w
-mov.f32f32 r2.w, r3.y
-mul.f r1.x, r1.x, r1.w
-mul.f r3.y, r0.x, r0.x
-mul.f r2.y, r2.y, r1.w
-mad.f32 r3.y, r2.z, r2.z, r3.y
-mov.f32f32 r2.w, r2.w
-rsq r1.z, r1.z
-mov.f32f32 r1.x, r1.x
-(ss)mov.f32f32 r1.z, r1.z
+mad.f32 r0.z, r3.x, r3.y, r0.z
+mul.f r3.x, r1.z, r1.z
+mad.f32 r0.w, r4.z, r4.w, r0.w
+mad.f32 r3.y, c12.y, r3.z, c12.z
+mov.f32f32 r3.z, r3.w
+mad.f32 r2.z, r4.z, r5.x, r2.z
+bary.f r4.z, 18, r0.x
+mov.f32f32 r4.w, r3.y
+bary.f r5.x, 14, r0.x
+mul.f r3.w, r3.w, r3.z
+add.f r4.x, r4.x, c12.x
+bary.f r5.y, 10, r0.x
+mad.f32 r0.w, r4.w, r5.x, r0.w
+mad.f32 r0.z, r3.y, r4.z, r0.z
+bary.f r3.y, 5, r0.x
+mov.f32f32 r4.z, r4.x
+mov.f32f32 r5.x, r0.w
+mad.f32 r4.w, r4.w, r5.y, r2.z
+mov.f32f32 r2.z, r0.z
+mad.f32 r3.x, r3.y, r3.y, r3.x
+bary.f (ei)r0.x, 6, r0.x
+mul.f r0.y, r4.w, r4.w
+mad.f32 r3.w, r4.x, r4.z, r3.w
+mad.f32 r0.y, r0.w, r5.x, r0.y
+mov.f32f32 r0.w, r2.z
+add.f r4.x, r4.y, c12.x
+mad.f32 r4.y, r0.x, r0.x, r3.x
+mov.f32f32 r2.z, r1.y
+mad.f32 r0.y, r0.w, r0.w, r0.y
+mov.f32f32 r0.w, r4.x
+mov.f32f32 r3.x, r1.y
+sam (f32)(xyz)r5.y, r1.w, s#4, t#4
+sam (f32)(xyz)r6.x, r1.x, s#3, t#3
+(rpt3)nop
+rsq r0.y, r0.y
+(ss)mov.f32f32 r1.x, r0.y
+mul.f r0.y, r0.z, r0.y
+mad.f32 r0.z, r0.w, r0.w, r3.w
+rsq r0.w, r4.y
+(ss)mov.f32f32 r1.y, r0.w
+mul.f r1.w, r4.w, r1.x
+mov.f32f32 r2.x, r0.y
+mul.f r1.x, r5.x, r1.x
+mul.f r1.z, r1.z, r1.y
+mul.f r1.y, r3.y, r1.y
+rsq r0.z, r0.z
+(ss)mov.f32f32 r3.y, r0.z
+mov.f32f32 r3.w, r1.w
+(ss)max.f r4.y, c13.y, r2.x
+mov.f32f32 r4.w, r1.x
+mul.f r3.z, r3.z, r3.y
+mul.f r3.y, r4.z, r3.y
+mul.f r0.z, r4.x, r0.z
+nop
+mul.f r1.w, r3.z, r1.w
+mov.f32f32 r3.z, r3.z
+mad.f32 r1.x, r3.y, r1.x, r1.w
+rcp r1.w, r4.y
+(ss)mov.f32f32 r4.x, r1.w
+mad.f32 r1.x, r0.z, r2.x, r1.x
+(ss)mul.f r4.y, r3.z, r3.w
mov.f32f32 r3.y, r3.y
-mov.f32f32 r2.y, r2.y
-mad.f32 r3.y, r2.w, r2.w, r3.y
-mul.f r3.x, r3.x, r1.z
-mul.f r4.y, r4.z, r1.z
-mul.f r0.y, r0.y, r1.z
-(rpt2)nop
-rsq r1.z, r3.y
-(ss)mov.f32f32 r1.z, r1.z
-mov.f32f32 r3.x, r3.x
-(ss)mov.f32f32 r3.y, r4.y
-mov.f32f32 r0.y, r0.y
-mul.f r0.x, r0.x, r1.z
-mul.f r2.w, r2.w, r1.z
-mul.f r1.z, r2.z, r1.z
-mul.f r1.w, r3.w, r1.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r2.z, r2.w
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r1.w, r1.w
-mul.f r2.w, r3.x, r0.x
-mul.f r3.w, r3.x, r0.x
-mad.f32 r2.w, r3.y, r1.z, r2.w
-mad.f32 r3.w, r3.y, r1.z, r3.w
-max.f r4.z, c13.y, r2.z
-mov.f32f32 r4.y, r2.x
-mov.f32f32 r2.x, r2.w
-mov.f32f32 r2.w, r3.w
-mad.f32 r2.x, r0.y, r2.z, r2.x
-mad.f32 r2.w, r0.y, r2.z, r2.w
-mov.f32f32 r3.w, r4.z
-mov.f32f32 r4.z, r0.w
-mul.f r3.x, r2.x, r3.x
-mul.f r3.y, r2.x, r3.y
-mul.f r0.y, r2.x, r0.y
-mov.f32f32 r2.x, r2.w
-mov.f32f32 r2.w, r3.x
-mov.f32f32 r3.x, r3.y
-mov.f32f32 r0.y, r0.y
-max.f r2.x, r2.x, c12.w
-mul.f r2.w, c12.y, r2.w
-mul.f r3.x, c12.y, r3.x
-mul.f r0.y, c12.y, r0.y
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r0.y, r0.y
-sam (f32)(xyzw)r4.w, r4.x, s#1, t#1
-(sy)mul.f r3.y, r5.y, c7.z
-add.f r0.x, r0.x, (neg)r2.w
-add.f r1.z, r1.z, (neg)r3.x
-add.f r0.y, r2.z, (neg)r0.y
-mul.f r2.z, r3.y, r2.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r2.z, r2.z
-mul.f r0.x, r0.x, r1.x
-mul.f r1.x, r5.x, c7.y
-mad.f32 r0.x, r1.z, r2.y, r0.x
-mul.f r1.z, r4.w, c7.x
-rcp r2.y, r3.w
-(ss)mov.f32f32 r2.y, r2.y
mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.x, r0.x
-mul.f r1.x, r1.x, r2.x
-mad.f32 r0.x, r0.y, r1.w, r0.x
-mul.f r0.y, r1.z, r2.x
-(rpt1)nop
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.z, r0.z
+mul.f r3.z, r1.x, r3.z
+(sy)mul.f r4.z, r5.w, r4.x
+mul.f r4.x, r5.z, r4.x
+mad.f32 r4.y, r3.y, r4.w, r4.y
+mul.f r3.z, c12.y, r3.z
+mad.f32 r2.x, r0.z, r2.x, r4.y
+mul.f r3.y, r1.x, r3.y
+mul.f r0.z, r1.x, r0.z
+add.f r1.x, r3.w, (neg)r3.z
+max.f r2.x, r2.x, c12.w
+sam (f32)(xyzw)r6.w, r2.y, s#1, t#1
+(ss)mul.f r2.y, c12.y, r3.y
+(sy)mul.f r2.z, r6.w, c7.x
+mul.f r1.x, r1.x, r1.z
+mov.f32f32 r1.z, r2.x
+add.f r2.y, r4.w, (neg)r2.y
+mul.f r2.x, r2.z, r2.x
+mul.f r2.z, r7.y, c7.z
+mul.f r3.y, r7.x, c7.y
+mad.f32 r1.x, r2.y, r1.y, r1.x
+mul.f r0.z, c12.y, r0.z
+mul.f r1.y, r2.z, r1.z
+mul.f r1.z, r3.y, r1.z
+mul.f r2.y, r5.y, r1.w
+add.f r0.y, r0.y, (neg)r0.z
+mul.f r0.x, r0.x, r0.w
+mul.f r0.z, r7.y, c6.z
+mul.f r1.w, r7.z, c11.x
+sam (f32)(xyzw)r2.z, r2.w, s#2, t#2
+(sy)mul.f r0.w, r3.x, c8.z
+mad.f32 r0.x, r0.y, r0.x, r1.x
+mul.f r0.y, r7.x, c6.y
+mul.f r1.x, r6.w, c6.x
+(ss)mul.f r2.w, r2.w, c8.y
max.f r0.x, (neg)r0.x, c12.w
-mov.f32f32 r0.z, r1.y
-mul.f r1.y, r5.z, c11.x
-mul.f r2.x, r5.y, c6.z
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r1.w, r0.z
-mov.f32f32 r0.z, r1.y
-mov.f32f32 r1.y, r2.x
-mul.f r2.x, r5.x, c6.y
-mul.f r2.w, r4.w, c6.x
-mov.f32f32 r3.x, r4.z
+(rpt3)nop
+mad.f32 r3.x, c9.x, r3.y, c13.x
+mul.f r2.z, r2.z, c8.x
log2 r0.x, r0.x
-mov.f32f32 r3.y, r3.z
-(ss)nop
-sam (f32)(xyz)r3.w, r1.z, s#4, t#4
-(sy)(ss)mul.f r1.z, r3.w, r2.y
-mul.f r1.w, r4.y, r2.y
-mul.f r2.y, r4.x, r2.y
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r0.z, r0.z
-nop
-mov.f32f32 r3.w, r1.w
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r1.w, r0.z
-sam (f32)(xyzw)r4.x, r3.x, s#2, t#2
-(sy)mad.f32 r0.z, c9.x, r4.w, c13.x
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r2.w, r2.w
-(ss)mul.f r3.x, r4.z, c8.z
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-mul.f r3.y, r4.y, c8.y
-mul.f r4.x, r4.x, c8.x
-(ss)mul.f r0.x, r0.z, r0.x
-mov.f32f32 r0.z, r0.w
-mov.f32f32 r0.w, r3.z
-nop
-mov.f32f32 r0.x, r0.x
+(rpt1)nop
+(ss)mul.f r0.x, r3.x, r0.x
(rpt5)nop
exp2 r0.x, r0.x
-(ss)mad.f32 r2.z, r3.x, r0.x, r2.z
-mad.f32 r1.x, r3.y, r0.x, r1.x
-(ss)mad.f32 r0.x, r4.x, r0.x, r0.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.y, r2.z
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r0.x, r0.x
+(ss)mad.f32 r0.w, r0.w, r0.x, r1.y
+mad.f32 r1.y, r2.w, r0.x, r1.z
+(ss)mad.f32 r0.x, r2.z, r0.x, r2.x
nop
-mad.f32 r0.y, r0.y, r3.w, r1.y
-mad.f32 r1.x, r1.x, r2.y, r2.x
-sam (f32)(xyz)r3.x, r0.z, s#3, t#3
-(sy)mad.f32 r0.y, c10.z, r3.z, r0.y
-mad.f32 r0.x, r0.x, r1.z, r2.w
-(ss)mad.f32 r0.z, c10.y, r3.y, r1.x
-mad.f32 r0.x, c10.x, r3.x, r0.x
-mov.f32f32 r0.y, r0.y
-(rpt2)nop
-mov.f32f32 r1.z, r0.y
-mov.f32f32 r0.y, r0.z
-mov.f32f32 r0.x, r0.x
-(rpt1)nop
-mov.f32f32 r1.y, r0.y
-mov.f32f32 r1.x, r0.x
+mad.f32 r0.z, r0.w, r4.z, r0.z
+mad.f32 r0.y, r1.y, r4.x, r0.y
+mad.f32 r1.z, c10.z, r6.z, r0.z
+mad.f32 r1.y, c10.y, r6.y, r0.y
+mad.f32 r0.x, r0.x, r2.y, r1.x
+nop
+mad.f32 r1.x, c10.x, r6.x, r0.x
end
nop
+nop
+nop
; FRAG: outputs: r1.x (1:0)
-; FRAG: inputs: r0.z (5:20,cm=f,il=8,b=1) r1.x (5:21,cm=f,il=12,b=1) r2.x (5:22,cm=f,il=16,b=1) r3.x (5:23,cm=f,il=20,b=1) r4.x (5:24,cm=f,il=24,b=1)
-; FRAG: 222 instructions, 0 half, 6 full
+; FRAG: inputs: r0.x (5:20,cm=f,il=8,b=1) r1.x (5:21,cm=f,il=12,b=1) r2.x (5:22,cm=f,il=16,b=1) r3.x (5:23,cm=f,il=20,b=1) r4.x (5:24,cm=f,il=24,b=1)
+; FRAG: 157 instructions, 0 half, 8 full
diff --git a/reference/xonotic-gl2/xonotic-glx-gl2-15.asm b/reference/xonotic-gl2/xonotic-glx-gl2-15.asm
index 9a1b31d..26f774e 100644
--- a/reference/xonotic-gl2/xonotic-glx-gl2-15.asm
+++ b/reference/xonotic-gl2/xonotic-glx-gl2-15.asm
@@ -1,22 +1,22 @@
; options:
; VERT: new compiler
-@in(r2.w) in0
-@in(r3.x) in1
-@in(r3.y) in2
-@in(r3.z) in3
+@in(r6.x) in0
+@in(r6.y) in1
+@in(r6.z) in2
+@in(r6.w) in3
@in(r0.x) in4
@in(r0.y) in5
@in(r0.z) in6
@in(r0.w) in7
-@in(r6.x) in8
-@in(r6.y) in9
-@in(r6.z) in10
-@in(r3.w) in12
-@in(r4.x) in13
-@in(r4.y) in14
-@in(r4.z) in16
-@in(r4.w) in17
-@in(r5.x) in18
+@in(r3.x) in8
+@in(r3.y) in9
+@in(r3.z) in10
+@in(r4.x) in12
+@in(r4.y) in13
+@in(r4.z) in14
+@in(r5.x) in16
+@in(r5.y) in17
+@in(r5.z) in18
@in(r1.z) in20
@in(r1.w) in21
@out(r0.x) out0
@@ -43,99 +43,53 @@
@out(r5.y) out21
@out(r5.z) out22
@out(r5.w) out23
-(sy)(ss)add.f r1.x, c4.x, (neg)r2.w
+@const(c9.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
+@const(c10.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
+(sy)(ss)add.f r1.x, c4.x, (neg)r6.x
mul.f r1.y, c5.y, r0.x
mul.f r0.x, c5.x, r0.x
+mul.f r2.w, c0.w, r6.x
+mul.f r2.x, r1.x, r5.x
+add.f r2.y, c4.y, (neg)r6.y
+mul.f r2.z, r1.x, r4.x
+mul.f r1.x, r1.x, r3.x
mad.f32 r1.y, c6.y, r0.y, r1.y
-mul.f r2.x, r1.x, r4.z
-add.f r5.y, c4.y, (neg)r3.x
-mad.f32 r1.y, c7.y, r0.z, r1.y
+mad.f32 r2.x, r2.y, r5.y, r2.x
+add.f r3.w, c4.z, (neg)r6.z
+mad.f32 r4.w, r2.y, r4.y, r2.z
+mad.f32 r1.x, r2.y, r3.y, r1.x
+nop
+mad.f32 r2.z, r3.w, r5.z, r2.x
+mad.f32 r2.y, r3.w, r4.z, r4.w
+mad.f32 r2.x, r3.w, r3.z, r1.x
+mad.f32 r1.x, c7.y, r0.z, r1.y
mad.f32 r0.x, c6.x, r0.y, r0.x
-mul.f r0.y, r1.x, r3.w
-mad.f32 r2.x, r5.y, r4.w, r2.x
-mad.f32 r1.y, c8.y, r0.w, r1.y
+mad.f32 r1.y, c8.y, r0.w, r1.x
mad.f32 r0.x, c7.x, r0.z, r0.x
-mad.f32 r0.y, r5.y, r4.x, r0.y
-mov.f32f32 r0.z, r2.x
-add.f r2.x, c4.z, (neg)r3.y
-mov.f32f32 r1.y, r1.y
-mad.f32 r0.x, c8.x, r0.w, r0.x
-mov.f32f32 r0.y, r0.y
-mad.f32 r0.z, r2.x, r5.x, r0.z
-mov.f32f32 r0.w, r1.y
-mov.f32f32 r0.x, r0.x
-mad.f32 r0.y, r2.x, r4.y, r0.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r1.y, r0.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r2.y, r0.y
-mov.f32f32 r2.z, r0.z
-mul.f r0.y, r1.x, r6.x
-mov.f32f32 r1.x, r0.x
-mad.f32 r0.x, r5.y, r6.y, r0.y
-mul.f r0.y, c0.w, r2.w
-mul.f r0.z, c0.z, r2.w
-mul.f r0.w, c0.y, r2.w
-mov.f32f32 r0.x, r0.x
-mad.f32 r0.y, c1.w, r3.x, r0.y
-mad.f32 r0.x, r2.x, r6.z, r0.x
-mad.f32 r0.y, c2.w, r3.y, r0.y
-mad.f32 r0.z, c1.z, r3.x, r0.z
-mad.f32 r0.w, c1.y, r3.x, r0.w
-mov.f32f32 r2.x, r0.x
-mad.f32 r0.x, c3.w, r3.z, r0.y
-mad.f32 r0.y, c2.z, r3.y, r0.z
-mad.f32 r0.z, c2.y, r3.y, r0.w
-mul.f r2.w, c0.x, r2.w
-mov.f32f32 r0.w, r0.x
-mad.f32 r0.x, c3.z, r3.z, r0.y
-mad.f32 r0.y, c3.y, r3.z, r0.z
-mad.f32 r2.w, c1.x, r3.x, r2.w
-mov.f32f32 r3.x, c9.x
-mov.f32f32 r0.z, r0.x
-mov.f32f32 r0.y, r0.y
-mad.f32 r0.x, c2.x, r3.y, r2.w
-mov.f32f32 r5.w, r3.x
-mad.f32 r0.x, c3.x, r3.z, r0.x
-mov.f32f32 r2.w, r5.x
-mov.f32f32 r3.x, r4.w
-mov.f32f32 r3.y, r4.z
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r5.z, r2.w
-mov.f32f32 r5.y, r3.x
-mov.f32f32 r5.x, r3.y
-mov.f32f32 r2.w, c9.x
-mov.f32f32 r3.x, r4.y
-mov.f32f32 r3.y, r4.x
-mov.f32f32 r3.z, r3.w
-mov.f32f32 r4.w, r2.w
-mov.f32f32 r4.z, r3.x
-mov.f32f32 r4.y, r3.y
-mov.f32f32 r4.x, r3.z
-mov.f32f32 r2.w, c9.x
-mov.f32f32 r3.x, r6.z
-mov.f32f32 r3.y, r6.y
-mov.f32f32 r6.x, r6.x
-mov.f32f32 r3.w, r2.w
-mov.f32f32 r3.z, r3.x
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r3.x, r6.x
+mad.f32 r0.y, c1.w, r6.y, r2.w
+mad.f32 r1.x, c8.x, r0.w, r0.x
+mad.f32 r0.x, c2.w, r6.z, r0.y
+mul.f r0.y, c0.z, r6.x
+mad.f32 r0.w, c3.w, r6.w, r0.x
+mad.f32 r0.x, c1.z, r6.y, r0.y
+mul.f r0.y, c0.y, r6.x
+mad.f32 r0.x, c2.z, r6.z, r0.x
+mad.f32 r0.y, c1.y, r6.y, r0.y
+mad.f32 r0.z, c3.z, r6.w, r0.x
+mad.f32 r0.x, c2.y, r6.z, r0.y
+mul.f r2.w, c0.x, r6.x
+mad.f32 r0.y, c3.y, r6.w, r0.x
+mad.f32 r0.x, c1.x, r6.y, r2.w
+mov.f32f32 r5.w, c9.x
+mad.f32 r0.x, c2.x, r6.z, r0.x
+mov.f32f32 r4.w, c9.x
+mad.f32 r0.x, c3.x, r6.w, r0.x
+mov.f32f32 r3.w, c9.x
mov.f32f32 r2.w, c9.x
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r1.z, r1.z
-nop
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r1.z, r1.z
end
nop
nop
-nop
; VERT: outputs: r0.x (0:0) r1.x (5:20) r2.x (5:21) r3.x (5:22) r4.x (5:23) r5.x (5:24)
-; VERT: inputs: r2.w (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=f,il=12,b=0) r6.x (0:0,cm=7,il=16,b=0) r3.w (0:0,cm=7,il=20,b=0) r4.z (0:0,cm=7,il=24,b=0) r1.z (0:0,cm=3,il=28,b=0)
-; VERT: 89 instructions, 0 half, 7 full
+; VERT: inputs: r6.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=f,il=12,b=0) r3.x (0:0,cm=7,il=16,b=0) r4.x (0:0,cm=7,il=20,b=0) r5.x (0:0,cm=7,il=24,b=0) r1.z (0:0,cm=3,il=28,b=0)
+; VERT: 42 instructions, 0 half, 7 full
diff --git a/reference/xonotic-gl2/xonotic-glx-gl2-16.asm b/reference/xonotic-gl2/xonotic-glx-gl2-16.asm
index be51a42..20eca61 100644
--- a/reference/xonotic-gl2/xonotic-glx-gl2-16.asm
+++ b/reference/xonotic-gl2/xonotic-glx-gl2-16.asm
@@ -6,143 +6,106 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c7.x) 0xbf000000, 0x40000000, 0xbf800000, 0x00000000
+@const(c8.x) 0x3e800000, 0x00000000, 0x00000000, 0x00000000
+@const(c9.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.z, 2, r0.x
-bary.f r0.w, 0, r0.x
-bary.f r1.x, 3, r0.x
+bary.f r0.w, 3, r0.x
+bary.f r1.x, 0, r0.x
bary.f r1.y, 1, r0.x
mov.f32f32 r1.z, r0.z
mov.f32f32 r1.w, r0.w
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r2.x, r1.z
-mov.f32f32 r1.z, r1.x
-mov.f32f32 r2.z, r1.w
-mov.f32f32 r1.w, r1.y
-mov.f32f32 r3.x, r0.w
-mov.f32f32 r2.y, r1.z
-mov.f32f32 r0.w, r1.y
-mov.f32f32 r2.w, r1.w
-mov.f32f32 r1.y, r0.z
-mov.f32f32 r0.z, r1.x
-mov.f32f32 r3.y, r0.w
-bary.f r0.w, 4, r0.x
-sam (f32)(xyz)r1.z, r2.x, s#3, t#3
-(sy)mad.f32 r1.x, c7.y, r1.z, c7.z
-mad.f32 r1.z, c7.y, r1.w, c7.z
-sam (f32)(xyz)r3.z, r2.z, s#0, t#0
-(sy)add.f r1.w, r3.z, c7.x
-(ss)mad.f32 r2.x, c7.y, r2.x, c7.z
-mov.f32f32 r1.x, r1.x
-bary.f r2.y, 8, r0.x
-mov.f32f32 r1.w, r1.w
-bary.f r2.z, 12, r0.x
-mul.f r0.w, r1.x, r0.w
-mul.f r2.y, r1.x, r2.y
-mov.f32f32 r1.z, r1.z
-bary.f r2.w, 9, r0.x
-mul.f r3.z, r1.w, r1.w
+mov.f32f32 r2.x, r1.x
+mov.f32f32 r2.y, r1.y
+sam (f32)(xyz)r2.z, r0.z, s#3, t#3
+(sy)(ss)mad.f32 r0.z, c7.y, r2.z, c7.z
+bary.f r0.w, 12, r0.x
+mad.f32 r2.z, c7.y, r2.w, c7.z
+mad.f32 r2.w, c7.y, r3.x, c7.z
+mov.f32f32 r3.x, r0.z
+bary.f r3.y, 8, r0.x
+sam (f32)(xyz)r3.z, r2.x, s#0, t#0
+(sy)(ss)add.f r2.x, r3.z, c7.x
+bary.f r2.y, 4, r0.x
+mul.f r0.z, r0.z, r0.w
+mul.f r0.w, r3.x, r3.y
+mov.f32f32 r3.y, r2.z
+bary.f r3.z, 9, r0.x
+mov.f32f32 r4.y, r2.x
+mul.f r2.y, r3.x, r2.y
+bary.f r3.x, 13, r0.x
+mad.f32 r0.w, r3.y, r3.z, r0.w
+mov.f32f32 r3.z, r2.w
+bary.f r4.z, 10, r0.x
+mul.f r2.x, r2.x, r4.y
add.f r3.w, r3.w, c7.x
-mul.f r1.x, r1.x, r2.z
-mad.f32 r2.y, r1.z, r2.w, r2.y
-bary.f r2.z, 13, r0.x
-mov.f32f32 r2.w, r3.w
-bary.f r3.w, 5, r0.x
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r2.x, r2.x
-bary.f r4.y, 10, r0.x
-mad.f32 r3.z, r2.w, r2.w, r3.z
-mad.f32 r1.x, r1.z, r2.z, r1.x
-mad.f32 r0.w, r1.z, r3.w, r0.w
-mad.f32 r1.z, r2.x, r4.y, r2.y
-mov.f32f32 r2.y, r3.z
-add.f r2.z, r4.x, c7.x
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r3.z, r1.z
-mov.f32f32 r0.w, r0.w
-bary.f r1.z, 6, r0.x
-mov.f32f32 r2.z, r2.z
+bary.f r4.w, 5, r0.x
+mad.f32 r0.w, r3.z, r4.z, r0.w
+mad.f32 r0.z, r2.z, r3.x, r0.z
+mov.f32f32 r2.z, r3.w
+mad.f32 r2.y, r3.y, r4.w, r2.y
+mov.f32f32 r3.x, r0.w
+bary.f r3.y, 6, r0.x
+mad.f32 r2.x, r3.w, r2.z, r2.x
+add.f r3.w, r4.x, c7.x
bary.f (ei)r0.x, 14, r0.x
-sam (f32)(xyzw)r3.w, r3.x, s#1, t#1
-(sy)mul.f r0.y, r4.z, c6.x
-mad.f32 r0.w, r2.x, r1.z, r0.w
-mad.f32 r2.y, r2.z, r2.z, r2.y
-mad.f32 r0.x, r2.x, r0.x, r1.x
-mov.f32f32 r0.y, r0.y
-mul.f r1.x, r0.w, r0.w
-mov.f32f32 r1.z, r0.z
-mad.f32 r0.z, r3.z, r3.z, r1.x
-mov.f32f32 r0.x, r0.x
-rsq r1.x, r2.y
+mad.f32 r0.y, r3.z, r3.y, r2.y
+sam (f32)(xyz)r4.z, r1.z, s#2, t#2
+sam (f32)(xyzw)r5.y, r1.x, s#1, t#1
+(sy)(ss)mul.f r1.w, r6.x, c6.x
+mov.f32f32 r1.x, r3.w
+mad.f32 r0.x, r2.w, r0.x, r0.z
+mul.f r0.z, r0.y, r0.y
nop
-(ss)mov.f32f32 r1.x, r1.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
+mad.f32 r0.z, r0.w, r3.x, r0.z
+mov.f32f32 r0.w, r0.x
+mad.f32 r1.x, r1.x, r1.x, r2.x
(rpt1)nop
-mad.f32 r0.z, r0.x, r0.x, r0.z
-mul.f r1.w, r1.w, r1.x
-mul.f r2.x, r2.w, r1.x
-mul.f r1.x, r2.z, r1.x
+mov.f32f32 r0.w, r0.w
(rpt2)nop
+mad.f32 r0.z, r0.w, r0.w, r0.z
+rsq r0.w, r1.x
+(ss)mov.f32f32 r1.x, r0.w
+(rpt3)nop
+mul.f r1.y, r4.y, r1.x
rsq r0.z, r0.z
-(ss)mov.f32f32 r0.z, r0.z
-mov.f32f32 r2.y, r1.w
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r1.x, r1.x
-mul.f r0.w, r0.w, r0.z
+(ss)mov.f32f32 r1.z, r0.z
mul.f r0.x, r0.x, r0.z
-mul.f r0.z, r3.z, r0.z
-mov.f32f32 r1.w, r0.y
-mov.f32f32 r0.y, r0.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.z, r0.z
-sam (f32)(xyz)r2.z, r1.y, s#2, t#2
+(ss)mul.f r0.z, r2.z, r1.x
+mul.f r0.w, r3.w, r0.w
+mul.f r0.y, r0.y, r1.z
+mov.f32f32 r1.x, r0.x
+mul.f r1.z, r3.x, r1.z
nop
-mul.f r0.y, r2.y, r0.y
-max.f r0.w, c8.x, r0.x
-mad.f32 r0.y, r2.x, r0.z, r0.y
-(rpt2)nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.w
-mad.f32 r0.x, r1.x, r0.x, r0.y
-(rpt2)nop
-mov.f32f32 r0.x, r0.x
+mul.f r0.y, r1.y, r0.y
+max.f r1.x, c8.x, r1.x
+mad.f32 r0.y, r0.z, r1.z, r0.y
+nop
+mad.f32 r0.x, r0.w, r0.x, r0.y
(rpt2)nop
max.f r0.x, r0.x, c7.w
-rcp r0.y, r0.z
-(ss)mov.f32f32 r0.y, r0.y
-(rpt1)nop
-mov.f32f32 r0.x, r0.x
-(sy)(ss)mul.f r0.z, r3.x, r0.y
-mul.f r0.w, r2.w, r0.y
-mul.f r0.y, r2.z, r0.y
-mul.f r1.x, c5.z, r0.x
-mul.f r1.y, c5.y, r0.x
+rcp r0.y, r1.x
+(ss)mov.f32f32 r0.z, r0.y
+mul.f r0.y, r4.z, r0.y
+nop
+mov.f32f32 r0.w, r0.x
mul.f r0.x, c5.x, r0.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.w, r0.w
-mad.f32 r0.z, r1.x, r0.z, c4.z
-mov.f32f32 r0.y, r0.y
-(rpt1)nop
-mov.f32f32 r0.z, r0.z
-mad.f32 r0.w, r1.y, r0.w, c4.y
+(ss)mul.f r1.x, r5.x, r0.z
+mul.f r0.z, r4.w, r0.z
+mul.f r1.y, c5.z, r0.w
+mul.f r0.w, c5.y, r0.w
mad.f32 r0.x, r0.x, r0.y, c4.x
nop
-mul.f r0.y, r4.y, r0.z
-mov.f32f32 r0.z, r0.w
-mov.f32f32 r0.x, r0.x
-nop
-mov.f32f32 r1.z, r0.y
-mul.f r0.y, r4.x, r0.z
-mul.f r0.x, r3.w, r0.x
+mad.f32 r0.y, r1.y, r1.x, c4.z
+mad.f32 r0.z, r0.w, r0.z, c4.y
(rpt1)nop
-mov.f32f32 r1.y, r0.y
-mov.f32f32 r1.x, r0.x
+mul.f r1.z, r5.w, r0.y
+mul.f r1.y, r5.z, r0.z
+mul.f r1.x, r5.y, r0.x
end
+nop
+nop
; FRAG: outputs: r1.x (1:0)
-; FRAG: inputs: r0.y (5:20,cm=f,il=8,b=1) r1.x (5:21,cm=f,il=12,b=1) r2.x (5:22,cm=f,il=16,b=1) r3.x (5:23,cm=f,il=20,b=1)
-; FRAG: 148 instructions, 0 half, 5 full
+; FRAG: inputs: r0.x (5:20,cm=f,il=8,b=1) r1.x (5:21,cm=f,il=12,b=1) r2.x (5:22,cm=f,il=16,b=1) r3.x (5:23,cm=f,il=20,b=1)
+; FRAG: 103 instructions, 0 half, 7 full
diff --git a/reference/xonotic-gl2/xonotic-glx-gl2-17.asm b/reference/xonotic-gl2/xonotic-glx-gl2-17.asm
index 7603f89..d468c8e 100644
--- a/reference/xonotic-gl2/xonotic-glx-gl2-17.asm
+++ b/reference/xonotic-gl2/xonotic-glx-gl2-17.asm
@@ -1,24 +1,24 @@
; options:
; VERT: new compiler
-@in(r1.z) in0
-@in(r1.w) in1
-@in(r2.x) in2
-@in(r2.y) in3
+@in(r4.w) in0
+@in(r5.x) in1
+@in(r5.y) in2
+@in(r5.z) in3
@in(r0.x) in4
@in(r0.y) in5
@in(r0.z) in6
@in(r0.w) in7
-@in(r5.x) in8
-@in(r5.y) in9
-@in(r5.z) in10
-@in(r2.z) in12
-@in(r2.w) in13
-@in(r3.x) in14
-@in(r3.y) in16
-@in(r3.z) in17
-@in(r3.w) in18
-@in(r5.w) in20
-@in(r6.x) in21
+@in(r2.x) in8
+@in(r2.y) in9
+@in(r2.z) in10
+@in(r3.x) in12
+@in(r3.y) in13
+@in(r3.z) in14
+@in(r4.x) in16
+@in(r4.y) in17
+@in(r4.z) in18
+@in(r1.z) in20
+@in(r1.w) in21
@out(r0.x) out0
@out(r0.y) out1
@out(r0.z) out2
@@ -39,75 +39,36 @@
@out(r4.y) out17
@out(r4.z) out18
@out(r4.w) out19
+@const(c8.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)mul.f r1.x, c4.y, r0.x
mul.f r0.x, c4.x, r0.x
mad.f32 r1.x, c5.y, r0.y, r1.x
mad.f32 r0.x, c5.x, r0.y, r0.x
mad.f32 r0.y, c6.y, r0.z, r1.x
mad.f32 r0.x, c6.x, r0.z, r0.x
-mad.f32 r0.y, c7.y, r0.w, r0.y
-mad.f32 r0.x, c7.x, r0.w, r0.x
-mul.f r0.z, c0.w, r1.z
-mul.f r0.w, c0.z, r1.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
-mad.f32 r0.z, c1.w, r1.w, r0.z
-mad.f32 r0.w, c1.z, r1.w, r0.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
-mad.f32 r0.z, c2.w, r2.x, r0.z
-mad.f32 r0.w, c2.z, r2.x, r0.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
-(rpt1)nop
-mov.f32f32 r1.y, r0.y
-mov.f32f32 r1.x, r0.x
-mad.f32 r0.x, c3.w, r2.y, r0.z
-mad.f32 r0.y, c3.z, r2.y, r0.w
-mul.f r4.x, c0.y, r1.z
-mul.f r1.z, c0.x, r1.z
-mov.f32f32 r0.w, r0.x
-mov.f32f32 r0.z, r0.y
-mad.f32 r0.x, c1.y, r1.w, r4.x
-mad.f32 r0.y, c1.x, r1.w, r1.z
-mad.f32 r0.x, c2.y, r2.x, r0.x
-mad.f32 r0.y, c2.x, r2.x, r0.y
-mad.f32 r0.x, c3.y, r2.y, r0.x
-mad.f32 r1.z, c3.x, r2.y, r0.y
-(rpt1)nop
-mov.f32f32 r0.y, r0.x
-mov.f32f32 r0.x, r1.z
-mov.f32f32 r1.z, c8.x
-mov.f32f32 r1.w, r3.w
-mov.f32f32 r2.x, r3.z
-mov.f32f32 r2.y, r3.y
-mov.f32f32 r4.w, r1.z
-mov.f32f32 r4.z, r1.w
-mov.f32f32 r4.y, r2.x
-mov.f32f32 r4.x, r2.y
-mov.f32f32 r1.z, c8.x
-mov.f32f32 r1.w, r3.x
-mov.f32f32 r2.x, r2.w
-mov.f32f32 r2.y, r2.z
-mov.f32f32 r3.w, r1.z
-mov.f32f32 r3.z, r1.w
-mov.f32f32 r3.y, r2.x
-mov.f32f32 r3.x, r2.y
-mov.f32f32 r1.z, c8.x
-mov.f32f32 r1.w, r5.z
-mov.f32f32 r2.x, r5.y
-mov.f32f32 r5.x, r5.x
-mov.f32f32 r2.w, r1.z
-mov.f32f32 r2.z, r1.w
-mov.f32f32 r2.y, r2.x
-mov.f32f32 r2.x, r5.x
-mov.f32f32 r1.z, r6.x
-mov.f32f32 r5.x, r5.w
-(rpt1)nop
-mov.f32f32 r1.w, r1.z
-mov.f32f32 r1.z, r5.x
+mad.f32 r1.y, c7.y, r0.w, r0.y
+mad.f32 r1.x, c7.x, r0.w, r0.x
+mul.f r0.x, c0.w, r4.w
+mul.f r0.y, c0.z, r4.w
+mad.f32 r0.x, c1.w, r5.x, r0.x
+mad.f32 r0.y, c1.z, r5.x, r0.y
+mad.f32 r0.x, c2.w, r5.y, r0.x
+mad.f32 r0.y, c2.z, r5.y, r0.y
+mad.f32 r0.w, c3.w, r5.z, r0.x
+mad.f32 r0.z, c3.z, r5.z, r0.y
+mul.f r0.x, c0.y, r4.w
+mul.f r0.y, c0.x, r4.w
+mad.f32 r0.x, c1.y, r5.x, r0.x
+mad.f32 r0.y, c1.x, r5.x, r0.y
+mad.f32 r0.x, c2.y, r5.y, r0.x
+mad.f32 r2.w, c2.x, r5.y, r0.y
+mad.f32 r0.y, c3.y, r5.z, r0.x
+mad.f32 r0.x, c3.x, r5.z, r2.w
+mov.f32f32 r4.w, c8.x
+mov.f32f32 r3.w, c8.x
+mov.f32f32 r2.w, c8.x
end
; VERT: outputs: r0.x (0:0) r1.x (5:20) r2.x (5:21) r3.x (5:22) r4.x (5:23)
-; VERT: inputs: r1.z (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=f,il=12,b=0) r5.x (0:0,cm=7,il=16,b=0) r2.z (0:0,cm=7,il=20,b=0) r3.y (0:0,cm=7,il=24,b=0) r5.w (0:0,cm=3,il=28,b=0)
-; VERT: 71 instructions, 0 half, 7 full
+; VERT: inputs: r4.w (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=f,il=12,b=0) r2.x (0:0,cm=7,il=16,b=0) r3.x (0:0,cm=7,il=20,b=0) r4.x (0:0,cm=7,il=24,b=0) r1.z (0:0,cm=3,il=28,b=0)
+; VERT: 28 instructions, 0 half, 6 full
diff --git a/reference/xonotic-gl2/xonotic-glx-gl2-18.asm b/reference/xonotic-gl2/xonotic-glx-gl2-18.asm
index 3846190..faa1d33 100644
--- a/reference/xonotic-gl2/xonotic-glx-gl2-18.asm
+++ b/reference/xonotic-gl2/xonotic-glx-gl2-18.asm
@@ -6,215 +6,150 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c10.x) 0x3f000000, 0xbf000000, 0x40000000, 0xbf800000
+@const(c11.x) 0x00000000, 0x3f800000, 0x3e800000, 0x00000000
+@const(c12.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.z, 2, r0.x
-bary.f r0.w, 0, r0.x
-bary.f r1.x, 4, r0.x
-bary.f r1.y, 3, r0.x
-mov.f32f32 r1.z, r0.z
-mov.f32f32 r1.w, r0.w
-mov.f32f32 r2.y, r0.w
-mul.f r2.z, r1.x, r1.x
-mov.f32f32 r2.w, r1.z
-mov.f32f32 r1.z, r1.y
-mov.f32f32 r3.y, r1.w
-bary.f r3.w, 1, r0.x
-mov.f32f32 r4.x, r2.y
-mov.f32f32 r3.x, r1.z
-bary.f r1.z, 5, r0.x
-mov.f32f32 r1.w, r3.w
-mov.f32f32 r2.y, r3.w
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.z, r0.z
-mad.f32 r2.z, r1.z, r1.z, r2.z
-sam (f32)(xyz)r4.z, r2.w, s#4, t#4
-(sy)(ss)mad.f32 r2.w, c10.z, r4.z, c10.w
-mov.f32f32 r3.z, r1.w
-mov.f32f32 r4.y, r2.y
-mov.f32f32 r1.w, r2.z
-mov.f32f32 r2.y, r2.w
-bary.f r2.z, 12, r0.x
-bary.f r2.w, 16, r0.x
-bary.f r3.x, 8, r0.x
-sam (f32)(xyz)r5.y, r3.y, s#0, t#0
-(sy)(ss)add.f r3.y, r5.y, c10.y
-mul.f r2.z, r2.y, r2.z
-mad.f32 r3.z, c10.z, r4.w, c10.w
-mul.f r2.w, r2.y, r2.w
-mov.f32f32 r3.y, r3.y
-mul.f r2.y, r2.y, r3.x
-mov.f32f32 r3.x, r3.z
-bary.f r3.z, 13, r0.x
-mul.f r4.z, r3.y, r3.y
-add.f r4.w, r5.z, c10.y
-bary.f r5.y, 17, r0.x
-mad.f32 r2.z, r3.x, r3.z, r2.z
-bary.f r3.z, 9, r0.x
-mov.f32f32 r4.w, r4.w
-mad.f32 r2.w, r3.x, r5.y, r2.w
-mov.f32f32 r2.z, r2.z
-mad.f32 r5.x, c10.z, r5.x, c10.w
-mad.f32 r4.z, r4.w, r4.w, r4.z
-mov.f32f32 r2.w, r2.w
-mad.f32 r2.y, r3.x, r3.z, r2.y
-mov.f32f32 r3.x, r5.x
-bary.f r3.z, 14, r0.x
-mov.f32f32 r4.z, r4.z
-add.f r5.x, r5.w, c10.y
-bary.f r5.y, 18, r0.x
-mad.f32 r2.z, r3.x, r3.z, r2.z
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r3.z, r5.x
-bary.f r5.x, 10, r0.x
-mov.f32f32 r2.z, r2.z
-mad.f32 r2.w, r3.x, r5.y, r2.w
-mad.f32 r4.z, r3.z, r3.z, r4.z
-mad.f32 r2.y, r3.x, r5.x, r2.y
+bary.f r0.w, 3, r0.x
+bary.f r1.x, 0, r0.x
+bary.f r1.z, 4, r0.x
+mov.f32f32 r2.y, r0.z
+mov.f32f32 r2.z, r0.w
+mov.f32f32 r2.w, r1.x
+bary.f r1.y, 1, r0.x
+sam (f32)(xyz)r3.y, r0.z, s#4, t#4
+(sy)(ss)mad.f32 r0.z, c10.z, r3.y, c10.w
+mov.f32f32 r4.x, r1.x
+bary.f r0.w, 16, r0.x
+mul.f r1.w, r1.z, r1.z
+mov.f32f32 r3.y, r0.z
+bary.f r4.z, 12, r0.x
+mov.f32f32 r4.y, r1.y
+mov.f32f32 r3.x, r1.y
+bary.f r4.w, 8, r0.x
+mul.f r4.z, r3.y, r4.z
+mad.f32 r3.z, c10.z, r3.z, c10.w
+mul.f r0.z, r0.z, r0.w
+mul.f r0.w, r3.y, r4.w
+bary.f r3.y, 17, r0.x
+mov.f32f32 r4.w, r3.z
+bary.f r5.x, 13, r0.x
+sam (f32)(xyz)r5.y, r4.x, s#0, t#0
+(sy)(ss)add.f r4.x, r5.y, c10.y
+sam (f32)(xyzw)r6.x, r2.w, s#1, t#1
+(ss)bary.f r2.w, 9, r0.x
+(sy)cmps.f.lt r3.x, r6.w, c10.x
+mad.f32 r4.y, r4.w, r5.x, r4.z
+mad.f32 r3.w, c10.z, r3.w, c10.w
+mov.f32f32 r4.z, r4.x
+cov.u32f32 r3.x, r3.x
+mad.f32 r0.w, r4.w, r2.w, r0.w
+mov.f32f32 r2.w, r3.w
+bary.f r4.w, 14, r0.x
+mul.f r4.x, r4.x, r4.z
+add.f r5.x, r5.z, c10.y
+mov.f32f32 r5.y, (0.000000)
+mad.f32 r4.y, r2.w, r4.w, r4.y
+bary.f r4.w, 10, r0.x
+mad.f32 r0.z, r3.z, r3.y, r0.z
+bary.f r3.y, 5, r0.x
+mov.f32f32 r3.z, r4.y
+mad.f32 r0.w, r2.w, r4.w, r0.w
+mov.f32f32 r2.w, r5.x
+cmps.f.ne p0.x, r3.x, r5.y
+bary.f r3.x, 18, r0.x
+mul.f r4.w, r0.w, r0.w
+mad.f32 r4.x, r5.x, r2.w, r4.x
+mad.f32 r4.y, r4.y, r3.z, r4.w
+mad.f32 r0.z, r3.w, r3.x, r0.z
+add.f r3.x, r5.w, c10.y
bary.f (ei)r0.x, 6, r0.x
-sam (f32)(xyzw)r5.x, r4.x, s#1, t#1
-mov.f32f32 r0.y, r2.w
-(sy)cmps.f.lt r2.w, r5.w, c10.x
-mul.f r3.x, r2.y, r2.y
-mad.f32 r1.w, r0.x, r0.x, r1.w
-mad.f32 r3.x, r2.z, r2.z, r3.x
-(ss)rsq r4.x, r4.z
-(ss)mov.f32f32 r4.x, r4.x
+kill p0.x
+mov.f32f32 r0.y, r0.z
+mov.f32f32 r3.w, r3.x
+mad.f32 r4.w, r3.y, r3.y, r1.w
+mul.f r1.w, r6.w, c9.x
mov.f32f32 r0.y, r0.y
-cov.u32f32 r2.w, r2.w
-mov.f32f32 r3.x, r3.x
-mul.f r3.y, r3.y, r4.x
-mad.f32 r3.x, r0.y, r0.y, r3.x
-mul.f r4.y, r4.w, r4.x
-mul.f r3.z, r3.z, r4.x
-(rpt3)nop
-rsq r3.x, r3.x
-(ss)mov.f32f32 r3.x, r3.x
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r4.x, r4.y
+mad.f32 r3.w, r3.w, r3.w, r4.x
+mad.f32 r4.x, r0.x, r0.x, r4.w
+mul.f r4.w, r6.z, c5.z
+mad.f32 r0.y, r0.y, r0.y, r4.y
+mul.f r4.y, r6.z, c6.z
+mul.f r5.x, r6.y, c6.y
+mul.f r5.y, r6.x, c6.x
+rsq r3.w, r3.w
+(ss)mov.f32f32 r5.z, r3.w
+mul.f r3.x, r3.x, r3.w
+(ss)rsq r3.w, r4.x
+(ss)mov.f32f32 r4.x, r3.w
+rsq r0.y, r0.y
+(ss)mov.f32f32 r5.w, r0.y
+(ss)mul.f r0.y, r0.z, r0.y
+mul.f r0.z, r4.z, r5.z
+mul.f r2.w, r2.w, r5.z
+mul.f r0.w, r0.w, r5.w
+mov.f32f32 r4.z, r0.y
+mul.f r3.z, r3.z, r5.w
+mov.f32f32 r5.z, r0.z
+mul.f r0.z, r0.z, r0.w
+mov.f32f32 r0.w, r0.w
+mad.f32 r0.z, r2.w, r3.z, r0.z
+max.f r5.w, c11.z, r4.z
+mad.f32 r0.z, r3.x, r4.z, r0.z
+mul.f r6.z, r5.z, r0.w
mov.f32f32 r3.z, r3.z
-mul.f r2.y, r2.y, r3.x
-mul.f r0.y, r0.y, r3.x
-mul.f r2.z, r2.z, r3.x
-mov.f32f32 r3.x, (0.000000)
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r2.z, r2.z
-cmps.f.ne p0.x, r2.w, r3.x
-mul.f r2.w, r3.y, r2.y
-mul.f r3.x, r3.y, r2.y
-mad.f32 r2.w, r4.x, r2.z, r2.w
-mad.f32 r3.x, r4.x, r2.z, r3.x
-max.f r4.y, c11.z, r0.y
-rsq r1.w, r1.w
-(ss)mov.f32f32 r1.w, r1.w
mov.f32f32 r2.w, r2.w
+mul.f r5.z, r0.z, r5.z
mov.f32f32 r3.x, r3.x
-mad.f32 r2.w, r3.z, r0.y, r2.w
-mad.f32 r3.x, r3.z, r0.y, r3.x
-mov.f32f32 r4.y, r4.y
-kill p0.x
-mul.f r3.y, r2.w, r3.y
-mul.f r4.x, r2.w, r4.x
-mul.f r2.w, r2.w, r3.z
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r3.z, r4.x
-mov.f32f32 r2.w, r2.w
-max.f r3.x, r3.x, c11.x
-mul.f r3.y, c10.z, r3.y
-mul.f r3.z, c10.z, r3.z
+rcp r5.w, r5.w
+(ss)mov.f32f32 r6.w, r5.w
+mad.f32 r6.z, r2.w, r3.z, r6.z
+mul.f r5.z, c10.z, r5.z
+mad.f32 r4.z, r3.x, r4.z, r6.z
+mul.f r2.w, r0.z, r2.w
+mul.f r0.z, r0.z, r3.x
+add.f r0.w, r0.w, (neg)r5.z
+mul.f r1.z, r1.z, r4.x
+max.f r3.x, r4.z, c11.x
mul.f r2.w, c10.z, r2.w
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r3.z, r3.z
-mov.f32f32 r2.w, r2.w
-mul.f r4.x, r5.z, c6.z
-add.f r2.y, r2.y, (neg)r3.y
-add.f r2.z, r2.z, (neg)r3.z
-add.f r0.y, r0.y, (neg)r2.w
-mul.f r2.w, r4.x, r3.x
-mov.f32f32 r2.y, r2.y
-mul.f r1.x, r1.x, r1.w
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r1.x, r1.x
-mul.f r3.y, r5.y, c6.y
-mul.f r3.z, r5.x, c6.x
-rcp r4.x, r4.y
-(ss)mov.f32f32 r4.x, r4.x
-mul.f r1.x, r2.y, r1.x
-mul.f r1.z, r1.z, r1.w
-mul.f r2.y, r3.y, r3.x
-mul.f r3.x, r3.z, r3.x
-nop
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r3.y, r0.z
-mad.f32 r0.z, r2.z, r1.z, r1.x
-mov.f32f32 r1.x, r1.y
-mul.f r0.x, r0.x, r1.w
-nop
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r3.z, r1.x
-mov.f32f32 r0.x, r0.x
-mul.f r1.x, r5.w, c9.x
-mul.f r1.y, r5.z, c5.z
-mul.f r1.z, r5.y, c5.y
-mad.f32 r0.x, r0.y, r0.x, r0.z
-mov.f32f32 r0.y, r1.x
-nop
-sam (f32)(xyz)r5.y, r3.y, s#3, t#3
-(sy)mul.f r0.z, r5.w, r4.x
-mov.f32f32 r0.x, r0.x
-mul.f r1.x, r5.z, r4.x
-mul.f r1.w, r5.y, r4.x
-nop
+mul.f r0.z, c10.z, r0.z
+mul.f r0.w, r0.w, r1.z
+mov.f32f32 r1.z, r3.x
+add.f r2.w, r3.z, (neg)r2.w
+mul.f r3.y, r3.y, r4.x
+mul.f r3.x, r5.y, r3.x
+add.f r0.y, r0.y, (neg)r0.z
+sam (f32)(xyz)r7.x, r2.y, s#3, t#3
+mul.f r0.z, r4.y, r1.z
+mad.f32 r0.w, r2.w, r3.y, r0.w
+mul.f r0.x, r0.x, r3.w
+mul.f r1.z, r5.x, r1.z
+(sy)(ss)mul.f r2.y, r7.z, r6.w
+mul.f r2.z, r7.y, r6.w
+mad.f32 r0.x, r0.y, r0.x, r0.w
+mul.f r0.y, r6.y, c5.y
+mul.f r0.w, r6.x, c5.x
+mul.f r2.w, r7.x, r5.w
max.f r0.x, (neg)r0.x, c11.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r2.z, r1.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r1.w, r0.y
-mov.f32f32 r0.y, r1.y
-mov.f32f32 r1.y, r1.z
-mul.f r1.z, r5.x, c5.x
-(ss)mov.f32f32 r3.y, r0.w
-mov.f32f32 r0.w, r3.w
-log2 r0.x, r0.x
-(rpt2)nop
-mov.f32f32 r3.z, r0.w
-mov.f32f32 r0.w, r1.z
-(rpt4)nop
-sam (f32)(xyzw)r3.y, r3.y, s#2, t#2
-(sy)mad.f32 r1.z, c8.x, r4.x, c11.y
-mul.f r3.w, r3.w, c7.z
-(ss)mul.f r3.z, r3.z, c7.y
+sam (f32)(xyzw)r3.y, r1.x, s#2, t#2
+(sy)(ss)mul.f r1.x, r3.w, c7.z
+mul.f r1.y, r3.z, c7.y
+mad.f32 r3.z, c8.x, r4.x, c11.y
mul.f r3.y, r3.y, c7.x
-mov.f32f32 r1.z, r1.z
-(rpt2)nop
-(ss)mul.f r0.x, r1.z, r0.x
-(rpt2)nop
-mov.f32f32 r0.x, r0.x
+(rpt1)nop
+log2 r0.x, r0.x
+(ss)mul.f r0.x, r3.z, r0.x
(rpt5)nop
exp2 r0.x, r0.x
-(ss)mad.f32 r1.z, r3.w, r0.x, r2.w
-mad.f32 r2.y, r3.z, r0.x, r2.y
+(ss)mad.f32 r0.z, r1.x, r0.x, r0.z
+mad.f32 r1.x, r1.y, r0.x, r1.z
(ss)mad.f32 r0.x, r3.y, r0.x, r3.x
nop
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r0.x, r0.x
-nop
-mad.f32 r0.y, r1.z, r0.z, r0.y
-mad.f32 r0.z, r2.y, r1.x, r1.y
-mad.f32 r0.x, r0.x, r2.z, r0.w
-nop
-mov.f32f32 r1.z, r0.y
-mov.f32f32 r1.y, r0.z
-mov.f32f32 r1.x, r0.x
+mad.f32 r1.z, r0.z, r2.y, r4.w
+mad.f32 r1.y, r1.x, r2.z, r0.y
+mad.f32 r1.x, r0.x, r2.w, r0.w
end
; FRAG: outputs: r1.x (1:0)
-; FRAG: inputs: r0.z (5:20,cm=f,il=8,b=1) r1.x (5:21,cm=f,il=12,b=1) r2.x (5:22,cm=f,il=16,b=1) r3.x (5:23,cm=f,il=20,b=1) r4.x (5:24,cm=f,il=24,b=1)
-; FRAG: 226 instructions, 0 half, 6 full
+; FRAG: inputs: r0.x (5:20,cm=f,il=8,b=1) r1.x (5:21,cm=f,il=12,b=1) r2.x (5:22,cm=f,il=16,b=1) r3.x (5:23,cm=f,il=20,b=1) r4.x (5:24,cm=f,il=24,b=1)
+; FRAG: 146 instructions, 0 half, 8 full
diff --git a/reference/xonotic-gl2/xonotic-glx-gl2-19.asm b/reference/xonotic-gl2/xonotic-glx-gl2-19.asm
index 9a1b31d..26f774e 100644
--- a/reference/xonotic-gl2/xonotic-glx-gl2-19.asm
+++ b/reference/xonotic-gl2/xonotic-glx-gl2-19.asm
@@ -1,22 +1,22 @@
; options:
; VERT: new compiler
-@in(r2.w) in0
-@in(r3.x) in1
-@in(r3.y) in2
-@in(r3.z) in3
+@in(r6.x) in0
+@in(r6.y) in1
+@in(r6.z) in2
+@in(r6.w) in3
@in(r0.x) in4
@in(r0.y) in5
@in(r0.z) in6
@in(r0.w) in7
-@in(r6.x) in8
-@in(r6.y) in9
-@in(r6.z) in10
-@in(r3.w) in12
-@in(r4.x) in13
-@in(r4.y) in14
-@in(r4.z) in16
-@in(r4.w) in17
-@in(r5.x) in18
+@in(r3.x) in8
+@in(r3.y) in9
+@in(r3.z) in10
+@in(r4.x) in12
+@in(r4.y) in13
+@in(r4.z) in14
+@in(r5.x) in16
+@in(r5.y) in17
+@in(r5.z) in18
@in(r1.z) in20
@in(r1.w) in21
@out(r0.x) out0
@@ -43,99 +43,53 @@
@out(r5.y) out21
@out(r5.z) out22
@out(r5.w) out23
-(sy)(ss)add.f r1.x, c4.x, (neg)r2.w
+@const(c9.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
+@const(c10.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
+(sy)(ss)add.f r1.x, c4.x, (neg)r6.x
mul.f r1.y, c5.y, r0.x
mul.f r0.x, c5.x, r0.x
+mul.f r2.w, c0.w, r6.x
+mul.f r2.x, r1.x, r5.x
+add.f r2.y, c4.y, (neg)r6.y
+mul.f r2.z, r1.x, r4.x
+mul.f r1.x, r1.x, r3.x
mad.f32 r1.y, c6.y, r0.y, r1.y
-mul.f r2.x, r1.x, r4.z
-add.f r5.y, c4.y, (neg)r3.x
-mad.f32 r1.y, c7.y, r0.z, r1.y
+mad.f32 r2.x, r2.y, r5.y, r2.x
+add.f r3.w, c4.z, (neg)r6.z
+mad.f32 r4.w, r2.y, r4.y, r2.z
+mad.f32 r1.x, r2.y, r3.y, r1.x
+nop
+mad.f32 r2.z, r3.w, r5.z, r2.x
+mad.f32 r2.y, r3.w, r4.z, r4.w
+mad.f32 r2.x, r3.w, r3.z, r1.x
+mad.f32 r1.x, c7.y, r0.z, r1.y
mad.f32 r0.x, c6.x, r0.y, r0.x
-mul.f r0.y, r1.x, r3.w
-mad.f32 r2.x, r5.y, r4.w, r2.x
-mad.f32 r1.y, c8.y, r0.w, r1.y
+mad.f32 r1.y, c8.y, r0.w, r1.x
mad.f32 r0.x, c7.x, r0.z, r0.x
-mad.f32 r0.y, r5.y, r4.x, r0.y
-mov.f32f32 r0.z, r2.x
-add.f r2.x, c4.z, (neg)r3.y
-mov.f32f32 r1.y, r1.y
-mad.f32 r0.x, c8.x, r0.w, r0.x
-mov.f32f32 r0.y, r0.y
-mad.f32 r0.z, r2.x, r5.x, r0.z
-mov.f32f32 r0.w, r1.y
-mov.f32f32 r0.x, r0.x
-mad.f32 r0.y, r2.x, r4.y, r0.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r1.y, r0.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r2.y, r0.y
-mov.f32f32 r2.z, r0.z
-mul.f r0.y, r1.x, r6.x
-mov.f32f32 r1.x, r0.x
-mad.f32 r0.x, r5.y, r6.y, r0.y
-mul.f r0.y, c0.w, r2.w
-mul.f r0.z, c0.z, r2.w
-mul.f r0.w, c0.y, r2.w
-mov.f32f32 r0.x, r0.x
-mad.f32 r0.y, c1.w, r3.x, r0.y
-mad.f32 r0.x, r2.x, r6.z, r0.x
-mad.f32 r0.y, c2.w, r3.y, r0.y
-mad.f32 r0.z, c1.z, r3.x, r0.z
-mad.f32 r0.w, c1.y, r3.x, r0.w
-mov.f32f32 r2.x, r0.x
-mad.f32 r0.x, c3.w, r3.z, r0.y
-mad.f32 r0.y, c2.z, r3.y, r0.z
-mad.f32 r0.z, c2.y, r3.y, r0.w
-mul.f r2.w, c0.x, r2.w
-mov.f32f32 r0.w, r0.x
-mad.f32 r0.x, c3.z, r3.z, r0.y
-mad.f32 r0.y, c3.y, r3.z, r0.z
-mad.f32 r2.w, c1.x, r3.x, r2.w
-mov.f32f32 r3.x, c9.x
-mov.f32f32 r0.z, r0.x
-mov.f32f32 r0.y, r0.y
-mad.f32 r0.x, c2.x, r3.y, r2.w
-mov.f32f32 r5.w, r3.x
-mad.f32 r0.x, c3.x, r3.z, r0.x
-mov.f32f32 r2.w, r5.x
-mov.f32f32 r3.x, r4.w
-mov.f32f32 r3.y, r4.z
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r5.z, r2.w
-mov.f32f32 r5.y, r3.x
-mov.f32f32 r5.x, r3.y
-mov.f32f32 r2.w, c9.x
-mov.f32f32 r3.x, r4.y
-mov.f32f32 r3.y, r4.x
-mov.f32f32 r3.z, r3.w
-mov.f32f32 r4.w, r2.w
-mov.f32f32 r4.z, r3.x
-mov.f32f32 r4.y, r3.y
-mov.f32f32 r4.x, r3.z
-mov.f32f32 r2.w, c9.x
-mov.f32f32 r3.x, r6.z
-mov.f32f32 r3.y, r6.y
-mov.f32f32 r6.x, r6.x
-mov.f32f32 r3.w, r2.w
-mov.f32f32 r3.z, r3.x
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r3.x, r6.x
+mad.f32 r0.y, c1.w, r6.y, r2.w
+mad.f32 r1.x, c8.x, r0.w, r0.x
+mad.f32 r0.x, c2.w, r6.z, r0.y
+mul.f r0.y, c0.z, r6.x
+mad.f32 r0.w, c3.w, r6.w, r0.x
+mad.f32 r0.x, c1.z, r6.y, r0.y
+mul.f r0.y, c0.y, r6.x
+mad.f32 r0.x, c2.z, r6.z, r0.x
+mad.f32 r0.y, c1.y, r6.y, r0.y
+mad.f32 r0.z, c3.z, r6.w, r0.x
+mad.f32 r0.x, c2.y, r6.z, r0.y
+mul.f r2.w, c0.x, r6.x
+mad.f32 r0.y, c3.y, r6.w, r0.x
+mad.f32 r0.x, c1.x, r6.y, r2.w
+mov.f32f32 r5.w, c9.x
+mad.f32 r0.x, c2.x, r6.z, r0.x
+mov.f32f32 r4.w, c9.x
+mad.f32 r0.x, c3.x, r6.w, r0.x
+mov.f32f32 r3.w, c9.x
mov.f32f32 r2.w, c9.x
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r1.z, r1.z
-nop
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r1.z, r1.z
end
nop
nop
-nop
; VERT: outputs: r0.x (0:0) r1.x (5:20) r2.x (5:21) r3.x (5:22) r4.x (5:23) r5.x (5:24)
-; VERT: inputs: r2.w (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=f,il=12,b=0) r6.x (0:0,cm=7,il=16,b=0) r3.w (0:0,cm=7,il=20,b=0) r4.z (0:0,cm=7,il=24,b=0) r1.z (0:0,cm=3,il=28,b=0)
-; VERT: 89 instructions, 0 half, 7 full
+; VERT: inputs: r6.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=f,il=12,b=0) r3.x (0:0,cm=7,il=16,b=0) r4.x (0:0,cm=7,il=20,b=0) r5.x (0:0,cm=7,il=24,b=0) r1.z (0:0,cm=3,il=28,b=0)
+; VERT: 42 instructions, 0 half, 7 full
diff --git a/reference/xonotic-gl2/xonotic-glx-gl2-20.asm b/reference/xonotic-gl2/xonotic-glx-gl2-20.asm
index 3c50540..3c90a57 100644
--- a/reference/xonotic-gl2/xonotic-glx-gl2-20.asm
+++ b/reference/xonotic-gl2/xonotic-glx-gl2-20.asm
@@ -6,75 +6,44 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c9.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)bary.f r0.z, 0, r0.x
-bary.f (ei)r0.x, 1, r0.x
+bary.f (ei)r0.w, 1, r0.x
(rpt1)nop
-mov.f32f32 r0.y, r0.z
-mov.f32f32 r0.w, r0.z
+mov.f32f32 r0.x, r0.z
+mov.f32f32 r0.y, r0.w
mov.f32f32 r1.x, r0.z
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r1.y, r0.y
-mov.f32f32 r0.y, r0.x
-mov.f32f32 r1.w, r0.w
-mov.f32f32 r0.w, r0.x
-mov.f32f32 r2.y, r1.x
-mov.f32f32 r1.z, r0.y
-mov.f32f32 r0.y, r0.x
-mov.f32f32 r2.x, r0.w
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r2.z, r0.y
-nop
-sam (f32)(xyz)r0.w, r1.y, s#2, t#2
-(sy)mul.f r0.y, r1.y, c4.z
-mul.f r1.x, r1.x, c4.y
-mul.f r0.w, r0.w, c4.x
-(ss)nop
-sam (f32)(xyzw)r1.y, r1.w, s#0, t#0
-(sy)(ss)mul.f r2.x, r2.x, c8.x
-mov.f32f32 r0.y, r0.y
-sam (f32)(xyz)r2.y, r2.y, s#3, t#3
-mov.f32f32 r1.x, r1.x
-(sy)mad.f32 r0.y, c5.z, r2.w, r0.y
-mov.f32f32 r0.w, r0.w
-mad.f32 r1.x, c5.y, r2.z, r1.x
-mad.f32 r0.w, c5.x, r2.y, r0.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r0.w, r0.w
-add.f r0.y, r1.w, r0.y
-mov.f32f32 r1.w, r2.x
-add.f r1.x, r1.z, r1.x
-add.f r1.y, r1.y, r0.w
-mul.f r0.y, r0.y, c6.z
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r0.w, r0.x
-nop
-mov.f32f32 r0.x, r0.y
-mul.f r0.y, r1.x, c6.y
-mul.f r1.x, r1.y, c6.x
+mov.f32f32 r1.y, r0.w
+mov.f32f32 r2.x, r0.z
+mov.f32f32 r2.y, r0.w
+sam (f32)(xyz)r2.z, r0.z, s#1, t#1
(rpt1)nop
(ss)nop
-sam (f32)(xyz)r2.x, r0.z, s#1, t#1
-(sy)mad.f32 r0.x, c7.z, r2.z, r0.x
-mov.f32f32 r0.y, r0.y
-(ss)mov.f32f32 r0.z, r1.x
-mad.f32 r0.y, c7.y, r2.y, r0.y
-mov.f32f32 r0.x, r0.x
-mad.f32 r0.z, c7.x, r2.x, r0.z
+sam (f32)(xyz)r0.x, r0.x, s#2, t#2
+(sy)mul.f r0.z, r0.z, c4.z
+(ss)mul.f r0.y, r0.y, c4.y
+mul.f r0.x, r0.x, c4.x
+sam (f32)(xyzw)r0.w, r1.x, s#0, t#0
+(sy)mul.f r1.w, r1.z, c8.x
+sam (f32)(xyz)r3.y, r2.x, s#3, t#3
+(sy)mad.f32 r0.z, c5.z, r3.w, r0.z
+mad.f32 r0.y, c5.y, r3.z, r0.y
+mad.f32 r0.x, c5.x, r3.y, r0.x
nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.z, r0.x
-mov.f32f32 r0.x, r0.z
+add.f r0.z, r1.y, r0.z
+add.f r0.y, r1.x, r0.y
+add.f r0.x, r0.w, r0.x
nop
-mov.f32f32 r1.y, r0.y
+mul.f r0.z, r0.z, c6.z
+mul.f r0.y, r0.y, c6.y
+mad.f32 r1.z, c7.z, r3.x, r0.z
+(ss)mad.f32 r1.y, c7.y, r2.w, r0.y
+mul.f r0.x, r0.x, c6.x
nop
-mov.f32f32 r1.x, r0.x
+mad.f32 r1.x, c7.x, r2.z, r0.x
end
nop
-nop
; FRAG: outputs: r1.x (1:0)
; FRAG: inputs: r0.x (5:20,cm=f,il=8,b=1)
-; FRAG: 68 instructions, 0 half, 3 full
+; FRAG: 37 instructions, 0 half, 4 full
diff --git a/reference/xonotic-gl2/xonotic-glx-gl2-21.asm b/reference/xonotic-gl2/xonotic-glx-gl2-21.asm
index 25893e0..b0eaf15 100644
--- a/reference/xonotic-gl2/xonotic-glx-gl2-21.asm
+++ b/reference/xonotic-gl2/xonotic-glx-gl2-21.asm
@@ -16,51 +16,36 @@
@out(r1.y) out5
@out(r1.z) out6
@out(r1.w) out7
+@const(c8.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)mul.f r1.x, c4.y, r0.x
mul.f r0.x, c4.x, r0.x
mad.f32 r1.x, c5.y, r0.y, r1.x
mad.f32 r0.x, c5.x, r0.y, r0.x
mad.f32 r0.y, c6.y, r0.z, r1.x
mad.f32 r0.x, c6.x, r0.z, r0.x
-mad.f32 r0.y, c7.y, r0.w, r0.y
-mad.f32 r0.x, c7.x, r0.w, r0.x
-mul.f r0.z, c0.w, r1.z
-mul.f r0.w, c0.z, r1.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
-mad.f32 r0.z, c1.w, r1.w, r0.z
-mad.f32 r0.w, c1.z, r1.w, r0.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
-(rpt1)nop
-mov.f32f32 r1.y, r0.y
-mov.f32f32 r1.x, r0.x
-mad.f32 r0.x, c2.w, r2.x, r0.z
-mad.f32 r0.y, c2.z, r2.x, r0.w
-mad.f32 r0.x, c3.w, r2.y, r0.x
-mad.f32 r0.y, c3.z, r2.y, r0.y
-mul.f r2.z, c0.y, r1.z
-mul.f r1.z, c0.x, r1.z
-mov.f32f32 r0.w, r0.x
-mov.f32f32 r0.z, r0.y
-mad.f32 r0.x, c1.y, r1.w, r2.z
-mad.f32 r0.y, c1.x, r1.w, r1.z
+mad.f32 r1.y, c7.y, r0.w, r0.y
+mad.f32 r1.x, c7.x, r0.w, r0.x
+mul.f r0.x, c0.w, r1.z
+mul.f r0.y, c0.z, r1.z
+mad.f32 r0.x, c1.w, r1.w, r0.x
+mad.f32 r0.y, c1.z, r1.w, r0.y
+mad.f32 r0.x, c2.w, r2.x, r0.x
+mad.f32 r0.y, c2.z, r2.x, r0.y
+mad.f32 r0.w, c3.w, r2.y, r0.x
+mad.f32 r0.z, c3.z, r2.y, r0.y
+mul.f r0.x, c0.y, r1.z
+mul.f r0.y, c0.x, r1.z
+mad.f32 r0.x, c1.y, r1.w, r0.x
+mad.f32 r0.y, c1.x, r1.w, r0.y
mad.f32 r0.x, c2.y, r2.x, r0.x
-mad.f32 r0.y, c2.x, r2.x, r0.y
-mad.f32 r0.x, c3.y, r2.y, r0.x
-mad.f32 r1.z, c3.x, r2.y, r0.y
-(rpt1)nop
-mov.f32f32 r0.y, r0.x
-mov.f32f32 r0.x, r1.z
+mad.f32 r1.z, c2.x, r2.x, r0.y
+mad.f32 r0.y, c3.y, r2.y, r0.x
+mad.f32 r0.x, c3.x, r2.y, r1.z
+mov.f32f32 r1.w, c8.x
mov.f32f32 r1.z, c8.x
-mov.f32f32 r2.x, c8.x
-(rpt1)nop
-mov.f32f32 r1.w, r1.z
-mov.f32f32 r1.z, r2.x
end
nop
-nop
; VERT: outputs: r0.x (0:0) r1.x (5:20)
; VERT: inputs: r1.z (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=f,il=12,b=0)
-; VERT: 45 instructions, 0 half, 3 full
+; VERT: 27 instructions, 0 half, 3 full
diff --git a/reference/xonotic-gl2/xonotic-glx-gl2-22.asm b/reference/xonotic-gl2/xonotic-glx-gl2-22.asm
index 8ac7ce0..5030726 100644
--- a/reference/xonotic-gl2/xonotic-glx-gl2-22.asm
+++ b/reference/xonotic-gl2/xonotic-glx-gl2-22.asm
@@ -6,271 +6,193 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c21.x) 0xbf000000, 0x40000000, 0x00000000, 0x3f800000
+@const(c22.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.z, 0, r0.x
-bary.f r0.w, 4, r0.x
bary.f r1.x, 20, r0.x
-bary.f r1.y, 1, r0.x
+bary.f r1.y, 4, r0.x
+bary.f r0.w, 1, r0.x
mov.f32f32 r1.z, r0.z
-mul.f r1.w, r0.w, r0.w
-bary.f r2.x, 5, r0.x
-mul.f r2.y, r1.x, r1.x
-mov.f32f32 r2.z, r1.z
-mov.f32f32 r1.z, r1.y
-mad.f32 r1.w, r2.x, r2.x, r1.w
-bary.f r3.x, 21, r0.x
+mul.f r2.x, r1.x, r1.x
+bary.f r2.y, 21, r0.x
+mov.f32f32 r1.w, r0.w
+mul.f r2.z, r1.y, r1.y
+bary.f r2.w, 5, r0.x
+mad.f32 r2.x, r2.y, r2.y, r2.x
+bary.f r3.x, 22, r0.x
mov.f32f32 r3.y, r0.z
-mov.f32f32 r2.w, r1.z
-mov.f32f32 r1.z, r1.w
-bary.f r1.w, 6, r0.x
-mad.f32 r2.y, r3.x, r3.x, r2.y
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r3.z, r1.y
-mov.f32f32 r3.w, r0.z
-sam (f32)(xyz)r4.x, r2.z, s#0, t#0
-(sy)(ss)add.f r2.z, r4.x, c21.x
-mad.f32 r1.z, r1.w, r1.w, r1.z
-mov.f32f32 r2.y, r2.y
-bary.f r2.w, 22, r0.x
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r3.z, r3.z
-mov.f32f32 r3.w, r3.w
-mad.f32 r2.y, r2.w, r2.w, r2.y
-mul.f r4.x, r2.z, r2.z
+mov.f32f32 r3.z, r0.w
+sam (f32)(xyz)r3.w, r1.z, s#0, t#0
+(sy)(ss)add.f r1.z, r3.w, c21.x
+mad.f32 r1.w, r3.x, r3.x, r2.x
+mad.f32 r2.x, r2.w, r2.w, r2.z
+bary.f r2.z, 6, r0.x
+mov.f32f32 r3.w, r1.z
+add.f r4.x, r4.x, c21.x
add.f r4.y, r4.y, c21.x
+sam (f32)(xyz)r4.z, r3.y, s#4, t#4
+mad.f32 r2.x, r2.z, r2.z, r2.x
+mul.f r1.z, r1.z, r3.w
+(ss)mov.f32f32 r3.y, r4.x
+rsq r1.w, r1.w
+(ss)mov.f32f32 r3.z, r1.w
+(ss)mul.f r1.w, r3.x, r1.w
+(sy)mul.f r3.x, r5.x, c6.z
+mad.f32 r1.z, r4.x, r3.y, r1.z
+mov.f32f32 r4.x, r4.y
+mul.f r1.x, r1.x, r3.z
+mul.f r2.y, r2.y, r3.z
+rsq r2.x, r2.x
+(ss)mov.f32f32 r3.z, r2.x
+mad.f32 r1.z, r4.x, r4.x, r1.z
+mov.f32f32 r4.x, r1.x
+mov.f32f32 r5.x, r2.y
+mul.f r5.y, r1.y, r3.z
+mul.f r3.z, r2.w, r3.z
+mov.f32f32 r5.z, r1.w
+(ss)mul.f r2.x, r2.z, r2.x
rsq r1.z, r1.z
-(ss)mov.f32f32 r1.z, r1.z
-mov.f32f32 r4.w, r1.y
-sam (f32)(xyz)r5.x, r3.y, s#4, t#4
-(sy)(ss)mul.f r3.y, r5.z, c6.z
-mov.f32f32 r3.z, r4.y
-mul.f r4.y, r0.w, r1.z
-mul.f r5.z, r2.x, r1.z
-mul.f r1.z, r1.w, r1.z
-mad.f32 r4.x, r3.z, r3.z, r4.x
-mov.f32f32 r4.y, r4.y
-mov.f32f32 r5.z, r5.z
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r4.x, r4.x
-add.f r4.z, r4.z, c21.x
-rsq r2.y, r2.y
-(ss)mov.f32f32 r2.y, r2.y
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r5.w, r0.z
-mov.f32f32 r4.z, r4.z
-mul.f r1.x, r1.x, r2.y
-mul.f r3.x, r3.x, r2.y
-mul.f r2.y, r2.w, r2.y
-mad.f32 r2.w, r4.z, r4.z, r4.x
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r5.w, r5.w
-mov.f32f32 r6.x, r1.y
-mul.f r5.y, r5.y, c6.y
-rsq r2.w, r2.w
-(ss)mov.f32f32 r2.w, r2.w
-mul.f r5.x, r5.x, c6.x
-mov.f32f32 r4.x, r4.w
-mov.f32f32 r6.x, r6.x
-mul.f r2.z, r2.z, r2.w
-mul.f r3.z, r3.z, r2.w
-mul.f r2.w, r4.z, r2.w
-nop
-mov.f32f32 r2.z, r2.z
-absneg.f r0.w, (neg)r0.w
-mov.f32f32 r3.z, r3.z
-mov.f32f32 r2.w, r2.w
-mul.f r4.z, r2.z, r1.x
-mul.f r4.w, r2.z, r0.w
-absneg.f r2.x, (neg)r2.x
-mad.f32 r4.z, r3.z, r3.x, r4.z
-mul.f r6.y, r2.z, r1.x
-sam (f32)(xyz)r6.z, r5.w, s#5, t#5
-(sy)mad.f32 r3.y, c7.z, r7.x, r3.y
-mad.f32 r4.w, r3.z, r2.x, r4.w
-mov.f32f32 r4.z, r4.z
-(ss)mad.f32 r5.w, r3.z, r3.x, r6.y
-mad.f32 r4.z, r2.w, r2.y, r4.z
-mov.f32f32 r4.w, r4.w
-absneg.f r1.w, (neg)r1.w
-mov.f32f32 r5.w, r5.w
-mul.f r6.x, r4.z, r2.z
-mul.f r6.y, r4.z, r3.z
-mad.f32 r4.w, r2.w, r1.w, r4.w
-mul.f r4.z, r4.z, r2.w
-mov.f32f32 r6.x, r6.x
-mov.f32f32 r6.y, r6.y
-mul.f r2.z, r4.w, r2.z
-mul.f r3.z, r4.w, r3.z
-mul.f r4.w, r4.w, r2.w
-mul.f r6.x, c21.y, r6.x
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r3.z, r3.z
-mov.f32f32 r4.w, r4.w
-mov.f32f32 r6.x, r6.x
-mul.f r2.z, c21.y, r2.z
-mul.f r3.z, c21.y, r3.z
-mul.f r4.w, c21.y, r4.w
-add.f r1.x, r1.x, (neg)r6.x
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r3.z, r3.z
-mov.f32f32 r4.w, r4.w
-mov.f32f32 r1.x, r1.x
-add.f r0.w, r0.w, (neg)r2.z
-add.f r2.x, r2.x, (neg)r3.z
-add.f r1.w, r1.w, (neg)r4.w
-mul.f r2.z, c21.y, r6.y
-mov.f32f32 r0.w, r0.w
-bary.f r3.z, 8, r0.x
+(ss)mov.f32f32 r5.w, r1.z
+(ss)mul.f r1.z, r4.y, r1.z
+mov.f32f32 r6.x, r0.z
+mov.f32f32 r6.y, r0.w
+mul.f r3.w, r3.w, r5.w
+mul.f r3.y, r3.y, r5.w
+mov.f32f32 r4.y, r1.z
+mul.f r4.w, r4.w, c6.y
+mov.f32f32 r5.w, r3.w
+absneg.f r1.y, (neg)r1.y
+mul.f r1.x, r3.w, r1.x
+mov.f32f32 r3.w, r3.y
+sam (f32)(xyz)r6.x, r6.x, s#5, t#5
+mad.f32 r1.x, r3.y, r2.y, r1.x
+mul.f r2.y, r5.w, r1.y
+absneg.f r2.w, (neg)r2.w
+mad.f32 r1.x, r1.z, r5.z, r1.x
+mul.f r1.z, r5.w, r4.x
+(sy)mad.f32 r3.x, c7.z, r6.z, r3.x
+mad.f32 r2.y, r3.w, r2.w, r2.y
+absneg.f r2.z, (neg)r2.z
+mul.f r3.y, r1.x, r5.w
+mad.f32 r1.z, r3.w, r5.x, r1.z
+mul.f r6.z, r1.x, r3.w
+mad.f32 r2.y, r4.y, r2.z, r2.y
+mul.f r3.y, c21.y, r3.y
+mad.f32 r1.z, r4.y, r5.z, r1.z
+mul.f r5.z, c21.y, r6.z
+mul.f r5.w, r2.y, r5.w
+add.f r3.y, r4.x, (neg)r3.y
+mul.f r3.w, r2.y, r3.w
+mul.f r2.y, r2.y, r4.y
+mul.f r4.x, c21.y, r5.w
+mul.f r3.y, r3.y, r5.y
+add.f r5.x, r5.x, (neg)r5.z
+mul.f r3.w, c21.y, r3.w
+add.f r1.y, r1.y, (neg)r4.x
+bary.f r4.x, 10, r0.x
+mad.f32 r3.y, r5.x, r3.z, r3.y
+add.f r2.w, r2.w, (neg)r3.w
+mov.f32f32 r3.z, r1.y
+bary.f r3.w, 8, r0.x
mul.f r1.x, r1.x, r4.y
-mov.f32f32 r2.z, r2.z
bary.f r4.y, 9, r0.x
-mul.f r3.z, r0.w, r3.z
-mov.f32f32 r2.x, r2.x
-bary.f r4.w, 12, r0.x
-add.f r2.z, r3.x, (neg)r2.z
-mul.f r3.x, r0.w, r4.y
-bary.f r4.y, 10, r0.x
-mad.f32 r3.z, r2.x, r4.w, r3.z
-mov.f32f32 r2.z, r2.z
-bary.f r4.w, 13, r0.x
-mul.f r0.w, r0.w, r4.y
-mov.f32f32 r3.z, r3.z
-mov.f32f32 r1.w, r1.w
-bary.f r4.y, 16, r0.x
-mad.f32 r1.x, r2.z, r5.z, r1.x
-mad.f32 r2.z, r2.x, r4.w, r3.x
-bary.f r3.x, 14, r0.x
-mad.f32 r3.z, r1.w, r4.y, r3.z
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r4.y, r4.z
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r3.z, r3.z
-bary.f r4.z, 17, r0.x
-mad.f32 r0.w, r2.x, r3.x, r0.w
-mul.f r2.x, c21.y, r4.y
-mul.f r3.x, c14.x, r3.z
-mad.f32 r2.z, r1.w, r4.z, r2.z
-mul.f r4.y, c14.z, r3.z
-mul.f r3.z, c14.y, r3.z
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r0.w, r0.w
-bary.f (ei)r0.x, 18, r0.x
-add.f r0.y, r2.y, (neg)r2.x
-mad.f32 r2.x, c15.x, r2.z, r3.x
-mad.f32 r3.x, c15.z, r2.z, r4.y
-mad.f32 r0.x, r1.w, r0.x, r0.w
-mov.f32f32 r0.y, r0.y
-mad.f32 r0.w, c15.y, r2.z, r3.z
-mad.f32 r1.w, r2.w, r2.y, r5.w
-mov.f32f32 r0.x, r0.x
-mad.f32 r0.y, r0.y, r1.z, r1.x
-mov.f32f32 r1.x, r3.y
-mov.f32f32 r1.z, r5.y
-mad.f32 r2.x, c16.x, r0.x, r2.x
-mov.f32f32 r0.y, r0.y
-mad.f32 r2.y, c16.z, r0.x, r3.x
-mad.f32 r0.x, c16.y, r0.x, r0.w
-mov.f32f32 r0.w, r2.x
-max.f r0.y, (neg)r0.y, c21.z
-mov.f32f32 r2.x, r2.y
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r2.y, r0.w
-log2 r0.y, r0.y
-mov.f32f32 r0.w, r0.z
-mov.f32f32 r2.w, r2.x
-mov.f32f32 r2.z, r0.x
-max.f r0.x, r1.w, c21.z
+mul.f r1.y, r1.y, r4.x
+mul.f r3.w, r3.z, r3.w
+mov.f32f32 r4.x, r2.w
+bary.f r5.x, 12, r0.x
+mul.f r1.x, c21.y, r1.x
+mul.f r3.z, r3.z, r4.y
+bary.f r4.y, 14, r0.x
+mad.f32 r3.w, r4.x, r5.x, r3.w
+mul.f r2.y, c21.y, r2.y
+add.f r1.x, r1.w, (neg)r1.x
+bary.f r1.w, 13, r0.x
+mad.f32 r1.y, r2.w, r4.y, r1.y
+add.f r2.y, r2.z, (neg)r2.y
+mad.f32 r1.x, r1.x, r2.x, r3.y
+mad.f32 r1.w, r4.x, r1.w, r3.z
+bary.f r2.x, 18, r0.x
+mov.f32f32 r2.z, r2.y
+bary.f r2.w, 16, r0.x
+max.f r1.x, (neg)r1.x, c21.z
+bary.f (ei)r0.x, 17, r0.x
+mad.f32 r0.y, r2.y, r2.x, r1.y
+mad.f32 r1.y, r2.z, r2.w, r3.w
+max.f r1.z, r1.z, c21.z
(rpt1)nop
-mov.f32f32 r1.w, r0.w
-mov.f32f32 r0.w, r1.y
-mov.f32f32 r0.x, r0.x
-sam.3d (f32)(xyz)r2.y, r2.y, s#7, t#7
-sam (f32)(xyzw)r3.x, r3.w, s#1, t#1
-(sy)add.f r1.x, r3.z, r1.x
-mov.f32f32 r3.z, r0.z
-mad.f32 r1.z, c7.y, r6.w, r1.z
-(ss)mov.f32f32 r4.x, r5.x
+mov.f32f32 r2.x, r1.y
+log2 r1.x, r1.x
+mul.f r1.y, c14.x, r1.y
+mad.f32 r0.x, r2.z, r0.x, r1.w
+mov.f32f32 r2.y, r0.z
+mul.f r1.w, c14.y, r2.x
+mul.f r2.x, c14.z, r2.x
+mov.f32f32 r2.w, r0.x
+mov.f32f32 r2.z, r0.w
+mad.f32 r0.x, c15.x, r0.x, r1.y
+nop
+mad.f32 r1.y, c15.y, r2.w, r1.w
+mov.f32f32 r1.w, r0.y
+mad.f32 r2.x, c15.z, r2.w, r2.x
+mad.f32 r3.y, c16.x, r0.y, r0.x
+sam (f32)(xyzw)r5.x, r2.y, s#2, t#2
+(sy)mad.f32 r0.x, c11.x, r5.w, c21.w
+mad.f32 r3.z, c16.y, r1.w, r1.y
+mad.f32 r3.w, c16.z, r1.w, r2.x
+mov.f32f32 r0.y, r1.z
+mov.f32f32 r1.w, r0.z
+(ss)mul.f r0.x, r0.x, r1.x
mov.f32f32 r2.x, r0.w
-mov.f32f32 r4.y, r3.z
-mov.f32f32 r0.w, r1.y
-mov.f32f32 r1.z, r1.z
-mad.f32 r3.z, c7.x, r6.z, r4.x
-mul.f r3.w, r3.w, c13.x
-mov.f32f32 r4.z, r0.w
-add.f r0.w, r3.y, r1.z
-mov.f32f32 r1.z, r3.z
-sam (f32)(xyzw)r4.w, r1.w, s#2, t#2
-(sy)(ss)mad.f32 r1.w, c11.x, r5.z, c21.w
-mov.f32f32 r2.x, r3.w
-mul.f r3.y, r5.y, c10.z
-mul.f r3.z, r5.x, c10.y
-sam (f32)(xyz)r3.w, r4.y, s#6, t#6
-(sy)mad.f32 r1.x, r4.y, r2.w, r1.x
-add.f r1.z, r3.x, r1.z
-mad.f32 r0.w, r4.x, r2.z, r0.w
-mad.f32 r1.z, r3.w, r2.y, r1.z
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r2.x, r2.x
+(ss)mad.f32 r1.x, c7.y, r6.y, r4.w
+mul.f r1.y, r4.z, c6.x
+sam.3d (f32)(xyz)r3.y, r3.y, s#7, t#7
+mul.f r2.y, r5.z, c10.z
+mad.f32 r1.y, c7.x, r6.x, r1.y
+mul.f r2.z, r5.y, c10.y
+mul.f r2.w, r5.x, c10.x
+sam (f32)(xyzw)r4.x, r1.w, s#1, t#1
+(sy)(ss)add.f r2.x, r4.z, r3.x
+mov.f32f32 r5.x, r0.z
+mov.f32f32 r5.y, r0.w
+add.f r1.x, r4.y, r1.x
+add.f r1.y, r4.x, r1.y
+exp2 r0.x, r0.x
+mul.f r1.w, r4.w, c13.x
+sam (f32)(xyz)r4.x, r0.z, s#3, t#3
+(rpt2)nop
+sam (f32)(xyz)r4.w, r5.x, s#6, t#6
+(sy)(ss)mad.f32 r0.z, r5.y, r3.w, r2.x
+mad.f32 r0.w, r5.x, r3.z, r1.x
+mad.f32 r1.x, r4.w, r3.y, r1.y
nop
-mul.f r2.y, r1.x, c9.z
+mul.f r1.y, r0.z, c9.z
+mul.f r2.x, r0.w, c9.y
+mul.f r3.x, r1.x, c9.x
+mov.f32f32 r0.z, r0.z
+mul.f r1.y, r1.y, r0.y
+mul.f r0.y, r2.x, r0.y
+(ss)mad.f32 r1.y, r2.y, r0.x, r1.y
+mul.f r0.z, r0.z, c8.z
+mad.f32 r0.y, r2.z, r0.x, r0.y
+mul.f r2.x, r3.x, r1.z
+mad.f32 r0.z, c20.z, r1.y, r0.z
mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.z, r1.z
-mul.f r1.x, r1.x, c8.z
-mul.f r2.y, r2.y, r0.x
-mul.f r2.z, r0.w, c9.y
-mul.f r2.w, r1.z, c9.x
+mad.f32 r1.z, c12.z, r4.z, r0.z
+mad.f32 r0.x, r2.w, r0.x, r2.x
+mov.f32f32 r0.z, r1.x
mul.f r0.w, r0.w, c8.y
-mov.f32f32 r2.y, r2.y
-(ss)mul.f r0.y, r1.w, r0.y
-mul.f r1.w, r2.z, r0.x
-mul.f r0.x, r2.w, r0.x
-mul.f r1.z, r1.z, c8.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r2.z, r1.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r1.w, r2.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r1.y, r1.y
-mul.f r2.x, r4.w, c10.x
-exp2 r0.y, r0.y
-(ss)mad.f32 r2.y, r3.y, r0.y, r2.y
-mad.f32 r2.z, r3.z, r0.y, r2.z
-mov.f32f32 r2.w, r0.z
-mov.f32f32 r3.x, r1.y
-mov.f32f32 r0.z, r2.y
-mov.f32f32 r1.y, r2.z
-mad.f32 r0.x, r2.x, r0.y, r0.x
nop
-(ss)mad.f32 r0.y, c20.z, r0.z, r1.x
-mad.f32 r0.z, c20.y, r1.y, r0.w
-sam (f32)(xyz)r2.x, r2.w, s#3, t#3
-(sy)mad.f32 r0.y, c12.z, r2.z, r0.y
-mov.f32f32 r0.x, r0.x
-mad.f32 r0.z, c12.y, r2.y, r0.z
-nop
-mov.f32f32 r0.y, r0.y
-mad.f32 r0.x, c20.x, r0.x, r1.z
-mov.f32f32 r0.z, r0.z
-mad.f32 r0.x, c12.x, r2.x, r0.x
-mov.f32f32 r1.z, r0.y
+mad.f32 r0.y, c20.y, r0.y, r0.w
+mul.f r0.z, r0.z, c8.x
+mad.f32 r1.y, c12.y, r4.y, r0.y
+mad.f32 r0.x, c20.x, r0.x, r0.z
nop
-mov.f32f32 r1.y, r0.z
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
-mov.f32f32 r1.x, r0.x
+mad.f32 r1.x, c12.x, r4.x, r0.x
end
+nop
+nop
+nop
; FRAG: outputs: r1.x (1:0)
; FRAG: inputs: r0.x (5:20,cm=f,il=8,b=1) r1.x (5:21,cm=f,il=12,b=1) r2.x (5:22,cm=f,il=16,b=1) r3.x (5:23,cm=f,il=20,b=1) r4.x (5:24,cm=f,il=24,b=1) r5.x (5:25,cm=f,il=28,b=1)
-; FRAG: 267 instructions, 0 half, 8 full
+; FRAG: 184 instructions, 0 half, 7 full
diff --git a/reference/xonotic-gl2/xonotic-glx-gl2-23.asm b/reference/xonotic-gl2/xonotic-glx-gl2-23.asm
index e0c9a79..a4f6c1a 100644
--- a/reference/xonotic-gl2/xonotic-glx-gl2-23.asm
+++ b/reference/xonotic-gl2/xonotic-glx-gl2-23.asm
@@ -1,22 +1,22 @@
; options:
; VERT: new compiler
-@in(r2.w) in0
-@in(r3.x) in1
-@in(r3.y) in2
-@in(r3.z) in3
-@in(r3.w) in4
-@in(r4.x) in5
-@in(r4.y) in6
-@in(r4.z) in7
-@in(r7.x) in8
-@in(r7.y) in9
-@in(r7.z) in10
-@in(r7.w) in12
-@in(r8.x) in13
-@in(r8.y) in14
-@in(r4.w) in16
-@in(r5.x) in17
-@in(r5.y) in18
+@in(r5.w) in0
+@in(r6.x) in1
+@in(r6.y) in2
+@in(r6.z) in3
+@in(r6.w) in4
+@in(r7.x) in5
+@in(r7.y) in6
+@in(r7.z) in7
+@in(r3.x) in8
+@in(r3.y) in9
+@in(r3.z) in10
+@in(r4.x) in12
+@in(r4.y) in13
+@in(r4.z) in14
+@in(r5.x) in16
+@in(r5.y) in17
+@in(r5.z) in18
@out(r0.x) out0
@out(r0.y) out1
@out(r0.z) out2
@@ -45,123 +45,65 @@
@out(r6.y) out25
@out(r6.z) out26
@out(r6.w) out27
-(sy)(ss)mul.f r0.x, c5.x, r4.w
-mul.f r0.y, c5.x, r7.w
-mad.f32 r0.x, c5.y, r5.x, r0.x
-mad.f32 r0.y, c5.y, r8.x, r0.y
-add.f r0.z, c4.x, (neg)r2.w
-mul.f r0.w, c5.x, r7.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
-mad.f32 r0.x, c5.z, r5.y, r0.x
-mad.f32 r0.y, c5.z, r8.y, r0.y
-mul.f r1.x, r0.z, r4.w
-mad.f32 r0.w, c5.y, r7.y, r0.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
-add.f r1.y, c4.y, (neg)r3.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
-mad.f32 r1.x, r1.y, r5.x, r1.x
-mad.f32 r0.w, c5.z, r7.z, r0.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r6.z, r0.x
-mov.f32f32 r6.y, r0.y
-add.f r0.x, c4.z, (neg)r3.y
-mov.f32f32 r0.y, r0.w
-mul.f r0.w, r0.z, r7.w
-mul.f r1.z, c6.y, r3.w
-mad.f32 r1.x, r0.x, r5.y, r1.x
-mov.f32f32 r6.x, r0.y
-mad.f32 r0.y, r1.y, r8.x, r0.w
-mad.f32 r0.w, c7.y, r4.x, r1.z
-mov.f32f32 r1.x, r1.x
-mad.f32 r0.w, c8.y, r4.y, r0.w
-mul.f r1.z, c6.x, r3.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.x, r1.x
-mad.f32 r0.y, r0.x, r8.y, r0.y
-mad.f32 r0.w, c9.y, r4.z, r0.w
-mad.f32 r1.z, c7.x, r4.x, r1.z
-mov.f32f32 r2.z, r1.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.w, r0.w
-mad.f32 r1.x, c8.x, r4.y, r1.z
-mul.f r0.z, r0.z, r7.x
-mov.f32f32 r2.y, r0.y
-mov.f32f32 r0.y, r0.w
-mad.f32 r0.w, c9.x, r4.z, r1.x
-mad.f32 r0.z, r1.y, r7.y, r0.z
-mul.f r1.x, c0.w, r2.w
-mov.f32f32 r1.y, r0.y
-mov.f32f32 r0.y, r0.w
-mov.f32f32 r0.z, r0.z
-mad.f32 r0.w, c1.w, r3.x, r1.x
-mul.f r1.z, c0.z, r2.w
-mov.f32f32 r0.y, r0.y
-mad.f32 r0.x, r0.x, r7.z, r0.z
-(rpt1)nop
-mov.f32f32 r1.x, r0.y
-mov.f32f32 r2.x, r0.x
-mad.f32 r0.x, c2.w, r3.y, r0.w
-mad.f32 r0.y, c1.z, r3.x, r1.z
-mad.f32 r0.x, c3.w, r3.z, r0.x
-mad.f32 r0.y, c2.z, r3.y, r0.y
-mul.f r0.z, c0.y, r2.w
-mul.f r1.z, c0.x, r2.w
-mov.f32f32 r0.w, r0.x
-mad.f32 r0.x, c3.z, r3.z, r0.y
-mad.f32 r0.y, c1.y, r3.x, r0.z
-mad.f32 r1.z, c1.x, r3.x, r1.z
-mul.f r1.w, c6.w, r3.w
-mov.f32f32 r0.z, r0.x
-mad.f32 r0.x, c2.y, r3.y, r0.y
-mad.f32 r0.y, c2.x, r3.y, r1.z
-mad.f32 r0.x, c3.y, r3.z, r0.x
-mad.f32 r1.z, c3.x, r3.z, r0.y
-mad.f32 r1.w, c7.w, r4.x, r1.w
+@const(c10.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
+@const(c11.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
+(sy)(ss)add.f r0.x, c4.x, (neg)r5.w
+mul.f r0.y, c6.y, r6.w
+mul.f r0.z, c6.x, r6.w
+mul.f r0.w, c0.w, r5.w
+mul.f r1.x, r0.x, r5.x
+add.f r1.y, c4.y, (neg)r6.x
+mul.f r1.z, r0.x, r4.x
+mul.f r0.x, r0.x, r3.x
+mad.f32 r0.y, c7.y, r7.x, r0.y
+mad.f32 r1.x, r1.y, r5.y, r1.x
+add.f r1.w, c4.z, (neg)r6.y
+mad.f32 r1.z, r1.y, r4.y, r1.z
+mad.f32 r0.x, r1.y, r3.y, r0.x
+nop
+mad.f32 r2.z, r1.w, r5.z, r1.x
+mad.f32 r2.y, r1.w, r4.z, r1.z
+mad.f32 r2.x, r1.w, r3.z, r0.x
+mad.f32 r0.x, c8.y, r7.y, r0.y
+mad.f32 r0.y, c7.x, r7.x, r0.z
+mad.f32 r1.y, c9.y, r7.z, r0.x
+mad.f32 r0.x, c8.x, r7.y, r0.y
+mad.f32 r0.y, c1.w, r6.x, r0.w
+mad.f32 r1.x, c9.x, r7.z, r0.x
+mad.f32 r0.x, c2.w, r6.y, r0.y
+mul.f r0.y, c0.z, r5.w
+mad.f32 r0.w, c3.w, r6.z, r0.x
+mad.f32 r0.x, c1.z, r6.x, r0.y
+mul.f r0.y, c0.y, r5.w
+mad.f32 r0.x, c2.z, r6.y, r0.x
+mad.f32 r0.y, c1.y, r6.x, r0.y
+mad.f32 r0.z, c3.z, r6.z, r0.x
+mad.f32 r0.x, c2.y, r6.y, r0.y
+mul.f r1.z, c0.x, r5.w
+mad.f32 r0.y, c3.y, r6.z, r0.x
+mad.f32 r0.x, c1.x, r6.x, r1.z
+mul.f r1.z, c6.w, r6.w
+mad.f32 r0.x, c2.x, r6.y, r0.x
+mad.f32 r1.z, c7.w, r7.x, r1.z
+mad.f32 r0.x, c3.x, r6.z, r0.x
+mad.f32 r6.w, c8.w, r7.y, r1.z
+mul.f r1.z, c5.x, r5.x
+mul.f r1.w, c5.x, r4.x
+mad.f32 r1.z, c5.y, r5.y, r1.z
+mad.f32 r1.w, c5.y, r4.y, r1.w
+mad.f32 r6.z, c5.z, r5.z, r1.z
+mad.f32 r6.y, c5.z, r4.z, r1.w
+mul.f r1.z, c5.x, r3.x
+mov.f32f32 r5.w, c10.x
+mad.f32 r1.z, c5.y, r3.y, r1.z
+mov.f32f32 r4.w, c10.x
+mad.f32 r6.x, c5.z, r3.z, r1.z
+mov.f32f32 r3.w, c10.x
mov.f32f32 r2.w, c10.x
-mov.f32f32 r0.y, r0.x
-mov.f32f32 r0.x, r1.z
-mad.f32 r1.z, c8.w, r4.y, r1.w
-mov.f32f32 r5.w, r2.w
-mov.f32f32 r1.w, r5.y
-mov.f32f32 r2.w, r5.x
-mov.f32f32 r6.w, r1.z
-mov.f32f32 r1.z, r4.w
-mov.f32f32 r5.z, r1.w
-mov.f32f32 r5.y, r2.w
-mov.f32f32 r1.w, c10.x
-mov.f32f32 r5.x, r1.z
-mov.f32f32 r1.z, r8.y
-mov.f32f32 r2.w, r8.x
-mov.f32f32 r4.w, r1.w
-mov.f32f32 r1.w, r7.w
-mov.f32f32 r4.z, r1.z
-mov.f32f32 r4.y, r2.w
-mov.f32f32 r1.z, c10.x
-mov.f32f32 r4.x, r1.w
-mov.f32f32 r1.w, r7.z
-mov.f32f32 r2.w, r7.y
-mov.f32f32 r3.w, r1.z
-mov.f32f32 r1.z, r7.x
-mov.f32f32 r3.z, r1.w
-mov.f32f32 r3.y, r2.w
mov.f32f32 r1.w, c10.x
-mov.f32f32 r3.x, r1.z
mov.f32f32 r1.z, c10.x
-mov.f32f32 r7.x, c10.x
-mov.f32f32 r2.w, r1.w
-nop
-mov.f32f32 r1.w, r1.z
-mov.f32f32 r1.z, r7.x
end
-nop
; VERT: outputs: r0.x (0:0) r1.x (5:20) r2.x (5:21) r3.x (5:22) r4.x (5:23) r5.x (5:24) r6.x (5:25)
-; VERT: inputs: r2.w (0:0,cm=f,il=8,b=0) r3.w (0:0,cm=f,il=12,b=0) r7.x (0:0,cm=7,il=16,b=0) r7.w (0:0,cm=7,il=20,b=0) r4.w (0:0,cm=7,il=24,b=0)
-; VERT: 116 instructions, 0 half, 9 full
+; VERT: inputs: r5.w (0:0,cm=f,il=8,b=0) r6.w (0:0,cm=f,il=12,b=0) r3.x (0:0,cm=7,il=16,b=0) r4.x (0:0,cm=7,il=20,b=0) r5.x (0:0,cm=7,il=24,b=0)
+; VERT: 56 instructions, 0 half, 8 full
diff --git a/reference/xonotic-gl2/xonotic-glx-gl2-24.asm b/reference/xonotic-gl2/xonotic-glx-gl2-24.asm
index 87c43ef..155e9a7 100644
--- a/reference/xonotic-gl2/xonotic-glx-gl2-24.asm
+++ b/reference/xonotic-gl2/xonotic-glx-gl2-24.asm
@@ -6,127 +6,85 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c10.x) 0x3f000000, 0xbf000000, 0x00000000, 0x00000000
+@const(c11.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.z, 0, r0.x
-bary.f r0.w, 4, r0.x
-bary.f r1.x, 1, r0.x
+bary.f r0.w, 1, r0.x
+bary.f r1.x, 4, r0.x
mov.f32f32 r1.y, (0.000000)
mov.f32f32 r1.z, r0.z
-mov.f32f32 r1.w, r0.z
-mul.f r2.y, r0.w, r0.w
+mov.f32f32 r1.w, r0.w
+mul.f r2.y, r1.x, r1.x
bary.f r2.z, 5, r0.x
-mov.f32f32 r2.w, r1.z
-mov.f32f32 r1.z, r1.x
-mov.f32f32 r3.y, r1.w
-mad.f32 r1.w, r2.z, r2.z, r2.y
-mov.f32f32 r2.y, r0.z
-mov.f32f32 r3.x, r1.z
-mov.f32f32 r1.z, r1.x
-mov.f32f32 r1.w, r1.w
+sam (f32)(xyzw)r2.w, r0.z, s#1, t#1
+(sy)cmps.f.lt r3.w, r3.z, c10.x
+mov.f32f32 r4.x, r0.z
+mov.f32f32 r4.y, r0.w
+mad.f32 r2.y, r2.z, r2.z, r2.y
+sam (f32)(xyz)r4.z, r1.z, s#0, t#0
+(sy)(ss)add.f r1.z, r4.z, c10.y
+cov.u32f32 r1.w, r3.w
bary.f (ei)r0.x, 6, r0.x
-mov.f32f32 r3.w, r2.y
-mov.f32f32 r0.y, r1.x
-mov.f32f32 r3.z, r1.z
-sam (f32)(xyz)r4.y, r2.w, s#0, t#0
-(sy)add.f r1.z, r4.y, c10.y
-mad.f32 r1.w, r0.x, r0.x, r1.w
-mov.f32f32 r4.x, r0.y
-add.f r0.y, r4.z, c10.y
-mov.f32f32 r1.z, r1.z
-add.f r2.y, r4.w, c10.y
-(ss)nop
-sam (f32)(xyzw)r2.w, r3.y, s#1, t#1
-(sy)cmps.f.lt r4.y, r3.z, c10.x
-(ss)mul.f r3.z, r3.z, c8.x
-mul.f r4.z, r1.z, r1.z
-mov.f32f32 r0.y, r0.y
-cov.u32f32 r4.y, r4.y
-rsq r1.w, r1.w
-(ss)mov.f32f32 r1.w, r1.w
-sam (f32)(xyz)r4.w, r3.w, s#2, t#2
-(sy)(ss)mul.f r3.w, r5.y, c4.z
-mad.f32 r4.x, r0.y, r0.y, r4.z
-cmps.f.ne p0.x, r4.y, r1.y
-mul.f r0.w, r0.w, r1.w
-mul.f r1.y, r2.z, r1.w
-mov.f32f32 r2.z, r4.x
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.y, r1.y
-mul.f r0.x, r0.x, r1.w
-mad.f32 r1.w, r2.y, r2.y, r2.z
+add.f r0.y, r4.w, c10.y
+mov.f32f32 r3.w, r1.z
+cmps.f.ne p0.x, r1.w, r1.y
+mad.f32 r1.y, r0.x, r0.x, r2.y
+sam (f32)(xyz)r4.x, r4.x, s#2, t#2
+(sy)mul.f r1.w, r4.z, c4.z
+mul.f r1.z, r1.z, r3.w
+mov.f32f32 r2.y, r0.y
+mov.f32f32 r4.z, r0.z
+mov.f32f32 r4.w, r0.w
+mul.f r0.z, r4.y, c4.y
+mad.f32 r0.y, r0.y, r2.y, r1.z
+add.f r0.w, r5.x, c10.y
kill p0.x
-mov.f32f32 r2.z, r3.w
-mov.f32f32 r0.z, r0.z
-mul.f r3.w, r5.x, c4.y
-mov.f32f32 r0.x, r0.x
-mul.f r4.x, r4.w, c4.x
-rsq r1.w, r1.w
-(ss)mov.f32f32 r1.w, r1.w
-mov.f32f32 r4.y, r0.z
-mov.f32f32 r0.z, r3.w
-mov.f32f32 r3.w, r4.x
-mul.f r1.z, r1.z, r1.w
-mul.f r0.y, r0.y, r1.w
-mul.f r1.w, r2.y, r1.w
+rsq r1.y, r1.y
+(ss)mov.f32f32 r1.z, r1.y
+mul.f r0.x, r0.x, r1.y
+(ss)mov.f32f32 r1.y, r0.w
+sam (f32)(xyz)r4.y, r4.z, s#3, t#3
+mul.f r4.x, r4.x, c4.x
+mul.f r1.x, r1.x, r1.z
+mul.f r1.z, r2.z, r1.z
+mad.f32 r0.y, r1.y, r1.y, r0.y
+(sy)mad.f32 r1.y, c5.z, r4.w, r1.w
+mad.f32 r0.z, c5.y, r4.z, r0.z
+mad.f32 r2.z, c5.x, r4.y, r4.x
+mul.f r1.w, r3.z, c8.x
+(rpt1)nop
+rsq r0.y, r0.y
+(ss)mov.f32f32 r3.z, r0.y
+(ss)mul.f r0.y, r0.w, r0.y
+add.f r0.w, r3.y, r1.y
+add.f r0.z, r3.x, r0.z
+mul.f r1.y, r3.w, r3.z
+mul.f r2.y, r2.y, r3.z
+add.f r2.z, r2.w, r2.z
nop
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r2.y, r1.w
-mov.f32f32 r1.x, r1.x
-mul.f r0.w, r1.z, r0.w
-mov.f32f32 r1.z, r3.z
-mad.f32 r0.y, r0.y, r1.y, r0.w
-mov.f32f32 r4.z, r1.x
+mul.f r1.x, r1.y, r1.x
nop
-mov.f32f32 r1.w, r1.z
-mov.f32f32 r0.y, r0.y
+mad.f32 r1.x, r2.y, r1.z, r1.x
nop
-mad.f32 r0.x, r2.y, r0.x, r0.y
+mad.f32 r0.x, r0.y, r0.x, r1.x
(rpt2)nop
-mov.f32f32 r0.x, r0.x
-sam (f32)(xyz)r0.w, r4.y, s#3, t#3
-(sy)mad.f32 r0.y, c5.y, r1.x, r0.z
-mad.f32 r0.z, c5.x, r0.w, r3.w
-mad.f32 r0.w, c5.z, r1.y, r2.z
max.f r0.x, r0.x, c10.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.x, r0.x
-add.f r0.y, r3.x, r0.y
-add.f r0.z, r2.w, r0.z
-nop
-mul.f r1.x, c7.z, r0.x
-mul.f r1.y, c7.y, r0.x
+(rpt2)nop
+mov.f32f32 r0.y, r0.x
mul.f r0.x, c7.x, r0.x
-add.f r0.w, r3.y, r0.w
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r0.x, r0.x
-nop
-mad.f32 r1.x, c9.z, r1.x, c6.z
-mad.f32 r1.y, c9.y, r1.y, c6.y
+(rpt1)nop
+mul.f r1.x, c7.z, r0.y
+mul.f r0.y, c7.y, r0.y
mad.f32 r0.x, c9.x, r0.x, c6.x
nop
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r0.x, r0.x
-nop
-mul.f r0.w, r0.w, r1.x
-mul.f r0.y, r0.y, r1.y
-mul.f r0.x, r0.z, r0.x
-nop
-mov.f32f32 r0.z, r0.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
-nop
-mov.f32f32 r1.z, r0.z
-mov.f32f32 r1.y, r0.y
-mov.f32f32 r1.x, r0.x
+mad.f32 r1.x, c9.z, r1.x, c6.z
+mad.f32 r0.y, c9.y, r0.y, c6.y
+(rpt1)nop
+mul.f r1.z, r0.w, r1.x
+mul.f r1.y, r0.z, r0.y
+mul.f r1.x, r2.z, r0.x
end
-nop
-nop
; FRAG: outputs: r1.x (1:0)
; FRAG: inputs: r0.x (5:20,cm=f,il=8,b=1) r1.x (5:21,cm=f,il=12,b=1)
-; FRAG: 120 instructions, 0 half, 6 full
+; FRAG: 83 instructions, 0 half, 6 full
diff --git a/reference/xonotic-gl2/xonotic-glx-gl2-25.asm b/reference/xonotic-gl2/xonotic-glx-gl2-25.asm
index 293acc0..9882cc7 100644
--- a/reference/xonotic-gl2/xonotic-glx-gl2-25.asm
+++ b/reference/xonotic-gl2/xonotic-glx-gl2-25.asm
@@ -1,22 +1,22 @@
; options:
; VERT: new compiler
-@in(r2.w) in0
-@in(r3.x) in1
-@in(r3.y) in2
-@in(r3.z) in3
-@in(r3.w) in4
-@in(r4.x) in5
-@in(r4.y) in6
-@in(r4.z) in7
-@in(r0.x) in8
-@in(r0.y) in9
-@in(r0.z) in10
-@in(r0.w) in12
-@in(r1.x) in13
-@in(r1.y) in14
-@in(r1.z) in16
-@in(r1.w) in17
-@in(r2.x) in18
+@in(r1.z) in0
+@in(r1.w) in1
+@in(r2.x) in2
+@in(r2.y) in3
+@in(r2.z) in4
+@in(r2.w) in5
+@in(r3.x) in6
+@in(r3.y) in7
+@in(r3.z) in8
+@in(r3.w) in9
+@in(r4.x) in10
+@in(r4.y) in12
+@in(r4.z) in13
+@in(r4.w) in14
+@in(r5.x) in16
+@in(r5.y) in17
+@in(r5.z) in18
@out(r0.x) out0
@out(r0.y) out1
@out(r0.z) out2
@@ -29,79 +29,49 @@
@out(r2.y) out9
@out(r2.z) out10
@out(r2.w) out11
-(sy)(ss)mul.f r1.z, c4.x, r1.z
-mul.f r0.w, c4.x, r0.w
-mad.f32 r1.z, c4.y, r1.w, r1.z
-mad.f32 r0.w, c4.y, r1.x, r0.w
-mul.f r0.x, c4.x, r0.x
-mul.f r1.x, c5.y, r3.w
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r0.w, r0.w
-mad.f32 r1.z, c4.z, r2.x, r1.z
-mad.f32 r0.w, c4.z, r1.y, r0.w
-mad.f32 r0.x, c4.y, r0.y, r0.x
-mad.f32 r0.y, c6.y, r4.x, r1.x
-mov.f32f32 r1.x, r1.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.x, r0.x
-mad.f32 r0.y, c7.y, r4.y, r0.y
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r0.w, r0.w
-mad.f32 r0.x, c4.z, r0.z, r0.x
-mad.f32 r0.y, c8.y, r4.z, r0.y
-mov.f32f32 r0.z, r1.x
-mov.f32f32 r0.w, r0.w
-(rpt1)nop
-mov.f32f32 r2.z, r0.z
-mov.f32f32 r2.y, r0.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
-mul.f r0.z, c5.x, r3.w
-mul.f r0.w, c0.w, r2.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
-mad.f32 r0.z, c6.x, r4.x, r0.z
-mad.f32 r0.w, c1.w, r3.x, r0.w
-mov.f32f32 r2.x, r0.x
-mov.f32f32 r1.y, r0.y
-mad.f32 r0.x, c7.x, r4.y, r0.z
-mad.f32 r0.y, c2.w, r3.y, r0.w
-mad.f32 r0.x, c8.x, r4.z, r0.x
-mad.f32 r0.y, c3.w, r3.z, r0.y
-mul.f r0.z, c0.z, r2.w
-mul.f r1.x, c0.y, r2.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.w, r0.y
-mad.f32 r0.y, c1.z, r3.x, r0.z
-mad.f32 r0.z, c1.y, r3.x, r1.x
-mov.f32f32 r0.x, r0.x
-mad.f32 r0.y, c2.z, r3.y, r0.y
-mad.f32 r0.z, c2.y, r3.y, r0.z
-mul.f r1.z, c0.x, r2.w
-mov.f32f32 r1.x, r0.x
-mad.f32 r0.x, c3.z, r3.z, r0.y
-mad.f32 r0.y, c3.y, r3.z, r0.z
-mad.f32 r1.z, c1.x, r3.x, r1.z
-mul.f r1.w, c5.w, r3.w
-mov.f32f32 r0.z, r0.x
-mov.f32f32 r0.y, r0.y
-mad.f32 r0.x, c2.x, r3.y, r1.z
-mad.f32 r1.z, c6.w, r4.x, r1.w
-mad.f32 r0.x, c3.x, r3.z, r0.x
-mad.f32 r1.z, c7.w, r4.y, r1.z
-(rpt1)nop
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r2.w, r1.z
+@const(c9.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
+@const(c10.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
+(sy)(ss)mul.f r0.x, c5.y, r2.z
+mul.f r0.y, c5.x, r2.z
+mad.f32 r0.x, c6.y, r2.w, r0.x
+mad.f32 r0.y, c6.x, r2.w, r0.y
+mad.f32 r0.x, c7.y, r3.x, r0.x
+mad.f32 r0.y, c7.x, r3.x, r0.y
+mad.f32 r1.y, c8.y, r3.y, r0.x
+mad.f32 r1.x, c8.x, r3.y, r0.y
+mul.f r0.x, c0.w, r1.z
+mul.f r0.y, c0.z, r1.z
+mad.f32 r0.x, c1.w, r1.w, r0.x
+mad.f32 r0.y, c1.z, r1.w, r0.y
+mad.f32 r0.x, c2.w, r2.x, r0.x
+mad.f32 r0.y, c2.z, r2.x, r0.y
+mad.f32 r0.w, c3.w, r2.y, r0.x
+mad.f32 r0.z, c3.z, r2.y, r0.y
+mul.f r0.x, c0.y, r1.z
+mul.f r0.y, c0.x, r1.z
+mad.f32 r0.x, c1.y, r1.w, r0.x
+mad.f32 r0.y, c1.x, r1.w, r0.y
+mad.f32 r0.x, c2.y, r2.x, r0.x
+mad.f32 r1.z, c2.x, r2.x, r0.y
+mad.f32 r0.y, c3.y, r2.y, r0.x
+mad.f32 r0.x, c3.x, r2.y, r1.z
+mul.f r1.z, c5.w, r2.z
+mul.f r1.w, c4.x, r5.x
+mad.f32 r1.z, c6.w, r2.w, r1.z
+mad.f32 r1.w, c4.y, r5.y, r1.w
+mad.f32 r2.w, c7.w, r3.x, r1.z
+mad.f32 r2.z, c4.z, r5.z, r1.w
+mul.f r1.z, c4.x, r4.y
+mul.f r1.w, c4.x, r3.z
+mad.f32 r1.z, c4.y, r4.z, r1.z
+mad.f32 r1.w, c4.y, r3.w, r1.w
+mad.f32 r2.y, c4.z, r4.w, r1.z
+mad.f32 r2.x, c4.z, r4.x, r1.w
+mov.f32f32 r1.w, c9.x
mov.f32f32 r1.z, c9.x
-mov.f32f32 r3.x, c9.x
-(rpt1)nop
-mov.f32f32 r1.w, r1.z
-mov.f32f32 r1.z, r3.x
end
nop
-nop
-nop
; VERT: outputs: r0.x (0:0) r1.x (5:20) r2.x (5:21)
-; VERT: inputs: r2.w (0:0,cm=f,il=8,b=0) r3.w (0:0,cm=f,il=12,b=0) r0.x (0:0,cm=7,il=16,b=0) r0.w (0:0,cm=7,il=20,b=0) r1.z (0:0,cm=7,il=24,b=0)
-; VERT: 72 instructions, 0 half, 5 full
+; VERT: inputs: r1.z (0:0,cm=f,il=8,b=0) r2.z (0:0,cm=f,il=12,b=0) r3.z (0:0,cm=7,il=16,b=0) r4.y (0:0,cm=7,il=20,b=0) r5.x (0:0,cm=7,il=24,b=0)
+; VERT: 39 instructions, 0 half, 6 full
diff --git a/reference/xonotic-gl2/xonotic-glx-gl2-26.asm b/reference/xonotic-gl2/xonotic-glx-gl2-26.asm
index 889056b..a97e5c3 100644
--- a/reference/xonotic-gl2/xonotic-glx-gl2-26.asm
+++ b/reference/xonotic-gl2/xonotic-glx-gl2-26.asm
@@ -6,251 +6,177 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c17.x) 0xbf000000, 0x40000000, 0x00000000, 0x3f800000
+@const(c18.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.z, 0, r0.x
-bary.f r0.w, 4, r0.x
bary.f r1.x, 20, r0.x
-bary.f r1.y, 1, r0.x
+bary.f r1.y, 4, r0.x
+bary.f r0.w, 1, r0.x
mov.f32f32 r1.z, r0.z
-mul.f r1.w, r0.w, r0.w
-bary.f r2.x, 5, r0.x
-mul.f r2.y, r1.x, r1.x
-mov.f32f32 r2.z, r1.z
-mov.f32f32 r1.z, r1.y
-mad.f32 r1.w, r2.x, r2.x, r1.w
-bary.f r3.x, 21, r0.x
+mul.f r2.x, r1.x, r1.x
+bary.f r2.y, 21, r0.x
+mov.f32f32 r1.w, r0.w
+mul.f r2.z, r1.y, r1.y
+bary.f r2.w, 5, r0.x
+mad.f32 r2.x, r2.y, r2.y, r2.x
+bary.f r3.x, 22, r0.x
mov.f32f32 r3.y, r0.z
-mov.f32f32 r2.w, r1.z
-mov.f32f32 r1.z, r1.w
-bary.f r3.z, 6, r0.x
-mad.f32 r1.w, r3.x, r3.x, r2.y
-mov.f32f32 r3.w, r3.y
-mov.f32f32 r2.y, r1.y
-mad.f32 r1.z, r3.z, r3.z, r1.z
-sam (f32)(xyz)r4.y, r2.z, s#0, t#0
-(sy)(ss)add.f r2.z, r4.y, c17.x
-mov.f32f32 r1.w, r1.w
-bary.f r2.w, 22, r0.x
-mov.f32f32 r4.x, r2.y
-mov.f32f32 r2.y, r2.z
-mov.f32f32 r2.z, r0.z
-rsq r1.z, r1.z
-(ss)mov.f32f32 r1.z, r1.z
-mad.f32 r1.w, r2.w, r2.w, r1.w
-mul.f r3.y, r2.y, r2.y
+mov.f32f32 r3.w, r0.z
+sam (f32)(xyz)r4.x, r1.z, s#0, t#0
+(sy)(ss)add.f r1.z, r4.x, c17.x
+mad.f32 r1.w, r3.x, r3.x, r2.x
+mad.f32 r2.x, r2.w, r2.w, r2.z
+bary.f r2.z, 6, r0.x
+mov.f32f32 r4.w, r1.z
+mov.f32f32 r3.z, r0.w
+add.f r4.x, r4.y, c17.x
add.f r4.y, r4.z, c17.x
-mul.f r4.z, r0.w, r1.z
-mul.f r5.x, r2.x, r1.z
-mul.f r1.z, r3.z, r1.z
-mov.f32f32 r4.y, r4.y
-mov.f32f32 r4.z, r4.z
-mov.f32f32 r5.x, r5.x
-mov.f32f32 r1.z, r1.z
-mad.f32 r3.y, r4.y, r4.y, r3.y
+mul.f r1.z, r1.z, r4.w
rsq r1.w, r1.w
-(ss)mov.f32f32 r1.w, r1.w
-sam (f32)(xyzw)r5.y, r3.w, s#1, t#1
-(sy)(ss)mul.f r3.w, r6.x, c9.x
-add.f r4.x, r4.w, c17.x
-mov.f32f32 r3.y, r3.y
-mul.f r1.x, r1.x, r1.w
+(ss)mov.f32f32 r4.z, r1.w
+mov.f32f32 r5.x, r4.x
+mad.f32 r2.x, r2.z, r2.z, r2.x
mul.f r3.x, r3.x, r1.w
-mov.f32f32 r4.x, r4.x
-mul.f r1.w, r2.w, r1.w
-mov.f32f32 r2.w, r3.w
-mov.f32f32 r6.x, r2.z
-mad.f32 r2.z, r4.x, r4.x, r3.y
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r3.y, r1.w
-mov.f32f32 r1.w, r2.w
-(rpt1)nop
-rsq r2.z, r2.z
-(ss)mov.f32f32 r2.z, r2.z
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r2.w, r1.y
-mov.f32f32 r3.w, r0.z
-mul.f r2.y, r2.y, r2.z
-mul.f r4.y, r4.y, r2.z
-mul.f r2.z, r4.x, r2.z
-nop
-mov.f32f32 r2.y, r2.y
-absneg.f r0.w, (neg)r0.w
-mov.f32f32 r4.x, r4.y
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r6.y, r2.w
-mul.f r2.w, r2.y, r0.w
-absneg.f r2.x, (neg)r2.x
-mul.f r4.y, r2.y, r1.x
-mul.f r4.w, r2.y, r1.x
-mad.f32 r4.y, r4.x, r3.x, r4.y
-mad.f32 r2.w, r4.x, r2.x, r2.w
-mad.f32 r4.w, r4.x, r3.x, r4.w
-sam (f32)(xyzw)r6.x, r6.x, s#2, t#2
-(sy)mad.f32 r6.w, c7.x, r6.w, c17.w
-mul.f r6.z, r6.z, c6.z
-mov.f32f32 r2.w, r2.w
-absneg.f r3.z, (neg)r3.z
-mov.f32f32 r4.y, r4.y
-mov.f32f32 r4.w, r4.w
-mad.f32 r4.y, r2.z, r3.y, r4.y
-mad.f32 r2.w, r2.z, r3.z, r2.w
-mad.f32 r4.w, r2.z, r3.y, r4.w
-mov.f32f32 r6.w, r6.w
-mul.f r7.x, r4.y, r2.y
-mul.f r2.y, r2.w, r2.y
-mul.f r7.y, r2.w, r4.x
-mul.f r2.w, r2.w, r2.z
-mov.f32f32 r7.x, r7.x
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r7.y, r7.y
-mov.f32f32 r2.w, r2.w
-mul.f r7.x, c17.y, r7.x
-mul.f r2.y, c17.y, r2.y
-mul.f r7.y, c17.y, r7.y
-mul.f r2.w, c17.y, r2.w
-mov.f32f32 r7.x, r7.x
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r7.y, r7.y
-mov.f32f32 r2.w, r2.w
-add.f r1.x, r1.x, (neg)r7.x
-add.f r0.w, r0.w, (neg)r2.y
-add.f r2.x, r2.x, (neg)r7.y
-add.f r2.y, r3.z, (neg)r2.w
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r0.w, r0.w
-bary.f r2.w, 8, r0.x
-bary.f r3.z, 9, r0.x
-bary.f r7.x, 10, r0.x
+(ss)mov.f32f32 r1.w, r4.y
+mad.f32 r1.z, r4.x, r5.x, r1.z
mul.f r1.x, r1.x, r4.z
-mul.f r2.w, r0.w, r2.w
-mov.f32f32 r2.x, r2.x
-bary.f r4.z, 12, r0.x
-mul.f r4.x, r4.y, r4.x
-mul.f r3.z, r0.w, r3.z
-mul.f r0.w, r0.w, r7.x
-mad.f32 r2.w, r2.x, r4.z, r2.w
-mov.f32f32 r4.x, r4.x
-bary.f r4.z, 13, r0.x
-bary.f r7.x, 14, r0.x
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r2.y, r2.y
-bary.f r7.y, 16, r0.x
+mul.f r2.y, r2.y, r4.z
+mad.f32 r1.z, r1.w, r1.w, r1.z
+rsq r1.w, r2.x
+(ss)mov.f32f32 r2.x, r1.w
+mov.f32f32 r4.z, r3.x
+mul.f r5.y, r2.z, r1.w
+mov.f32f32 r4.x, r0.w
+mov.f32f32 r5.z, r1.x
+mov.f32f32 r5.w, r2.y
+rsq r1.z, r1.z
+(ss)mov.f32f32 r1.w, r1.z
+(ss)mul.f r1.z, r4.y, r1.z
+mul.f r4.y, r1.y, r2.x
+mul.f r2.x, r2.w, r2.x
+mul.f r4.w, r4.w, r1.w
+mul.f r5.x, r5.x, r1.w
+mov.f32f32 r6.x, r1.z
+sam (f32)(xyzw)r6.y, r3.y, s#1, t#1
+(sy)mul.f r1.w, r7.x, c9.x
+(ss)mov.f32f32 r3.y, r4.w
+absneg.f r1.y, (neg)r1.y
+mul.f r1.x, r4.w, r1.x
+mov.f32f32 r3.z, r5.x
+mad.f32 r1.x, r5.x, r2.y, r1.x
+mul.f r2.y, r3.y, r1.y
+absneg.f r2.w, (neg)r2.w
+mad.f32 r1.x, r1.z, r4.z, r1.x
+mul.f r1.z, r3.y, r5.z
+sam (f32)(xyzw)r7.x, r3.w, s#2, t#2
+(sy)(ss)mul.f r3.w, r7.z, c6.z
+mad.f32 r2.y, r3.z, r2.w, r2.y
+absneg.f r2.z, (neg)r2.z
+mul.f r4.x, r1.x, r3.y
+mad.f32 r1.z, r3.z, r5.w, r1.z
+mul.f r4.w, r1.x, r3.z
+mad.f32 r2.y, r6.x, r2.z, r2.y
mul.f r4.x, c17.y, r4.x
-mad.f32 r3.z, r2.x, r4.z, r3.z
-mad.f32 r0.w, r2.x, r7.x, r0.w
-mad.f32 r2.x, r2.y, r7.y, r2.w
-mov.f32f32 r2.w, r4.x
-mov.f32f32 r3.z, r3.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.x, r2.x
-add.f r2.w, r3.x, (neg)r2.w
-bary.f r3.x, 17, r0.x
-bary.f (ei)r0.x, 18, r0.x
-mul.f r0.y, c10.x, r2.x
-mov.f32f32 r2.w, r2.w
-mad.f32 r3.x, r2.y, r3.x, r3.z
-mul.f r3.z, c10.z, r2.x
-mul.f r2.x, c10.y, r2.x
-mad.f32 r1.x, r2.w, r5.x, r1.x
-mov.f32f32 r2.w, r3.x
-mad.f32 r0.x, r2.y, r0.x, r0.w
-mul.f r0.w, r4.y, r2.z
-mov.f32f32 r2.y, r4.w
-mad.f32 r0.y, c11.x, r2.w, r0.y
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r1.x, r1.x
-mad.f32 r2.z, c11.z, r2.w, r3.z
-mad.f32 r2.x, c11.y, r2.w, r2.x
-mad.f32 r0.y, c12.x, r0.x, r0.y
-mov.f32f32 r0.w, r0.w
-mad.f32 r2.z, c12.z, r0.x, r2.z
-mad.f32 r0.x, c12.y, r0.x, r2.x
-mov.f32f32 r0.y, r0.y
-mul.f r0.w, c17.y, r0.w
-mov.f32f32 r2.x, r2.z
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
-add.f r0.w, r3.y, (neg)r0.w
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r2.z, r0.y
-mov.f32f32 r0.y, r0.w
-mov.f32f32 r3.x, r2.x
+mad.f32 r1.z, r6.x, r4.z, r1.z
+mul.f r4.z, c17.y, r4.w
+mul.f r3.y, r2.y, r3.y
+add.f r4.x, r5.z, (neg)r4.x
+mul.f r3.z, r2.y, r3.z
+mul.f r2.y, r2.y, r6.x
+mul.f r3.y, c17.y, r3.y
+mul.f r4.x, r4.x, r4.y
+add.f r4.y, r5.w, (neg)r4.z
+mul.f r3.z, c17.y, r3.z
+add.f r1.y, r1.y, (neg)r3.y
+bary.f r3.y, 10, r0.x
+mad.f32 r2.x, r4.y, r2.x, r4.x
+add.f r2.w, r2.w, (neg)r3.z
+mov.f32f32 r3.z, r1.y
+bary.f r4.x, 8, r0.x
+mul.f r1.x, r1.x, r6.x
+bary.f r4.y, 9, r0.x
+mul.f r1.y, r1.y, r3.y
+mul.f r3.y, r3.z, r4.x
+mov.f32f32 r4.x, r2.w
+bary.f r4.z, 12, r0.x
+mul.f r1.x, c17.y, r1.x
+mul.f r3.z, r3.z, r4.y
+bary.f r4.y, 14, r0.x
+mad.f32 r3.y, r4.x, r4.z, r3.y
+mul.f r2.y, c17.y, r2.y
+add.f r1.x, r3.x, (neg)r1.x
+bary.f r3.x, 13, r0.x
+mad.f32 r1.y, r2.w, r4.y, r1.y
+add.f r2.y, r2.z, (neg)r2.y
+mad.f32 r1.x, r1.x, r5.y, r2.x
+mad.f32 r2.x, r4.x, r3.x, r3.z
+bary.f r2.z, 18, r0.x
+mov.f32f32 r2.w, r2.y
+bary.f r3.x, 16, r0.x
+max.f r1.x, (neg)r1.x, c17.z
+bary.f (ei)r0.x, 17, r0.x
+mad.f32 r0.y, r2.y, r2.z, r1.y
+mad.f32 r1.y, r2.w, r3.x, r3.y
+max.f r1.z, r1.z, c17.z
+(rpt1)nop
+mov.f32f32 r2.y, r1.y
+log2 r1.x, r1.x
+mul.f r1.y, c10.x, r1.y
+mad.f32 r0.x, r2.w, r0.x, r2.x
+mad.f32 r2.x, c7.x, r7.w, c17.w
+mul.f r2.z, c10.y, r2.y
+mul.f r2.y, c10.z, r2.y
mov.f32f32 r2.w, r0.x
-max.f r0.x, r2.y, c17.z
+(ss)mul.f r1.x, r2.x, r1.x
+mad.f32 r0.x, c11.x, r0.x, r1.y
+nop
+mad.f32 r1.y, c11.y, r2.w, r2.z
+mov.f32f32 r2.x, r0.y
+mad.f32 r2.y, c11.z, r2.w, r2.y
+mad.f32 r2.z, c12.x, r0.y, r0.x
+mov.f32f32 r0.x, r1.z
+mad.f32 r2.w, c12.y, r2.x, r1.y
+mad.f32 r3.x, c12.z, r2.x, r2.y
+exp2 r0.y, r1.x
+(ss)mul.f r1.x, r7.y, c6.y
+mul.f r1.y, r7.x, c6.x
+mov.f32f32 r2.x, r0.z
+mov.f32f32 r2.y, r0.w
+sam (f32)(xyz)r4.x, r0.z, s#3, t#3
(rpt1)nop
-mad.f32 r0.y, r0.y, r1.z, r1.x
-mov.f32f32 r0.x, r0.x
-mul.f r0.w, r6.y, c6.y
-sam.3d (f32)(xyz)r2.x, r2.z, s#5, t#5
-(ss)mov.f32f32 r2.w, r3.w
-mov.f32f32 r1.x, r1.y
-mov.f32f32 r0.y, r0.y
+sam.3d (f32)(xyz)r2.z, r2.z, s#5, t#5
+(rpt3)nop
+sam (f32)(xyz)r4.w, r2.x, s#4, t#4
+(sy)(ss)mad.f32 r0.z, r5.y, r3.x, r6.w
+mad.f32 r0.w, r5.x, r2.w, r6.z
+mad.f32 r2.x, r4.w, r2.z, r6.y
+nop
+mul.f r2.y, r0.z, c5.z
+mul.f r2.z, r0.w, c5.y
+mul.f r2.w, r2.x, c5.x
mov.f32f32 r0.z, r0.z
-mul.f r1.z, r6.x, c6.x
-mov.f32f32 r3.x, r1.x
-max.f r0.y, (neg)r0.y, c17.z
-mov.f32f32 r3.y, r0.z
-mov.f32f32 r0.z, r1.y
-(rpt1)nop
-mov.f32f32 r0.y, r0.y
-sam (f32)(xyz)r3.z, r2.w, s#4, t#4
-(sy)mad.f32 r1.x, r4.x, r2.z, r5.w
-mad.f32 r1.y, r3.w, r2.y, r5.z
-mad.f32 r2.x, r3.z, r2.x, r5.y
-mov.f32f32 r3.z, r0.z
-mov.f32f32 r0.z, r1.x
-mov.f32f32 r1.x, r1.y
-mov.f32f32 r1.y, r2.x
-log2 r0.y, r0.y
-(ss)mul.f r0.y, r6.w, r0.y
-mul.f r2.x, r0.z, c5.z
-mul.f r2.y, r1.x, c5.y
-mul.f r2.z, r1.y, c5.x
-mul.f r0.z, r0.z, c4.z
-mul.f r2.x, r2.x, r0.x
mul.f r2.y, r2.y, r0.x
mul.f r0.x, r2.z, r0.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r0.x, r0.x
-mul.f r1.x, r1.x, c4.y
-mul.f r1.y, r1.y, c4.x
-(rpt1)nop
-exp2 r0.y, r0.y
-(ss)mad.f32 r2.x, r6.z, r0.y, r2.x
-mad.f32 r0.w, r0.w, r0.y, r2.y
-mad.f32 r0.x, r1.z, r0.y, r0.x
-(ss)mov.f32f32 r0.y, r1.x
-mov.f32f32 r1.x, r2.x
+(ss)mad.f32 r2.y, r3.w, r0.y, r2.y
+mul.f r0.z, r0.z, c4.z
+mad.f32 r0.x, r1.x, r0.y, r0.x
+mul.f r1.x, r2.w, r1.z
+mad.f32 r0.z, c16.z, r2.y, r0.z
mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r1.y, r1.y
-mad.f32 r0.z, c16.z, r1.x, r0.z
-sam (f32)(xyz)r2.x, r3.y, s#3, t#3
-mad.f32 r0.y, c16.y, r0.w, r0.y
-(sy)mad.f32 r0.z, c8.z, r2.z, r0.z
-mad.f32 r0.x, c16.x, r0.x, r1.y
-mad.f32 r0.y, c8.y, r2.y, r0.y
-mad.f32 r0.x, c8.x, r2.x, r0.x
-mov.f32f32 r0.z, r0.z
-(rpt2)nop
-mov.f32f32 r1.z, r0.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
-(rpt1)nop
-mov.f32f32 r1.y, r0.y
-mov.f32f32 r1.x, r0.x
-end
+mad.f32 r1.z, c8.z, r4.z, r0.z
+mad.f32 r0.y, r1.y, r0.y, r1.x
+mov.f32f32 r0.z, r2.x
+mul.f r0.w, r0.w, c4.y
+nop
+mad.f32 r0.x, c16.y, r0.x, r0.w
+mul.f r0.z, r0.z, c4.x
+mad.f32 r1.y, c8.y, r4.y, r0.x
+mad.f32 r0.x, c16.x, r0.y, r0.z
nop
+mad.f32 r1.x, c8.x, r4.x, r0.x
+end
nop
; FRAG: outputs: r1.x (1:0)
; FRAG: inputs: r0.x (5:20,cm=f,il=8,b=1) r1.x (5:21,cm=f,il=12,b=1) r2.x (5:22,cm=f,il=16,b=1) r3.x (5:23,cm=f,il=20,b=1) r4.x (5:24,cm=f,il=24,b=1) r5.x (5:25,cm=f,il=28,b=1)
-; FRAG: 249 instructions, 0 half, 8 full
+; FRAG: 172 instructions, 0 half, 8 full
diff --git a/reference/xonotic-gl2/xonotic-glx-gl2-27.asm b/reference/xonotic-gl2/xonotic-glx-gl2-27.asm
index e0c9a79..a4f6c1a 100644
--- a/reference/xonotic-gl2/xonotic-glx-gl2-27.asm
+++ b/reference/xonotic-gl2/xonotic-glx-gl2-27.asm
@@ -1,22 +1,22 @@
; options:
; VERT: new compiler
-@in(r2.w) in0
-@in(r3.x) in1
-@in(r3.y) in2
-@in(r3.z) in3
-@in(r3.w) in4
-@in(r4.x) in5
-@in(r4.y) in6
-@in(r4.z) in7
-@in(r7.x) in8
-@in(r7.y) in9
-@in(r7.z) in10
-@in(r7.w) in12
-@in(r8.x) in13
-@in(r8.y) in14
-@in(r4.w) in16
-@in(r5.x) in17
-@in(r5.y) in18
+@in(r5.w) in0
+@in(r6.x) in1
+@in(r6.y) in2
+@in(r6.z) in3
+@in(r6.w) in4
+@in(r7.x) in5
+@in(r7.y) in6
+@in(r7.z) in7
+@in(r3.x) in8
+@in(r3.y) in9
+@in(r3.z) in10
+@in(r4.x) in12
+@in(r4.y) in13
+@in(r4.z) in14
+@in(r5.x) in16
+@in(r5.y) in17
+@in(r5.z) in18
@out(r0.x) out0
@out(r0.y) out1
@out(r0.z) out2
@@ -45,123 +45,65 @@
@out(r6.y) out25
@out(r6.z) out26
@out(r6.w) out27
-(sy)(ss)mul.f r0.x, c5.x, r4.w
-mul.f r0.y, c5.x, r7.w
-mad.f32 r0.x, c5.y, r5.x, r0.x
-mad.f32 r0.y, c5.y, r8.x, r0.y
-add.f r0.z, c4.x, (neg)r2.w
-mul.f r0.w, c5.x, r7.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
-mad.f32 r0.x, c5.z, r5.y, r0.x
-mad.f32 r0.y, c5.z, r8.y, r0.y
-mul.f r1.x, r0.z, r4.w
-mad.f32 r0.w, c5.y, r7.y, r0.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
-add.f r1.y, c4.y, (neg)r3.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
-mad.f32 r1.x, r1.y, r5.x, r1.x
-mad.f32 r0.w, c5.z, r7.z, r0.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r6.z, r0.x
-mov.f32f32 r6.y, r0.y
-add.f r0.x, c4.z, (neg)r3.y
-mov.f32f32 r0.y, r0.w
-mul.f r0.w, r0.z, r7.w
-mul.f r1.z, c6.y, r3.w
-mad.f32 r1.x, r0.x, r5.y, r1.x
-mov.f32f32 r6.x, r0.y
-mad.f32 r0.y, r1.y, r8.x, r0.w
-mad.f32 r0.w, c7.y, r4.x, r1.z
-mov.f32f32 r1.x, r1.x
-mad.f32 r0.w, c8.y, r4.y, r0.w
-mul.f r1.z, c6.x, r3.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.x, r1.x
-mad.f32 r0.y, r0.x, r8.y, r0.y
-mad.f32 r0.w, c9.y, r4.z, r0.w
-mad.f32 r1.z, c7.x, r4.x, r1.z
-mov.f32f32 r2.z, r1.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.w, r0.w
-mad.f32 r1.x, c8.x, r4.y, r1.z
-mul.f r0.z, r0.z, r7.x
-mov.f32f32 r2.y, r0.y
-mov.f32f32 r0.y, r0.w
-mad.f32 r0.w, c9.x, r4.z, r1.x
-mad.f32 r0.z, r1.y, r7.y, r0.z
-mul.f r1.x, c0.w, r2.w
-mov.f32f32 r1.y, r0.y
-mov.f32f32 r0.y, r0.w
-mov.f32f32 r0.z, r0.z
-mad.f32 r0.w, c1.w, r3.x, r1.x
-mul.f r1.z, c0.z, r2.w
-mov.f32f32 r0.y, r0.y
-mad.f32 r0.x, r0.x, r7.z, r0.z
-(rpt1)nop
-mov.f32f32 r1.x, r0.y
-mov.f32f32 r2.x, r0.x
-mad.f32 r0.x, c2.w, r3.y, r0.w
-mad.f32 r0.y, c1.z, r3.x, r1.z
-mad.f32 r0.x, c3.w, r3.z, r0.x
-mad.f32 r0.y, c2.z, r3.y, r0.y
-mul.f r0.z, c0.y, r2.w
-mul.f r1.z, c0.x, r2.w
-mov.f32f32 r0.w, r0.x
-mad.f32 r0.x, c3.z, r3.z, r0.y
-mad.f32 r0.y, c1.y, r3.x, r0.z
-mad.f32 r1.z, c1.x, r3.x, r1.z
-mul.f r1.w, c6.w, r3.w
-mov.f32f32 r0.z, r0.x
-mad.f32 r0.x, c2.y, r3.y, r0.y
-mad.f32 r0.y, c2.x, r3.y, r1.z
-mad.f32 r0.x, c3.y, r3.z, r0.x
-mad.f32 r1.z, c3.x, r3.z, r0.y
-mad.f32 r1.w, c7.w, r4.x, r1.w
+@const(c10.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
+@const(c11.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
+(sy)(ss)add.f r0.x, c4.x, (neg)r5.w
+mul.f r0.y, c6.y, r6.w
+mul.f r0.z, c6.x, r6.w
+mul.f r0.w, c0.w, r5.w
+mul.f r1.x, r0.x, r5.x
+add.f r1.y, c4.y, (neg)r6.x
+mul.f r1.z, r0.x, r4.x
+mul.f r0.x, r0.x, r3.x
+mad.f32 r0.y, c7.y, r7.x, r0.y
+mad.f32 r1.x, r1.y, r5.y, r1.x
+add.f r1.w, c4.z, (neg)r6.y
+mad.f32 r1.z, r1.y, r4.y, r1.z
+mad.f32 r0.x, r1.y, r3.y, r0.x
+nop
+mad.f32 r2.z, r1.w, r5.z, r1.x
+mad.f32 r2.y, r1.w, r4.z, r1.z
+mad.f32 r2.x, r1.w, r3.z, r0.x
+mad.f32 r0.x, c8.y, r7.y, r0.y
+mad.f32 r0.y, c7.x, r7.x, r0.z
+mad.f32 r1.y, c9.y, r7.z, r0.x
+mad.f32 r0.x, c8.x, r7.y, r0.y
+mad.f32 r0.y, c1.w, r6.x, r0.w
+mad.f32 r1.x, c9.x, r7.z, r0.x
+mad.f32 r0.x, c2.w, r6.y, r0.y
+mul.f r0.y, c0.z, r5.w
+mad.f32 r0.w, c3.w, r6.z, r0.x
+mad.f32 r0.x, c1.z, r6.x, r0.y
+mul.f r0.y, c0.y, r5.w
+mad.f32 r0.x, c2.z, r6.y, r0.x
+mad.f32 r0.y, c1.y, r6.x, r0.y
+mad.f32 r0.z, c3.z, r6.z, r0.x
+mad.f32 r0.x, c2.y, r6.y, r0.y
+mul.f r1.z, c0.x, r5.w
+mad.f32 r0.y, c3.y, r6.z, r0.x
+mad.f32 r0.x, c1.x, r6.x, r1.z
+mul.f r1.z, c6.w, r6.w
+mad.f32 r0.x, c2.x, r6.y, r0.x
+mad.f32 r1.z, c7.w, r7.x, r1.z
+mad.f32 r0.x, c3.x, r6.z, r0.x
+mad.f32 r6.w, c8.w, r7.y, r1.z
+mul.f r1.z, c5.x, r5.x
+mul.f r1.w, c5.x, r4.x
+mad.f32 r1.z, c5.y, r5.y, r1.z
+mad.f32 r1.w, c5.y, r4.y, r1.w
+mad.f32 r6.z, c5.z, r5.z, r1.z
+mad.f32 r6.y, c5.z, r4.z, r1.w
+mul.f r1.z, c5.x, r3.x
+mov.f32f32 r5.w, c10.x
+mad.f32 r1.z, c5.y, r3.y, r1.z
+mov.f32f32 r4.w, c10.x
+mad.f32 r6.x, c5.z, r3.z, r1.z
+mov.f32f32 r3.w, c10.x
mov.f32f32 r2.w, c10.x
-mov.f32f32 r0.y, r0.x
-mov.f32f32 r0.x, r1.z
-mad.f32 r1.z, c8.w, r4.y, r1.w
-mov.f32f32 r5.w, r2.w
-mov.f32f32 r1.w, r5.y
-mov.f32f32 r2.w, r5.x
-mov.f32f32 r6.w, r1.z
-mov.f32f32 r1.z, r4.w
-mov.f32f32 r5.z, r1.w
-mov.f32f32 r5.y, r2.w
-mov.f32f32 r1.w, c10.x
-mov.f32f32 r5.x, r1.z
-mov.f32f32 r1.z, r8.y
-mov.f32f32 r2.w, r8.x
-mov.f32f32 r4.w, r1.w
-mov.f32f32 r1.w, r7.w
-mov.f32f32 r4.z, r1.z
-mov.f32f32 r4.y, r2.w
-mov.f32f32 r1.z, c10.x
-mov.f32f32 r4.x, r1.w
-mov.f32f32 r1.w, r7.z
-mov.f32f32 r2.w, r7.y
-mov.f32f32 r3.w, r1.z
-mov.f32f32 r1.z, r7.x
-mov.f32f32 r3.z, r1.w
-mov.f32f32 r3.y, r2.w
mov.f32f32 r1.w, c10.x
-mov.f32f32 r3.x, r1.z
mov.f32f32 r1.z, c10.x
-mov.f32f32 r7.x, c10.x
-mov.f32f32 r2.w, r1.w
-nop
-mov.f32f32 r1.w, r1.z
-mov.f32f32 r1.z, r7.x
end
-nop
; VERT: outputs: r0.x (0:0) r1.x (5:20) r2.x (5:21) r3.x (5:22) r4.x (5:23) r5.x (5:24) r6.x (5:25)
-; VERT: inputs: r2.w (0:0,cm=f,il=8,b=0) r3.w (0:0,cm=f,il=12,b=0) r7.x (0:0,cm=7,il=16,b=0) r7.w (0:0,cm=7,il=20,b=0) r4.w (0:0,cm=7,il=24,b=0)
-; VERT: 116 instructions, 0 half, 9 full
+; VERT: inputs: r5.w (0:0,cm=f,il=8,b=0) r6.w (0:0,cm=f,il=12,b=0) r3.x (0:0,cm=7,il=16,b=0) r4.x (0:0,cm=7,il=20,b=0) r5.x (0:0,cm=7,il=24,b=0)
+; VERT: 56 instructions, 0 half, 8 full
diff --git a/reference/xonotic-gl2/xonotic-glx-gl2-28.asm b/reference/xonotic-gl2/xonotic-glx-gl2-28.asm
index be69eca..8941a0d 100644
--- a/reference/xonotic-gl2/xonotic-glx-gl2-28.asm
+++ b/reference/xonotic-gl2/xonotic-glx-gl2-28.asm
@@ -6,175 +6,117 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c11.x) 0xbf000000, 0x40000000, 0x00000000, 0x3f800000
+@const(c12.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.z, 0, r0.x
-bary.f r0.w, 4, r0.x
bary.f r1.x, 8, r0.x
-bary.f r1.y, 1, r0.x
+bary.f r1.y, 4, r0.x
+bary.f r0.w, 1, r0.x
mov.f32f32 r1.z, r0.z
-mul.f r1.w, r0.w, r0.w
-bary.f r2.x, 5, r0.x
-mul.f r2.y, r1.x, r1.x
-mov.f32f32 r2.z, r1.z
-mov.f32f32 r1.z, r1.y
-mad.f32 r1.w, r2.x, r2.x, r1.w
-bary.f r3.x, 9, r0.x
+mul.f r2.x, r1.x, r1.x
+bary.f r2.y, 9, r0.x
+mov.f32f32 r1.w, r0.w
+mul.f r2.z, r1.y, r1.y
+bary.f r2.w, 5, r0.x
+mad.f32 r2.x, r2.y, r2.y, r2.x
+bary.f r3.x, 10, r0.x
mov.f32f32 r3.y, r0.z
-mov.f32f32 r2.w, r1.z
-mov.f32f32 r1.z, r1.w
-bary.f r1.w, 6, r0.x
-mad.f32 r2.y, r3.x, r3.x, r2.y
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r3.z, r1.y
+mov.f32f32 r3.z, r0.w
+sam (f32)(xyz)r3.w, r1.z, s#0, t#0
+(sy)(ss)add.f r1.z, r3.w, c11.x
+mad.f32 r1.w, r3.x, r3.x, r2.x
+mad.f32 r2.x, r2.w, r2.w, r2.z
+bary.f (ei)r0.x, 6, r0.x
+mov.f32f32 r0.y, r1.z
+add.f r2.z, r4.x, c11.x
+add.f r3.w, r4.y, c11.x
+sam (f32)(xyzw)r4.x, r3.y, s#1, t#1
+mad.f32 r2.x, r0.x, r0.x, r2.x
+mul.f r1.z, r1.z, r0.y
+(ss)mov.f32f32 r3.y, r2.z
+rsq r1.w, r1.w
+(ss)mov.f32f32 r3.z, r1.w
+mul.f r3.x, r3.x, r1.w
+(ss)mov.f32f32 r1.w, r3.w
+mad.f32 r1.z, r2.z, r3.y, r1.z
+mul.f r1.x, r1.x, r3.z
+mul.f r2.y, r2.y, r3.z
mad.f32 r1.z, r1.w, r1.w, r1.z
-sam (f32)(xyz)r3.w, r2.z, s#0, t#0
-(sy)(ss)add.f r2.z, r3.w, c11.x
-mov.f32f32 r2.y, r2.y
-bary.f (ei)r0.x, 10, r0.x
-mov.f32f32 r3.z, r3.z
-mov.f32f32 r0.y, r2.z
-mov.f32f32 r2.z, r0.z
+rsq r1.w, r2.x
+(ss)mov.f32f32 r2.x, r1.w
+mov.f32f32 r2.z, r3.x
+mul.f r0.x, r0.x, r1.w
+mov.f32f32 r3.z, r1.x
+mov.f32f32 r5.x, r2.y
+mul.f r1.y, r1.y, r2.x
rsq r1.z, r1.z
-(ss)mov.f32f32 r1.z, r1.z
-mad.f32 r2.y, r0.x, r0.x, r2.y
-mul.f r2.w, r0.y, r0.y
-add.f r3.w, r4.x, c11.x
-mul.f r0.w, r0.w, r1.z
-mul.f r2.x, r2.x, r1.z
-mul.f r1.z, r1.w, r1.z
-mov.f32f32 r1.w, r3.w
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r1.z, r1.z
-mad.f32 r2.w, r1.w, r1.w, r2.w
-rsq r2.y, r2.y
-(ss)mov.f32f32 r2.y, r2.y
-sam (f32)(xyzw)r3.y, r3.y, s#1, t#1
-(sy)mul.f r4.x, r4.x, c9.x
-mul.f r4.z, r3.w, c4.z
-mov.f32f32 r2.w, r2.w
-add.f r4.y, r4.y, c11.x
-mul.f r1.x, r1.x, r2.y
-mul.f r3.x, r3.x, r2.y
-mul.f r0.x, r0.x, r2.y
-mov.f32f32 r2.y, r4.y
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r0.x, r0.x
-mad.f32 r2.w, r2.y, r2.y, r2.w
-mov.f32f32 r4.x, r4.x
-mov.f32f32 r4.y, r4.z
-mov.f32f32 r4.z, r2.z
-mul.f r2.z, r3.w, c5.z
-mov.f32f32 r3.w, r4.x
-nop
-rsq r2.w, r2.w
-(ss)mov.f32f32 r2.w, r2.w
-mul.f r4.x, r3.z, c4.y
-mul.f r4.w, r3.y, c4.x
-mov.f32f32 r5.x, r1.y
-mul.f r0.y, r0.y, r2.w
-mul.f r1.w, r1.w, r2.w
-mul.f r2.y, r2.y, r2.w
-nop
+(ss)mov.f32f32 r5.y, r1.z
+(ss)mul.f r1.z, r3.w, r1.z
+mul.f r2.x, r2.w, r2.x
+(sy)mul.f r1.w, r4.w, c9.x
+mul.f r0.y, r0.y, r5.y
+mul.f r2.w, r3.y, r5.y
+mov.f32f32 r3.y, r1.z
+mul.f r3.w, r4.z, c4.z
+mul.f r1.x, r0.y, r1.x
mov.f32f32 r0.y, r0.y
-mov.f32f32 r2.w, r1.w
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r1.w, r3.w
-mul.f r3.w, r0.y, r1.x
-mul.f r5.y, r0.y, r1.x
-mad.f32 r3.w, r2.w, r3.x, r3.w
-mad.f32 r5.y, r2.w, r3.x, r5.y
-mov.f32f32 r4.x, r4.x
-mov.f32f32 r5.z, r4.w
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r4.w, r5.y
-mad.f32 r3.w, r2.y, r0.x, r3.w
-mad.f32 r5.y, r2.y, r0.x, r4.w
-mov.f32f32 r4.w, r5.x
-mul.f r3.z, r3.z, c5.y
-mul.f r0.y, r3.w, r0.y
-mul.f r2.w, r3.w, r2.w
-mul.f r2.y, r3.w, r2.y
-mov.f32f32 r3.w, r5.y
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r2.y, r2.y
-max.f r3.w, r3.w, c11.z
+mad.f32 r1.x, r2.w, r2.y, r1.x
+mov.f32f32 r2.y, r2.w
+mad.f32 r1.x, r1.z, r2.z, r1.x
+mul.f r1.z, r0.y, r3.z
+mul.f r2.w, r4.z, c5.z
+mov.f32f32 r4.z, r0.z
+mul.f r0.y, r1.x, r0.y
+mad.f32 r1.z, r2.y, r5.x, r1.z
+mul.f r2.y, r1.x, r2.y
+mul.f r1.x, r1.x, r3.y
mul.f r0.y, c11.y, r0.y
-mul.f r2.w, c11.y, r2.w
+mad.f32 r1.z, r3.y, r2.z, r1.z
mul.f r2.y, c11.y, r2.y
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r2.y, r2.y
-mul.f r2.z, r2.z, r3.w
-add.f r0.y, r1.x, (neg)r0.y
-add.f r1.x, r3.x, (neg)r2.w
-add.f r0.x, r0.x, (neg)r2.y
-mov.f32f32 r2.y, r2.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r0.x, r0.x
-mul.f r2.z, r3.z, r3.w
-mul.f r0.y, r0.y, r0.w
-mul.f r0.w, r3.y, c5.x
-mad.f32 r0.y, r1.x, r2.x, r0.y
-mov.f32f32 r1.x, r2.z
-sam (f32)(xyzw)r2.z, r4.z, s#2, t#2
-(sy)mad.f32 r2.x, c7.x, r3.y, c11.w
-mul.f r3.x, r3.x, c6.z
-mov.f32f32 r0.y, r0.y
-mul.f r0.w, r0.w, r3.w
-mad.f32 r0.x, r0.x, r1.z, r0.y
-mov.f32f32 r0.y, r2.x
-mul.f r1.z, r2.w, c6.y
-mul.f r2.x, r2.z, c6.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r1.y, r1.y
+mul.f r1.x, c11.y, r1.x
+add.f r0.y, r3.z, (neg)r0.y
+max.f r1.z, r1.z, c11.z
+add.f r2.y, r5.x, (neg)r2.y
+mul.f r2.z, r4.x, c5.x
+mul.f r0.y, r0.y, r1.y
+mov.f32f32 r1.y, r1.z
+mad.f32 r0.y, r2.y, r2.x, r0.y
+add.f r1.x, r3.x, (neg)r1.x
+mul.f r1.z, r2.z, r1.z
+mov.f32f32 r4.w, r0.w
+mul.f r2.x, r2.w, r1.y
+mad.f32 r0.x, r1.x, r0.x, r0.y
+mul.f r0.y, r4.y, c5.y
+mul.f r1.x, r4.y, c4.y
+mul.f r2.y, r4.x, c4.x
max.f r0.x, (neg)r0.x, c11.z
-(rpt2)nop
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r2.z, r0.z
-mov.f32f32 r2.w, r1.y
-(rpt3)nop
+mul.f r0.y, r0.y, r1.y
+sam (f32)(xyzw)r2.z, r4.z, s#2, t#2
+(sy)mul.f r1.y, r3.x, c6.z
+mul.f r2.w, r2.w, c6.y
+mad.f32 r3.x, c7.x, r3.y, c11.w
+mul.f r2.z, r2.z, c6.x
+(ss)nop
+sam (f32)(xyz)r4.x, r0.z, s#3, t#3
+nop
log2 r0.x, r0.x
-(ss)mul.f r0.x, r0.y, r0.x
-(rpt1)nop
-sam (f32)(xyz)r4.z, r2.z, s#3, t#3
nop
-mov.f32f32 r0.x, r0.x
+(ss)mul.f r0.x, r3.x, r0.x
(rpt5)nop
exp2 r0.x, r0.x
-(ss)mad.f32 r0.y, r3.x, r0.x, r2.y
-mad.f32 r0.z, r1.z, r0.x, r1.x
-(ss)mad.f32 r0.x, r2.x, r0.x, r0.w
-nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.x, r0.x
-nop
-mad.f32 r0.y, c10.z, r0.y, r4.y
-mad.f32 r0.z, c10.y, r0.z, r4.x
-(sy)mad.f32 r0.y, c8.z, r5.x, r0.y
-mad.f32 r0.z, c8.y, r4.w, r0.z
-mad.f32 r0.x, c10.x, r0.x, r5.z
+(ss)mad.f32 r0.z, r1.y, r0.x, r2.x
+mad.f32 r0.y, r2.w, r0.x, r0.y
+(ss)mad.f32 r0.x, r2.z, r0.x, r1.z
nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-mad.f32 r0.x, c8.x, r4.z, r0.x
+mad.f32 r0.z, c10.z, r0.z, r3.w
+mad.f32 r0.y, c10.y, r0.y, r1.x
+(sy)mad.f32 r1.z, c8.z, r4.z, r0.z
+mad.f32 r1.y, c8.y, r4.y, r0.y
+mad.f32 r0.x, c10.x, r0.x, r2.y
nop
-mov.f32f32 r1.z, r0.y
-mov.f32f32 r1.y, r0.z
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
-mov.f32f32 r1.x, r0.x
+mad.f32 r1.x, c8.x, r4.x, r0.x
end
-nop
-nop
-nop
; FRAG: outputs: r1.x (1:0)
; FRAG: inputs: r0.x (5:20,cm=f,il=8,b=1) r1.x (5:21,cm=f,il=12,b=1) r2.x (5:22,cm=f,il=16,b=1)
-; FRAG: 178 instructions, 0 half, 6 full
+; FRAG: 113 instructions, 0 half, 6 full
diff --git a/reference/xonotic-gl2/xonotic-glx-gl2-29.asm b/reference/xonotic-gl2/xonotic-glx-gl2-29.asm
index 9d9cf0a..a1e4514 100644
--- a/reference/xonotic-gl2/xonotic-glx-gl2-29.asm
+++ b/reference/xonotic-gl2/xonotic-glx-gl2-29.asm
@@ -1,22 +1,22 @@
; options:
; VERT: new compiler
-@in(r3.w) in0
-@in(r4.x) in1
-@in(r4.y) in2
-@in(r4.z) in3
-@in(r4.w) in4
-@in(r5.x) in5
-@in(r5.y) in6
-@in(r5.z) in7
-@in(r0.x) in8
-@in(r0.y) in9
-@in(r0.z) in10
-@in(r0.w) in12
-@in(r1.x) in13
-@in(r1.y) in14
-@in(r1.z) in16
-@in(r1.w) in17
-@in(r2.x) in18
+@in(r2.w) in0
+@in(r3.x) in1
+@in(r3.y) in2
+@in(r3.z) in3
+@in(r3.w) in4
+@in(r4.x) in5
+@in(r4.y) in6
+@in(r4.z) in7
+@in(r4.w) in8
+@in(r5.x) in9
+@in(r5.y) in10
+@in(r5.z) in12
+@in(r5.w) in13
+@in(r6.x) in14
+@in(r6.y) in16
+@in(r6.z) in17
+@in(r6.w) in18
@out(r0.x) out0
@out(r0.y) out1
@out(r0.z) out2
@@ -33,99 +33,65 @@
@out(r3.y) out13
@out(r3.z) out14
@out(r3.w) out15
-(sy)(ss)mul.f r2.y, c5.x, r1.z
-mul.f r2.z, c5.x, r0.w
-mad.f32 r2.y, c5.y, r1.w, r2.y
-mad.f32 r2.z, c5.y, r1.x, r2.z
-add.f r2.w, c4.x, (neg)r3.w
-mul.f r3.x, c5.x, r0.x
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r2.z, r2.z
-mad.f32 r2.y, c5.z, r2.x, r2.y
-mad.f32 r2.z, c5.z, r1.y, r2.z
-mul.f r1.z, r2.w, r1.z
-mad.f32 r3.x, c5.y, r0.y, r3.x
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r2.z, r2.z
-add.f r5.w, c4.y, (neg)r4.x
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r2.z, r2.z
-mad.f32 r1.z, r5.w, r1.w, r1.z
-mad.f32 r1.w, c5.z, r0.z, r3.x
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r3.z, r2.y
-mov.f32f32 r3.y, r2.z
-add.f r6.x, c4.z, (neg)r4.y
-mov.f32f32 r1.w, r1.w
-mul.f r0.w, r2.w, r0.w
-mul.f r2.y, c6.y, r4.w
-mad.f32 r1.z, r6.x, r2.x, r1.z
-mov.f32f32 r3.x, r1.w
-mad.f32 r0.w, r5.w, r1.x, r0.w
-mad.f32 r1.x, c7.y, r5.x, r2.y
-mov.f32f32 r1.z, r1.z
-mad.f32 r1.x, c8.y, r5.y, r1.x
-mul.f r1.w, c6.x, r4.w
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.z, r1.z
-mad.f32 r0.w, r6.x, r1.y, r0.w
-mad.f32 r1.x, c9.y, r5.z, r1.x
-mad.f32 r1.y, c7.x, r5.x, r1.w
-mov.f32f32 r2.z, r1.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.x, r1.x
-mad.f32 r1.y, c8.x, r5.y, r1.y
-mul.f r0.x, r2.w, r0.x
-mov.f32f32 r2.y, r0.w
-mov.f32f32 r0.w, r1.x
-mad.f32 r1.x, c9.x, r5.z, r1.y
-mad.f32 r0.x, r5.w, r0.y, r0.x
-mul.f r0.y, c0.w, r3.w
-mov.f32f32 r1.y, r0.w
-mov.f32f32 r0.w, r1.x
-mov.f32f32 r0.x, r0.x
-mad.f32 r0.y, c1.w, r4.x, r0.y
-mul.f r1.z, c0.z, r3.w
-mov.f32f32 r0.w, r0.w
-mad.f32 r0.x, r6.x, r0.z, r0.x
-(rpt1)nop
-mov.f32f32 r1.x, r0.w
-mov.f32f32 r2.x, r0.x
-mad.f32 r0.x, c2.w, r4.y, r0.y
-mad.f32 r0.y, c1.z, r4.x, r1.z
-mad.f32 r0.x, c3.w, r4.z, r0.x
-mad.f32 r0.y, c2.z, r4.y, r0.y
-mul.f r0.z, c0.y, r3.w
-mul.f r1.z, c0.x, r3.w
-mov.f32f32 r0.w, r0.x
-mad.f32 r0.x, c3.z, r4.z, r0.y
-mad.f32 r0.y, c1.y, r4.x, r0.z
-mad.f32 r1.z, c1.x, r4.x, r1.z
-mul.f r1.w, c6.w, r4.w
-mov.f32f32 r0.z, r0.x
-mad.f32 r0.x, c2.y, r4.y, r0.y
-mad.f32 r0.y, c2.x, r4.y, r1.z
-mad.f32 r0.x, c3.y, r4.z, r0.x
-mad.f32 r1.z, c3.x, r4.z, r0.y
-mad.f32 r1.w, c7.w, r5.x, r1.w
+@const(c10.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
+@const(c11.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
+(sy)(ss)add.f r0.x, c4.x, (neg)r2.w
+mul.f r0.y, c6.y, r3.w
+mul.f r0.z, c6.x, r3.w
+mul.f r0.w, c0.w, r2.w
+mul.f r1.x, r0.x, r6.y
+add.f r1.y, c4.y, (neg)r3.x
+mul.f r1.z, r0.x, r5.z
+mul.f r0.x, r0.x, r4.w
+mad.f32 r0.y, c7.y, r4.x, r0.y
+mad.f32 r1.x, r1.y, r6.z, r1.x
+add.f r1.w, c4.z, (neg)r3.y
+mad.f32 r1.z, r1.y, r5.w, r1.z
+mad.f32 r0.x, r1.y, r5.x, r0.x
+nop
+mad.f32 r2.z, r1.w, r6.w, r1.x
+mad.f32 r2.y, r1.w, r6.x, r1.z
+mad.f32 r2.x, r1.w, r5.y, r0.x
+mad.f32 r0.x, c8.y, r4.y, r0.y
+mad.f32 r0.y, c7.x, r4.x, r0.z
+mad.f32 r1.y, c9.y, r4.z, r0.x
+mad.f32 r0.x, c8.x, r4.y, r0.y
+mad.f32 r0.y, c1.w, r3.x, r0.w
+mad.f32 r1.x, c9.x, r4.z, r0.x
+mad.f32 r0.x, c2.w, r3.y, r0.y
+mul.f r0.y, c0.z, r2.w
+mad.f32 r0.w, c3.w, r3.z, r0.x
+mad.f32 r0.x, c1.z, r3.x, r0.y
+mul.f r0.y, c0.y, r2.w
+mad.f32 r0.x, c2.z, r3.y, r0.x
+mad.f32 r0.y, c1.y, r3.x, r0.y
+mad.f32 r0.z, c3.z, r3.z, r0.x
+mad.f32 r0.x, c2.y, r3.y, r0.y
+mul.f r1.z, c0.x, r2.w
+mad.f32 r0.y, c3.y, r3.z, r0.x
+mad.f32 r0.x, c1.x, r3.x, r1.z
+mul.f r1.z, c6.w, r3.w
+mad.f32 r0.x, c2.x, r3.y, r0.x
+mad.f32 r1.z, c7.w, r4.x, r1.z
+mad.f32 r0.x, c3.x, r3.z, r0.x
+mad.f32 r3.w, c8.w, r4.y, r1.z
+mul.f r1.z, c5.x, r6.y
+mul.f r1.w, c5.x, r5.z
+mad.f32 r1.z, c5.y, r6.z, r1.z
+mad.f32 r1.w, c5.y, r5.w, r1.w
+mad.f32 r3.z, c5.z, r6.w, r1.z
+mad.f32 r3.y, c5.z, r6.x, r1.w
+mul.f r1.z, c5.x, r4.w
mov.f32f32 r2.w, c10.x
-mov.f32f32 r0.y, r0.x
-mov.f32f32 r0.x, r1.z
-mad.f32 r1.z, c8.w, r5.y, r1.w
-mov.f32f32 r2.w, r2.w
+mad.f32 r1.z, c5.y, r5.x, r1.z
mov.f32f32 r1.w, c10.x
-mov.f32f32 r4.x, c10.x
-mov.f32f32 r3.w, r1.z
-nop
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r1.z, r4.x
+mad.f32 r3.x, c5.z, r5.y, r1.z
+mov.f32f32 r1.z, c10.x
end
nop
+nop
+nop
; VERT: outputs: r0.x (0:0) r1.x (5:20) r2.x (5:21) r3.x (5:22)
-; VERT: inputs: r3.w (0:0,cm=f,il=8,b=0) r4.w (0:0,cm=f,il=12,b=0) r0.x (0:0,cm=7,il=16,b=0) r0.w (0:0,cm=7,il=20,b=0) r1.z (0:0,cm=7,il=24,b=0)
-; VERT: 92 instructions, 0 half, 7 full
+; VERT: inputs: r2.w (0:0,cm=f,il=8,b=0) r3.w (0:0,cm=f,il=12,b=0) r4.w (0:0,cm=7,il=16,b=0) r5.z (0:0,cm=7,il=20,b=0) r6.y (0:0,cm=7,il=24,b=0)
+; VERT: 53 instructions, 0 half, 7 full
diff --git a/reference/xonotic-gl2/xonotic-glx-gl2-30.asm b/reference/xonotic-gl2/xonotic-glx-gl2-30.asm
index d225560..45395a7 100644
--- a/reference/xonotic-gl2/xonotic-glx-gl2-30.asm
+++ b/reference/xonotic-gl2/xonotic-glx-gl2-30.asm
@@ -6,39 +6,28 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c5.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)bary.f r0.z, 0, r0.x
-bary.f (ei)r0.x, 1, r0.x
+bary.f (ei)r0.w, 1, r0.x
(rpt1)nop
-mov.f32f32 r0.y, r0.z
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r1.x, r0.y
-mov.f32f32 r0.y, r0.z
-mov.f32f32 r1.y, r0.w
-mov.f32f32 r0.z, r0.x
-(rpt4)nop
-sam (f32)(xyzw)r0.w, r1.x, s#0, t#0
-(sy)mul.f r0.x, r1.z, c4.x
-(ss)mul.f r1.y, r1.y, c2.z
-mul.f r1.x, r1.x, c2.y
-mul.f r0.w, r0.w, c2.x
-mov.f32f32 r0.x, r0.x
-sam (f32)(xyz)r1.z, r0.y, s#1, t#1
-(sy)(ss)mad.f32 r0.y, c3.x, r1.z, r0.w
-mad.f32 r0.z, c3.z, r2.x, r1.y
-mad.f32 r0.w, c3.y, r1.w, r1.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.w, r0.x
-mov.f32f32 r1.x, r0.y
-mov.f32f32 r1.z, r0.z
-mov.f32f32 r1.y, r0.w
+mov.f32f32 r0.x, r0.z
+mov.f32f32 r0.y, r0.w
+(rpt1)nop
+sam (f32)(xyz)r2.x, r0.z, s#1, t#1
+(rpt3)nop
+(ss)nop
+sam (f32)(xyzw)r0.x, r0.x, s#0, t#0
+(sy)mul.f r0.z, r0.z, c2.z
+(ss)mul.f r0.y, r0.y, c2.y
+mad.f32 r1.z, c3.z, r2.z, r0.z
+mad.f32 r1.y, c3.y, r2.y, r0.y
+mul.f r0.x, r0.x, c2.x
+mul.f r1.w, r0.w, c4.x
+mad.f32 r1.x, c3.x, r2.x, r0.x
end
nop
+nop
; FRAG: outputs: r1.x (1:0)
; FRAG: inputs: r0.x (5:20,cm=f,il=8,b=1)
-; FRAG: 36 instructions, 0 half, 3 full
+; FRAG: 23 instructions, 0 half, 3 full
diff --git a/reference/xonotic-gl2/xonotic-glx-gl2-31.asm b/reference/xonotic-gl2/xonotic-glx-gl2-31.asm
index 25893e0..b0eaf15 100644
--- a/reference/xonotic-gl2/xonotic-glx-gl2-31.asm
+++ b/reference/xonotic-gl2/xonotic-glx-gl2-31.asm
@@ -16,51 +16,36 @@
@out(r1.y) out5
@out(r1.z) out6
@out(r1.w) out7
+@const(c8.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)mul.f r1.x, c4.y, r0.x
mul.f r0.x, c4.x, r0.x
mad.f32 r1.x, c5.y, r0.y, r1.x
mad.f32 r0.x, c5.x, r0.y, r0.x
mad.f32 r0.y, c6.y, r0.z, r1.x
mad.f32 r0.x, c6.x, r0.z, r0.x
-mad.f32 r0.y, c7.y, r0.w, r0.y
-mad.f32 r0.x, c7.x, r0.w, r0.x
-mul.f r0.z, c0.w, r1.z
-mul.f r0.w, c0.z, r1.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
-mad.f32 r0.z, c1.w, r1.w, r0.z
-mad.f32 r0.w, c1.z, r1.w, r0.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
-(rpt1)nop
-mov.f32f32 r1.y, r0.y
-mov.f32f32 r1.x, r0.x
-mad.f32 r0.x, c2.w, r2.x, r0.z
-mad.f32 r0.y, c2.z, r2.x, r0.w
-mad.f32 r0.x, c3.w, r2.y, r0.x
-mad.f32 r0.y, c3.z, r2.y, r0.y
-mul.f r2.z, c0.y, r1.z
-mul.f r1.z, c0.x, r1.z
-mov.f32f32 r0.w, r0.x
-mov.f32f32 r0.z, r0.y
-mad.f32 r0.x, c1.y, r1.w, r2.z
-mad.f32 r0.y, c1.x, r1.w, r1.z
+mad.f32 r1.y, c7.y, r0.w, r0.y
+mad.f32 r1.x, c7.x, r0.w, r0.x
+mul.f r0.x, c0.w, r1.z
+mul.f r0.y, c0.z, r1.z
+mad.f32 r0.x, c1.w, r1.w, r0.x
+mad.f32 r0.y, c1.z, r1.w, r0.y
+mad.f32 r0.x, c2.w, r2.x, r0.x
+mad.f32 r0.y, c2.z, r2.x, r0.y
+mad.f32 r0.w, c3.w, r2.y, r0.x
+mad.f32 r0.z, c3.z, r2.y, r0.y
+mul.f r0.x, c0.y, r1.z
+mul.f r0.y, c0.x, r1.z
+mad.f32 r0.x, c1.y, r1.w, r0.x
+mad.f32 r0.y, c1.x, r1.w, r0.y
mad.f32 r0.x, c2.y, r2.x, r0.x
-mad.f32 r0.y, c2.x, r2.x, r0.y
-mad.f32 r0.x, c3.y, r2.y, r0.x
-mad.f32 r1.z, c3.x, r2.y, r0.y
-(rpt1)nop
-mov.f32f32 r0.y, r0.x
-mov.f32f32 r0.x, r1.z
+mad.f32 r1.z, c2.x, r2.x, r0.y
+mad.f32 r0.y, c3.y, r2.y, r0.x
+mad.f32 r0.x, c3.x, r2.y, r1.z
+mov.f32f32 r1.w, c8.x
mov.f32f32 r1.z, c8.x
-mov.f32f32 r2.x, c8.x
-(rpt1)nop
-mov.f32f32 r1.w, r1.z
-mov.f32f32 r1.z, r2.x
end
nop
-nop
; VERT: outputs: r0.x (0:0) r1.x (5:20)
; VERT: inputs: r1.z (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=f,il=12,b=0)
-; VERT: 45 instructions, 0 half, 3 full
+; VERT: 27 instructions, 0 half, 3 full
diff --git a/reference/xonotic-gl2/xonotic-glx-gl2-32.asm b/reference/xonotic-gl2/xonotic-glx-gl2-32.asm
index 8c54389..3b778fa 100644
--- a/reference/xonotic-gl2/xonotic-glx-gl2-32.asm
+++ b/reference/xonotic-gl2/xonotic-glx-gl2-32.asm
@@ -6,31 +6,20 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c3.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)bary.f r0.z, 0, r0.x
-bary.f (ei)r0.x, 1, r0.x
-(rpt1)nop
-mov.f32f32 r0.y, r0.z
-mov.f32f32 r0.x, r0.x
-(rpt1)nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.x
+bary.f (ei)r0.w, 1, r0.x
(rpt5)nop
-sam (f32)(xyzw)r0.x, r0.y, s#0, t#0
-(sy)mul.f r0.w, r0.w, c2.x
-(ss)mul.f r0.z, r0.z, c1.z
-mul.f r0.y, r0.y, c1.y
-mul.f r0.x, r0.x, c1.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.z, r0.z
-mov.f32f32 r1.y, r0.y
-mov.f32f32 r1.x, r0.x
-mov.f32f32 r0.x, r0.w
-(rpt2)nop
-mov.f32f32 r1.w, r0.x
+sam (f32)(xyzw)r0.x, r0.z, s#0, t#0
+(sy)mul.f r1.w, r0.w, c2.x
+mul.f r1.z, r0.z, c1.z
+mul.f r1.y, r0.y, c1.y
+mul.f r1.x, r0.x, c1.x
end
nop
nop
+nop
; FRAG: outputs: r1.x (1:0)
; FRAG: inputs: r0.x (5:20,cm=f,il=8,b=1)
-; FRAG: 31 instructions, 0 half, 2 full
+; FRAG: 14 instructions, 0 half, 2 full
diff --git a/reference/xonotic-gl2/xonotic-glx-gl2-33.asm b/reference/xonotic-gl2/xonotic-glx-gl2-33.asm
index 25893e0..b0eaf15 100644
--- a/reference/xonotic-gl2/xonotic-glx-gl2-33.asm
+++ b/reference/xonotic-gl2/xonotic-glx-gl2-33.asm
@@ -16,51 +16,36 @@
@out(r1.y) out5
@out(r1.z) out6
@out(r1.w) out7
+@const(c8.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)mul.f r1.x, c4.y, r0.x
mul.f r0.x, c4.x, r0.x
mad.f32 r1.x, c5.y, r0.y, r1.x
mad.f32 r0.x, c5.x, r0.y, r0.x
mad.f32 r0.y, c6.y, r0.z, r1.x
mad.f32 r0.x, c6.x, r0.z, r0.x
-mad.f32 r0.y, c7.y, r0.w, r0.y
-mad.f32 r0.x, c7.x, r0.w, r0.x
-mul.f r0.z, c0.w, r1.z
-mul.f r0.w, c0.z, r1.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
-mad.f32 r0.z, c1.w, r1.w, r0.z
-mad.f32 r0.w, c1.z, r1.w, r0.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
-(rpt1)nop
-mov.f32f32 r1.y, r0.y
-mov.f32f32 r1.x, r0.x
-mad.f32 r0.x, c2.w, r2.x, r0.z
-mad.f32 r0.y, c2.z, r2.x, r0.w
-mad.f32 r0.x, c3.w, r2.y, r0.x
-mad.f32 r0.y, c3.z, r2.y, r0.y
-mul.f r2.z, c0.y, r1.z
-mul.f r1.z, c0.x, r1.z
-mov.f32f32 r0.w, r0.x
-mov.f32f32 r0.z, r0.y
-mad.f32 r0.x, c1.y, r1.w, r2.z
-mad.f32 r0.y, c1.x, r1.w, r1.z
+mad.f32 r1.y, c7.y, r0.w, r0.y
+mad.f32 r1.x, c7.x, r0.w, r0.x
+mul.f r0.x, c0.w, r1.z
+mul.f r0.y, c0.z, r1.z
+mad.f32 r0.x, c1.w, r1.w, r0.x
+mad.f32 r0.y, c1.z, r1.w, r0.y
+mad.f32 r0.x, c2.w, r2.x, r0.x
+mad.f32 r0.y, c2.z, r2.x, r0.y
+mad.f32 r0.w, c3.w, r2.y, r0.x
+mad.f32 r0.z, c3.z, r2.y, r0.y
+mul.f r0.x, c0.y, r1.z
+mul.f r0.y, c0.x, r1.z
+mad.f32 r0.x, c1.y, r1.w, r0.x
+mad.f32 r0.y, c1.x, r1.w, r0.y
mad.f32 r0.x, c2.y, r2.x, r0.x
-mad.f32 r0.y, c2.x, r2.x, r0.y
-mad.f32 r0.x, c3.y, r2.y, r0.x
-mad.f32 r1.z, c3.x, r2.y, r0.y
-(rpt1)nop
-mov.f32f32 r0.y, r0.x
-mov.f32f32 r0.x, r1.z
+mad.f32 r1.z, c2.x, r2.x, r0.y
+mad.f32 r0.y, c3.y, r2.y, r0.x
+mad.f32 r0.x, c3.x, r2.y, r1.z
+mov.f32f32 r1.w, c8.x
mov.f32f32 r1.z, c8.x
-mov.f32f32 r2.x, c8.x
-(rpt1)nop
-mov.f32f32 r1.w, r1.z
-mov.f32f32 r1.z, r2.x
end
nop
-nop
; VERT: outputs: r0.x (0:0) r1.x (5:20)
; VERT: inputs: r1.z (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=f,il=12,b=0)
-; VERT: 45 instructions, 0 half, 3 full
+; VERT: 27 instructions, 0 half, 3 full
diff --git a/reference/xonotic-gl2/xonotic-glx-gl2-34.asm b/reference/xonotic-gl2/xonotic-glx-gl2-34.asm
index f0423b2..50c6e03 100644
--- a/reference/xonotic-gl2/xonotic-glx-gl2-34.asm
+++ b/reference/xonotic-gl2/xonotic-glx-gl2-34.asm
@@ -6,35 +6,24 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c4.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)bary.f r0.z, 4, r0.x
bary.f r0.w, 5, r0.x
bary.f r1.x, 2, r0.x
bary.f r1.y, 1, r0.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-mad.f32 r1.x, c2.z, r1.x, c1.z
-mad.f32 r1.y, c2.y, r1.y, c1.y
-mov.f32f32 r1.z, r0.z
-mov.f32f32 r1.w, r0.w
bary.f (ei)r0.x, 0, r0.x
-(rpt4)nop
-sam (f32)(xyzw)r1.w, r1.z, s#0, t#0
-(sy)mul.f r0.y, r2.z, c3.x
-mul.f r0.z, r2.y, r1.x
-mul.f r0.w, r2.x, r1.y
-mad.f32 r0.x, c2.x, r0.x, c1.x
-mov.f32f32 r0.y, r0.y
-(ss)mov.f32f32 r1.z, r0.z
-mov.f32f32 r1.y, r0.w
-mul.f r0.x, r1.w, r0.x
-mov.f32f32 r0.y, r0.y
(rpt2)nop
-mov.f32f32 r1.w, r0.y
-mov.f32f32 r1.x, r0.x
+sam (f32)(xyzw)r2.x, r0.z, s#0, t#0
+mad.f32 r0.y, c2.z, r1.x, c1.z
+(ss)mad.f32 r0.z, c2.y, r1.y, c1.y
+mad.f32 r0.x, c2.x, r0.x, c1.x
+(sy)mul.f r1.w, r2.w, c3.x
+mul.f r1.z, r2.z, r0.y
+mul.f r1.y, r2.y, r0.z
+mul.f r1.x, r2.x, r0.x
end
nop
-nop
; FRAG: outputs: r1.x (1:0)
; FRAG: inputs: r0.x (5:20,cm=f,il=8,b=1) r1.x (5:21,cm=f,il=12,b=1)
-; FRAG: 32 instructions, 0 half, 3 full
+; FRAG: 17 instructions, 0 half, 3 full
diff --git a/reference/xonotic-gl2/xonotic-glx-gl2-35.asm b/reference/xonotic-gl2/xonotic-glx-gl2-35.asm
index 6f0d253..2fd4b2d 100644
--- a/reference/xonotic-gl2/xonotic-glx-gl2-35.asm
+++ b/reference/xonotic-gl2/xonotic-glx-gl2-35.asm
@@ -1,13 +1,13 @@
; options:
; VERT: new compiler
-@in(r1.x) in0
-@in(r1.y) in1
-@in(r1.z) in2
-@in(r1.w) in3
-@in(r3.x) in4
-@in(r3.y) in5
-@in(r3.z) in6
-@in(r3.w) in7
+@in(r2.z) in0
+@in(r2.w) in1
+@in(r3.x) in2
+@in(r3.y) in3
+@in(r1.x) in4
+@in(r1.y) in5
+@in(r1.z) in6
+@in(r1.w) in7
@in(r0.x) in8
@in(r0.y) in9
@in(r0.z) in10
@@ -24,55 +24,36 @@
@out(r2.y) out9
@out(r2.z) out10
@out(r2.w) out11
+@const(c8.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)mul.f r2.x, c4.y, r0.x
mul.f r0.x, c4.x, r0.x
mad.f32 r2.x, c5.y, r0.y, r2.x
mad.f32 r0.x, c5.x, r0.y, r0.x
mad.f32 r0.y, c6.y, r0.z, r2.x
mad.f32 r0.x, c6.x, r0.z, r0.x
-mad.f32 r0.y, c7.y, r0.w, r0.y
-mad.f32 r0.x, c7.x, r0.w, r0.x
-mul.f r0.z, c0.w, r1.x
-mul.f r0.w, c0.z, r1.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
-mad.f32 r0.z, c1.w, r1.y, r0.z
-mad.f32 r0.w, c1.z, r1.y, r0.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
-(rpt1)nop
-mov.f32f32 r2.y, r0.y
-mov.f32f32 r2.x, r0.x
-mad.f32 r0.x, c2.w, r1.z, r0.z
-mad.f32 r0.y, c2.z, r1.z, r0.w
-mad.f32 r0.x, c3.w, r1.w, r0.x
-mad.f32 r0.y, c3.z, r1.w, r0.y
-mul.f r2.z, c0.y, r1.x
-mul.f r1.x, c0.x, r1.x
-mov.f32f32 r0.w, r0.x
-mov.f32f32 r0.z, r0.y
-mad.f32 r0.x, c1.y, r1.y, r2.z
-mad.f32 r0.y, c1.x, r1.y, r1.x
-mad.f32 r0.x, c2.y, r1.z, r0.x
-mad.f32 r0.y, c2.x, r1.z, r0.y
-mad.f32 r0.x, c3.y, r1.w, r0.x
-mad.f32 r1.x, c3.x, r1.w, r0.y
-(rpt1)nop
-mov.f32f32 r0.y, r0.x
-mov.f32f32 r0.x, r1.x
-mov.f32f32 r1.x, c8.x
-mov.f32f32 r1.y, c8.x
-(rpt1)nop
-mov.f32f32 r2.w, r1.x
-mov.f32f32 r2.z, r1.y
-mov.f32f32 r1.w, r3.w
-mov.f32f32 r1.z, r3.z
-mov.f32f32 r1.y, r3.y
-mov.f32f32 r1.x, r3.x
+mad.f32 r2.y, c7.y, r0.w, r0.y
+mad.f32 r2.x, c7.x, r0.w, r0.x
+mul.f r0.x, c0.w, r2.z
+mul.f r0.y, c0.z, r2.z
+mad.f32 r0.x, c1.w, r2.w, r0.x
+mad.f32 r0.y, c1.z, r2.w, r0.y
+mad.f32 r0.x, c2.w, r3.x, r0.x
+mad.f32 r0.y, c2.z, r3.x, r0.y
+mad.f32 r0.w, c3.w, r3.y, r0.x
+mad.f32 r0.z, c3.z, r3.y, r0.y
+mul.f r0.x, c0.y, r2.z
+mul.f r0.y, c0.x, r2.z
+mad.f32 r0.x, c1.y, r2.w, r0.x
+mad.f32 r0.y, c1.x, r2.w, r0.y
+mad.f32 r0.x, c2.y, r3.x, r0.x
+mad.f32 r2.z, c2.x, r3.x, r0.y
+mad.f32 r0.y, c3.y, r3.y, r0.x
+mad.f32 r0.x, c3.x, r3.y, r2.z
+mov.f32f32 r2.w, c8.x
+mov.f32f32 r2.z, c8.x
end
nop
-nop
; VERT: outputs: r0.x (0:0) r1.x (5:20) r2.x (5:21)
-; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) r3.x (0:0,cm=f,il=12,b=0) r0.x (0:0,cm=f,il=16,b=0)
-; VERT: 49 instructions, 0 half, 4 full
+; VERT: inputs: r2.z (0:0,cm=f,il=8,b=0) r1.x (0:0,cm=f,il=12,b=0) r0.x (0:0,cm=f,il=16,b=0)
+; VERT: 27 instructions, 0 half, 4 full
diff --git a/reference/xonotic-gl2/xonotic-glx-gl2-36.asm b/reference/xonotic-gl2/xonotic-glx-gl2-36.asm
index b6d4a3c..2725ce9 100644
--- a/reference/xonotic-gl2/xonotic-glx-gl2-36.asm
+++ b/reference/xonotic-gl2/xonotic-glx-gl2-36.asm
@@ -6,47 +6,32 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c6.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)bary.f r0.z, 4, r0.x
-bary.f r0.w, 5, r0.x
bary.f r1.x, 2, r0.x
+bary.f r0.w, 5, r0.x
bary.f r1.y, 1, r0.x
mov.f32f32 r1.z, r0.z
-mov.f32f32 r0.z, r0.z
+mad.f32 r1.x, c3.z, r1.x, c2.z
mov.f32f32 r1.w, r0.w
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.x, r1.z
-mov.f32f32 r2.z, r0.z
-mov.f32f32 r2.y, r1.w
-mov.f32f32 r2.w, r0.w
-mad.f32 r0.z, c3.z, r1.x, c2.z
-mad.f32 r0.w, c3.y, r1.y, c2.y
+mad.f32 r1.y, c3.y, r1.y, c2.y
bary.f (ei)r0.x, 0, r0.x
-(rpt1)nop
-sam (f32)(xyzw)r1.x, r2.x, s#0, t#0
-(sy)mul.f r0.y, r1.w, c5.x
-mul.f r0.z, r1.z, r0.z
-mul.f r0.w, r1.y, r0.w
+sam (f32)(xyz)r2.x, r0.z, s#1, t#1
+(rpt3)nop
+sam (f32)(xyzw)r2.w, r1.z, s#0, t#0
+(sy)mul.f r0.y, r3.y, r1.x
+(ss)mul.f r0.z, r3.x, r1.y
+mad.f32 r1.z, c4.z, r2.z, r0.y
+mad.f32 r1.y, c4.y, r2.y, r0.z
mad.f32 r0.x, c3.x, r0.x, c2.x
-mov.f32f32 r0.y, r0.y
-sam (f32)(xyz)r1.y, r2.z, s#1, t#1
-(sy)mad.f32 r0.z, c4.z, r1.w, r0.z
-mad.f32 r0.w, c4.y, r1.z, r0.w
-mul.f r0.x, r1.x, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-mad.f32 r0.x, c4.x, r1.y, r0.x
-mov.f32f32 r1.w, r0.y
-mov.f32f32 r1.z, r0.z
-mov.f32f32 r1.y, r0.w
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
-mov.f32f32 r1.x, r0.x
-end
-nop
+(rpt1)nop
+mul.f r1.w, r3.z, c5.x
+mul.f r0.x, r2.w, r0.x
nop
+mad.f32 r1.x, c4.x, r2.x, r0.x
+end
nop
; FRAG: outputs: r1.x (1:0)
; FRAG: inputs: r0.x (5:20,cm=f,il=8,b=1) r1.x (5:21,cm=f,il=12,b=1)
-; FRAG: 40 instructions, 0 half, 3 full
+; FRAG: 27 instructions, 0 half, 4 full
diff --git a/reference/xonotic-gl2/xonotic-glx-gl2-37.asm b/reference/xonotic-gl2/xonotic-glx-gl2-37.asm
index 6f0d253..2fd4b2d 100644
--- a/reference/xonotic-gl2/xonotic-glx-gl2-37.asm
+++ b/reference/xonotic-gl2/xonotic-glx-gl2-37.asm
@@ -1,13 +1,13 @@
; options:
; VERT: new compiler
-@in(r1.x) in0
-@in(r1.y) in1
-@in(r1.z) in2
-@in(r1.w) in3
-@in(r3.x) in4
-@in(r3.y) in5
-@in(r3.z) in6
-@in(r3.w) in7
+@in(r2.z) in0
+@in(r2.w) in1
+@in(r3.x) in2
+@in(r3.y) in3
+@in(r1.x) in4
+@in(r1.y) in5
+@in(r1.z) in6
+@in(r1.w) in7
@in(r0.x) in8
@in(r0.y) in9
@in(r0.z) in10
@@ -24,55 +24,36 @@
@out(r2.y) out9
@out(r2.z) out10
@out(r2.w) out11
+@const(c8.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)mul.f r2.x, c4.y, r0.x
mul.f r0.x, c4.x, r0.x
mad.f32 r2.x, c5.y, r0.y, r2.x
mad.f32 r0.x, c5.x, r0.y, r0.x
mad.f32 r0.y, c6.y, r0.z, r2.x
mad.f32 r0.x, c6.x, r0.z, r0.x
-mad.f32 r0.y, c7.y, r0.w, r0.y
-mad.f32 r0.x, c7.x, r0.w, r0.x
-mul.f r0.z, c0.w, r1.x
-mul.f r0.w, c0.z, r1.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
-mad.f32 r0.z, c1.w, r1.y, r0.z
-mad.f32 r0.w, c1.z, r1.y, r0.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
-(rpt1)nop
-mov.f32f32 r2.y, r0.y
-mov.f32f32 r2.x, r0.x
-mad.f32 r0.x, c2.w, r1.z, r0.z
-mad.f32 r0.y, c2.z, r1.z, r0.w
-mad.f32 r0.x, c3.w, r1.w, r0.x
-mad.f32 r0.y, c3.z, r1.w, r0.y
-mul.f r2.z, c0.y, r1.x
-mul.f r1.x, c0.x, r1.x
-mov.f32f32 r0.w, r0.x
-mov.f32f32 r0.z, r0.y
-mad.f32 r0.x, c1.y, r1.y, r2.z
-mad.f32 r0.y, c1.x, r1.y, r1.x
-mad.f32 r0.x, c2.y, r1.z, r0.x
-mad.f32 r0.y, c2.x, r1.z, r0.y
-mad.f32 r0.x, c3.y, r1.w, r0.x
-mad.f32 r1.x, c3.x, r1.w, r0.y
-(rpt1)nop
-mov.f32f32 r0.y, r0.x
-mov.f32f32 r0.x, r1.x
-mov.f32f32 r1.x, c8.x
-mov.f32f32 r1.y, c8.x
-(rpt1)nop
-mov.f32f32 r2.w, r1.x
-mov.f32f32 r2.z, r1.y
-mov.f32f32 r1.w, r3.w
-mov.f32f32 r1.z, r3.z
-mov.f32f32 r1.y, r3.y
-mov.f32f32 r1.x, r3.x
+mad.f32 r2.y, c7.y, r0.w, r0.y
+mad.f32 r2.x, c7.x, r0.w, r0.x
+mul.f r0.x, c0.w, r2.z
+mul.f r0.y, c0.z, r2.z
+mad.f32 r0.x, c1.w, r2.w, r0.x
+mad.f32 r0.y, c1.z, r2.w, r0.y
+mad.f32 r0.x, c2.w, r3.x, r0.x
+mad.f32 r0.y, c2.z, r3.x, r0.y
+mad.f32 r0.w, c3.w, r3.y, r0.x
+mad.f32 r0.z, c3.z, r3.y, r0.y
+mul.f r0.x, c0.y, r2.z
+mul.f r0.y, c0.x, r2.z
+mad.f32 r0.x, c1.y, r2.w, r0.x
+mad.f32 r0.y, c1.x, r2.w, r0.y
+mad.f32 r0.x, c2.y, r3.x, r0.x
+mad.f32 r2.z, c2.x, r3.x, r0.y
+mad.f32 r0.y, c3.y, r3.y, r0.x
+mad.f32 r0.x, c3.x, r3.y, r2.z
+mov.f32f32 r2.w, c8.x
+mov.f32f32 r2.z, c8.x
end
nop
-nop
; VERT: outputs: r0.x (0:0) r1.x (5:20) r2.x (5:21)
-; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) r3.x (0:0,cm=f,il=12,b=0) r0.x (0:0,cm=f,il=16,b=0)
-; VERT: 49 instructions, 0 half, 4 full
+; VERT: inputs: r2.z (0:0,cm=f,il=8,b=0) r1.x (0:0,cm=f,il=12,b=0) r0.x (0:0,cm=f,il=16,b=0)
+; VERT: 27 instructions, 0 half, 4 full
diff --git a/reference/xonotic-gl2/xonotic-glx-gl2-38.asm b/reference/xonotic-gl2/xonotic-glx-gl2-38.asm
index 5865f1f..6eea1c6 100644
--- a/reference/xonotic-gl2/xonotic-glx-gl2-38.asm
+++ b/reference/xonotic-gl2/xonotic-glx-gl2-38.asm
@@ -6,159 +6,109 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c9.x) 0xbf000000, 0x40000000, 0x00000000, 0x3f800000
+@const(c10.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.z, 0, r0.x
-bary.f r0.w, 4, r0.x
bary.f r1.x, 8, r0.x
-bary.f r1.y, 1, r0.x
+bary.f r1.y, 4, r0.x
+bary.f r0.w, 1, r0.x
mov.f32f32 r1.z, r0.z
-mul.f r1.w, r0.w, r0.w
-bary.f r2.x, 5, r0.x
-mul.f r2.y, r1.x, r1.x
-mov.f32f32 r2.z, r1.z
-mov.f32f32 r1.z, r1.y
-mad.f32 r1.w, r2.x, r2.x, r1.w
-bary.f r3.x, 9, r0.x
+mul.f r2.x, r1.x, r1.x
+bary.f r2.y, 9, r0.x
+mov.f32f32 r1.w, r0.w
+mul.f r2.z, r1.y, r1.y
+bary.f r2.w, 5, r0.x
+mad.f32 r2.x, r2.y, r2.y, r2.x
+bary.f r3.x, 10, r0.x
mov.f32f32 r3.y, r0.z
-mov.f32f32 r2.w, r1.z
-mov.f32f32 r1.z, r1.w
-bary.f r1.w, 6, r0.x
-mad.f32 r2.y, r3.x, r3.x, r2.y
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r3.z, r1.y
+mov.f32f32 r3.z, r0.w
+sam (f32)(xyz)r3.w, r1.z, s#0, t#0
+(sy)(ss)add.f r1.z, r3.w, c9.x
+mad.f32 r1.w, r3.x, r3.x, r2.x
+mad.f32 r2.x, r2.w, r2.w, r2.z
+bary.f (ei)r0.x, 6, r0.x
+mov.f32f32 r0.y, r1.z
+add.f r2.z, r4.x, c9.x
+add.f r3.w, r4.y, c9.x
+sam (f32)(xyzw)r4.x, r3.y, s#1, t#1
+mad.f32 r2.x, r0.x, r0.x, r2.x
+mul.f r1.z, r1.z, r0.y
+(ss)mov.f32f32 r3.y, r2.z
+rsq r1.w, r1.w
+(ss)mov.f32f32 r3.z, r1.w
+mul.f r3.x, r3.x, r1.w
+(ss)mov.f32f32 r1.w, r3.w
+mad.f32 r1.z, r2.z, r3.y, r1.z
+mul.f r1.x, r1.x, r3.z
+mul.f r2.y, r2.y, r3.z
mad.f32 r1.z, r1.w, r1.w, r1.z
-sam (f32)(xyz)r3.w, r2.z, s#0, t#0
-(sy)(ss)add.f r2.z, r3.w, c9.x
-mov.f32f32 r2.y, r2.y
-bary.f (ei)r0.x, 10, r0.x
-mov.f32f32 r3.z, r3.z
-mov.f32f32 r0.y, r2.z
-mov.f32f32 r0.z, r0.z
+rsq r1.w, r2.x
+(ss)mov.f32f32 r2.x, r1.w
+mov.f32f32 r2.z, r3.x
+mul.f r0.x, r0.x, r1.w
+mov.f32f32 r3.z, r1.x
+mov.f32f32 r5.x, r2.y
+mul.f r1.y, r1.y, r2.x
rsq r1.z, r1.z
-(ss)mov.f32f32 r1.z, r1.z
-mad.f32 r2.y, r0.x, r0.x, r2.y
-mul.f r2.z, r0.y, r0.y
-add.f r2.w, r4.x, c9.x
-mul.f r0.w, r0.w, r1.z
-mul.f r2.x, r2.x, r1.z
-mul.f r1.z, r1.w, r1.z
-mov.f32f32 r1.w, r2.w
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r1.z, r1.z
-mad.f32 r2.z, r1.w, r1.w, r2.z
-rsq r2.y, r2.y
-(ss)mov.f32f32 r2.y, r2.y
-sam (f32)(xyzw)r3.y, r3.y, s#1, t#1
-(sy)mul.f r2.w, r4.x, c7.x
-mul.f r4.x, r3.w, c3.z
-mov.f32f32 r2.z, r2.z
-add.f r4.y, r4.y, c9.x
-mul.f r1.x, r1.x, r2.y
-mul.f r3.x, r3.x, r2.y
-mul.f r0.x, r0.x, r2.y
-mov.f32f32 r2.y, r4.y
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r0.x, r0.x
-mad.f32 r2.z, r2.y, r2.y, r2.z
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r4.x, r4.x
-mov.f32f32 r4.y, r0.z
-mul.f r0.z, r3.w, c4.z
-mov.f32f32 r2.w, r2.w
-nop
-rsq r2.z, r2.z
-(ss)mov.f32f32 r2.z, r2.z
-mul.f r3.w, r3.z, c3.y
-mul.f r4.z, r3.y, c3.x
-mov.f32f32 r1.y, r1.y
-mul.f r0.y, r0.y, r2.z
-mul.f r1.w, r1.w, r2.z
-mul.f r2.y, r2.y, r2.z
-nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r2.z, r1.w
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r1.w, r2.w
-mul.f r2.w, r0.y, r1.x
-mul.f r4.w, r0.y, r1.x
-mad.f32 r2.w, r2.z, r3.x, r2.w
-mad.f32 r4.w, r2.z, r3.x, r4.w
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r5.x, r4.z
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r4.z, r4.w
-mad.f32 r2.w, r2.y, r0.x, r2.w
-mad.f32 r4.w, r2.y, r0.x, r4.z
-mov.f32f32 r4.z, r1.y
-mul.f r1.y, r3.z, c4.y
-mul.f r0.y, r2.w, r0.y
-mul.f r2.z, r2.w, r2.z
-mul.f r2.y, r2.w, r2.y
-mov.f32f32 r2.w, r4.w
+(ss)mov.f32f32 r5.y, r1.z
+(ss)mul.f r1.z, r3.w, r1.z
+mul.f r2.x, r2.w, r2.x
+(sy)mul.f r1.w, r4.w, c7.x
+mul.f r0.y, r0.y, r5.y
+mul.f r2.w, r3.y, r5.y
+mov.f32f32 r3.y, r1.z
+mul.f r3.w, r4.z, c3.z
+mul.f r1.x, r0.y, r1.x
mov.f32f32 r0.y, r0.y
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r2.y, r2.y
-max.f r2.w, r2.w, c9.z
+mad.f32 r1.x, r2.w, r2.y, r1.x
+mov.f32f32 r2.y, r2.w
+mad.f32 r1.x, r1.z, r2.z, r1.x
+mul.f r1.z, r0.y, r3.z
+mul.f r2.w, r4.z, c4.z
+mad.f32 r1.z, r2.y, r5.x, r1.z
+mul.f r0.y, r1.x, r0.y
+mad.f32 r1.z, r3.y, r2.z, r1.z
+mul.f r2.y, r1.x, r2.y
+mul.f r1.x, r1.x, r3.y
mul.f r0.y, c9.y, r0.y
-mul.f r2.z, c9.y, r2.z
+max.f r1.z, r1.z, c9.z
mul.f r2.y, c9.y, r2.y
-mov.f32f32 r2.w, r2.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r2.y, r2.y
-mul.f r0.z, r0.z, r2.w
-add.f r0.y, r1.x, (neg)r0.y
-add.f r1.x, r3.x, (neg)r2.z
-add.f r0.x, r0.x, (neg)r2.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r0.x, r0.x
-mul.f r1.y, r1.y, r2.w
-mul.f r0.y, r0.y, r0.w
-mul.f r0.w, r3.y, c4.x
-mad.f32 r0.y, r1.x, r2.x, r0.y
-mov.f32f32 r1.x, r1.y
-sam (f32)(xyzw)r5.y, r4.y, s#2, t#2
-(sy)mad.f32 r1.y, c6.x, r6.x, c9.w
-mul.f r2.x, r5.w, c5.z
-mov.f32f32 r0.y, r0.y
-mul.f r0.w, r0.w, r2.w
-mad.f32 r0.x, r0.x, r1.z, r0.y
-mov.f32f32 r0.y, r1.y
-mul.f r1.y, r5.z, c5.y
-mul.f r1.z, r5.y, c5.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.w, r0.w
+mul.f r2.z, r4.x, c4.x
+add.f r0.y, r3.z, (neg)r0.y
+mov.f32f32 r3.y, r1.z
+mul.f r3.z, r4.y, c4.y
+mul.f r1.z, r2.z, r1.z
+mul.f r0.y, r0.y, r1.y
+add.f r1.y, r5.x, (neg)r2.y
+mul.f r2.y, r2.w, r3.y
+mul.f r2.z, r3.z, r3.y
+mul.f r1.x, c9.y, r1.x
+mad.f32 r0.y, r1.y, r2.x, r0.y
+mul.f r1.y, r4.y, c3.y
+mul.f r2.x, r4.x, c3.x
+add.f r1.x, r3.x, (neg)r1.x
+sam (f32)(xyzw)r2.w, r0.z, s#2, t#2
+(sy)(ss)mad.f32 r0.z, c6.x, r3.z, c9.w
+mul.f r0.w, r3.y, c5.z
+mul.f r3.x, r3.x, c5.y
+mad.f32 r0.x, r1.x, r0.x, r0.y
(rpt1)nop
+mul.f r0.y, r2.w, c5.x
max.f r0.x, (neg)r0.x, c9.z
-(rpt2)nop
-mov.f32f32 r0.x, r0.x
(rpt5)nop
log2 r0.x, r0.x
-(ss)mul.f r0.x, r0.y, r0.x
-(rpt2)nop
-mov.f32f32 r0.x, r0.x
+(ss)mul.f r0.x, r0.z, r0.x
(rpt5)nop
exp2 r0.x, r0.x
-(ss)mad.f32 r0.y, r2.x, r0.x, r0.z
-mad.f32 r0.z, r1.y, r0.x, r1.x
-(ss)mad.f32 r0.x, r1.z, r0.x, r0.w
-nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.x, r0.x
-nop
-mad.f32 r0.y, c8.z, r0.y, r4.x
-mad.f32 r0.z, c8.y, r0.z, r3.w
-mad.f32 r0.x, c8.x, r0.x, r5.x
+(ss)mad.f32 r0.z, r0.w, r0.x, r2.y
+mad.f32 r0.w, r3.x, r0.x, r2.z
+(ss)mad.f32 r0.x, r0.y, r0.x, r1.z
nop
-mov.f32f32 r1.z, r0.y
-mov.f32f32 r1.y, r0.z
-mov.f32f32 r1.x, r0.x
+mad.f32 r1.z, c8.z, r0.z, r3.w
+mad.f32 r1.y, c8.y, r0.w, r1.y
+mad.f32 r1.x, c8.x, r0.x, r2.x
end
; FRAG: outputs: r1.x (1:0)
; FRAG: inputs: r0.x (5:20,cm=f,il=8,b=1) r1.x (5:21,cm=f,il=12,b=1) r2.x (5:22,cm=f,il=16,b=1)
-; FRAG: 167 instructions, 0 half, 7 full
+; FRAG: 111 instructions, 0 half, 6 full
diff --git a/reference/xonotic-gl2/xonotic-glx-gl2-39.asm b/reference/xonotic-gl2/xonotic-glx-gl2-39.asm
index 9d9cf0a..a1e4514 100644
--- a/reference/xonotic-gl2/xonotic-glx-gl2-39.asm
+++ b/reference/xonotic-gl2/xonotic-glx-gl2-39.asm
@@ -1,22 +1,22 @@
; options:
; VERT: new compiler
-@in(r3.w) in0
-@in(r4.x) in1
-@in(r4.y) in2
-@in(r4.z) in3
-@in(r4.w) in4
-@in(r5.x) in5
-@in(r5.y) in6
-@in(r5.z) in7
-@in(r0.x) in8
-@in(r0.y) in9
-@in(r0.z) in10
-@in(r0.w) in12
-@in(r1.x) in13
-@in(r1.y) in14
-@in(r1.z) in16
-@in(r1.w) in17
-@in(r2.x) in18
+@in(r2.w) in0
+@in(r3.x) in1
+@in(r3.y) in2
+@in(r3.z) in3
+@in(r3.w) in4
+@in(r4.x) in5
+@in(r4.y) in6
+@in(r4.z) in7
+@in(r4.w) in8
+@in(r5.x) in9
+@in(r5.y) in10
+@in(r5.z) in12
+@in(r5.w) in13
+@in(r6.x) in14
+@in(r6.y) in16
+@in(r6.z) in17
+@in(r6.w) in18
@out(r0.x) out0
@out(r0.y) out1
@out(r0.z) out2
@@ -33,99 +33,65 @@
@out(r3.y) out13
@out(r3.z) out14
@out(r3.w) out15
-(sy)(ss)mul.f r2.y, c5.x, r1.z
-mul.f r2.z, c5.x, r0.w
-mad.f32 r2.y, c5.y, r1.w, r2.y
-mad.f32 r2.z, c5.y, r1.x, r2.z
-add.f r2.w, c4.x, (neg)r3.w
-mul.f r3.x, c5.x, r0.x
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r2.z, r2.z
-mad.f32 r2.y, c5.z, r2.x, r2.y
-mad.f32 r2.z, c5.z, r1.y, r2.z
-mul.f r1.z, r2.w, r1.z
-mad.f32 r3.x, c5.y, r0.y, r3.x
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r2.z, r2.z
-add.f r5.w, c4.y, (neg)r4.x
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r2.z, r2.z
-mad.f32 r1.z, r5.w, r1.w, r1.z
-mad.f32 r1.w, c5.z, r0.z, r3.x
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r3.z, r2.y
-mov.f32f32 r3.y, r2.z
-add.f r6.x, c4.z, (neg)r4.y
-mov.f32f32 r1.w, r1.w
-mul.f r0.w, r2.w, r0.w
-mul.f r2.y, c6.y, r4.w
-mad.f32 r1.z, r6.x, r2.x, r1.z
-mov.f32f32 r3.x, r1.w
-mad.f32 r0.w, r5.w, r1.x, r0.w
-mad.f32 r1.x, c7.y, r5.x, r2.y
-mov.f32f32 r1.z, r1.z
-mad.f32 r1.x, c8.y, r5.y, r1.x
-mul.f r1.w, c6.x, r4.w
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.z, r1.z
-mad.f32 r0.w, r6.x, r1.y, r0.w
-mad.f32 r1.x, c9.y, r5.z, r1.x
-mad.f32 r1.y, c7.x, r5.x, r1.w
-mov.f32f32 r2.z, r1.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.x, r1.x
-mad.f32 r1.y, c8.x, r5.y, r1.y
-mul.f r0.x, r2.w, r0.x
-mov.f32f32 r2.y, r0.w
-mov.f32f32 r0.w, r1.x
-mad.f32 r1.x, c9.x, r5.z, r1.y
-mad.f32 r0.x, r5.w, r0.y, r0.x
-mul.f r0.y, c0.w, r3.w
-mov.f32f32 r1.y, r0.w
-mov.f32f32 r0.w, r1.x
-mov.f32f32 r0.x, r0.x
-mad.f32 r0.y, c1.w, r4.x, r0.y
-mul.f r1.z, c0.z, r3.w
-mov.f32f32 r0.w, r0.w
-mad.f32 r0.x, r6.x, r0.z, r0.x
-(rpt1)nop
-mov.f32f32 r1.x, r0.w
-mov.f32f32 r2.x, r0.x
-mad.f32 r0.x, c2.w, r4.y, r0.y
-mad.f32 r0.y, c1.z, r4.x, r1.z
-mad.f32 r0.x, c3.w, r4.z, r0.x
-mad.f32 r0.y, c2.z, r4.y, r0.y
-mul.f r0.z, c0.y, r3.w
-mul.f r1.z, c0.x, r3.w
-mov.f32f32 r0.w, r0.x
-mad.f32 r0.x, c3.z, r4.z, r0.y
-mad.f32 r0.y, c1.y, r4.x, r0.z
-mad.f32 r1.z, c1.x, r4.x, r1.z
-mul.f r1.w, c6.w, r4.w
-mov.f32f32 r0.z, r0.x
-mad.f32 r0.x, c2.y, r4.y, r0.y
-mad.f32 r0.y, c2.x, r4.y, r1.z
-mad.f32 r0.x, c3.y, r4.z, r0.x
-mad.f32 r1.z, c3.x, r4.z, r0.y
-mad.f32 r1.w, c7.w, r5.x, r1.w
+@const(c10.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
+@const(c11.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
+(sy)(ss)add.f r0.x, c4.x, (neg)r2.w
+mul.f r0.y, c6.y, r3.w
+mul.f r0.z, c6.x, r3.w
+mul.f r0.w, c0.w, r2.w
+mul.f r1.x, r0.x, r6.y
+add.f r1.y, c4.y, (neg)r3.x
+mul.f r1.z, r0.x, r5.z
+mul.f r0.x, r0.x, r4.w
+mad.f32 r0.y, c7.y, r4.x, r0.y
+mad.f32 r1.x, r1.y, r6.z, r1.x
+add.f r1.w, c4.z, (neg)r3.y
+mad.f32 r1.z, r1.y, r5.w, r1.z
+mad.f32 r0.x, r1.y, r5.x, r0.x
+nop
+mad.f32 r2.z, r1.w, r6.w, r1.x
+mad.f32 r2.y, r1.w, r6.x, r1.z
+mad.f32 r2.x, r1.w, r5.y, r0.x
+mad.f32 r0.x, c8.y, r4.y, r0.y
+mad.f32 r0.y, c7.x, r4.x, r0.z
+mad.f32 r1.y, c9.y, r4.z, r0.x
+mad.f32 r0.x, c8.x, r4.y, r0.y
+mad.f32 r0.y, c1.w, r3.x, r0.w
+mad.f32 r1.x, c9.x, r4.z, r0.x
+mad.f32 r0.x, c2.w, r3.y, r0.y
+mul.f r0.y, c0.z, r2.w
+mad.f32 r0.w, c3.w, r3.z, r0.x
+mad.f32 r0.x, c1.z, r3.x, r0.y
+mul.f r0.y, c0.y, r2.w
+mad.f32 r0.x, c2.z, r3.y, r0.x
+mad.f32 r0.y, c1.y, r3.x, r0.y
+mad.f32 r0.z, c3.z, r3.z, r0.x
+mad.f32 r0.x, c2.y, r3.y, r0.y
+mul.f r1.z, c0.x, r2.w
+mad.f32 r0.y, c3.y, r3.z, r0.x
+mad.f32 r0.x, c1.x, r3.x, r1.z
+mul.f r1.z, c6.w, r3.w
+mad.f32 r0.x, c2.x, r3.y, r0.x
+mad.f32 r1.z, c7.w, r4.x, r1.z
+mad.f32 r0.x, c3.x, r3.z, r0.x
+mad.f32 r3.w, c8.w, r4.y, r1.z
+mul.f r1.z, c5.x, r6.y
+mul.f r1.w, c5.x, r5.z
+mad.f32 r1.z, c5.y, r6.z, r1.z
+mad.f32 r1.w, c5.y, r5.w, r1.w
+mad.f32 r3.z, c5.z, r6.w, r1.z
+mad.f32 r3.y, c5.z, r6.x, r1.w
+mul.f r1.z, c5.x, r4.w
mov.f32f32 r2.w, c10.x
-mov.f32f32 r0.y, r0.x
-mov.f32f32 r0.x, r1.z
-mad.f32 r1.z, c8.w, r5.y, r1.w
-mov.f32f32 r2.w, r2.w
+mad.f32 r1.z, c5.y, r5.x, r1.z
mov.f32f32 r1.w, c10.x
-mov.f32f32 r4.x, c10.x
-mov.f32f32 r3.w, r1.z
-nop
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r1.z, r4.x
+mad.f32 r3.x, c5.z, r5.y, r1.z
+mov.f32f32 r1.z, c10.x
end
nop
+nop
+nop
; VERT: outputs: r0.x (0:0) r1.x (5:20) r2.x (5:21) r3.x (5:22)
-; VERT: inputs: r3.w (0:0,cm=f,il=8,b=0) r4.w (0:0,cm=f,il=12,b=0) r0.x (0:0,cm=7,il=16,b=0) r0.w (0:0,cm=7,il=20,b=0) r1.z (0:0,cm=7,il=24,b=0)
-; VERT: 92 instructions, 0 half, 7 full
+; VERT: inputs: r2.w (0:0,cm=f,il=8,b=0) r3.w (0:0,cm=f,il=12,b=0) r4.w (0:0,cm=7,il=16,b=0) r5.z (0:0,cm=7,il=20,b=0) r6.y (0:0,cm=7,il=24,b=0)
+; VERT: 53 instructions, 0 half, 7 full
diff --git a/reference/xonotic-gl2/xonotic-glx-gl2-40.asm b/reference/xonotic-gl2/xonotic-glx-gl2-40.asm
index 9b18924..ac67816 100644
--- a/reference/xonotic-gl2/xonotic-glx-gl2-40.asm
+++ b/reference/xonotic-gl2/xonotic-glx-gl2-40.asm
@@ -6,195 +6,133 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c15.x) 0xbf000000, 0x40000000, 0x00000000, 0x3f800000
+@const(c16.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
(sy)(ss)bary.f r0.z, 0, r0.x
-bary.f r0.w, 4, r0.x
bary.f r1.x, 8, r0.x
-bary.f r1.y, 1, r0.x
+bary.f r1.y, 4, r0.x
+bary.f r0.w, 1, r0.x
mov.f32f32 r1.z, r0.z
-mov.f32f32 r1.w, r0.z
-mul.f r2.x, r0.w, r0.w
-bary.f r2.y, 5, r0.x
-mov.f32f32 r2.z, r1.z
-mov.f32f32 r1.z, r1.y
-mov.f32f32 r3.x, r1.w
-mov.f32f32 r1.w, r1.y
+mul.f r2.x, r1.x, r1.x
+bary.f r2.y, 9, r0.x
+mov.f32f32 r1.w, r0.w
+mul.f r2.z, r1.y, r1.y
+bary.f r2.w, 5, r0.x
mad.f32 r2.x, r2.y, r2.y, r2.x
-mov.f32f32 r2.w, r1.z
-mul.f r1.z, r1.x, r1.x
-bary.f r3.z, 9, r0.x
-mov.f32f32 r3.y, r1.w
-mov.f32f32 r1.w, r2.x
-bary.f r2.x, 6, r0.x
-mad.f32 r1.z, r3.z, r3.z, r1.z
-sam (f32)(xyz)r3.w, r2.z, s#0, t#0
-(sy)(ss)add.f r2.z, r3.w, c15.x
-mov.f32f32 r2.w, r0.z
-mad.f32 r1.w, r2.x, r2.x, r1.w
-sam (f32)(xyz)r4.z, r3.x, s#4, t#4
-(sy)(ss)mul.f r3.x, r5.x, c6.z
-mov.f32f32 r2.z, r2.z
-mul.f r3.y, r4.w, c6.y
-mul.f r3.w, r4.z, c6.x
-mov.f32f32 r1.z, r1.z
-mul.f r4.z, r2.z, r2.z
-add.f r4.x, r4.x, c15.x
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r4.w, r0.z
-mov.f32f32 r3.y, r3.y
-mov.f32f32 r4.x, r4.x
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r4.w, r4.w
+bary.f r3.x, 10, r0.x
+mov.f32f32 r3.y, r0.z
+mov.f32f32 r3.z, r0.w
+sam (f32)(xyz)r3.w, r1.z, s#0, t#0
+(sy)(ss)add.f r1.z, r3.w, c15.x
+mad.f32 r1.w, r3.x, r3.x, r2.x
+mad.f32 r2.x, r2.w, r2.w, r2.z
+bary.f (ei)r0.x, 6, r0.x
+mov.f32f32 r0.y, r1.z
+add.f r2.z, r4.x, c15.x
+add.f r3.w, r4.y, c15.x
+sam (f32)(xyz)r4.x, r3.y, s#4, t#4
+mad.f32 r2.x, r0.x, r0.x, r2.x
+mul.f r1.z, r1.z, r0.y
+(ss)mov.f32f32 r3.y, r2.z
rsq r1.w, r1.w
-mov.f32f32 r5.x, r1.y
-mad.f32 r4.z, r4.x, r4.x, r4.z
-(ss)mov.f32f32 r1.w, r1.w
-bary.f (ei)r0.x, 10, r0.x
-mov.f32f32 r5.x, r5.x
-mov.f32f32 r0.y, r4.z
-add.f r4.y, r4.y, c15.x
-mul.f r0.w, r0.w, r1.w
-mul.f r2.y, r2.y, r1.w
-mul.f r1.w, r2.x, r1.w
-mov.f32f32 r2.x, r4.y
-sam (f32)(xyz)r4.y, r4.w, s#5, t#5
-(sy)mad.f32 r3.x, c7.z, r4.w, r3.x
-mad.f32 r3.y, c7.y, r4.z, r3.y
-mad.f32 r3.w, c7.x, r4.y, r3.w
-mad.f32 r0.y, r2.x, r2.x, r0.y
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r4.y, r2.w
-mov.f32f32 r2.w, r3.y
-mov.f32f32 r3.y, r1.y
-mov.f32f32 r3.w, r3.w
-mov.f32f32 r0.w, r0.w
-rsq r0.y, r0.y
-(ss)mov.f32f32 r0.y, r0.y
-mov.f32f32 r4.z, r3.y
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r3.y, r1.w
-mul.f r1.w, r2.z, r0.y
-mul.f r2.z, r4.x, r0.y
-mul.f r0.y, r2.x, r0.y
-nop
-mov.f32f32 r2.x, r1.w
-mad.f32 r1.z, r0.x, r0.x, r1.z
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r0.y, r0.y
-sam (f32)(xyzw)r4.x, r4.y, s#1, t#1
-(sy)add.f r1.w, r4.y, r2.w
-add.f r2.w, r4.z, r3.x
-add.f r3.x, r4.x, r3.w
-mul.f r3.w, r4.w, c13.x
+(ss)mov.f32f32 r3.z, r1.w
+(ss)mul.f r1.w, r3.x, r1.w
+(sy)mul.f r3.x, r4.z, c6.z
+mad.f32 r1.z, r2.z, r3.y, r1.z
+mov.f32f32 r2.z, r3.w
+mul.f r1.x, r1.x, r3.z
+mul.f r2.y, r2.y, r3.z
+rsq r2.x, r2.x
+(ss)mov.f32f32 r3.z, r2.x
+mad.f32 r1.z, r2.z, r2.z, r1.z
+mov.f32f32 r2.z, r1.x
+mov.f32f32 r4.z, r2.y
+mul.f r1.y, r1.y, r3.z
+mul.f r2.w, r2.w, r3.z
+mov.f32f32 r3.z, r1.w
+mov.f32f32 r4.w, r0.z
rsq r1.z, r1.z
-(ss)mov.f32f32 r1.z, r1.z
-mul.f r4.x, r2.w, c8.z
-mul.f r2.w, r2.w, c9.z
-mul.f r4.y, r1.w, c8.y
-mul.f r1.x, r1.x, r1.z
-mul.f r4.z, r1.w, c9.y
-mul.f r4.w, r3.x, c8.x
-mul.f r3.x, r3.x, c9.x
-mov.f32f32 r1.x, r1.x
-mul.f r1.w, r3.z, r1.z
-mul.f r0.x, r0.x, r1.z
-mov.f32f32 r1.z, r3.w
-mul.f r3.z, r2.x, r1.x
-mov.f32f32 r3.w, r1.w
-mul.f r1.w, r2.x, r1.x
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r1.z, r1.z
-mad.f32 r3.z, r2.z, r3.w, r3.z
-mad.f32 r1.w, r2.z, r3.w, r1.w
-mov.f32f32 r5.x, r0.z
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r3.z, r3.z
-mov.f32f32 r1.w, r1.w
-mad.f32 r3.z, r0.y, r0.x, r3.z
-mad.f32 r5.y, r0.y, r0.x, r1.w
-mov.f32f32 r1.w, r1.z
-mov.f32f32 r5.z, r5.x
-mul.f r1.z, r3.z, r2.x
-mul.f r2.x, r3.z, r2.z
-mul.f r0.y, r3.z, r0.y
-mov.f32f32 r2.z, r5.y
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r2.x, r2.x
+(ss)mov.f32f32 r5.y, r1.z
+(ss)mul.f r1.z, r3.w, r1.z
+mov.f32f32 r5.x, r0.w
+mul.f r3.w, r4.y, c6.y
+mul.f r0.y, r0.y, r5.y
+mul.f r3.y, r3.y, r5.y
+mov.f32f32 r4.y, r1.z
+mul.f r4.x, r4.x, c6.x
+mul.f r1.x, r0.y, r1.x
mov.f32f32 r0.y, r0.y
-max.f r2.z, r2.z, c15.z
-mul.f r1.z, c15.y, r1.z
-mul.f r2.x, c15.y, r2.x
+mad.f32 r1.x, r3.y, r2.y, r1.x
+mov.f32f32 r2.y, r3.y
+mad.f32 r1.x, r1.z, r3.z, r1.x
+mul.f r1.z, r0.y, r2.z
+sam (f32)(xyz)r4.w, r4.w, s#5, t#5
+(sy)mad.f32 r3.x, c7.z, r5.y, r3.x
+mov.f32f32 r5.y, r0.z
+mul.f r0.y, r1.x, r0.y
+mad.f32 r1.z, r2.y, r4.z, r1.z
+mul.f r2.y, r1.x, r2.y
+mul.f r1.x, r1.x, r4.y
mul.f r0.y, c15.y, r0.y
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r2.x, r2.x
-mov.f32f32 r0.y, r0.y
-mul.f r2.w, r2.w, r2.z
-add.f r1.x, r1.x, (neg)r1.z
-add.f r1.z, r3.w, (neg)r2.x
-add.f r0.x, r0.x, (neg)r0.y
-mov.f32f32 r0.y, r2.w
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r0.x, r0.x
-mul.f r2.x, r4.z, r2.z
-mul.f r0.w, r1.x, r0.w
-mul.f r1.x, r3.x, r2.z
-mad.f32 r0.w, r1.z, r2.y, r0.w
-mov.f32f32 r1.z, r2.x
-mov.f32f32 r2.x, r1.y
-mov.f32f32 r2.y, r0.z
-mov.f32f32 r0.z, r0.w
-mov.f32f32 r0.w, r1.x
-mad.f32 r0.x, r0.x, r3.y, r0.z
-mov.f32f32 r5.w, r2.x
-mov.f32f32 r0.z, r1.y
-nop
-mov.f32f32 r0.x, r0.x
+mad.f32 r1.z, r4.y, r3.z, r1.z
+mul.f r2.y, c15.y, r2.y
+mul.f r1.x, c15.y, r1.x
+add.f r0.y, r2.z, (neg)r0.y
+max.f r1.z, r1.z, c15.z
+add.f r2.y, r4.z, (neg)r2.y
+mad.f32 r2.z, c7.x, r4.w, r4.x
+mul.f r0.y, r0.y, r1.y
+mov.f32f32 r1.y, r1.z
+mad.f32 r0.y, r2.y, r2.w, r0.y
+add.f r1.x, r1.w, (neg)r1.x
+mul.f r0.x, r0.x, r2.x
+mov.f32f32 r5.z, r0.w
+mad.f32 r1.w, c7.y, r5.x, r3.w
+mov.f32f32 r2.x, r0.z
+mad.f32 r0.x, r1.x, r0.x, r0.y
+mov.f32f32 r2.y, r0.w
+sam (f32)(xyz)r0.y, r0.z, s#3, t#3
(rpt1)nop
-mov.f32f32 r2.z, r0.z
max.f r0.x, (neg)r0.x, c15.z
-sam (f32)(xyzw)r2.w, r5.z, s#2, t#2
-(sy)mul.f r0.z, r3.y, c10.z
-mul.f r1.x, r3.x, c10.y
-mad.f32 r1.y, c11.x, r3.z, c15.w
-mov.f32f32 r0.x, r0.x
-(rpt3)nop
-mov.f32f32 r1.y, r1.y
-mul.f r2.x, r2.w, c10.x
+sam (f32)(xyzw)r3.y, r5.y, s#1, t#1
+(sy)add.f r1.x, r3.w, r3.x
+add.f r2.w, r3.z, r1.w
+add.f r2.z, r3.y, r2.z
+sam (f32)(xyzw)r3.x, r2.x, s#2, t#2
+mul.f r1.w, r4.x, c13.x
+(ss)mul.f r2.x, r1.x, c9.z
+mul.f r2.y, r2.w, c9.y
log2 r0.x, r0.x
-sam (f32)(xyz)r2.y, r2.y, s#3, t#3
-(rpt1)nop
-(ss)mul.f r0.x, r1.y, r0.x
-(rpt2)nop
-mov.f32f32 r0.x, r0.x
-(rpt5)nop
+(sy)mad.f32 r3.w, c11.x, r3.w, c15.w
+mul.f r4.x, r2.z, c9.x
+mul.f r1.x, r1.x, c8.z
+mul.f r2.w, r2.w, c8.y
+(ss)mul.f r0.x, r3.w, r0.x
+mul.f r2.x, r2.x, r1.y
+mul.f r1.y, r2.y, r1.y
+mul.f r1.z, r4.x, r1.z
+mul.f r2.y, r2.z, c8.x
+mul.f r2.z, r3.z, c10.z
+mul.f r3.y, r3.y, c10.y
exp2 r0.x, r0.x
-(ss)mad.f32 r0.y, r0.z, r0.x, r0.y
-mad.f32 r0.z, r1.x, r0.x, r1.z
-(ss)mad.f32 r0.x, r2.x, r0.x, r0.w
-nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.x, r0.x
+mul.f r3.x, r3.x, c10.x
nop
-mad.f32 r0.y, c14.z, r0.y, r4.x
-mad.f32 r0.z, c14.y, r0.z, r4.y
-(sy)mad.f32 r0.y, c12.z, r2.w, r0.y
-mad.f32 r0.z, c12.y, r2.z, r0.z
-mad.f32 r0.x, c14.x, r0.x, r4.w
+(ss)mad.f32 r2.x, r2.z, r0.x, r2.x
+mad.f32 r1.y, r3.y, r0.x, r1.y
+(ss)mad.f32 r0.x, r3.x, r0.x, r1.z
nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-mad.f32 r0.x, c12.x, r2.y, r0.x
+mad.f32 r1.x, c14.z, r2.x, r1.x
+mad.f32 r1.y, c14.y, r1.y, r2.w
+mad.f32 r1.z, c12.z, r0.w, r1.x
+mad.f32 r1.y, c12.y, r0.z, r1.y
+mad.f32 r0.x, c14.x, r0.x, r2.y
nop
-mov.f32f32 r1.z, r0.y
-mov.f32f32 r1.y, r0.z
-mov.f32f32 r0.x, r0.x
-(rpt2)nop
-mov.f32f32 r1.x, r0.x
+mad.f32 r1.x, c12.x, r0.y, r0.x
end
nop
+nop
+nop
; FRAG: outputs: r1.x (1:0)
; FRAG: inputs: r0.x (5:20,cm=f,il=8,b=1) r1.x (5:21,cm=f,il=12,b=1) r2.x (5:22,cm=f,il=16,b=1)
-; FRAG: 201 instructions, 0 half, 6 full
+; FRAG: 122 instructions, 0 half, 6 full
diff --git a/reference/xonotic-gl2/xonotic-glx-gl2-41.asm b/reference/xonotic-gl2/xonotic-glx-gl2-41.asm
index 9d9cf0a..a1e4514 100644
--- a/reference/xonotic-gl2/xonotic-glx-gl2-41.asm
+++ b/reference/xonotic-gl2/xonotic-glx-gl2-41.asm
@@ -1,22 +1,22 @@
; options:
; VERT: new compiler
-@in(r3.w) in0
-@in(r4.x) in1
-@in(r4.y) in2
-@in(r4.z) in3
-@in(r4.w) in4
-@in(r5.x) in5
-@in(r5.y) in6
-@in(r5.z) in7
-@in(r0.x) in8
-@in(r0.y) in9
-@in(r0.z) in10
-@in(r0.w) in12
-@in(r1.x) in13
-@in(r1.y) in14
-@in(r1.z) in16
-@in(r1.w) in17
-@in(r2.x) in18
+@in(r2.w) in0
+@in(r3.x) in1
+@in(r3.y) in2
+@in(r3.z) in3
+@in(r3.w) in4
+@in(r4.x) in5
+@in(r4.y) in6
+@in(r4.z) in7
+@in(r4.w) in8
+@in(r5.x) in9
+@in(r5.y) in10
+@in(r5.z) in12
+@in(r5.w) in13
+@in(r6.x) in14
+@in(r6.y) in16
+@in(r6.z) in17
+@in(r6.w) in18
@out(r0.x) out0
@out(r0.y) out1
@out(r0.z) out2
@@ -33,99 +33,65 @@
@out(r3.y) out13
@out(r3.z) out14
@out(r3.w) out15
-(sy)(ss)mul.f r2.y, c5.x, r1.z
-mul.f r2.z, c5.x, r0.w
-mad.f32 r2.y, c5.y, r1.w, r2.y
-mad.f32 r2.z, c5.y, r1.x, r2.z
-add.f r2.w, c4.x, (neg)r3.w
-mul.f r3.x, c5.x, r0.x
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r2.z, r2.z
-mad.f32 r2.y, c5.z, r2.x, r2.y
-mad.f32 r2.z, c5.z, r1.y, r2.z
-mul.f r1.z, r2.w, r1.z
-mad.f32 r3.x, c5.y, r0.y, r3.x
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r2.z, r2.z
-add.f r5.w, c4.y, (neg)r4.x
-mov.f32f32 r3.x, r3.x
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r2.z, r2.z
-mad.f32 r1.z, r5.w, r1.w, r1.z
-mad.f32 r1.w, c5.z, r0.z, r3.x
-mov.f32f32 r2.y, r2.y
-mov.f32f32 r2.z, r2.z
-mov.f32f32 r1.z, r1.z
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r3.z, r2.y
-mov.f32f32 r3.y, r2.z
-add.f r6.x, c4.z, (neg)r4.y
-mov.f32f32 r1.w, r1.w
-mul.f r0.w, r2.w, r0.w
-mul.f r2.y, c6.y, r4.w
-mad.f32 r1.z, r6.x, r2.x, r1.z
-mov.f32f32 r3.x, r1.w
-mad.f32 r0.w, r5.w, r1.x, r0.w
-mad.f32 r1.x, c7.y, r5.x, r2.y
-mov.f32f32 r1.z, r1.z
-mad.f32 r1.x, c8.y, r5.y, r1.x
-mul.f r1.w, c6.x, r4.w
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.z, r1.z
-mad.f32 r0.w, r6.x, r1.y, r0.w
-mad.f32 r1.x, c9.y, r5.z, r1.x
-mad.f32 r1.y, c7.x, r5.x, r1.w
-mov.f32f32 r2.z, r1.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.x, r1.x
-mad.f32 r1.y, c8.x, r5.y, r1.y
-mul.f r0.x, r2.w, r0.x
-mov.f32f32 r2.y, r0.w
-mov.f32f32 r0.w, r1.x
-mad.f32 r1.x, c9.x, r5.z, r1.y
-mad.f32 r0.x, r5.w, r0.y, r0.x
-mul.f r0.y, c0.w, r3.w
-mov.f32f32 r1.y, r0.w
-mov.f32f32 r0.w, r1.x
-mov.f32f32 r0.x, r0.x
-mad.f32 r0.y, c1.w, r4.x, r0.y
-mul.f r1.z, c0.z, r3.w
-mov.f32f32 r0.w, r0.w
-mad.f32 r0.x, r6.x, r0.z, r0.x
-(rpt1)nop
-mov.f32f32 r1.x, r0.w
-mov.f32f32 r2.x, r0.x
-mad.f32 r0.x, c2.w, r4.y, r0.y
-mad.f32 r0.y, c1.z, r4.x, r1.z
-mad.f32 r0.x, c3.w, r4.z, r0.x
-mad.f32 r0.y, c2.z, r4.y, r0.y
-mul.f r0.z, c0.y, r3.w
-mul.f r1.z, c0.x, r3.w
-mov.f32f32 r0.w, r0.x
-mad.f32 r0.x, c3.z, r4.z, r0.y
-mad.f32 r0.y, c1.y, r4.x, r0.z
-mad.f32 r1.z, c1.x, r4.x, r1.z
-mul.f r1.w, c6.w, r4.w
-mov.f32f32 r0.z, r0.x
-mad.f32 r0.x, c2.y, r4.y, r0.y
-mad.f32 r0.y, c2.x, r4.y, r1.z
-mad.f32 r0.x, c3.y, r4.z, r0.x
-mad.f32 r1.z, c3.x, r4.z, r0.y
-mad.f32 r1.w, c7.w, r5.x, r1.w
+@const(c10.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
+@const(c11.x) 0x00000000, 0x3f800000, 0x43000000, 0x00000000
+(sy)(ss)add.f r0.x, c4.x, (neg)r2.w
+mul.f r0.y, c6.y, r3.w
+mul.f r0.z, c6.x, r3.w
+mul.f r0.w, c0.w, r2.w
+mul.f r1.x, r0.x, r6.y
+add.f r1.y, c4.y, (neg)r3.x
+mul.f r1.z, r0.x, r5.z
+mul.f r0.x, r0.x, r4.w
+mad.f32 r0.y, c7.y, r4.x, r0.y
+mad.f32 r1.x, r1.y, r6.z, r1.x
+add.f r1.w, c4.z, (neg)r3.y
+mad.f32 r1.z, r1.y, r5.w, r1.z
+mad.f32 r0.x, r1.y, r5.x, r0.x
+nop
+mad.f32 r2.z, r1.w, r6.w, r1.x
+mad.f32 r2.y, r1.w, r6.x, r1.z
+mad.f32 r2.x, r1.w, r5.y, r0.x
+mad.f32 r0.x, c8.y, r4.y, r0.y
+mad.f32 r0.y, c7.x, r4.x, r0.z
+mad.f32 r1.y, c9.y, r4.z, r0.x
+mad.f32 r0.x, c8.x, r4.y, r0.y
+mad.f32 r0.y, c1.w, r3.x, r0.w
+mad.f32 r1.x, c9.x, r4.z, r0.x
+mad.f32 r0.x, c2.w, r3.y, r0.y
+mul.f r0.y, c0.z, r2.w
+mad.f32 r0.w, c3.w, r3.z, r0.x
+mad.f32 r0.x, c1.z, r3.x, r0.y
+mul.f r0.y, c0.y, r2.w
+mad.f32 r0.x, c2.z, r3.y, r0.x
+mad.f32 r0.y, c1.y, r3.x, r0.y
+mad.f32 r0.z, c3.z, r3.z, r0.x
+mad.f32 r0.x, c2.y, r3.y, r0.y
+mul.f r1.z, c0.x, r2.w
+mad.f32 r0.y, c3.y, r3.z, r0.x
+mad.f32 r0.x, c1.x, r3.x, r1.z
+mul.f r1.z, c6.w, r3.w
+mad.f32 r0.x, c2.x, r3.y, r0.x
+mad.f32 r1.z, c7.w, r4.x, r1.z
+mad.f32 r0.x, c3.x, r3.z, r0.x
+mad.f32 r3.w, c8.w, r4.y, r1.z
+mul.f r1.z, c5.x, r6.y
+mul.f r1.w, c5.x, r5.z
+mad.f32 r1.z, c5.y, r6.z, r1.z
+mad.f32 r1.w, c5.y, r5.w, r1.w
+mad.f32 r3.z, c5.z, r6.w, r1.z
+mad.f32 r3.y, c5.z, r6.x, r1.w
+mul.f r1.z, c5.x, r4.w
mov.f32f32 r2.w, c10.x
-mov.f32f32 r0.y, r0.x
-mov.f32f32 r0.x, r1.z
-mad.f32 r1.z, c8.w, r5.y, r1.w
-mov.f32f32 r2.w, r2.w
+mad.f32 r1.z, c5.y, r5.x, r1.z
mov.f32f32 r1.w, c10.x
-mov.f32f32 r4.x, c10.x
-mov.f32f32 r3.w, r1.z
-nop
-mov.f32f32 r1.w, r1.w
-mov.f32f32 r1.z, r4.x
+mad.f32 r3.x, c5.z, r5.y, r1.z
+mov.f32f32 r1.z, c10.x
end
nop
+nop
+nop
; VERT: outputs: r0.x (0:0) r1.x (5:20) r2.x (5:21) r3.x (5:22)
-; VERT: inputs: r3.w (0:0,cm=f,il=8,b=0) r4.w (0:0,cm=f,il=12,b=0) r0.x (0:0,cm=7,il=16,b=0) r0.w (0:0,cm=7,il=20,b=0) r1.z (0:0,cm=7,il=24,b=0)
-; VERT: 92 instructions, 0 half, 7 full
+; VERT: inputs: r2.w (0:0,cm=f,il=8,b=0) r3.w (0:0,cm=f,il=12,b=0) r4.w (0:0,cm=7,il=16,b=0) r5.z (0:0,cm=7,il=20,b=0) r6.y (0:0,cm=7,il=24,b=0)
+; VERT: 53 instructions, 0 half, 7 full
diff --git a/reference/xonotic/xonotic05.asm b/reference/xonotic/xonotic05.asm
index 2c03e4f..284e180 100644
--- a/reference/xonotic/xonotic05.asm
+++ b/reference/xonotic/xonotic05.asm
@@ -1,13 +1,13 @@
; options:
; VERT: new compiler
-@in(r2.x) in0
-@in(r2.y) in1
-@in(r2.z) in2
-@in(r2.w) in3
-@in(r0.x) in4
-@in(r0.y) in5
-@in(r0.z) in6
-@in(r0.w) in7
+@in(r1.x) in0
+@in(r1.y) in1
+@in(r1.z) in2
+@in(r1.w) in3
+@in(r2.x) in4
+@in(r2.y) in5
+@in(r2.z) in6
+@in(r2.w) in7
@out(r0.x) out0
@out(r0.y) out1
@out(r0.z) out2
@@ -16,39 +16,36 @@
@out(r1.y) out5
@out(r1.z) out6
@out(r1.w) out7
-(sy)(ss)mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
-max.f r0.w, r0.w, c4.x
-max.f r0.z, r0.z, c4.x
-max.f r0.y, r0.y, c4.x
-max.f r0.x, r0.x, c4.x
-min.f r1.w, r0.w, c4.y
-min.f r1.z, r0.z, c4.y
-min.f r1.y, r0.y, c4.y
-min.f r1.x, r0.x, c4.y
-mul.f r0.x, r2.x, c0.w
-mul.f r0.y, r2.x, c0.z
-mad.f32 r0.x, c1.w, r2.y, r0.x
-mad.f32 r0.y, c1.z, r2.y, r0.y
-mad.f32 r0.x, c2.w, r2.z, r0.x
-mad.f32 r0.y, c2.z, r2.z, r0.y
-mad.f32 r0.w, c3.w, r2.w, r0.x
-mad.f32 r0.z, c3.z, r2.w, r0.y
-mul.f r0.x, r2.x, c0.y
-mul.f r0.y, r2.x, c0.x
-mad.f32 r0.x, c1.y, r2.y, r0.x
-mad.f32 r0.y, c1.x, r2.y, r0.y
-mad.f32 r0.x, c2.y, r2.z, r0.x
-mad.f32 r2.x, c2.x, r2.z, r0.y
-mad.f32 r0.y, c3.y, r2.w, r0.x
-mad.f32 r0.x, c3.x, r2.w, r2.x
+@const(c4.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000
+(sy)(ss)mul.f r0.x, r1.x, c0.w
+mul.f r0.y, r1.x, c0.z
+mad.f32 r0.x, c1.w, r1.y, r0.x
+mad.f32 r0.y, c1.z, r1.y, r0.y
+mad.f32 r0.x, c2.w, r1.z, r0.x
+mad.f32 r0.y, c2.z, r1.z, r0.y
+mad.f32 r0.w, c3.w, r1.w, r0.x
+mad.f32 r0.z, c3.z, r1.w, r0.y
+mul.f r0.x, r1.x, c0.y
+mul.f r0.y, r1.x, c0.x
+mad.f32 r0.x, c1.y, r1.y, r0.x
+mad.f32 r0.y, c1.x, r1.y, r0.y
+mad.f32 r0.x, c2.y, r1.z, r0.x
+mad.f32 r1.x, c2.x, r1.z, r0.y
+mad.f32 r0.y, c3.y, r1.w, r0.x
+mad.f32 r0.x, c3.x, r1.w, r1.x
+max.f r1.x, r2.w, c4.x
+max.f r1.y, r2.z, c4.x
+max.f r2.y, r2.y, c4.x
+max.f r2.x, r2.x, c4.x
+min.f r1.w, r1.x, c4.y
+min.f r1.z, r1.y, c4.y
+min.f r1.y, r2.y, c4.y
+min.f r1.x, r2.x, c4.y
end
nop
nop
nop
; VERT: outputs: r0.x (0:0) r1.x (1:0)
-; VERT: inputs: r2.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=f,il=12,b=0)
-; VERT: 29 instructions, 0 half, 3 full
+; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) r2.x (0:0,cm=f,il=12,b=0)
+; VERT: 25 instructions, 0 half, 3 full
diff --git a/reference/xonotic/xonotic06.asm b/reference/xonotic/xonotic06.asm
index 694d656..59b8c44 100644
--- a/reference/xonotic/xonotic06.asm
+++ b/reference/xonotic/xonotic06.asm
@@ -6,31 +6,20 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c5.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)bary.f r0.z, 0, r0.x
-bary.f r0.w, 3, r0.x
-bary.f (ei)r0.x, 1, r0.x
-nop
-mov.f32f32 r0.y, r0.z
-mov.f32f32 r0.z, r0.w
-mov.f32f32 r0.x, r0.x
-nop
-mov.f32f32 r0.w, r0.y
-mov.f32f32 r1.y, r0.z
-mov.f32f32 r1.x, r0.x
+bary.f r0.w, 1, r0.x
+bary.f (ei)r1.x, 3, r0.x
(rpt5)nop
-sam.p (f32)(xyzw)r0.x, r0.w, s#0, t#0
-(sy)(ss)mul.f r0.w, r0.w, c4.w
-mul.f r0.z, r0.z, c4.z
-mul.f r0.y, r0.y, c4.y
-mul.f r0.x, r0.x, c4.x
-mov.f32f32 r1.w, r0.w
-mov.f32f32 r1.z, r0.z
-mov.f32f32 r1.y, r0.y
-mov.f32f32 r1.x, r0.x
+sam.p (f32)(xyzw)r0.x, r0.z, s#0, t#0
+(sy)mul.f r1.w, r0.w, c4.w
+mul.f r1.z, r0.z, c4.z
+mul.f r1.y, r0.y, c4.y
+(ss)mul.f r1.x, r0.x, c4.x
end
nop
nop
; FRAG: outputs: r1.x (1:0)
-; FRAG: inputs: r0.x (5:0,cm=f,il=8,b=1)
-; FRAG: 27 instructions, 0 half, 2 full
+; FRAG: inputs: r0.y (5:0,cm=f,il=8,b=1)
+; FRAG: 15 instructions, 0 half, 2 full
diff --git a/reference/xonotic/xonotic08.asm b/reference/xonotic/xonotic08.asm
index 2c03e4f..284e180 100644
--- a/reference/xonotic/xonotic08.asm
+++ b/reference/xonotic/xonotic08.asm
@@ -1,13 +1,13 @@
; options:
; VERT: new compiler
-@in(r2.x) in0
-@in(r2.y) in1
-@in(r2.z) in2
-@in(r2.w) in3
-@in(r0.x) in4
-@in(r0.y) in5
-@in(r0.z) in6
-@in(r0.w) in7
+@in(r1.x) in0
+@in(r1.y) in1
+@in(r1.z) in2
+@in(r1.w) in3
+@in(r2.x) in4
+@in(r2.y) in5
+@in(r2.z) in6
+@in(r2.w) in7
@out(r0.x) out0
@out(r0.y) out1
@out(r0.z) out2
@@ -16,39 +16,36 @@
@out(r1.y) out5
@out(r1.z) out6
@out(r1.w) out7
-(sy)(ss)mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
-max.f r0.w, r0.w, c4.x
-max.f r0.z, r0.z, c4.x
-max.f r0.y, r0.y, c4.x
-max.f r0.x, r0.x, c4.x
-min.f r1.w, r0.w, c4.y
-min.f r1.z, r0.z, c4.y
-min.f r1.y, r0.y, c4.y
-min.f r1.x, r0.x, c4.y
-mul.f r0.x, r2.x, c0.w
-mul.f r0.y, r2.x, c0.z
-mad.f32 r0.x, c1.w, r2.y, r0.x
-mad.f32 r0.y, c1.z, r2.y, r0.y
-mad.f32 r0.x, c2.w, r2.z, r0.x
-mad.f32 r0.y, c2.z, r2.z, r0.y
-mad.f32 r0.w, c3.w, r2.w, r0.x
-mad.f32 r0.z, c3.z, r2.w, r0.y
-mul.f r0.x, r2.x, c0.y
-mul.f r0.y, r2.x, c0.x
-mad.f32 r0.x, c1.y, r2.y, r0.x
-mad.f32 r0.y, c1.x, r2.y, r0.y
-mad.f32 r0.x, c2.y, r2.z, r0.x
-mad.f32 r2.x, c2.x, r2.z, r0.y
-mad.f32 r0.y, c3.y, r2.w, r0.x
-mad.f32 r0.x, c3.x, r2.w, r2.x
+@const(c4.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000
+(sy)(ss)mul.f r0.x, r1.x, c0.w
+mul.f r0.y, r1.x, c0.z
+mad.f32 r0.x, c1.w, r1.y, r0.x
+mad.f32 r0.y, c1.z, r1.y, r0.y
+mad.f32 r0.x, c2.w, r1.z, r0.x
+mad.f32 r0.y, c2.z, r1.z, r0.y
+mad.f32 r0.w, c3.w, r1.w, r0.x
+mad.f32 r0.z, c3.z, r1.w, r0.y
+mul.f r0.x, r1.x, c0.y
+mul.f r0.y, r1.x, c0.x
+mad.f32 r0.x, c1.y, r1.y, r0.x
+mad.f32 r0.y, c1.x, r1.y, r0.y
+mad.f32 r0.x, c2.y, r1.z, r0.x
+mad.f32 r1.x, c2.x, r1.z, r0.y
+mad.f32 r0.y, c3.y, r1.w, r0.x
+mad.f32 r0.x, c3.x, r1.w, r1.x
+max.f r1.x, r2.w, c4.x
+max.f r1.y, r2.z, c4.x
+max.f r2.y, r2.y, c4.x
+max.f r2.x, r2.x, c4.x
+min.f r1.w, r1.x, c4.y
+min.f r1.z, r1.y, c4.y
+min.f r1.y, r2.y, c4.y
+min.f r1.x, r2.x, c4.y
end
nop
nop
nop
; VERT: outputs: r0.x (0:0) r1.x (1:0)
-; VERT: inputs: r2.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=f,il=12,b=0)
-; VERT: 29 instructions, 0 half, 3 full
+; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) r2.x (0:0,cm=f,il=12,b=0)
+; VERT: 25 instructions, 0 half, 3 full
diff --git a/reference/xonotic/xonotic09.asm b/reference/xonotic/xonotic09.asm
index 4a651b1..16ecc3c 100644
--- a/reference/xonotic/xonotic09.asm
+++ b/reference/xonotic/xonotic09.asm
@@ -6,31 +6,24 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)bary.f r0.z, 4, r0.x
-bary.f r0.w, 7, r0.x
-bary.f r1.x, 5, r0.x
+bary.f r0.w, 5, r0.x
+bary.f r1.x, 7, r0.x
bary.f r1.y, 3, r0.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.x, r1.x
bary.f r1.z, 2, r0.x
-mov.f32f32 r1.w, r0.z
-mov.f32f32 r2.y, r0.w
-mov.f32f32 r2.x, r1.x
-bary.f r0.z, 1, r0.x
+bary.f r2.x, 1, r0.x
bary.f (ei)r0.x, 0, r0.x
-(rpt3)nop
-sam.p (f32)(xyzw)r1.w, r1.w, s#0, t#0
-(sy)mul.f r0.y, r2.z, r1.y
-mul.f r0.w, r2.y, r1.z
-mul.f r0.z, r2.x, r0.z
-mul.f r0.x, r1.w, r0.x
-(ss)mov.f32f32 r1.w, r0.y
-mov.f32f32 r1.z, r0.w
-mov.f32f32 r1.y, r0.z
-mov.f32f32 r1.x, r0.x
+(rpt1)nop
+sam.p (f32)(xyzw)r0.y, r0.z, s#0, t#0
+(sy)mul.f r1.w, r1.x, r1.y
+mul.f r1.z, r0.w, r1.z
+mul.f r1.y, r0.z, r2.x
+(ss)mul.f r1.x, r0.y, r0.x
end
+nop
+nop
; FRAG: outputs: r1.x (1:0)
-; FRAG: inputs: r0.z (1:0,cm=f,il=8,b=1) r0.x (5:0,cm=f,il=12,b=1)
-; FRAG: 27 instructions, 0 half, 3 full
+; FRAG: inputs: r0.z (1:0,cm=f,il=8,b=1) r0.y (5:0,cm=f,il=12,b=1)
+; FRAG: 15 instructions, 0 half, 3 full
diff --git a/reference/xonotic/xonotic10.asm b/reference/xonotic/xonotic10.asm
index cca09e5..0583b5d 100644
--- a/reference/xonotic/xonotic10.asm
+++ b/reference/xonotic/xonotic10.asm
@@ -1,17 +1,17 @@
; options:
; VERT: new compiler
-@in(r2.x) in0
-@in(r2.y) in1
-@in(r2.z) in2
-@in(r2.w) in3
-@in(r0.x) in4
-@in(r0.y) in5
-@in(r0.z) in6
-@in(r0.w) in7
-@in(r3.x) in8
-@in(r3.y) in9
-@in(r3.z) in10
-@in(r3.w) in11
+@in(r1.x) in0
+@in(r1.y) in1
+@in(r1.z) in2
+@in(r1.w) in3
+@in(r3.x) in4
+@in(r3.y) in5
+@in(r3.z) in6
+@in(r3.w) in7
+@in(r2.x) in8
+@in(r2.y) in9
+@in(r2.z) in10
+@in(r2.w) in11
@out(r0.x) out0
@out(r0.y) out1
@out(r0.z) out2
@@ -24,43 +24,36 @@
@out(r2.y) out9
@out(r2.z) out10
@out(r2.w) out11
-(sy)(ss)mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
-max.f r0.w, r0.w, c4.x
-max.f r0.z, r0.z, c4.x
-max.f r0.y, r0.y, c4.x
-max.f r0.x, r0.x, c4.x
-min.f r1.w, r0.w, c4.y
-min.f r1.z, r0.z, c4.y
-min.f r1.y, r0.y, c4.y
-min.f r1.x, r0.x, c4.y
-mul.f r0.x, r2.x, c0.w
-mul.f r0.y, r2.x, c0.z
-mad.f32 r0.x, c1.w, r2.y, r0.x
-mad.f32 r0.y, c1.z, r2.y, r0.y
-mad.f32 r0.x, c2.w, r2.z, r0.x
-mad.f32 r0.y, c2.z, r2.z, r0.y
-mad.f32 r0.w, c3.w, r2.w, r0.x
-mad.f32 r0.z, c3.z, r2.w, r0.y
-mul.f r0.x, r2.x, c0.y
-mul.f r0.y, r2.x, c0.x
-mad.f32 r0.x, c1.y, r2.y, r0.x
-mad.f32 r0.y, c1.x, r2.y, r0.y
-mad.f32 r0.x, c2.y, r2.z, r0.x
-mad.f32 r2.x, c2.x, r2.z, r0.y
-mad.f32 r0.y, c3.y, r2.w, r0.x
-mad.f32 r0.x, c3.x, r2.w, r2.x
-mov.f32f32 r2.w, r3.w
-mov.f32f32 r2.z, r3.z
-mov.f32f32 r2.y, r3.y
-mov.f32f32 r2.x, r3.x
+@const(c4.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000
+(sy)(ss)mul.f r0.x, r1.x, c0.w
+mul.f r0.y, r1.x, c0.z
+mad.f32 r0.x, c1.w, r1.y, r0.x
+mad.f32 r0.y, c1.z, r1.y, r0.y
+mad.f32 r0.x, c2.w, r1.z, r0.x
+mad.f32 r0.y, c2.z, r1.z, r0.y
+mad.f32 r0.w, c3.w, r1.w, r0.x
+mad.f32 r0.z, c3.z, r1.w, r0.y
+mul.f r0.x, r1.x, c0.y
+mul.f r0.y, r1.x, c0.x
+mad.f32 r0.x, c1.y, r1.y, r0.x
+mad.f32 r0.y, c1.x, r1.y, r0.y
+mad.f32 r0.x, c2.y, r1.z, r0.x
+mad.f32 r1.x, c2.x, r1.z, r0.y
+mad.f32 r0.y, c3.y, r1.w, r0.x
+mad.f32 r0.x, c3.x, r1.w, r1.x
+max.f r1.x, r3.w, c4.x
+max.f r1.y, r3.z, c4.x
+max.f r3.y, r3.y, c4.x
+max.f r3.x, r3.x, c4.x
+min.f r1.w, r1.x, c4.y
+min.f r1.z, r1.y, c4.y
+min.f r1.y, r3.y, c4.y
+min.f r1.x, r3.x, c4.y
end
nop
nop
nop
; VERT: outputs: r0.x (0:0) r1.x (1:0) r2.x (5:0)
-; VERT: inputs: r2.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=f,il=12,b=0) r3.x (0:0,cm=f,il=16,b=0)
-; VERT: 33 instructions, 0 half, 4 full
+; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) r3.x (0:0,cm=f,il=12,b=0) r2.x (0:0,cm=f,il=16,b=0)
+; VERT: 25 instructions, 0 half, 4 full
diff --git a/reference/xonotic/xonotic11.asm b/reference/xonotic/xonotic11.asm
index 2c03e4f..284e180 100644
--- a/reference/xonotic/xonotic11.asm
+++ b/reference/xonotic/xonotic11.asm
@@ -1,13 +1,13 @@
; options:
; VERT: new compiler
-@in(r2.x) in0
-@in(r2.y) in1
-@in(r2.z) in2
-@in(r2.w) in3
-@in(r0.x) in4
-@in(r0.y) in5
-@in(r0.z) in6
-@in(r0.w) in7
+@in(r1.x) in0
+@in(r1.y) in1
+@in(r1.z) in2
+@in(r1.w) in3
+@in(r2.x) in4
+@in(r2.y) in5
+@in(r2.z) in6
+@in(r2.w) in7
@out(r0.x) out0
@out(r0.y) out1
@out(r0.z) out2
@@ -16,39 +16,36 @@
@out(r1.y) out5
@out(r1.z) out6
@out(r1.w) out7
-(sy)(ss)mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
-max.f r0.w, r0.w, c4.x
-max.f r0.z, r0.z, c4.x
-max.f r0.y, r0.y, c4.x
-max.f r0.x, r0.x, c4.x
-min.f r1.w, r0.w, c4.y
-min.f r1.z, r0.z, c4.y
-min.f r1.y, r0.y, c4.y
-min.f r1.x, r0.x, c4.y
-mul.f r0.x, r2.x, c0.w
-mul.f r0.y, r2.x, c0.z
-mad.f32 r0.x, c1.w, r2.y, r0.x
-mad.f32 r0.y, c1.z, r2.y, r0.y
-mad.f32 r0.x, c2.w, r2.z, r0.x
-mad.f32 r0.y, c2.z, r2.z, r0.y
-mad.f32 r0.w, c3.w, r2.w, r0.x
-mad.f32 r0.z, c3.z, r2.w, r0.y
-mul.f r0.x, r2.x, c0.y
-mul.f r0.y, r2.x, c0.x
-mad.f32 r0.x, c1.y, r2.y, r0.x
-mad.f32 r0.y, c1.x, r2.y, r0.y
-mad.f32 r0.x, c2.y, r2.z, r0.x
-mad.f32 r2.x, c2.x, r2.z, r0.y
-mad.f32 r0.y, c3.y, r2.w, r0.x
-mad.f32 r0.x, c3.x, r2.w, r2.x
+@const(c4.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000
+(sy)(ss)mul.f r0.x, r1.x, c0.w
+mul.f r0.y, r1.x, c0.z
+mad.f32 r0.x, c1.w, r1.y, r0.x
+mad.f32 r0.y, c1.z, r1.y, r0.y
+mad.f32 r0.x, c2.w, r1.z, r0.x
+mad.f32 r0.y, c2.z, r1.z, r0.y
+mad.f32 r0.w, c3.w, r1.w, r0.x
+mad.f32 r0.z, c3.z, r1.w, r0.y
+mul.f r0.x, r1.x, c0.y
+mul.f r0.y, r1.x, c0.x
+mad.f32 r0.x, c1.y, r1.y, r0.x
+mad.f32 r0.y, c1.x, r1.y, r0.y
+mad.f32 r0.x, c2.y, r1.z, r0.x
+mad.f32 r1.x, c2.x, r1.z, r0.y
+mad.f32 r0.y, c3.y, r1.w, r0.x
+mad.f32 r0.x, c3.x, r1.w, r1.x
+max.f r1.x, r2.w, c4.x
+max.f r1.y, r2.z, c4.x
+max.f r2.y, r2.y, c4.x
+max.f r2.x, r2.x, c4.x
+min.f r1.w, r1.x, c4.y
+min.f r1.z, r1.y, c4.y
+min.f r1.y, r2.y, c4.y
+min.f r1.x, r2.x, c4.y
end
nop
nop
nop
; VERT: outputs: r0.x (0:0) r1.x (1:0)
-; VERT: inputs: r2.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=f,il=12,b=0)
-; VERT: 29 instructions, 0 half, 3 full
+; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) r2.x (0:0,cm=f,il=12,b=0)
+; VERT: 25 instructions, 0 half, 3 full
diff --git a/reference/xonotic/xonotic14.asm b/reference/xonotic/xonotic14.asm
index ed21067..1948189 100644
--- a/reference/xonotic/xonotic14.asm
+++ b/reference/xonotic/xonotic14.asm
@@ -6,67 +6,40 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c6.x) 0x40800000, 0x3f800000, 0x00000000, 0x00000000
(sy)(ss)bary.f r0.z, 0, r0.x
-bary.f r0.w, 4, r0.x
+bary.f r0.w, 1, r0.x
bary.f r1.x, 3, r0.x
-bary.f r1.y, 1, r0.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.z, r0.z
-mov.f32f32 r2.y, r0.w
-bary.f r0.z, 5, r0.x
-mov.f32f32 r1.w, r1.y
-mov.f32f32 r2.x, r1.x
-bary.f (ei)r0.x, 7, r0.x
-mov.f32f32 r0.y, r0.z
-mov.f32f32 r0.z, c5.w
-(rpt2)nop
-sam.p (f32)(xyzw)r0.w, r1.z, s#1, t#1
-(sy)mul.f r1.y, r1.y, c5.z
-mul.f r1.x, r1.x, c5.y
-mul.f r0.w, r0.w, c5.x
-mov.f32f32 r2.z, r0.y
-mov.f32f32 r0.y, r1.y
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r2.w, r0.x
-mov.f32f32 r0.x, r0.y
-mov.f32f32 r0.y, r1.x
-mov.f32f32 r0.w, r0.w
-(rpt2)nop
-(ss)nop
-sam.p (f32)(xyzw)r1.x, r2.y, s#0, t#0
-(sy)mul.f r0.x, r1.z, r0.x
-mul.f r0.y, r1.y, r0.y
-mul.f r0.w, r1.x, r0.w
-mul.f r0.z, r1.w, r0.z
-mul.f r0.x, r0.x, c6.x
-mul.f r0.y, r0.y, c6.x
+bary.f r1.y, 4, r0.x
+bary.f r1.z, 5, r0.x
+bary.f (ei)r1.w, 7, r0.x
+mov.f32f32 r0.x, c5.w
+(rpt1)nop
+sam.p (f32)(xyzw)r0.y, r0.z, s#1, t#1
+(sy)(ss)mul.f r0.w, r0.w, c5.z
+mul.f r0.z, r0.z, c5.y
+mul.f r0.y, r0.y, c5.x
+sam.p (f32)(xyzw)r1.x, r1.y, s#0, t#0
+(sy)mul.f r0.x, r1.w, r0.x
+mul.f r0.w, r1.z, r0.w
+mul.f r0.z, r1.y, r0.z
+mul.f r0.y, r1.x, r0.y
+mul.f r0.x, r0.x, c6.y
mul.f r0.w, r0.w, c6.x
-mul.f r0.z, r0.z, c6.y
+mul.f r0.z, r0.z, c6.x
+mul.f r0.y, r0.y, c6.x
max.f r0.x, r0.x, c6.z
-max.f r0.y, r0.y, c6.z
max.f r0.w, r0.w, c6.z
max.f r0.z, r0.z, c6.z
-min.f r0.x, r0.x, c6.y
-min.f r0.y, r0.y, c6.y
-min.f r0.w, r0.w, c6.y
-nop
-mov.f32f32 r1.z, r0.x
-mov.f32f32 r1.y, r0.y
-mov.f32f32 r1.x, r0.w
-min.f r0.x, r0.z, c6.y
-(rpt2)nop
-mov.f32f32 r1.w, r0.x
+max.f r0.y, r0.y, c6.z
+(ss)min.f r1.w, r0.x, c6.y
+min.f r1.z, r0.w, c6.y
+min.f r1.y, r0.z, c6.y
+min.f r1.x, r0.y, c6.y
end
nop
+nop
; FRAG: outputs: r1.x (1:0)
-; FRAG: inputs: r0.y (5:0,cm=f,il=8,b=1) r63.y (5:1,cm=f,il=12,b=1)
-; FRAG: 65 instructions, 0 half, 3 full
+; FRAG: inputs: r0.y (5:0,cm=f,il=8,b=1) r1.x (5:1,cm=f,il=12,b=1)
+; FRAG: 31 instructions, 0 half, 2 full
diff --git a/reference/xonotic/xonotic16.asm b/reference/xonotic/xonotic16.asm
index 36bb179..5a863b3 100644
--- a/reference/xonotic/xonotic16.asm
+++ b/reference/xonotic/xonotic16.asm
@@ -6,43 +6,20 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c5.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)bary.f r0.z, 0, r0.x
-bary.f r0.w, 3, r0.x
-bary.f (ei)r0.x, 1, r0.x
-mov.f32f32 r0.y, c4.w
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r1.w, r0.y
-mov.f32f32 r1.x, r0.z
-mov.f32f32 r1.z, r0.w
-mov.f32f32 r1.y, r0.x
-(rpt5)nop
-sam.p (f32)(xyzw)r0.x, r1.x, s#0, t#0
-(sy)mul.f r0.z, r0.z, c4.z
-mul.f r0.y, r0.y, c4.y
-mul.f r0.x, r0.x, c4.x
-nop
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
-nop
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
-nop
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
-nop
-(ss)mov.f32f32 r1.z, r0.z
-mov.f32f32 r1.y, r0.y
-mov.f32f32 r1.x, r0.x
+bary.f r0.w, 1, r0.x
+bary.f (ei)r1.x, 3, r0.x
+mov.f32f32 r1.w, c4.w
+(rpt4)nop
+sam.p (f32)(xyzw)r0.x, r0.z, s#0, t#0
+(sy)mul.f r1.z, r0.z, c4.z
+mul.f r1.y, r0.y, c4.y
+(ss)mul.f r1.x, r0.x, c4.x
end
nop
nop
-nop
; FRAG: outputs: r1.x (1:0)
-; FRAG: inputs: r0.x (5:0,cm=f,il=8,b=1)
-; FRAG: 38 instructions, 0 half, 2 full
+; FRAG: inputs: r0.y (5:0,cm=f,il=8,b=1)
+; FRAG: 14 instructions, 0 half, 2 full
diff --git a/reference/xonotic/xonotic17.asm b/reference/xonotic/xonotic17.asm
index e65c627..8afec54 100644
--- a/reference/xonotic/xonotic17.asm
+++ b/reference/xonotic/xonotic17.asm
@@ -6,67 +6,44 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c11.x) 0x40000000, 0x3f800000, 0x00000000, 0x00000000
(sy)(ss)mov.f32f32 r0.z, c10.x
-bary.f r0.w, 4, r0.x
-bary.f r1.x, 7, r0.x
-bary.f r1.y, 5, r0.x
-mov.f32f32 r1.z, r0.z
-mov.f32f32 r0.z, c10.y
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.w, r0.z
-mov.f32f32 r0.z, c10.w
-mov.f32f32 r2.y, r0.w
-mov.f32f32 r2.z, r1.y
-mov.f32f32 r2.w, r1.x
-mov.f32f32 r2.x, r0.z
-bary.f r0.z, 3, r0.x
-(rpt1)nop
-bary.f r0.w, 2, r0.x
-bary.f r1.x, 1, r0.x
-sam.p (f32)(xyzw)r2.y, r2.y, s#0, t#0
-mov.f32f32 r0.z, r0.z
-sam.p (f32)(xyzw)r1.y, r1.z, s#1, t#1
+mov.f32f32 r0.w, c10.y
+mov.f32f32 r1.x, c10.w
+bary.f r1.y, 4, r0.x
+bary.f r1.z, 5, r0.x
+bary.f r1.w, 7, r0.x
+bary.f r2.x, 3, r0.x
+bary.f r2.y, 2, r0.x
+bary.f r2.z, 1, r0.x
+sam.p (f32)(xyzw)r2.w, r0.z, s#1, t#1
bary.f (ei)r0.x, 0, r0.x
-(sy)mul.f r0.y, r1.w, r0.w
-mul.f r0.w, r1.z, r1.x
-mul.f r0.z, r3.x, r0.z
-mul.f r0.x, r1.y, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.w, r0.w
-mul.f r0.z, r0.z, c11.y
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.w, r0.w
-max.f r0.z, r0.z, c11.z
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.w, r0.w
-min.f r0.z, r0.z, c11.y
-mov.f32f32 r0.x, r0.x
-mul.f r0.y, r2.w, r0.y
-mul.f r0.w, r2.z, r0.w
-(ss)mov.f32f32 r1.w, r0.z
-mul.f r0.x, r2.y, r0.x
+nop
+(sy)mul.f r0.y, r3.y, r2.y
+(ss)nop
+sam.p (f32)(xyzw)r0.z, r1.y, s#0, t#0
+(ss)mul.f r1.z, r3.x, r2.z
+mul.f r0.x, r2.w, r0.x
+(sy)mul.f r1.y, r1.y, r2.x
+mul.f r0.y, r1.x, r0.y
+mul.f r0.w, r0.w, r1.z
+mul.f r0.x, r0.z, r0.x
+mul.f r0.z, r1.y, c11.y
mul.f r0.y, r0.y, c11.x
-mul.f r0.z, r0.w, c11.x
-(rpt1)nop
-max.f r0.y, r0.y, c11.z
-max.f r0.z, r0.z, c11.z
+mul.f r0.w, r0.w, c11.x
mul.f r0.x, r0.x, c11.x
-nop
-min.f r0.y, r0.y, c11.y
-min.f r0.z, r0.z, c11.y
+max.f r0.z, r0.z, c11.z
+max.f r0.y, r0.y, c11.z
+max.f r0.w, r0.w, c11.z
max.f r0.x, r0.x, c11.z
-nop
-mov.f32f32 r1.z, r0.y
-mov.f32f32 r1.y, r0.z
-min.f r0.x, r0.x, c11.y
-(rpt2)nop
-mov.f32f32 r1.x, r0.x
+min.f r1.w, r0.z, c11.y
+min.f r1.z, r0.y, c11.y
+min.f r1.y, r0.w, c11.y
+min.f r1.x, r0.x, c11.y
end
+nop
+nop
; FRAG: outputs: r1.x (1:0)
-; FRAG: inputs: r63.w (1:0,cm=f,il=8,b=1) r0.y (5:1,cm=f,il=12,b=1)
-; FRAG: 64 instructions, 0 half, 4 full
+; FRAG: inputs: r1.y (1:0,cm=f,il=8,b=1) r1.x (5:1,cm=f,il=12,b=1)
+; FRAG: 34 instructions, 0 half, 4 full
diff --git a/reference/xonotic/xonotic18.asm b/reference/xonotic/xonotic18.asm
index 2be832a..04c1964 100644
--- a/reference/xonotic/xonotic18.asm
+++ b/reference/xonotic/xonotic18.asm
@@ -1,17 +1,17 @@
; options:
; VERT: new compiler
-@in(r2.x) in0
-@in(r2.y) in1
-@in(r2.z) in2
-@in(r2.w) in3
-@in(r0.x) in4
-@in(r0.y) in5
-@in(r0.z) in6
-@in(r0.w) in7
-@in(r3.x) in8
-@in(r3.y) in9
-@in(r3.z) in10
-@in(r3.w) in11
+@in(r1.x) in0
+@in(r1.y) in1
+@in(r1.z) in2
+@in(r1.w) in3
+@in(r3.x) in4
+@in(r3.y) in5
+@in(r3.z) in6
+@in(r3.w) in7
+@in(r2.x) in8
+@in(r2.y) in9
+@in(r2.z) in10
+@in(r2.w) in11
@out(r0.x) out0
@out(r0.y) out1
@out(r0.z) out2
@@ -24,43 +24,36 @@
@out(r2.y) out9
@out(r2.z) out10
@out(r2.w) out11
-(sy)(ss)mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
-max.f r0.w, r0.w, c4.x
-max.f r0.z, r0.z, c4.x
-max.f r0.y, r0.y, c4.x
-max.f r0.x, r0.x, c4.x
-min.f r1.w, r0.w, c4.y
-min.f r1.z, r0.z, c4.y
-min.f r1.y, r0.y, c4.y
-min.f r1.x, r0.x, c4.y
-mul.f r0.x, r2.x, c0.w
-mul.f r0.y, r2.x, c0.z
-mad.f32 r0.x, c1.w, r2.y, r0.x
-mad.f32 r0.y, c1.z, r2.y, r0.y
-mad.f32 r0.x, c2.w, r2.z, r0.x
-mad.f32 r0.y, c2.z, r2.z, r0.y
-mad.f32 r0.w, c3.w, r2.w, r0.x
-mad.f32 r0.z, c3.z, r2.w, r0.y
-mul.f r0.x, r2.x, c0.y
-mul.f r0.y, r2.x, c0.x
-mad.f32 r0.x, c1.y, r2.y, r0.x
-mad.f32 r0.y, c1.x, r2.y, r0.y
-mad.f32 r0.x, c2.y, r2.z, r0.x
-mad.f32 r2.x, c2.x, r2.z, r0.y
-mad.f32 r0.y, c3.y, r2.w, r0.x
-mad.f32 r0.x, c3.x, r2.w, r2.x
-mov.f32f32 r2.w, r3.w
-mov.f32f32 r2.z, r3.z
-mov.f32f32 r2.y, r3.y
-mov.f32f32 r2.x, r3.x
+@const(c4.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000
+(sy)(ss)mul.f r0.x, r1.x, c0.w
+mul.f r0.y, r1.x, c0.z
+mad.f32 r0.x, c1.w, r1.y, r0.x
+mad.f32 r0.y, c1.z, r1.y, r0.y
+mad.f32 r0.x, c2.w, r1.z, r0.x
+mad.f32 r0.y, c2.z, r1.z, r0.y
+mad.f32 r0.w, c3.w, r1.w, r0.x
+mad.f32 r0.z, c3.z, r1.w, r0.y
+mul.f r0.x, r1.x, c0.y
+mul.f r0.y, r1.x, c0.x
+mad.f32 r0.x, c1.y, r1.y, r0.x
+mad.f32 r0.y, c1.x, r1.y, r0.y
+mad.f32 r0.x, c2.y, r1.z, r0.x
+mad.f32 r1.x, c2.x, r1.z, r0.y
+mad.f32 r0.y, c3.y, r1.w, r0.x
+mad.f32 r0.x, c3.x, r1.w, r1.x
+max.f r1.x, r3.w, c4.x
+max.f r1.y, r3.z, c4.x
+max.f r3.y, r3.y, c4.x
+max.f r3.x, r3.x, c4.x
+min.f r1.w, r1.x, c4.y
+min.f r1.z, r1.y, c4.y
+min.f r1.y, r3.y, c4.y
+min.f r1.x, r3.x, c4.y
end
nop
nop
nop
; VERT: outputs: r0.x (0:0) r1.x (1:0) r2.x (5:1)
-; VERT: inputs: r2.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=f,il=12,b=0) r3.x (0:0,cm=f,il=16,b=0)
-; VERT: 33 instructions, 0 half, 4 full
+; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) r3.x (0:0,cm=f,il=12,b=0) r2.x (0:0,cm=f,il=16,b=0)
+; VERT: 25 instructions, 0 half, 4 full
diff --git a/reference/xonotic/xonotic20.asm b/reference/xonotic/xonotic20.asm
index f8cdc3f..78eedee 100644
--- a/reference/xonotic/xonotic20.asm
+++ b/reference/xonotic/xonotic20.asm
@@ -6,47 +6,24 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c0.x) 0x00000000, 0x00000000, 0x00000000, 0x00000000
(sy)(ss)bary.f r0.z, 4, r0.x
-bary.f r0.w, 3, r0.x
+bary.f r0.w, 5, r0.x
bary.f r1.x, 7, r0.x
-bary.f r1.y, 5, r0.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r2.x, r0.z
-mov.f32f32 r1.w, r0.w
-mov.f32f32 r2.z, r1.x
-mov.f32f32 r2.y, r1.y
-bary.f r0.z, 2, r0.x
-bary.f r0.w, 1, r0.x
+bary.f r1.w, 3, r0.x
+bary.f r1.y, 2, r0.x
+bary.f r2.x, 1, r0.x
bary.f (ei)r0.x, 0, r0.x
-(rpt2)nop
-sam.p (f32)(xyzw)r2.x, r2.x, s#0, t#0
-(sy)mul.f r0.y, r2.z, r0.z
-mul.f r0.z, r2.y, r0.w
-mul.f r0.x, r2.x, r0.x
-nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.x, r0.x
-nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.x, r0.x
-nop
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.x, r0.x
-nop
-mov.f32f32 r1.z, r0.y
-mov.f32f32 r1.y, r0.z
-mov.f32f32 r1.x, r0.x
+(rpt1)nop
+sam.p (f32)(xyzw)r0.y, r0.z, s#0, t#0
+(sy)mul.f r1.z, r0.w, r1.y
+mul.f r1.y, r0.z, r2.x
+(ss)mul.f r1.x, r0.y, r0.x
end
nop
nop
nop
; FRAG: outputs: r1.x (1:0)
-; FRAG: inputs: r0.x (1:0,cm=f,il=8,b=1) r0.y (5:0,cm=f,il=12,b=1)
-; FRAG: 39 instructions, 0 half, 3 full
+; FRAG: inputs: r1.x (1:0,cm=f,il=8,b=1) r0.y (5:0,cm=f,il=12,b=1)
+; FRAG: 14 instructions, 0 half, 3 full
diff --git a/reference/xonotic/xonotic21.asm b/reference/xonotic/xonotic21.asm
index e5f803f..ca2a36c 100644
--- a/reference/xonotic/xonotic21.asm
+++ b/reference/xonotic/xonotic21.asm
@@ -6,67 +6,44 @@
@out(r1.y) out1
@out(r1.z) out2
@out(r1.w) out3
+@const(c0.x) 0x40800000, 0x3f800000, 0x00000000, 0x00000000
(sy)(ss)bary.f r0.z, 4, r0.x
-bary.f r0.w, 8, r0.x
+bary.f r0.w, 5, r0.x
bary.f r1.x, 7, r0.x
-bary.f r1.y, 5, r0.x
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r1.x, r1.x
-mov.f32f32 r1.y, r1.y
-mov.f32f32 r1.z, r0.z
-mov.f32f32 r2.y, r0.w
-bary.f r0.z, 9, r0.x
-mov.f32f32 r1.w, r1.y
-mov.f32f32 r2.x, r1.x
-bary.f r0.w, 11, r0.x
-mov.f32f32 r0.z, r0.z
-bary.f r1.x, 3, r0.x
-bary.f r1.y, 2, r0.x
-bary.f r2.w, 1, r0.x
+bary.f r1.y, 8, r0.x
+bary.f r1.z, 9, r0.x
+bary.f r1.w, 11, r0.x
+bary.f r2.x, 3, r0.x
+bary.f r2.y, 2, r0.x
+bary.f r2.z, 1, r0.x
+sam.p (f32)(xyzw)r2.w, r0.z, s#1, t#1
bary.f (ei)r0.x, 0, r0.x
-sam.p (f32)(xyzw)r3.x, r1.z, s#1, t#1
-mov.f32f32 r2.z, r0.z
-(sy)mul.f r0.y, r3.z, r1.y
-mul.f r0.z, r3.y, r2.w
-mul.f r0.x, r3.x, r0.x
-mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r2.w, r0.w
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.x, r0.x
-mov.f32f32 r0.w, r1.x
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.z, r0.z
+nop
+(sy)mul.f r0.y, r3.y, r2.y
(ss)nop
-sam.p (f32)(xyzw)r1.x, r2.y, s#0, t#0
-mov.f32f32 r0.x, r0.x
-(sy)mul.f r0.w, r1.w, r0.w
-mul.f r0.y, r1.z, r0.y
-mul.f r0.z, r1.y, r0.z
-mul.f r0.x, r1.x, r0.x
-mul.f r0.w, r0.w, c0.y
+sam.p (f32)(xyzw)r0.z, r1.y, s#0, t#0
+(ss)mul.f r1.z, r3.x, r2.z
+mul.f r0.x, r2.w, r0.x
+(sy)mul.f r1.y, r1.y, r2.x
+mul.f r0.y, r1.x, r0.y
+mul.f r0.w, r0.w, r1.z
+mul.f r0.x, r0.z, r0.x
+mul.f r0.z, r1.y, c0.y
mul.f r0.y, r0.y, c0.x
-mul.f r0.z, r0.z, c0.x
+mul.f r0.w, r0.w, c0.x
mul.f r0.x, r0.x, c0.x
-max.f r0.w, r0.w, c0.z
-max.f r0.y, r0.y, c0.z
max.f r0.z, r0.z, c0.z
+max.f r0.y, r0.y, c0.z
+max.f r0.w, r0.w, c0.z
max.f r0.x, r0.x, c0.z
-min.f r0.w, r0.w, c0.y
-min.f r0.y, r0.y, c0.y
-min.f r0.z, r0.z, c0.y
-min.f r0.x, r0.x, c0.y
-nop
-mov.f32f32 r1.z, r0.y
-mov.f32f32 r1.y, r0.z
-mov.f32f32 r1.x, r0.x
-mov.f32f32 r1.w, r0.w
+min.f r1.w, r0.z, c0.y
+min.f r1.z, r0.y, c0.y
+min.f r1.y, r0.w, c0.y
+min.f r1.x, r0.x, c0.y
end
+nop
+nop
; FRAG: outputs: r1.x (1:0)
-; FRAG: inputs: r0.y (1:0,cm=f,il=8,b=1) r0.y (5:0,cm=f,il=12,b=1) r0.x (5:1,cm=f,il=16,b=1)
-; FRAG: 60 instructions, 0 half, 4 full
+; FRAG: inputs: r1.y (1:0,cm=f,il=8,b=1) r0.y (5:0,cm=f,il=12,b=1) r1.x (5:1,cm=f,il=16,b=1)
+; FRAG: 34 instructions, 0 half, 4 full
diff --git a/reference/xonotic/xonotic22.asm b/reference/xonotic/xonotic22.asm
index 9446af7..c880414 100644
--- a/reference/xonotic/xonotic22.asm
+++ b/reference/xonotic/xonotic22.asm
@@ -1,21 +1,21 @@
; options:
; VERT: new compiler
-@in(r2.x) in0
-@in(r2.y) in1
-@in(r2.z) in2
-@in(r2.w) in3
-@in(r0.x) in4
-@in(r0.y) in5
-@in(r0.z) in6
-@in(r0.w) in7
-@in(r4.x) in8
-@in(r4.y) in9
-@in(r4.z) in10
-@in(r4.w) in11
-@in(r5.x) in12
-@in(r5.y) in13
-@in(r5.z) in14
-@in(r5.w) in15
+@in(r1.x) in0
+@in(r1.y) in1
+@in(r1.z) in2
+@in(r1.w) in3
+@in(r4.x) in4
+@in(r4.y) in5
+@in(r4.z) in6
+@in(r4.w) in7
+@in(r2.x) in8
+@in(r2.y) in9
+@in(r2.z) in10
+@in(r2.w) in11
+@in(r3.x) in12
+@in(r3.y) in13
+@in(r3.z) in14
+@in(r3.w) in15
@out(r0.x) out0
@out(r0.y) out1
@out(r0.z) out2
@@ -32,47 +32,36 @@
@out(r3.y) out13
@out(r3.z) out14
@out(r3.w) out15
-(sy)(ss)mov.f32f32 r0.w, r0.w
-mov.f32f32 r0.z, r0.z
-mov.f32f32 r0.y, r0.y
-mov.f32f32 r0.x, r0.x
-max.f r0.w, r0.w, c4.x
-max.f r0.z, r0.z, c4.x
-max.f r0.y, r0.y, c4.x
-max.f r0.x, r0.x, c4.x
-min.f r1.w, r0.w, c4.y
-min.f r1.z, r0.z, c4.y
-min.f r1.y, r0.y, c4.y
-min.f r1.x, r0.x, c4.y
-mul.f r0.x, r2.x, c0.w
-mul.f r0.y, r2.x, c0.z
-mad.f32 r0.x, c1.w, r2.y, r0.x
-mad.f32 r0.y, c1.z, r2.y, r0.y
-mad.f32 r0.x, c2.w, r2.z, r0.x
-mad.f32 r0.y, c2.z, r2.z, r0.y
-mad.f32 r0.w, c3.w, r2.w, r0.x
-mad.f32 r0.z, c3.z, r2.w, r0.y
-mul.f r0.x, r2.x, c0.y
-mul.f r0.y, r2.x, c0.x
-mad.f32 r0.x, c1.y, r2.y, r0.x
-mad.f32 r0.y, c1.x, r2.y, r0.y
-mad.f32 r0.x, c2.y, r2.z, r0.x
-mad.f32 r2.x, c2.x, r2.z, r0.y
-mad.f32 r0.y, c3.y, r2.w, r0.x
-mad.f32 r0.x, c3.x, r2.w, r2.x
-mov.f32f32 r3.w, r5.w
-mov.f32f32 r3.z, r5.z
-mov.f32f32 r3.y, r5.y
-mov.f32f32 r3.x, r5.x
-mov.f32f32 r2.w, r4.w
-mov.f32f32 r2.z, r4.z
-mov.f32f32 r2.y, r4.y
-mov.f32f32 r2.x, r4.x
+@const(c4.x) 0x00000000, 0x3f800000, 0x00000000, 0x00000000
+(sy)(ss)mul.f r0.x, r1.x, c0.w
+mul.f r0.y, r1.x, c0.z
+mad.f32 r0.x, c1.w, r1.y, r0.x
+mad.f32 r0.y, c1.z, r1.y, r0.y
+mad.f32 r0.x, c2.w, r1.z, r0.x
+mad.f32 r0.y, c2.z, r1.z, r0.y
+mad.f32 r0.w, c3.w, r1.w, r0.x
+mad.f32 r0.z, c3.z, r1.w, r0.y
+mul.f r0.x, r1.x, c0.y
+mul.f r0.y, r1.x, c0.x
+mad.f32 r0.x, c1.y, r1.y, r0.x
+mad.f32 r0.y, c1.x, r1.y, r0.y
+mad.f32 r0.x, c2.y, r1.z, r0.x
+mad.f32 r1.x, c2.x, r1.z, r0.y
+mad.f32 r0.y, c3.y, r1.w, r0.x
+mad.f32 r0.x, c3.x, r1.w, r1.x
+max.f r1.x, r4.w, c4.x
+max.f r1.y, r4.z, c4.x
+max.f r4.y, r4.y, c4.x
+max.f r4.x, r4.x, c4.x
+min.f r1.w, r1.x, c4.y
+min.f r1.z, r1.y, c4.y
+min.f r1.y, r4.y, c4.y
+min.f r1.x, r4.x, c4.y
end
nop
nop
nop
; VERT: outputs: r0.x (0:0) r1.x (1:0) r2.x (5:0) r3.x (5:1)
-; VERT: inputs: r2.x (0:0,cm=f,il=8,b=0) r0.x (0:0,cm=f,il=12,b=0) r4.x (0:0,cm=f,il=16,b=0) r5.x (0:0,cm=f,il=20,b=0)
-; VERT: 37 instructions, 0 half, 6 full
+; VERT: inputs: r1.x (0:0,cm=f,il=8,b=0) r4.x (0:0,cm=f,il=12,b=0) r2.x (0:0,cm=f,il=16,b=0) r3.x (0:0,cm=f,il=20,b=0)
+; VERT: 25 instructions, 0 half, 5 full