diff options
author | Wang Zhenyu <zhenyu.z.wang@intel.com> | 2006-09-25 14:15:21 +0800 |
---|---|---|
committer | Wang Zhenyu <zhenyu.z.wang@intel.com> | 2006-09-25 14:15:21 +0800 |
commit | f272f0d811f9ee059e8f8617a516e6c8bff917a1 (patch) | |
tree | c3d8966fe559c7d780cd2ce3b0b7a401705ecbd7 | |
parent | ff202f8e2ae4117b464c94047001023d5d0531cc (diff) | |
parent | b3ddcf3348365873aed4a2b8b2750b330fb4cf26 (diff) |
Merge branch 'master' into exa
-rw-r--r-- | configure.ac | 3 | ||||
-rw-r--r-- | man/i810.man | 2 | ||||
-rw-r--r-- | src/Makefile.am | 12 | ||||
-rw-r--r-- | src/i810_reg.h | 11 | ||||
-rw-r--r-- | src/i830_driver.c | 29 | ||||
-rw-r--r-- | src/i830_memory.c | 4 | ||||
-rw-r--r-- | src/i830_video.c | 39 | ||||
-rw-r--r-- | src/packed_yuv_sf.g4a | 17 | ||||
-rw-r--r-- | src/packed_yuv_wm.g4a | 161 | ||||
-rw-r--r-- | src/sf_prog.h | 17 | ||||
-rw-r--r-- | src/wm_prog.h | 110 |
11 files changed, 249 insertions, 156 deletions
diff --git a/configure.ac b/configure.ac index 436d7e5c..a27822e5 100644 --- a/configure.ac +++ b/configure.ac @@ -49,6 +49,9 @@ AC_DISABLE_STATIC AC_PROG_LIBTOOL AC_PROG_CC +AC_CHECK_PROG(gen4asm, [intel-gen4asm], yes, no) +AM_CONDITIONAL(HAVE_GEN4ASM, test x$gen4asm = xyes) + AH_TOP([#include "xorg-server.h"]) AC_ARG_WITH(xorg-module-dir, diff --git a/man/i810.man b/man/i810.man index e396e52f..509ffbcb 100644 --- a/man/i810.man +++ b/man/i810.man @@ -49,7 +49,7 @@ DRI is enabled. This amount may be changed with the entry in the config file .B "Device" section. It may be set to any reasonable value up to 64MB for older -chipsets or 128MB for newer chipets. It is advisable to check the +chipsets or 128MB for newer chipsets. It is advisable to check the __xservername__ log file to check if any features have been disabled because of insufficient video memory. In particular, DRI support or tiling mode may be disabled diff --git a/src/Makefile.am b/src/Makefile.am index e58350c9..163cc3f4 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -33,6 +33,7 @@ i810_drv_ladir = @moduledir@/drivers i810_drv_la_SOURCES = \ brw_defines.h \ brw_structs.h \ + sf_prog.h \ wm_prog.h \ common.h \ i810_accel.c \ @@ -70,6 +71,13 @@ i810_drv_la_SOURCES = \ i830_exa_render.c \ i915_exa_render.c +if HAVE_GEN4ASM +sf_prog.h: packed_yuv_sf.g4a + intel-gen4asm -o sf_prog.h packed_yuv_sf.g4a +wm_prog.h: packed_yuv_wm.g4a + intel-gen4asm -o wm_prog.h packed_yuv_wm.g4a +endif + if DRI i810_drv_la_SOURCES += \ i810_dri.c \ @@ -78,3 +86,7 @@ i810_drv_la_SOURCES += \ i810_hwmc.c \ i830_dri.h endif + +EXTRA_DIST = \ + packed_yuv_sf.g4a \ + packed_yuv_wm.g4a diff --git a/src/i810_reg.h b/src/i810_reg.h index 05710c48..4bb5bf14 100644 --- a/src/i810_reg.h +++ b/src/i810_reg.h @@ -1238,12 +1238,11 @@ typedef enum { TvIndex, DfpIndex, LfpIndex, + Crt2Index, Tv2Index, Dfp2Index, - UnknownIndex, - Unknown2Index, - NumDisplayTypes, - NumKnownDisplayTypes = UnknownIndex + Lfp2Index, + NumDisplayTypes } 
DisplayType; /* What's connected to the pipes (as reported by the BIOS) */ @@ -1252,10 +1251,10 @@ typedef enum { #define PIPE_TV_ACTIVE (1 << TvIndex) #define PIPE_DFP_ACTIVE (1 << DfpIndex) #define PIPE_LCD_ACTIVE (1 << LfpIndex) +#define PIPE_CRT2_ACTIVE (1 << Crt2Index) #define PIPE_TV2_ACTIVE (1 << Tv2Index) #define PIPE_DFP2_ACTIVE (1 << Dfp2Index) -#define PIPE_UNKNOWN_ACTIVE ((1 << UnknownIndex) | \ - (1 << Unknown2Index)) +#define PIPE_LCD2_ACTIVE (1 << Lfp2Index) #define PIPE_SIZED_DISP_MASK (PIPE_DFP_ACTIVE | \ PIPE_LCD_ACTIVE | \ diff --git a/src/i830_driver.c b/src/i830_driver.c index 4d9de0cb..f9ca77cf 100644 --- a/src/i830_driver.c +++ b/src/i830_driver.c @@ -2870,12 +2870,14 @@ GetDisplayInfo(ScrnInfoPtr pScrn, int device, Bool *attached, Bool *present, DPRINTF(PFX, "GetDisplayInfo: device: 0x%x\n", device); switch (device & 0xff) { - case 0x01: - case 0x02: - case 0x04: - case 0x08: - case 0x10: - case 0x20: + case PIPE_CRT: + case PIPE_TV: + case PIPE_DFP: + case PIPE_LFP: + case PIPE_CRT2: + case PIPE_TV2: + case PIPE_DFP2: + case PIPE_LFP2: break; default: xf86DrvMsg(pScrn->scrnIndex, X_ERROR, @@ -2982,10 +2984,6 @@ PrintDisplayDeviceInfo(ScrnInfoPtr pScrn) name = DeviceToString(-1); } while (name); - if (pipe & PIPE_UNKNOWN_ACTIVE) - xf86DrvMsg(pScrn->scrnIndex, X_INFO, - "\tSome unknown display devices may also be present\n"); - } else { xf86DrvMsg(pScrn->scrnIndex, X_INFO, "No active displays on Pipe %c.\n", PIPE_NAME(n)); @@ -3018,7 +3016,7 @@ GetPipeSizes(ScrnInfoPtr pScrn) pipe = (pI830->operatingDevices >> PIPE_SHIFT(n)) & PIPE_ACTIVE_MASK; pI830->pipeDisplaySize[n].x1 = pI830->pipeDisplaySize[n].y1 = 0; pI830->pipeDisplaySize[n].x2 = pI830->pipeDisplaySize[n].y2 = 4096; - for (i = 0; i < NumKnownDisplayTypes; i++) { + for (i = 0; i < NumDisplayTypes; i++) { if (pipe & (1 << i) & PIPE_SIZED_DISP_MASK) { if (pI830->displaySize[i].x2 != 0) { xf86DrvMsg(pScrn->scrnIndex, X_INFO, @@ -3055,7 +3053,7 @@ I830DetectDisplayDevice(ScrnInfoPtr 
pScrn) "\t If you encounter this problem please add \n" "\t\t Option \"DisplayInfo\" \"FALSE\"\n" "\t to the Device section of your XF86Config file.\n"); - for (i = 0; i < NumKnownDisplayTypes; i++) { + for (i = 0; i < NumDisplayTypes; i++) { if (GetDisplayInfo(pScrn, 1 << i, &pI830->displayAttached[i], &pI830->displayPresent[i], &pI830->displaySize[i].x2, @@ -7002,6 +7000,7 @@ I830InitFBManager( BoxPtr FullBox ){ ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; + I830Ptr pI830 = I830PTR(pScrn); RegionRec ScreenRegion; RegionRec FullRegion; BoxRec ScreenBox; @@ -7010,7 +7009,7 @@ I830InitFBManager( ScreenBox.x1 = 0; ScreenBox.y1 = 0; ScreenBox.x2 = pScrn->displayWidth; - if (pScrn->virtualX > pScrn->virtualY) + if (!pI830->MergedFB && pScrn->virtualX > pScrn->virtualY) ScreenBox.y2 = pScrn->virtualX; else ScreenBox.y2 = pScrn->virtualY; @@ -8161,13 +8160,15 @@ I830BIOSEnterVT(int scrnIndex, int flags) #ifdef XF86DRI if (pI830->directRenderingEnabled) { + + I830DRISetVBlankInterrupt (pScrn, TRUE); + if (!pI830->starting) { ScreenPtr pScreen = pScrn->pScreen; drmI830Sarea *sarea = (drmI830Sarea *) DRIGetSAREAPrivate(pScreen); int i; I830DRIResume(screenInfo.screens[scrnIndex]); - I830DRISetVBlankInterrupt (pScrn, TRUE); I830RefreshRing(pScrn); I830Sync(pScrn); diff --git a/src/i830_memory.c b/src/i830_memory.c index 88dc5acb..e3307d6f 100644 --- a/src/i830_memory.c +++ b/src/i830_memory.c @@ -783,7 +783,7 @@ I830Allocate2DMemory(ScrnInfoPtr pScrn, const int flags) pI830->FbMemBox.x1 = 0; pI830->FbMemBox.x2 = pScrn->displayWidth; pI830->FbMemBox.y1 = 0; - if (pScrn->virtualX > pScrn->virtualY) + if (!pI830->MergedFB && pScrn->virtualX > pScrn->virtualY) pI830->FbMemBox.y2 = pScrn->virtualX; else pI830->FbMemBox.y2 = pScrn->virtualY; @@ -855,7 +855,7 @@ I830Allocate2DMemory(ScrnInfoPtr pScrn, const int flags) } #if 1 /* ROTATION */ - if (pScrn->virtualX > pScrn->virtualY) + if (!pI830->MergedFB && pScrn->virtualX > pScrn->virtualY) size = lineSize * (pScrn->virtualX + 
cacheLines); else size = lineSize * (pScrn->virtualY + cacheLines); diff --git a/src/i830_video.c b/src/i830_video.c index 1c5bf4eb..4d15eebc 100644 --- a/src/i830_video.c +++ b/src/i830_video.c @@ -130,9 +130,9 @@ static Atom xvBrightness, xvContrast, xvColorKey, xvPipe, xvDoubleBuffer; static Atom xvGamma0, xvGamma1, xvGamma2, xvGamma3, xvGamma4, xvGamma5; #define IMAGE_MAX_WIDTH 1920 -#define IMAGE_MAX_HEIGHT 1080 +#define IMAGE_MAX_HEIGHT 1088 #define IMAGE_MAX_WIDTH_LEGACY 1024 -#define IMAGE_MAX_HEIGHT_LEGACY 1080 +#define IMAGE_MAX_HEIGHT_LEGACY 1088 /* * Broadwater requires a bit of extra video memory for state information @@ -2128,40 +2128,7 @@ static const CARD32 sip_kernel_static[][4] = { #define SF_MAX_THREADS 1 static const CARD32 sf_kernel_static[][4] = { -/* send 0 (1) g6<1>F g1.12<0,1,0>F math mlen 1 rlen 1 { align1 + } */ - { 0x00000031, 0x20c01fbd, 0x0000002c, 0x01110081 }, -/* send 0 (1) g6.4<1>F g1.20<0,1,0>F math mlen 1 rlen 1 { align1 + } */ - { 0x00000031, 0x20c41fbd, 0x00000034, 0x01110081 }, -/* add (8) g7<1>F g4<8,8,1>F g3<8,8,1>F { align1 + } */ - { 0x00600040, 0x20e077bd, 0x008d0080, 0x008d4060 }, -/* mul (1) g7<1>F g7<0,1,0>F g6<0,1,0>F { align1 + } */ - { 0x00000041, 0x20e077bd, 0x000000e0, 0x000000c0 }, -/* mul (1) g7.4<1>F g7.4<0,1,0>F g6.4<0,1,0>F { align1 + } */ - { 0x00000041, 0x20e477bd, 0x000000e4, 0x000000c4 }, -/* mov (8) m1<1>F g7<0,1,0>F { align1 + } */ - { 0x00600001, 0x202003be, 0x000000e0, 0x00000000 }, -/* mov (8) m2<1>F g7.4<0,1,0>F { align1 + } */ - { 0x00600001, 0x204003be, 0x000000e4, 0x00000000 }, -/* mov (8) m3<1>F g3<8,8,1>F { align1 + } */ - { 0x00600001, 0x206003be, 0x008d0060, 0x00000000 }, -/* send 0 (8) a0<1>F g0<8,8,1>F urb mlen 4 rlen 0 write +0 transpose used complete EOT{ align1 + } */ - { 0x00600031, 0x20001fbc, 0x008d0000, 0x8640c800 }, -/* nop (4) g0<1>UD { align1 + } */ - { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 }, -/* nop (4) g0<1>UD { align1 + } */ - { 0x0040007e, 0x20000c21, 0x00690000, 
0x00000000 }, -/* nop (4) g0<1>UD { align1 + } */ - { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 }, -/* nop (4) g0<1>UD { align1 + } */ - { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 }, -/* nop (4) g0<1>UD { align1 + } */ - { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 }, -/* nop (4) g0<1>UD { align1 + } */ - { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 }, -/* nop (4) g0<1>UD { align1 + } */ - { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 }, -/* nop (4) g0<1>UD { align1 + } */ - { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 }, +#include "sf_prog.h" }; /* diff --git a/src/packed_yuv_sf.g4a b/src/packed_yuv_sf.g4a new file mode 100644 index 00000000..8c1398f4 --- /dev/null +++ b/src/packed_yuv_sf.g4a @@ -0,0 +1,17 @@ +send (1) 0 g6<1>F g1.12<0,1,0>F math inv scalar mlen 1 rlen 1 { align1 }; +send (1) 0 g6.4<1>F g1.20<0,1,0>F math inv scalar mlen 1 rlen 1 { align1 }; +add (8) g7<1>F g4<8,8,1>F -g3<8,8,1>F { align1 }; +mul (1) g7<1>F g7<0,1,0>F g6<0,1,0>F { align1 }; +mul (1) g7.4<1>F g7.4<0,1,0>F g6.4<0,1,0>F { align1 }; +mov (8) m1<1>F g7<0,1,0>F { align1 }; +mov (8) m2<1>F g7.4<0,1,0>F { align1 }; +mov (8) m3<1>F g3<8,8,1>F { align1 }; +send (8) 0 null g0<8,8,1>F urb 0 transpose used complete mlen 4 rlen 0 { align1 EOT }; +nop; +nop; +nop; +nop; +nop; +nop; +nop; +nop; diff --git a/src/packed_yuv_wm.g4a b/src/packed_yuv_wm.g4a new file mode 100644 index 00000000..d312d170 --- /dev/null +++ b/src/packed_yuv_wm.g4a @@ -0,0 +1,161 @@ +/* The initial payload of the thread is always g0. + * WM_URB (incoming URB entries) is g3 + * X0_R is g4 + * X1_R is g5 + * Y0_R is g6 + * Y1_R is g7 + */ + + /* Set up the X/Y screen coordinates of the pixels in our 4 subspans. Each + * subspan is a 2x2 rectangle, and the screen x/y of the upper left of each + * subspan are given in GRF register 1.2 through 1.5 (which, with the word + * addressing below, are 1.4 through 1.11). 
+ * + * The result is WM_X*_R and WM_Y*_R being: + * + * X0: {ss0.x, ss0.x+1, ss0.x, ss0.x+1, ss1.x, ss1.x+1, ss1.x, ss1.x+1} + * Y0: {ss0.y, ss0.y, ss0.y+1, ss0.y+1, ss1.y, ss1.y, ss1.y+1, ss1.y+1} + * X1: {ss2.x, ss2.x+1, ss2.x, ss2.x+1, ss3.x, ss3.x+1, ss3.x, ss3.x+1} + * Y1: {ss2.y, ss2.y, ss2.y+1, ss2.y+1, ss3.y, ss3.y, ss3.y+1, ss3.y+1} + */ + + /* Set up ss0.x coordinates*/ +mov (1) g4<1>F g1.8<0,1,0>UW { align1 }; +add (1) g4.4<1>F g1.8<0,1,0>UW 1UB { align1 }; +mov (1) g4.8<1>F g1.8<0,1,0>UW { align1 }; +add (1) g4.12<1>F g1.8<0,1,0>UW 1UB { align1 }; + /* Set up ss0.y coordinates */ +mov (1) g6<1>F g1.10<0,1,0>UW { align1 }; +mov (1) g6.4<1>F g1.10<0,1,0>UW { align1 }; +add (1) g6.8<1>F g1.10<0,1,0>UW 1UB { align1 }; +add (1) g6.12<1>F g1.10<0,1,0>UW 1UB { align1 }; + /* set up ss1.x coordinates */ +mov (1) g4.16<1>F g1.12<0,1,0>UW { align1 }; +add (1) g4.20<1>F g1.12<0,1,0>UW 1UB { align1 }; +mov (1) g4.24<1>F g1.12<0,1,0>UW { align1 }; +add (1) g4.28<1>F g1.12<0,1,0>UW 1UB { align1 }; + /* set up ss1.y coordinates */ +mov (1) g6.16<1>F g1.14<0,1,0>UW { align1 }; +mov (1) g6.20<1>F g1.14<0,1,0>UW { align1 }; +add (1) g6.24<1>F g1.14<0,1,0>UW 1UB { align1 }; +add (1) g6.28<1>F g1.14<0,1,0>UW 1UB { align1 }; + /* Set up ss2.x coordinates */ +mov (1) g5<1>F g1.16<0,1,0>UW { align1 }; +add (1) g5.4<1>F g1.16<0,1,0>UW 1UB { align1 }; +mov (1) g5.8<1>F g1.16<0,1,0>UW { align1 }; +add (1) g5.12<1>F g1.16<0,1,0>UW 1UB { align1 }; + /* Set up ss2.y coordinates */ +mov (1) g7<1>F g1.18<0,1,0>UW { align1 }; +mov (1) g7.4<1>F g1.18<0,1,0>UW { align1 }; +add (1) g7.8<1>F g1.18<0,1,0>UW 1UB { align1 }; +add (1) g7.12<1>F g1.18<0,1,0>UW 1UB { align1 }; + /* Set up ss3.x coordinates */ +mov (1) g5.16<1>F g1.20<0,1,0>UW { align1 }; +add (1) g5.20<1>F g1.20<0,1,0>UW 1UB { align1 }; +mov (1) g5.24<1>F g1.20<0,1,0>UW { align1 }; +add (1) g5.28<1>F g1.20<0,1,0>UW 1UB { align1 }; + /* Set up ss3.y coordinates */ +mov (1) g7.16<1>F g1.22<0,1,0>UW { align1 }; +mov (1) g7.20<1>F 
g1.22<0,1,0>UW { align1 }; +add (1) g7.24<1>F g1.22<0,1,0>UW 1UB { align1 }; +add (1) g7.28<1>F g1.22<0,1,0>UW 1UB { align1 }; + + /* Now, map these screen space coordinates into texture coordinates. */ + /* subtract screen-space X origin of vertex 0. */ +add (8) g4<1>F g4<8,8,1>F -g1<0,1,0>F { align1 }; +add (8) g5<1>F g5<8,8,1>F -g1<0,1,0>F { align1 }; + /* scale by texture X increment */ +mul (8) g4<1>F g4<8,8,1>F g3<0,1,0>F { align1 }; +mul (8) g5<1>F g5<8,8,1>F g3<0,1,0>F { align1 }; + /* add in texture X offset */ +add (8) g4<1>F g4<8,8,1>F g3.12<0,1,0>F { align1 }; +add (8) g5<1>F g5<8,8,1>F g3.12<0,1,0>F { align1 }; + /* subtract screen-space Y origin of vertex 0. */ +add (8) g6<1>F g6<8,8,1>F -g1.4<0,1,0>F { align1 }; +add (8) g7<1>F g7<8,8,1>F -g1.4<0,1,0>F { align1 }; + /* scale by texture Y increment */ +mul (8) g6<1>F g6<8,8,1>F g3.20<0,1,0>F { align1 }; +mul (8) g7<1>F g7<8,8,1>F g3.20<0,1,0>F { align1 }; + /* add in texture Y offset */ +add (8) g6<1>F g6<8,8,1>F g3.28<0,1,0>F { align1 }; +add (8) g7<1>F g7<8,8,1>F g3.28<0,1,0>F { align1 }; + /* sampler */ +mov (8) m1<1>F g4<8,8,1>F { align1 }; +mov (8) m2<1>F g5<8,8,1>F { align1 }; +mov (8) m3<1>F g6<8,8,1>F { align1 }; +mov (8) m4<1>F g7<8,8,1>F { align1 }; + + /* + * g0 holds the PS thread payload, which (oddly) contains + * precisely what the sampler wants to see in m0 + */ +send (16) 0 g12<1>UW g0<8,8,1>UW sampler (1,0,F) mlen 5 rlen 8 { align1 }; +mov (8) g19<1>UW g19<8,8,1>UW { align1 }; + + /* color space conversion function: + * R = Clamp ( 1.164(Y-16/255) + 1.596(Cr-128/255), 0, 1) + * G = Clamp ( 1.164(Y-16/255) - 0.813(Cr-128/255) - 0.392(Cb-128/255), 0, 1) + * B = Clamp ( 1.164(Y-16/255) + 2.017(Cb-128/255), 0, 1) + * + * Y is g14, g15. + * Cr is g12, g13. + * Cb is g16, g17. + * + * R is m2, m6. + * G is m3, m7. + * B is m4, m8. 
+ */ + /* Y = Y - 16/255 */ +add (8) g14<1>F g14<8,8,1>F -0.0627451F { align1 }; + /* Cr = Cr - 128/255 */ +add (8) g12<1>F g12<8,8,1>F -0.501961F { align1 }; + /* Cb = Cb - 128 / 255 */ +add (8) g16<1>F g16<8,8,1>F -0.501961F { align1 }; + /* Y = Y * 1.164 */ +mul (8) g14<1>F g14<8,8,1>F 1.164F { align1 }; + /* acc = 1.596 * Cr */ +mul (8) null g12<8,8,1>F 1.596F { align1 }; + /* R = acc + Y */ +mac.sat (8) m2<1>F g14<8,8,1>F 1F { align1 }; + /* acc = Cr * -0.813 */ +mul (8) null g12<8,8,1>F -0.813F { align1 }; + /* acc += Cb * -0.392 */ +mac (8) null g16<8,8,1>F -0.392F { align1 }; + /* G = acc + Y */ +mac.sat (8) m3<1>F g14<8,8,1>F 1F { align1 }; + /* acc = Cb * 2.017 */ +mul (8) null g16<8,8,1>F 2.017F { align1 }; + /* B = acc + Y */ +mac.sat (8) m4<1>F g14<8,8,1>F 1F { align1 }; + /* and do it again */ +add (8) g15<1>F g15<8,8,1>F -0.0627451F { align1 }; +add (8) g13<1>F g13<8,8,1>F -0.501961F { align1 }; +add (8) g17<1>F g17<8,8,1>F -0.501961F { align1 }; +mul (8) g15<1>F g15<8,8,1>F 1.164F { align1 }; +mul (8) null g13<8,8,1>F 1.596F { align1 }; +mac.sat (8) m6<1>F g15<8,8,1>F 1F { align1 }; +mul (8) null g13<8,8,1>F -0.813F { align1 }; +mac (8) null g17<8,8,1>F -0.392F { align1 }; +mac.sat (8) m7<1>F g15<8,8,1>F 1F { align1 }; +mul (8) null g17<8,8,1>F 2.017F { align1 }; +mac.sat (8) m8<1>F g15<8,8,1>F 1F { align1 }; + + /* Pass through control information: + */ +mov (8) m1<1>UD g1<8,8,1>UD { align1 mask_disable }; + /* Send framebuffer write message: XXX: acc0? 
*/ +send (16) 0 null g0<8,8,1>UW write ( + 0, /* binding table index 0 */ + 8, /* pixel scoreboard clear */ + 4, /* render target write */ + 0 /* no write commit message */ + ) mlen 10 rlen 0 { align1 EOT }; + /* padding */ +nop; +nop; +nop; +nop; +nop; +nop; +nop; +nop; diff --git a/src/sf_prog.h b/src/sf_prog.h new file mode 100644 index 00000000..830d1760 --- /dev/null +++ b/src/sf_prog.h @@ -0,0 +1,17 @@ + { 0x00000031, 0x20c01fbd, 0x0000002c, 0x01110081 }, + { 0x00000031, 0x20c41fbd, 0x00000034, 0x01110081 }, + { 0x00600040, 0x20e077bd, 0x008d0080, 0x008d4060 }, + { 0x00000041, 0x20e077bd, 0x000000e0, 0x000000c0 }, + { 0x00000041, 0x20e477bd, 0x000000e4, 0x000000c4 }, + { 0x00600001, 0x202003be, 0x000000e0, 0x00000000 }, + { 0x00600001, 0x204003be, 0x000000e4, 0x00000000 }, + { 0x00600001, 0x206003be, 0x008d0060, 0x00000000 }, + { 0x00600031, 0x20001fbc, 0x008d0000, 0x8640c800 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, diff --git a/src/wm_prog.h b/src/wm_prog.h index 297ddcbf..708e6eb6 100644 --- a/src/wm_prog.h +++ b/src/wm_prog.h @@ -1,166 +1,82 @@ -/* wm_program */ -/* mov (1) g4<1>F g1.8<0,1,0>UW { align1 + } */ { 0x00000001, 0x2080013d, 0x00000028, 0x00000000 }, -/* add (1) g4.4<1>F g1.8<0,1,0>UW 1 { align1 + } */ { 0x00000040, 0x20840d3d, 0x00000028, 0x00000001 }, -/* mov (1) g4.8<1>F g1.8<0,1,0>UW { align1 + } */ { 0x00000001, 0x2088013d, 0x00000028, 0x00000000 }, -/* add (1) g4.12<1>F g1.8<0,1,0>UW 1 { align1 + } */ { 0x00000040, 0x208c0d3d, 0x00000028, 0x00000001 }, -/* mov (1) g6<1>F g1.10<0,1,0>UW { align1 + } */ { 0x00000001, 0x20c0013d, 0x0000002a, 0x00000000 }, 
-/* mov (1) g6.4<1>F g1.10<0,1,0>UW { align1 + } */ { 0x00000001, 0x20c4013d, 0x0000002a, 0x00000000 }, -/* add (1) g6.8<1>F g1.10<0,1,0>UW 1 { align1 + } */ { 0x00000040, 0x20c80d3d, 0x0000002a, 0x00000001 }, -/* add (1) g6.12<1>F g1.10<0,1,0>UW 1 { align1 + } */ { 0x00000040, 0x20cc0d3d, 0x0000002a, 0x00000001 }, -/* mov (1) g4.16<1>F g1.12<0,1,0>UW { align1 + } */ { 0x00000001, 0x2090013d, 0x0000002c, 0x00000000 }, -/* add (1) g4.20<1>F g1.12<0,1,0>UW 1 { align1 + } */ { 0x00000040, 0x20940d3d, 0x0000002c, 0x00000001 }, -/* mov (1) g4.24<1>F g1.12<0,1,0>UW { align1 + } */ { 0x00000001, 0x2098013d, 0x0000002c, 0x00000000 }, -/* add (1) g4.28<1>F g1.12<0,1,0>UW 1 { align1 + } */ { 0x00000040, 0x209c0d3d, 0x0000002c, 0x00000001 }, -/* mov (1) g6.16<1>F g1.14<0,1,0>UW { align1 + } */ { 0x00000001, 0x20d0013d, 0x0000002e, 0x00000000 }, -/* mov (1) g6.20<1>F g1.14<0,1,0>UW { align1 + } */ { 0x00000001, 0x20d4013d, 0x0000002e, 0x00000000 }, -/* add (1) g6.24<1>F g1.14<0,1,0>UW 1 { align1 + } */ { 0x00000040, 0x20d80d3d, 0x0000002e, 0x00000001 }, -/* add (1) g6.28<1>F g1.14<0,1,0>UW 1 { align1 + } */ { 0x00000040, 0x20dc0d3d, 0x0000002e, 0x00000001 }, -/* mov (1) g5<1>F g1.16<0,1,0>UW { align1 + } */ { 0x00000001, 0x20a0013d, 0x00000030, 0x00000000 }, -/* add (1) g5.4<1>F g1.16<0,1,0>UW 1 { align1 + } */ { 0x00000040, 0x20a40d3d, 0x00000030, 0x00000001 }, -/* mov (1) g5.8<1>F g1.16<0,1,0>UW { align1 + } */ { 0x00000001, 0x20a8013d, 0x00000030, 0x00000000 }, -/* add (1) g5.12<1>F g1.16<0,1,0>UW 1 { align1 + } */ { 0x00000040, 0x20ac0d3d, 0x00000030, 0x00000001 }, -/* mov (1) g7<1>F g1.18<0,1,0>UW { align1 + } */ { 0x00000001, 0x20e0013d, 0x00000032, 0x00000000 }, -/* mov (1) g7.4<1>F g1.18<0,1,0>UW { align1 + } */ { 0x00000001, 0x20e4013d, 0x00000032, 0x00000000 }, -/* add (1) g7.8<1>F g1.18<0,1,0>UW 1 { align1 + } */ { 0x00000040, 0x20e80d3d, 0x00000032, 0x00000001 }, -/* add (1) g7.12<1>F g1.18<0,1,0>UW 1 { align1 + } */ { 0x00000040, 0x20ec0d3d, 0x00000032, 0x00000001 
}, -/* mov (1) g5.16<1>F g1.20<0,1,0>UW { align1 + } */ { 0x00000001, 0x20b0013d, 0x00000034, 0x00000000 }, -/* add (1) g5.20<1>F g1.20<0,1,0>UW 1 { align1 + } */ { 0x00000040, 0x20b40d3d, 0x00000034, 0x00000001 }, -/* mov (1) g5.24<1>F g1.20<0,1,0>UW { align1 + } */ { 0x00000001, 0x20b8013d, 0x00000034, 0x00000000 }, -/* add (1) g5.28<1>F g1.20<0,1,0>UW 1 { align1 + } */ { 0x00000040, 0x20bc0d3d, 0x00000034, 0x00000001 }, -/* mov (1) g7.16<1>F g1.22<0,1,0>UW { align1 + } */ { 0x00000001, 0x20f0013d, 0x00000036, 0x00000000 }, -/* mov (1) g7.20<1>F g1.22<0,1,0>UW { align1 + } */ { 0x00000001, 0x20f4013d, 0x00000036, 0x00000000 }, -/* add (1) g7.24<1>F g1.22<0,1,0>UW 1 { align1 + } */ { 0x00000040, 0x20f80d3d, 0x00000036, 0x00000001 }, -/* add (1) g7.28<1>F g1.22<0,1,0>UW 1 { align1 + } */ { 0x00000040, 0x20fc0d3d, 0x00000036, 0x00000001 }, -/* add (8) g4<1>F g4<8,8,1>F g1<0,1,0>F { align1 + } */ { 0x00600040, 0x208077bd, 0x008d0080, 0x00004020 }, -/* add (8) g5<1>F g5<8,8,1>F g1<0,1,0>F { align1 + } */ { 0x00600040, 0x20a077bd, 0x008d00a0, 0x00004020 }, -/* mul (8) g4<1>F g4<8,8,1>F g3<0,1,0>F { align1 + } */ { 0x00600041, 0x208077bd, 0x008d0080, 0x00000060 }, -/* mul (8) g5<1>F g5<8,8,1>F g3<0,1,0>F { align1 + } */ { 0x00600041, 0x20a077bd, 0x008d00a0, 0x00000060 }, -/* add (8) g4<1>F g4<8,8,1>F g3.12<0,1,0>F { align1 + } */ { 0x00600040, 0x208077bd, 0x008d0080, 0x0000006c }, -/* add (8) g5<1>F g5<8,8,1>F g3.12<0,1,0>F { align1 + } */ { 0x00600040, 0x20a077bd, 0x008d00a0, 0x0000006c }, -/* add (8) g6<1>F g6<8,8,1>F g1.4<0,1,0>F { align1 + } */ { 0x00600040, 0x20c077bd, 0x008d00c0, 0x00004024 }, -/* add (8) g7<1>F g7<8,8,1>F g1.4<0,1,0>F { align1 + } */ { 0x00600040, 0x20e077bd, 0x008d00e0, 0x00004024 }, -/* mul (8) g6<1>F g6<8,8,1>F g3.20<0,1,0>F { align1 + } */ { 0x00600041, 0x20c077bd, 0x008d00c0, 0x00000074 }, -/* mul (8) g7<1>F g7<8,8,1>F g3.20<0,1,0>F { align1 + } */ { 0x00600041, 0x20e077bd, 0x008d00e0, 0x00000074 }, -/* add (8) g6<1>F g6<8,8,1>F 
g3.28<0,1,0>F { align1 + } */ { 0x00600040, 0x20c077bd, 0x008d00c0, 0x0000007c }, -/* add (8) g7<1>F g7<8,8,1>F g3.28<0,1,0>F { align1 + } */ { 0x00600040, 0x20e077bd, 0x008d00e0, 0x0000007c }, -/* mov (8) m1<1>F g4<8,8,1>F { align1 + } */ { 0x00600001, 0x202003be, 0x008d0080, 0x00000000 }, -/* mov (8) m2<1>F g5<8,8,1>F { align1 + } */ { 0x00600001, 0x204003be, 0x008d00a0, 0x00000000 }, -/* mov (8) m3<1>F g6<8,8,1>F { align1 + } */ { 0x00600001, 0x206003be, 0x008d00c0, 0x00000000 }, -/* mov (8) m4<1>F g7<8,8,1>F { align1 + } */ { 0x00600001, 0x208003be, 0x008d00e0, 0x00000000 }, -/* send 0 (16) g12<1>UW g0<8,8,1>UW sampler mlen 5 rlen 8 { align1 + } */ { 0x00800031, 0x21801d29, 0x008d0000, 0x02580001 }, -/* mov (8) g19<1>UW g19<8,8,1>UW { align1 + } */ { 0x00600001, 0x22600129, 0x008d0260, 0x00000000 }, -/* add (8) g14<1>F g14<8,8,1>F -0.0627451{ align1 + } */ { 0x00600040, 0x21c07fbd, 0x008d01c0, 0xbd808081 }, -/* add (8) g12<1>F g12<8,8,1>F -0.501961{ align1 + } */ - { 0x00600040, 0x21807fbd, 0x008d0180, 0xbf008081 }, -/* add (8) g16<1>F g16<8,8,1>F -0.501961{ align1 + } */ - { 0x00600040, 0x22007fbd, 0x008d0200, 0xbf008081 }, -/* mul (8) g14<1>F g14<8,8,1>F 1.164{ align1 + } */ + { 0x00600040, 0x21807fbd, 0x008d0180, 0xbf008084 }, + { 0x00600040, 0x22007fbd, 0x008d0200, 0xbf008084 }, { 0x00600041, 0x21c07fbd, 0x008d01c0, 0x3f94fdf4 }, -/* mul (8) a0<1>F g12<8,8,1>F 1.596{ align1 + } */ { 0x00600041, 0x20007fbc, 0x008d0180, 0x3fcc49ba }, -/* mac (8) m2<1>F g14<8,8,1>F 1{ align1 + Saturate } */ { 0x80600048, 0x20407fbe, 0x008d01c0, 0x3f800000 }, -/* mul (8) a0<1>F g12<8,8,1>F -0.813{ align1 + } */ { 0x00600041, 0x20007fbc, 0x008d0180, 0xbf5020c5 }, -/* mac (8) a0<1>F g16<8,8,1>F -0.392{ align1 + } */ { 0x00600048, 0x20007fbc, 0x008d0200, 0xbec8b439 }, -/* mac (8) m3<1>F g14<8,8,1>F 1{ align1 + Saturate } */ { 0x80600048, 0x20607fbe, 0x008d01c0, 0x3f800000 }, -/* mul (8) a0<1>F g16<8,8,1>F 2.017{ align1 + } */ { 0x00600041, 0x20007fbc, 0x008d0200, 0x40011687 }, -/* 
mac (8) m4<1>F g14<8,8,1>F 1{ align1 + Saturate } */ { 0x80600048, 0x20807fbe, 0x008d01c0, 0x3f800000 }, -/* add (8) g15<1>F g15<8,8,1>F -0.0627451{ align1 + } */ { 0x00600040, 0x21e07fbd, 0x008d01e0, 0xbd808081 }, -/* add (8) g13<1>F g13<8,8,1>F -0.501961{ align1 + } */ - { 0x00600040, 0x21a07fbd, 0x008d01a0, 0xbf008081 }, -/* add (8) g17<1>F g17<8,8,1>F -0.501961{ align1 + } */ - { 0x00600040, 0x22207fbd, 0x008d0220, 0xbf008081 }, -/* mul (8) g15<1>F g15<8,8,1>F 1.164{ align1 + } */ + { 0x00600040, 0x21a07fbd, 0x008d01a0, 0xbf008084 }, + { 0x00600040, 0x22207fbd, 0x008d0220, 0xbf008084 }, { 0x00600041, 0x21e07fbd, 0x008d01e0, 0x3f94fdf4 }, -/* mul (8) a0<1>F g13<8,8,1>F 1.596{ align1 + } */ { 0x00600041, 0x20007fbc, 0x008d01a0, 0x3fcc49ba }, -/* mac (8) m6<1>F g15<8,8,1>F 1{ align1 + Saturate } */ { 0x80600048, 0x20c07fbe, 0x008d01e0, 0x3f800000 }, -/* mul (8) a0<1>F g13<8,8,1>F -0.813{ align1 + } */ { 0x00600041, 0x20007fbc, 0x008d01a0, 0xbf5020c5 }, -/* mac (8) a0<1>F g17<8,8,1>F -0.392{ align1 + } */ { 0x00600048, 0x20007fbc, 0x008d0220, 0xbec8b439 }, -/* mac (8) m7<1>F g15<8,8,1>F 1{ align1 + Saturate } */ { 0x80600048, 0x20e07fbe, 0x008d01e0, 0x3f800000 }, -/* mul (8) a0<1>F g17<8,8,1>F 2.017{ align1 + } */ { 0x00600041, 0x20007fbc, 0x008d0220, 0x40011687 }, -/* mac (8) m8<1>F g15<8,8,1>F 1{ align1 + Saturate } */ { 0x80600048, 0x21007fbe, 0x008d01e0, 0x3f800000 }, -/* mov (8) m1<1>UD g1<8,8,1>UD { align1 mask_disable + } */ { 0x00600201, 0x20200022, 0x008d0020, 0x00000000 }, -/* send 0 (16) a0<1>UW g0<8,8,1>UW write mlen 10 rlen 0 EOT{ align1 + } */ - { 0x00800031, 0x20001d28, 0x008d0000, 0x85a04800 }, -/* nop (4) g0<1>UD { align1 + } */ - { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 }, -/* nop (4) g0<1>UD { align1 + } */ - { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 }, -/* nop (4) g0<1>UD { align1 + } */ - { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 }, -/* nop (4) g0<1>UD { align1 + } */ - { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 }, 
-/* nop (4) g0<1>UD { align1 + } */ - { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 }, -/* nop (4) g0<1>UD { align1 + } */ - { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 }, -/* nop (4) g0<1>UD { align1 + } */ - { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 }, -/* nop (4) g0<1>UD { align1 + } */ - { 0x0040007e, 0x20000c21, 0x00690000, 0x00000000 }, - + { 0x00800031, 0x20001d3c, 0x008d0000, 0x85a04800 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, |