summaryrefslogtreecommitdiff
path: root/src/shaders/post_processing/Core_Kernels/RGB_Scaling.asm
diff options
context:
space:
mode:
Diffstat (limited to 'src/shaders/post_processing/Core_Kernels/RGB_Scaling.asm')
-rw-r--r--src/shaders/post_processing/Core_Kernels/RGB_Scaling.asm72
1 files changed, 72 insertions, 0 deletions
diff --git a/src/shaders/post_processing/Core_Kernels/RGB_Scaling.asm b/src/shaders/post_processing/Core_Kernels/RGB_Scaling.asm
new file mode 100644
index 0000000..7429790
--- /dev/null
+++ b/src/shaders/post_processing/Core_Kernels/RGB_Scaling.asm
@@ -0,0 +1,72 @@
+/*
+ * All Video Processing kernels
+ * Copyright © <2010>, Intel Corporation.
+ *
+ * This program is licensed under the terms and conditions of the
+ * Eclipse Public License (EPL), version 1.0. The full text of the EPL is at
+ * http://www.opensource.org/licenses/eclipse-1.0.php.
+ *
+ */
+
+//---------- RGB_Scaling.asm ----------
+#include "Scaling.inc"
+
+ // Build a 16-element float32 ramp (0 .. 15): elements 0-7 in SAMPLER_RAMP(0), elements 8-15 in SAMPLER_RAMP(1)
+// mov (8) SAMPLER_RAMP(0)<1> 0x76543210:v
+// add (8) SAMPLER_RAMP(1)<1> SAMPLER_RAMP(0) 8.0:f
+mov (4) SAMPLER_RAMP(0)<1> 0x48403000:vf //3, 2, 1, 0 in float vector (listed from high byte to low byte of the immediate)
+mov (4) SAMPLER_RAMP(0,4)<1> 0x5C585450:vf //7, 6, 5, 4 in float vector (listed from high byte to low byte of the immediate)
+add (8) SAMPLER_RAMP(1)<1> SAMPLER_RAMP(0) 8.0:f // Elements 8..15 = elements 0..7 + 8.0
+
+//Module: PrepareScaleCoord.asm
+
+ // Set up the sampler message header source: zero dwords 0-2 of rMSGSRC (two partial writes to one register, paired with NoDDClr/NoDDChk)
+ mov (2) rMSGSRC.0<1>:ud 0:ud { NoDDClr } // Unused fields
+ mov (1) rMSGSRC.2<1>:ud 0:ud { NoDDChk } // Write and offset
+
+ // Initialize the 16 v coordinates: all channels start at the vertical origin; step Y is added once per row inside the $for loop
+ mov (16) mfMSGPAYLOAD(2)<1> fSRC_VID_V_ORI<0;1,0>:f // v written to the message payload
+ mov (16) SCALE_COORD_Y<1>:f fSRC_VID_V_ORI<0;1,0>:f // Copy kept in SCALE_COORD_Y, which the row loop reads back
+
+ // Calculate 16 u: horizontal origin + step X * ramp element
+// line (16) mfMSGPAYLOAD(0)<1> SCALE_STEP_X<0;1,0>:f SAMPLER_RAMP(0) // Assign to mrf directly
+ mov (16) acc0:f fSRC_VID_H_ORI<0;1,0>:f { Compr } // acc0 = horizontal origin, broadcast to all 16 channels
+ mac (16) mfMSGPAYLOAD(0)<1> fVIDEO_STEP_X<0;1,0>:f SAMPLER_RAMP(0) { Compr } // u = step X * ramp + acc0, written to the message payload
+
+ //Set up the constants read by the line instructions in the row loop: 255.0 scale and 0.5 (presumably a rounding offset - confirm register layout in Scaling.inc)
+ mov (1) SCALE_LINE_P255<1>:f 255.0:f { NoDDClr } //{ NoDDClr, NoDDChk }
+ mov (1) SCALE_LINE_P0_5<1>:f 0.5:f { NoDDChk }
+
+
+//------------------------------------------------------------------------------
+
+$for (0; <nY_NUM_OF_ROWS; 1) {
+
+ // Read 16 sampled pixels and store them in float32 in 8 GRFs (2 per channel) in the order of BGRA (VYUA). NOTE(review): the stores at the end of this loop map response 0/2/4/6 to DEST_R/G/B/A - confirm the actual channel order.
+ mov (8) MSGHDR_SCALE.0:ud rMSGSRC.0<8;8,1>:ud // Copy msg header and payload mirrors to MRFs
+ send (16) SCALE_RESPONSE_YW(0)<1> MSGHDR_SCALE udDUMMY_NULL nSMPL_ENGINE SMPLR_MSG_DSC+nSI_SRC_SIMD16_RGB+nBI_CURRENT_SRC_RGB // SIMD16 sampler request; results are written starting at SCALE_RESPONSE_YW(0)
+
+ // Calculate 16 v for the next row: advance by step Y in both the message payload and the SCALE_COORD_Y copy
+ add (16) mfMSGPAYLOAD(2)<1> SCALE_COORD_Y<8;8,1>:f fVIDEO_STEP_Y<0;1,0>:f // Assign to mrf directly
+ add (16) SCALE_COORD_Y<1>:f SCALE_COORD_Y<8;8,1>:f fVIDEO_STEP_Y<0;1,0>:f // Update the SCALE_COORD_Y copy too; the next iteration reads it as the source
+
+ // Scale back to [0, 255], convert f to ud (one line + mov pair per channel, two response registers each; constants were set up before the loop)
+ line (16) acc0:f SCALE_LINE_P255<0;1,0>:f SCALE_RESPONSE_YF(0) { Compr } // Response 0-1: 1st channel (B, V per the ordering comment above)
+ mov (16) SCALE_RESPONSE_YD(0)<1> acc0:f { Compr }
+
+ line (16) acc0:f SCALE_LINE_P255<0;1,0>:f SCALE_RESPONSE_YF(2) { Compr } // Response 2-3: 2nd channel (G, Y per the ordering comment above)
+ mov (16) SCALE_RESPONSE_YD(2)<1> acc0:f { Compr }
+
+ line (16) acc0:f SCALE_LINE_P255<0;1,0>:f SCALE_RESPONSE_YF(4) { Compr } // Response 4-5: 3rd channel (R, U per the ordering comment above)
+ mov (16) SCALE_RESPONSE_YD(4)<1> acc0:f { Compr }
+
+//#if defined(SAVE_ARGB) //Only needed if Alpha value is written to the destination
+ line (16) acc0:f SCALE_LINE_P255<0;1,0>:f SCALE_RESPONSE_YF(6) { Compr } // Response 6-7: 4th channel (A)
+ mov (16) SCALE_RESPONSE_YD(6)<1> acc0:f { Compr }
+//#endif
+
+ mov (16) DEST_R(%1)<1> SCALE_RESPONSE_YB(0) //possible error due to truncation - vK (%1 is presumably the $for loop counter, i.e. the row index - confirm)
+ mov (16) DEST_G(%1)<1> SCALE_RESPONSE_YB(2) //possible error due to truncation - vK
+ mov (16) DEST_B(%1)<1> SCALE_RESPONSE_YB(4) //possible error due to truncation - vK
+ mov (16) DEST_A(%1)<1> SCALE_RESPONSE_YB(6) //possible error due to truncation - vK
+}