1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
|
/*
 * compiler_skip_check - run an AVC skip check (SIC) for one 16x16 block
 * per work-group and dump the result.
 *
 * src_img              source picture; the block origin is
 *                      (get_group_id(0) * 16, get_group_id(1) * 16).
 * ref_img              single reference picture the skip check is
 *                      evaluated against.
 * motion_vector_buffer one input motion vector per block, indexed by
 *                      mb_idx = group_id(0) + group_id(1) * num_groups(0).
 * residual_buffer      receives one inter-distortion value per block
 *                      (written by local id 0 only).
 * dwo_buffer           receives the four components (s0..s3) of the SIC
 *                      result: 64 uints per block, laid out as four
 *                      consecutive 16-entry rows indexed by local id.
 * pld_buffer           not referenced by this kernel.
 *
 * NOTE(review): the dwo_buffer indexing assumes get_local_id(0) is in
 * [0, 16), i.e. a work-group that is one 16-wide sub-group -- confirm
 * against the host-side launch configuration.
 */
__kernel __attribute__((intel_reqd_sub_group_size(16)))
void compiler_skip_check(__read_only image2d_t src_img,
                         __read_only image2d_t ref_img,
                         __global short2 *motion_vector_buffer,
                         __global ushort *residual_buffer,
                         __global uint* dwo_buffer,
                         __global uint* pld_buffer){
  sampler_t vs = 0;

  int gr_id0 = get_group_id(0);
  int gr_id1 = get_group_id(1);

  /* Top-left pixel of the block handled by this work-group. */
  ushort2 src_coord;
  src_coord.x = gr_id0 * 16;
  src_coord.y = gr_id1 * 16;

  intel_sub_group_avc_sic_payload_t payload =
      intel_sub_group_avc_sic_initialize(src_coord);

  /* Ignored in unidirectional mode, so just set to 0. */
  uchar bidir_weight = 0;
  uint skip_block_partition_type = CLK_AVC_ME_SKIP_BLOCK_PARTITION_16x16_INTEL;
  uint skip_motion_vector_mask = CLK_AVC_ME_SKIP_BLOCK_16x16_FORWARD_ENABLE_INTEL;
  uchar skip_sad_adjustment = CLK_AVC_ME_SAD_ADJUST_MODE_NONE_INTEL;

  int mb_idx = gr_id0 + gr_id1 * get_num_groups(0);
  short2 input_mv = motion_vector_buffer[mb_idx];

  /*
   * Pack the input vector into the low 32 bits of the 64-bit motion-vector
   * argument. The high 32 bits are zeroed explicitly: only the forward
   * 16x16 mask is enabled above, but leaving them unset would feed an
   * indeterminate value into as_ulong().
   */
  uint2 bi_mv;
  bi_mv.s0 = as_uint(input_mv);
  bi_mv.s1 = 0;
  ulong mv = as_ulong(bi_mv);

  payload = intel_sub_group_avc_sic_configure_skc(
      skip_block_partition_type, skip_motion_vector_mask, mv, bidir_weight,
      skip_sad_adjustment, payload);

  intel_sub_group_avc_sic_result_t result =
      intel_sub_group_avc_sic_evaluate_with_single_reference(
          src_img, ref_img, vs, payload);

  ushort distortion = intel_sub_group_avc_sic_get_inter_distortions(result);

  int lid_x = get_local_id(0);

  /* One distortion value per block: only local id 0 stores it. */
  if (lid_x == 0) {
    residual_buffer[mb_idx] = distortion;
  }

  /* Every work-item stores its own copy of the four result components. */
  int dwo_base = mb_idx * 16 * 4 + lid_x;
  dwo_buffer[dwo_base + 16 * 0] = result.s0;
  dwo_buffer[dwo_base + 16 * 1] = result.s1;
  dwo_buffer[dwo_base + 16 * 2] = result.s2;
  dwo_buffer[dwo_base + 16 * 3] = result.s3;
}
|