1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
|
__kernel __attribute__((intel_reqd_sub_group_size(16)))
void compiler_intra_prediction(
__read_only image2d_t srcImg,
__global uchar *luma_mode,
__global ushort *luma_distortion,
__global uchar *luma_shape,
__global uint* dwo_buffer,
__global uint* pld_buffer){
int gr_id0 = get_group_id(0);
int gr_id1 = get_group_id(1);
ushort2 src_coord;
/*src_coord.x = gr_id0 * 16;
src_coord.y = gr_id1 * 16;*/
src_coord.x = 2 * 16;
src_coord.y = 1 * 16;
intel_sub_group_avc_sic_payload_t payload = intel_sub_group_avc_sic_initialize(src_coord);
uchar sad_adjustment = CLK_AVC_ME_SAD_ADJUST_MODE_NONE_INTEL;
uchar intra_partition_mask = CLK_AVC_ME_INTRA_LUMA_PARTITION_MASK_16x16_INTEL;
//XXX: Different from official value?
#undef CLK_AVC_ME_INTRA_NEIGHBOR_UPPER_RIGHT_MASK_ENABLE_INTEL
#undef CLK_AVC_ME_INTRA_NEIGHBOR_UPPER_LEFT_MASK_ENABLE_INTEL
#define CLK_AVC_ME_INTRA_NEIGHBOR_UPPER_RIGHT_MASK_ENABLE_INTEL 0x4
#define CLK_AVC_ME_INTRA_NEIGHBOR_UPPER_LEFT_MASK_ENABLE_INTEL 0x8
uint nb_avail = CLK_AVC_ME_INTRA_NEIGHBOR_LEFT_MASK_ENABLE_INTEL |
CLK_AVC_ME_INTRA_NEIGHBOR_UPPER_MASK_ENABLE_INTEL |
CLK_AVC_ME_INTRA_NEIGHBOR_UPPER_LEFT_MASK_ENABLE_INTEL |
CLK_AVC_ME_INTRA_NEIGHBOR_UPPER_RIGHT_MASK_ENABLE_INTEL;
uint sgl_id = get_sub_group_local_id();
int2 nb_coord;
float4 color;
nb_coord.x = src_coord.x - 1;
nb_coord.y = src_coord.y + sgl_id;
color = read_imagef(srcImg, nb_coord);
uchar left_edge = color.s0 * 255;
nb_coord.x = src_coord.x - 1;
nb_coord.y = src_coord.y - 1;
color = read_imagef(srcImg, nb_coord);
uchar upper_left_corner = color.s0 * 255;
nb_coord.x = src_coord.x + sgl_id;
nb_coord.y = src_coord.y - 1;
color = read_imagef(srcImg, nb_coord);
uchar upper_edge = color.s0 * 255;
uchar upper_right_edge = 0;
if(sgl_id < 8){
nb_coord.x = src_coord.x + 16 + sgl_id;
nb_coord.y = src_coord.y - 1;
color = read_imagef(srcImg, nb_coord);
upper_right_edge = color.s0 * 255;
}
payload = intel_sub_group_avc_sic_configure_ipe(
intra_partition_mask, nb_avail, left_edge, upper_left_corner, upper_edge,
upper_right_edge, sad_adjustment, payload);
uchar shape_cost_16_16 = (1 << 4) | 5;
uchar shape_cost_8_8 = (1 << 4) | 4;
uchar shape_cost_4_4 = (1 << 4) | 3;
uint intra_shape_cost = (shape_cost_4_4 << 24) | (shape_cost_8_8 << 16) | (shape_cost_16_16 << 8) | (0x0);
payload = intel_sub_group_avc_sic_set_intra_luma_shape_penalty(intra_shape_cost, payload);
sampler_t vs = 0;
intel_sub_group_avc_sic_result_t result =
intel_sub_group_avc_sic_evaluate_ipe(srcImg, vs, payload);
uchar shape = intel_sub_group_avc_sic_get_ipe_luma_shape(result);
ushort dist = intel_sub_group_avc_sic_get_best_ipe_luma_distortion(result);
ulong modes = intel_sub_group_avc_sic_get_packed_ipe_luma_modes(result);
int lid_x = get_local_id(0);
int mb_idx = gr_id0 + gr_id1 * get_num_groups(0);
if (lid_x == 0) {
luma_shape[mb_idx] = shape;
luma_distortion[mb_idx] = dist;
uchar mode = modes & 0xF;
luma_mode[mb_idx] = mode;
}
dwo_buffer[mb_idx*16*4 + lid_x + 16*0] = result.s0;
dwo_buffer[mb_idx*16*4 + lid_x + 16*1] = result.s1;
dwo_buffer[mb_idx*16*4 + lid_x + 16*2] = result.s2;
dwo_buffer[mb_idx*16*4 + lid_x + 16*3] = result.s3;
}
|