diff options
author | Zhao Yakui <yakui.zhao@intel.com> | 2016-11-20 14:41:43 -0500 |
---|---|---|
committer | Xiang, Haihao <haihao.xiang@intel.com> | 2016-11-21 22:21:55 +0800 |
commit | 0babac1c9d517862fe9239dc2c6f312d4edb0792 (patch) | |
tree | 3e8feab0f5da59c35172cdd4761041a8661faad1 | |
parent | bcec25eb02ca7c98aad5d573f88a3b05419b93ec (diff) |
Use the obtained EU count to configure the GPU media pipeline on Gen8/Gen9
If the EU count cannot be obtained, the driver falls back to the original hard-coded configuration.
Signed-off-by: Zhao Yakui <yakui.zhao@intel.com>
Reviewed-by: Charles Daniel <daniel.charles@intel.com>
-rw-r--r-- | src/gen8_mfc.c | 6 | ||||
-rw-r--r-- | src/gen8_post_processing.c | 5 | ||||
-rw-r--r-- | src/gen8_vme.c | 8 | ||||
-rw-r--r-- | src/gen9_post_processing.c | 12 | ||||
-rw-r--r-- | src/gen9_vme.c | 8 | ||||
-rw-r--r-- | src/gen9_vp9_encoder.c | 23 |
6 files changed, 46 insertions, 16 deletions
diff --git a/src/gen8_mfc.c b/src/gen8_mfc.c index b0ee6fb..d1de92c 100644 --- a/src/gen8_mfc.c +++ b/src/gen8_mfc.c @@ -4614,7 +4614,11 @@ Bool gen8_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *e mfc_context->gpe_context.sampler.entry_size = 0; mfc_context->gpe_context.sampler.max_entries = 0; - mfc_context->gpe_context.vfe_state.max_num_threads = 60 - 1; + if (i965->intel.eu_total > 0) + mfc_context->gpe_context.vfe_state.max_num_threads = 6 * i965->intel.eu_total; + else + mfc_context->gpe_context.vfe_state.max_num_threads = 60 - 1; + mfc_context->gpe_context.vfe_state.num_urb_entries = 16; mfc_context->gpe_context.vfe_state.gpgpu_mode = 0; mfc_context->gpe_context.vfe_state.urb_entry_size = 59 - 1; diff --git a/src/gen8_post_processing.c b/src/gen8_post_processing.c index 708918b..5ef8cbf 100644 --- a/src/gen8_post_processing.c +++ b/src/gen8_post_processing.c @@ -1592,7 +1592,10 @@ gen8_post_processing_context_common_init(VADriverContextP ctx, struct pp_module *pp_module; struct i965_post_processing_context *pp_context = data; - pp_context->vfe_gpu_state.max_num_threads = 60; + if (i965->intel.eu_total > 0) + pp_context->vfe_gpu_state.max_num_threads = 6 * i965->intel.eu_total; + else + pp_context->vfe_gpu_state.max_num_threads = 60; pp_context->vfe_gpu_state.num_urb_entries = 59; pp_context->vfe_gpu_state.gpgpu_mode = 0; pp_context->vfe_gpu_state.urb_entry_size = 16 - 1; diff --git a/src/gen8_vme.c b/src/gen8_vme.c index b14d60a..7a9ed6b 100644 --- a/src/gen8_vme.c +++ b/src/gen8_vme.c @@ -1333,6 +1333,7 @@ gen8_vme_context_destroy(void *context) Bool gen8_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context) { + struct i965_driver_data *i965 = i965_driver_data(ctx); struct gen6_vme_context *vme_context = NULL; struct i965_kernel *vme_kernel_list = NULL; int i965_kernel_num; @@ -1385,7 +1386,12 @@ Bool gen8_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *e 
vme_context->gpe_context.sampler.entry_size = 0; vme_context->gpe_context.sampler.max_entries = 0; - vme_context->gpe_context.vfe_state.max_num_threads = 60 - 1; + if (i965->intel.eu_total > 0) { + vme_context->gpe_context.vfe_state.max_num_threads = 6 * + i965->intel.eu_total; + } else + vme_context->gpe_context.vfe_state.max_num_threads = 60 - 1; + vme_context->gpe_context.vfe_state.num_urb_entries = 64; vme_context->gpe_context.vfe_state.gpgpu_mode = 0; vme_context->gpe_context.vfe_state.urb_entry_size = 16; diff --git a/src/gen9_post_processing.c b/src/gen9_post_processing.c index e9b8f86..1e2d33a 100644 --- a/src/gen9_post_processing.c +++ b/src/gen9_post_processing.c @@ -547,10 +547,14 @@ gen9_post_processing_context_init(VADriverContextP ctx, gpe_context->surface_state_binding_table.surface_state_offset = ALIGN(MAX_SCALING_SURFACES * 4, 64); gpe_context->surface_state_binding_table.length = ALIGN(MAX_SCALING_SURFACES * 4, 64) + ALIGN(MAX_SCALING_SURFACES * SURFACE_STATE_PADDED_SIZE_GEN9, 64); - if (i965->intel.has_bsd2) - gpe_context->vfe_state.max_num_threads = 300; - else - gpe_context->vfe_state.max_num_threads = 60; + if (i965->intel.eu_total > 0) { + gpe_context->vfe_state.max_num_threads = i965->intel.eu_total * 6; + } else { + if (i965->intel.has_bsd2) + gpe_context->vfe_state.max_num_threads = 300; + else + gpe_context->vfe_state.max_num_threads = 60; + } gpe_context->vfe_state.curbe_allocation_size = 37; gpe_context->vfe_state.urb_entry_size = 16; diff --git a/src/gen9_vme.c b/src/gen9_vme.c index fab80ce..11602a8 100644 --- a/src/gen9_vme.c +++ b/src/gen9_vme.c @@ -1978,6 +1978,7 @@ gen9_vme_context_destroy(void *context) Bool gen9_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context) { + struct i965_driver_data *i965 = i965_driver_data(ctx); struct gen6_vme_context *vme_context; struct i965_kernel *vme_kernel_list = NULL; int i965_kernel_num; @@ -2037,7 +2038,12 @@ Bool gen9_vme_context_init(VADriverContextP ctx, 
struct intel_encoder_context *e vme_context->gpe_context.sampler.entry_size = 0; vme_context->gpe_context.sampler.max_entries = 0; - vme_context->gpe_context.vfe_state.max_num_threads = 60 - 1; + if (i965->intel.eu_total > 0) { + vme_context->gpe_context.vfe_state.max_num_threads = 6 * + i965->intel.eu_total; + } else + vme_context->gpe_context.vfe_state.max_num_threads = 60 - 1; + vme_context->gpe_context.vfe_state.num_urb_entries = 64; vme_context->gpe_context.vfe_state.gpgpu_mode = 0; vme_context->gpe_context.vfe_state.urb_entry_size = 16; diff --git a/src/gen9_vp9_encoder.c b/src/gen9_vp9_encoder.c index 74d0d2f..537b390 100644 --- a/src/gen9_vp9_encoder.c +++ b/src/gen9_vp9_encoder.c @@ -3679,9 +3679,12 @@ gen9_vp9_mbenc_kernel(VADriverContextP ctx, } static void -gen9_init_gpe_context_vp9(struct i965_gpe_context *gpe_context, +gen9_init_gpe_context_vp9(VADriverContextP ctx, + struct i965_gpe_context *gpe_context, struct vp9_encoder_kernel_parameter *kernel_param) { + struct i965_driver_data *i965 = i965_driver_data(ctx); + gpe_context->curbe.length = kernel_param->curbe_size; // in bytes gpe_context->sampler.entry_size = 0; @@ -3700,7 +3703,11 @@ gen9_init_gpe_context_vp9(struct i965_gpe_context *gpe_context, gpe_context->surface_state_binding_table.surface_state_offset = ALIGN(MAX_VP9_ENCODER_SURFACES * 4, 64); gpe_context->surface_state_binding_table.length = ALIGN(MAX_VP9_ENCODER_SURFACES * 4, 64) + ALIGN(MAX_VP9_ENCODER_SURFACES * SURFACE_STATE_PADDED_SIZE_GEN9, 64); - gpe_context->vfe_state.max_num_threads = 112; // 16 EU * 7 threads + if (i965->intel.eu_total > 0) + gpe_context->vfe_state.max_num_threads = 6 * i965->intel.eu_total; + else + gpe_context->vfe_state.max_num_threads = 112; // 16 EU * 7 threads + gpe_context->vfe_state.curbe_allocation_size = MAX(1, ALIGN(gpe_context->curbe.length, 32) >> 5); // in registers gpe_context->vfe_state.urb_entry_size = MAX(1, ALIGN(kernel_param->inline_data_size, 32) >> 5); // in registers 
gpe_context->vfe_state.num_urb_entries = (MAX_URB_SIZE - @@ -4606,7 +4613,7 @@ gen9_vme_scaling_context_init_vp9(VADriverContextP ctx, scoreboard_param.walkpat_flag = 0; gpe_context = &scaling_context->gpe_contexts[0]; - gen9_init_gpe_context_vp9(gpe_context, &kernel_param); + gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param); gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param); scaling_context->scaling_4x_bti.scaling_frame_src_y = VP9_BTI_SCALING_FRAME_SRC_Y; @@ -4632,7 +4639,7 @@ gen9_vme_scaling_context_init_vp9(VADriverContextP ctx, kernel_param.sampler_size = 0; gpe_context = &scaling_context->gpe_contexts[1]; - gen9_init_gpe_context_vp9(gpe_context, &kernel_param); + gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param); gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param); memset(&scale_kernel, 0, sizeof(scale_kernel)); @@ -4674,7 +4681,7 @@ gen9_vme_me_context_init_vp9(VADriverContextP ctx, scoreboard_param.walkpat_flag = 0; gpe_context = &me_context->gpe_context; - gen9_init_gpe_context_vp9(gpe_context, &kernel_param); + gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param); gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param); memset(&scale_kernel, 0, sizeof(scale_kernel)); @@ -4722,7 +4729,7 @@ gen9_vme_mbenc_context_init_vp9(VADriverContextP ctx, } else scoreboard_param.walkpat_flag = 0; - gen9_init_gpe_context_vp9(gpe_context, &kernel_param); + gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param); gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param); memset(&scale_kernel, 0, sizeof(scale_kernel)); @@ -4762,7 +4769,7 @@ gen9_vme_brc_context_init_vp9(VADriverContextP ctx, for (i = 0; i < NUM_VP9_BRC; i++) { gpe_context = &brc_context->gpe_contexts[i]; - gen9_init_gpe_context_vp9(gpe_context, &kernel_param); + gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param); gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param); memset(&scale_kernel, 0, sizeof(scale_kernel)); @@ -4801,7 +4808,7 @@ 
gen9_vme_dys_context_init_vp9(VADriverContextP ctx, scoreboard_param.walkpat_flag = 0; gpe_context = &dys_context->gpe_context; - gen9_init_gpe_context_vp9(gpe_context, &kernel_param); + gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param); gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param); memset(&scale_kernel, 0, sizeof(scale_kernel)); |