/**************************************************************************
 *
 * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

/*
 * The targets of the two system includes below were lost during extraction
 * (the <...> part was stripped). <glib.h> and <search.h> are reconstructed
 * here as an assumption, based on what this file actually uses
 * (g_base64_encode(), tsearch()/tfind()/tdelete()); they may not match the
 * original list exactly.
 */
#include <glib.h>
#include <search.h>

#include "gpgpu_fill.h"
#include "huc_copy.h"
#include "i915/gem_create.h"
#include "i915/gem_mman.h"
#include "intel_blt.h"
#include "igt_aux.h"
#include "igt_syncobj.h"
#include "intel_batchbuffer.h"
#include "intel_bufops.h"
#include "intel_chipset.h"
#include "intel_pat.h"
#include "media_fill.h"
#include "media_spin.h"
#include "sw_sync.h"
#include "veboxcopy.h"
#include "xe/xe_ioctl.h"
#include "xe/xe_query.h"

#define BCS_SWCTRL 0x22200
#define BCS_SRC_Y (1 << 0)
#define BCS_DST_Y (1 << 1)

/**
 * SECTION:intel_batchbuffer
 * @short_description: Batchbuffer and blitter support
 * @title: Batch Buffer
 * @include: igt.h
 *
 * Note that this library's header pulls in the [i-g-t core](igt-gpu-tools-i-g-t-core.html)
 * library as a dependency.
*/ static bool intel_bb_do_tracking; static IGT_LIST_HEAD(intel_bb_list); static pthread_mutex_t intel_bb_list_lock = PTHREAD_MUTEX_INITIALIZER; #define CMD_POLY_STIPPLE_OFFSET 0x7906 #define CHECK_RANGE(x) do { \ igt_assert_lte(0, (x)); \ igt_assert_lt((x), (1 << 15)); \ } while (0) /* * pitches are in bytes if the surfaces are linear, number of dwords * otherwise */ static uint32_t fast_copy_pitch(unsigned int stride, unsigned int tiling) { if (tiling != I915_TILING_NONE) return stride / 4; else return stride; } uint32_t fast_copy_dword0(unsigned int src_tiling, unsigned int dst_tiling) { uint32_t dword0 = 0; dword0 |= XY_FAST_COPY_BLT; switch (src_tiling) { case I915_TILING_X: dword0 |= XY_FAST_COPY_SRC_TILING_X; break; case I915_TILING_Y: case I915_TILING_4: case I915_TILING_Yf: dword0 |= XY_FAST_COPY_SRC_TILING_Yb_Yf; break; case I915_TILING_Ys: dword0 |= XY_FAST_COPY_SRC_TILING_Ys; break; case I915_TILING_NONE: default: break; } switch (dst_tiling) { case I915_TILING_X: dword0 |= XY_FAST_COPY_DST_TILING_X; break; case I915_TILING_Y: case I915_TILING_4: case I915_TILING_Yf: dword0 |= XY_FAST_COPY_DST_TILING_Yb_Yf; break; case I915_TILING_Ys: dword0 |= XY_FAST_COPY_DST_TILING_Ys; break; case I915_TILING_NONE: default: break; } return dword0; } static bool new_tile_y_format(unsigned int tiling) { return tiling == T_YFMAJOR || tiling == T_TILE4; } uint32_t fast_copy_dword1(int fd, unsigned int src_tiling, unsigned int dst_tiling, int bpp) { uint32_t dword1 = 0; if (blt_fast_copy_supports_tiling(fd, T_YMAJOR)) { dword1 |= new_tile_y_format(src_tiling) ? XY_FAST_COPY_SRC_TILING_Yf : 0; dword1 |= new_tile_y_format(dst_tiling) ? XY_FAST_COPY_DST_TILING_Yf : 0; } else { /* Always set bits for platforms that don't support legacy TileY */ dword1 |= XY_FAST_COPY_SRC_TILING_Yf | XY_FAST_COPY_DST_TILING_Yf; } switch (bpp) { case 8: dword1 |= XY_FAST_COPY_COLOR_DEPTH_8; break; case 16: dword1 |= XY_FAST_COPY_COLOR_DEPTH_16; break; case 32: dword1 |= XY_FAST_COPY_COLOR_DEPTH_32; break; case 64: dword1 |= XY_FAST_COPY_COLOR_DEPTH_64; break; case 128: dword1 |= XY_FAST_COPY_COLOR_DEPTH_128; break; default: igt_assert(0); } return dword1; } static void fill_relocation(struct drm_i915_gem_relocation_entry *reloc, uint32_t gem_handle, uint64_t presumed_offset, uint32_t delta, /* in bytes */ uint32_t offset, /* in dwords */ uint32_t read_domains, uint32_t write_domains) { reloc->target_handle = gem_handle; reloc->delta = delta; reloc->offset = offset * sizeof(uint32_t); reloc->presumed_offset = presumed_offset; reloc->read_domains = read_domains; reloc->write_domain = write_domains; } static void fill_object(struct drm_i915_gem_exec_object2 *obj, uint32_t gem_handle, uint64_t gem_offset, struct drm_i915_gem_relocation_entry *relocs, uint32_t count) { memset(obj, 0, sizeof(*obj)); obj->handle = gem_handle; obj->offset = gem_offset; obj->relocation_count = count; obj->relocs_ptr = to_user_pointer(relocs); } static uint32_t find_engine(const intel_ctx_cfg_t *cfg, unsigned int class) { unsigned int i; uint32_t engine_id = -1; for (i = 0; i < cfg->num_engines; i++) { if (cfg->engines[i].engine_class == class) engine_id = i; } igt_assert_f(engine_id != -1, "Requested engine not found!\n"); return engine_id; } static void exec_blit(int fd, struct drm_i915_gem_exec_object2 *objs, uint32_t count, uint32_t ctx, const intel_ctx_cfg_t *cfg) { struct drm_i915_gem_execbuffer2 exec; uint32_t devid = intel_get_drm_devid(fd); uint32_t blt_id = HAS_BLT_RING(devid) ? 
I915_EXEC_BLT : I915_EXEC_DEFAULT; if (cfg) blt_id = find_engine(cfg, I915_ENGINE_CLASS_COPY); exec = (struct drm_i915_gem_execbuffer2) { .buffers_ptr = to_user_pointer(objs), .buffer_count = count, .flags = blt_id | I915_EXEC_NO_RELOC, .rsvd1 = ctx, }; gem_execbuf(fd, &exec); } static uint32_t src_copy_dword0(uint32_t src_tiling, uint32_t dst_tiling, uint32_t bpp, uint32_t device_gen) { uint32_t dword0 = 0; dword0 |= XY_SRC_COPY_BLT_CMD; if (bpp == 32) dword0 |= XY_SRC_COPY_BLT_WRITE_RGB | XY_SRC_COPY_BLT_WRITE_ALPHA; if (device_gen >= 4 && src_tiling) dword0 |= XY_SRC_COPY_BLT_SRC_TILED; if (device_gen >= 4 && dst_tiling) dword0 |= XY_SRC_COPY_BLT_DST_TILED; return dword0; } static uint32_t src_copy_dword1(uint32_t dst_pitch, uint32_t bpp) { uint32_t dword1 = 0; switch (bpp) { case 8: break; case 16: dword1 |= 1 << 24; /* Only support 565 color */ break; case 32: dword1 |= 3 << 24; break; default: igt_assert(0); } dword1 |= 0xcc << 16; dword1 |= dst_pitch; return dword1; } /** * igt_blitter_copy: * @fd: file descriptor of the i915 driver * @ahnd: handle to an allocator * @ctx: context within which execute copy blit * @src_handle: GEM handle of the source buffer * @src_delta: offset into the source GEM bo, in bytes * @src_stride: Stride (in bytes) of the source buffer * @src_tiling: Tiling mode of the source buffer * @src_x: X coordinate of the source region to copy * @src_y: Y coordinate of the source region to copy * @src_size: size of the src bo required for allocator and softpin * @width: Width of the region to copy * @height: Height of the region to copy * @bpp: source and destination bits per pixel * @dst_handle: GEM handle of the destination buffer * @dst_delta: offset into the destination GEM bo, in bytes * @dst_stride: Stride (in bytes) of the destination buffer * @dst_tiling: Tiling mode of the destination buffer * @dst_x: X coordinate of destination * @dst_y: Y coordinate of destination * @dst_size: size of the dst bo required for allocator and softpin * * Wrapper API to call appropriate blitter copy function. 
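 *
 * A minimal usage sketch; the handles, strides and sizes below are
 * placeholder values chosen for illustration, and @ahnd is 0 so the legacy
 * relocation/fixed-offset path is taken:
 *
 *   uint32_t src = gem_create(fd, 64 * 1024);
 *   uint32_t dst = gem_create(fd, 64 * 1024);
 *
 *   igt_blitter_copy(fd, 0, 0, NULL,
 *                    src, 0, 512, I915_TILING_NONE, 0, 0, 64 * 1024,
 *                    128, 128, 32,
 *                    dst, 0, 512, I915_TILING_NONE, 0, 0, 64 * 1024);
 *
 * On Xe_HP and newer (IP_VER(12, 60)+) this resolves to
 * igt_blitter_fast_copy__raw(), otherwise to igt_blitter_src_copy().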
*/ void igt_blitter_copy(int fd, uint64_t ahnd, uint32_t ctx, const intel_ctx_cfg_t *cfg, /* src */ uint32_t src_handle, uint32_t src_delta, uint32_t src_stride, uint32_t src_tiling, uint32_t src_x, uint32_t src_y, uint64_t src_size, /* size */ uint32_t width, uint32_t height, /* bpp */ uint32_t bpp, /* dst */ uint32_t dst_handle, uint32_t dst_delta, uint32_t dst_stride, uint32_t dst_tiling, uint32_t dst_x, uint32_t dst_y, uint64_t dst_size) { uint32_t devid; devid = intel_get_drm_devid(fd); if (intel_graphics_ver(devid) >= IP_VER(12, 60)) igt_blitter_fast_copy__raw(fd, ahnd, ctx, NULL, src_handle, src_delta, src_stride, src_tiling, src_x, src_y, src_size, width, height, bpp, dst_handle, dst_delta, dst_stride, dst_tiling, dst_x, dst_y, dst_size); else igt_blitter_src_copy(fd, ahnd, ctx, NULL, src_handle, src_delta, src_stride, src_tiling, src_x, src_y, src_size, width, height, bpp, dst_handle, dst_delta, dst_stride, dst_tiling, dst_x, dst_y, dst_size); } /** * igt_blitter_src_copy: * @fd: file descriptor of the i915 driver * @ahnd: handle to an allocator * @ctx: context within which execute copy blit * @cfg: intel_ctx configuration, NULL for default context or legacy mode * @src_handle: GEM handle of the source buffer * @src_delta: offset into the source GEM bo, in bytes * @src_stride: Stride (in bytes) of the source buffer * @src_tiling: Tiling mode of the source buffer * @src_x: X coordinate of the source region to copy * @src_y: Y coordinate of the source region to copy * @src_size: size of the src bo required for allocator and softpin * @width: Width of the region to copy * @height: Height of the region to copy * @bpp: source and destination bits per pixel * @dst_handle: GEM handle of the destination buffer * @dst_delta: offset into the destination GEM bo, in bytes * @dst_stride: Stride (in bytes) of the destination buffer * @dst_tiling: Tiling mode of the destination buffer * @dst_x: X coordinate of destination * @dst_y: Y coordinate of destination * @dst_size: size of the dst bo required for allocator and softpin * * Copy @src into @dst using the XY_SRC blit command. 
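 *
 * The packet emitted below is roughly laid out as follows (gen8+ adds an
 * upper-32-bit dword after each address, and Y tiling additionally wraps
 * the blit in BCS_SWCTRL MI_LOAD_REGISTER_IMM writes):
 *
 *   DW0: XY_SRC_COPY_BLT_CMD | tiling/write-mask bits | packet length
 *   DW1: ROP 0xcc | colour depth | dst pitch
 *   DW2: dst y1 << 16 | dst x1
 *   DW3: dst y2 << 16 | dst x2
 *   DW4: dst address (low bits)
 *   ...  src y1/x1, src pitch, src address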
*/ void igt_blitter_src_copy(int fd, uint64_t ahnd, uint32_t ctx, const intel_ctx_cfg_t *cfg, /* src */ uint32_t src_handle, uint32_t src_delta, uint32_t src_stride, uint32_t src_tiling, uint32_t src_x, uint32_t src_y, uint64_t src_size, /* size */ uint32_t width, uint32_t height, /* bpp */ uint32_t bpp, /* dst */ uint32_t dst_handle, uint32_t dst_delta, uint32_t dst_stride, uint32_t dst_tiling, uint32_t dst_x, uint32_t dst_y, uint64_t dst_size) { uint32_t batch[32]; struct drm_i915_gem_exec_object2 objs[3]; struct drm_i915_gem_relocation_entry relocs[2]; uint32_t batch_handle; uint32_t src_pitch, dst_pitch; uint32_t dst_reloc_offset, src_reloc_offset; uint32_t gen = intel_gen(intel_get_drm_devid(fd)); uint64_t batch_offset, src_offset, dst_offset; const bool has_64b_reloc = gen >= 8; int i = 0; batch_handle = gem_create(fd, 4096); if (ahnd) { src_offset = get_offset(ahnd, src_handle, src_size, 0); dst_offset = get_offset(ahnd, dst_handle, dst_size, 0); batch_offset = get_offset(ahnd, batch_handle, 4096, 0); } else { src_offset = 16 << 20; dst_offset = ALIGN(src_offset + src_size, 1 << 20); batch_offset = ALIGN(dst_offset + dst_size, 1 << 20); } memset(batch, 0, sizeof(batch)); igt_assert((src_tiling == I915_TILING_NONE) || (src_tiling == I915_TILING_X) || (src_tiling == I915_TILING_Y)); igt_assert((dst_tiling == I915_TILING_NONE) || (dst_tiling == I915_TILING_X) || (dst_tiling == I915_TILING_Y)); src_pitch = (gen >= 4 && src_tiling) ? src_stride / 4 : src_stride; dst_pitch = (gen >= 4 && dst_tiling) ? dst_stride / 4 : dst_stride; if (bpp == 64) { bpp /= 2; width *= 2; } CHECK_RANGE(src_x); CHECK_RANGE(src_y); CHECK_RANGE(dst_x); CHECK_RANGE(dst_y); CHECK_RANGE(width); CHECK_RANGE(height); CHECK_RANGE(src_x + width); CHECK_RANGE(src_y + height); CHECK_RANGE(dst_x + width); CHECK_RANGE(dst_y + height); CHECK_RANGE(src_pitch); CHECK_RANGE(dst_pitch); if ((src_tiling | dst_tiling) >= I915_TILING_Y) { unsigned int mask; batch[i++] = MI_LOAD_REGISTER_IMM(1); batch[i++] = BCS_SWCTRL; mask = (BCS_SRC_Y | BCS_DST_Y) << 16; if (src_tiling == I915_TILING_Y) mask |= BCS_SRC_Y; if (dst_tiling == I915_TILING_Y) mask |= BCS_DST_Y; batch[i++] = mask; } batch[i] = src_copy_dword0(src_tiling, dst_tiling, bpp, gen); batch[i++] |= 6 + 2 * has_64b_reloc; batch[i++] = src_copy_dword1(dst_pitch, bpp); batch[i++] = (dst_y << 16) | dst_x; /* dst x1,y1 */ batch[i++] = ((dst_y + height) << 16) | (dst_x + width); /* dst x2,y2 */ dst_reloc_offset = i; batch[i++] = dst_offset + dst_delta; /* dst address lower bits */ if (has_64b_reloc) batch[i++] = (dst_offset + dst_delta) >> 32; /* dst address upper bits */ batch[i++] = (src_y << 16) | src_x; /* src x1,y1 */ batch[i++] = src_pitch; src_reloc_offset = i; batch[i++] = src_offset + src_delta; /* src address lower bits */ if (has_64b_reloc) batch[i++] = (src_offset + src_delta) >> 32; /* src address upper bits */ if ((src_tiling | dst_tiling) >= I915_TILING_Y) { igt_assert(gen >= 6); batch[i++] = MI_FLUSH_DW_CMD | 2; batch[i++] = 0; batch[i++] = 0; batch[i++] = 0; batch[i++] = MI_LOAD_REGISTER_IMM(1); batch[i++] = BCS_SWCTRL; batch[i++] = (BCS_SRC_Y | BCS_DST_Y) << 16; } batch[i++] = MI_BATCH_BUFFER_END; batch[i++] = MI_NOOP; igt_assert(i <= ARRAY_SIZE(batch)); gem_write(fd, batch_handle, 0, batch, sizeof(batch)); fill_relocation(&relocs[0], dst_handle, dst_offset, dst_delta, dst_reloc_offset, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER); fill_relocation(&relocs[1], src_handle, src_offset, src_delta, src_reloc_offset, I915_GEM_DOMAIN_RENDER, 0); fill_object(&objs[0], 
dst_handle, dst_offset, NULL, 0); fill_object(&objs[1], src_handle, src_offset, NULL, 0); fill_object(&objs[2], batch_handle, batch_offset, relocs, !ahnd ? 2 : 0); objs[0].flags |= EXEC_OBJECT_NEEDS_FENCE | EXEC_OBJECT_WRITE; objs[1].flags |= EXEC_OBJECT_NEEDS_FENCE; if (ahnd) { objs[0].flags |= EXEC_OBJECT_PINNED | EXEC_OBJECT_SUPPORTS_48B_ADDRESS; objs[1].flags |= EXEC_OBJECT_PINNED | EXEC_OBJECT_SUPPORTS_48B_ADDRESS; objs[2].flags |= EXEC_OBJECT_PINNED | EXEC_OBJECT_SUPPORTS_48B_ADDRESS; } exec_blit(fd, objs, 3, ctx, cfg); gem_close(fd, batch_handle); } /** * igt_blitter_fast_copy__raw: * @fd: file descriptor of the i915 driver * @ahnd: handle to an allocator * @ctx: context within which execute copy blit * @cfg: intel_ctx configuration, NULL for default context or legacy mode * @src_handle: GEM handle of the source buffer * @src_delta: offset into the source GEM bo, in bytes * @src_stride: Stride (in bytes) of the source buffer * @src_tiling: Tiling mode of the source buffer * @src_x: X coordinate of the source region to copy * @src_y: Y coordinate of the source region to copy * @src_size: size of the src bo required for allocator and softpin * @width: Width of the region to copy * @height: Height of the region to copy * @bpp: source and destination bits per pixel * @dst_handle: GEM handle of the destination buffer * @dst_delta: offset into the destination GEM bo, in bytes * @dst_stride: Stride (in bytes) of the destination buffer * @dst_tiling: Tiling mode of the destination buffer * @dst_x: X coordinate of destination * @dst_y: Y coordinate of destination * @dst_size: size of the dst bo required for allocator and softpin * * Like igt_blitter_fast_copy(), but talking to the kernel directly. */ void igt_blitter_fast_copy__raw(int fd, uint64_t ahnd, uint32_t ctx, const intel_ctx_cfg_t *cfg, /* src */ uint32_t src_handle, unsigned int src_delta, unsigned int src_stride, unsigned int src_tiling, unsigned int src_x, unsigned src_y, uint64_t src_size, /* size */ unsigned int width, unsigned int height, /* bpp */ int bpp, /* dst */ uint32_t dst_handle, unsigned dst_delta, unsigned int dst_stride, unsigned int dst_tiling, unsigned int dst_x, unsigned dst_y, uint64_t dst_size) { uint32_t batch[12]; struct drm_i915_gem_exec_object2 objs[3]; struct drm_i915_gem_relocation_entry relocs[2]; uint32_t batch_handle; uint32_t dword0, dword1; uint32_t src_pitch, dst_pitch; uint64_t batch_offset, src_offset, dst_offset; int i = 0; batch_handle = gem_create(fd, 4096); if (ahnd) { src_offset = get_offset(ahnd, src_handle, src_size, 0); dst_offset = get_offset(ahnd, dst_handle, dst_size, 0); batch_offset = get_offset(ahnd, batch_handle, 4096, 0); } else { src_offset = 16 << 20; dst_offset = ALIGN(src_offset + src_size, 1 << 20); batch_offset = ALIGN(dst_offset + dst_size, 1 << 20); } src_pitch = fast_copy_pitch(src_stride, src_tiling); dst_pitch = fast_copy_pitch(dst_stride, dst_tiling); dword0 = fast_copy_dword0(src_tiling, dst_tiling); dword1 = fast_copy_dword1(fd, src_tiling, dst_tiling, bpp); CHECK_RANGE(src_x); CHECK_RANGE(src_y); CHECK_RANGE(dst_x); CHECK_RANGE(dst_y); CHECK_RANGE(width); CHECK_RANGE(height); CHECK_RANGE(src_x + width); CHECK_RANGE(src_y + height); CHECK_RANGE(dst_x + width); CHECK_RANGE(dst_y + height); CHECK_RANGE(src_pitch); CHECK_RANGE(dst_pitch); batch[i++] = dword0; batch[i++] = dword1 | dst_pitch; batch[i++] = (dst_y << 16) | dst_x; /* dst x1,y1 */ batch[i++] = ((dst_y + height) << 16) | (dst_x + width); /* dst x2,y2 */ batch[i++] = dst_offset + dst_delta; /* dst address lower 
bits */ batch[i++] = (dst_offset + dst_delta) >> 32; /* dst address upper bits */ batch[i++] = (src_y << 16) | src_x; /* src x1,y1 */ batch[i++] = src_pitch; batch[i++] = src_offset + src_delta; /* src address lower bits */ batch[i++] = (src_offset + src_delta) >> 32; /* src address upper bits */ batch[i++] = MI_BATCH_BUFFER_END; batch[i++] = MI_NOOP; igt_assert(i == ARRAY_SIZE(batch)); gem_write(fd, batch_handle, 0, batch, sizeof(batch)); fill_relocation(&relocs[0], dst_handle, dst_offset, dst_delta, 4, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER); fill_relocation(&relocs[1], src_handle, src_offset, src_delta, 8, I915_GEM_DOMAIN_RENDER, 0); fill_object(&objs[0], dst_handle, dst_offset, NULL, 0); objs[0].flags |= EXEC_OBJECT_WRITE; fill_object(&objs[1], src_handle, src_offset, NULL, 0); fill_object(&objs[2], batch_handle, batch_offset, relocs, !ahnd ? 2 : 0); if (ahnd) { objs[0].flags |= EXEC_OBJECT_PINNED; objs[1].flags |= EXEC_OBJECT_PINNED; objs[2].flags |= EXEC_OBJECT_PINNED; } exec_blit(fd, objs, 3, ctx, cfg); gem_close(fd, batch_handle); } /** * igt_get_render_copyfunc: * @devid: pci device id * * Returns: * * The platform-specific render copy function pointer for the device * specified with @devid. Will return NULL when no render copy function is * implemented. */ igt_render_copyfunc_t igt_get_render_copyfunc(int devid) { igt_render_copyfunc_t copy = NULL; if (IS_METEORLAKE(devid)) copy = mtl_render_copyfunc; else if (IS_DG2(devid)) copy = gen12p71_render_copyfunc; else if (AT_LEAST_GEN(devid, 20)) copy = xe2_render_copyfunc; else if (IS_GEN12(devid)) copy = gen12_render_copyfunc; else if (IS_GEN11(devid)) copy = gen11_render_copyfunc; else if (IS_GEN9(devid) || IS_GEN10(devid)) copy = gen9_render_copyfunc; else if (IS_GEN8(devid)) copy = gen8_render_copyfunc; else if (IS_GEN7(devid)) copy = gen7_render_copyfunc; else if (IS_GEN6(devid)) copy = gen6_render_copyfunc; else if (IS_GEN4(devid) || IS_GEN5(devid)) copy = gen4_render_copyfunc; else if (IS_GEN3(devid)) copy = gen3_render_copyfunc; else if (IS_GEN2(devid)) copy = gen2_render_copyfunc; return copy; } igt_vebox_copyfunc_t igt_get_vebox_copyfunc(int devid) { igt_vebox_copyfunc_t copy = NULL; if (IS_GEN12(devid)) copy = gen12_vebox_copyfunc; return copy; } igt_render_clearfunc_t igt_get_render_clearfunc(int devid) { if (IS_METEORLAKE(devid)) { return mtl_render_clearfunc; } else if (IS_DG2(devid)) { return gen12p71_render_clearfunc; } else if (IS_GEN12(devid)) { return gen12_render_clearfunc; } else { return NULL; } } /** * igt_get_media_fillfunc: * @devid: pci device id * * Returns: * * The platform-specific media fill function pointer for the device specified * with @devid. Will return NULL when no media fill function is implemented. 
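 *
 * Callers typically probe for support before emitting a media fill, e.g.
 * (a sketch; the variable names are arbitrary):
 *
 *   igt_fillfunc_t fill = igt_get_media_fillfunc(intel_get_drm_devid(fd));
 *
 *   igt_require_f(fill, "media fill not implemented on this platform\n");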
*/ igt_fillfunc_t igt_get_media_fillfunc(int devid) { igt_fillfunc_t fill = NULL; if (intel_graphics_ver(devid) >= IP_VER(12, 50)) { /* current implementation defeatured PIPELINE_MEDIA */ } else if (IS_GEN12(devid)) fill = gen12_media_fillfunc; else if (IS_GEN9(devid) || IS_GEN10(devid) || IS_GEN11(devid)) fill = gen9_media_fillfunc; else if (IS_GEN8(devid)) fill = gen8_media_fillfunc; else if (IS_GEN7(devid)) fill = gen7_media_fillfunc; return fill; } igt_vme_func_t igt_get_media_vme_func(int devid) { igt_vme_func_t fill = NULL; const struct intel_device_info *devinfo = intel_get_device_info(devid); if (IS_GEN11(devid) && !devinfo->is_elkhartlake && !devinfo->is_jasperlake) fill = gen11_media_vme_func; return fill; } /** * igt_get_gpgpu_fillfunc: * @devid: pci device id * * Returns: * * The platform-specific gpgpu fill function pointer for the device specified * with @devid. Will return NULL when no gpgpu fill function is implemented. */ igt_fillfunc_t igt_get_gpgpu_fillfunc(int devid) { igt_fillfunc_t fill = NULL; if (intel_graphics_ver(devid) >= IP_VER(20, 0)) fill = xe2lpg_gpgpu_fillfunc; else if (IS_METEORLAKE(devid)) fill = xehp_gpgpu_fillfunc; else if (intel_graphics_ver(devid) >= IP_VER(12, 60)) fill = xehpc_gpgpu_fillfunc; else if (intel_graphics_ver(devid) >= IP_VER(12, 50)) fill = xehp_gpgpu_fillfunc; else if (IS_GEN12(devid)) fill = gen12_gpgpu_fillfunc; else if (IS_GEN11(devid)) fill = gen11_gpgpu_fillfunc; else if (IS_GEN9(devid) || IS_GEN10(devid)) fill = gen9_gpgpu_fillfunc; else if (IS_GEN8(devid)) fill = gen8_gpgpu_fillfunc; else if (IS_GEN7(devid)) fill = gen7_gpgpu_fillfunc; return fill; } /** * igt_get_media_spinfunc: * @devid: pci device id * * Returns: * * The platform-specific media spin function pointer for the device specified * with @devid. Will return NULL when no media spin function is implemented. */ igt_media_spinfunc_t igt_get_media_spinfunc(int devid) { igt_media_spinfunc_t spin = NULL; if (IS_GEN9(devid)) spin = gen9_media_spinfunc; else if (IS_GEN8(devid)) spin = gen8_media_spinfunc; return spin; } /* Intel batchbuffer v2 */ static bool intel_bb_debug_tree = false; /* * __reallocate_objects: * @ibb: pointer to intel_bb * * Increases number of objects if necessary. */ static void __reallocate_objects(struct intel_bb *ibb) { const uint32_t inc = 4096 / sizeof(*ibb->objects); if (ibb->num_objects == ibb->allocated_objects) { ibb->objects = realloc(ibb->objects, sizeof(*ibb->objects) * (inc + ibb->allocated_objects)); igt_assert(ibb->objects); ibb->allocated_objects += inc; memset(&ibb->objects[ibb->num_objects], 0, inc * sizeof(*ibb->objects)); } } static inline uint64_t __intel_bb_get_offset(struct intel_bb *ibb, uint32_t handle, uint64_t size, uint32_t alignment, uint8_t pat_index) { uint64_t offset; if (ibb->enforce_relocs) return 0; offset = __intel_allocator_alloc(ibb->allocator_handle, handle, size, alignment, pat_index, ALLOC_STRATEGY_NONE); igt_assert(offset != ALLOC_INVALID_ADDRESS); return offset; } /** * __intel_bb_create: * @fd: drm fd - i915 or xe * @ctx: for i915 context id, for xe engine id * @vm: for xe vm_id, unused for i915 * @cfg: for i915 intel_ctx configuration, NULL for default context or legacy mode, * unused for xe * @size: size of the batchbuffer * @do_relocs: use relocations or allocator * @allocator_type: allocator type, must be INTEL_ALLOCATOR_NONE for relocations * * intel-bb assumes it will work in one of two modes - with relocations or * with using allocator (currently RELOC and SIMPLE are implemented). 
* Some description is required to describe how they maintain the addresses. * * Before entering into each scenarios generic rule is intel-bb keeps objects * and their offsets in the internal cache and reuses in subsequent execs. * * 1. intel-bb with relocations (i915 only) * * Creating new intel-bb adds handle to cache implicitly and sets its address * to 0. Objects added to intel-bb later also have address 0 set for first run. * After calling execbuf cache is altered with new addresses. As intel-bb * works in reloc mode addresses are only suggestion to the driver and we * cannot be sure they won't change at next exec. * * 2. with allocator (i915 or xe) * * This mode is valid only for ppgtt. Addresses are acquired from allocator * and softpinned (i915) or vm-binded (xe). intel-bb cache must be then * coherent with allocator (simple is coherent, reloc partially [doesn't * support address reservation]). * When we do intel-bb reset with purging cache it has to reacquire addresses * from allocator (allocator should return same address - what is true for * simple and reloc allocators). * * If we do reset without purging caches we use addresses from intel-bb cache * during execbuf objects construction. * * If we do reset with purging caches allocator entries are freed as well. * * __intel_bb_create checks if a context configuration for intel_ctx_t was * passed in. If this is the case, it copies the information over to the * newly created batch buffer. * * Returns: * * Pointer the intel_bb, asserts on failure. */ static struct intel_bb * __intel_bb_create(int fd, uint32_t ctx, uint32_t vm, const intel_ctx_cfg_t *cfg, uint32_t size, bool do_relocs, uint64_t start, uint64_t end, uint64_t alignment, uint8_t allocator_type, enum allocator_strategy strategy) { struct drm_i915_gem_exec_object2 *object; struct intel_bb *ibb = calloc(1, sizeof(*ibb)); igt_assert(ibb); ibb->devid = intel_get_drm_devid(fd); ibb->gen = intel_gen(ibb->devid); ibb->ctx = ctx; ibb->fd = fd; ibb->driver = is_i915_device(fd) ? INTEL_DRIVER_I915 : is_xe_device(fd) ? INTEL_DRIVER_XE : 0; igt_assert(ibb->driver); /* * If we don't have full ppgtt driver can change our addresses * so allocator is useless in this case. Just enforce relocations * for such gens and don't use allocator at all. */ if (ibb->driver == INTEL_DRIVER_I915) { ibb->uses_full_ppgtt = gem_uses_full_ppgtt(fd); if (!alignment) alignment = gem_detect_safe_alignment(fd); ibb->alignment = alignment; ibb->gtt_size = gem_aperture_size(fd); ibb->handle = gem_create(fd, size); if (!ibb->uses_full_ppgtt) do_relocs = true; /* * For softpin mode allocator has full control over offsets allocation * so we want kernel to not interfere with this. 
*/ if (do_relocs) { ibb->allows_obj_alignment = gem_allows_obj_alignment(fd); allocator_type = INTEL_ALLOCATOR_NONE; } else { /* Use safe start offset instead assuming 0x0 is safe */ start = max_t(uint64_t, start, gem_detect_safe_start_offset(fd)); /* if relocs are set we won't use an allocator */ ibb->allocator_handle = intel_allocator_open_full(fd, ctx, start, end, allocator_type, strategy, 0); } ibb->vm_id = 0; } else { igt_assert(!do_relocs); if (!alignment) alignment = xe_get_default_alignment(fd); ibb->alignment = alignment; size = ALIGN(size + xe_cs_prefetch_size(fd), ibb->alignment); ibb->handle = xe_bo_create(fd, 0, size, vram_if_possible(fd, 0), DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM); /* Limit to 48-bit due to MI_* address limitation */ ibb->gtt_size = 1ull << min_t(uint32_t, xe_va_bits(fd), 48); end = ibb->gtt_size; if (!vm) { igt_assert_f(!ctx, "No vm provided for engine"); vm = xe_vm_create(fd, 0, 0); } ibb->uses_full_ppgtt = true; ibb->allocator_handle = intel_allocator_open_full(fd, vm, start, end, allocator_type, strategy, ibb->alignment); ibb->vm_id = vm; ibb->last_engine = ~0U; } ibb->allocator_type = allocator_type; ibb->allocator_strategy = strategy; ibb->allocator_start = start; ibb->allocator_end = end; ibb->enforce_relocs = do_relocs; ibb->size = size; ibb->batch = calloc(1, size); igt_assert(ibb->batch); ibb->ptr = ibb->batch; ibb->fence = -1; /* Cache context configuration */ if (cfg) { ibb->cfg = malloc(sizeof(*cfg)); igt_assert(ibb->cfg); memcpy(ibb->cfg, cfg, sizeof(*cfg)); } if ((ibb->gtt_size - 1) >> 32) ibb->supports_48b_address = true; object = intel_bb_add_object(ibb, ibb->handle, ibb->size, INTEL_BUF_INVALID_ADDRESS, ibb->alignment, false); ibb->batch_offset = object->offset; IGT_INIT_LIST_HEAD(&ibb->intel_bufs); ibb->refcount = 1; if (intel_bb_do_tracking && ibb->allocator_type != INTEL_ALLOCATOR_NONE) { pthread_mutex_lock(&intel_bb_list_lock); igt_list_add(&ibb->link, &intel_bb_list); pthread_mutex_unlock(&intel_bb_list_lock); } return ibb; } /** * intel_bb_create_full: * @fd: drm fd - i915 or xe * @ctx: for i915 context id, for xe engine id * @vm: for xe vm_id, unused for i915 * @cfg: intel_ctx configuration, NULL for default context or legacy mode * @size: size of the batchbuffer * @start: allocator vm start address * @end: allocator vm start address * @alignment: alignment to use for allocator, zero for default * @allocator_type: allocator type, SIMPLE, RELOC, ... * @strategy: allocation strategy * * Creates bb with context passed in @ctx, size in @size and allocator type * in @allocator_type. Relocations are set to false because IGT allocator * is used in that case. VM range is passed to allocator (@start and @end) * and allocation @strategy (suggestion to allocator about address allocation * preferences). * * Returns: * * Pointer the intel_bb, asserts on failure. */ struct intel_bb *intel_bb_create_full(int fd, uint32_t ctx, uint32_t vm, const intel_ctx_cfg_t *cfg, uint32_t size, uint64_t start, uint64_t end, uint64_t alignment, uint8_t allocator_type, enum allocator_strategy strategy) { return __intel_bb_create(fd, ctx, vm, cfg, size, false, start, end, alignment, allocator_type, strategy); } /** * intel_bb_create_with_allocator: * @fd: drm fd - i915 or xe * @ctx: for i915 context id, for xe engine id * @vm: for xe vm_id, unused for i915 * @cfg: intel_ctx configuration, NULL for default context or legacy mode * @size: size of the batchbuffer * @allocator_type: allocator type, SIMPLE, RANDOM, ... 
* * Creates bb with context passed in @ctx, size in @size and allocator type * in @allocator_type. Relocations are set to false because IGT allocator * is used in that case. * * Returns: * * Pointer the intel_bb, asserts on failure. */ struct intel_bb *intel_bb_create_with_allocator(int fd, uint32_t ctx, uint32_t vm, const intel_ctx_cfg_t *cfg, uint32_t size, uint8_t allocator_type) { return __intel_bb_create(fd, ctx, vm, cfg, size, false, 0, 0, 0, allocator_type, ALLOC_STRATEGY_HIGH_TO_LOW); } static bool aux_needs_softpin(int fd) { return intel_gen(intel_get_drm_devid(fd)) >= 12; } static bool has_ctx_cfg(struct intel_bb *ibb) { return ibb->cfg && ibb->cfg->num_engines > 0; } /** * intel_bb_create: * @fd: drm fd - i915 or xe * @size: size of the batchbuffer * * Creates bb with default context. * * Returns: * * Pointer the intel_bb, asserts on failure. * * Notes: * * intel_bb must not be created in igt_fixture. The reason is intel_bb * "opens" connection to the allocator and when test completes it can * leave the allocator in unknown state (mostly for failed tests). * As igt_core was armed to reset the allocator infrastructure * connection to it inside intel_bb is not valid anymore. * Trying to use it leads to catastrofic errors. */ struct intel_bb *intel_bb_create(int fd, uint32_t size) { bool relocs = is_i915_device(fd) && gem_has_relocations(fd); return __intel_bb_create(fd, 0, 0, NULL, size, relocs && !aux_needs_softpin(fd), 0, 0, 0, INTEL_ALLOCATOR_SIMPLE, ALLOC_STRATEGY_HIGH_TO_LOW); } /** * intel_bb_create_with_context: * @fd: drm fd - i915 or xe * @ctx: for i915 context id, for xe engine id * @vm: for xe vm_id, unused for i915 * @cfg: intel_ctx configuration, NULL for default context or legacy mode * @size: size of the batchbuffer * * Creates bb with context passed in @ctx and @cfg configuration (when * working with custom engines layout). * * Returns: * * Pointer the intel_bb, asserts on failure. */ struct intel_bb * intel_bb_create_with_context(int fd, uint32_t ctx, uint32_t vm, const intel_ctx_cfg_t *cfg, uint32_t size) { bool relocs = is_i915_device(fd) && gem_has_relocations(fd); return __intel_bb_create(fd, ctx, vm, cfg, size, relocs && !aux_needs_softpin(fd), 0, 0, 0, INTEL_ALLOCATOR_SIMPLE, ALLOC_STRATEGY_HIGH_TO_LOW); } /** * intel_bb_create_with_relocs: * @fd: drm fd - i915 * @size: size of the batchbuffer * * Creates bb which will disable passing addresses. * This will lead to relocations when objects are not previously pinned. * * Returns: * * Pointer the intel_bb, asserts on failure. */ struct intel_bb *intel_bb_create_with_relocs(int fd, uint32_t size) { igt_require(is_i915_device(fd) && gem_has_relocations(fd)); return __intel_bb_create(fd, 0, 0, NULL, size, true, 0, 0, 0, INTEL_ALLOCATOR_NONE, ALLOC_STRATEGY_NONE); } /** * intel_bb_create_with_relocs_and_context: * @fd: drm fd - i915 * @ctx: context * @cfg: intel_ctx configuration, NULL for default context or legacy mode * @size: size of the batchbuffer * * Creates bb with default context which will disable passing addresses. * This will lead to relocations when objects are not previously pinned. * * Returns: * * Pointer the intel_bb, asserts on failure. 
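 *
 * A sketch of the relocation-mode flow this constructor enables (buffer
 * setup and the actual commands are elided; intel_bb_flush_blit() is just
 * one of several ways to submit):
 *
 *   struct intel_bb *ibb;
 *
 *   ibb = intel_bb_create_with_relocs_and_context(fd, ctx, NULL, 4096);
 *   intel_bb_add_intel_buf(ibb, dst, true);
 *   ... emit commands, patching addresses via intel_bb_emit_reloc() ...
 *   intel_bb_flush_blit(ibb);
 *   intel_bb_sync(ibb);
 *   intel_bb_destroy(ibb);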
*/ struct intel_bb * intel_bb_create_with_relocs_and_context(int fd, uint32_t ctx, const intel_ctx_cfg_t *cfg, uint32_t size) { igt_require(is_i915_device(fd) && gem_has_relocations(fd)); return __intel_bb_create(fd, ctx, 0, cfg, size, true, 0, 0, 0, INTEL_ALLOCATOR_NONE, ALLOC_STRATEGY_NONE); } /** * intel_bb_create_no_relocs: * @fd: drm fd * @size: size of the batchbuffer * * Creates bb with disabled relocations. * This enables passing addresses and requires pinning objects. * * Returns: * * Pointer the intel_bb, asserts on failure. */ struct intel_bb *intel_bb_create_no_relocs(int fd, uint32_t size) { igt_require(gem_uses_full_ppgtt(fd)); return __intel_bb_create(fd, 0, 0, NULL, size, false, 0, 0, 0, INTEL_ALLOCATOR_SIMPLE, ALLOC_STRATEGY_HIGH_TO_LOW); } static void __intel_bb_destroy_relocations(struct intel_bb *ibb) { uint32_t i; /* Free relocations */ for (i = 0; i < ibb->num_objects; i++) { free(from_user_pointer(ibb->objects[i]->relocs_ptr)); ibb->objects[i]->relocs_ptr = to_user_pointer(NULL); ibb->objects[i]->relocation_count = 0; } ibb->relocs = NULL; ibb->num_relocs = 0; ibb->allocated_relocs = 0; } static void __intel_bb_destroy_objects(struct intel_bb *ibb) { free(ibb->objects); ibb->objects = NULL; tdestroy(ibb->current, free); ibb->current = NULL; ibb->num_objects = 0; ibb->allocated_objects = 0; } static void __intel_bb_destroy_cache(struct intel_bb *ibb) { tdestroy(ibb->root, free); ibb->root = NULL; } static void __intel_bb_remove_intel_bufs(struct intel_bb *ibb) { struct intel_buf *entry, *tmp; igt_list_for_each_entry_safe(entry, tmp, &ibb->intel_bufs, link) intel_bb_remove_intel_buf(ibb, entry); } /** * intel_bb_destroy: * @ibb: pointer to intel_bb * * Frees all relocations / objects allocated during filling the batch. */ void intel_bb_destroy(struct intel_bb *ibb) { igt_assert(ibb); ibb->refcount--; igt_assert_f(ibb->refcount == 0, "Trying to destroy referenced bb!"); __intel_bb_remove_intel_bufs(ibb); __intel_bb_destroy_relocations(ibb); __intel_bb_destroy_objects(ibb); __intel_bb_destroy_cache(ibb); if (ibb->allocator_type != INTEL_ALLOCATOR_NONE) { if (intel_bb_do_tracking) { pthread_mutex_lock(&intel_bb_list_lock); igt_list_del(&ibb->link); pthread_mutex_unlock(&intel_bb_list_lock); } intel_allocator_free(ibb->allocator_handle, ibb->handle); intel_allocator_close(ibb->allocator_handle); } gem_close(ibb->fd, ibb->handle); if (ibb->fence >= 0) close(ibb->fence); if (ibb->engine_syncobj) syncobj_destroy(ibb->fd, ibb->engine_syncobj); if (ibb->vm_id && !ibb->ctx) xe_vm_destroy(ibb->fd, ibb->vm_id); free(ibb->batch); free(ibb->cfg); free(ibb); } #define XE_OBJ_SIZE(rsvd1) ((rsvd1) & ~(SZ_4K-1)) #define XE_OBJ_PAT_IDX(rsvd1) ((rsvd1) & (SZ_4K-1)) static struct drm_xe_vm_bind_op *xe_alloc_bind_ops(struct intel_bb *ibb, uint32_t op, uint32_t flags, uint32_t prefetch_region) { struct drm_i915_gem_exec_object2 **objects = ibb->objects; struct drm_xe_vm_bind_op *bind_ops, *ops; bool set_obj = (op & 0xffff) == DRM_XE_VM_BIND_OP_MAP; bind_ops = calloc(ibb->num_objects, sizeof(*bind_ops)); igt_assert(bind_ops); igt_debug("bind_ops: %s\n", set_obj ? 
"MAP" : "UNMAP"); for (int i = 0; i < ibb->num_objects; i++) { ops = &bind_ops[i]; if (set_obj) ops->obj = objects[i]->handle; ops->op = op; ops->flags = flags; ops->obj_offset = 0; ops->addr = objects[i]->offset; ops->range = XE_OBJ_SIZE(objects[i]->rsvd1); ops->prefetch_mem_region_instance = prefetch_region; if (set_obj) ops->pat_index = XE_OBJ_PAT_IDX(objects[i]->rsvd1); igt_debug(" [%d]: handle: %u, offset: %llx, size: %llx pat_index: %u\n", i, ops->obj, (long long)ops->addr, (long long)ops->range, ops->pat_index); } return bind_ops; } static void __unbind_xe_objects(struct intel_bb *ibb) { struct drm_xe_sync syncs[2] = { { .type = DRM_XE_SYNC_TYPE_SYNCOBJ }, { .type = DRM_XE_SYNC_TYPE_SYNCOBJ, .flags = DRM_XE_SYNC_FLAG_SIGNAL, }, }; int ret; syncs[0].handle = ibb->engine_syncobj; syncs[1].handle = syncobj_create(ibb->fd, 0); if (ibb->num_objects > 1) { struct drm_xe_vm_bind_op *bind_ops; uint32_t op = DRM_XE_VM_BIND_OP_UNMAP; bind_ops = xe_alloc_bind_ops(ibb, op, 0, 0); xe_vm_bind_array(ibb->fd, ibb->vm_id, 0, bind_ops, ibb->num_objects, syncs, 2); free(bind_ops); } else { igt_debug("bind: UNMAP\n"); igt_debug(" offset: %llx, size: %llx\n", (long long)ibb->batch_offset, (long long)ibb->size); xe_vm_unbind_async(ibb->fd, ibb->vm_id, 0, 0, ibb->batch_offset, ibb->size, syncs, 2); } ret = syncobj_wait_err(ibb->fd, &syncs[1].handle, 1, INT64_MAX, 0); igt_assert_eq(ret, 0); syncobj_destroy(ibb->fd, syncs[1].handle); ibb->xe_bound = false; } /* * intel_bb_reset: * @ibb: pointer to intel_bb * @purge_objects_cache: if true destroy internal execobj and relocs + cache * * Recreate batch bo when there's no additional reference. * * When purge_object_cache == true we destroy cache as well as remove intel_buf * from intel-bb tracking list. Removing intel_bufs releases their addresses * in the allocator. */ void intel_bb_reset(struct intel_bb *ibb, bool purge_objects_cache) { uint32_t i; if (purge_objects_cache && ibb->refcount > 1) igt_warn("Cannot purge objects cache on bb, refcount > 1!"); /* Someone keeps reference, just exit */ if (ibb->refcount > 1) return; /* * To avoid relocation objects previously pinned to high virtual * addresses should keep 48bit flag. Ensure we won't clear it * in the reset path. */ for (i = 0; i < ibb->num_objects; i++) ibb->objects[i]->flags &= EXEC_OBJECT_SUPPORTS_48B_ADDRESS; if (ibb->driver == INTEL_DRIVER_XE && ibb->xe_bound) __unbind_xe_objects(ibb); __intel_bb_destroy_relocations(ibb); __intel_bb_destroy_objects(ibb); __reallocate_objects(ibb); if (purge_objects_cache) { __intel_bb_remove_intel_bufs(ibb); __intel_bb_destroy_cache(ibb); } /* * When we use allocators we're in no-reloc mode so we have to free * and reacquire offset (ibb->handle can change in multiprocess * environment). We also have to remove and add it again to * objects and cache tree. 
*/ if (ibb->allocator_type != INTEL_ALLOCATOR_NONE && !purge_objects_cache) intel_bb_remove_object(ibb, ibb->handle, ibb->batch_offset, ibb->size); gem_close(ibb->fd, ibb->handle); if (ibb->driver == INTEL_DRIVER_I915) ibb->handle = gem_create(ibb->fd, ibb->size); else ibb->handle = xe_bo_create(ibb->fd, 0, ibb->size, vram_if_possible(ibb->fd, 0), DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM); /* Reacquire offset for RELOC and SIMPLE */ if (ibb->allocator_type == INTEL_ALLOCATOR_SIMPLE || ibb->allocator_type == INTEL_ALLOCATOR_RELOC) ibb->batch_offset = __intel_bb_get_offset(ibb, ibb->handle, ibb->size, ibb->alignment, DEFAULT_PAT_INDEX); intel_bb_add_object(ibb, ibb->handle, ibb->size, ibb->batch_offset, ibb->alignment, false); ibb->ptr = ibb->batch; memset(ibb->batch, 0, ibb->size); } /* * intel_bb_sync: * @ibb: pointer to intel_bb * * Waits for bb completion. Returns 0 on success, otherwise errno. */ int intel_bb_sync(struct intel_bb *ibb) { int ret; if (ibb->fence < 0 && !ibb->engine_syncobj) return 0; if (ibb->fence >= 0) { ret = sync_fence_wait(ibb->fence, -1); if (ret == 0) { close(ibb->fence); ibb->fence = -1; } } else { igt_assert_neq(ibb->engine_syncobj, 0); ret = syncobj_wait_err(ibb->fd, &ibb->engine_syncobj, 1, INT64_MAX, 0); } return ret; } /* * intel_bb_print: * @ibb: pointer to intel_bb * * Prints batch to stdout. */ void intel_bb_print(struct intel_bb *ibb) { igt_info("drm fd: %d, gen: %d, devid: %u, debug: %d\n", ibb->fd, ibb->gen, ibb->devid, ibb->debug); igt_info("handle: %u, size: %u, batch: %p, ptr: %p\n", ibb->handle, ibb->size, ibb->batch, ibb->ptr); igt_info("gtt_size: %" PRIu64 ", supports 48bit: %d\n", ibb->gtt_size, ibb->supports_48b_address); igt_info("ctx: %u\n", ibb->ctx); igt_info("root: %p\n", ibb->root); igt_info("objects: %p, num_objects: %u, allocated obj: %u\n", ibb->objects, ibb->num_objects, ibb->allocated_objects); igt_info("relocs: %p, num_relocs: %u, allocated_relocs: %u\n----\n", ibb->relocs, ibb->num_relocs, ibb->allocated_relocs); } /* * intel_bb_dump: * @ibb: pointer to intel_bb * @filename: name to which write bb * @in_hex: dump bb in hex form * * Dump batch bo to file. */ void intel_bb_dump(struct intel_bb *ibb, const char *filename, bool in_hex) { FILE *out; void *ptr; /* * Note - for i915/relocations offsets inside batch are not resolved * until intel_bb_exec() will write collected instructions to bb * object. For i915/xe with allocator offsets are already acquired * and bb is complete so there's no need to map. */ if (ibb->driver == INTEL_DRIVER_I915 && ibb->enforce_relocs) ptr = gem_mmap__device_coherent(ibb->fd, ibb->handle, 0, ibb->size, PROT_READ); else ptr = ibb->batch; out = fopen(filename, "wb"); igt_assert(out); if (in_hex) { for (int i = 0; i < ibb->size / sizeof(uint32_t); i++) fprintf(out, "%08x\n", ((uint32_t *)ptr)[i]); } else { fwrite(ptr, ibb->size, 1, out); } fclose(out); if (ptr != ibb->batch) munmap(ptr, ibb->size); } /** * intel_bb_set_debug: * @ibb: pointer to intel_bb * @debug: true / false * * Sets debug to true / false. Execbuf is then called synchronously and * object/reloc arrays are printed after execution. */ void intel_bb_set_debug(struct intel_bb *ibb, bool debug) { ibb->debug = debug; } /** * intel_bb_set_dump_base64: * @ibb: pointer to intel_bb * @dump: true / false * * Do bb dump as base64 string before execbuf call. 
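 *
 * The debug aids are typically combined like this (the dump file name is
 * arbitrary):
 *
 *   intel_bb_set_debug(ibb, true);         // print execbuf objects/relocs
 *   intel_bb_set_dump_base64(ibb, true);   // base64 dump before each exec
 *   intel_bb_dump(ibb, "bb.hex", true);    // hex dump of the batch to a file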
*/ void intel_bb_set_dump_base64(struct intel_bb *ibb, bool dump) { ibb->dump_base64 = dump; } static int __compare_objects(const void *p1, const void *p2) { const struct drm_i915_gem_exec_object2 *o1 = p1, *o2 = p2; return (int) ((int64_t) o1->handle - (int64_t) o2->handle); } static struct drm_i915_gem_exec_object2 * __add_to_cache(struct intel_bb *ibb, uint32_t handle) { struct drm_i915_gem_exec_object2 **found, *object; object = malloc(sizeof(*object)); igt_assert(object); object->handle = handle; object->alignment = 0; found = tsearch((void *) object, &ibb->root, __compare_objects); if (*found == object) { memset(object, 0, sizeof(*object)); object->handle = handle; object->offset = INTEL_BUF_INVALID_ADDRESS; } else { free(object); object = *found; } return object; } static bool __remove_from_cache(struct intel_bb *ibb, uint32_t handle) { struct drm_i915_gem_exec_object2 **found, *object; object = intel_bb_find_object(ibb, handle); if (!object) { igt_warn("Object: handle: %u not found\n", handle); return false; } found = tdelete((void *) object, &ibb->root, __compare_objects); if (!found) return false; free(object); return true; } static int __compare_handles(const void *p1, const void *p2) { return (int) (*(int32_t *) p1 - *(int32_t *) p2); } static void __add_to_objects(struct intel_bb *ibb, struct drm_i915_gem_exec_object2 *object) { uint32_t **found, *handle; handle = malloc(sizeof(*handle)); igt_assert(handle); *handle = object->handle; found = tsearch((void *) handle, &ibb->current, __compare_handles); if (*found == handle) { __reallocate_objects(ibb); igt_assert(ibb->num_objects < ibb->allocated_objects); ibb->objects[ibb->num_objects++] = object; } else { free(handle); } } static void __remove_from_objects(struct intel_bb *ibb, struct drm_i915_gem_exec_object2 *object) { uint32_t i, **handle, *to_free; bool found = false; for (i = 0; i < ibb->num_objects; i++) { if (ibb->objects[i] == object) { found = true; break; } } /* * When we reset bb (without purging) we have: * 1. cache which contains all cached objects * 2. objects array which contains only bb object (cleared in reset * path with bb object added at the end) * So !found is normal situation and no warning is added here. */ if (!found) return; ibb->num_objects--; if (i < ibb->num_objects) memmove(&ibb->objects[i], &ibb->objects[i + 1], sizeof(object) * (ibb->num_objects - i)); handle = tfind((void *) &object->handle, &ibb->current, __compare_handles); if (!handle) { igt_warn("Object %u doesn't exist in the tree, can't remove", object->handle); return; } to_free = *handle; tdelete((void *) &object->handle, &ibb->current, __compare_handles); free(to_free); } /** * __intel_bb_add_object: * @ibb: pointer to intel_bb * @handle: which handle to add to objects array * @size: object size * @offset: presumed offset of the object when no relocation is enforced * @alignment: alignment of the object, if 0 it will be set to page size * @write: does a handle is a render target * * Function adds or updates execobj slot in bb objects array and * in the object tree. When object is a render target it has to * be marked with EXEC_OBJECT_WRITE flag. 
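 *
 * Callers normally go through the intel_bb_add_object() wrapper below;
 * passing INTEL_BUF_INVALID_ADDRESS as @offset lets the allocator pick an
 * address, and 0 as @alignment falls back to the library default:
 *
 *   intel_bb_add_object(ibb, handle, size, INTEL_BUF_INVALID_ADDRESS,
 *                       0, true);
 */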
*/ static struct drm_i915_gem_exec_object2 * __intel_bb_add_object(struct intel_bb *ibb, uint32_t handle, uint64_t size, uint64_t offset, uint64_t alignment, uint8_t pat_index, bool write) { struct drm_i915_gem_exec_object2 *object; igt_assert(INVALID_ADDR(offset) || alignment == 0 || ALIGN(offset, alignment) == offset); igt_assert(is_power_of_two(alignment)); if (ibb->driver == INTEL_DRIVER_I915) alignment = max_t(uint64_t, alignment, gem_detect_safe_alignment(ibb->fd)); else alignment = max_t(uint64_t, ibb->alignment, alignment); object = __add_to_cache(ibb, handle); __add_to_objects(ibb, object); /* * If object->offset == INVALID_ADDRESS we added freshly object to the * cache. In that case we have two choices: * a) get new offset (passed offset was invalid) * b) use offset passed in the call (valid) */ if (INVALID_ADDR(object->offset)) { if (INVALID_ADDR(offset)) { offset = __intel_bb_get_offset(ibb, handle, size, alignment, pat_index); } else { offset = offset & (roundup_power_of_two(ibb->gtt_size) - 1); /* * For simple allocator check entry consistency * - reserve if it is not already allocated. */ if (ibb->allocator_type == INTEL_ALLOCATOR_SIMPLE) { bool allocated, reserved; reserved = intel_allocator_reserve_if_not_allocated(ibb->allocator_handle, handle, size, offset, &allocated); igt_assert_f(allocated || reserved, "Can't get offset, allocated: %d, reserved: %d\n", allocated, reserved); } } } else { /* * This assertion makes sense only when we have to be consistent * with underlying allocator. For relocations and when !ppgtt * we can expect addresses passed by the user can be moved * within the driver. */ if (ibb->allocator_type == INTEL_ALLOCATOR_SIMPLE) igt_assert_f(object->offset == offset, "(pid: %ld) handle: %u, offset not match: %" PRIx64 " <> %" PRIx64 "\n", (long) getpid(), handle, (uint64_t) object->offset, offset); } object->offset = offset; if (write) object->flags |= EXEC_OBJECT_WRITE; if (ibb->supports_48b_address) object->flags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS; if (ibb->uses_full_ppgtt && !ibb->enforce_relocs) object->flags |= EXEC_OBJECT_PINNED; if (ibb->allows_obj_alignment) object->alignment = alignment; if (ibb->driver == INTEL_DRIVER_XE) { object->alignment = alignment; object->rsvd1 = size; igt_assert(!XE_OBJ_PAT_IDX(object->rsvd1)); if (pat_index == DEFAULT_PAT_INDEX) pat_index = intel_get_pat_idx_wb(ibb->fd); /* * XXX: For now encode the pat_index in the first few bits of * rsvd1. intel_batchbuffer should really stop using the i915 * drm_i915_gem_exec_object2 to encode VMA placement * information on xe... 
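 *
 * Concretely, the packing assumed by XE_OBJ_SIZE()/XE_OBJ_PAT_IDX() above
 * is (the bo size is at least SZ_4K aligned, so the low 12 bits are free):
 *
 *   object->rsvd1 = size | pat_index;
 *   size      == rsvd1 & ~(SZ_4K - 1)
 *   pat_index == rsvd1 &  (SZ_4K - 1)
 */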
*/ object->rsvd1 |= pat_index; } return object; } struct drm_i915_gem_exec_object2 * intel_bb_add_object(struct intel_bb *ibb, uint32_t handle, uint64_t size, uint64_t offset, uint64_t alignment, bool write) { struct drm_i915_gem_exec_object2 *obj = NULL; obj = __intel_bb_add_object(ibb, handle, size, offset, alignment, DEFAULT_PAT_INDEX, write); igt_assert(obj); return obj; } bool intel_bb_remove_object(struct intel_bb *ibb, uint32_t handle, uint64_t offset, uint64_t size) { struct drm_i915_gem_exec_object2 *object; bool is_reserved; object = intel_bb_find_object(ibb, handle); if (!object) return false; if (ibb->allocator_type != INTEL_ALLOCATOR_NONE) { intel_allocator_free(ibb->allocator_handle, handle); is_reserved = intel_allocator_is_reserved(ibb->allocator_handle, size, offset); if (is_reserved) intel_allocator_unreserve(ibb->allocator_handle, handle, size, offset); } __remove_from_objects(ibb, object); __remove_from_cache(ibb, handle); return true; } static struct drm_i915_gem_exec_object2 * __intel_bb_add_intel_buf(struct intel_bb *ibb, struct intel_buf *buf, uint64_t alignment, bool write) { struct drm_i915_gem_exec_object2 *obj; igt_assert(ibb); igt_assert(buf); igt_assert(!buf->ibb || buf->ibb == ibb); igt_assert(ALIGN(alignment, 4096) == alignment); if (!alignment) { alignment = 0x1000; /* * TODO: * Find out why MTL need special alignment, spec says 32k * is enough for MTL. */ if (ibb->gen >= 12 && buf->compression) alignment = IS_METEORLAKE(ibb->devid) ? 0x100000 : 0x10000; /* For gen3 ensure tiled buffers are aligned to power of two size */ if (ibb->gen == 3 && buf->tiling) { alignment = 1024 * 1024; while (alignment < buf->surface[0].size) alignment <<= 1; } } obj = __intel_bb_add_object(ibb, buf->handle, intel_buf_bo_size(buf), buf->addr.offset, alignment, buf->pat_index, write); igt_assert(obj); buf->addr.offset = obj->offset; if (igt_list_empty(&buf->link)) { igt_list_add_tail(&buf->link, &ibb->intel_bufs); buf->ibb = ibb; } else { igt_assert(buf->ibb == ibb); } return obj; } struct drm_i915_gem_exec_object2 * intel_bb_add_intel_buf(struct intel_bb *ibb, struct intel_buf *buf, bool write) { return __intel_bb_add_intel_buf(ibb, buf, 0, write); } struct drm_i915_gem_exec_object2 * intel_bb_add_intel_buf_with_alignment(struct intel_bb *ibb, struct intel_buf *buf, uint64_t alignment, bool write) { return __intel_bb_add_intel_buf(ibb, buf, alignment, write); } bool intel_bb_remove_intel_buf(struct intel_bb *ibb, struct intel_buf *buf) { bool removed; igt_assert(ibb); igt_assert(buf); igt_assert(!buf->ibb || buf->ibb == ibb); if (igt_list_empty(&buf->link)) return false; removed = intel_bb_remove_object(ibb, buf->handle, buf->addr.offset, intel_buf_bo_size(buf)); if (removed) { buf->addr.offset = INTEL_BUF_INVALID_ADDRESS; buf->ibb = NULL; igt_list_del_init(&buf->link); } return removed; } void intel_bb_print_intel_bufs(struct intel_bb *ibb) { struct intel_buf *entry; igt_list_for_each_entry(entry, &ibb->intel_bufs, link) { igt_info("handle: %u, ibb: %p, offset: %lx\n", entry->handle, entry->ibb, (long) entry->addr.offset); } } struct drm_i915_gem_exec_object2 * intel_bb_find_object(struct intel_bb *ibb, uint32_t handle) { struct drm_i915_gem_exec_object2 object = { .handle = handle }; struct drm_i915_gem_exec_object2 **found; found = tfind((void *) &object, &ibb->root, __compare_objects); if (!found) return NULL; return *found; } bool intel_bb_object_set_flag(struct intel_bb *ibb, uint32_t handle, uint64_t flag) { struct drm_i915_gem_exec_object2 object = { .handle = handle }; 
struct drm_i915_gem_exec_object2 **found; igt_assert_f(ibb->root, "Trying to search in null tree\n"); found = tfind((void *) &object, &ibb->root, __compare_objects); if (!found) { igt_warn("Trying to set fence on not found handle: %u\n", handle); return false; } (*found)->flags |= flag; return true; } bool intel_bb_object_clear_flag(struct intel_bb *ibb, uint32_t handle, uint64_t flag) { struct drm_i915_gem_exec_object2 object = { .handle = handle }; struct drm_i915_gem_exec_object2 **found; found = tfind((void *) &object, &ibb->root, __compare_objects); if (!found) { igt_warn("Trying to set fence on not found handle: %u\n", handle); return false; } (*found)->flags &= ~flag; return true; } /* * intel_bb_add_reloc: * @ibb: pointer to intel_bb * @to_handle: object handle in which do relocation * @handle: object handle which address will be taken to patch the @to_handle * @read_domains: gem domain bits for the relocation * @write_domain: gem domain bit for the relocation * @delta: delta value to add to @buffer's gpu address * @offset: offset within bb to be patched * * When relocations are requested function allocates additional relocation slot * in reloc array for a handle. * Object must be previously added to bb. */ static uint64_t intel_bb_add_reloc(struct intel_bb *ibb, uint32_t to_handle, uint32_t handle, uint32_t read_domains, uint32_t write_domain, uint64_t delta, uint64_t offset, uint64_t presumed_offset) { struct drm_i915_gem_relocation_entry *relocs; struct drm_i915_gem_exec_object2 *object, *to_object; uint32_t i; object = intel_bb_find_object(ibb, handle); igt_assert(object); /* In no-reloc mode we just return the previously assigned address */ if (!ibb->enforce_relocs) goto out; /* For ibb we have relocs allocated in chunks */ if (to_handle == ibb->handle) { relocs = ibb->relocs; if (ibb->num_relocs == ibb->allocated_relocs) { ibb->allocated_relocs += 4096 / sizeof(*relocs); relocs = realloc(relocs, sizeof(*relocs) * ibb->allocated_relocs); igt_assert(relocs); ibb->relocs = relocs; } i = ibb->num_relocs++; } else { to_object = intel_bb_find_object(ibb, to_handle); igt_assert_f(to_object, "object has to be added to ibb first!\n"); i = to_object->relocation_count++; relocs = from_user_pointer(to_object->relocs_ptr); relocs = realloc(relocs, sizeof(*relocs) * to_object->relocation_count); to_object->relocs_ptr = to_user_pointer(relocs); igt_assert(relocs); } memset(&relocs[i], 0, sizeof(*relocs)); relocs[i].target_handle = handle; relocs[i].read_domains = read_domains; relocs[i].write_domain = write_domain; relocs[i].delta = delta; relocs[i].offset = offset; if (ibb->enforce_relocs) relocs[i].presumed_offset = -1; else relocs[i].presumed_offset = object->offset; igt_debug("add reloc: to_handle: %u, handle: %u, r/w: 0x%x/0x%x, " "delta: 0x%" PRIx64 ", " "offset: 0x%" PRIx64 ", " "poffset: %p\n", to_handle, handle, read_domains, write_domain, delta, offset, from_user_pointer(relocs[i].presumed_offset)); out: return object->offset; } static uint64_t __intel_bb_emit_reloc(struct intel_bb *ibb, uint32_t to_handle, uint32_t to_offset, uint32_t handle, uint32_t read_domains, uint32_t write_domain, uint64_t delta, uint64_t presumed_offset) { uint64_t address; igt_assert(ibb); address = intel_bb_add_reloc(ibb, to_handle, handle, read_domains, write_domain, delta, to_offset, presumed_offset); intel_bb_out(ibb, delta + address); if (ibb->gen >= 8) intel_bb_out(ibb, (delta + address) >> 32); return address; } /** * intel_bb_emit_reloc: * @ibb: pointer to intel_bb * @handle: object handle which 
address will be taken to patch the bb * @read_domains: gem domain bits for the relocation * @write_domain: gem domain bit for the relocation * @delta: delta value to add to @buffer's gpu address * @presumed_offset: address of the object in address space. If -1 is passed * then final offset of the object will be randomized (for no-reloc bb) or * 0 (for reloc bb, in that case reloc.presumed_offset will be -1). In * case address is known it should passed in @presumed_offset (for no-reloc). * @write: does a handle is a render target * * Function prepares relocation (execobj if required + reloc) and emits * offset in bb. For I915_EXEC_NO_RELOC presumed_offset is a hint we already * have object in valid place and relocation step can be skipped in this case. * * Note: delta is value added to address, mostly used when some instructions * require modify-bit set to apply change. Which delta is valid depends * on instruction (see instruction specification). */ uint64_t intel_bb_emit_reloc(struct intel_bb *ibb, uint32_t handle, uint32_t read_domains, uint32_t write_domain, uint64_t delta, uint64_t presumed_offset) { igt_assert(ibb); return __intel_bb_emit_reloc(ibb, ibb->handle, intel_bb_offset(ibb), handle, read_domains, write_domain, delta, presumed_offset); } uint64_t intel_bb_emit_reloc_fenced(struct intel_bb *ibb, uint32_t handle, uint32_t read_domains, uint32_t write_domain, uint64_t delta, uint64_t presumed_offset) { uint64_t address; address = intel_bb_emit_reloc(ibb, handle, read_domains, write_domain, delta, presumed_offset); intel_bb_object_set_flag(ibb, handle, EXEC_OBJECT_NEEDS_FENCE); return address; } /** * intel_bb_offset_reloc: * @ibb: pointer to intel_bb * @handle: object handle which address will be taken to patch the bb * @read_domains: gem domain bits for the relocation * @write_domain: gem domain bit for the relocation * @offset: offset within bb to be patched * @presumed_offset: address of the object in address space. If -1 is passed * then final offset of the object will be randomized (for no-reloc bb) or * 0 (for reloc bb, in that case reloc.presumed_offset will be -1). In * case address is known it should passed in @presumed_offset (for no-reloc). * * Function prepares relocation (execobj if required + reloc). It it used * for editing batchbuffer via modifying structures. It means when we're * preparing batchbuffer it is more descriptive to edit the structure * than emitting dwords. But it require for some fields to point the * relocation. For that case @offset is passed by the user and it points * to the offset in bb where the relocation will be applied. 
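 *
 * A sketch of patching a dword at a known batch offset (the 0x40 offset
 * and @buf are placeholders for illustration only):
 *
 *   uint32_t patch = intel_bb_offset(ibb) + 0x40;
 *
 *   intel_bb_offset_reloc(ibb, buf->handle,
 *                         I915_GEM_DOMAIN_RENDER, 0,
 *                         patch, buf->addr.offset);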
*/ uint64_t intel_bb_offset_reloc(struct intel_bb *ibb, uint32_t handle, uint32_t read_domains, uint32_t write_domain, uint32_t offset, uint64_t presumed_offset) { igt_assert(ibb); return intel_bb_add_reloc(ibb, ibb->handle, handle, read_domains, write_domain, 0, offset, presumed_offset); } uint64_t intel_bb_offset_reloc_with_delta(struct intel_bb *ibb, uint32_t handle, uint32_t read_domains, uint32_t write_domain, uint32_t delta, uint32_t offset, uint64_t presumed_offset) { igt_assert(ibb); return intel_bb_add_reloc(ibb, ibb->handle, handle, read_domains, write_domain, delta, offset, presumed_offset); } uint64_t intel_bb_offset_reloc_to_object(struct intel_bb *ibb, uint32_t to_handle, uint32_t handle, uint32_t read_domains, uint32_t write_domain, uint32_t delta, uint32_t offset, uint64_t presumed_offset) { igt_assert(ibb); return intel_bb_add_reloc(ibb, to_handle, handle, read_domains, write_domain, delta, offset, presumed_offset); } /* * @intel_bb_set_pxp: * @ibb: pointer to intel_bb * @new_state: enable or disable pxp session * @apptype: pxp session input identifies what type of session to enable * @appid: pxp session input provides which appid to use * * This function merely stores the pxp state and session information to * be retrieved and programmed later by supporting libraries such as * gen12_render_copy that must program the HW within the same dispatch */ void intel_bb_set_pxp(struct intel_bb *ibb, bool new_state, uint32_t apptype, uint32_t appid) { igt_assert(ibb); ibb->pxp.enabled = new_state; ibb->pxp.apptype = new_state ? apptype : 0; ibb->pxp.appid = new_state ? appid : 0; } static void intel_bb_dump_execbuf(struct intel_bb *ibb, struct drm_i915_gem_execbuffer2 *execbuf) { struct drm_i915_gem_exec_object2 *objects; struct drm_i915_gem_relocation_entry *relocs, *reloc; int i, j; uint64_t address; igt_debug("execbuf [pid: %ld, fd: %d, ctx: %u]\n", (long) getpid(), ibb->fd, ibb->ctx); igt_debug("execbuf batch len: %u, start offset: 0x%x, " "DR1: 0x%x, DR4: 0x%x, " "num clip: %u, clipptr: 0x%llx, " "flags: 0x%llx, rsvd1: 0x%llx, rsvd2: 0x%llx\n", execbuf->batch_len, execbuf->batch_start_offset, execbuf->DR1, execbuf->DR4, execbuf->num_cliprects, execbuf->cliprects_ptr, execbuf->flags, execbuf->rsvd1, execbuf->rsvd2); igt_debug("execbuf buffer_count: %d\n", execbuf->buffer_count); for (i = 0; i < execbuf->buffer_count; i++) { objects = &((struct drm_i915_gem_exec_object2 *) from_user_pointer(execbuf->buffers_ptr))[i]; relocs = from_user_pointer(objects->relocs_ptr); address = objects->offset; igt_debug(" [%d] handle: %u, reloc_count: %d, reloc_ptr: %p, " "align: 0x%llx, offset: 0x%" PRIx64 ", flags: 0x%llx, " "rsvd1: 0x%llx, rsvd2: 0x%llx\n", i, objects->handle, objects->relocation_count, relocs, objects->alignment, address, objects->flags, objects->rsvd1, objects->rsvd2); if (objects->relocation_count) { igt_debug("\texecbuf relocs:\n"); for (j = 0; j < objects->relocation_count; j++) { reloc = &relocs[j]; address = reloc->presumed_offset; igt_debug("\t [%d] target handle: %u, " "offset: 0x%llx, delta: 0x%x, " "presumed_offset: 0x%" PRIx64 ", " "read_domains: 0x%x, " "write_domain: 0x%x\n", j, reloc->target_handle, reloc->offset, reloc->delta, address, reloc->read_domains, reloc->write_domain); } } } } static void intel_bb_dump_base64(struct intel_bb *ibb, int linelen) { int outsize; gchar *str, *pos; igt_info("--- bb ---\n"); pos = str = g_base64_encode((const guchar *) ibb->batch, ibb->size); outsize = strlen(str); while (outsize > 0) { igt_info("%.*s\n", min(outsize, linelen), 
pos); pos += linelen; outsize -= linelen; } free(str); } static void print_node(const void *node, VISIT which, int depth) { const struct drm_i915_gem_exec_object2 *object = *(const struct drm_i915_gem_exec_object2 **) node; (void) depth; switch (which) { case preorder: case endorder: break; case postorder: case leaf: igt_info("\t handle: %u, offset: 0x%" PRIx64 "\n", object->handle, (uint64_t) object->offset); break; } } void intel_bb_dump_cache(struct intel_bb *ibb) { igt_info("[pid: %ld] dump cache\n", (long) getpid()); twalk(ibb->root, print_node); } static struct drm_i915_gem_exec_object2 * create_objects_array(struct intel_bb *ibb) { struct drm_i915_gem_exec_object2 *objects; uint32_t i; objects = malloc(sizeof(*objects) * ibb->num_objects); igt_assert(objects); for (i = 0; i < ibb->num_objects; i++) { objects[i] = *(ibb->objects[i]); objects[i].offset = CANONICAL(objects[i].offset); } return objects; } static void update_offsets(struct intel_bb *ibb, struct drm_i915_gem_exec_object2 *objects) { struct drm_i915_gem_exec_object2 *object; struct intel_buf *entry; uint32_t i; for (i = 0; i < ibb->num_objects; i++) { object = intel_bb_find_object(ibb, objects[i].handle); igt_assert(object); object->offset = DECANONICAL(objects[i].offset); if (i == 0) ibb->batch_offset = object->offset; } igt_list_for_each_entry(entry, &ibb->intel_bufs, link) { object = intel_bb_find_object(ibb, entry->handle); igt_assert(object); if (ibb->allocator_type == INTEL_ALLOCATOR_SIMPLE) igt_assert(object->offset == entry->addr.offset); else entry->addr.offset = object->offset; entry->addr.ctx = ibb->ctx; } } #define LINELEN 76 static int __xe_bb_exec(struct intel_bb *ibb, uint64_t flags, bool sync) { uint32_t engine = flags & (I915_EXEC_BSD_MASK | I915_EXEC_RING_MASK); uint32_t engine_id; struct drm_xe_sync syncs[2] = { { .type = DRM_XE_SYNC_TYPE_SYNCOBJ, .flags = DRM_XE_SYNC_FLAG_SIGNAL, }, { .type = DRM_XE_SYNC_TYPE_SYNCOBJ, .flags = DRM_XE_SYNC_FLAG_SIGNAL, }, }; struct drm_xe_vm_bind_op *bind_ops; void *map; igt_assert_eq(ibb->num_relocs, 0); igt_assert_eq(ibb->xe_bound, false); if (ibb->ctx) { engine_id = ibb->ctx; } else if (ibb->last_engine != engine) { struct drm_xe_engine_class_instance inst = { }; inst.engine_instance = (flags & I915_EXEC_BSD_MASK) >> I915_EXEC_BSD_SHIFT; switch (flags & I915_EXEC_RING_MASK) { case I915_EXEC_DEFAULT: case I915_EXEC_BLT: inst.engine_class = DRM_XE_ENGINE_CLASS_COPY; break; case I915_EXEC_BSD: inst.engine_class = DRM_XE_ENGINE_CLASS_VIDEO_DECODE; break; case I915_EXEC_RENDER: if (xe_has_engine_class(ibb->fd, DRM_XE_ENGINE_CLASS_RENDER)) inst.engine_class = DRM_XE_ENGINE_CLASS_RENDER; else inst.engine_class = DRM_XE_ENGINE_CLASS_COMPUTE; break; case I915_EXEC_VEBOX: inst.engine_class = DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE; break; default: igt_assert_f(false, "Unknown engine: %x", (uint32_t) flags); } igt_debug("Run on %s\n", xe_engine_class_string(inst.engine_class)); if (ibb->engine_id) xe_exec_queue_destroy(ibb->fd, ibb->engine_id); ibb->engine_id = engine_id = xe_exec_queue_create(ibb->fd, ibb->vm_id, &inst, 0); } else { engine_id = ibb->engine_id; } ibb->last_engine = engine; map = xe_bo_map(ibb->fd, ibb->handle, ibb->size); memcpy(map, ibb->batch, ibb->size); gem_munmap(map, ibb->size); syncs[0].handle = syncobj_create(ibb->fd, 0); if (ibb->num_objects > 1) { bind_ops = xe_alloc_bind_ops(ibb, DRM_XE_VM_BIND_OP_MAP, 0, 0); xe_vm_bind_array(ibb->fd, ibb->vm_id, 0, bind_ops, ibb->num_objects, syncs, 1); free(bind_ops); } else { igt_debug("bind: MAP\n"); igt_debug(" handle: 
%u, offset: %llx, size: %llx\n",
			  ibb->handle, (long long)ibb->batch_offset,
			  (long long)ibb->size);
		xe_vm_bind_async(ibb->fd, ibb->vm_id, 0, ibb->handle, 0,
				 ibb->batch_offset, ibb->size, syncs, 1);
	}
	ibb->xe_bound = true;

	syncs[0].flags &= ~DRM_XE_SYNC_FLAG_SIGNAL;
	ibb->engine_syncobj = syncobj_create(ibb->fd, 0);
	syncs[1].handle = ibb->engine_syncobj;
	xe_exec_sync(ibb->fd, engine_id, ibb->batch_offset, syncs, 2);

	if (sync)
		intel_bb_sync(ibb);

	return 0;
}

/*
 * __intel_bb_exec:
 * @ibb: pointer to intel_bb
 * @end_offset: offset of the last instruction in the bb
 * @flags: flags passed directly to execbuf
 * @sync: if true wait for execbuf completion, otherwise the caller is
 * responsible for waiting for completion
 *
 * Returns: 0 on success, otherwise errno.
 *
 * Note: In this step the execobj for the bb is allocated and inserted into
 * the objects array.
 */
int __intel_bb_exec(struct intel_bb *ibb, uint32_t end_offset,
		    uint64_t flags, bool sync)
{
	struct drm_i915_gem_execbuffer2 execbuf;
	struct drm_i915_gem_exec_object2 *objects;
	int ret, fence, new_fence;

	ibb->objects[0]->relocs_ptr = to_user_pointer(ibb->relocs);
	ibb->objects[0]->relocation_count = ibb->num_relocs;
	ibb->objects[0]->handle = ibb->handle;
	ibb->objects[0]->offset = ibb->batch_offset;

	gem_write(ibb->fd, ibb->handle, 0, ibb->batch, ibb->size);

	memset(&execbuf, 0, sizeof(execbuf));
	objects = create_objects_array(ibb);
	execbuf.buffers_ptr = to_user_pointer(objects);
	execbuf.buffer_count = ibb->num_objects;
	execbuf.batch_len = end_offset;
	execbuf.rsvd1 = ibb->ctx;
	execbuf.flags = flags | I915_EXEC_BATCH_FIRST | I915_EXEC_FENCE_OUT;
	if (ibb->enforce_relocs)
		execbuf.flags &= ~I915_EXEC_NO_RELOC;
	execbuf.rsvd2 = 0;

	if (ibb->dump_base64)
		intel_bb_dump_base64(ibb, LINELEN);

	/* For debugging on CI, remove in final series */
	intel_bb_dump_execbuf(ibb, &execbuf);

	ret = __gem_execbuf_wr(ibb->fd, &execbuf);
	if (ret) {
		intel_bb_dump_execbuf(ibb, &execbuf);
		free(objects);
		return ret;
	}

	/* Update addresses in the cache */
	update_offsets(ibb, objects);

	/* Save/merge fences */
	fence = execbuf.rsvd2 >> 32;

	if (ibb->fence < 0) {
		ibb->fence = fence;
	} else {
		new_fence = sync_fence_merge(ibb->fence, fence);
		close(ibb->fence);
		close(fence);
		ibb->fence = new_fence;
	}

	if (sync || ibb->debug)
		igt_assert(intel_bb_sync(ibb) == 0);

	if (ibb->debug) {
		intel_bb_dump_execbuf(ibb, &execbuf);
		if (intel_bb_debug_tree) {
			igt_info("\nTree:\n");
			twalk(ibb->root, print_node);
		}
	}

	free(objects);

	return 0;
}

/**
 * intel_bb_exec:
 * @ibb: pointer to intel_bb
 * @end_offset: offset of the last instruction in the bb (for i915)
 * @flags: flags passed directly to execbuf
 * @sync: if true wait for execbuf completion, otherwise the caller is
 * responsible for waiting for completion
 *
 * Do execbuf on context selected during bb creation. Asserts on failure.
 */
void intel_bb_exec(struct intel_bb *ibb, uint32_t end_offset,
		   uint64_t flags, bool sync)
{
	if (ibb->dump_base64)
		intel_bb_dump_base64(ibb, LINELEN);

	if (ibb->driver == INTEL_DRIVER_I915)
		igt_assert_eq(__intel_bb_exec(ibb, end_offset, flags, sync), 0);
	else
		igt_assert_eq(__xe_bb_exec(ibb, flags, sync), 0);
}

/**
 * intel_bb_get_object_offset:
 * @ibb: pointer to intel_bb
 * @handle: object handle
 *
 * When object addresses are previously pinned and we don't want to relocate
 * we need to acquire them from a previous execbuf. Function returns the
 * previous object offset for @handle, or INTEL_BUF_INVALID_ADDRESS if the
 * object is not found.
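 *
 * A short usage sketch (@buf is a hypothetical intel_buf that was already
 * added to @ibb and executed at least once):
 *
 * |[
 *   uint64_t addr = intel_bb_get_object_offset(ibb, buf->handle);
 *
 *   if (addr != INTEL_BUF_INVALID_ADDRESS)
 *           igt_debug("object pinned at 0x%" PRIx64 "\n", addr);
 * ]|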
 */
uint64_t intel_bb_get_object_offset(struct intel_bb *ibb, uint32_t handle)
{
	struct drm_i915_gem_exec_object2 object = { .handle = handle };
	struct drm_i915_gem_exec_object2 **found;

	igt_assert(ibb);

	found = tfind((void *)&object, &ibb->root, __compare_objects);
	if (!found)
		return INTEL_BUF_INVALID_ADDRESS;

	return (*found)->offset;
}

/*
 * intel_bb_emit_bbe:
 * @ibb: batchbuffer
 *
 * Outputs MI_BATCH_BUFFER_END and ensures batch is properly aligned.
 */
uint32_t intel_bb_emit_bbe(struct intel_bb *ibb)
{
	/* Mark the end of the buffer. */
	intel_bb_out(ibb, MI_BATCH_BUFFER_END);
	intel_bb_ptr_align(ibb, 8);

	return intel_bb_offset(ibb);
}

/*
 * intel_bb_emit_flush_common:
 * @ibb: batchbuffer
 *
 * Emits the instructions which complete the batch buffer.
 *
 * Returns: offset in the batch buffer at which the emitted instructions end.
 */
uint32_t intel_bb_emit_flush_common(struct intel_bb *ibb)
{
	if (intel_bb_offset(ibb) == 0)
		return 0;

	if (ibb->gen == 5) {
		/*
		 * emit gen5 w/a without batch space checks - we reserve that
		 * already.
		 */
		intel_bb_out(ibb, CMD_POLY_STIPPLE_OFFSET << 16);
		intel_bb_out(ibb, 0);
	}

	/* Round batchbuffer usage to 2 DWORDs. */
	if ((intel_bb_offset(ibb) & 4) == 0)
		intel_bb_out(ibb, 0);

	intel_bb_emit_bbe(ibb);

	return intel_bb_offset(ibb);
}

static void intel_bb_exec_with_ring(struct intel_bb *ibb, uint32_t ring)
{
	intel_bb_exec(ibb, intel_bb_offset(ibb),
		      ring | I915_EXEC_NO_RELOC, false);
	intel_bb_reset(ibb, false);
}

/*
 * intel_bb_flush:
 * @ibb: batchbuffer
 * @ring: ring
 *
 * If the batch is not empty, emit the batch buffer end, execute on @ring,
 * then reset the batch.
 */
void intel_bb_flush(struct intel_bb *ibb, uint32_t ring)
{
	if (intel_bb_emit_flush_common(ibb) == 0)
		return;

	intel_bb_exec_with_ring(ibb, ring);
}

/*
 * intel_bb_flush_render:
 * @ibb: batchbuffer
 *
 * If the batch is not empty, emit the batch buffer end, find the render
 * engine id, execute on that ring and reset the batch. The context used to
 * execute is the batch context.
 */
void intel_bb_flush_render(struct intel_bb *ibb)
{
	uint32_t ring;

	if (intel_bb_emit_flush_common(ibb) == 0)
		return;

	if (has_ctx_cfg(ibb))
		ring = find_engine(ibb->cfg, I915_ENGINE_CLASS_RENDER);
	else
		ring = I915_EXEC_RENDER;

	intel_bb_exec_with_ring(ibb, ring);
}

/*
 * intel_bb_flush_blit:
 * @ibb: batchbuffer
 *
 * If the batch is not empty, emit the batch buffer end, find a suitable ring
 * (depending on gen and context configuration), execute on it and reset the
 * batch. The context used to execute is the batch context.
 */
void intel_bb_flush_blit(struct intel_bb *ibb)
{
	uint32_t ring;

	if (intel_bb_emit_flush_common(ibb) == 0)
		return;

	if (has_ctx_cfg(ibb))
		ring = find_engine(ibb->cfg, I915_ENGINE_CLASS_COPY);
	else
		ring = HAS_BLT_RING(ibb->devid) ? I915_EXEC_BLT : I915_EXEC_DEFAULT;

	intel_bb_exec_with_ring(ibb, ring);
}

/*
 * intel_bb_copy_data:
 * @ibb: batchbuffer
 * @data: pointer to the data which should be copied into the batch
 * @bytes: number of bytes to copy, must be a multiple of 4 (whole dwords)
 * @align: alignment in the batch
 *
 * Function copies @bytes of data pointed to by @data into the batch buffer.
 */
uint32_t intel_bb_copy_data(struct intel_bb *ibb,
			    const void *data, unsigned int bytes,
			    uint32_t align)
{
	uint32_t *subdata, offset;

	igt_assert((bytes & 3) == 0);

	intel_bb_ptr_align(ibb, align);
	offset = intel_bb_offset(ibb);
	igt_assert(offset + bytes < ibb->size);

	subdata = intel_bb_ptr(ibb);
	memcpy(subdata, data, bytes);
	intel_bb_ptr_add(ibb, bytes);

	return offset;
}

/*
 * intel_bb_blit_start:
 * @ibb: batchbuffer
 * @flags: flags to blit command
 *
 * Function emits the XY_SRC_COPY_BLT instruction with an appropriate length,
 * which depends on the gen.
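 *
 * Illustrative only (the tiling flag below is an assumption about the
 * caller's surfaces): on platforms with the legacy blitter the emitted
 * command dword encodes a length of 6, or 8 on gen8+ where the addresses
 * take two dwords each.
 *
 *   intel_bb_blit_start(ibb, XY_SRC_COPY_BLT_SRC_TILED);
 *   ... followed by BR13, coordinates and relocations; see
 *   intel_bb_emit_blt_copy() below for the full sequence.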
*/ void intel_bb_blit_start(struct intel_bb *ibb, uint32_t flags) { if (blt_has_xy_src_copy(ibb->fd)) intel_bb_out(ibb, XY_SRC_COPY_BLT_CMD | XY_SRC_COPY_BLT_WRITE_ALPHA | XY_SRC_COPY_BLT_WRITE_RGB | flags | (6 + 2 * (ibb->gen >= 8))); else if (blt_has_fast_copy(ibb->fd)) intel_bb_out(ibb, XY_FAST_COPY_BLT | flags); else igt_assert_f(0, "No supported blit command found\n"); } /* * intel_bb_emit_blt_copy: * @ibb: batchbuffer * @src: source buffer (intel_buf) * @src_x1: source x1 position * @src_y1: source y1 position * @src_pitch: source pitch * @dst: destination buffer (intel_buf) * @dst_x1: destination x1 position * @dst_y1: destination y1 position * @dst_pitch: destination pitch * @width: width of data to copy * @height: height of data to copy * * Function emits complete blit command. */ void intel_bb_emit_blt_copy(struct intel_bb *ibb, struct intel_buf *src, int src_x1, int src_y1, int src_pitch, struct intel_buf *dst, int dst_x1, int dst_y1, int dst_pitch, int width, int height, int bpp) { const unsigned int gen = ibb->gen; uint32_t cmd_bits = 0; uint32_t br13_bits; uint32_t mask; igt_assert(bpp*(src_x1 + width) <= 8*src_pitch); igt_assert(bpp*(dst_x1 + width) <= 8*dst_pitch); igt_assert(src_pitch * (src_y1 + height) <= src->surface[0].size); igt_assert(dst_pitch * (dst_y1 + height) <= dst->surface[0].size); if (gen >= 4 && src->tiling != I915_TILING_NONE) { src_pitch /= 4; if (blt_has_xy_src_copy(ibb->fd)) cmd_bits |= XY_SRC_COPY_BLT_SRC_TILED; else if (blt_has_fast_copy(ibb->fd)) cmd_bits |= fast_copy_dword0(src->tiling, dst->tiling); else igt_assert_f(0, "No supported blit command found\n"); } if (gen >= 4 && dst->tiling != I915_TILING_NONE) { dst_pitch /= 4; if (blt_has_xy_src_copy(ibb->fd)) cmd_bits |= XY_SRC_COPY_BLT_DST_TILED; else cmd_bits |= fast_copy_dword0(src->tiling, dst->tiling); } CHECK_RANGE(src_x1); CHECK_RANGE(src_y1); CHECK_RANGE(dst_x1); CHECK_RANGE(dst_y1); CHECK_RANGE(width); CHECK_RANGE(height); CHECK_RANGE(src_x1 + width); CHECK_RANGE(src_y1 + height); CHECK_RANGE(dst_x1 + width); CHECK_RANGE(dst_y1 + height); CHECK_RANGE(src_pitch); CHECK_RANGE(dst_pitch); br13_bits = 0; if (blt_has_xy_src_copy(ibb->fd)) { switch (bpp) { case 8: break; case 16: /* supporting only RGB565, not ARGB1555 */ br13_bits |= 1 << 24; break; case 32: br13_bits |= 3 << 24; cmd_bits |= (XY_SRC_COPY_BLT_WRITE_ALPHA | XY_SRC_COPY_BLT_WRITE_RGB); break; default: igt_assert_f(0, "Unsupported pixel depth\n"); } } else { br13_bits = fast_copy_dword1(ibb->fd, src->tiling, dst->tiling, bpp); } if ((src->tiling | dst->tiling) >= I915_TILING_Y) { intel_bb_out(ibb, MI_LOAD_REGISTER_IMM(1)); intel_bb_out(ibb, BCS_SWCTRL); mask = (BCS_SRC_Y | BCS_DST_Y) << 16; if (src->tiling == I915_TILING_Y) mask |= BCS_SRC_Y; if (dst->tiling == I915_TILING_Y) mask |= BCS_DST_Y; intel_bb_out(ibb, mask); } intel_bb_add_intel_buf(ibb, src, false); intel_bb_add_intel_buf(ibb, dst, true); intel_bb_blit_start(ibb, cmd_bits); intel_bb_out(ibb, (br13_bits) | (0xcc << 16) | /* copy ROP */ dst_pitch); intel_bb_out(ibb, (dst_y1 << 16) | dst_x1); /* dst x1,y1 */ intel_bb_out(ibb, ((dst_y1 + height) << 16) | (dst_x1 + width)); /* dst x2,y2 */ intel_bb_emit_reloc_fenced(ibb, dst->handle, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0, dst->addr.offset); intel_bb_out(ibb, (src_y1 << 16) | src_x1); /* src x1,y1 */ intel_bb_out(ibb, src_pitch); intel_bb_emit_reloc_fenced(ibb, src->handle, I915_GEM_DOMAIN_RENDER, 0, 0, src->addr.offset); if (gen >= 6 && src->handle == dst->handle) { intel_bb_out(ibb, XY_SETUP_CLIP_BLT_CMD); 
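		/*
		 * The two zero dwords below are the operands of the
		 * XY_SETUP_CLIP_BLT command emitted above: they program a
		 * zeroed clip rectangle. This is only done on gen6+ when the
		 * source and destination share the same handle, presumably to
		 * reset any clip rectangle state when blitting within a
		 * single BO.
		 */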
		intel_bb_out(ibb, 0);
		intel_bb_out(ibb, 0);
	}

	if ((src->tiling | dst->tiling) >= I915_TILING_Y) {
		igt_assert(ibb->gen >= 6);
		intel_bb_out(ibb, MI_FLUSH_DW_CMD | 2);
		intel_bb_out(ibb, 0);
		intel_bb_out(ibb, 0);
		intel_bb_out(ibb, 0);

		intel_bb_out(ibb, MI_LOAD_REGISTER_IMM(1));
		intel_bb_out(ibb, BCS_SWCTRL);
		intel_bb_out(ibb, (BCS_SRC_Y | BCS_DST_Y) << 16);
	}
}

void intel_bb_blt_copy(struct intel_bb *ibb,
		       struct intel_buf *src,
		       int src_x1, int src_y1, int src_pitch,
		       struct intel_buf *dst,
		       int dst_x1, int dst_y1, int dst_pitch,
		       int width, int height, int bpp)
{
	intel_bb_emit_blt_copy(ibb, src, src_x1, src_y1, src_pitch,
			       dst, dst_x1, dst_y1, dst_pitch,
			       width, height, bpp);
	intel_bb_flush_blit(ibb);
}

/**
 * intel_bb_copy_intel_buf:
 * @ibb: batchbuffer object
 * @src: source buffer (intel_buf)
 * @dst: destination buffer (intel_buf)
 * @size: size of the copy range in bytes
 *
 * Emits a copy operation using blitter commands into the supplied batch.
 * A total of @size bytes from the start of @src is copied
 * over to @dst. Note that @size must be page-aligned.
 */
void intel_bb_copy_intel_buf(struct intel_bb *ibb,
			     struct intel_buf *src, struct intel_buf *dst,
			     long int size)
{
	igt_assert(size % 4096 == 0);

	intel_bb_blt_copy(ibb,
			  src, 0, 0, 4096,
			  dst, 0, 0, 4096,
			  4096 / 4, size / 4096, 32);
}

/**
 * igt_get_huc_copyfunc:
 * @devid: pci device id
 *
 * Returns:
 *
 * The platform-specific huc copy function pointer for the device specified
 * with @devid. Will return NULL when no huc copy function is implemented.
 */
igt_huc_copyfunc_t igt_get_huc_copyfunc(int devid)
{
	igt_huc_copyfunc_t copy = NULL;

	if (IS_GEN12(devid) || IS_GEN11(devid) || IS_GEN9(devid))
		copy = gen9_huc_copyfunc;

	return copy;
}

/**
 * intel_bb_track:
 * @do_tracking: bool
 *
 * Turn on (true) or off (false) tracking for intel_batchbuffers.
 */
void intel_bb_track(bool do_tracking)
{
	if (intel_bb_do_tracking == do_tracking)
		return;

	if (intel_bb_do_tracking) {
		struct intel_bb *entry, *tmp;

		pthread_mutex_lock(&intel_bb_list_lock);
		igt_list_for_each_entry_safe(entry, tmp, &intel_bb_list, link)
			igt_list_del(&entry->link);
		pthread_mutex_unlock(&intel_bb_list_lock);
	}

	intel_bb_do_tracking = do_tracking;
}

static void __intel_bb_reinit_alloc(struct intel_bb *ibb)
{
	if (ibb->allocator_type == INTEL_ALLOCATOR_NONE)
		return;

	ibb->allocator_handle = intel_allocator_open_full(ibb->fd, ibb->ctx,
							  ibb->allocator_start,
							  ibb->allocator_end,
							  ibb->allocator_type,
							  ibb->allocator_strategy,
							  ibb->alignment);
	intel_bb_reset(ibb, true);
}

/**
 * intel_bb_reinit_allocator:
 *
 * Reinit allocator and get offsets in tracked intel_batchbuffers.
 */
void intel_bb_reinit_allocator(void)
{
	struct intel_bb *iter;

	if (!intel_bb_do_tracking)
		return;

	pthread_mutex_lock(&intel_bb_list_lock);
	igt_list_for_each_entry(iter, &intel_bb_list, link)
		__intel_bb_reinit_alloc(iter);
	pthread_mutex_unlock(&intel_bb_list_lock);
}
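
/*
 * Illustrative example (not used by the library): a minimal end-to-end use of
 * the blitter-copy helpers above. The function name is hypothetical; @fd is
 * assumed to be an already opened Intel DRM fd, and the surface size, tiling
 * and pitch are arbitrary. buf_ops_create()/intel_buf_create()/
 * intel_bb_create() and the destroy counterparts are the public helpers
 * declared in intel_bufops.h and intel_batchbuffer.h.
 */
static inline void intel_bb_example_blt_copy(int fd)
{
	struct buf_ops *bops = buf_ops_create(fd);
	/* 256x256, 32bpp, linear surfaces; pitch is width * 4 bytes */
	struct intel_buf *src = intel_buf_create(bops, 256, 256, 32, 0,
						 I915_TILING_NONE,
						 I915_COMPRESSION_NONE);
	struct intel_buf *dst = intel_buf_create(bops, 256, 256, 32, 0,
						 I915_TILING_NONE,
						 I915_COMPRESSION_NONE);
	struct intel_bb *ibb = intel_bb_create(fd, 4096);

	/* Emits the blit, executes it on a blitter-capable ring and resets. */
	intel_bb_blt_copy(ibb,
			  src, 0, 0, 256 * 4,
			  dst, 0, 0, 256 * 4,
			  256, 256, 32);

	intel_bb_destroy(ibb);
	intel_buf_destroy(src);
	intel_buf_destroy(dst);
	buf_ops_destroy(bops);
}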