diff options
author | Chris Wilson <chris@chris-wilson.co.uk> | 2017-08-13 14:37:45 +0100 |
---|---|---|
committer | Chris Wilson <chris@chris-wilson.co.uk> | 2017-08-19 21:29:21 +0100 |
commit | 88c02bab75c746f31862367f1c2586fbea8534a1 (patch) | |
tree | a93cd434e670f631bf83579b23686feea48830cd | |
parent | 3452b36a2b1dea0c04437b7cb18a428454e93043 (diff) |
fmbt: First experiments
34 files changed, 4383 insertions, 0 deletions
diff --git a/fmbt/Makefile b/fmbt/Makefile new file mode 100644 index 00000000..8ac7a528 --- /dev/null +++ b/fmbt/Makefile @@ -0,0 +1,50 @@ +CXXFLAGS := -Wall -I/usr/local/include/fmbt -fPIC -g + +all: drm.log gem.log + +libi915.so: \ + i915/i915_batch.o \ + i915/i915_context.o \ + i915/i915_driver.o \ + i915/i915_engine.o \ + i915/i915_image.o \ + i915/i915_object.o \ + i915/i915_userptr.o \ + drm_device.o \ + gem_driver.o \ + gem_object.o + $(CXX) -shared -o $@ $^ + +drm.log: drm.conf test_drm_device.so + fmbt -l$@ $< + +test_drm_device.so: drm_device.o test_drm_device.o + $(CXX) -shared -o $@ $^ + +gem.log: gem.conf test_gem_driver.so + fmbt -l$@ $< + +test_gem_driver.so: gem_object.o gem_driver.o drm_device.o test_gem_driver.o + $(CXX) -shared -o $@ $^ + +i915.log: i915.conf test_i915_gem_coherency.so + LD_LIBRARY_PATH=. fmbt -l$@ $< + +test_i915_gem_coherency.so: test_i915_gem_coherency.o libi915.so + $(CXX) -shared -o $@ $^ + +simple-i915.log: simple-i915.conf simple_i915_gem_coherency.so + LD_LIBRARY_PATH=. fmbt -l$@ $< + +simple_i915_gem_coherency.so: simple_i915_gem_coherency.o libi915.so + $(CXX) -shared -o $@ $^ + +unittest_i915_driver: i915/unittest_i915_driver.o libi915.so + $(CXX) -o $@ $^ + +.PRECIOUS: %.cc +%.cc: %.cc.aal + fmbt-aalc -o $@ $< + +clean: + $(RM) *.o *.so i915/*.o test_drm_device.cc test_gem_driver.cc test_i915_gem_coherency.cc simple_i915_gem_coherency.cc *.log unittest_i915_driver diff --git a/fmbt/drm.conf b/fmbt/drm.conf new file mode 100644 index 00000000..64c710f8 --- /dev/null +++ b/fmbt/drm.conf @@ -0,0 +1,10 @@ +model = lib(test_drm_device) +adapter = lib(test_drm_device) +heuristic = lookahead(6) +coverage = perm(3) + +pass = noprogress(6) +pass = duration(1 sec) + +on_pass = exit(0) +on_fail = exit(1) diff --git a/fmbt/drm.h b/fmbt/drm.h new file mode 100644 index 00000000..0876ed62 --- /dev/null +++ b/fmbt/drm.h @@ -0,0 +1,368 @@ +/** + * \file drm.h + * Header for the Direct Rendering Manager + * + * \author Rickard E. 
(Rik) Faith <faith@valinux.com> + * + * \par Acknowledgments: + * Dec 1999, Richard Henderson <rth@twiddle.net>, move to generic \c cmpxchg. + */ + +/* + * Copyright 1999 Precision Insight, Inc., Cedar Park, Texas. + * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California. + * All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef _DRM_H_ +#define _DRM_H_ + +#include <linux/types.h> +#include <asm/ioctl.h> + +#define __user + +#if defined(__cplusplus) +extern "C" { +#endif + +#define DRM_NAME "drm" /**< Name in kernel, /dev, and /proc */ +#define DRM_MIN_ORDER 5 /**< At least 2^5 bytes = 32 bytes */ +#define DRM_MAX_ORDER 22 /**< Up to 2^22 bytes = 4MB */ +#define DRM_RAM_PERCENT 10 /**< How much system ram can we lock? 
*/ + +#define _DRM_LOCK_HELD 0x80000000U /**< Hardware lock is held */ +#define _DRM_LOCK_CONT 0x40000000U /**< Hardware lock is contended */ +#define _DRM_LOCK_IS_HELD(lock) ((lock) & _DRM_LOCK_HELD) +#define _DRM_LOCK_IS_CONT(lock) ((lock) & _DRM_LOCK_CONT) +#define _DRM_LOCKING_CONTEXT(lock) ((lock) & ~(_DRM_LOCK_HELD|_DRM_LOCK_CONT)) + +typedef unsigned int drm_context_t; +typedef unsigned int drm_drawable_t; +typedef unsigned int drm_magic_t; + +/** + * Cliprect. + * + * \warning: If you change this structure, make sure you change + * XF86DRIClipRectRec in the server as well + * + * \note KW: Actually it's illegal to change either for + * backwards-compatibility reasons. + */ +struct drm_clip_rect { + unsigned short x1; + unsigned short y1; + unsigned short x2; + unsigned short y2; +}; + +/** + * Drawable information. + */ +struct drm_drawable_info { + unsigned int num_rects; + struct drm_clip_rect *rects; +}; + +/** + * Texture region, + */ +struct drm_tex_region { + unsigned char next; + unsigned char prev; + unsigned char in_use; + unsigned char padding; + unsigned int age; +}; + +/** + * Hardware lock. + * + * The lock structure is a simple cache-line aligned integer. To avoid + * processor bus contention on a multiprocessor system, there should not be any + * other data stored in the same cache line. + */ +struct drm_hw_lock { + __volatile__ unsigned int lock; /**< lock variable */ + char padding[60]; /**< Pad to cache line */ +}; + +/** + * DRM_IOCTL_VERSION ioctl argument type. + * + * \sa drmGetVersion(). 
+ */ +struct drm_version { + int version_major; /**< Major version */ + int version_minor; /**< Minor version */ + int version_patchlevel; /**< Patch level */ + __kernel_size_t name_len; /**< Length of name buffer */ + char __user *name; /**< Name of driver */ + __kernel_size_t date_len; /**< Length of date buffer */ + char __user *date; /**< User-space buffer to hold date */ + __kernel_size_t desc_len; /**< Length of desc buffer */ + char __user *desc; /**< User-space buffer to hold desc */ +}; + +enum drm_vblank_seq_type { + _DRM_VBLANK_ABSOLUTE = 0x0, /**< Wait for specific vblank sequence number */ + _DRM_VBLANK_RELATIVE = 0x1, /**< Wait for given number of vblanks */ + /* bits 1-6 are reserved for high crtcs */ + _DRM_VBLANK_HIGH_CRTC_MASK = 0x0000003e, + _DRM_VBLANK_EVENT = 0x4000000, /**< Send event instead of blocking */ + _DRM_VBLANK_FLIP = 0x8000000, /**< Scheduled buffer swap should flip */ + _DRM_VBLANK_NEXTONMISS = 0x10000000, /**< If missed, wait for next vblank */ + _DRM_VBLANK_SECONDARY = 0x20000000, /**< Secondary display controller */ + _DRM_VBLANK_SIGNAL = 0x40000000 /**< Send signal instead of blocking, unsupported */ +}; +#define _DRM_VBLANK_HIGH_CRTC_SHIFT 1 + +#define _DRM_VBLANK_TYPES_MASK (_DRM_VBLANK_ABSOLUTE | _DRM_VBLANK_RELATIVE) +#define _DRM_VBLANK_FLAGS_MASK (_DRM_VBLANK_EVENT | _DRM_VBLANK_SIGNAL | \ + _DRM_VBLANK_SECONDARY | _DRM_VBLANK_NEXTONMISS) + +struct drm_wait_vblank_request { + enum drm_vblank_seq_type type; + unsigned int sequence; + unsigned long signal; +}; + +struct drm_wait_vblank_reply { + enum drm_vblank_seq_type type; + unsigned int sequence; + long tval_sec; + long tval_usec; +}; + +/** + * DRM_IOCTL_WAIT_VBLANK ioctl argument type. + * + * \sa drmWaitVBlank(). + */ +union drm_wait_vblank { + struct drm_wait_vblank_request request; + struct drm_wait_vblank_reply reply; +}; + +/** DRM_IOCTL_GEM_CLOSE ioctl argument type */ +struct drm_gem_close { + /** Handle of the object to be closed. 
*/ + __u32 handle; + __u32 pad; +}; + +/** DRM_IOCTL_GEM_FLINK ioctl argument type */ +struct drm_gem_flink { + /** Handle for the object being named */ + __u32 handle; + + /** Returned global name */ + __u32 name; +}; + +/** DRM_IOCTL_GEM_OPEN ioctl argument type */ +struct drm_gem_open { + /** Name of object being opened */ + __u32 name; + + /** Returned handle for the object */ + __u32 handle; + + /** Returned size of the object */ + __u64 size; +}; + +#define DRM_CAP_DUMB_BUFFER 0x1 +#define DRM_CAP_VBLANK_HIGH_CRTC 0x2 +#define DRM_CAP_DUMB_PREFERRED_DEPTH 0x3 +#define DRM_CAP_DUMB_PREFER_SHADOW 0x4 +#define DRM_CAP_PRIME 0x5 +#define DRM_PRIME_CAP_IMPORT 0x1 +#define DRM_PRIME_CAP_EXPORT 0x2 +#define DRM_CAP_TIMESTAMP_MONOTONIC 0x6 +#define DRM_CAP_ASYNC_PAGE_FLIP 0x7 +/* + * The CURSOR_WIDTH and CURSOR_HEIGHT capabilities return a valid widthxheight + * combination for the hardware cursor. The intention is that a hardware + * agnostic userspace can query a cursor plane size to use. + * + * Note that the cross-driver contract is to merely return a valid size; + * drivers are free to attach another meaning on top, eg. i915 returns the + * maximum plane size. + */ +#define DRM_CAP_CURSOR_WIDTH 0x8 +#define DRM_CAP_CURSOR_HEIGHT 0x9 +#define DRM_CAP_ADDFB2_MODIFIERS 0x10 +#define DRM_CAP_PAGE_FLIP_TARGET 0x11 +#define DRM_CAP_CRTC_IN_VBLANK_EVENT 0x12 +#define DRM_CAP_SYNCOBJ 0x13 + +/** DRM_IOCTL_GET_CAP ioctl argument type */ +struct drm_get_cap { + __u64 capability; + __u64 value; +}; + +/** + * DRM_CLIENT_CAP_STEREO_3D + * + * if set to 1, the DRM core will expose the stereo 3D capabilities of the + * monitor by advertising the supported 3D layouts in the flags of struct + * drm_mode_modeinfo. + */ +#define DRM_CLIENT_CAP_STEREO_3D 1 + +/** + * DRM_CLIENT_CAP_UNIVERSAL_PLANES + * + * If set to 1, the DRM core will expose all planes (overlay, primary, and + * cursor) to userspace. 
+ */ +#define DRM_CLIENT_CAP_UNIVERSAL_PLANES 2 + +/** + * DRM_CLIENT_CAP_ATOMIC + * + * If set to 1, the DRM core will expose atomic properties to userspace + */ +#define DRM_CLIENT_CAP_ATOMIC 3 + +/** DRM_IOCTL_SET_CLIENT_CAP ioctl argument type */ +struct drm_set_client_cap { + __u64 capability; + __u64 value; +}; + +#define DRM_RDWR O_RDWR +#define DRM_CLOEXEC O_CLOEXEC +struct drm_prime_handle { + __u32 handle; + + /** Flags.. only applicable for handle->fd */ + __u32 flags; + + /** Returned dmabuf file descriptor */ + __s32 fd; +}; + +struct drm_syncobj_create { + __u32 handle; + __u32 flags; +}; + +struct drm_syncobj_destroy { + __u32 handle; + __u32 pad; +}; + +#define DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_IMPORT_SYNC_FILE (1 << 0) +#define DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_EXPORT_SYNC_FILE (1 << 0) +struct drm_syncobj_handle { + __u32 handle; + __u32 flags; + + __s32 fd; + __u32 pad; +}; + +#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0) +#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT (1 << 1) +struct drm_syncobj_wait { + __u64 handles; + __s64 timeout_nsec; /* absolute timeout */ + __u32 count_handles; + __u32 flags; + __u32 first_signaled; /* only valid when not waiting all */ + __u32 pad; +}; + +#define DRM_IOCTL_BASE 'd' +#define DRM_IO(nr) _IO(DRM_IOCTL_BASE,nr) +#define DRM_IOR(nr,type) _IOR(DRM_IOCTL_BASE,nr,type) +#define DRM_IOW(nr,type) _IOW(DRM_IOCTL_BASE,nr,type) +#define DRM_IOWR(nr,type) _IOWR(DRM_IOCTL_BASE,nr,type) + +#define DRM_IOCTL_VERSION DRM_IOWR(0x00, struct drm_version) +#define DRM_IOCTL_GET_UNIQUE DRM_IOWR(0x01, struct drm_unique) +#define DRM_IOCTL_GET_MAGIC DRM_IOR( 0x02, struct drm_auth) +#define DRM_IOCTL_GET_MAP DRM_IOWR(0x04, struct drm_map) +#define DRM_IOCTL_GET_CLIENT DRM_IOWR(0x05, struct drm_client) +#define DRM_IOCTL_GET_STATS DRM_IOR( 0x06, struct drm_stats) +#define DRM_IOCTL_SET_VERSION DRM_IOWR(0x07, struct drm_set_version) +#define DRM_IOCTL_GEM_CLOSE DRM_IOW (0x09, struct drm_gem_close) +#define DRM_IOCTL_GEM_FLINK 
DRM_IOWR(0x0a, struct drm_gem_flink) +#define DRM_IOCTL_GEM_OPEN DRM_IOWR(0x0b, struct drm_gem_open) +#define DRM_IOCTL_GET_CAP DRM_IOWR(0x0c, struct drm_get_cap) +#define DRM_IOCTL_SET_CLIENT_CAP DRM_IOW( 0x0d, struct drm_set_client_cap) + +#define DRM_IOCTL_SET_MASTER DRM_IO(0x1e) +#define DRM_IOCTL_DROP_MASTER DRM_IO(0x1f) + +#define DRM_IOCTL_PRIME_HANDLE_TO_FD DRM_IOWR(0x2d, struct drm_prime_handle) +#define DRM_IOCTL_PRIME_FD_TO_HANDLE DRM_IOWR(0x2e, struct drm_prime_handle) + +#define DRM_IOCTL_WAIT_VBLANK DRM_IOWR(0x3a, union drm_wait_vblank) + +#define DRM_IOCTL_SYNCOBJ_CREATE DRM_IOWR(0xBF, struct drm_syncobj_create) +#define DRM_IOCTL_SYNCOBJ_DESTROY DRM_IOWR(0xC0, struct drm_syncobj_destroy) +#define DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD DRM_IOWR(0xC1, struct drm_syncobj_handle) +#define DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE DRM_IOWR(0xC2, struct drm_syncobj_handle) +#define DRM_IOCTL_SYNCOBJ_WAIT DRM_IOWR(0xC3, struct drm_syncobj_wait) + +#define DRM_COMMAND_BASE 0x40 +#define DRM_COMMAND_END 0xA0 + +/** + * Header for events written back to userspace on the drm fd. The + * type defines the type of event, the length specifies the total + * length of the event (including the header), and user_data is + * typically a 64 bit value passed with the ioctl that triggered the + * event. A read on the drm fd will always only return complete + * events, that is, if for example the read buffer is 100 bytes, and + * there are two 64 byte events pending, only one will be returned. + * + * Event types 0 - 0x7fffffff are generic drm events, 0x80000000 and + * up are chipset specific. 
+ */
+struct drm_event {
+	__u32 type;
+	__u32 length;
+};
+
+#define DRM_EVENT_VBLANK 0x01
+#define DRM_EVENT_FLIP_COMPLETE 0x02
+
+struct drm_event_vblank {
+	struct drm_event base;
+	__u64 user_data;
+	__u32 tv_sec;
+	__u32 tv_usec;
+	__u32 sequence;
+	__u32 crtc_id; /* 0 on older kernels that do not support this */
+};
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif
diff --git a/fmbt/drm_device.cc b/fmbt/drm_device.cc
new file mode 100644
index 00000000..4219f72b
--- /dev/null
+++ b/fmbt/drm_device.cc
@@ -0,0 +1,96 @@
+#include <fcntl.h>
+#include <unistd.h>
+#include <asm/ioctl.h>
+#include <sys/ioctl.h>
+#include <errno.h>
+#include <string.h>
+#include <stdio.h>
+
+#include "drm_device.h"
+
+/* Local mirror of the DRM_IOCTL_VERSION argument layout (struct drm_version). */
+struct l_drm_version {
+	int version_major;
+	int version_minor;
+	int version_patchlevel;
+	size_t name_len;
+	char *name;
+	size_t date_len;
+	char *date;
+	size_t desc_len;
+	char *desc;
+};
+
+#define DRM_IOCTL_VERSION _IOWR('d', 0x00, struct l_drm_version)
+
+/*
+ * drm_device::open()/close() hide the libc functions of the same name inside
+ * member scope, so the system calls are reached through these file-local
+ * wrappers. (Names starting with "__" are reserved for the implementation.)
+ */
+static int sys_open(const char *path, unsigned int flags)
+{
+	return open(path, flags);
+}
+
+static int sys_close(int fd)
+{
+	return close(fd);
+}
+
+drm_device::drm_device() {
+	fd = -1;
+}
+
+/*
+ * Open the DRM node at @path and read back the driver name and version.
+ * Returns 0 on success, -EBUSY if this device is already open, or -errno
+ * from open(2) / the version ioctl; after a failed open or ioctl fd is -1.
+ */
+int drm_device::open(const char *path) {
+	if (fd != -1)
+		return -EBUSY;
+
+	fd = sys_open(path, O_RDWR);
+	if (fd < 0)
+		return -errno;
+
+	/* for valgrind, and so that name is always NUL terminated */
+	memset(name, 0, sizeof(name));
+
+	struct l_drm_version v;
+	memset(&v, 0, sizeof(v));
+	/*
+	 * The kernel copies at most name_len bytes and does not append a
+	 * terminator, so keep the last byte of name out of its reach.
+	 */
+	v.name_len = sizeof(name) - 1;
+	v.name = name;
+	if (ioctl(fd, DRM_IOCTL_VERSION, &v)) {
+		int err = -errno;
+		sys_close(fd);
+		fd = -1;
+		return err;
+	}
+
+	version.major = v.version_major;
+	version.minor = v.version_minor;
+	version.patch = v.version_patchlevel;
+
+	//printf("Opened %s v%d.%d.%d on %s\n", name, v.version_major, v.version_minor, v.version_patchlevel, path);
+	return 0;
+}
+
+/* Close the device. Returns -EBADF if it is not open, else the close(2) result. */
+int drm_device::close(void) {
+	if (fd < 0)
+		return -EBADF;
+
+	int old_fd = fd;
+	fd = -1;
+	return sys_close(old_fd);
+}
+
+drm_device::~drm_device() {
+	close();
+}
diff --git
a/fmbt/drm_device.h b/fmbt/drm_device.h
new file mode 100644
index 00000000..21fa1533
--- /dev/null
+++ b/fmbt/drm_device.h
@@ -0,0 +1,21 @@
+#ifndef DRM_DEVICE_H
+#define DRM_DEVICE_H
+
+#include <errno.h>
+
+class drm_device {
+public:
+	drm_device();
+	~drm_device();
+	int open(const char *path);
+	int close(void);
+
+	int fd;
+
+	char name[16];
+	struct {
+		int major, minor, patch;
+	} version;
+};
+
+#endif
diff --git a/fmbt/gem.conf b/fmbt/gem.conf
new file mode 100644
index 00000000..eb88e449
--- /dev/null
+++ b/fmbt/gem.conf
@@ -0,0 +1,9 @@
+model = lib(test_gem_driver)
+adapter = lib(test_gem_driver)
+heuristic = lookahead
+coverage = perm(3)
+
+pass = duration(1 sec)
+
+on_pass = exit(0)
+on_fail = exit(1)
diff --git a/fmbt/gem_driver.cc b/fmbt/gem_driver.cc
new file mode 100644
index 00000000..afd87d04
--- /dev/null
+++ b/fmbt/gem_driver.cc
@@ -0,0 +1,47 @@
+#include <iterator>
+
+#include <unistd.h>
+
+#include "gem_driver.h"
+
+/* Take a private duplicate of the device fd so the driver owns its own descriptor. */
+gem_driver::gem_driver(drm_device &dev) {
+	_fd = dup(dev.fd);
+}
+
+/* Release the descriptor duplicated by the constructor. */
+gem_driver::~gem_driver() {
+	if (_fd >= 0)
+		close(_fd);
+}
+
+/* Fallback for devices without a registered backend: it cannot create objects. */
+class dummy_driver : public gem_driver {
+public:
+	gem_object *create_object(uint64_t size);
+	dummy_driver(drm_device &dev);
+private:
+};
+
+dummy_driver::dummy_driver(drm_device &dev) : gem_driver(dev) {
+}
+
+gem_object *dummy_driver::create_object(uint64_t sz) {
+	return 0;
+}
+
+/*
+ * Instantiate the backend registered under dev.name, or a dummy_driver if
+ * none matches. Returns 0 when the device is not open.
+ */
+gem_driver *gem_driver_factory::create_driver(drm_device &dev) {
+	if (dev.fd < 0)
+		return 0;
+
+	std::map<std::string, gem_driver_create_fn>::iterator
+		it = map.find(dev.name);
+	if (it != map.end())
+		return it->second(dev);
+
+	return new dummy_driver(dev);
+}
diff --git a/fmbt/gem_driver.h b/fmbt/gem_driver.h
new file mode 100644
index 00000000..9fe410c5
--- /dev/null
+++ b/fmbt/gem_driver.h
@@ -0,0 +1,92 @@
+#ifndef GEM_DRIVER_H
+#define GEM_DRIVER_H
+
+#include <stdint.h>
+#include <sys/syscall.h>
+#include <sys/ioctl.h>
+#include <errno.h>
+
+#include <stdio.h>
+
+#include <map>
+#include <string>
+
+#include "drm_device.h"
+#include "gem_object.h"
+
+/* Portable fallback: libc ioctl() with failures folded into a -errno return. */
+static inline long __sys_ioctl_32(int fd, unsigned long cmd, void *arg)
+{
+	long ret = ioctl(fd, cmd, arg);
+	if (ret == -1)
+		ret = -errno;
+	return ret;
+}
+
+/* Abstract per-device GEM backend; wraps a duplicated DRM fd. */
+class gem_driver {
+	int _fd;
+public:
+	virtual gem_object *create_object(uint64_t size) = 0;
+	gem_driver(drm_device &dev);
+	/* Virtual: backends are created with new and handed out as gem_driver *. */
+	virtual ~gem_driver();
+
+	/*
+	 * Issue a single ioctl on the driver fd. On x86-64 Linux the syscall
+	 * instruction is used directly and the kernel's return value (a
+	 * negative errno on failure) is passed back unchanged.
+	 */
+	inline long __ioctl(unsigned long cmd, void *arg) {
+		long ret;
+
+#if defined(__linux__) && defined(__GNUC__) && defined (__x86_64__)
+		__asm__ __volatile__
+			("syscall"
+			 : "=a" (ret)
+			 : "0" (__NR_ioctl), "D" (_fd), "S" (cmd), "d" (arg)
+			 : "cc", "rcx", "r11", "memory");
+#else
+		ret = __sys_ioctl_32(_fd, cmd, arg);
+#endif
+
+		return ret;
+	}
+
+	/* Like __ioctl(), but restarts the call while it reports -EINTR or -EAGAIN. */
+	long ioctl(unsigned long cmd, void *arg) {
+		long ret;
+		do {
+			ret = __ioctl(cmd, arg);
+		} while (ret == -EINTR || ret == -EAGAIN);
+
+		return ret;
+	}
+
+	int fd() { return _fd; }
+};
+
+typedef gem_driver *(*gem_driver_create_fn)(drm_device &);
+
+/* Process-wide registry mapping DRM driver names to backend constructors. */
+class gem_driver_factory {
+private:
+	gem_driver_factory() {}
+	/* Non-copyable: copy construction and copy assignment are private. */
+	gem_driver_factory(const gem_driver_factory &) {}
+	gem_driver_factory &operator=(const gem_driver_factory &) { return *this; }
+	std::map<std::string, gem_driver_create_fn> map;
+public:
+	~gem_driver_factory() { map.clear(); }
+
+	static gem_driver_factory *get() {
+		static gem_driver_factory singleton;
+		return &singleton;
+	}
+	void add(const std::string &name, gem_driver_create_fn fn) {
+		map[name] = fn;
+	}
+
+	gem_driver *create_driver(drm_device &drv);
+};
+
+#endif
diff --git a/fmbt/gem_object.cc b/fmbt/gem_object.cc
new file mode 100644
index 00000000..ead54a35
--- /dev/null
+++ b/fmbt/gem_object.cc
@@ -0,0 +1,19 @@
+#include <asm/ioctl.h>
+#include <sys/ioctl.h>
+#include <errno.h>
+#include <string.h>
+
+#include "gem_object.h"
+#include "gem_driver.h"
+
+#include "drm.h"
+
+gem_object::~gem_object() {
+	if (!_handle)
+		return;
+
+	struct drm_gem_close arg;
+	memset(&arg, 0, sizeof(arg));
+	arg.handle = _handle;
+	_driver->ioctl(DRM_IOCTL_GEM_CLOSE, &arg);
+}
diff --git a/fmbt/gem_object.h b/fmbt/gem_object.h
new file mode
100644
index 00000000..bd788c96
--- /dev/null
+++ b/fmbt/gem_object.h
@@ -0,0 +1,23 @@
+#ifndef GEM_OBJECT_H
+#define GEM_OBJECT_H
+
+#include <stdint.h>
+#include <stdio.h>
+
+class gem_driver;
+
+/* A GEM buffer (handle + size) tied to the gem_driver that created it. */
+class gem_object {
+public:
+	gem_object(gem_driver *driver, uint64_t size, uint32_t handle)
+		: _size(size), _handle(handle), _driver(driver) {
+	}
+	/* Virtual so that backend subclasses can be deleted through a gem_object *. */
+	virtual ~gem_object();
+
+	uint64_t _size;
+	uint32_t _handle;
+	gem_driver *_driver;
+};
+
+#endif /* GEM_OBJECT_H */
diff --git a/fmbt/i915.conf b/fmbt/i915.conf
new file mode 100644
index 00000000..8aa08b8e
--- /dev/null
+++ b/fmbt/i915.conf
@@ -0,0 +1,9 @@
+model = lib(test_i915_gem_coherency)
+adapter = lib(test_i915_gem_coherency)
+heuristic = lookahead
+coverage = perm(100)
+
+pass = duration(120 sec)
+
+on_pass = exit(0)
+on_fail = exit(1)
diff --git a/fmbt/i915/drm.h b/fmbt/i915/drm.h
new file mode 100644
index 00000000..0876ed62
--- /dev/null
+++ b/fmbt/i915/drm.h
@@ -0,0 +1,368 @@
+/**
+ * \file drm.h
+ * Header for the Direct Rendering Manager
+ *
+ * \author Rickard E. (Rik) Faith <faith@valinux.com>
+ *
+ * \par Acknowledgments:
+ * Dec 1999, Richard Henderson <rth@twiddle.net>, move to generic \c cmpxchg.
+ */
+
+/*
+ * Copyright 1999 Precision Insight, Inc., Cedar Park, Texas.
+ * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California.
+ * All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef _DRM_H_ +#define _DRM_H_ + +#include <linux/types.h> +#include <asm/ioctl.h> + +#define __user + +#if defined(__cplusplus) +extern "C" { +#endif + +#define DRM_NAME "drm" /**< Name in kernel, /dev, and /proc */ +#define DRM_MIN_ORDER 5 /**< At least 2^5 bytes = 32 bytes */ +#define DRM_MAX_ORDER 22 /**< Up to 2^22 bytes = 4MB */ +#define DRM_RAM_PERCENT 10 /**< How much system ram can we lock? */ + +#define _DRM_LOCK_HELD 0x80000000U /**< Hardware lock is held */ +#define _DRM_LOCK_CONT 0x40000000U /**< Hardware lock is contended */ +#define _DRM_LOCK_IS_HELD(lock) ((lock) & _DRM_LOCK_HELD) +#define _DRM_LOCK_IS_CONT(lock) ((lock) & _DRM_LOCK_CONT) +#define _DRM_LOCKING_CONTEXT(lock) ((lock) & ~(_DRM_LOCK_HELD|_DRM_LOCK_CONT)) + +typedef unsigned int drm_context_t; +typedef unsigned int drm_drawable_t; +typedef unsigned int drm_magic_t; + +/** + * Cliprect. + * + * \warning: If you change this structure, make sure you change + * XF86DRIClipRectRec in the server as well + * + * \note KW: Actually it's illegal to change either for + * backwards-compatibility reasons. + */ +struct drm_clip_rect { + unsigned short x1; + unsigned short y1; + unsigned short x2; + unsigned short y2; +}; + +/** + * Drawable information. 
+ */ +struct drm_drawable_info { + unsigned int num_rects; + struct drm_clip_rect *rects; +}; + +/** + * Texture region, + */ +struct drm_tex_region { + unsigned char next; + unsigned char prev; + unsigned char in_use; + unsigned char padding; + unsigned int age; +}; + +/** + * Hardware lock. + * + * The lock structure is a simple cache-line aligned integer. To avoid + * processor bus contention on a multiprocessor system, there should not be any + * other data stored in the same cache line. + */ +struct drm_hw_lock { + __volatile__ unsigned int lock; /**< lock variable */ + char padding[60]; /**< Pad to cache line */ +}; + +/** + * DRM_IOCTL_VERSION ioctl argument type. + * + * \sa drmGetVersion(). + */ +struct drm_version { + int version_major; /**< Major version */ + int version_minor; /**< Minor version */ + int version_patchlevel; /**< Patch level */ + __kernel_size_t name_len; /**< Length of name buffer */ + char __user *name; /**< Name of driver */ + __kernel_size_t date_len; /**< Length of date buffer */ + char __user *date; /**< User-space buffer to hold date */ + __kernel_size_t desc_len; /**< Length of desc buffer */ + char __user *desc; /**< User-space buffer to hold desc */ +}; + +enum drm_vblank_seq_type { + _DRM_VBLANK_ABSOLUTE = 0x0, /**< Wait for specific vblank sequence number */ + _DRM_VBLANK_RELATIVE = 0x1, /**< Wait for given number of vblanks */ + /* bits 1-6 are reserved for high crtcs */ + _DRM_VBLANK_HIGH_CRTC_MASK = 0x0000003e, + _DRM_VBLANK_EVENT = 0x4000000, /**< Send event instead of blocking */ + _DRM_VBLANK_FLIP = 0x8000000, /**< Scheduled buffer swap should flip */ + _DRM_VBLANK_NEXTONMISS = 0x10000000, /**< If missed, wait for next vblank */ + _DRM_VBLANK_SECONDARY = 0x20000000, /**< Secondary display controller */ + _DRM_VBLANK_SIGNAL = 0x40000000 /**< Send signal instead of blocking, unsupported */ +}; +#define _DRM_VBLANK_HIGH_CRTC_SHIFT 1 + +#define _DRM_VBLANK_TYPES_MASK (_DRM_VBLANK_ABSOLUTE | _DRM_VBLANK_RELATIVE) +#define 
_DRM_VBLANK_FLAGS_MASK (_DRM_VBLANK_EVENT | _DRM_VBLANK_SIGNAL | \ + _DRM_VBLANK_SECONDARY | _DRM_VBLANK_NEXTONMISS) + +struct drm_wait_vblank_request { + enum drm_vblank_seq_type type; + unsigned int sequence; + unsigned long signal; +}; + +struct drm_wait_vblank_reply { + enum drm_vblank_seq_type type; + unsigned int sequence; + long tval_sec; + long tval_usec; +}; + +/** + * DRM_IOCTL_WAIT_VBLANK ioctl argument type. + * + * \sa drmWaitVBlank(). + */ +union drm_wait_vblank { + struct drm_wait_vblank_request request; + struct drm_wait_vblank_reply reply; +}; + +/** DRM_IOCTL_GEM_CLOSE ioctl argument type */ +struct drm_gem_close { + /** Handle of the object to be closed. */ + __u32 handle; + __u32 pad; +}; + +/** DRM_IOCTL_GEM_FLINK ioctl argument type */ +struct drm_gem_flink { + /** Handle for the object being named */ + __u32 handle; + + /** Returned global name */ + __u32 name; +}; + +/** DRM_IOCTL_GEM_OPEN ioctl argument type */ +struct drm_gem_open { + /** Name of object being opened */ + __u32 name; + + /** Returned handle for the object */ + __u32 handle; + + /** Returned size of the object */ + __u64 size; +}; + +#define DRM_CAP_DUMB_BUFFER 0x1 +#define DRM_CAP_VBLANK_HIGH_CRTC 0x2 +#define DRM_CAP_DUMB_PREFERRED_DEPTH 0x3 +#define DRM_CAP_DUMB_PREFER_SHADOW 0x4 +#define DRM_CAP_PRIME 0x5 +#define DRM_PRIME_CAP_IMPORT 0x1 +#define DRM_PRIME_CAP_EXPORT 0x2 +#define DRM_CAP_TIMESTAMP_MONOTONIC 0x6 +#define DRM_CAP_ASYNC_PAGE_FLIP 0x7 +/* + * The CURSOR_WIDTH and CURSOR_HEIGHT capabilities return a valid widthxheight + * combination for the hardware cursor. The intention is that a hardware + * agnostic userspace can query a cursor plane size to use. + * + * Note that the cross-driver contract is to merely return a valid size; + * drivers are free to attach another meaning on top, eg. i915 returns the + * maximum plane size. 
+ */ +#define DRM_CAP_CURSOR_WIDTH 0x8 +#define DRM_CAP_CURSOR_HEIGHT 0x9 +#define DRM_CAP_ADDFB2_MODIFIERS 0x10 +#define DRM_CAP_PAGE_FLIP_TARGET 0x11 +#define DRM_CAP_CRTC_IN_VBLANK_EVENT 0x12 +#define DRM_CAP_SYNCOBJ 0x13 + +/** DRM_IOCTL_GET_CAP ioctl argument type */ +struct drm_get_cap { + __u64 capability; + __u64 value; +}; + +/** + * DRM_CLIENT_CAP_STEREO_3D + * + * if set to 1, the DRM core will expose the stereo 3D capabilities of the + * monitor by advertising the supported 3D layouts in the flags of struct + * drm_mode_modeinfo. + */ +#define DRM_CLIENT_CAP_STEREO_3D 1 + +/** + * DRM_CLIENT_CAP_UNIVERSAL_PLANES + * + * If set to 1, the DRM core will expose all planes (overlay, primary, and + * cursor) to userspace. + */ +#define DRM_CLIENT_CAP_UNIVERSAL_PLANES 2 + +/** + * DRM_CLIENT_CAP_ATOMIC + * + * If set to 1, the DRM core will expose atomic properties to userspace + */ +#define DRM_CLIENT_CAP_ATOMIC 3 + +/** DRM_IOCTL_SET_CLIENT_CAP ioctl argument type */ +struct drm_set_client_cap { + __u64 capability; + __u64 value; +}; + +#define DRM_RDWR O_RDWR +#define DRM_CLOEXEC O_CLOEXEC +struct drm_prime_handle { + __u32 handle; + + /** Flags.. 
only applicable for handle->fd */ + __u32 flags; + + /** Returned dmabuf file descriptor */ + __s32 fd; +}; + +struct drm_syncobj_create { + __u32 handle; + __u32 flags; +}; + +struct drm_syncobj_destroy { + __u32 handle; + __u32 pad; +}; + +#define DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_IMPORT_SYNC_FILE (1 << 0) +#define DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_EXPORT_SYNC_FILE (1 << 0) +struct drm_syncobj_handle { + __u32 handle; + __u32 flags; + + __s32 fd; + __u32 pad; +}; + +#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0) +#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT (1 << 1) +struct drm_syncobj_wait { + __u64 handles; + __s64 timeout_nsec; /* absolute timeout */ + __u32 count_handles; + __u32 flags; + __u32 first_signaled; /* only valid when not waiting all */ + __u32 pad; +}; + +#define DRM_IOCTL_BASE 'd' +#define DRM_IO(nr) _IO(DRM_IOCTL_BASE,nr) +#define DRM_IOR(nr,type) _IOR(DRM_IOCTL_BASE,nr,type) +#define DRM_IOW(nr,type) _IOW(DRM_IOCTL_BASE,nr,type) +#define DRM_IOWR(nr,type) _IOWR(DRM_IOCTL_BASE,nr,type) + +#define DRM_IOCTL_VERSION DRM_IOWR(0x00, struct drm_version) +#define DRM_IOCTL_GET_UNIQUE DRM_IOWR(0x01, struct drm_unique) +#define DRM_IOCTL_GET_MAGIC DRM_IOR( 0x02, struct drm_auth) +#define DRM_IOCTL_GET_MAP DRM_IOWR(0x04, struct drm_map) +#define DRM_IOCTL_GET_CLIENT DRM_IOWR(0x05, struct drm_client) +#define DRM_IOCTL_GET_STATS DRM_IOR( 0x06, struct drm_stats) +#define DRM_IOCTL_SET_VERSION DRM_IOWR(0x07, struct drm_set_version) +#define DRM_IOCTL_GEM_CLOSE DRM_IOW (0x09, struct drm_gem_close) +#define DRM_IOCTL_GEM_FLINK DRM_IOWR(0x0a, struct drm_gem_flink) +#define DRM_IOCTL_GEM_OPEN DRM_IOWR(0x0b, struct drm_gem_open) +#define DRM_IOCTL_GET_CAP DRM_IOWR(0x0c, struct drm_get_cap) +#define DRM_IOCTL_SET_CLIENT_CAP DRM_IOW( 0x0d, struct drm_set_client_cap) + +#define DRM_IOCTL_SET_MASTER DRM_IO(0x1e) +#define DRM_IOCTL_DROP_MASTER DRM_IO(0x1f) + +#define DRM_IOCTL_PRIME_HANDLE_TO_FD DRM_IOWR(0x2d, struct drm_prime_handle) +#define 
DRM_IOCTL_PRIME_FD_TO_HANDLE DRM_IOWR(0x2e, struct drm_prime_handle) + +#define DRM_IOCTL_WAIT_VBLANK DRM_IOWR(0x3a, union drm_wait_vblank) + +#define DRM_IOCTL_SYNCOBJ_CREATE DRM_IOWR(0xBF, struct drm_syncobj_create) +#define DRM_IOCTL_SYNCOBJ_DESTROY DRM_IOWR(0xC0, struct drm_syncobj_destroy) +#define DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD DRM_IOWR(0xC1, struct drm_syncobj_handle) +#define DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE DRM_IOWR(0xC2, struct drm_syncobj_handle) +#define DRM_IOCTL_SYNCOBJ_WAIT DRM_IOWR(0xC3, struct drm_syncobj_wait) + +#define DRM_COMMAND_BASE 0x40 +#define DRM_COMMAND_END 0xA0 + +/** + * Header for events written back to userspace on the drm fd. The + * type defines the type of event, the length specifies the total + * length of the event (including the header), and user_data is + * typically a 64 bit value passed with the ioctl that triggered the + * event. A read on the drm fd will always only return complete + * events, that is, if for example the read buffer is 100 bytes, and + * there are two 64 byte events pending, only one will be returned. + * + * Event types 0 - 0x7fffffff are generic drm events, 0x80000000 and + * up are chipset specific. 
+ */
+struct drm_event {
+	__u32 type;
+	__u32 length;
+};
+
+#define DRM_EVENT_VBLANK 0x01
+#define DRM_EVENT_FLIP_COMPLETE 0x02
+
+struct drm_event_vblank {
+	struct drm_event base;
+	__u64 user_data;
+	__u32 tv_sec;
+	__u32 tv_usec;
+	__u32 sequence;
+	__u32 crtc_id; /* 0 on older kernels that do not support this */
+};
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif
diff --git a/fmbt/i915/i915_batch.cc b/fmbt/i915/i915_batch.cc
new file mode 100644
index 00000000..f1042b53
--- /dev/null
+++ b/fmbt/i915/i915_batch.cc
@@ -0,0 +1,202 @@
+#include "i915_batch.h"
+#include "i915_context.h"
+#include "i915_engine.h"
+#include "i915_object.h"
+
+#include <string.h>
+
+namespace i915 {
+	/* Set up an empty execbuf for @engine in @ctx; no batch buffer exists yet. */
+	batch::batch(context *ctx, engine *engine) {
+		_ctx = ctx;
+		_engine = engine;
+		_gen = ctx->drv()->gen();
+
+		memset(&execbuf, 0, sizeof(execbuf));
+		execbuf.rsvd1 = ctx->id();
+		execbuf.flags = engine->exec_id;
+		execbuf.flags |= I915_EXEC_HANDLE_LUT;
+		execbuf.flags |= I915_EXEC_NO_RELOC;
+		execbuf.flags |= I915_EXEC_BATCH_FIRST;
+
+		_map = _end = _base = 0;
+	}
+
+	/*
+	 * Reserve @num_dwords in the current batch buffer and return a pointer
+	 * to them. If they do not fit (or no buffer exists yet) a fresh 4096
+	 * byte object is created; when commands were already emitted, the old
+	 * buffer is chained to the new one with MI_BATCH_BUFFER_START.
+	 */
+	uint32_t *batch::begin(unsigned int num_dwords) {
+		/* Compare sizes, not pointers: _map and _end start out null. */
+		if ((size_t)(_end - _map) < num_dwords) {
+			object *bo = static_cast<object *>(_ctx->drv()->create_object(4096));
+			if (_map != _base) {
+				uint64_t addr = reloc(_map + 1, bo, _gen < 4, 0);
+				_map[0] = i915::MI_BATCH_BUFFER_START;
+				_map[1] = addr;
+				if (_gen >= 8) {
+					_map[2] = addr >> 32;
+					_map[0] |= 1 << 8 | 1;
+				} else if (_gen >= 6) {
+					_map[0] |= 1 << 8;
+				} else if (_gen >= 4) {
+					_map[0] |= 2 << 6;
+				} else {
+					_map[0] |= 2 << 6;
+					_map[1] |= 1;
+				}
+
+				/*
+				 * Retire the full buffer. Its relocations are moved
+				 * into batches because bb.relocs is reused for the
+				 * next buffer; flush() hands them to the kernel.
+				 */
+				batches.push_back(batch_bo());
+				batches.back().bo = bb.bo;
+				batches.back().relocs.swap(bb.relocs);
+			}
+
+			if (_ctx->drv()->llc()) {
+				bo->set_domain(I915_GEM_DOMAIN_WC, true);
+				_base = static_cast<uint32_t *>(bo->map_wc());
+			} else {
+				bo->set_domain(I915_GEM_DOMAIN_CPU, true);
+				_base = static_cast<uint32_t *>(bo->map_wb());
+			}
+			/* 1000 of the 1024 dwords; the rest is room for the chain/end command. */
+			_end = _base + 1000;
+			_map = _base;
+
+			add_object(bo);
+			bb.bo = bo;
+		}
+
+		uint32_t *ret = _map;
+		_map += num_dwords;
+		return ret;
+	}
+
+	/*
+	 * Return the execbuf slot for @bo, appending a new exec entry (seeded
+	 * with _ctx->get_offset(bo)) the first time the object is seen.
+	 */
+	unsigned int batch::add_object(object *bo) {
+		std::map<object*, unsigned int>::iterator it = lut.find(bo);
+		if (it != lut.end())
+			return it->second;
+
+		lut[bo] = execbuf.buffer_count;
+
+		drm_i915_gem_exec_object2 entry;
+		memset(&entry, 0, sizeof(entry));
+		entry.handle = bo->_handle;
+		entry.offset = _ctx->get_offset(bo);
+		exec.push_back(entry);
+
+		objects.push_back(bo);
+
+		return execbuf.buffer_count++;
+	}
+
+	/*
+	 * Record a relocation at @pkt (inside the current buffer) targeting
+	 * @bo + @delta, OR-ing @flags into the target's exec entry. Returns
+	 * the presumed address to write into the command stream.
+	 */
+	uint64_t batch::reloc(uint32_t *pkt,
+			      object *bo, uint32_t delta,
+			      unsigned int flags) {
+		unsigned int idx = add_object(bo);
+
+		if (flags)
+			exec[idx].flags |= flags;
+
+		struct drm_i915_gem_relocation_entry reloc;
+		memset(&reloc, 0, sizeof(reloc));
+		reloc.target_handle = idx;
+		reloc.offset = (pkt - _base) * sizeof(uint32_t);
+		reloc.presumed_offset = exec[idx].offset;
+		reloc.delta = delta + bo->reloc_offset();
+		bb.relocs.push_back(reloc);
+
+		return reloc.presumed_offset + reloc.delta;
+	}
+
+	/* Emit an MI_STORE_DWORD_IMM writing @value to @bo + @offset, in the layout @_gen expects. */
+	void batch::emit_MI_STORE_DWORD_IMM(object *bo,
+					    uint64_t offset,
+					    uint32_t value) {
+		uint32_t *pkt;
+		if (_gen >= 8) {
+			pkt = begin(4);
+			*pkt++ = i915::MI_STORE_DWORD_IMM | 2;
+			uint64_t r = reloc(pkt, bo, offset, EXEC_OBJECT_WRITE);
+			*pkt++ = r;
+			*pkt = r >> 32;
+		} else if (_gen >= 6) {
+			pkt = begin(4);
+			*pkt++ = i915::MI_STORE_DWORD_IMM | 2;
+			*pkt++ = 0;
+			*pkt = reloc(pkt, bo, offset, EXEC_OBJECT_WRITE);
+		} else if (_gen >= 4) {
+			pkt = begin(4);
+			*pkt++ = i915::MI_STORE_DWORD_IMM | 1 << 22 | 2;
+			*pkt++ = 0;
+			*pkt = reloc(pkt, bo, offset, EXEC_OBJECT_WRITE);
+		} else {
+			pkt = begin(3);
+			*pkt++ = i915::MI_STORE_DWORD_IMM | 1 << 22 | 1;
+			*pkt = reloc(pkt, bo, offset, EXEC_OBJECT_WRITE);
+		}
+		*++pkt = value;
+	}
+
+	/*
+	 * Terminate the batch, submit every buffer with execbuffer2, remember
+	 * the offsets left in the exec entries and reset to the empty state.
+	 */
+	void batch::flush() {
+		if (_map == _base)
+			return;
+
+		_map[0] = i915::MI_BATCH_BUFFER_END;
+
+		/* Retire the final buffer alongside any chained ones. */
+		batches.push_back(batch_bo());
+		batches.back().bo = bb.bo;
+		batches.back().relocs.swap(bb.relocs);
+
+		/* batches is complete, so its relocation arrays will not move again. */
+		for (size_t i = 0; i < batches.size(); i++) {
+			unsigned int idx = add_object(batches[i].bo);
+			exec[idx].relocs_ptr = (uint64_t)(uintptr_t)batches[i].relocs.data();
+			exec[idx].relocation_count = batches[i].relocs.size();
+		}
+
+		execbuf.buffers_ptr = (uint64_t)(uintptr_t)exec.data();
+		_ctx->drv()->ioctl(DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);
+
+		for (uint32_t i = 0; i < execbuf.buffer_count; i++)
+			_ctx->set_offset(objects[i], exec[i].offset);
+
+		/* Each batch buffer was created by begin(); release all of them. */
+		for (size_t i = 0; i < batches.size(); i++)
+			delete batches[i].bo;
+		bb.bo = 0;
+		batches.clear();
+		exec.clear();
+		objects.clear();
+		lut.clear();
+
+		execbuf.buffer_count = 0;
+		_map = _end = _base = 0;
+	}
+
+	/* Flush only if @bo is referenced by the pending batch. */
+	void batch::flush(object *bo) {
+		if (lut.find(bo) != lut.end())
+			flush();
+	}
+}
diff --git a/fmbt/i915/i915_batch.h b/fmbt/i915/i915_batch.h
new file mode 100644
index 00000000..43053171
--- /dev/null
+++ b/fmbt/i915/i915_batch.h
@@ -0,0 +1,60 @@
+#ifndef I915_BATCH_H
+#define I915_BATCH_H
+
+#include <vector>
+
+#include "i915_driver.h"
+#include "i915_engine.h"
+#include "i915_object.h"
+
+namespace i915 {
+	class context;
+	class object;
+
+	struct batch_bo {
+		std::vector<drm_i915_gem_relocation_entry> relocs;
+		object *bo;
+	};
+
+class batch {
+	std::vector<drm_i915_gem_exec_object2> exec;
+	std::vector<object *> objects;
+	std::map<object*, unsigned int> lut;
+
+	struct drm_i915_gem_execbuffer2 execbuf;
+
+	context *_ctx;
+	engine *_engine;
+	int _gen;
+
+	struct batch_bo bb;
+	std::vector<batch_bo> batches;
+
+	uint32_t *_base;
+	uint32_t *_map;
+	uint32_t *_end;
+
+public:
+	batch(context *ctx, engine *engine);
+
+	uint32_t *begin(unsigned int num_dwords);
+	unsigned add_object(object *bo);
+	uint64_t reloc(uint32_t *pkt,
+		       object *bo, uint32_t delta,
+		       unsigned int flags=0);
+
+	void emit_MI_STORE_DWORD_IMM(object *bo,
+				     uint64_t offset,
+				     uint32_t value);
+
+	void emit_XY_SRC_FILL(object *bo,
+			      uint32_t x1, uint32_t y1,
+			      uint32_t x2, uint32_t y2,
+			      uint32_t stride, uint32_t value) { }
+
+	void flush();
+	void flush(object *bo);
+};
+}
+
+#endif /* I915_BATCH_H */
diff --git a/fmbt/i915/i915_context.cc b/fmbt/i915/i915_context.cc
new file mode 100644
index
00000000..4c356fd1 --- /dev/null +++ b/fmbt/i915/i915_context.cc @@ -0,0 +1,38 @@ +#include "i915_context.h" +#include "i915_driver.h" +#include "i915_object.h" + +#include <string.h> + +namespace i915 { + uint64_t context::get_offset(object *bo) { + std::map<uint32_t, uint64_t>::iterator it = objects.find(bo->_handle); + if (it != objects.end()) + return it->second; + + uint64_t offset = last_offset; + last_offset += bo->_size; + last_offset &= gtt_size - 1; + + return objects[bo->_handle] = offset; + } + + void context::set_offset(object *bo, uint64_t offset) { + objects[bo->_handle] = offset; + } + + unsigned int context::reset_count() { + struct drm_i915_reset_stats arg; + memset(&arg, 0, sizeof(arg)); + arg.ctx_id = _id; + _drv->ioctl(DRM_IOCTL_I915_GET_RESET_STATS, &arg); + return arg.batch_active; + } + + context::~context() { + struct drm_i915_gem_context_create arg; + memset(&arg, 0, sizeof(arg)); + arg.ctx_id = _id; + _drv->ioctl(DRM_IOCTL_I915_GEM_CONTEXT_DESTROY, &arg); + } +} diff --git a/fmbt/i915/i915_context.h b/fmbt/i915/i915_context.h new file mode 100644 index 00000000..363aa617 --- /dev/null +++ b/fmbt/i915/i915_context.h @@ -0,0 +1,39 @@ +#ifndef I915_CONTEXT_H +#define I915_CONTEXT_H + +#include <map> +#include <stdint.h> + +namespace i915 { +class driver; +class object; + +class context { + std::map<uint32_t, uint64_t> objects; + + uint32_t _id; + driver *_drv; + + uint64_t last_offset; + uint64_t gtt_size; + +public: + context(driver *drv, uint32_t id) { + _drv = drv; + _id = id; + last_offset = 4096; + gtt_size = 256 << 20; + } + ~context(); + + uint32_t id() { return _id; } + driver *drv() { return _drv; } + + uint64_t get_offset(object *bo); + void set_offset(object *bo, uint64_t offset); + + unsigned int reset_count(); +}; +} + +#endif /* I915_CONTEXT_H */ diff --git a/fmbt/i915/i915_driver.cc b/fmbt/i915/i915_driver.cc new file mode 100644 index 00000000..e35f1a0c --- /dev/null +++ b/fmbt/i915/i915_driver.cc @@ -0,0 +1,254 @@ +#include 
<string.h> +#include <stdio.h> +#include <fcntl.h> +#include <unistd.h> +#include <stdlib.h> + +#include <sys/stat.h> +#include <sys/mount.h> +#include <sys/types.h> + +#include "i915_driver.h" +#include "i915_engine.h" +#include "i915_object.h" +#include "i915_userptr.h" + +static bool is_mountpoint(const char *path) +{ + char buf[strlen(path) + 4]; + struct stat st; + dev_t dev; + + snprintf(buf, sizeof(buf), "%s/.", path); + if (stat(buf, &st)) + return false; + + if (!S_ISDIR(st.st_mode)) + return false; + + dev = st.st_dev; + + snprintf(buf, sizeof(buf), "%s/..", path); + if (stat(buf, &st)) + return false; + + if (!S_ISDIR(st.st_mode)) + return false; + + return dev != st.st_dev; +} + +static const char *mount_debugfs(void) +{ + if (is_mountpoint("/sys/kernel/debug")) + return "/sys/kernel/debug"; + + if (is_mountpoint("/debug")) + return "/debug"; + + if (mount("debug", "/sys/kernel/debug", "debugfs", 0, 0)) + return NULL; + + return "/sys/kernel/debug"; +} + +static int open_debugfs(int device) +{ + struct stat st; + const char *mnt; + char path[200]; + int idx; + + if (fstat(device, &st) || !S_ISCHR(st.st_mode)) + return -1; + + mnt = mount_debugfs(); + if (!mnt) + return -1; + + idx = minor(st.st_rdev); + snprintf(path, sizeof(path), "%s/dri/%d/name", mnt, idx); + if (stat(path, &st)) + return -1; + + if (idx >= 64) { + int file, name_len, cmp_len; + char name[100], cmp[100]; + + file = open(path, O_RDONLY); + if (file < 0) + return -1; + + name_len = read(file, name, sizeof(name)); + close(file); + + for (idx = 0; idx < 16; idx++) { + snprintf(path, sizeof(path), "%s/dri/%d/name", + mnt, idx); + file = open(path, O_RDONLY); + if (file < 0) + return -1; + + cmp_len = read(file, cmp, sizeof(cmp)); + close(file); + + if (cmp_len == name_len && !memcmp(cmp, name, name_len)) + break; + } + + if (idx == 16) + return -1; + } + + snprintf(path, sizeof(path), "%s/dri/%d", mnt, idx); + return open(path, O_RDONLY); +} + +namespace i915 { + driver::driver(drm_device 
&dev) : gem_driver(dev), default_context(this, 0) { + _gen = 9; + + for (unsigned int i = 0; i < sizeof(engines)/sizeof(engines[0]); i++) + engines[i] = engine(this, i); + + struct drm_i915_getparam gp; + memset(&gp, 0, sizeof(gp)); + gp.param = I915_PARAM_HAS_LLC; + gp.value = (int *)&_llc; + ioctl(DRM_IOCTL_I915_GETPARAM, &gp); + + + _debugfs = open_debugfs(dev.fd); + } + + void driver::__force_gpu_reset(unsigned long value) { + int fd = openat(_debugfs, "i915_wedged", O_WRONLY); + if (fd != -1) { + char buf[128]; + int len = snprintf(buf, sizeof(buf), "0x%lx", value); + write(fd, buf, len+1); + close(fd); + } + } + + bool driver::has_gpu_reset() { + if (_debugfs == -1) + return false; + + int has_gpu_reset = 0; + struct drm_i915_getparam gp; + memset(&gp, 0, sizeof(gp)); + gp.param = I915_PARAM_HAS_GPU_RESET; + gp.value = (int *)&has_gpu_reset; + ioctl(DRM_IOCTL_I915_GETPARAM, &gp); + + return has_gpu_reset; + } + + void driver::__force_reclaim(unsigned long value) { + int fd = openat(_debugfs, "i915_gem_drop_caches", O_WRONLY); + if (fd != -1) { + char buf[128]; + int len = snprintf(buf, sizeof(buf), "0x%lx", value); + write(fd, buf, len+1); + close(fd); + } + } + + unsigned long driver::check_and_clear_missed_interrupts() { + if (_debugfs == -1) + return 0; + + char buf[128]; + int fd, len; + + unsigned long missed = 0; + fd = openat(_debugfs, "i915_ring_missed_irq", O_RDONLY); + if (fd == -1) + return 0; + + len = read(fd, buf, sizeof(buf)-1); + close(fd); + + buf[len] = '\0'; + missed = strtoul(buf, NULL, 16); + + if (missed) { + fd = openat(_debugfs, "i915_ring_missed_irq", O_WRONLY); + if (fd != -1) { + write(fd, "0", 2); + close(fd); + } + } + + return missed; + } + + gem_driver *driver::create(drm_device &dev) { + return new driver(dev); + } + + driver *driver::create(bool master) { + drm_device dev; + + if (!master) { + for (int i = 0; i < 16; i++) { + char path[256]; + snprintf(path, sizeof(path), "/dev/dri/renderD%d", i+128); + if (dev.open(path) == 0 
&& !strcmp(dev.name, "i915")) + return new driver(dev); + dev.close(); + } + } + + for (int i = 0; i < 16; i++) { + char path[256]; + snprintf(path, sizeof(path), "/dev/dri/card%d", i); + if (dev.open(path) == 0 && !strcmp(dev.name, "i915")) + return new driver(dev); + dev.close(); + } + + return 0; + } + + object *driver::create_userptr(void *data, unsigned long len, bool read_only) { + struct drm_i915_gem_userptr arg; + memset(&arg, 0, sizeof(arg)); + arg.user_ptr = (uintptr_t)data & -4096ull; + arg.user_size = ((uintptr_t)data + len + 4095) & -4096ull; + arg.user_size -= arg.user_ptr; + if (read_only) + arg.flags |= I915_USERPTR_READ_ONLY; + + if (ioctl(DRM_IOCTL_I915_GEM_USERPTR, &arg)) + return 0; + + return new userptr(this, arg.user_size, arg.handle, + (uintptr_t)data - arg.user_ptr); + } + + gem_object *driver::create_object(uint64_t sz) { + struct drm_i915_gem_create create; + memset(&create, 0, sizeof(create)); + create.size = (sz + 4095) & -4096; + if (ioctl(DRM_IOCTL_I915_GEM_CREATE, &create)) + return 0; + + return new i915::object(this, sz, create.handle); + } + + context *driver::create_context() { + struct drm_i915_gem_context_create arg; + memset(&arg, 0, sizeof(arg)); + if (ioctl(DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &arg)) + return 0; + + return new context(this, arg.ctx_id); + } +} + +__attribute__((constructor)) +static void init_i915_driver() { + gem_driver_factory::get()->add("i915", &i915::driver::create); +} diff --git a/fmbt/i915/i915_driver.h b/fmbt/i915/i915_driver.h new file mode 100644 index 00000000..15806f76 --- /dev/null +++ b/fmbt/i915/i915_driver.h @@ -0,0 +1,69 @@ +#ifndef I915_DRIVER_H +#define I915_DRIVER_H + +#include "../gem_driver.h" + +#include "i915_drm.h" +#include "i915_context.h" +#include "i915_engine.h" +#include "i915_tiling.h" + +namespace i915 { + enum MI_commands { + MI_BATCH_BUFFER_START = 0x31 << 23, + MI_BATCH_BUFFER_END = 0xa << 23, + MI_STORE_DWORD_IMM = 0x20 << 23, + }; + + enum XY_commands { + XY_SRC_COPY = 
0x2 << 29 | 0x53 << 22, + }; + + class object; + + class driver : public gem_driver { + private: + int _gen; + bool _llc; + + int _debugfs; + + public: + driver(drm_device &dev); + gem_object *create_object(uint64_t size); + object *create_userptr(void *data, unsigned long len, bool read_only=false); + + int gen() { return _gen; } + int llc() { return _llc; } + int debugfs() { return _debugfs; } + + static driver *create(bool master=false); + static gem_driver *create(drm_device &dev); + + context *create_context(); + + engine engines[VECS+1]; + context default_context; + + bool has_gpu_reset(); + void __force_gpu_reset(unsigned long value); + void force_gpu_reset(engine &engine) { + __force_gpu_reset(1ul << engine.exec_id); + } + void force_gpu_reset(engine *engine) { + __force_gpu_reset(1ul << engine->exec_id); + } + void force_gpu_reset() { + __force_gpu_reset(-1ul); + } + + void __force_reclaim(unsigned long value); + void force_reclaim() { + __force_reclaim(-1ul); + } + + unsigned long check_and_clear_missed_interrupts(); + }; +} + +#endif /* I915_DRIVER_H */ diff --git a/fmbt/i915/i915_drm.h b/fmbt/i915/i915_drm.h new file mode 100644 index 00000000..608ce195 --- /dev/null +++ b/fmbt/i915/i915_drm.h @@ -0,0 +1,1419 @@ +/* + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef _UAPI_I915_DRM_H_ +#define _UAPI_I915_DRM_H_ + +#include "drm.h" + +#if defined(__cplusplus) +extern "C" { +#endif + +/* Please note that modifications to all structs defined here are + * subject to backwards-compatibility constraints. + */ + +/** + * DOC: uevents generated by i915 on its device node + * + * I915_L3_PARITY_UEVENT - Generated when the driver receives a parity mismatch + * event from the gpu l3 cache. Additional information supplied is ROW, + * BANK, SUBBANK, SLICE of the affected cacheline. Userspace should keep + * track of these events and if a specific cache-line seems to have a + * persistent error remap it with the l3 remapping tool supplied in + * intel-gpu-tools. The value supplied with the event is always 1. + * + * I915_ERROR_UEVENT - Generated upon error detection, currently only via + * hangcheck. The error detection event is a good indicator of when things + * began to go badly. The value supplied with the event is a 1 upon error + * detection, and a 0 upon reset completion, signifying no more error + * exists. NOTE: Disabling hangcheck or reset via module parameter will + * cause the related events to not be seen. + * + * I915_RESET_UEVENT - Event is generated just before an attempt to reset + * the GPU. The value supplied with the event is always 1. NOTE: Disabling + * reset via module parameter will cause this event to not be seen. 
+ */ +#define I915_L3_PARITY_UEVENT "L3_PARITY_ERROR" +#define I915_ERROR_UEVENT "ERROR" +#define I915_RESET_UEVENT "RESET" + +/* + * MOCS indexes used for GPU surfaces, defining the cacheability of the + * surface data and the coherency for this data wrt. CPU vs. GPU accesses. + */ +enum i915_mocs_table_index { + /* + * Not cached anywhere, coherency between CPU and GPU accesses is + * guaranteed. + */ + I915_MOCS_UNCACHED, + /* + * Cacheability and coherency controlled by the kernel automatically + * based on the DRM_I915_GEM_SET_CACHING IOCTL setting and the current + * usage of the surface (used for display scanout or not). + */ + I915_MOCS_PTE, + /* + * Cached in all GPU caches available on the platform. + * Coherency between CPU and GPU accesses to the surface is not + * guaranteed without extra synchronization. + */ + I915_MOCS_CACHED, +}; + +/** + * DOC: perf_events exposed by i915 through /sys/bus/event_sources/drivers/i915 + * + */ +#define I915_SAMPLE_QUEUED 0 +#define I915_SAMPLE_BUSY 1 +#define I915_SAMPLE_WAIT 2 +#define I915_SAMPLE_SEMA 3 + +#define I915_SAMPLE_RCS 0 +#define I915_SAMPLE_VCS 1 +#define I915_SAMPLE_BCS 2 +#define I915_SAMPLE_VECS 3 + +#define __I915_PMU_COUNT(ring, id) ((ring) << 2 | (id)) + +#define I915_PMU_COUNT_RCS_BUSY __I915_PMU_COUNT(I915_SAMPLE_RCS, I915_SAMPLE_BUSY) +#define I915_PMU_COUNT_RCS_WAIT __I915_PMU_COUNT(I915_SAMPLE_RCS, I915_SAMPLE_WAIT) +#define I915_PMU_COUNT_RCS_SEMA __I915_PMU_COUNT(I915_SAMPLE_RCS, I915_SAMPLE_SEMA) + +#define I915_PMU_COUNT_VCS_BUSY __I915_PMU_COUNT(I915_SAMPLE_VCS, I915_SAMPLE_BUSY) +#define I915_PMU_COUNT_VCS_WAIT __I915_PMU_COUNT(I915_SAMPLE_VCS, I915_SAMPLE_WAIT) +#define I915_PMU_COUNT_VCS_SEMA __I915_PMU_COUNT(I915_SAMPLE_VCS, I915_SAMPLE_SEMA) + +#define I915_PMU_COUNT_BCS_BUSY __I915_PMU_COUNT(I915_SAMPLE_BCS, I915_SAMPLE_BUSY) +#define I915_PMU_COUNT_BCS_WAIT __I915_PMU_COUNT(I915_SAMPLE_BCS, I915_SAMPLE_WAIT) +#define I915_PMU_COUNT_BCS_SEMA __I915_PMU_COUNT(I915_SAMPLE_BCS, 
I915_SAMPLE_SEMA) + +#define I915_PMU_COUNT_VECS_BUSY __I915_PMU_COUNT(I915_SAMPLE_VECS, I915_SAMPLE_BUSY) +#define I915_PMU_COUNT_VECS_WAIT __I915_PMU_COUNT(I915_SAMPLE_VECS, I915_SAMPLE_WAIT) +#define I915_PMU_COUNT_VECS_SEMA __I915_PMU_COUNT(I915_SAMPLE_VECS, I915_SAMPLE_SEMA) + +#define I915_PMU_ACTUAL_FREQUENCY 32 +#define I915_PMU_REQUESTED_FREQUENCY 33 +#define I915_PMU_ENERGY 34 +#define I915_PMU_INTERRUPTS 35 + +#define I915_PMU_RC6_RESIDENCY 40 +#define I915_PMU_RC6p_RESIDENCY 41 +#define I915_PMU_RC6pp_RESIDENCY 42 + +/* due to userspace building against these headers we need some compat here */ +#define planeA_x pipeA_x +#define planeA_y pipeA_y +#define planeA_w pipeA_w +#define planeA_h pipeA_h +#define planeB_x pipeB_x +#define planeB_y pipeB_y +#define planeB_w pipeB_w +#define planeB_h pipeB_h + +/* + * i915 specific ioctls. + * + * The device specific ioctl range is [DRM_COMMAND_BASE, DRM_COMMAND_END) ie + * [0x40, 0xa0) (a0 is excluded). The numbers below are defined as offset + * against DRM_COMMAND_BASE and should be between [0x0, 0x60). 
+ */ +#define DRM_I915_INIT 0x00 +#define DRM_I915_FLUSH 0x01 +#define DRM_I915_FLIP 0x02 +#define DRM_I915_BATCHBUFFER 0x03 +#define DRM_I915_IRQ_EMIT 0x04 +#define DRM_I915_IRQ_WAIT 0x05 +#define DRM_I915_GETPARAM 0x06 +#define DRM_I915_SETPARAM 0x07 +#define DRM_I915_ALLOC 0x08 +#define DRM_I915_FREE 0x09 +#define DRM_I915_INIT_HEAP 0x0a +#define DRM_I915_CMDBUFFER 0x0b +#define DRM_I915_DESTROY_HEAP 0x0c +#define DRM_I915_SET_VBLANK_PIPE 0x0d +#define DRM_I915_GET_VBLANK_PIPE 0x0e +#define DRM_I915_VBLANK_SWAP 0x0f +#define DRM_I915_HWS_ADDR 0x11 +#define DRM_I915_GEM_INIT 0x13 +#define DRM_I915_GEM_EXECBUFFER 0x14 +#define DRM_I915_GEM_PIN 0x15 +#define DRM_I915_GEM_UNPIN 0x16 +#define DRM_I915_GEM_BUSY 0x17 +#define DRM_I915_GEM_THROTTLE 0x18 +#define DRM_I915_GEM_ENTERVT 0x19 +#define DRM_I915_GEM_LEAVEVT 0x1a +#define DRM_I915_GEM_CREATE 0x1b +#define DRM_I915_GEM_PREAD 0x1c +#define DRM_I915_GEM_PWRITE 0x1d +#define DRM_I915_GEM_MMAP 0x1e +#define DRM_I915_GEM_SET_DOMAIN 0x1f +#define DRM_I915_GEM_SW_FINISH 0x20 +#define DRM_I915_GEM_SET_TILING 0x21 +#define DRM_I915_GEM_GET_TILING 0x22 +#define DRM_I915_GEM_GET_APERTURE 0x23 +#define DRM_I915_GEM_MMAP_GTT 0x24 +#define DRM_I915_GET_PIPE_FROM_CRTC_ID 0x25 +#define DRM_I915_GEM_MADVISE 0x26 +#define DRM_I915_OVERLAY_PUT_IMAGE 0x27 +#define DRM_I915_OVERLAY_ATTRS 0x28 +#define DRM_I915_GEM_EXECBUFFER2 0x29 +#define DRM_I915_GEM_EXECBUFFER2_WR DRM_I915_GEM_EXECBUFFER2 +#define DRM_I915_GET_SPRITE_COLORKEY 0x2a +#define DRM_I915_SET_SPRITE_COLORKEY 0x2b +#define DRM_I915_GEM_WAIT 0x2c +#define DRM_I915_GEM_CONTEXT_CREATE 0x2d +#define DRM_I915_GEM_CONTEXT_DESTROY 0x2e +#define DRM_I915_GEM_SET_CACHING 0x2f +#define DRM_I915_GEM_GET_CACHING 0x30 +#define DRM_I915_REG_READ 0x31 +#define DRM_I915_GET_RESET_STATS 0x32 +#define DRM_I915_GEM_USERPTR 0x33 +#define DRM_I915_GEM_CONTEXT_GETPARAM 0x34 +#define DRM_I915_GEM_CONTEXT_SETPARAM 0x35 +#define DRM_I915_PERF_OPEN 0x36 +#define DRM_I915_PERF_ADD_CONFIG 0x37 
+#define DRM_I915_PERF_REMOVE_CONFIG 0x38 + +#define DRM_IOCTL_I915_GETPARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GETPARAM, drm_i915_getparam_t) +#define DRM_IOCTL_I915_SETPARAM DRM_IOW( DRM_COMMAND_BASE + DRM_I915_SETPARAM, drm_i915_setparam_t) +#define DRM_IOCTL_I915_GEM_EXECBUFFER2 DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2, struct drm_i915_gem_execbuffer2) +#define DRM_IOCTL_I915_GEM_EXECBUFFER2_WR DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2_WR, struct drm_i915_gem_execbuffer2) +#define DRM_IOCTL_I915_GEM_BUSY DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_BUSY, struct drm_i915_gem_busy) +#define DRM_IOCTL_I915_GEM_SET_CACHING DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_SET_CACHING, struct drm_i915_gem_caching) +#define DRM_IOCTL_I915_GEM_GET_CACHING DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_GET_CACHING, struct drm_i915_gem_caching) +#define DRM_IOCTL_I915_GEM_THROTTLE DRM_IO ( DRM_COMMAND_BASE + DRM_I915_GEM_THROTTLE) +#define DRM_IOCTL_I915_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_CREATE, struct drm_i915_gem_create) +#define DRM_IOCTL_I915_GEM_PREAD DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_PREAD, struct drm_i915_gem_pread) +#define DRM_IOCTL_I915_GEM_PWRITE DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_PWRITE, struct drm_i915_gem_pwrite) +#define DRM_IOCTL_I915_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MMAP, struct drm_i915_gem_mmap) +#define DRM_IOCTL_I915_GEM_MMAP_GTT DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MMAP_GTT, struct drm_i915_gem_mmap_gtt) +#define DRM_IOCTL_I915_GEM_SET_DOMAIN DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_SET_DOMAIN, struct drm_i915_gem_set_domain) +#define DRM_IOCTL_I915_GEM_SET_TILING DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_SET_TILING, struct drm_i915_gem_set_tiling) +#define DRM_IOCTL_I915_GEM_GET_TILING DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_GET_TILING, struct drm_i915_gem_get_tiling) +#define DRM_IOCTL_I915_GEM_GET_APERTURE DRM_IOR (DRM_COMMAND_BASE + DRM_I915_GEM_GET_APERTURE, struct 
drm_i915_gem_get_aperture) +#define DRM_IOCTL_I915_GEM_MADVISE DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MADVISE, struct drm_i915_gem_madvise) +#define DRM_IOCTL_I915_GEM_WAIT DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_WAIT, struct drm_i915_gem_wait) +#define DRM_IOCTL_I915_GEM_CONTEXT_CREATE DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_CREATE, struct drm_i915_gem_context_create) +#define DRM_IOCTL_I915_GEM_CONTEXT_DESTROY DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_DESTROY, struct drm_i915_gem_context_destroy) +#define DRM_IOCTL_I915_REG_READ DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_REG_READ, struct drm_i915_reg_read) +#define DRM_IOCTL_I915_GET_RESET_STATS DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GET_RESET_STATS, struct drm_i915_reset_stats) +#define DRM_IOCTL_I915_GEM_USERPTR DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_USERPTR, struct drm_i915_gem_userptr) +#define DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_GETPARAM, struct drm_i915_gem_context_param) +#define DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_SETPARAM, struct drm_i915_gem_context_param) +#define DRM_IOCTL_I915_PERF_OPEN DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_OPEN, struct drm_i915_perf_open_param) +#define DRM_IOCTL_I915_PERF_ADD_CONFIG DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_ADD_CONFIG, struct drm_i915_perf_oa_config) +#define DRM_IOCTL_I915_PERF_REMOVE_CONFIG DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_REMOVE_CONFIG, __u64) + +/* Ioctl to query kernel params: + */ +#define I915_PARAM_IRQ_ACTIVE 1 +#define I915_PARAM_ALLOW_BATCHBUFFER 2 +#define I915_PARAM_LAST_DISPATCH 3 +#define I915_PARAM_CHIPSET_ID 4 +#define I915_PARAM_HAS_GEM 5 +#define I915_PARAM_NUM_FENCES_AVAIL 6 +#define I915_PARAM_HAS_OVERLAY 7 +#define I915_PARAM_HAS_PAGEFLIPPING 8 +#define I915_PARAM_HAS_EXECBUF2 9 +#define I915_PARAM_HAS_BSD 10 +#define I915_PARAM_HAS_BLT 11 +#define I915_PARAM_HAS_RELAXED_FENCING 12 +#define I915_PARAM_HAS_COHERENT_RINGS 
13 +#define I915_PARAM_HAS_EXEC_CONSTANTS 14 +#define I915_PARAM_HAS_RELAXED_DELTA 15 +#define I915_PARAM_HAS_GEN7_SOL_RESET 16 +#define I915_PARAM_HAS_LLC 17 +#define I915_PARAM_HAS_ALIASING_PPGTT 18 +#define I915_PARAM_HAS_WAIT_TIMEOUT 19 +#define I915_PARAM_HAS_SEMAPHORES 20 +#define I915_PARAM_HAS_PRIME_VMAP_FLUSH 21 +#define I915_PARAM_HAS_VEBOX 22 +#define I915_PARAM_HAS_SECURE_BATCHES 23 +#define I915_PARAM_HAS_PINNED_BATCHES 24 +#define I915_PARAM_HAS_EXEC_NO_RELOC 25 +#define I915_PARAM_HAS_EXEC_HANDLE_LUT 26 +#define I915_PARAM_HAS_WT 27 +#define I915_PARAM_CMD_PARSER_VERSION 28 +#define I915_PARAM_HAS_COHERENT_PHYS_GTT 29 +#define I915_PARAM_MMAP_VERSION 30 +#define I915_PARAM_HAS_BSD2 31 +#define I915_PARAM_REVISION 32 +#define I915_PARAM_SUBSLICE_TOTAL 33 +#define I915_PARAM_EU_TOTAL 34 +#define I915_PARAM_HAS_GPU_RESET 35 +#define I915_PARAM_HAS_RESOURCE_STREAMER 36 +#define I915_PARAM_HAS_EXEC_SOFTPIN 37 +#define I915_PARAM_HAS_POOLED_EU 38 +#define I915_PARAM_MIN_EU_IN_POOL 39 +#define I915_PARAM_MMAP_GTT_VERSION 40 + +/* + * Query whether DRM_I915_GEM_EXECBUFFER2 supports user defined execution + * priorities and the driver will attempt to execute batches in priority order. + * The initial priority for each batch is supplied by the context and is + * controlled via I915_CONTEXT_PARAM_PRIORITY. + */ +#define I915_PARAM_HAS_SCHEDULER 41 +#define I915_PARAM_HUC_STATUS 42 + +/* Query whether DRM_I915_GEM_EXECBUFFER2 supports the ability to opt-out of + * synchronisation with implicit fencing on individual objects. + * See EXEC_OBJECT_ASYNC. + */ +#define I915_PARAM_HAS_EXEC_ASYNC 43 + +/* Query whether DRM_I915_GEM_EXECBUFFER2 supports explicit fence support - + * both being able to pass in a sync_file fd to wait upon before executing, + * and being able to return a new sync_file fd that is signaled when the + * current request is complete. See I915_EXEC_FENCE_IN and I915_EXEC_FENCE_OUT. 
+ */ +#define I915_PARAM_HAS_EXEC_FENCE 44 + +/* Query whether DRM_I915_GEM_EXECBUFFER2 supports the ability to capture + * user specified buffers for post-mortem debugging of GPU hangs. See + * EXEC_OBJECT_CAPTURE. + */ +#define I915_PARAM_HAS_EXEC_CAPTURE 45 + +#define I915_PARAM_SLICE_MASK 46 + +/* Assuming it's uniform for each slice, this queries the mask of subslices + * per-slice for this system. + */ +#define I915_PARAM_SUBSLICE_MASK 47 + +/* + * Query whether DRM_I915_GEM_EXECBUFFER2 supports supplying the batch buffer + * as the first execobject as opposed to the last. See I915_EXEC_BATCH_FIRST. + */ +#define I915_PARAM_HAS_EXEC_BATCH_FIRST 48 + +/* Query whether DRM_I915_GEM_EXECBUFFER2 supports supplying an array of + * drm_i915_gem_exec_fence structures. See I915_EXEC_FENCE_ARRAY. + */ +#define I915_PARAM_HAS_EXEC_FENCE_ARRAY 49 + +/* Query whether DRM_I915_GEM_EXECBUFFER2 supports coordination of parallel + * execution through use of explicit fence support. + * See I915_EXEC_FENCE_OUT and I915_EXEC_FENCE_SUBMIT. + */ +#define I915_PARAM_HAS_EXEC_SUBMIT_FENCE 50 + +typedef struct drm_i915_getparam { + __s32 param; + /* + * WARNING: Using pointers instead of fixed-size u64 means we need to write + * compat32 code. Don't repeat this mistake. 
+ */ + int __user *value; +} drm_i915_getparam_t; + +/* Ioctl to set kernel params: + */ +#define I915_SETPARAM_USE_MI_BATCHBUFFER_START 1 +#define I915_SETPARAM_TEX_LRU_LOG_GRANULARITY 2 +#define I915_SETPARAM_ALLOW_BATCHBUFFER 3 +#define I915_SETPARAM_NUM_USED_FENCES 4 + +typedef struct drm_i915_setparam { + int param; + int value; +} drm_i915_setparam_t; + +/* A memory manager for regions of shared memory: + */ +#define I915_MEM_REGION_AGP 1 + +typedef struct drm_i915_mem_alloc { + int region; + int alignment; + int size; + int __user *region_offset; /* offset from start of fb or agp */ +} drm_i915_mem_alloc_t; + +typedef struct drm_i915_mem_free { + int region; + int region_offset; +} drm_i915_mem_free_t; + +typedef struct drm_i915_mem_init_heap { + int region; + int size; + int start; +} drm_i915_mem_init_heap_t; + +/* Allow memory manager to be torn down and re-initialized (eg on + * rotate): + */ +typedef struct drm_i915_mem_destroy_heap { + int region; +} drm_i915_mem_destroy_heap_t; + +/* Allow X server to configure which pipes to monitor for vblank signals + */ +#define DRM_I915_VBLANK_PIPE_A 1 +#define DRM_I915_VBLANK_PIPE_B 2 + +typedef struct drm_i915_vblank_pipe { + int pipe; +} drm_i915_vblank_pipe_t; + +/* Schedule buffer swap at given vertical blank: + */ +typedef struct drm_i915_vblank_swap { + drm_drawable_t drawable; + enum drm_vblank_seq_type seqtype; + unsigned int sequence; +} drm_i915_vblank_swap_t; + +typedef struct drm_i915_hws_addr { + __u64 addr; +} drm_i915_hws_addr_t; + +struct drm_i915_gem_init { + /** + * Beginning offset in the GTT to be managed by the DRM memory + * manager. + */ + __u64 gtt_start; + /** + * Ending offset in the GTT to be managed by the DRM memory + * manager. + */ + __u64 gtt_end; +}; + +struct drm_i915_gem_create { + /** + * Requested size for the object. + * + * The (page-aligned) allocated size for the object will be returned. + */ + __u64 size; + /** + * Returned handle for the object. 
+ * + * Object handles are nonzero. + */ + __u32 handle; + __u32 pad; +}; + +struct drm_i915_gem_pread { + /** Handle for the object being read. */ + __u32 handle; + __u32 pad; + /** Offset into the object to read from */ + __u64 offset; + /** Length of data to read */ + __u64 size; + /** + * Pointer to write the data into. + * + * This is a fixed-size type for 32/64 compatibility. + */ + __u64 data_ptr; +}; + +struct drm_i915_gem_pwrite { + /** Handle for the object being written to. */ + __u32 handle; + __u32 pad; + /** Offset into the object to write to */ + __u64 offset; + /** Length of data to write */ + __u64 size; + /** + * Pointer to read the data from. + * + * This is a fixed-size type for 32/64 compatibility. + */ + __u64 data_ptr; +}; + +struct drm_i915_gem_mmap { + /** Handle for the object being mapped. */ + __u32 handle; + __u32 pad; + /** Offset in the object to map. */ + __u64 offset; + /** + * Length of data to map. + * + * The value will be page-aligned. + */ + __u64 size; + /** + * Returned pointer the data was mapped at. + * + * This is a fixed-size type for 32/64 compatibility. + */ + __u64 addr_ptr; + + /** + * Flags for extended behaviour. + * + * Added in version 2. + */ + __u64 flags; +#define I915_MMAP_WC 0x1 +}; + +struct drm_i915_gem_mmap_gtt { + /** Handle for the object being mapped. */ + __u32 handle; + __u32 pad; + /** + * Fake offset to use for subsequent mmap call + * + * This is a fixed-size type for 32/64 compatibility. + */ + __u64 offset; +}; + +struct drm_i915_gem_set_domain { + /** Handle for the object */ + __u32 handle; + + /** New read domains */ + __u32 read_domains; + + /** New write domain */ + __u32 write_domain; +}; + +struct drm_i915_gem_sw_finish { + /** Handle for the object */ + __u32 handle; +}; + +struct drm_i915_gem_relocation_entry { + /** + * Handle of the buffer being pointed to by this relocation entry. 
+ * + * It's appealing to make this be an index into the mm_validate_entry + * list to refer to the buffer, but this allows the driver to create + * a relocation list for state buffers and not re-write it per + * exec using the buffer. + */ + __u32 target_handle; + + /** + * Value to be added to the offset of the target buffer to make up + * the relocation entry. + */ + __u32 delta; + + /** Offset in the buffer the relocation entry will be written into */ + __u64 offset; + + /** + * Offset value of the target buffer that the relocation entry was last + * written as. + * + * If the buffer has the same offset as last time, we can skip syncing + * and writing the relocation. This value is written back out by + * the execbuffer ioctl when the relocation is written. + */ + __u64 presumed_offset; + + /** + * Target memory domains read by this operation. + */ + __u32 read_domains; + + /** + * Target memory domains written by this operation. + * + * Note that only one domain may be written by the whole + * execbuffer operation, so that where there are conflicts, + * the application will get -EINVAL back. + */ + __u32 write_domain; +}; + +/** @{ + * Intel memory domains + * + * Most of these just align with the various caches in + * the system and are used to flush and invalidate as + * objects end up cached in different domains. 
+ */ +/** CPU cache */ +#define I915_GEM_DOMAIN_CPU 0x00000001 +/** Render cache, used by 2D and 3D drawing */ +#define I915_GEM_DOMAIN_RENDER 0x00000002 +/** Sampler cache, used by texture engine */ +#define I915_GEM_DOMAIN_SAMPLER 0x00000004 +/** Command queue, used to load batch buffers */ +#define I915_GEM_DOMAIN_COMMAND 0x00000008 +/** Instruction cache, used by shader programs */ +#define I915_GEM_DOMAIN_INSTRUCTION 0x00000010 +/** Vertex address cache */ +#define I915_GEM_DOMAIN_VERTEX 0x00000020 +/** GTT domain - aperture and scanout */ +#define I915_GEM_DOMAIN_GTT 0x00000040 +/** WC domain - uncached access */ +#define I915_GEM_DOMAIN_WC 0x00000080 +/** @} */ + +struct drm_i915_gem_exec_object { + /** + * User's handle for a buffer to be bound into the GTT for this + * operation. + */ + __u32 handle; + + /** Number of relocations to be performed on this buffer */ + __u32 relocation_count; + /** + * Pointer to array of struct drm_i915_gem_relocation_entry containing + * the relocations to be performed in this buffer. + */ + __u64 relocs_ptr; + + /** Required alignment in graphics aperture */ + __u64 alignment; + + /** + * Returned value of the updated offset of the object, for future + * presumed_offset writes. + */ + __u64 offset; +}; + +struct drm_i915_gem_execbuffer { + /** + * List of buffers to be validated with their relocations to be + * performend on them. + * + * This is a pointer to an array of struct drm_i915_gem_validate_entry. + * + * These buffers must be listed in an order such that all relocations + * a buffer is performing refer to buffers that have already appeared + * in the validate list. + */ + __u64 buffers_ptr; + __u32 buffer_count; + + /** Offset in the batchbuffer to start execution from. 
*/ + __u32 batch_start_offset; + /** Bytes used in batchbuffer from batch_start_offset */ + __u32 batch_len; + __u32 DR1; + __u32 DR4; + __u32 num_cliprects; + /** This is a struct drm_clip_rect *cliprects */ + __u64 cliprects_ptr; +}; + +struct drm_i915_gem_exec_object2 { + /** + * User's handle for a buffer to be bound into the GTT for this + * operation. + */ + __u32 handle; + + /** Number of relocations to be performed on this buffer */ + __u32 relocation_count; + /** + * Pointer to array of struct drm_i915_gem_relocation_entry containing + * the relocations to be performed in this buffer. + */ + __u64 relocs_ptr; + + /** Required alignment in graphics aperture */ + __u64 alignment; + + /** + * When the EXEC_OBJECT_PINNED flag is specified this is populated by + * the user with the GTT offset at which this object will be pinned. + * When the I915_EXEC_NO_RELOC flag is specified this must contain the + * presumed_offset of the object. + * During execbuffer2 the kernel populates it with the value of the + * current GTT offset of the object, for future presumed_offset writes. + */ + __u64 offset; + +#define EXEC_OBJECT_NEEDS_FENCE (1<<0) +#define EXEC_OBJECT_NEEDS_GTT (1<<1) +#define EXEC_OBJECT_WRITE (1<<2) +#define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3) +#define EXEC_OBJECT_PINNED (1<<4) +#define EXEC_OBJECT_PAD_TO_SIZE (1<<5) +/* The kernel implicitly tracks GPU activity on all GEM objects, and + * synchronises operations with outstanding rendering. This includes + * rendering on other devices if exported via dma-buf. However, sometimes + * this tracking is too coarse and the user knows better. For example, + * if the object is split into non-overlapping ranges shared between different + * clients or engines (i.e. suballocating objects), the implicit tracking + * by kernel assumes that each operation affects the whole object rather + * than an individual range, causing needless synchronisation between clients. 
+ * The kernel will also forgo any CPU cache flushes prior to rendering from + * the object as the client is expected to be also handling such domain + * tracking. + * + * The kernel maintains the implicit tracking in order to manage resources + * used by the GPU - this flag only disables the synchronisation prior to + * rendering with this object in this execbuf. + * + * Opting out of implicit synchronisation requires the user to do its own + * explicit tracking to avoid rendering corruption. See, for example, + * I915_PARAM_HAS_EXEC_FENCE to order execbufs and execute them asynchronously. + */ +#define EXEC_OBJECT_ASYNC (1<<6) +/* Request that the contents of this execobject be copied into the error + * state upon a GPU hang involving this batch for post-mortem debugging. + * These buffers are recorded in no particular order as "user" in + * /sys/class/drm/cardN/error. Query I915_PARAM_HAS_EXEC_CAPTURE to see + * if the kernel supports this flag. + */ +#define EXEC_OBJECT_CAPTURE (1<<7) +/* All remaining bits are MBZ and RESERVED FOR FUTURE USE */ +#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_CAPTURE<<1) + __u64 flags; + + union { + __u64 rsvd1; + __u64 pad_to_size; + }; + __u64 rsvd2; +}; + +struct drm_i915_gem_exec_fence { + /** + * User's handle for a drm_syncobj to wait on or signal. + */ + __u32 handle; + +#define I915_EXEC_FENCE_WAIT (1<<0) +#define I915_EXEC_FENCE_SIGNAL (1<<1) + __u32 flags; +}; + +struct drm_i915_gem_execbuffer2 { + /** + * List of gem_exec_object2 structs + */ + __u64 buffers_ptr; + __u32 buffer_count; + + /** Offset in the batchbuffer to start execution from. */ + __u32 batch_start_offset; + /** Bytes used in batchbuffer from batch_start_offset */ + __u32 batch_len; + __u32 DR1; + __u32 DR4; + __u32 num_cliprects; + /** + * This is a struct drm_clip_rect *cliprects if I915_EXEC_FENCE_ARRAY + * is not set. If I915_EXEC_FENCE_ARRAY is set, then this is a + * struct drm_i915_gem_exec_fence *fences. 
+ */ + __u64 cliprects_ptr; +#define I915_EXEC_RING_MASK (7<<0) +#define I915_EXEC_DEFAULT (0<<0) +#define I915_EXEC_RENDER (1<<0) +#define I915_EXEC_BSD (2<<0) +#define I915_EXEC_BLT (3<<0) +#define I915_EXEC_VEBOX (4<<0) + +/* Used for switching the constants addressing mode on gen4+ RENDER ring. + * Gen6+ only supports relative addressing to dynamic state (default) and + * absolute addressing. + * + * These flags are ignored for the BSD and BLT rings. + */ +#define I915_EXEC_CONSTANTS_MASK (3<<6) +#define I915_EXEC_CONSTANTS_REL_GENERAL (0<<6) /* default */ +#define I915_EXEC_CONSTANTS_ABSOLUTE (1<<6) +#define I915_EXEC_CONSTANTS_REL_SURFACE (2<<6) /* gen4/5 only */ + __u64 flags; + __u64 rsvd1; /* now used for context info */ + __u64 rsvd2; +}; + +/** Resets the SO write offset registers for transform feedback on gen7. */ +#define I915_EXEC_GEN7_SOL_RESET (1<<8) + +/** Request a privileged ("secure") batch buffer. Note only available for + * DRM_ROOT_ONLY | DRM_MASTER processes. + */ +#define I915_EXEC_SECURE (1<<9) + +/** Inform the kernel that the batch is and will always be pinned. This + * negates the requirement for a workaround to be performed to avoid + * an incoherent CS (such as can be found on 830/845). If this flag is + * not passed, the kernel will endeavour to make sure the batch is + * coherent with the CS before execution. If this flag is passed, + * userspace assumes the responsibility for ensuring the same. + */ +#define I915_EXEC_IS_PINNED (1<<10) + +/** Provide a hint to the kernel that the command stream and auxiliary + * state buffers already holds the correct presumed addresses and so the + * relocation process may be skipped if no buffers need to be moved in + * preparation for the execbuffer. + */ +#define I915_EXEC_NO_RELOC (1<<11) + +/** Use the reloc.handle as an index into the exec object array rather + * than as the per-file handle. 
+ */ +#define I915_EXEC_HANDLE_LUT (1<<12) + +/** Used for switching BSD rings on the platforms with two BSD rings */ +#define I915_EXEC_BSD_SHIFT (13) +#define I915_EXEC_BSD_MASK (3 << I915_EXEC_BSD_SHIFT) +/* default ping-pong mode */ +#define I915_EXEC_BSD_DEFAULT (0 << I915_EXEC_BSD_SHIFT) +#define I915_EXEC_BSD_RING1 (1 << I915_EXEC_BSD_SHIFT) +#define I915_EXEC_BSD_RING2 (2 << I915_EXEC_BSD_SHIFT) + +/** Tell the kernel that the batchbuffer is processed by + * the resource streamer. + */ +#define I915_EXEC_RESOURCE_STREAMER (1<<15) + +/* Setting I915_EXEC_FENCE_IN implies that lower_32_bits(rsvd2) represent + * a sync_file fd to wait upon (in a nonblocking manner) prior to executing + * the batch. + * + * Returns -EINVAL if the sync_file fd cannot be found. + */ +#define I915_EXEC_FENCE_IN (1<<16) + +/* Setting I915_EXEC_FENCE_OUT causes the ioctl to return a sync_file fd + * in the upper_32_bits(rsvd2) upon success. Ownership of the fd is given + * to the caller, and it should be close() after use. (The fd is a regular + * file descriptor and will be cleaned up on process termination. It holds + * a reference to the request, but nothing else.) + * + * The sync_file fd can be combined with other sync_file and passed either + * to execbuf using I915_EXEC_FENCE_IN, to atomic KMS ioctls (so that a flip + * will only occur after this request completes), or to other devices. + * + * Using I915_EXEC_FENCE_OUT requires use of + * DRM_IOCTL_I915_GEM_EXECBUFFER2_WR ioctl so that the result is written + * back to userspace. Failure to do so will cause the out-fence to always + * be reported as zero, and the real fence fd to be leaked. + */ +#define I915_EXEC_FENCE_OUT (1<<17) + +/* + * Traditionally the execbuf ioctl has only considered the final element in + * the execobject[] to be the executable batch. 
Often though, the client + * will know the batch object prior to construction and being able to place + * it into the execobject[] array first can simplify the relocation tracking. + * Setting I915_EXEC_BATCH_FIRST tells execbuf to use element 0 of the + * execobject[] as the batch instead (the default is to use the last + * element). + */ +#define I915_EXEC_BATCH_FIRST (1<<18) + +/* Setting I915_EXEC_FENCE_ARRAY implies that num_cliprects and cliprects_ptr + * define an array of drm_i915_gem_exec_fence structures which specify a set of + * dma fences to wait upon or signal. + */ +#define I915_EXEC_FENCE_ARRAY (1<<19) + +/* Setting I915_EXEC_FENCE_SUBMIT implies that lower_32_bits(rsvd2) represent + * a sync_file fd to wait upon (in a nonblocking manner) prior to executing + * the batch. + * + * Returns -EINVAL if the sync_file fd cannot be found. + */ +#define I915_EXEC_FENCE_SUBMIT (1<<20) + +#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_SUBMIT<<1)) + +#define I915_EXEC_CONTEXT_ID_MASK (0xffffffff) +#define i915_execbuffer2_set_context_id(eb2, context) \ + (eb2).rsvd1 = context & I915_EXEC_CONTEXT_ID_MASK +#define i915_execbuffer2_get_context_id(eb2) \ + ((eb2).rsvd1 & I915_EXEC_CONTEXT_ID_MASK) + +struct drm_i915_gem_pin { + /** Handle of the buffer to be pinned. */ + __u32 handle; + __u32 pad; + + /** alignment required within the aperture */ + __u64 alignment; + + /** Returned GTT offset of the buffer. */ + __u64 offset; +}; + +struct drm_i915_gem_unpin { + /** Handle of the buffer to be unpinned. */ + __u32 handle; + __u32 pad; +}; + +struct drm_i915_gem_busy { + /** Handle of the buffer to check for busy */ + __u32 handle; + + /** Return busy status + * + * A return of 0 implies that the object is idle (after + * having flushed any pending activity), and a non-zero return that + * the object is still in-flight on the GPU. (The GPU has not yet + * signaled completion for all pending requests that reference the + * object.) 
An object is guaranteed to become idle eventually (so + * long as no new GPU commands are executed upon it). Due to the + * asynchronous nature of the hardware, an object reported + * as busy may become idle before the ioctl is completed. + * + * Furthermore, if the object is busy, which engine is busy is only + * provided as a guide. There are race conditions which prevent the + * report of which engines are busy from being always accurate. + * However, the converse is not true. If the object is idle, the + * result of the ioctl, that all engines are idle, is accurate. + * + * The returned dword is split into two fields to indicate both + * the engines on which the object is being read, and the + * engine on which it is currently being written (if any). + * + * The low word (bits 0:15) indicate if the object is being written + * to by any engine (there can only be one, as the GEM implicit + * synchronisation rules force writes to be serialised). Only the + * engine for the last write is reported. + * + * The high word (bits 16:31) are a bitmask of which engines are + * currently reading from the object. Multiple engines may be + * reading from the object simultaneously. + * + * The value of each engine is the same as specified in the + * EXECBUFFER2 ioctl, i.e. I915_EXEC_RENDER, I915_EXEC_BSD etc. + * Note I915_EXEC_DEFAULT is a symbolic value and is mapped to + * the I915_EXEC_RENDER engine for execution, and so it is never + * reported as active itself. Some hardware may have parallel + * execution engines, e.g. multiple media engines, which are + * mapped to the same identifier in the EXECBUFFER2 ioctl and + * so are not separately reported for busyness. + * + * Caveat emptor: + * Only the boolean result of this query is reliable; that is whether + * the object is idle or busy. The report of which engines are busy + * should be only used as a heuristic. + */ + __u32 busy; +}; + +/** + * I915_CACHING_NONE + * + * GPU access is not coherent with cpu caches. 
Default for machines without an + * LLC. + */ +#define I915_CACHING_NONE 0 +/** + * I915_CACHING_CACHED + * + * GPU access is coherent with cpu caches and furthermore the data is cached in + * last-level caches shared between cpu cores and the gpu GT. Default on + * machines with HAS_LLC. + */ +#define I915_CACHING_CACHED 1 +/** + * I915_CACHING_DISPLAY + * + * Special GPU caching mode which is coherent with the scanout engines. + * Transparently falls back to I915_CACHING_NONE on platforms where no special + * cache mode (like write-through or gfdt flushing) is available. The kernel + * automatically sets this mode when using a buffer as a scanout target. + * Userspace can manually set this mode to avoid a costly stall and clflush in + * the hotpath of drawing the first frame. + */ +#define I915_CACHING_DISPLAY 2 + +struct drm_i915_gem_caching { + /** + * Handle of the buffer to set/get the caching level of. */ + __u32 handle; + + /** + * Cacheing level to apply or return value + * + * bits0-15 are for generic caching control (i.e. the above defined + * values). bits16-31 are reserved for platform-specific variations + * (e.g. l3$ caching on gen7). */ + __u32 caching; +}; + +#define I915_TILING_NONE 0 +#define I915_TILING_X 1 +#define I915_TILING_Y 2 +#define I915_TILING_LAST I915_TILING_Y + +#define I915_BIT_6_SWIZZLE_NONE 0 +#define I915_BIT_6_SWIZZLE_9 1 +#define I915_BIT_6_SWIZZLE_9_10 2 +#define I915_BIT_6_SWIZZLE_9_11 3 +#define I915_BIT_6_SWIZZLE_9_10_11 4 +/* Not seen by userland */ +#define I915_BIT_6_SWIZZLE_UNKNOWN 5 +/* Seen by userland. */ +#define I915_BIT_6_SWIZZLE_9_17 6 +#define I915_BIT_6_SWIZZLE_9_10_17 7 + +struct drm_i915_gem_set_tiling { + /** Handle of the buffer to have its tiling state updated */ + __u32 handle; + + /** + * Tiling mode for the object (I915_TILING_NONE, I915_TILING_X, + * I915_TILING_Y). 
+ * + * This value is to be set on request, and will be updated by the + * kernel on successful return with the actual chosen tiling layout. + * + * The tiling mode may be demoted to I915_TILING_NONE when the system + * has bit 6 swizzling that can't be managed correctly by GEM. + * + * Buffer contents become undefined when changing tiling_mode. + */ + __u32 tiling_mode; + + /** + * Stride in bytes for the object when in I915_TILING_X or + * I915_TILING_Y. + */ + __u32 stride; + + /** + * Returned address bit 6 swizzling required for CPU access through + * mmap mapping. + */ + __u32 swizzle_mode; +}; + +struct drm_i915_gem_get_tiling { + /** Handle of the buffer to get tiling state for. */ + __u32 handle; + + /** + * Current tiling mode for the object (I915_TILING_NONE, I915_TILING_X, + * I915_TILING_Y). + */ + __u32 tiling_mode; + + /** + * Returned address bit 6 swizzling required for CPU access through + * mmap mapping. + */ + __u32 swizzle_mode; + + /** + * Returned address bit 6 swizzling required for CPU access through + * mmap mapping whilst bound. + */ + __u32 phys_swizzle_mode; +}; + +struct drm_i915_gem_get_aperture { + /** Total size of the aperture used by i915_gem_execbuffer, in bytes */ + __u64 aper_size; + + /** + * Available space in the aperture used by i915_gem_execbuffer, in + * bytes + */ + __u64 aper_available_size; +}; + +struct drm_i915_get_pipe_from_crtc_id { + /** ID of CRTC being requested **/ + __u32 crtc_id; + + /** pipe of requested CRTC **/ + __u32 pipe; +}; + +#define I915_MADV_WILLNEED 0 +#define I915_MADV_DONTNEED 1 +#define __I915_MADV_PURGED 2 /* internal state */ + +struct drm_i915_gem_madvise { + /** Handle of the buffer to change the backing store advice */ + __u32 handle; + + /* Advice: either the buffer will be needed again in the near future, + * or wont be and could be discarded under memory pressure. + */ + __u32 madv; + + /** Whether the backing store still exists. 
*/ + __u32 retained; +}; + +/* flags */ +#define I915_OVERLAY_TYPE_MASK 0xff +#define I915_OVERLAY_YUV_PLANAR 0x01 +#define I915_OVERLAY_YUV_PACKED 0x02 +#define I915_OVERLAY_RGB 0x03 + +#define I915_OVERLAY_DEPTH_MASK 0xff00 +#define I915_OVERLAY_RGB24 0x1000 +#define I915_OVERLAY_RGB16 0x2000 +#define I915_OVERLAY_RGB15 0x3000 +#define I915_OVERLAY_YUV422 0x0100 +#define I915_OVERLAY_YUV411 0x0200 +#define I915_OVERLAY_YUV420 0x0300 +#define I915_OVERLAY_YUV410 0x0400 + +#define I915_OVERLAY_SWAP_MASK 0xff0000 +#define I915_OVERLAY_NO_SWAP 0x000000 +#define I915_OVERLAY_UV_SWAP 0x010000 +#define I915_OVERLAY_Y_SWAP 0x020000 +#define I915_OVERLAY_Y_AND_UV_SWAP 0x030000 + +#define I915_OVERLAY_FLAGS_MASK 0xff000000 +#define I915_OVERLAY_ENABLE 0x01000000 + +struct drm_intel_overlay_put_image { + /* various flags and src format description */ + __u32 flags; + /* source picture description */ + __u32 bo_handle; + /* stride values and offsets are in bytes, buffer relative */ + __u16 stride_Y; /* stride for packed formats */ + __u16 stride_UV; + __u32 offset_Y; /* offset for packet formats */ + __u32 offset_U; + __u32 offset_V; + /* in pixels */ + __u16 src_width; + __u16 src_height; + /* to compensate the scaling factors for partially covered surfaces */ + __u16 src_scan_width; + __u16 src_scan_height; + /* output crtc description */ + __u32 crtc_id; + __u16 dst_x; + __u16 dst_y; + __u16 dst_width; + __u16 dst_height; +}; + +/* flags */ +#define I915_OVERLAY_UPDATE_ATTRS (1<<0) +#define I915_OVERLAY_UPDATE_GAMMA (1<<1) +#define I915_OVERLAY_DISABLE_DEST_COLORKEY (1<<2) +struct drm_intel_overlay_attrs { + __u32 flags; + __u32 color_key; + __s32 brightness; + __u32 contrast; + __u32 saturation; + __u32 gamma0; + __u32 gamma1; + __u32 gamma2; + __u32 gamma3; + __u32 gamma4; + __u32 gamma5; +}; + +/* + * Intel sprite handling + * + * Color keying works with a min/mask/max tuple. Both source and destination + * color keying is allowed. 
+ * + * Source keying: + * Sprite pixels within the min & max values, masked against the color channels + * specified in the mask field, will be transparent. All other pixels will + * be displayed on top of the primary plane. For RGB surfaces, only the min + * and mask fields will be used; ranged compares are not allowed. + * + * Destination keying: + * Primary plane pixels that match the min value, masked against the color + * channels specified in the mask field, will be replaced by corresponding + * pixels from the sprite plane. + * + * Note that source & destination keying are exclusive; only one can be + * active on a given plane. + */ + +#define I915_SET_COLORKEY_NONE (1<<0) /* disable color key matching */ +#define I915_SET_COLORKEY_DESTINATION (1<<1) +#define I915_SET_COLORKEY_SOURCE (1<<2) +struct drm_intel_sprite_colorkey { + __u32 plane_id; + __u32 min_value; + __u32 channel_mask; + __u32 max_value; + __u32 flags; +}; + +struct drm_i915_gem_wait { + /** Handle of BO we shall wait on */ + __u32 bo_handle; + __u32 flags; + /** Number of nanoseconds to wait, Returns time remaining. */ + __s64 timeout_ns; +}; + +struct drm_i915_gem_context_create { + /* output: id of new context*/ + __u32 ctx_id; + __u32 pad; +}; + +struct drm_i915_gem_context_destroy { + __u32 ctx_id; + __u32 pad; +}; + +struct drm_i915_reg_read { + /* + * Register offset. + * For 64bit wide registers where the upper 32bits don't immediately + * follow the lower 32bits, the offset of the lower 32bits must + * be specified + */ + __u64 offset; + __u64 val; /* Return value */ +}; +/* Known registers: + * + * Render engine timestamp - 0x2358 + 64bit - gen7+ + * - Note this register returns an invalid value if using the default + * single instruction 8byte read, in order to workaround that use + * offset (0x2358 | 1) instead. 
+ * + */ + +struct drm_i915_reset_stats { + __u32 ctx_id; + __u32 flags; + + /* All resets since boot/module reload, for all contexts */ + __u32 reset_count; + + /* Number of batches lost when active in GPU, for this context */ + __u32 batch_active; + + /* Number of batches lost pending for execution, for this context */ + __u32 batch_pending; + + __u32 pad; +}; + +struct drm_i915_gem_userptr { + __u64 user_ptr; + __u64 user_size; + __u32 flags; +#define I915_USERPTR_READ_ONLY 0x1 +#define I915_USERPTR_UNSYNCHRONIZED 0x80000000 + /** + * Returned handle for the object. + * + * Object handles are nonzero. + */ + __u32 handle; +}; + +struct drm_i915_gem_context_param { + __u32 ctx_id; + __u32 size; + __u64 param; +#define I915_CONTEXT_PARAM_BAN_PERIOD 0x1 +#define I915_CONTEXT_PARAM_NO_ZEROMAP 0x2 +#define I915_CONTEXT_PARAM_GTT_SIZE 0x3 +#define I915_CONTEXT_PARAM_NO_ERROR_CAPTURE 0x4 +#define I915_CONTEXT_PARAM_BANNABLE 0x5 +#define I915_CONTEXT_PARAM_PRIORITY 0x6 +#define I915_CONTEXT_MAX_USER_PRIORITY 1023 /* inclusive */ +#define I915_CONTEXT_DEFAULT_PRIORITY 0 +#define I915_CONTEXT_MIN_USER_PRIORITY -1023 /* inclusive */ + __u64 value; +}; + +enum drm_i915_oa_format { + I915_OA_FORMAT_A13 = 1, /* HSW only */ + I915_OA_FORMAT_A29, /* HSW only */ + I915_OA_FORMAT_A13_B8_C8, /* HSW only */ + I915_OA_FORMAT_B4_C8, /* HSW only */ + I915_OA_FORMAT_A45_B8_C8, /* HSW only */ + I915_OA_FORMAT_B4_C8_A16, /* HSW only */ + I915_OA_FORMAT_C4_B8, /* HSW+ */ + + /* Gen8+ */ + I915_OA_FORMAT_A12, + I915_OA_FORMAT_A12_B8_C8, + I915_OA_FORMAT_A32u40_A4u32_B8_C8, + + I915_OA_FORMAT_MAX /* non-ABI */ +}; + +enum drm_i915_perf_property_id { + /** + * Open the stream for a specific context handle (as used with + * execbuffer2). A stream opened for a specific context this way + * won't typically require root privileges. + */ + DRM_I915_PERF_PROP_CTX_HANDLE = 1, + + /** + * A value of 1 requests the inclusion of raw OA unit reports as + * part of stream samples. 
+ */ + DRM_I915_PERF_PROP_SAMPLE_OA, + + /** + * The value specifies which set of OA unit metrics should be + * configured, defining the contents of any OA unit reports. + */ + DRM_I915_PERF_PROP_OA_METRICS_SET, + + /** + * The value specifies the size and layout of OA unit reports. + */ + DRM_I915_PERF_PROP_OA_FORMAT, + + /** + * Specifying this property implicitly requests periodic OA unit + * sampling and (at least on Haswell) the sampling period is derived + * from this exponent as follows: + * + * 80ns * 2^(period_exponent + 1) + */ + DRM_I915_PERF_PROP_OA_EXPONENT, + + DRM_I915_PERF_PROP_MAX /* non-ABI */ +}; + +struct drm_i915_perf_open_param { + __u32 flags; +#define I915_PERF_FLAG_FD_CLOEXEC (1<<0) +#define I915_PERF_FLAG_FD_NONBLOCK (1<<1) +#define I915_PERF_FLAG_DISABLED (1<<2) + + /** The number of u64 (id, value) pairs */ + __u32 num_properties; + + /** + * Pointer to array of u64 (id, value) pairs configuring the stream + * to open. + */ + __u64 properties_ptr; +}; + +/** + * Enable data capture for a stream that was either opened in a disabled state + * via I915_PERF_FLAG_DISABLED or was later disabled via + * I915_PERF_IOCTL_DISABLE. + * + * It is intended to be cheaper to disable and enable a stream than it may be + * to close and re-open a stream with the same configuration. + * + * It's undefined whether any pending data for the stream will be lost. + */ +#define I915_PERF_IOCTL_ENABLE _IO('i', 0x0) + +/** + * Disable data capture for a stream. + * + * It is an error to try and read a stream that is disabled. + */ +#define I915_PERF_IOCTL_DISABLE _IO('i', 0x1) + +/** + * Common to all i915 perf records + */ +struct drm_i915_perf_record_header { + __u32 type; + __u16 pad; + __u16 size; +}; + +enum drm_i915_perf_record_type { + + /** + * Samples are the work horse record type whose contents are extensible + * and defined when opening an i915 perf stream based on the given + * properties. 
+ * + * Boolean properties following the naming convention + * DRM_I915_PERF_PROP_SAMPLE_xyz request the inclusion of 'xyz' data in + * every sample. + * + * The order of these sample properties given by userspace has no + * effect on the ordering of data within a sample. The order is + * documented here. + * + * struct { + * struct drm_i915_perf_record_header header; + * + * { u32 oa_report[]; } && DRM_I915_PERF_PROP_SAMPLE_OA + * }; + */ + DRM_I915_PERF_RECORD_SAMPLE = 1, + + /* + * Indicates that one or more OA reports were not written by the + * hardware. This can happen for example if an MI_REPORT_PERF_COUNT + * command collides with periodic sampling - which would be more likely + * at higher sampling frequencies. + */ + DRM_I915_PERF_RECORD_OA_REPORT_LOST = 2, + + /** + * An error occurred that resulted in all pending OA reports being lost. + */ + DRM_I915_PERF_RECORD_OA_BUFFER_LOST = 3, + + DRM_I915_PERF_RECORD_MAX /* non-ABI */ +}; + +/** + * Structure to upload perf dynamic configuration into the kernel. 
+ */ +struct drm_i915_perf_oa_config { + /** String formatted like "%08x-%04x-%04x-%04x-%012x" */ + char uuid[36]; + + __u32 n_mux_regs; + __u32 n_boolean_regs; + __u32 n_flex_regs; + + __u64 __user mux_regs_ptr; + __u64 __user boolean_regs_ptr; + __u64 __user flex_regs_ptr; +}; + +#if defined(__cplusplus) +} +#endif + +#endif /* _UAPI_I915_DRM_H_ */ diff --git a/fmbt/i915/i915_engine.cc b/fmbt/i915/i915_engine.cc new file mode 100644 index 00000000..80791930 --- /dev/null +++ b/fmbt/i915/i915_engine.cc @@ -0,0 +1,34 @@ +#include <string.h> +#include <stdint.h> + +#include "i915_driver.h" +#include "i915_engine.h" + +static unsigned int abi_map[] = { + [i915::RCS] = I915_EXEC_RENDER, + [i915::BCS] = I915_EXEC_BLT, + [i915::VCS0] = I915_EXEC_BSD, + [i915::VCS1] = I915_EXEC_BSD, + [i915::VECS] = I915_EXEC_VEBOX, +}; + +namespace i915 { + bool engine_exists(driver *drv, unsigned int exec_id) { + struct drm_i915_gem_execbuffer2 execbuf; + struct drm_i915_gem_exec_object2 exec; + + memset(&exec, 0, sizeof(exec)); + memset(&execbuf, 0, sizeof(execbuf)); + execbuf.buffers_ptr = (uint64_t)(uintptr_t)&exec; + execbuf.buffer_count = 1; + execbuf.flags = exec_id; + + return drv->ioctl(DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf) == -ENOENT; + } + + engine::engine(driver *drv, unsigned int id) { + _id = id; + exec_id = abi_map[id]; + exists = engine_exists(drv, exec_id); + } +} diff --git a/fmbt/i915/i915_engine.h b/fmbt/i915/i915_engine.h new file mode 100644 index 00000000..3cd7cc23 --- /dev/null +++ b/fmbt/i915/i915_engine.h @@ -0,0 +1,37 @@ +#ifndef I915_ENGINE_H +#define I915_ENGINE_H + +#include "i915_tiling.h" + +namespace i915 { + class driver; + + enum engine_id { + RCS = 0, + RCS0 = RCS, + BCS, + BCS0 = BCS, + VCS, + VCS0 = VCS, + VCS1, + VECS, + VECS0 = VECS, + }; + +class engine { + unsigned int _id; +public: + engine() {} + engine(driver *drv, unsigned int id); + + unsigned int id() { return _id; } + unsigned int exec_id; + bool exists; + + bool can_mi_store_dword() 
{ return true; } + bool can_XY_SRC_FILL(tiling t) { return 0&&_id == BCS; } +}; +} + +#endif /* I915_ENGINE_H */ + diff --git a/fmbt/i915/i915_image.cc b/fmbt/i915/i915_image.cc new file mode 100644 index 00000000..b164fbd6 --- /dev/null +++ b/fmbt/i915/i915_image.cc @@ -0,0 +1,78 @@ +#include "i915_image.h" + +namespace i915 { + +image::image(driver *drv, unsigned int width, unsigned int height, unsigned int stride) { + _gen = drv->gen(); + /* query swizzling */ + + _width = width; + _height = height; + + /* Pick a stride that will work with any tiling mode */ + if (stride) + _stride = stride; + else + _stride = ((width + 512/_cpp - 1) & -(512/_cpp)) * _cpp; + _tiling = i915::TILING_NONE; + + _size = _stride * height; + _pixels = new uint32_t[_size/_cpp] (); +} + +image::~image() { + delete [] _pixels; +} + +unsigned long image::linear_offset(unsigned int x, unsigned int y) { + switch (_tiling) { + default: + case TILING_NONE: + return y * _stride + x * _cpp; + + case TILING_X: { + unsigned int tile_y = y / _tile_height * _tile_height; + unsigned int tile_x = x / _tile_width * _tile_width; + return (tile_y * _stride + + tile_x * _tile_size + + ((y - tile_y) * _tile_width + + (x - tile_x)) * _cpp); + } + + case TILING_Y: + return 0; + } +} + +void image::set_tiling(i915::tiling tiling, unsigned int swizzle) { + _tiling = tiling; + _swizzle = swizzle; + + switch (tiling) { + default: + case TILING_NONE: + _tile_width = 1; + _tile_height = 1; + _tile_size = _cpp; + break; + + case TILING_X: + if (_gen == 2) { + _tile_height = 16; + _tile_width = 128 / _cpp; /* pixels */ + _tile_size = 2048; + } else { + _tile_height = 8; + _tile_width = 512 / _cpp; /* pixels */ + _tile_size = 4096; + } + break; + + case TILING_Y: + _tile_height = 16; + _tile_width = 64 / _cpp; /* pixels */ + _tile_size = 4096; + break; + } +} +} diff --git a/fmbt/i915/i915_image.h b/fmbt/i915/i915_image.h new file mode 100644 index 00000000..32032586 --- /dev/null +++ b/fmbt/i915/i915_image.h @@ -0,0 
+1,62 @@ +#ifndef I915_IMAGE_H +#define I915_IMAGE_H + +#include <stdint.h> + +#include "i915_driver.h" + +namespace i915 { + class image { + public: + image(driver *drv, unsigned int width, unsigned int height, unsigned int stride=0); + ~image(); + + unsigned int width() { return _width; } + unsigned int height() { return _height; } + unsigned int stride() { return _stride; } + unsigned long size() { return _size; } + + void set_tiling(enum i915::tiling, unsigned int swizzle=0); + enum i915::tiling tiling() { return _tiling; } + unsigned int tile_width() { return _tile_width; } + unsigned int tile_height() { return _tile_height; } + + unsigned long linear_offset(unsigned int x, unsigned int y); + unsigned long tiled_offset(unsigned int x, unsigned int y) { + return y*_stride + x*4; + } + + uint32_t& operator [](std::size_t px) { + return _pixels[px]; + } + const uint32_t& operator [](std::size_t px) const { + return _pixels[px]; + } + + uint32_t *pixel(unsigned long px) { + return &_pixels[px]; + } + uint32_t *pixel (unsigned long x, unsigned long y) { + return &_pixels[tiled_offset(x, y)/4]; + } + + private: + unsigned int _gen; + + unsigned int _width; + unsigned int _height; + unsigned int _stride; + unsigned long _size; + static const unsigned int _cpp = 4; + + i915::tiling _tiling; + unsigned int _swizzle; + unsigned int _tile_width; + unsigned int _tile_height; + unsigned int _tile_size; + + uint32_t *_pixels; + }; +}; + +#endif diff --git a/fmbt/i915/i915_object.cc b/fmbt/i915/i915_object.cc new file mode 100644 index 00000000..fa9246fe --- /dev/null +++ b/fmbt/i915/i915_object.cc @@ -0,0 +1,223 @@ +#include <string.h> +#include <errno.h> +#include <assert.h> +#include <sys/mman.h> + +#include "i915_object.h" +#include "i915_batch.h" +#include "i915_userptr.h" + +#ifndef min +#define min(x, y) ((x) <= (y) ? 
(x) : (y))
+#endif
+
+namespace i915 {
+	/*
+	 * Create a write-combining CPU mapping of the whole object with
+	 * DRM_IOCTL_I915_GEM_MMAP (I915_MMAP_WC).  Returns 0 when the ioctl
+	 * reports a non-zero result, otherwise caches and returns the address.
+	 */
+	void *object::__map_wc() {
+		struct drm_i915_gem_mmap arg;
+		memset(&arg, 0, sizeof(arg));
+		arg.handle = _handle;
+		arg.size = _size;
+		arg.flags = I915_MMAP_WC;
+		if (_driver->ioctl(DRM_IOCTL_I915_GEM_MMAP, &arg))
+			return 0;
+
+		return _map_wc = (void *)(uintptr_t)arg.addr_ptr;
+	}
+
+	/*
+	 * As __map_wc() but without I915_MMAP_WC, i.e. the default mapping
+	 * type of DRM_IOCTL_I915_GEM_MMAP.  Returns 0 on failure.
+	 */
+	void *object::__map_wb() {
+		struct drm_i915_gem_mmap arg;
+		memset(&arg, 0, sizeof(arg));
+		arg.handle = _handle;
+		arg.size = _size;
+		if (_driver->ioctl(DRM_IOCTL_I915_GEM_MMAP, &arg))
+			return 0;
+
+		return _map_wb = (void *)(uintptr_t)arg.addr_ptr;
+	}
+
+	/*
+	 * Map the object through the GTT: ask the kernel for the fake mmap
+	 * offset, then mmap64() the device fd at that offset.
+	 *
+	 * Returns MAP_FAILED on failure (unchanged contract).  The mapping is
+	 * requested readable as well as writable because callers read back
+	 * through it, and a failed attempt is no longer cached in _map_gtt,
+	 * so map_gtt() retries and the destructor never sees MAP_FAILED.
+	 */
+	void *object::__map_gtt() {
+		struct drm_i915_gem_mmap_gtt arg;
+		memset(&arg, 0, sizeof(arg));
+		arg.handle = _handle;
+
+		if (_driver->ioctl(DRM_IOCTL_I915_GEM_MMAP_GTT, &arg))
+			return MAP_FAILED;
+
+		void *ptr = mmap64(0, _size, PROT_READ | PROT_WRITE, MAP_SHARED,
+				   _driver->fd(), arg.offset);
+		if (ptr != MAP_FAILED)
+			_map_gtt = ptr;
+		return ptr;
+	}
+
+	/*
+	 * Issue DRM_IOCTL_I915_GEM_SET_TILING, repeating while the driver
+	 * wrapper returns -EINTR or -EAGAIN.  The stride is forced to 0 for
+	 * TILING_NONE.  Returns the last result of __ioctl().
+	 */
+	int object::__set_tiling(enum tiling tiling, unsigned int stride) {
+		int err;
+
+		do {
+			struct drm_i915_gem_set_tiling arg;
+			memset(&arg, 0, sizeof(arg));
+			arg.handle = _handle;
+			arg.tiling_mode = tiling;
+			arg.stride = tiling ? stride : 0;
+			err = _driver->__ioctl(DRM_IOCTL_I915_GEM_SET_TILING,
+					       &arg);
+		} while (err == -EINTR || err == -EAGAIN);
+
+		return err;
+	}
+
+	/*
+	 * Change the caching mode with DRM_IOCTL_I915_GEM_SET_CACHING.  On
+	 * success cache_coherent is updated from cache_level (non-zero means
+	 * coherent); on failure the ioctl result is returned unchanged.
+	 */
+	int object::__set_cache_level(unsigned int cache_level) {
+		struct drm_i915_gem_caching arg;
+		memset(&arg, 0, sizeof(arg));
+		arg.handle = _handle;
+		arg.caching = cache_level;
+		int err = _driver->ioctl(DRM_IOCTL_I915_GEM_SET_CACHING, &arg);
+		if (err)
+			return err;
+
+		cache_coherent = cache_level;
+		return 0;
+	}
+	/*
+	 * Read back the caching mode.  The ioctl result is ignored, so a
+	 * failed query returns the zero-initialised value.
+	 */
+	unsigned int object::get_cache_level() {
+		struct drm_i915_gem_caching arg;
+		memset(&arg, 0, sizeof(arg));
+		arg.handle = _handle;
+		_driver->ioctl(DRM_IOCTL_I915_GEM_GET_CACHING, &arg);
+		return arg.caching;
+	}
+
+	/*
+	 * Move the object into the given read domain; when write is true the
+	 * same domain is also requested as the write domain.
+	 */
+	int object::__set_domain(unsigned int domain, bool write) {
+		struct drm_i915_gem_set_domain arg;
+		memset(&arg, 0, sizeof(arg));
+		arg.handle = _handle;
+		arg.read_domains = domain;
+		arg.write_domain = write ? domain : 0;
+		return _driver->ioctl(DRM_IOCTL_I915_GEM_SET_DOMAIN, &arg);
+	}
+
+	/* pwrite: copy length bytes from data into the object at offset. */
+	int object::__write(uint64_t offset, void *data, unsigned long length) {
+		struct drm_i915_gem_pwrite arg;
+		memset(&arg, 0, sizeof(arg));
+		arg.handle = _handle;
+		arg.offset = offset;
+		arg.size = length;
+		arg.data_ptr = (uintptr_t)data;
+		return _driver->ioctl(DRM_IOCTL_I915_GEM_PWRITE, &arg);
+	}
+
+	/* pread: copy length bytes from the object at offset into data. */
+	int object::__read(uint64_t offset, void *data, unsigned long length) {
+		struct drm_i915_gem_pread arg;
+		memset(&arg, 0, sizeof(arg));
+		arg.handle = _handle;
+		arg.offset = offset;
+		arg.size = length;
+		arg.data_ptr = (uintptr_t)data;
+		return _driver->ioctl(DRM_IOCTL_I915_GEM_PREAD, &arg);
+	}
+
+	/*
+	 * Copy length bytes from src+src_offset to dst+dst_offset on the GPU
+	 * by emitting XY_SRC_COPY packets into a batch on the default
+	 * context.  BCS is used when that engine exists, otherwise RCS.
+	 *
+	 * Both surfaces are described to the blitter with a pitch of 4096
+	 * bytes; a start address that is not 64-byte aligned (including the
+	 * object's reloc_offset) is rounded down and compensated for with a
+	 * starting x coordinate in pixels.
+	 *
+	 * NOTE(review): for chunks above 4096 bytes the rectangle height is
+	 * l / 4096 + 1 rows although only l bytes are accounted for, and the
+	 * rectangle always starts at y = 0 -- confirm this is intended.
+	 */
+	void dma_copy(driver *drv,
+		      object *dst, uint64_t dst_offset,
+		      object *src, uint64_t src_offset,
+		      uint64_t length) {
+		batch b(&drv->default_context,
+			drv->engines[i915::BCS].exists ? &drv->engines[i915::BCS] : &drv->engines[i915::RCS]);
+
+		int misaligned;
+
+		int dst_x0 = 0;
+		misaligned = (dst_offset + dst->reloc_offset()) & 63;
+		if (misaligned) {
+			dst_offset -= misaligned;
+			dst_x0 = misaligned / 4;
+		}
+
+		int src_x0 = 0;
+		misaligned = (src_offset + src->reloc_offset()) & 63;
+		if (misaligned) {
+			src_offset -= misaligned;
+			src_x0 = misaligned / 4;
+		}
+
+		do {
+			/* At most 16384 rows of 4096 bytes per packet. */
+			unsigned int l = min(length, 4096 * 16384);
+			unsigned x1, y1;
+			if (l > 4096) {
+				l &= -4096;
+				x1 = 1024;
+				y1 = l / 4096 + 1;
+			} else {
+				x1 = l / 4;
+				y1 = 1;
+			}
+			length -= l;
+
+			/* gen8+ packets carry two extra dwords for the upper
+			 * halves of the two addresses.
+			 */
+			unsigned len = drv->gen() >= 8 ? 10 : 8;
+			uint32_t *pkt = b.begin(len);
+			*pkt++ = i915::XY_SRC_COPY | 3 << 20 | (len - 2);
+			/* presumably 32bpp, ROP 0xcc (source copy), pitch
+			 * 4096 -- confirm against the blitter documentation.
+			 */
+			*pkt++ = 3 << 24 | 0xcc << 16 | 4096;
+			*pkt++ = dst_x0;
+			*pkt++ = y1 << 16 | (dst_x0 + x1);
+
+			{
+				uint64_t r = b.reloc(pkt, dst, dst_offset, EXEC_OBJECT_WRITE);
+				*pkt++ = r;
+				if (drv->gen() >= 8)
+					*pkt++ = r >> 32;
+			}
+
+			*pkt++ = src_x0;
+			*pkt++ = 4096;
+
+			{
+				uint64_t r = b.reloc(pkt, src, src_offset);
+				*pkt++ = r;
+				if (drv->gen() >= 8)
+					*pkt++ = r >> 32;
+			}
+
+			src_offset += l;
+			dst_offset += l;
+		} while (length);
+
+		b.flush();
+	}
+
+	/*
+	 * Write length bytes from data into this object at offset using the
+	 * GPU: wrap data in a temporary userptr object, blit from it, wait
+	 * for the temporary object and delete it.
+	 */
+	void object::dma_set(uint64_t offset, void *data, unsigned long length) {
+		driver *drv = static_cast<driver*>(_driver);
+		object *user = drv->create_userptr(data, length);
+		dma_copy(drv, this, offset, user, 0, length);
+		user->wait();
+		delete user;
+	}
+
+	/*
+	 * Read length bytes from this object at offset into data using the
+	 * GPU; the mirror image of dma_set().
+	 */
+	void object::dma_get(uint64_t offset, void *data, unsigned long length) {
+		driver *drv = static_cast<driver*>(_driver);
+		object *user = drv->create_userptr(data, length);
+		dma_copy(drv, user, 0, this, offset, length);
+		user->wait();
+		delete user;
+	}
+
+	/*
+	 * Wait on the object with DRM_IOCTL_I915_GEM_WAIT, passing timeout
+	 * as timeout_ns and flags unchanged.  Returns the ioctl result.
+	 */
+	int object::wait(unsigned int flags, int64_t timeout) {
+		struct drm_i915_gem_wait arg;
+		memset(&arg, 0, sizeof(arg));
+		arg.bo_handle = _handle;
+		arg.timeout_ns = timeout;
+		arg.flags = flags;
+		return _driver->ioctl(DRM_IOCTL_I915_GEM_WAIT, &arg);
+	}
+
+	/*
+	 * Query DRM_IOCTL_I915_GEM_BUSY and return the reported busy field.
+	 * The ioctl result is ignored, so a failed query returns 0.
+	 */
+	unsigned int object::busy() {
+		struct 
drm_i915_gem_busy arg; + memset(&arg, 0, sizeof(arg)); + arg.handle = _handle; + _driver->ioctl(DRM_IOCTL_I915_GEM_BUSY, &arg); + return arg.busy; + } + + object::~object() { + if (_map_wc) + munmap(_map_wc, _size); + if (_map_wb) + munmap(_map_wb, _size); + if (_map_gtt) + munmap(_map_gtt, _size); + } +} diff --git a/fmbt/i915/i915_object.h b/fmbt/i915/i915_object.h new file mode 100644 index 00000000..6b930dbe --- /dev/null +++ b/fmbt/i915/i915_object.h @@ -0,0 +1,92 @@ +#ifndef I915_OBJECT_H +#define I915_OBJECT_H + +#include <stdint.h> +#include <assert.h> + +#include "i915_driver.h" +#include "../gem_object.h" + +namespace i915 { + +class driver; + +class object : public gem_object { + void *_map_wc; + void *_map_wb; + void *_map_gtt; + unsigned long _reloc_offset; +public: + object(driver *drv, uint64_t sz, uint32_t handle) : gem_object(drv, sz, handle), _map_wc(0), _map_wb(0), _map_gtt(0), _reloc_offset(0) { + cache_coherent = drv->llc(); + } + ~object(); + + int __set_tiling(enum tiling, unsigned int stride); + void set_tiling(enum tiling tiling, unsigned int stride) { + assert(__set_tiling(tiling, stride) == 0); + } + + int __set_cache_level(unsigned int cache_level); + void set_cache_level(unsigned int cache_level) { + assert(__set_cache_level(cache_level) == 0); + } + unsigned int get_cache_level(); + + int __set_domain(unsigned int domain, bool write=false); + void set_domain(unsigned int domain, bool write=false) { + assert(__set_domain(domain, write) == 0); + } + + int __read(uint64_t offset, void *data, unsigned long length); + void read(uint64_t offset, void *data, unsigned long length) { + assert(__read(offset, data, length) == 0); + } + + int __write(uint64_t offset, void *data, unsigned long length); + void write(uint64_t offset, void *data, unsigned long length) { + assert(__write(offset, data, length) == 0); + } + + void dma_set(uint64_t offset, void *data, unsigned long length); + void dma_get(uint64_t offset, void *data, unsigned long 
length); + + int wait(unsigned int flags, int64_t timeout=-1); + int wait() { return wait(0, -1); } + + unsigned int busy(); + + void *__map_wc(); + void *map_wc() { + if (_map_wc) + return _map_wc; + else + return __map_wc(); + } + + void *__map_wb(); + void *map_wb() { + if (_map_wb) + return _map_wb; + else + return __map_wb(); + } + + void *__map_gtt(); + void *map_gtt() { + if (_map_gtt) + return _map_gtt; + else + return __map_gtt(); + } + + unsigned long reloc_offset() { return _reloc_offset; } + void set_reloc_offset(unsigned long offset) { _reloc_offset = offset; } + + bool can_set_tiling(tiling t) { return false; } + + bool cache_coherent; +}; +} + +#endif /* I915_OBJECT_H */ diff --git a/fmbt/i915/i915_tiling.h b/fmbt/i915/i915_tiling.h new file mode 100644 index 00000000..1e612faf --- /dev/null +++ b/fmbt/i915/i915_tiling.h @@ -0,0 +1,12 @@ +#ifndef I915_TILING_H +#define I915_TILING_H + +namespace i915 { + enum tiling { + TILING_NONE, + TILING_X, + TILING_Y, + }; +} + +#endif /* I915_TILING_H */ diff --git a/fmbt/i915/i915_userptr.cc b/fmbt/i915/i915_userptr.cc new file mode 100644 index 00000000..95ef914d --- /dev/null +++ b/fmbt/i915/i915_userptr.cc @@ -0,0 +1,4 @@ +#include "i915_userptr.h" + +namespace i915 { +} diff --git a/fmbt/i915/i915_userptr.h b/fmbt/i915/i915_userptr.h new file mode 100644 index 00000000..bb83d22e --- /dev/null +++ b/fmbt/i915/i915_userptr.h @@ -0,0 +1,16 @@ +#ifndef I915_USERPTR_H +#define I915_USERPTR_H + +#include "i915_driver.h" +#include "i915_object.h" + +namespace i915 { +class userptr : public object { +public: + userptr(driver *drv, uint64_t size, uint32_t handle, unsigned int offset) : object(drv, size, handle) { + set_reloc_offset(offset); + } +}; +} + +#endif /* I915_USERPTR_H */ diff --git a/fmbt/i915/unittest_i915_driver.cc b/fmbt/i915/unittest_i915_driver.cc new file mode 100644 index 00000000..cc98717e --- /dev/null +++ b/fmbt/i915/unittest_i915_driver.cc @@ -0,0 +1,62 @@ +#include <stdio.h> + +#include 
"i915_driver.h" +#include "i915_image.h" +#include "i915_object.h" +#include "i915_batch.h" + +int main(void) +{ + i915::driver *drv = i915::driver::create(); + + i915::image image = i915::image(drv, 512, 512); + + printf("Image %dx%d, size=%lu\n", image.width(), image.height(), image.size()); + + printf("Image[0] = %u\n", image[0]); + image[1] = 1; + printf("Image[1] = %u\n", image[1]); + + for (unsigned int y = 1; y < image.height(); y <<= 1) + for (unsigned int x = 1; x < image.width(); x <<= 1) + printf("linear: (%d, %d) = [%lu, %lu]\n", + x, y, + image.linear_offset(x, y), + image.tiled_offset(x, y)); + + image.set_tiling(i915::TILING_X); + for (unsigned int y = 1; y < image.height(); y <<= 1) + for (unsigned int x = 1; x < image.width(); x <<= 1) + printf("tiledX: (%d, %d) = [%lu, %lu]\n", + x, y, + image.linear_offset(x, y), + image.tiled_offset(x, y)); + + + i915::object *bo = static_cast<i915::object *>(drv->create_object(4096)); + bo->write(0, &image[1], sizeof(image[1])); + bo->read(0, &image[0], sizeof(image[0])); + printf("1: Image[0, 1] = [%u, %u]\n", image[0], image[1]); + + image[1] = 2; + bo->dma_set(0, &image[1], sizeof(image[1])); + bo->dma_get(0, &image[0], sizeof(image[0])); + printf("2: Image[0, 1] = [%u, %u]\n", image[0], image[1]); + + i915::batch b(&drv->default_context, &drv->engines[i915::RCS]); + b.emit_MI_STORE_DWORD_IMM(bo, 4, 3); + b.flush(); + bo->dma_get(4, &image[10], sizeof(image[10])); + bo->read(4, &image[11], sizeof(image[11])); + bo->dma_get(4, &image[0], sizeof(image[0])); + printf("3: Image[0, 1] = [%u:%u, %u]\n", image[10], image[0], image[11]); + + b.emit_MI_STORE_DWORD_IMM(bo, 4, 4); + b.emit_MI_STORE_DWORD_IMM(bo, 8, 5); + b.emit_MI_STORE_DWORD_IMM(bo, 12, 6); + b.flush(); + bo->dma_get(4, &image[20], 3*sizeof(image[20])); + printf("4: Image[0, 1, 2] = [%u, %u, %u]\n", image[20], image[21], image[22]); + + return 0; +} diff --git a/fmbt/simple-i915.conf b/fmbt/simple-i915.conf new file mode 100644 index 
00000000..4c9076da
--- /dev/null
+++ b/fmbt/simple-i915.conf
@@ -0,0 +1,9 @@
+model = lib(simple_i915_gem_coherency)
+adapter = lib(simple_i915_gem_coherency)
+heuristic = lookahead
+coverage = perm(100)
+
+pass = duration(120 sec)
+
+on_pass = exit(0)
+on_fail = exit(1)
diff --git a/fmbt/simple_i915_gem_coherency.cc.aal b/fmbt/simple_i915_gem_coherency.cc.aal
new file mode 100644
index 00000000..e74cd003
--- /dev/null
+++ b/fmbt/simple_i915_gem_coherency.cc.aal
@@ -0,0 +1,151 @@
+aal "simple_i915_gem_coherency" {
+    language "C++" {
+        /*
+         * Single-dword coherency model: every input writes or reads
+         * dword 0 of one buffer object through a different access path
+         * and compares against pixel 0 of a CPU-side image.
+         */
+        #include "i915/i915_driver.h"
+        #include "i915/i915_batch.h"
+        #include "i915/i915_object.h"
+        #include "i915/i915_image.h"
+    }
+
+    variables {
+        i915::driver *drv;
+        i915::engine *engine;
+        i915::context *ctx;
+        i915::batch *batch;
+
+        /* bo is the object under test, image holds the expected contents. */
+        i915::object *bo;
+        i915::image *image;
+
+        /* The value the next write input will store. */
+        uint32_t value;
+    }
+
+    initial_state {
+        drv = i915::driver::create();
+        engine = &drv->engines[i915::RCS0];
+        ctx = &drv->default_context;
+        batch = new i915::batch(ctx, engine);
+
+        image = new i915::image(drv, 64, 128);
+        bo = static_cast<i915::object *>(drv->create_object(image->size()));
+        value = 0xffffffff;
+    }
+
+    push { /* too scary */ }
+    pop { }
+
+    /* Each guard disables re-selecting the value that is already current. */
+    input "new value 0x00" {
+        guard() { return value != 0; }
+        body() { value = 0; }
+    }
+    input "new value 0xcc" {
+        guard() { return value != 0xcccccccc; }
+        body() { value = 0xcccccccc; }
+    }
+    input "new value 0xaa" {
+        guard() { return value != 0xaaaaaaaa; }
+        body() { value = 0xaaaaaaaa; }
+    }
+    input "new value 0x55" {
+        guard() { return value != 0x55555555; }
+        body() { value = 0x55555555; }
+    }
+    input "new value 0xff" {
+        guard() { return value != 0xffffffff; }
+        body() { value = 0xffffffff; }
+    }
+
+    input "emit dword" {
+        guard() { return engine->can_mi_store_dword(); }
+        adapter() { batch->emit_MI_STORE_DWORD_IMM(bo, 0, value); }
+        body() { *image->pixel(0) = value; }
+    }
+
+    /* Every CPU or DMA access first flushes the pending batch for bo. */
+    input "gpu set dword" {
+        adapter() {
+            batch->flush(bo);
+            bo->dma_set(0, &value, 4);
+        }
+        body() { *image->pixel(0) = value; }
+    }
+    input "gpu get dword" {
+        adapter() {
+            batch->flush(bo);
+            uint32_t result;
+            bo->dma_get(0, &result, 4);
+            ASSERT_EQ(result, *image->pixel(0));
+        }
+    }
+
+    input "pwrite dword" {
+        adapter() {
+            batch->flush(bo);
+            bo->write(0, &value, 4);
+        }
+        body() { *image->pixel(0) = value; }
+    }
+    input "pread dword" {
+        adapter() {
+            batch->flush(bo);
+            uint32_t result;
+            bo->read(0, &result, 4);
+            ASSERT_EQ(result, *image->pixel(0));
+        }
+    }
+
+    input "wb set dword" {
+        adapter() {
+            batch->flush(bo);
+            bo->set_domain(I915_GEM_DOMAIN_CPU, true);
+            static_cast<uint32_t *>(bo->map_wb())[0] = value;
+        }
+        body() { *image->pixel(0) = value; }
+    }
+    input "wb get dword" {
+        adapter() {
+            batch->flush(bo);
+            bo->set_domain(I915_GEM_DOMAIN_CPU, false);
+            ASSERT_EQ(static_cast<uint32_t *>(bo->map_wb())[0], *image->pixel(0));
+        }
+    }
+
+    /* The wc inputs are only enabled when a WC mapping can be created. */
+    input "wc set dword" {
+        guard() { return bo->map_wc(); }
+        adapter() {
+            batch->flush(bo);
+            bo->set_domain(I915_GEM_DOMAIN_WC, true);
+            static_cast<uint32_t *>(bo->map_wc())[0] = value;
+        }
+        body() { *image->pixel(0) = value; }
+    }
+    input "wc get dword" {
+        guard() { return bo->map_wc(); }
+        adapter() {
+            batch->flush(bo);
+            bo->set_domain(I915_GEM_DOMAIN_WC, false);
+            ASSERT_EQ(static_cast<uint32_t *>(bo->map_wc())[0], *image->pixel(0));
+        }
+    }
+
+    input "gtt set dword" {
+        adapter() {
+            batch->flush(bo);
+            bo->set_domain(I915_GEM_DOMAIN_GTT, true);
+            static_cast<uint32_t *>(bo->map_gtt())[0] = value;
+        }
+        body() { *image->pixel(0) = value; }
+    }
+    input "gtt get dword" {
+        adapter() {
+            batch->flush(bo);
+            /* Read through the GTT mapping, so use the GTT domain to match the set input. */
+            bo->set_domain(I915_GEM_DOMAIN_GTT, false);
+            ASSERT_EQ(static_cast<uint32_t *>(bo->map_gtt())[0], *image->pixel(0));
+        }
+    }
+
+    output "check missed interrupts" {
+        adapter() { ASSERT_EQ(drv->check_and_clear_missed_interrupts(), 0); return 0; }
+    }
+
+    output "check GPU hangs" {
+        adapter() { ASSERT_EQ(ctx->reset_count(), 0); return 0; }
+    }
+}
diff --git a/fmbt/test_drm_device.cc.aal b/fmbt/test_drm_device.cc.aal
new 
file mode 100644
index 00000000..303683f0
--- /dev/null
+++ b/fmbt/test_drm_device.cc.aal
@@ -0,0 +1,59 @@
+aal "test_drm_device" {
+    language "C++" {
+        /*
+         * Model and adapter for drm_device open and close: the model
+         * tracks one boolean and the adapter checks the return codes
+         * and dev.fd against it.
+         */
+        #include "drm_device.h"
+    }
+
+    variables {
+        drm_device dev;
+        /* Saved copies of open for push and pop. */
+        std::stack<bool> st;
+        /* Model state: true while the device is expected to be open. */
+        bool open;
+    }
+
+    initial_state {
+        open = false;
+    }
+
+    push {
+        st.push(open);
+    }
+    pop {
+        open = st.top(); st.pop();
+    }
+
+    tag "opened" {
+        guard() { return open; }
+        adapter() { ASSERT_NEQ(dev.fd, -1); }
+
+        input "close" {
+            adapter() { ASSERT_EQ(dev.close(), 0); }
+            body() { open = false; }
+        }
+
+        input "already_opened" {
+            /* Opening an already open device must report -EBUSY. */
+            adapter() { ASSERT_EQ(dev.open("/dev/dri/card0"), -EBUSY); }
+        }
+    }
+
+    tag "closed" {
+        guard() { return !open; }
+        adapter() { ASSERT_EQ(dev.fd, -1); }
+
+        input "open" {
+            adapter() {
+                ASSERT_EQ(dev.open("/dev/dri/card0"), 0);
+                ASSERT_NEQ(dev.fd, -1);
+            }
+            body() { open = true; }
+        }
+
+        /* The failing inputs below have no body, so the model stays closed. */
+        input "invalid_path" {
+            adapter() { ASSERT_EQ(dev.open("/dev/dri/no-such-card"), -ENOENT); }
+        }
+        input "invalid_device" {
+            adapter() { ASSERT_EQ(dev.open("/dev/null"), -ENOTTY); }
+        }
+        input "already_closed" {
+            adapter() { ASSERT_EQ(dev.close(), -EBADF); }
+        }
+    }
+}
diff --git a/fmbt/test_gem_driver.cc.aal b/fmbt/test_gem_driver.cc.aal
new file mode 100644
index 00000000..ca69208b
--- /dev/null
+++ b/fmbt/test_gem_driver.cc.aal
@@ -0,0 +1,30 @@
+aal "test_gem_driver" {
+    language "C++" {
+        /*
+         * Alternates between creating a gem_driver for the opened
+         * device and destroying it again.
+         */
+        #include "gem_driver.h"
+    }
+
+    variables {
+        drm_device device;
+        /* NULL while no driver exists; both guards test this pointer. */
+        gem_driver *drv;
+    }
+
+    initial_state {
+        /* NOTE(review): the result of open is not checked here. */
+        device.open("/dev/dri/card0");
+        drv = NULL;
+    }
+
+    input "create" {
+        guard() { return !drv; }
+        adapter() {
+            drv = gem_driver_factory::get()->create_driver(device);
+            ASSERT_NEQ((long)drv, 0);
+        }
+    }
+    input "destroy" {
+        guard() { return drv; }
+        adapter() {
+            delete drv;
+            drv = NULL;
+        }
+    }
+}
diff --git a/fmbt/test_i915_gem_coherency.cc.aal b/fmbt/test_i915_gem_coherency.cc.aal
new file mode 100644
index 00000000..8116481b
--- /dev/null
+++ b/fmbt/test_i915_gem_coherency.cc.aal
@@ -0,0 +1,397 @@ 
+aal "test_i915_gem_coherency" {
+    language "C++" {
+        /*
+         * Coherency model over several buffer objects: inputs move a
+         * cursor (x, y, linear_offset), pick a value, then write or read
+         * one dword through a chosen access path and compare it with
+         * the CPU-side image of the current buffer.
+         */
+        #include "i915/i915_driver.h"
+        #include "i915/i915_batch.h"
+        #include "i915/i915_object.h"
+        #include "i915/i915_image.h"
+    }
+
+    variables {
+        i915::driver *drv;
+        i915::engine *engine;
+        i915::context *ctx;
+        i915::batch *batch;
+
+        /* Each slot pairs an object with the image of its expected contents. */
+        struct {
+            i915::object *bo;
+            i915::image *image;
+        } buffers[7];
+        unsigned int idx;
+
+        /* Shortcuts to buffers[idx]. */
+        i915::object *bo;
+        i915::image *image;
+
+        /* Cursor: pixel coordinates and the matching byte offset. */
+        uint16_t x, y;
+        uint32_t value;
+        unsigned long linear_offset;
+
+        /* NOTE(review): not referenced by any input in this model. */
+        bool interruptible;
+    }
+
+    initial_state {
+        drv = i915::driver::create();
+        engine = &drv->engines[i915::RCS0];
+        ctx = &drv->default_context;
+        batch = new i915::batch(ctx, engine);
+
+        /* Every object is sized from its image. */
+        buffers[0].image = new i915::image(drv, 64, 128);
+        buffers[0].bo = static_cast<i915::object *>(drv->create_object(buffers[0].image->size()));
+
+        buffers[1].image = new i915::image(drv, 128, 32);
+        buffers[1].bo = static_cast<i915::object *>(drv->create_object(buffers[1].image->size()));
+
+        buffers[2].image = new i915::image(drv, 256, 8);
+        buffers[2].bo = static_cast<i915::object *>(drv->create_object(buffers[2].image->size()));
+
+        buffers[3].image = new i915::image(drv, 512, 4);
+        buffers[3].bo = static_cast<i915::object *>(drv->create_object(buffers[3].image->size()));
+
+        buffers[4].image = new i915::image(drv, 1024, 1024);
+        buffers[4].bo = static_cast<i915::object *>(drv->create_object(buffers[4].image->size()));
+
+        buffers[5].image = new i915::image(drv, 2048, 2048);
+        buffers[5].bo = static_cast<i915::object *>(drv->create_object(buffers[5].image->size()));
+
+        buffers[6].image = new i915::image(drv, 4096, 4096);
+        buffers[6].bo = static_cast<i915::object *>(drv->create_object(buffers[6].image->size()));
+
+        x = y = 0;
+        linear_offset = 0;
+        value = 0xffffffff;
+        idx = 0;
+
+        image = buffers[idx].image;
+        bo = buffers[idx].bo;
+    }
+
+    push { /* too scary */ }
+    pop { }
+
+    input "next buffer" {
+        body() {
+            /* Step forward with wrap-around, skipping slots without an image. */
+            do {
+                if (++idx == (sizeof(buffers) / sizeof(buffers[0])))
+                    idx = 0;
+            } while (!buffers[idx].image);
+            image = buffers[idx].image;
+            bo = buffers[idx].bo;
+
+            /* Clamp the cursor into the new image and recompute the offset. */
+            x = x % image->width();
+            y = y % image->height();
+            linear_offset = image->linear_offset(x, y);
+        }
+    }
+    input "prev buffer" {
+        body() {
+            /* idx is unsigned: after wrapping below zero, adding the slot count lands on the last slot. */
+            do {
+                if (idx-- == 0)
+                    idx += (sizeof(buffers) / sizeof(buffers[0]));
+            } while (!buffers[idx].image);
+            image = buffers[idx].image;
+            bo = buffers[idx].bo;
+
+            x = x % image->width();
+            y = y % image->height();
+            linear_offset = image->linear_offset(x, y);
+        }
+    }
+
+    input "advance x" {
+        body() {
+            x = (x + 1) % image->width();
+            linear_offset = image->linear_offset(x, y);
+        }
+    }
+    input "advance y" {
+        body() {
+            y = (y + 1) % image->height();
+            linear_offset = image->linear_offset(x, y);
+        }
+    }
+    input "advance tile column" {
+        body() {
+            x = (x + image->tile_width()) % image->width();
+            linear_offset = image->linear_offset(x, y);
+        }
+    }
+    input "advance tile row" {
+        body() {
+            y = (y + image->tile_height()) % image->height();
+            linear_offset = image->linear_offset(x, y);
+        }
+    }
+    input "advance cacheline" {
+        body() {
+            /* Move by 64 bytes, then derive x and y back from the offset and stride. */
+            linear_offset = (linear_offset + 64) % image->size();
+            x = (linear_offset % image->stride()) / 4;
+            y = linear_offset / image->stride();
+        }
+    }
+    input "advance page" {
+        body() {
+            /* Same as the cacheline step but in 4096 byte increments. */
+            linear_offset = (linear_offset + 4096) % image->size();
+            x = (linear_offset % image->stride()) / 4;
+            y = linear_offset / image->stride();
+        }
+    }
+
+    input "new value 0x00" {
+        guard() { return value != 0; }
+        body() { value = 0; }
+    }
+    input "new value 0xcc" {
+        guard() { return value != 0xcccccccc; }
+        body() { value = 0xcccccccc; }
+    }
+    input "new value 0xaa" {
+        guard() { return value != 0xaaaaaaaa; }
+        body() { value = 0xaaaaaaaa; }
+    }
+    input "new value 0x55" {
+        guard() { return value != 0x55555555; }
+        body() { value = 0x55555555; }
+    }
+    input "new value 0xff" {
+        guard() { return value != 0xffffffff; }
+        body() { value = 0xffffffff; }
+    }
+
+    input "emit dword" {
+        guard() { return engine->can_mi_store_dword(); }
+        adapter() { batch->emit_MI_STORE_DWORD_IMM(bo, linear_offset, value); }
+        body() { *image->pixel(linear_offset/4) = value; }
+    }
+
+    input "blt dword" {
+        guard() { return engine->can_XY_SRC_FILL(image->tiling()); }
+        /* Fills the one pixel rectangle from (x, y) to (x+1, y+1). */
+        adapter() { batch->emit_XY_SRC_FILL(bo, x, y, x+1, y+1, image->stride(), value); }
+        body() { *image->pixel(x, y) = value; }
+    }
+
+    /* Every CPU or DMA access below first flushes the pending batch for bo. */
+    input "gpu set dword" {
+        adapter() {
+            batch->flush(bo);
+            bo->dma_set(linear_offset, &value, 4);
+        }
+        body() { *image->pixel(linear_offset/4) = value; }
+    }
+    input "gpu get dword" {
+        adapter() {
+            batch->flush(bo);
+            uint32_t result;
+            bo->dma_get(linear_offset, &result, 4);
+            ASSERT_EQ(result, *image->pixel(linear_offset/4));
+        }
+    }
+
+    input "pwrite dword" {
+        adapter() {
+            batch->flush(bo);
+            bo->write(linear_offset, &value, 4);
+        }
+        body() { *image->pixel(linear_offset/4) = value; }
+    }
+    input "pread dword" {
+        adapter() {
+            batch->flush(bo);
+            uint32_t result;
+            bo->read(linear_offset, &result, 4);
+            ASSERT_EQ(result, *image->pixel(linear_offset/4));
+        }
+    }
+
+    input "wb set dword" {
+        adapter() {
+            batch->flush(bo);
+            bo->set_domain(I915_GEM_DOMAIN_CPU, true);
+            static_cast<uint32_t *>(bo->map_wb())[linear_offset/4] = value;
+        }
+        body() { *image->pixel(linear_offset/4) = value; }
+    }
+    input "wb get dword" {
+        adapter() {
+            batch->flush(bo);
+            bo->set_domain(I915_GEM_DOMAIN_CPU, false);
+            ASSERT_EQ(static_cast<uint32_t *>(bo->map_wb())[linear_offset/4], *image->pixel(linear_offset/4));
+        }
+    }
+
+    /* The wc inputs are only enabled when a WC mapping can be created. */
+    input "wc set dword" {
+        guard() { return bo->map_wc(); }
+        adapter() {
+            batch->flush(bo);
+            bo->set_domain(I915_GEM_DOMAIN_WC, true);
+            static_cast<uint32_t *>(bo->map_wc())[linear_offset/4] = value;
+        }
+        body() { *image->pixel(linear_offset/4) = value; }
+    }
+    input "wc get dword" {
+        guard() { return bo->map_wc(); }
+        adapter() {
+            batch->flush(bo);
+            bo->set_domain(I915_GEM_DOMAIN_WC, false);
+            ASSERT_EQ(static_cast<uint32_t *>(bo->map_wc())[linear_offset/4], *image->pixel(linear_offset/4));
+        }
+    }
+    
input "gtt set dword" {
+        /* Enabled when the device has LLC or the object is not cache coherent. */
+        guard() { return drv->llc() || !bo->cache_coherent; }
+        adapter() {
+            batch->flush(bo);
+            bo->set_domain(I915_GEM_DOMAIN_GTT, true);
+            static_cast<uint32_t *>(bo->map_gtt())[linear_offset/4] = value;
+        }
+        /* NOTE(review): the model is indexed by (x, y) here but by linear_offset in the adapter. */
+        body() { *image->pixel(x, y) = value; }
+    }
+    input "gtt get dword" {
+        guard() { return drv->llc() || !bo->cache_coherent; }
+        adapter() {
+            batch->flush(bo);
+            /* Read through the GTT mapping, so use the GTT domain to match the set input. */
+            bo->set_domain(I915_GEM_DOMAIN_GTT, false);
+            ASSERT_EQ(static_cast<uint32_t *>(bo->map_gtt())[linear_offset/4], *image->pixel(x, y));
+        }
+    }
+
+    /* Tiling changes are applied to the object in the adapter and to the image in the body. */
+    input "change tiling to NONE" {
+        guard() { return image->tiling() != i915::TILING_NONE; }
+        adapter() {
+            batch->flush(bo);
+            bo->set_tiling(i915::TILING_NONE, image->stride());
+        }
+        body() { image->set_tiling(i915::TILING_NONE); }
+    }
+    input "change tiling to X" {
+        guard() { return image->tiling() != i915::TILING_X && bo->can_set_tiling(i915::TILING_X); }
+        adapter() {
+            batch->flush(bo);
+            bo->set_tiling(i915::TILING_X, image->stride());
+        }
+        body() { image->set_tiling(i915::TILING_X); }
+    }
+    input "change tiling to Y" {
+        guard() { return image->tiling() != i915::TILING_Y && bo->can_set_tiling(i915::TILING_Y); }
+        adapter() {
+            batch->flush(bo);
+            bo->set_tiling(i915::TILING_Y, image->stride());
+        }
+        body() { image->set_tiling(i915::TILING_Y); }
+    }
+
+    /* There is no body: the guards read bo->cache_coherent, which set_cache_level updates. */
+    input "change cache-level to UNCACHED" {
+        guard() { return bo->cache_coherent; }
+        adapter() {
+            batch->flush(bo);
+            bo->set_cache_level(0);
+        }
+    }
+    input "change cache-level to CACHED" {
+        guard() { return !bo->cache_coherent; }
+        adapter() {
+            batch->flush(bo);
+            bo->set_cache_level(1);
+        }
+    }
+
+    /* Switching engine flushes and replaces the batch; the body records the new engine. */
+    input "change engine to RCS" {
+        guard() { return engine->id() != i915::RCS0; }
+        adapter() {
+            batch->flush();
+            delete batch;
+            batch = new i915::batch(ctx, &drv->engines[i915::RCS0]);
+        }
+        body() {
+            engine = &drv->engines[i915::RCS0];
+        }
+    }
+    input "change engine to BCS" {
+        guard() { return engine->id() != i915::BCS0 && drv->engines[i915::BCS0].exists; }
+        adapter() {
+            batch->flush();
+            delete batch;
+            batch = new i915::batch(ctx, &drv->engines[i915::BCS0]);
+        }
+        body() {
+            engine = &drv->engines[i915::BCS0];
+        }
+    }
+    input "change engine to VCS0" {
+        guard() { return engine->id() != i915::VCS0 && drv->engines[i915::VCS0].exists; }
+        adapter() {
+            batch->flush();
+            delete batch;
+            batch = new i915::batch(ctx, &drv->engines[i915::VCS0]);
+        }
+        body() {
+            engine = &drv->engines[i915::VCS0];
+        }
+    }
+    input "change engine to VCS1" {
+        guard() { return engine->id() != i915::VCS1 && drv->engines[i915::VCS1].exists; }
+        adapter() {
+            batch->flush();
+            delete batch;
+            batch = new i915::batch(ctx, &drv->engines[i915::VCS1]);
+        }
+        body() {
+            engine = &drv->engines[i915::VCS1];
+        }
+    }
+    input "change engine to VECS" {
+        guard() { return engine->id() != i915::VECS0 && drv->engines[i915::VECS0].exists; }
+        adapter() {
+            batch->flush();
+            delete batch;
+            batch = new i915::batch(ctx, &drv->engines[i915::VECS0]);
+        }
+        body() {
+            engine = &drv->engines[i915::VECS0];
+        }
+    }
+
+    input "reset context to default" {
+        guard() { return ctx != &drv->default_context; }
+        adapter() {
+            batch->flush();
+            delete batch;
+
+            /* The guard ensures ctx is a created context, never the default one. */
+            delete ctx;
+
+            ctx = &drv->default_context;
+            batch = new i915::batch(ctx, engine);
+        }
+    }
+    input "new context" {
+        adapter() {
+            batch->flush();
+            delete batch;
+
+            /* The default context is owned by the driver and is never deleted here. */
+            if (ctx != &drv->default_context)
+                delete ctx;
+
+            ctx = drv->create_context();
+            batch = new i915::batch(ctx, engine);
+        }
+    }
+
+    input "force reclaim" {
+        adapter() { batch->flush(); drv->force_reclaim(); }
+    }
+
+    input "force engine reset" {
+        guard() { return drv->has_gpu_reset(); }
+        adapter() { drv->force_gpu_reset(engine); }
+    }
+
+    input "force global reset" {
+        guard() { return drv->has_gpu_reset(); }
+        adapter() { drv->force_gpu_reset(); }
+    }
+
+    output "check missed interrupts" {
+        adapter() { ASSERT_EQ(drv->check_and_clear_missed_interrupts(), 0); return 0; }
+    }
+
+    output "check GPU hangs" {
+        adapter() { ASSERT_EQ(ctx->reset_count(), 0); return 0; }
+    }
+} |