diff options
author | Zhenyu Wang <zhenyuw@linux.intel.com> | 2014-05-27 16:14:31 +0800 |
---|---|---|
committer | Zhenyu Wang <zhenyuw@linux.intel.com> | 2014-05-27 16:14:31 +0800 |
commit | 768669791fee0846dc19a38a16e58508ac7c791b (patch) | |
tree | 069f8dbc8725624bc6af7077d149bc66a414ed22 | |
parent | ff3c12209893ee8ee0e757d6cf10276596ef0338 (diff) |
Add CS ALU programming helper (cs_alu)
On HSW, the render CS can do ALU programming. A total of 16 general registers
are available, which can be loaded with values from an immediate, a register or
memory to do basic arithmetic/logic calculations.
For BDW and later, this ALU capability will be available on all rings.
This provides an example API helper to do ALU programming and includes
a simple 'gem_alu' test.
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
-rw-r--r-- | lib/Makefile.sources | 2 | ||||
-rw-r--r-- | lib/intel_alu.c | 258 | ||||
-rw-r--r-- | lib/intel_alu.h | 68 | ||||
-rw-r--r-- | tests/.gitignore | 1 | ||||
-rw-r--r-- | tests/Makefile.sources | 1 | ||||
-rw-r--r-- | tests/gem_alu.c | 71 |
6 files changed, 401 insertions, 0 deletions
diff --git a/lib/Makefile.sources b/lib/Makefile.sources
index 62a0c758..1afd62f7 100644
--- a/lib/Makefile.sources
+++ b/lib/Makefile.sources
@@ -44,5 +44,7 @@ libintel_tools_la_SOURCES = \
 	igt_fb.h \
 	igt_core.c \
 	igt_core.h \
+	intel_alu.c \
+	intel_alu.h \
 	$(NULL)
 
diff --git a/lib/intel_alu.c b/lib/intel_alu.c
new file mode 100644
index 00000000..9e73b0e8
--- /dev/null
+++ b/lib/intel_alu.c
@@ -0,0 +1,258 @@
+/*
+ * Helpers for programming the command streamer (CS) ALU with MI_MATH.
+ *
+ * Expected call sequence:
+ *
+ *	intel_alu_load_gpr_xxx();
+ *
+ *	intel_alu_begin();
+ *	intel_alu_instr();
+ *	...
+ *	intel_alu_end();
+ *
+ *	intel_alu_store_gpr();
+ */
+
+#include <string.h>
+
+#include "intel_alu.h"
+#include "intel_batchbuffer.h"
+
+/*
+ * Batch shared by all helpers. BEGIN_BATCH()/OUT_BATCH()/ADVANCE_BATCH() are
+ * invoked without a batch argument below, so they rely on this identifier.
+ */
+static struct intel_batchbuffer *batch;
+
+/* Return 0 when opcode is one of the ALU opcodes from intel_alu.h, else 1. */
+static int check_opcode(uint32_t opcode)
+{
+	switch (opcode) {
+	case NOOP:
+	case LOAD:
+	case LOADINV:
+	case LOAD0:
+	case LOAD1:
+	case ADD:
+	case SUB:
+	case AND:
+	case OR:
+	case XOR:
+	case STORE:
+	case STOREINV:
+		return 0;
+	default:
+		return 1;
+	}
+}
+
+/* Return 0 when operand names a GPR or an internal ALU register, else 1. */
+static int check_operand(uint32_t operand)
+{
+	switch (operand) {
+	case R0:
+	case R1:
+	case R2:
+	case R3:
+	case R4:
+	case R5:
+	case R6:
+	case R7:
+	case R8:
+	case R9:
+	case R10:
+	case R11:
+	case R12:
+	case R13:
+	case R14:
+	case R15:
+	case SRCA:
+	case SRCB:
+	case ACCU:
+	case ZF:
+	case CF:
+		return 0;
+	default:
+		return 1;
+	}
+}
+
+/*
+ * MMIO layout of the general purpose registers R0..R15: R0 sits at 0x2600 and
+ * each following register is 8 bytes further on.
+ *
+ * NOTE(review): the helpers below only access the dword at the register
+ * offset; confirm whether the dword at offset + 4 needs initialising too.
+ */
+#define GPR_BASE	0x2600
+#define GPR_STRIDE	0x8
+#define GPR_COUNT	16
+
+/* Return the MMIO offset of general purpose register gpr (R0..R15). */
+static uint32_t gpr_offset(uint32_t gpr)
+{
+	/* The load/store helpers return void, so a bad index can only assert. */
+	assert(gpr < GPR_COUNT);
+
+	return GPR_BASE + gpr * GPR_STRIDE;
+}
+
+/*
+ * Allocate the batch used by the helpers.
+ *
+ * Returns 0 on success, 1 when the device is not Haswell or the batch cannot
+ * be allocated.
+ */
+int intel_alu_init(int drm_fd, drm_intel_bufmgr *bufmgr)
+{
+	uint32_t devid;
+
+	devid = intel_get_drm_devid(drm_fd);
+	if (!IS_HASWELL(devid))
+		return 1;
+
+	batch = intel_batchbuffer_alloc(bufmgr, devid);
+	if (!batch)
+		return 1;
+
+	return 0;
+}
+
+#define MATH_INSTR_MAX 64 /* HSW has 64 max, BDW has 256 max, in one shot */
+
+/* Instructions queued since intel_alu_begin(); emitted by intel_alu_end(). */
+static uint32_t instr[MATH_INSTR_MAX];
+static uint32_t shot_len, shot_idx;
+
+/*
+ * Start a group of num ALU instructions.
+ *
+ * Returns 0 on success, 1 when num is zero or larger than MATH_INSTR_MAX.
+ */
+int intel_alu_begin(uint32_t num)
+{
+	/* num == 0 would make the "shot_len - 1" in intel_alu_end() wrap. */
+	if (num == 0 || num > MATH_INSTR_MAX)
+		return 1;
+
+	shot_len = num;
+	shot_idx = 0;
+	memset(instr, 0, sizeof(instr));
+	return 0;
+}
+
+#define MI_MATH (0x1a << 23)
+
+/*
+ * Emit the queued instructions as one MI_MATH packet and flush the batch on
+ * the render ring. Exactly the number of instructions announced to
+ * intel_alu_begin() must have been queued.
+ */
+void intel_alu_end(void)
+{
+	uint32_t i;
+
+	assert(shot_len > 0);
+	assert(shot_idx == shot_len);
+
+	BEGIN_BATCH(shot_len + 1);
+	OUT_BATCH(MI_MATH | (shot_len - 1));
+	for (i = 0; i < shot_len; i++)
+		OUT_BATCH(instr[i]);
+	ADVANCE_BATCH();
+
+	intel_batchbuffer_flush_on_ring(batch, I915_EXEC_RENDER);
+
+	/* Require a new intel_alu_begin() before anything else is queued. */
+	shot_len = 0;
+	shot_idx = 0;
+}
+
+/*
+ * Queue one ALU instruction. An unknown opcode or operand turns the whole
+ * instruction into a plain NOOP, so that out-of-range operand bits cannot
+ * spill into the neighbouring fields.
+ */
+void intel_alu_instr(uint32_t opcode, uint32_t operand1, uint32_t operand2)
+{
+	/* Never write past the slots reserved by intel_alu_begin(). */
+	assert(shot_idx < shot_len);
+	if (shot_idx >= shot_len)
+		return;
+
+	if (check_opcode(opcode) ||
+	    check_operand(operand1) || check_operand(operand2)) {
+		opcode = NOOP;
+		operand1 = 0;
+		operand2 = 0;
+	}
+
+	instr[shot_idx++] = (opcode << 20) | (operand1 << 10) | operand2;
+}
+
+#define MI_LOAD_REGISTER_IMM ((0x22<<23) | 1)
+#define MI_LOAD_REGISTER_REG ((0x2a<<23) | 1)
+#define MI_LOAD_REGISTER_MEM ((0x29<<23) | 1)
+#define MI_STORE_REGISTER_MEM ((0x24<<23) | 1)
+
+/* Queue a load of the immediate val into general purpose register gpr. */
+void intel_alu_load_gpr_imm(uint32_t gpr, uint32_t val)
+{
+	uint32_t offset = gpr_offset(gpr);
+
+	BEGIN_BATCH(4);
+	OUT_BATCH(MI_LOAD_REGISTER_IMM);
+	OUT_BATCH(offset);
+	OUT_BATCH(val);
+	OUT_BATCH(MI_NOOP);
+	ADVANCE_BATCH();
+}
+
+/*
+ * Queue a copy of the MMIO register at offset reg into general purpose
+ * register gpr. MI_LOAD_REGISTER_REG takes the source register first and the
+ * destination register second.
+ */
+void intel_alu_load_gpr_reg(uint32_t gpr, uint32_t reg)
+{
+	uint32_t offset = gpr_offset(gpr);
+
+	BEGIN_BATCH(4);
+	OUT_BATCH(MI_LOAD_REGISTER_REG);
+	OUT_BATCH(reg);
+	OUT_BATCH(offset);
+	OUT_BATCH(MI_NOOP);
+	ADVANCE_BATCH();
+}
+
+/* Queue a load of register gpr from byte offset off in bo. */
+void intel_alu_load_gpr_mem(uint32_t gpr, drm_intel_bo *bo, uint32_t off)
+{
+	uint32_t offset = gpr_offset(gpr);
+
+	BEGIN_BATCH(4);
+	OUT_BATCH(MI_LOAD_REGISTER_MEM);
+	OUT_BATCH(offset);
+	/* The GPU only reads bo here, so no write domain is requested. */
+	OUT_RELOC(bo, I915_GEM_DOMAIN_INSTRUCTION, 0, off);
+	OUT_BATCH(MI_NOOP);
+	ADVANCE_BATCH();
+}
+
+/*
+ * Queue a store of register gpr to byte offset off in bo, then flush the
+ * batch on the render ring.
+ */
+void intel_alu_store_gpr(uint32_t gpr, drm_intel_bo *bo, uint32_t off)
+{
+	uint32_t offset = gpr_offset(gpr);
+
+	BEGIN_BATCH(4);
+	OUT_BATCH(MI_STORE_REGISTER_MEM);
+	OUT_BATCH(offset);
+	OUT_RELOC(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, off);
+	OUT_BATCH(MI_NOOP);
+	ADVANCE_BATCH();
+
+	intel_batchbuffer_flush_on_ring(batch, I915_EXEC_RENDER);
+}
diff --git a/lib/intel_alu.h b/lib/intel_alu.h
new file mode 100644
index 00000000..d2b044e5
--- /dev/null
+++ b/lib/intel_alu.h
@@ -0,0 +1,68 @@
+/* Public interface of the CS ALU (MI_MATH) programming helpers. */
+#ifndef INTEL_ALU_H
+#define INTEL_ALU_H
+
+#include <stdint.h>
+#include <assert.h>
+#include <unistd.h>
+
+#include <drm.h>
+#include <i915_drm.h>
+#include "intel_bufmgr.h"
+#include "intel_io.h"
+#include "intel_chipset.h"
+
+/* ALU instruction */
+#define NOOP 0x000
+#define LOAD 0x080
+#define LOADINV 0x480 /* load inverse */
+#define LOAD0 0x081 /* load all 0 */
+#define LOAD1 0x481 /* load all 1 */
+#define ADD 0x100
+#define SUB 0x101
+#define AND 0x102
+#define OR 0x103
+#define XOR 0x104
+#define STORE 0x180
+#define STOREINV 0x580 /* store inverse */
+
+/* General Register */
+#define R0 0x0
+#define R1 0x1
+#define R2 0x2
+#define R3 0x3
+#define R4 0x4
+#define R5 0x5
+#define R6 0x6
+#define R7 0x7
+#define R8 0x8
+#define R9 0x9
+#define R10 0xa
+#define R11 0xb
+#define R12 0xc
+#define R13 0xd
+#define R14 0xe
+#define R15 0xf
+
+/* HW internal ALU register */
+#define SRCA 0x20
+#define SRCB 0x21
+#define ACCU 0x31
+#define ZF 0x32
+#define CF 0x33
+
+/* Both return 0 on success and 1 on failure. */
+int intel_alu_init(int fd, drm_intel_bufmgr *bufmgr);
+int intel_alu_begin(uint32_t num);
+
+/* Queue ALU instructions between intel_alu_begin() and intel_alu_end(). */
+void intel_alu_end(void);
+void intel_alu_instr(uint32_t opcode, uint32_t operand1, uint32_t operand2);
+
+/* Load or store a general purpose register, selected by R0..R15. */
+void intel_alu_load_gpr_imm(uint32_t gpr, uint32_t val);
+void intel_alu_load_gpr_reg(uint32_t gpr, uint32_t reg);
+void intel_alu_load_gpr_mem(uint32_t gpr, drm_intel_bo *bo, uint32_t off);
+void intel_alu_store_gpr(uint32_t gpr, drm_intel_bo *bo, uint32_t off);
+
+#endif
diff --git a/tests/.gitignore b/tests/.gitignore
index d7ad0547..e0ff0f46 100644
--- a/tests/.gitignore
+++ b/tests/.gitignore
@@ -11,6 +11,7 @@ drm_vma_limiter_gtt
 drv_hangman
 drv_suspend
 gem_alive
+gem_alu
 gem_bad_address
 gem_bad_batch
 gem_bad_blit
diff --git a/tests/Makefile.sources b/tests/Makefile.sources
index eca4af9c..d5a60350 100644
--- a/tests/Makefile.sources
+++ b/tests/Makefile.sources
@@ -140,6 +140,7 @@ TESTS_progs = \
 	pm_psr \
 	pm_rc6_residency \
 	prime_udl \
+	gem_alu \
 	$(NULL)
 
 # IMPORTANT: The ZZ_ tests need to be run last!
diff --git a/tests/gem_alu.c b/tests/gem_alu.c
new file mode 100644
index 00000000..3be91a33
--- /dev/null
+++ b/tests/gem_alu.c
@@ -0,0 +1,71 @@
+/*
+ * Simple exercise of the CS ALU helpers: computes 1 - 2 with MI_MATH and
+ * prints the accumulator together with the carry and zero flags.
+ */
+
+#include <stdbool.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/ioctl.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+
+#include <drm.h>
+
+#include "drmtest.h"
+#include "ioctl_wrappers.h"
+#include "intel_bufmgr.h"
+#include "intel_batchbuffer.h"
+#include "intel_alu.h"
+
+int main(int argc, char *argv[])
+{
+	drm_intel_bufmgr *bufmgr;
+	int fd;
+	drm_intel_bo *accu;
+	uint32_t *p;
+
+	igt_simple_init();
+
+	fd = drm_open_any_render();
+	bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
+	igt_assert(bufmgr);
+
+	/* Unsupported hardware is a skip, not a test failure. */
+	if (intel_alu_init(fd, bufmgr))
+		igt_skip("ALU init fail!\n");
+
+	accu = drm_intel_bo_alloc(bufmgr, "accu", 4096, 4096);
+	igt_assert(accu);
+
+	/* SUB */
+	intel_alu_load_gpr_imm(R0, 1);
+	intel_alu_load_gpr_imm(R1, 2);
+
+	igt_assert(intel_alu_begin(6) == 0);
+	intel_alu_instr(LOAD, SRCA, R0);
+	intel_alu_instr(LOAD, SRCB, R1);
+	intel_alu_instr(SUB, SRCA, SRCB);
+	intel_alu_instr(STORE, R2, ACCU);
+	intel_alu_instr(STORE, R3, CF);
+	intel_alu_instr(STORE, R4, ZF);
+	intel_alu_end();
+
+	intel_alu_store_gpr(R2, accu, 0);
+	intel_alu_store_gpr(R3, accu, 4);
+	intel_alu_store_gpr(R4, accu, 8);
+
+	igt_assert(drm_intel_bo_map(accu, 0) == 0);
+	p = accu->virtual;
+	printf("accu: %" PRId32 " CF: 0x%" PRIx32 " ZF: 0x%" PRIx32 "\n",
+	       (int32_t)p[0], p[1], p[2]);
+	drm_intel_bo_unmap(accu);
+	drm_intel_bo_unreference(accu);
+
+	return 0;
+}