summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorZhenyu Wang <zhenyuw@linux.intel.com>2014-05-27 16:14:31 +0800
committerZhenyu Wang <zhenyuw@linux.intel.com>2014-05-27 16:14:31 +0800
commit768669791fee0846dc19a38a16e58508ac7c791b (patch)
tree069f8dbc8725624bc6af7077d149bc66a414ed22
parentff3c12209893ee8ee0e757d6cf10276596ef0338 (diff)
Add CS ALU programming helper (cs_alu)
On HSW, the render CS can do ALU programming. A total of 16 general-purpose registers are available; each can be loaded from an immediate value, another register or memory and then used for basic arithmetic/logic calculations. For BDW and later, this ALU capability will be available on all rings. This patch provides an example API helper for ALU programming and includes a simple 'gem_alu' test. Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
-rw-r--r--lib/Makefile.sources2
-rw-r--r--lib/intel_alu.c207
-rw-r--r--lib/intel_alu.h62
-rw-r--r--tests/.gitignore1
-rw-r--r--tests/Makefile.sources1
-rw-r--r--tests/gem_alu.c66
6 files changed, 339 insertions, 0 deletions
diff --git a/lib/Makefile.sources b/lib/Makefile.sources
index 62a0c758..1afd62f7 100644
--- a/lib/Makefile.sources
+++ b/lib/Makefile.sources
@@ -44,5 +44,7 @@ libintel_tools_la_SOURCES = \
igt_fb.h \
igt_core.c \
igt_core.h \
+ intel_alu.c \
+ intel_alu.h \
$(NULL)
diff --git a/lib/intel_alu.c b/lib/intel_alu.c
new file mode 100644
index 00000000..9e73b0e8
--- /dev/null
+++ b/lib/intel_alu.c
@@ -0,0 +1,207 @@
+
+#include "intel_alu.h"
+#include "intel_batchbuffer.h"
+
+/*
+ * Batch buffer that every helper in this file emits its commands into.
+ * It is allocated by intel_alu_init() and flushed by intel_alu_end() and
+ * intel_alu_store_gpr(); presumably the BEGIN_BATCH()/OUT_BATCH() macros
+ * also reach it by this name.
+ *
+ * Kept file-local: "batch" is a very common identifier and an exported
+ * definition could silently merge or clash with one owned by a test.
+ */
+static struct intel_batchbuffer *batch;
+
+/*
+ * Validate an ALU opcode.
+ *
+ * Returns 0 when @opcode is one of the opcodes declared in intel_alu.h
+ * and 1 for any other value.
+ */
+static int check_opcode(uint32_t opcode)
+{
+	static const uint32_t known[] = {
+		NOOP, LOAD, LOADINV, LOAD0, LOAD1,
+		ADD, SUB, AND, OR, XOR,
+		STORE, STOREINV,
+	};
+	unsigned int k;
+
+	for (k = 0; k < sizeof(known) / sizeof(known[0]); k++) {
+		if (known[k] == opcode)
+			return 0;
+	}
+
+	return 1;
+}
+
+/*
+ * Validate an ALU operand.
+ *
+ * Returns 0 when @operand names a general register (R0..R15) or one of
+ * the internal ALU registers (SRCA, SRCB, ACCU, ZF, CF), 1 otherwise.
+ */
+static int check_operand(uint32_t operand)
+{
+	/* R0..R15 are the contiguous encodings 0x0..0xf. */
+	if (operand <= R15)
+		return 0;
+
+	if (operand == SRCA || operand == SRCB)
+		return 0;
+
+	if (operand == ACCU || operand == ZF || operand == CF)
+		return 0;
+
+	return 1;
+}
+
+/*
+ * Register offsets of the 16 general purpose registers, spaced 8 bytes
+ * apart.  These are the values written into the register-address dword
+ * of the MI_LOAD_REGISTER_* and MI_STORE_REGISTER_MEM commands below.
+ */
+#define R_0 0x2600
+#define R_1 0x2608
+#define R_2 0x2610
+#define R_3 0x2618
+#define R_4 0x2620
+#define R_5 0x2628
+#define R_6 0x2630
+#define R_7 0x2638
+#define R_8 0x2640
+#define R_9 0x2648
+#define R_10 0x2650
+#define R_11 0x2658
+#define R_12 0x2660
+#define R_13 0x2668
+#define R_14 0x2670
+#define R_15 0x2678
+
+/*
+ * Maps a general register index (R0..R15 from intel_alu.h) to its
+ * register offset.  Read-only and private to this file.
+ */
+static const uint32_t gpr_reg[] = {
+	R_0,
+	R_1,
+	R_2,
+	R_3,
+	R_4,
+	R_5,
+	R_6,
+	R_7,
+	R_8,
+	R_9,
+	R_10,
+	R_11,
+	R_12,
+	R_13,
+	R_14,
+	R_15,
+};
+
+/*
+ * Prepare the ALU helper for use on the device behind @drm_fd.
+ *
+ * Looks up the device id, refuses anything that is not Haswell and
+ * allocates the batch buffer used by the other helpers from @bufmgr.
+ *
+ * Returns 0 on success, 1 if the device is not Haswell or the batch
+ * buffer could not be allocated.
+ */
+int intel_alu_init(int drm_fd, drm_intel_bufmgr *bufmgr)
+{
+	const uint32_t devid = intel_get_drm_devid(drm_fd);
+
+	/* Only Haswell is accepted by this helper. */
+	if (!IS_HASWELL(devid))
+		return 1;
+
+	batch = intel_batchbuffer_alloc(bufmgr, devid);
+
+	return batch ? 0 : 1;
+}
+
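+/*
+ * Typical call sequence:
+ *
+ *   intel_alu_load_gpr_xxx();  load the input GPRs (imm, reg or mem)
+ *
+ *   intel_alu_begin(num);      start a shot of num ALU instructions
+ *   intel_alu_instr();         queue one instruction; call num times
+ *   ...
+ *   intel_alu_end();           emit the shot and flush the batch
+ *
+ *   intel_alu_store_gpr();     write a result GPR to a buffer object
+ */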
+
+#define MATH_INSTR_MAX 64 /* HSW has 64 max, BDW has 256 max, in one shot */
+
+/*
+ * State of the shot currently being assembled:
+ *   instr    - encoded ALU instructions queued by intel_alu_instr()
+ *   shot_len - number of instructions announced to intel_alu_begin()
+ *   shot_idx - number of instructions queued so far
+ *
+ * Private to this file; none of these is declared in intel_alu.h.
+ */
+static uint32_t instr[MATH_INSTR_MAX];
+static uint32_t shot_len, shot_idx;
+
+/*
+ * Start assembling a shot of @num ALU instructions.
+ *
+ * Resets the instruction buffer and the write index; exactly @num calls
+ * to intel_alu_instr() are expected before intel_alu_end().
+ *
+ * Returns 0 on success, 1 if @num is zero or exceeds MATH_INSTR_MAX.
+ */
+int intel_alu_begin(uint32_t num)
+{
+	/*
+	 * An empty shot is rejected as well: intel_alu_end() encodes
+	 * shot_len - 1 in the MI_MATH header, which would wrap around
+	 * for a length of zero.
+	 */
+	if (num == 0 || num > MATH_INSTR_MAX)
+		return 1;
+
+	shot_len = num;
+	shot_idx = 0;
+	memset(instr, 0, sizeof(instr));
+	return 0;
+}
+
+#define MI_MATH (0x1a << 23)
+
+/*
+ * Finish the current shot: emit an MI_MATH header followed by the
+ * queued instructions, then flush the batch on the render ring.
+ *
+ * Asserts that a non-empty shot was started with intel_alu_begin() and
+ * that exactly the announced number of instructions has been queued.
+ */
+void intel_alu_end(void)
+{
+	uint32_t i;
+
+	/*
+	 * Without a successful intel_alu_begin() shot_len is 0 and the
+	 * length field below would wrap to 0xffffffff.
+	 */
+	assert(shot_len > 0 && shot_len <= MATH_INSTR_MAX);
+	assert(shot_idx == shot_len);
+
+	BEGIN_BATCH(shot_len + 1);
+	/* One header dword plus shot_len instruction dwords. */
+	OUT_BATCH(MI_MATH | (shot_len - 1));
+	for (i = 0; i < shot_len; i++)
+		OUT_BATCH(instr[i]);
+	ADVANCE_BATCH();
+
+	intel_batchbuffer_flush_on_ring(batch, I915_EXEC_RENDER);
+}
+
+/*
+ * Queue one ALU instruction for the current shot.
+ *
+ * The instruction dword is (@opcode << 20) | (@operand1 << 10) | @operand2.
+ * An unknown opcode is replaced by NOOP.  If either operand is unknown
+ * the whole instruction becomes a plain NOOP with zeroed operands, so
+ * that stray operand bits cannot spill into the opcode field.
+ *
+ * Must be called between intel_alu_begin() and intel_alu_end(), at most
+ * as many times as announced to intel_alu_begin().
+ */
+void intel_alu_instr(uint32_t opcode, uint32_t operand1, uint32_t operand2)
+{
+	/* Never write beyond the slots reserved by intel_alu_begin(). */
+	assert(shot_idx < shot_len && shot_idx < MATH_INSTR_MAX);
+	if (shot_idx >= shot_len || shot_idx >= MATH_INSTR_MAX)
+		return;
+
+	if (check_opcode(opcode))
+		opcode = NOOP;
+
+	if (check_operand(operand1) || check_operand(operand2)) {
+		opcode = NOOP;
+		operand1 = 0;
+		operand2 = 0;
+	}
+
+	instr[shot_idx++] = (opcode << 20) | (operand1 << 10) | operand2;
+}
+
+/* Command headers used to move data in and out of the GPRs. */
+#define MI_LOAD_REGISTER_IMM ((0x22<<23) | 1)
+#define MI_LOAD_REGISTER_REG ((0x2a<<23) | 1)
+#define MI_LOAD_REGISTER_MEM ((0x29<<23) | 1)
+#define MI_STORE_REGISTER_MEM ((0x24<<23) | 1)
+
+/*
+ * Emit a command that loads general register @gpr (R0..R15) with the
+ * immediate value @val.  The command is only added to the batch; it is
+ * submitted by a later flush.
+ */
+void intel_alu_load_gpr_imm(uint32_t gpr, uint32_t val)
+{
+	/* gpr indexes a fixed 16-entry table; reject anything else. */
+	assert(gpr < sizeof(gpr_reg) / sizeof(gpr_reg[0]));
+
+	BEGIN_BATCH(4);
+	OUT_BATCH(MI_LOAD_REGISTER_IMM);
+	OUT_BATCH(gpr_reg[gpr]);
+	OUT_BATCH(val);
+	OUT_BATCH(MI_NOOP);
+	ADVANCE_BATCH();
+}
+
+/*
+ * Emit a command that loads general register @gpr (R0..R15) from the
+ * register at offset @reg.  The command is only added to the batch; it
+ * is submitted by a later flush.
+ */
+void intel_alu_load_gpr_reg(uint32_t gpr, uint32_t reg)
+{
+	/* gpr indexes a fixed 16-entry table; reject anything else. */
+	assert(gpr < sizeof(gpr_reg) / sizeof(gpr_reg[0]));
+
+	BEGIN_BATCH(4);
+	OUT_BATCH(MI_LOAD_REGISTER_REG);
+	/*
+	 * MI_LOAD_REGISTER_REG takes the source register first and the
+	 * destination second; the GPR is the destination here.
+	 */
+	OUT_BATCH(reg);
+	OUT_BATCH(gpr_reg[gpr]);
+	OUT_BATCH(MI_NOOP);
+	ADVANCE_BATCH();
+}
+
+/*
+ * Emit a command that loads general register @gpr (R0..R15) from byte
+ * offset @off inside buffer object @bo.  The command is only added to
+ * the batch; it is submitted by a later flush.
+ */
+void intel_alu_load_gpr_mem(uint32_t gpr, drm_intel_bo *bo, uint32_t off)
+{
+	/* gpr indexes a fixed 16-entry table; reject anything else. */
+	assert(gpr < sizeof(gpr_reg) / sizeof(gpr_reg[0]));
+
+	BEGIN_BATCH(4);
+	OUT_BATCH(MI_LOAD_REGISTER_MEM);
+	OUT_BATCH(gpr_reg[gpr]);
+	OUT_RELOC(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, off);
+	OUT_BATCH(MI_NOOP);
+	ADVANCE_BATCH();
+}
+
+/*
+ * Emit a command that stores general register @gpr (R0..R15) to byte
+ * offset @off inside buffer object @bo, then flush the batch on the
+ * render ring.
+ */
+void intel_alu_store_gpr(uint32_t gpr, drm_intel_bo *bo, uint32_t off)
+{
+	/* gpr indexes a fixed 16-entry table; reject anything else. */
+	assert(gpr < sizeof(gpr_reg) / sizeof(gpr_reg[0]));
+
+	BEGIN_BATCH(4);
+	OUT_BATCH(MI_STORE_REGISTER_MEM);
+	OUT_BATCH(gpr_reg[gpr]);
+	OUT_RELOC(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, off);
+	OUT_BATCH(MI_NOOP);
+	ADVANCE_BATCH();
+
+	intel_batchbuffer_flush_on_ring(batch, I915_EXEC_RENDER);
+}
diff --git a/lib/intel_alu.h b/lib/intel_alu.h
new file mode 100644
index 00000000..d2b044e5
--- /dev/null
+++ b/lib/intel_alu.h
@@ -0,0 +1,62 @@
+#ifndef _INTEL_ALU_H
+#define _INTEL_ALU_H
+
+/*
+ * Helper API for programming the command streamer (CS) ALU: opcode and
+ * operand encodings plus the functions that build and submit ALU work.
+ */
+
+#include <stdint.h>
+#include <assert.h>
+#include <unistd.h>
+
+#include <drm.h>
+#include <i915_drm.h>
+#include "intel_bufmgr.h"
+#include "intel_io.h"
+#include "intel_chipset.h"
+
+/* ALU instruction opcodes: the opcode argument of intel_alu_instr() */
+#define NOOP 0x000
+#define LOAD 0x080
+#define LOADINV 0x480 /* load inverse */
+#define LOAD0 0x081 /* load all 0 */
+#define LOAD1 0x481 /* load all 1 */
+#define ADD 0x100
+#define SUB 0x101
+#define AND 0x102
+#define OR 0x103
+#define XOR 0x104
+#define STORE 0x180
+#define STOREINV 0x580 /* store inverse */
+
+/*
+ * General Register: usable as an intel_alu_instr() operand and as the
+ * gpr argument of the intel_alu_load_gpr_*() / intel_alu_store_gpr()
+ * helpers.
+ */
+#define R0 0x0
+#define R1 0x1
+#define R2 0x2
+#define R3 0x3
+#define R4 0x4
+#define R5 0x5
+#define R6 0x6
+#define R7 0x7
+#define R8 0x8
+#define R9 0x9
+#define R10 0xa
+#define R11 0xb
+#define R12 0xc
+#define R13 0xd
+#define R14 0xe
+#define R15 0xf
+
+/* HW internal ALU register: usable only as an intel_alu_instr() operand */
+#define SRCA 0x20
+#define SRCB 0x21
+#define ACCU 0x31
+#define ZF 0x32
+#define CF 0x33
+
+/* Set up the helper for the device behind fd; returns 0 on success, non-zero on failure. */
+int intel_alu_init(int fd, drm_intel_bufmgr *bufmgr);
+/* Start a shot of num instructions; returns 0 on success, non-zero if num is rejected. */
+int intel_alu_begin(uint32_t num);
+/* Emit the queued shot and flush the batch on the render ring. */
+void intel_alu_end(void);
+/* Queue one instruction (opcode plus two operands) for the current shot. */
+void intel_alu_instr(uint32_t opcode, uint32_t operand1, uint32_t operand2);
+/* Emit a load of general register gpr with the immediate value val. */
+void intel_alu_load_gpr_imm(uint32_t gpr, uint32_t val);
+/* Emit a register-to-register load involving general register gpr and register offset reg. */
+void intel_alu_load_gpr_reg(uint32_t gpr, uint32_t reg);
+/* Emit a load of general register gpr from offset off inside bo. */
+void intel_alu_load_gpr_mem(uint32_t gpr, drm_intel_bo *bo, uint32_t off);
+/* Emit a store of general register gpr to offset off inside bo, then flush the batch. */
+void intel_alu_store_gpr(uint32_t gpr, drm_intel_bo *bo, uint32_t off);
+
+#endif
diff --git a/tests/.gitignore b/tests/.gitignore
index d7ad0547..e0ff0f46 100644
--- a/tests/.gitignore
+++ b/tests/.gitignore
@@ -11,6 +11,7 @@ drm_vma_limiter_gtt
drv_hangman
drv_suspend
gem_alive
+gem_alu
gem_bad_address
gem_bad_batch
gem_bad_blit
diff --git a/tests/Makefile.sources b/tests/Makefile.sources
index eca4af9c..d5a60350 100644
--- a/tests/Makefile.sources
+++ b/tests/Makefile.sources
@@ -140,6 +140,7 @@ TESTS_progs = \
pm_psr \
pm_rc6_residency \
prime_udl \
+ gem_alu \
$(NULL)
# IMPORTANT: The ZZ_ tests need to be run last!
diff --git a/tests/gem_alu.c b/tests/gem_alu.c
new file mode 100644
index 00000000..3be91a33
--- /dev/null
+++ b/tests/gem_alu.c
@@ -0,0 +1,66 @@
+
+#include <stdbool.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/ioctl.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+
+#include <drm.h>
+
+#include "drmtest.h"
+#include "ioctl_wrappers.h"
+#include "intel_bufmgr.h"
+#include "intel_batchbuffer.h"
+#include "intel_alu.h"
+
+/*
+ * Simple ALU smoke test: computes R0 - R1 (1 - 2) on the command
+ * streamer ALU, stores the accumulator and the CF/ZF flags into R2..R4,
+ * writes those registers to a buffer object and prints the three dwords.
+ *
+ * Returns 0 after printing the result, -1 if the ALU helper cannot be
+ * initialised for this device.
+ */
+int main(int argc, char *argv[])
+{
+	drm_intel_bufmgr *bufmgr;
+	int fd;
+	drm_intel_bo *accu;
+	int *p;
+	int ret;
+
+	igt_simple_init();
+
+	fd = drm_open_any_render();
+	bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
+	igt_assert(bufmgr);
+
+	if (intel_alu_init(fd, bufmgr)) {
+		fprintf(stderr, "ALU init fail!\n");
+		return -1;
+	}
+
+	accu = drm_intel_bo_alloc(bufmgr, "accu", 4096, 4096);
+	igt_assert(accu);
+
+	/* SUB */
+	intel_alu_load_gpr_imm(R0, 1);
+	intel_alu_load_gpr_imm(R1, 2);
+
+	/* Six instructions follow; a rejected shot must not be emitted. */
+	ret = intel_alu_begin(6);
+	igt_assert(ret == 0);
+	intel_alu_instr(LOAD, SRCA, R0);
+	intel_alu_instr(LOAD, SRCB, R1);
+	intel_alu_instr(SUB, SRCA, SRCB);
+	intel_alu_instr(STORE, R2, ACCU);
+	intel_alu_instr(STORE, R3, CF);
+	intel_alu_instr(STORE, R4, ZF);
+	intel_alu_end();
+
+	/* One dword per register, at consecutive offsets in the bo. */
+	intel_alu_store_gpr(R2, accu, 0);
+	intel_alu_store_gpr(R3, accu, 4);
+	intel_alu_store_gpr(R4, accu, 8);
+
+	/* Do not dereference the mapping unless the map succeeded. */
+	ret = drm_intel_bo_map(accu, 0);
+	igt_assert(ret == 0);
+	p = accu->virtual;
+	/* %x expects unsigned int, so convert the flag dwords explicitly. */
+	printf("accu: %d CF: 0x%x ZF: 0x%x\n",
+	       p[0], (unsigned int)p[1], (unsigned int)p[2]);
+	drm_intel_bo_unmap(accu);
+	drm_intel_bo_unreference(accu);
+
+	return 0;
+}