/* Source: root/backend/src/driver/cl_gen_gpu_state.h
 * (blob df34045d64f1833dc1663573dbbbd3739572cf4c) */
/*
 * Copyright © 2012 Intel Corporation
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library. If not, see <http://www.gnu.org/licenses/>.
 *
 */
#ifndef __CL_GEN_GPU_STATE_H__
#define __CL_GEN_GPU_STATE_H__

#ifdef __cplusplus
extern "C" {  // for the C header files
#endif /* __cplusplus */
#include <errno.h>
#include <string.h>
#include <sys/types.h>
#include <stdint.h>
#include <CL/cl.h>
#include <drm.h>
#include <i915_drm.h>
#include <intel_bufmgr.h>
#include "cl_gen_gpu_defines.h"
#include "cl_gen_devices.h"
#ifdef __cplusplus
}
#endif /* __cplusplus */
#include "sys/assert.hpp"
#include "sys/alloc.hpp"
#include "sys/platform.hpp"

using namespace gbe;

/**
 * CPU-side bookkeeping for one GPU batch (command) buffer.
 *
 * `map` points at the start of the buffer's CPU-visible storage, `ptr` is the
 * write cursor inside that storage and `size` is the capacity in bytes, so
 * the bytes still available are `size - (ptr - map)`.
 */
struct GenBatchbuffer {
  drm_intel_bo *buffer;
  drm_intel_context *ctx;
  dri_bufmgr *bufmgr;
  uint32_t size;
  uint8_t *map;
  uint8_t *ptr;
  /** HSW: LRI cannot be set from inside the batch buffer, so the
   *  I915_EXEC_ENABLE_SLM flag is set when calling exec instead. */
  uint8_t enable_slm;
  GenBatchbuffer(dri_bufmgr *bufmgr, drm_intel_context *context, size_t sz);
  ~GenBatchbuffer(void);
  void emitReloc(dri_bo *bo, uint32_t read_domains,
                 uint32_t write_domains, uint32_t delta);
  bool flush(void);

  /** Returns the number of bytes left between the write cursor and the end
   *  of the buffer. The cursor must be valid (non-NULL). */
  uint32_t getSpace(void)
  {
    GBE_ASSERT(this->ptr);
    return this->size - static_cast<uint32_t>(this->ptr - this->map);
  }

  /** Appends the 32-bit value x at the cursor and advances the cursor by
   *  four bytes. */
  void emitDword(uint32_t x)
  {
    GBE_ASSERT(getSpace() >= 4);
    /* allocSpace() can advance the cursor by any byte count, so the cursor is
     * not guaranteed to be 4-byte aligned; memcpy is valid for any alignment
     * whereas a store through a casted uint32_t pointer is not. */
    memcpy(this->ptr, &x, sizeof(x));
    this->ptr += sizeof(x);
  }

  /** Asserts that `size` more bytes can be written at the cursor. */
  void requireSpace(uint32_t size)
  {
    /* A single request must leave 8 bytes of the whole buffer untouched.
     * Checking this->size >= 8 first keeps the subtraction from wrapping
     * around (and the assertion from passing vacuously) on tiny buffers.
     * NOTE(review): the 8 bytes are presumably kept for the end-of-batch
     * commands -- confirm against flush(). */
    GBE_ASSERT(this->size >= 8 && size < this->size - 8);
    GBE_ASSERT(getSpace() >= size);
  }

  /** Hands out `size` bytes starting at the current cursor and moves the
   *  cursor past them. Returns a pointer to the first handed-out byte. */
  uint8_t* allocSpace(uint32_t size)
  {
    GBE_ASSERT(getSpace() >= size);
    uint8_t *space_ptr = this->ptr;
    this->ptr += size;
    return space_ptr;
  }
};

/* Calls requireSpace() on batch buffer `b` for `n` dwords (n * 4 bytes).
 * `b` is parenthesized so that any pointer expression (e.g. &batch or
 * *slot) can be passed, not just a plain identifier. */
#define BEGIN_BATCH(b, n) do {                                            \
  (b)->requireSpace((n) * 4);                                             \
} while (0)

/* Emits the dword `d` into batch buffer `b` through emitDword().
 * Both arguments are parenthesized so arbitrary expressions expand safely. */
#define OUT_BATCH(b, d) do {                                              \
  (b)->emitDword((d));                                                    \
} while (0)

/* Emits a relocation for buffer object `bo` into batch buffer `b` through
 * emitReloc(), after asserting that `delta` is not negative.
 * Every argument is parenthesized so arbitrary expressions expand safely.
 * Note that `delta` appears twice in the expansion, so it should be free of
 * side effects. */
#define OUT_RELOC(b, bo, read_domains, write_domain, delta) do {          \
  GBE_ASSERT((delta) >= 0);                                               \
  (b)->emitReloc((bo), (read_domains), (write_domain), (delta));          \
} while (0)

/* Closing marker for a batch emission sequence: expands to a no-op statement
 * and never evaluates its argument. */
#define ADVANCE_BATCH(b) ((void)0)


/**
 * State shared by all hardware generations for setting up and submitting GPU
 * work: the buffer manager / context handles, the current batch buffer, the
 * auxiliary buffer objects and the tables of buffers bound for the call.
 *
 * Generation-specific steps are left to subclasses through the pure virtual
 * methods declared at the end of the struct.
 */
struct GenGPUState {
  static const int max_buf_n = 128;     /* capacity of the bound-buffer tables below */
  static const int max_img_n = 128;
  static const int max_sampler_n = 16;

  dri_bufmgr *bufmgr;
  drm_intel_context *ctx;
  int device_id;
  GenBatchbuffer* batchbuf;             /* set by newBatchbuf() */

  size_t global_wk_sz[3];
  struct {
    drm_intel_bo *bo;
  } stack_b;
  struct {
    drm_intel_bo *bo;
  } scratch_b;
  struct {
    drm_intel_bo *bo;
  } constant_b;
  struct {
    drm_intel_bo *bo;
  } time_stamp_b;   /* time stamp buffer */
  struct {
    drm_intel_bo *bo;
  } printf_b;       /* the printf buffer and index buffer */
  struct {
    drm_intel_bo *bo;
  } profiling_b;    /* the buffer used for profiling */
  struct {
    drm_intel_bo *bo;
  } aux_buf;
  /* NOTE(review): presumably byte offsets of each state area inside
   * aux_buf -- confirm against the implementation. */
  struct {
    uint32_t surface_heap_offset;
    uint32_t curbe_offset;
    uint32_t idrt_offset;
    uint32_t sampler_state_offset;
    uint32_t sampler_border_color_state_offset;
  } aux_offset;

  uint32_t per_thread_scratch;
  struct {
    uint32_t num_cs_entries;
    uint32_t size_cs_entry;  /* size of one entry in 512bit elements */
  } curb;

  drm_intel_bo *binded_buf[max_buf_n];  /* all buffers bound for the call */
  uint32_t target_buf_offset[max_buf_n];/* internal offset of each buffer bound for the call */
  uint32_t binded_offset[max_buf_n];    /* their offsets in the curbe buffer */
  uint32_t binded_n;                    /* number of buffers bound */

  unsigned long img_bitmap;             /* image usage bitmap. */
  unsigned int img_index_base;          /* base index for image surface.*/

  unsigned long sampler_bitmap;         /* sampler usage bitmap. */

  uint32_t max_threads;                 /* max threads requested by the user */

  GenGPUState(dri_bufmgr *bufmgr, drm_intel_context *ctx, int device_id);
  /* Virtual because this struct is a polymorphic base: destroying a derived
   * object through a GenGPUState pointer with a non-virtual destructor is
   * undefined behavior. */
  virtual ~GenGPUState(void);

  /**
   * Creates a GenBatchbuffer from this state's bufmgr and ctx with the
   * requested size and stores it in batchbuf.
   * NOTE(review): a batch buffer already stored in batchbuf is overwritten
   * without being released here -- confirm callers free it first.
   */
  void newBatchbuf(size_t sz)
  {
    this->batchbuf = GBE_NEW(GenBatchbuffer, bufmgr, ctx, sz);
  }

  /* Generation-independent operations, defined out of line. */
  void sync(void);
  void bindBuf(drm_intel_bo *buf, uint32_t offset, uint32_t internal_offset,
               size_t size, uint8_t bti);
  void setStack(uint32_t offset, uint32_t size, uint8_t bti);
  bool stateInit(uint32_t max_threads, uint32_t size_cs_entry, int profiling);
  bool allocConstantBuffer(uint32_t size, uint8_t bti);
  void batchStart(uint32_t use_slm);
  /* Note: the global_wk_sz parameter shadows the member of the same name. */
  void walker(uint32_t simd_sz, uint32_t thread_n, const size_t global_wk_off[3],
              const size_t global_wk_sz[3], const size_t local_wk_sz[3]);
  void bindSamplers(uint32_t *samplers, size_t sampler_sz);
  bool uploadCurbes(const void* data, uint32_t size, uint32_t thread_n, uint32_t curbe_sz);
  void writeTimestamp(int idx);
  int getCurbeSize(void);

  /* Generation-specific hooks; every concrete subclass must implement all
   * of them. */
  virtual void selectPipeline(void) = 0;
  virtual uint32_t getCacheCtrl(void) = 0;
  virtual void setBaseAddress(void) = 0;
  virtual void setupBTI(drm_intel_bo *buf, uint32_t internal_offset,
                        size_t size, unsigned char index, uint32_t format) = 0;
  virtual void setL3(uint32_t use_slm) = 0;
  virtual void pipeControl(void) = 0;
  virtual void loadIdrt(void) = 0;
  virtual void buildIdrt(drm_intel_bo *ker_bo, uint32_t curbe_sz, uint32_t use_slm,
                         uint32_t slm_sz, uint32_t thread_n) = 0;
  virtual void insertSampler(uint32_t index, uint32_t clk_sampler) = 0;
  virtual void loadVfeState(void) = 0;
  virtual void loadCurbeBuffer(void) = 0;
  virtual uint32_t getScratchIndex(uint32_t size) = 0;
  virtual void bindImage(uint32_t index, dri_bo* obj_bo, uint32_t obj_bo_offset,
                         uint32_t format, cl_mem_object_type type, uint32_t bpp,
                         int32_t w, int32_t h, int32_t depth, int32_t pitch,
                         int32_t slice_pitch, int32_t tiling) = 0;
  virtual void postAction(int32_t flush_mode) = 0;
};

/**
 * Gen7 specialization of GenGPUState. It declares overrides for the
 * generation-specific hooks of the base struct.
 *
 * NOTE(review): bindImage() is re-declared pure here and postAction() is not
 * overridden, so this struct is still abstract -- confirm that a further
 * subclass is expected to supply both.
 */
struct Gen7GPUState : public GenGPUState {
  virtual void selectPipeline();
  virtual uint32_t getCacheCtrl();
  virtual void setBaseAddress();
  virtual void setupBTI(drm_intel_bo *buf,
                        uint32_t internal_offset,
                        size_t size,
                        unsigned char index,
                        uint32_t format);
  virtual void setL3(uint32_t use_slm);
  virtual void pipeControl();
  virtual void loadIdrt();
  virtual void buildIdrt(drm_intel_bo *ker_bo,
                         uint32_t curbe_sz,
                         uint32_t use_slm,
                         uint32_t slm_sz,
                         uint32_t thread_n);
  virtual void insertSampler(uint32_t index, uint32_t clk_sampler);
  virtual void loadVfeState();
  virtual void loadCurbeBuffer();
  virtual uint32_t getScratchIndex(uint32_t size);
  /* Left pure virtual at this level. */
  virtual void bindImage(uint32_t index,
                         dri_bo *obj_bo,
                         uint32_t obj_bo_offset,
                         uint32_t format,
                         cl_mem_object_type type,
                         uint32_t bpp,
                         int32_t w,
                         int32_t h,
                         int32_t depth,
                         int32_t pitch,
                         int32_t slice_pitch,
                         int32_t tiling) = 0;
};

#endif /* __CL_GEN_GPU_STATE_H__ */