summaryrefslogtreecommitdiff
path: root/BealtoOpenCL/include/CLCommandQueue.h
blob: 6fde44c0820e8d3e860a2d1def78a948b678df81 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
// OpenCL command queue object
// (c) EB Sep 2009

#ifndef CLCommandQueue_h
#define CLCommandQueue_h

#include <CL/cl.h>
#include "CLBuffer.h"
#include "CLEvent.h"
#include "CLEventList.h"
#include "CLKernel.h"

namespace cl {

class CommandQueue
{
public:

  // Instances of this class are created from a Context.

  // Copy constructor
  CommandQueue(const CommandQueue & a) : mX(a.mX)
  { clRetainCommandQueue(mX); }

  // = operator
  CommandQueue & operator = (const CommandQueue & a)
  {
    if (a.mX != mX)
    {
      clReleaseCommandQueue(mX);
      mX = a.mX;
      clRetainCommandQueue(mX);
    }
    return *this;
  }

  // Destructor
  virtual ~CommandQueue()
  { clReleaseCommandQueue(mX); }

  // Commands
  // All commands return an Event instance.
  // If the call fails, the returned event is invalid.

  // Read buffer to host memory
  Event readBuffer(Buffer * b,cl_bool blocking_read,size_t offset,size_t cb,void * ptr,const EventList & wait_list = EventList())
  {
    if (b == 0) return Event(0); // Invalid
    cl_uint num_events = 0;
    const cl_event * events = 0;
    wait_list.getParams(num_events,events);
    cl_event e = 0;
    cl_int status = clEnqueueReadBuffer(mX,b->mX,blocking_read,offset,cb,ptr,num_events,events,&e);
    REPORT_OPENCL_STATUS(status);
    if (status != CL_SUCCESS) e = 0;
    return Event(e);
  }

  // Write buffer from host memory
  Event writeBuffer(Buffer * b,cl_bool blocking_write,size_t offset,size_t cb,const void * ptr,const EventList & wait_list = EventList())
  {
    if (b == 0) return Event(0); // Invalid
    cl_uint num_events = 0;
    const cl_event * events = 0;
    wait_list.getParams(num_events,events);
    cl_event e = 0;
    cl_int status = clEnqueueWriteBuffer(mX,b->mX,blocking_write,offset,cb,ptr,num_events,events,&e);
    REPORT_OPENCL_STATUS(status);
    if (status != CL_SUCCESS) e = 0;
    return Event(e);
  }

  // Copy buffers
  Event copyBuffer(Buffer * src,Buffer * dst,size_t src_offset,size_t dst_offset,size_t cb,const EventList & wait_list = EventList())
  {
    if (src == 0 || dst == 0) return Event(0); // Invalid
    cl_uint num_events = 0;
    const cl_event * events = 0;
    wait_list.getParams(num_events,events);
    cl_event e = 0;
    cl_int status = clEnqueueCopyBuffer(mX,src->mX,dst->mX,src_offset,dst_offset,cb,num_events,events,&e);
    REPORT_OPENCL_STATUS(status);
    if (status != CL_SUCCESS) e = 0;
    return Event(e);
  }

  // Map buffer.  The mapped address is put in ADDRESS.
  template <typename T> Event mapBuffer(Buffer * b,T * & address,cl_bool blocking_map,cl_map_flags map_flags,size_t offset,size_t cb,const EventList & wait_list = EventList())
  {
    if (b == 0) return Event(0);
    cl_uint num_events = 0;
    const cl_event * events = 0;
    wait_list.getParams(num_events,events);
    cl_event e = 0;
    cl_int status;
    address = (T *)clEnqueueMapBuffer(mX,b->mX,blocking_map,map_flags,offset,cb,num_events,events,&e,&status);
    REPORT_OPENCL_STATUS(status);
    if (status != CL_SUCCESS) e = 0;
    return Event(e);
  }

  // Map full buffer.  Queries its size, and maps all its contents.  The mapped address is put in ADDRESS.
  template <typename T> Event mapBuffer(Buffer * b,T * & address,cl_bool blocking_map,cl_map_flags map_flags,const EventList & wait_list = EventList())
  {
    if (b == 0) return Event(0); // Invalid buffer
    size_t sz = b->getSize();
    if (sz == 0) return Event(0); // Invalid size
    return mapBuffer<T>(b,address,blocking_map,map_flags,0,sz,wait_list);
  }

  // Unmap memory object. (Buffer, Image2D, etc.)
  Event unmapMemoryObject(MemoryObject * m,void * address,const EventList & wait_list = EventList())
  {
    if (m == 0) return Event(0);
    cl_uint num_events = 0;
    const cl_event * events = 0;
    wait_list.getParams(num_events,events);
    cl_event e = 0;
    cl_int status = clEnqueueUnmapMemObject(mX,m->mX,address,num_events,events,&e);
    REPORT_OPENCL_STATUS(status);
    if (status != CL_SUCCESS) e = 0;
    return Event(e);
  }

  // Enqueue 1D kernel execution
  // K is the kernel to run.
  // N is the total number of work items (global work size).
  // G is the number of work items inside a work group. G must divide N.
  // G can be 0, in which case the OpenCL implementation will choose the best value.
  Event execKernel1(Kernel * k,size_t n,size_t g,const EventList & wait_list = EventList())
  {
    if (k == 0) return Event(0); // Invalid
    cl_uint num_events = 0;
    const cl_event * events = 0;
    wait_list.getParams(num_events,events);
    cl_event e = 0;
    size_t * pgw = &n;
    size_t * plw = (g>0)?(&g):0;
    cl_int status = clEnqueueNDRangeKernel(mX,k->mX,1,0,pgw,plw,num_events,events,&e);
    REPORT_OPENCL_STATUS(status);
    if (status != CL_SUCCESS) e = 0;
    return Event(e);
  }

  // Enqueue 2D kernel execution
  // K is the kernel to run.
  // NX*NY is the total number of work items (global work size).
  // GX*GY is the number of work items inside a work group. G<d> must divide N<d>.
  Event execKernel2(Kernel * k,size_t nx,size_t ny,size_t gx,size_t gy,const EventList & wait_list = EventList())
  {
    if (k == 0) return Event(0); // Invalid
    cl_uint num_events = 0;
    const cl_event * events = 0;
    wait_list.getParams(num_events,events);
    cl_event e = 0;
    size_t pgw[2]; pgw[0] = nx; pgw[1] = ny;
    size_t plw[2]; plw[0] = gx; plw[1] = gy;
    cl_int status = clEnqueueNDRangeKernel(mX,k->mX,2,0,pgw,plw,num_events,events,&e);
    REPORT_OPENCL_STATUS(status);
    if (status != CL_SUCCESS) e = 0;
    return Event(e);
  }

  //
  // Execution control
  //

  // Enqueue a marker
  Event mark()
  {
    cl_event e = 0;
    cl_int status = clEnqueueMarker(mX,&e);
    REPORT_OPENCL_STATUS(status);
    if (status != CL_SUCCESS) e = 0;
    return Event(e);
  }

  // Insert a wait point for a specific list of events
  bool wait(const EventList & wait_list)
  {
    cl_uint num_events = 0;
    const cl_event * events = 0;
    wait_list.getParams(num_events,events);
    if (num_events == 0) return true; // Nop
    cl_int status = clEnqueueWaitForEvents(mX,num_events,events);
    REPORT_OPENCL_STATUS(status);
    return (status == CL_SUCCESS);
  }

  // Insert a wait point for all events. (barrier)
  bool wait()
  {
    cl_int status = clEnqueueBarrier(mX);
    REPORT_OPENCL_STATUS(status);
    return (status == CL_SUCCESS);
  }

  // Blocks until all queued commands have been submitted to the device.
  bool flush()
  {
    cl_int status = clFlush(mX);
    REPORT_OPENCL_STATUS(status);
    return (status == CL_SUCCESS);
  }

  // Blocks until all queued commands have been executed.
  bool finish()
  {
    cl_int status = clFinish(mX);
    REPORT_OPENCL_STATUS(status);
    return (status == CL_SUCCESS);
  }

private:

  // Private constructors
  CommandQueue(); // not implemented
  CommandQueue(cl_command_queue x) : mX(x) { }

  // OpenCL handle (always non 0)
  cl_command_queue mX;

  friend class Context;

}; // class CommandQueue

} // namespace

#endif // CLCommandQueue_h