1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
|
#ifndef __CPU_KERNEL_H__
#define __CPU_KERNEL_H__
#include "../deviceinterface.h"
#include "config.h"
#include <llvm/ExecutionEngine/GenericValue.h>
#include <vector>
#include <string>
#include <pthread.h>
// Forward declaration: this header only ever names llvm::Function through
// pointers (constructor argument, accessors and members of Coal::CPUKernel),
// so the full class definition is not required here.
namespace llvm
{
class Function;
}
namespace Coal
{
// Forward declarations: the classes below refer to these types exclusively
// through pointers, so their definitions are not needed in this header.
class CPUDevice;
class Kernel;
class KernelEvent;
/**
 * \brief CPU-device flavour of a kernel; derives from DeviceKernel.
 *
 * Associates a CPUDevice, a Kernel and an llvm::Function (the three
 * constructor arguments) and exposes work-group sizing queries.
 *
 * NOTE(review): the query methods below are presumably overrides of
 * DeviceKernel virtuals declared in ../deviceinterface.h - confirm there.
 *
 * NOTE(review): this class stores a pthread_mutex_t by value and declares a
 * destructor but no copy constructor / copy assignment, so the implicitly
 * generated copy operations would duplicate the mutex. POSIX leaves the use
 * of a copied mutex undefined; instances should not be copied.
 */
class CPUKernel : public DeviceKernel
{
public:
/** \brief Builds the CPU kernel from its device, its Kernel and the LLVM function. */
CPUKernel(CPUDevice *device, Kernel *kernel, llvm::Function *function);
~CPUKernel();
// Sizing queries. The names match the clGetKernelWorkGroupInfo parameters
// (CL_KERNEL_WORK_GROUP_SIZE, CL_KERNEL_LOCAL_MEM_SIZE,
// CL_KERNEL_PRIVATE_MEM_SIZE, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE);
// presumably they back those queries - TODO confirm in the implementation.
size_t workGroupSize() const;
cl_ulong localMemSize() const;
cl_ulong privateMemSize() const;
size_t preferredWorkGroupSizeMultiple() const;
/**
 * \brief Proposes a work-group size for one dimension.
 * \param num_dims number of work dimensions
 * \param dim dimension being sized (presumably 0-based - confirm)
 * \param global_work_size global work size for \p dim
 * \return the suggested size; the heuristic lives in the implementation file
 */
size_t guessWorkGroupSize(cl_uint num_dims, cl_uint dim,
size_t global_work_size) const;
// Accessors; presumably return the pointers handed to the constructor
// (stored in p_kernel, p_device and p_function) - confirm in the .cpp.
Kernel *kernel() const;
CPUDevice *device() const;
llvm::Function *function() const;
/**
 * \brief Returns an llvm::Function used to invoke the kernel.
 * \param freeLocal taken by non-const reference, so the callee may modify
 *        it; presumably receives pointers that the caller must free
 *        afterwards - TODO confirm against the implementation.
 * NOTE(review): likely related to p_call_function and
 * p_call_function_mutex below - verify.
 */
llvm::Function *callFunction(std::vector<void *> &freeLocal);
private:
CPUDevice *p_device;
Kernel *p_kernel;
// p_function: the kernel's LLVM function (constructor argument);
// p_call_function: second llvm::Function pointer, presumably the value
// handed out by callFunction() - confirm.
llvm::Function *p_function, *p_call_function;
// Presumably serialises access to p_call_function (going by its name) - confirm.
pthread_mutex_t p_call_function_mutex;
};
class CPUKernelEvent;
/**
 * \brief One work-group of a kernel execution on the CPU device.
 *
 * Constructed from the CPUKernel, the KernelEvent being executed, the
 * associated CPUKernelEvent and a work-group index, then executed through
 * run(). Also provides the per-work-item queries listed below.
 */
class CPUKernelWorkGroup
{
public:
/**
 * \param kernel CPU kernel to execute
 * \param event kernel event this work-group belongs to
 * \param cpu_event CPU-side bookkeeping object for \p event
 * \param work_group_index pointer to the index of this work-group;
 *        presumably an array with one entry per dimension (at most
 *        MAX_WORK_DIMS) - TODO confirm against the caller
 */
CPUKernelWorkGroup(CPUKernel *kernel, KernelEvent *event,
CPUKernelEvent *cpu_event,
const size_t *work_group_index);
~CPUKernelWorkGroup();
/** \brief Executes the work-group. The meaning of the bool result is defined in the implementation (presumably success) - confirm. */
bool run();
// Native functions: the names mirror the OpenCL C work-item built-ins
// (get_global_id, get_work_dim, get_global_size, get_local_size,
// get_local_id, get_num_groups, get_group_id, get_global_offset);
// presumably these are the host-side implementations - confirm.
// dimindx selects the dimension being queried.
// NOTE(review): capitalisation is inconsistent (getGlobalId vs
// getLocalID / getGroupID); callers depend on these exact names.
size_t getGlobalId(cl_uint dimindx) const;
cl_uint getWorkDim() const;
size_t getGlobalSize(cl_uint dimindx) const;
size_t getLocalSize(cl_uint dimindx) const;
size_t getLocalID(cl_uint dimindx) const;
size_t getNumGroups(cl_uint dimindx) const;
size_t getGroupID(cl_uint dimindx) const;
size_t getGlobalOffset(cl_uint dimindx) const;
/** \brief Hook for a built-in called \p name that could not be found; what it does (e.g. reporting) is defined in the implementation. */
void builtinNotFound(const std::string &name) const;
private:
CPUKernel *p_kernel;
CPUKernelEvent *p_cpu_event;
KernelEvent *p_event;
cl_uint p_work_dim;
// Per-dimension state, MAX_WORK_DIMS entries each (MAX_WORK_DIMS is not
// defined in this header; presumably it comes from config.h).
// Presumed roles, to be confirmed in the .cpp: p_index = index of this
// work-group, p_current = current work-item position, p_maxs = upper
// bounds for p_current, p_global_id = global id of the current item.
size_t p_index[MAX_WORK_DIMS],
p_current[MAX_WORK_DIMS],
p_maxs[MAX_WORK_DIMS],
p_global_id[MAX_WORK_DIMS];
};
/**
 * \brief CPU-side bookkeeping for the execution of one KernelEvent.
 *
 * Hands out work-groups through the reserve() / takeInstance() pair and is
 * told about completed ones through workGroupFinished().
 *
 * NOTE(review): holds a pthread_mutex_t by value and declares no copy
 * constructor / copy assignment; POSIX leaves the use of a copied mutex
 * undefined, so instances should not be copied.
 */
class CPUKernelEvent
{
public:
/** \brief Builds the bookkeeping object for \p event on \p device. */
CPUKernelEvent(CPUDevice *device, KernelEvent *event);
~CPUKernelEvent();
// Protocol stated by the comments below: reserve() locks the event and
// takeInstance() must follow exactly once to unlock it.
bool reserve(); /*!< The next Work Group that will execute will be the last. Locks the event */
bool finished(); /*!< All the work groups have finished */
CPUKernelWorkGroup *takeInstance(); /*!< Must be called exactly one time after reserve(). Unlocks the event */
/** \brief Notification that one work-group is done; presumably updates p_finished_wg - confirm. */
void workGroupFinished();
private:
CPUDevice *p_device;
KernelEvent *p_event;
// Per-dimension work-group counters with MAX_WORK_DIMS entries each;
// presumably the index of the next work-group to hand out and the number
// of work-groups in each dimension - confirm in the .cpp.
size_t p_current_work_group[MAX_WORK_DIMS],
p_max_work_groups[MAX_WORK_DIMS];
// Scalar counters; going by the names: work-groups handed out, work-groups
// finished, and the total number of work-groups - confirm.
size_t p_current_wg, p_finished_wg, p_num_wg;
// Presumably the lock taken by reserve() and released by takeInstance(),
// as their comments describe - confirm.
pthread_mutex_t p_mutex;
};
}
// Global-namespace helpers, declared outside namespace Coal.
// setThreadLocalWorkGroup: registers `current` as the work-group of the
// calling thread (going by the name; the storage is not visible here).
void setThreadLocalWorkGroup(Coal::CPUKernelWorkGroup *current);
// getBuiltin: looks up a built-in by name and returns an untyped pointer to
// it. NOTE(review): the result for an unknown name is not visible in this
// header - check the implementation (see also
// CPUKernelWorkGroup::builtinNotFound).
void *getBuiltin(const std::string &name);
#endif
|