summaryrefslogtreecommitdiff
path: root/src/core/cpu/kernel.h
blob: 91d1dfd46d5697c3d1894f2bf9a4449184339f1d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
#ifndef __CPU_KERNEL_H__
#define __CPU_KERNEL_H__

#include "../deviceinterface.h"
#include "config.h"

#include <llvm/ExecutionEngine/GenericValue.h>
#include <vector>
#include <string>
#include <pthread.h>

// Forward declaration only: this header refers to llvm::Function solely
// through pointers, so the full class definition is not required here.
namespace llvm
{
    class Function;
}

namespace Coal
{

// Forward declarations of types that this header only uses through pointers
// (constructor parameters, accessors' return types and private members).
class CPUDevice;
class Kernel;
class KernelEvent;

/*!
 * \brief CPU-device flavour of a kernel, derived from DeviceKernel.
 *
 * Holds raw pointers to the CPUDevice, the Kernel and the llvm::Function
 * given to the constructor, a second llvm::Function pointer
 * (p_call_function) and a pthread mutex named after that second pointer.
 *
 * NOTE(review): the member functions are only declared here; the notes
 * below are derived from the signatures and names and should be confirmed
 * against the implementation file.
 */
class CPUKernel : public DeviceKernel
{
    public:
        /*!
         * \brief Creates the CPU-side kernel object.
         * \param device   CPU device associated with the kernel
         * \param kernel   kernel object associated with this CPU kernel
         * \param function LLVM function associated with the kernel
         */
        CPUKernel(CPUDevice *device, Kernel *kernel, llvm::Function *function);
        ~CPUKernel();

        /*!
         * Size/limit queries.
         * NOTE(review): the names match the CL_KERNEL_WORK_GROUP_SIZE,
         * CL_KERNEL_LOCAL_MEM_SIZE, CL_KERNEL_PRIVATE_MEM_SIZE and
         * CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE queries of
         * clGetKernelWorkGroupInfo; presumably they provide those values.
         */
        size_t workGroupSize() const;
        cl_ulong localMemSize() const;
        cl_ulong privateMemSize() const;
        size_t preferredWorkGroupSizeMultiple() const;

        /*!
         * \brief Proposes a work-group size for one dimension.
         * \param num_dims         number of dimensions of the work
         * \param dim              dimension the size is wanted for
         * \param global_work_size global work size given for that dimension
         * \return a work-group size (presumably used when the caller did
         *         not specify one - TODO confirm)
         */
        size_t guessWorkGroupSize(cl_uint num_dims, cl_uint dim,
                                  size_t global_work_size) const;

        /*! \return Kernel pointer (presumably the one passed to the constructor) */
        Kernel *kernel() const;
        /*! \return CPUDevice pointer (presumably the one passed to the constructor) */
        CPUDevice *device() const;

        /*! \return llvm::Function pointer (presumably the one passed to the constructor) */
        llvm::Function *function() const;

        /*!
         * \brief Returns an llvm::Function used to call the kernel.
         * \param freeLocal taken by non-const reference, so the callee may
         *                  append to it; presumably a list of buffers the
         *                  caller has to free afterwards - TODO confirm
         * NOTE(review): not const, and the class carries
         * p_call_function / p_call_function_mutex, which suggests the
         * result is built once under the mutex and cached - confirm.
         */
        llvm::Function *callFunction(std::vector<void *> &freeLocal);

    private:
        // Copying is disabled: the class embeds a pthread_mutex_t, and POSIX
        // leaves the result of using a copy of a mutex undefined. Both
        // members are declared private and deliberately never defined
        // (the pre-C++11 idiom, as this header uses no C++11 features).
        CPUKernel(const CPUKernel &);
        CPUKernel &operator=(const CPUKernel &);

        CPUDevice *p_device;
        Kernel *p_kernel;
        llvm::Function *p_function;
        llvm::Function *p_call_function;
        pthread_mutex_t p_call_function_mutex;
};

// Forward-declared because CPUKernelWorkGroup (defined next) takes and stores
// a CPUKernelEvent pointer, while CPUKernelEvent itself is defined after it.
class CPUKernelEvent;

/*!
 * \brief A single work group of a kernel being executed on the CPU device.
 *
 * Built from a CPUKernel, the KernelEvent being processed, the matching
 * CPUKernelEvent and a work-group index; exposes run() plus a family of
 * per-dimension query functions.
 *
 * NOTE(review): only declarations are visible in this header; the notes
 * below follow from the signatures and names and should be confirmed
 * against the implementation file.
 */
class CPUKernelWorkGroup
{
    public:
        /*!
         * \param kernel           CPU kernel this work group executes
         * \param event            kernel event being processed
         * \param cpu_event        CPU-side event associated with \p event
         * \param work_group_index pointer to the index of this work group
         *                         (presumably MAX_WORK_DIMS entries, one
         *                         per dimension - TODO confirm)
         */
        CPUKernelWorkGroup(CPUKernel *kernel,
                           KernelEvent *event,
                           CPUKernelEvent *cpu_event,
                           const size_t *work_group_index);
        ~CPUKernelWorkGroup();

        /*!
         * \brief Executes this work group.
         * \return a success flag, presumably - the meaning of the bool is
         *         not visible in this header
         */
        bool run();

        /*!
         * Native functions, each taking a dimension index where relevant.
         * NOTE(review): the names mirror the OpenCL C work-item built-ins
         * (get_global_id, get_work_dim, get_global_size, get_local_size,
         * get_local_id, get_num_groups, get_group_id, get_global_offset);
         * presumably these are the host-side implementations behind them.
         */
        size_t getGlobalId(cl_uint dimindx) const;
        cl_uint getWorkDim() const;
        size_t getGlobalSize(cl_uint dimindx) const;
        size_t getLocalSize(cl_uint dimindx) const;
        size_t getLocalID(cl_uint dimindx) const;
        size_t getNumGroups(cl_uint dimindx) const;
        size_t getGroupID(cl_uint dimindx) const;
        size_t getGlobalOffset(cl_uint dimindx) const;

        /*!
         * \brief Hook for a builtin that could not be found.
         * \param name name of the missing builtin
         * NOTE(review): what it does (report, abort, ...) is not visible here.
         */
        void builtinNotFound(const std::string &name) const;

    private:
        CPUKernel *p_kernel;
        CPUKernelEvent *p_cpu_event;
        KernelEvent *p_event;
        cl_uint p_work_dim;

        // Per-dimension state, one slot for each of MAX_WORK_DIMS dimensions.
        // NOTE(review): exact roles are set by the implementation; going by
        // the names: index of this group, current position, upper bounds and
        // current global id.
        size_t p_index[MAX_WORK_DIMS];
        size_t p_current[MAX_WORK_DIMS];
        size_t p_maxs[MAX_WORK_DIMS];
        size_t p_global_id[MAX_WORK_DIMS];
};

/*!
 * \brief CPU-side bookkeeping for one KernelEvent.
 *
 * Keeps per-dimension work-group positions/limits, three work-group
 * counters and a pthread mutex. Work groups are handed out through the
 * reserve() / takeInstance() pair.
 *
 * NOTE(review): only declarations are visible in this header; details not
 * stated in the original comments should be confirmed against the
 * implementation file.
 */
class CPUKernelEvent
{
    public:
        /*!
         * \param device CPU device associated with the event
         * \param event  kernel event this object keeps track of
         */
        CPUKernelEvent(CPUDevice *device, KernelEvent *event);
        ~CPUKernelEvent();

        /*!
         * \brief Reserves the next work group. Locks the event.
         * \return per the original note, whether the next work group that
         *         will execute will be the last one - TODO confirm the exact
         *         meaning of the flag against the implementation
         * \note The event stays locked until takeInstance() is called.
         */
        bool reserve();

        /*! \return whether all the work groups have finished */
        bool finished();

        /*!
         * \brief Hands out a work group. Unlocks the event.
         * \note Must be called exactly one time after reserve().
         * \return pointer to a CPUKernelWorkGroup (ownership is not visible
         *         from this header - TODO confirm who deletes it)
         */
        CPUKernelWorkGroup *takeInstance();

        /*!
         * \brief Notification that a work group has finished.
         * NOTE(review): presumably updates p_finished_wg - confirm.
         */
        void workGroupFinished();

    private:
        // Copying is disabled: the class embeds a pthread_mutex_t, and POSIX
        // leaves the result of using a copy of a mutex undefined. Both
        // members are declared private and deliberately never defined
        // (the pre-C++11 idiom, as this header uses no C++11 features).
        CPUKernelEvent(const CPUKernelEvent &);
        CPUKernelEvent &operator=(const CPUKernelEvent &);

        CPUDevice *p_device;
        KernelEvent *p_event;

        // Per-dimension work-group state, MAX_WORK_DIMS slots each.
        size_t p_current_work_group[MAX_WORK_DIMS];
        size_t p_max_work_groups[MAX_WORK_DIMS];

        // Work-group counters (going by the names: handed out, finished, total).
        size_t p_current_wg;
        size_t p_finished_wg;
        size_t p_num_wg;

        pthread_mutex_t p_mutex;
};

}

/*!
 * \brief Declared at global scope, outside namespace Coal.
 * \param current work group to register
 * NOTE(review): judging by the name, this records \p current as the work
 * group of the calling thread; the definition is not in this header - confirm.
 */
void setThreadLocalWorkGroup(Coal::CPUKernelWorkGroup *current);
/*!
 * \brief Declared at global scope, outside namespace Coal.
 * \param name name of the builtin
 * \return an untyped pointer
 * NOTE(review): presumably resolves the builtin called \p name to the
 * address of its native implementation; what is returned for an unknown
 * name is not visible in this header - confirm.
 */
void *getBuiltin(const std::string &name);

#endif