/*
 * mul - store the product of two integers in a global buffer.
 *
 * out:  buffer in the global address space; only element 0 is written.
 * arg0: left-hand factor.
 * arg1: right-hand factor.
 *
 * The store does not depend on any work-item id, so every work-item that
 * runs this kernel writes the same value to out[0].
 */
__kernel void mul(__global int *out, int arg0, int arg1)
{
    const int product = arg0 * arg1;

    *out = product;
}