summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Luo Xionghu <xionghu.luo@intel.com> 2014-09-16 11:24:48 +0800
committer Zhigang Gong <zhigang.gong@intel.com> 2014-09-16 16:04:53 +0800
commit 565d1eb00d9a5219c2848b3674e40ac07cb48b89 (patch)
tree f08255eee310ae960d34df207570d820d4a2ed56
parent d17f7739c5d5db38498297103c0b8bf32ce469a3 (diff)
Improve the build performance of vector type built-in functions.
This patch was lost during the libocl merge. Resubmit it to improve the vector function performance. Please refer to commit e2db890596eea0a6eb741e11e576a38952f1ed1e for details.

Signed-off-by: Luo Xionghu <xionghu.luo@intel.com>
Reviewed-by: Zhigang Gong <zhigang.gong@linux.intel.com>
-rwxr-xr-x backend/src/libocl/script/gen_vector.py 45
1 file changed, 39 insertions, 6 deletions
diff --git a/backend/src/libocl/script/gen_vector.py b/backend/src/libocl/script/gen_vector.py
index a91dfcf..de28552 100755
--- a/backend/src/libocl/script/gen_vector.py
+++ b/backend/src/libocl/script/gen_vector.py
@@ -289,9 +289,42 @@ class builtinProto():
formatStr += ';'
self.append(formatStr)
return formatStr
- formatStr = self.append(formatStr, '{{return ({0}{1})('.format(vtype[0], vtype[1]))
- self.indent = len(formatStr)
- for j in range(0, vtype[1]):
+ if self.functionName != 'select' and ptypeSeqs[0] == ptypeSeqs[self.paramCount-1] and ptype[1] > 4:
+ formatStr += '\n{ \n union{'
+ formatStr = self.append(formatStr, ' {0} va[{1}];'.format(vtype[0], vtype[1]))
+ formatStr = self.append(formatStr, ' {0}{1} vv{2};'.format(vtype[0], vtype[1], vtype[1]))
+ formatStr += '\n }uret;'
+ formatStr += '\n union{'
+ formatStr = self.append(formatStr, ' {0} pa[{1}];'.format(ptype[0], ptype[1]))
+ formatStr = self.append(formatStr, ' {0}{1} pv{2};'.format(ptype[0], ptype[1], ptype[1]))
+ formatStr += '\n }'
+ for n in range(0, self.paramCount):
+ formatStr += 'usrc{0}'.format(n)
+ if n+1 != self.paramCount:
+ formatStr +=', '
+ formatStr += ';'
+
+ for n in range(0, self.paramCount):
+ formatStr = self.append(formatStr, ' usrc{0}.pv{1} = param{2};'.format(n, ptype[1], n))
+ formatStr = self.append(formatStr, ' for(int i =0; i < {0}; i++)'.format(ptype[1]))
+ formatStr += '\n uret.va[i] = '
+ if self.prefix == 'relational' and self.functionName != 'bitselect' and self.functionName != 'select':
+ formatStr += '-'
+ formatStr += '{0}('.format(self.functionName)
+
+ for n in range(0, self.paramCount):
+ formatStr += 'usrc{0}.pa[i]'.format(n)
+ if n+1 != self.paramCount:
+ formatStr +=', '
+ formatStr += ');'
+ formatStr = self.append(formatStr, ' return uret.vv{0};'.format(vtype[1]))
+ formatStr += '\n}'
+ formatStr = self.append(formatStr)
+ return formatStr
+ else:
+ formatStr = self.append(formatStr, '{{return ({0}{1})('.format(vtype[0], vtype[1]))
+ self.indent = len(formatStr)
+ for j in range(0, vtype[1]):
if (j != 0):
formatStr += ','
if (j + 1) % 2 == 0:
@@ -326,10 +359,10 @@ class builtinProto():
formatStr += ')'
- formatStr += '); }\n'
- self.append(formatStr)
+ formatStr += '); }\n'
+ self.append(formatStr)
- return formatStr
+ return formatStr
def output(self):
for line in self.outputStr: