diff options
Diffstat (limited to 'test/CodeGen/R600/schedule1.ll')
-rw-r--r-- | test/CodeGen/R600/schedule1.ll | 116 |
1 files changed, 116 insertions, 0 deletions
diff --git a/test/CodeGen/R600/schedule1.ll b/test/CodeGen/R600/schedule1.ll
new file mode 100644
index 0000000000..14fad97a15
--- /dev/null
+++ b/test/CodeGen/R600/schedule1.ll
@@ -0,0 +1,116 @@
+;RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s
+; Compiles @main below with llc for r600/cayman and matches, in this order,
+; 4 MOV_SAT, 4 MUL_IEEE and 3 x 4 MULADD_IEEE, each group on channels X, Y, Z, W.
+;CHECK: MOV_SAT T{{[0-9]+\.[X]}}
+;CHECK: MOV_SAT T{{[0-9]+\.[Y]}}
+;CHECK: MOV_SAT T{{[0-9]+\.[Z]}}
+;CHECK: MOV_SAT T{{[0-9]+\.[W]}}
+;CHECK: MUL_IEEE T{{[0-9]+\.[X]}}
+;CHECK: MUL_IEEE T{{[0-9]+\.[Y]}}
+;CHECK: MUL_IEEE T{{[0-9]+\.[Z]}}
+;CHECK: MUL_IEEE T{{[0-9]+\.[W]}}
+;CHECK: MULADD_IEEE T{{[0-9]+\.[X]}}
+;CHECK: MULADD_IEEE T{{[0-9]+\.[Y]}}
+;CHECK: MULADD_IEEE T{{[0-9]+\.[Z]}}
+;CHECK: MULADD_IEEE T{{[0-9]+\.[W]}}
+;CHECK: MULADD_IEEE T{{[0-9]+\.[X]}}
+;CHECK: MULADD_IEEE T{{[0-9]+\.[Y]}}
+;CHECK: MULADD_IEEE T{{[0-9]+\.[Z]}}
+;CHECK: MULADD_IEEE T{{[0-9]+\.[W]}}
+;CHECK: MULADD_IEEE T{{[0-9]+\.[X]}}
+;CHECK: MULADD_IEEE T{{[0-9]+\.[Y]}}
+;CHECK: MULADD_IEEE T{{[0-9]+\.[Z]}}
+;CHECK: MULADD_IEEE T{{[0-9]+\.[W]}}
+; NOTE(review): presumably clamp -> MOV_SAT, lone fmul -> MUL_IEEE and fmul+fadd -> MULADD_IEEE (the IR has 4 / 4 / 12 of these) - confirm.
+define void @main() {
+main_body:
+  %0 = call float @llvm.R600.load.input(i32 4) ; %0..%3 (inputs 4-7) are the scalar factors of the products below
+  %1 = call float @llvm.R600.load.input(i32 5)
+  %2 = call float @llvm.R600.load.input(i32 6)
+  %3 = call float @llvm.R600.load.input(i32 7)
+  %4 = call float @llvm.R600.load.input(i32 8) ; %4..%7 (inputs 8-11) are only consumed by the clamp calls
+  %5 = call float @llvm.R600.load.input(i32 9)
+  %6 = call float @llvm.R600.load.input(i32 10)
+  %7 = call float @llvm.R600.load.input(i32 11)
+  %8 = load <4 x float> addrspace(9)* null ; %10/%13/%16/%19 = %0 * element 0..3 of the vector at null in addrspace(9)
+  %9 = extractelement <4 x float> %8, i32 0
+  %10 = fmul float %0, %9
+  %11 = load <4 x float> addrspace(9)* null
+  %12 = extractelement <4 x float> %11, i32 1
+  %13 = fmul float %0, %12
+  %14 = load <4 x float> addrspace(9)* null
+  %15 = extractelement <4 x float> %14, i32 2
+  %16 = fmul float %0, %15
+  %17 = load <4 x float> addrspace(9)* null
+  %18 = extractelement <4 x float> %17, i32 3
+  %19 = fmul float %0, %18
+  %20 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1) ; %23/%27/%31/%35 = %1 * element 0..3 of array entry 1, plus the matching product above
+  %21 = extractelement <4 x float> %20, i32 0
+  %22 = fmul float %1, %21
+  %23 = fadd float %22, %10
+  %24 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
+  %25 = extractelement <4 x float> %24, i32 1
+  %26 = fmul float %1, %25
+  %27 = fadd float %26, %13
+  %28 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
+  %29 = extractelement <4 x float> %28, i32 2
+  %30 = fmul float %1, %29
+  %31 = fadd float %30, %16
+  %32 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
+  %33 = extractelement <4 x float> %32, i32 3
+  %34 = fmul float %1, %33
+  %35 = fadd float %34, %19
+  %36 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2) ; %39/%43/%47/%51 = %2 * element 0..3 of array entry 2, plus the running sums %23/%27/%31/%35
+  %37 = extractelement <4 x float> %36, i32 0
+  %38 = fmul float %2, %37
+  %39 = fadd float %38, %23
+  %40 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
+  %41 = extractelement <4 x float> %40, i32 1
+  %42 = fmul float %2, %41
+  %43 = fadd float %42, %27
+  %44 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
+  %45 = extractelement <4 x float> %44, i32 2
+  %46 = fmul float %2, %45
+  %47 = fadd float %46, %31
+  %48 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
+  %49 = extractelement <4 x float> %48, i32 3
+  %50 = fmul float %2, %49
+  %51 = fadd float %50, %35
+  %52 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3) ; %55/%59/%63/%67 = %3 * element 0..3 of array entry 3, plus the running sums %39/%43/%47/%51
+  %53 = extractelement <4 x float> %52, i32 0
+  %54 = fmul float %3, %53
+  %55 = fadd float %54, %39
+  %56 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
+  %57 = extractelement <4 x float> %56, i32 1
+  %58 = fmul float %3, %57
+  %59 = fadd float %58, %43
+  %60 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
+  %61 = extractelement <4 x float> %60, i32 2
+  %62 = fmul float %3, %61
+  %63 = fadd float %62, %47
+  %64 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
+  %65 = extractelement <4 x float> %64, i32 3
+  %66 = fmul float %3, %65
+  %67 = fadd float %66, %51
+  %68 = call float @llvm.AMDIL.clamp.(float %4, float 0.000000e+00, float 1.000000e+00) ; clamp intrinsic applied to %4..%7 with operands 0.0 and 1.0
+  %69 = call float @llvm.AMDIL.clamp.(float %5, float 0.000000e+00, float 1.000000e+00)
+  %70 = call float @llvm.AMDIL.clamp.(float %6, float 0.000000e+00, float 1.000000e+00)
+  %71 = call float @llvm.AMDIL.clamp.(float %7, float 0.000000e+00, float 1.000000e+00)
+  %72 = insertelement <4 x float> undef, float %55, i32 0 ; pack the four final sums into one vector
+  %73 = insertelement <4 x float> %72, float %59, i32 1
+  %74 = insertelement <4 x float> %73, float %63, i32 2
+  %75 = insertelement <4 x float> %74, float %67, i32 3
+  call void @llvm.R600.store.swizzle(<4 x float> %75, i32 60, i32 1) ; NOTE(review): meaning of the i32 60 / i32 1 operands is not visible here - confirm
+  %76 = insertelement <4 x float> undef, float %68, i32 0 ; pack the four clamp results into one vector
+  %77 = insertelement <4 x float> %76, float %69, i32 1
+  %78 = insertelement <4 x float> %77, float %70, i32 2
+  %79 = insertelement <4 x float> %78, float %71, i32 3
+  call void @llvm.R600.store.swizzle(<4 x float> %79, i32 0, i32 2) ; NOTE(review): meaning of the i32 0 / i32 2 operands is not visible here - confirm
+  ret void
+}
+
+declare float @llvm.R600.load.input(i32) readnone
+
+declare float @llvm.AMDIL.clamp.(float, float, float) readnone
+
+declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) |