summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorValery Pykhtin <Valery.Pykhtin@amd.com>2016-04-06 15:55:10 +0000
committerValery Pykhtin <Valery.Pykhtin@amd.com>2016-04-06 15:55:10 +0000
commit496db860118a1e6d0286e3a8c59f857496228f74 (patch)
tree7023b2a1386921befb88378243d6df86160e2984
parent505761b9d2be7bb4fdf64227af023fbb8b0b7245 (diff)
[AMDGPU] llvm-objdump: Minimal HSA Code Object disassembler support.
Differential revision: http://reviews.llvm.org/D16998 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@265550 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--test/Object/AMDGPU/objdump.s75
-rw-r--r--tools/llvm-objdump/llvm-objdump.cpp41
2 files changed, 116 insertions, 0 deletions
diff --git a/test/Object/AMDGPU/objdump.s b/test/Object/AMDGPU/objdump.s
new file mode 100644
index 00000000000..997bcb1f325
--- /dev/null
+++ b/test/Object/AMDGPU/objdump.s
@@ -0,0 +1,75 @@
+// RUN: llvm-mc -arch=amdgcn -mcpu=tonga %s -filetype=obj | llvm-objdump -disassemble -arch-name=amdgcn -mcpu=tonga - | FileCheck %s
+
+ .text
+
+ .amdgpu_hsa_kernel hello_world
+hello_world:
+ .amd_kernel_code_t
+ .end_amd_kernel_code_t
+
+ s_mov_b32 m0, 0x10000
+ s_load_dwordx2 s[0:1], s[4:5], 0x8
+ s_waitcnt lgkmcnt(0)
+ s_add_u32 s0, s7, s0
+ v_add_i32_e32 v1, vcc, s0, v1
+ s_movk_i32 s0, 0x483
+ v_cmp_ge_i32_e32 vcc, s0, v0
+ s_and_saveexec_b64 s[0:1], vcc
+ v_lshlrev_b32_e32 v4, 2, v0
+ s_cbranch_execz 21
+ s_mov_b64 s[2:3], exec
+ s_mov_b64 s[10:11], exec
+ v_mov_b32_e32 v3, v0
+ s_endpgm
+
+ .amdgpu_hsa_kernel hello_world2
+hello_world2:
+ .amd_kernel_code_t
+ .end_amd_kernel_code_t
+
+ s_and_saveexec_b64 s[0:1], vcc
+ s_cbranch_execz 85
+ s_load_dwordx4 s[8:11], s[4:5], 0x40
+ v_ashrrev_i32_e32 v77, 31, v76
+ v_lshlrev_b64 v[10:11], 2, v[76:77]
+ s_waitcnt lgkmcnt(0)
+ v_add_i32_e32 v10, vcc, s8, v10
+ v_mov_b32_e32 v6, s9
+ v_addc_u32_e32 v11, vcc, v6, v11, vcc
+ flat_load_dword v0, v[10:11]
+ v_lshlrev_b32_e32 v6, 5, v8
+ v_lshlrev_b32_e32 v7, 2, v7
+ s_endpgm
+
+// CHECK: file format ELF64-amdgpu-hsacobj
+// CHECK: Disassembly of section .hsatext:
+// CHECK: hello_world:
+// CHECK: s_mov_b32 m0, 0x10000 // 000000000100: BEFC00FF 00010000
+// CHECK: s_load_dwordx2 s[0:1], s[4:5], 0x8 // 000000000108: C0060002 00000008
+// CHECK: s_waitcnt lgkmcnt(0) // 000000000110: BF8C007F
+// CHECK: s_add_u32 s0, s7, s0 // 000000000114: 80000007
+// CHECK: v_add_i32_e32 v1, vcc, s0, v1 // 000000000118: 32020200
+// CHECK: s_movk_i32 s0, 0x483 // 00000000011C: B0000483
+// CHECK: v_cmp_ge_i32_e32 vcc, s0, v0 // 000000000120: 7D8C0000
+// CHECK: s_and_saveexec_b64 s[0:1], vcc // 000000000124: BE80206A
+// CHECK: v_lshlrev_b32_e32 v4, 2, v0 // 000000000128: 24080082
+// CHECK: s_cbranch_execz 21 // 00000000012C: BF880015
+// CHECK: s_mov_b64 s[2:3], exec // 000000000130: BE82017E
+// CHECK: s_mov_b64 s[10:11], exec // 000000000134: BE8A017E
+// CHECK: v_mov_b32_e32 v3, v0 // 000000000138: 7E060300
+// CHECK: s_endpgm // 00000000013C: BF810000
+
+// CHECK: hello_world2:
+// CHECK: s_and_saveexec_b64 s[0:1], vcc // 000000000240: BE80206A
+// CHECK: s_cbranch_execz 85 // 000000000244: BF880055
+// CHECK: s_load_dwordx4 s[8:11], s[4:5], 0x40 // 000000000248: C00A0202 00000040
+// CHECK: v_ashrrev_i32_e32 v77, 31, v76 // 000000000250: 229A989F
+// CHECK: v_lshlrev_b64 v[10:11], 2, v[76:77] // 000000000254: D28F000A 00029882
+// CHECK: s_waitcnt lgkmcnt(0) // 00000000025C: BF8C007F
+// CHECK: v_add_i32_e32 v10, vcc, s8, v10 // 000000000260: 32141408
+// CHECK: v_mov_b32_e32 v6, s9 // 000000000264: 7E0C0209
+// CHECK: v_addc_u32_e32 v11, vcc, v6, v11, vcc // 000000000268: 38161706
+// CHECK: flat_load_dword v0, v[10:11] // 00000000026C: DC500000 0000000A
+// CHECK: v_lshlrev_b32_e32 v6, 5, v8 // 000000000274: 240C1085
+// CHECK: v_lshlrev_b32_e32 v7, 2, v7 // 000000000278: 240E0E82
+// CHECK: s_endpgm // 00000000027C: BF810000
diff --git a/tools/llvm-objdump/llvm-objdump.cpp b/tools/llvm-objdump/llvm-objdump.cpp
index 28d667a486e..ccd98147ac3 100644
--- a/tools/llvm-objdump/llvm-objdump.cpp
+++ b/tools/llvm-objdump/llvm-objdump.cpp
@@ -384,12 +384,41 @@ public:
}
};
HexagonPrettyPrinter HexagonPrettyPrinterInst;
+
+class AMDGCNPrettyPrinter : public PrettyPrinter {
+public:
+ void printInst(MCInstPrinter &IP,
+ const MCInst *MI,
+ ArrayRef<uint8_t> Bytes,
+ uint64_t Address,
+ raw_ostream &OS,
+ StringRef Annot,
+ MCSubtargetInfo const &STI) override {
+ SmallString<40> InstStr;
+ raw_svector_ostream IS(InstStr);
+
+ IP.printInst(MI, IS, "", STI);
+
+ OS << left_justify(IS.str(), 60) << format("// %012X: ", Address);
+ typedef support::ulittle32_t U32;
+ for (auto D : makeArrayRef(reinterpret_cast<const U32*>(Bytes.data()),
+ Bytes.size() / sizeof(U32)))
+ OS << format("%08X ", D);
+
+ if (!Annot.empty())
+ OS << "// " << Annot;
+ }
+};
+AMDGCNPrettyPrinter AMDGCNPrettyPrinterInst;
+
PrettyPrinter &selectPrettyPrinter(Triple const &Triple) {
switch(Triple.getArch()) {
default:
return PrettyPrinterInst;
case Triple::hexagon:
return HexagonPrettyPrinterInst;
+ case Triple::amdgcn:
+ return AMDGCNPrettyPrinterInst;
}
}
}
@@ -1046,6 +1075,18 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
if (Start >= End)
continue;
+ if (Obj->isELF() && Obj->getArch() == Triple::amdgcn) {
+ // make size 4 bytes folded
+ End = Start + ((End - Start) & ~0x3ull);
+ Start += 256; // add sizeof(amd_kernel_code_t)
+ // cut trailing zeroes - up to 256 bytes (align)
+ const uint64_t EndAlign = 256;
+ const auto Limit = End - (std::min)(EndAlign, End - Start);
+ while (End > Limit &&
+ *reinterpret_cast<const support::ulittle32_t*>(&Bytes[End - 4]) == 0)
+ End -= 4;
+ }
+
outs() << '\n' << Symbols[si].second << ":\n";
#ifndef NDEBUG