1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
|
/*===-- llvm-c/Disassembler.h - Disassembler Public C Interface ---*- C -*-===*\
|* *|
|* The LLVM Compiler Infrastructure *|
|* *|
|* This file is distributed under the University of Illinois Open Source *|
|* License. See LICENSE.TXT for details. *|
|* *|
|*===----------------------------------------------------------------------===*|
|* *|
|* This header provides a public interface to a disassembler library. *|
|* LLVM provides an implementation of this interface. *|
|* *|
\*===----------------------------------------------------------------------===*/
#ifndef LLVM_C_DISASSEMBLER_H
#define LLVM_C_DISASSEMBLER_H
#include "llvm/Support/DataTypes.h"
#include <stddef.h>
/**
* @defgroup LLVMCDisassembler Disassembler
* @ingroup LLVMC
*
* @{
*/
/**
* An opaque reference to a disassembler context.
*/
typedef void *LLVMDisasmContextRef;
/**
* The type for the operand information call back function. This is called to
* get the symbolic information for an operand of an instruction. Typically
* this is from the relocation information, symbol table, etc. That block of
* information is saved when the disassembler context is created and passed to
* the call back in the DisInfo parameter. The instruction containing operand
* is at the PC parameter. For some instruction sets, there can be more than
* one operand with symbolic information. To determine the symbolic operand
* information for each operand, the bytes for the specific operand in the
* instruction are specified by the Offset parameter and its byte widith is the
* size parameter. For instructions sets with fixed widths and one symbolic
* operand per instruction, the Offset parameter will be zero and Size parameter
* will be the instruction width. The information is returned in TagBuf and is
* Triple specific with its specific information defined by the value of
* TagType for that Triple. If symbolic information is returned the function
* returns 1, otherwise it returns 0.
*/
typedef int (*LLVMOpInfoCallback)(void *DisInfo, uint64_t PC,
uint64_t Offset, uint64_t Size,
int TagType, void *TagBuf);
/**
* The initial support in LLVM MC for the most general form of a relocatable
* expression is "AddSymbol - SubtractSymbol + Offset". For some Darwin targets
* this full form is encoded in the relocation information so that AddSymbol and
* SubtractSymbol can be link edited independent of each other. Many other
* platforms only allow a relocatable expression of the form AddSymbol + Offset
* to be encoded.
*
* The LLVMOpInfoCallback() for the TagType value of 1 uses the struct
* LLVMOpInfo1. The value of the relocatable expression for the operand,
* including any PC adjustment, is passed in to the call back in the Value
* field. The symbolic information about the operand is returned using all
* the fields of the structure with the Offset of the relocatable expression
* returned in the Value field. It is possible that some symbols in the
* relocatable expression were assembly temporary symbols, for example
* "Ldata - LpicBase + constant", and only the Values of the symbols without
* symbol names are present in the relocation information. The VariantKind
* type is one of the Target specific #defines below and is used to print
* operands like "_foo@GOT", ":lower16:_foo", etc.
*/
struct LLVMOpInfoSymbol1 {
uint64_t Present; /* 1 if this symbol is present */
const char *Name; /* symbol name if not NULL */
uint64_t Value; /* symbol value if name is NULL */
};
struct LLVMOpInfo1 {
struct LLVMOpInfoSymbol1 AddSymbol;
struct LLVMOpInfoSymbol1 SubtractSymbol;
uint64_t Value;
uint64_t VariantKind;
};
/**
* The operand VariantKinds for symbolic disassembly.
*/
#define LLVMDisassembler_VariantKind_None 0 /* all targets */
/**
* The ARM target VariantKinds.
*/
#define LLVMDisassembler_VariantKind_ARM_HI16 1 /* :upper16: */
#define LLVMDisassembler_VariantKind_ARM_LO16 2 /* :lower16: */
/**
* The type for the symbol lookup function. This may be called by the
* disassembler for things like adding a comment for a PC plus a constant
* offset load instruction to use a symbol name instead of a load address value.
* It is passed the block information is saved when the disassembler context is
* created and the ReferenceValue to look up as a symbol. If no symbol is found
* for the ReferenceValue NULL is returned. The ReferenceType of the
* instruction is passed indirectly as is the PC of the instruction in
* ReferencePC. If the output reference can be determined its type is returned
* indirectly in ReferenceType along with ReferenceName if any, or that is set
* to NULL.
*/
typedef const char *(*LLVMSymbolLookupCallback)(void *DisInfo,
uint64_t ReferenceValue,
uint64_t *ReferenceType,
uint64_t ReferencePC,
const char **ReferenceName);
/**
* The reference types on input and output.
*/
/* No input reference type or no output reference type. */
#define LLVMDisassembler_ReferenceType_InOut_None 0
/* The input reference is from a branch instruction. */
#define LLVMDisassembler_ReferenceType_In_Branch 1
/* The input reference is from a PC relative load instruction. */
#define LLVMDisassembler_ReferenceType_In_PCrel_Load 2
/* The output reference is to as symbol stub. */
#define LLVMDisassembler_ReferenceType_Out_SymbolStub 1
/* The output reference is to a symbol address in a literal pool. */
#define LLVMDisassembler_ReferenceType_Out_LitPool_SymAddr 2
/* The output reference is to a cstring address in a literal pool. */
#define LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr 3
#ifdef __cplusplus
extern "C" {
#endif /* !defined(__cplusplus) */
/**
* Create a disassembler for the TripleName. Symbolic disassembly is supported
* by passing a block of information in the DisInfo parameter and specifying the
* TagType and callback functions as described above. These can all be passed
* as NULL. If successful, this returns a disassembler context. If not, it
* returns NULL.
*/
LLVMDisasmContextRef LLVMCreateDisasm(const char *TripleName, void *DisInfo,
int TagType, LLVMOpInfoCallback GetOpInfo,
LLVMSymbolLookupCallback SymbolLookUp);
/**
* Dispose of a disassembler context.
*/
void LLVMDisasmDispose(LLVMDisasmContextRef DC);
/**
* Disassemble a single instruction using the disassembler context specified in
* the parameter DC. The bytes of the instruction are specified in the
* parameter Bytes, and contains at least BytesSize number of bytes. The
* instruction is at the address specified by the PC parameter. If a valid
* instruction can be disassembled, its string is returned indirectly in
* OutString whose size is specified in the parameter OutStringSize. This
* function returns the number of bytes in the instruction or zero if there was
* no valid instruction.
*/
size_t LLVMDisasmInstruction(LLVMDisasmContextRef DC, uint8_t *Bytes,
uint64_t BytesSize, uint64_t PC,
char *OutString, size_t OutStringSize);
/**
* @}
*/
#ifdef __cplusplus
}
#endif /* !defined(__cplusplus) */
#endif /* !defined(LLVM_C_DISASSEMBLER_H) */
|