diff options
author | Lang Hames <lhames@gmail.com> | 2016-02-10 01:02:33 +0000 |
---|---|---|
committer | Lang Hames <lhames@gmail.com> | 2016-02-10 01:02:33 +0000 |
commit | db7e3d1e5c95f0045c288c742b6b113b8b8d361c (patch) | |
tree | d93103dd1629e675d40c1850eec001dc950d16ad | |
parent | 7c9f178cf7068dcad3bb012718a9bd91e9fef078 (diff) |
[Orc] Add lazy-JITting support for i386.
This patch adds a new class, OrcI386, which contains the hooks needed to
support lazy-JITing on i386 (currently only for Pentium 2 or above, as the JIT
re-entry code uses the FXSAVE/FXRSTOR instructions).
Support for i386 is enabled in the LLI lazy JIT and the Orc C API, and
regression and unit tests are enabled for this architecture.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@260338 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | include/llvm/ExecutionEngine/Orc/OrcArchitectureSupport.h | 36 | ||||
-rw-r--r-- | lib/ExecutionEngine/Orc/OrcArchitectureSupport.cpp | 125 | ||||
-rw-r--r-- | lib/ExecutionEngine/Orc/OrcCBindingsStack.cpp | 11 | ||||
-rw-r--r-- | test/ExecutionEngine/OrcLazy/lit.local.cfg | 2 | ||||
-rw-r--r-- | tools/lli/OrcLazyJIT.cpp | 11 | ||||
-rw-r--r-- | unittests/ExecutionEngine/Orc/OrcTestCommon.h | 4 |
6 files changed, 187 insertions, 2 deletions
diff --git a/include/llvm/ExecutionEngine/Orc/OrcArchitectureSupport.h b/include/llvm/ExecutionEngine/Orc/OrcArchitectureSupport.h index abf392b5b01..241139ef987 100644 --- a/include/llvm/ExecutionEngine/Orc/OrcArchitectureSupport.h +++ b/include/llvm/ExecutionEngine/Orc/OrcArchitectureSupport.h @@ -145,6 +145,42 @@ public: void *InitialPtrVal); }; +/// @brief I386 support. +/// +/// I386 supports lazy JITing. +class OrcI386 { +public: + static const unsigned PointerSize = 4; + static const unsigned TrampolineSize = 8; + static const unsigned ResolverCodeSize = 0x66; + + typedef GenericIndirectStubsInfo<8> IndirectStubsInfo; + + typedef TargetAddress (*JITReentryFn)(void *CallbackMgr, void *TrampolineId); + + /// @brief Write the resolver code into the given memory. The user is + /// responsible for allocating the memory and setting permissions. + static void writeResolverCode(uint8_t *ResolveMem, JITReentryFn Reentry, + void *CallbackMgr); + + /// @brief Write the requested number of trampolines into the given memory, + /// which must be big enough to hold 1 pointer, plus NumTrampolines + /// trampolines. + static void writeTrampolines(uint8_t *TrampolineMem, void *ResolverAddr, + unsigned NumTrampolines); + + /// @brief Emit at least MinStubs worth of indirect call stubs, rounded up to + /// the nearest page size. + /// + /// E.g. Asking for 4 stubs on i386, where stubs are 8-bytes, with 4k + /// pages will return a block of 512 stubs (4096 / 8 = 512). Asking for 513 + /// will return a block of 1024 (2-pages worth). + static std::error_code emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo, + unsigned MinStubs, + void *InitialPtrVal); +}; + + } // End namespace orc. } // End namespace llvm. 
diff --git a/lib/ExecutionEngine/Orc/OrcArchitectureSupport.cpp b/lib/ExecutionEngine/Orc/OrcArchitectureSupport.cpp index 606b5616f59..c9b73114e37 100644 --- a/lib/ExecutionEngine/Orc/OrcArchitectureSupport.cpp +++ b/lib/ExecutionEngine/Orc/OrcArchitectureSupport.cpp @@ -165,5 +165,130 @@ std::error_code OrcX86_64::emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo, return std::error_code(); } +void OrcI386::writeResolverCode(uint8_t *ResolverMem, JITReentryFn ReentryFn, + void *CallbackMgr) { + + const uint8_t ResolverCode[] = { + // resolver_entry: + 0x55, // 0x00: pushl %ebp + 0x89, 0xe5, // 0x01: movl %esp, %ebp + 0x50, // 0x03: pushl %eax + 0x53, // 0x04: pushl %ebx + 0x51, // 0x05: pushl %ecx + 0x52, // 0x06: pushl %edx + 0x56, // 0x07: pushl %esi + 0x57, // 0x08: pushl %edi + 0x81, 0xec, 0x1C, 0x02, 0x00, 0x00, // 0x09: subl $0x21C, %esp + 0x0f, 0xae, 0x44, 0x24, 0x10, // 0x0f: fxsave 0x10(%esp) + 0x8b, 0x75, 0x04, // 0x14: movl 0x4(%ebp), %esi + 0x83, 0xee, 0x05, // 0x17: subl $0x5, %esi + 0x89, 0x74, 0x24, 0x04, // 0x1a: movl %esi, 0x4(%esp) + 0xc7, 0x04, 0x24, 0x00, 0x00, 0x00, 0x00, // 0x1e: movl $<cbmgr>, (%esp) + 0xb8, 0x00, 0x00, 0x00, 0x00, // 0x25: movl $<reentry>, %eax + 0xff, 0xd0, // 0x2a: calll *%eax + 0x89, 0x45, 0x04, // 0x2c: movl %eax, 0x4(%ebp) + 0x0f, 0xae, 0x4c, 0x24, 0x10, // 0x2f: fxrstor 0x10(%esp) + 0x81, 0xc4, 0x1c, 0x02, 0x00, 0x00, // 0x34: addl $0x21C, %esp + 0x5f, // 0x3a: popl %edi + 0x5e, // 0x3b: popl %esi + 0x5a, // 0x3c: popl %edx + 0x59, // 0x3d: popl %ecx + 0x5b, // 0x3e: popl %ebx + 0x58, // 0x3f: popl %eax + 0x5d, // 0x40: popl %ebp + 0xc3 // 0x41: retl + }; + + const unsigned ReentryFnAddrOffset = 0x26; + const unsigned CallbackMgrAddrOffset = 0x21; + + memcpy(ResolverMem, ResolverCode, sizeof(ResolverCode)); + memcpy(ResolverMem + ReentryFnAddrOffset, &ReentryFn, sizeof(ReentryFn)); + memcpy(ResolverMem + CallbackMgrAddrOffset, &CallbackMgr, + sizeof(CallbackMgr)); +} + +void OrcI386::writeTrampolines(uint8_t 
*TrampolineMem, void *ResolverAddr, + unsigned NumTrampolines) { + + uint64_t CallRelImm = 0xF1C4C400000000e8; + uint64_t Resolver = reinterpret_cast<uint64_t>(ResolverAddr); + uint64_t ResolverRel = + Resolver - reinterpret_cast<uint64_t>(TrampolineMem) - 5; + + uint64_t *Trampolines = reinterpret_cast<uint64_t*>(TrampolineMem); + for (unsigned I = 0; I < NumTrampolines; ++I, ResolverRel -= TrampolineSize) + Trampolines[I] = CallRelImm | (ResolverRel << 8); +} + +std::error_code OrcI386::emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo, + unsigned MinStubs, + void *InitialPtrVal) { + // Stub format is: + // + // .section __orc_stubs + // stub1: + // jmpl *ptr1 + // .byte 0xC4 ; <- Invalid opcode padding. + // .byte 0xF1 + // stub2: + // jmpl *ptr2 + // + // ... + // + // .section __orc_ptrs + // ptr1: + // .long 0x0 + // ptr2: + // .long 0x0 + // + // ... + + const unsigned StubSize = IndirectStubsInfo::StubSize; + + // Emit at least MinStubs, rounded up to fill the pages allocated. + unsigned PageSize = sys::Process::getPageSize(); + unsigned NumPages = ((MinStubs * StubSize) + (PageSize - 1)) / PageSize; + unsigned NumStubs = (NumPages * PageSize) / StubSize; + + // Allocate memory for stubs and pointers in one call. + std::error_code EC; + auto StubsMem = + sys::OwningMemoryBlock( + sys::Memory::allocateMappedMemory(2 * NumPages * PageSize, nullptr, + sys::Memory::MF_READ | + sys::Memory::MF_WRITE, + EC)); + + if (EC) + return EC; + + // Create separate MemoryBlocks representing the stubs and pointers. + sys::MemoryBlock StubsBlock(StubsMem.base(), NumPages * PageSize); + sys::MemoryBlock PtrsBlock(static_cast<char*>(StubsMem.base()) + + NumPages * PageSize, + NumPages * PageSize); + + // Populate the stubs page and mark it executable. 
+ uint64_t *Stub = reinterpret_cast<uint64_t*>(StubsBlock.base()); + uint64_t PtrAddr = reinterpret_cast<uint64_t>(PtrsBlock.base()); + for (unsigned I = 0; I < NumStubs; ++I, PtrAddr += 4) + Stub[I] = 0xF1C40000000025ff | (PtrAddr << 16); + + if (auto EC = sys::Memory::protectMappedMemory(StubsBlock, + sys::Memory::MF_READ | + sys::Memory::MF_EXEC)) + return EC; + + // Initialize all pointers to InitialPtrVal. + void **Ptr = reinterpret_cast<void**>(PtrsBlock.base()); + for (unsigned I = 0; I < NumStubs; ++I) + Ptr[I] = InitialPtrVal; + + StubsInfo = IndirectStubsInfo(NumStubs, std::move(StubsMem)); + + return std::error_code(); +} + } // End namespace orc. } // End namespace llvm. diff --git a/lib/ExecutionEngine/Orc/OrcCBindingsStack.cpp b/lib/ExecutionEngine/Orc/OrcCBindingsStack.cpp index 956daae372d..412e605fc0b 100644 --- a/lib/ExecutionEngine/Orc/OrcCBindingsStack.cpp +++ b/lib/ExecutionEngine/Orc/OrcCBindingsStack.cpp @@ -22,6 +22,11 @@ OrcCBindingsStack::createCompileCallbackMgr(Triple T) { switch (T.getArch()) { default: return nullptr; + case Triple::x86: { + typedef orc::LocalJITCompileCallbackManager<orc::OrcI386> CCMgrT; + return llvm::make_unique<CCMgrT>(0); + }; + case Triple::x86_64: { typedef orc::LocalJITCompileCallbackManager<orc::OrcX86_64> CCMgrT; return llvm::make_unique<CCMgrT>(0); @@ -34,6 +39,12 @@ OrcCBindingsStack::createIndirectStubsMgrBuilder(Triple T) { switch (T.getArch()) { default: return nullptr; + case Triple::x86: + return [](){ + return llvm::make_unique< + orc::LocalIndirectStubsManager<orc::OrcI386>>(); + }; + case Triple::x86_64: return [](){ return llvm::make_unique< diff --git a/test/ExecutionEngine/OrcLazy/lit.local.cfg b/test/ExecutionEngine/OrcLazy/lit.local.cfg index bf63c4f73ed..aa213e4ca20 100644 --- a/test/ExecutionEngine/OrcLazy/lit.local.cfg +++ b/test/ExecutionEngine/OrcLazy/lit.local.cfg @@ -1,2 +1,2 @@ -if config.root.host_arch not in ['x86_64']: +if config.root.host_arch not in ['i386', 'x86', 
'x86_64', 'AMD64']: config.unsupported = True diff --git a/tools/lli/OrcLazyJIT.cpp b/tools/lli/OrcLazyJIT.cpp index 7f483f742b8..fee3c72d643 100644 --- a/tools/lli/OrcLazyJIT.cpp +++ b/tools/lli/OrcLazyJIT.cpp @@ -51,6 +51,11 @@ OrcLazyJIT::createCompileCallbackMgr(Triple T) { switch (T.getArch()) { default: return nullptr; + case Triple::x86: { + typedef orc::LocalJITCompileCallbackManager<orc::OrcI386> CCMgrT; + return llvm::make_unique<CCMgrT>(0); + } + case Triple::x86_64: { typedef orc::LocalJITCompileCallbackManager<orc::OrcX86_64> CCMgrT; return llvm::make_unique<CCMgrT>(0); @@ -63,6 +68,12 @@ OrcLazyJIT::createIndirectStubsMgrBuilder(Triple T) { switch (T.getArch()) { default: return nullptr; + case Triple::x86: + return [](){ + return llvm::make_unique< + orc::LocalIndirectStubsManager<orc::OrcI386>>(); + }; + case Triple::x86_64: return [](){ return llvm::make_unique< diff --git a/unittests/ExecutionEngine/Orc/OrcTestCommon.h b/unittests/ExecutionEngine/Orc/OrcTestCommon.h index 5cc7071a866..e8fab561564 100644 --- a/unittests/ExecutionEngine/Orc/OrcTestCommon.h +++ b/unittests/ExecutionEngine/Orc/OrcTestCommon.h @@ -46,7 +46,9 @@ public: if (TM) { // If we found a TargetMachine, check that it's one that Orc supports. const Triple& TT = TM->getTargetTriple(); - if (TT.getArch() != Triple::x86_64 || TT.isOSWindows()) + + if ((TT.getArch() != Triple::x86_64 && TT.getArch() != Triple::x86) || + TT.isOSWindows()) TM = nullptr; } }; |