diff options
author | tstellar <tstellar@91177308-0d34-0410-b5e6-96231b3b80d8> | 2012-09-18 19:40:39 +0000 |
---|---|---|
committer | tstellar <tstellar@91177308-0d34-0410-b5e6-96231b3b80d8> | 2012-09-18 19:40:39 +0000 |
commit | 15d14cb83df480dd4aab6435873ca50254891a2d (patch) | |
tree | e5ef7d77a1e7703b625d5de82d50c26e4944cdc7 /examples |
Create R600 Branch
git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/R600/@164161 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'examples')
107 files changed, 11687 insertions, 0 deletions
diff --git a/examples/BrainF/BrainF.cpp b/examples/BrainF/BrainF.cpp new file mode 100644 index 00000000000..b002d1f496d --- /dev/null +++ b/examples/BrainF/BrainF.cpp @@ -0,0 +1,467 @@ +//===-- BrainF.cpp - BrainF compiler example ----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===--------------------------------------------------------------------===// +// +// This class compiles the BrainF language into LLVM assembly. +// +// The BrainF language has 8 commands: +// Command Equivalent C Action +// ------- ------------ ------ +// , *h=getchar(); Read a character from stdin, 255 on EOF +// . putchar(*h); Write a character to stdout +// - --*h; Decrement tape +// + ++*h; Increment tape +// < --h; Move head left +// > ++h; Move head right +// [ while(*h) { Start loop +// ] } End loop +// +//===--------------------------------------------------------------------===// + +#include "BrainF.h" +#include "llvm/Constants.h" +#include "llvm/Instructions.h" +#include "llvm/Intrinsics.h" +#include "llvm/ADT/STLExtras.h" +#include <iostream> +using namespace llvm; + +//Set the constants for naming +const char *BrainF::tapereg = "tape"; +const char *BrainF::headreg = "head"; +const char *BrainF::label = "brainf"; +const char *BrainF::testreg = "test"; + +Module *BrainF::parse(std::istream *in1, int mem, CompileFlags cf, + LLVMContext& Context) { + in = in1; + memtotal = mem; + comflag = cf; + + header(Context); + readloop(0, 0, 0, Context); + delete builder; + return module; +} + +void BrainF::header(LLVMContext& C) { + module = new Module("BrainF", C); + + //Function prototypes + + //declare void @llvm.memset.p0i8.i32(i8 *, i8, i32, i32, i1) + Type *Tys[] = { Type::getInt8PtrTy(C), Type::getInt32Ty(C) }; + Function *memset_func = Intrinsic::getDeclaration(module, Intrinsic::memset, + Tys); + + //declare i32 @getchar() + getchar_func = cast<Function>(module-> + getOrInsertFunction("getchar", IntegerType::getInt32Ty(C), NULL)); + + //declare i32 @putchar(i32) + putchar_func = cast<Function>(module-> + getOrInsertFunction("putchar", IntegerType::getInt32Ty(C), + IntegerType::getInt32Ty(C), NULL)); + + + //Function header + + //define void @brainf() + brainf_func = cast<Function>(module-> + getOrInsertFunction("brainf", Type::getVoidTy(C), NULL)); + + builder = new IRBuilder<>(BasicBlock::Create(C, label, brainf_func)); + + //%arr = malloc i8, i32 %d + ConstantInt *val_mem = ConstantInt::get(C, APInt(32, memtotal)); + BasicBlock* BB = builder->GetInsertBlock(); + Type* IntPtrTy = IntegerType::getInt32Ty(C); + Type* Int8Ty = IntegerType::getInt8Ty(C); + Constant* allocsize = ConstantExpr::getSizeOf(Int8Ty); + allocsize = ConstantExpr::getTruncOrBitCast(allocsize, IntPtrTy); + ptr_arr = CallInst::CreateMalloc(BB, IntPtrTy, Int8Ty, allocsize, val_mem, + NULL, "arr"); + BB->getInstList().push_back(cast<Instruction>(ptr_arr)); + + //call void @llvm.memset.p0i8.i32(i8 *%arr, i8 0, i32 %d, i32 1, i1 0) + { + Value *memset_params[] = { + ptr_arr, + ConstantInt::get(C, APInt(8, 0)), + val_mem, + ConstantInt::get(C, APInt(32, 1)), + ConstantInt::get(C, APInt(1, 0)) + }; + + CallInst *memset_call = builder-> + CreateCall(memset_func, memset_params); + memset_call->setTailCall(false); + } + + //%arrmax = getelementptr i8 *%arr, i32 %d + if (comflag & flag_arraybounds) { + ptr_arrmax = builder-> + CreateGEP(ptr_arr, ConstantInt::get(C, APInt(32, memtotal)), "arrmax"); + } + + //%head.%d = getelementptr i8 *%arr, i32 %d + curhead = builder->CreateGEP(ptr_arr, + ConstantInt::get(C, APInt(32, memtotal/2)), + headreg); + + + + //Function footer + + //brainf.end: + endbb = BasicBlock::Create(C, label, brainf_func); + + //call free(i8 *%arr) + endbb->getInstList().push_back(CallInst::CreateFree(ptr_arr, endbb)); + + //ret void + ReturnInst::Create(C, endbb); + + + + //Error block for array out of bounds + if (comflag & flag_arraybounds) + { + //@aberrormsg = internal constant [%d x i8] c"\00" + Constant *msg_0 = + ConstantDataArray::getString(C, "Error: The head has left the tape.", + true); + + GlobalVariable *aberrormsg = new GlobalVariable( + *module, + msg_0->getType(), + true, + GlobalValue::InternalLinkage, + msg_0, + "aberrormsg"); + + //declare i32 @puts(i8 *) + Function *puts_func = cast<Function>(module-> + getOrInsertFunction("puts", IntegerType::getInt32Ty(C), + PointerType::getUnqual(IntegerType::getInt8Ty(C)), NULL)); + + //brainf.aberror: + aberrorbb = BasicBlock::Create(C, label, brainf_func); + + //call i32 @puts(i8 *getelementptr([%d x i8] *@aberrormsg, i32 0, i32 0)) + { + Constant *zero_32 = Constant::getNullValue(IntegerType::getInt32Ty(C)); + + Constant *gep_params[] = { + zero_32, + zero_32 + }; + + Constant *msgptr = ConstantExpr:: + getGetElementPtr(aberrormsg, gep_params); + + Value *puts_params[] = { + msgptr + }; + + CallInst *puts_call = + CallInst::Create(puts_func, + puts_params, + "", aberrorbb); + puts_call->setTailCall(false); + } + + //br label %brainf.end + BranchInst::Create(endbb, aberrorbb); + } +} + +void BrainF::readloop(PHINode *phi, BasicBlock *oldbb, BasicBlock *testbb, + LLVMContext &C) { + Symbol cursym = SYM_NONE; + int curvalue = 0; + Symbol nextsym = SYM_NONE; + int nextvalue = 0; + char c; + int loop; + int direction; + + while(cursym != SYM_EOF && cursym != SYM_ENDLOOP) { + // Write out commands + switch(cursym) { + case SYM_NONE: + // Do nothing + break; + + case SYM_READ: + { + //%tape.%d = call i32 @getchar() + CallInst *getchar_call = builder->CreateCall(getchar_func, tapereg); + getchar_call->setTailCall(false); + Value *tape_0 = getchar_call; + + //%tape.%d = trunc i32 %tape.%d to i8 + Value *tape_1 = builder-> + CreateTrunc(tape_0, IntegerType::getInt8Ty(C), tapereg); + + //store i8 %tape.%d, i8 *%head.%d + builder->CreateStore(tape_1, curhead); + } + break; + + case SYM_WRITE: + { + //%tape.%d = load i8 *%head.%d + LoadInst *tape_0 = builder->CreateLoad(curhead, tapereg); + + //%tape.%d = sext i8 %tape.%d to i32 + Value *tape_1 = builder-> + CreateSExt(tape_0, IntegerType::getInt32Ty(C), tapereg); + + //call i32 @putchar(i32 %tape.%d) + Value *putchar_params[] = { + tape_1 + }; + CallInst *putchar_call = builder-> + CreateCall(putchar_func, + putchar_params); + putchar_call->setTailCall(false); + } + break; + + case SYM_MOVE: + { + //%head.%d = getelementptr i8 *%head.%d, i32 %d + curhead = builder-> + CreateGEP(curhead, ConstantInt::get(C, APInt(32, curvalue)), + headreg); + + //Error block for array out of bounds + if (comflag & flag_arraybounds) + { + //%test.%d = icmp uge i8 *%head.%d, %arrmax + Value *test_0 = builder-> + CreateICmpUGE(curhead, ptr_arrmax, testreg); + + //%test.%d = icmp ult i8 *%head.%d, %arr + Value *test_1 = builder-> + CreateICmpULT(curhead, ptr_arr, testreg); + + //%test.%d = or i1 %test.%d, %test.%d + Value *test_2 = builder-> + CreateOr(test_0, test_1, testreg); + + //br i1 %test.%d, label %main.%d, label %main.%d + BasicBlock *nextbb = BasicBlock::Create(C, label, brainf_func); + builder->CreateCondBr(test_2, aberrorbb, nextbb); + + //main.%d: + builder->SetInsertPoint(nextbb); + } + } + break; + + case SYM_CHANGE: + { + //%tape.%d = load i8 *%head.%d + LoadInst *tape_0 = builder->CreateLoad(curhead, tapereg); + + //%tape.%d = add i8 %tape.%d, %d + Value *tape_1 = builder-> + CreateAdd(tape_0, ConstantInt::get(C, APInt(8, curvalue)), tapereg); + + //store i8 %tape.%d, i8 *%head.%d\n" + builder->CreateStore(tape_1, curhead); + } + break; + + case SYM_LOOP: + { + //br label %main.%d + BasicBlock *testbb = BasicBlock::Create(C, label, brainf_func); + builder->CreateBr(testbb); + + //main.%d: + BasicBlock *bb_0 = builder->GetInsertBlock(); + BasicBlock *bb_1 = BasicBlock::Create(C, label, brainf_func); + builder->SetInsertPoint(bb_1); + + // Make part of PHI instruction now, wait until end of loop to finish + PHINode *phi_0 = + PHINode::Create(PointerType::getUnqual(IntegerType::getInt8Ty(C)), + 2, headreg, testbb); + phi_0->addIncoming(curhead, bb_0); + curhead = phi_0; + + readloop(phi_0, bb_1, testbb, C); + } + break; + + default: + std::cerr << "Error: Unknown symbol.\n"; + abort(); + break; + } + + cursym = nextsym; + curvalue = nextvalue; + nextsym = SYM_NONE; + + // Reading stdin loop + loop = (cursym == SYM_NONE) + || (cursym == SYM_MOVE) + || (cursym == SYM_CHANGE); + while(loop) { + *in>>c; + if (in->eof()) { + if (cursym == SYM_NONE) { + cursym = SYM_EOF; + } else { + nextsym = SYM_EOF; + } + loop = 0; + } else { + direction = 1; + switch(c) { + case '-': + direction = -1; + // Fall through + + case '+': + if (cursym == SYM_CHANGE) { + curvalue += direction; + // loop = 1 + } else { + if (cursym == SYM_NONE) { + cursym = SYM_CHANGE; + curvalue = direction; + // loop = 1 + } else { + nextsym = SYM_CHANGE; + nextvalue = direction; + loop = 0; + } + } + break; + + case '<': + direction = -1; + // Fall through + + case '>': + if (cursym == SYM_MOVE) { + curvalue += direction; + // loop = 1 + } else { + if (cursym == SYM_NONE) { + cursym = SYM_MOVE; + curvalue = direction; + // loop = 1 + } else { + nextsym = SYM_MOVE; + nextvalue = direction; + loop = 0; + } + } + break; + + case ',': + if (cursym == SYM_NONE) { + cursym = SYM_READ; + } else { + nextsym = SYM_READ; + } + loop = 0; + break; + + case '.': + if (cursym == SYM_NONE) { + cursym = SYM_WRITE; + } else { + nextsym = SYM_WRITE; + } + loop = 0; + break; + + case '[': + if (cursym == SYM_NONE) { + cursym = SYM_LOOP; + } else { + nextsym = SYM_LOOP; + } + loop = 0; + break; + + case ']': + if (cursym == SYM_NONE) { + cursym = SYM_ENDLOOP; + } else { + nextsym = SYM_ENDLOOP; + } + loop = 0; + break; + + // Ignore other characters + default: + break; + } + } + } + } + + if (cursym == SYM_ENDLOOP) { + if (!phi) { + std::cerr << "Error: Extra ']'\n"; + abort(); + } + + // Write loop test + { + //br label %main.%d + builder->CreateBr(testbb); + + //main.%d: + + //%head.%d = phi i8 *[%head.%d, %main.%d], [%head.%d, %main.%d] + //Finish phi made at beginning of loop + phi->addIncoming(curhead, builder->GetInsertBlock()); + Value *head_0 = phi; + + //%tape.%d = load i8 *%head.%d + LoadInst *tape_0 = new LoadInst(head_0, tapereg, testbb); + + //%test.%d = icmp eq i8 %tape.%d, 0 + ICmpInst *test_0 = new ICmpInst(*testbb, ICmpInst::ICMP_EQ, tape_0, + ConstantInt::get(C, APInt(8, 0)), testreg); + + //br i1 %test.%d, label %main.%d, label %main.%d + BasicBlock *bb_0 = BasicBlock::Create(C, label, brainf_func); + BranchInst::Create(bb_0, oldbb, test_0, testbb); + + //main.%d: + builder->SetInsertPoint(bb_0); + + //%head.%d = phi i8 *[%head.%d, %main.%d] + PHINode *phi_1 = builder-> + CreatePHI(PointerType::getUnqual(IntegerType::getInt8Ty(C)), 1, + headreg); + phi_1->addIncoming(head_0, testbb); + curhead = phi_1; + } + + return; + } + + //End of the program, so go to return block + builder->CreateBr(endbb); + + if (phi) { + std::cerr << "Error: Missing ']'\n"; + abort(); + } +} diff --git a/examples/BrainF/BrainF.h b/examples/BrainF/BrainF.h new file mode 100644 index 00000000000..c069feb51e7 --- /dev/null +++ b/examples/BrainF/BrainF.h @@ -0,0 +1,94 @@ +//===-- BrainF.h - BrainF compiler class ----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===--------------------------------------------------------------------===// +// +// This class stores the data for the BrainF compiler so it doesn't have +// to pass all of it around. The main method is parse. +// +//===--------------------------------------------------------------------===// + +#ifndef BRAINF_H +#define BRAINF_H + +#include "llvm/IRBuilder.h" +#include "llvm/LLVMContext.h" +#include "llvm/Module.h" + +using namespace llvm; + +/// This class provides a parser for the BrainF language. +/// The class itself is made to store values during +/// parsing so they don't have to be passed around +/// as much. +class BrainF { + public: + /// Options for how BrainF should compile + enum CompileFlags { + flag_off = 0, + flag_arraybounds = 1 + }; + + /// This is the main method. It parses BrainF from in1 + /// and returns the module with a function + /// void brainf() + /// containing the resulting code. + /// On error, it calls abort. + /// The caller must delete the returned module. + Module *parse(std::istream *in1, int mem, CompileFlags cf, + LLVMContext& C); + + protected: + /// The different symbols in the BrainF language + enum Symbol { + SYM_NONE, + SYM_READ, + SYM_WRITE, + SYM_MOVE, + SYM_CHANGE, + SYM_LOOP, + SYM_ENDLOOP, + SYM_EOF + }; + + /// Names of the different parts of the language. + /// Tape is used for reading and writing the tape. + /// headreg is used for the position of the head. + /// label is used for the labels for the BasicBlocks. + /// testreg is used for testing the loop exit condition. + static const char *tapereg; + static const char *headreg; + static const char *label; + static const char *testreg; + + /// Put the brainf function preamble and other fixed pieces of code + void header(LLVMContext& C); + + /// The main loop for parsing. It calls itself recursively + /// to handle the depth of nesting of "[]". + void readloop(PHINode *phi, BasicBlock *oldbb, + BasicBlock *testbb, LLVMContext &Context); + + /// Constants during parsing + int memtotal; + CompileFlags comflag; + std::istream *in; + Module *module; + Function *brainf_func; + Function *getchar_func; + Function *putchar_func; + Value *ptr_arr; + Value *ptr_arrmax; + BasicBlock *endbb; + BasicBlock *aberrorbb; + + /// Variables + IRBuilder<> *builder; + Value *curhead; +}; + +#endif diff --git a/examples/BrainF/BrainFDriver.cpp b/examples/BrainF/BrainFDriver.cpp new file mode 100644 index 00000000000..58617b7f380 --- /dev/null +++ b/examples/BrainF/BrainFDriver.cpp @@ -0,0 +1,159 @@ +//===-- BrainFDriver.cpp - BrainF compiler driver -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===--------------------------------------------------------------------===// +// +// This program converts the BrainF language into LLVM assembly, +// which it can then run using the JIT or output as BitCode. +// +// This implementation has a tape of 65536 bytes, +// with the head starting in the middle. +// Range checking is off by default, so be careful. +// It can be enabled with -abc. +// +// Use: +// ./BrainF -jit prog.bf #Run program now +// ./BrainF -jit -abc prog.bf #Run program now safely +// ./BrainF prog.bf #Write as BitCode +// +// lli prog.bf.bc #Run generated BitCode +// +//===--------------------------------------------------------------------===// + +#include "BrainF.h" +#include "llvm/Constants.h" +#include "llvm/Analysis/Verifier.h" +#include "llvm/Bitcode/ReaderWriter.h" +#include "llvm/ExecutionEngine/GenericValue.h" +#include "llvm/ExecutionEngine/JIT.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/TargetSelect.h" +#include "llvm/Support/raw_ostream.h" +#include <iostream> +#include <fstream> +using namespace llvm; + +//Command line options + +static cl::opt<std::string> +InputFilename(cl::Positional, cl::desc("<input brainf>")); + +static cl::opt<std::string> +OutputFilename("o", cl::desc("Output filename"), cl::value_desc("filename")); + +static cl::opt<bool> +ArrayBoundsChecking("abc", cl::desc("Enable array bounds checking")); + +static cl::opt<bool> +JIT("jit", cl::desc("Run program Just-In-Time")); + + +//Add main function so can be fully compiled +void addMainFunction(Module *mod) { + //define i32 @main(i32 %argc, i8 **%argv) + Function *main_func = cast<Function>(mod-> + getOrInsertFunction("main", IntegerType::getInt32Ty(mod->getContext()), + IntegerType::getInt32Ty(mod->getContext()), + PointerType::getUnqual(PointerType::getUnqual( + IntegerType::getInt8Ty(mod->getContext()))), NULL)); + { + Function::arg_iterator args = main_func->arg_begin(); + Value *arg_0 = args++; + arg_0->setName("argc"); + Value *arg_1 = args++; + arg_1->setName("argv"); + } + + //main.0: + BasicBlock *bb = BasicBlock::Create(mod->getContext(), "main.0", main_func); + + //call void @brainf() + { + CallInst *brainf_call = CallInst::Create(mod->getFunction("brainf"), + "", bb); + brainf_call->setTailCall(false); + } + + //ret i32 0 + ReturnInst::Create(mod->getContext(), + ConstantInt::get(mod->getContext(), APInt(32, 0)), bb); +} + +int main(int argc, char **argv) { + cl::ParseCommandLineOptions(argc, argv, " BrainF compiler\n"); + + LLVMContext &Context = getGlobalContext(); + + if (InputFilename == "") { + errs() << "Error: You must specify the filename of the program to " + "be compiled. Use --help to see the options.\n"; + abort(); + } + + //Get the output stream + raw_ostream *out = &outs(); + if (!JIT) { + if (OutputFilename == "") { + std::string base = InputFilename; + if (InputFilename == "-") { base = "a"; } + + // Use default filename. + OutputFilename = base+".bc"; + } + if (OutputFilename != "-") { + std::string ErrInfo; + out = new raw_fd_ostream(OutputFilename.c_str(), ErrInfo, + raw_fd_ostream::F_Binary); + } + } + + //Get the input stream + std::istream *in = &std::cin; + if (InputFilename != "-") + in = new std::ifstream(InputFilename.c_str()); + + //Gather the compile flags + BrainF::CompileFlags cf = BrainF::flag_off; + if (ArrayBoundsChecking) + cf = BrainF::CompileFlags(cf | BrainF::flag_arraybounds); + + //Read the BrainF program + BrainF bf; + Module *mod = bf.parse(in, 65536, cf, Context); //64 KiB + if (in != &std::cin) + delete in; + addMainFunction(mod); + + //Verify generated code + if (verifyModule(*mod)) { + errs() << "Error: module failed verification. This shouldn't happen.\n"; + abort(); + } + + //Write it out + if (JIT) { + InitializeNativeTarget(); + + outs() << "------- Running JIT -------\n"; + ExecutionEngine *ee = EngineBuilder(mod).create(); + std::vector<GenericValue> args; + Function *brainf_func = mod->getFunction("brainf"); + GenericValue gv = ee->runFunction(brainf_func, args); + } else { + WriteBitcodeToFile(mod, *out); + } + + //Clean up + if (out != &outs()) + delete out; + delete mod; + + llvm_shutdown(); + + return 0; +} diff --git a/examples/BrainF/CMakeLists.txt b/examples/BrainF/CMakeLists.txt new file mode 100644 index 00000000000..7bec105cdc8 --- /dev/null +++ b/examples/BrainF/CMakeLists.txt @@ -0,0 +1,6 @@ +set(LLVM_LINK_COMPONENTS jit bitwriter nativecodegen interpreter) + +add_llvm_example(BrainF + BrainF.cpp + BrainFDriver.cpp + ) diff --git a/examples/BrainF/Makefile b/examples/BrainF/Makefile new file mode 100644 index 00000000000..2c3e0662523 --- /dev/null +++ b/examples/BrainF/Makefile @@ -0,0 +1,15 @@ +##===- examples/BrainF/Makefile ----------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../.. +TOOLNAME = BrainF +EXAMPLE_TOOL = 1 + +LINK_COMPONENTS := jit bitwriter nativecodegen interpreter + +include $(LEVEL)/Makefile.common diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt new file mode 100644 index 00000000000..54ee6cc3a3a --- /dev/null +++ b/examples/CMakeLists.txt @@ -0,0 +1,13 @@ +add_subdirectory(BrainF) +add_subdirectory(Fibonacci) +add_subdirectory(HowToUseJIT) +add_subdirectory(Kaleidoscope) +add_subdirectory(ModuleMaker) + +if( NOT WIN32 ) + add_subdirectory(ExceptionDemo) +endif() + +if( HAVE_PTHREAD_H ) + add_subdirectory(ParallelJIT) +endif( HAVE_PTHREAD_H ) diff --git a/examples/ExceptionDemo/CMakeLists.txt b/examples/ExceptionDemo/CMakeLists.txt new file mode 100644 index 00000000000..88c9ab7c181 --- /dev/null +++ b/examples/ExceptionDemo/CMakeLists.txt @@ -0,0 +1,6 @@ +set(LLVM_LINK_COMPONENTS jit nativecodegen) +set(LLVM_REQUIRES_EH 1) + +add_llvm_example(ExceptionDemo + ExceptionDemo.cpp + ) diff --git a/examples/ExceptionDemo/ExceptionDemo.cpp b/examples/ExceptionDemo/ExceptionDemo.cpp new file mode 100644 index 00000000000..6dbd6626de9 --- /dev/null +++ b/examples/ExceptionDemo/ExceptionDemo.cpp @@ -0,0 +1,2006 @@ +//===-- ExceptionDemo.cpp - An example using llvm Exceptions --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Demo program which implements an example LLVM exception implementation, and +// shows several test cases including the handling of foreign exceptions. +// It is run with type info types arguments to throw. A test will +// be run for each given type info type. While type info types with the value +// of -1 will trigger a foreign C++ exception to be thrown; type info types +// <= 6 and >= 1 will cause the associated generated exceptions to be thrown +// and caught by generated test functions; and type info types > 6 +// will result in exceptions which pass through to the test harness. All other +// type info types are not supported and could cause a crash. In all cases, +// the "finally" blocks of every generated test functions will executed +// regardless of whether or not that test function ignores or catches the +// thrown exception. +// +// examples: +// +// ExceptionDemo +// +// causes a usage to be printed to stderr +// +// ExceptionDemo 2 3 7 -1 +// +// results in the following cases: +// - Value 2 causes an exception with a type info type of 2 to be +// thrown and caught by an inner generated test function. +// - Value 3 causes an exception with a type info type of 3 to be +// thrown and caught by an outer generated test function. +// - Value 7 causes an exception with a type info type of 7 to be +// thrown and NOT be caught by any generated function. +// - Value -1 causes a foreign C++ exception to be thrown and not be +// caught by any generated function +// +// Cases -1 and 7 are caught by a C++ test harness where the validity of +// of a C++ catch(...) clause catching a generated exception with a +// type info type of 7 is explained by: example in rules 1.6.4 in +// http://sourcery.mentor.com/public/cxx-abi/abi-eh.html (v1.22) +// +// This code uses code from the llvm compiler-rt project and the llvm +// Kaleidoscope project. +// +//===----------------------------------------------------------------------===// + +#include "llvm/LLVMContext.h" +#include "llvm/DerivedTypes.h" +#include "llvm/ExecutionEngine/ExecutionEngine.h" +#include "llvm/ExecutionEngine/JIT.h" +#include "llvm/IRBuilder.h" +#include "llvm/Module.h" +#include "llvm/PassManager.h" +#include "llvm/Intrinsics.h" +#include "llvm/Analysis/Verifier.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/TargetSelect.h" + +// FIXME: Although all systems tested with (Linux, OS X), do not need this +// header file included. A user on ubuntu reported, undefined symbols +// for stderr, and fprintf, and the addition of this include fixed the +// issue for them. Given that LLVM's best practices include the goal +// of reducing the number of redundant header files included, the +// correct solution would be to find out why these symbols are not +// defined for the system in question, and fix the issue by finding out +// which LLVM header file, if any, would include these symbols. +#include <cstdio> + +#include <sstream> +#include <stdexcept> + + +#ifndef USE_GLOBAL_STR_CONSTS +#define USE_GLOBAL_STR_CONSTS true +#endif + +// System C++ ABI unwind types from: +// http://sourcery.mentor.com/public/cxx-abi/abi-eh.html (v1.22) + +extern "C" { + + typedef enum { + _URC_NO_REASON = 0, + _URC_FOREIGN_EXCEPTION_CAUGHT = 1, + _URC_FATAL_PHASE2_ERROR = 2, + _URC_FATAL_PHASE1_ERROR = 3, + _URC_NORMAL_STOP = 4, + _URC_END_OF_STACK = 5, + _URC_HANDLER_FOUND = 6, + _URC_INSTALL_CONTEXT = 7, + _URC_CONTINUE_UNWIND = 8 + } _Unwind_Reason_Code; + + typedef enum { + _UA_SEARCH_PHASE = 1, + _UA_CLEANUP_PHASE = 2, + _UA_HANDLER_FRAME = 4, + _UA_FORCE_UNWIND = 8, + _UA_END_OF_STACK = 16 + } _Unwind_Action; + + struct _Unwind_Exception; + + typedef void (*_Unwind_Exception_Cleanup_Fn) (_Unwind_Reason_Code, + struct _Unwind_Exception *); + + struct _Unwind_Exception { + uint64_t exception_class; + _Unwind_Exception_Cleanup_Fn exception_cleanup; + + uintptr_t private_1; + uintptr_t private_2; + + // @@@ The IA-64 ABI says that this structure must be double-word aligned. + // Taking that literally does not make much sense generically. Instead + // we provide the maximum alignment required by any type for the machine. + } __attribute__((__aligned__)); + + struct _Unwind_Context; + typedef struct _Unwind_Context *_Unwind_Context_t; + + extern const uint8_t *_Unwind_GetLanguageSpecificData (_Unwind_Context_t c); + extern uintptr_t _Unwind_GetGR (_Unwind_Context_t c, int i); + extern void _Unwind_SetGR (_Unwind_Context_t c, int i, uintptr_t n); + extern void _Unwind_SetIP (_Unwind_Context_t, uintptr_t new_value); + extern uintptr_t _Unwind_GetIP (_Unwind_Context_t context); + extern uintptr_t _Unwind_GetRegionStart (_Unwind_Context_t context); + +} // extern "C" + +// +// Example types +// + +/// This is our simplistic type info +struct OurExceptionType_t { + /// type info type + int type; +}; + + +/// This is our Exception class which relies on a negative offset to calculate +/// pointers to its instances from pointers to its unwindException member. +/// +/// Note: The above unwind.h defines struct _Unwind_Exception to be aligned +/// on a double word boundary. This is necessary to match the standard: +/// http://refspecs.freestandards.org/abi-eh-1.21.html +struct OurBaseException_t { + struct OurExceptionType_t type; + + // Note: This is properly aligned in unwind.h + struct _Unwind_Exception unwindException; +}; + + +// Note: Not needed since we are C++ +typedef struct OurBaseException_t OurException; +typedef struct _Unwind_Exception OurUnwindException; + +// +// Various globals used to support typeinfo and generatted exceptions in +// general +// + +static std::map<std::string, llvm::Value*> namedValues; + +int64_t ourBaseFromUnwindOffset; + +const unsigned char ourBaseExcpClassChars[] = +{'o', 'b', 'j', '\0', 'b', 'a', 's', '\0'}; + + +static uint64_t ourBaseExceptionClass = 0; + +static std::vector<std::string> ourTypeInfoNames; +static std::map<int, std::string> ourTypeInfoNamesIndex; + +static llvm::StructType *ourTypeInfoType; +static llvm::StructType *ourCaughtResultType; +static llvm::StructType *ourExceptionType; +static llvm::StructType *ourUnwindExceptionType; + +static llvm::ConstantInt *ourExceptionNotThrownState; +static llvm::ConstantInt *ourExceptionThrownState; +static llvm::ConstantInt *ourExceptionCaughtState; + +typedef std::vector<std::string> ArgNames; +typedef std::vector<llvm::Type*> ArgTypes; + +// +// Code Generation Utilities +// + +/// Utility used to create a function, both declarations and definitions +/// @param module for module instance +/// @param retType function return type +/// @param theArgTypes function's ordered argument types +/// @param theArgNames function's ordered arguments needed if use of this +/// function corresponds to a function definition. Use empty +/// aggregate for function declarations. +/// @param functName function name +/// @param linkage function linkage +/// @param declarationOnly for function declarations +/// @param isVarArg function uses vararg arguments +/// @returns function instance +llvm::Function *createFunction(llvm::Module &module, + llvm::Type *retType, + const ArgTypes &theArgTypes, + const ArgNames &theArgNames, + const std::string &functName, + llvm::GlobalValue::LinkageTypes linkage, + bool declarationOnly, + bool isVarArg) { + llvm::FunctionType *functType = + llvm::FunctionType::get(retType, theArgTypes, isVarArg); + llvm::Function *ret = + llvm::Function::Create(functType, linkage, functName, &module); + if (!ret || declarationOnly) + return(ret); + + namedValues.clear(); + unsigned i = 0; + for (llvm::Function::arg_iterator argIndex = ret->arg_begin(); + i != theArgNames.size(); + ++argIndex, ++i) { + + argIndex->setName(theArgNames[i]); + namedValues[theArgNames[i]] = argIndex; + } + + return(ret); +} + + +/// Create an alloca instruction in the entry block of +/// the parent function. This is used for mutable variables etc. +/// @param function parent instance +/// @param varName stack variable name +/// @param type stack variable type +/// @param initWith optional constant initialization value +/// @returns AllocaInst instance +static llvm::AllocaInst *createEntryBlockAlloca(llvm::Function &function, + const std::string &varName, + llvm::Type *type, + llvm::Constant *initWith = 0) { + llvm::BasicBlock &block = function.getEntryBlock(); + llvm::IRBuilder<> tmp(&block, block.begin()); + llvm::AllocaInst *ret = tmp.CreateAlloca(type, 0, varName.c_str()); + + if (initWith) + tmp.CreateStore(initWith, ret); + + return(ret); +} + + +// +// Code Generation Utilities End +// + +// +// Runtime C Library functions +// + +// Note: using an extern "C" block so that static functions can be used +extern "C" { + +// Note: Better ways to decide on bit width +// +/// Prints a 32 bit number, according to the format, to stderr. +/// @param intToPrint integer to print +/// @param format printf like format to use when printing +void print32Int(int intToPrint, const char *format) { + if (format) { + // Note: No NULL check + fprintf(stderr, format, intToPrint); + } + else { + // Note: No NULL check + fprintf(stderr, "::print32Int(...):NULL arg.\n"); + } +} + + +// Note: Better ways to decide on bit width +// +/// Prints a 64 bit number, according to the format, to stderr. +/// @param intToPrint integer to print +/// @param format printf like format to use when printing +void print64Int(long int intToPrint, const char *format) { + if (format) { + // Note: No NULL check + fprintf(stderr, format, intToPrint); + } + else { + // Note: No NULL check + fprintf(stderr, "::print64Int(...):NULL arg.\n"); + } +} + + +/// Prints a C string to stderr +/// @param toPrint string to print +void printStr(char *toPrint) { + if (toPrint) { + fprintf(stderr, "%s", toPrint); + } + else { + fprintf(stderr, "::printStr(...):NULL arg.\n"); + } +} + + +/// Deletes the true previosly allocated exception whose address +/// is calculated from the supplied OurBaseException_t::unwindException +/// member address. Handles (ignores), NULL pointers. +/// @param expToDelete exception to delete +void deleteOurException(OurUnwindException *expToDelete) { +#ifdef DEBUG + fprintf(stderr, + "deleteOurException(...).\n"); +#endif + + if (expToDelete && + (expToDelete->exception_class == ourBaseExceptionClass)) { + + free(((char*) expToDelete) + ourBaseFromUnwindOffset); + } +} + + +/// This function is the struct _Unwind_Exception API mandated delete function +/// used by foreign exception handlers when deleting our exception +/// (OurException), instances. +/// @param reason @link http://refspecs.freestandards.org/abi-eh-1.21.html +/// @unlink +/// @param expToDelete exception instance to delete +void deleteFromUnwindOurException(_Unwind_Reason_Code reason, + OurUnwindException *expToDelete) { +#ifdef DEBUG + fprintf(stderr, + "deleteFromUnwindOurException(...).\n"); +#endif + + deleteOurException(expToDelete); +} + + +/// Creates (allocates on the heap), an exception (OurException instance), +/// of the supplied type info type. +/// @param type type info type +OurUnwindException *createOurException(int type) { + size_t size = sizeof(OurException); + OurException *ret = (OurException*) memset(malloc(size), 0, size); + (ret->type).type = type; + (ret->unwindException).exception_class = ourBaseExceptionClass; + (ret->unwindException).exception_cleanup = deleteFromUnwindOurException; + + return(&(ret->unwindException)); +} + + +/// Read a uleb128 encoded value and advance pointer +/// See Variable Length Data in: +/// @link http://dwarfstd.org/Dwarf3.pdf @unlink +/// @param data reference variable holding memory pointer to decode from +/// @returns decoded value +static uintptr_t readULEB128(const uint8_t **data) { + uintptr_t result = 0; + uintptr_t shift = 0; + unsigned char byte; + const uint8_t *p = *data; + + do { + byte = *p++; + result |= (byte & 0x7f) << shift; + shift += 7; + } + while (byte & 0x80); + + *data = p; + + return result; +} + + +/// Read a sleb128 encoded value and advance pointer +/// See Variable Length Data in: +/// @link http://dwarfstd.org/Dwarf3.pdf @unlink +/// @param data reference variable holding memory pointer to decode from +/// @returns decoded value +static uintptr_t readSLEB128(const uint8_t **data) { + uintptr_t result = 0; + uintptr_t shift = 0; + unsigned char byte; + const uint8_t *p = *data; + + do { + byte = *p++; + result |= (byte & 0x7f) << shift; + shift += 7; + } + while (byte & 0x80); + + *data = p; + + if ((byte & 0x40) && (shift < (sizeof(result) << 3))) { + result |= (~0 << shift); + } + + return result; +} + + +/// Read a pointer encoded value and advance pointer +/// See Variable Length Data in: +/// @link http://dwarfstd.org/Dwarf3.pdf @unlink +/// @param data reference variable holding memory pointer to decode from +/// @param encoding dwarf encoding type +/// @returns decoded value +static uintptr_t readEncodedPointer(const uint8_t **data, uint8_t encoding) { + uintptr_t result = 0; + const uint8_t *p = *data; + + if (encoding == llvm::dwarf::DW_EH_PE_omit) + return(result); + + // first get value + switch (encoding & 0x0F) { + case llvm::dwarf::DW_EH_PE_absptr: + result = *((uintptr_t*)p); + p += sizeof(uintptr_t); + break; + case llvm::dwarf::DW_EH_PE_uleb128: + result = readULEB128(&p); + break; + // Note: This case has not been tested + case llvm::dwarf::DW_EH_PE_sleb128: + result = readSLEB128(&p); + break; + case llvm::dwarf::DW_EH_PE_udata2: + result = *((uint16_t*)p); + p += sizeof(uint16_t); + break; + case llvm::dwarf::DW_EH_PE_udata4: + result = *((uint32_t*)p); + p += sizeof(uint32_t); + break; + case llvm::dwarf::DW_EH_PE_udata8: + result = *((uint64_t*)p); + p += sizeof(uint64_t); + break; + case llvm::dwarf::DW_EH_PE_sdata2: + result = *((int16_t*)p); + p += sizeof(int16_t); + break; + case llvm::dwarf::DW_EH_PE_sdata4: + result = *((int32_t*)p); + p += sizeof(int32_t); + break; + case llvm::dwarf::DW_EH_PE_sdata8: + result = *((int64_t*)p); + p += sizeof(int64_t); + break; + default: + // not supported + abort(); + break; + } + + // then add relative offset + switch (encoding & 0x70) { + case llvm::dwarf::DW_EH_PE_absptr: + // do nothing + break; + case llvm::dwarf::DW_EH_PE_pcrel: + result += (uintptr_t)(*data); + break; + case llvm::dwarf::DW_EH_PE_textrel: + case llvm::dwarf::DW_EH_PE_datarel: + case llvm::dwarf::DW_EH_PE_funcrel: + case llvm::dwarf::DW_EH_PE_aligned: + default: + // not supported + abort(); + break; + } + + // then apply indirection + if (encoding & llvm::dwarf::DW_EH_PE_indirect) { + result = *((uintptr_t*)result); + } + + *data = p; + + return result; +} + + +/// Deals with Dwarf actions matching our type infos +/// (OurExceptionType_t instances). Returns whether or not a dwarf emitted +/// action matches the supplied exception type. If such a match succeeds, +/// the resultAction argument will be set with > 0 index value. Only +/// corresponding llvm.eh.selector type info arguments, cleanup arguments +/// are supported. Filters are not supported. +/// See Variable Length Data in: +/// @link http://dwarfstd.org/Dwarf3.pdf @unlink +/// Also see @link http://refspecs.freestandards.org/abi-eh-1.21.html @unlink +/// @param resultAction reference variable which will be set with result +/// @param classInfo our array of type info pointers (to globals) +/// @param actionEntry index into above type info array or 0 (clean up). +/// We do not support filters. +/// @param exceptionClass exception class (_Unwind_Exception::exception_class) +/// of thrown exception. +/// @param exceptionObject thrown _Unwind_Exception instance. +/// @returns whether or not a type info was found. False is returned if only +/// a cleanup was found +static bool handleActionValue(int64_t *resultAction, + struct OurExceptionType_t **classInfo, + uintptr_t actionEntry, + uint64_t exceptionClass, + struct _Unwind_Exception *exceptionObject) { + bool ret = false; + + if (!resultAction || + !exceptionObject || + (exceptionClass != ourBaseExceptionClass)) + return(ret); + + struct OurBaseException_t *excp = (struct OurBaseException_t*) + (((char*) exceptionObject) + ourBaseFromUnwindOffset); + struct OurExceptionType_t *excpType = &(excp->type); + int type = excpType->type; + +#ifdef DEBUG + fprintf(stderr, + "handleActionValue(...): exceptionObject = <%p>, " + "excp = <%p>.\n", + exceptionObject, + excp); +#endif + + const uint8_t *actionPos = (uint8_t*) actionEntry, + *tempActionPos; + int64_t typeOffset = 0, + actionOffset; + + for (int i = 0; true; ++i) { + // Each emitted dwarf action corresponds to a 2 tuple of + // type info address offset, and action offset to the next + // emitted action. + typeOffset = readSLEB128(&actionPos); + tempActionPos = actionPos; + actionOffset = readSLEB128(&tempActionPos); + +#ifdef DEBUG + fprintf(stderr, + "handleActionValue(...):typeOffset: <%lld>, " + "actionOffset: <%lld>.\n", + typeOffset, + actionOffset); +#endif + assert((typeOffset >= 0) && + "handleActionValue(...):filters are not supported."); + + // Note: A typeOffset == 0 implies that a cleanup llvm.eh.selector + // argument has been matched. + if ((typeOffset > 0) && + (type == (classInfo[-typeOffset])->type)) { +#ifdef DEBUG + fprintf(stderr, + "handleActionValue(...):actionValue <%d> found.\n", + i); +#endif + *resultAction = i + 1; + ret = true; + break; + } + +#ifdef DEBUG + fprintf(stderr, + "handleActionValue(...):actionValue not found.\n"); +#endif + if (!actionOffset) + break; + + actionPos += actionOffset; + } + + return(ret); +} + + +/// Deals with the Language specific data portion of the emitted dwarf code. +/// See @link http://refspecs.freestandards.org/abi-eh-1.21.html @unlink +/// @param version unsupported (ignored), unwind version +/// @param lsda language specific data area +/// @param _Unwind_Action actions minimally supported unwind stage +/// (forced specifically not supported) +/// @param exceptionClass exception class (_Unwind_Exception::exception_class) +/// of thrown exception. +/// @param exceptionObject thrown _Unwind_Exception instance. +/// @param context unwind system context +/// @returns minimally supported unwinding control indicator +static _Unwind_Reason_Code handleLsda(int version, + const uint8_t *lsda, + _Unwind_Action actions, + uint64_t exceptionClass, + struct _Unwind_Exception *exceptionObject, + _Unwind_Context_t context) { + _Unwind_Reason_Code ret = _URC_CONTINUE_UNWIND; + + if (!lsda) + return(ret); + +#ifdef DEBUG + fprintf(stderr, + "handleLsda(...):lsda is non-zero.\n"); +#endif + + // Get the current instruction pointer and offset it before next + // instruction in the current frame which threw the exception. + uintptr_t pc = _Unwind_GetIP(context)-1; + + // Get beginning current frame's code (as defined by the + // emitted dwarf code) + uintptr_t funcStart = _Unwind_GetRegionStart(context); + uintptr_t pcOffset = pc - funcStart; + struct OurExceptionType_t **classInfo = NULL; + + // Note: See JITDwarfEmitter::EmitExceptionTable(...) for corresponding + // dwarf emission + + // Parse LSDA header. + uint8_t lpStartEncoding = *lsda++; + + if (lpStartEncoding != llvm::dwarf::DW_EH_PE_omit) { + readEncodedPointer(&lsda, lpStartEncoding); + } + + uint8_t ttypeEncoding = *lsda++; + uintptr_t classInfoOffset; + + if (ttypeEncoding != llvm::dwarf::DW_EH_PE_omit) { + // Calculate type info locations in emitted dwarf code which + // were flagged by type info arguments to llvm.eh.selector + // intrinsic + classInfoOffset = readULEB128(&lsda); + classInfo = (struct OurExceptionType_t**) (lsda + classInfoOffset); + } + + // Walk call-site table looking for range that + // includes current PC. + + uint8_t callSiteEncoding = *lsda++; + uint32_t callSiteTableLength = readULEB128(&lsda); + const uint8_t *callSiteTableStart = lsda; + const uint8_t *callSiteTableEnd = callSiteTableStart + + callSiteTableLength; + const uint8_t *actionTableStart = callSiteTableEnd; + const uint8_t *callSitePtr = callSiteTableStart; + + bool foreignException = false; + + while (callSitePtr < callSiteTableEnd) { + uintptr_t start = readEncodedPointer(&callSitePtr, + callSiteEncoding); + uintptr_t length = readEncodedPointer(&callSitePtr, + callSiteEncoding); + uintptr_t landingPad = readEncodedPointer(&callSitePtr, + callSiteEncoding); + + // Note: Action value + uintptr_t actionEntry = readULEB128(&callSitePtr); + + if (exceptionClass != ourBaseExceptionClass) { + // We have been notified of a foreign exception being thrown, + // and we therefore need to execute cleanup landing pads + actionEntry = 0; + foreignException = true; + } + + if (landingPad == 0) { +#ifdef DEBUG + fprintf(stderr, + "handleLsda(...): No landing pad found.\n"); +#endif + + continue; // no landing pad for this entry + } + + if (actionEntry) { + actionEntry += ((uintptr_t) actionTableStart) - 1; + } + else { +#ifdef DEBUG + fprintf(stderr, + "handleLsda(...):No action table found.\n"); +#endif + } + + bool exceptionMatched = false; + + if ((start <= pcOffset) && (pcOffset < (start + length))) { +#ifdef DEBUG + fprintf(stderr, + "handleLsda(...): Landing pad found.\n"); +#endif + int64_t actionValue = 0; + + if (actionEntry) { + exceptionMatched = handleActionValue(&actionValue, + classInfo, + actionEntry, + exceptionClass, + exceptionObject); + } + + if (!(actions & _UA_SEARCH_PHASE)) { +#ifdef DEBUG + fprintf(stderr, + "handleLsda(...): installed landing pad " + "context.\n"); +#endif + + // Found landing pad for the PC. + // Set Instruction Pointer to so we re-enter function + // at landing pad. The landing pad is created by the + // compiler to take two parameters in registers. + _Unwind_SetGR(context, + __builtin_eh_return_data_regno(0), + (uintptr_t)exceptionObject); + + // Note: this virtual register directly corresponds + // to the return of the llvm.eh.selector intrinsic + if (!actionEntry || !exceptionMatched) { + // We indicate cleanup only + _Unwind_SetGR(context, + __builtin_eh_return_data_regno(1), + 0); + } + else { + // Matched type info index of llvm.eh.selector intrinsic + // passed here. + _Unwind_SetGR(context, + __builtin_eh_return_data_regno(1), + actionValue); + } + + // To execute landing pad set here + _Unwind_SetIP(context, funcStart + landingPad); + ret = _URC_INSTALL_CONTEXT; + } + else if (exceptionMatched) { +#ifdef DEBUG + fprintf(stderr, + "handleLsda(...): setting handler found.\n"); +#endif + ret = _URC_HANDLER_FOUND; + } + else { + // Note: Only non-clean up handlers are marked as + // found. Otherwise the clean up handlers will be + // re-found and executed during the clean up + // phase. +#ifdef DEBUG + fprintf(stderr, + "handleLsda(...): cleanup handler found.\n"); +#endif + } + + break; + } + } + + return(ret); +} + + +/// This is the personality function which is embedded (dwarf emitted), in the +/// dwarf unwind info block. Again see: JITDwarfEmitter.cpp. +/// See @link http://refspecs.freestandards.org/abi-eh-1.21.html @unlink +/// @param version unsupported (ignored), unwind version +/// @param _Unwind_Action actions minimally supported unwind stage +/// (forced specifically not supported) +/// @param exceptionClass exception class (_Unwind_Exception::exception_class) +/// of thrown exception. +/// @param exceptionObject thrown _Unwind_Exception instance. +/// @param context unwind system context +/// @returns minimally supported unwinding control indicator +_Unwind_Reason_Code ourPersonality(int version, + _Unwind_Action actions, + uint64_t exceptionClass, + struct _Unwind_Exception *exceptionObject, + _Unwind_Context_t context) { +#ifdef DEBUG + fprintf(stderr, + "We are in ourPersonality(...):actions is <%d>.\n", + actions); + + if (actions & _UA_SEARCH_PHASE) { + fprintf(stderr, "ourPersonality(...):In search phase.\n"); + } + else { + fprintf(stderr, "ourPersonality(...):In non-search phase.\n"); + } +#endif + + const uint8_t *lsda = _Unwind_GetLanguageSpecificData(context); + +#ifdef DEBUG + fprintf(stderr, + "ourPersonality(...):lsda = <%p>.\n", + lsda); +#endif + + // The real work of the personality function is captured here + return(handleLsda(version, + lsda, + actions, + exceptionClass, + exceptionObject, + context)); +} + + +/// Generates our _Unwind_Exception class from a given character array. +/// thereby handling arbitrary lengths (not in standard), and handling +/// embedded \0s. +/// See @link http://refspecs.freestandards.org/abi-eh-1.21.html @unlink +/// @param classChars char array to encode. NULL values not checkedf +/// @param classCharsSize number of chars in classChars. Value is not checked. +/// @returns class value +uint64_t genClass(const unsigned char classChars[], size_t classCharsSize) +{ + uint64_t ret = classChars[0]; + + for (unsigned i = 1; i < classCharsSize; ++i) { + ret <<= 8; + ret += classChars[i]; + } + + return(ret); +} + +} // extern "C" + +// +// Runtime C Library functions End +// + +// +// Code generation functions +// + +/// Generates code to print given constant string +/// @param context llvm context +/// @param module code for module instance +/// @param builder builder instance +/// @param toPrint string to print +/// @param useGlobal A value of true (default) indicates a GlobalValue is +/// generated, and is used to hold the constant string. A value of +/// false indicates that the constant string will be stored on the +/// stack. +void generateStringPrint(llvm::LLVMContext &context, + llvm::Module &module, + llvm::IRBuilder<> &builder, + std::string toPrint, + bool useGlobal = true) { + llvm::Function *printFunct = module.getFunction("printStr"); + + llvm::Value *stringVar; + llvm::Constant *stringConstant = + llvm::ConstantDataArray::getString(context, toPrint); + + if (useGlobal) { + // Note: Does not work without allocation + stringVar = + new llvm::GlobalVariable(module, + stringConstant->getType(), + true, + llvm::GlobalValue::LinkerPrivateLinkage, + stringConstant, + ""); + } + else { + stringVar = builder.CreateAlloca(stringConstant->getType()); + builder.CreateStore(stringConstant, stringVar); + } + + llvm::Value *cast = builder.CreatePointerCast(stringVar, + builder.getInt8PtrTy()); + builder.CreateCall(printFunct, cast); +} + + +/// Generates code to print given runtime integer according to constant +/// string format, and a given print function. +/// @param context llvm context +/// @param module code for module instance +/// @param builder builder instance +/// @param printFunct function used to "print" integer +/// @param toPrint string to print +/// @param format printf like formating string for print +/// @param useGlobal A value of true (default) indicates a GlobalValue is +/// generated, and is used to hold the constant string. A value of +/// false indicates that the constant string will be stored on the +/// stack. +void generateIntegerPrint(llvm::LLVMContext &context, + llvm::Module &module, + llvm::IRBuilder<> &builder, + llvm::Function &printFunct, + llvm::Value &toPrint, + std::string format, + bool useGlobal = true) { + llvm::Constant *stringConstant = + llvm::ConstantDataArray::getString(context, format); + llvm::Value *stringVar; + + if (useGlobal) { + // Note: Does not seem to work without allocation + stringVar = + new llvm::GlobalVariable(module, + stringConstant->getType(), + true, + llvm::GlobalValue::LinkerPrivateLinkage, + stringConstant, + ""); + } + else { + stringVar = builder.CreateAlloca(stringConstant->getType()); + builder.CreateStore(stringConstant, stringVar); + } + + llvm::Value *cast = builder.CreateBitCast(stringVar, + builder.getInt8PtrTy()); + builder.CreateCall2(&printFunct, &toPrint, cast); +} + + +/// Generates code to handle finally block type semantics: always runs +/// regardless of whether a thrown exception is passing through or the +/// parent function is simply exiting. In addition to printing some state +/// to stderr, this code will resume the exception handling--runs the +/// unwind resume block, if the exception has not been previously caught +/// by a catch clause, and will otherwise execute the end block (terminator +/// block). In addition this function creates the corresponding function's +/// stack storage for the exception pointer and catch flag status. +/// @param context llvm context +/// @param module code for module instance +/// @param builder builder instance +/// @param toAddTo parent function to add block to +/// @param blockName block name of new "finally" block. +/// @param functionId output id used for printing +/// @param terminatorBlock terminator "end" block +/// @param unwindResumeBlock unwind resume block +/// @param exceptionCaughtFlag reference exception caught/thrown status storage +/// @param exceptionStorage reference to exception pointer storage +/// @param caughtResultStorage reference to landingpad result storage +/// @returns newly created block +static llvm::BasicBlock *createFinallyBlock(llvm::LLVMContext &context, + llvm::Module &module, + llvm::IRBuilder<> &builder, + llvm::Function &toAddTo, + std::string &blockName, + std::string &functionId, + llvm::BasicBlock &terminatorBlock, + llvm::BasicBlock &unwindResumeBlock, + llvm::Value **exceptionCaughtFlag, + llvm::Value **exceptionStorage, + llvm::Value **caughtResultStorage) { + assert(exceptionCaughtFlag && + "ExceptionDemo::createFinallyBlock(...):exceptionCaughtFlag " + "is NULL"); + assert(exceptionStorage && + "ExceptionDemo::createFinallyBlock(...):exceptionStorage " + "is NULL"); + assert(caughtResultStorage && + "ExceptionDemo::createFinallyBlock(...):caughtResultStorage " + "is NULL"); + + *exceptionCaughtFlag = createEntryBlockAlloca(toAddTo, + "exceptionCaught", + ourExceptionNotThrownState->getType(), + ourExceptionNotThrownState); + + llvm::PointerType *exceptionStorageType = builder.getInt8PtrTy(); + *exceptionStorage = createEntryBlockAlloca(toAddTo, + "exceptionStorage", + exceptionStorageType, + llvm::ConstantPointerNull::get( + exceptionStorageType)); + *caughtResultStorage = createEntryBlockAlloca(toAddTo, + "caughtResultStorage", + ourCaughtResultType, + llvm::ConstantAggregateZero::get( + ourCaughtResultType)); + + llvm::BasicBlock *ret = llvm::BasicBlock::Create(context, + blockName, + &toAddTo); + + builder.SetInsertPoint(ret); + + std::ostringstream bufferToPrint; + bufferToPrint << "Gen: Executing finally block " + << blockName << " in " << functionId << "\n"; + generateStringPrint(context, + module, + builder, + bufferToPrint.str(), + USE_GLOBAL_STR_CONSTS); + + llvm::SwitchInst *theSwitch = builder.CreateSwitch(builder.CreateLoad( + *exceptionCaughtFlag), + &terminatorBlock, + 2); + theSwitch->addCase(ourExceptionCaughtState, &terminatorBlock); + theSwitch->addCase(ourExceptionThrownState, &unwindResumeBlock); + + return(ret); +} + + +/// Generates catch block semantics which print a string to indicate type of +/// catch executed, sets an exception caught flag, and executes passed in +/// end block (terminator block). +/// @param context llvm context +/// @param module code for module instance +/// @param builder builder instance +/// @param toAddTo parent function to add block to +/// @param blockName block name of new "catch" block. +/// @param functionId output id used for printing +/// @param terminatorBlock terminator "end" block +/// @param exceptionCaughtFlag exception caught/thrown status +/// @returns newly created block +static llvm::BasicBlock *createCatchBlock(llvm::LLVMContext &context, + llvm::Module &module, + llvm::IRBuilder<> &builder, + llvm::Function &toAddTo, + std::string &blockName, + std::string &functionId, + llvm::BasicBlock &terminatorBlock, + llvm::Value &exceptionCaughtFlag) { + + llvm::BasicBlock *ret = llvm::BasicBlock::Create(context, + blockName, + &toAddTo); + + builder.SetInsertPoint(ret); + + std::ostringstream bufferToPrint; + bufferToPrint << "Gen: Executing catch block " + << blockName + << " in " + << functionId + << std::endl; + generateStringPrint(context, + module, + builder, + bufferToPrint.str(), + USE_GLOBAL_STR_CONSTS); + builder.CreateStore(ourExceptionCaughtState, &exceptionCaughtFlag); + builder.CreateBr(&terminatorBlock); + + return(ret); +} + + +/// Generates a function which invokes a function (toInvoke) and, whose +/// unwind block will "catch" the type info types correspondingly held in the +/// exceptionTypesToCatch argument. If the toInvoke function throws an +/// exception which does not match any type info types contained in +/// exceptionTypesToCatch, the generated code will call _Unwind_Resume +/// with the raised exception. On the other hand the generated code will +/// normally exit if the toInvoke function does not throw an exception. +/// The generated "finally" block is always run regardless of the cause of +/// the generated function exit. +/// The generated function is returned after being verified. +/// @param module code for module instance +/// @param builder builder instance +/// @param fpm a function pass manager holding optional IR to IR +/// transformations +/// @param toInvoke inner function to invoke +/// @param ourId id used to printing purposes +/// @param numExceptionsToCatch length of exceptionTypesToCatch array +/// @param exceptionTypesToCatch array of type info types to "catch" +/// @returns generated function +static +llvm::Function *createCatchWrappedInvokeFunction(llvm::Module &module, + llvm::IRBuilder<> &builder, + llvm::FunctionPassManager &fpm, + llvm::Function &toInvoke, + std::string ourId, + unsigned numExceptionsToCatch, + unsigned exceptionTypesToCatch[]) { + + llvm::LLVMContext &context = module.getContext(); + llvm::Function *toPrint32Int = module.getFunction("print32Int"); + + ArgTypes argTypes; + argTypes.push_back(builder.getInt32Ty()); + + ArgNames argNames; + argNames.push_back("exceptTypeToThrow"); + + llvm::Function *ret = createFunction(module, + builder.getVoidTy(), + argTypes, + argNames, + ourId, + llvm::Function::ExternalLinkage, + false, + false); + + // Block which calls invoke + llvm::BasicBlock *entryBlock = llvm::BasicBlock::Create(context, + "entry", + ret); + // Normal block for invoke + llvm::BasicBlock *normalBlock = llvm::BasicBlock::Create(context, + "normal", + ret); + // Unwind block for invoke + llvm::BasicBlock *exceptionBlock = llvm::BasicBlock::Create(context, + "exception", + ret); + + // Block which routes exception to correct catch handler block + llvm::BasicBlock *exceptionRouteBlock = llvm::BasicBlock::Create(context, + "exceptionRoute", + ret); + + // Foreign exception handler + llvm::BasicBlock *externalExceptionBlock = llvm::BasicBlock::Create(context, + "externalException", + ret); + + // Block which calls _Unwind_Resume + llvm::BasicBlock *unwindResumeBlock = llvm::BasicBlock::Create(context, + "unwindResume", + ret); + + // Clean up block which delete exception if needed + llvm::BasicBlock *endBlock = llvm::BasicBlock::Create(context, "end", ret); + + std::string nextName; + std::vector<llvm::BasicBlock*> catchBlocks(numExceptionsToCatch); + llvm::Value *exceptionCaughtFlag = NULL; + llvm::Value *exceptionStorage = NULL; + llvm::Value *caughtResultStorage = NULL; + + // Finally block which will branch to unwindResumeBlock if + // exception is not caught. Initializes/allocates stack locations. + llvm::BasicBlock *finallyBlock = createFinallyBlock(context, + module, + builder, + *ret, + nextName = "finally", + ourId, + *endBlock, + *unwindResumeBlock, + &exceptionCaughtFlag, + &exceptionStorage, + &caughtResultStorage + ); + + for (unsigned i = 0; i < numExceptionsToCatch; ++i) { + nextName = ourTypeInfoNames[exceptionTypesToCatch[i]]; + + // One catch block per type info to be caught + catchBlocks[i] = createCatchBlock(context, + module, + builder, + *ret, + nextName, + ourId, + *finallyBlock, + *exceptionCaughtFlag); + } + + // Entry Block + + builder.SetInsertPoint(entryBlock); + + std::vector<llvm::Value*> args; + args.push_back(namedValues["exceptTypeToThrow"]); + builder.CreateInvoke(&toInvoke, + normalBlock, + exceptionBlock, + args); + + // End Block + + builder.SetInsertPoint(endBlock); + + generateStringPrint(context, + module, + builder, + "Gen: In end block: exiting in " + ourId + ".\n", + USE_GLOBAL_STR_CONSTS); + llvm::Function *deleteOurException = module.getFunction("deleteOurException"); + + // Note: function handles NULL exceptions + builder.CreateCall(deleteOurException, + builder.CreateLoad(exceptionStorage)); + builder.CreateRetVoid(); + + // Normal Block + + builder.SetInsertPoint(normalBlock); + + generateStringPrint(context, + module, + builder, + "Gen: No exception in " + ourId + "!\n", + USE_GLOBAL_STR_CONSTS); + + // Finally block is always called + builder.CreateBr(finallyBlock); + + // Unwind Resume Block + + builder.SetInsertPoint(unwindResumeBlock); + + builder.CreateResume(builder.CreateLoad(caughtResultStorage)); + + // Exception Block + + builder.SetInsertPoint(exceptionBlock); + + llvm::Function *personality = module.getFunction("ourPersonality"); + + llvm::LandingPadInst *caughtResult = + builder.CreateLandingPad(ourCaughtResultType, + personality, + numExceptionsToCatch, + "landingPad"); + + caughtResult->setCleanup(true); + + for (unsigned i = 0; i < numExceptionsToCatch; ++i) { + // Set up type infos to be caught + caughtResult->addClause(module.getGlobalVariable( + ourTypeInfoNames[exceptionTypesToCatch[i]])); + } + + llvm::Value *unwindException = builder.CreateExtractValue(caughtResult, 0); + llvm::Value *retTypeInfoIndex = builder.CreateExtractValue(caughtResult, 1); + + // FIXME: Redundant storage which, beyond utilizing value of + // caughtResultStore for unwindException storage, may be alleviated + // altogether with a block rearrangement + builder.CreateStore(caughtResult, caughtResultStorage); + builder.CreateStore(unwindException, exceptionStorage); + builder.CreateStore(ourExceptionThrownState, exceptionCaughtFlag); + + // Retrieve exception_class member from thrown exception + // (_Unwind_Exception instance). This member tells us whether or not + // the exception is foreign. + llvm::Value *unwindExceptionClass = + builder.CreateLoad(builder.CreateStructGEP( + builder.CreatePointerCast(unwindException, + ourUnwindExceptionType->getPointerTo()), + 0)); + + // Branch to the externalExceptionBlock if the exception is foreign or + // to a catch router if not. Either way the finally block will be run. + builder.CreateCondBr(builder.CreateICmpEQ(unwindExceptionClass, + llvm::ConstantInt::get(builder.getInt64Ty(), + ourBaseExceptionClass)), + exceptionRouteBlock, + externalExceptionBlock); + + // External Exception Block + + builder.SetInsertPoint(externalExceptionBlock); + + generateStringPrint(context, + module, + builder, + "Gen: Foreign exception received.\n", + USE_GLOBAL_STR_CONSTS); + + // Branch to the finally block + builder.CreateBr(finallyBlock); + + // Exception Route Block + + builder.SetInsertPoint(exceptionRouteBlock); + + // Casts exception pointer (_Unwind_Exception instance) to parent + // (OurException instance). + // + // Note: ourBaseFromUnwindOffset is usually negative + llvm::Value *typeInfoThrown = builder.CreatePointerCast( + builder.CreateConstGEP1_64(unwindException, + ourBaseFromUnwindOffset), + ourExceptionType->getPointerTo()); + + // Retrieve thrown exception type info type + // + // Note: Index is not relative to pointer but instead to structure + // unlike a true getelementptr (GEP) instruction + typeInfoThrown = builder.CreateStructGEP(typeInfoThrown, 0); + + llvm::Value *typeInfoThrownType = + builder.CreateStructGEP(typeInfoThrown, 0); + + generateIntegerPrint(context, + module, + builder, + *toPrint32Int, + *(builder.CreateLoad(typeInfoThrownType)), + "Gen: Exception type <%d> received (stack unwound) " + " in " + + ourId + + ".\n", + USE_GLOBAL_STR_CONSTS); + + // Route to matched type info catch block or run cleanup finally block + llvm::SwitchInst *switchToCatchBlock = builder.CreateSwitch(retTypeInfoIndex, + finallyBlock, + numExceptionsToCatch); + + unsigned nextTypeToCatch; + + for (unsigned i = 1; i <= numExceptionsToCatch; ++i) { + nextTypeToCatch = i - 1; + switchToCatchBlock->addCase(llvm::ConstantInt::get( + llvm::Type::getInt32Ty(context), i), + catchBlocks[nextTypeToCatch]); + } + + llvm::verifyFunction(*ret); + fpm.run(*ret); + + return(ret); +} + + +/// Generates function which throws either an exception matched to a runtime +/// determined type info type (argument to generated function), or if this +/// runtime value matches nativeThrowType, throws a foreign exception by +/// calling nativeThrowFunct. +/// @param module code for module instance +/// @param builder builder instance +/// @param fpm a function pass manager holding optional IR to IR +/// transformations +/// @param ourId id used to printing purposes +/// @param nativeThrowType a runtime argument of this value results in +/// nativeThrowFunct being called to generate/throw exception. +/// @param nativeThrowFunct function which will throw a foreign exception +/// if the above nativeThrowType matches generated function's arg. +/// @returns generated function +static +llvm::Function *createThrowExceptionFunction(llvm::Module &module, + llvm::IRBuilder<> &builder, + llvm::FunctionPassManager &fpm, + std::string ourId, + int32_t nativeThrowType, + llvm::Function &nativeThrowFunct) { + llvm::LLVMContext &context = module.getContext(); + namedValues.clear(); + ArgTypes unwindArgTypes; + unwindArgTypes.push_back(builder.getInt32Ty()); + ArgNames unwindArgNames; + unwindArgNames.push_back("exceptTypeToThrow"); + + llvm::Function *ret = createFunction(module, + builder.getVoidTy(), + unwindArgTypes, + unwindArgNames, + ourId, + llvm::Function::ExternalLinkage, + false, + false); + + // Throws either one of our exception or a native C++ exception depending + // on a runtime argument value containing a type info type. + llvm::BasicBlock *entryBlock = llvm::BasicBlock::Create(context, + "entry", + ret); + // Throws a foreign exception + llvm::BasicBlock *nativeThrowBlock = llvm::BasicBlock::Create(context, + "nativeThrow", + ret); + // Throws one of our Exceptions + llvm::BasicBlock *generatedThrowBlock = llvm::BasicBlock::Create(context, + "generatedThrow", + ret); + // Retrieved runtime type info type to throw + llvm::Value *exceptionType = namedValues["exceptTypeToThrow"]; + + // nativeThrowBlock block + + builder.SetInsertPoint(nativeThrowBlock); + + // Throws foreign exception + builder.CreateCall(&nativeThrowFunct, exceptionType); + builder.CreateUnreachable(); + + // entry block + + builder.SetInsertPoint(entryBlock); + + llvm::Function *toPrint32Int = module.getFunction("print32Int"); + generateIntegerPrint(context, + module, + builder, + *toPrint32Int, + *exceptionType, + "\nGen: About to throw exception type <%d> in " + + ourId + + ".\n", + USE_GLOBAL_STR_CONSTS); + + // Switches on runtime type info type value to determine whether or not + // a foreign exception is thrown. Defaults to throwing one of our + // generated exceptions. + llvm::SwitchInst *theSwitch = builder.CreateSwitch(exceptionType, + generatedThrowBlock, + 1); + + theSwitch->addCase(llvm::ConstantInt::get(llvm::Type::getInt32Ty(context), + nativeThrowType), + nativeThrowBlock); + + // generatedThrow block + + builder.SetInsertPoint(generatedThrowBlock); + + llvm::Function *createOurException = module.getFunction("createOurException"); + llvm::Function *raiseOurException = module.getFunction( + "_Unwind_RaiseException"); + + // Creates exception to throw with runtime type info type. + llvm::Value *exception = builder.CreateCall(createOurException, + namedValues["exceptTypeToThrow"]); + + // Throw generated Exception + builder.CreateCall(raiseOurException, exception); + builder.CreateUnreachable(); + + llvm::verifyFunction(*ret); + fpm.run(*ret); + + return(ret); +} + +static void createStandardUtilityFunctions(unsigned numTypeInfos, + llvm::Module &module, + llvm::IRBuilder<> &builder); + +/// Creates test code by generating and organizing these functions into the +/// test case. The test case consists of an outer function setup to invoke +/// an inner function within an environment having multiple catch and single +/// finally blocks. This inner function is also setup to invoke a throw +/// function within an evironment similar in nature to the outer function's +/// catch and finally blocks. Each of these two functions catch mutually +/// exclusive subsets (even or odd) of the type info types configured +/// for this this. All generated functions have a runtime argument which +/// holds a type info type to throw that each function takes and passes it +/// to the inner one if such a inner function exists. This type info type is +/// looked at by the generated throw function to see whether or not it should +/// throw a generated exception with the same type info type, or instead call +/// a supplied a function which in turn will throw a foreign exception. +/// @param module code for module instance +/// @param builder builder instance +/// @param fpm a function pass manager holding optional IR to IR +/// transformations +/// @param nativeThrowFunctName name of external function which will throw +/// a foreign exception +/// @returns outermost generated test function. +llvm::Function *createUnwindExceptionTest(llvm::Module &module, + llvm::IRBuilder<> &builder, + llvm::FunctionPassManager &fpm, + std::string nativeThrowFunctName) { + // Number of type infos to generate + unsigned numTypeInfos = 6; + + // Initialze intrisics and external functions to use along with exception + // and type info globals. + createStandardUtilityFunctions(numTypeInfos, + module, + builder); + llvm::Function *nativeThrowFunct = module.getFunction(nativeThrowFunctName); + + // Create exception throw function using the value ~0 to cause + // foreign exceptions to be thrown. + llvm::Function *throwFunct = createThrowExceptionFunction(module, + builder, + fpm, + "throwFunct", + ~0, + *nativeThrowFunct); + // Inner function will catch even type infos + unsigned innerExceptionTypesToCatch[] = {6, 2, 4}; + size_t numExceptionTypesToCatch = sizeof(innerExceptionTypesToCatch) / + sizeof(unsigned); + + // Generate inner function. + llvm::Function *innerCatchFunct = createCatchWrappedInvokeFunction(module, + builder, + fpm, + *throwFunct, + "innerCatchFunct", + numExceptionTypesToCatch, + innerExceptionTypesToCatch); + + // Outer function will catch odd type infos + unsigned outerExceptionTypesToCatch[] = {3, 1, 5}; + numExceptionTypesToCatch = sizeof(outerExceptionTypesToCatch) / + sizeof(unsigned); + + // Generate outer function + llvm::Function *outerCatchFunct = createCatchWrappedInvokeFunction(module, + builder, + fpm, + *innerCatchFunct, + "outerCatchFunct", + numExceptionTypesToCatch, + outerExceptionTypesToCatch); + + // Return outer function to run + return(outerCatchFunct); +} + + +/// Represents our foreign exceptions +class OurCppRunException : public std::runtime_error { +public: + OurCppRunException(const std::string reason) : + std::runtime_error(reason) {} + + OurCppRunException (const OurCppRunException &toCopy) : + std::runtime_error(toCopy) {} + + OurCppRunException &operator = (const OurCppRunException &toCopy) { + return(reinterpret_cast<OurCppRunException&>( + std::runtime_error::operator=(toCopy))); + } + + ~OurCppRunException (void) throw () {} +}; + + +/// Throws foreign C++ exception. +/// @param ignoreIt unused parameter that allows function to match implied +/// generated function contract. +extern "C" +void throwCppException (int32_t ignoreIt) { + throw(OurCppRunException("thrown by throwCppException(...)")); +} + +typedef void (*OurExceptionThrowFunctType) (int32_t typeToThrow); + +/// This is a test harness which runs test by executing generated +/// function with a type info type to throw. Harness wraps the execution +/// of generated function in a C++ try catch clause. +/// @param engine execution engine to use for executing generated function. +/// This demo program expects this to be a JIT instance for demo +/// purposes. +/// @param function generated test function to run +/// @param typeToThrow type info type of generated exception to throw, or +/// indicator to cause foreign exception to be thrown. +static +void runExceptionThrow(llvm::ExecutionEngine *engine, + llvm::Function *function, + int32_t typeToThrow) { + + // Find test's function pointer + OurExceptionThrowFunctType functPtr = + reinterpret_cast<OurExceptionThrowFunctType>( + reinterpret_cast<intptr_t>(engine->getPointerToFunction(function))); + + try { + // Run test + (*functPtr)(typeToThrow); + } + catch (OurCppRunException exc) { + // Catch foreign C++ exception + fprintf(stderr, + "\nrunExceptionThrow(...):In C++ catch OurCppRunException " + "with reason: %s.\n", + exc.what()); + } + catch (...) { + // Catch all exceptions including our generated ones. This latter + // functionality works according to the example in rules 1.6.4 of + // http://sourcery.mentor.com/public/cxx-abi/abi-eh.html (v1.22), + // given that these will be exceptions foreign to C++ + // (the _Unwind_Exception::exception_class should be different from + // the one used by C++). + fprintf(stderr, + "\nrunExceptionThrow(...):In C++ catch all.\n"); + } +} + +// +// End test functions +// + +typedef llvm::ArrayRef<llvm::Type*> TypeArray; + +/// This initialization routine creates type info globals and +/// adds external function declarations to module. +/// @param numTypeInfos number of linear type info associated type info types +/// to create as GlobalVariable instances, starting with the value 1. +/// @param module code for module instance +/// @param builder builder instance +static void createStandardUtilityFunctions(unsigned numTypeInfos, + llvm::Module &module, + llvm::IRBuilder<> &builder) { + + llvm::LLVMContext &context = module.getContext(); + + // Exception initializations + + // Setup exception catch state + ourExceptionNotThrownState = + llvm::ConstantInt::get(llvm::Type::getInt8Ty(context), 0), + ourExceptionThrownState = + llvm::ConstantInt::get(llvm::Type::getInt8Ty(context), 1), + ourExceptionCaughtState = + llvm::ConstantInt::get(llvm::Type::getInt8Ty(context), 2), + + + + // Create our type info type + ourTypeInfoType = llvm::StructType::get(context, + TypeArray(builder.getInt32Ty())); + + llvm::Type *caughtResultFieldTypes[] = { + builder.getInt8PtrTy(), + builder.getInt32Ty() + }; + + // Create our landingpad result type + ourCaughtResultType = llvm::StructType::get(context, + TypeArray(caughtResultFieldTypes)); + + // Create OurException type + ourExceptionType = llvm::StructType::get(context, + TypeArray(ourTypeInfoType)); + + // Create portion of _Unwind_Exception type + // + // Note: Declaring only a portion of the _Unwind_Exception struct. + // Does this cause problems? + ourUnwindExceptionType = + llvm::StructType::get(context, + TypeArray(builder.getInt64Ty())); + + struct OurBaseException_t dummyException; + + // Calculate offset of OurException::unwindException member. + ourBaseFromUnwindOffset = ((uintptr_t) &dummyException) - + ((uintptr_t) &(dummyException.unwindException)); + +#ifdef DEBUG + fprintf(stderr, + "createStandardUtilityFunctions(...):ourBaseFromUnwindOffset " + "= %lld, sizeof(struct OurBaseException_t) - " + "sizeof(struct _Unwind_Exception) = %lu.\n", + ourBaseFromUnwindOffset, + sizeof(struct OurBaseException_t) - + sizeof(struct _Unwind_Exception)); +#endif + + size_t numChars = sizeof(ourBaseExcpClassChars) / sizeof(char); + + // Create our _Unwind_Exception::exception_class value + ourBaseExceptionClass = genClass(ourBaseExcpClassChars, numChars); + + // Type infos + + std::string baseStr = "typeInfo", typeInfoName; + std::ostringstream typeInfoNameBuilder; + std::vector<llvm::Constant*> structVals; + + llvm::Constant *nextStruct; + llvm::GlobalVariable *nextGlobal = NULL; + + // Generate each type info + // + // Note: First type info is not used. + for (unsigned i = 0; i <= numTypeInfos; ++i) { + structVals.clear(); + structVals.push_back(llvm::ConstantInt::get(builder.getInt32Ty(), i)); + nextStruct = llvm::ConstantStruct::get(ourTypeInfoType, structVals); + + typeInfoNameBuilder.str(""); + typeInfoNameBuilder << baseStr << i; + typeInfoName = typeInfoNameBuilder.str(); + + // Note: Does not seem to work without allocation + nextGlobal = + new llvm::GlobalVariable(module, + ourTypeInfoType, + true, + llvm::GlobalValue::ExternalLinkage, + nextStruct, + typeInfoName); + + ourTypeInfoNames.push_back(typeInfoName); + ourTypeInfoNamesIndex[i] = typeInfoName; + } + + ArgNames argNames; + ArgTypes argTypes; + llvm::Function *funct = NULL; + + // print32Int + + llvm::Type *retType = builder.getVoidTy(); + + argTypes.clear(); + argTypes.push_back(builder.getInt32Ty()); + argTypes.push_back(builder.getInt8PtrTy()); + + argNames.clear(); + + createFunction(module, + retType, + argTypes, + argNames, + "print32Int", + llvm::Function::ExternalLinkage, + true, + false); + + // print64Int + + retType = builder.getVoidTy(); + + argTypes.clear(); + argTypes.push_back(builder.getInt64Ty()); + argTypes.push_back(builder.getInt8PtrTy()); + + argNames.clear(); + + createFunction(module, + retType, + argTypes, + argNames, + "print64Int", + llvm::Function::ExternalLinkage, + true, + false); + + // printStr + + retType = builder.getVoidTy(); + + argTypes.clear(); + argTypes.push_back(builder.getInt8PtrTy()); + + argNames.clear(); + + createFunction(module, + retType, + argTypes, + argNames, + "printStr", + llvm::Function::ExternalLinkage, + true, + false); + + // throwCppException + + retType = builder.getVoidTy(); + + argTypes.clear(); + argTypes.push_back(builder.getInt32Ty()); + + argNames.clear(); + + createFunction(module, + retType, + argTypes, + argNames, + "throwCppException", + llvm::Function::ExternalLinkage, + true, + false); + + // deleteOurException + + retType = builder.getVoidTy(); + + argTypes.clear(); + argTypes.push_back(builder.getInt8PtrTy()); + + argNames.clear(); + + createFunction(module, + retType, + argTypes, + argNames, + "deleteOurException", + llvm::Function::ExternalLinkage, + true, + false); + + // createOurException + + retType = builder.getInt8PtrTy(); + + argTypes.clear(); + argTypes.push_back(builder.getInt32Ty()); + + argNames.clear(); + + createFunction(module, + retType, + argTypes, + argNames, + "createOurException", + llvm::Function::ExternalLinkage, + true, + false); + + // _Unwind_RaiseException + + retType = builder.getInt32Ty(); + + argTypes.clear(); + argTypes.push_back(builder.getInt8PtrTy()); + + argNames.clear(); + + funct = createFunction(module, + retType, + argTypes, + argNames, + "_Unwind_RaiseException", + llvm::Function::ExternalLinkage, + true, + false); + + funct->addFnAttr(llvm::Attribute::NoReturn); + + // _Unwind_Resume + + retType = builder.getInt32Ty(); + + argTypes.clear(); + argTypes.push_back(builder.getInt8PtrTy()); + + argNames.clear(); + + funct = createFunction(module, + retType, + argTypes, + argNames, + "_Unwind_Resume", + llvm::Function::ExternalLinkage, + true, + false); + + funct->addFnAttr(llvm::Attribute::NoReturn); + + // ourPersonality + + retType = builder.getInt32Ty(); + + argTypes.clear(); + argTypes.push_back(builder.getInt32Ty()); + argTypes.push_back(builder.getInt32Ty()); + argTypes.push_back(builder.getInt64Ty()); + argTypes.push_back(builder.getInt8PtrTy()); + argTypes.push_back(builder.getInt8PtrTy()); + + argNames.clear(); + + createFunction(module, + retType, + argTypes, + argNames, + "ourPersonality", + llvm::Function::ExternalLinkage, + true, + false); + + // llvm.eh.typeid.for intrinsic + + getDeclaration(&module, llvm::Intrinsic::eh_typeid_for); +} + + +//===----------------------------------------------------------------------===// +// Main test driver code. +//===----------------------------------------------------------------------===// + +/// Demo main routine which takes the type info types to throw. A test will +/// be run for each given type info type. While type info types with the value +/// of -1 will trigger a foreign C++ exception to be thrown; type info types +/// <= 6 and >= 1 will be caught by test functions; and type info types > 6 +/// will result in exceptions which pass through to the test harness. All other +/// type info types are not supported and could cause a crash. +int main(int argc, char *argv[]) { + if (argc == 1) { + fprintf(stderr, + "\nUsage: ExceptionDemo <exception type to throw> " + "[<type 2>...<type n>].\n" + " Each type must have the value of 1 - 6 for " + "generated exceptions to be caught;\n" + " the value -1 for foreign C++ exceptions to be " + "generated and thrown;\n" + " or the values > 6 for exceptions to be ignored.\n" + "\nTry: ExceptionDemo 2 3 7 -1\n" + " for a full test.\n\n"); + return(0); + } + + // If not set, exception handling will not be turned on + llvm::TargetOptions Opts; + Opts.JITExceptionHandling = true; + + llvm::InitializeNativeTarget(); + llvm::LLVMContext &context = llvm::getGlobalContext(); + llvm::IRBuilder<> theBuilder(context); + + // Make the module, which holds all the code. + llvm::Module *module = new llvm::Module("my cool jit", context); + + // Build engine with JIT + llvm::EngineBuilder factory(module); + factory.setEngineKind(llvm::EngineKind::JIT); + factory.setAllocateGVsWithCode(false); + factory.setTargetOptions(Opts); + llvm::ExecutionEngine *executionEngine = factory.create(); + + { + llvm::FunctionPassManager fpm(module); + + // Set up the optimizer pipeline. + // Start with registering info about how the + // target lays out data structures. + fpm.add(new llvm::TargetData(*executionEngine->getTargetData())); + + // Optimizations turned on +#ifdef ADD_OPT_PASSES + + // Basic AliasAnslysis support for GVN. + fpm.add(llvm::createBasicAliasAnalysisPass()); + + // Promote allocas to registers. + fpm.add(llvm::createPromoteMemoryToRegisterPass()); + + // Do simple "peephole" optimizations and bit-twiddling optzns. + fpm.add(llvm::createInstructionCombiningPass()); + + // Reassociate expressions. + fpm.add(llvm::createReassociatePass()); + + // Eliminate Common SubExpressions. + fpm.add(llvm::createGVNPass()); + + // Simplify the control flow graph (deleting unreachable + // blocks, etc). + fpm.add(llvm::createCFGSimplificationPass()); +#endif // ADD_OPT_PASSES + + fpm.doInitialization(); + + // Generate test code using function throwCppException(...) as + // the function which throws foreign exceptions. + llvm::Function *toRun = + createUnwindExceptionTest(*module, + theBuilder, + fpm, + "throwCppException"); + + fprintf(stderr, "\nBegin module dump:\n\n"); + + module->dump(); + + fprintf(stderr, "\nEnd module dump:\n"); + + fprintf(stderr, "\n\nBegin Test:\n"); + + for (int i = 1; i < argc; ++i) { + // Run test for each argument whose value is the exception + // type to throw. + runExceptionThrow(executionEngine, + toRun, + (unsigned) strtoul(argv[i], NULL, 10)); + } + + fprintf(stderr, "\nEnd Test:\n\n"); + } + + delete executionEngine; + + return 0; +} + diff --git a/examples/ExceptionDemo/Makefile b/examples/ExceptionDemo/Makefile new file mode 100644 index 00000000000..480744730eb --- /dev/null +++ b/examples/ExceptionDemo/Makefile @@ -0,0 +1,16 @@ +##===- examples/ExceptionDemo/Makefile --------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===---------------------------------------------------------------------===## +LEVEL = ../.. +TOOLNAME = ExceptionDemo +EXAMPLE_TOOL = 1 +REQUIRES_EH = 1 + +LINK_COMPONENTS := jit interpreter nativecodegen + +include $(LEVEL)/Makefile.common diff --git a/examples/Fibonacci/CMakeLists.txt b/examples/Fibonacci/CMakeLists.txt new file mode 100644 index 00000000000..693761241fc --- /dev/null +++ b/examples/Fibonacci/CMakeLists.txt @@ -0,0 +1,5 @@ +set(LLVM_LINK_COMPONENTS jit interpreter nativecodegen) + +add_llvm_example(Fibonacci + fibonacci.cpp + ) diff --git a/examples/Fibonacci/Makefile b/examples/Fibonacci/Makefile new file mode 100644 index 00000000000..71f6ba0ef52 --- /dev/null +++ b/examples/Fibonacci/Makefile @@ -0,0 +1,17 @@ +##===- examples/Fibonacci/Makefile -------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../.. +TOOLNAME = Fibonacci +EXAMPLE_TOOL = 1 + +# Link in JIT support +LINK_COMPONENTS := jit interpreter nativecodegen + +include $(LEVEL)/Makefile.common diff --git a/examples/Fibonacci/fibonacci.cpp b/examples/Fibonacci/fibonacci.cpp new file mode 100644 index 00000000000..cfd9b1e33cf --- /dev/null +++ b/examples/Fibonacci/fibonacci.cpp @@ -0,0 +1,137 @@ +//===--- examples/Fibonacci/fibonacci.cpp - An example use of the JIT -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This small program provides an example of how to build quickly a small module +// with function Fibonacci and execute it with the JIT. +// +// The goal of this snippet is to create in the memory the LLVM module +// consisting of one function as follow: +// +// int fib(int x) { +// if(x<=2) return 1; +// return fib(x-1)+fib(x-2); +// } +// +// Once we have this, we compile the module via JIT, then execute the `fib' +// function and return result to a driver, i.e. to a "host program". +// +//===----------------------------------------------------------------------===// + +#include "llvm/LLVMContext.h" +#include "llvm/Module.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Constants.h" +#include "llvm/Instructions.h" +#include "llvm/Analysis/Verifier.h" +#include "llvm/ExecutionEngine/JIT.h" +#include "llvm/ExecutionEngine/Interpreter.h" +#include "llvm/ExecutionEngine/GenericValue.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/TargetSelect.h" +using namespace llvm; + +static Function *CreateFibFunction(Module *M, LLVMContext &Context) { + // Create the fib function and insert it into module M. This function is said + // to return an int and take an int parameter. + Function *FibF = + cast<Function>(M->getOrInsertFunction("fib", Type::getInt32Ty(Context), + Type::getInt32Ty(Context), + (Type *)0)); + + // Add a basic block to the function. + BasicBlock *BB = BasicBlock::Create(Context, "EntryBlock", FibF); + + // Get pointers to the constants. + Value *One = ConstantInt::get(Type::getInt32Ty(Context), 1); + Value *Two = ConstantInt::get(Type::getInt32Ty(Context), 2); + + // Get pointer to the integer argument of the add1 function... + Argument *ArgX = FibF->arg_begin(); // Get the arg. + ArgX->setName("AnArg"); // Give it a nice symbolic name for fun. + + // Create the true_block. + BasicBlock *RetBB = BasicBlock::Create(Context, "return", FibF); + // Create an exit block. + BasicBlock* RecurseBB = BasicBlock::Create(Context, "recurse", FibF); + + // Create the "if (arg <= 2) goto exitbb" + Value *CondInst = new ICmpInst(*BB, ICmpInst::ICMP_SLE, ArgX, Two, "cond"); + BranchInst::Create(RetBB, RecurseBB, CondInst, BB); + + // Create: ret int 1 + ReturnInst::Create(Context, One, RetBB); + + // create fib(x-1) + Value *Sub = BinaryOperator::CreateSub(ArgX, One, "arg", RecurseBB); + CallInst *CallFibX1 = CallInst::Create(FibF, Sub, "fibx1", RecurseBB); + CallFibX1->setTailCall(); + + // create fib(x-2) + Sub = BinaryOperator::CreateSub(ArgX, Two, "arg", RecurseBB); + CallInst *CallFibX2 = CallInst::Create(FibF, Sub, "fibx2", RecurseBB); + CallFibX2->setTailCall(); + + + // fib(x-1)+fib(x-2) + Value *Sum = BinaryOperator::CreateAdd(CallFibX1, CallFibX2, + "addresult", RecurseBB); + + // Create the return instruction and add it to the basic block + ReturnInst::Create(Context, Sum, RecurseBB); + + return FibF; +} + + +int main(int argc, char **argv) { + int n = argc > 1 ? atol(argv[1]) : 24; + + InitializeNativeTarget(); + LLVMContext Context; + + // Create some module to put our function into it. + OwningPtr<Module> M(new Module("test", Context)); + + // We are about to create the "fib" function: + Function *FibF = CreateFibFunction(M.get(), Context); + + // Now we going to create JIT + std::string errStr; + ExecutionEngine *EE = + EngineBuilder(M.get()) + .setErrorStr(&errStr) + .setEngineKind(EngineKind::JIT) + .create(); + + if (!EE) { + errs() << argv[0] << ": Failed to construct ExecutionEngine: " << errStr + << "\n"; + return 1; + } + + errs() << "verifying... "; + if (verifyModule(*M)) { + errs() << argv[0] << ": Error constructing function!\n"; + return 1; + } + + errs() << "OK\n"; + errs() << "We just constructed this LLVM module:\n\n---------\n" << *M; + errs() << "---------\nstarting fibonacci(" << n << ") with JIT...\n"; + + // Call the Fibonacci function with argument n: + std::vector<GenericValue> Args(1); + Args[0].IntVal = APInt(32, n); + GenericValue GV = EE->runFunction(FibF, Args); + + // import result of execution + outs() << "Result: " << GV.IntVal << "\n"; + + return 0; +} diff --git a/examples/HowToUseJIT/CMakeLists.txt b/examples/HowToUseJIT/CMakeLists.txt new file mode 100644 index 00000000000..428b53ffb9b --- /dev/null +++ b/examples/HowToUseJIT/CMakeLists.txt @@ -0,0 +1,5 @@ +set(LLVM_LINK_COMPONENTS jit interpreter nativecodegen) + +add_llvm_example(HowToUseJIT + HowToUseJIT.cpp + ) diff --git a/examples/HowToUseJIT/HowToUseJIT.cpp b/examples/HowToUseJIT/HowToUseJIT.cpp new file mode 100644 index 00000000000..5588e923df8 --- /dev/null +++ b/examples/HowToUseJIT/HowToUseJIT.cpp @@ -0,0 +1,133 @@ +//===-- examples/HowToUseJIT/HowToUseJIT.cpp - An example use of the JIT --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This small program provides an example of how to quickly build a small +// module with two functions and execute it with the JIT. +// +// Goal: +// The goal of this snippet is to create in the memory +// the LLVM module consisting of two functions as follow: +// +// int add1(int x) { +// return x+1; +// } +// +// int foo() { +// return add1(10); +// } +// +// then compile the module via JIT, then execute the `foo' +// function and return result to a driver, i.e. to a "host program". +// +// Some remarks and questions: +// +// - could we invoke some code using noname functions too? +// e.g. evaluate "foo()+foo()" without fears to introduce +// conflict of temporary function name with some real +// existing function name? +// +//===----------------------------------------------------------------------===// + +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/IRBuilder.h" +#include "llvm/Instructions.h" +#include "llvm/LLVMContext.h" +#include "llvm/Module.h" +#include "llvm/ExecutionEngine/JIT.h" +#include "llvm/ExecutionEngine/Interpreter.h" +#include "llvm/ExecutionEngine/GenericValue.h" +#include "llvm/Support/TargetSelect.h" +#include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +int main() { + + InitializeNativeTarget(); + + LLVMContext Context; + + // Create some module to put our function into it. + Module *M = new Module("test", Context); + + // Create the add1 function entry and insert this entry into module M. The + // function will have a return type of "int" and take an argument of "int". + // The '0' terminates the list of argument types. + Function *Add1F = + cast<Function>(M->getOrInsertFunction("add1", Type::getInt32Ty(Context), + Type::getInt32Ty(Context), + (Type *)0)); + + // Add a basic block to the function. As before, it automatically inserts + // because of the last argument. + BasicBlock *BB = BasicBlock::Create(Context, "EntryBlock", Add1F); + + // Create a basic block builder with default parameters. The builder will + // automatically append instructions to the basic block `BB'. + IRBuilder<> builder(BB); + + // Get pointers to the constant `1'. + Value *One = builder.getInt32(1); + + // Get pointers to the integer argument of the add1 function... + assert(Add1F->arg_begin() != Add1F->arg_end()); // Make sure there's an arg + Argument *ArgX = Add1F->arg_begin(); // Get the arg + ArgX->setName("AnArg"); // Give it a nice symbolic name for fun. + + // Create the add instruction, inserting it into the end of BB. + Value *Add = builder.CreateAdd(One, ArgX); + + // Create the return instruction and add it to the basic block + builder.CreateRet(Add); + + // Now, function add1 is ready. + + + // Now we're going to create function `foo', which returns an int and takes no + // arguments. + Function *FooF = + cast<Function>(M->getOrInsertFunction("foo", Type::getInt32Ty(Context), + (Type *)0)); + + // Add a basic block to the FooF function. + BB = BasicBlock::Create(Context, "EntryBlock", FooF); + + // Tell the basic block builder to attach itself to the new basic block + builder.SetInsertPoint(BB); + + // Get pointer to the constant `10'. + Value *Ten = builder.getInt32(10); + + // Pass Ten to the call to Add1F + CallInst *Add1CallRes = builder.CreateCall(Add1F, Ten); + Add1CallRes->setTailCall(true); + + // Create the return instruction and add it to the basic block. + builder.CreateRet(Add1CallRes); + + // Now we create the JIT. + ExecutionEngine* EE = EngineBuilder(M).create(); + + outs() << "We just constructed this LLVM module:\n\n" << *M; + outs() << "\n\nRunning foo: "; + outs().flush(); + + // Call the `foo' function with no arguments: + std::vector<GenericValue> noargs; + GenericValue gv = EE->runFunction(FooF, noargs); + + // Import result of execution: + outs() << "Result: " << gv.IntVal << "\n"; + EE->freeMachineCodeForFunction(FooF); + delete EE; + llvm_shutdown(); + return 0; +} diff --git a/examples/HowToUseJIT/Makefile b/examples/HowToUseJIT/Makefile new file mode 100644 index 00000000000..c8919db90cc --- /dev/null +++ b/examples/HowToUseJIT/Makefile @@ -0,0 +1,15 @@ +##===- examples/HowToUseJIT/Makefile -----------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../.. +TOOLNAME = HowToUseJIT +EXAMPLE_TOOL = 1 + +LINK_COMPONENTS := jit interpreter nativecodegen + +include $(LEVEL)/Makefile.common diff --git a/examples/Kaleidoscope/CMakeLists.txt b/examples/Kaleidoscope/CMakeLists.txt new file mode 100644 index 00000000000..8c87ac50b7a --- /dev/null +++ b/examples/Kaleidoscope/CMakeLists.txt @@ -0,0 +1,6 @@ +add_subdirectory(Chapter2) +add_subdirectory(Chapter3) +add_subdirectory(Chapter4) +add_subdirectory(Chapter5) +add_subdirectory(Chapter6) +add_subdirectory(Chapter7) diff --git a/examples/Kaleidoscope/Chapter2/CMakeLists.txt b/examples/Kaleidoscope/Chapter2/CMakeLists.txt new file mode 100644 index 00000000000..79f2b172d0d --- /dev/null +++ b/examples/Kaleidoscope/Chapter2/CMakeLists.txt @@ -0,0 +1,3 @@ +add_llvm_example(Kaleidoscope-Ch2 + toy.cpp + ) diff --git a/examples/Kaleidoscope/Chapter2/Makefile b/examples/Kaleidoscope/Chapter2/Makefile new file mode 100644 index 00000000000..1a9b94ce541 --- /dev/null +++ b/examples/Kaleidoscope/Chapter2/Makefile @@ -0,0 +1,13 @@ +##===- examples/Kaleidoscope/Chapter2/Makefile -------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../.. +TOOLNAME = Kaleidoscope-Ch2 +EXAMPLE_TOOL = 1 + +include $(LEVEL)/Makefile.common diff --git a/examples/Kaleidoscope/Chapter2/toy.cpp b/examples/Kaleidoscope/Chapter2/toy.cpp new file mode 100644 index 00000000000..f4f09d0b351 --- /dev/null +++ b/examples/Kaleidoscope/Chapter2/toy.cpp @@ -0,0 +1,398 @@ +#include <cstdio> +#include <cstdlib> +#include <string> +#include <map> +#include <vector> + +//===----------------------------------------------------------------------===// +// Lexer +//===----------------------------------------------------------------------===// + +// The lexer returns tokens [0-255] if it is an unknown character, otherwise one +// of these for known things. +enum Token { + tok_eof = -1, + + // commands + tok_def = -2, tok_extern = -3, + + // primary + tok_identifier = -4, tok_number = -5 +}; + +static std::string IdentifierStr; // Filled in if tok_identifier +static double NumVal; // Filled in if tok_number + +/// gettok - Return the next token from standard input. +static int gettok() { + static int LastChar = ' '; + + // Skip any whitespace. + while (isspace(LastChar)) + LastChar = getchar(); + + if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]* + IdentifierStr = LastChar; + while (isalnum((LastChar = getchar()))) + IdentifierStr += LastChar; + + if (IdentifierStr == "def") return tok_def; + if (IdentifierStr == "extern") return tok_extern; + return tok_identifier; + } + + if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+ + std::string NumStr; + do { + NumStr += LastChar; + LastChar = getchar(); + } while (isdigit(LastChar) || LastChar == '.'); + + NumVal = strtod(NumStr.c_str(), 0); + return tok_number; + } + + if (LastChar == '#') { + // Comment until end of line. + do LastChar = getchar(); + while (LastChar != EOF && LastChar != '\n' && LastChar != '\r'); + + if (LastChar != EOF) + return gettok(); + } + + // Check for end of file. Don't eat the EOF. + if (LastChar == EOF) + return tok_eof; + + // Otherwise, just return the character as its ascii value. + int ThisChar = LastChar; + LastChar = getchar(); + return ThisChar; +} + +//===----------------------------------------------------------------------===// +// Abstract Syntax Tree (aka Parse Tree) +//===----------------------------------------------------------------------===// + +/// ExprAST - Base class for all expression nodes. +class ExprAST { +public: + virtual ~ExprAST() {} +}; + +/// NumberExprAST - Expression class for numeric literals like "1.0". +class NumberExprAST : public ExprAST { + double Val; +public: + NumberExprAST(double val) : Val(val) {} +}; + +/// VariableExprAST - Expression class for referencing a variable, like "a". +class VariableExprAST : public ExprAST { + std::string Name; +public: + VariableExprAST(const std::string &name) : Name(name) {} +}; + +/// BinaryExprAST - Expression class for a binary operator. +class BinaryExprAST : public ExprAST { + char Op; + ExprAST *LHS, *RHS; +public: + BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs) + : Op(op), LHS(lhs), RHS(rhs) {} +}; + +/// CallExprAST - Expression class for function calls. +class CallExprAST : public ExprAST { + std::string Callee; + std::vector<ExprAST*> Args; +public: + CallExprAST(const std::string &callee, std::vector<ExprAST*> &args) + : Callee(callee), Args(args) {} +}; + +/// PrototypeAST - This class represents the "prototype" for a function, +/// which captures its name, and its argument names (thus implicitly the number +/// of arguments the function takes). +class PrototypeAST { + std::string Name; + std::vector<std::string> Args; +public: + PrototypeAST(const std::string &name, const std::vector<std::string> &args) + : Name(name), Args(args) {} + +}; + +/// FunctionAST - This class represents a function definition itself. +class FunctionAST { + PrototypeAST *Proto; + ExprAST *Body; +public: + FunctionAST(PrototypeAST *proto, ExprAST *body) + : Proto(proto), Body(body) {} + +}; + +//===----------------------------------------------------------------------===// +// Parser +//===----------------------------------------------------------------------===// + +/// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current +/// token the parser is looking at. getNextToken reads another token from the +/// lexer and updates CurTok with its results. +static int CurTok; +static int getNextToken() { + return CurTok = gettok(); +} + +/// BinopPrecedence - This holds the precedence for each binary operator that is +/// defined. +static std::map<char, int> BinopPrecedence; + +/// GetTokPrecedence - Get the precedence of the pending binary operator token. +static int GetTokPrecedence() { + if (!isascii(CurTok)) + return -1; + + // Make sure it's a declared binop. + int TokPrec = BinopPrecedence[CurTok]; + if (TokPrec <= 0) return -1; + return TokPrec; +} + +/// Error* - These are little helper functions for error handling. +ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;} +PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; } +FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; } + +static ExprAST *ParseExpression(); + +/// identifierexpr +/// ::= identifier +/// ::= identifier '(' expression* ')' +static ExprAST *ParseIdentifierExpr() { + std::string IdName = IdentifierStr; + + getNextToken(); // eat identifier. + + if (CurTok != '(') // Simple variable ref. + return new VariableExprAST(IdName); + + // Call. + getNextToken(); // eat ( + std::vector<ExprAST*> Args; + if (CurTok != ')') { + while (1) { + ExprAST *Arg = ParseExpression(); + if (!Arg) return 0; + Args.push_back(Arg); + + if (CurTok == ')') break; + + if (CurTok != ',') + return Error("Expected ')' or ',' in argument list"); + getNextToken(); + } + } + + // Eat the ')'. + getNextToken(); + + return new CallExprAST(IdName, Args); +} + +/// numberexpr ::= number +static ExprAST *ParseNumberExpr() { + ExprAST *Result = new NumberExprAST(NumVal); + getNextToken(); // consume the number + return Result; +} + +/// parenexpr ::= '(' expression ')' +static ExprAST *ParseParenExpr() { + getNextToken(); // eat (. + ExprAST *V = ParseExpression(); + if (!V) return 0; + + if (CurTok != ')') + return Error("expected ')'"); + getNextToken(); // eat ). + return V; +} + +/// primary +/// ::= identifierexpr +/// ::= numberexpr +/// ::= parenexpr +static ExprAST *ParsePrimary() { + switch (CurTok) { + default: return Error("unknown token when expecting an expression"); + case tok_identifier: return ParseIdentifierExpr(); + case tok_number: return ParseNumberExpr(); + case '(': return ParseParenExpr(); + } +} + +/// binoprhs +/// ::= ('+' primary)* +static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) { + // If this is a binop, find its precedence. + while (1) { + int TokPrec = GetTokPrecedence(); + + // If this is a binop that binds at least as tightly as the current binop, + // consume it, otherwise we are done. + if (TokPrec < ExprPrec) + return LHS; + + // Okay, we know this is a binop. + int BinOp = CurTok; + getNextToken(); // eat binop + + // Parse the primary expression after the binary operator. + ExprAST *RHS = ParsePrimary(); + if (!RHS) return 0; + + // If BinOp binds less tightly with RHS than the operator after RHS, let + // the pending operator take RHS as its LHS. + int NextPrec = GetTokPrecedence(); + if (TokPrec < NextPrec) { + RHS = ParseBinOpRHS(TokPrec+1, RHS); + if (RHS == 0) return 0; + } + + // Merge LHS/RHS. + LHS = new BinaryExprAST(BinOp, LHS, RHS); + } +} + +/// expression +/// ::= primary binoprhs +/// +static ExprAST *ParseExpression() { + ExprAST *LHS = ParsePrimary(); + if (!LHS) return 0; + + return ParseBinOpRHS(0, LHS); +} + +/// prototype +/// ::= id '(' id* ')' +static PrototypeAST *ParsePrototype() { + if (CurTok != tok_identifier) + return ErrorP("Expected function name in prototype"); + + std::string FnName = IdentifierStr; + getNextToken(); + + if (CurTok != '(') + return ErrorP("Expected '(' in prototype"); + + std::vector<std::string> ArgNames; + while (getNextToken() == tok_identifier) + ArgNames.push_back(IdentifierStr); + if (CurTok != ')') + return ErrorP("Expected ')' in prototype"); + + // success. + getNextToken(); // eat ')'. + + return new PrototypeAST(FnName, ArgNames); +} + +/// definition ::= 'def' prototype expression +static FunctionAST *ParseDefinition() { + getNextToken(); // eat def. + PrototypeAST *Proto = ParsePrototype(); + if (Proto == 0) return 0; + + if (ExprAST *E = ParseExpression()) + return new FunctionAST(Proto, E); + return 0; +} + +/// toplevelexpr ::= expression +static FunctionAST *ParseTopLevelExpr() { + if (ExprAST *E = ParseExpression()) { + // Make an anonymous proto. + PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>()); + return new FunctionAST(Proto, E); + } + return 0; +} + +/// external ::= 'extern' prototype +static PrototypeAST *ParseExtern() { + getNextToken(); // eat extern. + return ParsePrototype(); +} + +//===----------------------------------------------------------------------===// +// Top-Level parsing +//===----------------------------------------------------------------------===// + +static void HandleDefinition() { + if (ParseDefinition()) { + fprintf(stderr, "Parsed a function definition.\n"); + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +static void HandleExtern() { + if (ParseExtern()) { + fprintf(stderr, "Parsed an extern\n"); + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +static void HandleTopLevelExpression() { + // Evaluate a top-level expression into an anonymous function. + if (ParseTopLevelExpr()) { + fprintf(stderr, "Parsed a top-level expr\n"); + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +/// top ::= definition | external | expression | ';' +static void MainLoop() { + while (1) { + fprintf(stderr, "ready> "); + switch (CurTok) { + case tok_eof: return; + case ';': getNextToken(); break; // ignore top-level semicolons. + case tok_def: HandleDefinition(); break; + case tok_extern: HandleExtern(); break; + default: HandleTopLevelExpression(); break; + } + } +} + +//===----------------------------------------------------------------------===// +// Main driver code. +//===----------------------------------------------------------------------===// + +int main() { + // Install standard binary operators. + // 1 is lowest precedence. + BinopPrecedence['<'] = 10; + BinopPrecedence['+'] = 20; + BinopPrecedence['-'] = 20; + BinopPrecedence['*'] = 40; // highest. + + // Prime the first token. + fprintf(stderr, "ready> "); + getNextToken(); + + // Run the main "interpreter loop" now. + MainLoop(); + + return 0; +} diff --git a/examples/Kaleidoscope/Chapter3/CMakeLists.txt b/examples/Kaleidoscope/Chapter3/CMakeLists.txt new file mode 100644 index 00000000000..1af8db00a17 --- /dev/null +++ b/examples/Kaleidoscope/Chapter3/CMakeLists.txt @@ -0,0 +1,5 @@ +set(LLVM_LINK_COMPONENTS core) + +add_llvm_example(Kaleidoscope-Ch3 + toy.cpp + ) diff --git a/examples/Kaleidoscope/Chapter3/Makefile b/examples/Kaleidoscope/Chapter3/Makefile new file mode 100644 index 00000000000..4cc6948d803 --- /dev/null +++ b/examples/Kaleidoscope/Chapter3/Makefile @@ -0,0 +1,15 @@ +##===- examples/Kaleidoscope/Chapter3/Makefile -------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../.. +TOOLNAME = Kaleidoscope-Ch3 +EXAMPLE_TOOL = 1 + +LINK_COMPONENTS := core + +include $(LEVEL)/Makefile.common diff --git a/examples/Kaleidoscope/Chapter3/toy.cpp b/examples/Kaleidoscope/Chapter3/toy.cpp new file mode 100644 index 00000000000..c1e34b2f09a --- /dev/null +++ b/examples/Kaleidoscope/Chapter3/toy.cpp @@ -0,0 +1,563 @@ +#include "llvm/DerivedTypes.h" +#include "llvm/IRBuilder.h" +#include "llvm/LLVMContext.h" +#include "llvm/Module.h" +#include "llvm/Analysis/Verifier.h" +#include <cstdio> +#include <string> +#include <map> +#include <vector> +using namespace llvm; + +//===----------------------------------------------------------------------===// +// Lexer +//===----------------------------------------------------------------------===// + +// The lexer returns tokens [0-255] if it is an unknown character, otherwise one +// of these for known things. +enum Token { + tok_eof = -1, + + // commands + tok_def = -2, tok_extern = -3, + + // primary + tok_identifier = -4, tok_number = -5 +}; + +static std::string IdentifierStr; // Filled in if tok_identifier +static double NumVal; // Filled in if tok_number + +/// gettok - Return the next token from standard input. +static int gettok() { + static int LastChar = ' '; + + // Skip any whitespace. + while (isspace(LastChar)) + LastChar = getchar(); + + if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]* + IdentifierStr = LastChar; + while (isalnum((LastChar = getchar()))) + IdentifierStr += LastChar; + + if (IdentifierStr == "def") return tok_def; + if (IdentifierStr == "extern") return tok_extern; + return tok_identifier; + } + + if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+ + std::string NumStr; + do { + NumStr += LastChar; + LastChar = getchar(); + } while (isdigit(LastChar) || LastChar == '.'); + + NumVal = strtod(NumStr.c_str(), 0); + return tok_number; + } + + if (LastChar == '#') { + // Comment until end of line. + do LastChar = getchar(); + while (LastChar != EOF && LastChar != '\n' && LastChar != '\r'); + + if (LastChar != EOF) + return gettok(); + } + + // Check for end of file. Don't eat the EOF. + if (LastChar == EOF) + return tok_eof; + + // Otherwise, just return the character as its ascii value. + int ThisChar = LastChar; + LastChar = getchar(); + return ThisChar; +} + +//===----------------------------------------------------------------------===// +// Abstract Syntax Tree (aka Parse Tree) +//===----------------------------------------------------------------------===// + +/// ExprAST - Base class for all expression nodes. +class ExprAST { +public: + virtual ~ExprAST() {} + virtual Value *Codegen() = 0; +}; + +/// NumberExprAST - Expression class for numeric literals like "1.0". +class NumberExprAST : public ExprAST { + double Val; +public: + NumberExprAST(double val) : Val(val) {} + virtual Value *Codegen(); +}; + +/// VariableExprAST - Expression class for referencing a variable, like "a". +class VariableExprAST : public ExprAST { + std::string Name; +public: + VariableExprAST(const std::string &name) : Name(name) {} + virtual Value *Codegen(); +}; + +/// BinaryExprAST - Expression class for a binary operator. +class BinaryExprAST : public ExprAST { + char Op; + ExprAST *LHS, *RHS; +public: + BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs) + : Op(op), LHS(lhs), RHS(rhs) {} + virtual Value *Codegen(); +}; + +/// CallExprAST - Expression class for function calls. +class CallExprAST : public ExprAST { + std::string Callee; + std::vector<ExprAST*> Args; +public: + CallExprAST(const std::string &callee, std::vector<ExprAST*> &args) + : Callee(callee), Args(args) {} + virtual Value *Codegen(); +}; + +/// PrototypeAST - This class represents the "prototype" for a function, +/// which captures its name, and its argument names (thus implicitly the number +/// of arguments the function takes). +class PrototypeAST { + std::string Name; + std::vector<std::string> Args; +public: + PrototypeAST(const std::string &name, const std::vector<std::string> &args) + : Name(name), Args(args) {} + + Function *Codegen(); +}; + +/// FunctionAST - This class represents a function definition itself. +class FunctionAST { + PrototypeAST *Proto; + ExprAST *Body; +public: + FunctionAST(PrototypeAST *proto, ExprAST *body) + : Proto(proto), Body(body) {} + + Function *Codegen(); +}; + +//===----------------------------------------------------------------------===// +// Parser +//===----------------------------------------------------------------------===// + +/// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current +/// token the parser is looking at. getNextToken reads another token from the +/// lexer and updates CurTok with its results. +static int CurTok; +static int getNextToken() { + return CurTok = gettok(); +} + +/// BinopPrecedence - This holds the precedence for each binary operator that is +/// defined. +static std::map<char, int> BinopPrecedence; + +/// GetTokPrecedence - Get the precedence of the pending binary operator token. +static int GetTokPrecedence() { + if (!isascii(CurTok)) + return -1; + + // Make sure it's a declared binop. + int TokPrec = BinopPrecedence[CurTok]; + if (TokPrec <= 0) return -1; + return TokPrec; +} + +/// Error* - These are little helper functions for error handling. +ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;} +PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; } +FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; } + +static ExprAST *ParseExpression(); + +/// identifierexpr +/// ::= identifier +/// ::= identifier '(' expression* ')' +static ExprAST *ParseIdentifierExpr() { + std::string IdName = IdentifierStr; + + getNextToken(); // eat identifier. + + if (CurTok != '(') // Simple variable ref. + return new VariableExprAST(IdName); + + // Call. + getNextToken(); // eat ( + std::vector<ExprAST*> Args; + if (CurTok != ')') { + while (1) { + ExprAST *Arg = ParseExpression(); + if (!Arg) return 0; + Args.push_back(Arg); + + if (CurTok == ')') break; + + if (CurTok != ',') + return Error("Expected ')' or ',' in argument list"); + getNextToken(); + } + } + + // Eat the ')'. + getNextToken(); + + return new CallExprAST(IdName, Args); +} + +/// numberexpr ::= number +static ExprAST *ParseNumberExpr() { + ExprAST *Result = new NumberExprAST(NumVal); + getNextToken(); // consume the number + return Result; +} + +/// parenexpr ::= '(' expression ')' +static ExprAST *ParseParenExpr() { + getNextToken(); // eat (. + ExprAST *V = ParseExpression(); + if (!V) return 0; + + if (CurTok != ')') + return Error("expected ')'"); + getNextToken(); // eat ). + return V; +} + +/// primary +/// ::= identifierexpr +/// ::= numberexpr +/// ::= parenexpr +static ExprAST *ParsePrimary() { + switch (CurTok) { + default: return Error("unknown token when expecting an expression"); + case tok_identifier: return ParseIdentifierExpr(); + case tok_number: return ParseNumberExpr(); + case '(': return ParseParenExpr(); + } +} + +/// binoprhs +/// ::= ('+' primary)* +static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) { + // If this is a binop, find its precedence. + while (1) { + int TokPrec = GetTokPrecedence(); + + // If this is a binop that binds at least as tightly as the current binop, + // consume it, otherwise we are done. + if (TokPrec < ExprPrec) + return LHS; + + // Okay, we know this is a binop. + int BinOp = CurTok; + getNextToken(); // eat binop + + // Parse the primary expression after the binary operator. + ExprAST *RHS = ParsePrimary(); + if (!RHS) return 0; + + // If BinOp binds less tightly with RHS than the operator after RHS, let + // the pending operator take RHS as its LHS. + int NextPrec = GetTokPrecedence(); + if (TokPrec < NextPrec) { + RHS = ParseBinOpRHS(TokPrec+1, RHS); + if (RHS == 0) return 0; + } + + // Merge LHS/RHS. + LHS = new BinaryExprAST(BinOp, LHS, RHS); + } +} + +/// expression +/// ::= primary binoprhs +/// +static ExprAST *ParseExpression() { + ExprAST *LHS = ParsePrimary(); + if (!LHS) return 0; + + return ParseBinOpRHS(0, LHS); +} + +/// prototype +/// ::= id '(' id* ')' +static PrototypeAST *ParsePrototype() { + if (CurTok != tok_identifier) + return ErrorP("Expected function name in prototype"); + + std::string FnName = IdentifierStr; + getNextToken(); + + if (CurTok != '(') + return ErrorP("Expected '(' in prototype"); + + std::vector<std::string> ArgNames; + while (getNextToken() == tok_identifier) + ArgNames.push_back(IdentifierStr); + if (CurTok != ')') + return ErrorP("Expected ')' in prototype"); + + // success. + getNextToken(); // eat ')'. + + return new PrototypeAST(FnName, ArgNames); +} + +/// definition ::= 'def' prototype expression +static FunctionAST *ParseDefinition() { + getNextToken(); // eat def. + PrototypeAST *Proto = ParsePrototype(); + if (Proto == 0) return 0; + + if (ExprAST *E = ParseExpression()) + return new FunctionAST(Proto, E); + return 0; +} + +/// toplevelexpr ::= expression +static FunctionAST *ParseTopLevelExpr() { + if (ExprAST *E = ParseExpression()) { + // Make an anonymous proto. + PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>()); + return new FunctionAST(Proto, E); + } + return 0; +} + +/// external ::= 'extern' prototype +static PrototypeAST *ParseExtern() { + getNextToken(); // eat extern. + return ParsePrototype(); +} + +//===----------------------------------------------------------------------===// +// Code Generation +//===----------------------------------------------------------------------===// + +static Module *TheModule; +static IRBuilder<> Builder(getGlobalContext()); +static std::map<std::string, Value*> NamedValues; + +Value *ErrorV(const char *Str) { Error(Str); return 0; } + +Value *NumberExprAST::Codegen() { + return ConstantFP::get(getGlobalContext(), APFloat(Val)); +} + +Value *VariableExprAST::Codegen() { + // Look this variable up in the function. + Value *V = NamedValues[Name]; + return V ? V : ErrorV("Unknown variable name"); +} + +Value *BinaryExprAST::Codegen() { + Value *L = LHS->Codegen(); + Value *R = RHS->Codegen(); + if (L == 0 || R == 0) return 0; + + switch (Op) { + case '+': return Builder.CreateFAdd(L, R, "addtmp"); + case '-': return Builder.CreateFSub(L, R, "subtmp"); + case '*': return Builder.CreateFMul(L, R, "multmp"); + case '<': + L = Builder.CreateFCmpULT(L, R, "cmptmp"); + // Convert bool 0/1 to double 0.0 or 1.0 + return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()), + "booltmp"); + default: return ErrorV("invalid binary operator"); + } +} + +Value *CallExprAST::Codegen() { + // Look up the name in the global module table. + Function *CalleeF = TheModule->getFunction(Callee); + if (CalleeF == 0) + return ErrorV("Unknown function referenced"); + + // If argument mismatch error. + if (CalleeF->arg_size() != Args.size()) + return ErrorV("Incorrect # arguments passed"); + + std::vector<Value*> ArgsV; + for (unsigned i = 0, e = Args.size(); i != e; ++i) { + ArgsV.push_back(Args[i]->Codegen()); + if (ArgsV.back() == 0) return 0; + } + + return Builder.CreateCall(CalleeF, ArgsV, "calltmp"); +} + +Function *PrototypeAST::Codegen() { + // Make the function type: double(double,double) etc. + std::vector<Type*> Doubles(Args.size(), + Type::getDoubleTy(getGlobalContext())); + FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()), + Doubles, false); + + Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule); + + // If F conflicted, there was already something named 'Name'. If it has a + // body, don't allow redefinition or reextern. + if (F->getName() != Name) { + // Delete the one we just made and get the existing one. + F->eraseFromParent(); + F = TheModule->getFunction(Name); + + // If F already has a body, reject this. + if (!F->empty()) { + ErrorF("redefinition of function"); + return 0; + } + + // If F took a different number of args, reject. + if (F->arg_size() != Args.size()) { + ErrorF("redefinition of function with different # args"); + return 0; + } + } + + // Set names for all arguments. + unsigned Idx = 0; + for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size(); + ++AI, ++Idx) { + AI->setName(Args[Idx]); + + // Add arguments to variable symbol table. + NamedValues[Args[Idx]] = AI; + } + + return F; +} + +Function *FunctionAST::Codegen() { + NamedValues.clear(); + + Function *TheFunction = Proto->Codegen(); + if (TheFunction == 0) + return 0; + + // Create a new basic block to start insertion into. + BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction); + Builder.SetInsertPoint(BB); + + if (Value *RetVal = Body->Codegen()) { + // Finish off the function. + Builder.CreateRet(RetVal); + + // Validate the generated code, checking for consistency. + verifyFunction(*TheFunction); + + return TheFunction; + } + + // Error reading body, remove function. + TheFunction->eraseFromParent(); + return 0; +} + +//===----------------------------------------------------------------------===// +// Top-Level parsing and JIT Driver +//===----------------------------------------------------------------------===// + +static void HandleDefinition() { + if (FunctionAST *F = ParseDefinition()) { + if (Function *LF = F->Codegen()) { + fprintf(stderr, "Read function definition:"); + LF->dump(); + } + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +static void HandleExtern() { + if (PrototypeAST *P = ParseExtern()) { + if (Function *F = P->Codegen()) { + fprintf(stderr, "Read extern: "); + F->dump(); + } + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +static void HandleTopLevelExpression() { + // Evaluate a top-level expression into an anonymous function. + if (FunctionAST *F = ParseTopLevelExpr()) { + if (Function *LF = F->Codegen()) { + fprintf(stderr, "Read top-level expression:"); + LF->dump(); + } + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +/// top ::= definition | external | expression | ';' +static void MainLoop() { + while (1) { + fprintf(stderr, "ready> "); + switch (CurTok) { + case tok_eof: return; + case ';': getNextToken(); break; // ignore top-level semicolons. + case tok_def: HandleDefinition(); break; + case tok_extern: HandleExtern(); break; + default: HandleTopLevelExpression(); break; + } + } +} + +//===----------------------------------------------------------------------===// +// "Library" functions that can be "extern'd" from user code. +//===----------------------------------------------------------------------===// + +/// putchard - putchar that takes a double and returns 0. +extern "C" +double putchard(double X) { + putchar((char)X); + return 0; +} + +//===----------------------------------------------------------------------===// +// Main driver code. +//===----------------------------------------------------------------------===// + +int main() { + LLVMContext &Context = getGlobalContext(); + + // Install standard binary operators. + // 1 is lowest precedence. + BinopPrecedence['<'] = 10; + BinopPrecedence['+'] = 20; + BinopPrecedence['-'] = 20; + BinopPrecedence['*'] = 40; // highest. + + // Prime the first token. + fprintf(stderr, "ready> "); + getNextToken(); + + // Make the module, which holds all the code. + TheModule = new Module("my cool jit", Context); + + // Run the main "interpreter loop" now. + MainLoop(); + + // Print out all of the generated code. + TheModule->dump(); + + return 0; +} diff --git a/examples/Kaleidoscope/Chapter4/CMakeLists.txt b/examples/Kaleidoscope/Chapter4/CMakeLists.txt new file mode 100644 index 00000000000..0d1ac533f02 --- /dev/null +++ b/examples/Kaleidoscope/Chapter4/CMakeLists.txt @@ -0,0 +1,5 @@ +set(LLVM_LINK_COMPONENTS core jit interpreter native) + +add_llvm_example(Kaleidoscope-Ch4 + toy.cpp + ) diff --git a/examples/Kaleidoscope/Chapter4/Makefile b/examples/Kaleidoscope/Chapter4/Makefile new file mode 100644 index 00000000000..30162d94bce --- /dev/null +++ b/examples/Kaleidoscope/Chapter4/Makefile @@ -0,0 +1,15 @@ +##===- examples/Kaleidoscope/Chapter4/Makefile -------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../.. +TOOLNAME = Kaleidoscope-Ch4 +EXAMPLE_TOOL = 1 + +LINK_COMPONENTS := core jit native + +include $(LEVEL)/Makefile.common diff --git a/examples/Kaleidoscope/Chapter4/toy.cpp b/examples/Kaleidoscope/Chapter4/toy.cpp new file mode 100644 index 00000000000..cce4466ed57 --- /dev/null +++ b/examples/Kaleidoscope/Chapter4/toy.cpp @@ -0,0 +1,613 @@ +#include "llvm/DerivedTypes.h" +#include "llvm/ExecutionEngine/ExecutionEngine.h" +#include "llvm/ExecutionEngine/JIT.h" +#include "llvm/IRBuilder.h" +#include "llvm/LLVMContext.h" +#include "llvm/Module.h" +#include "llvm/PassManager.h" +#include "llvm/Analysis/Verifier.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Support/TargetSelect.h" +#include <cstdio> +#include <string> +#include <map> +#include <vector> +using namespace llvm; + +//===----------------------------------------------------------------------===// +// Lexer +//===----------------------------------------------------------------------===// + +// The lexer returns tokens [0-255] if it is an unknown character, otherwise one +// of these for known things. +enum Token { + tok_eof = -1, + + // commands + tok_def = -2, tok_extern = -3, + + // primary + tok_identifier = -4, tok_number = -5 +}; + +static std::string IdentifierStr; // Filled in if tok_identifier +static double NumVal; // Filled in if tok_number + +/// gettok - Return the next token from standard input. +static int gettok() { + static int LastChar = ' '; + + // Skip any whitespace. + while (isspace(LastChar)) + LastChar = getchar(); + + if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]* + IdentifierStr = LastChar; + while (isalnum((LastChar = getchar()))) + IdentifierStr += LastChar; + + if (IdentifierStr == "def") return tok_def; + if (IdentifierStr == "extern") return tok_extern; + return tok_identifier; + } + + if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+ + std::string NumStr; + do { + NumStr += LastChar; + LastChar = getchar(); + } while (isdigit(LastChar) || LastChar == '.'); + + NumVal = strtod(NumStr.c_str(), 0); + return tok_number; + } + + if (LastChar == '#') { + // Comment until end of line. + do LastChar = getchar(); + while (LastChar != EOF && LastChar != '\n' && LastChar != '\r'); + + if (LastChar != EOF) + return gettok(); + } + + // Check for end of file. Don't eat the EOF. + if (LastChar == EOF) + return tok_eof; + + // Otherwise, just return the character as its ascii value. + int ThisChar = LastChar; + LastChar = getchar(); + return ThisChar; +} + +//===----------------------------------------------------------------------===// +// Abstract Syntax Tree (aka Parse Tree) +//===----------------------------------------------------------------------===// + +/// ExprAST - Base class for all expression nodes. +class ExprAST { +public: + virtual ~ExprAST() {} + virtual Value *Codegen() = 0; +}; + +/// NumberExprAST - Expression class for numeric literals like "1.0". +class NumberExprAST : public ExprAST { + double Val; +public: + NumberExprAST(double val) : Val(val) {} + virtual Value *Codegen(); +}; + +/// VariableExprAST - Expression class for referencing a variable, like "a". +class VariableExprAST : public ExprAST { + std::string Name; +public: + VariableExprAST(const std::string &name) : Name(name) {} + virtual Value *Codegen(); +}; + +/// BinaryExprAST - Expression class for a binary operator. +class BinaryExprAST : public ExprAST { + char Op; + ExprAST *LHS, *RHS; +public: + BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs) + : Op(op), LHS(lhs), RHS(rhs) {} + virtual Value *Codegen(); +}; + +/// CallExprAST - Expression class for function calls. +class CallExprAST : public ExprAST { + std::string Callee; + std::vector<ExprAST*> Args; +public: + CallExprAST(const std::string &callee, std::vector<ExprAST*> &args) + : Callee(callee), Args(args) {} + virtual Value *Codegen(); +}; + +/// PrototypeAST - This class represents the "prototype" for a function, +/// which captures its name, and its argument names (thus implicitly the number +/// of arguments the function takes). +class PrototypeAST { + std::string Name; + std::vector<std::string> Args; +public: + PrototypeAST(const std::string &name, const std::vector<std::string> &args) + : Name(name), Args(args) {} + + Function *Codegen(); +}; + +/// FunctionAST - This class represents a function definition itself. +class FunctionAST { + PrototypeAST *Proto; + ExprAST *Body; +public: + FunctionAST(PrototypeAST *proto, ExprAST *body) + : Proto(proto), Body(body) {} + + Function *Codegen(); +}; + +//===----------------------------------------------------------------------===// +// Parser +//===----------------------------------------------------------------------===// + +/// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current +/// token the parser is looking at. getNextToken reads another token from the +/// lexer and updates CurTok with its results. +static int CurTok; +static int getNextToken() { + return CurTok = gettok(); +} + +/// BinopPrecedence - This holds the precedence for each binary operator that is +/// defined. +static std::map<char, int> BinopPrecedence; + +/// GetTokPrecedence - Get the precedence of the pending binary operator token. +static int GetTokPrecedence() { + if (!isascii(CurTok)) + return -1; + + // Make sure it's a declared binop. + int TokPrec = BinopPrecedence[CurTok]; + if (TokPrec <= 0) return -1; + return TokPrec; +} + +/// Error* - These are little helper functions for error handling. +ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;} +PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; } +FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; } + +static ExprAST *ParseExpression(); + +/// identifierexpr +/// ::= identifier +/// ::= identifier '(' expression* ')' +static ExprAST *ParseIdentifierExpr() { + std::string IdName = IdentifierStr; + + getNextToken(); // eat identifier. + + if (CurTok != '(') // Simple variable ref. + return new VariableExprAST(IdName); + + // Call. + getNextToken(); // eat ( + std::vector<ExprAST*> Args; + if (CurTok != ')') { + while (1) { + ExprAST *Arg = ParseExpression(); + if (!Arg) return 0; + Args.push_back(Arg); + + if (CurTok == ')') break; + + if (CurTok != ',') + return Error("Expected ')' or ',' in argument list"); + getNextToken(); + } + } + + // Eat the ')'. + getNextToken(); + + return new CallExprAST(IdName, Args); +} + +/// numberexpr ::= number +static ExprAST *ParseNumberExpr() { + ExprAST *Result = new NumberExprAST(NumVal); + getNextToken(); // consume the number + return Result; +} + +/// parenexpr ::= '(' expression ')' +static ExprAST *ParseParenExpr() { + getNextToken(); // eat (. + ExprAST *V = ParseExpression(); + if (!V) return 0; + + if (CurTok != ')') + return Error("expected ')'"); + getNextToken(); // eat ). + return V; +} + +/// primary +/// ::= identifierexpr +/// ::= numberexpr +/// ::= parenexpr +static ExprAST *ParsePrimary() { + switch (CurTok) { + default: return Error("unknown token when expecting an expression"); + case tok_identifier: return ParseIdentifierExpr(); + case tok_number: return ParseNumberExpr(); + case '(': return ParseParenExpr(); + } +} + +/// binoprhs +/// ::= ('+' primary)* +static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) { + // If this is a binop, find its precedence. + while (1) { + int TokPrec = GetTokPrecedence(); + + // If this is a binop that binds at least as tightly as the current binop, + // consume it, otherwise we are done. + if (TokPrec < ExprPrec) + return LHS; + + // Okay, we know this is a binop. + int BinOp = CurTok; + getNextToken(); // eat binop + + // Parse the primary expression after the binary operator. + ExprAST *RHS = ParsePrimary(); + if (!RHS) return 0; + + // If BinOp binds less tightly with RHS than the operator after RHS, let + // the pending operator take RHS as its LHS. + int NextPrec = GetTokPrecedence(); + if (TokPrec < NextPrec) { + RHS = ParseBinOpRHS(TokPrec+1, RHS); + if (RHS == 0) return 0; + } + + // Merge LHS/RHS. + LHS = new BinaryExprAST(BinOp, LHS, RHS); + } +} + +/// expression +/// ::= primary binoprhs +/// +static ExprAST *ParseExpression() { + ExprAST *LHS = ParsePrimary(); + if (!LHS) return 0; + + return ParseBinOpRHS(0, LHS); +} + +/// prototype +/// ::= id '(' id* ')' +static PrototypeAST *ParsePrototype() { + if (CurTok != tok_identifier) + return ErrorP("Expected function name in prototype"); + + std::string FnName = IdentifierStr; + getNextToken(); + + if (CurTok != '(') + return ErrorP("Expected '(' in prototype"); + + std::vector<std::string> ArgNames; + while (getNextToken() == tok_identifier) + ArgNames.push_back(IdentifierStr); + if (CurTok != ')') + return ErrorP("Expected ')' in prototype"); + + // success. + getNextToken(); // eat ')'. + + return new PrototypeAST(FnName, ArgNames); +} + +/// definition ::= 'def' prototype expression +static FunctionAST *ParseDefinition() { + getNextToken(); // eat def. + PrototypeAST *Proto = ParsePrototype(); + if (Proto == 0) return 0; + + if (ExprAST *E = ParseExpression()) + return new FunctionAST(Proto, E); + return 0; +} + +/// toplevelexpr ::= expression +static FunctionAST *ParseTopLevelExpr() { + if (ExprAST *E = ParseExpression()) { + // Make an anonymous proto. + PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>()); + return new FunctionAST(Proto, E); + } + return 0; +} + +/// external ::= 'extern' prototype +static PrototypeAST *ParseExtern() { + getNextToken(); // eat extern. + return ParsePrototype(); +} + +//===----------------------------------------------------------------------===// +// Code Generation +//===----------------------------------------------------------------------===// + +static Module *TheModule; +static IRBuilder<> Builder(getGlobalContext()); +static std::map<std::string, Value*> NamedValues; +static FunctionPassManager *TheFPM; + +Value *ErrorV(const char *Str) { Error(Str); return 0; } + +Value *NumberExprAST::Codegen() { + return ConstantFP::get(getGlobalContext(), APFloat(Val)); +} + +Value *VariableExprAST::Codegen() { + // Look this variable up in the function. + Value *V = NamedValues[Name]; + return V ? V : ErrorV("Unknown variable name"); +} + +Value *BinaryExprAST::Codegen() { + Value *L = LHS->Codegen(); + Value *R = RHS->Codegen(); + if (L == 0 || R == 0) return 0; + + switch (Op) { + case '+': return Builder.CreateFAdd(L, R, "addtmp"); + case '-': return Builder.CreateFSub(L, R, "subtmp"); + case '*': return Builder.CreateFMul(L, R, "multmp"); + case '<': + L = Builder.CreateFCmpULT(L, R, "cmptmp"); + // Convert bool 0/1 to double 0.0 or 1.0 + return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()), + "booltmp"); + default: return ErrorV("invalid binary operator"); + } +} + +Value *CallExprAST::Codegen() { + // Look up the name in the global module table. + Function *CalleeF = TheModule->getFunction(Callee); + if (CalleeF == 0) + return ErrorV("Unknown function referenced"); + + // If argument mismatch error. + if (CalleeF->arg_size() != Args.size()) + return ErrorV("Incorrect # arguments passed"); + + std::vector<Value*> ArgsV; + for (unsigned i = 0, e = Args.size(); i != e; ++i) { + ArgsV.push_back(Args[i]->Codegen()); + if (ArgsV.back() == 0) return 0; + } + + return Builder.CreateCall(CalleeF, ArgsV, "calltmp"); +} + +Function *PrototypeAST::Codegen() { + // Make the function type: double(double,double) etc. + std::vector<Type*> Doubles(Args.size(), + Type::getDoubleTy(getGlobalContext())); + FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()), + Doubles, false); + + Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule); + + // If F conflicted, there was already something named 'Name'. If it has a + // body, don't allow redefinition or reextern. + if (F->getName() != Name) { + // Delete the one we just made and get the existing one. + F->eraseFromParent(); + F = TheModule->getFunction(Name); + + // If F already has a body, reject this. + if (!F->empty()) { + ErrorF("redefinition of function"); + return 0; + } + + // If F took a different number of args, reject. + if (F->arg_size() != Args.size()) { + ErrorF("redefinition of function with different # args"); + return 0; + } + } + + // Set names for all arguments. + unsigned Idx = 0; + for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size(); + ++AI, ++Idx) { + AI->setName(Args[Idx]); + + // Add arguments to variable symbol table. + NamedValues[Args[Idx]] = AI; + } + + return F; +} + +Function *FunctionAST::Codegen() { + NamedValues.clear(); + + Function *TheFunction = Proto->Codegen(); + if (TheFunction == 0) + return 0; + + // Create a new basic block to start insertion into. + BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction); + Builder.SetInsertPoint(BB); + + if (Value *RetVal = Body->Codegen()) { + // Finish off the function. + Builder.CreateRet(RetVal); + + // Validate the generated code, checking for consistency. + verifyFunction(*TheFunction); + + // Optimize the function. + TheFPM->run(*TheFunction); + + return TheFunction; + } + + // Error reading body, remove function. + TheFunction->eraseFromParent(); + return 0; +} + +//===----------------------------------------------------------------------===// +// Top-Level parsing and JIT Driver +//===----------------------------------------------------------------------===// + +static ExecutionEngine *TheExecutionEngine; + +static void HandleDefinition() { + if (FunctionAST *F = ParseDefinition()) { + if (Function *LF = F->Codegen()) { + fprintf(stderr, "Read function definition:"); + LF->dump(); + } + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +static void HandleExtern() { + if (PrototypeAST *P = ParseExtern()) { + if (Function *F = P->Codegen()) { + fprintf(stderr, "Read extern: "); + F->dump(); + } + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +static void HandleTopLevelExpression() { + // Evaluate a top-level expression into an anonymous function. + if (FunctionAST *F = ParseTopLevelExpr()) { + if (Function *LF = F->Codegen()) { + // JIT the function, returning a function pointer. + void *FPtr = TheExecutionEngine->getPointerToFunction(LF); + + // Cast it to the right type (takes no arguments, returns a double) so we + // can call it as a native function. + double (*FP)() = (double (*)())(intptr_t)FPtr; + fprintf(stderr, "Evaluated to %f\n", FP()); + } + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +/// top ::= definition | external | expression | ';' +static void MainLoop() { + while (1) { + fprintf(stderr, "ready> "); + switch (CurTok) { + case tok_eof: return; + case ';': getNextToken(); break; // ignore top-level semicolons. + case tok_def: HandleDefinition(); break; + case tok_extern: HandleExtern(); break; + default: HandleTopLevelExpression(); break; + } + } +} + +//===----------------------------------------------------------------------===// +// "Library" functions that can be "extern'd" from user code. +//===----------------------------------------------------------------------===// + +/// putchard - putchar that takes a double and returns 0. +extern "C" +double putchard(double X) { + putchar((char)X); + return 0; +} + +//===----------------------------------------------------------------------===// +// Main driver code. +//===----------------------------------------------------------------------===// + +int main() { + InitializeNativeTarget(); + LLVMContext &Context = getGlobalContext(); + + // Install standard binary operators. + // 1 is lowest precedence. + BinopPrecedence['<'] = 10; + BinopPrecedence['+'] = 20; + BinopPrecedence['-'] = 20; + BinopPrecedence['*'] = 40; // highest. + + // Prime the first token. + fprintf(stderr, "ready> "); + getNextToken(); + + // Make the module, which holds all the code. + TheModule = new Module("my cool jit", Context); + + // Create the JIT. This takes ownership of the module. + std::string ErrStr; + TheExecutionEngine = EngineBuilder(TheModule).setErrorStr(&ErrStr).create(); + if (!TheExecutionEngine) { + fprintf(stderr, "Could not create ExecutionEngine: %s\n", ErrStr.c_str()); + exit(1); + } + + FunctionPassManager OurFPM(TheModule); + + // Set up the optimizer pipeline. Start with registering info about how the + // target lays out data structures. + OurFPM.add(new TargetData(*TheExecutionEngine->getTargetData())); + // Provide basic AliasAnalysis support for GVN. + OurFPM.add(createBasicAliasAnalysisPass()); + // Do simple "peephole" optimizations and bit-twiddling optzns. + OurFPM.add(createInstructionCombiningPass()); + // Reassociate expressions. + OurFPM.add(createReassociatePass()); + // Eliminate Common SubExpressions. + OurFPM.add(createGVNPass()); + // Simplify the control flow graph (deleting unreachable blocks, etc). + OurFPM.add(createCFGSimplificationPass()); + + OurFPM.doInitialization(); + + // Set the global so the code gen can use this. + TheFPM = &OurFPM; + + // Run the main "interpreter loop" now. + MainLoop(); + + TheFPM = 0; + + // Print out all of the generated code. + TheModule->dump(); + + return 0; +} diff --git a/examples/Kaleidoscope/Chapter5/CMakeLists.txt b/examples/Kaleidoscope/Chapter5/CMakeLists.txt new file mode 100644 index 00000000000..2d75ad35923 --- /dev/null +++ b/examples/Kaleidoscope/Chapter5/CMakeLists.txt @@ -0,0 +1,5 @@ +set(LLVM_LINK_COMPONENTS core jit interpreter native) + +add_llvm_example(Kaleidoscope-Ch5 + toy.cpp + ) diff --git a/examples/Kaleidoscope/Chapter5/Makefile b/examples/Kaleidoscope/Chapter5/Makefile new file mode 100644 index 00000000000..d1f5e2035b4 --- /dev/null +++ b/examples/Kaleidoscope/Chapter5/Makefile @@ -0,0 +1,15 @@ +##===- examples/Kaleidoscope/Chapter5/Makefile -------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../.. +TOOLNAME = Kaleidoscope-Ch5 +EXAMPLE_TOOL = 1 + +LINK_COMPONENTS := core jit native + +include $(LEVEL)/Makefile.common diff --git a/examples/Kaleidoscope/Chapter5/toy.cpp b/examples/Kaleidoscope/Chapter5/toy.cpp new file mode 100644 index 00000000000..36dd760e5ff --- /dev/null +++ b/examples/Kaleidoscope/Chapter5/toy.cpp @@ -0,0 +1,858 @@ +#include "llvm/DerivedTypes.h" +#include "llvm/ExecutionEngine/ExecutionEngine.h" +#include "llvm/ExecutionEngine/JIT.h" +#include "llvm/IRBuilder.h" +#include "llvm/LLVMContext.h" +#include "llvm/Module.h" +#include "llvm/PassManager.h" +#include "llvm/Analysis/Verifier.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Support/TargetSelect.h" +#include <cstdio> +#include <string> +#include <map> +#include <vector> +using namespace llvm; + +//===----------------------------------------------------------------------===// +// Lexer +//===----------------------------------------------------------------------===// + +// The lexer returns tokens [0-255] if it is an unknown character, otherwise one +// of these for known things. +enum Token { + tok_eof = -1, + + // commands + tok_def = -2, tok_extern = -3, + + // primary + tok_identifier = -4, tok_number = -5, + + // control + tok_if = -6, tok_then = -7, tok_else = -8, + tok_for = -9, tok_in = -10 +}; + +static std::string IdentifierStr; // Filled in if tok_identifier +static double NumVal; // Filled in if tok_number + +/// gettok - Return the next token from standard input. +static int gettok() { + static int LastChar = ' '; + + // Skip any whitespace. + while (isspace(LastChar)) + LastChar = getchar(); + + if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]* + IdentifierStr = LastChar; + while (isalnum((LastChar = getchar()))) + IdentifierStr += LastChar; + + if (IdentifierStr == "def") return tok_def; + if (IdentifierStr == "extern") return tok_extern; + if (IdentifierStr == "if") return tok_if; + if (IdentifierStr == "then") return tok_then; + if (IdentifierStr == "else") return tok_else; + if (IdentifierStr == "for") return tok_for; + if (IdentifierStr == "in") return tok_in; + return tok_identifier; + } + + if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+ + std::string NumStr; + do { + NumStr += LastChar; + LastChar = getchar(); + } while (isdigit(LastChar) || LastChar == '.'); + + NumVal = strtod(NumStr.c_str(), 0); + return tok_number; + } + + if (LastChar == '#') { + // Comment until end of line. + do LastChar = getchar(); + while (LastChar != EOF && LastChar != '\n' && LastChar != '\r'); + + if (LastChar != EOF) + return gettok(); + } + + // Check for end of file. Don't eat the EOF. + if (LastChar == EOF) + return tok_eof; + + // Otherwise, just return the character as its ascii value. + int ThisChar = LastChar; + LastChar = getchar(); + return ThisChar; +} + +//===----------------------------------------------------------------------===// +// Abstract Syntax Tree (aka Parse Tree) +//===----------------------------------------------------------------------===// + +/// ExprAST - Base class for all expression nodes. +class ExprAST { +public: + virtual ~ExprAST() {} + virtual Value *Codegen() = 0; +}; + +/// NumberExprAST - Expression class for numeric literals like "1.0". +class NumberExprAST : public ExprAST { + double Val; +public: + NumberExprAST(double val) : Val(val) {} + virtual Value *Codegen(); +}; + +/// VariableExprAST - Expression class for referencing a variable, like "a". +class VariableExprAST : public ExprAST { + std::string Name; +public: + VariableExprAST(const std::string &name) : Name(name) {} + virtual Value *Codegen(); +}; + +/// BinaryExprAST - Expression class for a binary operator. +class BinaryExprAST : public ExprAST { + char Op; + ExprAST *LHS, *RHS; +public: + BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs) + : Op(op), LHS(lhs), RHS(rhs) {} + virtual Value *Codegen(); +}; + +/// CallExprAST - Expression class for function calls. +class CallExprAST : public ExprAST { + std::string Callee; + std::vector<ExprAST*> Args; +public: + CallExprAST(const std::string &callee, std::vector<ExprAST*> &args) + : Callee(callee), Args(args) {} + virtual Value *Codegen(); +}; + +/// IfExprAST - Expression class for if/then/else. +class IfExprAST : public ExprAST { + ExprAST *Cond, *Then, *Else; +public: + IfExprAST(ExprAST *cond, ExprAST *then, ExprAST *_else) + : Cond(cond), Then(then), Else(_else) {} + virtual Value *Codegen(); +}; + +/// ForExprAST - Expression class for for/in. +class ForExprAST : public ExprAST { + std::string VarName; + ExprAST *Start, *End, *Step, *Body; +public: + ForExprAST(const std::string &varname, ExprAST *start, ExprAST *end, + ExprAST *step, ExprAST *body) + : VarName(varname), Start(start), End(end), Step(step), Body(body) {} + virtual Value *Codegen(); +}; + +/// PrototypeAST - This class represents the "prototype" for a function, +/// which captures its name, and its argument names (thus implicitly the number +/// of arguments the function takes). +class PrototypeAST { + std::string Name; + std::vector<std::string> Args; +public: + PrototypeAST(const std::string &name, const std::vector<std::string> &args) + : Name(name), Args(args) {} + + Function *Codegen(); +}; + +/// FunctionAST - This class represents a function definition itself. +class FunctionAST { + PrototypeAST *Proto; + ExprAST *Body; +public: + FunctionAST(PrototypeAST *proto, ExprAST *body) + : Proto(proto), Body(body) {} + + Function *Codegen(); +}; + +//===----------------------------------------------------------------------===// +// Parser +//===----------------------------------------------------------------------===// + +/// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current +/// token the parser is looking at. getNextToken reads another token from the +/// lexer and updates CurTok with its results. +static int CurTok; +static int getNextToken() { + return CurTok = gettok(); +} + +/// BinopPrecedence - This holds the precedence for each binary operator that is +/// defined. +static std::map<char, int> BinopPrecedence; + +/// GetTokPrecedence - Get the precedence of the pending binary operator token. +static int GetTokPrecedence() { + if (!isascii(CurTok)) + return -1; + + // Make sure it's a declared binop. + int TokPrec = BinopPrecedence[CurTok]; + if (TokPrec <= 0) return -1; + return TokPrec; +} + +/// Error* - These are little helper functions for error handling. +ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;} +PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; } +FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; } + +static ExprAST *ParseExpression(); + +/// identifierexpr +/// ::= identifier +/// ::= identifier '(' expression* ')' +static ExprAST *ParseIdentifierExpr() { + std::string IdName = IdentifierStr; + + getNextToken(); // eat identifier. + + if (CurTok != '(') // Simple variable ref. + return new VariableExprAST(IdName); + + // Call. + getNextToken(); // eat ( + std::vector<ExprAST*> Args; + if (CurTok != ')') { + while (1) { + ExprAST *Arg = ParseExpression(); + if (!Arg) return 0; + Args.push_back(Arg); + + if (CurTok == ')') break; + + if (CurTok != ',') + return Error("Expected ')' or ',' in argument list"); + getNextToken(); + } + } + + // Eat the ')'. + getNextToken(); + + return new CallExprAST(IdName, Args); +} + +/// numberexpr ::= number +static ExprAST *ParseNumberExpr() { + ExprAST *Result = new NumberExprAST(NumVal); + getNextToken(); // consume the number + return Result; +} + +/// parenexpr ::= '(' expression ')' +static ExprAST *ParseParenExpr() { + getNextToken(); // eat (. + ExprAST *V = ParseExpression(); + if (!V) return 0; + + if (CurTok != ')') + return Error("expected ')'"); + getNextToken(); // eat ). + return V; +} + +/// ifexpr ::= 'if' expression 'then' expression 'else' expression +static ExprAST *ParseIfExpr() { + getNextToken(); // eat the if. + + // condition. + ExprAST *Cond = ParseExpression(); + if (!Cond) return 0; + + if (CurTok != tok_then) + return Error("expected then"); + getNextToken(); // eat the then + + ExprAST *Then = ParseExpression(); + if (Then == 0) return 0; + + if (CurTok != tok_else) + return Error("expected else"); + + getNextToken(); + + ExprAST *Else = ParseExpression(); + if (!Else) return 0; + + return new IfExprAST(Cond, Then, Else); +} + +/// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression +static ExprAST *ParseForExpr() { + getNextToken(); // eat the for. + + if (CurTok != tok_identifier) + return Error("expected identifier after for"); + + std::string IdName = IdentifierStr; + getNextToken(); // eat identifier. + + if (CurTok != '=') + return Error("expected '=' after for"); + getNextToken(); // eat '='. + + + ExprAST *Start = ParseExpression(); + if (Start == 0) return 0; + if (CurTok != ',') + return Error("expected ',' after for start value"); + getNextToken(); + + ExprAST *End = ParseExpression(); + if (End == 0) return 0; + + // The step value is optional. + ExprAST *Step = 0; + if (CurTok == ',') { + getNextToken(); + Step = ParseExpression(); + if (Step == 0) return 0; + } + + if (CurTok != tok_in) + return Error("expected 'in' after for"); + getNextToken(); // eat 'in'. + + ExprAST *Body = ParseExpression(); + if (Body == 0) return 0; + + return new ForExprAST(IdName, Start, End, Step, Body); +} + +/// primary +/// ::= identifierexpr +/// ::= numberexpr +/// ::= parenexpr +/// ::= ifexpr +/// ::= forexpr +static ExprAST *ParsePrimary() { + switch (CurTok) { + default: return Error("unknown token when expecting an expression"); + case tok_identifier: return ParseIdentifierExpr(); + case tok_number: return ParseNumberExpr(); + case '(': return ParseParenExpr(); + case tok_if: return ParseIfExpr(); + case tok_for: return ParseForExpr(); + } +} + +/// binoprhs +/// ::= ('+' primary)* +static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) { + // If this is a binop, find its precedence. + while (1) { + int TokPrec = GetTokPrecedence(); + + // If this is a binop that binds at least as tightly as the current binop, + // consume it, otherwise we are done. + if (TokPrec < ExprPrec) + return LHS; + + // Okay, we know this is a binop. + int BinOp = CurTok; + getNextToken(); // eat binop + + // Parse the primary expression after the binary operator. + ExprAST *RHS = ParsePrimary(); + if (!RHS) return 0; + + // If BinOp binds less tightly with RHS than the operator after RHS, let + // the pending operator take RHS as its LHS. + int NextPrec = GetTokPrecedence(); + if (TokPrec < NextPrec) { + RHS = ParseBinOpRHS(TokPrec+1, RHS); + if (RHS == 0) return 0; + } + + // Merge LHS/RHS. + LHS = new BinaryExprAST(BinOp, LHS, RHS); + } +} + +/// expression +/// ::= primary binoprhs +/// +static ExprAST *ParseExpression() { + ExprAST *LHS = ParsePrimary(); + if (!LHS) return 0; + + return ParseBinOpRHS(0, LHS); +} + +/// prototype +/// ::= id '(' id* ')' +static PrototypeAST *ParsePrototype() { + if (CurTok != tok_identifier) + return ErrorP("Expected function name in prototype"); + + std::string FnName = IdentifierStr; + getNextToken(); + + if (CurTok != '(') + return ErrorP("Expected '(' in prototype"); + + std::vector<std::string> ArgNames; + while (getNextToken() == tok_identifier) + ArgNames.push_back(IdentifierStr); + if (CurTok != ')') + return ErrorP("Expected ')' in prototype"); + + // success. + getNextToken(); // eat ')'. + + return new PrototypeAST(FnName, ArgNames); +} + +/// definition ::= 'def' prototype expression +static FunctionAST *ParseDefinition() { + getNextToken(); // eat def. + PrototypeAST *Proto = ParsePrototype(); + if (Proto == 0) return 0; + + if (ExprAST *E = ParseExpression()) + return new FunctionAST(Proto, E); + return 0; +} + +/// toplevelexpr ::= expression +static FunctionAST *ParseTopLevelExpr() { + if (ExprAST *E = ParseExpression()) { + // Make an anonymous proto. + PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>()); + return new FunctionAST(Proto, E); + } + return 0; +} + +/// external ::= 'extern' prototype +static PrototypeAST *ParseExtern() { + getNextToken(); // eat extern. + return ParsePrototype(); +} + +//===----------------------------------------------------------------------===// +// Code Generation +//===----------------------------------------------------------------------===// + +static Module *TheModule; +static IRBuilder<> Builder(getGlobalContext()); +static std::map<std::string, Value*> NamedValues; +static FunctionPassManager *TheFPM; + +Value *ErrorV(const char *Str) { Error(Str); return 0; } + +Value *NumberExprAST::Codegen() { + return ConstantFP::get(getGlobalContext(), APFloat(Val)); +} + +Value *VariableExprAST::Codegen() { + // Look this variable up in the function. + Value *V = NamedValues[Name]; + return V ? V : ErrorV("Unknown variable name"); +} + +Value *BinaryExprAST::Codegen() { + Value *L = LHS->Codegen(); + Value *R = RHS->Codegen(); + if (L == 0 || R == 0) return 0; + + switch (Op) { + case '+': return Builder.CreateFAdd(L, R, "addtmp"); + case '-': return Builder.CreateFSub(L, R, "subtmp"); + case '*': return Builder.CreateFMul(L, R, "multmp"); + case '<': + L = Builder.CreateFCmpULT(L, R, "cmptmp"); + // Convert bool 0/1 to double 0.0 or 1.0 + return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()), + "booltmp"); + default: return ErrorV("invalid binary operator"); + } +} + +Value *CallExprAST::Codegen() { + // Look up the name in the global module table. + Function *CalleeF = TheModule->getFunction(Callee); + if (CalleeF == 0) + return ErrorV("Unknown function referenced"); + + // If argument mismatch error. + if (CalleeF->arg_size() != Args.size()) + return ErrorV("Incorrect # arguments passed"); + + std::vector<Value*> ArgsV; + for (unsigned i = 0, e = Args.size(); i != e; ++i) { + ArgsV.push_back(Args[i]->Codegen()); + if (ArgsV.back() == 0) return 0; + } + + return Builder.CreateCall(CalleeF, ArgsV, "calltmp"); +} + +Value *IfExprAST::Codegen() { + Value *CondV = Cond->Codegen(); + if (CondV == 0) return 0; + + // Convert condition to a bool by comparing equal to 0.0. + CondV = Builder.CreateFCmpONE(CondV, + ConstantFP::get(getGlobalContext(), APFloat(0.0)), + "ifcond"); + + Function *TheFunction = Builder.GetInsertBlock()->getParent(); + + // Create blocks for the then and else cases. Insert the 'then' block at the + // end of the function. + BasicBlock *ThenBB = BasicBlock::Create(getGlobalContext(), "then", TheFunction); + BasicBlock *ElseBB = BasicBlock::Create(getGlobalContext(), "else"); + BasicBlock *MergeBB = BasicBlock::Create(getGlobalContext(), "ifcont"); + + Builder.CreateCondBr(CondV, ThenBB, ElseBB); + + // Emit then value. + Builder.SetInsertPoint(ThenBB); + + Value *ThenV = Then->Codegen(); + if (ThenV == 0) return 0; + + Builder.CreateBr(MergeBB); + // Codegen of 'Then' can change the current block, update ThenBB for the PHI. + ThenBB = Builder.GetInsertBlock(); + + // Emit else block. + TheFunction->getBasicBlockList().push_back(ElseBB); + Builder.SetInsertPoint(ElseBB); + + Value *ElseV = Else->Codegen(); + if (ElseV == 0) return 0; + + Builder.CreateBr(MergeBB); + // Codegen of 'Else' can change the current block, update ElseBB for the PHI. + ElseBB = Builder.GetInsertBlock(); + + // Emit merge block. + TheFunction->getBasicBlockList().push_back(MergeBB); + Builder.SetInsertPoint(MergeBB); + PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2, + "iftmp"); + + PN->addIncoming(ThenV, ThenBB); + PN->addIncoming(ElseV, ElseBB); + return PN; +} + +Value *ForExprAST::Codegen() { + // Output this as: + // ... + // start = startexpr + // goto loop + // loop: + // variable = phi [start, loopheader], [nextvariable, loopend] + // ... + // bodyexpr + // ... + // loopend: + // step = stepexpr + // nextvariable = variable + step + // endcond = endexpr + // br endcond, loop, endloop + // outloop: + + // Emit the start code first, without 'variable' in scope. + Value *StartVal = Start->Codegen(); + if (StartVal == 0) return 0; + + // Make the new basic block for the loop header, inserting after current + // block. + Function *TheFunction = Builder.GetInsertBlock()->getParent(); + BasicBlock *PreheaderBB = Builder.GetInsertBlock(); + BasicBlock *LoopBB = BasicBlock::Create(getGlobalContext(), "loop", TheFunction); + + // Insert an explicit fall through from the current block to the LoopBB. + Builder.CreateBr(LoopBB); + + // Start insertion in LoopBB. + Builder.SetInsertPoint(LoopBB); + + // Start the PHI node with an entry for Start. + PHINode *Variable = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2, VarName.c_str()); + Variable->addIncoming(StartVal, PreheaderBB); + + // Within the loop, the variable is defined equal to the PHI node. If it + // shadows an existing variable, we have to restore it, so save it now. + Value *OldVal = NamedValues[VarName]; + NamedValues[VarName] = Variable; + + // Emit the body of the loop. This, like any other expr, can change the + // current BB. Note that we ignore the value computed by the body, but don't + // allow an error. + if (Body->Codegen() == 0) + return 0; + + // Emit the step value. + Value *StepVal; + if (Step) { + StepVal = Step->Codegen(); + if (StepVal == 0) return 0; + } else { + // If not specified, use 1.0. + StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0)); + } + + Value *NextVar = Builder.CreateFAdd(Variable, StepVal, "nextvar"); + + // Compute the end condition. + Value *EndCond = End->Codegen(); + if (EndCond == 0) return EndCond; + + // Convert condition to a bool by comparing equal to 0.0. + EndCond = Builder.CreateFCmpONE(EndCond, + ConstantFP::get(getGlobalContext(), APFloat(0.0)), + "loopcond"); + + // Create the "after loop" block and insert it. + BasicBlock *LoopEndBB = Builder.GetInsertBlock(); + BasicBlock *AfterBB = BasicBlock::Create(getGlobalContext(), "afterloop", TheFunction); + + // Insert the conditional branch into the end of LoopEndBB. + Builder.CreateCondBr(EndCond, LoopBB, AfterBB); + + // Any new code will be inserted in AfterBB. + Builder.SetInsertPoint(AfterBB); + + // Add a new entry to the PHI node for the backedge. + Variable->addIncoming(NextVar, LoopEndBB); + + // Restore the unshadowed variable. + if (OldVal) + NamedValues[VarName] = OldVal; + else + NamedValues.erase(VarName); + + + // for expr always returns 0.0. + return Constant::getNullValue(Type::getDoubleTy(getGlobalContext())); +} + +Function *PrototypeAST::Codegen() { + // Make the function type: double(double,double) etc. + std::vector<Type*> Doubles(Args.size(), + Type::getDoubleTy(getGlobalContext())); + FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()), + Doubles, false); + + Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule); + + // If F conflicted, there was already something named 'Name'. If it has a + // body, don't allow redefinition or reextern. + if (F->getName() != Name) { + // Delete the one we just made and get the existing one. + F->eraseFromParent(); + F = TheModule->getFunction(Name); + + // If F already has a body, reject this. + if (!F->empty()) { + ErrorF("redefinition of function"); + return 0; + } + + // If F took a different number of args, reject. + if (F->arg_size() != Args.size()) { + ErrorF("redefinition of function with different # args"); + return 0; + } + } + + // Set names for all arguments. + unsigned Idx = 0; + for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size(); + ++AI, ++Idx) { + AI->setName(Args[Idx]); + + // Add arguments to variable symbol table. + NamedValues[Args[Idx]] = AI; + } + + return F; +} + +Function *FunctionAST::Codegen() { + NamedValues.clear(); + + Function *TheFunction = Proto->Codegen(); + if (TheFunction == 0) + return 0; + + // Create a new basic block to start insertion into. + BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction); + Builder.SetInsertPoint(BB); + + if (Value *RetVal = Body->Codegen()) { + // Finish off the function. + Builder.CreateRet(RetVal); + + // Validate the generated code, checking for consistency. + verifyFunction(*TheFunction); + + // Optimize the function. + TheFPM->run(*TheFunction); + + return TheFunction; + } + + // Error reading body, remove function. + TheFunction->eraseFromParent(); + return 0; +} + +//===----------------------------------------------------------------------===// +// Top-Level parsing and JIT Driver +//===----------------------------------------------------------------------===// + +static ExecutionEngine *TheExecutionEngine; + +static void HandleDefinition() { + if (FunctionAST *F = ParseDefinition()) { + if (Function *LF = F->Codegen()) { + fprintf(stderr, "Read function definition:"); + LF->dump(); + } + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +static void HandleExtern() { + if (PrototypeAST *P = ParseExtern()) { + if (Function *F = P->Codegen()) { + fprintf(stderr, "Read extern: "); + F->dump(); + } + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +static void HandleTopLevelExpression() { + // Evaluate a top-level expression into an anonymous function. + if (FunctionAST *F = ParseTopLevelExpr()) { + if (Function *LF = F->Codegen()) { + // JIT the function, returning a function pointer. + void *FPtr = TheExecutionEngine->getPointerToFunction(LF); + + // Cast it to the right type (takes no arguments, returns a double) so we + // can call it as a native function. + double (*FP)() = (double (*)())(intptr_t)FPtr; + fprintf(stderr, "Evaluated to %f\n", FP()); + } + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +/// top ::= definition | external | expression | ';' +static void MainLoop() { + while (1) { + fprintf(stderr, "ready> "); + switch (CurTok) { + case tok_eof: return; + case ';': getNextToken(); break; // ignore top-level semicolons. + case tok_def: HandleDefinition(); break; + case tok_extern: HandleExtern(); break; + default: HandleTopLevelExpression(); break; + } + } +} + +//===----------------------------------------------------------------------===// +// "Library" functions that can be "extern'd" from user code. +//===----------------------------------------------------------------------===// + +/// putchard - putchar that takes a double and returns 0. +extern "C" +double putchard(double X) { + putchar((char)X); + return 0; +} + +//===----------------------------------------------------------------------===// +// Main driver code. +//===----------------------------------------------------------------------===// + +int main() { + InitializeNativeTarget(); + LLVMContext &Context = getGlobalContext(); + + // Install standard binary operators. + // 1 is lowest precedence. + BinopPrecedence['<'] = 10; + BinopPrecedence['+'] = 20; + BinopPrecedence['-'] = 20; + BinopPrecedence['*'] = 40; // highest. + + // Prime the first token. + fprintf(stderr, "ready> "); + getNextToken(); + + // Make the module, which holds all the code. + TheModule = new Module("my cool jit", Context); + + // Create the JIT. This takes ownership of the module. + std::string ErrStr; + TheExecutionEngine = EngineBuilder(TheModule).setErrorStr(&ErrStr).create(); + if (!TheExecutionEngine) { + fprintf(stderr, "Could not create ExecutionEngine: %s\n", ErrStr.c_str()); + exit(1); + } + + FunctionPassManager OurFPM(TheModule); + + // Set up the optimizer pipeline. Start with registering info about how the + // target lays out data structures. + OurFPM.add(new TargetData(*TheExecutionEngine->getTargetData())); + // Provide basic AliasAnalysis support for GVN. + OurFPM.add(createBasicAliasAnalysisPass()); + // Do simple "peephole" optimizations and bit-twiddling optzns. + OurFPM.add(createInstructionCombiningPass()); + // Reassociate expressions. + OurFPM.add(createReassociatePass()); + // Eliminate Common SubExpressions. + OurFPM.add(createGVNPass()); + // Simplify the control flow graph (deleting unreachable blocks, etc). + OurFPM.add(createCFGSimplificationPass()); + + OurFPM.doInitialization(); + + // Set the global so the code gen can use this. + TheFPM = &OurFPM; + + // Run the main "interpreter loop" now. + MainLoop(); + + TheFPM = 0; + + // Print out all of the generated code. + TheModule->dump(); + + return 0; +} diff --git a/examples/Kaleidoscope/Chapter6/CMakeLists.txt b/examples/Kaleidoscope/Chapter6/CMakeLists.txt new file mode 100644 index 00000000000..2e15a5f7dfc --- /dev/null +++ b/examples/Kaleidoscope/Chapter6/CMakeLists.txt @@ -0,0 +1,5 @@ +set(LLVM_LINK_COMPONENTS core jit interpreter native) + +add_llvm_example(Kaleidoscope-Ch6 + toy.cpp + ) diff --git a/examples/Kaleidoscope/Chapter6/Makefile b/examples/Kaleidoscope/Chapter6/Makefile new file mode 100644 index 00000000000..a5fbcbdf9b2 --- /dev/null +++ b/examples/Kaleidoscope/Chapter6/Makefile @@ -0,0 +1,15 @@ +##===- examples/Kaleidoscope/Chapter6/Makefile -------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../.. +TOOLNAME = Kaleidoscope-Ch6 +EXAMPLE_TOOL = 1 + +LINK_COMPONENTS := core jit native + +include $(LEVEL)/Makefile.common diff --git a/examples/Kaleidoscope/Chapter6/toy.cpp b/examples/Kaleidoscope/Chapter6/toy.cpp new file mode 100644 index 00000000000..db3495dcc98 --- /dev/null +++ b/examples/Kaleidoscope/Chapter6/toy.cpp @@ -0,0 +1,976 @@ +#include "llvm/DerivedTypes.h" +#include "llvm/ExecutionEngine/ExecutionEngine.h" +#include "llvm/ExecutionEngine/JIT.h" +#include "llvm/IRBuilder.h" +#include "llvm/LLVMContext.h" +#include "llvm/Module.h" +#include "llvm/PassManager.h" +#include "llvm/Analysis/Verifier.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Support/TargetSelect.h" +#include <cstdio> +#include <string> +#include <map> +#include <vector> +using namespace llvm; + +//===----------------------------------------------------------------------===// +// Lexer +//===----------------------------------------------------------------------===// + +// The lexer returns tokens [0-255] if it is an unknown character, otherwise one +// of these for known things. +enum Token { + tok_eof = -1, + + // commands + tok_def = -2, tok_extern = -3, + + // primary + tok_identifier = -4, tok_number = -5, + + // control + tok_if = -6, tok_then = -7, tok_else = -8, + tok_for = -9, tok_in = -10, + + // operators + tok_binary = -11, tok_unary = -12 +}; + +static std::string IdentifierStr; // Filled in if tok_identifier +static double NumVal; // Filled in if tok_number + +/// gettok - Return the next token from standard input. +static int gettok() { + static int LastChar = ' '; + + // Skip any whitespace. + while (isspace(LastChar)) + LastChar = getchar(); + + if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]* + IdentifierStr = LastChar; + while (isalnum((LastChar = getchar()))) + IdentifierStr += LastChar; + + if (IdentifierStr == "def") return tok_def; + if (IdentifierStr == "extern") return tok_extern; + if (IdentifierStr == "if") return tok_if; + if (IdentifierStr == "then") return tok_then; + if (IdentifierStr == "else") return tok_else; + if (IdentifierStr == "for") return tok_for; + if (IdentifierStr == "in") return tok_in; + if (IdentifierStr == "binary") return tok_binary; + if (IdentifierStr == "unary") return tok_unary; + return tok_identifier; + } + + if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+ + std::string NumStr; + do { + NumStr += LastChar; + LastChar = getchar(); + } while (isdigit(LastChar) || LastChar == '.'); + + NumVal = strtod(NumStr.c_str(), 0); + return tok_number; + } + + if (LastChar == '#') { + // Comment until end of line. + do LastChar = getchar(); + while (LastChar != EOF && LastChar != '\n' && LastChar != '\r'); + + if (LastChar != EOF) + return gettok(); + } + + // Check for end of file. Don't eat the EOF. + if (LastChar == EOF) + return tok_eof; + + // Otherwise, just return the character as its ascii value. + int ThisChar = LastChar; + LastChar = getchar(); + return ThisChar; +} + +//===----------------------------------------------------------------------===// +// Abstract Syntax Tree (aka Parse Tree) +//===----------------------------------------------------------------------===// + +/// ExprAST - Base class for all expression nodes. +class ExprAST { +public: + virtual ~ExprAST() {} + virtual Value *Codegen() = 0; +}; + +/// NumberExprAST - Expression class for numeric literals like "1.0". +class NumberExprAST : public ExprAST { + double Val; +public: + NumberExprAST(double val) : Val(val) {} + virtual Value *Codegen(); +}; + +/// VariableExprAST - Expression class for referencing a variable, like "a". +class VariableExprAST : public ExprAST { + std::string Name; +public: + VariableExprAST(const std::string &name) : Name(name) {} + virtual Value *Codegen(); +}; + +/// UnaryExprAST - Expression class for a unary operator. +class UnaryExprAST : public ExprAST { + char Opcode; + ExprAST *Operand; +public: + UnaryExprAST(char opcode, ExprAST *operand) + : Opcode(opcode), Operand(operand) {} + virtual Value *Codegen(); +}; + +/// BinaryExprAST - Expression class for a binary operator. +class BinaryExprAST : public ExprAST { + char Op; + ExprAST *LHS, *RHS; +public: + BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs) + : Op(op), LHS(lhs), RHS(rhs) {} + virtual Value *Codegen(); +}; + +/// CallExprAST - Expression class for function calls. +class CallExprAST : public ExprAST { + std::string Callee; + std::vector<ExprAST*> Args; +public: + CallExprAST(const std::string &callee, std::vector<ExprAST*> &args) + : Callee(callee), Args(args) {} + virtual Value *Codegen(); +}; + +/// IfExprAST - Expression class for if/then/else. +class IfExprAST : public ExprAST { + ExprAST *Cond, *Then, *Else; +public: + IfExprAST(ExprAST *cond, ExprAST *then, ExprAST *_else) + : Cond(cond), Then(then), Else(_else) {} + virtual Value *Codegen(); +}; + +/// ForExprAST - Expression class for for/in. +class ForExprAST : public ExprAST { + std::string VarName; + ExprAST *Start, *End, *Step, *Body; +public: + ForExprAST(const std::string &varname, ExprAST *start, ExprAST *end, + ExprAST *step, ExprAST *body) + : VarName(varname), Start(start), End(end), Step(step), Body(body) {} + virtual Value *Codegen(); +}; + +/// PrototypeAST - This class represents the "prototype" for a function, +/// which captures its name, and its argument names (thus implicitly the number +/// of arguments the function takes), as well as if it is an operator. +class PrototypeAST { + std::string Name; + std::vector<std::string> Args; + bool isOperator; + unsigned Precedence; // Precedence if a binary op. +public: + PrototypeAST(const std::string &name, const std::vector<std::string> &args, + bool isoperator = false, unsigned prec = 0) + : Name(name), Args(args), isOperator(isoperator), Precedence(prec) {} + + bool isUnaryOp() const { return isOperator && Args.size() == 1; } + bool isBinaryOp() const { return isOperator && Args.size() == 2; } + + char getOperatorName() const { + assert(isUnaryOp() || isBinaryOp()); + return Name[Name.size()-1]; + } + + unsigned getBinaryPrecedence() const { return Precedence; } + + Function *Codegen(); +}; + +/// FunctionAST - This class represents a function definition itself. +class FunctionAST { + PrototypeAST *Proto; + ExprAST *Body; +public: + FunctionAST(PrototypeAST *proto, ExprAST *body) + : Proto(proto), Body(body) {} + + Function *Codegen(); +}; + +//===----------------------------------------------------------------------===// +// Parser +//===----------------------------------------------------------------------===// + +/// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current +/// token the parser is looking at. getNextToken reads another token from the +/// lexer and updates CurTok with its results. +static int CurTok; +static int getNextToken() { + return CurTok = gettok(); +} + +/// BinopPrecedence - This holds the precedence for each binary operator that is +/// defined. +static std::map<char, int> BinopPrecedence; + +/// GetTokPrecedence - Get the precedence of the pending binary operator token. +static int GetTokPrecedence() { + if (!isascii(CurTok)) + return -1; + + // Make sure it's a declared binop. + int TokPrec = BinopPrecedence[CurTok]; + if (TokPrec <= 0) return -1; + return TokPrec; +} + +/// Error* - These are little helper functions for error handling. +ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;} +PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; } +FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; } + +static ExprAST *ParseExpression(); + +/// identifierexpr +/// ::= identifier +/// ::= identifier '(' expression* ')' +static ExprAST *ParseIdentifierExpr() { + std::string IdName = IdentifierStr; + + getNextToken(); // eat identifier. + + if (CurTok != '(') // Simple variable ref. + return new VariableExprAST(IdName); + + // Call. + getNextToken(); // eat ( + std::vector<ExprAST*> Args; + if (CurTok != ')') { + while (1) { + ExprAST *Arg = ParseExpression(); + if (!Arg) return 0; + Args.push_back(Arg); + + if (CurTok == ')') break; + + if (CurTok != ',') + return Error("Expected ')' or ',' in argument list"); + getNextToken(); + } + } + + // Eat the ')'. + getNextToken(); + + return new CallExprAST(IdName, Args); +} + +/// numberexpr ::= number +static ExprAST *ParseNumberExpr() { + ExprAST *Result = new NumberExprAST(NumVal); + getNextToken(); // consume the number + return Result; +} + +/// parenexpr ::= '(' expression ')' +static ExprAST *ParseParenExpr() { + getNextToken(); // eat (. + ExprAST *V = ParseExpression(); + if (!V) return 0; + + if (CurTok != ')') + return Error("expected ')'"); + getNextToken(); // eat ). + return V; +} + +/// ifexpr ::= 'if' expression 'then' expression 'else' expression +static ExprAST *ParseIfExpr() { + getNextToken(); // eat the if. + + // condition. + ExprAST *Cond = ParseExpression(); + if (!Cond) return 0; + + if (CurTok != tok_then) + return Error("expected then"); + getNextToken(); // eat the then + + ExprAST *Then = ParseExpression(); + if (Then == 0) return 0; + + if (CurTok != tok_else) + return Error("expected else"); + + getNextToken(); + + ExprAST *Else = ParseExpression(); + if (!Else) return 0; + + return new IfExprAST(Cond, Then, Else); +} + +/// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression +static ExprAST *ParseForExpr() { + getNextToken(); // eat the for. + + if (CurTok != tok_identifier) + return Error("expected identifier after for"); + + std::string IdName = IdentifierStr; + getNextToken(); // eat identifier. + + if (CurTok != '=') + return Error("expected '=' after for"); + getNextToken(); // eat '='. + + + ExprAST *Start = ParseExpression(); + if (Start == 0) return 0; + if (CurTok != ',') + return Error("expected ',' after for start value"); + getNextToken(); + + ExprAST *End = ParseExpression(); + if (End == 0) return 0; + + // The step value is optional. + ExprAST *Step = 0; + if (CurTok == ',') { + getNextToken(); + Step = ParseExpression(); + if (Step == 0) return 0; + } + + if (CurTok != tok_in) + return Error("expected 'in' after for"); + getNextToken(); // eat 'in'. + + ExprAST *Body = ParseExpression(); + if (Body == 0) return 0; + + return new ForExprAST(IdName, Start, End, Step, Body); +} + +/// primary +/// ::= identifierexpr +/// ::= numberexpr +/// ::= parenexpr +/// ::= ifexpr +/// ::= forexpr +static ExprAST *ParsePrimary() { + switch (CurTok) { + default: return Error("unknown token when expecting an expression"); + case tok_identifier: return ParseIdentifierExpr(); + case tok_number: return ParseNumberExpr(); + case '(': return ParseParenExpr(); + case tok_if: return ParseIfExpr(); + case tok_for: return ParseForExpr(); + } +} + +/// unary +/// ::= primary +/// ::= '!' unary +static ExprAST *ParseUnary() { + // If the current token is not an operator, it must be a primary expr. + if (!isascii(CurTok) || CurTok == '(' || CurTok == ',') + return ParsePrimary(); + + // If this is a unary operator, read it. + int Opc = CurTok; + getNextToken(); + if (ExprAST *Operand = ParseUnary()) + return new UnaryExprAST(Opc, Operand); + return 0; +} + +/// binoprhs +/// ::= ('+' unary)* +static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) { + // If this is a binop, find its precedence. + while (1) { + int TokPrec = GetTokPrecedence(); + + // If this is a binop that binds at least as tightly as the current binop, + // consume it, otherwise we are done. + if (TokPrec < ExprPrec) + return LHS; + + // Okay, we know this is a binop. + int BinOp = CurTok; + getNextToken(); // eat binop + + // Parse the unary expression after the binary operator. + ExprAST *RHS = ParseUnary(); + if (!RHS) return 0; + + // If BinOp binds less tightly with RHS than the operator after RHS, let + // the pending operator take RHS as its LHS. + int NextPrec = GetTokPrecedence(); + if (TokPrec < NextPrec) { + RHS = ParseBinOpRHS(TokPrec+1, RHS); + if (RHS == 0) return 0; + } + + // Merge LHS/RHS. + LHS = new BinaryExprAST(BinOp, LHS, RHS); + } +} + +/// expression +/// ::= unary binoprhs +/// +static ExprAST *ParseExpression() { + ExprAST *LHS = ParseUnary(); + if (!LHS) return 0; + + return ParseBinOpRHS(0, LHS); +} + +/// prototype +/// ::= id '(' id* ')' +/// ::= binary LETTER number? (id, id) +/// ::= unary LETTER (id) +static PrototypeAST *ParsePrototype() { + std::string FnName; + + unsigned Kind = 0; // 0 = identifier, 1 = unary, 2 = binary. + unsigned BinaryPrecedence = 30; + + switch (CurTok) { + default: + return ErrorP("Expected function name in prototype"); + case tok_identifier: + FnName = IdentifierStr; + Kind = 0; + getNextToken(); + break; + case tok_unary: + getNextToken(); + if (!isascii(CurTok)) + return ErrorP("Expected unary operator"); + FnName = "unary"; + FnName += (char)CurTok; + Kind = 1; + getNextToken(); + break; + case tok_binary: + getNextToken(); + if (!isascii(CurTok)) + return ErrorP("Expected binary operator"); + FnName = "binary"; + FnName += (char)CurTok; + Kind = 2; + getNextToken(); + + // Read the precedence if present. + if (CurTok == tok_number) { + if (NumVal < 1 || NumVal > 100) + return ErrorP("Invalid precedecnce: must be 1..100"); + BinaryPrecedence = (unsigned)NumVal; + getNextToken(); + } + break; + } + + if (CurTok != '(') + return ErrorP("Expected '(' in prototype"); + + std::vector<std::string> ArgNames; + while (getNextToken() == tok_identifier) + ArgNames.push_back(IdentifierStr); + if (CurTok != ')') + return ErrorP("Expected ')' in prototype"); + + // success. + getNextToken(); // eat ')'. + + // Verify right number of names for operator. + if (Kind && ArgNames.size() != Kind) + return ErrorP("Invalid number of operands for operator"); + + return new PrototypeAST(FnName, ArgNames, Kind != 0, BinaryPrecedence); +} + +/// definition ::= 'def' prototype expression +static FunctionAST *ParseDefinition() { + getNextToken(); // eat def. + PrototypeAST *Proto = ParsePrototype(); + if (Proto == 0) return 0; + + if (ExprAST *E = ParseExpression()) + return new FunctionAST(Proto, E); + return 0; +} + +/// toplevelexpr ::= expression +static FunctionAST *ParseTopLevelExpr() { + if (ExprAST *E = ParseExpression()) { + // Make an anonymous proto. + PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>()); + return new FunctionAST(Proto, E); + } + return 0; +} + +/// external ::= 'extern' prototype +static PrototypeAST *ParseExtern() { + getNextToken(); // eat extern. + return ParsePrototype(); +} + +//===----------------------------------------------------------------------===// +// Code Generation +//===----------------------------------------------------------------------===// + +static Module *TheModule; +static IRBuilder<> Builder(getGlobalContext()); +static std::map<std::string, Value*> NamedValues; +static FunctionPassManager *TheFPM; + +Value *ErrorV(const char *Str) { Error(Str); return 0; } + +Value *NumberExprAST::Codegen() { + return ConstantFP::get(getGlobalContext(), APFloat(Val)); +} + +Value *VariableExprAST::Codegen() { + // Look this variable up in the function. + Value *V = NamedValues[Name]; + return V ? V : ErrorV("Unknown variable name"); +} + +Value *UnaryExprAST::Codegen() { + Value *OperandV = Operand->Codegen(); + if (OperandV == 0) return 0; + + Function *F = TheModule->getFunction(std::string("unary")+Opcode); + if (F == 0) + return ErrorV("Unknown unary operator"); + + return Builder.CreateCall(F, OperandV, "unop"); +} + +Value *BinaryExprAST::Codegen() { + Value *L = LHS->Codegen(); + Value *R = RHS->Codegen(); + if (L == 0 || R == 0) return 0; + + switch (Op) { + case '+': return Builder.CreateFAdd(L, R, "addtmp"); + case '-': return Builder.CreateFSub(L, R, "subtmp"); + case '*': return Builder.CreateFMul(L, R, "multmp"); + case '<': + L = Builder.CreateFCmpULT(L, R, "cmptmp"); + // Convert bool 0/1 to double 0.0 or 1.0 + return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()), + "booltmp"); + default: break; + } + + // If it wasn't a builtin binary operator, it must be a user defined one. Emit + // a call to it. + Function *F = TheModule->getFunction(std::string("binary")+Op); + assert(F && "binary operator not found!"); + + Value *Ops[] = { L, R }; + return Builder.CreateCall(F, Ops, "binop"); +} + +Value *CallExprAST::Codegen() { + // Look up the name in the global module table. + Function *CalleeF = TheModule->getFunction(Callee); + if (CalleeF == 0) + return ErrorV("Unknown function referenced"); + + // If argument mismatch error. + if (CalleeF->arg_size() != Args.size()) + return ErrorV("Incorrect # arguments passed"); + + std::vector<Value*> ArgsV; + for (unsigned i = 0, e = Args.size(); i != e; ++i) { + ArgsV.push_back(Args[i]->Codegen()); + if (ArgsV.back() == 0) return 0; + } + + return Builder.CreateCall(CalleeF, ArgsV, "calltmp"); +} + +Value *IfExprAST::Codegen() { + Value *CondV = Cond->Codegen(); + if (CondV == 0) return 0; + + // Convert condition to a bool by comparing equal to 0.0. + CondV = Builder.CreateFCmpONE(CondV, + ConstantFP::get(getGlobalContext(), APFloat(0.0)), + "ifcond"); + + Function *TheFunction = Builder.GetInsertBlock()->getParent(); + + // Create blocks for the then and else cases. Insert the 'then' block at the + // end of the function. + BasicBlock *ThenBB = BasicBlock::Create(getGlobalContext(), "then", TheFunction); + BasicBlock *ElseBB = BasicBlock::Create(getGlobalContext(), "else"); + BasicBlock *MergeBB = BasicBlock::Create(getGlobalContext(), "ifcont"); + + Builder.CreateCondBr(CondV, ThenBB, ElseBB); + + // Emit then value. + Builder.SetInsertPoint(ThenBB); + + Value *ThenV = Then->Codegen(); + if (ThenV == 0) return 0; + + Builder.CreateBr(MergeBB); + // Codegen of 'Then' can change the current block, update ThenBB for the PHI. + ThenBB = Builder.GetInsertBlock(); + + // Emit else block. + TheFunction->getBasicBlockList().push_back(ElseBB); + Builder.SetInsertPoint(ElseBB); + + Value *ElseV = Else->Codegen(); + if (ElseV == 0) return 0; + + Builder.CreateBr(MergeBB); + // Codegen of 'Else' can change the current block, update ElseBB for the PHI. + ElseBB = Builder.GetInsertBlock(); + + // Emit merge block. + TheFunction->getBasicBlockList().push_back(MergeBB); + Builder.SetInsertPoint(MergeBB); + PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2, + "iftmp"); + + PN->addIncoming(ThenV, ThenBB); + PN->addIncoming(ElseV, ElseBB); + return PN; +} + +Value *ForExprAST::Codegen() { + // Output this as: + // ... + // start = startexpr + // goto loop + // loop: + // variable = phi [start, loopheader], [nextvariable, loopend] + // ... + // bodyexpr + // ... + // loopend: + // step = stepexpr + // nextvariable = variable + step + // endcond = endexpr + // br endcond, loop, endloop + // outloop: + + // Emit the start code first, without 'variable' in scope. + Value *StartVal = Start->Codegen(); + if (StartVal == 0) return 0; + + // Make the new basic block for the loop header, inserting after current + // block. + Function *TheFunction = Builder.GetInsertBlock()->getParent(); + BasicBlock *PreheaderBB = Builder.GetInsertBlock(); + BasicBlock *LoopBB = BasicBlock::Create(getGlobalContext(), "loop", TheFunction); + + // Insert an explicit fall through from the current block to the LoopBB. + Builder.CreateBr(LoopBB); + + // Start insertion in LoopBB. + Builder.SetInsertPoint(LoopBB); + + // Start the PHI node with an entry for Start. + PHINode *Variable = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2, VarName.c_str()); + Variable->addIncoming(StartVal, PreheaderBB); + + // Within the loop, the variable is defined equal to the PHI node. If it + // shadows an existing variable, we have to restore it, so save it now. + Value *OldVal = NamedValues[VarName]; + NamedValues[VarName] = Variable; + + // Emit the body of the loop. This, like any other expr, can change the + // current BB. Note that we ignore the value computed by the body, but don't + // allow an error. + if (Body->Codegen() == 0) + return 0; + + // Emit the step value. + Value *StepVal; + if (Step) { + StepVal = Step->Codegen(); + if (StepVal == 0) return 0; + } else { + // If not specified, use 1.0. + StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0)); + } + + Value *NextVar = Builder.CreateFAdd(Variable, StepVal, "nextvar"); + + // Compute the end condition. + Value *EndCond = End->Codegen(); + if (EndCond == 0) return EndCond; + + // Convert condition to a bool by comparing equal to 0.0. + EndCond = Builder.CreateFCmpONE(EndCond, + ConstantFP::get(getGlobalContext(), APFloat(0.0)), + "loopcond"); + + // Create the "after loop" block and insert it. + BasicBlock *LoopEndBB = Builder.GetInsertBlock(); + BasicBlock *AfterBB = BasicBlock::Create(getGlobalContext(), "afterloop", TheFunction); + + // Insert the conditional branch into the end of LoopEndBB. + Builder.CreateCondBr(EndCond, LoopBB, AfterBB); + + // Any new code will be inserted in AfterBB. + Builder.SetInsertPoint(AfterBB); + + // Add a new entry to the PHI node for the backedge. + Variable->addIncoming(NextVar, LoopEndBB); + + // Restore the unshadowed variable. + if (OldVal) + NamedValues[VarName] = OldVal; + else + NamedValues.erase(VarName); + + + // for expr always returns 0.0. + return Constant::getNullValue(Type::getDoubleTy(getGlobalContext())); +} + +Function *PrototypeAST::Codegen() { + // Make the function type: double(double,double) etc. + std::vector<Type*> Doubles(Args.size(), + Type::getDoubleTy(getGlobalContext())); + FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()), + Doubles, false); + + Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule); + + // If F conflicted, there was already something named 'Name'. If it has a + // body, don't allow redefinition or reextern. + if (F->getName() != Name) { + // Delete the one we just made and get the existing one. + F->eraseFromParent(); + F = TheModule->getFunction(Name); + + // If F already has a body, reject this. + if (!F->empty()) { + ErrorF("redefinition of function"); + return 0; + } + + // If F took a different number of args, reject. + if (F->arg_size() != Args.size()) { + ErrorF("redefinition of function with different # args"); + return 0; + } + } + + // Set names for all arguments. + unsigned Idx = 0; + for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size(); + ++AI, ++Idx) { + AI->setName(Args[Idx]); + + // Add arguments to variable symbol table. + NamedValues[Args[Idx]] = AI; + } + + return F; +} + +Function *FunctionAST::Codegen() { + NamedValues.clear(); + + Function *TheFunction = Proto->Codegen(); + if (TheFunction == 0) + return 0; + + // If this is an operator, install it. + if (Proto->isBinaryOp()) + BinopPrecedence[Proto->getOperatorName()] = Proto->getBinaryPrecedence(); + + // Create a new basic block to start insertion into. + BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction); + Builder.SetInsertPoint(BB); + + if (Value *RetVal = Body->Codegen()) { + // Finish off the function. + Builder.CreateRet(RetVal); + + // Validate the generated code, checking for consistency. + verifyFunction(*TheFunction); + + // Optimize the function. + TheFPM->run(*TheFunction); + + return TheFunction; + } + + // Error reading body, remove function. + TheFunction->eraseFromParent(); + + if (Proto->isBinaryOp()) + BinopPrecedence.erase(Proto->getOperatorName()); + return 0; +} + +//===----------------------------------------------------------------------===// +// Top-Level parsing and JIT Driver +//===----------------------------------------------------------------------===// + +static ExecutionEngine *TheExecutionEngine; + +static void HandleDefinition() { + if (FunctionAST *F = ParseDefinition()) { + if (Function *LF = F->Codegen()) { + fprintf(stderr, "Read function definition:"); + LF->dump(); + } + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +static void HandleExtern() { + if (PrototypeAST *P = ParseExtern()) { + if (Function *F = P->Codegen()) { + fprintf(stderr, "Read extern: "); + F->dump(); + } + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +static void HandleTopLevelExpression() { + // Evaluate a top-level expression into an anonymous function. + if (FunctionAST *F = ParseTopLevelExpr()) { + if (Function *LF = F->Codegen()) { + // JIT the function, returning a function pointer. + void *FPtr = TheExecutionEngine->getPointerToFunction(LF); + + // Cast it to the right type (takes no arguments, returns a double) so we + // can call it as a native function. + double (*FP)() = (double (*)())(intptr_t)FPtr; + fprintf(stderr, "Evaluated to %f\n", FP()); + } + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +/// top ::= definition | external | expression | ';' +static void MainLoop() { + while (1) { + fprintf(stderr, "ready> "); + switch (CurTok) { + case tok_eof: return; + case ';': getNextToken(); break; // ignore top-level semicolons. + case tok_def: HandleDefinition(); break; + case tok_extern: HandleExtern(); break; + default: HandleTopLevelExpression(); break; + } + } +} + +//===----------------------------------------------------------------------===// +// "Library" functions that can be "extern'd" from user code. +//===----------------------------------------------------------------------===// + +/// putchard - putchar that takes a double and returns 0. +extern "C" +double putchard(double X) { + putchar((char)X); + return 0; +} + +/// printd - printf that takes a double prints it as "%f\n", returning 0. +extern "C" +double printd(double X) { + printf("%f\n", X); + return 0; +} + +//===----------------------------------------------------------------------===// +// Main driver code. +//===----------------------------------------------------------------------===// + +int main() { + InitializeNativeTarget(); + LLVMContext &Context = getGlobalContext(); + + // Install standard binary operators. + // 1 is lowest precedence. + BinopPrecedence['<'] = 10; + BinopPrecedence['+'] = 20; + BinopPrecedence['-'] = 20; + BinopPrecedence['*'] = 40; // highest. + + // Prime the first token. + fprintf(stderr, "ready> "); + getNextToken(); + + // Make the module, which holds all the code. + TheModule = new Module("my cool jit", Context); + + // Create the JIT. This takes ownership of the module. + std::string ErrStr; + TheExecutionEngine = EngineBuilder(TheModule).setErrorStr(&ErrStr).create(); + if (!TheExecutionEngine) { + fprintf(stderr, "Could not create ExecutionEngine: %s\n", ErrStr.c_str()); + exit(1); + } + + FunctionPassManager OurFPM(TheModule); + + // Set up the optimizer pipeline. Start with registering info about how the + // target lays out data structures. + OurFPM.add(new TargetData(*TheExecutionEngine->getTargetData())); + // Provide basic AliasAnalysis support for GVN. + OurFPM.add(createBasicAliasAnalysisPass()); + // Do simple "peephole" optimizations and bit-twiddling optzns. + OurFPM.add(createInstructionCombiningPass()); + // Reassociate expressions. + OurFPM.add(createReassociatePass()); + // Eliminate Common SubExpressions. + OurFPM.add(createGVNPass()); + // Simplify the control flow graph (deleting unreachable blocks, etc). + OurFPM.add(createCFGSimplificationPass()); + + OurFPM.doInitialization(); + + // Set the global so the code gen can use this. + TheFPM = &OurFPM; + + // Run the main "interpreter loop" now. + MainLoop(); + + TheFPM = 0; + + // Print out all of the generated code. + TheModule->dump(); + + return 0; +} diff --git a/examples/Kaleidoscope/Chapter7/CMakeLists.txt b/examples/Kaleidoscope/Chapter7/CMakeLists.txt new file mode 100644 index 00000000000..da3839843bd --- /dev/null +++ b/examples/Kaleidoscope/Chapter7/CMakeLists.txt @@ -0,0 +1,6 @@ +set(LLVM_LINK_COMPONENTS core jit interpreter native) +set(LLVM_REQUIRES_RTTI 1) + +add_llvm_example(Kaleidoscope-Ch7 + toy.cpp + ) diff --git a/examples/Kaleidoscope/Chapter7/Makefile b/examples/Kaleidoscope/Chapter7/Makefile new file mode 100644 index 00000000000..6cec323efd4 --- /dev/null +++ b/examples/Kaleidoscope/Chapter7/Makefile @@ -0,0 +1,16 @@ +##===- examples/Kaleidoscope/Chapter7/Makefile -------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../.. +TOOLNAME = Kaleidoscope-Ch7 +EXAMPLE_TOOL = 1 +REQUIRES_RTTI := 1 + +LINK_COMPONENTS := core jit native + +include $(LEVEL)/Makefile.common diff --git a/examples/Kaleidoscope/Chapter7/toy.cpp b/examples/Kaleidoscope/Chapter7/toy.cpp new file mode 100644 index 00000000000..143b30bf476 --- /dev/null +++ b/examples/Kaleidoscope/Chapter7/toy.cpp @@ -0,0 +1,1142 @@ +#include "llvm/DerivedTypes.h" +#include "llvm/ExecutionEngine/ExecutionEngine.h" +#include "llvm/ExecutionEngine/JIT.h" +#include "llvm/IRBuilder.h" +#include "llvm/LLVMContext.h" +#include "llvm/Module.h" +#include "llvm/PassManager.h" +#include "llvm/Analysis/Verifier.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Support/TargetSelect.h" +#include <cstdio> +#include <string> +#include <map> +#include <vector> +using namespace llvm; + +//===----------------------------------------------------------------------===// +// Lexer +//===----------------------------------------------------------------------===// + +// The lexer returns tokens [0-255] if it is an unknown character, otherwise one +// of these for known things. +enum Token { + tok_eof = -1, + + // commands + tok_def = -2, tok_extern = -3, + + // primary + tok_identifier = -4, tok_number = -5, + + // control + tok_if = -6, tok_then = -7, tok_else = -8, + tok_for = -9, tok_in = -10, + + // operators + tok_binary = -11, tok_unary = -12, + + // var definition + tok_var = -13 +}; + +static std::string IdentifierStr; // Filled in if tok_identifier +static double NumVal; // Filled in if tok_number + +/// gettok - Return the next token from standard input. +static int gettok() { + static int LastChar = ' '; + + // Skip any whitespace. + while (isspace(LastChar)) + LastChar = getchar(); + + if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]* + IdentifierStr = LastChar; + while (isalnum((LastChar = getchar()))) + IdentifierStr += LastChar; + + if (IdentifierStr == "def") return tok_def; + if (IdentifierStr == "extern") return tok_extern; + if (IdentifierStr == "if") return tok_if; + if (IdentifierStr == "then") return tok_then; + if (IdentifierStr == "else") return tok_else; + if (IdentifierStr == "for") return tok_for; + if (IdentifierStr == "in") return tok_in; + if (IdentifierStr == "binary") return tok_binary; + if (IdentifierStr == "unary") return tok_unary; + if (IdentifierStr == "var") return tok_var; + return tok_identifier; + } + + if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+ + std::string NumStr; + do { + NumStr += LastChar; + LastChar = getchar(); + } while (isdigit(LastChar) || LastChar == '.'); + + NumVal = strtod(NumStr.c_str(), 0); + return tok_number; + } + + if (LastChar == '#') { + // Comment until end of line. + do LastChar = getchar(); + while (LastChar != EOF && LastChar != '\n' && LastChar != '\r'); + + if (LastChar != EOF) + return gettok(); + } + + // Check for end of file. Don't eat the EOF. + if (LastChar == EOF) + return tok_eof; + + // Otherwise, just return the character as its ascii value. + int ThisChar = LastChar; + LastChar = getchar(); + return ThisChar; +} + +//===----------------------------------------------------------------------===// +// Abstract Syntax Tree (aka Parse Tree) +//===----------------------------------------------------------------------===// + +/// ExprAST - Base class for all expression nodes. +class ExprAST { +public: + virtual ~ExprAST() {} + virtual Value *Codegen() = 0; +}; + +/// NumberExprAST - Expression class for numeric literals like "1.0". +class NumberExprAST : public ExprAST { + double Val; +public: + NumberExprAST(double val) : Val(val) {} + virtual Value *Codegen(); +}; + +/// VariableExprAST - Expression class for referencing a variable, like "a". +class VariableExprAST : public ExprAST { + std::string Name; +public: + VariableExprAST(const std::string &name) : Name(name) {} + const std::string &getName() const { return Name; } + virtual Value *Codegen(); +}; + +/// UnaryExprAST - Expression class for a unary operator. +class UnaryExprAST : public ExprAST { + char Opcode; + ExprAST *Operand; +public: + UnaryExprAST(char opcode, ExprAST *operand) + : Opcode(opcode), Operand(operand) {} + virtual Value *Codegen(); +}; + +/// BinaryExprAST - Expression class for a binary operator. +class BinaryExprAST : public ExprAST { + char Op; + ExprAST *LHS, *RHS; +public: + BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs) + : Op(op), LHS(lhs), RHS(rhs) {} + virtual Value *Codegen(); +}; + +/// CallExprAST - Expression class for function calls. +class CallExprAST : public ExprAST { + std::string Callee; + std::vector<ExprAST*> Args; +public: + CallExprAST(const std::string &callee, std::vector<ExprAST*> &args) + : Callee(callee), Args(args) {} + virtual Value *Codegen(); +}; + +/// IfExprAST - Expression class for if/then/else. +class IfExprAST : public ExprAST { + ExprAST *Cond, *Then, *Else; +public: + IfExprAST(ExprAST *cond, ExprAST *then, ExprAST *_else) + : Cond(cond), Then(then), Else(_else) {} + virtual Value *Codegen(); +}; + +/// ForExprAST - Expression class for for/in. +class ForExprAST : public ExprAST { + std::string VarName; + ExprAST *Start, *End, *Step, *Body; +public: + ForExprAST(const std::string &varname, ExprAST *start, ExprAST *end, + ExprAST *step, ExprAST *body) + : VarName(varname), Start(start), End(end), Step(step), Body(body) {} + virtual Value *Codegen(); +}; + +/// VarExprAST - Expression class for var/in +class VarExprAST : public ExprAST { + std::vector<std::pair<std::string, ExprAST*> > VarNames; + ExprAST *Body; +public: + VarExprAST(const std::vector<std::pair<std::string, ExprAST*> > &varnames, + ExprAST *body) + : VarNames(varnames), Body(body) {} + + virtual Value *Codegen(); +}; + +/// PrototypeAST - This class represents the "prototype" for a function, +/// which captures its argument names as well as if it is an operator. +class PrototypeAST { + std::string Name; + std::vector<std::string> Args; + bool isOperator; + unsigned Precedence; // Precedence if a binary op. +public: + PrototypeAST(const std::string &name, const std::vector<std::string> &args, + bool isoperator = false, unsigned prec = 0) + : Name(name), Args(args), isOperator(isoperator), Precedence(prec) {} + + bool isUnaryOp() const { return isOperator && Args.size() == 1; } + bool isBinaryOp() const { return isOperator && Args.size() == 2; } + + char getOperatorName() const { + assert(isUnaryOp() || isBinaryOp()); + return Name[Name.size()-1]; + } + + unsigned getBinaryPrecedence() const { return Precedence; } + + Function *Codegen(); + + void CreateArgumentAllocas(Function *F); +}; + +/// FunctionAST - This class represents a function definition itself. +class FunctionAST { + PrototypeAST *Proto; + ExprAST *Body; +public: + FunctionAST(PrototypeAST *proto, ExprAST *body) + : Proto(proto), Body(body) {} + + Function *Codegen(); +}; + +//===----------------------------------------------------------------------===// +// Parser +//===----------------------------------------------------------------------===// + +/// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current +/// token the parser is looking at. getNextToken reads another token from the +/// lexer and updates CurTok with its results. +static int CurTok; +static int getNextToken() { + return CurTok = gettok(); +} + +/// BinopPrecedence - This holds the precedence for each binary operator that is +/// defined. +static std::map<char, int> BinopPrecedence; + +/// GetTokPrecedence - Get the precedence of the pending binary operator token. +static int GetTokPrecedence() { + if (!isascii(CurTok)) + return -1; + + // Make sure it's a declared binop. + int TokPrec = BinopPrecedence[CurTok]; + if (TokPrec <= 0) return -1; + return TokPrec; +} + +/// Error* - These are little helper functions for error handling. +ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;} +PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; } +FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; } + +static ExprAST *ParseExpression(); + +/// identifierexpr +/// ::= identifier +/// ::= identifier '(' expression* ')' +static ExprAST *ParseIdentifierExpr() { + std::string IdName = IdentifierStr; + + getNextToken(); // eat identifier. + + if (CurTok != '(') // Simple variable ref. + return new VariableExprAST(IdName); + + // Call. + getNextToken(); // eat ( + std::vector<ExprAST*> Args; + if (CurTok != ')') { + while (1) { + ExprAST *Arg = ParseExpression(); + if (!Arg) return 0; + Args.push_back(Arg); + + if (CurTok == ')') break; + + if (CurTok != ',') + return Error("Expected ')' or ',' in argument list"); + getNextToken(); + } + } + + // Eat the ')'. + getNextToken(); + + return new CallExprAST(IdName, Args); +} + +/// numberexpr ::= number +static ExprAST *ParseNumberExpr() { + ExprAST *Result = new NumberExprAST(NumVal); + getNextToken(); // consume the number + return Result; +} + +/// parenexpr ::= '(' expression ')' +static ExprAST *ParseParenExpr() { + getNextToken(); // eat (. + ExprAST *V = ParseExpression(); + if (!V) return 0; + + if (CurTok != ')') + return Error("expected ')'"); + getNextToken(); // eat ). + return V; +} + +/// ifexpr ::= 'if' expression 'then' expression 'else' expression +static ExprAST *ParseIfExpr() { + getNextToken(); // eat the if. + + // condition. + ExprAST *Cond = ParseExpression(); + if (!Cond) return 0; + + if (CurTok != tok_then) + return Error("expected then"); + getNextToken(); // eat the then + + ExprAST *Then = ParseExpression(); + if (Then == 0) return 0; + + if (CurTok != tok_else) + return Error("expected else"); + + getNextToken(); + + ExprAST *Else = ParseExpression(); + if (!Else) return 0; + + return new IfExprAST(Cond, Then, Else); +} + +/// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression +static ExprAST *ParseForExpr() { + getNextToken(); // eat the for. + + if (CurTok != tok_identifier) + return Error("expected identifier after for"); + + std::string IdName = IdentifierStr; + getNextToken(); // eat identifier. + + if (CurTok != '=') + return Error("expected '=' after for"); + getNextToken(); // eat '='. + + + ExprAST *Start = ParseExpression(); + if (Start == 0) return 0; + if (CurTok != ',') + return Error("expected ',' after for start value"); + getNextToken(); + + ExprAST *End = ParseExpression(); + if (End == 0) return 0; + + // The step value is optional. + ExprAST *Step = 0; + if (CurTok == ',') { + getNextToken(); + Step = ParseExpression(); + if (Step == 0) return 0; + } + + if (CurTok != tok_in) + return Error("expected 'in' after for"); + getNextToken(); // eat 'in'. + + ExprAST *Body = ParseExpression(); + if (Body == 0) return 0; + + return new ForExprAST(IdName, Start, End, Step, Body); +} + +/// varexpr ::= 'var' identifier ('=' expression)? +// (',' identifier ('=' expression)?)* 'in' expression +static ExprAST *ParseVarExpr() { + getNextToken(); // eat the var. + + std::vector<std::pair<std::string, ExprAST*> > VarNames; + + // At least one variable name is required. + if (CurTok != tok_identifier) + return Error("expected identifier after var"); + + while (1) { + std::string Name = IdentifierStr; + getNextToken(); // eat identifier. + + // Read the optional initializer. + ExprAST *Init = 0; + if (CurTok == '=') { + getNextToken(); // eat the '='. + + Init = ParseExpression(); + if (Init == 0) return 0; + } + + VarNames.push_back(std::make_pair(Name, Init)); + + // End of var list, exit loop. + if (CurTok != ',') break; + getNextToken(); // eat the ','. + + if (CurTok != tok_identifier) + return Error("expected identifier list after var"); + } + + // At this point, we have to have 'in'. + if (CurTok != tok_in) + return Error("expected 'in' keyword after 'var'"); + getNextToken(); // eat 'in'. + + ExprAST *Body = ParseExpression(); + if (Body == 0) return 0; + + return new VarExprAST(VarNames, Body); +} + +/// primary +/// ::= identifierexpr +/// ::= numberexpr +/// ::= parenexpr +/// ::= ifexpr +/// ::= forexpr +/// ::= varexpr +static ExprAST *ParsePrimary() { + switch (CurTok) { + default: return Error("unknown token when expecting an expression"); + case tok_identifier: return ParseIdentifierExpr(); + case tok_number: return ParseNumberExpr(); + case '(': return ParseParenExpr(); + case tok_if: return ParseIfExpr(); + case tok_for: return ParseForExpr(); + case tok_var: return ParseVarExpr(); + } +} + +/// unary +/// ::= primary +/// ::= '!' unary +static ExprAST *ParseUnary() { + // If the current token is not an operator, it must be a primary expr. + if (!isascii(CurTok) || CurTok == '(' || CurTok == ',') + return ParsePrimary(); + + // If this is a unary operator, read it. + int Opc = CurTok; + getNextToken(); + if (ExprAST *Operand = ParseUnary()) + return new UnaryExprAST(Opc, Operand); + return 0; +} + +/// binoprhs +/// ::= ('+' unary)* +static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) { + // If this is a binop, find its precedence. + while (1) { + int TokPrec = GetTokPrecedence(); + + // If this is a binop that binds at least as tightly as the current binop, + // consume it, otherwise we are done. + if (TokPrec < ExprPrec) + return LHS; + + // Okay, we know this is a binop. + int BinOp = CurTok; + getNextToken(); // eat binop + + // Parse the unary expression after the binary operator. + ExprAST *RHS = ParseUnary(); + if (!RHS) return 0; + + // If BinOp binds less tightly with RHS than the operator after RHS, let + // the pending operator take RHS as its LHS. + int NextPrec = GetTokPrecedence(); + if (TokPrec < NextPrec) { + RHS = ParseBinOpRHS(TokPrec+1, RHS); + if (RHS == 0) return 0; + } + + // Merge LHS/RHS. + LHS = new BinaryExprAST(BinOp, LHS, RHS); + } +} + +/// expression +/// ::= unary binoprhs +/// +static ExprAST *ParseExpression() { + ExprAST *LHS = ParseUnary(); + if (!LHS) return 0; + + return ParseBinOpRHS(0, LHS); +} + +/// prototype +/// ::= id '(' id* ')' +/// ::= binary LETTER number? (id, id) +/// ::= unary LETTER (id) +static PrototypeAST *ParsePrototype() { + std::string FnName; + + unsigned Kind = 0; // 0 = identifier, 1 = unary, 2 = binary. + unsigned BinaryPrecedence = 30; + + switch (CurTok) { + default: + return ErrorP("Expected function name in prototype"); + case tok_identifier: + FnName = IdentifierStr; + Kind = 0; + getNextToken(); + break; + case tok_unary: + getNextToken(); + if (!isascii(CurTok)) + return ErrorP("Expected unary operator"); + FnName = "unary"; + FnName += (char)CurTok; + Kind = 1; + getNextToken(); + break; + case tok_binary: + getNextToken(); + if (!isascii(CurTok)) + return ErrorP("Expected binary operator"); + FnName = "binary"; + FnName += (char)CurTok; + Kind = 2; + getNextToken(); + + // Read the precedence if present. + if (CurTok == tok_number) { + if (NumVal < 1 || NumVal > 100) + return ErrorP("Invalid precedecnce: must be 1..100"); + BinaryPrecedence = (unsigned)NumVal; + getNextToken(); + } + break; + } + + if (CurTok != '(') + return ErrorP("Expected '(' in prototype"); + + std::vector<std::string> ArgNames; + while (getNextToken() == tok_identifier) + ArgNames.push_back(IdentifierStr); + if (CurTok != ')') + return ErrorP("Expected ')' in prototype"); + + // success. + getNextToken(); // eat ')'. + + // Verify right number of names for operator. + if (Kind && ArgNames.size() != Kind) + return ErrorP("Invalid number of operands for operator"); + + return new PrototypeAST(FnName, ArgNames, Kind != 0, BinaryPrecedence); +} + +/// definition ::= 'def' prototype expression +static FunctionAST *ParseDefinition() { + getNextToken(); // eat def. + PrototypeAST *Proto = ParsePrototype(); + if (Proto == 0) return 0; + + if (ExprAST *E = ParseExpression()) + return new FunctionAST(Proto, E); + return 0; +} + +/// toplevelexpr ::= expression +static FunctionAST *ParseTopLevelExpr() { + if (ExprAST *E = ParseExpression()) { + // Make an anonymous proto. + PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>()); + return new FunctionAST(Proto, E); + } + return 0; +} + +/// external ::= 'extern' prototype +static PrototypeAST *ParseExtern() { + getNextToken(); // eat extern. + return ParsePrototype(); +} + +//===----------------------------------------------------------------------===// +// Code Generation +//===----------------------------------------------------------------------===// + +static Module *TheModule; +static IRBuilder<> Builder(getGlobalContext()); +static std::map<std::string, AllocaInst*> NamedValues; +static FunctionPassManager *TheFPM; + +Value *ErrorV(const char *Str) { Error(Str); return 0; } + +/// CreateEntryBlockAlloca - Create an alloca instruction in the entry block of +/// the function. This is used for mutable variables etc. +static AllocaInst *CreateEntryBlockAlloca(Function *TheFunction, + const std::string &VarName) { + IRBuilder<> TmpB(&TheFunction->getEntryBlock(), + TheFunction->getEntryBlock().begin()); + return TmpB.CreateAlloca(Type::getDoubleTy(getGlobalContext()), 0, + VarName.c_str()); +} + +Value *NumberExprAST::Codegen() { + return ConstantFP::get(getGlobalContext(), APFloat(Val)); +} + +Value *VariableExprAST::Codegen() { + // Look this variable up in the function. + Value *V = NamedValues[Name]; + if (V == 0) return ErrorV("Unknown variable name"); + + // Load the value. + return Builder.CreateLoad(V, Name.c_str()); +} + +Value *UnaryExprAST::Codegen() { + Value *OperandV = Operand->Codegen(); + if (OperandV == 0) return 0; + + Function *F = TheModule->getFunction(std::string("unary")+Opcode); + if (F == 0) + return ErrorV("Unknown unary operator"); + + return Builder.CreateCall(F, OperandV, "unop"); +} + +Value *BinaryExprAST::Codegen() { + // Special case '=' because we don't want to emit the LHS as an expression. + if (Op == '=') { + // Assignment requires the LHS to be an identifier. + VariableExprAST *LHSE = dynamic_cast<VariableExprAST*>(LHS); + if (!LHSE) + return ErrorV("destination of '=' must be a variable"); + // Codegen the RHS. + Value *Val = RHS->Codegen(); + if (Val == 0) return 0; + + // Look up the name. + Value *Variable = NamedValues[LHSE->getName()]; + if (Variable == 0) return ErrorV("Unknown variable name"); + + Builder.CreateStore(Val, Variable); + return Val; + } + + Value *L = LHS->Codegen(); + Value *R = RHS->Codegen(); + if (L == 0 || R == 0) return 0; + + switch (Op) { + case '+': return Builder.CreateFAdd(L, R, "addtmp"); + case '-': return Builder.CreateFSub(L, R, "subtmp"); + case '*': return Builder.CreateFMul(L, R, "multmp"); + case '<': + L = Builder.CreateFCmpULT(L, R, "cmptmp"); + // Convert bool 0/1 to double 0.0 or 1.0 + return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()), + "booltmp"); + default: break; + } + + // If it wasn't a builtin binary operator, it must be a user defined one. Emit + // a call to it. + Function *F = TheModule->getFunction(std::string("binary")+Op); + assert(F && "binary operator not found!"); + + Value *Ops[] = { L, R }; + return Builder.CreateCall(F, Ops, "binop"); +} + +Value *CallExprAST::Codegen() { + // Look up the name in the global module table. + Function *CalleeF = TheModule->getFunction(Callee); + if (CalleeF == 0) + return ErrorV("Unknown function referenced"); + + // If argument mismatch error. + if (CalleeF->arg_size() != Args.size()) + return ErrorV("Incorrect # arguments passed"); + + std::vector<Value*> ArgsV; + for (unsigned i = 0, e = Args.size(); i != e; ++i) { + ArgsV.push_back(Args[i]->Codegen()); + if (ArgsV.back() == 0) return 0; + } + + return Builder.CreateCall(CalleeF, ArgsV, "calltmp"); +} + +Value *IfExprAST::Codegen() { + Value *CondV = Cond->Codegen(); + if (CondV == 0) return 0; + + // Convert condition to a bool by comparing equal to 0.0. + CondV = Builder.CreateFCmpONE(CondV, + ConstantFP::get(getGlobalContext(), APFloat(0.0)), + "ifcond"); + + Function *TheFunction = Builder.GetInsertBlock()->getParent(); + + // Create blocks for the then and else cases. Insert the 'then' block at the + // end of the function. + BasicBlock *ThenBB = BasicBlock::Create(getGlobalContext(), "then", TheFunction); + BasicBlock *ElseBB = BasicBlock::Create(getGlobalContext(), "else"); + BasicBlock *MergeBB = BasicBlock::Create(getGlobalContext(), "ifcont"); + + Builder.CreateCondBr(CondV, ThenBB, ElseBB); + + // Emit then value. + Builder.SetInsertPoint(ThenBB); + + Value *ThenV = Then->Codegen(); + if (ThenV == 0) return 0; + + Builder.CreateBr(MergeBB); + // Codegen of 'Then' can change the current block, update ThenBB for the PHI. + ThenBB = Builder.GetInsertBlock(); + + // Emit else block. + TheFunction->getBasicBlockList().push_back(ElseBB); + Builder.SetInsertPoint(ElseBB); + + Value *ElseV = Else->Codegen(); + if (ElseV == 0) return 0; + + Builder.CreateBr(MergeBB); + // Codegen of 'Else' can change the current block, update ElseBB for the PHI. + ElseBB = Builder.GetInsertBlock(); + + // Emit merge block. + TheFunction->getBasicBlockList().push_back(MergeBB); + Builder.SetInsertPoint(MergeBB); + PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2, + "iftmp"); + + PN->addIncoming(ThenV, ThenBB); + PN->addIncoming(ElseV, ElseBB); + return PN; +} + +Value *ForExprAST::Codegen() { + // Output this as: + // var = alloca double + // ... + // start = startexpr + // store start -> var + // goto loop + // loop: + // ... + // bodyexpr + // ... + // loopend: + // step = stepexpr + // endcond = endexpr + // + // curvar = load var + // nextvar = curvar + step + // store nextvar -> var + // br endcond, loop, endloop + // outloop: + + Function *TheFunction = Builder.GetInsertBlock()->getParent(); + + // Create an alloca for the variable in the entry block. + AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName); + + // Emit the start code first, without 'variable' in scope. + Value *StartVal = Start->Codegen(); + if (StartVal == 0) return 0; + + // Store the value into the alloca. + Builder.CreateStore(StartVal, Alloca); + + // Make the new basic block for the loop header, inserting after current + // block. + BasicBlock *LoopBB = BasicBlock::Create(getGlobalContext(), "loop", TheFunction); + + // Insert an explicit fall through from the current block to the LoopBB. + Builder.CreateBr(LoopBB); + + // Start insertion in LoopBB. + Builder.SetInsertPoint(LoopBB); + + // Within the loop, the variable is defined equal to the PHI node. If it + // shadows an existing variable, we have to restore it, so save it now. + AllocaInst *OldVal = NamedValues[VarName]; + NamedValues[VarName] = Alloca; + + // Emit the body of the loop. This, like any other expr, can change the + // current BB. Note that we ignore the value computed by the body, but don't + // allow an error. + if (Body->Codegen() == 0) + return 0; + + // Emit the step value. + Value *StepVal; + if (Step) { + StepVal = Step->Codegen(); + if (StepVal == 0) return 0; + } else { + // If not specified, use 1.0. + StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0)); + } + + // Compute the end condition. + Value *EndCond = End->Codegen(); + if (EndCond == 0) return EndCond; + + // Reload, increment, and restore the alloca. This handles the case where + // the body of the loop mutates the variable. + Value *CurVar = Builder.CreateLoad(Alloca, VarName.c_str()); + Value *NextVar = Builder.CreateFAdd(CurVar, StepVal, "nextvar"); + Builder.CreateStore(NextVar, Alloca); + + // Convert condition to a bool by comparing equal to 0.0. + EndCond = Builder.CreateFCmpONE(EndCond, + ConstantFP::get(getGlobalContext(), APFloat(0.0)), + "loopcond"); + + // Create the "after loop" block and insert it. + BasicBlock *AfterBB = BasicBlock::Create(getGlobalContext(), "afterloop", TheFunction); + + // Insert the conditional branch into the end of LoopEndBB. + Builder.CreateCondBr(EndCond, LoopBB, AfterBB); + + // Any new code will be inserted in AfterBB. + Builder.SetInsertPoint(AfterBB); + + // Restore the unshadowed variable. + if (OldVal) + NamedValues[VarName] = OldVal; + else + NamedValues.erase(VarName); + + + // for expr always returns 0.0. + return Constant::getNullValue(Type::getDoubleTy(getGlobalContext())); +} + +Value *VarExprAST::Codegen() { + std::vector<AllocaInst *> OldBindings; + + Function *TheFunction = Builder.GetInsertBlock()->getParent(); + + // Register all variables and emit their initializer. + for (unsigned i = 0, e = VarNames.size(); i != e; ++i) { + const std::string &VarName = VarNames[i].first; + ExprAST *Init = VarNames[i].second; + + // Emit the initializer before adding the variable to scope, this prevents + // the initializer from referencing the variable itself, and permits stuff + // like this: + // var a = 1 in + // var a = a in ... # refers to outer 'a'. + Value *InitVal; + if (Init) { + InitVal = Init->Codegen(); + if (InitVal == 0) return 0; + } else { // If not specified, use 0.0. + InitVal = ConstantFP::get(getGlobalContext(), APFloat(0.0)); + } + + AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName); + Builder.CreateStore(InitVal, Alloca); + + // Remember the old variable binding so that we can restore the binding when + // we unrecurse. + OldBindings.push_back(NamedValues[VarName]); + + // Remember this binding. + NamedValues[VarName] = Alloca; + } + + // Codegen the body, now that all vars are in scope. + Value *BodyVal = Body->Codegen(); + if (BodyVal == 0) return 0; + + // Pop all our variables from scope. + for (unsigned i = 0, e = VarNames.size(); i != e; ++i) + NamedValues[VarNames[i].first] = OldBindings[i]; + + // Return the body computation. + return BodyVal; +} + +Function *PrototypeAST::Codegen() { + // Make the function type: double(double,double) etc. + std::vector<Type*> Doubles(Args.size(), + Type::getDoubleTy(getGlobalContext())); + FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()), + Doubles, false); + + Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule); + + // If F conflicted, there was already something named 'Name'. If it has a + // body, don't allow redefinition or reextern. + if (F->getName() != Name) { + // Delete the one we just made and get the existing one. + F->eraseFromParent(); + F = TheModule->getFunction(Name); + + // If F already has a body, reject this. + if (!F->empty()) { + ErrorF("redefinition of function"); + return 0; + } + + // If F took a different number of args, reject. + if (F->arg_size() != Args.size()) { + ErrorF("redefinition of function with different # args"); + return 0; + } + } + + // Set names for all arguments. + unsigned Idx = 0; + for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size(); + ++AI, ++Idx) + AI->setName(Args[Idx]); + + return F; +} + +/// CreateArgumentAllocas - Create an alloca for each argument and register the +/// argument in the symbol table so that references to it will succeed. +void PrototypeAST::CreateArgumentAllocas(Function *F) { + Function::arg_iterator AI = F->arg_begin(); + for (unsigned Idx = 0, e = Args.size(); Idx != e; ++Idx, ++AI) { + // Create an alloca for this variable. + AllocaInst *Alloca = CreateEntryBlockAlloca(F, Args[Idx]); + + // Store the initial value into the alloca. + Builder.CreateStore(AI, Alloca); + + // Add arguments to variable symbol table. + NamedValues[Args[Idx]] = Alloca; + } +} + +Function *FunctionAST::Codegen() { + NamedValues.clear(); + + Function *TheFunction = Proto->Codegen(); + if (TheFunction == 0) + return 0; + + // If this is an operator, install it. + if (Proto->isBinaryOp()) + BinopPrecedence[Proto->getOperatorName()] = Proto->getBinaryPrecedence(); + + // Create a new basic block to start insertion into. + BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction); + Builder.SetInsertPoint(BB); + + // Add all arguments to the symbol table and create their allocas. + Proto->CreateArgumentAllocas(TheFunction); + + if (Value *RetVal = Body->Codegen()) { + // Finish off the function. + Builder.CreateRet(RetVal); + + // Validate the generated code, checking for consistency. + verifyFunction(*TheFunction); + + // Optimize the function. + TheFPM->run(*TheFunction); + + return TheFunction; + } + + // Error reading body, remove function. + TheFunction->eraseFromParent(); + + if (Proto->isBinaryOp()) + BinopPrecedence.erase(Proto->getOperatorName()); + return 0; +} + +//===----------------------------------------------------------------------===// +// Top-Level parsing and JIT Driver +//===----------------------------------------------------------------------===// + +static ExecutionEngine *TheExecutionEngine; + +static void HandleDefinition() { + if (FunctionAST *F = ParseDefinition()) { + if (Function *LF = F->Codegen()) { + fprintf(stderr, "Read function definition:"); + LF->dump(); + } + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +static void HandleExtern() { + if (PrototypeAST *P = ParseExtern()) { + if (Function *F = P->Codegen()) { + fprintf(stderr, "Read extern: "); + F->dump(); + } + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +static void HandleTopLevelExpression() { + // Evaluate a top-level expression into an anonymous function. + if (FunctionAST *F = ParseTopLevelExpr()) { + if (Function *LF = F->Codegen()) { + // JIT the function, returning a function pointer. + void *FPtr = TheExecutionEngine->getPointerToFunction(LF); + + // Cast it to the right type (takes no arguments, returns a double) so we + // can call it as a native function. + double (*FP)() = (double (*)())(intptr_t)FPtr; + fprintf(stderr, "Evaluated to %f\n", FP()); + } + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +/// top ::= definition | external | expression | ';' +static void MainLoop() { + while (1) { + fprintf(stderr, "ready> "); + switch (CurTok) { + case tok_eof: return; + case ';': getNextToken(); break; // ignore top-level semicolons. + case tok_def: HandleDefinition(); break; + case tok_extern: HandleExtern(); break; + default: HandleTopLevelExpression(); break; + } + } +} + +//===----------------------------------------------------------------------===// +// "Library" functions that can be "extern'd" from user code. +//===----------------------------------------------------------------------===// + +/// putchard - putchar that takes a double and returns 0. +extern "C" +double putchard(double X) { + putchar((char)X); + return 0; +} + +/// printd - printf that takes a double prints it as "%f\n", returning 0. +extern "C" +double printd(double X) { + printf("%f\n", X); + return 0; +} + +//===----------------------------------------------------------------------===// +// Main driver code. +//===----------------------------------------------------------------------===// + +int main() { + InitializeNativeTarget(); + LLVMContext &Context = getGlobalContext(); + + // Install standard binary operators. + // 1 is lowest precedence. + BinopPrecedence['='] = 2; + BinopPrecedence['<'] = 10; + BinopPrecedence['+'] = 20; + BinopPrecedence['-'] = 20; + BinopPrecedence['*'] = 40; // highest. + + // Prime the first token. + fprintf(stderr, "ready> "); + getNextToken(); + + // Make the module, which holds all the code. + TheModule = new Module("my cool jit", Context); + + // Create the JIT. This takes ownership of the module. + std::string ErrStr; + TheExecutionEngine = EngineBuilder(TheModule).setErrorStr(&ErrStr).create(); + if (!TheExecutionEngine) { + fprintf(stderr, "Could not create ExecutionEngine: %s\n", ErrStr.c_str()); + exit(1); + } + + FunctionPassManager OurFPM(TheModule); + + // Set up the optimizer pipeline. Start with registering info about how the + // target lays out data structures. + OurFPM.add(new TargetData(*TheExecutionEngine->getTargetData())); + // Provide basic AliasAnalysis support for GVN. + OurFPM.add(createBasicAliasAnalysisPass()); + // Promote allocas to registers. + OurFPM.add(createPromoteMemoryToRegisterPass()); + // Do simple "peephole" optimizations and bit-twiddling optzns. + OurFPM.add(createInstructionCombiningPass()); + // Reassociate expressions. + OurFPM.add(createReassociatePass()); + // Eliminate Common SubExpressions. + OurFPM.add(createGVNPass()); + // Simplify the control flow graph (deleting unreachable blocks, etc). + OurFPM.add(createCFGSimplificationPass()); + + OurFPM.doInitialization(); + + // Set the global so the code gen can use this. + TheFPM = &OurFPM; + + // Run the main "interpreter loop" now. + MainLoop(); + + TheFPM = 0; + + // Print out all of the generated code. + TheModule->dump(); + + return 0; +} diff --git a/examples/Kaleidoscope/Makefile b/examples/Kaleidoscope/Makefile new file mode 100644 index 00000000000..bd0c252c2c0 --- /dev/null +++ b/examples/Kaleidoscope/Makefile @@ -0,0 +1,15 @@ +##===- examples/Kaleidoscope/Makefile ----------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL=../.. + +include $(LEVEL)/Makefile.config + +PARALLEL_DIRS:= Chapter2 Chapter3 Chapter4 Chapter5 Chapter6 Chapter7 + +include $(LEVEL)/Makefile.common diff --git a/examples/LLVMBuild.txt b/examples/LLVMBuild.txt new file mode 100644 index 00000000000..4d06ffb995f --- /dev/null +++ b/examples/LLVMBuild.txt @@ -0,0 +1,21 @@ +;===- ./examples/LLVMBuild.txt ---------------------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Group +name = Examples +parent = $ROOT diff --git a/examples/Makefile b/examples/Makefile new file mode 100644 index 00000000000..50a6db76aa2 --- /dev/null +++ b/examples/Makefile @@ -0,0 +1,32 @@ +##===- examples/Makefile -----------------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL=.. + +include $(LEVEL)/Makefile.config + +PARALLEL_DIRS:= BrainF Fibonacci HowToUseJIT Kaleidoscope ModuleMaker + +ifeq ($(HAVE_PTHREAD),1) +PARALLEL_DIRS += ParallelJIT +endif + +ifeq ($(LLVM_ON_UNIX),1) + ifeq ($(ARCH),x86) + PARALLEL_DIRS += ExceptionDemo + endif + ifeq ($(ARCH),x86_64) + PARALLEL_DIRS += ExceptionDemo + endif +endif + +ifeq ($(filter $(BINDINGS_TO_BUILD),ocaml),ocaml) + PARALLEL_DIRS += OCaml-Kaleidoscope +endif + +include $(LEVEL)/Makefile.common diff --git a/examples/ModuleMaker/CMakeLists.txt b/examples/ModuleMaker/CMakeLists.txt new file mode 100644 index 00000000000..81e911560bd --- /dev/null +++ b/examples/ModuleMaker/CMakeLists.txt @@ -0,0 +1,5 @@ +set(LLVM_LINK_COMPONENTS bitwriter) + +add_llvm_example(ModuleMaker + ModuleMaker.cpp + ) diff --git a/examples/ModuleMaker/Makefile b/examples/ModuleMaker/Makefile new file mode 100644 index 00000000000..9454cf514dc --- /dev/null +++ b/examples/ModuleMaker/Makefile @@ -0,0 +1,14 @@ +##===- examples/ModuleMaker/Makefile -----------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL=../.. +TOOLNAME=ModuleMaker +EXAMPLE_TOOL = 1 +LINK_COMPONENTS := bitwriter + +include $(LEVEL)/Makefile.common diff --git a/examples/ModuleMaker/ModuleMaker.cpp b/examples/ModuleMaker/ModuleMaker.cpp new file mode 100644 index 00000000000..6bc52c12a03 --- /dev/null +++ b/examples/ModuleMaker/ModuleMaker.cpp @@ -0,0 +1,64 @@ +//===- examples/ModuleMaker/ModuleMaker.cpp - Example project ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This programs is a simple example that creates an LLVM module "from scratch", +// emitting it as a bitcode file to standard out. This is just to show how +// LLVM projects work and to demonstrate some of the LLVM APIs. +// +//===----------------------------------------------------------------------===// + +#include "llvm/LLVMContext.h" +#include "llvm/Module.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Constants.h" +#include "llvm/Instructions.h" +#include "llvm/Bitcode/ReaderWriter.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +int main() { + LLVMContext Context; + + // Create the "module" or "program" or "translation unit" to hold the + // function + Module *M = new Module("test", Context); + + // Create the main function: first create the type 'int ()' + FunctionType *FT = + FunctionType::get(Type::getInt32Ty(Context), /*not vararg*/false); + + // By passing a module as the last parameter to the Function constructor, + // it automatically gets appended to the Module. + Function *F = Function::Create(FT, Function::ExternalLinkage, "main", M); + + // Add a basic block to the function... again, it automatically inserts + // because of the last argument. + BasicBlock *BB = BasicBlock::Create(Context, "EntryBlock", F); + + // Get pointers to the constant integers... + Value *Two = ConstantInt::get(Type::getInt32Ty(Context), 2); + Value *Three = ConstantInt::get(Type::getInt32Ty(Context), 3); + + // Create the add instruction... does not insert... + Instruction *Add = BinaryOperator::Create(Instruction::Add, Two, Three, + "addresult"); + + // explicitly insert it into the basic block... + BB->getInstList().push_back(Add); + + // Create the return instruction and add it to the basic block + BB->getInstList().push_back(ReturnInst::Create(Context, Add)); + + // Output the bitcode file to stdout + WriteBitcodeToFile(M, outs()); + + // Delete the module and all of its contents. + delete M; + return 0; +} diff --git a/examples/ModuleMaker/README.txt b/examples/ModuleMaker/README.txt new file mode 100644 index 00000000000..66a5d3fe0b1 --- /dev/null +++ b/examples/ModuleMaker/README.txt @@ -0,0 +1,8 @@ +//===----------------------------------------------------------------------===// +// ModuleMaker Sample project +//===----------------------------------------------------------------------===// + +This project is an extremely simple example of using some simple pieces of the +LLVM API. The actual executable generated by this project simply emits an +LLVM bitcode file to standard output. It is designed to show some basic +usage of LLVM APIs, and how to link to LLVM libraries. diff --git a/examples/OCaml-Kaleidoscope/Chapter2/Makefile b/examples/OCaml-Kaleidoscope/Chapter2/Makefile new file mode 100644 index 00000000000..8fc03da0fbd --- /dev/null +++ b/examples/OCaml-Kaleidoscope/Chapter2/Makefile @@ -0,0 +1,22 @@ +##===- examples/OCaml-Kaleidoscope/Chapter2/Makefile -------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +# +# This is the makefile for the Objective Caml kaleidoscope tutorial, chapter 2. +# +##===----------------------------------------------------------------------===## + +LEVEL := ../../.. +TOOLNAME := OCaml-Kaleidoscope-Ch2 +EXAMPLE_TOOL := 1 +UsedComponents := core +UsedOcamLibs := llvm + +OCAMLCFLAGS += -pp camlp4of + +include $(LEVEL)/bindings/ocaml/Makefile.ocaml diff --git a/examples/OCaml-Kaleidoscope/Chapter2/_tags b/examples/OCaml-Kaleidoscope/Chapter2/_tags new file mode 100644 index 00000000000..7b9b80b1e94 --- /dev/null +++ b/examples/OCaml-Kaleidoscope/Chapter2/_tags @@ -0,0 +1 @@ +<{lexer,parser}.ml>: use_camlp4, pp(camlp4of) diff --git a/examples/OCaml-Kaleidoscope/Chapter2/ast.ml b/examples/OCaml-Kaleidoscope/Chapter2/ast.ml new file mode 100644 index 00000000000..4cc2dea86b7 --- /dev/null +++ b/examples/OCaml-Kaleidoscope/Chapter2/ast.ml @@ -0,0 +1,25 @@ +(*===----------------------------------------------------------------------=== + * Abstract Syntax Tree (aka Parse Tree) + *===----------------------------------------------------------------------===*) + +(* expr - Base type for all expression nodes. *) +type expr = + (* variant for numeric literals like "1.0". *) + | Number of float + + (* variant for referencing a variable, like "a". *) + | Variable of string + + (* variant for a binary operator. *) + | Binary of char * expr * expr + + (* variant for function calls. *) + | Call of string * expr array + +(* proto - This type represents the "prototype" for a function, which captures + * its name, and its argument names (thus implicitly the number of arguments the + * function takes). *) +type proto = Prototype of string * string array + +(* func - This type represents a function definition itself. *) +type func = Function of proto * expr diff --git a/examples/OCaml-Kaleidoscope/Chapter2/lexer.ml b/examples/OCaml-Kaleidoscope/Chapter2/lexer.ml new file mode 100644 index 00000000000..22a915552f0 --- /dev/null +++ b/examples/OCaml-Kaleidoscope/Chapter2/lexer.ml @@ -0,0 +1,52 @@ +(*===----------------------------------------------------------------------=== + * Lexer + *===----------------------------------------------------------------------===*) + +let rec lex = parser + (* Skip any whitespace. *) + | [< ' (' ' | '\n' | '\r' | '\t'); stream >] -> lex stream + + (* identifier: [a-zA-Z][a-zA-Z0-9] *) + | [< ' ('A' .. 'Z' | 'a' .. 'z' as c); stream >] -> + let buffer = Buffer.create 1 in + Buffer.add_char buffer c; + lex_ident buffer stream + + (* number: [0-9.]+ *) + | [< ' ('0' .. '9' as c); stream >] -> + let buffer = Buffer.create 1 in + Buffer.add_char buffer c; + lex_number buffer stream + + (* Comment until end of line. *) + | [< ' ('#'); stream >] -> + lex_comment stream + + (* Otherwise, just return the character as its ascii value. *) + | [< 'c; stream >] -> + [< 'Token.Kwd c; lex stream >] + + (* end of stream. *) + | [< >] -> [< >] + +and lex_number buffer = parser + | [< ' ('0' .. '9' | '.' as c); stream >] -> + Buffer.add_char buffer c; + lex_number buffer stream + | [< stream=lex >] -> + [< 'Token.Number (float_of_string (Buffer.contents buffer)); stream >] + +and lex_ident buffer = parser + | [< ' ('A' .. 'Z' | 'a' .. 'z' | '0' .. '9' as c); stream >] -> + Buffer.add_char buffer c; + lex_ident buffer stream + | [< stream=lex >] -> + match Buffer.contents buffer with + | "def" -> [< 'Token.Def; stream >] + | "extern" -> [< 'Token.Extern; stream >] + | id -> [< 'Token.Ident id; stream >] + +and lex_comment = parser + | [< ' ('\n'); stream=lex >] -> stream + | [< 'c; e=lex_comment >] -> e + | [< >] -> [< >] diff --git a/examples/OCaml-Kaleidoscope/Chapter2/parser.ml b/examples/OCaml-Kaleidoscope/Chapter2/parser.ml new file mode 100644 index 00000000000..83d9874a4ab --- /dev/null +++ b/examples/OCaml-Kaleidoscope/Chapter2/parser.ml @@ -0,0 +1,122 @@ +(*===---------------------------------------------------------------------=== + * Parser + *===---------------------------------------------------------------------===*) + +(* binop_precedence - This holds the precedence for each binary operator that is + * defined *) +let binop_precedence:(char, int) Hashtbl.t = Hashtbl.create 10 + +(* precedence - Get the precedence of the pending binary operator token. *) +let precedence c = try Hashtbl.find binop_precedence c with Not_found -> -1 + +(* primary + * ::= identifier + * ::= numberexpr + * ::= parenexpr *) +let rec parse_primary = parser + (* numberexpr ::= number *) + | [< 'Token.Number n >] -> Ast.Number n + + (* parenexpr ::= '(' expression ')' *) + | [< 'Token.Kwd '('; e=parse_expr; 'Token.Kwd ')' ?? "expected ')'" >] -> e + + (* identifierexpr + * ::= identifier + * ::= identifier '(' argumentexpr ')' *) + | [< 'Token.Ident id; stream >] -> + let rec parse_args accumulator = parser + | [< e=parse_expr; stream >] -> + begin parser + | [< 'Token.Kwd ','; e=parse_args (e :: accumulator) >] -> e + | [< >] -> e :: accumulator + end stream + | [< >] -> accumulator + in + let rec parse_ident id = parser + (* Call. *) + | [< 'Token.Kwd '('; + args=parse_args []; + 'Token.Kwd ')' ?? "expected ')'">] -> + Ast.Call (id, Array.of_list (List.rev args)) + + (* Simple variable ref. *) + | [< >] -> Ast.Variable id + in + parse_ident id stream + + | [< >] -> raise (Stream.Error "unknown token when expecting an expression.") + +(* binoprhs + * ::= ('+' primary)* *) +and parse_bin_rhs expr_prec lhs stream = + match Stream.peek stream with + (* If this is a binop, find its precedence. *) + | Some (Token.Kwd c) when Hashtbl.mem binop_precedence c -> + let token_prec = precedence c in + + (* If this is a binop that binds at least as tightly as the current binop, + * consume it, otherwise we are done. *) + if token_prec < expr_prec then lhs else begin + (* Eat the binop. *) + Stream.junk stream; + + (* Parse the primary expression after the binary operator. *) + let rhs = parse_primary stream in + + (* Okay, we know this is a binop. *) + let rhs = + match Stream.peek stream with + | Some (Token.Kwd c2) -> + (* If BinOp binds less tightly with rhs than the operator after + * rhs, let the pending operator take rhs as its lhs. *) + let next_prec = precedence c2 in + if token_prec < next_prec + then parse_bin_rhs (token_prec + 1) rhs stream + else rhs + | _ -> rhs + in + + (* Merge lhs/rhs. *) + let lhs = Ast.Binary (c, lhs, rhs) in + parse_bin_rhs expr_prec lhs stream + end + | _ -> lhs + +(* expression + * ::= primary binoprhs *) +and parse_expr = parser + | [< lhs=parse_primary; stream >] -> parse_bin_rhs 0 lhs stream + +(* prototype + * ::= id '(' id* ')' *) +let parse_prototype = + let rec parse_args accumulator = parser + | [< 'Token.Ident id; e=parse_args (id::accumulator) >] -> e + | [< >] -> accumulator + in + + parser + | [< 'Token.Ident id; + 'Token.Kwd '(' ?? "expected '(' in prototype"; + args=parse_args []; + 'Token.Kwd ')' ?? "expected ')' in prototype" >] -> + (* success. *) + Ast.Prototype (id, Array.of_list (List.rev args)) + + | [< >] -> + raise (Stream.Error "expected function name in prototype") + +(* definition ::= 'def' prototype expression *) +let parse_definition = parser + | [< 'Token.Def; p=parse_prototype; e=parse_expr >] -> + Ast.Function (p, e) + +(* toplevelexpr ::= expression *) +let parse_toplevel = parser + | [< e=parse_expr >] -> + (* Make an anonymous proto. *) + Ast.Function (Ast.Prototype ("", [||]), e) + +(* external ::= 'extern' prototype *) +let parse_extern = parser + | [< 'Token.Extern; e=parse_prototype >] -> e diff --git a/examples/OCaml-Kaleidoscope/Chapter2/token.ml b/examples/OCaml-Kaleidoscope/Chapter2/token.ml new file mode 100644 index 00000000000..2ca782e1499 --- /dev/null +++ b/examples/OCaml-Kaleidoscope/Chapter2/token.ml @@ -0,0 +1,15 @@ +(*===----------------------------------------------------------------------=== + * Lexer Tokens + *===----------------------------------------------------------------------===*) + +(* The lexer returns these 'Kwd' if it is an unknown character, otherwise one of + * these others for known things. *) +type token = + (* commands *) + | Def | Extern + + (* primary *) + | Ident of string | Number of float + + (* unknown *) + | Kwd of char diff --git a/examples/OCaml-Kaleidoscope/Chapter2/toplevel.ml b/examples/OCaml-Kaleidoscope/Chapter2/toplevel.ml new file mode 100644 index 00000000000..01c85bd70d2 --- /dev/null +++ b/examples/OCaml-Kaleidoscope/Chapter2/toplevel.ml @@ -0,0 +1,34 @@ +(*===----------------------------------------------------------------------=== + * Top-Level parsing and JIT Driver + *===----------------------------------------------------------------------===*) + +(* top ::= definition | external | expression | ';' *) +let rec main_loop stream = + match Stream.peek stream with + | None -> () + + (* ignore top-level semicolons. *) + | Some (Token.Kwd ';') -> + Stream.junk stream; + main_loop stream + + | Some token -> + begin + try match token with + | Token.Def -> + ignore(Parser.parse_definition stream); + print_endline "parsed a function definition."; + | Token.Extern -> + ignore(Parser.parse_extern stream); + print_endline "parsed an extern."; + | _ -> + (* Evaluate a top-level expression into an anonymous function. *) + ignore(Parser.parse_toplevel stream); + print_endline "parsed a top-level expr"; + with Stream.Error s -> + (* Skip token for error recovery. *) + Stream.junk stream; + print_endline s; + end; + print_string "ready> "; flush stdout; + main_loop stream diff --git a/examples/OCaml-Kaleidoscope/Chapter2/toy.ml b/examples/OCaml-Kaleidoscope/Chapter2/toy.ml new file mode 100644 index 00000000000..42b19fec001 --- /dev/null +++ b/examples/OCaml-Kaleidoscope/Chapter2/toy.ml @@ -0,0 +1,21 @@ +(*===----------------------------------------------------------------------=== + * Main driver code. + *===----------------------------------------------------------------------===*) + +let main () = + (* Install standard binary operators. + * 1 is the lowest precedence. *) + Hashtbl.add Parser.binop_precedence '<' 10; + Hashtbl.add Parser.binop_precedence '+' 20; + Hashtbl.add Parser.binop_precedence '-' 20; + Hashtbl.add Parser.binop_precedence '*' 40; (* highest. *) + + (* Prime the first token. *) + print_string "ready> "; flush stdout; + let stream = Lexer.lex (Stream.of_channel stdin) in + + (* Run the main "interpreter loop" now. *) + Toplevel.main_loop stream; +;; + +main () diff --git a/examples/OCaml-Kaleidoscope/Chapter3/Makefile b/examples/OCaml-Kaleidoscope/Chapter3/Makefile new file mode 100644 index 00000000000..fdbcd5191f4 --- /dev/null +++ b/examples/OCaml-Kaleidoscope/Chapter3/Makefile @@ -0,0 +1,24 @@ +##===- examples/OCaml-Kaleidoscope/Chapter3/Makefile -------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +# +# This is the makefile for the Objective Caml kaleidoscope tutorial, chapter 3. +# +##===----------------------------------------------------------------------===## + +LEVEL := ../../.. +TOOLNAME := OCaml-Kaleidoscope-Ch3 +EXAMPLE_TOOL := 1 +UsedComponents := core +UsedOcamLibs := llvm llvm_analysis + +OCAMLCFLAGS += -pp camlp4of + +ExcludeSources = $(PROJ_SRC_DIR)/myocamlbuild.ml + +include $(LEVEL)/bindings/ocaml/Makefile.ocaml diff --git a/examples/OCaml-Kaleidoscope/Chapter3/_tags b/examples/OCaml-Kaleidoscope/Chapter3/_tags new file mode 100644 index 00000000000..990490a5db2 --- /dev/null +++ b/examples/OCaml-Kaleidoscope/Chapter3/_tags @@ -0,0 +1,2 @@ +<{lexer,parser}.ml>: use_camlp4, pp(camlp4of) +<*.{byte,native}>: g++, use_llvm, use_llvm_analysis diff --git a/examples/OCaml-Kaleidoscope/Chapter3/ast.ml b/examples/OCaml-Kaleidoscope/Chapter3/ast.ml new file mode 100644 index 00000000000..4cc2dea86b7 --- /dev/null +++ b/examples/OCaml-Kaleidoscope/Chapter3/ast.ml @@ -0,0 +1,25 @@ +(*===----------------------------------------------------------------------=== + * Abstract Syntax Tree (aka Parse Tree) + *===----------------------------------------------------------------------===*) + +(* expr - Base type for all expression nodes. *) +type expr = + (* variant for numeric literals like "1.0". *) + | Number of float + + (* variant for referencing a variable, like "a". *) + | Variable of string + + (* variant for a binary operator. *) + | Binary of char * expr * expr + + (* variant for function calls. *) + | Call of string * expr array + +(* proto - This type represents the "prototype" for a function, which captures + * its name, and its argument names (thus implicitly the number of arguments the + * function takes). *) +type proto = Prototype of string * string array + +(* func - This type represents a function definition itself. *) +type func = Function of proto * expr diff --git a/examples/OCaml-Kaleidoscope/Chapter3/codegen.ml b/examples/OCaml-Kaleidoscope/Chapter3/codegen.ml new file mode 100644 index 00000000000..275cd0bee04 --- /dev/null +++ b/examples/OCaml-Kaleidoscope/Chapter3/codegen.ml @@ -0,0 +1,100 @@ +(*===----------------------------------------------------------------------=== + * Code Generation + *===----------------------------------------------------------------------===*) + +open Llvm + +exception Error of string + +let context = global_context () +let the_module = create_module context "my cool jit" +let builder = builder context +let named_values:(string, llvalue) Hashtbl.t = Hashtbl.create 10 +let double_type = double_type context + +let rec codegen_expr = function + | Ast.Number n -> const_float double_type n + | Ast.Variable name -> + (try Hashtbl.find named_values name with + | Not_found -> raise (Error "unknown variable name")) + | Ast.Binary (op, lhs, rhs) -> + let lhs_val = codegen_expr lhs in + let rhs_val = codegen_expr rhs in + begin + match op with + | '+' -> build_add lhs_val rhs_val "addtmp" builder + | '-' -> build_sub lhs_val rhs_val "subtmp" builder + | '*' -> build_mul lhs_val rhs_val "multmp" builder + | '<' -> + (* Convert bool 0/1 to double 0.0 or 1.0 *) + let i = build_fcmp Fcmp.Ult lhs_val rhs_val "cmptmp" builder in + build_uitofp i double_type "booltmp" builder + | _ -> raise (Error "invalid binary operator") + end + | Ast.Call (callee, args) -> + (* Look up the name in the module table. *) + let callee = + match lookup_function callee the_module with + | Some callee -> callee + | None -> raise (Error "unknown function referenced") + in + let params = params callee in + + (* If argument mismatch error. *) + if Array.length params == Array.length args then () else + raise (Error "incorrect # arguments passed"); + let args = Array.map codegen_expr args in + build_call callee args "calltmp" builder + +let codegen_proto = function + | Ast.Prototype (name, args) -> + (* Make the function type: double(double,double) etc. *) + let doubles = Array.make (Array.length args) double_type in + let ft = function_type double_type doubles in + let f = + match lookup_function name the_module with + | None -> declare_function name ft the_module + + (* If 'f' conflicted, there was already something named 'name'. If it + * has a body, don't allow redefinition or reextern. *) + | Some f -> + (* If 'f' already has a body, reject this. *) + if block_begin f <> At_end f then + raise (Error "redefinition of function"); + + (* If 'f' took a different number of arguments, reject. *) + if element_type (type_of f) <> ft then + raise (Error "redefinition of function with different # args"); + f + in + + (* Set names for all arguments. *) + Array.iteri (fun i a -> + let n = args.(i) in + set_value_name n a; + Hashtbl.add named_values n a; + ) (params f); + f + +let codegen_func = function + | Ast.Function (proto, body) -> + Hashtbl.clear named_values; + let the_function = codegen_proto proto in + + (* Create a new basic block to start insertion into. *) + let bb = append_block context "entry" the_function in + position_at_end bb builder; + + try + let ret_val = codegen_expr body in + + (* Finish off the function. *) + let _ = build_ret ret_val builder in + + (* Validate the generated code, checking for consistency. *) + Llvm_analysis.assert_valid_function the_function; + + the_function + with e -> + delete_function the_function; + raise e diff --git a/examples/OCaml-Kaleidoscope/Chapter3/lexer.ml b/examples/OCaml-Kaleidoscope/Chapter3/lexer.ml new file mode 100644 index 00000000000..22a915552f0 --- /dev/null +++ b/examples/OCaml-Kaleidoscope/Chapter3/lexer.ml @@ -0,0 +1,52 @@ +(*===----------------------------------------------------------------------=== + * Lexer + *===----------------------------------------------------------------------===*) + +let rec lex = parser + (* Skip any whitespace. *) + | [< ' (' ' | '\n' | '\r' | '\t'); stream >] -> lex stream + + (* identifier: [a-zA-Z][a-zA-Z0-9] *) + | [< ' ('A' .. 'Z' | 'a' .. 'z' as c); stream >] -> + let buffer = Buffer.create 1 in + Buffer.add_char buffer c; + lex_ident buffer stream + + (* number: [0-9.]+ *) + | [< ' ('0' .. '9' as c); stream >] -> + let buffer = Buffer.create 1 in + Buffer.add_char buffer c; + lex_number buffer stream + + (* Comment until end of line. *) + | [< ' ('#'); stream >] -> + lex_comment stream + + (* Otherwise, just return the character as its ascii value. *) + | [< 'c; stream >] -> + [< 'Token.Kwd c; lex stream >] + + (* end of stream. *) + | [< >] -> [< >] + +and lex_number buffer = parser + | [< ' ('0' .. '9' | '.' as c); stream >] -> + Buffer.add_char buffer c; + lex_number buffer stream + | [< stream=lex >] -> + [< 'Token.Number (float_of_string (Buffer.contents buffer)); stream >] + +and lex_ident buffer = parser + | [< ' ('A' .. 'Z' | 'a' .. 'z' | '0' .. '9' as c); stream >] -> + Buffer.add_char buffer c; + lex_ident buffer stream + | [< stream=lex >] -> + match Buffer.contents buffer with + | "def" -> [< 'Token.Def; stream >] + | "extern" -> [< 'Token.Extern; stream >] + | id -> [< 'Token.Ident id; stream >] + +and lex_comment = parser + | [< ' ('\n'); stream=lex >] -> stream + | [< 'c; e=lex_comment >] -> e + | [< >] -> [< >] diff --git a/examples/OCaml-Kaleidoscope/Chapter3/myocamlbuild.ml b/examples/OCaml-Kaleidoscope/Chapter3/myocamlbuild.ml new file mode 100644 index 00000000000..b71f5d717ef --- /dev/null +++ b/examples/OCaml-Kaleidoscope/Chapter3/myocamlbuild.ml @@ -0,0 +1,6 @@ +open Ocamlbuild_plugin;; + +ocaml_lib ~extern:true "llvm";; +ocaml_lib ~extern:true "llvm_analysis";; + +flag ["link"; "ocaml"; "g++"] (S[A"-cc"; A"g++"]);; diff --git a/examples/OCaml-Kaleidoscope/Chapter3/parser.ml b/examples/OCaml-Kaleidoscope/Chapter3/parser.ml new file mode 100644 index 00000000000..83d9874a4ab --- /dev/null +++ b/examples/OCaml-Kaleidoscope/Chapter3/parser.ml @@ -0,0 +1,122 @@ +(*===---------------------------------------------------------------------=== + * Parser + *===---------------------------------------------------------------------===*) + +(* binop_precedence - This holds the precedence for each binary operator that is + * defined *) +let binop_precedence:(char, int) Hashtbl.t = Hashtbl.create 10 + +(* precedence - Get the precedence of the pending binary operator token. *) +let precedence c = try Hashtbl.find binop_precedence c with Not_found -> -1 + +(* primary + * ::= identifier + * ::= numberexpr + * ::= parenexpr *) +let rec parse_primary = parser + (* numberexpr ::= number *) + | [< 'Token.Number n >] -> Ast.Number n + + (* parenexpr ::= '(' expression ')' *) + | [< 'Token.Kwd '('; e=parse_expr; 'Token.Kwd ')' ?? "expected ')'" >] -> e + + (* identifierexpr + * ::= identifier + * ::= identifier '(' argumentexpr ')' *) + | [< 'Token.Ident id; stream >] -> + let rec parse_args accumulator = parser + | [< e=parse_expr; stream >] -> + begin parser + | [< 'Token.Kwd ','; e=parse_args (e :: accumulator) >] -> e + | [< >] -> e :: accumulator + end stream + | [< >] -> accumulator + in + let rec parse_ident id = parser + (* Call. *) + | [< 'Token.Kwd '('; + args=parse_args []; + 'Token.Kwd ')' ?? "expected ')'">] -> + Ast.Call (id, Array.of_list (List.rev args)) + + (* Simple variable ref. *) + | [< >] -> Ast.Variable id + in + parse_ident id stream + + | [< >] -> raise (Stream.Error "unknown token when expecting an expression.") + +(* binoprhs + * ::= ('+' primary)* *) +and parse_bin_rhs expr_prec lhs stream = + match Stream.peek stream with + (* If this is a binop, find its precedence. *) + | Some (Token.Kwd c) when Hashtbl.mem binop_precedence c -> + let token_prec = precedence c in + + (* If this is a binop that binds at least as tightly as the current binop, + * consume it, otherwise we are done. *) + if token_prec < expr_prec then lhs else begin + (* Eat the binop. *) + Stream.junk stream; + + (* Parse the primary expression after the binary operator. *) + let rhs = parse_primary stream in + + (* Okay, we know this is a binop. *) + let rhs = + match Stream.peek stream with + | Some (Token.Kwd c2) -> + (* If BinOp binds less tightly with rhs than the operator after + * rhs, let the pending operator take rhs as its lhs. *) + let next_prec = precedence c2 in + if token_prec < next_prec + then parse_bin_rhs (token_prec + 1) rhs stream + else rhs + | _ -> rhs + in + + (* Merge lhs/rhs. *) + let lhs = Ast.Binary (c, lhs, rhs) in + parse_bin_rhs expr_prec lhs stream + end + | _ -> lhs + +(* expression + * ::= primary binoprhs *) +and parse_expr = parser + | [< lhs=parse_primary; stream >] -> parse_bin_rhs 0 lhs stream + +(* prototype + * ::= id '(' id* ')' *) +let parse_prototype = + let rec parse_args accumulator = parser + | [< 'Token.Ident id; e=parse_args (id::accumulator) >] -> e + | [< >] -> accumulator + in + + parser + | [< 'Token.Ident id; + 'Token.Kwd '(' ?? "expected '(' in prototype"; + args=parse_args []; + 'Token.Kwd ')' ?? "expected ')' in prototype" >] -> + (* success. *) + Ast.Prototype (id, Array.of_list (List.rev args)) + + | [< >] -> + raise (Stream.Error "expected function name in prototype") + +(* definition ::= 'def' prototype expression *) +let parse_definition = parser + | [< 'Token.Def; p=parse_prototype; e=parse_expr >] -> + Ast.Function (p, e) + +(* toplevelexpr ::= expression *) +let parse_toplevel = parser + | [< e=parse_expr >] -> + (* Make an anonymous proto. *) + Ast.Function (Ast.Prototype ("", [||]), e) + +(* external ::= 'extern' prototype *) +let parse_extern = parser + | [< 'Token.Extern; e=parse_prototype >] -> e diff --git a/examples/OCaml-Kaleidoscope/Chapter3/token.ml b/examples/OCaml-Kaleidoscope/Chapter3/token.ml new file mode 100644 index 00000000000..2ca782e1499 --- /dev/null +++ b/examples/OCaml-Kaleidoscope/Chapter3/token.ml @@ -0,0 +1,15 @@ +(*===----------------------------------------------------------------------=== + * Lexer Tokens + *===----------------------------------------------------------------------===*) + +(* The lexer returns these 'Kwd' if it is an unknown character, otherwise one of + * these others for known things. *) +type token = + (* commands *) + | Def | Extern + + (* primary *) + | Ident of string | Number of float + + (* unknown *) + | Kwd of char diff --git a/examples/OCaml-Kaleidoscope/Chapter3/toplevel.ml b/examples/OCaml-Kaleidoscope/Chapter3/toplevel.ml new file mode 100644 index 00000000000..d1bf5d4c0c6 --- /dev/null +++ b/examples/OCaml-Kaleidoscope/Chapter3/toplevel.ml @@ -0,0 +1,39 @@ +(*===----------------------------------------------------------------------=== + * Top-Level parsing and JIT Driver + *===----------------------------------------------------------------------===*) + +open Llvm + +(* top ::= definition | external | expression | ';' *) +let rec main_loop stream = + match Stream.peek stream with + | None -> () + + (* ignore top-level semicolons. *) + | Some (Token.Kwd ';') -> + Stream.junk stream; + main_loop stream + + | Some token -> + begin + try match token with + | Token.Def -> + let e = Parser.parse_definition stream in + print_endline "parsed a function definition."; + dump_value (Codegen.codegen_func e); + | Token.Extern -> + let e = Parser.parse_extern stream in + print_endline "parsed an extern."; + dump_value (Codegen.codegen_proto e); + | _ -> + (* Evaluate a top-level expression into an anonymous function. *) + let e = Parser.parse_toplevel stream in + print_endline "parsed a top-level expr"; + dump_value (Codegen.codegen_func e); + with Stream.Error s | Codegen.Error s -> + (* Skip token for error recovery. *) + Stream.junk stream; + print_endline s; + end; + print_string "ready> "; flush stdout; + main_loop stream diff --git a/examples/OCaml-Kaleidoscope/Chapter3/toy.ml b/examples/OCaml-Kaleidoscope/Chapter3/toy.ml new file mode 100644 index 00000000000..73c1a1ec62a --- /dev/null +++ b/examples/OCaml-Kaleidoscope/Chapter3/toy.ml @@ -0,0 +1,26 @@ +(*===----------------------------------------------------------------------=== + * Main driver code. + *===----------------------------------------------------------------------===*) + +open Llvm + +let main () = + (* Install standard binary operators. + * 1 is the lowest precedence. *) + Hashtbl.add Parser.binop_precedence '<' 10; + Hashtbl.add Parser.binop_precedence '+' 20; + Hashtbl.add Parser.binop_precedence '-' 20; + Hashtbl.add Parser.binop_precedence '*' 40; (* highest. *) + + (* Prime the first token. *) + print_string "ready> "; flush stdout; + let stream = Lexer.lex (Stream.of_channel stdin) in + + (* Run the main "interpreter loop" now. *) + Toplevel.main_loop stream; + + (* Print out all the generated code. *) + dump_module Codegen.the_module +;; + +main () diff --git a/examples/OCaml-Kaleidoscope/Chapter4/Makefile b/examples/OCaml-Kaleidoscope/Chapter4/Makefile new file mode 100644 index 00000000000..d9c3f49bea6 --- /dev/null +++ b/examples/OCaml-Kaleidoscope/Chapter4/Makefile @@ -0,0 +1,25 @@ +##===- examples/OCaml-Kaleidoscope/Chapter4/Makefile -------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +# +# This is the makefile for the Objective Caml kaleidoscope tutorial, chapter 4. +# +##===----------------------------------------------------------------------===## + +LEVEL := ../../.. +TOOLNAME := OCaml-Kaleidoscope-Ch4 +EXAMPLE_TOOL := 1 +UsedComponents := core +UsedOcamLibs := llvm llvm_analysis llvm_executionengine llvm_target \ + llvm_scalar_opts + +OCAMLCFLAGS += -pp camlp4of + +ExcludeSources = $(PROJ_SRC_DIR)/myocamlbuild.ml + +include $(LEVEL)/bindings/ocaml/Makefile.ocaml diff --git a/examples/OCaml-Kaleidoscope/Chapter4/_tags b/examples/OCaml-Kaleidoscope/Chapter4/_tags new file mode 100644 index 00000000000..7a03dba6672 --- /dev/null +++ b/examples/OCaml-Kaleidoscope/Chapter4/_tags @@ -0,0 +1,4 @@ +<{lexer,parser}.ml>: use_camlp4, pp(camlp4of) +<*.{byte,native}>: g++, use_llvm, use_llvm_analysis +<*.{byte,native}>: use_llvm_executionengine, use_llvm_target +<*.{byte,native}>: use_llvm_scalar_opts, use_bindings diff --git a/examples/OCaml-Kaleidoscope/Chapter4/ast.ml b/examples/OCaml-Kaleidoscope/Chapter4/ast.ml new file mode 100644 index 00000000000..4cc2dea86b7 --- /dev/null +++ b/examples/OCaml-Kaleidoscope/Chapter4/ast.ml @@ -0,0 +1,25 @@ +(*===----------------------------------------------------------------------=== + * Abstract Syntax Tree (aka Parse Tree) + *===----------------------------------------------------------------------===*) + +(* expr - Base type for all expression nodes. *) +type expr = + (* variant for numeric literals like "1.0". *) + | Number of float + + (* variant for referencing a variable, like "a". *) + | Variable of string + + (* variant for a binary operator. *) + | Binary of char * expr * expr + + (* variant for function calls. *) + | Call of string * expr array + +(* proto - This type represents the "prototype" for a function, which captures + * its name, and its argument names (thus implicitly the number of arguments the + * function takes). *) +type proto = Prototype of string * string array + +(* func - This type represents a function definition itself. *) +type func = Function of proto * expr diff --git a/examples/OCaml-Kaleidoscope/Chapter4/bindings.c b/examples/OCaml-Kaleidoscope/Chapter4/bindings.c new file mode 100644 index 00000000000..053513bf0c0 --- /dev/null +++ b/examples/OCaml-Kaleidoscope/Chapter4/bindings.c @@ -0,0 +1,7 @@ +#include <stdio.h> + +/* putchard - putchar that takes a double and returns 0. */ +extern double putchard(double X) { + putchar((char)X); + return 0; +} diff --git a/examples/OCaml-Kaleidoscope/Chapter4/codegen.ml b/examples/OCaml-Kaleidoscope/Chapter4/codegen.ml new file mode 100644 index 00000000000..8957f4c610f --- /dev/null +++ b/examples/OCaml-Kaleidoscope/Chapter4/codegen.ml @@ -0,0 +1,103 @@ +(*===----------------------------------------------------------------------=== + * Code Generation + *===----------------------------------------------------------------------===*) + +open Llvm + +exception Error of string + +let context = global_context () +let the_module = create_module context "my cool jit" +let builder = builder context +let named_values:(string, llvalue) Hashtbl.t = Hashtbl.create 10 +let double_type = double_type context + +let rec codegen_expr = function + | Ast.Number n -> const_float double_type n + | Ast.Variable name -> + (try Hashtbl.find named_values name with + | Not_found -> raise (Error "unknown variable name")) + | Ast.Binary (op, lhs, rhs) -> + let lhs_val = codegen_expr lhs in + let rhs_val = codegen_expr rhs in + begin + match op with + | '+' -> build_fadd lhs_val rhs_val "addtmp" builder + | '-' -> build_fsub lhs_val rhs_val "subtmp" builder + | '*' -> build_fmul lhs_val rhs_val "multmp" builder + | '<' -> + (* Convert bool 0/1 to double 0.0 or 1.0 *) + let i = build_fcmp Fcmp.Ult lhs_val rhs_val "cmptmp" builder in + build_uitofp i double_type "booltmp" builder + | _ -> raise (Error "invalid binary operator") + end + | Ast.Call (callee, args) -> + (* Look up the name in the module table. *) + let callee = + match lookup_function callee the_module with + | Some callee -> callee + | None -> raise (Error "unknown function referenced") + in + let params = params callee in + + (* If argument mismatch error. *) + if Array.length params == Array.length args then () else + raise (Error "incorrect # arguments passed"); + let args = Array.map codegen_expr args in + build_call callee args "calltmp" builder + +let codegen_proto = function + | Ast.Prototype (name, args) -> + (* Make the function type: double(double,double) etc. *) + let doubles = Array.make (Array.length args) double_type in + let ft = function_type double_type doubles in + let f = + match lookup_function name the_module with + | None -> declare_function name ft the_module + + (* If 'f' conflicted, there was already something named 'name'. If it + * has a body, don't allow redefinition or reextern. *) + | Some f -> + (* If 'f' already has a body, reject this. *) + if block_begin f <> At_end f then + raise (Error "redefinition of function"); + + (* If 'f' took a different number of arguments, reject. *) + if element_type (type_of f) <> ft then + raise (Error "redefinition of function with different # args"); + f + in + + (* Set names for all arguments. *) + Array.iteri (fun i a -> + let n = args.(i) in + set_value_name n a; + Hashtbl.add named_values n a; + ) (params f); + f + +let codegen_func the_fpm = function + | Ast.Function (proto, body) -> + Hashtbl.clear named_values; + let the_function = codegen_proto proto in + + (* Create a new basic block to start insertion into. *) + let bb = append_block context "entry" the_function in + position_at_end bb builder; + + try + let ret_val = codegen_expr body in + + (* Finish off the function. *) + let _ = build_ret ret_val builder in + + (* Validate the generated code, checking for consistency. *) + Llvm_analysis.assert_valid_function the_function; + + (* Optimize the function. *) + let _ = PassManager.run_function the_function the_fpm in + + the_function + with e -> + delete_function the_function; + raise e diff --git a/examples/OCaml-Kaleidoscope/Chapter4/lexer.ml b/examples/OCaml-Kaleidoscope/Chapter4/lexer.ml new file mode 100644 index 00000000000..22a915552f0 --- /dev/null +++ b/examples/OCaml-Kaleidoscope/Chapter4/lexer.ml @@ -0,0 +1,52 @@ +(*===----------------------------------------------------------------------=== + * Lexer + *===----------------------------------------------------------------------===*) + +let rec lex = parser + (* Skip any whitespace. *) + | [< ' (' ' | '\n' | '\r' | '\t'); stream >] -> lex stream + + (* identifier: [a-zA-Z][a-zA-Z0-9] *) + | [< ' ('A' .. 'Z' | 'a' .. 'z' as c); stream >] -> + let buffer = Buffer.create 1 in + Buffer.add_char buffer c; + lex_ident buffer stream + + (* number: [0-9.]+ *) + | [< ' ('0' .. '9' as c); stream >] -> + let buffer = Buffer.create 1 in + Buffer.add_char buffer c; + lex_number buffer stream + + (* Comment until end of line. *) + | [< ' ('#'); stream >] -> + lex_comment stream + + (* Otherwise, just return the character as its ascii value. *) + | [< 'c; stream >] -> + [< 'Token.Kwd c; lex stream >] + + (* end of stream. *) + | [< >] -> [< >] + +and lex_number buffer = parser + | [< ' ('0' .. '9' | '.' as c); stream >] -> + Buffer.add_char buffer c; + lex_number buffer stream + | [< stream=lex >] -> + [< 'Token.Number (float_of_string (Buffer.contents buffer)); stream >] + +and lex_ident buffer = parser + | [< ' ('A' .. 'Z' | 'a' .. 'z' | '0' .. '9' as c); stream >] -> + Buffer.add_char buffer c; + lex_ident buffer stream + | [< stream=lex >] -> + match Buffer.contents buffer with + | "def" -> [< 'Token.Def; stream >] + | "extern" -> [< 'Token.Extern; stream >] + | id -> [< 'Token.Ident id; stream >] + +and lex_comment = parser + | [< ' ('\n'); stream=lex >] -> stream + | [< 'c; e=lex_comment >] -> e + | [< >] -> [< >] diff --git a/examples/OCaml-Kaleidoscope/Chapter4/myocamlbuild.ml b/examples/OCaml-Kaleidoscope/Chapter4/myocamlbuild.ml new file mode 100644 index 00000000000..ff42664c43b --- /dev/null +++ b/examples/OCaml-Kaleidoscope/Chapter4/myocamlbuild.ml @@ -0,0 +1,10 @@ +open Ocamlbuild_plugin;; + +ocaml_lib ~extern:true "llvm";; +ocaml_lib ~extern:true "llvm_analysis";; +ocaml_lib ~extern:true "llvm_executionengine";; +ocaml_lib ~extern:true "llvm_target";; +ocaml_lib ~extern:true "llvm_scalar_opts";; + +flag ["link"; "ocaml"; "g++"] (S[A"-cc"; A"g++"]);; +dep ["link"; "ocaml"; "use_bindings"] ["bindings.o"];; diff --git a/examples/OCaml-Kaleidoscope/Chapter4/parser.ml b/examples/OCaml-Kaleidoscope/Chapter4/parser.ml new file mode 100644 index 00000000000..83d9874a4ab --- /dev/null +++ b/examples/OCaml-Kaleidoscope/Chapter4/parser.ml @@ -0,0 +1,122 @@ +(*===---------------------------------------------------------------------=== + * Parser + *===---------------------------------------------------------------------===*) + +(* binop_precedence - This holds the precedence for each binary operator that is + * defined *) +let binop_precedence:(char, int) Hashtbl.t = Hashtbl.create 10 + +(* precedence - Get the precedence of the pending binary operator token. *) +let precedence c = try Hashtbl.find binop_precedence c with Not_found -> -1 + +(* primary + * ::= identifier + * ::= numberexpr + * ::= parenexpr *) +let rec parse_primary = parser + (* numberexpr ::= number *) + | [< 'Token.Number n >] -> Ast.Number n + + (* parenexpr ::= '(' expression ')' *) + | [< 'Token.Kwd '('; e=parse_expr; 'Token.Kwd ')' ?? "expected ')'" >] -> e + + (* identifierexpr + * ::= identifier + * ::= identifier '(' argumentexpr ')' *) + | [< 'Token.Ident id; stream >] -> + let rec parse_args accumulator = parser + | [< e=parse_expr; stream >] -> + begin parser + | [< 'Token.Kwd ','; e=parse_args (e :: accumulator) >] -> e + | [< >] -> e :: accumulator + end stream + | [< >] -> accumulator + in + let rec parse_ident id = parser + (* Call. *) + | [< 'Token.Kwd '('; + args=parse_args []; + 'Token.Kwd ')' ?? "expected ')'">] -> + Ast.Call (id, Array.of_list (List.rev args)) + + (* Simple variable ref. *) + | [< >] -> Ast.Variable id + in + parse_ident id stream + + | [< >] -> raise (Stream.Error "unknown token when expecting an expression.") + +(* binoprhs + * ::= ('+' primary)* *) +and parse_bin_rhs expr_prec lhs stream = + match Stream.peek stream with + (* If this is a binop, find its precedence. *) + | Some (Token.Kwd c) when Hashtbl.mem binop_precedence c -> + let token_prec = precedence c in + + (* If this is a binop that binds at least as tightly as the current binop, + * consume it, otherwise we are done. *) + if token_prec < expr_prec then lhs else begin + (* Eat the binop. *) + Stream.junk stream; + + (* Parse the primary expression after the binary operator. *) + let rhs = parse_primary stream in + + (* Okay, we know this is a binop. *) + let rhs = + match Stream.peek stream with + | Some (Token.Kwd c2) -> + (* If BinOp binds less tightly with rhs than the operator after + * rhs, let the pending operator take rhs as its lhs. *) + let next_prec = precedence c2 in + if token_prec < next_prec + then parse_bin_rhs (token_prec + 1) rhs stream + else rhs + | _ -> rhs + in + + (* Merge lhs/rhs. *) + let lhs = Ast.Binary (c, lhs, rhs) in + parse_bin_rhs expr_prec lhs stream + end + | _ -> lhs + +(* expression + * ::= primary binoprhs *) +and parse_expr = parser + | [< lhs=parse_primary; stream >] -> parse_bin_rhs 0 lhs stream + +(* prototype + * ::= id '(' id* ')' *) +let parse_prototype = + let rec parse_args accumulator = parser + | [< 'Token.Ident id; e=parse_args (id::accumulator) >] -> e + | [< >] -> accumulator + in + + parser + | [< 'Token.Ident id; + 'Token.Kwd '(' ?? "expected '(' in prototype"; + args=parse_args []; + 'Token.Kwd ')' ?? "expected ')' in prototype" >] -> + (* success. *) + Ast.Prototype (id, Array.of_list (List.rev args)) + + | [< >] -> + raise (Stream.Error "expected function name in prototype") + +(* definition ::= 'def' prototype expression *) +let parse_definition = parser + | [< 'Token.Def; p=parse_prototype; e=parse_expr >] -> + Ast.Function (p, e) + +(* toplevelexpr ::= expression *) +let parse_toplevel = parser + | [< e=parse_expr >] -> + (* Make an anonymous proto. *) + Ast.Function (Ast.Prototype ("", [||]), e) + +(* external ::= 'extern' prototype *) +let parse_extern = parser + | [< 'Token.Extern; e=parse_prototype >] -> e diff --git a/examples/OCaml-Kaleidoscope/Chapter4/token.ml b/examples/OCaml-Kaleidoscope/Chapter4/token.ml new file mode 100644 index 00000000000..2ca782e1499 --- /dev/null +++ b/examples/OCaml-Kaleidoscope/Chapter4/token.ml @@ -0,0 +1,15 @@ +(*===----------------------------------------------------------------------=== + * Lexer Tokens + *===----------------------------------------------------------------------===*) + +(* The lexer returns these 'Kwd' if it is an unknown character, otherwise one of + * these others for known things. *) +type token = + (* commands *) + | Def | Extern + + (* primary *) + | Ident of string | Number of float + + (* unknown *) + | Kwd of char diff --git a/examples/OCaml-Kaleidoscope/Chapter4/toplevel.ml b/examples/OCaml-Kaleidoscope/Chapter4/toplevel.ml new file mode 100644 index 00000000000..01d24ede149 --- /dev/null +++ b/examples/OCaml-Kaleidoscope/Chapter4/toplevel.ml @@ -0,0 +1,49 @@ +(*===----------------------------------------------------------------------=== + * Top-Level parsing and JIT Driver + *===----------------------------------------------------------------------===*) + +open Llvm +open Llvm_executionengine + +(* top ::= definition | external | expression | ';' *) +let rec main_loop the_fpm the_execution_engine stream = + match Stream.peek stream with + | None -> () + + (* ignore top-level semicolons. *) + | Some (Token.Kwd ';') -> + Stream.junk stream; + main_loop the_fpm the_execution_engine stream + + | Some token -> + begin + try match token with + | Token.Def -> + let e = Parser.parse_definition stream in + print_endline "parsed a function definition."; + dump_value (Codegen.codegen_func the_fpm e); + | Token.Extern -> + let e = Parser.parse_extern stream in + print_endline "parsed an extern."; + dump_value (Codegen.codegen_proto e); + | _ -> + (* Evaluate a top-level expression into an anonymous function. *) + let e = Parser.parse_toplevel stream in + print_endline "parsed a top-level expr"; + let the_function = Codegen.codegen_func the_fpm e in + dump_value the_function; + + (* JIT the function, returning a function pointer. *) + let result = ExecutionEngine.run_function the_function [||] + the_execution_engine in + + print_string "Evaluated to "; + print_float (GenericValue.as_float Codegen.double_type result); + print_newline (); + with Stream.Error s | Codegen.Error s -> + (* Skip token for error recovery. *) + Stream.junk stream; + print_endline s; + end; + print_string "ready> "; flush stdout; + main_loop the_fpm the_execution_engine stream diff --git a/examples/OCaml-Kaleidoscope/Chapter4/toy.ml b/examples/OCaml-Kaleidoscope/Chapter4/toy.ml new file mode 100644 index 00000000000..5f9d912499c --- /dev/null +++ b/examples/OCaml-Kaleidoscope/Chapter4/toy.ml @@ -0,0 +1,53 @@ +(*===----------------------------------------------------------------------=== + * Main driver code. + *===----------------------------------------------------------------------===*) + +open Llvm +open Llvm_executionengine +open Llvm_target +open Llvm_scalar_opts + +let main () = + ignore (initialize_native_target ()); + + (* Install standard binary operators. + * 1 is the lowest precedence. *) + Hashtbl.add Parser.binop_precedence '<' 10; + Hashtbl.add Parser.binop_precedence '+' 20; + Hashtbl.add Parser.binop_precedence '-' 20; + Hashtbl.add Parser.binop_precedence '*' 40; (* highest. *) + + (* Prime the first token. *) + print_string "ready> "; flush stdout; + let stream = Lexer.lex (Stream.of_channel stdin) in + + (* Create the JIT. *) + let the_execution_engine = ExecutionEngine.create Codegen.the_module in + let the_fpm = PassManager.create_function Codegen.the_module in + + (* Set up the optimizer pipeline. Start with registering info about how the + * target lays out data structures. *) + TargetData.add (ExecutionEngine.target_data the_execution_engine) the_fpm; + + (* Do simple "peephole" optimizations and bit-twiddling optzn. *) + add_instruction_combination the_fpm; + + (* reassociate expressions. *) + add_reassociation the_fpm; + + (* Eliminate Common SubExpressions. *) + add_gvn the_fpm; + + (* Simplify the control flow graph (deleting unreachable blocks, etc). *) + add_cfg_simplification the_fpm; + + ignore (PassManager.initialize the_fpm); + + (* Run the main "interpreter loop" now. *) + Toplevel.main_loop the_fpm the_execution_engine stream; + + (* Print out all the generated code. *) + dump_module Codegen.the_module +;; + +main () diff --git a/examples/OCaml-Kaleidoscope/Chapter5/Makefile b/examples/OCaml-Kaleidoscope/Chapter5/Makefile new file mode 100644 index 00000000000..f31c10d3c2f --- /dev/null +++ b/examples/OCaml-Kaleidoscope/Chapter5/Makefile @@ -0,0 +1,25 @@ +##===- examples/OCaml-Kaleidoscope/Chapter5/Makefile -------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +# +# This is the makefile for the Objective Caml kaleidoscope tutorial, chapter 5. +# +##===----------------------------------------------------------------------===## + +LEVEL := ../../.. +TOOLNAME := OCaml-Kaleidoscope-Ch5 +EXAMPLE_TOOL := 1 +UsedComponents := core +UsedOcamLibs := llvm llvm_analysis llvm_executionengine llvm_target \ + llvm_scalar_opts + +OCAMLCFLAGS += -pp camlp4of + +ExcludeSources = $(PROJ_SRC_DIR)/myocamlbuild.ml + +include $(LEVEL)/bindings/ocaml/Makefile.ocaml diff --git a/examples/OCaml-Kaleidoscope/Chapter5/_tags b/examples/OCaml-Kaleidoscope/Chapter5/_tags new file mode 100644 index 00000000000..7a03dba6672 --- /dev/null +++ b/examples/OCaml-Kaleidoscope/Chapter5/_tags @@ -0,0 +1,4 @@ +<{lexer,parser}.ml>: use_camlp4, pp(camlp4of) +<*.{byte,native}>: g++, use_llvm, use_llvm_analysis +<*.{byte,native}>: use_llvm_executionengine, use_llvm_target +<*.{byte,native}>: use_llvm_scalar_opts, use_bindings diff --git a/examples/OCaml-Kaleidoscope/Chapter5/ast.ml b/examples/OCaml-Kaleidoscope/Chapter5/ast.ml new file mode 100644 index 00000000000..8a6703db837 --- /dev/null +++ b/examples/OCaml-Kaleidoscope/Chapter5/ast.ml @@ -0,0 +1,31 @@ +(*===----------------------------------------------------------------------=== + * Abstract Syntax Tree (aka Parse Tree) + *===----------------------------------------------------------------------===*) + +(* expr - Base type for all expression nodes. *) +type expr = + (* variant for numeric literals like "1.0". *) + | Number of float + + (* variant for referencing a variable, like "a". *) + | Variable of string + + (* variant for a binary operator. *) + | Binary of char * expr * expr + + (* variant for function calls. *) + | Call of string * expr array + + (* variant for if/then/else. *) + | If of expr * expr * expr + + (* variant for for/in. *) + | For of string * expr * expr * expr option * expr + +(* proto - This type represents the "prototype" for a function, which captures + * its name, and its argument names (thus implicitly the number of arguments the + * function takes). *) +type proto = Prototype of string * string array + +(* func - This type represents a function definition itself. *) +type func = Function of proto * expr diff --git a/examples/OCaml-Kaleidoscope/Chapter5/bindings.c b/examples/OCaml-Kaleidoscope/Chapter5/bindings.c new file mode 100644 index 00000000000..053513bf0c0 --- /dev/null +++ b/examples/OCaml-Kaleidoscope/Chapter5/bindings.c @@ -0,0 +1,7 @@ +#include <stdio.h> + +/* putchard - putchar that takes a double and returns 0. */ +extern double putchard(double X) { + putchar((char)X); + return 0; +} diff --git a/examples/OCaml-Kaleidoscope/Chapter5/codegen.ml b/examples/OCaml-Kaleidoscope/Chapter5/codegen.ml new file mode 100644 index 00000000000..e4570a65759 --- /dev/null +++ b/examples/OCaml-Kaleidoscope/Chapter5/codegen.ml @@ -0,0 +1,225 @@ +(*===----------------------------------------------------------------------=== + * Code Generation + *===----------------------------------------------------------------------===*) + +open Llvm + +exception Error of string + +let context = global_context () +let the_module = create_module context "my cool jit" +let builder = builder context +let named_values:(string, llvalue) Hashtbl.t = Hashtbl.create 10 +let double_type = double_type context + +let rec codegen_expr = function + | Ast.Number n -> const_float double_type n + | Ast.Variable name -> + (try Hashtbl.find named_values name with + | Not_found -> raise (Error "unknown variable name")) + | Ast.Binary (op, lhs, rhs) -> + let lhs_val = codegen_expr lhs in + let rhs_val = codegen_expr rhs in + begin + match op with + | '+' -> build_fadd lhs_val rhs_val "addtmp" builder + | '-' -> build_fsub lhs_val rhs_val "subtmp" builder + | '*' -> build_fmul lhs_val rhs_val "multmp" builder + | '<' -> + (* Convert bool 0/1 to double 0.0 or 1.0 *) + let i = build_fcmp Fcmp.Ult lhs_val rhs_val "cmptmp" builder in + build_uitofp i double_type "booltmp" builder + | _ -> raise (Error "invalid binary operator") + end + | Ast.Call (callee, args) -> + (* Look up the name in the module table. *) + let callee = + match lookup_function callee the_module with + | Some callee -> callee + | None -> raise (Error "unknown function referenced") + in + let params = params callee in + + (* If argument mismatch error. *) + if Array.length params == Array.length args then () else + raise (Error "incorrect # arguments passed"); + let args = Array.map codegen_expr args in + build_call callee args "calltmp" builder + | Ast.If (cond, then_, else_) -> + let cond = codegen_expr cond in + + (* Convert condition to a bool by comparing equal to 0.0 *) + let zero = const_float double_type 0.0 in + let cond_val = build_fcmp Fcmp.One cond zero "ifcond" builder in + + (* Grab the first block so that we might later add the conditional branch + * to it at the end of the function. *) + let start_bb = insertion_block builder in + let the_function = block_parent start_bb in + + let then_bb = append_block context "then" the_function in + + (* Emit 'then' value. *) + position_at_end then_bb builder; + let then_val = codegen_expr then_ in + + (* Codegen of 'then' can change the current block, update then_bb for the + * phi. We create a new name because one is used for the phi node, and the + * other is used for the conditional branch. *) + let new_then_bb = insertion_block builder in + + (* Emit 'else' value. *) + let else_bb = append_block context "else" the_function in + position_at_end else_bb builder; + let else_val = codegen_expr else_ in + + (* Codegen of 'else' can change the current block, update else_bb for the + * phi. *) + let new_else_bb = insertion_block builder in + + (* Emit merge block. *) + let merge_bb = append_block context "ifcont" the_function in + position_at_end merge_bb builder; + let incoming = [(then_val, new_then_bb); (else_val, new_else_bb)] in + let phi = build_phi incoming "iftmp" builder in + + (* Return to the start block to add the conditional branch. *) + position_at_end start_bb builder; + ignore (build_cond_br cond_val then_bb else_bb builder); + + (* Set a unconditional branch at the end of the 'then' block and the + * 'else' block to the 'merge' block. *) + position_at_end new_then_bb builder; ignore (build_br merge_bb builder); + position_at_end new_else_bb builder; ignore (build_br merge_bb builder); + + (* Finally, set the builder to the end of the merge block. *) + position_at_end merge_bb builder; + + phi + | Ast.For (var_name, start, end_, step, body) -> + (* Emit the start code first, without 'variable' in scope. *) + let start_val = codegen_expr start in + + (* Make the new basic block for the loop header, inserting after current + * block. *) + let preheader_bb = insertion_block builder in + let the_function = block_parent preheader_bb in + let loop_bb = append_block context "loop" the_function in + + (* Insert an explicit fall through from the current block to the + * loop_bb. *) + ignore (build_br loop_bb builder); + + (* Start insertion in loop_bb. *) + position_at_end loop_bb builder; + + (* Start the PHI node with an entry for start. *) + let variable = build_phi [(start_val, preheader_bb)] var_name builder in + + (* Within the loop, the variable is defined equal to the PHI node. If it + * shadows an existing variable, we have to restore it, so save it + * now. *) + let old_val = + try Some (Hashtbl.find named_values var_name) with Not_found -> None + in + Hashtbl.add named_values var_name variable; + + (* Emit the body of the loop. This, like any other expr, can change the + * current BB. Note that we ignore the value computed by the body, but + * don't allow an error *) + ignore (codegen_expr body); + + (* Emit the step value. *) + let step_val = + match step with + | Some step -> codegen_expr step + (* If not specified, use 1.0. *) + | None -> const_float double_type 1.0 + in + + let next_var = build_add variable step_val "nextvar" builder in + + (* Compute the end condition. *) + let end_cond = codegen_expr end_ in + + (* Convert condition to a bool by comparing equal to 0.0. *) + let zero = const_float double_type 0.0 in + let end_cond = build_fcmp Fcmp.One end_cond zero "loopcond" builder in + + (* Create the "after loop" block and insert it. *) + let loop_end_bb = insertion_block builder in + let after_bb = append_block context "afterloop" the_function in + + (* Insert the conditional branch into the end of loop_end_bb. *) + ignore (build_cond_br end_cond loop_bb after_bb builder); + + (* Any new code will be inserted in after_bb. *) + position_at_end after_bb builder; + + (* Add a new entry to the PHI node for the backedge. *) + add_incoming (next_var, loop_end_bb) variable; + + (* Restore the unshadowed variable. *) + begin match old_val with + | Some old_val -> Hashtbl.add named_values var_name old_val + | None -> () + end; + + (* for expr always returns 0.0. *) + const_null double_type + +let codegen_proto = function + | Ast.Prototype (name, args) -> + (* Make the function type: double(double,double) etc. *) + let doubles = Array.make (Array.length args) double_type in + let ft = function_type double_type doubles in + let f = + match lookup_function name the_module with + | None -> declare_function name ft the_module + + (* If 'f' conflicted, there was already something named 'name'. If it + * has a body, don't allow redefinition or reextern. *) + | Some f -> + (* If 'f' already has a body, reject this. *) + if block_begin f <> At_end f then + raise (Error "redefinition of function"); + + (* If 'f' took a different number of arguments, reject. *) + if element_type (type_of f) <> ft then + raise (Error "redefinition of function with different # args"); + f + in + + (* Set names for all arguments. *) + Array.iteri (fun i a -> + let n = args.(i) in + set_value_name n a; + Hashtbl.add named_values n a; + ) (params f); + f + +let codegen_func the_fpm = function + | Ast.Function (proto, body) -> + Hashtbl.clear named_values; + let the_function = codegen_proto proto in + + (* Create a new basic block to start insertion into. *) + let bb = append_block context "entry" the_function in + position_at_end bb builder; + + try + let ret_val = codegen_expr body in + + (* Finish off the function. *) + let _ = build_ret ret_val builder in + + (* Validate the generated code, checking for consistency. *) + Llvm_analysis.assert_valid_function the_function; + + (* Optimize the function. *) + let _ = PassManager.run_function the_function the_fpm in + + the_function + with e -> + delete_function the_function; + raise e diff --git a/examples/OCaml-Kaleidoscope/Chapter5/lexer.ml b/examples/OCaml-Kaleidoscope/Chapter5/lexer.ml new file mode 100644 index 00000000000..d8c1d563010 --- /dev/null +++ b/examples/OCaml-Kaleidoscope/Chapter5/lexer.ml @@ -0,0 +1,57 @@ +(*===----------------------------------------------------------------------=== + * Lexer + *===----------------------------------------------------------------------===*) + +let rec lex = parser + (* Skip any whitespace. *) + | [< ' (' ' | '\n' | '\r' | '\t'); stream >] -> lex stream + + (* identifier: [a-zA-Z][a-zA-Z0-9] *) + | [< ' ('A' .. 'Z' | 'a' .. 'z' as c); stream >] -> + let buffer = Buffer.create 1 in + Buffer.add_char buffer c; + lex_ident buffer stream + + (* number: [0-9.]+ *) + | [< ' ('0' .. '9' as c); stream >] -> + let buffer = Buffer.create 1 in + Buffer.add_char buffer c; + lex_number buffer stream + + (* Comment until end of line. *) + | [< ' ('#'); stream >] -> + lex_comment stream + + (* Otherwise, just return the character as its ascii value. *) + | [< 'c; stream >] -> + [< 'Token.Kwd c; lex stream >] + + (* end of stream. *) + | [< >] -> [< >] + +and lex_number buffer = parser + | [< ' ('0' .. '9' | '.' as c); stream >] -> + Buffer.add_char buffer c; + lex_number buffer stream + | [< stream=lex >] -> + [< 'Token.Number (float_of_string (Buffer.contents buffer)); stream >] + +and lex_ident buffer = parser + | [< ' ('A' .. 'Z' | 'a' .. 'z' | '0' .. '9' as c); stream >] -> + Buffer.add_char buffer c; + lex_ident buffer stream + | [< stream=lex >] -> + match Buffer.contents buffer with + | "def" -> [< 'Token.Def; stream >] + | "extern" -> [< 'Token.Extern; stream >] + | "if" -> [< 'Token.If; stream >] + | "then" -> [< 'Token.Then; stream >] + | "else" -> [< 'Token.Else; stream >] + | "for" -> [< 'Token.For; stream >] + | "in" -> [< 'Token.In; stream >] + | id -> [< 'Token.Ident id; stream >] + +and lex_comment = parser + | [< ' ('\n'); stream=lex >] -> stream + | [< 'c; e=lex_comment >] -> e + | [< >] -> [< >] diff --git a/examples/OCaml-Kaleidoscope/Chapter5/myocamlbuild.ml b/examples/OCaml-Kaleidoscope/Chapter5/myocamlbuild.ml new file mode 100644 index 00000000000..ff42664c43b --- /dev/null +++ b/examples/OCaml-Kaleidoscope/Chapter5/myocamlbuild.ml @@ -0,0 +1,10 @@ +open Ocamlbuild_plugin;; + +ocaml_lib ~extern:true "llvm";; +ocaml_lib ~extern:true "llvm_analysis";; +ocaml_lib ~extern:true "llvm_executionengine";; +ocaml_lib ~extern:true "llvm_target";; +ocaml_lib ~extern:true "llvm_scalar_opts";; + +flag ["link"; "ocaml"; "g++"] (S[A"-cc"; A"g++"]);; +dep ["link"; "ocaml"; "use_bindings"] ["bindings.o"];; diff --git a/examples/OCaml-Kaleidoscope/Chapter5/parser.ml b/examples/OCaml-Kaleidoscope/Chapter5/parser.ml new file mode 100644 index 00000000000..bfb4f168458 --- /dev/null +++ b/examples/OCaml-Kaleidoscope/Chapter5/parser.ml @@ -0,0 +1,158 @@ +(*===---------------------------------------------------------------------=== + * Parser + *===---------------------------------------------------------------------===*) + +(* binop_precedence - This holds the precedence for each binary operator that is + * defined *) +let binop_precedence:(char, int) Hashtbl.t = Hashtbl.create 10 + +(* precedence - Get the precedence of the pending binary operator token. *) +let precedence c = try Hashtbl.find binop_precedence c with Not_found -> -1 + +(* primary + * ::= identifier + * ::= numberexpr + * ::= parenexpr + * ::= ifexpr + * ::= forexpr *) +let rec parse_primary = parser + (* numberexpr ::= number *) + | [< 'Token.Number n >] -> Ast.Number n + + (* parenexpr ::= '(' expression ')' *) + | [< 'Token.Kwd '('; e=parse_expr; 'Token.Kwd ')' ?? "expected ')'" >] -> e + + (* identifierexpr + * ::= identifier + * ::= identifier '(' argumentexpr ')' *) + | [< 'Token.Ident id; stream >] -> + let rec parse_args accumulator = parser + | [< e=parse_expr; stream >] -> + begin parser + | [< 'Token.Kwd ','; e=parse_args (e :: accumulator) >] -> e + | [< >] -> e :: accumulator + end stream + | [< >] -> accumulator + in + let rec parse_ident id = parser + (* Call. *) + | [< 'Token.Kwd '('; + args=parse_args []; + 'Token.Kwd ')' ?? "expected ')'">] -> + Ast.Call (id, Array.of_list (List.rev args)) + + (* Simple variable ref. *) + | [< >] -> Ast.Variable id + in + parse_ident id stream + + (* ifexpr ::= 'if' expr 'then' expr 'else' expr *) + | [< 'Token.If; c=parse_expr; + 'Token.Then ?? "expected 'then'"; t=parse_expr; + 'Token.Else ?? "expected 'else'"; e=parse_expr >] -> + Ast.If (c, t, e) + + (* forexpr + ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression *) + | [< 'Token.For; + 'Token.Ident id ?? "expected identifier after for"; + 'Token.Kwd '=' ?? "expected '=' after for"; + stream >] -> + begin parser + | [< + start=parse_expr; + 'Token.Kwd ',' ?? "expected ',' after for"; + end_=parse_expr; + stream >] -> + let step = + begin parser + | [< 'Token.Kwd ','; step=parse_expr >] -> Some step + | [< >] -> None + end stream + in + begin parser + | [< 'Token.In; body=parse_expr >] -> + Ast.For (id, start, end_, step, body) + | [< >] -> + raise (Stream.Error "expected 'in' after for") + end stream + | [< >] -> + raise (Stream.Error "expected '=' after for") + end stream + + | [< >] -> raise (Stream.Error "unknown token when expecting an expression.") + +(* binoprhs + * ::= ('+' primary)* *) +and parse_bin_rhs expr_prec lhs stream = + match Stream.peek stream with + (* If this is a binop, find its precedence. *) + | Some (Token.Kwd c) when Hashtbl.mem binop_precedence c -> + let token_prec = precedence c in + + (* If this is a binop that binds at least as tightly as the current binop, + * consume it, otherwise we are done. *) + if token_prec < expr_prec then lhs else begin + (* Eat the binop. *) + Stream.junk stream; + + (* Parse the primary expression after the binary operator. *) + let rhs = parse_primary stream in + + (* Okay, we know this is a binop. *) + let rhs = + match Stream.peek stream with + | Some (Token.Kwd c2) -> + (* If BinOp binds less tightly with rhs than the operator after + * rhs, let the pending operator take rhs as its lhs. *) + let next_prec = precedence c2 in + if token_prec < next_prec + then parse_bin_rhs (token_prec + 1) rhs stream + else rhs + | _ -> rhs + in + + (* Merge lhs/rhs. *) + let lhs = Ast.Binary (c, lhs, rhs) in + parse_bin_rhs expr_prec lhs stream + end + | _ -> lhs + +(* expression + * ::= primary binoprhs *) +and parse_expr = parser + | [< lhs=parse_primary; stream >] -> parse_bin_rhs 0 lhs stream + +(* prototype + * ::= id '(' id* ')' *) +let parse_prototype = + let rec parse_args accumulator = parser + | [< 'Token.Ident id; e=parse_args (id::accumulator) >] -> e + | [< >] -> accumulator + in + + parser + | [< 'Token.Ident id; + 'Token.Kwd '(' ?? "expected '(' in prototype"; + args=parse_args []; + 'Token.Kwd ')' ?? "expected ')' in prototype" >] -> + (* success. *) + Ast.Prototype (id, Array.of_list (List.rev args)) + + | [< >] -> + raise (Stream.Error "expected function name in prototype") + +(* definition ::= 'def' prototype expression *) +let parse_definition = parser + | [< 'Token.Def; p=parse_prototype; e=parse_expr >] -> + Ast.Function (p, e) + +(* toplevelexpr ::= expression *) +let parse_toplevel = parser + | [< e=parse_expr >] -> + (* Make an anonymous proto. *) + Ast.Function (Ast.Prototype ("", [||]), e) + +(* external ::= 'extern' prototype *) +let parse_extern = parser + | [< 'Token.Extern; e=parse_prototype >] -> e diff --git a/examples/OCaml-Kaleidoscope/Chapter5/token.ml b/examples/OCaml-Kaleidoscope/Chapter5/token.ml new file mode 100644 index 00000000000..5eb502fb894 --- /dev/null +++ b/examples/OCaml-Kaleidoscope/Chapter5/token.ml @@ -0,0 +1,19 @@ +(*===----------------------------------------------------------------------=== + * Lexer Tokens + *===----------------------------------------------------------------------===*) + +(* The lexer returns these 'Kwd' if it is an unknown character, otherwise one of + * these others for known things. *) +type token = + (* commands *) + | Def | Extern + + (* primary *) + | Ident of string | Number of float + + (* unknown *) + | Kwd of char + + (* control *) + | If | Then | Else + | For | In diff --git a/examples/OCaml-Kaleidoscope/Chapter5/toplevel.ml b/examples/OCaml-Kaleidoscope/Chapter5/toplevel.ml new file mode 100644 index 00000000000..01d24ede149 --- /dev/null +++ b/examples/OCaml-Kaleidoscope/Chapter5/toplevel.ml @@ -0,0 +1,49 @@ +(*===----------------------------------------------------------------------=== + * Top-Level parsing and JIT Driver + *===----------------------------------------------------------------------===*) + +open Llvm +open Llvm_executionengine + +(* top ::= definition | external | expression | ';' *) +let rec main_loop the_fpm the_execution_engine stream = + match Stream.peek stream with + | None -> () + + (* ignore top-level semicolons. *) + | Some (Token.Kwd ';') -> + Stream.junk stream; + main_loop the_fpm the_execution_engine stream + + | Some token -> + begin + try match token with + | Token.Def -> + let e = Parser.parse_definition stream in + print_endline "parsed a function definition."; + dump_value (Codegen.codegen_func the_fpm e); + | Token.Extern -> + let e = Parser.parse_extern stream in + print_endline "parsed an extern."; + dump_value (Codegen.codegen_proto e); + | _ -> + (* Evaluate a top-level expression into an anonymous function. *) + let e = Parser.parse_toplevel stream in + print_endline "parsed a top-level expr"; + let the_function = Codegen.codegen_func the_fpm e in + dump_value the_function; + + (* JIT the function, returning a function pointer. *) + let result = ExecutionEngine.run_function the_function [||] + the_execution_engine in + + print_string "Evaluated to "; + print_float (GenericValue.as_float Codegen.double_type result); + print_newline (); + with Stream.Error s | Codegen.Error s -> + (* Skip token for error recovery. *) + Stream.junk stream; + print_endline s; + end; + print_string "ready> "; flush stdout; + main_loop the_fpm the_execution_engine stream diff --git a/examples/OCaml-Kaleidoscope/Chapter5/toy.ml b/examples/OCaml-Kaleidoscope/Chapter5/toy.ml new file mode 100644 index 00000000000..5f9d912499c --- /dev/null +++ b/examples/OCaml-Kaleidoscope/Chapter5/toy.ml @@ -0,0 +1,53 @@ +(*===----------------------------------------------------------------------=== + * Main driver code. + *===----------------------------------------------------------------------===*) + +open Llvm +open Llvm_executionengine +open Llvm_target +open Llvm_scalar_opts + +let main () = + ignore (initialize_native_target ()); + + (* Install standard binary operators. + * 1 is the lowest precedence. *) + Hashtbl.add Parser.binop_precedence '<' 10; + Hashtbl.add Parser.binop_precedence '+' 20; + Hashtbl.add Parser.binop_precedence '-' 20; + Hashtbl.add Parser.binop_precedence '*' 40; (* highest. *) + + (* Prime the first token. *) + print_string "ready> "; flush stdout; + let stream = Lexer.lex (Stream.of_channel stdin) in + + (* Create the JIT. *) + let the_execution_engine = ExecutionEngine.create Codegen.the_module in + let the_fpm = PassManager.create_function Codegen.the_module in + + (* Set up the optimizer pipeline. Start with registering info about how the + * target lays out data structures. *) + TargetData.add (ExecutionEngine.target_data the_execution_engine) the_fpm; + + (* Do simple "peephole" optimizations and bit-twiddling optzn. *) + add_instruction_combination the_fpm; + + (* reassociate expressions. *) + add_reassociation the_fpm; + + (* Eliminate Common SubExpressions. *) + add_gvn the_fpm; + + (* Simplify the control flow graph (deleting unreachable blocks, etc). *) + add_cfg_simplification the_fpm; + + ignore (PassManager.initialize the_fpm); + + (* Run the main "interpreter loop" now. *) + Toplevel.main_loop the_fpm the_execution_engine stream; + + (* Print out all the generated code. *) + dump_module Codegen.the_module +;; + +main () diff --git a/examples/OCaml-Kaleidoscope/Chapter6/Makefile b/examples/OCaml-Kaleidoscope/Chapter6/Makefile new file mode 100644 index 00000000000..21f0c53df4b --- /dev/null +++ b/examples/OCaml-Kaleidoscope/Chapter6/Makefile @@ -0,0 +1,34 @@ +##===- examples/OCaml-Kaleidoscope/Chapter6/Makefile -------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +# +# This is the makefile for the Objective Caml kaleidoscope tutorial, chapter 6. +# +##===----------------------------------------------------------------------===## + +LEVEL := ../../.. +TOOLNAME := OCaml-Kaleidoscope-Ch6 +EXAMPLE_TOOL := 1 +UsedComponents := core +UsedOcamLibs := llvm llvm_analysis llvm_executionengine llvm_target \ + llvm_scalar_opts + +OCAMLCFLAGS += -pp camlp4of + +OcamlSources1 = \ + $(PROJ_SRC_DIR)/ast.ml \ + $(PROJ_SRC_DIR)/parser.ml \ + $(PROJ_SRC_DIR)/codegen.ml \ + $(PROJ_SRC_DIR)/lexer.ml \ + $(PROJ_SRC_DIR)/token.ml \ + $(PROJ_SRC_DIR)/toplevel.ml \ + $(PROJ_SRC_DIR)/toy.ml + +ExcludeSources = $(PROJ_SRC_DIR)/myocamlbuild.ml + +include $(LEVEL)/bindings/ocaml/Makefile.ocaml diff --git a/examples/OCaml-Kaleidoscope/Chapter6/_tags b/examples/OCaml-Kaleidoscope/Chapter6/_tags new file mode 100644 index 00000000000..7a03dba6672 --- /dev/null +++ b/examples/OCaml-Kaleidoscope/Chapter6/_tags @@ -0,0 +1,4 @@ +<{lexer,parser}.ml>: use_camlp4, pp(camlp4of) +<*.{byte,native}>: g++, use_llvm, use_llvm_analysis +<*.{byte,native}>: use_llvm_executionengine, use_llvm_target +<*.{byte,native}>: use_llvm_scalar_opts, use_bindings diff --git a/examples/OCaml-Kaleidoscope/Chapter6/ast.ml b/examples/OCaml-Kaleidoscope/Chapter6/ast.ml new file mode 100644 index 00000000000..99088cfd2f5 --- /dev/null +++ b/examples/OCaml-Kaleidoscope/Chapter6/ast.ml @@ -0,0 +1,36 @@ +(*===----------------------------------------------------------------------=== + * Abstract Syntax Tree (aka Parse Tree) + *===----------------------------------------------------------------------===*) + +(* expr - Base type for all expression nodes. *) +type expr = + (* variant for numeric literals like "1.0". *) + | Number of float + + (* variant for referencing a variable, like "a". *) + | Variable of string + + (* variant for a unary operator. *) + | Unary of char * expr + + (* variant for a binary operator. *) + | Binary of char * expr * expr + + (* variant for function calls. *) + | Call of string * expr array + + (* variant for if/then/else. *) + | If of expr * expr * expr + + (* variant for for/in. *) + | For of string * expr * expr * expr option * expr + +(* proto - This type represents the "prototype" for a function, which captures + * its name, and its argument names (thus implicitly the number of arguments the + * function takes). *) +type proto = + | Prototype of string * string array + | BinOpPrototype of string * string array * int + +(* func - This type represents a function definition itself. *) +type func = Function of proto * expr diff --git a/examples/OCaml-Kaleidoscope/Chapter6/bindings.c b/examples/OCaml-Kaleidoscope/Chapter6/bindings.c new file mode 100644 index 00000000000..90faed15813 --- /dev/null +++ b/examples/OCaml-Kaleidoscope/Chapter6/bindings.c @@ -0,0 +1,13 @@ +#include <stdio.h> + +/* putchard - putchar that takes a double and returns 0. */ +extern double putchard(double X) { + putchar((char)X); + return 0; +} + +/* printd - printf that takes a double prints it as "%f\n", returning 0. */ +extern double printd(double X) { + printf("%f\n", X); + return 0; +} diff --git a/examples/OCaml-Kaleidoscope/Chapter6/codegen.ml b/examples/OCaml-Kaleidoscope/Chapter6/codegen.ml new file mode 100644 index 00000000000..96674359440 --- /dev/null +++ b/examples/OCaml-Kaleidoscope/Chapter6/codegen.ml @@ -0,0 +1,251 @@ +(*===----------------------------------------------------------------------=== + * Code Generation + *===----------------------------------------------------------------------===*) + +open Llvm + +exception Error of string + +let context = global_context () +let the_module = create_module context "my cool jit" +let builder = builder context +let named_values:(string, llvalue) Hashtbl.t = Hashtbl.create 10 +let double_type = double_type context + +let rec codegen_expr = function + | Ast.Number n -> const_float double_type n + | Ast.Variable name -> + (try Hashtbl.find named_values name with + | Not_found -> raise (Error "unknown variable name")) + | Ast.Unary (op, operand) -> + let operand = codegen_expr operand in + let callee = "unary" ^ (String.make 1 op) in + let callee = + match lookup_function callee the_module with + | Some callee -> callee + | None -> raise (Error "unknown unary operator") + in + build_call callee [|operand|] "unop" builder + | Ast.Binary (op, lhs, rhs) -> + let lhs_val = codegen_expr lhs in + let rhs_val = codegen_expr rhs in + begin + match op with + | '+' -> build_fadd lhs_val rhs_val "addtmp" builder + | '-' -> build_fsub lhs_val rhs_val "subtmp" builder + | '*' -> build_fmul lhs_val rhs_val "multmp" builder + | '<' -> + (* Convert bool 0/1 to double 0.0 or 1.0 *) + let i = build_fcmp Fcmp.Ult lhs_val rhs_val "cmptmp" builder in + build_uitofp i double_type "booltmp" builder + | _ -> + (* If it wasn't a builtin binary operator, it must be a user defined + * one. Emit a call to it. *) + let callee = "binary" ^ (String.make 1 op) in + let callee = + match lookup_function callee the_module with + | Some callee -> callee + | None -> raise (Error "binary operator not found!") + in + build_call callee [|lhs_val; rhs_val|] "binop" builder + end + | Ast.Call (callee, args) -> + (* Look up the name in the module table. *) + let callee = + match lookup_function callee the_module with + | Some callee -> callee + | None -> raise (Error "unknown function referenced") + in + let params = params callee in + + (* If argument mismatch error. *) + if Array.length params == Array.length args then () else + raise (Error "incorrect # arguments passed"); + let args = Array.map codegen_expr args in + build_call callee args "calltmp" builder + | Ast.If (cond, then_, else_) -> + let cond = codegen_expr cond in + + (* Convert condition to a bool by comparing equal to 0.0 *) + let zero = const_float double_type 0.0 in + let cond_val = build_fcmp Fcmp.One cond zero "ifcond" builder in + + (* Grab the first block so that we might later add the conditional branch + * to it at the end of the function. *) + let start_bb = insertion_block builder in + let the_function = block_parent start_bb in + + let then_bb = append_block context "then" the_function in + + (* Emit 'then' value. *) + position_at_end then_bb builder; + let then_val = codegen_expr then_ in + + (* Codegen of 'then' can change the current block, update then_bb for the + * phi. We create a new name because one is used for the phi node, and the + * other is used for the conditional branch. *) + let new_then_bb = insertion_block builder in + + (* Emit 'else' value. *) + let else_bb = append_block context "else" the_function in + position_at_end else_bb builder; + let else_val = codegen_expr else_ in + + (* Codegen of 'else' can change the current block, update else_bb for the + * phi. *) + let new_else_bb = insertion_block builder in + + (* Emit merge block. *) + let merge_bb = append_block context "ifcont" the_function in + position_at_end merge_bb builder; + let incoming = [(then_val, new_then_bb); (else_val, new_else_bb)] in + let phi = build_phi incoming "iftmp" builder in + + (* Return to the start block to add the conditional branch. *) + position_at_end start_bb builder; + ignore (build_cond_br cond_val then_bb else_bb builder); + + (* Set a unconditional branch at the end of the 'then' block and the + * 'else' block to the 'merge' block. *) + position_at_end new_then_bb builder; ignore (build_br merge_bb builder); + position_at_end new_else_bb builder; ignore (build_br merge_bb builder); + + (* Finally, set the builder to the end of the merge block. *) + position_at_end merge_bb builder; + + phi + | Ast.For (var_name, start, end_, step, body) -> + (* Emit the start code first, without 'variable' in scope. *) + let start_val = codegen_expr start in + + (* Make the new basic block for the loop header, inserting after current + * block. *) + let preheader_bb = insertion_block builder in + let the_function = block_parent preheader_bb in + let loop_bb = append_block context "loop" the_function in + + (* Insert an explicit fall through from the current block to the + * loop_bb. *) + ignore (build_br loop_bb builder); + + (* Start insertion in loop_bb. *) + position_at_end loop_bb builder; + + (* Start the PHI node with an entry for start. *) + let variable = build_phi [(start_val, preheader_bb)] var_name builder in + + (* Within the loop, the variable is defined equal to the PHI node. If it + * shadows an existing variable, we have to restore it, so save it + * now. *) + let old_val = + try Some (Hashtbl.find named_values var_name) with Not_found -> None + in + Hashtbl.add named_values var_name variable; + + (* Emit the body of the loop. This, like any other expr, can change the + * current BB. Note that we ignore the value computed by the body, but + * don't allow an error *) + ignore (codegen_expr body); + + (* Emit the step value. *) + let step_val = + match step with + | Some step -> codegen_expr step + (* If not specified, use 1.0. *) + | None -> const_float double_type 1.0 + in + + let next_var = build_add variable step_val "nextvar" builder in + + (* Compute the end condition. *) + let end_cond = codegen_expr end_ in + + (* Convert condition to a bool by comparing equal to 0.0. *) + let zero = const_float double_type 0.0 in + let end_cond = build_fcmp Fcmp.One end_cond zero "loopcond" builder in + + (* Create the "after loop" block and insert it. *) + let loop_end_bb = insertion_block builder in + let after_bb = append_block context "afterloop" the_function in + + (* Insert the conditional branch into the end of loop_end_bb. *) + ignore (build_cond_br end_cond loop_bb after_bb builder); + + (* Any new code will be inserted in after_bb. *) + position_at_end after_bb builder; + + (* Add a new entry to the PHI node for the backedge. *) + add_incoming (next_var, loop_end_bb) variable; + + (* Restore the unshadowed variable. *) + begin match old_val with + | Some old_val -> Hashtbl.add named_values var_name old_val + | None -> () + end; + + (* for expr always returns 0.0. *) + const_null double_type + +let codegen_proto = function + | Ast.Prototype (name, args) | Ast.BinOpPrototype (name, args, _) -> + (* Make the function type: double(double,double) etc. *) + let doubles = Array.make (Array.length args) double_type in + let ft = function_type double_type doubles in + let f = + match lookup_function name the_module with + | None -> declare_function name ft the_module + + (* If 'f' conflicted, there was already something named 'name'. If it + * has a body, don't allow redefinition or reextern. *) + | Some f -> + (* If 'f' already has a body, reject this. *) + if block_begin f <> At_end f then + raise (Error "redefinition of function"); + + (* If 'f' took a different number of arguments, reject. *) + if element_type (type_of f) <> ft then + raise (Error "redefinition of function with different # args"); + f + in + + (* Set names for all arguments. *) + Array.iteri (fun i a -> + let n = args.(i) in + set_value_name n a; + Hashtbl.add named_values n a; + ) (params f); + f + +let codegen_func the_fpm = function + | Ast.Function (proto, body) -> + Hashtbl.clear named_values; + let the_function = codegen_proto proto in + + (* If this is an operator, install it. *) + begin match proto with + | Ast.BinOpPrototype (name, args, prec) -> + let op = name.[String.length name - 1] in + Hashtbl.add Parser.binop_precedence op prec; + | _ -> () + end; + + (* Create a new basic block to start insertion into. *) + let bb = append_block context "entry" the_function in + position_at_end bb builder; + + try + let ret_val = codegen_expr body in + + (* Finish off the function. *) + let _ = build_ret ret_val builder in + + (* Validate the generated code, checking for consistency. *) + Llvm_analysis.assert_valid_function the_function; + + (* Optimize the function. *) + let _ = PassManager.run_function the_function the_fpm in + + the_function + with e -> + delete_function the_function; + raise e diff --git a/examples/OCaml-Kaleidoscope/Chapter6/lexer.ml b/examples/OCaml-Kaleidoscope/Chapter6/lexer.ml new file mode 100644 index 00000000000..5fe9da4f16c --- /dev/null +++ b/examples/OCaml-Kaleidoscope/Chapter6/lexer.ml @@ -0,0 +1,59 @@ +(*===----------------------------------------------------------------------=== + * Lexer + *===----------------------------------------------------------------------===*) + +let rec lex = parser + (* Skip any whitespace. *) + | [< ' (' ' | '\n' | '\r' | '\t'); stream >] -> lex stream + + (* identifier: [a-zA-Z][a-zA-Z0-9] *) + | [< ' ('A' .. 'Z' | 'a' .. 'z' as c); stream >] -> + let buffer = Buffer.create 1 in + Buffer.add_char buffer c; + lex_ident buffer stream + + (* number: [0-9.]+ *) + | [< ' ('0' .. '9' as c); stream >] -> + let buffer = Buffer.create 1 in + Buffer.add_char buffer c; + lex_number buffer stream + + (* Comment until end of line. *) + | [< ' ('#'); stream >] -> + lex_comment stream + + (* Otherwise, just return the character as its ascii value. *) + | [< 'c; stream >] -> + [< 'Token.Kwd c; lex stream >] + + (* end of stream. *) + | [< >] -> [< >] + +and lex_number buffer = parser + | [< ' ('0' .. '9' | '.' as c); stream >] -> + Buffer.add_char buffer c; + lex_number buffer stream + | [< stream=lex >] -> + [< 'Token.Number (float_of_string (Buffer.contents buffer)); stream >] + +and lex_ident buffer = parser + | [< ' ('A' .. 'Z' | 'a' .. 'z' | '0' .. '9' as c); stream >] -> + Buffer.add_char buffer c; + lex_ident buffer stream + | [< stream=lex >] -> + match Buffer.contents buffer with + | "def" -> [< 'Token.Def; stream >] + | "extern" -> [< 'Token.Extern; stream >] + | "if" -> [< 'Token.If; stream >] + | "then" -> [< 'Token.Then; stream >] + | "else" -> [< 'Token.Else; stream >] + | "for" -> [< 'Token.For; stream >] + | "in" -> [< 'Token.In; stream >] + | "binary" -> [< 'Token.Binary; stream >] + | "unary" -> [< 'Token.Unary; stream >] + | id -> [< 'Token.Ident id; stream >] + +and lex_comment = parser + | [< ' ('\n'); stream=lex >] -> stream + | [< 'c; e=lex_comment >] -> e + | [< >] -> [< >] diff --git a/examples/OCaml-Kaleidoscope/Chapter6/myocamlbuild.ml b/examples/OCaml-Kaleidoscope/Chapter6/myocamlbuild.ml new file mode 100644 index 00000000000..54d3fd97709 --- /dev/null +++ b/examples/OCaml-Kaleidoscope/Chapter6/myocamlbuild.ml @@ -0,0 +1,10 @@ +open Ocamlbuild_plugin;; + +ocaml_lib ~extern:true "llvm";; +ocaml_lib ~extern:true "llvm_analysis";; +ocaml_lib ~extern:true "llvm_executionengine";; +ocaml_lib ~extern:true "llvm_target";; +ocaml_lib ~extern:true "llvm_scalar_opts";; + +flag ["link"; "ocaml"; "g++"] (S[A"-cc"; A"g++"; A"-cclib"; A"-rdynamic"]);; +dep ["link"; "ocaml"; "use_bindings"] ["bindings.o"];; diff --git a/examples/OCaml-Kaleidoscope/Chapter6/parser.ml b/examples/OCaml-Kaleidoscope/Chapter6/parser.ml new file mode 100644 index 00000000000..da443c5bb68 --- /dev/null +++ b/examples/OCaml-Kaleidoscope/Chapter6/parser.ml @@ -0,0 +1,195 @@ +(*===---------------------------------------------------------------------=== + * Parser + *===---------------------------------------------------------------------===*) + +(* binop_precedence - This holds the precedence for each binary operator that is + * defined *) +let binop_precedence:(char, int) Hashtbl.t = Hashtbl.create 10 + +(* precedence - Get the precedence of the pending binary operator token. *) +let precedence c = try Hashtbl.find binop_precedence c with Not_found -> -1 + +(* primary + * ::= identifier + * ::= numberexpr + * ::= parenexpr + * ::= ifexpr + * ::= forexpr *) +let rec parse_primary = parser + (* numberexpr ::= number *) + | [< 'Token.Number n >] -> Ast.Number n + + (* parenexpr ::= '(' expression ')' *) + | [< 'Token.Kwd '('; e=parse_expr; 'Token.Kwd ')' ?? "expected ')'" >] -> e + + (* identifierexpr + * ::= identifier + * ::= identifier '(' argumentexpr ')' *) + | [< 'Token.Ident id; stream >] -> + let rec parse_args accumulator = parser + | [< e=parse_expr; stream >] -> + begin parser + | [< 'Token.Kwd ','; e=parse_args (e :: accumulator) >] -> e + | [< >] -> e :: accumulator + end stream + | [< >] -> accumulator + in + let rec parse_ident id = parser + (* Call. *) + | [< 'Token.Kwd '('; + args=parse_args []; + 'Token.Kwd ')' ?? "expected ')'">] -> + Ast.Call (id, Array.of_list (List.rev args)) + + (* Simple variable ref. *) + | [< >] -> Ast.Variable id + in + parse_ident id stream + + (* ifexpr ::= 'if' expr 'then' expr 'else' expr *) + | [< 'Token.If; c=parse_expr; + 'Token.Then ?? "expected 'then'"; t=parse_expr; + 'Token.Else ?? "expected 'else'"; e=parse_expr >] -> + Ast.If (c, t, e) + + (* forexpr + ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression *) + | [< 'Token.For; + 'Token.Ident id ?? "expected identifier after for"; + 'Token.Kwd '=' ?? "expected '=' after for"; + stream >] -> + begin parser + | [< + start=parse_expr; + 'Token.Kwd ',' ?? "expected ',' after for"; + end_=parse_expr; + stream >] -> + let step = + begin parser + | [< 'Token.Kwd ','; step=parse_expr >] -> Some step + | [< >] -> None + end stream + in + begin parser + | [< 'Token.In; body=parse_expr >] -> + Ast.For (id, start, end_, step, body) + | [< >] -> + raise (Stream.Error "expected 'in' after for") + end stream + | [< >] -> + raise (Stream.Error "expected '=' after for") + end stream + + | [< >] -> raise (Stream.Error "unknown token when expecting an expression.") + +(* unary + * ::= primary + * ::= '!' unary *) +and parse_unary = parser + (* If this is a unary operator, read it. *) + | [< 'Token.Kwd op when op != '(' && op != ')'; operand=parse_expr >] -> + Ast.Unary (op, operand) + + (* If the current token is not an operator, it must be a primary expr. *) + | [< stream >] -> parse_primary stream + +(* binoprhs + * ::= ('+' primary)* *) +and parse_bin_rhs expr_prec lhs stream = + match Stream.peek stream with + (* If this is a binop, find its precedence. *) + | Some (Token.Kwd c) when Hashtbl.mem binop_precedence c -> + let token_prec = precedence c in + + (* If this is a binop that binds at least as tightly as the current binop, + * consume it, otherwise we are done. *) + if token_prec < expr_prec then lhs else begin + (* Eat the binop. *) + Stream.junk stream; + + (* Parse the unary expression after the binary operator. *) + let rhs = parse_unary stream in + + (* Okay, we know this is a binop. *) + let rhs = + match Stream.peek stream with + | Some (Token.Kwd c2) -> + (* If BinOp binds less tightly with rhs than the operator after + * rhs, let the pending operator take rhs as its lhs. *) + let next_prec = precedence c2 in + if token_prec < next_prec + then parse_bin_rhs (token_prec + 1) rhs stream + else rhs + | _ -> rhs + in + + (* Merge lhs/rhs. *) + let lhs = Ast.Binary (c, lhs, rhs) in + parse_bin_rhs expr_prec lhs stream + end + | _ -> lhs + +(* expression + * ::= primary binoprhs *) +and parse_expr = parser + | [< lhs=parse_unary; stream >] -> parse_bin_rhs 0 lhs stream + +(* prototype + * ::= id '(' id* ')' + * ::= binary LETTER number? (id, id) + * ::= unary LETTER number? (id) *) +let parse_prototype = + let rec parse_args accumulator = parser + | [< 'Token.Ident id; e=parse_args (id::accumulator) >] -> e + | [< >] -> accumulator + in + let parse_operator = parser + | [< 'Token.Unary >] -> "unary", 1 + | [< 'Token.Binary >] -> "binary", 2 + in + let parse_binary_precedence = parser + | [< 'Token.Number n >] -> int_of_float n + | [< >] -> 30 + in + parser + | [< 'Token.Ident id; + 'Token.Kwd '(' ?? "expected '(' in prototype"; + args=parse_args []; + 'Token.Kwd ')' ?? "expected ')' in prototype" >] -> + (* success. *) + Ast.Prototype (id, Array.of_list (List.rev args)) + | [< (prefix, kind)=parse_operator; + 'Token.Kwd op ?? "expected an operator"; + (* Read the precedence if present. *) + binary_precedence=parse_binary_precedence; + 'Token.Kwd '(' ?? "expected '(' in prototype"; + args=parse_args []; + 'Token.Kwd ')' ?? "expected ')' in prototype" >] -> + let name = prefix ^ (String.make 1 op) in + let args = Array.of_list (List.rev args) in + + (* Verify right number of arguments for operator. *) + if Array.length args != kind + then raise (Stream.Error "invalid number of operands for operator") + else + if kind == 1 then + Ast.Prototype (name, args) + else + Ast.BinOpPrototype (name, args, binary_precedence) + | [< >] -> + raise (Stream.Error "expected function name in prototype") + +(* definition ::= 'def' prototype expression *) +let parse_definition = parser + | [< 'Token.Def; p=parse_prototype; e=parse_expr >] -> + Ast.Function (p, e) + +(* toplevelexpr ::= expression *) +let parse_toplevel = parser + | [< e=parse_expr >] -> + (* Make an anonymous proto. *) + Ast.Function (Ast.Prototype ("", [||]), e) + +(* external ::= 'extern' prototype *) +let parse_extern = parser + | [< 'Token.Extern; e=parse_prototype >] -> e diff --git a/examples/OCaml-Kaleidoscope/Chapter6/token.ml b/examples/OCaml-Kaleidoscope/Chapter6/token.ml new file mode 100644 index 00000000000..c7a5f95092d --- /dev/null +++ b/examples/OCaml-Kaleidoscope/Chapter6/token.ml @@ -0,0 +1,22 @@ +(*===----------------------------------------------------------------------=== + * Lexer Tokens + *===----------------------------------------------------------------------===*) + +(* The lexer returns these 'Kwd' if it is an unknown character, otherwise one of + * these others for known things. *) +type token = + (* commands *) + | Def | Extern + + (* primary *) + | Ident of string | Number of float + + (* unknown *) + | Kwd of char + + (* control *) + | If | Then | Else + | For | In + + (* operators *) + | Binary | Unary diff --git a/examples/OCaml-Kaleidoscope/Chapter6/toplevel.ml b/examples/OCaml-Kaleidoscope/Chapter6/toplevel.ml new file mode 100644 index 00000000000..01d24ede149 --- /dev/null +++ b/examples/OCaml-Kaleidoscope/Chapter6/toplevel.ml @@ -0,0 +1,49 @@ +(*===----------------------------------------------------------------------=== + * Top-Level parsing and JIT Driver + *===----------------------------------------------------------------------===*) + +open Llvm +open Llvm_executionengine + +(* top ::= definition | external | expression | ';' *) +let rec main_loop the_fpm the_execution_engine stream = + match Stream.peek stream with + | None -> () + + (* ignore top-level semicolons. *) + | Some (Token.Kwd ';') -> + Stream.junk stream; + main_loop the_fpm the_execution_engine stream + + | Some token -> + begin + try match token with + | Token.Def -> + let e = Parser.parse_definition stream in + print_endline "parsed a function definition."; + dump_value (Codegen.codegen_func the_fpm e); + | Token.Extern -> + let e = Parser.parse_extern stream in + print_endline "parsed an extern."; + dump_value (Codegen.codegen_proto e); + | _ -> + (* Evaluate a top-level expression into an anonymous function. *) + let e = Parser.parse_toplevel stream in + print_endline "parsed a top-level expr"; + let the_function = Codegen.codegen_func the_fpm e in + dump_value the_function; + + (* JIT the function, returning a function pointer. *) + let result = ExecutionEngine.run_function the_function [||] + the_execution_engine in + + print_string "Evaluated to "; + print_float (GenericValue.as_float Codegen.double_type result); + print_newline (); + with Stream.Error s | Codegen.Error s -> + (* Skip token for error recovery. *) + Stream.junk stream; + print_endline s; + end; + print_string "ready> "; flush stdout; + main_loop the_fpm the_execution_engine stream diff --git a/examples/OCaml-Kaleidoscope/Chapter6/toy.ml b/examples/OCaml-Kaleidoscope/Chapter6/toy.ml new file mode 100644 index 00000000000..5f9d912499c --- /dev/null +++ b/examples/OCaml-Kaleidoscope/Chapter6/toy.ml @@ -0,0 +1,53 @@ +(*===----------------------------------------------------------------------=== + * Main driver code. + *===----------------------------------------------------------------------===*) + +open Llvm +open Llvm_executionengine +open Llvm_target +open Llvm_scalar_opts + +let main () = + ignore (initialize_native_target ()); + + (* Install standard binary operators. + * 1 is the lowest precedence. *) + Hashtbl.add Parser.binop_precedence '<' 10; + Hashtbl.add Parser.binop_precedence '+' 20; + Hashtbl.add Parser.binop_precedence '-' 20; + Hashtbl.add Parser.binop_precedence '*' 40; (* highest. *) + + (* Prime the first token. *) + print_string "ready> "; flush stdout; + let stream = Lexer.lex (Stream.of_channel stdin) in + + (* Create the JIT. *) + let the_execution_engine = ExecutionEngine.create Codegen.the_module in + let the_fpm = PassManager.create_function Codegen.the_module in + + (* Set up the optimizer pipeline. Start with registering info about how the + * target lays out data structures. *) + TargetData.add (ExecutionEngine.target_data the_execution_engine) the_fpm; + + (* Do simple "peephole" optimizations and bit-twiddling optzn. *) + add_instruction_combination the_fpm; + + (* reassociate expressions. *) + add_reassociation the_fpm; + + (* Eliminate Common SubExpressions. *) + add_gvn the_fpm; + + (* Simplify the control flow graph (deleting unreachable blocks, etc). *) + add_cfg_simplification the_fpm; + + ignore (PassManager.initialize the_fpm); + + (* Run the main "interpreter loop" now. *) + Toplevel.main_loop the_fpm the_execution_engine stream; + + (* Print out all the generated code. *) + dump_module Codegen.the_module +;; + +main () diff --git a/examples/OCaml-Kaleidoscope/Chapter7/Makefile b/examples/OCaml-Kaleidoscope/Chapter7/Makefile new file mode 100644 index 00000000000..99686e17ea8 --- /dev/null +++ b/examples/OCaml-Kaleidoscope/Chapter7/Makefile @@ -0,0 +1,34 @@ +##===- examples/OCaml-Kaleidoscope/Chapter7/Makefile -------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +# +# This is the makefile for the Objective Caml kaleidoscope tutorial, chapter 7. +# +##===----------------------------------------------------------------------===## + +LEVEL := ../../.. +TOOLNAME := OCaml-Kaleidoscope-Ch7 +EXAMPLE_TOOL := 1 +UsedComponents := core +UsedOcamLibs := llvm llvm_analysis llvm_executionengine llvm_target \ + llvm_scalar_opts + +OCAMLCFLAGS += -pp camlp4of + +OcamlSources1 = \ + $(PROJ_SRC_DIR)/ast.ml \ + $(PROJ_SRC_DIR)/parser.ml \ + $(PROJ_SRC_DIR)/codegen.ml \ + $(PROJ_SRC_DIR)/lexer.ml \ + $(PROJ_SRC_DIR)/token.ml \ + $(PROJ_SRC_DIR)/toplevel.ml \ + $(PROJ_SRC_DIR)/toy.ml + +ExcludeSources = $(PROJ_SRC_DIR)/myocamlbuild.ml + +include $(LEVEL)/bindings/ocaml/Makefile.ocaml diff --git a/examples/OCaml-Kaleidoscope/Chapter7/_tags b/examples/OCaml-Kaleidoscope/Chapter7/_tags new file mode 100644 index 00000000000..7a03dba6672 --- /dev/null +++ b/examples/OCaml-Kaleidoscope/Chapter7/_tags @@ -0,0 +1,4 @@ +<{lexer,parser}.ml>: use_camlp4, pp(camlp4of) +<*.{byte,native}>: g++, use_llvm, use_llvm_analysis +<*.{byte,native}>: use_llvm_executionengine, use_llvm_target +<*.{byte,native}>: use_llvm_scalar_opts, use_bindings diff --git a/examples/OCaml-Kaleidoscope/Chapter7/ast.ml b/examples/OCaml-Kaleidoscope/Chapter7/ast.ml new file mode 100644 index 00000000000..c699e8074a4 --- /dev/null +++ b/examples/OCaml-Kaleidoscope/Chapter7/ast.ml @@ -0,0 +1,39 @@ +(*===----------------------------------------------------------------------=== + * Abstract Syntax Tree (aka Parse Tree) + *===----------------------------------------------------------------------===*) + +(* expr - Base type for all expression nodes. *) +type expr = + (* variant for numeric literals like "1.0". *) + | Number of float + + (* variant for referencing a variable, like "a". *) + | Variable of string + + (* variant for a unary operator. *) + | Unary of char * expr + + (* variant for a binary operator. *) + | Binary of char * expr * expr + + (* variant for function calls. *) + | Call of string * expr array + + (* variant for if/then/else. *) + | If of expr * expr * expr + + (* variant for for/in. *) + | For of string * expr * expr * expr option * expr + + (* variant for var/in. *) + | Var of (string * expr option) array * expr + +(* proto - This type represents the "prototype" for a function, which captures + * its name, and its argument names (thus implicitly the number of arguments the + * function takes). *) +type proto = + | Prototype of string * string array + | BinOpPrototype of string * string array * int + +(* func - This type represents a function definition itself. *) +type func = Function of proto * expr diff --git a/examples/OCaml-Kaleidoscope/Chapter7/bindings.c b/examples/OCaml-Kaleidoscope/Chapter7/bindings.c new file mode 100644 index 00000000000..90faed15813 --- /dev/null +++ b/examples/OCaml-Kaleidoscope/Chapter7/bindings.c @@ -0,0 +1,13 @@ +#include <stdio.h> + +/* putchard - putchar that takes a double and returns 0. */ +extern double putchard(double X) { + putchar((char)X); + return 0; +} + +/* printd - printf that takes a double prints it as "%f\n", returning 0. */ +extern double printd(double X) { + printf("%f\n", X); + return 0; +} diff --git a/examples/OCaml-Kaleidoscope/Chapter7/codegen.ml b/examples/OCaml-Kaleidoscope/Chapter7/codegen.ml new file mode 100644 index 00000000000..e66396eb6ac --- /dev/null +++ b/examples/OCaml-Kaleidoscope/Chapter7/codegen.ml @@ -0,0 +1,370 @@ +(*===----------------------------------------------------------------------=== + * Code Generation + *===----------------------------------------------------------------------===*) + +open Llvm + +exception Error of string + +let context = global_context () +let the_module = create_module context "my cool jit" +let builder = builder context +let named_values:(string, llvalue) Hashtbl.t = Hashtbl.create 10 +let double_type = double_type context + +(* Create an alloca instruction in the entry block of the function. This + * is used for mutable variables etc. *) +let create_entry_block_alloca the_function var_name = + let builder = builder_at context (instr_begin (entry_block the_function)) in + build_alloca double_type var_name builder + +let rec codegen_expr = function + | Ast.Number n -> const_float double_type n + | Ast.Variable name -> + let v = try Hashtbl.find named_values name with + | Not_found -> raise (Error "unknown variable name") + in + (* Load the value. *) + build_load v name builder + | Ast.Unary (op, operand) -> + let operand = codegen_expr operand in + let callee = "unary" ^ (String.make 1 op) in + let callee = + match lookup_function callee the_module with + | Some callee -> callee + | None -> raise (Error "unknown unary operator") + in + build_call callee [|operand|] "unop" builder + | Ast.Binary (op, lhs, rhs) -> + begin match op with + | '=' -> + (* Special case '=' because we don't want to emit the LHS as an + * expression. *) + let name = + match lhs with + | Ast.Variable name -> name + | _ -> raise (Error "destination of '=' must be a variable") + in + + (* Codegen the rhs. *) + let val_ = codegen_expr rhs in + + (* Lookup the name. *) + let variable = try Hashtbl.find named_values name with + | Not_found -> raise (Error "unknown variable name") + in + ignore(build_store val_ variable builder); + val_ + | _ -> + let lhs_val = codegen_expr lhs in + let rhs_val = codegen_expr rhs in + begin + match op with + | '+' -> build_fadd lhs_val rhs_val "addtmp" builder + | '-' -> build_fsub lhs_val rhs_val "subtmp" builder + | '*' -> build_fmul lhs_val rhs_val "multmp" builder + | '<' -> + (* Convert bool 0/1 to double 0.0 or 1.0 *) + let i = build_fcmp Fcmp.Ult lhs_val rhs_val "cmptmp" builder in + build_uitofp i double_type "booltmp" builder + | _ -> + (* If it wasn't a builtin binary operator, it must be a user defined + * one. Emit a call to it. *) + let callee = "binary" ^ (String.make 1 op) in + let callee = + match lookup_function callee the_module with + | Some callee -> callee + | None -> raise (Error "binary operator not found!") + in + build_call callee [|lhs_val; rhs_val|] "binop" builder + end + end + | Ast.Call (callee, args) -> + (* Look up the name in the module table. *) + let callee = + match lookup_function callee the_module with + | Some callee -> callee + | None -> raise (Error "unknown function referenced") + in + let params = params callee in + + (* If argument mismatch error. *) + if Array.length params == Array.length args then () else + raise (Error "incorrect # arguments passed"); + let args = Array.map codegen_expr args in + build_call callee args "calltmp" builder + | Ast.If (cond, then_, else_) -> + let cond = codegen_expr cond in + + (* Convert condition to a bool by comparing equal to 0.0 *) + let zero = const_float double_type 0.0 in + let cond_val = build_fcmp Fcmp.One cond zero "ifcond" builder in + + (* Grab the first block so that we might later add the conditional branch + * to it at the end of the function. *) + let start_bb = insertion_block builder in + let the_function = block_parent start_bb in + + let then_bb = append_block context "then" the_function in + + (* Emit 'then' value. *) + position_at_end then_bb builder; + let then_val = codegen_expr then_ in + + (* Codegen of 'then' can change the current block, update then_bb for the + * phi. We create a new name because one is used for the phi node, and the + * other is used for the conditional branch. *) + let new_then_bb = insertion_block builder in + + (* Emit 'else' value. *) + let else_bb = append_block context "else" the_function in + position_at_end else_bb builder; + let else_val = codegen_expr else_ in + + (* Codegen of 'else' can change the current block, update else_bb for the + * phi. *) + let new_else_bb = insertion_block builder in + + (* Emit merge block. *) + let merge_bb = append_block context "ifcont" the_function in + position_at_end merge_bb builder; + let incoming = [(then_val, new_then_bb); (else_val, new_else_bb)] in + let phi = build_phi incoming "iftmp" builder in + + (* Return to the start block to add the conditional branch. *) + position_at_end start_bb builder; + ignore (build_cond_br cond_val then_bb else_bb builder); + + (* Set a unconditional branch at the end of the 'then' block and the + * 'else' block to the 'merge' block. *) + position_at_end new_then_bb builder; ignore (build_br merge_bb builder); + position_at_end new_else_bb builder; ignore (build_br merge_bb builder); + + (* Finally, set the builder to the end of the merge block. *) + position_at_end merge_bb builder; + + phi + | Ast.For (var_name, start, end_, step, body) -> + (* Output this as: + * var = alloca double + * ... + * start = startexpr + * store start -> var + * goto loop + * loop: + * ... + * bodyexpr + * ... + * loopend: + * step = stepexpr + * endcond = endexpr + * + * curvar = load var + * nextvar = curvar + step + * store nextvar -> var + * br endcond, loop, endloop + * outloop: *) + + let the_function = block_parent (insertion_block builder) in + + (* Create an alloca for the variable in the entry block. *) + let alloca = create_entry_block_alloca the_function var_name in + + (* Emit the start code first, without 'variable' in scope. *) + let start_val = codegen_expr start in + + (* Store the value into the alloca. *) + ignore(build_store start_val alloca builder); + + (* Make the new basic block for the loop header, inserting after current + * block. *) + let loop_bb = append_block context "loop" the_function in + + (* Insert an explicit fall through from the current block to the + * loop_bb. *) + ignore (build_br loop_bb builder); + + (* Start insertion in loop_bb. *) + position_at_end loop_bb builder; + + (* Within the loop, the variable is defined equal to the PHI node. If it + * shadows an existing variable, we have to restore it, so save it + * now. *) + let old_val = + try Some (Hashtbl.find named_values var_name) with Not_found -> None + in + Hashtbl.add named_values var_name alloca; + + (* Emit the body of the loop. This, like any other expr, can change the + * current BB. Note that we ignore the value computed by the body, but + * don't allow an error *) + ignore (codegen_expr body); + + (* Emit the step value. *) + let step_val = + match step with + | Some step -> codegen_expr step + (* If not specified, use 1.0. *) + | None -> const_float double_type 1.0 + in + + (* Compute the end condition. *) + let end_cond = codegen_expr end_ in + + (* Reload, increment, and restore the alloca. This handles the case where + * the body of the loop mutates the variable. *) + let cur_var = build_load alloca var_name builder in + let next_var = build_add cur_var step_val "nextvar" builder in + ignore(build_store next_var alloca builder); + + (* Convert condition to a bool by comparing equal to 0.0. *) + let zero = const_float double_type 0.0 in + let end_cond = build_fcmp Fcmp.One end_cond zero "loopcond" builder in + + (* Create the "after loop" block and insert it. *) + let after_bb = append_block context "afterloop" the_function in + + (* Insert the conditional branch into the end of loop_end_bb. *) + ignore (build_cond_br end_cond loop_bb after_bb builder); + + (* Any new code will be inserted in after_bb. *) + position_at_end after_bb builder; + + (* Restore the unshadowed variable. *) + begin match old_val with + | Some old_val -> Hashtbl.add named_values var_name old_val + | None -> () + end; + + (* for expr always returns 0.0. *) + const_null double_type + | Ast.Var (var_names, body) -> + let old_bindings = ref [] in + + let the_function = block_parent (insertion_block builder) in + + (* Register all variables and emit their initializer. *) + Array.iter (fun (var_name, init) -> + (* Emit the initializer before adding the variable to scope, this + * prevents the initializer from referencing the variable itself, and + * permits stuff like this: + * var a = 1 in + * var a = a in ... # refers to outer 'a'. *) + let init_val = + match init with + | Some init -> codegen_expr init + (* If not specified, use 0.0. *) + | None -> const_float double_type 0.0 + in + + let alloca = create_entry_block_alloca the_function var_name in + ignore(build_store init_val alloca builder); + + (* Remember the old variable binding so that we can restore the binding + * when we unrecurse. *) + begin + try + let old_value = Hashtbl.find named_values var_name in + old_bindings := (var_name, old_value) :: !old_bindings; + with Not_found -> () + end; + + (* Remember this binding. *) + Hashtbl.add named_values var_name alloca; + ) var_names; + + (* Codegen the body, now that all vars are in scope. *) + let body_val = codegen_expr body in + + (* Pop all our variables from scope. *) + List.iter (fun (var_name, old_value) -> + Hashtbl.add named_values var_name old_value + ) !old_bindings; + + (* Return the body computation. *) + body_val + +let codegen_proto = function + | Ast.Prototype (name, args) | Ast.BinOpPrototype (name, args, _) -> + (* Make the function type: double(double,double) etc. *) + let doubles = Array.make (Array.length args) double_type in + let ft = function_type double_type doubles in + let f = + match lookup_function name the_module with + | None -> declare_function name ft the_module + + (* If 'f' conflicted, there was already something named 'name'. If it + * has a body, don't allow redefinition or reextern. *) + | Some f -> + (* If 'f' already has a body, reject this. *) + if block_begin f <> At_end f then + raise (Error "redefinition of function"); + + (* If 'f' took a different number of arguments, reject. *) + if element_type (type_of f) <> ft then + raise (Error "redefinition of function with different # args"); + f + in + + (* Set names for all arguments. *) + Array.iteri (fun i a -> + let n = args.(i) in + set_value_name n a; + Hashtbl.add named_values n a; + ) (params f); + f + +(* Create an alloca for each argument and register the argument in the symbol + * table so that references to it will succeed. *) +let create_argument_allocas the_function proto = + let args = match proto with + | Ast.Prototype (_, args) | Ast.BinOpPrototype (_, args, _) -> args + in + Array.iteri (fun i ai -> + let var_name = args.(i) in + (* Create an alloca for this variable. *) + let alloca = create_entry_block_alloca the_function var_name in + + (* Store the initial value into the alloca. *) + ignore(build_store ai alloca builder); + + (* Add arguments to variable symbol table. *) + Hashtbl.add named_values var_name alloca; + ) (params the_function) + +let codegen_func the_fpm = function + | Ast.Function (proto, body) -> + Hashtbl.clear named_values; + let the_function = codegen_proto proto in + + (* If this is an operator, install it. *) + begin match proto with + | Ast.BinOpPrototype (name, args, prec) -> + let op = name.[String.length name - 1] in + Hashtbl.add Parser.binop_precedence op prec; + | _ -> () + end; + + (* Create a new basic block to start insertion into. *) + let bb = append_block context "entry" the_function in + position_at_end bb builder; + + try + (* Add all arguments to the symbol table and create their allocas. *) + create_argument_allocas the_function proto; + + let ret_val = codegen_expr body in + + (* Finish off the function. *) + let _ = build_ret ret_val builder in + + (* Validate the generated code, checking for consistency. *) + Llvm_analysis.assert_valid_function the_function; + + (* Optimize the function. *) + let _ = PassManager.run_function the_function the_fpm in + + the_function + with e -> + delete_function the_function; + raise e diff --git a/examples/OCaml-Kaleidoscope/Chapter7/lexer.ml b/examples/OCaml-Kaleidoscope/Chapter7/lexer.ml new file mode 100644 index 00000000000..922cabf0428 --- /dev/null +++ b/examples/OCaml-Kaleidoscope/Chapter7/lexer.ml @@ -0,0 +1,60 @@ +(*===----------------------------------------------------------------------=== + * Lexer + *===----------------------------------------------------------------------===*) + +let rec lex = parser + (* Skip any whitespace. *) + | [< ' (' ' | '\n' | '\r' | '\t'); stream >] -> lex stream + + (* identifier: [a-zA-Z][a-zA-Z0-9] *) + | [< ' ('A' .. 'Z' | 'a' .. 'z' as c); stream >] -> + let buffer = Buffer.create 1 in + Buffer.add_char buffer c; + lex_ident buffer stream + + (* number: [0-9.]+ *) + | [< ' ('0' .. '9' as c); stream >] -> + let buffer = Buffer.create 1 in + Buffer.add_char buffer c; + lex_number buffer stream + + (* Comment until end of line. *) + | [< ' ('#'); stream >] -> + lex_comment stream + + (* Otherwise, just return the character as its ascii value. *) + | [< 'c; stream >] -> + [< 'Token.Kwd c; lex stream >] + + (* end of stream. *) + | [< >] -> [< >] + +and lex_number buffer = parser + | [< ' ('0' .. '9' | '.' as c); stream >] -> + Buffer.add_char buffer c; + lex_number buffer stream + | [< stream=lex >] -> + [< 'Token.Number (float_of_string (Buffer.contents buffer)); stream >] + +and lex_ident buffer = parser + | [< ' ('A' .. 'Z' | 'a' .. 'z' | '0' .. '9' as c); stream >] -> + Buffer.add_char buffer c; + lex_ident buffer stream + | [< stream=lex >] -> + match Buffer.contents buffer with + | "def" -> [< 'Token.Def; stream >] + | "extern" -> [< 'Token.Extern; stream >] + | "if" -> [< 'Token.If; stream >] + | "then" -> [< 'Token.Then; stream >] + | "else" -> [< 'Token.Else; stream >] + | "for" -> [< 'Token.For; stream >] + | "in" -> [< 'Token.In; stream >] + | "binary" -> [< 'Token.Binary; stream >] + | "unary" -> [< 'Token.Unary; stream >] + | "var" -> [< 'Token.Var; stream >] + | id -> [< 'Token.Ident id; stream >] + +and lex_comment = parser + | [< ' ('\n'); stream=lex >] -> stream + | [< 'c; e=lex_comment >] -> e + | [< >] -> [< >] diff --git a/examples/OCaml-Kaleidoscope/Chapter7/myocamlbuild.ml b/examples/OCaml-Kaleidoscope/Chapter7/myocamlbuild.ml new file mode 100644 index 00000000000..54d3fd97709 --- /dev/null +++ b/examples/OCaml-Kaleidoscope/Chapter7/myocamlbuild.ml @@ -0,0 +1,10 @@ +open Ocamlbuild_plugin;; + +ocaml_lib ~extern:true "llvm";; +ocaml_lib ~extern:true "llvm_analysis";; +ocaml_lib ~extern:true "llvm_executionengine";; +ocaml_lib ~extern:true "llvm_target";; +ocaml_lib ~extern:true "llvm_scalar_opts";; + +flag ["link"; "ocaml"; "g++"] (S[A"-cc"; A"g++"; A"-cclib"; A"-rdynamic"]);; +dep ["link"; "ocaml"; "use_bindings"] ["bindings.o"];; diff --git a/examples/OCaml-Kaleidoscope/Chapter7/parser.ml b/examples/OCaml-Kaleidoscope/Chapter7/parser.ml new file mode 100644 index 00000000000..c0e7db8349a --- /dev/null +++ b/examples/OCaml-Kaleidoscope/Chapter7/parser.ml @@ -0,0 +1,221 @@ +(*===---------------------------------------------------------------------=== + * Parser + *===---------------------------------------------------------------------===*) + +(* binop_precedence - This holds the precedence for each binary operator that is + * defined *) +let binop_precedence:(char, int) Hashtbl.t = Hashtbl.create 10 + +(* precedence - Get the precedence of the pending binary operator token. *) +let precedence c = try Hashtbl.find binop_precedence c with Not_found -> -1 + +(* primary + * ::= identifier + * ::= numberexpr + * ::= parenexpr + * ::= ifexpr + * ::= forexpr + * ::= varexpr *) +let rec parse_primary = parser + (* numberexpr ::= number *) + | [< 'Token.Number n >] -> Ast.Number n + + (* parenexpr ::= '(' expression ')' *) + | [< 'Token.Kwd '('; e=parse_expr; 'Token.Kwd ')' ?? "expected ')'" >] -> e + + (* identifierexpr + * ::= identifier + * ::= identifier '(' argumentexpr ')' *) + | [< 'Token.Ident id; stream >] -> + let rec parse_args accumulator = parser + | [< e=parse_expr; stream >] -> + begin parser + | [< 'Token.Kwd ','; e=parse_args (e :: accumulator) >] -> e + | [< >] -> e :: accumulator + end stream + | [< >] -> accumulator + in + let rec parse_ident id = parser + (* Call. *) + | [< 'Token.Kwd '('; + args=parse_args []; + 'Token.Kwd ')' ?? "expected ')'">] -> + Ast.Call (id, Array.of_list (List.rev args)) + + (* Simple variable ref. *) + | [< >] -> Ast.Variable id + in + parse_ident id stream + + (* ifexpr ::= 'if' expr 'then' expr 'else' expr *) + | [< 'Token.If; c=parse_expr; + 'Token.Then ?? "expected 'then'"; t=parse_expr; + 'Token.Else ?? "expected 'else'"; e=parse_expr >] -> + Ast.If (c, t, e) + + (* forexpr + ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression *) + | [< 'Token.For; + 'Token.Ident id ?? "expected identifier after for"; + 'Token.Kwd '=' ?? "expected '=' after for"; + stream >] -> + begin parser + | [< + start=parse_expr; + 'Token.Kwd ',' ?? "expected ',' after for"; + end_=parse_expr; + stream >] -> + let step = + begin parser + | [< 'Token.Kwd ','; step=parse_expr >] -> Some step + | [< >] -> None + end stream + in + begin parser + | [< 'Token.In; body=parse_expr >] -> + Ast.For (id, start, end_, step, body) + | [< >] -> + raise (Stream.Error "expected 'in' after for") + end stream + | [< >] -> + raise (Stream.Error "expected '=' after for") + end stream + + (* varexpr + * ::= 'var' identifier ('=' expression? + * (',' identifier ('=' expression)?)* 'in' expression *) + | [< 'Token.Var; + (* At least one variable name is required. *) + 'Token.Ident id ?? "expected identifier after var"; + init=parse_var_init; + var_names=parse_var_names [(id, init)]; + (* At this point, we have to have 'in'. *) + 'Token.In ?? "expected 'in' keyword after 'var'"; + body=parse_expr >] -> + Ast.Var (Array.of_list (List.rev var_names), body) + + | [< >] -> raise (Stream.Error "unknown token when expecting an expression.") + +(* unary + * ::= primary + * ::= '!' unary *) +and parse_unary = parser + (* If this is a unary operator, read it. *) + | [< 'Token.Kwd op when op != '(' && op != ')'; operand=parse_expr >] -> + Ast.Unary (op, operand) + + (* If the current token is not an operator, it must be a primary expr. *) + | [< stream >] -> parse_primary stream + +(* binoprhs + * ::= ('+' primary)* *) +and parse_bin_rhs expr_prec lhs stream = + match Stream.peek stream with + (* If this is a binop, find its precedence. *) + | Some (Token.Kwd c) when Hashtbl.mem binop_precedence c -> + let token_prec = precedence c in + + (* If this is a binop that binds at least as tightly as the current binop, + * consume it, otherwise we are done. *) + if token_prec < expr_prec then lhs else begin + (* Eat the binop. *) + Stream.junk stream; + + (* Parse the primary expression after the binary operator. *) + let rhs = parse_unary stream in + + (* Okay, we know this is a binop. *) + let rhs = + match Stream.peek stream with + | Some (Token.Kwd c2) -> + (* If BinOp binds less tightly with rhs than the operator after + * rhs, let the pending operator take rhs as its lhs. *) + let next_prec = precedence c2 in + if token_prec < next_prec + then parse_bin_rhs (token_prec + 1) rhs stream + else rhs + | _ -> rhs + in + + (* Merge lhs/rhs. *) + let lhs = Ast.Binary (c, lhs, rhs) in + parse_bin_rhs expr_prec lhs stream + end + | _ -> lhs + +and parse_var_init = parser + (* read in the optional initializer. *) + | [< 'Token.Kwd '='; e=parse_expr >] -> Some e + | [< >] -> None + +and parse_var_names accumulator = parser + | [< 'Token.Kwd ','; + 'Token.Ident id ?? "expected identifier list after var"; + init=parse_var_init; + e=parse_var_names ((id, init) :: accumulator) >] -> e + | [< >] -> accumulator + +(* expression + * ::= primary binoprhs *) +and parse_expr = parser + | [< lhs=parse_unary; stream >] -> parse_bin_rhs 0 lhs stream + +(* prototype + * ::= id '(' id* ')' + * ::= binary LETTER number? (id, id) + * ::= unary LETTER number? (id) *) +let parse_prototype = + let rec parse_args accumulator = parser + | [< 'Token.Ident id; e=parse_args (id::accumulator) >] -> e + | [< >] -> accumulator + in + let parse_operator = parser + | [< 'Token.Unary >] -> "unary", 1 + | [< 'Token.Binary >] -> "binary", 2 + in + let parse_binary_precedence = parser + | [< 'Token.Number n >] -> int_of_float n + | [< >] -> 30 + in + parser + | [< 'Token.Ident id; + 'Token.Kwd '(' ?? "expected '(' in prototype"; + args=parse_args []; + 'Token.Kwd ')' ?? "expected ')' in prototype" >] -> + (* success. *) + Ast.Prototype (id, Array.of_list (List.rev args)) + | [< (prefix, kind)=parse_operator; + 'Token.Kwd op ?? "expected an operator"; + (* Read the precedence if present. *) + binary_precedence=parse_binary_precedence; + 'Token.Kwd '(' ?? "expected '(' in prototype"; + args=parse_args []; + 'Token.Kwd ')' ?? "expected ')' in prototype" >] -> + let name = prefix ^ (String.make 1 op) in + let args = Array.of_list (List.rev args) in + + (* Verify right number of arguments for operator. *) + if Array.length args != kind + then raise (Stream.Error "invalid number of operands for operator") + else + if kind == 1 then + Ast.Prototype (name, args) + else + Ast.BinOpPrototype (name, args, binary_precedence) + | [< >] -> + raise (Stream.Error "expected function name in prototype") + +(* definition ::= 'def' prototype expression *) +let parse_definition = parser + | [< 'Token.Def; p=parse_prototype; e=parse_expr >] -> + Ast.Function (p, e) + +(* toplevelexpr ::= expression *) +let parse_toplevel = parser + | [< e=parse_expr >] -> + (* Make an anonymous proto. *) + Ast.Function (Ast.Prototype ("", [||]), e) + +(* external ::= 'extern' prototype *) +let parse_extern = parser + | [< 'Token.Extern; e=parse_prototype >] -> e diff --git a/examples/OCaml-Kaleidoscope/Chapter7/token.ml b/examples/OCaml-Kaleidoscope/Chapter7/token.ml new file mode 100644 index 00000000000..1489f0b9e79 --- /dev/null +++ b/examples/OCaml-Kaleidoscope/Chapter7/token.ml @@ -0,0 +1,25 @@ +(*===----------------------------------------------------------------------=== + * Lexer Tokens + *===----------------------------------------------------------------------===*) + +(* The lexer returns these 'Kwd' if it is an unknown character, otherwise one of + * these others for known things. *) +type token = + (* commands *) + | Def | Extern + + (* primary *) + | Ident of string | Number of float + + (* unknown *) + | Kwd of char + + (* control *) + | If | Then | Else + | For | In + + (* operators *) + | Binary | Unary + + (* var definition *) + | Var diff --git a/examples/OCaml-Kaleidoscope/Chapter7/toplevel.ml b/examples/OCaml-Kaleidoscope/Chapter7/toplevel.ml new file mode 100644 index 00000000000..01d24ede149 --- /dev/null +++ b/examples/OCaml-Kaleidoscope/Chapter7/toplevel.ml @@ -0,0 +1,49 @@ +(*===----------------------------------------------------------------------=== + * Top-Level parsing and JIT Driver + *===----------------------------------------------------------------------===*) + +open Llvm +open Llvm_executionengine + +(* top ::= definition | external | expression | ';' *) +let rec main_loop the_fpm the_execution_engine stream = + match Stream.peek stream with + | None -> () + + (* ignore top-level semicolons. *) + | Some (Token.Kwd ';') -> + Stream.junk stream; + main_loop the_fpm the_execution_engine stream + + | Some token -> + begin + try match token with + | Token.Def -> + let e = Parser.parse_definition stream in + print_endline "parsed a function definition."; + dump_value (Codegen.codegen_func the_fpm e); + | Token.Extern -> + let e = Parser.parse_extern stream in + print_endline "parsed an extern."; + dump_value (Codegen.codegen_proto e); + | _ -> + (* Evaluate a top-level expression into an anonymous function. *) + let e = Parser.parse_toplevel stream in + print_endline "parsed a top-level expr"; + let the_function = Codegen.codegen_func the_fpm e in + dump_value the_function; + + (* JIT the function, returning a function pointer. *) + let result = ExecutionEngine.run_function the_function [||] + the_execution_engine in + + print_string "Evaluated to "; + print_float (GenericValue.as_float Codegen.double_type result); + print_newline (); + with Stream.Error s | Codegen.Error s -> + (* Skip token for error recovery. *) + Stream.junk stream; + print_endline s; + end; + print_string "ready> "; flush stdout; + main_loop the_fpm the_execution_engine stream diff --git a/examples/OCaml-Kaleidoscope/Chapter7/toy.ml b/examples/OCaml-Kaleidoscope/Chapter7/toy.ml new file mode 100644 index 00000000000..babab28601d --- /dev/null +++ b/examples/OCaml-Kaleidoscope/Chapter7/toy.ml @@ -0,0 +1,57 @@ +(*===----------------------------------------------------------------------=== + * Main driver code. + *===----------------------------------------------------------------------===*) + +open Llvm +open Llvm_executionengine +open Llvm_target +open Llvm_scalar_opts + +let main () = + ignore (initialize_native_target ()); + + (* Install standard binary operators. + * 1 is the lowest precedence. *) + Hashtbl.add Parser.binop_precedence '=' 2; + Hashtbl.add Parser.binop_precedence '<' 10; + Hashtbl.add Parser.binop_precedence '+' 20; + Hashtbl.add Parser.binop_precedence '-' 20; + Hashtbl.add Parser.binop_precedence '*' 40; (* highest. *) + + (* Prime the first token. *) + print_string "ready> "; flush stdout; + let stream = Lexer.lex (Stream.of_channel stdin) in + + (* Create the JIT. *) + let the_execution_engine = ExecutionEngine.create Codegen.the_module in + let the_fpm = PassManager.create_function Codegen.the_module in + + (* Set up the optimizer pipeline. Start with registering info about how the + * target lays out data structures. *) + TargetData.add (ExecutionEngine.target_data the_execution_engine) the_fpm; + + (* Promote allocas to registers. *) + add_memory_to_register_promotion the_fpm; + + (* Do simple "peephole" optimizations and bit-twiddling optzn. *) + add_instruction_combination the_fpm; + + (* reassociate expressions. *) + add_reassociation the_fpm; + + (* Eliminate Common SubExpressions. *) + add_gvn the_fpm; + + (* Simplify the control flow graph (deleting unreachable blocks, etc). *) + add_cfg_simplification the_fpm; + + ignore (PassManager.initialize the_fpm); + + (* Run the main "interpreter loop" now. *) + Toplevel.main_loop the_fpm the_execution_engine stream; + + (* Print out all the generated code. *) + dump_module Codegen.the_module +;; + +main () diff --git a/examples/OCaml-Kaleidoscope/Makefile b/examples/OCaml-Kaleidoscope/Makefile new file mode 100644 index 00000000000..5342b94022a --- /dev/null +++ b/examples/OCaml-Kaleidoscope/Makefile @@ -0,0 +1,15 @@ +##===- examples/OCaml-Kaleidoscope/Makefile ----------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL=../.. + +include $(LEVEL)/Makefile.config + +PARALLEL_DIRS:= Chapter2 Chapter3 Chapter4 Chapter5 Chapter6 Chapter7 + +include $(LEVEL)/Makefile.common diff --git a/examples/ParallelJIT/CMakeLists.txt b/examples/ParallelJIT/CMakeLists.txt new file mode 100644 index 00000000000..fbdc6e5fc10 --- /dev/null +++ b/examples/ParallelJIT/CMakeLists.txt @@ -0,0 +1,9 @@ +set(LLVM_LINK_COMPONENTS jit interpreter nativecodegen) + +add_llvm_example(ParallelJIT + ParallelJIT.cpp + ) + +if(HAVE_LIBPTHREAD) + target_link_libraries(ParallelJIT pthread) +endif(HAVE_LIBPTHREAD) diff --git a/examples/ParallelJIT/Makefile b/examples/ParallelJIT/Makefile new file mode 100644 index 00000000000..8a49d427313 --- /dev/null +++ b/examples/ParallelJIT/Makefile @@ -0,0 +1,17 @@ +##===- examples/ParallelJIT/Makefile -----------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../.. +TOOLNAME = ParallelJIT +EXAMPLE_TOOL = 1 + +LINK_COMPONENTS := jit interpreter nativecodegen + +include $(LEVEL)/Makefile.common + +LIBS += -lpthread diff --git a/examples/ParallelJIT/ParallelJIT.cpp b/examples/ParallelJIT/ParallelJIT.cpp new file mode 100644 index 00000000000..305cf1dde06 --- /dev/null +++ b/examples/ParallelJIT/ParallelJIT.cpp @@ -0,0 +1,305 @@ +//===-- examples/ParallelJIT/ParallelJIT.cpp - Exercise threaded-safe JIT -===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Parallel JIT +// +// This test program creates two LLVM functions then calls them from three +// separate threads. It requires the pthreads library. +// The three threads are created and then block waiting on a condition variable. +// Once all threads are blocked on the conditional variable, the main thread +// wakes them up. This complicated work is performed so that all three threads +// call into the JIT at the same time (or the best possible approximation of the +// same time). This test had assertion errors until I got the locking right. + +#include <pthread.h> +#include "llvm/LLVMContext.h" +#include "llvm/Module.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Instructions.h" +#include "llvm/ExecutionEngine/JIT.h" +#include "llvm/ExecutionEngine/Interpreter.h" +#include "llvm/ExecutionEngine/GenericValue.h" +#include "llvm/Support/TargetSelect.h" +#include <iostream> +using namespace llvm; + +static Function* createAdd1(Module *M) { + // Create the add1 function entry and insert this entry into module M. The + // function will have a return type of "int" and take an argument of "int". + // The '0' terminates the list of argument types. + Function *Add1F = + cast<Function>(M->getOrInsertFunction("add1", + Type::getInt32Ty(M->getContext()), + Type::getInt32Ty(M->getContext()), + (Type *)0)); + + // Add a basic block to the function. As before, it automatically inserts + // because of the last argument. + BasicBlock *BB = BasicBlock::Create(M->getContext(), "EntryBlock", Add1F); + + // Get pointers to the constant `1'. + Value *One = ConstantInt::get(Type::getInt32Ty(M->getContext()), 1); + + // Get pointers to the integer argument of the add1 function... + assert(Add1F->arg_begin() != Add1F->arg_end()); // Make sure there's an arg + Argument *ArgX = Add1F->arg_begin(); // Get the arg + ArgX->setName("AnArg"); // Give it a nice symbolic name for fun. + + // Create the add instruction, inserting it into the end of BB. + Instruction *Add = BinaryOperator::CreateAdd(One, ArgX, "addresult", BB); + + // Create the return instruction and add it to the basic block + ReturnInst::Create(M->getContext(), Add, BB); + + // Now, function add1 is ready. + return Add1F; +} + +static Function *CreateFibFunction(Module *M) { + // Create the fib function and insert it into module M. This function is said + // to return an int and take an int parameter. + Function *FibF = + cast<Function>(M->getOrInsertFunction("fib", + Type::getInt32Ty(M->getContext()), + Type::getInt32Ty(M->getContext()), + (Type *)0)); + + // Add a basic block to the function. + BasicBlock *BB = BasicBlock::Create(M->getContext(), "EntryBlock", FibF); + + // Get pointers to the constants. + Value *One = ConstantInt::get(Type::getInt32Ty(M->getContext()), 1); + Value *Two = ConstantInt::get(Type::getInt32Ty(M->getContext()), 2); + + // Get pointer to the integer argument of the add1 function... + Argument *ArgX = FibF->arg_begin(); // Get the arg. + ArgX->setName("AnArg"); // Give it a nice symbolic name for fun. + + // Create the true_block. + BasicBlock *RetBB = BasicBlock::Create(M->getContext(), "return", FibF); + // Create an exit block. + BasicBlock* RecurseBB = BasicBlock::Create(M->getContext(), "recurse", FibF); + + // Create the "if (arg < 2) goto exitbb" + Value *CondInst = new ICmpInst(*BB, ICmpInst::ICMP_SLE, ArgX, Two, "cond"); + BranchInst::Create(RetBB, RecurseBB, CondInst, BB); + + // Create: ret int 1 + ReturnInst::Create(M->getContext(), One, RetBB); + + // create fib(x-1) + Value *Sub = BinaryOperator::CreateSub(ArgX, One, "arg", RecurseBB); + Value *CallFibX1 = CallInst::Create(FibF, Sub, "fibx1", RecurseBB); + + // create fib(x-2) + Sub = BinaryOperator::CreateSub(ArgX, Two, "arg", RecurseBB); + Value *CallFibX2 = CallInst::Create(FibF, Sub, "fibx2", RecurseBB); + + // fib(x-1)+fib(x-2) + Value *Sum = + BinaryOperator::CreateAdd(CallFibX1, CallFibX2, "addresult", RecurseBB); + + // Create the return instruction and add it to the basic block + ReturnInst::Create(M->getContext(), Sum, RecurseBB); + + return FibF; +} + +struct threadParams { + ExecutionEngine* EE; + Function* F; + int value; +}; + +// We block the subthreads just before they begin to execute: +// we want all of them to call into the JIT at the same time, +// to verify that the locking is working correctly. +class WaitForThreads +{ +public: + WaitForThreads() + { + n = 0; + waitFor = 0; + + int result = pthread_cond_init( &condition, NULL ); + assert( result == 0 ); + + result = pthread_mutex_init( &mutex, NULL ); + assert( result == 0 ); + } + + ~WaitForThreads() + { + int result = pthread_cond_destroy( &condition ); + assert( result == 0 ); + + result = pthread_mutex_destroy( &mutex ); + assert( result == 0 ); + } + + // All threads will stop here until another thread calls releaseThreads + void block() + { + int result = pthread_mutex_lock( &mutex ); + assert( result == 0 ); + n ++; + //~ std::cout << "block() n " << n << " waitFor " << waitFor << std::endl; + + assert( waitFor == 0 || n <= waitFor ); + if ( waitFor > 0 && n == waitFor ) + { + // There are enough threads blocked that we can release all of them + std::cout << "Unblocking threads from block()" << std::endl; + unblockThreads(); + } + else + { + // We just need to wait until someone unblocks us + result = pthread_cond_wait( &condition, &mutex ); + assert( result == 0 ); + } + + // unlock the mutex before returning + result = pthread_mutex_unlock( &mutex ); + assert( result == 0 ); + } + + // If there are num or more threads blocked, it will signal them all + // Otherwise, this thread blocks until there are enough OTHER threads + // blocked + void releaseThreads( size_t num ) + { + int result = pthread_mutex_lock( &mutex ); + assert( result == 0 ); + + if ( n >= num ) { + std::cout << "Unblocking threads from releaseThreads()" << std::endl; + unblockThreads(); + } + else + { + waitFor = num; + pthread_cond_wait( &condition, &mutex ); + } + + // unlock the mutex before returning + result = pthread_mutex_unlock( &mutex ); + assert( result == 0 ); + } + +private: + void unblockThreads() + { + // Reset the counters to zero: this way, if any new threads + // enter while threads are exiting, they will block instead + // of triggering a new release of threads + n = 0; + + // Reset waitFor to zero: this way, if waitFor threads enter + // while threads are exiting, they will block instead of + // triggering a new release of threads + waitFor = 0; + + int result = pthread_cond_broadcast( &condition ); + (void)result; + assert(result == 0); + } + + size_t n; + size_t waitFor; + pthread_cond_t condition; + pthread_mutex_t mutex; +}; + +static WaitForThreads synchronize; + +void* callFunc( void* param ) +{ + struct threadParams* p = (struct threadParams*) param; + + // Call the `foo' function with no arguments: + std::vector<GenericValue> Args(1); + Args[0].IntVal = APInt(32, p->value); + + synchronize.block(); // wait until other threads are at this point + GenericValue gv = p->EE->runFunction(p->F, Args); + + return (void*)(intptr_t)gv.IntVal.getZExtValue(); +} + +int main() { + InitializeNativeTarget(); + LLVMContext Context; + + // Create some module to put our function into it. + Module *M = new Module("test", Context); + + Function* add1F = createAdd1( M ); + Function* fibF = CreateFibFunction( M ); + + // Now we create the JIT. + ExecutionEngine* EE = EngineBuilder(M).create(); + + //~ std::cout << "We just constructed this LLVM module:\n\n" << *M; + //~ std::cout << "\n\nRunning foo: " << std::flush; + + // Create one thread for add1 and two threads for fib + struct threadParams add1 = { EE, add1F, 1000 }; + struct threadParams fib1 = { EE, fibF, 39 }; + struct threadParams fib2 = { EE, fibF, 42 }; + + pthread_t add1Thread; + int result = pthread_create( &add1Thread, NULL, callFunc, &add1 ); + if ( result != 0 ) { + std::cerr << "Could not create thread" << std::endl; + return 1; + } + + pthread_t fibThread1; + result = pthread_create( &fibThread1, NULL, callFunc, &fib1 ); + if ( result != 0 ) { + std::cerr << "Could not create thread" << std::endl; + return 1; + } + + pthread_t fibThread2; + result = pthread_create( &fibThread2, NULL, callFunc, &fib2 ); + if ( result != 0 ) { + std::cerr << "Could not create thread" << std::endl; + return 1; + } + + synchronize.releaseThreads(3); // wait until other threads are at this point + + void* returnValue; + result = pthread_join( add1Thread, &returnValue ); + if ( result != 0 ) { + std::cerr << "Could not join thread" << std::endl; + return 1; + } + std::cout << "Add1 returned " << intptr_t(returnValue) << std::endl; + + result = pthread_join( fibThread1, &returnValue ); + if ( result != 0 ) { + std::cerr << "Could not join thread" << std::endl; + return 1; + } + std::cout << "Fib1 returned " << intptr_t(returnValue) << std::endl; + + result = pthread_join( fibThread2, &returnValue ); + if ( result != 0 ) { + std::cerr << "Could not join thread" << std::endl; + return 1; + } + std::cout << "Fib2 returned " << intptr_t(returnValue) << std::endl; + + return 0; +} |