[LTO] Support for embedding bitcode section during LTO

Summary:
This adds support for embedding bitcode in a binary during LTO. The libLTO gains supports the `-lto-embed-bitcode` flag. The option allows users of the LTO library to embed a bitcode section. For example, LLD can pass the option via `ld.lld -mllvm=-lto-embed-bitcode`.

This feature allows doing something comparable to `clang -c -fembed-bitcode`, but on the (LTO) linker level. Having bitcode alongside native code has many use-cases. To give an example, the MacOS linker can create a `-bitcode_bundle` section containing bitcode. Also, having this feature built into LLVM is an alternative to 3rd party tools such as [[ https://github.com/travitch/whole-program-llvm | wllvm ]] or [[ https://github.com/SRI-CSL/gllvm | gllvm ]]. As with these tools, this feature simplifies creating "whole-program" llvm bitcode files, but in contrast to wllvm/gllvm it does not rely on a specific llvm frontend/driver.

Patch by Josef Eisl <josef.eisl@oracle.com>

Reviewers: #llvm, #clang, rsmith, pcc, alexshap, tejohnson

Reviewed By: tejohnson

Subscribers: tejohnson, mehdi_amini, inglorion, hiraditya, aheejin, steven_wu, dexonsmith, dang, cfe-commits, llvm-commits, #llvm, #clang

Tags: #clang, #llvm

Differential Revision: https://reviews.llvm.org/D68213
This commit is contained in:
Teresa Johnson
2019-12-12 11:59:36 -08:00
parent 0ee89c1bad
commit c8e0bb3b2c
8 changed files with 270 additions and 120 deletions

View File

@@ -24,9 +24,10 @@
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/Bitcode/LLVMBitCodes.h"
#include "llvm/Bitstream/BitCodes.h"
#include "llvm/Bitstream/BitstreamWriter.h"
#include "llvm/Bitcode/LLVMBitCodes.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
@@ -4677,3 +4678,125 @@ void llvm::WriteThinLinkBitcodeToFile(const Module &M, raw_ostream &Out,
Out.write((char *)&Buffer.front(), Buffer.size());
}
static const char *getSectionNameForBitcode(const Triple &T) {
switch (T.getObjectFormat()) {
case Triple::MachO:
return "__LLVM,__bitcode";
case Triple::COFF:
case Triple::ELF:
case Triple::Wasm:
case Triple::UnknownObjectFormat:
return ".llvmbc";
case Triple::XCOFF:
llvm_unreachable("XCOFF is not yet implemented");
break;
}
llvm_unreachable("Unimplemented ObjectFormatType");
}
static const char *getSectionNameForCommandline(const Triple &T) {
switch (T.getObjectFormat()) {
case Triple::MachO:
return "__LLVM,__cmdline";
case Triple::COFF:
case Triple::ELF:
case Triple::Wasm:
case Triple::UnknownObjectFormat:
return ".llvmcmd";
case Triple::XCOFF:
llvm_unreachable("XCOFF is not yet implemented");
break;
}
llvm_unreachable("Unimplemented ObjectFormatType");
}
void llvm::EmbedBitcodeInModule(llvm::Module &M, llvm::MemoryBufferRef Buf,
bool EmbedBitcode, bool EmbedMarker,
const std::vector<uint8_t> *CmdArgs) {
// Save llvm.compiler.used and remove it.
SmallVector<Constant *, 2> UsedArray;
SmallPtrSet<GlobalValue *, 4> UsedGlobals;
Type *UsedElementType = Type::getInt8Ty(M.getContext())->getPointerTo(0);
GlobalVariable *Used = collectUsedGlobalVariables(M, UsedGlobals, true);
for (auto *GV : UsedGlobals) {
if (GV->getName() != "llvm.embedded.module" &&
GV->getName() != "llvm.cmdline")
UsedArray.push_back(
ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, UsedElementType));
}
if (Used)
Used->eraseFromParent();
// Embed the bitcode for the llvm module.
std::string Data;
ArrayRef<uint8_t> ModuleData;
Triple T(M.getTargetTriple());
// Create a constant that contains the bitcode.
// In case of embedding a marker, ignore the input Buf and use the empty
// ArrayRef. It is also legal to create a bitcode marker even Buf is empty.
if (EmbedBitcode) {
if (!isBitcode((const unsigned char *)Buf.getBufferStart(),
(const unsigned char *)Buf.getBufferEnd())) {
// If the input is LLVM Assembly, bitcode is produced by serializing
// the module. Use-lists order need to be preserved in this case.
llvm::raw_string_ostream OS(Data);
llvm::WriteBitcodeToFile(M, OS, /* ShouldPreserveUseListOrder */ true);
ModuleData =
ArrayRef<uint8_t>((const uint8_t *)OS.str().data(), OS.str().size());
} else
// If the input is LLVM bitcode, write the input byte stream directly.
ModuleData = ArrayRef<uint8_t>((const uint8_t *)Buf.getBufferStart(),
Buf.getBufferSize());
}
llvm::Constant *ModuleConstant =
llvm::ConstantDataArray::get(M.getContext(), ModuleData);
llvm::GlobalVariable *GV = new llvm::GlobalVariable(
M, ModuleConstant->getType(), true, llvm::GlobalValue::PrivateLinkage,
ModuleConstant);
GV->setSection(getSectionNameForBitcode(T));
UsedArray.push_back(
ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, UsedElementType));
if (llvm::GlobalVariable *Old =
M.getGlobalVariable("llvm.embedded.module", true)) {
assert(Old->hasOneUse() &&
"llvm.embedded.module can only be used once in llvm.compiler.used");
GV->takeName(Old);
Old->eraseFromParent();
} else {
GV->setName("llvm.embedded.module");
}
// Skip if only bitcode needs to be embedded.
if (EmbedMarker) {
// Embed command-line options.
ArrayRef<uint8_t> CmdData(const_cast<uint8_t *>(CmdArgs->data()),
CmdArgs->size());
llvm::Constant *CmdConstant =
llvm::ConstantDataArray::get(M.getContext(), CmdData);
GV = new llvm::GlobalVariable(M, CmdConstant->getType(), true,
llvm::GlobalValue::PrivateLinkage,
CmdConstant);
GV->setSection(getSectionNameForCommandline(T));
UsedArray.push_back(
ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, UsedElementType));
if (llvm::GlobalVariable *Old = M.getGlobalVariable("llvm.cmdline", true)) {
assert(Old->hasOneUse() &&
"llvm.cmdline can only be used once in llvm.compiler.used");
GV->takeName(Old);
Old->eraseFromParent();
} else {
GV->setName("llvm.cmdline");
}
}
if (UsedArray.empty())
return;
// Recreate llvm.compiler.used.
ArrayType *ATy = ArrayType::get(UsedElementType, UsedArray.size());
auto *NewUsed = new GlobalVariable(
M, ATy, false, llvm::GlobalValue::AppendingLinkage,
llvm::ConstantArray::get(ATy, UsedArray), "llvm.compiler.used");
NewUsed->setSection("llvm.metadata");
}