Reapply ~"Bitcode: Collect all MDString records into a single blob"

Spiritually reapply commit r264409 (reverted in r264410), albeit with a
bit of a redesign.

Firstly, avoid splitting the big blob into multiple chunks of strings.

r264409 imposed an arbitrary limit to avoid a massive allocation on the
shared 'Record' SmallVector.  The bug with that commit only reproduced
when there were more than "chunk-size" strings.  A test for this would
have been useless long-term, since we're liable to adjust the chunk-size
in the future.

Thus, eliminate the motivation for chunk-ing by storing the string sizes
in the blob.  Here's the layout:

    vbr6: # of strings
    vbr6: offset-to-blob
    blob:
       [vbr6]: string lengths
       [char]: concatenated strings

Secondly, make the output of llvm-bcanalyzer readable.

I noticed when debugging r264409 that llvm-bcanalyzer was outputting a
massive blob all in one line.  Past a small number, the strings were
impossible to split in my head, and the lines were way too long.  This
version adds support in llvm-bcanalyzer for pretty-printing.

    <STRINGS abbrevid=4 op0=3 op1=9/> num-strings = 3 {
      'abc'
      'def'
      'ghi'
    }

From the original commit:

Inspired by Mehdi's similar patch, http://reviews.llvm.org/D18342, this
should (a) slightly reduce bitcode size, since there is less record
overhead, and (b) greatly improve reading speed, since blobs are super
cheap to deserialize.

llvm-svn: 264551
This commit is contained in:
Duncan P. N. Exon Smith
2016-03-27 23:17:54 +00:00
parent 376fa26060
commit 6565a0d4b2
7 changed files with 200 additions and 36 deletions

View File

@@ -396,6 +396,9 @@ private:
std::error_code globalCleanup();
std::error_code resolveGlobalAndAliasInits();
std::error_code parseMetadata(bool ModuleLevel = false);
std::error_code parseMetadataStrings(ArrayRef<uint64_t> Record,
StringRef Blob,
unsigned &NextMetadataNo);
std::error_code parseMetadataKinds();
std::error_code parseMetadataKindRecord(SmallVectorImpl<uint64_t> &Record);
std::error_code parseMetadataAttachment(Function &F);
@@ -1883,6 +1886,47 @@ BitcodeReader::parseMetadataKindRecord(SmallVectorImpl<uint64_t> &Record) {
static int64_t unrotateSign(uint64_t U) { return U & 1 ? ~(U >> 1) : U >> 1; }
std::error_code BitcodeReader::parseMetadataStrings(ArrayRef<uint64_t> Record,
StringRef Blob,
unsigned &NextMetadataNo) {
// All the MDStrings in the block are emitted together in a single
// record. The strings are concatenated and stored in a blob along with
// their sizes.
if (Record.size() != 2)
return error("Invalid record: metadata strings layout");
unsigned NumStrings = Record[0];
unsigned StringsOffset = Record[1];
if (!NumStrings)
return error("Invalid record: metadata strings with no strings");
if (StringsOffset >= Blob.size())
return error("Invalid record: metadata strings corrupt offset");
StringRef Lengths = Blob.slice(0, StringsOffset);
SimpleBitstreamCursor R(*StreamFile);
R.jumpToPointer(Lengths.begin());
// Ensure that Blob doesn't get invalidated, even if this is reading from
// a StreamingMemoryObject with corrupt data.
R.setArtificialByteLimit(R.getCurrentByteNo() + StringsOffset);
StringRef Strings = Blob.drop_front(StringsOffset);
do {
if (R.AtEndOfStream())
return error("Invalid record: metadata strings bad length");
unsigned Size = R.ReadVBR(6);
if (Strings.size() < Size)
return error("Invalid record: metadata strings truncated chars");
MetadataList.assignValue(MDString::get(Context, Strings.slice(0, Size)),
NextMetadataNo++);
Strings = Strings.drop_front(Size);
} while (--NumStrings);
return std::error_code();
}
/// Parse a METADATA_BLOCK. If ModuleLevel is true then we are parsing
/// module level metadata.
std::error_code BitcodeReader::parseMetadata(bool ModuleLevel) {
@@ -1929,7 +1973,8 @@ std::error_code BitcodeReader::parseMetadata(bool ModuleLevel) {
// Read a record.
Record.clear();
unsigned Code = Stream.readRecord(Entry.ID, Record);
StringRef Blob;
unsigned Code = Stream.readRecord(Entry.ID, Record, &Blob);
bool IsDistinct = false;
switch (Code) {
default: // Default behavior: ignore.
@@ -2363,7 +2408,7 @@ std::error_code BitcodeReader::parseMetadata(bool ModuleLevel) {
NextMetadataNo++);
break;
}
case bitc::METADATA_STRING: {
case bitc::METADATA_STRING_OLD: {
std::string String(Record.begin(), Record.end());
// Test for upgrading !llvm.loop.
@@ -2373,6 +2418,11 @@ std::error_code BitcodeReader::parseMetadata(bool ModuleLevel) {
MetadataList.assignValue(MD, NextMetadataNo++);
break;
}
case bitc::METADATA_STRINGS:
if (std::error_code EC =
parseMetadataStrings(Record, Blob, NextMetadataNo))
return EC;
break;
case bitc::METADATA_KIND: {
// Support older bitcode files that had METADATA_KIND records in a
// block with METADATA_BLOCK_ID.