Skip to content

Commit

Permalink
Add an (optional) identification block in the bitcode
Browse files Browse the repository at this point in the history
Processing bitcode from a different LLVM version can lead to
unexpected behavior. The LLVM project guarantees autoupdating
bitcode from a previous minor revision for the same major, but
can't make any promise when reading bitcode generated from
either a non-released LLVM, a vendor toolchain, or a "future"
LLVM release. This patch aims at being more user-friendly and
allows a bitcode producer to emit an optional block at the
beginning of the bitcode that will contain an opaque string
intended to describe the bitcode producer information. The
bitcode reader will dump this information alongside any error it
reports.

The optional block also includes an "epoch" number, monotonically
increasing when incompatible changes are made to the bitcode. The
reader will reject bitcode whose epoch is different from the one
expected.

Differential Revision: http://reviews.llvm.org/D13666

From: Mehdi Amini <[email protected]>
llvm-svn: 251325
  • Loading branch information
joker-eph committed Oct 26, 2015
1 parent d1aad26 commit 5d30328
Show file tree
Hide file tree
Showing 5 changed files with 151 additions and 27 deletions.
71 changes: 44 additions & 27 deletions llvm/include/llvm/Bitcode/LLVMBitCodes.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,33 +23,50 @@
namespace llvm {
namespace bitc {
// Top-level block types: the module block, optionally preceded by an
// identification block describing the bitcode producer.
// Block identifiers used in the bitcode stream. Only the first enumerator has
// an explicit value; every following one takes the next integer, so the order
// of the entries determines their numeric values.
enum BlockIDs {
// Blocks
MODULE_BLOCK_ID = FIRST_APPLICATION_BLOCKID,

// Module sub-block id's.
PARAMATTR_BLOCK_ID,
PARAMATTR_GROUP_BLOCK_ID,

CONSTANTS_BLOCK_ID,
FUNCTION_BLOCK_ID,

// NOTE(review): presumably a placeholder that keeps the numbering of the
// following ids unchanged -- confirm before reusing this slot.
UNUSED_ID1,

VALUE_SYMTAB_BLOCK_ID,
METADATA_BLOCK_ID,
METADATA_ATTACHMENT_ID,

TYPE_BLOCK_ID_NEW,

USELIST_BLOCK_ID,

MODULE_STRTAB_BLOCK_ID,
FUNCTION_SUMMARY_BLOCK_ID,

OPERAND_BUNDLE_TAGS_BLOCK_ID
};

// Block identifiers used in the bitcode stream. Only the first enumerator has
// an explicit value; every following one takes the next integer, so inserting,
// removing or reordering entries renumbers everything after the change.
enum BlockIDs {
// Blocks
MODULE_BLOCK_ID = FIRST_APPLICATION_BLOCKID,

// Module sub-block id's.
PARAMATTR_BLOCK_ID,
PARAMATTR_GROUP_BLOCK_ID,

CONSTANTS_BLOCK_ID,
FUNCTION_BLOCK_ID,

// Block intended to contain information on the bitcode versioning.
// Can be used to provide better error messages when we fail to parse a
// bitcode file.
// Although listed among the module sub-blocks, the writer emits this block
// before the module block and the reader handles it at the top level.
IDENTIFICATION_BLOCK_ID,

VALUE_SYMTAB_BLOCK_ID,
METADATA_BLOCK_ID,
METADATA_ATTACHMENT_ID,

TYPE_BLOCK_ID_NEW,

USELIST_BLOCK_ID,

MODULE_STRTAB_BLOCK_ID,
FUNCTION_SUMMARY_BLOCK_ID,

OPERAND_BUNDLE_TAGS_BLOCK_ID
};

/// Record codes of the identification block. The block contains a string that
/// describes the producer details, and an epoch that defines the auto-upgrade
/// capability.
enum IdentificationCodes {
IDENTIFICATION_CODE_STRING = 1, // IDENTIFICATION: [strchr x N]
IDENTIFICATION_CODE_EPOCH = 2, // EPOCH: [epoch#]
};

/// The epoch that defines the auto-upgrade compatibility for the bitcode.
///
/// LLVM guarantees in a major release that a minor release can read bitcode
/// generated by previous minor releases. We translate this by making the reader
/// accept only bitcode with the same epoch, except for the X.0 release which
/// also accepts N-1.
///
/// NOTE(review): the reader code in this change only tests the epoch for
/// equality with this value; the N-1 exception described above does not appear
/// in it -- confirm whether it is implemented elsewhere.
enum { BITCODE_CURRENT_EPOCH = 0 };

/// MODULE blocks have a number of optional fields and subblocks.
enum ModuleCodes {
Expand Down
67 changes: 67 additions & 0 deletions llvm/lib/Bitcode/Reader/BitcodeReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,8 @@ class BitcodeReader : public GVMaterializer {
uint64_t LastFunctionBlockBit = 0;
bool SeenValueSymbolTable = false;
unsigned VSTOffset = 0;
// Contains an arbitrary and optional string identifying the bitcode producer
std::string ProducerIdentification;

std::vector<Type*> TypeList;
BitcodeReaderValueList ValueList;
Expand Down Expand Up @@ -273,6 +275,11 @@ class BitcodeReader : public GVMaterializer {
void setStripDebugInfo() override;

private:
/// Parse the "IDENTIFICATION_BLOCK_ID" block, populate the
/// ProducerIdentification data member, and do some basic enforcement on the
/// "epoch" encoded in the bitcode.
///
/// \returns a default-constructed std::error_code when the end of the block is
/// reached, otherwise the error reported for the malformed or incompatible
/// content.
std::error_code parseBitcodeVersion();

std::vector<StructType *> IdentifiedStructTypes;
StructType *createIdentifiedStructType(LLVMContext &Context, StringRef Name);
StructType *createIdentifiedStructType(LLVMContext &Context);
Expand Down Expand Up @@ -518,10 +525,21 @@ static std::error_code error(DiagnosticHandlerFunction DiagnosticHandler,
}

/// Report an error with code \p E and text \p Message by forwarding to the
/// file-level ::error helper together with this reader's DiagnosticHandler.
///
/// If a producer identification string has been read from the bitcode, it is
/// appended to the message along with the LLVM version of this reader.
///
/// \returns whatever the ::error helper returns for make_error_code(E).
std::error_code BitcodeReader::error(BitcodeError E, const Twine &Message) {
  if (ProducerIdentification.empty())
    return ::error(DiagnosticHandler, make_error_code(E), Message);

  // The concatenation must be built inside the call expression. A Twine only
  // refers to its operands, so binding it to a named local would leave it
  // pointing at intermediate temporaries that are destroyed at the end of the
  // declaration, before the message is ever rendered.
  return ::error(DiagnosticHandler, make_error_code(E),
                 Message + " (Producer: '" + ProducerIdentification +
                     "' Reader: 'LLVM " + LLVM_VERSION_STRING "')");
}

std::error_code BitcodeReader::error(const Twine &Message) {
if (!ProducerIdentification.empty()) {
Twine MsgWithID = Message + " (Producer: '" + ProducerIdentification +
"' Reader: 'LLVM " + LLVM_VERSION_STRING "')";
return ::error(DiagnosticHandler,
make_error_code(BitcodeError::CorruptedBitcode), MsgWithID);
}
return ::error(DiagnosticHandler,
make_error_code(BitcodeError::CorruptedBitcode), Message);
}
Expand Down Expand Up @@ -3061,6 +3079,50 @@ std::error_code BitcodeReader::rememberAndSkipFunctionBodies() {
}
}

/// Parse the identification block: store the producer string in
/// ProducerIdentification and reject bitcode whose epoch differs from
/// bitc::BITCODE_CURRENT_EPOCH.
///
/// \returns a default-constructed std::error_code once the end of the block is
/// reached; otherwise the error produced for a block that cannot be entered, a
/// malformed entry, an unknown record code, an empty epoch record, or an epoch
/// mismatch.
///
/// NOTE(review): the call site visible in parseBitcodeInto discards this
/// return value, so a failure reported here does not stop parsing there.
std::error_code BitcodeReader::parseBitcodeVersion() {
  if (Stream.EnterSubBlock(bitc::IDENTIFICATION_BLOCK_ID))
    return error("Invalid record");

  // Read all the records.
  SmallVector<uint64_t, 64> Record;
  while (true) {
    BitstreamEntry Entry = Stream.advance();

    switch (Entry.Kind) {
    default: // Anything that is neither a record nor the block end.
    case BitstreamEntry::Error:
      return error("Malformed block");
    case BitstreamEntry::EndBlock:
      return std::error_code();
    case BitstreamEntry::Record:
      // The interesting case.
      break;
    }

    // Read a record.
    Record.clear();
    unsigned BitCode = Stream.readRecord(Entry.ID, Record);
    switch (BitCode) {
    default: // Default behavior: reject
      return error("Invalid value");
    case bitc::IDENTIFICATION_CODE_STRING: // IDENTIFICATION: [strchr x N]
      convertToString(Record, 0, ProducerIdentification);
      break;
    case bitc::IDENTIFICATION_CODE_EPOCH: { // EPOCH: [epoch#]
      // A record without operands would make Record[0] an out-of-bounds read.
      if (Record.empty())
        return error("Invalid record");

      // Compare the full 64-bit operand: narrowing it first would let an
      // epoch such as 2^32 alias the current one and be accepted.
      const uint64_t Epoch = Record[0];
      if (Epoch != static_cast<uint64_t>(bitc::BITCODE_CURRENT_EPOCH)) {
        auto BitcodeEpoch = std::to_string(Epoch);
        auto CurrentEpoch = std::to_string(bitc::BITCODE_CURRENT_EPOCH);
        return error(Twine("Incompatible epoch: Bitcode '") + BitcodeEpoch +
                     "' vs current: '" + CurrentEpoch + "'");
      }
      break;
    }
    }
  }
}

std::error_code BitcodeReader::parseModule(uint64_t ResumeBit,
bool ShouldLazyLoadMetadata) {
if (ResumeBit)
Expand Down Expand Up @@ -3552,6 +3614,11 @@ BitcodeReader::parseBitcodeInto(std::unique_ptr<DataStreamer> Streamer,
if (Entry.Kind != BitstreamEntry::SubBlock)
return error("Malformed block");

if (Entry.ID == bitc::IDENTIFICATION_BLOCK_ID) {
parseBitcodeVersion();
continue;
}

if (Entry.ID == bitc::MODULE_BLOCK_ID)
return parseModule(0, ShouldLazyLoadMetadata);

Expand Down
25 changes: 25 additions & 0 deletions llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2829,6 +2829,29 @@ static void WriteCombinedFunctionSummary(const FunctionInfoIndex &I,
Stream.ExitBlock();
}

// Create the "IDENTIFICATION_BLOCK_ID" containing a single string with the
// current llvm version, and a record for the epoch number.
//
// M is not used by this function at present. Stream receives the block, which
// is opened with an abbreviation id width of 5 bits.
static void WriteIdentificationBlock(const Module *M, BitstreamWriter &Stream) {
  Stream.EnterSubblock(bitc::IDENTIFICATION_BLOCK_ID, 5);

  // Write the "user readable" string identifying the bitcode producer.
  // NOTE(review): the abbreviation objects are handed to EmitAbbrev and never
  // deleted here -- presumably the stream takes ownership; confirm.
  BitCodeAbbrev *Abbv = new BitCodeAbbrev();
  Abbv->Add(BitCodeAbbrevOp(bitc::IDENTIFICATION_CODE_STRING));
  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6));
  auto StringAbbrev = Stream.EmitAbbrev(Abbv);
  WriteStringRecord(bitc::IDENTIFICATION_CODE_STRING,
                    "LLVM" LLVM_VERSION_STRING, StringAbbrev, Stream);

  // Write the epoch version. The abbreviation has to be registered with the
  // stream and then used for the record; building it without emitting it
  // leaks the object and leaves the record unabbreviated.
  Abbv = new BitCodeAbbrev();
  Abbv->Add(BitCodeAbbrevOp(bitc::IDENTIFICATION_CODE_EPOCH));
  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));
  auto EpochAbbrev = Stream.EmitAbbrev(Abbv);
  SmallVector<unsigned, 1> Vals = {bitc::BITCODE_CURRENT_EPOCH};
  Stream.EmitRecord(bitc::IDENTIFICATION_CODE_EPOCH, Vals, EpochAbbrev);
  Stream.ExitBlock();
}

/// WriteModule - Emit the specified module to the bitstream.
static void WriteModule(const Module *M, BitstreamWriter &Stream,
bool ShouldPreserveUseListOrder,
Expand Down Expand Up @@ -3000,6 +3023,8 @@ void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out,
// Emit the file header.
WriteBitcodeHeader(Stream);

WriteIdentificationBlock(M, Stream);

// Emit the module.
WriteModule(M, Stream, ShouldPreserveUseListOrder, BitcodeStartBit,
EmitFunctionSummary);
Expand Down
6 changes: 6 additions & 0 deletions llvm/test/Bitcode/identification.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
; Check that a block "IDENTIFICATION_BLOCK_ID" is emitted.
; The input holds no IR: the writer emits the identification block
; unconditionally before the module, so an empty module is enough. The
; directives below expect the string record first, then the epoch record,
; then the end of the block, each on the line right after the previous one.
;RUN: llvm-as < %s | llvm-bcanalyzer -dump | FileCheck %s
;CHECK: <IDENTIFICATION_BLOCK_ID
;CHECK-NEXT: <STRING
;CHECK-NEXT: <EPOCH
;CHECK-NEXT: </IDENTIFICATION_BLOCK_ID
9 changes: 9 additions & 0 deletions llvm/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,8 @@ static const char *GetBlockName(unsigned BlockID,
case bitc::TYPE_BLOCK_ID_NEW: return "TYPE_BLOCK_ID";
case bitc::CONSTANTS_BLOCK_ID: return "CONSTANTS_BLOCK";
case bitc::FUNCTION_BLOCK_ID: return "FUNCTION_BLOCK";
case bitc::IDENTIFICATION_BLOCK_ID:
return "IDENTIFICATION_BLOCK_ID";
case bitc::VALUE_SYMTAB_BLOCK_ID: return "VALUE_SYMTAB";
case bitc::METADATA_BLOCK_ID: return "METADATA_BLOCK";
case bitc::METADATA_ATTACHMENT_ID: return "METADATA_ATTACHMENT_BLOCK";
Expand Down Expand Up @@ -170,6 +172,13 @@ static const char *GetCodeName(unsigned CodeID, unsigned BlockID,
STRINGIFY_CODE(MODULE_CODE, GCNAME)
STRINGIFY_CODE(MODULE_CODE, VSTOFFSET)
}
case bitc::IDENTIFICATION_BLOCK_ID:
switch (CodeID) {
default:
return nullptr;
STRINGIFY_CODE(IDENTIFICATION_CODE, STRING)
STRINGIFY_CODE(IDENTIFICATION_CODE, EPOCH)
}
case bitc::PARAMATTR_BLOCK_ID:
switch (CodeID) {
default: return nullptr;
Expand Down

0 comments on commit 5d30328

Please sign in to comment.