-
Notifications
You must be signed in to change notification settings - Fork 15.1k
Modify llvm-dwp to be able to emit string tables over 4GB without losing data #167457
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
We can change llvm-dwp to emit a DWARF64 version of the .debug_str_offsets tables for .dwo files in a .dwp file. This allows the string table to exceed 4GB without truncating string offsets into the .debug_str section and losing data. llvm-dwp will append all strings to the .debug_str section for a .dwo file, and if any of the new string offsets exceeds UINT32_MAX, it will upgrade the .debug_str_offsets table to a DWARF64 header so that each string offset in that table can be a 64-bit offset. Fixed LLDB to be able to successfully load the 64-bit string tables in .dwp files. Fixed llvm-dwarfdump and LLVM DWARF parsing code to do the right thing with DWARF64 string table headers.
|
Created the pull request prior to adding testing to get comments on this. |
|
@llvm/pr-subscribers-debuginfo Author: Greg Clayton (clayborg) Changes: We can change llvm-dwp to emit a DWARF64 version of the .debug_str_offsets tables for .dwo files in a .dwp file. This allows the string table to exceed 4GB without truncating string offsets into the .debug_str section and losing data. llvm-dwp will append all strings to the .debug_str section for a .dwo file, and if any of the new string offsets exceeds UINT32_MAX, it will upgrade the .debug_str_offsets table to a DWARF64 header so that each string offset in that table can be a 64-bit offset. Fixed LLDB to be able to successfully load the 64-bit string tables in .dwp files. Fixed llvm-dwarfdump and LLVM DWARF parsing code to do the right thing with DWARF64 string table headers. Full diff: https://github.com/llvm/llvm-project/pull/167457.diff 6 Files Affected:
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp
index 94fc2e83e899d..7b7864caf8c09 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp
@@ -360,8 +360,10 @@ void DWARFUnit::SetDwoStrOffsetsBase() {
const DWARFDataExtractor &strOffsets =
GetSymbolFileDWARF().GetDWARFContext().getOrLoadStrOffsetsData();
uint64_t length = strOffsets.GetU32(&baseOffset);
- if (length == 0xffffffff)
+ if (length == 0xffffffff) {
length = strOffsets.GetU64(&baseOffset);
+ m_str_offsets_size = 8;
+ }
// Check version.
if (strOffsets.GetU16(&baseOffset) < 5)
@@ -369,6 +371,10 @@ void DWARFUnit::SetDwoStrOffsetsBase() {
// Skip padding.
baseOffset += 2;
+ } else {
+ // Size of offset for .debug_str_offsets is same as DWARF offset byte size
+ // of the DWARFUnit for DWARF version 4 and earlier.
+ m_str_offsets_size = m_header.getDwarfOffsetByteSize();
}
SetStrOffsetsBase(baseOffset);
@@ -1079,10 +1085,9 @@ uint32_t DWARFUnit::GetHeaderByteSize() const { return m_header.getSize(); }
std::optional<uint64_t>
DWARFUnit::GetStringOffsetSectionItem(uint32_t index) const {
- lldb::offset_t offset =
- GetStrOffsetsBase() + index * m_header.getDwarfOffsetByteSize();
+ lldb::offset_t offset = GetStrOffsetsBase() + index * m_str_offsets_size;
return m_dwarf.GetDWARFContext().getOrLoadStrOffsetsData().GetMaxU64(
- &offset, m_header.getDwarfOffsetByteSize());
+ &offset, m_str_offsets_size);
}
llvm::Expected<llvm::DWARFAddressRangesVector>
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h
index 91a693860c55a..856db5e4101cd 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h
@@ -364,7 +364,7 @@ class DWARFUnit : public DWARFExpression::Delegate, public UserID {
dw_offset_t m_line_table_offset = DW_INVALID_OFFSET;
dw_offset_t m_str_offsets_base = 0; // Value of DW_AT_str_offsets_base.
-
+ dw_offset_t m_str_offsets_size = 4; // Size in bytes of the string offsets.
std::optional<llvm::DWARFDebugRnglistTable> m_rnglist_table;
bool m_rnglist_table_done = false;
std::optional<llvm::DWARFListTableHeader> m_loclist_table_header;
diff --git a/llvm/include/llvm/DWP/DWP.h b/llvm/include/llvm/DWP/DWP.h
index a759bae10d160..cc38369658eaa 100644
--- a/llvm/include/llvm/DWP/DWP.h
+++ b/llvm/include/llvm/DWP/DWP.h
@@ -70,6 +70,8 @@ struct CompileUnitIdentifiers {
LLVM_ABI Error write(MCStreamer &Out, ArrayRef<std::string> Inputs,
OnCuIndexOverflow OverflowOptValue);
+typedef std::vector<std::pair<DWARFSectionKind, uint32_t>> SectionLengths;
+
LLVM_ABI Error handleSection(
const StringMap<std::pair<MCSection *, DWARFSectionKind>> &KnownSections,
const MCSection *StrSection, const MCSection *StrOffsetSection,
@@ -82,7 +84,7 @@ LLVM_ABI Error handleSection(
std::vector<StringRef> &CurTypesSection,
std::vector<StringRef> &CurInfoSection, StringRef &AbbrevSection,
StringRef &CurCUIndexSection, StringRef &CurTUIndexSection,
- std::vector<std::pair<DWARFSectionKind, uint32_t>> &SectionLength);
+ SectionLengths &SectionLength);
LLVM_ABI Expected<InfoSectionUnitHeader>
parseInfoSectionUnitHeader(StringRef Info);
diff --git a/llvm/include/llvm/DWP/DWPStringPool.h b/llvm/include/llvm/DWP/DWPStringPool.h
index 1354b46f156b6..d1486ff7872e1 100644
--- a/llvm/include/llvm/DWP/DWPStringPool.h
+++ b/llvm/include/llvm/DWP/DWPStringPool.h
@@ -32,13 +32,13 @@ class DWPStringPool {
MCStreamer &Out;
MCSection *Sec;
- DenseMap<const char *, uint32_t, CStrDenseMapInfo> Pool;
- uint32_t Offset = 0;
+ DenseMap<const char *, uint64_t, CStrDenseMapInfo> Pool;
+ uint64_t Offset = 0;
public:
DWPStringPool(MCStreamer &Out, MCSection *Sec) : Out(Out), Sec(Sec) {}
- uint32_t getOffset(const char *Str, unsigned Length) {
+ uint64_t getOffset(const char *Str, unsigned Length) {
assert(strlen(Str) + 1 == Length && "Ensure length hint is correct");
auto Pair = Pool.insert(std::make_pair(Str, Offset));
diff --git a/llvm/lib/DWP/DWP.cpp b/llvm/lib/DWP/DWP.cpp
index b565edbfe96db..54edce81208b5 100644
--- a/llvm/lib/DWP/DWP.cpp
+++ b/llvm/lib/DWP/DWP.cpp
@@ -413,33 +413,43 @@ Expected<InfoSectionUnitHeader> parseInfoSectionUnitHeader(StringRef Info) {
}
static void writeNewOffsetsTo(MCStreamer &Out, DataExtractor &Data,
- DenseMap<uint64_t, uint32_t> &OffsetRemapping,
- uint64_t &Offset, uint64_t &Size) {
+ DenseMap<uint64_t, uint64_t> &OffsetRemapping,
+ uint64_t &Offset, const uint64_t Size,
+ uint32_t OldOffsetSize, uint32_t NewOffsetSize) {
while (Offset < Size) {
- auto OldOffset = Data.getU32(&Offset);
- auto NewOffset = OffsetRemapping[OldOffset];
- Out.emitIntValue(NewOffset, 4);
+ const uint64_t OldOffset = Data.getUnsigned(&Offset, OldOffsetSize);
+ const uint64_t NewOffset = OffsetRemapping[OldOffset];
+ assert(NewOffsetSize == 8 || NewOffset <= UINT32_MAX);
+ Out.emitIntValue(NewOffset, NewOffsetSize);
}
}
void writeStringsAndOffsets(MCStreamer &Out, DWPStringPool &Strings,
MCSection *StrOffsetSection,
StringRef CurStrSection,
- StringRef CurStrOffsetSection, uint16_t Version) {
+ StringRef CurStrOffsetSection, uint16_t Version,
+ SectionLengths &SectionLength) {
// Could possibly produce an error or warning if one of these was non-null but
// the other was null.
if (CurStrSection.empty() || CurStrOffsetSection.empty())
return;
- DenseMap<uint64_t, uint32_t> OffsetRemapping;
+ DenseMap<uint64_t, uint64_t> OffsetRemapping;
DataExtractor Data(CurStrSection, true, 0);
uint64_t LocalOffset = 0;
uint64_t PrevOffset = 0;
+
+ // Keep track if any new string offsets exceed UINT32_MAX. If any do, we can
+ // emit a DWARF64 .debug_str_offsets table for this compile unit.
+ uint32_t OldOffsetSize = 4;
+ uint32_t NewOffsetSize = 4;
while (const char *S = Data.getCStr(&LocalOffset)) {
- OffsetRemapping[PrevOffset] =
- Strings.getOffset(S, LocalOffset - PrevOffset);
+ uint64_t NewOffset = Strings.getOffset(S, LocalOffset - PrevOffset);
+ OffsetRemapping[PrevOffset] = NewOffset;
+ if (NewOffset > UINT32_MAX)
+ NewOffsetSize = 8;
PrevOffset = LocalOffset;
}
@@ -451,7 +461,7 @@ void writeStringsAndOffsets(MCStreamer &Out, DWPStringPool &Strings,
uint64_t Size = CurStrOffsetSection.size();
if (Version > 4) {
while (Offset < Size) {
- uint64_t HeaderSize = debugStrOffsetsHeaderSize(Data, Version);
+ const uint64_t HeaderSize = debugStrOffsetsHeaderSize(Data, Version);
assert(HeaderSize <= Size - Offset &&
"StrOffsetSection size is less than its header");
@@ -461,16 +471,52 @@ void writeStringsAndOffsets(MCStreamer &Out, DWPStringPool &Strings,
if (HeaderSize == 8) {
ContributionSize = Data.getU32(&HeaderLengthOffset);
} else if (HeaderSize == 16) {
+ OldOffsetSize = 8;
HeaderLengthOffset += 4; // skip the dwarf64 marker
ContributionSize = Data.getU64(&HeaderLengthOffset);
}
ContributionEnd = ContributionSize + HeaderLengthOffset;
- Out.emitBytes(Data.getBytes(&Offset, HeaderSize));
- writeNewOffsetsTo(Out, Data, OffsetRemapping, Offset, ContributionEnd);
+
+ StringRef HeaderBytes = Data.getBytes(&Offset, HeaderSize);
+ if (OldOffsetSize == 4 && NewOffsetSize == 8) {
+ // We had a DWARF32 .debug_str_offsets header, but we need to emit
+ // some string offsets that require 64 bit offsets on the .debug_str
+ // section. Emit the .debug_str_offsets header in DWARF64 format so we
+ // can emit string offsets that exceed UINT32_MAX without truncating
+ // the string offset.
+
+ // 2 bytes for DWARF version, 2 bytes pad.
+ const uint64_t VersionPadSize = 4;
+ const uint64_t NewLength =
+ (ContributionSize - VersionPadSize) * 2 + VersionPadSize;
+ // Emit the DWARF64 length that starts with a 4 byte DW_LENGTH_DWARF64
+ // value followed by the 8 byte updated length.
+ Out.emitIntValue(llvm::dwarf::DW_LENGTH_DWARF64, 4);
+ Out.emitIntValue(NewLength, 8);
+ // Emit DWARF version as a 2 byte integer.
+ Out.emitIntValue(Version, 2);
+ // Emit 2 bytes of padding.
+ Out.emitIntValue(0, 2);
+ // Update the .debug_str_offsets section length contribution for the
+ // this .dwo file.
+ for (auto &Pair : SectionLength) {
+ if (Pair.first == DW_SECT_STR_OFFSETS) {
+ Pair.second = NewLength + 12;
+ break;
+ }
+ }
+ } else {
+ // Just emit the same .debug_str_offsets header.
+ Out.emitBytes(HeaderBytes);
+ }
+ writeNewOffsetsTo(Out, Data, OffsetRemapping, Offset, ContributionEnd,
+ OldOffsetSize, NewOffsetSize);
}
} else {
- writeNewOffsetsTo(Out, Data, OffsetRemapping, Offset, Size);
+ assert(OldOffsetSize == NewOffsetSize);
+ writeNewOffsetsTo(Out, Data, OffsetRemapping, Offset, Size, OldOffsetSize,
+ NewOffsetSize);
}
}
@@ -562,7 +608,7 @@ Error handleSection(
std::vector<StringRef> &CurTypesSection,
std::vector<StringRef> &CurInfoSection, StringRef &AbbrevSection,
StringRef &CurCUIndexSection, StringRef &CurTUIndexSection,
- std::vector<std::pair<DWARFSectionKind, uint32_t>> &SectionLength) {
+ SectionLengths &SectionLength) {
if (Section.isBSS())
return Error::success();
@@ -684,7 +730,7 @@ Error write(MCStreamer &Out, ArrayRef<std::string> Inputs,
// This maps each section contained in this file to its length.
// This information is later on used to calculate the contributions,
// i.e. offset and length, of each compile/type unit to a section.
- std::vector<std::pair<DWARFSectionKind, uint32_t>> SectionLength;
+ SectionLengths SectionLength;
for (const auto &Section : Obj.sections())
if (auto Err = handleSection(
@@ -713,7 +759,7 @@ Error write(MCStreamer &Out, ArrayRef<std::string> Inputs,
}
writeStringsAndOffsets(Out, Strings, StrOffsetSection, CurStrSection,
- CurStrOffsetSection, Header.Version);
+ CurStrOffsetSection, Header.Version, SectionLength);
for (auto Pair : SectionLength) {
auto Index = getContributionIndex(Pair.first, IndexVersion);
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
index da0bf03e1ac57..b4256ae13914c 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
@@ -1187,9 +1187,18 @@ DWARFUnit::determineStringOffsetsTableContributionDWO(DWARFDataExtractor &DA) {
if (getVersion() >= 5) {
if (DA.getData().data() == nullptr)
return std::nullopt;
- Offset += Header.getFormat() == dwarf::DwarfFormat::DWARF32 ? 8 : 16;
+ // For .dwo files, the section contribution for the .debug_str_offsets
+ // points to the string offsets table header. Decode the format from this
+ // data as llvm-dwp has been modified to be able to emit a
+ // .debug_str_offsets table as DWARF64 even if the compile unit is DWARF32.
+ // This allows .dwp files to have string tables that exceed UINT32_MAX in
+ // size.
+ uint64_t Length = 0;
+ DwarfFormat Format = dwarf::DwarfFormat::DWARF32;
+ std::tie(Length, Format) = DA.getInitialLength(&Offset);
+ Offset += 4; // Skip the DWARF version uint16_t and the uint16_t padding.
// Look for a valid contribution at the given offset.
- auto DescOrError = parseDWARFStringOffsetsTableHeader(DA, Header.getFormat(), Offset);
+ auto DescOrError = parseDWARFStringOffsetsTableHeader(DA, Format, Offset);
if (!DescOrError)
return DescOrError.takeError();
return *DescOrError;
|
|
@llvm/pr-subscribers-lldb Author: Greg Clayton (clayborg) Changes: We can change llvm-dwp to emit a DWARF64 version of the .debug_str_offsets tables for .dwo files in a .dwp file. This allows the string table to exceed 4GB without truncating string offsets into the .debug_str section and losing data. llvm-dwp will append all strings to the .debug_str section for a .dwo file, and if any of the new string offsets exceeds UINT32_MAX, it will upgrade the .debug_str_offsets table to a DWARF64 header so that each string offset in that table can be a 64-bit offset. Fixed LLDB to be able to successfully load the 64-bit string tables in .dwp files. Fixed llvm-dwarfdump and LLVM DWARF parsing code to do the right thing with DWARF64 string table headers. Full diff: https://github.com/llvm/llvm-project/pull/167457.diff 6 Files Affected:
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp
index 94fc2e83e899d..7b7864caf8c09 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.cpp
@@ -360,8 +360,10 @@ void DWARFUnit::SetDwoStrOffsetsBase() {
const DWARFDataExtractor &strOffsets =
GetSymbolFileDWARF().GetDWARFContext().getOrLoadStrOffsetsData();
uint64_t length = strOffsets.GetU32(&baseOffset);
- if (length == 0xffffffff)
+ if (length == 0xffffffff) {
length = strOffsets.GetU64(&baseOffset);
+ m_str_offsets_size = 8;
+ }
// Check version.
if (strOffsets.GetU16(&baseOffset) < 5)
@@ -369,6 +371,10 @@ void DWARFUnit::SetDwoStrOffsetsBase() {
// Skip padding.
baseOffset += 2;
+ } else {
+ // Size of offset for .debug_str_offsets is same as DWARF offset byte size
+ // of the DWARFUnit for DWARF version 4 and earlier.
+ m_str_offsets_size = m_header.getDwarfOffsetByteSize();
}
SetStrOffsetsBase(baseOffset);
@@ -1079,10 +1085,9 @@ uint32_t DWARFUnit::GetHeaderByteSize() const { return m_header.getSize(); }
std::optional<uint64_t>
DWARFUnit::GetStringOffsetSectionItem(uint32_t index) const {
- lldb::offset_t offset =
- GetStrOffsetsBase() + index * m_header.getDwarfOffsetByteSize();
+ lldb::offset_t offset = GetStrOffsetsBase() + index * m_str_offsets_size;
return m_dwarf.GetDWARFContext().getOrLoadStrOffsetsData().GetMaxU64(
- &offset, m_header.getDwarfOffsetByteSize());
+ &offset, m_str_offsets_size);
}
llvm::Expected<llvm::DWARFAddressRangesVector>
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h
index 91a693860c55a..856db5e4101cd 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h
@@ -364,7 +364,7 @@ class DWARFUnit : public DWARFExpression::Delegate, public UserID {
dw_offset_t m_line_table_offset = DW_INVALID_OFFSET;
dw_offset_t m_str_offsets_base = 0; // Value of DW_AT_str_offsets_base.
-
+ dw_offset_t m_str_offsets_size = 4; // Size in bytes of the string offsets.
std::optional<llvm::DWARFDebugRnglistTable> m_rnglist_table;
bool m_rnglist_table_done = false;
std::optional<llvm::DWARFListTableHeader> m_loclist_table_header;
diff --git a/llvm/include/llvm/DWP/DWP.h b/llvm/include/llvm/DWP/DWP.h
index a759bae10d160..cc38369658eaa 100644
--- a/llvm/include/llvm/DWP/DWP.h
+++ b/llvm/include/llvm/DWP/DWP.h
@@ -70,6 +70,8 @@ struct CompileUnitIdentifiers {
LLVM_ABI Error write(MCStreamer &Out, ArrayRef<std::string> Inputs,
OnCuIndexOverflow OverflowOptValue);
+typedef std::vector<std::pair<DWARFSectionKind, uint32_t>> SectionLengths;
+
LLVM_ABI Error handleSection(
const StringMap<std::pair<MCSection *, DWARFSectionKind>> &KnownSections,
const MCSection *StrSection, const MCSection *StrOffsetSection,
@@ -82,7 +84,7 @@ LLVM_ABI Error handleSection(
std::vector<StringRef> &CurTypesSection,
std::vector<StringRef> &CurInfoSection, StringRef &AbbrevSection,
StringRef &CurCUIndexSection, StringRef &CurTUIndexSection,
- std::vector<std::pair<DWARFSectionKind, uint32_t>> &SectionLength);
+ SectionLengths &SectionLength);
LLVM_ABI Expected<InfoSectionUnitHeader>
parseInfoSectionUnitHeader(StringRef Info);
diff --git a/llvm/include/llvm/DWP/DWPStringPool.h b/llvm/include/llvm/DWP/DWPStringPool.h
index 1354b46f156b6..d1486ff7872e1 100644
--- a/llvm/include/llvm/DWP/DWPStringPool.h
+++ b/llvm/include/llvm/DWP/DWPStringPool.h
@@ -32,13 +32,13 @@ class DWPStringPool {
MCStreamer &Out;
MCSection *Sec;
- DenseMap<const char *, uint32_t, CStrDenseMapInfo> Pool;
- uint32_t Offset = 0;
+ DenseMap<const char *, uint64_t, CStrDenseMapInfo> Pool;
+ uint64_t Offset = 0;
public:
DWPStringPool(MCStreamer &Out, MCSection *Sec) : Out(Out), Sec(Sec) {}
- uint32_t getOffset(const char *Str, unsigned Length) {
+ uint64_t getOffset(const char *Str, unsigned Length) {
assert(strlen(Str) + 1 == Length && "Ensure length hint is correct");
auto Pair = Pool.insert(std::make_pair(Str, Offset));
diff --git a/llvm/lib/DWP/DWP.cpp b/llvm/lib/DWP/DWP.cpp
index b565edbfe96db..54edce81208b5 100644
--- a/llvm/lib/DWP/DWP.cpp
+++ b/llvm/lib/DWP/DWP.cpp
@@ -413,33 +413,43 @@ Expected<InfoSectionUnitHeader> parseInfoSectionUnitHeader(StringRef Info) {
}
static void writeNewOffsetsTo(MCStreamer &Out, DataExtractor &Data,
- DenseMap<uint64_t, uint32_t> &OffsetRemapping,
- uint64_t &Offset, uint64_t &Size) {
+ DenseMap<uint64_t, uint64_t> &OffsetRemapping,
+ uint64_t &Offset, const uint64_t Size,
+ uint32_t OldOffsetSize, uint32_t NewOffsetSize) {
while (Offset < Size) {
- auto OldOffset = Data.getU32(&Offset);
- auto NewOffset = OffsetRemapping[OldOffset];
- Out.emitIntValue(NewOffset, 4);
+ const uint64_t OldOffset = Data.getUnsigned(&Offset, OldOffsetSize);
+ const uint64_t NewOffset = OffsetRemapping[OldOffset];
+ assert(NewOffsetSize == 8 || NewOffset <= UINT32_MAX);
+ Out.emitIntValue(NewOffset, NewOffsetSize);
}
}
void writeStringsAndOffsets(MCStreamer &Out, DWPStringPool &Strings,
MCSection *StrOffsetSection,
StringRef CurStrSection,
- StringRef CurStrOffsetSection, uint16_t Version) {
+ StringRef CurStrOffsetSection, uint16_t Version,
+ SectionLengths &SectionLength) {
// Could possibly produce an error or warning if one of these was non-null but
// the other was null.
if (CurStrSection.empty() || CurStrOffsetSection.empty())
return;
- DenseMap<uint64_t, uint32_t> OffsetRemapping;
+ DenseMap<uint64_t, uint64_t> OffsetRemapping;
DataExtractor Data(CurStrSection, true, 0);
uint64_t LocalOffset = 0;
uint64_t PrevOffset = 0;
+
+ // Keep track if any new string offsets exceed UINT32_MAX. If any do, we can
+ // emit a DWARF64 .debug_str_offsets table for this compile unit.
+ uint32_t OldOffsetSize = 4;
+ uint32_t NewOffsetSize = 4;
while (const char *S = Data.getCStr(&LocalOffset)) {
- OffsetRemapping[PrevOffset] =
- Strings.getOffset(S, LocalOffset - PrevOffset);
+ uint64_t NewOffset = Strings.getOffset(S, LocalOffset - PrevOffset);
+ OffsetRemapping[PrevOffset] = NewOffset;
+ if (NewOffset > UINT32_MAX)
+ NewOffsetSize = 8;
PrevOffset = LocalOffset;
}
@@ -451,7 +461,7 @@ void writeStringsAndOffsets(MCStreamer &Out, DWPStringPool &Strings,
uint64_t Size = CurStrOffsetSection.size();
if (Version > 4) {
while (Offset < Size) {
- uint64_t HeaderSize = debugStrOffsetsHeaderSize(Data, Version);
+ const uint64_t HeaderSize = debugStrOffsetsHeaderSize(Data, Version);
assert(HeaderSize <= Size - Offset &&
"StrOffsetSection size is less than its header");
@@ -461,16 +471,52 @@ void writeStringsAndOffsets(MCStreamer &Out, DWPStringPool &Strings,
if (HeaderSize == 8) {
ContributionSize = Data.getU32(&HeaderLengthOffset);
} else if (HeaderSize == 16) {
+ OldOffsetSize = 8;
HeaderLengthOffset += 4; // skip the dwarf64 marker
ContributionSize = Data.getU64(&HeaderLengthOffset);
}
ContributionEnd = ContributionSize + HeaderLengthOffset;
- Out.emitBytes(Data.getBytes(&Offset, HeaderSize));
- writeNewOffsetsTo(Out, Data, OffsetRemapping, Offset, ContributionEnd);
+
+ StringRef HeaderBytes = Data.getBytes(&Offset, HeaderSize);
+ if (OldOffsetSize == 4 && NewOffsetSize == 8) {
+ // We had a DWARF32 .debug_str_offsets header, but we need to emit
+ // some string offsets that require 64 bit offsets on the .debug_str
+ // section. Emit the .debug_str_offsets header in DWARF64 format so we
+ // can emit string offsets that exceed UINT32_MAX without truncating
+ // the string offset.
+
+ // 2 bytes for DWARF version, 2 bytes pad.
+ const uint64_t VersionPadSize = 4;
+ const uint64_t NewLength =
+ (ContributionSize - VersionPadSize) * 2 + VersionPadSize;
+ // Emit the DWARF64 length that starts with a 4 byte DW_LENGTH_DWARF64
+ // value followed by the 8 byte updated length.
+ Out.emitIntValue(llvm::dwarf::DW_LENGTH_DWARF64, 4);
+ Out.emitIntValue(NewLength, 8);
+ // Emit DWARF version as a 2 byte integer.
+ Out.emitIntValue(Version, 2);
+ // Emit 2 bytes of padding.
+ Out.emitIntValue(0, 2);
+ // Update the .debug_str_offsets section length contribution for the
+ // this .dwo file.
+ for (auto &Pair : SectionLength) {
+ if (Pair.first == DW_SECT_STR_OFFSETS) {
+ Pair.second = NewLength + 12;
+ break;
+ }
+ }
+ } else {
+ // Just emit the same .debug_str_offsets header.
+ Out.emitBytes(HeaderBytes);
+ }
+ writeNewOffsetsTo(Out, Data, OffsetRemapping, Offset, ContributionEnd,
+ OldOffsetSize, NewOffsetSize);
}
} else {
- writeNewOffsetsTo(Out, Data, OffsetRemapping, Offset, Size);
+ assert(OldOffsetSize == NewOffsetSize);
+ writeNewOffsetsTo(Out, Data, OffsetRemapping, Offset, Size, OldOffsetSize,
+ NewOffsetSize);
}
}
@@ -562,7 +608,7 @@ Error handleSection(
std::vector<StringRef> &CurTypesSection,
std::vector<StringRef> &CurInfoSection, StringRef &AbbrevSection,
StringRef &CurCUIndexSection, StringRef &CurTUIndexSection,
- std::vector<std::pair<DWARFSectionKind, uint32_t>> &SectionLength) {
+ SectionLengths &SectionLength) {
if (Section.isBSS())
return Error::success();
@@ -684,7 +730,7 @@ Error write(MCStreamer &Out, ArrayRef<std::string> Inputs,
// This maps each section contained in this file to its length.
// This information is later on used to calculate the contributions,
// i.e. offset and length, of each compile/type unit to a section.
- std::vector<std::pair<DWARFSectionKind, uint32_t>> SectionLength;
+ SectionLengths SectionLength;
for (const auto &Section : Obj.sections())
if (auto Err = handleSection(
@@ -713,7 +759,7 @@ Error write(MCStreamer &Out, ArrayRef<std::string> Inputs,
}
writeStringsAndOffsets(Out, Strings, StrOffsetSection, CurStrSection,
- CurStrOffsetSection, Header.Version);
+ CurStrOffsetSection, Header.Version, SectionLength);
for (auto Pair : SectionLength) {
auto Index = getContributionIndex(Pair.first, IndexVersion);
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
index da0bf03e1ac57..b4256ae13914c 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
@@ -1187,9 +1187,18 @@ DWARFUnit::determineStringOffsetsTableContributionDWO(DWARFDataExtractor &DA) {
if (getVersion() >= 5) {
if (DA.getData().data() == nullptr)
return std::nullopt;
- Offset += Header.getFormat() == dwarf::DwarfFormat::DWARF32 ? 8 : 16;
+ // For .dwo files, the section contribution for the .debug_str_offsets
+ // points to the string offsets table header. Decode the format from this
+ // data as llvm-dwp has been modified to be able to emit a
+ // .debug_str_offsets table as DWARF64 even if the compile unit is DWARF32.
+ // This allows .dwp files to have string tables that exceed UINT32_MAX in
+ // size.
+ uint64_t Length = 0;
+ DwarfFormat Format = dwarf::DwarfFormat::DWARF32;
+ std::tie(Length, Format) = DA.getInitialLength(&Offset);
+ Offset += 4; // Skip the DWARF version uint16_t and the uint16_t padding.
// Look for a valid contribution at the given offset.
- auto DescOrError = parseDWARFStringOffsetsTableHeader(DA, Header.getFormat(), Offset);
+ auto DescOrError = parseDWARFStringOffsetsTableHeader(DA, Format, Offset);
if (!DescOrError)
return DescOrError.takeError();
return *DescOrError;
|
We can change llvm-dwp to emit a DWARF64 version of the .debug_str_offsets tables for .dwo files in a .dwp file. This allows the string table to exceed 4GB without truncating string offsets into the .debug_str section and losing data. llvm-dwp will append all strings to the .debug_str section for a .dwo file, and if any of the new string offsets exceeds UINT32_MAX, it will upgrade the .debug_str_offsets table to a DWARF64 header so that each string offset in that table can be a 64-bit offset.
Fixed LLDB to be able to successfully load the 64 bit string tables in .dwp files.
Fixed llvm-dwarfdump and LLVM DWARF parsing code to do the right thing with DWARF64 string table headers.