std.debug.Coverage.resolveAddressesDwarf: fix broken logic

The implementation assumed that compilation units did not overlap, which
is not the case. The new implementation uses .debug_ranges to iterate
over the requested PCs.

This partially resolves #20990. The dump-cov tool is fixed but the same
fix needs to be applied to `std.Build.Fuzz.WebServer` (sorting the PC
list before passing it to be resolved by debug info).

I am observing LLVM emit multiple 8-bit counters for the same PC
addresses when enabling `-fsanitize-coverage=inline-8bit-counters`. This
seems like a bug in LLVM. I can't fathom why that would be desireable.
This commit is contained in:
Andrew Kelley 2024-08-09 19:49:48 -07:00
parent 0b5ea2b902
commit b5398180d6
5 changed files with 91 additions and 76 deletions

View File

@ -151,46 +151,35 @@ pub fn resolveAddressesDwarf(
d: *Dwarf,
) ResolveAddressesDwarfError!void {
assert(sorted_pc_addrs.len == output.len);
assert(d.compile_units_sorted);
assert(d.ranges.items.len != 0); // call `populateRanges` first.
var cu_i: usize = 0;
var line_table_i: usize = 0;
var cu: *Dwarf.CompileUnit = &d.compile_unit_list.items[0];
var range = cu.pc_range.?;
var range_i: usize = 0;
var range: *std.debug.Dwarf.Range = &d.ranges.items[0];
var line_table_i: usize = undefined;
var prev_cu: ?*std.debug.Dwarf.CompileUnit = null;
// Protects directories and files tables from other threads.
cov.mutex.lock();
defer cov.mutex.unlock();
next_pc: for (sorted_pc_addrs, output) |pc, *out| {
while (pc >= range.end) {
cu_i += 1;
if (cu_i >= d.compile_unit_list.items.len) {
range_i += 1;
if (range_i >= d.ranges.items.len) {
out.* = SourceLocation.invalid;
continue :next_pc;
}
cu = &d.compile_unit_list.items[cu_i];
line_table_i = 0;
range = cu.pc_range orelse {
out.* = SourceLocation.invalid;
continue :next_pc;
};
range = &d.ranges.items[range_i];
}
if (pc < range.start) {
out.* = SourceLocation.invalid;
continue :next_pc;
}
if (line_table_i == 0) {
line_table_i = 1;
const cu = &d.compile_unit_list.items[range.compile_unit_index];
if (cu.src_loc_cache == null) {
cov.mutex.unlock();
defer cov.mutex.lock();
d.populateSrcLocCache(gpa, cu) catch |err| switch (err) {
error.MissingDebugInfo, error.InvalidDebugInfo => {
out.* = SourceLocation.invalid;
cu_i += 1;
if (cu_i < d.compile_unit_list.items.len) {
cu = &d.compile_unit_list.items[cu_i];
line_table_i = 0;
if (cu.pc_range) |r| range = r;
}
continue :next_pc;
},
else => |e| return e,
@ -198,6 +187,14 @@ pub fn resolveAddressesDwarf(
}
const slc = &cu.src_loc_cache.?;
const table_addrs = slc.line_table.keys();
if (cu != prev_cu) {
prev_cu = cu;
line_table_i = std.sort.upperBound(u64, table_addrs, pc, struct {
fn order(context: u64, item: u64) std.math.Order {
return std.math.order(item, context);
}
}.order);
}
while (line_table_i < table_addrs.len and table_addrs[line_table_i] < pc) line_table_i += 1;
const entry = slc.line_table.values()[line_table_i - 1];

View File

@ -38,19 +38,30 @@ pub const call_frame = @import("Dwarf/call_frame.zig");
endian: std.builtin.Endian,
sections: SectionArray = null_section_array,
is_macho: bool,
compile_units_sorted: bool,
// Filled later by the initializer
/// Filled later by the initializer
abbrev_table_list: std.ArrayListUnmanaged(Abbrev.Table) = .{},
/// Filled later by the initializer
compile_unit_list: std.ArrayListUnmanaged(CompileUnit) = .{},
/// Filled later by the initializer
func_list: std.ArrayListUnmanaged(Func) = .{},
eh_frame_hdr: ?ExceptionFrameHeader = null,
// These lookup tables are only used if `eh_frame_hdr` is null
/// These lookup tables are only used if `eh_frame_hdr` is null
cie_map: std.AutoArrayHashMapUnmanaged(u64, CommonInformationEntry) = .{},
// Sorted by start_pc
/// Sorted by start_pc
fde_list: std.ArrayListUnmanaged(FrameDescriptionEntry) = .{},
/// Populated by `populateRanges`.
ranges: std.ArrayListUnmanaged(Range) = .{},
pub const Range = struct {
start: u64,
end: u64,
/// Index into `compile_unit_list`.
compile_unit_index: usize,
};
pub const Section = struct {
data: []const u8,
// Module-relative virtual address.
@ -799,6 +810,7 @@ pub fn deinit(di: *Dwarf, gpa: Allocator) void {
di.func_list.deinit(gpa);
di.cie_map.deinit(gpa);
di.fde_list.deinit(gpa);
di.ranges.deinit(gpa);
di.* = undefined;
}
@ -985,8 +997,8 @@ fn scanAllFunctions(di: *Dwarf, allocator: Allocator) ScanError!void {
try di.func_list.append(allocator, .{
.name = fn_name,
.pc_range = .{
.start = range.start_addr,
.end = range.end_addr,
.start = range.start,
.end = range.end,
},
});
}
@ -1096,37 +1108,38 @@ fn scanAllCompileUnits(di: *Dwarf, allocator: Allocator) ScanError!void {
}
}
/// Populate missing PC ranges in compilation units, and then sort them by start address.
/// Does not guarantee pc_range to be non-null because there could be missing debug info.
pub fn sortCompileUnits(d: *Dwarf) ScanError!void {
assert(!d.compile_units_sorted);
pub fn populateRanges(d: *Dwarf, gpa: Allocator) ScanError!void {
assert(d.ranges.items.len == 0);
for (d.compile_unit_list.items) |*cu| {
if (cu.pc_range != null) continue;
for (d.compile_unit_list.items, 0..) |*cu, cu_index| {
if (cu.pc_range) |range| {
try d.ranges.append(gpa, .{
.start = range.start,
.end = range.end,
.compile_unit_index = cu_index,
});
continue;
}
const ranges_value = cu.die.getAttr(AT.ranges) orelse continue;
var iter = DebugRangeIterator.init(ranges_value, d, cu) catch continue;
var start: u64 = maxInt(u64);
var end: u64 = 0;
while (try iter.next()) |range| {
start = @min(start, range.start_addr);
end = @max(end, range.end_addr);
// Not sure why LLVM thinks it's OK to emit these...
if (range.start == range.end) continue;
try d.ranges.append(gpa, .{
.start = range.start,
.end = range.end,
.compile_unit_index = cu_index,
});
}
if (end != 0) cu.pc_range = .{
.start = start,
.end = end,
};
}
std.mem.sortUnstable(CompileUnit, d.compile_unit_list.items, {}, struct {
pub fn lessThan(ctx: void, a: CompileUnit, b: CompileUnit) bool {
std.mem.sortUnstable(Range, d.ranges.items, {}, struct {
pub fn lessThan(ctx: void, a: Range, b: Range) bool {
_ = ctx;
const a_range = a.pc_range orelse return false;
const b_range = b.pc_range orelse return true;
return a_range.start < b_range.start;
return a.start < b.start;
}
}.lessThan);
d.compile_units_sorted = true;
}
const DebugRangeIterator = struct {
@ -1184,7 +1197,7 @@ const DebugRangeIterator = struct {
}
// Returns the next range in the list, or null if the end was reached.
pub fn next(self: *@This()) !?struct { start_addr: u64, end_addr: u64 } {
pub fn next(self: *@This()) !?PcRange {
switch (self.section_type) {
.debug_rnglists => {
const kind = try self.fbr.readByte();
@ -1203,8 +1216,8 @@ const DebugRangeIterator = struct {
const end_addr = try self.di.readDebugAddr(self.compile_unit.*, end_index);
return .{
.start_addr = start_addr,
.end_addr = end_addr,
.start = start_addr,
.end = end_addr,
};
},
RLE.startx_length => {
@ -1215,8 +1228,8 @@ const DebugRangeIterator = struct {
const end_addr = start_addr + len;
return .{
.start_addr = start_addr,
.end_addr = end_addr,
.start = start_addr,
.end = end_addr,
};
},
RLE.offset_pair => {
@ -1225,8 +1238,8 @@ const DebugRangeIterator = struct {
// This is the only kind that uses the base address
return .{
.start_addr = self.base_address + start_addr,
.end_addr = self.base_address + end_addr,
.start = self.base_address + start_addr,
.end = self.base_address + end_addr,
};
},
RLE.base_address => {
@ -1238,8 +1251,8 @@ const DebugRangeIterator = struct {
const end_addr = try self.fbr.readInt(usize);
return .{
.start_addr = start_addr,
.end_addr = end_addr,
.start = start_addr,
.end = end_addr,
};
},
RLE.start_length => {
@ -1248,8 +1261,8 @@ const DebugRangeIterator = struct {
const end_addr = start_addr + len;
return .{
.start_addr = start_addr,
.end_addr = end_addr,
.start = start_addr,
.end = end_addr,
};
},
else => return bad(),
@ -1267,8 +1280,8 @@ const DebugRangeIterator = struct {
}
return .{
.start_addr = self.base_address + start_addr,
.end_addr = self.base_address + end_addr,
.start = self.base_address + start_addr,
.end = self.base_address + end_addr,
};
},
else => unreachable,
@ -1286,7 +1299,7 @@ pub fn findCompileUnit(di: *const Dwarf, target_address: u64) !*CompileUnit {
const ranges_value = compile_unit.die.getAttr(AT.ranges) orelse continue;
var iter = DebugRangeIterator.init(ranges_value, di, compile_unit) catch continue;
while (try iter.next()) |range| {
if (target_address >= range.start_addr and target_address < range.end_addr) return compile_unit;
if (target_address >= range.start and target_address < range.end) return compile_unit;
}
}
@ -2345,7 +2358,6 @@ pub const ElfModule = struct {
.endian = endian,
.sections = sections,
.is_macho = false,
.compile_units_sorted = false,
};
try Dwarf.open(&di, gpa);

View File

@ -27,7 +27,7 @@ pub const LoadError = Dwarf.ElfModule.LoadError;
pub fn load(gpa: Allocator, path: Path, coverage: *Coverage) LoadError!Info {
var sections: Dwarf.SectionArray = Dwarf.null_section_array;
var elf_module = try Dwarf.ElfModule.loadPath(gpa, path, null, null, &sections, null);
try elf_module.dwarf.sortCompileUnits();
try elf_module.dwarf.populateRanges(gpa);
var info: Info = .{
.address_map = .{},
.coverage = coverage,

View File

@ -606,7 +606,6 @@ pub const Module = switch (native_os) {
.endian = .little,
.sections = sections,
.is_macho = true,
.compile_units_sorted = false,
};
try Dwarf.open(&di, allocator);
@ -996,7 +995,6 @@ fn readCoffDebugInfo(allocator: Allocator, coff_obj: *coff.Coff) !Module {
.endian = native_endian,
.sections = sections,
.is_macho = false,
.compile_units_sorted = false,
};
try Dwarf.open(&dwarf, allocator);
@ -1810,7 +1808,6 @@ fn unwindFrameMachODwarf(
var di: Dwarf = .{
.endian = native_endian,
.is_macho = true,
.compile_units_sorted = false,
};
defer di.deinit(context.allocator);

View File

@ -54,21 +54,30 @@ pub fn main() !void {
const header: *SeenPcsHeader = @ptrCast(cov_bytes);
try stdout.print("{any}\n", .{header.*});
const pcs = header.pcAddrs();
for (0.., pcs[0 .. pcs.len - 1], pcs[1..]) |i, a, b| {
if (a > b) std.log.err("{d}: 0x{x} > 0x{x}", .{ i, a, b });
}
assert(std.sort.isSorted(usize, pcs, {}, std.sort.asc(usize)));
var indexed_pcs: std.AutoArrayHashMapUnmanaged(usize, void) = .{};
try indexed_pcs.entries.resize(arena, pcs.len);
@memcpy(indexed_pcs.entries.items(.key), pcs);
try indexed_pcs.reIndex(arena);
const sorted_pcs = try arena.dupe(usize, pcs);
std.mem.sortUnstable(usize, sorted_pcs, {}, std.sort.asc(usize));
const source_locations = try arena.alloc(std.debug.Coverage.SourceLocation, sorted_pcs.len);
try debug_info.resolveAddresses(gpa, sorted_pcs, source_locations);
const seen_pcs = header.seenBits();
const source_locations = try arena.alloc(std.debug.Coverage.SourceLocation, pcs.len);
try debug_info.resolveAddresses(gpa, pcs, source_locations);
for (pcs, source_locations, 0..) |pc, sl, i| {
for (sorted_pcs, source_locations) |pc, sl| {
if (sl.file == .invalid) {
try stdout.print(" {x}: invalid\n", .{pc});
continue;
}
const file = debug_info.coverage.fileAt(sl.file);
const dir_name = debug_info.coverage.directories.keys()[file.directory_index];
const dir_name_slice = debug_info.coverage.stringAt(dir_name);
const hit: u1 = @truncate(seen_pcs[i / @bitSizeOf(usize)] >> @intCast(i % @bitSizeOf(usize)));
const seen_i = indexed_pcs.getIndex(pc).?;
const hit: u1 = @truncate(seen_pcs[seen_i / @bitSizeOf(usize)] >> @intCast(seen_i % @bitSizeOf(usize)));
try stdout.print("{c}{x}: {s}/{s}:{d}:{d}\n", .{
"-+"[hit], pc, dir_name_slice, debug_info.coverage.stringAt(file.basename), sl.line, sl.column,
});