Merge pull request #20908 from ziglang/reorg-std.debug-again

std.debug: reorg and clarify API goals
commit a931bfada5
Andrew Kelley, 2024-08-02 17:10:41 -07:00, committed by GitHub
GPG Key ID: B5690EEEBB952194 (no known key found for this signature in database)
11 changed files with 3512 additions and 3428 deletions

File diff suppressed because it is too large

File diff suppressed because it is too large

lib/std/debug/Dwarf/abi.zig

@@ -1,44 +1,50 @@
const builtin = @import("builtin");
const std = @import("../../std.zig");
const mem = std.mem;
const native_os = builtin.os.tag;
const posix = std.posix;
const Arch = std.Target.Cpu.Arch;
/// Tells whether unwinding for this target is supported by the Dwarf standard.
///
/// See also `std.debug.SelfInfo.supportsUnwinding` which tells whether the Zig
/// standard library has a working implementation of unwinding for this target.
pub fn supportsUnwinding(target: std.Target) bool {
return switch (target.cpu.arch) {
.x86 => switch (target.os.tag) {
.linux, .netbsd, .solaris, .illumos => true,
else => false,
},
.x86_64 => switch (target.os.tag) {
.linux, .netbsd, .freebsd, .openbsd, .macos, .ios, .solaris, .illumos => true,
else => false,
},
.arm => switch (target.os.tag) {
.linux => true,
else => false,
},
.aarch64 => switch (target.os.tag) {
.linux, .netbsd, .freebsd, .macos, .ios => true,
else => false,
},
else => false,
.amdgcn,
.nvptx,
.nvptx64,
.spirv,
.spirv32,
.spirv64,
.spu_2,
=> false,
// Enabling this causes relocation errors such as:
// error: invalid relocation type R_RISCV_SUB32 at offset 0x20
.riscv64, .riscv32 => false,
// Conservative guess. Feel free to update this logic with any targets
// that are known to not support Dwarf unwinding.
else => true,
};
}
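A hedged sketch, not part of the diff, of how a caller might consume this predicate, assuming the file stays reachable as `std.debug.Dwarf.abi`; since the function is pure, it can be evaluated at comptime:

const std = @import("std");
const builtin = @import("builtin");

// Choose an unwinding strategy for the build target at compile time.
fn canUseDwarfUnwinding() bool {
    return comptime std.debug.Dwarf.abi.supportsUnwinding(builtin.target);
}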
pub fn ipRegNum() u8 {
return switch (builtin.cpu.arch) {
/// Returns `null` for CPU architectures without an instruction pointer register.
pub fn ipRegNum(arch: Arch) ?u8 {
return switch (arch) {
.x86 => 8,
.x86_64 => 16,
.arm => 15,
.aarch64 => 32,
else => unreachable,
else => null,
};
}
pub fn fpRegNum(reg_context: RegisterContext) u8 {
return switch (builtin.cpu.arch) {
// GCC on OS X historically did the opposite of ELF for these registers (only in .eh_frame), and that is now the convention for MachO
pub fn fpRegNum(arch: Arch, reg_context: RegisterContext) u8 {
return switch (arch) {
// GCC on OS X historically did the opposite of ELF for these registers
// (only in .eh_frame), and that is now the convention for MachO
.x86 => if (reg_context.eh_frame and reg_context.is_macho) 4 else 5,
.x86_64 => 6,
.arm => 11,
@@ -47,8 +53,8 @@ pub fn fpRegNum(reg_context: RegisterContext) u8 {
};
}
pub fn spRegNum(reg_context: RegisterContext) u8 {
return switch (builtin.cpu.arch) {
pub fn spRegNum(arch: Arch, reg_context: RegisterContext) u8 {
return switch (arch) {
.x86 => if (reg_context.eh_frame and reg_context.is_macho) 5 else 4,
.x86_64 => 7,
.arm => 13,
@@ -57,33 +63,12 @@ pub fn spRegNum(reg_context: RegisterContext) u8 {
};
}
/// Some platforms use pointer authentication - the upper bits of instruction pointers contain a signature.
/// This function clears these signature bits to make the pointer usable.
pub inline fn stripInstructionPtrAuthCode(ptr: usize) usize {
if (builtin.cpu.arch == .aarch64) {
// `hint 0x07` maps to `xpaclri` (or `nop` if the hardware doesn't support it)
// The save / restore is because `xpaclri` operates on x30 (LR)
return asm (
\\mov x16, x30
\\mov x30, x15
\\hint 0x07
\\mov x15, x30
\\mov x30, x16
: [ret] "={x15}" (-> usize),
: [ptr] "{x15}" (ptr),
: "x16"
);
}
return ptr;
}
pub const RegisterContext = struct {
eh_frame: bool,
is_macho: bool,
};
pub const AbiError = error{
pub const RegBytesError = error{
InvalidRegister,
UnimplementedArch,
UnimplementedOs,
@@ -91,55 +76,21 @@ pub const AbiError = error{
ThreadContextNotSupported,
};
fn RegValueReturnType(comptime ContextPtrType: type, comptime T: type) type {
const reg_bytes_type = comptime RegBytesReturnType(ContextPtrType);
const info = @typeInfo(reg_bytes_type).Pointer;
return @Type(.{
.Pointer = .{
.size = .One,
.is_const = info.is_const,
.is_volatile = info.is_volatile,
.is_allowzero = info.is_allowzero,
.alignment = info.alignment,
.address_space = info.address_space,
.child = T,
.sentinel = null,
},
});
}
/// Returns a pointer to a register stored in a ThreadContext, preserving the pointer attributes of the context.
pub fn regValueNative(
comptime T: type,
thread_context_ptr: anytype,
reg_number: u8,
reg_context: ?RegisterContext,
) !RegValueReturnType(@TypeOf(thread_context_ptr), T) {
const reg_bytes = try regBytes(thread_context_ptr, reg_number, reg_context);
if (@sizeOf(T) != reg_bytes.len) return error.IncompatibleRegisterSize;
return mem.bytesAsValue(T, reg_bytes[0..@sizeOf(T)]);
}
fn RegBytesReturnType(comptime ContextPtrType: type) type {
const info = @typeInfo(ContextPtrType);
if (info != .Pointer or info.Pointer.child != std.debug.ThreadContext) {
@compileError("Expected a pointer to std.debug.ThreadContext, got " ++ @typeName(@TypeOf(ContextPtrType)));
}
return if (info.Pointer.is_const) return []const u8 else []u8;
}
/// Returns a slice containing the backing storage for `reg_number`.
///
/// This function assumes the Dwarf information does not necessarily correspond
/// to the current executable, but at least to a matching CPU architecture and
/// OS. It is planned to lift this limitation with a future enhancement.
///
/// `reg_context` describes in what context the register number is used, as it can have different
/// meanings depending on the DWARF container. It is only required when getting the stack or
/// frame pointer register on some architectures.
pub fn regBytes(
thread_context_ptr: anytype,
thread_context_ptr: *std.debug.ThreadContext,
reg_number: u8,
reg_context: ?RegisterContext,
) AbiError!RegBytesReturnType(@TypeOf(thread_context_ptr)) {
if (native_os == .windows) {
) RegBytesError![]u8 {
if (builtin.os.tag == .windows) {
return switch (builtin.cpu.arch) {
.x86 => switch (reg_number) {
0 => mem.asBytes(&thread_context_ptr.Eax),
@@ -194,7 +145,7 @@ pub fn regBytes(
const ucontext_ptr = thread_context_ptr;
return switch (builtin.cpu.arch) {
.x86 => switch (native_os) {
.x86 => switch (builtin.os.tag) {
.linux, .netbsd, .solaris, .illumos => switch (reg_number) {
0 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.EAX]),
1 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.ECX]),
@@ -229,7 +180,7 @@ pub fn regBytes(
},
else => error.UnimplementedOs,
},
.x86_64 => switch (native_os) {
.x86_64 => switch (builtin.os.tag) {
.linux, .solaris, .illumos => switch (reg_number) {
0 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.RAX]),
1 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.RDX]),
@@ -248,7 +199,7 @@ pub fn regBytes(
14 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.R14]),
15 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.R15]),
16 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.RIP]),
17...32 => |i| if (native_os.isSolarish())
17...32 => |i| if (builtin.os.tag.isSolarish())
mem.asBytes(&ucontext_ptr.mcontext.fpregs.chip_state.xmm[i - 17])
else
mem.asBytes(&ucontext_ptr.mcontext.fpregs.xmm[i - 17]),
@@ -318,7 +269,7 @@ pub fn regBytes(
},
else => error.UnimplementedOs,
},
.arm => switch (native_os) {
.arm => switch (builtin.os.tag) {
.linux => switch (reg_number) {
0 => mem.asBytes(&ucontext_ptr.mcontext.arm_r0),
1 => mem.asBytes(&ucontext_ptr.mcontext.arm_r1),
@@ -341,7 +292,7 @@ pub fn regBytes(
},
else => error.UnimplementedOs,
},
.aarch64 => switch (native_os) {
.aarch64 => switch (builtin.os.tag) {
.macos, .ios => switch (reg_number) {
0...28 => mem.asBytes(&ucontext_ptr.mcontext.ss.regs[reg_number]),
29 => mem.asBytes(&ucontext_ptr.mcontext.ss.fp),
@@ -389,22 +340,14 @@ pub fn regBytes(
};
}
/// Returns the ABI-defined default value this register has in the unwinding table
/// before running any of the CIE instructions. The DWARF spec defines these as having
/// the .undefined rule by default, but allows ABI authors to override that.
pub fn getRegDefaultValue(reg_number: u8, context: *std.debug.Dwarf.UnwindContext, out: []u8) !void {
switch (builtin.cpu.arch) {
.aarch64 => {
// Callee-saved registers are initialized as if they had the .same_value rule
if (reg_number >= 19 and reg_number <= 28) {
const src = try regBytes(context.thread_context, reg_number, context.reg_context);
if (src.len != out.len) return error.RegisterSizeMismatch;
@memcpy(out, src);
return;
}
},
else => {},
}
@memset(out, undefined);
/// Returns a pointer to a register stored in a ThreadContext, preserving the
/// pointer attributes of the context.
pub fn regValueNative(
thread_context_ptr: *std.debug.ThreadContext,
reg_number: u8,
reg_context: ?RegisterContext,
) !*align(1) usize {
const reg_bytes = try regBytes(thread_context_ptr, reg_number, reg_context);
if (@sizeOf(usize) != reg_bytes.len) return error.IncompatibleRegisterSize;
return mem.bytesAsValue(usize, reg_bytes[0..@sizeOf(usize)]);
}
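A minimal sketch, not part of the diff, showing the reworked arch-parameterized accessors together; `error.UnsupportedArch` is an illustrative error name:

fn readIp(ctx: *std.debug.ThreadContext) !usize {
    const reg_context: RegisterContext = .{
        .eh_frame = true,
        .is_macho = builtin.os.tag.isDarwin(),
    };
    // ipRegNum now returns null instead of hitting unreachable.
    const ip_reg = ipRegNum(builtin.cpu.arch) orelse return error.UnsupportedArch;
    return (try regValueNative(ctx, ip_reg, reg_context)).*;
}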

lib/std/debug/Dwarf/call_frame.zig

@@ -297,391 +297,3 @@ pub const Instruction = union(Opcode) {
}
}
};
/// Since register rules are applied (usually) during a panic,
/// checked addition / subtraction is used so that we can return
/// an error and fall back to FP-based unwinding.
pub fn applyOffset(base: usize, offset: i64) !usize {
return if (offset >= 0)
try std.math.add(usize, base, @as(usize, @intCast(offset)))
else
try std.math.sub(usize, base, @as(usize, @intCast(-offset)));
}
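For example, an offset(N) register rule computes a save slot relative to the CFA; with checked arithmetic a corrupt CFA surfaces as an error instead of crashing the unwinder. A tiny sketch (`cfa` is illustrative):

// offset(-16) relative to the canonical frame address; a wildly wrong
// CFA value becomes error.Overflow here rather than a trap mid-panic.
const slot_addr = try applyOffset(cfa, -16);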
/// This is a virtual machine that runs DWARF call frame instructions.
pub const VirtualMachine = struct {
/// See section 6.4.1 of the DWARF5 specification for details on each
const RegisterRule = union(enum) {
// The spec says that the default rule for each column is the undefined rule.
// However, it also allows ABI / compiler authors to specify alternate defaults, so
// there is a distinction made here.
default: void,
undefined: void,
same_value: void,
// offset(N)
offset: i64,
// val_offset(N)
val_offset: i64,
// register(R)
register: u8,
// expression(E)
expression: []const u8,
// val_expression(E)
val_expression: []const u8,
// Augmenter-defined rule
architectural: void,
};
/// Each row contains unwinding rules for a set of registers.
pub const Row = struct {
/// Offset from `FrameDescriptionEntry.pc_begin`
offset: u64 = 0,
/// Special-case column that defines the CFA (Canonical Frame Address) rule.
/// The register field of this column defines the register that CFA is derived from.
cfa: Column = .{},
/// The register fields in these columns define the register the rule applies to.
columns: ColumnRange = .{},
/// Indicates that the next write to any column in this row needs to copy
/// the backing column storage first, as it may be referenced by previous rows.
copy_on_write: bool = false,
};
pub const Column = struct {
register: ?u8 = null,
rule: RegisterRule = .{ .default = {} },
/// Resolves the register rule and places the result into `out` (see dwarf.abi.regBytes)
pub fn resolveValue(
self: Column,
context: *std.debug.Dwarf.UnwindContext,
expression_context: std.debug.Dwarf.expression.Context,
ma: *debug.StackIterator.MemoryAccessor,
out: []u8,
) !void {
switch (self.rule) {
.default => {
const register = self.register orelse return error.InvalidRegister;
try abi.getRegDefaultValue(register, context, out);
},
.undefined => {
@memset(out, undefined);
},
.same_value => {
// TODO: This copy could be eliminated if callers always copy the state then call this function to update it
const register = self.register orelse return error.InvalidRegister;
const src = try abi.regBytes(context.thread_context, register, context.reg_context);
if (src.len != out.len) return error.RegisterSizeMismatch;
@memcpy(out, src);
},
.offset => |offset| {
if (context.cfa) |cfa| {
const addr = try applyOffset(cfa, offset);
if (ma.load(usize, addr) == null) return error.InvalidAddress;
const ptr: *const usize = @ptrFromInt(addr);
mem.writeInt(usize, out[0..@sizeOf(usize)], ptr.*, native_endian);
} else return error.InvalidCFA;
},
.val_offset => |offset| {
if (context.cfa) |cfa| {
mem.writeInt(usize, out[0..@sizeOf(usize)], try applyOffset(cfa, offset), native_endian);
} else return error.InvalidCFA;
},
.register => |register| {
const src = try abi.regBytes(context.thread_context, register, context.reg_context);
if (src.len != out.len) return error.RegisterSizeMismatch;
@memcpy(out, try abi.regBytes(context.thread_context, register, context.reg_context));
},
.expression => |expression| {
context.stack_machine.reset();
const value = try context.stack_machine.run(expression, context.allocator, expression_context, context.cfa.?);
const addr = if (value) |v| blk: {
if (v != .generic) return error.InvalidExpressionValue;
break :blk v.generic;
} else return error.NoExpressionValue;
if (ma.load(usize, addr) == null) return error.InvalidExpressionAddress;
const ptr: *usize = @ptrFromInt(addr);
mem.writeInt(usize, out[0..@sizeOf(usize)], ptr.*, native_endian);
},
.val_expression => |expression| {
context.stack_machine.reset();
const value = try context.stack_machine.run(expression, context.allocator, expression_context, context.cfa.?);
if (value) |v| {
if (v != .generic) return error.InvalidExpressionValue;
mem.writeInt(usize, out[0..@sizeOf(usize)], v.generic, native_endian);
} else return error.NoExpressionValue;
},
.architectural => return error.UnimplementedRegisterRule,
}
}
};
const ColumnRange = struct {
/// Index into `columns` of the first column in this row.
start: usize = undefined,
len: u8 = 0,
};
columns: std.ArrayListUnmanaged(Column) = .{},
stack: std.ArrayListUnmanaged(ColumnRange) = .{},
current_row: Row = .{},
/// The result of executing the CIE's initial_instructions
cie_row: ?Row = null,
pub fn deinit(self: *VirtualMachine, allocator: std.mem.Allocator) void {
self.stack.deinit(allocator);
self.columns.deinit(allocator);
self.* = undefined;
}
pub fn reset(self: *VirtualMachine) void {
self.stack.clearRetainingCapacity();
self.columns.clearRetainingCapacity();
self.current_row = .{};
self.cie_row = null;
}
/// Return a slice backed by the row's non-CFA columns
pub fn rowColumns(self: VirtualMachine, row: Row) []Column {
if (row.columns.len == 0) return &.{};
return self.columns.items[row.columns.start..][0..row.columns.len];
}
/// Either retrieves or adds a column for `register` (non-CFA) in the current row.
fn getOrAddColumn(self: *VirtualMachine, allocator: std.mem.Allocator, register: u8) !*Column {
for (self.rowColumns(self.current_row)) |*c| {
if (c.register == register) return c;
}
if (self.current_row.columns.len == 0) {
self.current_row.columns.start = self.columns.items.len;
}
self.current_row.columns.len += 1;
const column = try self.columns.addOne(allocator);
column.* = .{
.register = register,
};
return column;
}
/// Runs the CIE instructions, then the FDE instructions. Execution halts
/// once the row that corresponds to `pc` is known, and the row is returned.
pub fn runTo(
self: *VirtualMachine,
allocator: std.mem.Allocator,
pc: u64,
cie: std.debug.Dwarf.CommonInformationEntry,
fde: std.debug.Dwarf.FrameDescriptionEntry,
addr_size_bytes: u8,
endian: std.builtin.Endian,
) !Row {
assert(self.cie_row == null);
if (pc < fde.pc_begin or pc >= fde.pc_begin + fde.pc_range) return error.AddressOutOfRange;
var prev_row: Row = self.current_row;
var cie_stream = std.io.fixedBufferStream(cie.initial_instructions);
var fde_stream = std.io.fixedBufferStream(fde.instructions);
var streams = [_]*std.io.FixedBufferStream([]const u8){
&cie_stream,
&fde_stream,
};
for (&streams, 0..) |stream, i| {
while (stream.pos < stream.buffer.len) {
const instruction = try std.debug.Dwarf.call_frame.Instruction.read(stream, addr_size_bytes, endian);
prev_row = try self.step(allocator, cie, i == 0, instruction);
if (pc < fde.pc_begin + self.current_row.offset) return prev_row;
}
}
return self.current_row;
}
pub fn runToNative(
self: *VirtualMachine,
allocator: std.mem.Allocator,
pc: u64,
cie: std.debug.Dwarf.CommonInformationEntry,
fde: std.debug.Dwarf.FrameDescriptionEntry,
) !Row {
return self.runTo(allocator, pc, cie, fde, @sizeOf(usize), builtin.target.cpu.arch.endian());
}
fn resolveCopyOnWrite(self: *VirtualMachine, allocator: std.mem.Allocator) !void {
if (!self.current_row.copy_on_write) return;
const new_start = self.columns.items.len;
if (self.current_row.columns.len > 0) {
try self.columns.ensureUnusedCapacity(allocator, self.current_row.columns.len);
self.columns.appendSliceAssumeCapacity(self.rowColumns(self.current_row));
self.current_row.columns.start = new_start;
}
}
/// Executes a single instruction.
/// If this instruction is from the CIE, `is_initial` should be set.
/// Returns the value of `current_row` before executing this instruction.
pub fn step(
self: *VirtualMachine,
allocator: std.mem.Allocator,
cie: std.debug.Dwarf.CommonInformationEntry,
is_initial: bool,
instruction: Instruction,
) !Row {
// CIE instructions must be run before FDE instructions
assert(!is_initial or self.cie_row == null);
if (!is_initial and self.cie_row == null) {
self.cie_row = self.current_row;
self.current_row.copy_on_write = true;
}
const prev_row = self.current_row;
switch (instruction) {
.set_loc => |i| {
if (i.address <= self.current_row.offset) return error.InvalidOperation;
// TODO: Check cie.segment_selector_size != 0 for DWARFV4
self.current_row.offset = i.address;
},
inline .advance_loc,
.advance_loc1,
.advance_loc2,
.advance_loc4,
=> |i| {
self.current_row.offset += i.delta * cie.code_alignment_factor;
self.current_row.copy_on_write = true;
},
inline .offset,
.offset_extended,
.offset_extended_sf,
=> |i| {
try self.resolveCopyOnWrite(allocator);
const column = try self.getOrAddColumn(allocator, i.register);
column.rule = .{ .offset = @as(i64, @intCast(i.offset)) * cie.data_alignment_factor };
},
inline .restore,
.restore_extended,
=> |i| {
try self.resolveCopyOnWrite(allocator);
if (self.cie_row) |cie_row| {
const column = try self.getOrAddColumn(allocator, i.register);
column.rule = for (self.rowColumns(cie_row)) |cie_column| {
if (cie_column.register == i.register) break cie_column.rule;
} else .{ .default = {} };
} else return error.InvalidOperation;
},
.nop => {},
.undefined => |i| {
try self.resolveCopyOnWrite(allocator);
const column = try self.getOrAddColumn(allocator, i.register);
column.rule = .{ .undefined = {} };
},
.same_value => |i| {
try self.resolveCopyOnWrite(allocator);
const column = try self.getOrAddColumn(allocator, i.register);
column.rule = .{ .same_value = {} };
},
.register => |i| {
try self.resolveCopyOnWrite(allocator);
const column = try self.getOrAddColumn(allocator, i.register);
column.rule = .{ .register = i.target_register };
},
.remember_state => {
try self.stack.append(allocator, self.current_row.columns);
self.current_row.copy_on_write = true;
},
.restore_state => {
const restored_columns = self.stack.popOrNull() orelse return error.InvalidOperation;
self.columns.shrinkRetainingCapacity(self.columns.items.len - self.current_row.columns.len);
try self.columns.ensureUnusedCapacity(allocator, restored_columns.len);
self.current_row.columns.start = self.columns.items.len;
self.current_row.columns.len = restored_columns.len;
self.columns.appendSliceAssumeCapacity(self.columns.items[restored_columns.start..][0..restored_columns.len]);
},
.def_cfa => |i| {
try self.resolveCopyOnWrite(allocator);
self.current_row.cfa = .{
.register = i.register,
.rule = .{ .val_offset = @intCast(i.offset) },
};
},
.def_cfa_sf => |i| {
try self.resolveCopyOnWrite(allocator);
self.current_row.cfa = .{
.register = i.register,
.rule = .{ .val_offset = i.offset * cie.data_alignment_factor },
};
},
.def_cfa_register => |i| {
try self.resolveCopyOnWrite(allocator);
if (self.current_row.cfa.register == null or self.current_row.cfa.rule != .val_offset) return error.InvalidOperation;
self.current_row.cfa.register = i.register;
},
.def_cfa_offset => |i| {
try self.resolveCopyOnWrite(allocator);
if (self.current_row.cfa.register == null or self.current_row.cfa.rule != .val_offset) return error.InvalidOperation;
self.current_row.cfa.rule = .{
.val_offset = @intCast(i.offset),
};
},
.def_cfa_offset_sf => |i| {
try self.resolveCopyOnWrite(allocator);
if (self.current_row.cfa.register == null or self.current_row.cfa.rule != .val_offset) return error.InvalidOperation;
self.current_row.cfa.rule = .{
.val_offset = i.offset * cie.data_alignment_factor,
};
},
.def_cfa_expression => |i| {
try self.resolveCopyOnWrite(allocator);
self.current_row.cfa.register = undefined;
self.current_row.cfa.rule = .{
.expression = i.block,
};
},
.expression => |i| {
try self.resolveCopyOnWrite(allocator);
const column = try self.getOrAddColumn(allocator, i.register);
column.rule = .{
.expression = i.block,
};
},
.val_offset => |i| {
try self.resolveCopyOnWrite(allocator);
const column = try self.getOrAddColumn(allocator, i.register);
column.rule = .{
.val_offset = @as(i64, @intCast(i.offset)) * cie.data_alignment_factor,
};
},
.val_offset_sf => |i| {
try self.resolveCopyOnWrite(allocator);
const column = try self.getOrAddColumn(allocator, i.register);
column.rule = .{
.val_offset = i.offset * cie.data_alignment_factor,
};
},
.val_expression => |i| {
try self.resolveCopyOnWrite(allocator);
const column = try self.getOrAddColumn(allocator, i.register);
column.rule = .{
.val_expression = i.block,
};
},
}
return prev_row;
}
};
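A hedged usage sketch of the virtual machine, assuming `cie` and `fde` were parsed from an unwind section and `pc` lies within the FDE's range:

var vm: VirtualMachine = .{};
defer vm.deinit(allocator);
const row = try vm.runToNative(allocator, pc, cie, fde);
for (vm.rowColumns(row)) |column| {
    // Each column's rule describes how to recover one caller register:
    // offset from the CFA, copy of another register, DWARF expression, ...
    _ = column;
}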

lib/std/debug/Dwarf/expression.zig

@@ -1,11 +1,13 @@
const std = @import("std");
const builtin = @import("builtin");
const native_arch = builtin.cpu.arch;
const native_endian = native_arch.endian();
const std = @import("std");
const leb = std.leb;
const OP = std.dwarf.OP;
const abi = std.debug.Dwarf.abi;
const mem = std.mem;
const assert = std.debug.assert;
const native_endian = builtin.cpu.arch.endian();
/// Expressions can be evaluated in different contexts, each requiring its own set of inputs.
/// Callers should specify all the fields relevant to their context. If a field is required
@@ -14,7 +16,7 @@ pub const Context = struct {
/// The dwarf format of the section this expression is in
format: std.dwarf.Format = .@"32",
/// If specified, any addresses will pass through before being accessed
memory_accessor: ?*std.debug.StackIterator.MemoryAccessor = null,
memory_accessor: ?*std.debug.MemoryAccessor = null,
/// The compilation unit this expression relates to, if any
compile_unit: ?*const std.debug.Dwarf.CompileUnit = null,
/// When evaluating a user-presented expression, this is the address of the object being evaluated
@@ -34,7 +36,7 @@ pub const Options = struct {
/// The address size of the target architecture
addr_size: u8 = @sizeOf(usize),
/// Endianness of the target architecture
endian: std.builtin.Endian = builtin.target.cpu.arch.endian(),
endian: std.builtin.Endian = native_endian,
/// Restrict the stack machine to a subset of opcodes used in call frame instructions
call_frame_context: bool = false,
};
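A small sketch of specializing the machine with these options; `StackMachine` is defined later in this file, and the defaults above target the native architecture:

// Restrict the opcode set to what call frame instructions may use.
const CallFrameMachine = StackMachine(.{ .call_frame_context = true });
var machine: CallFrameMachine = .{};
defer machine.deinit(allocator);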
@@ -60,7 +62,7 @@ pub const Error = error{
InvalidTypeLength,
TruncatedIntegralType,
} || abi.AbiError || error{ EndOfStream, Overflow, OutOfMemory, DivisionByZero };
} || abi.RegBytesError || error{ EndOfStream, Overflow, OutOfMemory, DivisionByZero };
/// A stack machine that can decode and run DWARF expressions.
/// Expressions can be decoded for non-native address size and endianness,
@@ -304,7 +306,7 @@ pub fn StackMachine(comptime options: Options) type {
allocator: std.mem.Allocator,
context: Context,
) Error!bool {
if (@sizeOf(usize) != @sizeOf(addr_type) or options.endian != comptime builtin.target.cpu.arch.endian())
if (@sizeOf(usize) != @sizeOf(addr_type) or options.endian != native_endian)
@compileError("Execution of non-native address sizes / endianness is not supported");
const opcode = try stream.reader().readByte();
@@ -1186,13 +1188,13 @@ test "DWARF expressions" {
// TODO: Test fbreg (once implemented): mock a DIE and point compile_unit.frame_base at it
mem.writeInt(usize, reg_bytes[0..@sizeOf(usize)], 0xee, native_endian);
(try abi.regValueNative(usize, &thread_context, abi.fpRegNum(reg_context), reg_context)).* = 1;
(try abi.regValueNative(usize, &thread_context, abi.spRegNum(reg_context), reg_context)).* = 2;
(try abi.regValueNative(usize, &thread_context, abi.ipRegNum(), reg_context)).* = 3;
(try abi.regValueNative(&thread_context, abi.fpRegNum(native_arch, reg_context), reg_context)).* = 1;
(try abi.regValueNative(&thread_context, abi.spRegNum(native_arch, reg_context), reg_context)).* = 2;
(try abi.regValueNative(&thread_context, abi.ipRegNum(native_arch).?, reg_context)).* = 3;
try b.writeBreg(writer, abi.fpRegNum(reg_context), @as(usize, 100));
try b.writeBreg(writer, abi.spRegNum(reg_context), @as(usize, 200));
try b.writeBregx(writer, abi.ipRegNum(), @as(usize, 300));
try b.writeBreg(writer, abi.fpRegNum(native_arch, reg_context), @as(usize, 100));
try b.writeBreg(writer, abi.spRegNum(native_arch, reg_context), @as(usize, 200));
try b.writeBregx(writer, abi.ipRegNum(native_arch).?, @as(usize, 300));
try b.writeRegvalType(writer, @as(u8, 0), @as(usize, 400));
_ = try stack_machine.run(program.items, allocator, context, 0);

lib/std/debug/MemoryAccessor.zig (new file, 128 lines)

@@ -0,0 +1,128 @@
//! Reads memory from any address of the current process using OS-specific
//! syscalls, bypassing memory page protection. Useful for stack unwinding.
const builtin = @import("builtin");
const native_os = builtin.os.tag;
const std = @import("../std.zig");
const posix = std.posix;
const File = std.fs.File;
const page_size = std.mem.page_size;
const MemoryAccessor = @This();
var cached_pid: posix.pid_t = -1;
mem: switch (native_os) {
.linux => File,
else => void,
},
pub const init: MemoryAccessor = .{
.mem = switch (native_os) {
.linux => .{ .handle = -1 },
else => {},
},
};
fn read(ma: *MemoryAccessor, address: usize, buf: []u8) bool {
switch (native_os) {
.linux => while (true) switch (ma.mem.handle) {
-2 => break,
-1 => {
const linux = std.os.linux;
const pid = switch (@atomicLoad(posix.pid_t, &cached_pid, .monotonic)) {
-1 => pid: {
const pid = linux.getpid();
@atomicStore(posix.pid_t, &cached_pid, pid, .monotonic);
break :pid pid;
},
else => |pid| pid,
};
const bytes_read = linux.process_vm_readv(
pid,
&.{.{ .base = buf.ptr, .len = buf.len }},
&.{.{ .base = @ptrFromInt(address), .len = buf.len }},
0,
);
switch (linux.E.init(bytes_read)) {
.SUCCESS => return bytes_read == buf.len,
.FAULT => return false,
.INVAL, .PERM, .SRCH => unreachable, // own pid is always valid
.NOMEM => {},
.NOSYS => {}, // QEMU is known not to implement this syscall.
else => unreachable, // unexpected
}
var path_buf: [
std.fmt.count("/proc/{d}/mem", .{std.math.minInt(posix.pid_t)})
]u8 = undefined;
const path = std.fmt.bufPrint(&path_buf, "/proc/{d}/mem", .{pid}) catch
unreachable;
ma.mem = std.fs.openFileAbsolute(path, .{}) catch {
ma.mem.handle = -2;
break;
};
},
else => return (ma.mem.pread(buf, address) catch return false) == buf.len,
},
else => {},
}
if (!isValidMemory(address)) return false;
@memcpy(buf, @as([*]const u8, @ptrFromInt(address)));
return true;
}
pub fn load(ma: *MemoryAccessor, comptime Type: type, address: usize) ?Type {
var result: Type = undefined;
return if (ma.read(address, std.mem.asBytes(&result))) result else null;
}
pub fn isValidMemory(address: usize) bool {
// We are unable to determine validity of memory for freestanding or UEFI targets
if (native_os == .freestanding or native_os == .uefi) return true;
const aligned_address = address & ~@as(usize, @intCast((page_size - 1)));
if (aligned_address == 0) return false;
const aligned_memory = @as([*]align(page_size) u8, @ptrFromInt(aligned_address))[0..page_size];
if (native_os == .windows) {
const windows = std.os.windows;
var memory_info: windows.MEMORY_BASIC_INFORMATION = undefined;
// The only error this function can throw is ERROR_INVALID_PARAMETER,
// which is thrown when the supplied address is invalid.
const rc = windows.VirtualQuery(aligned_memory, &memory_info, aligned_memory.len) catch {
return false;
};
// Result code has to be bigger than zero (number of bytes written)
if (rc == 0) {
return false;
}
// Free pages cannot be read, they are unmapped
if (memory_info.State == windows.MEM_FREE) {
return false;
}
return true;
} else if (have_msync) {
posix.msync(aligned_memory, posix.MSF.ASYNC) catch |err| {
switch (err) {
error.UnmappedMemory => return false,
else => unreachable,
}
};
return true;
} else {
// We are unable to determine validity of memory on this target.
return true;
}
}
const have_msync = switch (native_os) {
.wasi, .emscripten, .windows => false,
else => true,
};
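A brief usage sketch; `candidate_addr` is illustrative. The point of the accessor is that an unmapped address yields `null` rather than a fault mid-unwind:

var ma: MemoryAccessor = .init;
if (ma.load(usize, candidate_addr)) |saved_return_address| {
    // The word was readable; treat it as the next frame's return address.
    _ = saved_return_address;
}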

lib/std/debug/Pdb.zig (new file, 591 lines)

@@ -0,0 +1,591 @@
const std = @import("../std.zig");
const File = std.fs.File;
const Allocator = std.mem.Allocator;
const pdb = std.pdb;
const Pdb = @This();
in_file: File,
msf: Msf,
allocator: Allocator,
string_table: ?*MsfStream,
dbi: ?*MsfStream,
modules: []Module,
sect_contribs: []pdb.SectionContribEntry,
guid: [16]u8,
age: u32,
pub const Module = struct {
mod_info: pdb.ModInfo,
module_name: []u8,
obj_file_name: []u8,
// The fields below are filled on demand.
populated: bool,
symbols: []u8,
subsect_info: []u8,
checksum_offset: ?usize,
pub fn deinit(self: *Module, allocator: Allocator) void {
allocator.free(self.module_name);
allocator.free(self.obj_file_name);
if (self.populated) {
allocator.free(self.symbols);
allocator.free(self.subsect_info);
}
}
};
pub fn init(allocator: Allocator, path: []const u8) !Pdb {
const file = try std.fs.cwd().openFile(path, .{});
errdefer file.close();
return .{
.in_file = file,
.allocator = allocator,
.string_table = null,
.dbi = null,
.msf = try Msf.init(allocator, file),
.modules = &[_]Module{},
.sect_contribs = &[_]pdb.SectionContribEntry{},
.guid = undefined,
.age = undefined,
};
}
pub fn deinit(self: *Pdb) void {
self.in_file.close();
self.msf.deinit(self.allocator);
for (self.modules) |*module| {
module.deinit(self.allocator);
}
self.allocator.free(self.modules);
self.allocator.free(self.sect_contribs);
}
pub fn parseDbiStream(self: *Pdb) !void {
var stream = self.getStream(pdb.StreamType.Dbi) orelse
return error.InvalidDebugInfo;
const reader = stream.reader();
const header = try reader.readStruct(std.pdb.DbiStreamHeader);
if (header.VersionHeader != 19990903) // V70, only value observed by LLVM team
return error.UnknownPDBVersion;
// if (header.Age != age)
// return error.UnmatchingPDB;
const mod_info_size = header.ModInfoSize;
const section_contrib_size = header.SectionContributionSize;
var modules = std.ArrayList(Module).init(self.allocator);
errdefer modules.deinit();
// Module Info Substream
var mod_info_offset: usize = 0;
while (mod_info_offset != mod_info_size) {
const mod_info = try reader.readStruct(pdb.ModInfo);
var this_record_len: usize = @sizeOf(pdb.ModInfo);
const module_name = try reader.readUntilDelimiterAlloc(self.allocator, 0, 1024);
errdefer self.allocator.free(module_name);
this_record_len += module_name.len + 1;
const obj_file_name = try reader.readUntilDelimiterAlloc(self.allocator, 0, 1024);
errdefer self.allocator.free(obj_file_name);
this_record_len += obj_file_name.len + 1;
if (this_record_len % 4 != 0) {
const round_to_next_4 = (this_record_len | 0x3) + 1;
const march_forward_bytes = round_to_next_4 - this_record_len;
try stream.seekBy(@as(isize, @intCast(march_forward_bytes)));
this_record_len += march_forward_bytes;
}
try modules.append(Module{
.mod_info = mod_info,
.module_name = module_name,
.obj_file_name = obj_file_name,
.populated = false,
.symbols = undefined,
.subsect_info = undefined,
.checksum_offset = null,
});
mod_info_offset += this_record_len;
if (mod_info_offset > mod_info_size)
return error.InvalidDebugInfo;
}
// Section Contribution Substream
var sect_contribs = std.ArrayList(pdb.SectionContribEntry).init(self.allocator);
errdefer sect_contribs.deinit();
var sect_cont_offset: usize = 0;
if (section_contrib_size != 0) {
const version = reader.readEnum(std.pdb.SectionContrSubstreamVersion, .little) catch |err| switch (err) {
error.InvalidValue => return error.InvalidDebugInfo,
else => |e| return e,
};
_ = version;
sect_cont_offset += @sizeOf(u32);
}
while (sect_cont_offset != section_contrib_size) {
const entry = try sect_contribs.addOne();
entry.* = try reader.readStruct(pdb.SectionContribEntry);
sect_cont_offset += @sizeOf(pdb.SectionContribEntry);
if (sect_cont_offset > section_contrib_size)
return error.InvalidDebugInfo;
}
self.modules = try modules.toOwnedSlice();
self.sect_contribs = try sect_contribs.toOwnedSlice();
}
pub fn parseInfoStream(self: *Pdb) !void {
var stream = self.getStream(pdb.StreamType.Pdb) orelse
return error.InvalidDebugInfo;
const reader = stream.reader();
// Parse the InfoStreamHeader.
const version = try reader.readInt(u32, .little);
const signature = try reader.readInt(u32, .little);
_ = signature;
const age = try reader.readInt(u32, .little);
const guid = try reader.readBytesNoEof(16);
if (version != 20000404) // VC70, only value observed by LLVM team
return error.UnknownPDBVersion;
self.guid = guid;
self.age = age;
// Find the string table.
const string_table_index = str_tab_index: {
const name_bytes_len = try reader.readInt(u32, .little);
const name_bytes = try self.allocator.alloc(u8, name_bytes_len);
defer self.allocator.free(name_bytes);
try reader.readNoEof(name_bytes);
const HashTableHeader = extern struct {
Size: u32,
Capacity: u32,
fn maxLoad(cap: u32) u32 {
return cap * 2 / 3 + 1;
}
};
const hash_tbl_hdr = try reader.readStruct(HashTableHeader);
if (hash_tbl_hdr.Capacity == 0)
return error.InvalidDebugInfo;
if (hash_tbl_hdr.Size > HashTableHeader.maxLoad(hash_tbl_hdr.Capacity))
return error.InvalidDebugInfo;
const present = try readSparseBitVector(&reader, self.allocator);
defer self.allocator.free(present);
if (present.len != hash_tbl_hdr.Size)
return error.InvalidDebugInfo;
const deleted = try readSparseBitVector(&reader, self.allocator);
defer self.allocator.free(deleted);
for (present) |_| {
const name_offset = try reader.readInt(u32, .little);
const name_index = try reader.readInt(u32, .little);
if (name_offset > name_bytes.len)
return error.InvalidDebugInfo;
const name = std.mem.sliceTo(name_bytes[name_offset..], 0);
if (std.mem.eql(u8, name, "/names")) {
break :str_tab_index name_index;
}
}
return error.MissingDebugInfo;
};
self.string_table = self.getStreamById(string_table_index) orelse
return error.MissingDebugInfo;
}
pub fn getSymbolName(self: *Pdb, module: *Module, address: u64) ?[]const u8 {
_ = self;
std.debug.assert(module.populated);
var symbol_i: usize = 0;
while (symbol_i != module.symbols.len) {
const prefix = @as(*align(1) pdb.RecordPrefix, @ptrCast(&module.symbols[symbol_i]));
if (prefix.RecordLen < 2)
return null;
switch (prefix.RecordKind) {
.S_LPROC32, .S_GPROC32 => {
const proc_sym = @as(*align(1) pdb.ProcSym, @ptrCast(&module.symbols[symbol_i + @sizeOf(pdb.RecordPrefix)]));
if (address >= proc_sym.CodeOffset and address < proc_sym.CodeOffset + proc_sym.CodeSize) {
return std.mem.sliceTo(@as([*:0]u8, @ptrCast(&proc_sym.Name[0])), 0);
}
},
else => {},
}
symbol_i += prefix.RecordLen + @sizeOf(u16);
}
return null;
}
pub fn getLineNumberInfo(self: *Pdb, module: *Module, address: u64) !std.debug.SourceLocation {
std.debug.assert(module.populated);
const subsect_info = module.subsect_info;
var sect_offset: usize = 0;
var skip_len: usize = undefined;
const checksum_offset = module.checksum_offset orelse return error.MissingDebugInfo;
while (sect_offset != subsect_info.len) : (sect_offset += skip_len) {
const subsect_hdr = @as(*align(1) pdb.DebugSubsectionHeader, @ptrCast(&subsect_info[sect_offset]));
skip_len = subsect_hdr.Length;
sect_offset += @sizeOf(pdb.DebugSubsectionHeader);
switch (subsect_hdr.Kind) {
.Lines => {
var line_index = sect_offset;
const line_hdr = @as(*align(1) pdb.LineFragmentHeader, @ptrCast(&subsect_info[line_index]));
if (line_hdr.RelocSegment == 0)
return error.MissingDebugInfo;
line_index += @sizeOf(pdb.LineFragmentHeader);
const frag_vaddr_start = line_hdr.RelocOffset;
const frag_vaddr_end = frag_vaddr_start + line_hdr.CodeSize;
if (address >= frag_vaddr_start and address < frag_vaddr_end) {
// There is an unknown number of LineBlockFragmentHeaders (and their accompanying line and column records)
// from now on. We will iterate through them, and eventually find a SourceLocation that we're interested in,
// breaking out to :subsections. If not, we will make sure to not read anything outside of this subsection.
const subsection_end_index = sect_offset + subsect_hdr.Length;
while (line_index < subsection_end_index) {
const block_hdr = @as(*align(1) pdb.LineBlockFragmentHeader, @ptrCast(&subsect_info[line_index]));
line_index += @sizeOf(pdb.LineBlockFragmentHeader);
const start_line_index = line_index;
const has_column = line_hdr.Flags.LF_HaveColumns;
// All line entries are stored inside their line block by ascending start address.
// Heuristic: we want to find the last line entry
// that has a vaddr_start <= address.
// This is done with a simple linear search.
var line_i: u32 = 0;
while (line_i < block_hdr.NumLines) : (line_i += 1) {
const line_num_entry = @as(*align(1) pdb.LineNumberEntry, @ptrCast(&subsect_info[line_index]));
line_index += @sizeOf(pdb.LineNumberEntry);
const vaddr_start = frag_vaddr_start + line_num_entry.Offset;
if (address < vaddr_start) {
break;
}
}
// line_i == 0 would mean that no matching pdb.LineNumberEntry was found.
if (line_i > 0) {
const subsect_index = checksum_offset + block_hdr.NameIndex;
const chksum_hdr = @as(*align(1) pdb.FileChecksumEntryHeader, @ptrCast(&module.subsect_info[subsect_index]));
const strtab_offset = @sizeOf(pdb.StringTableHeader) + chksum_hdr.FileNameOffset;
try self.string_table.?.seekTo(strtab_offset);
const source_file_name = try self.string_table.?.reader().readUntilDelimiterAlloc(self.allocator, 0, 1024);
const line_entry_idx = line_i - 1;
const column = if (has_column) blk: {
const start_col_index = start_line_index + @sizeOf(pdb.LineNumberEntry) * block_hdr.NumLines;
const col_index = start_col_index + @sizeOf(pdb.ColumnNumberEntry) * line_entry_idx;
const col_num_entry = @as(*align(1) pdb.ColumnNumberEntry, @ptrCast(&subsect_info[col_index]));
break :blk col_num_entry.StartColumn;
} else 0;
const found_line_index = start_line_index + line_entry_idx * @sizeOf(pdb.LineNumberEntry);
const line_num_entry: *align(1) pdb.LineNumberEntry = @ptrCast(&subsect_info[found_line_index]);
const flags: *align(1) pdb.LineNumberEntry.Flags = @ptrCast(&line_num_entry.Flags);
return .{
.file_name = source_file_name,
.line = flags.Start,
.column = column,
};
}
}
// Checking that we are not reading garbage after the (possibly) multiple block fragments.
if (line_index != subsection_end_index) {
return error.InvalidDebugInfo;
}
}
},
else => {},
}
if (sect_offset > subsect_info.len)
return error.InvalidDebugInfo;
}
return error.MissingDebugInfo;
}
pub fn getModule(self: *Pdb, index: usize) !?*Module {
if (index >= self.modules.len)
return null;
const mod = &self.modules[index];
if (mod.populated)
return mod;
// At most one can be non-zero.
if (mod.mod_info.C11ByteSize != 0 and mod.mod_info.C13ByteSize != 0)
return error.InvalidDebugInfo;
if (mod.mod_info.C13ByteSize == 0)
return error.InvalidDebugInfo;
const stream = self.getStreamById(mod.mod_info.ModuleSymStream) orelse
return error.MissingDebugInfo;
const reader = stream.reader();
const signature = try reader.readInt(u32, .little);
if (signature != 4)
return error.InvalidDebugInfo;
mod.symbols = try self.allocator.alloc(u8, mod.mod_info.SymByteSize - 4);
errdefer self.allocator.free(mod.symbols);
try reader.readNoEof(mod.symbols);
mod.subsect_info = try self.allocator.alloc(u8, mod.mod_info.C13ByteSize);
errdefer self.allocator.free(mod.subsect_info);
try reader.readNoEof(mod.subsect_info);
var sect_offset: usize = 0;
var skip_len: usize = undefined;
while (sect_offset != mod.subsect_info.len) : (sect_offset += skip_len) {
const subsect_hdr = @as(*align(1) pdb.DebugSubsectionHeader, @ptrCast(&mod.subsect_info[sect_offset]));
skip_len = subsect_hdr.Length;
sect_offset += @sizeOf(pdb.DebugSubsectionHeader);
switch (subsect_hdr.Kind) {
.FileChecksums => {
mod.checksum_offset = sect_offset;
break;
},
else => {},
}
if (sect_offset > mod.subsect_info.len)
return error.InvalidDebugInfo;
}
mod.populated = true;
return mod;
}
pub fn getStreamById(self: *Pdb, id: u32) ?*MsfStream {
if (id >= self.msf.streams.len)
return null;
return &self.msf.streams[id];
}
pub fn getStream(self: *Pdb, stream: pdb.StreamType) ?*MsfStream {
const id = @intFromEnum(stream);
return self.getStreamById(id);
}
/// https://llvm.org/docs/PDB/MsfFile.html
const Msf = struct {
directory: MsfStream,
streams: []MsfStream,
fn init(allocator: Allocator, file: File) !Msf {
const in = file.reader();
const superblock = try in.readStruct(pdb.SuperBlock);
// Sanity checks
if (!std.mem.eql(u8, &superblock.FileMagic, pdb.SuperBlock.file_magic))
return error.InvalidDebugInfo;
if (superblock.FreeBlockMapBlock != 1 and superblock.FreeBlockMapBlock != 2)
return error.InvalidDebugInfo;
const file_len = try file.getEndPos();
if (superblock.NumBlocks * superblock.BlockSize != file_len)
return error.InvalidDebugInfo;
switch (superblock.BlockSize) {
// llvm only supports 4096 but we can handle any of these values
512, 1024, 2048, 4096 => {},
else => return error.InvalidDebugInfo,
}
const dir_block_count = blockCountFromSize(superblock.NumDirectoryBytes, superblock.BlockSize);
if (dir_block_count > superblock.BlockSize / @sizeOf(u32))
return error.UnhandledBigDirectoryStream; // cf. BlockMapAddr comment.
try file.seekTo(superblock.BlockSize * superblock.BlockMapAddr);
const dir_blocks = try allocator.alloc(u32, dir_block_count);
for (dir_blocks) |*b| {
b.* = try in.readInt(u32, .little);
}
var directory = MsfStream.init(
superblock.BlockSize,
file,
dir_blocks,
);
const begin = directory.pos;
const stream_count = try directory.reader().readInt(u32, .little);
const stream_sizes = try allocator.alloc(u32, stream_count);
defer allocator.free(stream_sizes);
// Microsoft's implementation uses @as(u32, -1) for nonexistent streams.
// These streams are not used, but still participate in the file
// and must be taken into account when resolving stream indices.
const Nil = 0xFFFFFFFF;
for (stream_sizes) |*s| {
const size = try directory.reader().readInt(u32, .little);
s.* = if (size == Nil) 0 else blockCountFromSize(size, superblock.BlockSize);
}
const streams = try allocator.alloc(MsfStream, stream_count);
for (streams, 0..) |*stream, i| {
const size = stream_sizes[i];
if (size == 0) {
stream.* = MsfStream{
.blocks = &[_]u32{},
};
} else {
var blocks = try allocator.alloc(u32, size);
var j: u32 = 0;
while (j < size) : (j += 1) {
const block_id = try directory.reader().readInt(u32, .little);
const n = (block_id % superblock.BlockSize);
// 0 is for pdb.SuperBlock, 1 and 2 for FPMs.
if (block_id == 0 or n == 1 or n == 2 or block_id * superblock.BlockSize > file_len)
return error.InvalidBlockIndex;
blocks[j] = block_id;
}
stream.* = MsfStream.init(
superblock.BlockSize,
file,
blocks,
);
}
}
const end = directory.pos;
if (end - begin != superblock.NumDirectoryBytes)
return error.InvalidStreamDirectory;
return Msf{
.directory = directory,
.streams = streams,
};
}
fn deinit(self: *Msf, allocator: Allocator) void {
allocator.free(self.directory.blocks);
for (self.streams) |*stream| {
allocator.free(stream.blocks);
}
allocator.free(self.streams);
}
};
const MsfStream = struct {
in_file: File = undefined,
pos: u64 = undefined,
blocks: []u32 = undefined,
block_size: u32 = undefined,
pub const Error = @typeInfo(@typeInfo(@TypeOf(read)).Fn.return_type.?).ErrorUnion.error_set;
fn init(block_size: u32, file: File, blocks: []u32) MsfStream {
const stream = MsfStream{
.in_file = file,
.pos = 0,
.blocks = blocks,
.block_size = block_size,
};
return stream;
}
fn read(self: *MsfStream, buffer: []u8) !usize {
var block_id = @as(usize, @intCast(self.pos / self.block_size));
if (block_id >= self.blocks.len) return 0; // End of Stream
var block = self.blocks[block_id];
var offset = self.pos % self.block_size;
try self.in_file.seekTo(block * self.block_size + offset);
const in = self.in_file.reader();
var size: usize = 0;
var rem_buffer = buffer;
while (size < buffer.len) {
const size_to_read = @min(self.block_size - offset, rem_buffer.len);
size += try in.read(rem_buffer[0..size_to_read]);
rem_buffer = buffer[size..];
offset += size_to_read;
// If we're at the end of a block, go to the next one.
if (offset == self.block_size) {
offset = 0;
block_id += 1;
if (block_id >= self.blocks.len) break; // End of Stream
block = self.blocks[block_id];
try self.in_file.seekTo(block * self.block_size);
}
}
self.pos += buffer.len;
return buffer.len;
}
pub fn seekBy(self: *MsfStream, len: i64) !void {
self.pos = @as(u64, @intCast(@as(i64, @intCast(self.pos)) + len));
if (self.pos >= self.blocks.len * self.block_size)
return error.EOF;
}
pub fn seekTo(self: *MsfStream, len: u64) !void {
self.pos = len;
if (self.pos >= self.blocks.len * self.block_size)
return error.EOF;
}
fn getSize(self: *const MsfStream) u64 {
return self.blocks.len * self.block_size;
}
fn getFilePos(self: MsfStream) u64 {
const block_id = self.pos / self.block_size;
const block = self.blocks[block_id];
const offset = self.pos % self.block_size;
return block * self.block_size + offset;
}
pub fn reader(self: *MsfStream) std.io.Reader(*MsfStream, Error, read) {
return .{ .context = self };
}
};
fn readSparseBitVector(stream: anytype, allocator: Allocator) ![]u32 {
const num_words = try stream.readInt(u32, .little);
var list = std.ArrayList(u32).init(allocator);
errdefer list.deinit();
var word_i: u32 = 0;
while (word_i != num_words) : (word_i += 1) {
const word = try stream.readInt(u32, .little);
var bit_i: u5 = 0;
while (true) : (bit_i += 1) {
if (word & (@as(u32, 1) << bit_i) != 0) {
try list.append(word_i * 32 + bit_i);
}
if (bit_i == std.math.maxInt(u5)) break;
}
}
return try list.toOwnedSlice();
}
fn blockCountFromSize(size: u32, block_size: u32) u32 {
return (size + block_size - 1) / block_size;
}
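A hedged end-to-end sketch of the extracted type; the path and `reloc_address` are illustrative:

var pdb = try Pdb.init(allocator, "zig-out/bin/app.pdb");
defer pdb.deinit();
try pdb.parseInfoStream();
try pdb.parseDbiStream();
if (try pdb.getModule(0)) |module| {
    if (pdb.getSymbolName(module, reloc_address)) |name| {
        std.debug.print("symbol: {s}\n", .{name});
    }
}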

lib/std/debug/SelfInfo.zig (new file, 2438 lines)

File diff suppressed because it is too large

lib/std/pdb.zig

@@ -1,3 +1,12 @@
//! Program Data Base debugging information format.
//!
//! This namespace contains unopinionated types and data definitions only. For
//! an implementation of parsing and caching PDB information, see
//! `std.debug.Pdb`.
//!
//! Most of this is based on information gathered from LLVM source code,
//! documentation and/or contributors.
const std = @import("std.zig");
const io = std.io;
const math = std.math;
@@ -9,10 +18,7 @@ const debug = std.debug;
const ArrayList = std.ArrayList;
// Note: most of this is based on information gathered from LLVM source code,
// documentation and/or contributors.
// https://llvm.org/docs/PDB/DbiStream.html#stream-header
/// https://llvm.org/docs/PDB/DbiStream.html#stream-header
pub const DbiStreamHeader = extern struct {
VersionSignature: i32,
VersionHeader: u32,
@@ -415,10 +421,8 @@ pub const ColumnNumberEntry = extern struct {
pub const FileChecksumEntryHeader = extern struct {
/// Byte offset of filename in global string table.
FileNameOffset: u32,
/// Number of bytes of checksum.
ChecksumSize: u8,
/// FileChecksumKind
ChecksumKind: u8,
};
@@ -451,525 +455,15 @@ pub const DebugSubsectionHeader = extern struct {
Length: u32,
};
pub const PDBStringTableHeader = extern struct {
pub const StringTableHeader = extern struct {
/// PDBStringTableSignature
Signature: u32,
/// 1 or 2
HashVersion: u32,
/// Number of bytes of names buffer.
ByteSize: u32,
};
fn readSparseBitVector(stream: anytype, allocator: mem.Allocator) ![]u32 {
const num_words = try stream.readInt(u32, .little);
var list = ArrayList(u32).init(allocator);
errdefer list.deinit();
var word_i: u32 = 0;
while (word_i != num_words) : (word_i += 1) {
const word = try stream.readInt(u32, .little);
var bit_i: u5 = 0;
while (true) : (bit_i += 1) {
if (word & (@as(u32, 1) << bit_i) != 0) {
try list.append(word_i * 32 + bit_i);
}
if (bit_i == std.math.maxInt(u5)) break;
}
}
return try list.toOwnedSlice();
}
pub const Pdb = struct {
in_file: File,
msf: Msf,
allocator: mem.Allocator,
string_table: ?*MsfStream,
dbi: ?*MsfStream,
modules: []Module,
sect_contribs: []SectionContribEntry,
guid: [16]u8,
age: u32,
pub const Module = struct {
mod_info: ModInfo,
module_name: []u8,
obj_file_name: []u8,
// The fields below are filled on demand.
populated: bool,
symbols: []u8,
subsect_info: []u8,
checksum_offset: ?usize,
pub fn deinit(self: *Module, allocator: mem.Allocator) void {
allocator.free(self.module_name);
allocator.free(self.obj_file_name);
if (self.populated) {
allocator.free(self.symbols);
allocator.free(self.subsect_info);
}
}
};
pub fn init(allocator: mem.Allocator, path: []const u8) !Pdb {
const file = try fs.cwd().openFile(path, .{});
errdefer file.close();
return Pdb{
.in_file = file,
.allocator = allocator,
.string_table = null,
.dbi = null,
.msf = try Msf.init(allocator, file),
.modules = &[_]Module{},
.sect_contribs = &[_]SectionContribEntry{},
.guid = undefined,
.age = undefined,
};
}
pub fn deinit(self: *Pdb) void {
self.in_file.close();
self.msf.deinit(self.allocator);
for (self.modules) |*module| {
module.deinit(self.allocator);
}
self.allocator.free(self.modules);
self.allocator.free(self.sect_contribs);
}
pub fn parseDbiStream(self: *Pdb) !void {
var stream = self.getStream(StreamType.Dbi) orelse
return error.InvalidDebugInfo;
const reader = stream.reader();
const header = try reader.readStruct(DbiStreamHeader);
if (header.VersionHeader != 19990903) // V70, only value observed by LLVM team
return error.UnknownPDBVersion;
// if (header.Age != age)
// return error.UnmatchingPDB;
const mod_info_size = header.ModInfoSize;
const section_contrib_size = header.SectionContributionSize;
var modules = ArrayList(Module).init(self.allocator);
errdefer modules.deinit();
// Module Info Substream
var mod_info_offset: usize = 0;
while (mod_info_offset != mod_info_size) {
const mod_info = try reader.readStruct(ModInfo);
var this_record_len: usize = @sizeOf(ModInfo);
const module_name = try reader.readUntilDelimiterAlloc(self.allocator, 0, 1024);
errdefer self.allocator.free(module_name);
this_record_len += module_name.len + 1;
const obj_file_name = try reader.readUntilDelimiterAlloc(self.allocator, 0, 1024);
errdefer self.allocator.free(obj_file_name);
this_record_len += obj_file_name.len + 1;
if (this_record_len % 4 != 0) {
const round_to_next_4 = (this_record_len | 0x3) + 1;
const march_forward_bytes = round_to_next_4 - this_record_len;
try stream.seekBy(@as(isize, @intCast(march_forward_bytes)));
this_record_len += march_forward_bytes;
}
try modules.append(Module{
.mod_info = mod_info,
.module_name = module_name,
.obj_file_name = obj_file_name,
.populated = false,
.symbols = undefined,
.subsect_info = undefined,
.checksum_offset = null,
});
mod_info_offset += this_record_len;
if (mod_info_offset > mod_info_size)
return error.InvalidDebugInfo;
}
// Section Contribution Substream
var sect_contribs = ArrayList(SectionContribEntry).init(self.allocator);
errdefer sect_contribs.deinit();
var sect_cont_offset: usize = 0;
if (section_contrib_size != 0) {
const version = reader.readEnum(SectionContrSubstreamVersion, .little) catch |err| switch (err) {
error.InvalidValue => return error.InvalidDebugInfo,
else => |e| return e,
};
_ = version;
sect_cont_offset += @sizeOf(u32);
}
while (sect_cont_offset != section_contrib_size) {
const entry = try sect_contribs.addOne();
entry.* = try reader.readStruct(SectionContribEntry);
sect_cont_offset += @sizeOf(SectionContribEntry);
if (sect_cont_offset > section_contrib_size)
return error.InvalidDebugInfo;
}
self.modules = try modules.toOwnedSlice();
self.sect_contribs = try sect_contribs.toOwnedSlice();
}
pub fn parseInfoStream(self: *Pdb) !void {
var stream = self.getStream(StreamType.Pdb) orelse
return error.InvalidDebugInfo;
const reader = stream.reader();
// Parse the InfoStreamHeader.
const version = try reader.readInt(u32, .little);
const signature = try reader.readInt(u32, .little);
_ = signature;
const age = try reader.readInt(u32, .little);
const guid = try reader.readBytesNoEof(16);
if (version != 20000404) // VC70, only value observed by LLVM team
return error.UnknownPDBVersion;
self.guid = guid;
self.age = age;
// Find the string table.
const string_table_index = str_tab_index: {
const name_bytes_len = try reader.readInt(u32, .little);
const name_bytes = try self.allocator.alloc(u8, name_bytes_len);
defer self.allocator.free(name_bytes);
try reader.readNoEof(name_bytes);
const HashTableHeader = extern struct {
Size: u32,
Capacity: u32,
fn maxLoad(cap: u32) u32 {
return cap * 2 / 3 + 1;
}
};
const hash_tbl_hdr = try reader.readStruct(HashTableHeader);
if (hash_tbl_hdr.Capacity == 0)
return error.InvalidDebugInfo;
if (hash_tbl_hdr.Size > HashTableHeader.maxLoad(hash_tbl_hdr.Capacity))
return error.InvalidDebugInfo;
const present = try readSparseBitVector(&reader, self.allocator);
defer self.allocator.free(present);
if (present.len != hash_tbl_hdr.Size)
return error.InvalidDebugInfo;
const deleted = try readSparseBitVector(&reader, self.allocator);
defer self.allocator.free(deleted);
for (present) |_| {
const name_offset = try reader.readInt(u32, .little);
const name_index = try reader.readInt(u32, .little);
if (name_offset > name_bytes.len)
return error.InvalidDebugInfo;
const name = mem.sliceTo(name_bytes[name_offset..], 0);
if (mem.eql(u8, name, "/names")) {
break :str_tab_index name_index;
}
}
return error.MissingDebugInfo;
};
self.string_table = self.getStreamById(string_table_index) orelse
return error.MissingDebugInfo;
}
pub fn getSymbolName(self: *Pdb, module: *Module, address: u64) ?[]const u8 {
_ = self;
std.debug.assert(module.populated);
var symbol_i: usize = 0;
while (symbol_i != module.symbols.len) {
const prefix = @as(*align(1) RecordPrefix, @ptrCast(&module.symbols[symbol_i]));
if (prefix.RecordLen < 2)
return null;
switch (prefix.RecordKind) {
.S_LPROC32, .S_GPROC32 => {
const proc_sym = @as(*align(1) ProcSym, @ptrCast(&module.symbols[symbol_i + @sizeOf(RecordPrefix)]));
if (address >= proc_sym.CodeOffset and address < proc_sym.CodeOffset + proc_sym.CodeSize) {
return mem.sliceTo(@as([*:0]u8, @ptrCast(&proc_sym.Name[0])), 0);
}
},
else => {},
}
symbol_i += prefix.RecordLen + @sizeOf(u16);
}
return null;
}
pub fn getLineNumberInfo(self: *Pdb, module: *Module, address: u64) !debug.LineInfo {
std.debug.assert(module.populated);
const subsect_info = module.subsect_info;
var sect_offset: usize = 0;
var skip_len: usize = undefined;
const checksum_offset = module.checksum_offset orelse return error.MissingDebugInfo;
while (sect_offset != subsect_info.len) : (sect_offset += skip_len) {
const subsect_hdr = @as(*align(1) DebugSubsectionHeader, @ptrCast(&subsect_info[sect_offset]));
skip_len = subsect_hdr.Length;
sect_offset += @sizeOf(DebugSubsectionHeader);
switch (subsect_hdr.Kind) {
.Lines => {
var line_index = sect_offset;
const line_hdr = @as(*align(1) LineFragmentHeader, @ptrCast(&subsect_info[line_index]));
if (line_hdr.RelocSegment == 0)
return error.MissingDebugInfo;
line_index += @sizeOf(LineFragmentHeader);
const frag_vaddr_start = line_hdr.RelocOffset;
const frag_vaddr_end = frag_vaddr_start + line_hdr.CodeSize;
if (address >= frag_vaddr_start and address < frag_vaddr_end) {
// There is an unknown number of LineBlockFragmentHeaders (and their accompanying line and column records)
// from now on. We will iterate through them, and eventually find a LineInfo that we're interested in,
// breaking out to :subsections. If not, we will make sure to not read anything outside of this subsection.
const subsection_end_index = sect_offset + subsect_hdr.Length;
while (line_index < subsection_end_index) {
const block_hdr = @as(*align(1) LineBlockFragmentHeader, @ptrCast(&subsect_info[line_index]));
line_index += @sizeOf(LineBlockFragmentHeader);
const start_line_index = line_index;
const has_column = line_hdr.Flags.LF_HaveColumns;
// All line entries are stored inside their line block by ascending start address.
// Heuristic: we want to find the last line entry
// that has a vaddr_start <= address.
// This is done with a simple linear search.
var line_i: u32 = 0;
while (line_i < block_hdr.NumLines) : (line_i += 1) {
const line_num_entry = @as(*align(1) LineNumberEntry, @ptrCast(&subsect_info[line_index]));
line_index += @sizeOf(LineNumberEntry);
const vaddr_start = frag_vaddr_start + line_num_entry.Offset;
if (address < vaddr_start) {
break;
}
}
// line_i == 0 would mean that no matching LineNumberEntry was found.
if (line_i > 0) {
const subsect_index = checksum_offset + block_hdr.NameIndex;
const chksum_hdr = @as(*align(1) FileChecksumEntryHeader, @ptrCast(&module.subsect_info[subsect_index]));
const strtab_offset = @sizeOf(PDBStringTableHeader) + chksum_hdr.FileNameOffset;
try self.string_table.?.seekTo(strtab_offset);
const source_file_name = try self.string_table.?.reader().readUntilDelimiterAlloc(self.allocator, 0, 1024);
const line_entry_idx = line_i - 1;
const column = if (has_column) blk: {
const start_col_index = start_line_index + @sizeOf(LineNumberEntry) * block_hdr.NumLines;
const col_index = start_col_index + @sizeOf(ColumnNumberEntry) * line_entry_idx;
const col_num_entry = @as(*align(1) ColumnNumberEntry, @ptrCast(&subsect_info[col_index]));
break :blk col_num_entry.StartColumn;
} else 0;
const found_line_index = start_line_index + line_entry_idx * @sizeOf(LineNumberEntry);
const line_num_entry: *align(1) LineNumberEntry = @ptrCast(&subsect_info[found_line_index]);
const flags: *align(1) LineNumberEntry.Flags = @ptrCast(&line_num_entry.Flags);
return debug.LineInfo{
.file_name = source_file_name,
.line = flags.Start,
.column = column,
};
}
}
// Check that we have not read past the end of the (possibly multiple) block fragments into garbage.
if (line_index != subsection_end_index) {
return error.InvalidDebugInfo;
}
}
},
else => {},
}
if (sect_offset > subsect_info.len)
return error.InvalidDebugInfo;
}
return error.MissingDebugInfo;
}
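// Usage sketch (illustrative; error handling elided): the typical pipeline
// first populates the module, then queries line information for an address
// derived from the section contribution covering the symbolized location:
//     const module = (try pdb.getModule(module_index)) orelse
//         return error.MissingDebugInfo;
//     const line_info = try pdb.getLineNumberInfo(module, vaddr);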
pub fn getModule(self: *Pdb, index: usize) !?*Module {
if (index >= self.modules.len)
return null;
const mod = &self.modules[index];
if (mod.populated)
return mod;
// At most one of C11ByteSize and C13ByteSize can be non-zero.
if (mod.mod_info.C11ByteSize != 0 and mod.mod_info.C13ByteSize != 0)
    return error.InvalidDebugInfo;
// Only C13 line information is supported here; a module without it is of no use to us.
if (mod.mod_info.C13ByteSize == 0)
    return error.InvalidDebugInfo;
const stream = self.getStreamById(mod.mod_info.ModuleSymStream) orelse
return error.MissingDebugInfo;
const reader = stream.reader();
const signature = try reader.readInt(u32, .little);
if (signature != 4)
return error.InvalidDebugInfo;
mod.symbols = try self.allocator.alloc(u8, mod.mod_info.SymByteSize - 4);
errdefer self.allocator.free(mod.symbols);
try reader.readNoEof(mod.symbols);
mod.subsect_info = try self.allocator.alloc(u8, mod.mod_info.C13ByteSize);
errdefer self.allocator.free(mod.subsect_info);
try reader.readNoEof(mod.subsect_info);
var sect_offset: usize = 0;
var skip_len: usize = undefined;
while (sect_offset != mod.subsect_info.len) : (sect_offset += skip_len) {
const subsect_hdr = @as(*align(1) DebugSubsectionHeader, @ptrCast(&mod.subsect_info[sect_offset]));
skip_len = subsect_hdr.Length;
sect_offset += @sizeOf(DebugSubsectionHeader);
switch (subsect_hdr.Kind) {
.FileChecksums => {
mod.checksum_offset = sect_offset;
break;
},
else => {},
}
if (sect_offset > mod.subsect_info.len)
return error.InvalidDebugInfo;
}
mod.populated = true;
return mod;
}
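// Note: modules are populated lazily. Repeated getModule calls for the same
// index are cheap, since the populated flag short-circuits the stream reads
// and the subsection scan above.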
pub fn getStreamById(self: *Pdb, id: u32) ?*MsfStream {
if (id >= self.msf.streams.len)
return null;
return &self.msf.streams[id];
}
pub fn getStream(self: *Pdb, stream: StreamType) ?*MsfStream {
const id = @intFromEnum(stream);
return self.getStreamById(id);
}
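// Example (illustrative): fixed-index streams are best fetched through the
// StreamType enum rather than a raw stream id, e.g. the DBI stream (assuming
// the enum carries a dbi member, as in this file's StreamType):
//     const dbi = pdb.getStream(.dbi) orelse return error.InvalidDebugInfo;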
};
// see https://llvm.org/docs/PDB/MsfFile.html
const Msf = struct {
directory: MsfStream,
streams: []MsfStream,
fn init(allocator: mem.Allocator, file: File) !Msf {
const in = file.reader();
const superblock = try in.readStruct(SuperBlock);
// Sanity checks
if (!mem.eql(u8, &superblock.FileMagic, SuperBlock.file_magic))
return error.InvalidDebugInfo;
if (superblock.FreeBlockMapBlock != 1 and superblock.FreeBlockMapBlock != 2)
return error.InvalidDebugInfo;
const file_len = try file.getEndPos();
if (superblock.NumBlocks * superblock.BlockSize != file_len)
return error.InvalidDebugInfo;
switch (superblock.BlockSize) {
// LLVM only supports 4096, but we can handle any of these values.
512, 1024, 2048, 4096 => {},
else => return error.InvalidDebugInfo,
}
const dir_block_count = blockCountFromSize(superblock.NumDirectoryBytes, superblock.BlockSize);
if (dir_block_count > superblock.BlockSize / @sizeOf(u32))
return error.UnhandledBigDirectoryStream; // cf. BlockMapAddr comment.
try file.seekTo(superblock.BlockSize * superblock.BlockMapAddr);
const dir_blocks = try allocator.alloc(u32, dir_block_count);
for (dir_blocks) |*b| {
b.* = try in.readInt(u32, .little);
}
var directory = MsfStream.init(
superblock.BlockSize,
file,
dir_blocks,
);
const begin = directory.pos;
const stream_count = try directory.reader().readInt(u32, .little);
const stream_sizes = try allocator.alloc(u32, stream_count);
defer allocator.free(stream_sizes);
// Microsoft's implementation uses 0xFFFFFFFF (-1 as a u32) to mark
// nonexistent streams. These streams are not used, but still participate
// in the file and must be taken into account when resolving stream indices.
const Nil = 0xFFFFFFFF;
for (stream_sizes) |*s| {
const size = try directory.reader().readInt(u32, .little);
s.* = if (size == Nil) 0 else blockCountFromSize(size, superblock.BlockSize);
}
const streams = try allocator.alloc(MsfStream, stream_count);
for (streams, 0..) |*stream, i| {
const size = stream_sizes[i];
if (size == 0) {
stream.* = MsfStream{
.blocks = &[_]u32{},
};
} else {
var blocks = try allocator.alloc(u32, size);
var j: u32 = 0;
while (j < size) : (j += 1) {
const block_id = try directory.reader().readInt(u32, .little);
const n = (block_id % superblock.BlockSize);
// Block 0 is the SuperBlock; blocks with block_id % BlockSize == 1 or 2 are
// Free Block Map (FPM) pages, which recur at every BlockSize interval.
if (block_id == 0 or n == 1 or n == 2 or block_id * superblock.BlockSize > file_len)
return error.InvalidBlockIndex;
blocks[j] = block_id;
}
stream.* = MsfStream.init(
superblock.BlockSize,
file,
blocks,
);
}
}
const end = directory.pos;
if (end - begin != superblock.NumDirectoryBytes)
return error.InvalidStreamDirectory;
return Msf{
.directory = directory,
.streams = streams,
};
}
fn deinit(self: *Msf, allocator: mem.Allocator) void {
allocator.free(self.directory.blocks);
for (self.streams) |*stream| {
allocator.free(stream.blocks);
}
allocator.free(self.streams);
}
};
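// Usage sketch (illustrative): the MSF layer is initialized once per PDB file
// and owns the stream directory plus one MsfStream per stream:
//     var msf = try Msf.init(allocator, file);
//     defer msf.deinit(allocator);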
fn blockCountFromSize(size: u32, block_size: u32) u32 {
return (size + block_size - 1) / block_size;
}
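// Illustrative sanity test (not in the original): blockCountFromSize is a
// ceiling division, counting how many whole blocks a byte size occupies.
test "blockCountFromSize rounds up to whole blocks" {
    try std.testing.expectEqual(@as(u32, 2), blockCountFromSize(4097, 4096));
    try std.testing.expectEqual(@as(u32, 1), blockCountFromSize(4096, 4096));
    try std.testing.expectEqual(@as(u32, 0), blockCountFromSize(0, 4096));
}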
// https://llvm.org/docs/PDB/MsfFile.html#the-superblock
pub const SuperBlock = extern struct {
/// The LLVM docs list a space between C / C++ but empirically this is not the case.
@ -1016,82 +510,3 @@ pub const SuperBlock = extern struct {
// implement it so we're kind of safe making this assumption for now.
BlockMapAddr: u32,
};
const MsfStream = struct {
in_file: File = undefined,
pos: u64 = undefined,
blocks: []u32 = undefined,
block_size: u32 = undefined,
pub const Error = @typeInfo(@typeInfo(@TypeOf(read)).Fn.return_type.?).ErrorUnion.error_set;
fn init(block_size: u32, file: File, blocks: []u32) MsfStream {
const stream = MsfStream{
.in_file = file,
.pos = 0,
.blocks = blocks,
.block_size = block_size,
};
return stream;
}
fn read(self: *MsfStream, buffer: []u8) !usize {
var block_id = @as(usize, @intCast(self.pos / self.block_size));
if (block_id >= self.blocks.len) return 0; // End of Stream
var block = self.blocks[block_id];
var offset = self.pos % self.block_size;
try self.in_file.seekTo(block * self.block_size + offset);
const in = self.in_file.reader();
var size: usize = 0;
var rem_buffer = buffer;
while (size < buffer.len) {
const size_to_read = @min(self.block_size - offset, rem_buffer.len);
size += try in.read(rem_buffer[0..size_to_read]);
rem_buffer = buffer[size..];
offset += size_to_read;
// If we're at the end of a block, go to the next one.
if (offset == self.block_size) {
offset = 0;
block_id += 1;
if (block_id >= self.blocks.len) break; // End of Stream
block = self.blocks[block_id];
try self.in_file.seekTo(block * self.block_size);
}
}
// Report how many bytes were actually read; this can be fewer than
// buffer.len when the end of the stream is reached mid-read.
self.pos += size;
return size;
}
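// Worked example (illustrative): with block_size = 4096 and blocks = { 7, 3 },
// stream position 5000 maps to block_id 1 (file block 3) at offset 904, so the
// read starts at file offset 3 * 4096 + 904 = 13192.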
pub fn seekBy(self: *MsfStream, len: i64) !void {
self.pos = @as(u64, @intCast(@as(i64, @intCast(self.pos)) + len));
if (self.pos >= self.blocks.len * self.block_size)
return error.EOF;
}
pub fn seekTo(self: *MsfStream, len: u64) !void {
self.pos = len;
if (self.pos >= self.blocks.len * self.block_size)
return error.EOF;
}
fn getSize(self: *const MsfStream) u64 {
return self.blocks.len * self.block_size;
}
fn getFilePos(self: MsfStream) u64 {
const block_id = self.pos / self.block_size;
const block = self.blocks[block_id];
const offset = self.pos % self.block_size;
return block * self.block_size + offset;
}
pub fn reader(self: *MsfStream) std.io.Reader(*MsfStream, Error, read) {
return .{ .context = self };
}
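// Usage sketch (illustrative): reader() adapts the block-based read above to
// the standard std.io.Reader interface, which is how Msf.init consumes the
// stream directory:
//     try stream.seekTo(0);
//     const count = try stream.reader().readInt(u32, .little);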
};

View File

@ -256,7 +256,7 @@ const StackContext = union(enum) {
current: struct {
ret_addr: ?usize,
},
exception: *const debug.ThreadContext,
exception: *debug.ThreadContext,
not_supported: void,
pub fn dumpStackTrace(ctx: @This()) void {

View File

@ -9,7 +9,7 @@ pub fn main() !void {
defer assert(gpa.deinit() == .ok);
const allocator = gpa.allocator();
var debug_info = try std.debug.openSelfDebugInfo(allocator);
var debug_info = try std.debug.SelfInfo.open(allocator);
defer debug_info.deinit();
var add_addr: usize = undefined;