From 2ec9a11646c792a046b4601e0b99f8e182416a6c Mon Sep 17 00:00:00 2001
From: Sahnvour
Date: Sat, 21 Jul 2018 20:30:11 +0200
Subject: [PATCH] Very much WIP base implementation for #721.

Currently does:
- read COFF executable file
- locate and load corresponding .pdb file
- expose .pdb content as streams (PDB format)
---
 CMakeLists.txt           |   2 +
 std/coff.zig             | 293 +++++++++++++++++++++++++++++++++++++
 std/debug/index.zig      |  47 ++++++-
 std/index.zig            |   4 +
 std/os/index.zig         |   8 +-
 std/os/windows/index.zig |   2 +
 std/pdb.zig              | 304 +++++++++++++++++++++++++++++++++++++++
 7 files changed, 657 insertions(+), 3 deletions(-)
 create mode 100644 std/coff.zig
 create mode 100644 std/pdb.zig

diff --git a/CMakeLists.txt b/CMakeLists.txt
index dd4770ad72..4ddf0bd66e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -427,6 +427,7 @@ set(ZIG_STD_FILES
     "c/index.zig"
     "c/linux.zig"
     "c/windows.zig"
+    "coff.zig"
     "crypto/blake2.zig"
     "crypto/hmac.zig"
     "crypto/index.zig"
@@ -544,6 +545,7 @@ set(ZIG_STD_FILES
     "os/windows/index.zig"
     "os/windows/util.zig"
     "os/zen.zig"
+    "pdb.zig"
     "rand/index.zig"
     "rand/ziggurat.zig"
     "segmented_list.zig"
diff --git a/std/coff.zig b/std/coff.zig
new file mode 100644
index 0000000000..475b4fcbc1
--- /dev/null
+++ b/std/coff.zig
@@ -0,0 +1,293 @@
+const builtin = @import("builtin");
+const std = @import("index.zig");
+const io = std.io;
+const mem = std.mem;
+const os = std.os;
+
+const ArrayList = std.ArrayList;
+
+// CoffHeader.machine values
+// see https://msdn.microsoft.com/en-us/library/windows/desktop/ms680313(v=vs.85).aspx
+const IMAGE_FILE_MACHINE_I386 = 0x014c;
+const IMAGE_FILE_MACHINE_IA64 = 0x0200;
+const IMAGE_FILE_MACHINE_AMD64 = 0x8664;
+
+// OptionalHeader.magic values
+// see https://msdn.microsoft.com/en-us/library/windows/desktop/ms680339(v=vs.85).aspx
+const IMAGE_NT_OPTIONAL_HDR32_MAGIC = 0x10b;
+const IMAGE_NT_OPTIONAL_HDR64_MAGIC = 0x20b;
+
+const IMAGE_NUMBEROF_DIRECTORY_ENTRIES = 16;
+const DEBUG_DIRECTORY = 6;
+
+// Size of one IMAGE_DEBUG_DIRECTORY entry, and the entry type that designates
+// a CodeView record (the record that names the .pdb file).
+const IMAGE_DEBUG_DIRECTORY_ENTRY_SIZE = 28;
+const IMAGE_DEBUG_TYPE_CODEVIEW = 2;
+
+pub const CoffError = error {
+    InvalidPEMagic,
+    InvalidPEHeader,
+    InvalidMachine,
+    MissingCoffSection,
+    MissingDebugInfo,
+    NameTooLong,
+};
+
+/// Reads just enough of a PE/COFF image to find the .pdb file it refers to.
+/// The caller sets `in_file` and `allocator`, then calls `loadHeader` before
+/// any other method; every other field is filled in by the methods below.
+pub const Coff = struct {
+    in_file: os.File,
+    allocator: *mem.Allocator,
+
+    coff_header: CoffHeader,
+    pe_header: OptionalHeader,
+    sections: ArrayList(Section),
+
+    guid: [16]u8,
+    age: u32,
+
+    /// Checks the "MZ" and "PE\0\0" signatures, then reads the COFF file
+    /// header followed by the data directories of the optional header.
+    pub fn loadHeader(self: *Coff) !void {
+        const pe_pointer_offset = 0x3C;
+
+        // A Coff is typically created as `undefined` with only `in_file` and
+        // `allocator` assigned, so give the section list a defined, empty
+        // state before loadSections looks at its length.
+        self.sections = ArrayList(Section).init(self.allocator);
+
+        var file_stream = io.FileInStream.init(&self.in_file);
+        const in = &file_stream.stream;
+
+        var magic: [2]u8 = undefined;
+        try in.readNoEof(magic[0..]);
+        if (!mem.eql(u8, magic, "MZ"))
+            return error.InvalidPEMagic;
+
+        // Seek to PE File Header (coff header)
+        try self.in_file.seekTo(pe_pointer_offset);
+        const pe_magic_offset = try in.readIntLe(u32);
+        try self.in_file.seekTo(pe_magic_offset);
+
+        var pe_header_magic: [4]u8 = undefined;
+        try in.readNoEof(pe_header_magic[0..]);
+        if (!mem.eql(u8, pe_header_magic, []u8{'P', 'E', 0, 0}))
+            return error.InvalidPEHeader;
+
+        self.coff_header = CoffHeader {
+            .machine = try in.readIntLe(u16),
+            .number_of_sections = try in.readIntLe(u16),
+            .timedate_stamp = try in.readIntLe(u32),
+            .pointer_to_symbol_table = try in.readIntLe(u32),
+            .number_of_symbols = try in.readIntLe(u32),
+            .size_of_optional_header = try in.readIntLe(u16),
+            .characteristics = try in.readIntLe(u16),
+        };
+
+        switch (self.coff_header.machine) {
+            IMAGE_FILE_MACHINE_I386,
+            IMAGE_FILE_MACHINE_AMD64,
+            IMAGE_FILE_MACHINE_IA64
+                => {},
+            else => return error.InvalidMachine,
+        }
+
+        try self.loadOptionalHeader(&file_stream);
+    }
+
+    /// Reads the magic of the optional header and its 16 data directories,
+    /// skipping every field in between.
+    fn loadOptionalHeader(self: *Coff, file_stream: *io.FileInStream) !void {
+        const in = &file_stream.stream;
+        self.pe_header.magic = try in.readIntLe(u16);
+        std.debug.warn("reading pe optional\n");
+        // For now we're only interested in finding the reference to the .pdb,
+        // so we'll skip most of this header, whose size differs between 32
+        // and 64 bits by the way.
+        var skip_size: u16 = undefined;
+        if (self.pe_header.magic == IMAGE_NT_OPTIONAL_HDR32_MAGIC) {
+            skip_size = 2 * @sizeOf(u8) + 8 * @sizeOf(u16) + 18 * @sizeOf(u32);
+        }
+        else if (self.pe_header.magic == IMAGE_NT_OPTIONAL_HDR64_MAGIC) {
+            skip_size = 2 * @sizeOf(u8) + 8 * @sizeOf(u16) + 12 * @sizeOf(u32) + 5 * @sizeOf(u64);
+        }
+        else
+            return error.InvalidPEMagic;
+
+        std.debug.warn("skipping {}\n", skip_size);
+        try self.in_file.seekForward(skip_size);
+
+        const number_of_rva_and_sizes = try in.readIntLe(u32);
+        //std.debug.warn("indicating {} data dirs\n", number_of_rva_and_sizes);
+        if (number_of_rva_and_sizes != IMAGE_NUMBEROF_DIRECTORY_ENTRIES)
+            return error.InvalidPEHeader;
+
+        for (self.pe_header.data_directory) |*data_dir| {
+            data_dir.* = OptionalHeader.DataDirectory {
+                .virtual_address = try in.readIntLe(u32),
+                .size = try in.readIntLe(u32),
+            };
+            //std.debug.warn("data_dir @ {x}, size {}\n", data_dir.virtual_address, data_dir.size);
+        }
+        std.debug.warn("loaded data directories\n");
+    }
+
+    /// Writes the path of the .pdb file referenced by the image into `buffer`
+    /// (without the null terminator) and returns its length. The GUID and age
+    /// of the CodeView record are stored in `guid` and `age` along the way.
+    /// Returns error.NameTooLong when the path does not fit in `buffer`.
+    pub fn getPdbPath(self: *Coff, buffer: []u8) !usize {
+        try self.loadSections();
+        const header = (self.getSection(".rdata") orelse return error.MissingCoffSection).header;
+
+        const debug_dir = &self.pe_header.data_directory[DEBUG_DIRECTORY];
+        // An image without debug information has an empty debug directory, and
+        // the unsigned subtraction below must not underflow when the directory
+        // starts before .rdata.
+        if (debug_dir.size < IMAGE_DEBUG_DIRECTORY_ENTRY_SIZE)
+            return error.MissingDebugInfo;
+        if (debug_dir.virtual_address < header.virtual_address)
+            return error.MissingDebugInfo;
+        const file_offset = debug_dir.virtual_address - header.virtual_address + header.pointer_to_raw_data;
+        std.debug.warn("file offset {x}\n", file_offset);
+        try self.in_file.seekTo(file_offset);
+
+        var file_stream = io.FileInStream.init(&self.in_file);
+        const in = &file_stream.stream;
+
+        // The linker usually puts the chunk that contains the .pdb path right
+        // after the debug directory, but rather than rely on that, walk the
+        // directory entries and use the file pointer stored in the CodeView one.
+        const entry_count = debug_dir.size / IMAGE_DEBUG_DIRECTORY_ENTRY_SIZE;
+        var cv_offset: ?u32 = null;
+        var entry: u32 = 0;
+        while (entry < entry_count) : (entry += 1) {
+            // Skip Characteristics, TimeDateStamp, MajorVersion, MinorVersion.
+            try self.in_file.seekForward(2 * @sizeOf(u32) + 2 * @sizeOf(u16));
+            const debug_type = try in.readIntLe(u32);
+            // Skip SizeOfData and AddressOfRawData.
+            try self.in_file.seekForward(2 * @sizeOf(u32));
+            const pointer_to_raw_data = try in.readIntLe(u32);
+            if (debug_type == IMAGE_DEBUG_TYPE_CODEVIEW) {
+                cv_offset = pointer_to_raw_data;
+                break;
+            }
+        }
+        try self.in_file.seekTo(cv_offset orelse return error.MissingDebugInfo);
+
+        var cv_signature: [4]u8 = undefined; // CodeView signature
+        try in.readNoEof(cv_signature[0..]);
+        // 'RSDS' indicates PDB70 format, used by lld.
+        if (!mem.eql(u8, cv_signature, "RSDS"))
+            return error.InvalidPEMagic;
+        std.debug.warn("cv_signature {}\n", cv_signature);
+        try in.readNoEof(self.guid[0..]);
+        self.age = try in.readIntLe(u32);
+
+        // Finally read the null-terminated string.
+        var byte = try in.readByte();
+        var i: usize = 0;
+        while (byte != 0 and i < buffer.len) : (i += 1) {
+            buffer[i] = byte;
+            byte = try in.readByte();
+        }
+
+        if (byte != 0 and i == buffer.len)
+            return error.NameTooLong;
+
+        return i;
+    }
+
+    /// Reads the section table into `sections`; does nothing when sections
+    /// have already been loaded. The file must still be positioned right after
+    /// the optional header, which is where loadHeader leaves it.
+    pub fn loadSections(self: *Coff) !void {
+        if (self.sections.len != 0)
+            return;
+
+        var file_stream = io.FileInStream.init(&self.in_file);
+        const in = &file_stream.stream;
+
+        var name: [8]u8 = undefined;
+
+        var i: u16 = 0;
+        while (i < self.coff_header.number_of_sections) : (i += 1) {
+            try in.readNoEof(name[0..]);
+            try self.sections.append(Section {
+                .header = SectionHeader {
+                    .name = name,
+                    .misc = SectionHeader.Misc { .physical_address = try in.readIntLe(u32) },
+                    .virtual_address = try in.readIntLe(u32),
+                    .size_of_raw_data = try in.readIntLe(u32),
+                    .pointer_to_raw_data = try in.readIntLe(u32),
+                    .pointer_to_relocations = try in.readIntLe(u32),
+                    .pointer_to_line_numbers = try in.readIntLe(u32),
+                    .number_of_relocations = try in.readIntLe(u16),
+                    .number_of_line_numbers = try in.readIntLe(u16),
+                    .characteristics = try in.readIntLe(u32),
+                },
+            });
+        }
+        std.debug.warn("loaded {} sections\n", self.coff_header.number_of_sections);
+    }
+
+    /// Returns the loaded section whose name is exactly `name`, or null when
+    /// there is none (which includes names longer than the 8 byte field).
+    pub fn getSection(self: *Coff, comptime name: []const u8) ?*Section {
+        for (self.sections.toSlice()) |*sec| {
+            // Names shorter than the header field are zero padded; trim the
+            // padding so that ".rdata" does not match e.g. ".rdata2".
+            const raw = sec.header.name[0..];
+            var len: usize = 0;
+            while (len < raw.len and raw[len] != 0) : (len += 1) {}
+            if (mem.eql(u8, raw[0..len], name))
+                return sec;
+        }
+        return null;
+    }
+
+};
+
+const CoffHeader = struct {
+    machine: u16,
+    number_of_sections: u16,
+    timedate_stamp: u32,
+    pointer_to_symbol_table: u32,
+    number_of_symbols: u32,
+    size_of_optional_header: u16,
+    characteristics: u16
+};
+
+const OptionalHeader = struct {
+    const DataDirectory = struct {
+        virtual_address: u32,
+        size: u32
+    };
+
+    magic: u16,
+    data_directory: [IMAGE_NUMBEROF_DIRECTORY_ENTRIES]DataDirectory,
+};
+
+const Section = struct {
+    header: SectionHeader,
+};
+
+const SectionHeader = struct {
+    const Misc = union {
+        physical_address: u32,
+        virtual_size: u32
+    };
+
+    name: [8]u8,
+    misc: Misc,
+    virtual_address: u32,
+    size_of_raw_data: u32,
+    pointer_to_raw_data: u32,
+    pointer_to_relocations: u32,
+    pointer_to_line_numbers: u32,
+    number_of_relocations: u16,
+    number_of_line_numbers: u16,
+    characteristics: u32,
+};
diff --git a/std/debug/index.zig b/std/debug/index.zig
index 25f7a58b25..5d00b5a873 100644
--- a/std/debug/index.zig
+++ b/std/debug/index.zig
@@ -6,6 +6,9 @@ const os = std.os;
 const elf = std.elf;
 const DW = std.dwarf;
 const macho = std.macho;
+const coff = std.coff;
+const pdb = std.pdb;
+const windows = os.windows;
 const ArrayList = std.ArrayList;
 const builtin = @import("builtin");
 
@@ -197,7 +200,13 @@ fn printSourceAtAddress(debug_info: *ElfStackTrace, out_stream: var, address: us
     const ptr_hex = "0x{x}";
 
     switch (builtin.os) {
-        builtin.Os.windows => return error.UnsupportedDebugInfo,
+        builtin.Os.windows => {
+            const base_address = @ptrToInt(windows.GetModuleHandleA(null)); // returned HMODULE points to our executable file in memory
+            const relative_address = address - base_address;
+            std.debug.warn("{x} - {x} => {x}\n", address, base_address, relative_address);
+            try debug_info.pdb.getSourceLine(relative_address);
+            return error.UnsupportedDebugInfo;
+        },
         builtin.Os.macosx => {
             // TODO(bnoordhuis) It's theoretically possible to obtain the
             // compilation unit from the symbtab but it's not that useful
@@ -288,7 +297,38 @@ pub fn openSelfDebugInfo(allocator: *mem.Allocator) !*ElfStackTrace {
             return st;
         },
         builtin.ObjectFormat.coff => {
-            return error.TodoSupportCoffDebugInfo;
+            var coff_file: coff.Coff = undefined;
+            coff_file.in_file = try os.openSelfExe();
+            coff_file.allocator = allocator;
+            defer coff_file.in_file.close();
+
+            try coff_file.loadHeader();
+
+            var path: [windows.MAX_PATH]u8 = undefined;
+            const len = try coff_file.getPdbPath(path[0..]);
+            std.debug.warn("pdb path {}\n", path[0..len]);
+
+            const st = try allocator.create(ElfStackTrace);
+            errdefer allocator.destroy(st);
+            st.* = ElfStackTrace {
+                .pdb = undefined,
+            };
+
+            try st.pdb.openFile(allocator, path[0..len]);
+
+            var pdb_stream = st.pdb.getStream(pdb.StreamType.Pdb) orelse return error.CorruptedFile;
+            std.debug.warn("pdb real filepos {}\n", pdb_stream.getFilePos());
+            const version = try pdb_stream.stream.readIntLe(u32);
+            const signature = try pdb_stream.stream.readIntLe(u32);
+            const age = try pdb_stream.stream.readIntLe(u32);
+            var guid: [16]u8 = undefined;
+            try pdb_stream.stream.readNoEof(guid[0..]);
+            if (!mem.eql(u8, coff_file.guid, guid) or coff_file.age != age)
+                return error.CorruptedFile;
+            std.debug.warn("v {} s {} a {}\n", version, signature, age);
+            // We validated the executable and pdb match.
+
+            return st;
         },
         builtin.ObjectFormat.wasm => {
             return error.TodoSupportCOFFDebugInfo;
@@ -339,6 +379,9 @@ pub const ElfStackTrace = switch (builtin.os) {
             self.symbol_table.deinit();
         }
     },
+    builtin.Os.windows => struct {
+        pdb: pdb.Pdb,
+    },
     else => struct {
         self_exe_file: os.File,
         elf: elf.Elf,
diff --git a/std/index.zig b/std/index.zig
index 8abfa3db88..a54c5ac465 100644
--- a/std/index.zig
+++ b/std/index.zig
@@ -13,6 +13,7 @@ pub const atomic = @import("atomic/index.zig");
 pub const base64 = @import("base64.zig");
 pub const build = @import("build.zig");
 pub const c = @import("c/index.zig");
+pub const coff = @import("coff.zig");
 pub const crypto = @import("crypto/index.zig");
 pub const cstr = @import("cstr.zig");
 pub const debug = @import("debug/index.zig");
@@ -30,6 +31,7 @@ pub const math = @import("math/index.zig");
 pub const mem = @import("mem.zig");
 pub const net = @import("net.zig");
 pub const os = @import("os/index.zig");
+pub const pdb = @import("pdb.zig");
 pub const rand = @import("rand/index.zig");
 pub const sort = @import("sort.zig");
 pub const unicode = @import("unicode.zig");
@@ -49,6 +51,7 @@ test "std" {
     _ = @import("base64.zig");
     _ = @import("build.zig");
     _ = @import("c/index.zig");
+    _ = @import("coff.zig");
     _ = @import("crypto/index.zig");
     _ = @import("cstr.zig");
     _ = @import("debug/index.zig");
@@ -67,6 +70,7 @@ test "std" {
     _ = @import("heap.zig");
     _ = @import("os/index.zig");
     _ = @import("rand/index.zig");
+    _ = @import("pdb.zig");
     _ = @import("sort.zig");
     _ = @import("unicode.zig");
     _ = @import("zig/index.zig");
diff --git a/std/os/index.zig b/std/os/index.zig
index 62eeb7e43e..45bad41a02 100644
--- a/std/os/index.zig
+++ b/std/os/index.zig
@@ -1896,13 +1896,19 @@ pub fn openSelfExe() !os.File {
             const self_exe_path = try selfExePath(&fixed_allocator.allocator);
             return os.File.openRead(&fixed_allocator.allocator, self_exe_path);
         },
+        Os.windows => {
+            var fixed_buffer_mem: [windows.MAX_PATH * 2]u8 = undefined;
+            var fixed_allocator = std.heap.FixedBufferAllocator.init(fixed_buffer_mem[0..]);
+            const self_exe_path = try selfExePath(&fixed_allocator.allocator);
+            return os.File.openRead(&fixed_allocator.allocator, self_exe_path);
+        },
         else => @compileError("Unsupported OS"),
     }
 }
 
 test "openSelfExe" {
     switch (builtin.os) {
-        Os.linux, Os.macosx, Os.ios => (try openSelfExe()).close(),
+        Os.linux, Os.macosx, Os.ios, Os.windows => (try openSelfExe()).close(),
         else => return, // Unsupported OS.
     }
 }
diff --git a/std/os/windows/index.zig b/std/os/windows/index.zig
index d631c6adbf..6eb9fc38f3 100644
--- a/std/os/windows/index.zig
+++ b/std/os/windows/index.zig
@@ -105,6 +105,8 @@ pub extern "kernel32" stdcallcc fn GetFinalPathNameByHandleA(
     dwFlags: DWORD,
 ) DWORD;
 
+pub extern "kernel32" stdcallcc fn GetModuleHandleA(lpModuleName: ?LPCSTR) HMODULE;
+
 pub extern "kernel32" stdcallcc fn GetProcessHeap() ?HANDLE;
 
 pub extern "kernel32" stdcallcc fn GetSystemTimeAsFileTime(*FILETIME) void;
diff --git a/std/pdb.zig b/std/pdb.zig
new file mode 100644
index 0000000000..8c5a82880e
--- /dev/null
+++ b/std/pdb.zig
@@ -0,0 +1,304 @@
+const builtin = @import("builtin");
+const std = @import("index.zig");
+const io = std.io;
+const math = std.math;
+const mem = std.mem;
+const os = std.os;
+const warn = std.debug.warn;
+
+const ArrayList = std.ArrayList;
+
+pub const PdbError = error {
+    InvalidPdbMagic,
+    CorruptedFile,
+};
+
+/// Indices of the streams that getStream can be asked for.
+pub const StreamType = enum(u16) {
+    Pdb = 1,
+    Tpi = 2,
+    Dbi = 3,
+    Ipi = 4,
+};
+
+/// A .pdb file, exposed as the list of streams stored in its MSF container.
+pub const Pdb = struct {
+    in_file: os.File,
+    allocator: *mem.Allocator,
+
+    msf: Msf,
+
+    /// Opens `file_name` and loads the MSF stream directory from it.
+    pub fn openFile(self: *Pdb, allocator: *mem.Allocator, file_name: []u8) !void {
+        self.in_file = try os.File.openRead(allocator, file_name[0..]);
+        // Do not leak the file handle when the MSF container is rejected.
+        errdefer self.in_file.close();
+        self.allocator = allocator;
+
+        try self.msf.openFile(allocator, &self.in_file);
+    }
+
+    /// Returns the stream with the given index, or null when the file holds
+    /// fewer streams than that.
+    pub fn getStream(self: *Pdb, stream: StreamType) ?*MsfStream {
+        const id = u16(stream);
+        if (id < self.msf.streams.len)
+            return &self.msf.streams.items[id];
+        return null;
+    }
+
+    /// Work in progress: parses the start of the Dbi stream and of its first
+    /// module info record, but does not resolve `address` to a line yet.
+    pub fn getSourceLine(self: *Pdb, address: usize) !void {
+        const dbi = self.getStream(StreamType.Dbi) orelse return error.CorruptedFile;
+
+        // Dbi Header
+        try dbi.seekForward(@sizeOf(u32) * 3 + @sizeOf(u16) * 6);
+        warn("dbi stream at {} (file offset)\n", dbi.getFilePos());
+        const module_info_size = try dbi.stream.readIntLe(u32);
+        const section_contribution_size = try dbi.stream.readIntLe(u32);
+        const section_map_size = try dbi.stream.readIntLe(u32);
+        const source_info_size = try dbi.stream.readIntLe(u32);
+        warn("module_info_size: {}\n", module_info_size);
+        warn("section_contribution_size: {}\n", section_contribution_size);
+        warn("section_map_size: {}\n", section_map_size);
+        warn("source_info_size: {}\n", source_info_size);
+        try dbi.seekForward(@sizeOf(u32) * 5 + @sizeOf(u16) * 2);
+        warn("after header dbi stream at {} (file offset)\n", dbi.getFilePos());
+
+        // Module Info Substream
+        try dbi.seekForward(@sizeOf(u32) + @sizeOf(u16) + @sizeOf(u8) * 2);
+        const offset = try dbi.stream.readIntLe(u32);
+        const size = try dbi.stream.readIntLe(u32);
+        try dbi.seekForward(@sizeOf(u32));
+        const module_index = try dbi.stream.readIntLe(u16);
+        warn("module {} of size {} at {}\n", module_index, size, offset);
+
+        // TODO: locate corresponding source line information
+    }
+};
+
+// see https://llvm.org/docs/PDB/MsfFile.html
+const Msf = struct {
+    superblock: SuperBlock,
+    directory: MsfStream,
+    streams: ArrayList(MsfStream),
+
+    /// Reads the superblock and the stream directory, then creates one
+    /// MsfStream per directory entry in `streams`.
+    fn openFile(self: *Msf, allocator: *mem.Allocator, file: *os.File) !void {
+        var file_stream = io.FileInStream.init(file);
+        const in = &file_stream.stream;
+
+        var magic: SuperBlock.FileMagicBuffer = undefined;
+        try in.readNoEof(magic[0..]);
+        warn("magic: '{}'\n", magic);
+
+        if (!mem.eql(u8, magic, SuperBlock.FileMagic))
+            return error.InvalidPdbMagic;
+
+        self.superblock = SuperBlock {
+            .block_size = try in.readIntLe(u32),
+            .free_block_map_block = try in.readIntLe(u32),
+            .num_blocks = try in.readIntLe(u32),
+            .num_directory_bytes = try in.readIntLe(u32),
+            .unknown = try in.readIntLe(u32),
+            .block_map_addr = try in.readIntLe(u32),
+        };
+
+        switch (self.superblock.block_size) {
+            512, 1024, 2048, 4096 => {}, // llvm only uses 4096
+            else => return error.InvalidPdbMagic
+        }
+
+        if (self.superblock.fileSize() != try file.getEndPos())
+            return error.CorruptedFile; // Should always stand.
+
+        self.directory = try MsfStream.init(
+            self.superblock.block_size,
+            self.superblock.blocksOccupiedByDirectoryStream(),
+            self.superblock.blockMapAddr(),
+            file,
+            allocator
+        );
+
+        const stream_count = try self.directory.stream.readIntLe(u32);
+        warn("stream count {}\n", stream_count);
+
+        // Only needed while the streams are being created below.
+        var stream_sizes = ArrayList(u32).init(allocator);
+        defer stream_sizes.deinit();
+        try stream_sizes.resize(stream_count);
+        for (stream_sizes.toSlice()) |*s| {
+            const size = try self.directory.stream.readIntLe(u32);
+            // A size of 0xFFFFFFFF marks a nil (deleted) stream that owns no
+            // blocks; it must not be rounded up like a real size.
+            s.* = if (size == 0xFFFFFFFF) u32(0) else blockCountFromSize(size, self.superblock.block_size);
+            warn("stream {}B {} blocks\n", size, s.*);
+        }
+
+        self.streams = ArrayList(MsfStream).init(allocator);
+        try self.streams.resize(stream_count);
+        for (self.streams.toSlice()) |*ss, i| {
+            ss.* = try MsfStream.init(
+                self.superblock.block_size,
+                stream_sizes.items[i],
+                try file.getPos(), // We're reading the jagged array of block indices when creating streams so the file is always at the right position.
+                file,
+                allocator
+            );
+        }
+    }
+};
+
+/// Returns how many blocks of `block_size` bytes are needed for `size` bytes.
+fn blockCountFromSize(size: u32, block_size: u32) u32 {
+    // Not written as `(size + block_size - 1) / block_size`: that sum
+    // overflows for sizes close to the maximum u32 value.
+    const partial_block: u32 = if (size % block_size != 0) 1 else 0;
+    return size / block_size + partial_block;
+}
+
+const SuperBlock = struct {
+    const FileMagic = "Microsoft C/C++ MSF 7.00\r\n" ++ []u8 { 0x1A, 'D', 'S', 0, 0, 0};
+    const FileMagicBuffer = @typeOf(FileMagic);
+
+    block_size: u32,
+    free_block_map_block: u32,
+    num_blocks: u32,
+    num_directory_bytes: u32,
+    unknown: u32,
+    block_map_addr: u32,
+
+    /// Size in bytes the file must have according to the superblock.
+    fn fileSize(self: *const SuperBlock) usize {
+        // Widen first: the product of two u32 does not always fit in a u32.
+        return usize(self.num_blocks) * self.block_size;
+    }
+
+    /// File offset of the block that lists the blocks of the stream directory.
+    fn blockMapAddr(self: *const SuperBlock) usize {
+        return usize(self.block_size) * self.block_map_addr;
+    }
+
+    fn blocksOccupiedByDirectoryStream(self: *const SuperBlock) u32 {
+        return blockCountFromSize(self.num_directory_bytes, self.block_size);
+    }
+};
+
+/// A stream of an MSF file: a logically contiguous run of bytes stored in the
+/// blocks listed, in order, in `blocks`. `pos` is the read offset in the stream.
+const MsfStream = struct {
+    in_file: *os.File,
+    pos: usize,
+    blocks: ArrayList(u32),
+    block_size: u32,
+
+    /// Creates a stream of `block_count` blocks whose block indices are read
+    /// from `file`, starting at file offset `pos`.
+    fn init(block_size: u32, block_count: u32, pos: usize, file: *os.File, allocator: *mem.Allocator) !MsfStream {
+        var stream = MsfStream {
+            .in_file = file,
+            .pos = 0,
+            .blocks = ArrayList(u32).init(allocator),
+            .block_size = block_size,
+            .stream = Stream {
+                .readFn = readFn,
+            },
+        };
+
+        // Release the block list if one of the reads below fails.
+        errdefer stream.blocks.deinit();
+        try stream.blocks.resize(block_count);
+
+        var file_stream = io.FileInStream.init(file);
+        const in = &file_stream.stream;
+        try file.seekTo(pos);
+
+        warn("stream with blocks");
+        var i: u32 = 0;
+        while (i < block_count) : (i += 1) {
+            stream.blocks.items[i] = try in.readIntLe(u32);
+            warn(" {}", stream.blocks.items[i]);
+        }
+        warn("\n");
+
+        return stream;
+    }
+
+    /// Reads up to `buffer.len` bytes at the current position, following the
+    /// stream from block to block. Returns the number of bytes read, which is
+    /// smaller than `buffer.len` only when the end of the stream is reached.
+    fn read(self: *MsfStream, buffer: []u8) !usize {
+        var block_id = self.pos / self.block_size;
+        // Already at the end of the stream: there is no block left to read.
+        if (block_id >= self.blocks.len)
+            return usize(0);
+        var block = self.blocks.items[block_id];
+        var offset = self.pos % self.block_size;
+
+        try self.in_file.seekTo(usize(block) * self.block_size + offset);
+        var file_stream = io.FileInStream.init(self.in_file);
+        const in = &file_stream.stream;
+
+        var size: usize = 0;
+        while (size < buffer.len) {
+            buffer[size] = try in.readByte();
+
+            offset += 1;
+            size += 1;
+
+            // If we're at the end of a block, go to the next one.
+            if (offset == self.block_size)
+            {
+                offset = 0;
+                block_id += 1;
+                // The last block has no successor to look up.
+                if (block_id >= self.blocks.len)
+                    break;
+                block = self.blocks.items[block_id];
+                try self.in_file.seekTo(usize(block) * self.block_size);
+            }
+        }
+
+        self.pos += size;
+        return size;
+    }
+
+    /// Moves the read position `len` bytes forward. The position is left
+    /// untouched when the target lies outside of the stream.
+    fn seekForward(self: *MsfStream, len: usize) !void {
+        return self.seekTo(self.pos + len);
+    }
+
+    /// Moves the read position to offset `len` of the stream. The position is
+    /// left untouched when the target lies outside of the stream.
+    fn seekTo(self: *MsfStream, len: usize) !void {
+        if (len >= self.blocks.len * self.block_size)
+            return error.EOF;
+        self.pos = len;
+    }
+
+    fn getSize(self: *const MsfStream) usize {
+        return self.blocks.len * self.block_size;
+    }
+
+    fn getFilePos(self: *const MsfStream) usize {
+        const block_id = self.pos / self.block_size;
+        const block = self.blocks.items[block_id];
+        const offset = self.pos % self.block_size;
+
+        return usize(block) * self.block_size + offset;
+    }
+
+    /// Implementation of InStream trait for Pdb.MsfStream
+    pub const Error = @typeOf(read).ReturnType.ErrorSet;
+    pub const Stream = io.InStream(Error);
+
+    stream: Stream,
+
+    fn readFn(in_stream: *Stream, buffer: []u8) Error!usize {
+        const self = @fieldParentPtr(MsfStream, "stream", in_stream);
+        return self.read(buffer);
+    }
+};