Very much WIP base implementation for #721.

Currently it:
- reads a COFF executable file
- locates and loads the corresponding .pdb file
- exposes the .pdb content as streams (PDB format)
This commit is contained in:
Sahnvour 2018-07-21 20:30:11 +02:00
parent f47655eb6d
commit 2ec9a11646
7 changed files with 563 additions and 3 deletions

View File

@ -427,6 +427,7 @@ set(ZIG_STD_FILES
"c/index.zig"
"c/linux.zig"
"c/windows.zig"
"coff.zig"
"crypto/blake2.zig"
"crypto/hmac.zig"
"crypto/index.zig"
@ -544,6 +545,7 @@ set(ZIG_STD_FILES
"os/windows/index.zig"
"os/windows/util.zig"
"os/zen.zig"
"pdb.zig"
"rand/index.zig"
"rand/ziggurat.zig"
"segmented_list.zig"

238
std/coff.zig Normal file
View File

@ -0,0 +1,238 @@
const builtin = @import("builtin");
const std = @import("index.zig");
const io = std.io;
const mem = std.mem;
const os = std.os;
const ArrayList = std.ArrayList;
// CoffHeader.machine values
// see https://msdn.microsoft.com/en-us/library/windows/desktop/ms680313(v=vs.85).aspx
// These are the only machine types Coff.loadHeader accepts; any other value
// makes it return error.InvalidMachine.
const IMAGE_FILE_MACHINE_I386 = 0x014c;
const IMAGE_FILE_MACHINE_IA64 = 0x0200;
const IMAGE_FILE_MACHINE_AMD64 = 0x8664;
// OptionalHeader.magic values
// see https://msdn.microsoft.com/en-us/library/windows/desktop/ms680339(v=vs.85).aspx
// Coff.loadOptionalHeader uses the magic to decide how many bytes of the
// optional header to skip before the data directories.
const IMAGE_NT_OPTIONAL_HDR32_MAGIC = 0x10b;
const IMAGE_NT_OPTIONAL_HDR64_MAGIC = 0x20b;
// Length of OptionalHeader.data_directory; headers that declare a different
// number of directories are rejected with error.InvalidPEHeader.
const IMAGE_NUMBEROF_DIRECTORY_ENTRIES = 16;
// Index of the debug directory entry inside OptionalHeader.data_directory.
const DEBUG_DIRECTORY = 6;
// Errors produced by this module while parsing a PE/COFF file (I/O errors from
// the underlying file are returned in addition to these).
pub const CoffError = error {
    // The file does not start with "MZ", the optional header magic is neither
    // PE32 nor PE32+, or the CodeView record is not of the "RSDS" kind.
    InvalidPEMagic,
    // The "PE\0\0" signature is missing or the header declares an unexpected
    // number of data directories.
    InvalidPEHeader,
    // CoffHeader.machine is not one of the supported IMAGE_FILE_MACHINE_* values.
    InvalidMachine,
    // A section required to locate the debug information is absent.
    MissingCoffSection,
    // The .pdb path stored in the executable does not fit in the caller's buffer.
    NameTooLong,
};
// Reader for the parts of a PE/COFF executable that are needed to find the
// matching .pdb file.
//
// Usage: set `in_file` and `allocator`, call `loadHeader`, then `getPdbPath`.
// `guid` and `age` are only written by a successful `getPdbPath`.
pub const Coff = struct {
    in_file: os.File,
    allocator: *mem.Allocator,

    coff_header: CoffHeader,
    pe_header: OptionalHeader,
    sections: ArrayList(Section),

    guid: [16]u8,
    age: u32,

    // Parses the DOS stub, the PE signature, the COFF file header and the
    // optional header. `allocator` must already be set. On success the file
    // position is left right after the data directories.
    pub fn loadHeader(self: *Coff) !void {
        const pe_pointer_offset = 0x3C;

        // Callers build a Coff from `undefined`, so make sure the section list
        // is a valid empty list before loadSections inspects its length.
        self.sections = ArrayList(Section).init(self.allocator);

        var file_stream = io.FileInStream.init(&self.in_file);
        const in = &file_stream.stream;

        try self.in_file.seekTo(0);
        var magic: [2]u8 = undefined;
        try in.readNoEof(magic[0..]);
        if (!mem.eql(u8, magic, "MZ"))
            return error.InvalidPEMagic;

        // Seek to PE File Header (coff header)
        try self.in_file.seekTo(pe_pointer_offset);
        const pe_magic_offset = try in.readIntLe(u32);
        try self.in_file.seekTo(pe_magic_offset);

        var pe_header_magic: [4]u8 = undefined;
        try in.readNoEof(pe_header_magic[0..]);
        if (!mem.eql(u8, pe_header_magic, []u8{'P', 'E', 0, 0}))
            return error.InvalidPEHeader;

        self.coff_header = CoffHeader {
            .machine = try in.readIntLe(u16),
            .number_of_sections = try in.readIntLe(u16),
            .timedate_stamp = try in.readIntLe(u32),
            .pointer_to_symbol_table = try in.readIntLe(u32),
            .number_of_symbols = try in.readIntLe(u32),
            .size_of_optional_header = try in.readIntLe(u16),
            .characteristics = try in.readIntLe(u16),
        };

        switch (self.coff_header.machine) {
            IMAGE_FILE_MACHINE_I386,
            IMAGE_FILE_MACHINE_AMD64,
            IMAGE_FILE_MACHINE_IA64
                => {},
            else => return error.InvalidMachine,
        }

        try self.loadOptionalHeader(&file_stream);
    }

    // Reads the optional header magic and the data directories, skipping every
    // field in between. Expects the file to be positioned at the start of the
    // optional header.
    fn loadOptionalHeader(self: *Coff, file_stream: *io.FileInStream) !void {
        const in = &file_stream.stream;
        self.pe_header.magic = try in.readIntLe(u16);
        std.debug.warn("reading pe optional\n");
        // For now we're only interested in finding the reference to the .pdb,
        // so we skip most of this header. Its size differs between the 32-bit
        // (PE32) and 64-bit (PE32+) layouts.
        var skip_size: u16 = undefined;
        if (self.pe_header.magic == IMAGE_NT_OPTIONAL_HDR32_MAGIC) {
            skip_size = 2 * @sizeOf(u8) + 8 * @sizeOf(u16) + 18 * @sizeOf(u32);
        }
        else if (self.pe_header.magic == IMAGE_NT_OPTIONAL_HDR64_MAGIC) {
            skip_size = 2 * @sizeOf(u8) + 8 * @sizeOf(u16) + 12 * @sizeOf(u32) + 5 * @sizeOf(u64);
        }
        else
            return error.InvalidPEMagic;

        std.debug.warn("skipping {}\n", skip_size);
        try self.in_file.seekForward(skip_size);

        const number_of_rva_and_sizes = try in.readIntLe(u32);
        if (number_of_rva_and_sizes != IMAGE_NUMBEROF_DIRECTORY_ENTRIES)
            return error.InvalidPEHeader;

        for (self.pe_header.data_directory) |*data_dir| {
            data_dir.* = OptionalHeader.DataDirectory {
                .virtual_address = try in.readIntLe(u32),
                .size = try in.readIntLe(u32),
            };
        }
        std.debug.warn("loaded data directories\n");
    }

    // Writes the path of the .pdb file referenced by the executable into
    // `buffer` and returns its length. Also stores the PDB `guid` and `age`.
    // Returns error.NameTooLong when the path does not fit in `buffer`.
    pub fn getPdbPath(self: *Coff, buffer: []u8) !usize {
        try self.loadSections();
        const header = (self.getSection(".rdata") orelse return error.MissingCoffSection).header;

        // The linker puts a chunk that contains the .pdb path right after the
        // debug_directory.
        const debug_dir = &self.pe_header.data_directory[DEBUG_DIRECTORY];
        // The RVA -> file offset translation below is only valid when the
        // debug directory lives in .rdata; bail out instead of underflowing.
        if (debug_dir.virtual_address < header.virtual_address)
            return error.InvalidPEHeader;
        const file_offset = debug_dir.virtual_address - header.virtual_address + header.pointer_to_raw_data;
        std.debug.warn("file offset {x}\n", file_offset);
        try self.in_file.seekTo(file_offset + debug_dir.size);

        var file_stream = io.FileInStream.init(&self.in_file);
        const in = &file_stream.stream;

        var cv_signature: [4]u8 = undefined; // CodeView signature
        try in.readNoEof(cv_signature[0..]);
        // 'RSDS' indicates PDB70 format, used by lld.
        if (!mem.eql(u8, cv_signature, "RSDS"))
            return error.InvalidPEMagic;
        std.debug.warn("cv_signature {}\n", cv_signature);
        try in.readNoEof(self.guid[0..]);
        self.age = try in.readIntLe(u32);

        // Finally read the null-terminated string.
        var byte = try in.readByte();
        var i: usize = 0;
        while (byte != 0 and i < buffer.len) : (i += 1) {
            buffer[i] = byte;
            byte = try in.readByte();
        }

        if (byte != 0 and i == buffer.len)
            return error.NameTooLong;

        return i;
    }

    // Reads the section table into `sections`; does nothing when sections are
    // already loaded. The table is read from the current file position, so this
    // must run right after `loadHeader`, which leaves the file positioned just
    // past the data directories.
    pub fn loadSections(self: *Coff) !void {
        if (self.sections.len != 0)
            return;

        var file_stream = io.FileInStream.init(&self.in_file);
        const in = &file_stream.stream;

        var name: [8]u8 = undefined;

        var i: u16 = 0;
        while (i < self.coff_header.number_of_sections) : (i += 1) {
            try in.readNoEof(name[0..]);
            try self.sections.append(Section {
                .header = SectionHeader {
                    .name = name,
                    .misc = SectionHeader.Misc { .physical_address = try in.readIntLe(u32) },
                    .virtual_address = try in.readIntLe(u32),
                    .size_of_raw_data = try in.readIntLe(u32),
                    .pointer_to_raw_data = try in.readIntLe(u32),
                    .pointer_to_relocations = try in.readIntLe(u32),
                    .pointer_to_line_numbers = try in.readIntLe(u32),
                    .number_of_relocations = try in.readIntLe(u16),
                    .number_of_line_numbers = try in.readIntLe(u16),
                    .characteristics = try in.readIntLe(u32),
                },
            });
        }
        std.debug.warn("loaded {} sections\n", self.coff_header.number_of_sections);
    }

    // Returns the loaded section whose name is exactly `name`, or null.
    // Stored names occupy 8 bytes; a shorter name must be followed by a zero
    // byte so that ".rdata" does not match a section called ".rdata2".
    pub fn getSection(self: *Coff, comptime name: []const u8) ?*Section {
        if (name.len > 8)
            return null;

        for (self.sections.toSlice()) |*sec| {
            const stored = sec.header.name[0..];
            if (!mem.eql(u8, stored[0..name.len], name))
                continue;
            if (name.len == stored.len or stored[name.len] == 0)
                return sec;
        }
        return null;
    }
};
// COFF file header. Coff.loadHeader fills it from the bytes that follow the
// "PE\0\0" signature, reading every field as a little-endian integer in the
// order the fields are declared here.
const CoffHeader = struct {
// Checked against the IMAGE_FILE_MACHINE_* constants by Coff.loadHeader.
machine: u16,
// Number of entries Coff.loadSections reads from the section table.
number_of_sections: u16,
timedate_stamp: u32,
pointer_to_symbol_table: u32,
number_of_symbols: u32,
size_of_optional_header: u16,
characteristics: u16
};
// The subset of the PE optional header kept by this module:
// Coff.loadOptionalHeader reads `magic`, skips the remaining fixed fields and
// then reads the data directories.
const OptionalHeader = struct {
// One data directory entry: two little-endian u32 values read back to back.
const DataDirectory = struct {
virtual_address: u32,
size: u32
};
// Compared with IMAGE_NT_OPTIONAL_HDR32_MAGIC / IMAGE_NT_OPTIONAL_HDR64_MAGIC.
magic: u16,
// Indexed with DEBUG_DIRECTORY by Coff.getPdbPath.
data_directory: [IMAGE_NUMBEROF_DIRECTORY_ENTRIES]DataDirectory,
};
// A section of the executable as stored in Coff.sections; currently it only
// carries the section's header.
const Section = struct {
header: SectionHeader,
};
// One entry of the section table. Coff.loadSections reads the 8 name bytes
// followed by the remaining fields as little-endian integers, in declaration
// order.
const SectionHeader = struct {
// Both members share the same 4 bytes; Coff.loadSections always initializes
// the physical_address member.
const Misc = union {
physical_address: u32,
virtual_size: u32
};
// Raw 8-byte name as stored in the file; Coff.getSection compares against it.
name: [8]u8,
misc: Misc,
// Used together with pointer_to_raw_data by Coff.getPdbPath to turn the
// debug directory's virtual address into a file offset.
virtual_address: u32,
size_of_raw_data: u32,
pointer_to_raw_data: u32,
pointer_to_relocations: u32,
pointer_to_line_numbers: u32,
number_of_relocations: u16,
number_of_line_numbers: u16,
characteristics: u32,
};

View File

@ -6,6 +6,9 @@ const os = std.os;
const elf = std.elf;
const DW = std.dwarf;
const macho = std.macho;
const coff = std.coff;
const pdb = std.pdb;
const windows = os.windows;
const ArrayList = std.ArrayList;
const builtin = @import("builtin");
@ -197,7 +200,13 @@ fn printSourceAtAddress(debug_info: *ElfStackTrace, out_stream: var, address: us
const ptr_hex = "0x{x}";
switch (builtin.os) {
builtin.Os.windows => return error.UnsupportedDebugInfo,
builtin.Os.windows => {
const base_address = @ptrToInt(windows.GetModuleHandleA(null)); // returned HMODULE points to our executable file in memory
const relative_address = address - base_address;
std.debug.warn("{x} - {x} => {x}\n", address, base_address, relative_address);
try debug_info.pdb.getSourceLine(relative_address);
return error.UnsupportedDebugInfo;
},
builtin.Os.macosx => {
// TODO(bnoordhuis) It's theoretically possible to obtain the
// compilation unit from the symbtab but it's not that useful
@ -288,7 +297,38 @@ pub fn openSelfDebugInfo(allocator: *mem.Allocator) !*ElfStackTrace {
return st;
},
builtin.ObjectFormat.coff => {
return error.TodoSupportCoffDebugInfo;
var coff_file: coff.Coff = undefined;
coff_file.in_file = try os.openSelfExe();
coff_file.allocator = allocator;
defer coff_file.in_file.close();
try coff_file.loadHeader();
var path: [windows.MAX_PATH]u8 = undefined;
const len = try coff_file.getPdbPath(path[0..]);
std.debug.warn("pdb path {}\n", path[0..len]);
const st = try allocator.create(ElfStackTrace);
errdefer allocator.destroy(st);
st.* = ElfStackTrace {
.pdb = undefined,
};
try st.pdb.openFile(allocator, path[0..len]);
var pdb_stream = st.pdb.getStream(pdb.StreamType.Pdb) orelse return error.CorruptedFile;
std.debug.warn("pdb real filepos {}\n", pdb_stream.getFilePos());
const version = try pdb_stream.stream.readIntLe(u32);
const signature = try pdb_stream.stream.readIntLe(u32);
const age = try pdb_stream.stream.readIntLe(u32);
var guid: [16]u8 = undefined;
try pdb_stream.stream.readNoEof(guid[0..]);
if (!mem.eql(u8, coff_file.guid, guid) or coff_file.age != age)
return error.CorruptedFile;
std.debug.warn("v {} s {} a {}\n", version, signature, age);
// We validated the executable and pdb match.
return st;
},
builtin.ObjectFormat.wasm => {
return error.TodoSupportCOFFDebugInfo;
@ -339,6 +379,9 @@ pub const ElfStackTrace = switch (builtin.os) {
self.symbol_table.deinit();
}
},
builtin.Os.windows => struct {
pdb: pdb.Pdb,
},
else => struct {
self_exe_file: os.File,
elf: elf.Elf,

View File

@ -13,6 +13,7 @@ pub const atomic = @import("atomic/index.zig");
pub const base64 = @import("base64.zig");
pub const build = @import("build.zig");
pub const c = @import("c/index.zig");
pub const coff = @import("coff.zig");
pub const crypto = @import("crypto/index.zig");
pub const cstr = @import("cstr.zig");
pub const debug = @import("debug/index.zig");
@ -30,6 +31,7 @@ pub const math = @import("math/index.zig");
pub const mem = @import("mem.zig");
pub const net = @import("net.zig");
pub const os = @import("os/index.zig");
pub const pdb = @import("pdb.zig");
pub const rand = @import("rand/index.zig");
pub const sort = @import("sort.zig");
pub const unicode = @import("unicode.zig");
@ -49,6 +51,7 @@ test "std" {
_ = @import("base64.zig");
_ = @import("build.zig");
_ = @import("c/index.zig");
_ = @import("coff.zig");
_ = @import("crypto/index.zig");
_ = @import("cstr.zig");
_ = @import("debug/index.zig");
@ -67,6 +70,7 @@ test "std" {
_ = @import("heap.zig");
_ = @import("os/index.zig");
_ = @import("rand/index.zig");
_ = @import("pdb.zig");
_ = @import("sort.zig");
_ = @import("unicode.zig");
_ = @import("zig/index.zig");

View File

@ -1896,13 +1896,19 @@ pub fn openSelfExe() !os.File {
const self_exe_path = try selfExePath(&fixed_allocator.allocator);
return os.File.openRead(&fixed_allocator.allocator, self_exe_path);
},
Os.windows => {
var fixed_buffer_mem: [windows.MAX_PATH * 2]u8 = undefined;
var fixed_allocator = std.heap.FixedBufferAllocator.init(fixed_buffer_mem[0..]);
const self_exe_path = try selfExePath(&fixed_allocator.allocator);
return os.File.openRead(&fixed_allocator.allocator, self_exe_path);
},
else => @compileError("Unsupported OS"),
}
}
test "openSelfExe" {
switch (builtin.os) {
Os.linux, Os.macosx, Os.ios => (try openSelfExe()).close(),
Os.linux, Os.macosx, Os.ios, Os.windows => (try openSelfExe()).close(),
else => return, // Unsupported OS.
}
}

View File

@ -105,6 +105,8 @@ pub extern "kernel32" stdcallcc fn GetFinalPathNameByHandleA(
dwFlags: DWORD,
) DWORD;
pub extern "kernel32" stdcallcc fn GetModuleHandleA(lpModuleName: ?LPCSTR) HMODULE;
pub extern "kernel32" stdcallcc fn GetProcessHeap() ?HANDLE;
pub extern "kernel32" stdcallcc fn GetSystemTimeAsFileTime(*FILETIME) void;

265
std/pdb.zig Normal file
View File

@ -0,0 +1,265 @@
const builtin = @import("builtin");
const std = @import("index.zig");
const io = std.io;
const math = std.math;
const mem = std.mem;
const os = std.os;
const warn = std.debug.warn;
const ArrayList = std.ArrayList;
// Errors produced by this module while parsing a .pdb file (I/O errors from
// the underlying file are returned in addition to these).
pub const PdbError = error {
// The file does not start with SuperBlock.FileMagic, or the superblock
// declares a block size other than 512, 1024, 2048 or 4096.
InvalidPdbMagic,
// The file size does not match the superblock, or a required stream is missing.
CorruptedFile,
};
// Well-known streams of a PDB file. The integer value of each tag is the
// index of that stream in Msf.streams, which is how Pdb.getStream uses it.
pub const StreamType = enum(u16) {
Pdb = 1,
Tpi = 2,
// Parsed by Pdb.getSourceLine.
Dbi = 3,
Ipi = 4,
};
// An opened .pdb file, exposed as the list of MSF streams it contains.
//
// The streams keep a pointer to `in_file`, so a Pdb must not be moved or
// copied after `openFile` has been called on it.
pub const Pdb = struct {
    in_file: os.File,
    allocator: *mem.Allocator,

    msf: Msf,

    // Opens `file_name` for reading and parses its MSF container.
    // The file is closed again if parsing fails.
    pub fn openFile(self: *Pdb, allocator: *mem.Allocator, file_name: []const u8) !void {
        self.in_file = try os.File.openRead(allocator, file_name);
        errdefer self.in_file.close();
        self.allocator = allocator;

        try self.msf.openFile(allocator, &self.in_file);
    }

    // Returns the stream with the given well-known index, or null when the
    // file contains fewer streams than that.
    pub fn getStream(self: *Pdb, stream: StreamType) ?*MsfStream {
        const id = u16(stream);
        if (id < self.msf.streams.len)
            return &self.msf.streams.items[id];
        return null;
    }

    // Work in progress: parses the DBI stream header and the first module
    // info record and prints what it finds; `address` is not used yet.
    // Returns error.CorruptedFile when the file has no DBI stream.
    pub fn getSourceLine(self: *Pdb, address: usize) !void {
        const dbi = self.getStream(StreamType.Dbi) orelse return error.CorruptedFile;

        // Dbi Header
        // Seek from the start of the stream (not from wherever a previous call
        // left it) so that this function can be called more than once.
        try dbi.seekTo(@sizeOf(u32) * 3 + @sizeOf(u16) * 6);
        warn("dbi stream at {} (file offset)\n", dbi.getFilePos());
        const module_info_size = try dbi.stream.readIntLe(u32);
        const section_contribution_size = try dbi.stream.readIntLe(u32);
        const section_map_size = try dbi.stream.readIntLe(u32);
        const source_info_size = try dbi.stream.readIntLe(u32);
        warn("module_info_size: {}\n", module_info_size);
        warn("section_contribution_size: {}\n", section_contribution_size);
        warn("section_map_size: {}\n", section_map_size);
        warn("source_info_size: {}\n", source_info_size);
        try dbi.seekForward(@sizeOf(u32) * 5 + @sizeOf(u16) * 2);
        warn("after header dbi stream at {} (file offset)\n", dbi.getFilePos());

        // Module Info Substream
        try dbi.seekForward(@sizeOf(u32) + @sizeOf(u16) + @sizeOf(u8) * 2);
        const offset = try dbi.stream.readIntLe(u32);
        const size = try dbi.stream.readIntLe(u32);
        try dbi.seekForward(@sizeOf(u32));
        const module_index = try dbi.stream.readIntLe(u16);
        warn("module {} of size {} at {}\n", module_index, size, offset);

        // TODO: locate corresponding source line information
    }
};
// see https://llvm.org/docs/PDB/MsfFile.html
// The MSF container of a .pdb file: the superblock, the stream directory and
// one MsfStream per stream listed in the directory.
const Msf = struct {
    superblock: SuperBlock,
    directory: MsfStream,
    streams: ArrayList(MsfStream),

    // Parses the superblock and the stream directory of `file` and creates one
    // MsfStream per stream. `file` must stay valid (and at the same address)
    // for as long as the streams are used, since each stream keeps the pointer.
    fn openFile(self: *Msf, allocator: *mem.Allocator, file: *os.File) !void {
        // A directory entry with this size denotes a nil stream without blocks.
        const nil_stream_size = 0xFFFFFFFF;

        var file_stream = io.FileInStream.init(file);
        const in = &file_stream.stream;

        var magic: SuperBlock.FileMagicBuffer = undefined;
        try in.readNoEof(magic[0..]);
        warn("magic: '{}'\n", magic);

        if (!mem.eql(u8, magic, SuperBlock.FileMagic))
            return error.InvalidPdbMagic;

        self.superblock = SuperBlock {
            .block_size = try in.readIntLe(u32),
            .free_block_map_block = try in.readIntLe(u32),
            .num_blocks = try in.readIntLe(u32),
            .num_directory_bytes = try in.readIntLe(u32),
            .unknown = try in.readIntLe(u32),
            .block_map_addr = try in.readIntLe(u32),
        };

        switch (self.superblock.block_size) {
            512, 1024, 2048, 4096 => {}, // llvm only uses 4096
            else => return error.InvalidPdbMagic
        }

        if (self.superblock.fileSize() != try file.getEndPos())
            return error.CorruptedFile; // Should always stand.

        self.directory = try MsfStream.init(
            self.superblock.block_size,
            self.superblock.blocksOccupiedByDirectoryStream(),
            self.superblock.blockMapAddr(),
            file,
            allocator
        );

        const stream_count = try self.directory.stream.readIntLe(u32);
        warn("stream count {}\n", stream_count);

        // Number of blocks of every stream; only needed while building the
        // streams, so release it on every exit path.
        var stream_sizes = ArrayList(u32).init(allocator);
        defer stream_sizes.deinit();
        try stream_sizes.resize(stream_count);
        for (stream_sizes.toSlice()) |*s| {
            const size = try self.directory.stream.readIntLe(u32);
            s.* = if (size == nil_stream_size) u32(0) else blockCountFromSize(size, self.superblock.block_size);
            warn("stream {}B {} blocks\n", size, s.*);
        }

        self.streams = ArrayList(MsfStream).init(allocator);
        try self.streams.resize(stream_count);
        for (self.streams.toSlice()) |*ss, i| {
            // NOTE(review): the block lists are read straight from the file at
            // its current position, i.e. right after the stream sizes. This
            // relies on the directory being laid out contiguously in the file.
            ss.* = try MsfStream.init(
                self.superblock.block_size,
                stream_sizes.items[i],
                try file.getPos(), // We're reading the jagged array of block indices when creating streams so the file is always at the right position.
                file,
                allocator
            );
        }
    }
};
// Returns how many blocks of `block_size` bytes are needed to hold `size`
// bytes (i.e. size / block_size rounded up). `block_size` must not be zero.
// Written without `size + block_size - 1` so that sizes close to the u32
// maximum cannot overflow.
fn blockCountFromSize(size: u32, block_size: u32) u32 {
    const full_blocks = size / block_size;
    const has_partial_block = size % block_size != 0;
    if (has_partial_block)
        return full_blocks + 1;
    return full_blocks;
}
// The MSF superblock found at the start of a .pdb file, right after the
// magic. Msf.openFile reads the fields as little-endian u32 values in
// declaration order.
const SuperBlock = struct {
    // Magic bytes every supported file must start with.
    const FileMagic = "Microsoft C/C++ MSF 7.00\r\n" ++ []u8 { 0x1A, 'D', 'S', 0, 0, 0};
    // Array type with the same length as FileMagic, used as the read buffer.
    const FileMagicBuffer = @typeOf(FileMagic);

    // Msf.openFile only accepts 512, 1024, 2048 or 4096.
    block_size: u32,
    free_block_map_block: u32,
    num_blocks: u32,
    num_directory_bytes: u32,
    unknown: u32,
    // Block index; multiplied by block_size to obtain a file offset.
    block_map_addr: u32,

    // Size in bytes the file must have according to the superblock.
    // Widened to usize before multiplying so large files do not overflow u32.
    fn fileSize(self: *const SuperBlock) usize {
        return usize(self.num_blocks) * self.block_size;
    }

    // File offset of the list of blocks that make up the stream directory.
    fn blockMapAddr(self: *const SuperBlock) usize {
        return usize(self.block_map_addr) * self.block_size;
    }

    // Number of blocks needed to store num_directory_bytes bytes.
    fn blocksOccupiedByDirectoryStream(self: *const SuperBlock) u32 {
        return blockCountFromSize(self.num_directory_bytes, self.block_size);
    }
};
// A stream of an MSF file: a list of block indices that, read in order, form
// one logical byte sequence. `pos` is the read position inside that logical
// sequence. The stream does not own `in_file`; it only keeps the pointer.
const MsfStream = struct {
    in_file: *os.File,
    pos: usize,
    blocks: ArrayList(u32),
    block_size: u32,

    // Creates a stream of `block_count` blocks whose block indices are read as
    // little-endian u32 values from `file`, starting at file offset `pos`.
    // The block list is released again if reading fails.
    fn init(block_size: u32, block_count: u32, pos: usize, file: *os.File, allocator: *mem.Allocator) !MsfStream {
        var stream = MsfStream {
            .in_file = file,
            .pos = 0,
            .blocks = ArrayList(u32).init(allocator),
            .block_size = block_size,
            .stream = Stream {
                .readFn = readFn,
            },
        };
        errdefer stream.blocks.deinit();

        try stream.blocks.resize(block_count);

        var file_stream = io.FileInStream.init(file);
        const in = &file_stream.stream;
        try file.seekTo(pos);

        warn("stream with blocks");
        var i: u32 = 0;
        while (i < block_count) : (i += 1) {
            stream.blocks.items[i] = try in.readIntLe(u32);
            warn(" {}", stream.blocks.items[i]);
        }
        warn("\n");

        return stream;
    }

    // Reads up to buffer.len bytes from the current position and advances it.
    // Returns the number of bytes read, which is smaller than buffer.len only
    // when the end of the stream was reached (0 when already at the end).
    fn read(self: *MsfStream, buffer: []u8) !usize {
        const stream_size = self.getSize();
        if (self.pos >= stream_size)
            return 0;

        // Never read past the last block of the stream.
        const remaining = stream_size - self.pos;
        const to_read = if (buffer.len < remaining) buffer.len else remaining;

        var block_id = self.pos / self.block_size;
        var offset = self.pos % self.block_size;

        try self.in_file.seekTo(usize(self.blocks.items[block_id]) * self.block_size + offset);
        var file_stream = io.FileInStream.init(self.in_file);
        const in = &file_stream.stream;

        var size: usize = 0;
        for (buffer[0..to_read]) |*byte| {
            // Move to the next block only when another byte is actually needed,
            // so the block list is never indexed past its end.
            if (offset == self.block_size) {
                offset = 0;
                block_id += 1;
                try self.in_file.seekTo(usize(self.blocks.items[block_id]) * self.block_size);
            }

            byte.* = try in.readByte();
            offset += 1;
            size += 1;
        }

        self.pos += size;
        return size;
    }

    // Advances the position by `len` bytes. Returns error.EOF, leaving the
    // position unchanged, when the target is not inside the stream.
    fn seekForward(self: *MsfStream, len: usize) !void {
        const new_pos = self.pos + len;
        if (new_pos >= self.getSize())
            return error.EOF;
        self.pos = new_pos;
    }

    // Sets the position to `len` bytes from the start of the stream. Returns
    // error.EOF, leaving the position unchanged, when that is not inside the
    // stream.
    fn seekTo(self: *MsfStream, len: usize) !void {
        if (len >= self.getSize())
            return error.EOF;
        self.pos = len;
    }

    // Size of the stream in bytes, rounded up to whole blocks.
    fn getSize(self: *const MsfStream) usize {
        return self.blocks.len * self.block_size;
    }

    // Translates the current stream position into an offset in the file.
    // The position must be inside the stream.
    fn getFilePos(self: *const MsfStream) usize {
        const block_id = self.pos / self.block_size;
        const block = self.blocks.items[block_id];
        const offset = self.pos % self.block_size;

        return usize(block) * self.block_size + offset;
    }

    /// Implementation of InStream trait for Pdb.MsfStream
    pub const Error = @typeOf(read).ReturnType.ErrorSet;
    pub const Stream = io.InStream(Error);

    stream: Stream,

    // Recovers the MsfStream that embeds `in_stream` and forwards to `read`.
    fn readFn(in_stream: *Stream, buffer: []u8) Error!usize {
        const self = @fieldParentPtr(MsfStream, "stream", in_stream);
        return self.read(buffer);
    }
};