introduce tool for dumping coverage file

with debug info resolved.

begin efforts of providing `std.debug.Info`, a cross-platform
abstraction for loading debug information into an in-memory format that
supports queries such as "what is the source location of this virtual
memory address?"

Unlike `std.debug.SelfInfo`, this API does not assume the debug
information in question happens to match the host CPU architecture, OS,
or other target properties.
This commit is contained in:
Andrew Kelley 2024-08-02 16:31:49 -07:00
parent 107b272766
commit 2e12b45d8b
6 changed files with 541 additions and 285 deletions

View File

@ -32,16 +32,16 @@ pub fn resolvePosix(p: Path, arena: Allocator, sub_path: []const u8) Allocator.E
};
}
pub fn joinString(p: Path, allocator: Allocator, sub_path: []const u8) Allocator.Error![]u8 {
pub fn joinString(p: Path, gpa: Allocator, sub_path: []const u8) Allocator.Error![]u8 {
const parts: []const []const u8 =
if (p.sub_path.len == 0) &.{sub_path} else &.{ p.sub_path, sub_path };
return p.root_dir.join(allocator, parts);
return p.root_dir.join(gpa, parts);
}
pub fn joinStringZ(p: Path, allocator: Allocator, sub_path: []const u8) Allocator.Error![:0]u8 {
pub fn joinStringZ(p: Path, gpa: Allocator, sub_path: []const u8) Allocator.Error![:0]u8 {
const parts: []const []const u8 =
if (p.sub_path.len == 0) &.{sub_path} else &.{ p.sub_path, sub_path };
return p.root_dir.joinZ(allocator, parts);
return p.root_dir.joinZ(gpa, parts);
}
pub fn openFile(

View File

@ -17,6 +17,7 @@ pub const MemoryAccessor = @import("debug/MemoryAccessor.zig");
pub const Dwarf = @import("debug/Dwarf.zig");
pub const Pdb = @import("debug/Pdb.zig");
pub const SelfInfo = @import("debug/SelfInfo.zig");
pub const Info = @import("debug/Info.zig");
/// Unresolved source locations can be represented with a single `usize` that
/// corresponds to a virtual memory address of the program counter. Combined
@ -28,6 +29,12 @@ pub const SourceLocation = struct {
file_name: []const u8,
};
pub const Symbol = struct {
name: []const u8 = "???",
compile_unit_name: []const u8 = "???",
source_location: ?SourceLocation = null,
};
/// Deprecated because it returns the optimization mode of the standard
/// library, when the caller probably wants to use the optimization mode of
/// their own module.
@ -871,13 +878,13 @@ pub fn printSourceAtAddress(debug_info: *SelfInfo, out_stream: anytype, address:
error.MissingDebugInfo, error.InvalidDebugInfo => return printUnknownSource(debug_info, out_stream, address, tty_config),
else => return err,
};
defer symbol_info.deinit(debug_info.allocator);
defer if (symbol_info.source_location) |sl| debug_info.allocator.free(sl.file_name);
return printLineInfo(
out_stream,
symbol_info.line_info,
symbol_info.source_location,
address,
symbol_info.symbol_name,
symbol_info.name,
symbol_info.compile_unit_name,
tty_config,
printLineFromFileAnyOs,
@ -886,7 +893,7 @@ pub fn printSourceAtAddress(debug_info: *SelfInfo, out_stream: anytype, address:
fn printLineInfo(
out_stream: anytype,
line_info: ?SourceLocation,
source_location: ?SourceLocation,
address: usize,
symbol_name: []const u8,
compile_unit_name: []const u8,
@ -896,8 +903,8 @@ fn printLineInfo(
nosuspend {
try tty_config.setColor(out_stream, .bold);
if (line_info) |*li| {
try out_stream.print("{s}:{d}:{d}", .{ li.file_name, li.line, li.column });
if (source_location) |*sl| {
try out_stream.print("{s}:{d}:{d}", .{ sl.file_name, sl.line, sl.column });
} else {
try out_stream.writeAll("???:?:?");
}
@ -910,11 +917,11 @@ fn printLineInfo(
try out_stream.writeAll("\n");
// Show the matching source code line if possible
if (line_info) |li| {
if (printLineFromFile(out_stream, li)) {
if (li.column > 0) {
if (source_location) |sl| {
if (printLineFromFile(out_stream, sl)) {
if (sl.column > 0) {
// The caret already takes one char
const space_needed = @as(usize, @intCast(li.column - 1));
const space_needed = @as(usize, @intCast(sl.column - 1));
try out_stream.writeByteNTimes(' ', space_needed);
try tty_config.setColor(out_stream, .green);
@ -932,10 +939,10 @@ fn printLineInfo(
}
}
fn printLineFromFileAnyOs(out_stream: anytype, line_info: SourceLocation) !void {
fn printLineFromFileAnyOs(out_stream: anytype, source_location: SourceLocation) !void {
// Need this to always block even in async I/O mode, because this could potentially
// be called from e.g. the event loop code crashing.
var f = try fs.cwd().openFile(line_info.file_name, .{});
var f = try fs.cwd().openFile(source_location.file_name, .{});
defer f.close();
// TODO fstat and make sure that the file has the correct size
@ -944,7 +951,7 @@ fn printLineFromFileAnyOs(out_stream: anytype, line_info: SourceLocation) !void
const line_start = seek: {
var current_line_start: usize = 0;
var next_line: usize = 1;
while (next_line != line_info.line) {
while (next_line != source_location.line) {
const slice = buf[current_line_start..amt_read];
if (mem.indexOfScalar(u8, slice, '\n')) |pos| {
next_line += 1;

View File

@ -12,6 +12,8 @@ const native_endian = builtin.cpu.arch.endian();
const std = @import("../std.zig");
const Allocator = std.mem.Allocator;
const elf = std.elf;
const mem = std.mem;
const DW = std.dwarf;
const AT = DW.AT;
const EH = DW.EH;
@ -22,8 +24,8 @@ const UT = DW.UT;
const assert = std.debug.assert;
const cast = std.math.cast;
const maxInt = std.math.maxInt;
const readInt = std.mem.readInt;
const MemoryAccessor = std.debug.MemoryAccessor;
const Path = std.Build.Cache.Path;
/// Did I mention this is deprecated?
const DeprecatedFixedBufferReader = std.debug.DeprecatedFixedBufferReader;
@ -252,13 +254,13 @@ pub const Die = struct {
.@"32" => {
const byte_offset = compile_unit.str_offsets_base + 4 * index;
if (byte_offset + 4 > debug_str_offsets.len) return bad();
const offset = readInt(u32, debug_str_offsets[byte_offset..][0..4], di.endian);
const offset = mem.readInt(u32, debug_str_offsets[byte_offset..][0..4], di.endian);
return getStringGeneric(opt_str, offset);
},
.@"64" => {
const byte_offset = compile_unit.str_offsets_base + 8 * index;
if (byte_offset + 8 > debug_str_offsets.len) return bad();
const offset = readInt(u64, debug_str_offsets[byte_offset..][0..8], di.endian);
const offset = mem.readInt(u64, debug_str_offsets[byte_offset..][0..8], di.endian);
return getStringGeneric(opt_str, offset);
},
}
@ -721,12 +723,14 @@ const num_sections = std.enums.directEnumArrayLen(Section.Id, 0);
pub const SectionArray = [num_sections]?Section;
pub const null_section_array = [_]?Section{null} ** num_sections;
pub const OpenError = ScanError;
/// Initialize DWARF info. The caller has the responsibility to initialize most
/// of the `Dwarf` fields before calling. `binary_mem` is the raw bytes of the
/// main binary file (not the secondary debug info file).
pub fn open(di: *Dwarf, allocator: Allocator) !void {
try di.scanAllFunctions(allocator);
try di.scanAllCompileUnits(allocator);
pub fn open(di: *Dwarf, gpa: Allocator) OpenError!void {
try di.scanAllFunctions(gpa);
try di.scanAllCompileUnits(gpa);
}
const PcRange = struct {
@ -747,21 +751,21 @@ pub fn sectionVirtualOffset(di: Dwarf, dwarf_section: Section.Id, base_address:
return if (di.sections[@intFromEnum(dwarf_section)]) |s| s.virtualOffset(base_address) else null;
}
pub fn deinit(di: *Dwarf, allocator: Allocator) void {
pub fn deinit(di: *Dwarf, gpa: Allocator) void {
for (di.sections) |opt_section| {
if (opt_section) |s| if (s.owned) allocator.free(s.data);
if (opt_section) |s| if (s.owned) gpa.free(s.data);
}
for (di.abbrev_table_list.items) |*abbrev| {
abbrev.deinit(allocator);
abbrev.deinit(gpa);
}
di.abbrev_table_list.deinit(allocator);
di.abbrev_table_list.deinit(gpa);
for (di.compile_unit_list.items) |*cu| {
cu.die.deinit(allocator);
cu.die.deinit(gpa);
}
di.compile_unit_list.deinit(allocator);
di.func_list.deinit(allocator);
di.cie_map.deinit(allocator);
di.fde_list.deinit(allocator);
di.compile_unit_list.deinit(gpa);
di.func_list.deinit(gpa);
di.cie_map.deinit(gpa);
di.fde_list.deinit(gpa);
di.* = undefined;
}
@ -777,7 +781,12 @@ pub fn getSymbolName(di: *Dwarf, address: u64) ?[]const u8 {
return null;
}
fn scanAllFunctions(di: *Dwarf, allocator: Allocator) !void {
const ScanError = error{
InvalidDebugInfo,
MissingDebugInfo,
} || Allocator.Error || std.debug.DeprecatedFixedBufferReader.Error;
fn scanAllFunctions(di: *Dwarf, allocator: Allocator) ScanError!void {
var fbr: DeprecatedFixedBufferReader = .{ .buf = di.section(.debug_info).?, .endian = di.endian };
var this_unit_offset: u64 = 0;
@ -964,7 +973,7 @@ fn scanAllFunctions(di: *Dwarf, allocator: Allocator) !void {
}
}
fn scanAllCompileUnits(di: *Dwarf, allocator: Allocator) !void {
fn scanAllCompileUnits(di: *Dwarf, allocator: Allocator) ScanError!void {
var fbr: DeprecatedFixedBufferReader = .{ .buf = di.section(.debug_info).?, .endian = di.endian };
var this_unit_offset: u64 = 0;
@ -1070,13 +1079,13 @@ const DebugRangeIterator = struct {
.@"32" => {
const offset_loc = @as(usize, @intCast(compile_unit.rnglists_base + 4 * idx));
if (offset_loc + 4 > debug_ranges.len) return bad();
const offset = readInt(u32, debug_ranges[offset_loc..][0..4], di.endian);
const offset = mem.readInt(u32, debug_ranges[offset_loc..][0..4], di.endian);
break :off compile_unit.rnglists_base + offset;
},
.@"64" => {
const offset_loc = @as(usize, @intCast(compile_unit.rnglists_base + 8 * idx));
if (offset_loc + 8 > debug_ranges.len) return bad();
const offset = readInt(u64, debug_ranges[offset_loc..][0..8], di.endian);
const offset = mem.readInt(u64, debug_ranges[offset_loc..][0..8], di.endian);
break :off compile_unit.rnglists_base + offset;
},
}
@ -1287,7 +1296,7 @@ fn parseDie(
attrs_buf: []Die.Attr,
abbrev_table: *const Abbrev.Table,
format: Format,
) !?Die {
) ScanError!?Die {
const abbrev_code = try fbr.readUleb128(u64);
if (abbrev_code == 0) return null;
const table_entry = abbrev_table.get(abbrev_code) orelse return bad();
@ -1588,7 +1597,7 @@ fn readDebugAddr(di: Dwarf, compile_unit: CompileUnit, index: u64) !u64 {
// The header is 8 or 12 bytes depending on is_64.
if (compile_unit.addr_base < 8) return bad();
const version = readInt(u16, debug_addr[compile_unit.addr_base - 4 ..][0..2], di.endian);
const version = mem.readInt(u16, debug_addr[compile_unit.addr_base - 4 ..][0..2], di.endian);
if (version != 5) return bad();
const addr_size = debug_addr[compile_unit.addr_base - 2];
@ -1598,9 +1607,9 @@ fn readDebugAddr(di: Dwarf, compile_unit: CompileUnit, index: u64) !u64 {
if (byte_offset + addr_size > debug_addr.len) return bad();
return switch (addr_size) {
1 => debug_addr[byte_offset],
2 => readInt(u16, debug_addr[byte_offset..][0..2], di.endian),
4 => readInt(u32, debug_addr[byte_offset..][0..4], di.endian),
8 => readInt(u64, debug_addr[byte_offset..][0..8], di.endian),
2 => mem.readInt(u16, debug_addr[byte_offset..][0..2], di.endian),
4 => mem.readInt(u32, debug_addr[byte_offset..][0..4], di.endian),
8 => mem.readInt(u64, debug_addr[byte_offset..][0..8], di.endian),
else => bad(),
};
}
@ -1699,7 +1708,7 @@ fn parseFormValue(
form_id: u64,
format: Format,
implicit_const: ?i64,
) anyerror!FormValue {
) ScanError!FormValue {
return switch (form_id) {
FORM.addr => .{ .addr = try fbr.readAddress(switch (@bitSizeOf(usize)) {
32 => .@"32",
@ -1892,7 +1901,8 @@ const UnitHeader = struct {
header_length: u4,
unit_length: u64,
};
fn readUnitHeader(fbr: *DeprecatedFixedBufferReader, opt_ma: ?*MemoryAccessor) !UnitHeader {
fn readUnitHeader(fbr: *DeprecatedFixedBufferReader, opt_ma: ?*MemoryAccessor) ScanError!UnitHeader {
return switch (try if (opt_ma) |ma| fbr.readIntChecked(u32, ma) else fbr.readInt(u32)) {
0...0xfffffff0 - 1 => |unit_length| .{
.format = .@"32",
@ -2023,3 +2033,335 @@ fn pcRelBase(field_ptr: usize, pc_rel_offset: i64) !usize {
return std.math.add(usize, field_ptr, @as(usize, @intCast(pc_rel_offset)));
}
}
pub const ElfModule = struct {
base_address: usize,
dwarf: Dwarf,
mapped_memory: []align(std.mem.page_size) const u8,
external_mapped_memory: ?[]align(std.mem.page_size) const u8,
/// Tears down the DWARF state and then unmaps the memory mappings held by
/// this module: always `mapped_memory`, and `external_mapped_memory` when it
/// is set.
pub fn deinit(self: *@This(), allocator: Allocator) void {
    const unmap = std.posix.munmap;
    self.dwarf.deinit(allocator);
    unmap(self.mapped_memory);
    if (self.external_mapped_memory) |external| {
        unmap(external);
    }
}
/// Returns the symbol information for the virtual address `address`.
///
/// The address is rebased by subtracting `base_address` so that it refers to
/// a location inside this object before the DWARF data is queried.
pub fn getSymbolAtAddress(self: *@This(), allocator: Allocator, address: usize) !std.debug.Symbol {
    return self.dwarf.getSymbol(allocator, address - self.base_address);
}
/// Returns the DWARF info of this module. Both `allocator` and `address` are
/// ignored: a module holds exactly one `Dwarf` instance.
pub fn getDwarfInfoForAddress(self: *@This(), allocator: Allocator, address: usize) !?*const Dwarf {
    _ = .{ allocator, address };
    return &self.dwarf;
}
/// Errors returned by `load` and `loadPath`.
pub const LoadError = error{
    InvalidDebugInfo,
    MissingDebugInfo,
    /// The first four identification bytes do not equal `elf.MAGIC`.
    InvalidElfMagic,
    /// The ELF identification bytes declare a version other than 1.
    InvalidElfVersion,
    /// The ELF data encoding is neither little endian nor big endian.
    InvalidElfEndian,
    /// The file's byte order differs from the host's.
    /// TODO: implement this and then remove this error code
    UnimplementedDwarfForeignEndian,
    /// The debug info may be valid but this implementation uses memory
    /// mapping which limits things to usize. If the target debug info is
    /// 64-bit and host is 32-bit, there may be debug info that is not
    /// supportable using this method.
    Overflow,
    PermissionDenied,
    LockedMemoryLimitExceeded,
    MemoryMappingNotSupported,
} || Allocator.Error || std.fs.File.OpenError || OpenError;
/// Reads debug info from an already mapped ELF file.
///
/// If the required sections aren't present but a reference to external debug
/// info is, then this function will recurse to attempt to load the debug
/// sections from an external file.
///
/// * `mapped_mem`: the complete contents of the ELF file.
/// * `build_id`: when non-null, used to look for a separate debug file under
///   `<global debug directory>/.build-id/`.
/// * `expected_crc`: when non-null, the CRC32 of `mapped_mem` must match it,
///   otherwise `error.InvalidDebugInfo` is returned.
/// * `parent_sections`: sections already collected by the caller. Ownership
///   of any owned section is transferred to this call.
/// * `parent_mapped_mem`: non-null when `mapped_mem` is itself a separate
///   debug file; only one level of nesting is allowed.
/// * `elf_filename`: when non-null, compared against the `.gnu_debuglink`
///   name so that a file never refers to itself.
pub fn load(
    gpa: Allocator,
    mapped_mem: []align(std.mem.page_size) const u8,
    build_id: ?[]const u8,
    expected_crc: ?u32,
    parent_sections: *Dwarf.SectionArray,
    parent_mapped_mem: ?[]align(std.mem.page_size) const u8,
    elf_filename: ?[]const u8,
) LoadError!Dwarf.ElfModule {
    if (expected_crc) |crc| if (crc != std.hash.crc.Crc32.hash(mapped_mem)) return error.InvalidDebugInfo;

    // A mapping shorter than the ELF header (e.g. an empty file) cannot be
    // an ELF file; reject it before reading any header field.
    if (mapped_mem.len < @sizeOf(elf.Ehdr)) return error.InvalidElfMagic;

    const hdr: *const elf.Ehdr = @ptrCast(&mapped_mem[0]);
    if (!mem.eql(u8, hdr.e_ident[0..4], elf.MAGIC)) return error.InvalidElfMagic;
    if (hdr.e_ident[elf.EI_VERSION] != 1) return error.InvalidElfVersion;

    const endian: std.builtin.Endian = switch (hdr.e_ident[elf.EI_DATA]) {
        elf.ELFDATA2LSB => .little,
        elf.ELFDATA2MSB => .big,
        else => return error.InvalidElfEndian,
    };
    if (endian != native_endian) return error.UnimplementedDwarfForeignEndian;

    const shoff = hdr.e_shoff;
    const str_section_off = shoff + @as(u64, hdr.e_shentsize) * @as(u64, hdr.e_shstrndx);
    const str_shdr: *const elf.Shdr = @ptrCast(@alignCast(&mapped_mem[cast(usize, str_section_off) orelse return error.Overflow]));
    // Slice the section name string table the same way as every other
    // section so that offset/size conversion failures surface as errors.
    const header_strings = try chopSlice(mapped_mem, str_shdr.sh_offset, str_shdr.sh_size);
    const shdrs = @as(
        [*]const elf.Shdr,
        @ptrCast(@alignCast(&mapped_mem[shoff])),
    )[0..hdr.e_shnum];

    var sections: Dwarf.SectionArray = Dwarf.null_section_array;

    // Combine section list. This takes ownership over any owned sections from the parent scope.
    for (parent_sections, &sections) |*parent, *section_elem| {
        if (parent.*) |*p| {
            section_elem.* = p.*;
            p.owned = false;
        }
    }
    errdefer for (sections) |opt_section| if (opt_section) |s| if (s.owned) gpa.free(s.data);

    var separate_debug_filename: ?[]const u8 = null;
    var separate_debug_crc: ?u32 = null;

    for (shdrs) |*shdr| {
        if (shdr.sh_type == elf.SHT_NULL or shdr.sh_type == elf.SHT_NOBITS) continue;
        const name = mem.sliceTo(header_strings[shdr.sh_name..], 0);

        if (mem.eql(u8, name, ".gnu_debuglink")) {
            const gnu_debuglink = try chopSlice(mapped_mem, shdr.sh_offset, shdr.sh_size);
            const debug_filename = mem.sliceTo(@as([*:0]const u8, @ptrCast(gnu_debuglink.ptr)), 0);
            // The CRC follows the NUL-terminated file name, padded to a
            // 4-byte boundary.
            const crc_offset = mem.alignForward(usize, @intFromPtr(&debug_filename[debug_filename.len]) + 1, 4) - @intFromPtr(gnu_debuglink.ptr);
            const crc_bytes = gnu_debuglink[crc_offset..][0..4];
            separate_debug_crc = mem.readInt(u32, crc_bytes, native_endian);
            separate_debug_filename = debug_filename;
            continue;
        }

        var section_index: ?usize = null;
        inline for (@typeInfo(Dwarf.Section.Id).Enum.fields, 0..) |sect, i| {
            if (mem.eql(u8, "." ++ sect.name, name)) section_index = i;
        }
        if (section_index == null) continue;
        // Sections inherited from the parent take precedence.
        if (sections[section_index.?] != null) continue;

        const section_bytes = try chopSlice(mapped_mem, shdr.sh_offset, shdr.sh_size);
        sections[section_index.?] = if ((shdr.sh_flags & elf.SHF_COMPRESSED) > 0) blk: {
            var section_stream = std.io.fixedBufferStream(section_bytes);
            const section_reader = section_stream.reader();
            const chdr = section_reader.readStruct(elf.Chdr) catch continue;
            if (chdr.ch_type != .ZLIB) continue;

            var zlib_stream = std.compress.zlib.decompressor(section_reader);

            const decompressed_section = try gpa.alloc(u8, chdr.ch_size);
            // Skipping a section via `continue` is not an error exit, so an
            // `errdefer` would not run here; the buffer is freed explicitly.
            const read = zlib_stream.reader().readAll(decompressed_section) catch {
                gpa.free(decompressed_section);
                continue;
            };
            // A stream that ends early comes from malformed file contents,
            // not from a programming error: skip the section.
            if (read != decompressed_section.len) {
                gpa.free(decompressed_section);
                continue;
            }
            break :blk .{
                .data = decompressed_section,
                .virtual_address = shdr.sh_addr,
                .owned = true,
            };
        } else .{
            .data = section_bytes,
            .virtual_address = shdr.sh_addr,
            .owned = false,
        };
    }

    const missing_debug_info =
        sections[@intFromEnum(Dwarf.Section.Id.debug_info)] == null or
        sections[@intFromEnum(Dwarf.Section.Id.debug_abbrev)] == null or
        sections[@intFromEnum(Dwarf.Section.Id.debug_str)] == null or
        sections[@intFromEnum(Dwarf.Section.Id.debug_line)] == null;

    // Attempt to load debug info from an external file
    // See: https://sourceware.org/gdb/onlinedocs/gdb/Separate-Debug-Files.html
    if (missing_debug_info) {
        // Only allow one level of debug info nesting
        if (parent_mapped_mem) |_| {
            return error.MissingDebugInfo;
        }

        const global_debug_directories = [_][]const u8{
            "/usr/lib/debug",
        };

        // <global debug directory>/.build-id/<2-character id prefix>/<id remainder>.debug
        if (build_id) |id| blk: {
            if (id.len < 3) break :blk;

            // Either md5 (16 bytes) or sha1 (20 bytes) are used here in practice
            const extension = ".debug";
            var id_prefix_buf: [2]u8 = undefined;
            var filename_buf: [38 + extension.len]u8 = undefined;

            _ = std.fmt.bufPrint(&id_prefix_buf, "{s}", .{std.fmt.fmtSliceHexLower(id[0..1])}) catch unreachable;
            const filename = std.fmt.bufPrint(
                &filename_buf,
                "{s}" ++ extension,
                .{std.fmt.fmtSliceHexLower(id[1..])},
            ) catch break :blk;

            for (global_debug_directories) |global_directory| {
                const path: Path = .{
                    .root_dir = std.Build.Cache.Directory.cwd(),
                    .sub_path = try std.fs.path.join(gpa, &.{
                        global_directory, ".build-id", &id_prefix_buf, filename,
                    }),
                };
                defer gpa.free(path.sub_path);

                return loadPath(gpa, path, null, separate_debug_crc, &sections, mapped_mem) catch continue;
            }
        }

        // use the path from .gnu_debuglink, in the same search order as gdb
        if (separate_debug_filename) |separate_filename| blk: {
            if (elf_filename != null and mem.eql(u8, elf_filename.?, separate_filename))
                return error.MissingDebugInfo;

            // <cwd>/<gnu_debuglink>
            if (loadPath(
                gpa,
                .{
                    .root_dir = std.Build.Cache.Directory.cwd(),
                    .sub_path = separate_filename,
                },
                null,
                separate_debug_crc,
                &sections,
                mapped_mem,
            )) |debug_info| {
                return debug_info;
            } else |_| {}

            // <cwd>/.debug/<gnu_debuglink>
            {
                const path: Path = .{
                    .root_dir = std.Build.Cache.Directory.cwd(),
                    .sub_path = try std.fs.path.join(gpa, &.{ ".debug", separate_filename }),
                };
                defer gpa.free(path.sub_path);

                if (loadPath(gpa, path, null, separate_debug_crc, &sections, mapped_mem)) |debug_info| return debug_info else |_| {}
            }

            var cwd_buf: [std.fs.max_path_bytes]u8 = undefined;
            const cwd_path = std.posix.realpath(".", &cwd_buf) catch break :blk;

            // <global debug directory>/<absolute folder of current binary>/<gnu_debuglink>
            for (global_debug_directories) |global_directory| {
                const path: Path = .{
                    .root_dir = std.Build.Cache.Directory.cwd(),
                    .sub_path = try std.fs.path.join(gpa, &.{ global_directory, cwd_path, separate_filename }),
                };
                defer gpa.free(path.sub_path);
                if (loadPath(gpa, path, null, separate_debug_crc, &sections, mapped_mem)) |debug_info| return debug_info else |_| {}
            }
        }

        return error.MissingDebugInfo;
    }

    var di: Dwarf = .{
        .endian = endian,
        .sections = sections,
        .is_macho = false,
    };

    try Dwarf.open(&di, gpa);

    return .{
        .base_address = 0,
        .dwarf = di,
        // When loading a separate debug file, the primary mapping belongs to
        // the parent and this file's mapping is recorded as the external one.
        .mapped_memory = parent_mapped_mem orelse mapped_mem,
        .external_mapped_memory = if (parent_mapped_mem != null) mapped_mem else null,
    };
}
/// Opens the ELF file at `elf_file_path`, maps the whole file read-only and
/// hands the mapping to `load` together with the remaining arguments.
///
/// A file that does not exist yields the result of `missing()`; a failure to
/// query the file size yields the result of `bad()`. The mapping is released
/// again if `load` returns an error.
pub fn loadPath(
    gpa: Allocator,
    elf_file_path: Path,
    build_id: ?[]const u8,
    expected_crc: ?u32,
    parent_sections: *Dwarf.SectionArray,
    parent_mapped_mem: ?[]align(std.mem.page_size) const u8,
) LoadError!Dwarf.ElfModule {
    const file = elf_file_path.root_dir.handle.openFile(elf_file_path.sub_path, .{}) catch |err| {
        if (err == error.FileNotFound) return missing();
        return err;
    };
    defer file.close();

    const byte_count = cast(usize, file.getEndPos() catch return bad()) orelse
        return error.Overflow;

    const mapping = try std.posix.mmap(
        null,
        byte_count,
        std.posix.PROT.READ,
        .{ .TYPE = .SHARED },
        file.handle,
        0,
    );
    errdefer std.posix.munmap(mapping);

    return load(
        gpa,
        mapping,
        build_id,
        expected_crc,
        parent_sections,
        parent_mapped_mem,
        elf_file_path.sub_path,
    );
}
};
/// Intended to resolve each virtual memory address in `sorted_pc_addrs`
/// (sorted ascending) to its source location, storing the results in the
/// caller-provided `output` slice.
///
/// Not implemented yet: after checking the slice lengths this panics with
/// "TODO".
pub fn resolveSourceLocations(
    d: *Dwarf,
    gpa: Allocator,
    sorted_pc_addrs: []const u64,
    /// Asserts its length equals length of `sorted_pc_addrs`.
    output: []std.debug.SourceLocation,
) error{ MissingDebugInfo, InvalidDebugInfo }!void {
    _ = .{ d, gpa };
    assert(output.len == sorted_pc_addrs.len);
    @panic("TODO");
}
/// Looks up the symbol that contains `address`.
///
/// Returns a default-initialized `std.debug.Symbol` (all names "???", no
/// source location) when no compile unit can be found for the address because
/// debug info is missing or invalid. Within a found compile unit, a missing
/// or invalid name or line table degrades to "???" / `null` rather than
/// failing; any other error is propagated.
///
/// `allocator` is passed on to `getLineNumberInfo`.
///
/// Public because it is also called from outside this file (the Windows
/// module in `SelfInfo.zig` calls `dwarf.getSymbol`).
pub fn getSymbol(di: *Dwarf, allocator: Allocator, address: u64) !std.debug.Symbol {
    if (di.findCompileUnit(address)) |compile_unit| {
        return .{
            .name = di.getSymbolName(address) orelse "???",
            .compile_unit_name = compile_unit.die.getAttrString(di, std.dwarf.AT.name, di.section(.debug_str), compile_unit.*) catch |err| switch (err) {
                error.MissingDebugInfo, error.InvalidDebugInfo => "???",
            },
            .source_location = di.getLineNumberInfo(allocator, compile_unit.*, address) catch |err| switch (err) {
                error.MissingDebugInfo, error.InvalidDebugInfo => null,
                else => return err,
            },
        };
    } else |err| switch (err) {
        error.MissingDebugInfo, error.InvalidDebugInfo => return .{},
        else => return err,
    }
}
/// Returns the sub-slice of `ptr` that starts at byte `offset` and is `size`
/// bytes long.
///
/// Returns `error.Overflow` when `offset` or `size` does not fit in `usize`,
/// when their sum overflows, or when the requested range extends past the end
/// of `ptr`. Offsets and sizes typically come straight from file headers, so
/// an out-of-range request is reported as an error instead of tripping a
/// bounds check.
pub fn chopSlice(ptr: []const u8, offset: u64, size: u64) error{Overflow}![]const u8 {
    const start = cast(usize, offset) orelse return error.Overflow;
    const len = cast(usize, size) orelse return error.Overflow;
    const end = try std.math.add(usize, start, len);
    if (end > ptr.len) return error.Overflow;
    return ptr[start..end];
}

57
lib/std/debug/Info.zig Normal file
View File

@ -0,0 +1,57 @@
//! Cross-platform abstraction for loading debug information into an in-memory
//! format that supports queries such as "what is the source location of this
//! virtual memory address?"
//!
//! Unlike `std.debug.SelfInfo`, this API does not assume the debug information
//! in question happens to match the host CPU architecture, OS, or other target
//! properties.
const std = @import("../std.zig");
const Allocator = std.mem.Allocator;
const Path = std.Build.Cache.Path;
const Dwarf = std.debug.Dwarf;
const page_size = std.mem.page_size;
const assert = std.debug.assert;
const Info = @This();
/// Sorted by key, ascending.
address_map: std.AutoArrayHashMapUnmanaged(u64, Dwarf.ElfModule),
pub const LoadError = Dwarf.ElfModule.LoadError;
/// Loads the debug information of the ELF file at `path` and registers it in
/// a fresh `Info`, keyed by the module's base address.
///
/// The result must be released with `deinit`, using the same `gpa`.
pub fn load(gpa: Allocator, path: Path) LoadError!Info {
    var sections: Dwarf.SectionArray = Dwarf.null_section_array;
    var elf_module = try Dwarf.ElfModule.loadPath(gpa, path, null, null, &sections, null);
    // Until the map holds the module, nothing else will release its file
    // mapping and DWARF state if inserting it fails.
    errdefer elf_module.deinit(gpa);

    var info: Info = .{
        .address_map = .{},
    };
    try info.address_map.put(gpa, elf_module.base_address, elf_module);
    return info;
}
/// Releases every loaded module and the address map itself. `gpa` must be the
/// allocator that was passed to `load`.
pub fn deinit(info: *Info, gpa: Allocator) void {
    for (info.address_map.values()) |*elf_module| {
        // `ElfModule.deinit` releases the DWARF state and also unmaps the
        // file mappings; deinitializing only the `dwarf` field would leave
        // those mappings in place.
        elf_module.deinit(gpa);
    }
    info.address_map.deinit(gpa);
    info.* = undefined;
}
/// Errors that `resolveSourceLocations` can return.
pub const ResolveSourceLocationsError = error{
    MissingDebugInfo,
    InvalidDebugInfo,
} || Allocator.Error;
/// Resolves the addresses in `sorted_pc_addrs` to source locations by
/// forwarding to the `Dwarf` of the single loaded module.
///
/// Only exactly one loaded module is supported for now; any other count
/// panics with "TODO".
pub fn resolveSourceLocations(
    info: *Info,
    gpa: Allocator,
    sorted_pc_addrs: []const u64,
    /// Asserts its length equals length of `sorted_pc_addrs`.
    output: []std.debug.SourceLocation,
) ResolveSourceLocationsError!void {
    assert(output.len == sorted_pc_addrs.len);
    const modules = info.address_map.values();
    if (modules.len != 1) @panic("TODO");
    return modules[0].dwarf.resolveSourceLocations(gpa, sorted_pc_addrs, output);
}

View File

@ -587,7 +587,7 @@ pub const Module = switch (native_os) {
}
if (section_index == null) continue;
const section_bytes = try chopSlice(mapped_mem, sect.offset, sect.size);
const section_bytes = try Dwarf.chopSlice(mapped_mem, sect.offset, sect.size);
sections[section_index.?] = .{
.data = section_bytes,
.virtual_address = sect.addr,
@ -622,7 +622,7 @@ pub const Module = switch (native_os) {
return result.value_ptr;
}
pub fn getSymbolAtAddress(self: *@This(), allocator: Allocator, address: usize) !SymbolInfo {
pub fn getSymbolAtAddress(self: *@This(), allocator: Allocator, address: usize) !Dwarf.SymbolInfo {
nosuspend {
const result = try self.getOFileInfoForAddress(allocator, address);
if (result.symbol == null) return .{};
@ -641,7 +641,7 @@ pub const Module = switch (native_os) {
const addr_off = result.relocated_address - result.symbol.?.addr;
const o_file_di = &result.o_file_info.?.di;
if (o_file_di.findCompileUnit(relocated_address_o)) |compile_unit| {
return SymbolInfo{
return .{
.symbol_name = o_file_di.getSymbolName(relocated_address_o) orelse "???",
.compile_unit_name = compile_unit.die.getAttrString(
o_file_di,
@ -662,7 +662,7 @@ pub const Module = switch (native_os) {
};
} else |err| switch (err) {
error.MissingDebugInfo, error.InvalidDebugInfo => {
return SymbolInfo{ .symbol_name = stab_symbol };
return .{ .symbol_name = stab_symbol };
},
else => return err,
}
@ -729,7 +729,7 @@ pub const Module = switch (native_os) {
}
}
fn getSymbolFromPdb(self: *@This(), relocated_address: usize) !?SymbolInfo {
fn getSymbolFromPdb(self: *@This(), relocated_address: usize) !?std.debug.Symbol {
var coff_section: *align(1) const coff.SectionHeader = undefined;
const mod_index = for (self.pdb.?.sect_contribs) |sect_contrib| {
if (sect_contrib.Section > self.coff_section_headers.len) continue;
@ -759,14 +759,14 @@ pub const Module = switch (native_os) {
relocated_address - coff_section.virtual_address,
);
return SymbolInfo{
return .{
.symbol_name = symbol_name,
.compile_unit_name = obj_basename,
.line_info = opt_line_info,
};
}
pub fn getSymbolAtAddress(self: *@This(), allocator: Allocator, address: usize) !SymbolInfo {
pub fn getSymbolAtAddress(self: *@This(), allocator: Allocator, address: usize) !std.debug.Symbol {
// Translate the VA into an address into this object
const relocated_address = address - self.base_address;
@ -776,10 +776,10 @@ pub const Module = switch (native_os) {
if (self.dwarf) |*dwarf| {
const dwarf_address = relocated_address + self.coff_image_base;
return getSymbolFromDwarf(allocator, dwarf_address, dwarf);
return dwarf.getSymbol(allocator, dwarf_address);
}
return SymbolInfo{};
return .{};
}
pub fn getDwarfInfoForAddress(self: *@This(), allocator: Allocator, address: usize) !?*const Dwarf {
@ -792,41 +792,18 @@ pub const Module = switch (native_os) {
};
}
},
.linux, .netbsd, .freebsd, .dragonfly, .openbsd, .haiku, .solaris, .illumos => struct {
base_address: usize,
dwarf: Dwarf,
mapped_memory: []align(mem.page_size) const u8,
external_mapped_memory: ?[]align(mem.page_size) const u8,
pub fn deinit(self: *@This(), allocator: Allocator) void {
self.dwarf.deinit(allocator);
posix.munmap(self.mapped_memory);
if (self.external_mapped_memory) |m| posix.munmap(m);
}
pub fn getSymbolAtAddress(self: *@This(), allocator: Allocator, address: usize) !SymbolInfo {
// Translate the VA into an address into this object
const relocated_address = address - self.base_address;
return getSymbolFromDwarf(allocator, relocated_address, &self.dwarf);
}
pub fn getDwarfInfoForAddress(self: *@This(), allocator: Allocator, address: usize) !?*const Dwarf {
_ = allocator;
_ = address;
return &self.dwarf;
}
},
.linux, .netbsd, .freebsd, .dragonfly, .openbsd, .haiku, .solaris, .illumos => Dwarf.ElfModule,
.wasi, .emscripten => struct {
pub fn deinit(self: *@This(), allocator: Allocator) void {
_ = self;
_ = allocator;
}
pub fn getSymbolAtAddress(self: *@This(), allocator: Allocator, address: usize) !SymbolInfo {
pub fn getSymbolAtAddress(self: *@This(), allocator: Allocator, address: usize) !std.debug.Symbol {
_ = self;
_ = allocator;
_ = address;
return SymbolInfo{};
return .{};
}
pub fn getDwarfInfoForAddress(self: *@This(), allocator: Allocator, address: usize) !?*const Dwarf {
@ -1068,7 +1045,7 @@ pub fn readElfDebugInfo(
expected_crc: ?u32,
parent_sections: *Dwarf.SectionArray,
parent_mapped_mem: ?[]align(mem.page_size) const u8,
) !Module {
) !Dwarf.ElfModule {
nosuspend {
const elf_file = (if (elf_filename) |filename| blk: {
break :blk fs.cwd().openFile(filename, .{});
@ -1078,176 +1055,15 @@ pub fn readElfDebugInfo(
};
const mapped_mem = try mapWholeFile(elf_file);
if (expected_crc) |crc| if (crc != std.hash.crc.Crc32.hash(mapped_mem)) return error.InvalidDebugInfo;
const hdr: *const elf.Ehdr = @ptrCast(&mapped_mem[0]);
if (!mem.eql(u8, hdr.e_ident[0..4], elf.MAGIC)) return error.InvalidElfMagic;
if (hdr.e_ident[elf.EI_VERSION] != 1) return error.InvalidElfVersion;
const endian: std.builtin.Endian = switch (hdr.e_ident[elf.EI_DATA]) {
elf.ELFDATA2LSB => .little,
elf.ELFDATA2MSB => .big,
else => return error.InvalidElfEndian,
};
assert(endian == native_endian); // this is our own debug info
const shoff = hdr.e_shoff;
const str_section_off = shoff + @as(u64, hdr.e_shentsize) * @as(u64, hdr.e_shstrndx);
const str_shdr: *const elf.Shdr = @ptrCast(@alignCast(&mapped_mem[math.cast(usize, str_section_off) orelse return error.Overflow]));
const header_strings = mapped_mem[str_shdr.sh_offset..][0..str_shdr.sh_size];
const shdrs = @as(
[*]const elf.Shdr,
@ptrCast(@alignCast(&mapped_mem[shoff])),
)[0..hdr.e_shnum];
var sections: Dwarf.SectionArray = Dwarf.null_section_array;
// Combine section list. This takes ownership over any owned sections from the parent scope.
for (parent_sections, &sections) |*parent, *section| {
if (parent.*) |*p| {
section.* = p.*;
p.owned = false;
}
}
errdefer for (sections) |section| if (section) |s| if (s.owned) allocator.free(s.data);
var separate_debug_filename: ?[]const u8 = null;
var separate_debug_crc: ?u32 = null;
for (shdrs) |*shdr| {
if (shdr.sh_type == elf.SHT_NULL or shdr.sh_type == elf.SHT_NOBITS) continue;
const name = mem.sliceTo(header_strings[shdr.sh_name..], 0);
if (mem.eql(u8, name, ".gnu_debuglink")) {
const gnu_debuglink = try chopSlice(mapped_mem, shdr.sh_offset, shdr.sh_size);
const debug_filename = mem.sliceTo(@as([*:0]const u8, @ptrCast(gnu_debuglink.ptr)), 0);
const crc_offset = mem.alignForward(usize, @intFromPtr(&debug_filename[debug_filename.len]) + 1, 4) - @intFromPtr(gnu_debuglink.ptr);
const crc_bytes = gnu_debuglink[crc_offset..][0..4];
separate_debug_crc = mem.readInt(u32, crc_bytes, native_endian);
separate_debug_filename = debug_filename;
continue;
}
var section_index: ?usize = null;
inline for (@typeInfo(Dwarf.Section.Id).Enum.fields, 0..) |section, i| {
if (mem.eql(u8, "." ++ section.name, name)) section_index = i;
}
if (section_index == null) continue;
if (sections[section_index.?] != null) continue;
const section_bytes = try chopSlice(mapped_mem, shdr.sh_offset, shdr.sh_size);
sections[section_index.?] = if ((shdr.sh_flags & elf.SHF_COMPRESSED) > 0) blk: {
var section_stream = std.io.fixedBufferStream(section_bytes);
var section_reader = section_stream.reader();
const chdr = section_reader.readStruct(elf.Chdr) catch continue;
if (chdr.ch_type != .ZLIB) continue;
var zlib_stream = std.compress.zlib.decompressor(section_stream.reader());
const decompressed_section = try allocator.alloc(u8, chdr.ch_size);
errdefer allocator.free(decompressed_section);
const read = zlib_stream.reader().readAll(decompressed_section) catch continue;
assert(read == decompressed_section.len);
break :blk .{
.data = decompressed_section,
.virtual_address = shdr.sh_addr,
.owned = true,
};
} else .{
.data = section_bytes,
.virtual_address = shdr.sh_addr,
.owned = false,
};
}
const missing_debug_info =
sections[@intFromEnum(Dwarf.Section.Id.debug_info)] == null or
sections[@intFromEnum(Dwarf.Section.Id.debug_abbrev)] == null or
sections[@intFromEnum(Dwarf.Section.Id.debug_str)] == null or
sections[@intFromEnum(Dwarf.Section.Id.debug_line)] == null;
// Attempt to load debug info from an external file
// See: https://sourceware.org/gdb/onlinedocs/gdb/Separate-Debug-Files.html
if (missing_debug_info) {
// Only allow one level of debug info nesting
if (parent_mapped_mem) |_| {
return error.MissingDebugInfo;
}
const global_debug_directories = [_][]const u8{
"/usr/lib/debug",
};
// <global debug directory>/.build-id/<2-character id prefix>/<id remainder>.debug
if (build_id) |id| blk: {
if (id.len < 3) break :blk;
// Either md5 (16 bytes) or sha1 (20 bytes) are used here in practice
const extension = ".debug";
var id_prefix_buf: [2]u8 = undefined;
var filename_buf: [38 + extension.len]u8 = undefined;
_ = std.fmt.bufPrint(&id_prefix_buf, "{s}", .{std.fmt.fmtSliceHexLower(id[0..1])}) catch unreachable;
const filename = std.fmt.bufPrint(
&filename_buf,
"{s}" ++ extension,
.{std.fmt.fmtSliceHexLower(id[1..])},
) catch break :blk;
for (global_debug_directories) |global_directory| {
const path = try fs.path.join(allocator, &.{ global_directory, ".build-id", &id_prefix_buf, filename });
defer allocator.free(path);
return readElfDebugInfo(allocator, path, null, separate_debug_crc, &sections, mapped_mem) catch continue;
}
}
// use the path from .gnu_debuglink, in the same search order as gdb
if (separate_debug_filename) |separate_filename| blk: {
if (elf_filename != null and mem.eql(u8, elf_filename.?, separate_filename)) return error.MissingDebugInfo;
// <cwd>/<gnu_debuglink>
if (readElfDebugInfo(allocator, separate_filename, null, separate_debug_crc, &sections, mapped_mem)) |debug_info| return debug_info else |_| {}
// <cwd>/.debug/<gnu_debuglink>
{
const path = try fs.path.join(allocator, &.{ ".debug", separate_filename });
defer allocator.free(path);
if (readElfDebugInfo(allocator, path, null, separate_debug_crc, &sections, mapped_mem)) |debug_info| return debug_info else |_| {}
}
var cwd_buf: [fs.max_path_bytes]u8 = undefined;
const cwd_path = posix.realpath(".", &cwd_buf) catch break :blk;
// <global debug directory>/<absolute folder of current binary>/<gnu_debuglink>
for (global_debug_directories) |global_directory| {
const path = try fs.path.join(allocator, &.{ global_directory, cwd_path, separate_filename });
defer allocator.free(path);
if (readElfDebugInfo(allocator, path, null, separate_debug_crc, &sections, mapped_mem)) |debug_info| return debug_info else |_| {}
}
}
return error.MissingDebugInfo;
}
var di = Dwarf{
.endian = endian,
.sections = sections,
.is_macho = false,
};
try Dwarf.open(&di, allocator);
return .{
.base_address = undefined,
.dwarf = di,
.mapped_memory = parent_mapped_mem orelse mapped_mem,
.external_mapped_memory = if (parent_mapped_mem != null) mapped_mem else null,
};
return Dwarf.ElfModule.load(
allocator,
mapped_mem,
build_id,
expected_crc,
parent_sections,
parent_mapped_mem,
elf_filename,
);
}
}
@ -1289,22 +1105,6 @@ fn mapWholeFile(file: File) ![]align(mem.page_size) const u8 {
}
}
/// Returns the sub-slice of `ptr` that begins at byte `offset` and is `size` bytes long.
///
/// Returns `error.Overflow` when `offset` or `size` does not fit in `usize`, when
/// `offset + size` overflows `usize`, or when the requested range extends past the
/// end of `ptr`. The returned slice aliases `ptr`; nothing is copied.
fn chopSlice(ptr: []const u8, offset: u64, size: u64) error{Overflow}![]const u8 {
    const start = math.cast(usize, offset) orelse return error.Overflow;
    const len = math.cast(usize, size) orelse return error.Overflow;
    // Checked addition: a plain `+` would trap in safe builds and be undefined
    // behavior in ReleaseFast when the sum does not fit in `usize`.
    const end = try math.add(usize, start, len);
    // Offsets and sizes may come from file contents, so report an out-of-range
    // request as an error instead of relying on the slice bounds check.
    if (end > ptr.len) return error.Overflow;
    return ptr[start..end];
}
/// Describes the symbol found for an address. Every field has a default, so
/// `SymbolInfo{}` represents "nothing known".
pub const SymbolInfo = struct {
    /// Name of the symbol; `"???"` by default.
    symbol_name: []const u8 = "???",
    /// Name of the compile unit; `"???"` by default.
    compile_unit_name: []const u8 = "???",
    /// Source location, or `null` by default. When set, `deinit` frees its
    /// `file_name`.
    line_info: ?std.debug.SourceLocation = null,

    /// Frees `line_info.file_name` with `allocator` when `line_info` is set.
    /// The other fields are not freed.
    pub fn deinit(self: SymbolInfo, allocator: Allocator) void {
        const location = self.line_info orelse return;
        allocator.free(location.file_name);
    }
};
fn machoSearchSymbols(symbols: []const MachoSymbol, address: usize) ?*const MachoSymbol {
var min: usize = 0;
var max: usize = symbols.len - 1;
@ -1350,26 +1150,6 @@ test machoSearchSymbols {
try testing.expectEqual(&symbols[2], machoSearchSymbols(&symbols, 5000).?);
}
/// Builds a `SymbolInfo` for `address` from the DWARF data in `di`.
///
/// If no compile unit can be found because debug info is missing or invalid,
/// an all-defaults `SymbolInfo` is returned. The same two errors from the
/// compile unit name or line number lookups leave just that field at its
/// default. Any other error is returned to the caller.
///
/// `allocator` is forwarded to `getLineNumberInfo`.
/// NOTE(review): presumably the returned `line_info.file_name` is allocated
/// with it and should be released via `SymbolInfo.deinit` — confirm.
fn getSymbolFromDwarf(allocator: Allocator, address: u64, di: *Dwarf) !SymbolInfo {
    // Without a compile unit there is nothing further to look up.
    const compile_unit = nosuspend di.findCompileUnit(address) catch |err| switch (err) {
        error.MissingDebugInfo, error.InvalidDebugInfo => return SymbolInfo{},
        else => return err,
    };

    const symbol_name: []const u8 = nosuspend di.getSymbolName(address) orelse "???";

    const compile_unit_name: []const u8 = compile_unit.die.getAttrString(
        di,
        std.dwarf.AT.name,
        di.section(.debug_str),
        compile_unit.*,
    ) catch |err| switch (err) {
        error.MissingDebugInfo, error.InvalidDebugInfo => "???",
    };

    const line_info: ?std.debug.SourceLocation = nosuspend di.getLineNumberInfo(
        allocator,
        compile_unit.*,
        address,
    ) catch |err| switch (err) {
        error.MissingDebugInfo, error.InvalidDebugInfo => null,
        else => return err,
    };

    return SymbolInfo{
        .symbol_name = symbol_name,
        .compile_unit_name = compile_unit_name,
        .line_info = line_info,
    };
}
/// Unwind a frame using MachO compact unwind info (from __unwind_info).
/// If the compact encoding can't encode a way to unwind a frame, it will
/// defer unwinding to DWARF, in which case `.eh_frame` will be used if available.

70
tools/dump-cov.zig Normal file
View File

@ -0,0 +1,70 @@
//! Reads a Zig coverage file and prints human-readable information to stdout,
//! including file:line:column information for each PC.
const std = @import("std");
const fatal = std.process.fatal;
const Path = std.Build.Cache.Path;
const assert = std.debug.assert;
/// Entry point. Usage: `dump-cov <exe-file> <coverage-file>`.
///
/// Loads debug info for the executable, reads the coverage file (a
/// `SeenPcsHeader` followed by `pcs_len` PC values), and prints the header
/// followed by one `pc: file:line:column` line per PC to stdout.
///
/// Missing arguments and malformed coverage files are reported with `fatal`.
pub fn main() !void {
    var general_purpose_allocator: std.heap.GeneralPurposeAllocator(.{}) = .{};
    defer _ = general_purpose_allocator.deinit();
    const gpa = general_purpose_allocator.allocator();

    var arena_instance = std.heap.ArenaAllocator.init(gpa);
    defer arena_instance.deinit();
    const arena = arena_instance.allocator();

    const args = try std.process.argsAlloc(arena);
    // Both positional arguments are required; indexing without this check
    // would be out of bounds when they are missing.
    if (args.len < 3) {
        const program_name: []const u8 = if (args.len > 0) args[0] else "dump-cov";
        fatal("usage: {s} <exe-file> <coverage-file>", .{program_name});
    }
    const exe_file_name = args[1];
    const cov_file_name = args[2];

    const exe_path: Path = .{
        .root_dir = std.Build.Cache.Directory.cwd(),
        .sub_path = exe_file_name,
    };
    const cov_path: Path = .{
        .root_dir = std.Build.Cache.Directory.cwd(),
        .sub_path = cov_file_name,
    };

    var debug_info = std.debug.Info.load(gpa, exe_path) catch |err| {
        fatal("failed to load debug info for {}: {s}", .{ exe_path, @errorName(err) });
    };
    defer debug_info.deinit(gpa);

    const cov_bytes = cov_path.root_dir.handle.readFileAlloc(arena, cov_path.sub_path, 1 << 30) catch |err| {
        fatal("failed to load coverage file {}: {s}", .{ cov_path, @errorName(err) });
    };

    // The file contents are untrusted: make sure the header fits before
    // reinterpreting the bytes.
    if (cov_bytes.len < @sizeOf(SeenPcsHeader)) {
        fatal("coverage file {} is too small to contain a header ({d} bytes)", .{ cov_path, cov_bytes.len });
    }

    var bw = std.io.bufferedWriter(std.io.getStdOut().writer());
    const stdout = bw.writer();

    // NOTE(review): the header is read in host byte order through this pointer
    // cast, while the PCs below are decoded as little-endian — confirm this is
    // intended on big-endian hosts.
    const header: *align(1) SeenPcsHeader = @ptrCast(cov_bytes);
    try stdout.print("{any}\n", .{header.*});

    // Validate the PC count claimed by the header against the actual file size.
    const pcs_bytes_len = std.math.mul(usize, header.pcs_len, @sizeOf(usize)) catch
        fatal("coverage file {} declares too many PCs ({d})", .{ cov_path, header.pcs_len });
    if (cov_bytes.len - @sizeOf(SeenPcsHeader) < pcs_bytes_len) {
        fatal("coverage file {} is truncated: header declares {d} PCs", .{ cov_path, header.pcs_len });
    }

    const pcs_bytes = cov_bytes[@sizeOf(SeenPcsHeader)..][0..pcs_bytes_len];
    const pcs = try arena.alloc(usize, header.pcs_len);
    for (pcs, 0..) |*pc, i| {
        pc.* = std.mem.readInt(usize, pcs_bytes[i * @sizeOf(usize) ..][0..@sizeOf(usize)], .little);
    }

    // The PCs come from the file, so report unsorted input as an error rather
    // than asserting on it.
    if (!std.sort.isSorted(usize, pcs, {}, std.sort.asc(usize))) {
        fatal("coverage file {} is malformed: PCs are not sorted in ascending order", .{cov_path});
    }

    const source_locations = try arena.alloc(std.debug.SourceLocation, pcs.len);
    try debug_info.resolveSourceLocations(gpa, pcs, source_locations);

    for (pcs, source_locations) |pc, sl| {
        try stdout.print("{x}: {s}:{d}:{d}\n", .{
            pc, sl.file_name, sl.line, sl.column,
        });
    }

    try bw.flush();
}
/// Fixed-size header that `main` expects at the very start of a coverage file.
/// `main` reads `pcs_len` PC values of `usize` size directly after it.
/// NOTE(review): presumably this layout must match whatever writes the
/// coverage file — confirm against the producer.
const SeenPcsHeader = extern struct {
    /// NOTE(review): presumably the total number of runs recorded — confirm.
    n_runs: usize,
    /// NOTE(review): meaning is not visible in this file — confirm against the producer.
    deduplicated_runs: usize,
    /// Number of `usize` PC values stored right after this header.
    pcs_len: usize,
    /// NOTE(review): meaning is not visible in this file — confirm against the producer.
    lowest_stack: usize,
};