zig/lib/std/dwarf.zig
//! DWARF debugging data format.
const builtin = @import("builtin");
const std = @import("std.zig");
const debug = std.debug;
const mem = std.mem;
const math = std.math;
const assert = debug.assert;
const native_endian = builtin.cpu.arch.endian();
pub const TAG = @import("dwarf/TAG.zig");
pub const AT = @import("dwarf/AT.zig");
pub const OP = @import("dwarf/OP.zig");
pub const LANG = @import("dwarf/LANG.zig");
pub const FORM = @import("dwarf/FORM.zig");
pub const ATE = @import("dwarf/ATE.zig");
pub const EH = @import("dwarf/EH.zig");
pub const abi = @import("dwarf/abi.zig");
pub const call_frame = @import("dwarf/call_frame.zig");
pub const expressions = @import("dwarf/expressions.zig");
pub const LLE = struct {
pub const end_of_list = 0x00;
pub const base_addressx = 0x01;
pub const startx_endx = 0x02;
pub const startx_length = 0x03;
pub const offset_pair = 0x04;
pub const default_location = 0x05;
pub const base_address = 0x06;
pub const start_end = 0x07;
pub const start_length = 0x08;
};
pub const CFA = struct {
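// advance_loc, offset, and restore are "primary" opcodes: only the high two bits
// of the instruction byte identify the operation, and the low six bits of the same
// byte carry the operand (an address delta or a register number). The remaining
// opcodes leave the high two bits zero and take any operands from the following bytes.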
pub const advance_loc = 0x40;
pub const offset = 0x80;
pub const restore = 0xc0;
pub const nop = 0x00;
pub const set_loc = 0x01;
pub const advance_loc1 = 0x02;
pub const advance_loc2 = 0x03;
pub const advance_loc4 = 0x04;
pub const offset_extended = 0x05;
pub const restore_extended = 0x06;
pub const @"undefined" = 0x07;
pub const same_value = 0x08;
pub const register = 0x09;
pub const remember_state = 0x0a;
pub const restore_state = 0x0b;
pub const def_cfa = 0x0c;
pub const def_cfa_register = 0x0d;
pub const def_cfa_offset = 0x0e;
// DWARF 3.
pub const def_cfa_expression = 0x0f;
pub const expression = 0x10;
pub const offset_extended_sf = 0x11;
pub const def_cfa_sf = 0x12;
pub const def_cfa_offset_sf = 0x13;
pub const val_offset = 0x14;
pub const val_offset_sf = 0x15;
pub const val_expression = 0x16;
pub const lo_user = 0x1c;
pub const hi_user = 0x3f;
// SGI/MIPS specific.
pub const MIPS_advance_loc8 = 0x1d;
// GNU extensions.
pub const GNU_window_save = 0x2d;
pub const GNU_args_size = 0x2e;
pub const GNU_negative_offset_extended = 0x2f;
};
pub const CHILDREN = struct {
pub const no = 0x00;
pub const yes = 0x01;
};
pub const LNS = struct {
pub const extended_op = 0x00;
pub const copy = 0x01;
pub const advance_pc = 0x02;
pub const advance_line = 0x03;
pub const set_file = 0x04;
pub const set_column = 0x05;
pub const negate_stmt = 0x06;
pub const set_basic_block = 0x07;
pub const const_add_pc = 0x08;
pub const fixed_advance_pc = 0x09;
pub const set_prologue_end = 0x0a;
pub const set_epilogue_begin = 0x0b;
pub const set_isa = 0x0c;
};
pub const LNE = struct {
pub const end_sequence = 0x01;
pub const set_address = 0x02;
pub const define_file = 0x03;
pub const set_discriminator = 0x04;
pub const lo_user = 0x80;
pub const hi_user = 0xff;
};
pub const UT = struct {
pub const compile = 0x01;
pub const @"type" = 0x02;
pub const partial = 0x03;
pub const skeleton = 0x04;
pub const split_compile = 0x05;
pub const split_type = 0x06;
pub const lo_user = 0x80;
pub const hi_user = 0xff;
};
pub const LNCT = struct {
pub const path = 0x1;
pub const directory_index = 0x2;
pub const timestamp = 0x3;
pub const size = 0x4;
pub const MD5 = 0x5;
pub const lo_user = 0x2000;
pub const hi_user = 0x3fff;
};
pub const RLE = struct {
pub const end_of_list = 0x00;
pub const base_addressx = 0x01;
pub const startx_endx = 0x02;
pub const startx_length = 0x03;
pub const offset_pair = 0x04;
pub const base_address = 0x05;
pub const start_end = 0x06;
pub const start_length = 0x07;
};
pub const CC = enum(u8) {
normal = 0x1,
program = 0x2,
nocall = 0x3,
pass_by_reference = 0x4,
pass_by_value = 0x5,
GNU_renesas_sh = 0x40,
GNU_borland_fastcall_i386 = 0x41,
pub const lo_user = 0x40;
pub const hi_user = 0xff;
};
pub const Format = enum { @"32", @"64" };
const PcRange = struct {
start: u64,
end: u64,
};
const Func = struct {
pc_range: ?PcRange,
name: ?[]const u8,
};
pub const CompileUnit = struct {
version: u16,
format: Format,
die: Die,
pc_range: ?PcRange,
str_offsets_base: usize,
addr_base: usize,
rnglists_base: usize,
loclists_base: usize,
frame_base: ?*const FormValue,
};
const Abbrev = struct {
code: u64,
tag_id: u64,
has_children: bool,
attrs: []Attr,
fn deinit(abbrev: *Abbrev, allocator: mem.Allocator) void {
allocator.free(abbrev.attrs);
abbrev.* = undefined;
}
const Attr = struct {
id: u64,
form_id: u64,
/// Only valid if form_id is FORM.implicit_const
payload: i64,
};
const Table = struct {
// offset from .debug_abbrev
offset: u64,
abbrevs: []Abbrev,
fn deinit(table: *Table, allocator: mem.Allocator) void {
for (table.abbrevs) |*abbrev| {
abbrev.deinit(allocator);
}
allocator.free(table.abbrevs);
table.* = undefined;
}
fn get(table: *const Table, abbrev_code: u64) ?*const Abbrev {
return for (table.abbrevs) |*abbrev| {
if (abbrev.code == abbrev_code) break abbrev;
} else null;
}
};
};
pub const FormValue = union(enum) {
addr: u64,
addrx: usize,
block: []const u8,
udata: u64,
data16: *const [16]u8,
sdata: i64,
exprloc: []const u8,
flag: bool,
sec_offset: u64,
ref: u64,
ref_addr: u64,
string: [:0]const u8,
strp: u64,
strx: usize,
line_strp: u64,
loclistx: u64,
rnglistx: u64,
fn getString(fv: FormValue, di: DwarfInfo) ![:0]const u8 {
switch (fv) {
.string => |s| return s,
.strp => |off| return di.getString(off),
.line_strp => |off| return di.getLineString(off),
else => return badDwarf(),
}
}
fn getUInt(fv: FormValue, comptime U: type) !U {
return switch (fv) {
inline .udata,
.sdata,
.sec_offset,
=> |c| math.cast(U, c) orelse badDwarf(),
else => badDwarf(),
};
}
};
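// A minimal sketch of `getUInt`: the integer-valued forms are cast to the requested
// type, while a value that cannot be represented (or a non-integer form) is rejected,
// assuming `badDwarf` reports `error.InvalidDebugInfo` as the error sets above indicate.
test "FormValue.getUInt accepts representable integers only" {
    const small: FormValue = .{ .udata = 0x1234 };
    try std.testing.expectEqual(@as(u16, 0x1234), try small.getUInt(u16));
    const negative: FormValue = .{ .sdata = -1 };
    try std.testing.expectError(error.InvalidDebugInfo, negative.getUInt(u64));
}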
const Die = struct {
tag_id: u64,
has_children: bool,
attrs: []Attr,
const Attr = struct {
id: u64,
value: FormValue,
};
fn deinit(self: *Die, allocator: mem.Allocator) void {
allocator.free(self.attrs);
self.* = undefined;
}
fn getAttr(self: *const Die, id: u64) ?*const FormValue {
for (self.attrs) |*attr| {
if (attr.id == id) return &attr.value;
}
return null;
}
fn getAttrAddr(
self: *const Die,
di: *const DwarfInfo,
id: u64,
compile_unit: CompileUnit,
) error{ InvalidDebugInfo, MissingDebugInfo }!u64 {
const form_value = self.getAttr(id) orelse return error.MissingDebugInfo;
return switch (form_value.*) {
.addr => |value| value,
.addrx => |index| di.readDebugAddr(compile_unit, index),
else => error.InvalidDebugInfo,
};
}
fn getAttrSecOffset(self: *const Die, id: u64) !u64 {
const form_value = self.getAttr(id) orelse return error.MissingDebugInfo;
return form_value.getUInt(u64);
}
fn getAttrUnsignedLe(self: *const Die, id: u64) !u64 {
const form_value = self.getAttr(id) orelse return error.MissingDebugInfo;
return switch (form_value.*) {
.udata => |value| value,
else => error.InvalidDebugInfo,
};
}
fn getAttrRef(self: *const Die, id: u64) !u64 {
const form_value = self.getAttr(id) orelse return error.MissingDebugInfo;
return switch (form_value.*) {
.ref => |value| value,
else => error.InvalidDebugInfo,
};
}
pub fn getAttrString(
self: *const Die,
di: *DwarfInfo,
id: u64,
opt_str: ?[]const u8,
compile_unit: CompileUnit,
) error{ InvalidDebugInfo, MissingDebugInfo }![]const u8 {
const form_value = self.getAttr(id) orelse return error.MissingDebugInfo;
switch (form_value.*) {
.string => |value| return value,
.strp => |offset| return di.getString(offset),
.strx => |index| {
const debug_str_offsets = di.section(.debug_str_offsets) orelse return badDwarf();
if (compile_unit.str_offsets_base == 0) return badDwarf();
switch (compile_unit.format) {
.@"32" => {
const byte_offset = compile_unit.str_offsets_base + 4 * index;
if (byte_offset + 4 > debug_str_offsets.len) return badDwarf();
const offset = mem.readInt(u32, debug_str_offsets[byte_offset..][0..4], di.endian);
return getStringGeneric(opt_str, offset);
},
.@"64" => {
const byte_offset = compile_unit.str_offsets_base + 8 * index;
if (byte_offset + 8 > debug_str_offsets.len) return badDwarf();
const offset = mem.readInt(u64, debug_str_offsets[byte_offset..][0..8], di.endian);
return getStringGeneric(opt_str, offset);
},
}
},
.line_strp => |offset| return di.getLineString(offset),
else => return badDwarf(),
}
}
};
const FileEntry = struct {
path: []const u8,
dir_index: u32 = 0,
mtime: u64 = 0,
size: u64 = 0,
md5: [16]u8 = [1]u8{0} ** 16,
};
const LineNumberProgram = struct {
address: u64,
file: usize,
line: i64,
column: u64,
version: u16,
is_stmt: bool,
basic_block: bool,
end_sequence: bool,
default_is_stmt: bool,
target_address: u64,
include_dirs: []const FileEntry,
prev_valid: bool,
prev_address: u64,
prev_file: usize,
prev_line: i64,
prev_column: u64,
prev_is_stmt: bool,
prev_basic_block: bool,
prev_end_sequence: bool,
// Reset the state machine following the DWARF specification
pub fn reset(self: *LineNumberProgram) void {
self.address = 0;
self.file = 1;
self.line = 1;
self.column = 0;
self.is_stmt = self.default_is_stmt;
self.basic_block = false;
self.end_sequence = false;
// Invalidate all the remaining fields
self.prev_valid = false;
self.prev_address = 0;
self.prev_file = undefined;
self.prev_line = undefined;
self.prev_column = undefined;
self.prev_is_stmt = undefined;
self.prev_basic_block = undefined;
self.prev_end_sequence = undefined;
}
pub fn init(
is_stmt: bool,
include_dirs: []const FileEntry,
target_address: u64,
version: u16,
) LineNumberProgram {
return LineNumberProgram{
.address = 0,
.file = 1,
.line = 1,
.column = 0,
.version = version,
.is_stmt = is_stmt,
.basic_block = false,
.end_sequence = false,
.include_dirs = include_dirs,
.default_is_stmt = is_stmt,
.target_address = target_address,
.prev_valid = false,
.prev_address = 0,
.prev_file = undefined,
.prev_line = undefined,
.prev_column = undefined,
.prev_is_stmt = undefined,
.prev_basic_block = undefined,
.prev_end_sequence = undefined,
};
}
pub fn checkLineMatch(
self: *LineNumberProgram,
allocator: mem.Allocator,
file_entries: []const FileEntry,
) !?debug.LineInfo {
if (self.prev_valid and
self.target_address >= self.prev_address and
self.target_address < self.address)
{
const file_index = if (self.version >= 5) self.prev_file else i: {
if (self.prev_file == 0) return missingDwarf();
break :i self.prev_file - 1;
};
if (file_index >= file_entries.len) return badDwarf();
const file_entry = &file_entries[file_index];
if (file_entry.dir_index >= self.include_dirs.len) return badDwarf();
const dir_name = self.include_dirs[file_entry.dir_index].path;
const file_name = try std.fs.path.join(allocator, &[_][]const u8{
dir_name, file_entry.path,
});
return debug.LineInfo{
.line = if (self.prev_line >= 0) @as(u64, @intCast(self.prev_line)) else 0,
.column = self.prev_column,
.file_name = file_name,
};
}
self.prev_valid = true;
self.prev_address = self.address;
self.prev_file = self.file;
self.prev_line = self.line;
self.prev_column = self.column;
self.prev_is_stmt = self.is_stmt;
self.prev_basic_block = self.basic_block;
self.prev_end_sequence = self.end_sequence;
return null;
}
};
const UnitHeader = struct {
format: Format,
header_length: u4,
unit_length: u64,
};
fn readUnitHeader(fbr: *FixedBufferReader) !UnitHeader {
return switch (try fbr.readInt(u32)) {
0...0xfffffff0 - 1 => |unit_length| .{
.format = .@"32",
.header_length = 4,
.unit_length = unit_length,
},
0xfffffff0...0xffffffff - 1 => badDwarf(),
0xffffffff => .{
.format = .@"64",
.header_length = 12,
.unit_length = try fbr.readInt(u64),
},
};
}
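// A hedged sketch of the initial-length encoding parsed above: a plain 4-byte length
// selects the 32-bit format, while the escape value 0xffffffff is followed by an
// 8-byte length and selects the 64-bit format. The buffers below are fabricated.
test "readUnitHeader initial length encodings" {
    var fbr_32: FixedBufferReader = .{
        .buf = &[_]u8{ 0x34, 0x12, 0x00, 0x00 },
        .endian = .little,
    };
    const header_32 = try readUnitHeader(&fbr_32);
    try std.testing.expectEqual(Format.@"32", header_32.format);
    try std.testing.expectEqual(@as(u64, 0x1234), header_32.unit_length);

    var fbr_64: FixedBufferReader = .{
        .buf = &[_]u8{ 0xff, 0xff, 0xff, 0xff, 0x10, 0, 0, 0, 0, 0, 0, 0 },
        .endian = .little,
    };
    const header_64 = try readUnitHeader(&fbr_64);
    try std.testing.expectEqual(Format.@"64", header_64.format);
    try std.testing.expectEqual(@as(u64, 0x10), header_64.unit_length);
}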
fn parseFormValue(
fbr: *FixedBufferReader,
form_id: u64,
format: Format,
implicit_const: ?i64,
) anyerror!FormValue {
return switch (form_id) {
FORM.addr => .{ .addr = try fbr.readAddress(switch (@bitSizeOf(usize)) {
32 => .@"32",
64 => .@"64",
else => @compileError("unsupported @bitSizeOf(usize)"),
}) },
FORM.addrx1 => .{ .addrx = try fbr.readInt(u8) },
FORM.addrx2 => .{ .addrx = try fbr.readInt(u16) },
FORM.addrx3 => .{ .addrx = try fbr.readInt(u24) },
FORM.addrx4 => .{ .addrx = try fbr.readInt(u32) },
FORM.addrx => .{ .addrx = try fbr.readUleb128(usize) },
FORM.block1,
FORM.block2,
FORM.block4,
FORM.block,
=> .{ .block = try fbr.readBytes(switch (form_id) {
FORM.block1 => try fbr.readInt(u8),
FORM.block2 => try fbr.readInt(u16),
FORM.block4 => try fbr.readInt(u32),
FORM.block => try fbr.readUleb128(usize),
else => unreachable,
}) },
FORM.data1 => .{ .udata = try fbr.readInt(u8) },
FORM.data2 => .{ .udata = try fbr.readInt(u16) },
FORM.data4 => .{ .udata = try fbr.readInt(u32) },
FORM.data8 => .{ .udata = try fbr.readInt(u64) },
FORM.data16 => .{ .data16 = (try fbr.readBytes(16))[0..16] },
FORM.udata => .{ .udata = try fbr.readUleb128(u64) },
FORM.sdata => .{ .sdata = try fbr.readIleb128(i64) },
FORM.exprloc => .{ .exprloc = try fbr.readBytes(try fbr.readUleb128(usize)) },
FORM.flag => .{ .flag = (try fbr.readByte()) != 0 },
FORM.flag_present => .{ .flag = true },
FORM.sec_offset => .{ .sec_offset = try fbr.readAddress(format) },
FORM.ref1 => .{ .ref = try fbr.readInt(u8) },
FORM.ref2 => .{ .ref = try fbr.readInt(u16) },
FORM.ref4 => .{ .ref = try fbr.readInt(u32) },
FORM.ref8 => .{ .ref = try fbr.readInt(u64) },
FORM.ref_udata => .{ .ref = try fbr.readUleb128(u64) },
FORM.ref_addr => .{ .ref_addr = try fbr.readAddress(format) },
FORM.ref_sig8 => .{ .ref = try fbr.readInt(u64) },
FORM.string => .{ .string = try fbr.readBytesTo(0) },
FORM.strp => .{ .strp = try fbr.readAddress(format) },
FORM.strx1 => .{ .strx = try fbr.readInt(u8) },
FORM.strx2 => .{ .strx = try fbr.readInt(u16) },
FORM.strx3 => .{ .strx = try fbr.readInt(u24) },
FORM.strx4 => .{ .strx = try fbr.readInt(u32) },
FORM.strx => .{ .strx = try fbr.readUleb128(usize) },
FORM.line_strp => .{ .line_strp = try fbr.readAddress(format) },
FORM.indirect => parseFormValue(fbr, try fbr.readUleb128(u64), format, implicit_const),
FORM.implicit_const => .{ .sdata = implicit_const orelse return badDwarf() },
FORM.loclistx => .{ .loclistx = try fbr.readUleb128(u64) },
FORM.rnglistx => .{ .rnglistx = try fbr.readUleb128(u64) },
else => {
//debug.print("unrecognized form id: {x}\n", .{form_id});
return badDwarf();
},
};
}
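// A hedged example of the form dispatcher above: DW_FORM_data2 reads a two-byte
// constant (little-endian here) into the generic `udata` payload.
test "parseFormValue data2" {
    var fbr: FixedBufferReader = .{
        .buf = &[_]u8{ 0x34, 0x12 },
        .endian = .little,
    };
    const value = try parseFormValue(&fbr, FORM.data2, .@"32", null);
    try std.testing.expectEqual(@as(u64, 0x1234), value.udata);
}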
pub const DwarfSection = enum {
debug_info,
debug_abbrev,
debug_str,
debug_str_offsets,
debug_line,
debug_line_str,
debug_ranges,
debug_loclists,
debug_rnglists,
debug_addr,
debug_names,
debug_frame,
eh_frame,
eh_frame_hdr,
};
pub const DwarfInfo = struct {
pub const Section = struct {
data: []const u8,
// Module-relative virtual address.
// Only set if the section data was loaded from disk.
virtual_address: ?usize = null,
// If `data` is owned by this DwarfInfo.
owned: bool,
// For sections that are not memory mapped by the loader, this is an offset
// from `data.ptr` to where the section would have been mapped. Otherwise,
// `data` is directly backed by the section and the offset is zero.
pub fn virtualOffset(self: Section, base_address: usize) i64 {
return if (self.virtual_address) |va|
@as(i64, @intCast(base_address + va)) -
@as(i64, @intCast(@intFromPtr(self.data.ptr)))
else
0;
}
};
const num_sections = std.enums.directEnumArrayLen(DwarfSection, 0);
pub const SectionArray = [num_sections]?Section;
pub const null_section_array = [_]?Section{null} ** num_sections;
endian: std.builtin.Endian,
sections: SectionArray = null_section_array,
is_macho: bool,
// Filled later by the initializer
abbrev_table_list: std.ArrayListUnmanaged(Abbrev.Table) = .{},
compile_unit_list: std.ArrayListUnmanaged(CompileUnit) = .{},
func_list: std.ArrayListUnmanaged(Func) = .{},
eh_frame_hdr: ?ExceptionFrameHeader = null,
// These lookup tables are only used if `eh_frame_hdr` is null
cie_map: std.AutoArrayHashMapUnmanaged(u64, CommonInformationEntry) = .{},
// Sorted by start_pc
fde_list: std.ArrayListUnmanaged(FrameDescriptionEntry) = .{},
pub fn section(di: DwarfInfo, dwarf_section: DwarfSection) ?[]const u8 {
return if (di.sections[@intFromEnum(dwarf_section)]) |s| s.data else null;
}
pub fn sectionVirtualOffset(di: DwarfInfo, dwarf_section: DwarfSection, base_address: usize) ?i64 {
return if (di.sections[@intFromEnum(dwarf_section)]) |s| s.virtualOffset(base_address) else null;
}
pub fn deinit(di: *DwarfInfo, allocator: mem.Allocator) void {
for (di.sections) |opt_section| {
if (opt_section) |s| if (s.owned) allocator.free(s.data);
}
for (di.abbrev_table_list.items) |*abbrev| {
abbrev.deinit(allocator);
}
di.abbrev_table_list.deinit(allocator);
for (di.compile_unit_list.items) |*cu| {
cu.die.deinit(allocator);
}
di.compile_unit_list.deinit(allocator);
di.func_list.deinit(allocator);
di.cie_map.deinit(allocator);
di.fde_list.deinit(allocator);
di.* = undefined;
}
pub fn getSymbolName(di: *DwarfInfo, address: u64) ?[]const u8 {
for (di.func_list.items) |*func| {
if (func.pc_range) |range| {
if (address >= range.start and address < range.end) {
return func.name;
}
}
}
return null;
}
fn scanAllFunctions(di: *DwarfInfo, allocator: mem.Allocator) !void {
var fbr: FixedBufferReader = .{ .buf = di.section(.debug_info).?, .endian = di.endian };
var this_unit_offset: u64 = 0;
while (this_unit_offset < fbr.buf.len) {
try fbr.seekTo(this_unit_offset);
const unit_header = try readUnitHeader(&fbr);
if (unit_header.unit_length == 0) return;
const next_offset = unit_header.header_length + unit_header.unit_length;
const version = try fbr.readInt(u16);
if (version < 2 or version > 5) return badDwarf();
var address_size: u8 = undefined;
var debug_abbrev_offset: u64 = undefined;
if (version >= 5) {
const unit_type = try fbr.readInt(u8);
if (unit_type != UT.compile) return badDwarf();
address_size = try fbr.readByte();
debug_abbrev_offset = try fbr.readAddress(unit_header.format);
} else {
debug_abbrev_offset = try fbr.readAddress(unit_header.format);
address_size = try fbr.readByte();
}
if (address_size != @sizeOf(usize)) return badDwarf();
const abbrev_table = try di.getAbbrevTable(allocator, debug_abbrev_offset);
var max_attrs: usize = 0;
var zig_padding_abbrev_code: u7 = 0;
for (abbrev_table.abbrevs) |abbrev| {
max_attrs = @max(max_attrs, abbrev.attrs.len);
if (math.cast(u7, abbrev.code)) |code| {
if (abbrev.tag_id == TAG.ZIG_padding and
!abbrev.has_children and
abbrev.attrs.len == 0)
{
zig_padding_abbrev_code = code;
}
}
}
const attrs_buf = try allocator.alloc(Die.Attr, max_attrs * 3);
defer allocator.free(attrs_buf);
var attrs_bufs: [3][]Die.Attr = undefined;
for (&attrs_bufs, 0..) |*buf, index| buf.* = attrs_buf[index * max_attrs ..][0..max_attrs];
const next_unit_pos = this_unit_offset + next_offset;
var compile_unit: CompileUnit = .{
.version = version,
.format = unit_header.format,
.die = undefined,
.pc_range = null,
.str_offsets_base = 0,
.addr_base = 0,
.rnglists_base = 0,
.loclists_base = 0,
.frame_base = null,
};
while (true) {
fbr.pos = mem.indexOfNonePos(u8, fbr.buf, fbr.pos, &.{
zig_padding_abbrev_code, 0,
}) orelse fbr.buf.len;
if (fbr.pos >= next_unit_pos) break;
var die_obj = (try parseDie(
&fbr,
attrs_bufs[0],
abbrev_table,
unit_header.format,
)) orelse continue;
switch (die_obj.tag_id) {
TAG.compile_unit => {
compile_unit.die = die_obj;
compile_unit.die.attrs = attrs_bufs[1][0..die_obj.attrs.len];
@memcpy(compile_unit.die.attrs, die_obj.attrs);
compile_unit.str_offsets_base = if (die_obj.getAttr(AT.str_offsets_base)) |fv| try fv.getUInt(usize) else 0;
compile_unit.addr_base = if (die_obj.getAttr(AT.addr_base)) |fv| try fv.getUInt(usize) else 0;
compile_unit.rnglists_base = if (die_obj.getAttr(AT.rnglists_base)) |fv| try fv.getUInt(usize) else 0;
compile_unit.loclists_base = if (die_obj.getAttr(AT.loclists_base)) |fv| try fv.getUInt(usize) else 0;
compile_unit.frame_base = die_obj.getAttr(AT.frame_base);
},
TAG.subprogram, TAG.inlined_subroutine, TAG.subroutine, TAG.entry_point => {
const fn_name = x: {
var this_die_obj = die_obj;
// Prevent endless loops
for (0..3) |_| {
if (this_die_obj.getAttr(AT.name)) |_| {
break :x try this_die_obj.getAttrString(di, AT.name, di.section(.debug_str), compile_unit);
} else if (this_die_obj.getAttr(AT.abstract_origin)) |_| {
const after_die_offset = fbr.pos;
defer fbr.pos = after_die_offset;
// Follow the DIE it points to and repeat
const ref_offset = try this_die_obj.getAttrRef(AT.abstract_origin);
if (ref_offset > next_offset) return badDwarf();
try fbr.seekTo(this_unit_offset + ref_offset);
this_die_obj = (try parseDie(
&fbr,
attrs_bufs[2],
abbrev_table,
unit_header.format,
)) orelse return badDwarf();
} else if (this_die_obj.getAttr(AT.specification)) |_| {
const after_die_offset = fbr.pos;
defer fbr.pos = after_die_offset;
// Follow the DIE it points to and repeat
const ref_offset = try this_die_obj.getAttrRef(AT.specification);
if (ref_offset > next_offset) return badDwarf();
try fbr.seekTo(this_unit_offset + ref_offset);
this_die_obj = (try parseDie(
&fbr,
attrs_bufs[2],
abbrev_table,
unit_header.format,
)) orelse return badDwarf();
} else {
break :x null;
}
}
break :x null;
};
var range_added = if (die_obj.getAttrAddr(di, AT.low_pc, compile_unit)) |low_pc| blk: {
if (die_obj.getAttr(AT.high_pc)) |high_pc_value| {
const pc_end = switch (high_pc_value.*) {
.addr => |value| value,
.udata => |offset| low_pc + offset,
else => return badDwarf(),
};
try di.func_list.append(allocator, .{
.name = fn_name,
.pc_range = .{
.start = low_pc,
.end = pc_end,
},
});
break :blk true;
}
break :blk false;
} else |err| blk: {
if (err != error.MissingDebugInfo) return err;
break :blk false;
};
if (die_obj.getAttr(AT.ranges)) |ranges_value| blk: {
var iter = DebugRangeIterator.init(ranges_value, di, &compile_unit) catch |err| {
if (err != error.MissingDebugInfo) return err;
break :blk;
};
while (try iter.next()) |range| {
range_added = true;
try di.func_list.append(allocator, .{
.name = fn_name,
.pc_range = .{
.start = range.start_addr,
.end = range.end_addr,
},
});
}
}
if (fn_name != null and !range_added) {
try di.func_list.append(allocator, .{
.name = fn_name,
.pc_range = null,
});
}
},
else => {},
}
}
this_unit_offset += next_offset;
}
}
fn scanAllCompileUnits(di: *DwarfInfo, allocator: mem.Allocator) !void {
var fbr: FixedBufferReader = .{ .buf = di.section(.debug_info).?, .endian = di.endian };
var this_unit_offset: u64 = 0;
var attrs_buf = std.ArrayList(Die.Attr).init(allocator);
defer attrs_buf.deinit();
while (this_unit_offset < fbr.buf.len) {
try fbr.seekTo(this_unit_offset);
const unit_header = try readUnitHeader(&fbr);
if (unit_header.unit_length == 0) return;
const next_offset = unit_header.header_length + unit_header.unit_length;
const version = try fbr.readInt(u16);
if (version < 2 or version > 5) return badDwarf();
var address_size: u8 = undefined;
var debug_abbrev_offset: u64 = undefined;
if (version >= 5) {
const unit_type = try fbr.readInt(u8);
if (unit_type != UT.compile) return badDwarf();
address_size = try fbr.readByte();
debug_abbrev_offset = try fbr.readAddress(unit_header.format);
} else {
debug_abbrev_offset = try fbr.readAddress(unit_header.format);
address_size = try fbr.readByte();
}
if (address_size != @sizeOf(usize)) return badDwarf();
const abbrev_table = try di.getAbbrevTable(allocator, debug_abbrev_offset);
var max_attrs: usize = 0;
for (abbrev_table.abbrevs) |abbrev| {
max_attrs = @max(max_attrs, abbrev.attrs.len);
}
try attrs_buf.resize(max_attrs);
var compile_unit_die = (try parseDie(
&fbr,
attrs_buf.items,
abbrev_table,
unit_header.format,
)) orelse return badDwarf();
if (compile_unit_die.tag_id != TAG.compile_unit) return badDwarf();
compile_unit_die.attrs = try allocator.dupe(Die.Attr, compile_unit_die.attrs);
var compile_unit: CompileUnit = .{
.version = version,
.format = unit_header.format,
.pc_range = null,
.die = compile_unit_die,
.str_offsets_base = if (compile_unit_die.getAttr(AT.str_offsets_base)) |fv| try fv.getUInt(usize) else 0,
.addr_base = if (compile_unit_die.getAttr(AT.addr_base)) |fv| try fv.getUInt(usize) else 0,
.rnglists_base = if (compile_unit_die.getAttr(AT.rnglists_base)) |fv| try fv.getUInt(usize) else 0,
.loclists_base = if (compile_unit_die.getAttr(AT.loclists_base)) |fv| try fv.getUInt(usize) else 0,
.frame_base = compile_unit_die.getAttr(AT.frame_base),
};
compile_unit.pc_range = x: {
if (compile_unit_die.getAttrAddr(di, AT.low_pc, compile_unit)) |low_pc| {
if (compile_unit_die.getAttr(AT.high_pc)) |high_pc_value| {
const pc_end = switch (high_pc_value.*) {
.addr => |value| value,
.udata => |offset| low_pc + offset,
else => return badDwarf(),
};
break :x PcRange{
.start = low_pc,
.end = pc_end,
};
} else {
break :x null;
}
} else |err| {
if (err != error.MissingDebugInfo) return err;
break :x null;
}
};
try di.compile_unit_list.append(allocator, compile_unit);
this_unit_offset += next_offset;
}
}
const DebugRangeIterator = struct {
base_address: u64,
section_type: DwarfSection,
di: *const DwarfInfo,
compile_unit: *const CompileUnit,
fbr: FixedBufferReader,
pub fn init(ranges_value: *const FormValue, di: *const DwarfInfo, compile_unit: *const CompileUnit) !@This() {
const section_type = if (compile_unit.version >= 5) DwarfSection.debug_rnglists else DwarfSection.debug_ranges;
const debug_ranges = di.section(section_type) orelse return error.MissingDebugInfo;
const ranges_offset = switch (ranges_value.*) {
.sec_offset, .udata => |off| off,
.rnglistx => |idx| off: {
switch (compile_unit.format) {
.@"32" => {
const offset_loc = @as(usize, @intCast(compile_unit.rnglists_base + 4 * idx));
if (offset_loc + 4 > debug_ranges.len) return badDwarf();
const offset = mem.readInt(u32, debug_ranges[offset_loc..][0..4], di.endian);
break :off compile_unit.rnglists_base + offset;
},
.@"64" => {
const offset_loc = @as(usize, @intCast(compile_unit.rnglists_base + 8 * idx));
if (offset_loc + 8 > debug_ranges.len) return badDwarf();
const offset = mem.readInt(u64, debug_ranges[offset_loc..][0..8], di.endian);
break :off compile_unit.rnglists_base + offset;
},
}
},
else => return badDwarf(),
};
// All the addresses in the list are relative to the value
// specified by DW_AT_low_pc or to some other value encoded
// in the list itself.
// If no starting value is specified, use zero.
const base_address = compile_unit.die.getAttrAddr(di, AT.low_pc, compile_unit.*) catch |err| switch (err) {
error.MissingDebugInfo => 0,
else => return err,
};
return .{
.base_address = base_address,
.section_type = section_type,
.di = di,
.compile_unit = compile_unit,
.fbr = .{
.buf = debug_ranges,
.pos = math.cast(usize, ranges_offset) orelse return badDwarf(),
.endian = di.endian,
},
};
}
// Returns the next range in the list, or null if the end was reached.
pub fn next(self: *@This()) !?struct { start_addr: u64, end_addr: u64 } {
switch (self.section_type) {
.debug_rnglists => {
const kind = try self.fbr.readByte();
switch (kind) {
RLE.end_of_list => return null,
RLE.base_addressx => {
const index = try self.fbr.readUleb128(usize);
self.base_address = try self.di.readDebugAddr(self.compile_unit.*, index);
return try self.next();
},
RLE.startx_endx => {
const start_index = try self.fbr.readUleb128(usize);
const start_addr = try self.di.readDebugAddr(self.compile_unit.*, start_index);
const end_index = try self.fbr.readUleb128(usize);
const end_addr = try self.di.readDebugAddr(self.compile_unit.*, end_index);
return .{
.start_addr = start_addr,
.end_addr = end_addr,
};
},
RLE.startx_length => {
const start_index = try self.fbr.readUleb128(usize);
const start_addr = try self.di.readDebugAddr(self.compile_unit.*, start_index);
const len = try self.fbr.readUleb128(usize);
const end_addr = start_addr + len;
return .{
.start_addr = start_addr,
.end_addr = end_addr,
};
},
RLE.offset_pair => {
const start_addr = try self.fbr.readUleb128(usize);
const end_addr = try self.fbr.readUleb128(usize);
// This is the only kind that uses the base address
return .{
.start_addr = self.base_address + start_addr,
.end_addr = self.base_address + end_addr,
};
},
RLE.base_address => {
self.base_address = try self.fbr.readInt(usize);
return try self.next();
},
RLE.start_end => {
const start_addr = try self.fbr.readInt(usize);
const end_addr = try self.fbr.readInt(usize);
return .{
.start_addr = start_addr,
.end_addr = end_addr,
};
},
RLE.start_length => {
const start_addr = try self.fbr.readInt(usize);
const len = try self.fbr.readUleb128(usize);
const end_addr = start_addr + len;
return .{
.start_addr = start_addr,
.end_addr = end_addr,
};
},
else => return badDwarf(),
}
},
.debug_ranges => {
const start_addr = try self.fbr.readInt(usize);
const end_addr = try self.fbr.readInt(usize);
if (start_addr == 0 and end_addr == 0) return null;
// This entry selects a new value for the base address
if (start_addr == math.maxInt(usize)) {
self.base_address = end_addr;
return try self.next();
}
return .{
.start_addr = self.base_address + start_addr,
.end_addr = self.base_address + end_addr,
};
},
else => unreachable,
}
}
};
pub fn findCompileUnit(di: *const DwarfInfo, target_address: u64) !*const CompileUnit {
for (di.compile_unit_list.items) |*compile_unit| {
if (compile_unit.pc_range) |range| {
if (target_address >= range.start and target_address < range.end) return compile_unit;
}
const ranges_value = compile_unit.die.getAttr(AT.ranges) orelse continue;
var iter = DebugRangeIterator.init(ranges_value, di, compile_unit) catch continue;
while (try iter.next()) |range| {
if (target_address >= range.start_addr and target_address < range.end_addr) return compile_unit;
}
}
return missingDwarf();
}
/// Gets an already existing AbbrevTable given the abbrev_offset, or if not found,
/// seeks in the stream and parses it.
fn getAbbrevTable(di: *DwarfInfo, allocator: mem.Allocator, abbrev_offset: u64) !*const Abbrev.Table {
for (di.abbrev_table_list.items) |*table| {
if (table.offset == abbrev_offset) {
return table;
}
}
try di.abbrev_table_list.append(
allocator,
try di.parseAbbrevTable(allocator, abbrev_offset),
);
return &di.abbrev_table_list.items[di.abbrev_table_list.items.len - 1];
}
fn parseAbbrevTable(di: *DwarfInfo, allocator: mem.Allocator, offset: u64) !Abbrev.Table {
var fbr: FixedBufferReader = .{
.buf = di.section(.debug_abbrev).?,
.pos = math.cast(usize, offset) orelse return badDwarf(),
.endian = di.endian,
};
var abbrevs = std.ArrayList(Abbrev).init(allocator);
defer {
for (abbrevs.items) |*abbrev| {
abbrev.deinit(allocator);
}
abbrevs.deinit();
}
var attrs = std.ArrayList(Abbrev.Attr).init(allocator);
defer attrs.deinit();
while (true) {
const code = try fbr.readUleb128(u64);
if (code == 0) break;
const tag_id = try fbr.readUleb128(u64);
const has_children = (try fbr.readByte()) == CHILDREN.yes;
while (true) {
const attr_id = try fbr.readUleb128(u64);
const form_id = try fbr.readUleb128(u64);
if (attr_id == 0 and form_id == 0) break;
try attrs.append(.{
.id = attr_id,
.form_id = form_id,
.payload = switch (form_id) {
FORM.implicit_const => try fbr.readIleb128(i64),
else => undefined,
},
});
}
try abbrevs.append(.{
.code = code,
.tag_id = tag_id,
.has_children = has_children,
.attrs = try attrs.toOwnedSlice(),
});
}
return .{
.offset = offset,
.abbrevs = try abbrevs.toOwnedSlice(),
};
}
fn parseDie(
fbr: *FixedBufferReader,
attrs_buf: []Die.Attr,
abbrev_table: *const Abbrev.Table,
format: Format,
) !?Die {
const abbrev_code = try fbr.readUleb128(u64);
if (abbrev_code == 0) return null;
const table_entry = abbrev_table.get(abbrev_code) orelse return badDwarf();
const attrs = attrs_buf[0..table_entry.attrs.len];
for (attrs, table_entry.attrs) |*result_attr, attr| result_attr.* = Die.Attr{
.id = attr.id,
.value = try parseFormValue(
fbr,
attr.form_id,
format,
attr.payload,
),
};
return .{
.tag_id = table_entry.tag_id,
.has_children = table_entry.has_children,
.attrs = attrs,
};
}
pub fn getLineNumberInfo(
di: *DwarfInfo,
allocator: mem.Allocator,
compile_unit: CompileUnit,
target_address: u64,
) !debug.LineInfo {
const compile_unit_cwd = try compile_unit.die.getAttrString(di, AT.comp_dir, di.section(.debug_line_str), compile_unit);
const line_info_offset = try compile_unit.die.getAttrSecOffset(AT.stmt_list);
var fbr: FixedBufferReader = .{ .buf = di.section(.debug_line).?, .endian = di.endian };
try fbr.seekTo(line_info_offset);
const unit_header = try readUnitHeader(&fbr);
if (unit_header.unit_length == 0) return missingDwarf();
const next_offset = unit_header.header_length + unit_header.unit_length;
const version = try fbr.readInt(u16);
if (version < 2) return badDwarf();
var addr_size: u8 = switch (unit_header.format) {
.@"32" => 4,
.@"64" => 8,
};
var seg_size: u8 = 0;
if (version >= 5) {
addr_size = try fbr.readByte();
seg_size = try fbr.readByte();
}
const prologue_length = try fbr.readAddress(unit_header.format);
const prog_start_offset = fbr.pos + prologue_length;
const minimum_instruction_length = try fbr.readByte();
if (minimum_instruction_length == 0) return badDwarf();
if (version >= 4) {
// maximum_operations_per_instruction
_ = try fbr.readByte();
}
const default_is_stmt = (try fbr.readByte()) != 0;
const line_base = try fbr.readByteSigned();
const line_range = try fbr.readByte();
if (line_range == 0) return badDwarf();
const opcode_base = try fbr.readByte();
const standard_opcode_lengths = try allocator.alloc(u8, opcode_base - 1);
defer allocator.free(standard_opcode_lengths);
{
var i: usize = 0;
while (i < opcode_base - 1) : (i += 1) {
standard_opcode_lengths[i] = try fbr.readByte();
}
}
var include_directories = std.ArrayList(FileEntry).init(allocator);
defer include_directories.deinit();
var file_entries = std.ArrayList(FileEntry).init(allocator);
defer file_entries.deinit();
if (version < 5) {
try include_directories.append(.{ .path = compile_unit_cwd });
while (true) {
const dir = try fbr.readBytesTo(0);
if (dir.len == 0) break;
try include_directories.append(.{ .path = dir });
}
while (true) {
const file_name = try fbr.readBytesTo(0);
if (file_name.len == 0) break;
const dir_index = try fbr.readUleb128(u32);
const mtime = try fbr.readUleb128(u64);
const size = try fbr.readUleb128(u64);
try file_entries.append(.{
.path = file_name,
.dir_index = dir_index,
.mtime = mtime,
.size = size,
});
}
} else {
const FileEntFmt = struct {
content_type_code: u8,
form_code: u16,
};
{
var dir_ent_fmt_buf: [10]FileEntFmt = undefined;
const directory_entry_format_count = try fbr.readByte();
if (directory_entry_format_count > dir_ent_fmt_buf.len) return badDwarf();
for (dir_ent_fmt_buf[0..directory_entry_format_count]) |*ent_fmt| {
ent_fmt.* = .{
.content_type_code = try fbr.readUleb128(u8),
.form_code = try fbr.readUleb128(u16),
};
}
const directories_count = try fbr.readUleb128(usize);
try include_directories.ensureUnusedCapacity(directories_count);
{
var i: usize = 0;
while (i < directories_count) : (i += 1) {
var e: FileEntry = .{ .path = &.{} };
for (dir_ent_fmt_buf[0..directory_entry_format_count]) |ent_fmt| {
const form_value = try parseFormValue(
&fbr,
ent_fmt.form_code,
unit_header.format,
null,
);
switch (ent_fmt.content_type_code) {
LNCT.path => e.path = try form_value.getString(di.*),
LNCT.directory_index => e.dir_index = try form_value.getUInt(u32),
LNCT.timestamp => e.mtime = try form_value.getUInt(u64),
LNCT.size => e.size = try form_value.getUInt(u64),
LNCT.MD5 => e.md5 = switch (form_value) {
.data16 => |data16| data16.*,
else => return badDwarf(),
},
else => continue,
}
}
include_directories.appendAssumeCapacity(e);
}
}
}
var file_ent_fmt_buf: [10]FileEntFmt = undefined;
const file_name_entry_format_count = try fbr.readByte();
if (file_name_entry_format_count > file_ent_fmt_buf.len) return badDwarf();
for (file_ent_fmt_buf[0..file_name_entry_format_count]) |*ent_fmt| {
ent_fmt.* = .{
.content_type_code = try fbr.readUleb128(u8),
.form_code = try fbr.readUleb128(u16),
};
}
const file_names_count = try fbr.readUleb128(usize);
try file_entries.ensureUnusedCapacity(file_names_count);
{
var i: usize = 0;
while (i < file_names_count) : (i += 1) {
var e: FileEntry = .{ .path = &.{} };
for (file_ent_fmt_buf[0..file_name_entry_format_count]) |ent_fmt| {
const form_value = try parseFormValue(
&fbr,
ent_fmt.form_code,
unit_header.format,
null,
);
switch (ent_fmt.content_type_code) {
LNCT.path => e.path = try form_value.getString(di.*),
LNCT.directory_index => e.dir_index = try form_value.getUInt(u32),
LNCT.timestamp => e.mtime = try form_value.getUInt(u64),
LNCT.size => e.size = try form_value.getUInt(u64),
LNCT.MD5 => e.md5 = switch (form_value) {
.data16 => |data16| data16.*,
else => return badDwarf(),
},
else => continue,
}
}
file_entries.appendAssumeCapacity(e);
}
}
}
var prog = LineNumberProgram.init(
default_is_stmt,
include_directories.items,
target_address,
version,
);
try fbr.seekTo(prog_start_offset);
const next_unit_pos = line_info_offset + next_offset;
while (fbr.pos < next_unit_pos) {
const opcode = try fbr.readByte();
if (opcode == LNS.extended_op) {
const op_size = try fbr.readUleb128(u64);
if (op_size < 1) return badDwarf();
const sub_op = try fbr.readByte();
switch (sub_op) {
LNE.end_sequence => {
prog.end_sequence = true;
if (try prog.checkLineMatch(allocator, file_entries.items)) |info| return info;
prog.reset();
},
LNE.set_address => {
const addr = try fbr.readInt(usize);
prog.address = addr;
},
LNE.define_file => {
const path = try fbr.readBytesTo(0);
const dir_index = try fbr.readUleb128(u32);
const mtime = try fbr.readUleb128(u64);
const size = try fbr.readUleb128(u64);
try file_entries.append(.{
.path = path,
.dir_index = dir_index,
.mtime = mtime,
.size = size,
});
},
else => try fbr.seekForward(op_size - 1),
}
} else if (opcode >= opcode_base) {
// special opcodes
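// Hedged worked example with the typical header values opcode_base = 13,
// line_base = -5, line_range = 14, and minimum_instruction_length = 1:
// special opcode 0x4b has adjusted_opcode 62, advancing the address by
// 62 / 14 = 4 and the line by -5 + (62 % 14) = 1.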
const adjusted_opcode = opcode - opcode_base;
const inc_addr = minimum_instruction_length * (adjusted_opcode / line_range);
const inc_line = @as(i32, line_base) + @as(i32, adjusted_opcode % line_range);
prog.line += inc_line;
prog.address += inc_addr;
if (try prog.checkLineMatch(allocator, file_entries.items)) |info| return info;
prog.basic_block = false;
} else {
switch (opcode) {
LNS.copy => {
if (try prog.checkLineMatch(allocator, file_entries.items)) |info| return info;
prog.basic_block = false;
},
LNS.advance_pc => {
const arg = try fbr.readUleb128(usize);
prog.address += arg * minimum_instruction_length;
},
LNS.advance_line => {
const arg = try fbr.readIleb128(i64);
prog.line += arg;
},
LNS.set_file => {
const arg = try fbr.readUleb128(usize);
prog.file = arg;
},
LNS.set_column => {
const arg = try fbr.readUleb128(u64);
prog.column = arg;
},
LNS.negate_stmt => {
prog.is_stmt = !prog.is_stmt;
},
LNS.set_basic_block => {
prog.basic_block = true;
},
LNS.const_add_pc => {
const inc_addr = minimum_instruction_length * ((255 - opcode_base) / line_range);
prog.address += inc_addr;
},
LNS.fixed_advance_pc => {
const arg = try fbr.readInt(u16);
prog.address += arg;
},
LNS.set_prologue_end => {},
else => {
if (opcode - 1 >= standard_opcode_lengths.len) return badDwarf();
try fbr.seekForward(standard_opcode_lengths[opcode - 1]);
},
}
}
}
return missingDwarf();
}
fn getString(di: DwarfInfo, offset: u64) ![:0]const u8 {
return getStringGeneric(di.section(.debug_str), offset);
}
fn getLineString(di: DwarfInfo, offset: u64) ![:0]const u8 {
return getStringGeneric(di.section(.debug_line_str), offset);
}
fn readDebugAddr(di: DwarfInfo, compile_unit: CompileUnit, index: u64) !u64 {
const debug_addr = di.section(.debug_addr) orelse return badDwarf();
// addr_base points to the first item after the header; however, we
// need to read the header to know the size of each item. Empirically,
// it may disagree with the format recorded on the compile unit.
// The header is 8 bytes in the 32-bit format and 16 bytes in the 64-bit format.
if (compile_unit.addr_base < 8) return badDwarf();
const version = mem.readInt(u16, debug_addr[compile_unit.addr_base - 4 ..][0..2], di.endian);
if (version != 5) return badDwarf();
const addr_size = debug_addr[compile_unit.addr_base - 2];
const seg_size = debug_addr[compile_unit.addr_base - 1];
const byte_offset = @as(usize, @intCast(compile_unit.addr_base + (addr_size + seg_size) * index));
if (byte_offset + addr_size > debug_addr.len) return badDwarf();
return switch (addr_size) {
1 => debug_addr[byte_offset],
2 => mem.readInt(u16, debug_addr[byte_offset..][0..2], di.endian),
4 => mem.readInt(u32, debug_addr[byte_offset..][0..4], di.endian),
8 => mem.readInt(u64, debug_addr[byte_offset..][0..8], di.endian),
else => badDwarf(),
};
}
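// A hedged sketch of the .debug_addr layout described above: for the 32-bit format,
// the DWARF 5 header (unit_length, version, address_size, segment_selector_size)
// occupies the first 8 bytes, so an addr_base of 8 points at the first entry.
// The section bytes and compile unit below are fabricated for illustration.
test "readDebugAddr reads past the unit header" {
    const debug_addr_bytes = [_]u8{
        0, 0, 0, 0, // unit_length (not consulted here)
        5, 0, // version = 5
        8, // address_size = 8
        0, // segment_selector_size = 0
        0x78, 0x56, 0x34, 0x12, 0, 0, 0, 0, // entry 0
    };
    var sections = DwarfInfo.null_section_array;
    sections[@intFromEnum(DwarfSection.debug_addr)] = .{
        .data = &debug_addr_bytes,
        .owned = false,
    };
    const di: DwarfInfo = .{
        .endian = .little,
        .is_macho = false,
        .sections = sections,
    };
    const cu: CompileUnit = .{
        .version = 5,
        .format = .@"32",
        .die = undefined,
        .pc_range = null,
        .str_offsets_base = 0,
        .addr_base = 8,
        .rnglists_base = 0,
        .loclists_base = 0,
        .frame_base = null,
    };
    try std.testing.expectEqual(@as(u64, 0x12345678), try di.readDebugAddr(cu, 0));
}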
/// If .eh_frame_hdr is present, then only the header needs to be parsed.
///
/// Otherwise, .eh_frame and .debug_frame are scanned and a sorted list
/// of FDEs is built for binary searching during unwinding.
pub fn scanAllUnwindInfo(di: *DwarfInfo, allocator: mem.Allocator, base_address: usize) !void {
if (di.section(.eh_frame_hdr)) |eh_frame_hdr| blk: {
var fbr: FixedBufferReader = .{ .buf = eh_frame_hdr, .endian = native_endian };
const version = try fbr.readByte();
if (version != 1) break :blk;
const eh_frame_ptr_enc = try fbr.readByte();
if (eh_frame_ptr_enc == EH.PE.omit) break :blk;
const fde_count_enc = try fbr.readByte();
if (fde_count_enc == EH.PE.omit) break :blk;
const table_enc = try fbr.readByte();
if (table_enc == EH.PE.omit) break :blk;
const eh_frame_ptr = math.cast(usize, try readEhPointer(&fbr, eh_frame_ptr_enc, @sizeOf(usize), .{
.pc_rel_base = @intFromPtr(&eh_frame_hdr[fbr.pos]),
.follow_indirect = true,
}) orelse return badDwarf()) orelse return badDwarf();
const fde_count = math.cast(usize, try readEhPointer(&fbr, fde_count_enc, @sizeOf(usize), .{
.pc_rel_base = @intFromPtr(&eh_frame_hdr[fbr.pos]),
.follow_indirect = true,
}) orelse return badDwarf()) orelse return badDwarf();
const entry_size = try ExceptionFrameHeader.entrySize(table_enc);
const entries_len = fde_count * entry_size;
if (entries_len > eh_frame_hdr.len - fbr.pos) return badDwarf();
di.eh_frame_hdr = .{
.eh_frame_ptr = eh_frame_ptr,
.table_enc = table_enc,
.fde_count = fde_count,
.entries = eh_frame_hdr[fbr.pos..][0..entries_len],
};
// No need to scan .eh_frame, we have a binary search table already
return;
}
const frame_sections = [2]DwarfSection{ .eh_frame, .debug_frame };
for (frame_sections) |frame_section| {
if (di.section(frame_section)) |section_data| {
var fbr: FixedBufferReader = .{ .buf = section_data, .endian = di.endian };
while (fbr.pos < fbr.buf.len) {
const entry_header = try EntryHeader.read(&fbr, frame_section);
switch (entry_header.type) {
.cie => {
const cie = try CommonInformationEntry.parse(
entry_header.entry_bytes,
di.sectionVirtualOffset(frame_section, base_address).?,
true,
entry_header.format,
frame_section,
entry_header.length_offset,
@sizeOf(usize),
di.endian,
);
try di.cie_map.put(allocator, entry_header.length_offset, cie);
},
.fde => |cie_offset| {
const cie = di.cie_map.get(cie_offset) orelse return badDwarf();
const fde = try FrameDescriptionEntry.parse(
entry_header.entry_bytes,
di.sectionVirtualOffset(frame_section, base_address).?,
true,
cie,
@sizeOf(usize),
di.endian,
);
try di.fde_list.append(allocator, fde);
},
.terminator => break,
}
}
mem.sortUnstable(FrameDescriptionEntry, di.fde_list.items, {}, struct {
fn lessThan(ctx: void, a: FrameDescriptionEntry, b: FrameDescriptionEntry) bool {
_ = ctx;
return a.pc_begin < b.pc_begin;
}
}.lessThan);
}
}
}
/// Unwind a stack frame using DWARF unwinding info, updating the register context.
///
/// If `.eh_frame_hdr` is available, it will be used to binary search for the FDE.
/// Otherwise, a linear scan of `.eh_frame` and `.debug_frame` is done to find the FDE.
///
/// `explicit_fde_offset` is for cases where the FDE offset is known, such as when __unwind_info
/// defers unwinding to DWARF. This is an offset into the `.eh_frame` section.
pub fn unwindFrame(di: *const DwarfInfo, context: *UnwindContext, explicit_fde_offset: ?usize) !usize {
if (!comptime abi.supportsUnwinding(builtin.target)) return error.UnsupportedCpuArchitecture;
if (context.pc == 0) return 0;
// Find the FDE and CIE
var cie: CommonInformationEntry = undefined;
var fde: FrameDescriptionEntry = undefined;
if (explicit_fde_offset) |fde_offset| {
const dwarf_section: DwarfSection = .eh_frame;
const frame_section = di.section(dwarf_section) orelse return error.MissingFDE;
if (fde_offset >= frame_section.len) return error.MissingFDE;
var fbr: FixedBufferReader = .{
.buf = frame_section,
.pos = fde_offset,
.endian = di.endian,
};
const fde_entry_header = try EntryHeader.read(&fbr, dwarf_section);
if (fde_entry_header.type != .fde) return error.MissingFDE;
const cie_offset = fde_entry_header.type.fde;
try fbr.seekTo(cie_offset);
fbr.endian = native_endian;
const cie_entry_header = try EntryHeader.read(&fbr, dwarf_section);
if (cie_entry_header.type != .cie) return badDwarf();
cie = try CommonInformationEntry.parse(
cie_entry_header.entry_bytes,
0,
true,
cie_entry_header.format,
dwarf_section,
cie_entry_header.length_offset,
@sizeOf(usize),
native_endian,
);
fde = try FrameDescriptionEntry.parse(
fde_entry_header.entry_bytes,
0,
true,
cie,
@sizeOf(usize),
native_endian,
);
} else if (di.eh_frame_hdr) |header| {
const eh_frame_len = if (di.section(.eh_frame)) |eh_frame| eh_frame.len else null;
try header.findEntry(
context.isValidMemory,
eh_frame_len,
@intFromPtr(di.section(.eh_frame_hdr).?.ptr),
context.pc,
&cie,
&fde,
);
} else {
const index = std.sort.binarySearch(FrameDescriptionEntry, context.pc, di.fde_list.items, {}, struct {
pub fn compareFn(_: void, pc: usize, mid_item: FrameDescriptionEntry) math.Order {
if (pc < mid_item.pc_begin) return .lt;
const range_end = mid_item.pc_begin + mid_item.pc_range;
if (pc < range_end) return .eq;
return .gt;
}
}.compareFn);
fde = if (index) |i| di.fde_list.items[i] else return error.MissingFDE;
cie = di.cie_map.get(fde.cie_length_offset) orelse return error.MissingCIE;
}
var expression_context: expressions.ExpressionContext = .{
.format = cie.format,
.isValidMemory = context.isValidMemory,
.compile_unit = di.findCompileUnit(fde.pc_begin) catch null,
.thread_context = context.thread_context,
.reg_context = context.reg_context,
.cfa = context.cfa,
};
context.vm.reset();
context.reg_context.eh_frame = cie.version != 4;
context.reg_context.is_macho = di.is_macho;
const row = try context.vm.runToNative(context.allocator, context.pc, cie, fde);
context.cfa = switch (row.cfa.rule) {
.val_offset => |offset| blk: {
const register = row.cfa.register orelse return error.InvalidCFARule;
const value = mem.readInt(usize, (try abi.regBytes(context.thread_context, register, context.reg_context))[0..@sizeOf(usize)], native_endian);
break :blk try call_frame.applyOffset(value, offset);
},
.expression => |expression| blk: {
context.stack_machine.reset();
const value = try context.stack_machine.run(
expression,
context.allocator,
expression_context,
context.cfa,
);
if (value) |v| {
if (v != .generic) return error.InvalidExpressionValue;
break :blk v.generic;
} else return error.NoExpressionValue;
},
else => return error.InvalidCFARule,
};
if (!context.isValidMemory(context.cfa.?)) return error.InvalidCFA;
expression_context.cfa = context.cfa;
// Buffering the modifications is done because copying the thread context is not portable;
// some implementations (e.g. Darwin) use internal pointers to the mcontext.
var arena = std.heap.ArenaAllocator.init(context.allocator);
defer arena.deinit();
const update_allocator = arena.allocator();
const RegisterUpdate = struct {
// Backed by thread_context
dest: []u8,
// Backed by arena
src: []const u8,
prev: ?*@This(),
};
var update_tail: ?*RegisterUpdate = null;
var has_return_address = true;
for (context.vm.rowColumns(row)) |column| {
if (column.register) |register| {
if (register == cie.return_address_register) {
has_return_address = column.rule != .undefined;
}
const dest = try abi.regBytes(context.thread_context, register, context.reg_context);
const src = try update_allocator.alloc(u8, dest.len);
const prev = update_tail;
update_tail = try update_allocator.create(RegisterUpdate);
update_tail.?.* = .{
.dest = dest,
.src = src,
.prev = prev,
};
try column.resolveValue(
context,
expression_context,
src,
);
}
}
// On all implemented architectures, the CFA is defined as being the previous frame's SP
(try abi.regValueNative(usize, context.thread_context, abi.spRegNum(context.reg_context), context.reg_context)).* = context.cfa.?;
while (update_tail) |tail| {
@memcpy(tail.dest, tail.src);
update_tail = tail.prev;
}
if (has_return_address) {
context.pc = abi.stripInstructionPtrAuthCode(mem.readInt(usize, (try abi.regBytes(
context.thread_context,
cie.return_address_register,
context.reg_context,
))[0..@sizeOf(usize)], native_endian));
} else {
context.pc = 0;
}
(try abi.regValueNative(usize, context.thread_context, abi.ipRegNum(), context.reg_context)).* = context.pc;
// The call instruction will have pushed the address of the instruction that follows the call as the return address.
// This next instruction may be past the end of the function if the caller was `noreturn` (ie. the last instruction in
// the function was the call). If we were to look up an FDE entry using the return address directly, it could end up
// either not finding an FDE at all, or using the next FDE in the program, producing incorrect results. To prevent this,
// we subtract one so that the next lookup is guaranteed to land inside the function containing the call.
//
// The exception to this rule is signal frames, where execution would be returned to the instruction
// that triggered the handler, so the address is not adjusted.
const return_address = context.pc;
if (context.pc > 0 and !cie.isSignalFrame()) context.pc -= 1;
return return_address;
}
};
/// Returns the DWARF register number for an x86_64 register number found in compact unwind info
fn compactUnwindToDwarfRegNumber(unwind_reg_number: u3) !u8 {
return switch (unwind_reg_number) {
1 => 3, // RBX
2 => 12, // R12
3 => 13, // R13
4 => 14, // R14
5 => 15, // R15
6 => 6, // RBP
else => error.InvalidUnwindRegisterNumber,
};
}
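// A small sketch of the mapping documented above; register numbers 0 and 7 have no
// compact-unwind meaning here and are rejected.
test "compactUnwindToDwarfRegNumber" {
    try std.testing.expectEqual(@as(u8, 3), try compactUnwindToDwarfRegNumber(1)); // RBX
    try std.testing.expectEqual(@as(u8, 6), try compactUnwindToDwarfRegNumber(6)); // RBP
    try std.testing.expectError(error.InvalidUnwindRegisterNumber, compactUnwindToDwarfRegNumber(0));
}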
const macho = std.macho;
/// Unwind a frame using MachO compact unwind info (from __unwind_info).
/// If the compact encoding can't encode a way to unwind a frame, it will
/// defer unwinding to DWARF, in which case `.eh_frame` will be used if available.
pub fn unwindFrameMachO(context: *UnwindContext, unwind_info: []const u8, eh_frame: ?[]const u8, module_base_address: usize) !usize {
const header = mem.bytesAsValue(
macho.unwind_info_section_header,
unwind_info[0..@sizeOf(macho.unwind_info_section_header)],
);
const indices = mem.bytesAsSlice(
macho.unwind_info_section_header_index_entry,
unwind_info[header.indexSectionOffset..][0 .. header.indexCount * @sizeOf(macho.unwind_info_section_header_index_entry)],
);
if (indices.len == 0) return error.MissingUnwindInfo;
const mapped_pc = context.pc - module_base_address;
const second_level_index = blk: {
var left: usize = 0;
var len: usize = indices.len;
while (len > 1) {
const mid = left + len / 2;
const offset = indices[mid].functionOffset;
if (mapped_pc < offset) {
len /= 2;
} else {
left = mid;
if (mapped_pc == offset) break;
len -= len / 2;
}
}
// Last index is a sentinel containing the highest address as its functionOffset
if (indices[left].secondLevelPagesSectionOffset == 0) return error.MissingUnwindInfo;
break :blk &indices[left];
};
const common_encodings = mem.bytesAsSlice(
macho.compact_unwind_encoding_t,
unwind_info[header.commonEncodingsArraySectionOffset..][0 .. header.commonEncodingsArrayCount * @sizeOf(macho.compact_unwind_encoding_t)],
);
const start_offset = second_level_index.secondLevelPagesSectionOffset;
const kind = mem.bytesAsValue(
macho.UNWIND_SECOND_LEVEL,
unwind_info[start_offset..][0..@sizeOf(macho.UNWIND_SECOND_LEVEL)],
);
const entry: struct {
function_offset: usize,
raw_encoding: u32,
} = switch (kind.*) {
.REGULAR => blk: {
const page_header = mem.bytesAsValue(
macho.unwind_info_regular_second_level_page_header,
unwind_info[start_offset..][0..@sizeOf(macho.unwind_info_regular_second_level_page_header)],
);
const entries = mem.bytesAsSlice(
macho.unwind_info_regular_second_level_entry,
unwind_info[start_offset + page_header.entryPageOffset ..][0 .. page_header.entryCount * @sizeOf(macho.unwind_info_regular_second_level_entry)],
);
if (entries.len == 0) return error.InvalidUnwindInfo;
var left: usize = 0;
var len: usize = entries.len;
while (len > 1) {
const mid = left + len / 2;
const offset = entries[mid].functionOffset;
if (mapped_pc < offset) {
len /= 2;
} else {
left = mid;
if (mapped_pc == offset) break;
len -= len / 2;
}
}
break :blk .{
.function_offset = entries[left].functionOffset,
.raw_encoding = entries[left].encoding,
};
},
.COMPRESSED => blk: {
const page_header = mem.bytesAsValue(
macho.unwind_info_compressed_second_level_page_header,
unwind_info[start_offset..][0..@sizeOf(macho.unwind_info_compressed_second_level_page_header)],
);
const entries = mem.bytesAsSlice(
macho.UnwindInfoCompressedEntry,
unwind_info[start_offset + page_header.entryPageOffset ..][0 .. page_header.entryCount * @sizeOf(macho.UnwindInfoCompressedEntry)],
);
if (entries.len == 0) return error.InvalidUnwindInfo;
var left: usize = 0;
var len: usize = entries.len;
while (len > 1) {
const mid = left + len / 2;
const offset = second_level_index.functionOffset + entries[mid].funcOffset;
if (mapped_pc < offset) {
len /= 2;
} else {
left = mid;
if (mapped_pc == offset) break;
len -= len / 2;
}
}
const entry = entries[left];
const function_offset = second_level_index.functionOffset + entry.funcOffset;
if (entry.encodingIndex < header.commonEncodingsArrayCount) {
if (entry.encodingIndex >= common_encodings.len) return error.InvalidUnwindInfo;
break :blk .{
.function_offset = function_offset,
.raw_encoding = common_encodings[entry.encodingIndex],
};
} else {
const local_index = try math.sub(
u8,
entry.encodingIndex,
math.cast(u8, header.commonEncodingsArrayCount) orelse return error.InvalidUnwindInfo,
);
const local_encodings = mem.bytesAsSlice(
macho.compact_unwind_encoding_t,
unwind_info[start_offset + page_header.encodingsPageOffset ..][0 .. page_header.encodingsCount * @sizeOf(macho.compact_unwind_encoding_t)],
);
if (local_index >= local_encodings.len) return error.InvalidUnwindInfo;
break :blk .{
.function_offset = function_offset,
.raw_encoding = local_encodings[local_index],
};
}
},
else => return error.InvalidUnwindInfo,
};
if (entry.raw_encoding == 0) return error.NoUnwindInfo;
const reg_context = abi.RegisterContext{
.eh_frame = false,
.is_macho = true,
};
const encoding: macho.CompactUnwindEncoding = @bitCast(entry.raw_encoding);
const new_ip = switch (builtin.cpu.arch) {
.x86_64 => switch (encoding.mode.x86_64) {
.OLD => return error.UnimplementedUnwindEncoding,
.RBP_FRAME => blk: {
const regs: [5]u3 = .{
encoding.value.x86_64.frame.reg0,
encoding.value.x86_64.frame.reg1,
encoding.value.x86_64.frame.reg2,
encoding.value.x86_64.frame.reg3,
encoding.value.x86_64.frame.reg4,
};
const frame_offset = encoding.value.x86_64.frame.frame_offset * @sizeOf(usize);
var max_reg: usize = 0;
inline for (regs, 0..) |reg, i| {
if (reg > 0) max_reg = i;
}
const fp = (try abi.regValueNative(usize, context.thread_context, abi.fpRegNum(reg_context), reg_context)).*;
const new_sp = fp + 2 * @sizeOf(usize);
// Verify the stack range we're about to read register values from
if (!context.isValidMemory(new_sp) or !context.isValidMemory(fp - frame_offset + max_reg * @sizeOf(usize))) return error.InvalidUnwindInfo;
const ip_ptr = fp + @sizeOf(usize);
const new_ip = @as(*const usize, @ptrFromInt(ip_ptr)).*;
const new_fp = @as(*const usize, @ptrFromInt(fp)).*;
(try abi.regValueNative(usize, context.thread_context, abi.fpRegNum(reg_context), reg_context)).* = new_fp;
(try abi.regValueNative(usize, context.thread_context, abi.spRegNum(reg_context), reg_context)).* = new_sp;
(try abi.regValueNative(usize, context.thread_context, abi.ipRegNum(), reg_context)).* = new_ip;
for (regs, 0..) |reg, i| {
if (reg == 0) continue;
const addr = fp - frame_offset + i * @sizeOf(usize);
const reg_number = try compactUnwindToDwarfRegNumber(reg);
(try abi.regValueNative(usize, context.thread_context, reg_number, reg_context)).* = @as(*const usize, @ptrFromInt(addr)).*;
}
break :blk new_ip;
},
.STACK_IMMD,
.STACK_IND,
=> blk: {
const sp = (try abi.regValueNative(usize, context.thread_context, abi.spRegNum(reg_context), reg_context)).*;
const stack_size = if (encoding.mode.x86_64 == .STACK_IMMD)
@as(usize, encoding.value.x86_64.frameless.stack.direct.stack_size) * @sizeOf(usize)
else stack_size: {
// In .STACK_IND, the stack size is inferred from the subq instruction at the beginning of the function.
const sub_offset_addr =
module_base_address +
entry.function_offset +
encoding.value.x86_64.frameless.stack.indirect.sub_offset;
if (!context.isValidMemory(sub_offset_addr)) return error.InvalidUnwindInfo;
// `sub_offset_addr` points to the offset of the literal within the instruction
const sub_operand = @as(*align(1) const u32, @ptrFromInt(sub_offset_addr)).*;
break :stack_size sub_operand + @sizeOf(usize) * @as(usize, encoding.value.x86_64.frameless.stack.indirect.stack_adjust);
};
// Decode the Lehmer-coded sequence of registers.
// For a description of the encoding see lib/libc/include/any-macos.13-any/mach-o/compact_unwind_encoding.h
// Decode the variable-base permutation number into its digits. Each digit represents
// an index into the list of register numbers that weren't yet used in the sequence at
// the time the digit was added.
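// Worked example (illustrative values only): with reg_count = 3 and
// stack_reg_permutation = 7, the loop below yields the factorial-base digits
// {1, 0, 1}, which select register numbers {2, 1, 4} from the unused entries
// of `reg_numbers`.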
const reg_count = encoding.value.x86_64.frameless.stack_reg_count;
const ip_ptr = if (reg_count > 0) reg_blk: {
var digits: [6]u3 = undefined;
var accumulator: usize = encoding.value.x86_64.frameless.stack_reg_permutation;
var base: usize = 2;
for (0..reg_count) |i| {
const div = accumulator / base;
digits[digits.len - 1 - i] = @intCast(accumulator - base * div);
accumulator = div;
base += 1;
}
const reg_numbers = [_]u3{ 1, 2, 3, 4, 5, 6 };
var registers: [reg_numbers.len]u3 = undefined;
var used_indices = [_]bool{false} ** reg_numbers.len;
for (digits[digits.len - reg_count ..], 0..) |target_unused_index, i| {
var unused_count: u8 = 0;
const unused_index = for (used_indices, 0..) |used, index| {
if (!used) {
if (target_unused_index == unused_count) break index;
unused_count += 1;
}
} else unreachable;
registers[i] = reg_numbers[unused_index];
used_indices[unused_index] = true;
}
var reg_addr = sp + stack_size - @sizeOf(usize) * @as(usize, reg_count + 1);
if (!context.isValidMemory(reg_addr)) return error.InvalidUnwindInfo;
for (0..reg_count) |i| {
const reg_number = try compactUnwindToDwarfRegNumber(registers[i]);
(try abi.regValueNative(usize, context.thread_context, reg_number, reg_context)).* = @as(*const usize, @ptrFromInt(reg_addr)).*;
reg_addr += @sizeOf(usize);
}
break :reg_blk reg_addr;
} else sp + stack_size - @sizeOf(usize);
const new_ip = @as(*const usize, @ptrFromInt(ip_ptr)).*;
const new_sp = ip_ptr + @sizeOf(usize);
if (!context.isValidMemory(new_sp)) return error.InvalidUnwindInfo;
(try abi.regValueNative(usize, context.thread_context, abi.spRegNum(reg_context), reg_context)).* = new_sp;
(try abi.regValueNative(usize, context.thread_context, abi.ipRegNum(), reg_context)).* = new_ip;
break :blk new_ip;
},
.DWARF => {
return unwindFrameMachODwarf(context, eh_frame orelse return error.MissingEhFrame, @intCast(encoding.value.x86_64.dwarf));
},
},
.aarch64 => switch (encoding.mode.arm64) {
.OLD => return error.UnimplementedUnwindEncoding,
.FRAMELESS => blk: {
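// Frameless arm64 functions keep the return address in the link register
// (x30) and encode their stack allocation in 16-byte units, so unwinding
// only needs to pop that allocation.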
const sp = (try abi.regValueNative(usize, context.thread_context, abi.spRegNum(reg_context), reg_context)).*;
const new_sp = sp + encoding.value.arm64.frameless.stack_size * 16;
const new_ip = (try abi.regValueNative(usize, context.thread_context, 30, reg_context)).*;
if (!context.isValidMemory(new_sp)) return error.InvalidUnwindInfo;
(try abi.regValueNative(usize, context.thread_context, abi.spRegNum(reg_context), reg_context)).* = new_sp;
break :blk new_ip;
},
.DWARF => {
return unwindFrameMachODwarf(context, eh_frame orelse return error.MissingEhFrame, @intCast(encoding.value.arm64.dwarf));
},
.FRAME => blk: {
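// Standard arm64 frame: fp/lr are saved as a pair at the frame pointer. The
// encoding records which x- and d-register pairs were also saved and must be
// restored before unwinding through the saved fp/lr.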
const fp = (try abi.regValueNative(usize, context.thread_context, abi.fpRegNum(reg_context), reg_context)).*;
const new_sp = fp + 16;
const ip_ptr = fp + @sizeOf(usize);
const num_restored_pairs: usize =
@popCount(@as(u5, @bitCast(encoding.value.arm64.frame.x_reg_pairs))) +
@popCount(@as(u4, @bitCast(encoding.value.arm64.frame.d_reg_pairs)));
const min_reg_addr = fp - num_restored_pairs * 2 * @sizeOf(usize);
if (!context.isValidMemory(new_sp) or !context.isValidMemory(min_reg_addr)) return error.InvalidUnwindInfo;
var reg_addr = fp - @sizeOf(usize);
inline for (@typeInfo(@TypeOf(encoding.value.arm64.frame.x_reg_pairs)).Struct.fields, 0..) |field, i| {
if (@field(encoding.value.arm64.frame.x_reg_pairs, field.name) != 0) {
(try abi.regValueNative(usize, context.thread_context, 19 + i, reg_context)).* = @as(*const usize, @ptrFromInt(reg_addr)).*;
reg_addr += @sizeOf(usize);
(try abi.regValueNative(usize, context.thread_context, 20 + i, reg_context)).* = @as(*const usize, @ptrFromInt(reg_addr)).*;
reg_addr += @sizeOf(usize);
}
}
inline for (@typeInfo(@TypeOf(encoding.value.arm64.frame.d_reg_pairs)).Struct.fields, 0..) |field, i| {
if (@field(encoding.value.arm64.frame.d_reg_pairs, field.name) != 0) {
// Only the lower half of each 128-bit V register is restored during unwinding
@memcpy(
try abi.regBytes(context.thread_context, 64 + 8 + i, context.reg_context),
mem.asBytes(@as(*const usize, @ptrFromInt(reg_addr))),
);
reg_addr += @sizeOf(usize);
@memcpy(
try abi.regBytes(context.thread_context, 64 + 9 + i, context.reg_context),
mem.asBytes(@as(*const usize, @ptrFromInt(reg_addr))),
);
reg_addr += @sizeOf(usize);
}
}
const new_ip = @as(*const usize, @ptrFromInt(ip_ptr)).*;
const new_fp = @as(*const usize, @ptrFromInt(fp)).*;
(try abi.regValueNative(usize, context.thread_context, abi.fpRegNum(reg_context), reg_context)).* = new_fp;
(try abi.regValueNative(usize, context.thread_context, abi.ipRegNum(), reg_context)).* = new_ip;
break :blk new_ip;
},
},
else => return error.UnimplementedArch,
};
context.pc = abi.stripInstructionPtrAuthCode(new_ip);
if (context.pc > 0) context.pc -= 1;
return new_ip;
}
fn unwindFrameMachODwarf(context: *UnwindContext, eh_frame: []const u8, fde_offset: usize) !usize {
var di = DwarfInfo{
.endian = native_endian,
.is_macho = true,
};
defer di.deinit(context.allocator);
di.sections[@intFromEnum(DwarfSection.eh_frame)] = .{
.data = eh_frame,
.owned = false,
};
return di.unwindFrame(context, fde_offset);
}
pub const UnwindContext = struct {
allocator: mem.Allocator,
cfa: ?usize,
pc: usize,
thread_context: *debug.ThreadContext,
reg_context: abi.RegisterContext,
isValidMemory: *const fn (address: usize) bool,
vm: call_frame.VirtualMachine,
stack_machine: expressions.StackMachine(.{ .call_frame_context = true }),
pub fn init(allocator: mem.Allocator, thread_context: *const debug.ThreadContext, isValidMemory: *const fn (address: usize) bool) !UnwindContext {
const pc = abi.stripInstructionPtrAuthCode((try abi.regValueNative(usize, thread_context, abi.ipRegNum(), null)).*);
const context_copy = try allocator.create(debug.ThreadContext);
debug.copyContext(thread_context, context_copy);
return .{
.allocator = allocator,
.cfa = null,
.pc = pc,
.thread_context = context_copy,
.reg_context = undefined,
.isValidMemory = isValidMemory,
.vm = .{},
.stack_machine = .{},
};
}
pub fn deinit(self: *UnwindContext) void {
self.vm.deinit(self.allocator);
self.stack_machine.deinit(self.allocator);
self.allocator.destroy(self.thread_context);
self.* = undefined;
}
pub fn getFp(self: *const UnwindContext) !usize {
return (try abi.regValueNative(usize, self.thread_context, abi.fpRegNum(self.reg_context), self.reg_context)).*;
}
};
/// Initialize DWARF info. The caller is responsible for initializing most of
/// the `DwarfInfo` fields before calling this.
pub fn openDwarfDebugInfo(di: *DwarfInfo, allocator: mem.Allocator) !void {
try di.scanAllFunctions(allocator);
try di.scanAllCompileUnits(allocator);
}
/// This function exists to make it convenient to turn this error into a crash
/// while working on this file (see the commented-out abort below).
fn badDwarf() error{InvalidDebugInfo} {
//std.os.abort(); // can be handy to uncomment when working on this file
return error.InvalidDebugInfo;
}
fn missingDwarf() error{MissingDebugInfo} {
//std.os.abort(); // can be handy to uncomment when working on this file
return error.MissingDebugInfo;
}
fn getStringGeneric(opt_str: ?[]const u8, offset: u64) ![:0]const u8 {
const str = opt_str orelse return badDwarf();
if (offset > str.len) return badDwarf();
const casted_offset = math.cast(usize, offset) orelse return badDwarf();
// Valid strings always have a terminating zero byte
const last = mem.indexOfScalarPos(u8, str, casted_offset, 0) orelse return badDwarf();
return str[casted_offset..last :0];
}
const EhPointerContext = struct {
// The address of the pointer field itself
pc_rel_base: u64,
// Whether or not to follow indirect pointers. This should only be
// used when decoding pointers at runtime using the current process's
// debug info
follow_indirect: bool,
// These relative addressing modes are only used in specific cases, and
// might not be available / required in all parsing contexts
data_rel_base: ?u64 = null,
text_rel_base: ?u64 = null,
function_rel_base: ?u64 = null,
};
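// An EH pointer encoding byte packs three fields: the low bits select how the
// value is stored (EH.PE.type_mask), the middle bits select the base address
// it is relative to (EH.PE.rel_mask), and the top bit requests an extra level
// of indirection (EH.PE.indirect). EH.PE.omit means no value is encoded at all.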
fn readEhPointer(fbr: *FixedBufferReader, enc: u8, addr_size_bytes: u8, ctx: EhPointerContext) !?u64 {
if (enc == EH.PE.omit) return null;
const value: union(enum) {
signed: i64,
unsigned: u64,
} = switch (enc & EH.PE.type_mask) {
EH.PE.absptr => .{
.unsigned = switch (addr_size_bytes) {
2 => try fbr.readInt(u16),
4 => try fbr.readInt(u32),
8 => try fbr.readInt(u64),
else => return error.InvalidAddrSize,
},
},
EH.PE.uleb128 => .{ .unsigned = try fbr.readUleb128(u64) },
EH.PE.udata2 => .{ .unsigned = try fbr.readInt(u16) },
EH.PE.udata4 => .{ .unsigned = try fbr.readInt(u32) },
EH.PE.udata8 => .{ .unsigned = try fbr.readInt(u64) },
EH.PE.sleb128 => .{ .signed = try fbr.readIleb128(i64) },
EH.PE.sdata2 => .{ .signed = try fbr.readInt(i16) },
EH.PE.sdata4 => .{ .signed = try fbr.readInt(i32) },
EH.PE.sdata8 => .{ .signed = try fbr.readInt(i64) },
else => return badDwarf(),
};
const base = switch (enc & EH.PE.rel_mask) {
EH.PE.pcrel => ctx.pc_rel_base,
EH.PE.textrel => ctx.text_rel_base orelse return error.PointerBaseNotSpecified,
EH.PE.datarel => ctx.data_rel_base orelse return error.PointerBaseNotSpecified,
EH.PE.funcrel => ctx.function_rel_base orelse return error.PointerBaseNotSpecified,
else => null,
};
const ptr: u64 = if (base) |b| switch (value) {
.signed => |s| @intCast(try math.add(i64, s, @as(i64, @intCast(b)))),
// absptr can actually contain signed values in some cases (aarch64 MachO)
.unsigned => |u| u +% b,
} else switch (value) {
.signed => |s| @as(u64, @intCast(s)),
.unsigned => |u| u,
};
if ((enc & EH.PE.indirect) > 0 and ctx.follow_indirect) {
if (@sizeOf(usize) != addr_size_bytes) {
// See the documentation for `follow_indirect`
return error.NonNativeIndirection;
}
const native_ptr = math.cast(usize, ptr) orelse return error.PointerOverflow;
return switch (addr_size_bytes) {
2, 4, 8 => return @as(*const usize, @ptrFromInt(native_ptr)).*,
else => return error.UnsupportedAddrSize,
};
} else {
return ptr;
}
}
/// This represents the decoded .eh_frame_hdr header
pub const ExceptionFrameHeader = struct {
eh_frame_ptr: usize,
table_enc: u8,
fde_count: usize,
entries: []const u8,
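/// Each entry in the binary search table is a pair of encoded values (the
/// function's initial location and the address of its FDE), so an entry is
/// twice the size of a single encoded datum.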
pub fn entrySize(table_enc: u8) !u8 {
return switch (table_enc & EH.PE.type_mask) {
EH.PE.udata2,
EH.PE.sdata2,
=> 4,
EH.PE.udata4,
EH.PE.sdata4,
=> 8,
EH.PE.udata8,
EH.PE.sdata8,
=> 16,
// This is a binary search table, so all entries must be the same length
else => return badDwarf(),
};
}
fn isValidPtr(
self: ExceptionFrameHeader,
ptr: usize,
isValidMemory: *const fn (address: usize) bool,
eh_frame_len: ?usize,
) bool {
if (eh_frame_len) |len| {
return ptr >= self.eh_frame_ptr and ptr < self.eh_frame_ptr + len;
} else {
return isValidMemory(ptr);
}
}
/// Find an entry by binary searching the eh_frame_hdr section.
///
/// Since the length of the eh_frame section (`eh_frame_len`) may not be known by the caller,
/// `isValidMemory` will be called before accessing any memory referenced by
/// the header entries. If `eh_frame_len` is provided, then these checks can be skipped.
pub fn findEntry(
self: ExceptionFrameHeader,
isValidMemory: *const fn (address: usize) bool,
eh_frame_len: ?usize,
eh_frame_hdr_ptr: usize,
pc: usize,
cie: *CommonInformationEntry,
fde: *FrameDescriptionEntry,
) !void {
const entry_size = try entrySize(self.table_enc);
var left: usize = 0;
var len: usize = self.fde_count;
var fbr: FixedBufferReader = .{ .buf = self.entries, .endian = native_endian };
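// The search table is sorted by initial location, so binary search for the
// last entry whose pc_begin is <= pc.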
while (len > 1) {
const mid = left + len / 2;
fbr.pos = mid * entry_size;
const pc_begin = try readEhPointer(&fbr, self.table_enc, @sizeOf(usize), .{
.pc_rel_base = @intFromPtr(&self.entries[fbr.pos]),
.follow_indirect = true,
.data_rel_base = eh_frame_hdr_ptr,
}) orelse return badDwarf();
if (pc < pc_begin) {
len /= 2;
} else {
left = mid;
if (pc == pc_begin) break;
len -= len / 2;
}
}
if (len == 0) return badDwarf();
fbr.pos = left * entry_size;
// Read past the pc_begin field of the entry
_ = try readEhPointer(&fbr, self.table_enc, @sizeOf(usize), .{
.pc_rel_base = @intFromPtr(&self.entries[fbr.pos]),
.follow_indirect = true,
.data_rel_base = eh_frame_hdr_ptr,
}) orelse return badDwarf();
const fde_ptr = math.cast(usize, try readEhPointer(&fbr, self.table_enc, @sizeOf(usize), .{
.pc_rel_base = @intFromPtr(&self.entries[fbr.pos]),
.follow_indirect = true,
.data_rel_base = eh_frame_hdr_ptr,
}) orelse return badDwarf()) orelse return badDwarf();
// Verify the length fields of the FDE header are readable
if (!self.isValidPtr(fde_ptr, isValidMemory, eh_frame_len) or fde_ptr < self.eh_frame_ptr) return badDwarf();
var fde_entry_header_len: usize = 4;
if (!self.isValidPtr(fde_ptr + 3, isValidMemory, eh_frame_len)) return badDwarf();
if (self.isValidPtr(fde_ptr + 11, isValidMemory, eh_frame_len)) fde_entry_header_len = 12;
// Even if eh_frame_len is not specified, all ranges accessed are checked by isValidPtr
const eh_frame = @as([*]const u8, @ptrFromInt(self.eh_frame_ptr))[0 .. eh_frame_len orelse math.maxInt(u32)];
const fde_offset = fde_ptr - self.eh_frame_ptr;
var eh_frame_fbr: FixedBufferReader = .{
.buf = eh_frame,
.pos = fde_offset,
.endian = native_endian,
};
const fde_entry_header = try EntryHeader.read(&eh_frame_fbr, .eh_frame);
if (!self.isValidPtr(@intFromPtr(&fde_entry_header.entry_bytes[fde_entry_header.entry_bytes.len - 1]), isValidMemory, eh_frame_len)) return badDwarf();
if (fde_entry_header.type != .fde) return badDwarf();
// CIEs always come before FDEs (the offset is a subtraction), so we can assume this memory is readable
const cie_offset = fde_entry_header.type.fde;
try eh_frame_fbr.seekTo(cie_offset);
const cie_entry_header = try EntryHeader.read(&eh_frame_fbr, .eh_frame);
if (!self.isValidPtr(@intFromPtr(&cie_entry_header.entry_bytes[cie_entry_header.entry_bytes.len - 1]), isValidMemory, eh_frame_len)) return badDwarf();
if (cie_entry_header.type != .cie) return badDwarf();
cie.* = try CommonInformationEntry.parse(
cie_entry_header.entry_bytes,
0,
true,
cie_entry_header.format,
.eh_frame,
cie_entry_header.length_offset,
@sizeOf(usize),
native_endian,
);
fde.* = try FrameDescriptionEntry.parse(
fde_entry_header.entry_bytes,
0,
true,
cie.*,
@sizeOf(usize),
native_endian,
);
}
};
pub const EntryHeader = struct {
/// Offset of the length field in the backing buffer
length_offset: usize,
format: Format,
type: union(enum) {
cie,
/// Value is the offset of the corresponding CIE
fde: u64,
terminator,
},
/// The entry's contents, not including the ID field
entry_bytes: []const u8,
/// The length of the entry including the ID field, but not the length field itself
pub fn entryLength(self: EntryHeader) usize {
return self.entry_bytes.len + @as(u8, if (self.format == .@"64") 8 else 4);
}
/// Reads a header for either an FDE or a CIE, then advances the fbr to the position after the trailing structure.
/// `fbr` must be a FixedBufferReader backed by either the .eh_frame or .debug_frame sections.
pub fn read(fbr: *FixedBufferReader, dwarf_section: DwarfSection) !EntryHeader {
assert(dwarf_section == .eh_frame or dwarf_section == .debug_frame);
const length_offset = fbr.pos;
const unit_header = try readUnitHeader(fbr);
const unit_length = math.cast(usize, unit_header.unit_length) orelse return badDwarf();
if (unit_length == 0) return .{
.length_offset = length_offset,
.format = unit_header.format,
.type = .terminator,
.entry_bytes = &.{},
};
const start_offset = fbr.pos;
const end_offset = start_offset + unit_length;
defer fbr.pos = end_offset;
const id = try fbr.readAddress(unit_header.format);
const entry_bytes = fbr.buf[fbr.pos..end_offset];
const cie_id: u64 = switch (dwarf_section) {
.eh_frame => CommonInformationEntry.eh_id,
.debug_frame => switch (unit_header.format) {
.@"32" => CommonInformationEntry.dwarf32_id,
.@"64" => CommonInformationEntry.dwarf64_id,
},
else => unreachable,
};
return .{
.length_offset = length_offset,
.format = unit_header.format,
.type = if (id == cie_id) .cie else .{ .fde = switch (dwarf_section) {
.eh_frame => try math.sub(u64, start_offset, id),
.debug_frame => id,
else => unreachable,
} },
.entry_bytes = entry_bytes,
};
}
};
pub const CommonInformationEntry = struct {
// Used in .eh_frame
pub const eh_id = 0;
// Used in .debug_frame (DWARF32)
pub const dwarf32_id = math.maxInt(u32);
// Used in .debug_frame (DWARF64)
pub const dwarf64_id = math.maxInt(u64);
// Offset of the length field of this entry in the .eh_frame / .debug_frame section.
// This is the key that FDEs use to reference CIEs.
length_offset: u64,
version: u8,
address_size: u8,
format: Format,
// Only present in version 4
segment_selector_size: ?u8,
code_alignment_factor: u32,
data_alignment_factor: i32,
return_address_register: u8,
aug_str: []const u8,
aug_data: []const u8,
lsda_pointer_enc: u8,
personality_enc: ?u8,
personality_routine_pointer: ?u64,
fde_pointer_enc: u8,
initial_instructions: []const u8,
pub fn isSignalFrame(self: CommonInformationEntry) bool {
for (self.aug_str) |c| if (c == 'S') return true;
return false;
}
pub fn addressesSignedWithBKey(self: CommonInformationEntry) bool {
for (self.aug_str) |c| if (c == 'B') return true;
return false;
}
pub fn mteTaggedFrame(self: CommonInformationEntry) bool {
for (self.aug_str) |c| if (c == 'G') return true;
return false;
}
/// This function expects to read the CIE starting with the version field.
/// The returned struct references memory backed by cie_bytes.
///
/// See the FrameDescriptionEntry.parse documentation for the description
/// of `pc_rel_offset` and `is_runtime`.
///
/// `length_offset` specifies the offset of this CIE's length field in the
/// .eh_frame / .debug_frame section.
pub fn parse(
cie_bytes: []const u8,
pc_rel_offset: i64,
is_runtime: bool,
format: Format,
dwarf_section: DwarfSection,
length_offset: u64,
addr_size_bytes: u8,
endian: std.builtin.Endian,
) !CommonInformationEntry {
if (addr_size_bytes > 8) return error.UnsupportedAddrSize;
var fbr: FixedBufferReader = .{ .buf = cie_bytes, .endian = endian };
const version = try fbr.readByte();
switch (dwarf_section) {
.eh_frame => if (version != 1 and version != 3) return error.UnsupportedDwarfVersion,
.debug_frame => if (version != 4) return error.UnsupportedDwarfVersion,
else => return error.UnsupportedDwarfSection,
}
var has_eh_data = false;
var has_aug_data = false;
var aug_str_len: usize = 0;
const aug_str_start = fbr.pos;
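// The augmentation string is a sequence of single-character flags. 'z' must
// come first if present, and indicates that augmentation data (whose ULEB128
// length follows the return address register field) is included.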
var aug_byte = try fbr.readByte();
while (aug_byte != 0) : (aug_byte = try fbr.readByte()) {
switch (aug_byte) {
'z' => {
if (aug_str_len != 0) return badDwarf();
has_aug_data = true;
},
'e' => {
if (has_aug_data or aug_str_len != 0) return badDwarf();
if (try fbr.readByte() != 'h') return badDwarf();
has_eh_data = true;
},
else => if (has_eh_data) return badDwarf(),
}
aug_str_len += 1;
}
if (has_eh_data) {
// legacy data created by older versions of gcc - unsupported here
for (0..addr_size_bytes) |_| _ = try fbr.readByte();
}
const address_size = if (version == 4) try fbr.readByte() else addr_size_bytes;
const segment_selector_size = if (version == 4) try fbr.readByte() else null;
const code_alignment_factor = try fbr.readUleb128(u32);
const data_alignment_factor = try fbr.readIleb128(i32);
const return_address_register = if (version == 1) try fbr.readByte() else try fbr.readUleb128(u8);
var lsda_pointer_enc: u8 = EH.PE.omit;
var personality_enc: ?u8 = null;
var personality_routine_pointer: ?u64 = null;
var fde_pointer_enc: u8 = EH.PE.absptr;
var aug_data: []const u8 = &[_]u8{};
const aug_str = if (has_aug_data) blk: {
const aug_data_len = try fbr.readUleb128(usize);
const aug_data_start = fbr.pos;
aug_data = cie_bytes[aug_data_start..][0..aug_data_len];
const aug_str = cie_bytes[aug_str_start..][0..aug_str_len];
for (aug_str[1..]) |byte| {
switch (byte) {
'L' => {
lsda_pointer_enc = try fbr.readByte();
},
'P' => {
personality_enc = try fbr.readByte();
personality_routine_pointer = try readEhPointer(&fbr, personality_enc.?, addr_size_bytes, .{
.pc_rel_base = try pcRelBase(@intFromPtr(&cie_bytes[fbr.pos]), pc_rel_offset),
.follow_indirect = is_runtime,
});
},
'R' => {
fde_pointer_enc = try fbr.readByte();
},
'S', 'B', 'G' => {},
else => return badDwarf(),
}
}
// aug_data_len can include padding so the CIE ends on an address boundary
fbr.pos = aug_data_start + aug_data_len;
break :blk aug_str;
} else &[_]u8{};
const initial_instructions = cie_bytes[fbr.pos..];
return .{
.length_offset = length_offset,
.version = version,
.address_size = address_size,
.format = format,
.segment_selector_size = segment_selector_size,
.code_alignment_factor = code_alignment_factor,
.data_alignment_factor = data_alignment_factor,
.return_address_register = return_address_register,
.aug_str = aug_str,
.aug_data = aug_data,
.lsda_pointer_enc = lsda_pointer_enc,
.personality_enc = personality_enc,
.personality_routine_pointer = personality_routine_pointer,
.fde_pointer_enc = fde_pointer_enc,
.initial_instructions = initial_instructions,
};
}
};
pub const FrameDescriptionEntry = struct {
// Offset into the .eh_frame / .debug_frame section where the CIE for this FDE is stored
cie_length_offset: u64,
pc_begin: u64,
pc_range: u64,
lsda_pointer: ?u64,
aug_data: []const u8,
instructions: []const u8,
/// This function expects to read the FDE starting at the PC Begin field.
/// The returned struct references memory backed by `fde_bytes`.
///
/// `pc_rel_offset` specifies an offset to be applied to pc_rel_base values
/// used when decoding pointers. This should be set to zero if fde_bytes is
/// backed by the memory of a .eh_frame / .debug_frame section in the running executable.
/// Otherwise, it should be the relative offset to translate addresses from
/// where the section is currently stored in memory, to where it *would* be
/// stored at runtime: section base addr - backing data base ptr.
///
/// Similarly, `is_runtime` specifies this function is being called on a runtime
/// section, and so indirect pointers can be followed.
pub fn parse(
fde_bytes: []const u8,
pc_rel_offset: i64,
is_runtime: bool,
cie: CommonInformationEntry,
addr_size_bytes: u8,
endian: std.builtin.Endian,
) !FrameDescriptionEntry {
if (addr_size_bytes > 8) return error.InvalidAddrSize;
var fbr: FixedBufferReader = .{ .buf = fde_bytes, .endian = endian };
const pc_begin = try readEhPointer(&fbr, cie.fde_pointer_enc, addr_size_bytes, .{
.pc_rel_base = try pcRelBase(@intFromPtr(&fde_bytes[fbr.pos]), pc_rel_offset),
.follow_indirect = is_runtime,
}) orelse return badDwarf();
const pc_range = try readEhPointer(&fbr, cie.fde_pointer_enc, addr_size_bytes, .{
.pc_rel_base = 0,
.follow_indirect = false,
}) orelse return badDwarf();
var aug_data: []const u8 = &[_]u8{};
const lsda_pointer = if (cie.aug_str.len > 0) blk: {
const aug_data_len = try fbr.readUleb128(usize);
const aug_data_start = fbr.pos;
aug_data = fde_bytes[aug_data_start..][0..aug_data_len];
const lsda_pointer = if (cie.lsda_pointer_enc != EH.PE.omit)
try readEhPointer(&fbr, cie.lsda_pointer_enc, addr_size_bytes, .{
.pc_rel_base = try pcRelBase(@intFromPtr(&fde_bytes[fbr.pos]), pc_rel_offset),
.follow_indirect = is_runtime,
})
else
null;
fbr.pos = aug_data_start + aug_data_len;
break :blk lsda_pointer;
} else null;
const instructions = fde_bytes[fbr.pos..];
return .{
.cie_length_offset = cie.length_offset,
.pc_begin = pc_begin,
.pc_range = pc_range,
.lsda_pointer = lsda_pointer,
.aug_data = aug_data,
.instructions = instructions,
};
}
};
fn pcRelBase(field_ptr: usize, pc_rel_offset: i64) !usize {
if (pc_rel_offset < 0) {
return math.sub(usize, field_ptr, @as(usize, @intCast(-pc_rel_offset)));
} else {
return math.add(usize, field_ptr, @as(usize, @intCast(pc_rel_offset)));
}
}
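// A small sanity test for `pcRelBase`, added for illustration: positive
// offsets advance the base, negative offsets move it back, and results that
// would underflow surface as error.Overflow from std.math.
test "pcRelBase applies signed pc_rel_offset values" {
    try std.testing.expectEqual(@as(usize, 0x1010), try pcRelBase(0x1000, 0x10));
    try std.testing.expectEqual(@as(usize, 0xff0), try pcRelBase(0x1000, -0x10));
    try std.testing.expectError(error.Overflow, pcRelBase(0x10, -0x20));
}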
// Reading debug info needs to be fast, even when compiled in debug mode,
// so avoid using `std.io.FixedBufferStream`, which is too slow.
const FixedBufferReader = struct {
buf: []const u8,
pos: usize = 0,
endian: std.builtin.Endian,
pub const Error = error{ EndOfBuffer, Overflow };
fn seekTo(fbr: *FixedBufferReader, pos: u64) Error!void {
if (pos > fbr.buf.len) return error.EndOfBuffer;
fbr.pos = @intCast(pos);
}
fn seekForward(fbr: *FixedBufferReader, amount: u64) Error!void {
if (fbr.buf.len - fbr.pos < amount) return error.EndOfBuffer;
fbr.pos += @intCast(amount);
}
pub inline fn readByte(fbr: *FixedBufferReader) Error!u8 {
if (fbr.pos >= fbr.buf.len) return error.EndOfBuffer;
defer fbr.pos += 1;
return fbr.buf[fbr.pos];
}
fn readByteSigned(fbr: *FixedBufferReader) Error!i8 {
return @bitCast(try fbr.readByte());
}
fn readInt(fbr: *FixedBufferReader, comptime T: type) Error!T {
const size = @divExact(@typeInfo(T).Int.bits, 8);
if (fbr.buf.len - fbr.pos < size) return error.EndOfBuffer;
defer fbr.pos += size;
return mem.readInt(T, fbr.buf[fbr.pos..][0..size], fbr.endian);
}
fn readUleb128(fbr: *FixedBufferReader, comptime T: type) Error!T {
return std.leb.readULEB128(T, fbr);
}
fn readIleb128(fbr: *FixedBufferReader, comptime T: type) Error!T {
return std.leb.readILEB128(T, fbr);
}
fn readAddress(fbr: *FixedBufferReader, format: Format) Error!u64 {
return switch (format) {
.@"32" => try fbr.readInt(u32),
.@"64" => try fbr.readInt(u64),
};
}
fn readBytes(fbr: *FixedBufferReader, len: usize) Error![]const u8 {
if (fbr.buf.len - fbr.pos < len) return error.EndOfBuffer;
defer fbr.pos += len;
return fbr.buf[fbr.pos..][0..len];
}
fn readBytesTo(fbr: *FixedBufferReader, comptime sentinel: u8) Error![:sentinel]const u8 {
const end = @call(.always_inline, mem.indexOfScalarPos, .{
u8,
fbr.buf,
fbr.pos,
sentinel,
}) orelse return error.EndOfBuffer;
defer fbr.pos = end + 1;
return fbr.buf[fbr.pos..end :sentinel];
}
};
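// An illustrative test of FixedBufferReader's bounds handling: reads advance
// `pos`, and reads past the end of the buffer return error.EndOfBuffer.
test "FixedBufferReader bounds checking" {
    var fbr: FixedBufferReader = .{ .buf = &.{ 0x01, 0x02, 0x03, 0x00 }, .endian = .little };
    try std.testing.expectEqual(@as(u8, 0x01), try fbr.readByte());
    try std.testing.expectEqual(@as(u16, 0x0302), try fbr.readInt(u16));
    try std.testing.expectError(error.EndOfBuffer, fbr.readInt(u32));
}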
test {
std.testing.refAllDecls(@This());
}