macho: add unwindFrame which can unwind stack frames using the __unwind_info section

dwarf: fixup missing error
This commit is contained in:
kcbanner 2023-07-08 16:39:38 -04:00
parent d226b74ae8
commit 94354aa6aa
6 changed files with 409 additions and 40 deletions

View File

@ -623,11 +623,15 @@ pub const StackIterator = struct {
const module = try self.debug_info.?.getModuleForAddress(self.dwarf_context.pc);
switch (native_os) {
.macos, .ios, .watchos, .tvos => {
const o_file_info = try module.getOFileInfoForAddress(self.debug_info.?.allocator, self.dwarf_context.pc);
if (o_file_info.unwind_info == null) return error.MissingUnwindInfo;
// TODO: Unwind using __unwind_info,
unreachable;
// __unwind_info is a requirement for unwinding on Darwin. It may fall back to DWARF, but unwinding
// via DWARF before attempting to use the compact unwind info will produce incorrect results.
if (module.unwind_info) |unwind_info| {
if (macho.unwindFrame(&self.dwarf_context, unwind_info, module.base_address)) |return_address| {
return return_address;
} else |err| {
if (err != error.RequiresDWARFUnwind) return err;
}
} else return error.MissingUnwindInfo;
},
else => {},
}
@ -1236,7 +1240,16 @@ fn readMachODebugInfo(allocator: mem.Allocator, macho_file: File) !ModuleDebugIn
.ncmds = hdr.ncmds,
.buffer = mapped_mem[@sizeOf(macho.mach_header_64)..][0..hdr.sizeofcmds],
};
var unwind_info: ?[]const u8 = null;
const symtab = while (it.next()) |cmd| switch (cmd.cmd()) {
.SEGMENT_64 => {
for (cmd.getSections()) |sect| {
if (std.mem.eql(u8, "__TEXT", sect.segName()) and mem.eql(u8, "__unwind_info", sect.sectName())) {
unwind_info = try chopSlice(mapped_mem, sect.offset, sect.size);
break;
}
}
},
.SYMTAB => break cmd.cast(macho.symtab_command).?,
else => {},
} else return error.MissingDebugInfo;
@ -1346,6 +1359,7 @@ fn readMachODebugInfo(allocator: mem.Allocator, macho_file: File) !ModuleDebugIn
.ofiles = ModuleDebugInfo.OFileTable.init(allocator),
.symbols = symbols,
.strings = strings,
.unwind_info = unwind_info,
};
}
@ -1886,12 +1900,13 @@ pub const ModuleDebugInfo = switch (native_os) {
symbols: []const MachoSymbol,
strings: [:0]const u8,
ofiles: OFileTable,
// Backed by mapped_memory
unwind_info: ?[]const u8,
const OFileTable = std.StringHashMap(OFileInfo);
const OFileInfo = struct {
di: DW.DwarfInfo,
addr_table: std.StringHashMap(u64),
unwind_info: ?[]const u8,
};
fn deinit(self: *@This(), allocator: mem.Allocator) void {
@ -1949,24 +1964,21 @@ pub const ModuleDebugInfo = switch (native_os) {
addr_table.putAssumeCapacityNoClobber(sym_name, sym.n_value);
}
var unwind_info: ?[]const u8 = null;
var sections: DW.DwarfInfo.SectionArray = DW.DwarfInfo.null_section_array;
for (segcmd.?.getSections()) |sect| {
if (std.mem.eql(u8, "__TEXT", sect.segName()) and mem.eql(u8, "__unwind_info", sect.sectName())) {
unwind_info = try chopSlice(mapped_mem, sect.offset, sect.size);
} else if (std.mem.eql(u8, "__DWARF", sect.segName())) {
var section_index: ?usize = null;
inline for (@typeInfo(DW.DwarfSection).Enum.fields, 0..) |section, i| {
if (mem.eql(u8, "__" ++ section.name, sect.sectName())) section_index = i;
}
if (section_index == null) continue;
if (!std.mem.eql(u8, "__DWARF", sect.segName())) continue;
const section_bytes = try chopSlice(mapped_mem, sect.offset, sect.size);
sections[section_index.?] = .{
.data = section_bytes,
.owned = false,
};
var section_index: ?usize = null;
inline for (@typeInfo(DW.DwarfSection).Enum.fields, 0..) |section, i| {
if (mem.eql(u8, "__" ++ section.name, sect.sectName())) section_index = i;
}
if (section_index == null) continue;
const section_bytes = try chopSlice(mapped_mem, sect.offset, sect.size);
sections[section_index.?] = .{
.data = section_bytes,
.owned = false,
};
}
const missing_debug_info =
@ -1986,7 +1998,6 @@ pub const ModuleDebugInfo = switch (native_os) {
var info = OFileInfo{
.di = di,
.addr_table = addr_table,
.unwind_info = unwind_info,
};
// Add the debug info to the cache

View File

@ -1641,7 +1641,6 @@ pub const DwarfInfo = struct {
// instead of the actual base address of the module. When using .eh_frame_hdr, PC can be used directly
// as pointers will be decoded relative to the already-mapped .eh_frame.
var mapped_pc: usize = undefined;
if (di.eh_frame_hdr) |header| {
const eh_frame_len = if (di.section(.eh_frame)) |eh_frame| eh_frame.len else null;
mapped_pc = context.pc;
@ -1657,16 +1656,12 @@ pub const DwarfInfo = struct {
mapped_pc = context.pc - module_base_address;
const index = std.sort.binarySearch(FrameDescriptionEntry, mapped_pc, di.fde_list.items, {}, struct {
pub fn compareFn(_: void, pc: usize, mid_item: FrameDescriptionEntry) std.math.Order {
if (pc < mid_item.pc_begin) {
return .lt;
} else {
const range_end = mid_item.pc_begin + mid_item.pc_range;
if (pc < range_end) {
return .eq;
}
if (pc < mid_item.pc_begin) return .lt;
return .gt;
}
const range_end = mid_item.pc_begin + mid_item.pc_range;
if (pc < range_end) return .eq;
return .gt;
}
}.compareFn);
@ -2000,6 +1995,7 @@ pub const ExceptionFrameHeader = struct {
}
}
if (len == 0) return badDwarf();
try stream.seekTo(left * entry_size);
// Read past the pc_begin field of the entry

View File

@ -45,15 +45,6 @@ pub fn spRegNum(reg_context: RegisterContext) u8 {
};
}
/// Selects the byte-slice type used to expose register storage for a given context pointer type.
///
/// `ContextPtrType` must be a pointer to `std.debug.ThreadContext`; anything else is a compile error.
/// Returns `[]const u8` when the pointer is const, and `[]u8` otherwise.
fn RegBytesReturnType(comptime ContextPtrType: type) type {
    const info = @typeInfo(ContextPtrType);
    if (info != .Pointer or info.Pointer.child != std.debug.ThreadContext) {
        // Name the offending type itself; `@TypeOf(ContextPtrType)` is always `type`.
        @compileError("Expected a pointer to std.debug.ThreadContext, got " ++ @typeName(ContextPtrType));
    }

    return if (info.Pointer.is_const) []const u8 else []u8;
}
pub const RegisterContext = struct {
eh_frame: bool,
is_macho: bool,
@ -63,9 +54,47 @@ pub const AbiError = error{
InvalidRegister,
UnimplementedArch,
UnimplementedOs,
RegisterContextRequired,
ThreadContextNotSupported,
};
/// Computes the single-item pointer type that `regValueNative` returns for a given context
/// pointer type and register value type `T`.
///
/// The result is derived from the slice type produced by `RegBytesReturnType`: constness,
/// volatility, allowzero, alignment and address space are carried over unchanged, while the
/// pointer becomes a one-item pointer to `T` with no sentinel.
fn RegValueReturnType(comptime ContextPtrType: type, comptime T: type) type {
    // Copy the slice's pointer description, then retarget it at a single `T`.
    var ptr_info = @typeInfo(RegBytesReturnType(ContextPtrType)).Pointer;
    ptr_info.size = .One;
    ptr_info.child = T;
    ptr_info.sentinel = null;
    return @Type(.{ .Pointer = ptr_info });
}
/// Returns a pointer to the storage of register `reg_number` inside `thread_context_ptr`,
/// viewed as a value of type `T`.
///
/// The register's backing bytes are obtained from `regBytes`; any error it reports is propagated.
/// Returns `error.IncompatibleRegisterSize` when the register storage is not exactly
/// `@sizeOf(T)` bytes long.
pub fn regValueNative(
    comptime T: type,
    thread_context_ptr: anytype,
    reg_number: u8,
    reg_context: ?RegisterContext,
) !RegValueReturnType(@TypeOf(thread_context_ptr), T) {
    const storage = try regBytes(thread_context_ptr, reg_number, reg_context);
    if (storage.len == @sizeOf(T)) {
        return mem.bytesAsValue(T, storage[0..@sizeOf(T)]);
    }
    return error.IncompatibleRegisterSize;
}
/// Selects the byte-slice type used to expose register storage for a given context pointer type.
///
/// `ContextPtrType` must be a pointer to `std.debug.ThreadContext`; anything else is a compile error.
/// Returns `[]const u8` when the pointer is const, and `[]u8` otherwise.
fn RegBytesReturnType(comptime ContextPtrType: type) type {
    const info = @typeInfo(ContextPtrType);
    if (info != .Pointer or info.Pointer.child != std.debug.ThreadContext) {
        // Name the offending type itself; `@TypeOf(ContextPtrType)` is always `type`.
        @compileError("Expected a pointer to std.debug.ThreadContext, got " ++ @typeName(ContextPtrType));
    }

    return if (info.Pointer.is_const) []const u8 else []u8;
}
/// Returns a slice containing the backing storage for `reg_number`.
///
/// `reg_context` describes in what context the register number is used, as it can have different

View File

@ -2064,3 +2064,315 @@ pub const UNWIND_ARM64_FRAME_D14_D15_PAIR: u32 = 0x00000800;
pub const UNWIND_ARM64_FRAMELESS_STACK_SIZE_MASK: u32 = 0x00FFF000;
pub const UNWIND_ARM64_DWARF_SECTION_OFFSET: u32 = 0x00FFFFFF;
/// Bit-field view of a 32-bit compact unwind encoding.
///
/// Fields are laid out from the least significant bit upward: a 24-bit architecture-specific
/// `value`, the `mode`, then `personality_index`, `has_lsda` and `start`.
/// Which member of `value` is meaningful depends on the target architecture and on `mode`.
pub const CompactUnwindEncoding = packed struct(u32) {
    value: packed union {
        x86_64: packed union {
            /// Layout read by `unwindFrame` for `.RBP_FRAME`.
            frame: packed struct(u24) {
                /// Compact-unwind register numbers of the saved registers; 0 means the slot is unused.
                reg4: u3,
                reg3: u3,
                reg2: u3,
                reg1: u3,
                reg0: u3,
                unused: u1 = 0,
                /// Distance below the frame pointer of the saved-register area, in pointer-sized units.
                frame_offset: u8,
            },
            /// Layout read by `unwindFrame` for `.STACK_IMMD`.
            frameless: packed struct(u24) {
                /// Encoded ordering of the saved registers.
                stack_reg_permutation: u10,
                /// Number of saved registers described by `stack_reg_permutation`.
                stack_reg_count: u3,
                stack_adjust: u3,
                /// Stack size in pointer-sized units.
                stack_size: u8,
            },
            // NOTE(review): presumably the offset of the matching DWARF FDE - confirm.
            dwarf: u24,
        },
        arm64: packed union {
            frame: packed struct(u24) {
                /// Bits 0-4: one flag per saved X register pair.
                x_reg_pairs: packed struct(u5) {
                    x19_x20: u1,
                    x21_x22: u1,
                    x23_x24: u1,
                    x25_x26: u1,
                    x27_x28: u1,
                },
                /// Bits 5-7 are not assigned to any register pair. They keep `d_reg_pairs` at
                /// bit 8 so that `d14_d15` lines up with `UNWIND_ARM64_FRAME_D14_D15_PAIR` (0x800).
                reserved: u3 = 0,
                /// Bits 8-11: one flag per saved D register pair.
                d_reg_pairs: packed struct(u4) {
                    d8_d9: u1,
                    d10_d11: u1,
                    d12_d13: u1,
                    d14_d15: u1,
                },
                unused: u12 = 0,
            },
            frameless: packed struct(u24) {
                unused: u12 = 0,
                /// Occupies the bits covered by `UNWIND_ARM64_FRAMELESS_STACK_SIZE_MASK`.
                stack_size: u12,
            },
            /// Occupies the bits covered by `UNWIND_ARM64_DWARF_SECTION_OFFSET`.
            dwarf: u24,
        },
    },
    /// Unwind mode; must be read through the member matching the target architecture.
    mode: packed union {
        x86_64: UNWIND_X86_64_MODE,
        arm64: UNWIND_ARM64_MODE,
    },
    personality_index: u2,
    has_lsda: u1,
    start: u1,
};
/// Translates an x86_64 register number as stored in compact unwind info into the
/// corresponding DWARF register number.
///
/// Returns `error.InvalidUnwindRegisterNumber` for numbers that have no mapping (0 and 7).
fn dwarfRegNumber(unwind_reg_number: u3) !u8 {
    // Indexed by compact-unwind register number; `null` marks numbers without a mapping.
    const dwarf_numbers = [8]?u8{
        null,
        3, // RBX
        12, // R12
        13, // R13
        14, // R14
        15, // R15
        6, // RBP
        null,
    };

    if (dwarf_numbers[unwind_reg_number]) |dwarf_number| return dwarf_number;
    return error.InvalidUnwindRegisterNumber;
}
const dwarf = std.dwarf;
const abi = dwarf.abi;
/// Unwinds one stack frame using compact unwind information.
///
/// `unwind_info` is the raw contents of a `__unwind_info` section and `module_base_address` is
/// subtracted from `context.pc` to obtain the address that is looked up in it.
///
/// On success the restored register values are written into `context.thread_context`,
/// `context.pc` is set to the return address (minus one when non-zero), and the return
/// address is returned.
///
/// Errors:
/// - `error.MissingUnwindInfo`: the index is empty or has no second-level page for the address.
/// - `error.InvalidUnwindInfo`: the section contents are inconsistent or point at invalid memory.
/// - `error.NoUnwindInfo`: the matching entry has an encoding of zero.
/// - `error.RequiresDWARFUnwind`: the encoding defers to DWARF unwinding.
/// - `error.UnimplementedUnwindEncoding` / `error.UnimplementedArch`: not yet supported.
/// - Any error reported by `abi.regValueNative` or `dwarfRegNumber`.
pub fn unwindFrame(context: *dwarf.UnwindContext, unwind_info: []const u8, module_base_address: usize) !usize {
    const header = mem.bytesAsValue(
        unwind_info_section_header,
        unwind_info[0..@sizeOf(unwind_info_section_header)],
    );
    const indices = mem.bytesAsSlice(
        unwind_info_section_header_index_entry,
        unwind_info[header.indexSectionOffset..][0 .. header.indexCount * @sizeOf(unwind_info_section_header_index_entry)],
    );
    if (indices.len == 0) return error.MissingUnwindInfo;

    const mapped_pc = context.pc - module_base_address;

    // First level: find the last index entry whose functionOffset is <= mapped_pc.
    const second_level_index = blk: {
        var left: usize = 0;
        var len: usize = indices.len;

        while (len > 1) {
            const mid = left + len / 2;
            const offset = indices[mid].functionOffset;
            if (mapped_pc < offset) {
                len /= 2;
            } else {
                left = mid;
                if (mapped_pc == offset) break;
                len -= len / 2;
            }
        }

        // Last index is a sentinel containing the highest address as its functionOffset
        if (len == 0 or indices[left].secondLevelPagesSectionOffset == 0) return error.MissingUnwindInfo;
        break :blk &indices[left];
    };

    const common_encodings = mem.bytesAsSlice(
        compact_unwind_encoding_t,
        unwind_info[header.commonEncodingsArraySectionOffset..][0 .. header.commonEncodingsArrayCount * @sizeOf(compact_unwind_encoding_t)],
    );

    const start_offset = second_level_index.secondLevelPagesSectionOffset;
    const kind = mem.bytesAsValue(
        UNWIND_SECOND_LEVEL,
        unwind_info[start_offset..][0..@sizeOf(UNWIND_SECOND_LEVEL)],
    );

    // Second level: locate the entry covering mapped_pc and fetch its raw encoding.
    const raw_encoding = switch (kind.*) {
        .REGULAR => blk: {
            const page_header = mem.bytesAsValue(
                unwind_info_regular_second_level_page_header,
                unwind_info[start_offset..][0..@sizeOf(unwind_info_regular_second_level_page_header)],
            );

            const entries = mem.bytesAsSlice(
                unwind_info_regular_second_level_entry,
                unwind_info[start_offset + page_header.entryPageOffset ..][0 .. page_header.entryCount * @sizeOf(unwind_info_regular_second_level_entry)],
            );
            if (entries.len == 0) return error.InvalidUnwindInfo;

            var left: usize = 0;
            var len: usize = entries.len;
            while (len > 1) {
                const mid = left + len / 2;
                const offset = entries[mid].functionOffset;
                if (mapped_pc < offset) {
                    len /= 2;
                } else {
                    left = mid;
                    if (mapped_pc == offset) break;
                    len -= len / 2;
                }
            }

            if (len == 0) return error.InvalidUnwindInfo;
            break :blk entries[left].encoding;
        },
        .COMPRESSED => blk: {
            const page_header = mem.bytesAsValue(
                unwind_info_compressed_second_level_page_header,
                unwind_info[start_offset..][0..@sizeOf(unwind_info_compressed_second_level_page_header)],
            );

            const entries = mem.bytesAsSlice(
                UnwindInfoCompressedEntry,
                unwind_info[start_offset + page_header.entryPageOffset ..][0 .. page_header.entryCount * @sizeOf(UnwindInfoCompressedEntry)],
            );
            if (entries.len == 0) return error.InvalidUnwindInfo;

            var left: usize = 0;
            var len: usize = entries.len;
            while (len > 1) {
                const mid = left + len / 2;
                // Compressed entries store offsets relative to the first-level entry.
                const offset = second_level_index.functionOffset + entries[mid].funcOffset;
                if (mapped_pc < offset) {
                    len /= 2;
                } else {
                    left = mid;
                    if (mapped_pc == offset) break;
                    len -= len / 2;
                }
            }

            if (len == 0) return error.InvalidUnwindInfo;
            const entry = entries[left];
            // Indices below the common count refer to the shared table; the rest are page-local.
            if (entry.encodingIndex < header.commonEncodingsArrayCount) {
                if (entry.encodingIndex >= common_encodings.len) return error.InvalidUnwindInfo;
                break :blk common_encodings[entry.encodingIndex];
            } else {
                const local_index = try std.math.sub(
                    u8,
                    entry.encodingIndex,
                    std.math.cast(u8, header.commonEncodingsArrayCount) orelse return error.InvalidUnwindInfo,
                );
                const local_encodings = mem.bytesAsSlice(
                    compact_unwind_encoding_t,
                    unwind_info[start_offset + page_header.encodingsPageOffset ..][0 .. page_header.encodingsCount * @sizeOf(compact_unwind_encoding_t)],
                );
                if (local_index >= local_encodings.len) return error.InvalidUnwindInfo;
                break :blk local_encodings[local_index];
            }
        },
        else => return error.InvalidUnwindInfo,
    };

    if (raw_encoding == 0) return error.NoUnwindInfo;
    const reg_context = dwarf.abi.RegisterContext{
        .eh_frame = false,
        .is_macho = true,
    };

    const encoding: CompactUnwindEncoding = @bitCast(raw_encoding);
    const new_ip = switch (builtin.cpu.arch) {
        .x86_64 => switch (encoding.mode.x86_64) {
            .OLD => return error.UnimplementedUnwindEncoding,
            .RBP_FRAME => blk: {
                const regs: [5]u3 = .{
                    encoding.value.x86_64.frame.reg0,
                    encoding.value.x86_64.frame.reg1,
                    encoding.value.x86_64.frame.reg2,
                    encoding.value.x86_64.frame.reg3,
                    encoding.value.x86_64.frame.reg4,
                };

                // Widen before scaling: the raw field is a u8 and `u8 * 8` overflows for values >= 32.
                const frame_offset = @as(usize, encoding.value.x86_64.frame.frame_offset) * @sizeOf(usize);
                var max_reg: usize = 0;
                inline for (regs, 0..) |reg, i| {
                    if (reg > 0) max_reg = i;
                }

                const fp = (try abi.regValueNative(usize, context.thread_context, abi.fpRegNum(reg_context), reg_context)).*;
                const new_sp = fp + 2 * @sizeOf(usize);

                // Verify the stack range we're about to read register values from is valid
                if (!context.isValidMemory(new_sp) or !context.isValidMemory(fp - frame_offset + max_reg * @sizeOf(usize))) return error.InvalidUnwindInfo;

                // The saved frame pointer is read from [fp] and the return address from the slot after it.
                const ip_ptr = fp + @sizeOf(usize);
                const new_ip = @as(*const usize, @ptrFromInt(ip_ptr)).*;
                const new_fp = @as(*const usize, @ptrFromInt(fp)).*;

                (try abi.regValueNative(usize, context.thread_context, abi.fpRegNum(reg_context), reg_context)).* = new_fp;
                (try abi.regValueNative(usize, context.thread_context, abi.spRegNum(reg_context), reg_context)).* = new_sp;
                (try abi.regValueNative(usize, context.thread_context, abi.ipRegNum(), reg_context)).* = new_ip;

                for (regs, 0..) |reg, i| {
                    if (reg == 0) continue;
                    const addr = fp - frame_offset + i * @sizeOf(usize);
                    const reg_number = try dwarfRegNumber(reg);
                    (try abi.regValueNative(usize, context.thread_context, reg_number, reg_context)).* = @as(*const usize, @ptrFromInt(addr)).*;
                }

                break :blk new_ip;
            },
            .STACK_IMMD => blk: {
                const sp = (try abi.regValueNative(usize, context.thread_context, abi.spRegNum(reg_context), reg_context)).*;

                // Stack size in pointer-sized slots, widened so the subtractions below are
                // checked in `usize` instead of trapping (or wrapping) in `u8` on malformed data.
                const stack_size: usize = encoding.value.x86_64.frameless.stack_size;

                // Decode Lehmer-coded sequence of registers.
                // For a description of the encoding see lib/libc/include/any-macos.13-any/mach-o/compact_unwind_encoding.h

                // Decode the variable-based permutation number into its digits. Each digit represents
                // an index into the list of register numbers that weren't yet used in the sequence at
                // the time the digit was added.
                const reg_count = encoding.value.x86_64.frameless.stack_reg_count;
                const ip_ptr = if (reg_count > 0) reg_blk: {
                    var digits: [6]u3 = undefined;
                    // The 3-bit count can hold 7, but only six registers are representable.
                    if (reg_count > digits.len) return error.InvalidUnwindInfo;

                    // NOTE(review): LLVM's encoder appears to use a least-significant radix of
                    // `7 - reg_count`; starting at 2 seems to match only reg_count == 5 - confirm.
                    var accumulator: usize = encoding.value.x86_64.frameless.stack_reg_permutation;
                    var base: usize = 2;
                    for (0..reg_count) |i| {
                        const div = accumulator / base;
                        digits[digits.len - 1 - i] = @intCast(accumulator - base * div);
                        accumulator = div;
                        base += 1;
                    }

                    const reg_numbers = [_]u3{ 1, 2, 3, 4, 5, 6 };
                    var registers: [reg_numbers.len]u3 = undefined;
                    var used_indices = [_]bool{false} ** reg_numbers.len;
                    for (digits[digits.len - reg_count ..], 0..) |target_unused_index, i| {
                        var unused_count: u8 = 0;
                        const unused_index = for (used_indices, 0..) |used, index| {
                            if (!used) {
                                if (target_unused_index == unused_count) break index;
                                unused_count += 1;
                            }
                        } else unreachable;

                        registers[i] = reg_numbers[unused_index];
                        used_indices[unused_index] = true;
                    }

                    // Saved registers start `reg_count + 1` slots below the top of the frame.
                    const first_reg_slot = std.math.sub(usize, stack_size, @as(usize, reg_count) + 1) catch return error.InvalidUnwindInfo;
                    var reg_addr = sp + first_reg_slot * @sizeOf(usize);
                    if (!context.isValidMemory(reg_addr)) return error.InvalidUnwindInfo;
                    for (0..reg_count) |i| {
                        const reg_number = try dwarfRegNumber(registers[i]);
                        (try abi.regValueNative(usize, context.thread_context, reg_number, reg_context)).* = @as(*const usize, @ptrFromInt(reg_addr)).*;
                        reg_addr += @sizeOf(usize);
                    }

                    // After the last saved register, reg_addr points at the return address slot.
                    break :reg_blk reg_addr;
                } else no_reg_blk: {
                    const ip_slot = std.math.sub(usize, stack_size, 1) catch return error.InvalidUnwindInfo;
                    break :no_reg_blk sp + ip_slot * @sizeOf(usize);
                };

                const new_ip = @as(*const usize, @ptrFromInt(ip_ptr)).*;
                const new_sp = ip_ptr + @sizeOf(usize);
                if (!context.isValidMemory(new_sp)) return error.InvalidUnwindInfo;

                (try abi.regValueNative(usize, context.thread_context, abi.spRegNum(reg_context), reg_context)).* = new_sp;
                (try abi.regValueNative(usize, context.thread_context, abi.ipRegNum(), reg_context)).* = new_ip;

                break :blk new_ip;
            },
            .STACK_IND => {
                return error.UnimplementedUnwindEncoding; // TODO
            },
            .DWARF => return error.RequiresDWARFUnwind,
        },
        // Decode the mode with the arm64 enum: its tag values differ from the x86_64 ones, so
        // reading it through `mode.x86_64` would misclassify DWARF and frame-based entries.
        .aarch64 => switch (encoding.mode.arm64) {
            .DWARF => return error.RequiresDWARFUnwind,
            else => return error.UnimplementedUnwindEncoding,
        },
        else => return error.UnimplementedArch,
    };

    context.pc = new_ip;
    // Step back by one so later lookups use an address before the return address.
    if (context.pc > 0) context.pc -= 1;

    return new_ip;
}

View File

@ -16,6 +16,7 @@ pub fn build(b: *std.Build) void {
.optimize = optimize,
});
if (target.isDarwin()) exe.unwind_tables = true;
exe.omit_frame_pointer = true;
const run_cmd = b.addRunArtifact(exe);
@ -43,6 +44,7 @@ pub fn build(b: *std.Build) void {
.optimize = optimize,
});
if (target.isDarwin()) exe.unwind_tables = true;
exe.omit_frame_pointer = true;
exe.linkLibrary(c_shared_lib);

View File

@ -1,4 +1,5 @@
const std = @import("std");
const builtin = @import("builtin");
const debug = std.debug;
const testing = std.testing;
@ -18,6 +19,24 @@ noinline fn frame3(expected: *[4]usize, unwound: *[4]usize) void {
}
// Second frame of the unwind test call chain: records its own return address in
// `expected[1]` and then calls `frame3`.
// On macOS/x86_64 it first overwrites several registers via inline assembly.
noinline fn frame2(expected: *[4]usize, unwound: *[4]usize) void {
if (builtin.os.tag == .macos) {
// Exercise different __unwind_info encodings by forcing some registers to be restored
switch (builtin.cpu.arch) {
.x86_64 => {
// Every register written here is also listed as a clobber.
asm volatile (
\\movq $3, %%rbx
\\movq $12, %%r12
\\movq $13, %%r13
\\movq $14, %%r14
\\movq $15, %%r15
\\movq $6, %%rbp
::: "rbx", "r12", "r13", "r14", "r15", "rbp");
},
// No registers are forced on aarch64 or other architectures.
.aarch64 => {},
else => {},
}
}
expected[1] = @returnAddress();
frame3(expected, unwound);
}