- add DWARF expression parser

- change read apis to use a stream
- add register formatters
This commit is contained in:
kcbanner 2023-05-09 01:35:22 -04:00
parent 38303d7b9c
commit f3f3c877e0
3 changed files with 395 additions and 73 deletions

View File

@ -1,7 +1,7 @@
const std = @import("../std.zig");
/// Writes the fallback name used for a register number that has no known name:
/// the text `reg` followed by the register number (for example `reg7`).
/// Any error returned by `writer` is propagated to the caller.
fn writeUnknownReg(writer: anytype, reg_number: u8) !void {
    // Exactly one print: the name must appear once in the output.
    try writer.print("reg{}", .{reg_number});
}
pub fn writeRegisterName(writer: anytype, arch: ?std.Target.Cpu.Arch, reg_number: u8) !void {
@ -17,11 +17,11 @@ pub fn writeRegisterName(writer: anytype, arch: ?std.Target.Cpu.Arch, reg_number
5 => try writer.writeAll("RDI"),
6 => try writer.writeAll("RBP"),
7 => try writer.writeAll("RSP"),
8...15 => try writer.print("R{}", .{ reg_number }),
8...15 => try writer.print("R{}", .{reg_number}),
16 => try writer.writeAll("RIP"),
17...32 => try writer.print("XMM{}", .{ reg_number - 17 }),
33...40 => try writer.print("ST{}", .{ reg_number - 33 }),
41...48 => try writer.print("MM{}", .{ reg_number - 41 }),
17...32 => try writer.print("XMM{}", .{reg_number - 17}),
33...40 => try writer.print("ST{}", .{reg_number - 33}),
41...48 => try writer.print("MM{}", .{reg_number - 41}),
49 => try writer.writeAll("RFLAGS"),
50 => try writer.writeAll("ES"),
51 => try writer.writeAll("CS"),
@ -38,9 +38,9 @@ pub fn writeRegisterName(writer: anytype, arch: ?std.Target.Cpu.Arch, reg_number
64 => try writer.writeAll("MXCSR"),
65 => try writer.writeAll("FCW"),
66 => try writer.writeAll("FSW"),
67...82 => try writer.print("XMM{}", .{ reg_number - 51 }),
67...82 => try writer.print("XMM{}", .{reg_number - 51}),
// 83-117 Reserved
118...125 => try writer.print("K{}", .{ reg_number - 118 }),
118...125 => try writer.print("K{}", .{reg_number - 118}),
// 126-129 Reserved
else => try writeUnknownReg(writer, reg_number),
}
@ -52,3 +52,23 @@ pub fn writeRegisterName(writer: anytype, arch: ?std.Target.Cpu.Arch, reg_number
}
} else try writeUnknownReg(writer, reg_number);
}
/// Payload carried by the formatter that `fmtRegister` returns and that
/// `formatRegister` consumes.
const FormatRegisterData = struct {
    /// Register number that `formatRegister` passes to `writeRegisterName`.
    reg_number: u8,
    /// Architecture that `formatRegister` passes to `writeRegisterName`; may be null.
    arch: ?std.Target.Cpu.Arch,
};
/// `std.fmt` format function that prints the name of the register described by `data`.
///
/// The format specifier and the format options are ignored; the output is whatever
/// `writeRegisterName` writes for `data.arch` / `data.reg_number`, and any error it
/// returns is propagated.
pub fn formatRegister(
    data: FormatRegisterData,
    comptime fmt: []const u8,
    options: std.fmt.FormatOptions,
    writer: anytype,
) !void {
    // Neither of these influences how a register name is rendered.
    _ = options;
    _ = fmt;

    const arch = data.arch;
    const reg_number = data.reg_number;
    return writeRegisterName(writer, arch, reg_number);
}
/// Bundles a register number and an optional architecture into a `std.fmt.Formatter`
/// backed by `formatRegister`, so the register can be passed as a `writer.print` argument.
pub fn fmtRegister(reg_number: u8, arch: ?std.Target.Cpu.Arch) std.fmt.Formatter(formatRegister) {
    const data = FormatRegisterData{
        .arch = arch,
        .reg_number = reg_number,
    };
    return .{ .data = data };
}

View File

@ -3,18 +3,13 @@ const debug = std.debug;
const leb = @import("../leb128.zig");
const abi = @import("abi.zig");
const dwarf = @import("../dwarf.zig");
const expressions = @import("expressions.zig");
// These enum values correspond to the opcode encoding itself, with
// the exception of the opcodes that include data in the opcode itself.
// For those, the enum value is the opcode with the lower 6 bits (the data) masked to 0.
const Opcode = enum(u8) {
// These are placeholders that define the range of vendor-specific opcodes
const lo_user = 0x1c;
const hi_user = 0x3f;
advance_loc = 0x1 << 6,
offset = 0x2 << 6,
restore = 0x3 << 6,
nop = 0x00,
set_loc = 0x01,
advance_loc1 = 0x02,
@ -39,7 +34,17 @@ const Opcode = enum(u8) {
val_offset_sf = 0x15,
val_expression = 0x16,
_,
// These opcodes encode an operand in the lower 6 bits of the opcode itself
pub const lo_inline = Opcode.advance_loc;
pub const hi_inline = Opcode.restore;
// These opcodes are trailed by zero or more operands
pub const lo_reserved = Opcode.nop;
pub const hi_reserved = Opcode.val_expression;
// Vendor-specific opcodes
pub const lo_user = 0x1c;
pub const hi_user = 0x3f;
};
const Operand = enum {
@ -70,11 +75,12 @@ const Operand = enum {
fn read(
comptime self: Operand,
reader: anytype,
stream: *std.io.FixedBufferStream([]const u8),
opcode_value: ?u6,
addr_size_bytes: u8,
endian: std.builtin.Endian,
) !Storage(self) {
const reader = stream.reader();
return switch (self) {
.opcode_delta, .opcode_register => opcode_value orelse return error.InvalidOperand,
.uleb128_register => try leb.readULEB128(u8, reader),
@ -91,13 +97,13 @@ const Operand = enum {
.u32_delta => try reader.readInt(u32, endian),
.block => {
const block_len = try leb.readULEB128(u64, reader);
if (stream.pos + block_len > stream.buffer.len) return error.InvalidOperand;
// TODO: This feels like a kludge, change to FixedBufferStream param?
const block = reader.context.buffer[reader.context.pos..][0..block_len];
const block = stream.buffer[stream.pos..][0..block_len];
reader.context.pos += block_len;
return block;
}
},
};
}
};
@ -133,11 +139,16 @@ fn InstructionType(comptime definition: anytype) type {
const Self = @This();
operands: InstructionOperands,
pub fn read(reader: anytype, opcode_value: ?u6, addr_size_bytes: u8, endian: std.builtin.Endian) !Self {
pub fn read(
stream: *std.io.FixedBufferStream([]const u8),
opcode_value: ?u6,
addr_size_bytes: u8,
endian: std.builtin.Endian,
) !Self {
var operands: InstructionOperands = undefined;
inline for (definition_type.Struct.fields) |definition_field| {
const operand = comptime std.enums.nameCast(Operand, @field(definition, definition_field.name));
@field(operands, definition_field.name) = try operand.read(reader, opcode_value, addr_size_bytes, endian);
@field(operands, definition_field.name) = try operand.read(stream, opcode_value, addr_size_bytes, endian);
}
return .{ .operands = operands };
@ -173,37 +184,44 @@ pub const Instruction = union(Opcode) {
val_offset_sf: InstructionType(.{ .a = .uleb128_offset, .b = .sleb128_offset }),
val_expression: InstructionType(.{ .a = .uleb128_offset, .block = .block }),
/// Decodes one call frame instruction from `stream`: reads the opcode byte, then lets the
/// payload type of the matching `Instruction` field read its operands from the same stream.
///
/// Opcodes from `Opcode.lo_inline` up to `Opcode.hi_inline` with all six low bits set carry an
/// operand in the low six bits of the opcode byte itself. The upper two bits select the
/// instruction and the low six bits are handed to the payload reader as `opcode_value`.
/// Opcodes in `Opcode.lo_reserved...Opcode.hi_reserved` map directly to an `Opcode` value and
/// are read with a null `opcode_value`.
///
/// Returns `error.UnimplementedUserOpcode` for opcodes in `Opcode.lo_user...Opcode.hi_user` and
/// `error.InvalidOpcode` for every other opcode. Errors from reading the opcode byte or the
/// operands are propagated.
pub fn read(
    stream: *std.io.FixedBufferStream([]const u8),
    addr_size_bytes: u8,
    endian: std.builtin.Endian,
) !Instruction {
    // NOTE(review): raised from 1800 as headroom for the additional inline prongs below;
    // tune as needed.
    @setEvalBranchQuota(4000);

    return switch (try stream.reader().readByte()) {
        // `hi_inline` is the highest inline opcode with a zero operand. OR in the operand mask
        // so that the range also covers that opcode with a non-zero operand in its low bits.
        inline @enumToInt(Opcode.lo_inline)...(@enumToInt(Opcode.hi_inline) | 0b111111) => |opcode| blk: {
            const e = @intToEnum(Opcode, opcode & 0b11000000);
            const payload_type = std.meta.TagPayload(Instruction, e);
            const value = try payload_type.read(stream, @intCast(u6, opcode & 0b111111), addr_size_bytes, endian);
            break :blk @unionInit(Instruction, @tagName(e), value);
        },
        inline @enumToInt(Opcode.lo_reserved)...@enumToInt(Opcode.hi_reserved) => |opcode| blk: {
            const e = @intToEnum(Opcode, opcode);
            const payload_type = std.meta.TagPayload(Instruction, e);
            const value = try payload_type.read(stream, null, addr_size_bytes, endian);
            break :blk @unionInit(Instruction, @tagName(e), value);
        },
        Opcode.lo_user...Opcode.hi_user => error.UnimplementedUserOpcode,
        else => error.InvalidOpcode,
    };
}
pub fn writeOperands(self: Instruction, writer: anytype, cie: dwarf.CommonInformationEntry, arch: ?std.Target.Cpu.Arch) !void {
pub fn writeOperands(
self: Instruction,
writer: anytype,
cie: dwarf.CommonInformationEntry,
arch: ?std.Target.Cpu.Arch,
addr_size_bytes: u8,
endian: std.builtin.Endian,
) !void {
switch (self) {
inline .advance_loc, .advance_loc1, .advance_loc2, .advance_loc4 => |i| try writer.print("{}", .{ i.operands.delta * cie.code_alignment_factor }),
inline .advance_loc, .advance_loc1, .advance_loc2, .advance_loc4 => |i| try writer.print("{}", .{i.operands.delta * cie.code_alignment_factor}),
.offset => |i| {
try abi.writeRegisterName(writer, arch, i.operands.register);
try writer.print(" {}", .{ @intCast(i64, i.operands.offset) * cie.data_alignment_factor });
try writer.print(" {}", .{@intCast(i64, i.operands.offset) * cie.data_alignment_factor});
},
.restore => {},
.nop => {},
@ -217,14 +235,14 @@ pub const Instruction = union(Opcode) {
.restore_state => {},
.def_cfa => |i| {
try abi.writeRegisterName(writer, arch, i.operands.register);
try writer.print(" {}", .{ fmtOffset(@intCast(i64, i.operands.offset)) });
try writer.print(" {d:<1}", .{@intCast(i64, i.operands.offset)});
},
.def_cfa_register => {},
.def_cfa_offset => |i| {
try writer.print("{}", .{ fmtOffset(@intCast(i64, i.operands.offset)) });
try writer.print("{d:<1}", .{@intCast(i64, i.operands.offset)});
},
.def_cfa_expression => |i| {
try writer.print("TODO(parse expressions data {x})", .{ std.fmt.fmtSliceHexLower(i.operands.block) });
try writeExpression(writer, i.operands.block, arch, addr_size_bytes, endian);
},
.expression => {},
.offset_extended_sf => {},
@ -235,23 +253,83 @@ pub const Instruction = union(Opcode) {
.val_expression => {},
}
}
};
/// Writes a human-readable listing of the DWARF expression bytes in `block` to `writer`.
///
/// Each opcode is printed as `DW_OP_<name>`, with entries separated by ", ". Opcodes that have
/// no entry in the name table are printed as `<unknown vendor opcode: 0x..>` when they lie in
/// `dwarf.OP.lo_user...dwarf.OP.hi_user` and as `<invalid opcode: 0x..>` otherwise. Register and
/// base-register operands are printed after the opcode name; other operand kinds are printed as
/// a `TODO(<kind>)` placeholder, and generic (constant) operands are not printed.
///
/// `addr_size_bytes` and `endian` select the `expressions.StackMachine` instantiation used to
/// decode operands. Returns `error.InvalidAddrSize` when `addr_size_bytes` is not 2, 4 or 8;
/// errors from the operand decoder and from `writer` are propagated.
fn writeExpression(
    writer: anytype,
    block: []const u8,
    arch: ?std.Target.Cpu.Arch,
    addr_size_bytes: u8,
    endian: std.builtin.Endian,
) !void {
    var stream = std.io.fixedBufferStream(block);

    // Generate a lookup table from opcode value to name
    const opcode_lut_len = 256;
    const opcode_lut: [opcode_lut_len]?[]const u8 = comptime blk: {
        var lut: [opcode_lut_len]?[]const u8 = [_]?[]const u8{null} ** opcode_lut_len;
        for (@typeInfo(dwarf.OP).Struct.decls) |decl| {
            lut[@as(u8, @field(dwarf.OP, decl.name))] = decl.name;
        }

        break :blk lut;
    };

    // The stack machine type is parameterized at comptime, so the runtime endianness and
    // address size are dispatched through inline prongs to obtain comptime-known values.
    switch (endian) {
        inline .Little, .Big => |e| {
            switch (addr_size_bytes) {
                inline 2, 4, 8 => |size| {
                    const StackMachine = expressions.StackMachine(.{
                        .addr_size = size,
                        .endian = e,
                        .call_frame_mode = true,
                    });

                    const reader = stream.reader();
                    while (stream.pos < stream.buffer.len) {
                        // Separator before every opcode except the first.
                        if (stream.pos > 0) try writer.writeAll(", ");

                        const opcode = try reader.readByte();
                        if (opcode_lut[opcode]) |opcode_name| {
                            try writer.print("DW_OP_{s}", .{opcode_name});
                        } else {
                            // TODO: See how llvm-dwarfdump prints these?
                            // The vendor range is inclusive on both ends: lo_user...hi_user.
                            if (opcode >= dwarf.OP.lo_user and opcode <= dwarf.OP.hi_user) {
                                try writer.print("<unknown vendor opcode: 0x{x}>", .{opcode});
                            } else {
                                try writer.print("<invalid opcode: 0x{x}>", .{opcode});
                            }
                        }

                        if (try StackMachine.readOperand(&stream, opcode)) |value| {
                            switch (value) {
                                .generic => {}, // Constant values are implied by the opcode name
                                .register => |v| try writer.print(" {}", .{abi.fmtRegister(v, arch)}),
                                .base_register => |v| try writer.print(" {}{d:<1}", .{ abi.fmtRegister(v.base_register, arch), v.offset }),
                                else => try writer.print(" TODO({s})", .{@tagName(value)}),
                            }
                        }
                    }
                },
                else => return error.InvalidAddrSize,
            }
        },
    }
}

/// `std.fmt` format function for a signed offset: writes a leading `+` for non-negative
/// values, then the value in base 10 using `options`. The format specifier is ignored.
fn formatOffset(data: i64, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void {
    _ = fmt;
    if (data >= 0) try writer.writeByte('+');
    return std.fmt.formatInt(data, 10, .lower, options, writer);
}
fn fmtOffset(offset: i64) std.fmt.Formatter(formatOffset) {
return .{ .data = offset };
}
// fn formatOffset(data: i64, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void {
// _ = fmt;
// if (data >= 0) try writer.writeByte('+');
// return std.fmt.formatInt(data, 10, .lower, options, writer);
// }
// fn fmtOffset(offset: i64) std.fmt.Formatter(formatOffset) {
// return .{ .data = offset };
// }
/// See section 6.4.1 of the DWARF5 specification
pub const VirtualMachine = struct {
const RegisterRule = union(enum) {
undefined: void,
same_value: void,
@ -263,11 +341,18 @@ pub const VirtualMachine = struct {
architectural: void,
};
const Column = struct {
pub const Column = struct {
register: u8 = undefined,
rule: RegisterRule = .{ .undefined = {} },
pub fn writeRule(self: Column, writer: anytype, is_cfa: bool, arch: ?std.Target.Cpu.Arch) !void {
pub fn writeRule(
self: Column,
writer: anytype,
is_cfa: bool,
arch: ?std.Target.Cpu.Arch,
addr_size_bytes: u8,
endian: std.builtin.Endian,
) !void {
if (is_cfa) {
try writer.writeAll("CFA");
} else {
@ -281,48 +366,54 @@ pub const VirtualMachine = struct {
.offset => |offset| {
if (is_cfa) {
try abi.writeRegisterName(writer, arch, self.register);
try writer.print("{}", .{ fmtOffset(offset) });
try writer.print("{d:<1}", .{offset});
} else {
try writer.print("[CFA{}]", .{ fmtOffset(offset) });
try writer.print("[CFA{d:<1}]", .{offset});
}
},
.val_offset => |offset| {
if (is_cfa) {
try abi.writeRegisterName(writer, arch, self.register);
try writer.print("{}", .{ fmtOffset(offset) });
try writer.print("{d:<1}", .{offset});
} else {
try writer.print("CFA{}", .{ fmtOffset(offset) });
try writer.print("CFA{d:<1}", .{offset});
}
},
.register => |register| try abi.writeRegisterName(writer, arch, register),
.expression => try writer.writeAll("TODO(expression)"),
.expression => |expression| try writeExpression(writer, expression, arch, addr_size_bytes, endian),
.val_expression => try writer.writeAll("TODO(val_expression)"),
.architectural => try writer.writeAll("TODO(architectural)"),
}
}
};
/// Each row contains unwinding rules for a set of registers at a specific location in the program.
pub const Row = struct {
/// Offset from pc_begin
offset: u64 = 0,
/// Special-case column that defines the CFA (Canonical Frame Address) rule.
/// The register field of this column defines the register that CFA is derived
/// from, while other columns define registers in terms of the CFA.
cfa: Column = .{},
/// Index into `columns` of the first column in this row
/// Index into `columns` of the first column in this row.
columns_start: usize = undefined,
columns_len: u8 = 0,
};
rows: std.ArrayListUnmanaged(Row) = .{},
columns: std.ArrayListUnmanaged(Column) = .{},
row_stack: std.ArrayListUnmanaged(Row) = .{},
current_row: Row = .{},
// TODO: Add stack machine stack
pub fn reset(self: *VirtualMachine) void {
self.rows.clearRetainingCapacity();
self.row_stack.clearRetainingCapacity();
self.columns.clearRetainingCapacity();
self.current_row = .{};
}
pub fn deinit(self: *VirtualMachine, allocator: std.mem.Allocator) void {
self.rows.deinit(allocator);
self.row_stack.deinit(allocator);
self.columns.deinit(allocator);
self.* = undefined;
}
@ -366,8 +457,20 @@ pub const VirtualMachine = struct {
.undefined => {},
.same_value => {},
.register => {},
.remember_state => {},
.restore_state => {},
.remember_state => {
// TODO: The row stack only actually needs the column information
// TODO: Also it needs to copy the columns because changes can edit the referenced columns
// TODO: This function could push the column range onto the stack, the copy the columns and update current row
try self.row_stack.append(allocator, self.current_row);
},
.restore_state => {
if (self.row_stack.items.len == 0) return error.InvalidOperation;
const row = self.row_stack.pop();
self.current_row.columns_len = row.columns_len;
self.current_row.columns_start = row.columns_start;
},
.def_cfa => |i| {
self.current_row.cfa = .{
.register = i.operands.register,
@ -376,11 +479,14 @@ pub const VirtualMachine = struct {
},
.def_cfa_register => {},
.def_cfa_offset => |i| {
self.current_row.cfa.rule = .{ .offset = @intCast(i64, i.operands.offset) };
},
.def_cfa_expression => |i| {
self.current_row.cfa.register = undefined;
self.current_row.cfa.rule = .{
.offset = @intCast(i64, i.operands.offset)
.expression = i.operands.block,
};
},
.def_cfa_expression => {},
.expression => {},
.offset_extended_sf => {},
.def_cfa_sf => {},
@ -390,5 +496,4 @@ pub const VirtualMachine = struct {
.val_expression => {},
}
}
};

View File

@ -0,0 +1,197 @@
const std = @import("std");
const builtin = @import("builtin");
const OP = @import("OP.zig");
const leb = @import("../leb128.zig");
/// Compile-time configuration for `StackMachine`.
pub const StackMachineOptions = struct {
    /// The address size of the target architecture, in bytes.
    /// Defaults to the size of `usize` on the compiling target.
    addr_size: u8 = @sizeOf(usize),

    /// Endianness of the target architecture
    endian: std.builtin.Endian = .Little,

    /// Restrict the stack machine to a subset of opcodes used in call frame instructions
    // NOTE(review): no code visible in this file reads this flag yet — confirm where it is enforced.
    call_frame_mode: bool = false,
};
/// A stack machine that can decode and run DWARF expressions.
/// Expressions can be decoded for non-native address size and endianness,
/// but can only be executed if the current target matches the configuration.
///
/// `options.addr_size` must be 2, 4 or 8 (bytes); any other value is a compile error.
pub fn StackMachine(comptime options: StackMachineOptions) type {
    const addr_type = switch (options.addr_size) {
        2 => u16,
        4 => u32,
        8 => u64,
        // The size is an integer, so it has to be formatted rather than concatenated.
        else => @compileError(std.fmt.comptimePrint("Unsupported address size of {d}", .{options.addr_size})),
    };

    const addr_type_signed = switch (options.addr_size) {
        2 => i16,
        4 => i32,
        8 => i64,
        else => @compileError(std.fmt.comptimePrint("Unsupported address size of {d}", .{options.addr_size})),
    };

    return struct {
        const Self = @This();

        /// A decoded operand, as returned by `readOperand`.
        const Value = union(enum) {
            generic: addr_type,
            const_type: []const u8,
            register: u8,
            base_register: struct {
                base_register: u8,
                offset: i64,
            },
            composite_location: struct {
                size: u64,
                offset: i64,
            },
            block: []const u8,
            base_type: struct {
                type_offset: u64,
                value_bytes: []const u8,
            },
            deref_type: struct {
                size: u8,
                offset: u64,
            },
        };

        stack: std.ArrayListUnmanaged(Value) = .{},

        /// Converts an integer of any width and signedness into a `.generic` value of the
        /// configured address type. Wider inputs are truncated; narrower or equal-width inputs
        /// are cast, with signed inputs going through the signed address type first.
        fn generic(value: anytype) Value {
            const int_info = @typeInfo(@TypeOf(value)).Int;
            if (@sizeOf(@TypeOf(value)) > options.addr_size) {
                return .{
                    .generic = switch (int_info.signedness) {
                        .signed => @bitCast(addr_type, @truncate(addr_type_signed, value)),
                        .unsigned => @truncate(addr_type, value),
                    },
                };
            } else {
                return .{
                    .generic = switch (int_info.signedness) {
                        .signed => @bitCast(addr_type, @intCast(addr_type_signed, value)),
                        .unsigned => @intCast(addr_type, value),
                    },
                };
            }
        }

        /// Returns the next `len` bytes of `stream` as a slice into its buffer and advances the
        /// stream past them. Returns `error.InvalidExpression` if fewer than `len` bytes remain.
        fn readBlock(stream: *std.io.FixedBufferStream([]const u8), len: usize) ![]const u8 {
            // Compare against the remaining byte count so the check itself cannot overflow.
            // NOTE(review): relies on `stream.pos <= stream.buffer.len`; confirm no caller sets
            // `pos` past the end of the buffer.
            if (len > stream.buffer.len - stream.pos) return error.InvalidExpression;
            const block = stream.buffer[stream.pos..][0..len];
            stream.pos += len;
            return block;
        }

        /// Reads the operand(s) that follow `opcode` from `stream`.
        ///
        /// `opcode` must already have been consumed from the stream. Returns null for opcodes
        /// that are not handled by any prong below (they are treated as having no operand),
        /// `error.UnimplementedUserOpcode` for opcodes in `OP.lo_user...OP.hi_user`, and
        /// `error.InvalidExpression` when an embedded block runs past the end of the buffer.
        /// Errors from the underlying reads are propagated.
        pub fn readOperand(stream: *std.io.FixedBufferStream([]const u8), opcode: u8) !?Value {
            const reader = stream.reader();
            return switch (opcode) {
                // NOTE(review): `call_ref` is read with the address size here — confirm against
                // the DWARF format (32-bit vs 64-bit offset) handling elsewhere.
                OP.addr,
                OP.call_ref,
                => generic(try reader.readInt(addr_type, options.endian)),
                OP.const1u,
                OP.pick,
                OP.deref_size,
                OP.xderef_size,
                => generic(try reader.readByte()),
                OP.const1s => generic(try reader.readByteSigned()),
                OP.const2u,
                OP.call2,
                => generic(try reader.readInt(u16, options.endian)),
                // DW_OP_call4 carries a 4-byte unsigned operand.
                OP.call4 => generic(try reader.readInt(u32, options.endian)),
                OP.const2s,
                OP.bra,
                OP.skip,
                => generic(try reader.readInt(i16, options.endian)),
                OP.const4u => generic(try reader.readInt(u32, options.endian)),
                OP.const4s => generic(try reader.readInt(i32, options.endian)),
                OP.const8u => generic(try reader.readInt(u64, options.endian)),
                OP.const8s => generic(try reader.readInt(i64, options.endian)),
                OP.constu,
                OP.plus_uconst,
                OP.addrx,
                OP.constx,
                OP.convert,
                OP.reinterpret,
                => generic(try leb.readULEB128(u64, reader)),
                OP.consts,
                OP.fbreg,
                => generic(try leb.readILEB128(i64, reader)),
                // The literal / register number is encoded in the opcode itself.
                OP.lit0...OP.lit31 => |n| generic(n - OP.lit0),
                OP.reg0...OP.reg31 => |n| .{ .register = n - OP.reg0 },
                OP.breg0...OP.breg31 => |n| .{
                    .base_register = .{
                        .base_register = n - OP.breg0,
                        .offset = try leb.readILEB128(i64, reader),
                    },
                },
                OP.regx => .{ .register = try leb.readULEB128(u8, reader) },
                // NOTE(review): DWARF v5 describes the second operand of DW_OP_regval_type as
                // an unsigned LEB128 type offset; confirm whether the signed read is intended.
                OP.bregx,
                OP.regval_type,
                => .{
                    .base_register = .{
                        .base_register = try leb.readULEB128(u8, reader),
                        .offset = try leb.readILEB128(i64, reader),
                    },
                },
                // Sizes are read at the full width of the `size` field so that pieces larger
                // than 255 are not rejected as overflow.
                OP.piece => .{
                    .composite_location = .{
                        .size = try leb.readULEB128(u64, reader),
                        .offset = 0,
                    },
                },
                // NOTE(review): DWARF v5 describes the bit offset as unsigned LEB128; confirm
                // whether the signed read is intended.
                OP.bit_piece => .{
                    .composite_location = .{
                        .size = try leb.readULEB128(u64, reader),
                        .offset = try leb.readILEB128(i64, reader),
                    },
                },
                OP.implicit_value,
                OP.entry_value,
                => blk: {
                    const size = try leb.readULEB128(usize, reader);
                    break :blk .{
                        .block = try readBlock(stream, size),
                    };
                },
                OP.const_type => blk: {
                    const type_offset = try leb.readULEB128(u64, reader);
                    const size = try reader.readByte();
                    const value_bytes = try readBlock(stream, size);
                    break :blk .{
                        .base_type = .{
                            .type_offset = type_offset,
                            .value_bytes = value_bytes,
                        },
                    };
                },
                OP.deref_type,
                OP.xderef_type,
                => .{
                    .deref_type = .{
                        .size = try reader.readByte(),
                        .offset = try leb.readULEB128(u64, reader),
                    },
                },
                OP.lo_user...OP.hi_user => return error.UnimplementedUserOpcode,
                else => null,
            };
        }

        /// Reads the next opcode from `stream`. Execution of the opcode is not implemented yet.
        ///
        /// Instantiating this function is a compile error unless the configured address size
        /// and endianness match the compiling target.
        pub fn step(
            self: *Self,
            stream: *std.io.FixedBufferStream([]const u8),
            allocator: std.mem.Allocator,
        ) !void {
            comptime {
                if (@sizeOf(usize) != @sizeOf(addr_type) or options.endian != builtin.target.cpu.arch.endian())
                    @compileError("Execution of non-native address sizes / endianness is not supported");
            }

            const opcode = try stream.reader().readByte();
            _ = opcode;
            _ = self;
            _ = allocator;

            // switch (opcode) {
            //     OP.addr => try self.stack.append(allocator, try readOperand(stream, opcode)),
            // }
        }
    };
}