diff --git a/CMakeLists.txt b/CMakeLists.txt index 9aeb481abe..27ed0ac73c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -575,6 +575,8 @@ set(ZIG_STAGE2_SOURCES "${CMAKE_SOURCE_DIR}/src/link/MachO/bind.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/commands.zig" "${CMAKE_SOURCE_DIR}/src/link/MachO/reloc.zig" + "${CMAKE_SOURCE_DIR}/src/link/MachO/reloc/aarch64.zig" + "${CMAKE_SOURCE_DIR}/src/link/MachO/reloc/x86_64.zig" "${CMAKE_SOURCE_DIR}/src/link/Wasm.zig" "${CMAKE_SOURCE_DIR}/src/link/C/zig.h" "${CMAKE_SOURCE_DIR}/src/link/msdos-stub.bin" diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index f58a46cbcf..7b5f23756a 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -310,6 +310,7 @@ pub fn parseSections(self: *Object) !void { break :relocs try reloc.parse( self.allocator, + self.arch.?, section.code, mem.bytesAsSlice(macho.relocation_info, raw_relocs), ); diff --git a/src/link/MachO/Zld.zig b/src/link/MachO/Zld.zig index 8e90e8d017..56a35a3bca 100644 --- a/src/link/MachO/Zld.zig +++ b/src/link/MachO/Zld.zig @@ -10,12 +10,12 @@ const macho = std.macho; const math = std.math; const log = std.log.scoped(.zld); const aarch64 = @import("../../codegen/aarch64.zig"); +const reloc = @import("reloc.zig"); const Allocator = mem.Allocator; const Archive = @import("Archive.zig"); const CodeSignature = @import("CodeSignature.zig"); const Object = @import("Object.zig"); -const Relocation = @import("reloc.zig").Relocation; const Symbol = @import("Symbol.zig"); const Trie = @import("Trie.zig"); @@ -1360,11 +1360,11 @@ fn resolveStubsAndGotEntries(self: *Zld) !void { for (object.sections.items) |sect| { const relocs = sect.relocs orelse continue; - for (relocs) |reloc| { - switch (reloc.@"type") { + for (relocs) |rel| { + switch (rel.@"type") { .unsigned => continue, - .got_page, .got_page_off => { - const sym = object.symtab.items[reloc.target.symbol]; + .got_page, .got_page_off, .got_load, .got => { + const sym = object.symtab.items[rel.target.symbol]; const sym_name = object.getString(sym.n_strx); if (self.got_entries.contains(sym_name)) continue; @@ -1383,7 +1383,9 @@ fn resolveStubsAndGotEntries(self: *Zld) !void { log.debug(" | found GOT entry {s}: {}", .{ sym_name, self.got_entries.get(sym_name) }); }, else => { - const sym = object.symtab.items[reloc.target.symbol]; + if (rel.target != .symbol) continue; + + const sym = object.symtab.items[rel.target.symbol]; const sym_name = object.getString(sym.n_strx); if (!Symbol.isUndef(sym)) continue; @@ -1442,20 +1444,23 @@ fn resolveRelocsAndWriteSections(self: *Zld) !void { for (relocs) |rel| { const source_addr = target_sect_addr + rel.offset; - var args: Relocation.ResolveArgs = .{ + var args: reloc.Relocation.ResolveArgs = .{ .source_addr = source_addr, .target_addr = undefined, - .subtractor = null, }; switch (rel.@"type") { .unsigned => { args.target_addr = try self.relocTargetAddr(@intCast(u16, object_id), rel.target); - const unsigned = rel.cast(Relocation.Unsigned) orelse unreachable; + const unsigned = rel.cast(reloc.Unsigned) orelse unreachable; if (unsigned.subtractor) |subtractor| { args.subtractor = try self.relocTargetAddr(@intCast(u16, object_id), subtractor); } + if (rel.target == .section) { + const source_sect = object.sections.items[rel.target.section]; + args.source_sect_addr = source_sect.inner.addr; + } rebases: { var hit: bool = false; @@ -1500,7 +1505,7 @@ fn resolveRelocsAndWriteSections(self: *Zld) !void { try self.threadlocal_offsets.append(self.allocator, args.target_addr - base_addr); } 
}, - .got_page, .got_page_off => { + .got_page, .got_page_off, .got_load, .got => { const dc_seg = self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; const got = dc_seg.sections.items[self.got_section_index.?]; const sym = object.symtab.items[rel.target.symbol]; @@ -1508,7 +1513,11 @@ fn resolveRelocsAndWriteSections(self: *Zld) !void { const entry = self.got_entries.get(sym_name) orelse unreachable; args.target_addr = got.addr + entry.index * @sizeOf(u64); }, - else => { + else => |tt| { + if (tt == .signed and rel.target == .section) { + const source_sect = object.sections.items[rel.target.section]; + args.source_sect_addr = source_sect.inner.addr; + } args.target_addr = try self.relocTargetAddr(@intCast(u16, object_id), rel.target); }, } @@ -1547,7 +1556,7 @@ fn resolveRelocsAndWriteSections(self: *Zld) !void { } } -fn relocTargetAddr(self: *Zld, object_id: u16, target: Relocation.Target) !u64 { +fn relocTargetAddr(self: *Zld, object_id: u16, target: reloc.Relocation.Target) !u64 { const object = self.objects.items[object_id]; const target_addr = blk: { switch (target) { diff --git a/src/link/MachO/reloc.zig b/src/link/MachO/reloc.zig index 6c796d3adc..57825149d1 100644 --- a/src/link/MachO/reloc.zig +++ b/src/link/MachO/reloc.zig @@ -1,5 +1,4 @@ const std = @import("std"); -const aarch64 = @import("../../codegen/aarch64.zig"); const assert = std.debug.assert; const log = std.log.scoped(.reloc); const macho = std.macho; @@ -7,6 +6,9 @@ const math = std.math; const mem = std.mem; const meta = std.meta; +const aarch64 = @import("reloc/aarch64.zig"); +const x86_64 = @import("reloc/x86_64.zig"); + const Allocator = mem.Allocator; pub const Relocation = struct { @@ -25,7 +27,8 @@ pub const Relocation = struct { pub const ResolveArgs = struct { source_addr: u64, target_addr: u64, - subtractor: ?u64, + subtractor: ?u64 = null, + source_sect_addr: ?u64 = null, }; pub fn resolve(base: *Relocation, args: ResolveArgs) !void { @@ -35,21 +38,28 @@ pub const Relocation = struct { log.debug(" | target address 0x{x}", .{args.target_addr}); if (args.subtractor) |sub| log.debug(" | subtractor address 0x{x}", .{sub}); + if (args.source_sect_addr) |addr| + log.debug(" | source section address 0x{x}", .{addr}); return switch (base.@"type") { - .branch => @fieldParentPtr(Branch, "base", base).resolve(args.source_addr, args.target_addr), - .unsigned => @fieldParentPtr(Unsigned, "base", base).resolve(args.target_addr, args.subtractor), - .page => @fieldParentPtr(Page, "base", base).resolve(args.source_addr, args.target_addr), - .page_off => @fieldParentPtr(PageOff, "base", base).resolve(args.target_addr), - .got_page => @fieldParentPtr(GotPage, "base", base).resolve(args.source_addr, args.target_addr), - .got_page_off => @fieldParentPtr(GotPageOff, "base", base).resolve(args.target_addr), - .tlvp_page => @fieldParentPtr(TlvpPage, "base", base).resolve(args.source_addr, args.target_addr), - .tlvp_page_off => @fieldParentPtr(TlvpPageOff, "base", base).resolve(args.target_addr), + .unsigned => @fieldParentPtr(Unsigned, "base", base).resolve(args), + .branch_aarch64 => @fieldParentPtr(aarch64.Branch, "base", base).resolve(args), + .page => @fieldParentPtr(aarch64.Page, "base", base).resolve(args), + .page_off => @fieldParentPtr(aarch64.PageOff, "base", base).resolve(args), + .got_page => @fieldParentPtr(aarch64.GotPage, "base", base).resolve(args), + .got_page_off => @fieldParentPtr(aarch64.GotPageOff, "base", base).resolve(args), + .tlvp_page => @fieldParentPtr(aarch64.TlvpPage, "base", 
base).resolve(args), + .tlvp_page_off => @fieldParentPtr(aarch64.TlvpPageOff, "base", base).resolve(args), + .branch_x86_64 => @fieldParentPtr(x86_64.Branch, "base", base).resolve(args), + .signed => @fieldParentPtr(x86_64.Signed, "base", base).resolve(args), + .got_load => @fieldParentPtr(x86_64.GotLoad, "base", base).resolve(args), + .got => @fieldParentPtr(x86_64.Got, "base", base).resolve(args), + .tlv => @fieldParentPtr(x86_64.Tlv, "base", base).resolve(args), }; } pub const Type = enum { - branch, + branch_aarch64, unsigned, page, page_off, @@ -57,6 +67,11 @@ pub const Relocation = struct { got_page_off, tlvp_page, tlvp_page_off, + branch_x86_64, + signed, + got_load, + got, + tlv, }; pub const Target = union(enum) { @@ -71,236 +86,91 @@ pub const Relocation = struct { }; } }; - - pub const Branch = struct { - base: Relocation, - /// Always .UnconditionalBranchImmediate - inst: aarch64.Instruction, - - pub const base_type: Relocation.Type = .branch; - - pub fn resolve(branch: Branch, source_addr: u64, target_addr: u64) !void { - const displacement = try math.cast(i28, @intCast(i64, target_addr) - @intCast(i64, source_addr)); - - log.debug(" | displacement 0x{x}", .{displacement}); - - var inst = branch.inst; - inst.UnconditionalBranchImmediate.imm26 = @truncate(u26, @bitCast(u28, displacement) >> 2); - mem.writeIntLittle(u32, branch.base.code[0..4], inst.toU32()); - } - }; - - pub const Unsigned = struct { - base: Relocation, - subtractor: ?Target = null, - /// Addend embedded directly in the relocation slot - addend: i64, - /// Extracted from r_length: - /// => 3 implies true - /// => 2 implies false - /// => * is unreachable - is_64bit: bool, - - pub const base_type: Relocation.Type = .unsigned; - - pub fn resolve(unsigned: Unsigned, target_addr: u64, subtractor: ?u64) !void { - const result = if (subtractor) |sub| - @intCast(i64, target_addr) - @intCast(i64, sub) + unsigned.addend - else - @intCast(i64, target_addr) + unsigned.addend; - - log.debug(" | calculated addend 0x{x}", .{unsigned.addend}); - log.debug(" | calculated unsigned value 0x{x}", .{result}); - - if (unsigned.is_64bit) { - mem.writeIntLittle( - u64, - unsigned.base.code[0..8], - @bitCast(u64, result), - ); - } else { - mem.writeIntLittle( - u32, - unsigned.base.code[0..4], - @truncate(u32, @bitCast(u64, result)), - ); - } - } - }; - - pub const Page = struct { - base: Relocation, - addend: ?u32 = null, - /// Always .PCRelativeAddress - inst: aarch64.Instruction, - - pub const base_type: Relocation.Type = .page; - - pub fn resolve(page: Page, source_addr: u64, target_addr: u64) !void { - const ta = if (page.addend) |a| target_addr + a else target_addr; - const source_page = @intCast(i32, source_addr >> 12); - const target_page = @intCast(i32, ta >> 12); - const pages = @bitCast(u21, @intCast(i21, target_page - source_page)); - - log.debug(" | calculated addend 0x{x}", .{page.addend}); - log.debug(" | moving by {} pages", .{pages}); - - var inst = page.inst; - inst.PCRelativeAddress.immhi = @truncate(u19, pages >> 2); - inst.PCRelativeAddress.immlo = @truncate(u2, pages); - - mem.writeIntLittle(u32, page.base.code[0..4], inst.toU32()); - } - }; - - pub const PageOff = struct { - base: Relocation, - addend: ?u32 = null, - op_kind: OpKind, - inst: aarch64.Instruction, - - pub const base_type: Relocation.Type = .page_off; - - pub const OpKind = enum { - arithmetic, - load_store, - }; - - pub fn resolve(page_off: PageOff, target_addr: u64) !void { - const ta = if (page_off.addend) |a| target_addr + a else target_addr; - 
const narrowed = @truncate(u12, ta); - - log.debug(" | narrowed address within the page 0x{x}", .{narrowed}); - log.debug(" | {s} opcode", .{page_off.op_kind}); - - var inst = page_off.inst; - if (page_off.op_kind == .arithmetic) { - inst.AddSubtractImmediate.imm12 = narrowed; - } else { - const offset: u12 = blk: { - if (inst.LoadStoreRegister.size == 0) { - if (inst.LoadStoreRegister.v == 1) { - // 128-bit SIMD is scaled by 16. - break :blk try math.divExact(u12, narrowed, 16); - } - // Otherwise, 8-bit SIMD or ldrb. - break :blk narrowed; - } else { - const denom: u4 = try math.powi(u4, 2, inst.LoadStoreRegister.size); - break :blk try math.divExact(u12, narrowed, denom); - } - }; - inst.LoadStoreRegister.offset = offset; - } - - mem.writeIntLittle(u32, page_off.base.code[0..4], inst.toU32()); - } - }; - - pub const GotPage = struct { - base: Relocation, - /// Always .PCRelativeAddress - inst: aarch64.Instruction, - - pub const base_type: Relocation.Type = .got_page; - - pub fn resolve(page: GotPage, source_addr: u64, target_addr: u64) !void { - const source_page = @intCast(i32, source_addr >> 12); - const target_page = @intCast(i32, target_addr >> 12); - const pages = @bitCast(u21, @intCast(i21, target_page - source_page)); - - log.debug(" | moving by {} pages", .{pages}); - - var inst = page.inst; - inst.PCRelativeAddress.immhi = @truncate(u19, pages >> 2); - inst.PCRelativeAddress.immlo = @truncate(u2, pages); - - mem.writeIntLittle(u32, page.base.code[0..4], inst.toU32()); - } - }; - - pub const GotPageOff = struct { - base: Relocation, - /// Always .LoadStoreRegister with size = 3 for GOT indirection - inst: aarch64.Instruction, - - pub const base_type: Relocation.Type = .got_page_off; - - pub fn resolve(page_off: GotPageOff, target_addr: u64) !void { - const narrowed = @truncate(u12, target_addr); - - log.debug(" | narrowed address within the page 0x{x}", .{narrowed}); - - var inst = page_off.inst; - const offset = try math.divExact(u12, narrowed, 8); - inst.LoadStoreRegister.offset = offset; - - mem.writeIntLittle(u32, page_off.base.code[0..4], inst.toU32()); - } - }; - - pub const TlvpPage = struct { - base: Relocation, - /// Always .PCRelativeAddress - inst: aarch64.Instruction, - - pub const base_type: Relocation.Type = .tlvp_page; - - pub fn resolve(page: TlvpPage, source_addr: u64, target_addr: u64) !void { - const source_page = @intCast(i32, source_addr >> 12); - const target_page = @intCast(i32, target_addr >> 12); - const pages = @bitCast(u21, @intCast(i21, target_page - source_page)); - - log.debug(" | moving by {} pages", .{pages}); - - var inst = page.inst; - inst.PCRelativeAddress.immhi = @truncate(u19, pages >> 2); - inst.PCRelativeAddress.immlo = @truncate(u2, pages); - - mem.writeIntLittle(u32, page.base.code[0..4], inst.toU32()); - } - }; - - pub const TlvpPageOff = struct { - base: Relocation, - /// Always .AddSubtractImmediate regardless of the source instruction. - /// This means, we always rewrite the instruction to add even if the - /// source instruction was an ldr. 
- inst: aarch64.Instruction, - - pub const base_type: Relocation.Type = .tlvp_page_off; - - pub fn resolve(page_off: TlvpPageOff, target_addr: u64) !void { - const narrowed = @truncate(u12, target_addr); - - log.debug(" | narrowed address within the page 0x{x}", .{narrowed}); - - var inst = page_off.inst; - inst.AddSubtractImmediate.imm12 = narrowed; - - mem.writeIntLittle(u32, page_off.base.code[0..4], inst.toU32()); - } - }; }; -pub fn parse(allocator: *Allocator, code: []u8, relocs: []const macho.relocation_info) ![]*Relocation { +pub const Unsigned = struct { + base: Relocation, + subtractor: ?Relocation.Target = null, + /// Addend embedded directly in the relocation slot + addend: i64, + /// Extracted from r_length: + /// => 3 implies true + /// => 2 implies false + /// => * is unreachable + is_64bit: bool, + + pub const base_type: Relocation.Type = .unsigned; + + pub fn resolve(unsigned: Unsigned, args: Relocation.ResolveArgs) !void { + const addend = if (unsigned.base.target == .section) + unsigned.addend - @intCast(i64, args.source_sect_addr.?) + else + unsigned.addend; + + const result = if (args.subtractor) |subtractor| + @intCast(i64, args.target_addr) - @intCast(i64, subtractor) + addend + else + @intCast(i64, args.target_addr) + addend; + + log.debug(" | calculated addend 0x{x}", .{addend}); + log.debug(" | calculated unsigned value 0x{x}", .{result}); + + if (unsigned.is_64bit) { + mem.writeIntLittle( + u64, + unsigned.base.code[0..8], + @bitCast(u64, result), + ); + } else { + mem.writeIntLittle( + u32, + unsigned.base.code[0..4], + @truncate(u32, @bitCast(u64, result)), + ); + } + } +}; + +pub fn parse( + allocator: *Allocator, + arch: std.Target.Cpu.Arch, + code: []u8, + relocs: []const macho.relocation_info, +) ![]*Relocation { var it = RelocIterator{ .buffer = relocs, }; - var parser = Parser{ - .allocator = allocator, - .it = &it, - .code = code, - .parsed = std.ArrayList(*Relocation).init(allocator), - }; - defer parser.deinit(); - try parser.parse(); + switch (arch) { + .aarch64 => { + var parser = aarch64.Parser{ + .allocator = allocator, + .it = &it, + .code = code, + .parsed = std.ArrayList(*Relocation).init(allocator), + }; + defer parser.deinit(); + try parser.parse(); - return parser.parsed.toOwnedSlice(); + return parser.parsed.toOwnedSlice(); + }, + .x86_64 => { + var parser = x86_64.Parser{ + .allocator = allocator, + .it = &it, + .code = code, + .parsed = std.ArrayList(*Relocation).init(allocator), + }; + defer parser.deinit(); + try parser.parse(); + + return parser.parsed.toOwnedSlice(); + }, + else => unreachable, + } } -const RelocIterator = struct { +pub const RelocIterator = struct { buffer: []const macho.relocation_info, index: i64 = -1, @@ -308,7 +178,8 @@ const RelocIterator = struct { self.index += 1; if (self.index < self.buffer.len) { const reloc = self.buffer[@intCast(u64, self.index)]; - log.debug("{s}", .{@intToEnum(macho.reloc_type_arm64, reloc.r_type)}); + log.debug("relocation", .{}); + log.debug(" | type = {}", .{reloc.r_type}); log.debug(" | offset = {}", .{reloc.r_address}); log.debug(" | PC = {}", .{reloc.r_pcrel == 1}); log.debug(" | length = {}", .{reloc.r_length}); @@ -319,423 +190,8 @@ const RelocIterator = struct { return null; } - pub fn peek(self: *RelocIterator) ?macho.reloc_type_arm64 { - if (self.index + 1 < self.buffer.len) { - const reloc = self.buffer[@intCast(u64, self.index + 1)]; - const tt = @intToEnum(macho.reloc_type_arm64, reloc.r_type); - return tt; - } - return null; + pub fn peek(self: RelocIterator) 
macho.relocation_info { + assert(self.index + 1 < self.buffer.len); + return self.buffer[@intCast(u64, self.index + 1)]; } }; - -const Parser = struct { - allocator: *Allocator, - it: *RelocIterator, - code: []u8, - parsed: std.ArrayList(*Relocation), - addend: ?u32 = null, - subtractor: ?Relocation.Target = null, - - fn deinit(parser: *Parser) void { - parser.parsed.deinit(); - } - - fn parse(parser: *Parser) !void { - while (parser.it.next()) |reloc| { - switch (@intToEnum(macho.reloc_type_arm64, reloc.r_type)) { - .ARM64_RELOC_BRANCH26 => { - try parser.parseBranch(reloc); - }, - .ARM64_RELOC_SUBTRACTOR => { - try parser.parseSubtractor(reloc); - }, - .ARM64_RELOC_UNSIGNED => { - try parser.parseUnsigned(reloc); - }, - .ARM64_RELOC_ADDEND => { - try parser.parseAddend(reloc); - }, - .ARM64_RELOC_PAGE21, - .ARM64_RELOC_GOT_LOAD_PAGE21, - .ARM64_RELOC_TLVP_LOAD_PAGE21, - => { - try parser.parsePage(reloc); - }, - .ARM64_RELOC_PAGEOFF12 => { - try parser.parsePageOff(reloc); - }, - .ARM64_RELOC_GOT_LOAD_PAGEOFF12 => { - try parser.parseGotLoadPageOff(reloc); - }, - .ARM64_RELOC_TLVP_LOAD_PAGEOFF12 => { - try parser.parseTlvpLoadPageOff(reloc); - }, - .ARM64_RELOC_POINTER_TO_GOT => { - return error.ToDoRelocPointerToGot; - }, - } - } - } - - fn parseAddend(parser: *Parser, reloc: macho.relocation_info) !void { - const reloc_type = @intToEnum(macho.reloc_type_arm64, reloc.r_type); - assert(reloc_type == .ARM64_RELOC_ADDEND); - assert(reloc.r_pcrel == 0); - assert(reloc.r_extern == 0); - assert(parser.addend == null); - - parser.addend = reloc.r_symbolnum; - - // Verify ADDEND is followed by a load. - if (parser.it.peek()) |tt| { - switch (tt) { - .ARM64_RELOC_PAGE21, .ARM64_RELOC_PAGEOFF12 => {}, - else => |other| { - log.err("unexpected relocation type: expected PAGE21 or PAGEOFF12, found {s}", .{other}); - return error.UnexpectedRelocationType; - }, - } - } else { - log.err("unexpected end of stream", .{}); - return error.UnexpectedEndOfStream; - } - } - - fn parseBranch(parser: *Parser, reloc: macho.relocation_info) !void { - const reloc_type = @intToEnum(macho.reloc_type_arm64, reloc.r_type); - assert(reloc_type == .ARM64_RELOC_BRANCH26); - assert(reloc.r_pcrel == 1); - assert(reloc.r_length == 2); - - const offset = @intCast(u32, reloc.r_address); - const inst = parser.code[offset..][0..4]; - const parsed_inst = aarch64.Instruction{ .UnconditionalBranchImmediate = mem.bytesToValue( - meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.UnconditionalBranchImmediate, - ), - inst, - ) }; - - var branch = try parser.allocator.create(Relocation.Branch); - errdefer parser.allocator.destroy(branch); - - const target = Relocation.Target.from_reloc(reloc); - - branch.* = .{ - .base = .{ - .@"type" = .branch, - .code = inst, - .offset = @intCast(u32, reloc.r_address), - .target = target, - }, - .inst = parsed_inst, - }; - - log.debug(" | emitting {}", .{branch}); - try parser.parsed.append(&branch.base); - } - - fn parsePage(parser: *Parser, reloc: macho.relocation_info) !void { - assert(reloc.r_pcrel == 1); - assert(reloc.r_length == 2); - - const reloc_type = @intToEnum(macho.reloc_type_arm64, reloc.r_type); - const target = Relocation.Target.from_reloc(reloc); - - const offset = @intCast(u32, reloc.r_address); - const inst = parser.code[offset..][0..4]; - const parsed_inst = aarch64.Instruction{ .PCRelativeAddress = mem.bytesToValue(meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.PCRelativeAddress, - ), inst) }; - - const ptr: *Relocation = ptr: { - switch (reloc_type) { 
- .ARM64_RELOC_PAGE21 => { - defer { - // Reset parser's addend state - parser.addend = null; - } - var page = try parser.allocator.create(Relocation.Page); - errdefer parser.allocator.destroy(page); - - page.* = .{ - .base = .{ - .@"type" = .page, - .code = inst, - .offset = offset, - .target = target, - }, - .addend = parser.addend, - .inst = parsed_inst, - }; - - log.debug(" | emitting {}", .{page}); - - break :ptr &page.base; - }, - .ARM64_RELOC_GOT_LOAD_PAGE21 => { - var page = try parser.allocator.create(Relocation.GotPage); - errdefer parser.allocator.destroy(page); - - page.* = .{ - .base = .{ - .@"type" = .got_page, - .code = inst, - .offset = offset, - .target = target, - }, - .inst = parsed_inst, - }; - - log.debug(" | emitting {}", .{page}); - - break :ptr &page.base; - }, - .ARM64_RELOC_TLVP_LOAD_PAGE21 => { - var page = try parser.allocator.create(Relocation.TlvpPage); - errdefer parser.allocator.destroy(page); - - page.* = .{ - .base = .{ - .@"type" = .tlvp_page, - .code = inst, - .offset = offset, - .target = target, - }, - .inst = parsed_inst, - }; - - log.debug(" | emitting {}", .{page}); - - break :ptr &page.base; - }, - else => unreachable, - } - }; - - try parser.parsed.append(ptr); - } - - fn parsePageOff(parser: *Parser, reloc: macho.relocation_info) !void { - defer { - // Reset parser's addend state - parser.addend = null; - } - - const reloc_type = @intToEnum(macho.reloc_type_arm64, reloc.r_type); - assert(reloc_type == .ARM64_RELOC_PAGEOFF12); - assert(reloc.r_pcrel == 0); - assert(reloc.r_length == 2); - - const offset = @intCast(u32, reloc.r_address); - const inst = parser.code[offset..][0..4]; - - var op_kind: Relocation.PageOff.OpKind = undefined; - var parsed_inst: aarch64.Instruction = undefined; - if (isArithmeticOp(inst)) { - op_kind = .arithmetic; - parsed_inst = .{ .AddSubtractImmediate = mem.bytesToValue(meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.AddSubtractImmediate, - ), inst) }; - } else { - op_kind = .load_store; - parsed_inst = .{ .LoadStoreRegister = mem.bytesToValue(meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.LoadStoreRegister, - ), inst) }; - } - const target = Relocation.Target.from_reloc(reloc); - - var page_off = try parser.allocator.create(Relocation.PageOff); - errdefer parser.allocator.destroy(page_off); - - page_off.* = .{ - .base = .{ - .@"type" = .page_off, - .code = inst, - .offset = offset, - .target = target, - }, - .op_kind = op_kind, - .inst = parsed_inst, - .addend = parser.addend, - }; - - log.debug(" | emitting {}", .{page_off}); - try parser.parsed.append(&page_off.base); - } - - fn parseGotLoadPageOff(parser: *Parser, reloc: macho.relocation_info) !void { - const reloc_type = @intToEnum(macho.reloc_type_arm64, reloc.r_type); - assert(reloc_type == .ARM64_RELOC_GOT_LOAD_PAGEOFF12); - assert(reloc.r_pcrel == 0); - assert(reloc.r_length == 2); - - const offset = @intCast(u32, reloc.r_address); - const inst = parser.code[offset..][0..4]; - assert(!isArithmeticOp(inst)); - - const parsed_inst = mem.bytesToValue(meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.LoadStoreRegister, - ), inst); - assert(parsed_inst.size == 3); - - const target = Relocation.Target.from_reloc(reloc); - - var page_off = try parser.allocator.create(Relocation.GotPageOff); - errdefer parser.allocator.destroy(page_off); - - page_off.* = .{ - .base = .{ - .@"type" = .got_page_off, - .code = inst, - .offset = offset, - .target = target, - }, - .inst = .{ - .LoadStoreRegister = parsed_inst, - }, - }; - - 
log.debug(" | emitting {}", .{page_off}); - try parser.parsed.append(&page_off.base); - } - - fn parseTlvpLoadPageOff(parser: *Parser, reloc: macho.relocation_info) !void { - const reloc_type = @intToEnum(macho.reloc_type_arm64, reloc.r_type); - assert(reloc_type == .ARM64_RELOC_TLVP_LOAD_PAGEOFF12); - assert(reloc.r_pcrel == 0); - assert(reloc.r_length == 2); - - const RegInfo = struct { - rd: u5, - rn: u5, - size: u1, - }; - - const offset = @intCast(u32, reloc.r_address); - const inst = parser.code[offset..][0..4]; - const parsed: RegInfo = parsed: { - if (isArithmeticOp(inst)) { - const parsed_inst = mem.bytesAsValue(meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.AddSubtractImmediate, - ), inst); - break :parsed .{ - .rd = parsed_inst.rd, - .rn = parsed_inst.rn, - .size = parsed_inst.sf, - }; - } else { - const parsed_inst = mem.bytesAsValue(meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.LoadStoreRegister, - ), inst); - break :parsed .{ - .rd = parsed_inst.rt, - .rn = parsed_inst.rn, - .size = @truncate(u1, parsed_inst.size), - }; - } - }; - - const target = Relocation.Target.from_reloc(reloc); - - var page_off = try parser.allocator.create(Relocation.TlvpPageOff); - errdefer parser.allocator.destroy(page_off); - - page_off.* = .{ - .base = .{ - .@"type" = .tlvp_page_off, - .code = inst, - .offset = @intCast(u32, reloc.r_address), - .target = target, - }, - .inst = .{ - .AddSubtractImmediate = .{ - .rd = parsed.rd, - .rn = parsed.rn, - .imm12 = 0, // This will be filled when target addresses are known. - .sh = 0, - .s = 0, - .op = 0, - .sf = parsed.size, - }, - }, - }; - - log.debug(" | emitting {}", .{page_off}); - try parser.parsed.append(&page_off.base); - } - - fn parseSubtractor(parser: *Parser, reloc: macho.relocation_info) !void { - const reloc_type = @intToEnum(macho.reloc_type_arm64, reloc.r_type); - assert(reloc_type == .ARM64_RELOC_SUBTRACTOR); - assert(reloc.r_pcrel == 0); - assert(parser.subtractor == null); - - parser.subtractor = Relocation.Target.from_reloc(reloc); - - // Verify SUBTRACTOR is followed by UNSIGNED. 
- if (parser.it.peek()) |tt| { - if (tt != .ARM64_RELOC_UNSIGNED) { - log.err("unexpected relocation type: expected UNSIGNED, found {s}", .{tt}); - return error.UnexpectedRelocationType; - } - } else { - log.err("unexpected end of stream", .{}); - return error.UnexpectedEndOfStream; - } - } - - fn parseUnsigned(parser: *Parser, reloc: macho.relocation_info) !void { - defer { - // Reset parser's subtractor state - parser.subtractor = null; - } - - const reloc_type = @intToEnum(macho.reloc_type_arm64, reloc.r_type); - assert(reloc_type == .ARM64_RELOC_UNSIGNED); - assert(reloc.r_pcrel == 0); - - var unsigned = try parser.allocator.create(Relocation.Unsigned); - errdefer parser.allocator.destroy(unsigned); - - const target = Relocation.Target.from_reloc(reloc); - const is_64bit: bool = switch (reloc.r_length) { - 3 => true, - 2 => false, - else => unreachable, - }; - const offset = @intCast(u32, reloc.r_address); - const addend: i64 = if (is_64bit) - mem.readIntLittle(i64, parser.code[offset..][0..8]) - else - mem.readIntLittle(i32, parser.code[offset..][0..4]); - - unsigned.* = .{ - .base = .{ - .@"type" = .unsigned, - .code = if (is_64bit) parser.code[offset..][0..8] else parser.code[offset..][0..4], - .offset = offset, - .target = target, - }, - .subtractor = parser.subtractor, - .is_64bit = is_64bit, - .addend = addend, - }; - - log.debug(" | emitting {}", .{unsigned}); - try parser.parsed.append(&unsigned.base); - } -}; - -fn isArithmeticOp(inst: *const [4]u8) callconv(.Inline) bool { - const group_decode = @truncate(u5, inst[3]); - return ((group_decode >> 2) == 4); -} diff --git a/src/link/MachO/reloc/aarch64.zig b/src/link/MachO/reloc/aarch64.zig new file mode 100644 index 0000000000..b875ab0109 --- /dev/null +++ b/src/link/MachO/reloc/aarch64.zig @@ -0,0 +1,587 @@ +const std = @import("std"); +const aarch64 = @import("../../../codegen/aarch64.zig"); +const assert = std.debug.assert; +const log = std.log.scoped(.reloc); +const macho = std.macho; +const math = std.math; +const mem = std.mem; +const meta = std.meta; +const reloc = @import("../reloc.zig"); + +const Allocator = mem.Allocator; +const Relocation = reloc.Relocation; + +pub const Branch = struct { + base: Relocation, + /// Always .UnconditionalBranchImmediate + inst: aarch64.Instruction, + + pub const base_type: Relocation.Type = .branch_aarch64; + + pub fn resolve(branch: Branch, args: Relocation.ResolveArgs) !void { + const displacement = try math.cast(i28, @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr)); + + log.debug(" | displacement 0x{x}", .{displacement}); + + var inst = branch.inst; + inst.UnconditionalBranchImmediate.imm26 = @truncate(u26, @bitCast(u28, displacement) >> 2); + mem.writeIntLittle(u32, branch.base.code[0..4], inst.toU32()); + } +}; + +pub const Page = struct { + base: Relocation, + addend: ?u32 = null, + /// Always .PCRelativeAddress + inst: aarch64.Instruction, + + pub const base_type: Relocation.Type = .page; + + pub fn resolve(page: Page, args: Relocation.ResolveArgs) !void { + const target_addr = if (page.addend) |addend| args.target_addr + addend else args.target_addr; + const source_page = @intCast(i32, args.source_addr >> 12); + const target_page = @intCast(i32, target_addr >> 12); + const pages = @bitCast(u21, @intCast(i21, target_page - source_page)); + + log.debug(" | calculated addend 0x{x}", .{page.addend}); + log.debug(" | moving by {} pages", .{pages}); + + var inst = page.inst; + inst.PCRelativeAddress.immhi = @truncate(u19, pages >> 2); + inst.PCRelativeAddress.immlo = 
@truncate(u2, pages); + + mem.writeIntLittle(u32, page.base.code[0..4], inst.toU32()); + } +}; + +pub const PageOff = struct { + base: Relocation, + addend: ?u32 = null, + op_kind: OpKind, + inst: aarch64.Instruction, + + pub const base_type: Relocation.Type = .page_off; + + pub const OpKind = enum { + arithmetic, + load_store, + }; + + pub fn resolve(page_off: PageOff, args: Relocation.ResolveArgs) !void { + const target_addr = if (page_off.addend) |addend| args.target_addr + addend else args.target_addr; + const narrowed = @truncate(u12, target_addr); + + log.debug(" | narrowed address within the page 0x{x}", .{narrowed}); + log.debug(" | {s} opcode", .{page_off.op_kind}); + + var inst = page_off.inst; + if (page_off.op_kind == .arithmetic) { + inst.AddSubtractImmediate.imm12 = narrowed; + } else { + const offset: u12 = blk: { + if (inst.LoadStoreRegister.size == 0) { + if (inst.LoadStoreRegister.v == 1) { + // 128-bit SIMD is scaled by 16. + break :blk try math.divExact(u12, narrowed, 16); + } + // Otherwise, 8-bit SIMD or ldrb. + break :blk narrowed; + } else { + const denom: u4 = try math.powi(u4, 2, inst.LoadStoreRegister.size); + break :blk try math.divExact(u12, narrowed, denom); + } + }; + inst.LoadStoreRegister.offset = offset; + } + + mem.writeIntLittle(u32, page_off.base.code[0..4], inst.toU32()); + } +}; + +pub const GotPage = struct { + base: Relocation, + /// Always .PCRelativeAddress + inst: aarch64.Instruction, + + pub const base_type: Relocation.Type = .got_page; + + pub fn resolve(page: GotPage, args: Relocation.ResolveArgs) !void { + const source_page = @intCast(i32, args.source_addr >> 12); + const target_page = @intCast(i32, args.target_addr >> 12); + const pages = @bitCast(u21, @intCast(i21, target_page - source_page)); + + log.debug(" | moving by {} pages", .{pages}); + + var inst = page.inst; + inst.PCRelativeAddress.immhi = @truncate(u19, pages >> 2); + inst.PCRelativeAddress.immlo = @truncate(u2, pages); + + mem.writeIntLittle(u32, page.base.code[0..4], inst.toU32()); + } +}; + +pub const GotPageOff = struct { + base: Relocation, + /// Always .LoadStoreRegister with size = 3 for GOT indirection + inst: aarch64.Instruction, + + pub const base_type: Relocation.Type = .got_page_off; + + pub fn resolve(page_off: GotPageOff, args: Relocation.ResolveArgs) !void { + const narrowed = @truncate(u12, args.target_addr); + + log.debug(" | narrowed address within the page 0x{x}", .{narrowed}); + + var inst = page_off.inst; + const offset = try math.divExact(u12, narrowed, 8); + inst.LoadStoreRegister.offset = offset; + + mem.writeIntLittle(u32, page_off.base.code[0..4], inst.toU32()); + } +}; + +pub const TlvpPage = struct { + base: Relocation, + /// Always .PCRelativeAddress + inst: aarch64.Instruction, + + pub const base_type: Relocation.Type = .tlvp_page; + + pub fn resolve(page: TlvpPage, args: Relocation.ResolveArgs) !void { + const source_page = @intCast(i32, args.source_addr >> 12); + const target_page = @intCast(i32, args.target_addr >> 12); + const pages = @bitCast(u21, @intCast(i21, target_page - source_page)); + + log.debug(" | moving by {} pages", .{pages}); + + var inst = page.inst; + inst.PCRelativeAddress.immhi = @truncate(u19, pages >> 2); + inst.PCRelativeAddress.immlo = @truncate(u2, pages); + + mem.writeIntLittle(u32, page.base.code[0..4], inst.toU32()); + } +}; + +pub const TlvpPageOff = struct { + base: Relocation, + /// Always .AddSubtractImmediate regardless of the source instruction. 
+ /// This means, we always rewrite the instruction to add even if the + /// source instruction was an ldr. + inst: aarch64.Instruction, + + pub const base_type: Relocation.Type = .tlvp_page_off; + + pub fn resolve(page_off: TlvpPageOff, args: Relocation.ResolveArgs) !void { + const narrowed = @truncate(u12, args.target_addr); + + log.debug(" | narrowed address within the page 0x{x}", .{narrowed}); + + var inst = page_off.inst; + inst.AddSubtractImmediate.imm12 = narrowed; + + mem.writeIntLittle(u32, page_off.base.code[0..4], inst.toU32()); + } +}; + +pub const Parser = struct { + allocator: *Allocator, + it: *reloc.RelocIterator, + code: []u8, + parsed: std.ArrayList(*Relocation), + addend: ?u32 = null, + subtractor: ?Relocation.Target = null, + + pub fn deinit(parser: *Parser) void { + parser.parsed.deinit(); + } + + pub fn parse(parser: *Parser) !void { + while (parser.it.next()) |rel| { + switch (@intToEnum(macho.reloc_type_arm64, rel.r_type)) { + .ARM64_RELOC_BRANCH26 => { + try parser.parseBranch(rel); + }, + .ARM64_RELOC_SUBTRACTOR => { + try parser.parseSubtractor(rel); + }, + .ARM64_RELOC_UNSIGNED => { + try parser.parseUnsigned(rel); + }, + .ARM64_RELOC_ADDEND => { + try parser.parseAddend(rel); + }, + .ARM64_RELOC_PAGE21, + .ARM64_RELOC_GOT_LOAD_PAGE21, + .ARM64_RELOC_TLVP_LOAD_PAGE21, + => { + try parser.parsePage(rel); + }, + .ARM64_RELOC_PAGEOFF12 => { + try parser.parsePageOff(rel); + }, + .ARM64_RELOC_GOT_LOAD_PAGEOFF12 => { + try parser.parseGotLoadPageOff(rel); + }, + .ARM64_RELOC_TLVP_LOAD_PAGEOFF12 => { + try parser.parseTlvpLoadPageOff(rel); + }, + .ARM64_RELOC_POINTER_TO_GOT => { + return error.ToDoRelocPointerToGot; + }, + } + } + } + + fn parseAddend(parser: *Parser, rel: macho.relocation_info) !void { + const rel_type = @intToEnum(macho.reloc_type_arm64, rel.r_type); + assert(rel_type == .ARM64_RELOC_ADDEND); + assert(rel.r_pcrel == 0); + assert(rel.r_extern == 0); + assert(parser.addend == null); + + parser.addend = rel.r_symbolnum; + + // Verify ADDEND is followed by a load. 
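+ // ("Load" here means the PAGE21 or PAGEOFF12 relocation that follows; the addend applies to that next relocation's target.)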
+ const next = @intToEnum(macho.reloc_type_arm64, parser.it.peek().r_type); + switch (next) { + .ARM64_RELOC_PAGE21, .ARM64_RELOC_PAGEOFF12 => {}, + else => { + log.err("unexpected relocation type: expected PAGE21 or PAGEOFF12, found {s}", .{next}); + return error.UnexpectedRelocationType; + }, + } + } + + fn parseBranch(parser: *Parser, rel: macho.relocation_info) !void { + const rel_type = @intToEnum(macho.reloc_type_arm64, rel.r_type); + assert(rel_type == .ARM64_RELOC_BRANCH26); + assert(rel.r_pcrel == 1); + assert(rel.r_length == 2); + + const offset = @intCast(u32, rel.r_address); + const inst = parser.code[offset..][0..4]; + const parsed_inst = aarch64.Instruction{ .UnconditionalBranchImmediate = mem.bytesToValue( + meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.UnconditionalBranchImmediate, + ), + inst, + ) }; + + var branch = try parser.allocator.create(Branch); + errdefer parser.allocator.destroy(branch); + + const target = Relocation.Target.from_reloc(rel); + + branch.* = .{ + .base = .{ + .@"type" = .branch_aarch64, + .code = inst, + .offset = offset, + .target = target, + }, + .inst = parsed_inst, + }; + + log.debug(" | emitting {}", .{branch}); + try parser.parsed.append(&branch.base); + } + + fn parsePage(parser: *Parser, rel: macho.relocation_info) !void { + assert(rel.r_pcrel == 1); + assert(rel.r_length == 2); + + const rel_type = @intToEnum(macho.reloc_type_arm64, rel.r_type); + const target = Relocation.Target.from_reloc(rel); + + const offset = @intCast(u32, rel.r_address); + const inst = parser.code[offset..][0..4]; + const parsed_inst = aarch64.Instruction{ .PCRelativeAddress = mem.bytesToValue(meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.PCRelativeAddress, + ), inst) }; + + const ptr: *Relocation = ptr: { + switch (rel_type) { + .ARM64_RELOC_PAGE21 => { + defer { + // Reset parser's addend state + parser.addend = null; + } + var page = try parser.allocator.create(Page); + errdefer parser.allocator.destroy(page); + + page.* = .{ + .base = .{ + .@"type" = .page, + .code = inst, + .offset = offset, + .target = target, + }, + .addend = parser.addend, + .inst = parsed_inst, + }; + + log.debug(" | emitting {}", .{page}); + + break :ptr &page.base; + }, + .ARM64_RELOC_GOT_LOAD_PAGE21 => { + var page = try parser.allocator.create(GotPage); + errdefer parser.allocator.destroy(page); + + page.* = .{ + .base = .{ + .@"type" = .got_page, + .code = inst, + .offset = offset, + .target = target, + }, + .inst = parsed_inst, + }; + + log.debug(" | emitting {}", .{page}); + + break :ptr &page.base; + }, + .ARM64_RELOC_TLVP_LOAD_PAGE21 => { + var page = try parser.allocator.create(TlvpPage); + errdefer parser.allocator.destroy(page); + + page.* = .{ + .base = .{ + .@"type" = .tlvp_page, + .code = inst, + .offset = offset, + .target = target, + }, + .inst = parsed_inst, + }; + + log.debug(" | emitting {}", .{page}); + + break :ptr &page.base; + }, + else => unreachable, + } + }; + + try parser.parsed.append(ptr); + } + + fn parsePageOff(parser: *Parser, rel: macho.relocation_info) !void { + defer { + // Reset parser's addend state + parser.addend = null; + } + + const rel_type = @intToEnum(macho.reloc_type_arm64, rel.r_type); + assert(rel_type == .ARM64_RELOC_PAGEOFF12); + assert(rel.r_pcrel == 0); + assert(rel.r_length == 2); + + const offset = @intCast(u32, rel.r_address); + const inst = parser.code[offset..][0..4]; + + var op_kind: PageOff.OpKind = undefined; + var parsed_inst: aarch64.Instruction = undefined; + if (isArithmeticOp(inst)) { + op_kind = 
.arithmetic; + parsed_inst = .{ .AddSubtractImmediate = mem.bytesToValue(meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.AddSubtractImmediate, + ), inst) }; + } else { + op_kind = .load_store; + parsed_inst = .{ .LoadStoreRegister = mem.bytesToValue(meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.LoadStoreRegister, + ), inst) }; + } + const target = Relocation.Target.from_reloc(rel); + + var page_off = try parser.allocator.create(PageOff); + errdefer parser.allocator.destroy(page_off); + + page_off.* = .{ + .base = .{ + .@"type" = .page_off, + .code = inst, + .offset = offset, + .target = target, + }, + .op_kind = op_kind, + .inst = parsed_inst, + .addend = parser.addend, + }; + + log.debug(" | emitting {}", .{page_off}); + try parser.parsed.append(&page_off.base); + } + + fn parseGotLoadPageOff(parser: *Parser, rel: macho.relocation_info) !void { + const rel_type = @intToEnum(macho.reloc_type_arm64, rel.r_type); + assert(rel_type == .ARM64_RELOC_GOT_LOAD_PAGEOFF12); + assert(rel.r_pcrel == 0); + assert(rel.r_length == 2); + + const offset = @intCast(u32, rel.r_address); + const inst = parser.code[offset..][0..4]; + assert(!isArithmeticOp(inst)); + + const parsed_inst = mem.bytesToValue(meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.LoadStoreRegister, + ), inst); + assert(parsed_inst.size == 3); + + const target = Relocation.Target.from_reloc(rel); + + var page_off = try parser.allocator.create(GotPageOff); + errdefer parser.allocator.destroy(page_off); + + page_off.* = .{ + .base = .{ + .@"type" = .got_page_off, + .code = inst, + .offset = offset, + .target = target, + }, + .inst = .{ + .LoadStoreRegister = parsed_inst, + }, + }; + + log.debug(" | emitting {}", .{page_off}); + try parser.parsed.append(&page_off.base); + } + + fn parseTlvpLoadPageOff(parser: *Parser, rel: macho.relocation_info) !void { + const rel_type = @intToEnum(macho.reloc_type_arm64, rel.r_type); + assert(rel_type == .ARM64_RELOC_TLVP_LOAD_PAGEOFF12); + assert(rel.r_pcrel == 0); + assert(rel.r_length == 2); + + const RegInfo = struct { + rd: u5, + rn: u5, + size: u1, + }; + + const offset = @intCast(u32, rel.r_address); + const inst = parser.code[offset..][0..4]; + const parsed: RegInfo = parsed: { + if (isArithmeticOp(inst)) { + const parsed_inst = mem.bytesAsValue(meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.AddSubtractImmediate, + ), inst); + break :parsed .{ + .rd = parsed_inst.rd, + .rn = parsed_inst.rn, + .size = parsed_inst.sf, + }; + } else { + const parsed_inst = mem.bytesAsValue(meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.LoadStoreRegister, + ), inst); + break :parsed .{ + .rd = parsed_inst.rt, + .rn = parsed_inst.rn, + .size = @truncate(u1, parsed_inst.size), + }; + } + }; + + const target = Relocation.Target.from_reloc(rel); + + var page_off = try parser.allocator.create(TlvpPageOff); + errdefer parser.allocator.destroy(page_off); + + page_off.* = .{ + .base = .{ + .@"type" = .tlvp_page_off, + .code = inst, + .offset = offset, + .target = target, + }, + .inst = .{ + .AddSubtractImmediate = .{ + .rd = parsed.rd, + .rn = parsed.rn, + .imm12 = 0, // This will be filled when target addresses are known. 
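+ // sh = 0 (no LSL #12), s = 0 (don't set flags), op = 0 (add): always a plain ADD immediate.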
+ .sh = 0, + .s = 0, + .op = 0, + .sf = parsed.size, + }, + }, + }; + + log.debug(" | emitting {}", .{page_off}); + try parser.parsed.append(&page_off.base); + } + + fn parseSubtractor(parser: *Parser, rel: macho.relocation_info) !void { + const rel_type = @intToEnum(macho.reloc_type_arm64, rel.r_type); + assert(rel_type == .ARM64_RELOC_SUBTRACTOR); + assert(rel.r_pcrel == 0); + assert(parser.subtractor == null); + + parser.subtractor = Relocation.Target.from_reloc(rel); + + // Verify SUBTRACTOR is followed by UNSIGNED. + const next = @intToEnum(macho.reloc_type_arm64, parser.it.peek().r_type); + if (next != .ARM64_RELOC_UNSIGNED) { + log.err("unexpected relocation type: expected UNSIGNED, found {s}", .{next}); + return error.UnexpectedRelocationType; + } + } + + fn parseUnsigned(parser: *Parser, rel: macho.relocation_info) !void { + defer { + // Reset parser's subtractor state + parser.subtractor = null; + } + + const rel_type = @intToEnum(macho.reloc_type_arm64, rel.r_type); + assert(rel_type == .ARM64_RELOC_UNSIGNED); + assert(rel.r_pcrel == 0); + + var unsigned = try parser.allocator.create(reloc.Unsigned); + errdefer parser.allocator.destroy(unsigned); + + const target = Relocation.Target.from_reloc(rel); + const is_64bit: bool = switch (rel.r_length) { + 3 => true, + 2 => false, + else => unreachable, + }; + const offset = @intCast(u32, rel.r_address); + const addend: i64 = if (is_64bit) + mem.readIntLittle(i64, parser.code[offset..][0..8]) + else + mem.readIntLittle(i32, parser.code[offset..][0..4]); + + unsigned.* = .{ + .base = .{ + .@"type" = .unsigned, + .code = if (is_64bit) parser.code[offset..][0..8] else parser.code[offset..][0..4], + .offset = offset, + .target = target, + }, + .subtractor = parser.subtractor, + .is_64bit = is_64bit, + .addend = addend, + }; + + log.debug(" | emitting {}", .{unsigned}); + try parser.parsed.append(&unsigned.base); + } +}; + +fn isArithmeticOp(inst: *const [4]u8) callconv(.Inline) bool { + const group_decode = @truncate(u5, inst[3]); + return ((group_decode >> 2) == 4); +} diff --git a/src/link/MachO/reloc/x86_64.zig b/src/link/MachO/reloc/x86_64.zig new file mode 100644 index 0000000000..d42781a042 --- /dev/null +++ b/src/link/MachO/reloc/x86_64.zig @@ -0,0 +1,343 @@ +const std = @import("std"); +const assert = std.debug.assert; +const log = std.log.scoped(.reloc); +const macho = std.macho; +const math = std.math; +const mem = std.mem; +const meta = std.meta; +const reloc = @import("../reloc.zig"); + +const Allocator = mem.Allocator; +const Relocation = reloc.Relocation; + +pub const Branch = struct { + base: Relocation, + + pub const base_type: Relocation.Type = .branch_x86_64; + + pub fn resolve(branch: Branch, args: Relocation.ResolveArgs) !void { + const displacement = try math.cast(i32, @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr) - 4); + log.debug(" | displacement 0x{x}", .{displacement}); + mem.writeIntLittle(u32, branch.base.code[0..4], @bitCast(u32, displacement)); + } +}; + +pub const Signed = struct { + base: Relocation, + addend: i32, + correction: i4, + + pub const base_type: Relocation.Type = .signed; + + pub fn resolve(signed: Signed, args: Relocation.ResolveArgs) !void { + const target_addr = target_addr: { + if (signed.base.target == .section) { + const source_target = @intCast(i64, signed.base.offset) + signed.addend + 4 + signed.correction; + const source_disp = source_target - @intCast(i64, args.source_sect_addr.?); + break :target_addr @intCast(i64, args.target_addr) + source_disp; + } + break 
:target_addr @intCast(i64, args.target_addr) + signed.addend; + }; + const displacement = try math.cast(i32, target_addr - @intCast(i64, args.source_addr) - signed.correction - 4); + + log.debug(" | calculated addend 0x{x}", .{signed.addend}); + log.debug(" | calculated correction 0x{x}", .{signed.correction}); + log.debug(" | displacement 0x{x}", .{displacement}); + + mem.writeIntLittle(u32, signed.base.code[0..4], @bitCast(u32, displacement)); + } +}; + +pub const GotLoad = struct { + base: Relocation, + op: *u8, + + pub const base_type: Relocation.Type = .got_load; + + pub fn resolve(got_load: GotLoad, args: Relocation.ResolveArgs) !void { + const displacement = try math.cast(i32, @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr) - 4); + log.debug(" | displacement 0x{x}", .{displacement}); + mem.writeIntLittle(u32, got_load.base.code[0..4], @bitCast(u32, displacement)); + } +}; + +pub const Got = struct { + base: Relocation, + + pub const base_type: Relocation.Type = .got; + + pub fn resolve(got: Got, args: Relocation.ResolveArgs) !void { + const displacement = try math.cast(i32, @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr) - 4); + log.debug(" | displacement 0x{x}", .{displacement}); + mem.writeIntLittle(u32, got.base.code[0..4], @bitCast(u32, displacement)); + } +}; + +pub const Tlv = struct { + base: Relocation, + op: *u8, + + pub const base_type: Relocation.Type = .tlv; + + pub fn resolve(tlv: Tlv, args: Relocation.ResolveArgs) !void { + // We need to rewrite the opcode from movq to leaq. + tlv.op.* = 0x8d; + log.debug(" | rewriting op to leaq", .{}); + + const displacement = try math.cast(i32, @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr) - 4); + log.debug(" | displacement 0x{x}", .{displacement}); + + mem.writeIntLittle(u32, tlv.base.code[0..4], @bitCast(u32, displacement)); + } +}; + +pub const Parser = struct { + allocator: *Allocator, + it: *reloc.RelocIterator, + code: []u8, + parsed: std.ArrayList(*Relocation), + subtractor: ?Relocation.Target = null, + + pub fn deinit(parser: *Parser) void { + parser.parsed.deinit(); + } + + pub fn parse(parser: *Parser) !void { + while (parser.it.next()) |rel| { + switch (@intToEnum(macho.reloc_type_x86_64, rel.r_type)) { + .X86_64_RELOC_BRANCH => { + try parser.parseBranch(rel); + }, + .X86_64_RELOC_SUBTRACTOR => { + try parser.parseSubtractor(rel); + }, + .X86_64_RELOC_UNSIGNED => { + try parser.parseUnsigned(rel); + }, + .X86_64_RELOC_SIGNED, + .X86_64_RELOC_SIGNED_1, + .X86_64_RELOC_SIGNED_2, + .X86_64_RELOC_SIGNED_4, + => { + try parser.parseSigned(rel); + }, + .X86_64_RELOC_GOT_LOAD => { + try parser.parseGotLoad(rel); + }, + .X86_64_RELOC_GOT => { + try parser.parseGot(rel); + }, + .X86_64_RELOC_TLV => { + try parser.parseTlv(rel); + }, + } + } + } + + fn parseBranch(parser: *Parser, rel: macho.relocation_info) !void { + const rel_type = @intToEnum(macho.reloc_type_x86_64, rel.r_type); + assert(rel_type == .X86_64_RELOC_BRANCH); + assert(rel.r_pcrel == 1); + assert(rel.r_length == 2); + + const offset = @intCast(u32, rel.r_address); + const inst = parser.code[offset..][0..4]; + + var branch = try parser.allocator.create(Branch); + errdefer parser.allocator.destroy(branch); + + const target = Relocation.Target.from_reloc(rel); + + branch.* = .{ + .base = .{ + .@"type" = .branch_x86_64, + .code = inst, + .offset = offset, + .target = target, + }, + }; + + log.debug(" | emitting {}", .{branch}); + try parser.parsed.append(&branch.base); + } + + fn parseSigned(parser: *Parser, rel: 
macho.relocation_info) !void { + assert(rel.r_pcrel == 1); + assert(rel.r_length == 2); + + const rel_type = @intToEnum(macho.reloc_type_x86_64, rel.r_type); + const target = Relocation.Target.from_reloc(rel); + const is_extern = rel.r_extern == 1; + + const offset = @intCast(u32, rel.r_address); + const inst = parser.code[offset..][0..4]; + const addend = mem.readIntLittle(i32, inst); + + const correction: i4 = correction: { + if (is_extern) break :correction 0; + + const corr: i4 = switch (rel_type) { + .X86_64_RELOC_SIGNED => 0, + .X86_64_RELOC_SIGNED_1 => 1, + .X86_64_RELOC_SIGNED_2 => 2, + .X86_64_RELOC_SIGNED_4 => 4, + else => unreachable, + }; + break :correction corr; + }; + + var signed = try parser.allocator.create(Signed); + errdefer parser.allocator.destroy(signed); + + signed.* = .{ + .base = .{ + .@"type" = .signed, + .code = inst, + .offset = offset, + .target = target, + }, + .addend = addend, + .correction = correction, + }; + + log.debug(" | emitting {}", .{signed}); + try parser.parsed.append(&signed.base); + } + + fn parseGotLoad(parser: *Parser, rel: macho.relocation_info) !void { + const rel_type = @intToEnum(macho.reloc_type_x86_64, rel.r_type); + assert(rel_type == .X86_64_RELOC_GOT_LOAD); + assert(rel.r_pcrel == 1); + assert(rel.r_length == 2); + + const offset = @intCast(u32, rel.r_address); + const inst = parser.code[offset..][0..4]; + const target = Relocation.Target.from_reloc(rel); + + var got_load = try parser.allocator.create(GotLoad); + errdefer parser.allocator.destroy(got_load); + + got_load.* = .{ + .base = .{ + .@"type" = .got_load, + .code = inst, + .offset = offset, + .target = target, + }, + .op = &parser.code[offset - 2], + }; + + log.debug(" | emitting {}", .{got_load}); + try parser.parsed.append(&got_load.base); + } + + fn parseGot(parser: *Parser, rel: macho.relocation_info) !void { + const rel_type = @intToEnum(macho.reloc_type_x86_64, rel.r_type); + assert(rel_type == .X86_64_RELOC_GOT); + assert(rel.r_pcrel == 1); + assert(rel.r_length == 2); + + const offset = @intCast(u32, rel.r_address); + const inst = parser.code[offset..][0..4]; + const target = Relocation.Target.from_reloc(rel); + + var got = try parser.allocator.create(Got); + errdefer parser.allocator.destroy(got); + + got.* = .{ + .base = .{ + .@"type" = .got, + .code = inst, + .offset = offset, + .target = target, + }, + }; + + log.debug(" | emitting {}", .{got}); + try parser.parsed.append(&got.base); + } + + fn parseTlv(parser: *Parser, rel: macho.relocation_info) !void { + const rel_type = @intToEnum(macho.reloc_type_x86_64, rel.r_type); + assert(rel_type == .X86_64_RELOC_TLV); + assert(rel.r_pcrel == 1); + assert(rel.r_length == 2); + + const offset = @intCast(u32, rel.r_address); + const inst = parser.code[offset..][0..4]; + const target = Relocation.Target.from_reloc(rel); + + var tlv = try parser.allocator.create(Tlv); + errdefer parser.allocator.destroy(tlv); + + tlv.* = .{ + .base = .{ + .@"type" = .tlv, + .code = inst, + .offset = offset, + .target = target, + }, + .op = &parser.code[offset - 2], + }; + + log.debug(" | emitting {}", .{tlv}); + try parser.parsed.append(&tlv.base); + } + + fn parseSubtractor(parser: *Parser, rel: macho.relocation_info) !void { + const rel_type = @intToEnum(macho.reloc_type_x86_64, rel.r_type); + assert(rel_type == .X86_64_RELOC_SUBTRACTOR); + assert(rel.r_pcrel == 0); + assert(parser.subtractor == null); + + parser.subtractor = Relocation.Target.from_reloc(rel); + + // Verify SUBTRACTOR is followed by UNSIGNED. 
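+ // Together the pair resolves to target_addr - subtractor_addr + addend (see Unsigned.resolve).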
+ const next = @intToEnum(macho.reloc_type_x86_64, parser.it.peek().r_type); + if (next != .X86_64_RELOC_UNSIGNED) { + log.err("unexpected relocation type: expected UNSIGNED, found {s}", .{next}); + return error.UnexpectedRelocationType; + } + } + + fn parseUnsigned(parser: *Parser, rel: macho.relocation_info) !void { + defer { + // Reset parser's subtractor state + parser.subtractor = null; + } + + const rel_type = @intToEnum(macho.reloc_type_x86_64, rel.r_type); + assert(rel_type == .X86_64_RELOC_UNSIGNED); + assert(rel.r_pcrel == 0); + + var unsigned = try parser.allocator.create(reloc.Unsigned); + errdefer parser.allocator.destroy(unsigned); + + const target = Relocation.Target.from_reloc(rel); + const is_64bit: bool = switch (rel.r_length) { + 3 => true, + 2 => false, + else => unreachable, + }; + const offset = @intCast(u32, rel.r_address); + const addend: i64 = if (is_64bit) + mem.readIntLittle(i64, parser.code[offset..][0..8]) + else + mem.readIntLittle(i32, parser.code[offset..][0..4]); + + unsigned.* = .{ + .base = .{ + .@"type" = .unsigned, + .code = if (is_64bit) parser.code[offset..][0..8] else parser.code[offset..][0..4], + .offset = offset, + .target = target, + }, + .subtractor = parser.subtractor, + .is_64bit = is_64bit, + .addend = addend, + }; + + log.debug(" | emitting {}", .{unsigned}); + try parser.parsed.append(&unsigned.base); + } +};
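
Note on the pattern above: every concrete relocation (`Branch`, `Page`, `Signed`, ...) embeds a `base: Relocation`, and `Relocation.resolve` dispatches on the tag with `@fieldParentPtr` to recover the outer struct, while call sites such as `rel.cast(reloc.Unsigned)` perform the same downcast on demand. Below is a minimal, self-contained sketch of that pattern; it uses simplified types, hypothetical `Branch`/`OtherBranch` payloads, and the old-style `@fieldParentPtr(T, "base", ptr)` signature that appears throughout this diff, so treat it as an illustration rather than the linker's actual definitions.

const std = @import("std");
const assert = std.debug.assert;

const Relocation = struct {
    @"type": Type,

    const Type = enum { branch_aarch64, branch_x86_64 };

    // Recover the concrete relocation from a base pointer, guarded by the tag.
    fn cast(base: *Relocation, comptime T: type) ?*T {
        if (base.@"type" != T.base_type) return null;
        return @fieldParentPtr(T, "base", base);
    }
};

const Branch = struct {
    base: Relocation,
    displacement: i28, // hypothetical payload for the sketch

    const base_type: Relocation.Type = .branch_aarch64;
};

const OtherBranch = struct {
    base: Relocation,

    const base_type: Relocation.Type = .branch_x86_64;
};

pub fn main() void {
    var branch = Branch{
        .base = .{ .@"type" = .branch_aarch64 },
        .displacement = 0,
    };
    // Downcast through the type-erased base pointer and get the same object back.
    const recovered = branch.base.cast(Branch) orelse unreachable;
    assert(recovered == &branch);
    // A mismatched tag yields null instead of a mistyped pointer.
    assert(branch.base.cast(OtherBranch) == null);
}

The tag check in `cast` is what keeps the downcast safe: a wrong `base_type` produces `null` rather than an invalid pointer, which is why the loops in `resolveStubsAndGotEntries` and `resolveRelocsAndWriteSections` can switch on `rel.@"type"` and cast only when it matches, with no unions or vtables involved.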