macho: calculate UUID excluding stabs and part of contributing strtab

Jakub Konka 2022-12-15 15:10:35 +01:00
parent 09dee74414
commit 660270b7a9
5 changed files with 102 additions and 74 deletions
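In effect, the UUID is now computed over just two contributing regions of the output file: [0, start of the symbol stabs in the symtab) and [end of the stabs, offset of the first stab string in the strtab). The stab entries and their strings are skipped, and everything past the strtab (the code signature) was never part of the UUID calculation to begin with. Below is a minimal, single-threaded sketch of the scheme, using hypothetical Region and calcUuid names; the actual implementation added to zld.zig further down hashes the chunks in parallel:

const std = @import("std");
const Md5 = std.crypto.hash.Md5;

/// A half-open byte range [start, end) of the output file that contributes
/// to the UUID (hypothetical type for this sketch).
const Region = struct { start: u64, end: u64 };

/// Minimal, single-threaded sketch: MD5 every 0x4000-byte chunk of each
/// contributing region, MD5 the concatenated chunk digests (the LLD-style
/// hash-of-hashes), then force the RFC 4122 version/variant bits.
fn calcUuid(file: std.fs.File, regions: []const Region, out: *[Md5.digest_length]u8) !void {
    const chunk_size: u64 = 0x4000;
    var digest_hasher = Md5.init(.{});
    var buffer: [0x4000]u8 = undefined;
    for (regions) |region| {
        var pos = region.start;
        while (pos < region.end) : (pos += chunk_size) {
            const size = @intCast(usize, std.math.min(chunk_size, region.end - pos));
            const amt = try file.preadAll(buffer[0..size], pos);
            if (amt != size) return error.InputOutput;
            var chunk_digest: [Md5.digest_length]u8 = undefined;
            Md5.hash(buffer[0..size], &chunk_digest, .{});
            digest_hasher.update(&chunk_digest);
        }
    }
    digest_hasher.final(out);
    // Same conformance step as conformUuid in the diff below.
    out[6] = (out[6] & 0x0F) | (3 << 4);
    out[8] = (out[8] & 0x3F) | 0x80;
}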

@@ -594,7 +594,6 @@ set(ZIG_STAGE2_SOURCES
"${CMAKE_SOURCE_DIR}/src/link/MachO/hasher.zig"
"${CMAKE_SOURCE_DIR}/src/link/MachO/load_commands.zig"
"${CMAKE_SOURCE_DIR}/src/link/MachO/thunks.zig"
"${CMAKE_SOURCE_DIR}/src/link/MachO/uuid.zig"
"${CMAKE_SOURCE_DIR}/src/link/MachO/zld.zig"
"${CMAKE_SOURCE_DIR}/src/link/Plan9.zig"
"${CMAKE_SOURCE_DIR}/src/link/Plan9/aout.zig"

@@ -39,6 +39,7 @@ const Object = @import("MachO/Object.zig");
const LibStub = @import("tapi.zig").LibStub;
const Liveness = @import("../Liveness.zig");
const LlvmObject = @import("../codegen/llvm.zig").Object;
const Md5 = std.crypto.hash.Md5;
const Module = @import("../Module.zig");
const Relocation = @import("MachO/Relocation.zig");
const StringTable = @import("strtab.zig").StringTable;
@@ -598,6 +599,8 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No
if (self.cold_start) {
std.crypto.random.bytes(&self.uuid_cmd.uuid);
Md5.hash(&self.uuid_cmd.uuid, &self.uuid_cmd.uuid, .{});
conformUuid(&self.uuid_cmd.uuid);
}
try lc_writer.writeStruct(self.uuid_cmd);
@@ -662,6 +665,11 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No
self.cold_start = false;
}
inline fn conformUuid(out: *[Md5.digest_length]u8) void {
// LC_UUID uuids should conform to RFC 4122 UUID version 4 & UUID version 5 formats
out[6] = (out[6] & 0x0F) | (3 << 4);
out[8] = (out[8] & 0x3F) | 0x80;
}
pub fn resolveLibSystem(
arena: Allocator,

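The conformance step touches only two bytes of the MD5 digest: the high nibble of byte 6 is forced to 0x3 and the top two bits of byte 8 to 0b10, which is enough for the result to parse as an RFC 4122 UUID. A small sanity-check sketch (not part of the diff, mirroring the two statements above):

const std = @import("std");

test "conformUuid bit layout" {
    var uuid = [_]u8{0xFF} ** 16;
    // Same two statements as conformUuid.
    uuid[6] = (uuid[6] & 0x0F) | (3 << 4);
    uuid[8] = (uuid[8] & 0x3F) | 0x80;
    try std.testing.expectEqual(@as(u8, 0x3F), uuid[6]); // version nibble forced to 0x3
    try std.testing.expectEqual(@as(u8, 0xBF), uuid[8]); // variant bits forced to 0b10
}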
@@ -13,6 +13,7 @@ pub fn ParallelHasher(comptime Hasher: type) type {
return struct {
pub fn hash(self: @This(), gpa: Allocator, pool: *ThreadPool, file: fs.File, out: [][hash_size]u8, opts: struct {
chunk_size: u16 = 0x4000,
file_pos: u64 = 0,
max_file_size: ?u64 = null,
}) !void {
_ = self;
@@ -38,7 +39,14 @@ pub fn ParallelHasher(comptime Hasher: type) type {
const fstart = i * opts.chunk_size;
const fsize = if (fstart + opts.chunk_size > file_size) file_size - fstart else opts.chunk_size;
wg.start();
try pool.spawn(worker, .{ file, fstart, buffer[fstart..][0..fsize], &out[i], &results[i], &wg });
try pool.spawn(worker, .{
file,
fstart + opts.file_pos,
buffer[fstart..][0..fsize],
&out[i],
&results[i],
&wg,
});
}
}
for (results) |result| _ = try result;

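With the new file_pos field, a caller can hash an arbitrary window of the file instead of always starting at offset 0. A hedged usage sketch follows; hashRange is a hypothetical helper and the ThreadPool import path is an assumption:

const std = @import("std");
const Md5 = std.crypto.hash.Md5;
const ThreadPool = @import("../../ThreadPool.zig"); // assumed path within src/link/MachO
const ParallelHasher = @import("hasher.zig").ParallelHasher;

/// Hash the byte range [start, end) of `file` in 0x4000-byte chunks,
/// writing one MD5 digest per chunk into `out`.
fn hashRange(
    gpa: std.mem.Allocator,
    pool: *ThreadPool,
    file: std.fs.File,
    out: [][Md5.digest_length]u8,
    start: u64,
    end: u64,
) !void {
    var hasher = ParallelHasher(Md5){};
    try hasher.hash(gpa, pool, file, out, .{
        .chunk_size = 0x4000,
        .file_pos = start,
        .max_file_size = end - start,
    });
}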
@@ -1,69 +0,0 @@
const std = @import("std");
const fs = std.fs;
const mem = std.mem;
const Allocator = mem.Allocator;
const Compilation = @import("../../Compilation.zig");
const Md5 = std.crypto.hash.Md5;
const Hasher = @import("hasher.zig").ParallelHasher;
/// Somewhat random chunk size for MD5 hash calculation.
pub const chunk_size = 0x4000;
/// Calculates Md5 hash of the file contents.
/// Hash is calculated in a streaming manner which may be slow.
pub fn calcUuidStreaming(file: fs.File, file_size: u64, out: *[Md5.digest_length]u8) !void {
const total_num_chunks = mem.alignForward(file_size, chunk_size) / chunk_size;
var hasher = Md5.init(.{});
var buffer: [chunk_size]u8 = undefined;
var i: usize = 0;
while (i < total_num_chunks) : (i += 1) {
const start = i * chunk_size;
const size = if (start + chunk_size > file_size)
file_size - start
else
chunk_size;
const amt = try file.preadAll(&buffer, start);
if (amt != size) return error.InputOutput;
hasher.update(buffer[0..size]);
}
hasher.final(out);
conform(out);
}
/// Calculates Md5 hash of each chunk in parallel and then hashes all Md5 hashes to produce
/// the final digest.
/// While this is NOT a correct MD5 hash of the contents, this methodology is used by LLVM/LLD
/// and we will use it too as it seems accepted by Apple OSes.
pub fn calcUuidParallel(comp: *const Compilation, file: fs.File, file_size: u64, out: *[Md5.digest_length]u8) !void {
const total_hashes = mem.alignForward(file_size, chunk_size) / chunk_size;
const hashes = try comp.gpa.alloc([Md5.digest_length]u8, total_hashes);
defer comp.gpa.free(hashes);
var hasher = Hasher(Md5){};
try hasher.hash(comp.gpa, comp.thread_pool, file, hashes, .{
.chunk_size = chunk_size,
.max_file_size = file_size,
});
const final_buffer = try comp.gpa.alloc(u8, total_hashes * Md5.digest_length);
defer comp.gpa.free(final_buffer);
for (hashes) |hash, i| {
mem.copy(u8, final_buffer[i * Md5.digest_length ..][0..Md5.digest_length], &hash);
}
Md5.hash(final_buffer, out, .{});
conform(out);
}
inline fn conform(out: *[Md5.digest_length]u8) void {
// LC_UUID uuids should conform to RFC 4122 UUID version 4 & UUID version 5 formats
out[6] = (out[6] & 0x0F) | (3 << 4);
out[8] = (out[8] & 0x3F) | 0x80;
}

@@ -16,7 +16,6 @@ const link = @import("../../link.zig");
const load_commands = @import("load_commands.zig");
const thunks = @import("thunks.zig");
const trace = @import("../../tracy.zig").trace;
const uuid = @import("uuid.zig");
const Allocator = mem.Allocator;
const Archive = @import("Archive.zig");
@@ -26,7 +25,9 @@ const CodeSignature = @import("CodeSignature.zig");
const Compilation = @import("../../Compilation.zig");
const DwarfInfo = @import("DwarfInfo.zig");
const Dylib = @import("Dylib.zig");
const Hasher = @import("hasher.zig").ParallelHasher;
const MachO = @import("../MachO.zig");
const Md5 = std.crypto.hash.Md5;
const LibStub = @import("../tapi.zig").LibStub;
const Object = @import("Object.zig");
const StringTable = @import("../strtab.zig").StringTable;
@@ -2680,17 +2681,98 @@ pub const Zld = struct {
// In Debug we don't really care about reproducibility, so put in a random value
// and be done with it.
std.crypto.random.bytes(&self.uuid_cmd.uuid);
Md5.hash(&self.uuid_cmd.uuid, &self.uuid_cmd.uuid, .{});
conformUuid(&self.uuid_cmd.uuid);
},
else => {
const seg = self.getLinkeditSegmentPtr();
const file_size = seg.fileoff + seg.filesize;
try uuid.calcUuidParallel(comp, self.file, file_size, &self.uuid_cmd.uuid);
const max_file_size = @intCast(u32, seg.fileoff + seg.filesize);
var hashes = std.ArrayList([Md5.digest_length]u8).init(self.gpa);
defer hashes.deinit();
if (!self.options.strip) {
// First exclusion region will comprise all symbol stabs.
const nlocals = self.dysymtab_cmd.nlocalsym;
const locals_buf = try self.gpa.alloc(u8, nlocals * @sizeOf(macho.nlist_64));
defer self.gpa.free(locals_buf);
const amt = try self.file.preadAll(locals_buf, self.symtab_cmd.symoff);
if (amt != locals_buf.len) return error.InputOutput;
const locals = @ptrCast([*]macho.nlist_64, @alignCast(@alignOf(macho.nlist_64), locals_buf))[0..nlocals];
const istab: usize = for (locals) |local, i| {
if (local.stab()) break i;
} else locals.len;
const nstabs = locals.len - istab;
// Next, a subsection of the strtab.
// We do not care about anything succeeding strtab as it is the code signature data which is
// not part of the UUID calculation anyway.
const stab_stroff = locals[istab].n_strx;
const first_cut = FileSubsection{
.start = 0,
.end = @intCast(u32, self.symtab_cmd.symoff + istab * @sizeOf(macho.nlist_64)),
};
const second_cut = FileSubsection{
.start = first_cut.end + @intCast(u32, nstabs * @sizeOf(macho.nlist_64)),
.end = self.symtab_cmd.stroff + stab_stroff,
};
for (&[_]FileSubsection{ first_cut, second_cut }) |cut| {
try self.calcUuidHashes(comp, cut, &hashes);
}
} else {
try self.calcUuidHashes(comp, .{ .start = 0, .end = max_file_size }, &hashes);
}
const final_buffer = try self.gpa.alloc(u8, hashes.items.len * Md5.digest_length);
defer self.gpa.free(final_buffer);
for (hashes.items) |hash, i| {
mem.copy(u8, final_buffer[i * Md5.digest_length ..][0..Md5.digest_length], &hash);
}
Md5.hash(final_buffer, &self.uuid_cmd.uuid, .{});
conformUuid(&self.uuid_cmd.uuid);
},
}
const in_file = @sizeOf(macho.mach_header_64) + offset + @sizeOf(macho.load_command);
try self.file.pwriteAll(&self.uuid_cmd.uuid, in_file);
}
inline fn conformUuid(out: *[Md5.digest_length]u8) void {
// LC_UUID uuids should conform to RFC 4122 UUID version 4 & UUID version 5 formats
out[6] = (out[6] & 0x0F) | (3 << 4);
out[8] = (out[8] & 0x3F) | 0x80;
}
const FileSubsection = struct {
start: u32,
end: u32,
};
fn calcUuidHashes(
self: *Zld,
comp: *const Compilation,
cut: FileSubsection,
hashes: *std.ArrayList([Md5.digest_length]u8),
) !void {
const chunk_size = 0x4000;
const total_hashes = mem.alignForward(cut.end - cut.start, chunk_size) / chunk_size;
// Append total_hashes new slots and hash only into those, so digests from a
// previous cut are preserved.
const first_hash = hashes.items.len;
try hashes.resize(first_hash + total_hashes);
var hasher = Hasher(Md5){};
try hasher.hash(self.gpa, comp.thread_pool, self.file, hashes.items[first_hash..], .{
.chunk_size = chunk_size,
.file_pos = cut.start,
.max_file_size = cut.end - cut.start,
});
}
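As a worked example with hypothetical numbers: if symtab_cmd.symoff = 0x8000, dysymtab_cmd.nlocalsym = 100 with the first 60 locals being regular symbols and the last 40 being stabs (istab = 60), symtab_cmd.stroff = 0xA000, and the first stab string sits at n_strx = 0x150, then, since @sizeOf(macho.nlist_64) is 16 bytes, the two contributing regions are [0x0, 0x8000 + 60 * 16) = [0x0, 0x83C0) and [0x8000 + 100 * 16, 0xA000 + 0x150) = [0x8640, 0xA150). Each region is chunked and hashed independently, and the concatenated per-chunk digests are hashed once more to produce the UUID.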
fn writeCodeSignaturePadding(self: *Zld, code_sig: *CodeSignature) !void {
const seg = self.getLinkeditSegmentPtr();
// Code signature data has to be 16-bytes aligned for Apple tools to recognize the file