macho: generalize parallel hasher; impl parallel MD5-like hash

By pulling out the parallel hashing setup from `CodeSignature.zig`, we can now reuse it in different places across the MachO linker (for now; I can totally see its usefulness beyond MachO, e.g. in COFF or ELF too). The parallel hasher is generic over the actual hasher, such as Sha256 or MD5. The implementation is kept as it was.

For UUID calculation, the behavior depends on the linking mode:
* incremental - since it only supports debug mode, we don't bother with MD5 hashing of the contents, and populate the UUID with random data, but only once per sequence of in-place binary patches
* traditional - in debug, we use a random string (for speed); in release, we calculate the hash, however we use LLVM/LLD's trick: we calculate a series of MD5 hashes in parallel, one per chunk, and then compute one final MD5 over all of those MD5 hashes to generate the digest.
2024-11-14 16:13:24 +00:00 · 2022-12-14 15:15:20 +01:00 · 2022-12-14 15:15:20 +01:00 · 79457fc76a
commit 79457fc76a
parent ec40c6b28f
7 changed files with 160 additions and 59 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -591,6 +591,7 @@ set(ZIG_STAGE2_SOURCES
    "${CMAKE_SOURCE_DIR}/src/link/MachO/bind.zig"
    "${CMAKE_SOURCE_DIR}/src/link/MachO/dead_strip.zig"
    "${CMAKE_SOURCE_DIR}/src/link/MachO/fat.zig"
+    "${CMAKE_SOURCE_DIR}/src/link/MachO/hasher.zig"
    "${CMAKE_SOURCE_DIR}/src/link/MachO/load_commands.zig"
    "${CMAKE_SOURCE_DIR}/src/link/MachO/thunks.zig"
    "${CMAKE_SOURCE_DIR}/src/link/MachO/uuid.zig"
--- a/src/link/MachO.zig
+++ b/src/link/MachO.zig
@ -99,10 +99,10 @@ page_size: u16,
 /// fashion (default for LLVM backend).
 mode: enum { incremental, one_shot },

-uuid: macho.uuid_command = .{
-    .cmdsize = @sizeOf(macho.uuid_command),
-    .uuid = undefined,
-},
+uuid: struct {
+    buf: [16]u8 = undefined,
+    final: bool = false,
+} = .{},

 dylibs: std.ArrayListUnmanaged(Dylib) = .{},
 dylibs_map: std.StringHashMapUnmanaged(u16) = .{},
@ -588,11 +588,11 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No

    try load_commands.writeBuildVersionLC(&self.base.options, &ncmds, lc_writer);

-    {
-        std.crypto.random.bytes(&self.uuid.uuid);
-        try lc_writer.writeStruct(self.uuid);
-        ncmds += 1;
+    if (!self.uuid.final) {
+        std.crypto.random.bytes(&self.uuid.buf);
+        self.uuid.final = true;
    }
+    try load_commands.writeUuidLC(&self.uuid.buf, &ncmds, lc_writer);

    try load_commands.writeLoadDylibLCs(self.dylibs.items, self.referenced_dylibs.keys(), &ncmds, lc_writer);

--- a/src/link/MachO/CodeSignature.zig
+++ b/src/link/MachO/CodeSignature.zig
@ -1,6 +1,4 @@
 const CodeSignature = @This();
-const Compilation = @import("../../Compilation.zig");
-const WaitGroup = @import("../../WaitGroup.zig");

 const std = @import("std");
 const assert = std.debug.assert;
@ -9,10 +7,13 @@ const log = std.log.scoped(.link);
 const macho = std.macho;
 const mem = std.mem;
 const testing = std.testing;
+
 const Allocator = mem.Allocator;
+const Compilation = @import("../../Compilation.zig");
+const Hasher = @import("hasher.zig").ParallelHasher;
 const Sha256 = std.crypto.hash.sha2.Sha256;

-const hash_size: u8 = 32;
+const hash_size = Sha256.digest_length;

 const Blob = union(enum) {
    code_directory: *CodeDirectory,
@ -109,7 +110,7 @@ const CodeDirectory = struct {
    fn size(self: CodeDirectory) u32 {
        const code_slots = self.inner.nCodeSlots * hash_size;
        const special_slots = self.inner.nSpecialSlots * hash_size;
-        return @sizeOf(macho.CodeDirectory) + @intCast(u32, self.ident.len + 1) + special_slots + code_slots;
+        return @sizeOf(macho.CodeDirectory) + @intCast(u32, self.ident.len + 1 + special_slots + code_slots);
    }

    fn write(self: CodeDirectory, writer: anytype) !void {
@ -287,33 +288,11 @@ pub fn writeAdhocSignature(
    self.code_directory.inner.nCodeSlots = total_pages;

    // Calculate hash for each page (in file) and write it to the buffer
-    var wg: WaitGroup = .{};
-    {
-        const buffer = try gpa.alloc(u8, self.page_size * total_pages);
-        defer gpa.free(buffer);
-
-        const results = try gpa.alloc(fs.File.PReadError!usize, total_pages);
-        defer gpa.free(results);
-        {
-            wg.reset();
-            defer wg.wait();
-
-            var i: usize = 0;
-            while (i < total_pages) : (i += 1) {
-                const fstart = i * self.page_size;
-                const fsize = if (fstart + self.page_size > opts.file_size)
-                    opts.file_size - fstart
-                else
-                    self.page_size;
-                const out_hash = &self.code_directory.code_slots.items[i];
-                wg.start();
-                try comp.thread_pool.spawn(workerSha256Hash, .{
-                    opts.file, fstart, buffer[fstart..][0..fsize], out_hash, &results[i], &wg,
-                });
-            }
-        }
-        for (results) |result| _ = try result;
-    }
+    var hasher = Hasher(Sha256){};
+    try hasher.hash(gpa, comp.thread_pool, opts.file, self.code_directory.code_slots.items, .{
+        .chunk_size = self.page_size,
+        .max_file_size = opts.file_size,
+    });

    try blobs.append(.{ .code_directory = &self.code_directory });
    header.length += @sizeOf(macho.BlobIndex);
@ -352,7 +331,7 @@ pub fn writeAdhocSignature(
    }

    self.code_directory.inner.hashOffset =
-        @sizeOf(macho.CodeDirectory) + @intCast(u32, self.code_directory.ident.len + 1) + self.code_directory.inner.nSpecialSlots * hash_size;
+        @sizeOf(macho.CodeDirectory) + @intCast(u32, self.code_directory.ident.len + 1 + self.code_directory.inner.nSpecialSlots * hash_size);
    self.code_directory.inner.length = self.code_directory.size();
    header.length += self.code_directory.size();

@ -372,19 +351,6 @@ pub fn writeAdhocSignature(
    }
 }

-fn workerSha256Hash(
-    file: fs.File,
-    fstart: usize,
-    buffer: []u8,
-    hash: *[hash_size]u8,
-    err: *fs.File.PReadError!usize,
-    wg: *WaitGroup,
-) void {
-    defer wg.finish();
-    err.* = file.preadAll(buffer, fstart);
-    Sha256.hash(buffer, hash, .{});
-}
-
 pub fn size(self: CodeSignature) u32 {
    var ssize: u32 = @sizeOf(macho.SuperBlob) + @sizeOf(macho.BlobIndex) + self.code_directory.size();
    if (self.requirements) |req| {
--- a/src/link/MachO/DebugSymbols.zig
+++ b/src/link/MachO/DebugSymbols.zig
@ -5,6 +5,7 @@ const build_options = @import("build_options");
 const assert = std.debug.assert;
 const fs = std.fs;
 const link = @import("../../link.zig");
+const load_commands = @import("load_commands.zig");
 const log = std.log.scoped(.dsym);
 const macho = std.macho;
 const makeStaticString = MachO.makeStaticString;
@ -303,10 +304,7 @@ pub fn flushModule(self: *DebugSymbols, macho_file: *MachO) !void {
    self.finalizeDwarfSegment(macho_file);
    try self.writeLinkeditSegmentData(macho_file, &ncmds, lc_writer);

-    {
-        try lc_writer.writeStruct(macho_file.uuid);
-        ncmds += 1;
-    }
+    try load_commands.writeUuidLC(&macho_file.uuid.buf, &ncmds, lc_writer);

    var headers_buf = std.ArrayList(u8).init(self.allocator);
    defer headers_buf.deinit();
--- a/src/link/MachO/hasher.zig
+++ b/src/link/MachO/hasher.zig
@ -0,0 +1,60 @@
+const std = @import("std");
+const assert = std.debug.assert;
+const fs = std.fs;
+const mem = std.mem;
+
+const Allocator = mem.Allocator;
+const ThreadPool = @import("../../ThreadPool.zig");
+const WaitGroup = @import("../../WaitGroup.zig");
+
+/// Returns a type that hashes a file chunk-by-chunk on a thread pool, producing one
+/// `Hasher` digest per chunk.
+/// `Hasher` must expose `digest_length` and a one-shot `hash(bytes, out, options)` function
+/// (e.g. `std.crypto.hash.sha2.Sha256` or `std.crypto.hash.Md5`).
+pub fn ParallelHasher(comptime Hasher: type) type {
+    const hash_size = Hasher.digest_length;

+    return struct {
+        /// Splits the first `max_file_size` bytes of `file` (or the whole file when it is
+        /// `null`) into `chunk_size`-sized chunks, the last one possibly shorter, and writes
+        /// the digest of chunk `i` into `out[i]`. `out` must have room for every chunk.
+        /// One buffer covering all chunks is allocated from `gpa` for the duration of the call.
+        /// Fails if an allocation fails, if a job cannot be spawned on `pool`, or if any
+        /// chunk read reports an error.
+        pub fn hash(self: @This(), gpa: Allocator, pool: *ThreadPool, file: fs.File, out: [][hash_size]u8, opts: struct {
+            chunk_size: u16 = 0x4000,
+            max_file_size: ?u64 = null,
+        }) !void {
+            _ = self;

+            var wg: WaitGroup = .{};

+            const file_size = opts.max_file_size orelse try file.getEndPos();
+            const total_num_chunks = mem.alignForward(file_size, opts.chunk_size) / opts.chunk_size;
+            assert(out.len >= total_num_chunks);

+            const buffer = try gpa.alloc(u8, opts.chunk_size * total_num_chunks);
+            defer gpa.free(buffer);

+            const results = try gpa.alloc(fs.File.PReadError!usize, total_num_chunks);
+            defer gpa.free(results);

+            {
+                wg.reset();
+                // Runs before `buffer` and `results` are freed, so in-flight workers never
+                // touch released memory, including on the error path below.
+                defer wg.wait();

+                var i: usize = 0;
+                while (i < total_num_chunks) : (i += 1) {
+                    const fstart = i * opts.chunk_size;
+                    const fsize = if (fstart + opts.chunk_size > file_size) file_size - fstart else opts.chunk_size;
+                    wg.start();
+                    pool.spawn(worker, .{ file, fstart, buffer[fstart..][0..fsize], &out[i], &results[i], &wg }) catch |err| {
+                        // The job was never queued, so no worker will call `finish` for it.
+                        // Balance the `start` above, otherwise the deferred `wait` never returns.
+                        wg.finish();
+                        return err;
+                    };
+                }
+            }
+            // NOTE(review): the byte count returned by `preadAll` is discarded here, so a short
+            // read is not detected and the tail of that chunk's buffer is hashed as-is.
+            // Confirm callers guarantee that `file` holds at least `file_size` bytes.
+            for (results) |result| _ = try result;
+        }

+        /// Thread pool job: reads one chunk at offset `fstart` into `buffer`, records the read
+        /// result in `err`, and stores the digest of `buffer` in `out`.
+        fn worker(
+            file: fs.File,
+            fstart: usize,
+            buffer: []u8,
+            out: *[hash_size]u8,
+            err: *fs.File.PReadError!usize,
+            wg: *WaitGroup,
+        ) void {
+            defer wg.finish();
+            err.* = file.preadAll(buffer, fstart);
+            Hasher.hash(buffer, out, .{});
+        }
+    };
+}
--- a/src/link/MachO/uuid.zig
+++ b/src/link/MachO/uuid.zig
@ -0,0 +1,69 @@
+const std = @import("std");
+const fs = std.fs;
+const mem = std.mem;
+
+const Allocator = mem.Allocator;
+const Compilation = @import("../../Compilation.zig");
+const Md5 = std.crypto.hash.Md5;
+const Hasher = @import("hasher.zig").ParallelHasher;
+
+/// Somewhat random chunk size for MD5 hash calculation.
+pub const chunk_size = 0x4000;
+
+/// Calculates the Md5 hash of the first `file_size` bytes of `file` and stores it in `out`
+/// after passing it through `conform`.
+/// Hash is calculated in a streaming manner, one `chunk_size` piece at a time, which may be slow.
+/// Returns `error.InputOutput` if fewer bytes than expected could be read for a chunk.
+pub fn calcUuidStreaming(file: fs.File, file_size: u64, out: *[Md5.digest_length]u8) !void {
+    const total_num_chunks = mem.alignForward(file_size, chunk_size) / chunk_size;

+    var hasher = Md5.init(.{});
+    var buffer: [chunk_size]u8 = undefined;

+    var i: usize = 0;
+    while (i < total_num_chunks) : (i += 1) {
+        const start = i * chunk_size;
+        const size = if (start + chunk_size > file_size)
+            file_size - start
+        else
+            chunk_size;
+        // Request only the bytes that belong to this chunk. Reading into the whole buffer
+        // would return more than `size` bytes for the last chunk whenever the file extends
+        // past `file_size`, and the length check below would then fail spuriously.
+        const chunk = buffer[0..size];
+        const amt = try file.preadAll(chunk, start);
+        if (amt != size) return error.InputOutput;

+        hasher.update(chunk);
+    }

+    hasher.final(out);
+    conform(out);
+}
+
+/// Calculates Md5 hash of each `chunk_size` chunk of the first `file_size` bytes of `file`
+/// in parallel on `comp.thread_pool`, then hashes the concatenation of all chunk digests
+/// to produce the final digest, which is passed through `conform` and stored in `out`.
+/// While this is NOT a correct MD5 hash of the contents, this methodology is used by LLVM/LLD
+/// and we will use it too as it seems accepted by Apple OSes.
+pub fn calcUuidParallel(comp: *const Compilation, file: fs.File, file_size: u64, out: *[Md5.digest_length]u8) !void {
+    const total_hashes = mem.alignForward(file_size, chunk_size) / chunk_size;

+    const hashes = try comp.gpa.alloc([Md5.digest_length]u8, total_hashes);
+    defer comp.gpa.free(hashes);

+    var hasher = Hasher(Md5){};
+    try hasher.hash(comp.gpa, comp.thread_pool, file, hashes, .{
+        .chunk_size = chunk_size,
+        .max_file_size = file_size,
+    });

+    // `hashes` is a contiguous run of fixed-size digests, so its raw bytes already are the
+    // concatenation of all chunk digests; no second buffer or copy is needed.
+    Md5.hash(mem.sliceAsBytes(hashes), out, .{});
+    conform(out);
+}
+
+inline fn conform(out: *[Md5.digest_length]u8) void {
+    // Stamp the RFC 4122 layout bits onto the raw digest so it reads as a well-formed UUID:
+    // the high nibble of byte 6 becomes the version number 3, and the two most significant
+    // bits of byte 8 become the `10` variant marker. All other bits are left untouched.
+    const version_bits: u8 = 0x30;
+    const variant_bits: u8 = 0x80;
+    out[6] = version_bits | (out[6] & 0x0F);
+    out[8] = variant_bits | (out[8] & 0x3F);
+}
--- a/src/link/MachO/zld.zig
+++ b/src/link/MachO/zld.zig
@ -4037,8 +4037,15 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr
        const uuid_offset_backpatch: ?usize = blk: {
            const index = lc_buffer.items.len;
            var uuid_buf: [16]u8 = [_]u8{0} ** 16;
+
+            if (zld.options.optimize_mode == .Debug) {
+                // In Debug we don't really care about reproducibility, so put in a random value
+                // and be done with it.
+                std.crypto.random.bytes(&uuid_buf);
+            }
+
            try load_commands.writeUuidLC(&uuid_buf, &ncmds, lc_writer);
-            break :blk index;
+            break :blk if (zld.options.optimize_mode == .Debug) null else index;
        };

        try load_commands.writeLoadDylibLCs(zld.dylibs.items, zld.referenced_dylibs.keys(), &ncmds, lc_writer);
@ -4076,7 +4083,7 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr
            const seg = zld.getLinkeditSegmentPtr();
            const file_size = seg.fileoff + seg.filesize;
            var uuid_buf: [16]u8 = undefined;
-            try uuid.calcMd5Hash(zld.gpa, zld.file, file_size, &uuid_buf);
+            try uuid.calcUuidParallel(comp, zld.file, file_size, &uuid_buf);
            const offset = @sizeOf(macho.mach_header_64) + headers_buf.items.len + backpatch + @sizeOf(macho.load_command);
            try zld.file.pwriteAll(&uuid_buf, offset);
        }