zig/lib/std/tar.zig

/// A tar archive is a single ordinary file which can contain many files (or
/// directories, symlinks, ...). It is built from a series of blocks, each 512
/// bytes in size. The first block of each entry is a header which defines the
/// type, name, size, permissions and other attributes. The header is followed
/// by a series of blocks of file content, if the entry has any content. Content
/// is padded to the block size, so the next header always starts at a block
/// boundary.
///
/// This simple format is extended by GNU and POSIX pax extensions to support
/// file names longer than 256 bytes and additional attributes.
///
/// This is not a comprehensive tar parser. Here we handle only the file types
/// needed to support the Zig package manager: normal file, directory, symbolic
/// link. And a subset of attributes: name, size, permissions.
///
/// GNU tar reference: https://www.gnu.org/software/tar/manual/html_node/Standard.html
/// pax reference: https://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13
///
const std = @import("std.zig");
const assert = std.debug.assert;

/// Settings that control how a tar archive is extracted.
pub const Options = struct {
    /// Number of leading path components removed from every entry name.
    strip_components: u32 = 0,
    /// Policy for the "mode" property stored in the tar file.
    mode_mode: ModeMode = .executable_bit_only,
    /// When true, directory entries are not created on their own.
    exclude_empty_directories: bool = false,
    /// Optional collector for detailed error information.
    /// When set, some errors which would otherwise be returned immediately
    /// are appended here instead. The API user must inspect the collected
    /// errors to know whether the operation succeeded or failed.
    diagnostics: ?*Diagnostics = null,

    pub const ModeMode = enum {
        /// The mode stored in the tar file is not used at all; files get the
        /// default mode for newly created files.
        ignore,
        /// Only the owner executable bit of the stored mode is inspected and
        /// it is copied to the group and other executable bits. All other
        /// mode bits keep their default for newly created files.
        executable_bit_only,
    };

    /// List of non-fatal extraction errors. Every string stored in an error
    /// is owned by `allocator` and released by `deinit`.
    pub const Diagnostics = struct {
        allocator: std.mem.Allocator,
        errors: std.ArrayListUnmanaged(Error) = .{},

        pub const Error = union(enum) {
            unable_to_create_sym_link: struct {
                code: anyerror,
                file_name: []const u8,
                link_name: []const u8,
            },
            unable_to_create_file: struct {
                code: anyerror,
                file_name: []const u8,
            },
            unsupported_file_type: struct {
                file_name: []const u8,
                file_type: Header.Kind,
            },
        };

        /// Frees every string referenced by the collected errors, then the
        /// list itself, and finally invalidates the structure.
        pub fn deinit(d: *Diagnostics) void {
            const gpa = d.allocator;
            for (d.errors.items) |entry| {
                switch (entry) {
                    .unable_to_create_sym_link => |payload| {
                        gpa.free(payload.file_name);
                        gpa.free(payload.link_name);
                    },
                    .unable_to_create_file => |payload| gpa.free(payload.file_name),
                    .unsupported_file_type => |payload| gpa.free(payload.file_name),
                }
            }
            d.errors.deinit(gpa);
            d.* = undefined;
        }
    };
};

/// View over a single 512 byte tar header block. All accessors decode
/// fields directly from `bytes`; nothing is copied unless a caller supplied
/// buffer is passed in.
pub const Header = struct {
    const SIZE = 512;
    const MAX_NAME_SIZE = 100 + 1 + 155; // name(100) + separator(1) + prefix(155)
    const LINK_NAME_SIZE = 100;

    bytes: *const [SIZE]u8,

    /// Value of the type flag byte (offset 156). The enum is non-exhaustive,
    /// so unknown flags are representable.
    pub const Kind = enum(u8) {
        normal_alias = 0,
        normal = '0',
        hard_link = '1',
        symbolic_link = '2',
        character_special = '3',
        block_special = '4',
        directory = '5',
        fifo = '6',
        contiguous = '7',
        global_extended_header = 'g',
        extended_header = 'x',
        // Types 'L' and 'K' are used by the GNU format for a meta file
        // used to store the path or link name for the next file.
        gnu_long_name = 'L',
        gnu_long_link = 'K',
        _,
    };

    /// Copies the entry name into `buffer` and returns the written part.
    /// For ustar headers with a non-empty prefix the result is
    /// "<prefix>/<name>", otherwise just the name.
    /// TODO: check against "../" and other nefarious things
    pub fn fullName(header: Header, buffer: *[MAX_NAME_SIZE]u8) ![]const u8 {
        const n = name(header);
        const p = prefix(header);
        if (!is_ustar(header) or p.len == 0) {
            @memcpy(buffer[0..n.len], n);
            return buffer[0..n.len];
        }
        @memcpy(buffer[0..p.len], p);
        buffer[p.len] = '/';
        @memcpy(buffer[p.len + 1 ..][0..n.len], n);
        return buffer[0 .. p.len + 1 + n.len];
    }

    /// Copies the link name field (offset 157, 100 bytes, cut at the first
    /// null byte) into `buffer` and returns the written part.
    pub fn linkName(header: Header, buffer: *[LINK_NAME_SIZE]u8) []const u8 {
        const link_name = header.str(157, 100);
        if (link_name.len == 0) {
            return buffer[0..0];
        }
        const buf = buffer[0..link_name.len];
        @memcpy(buf, link_name);
        return buf;
    }

    /// Name field (offset 0, 100 bytes) cut at the first null byte.
    /// The returned slice points into the header block.
    pub fn name(header: Header) []const u8 {
        return header.str(0, 100);
    }

    /// Mode field (offset 100, 8 bytes).
    /// Returns error.TarNumericValueTooBig when the decoded value does not
    /// fit into u32.
    pub fn mode(header: Header) !u32 {
        const value = try header.numeric(100, 8);
        return std.math.cast(u32, value) orelse return error.TarNumericValueTooBig;
    }

    /// Size field (offset 124, 12 bytes).
    pub fn size(header: Header) !u64 {
        return header.numeric(124, 12);
    }

    /// Checksum field (offset 148, 8 bytes), always parsed as octal.
    pub fn chksum(header: Header) !u64 {
        return header.octal(148, 8);
    }

    /// True when the magic at offset 257 is "ustar" followed by a null byte
    /// or a space.
    pub fn is_ustar(header: Header) bool {
        const magic = header.bytes[257..][0..6];
        return std.mem.eql(u8, magic[0..5], "ustar") and (magic[5] == 0 or magic[5] == ' ');
    }

    /// Prefix field (offset 345, 155 bytes) cut at the first null byte.
    pub fn prefix(header: Header) []const u8 {
        return header.str(345, 155);
    }

    /// Entry type from the type flag byte; the null type flag is reported
    /// as `.normal`.
    pub fn kind(header: Header) Kind {
        const result: Kind = @enumFromInt(header.bytes[156]);
        if (result == .normal_alias) return .normal;
        return result;
    }

    // Header field `start..start+len` cut at the first null byte.
    fn str(header: Header, start: usize, len: usize) []const u8 {
        return nullStr(header.bytes[start .. start + len]);
    }

    // Decodes a numeric field which is either base-256 (leading byte 0x80)
    // or ASCII octal. Works for any field width.
    fn numeric(header: Header, start: usize, len: usize) !u64 {
        const raw = header.bytes[start..][0..len];
        //  If the leading byte is 0xff (255), all the bytes of the field
        //  (including the leading byte) are concatenated in big-endian order,
        //  with the result being a negative number expressed in two’s
        //  complement form.
        if (raw[0] == 0xff) return error.TarNumericValueNegative;
        // If the leading byte is 0x80 (128), the non-leading bytes of the
        // field are concatenated in big-endian order.
        if (raw[0] == 0x80) {
            var value: u64 = 0;
            for (raw[1..]) |byte| {
                // Shifting in another byte would drop set bits: the value
                // does not fit into u64.
                if (value > std.math.maxInt(u64) >> 8) return error.TarNumericValueTooBig;
                value = (value << 8) | byte;
            }
            return value;
        }
        return try header.octal(start, len);
    }

    // Decodes an ASCII octal field. An empty field (only zeros, spaces and
    // null bytes) decodes to 0.
    fn octal(header: Header, start: usize, len: usize) !u64 {
        const raw = header.bytes[start..][0..len];
        // Zero-filled octal number in ASCII. Each numeric field of width w
        // contains w minus 1 digits, and a null
        const ltrimmed = std.mem.trimLeft(u8, raw, "0 ");
        const rtrimmed = std.mem.trimRight(u8, ltrimmed, " \x00");
        if (rtrimmed.len == 0) return 0;
        return std.fmt.parseInt(u64, rtrimmed, 8) catch return error.TarHeader;
    }

    // Sum of all bytes in the header block. The chksum field is treated as if
    // it were filled with spaces (ASCII 32).
    // Returns 0 when every byte outside of the chksum field is zero.
    fn computeChksum(header: Header) u64 {
        var sum: u64 = 0;
        for (header.bytes, 0..) |b, i| {
            if (148 <= i and i < 156) continue; // skip chksum field bytes
            sum += b;
        }
        // Treating chksum bytes as spaces. 256 = 8 * 32, 8 spaces.
        return if (sum > 0) sum + 256 else 0;
    }

    /// Compares the computed checksum with the value of the chksum field.
    /// Returns error.TarHeaderChksum on mismatch, otherwise the checksum.
    /// Zero value indicates empty block.
    pub fn checkChksum(header: Header) !u64 {
        const field = try header.chksum();
        const computed = header.computeChksum();
        if (field != computed) return error.TarHeaderChksum;
        return field;
    }
};

// Returns the part of `str` that precedes the first null byte, or all of
// `str` when it contains no null byte.
fn nullStr(str: []const u8) []const u8 {
    const end = std.mem.indexOfScalar(u8, str, 0) orelse str.len;
    return str[0..end];
}

/// Creates a tar iterator over `reader`. `diagnostics`, when not null, is
/// stored in the iterator and used to collect unsupported entries.
pub fn tarReader(reader: anytype, diagnostics: ?*Options.Diagnostics) TarReader(@TypeOf(reader)) {
    const Iterator = TarReader(@TypeOf(reader));
    return Iterator{
        .diagnostics = diagnostics,
        .reader = reader,
    };
}

// Iterator over the files of a tar archive which is read from `ReaderType`.
fn TarReader(comptime ReaderType: type) type {
    return struct {
        reader: ReaderType,
        diagnostics: ?*Options.Diagnostics,

        // buffers for header and file attributes
        header_buffer: [Header.SIZE]u8 = undefined,
        file_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined,
        link_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined,

        // bytes of padding to the end of the block
        padding: usize = 0,
        // current tar file
        file: File = undefined,

        pub const File = struct {
            name: []const u8, // name of file, symlink or directory
            link_name: []const u8, // target name of symlink
            size: u64, // size of the file in bytes
            mode: u32,
            kind: Header.Kind,

            reader: ReaderType,

            // Writes file content to writer.
            // Reads exactly `size` bytes from reader in chunks of at most
            // 4096 bytes.
            pub fn write(self: File, writer: anytype) !void {
                var buffer: [4096]u8 = undefined;

                var n: u64 = 0;
                while (n < self.size) {
                    const buf = buffer[0..@min(buffer.len, self.size - n)];
                    try self.reader.readNoEof(buf);
                    try writer.writeAll(buf);
                    n += buf.len;
                }
            }

            // Skips file content. Advances reader.
            pub fn skip(self: File) !void {
                try self.reader.skipBytes(self.size, .{});
            }
        };

        const Self = @This();

        // Skips padding of the previous entry and reads the next header
        // block. Returns null at the end of the stream or when the block
        // checksum is zero (empty block).
        fn readHeader(self: *Self) !?Header {
            if (self.padding > 0) {
                try self.reader.skipBytes(self.padding, .{});
            }
            const n = try self.reader.readAll(&self.header_buffer);
            if (n == 0) return null;
            if (n < Header.SIZE) return error.UnexpectedEndOfStream;
            const header = Header{ .bytes = self.header_buffer[0..Header.SIZE] };
            if (try header.checkChksum() == 0) return null;
            return header;
        }

        // Reads `size` bytes into buffer and returns them cut at the first
        // null byte. `size` comes from the archive, so a value larger than
        // the buffer is reported as an error instead of being asserted.
        inline fn readString(self: *Self, size: usize, buffer: []u8) ![]const u8 {
            if (size > buffer.len) return error.TarInsufficientBuffer;
            const buf = buffer[0..size];
            try self.reader.readNoEof(buf);
            return nullStr(buf);
        }

        // Resets current file attributes to empty values.
        inline fn initFile(self: *Self) void {
            self.file = File{
                .name = self.file_name_buffer[0..0],
                .link_name = self.link_name_buffer[0..0],
                .size = 0,
                .kind = .normal,
                .mode = 0,
                .reader = self.reader,
            };
        }

        // Number of padding bytes in the last file block.
        inline fn blockPadding(size: u64) usize {
            const block_rounded = std.mem.alignForward(u64, size, Header.SIZE); // size rounded to the block boundary
            return @intCast(block_rounded - size);
        }

        /// Iterates through the tar archive as if it is a series of files.
        /// Internally, the tar format often uses entries (header with optional
        /// content) to add meta data that describes the next file. These
        /// entries should not normally be visible to the outside. As such, this
        /// loop iterates through one or more entries until it collects all
        /// file attributes.
        ///
        /// The returned file shares the reader with this iterator and it is
        /// positioned at the start of the file content.
        pub fn next(self: *Self) !?File {
            self.initFile();

            while (try self.readHeader()) |header| {
                const kind = header.kind();
                const size: u64 = try header.size();
                self.padding = blockPadding(size);

                switch (kind) {
                    // File types to return upstream
                    .directory, .normal, .symbolic_link => {
                        self.file.kind = kind;
                        self.file.mode = try header.mode();

                        // set file attributes if not already set by prefix/extended headers
                        if (self.file.size == 0) {
                            self.file.size = size;
                        }
                        if (self.file.link_name.len == 0) {
                            self.file.link_name = header.linkName(self.link_name_buffer[0..Header.LINK_NAME_SIZE]);
                        }
                        if (self.file.name.len == 0) {
                            self.file.name = try header.fullName(self.file_name_buffer[0..Header.MAX_NAME_SIZE]);
                        }

                        self.padding = blockPadding(self.file.size);
                        return self.file;
                    },
                    // Prefix header types
                    .gnu_long_name => {
                        self.file.name = try self.readString(@intCast(size), &self.file_name_buffer);
                    },
                    .gnu_long_link => {
                        self.file.link_name = try self.readString(@intCast(size), &self.link_name_buffer);
                    },
                    .extended_header => {
                        // Use just attributes from last extended header.
                        self.initFile();

                        var rdr = paxReader(self.reader, @intCast(size));
                        while (try rdr.next()) |attr| {
                            // attr.len is taken from the archive; check it
                            // against the destination before copying.
                            switch (attr.kind) {
                                .path => {
                                    if (attr.len > self.file_name_buffer.len) return error.TarInsufficientBuffer;
                                    self.file.name = try attr.value(&self.file_name_buffer);
                                },
                                .linkpath => {
                                    if (attr.len > self.link_name_buffer.len) return error.TarInsufficientBuffer;
                                    self.file.link_name = try attr.value(&self.link_name_buffer);
                                },
                                .size => {
                                    var buf: [64]u8 = undefined;
                                    if (attr.len > buf.len) return error.TarInsufficientBuffer;
                                    self.file.size = try std.fmt.parseInt(u64, try attr.value(&buf), 10);
                                },
                            }
                        }
                    },
                    // Ignored header type
                    .global_extended_header => {
                        self.reader.skipBytes(size, .{}) catch return error.TarHeadersTooBig;
                    },
                    // All other are unsupported header types
                    else => {
                        const d = self.diagnostics orelse return error.TarUnsupportedHeader;
                        {
                            const name_copy = try d.allocator.dupe(u8, header.name());
                            // Ownership moves to diagnostics only when append succeeds.
                            errdefer d.allocator.free(name_copy);
                            try d.errors.append(d.allocator, .{ .unsupported_file_type = .{
                                .file_name = name_copy,
                                .file_type = kind,
                            } });
                        }
                        // Skip content of the unsupported entry, so the next
                        // header is read from the block boundary.
                        self.reader.skipBytes(size, .{}) catch return error.TarHeadersTooBig;
                    },
                }
            }
            return null;
        }
    };
}

// Creates a reader of pax attributes.
// `size` is the length in bytes of the pax extended header content which
// follows in `reader`.
fn paxReader(reader: anytype, size: usize) PaxReader(@TypeOf(reader)) {
    return .{
        .size = size,
        .reader = reader,
    };
}

// Pax attribute keywords which PaxReader reports to the caller.
// Records with any other keyword are skipped by PaxReader.next.
const PaxAttributeKind = enum {
    // keyword "path"
    path,
    // keyword "linkpath"
    linkpath,
    // keyword "size"
    size,
};

// Reader of pax extended header records: "%d %s=%s\n", <length>, <keyword>, <value>.
fn PaxReader(comptime ReaderType: type) type {
    return struct {
        size: usize, // cumulative size of all pax attributes
        reader: ReaderType,
        // scratch buffer used for reading attribute length and keyword
        scratch: [128]u8 = undefined,

        const Self = @This();

        const Attribute = struct {
            kind: PaxAttributeKind,
            len: usize, // length of the attribute value
            reader: ReaderType, // reader positioned at value start

            // Copies pax attribute value into destination buffer and
            // consumes the record terminating new line.
            // Returns error.TarInsufficientBuffer when dst is shorter than
            // Attribute.len; the length comes from the archive so it can't
            // be trusted to fit.
            pub fn value(self: Attribute, dst: []u8) ![]const u8 {
                if (self.len > dst.len) return error.TarInsufficientBuffer;
                const buf = dst[0..self.len];
                const n = try self.reader.readAll(buf);
                if (n < self.len) return error.UnexpectedEndOfStream;
                try validateAttributeEnding(self.reader);
                if (hasNull(buf)) return error.PaxNullInValue;
                return buf;
            }
        };

        // Iterates over pax attributes. Returns only known attributes;
        // records with other keywords are skipped.
        // Caller has to call value in Attribute, to advance reader across value.
        pub fn next(self: *Self) !?Attribute {
            // Pax extended header consists of one or more attributes, each constructed as follows:
            // "%d %s=%s\n", <length>, <keyword>, <value>
            while (self.size > 0) {
                const length_buf = try self.readUntil(' ');
                const length = try std.fmt.parseInt(usize, length_buf, 10); // record length in bytes

                const keyword = try self.readUntil('=');
                if (hasNull(keyword)) return error.PaxNullInKeyword;

                // calculate value_len
                const value_start = length_buf.len + keyword.len + 2; // 2 separators
                if (length < value_start + 1 or self.size < length) return error.UnexpectedEndOfStream;
                const value_len = length - value_start - 1; // \n separator at end
                self.size -= length;

                const kind: PaxAttributeKind = if (eql(keyword, "path"))
                    .path
                else if (eql(keyword, "linkpath"))
                    .linkpath
                else if (eql(keyword, "size"))
                    .size
                else {
                    // Unknown keyword: consume value and record ending.
                    try self.reader.skipBytes(value_len, .{});
                    try validateAttributeEnding(self.reader);
                    continue;
                };
                return Attribute{
                    .kind = kind,
                    .len = value_len,
                    .reader = self.reader,
                };
            }

            return null;
        }

        // Reads into scratch buffer up to the delimiter. The delimiter is
        // consumed but not included in the result. The result is valid
        // until the next call.
        inline fn readUntil(self: *Self, delimiter: u8) ![]const u8 {
            var fbs = std.io.fixedBufferStream(&self.scratch);
            try self.reader.streamUntilDelimiter(fbs.writer(), delimiter, null);
            return fbs.getWritten();
        }

        inline fn eql(a: []const u8, b: []const u8) bool {
            return std.mem.eql(u8, a, b);
        }

        inline fn hasNull(str: []const u8) bool {
            return (std.mem.indexOfScalar(u8, str, 0)) != null;
        }

        // Checks that each record ends with new line.
        inline fn validateAttributeEnding(reader: ReaderType) !void {
            if (try reader.readByte() != '\n') return error.PaxInvalidAttributeEnd;
        }
    };
}

/// Extracts the tar archive read from `reader` into `dir`.
///
/// Directories, normal files and symbolic links are created; entry names
/// have `options.strip_components` leading components removed. Missing
/// parent directories are created on demand.
///
/// When `options.diagnostics` is set, failures to create a file or symbolic
/// link are appended there and extraction continues; otherwise
/// error.UnableToCreateFile or error.UnableToCreateSymLink is returned.
///
/// Currently panics when `options.mode_mode` is `.executable_bit_only`.
pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: Options) !void {
    switch (options.mode_mode) {
        .ignore => {},
        .executable_bit_only => {
            // This code does not look at the mode bits yet. To implement this feature,
            // the implementation must be adjusted to look at the mode, and check the
            // user executable bit, then call fchmod on newly created files when
            // the executable bit is supposed to be set.
            // It also needs to properly deal with ACLs on Windows.
            @panic("TODO: unimplemented: tar ModeMode.executable_bit_only");
        },
    }

    var iter = tarReader(reader, options.diagnostics);

    while (try iter.next()) |file| {
        switch (file.kind) {
            .directory => {
                const file_name = try stripComponents(file.name, options.strip_components);
                if (file_name.len != 0 and !options.exclude_empty_directories) {
                    try dir.makePath(file_name);
                }
            },
            .normal => {
                if (file.size == 0 and file.name.len == 0) return;
                const file_name = try stripComponents(file.name, options.strip_components);

                const fs_file = dir.createFile(file_name, .{}) catch |err| switch (err) {
                    error.FileNotFound => again: {
                        // Parent directory is probably missing; create it and retry once.
                        const code = code: {
                            if (std.fs.path.dirname(file_name)) |dir_name| {
                                dir.makePath(dir_name) catch |code| break :code code;
                                break :again dir.createFile(file_name, .{}) catch |code| {
                                    break :code code;
                                };
                            }
                            break :code err;
                        };
                        const d = options.diagnostics orelse return error.UnableToCreateFile;
                        const file_name_copy = try d.allocator.dupe(u8, file_name);
                        // Ownership moves to diagnostics only when append succeeds.
                        errdefer d.allocator.free(file_name_copy);
                        try d.errors.append(d.allocator, .{ .unable_to_create_file = .{
                            .code = code,
                            .file_name = file_name_copy,
                        } });
                        break :again null;
                    },
                    else => |e| return e,
                };
                defer if (fs_file) |f| f.close();

                if (fs_file) |f| {
                    try file.write(f);
                } else {
                    // File was not created; content still has to be consumed.
                    try file.skip();
                }
            },
            .symbolic_link => {
                // The file system path of the symbolic link.
                const file_name = try stripComponents(file.name, options.strip_components);
                // The data inside the symbolic link.
                const link_name = file.link_name;

                dir.symLink(link_name, file_name, .{}) catch |err| again: {
                    const code = code: {
                        if (err == error.FileNotFound) {
                            if (std.fs.path.dirname(file_name)) |dir_name| {
                                dir.makePath(dir_name) catch |code| break :code code;
                                break :again dir.symLink(link_name, file_name, .{}) catch |code| {
                                    break :code code;
                                };
                            }
                        }
                        break :code err;
                    };
                    const d = options.diagnostics orelse return error.UnableToCreateSymLink;
                    const file_name_copy = try d.allocator.dupe(u8, file_name);
                    errdefer d.allocator.free(file_name_copy);
                    const link_name_copy = try d.allocator.dupe(u8, link_name);
                    errdefer d.allocator.free(link_name_copy);
                    try d.errors.append(d.allocator, .{ .unable_to_create_sym_link = .{
                        .code = code,
                        .file_name = file_name_copy,
                        .link_name = link_name_copy,
                    } });
                };
            },
            // The iterator returns only the three kinds handled above.
            else => unreachable,
        }
    }
}

// Removes the first `count` '/' separated components from `path`.
// Returns error.TarComponentsOutsideStrippedPrefix when `path` has fewer
// than `count` separators.
fn stripComponents(path: []const u8, count: u32) ![]const u8 {
    var rest = path;
    var remaining = count;
    while (remaining != 0) : (remaining -= 1) {
        const slash = std.mem.indexOfScalar(u8, rest, '/') orelse
            return error.TarComponentsOutsideStrippedPrefix;
        rest = rest[slash + 1 ..];
    }
    return rest;
}

test "tar stripComponents" {
    // Each case strips `count` leading components from "a/b/c".
    const cases = [_]struct { count: u32, expected: []const u8 }{
        .{ .count = 0, .expected = "a/b/c" },
        .{ .count = 1, .expected = "b/c" },
        .{ .count = 2, .expected = "c" },
    };
    for (cases) |case| {
        try std.testing.expectEqualStrings(case.expected, try stripComponents("a/b/c", case.count));
    }
}

test "tar PaxReader" {
    // Expected attribute: either a value or an error returned while reading
    // the value.
    const Attr = struct {
        kind: PaxAttributeKind,
        value: []const u8 = undefined,
        err: ?anyerror = null,
    };
    // Each case is pax header content, the attributes expected to be
    // returned and optionally an error expected from the iterator itself.
    const cases = [_]struct {
        data: []const u8,
        attrs: []const Attr,
        err: ?anyerror = null,
    }{
        .{ // valid but unknown keys
            .data =
            \\30 mtime=1350244992.023960108
            \\6 k=1
            \\13 key1=val1
            \\10 a=name
            \\9 a=name
            \\
            ,
            .attrs = &[_]Attr{},
        },
        .{ // mix of known and unknown keys
            .data =
            \\6 k=1
            \\13 path=name
            \\17 linkpath=link
            \\13 key1=val1
            \\12 size=123
            \\13 key2=val2
            \\
            ,
            .attrs = &[_]Attr{
                .{ .kind = .path, .value = "name" },
                .{ .kind = .linkpath, .value = "link" },
                .{ .kind = .size, .value = "123" },
            },
        },
        .{ // too short size of the second key-value pair
            .data =
            \\13 path=name
            \\10 linkpath=value
            \\
            ,
            .attrs = &[_]Attr{
                .{ .kind = .path, .value = "name" },
            },
            .err = error.UnexpectedEndOfStream,
        },
        .{ // too long size of the second key-value pair
            .data =
            \\13 path=name
            \\6 k=1
            \\19 linkpath=value
            \\
            ,
            .attrs = &[_]Attr{
                .{ .kind = .path, .value = "name" },
            },
            .err = error.UnexpectedEndOfStream,
        },

        .{ // too long size of the second key-value pair
            .data =
            \\13 path=name
            \\19 linkpath=value
            \\6 k=1
            \\
            ,
            .attrs = &[_]Attr{
                .{ .kind = .path, .value = "name" },
                .{ .kind = .linkpath, .err = error.PaxInvalidAttributeEnd },
            },
        },
        .{ // null in keyword is not valid
            .data = "13 path=name\n" ++ "7 k\x00b=1\n",
            .attrs = &[_]Attr{
                .{ .kind = .path, .value = "name" },
            },
            .err = error.PaxNullInKeyword,
        },
        .{ // null in value is not valid
            .data = "23 path=name\x00with null\n",
            .attrs = &[_]Attr{
                .{ .kind = .path, .err = error.PaxNullInValue },
            },
        },
        .{ // 1000 characters path
            .data = "1011 path=" ++ "0123456789" ** 100 ++ "\n",
            .attrs = &[_]Attr{
                .{ .kind = .path, .value = "0123456789" ** 100 },
            },
        },
    };
    var buffer: [1024]u8 = undefined;

    outer: for (cases) |case| {
        var stream = std.io.fixedBufferStream(case.data);
        var rdr = paxReader(stream.reader(), case.data.len);

        var i: usize = 0;
        while (rdr.next() catch |err| {
            if (case.err) |e| {
                try std.testing.expectEqual(e, err);
                continue;
            }
            return err;
        }) |attr| : (i += 1) {
            const exp = case.attrs[i];
            try std.testing.expectEqual(exp.kind, attr.kind);
            const value = attr.value(&buffer) catch |err| {
                if (exp.err) |e| {
                    try std.testing.expectEqual(e, err);
                    // Expected failure ends this case only; the remaining
                    // cases still have to run.
                    continue :outer;
                }
                return err;
            };
            try std.testing.expectEqualStrings(exp.value, value);
        }
        try std.testing.expectEqual(case.attrs.len, i);
        try std.testing.expect(case.err == null);
    }
}

// Reference tar/test.zig so it is included in this file's test build.
test {
    _ = @import("tar/test.zig");
}
-												tar: add module comment and references

											
										
										
											2023-12-06 14:35:29 +00:00
+								/// Tar archive is single ordinary file which can contain many files (or
 								/// directories, symlinks, ...). It's build by series of blocks each size of 512
 								/// bytes. First block of each entry is header which defines type, name, size
 								/// permissions and other attributes. Header is followed by series of blocks of
 								/// file content, if any that entry has content. Content is padded to the block
 								/// size, so next header always starts at block boundary.
 								///
 								/// This simple format is extended by GNU and POSIX pax extensions to support
 								/// file names longer than 256 bytes and additional attributes.
 								///
 								/// This is not comprehensive tar parser. Here we are only file types needed to
 								/// support Zig package manager; normal file, directory, symbolic link. And
 								/// subset of attributes: name, size, permissions.
 								///
 								/// GNU tar reference: https://www.gnu.org/software/tar/manual/html_node/Standard.html
 								/// pax reference: https://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13
-												tar: replace custom buffered reader with std.io

											
										
										
											2023-12-11 14:48:43 +00:00
+								///
-												tar: reorganize file, functions before tests

											
										
										
											2023-12-11 21:00:49 +00:00
+								const std = @import("std.zig");
-												tar: fix import path

											
										
										
											2023-11-29 14:31:22 +00:00
+								const assert = std.debug.assert;
-												std.tar: add strip_components option

											
										
										
											2023-01-10 07:38:18 +00:00
+								pub const Options = struct {
 								    /// Number of directory levels to skip when extracting files.
 								    strip_components: u32 = 0,
-												zig build: add executable bit and file path to package hash

Unfortunately, due to the Windows equivalent of executable permissions
being a bit tricky, there is follow-up work to be done.

What is done in this commit is the hash modifications. At the fetch
layer, executable bits inside packages are ignored. In the hash
computation layer, executable bit is implemented for POSIX but not yet
for Windows. This means that the hash will not break again in the future
for packages that do not have any executable files, but it will break
for packages that do.

This is a hash-breaking change.

Closes #14308

											
										
										
											2023-02-02 01:42:29 +00:00
+								    /// How to handle the "mode" property of files from within the tar file.
 								    mode_mode: ModeMode = .executable_bit_only,
-												std.tar: add option for omitting empty directories

											
										
										
											2023-10-04 06:25:04 +00:00
+								    /// Prevents creation of empty directories.
 								    exclude_empty_directories: bool = false,
-												std.tar: support symlinks

closes #16678

											
										
										
											2023-10-01 06:00:39 +00:00
+								    /// Provide this to receive detailed error messages.
 								    /// When this is provided, some errors which would otherwise be returned immediately
 								    /// will instead be added to this structure. The API user must check the errors
 								    /// in diagnostics to know whether the operation succeeded or failed.
 								    diagnostics: ?*Diagnostics = null,
-												zig build: add executable bit and file path to package hash

Unfortunately, due to the Windows equivalent of executable permissions
being a bit tricky, there is follow-up work to be done.

What is done in this commit is the hash modifications. At the fetch
layer, executable bits inside packages are ignored. In the hash
computation layer, executable bit is implemented for POSIX but not yet
for Windows. This means that the hash will not break again in the future
for packages that do not have any executable files, but it will break
for packages that do.

This is a hash-breaking change.

Closes #14308

											
										
										
											2023-02-02 01:42:29 +00:00
-												std.tar: support symlinks

closes #16678

											
										
										
											2023-10-01 06:00:39 +00:00
+								    pub const ModeMode = enum {
-												zig build: add executable bit and file path to package hash

Unfortunately, due to the Windows equivalent of executable permissions
being a bit tricky, there is follow-up work to be done.

What is done in this commit is the hash modifications. At the fetch
layer, executable bits inside packages are ignored. In the hash
computation layer, executable bit is implemented for POSIX but not yet
for Windows. This means that the hash will not break again in the future
for packages that do not have any executable files, but it will break
for packages that do.

This is a hash-breaking change.

Closes #14308

											
										
										
											2023-02-02 01:42:29 +00:00
+								        /// The mode from the tar file is completely ignored. Files are created
 								        /// with the default mode when creating files.
 								        ignore,
 								        /// The mode from the tar file is inspected for the owner executable bit
 								        /// only. This bit is copied to the group and other executable bits.
 								        /// Other bits of the mode are left as the default when creating files.
 								        executable_bit_only,
 								    };
-												std.tar: support symlinks

closes #16678

											
										
										
											2023-10-01 06:00:39 +00:00
 								    pub const Diagnostics = struct {
 								        allocator: std.mem.Allocator,
 								        errors: std.ArrayListUnmanaged(Error) = .{},
 								        pub const Error = union(enum) {
 								            unable_to_create_sym_link: struct {
 								                code: anyerror,
 								                file_name: []const u8,
 								                link_name: []const u8,
 								            },
-												std.tar: fix creation of symlinks with omit_empty_directories

											
										
										
											2023-10-08 06:09:39 +00:00
+								            unable_to_create_file: struct {
 								                code: anyerror,
 								                file_name: []const u8,
 								            },
-												zig fetch: enhanced error reporting

* Package: use std.tar diagnostics to give detailed error messages
* std.tar: add diagnostic for unsupported file type

											
										
										
											2023-10-03 00:00:45 +00:00
+								            unsupported_file_type: struct {
 								                file_name: []const u8,
-												tar: use file word in less places

											
										
										
											2023-12-11 19:46:27 +00:00
+								                file_type: Header.Kind,
-												zig fetch: enhanced error reporting

* Package: use std.tar diagnostics to give detailed error messages
* std.tar: add diagnostic for unsupported file type

											
										
										
											2023-10-03 00:00:45 +00:00
+								            },
-												std.tar: support symlinks

closes #16678

											
										
										
											2023-10-01 06:00:39 +00:00
+								        };
 								        pub fn deinit(d: *Diagnostics) void {
 								            for (d.errors.items) |item| {
 								                switch (item) {
 								                    .unable_to_create_sym_link => |info| {
 								                        d.allocator.free(info.file_name);
 								                        d.allocator.free(info.link_name);
 								                    },
-												std.tar: fix creation of symlinks with omit_empty_directories

											
										
										
											2023-10-08 06:09:39 +00:00
+								                    .unable_to_create_file => |info| {
 								                        d.allocator.free(info.file_name);
 								                    },
-												zig fetch: enhanced error reporting

* Package: use std.tar diagnostics to give detailed error messages
* std.tar: add diagnostic for unsupported file type

											
										
										
											2023-10-03 00:00:45 +00:00
+								                    .unsupported_file_type => |info| {
 								                        d.allocator.free(info.file_name);
 								                    },
-												std.tar: support symlinks

closes #16678

											
										
										
											2023-10-01 06:00:39 +00:00
+								                }
 								            }
 								            d.errors.deinit(d.allocator);
 								            d.* = undefined;
 								        }
 								    };
-												std.tar: add strip_components option

											
										
										
											2023-01-10 07:38:18 +00:00
+								};
-												add std.tar for tar file unpacking

											
										
										
											2023-01-10 05:36:35 +00:00
 								pub const Header = struct {
-												tar: remove stratch from  tar reader

Use explicit buffers for name, link_name instead.
It is cleaner that way.

											
										
										
											2023-12-11 19:18:59 +00:00
+								    const SIZE = 512;
 								    const MAX_NAME_SIZE = 100 + 1 + 155; // name(100) + separator(1) + prefix(155)
 								    const LINK_NAME_SIZE = 100;
 								    bytes: *const [SIZE]u8,
-												add std.tar for tar file unpacking

											
										
										
											2023-01-10 05:36:35 +00:00
-												tar: use file word in less places

											
										
										
											2023-12-11 19:46:27 +00:00
+								    pub const Kind = enum(u8) {
-												zig fetch: enhanced error reporting

* Package: use std.tar diagnostics to give detailed error messages
* std.tar: add diagnostic for unsupported file type

											
										
										
											2023-10-03 00:00:45 +00:00
+								        normal_alias = 0,
-												add std.tar for tar file unpacking

											
										
										
											2023-01-10 05:36:35 +00:00
+								        normal = '0',
 								        hard_link = '1',
 								        symbolic_link = '2',
 								        character_special = '3',
 								        block_special = '4',
 								        directory = '5',
 								        fifo = '6',
 								        contiguous = '7',
 								        global_extended_header = 'g',
 								        extended_header = 'x',
-												tar: add gnu path and link extensions handling

											
										
										
											2023-11-29 20:37:13 +00:00
+								        // Types 'L' and 'K' are used by the GNU format for a meta file
 								        // used to store the path or link name for the next file.
 								        gnu_long_name = 'L',
 								        gnu_long_link = 'K',
-												add std.tar for tar file unpacking

											
										
										
											2023-01-10 05:36:35 +00:00
+								        _,
 								    };
 								    /// Includes prefix concatenated, if any.
 								    /// TODO: check against "../" and other nefarious things
-												tar: remove stratch from  tar reader

Use explicit buffers for name, link_name instead.
It is cleaner that way.

											
										
										
											2023-12-11 19:18:59 +00:00
+								    pub fn fullName(header: Header, buffer: *[MAX_NAME_SIZE]u8) ![]const u8 {
-												add std.tar for tar file unpacking

											
										
										
											2023-01-10 05:36:35 +00:00
+								        const n = name(header);
 								        const p = prefix(header);
-												tar: use scratch buffer for file names

That makes names strings stable during the iteration. Otherwise string
buffers can be overwritten while reading file content.

											
										
										
											2023-12-01 17:26:31 +00:00
+								        if (!is_ustar(header) or p.len == 0) {
 								            @memcpy(buffer[0..n.len], n);
 								            return buffer[0..n.len];
 								        }
-												std: update to use `@memcpy` directly

											
										
										
											2023-04-27 22:16:01 +00:00
+								        @memcpy(buffer[0..p.len], p);
-												add std.tar for tar file unpacking

											
										
										
											2023-01-10 05:36:35 +00:00
+								        buffer[p.len] = '/';
-												std: update to use `@memcpy` directly

											
										
										
											2023-04-27 22:16:01 +00:00
+								        @memcpy(buffer[p.len + 1 ..][0..n.len], n);
-												add std.tar for tar file unpacking

											
										
										
											2023-01-10 05:36:35 +00:00
+								        return buffer[0 .. p.len + 1 + n.len];
 								    }
-												tar: remove stratch from  tar reader

Use explicit buffers for name, link_name instead.
It is cleaner that way.

											
										
										
											2023-12-11 19:18:59 +00:00
+								    /// Copies the link name field (100 bytes at offset 157, cut at the first
 								    /// null byte) into `buffer` and returns the copied part of `buffer`.
 								    pub fn linkName(header: Header, buffer: *[LINK_NAME_SIZE]u8) []const u8 {
 								        const target = header.str(157, 100);
 								        const dst = buffer[0..target.len];
 								        // Nothing to copy for an empty field; `dst` is then the empty prefix of `buffer`.
 								        if (target.len != 0) @memcpy(dst, target);
 								        return dst;
 								    }
-												add std.tar for tar file unpacking

											
										
										
											2023-01-10 05:36:35 +00:00
+								    pub fn name(header: Header) []const u8 {
-												tar: add header chksum checking

											
										
										
											2023-11-29 16:17:20 +00:00
+								        return header.str(0, 100);
 								    }
-												tar: add file mode to result of tarbal iteration

So we have information to set executable bit on write to file system.

											
										
										
											2023-12-02 14:00:42 +00:00
+								    /// Returns the mode field (8 bytes at offset 100) as a number.
 								    /// Returns error.TarNumericValueTooBig when the parsed value does not fit
 								    /// in u32, instead of hitting an unchecked integer cast.
 								    pub fn mode(header: Header) !u32 {
 								        const raw = try header.numeric(100, 8);
 								        return std.math.cast(u32, raw) orelse return error.TarNumericValueTooBig;
 								    }
-												tar: use file word in less places

											
										
										
											2023-12-11 19:46:27 +00:00
+								    pub fn size(header: Header) !u64 {
-												tar: add header chksum checking

											
										
										
											2023-11-29 16:17:20 +00:00
+								        return header.numeric(124, 12);
 								    }
 								    pub fn chksum(header: Header) !u64 {
 								        return header.octal(148, 8);
-												add std.tar for tar file unpacking

											
										
										
											2023-01-10 05:36:35 +00:00
+								    }
-												tar: add header chksum checking

											
										
										
											2023-11-29 16:17:20 +00:00
+								    pub fn is_ustar(header: Header) bool {
-												tar: add gnu path and link extensions handling

											
										
										
											2023-11-29 20:37:13 +00:00
+								        const magic = header.bytes[257..][0..6];
 								        return std.mem.eql(u8, magic[0..5], "ustar") and (magic[5] == 0 or magic[5] == ' ');
-												std.tar: support symlinks

closes #16678

											
										
										
											2023-10-01 06:00:39 +00:00
+								    }
-												add std.tar for tar file unpacking

											
										
										
											2023-01-10 05:36:35 +00:00
+								    pub fn prefix(header: Header) []const u8 {
-												tar: add header chksum checking

											
										
										
											2023-11-29 16:17:20 +00:00
+								        return header.str(345, 155);
-												add std.tar for tar file unpacking

											
										
										
											2023-01-10 05:36:35 +00:00
+								    }
-												tar: use file word in less places

											
										
										
											2023-12-11 19:46:27 +00:00
+								    pub fn kind(header: Header) Kind {
 								        const result: Kind = @enumFromInt(header.bytes[156]);
-												zig fetch: enhanced error reporting

* Package: use std.tar diagnostics to give detailed error messages
* std.tar: add diagnostic for unsupported file type

											
										
										
											2023-10-03 00:00:45 +00:00
+								        if (result == .normal_alias) return .normal;
 								        return result;
-												add std.tar for tar file unpacking

											
										
										
											2023-01-10 05:36:35 +00:00
+								    }
-												tar: add header chksum checking

											
										
										
											2023-11-29 16:17:20 +00:00
+								    fn str(header: Header, start: usize, len: usize) []const u8 {
-												tar: add gnu path and link extensions handling

											
										
										
											2023-11-29 20:37:13 +00:00
+								        return nullStr(header.bytes[start .. start + len]);
-												add std.tar for tar file unpacking

											
										
										
											2023-01-10 05:36:35 +00:00
+								    }
-												tar: add initial test cases

Just adding tests, without changing functionality.

											
										
										
											2023-11-27 20:37:30 +00:00
-												tar: add header chksum checking

											
										
										
											2023-11-29 16:17:20 +00:00
+								    // Parses the numeric header field of `len` bytes at offset `start`.
 								    // Handles the binary big-endian form marked by a leading 0x80 byte and
 								    // otherwise falls back to octal ASCII parsing.
 								    // Returns error.TarNumericValueNegative for fields starting with 0xff and
 								    // error.TarNumericValueTooBig when a binary value does not fit in u64.
 								    fn numeric(header: Header, start: usize, len: usize) !u64 {
 								        const raw = header.bytes[start..][0..len];
 								        //  If the leading byte is 0xff (255), all the bytes of the field
 								        //  (including the leading byte) are concatenated in big-endian order,
 								        //  with the result being a negative number expressed in two’s
 								        //  complement form.
 								        if (raw[0] == 0xff) return error.TarNumericValueNegative;
 								        // If the leading byte is 0x80 (128), the non-leading bytes of the
 								        // field are concatenated in big-endian order.
 								        if (raw[0] == 0x80) {
 								            // Fold every payload byte so that fields shorter than 12 bytes
 								            // (the mode field is 8 bytes) are never indexed past their end,
 								            // and detect overflow without adding u8 values, which could
 								            // itself overflow.
 								            var value: u64 = 0;
 								            for (raw[1..]) |byte| {
 								                // The top byte is already occupied: one more shift would lose bits.
 								                if (value >> 56 != 0) return error.TarNumericValueTooBig;
 								                value = (value << 8) | byte;
 								            }
 								            return value;
-												tar: add initial test cases

Just adding tests, without changing functionality.

											
										
										
											2023-11-27 20:37:30 +00:00
+								        }
-												tar: add header chksum checking

											
										
										
											2023-11-29 16:17:20 +00:00
+								        return try header.octal(start, len);
 								    }
 								    fn octal(header: Header, start: usize, len: usize) !u64 {
 								        const raw = header.bytes[start..][0..len];
 								        // Zero-filled octal number in ASCII. Each numeric field of width w
 								        // contains w minus 1 digits, and a null
 								        const ltrimmed = std.mem.trimLeft(u8, raw, "0 ");
 								        const rtrimmed = std.mem.trimRight(u8, ltrimmed, " \x00");
 								        if (rtrimmed.len == 0) return 0;
-												tar: handle pax null attrs and pax attr ending

											
										
										
											2023-11-29 19:30:08 +00:00
+								        return std.fmt.parseInt(u64, rtrimmed, 8) catch return error.TarHeader;
-												tar: add header chksum checking

											
										
										
											2023-11-29 16:17:20 +00:00
+								    }
 								    // Adds up every byte of the header block except the chksum field
 								    // (bytes 148..155), then accounts for that field as if it held eight
 								    // spaces (ASCII 32). An all-zero block yields 0.
 								    fn computeChksum(header: Header) u64 {
 								        var total: u64 = 0;
 								        // Bytes before the chksum field.
 								        for (header.bytes[0..148]) |byte| total += byte;
 								        // Bytes after the chksum field.
 								        for (header.bytes[156..]) |byte| total += byte;
 								        if (total == 0) return 0;
 								        // 256 = 8 * 32, the chksum field counted as eight spaces.
 								        return total + 256;
 								    }
 								    // Checks calculated chksum with value of chksum field.
-												tar: use scratch buffer for file names

That makes names strings stable during the iteration. Otherwise string
buffers can be overwritten while reading file content.

											
										
										
											2023-12-01 17:26:31 +00:00
+								    // Returns error or valid chksum value.
-												tar: add header chksum checking

											
										
										
											2023-11-29 16:17:20 +00:00
+								    // Zero value indicates empty block.
 								    pub fn checkChksum(header: Header) !u64 {
 								        const field = try header.chksum();
 								        const computed = header.computeChksum();
 								        if (field != computed) return error.TarHeaderChksum;
 								        return field;
-												tar: add initial test cases

Just adding tests, without changing functionality.

											
										
										
											2023-11-27 20:37:30 +00:00
+								    }
-												add std.tar for tar file unpacking

											
										
										
											2023-01-10 05:36:35 +00:00
+								};
-												tar: add module comment and references

											
										
										
											2023-12-06 14:35:29 +00:00
+								// Breaks string on first null character.
-												tar: add gnu path and link extensions handling

											
										
										
											2023-11-29 20:37:13 +00:00
+								// Returns the part of `str` that precedes its first null byte, or all of
 								// `str` when it contains no null byte.
 								fn nullStr(str: []const u8) []const u8 {
 								    const end = std.mem.indexOfScalar(u8, str, 0) orelse str.len;
 								    return str[0..end];
 								}
-												tar: reorganize file, functions before tests

											
										
										
											2023-12-11 21:00:49 +00:00
+								/// Creates a `TarReader` over `reader`. `diagnostics` is optional and is
 								/// stored in the returned reader as given.
 								pub fn tarReader(reader: anytype, diagnostics: ?*Options.Diagnostics) TarReader(@TypeOf(reader)) {
 								    const Result = TarReader(@TypeOf(reader));
 								    return Result{
 								        .diagnostics = diagnostics,
 								        .reader = reader,
 								    };
 								}
 								fn TarReader(comptime ReaderType: type) type {
 								    return struct {
 								        reader: ReaderType,
 								        diagnostics: ?*Options.Diagnostics,
 								        // buffers for header and file attributes
 								        header_buffer: [Header.SIZE]u8 = undefined,
 								        file_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined,
 								        link_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined,
 								        // bytes of padding to the end of the block
 								        padding: usize = 0,
 								        // current tar file
 								        file: File = undefined,
 								        pub const File = struct {
 								            name: []const u8, // name of file, symlink or directory
 								            link_name: []const u8, // target name of symlink
-												tar: fix tests on 32-bit platforms

											
										
										
											2023-12-12 13:18:20 +00:00
+								            size: u64, // size of the file in bytes
-												tar: reorganize file, functions before tests

											
										
										
											2023-12-11 21:00:49 +00:00
+								            mode: u32,
 								            kind: Header.Kind,
 								            reader: ReaderType,
 								            // Writes file content to writer.
 								            pub fn write(self: File, writer: anytype) !void {
 								                var buffer: [4096]u8 = undefined;
-												tar: fix tests on 32-bit platforms

											
										
										
											2023-12-12 13:18:20 +00:00
+								                var n: u64 = 0;
-												tar: reorganize file, functions before tests

											
										
										
											2023-12-11 21:00:49 +00:00
+								                while (n < self.size) {
 								                    const buf = buffer[0..@min(buffer.len, self.size - n)];
 								                    try self.reader.readNoEof(buf);
 								                    try writer.writeAll(buf);
 								                    n += buf.len;
 								                }
 								            }
 								            // Advances the reader past the file content (`size` bytes)
 								            // without copying it anywhere.
 								            pub fn skip(self: File) !void {
 								                return self.reader.skipBytes(self.size, .{});
 								            }
 								        };
 								        const Self = @This();
 								        // Skips pending block padding, then reads the next header block.
 								        // Returns null at end of stream or when the block's checksum is
 								        // zero (empty block); error.UnexpectedEndOfStream on a short block.
 								        fn readHeader(self: *Self) !?Header {
 								            if (self.padding != 0) try self.reader.skipBytes(self.padding, .{});

 								            const bytes_read = try self.reader.readAll(&self.header_buffer);
 								            if (bytes_read == 0) return null;
 								            if (bytes_read < Header.SIZE) return error.UnexpectedEndOfStream;

 								            const header: Header = .{ .bytes = self.header_buffer[0..Header.SIZE] };
 								            const sum = try header.checkChksum();
 								            return if (sum == 0) null else header;
 								        }
 								        // Reads exactly `size` bytes from the reader into `buffer` and
 								        // returns them cut at the first null byte.
 								        // Returns error.TarInsufficientBuffer when `size` exceeds `buffer.len`.
 								        inline fn readString(self: *Self, size: usize, buffer: []u8) ![]const u8 {
 								            // `size` is taken from the archive header by the callers, so an
 								            // oversized value is reported as an error rather than asserted.
 								            if (size > buffer.len) return error.TarInsufficientBuffer;
 								            const buf = buffer[0..size];
 								            try self.reader.readNoEof(buf);
 								            return nullStr(buf);
 								        }
 								        // Resets the current file to empty defaults: zero-length name and
 								        // link name slices into the reader's buffers, size 0, mode 0 and
 								        // kind `.normal`.
 								        inline fn initFile(self: *Self) void {
 								            self.file = .{
 								                .reader = self.reader,
 								                .kind = .normal,
 								                .mode = 0,
 								                .size = 0,
 								                .name = self.file_name_buffer[0..0],
 								                .link_name = self.link_name_buffer[0..0],
 								            };
 								        }
 								        // Number of padding bytes in the last file block.
-												tar: fix tests on 32-bit platforms

											
										
										
											2023-12-12 13:18:20 +00:00
+								        inline fn blockPadding(size: u64) usize {
 								            const block_rounded = std.mem.alignForward(u64, size, Header.SIZE); // size rounded to te block boundary
 								            return @intCast(block_rounded - size);
-												tar: reorganize file, functions before tests

											
										
										
											2023-12-11 21:00:49 +00:00
+								        }
 								        /// Iterates through the tar archive as if it is a series of files.
 								        /// Internally, the tar format often uses entries (header with optional
 								        /// content) to add meta data that describes the next file. These
 								        /// entries should not normally be visible to the outside. As such, this
 								        /// loop iterates through one or more entries until it collects all
 								        /// file attributes.
 								        pub fn next(self: *Self) !?File {
 								            self.initFile();
 								            while (try self.readHeader()) |header| {
 								                const kind = header.kind();
-												tar: fix tests on 32-bit platforms

											
										
										
											2023-12-12 13:18:20 +00:00
+								                const size: u64 = try header.size();
-												tar: reorganize file, functions before tests

											
										
										
											2023-12-11 21:00:49 +00:00
+								                self.padding = blockPadding(size);
 								                switch (kind) {
 								                    // File types to return upstream
 								                    .directory, .normal, .symbolic_link => {
 								                        self.file.kind = kind;
 								                        self.file.mode = try header.mode();
 								                        // set file attributes if not already set by prefix/extended headers
 								                        if (self.file.size == 0) {
 								                            self.file.size = size;
 								                        }
 								                        if (self.file.link_name.len == 0) {
 								                            self.file.link_name = header.linkName(self.link_name_buffer[0..Header.LINK_NAME_SIZE]);
 								                        }
 								                        if (self.file.name.len == 0) {
 								                            self.file.name = try header.fullName(self.file_name_buffer[0..Header.MAX_NAME_SIZE]);
 								                        }
 								                        self.padding = blockPadding(self.file.size);
 								                        return self.file;
 								                    },
 								                    // Prefix header types
 								                    .gnu_long_name => {
-												tar: fix tests on 32-bit platforms

											
										
										
											2023-12-12 13:18:20 +00:00
+								                        self.file.name = try self.readString(@intCast(size), &self.file_name_buffer);
-												tar: reorganize file, functions before tests

											
										
										
											2023-12-11 21:00:49 +00:00
+								                    },
 								                    .gnu_long_link => {
-												tar: fix tests on 32-bit platforms

											
										
										
											2023-12-12 13:18:20 +00:00
+								                        self.file.link_name = try self.readString(@intCast(size), &self.link_name_buffer);
-												tar: reorganize file, functions before tests

											
										
										
											2023-12-11 21:00:49 +00:00
+								                    },
 								                    .extended_header => {
 								                        // Use just attributes from last extended header.
 								                        self.initFile();
-												tar: fix tests on 32-bit platforms

											
										
										
											2023-12-12 13:18:20 +00:00
+								                        var rdr = paxReader(self.reader, @intCast(size));
-												tar: reorganize file, functions before tests

											
										
										
											2023-12-11 21:00:49 +00:00
+								                        while (try rdr.next()) |attr| {
 								                            switch (attr.kind) {
 								                                .path => {
 								                                    self.file.name = try attr.value(&self.file_name_buffer);
 								                                },
 								                                .linkpath => {
 								                                    self.file.link_name = try attr.value(&self.link_name_buffer);
 								                                },
 								                                .size => {
 								                                    var buf: [64]u8 = undefined;
-												tar: fix tests on 32-bit platforms

											
										
										
											2023-12-12 13:18:20 +00:00
+								                                    self.file.size = try std.fmt.parseInt(u64, try attr.value(&buf), 10);
-												tar: reorganize file, functions before tests

											
										
										
											2023-12-11 21:00:49 +00:00
+								                                },
 								                            }
 								                        }
 								                    },
 								                    // Ignored header type
 								                    .global_extended_header => {
 								                        self.reader.skipBytes(size, .{}) catch return error.TarHeadersTooBig;
 								                    },
 								                    // All other are unsupported header types
 								                    else => {
 								                        const d = self.diagnostics orelse return error.TarUnsupportedHeader;
 								                        try d.errors.append(d.allocator, .{ .unsupported_file_type = .{
 								                            .file_name = try d.allocator.dupe(u8, header.name()),
 								                            .file_type = kind,
 								                        } });
 								                    },
 								                }
 								            }
 								            return null;
 								        }
 								    };
 								}
 								// Creates a pax attributes reader over `reader`.
 								// `size` is the length of the pax extended header in `reader`.
 								fn paxReader(reader: anytype, size: usize) PaxReader(@TypeOf(reader)) {
 								    return .{ .size = size, .reader = reader };
 								}
 								// Pax attribute keywords that PaxReader reports; every other keyword is skipped.
 								const PaxAttributeKind = enum {
 								    path,
 								    linkpath,
 								    size,
 								};
 								// Returns a type that iterates over the attributes of a pax extended
 								// header read from a reader of type `ReaderType`.
 								fn PaxReader(comptime ReaderType: type) type {
 								    return struct {
 								        size: usize, // cumulative size of all pax attributes
 								        reader: ReaderType,
 								        // scratch buffer used for reading attribute length and keyword
 								        scratch: [128]u8 = undefined,
 								        const Self = @This();
 								        const Attribute = struct {
 								            kind: PaxAttributeKind,
 								            len: usize, // length of the attribute value
 								            reader: ReaderType, // reader positioned at value start
 								            // Copies pax attribute value into destination buffer.
 								            // Returns error.TarInsufficientBuffer when the destination is
 								            // smaller than Attribute.len, error.UnexpectedEndOfStream when
 								            // the value is truncated, error.PaxInvalidAttributeEnd when the
 								            // record does not end with a new line and error.PaxNullInValue
 								            // when the value contains a null byte.
 								            pub fn value(self: Attribute, dst: []u8) ![]const u8 {
 								                // The length is parsed from the archive, so it is validated
 								                // here instead of being asserted.
 								                if (self.len > dst.len) return error.TarInsufficientBuffer;
 								                const buf = dst[0..self.len];
 								                const n = try self.reader.readAll(buf);
 								                if (n < self.len) return error.UnexpectedEndOfStream;
 								                try validateAttributeEnding(self.reader);
 								                if (hasNull(buf)) return error.PaxNullInValue;
 								                return buf;
 								            }
 								        };
 								        // Iterates over pax attributes. Returns only known attributes
 								        // (path, linkpath, size); unknown ones are skipped.
 								        // Caller has to call value in Attribute, to advance reader across value.
 								        pub fn next(self: *Self) !?Attribute {
 								            // Pax extended header consists of one or more attributes, each constructed as follows:
 								            // "%d %s=%s\n", <length>, <keyword>, <value>
 								            while (self.size > 0) {
 								                const length_buf = try self.readUntil(' ');
 								                const length = try std.fmt.parseInt(usize, length_buf, 10); // record length in bytes
 								                const keyword = try self.readUntil('=');
 								                if (hasNull(keyword)) return error.PaxNullInKeyword;
 								                // calculate value_len
 								                const value_start = length_buf.len + keyword.len + 2; // 2 separators
 								                if (length < value_start + 1 or self.size < length) return error.UnexpectedEndOfStream;
 								                const value_len = length - value_start - 1; // \n separator at end
 								                self.size -= length;
 								                const kind: PaxAttributeKind = if (eql(keyword, "path"))
 								                    .path
 								                else if (eql(keyword, "linkpath"))
 								                    .linkpath
 								                else if (eql(keyword, "size"))
 								                    .size
 								                else {
 								                    // Unknown keyword: consume its value and trailing new line.
 								                    try self.reader.skipBytes(value_len, .{});
 								                    try validateAttributeEnding(self.reader);
 								                    continue;
 								                };
 								                return Attribute{
 								                    .kind = kind,
 								                    .len = value_len,
 								                    .reader = self.reader,
 								                };
 								            }
 								            return null;
 								        }
 								        // Reads bytes into the scratch buffer up to (not including) the
 								        // delimiter. The returned slice points into the scratch buffer and
 								        // is overwritten by the next call.
 								        inline fn readUntil(self: *Self, delimiter: u8) ![]const u8 {
 								            var fbs = std.io.fixedBufferStream(&self.scratch);
 								            try self.reader.streamUntilDelimiter(fbs.writer(), delimiter, null);
 								            return fbs.getWritten();
 								        }
 								        inline fn eql(a: []const u8, b: []const u8) bool {
 								            return std.mem.eql(u8, a, b);
 								        }
 								        inline fn hasNull(str: []const u8) bool {
 								            return (std.mem.indexOfScalar(u8, str, 0)) != null;
 								        }
 								        // Checks that each record ends with new line.
 								        inline fn validateAttributeEnding(reader: ReaderType) !void {
 								            if (try reader.readByte() != '\n') return error.PaxInvalidAttributeEnd;
 								        }
 								    };
 								}
-												add std.tar for tar file unpacking

											
										
										
											2023-01-10 05:36:35 +00:00
+								pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: Options) !void {
-												zig build: add executable bit and file path to package hash

Unfortunately, due to the Windows equivalent of executable permissions
being a bit tricky, there is follow-up work to be done.

What is done in this commit is the hash modifications. At the fetch
layer, executable bits inside packages are ignored. In the hash
computation layer, executable bit is implemented for POSIX but not yet
for Windows. This means that the hash will not break again in the future
for packages that do not have any executable files, but it will break
for packages that do.

This is a hash-breaking change.

Closes #14308

											
										
										
											2023-02-02 01:42:29 +00:00
+								    switch (options.mode_mode) {
 								        .ignore => {},
 								        .executable_bit_only => {
 								            // This code does not look at the mode bits yet. To implement this feature,
 								            // the implementation must be adjusted to look at the mode, and check the
 								            // user executable bit, then call fchmod on newly created files when
 								            // the executable bit is supposed to be set.
 								            // It also needs to properly deal with ACLs on Windows.
 								            @panic("TODO: unimplemented: tar ModeMode.executable_bit_only");
 								        },
 								    }
-												tar: refactor code to be more testable

Split reading/parsing tar file and writing results to the disk in two
separate steps. So we can later test parsing part without need to write
everything to the disk.

											
										
										
											2023-11-27 13:51:51 +00:00
-												tar: replace custom buffered reader with std.io

											
										
										
											2023-12-11 14:48:43 +00:00
+								    var iter = tarReader(reader, options.diagnostics);
-												tar: refactor code to be more testable

Split reading/parsing tar file and writing results to the disk in two
separate steps. So we can later test parsing part without need to write
everything to the disk.

											
										
										
											2023-11-27 13:51:51 +00:00
-												tar: add initial test cases

Just adding tests, without changing functionality.

											
										
										
											2023-11-27 20:37:30 +00:00
+								    while (try iter.next()) |file| {
-												tar: use file word in less places

											
										
										
											2023-12-11 19:46:27 +00:00
+								        switch (file.kind) {
-												add std.tar for tar file unpacking

											
										
										
											2023-01-10 05:36:35 +00:00
+								            .directory => {
-												tar: add initial test cases

Just adding tests, without changing functionality.

											
										
										
											2023-11-27 20:37:30 +00:00
+								                const file_name = try stripComponents(file.name, options.strip_components);
-												std.tar: add option for omitting empty directories

											
										
										
											2023-10-04 06:25:04 +00:00
+								                if (file_name.len != 0 and !options.exclude_empty_directories) {
-												std.tar: make sub dirs + trim spaces

closes #15222. these changes allow the .tgz from this issue to
decompress and the test code to succeed.

											
										
										
											2023-04-09 23:05:17 +00:00
+								                    try dir.makePath(file_name);
-												std.tar: add strip_components option

											
										
										
											2023-01-10 07:38:18 +00:00
+								                }
-												add std.tar for tar file unpacking

											
										
										
											2023-01-10 05:36:35 +00:00
+								            },
 								            .normal => {
-												tar: add initial test cases

Just adding tests, without changing functionality.

											
										
										
											2023-11-27 20:37:30 +00:00
+								                if (file.size == 0 and file.name.len == 0) return;
 								                const file_name = try stripComponents(file.name, options.strip_components);
-												add std.tar for tar file unpacking

											
										
										
											2023-01-10 05:36:35 +00:00
-												tar: add initial test cases

Just adding tests, without changing functionality.

											
										
										
											2023-11-27 20:37:30 +00:00
+								                const fs_file = dir.createFile(file_name, .{}) catch |err| switch (err) {
-												std.tar: fix creation of symlinks with omit_empty_directories

											
										
										
											2023-10-08 06:09:39 +00:00
+								                    error.FileNotFound => again: {
 								                        const code = code: {
 								                            if (std.fs.path.dirname(file_name)) |dir_name| {
 								                                dir.makePath(dir_name) catch |code| break :code code;
 								                                break :again dir.createFile(file_name, .{}) catch |code| {
 								                                    break :code code;
 								                                };
 								                            }
 								                            break :code err;
 								                        };
 								                        const d = options.diagnostics orelse return error.UnableToCreateFile;
 								                        try d.errors.append(d.allocator, .{ .unable_to_create_file = .{
 								                            .code = code,
 								                            .file_name = try d.allocator.dupe(u8, file_name),
 								                        } });
 								                        break :again null;
 								                    },
 								                    else => |e| return e,
 								                };
-												tar: add initial test cases

Just adding tests, without changing functionality.

											
										
										
											2023-11-27 20:37:30 +00:00
+								                defer if (fs_file) |f| f.close();
-												add std.tar for tar file unpacking

											
										
										
											2023-01-10 05:36:35 +00:00
-												tar: add initial test cases

Just adding tests, without changing functionality.

											
										
										
											2023-11-27 20:37:30 +00:00
+								                if (fs_file) |f| {
 								                    try file.write(f);
-												tar: refactor code to be more testable

Split reading/parsing tar file and writing results to the disk in two
separate steps. So we can later test parsing part without need to write
everyting to the disk.

											
										
										
											2023-11-27 13:51:51 +00:00
+								                } else {
-												tar: add initial test cases

Just adding tests, without changing functionality.

											
										
										
											2023-11-27 20:37:30 +00:00
+								                    try file.skip();
-												add std.tar for tar file unpacking

											
										
										
											2023-01-10 05:36:35 +00:00
+								                }
 								            },
-												std.tar: support symlinks

closes #16678

											
										
										
											2023-10-01 06:00:39 +00:00
+								            .symbolic_link => {
-												std.tar: fix creation of symlinks with omit_empty_directories

											
										
										
											2023-10-08 06:09:39 +00:00
+								                // The file system path of the symbolic link.
-												tar: add initial test cases

Just adding tests, without changing functionality.

											
										
										
											2023-11-27 20:37:30 +00:00
+								                const file_name = try stripComponents(file.name, options.strip_components);
-												std.tar: fix creation of symlinks with omit_empty_directories

											
										
										
											2023-10-08 06:09:39 +00:00
+								                // The data inside the symbolic link.
-												tar: add initial test cases

Just adding tests, without changing functionality.

											
										
										
											2023-11-27 20:37:30 +00:00
+								                const link_name = file.link_name;
-												std.tar: support symlinks

closes #16678

											
										
										
											2023-10-01 06:00:39 +00:00
-												std.tar: fix creation of symlinks with omit_empty_directories

											
										
										
											2023-10-08 06:09:39 +00:00
+								                dir.symLink(link_name, file_name, .{}) catch |err| again: {
 								                    const code = code: {
 								                        if (err == error.FileNotFound) {
 								                            if (std.fs.path.dirname(file_name)) |dir_name| {
 								                                dir.makePath(dir_name) catch |code| break :code code;
 								                                break :again dir.symLink(link_name, file_name, .{}) catch |code| {
 								                                    break :code code;
 								                                };
 								                            }
 								                        }
 								                        break :code err;
 								                    };
-												zig fetch: enhanced error reporting

* Package: use std.tar diagnostics to give detailed error messages
* std.tar: add diagnostic for unsupported file type

											
										
										
											2023-10-03 00:00:45 +00:00
+								                    const d = options.diagnostics orelse return error.UnableToCreateSymLink;
 								                    try d.errors.append(d.allocator, .{ .unable_to_create_sym_link = .{
-												std.tar: fix creation of symlinks with omit_empty_directories

											
										
										
											2023-10-08 06:09:39 +00:00
+								                        .code = code,
-												zig fetch: enhanced error reporting

* Package: use std.tar diagnostics to give detailed error messages
* std.tar: add diagnostic for unsupported file type

											
										
										
											2023-10-03 00:00:45 +00:00
+								                        .file_name = try d.allocator.dupe(u8, file_name),
 								                        .link_name = try d.allocator.dupe(u8, link_name),
 								                    } });
-												std.tar: support symlinks

closes #16678

											
										
										
											2023-10-01 06:00:39 +00:00
+								                };
 								            },
-												tar: refactor code to be more testable

Split reading/parsing tar file and writing results to the disk in two
separate steps. So we can later test parsing part without need to write
everything to the disk.

											
										
										
											2023-11-27 13:51:51 +00:00
+								            else => unreachable,
-												add std.tar for tar file unpacking

											
										
										
											2023-01-10 05:36:35 +00:00
+								        }
 								    }
 								}
-												std.tar: add strip_components option

											
										
										
											2023-01-10 07:38:18 +00:00
+								/// Drops the first `count` '/'-terminated components from `path` and
 								/// returns the remainder as a sub-slice of `path`. Returns
 								/// `error.TarComponentsOutsideStrippedPrefix` when `path` has fewer than
 								/// `count` '/' separators.
 								fn stripComponents(path: []const u8, count: u32) ![]const u8 {
 								    var rest = path;
 								    var remaining = count;
 								    while (remaining != 0) : (remaining -= 1) {
 								        const sep = std.mem.indexOfScalar(u8, rest, '/') orelse
 								            return error.TarComponentsOutsideStrippedPrefix;
 								        // Continue just past the separator that was found.
 								        rest = rest[sep + 1 ..];
 								    }
 								    return rest;
 								}
-												tar: prefix test cases with 'tar'

To make it a little easier to filter from all stdlib tests.

											
										
										
											2023-12-01 18:03:32 +00:00
+								test "tar stripComponents" {
-												std.tar: add strip_components option

											
										
										
											2023-01-10 07:38:18 +00:00
+								    const expectEqualStrings = std.testing.expectEqualStrings;
 								    try expectEqualStrings("a/b/c", try stripComponents("a/b/c", 0));
 								    try expectEqualStrings("b/c", try stripComponents("a/b/c", 1));
 								    try expectEqualStrings("c", try stripComponents("a/b/c", 2));
 								}
-												tar: replace custom buffered reader with std.io

											
										
										
											2023-12-11 14:48:43 +00:00
+								test "tar PaxReader" {
 								    const Attr = struct {
-												tar: refactor pax attribute

Make it a little more readable.

											
										
										
											2023-12-11 16:47:19 +00:00
+								        kind: PaxAttributeKind,
-												tar: replace custom buffered reader with std.io

											
										
										
											2023-12-11 14:48:43 +00:00
+								        value: []const u8 = undefined,
 								        err: ?anyerror = null,
-												tar: add pax file reader tests

											
										
										
											2023-12-05 16:08:45 +00:00
+								    };
 								    const cases = [_]struct {
 								        data: []const u8,
-												tar: replace custom buffered reader with std.io

											
										
										
											2023-12-11 14:48:43 +00:00
+								        attrs: []const Attr,
-												tar: add pax file reader tests

											
										
										
											2023-12-05 16:08:45 +00:00
+								        err: ?anyerror = null,
 								    }{
 								        .{ // valid but unknown keys
 								            .data =
 								            \\30 mtime=1350244992.023960108
 								            \\6 k=1
 								            \\13 key1=val1
 								            \\10 a=name
 								            \\9 a=name
 								            \\
 								            ,
-												tar: replace custom buffered reader with std.io

											
										
										
											2023-12-11 14:48:43 +00:00
+								            .attrs = &[_]Attr{},
-												tar: add pax file reader tests

											
										
										
											2023-12-05 16:08:45 +00:00
+								        },
 								        .{ // mix of known and unknown keys
 								            .data =
 								            \\6 k=1
 								            \\13 path=name
 								            \\17 linkpath=link
 								            \\13 key1=val1
 								            \\12 size=123
 								            \\13 key2=val2
 								            \\
 								            ,
-												tar: replace custom buffered reader with std.io

											
										
										
											2023-12-11 14:48:43 +00:00
+								            .attrs = &[_]Attr{
 								                .{ .kind = .path, .value = "name" },
 								                .{ .kind = .linkpath, .value = "link" },
 								                .{ .kind = .size, .value = "123" },
-												tar: add pax file reader tests

											
										
										
											2023-12-05 16:08:45 +00:00
+								            },
 								        },
 								        .{ // too short size of the second key-value pair
 								            .data =
 								            \\13 path=name
 								            \\10 linkpath=value
 								            \\
 								            ,
-												tar: replace custom buffered reader with std.io

											
										
										
											2023-12-11 14:48:43 +00:00
+								            .attrs = &[_]Attr{
 								                .{ .kind = .path, .value = "name" },
-												tar: add pax file reader tests

											
										
										
											2023-12-05 16:08:45 +00:00
+								            },
-												tar: refactor pax attribute

Make it a little more readable.

											
										
										
											2023-12-11 16:47:19 +00:00
+								            .err = error.UnexpectedEndOfStream,
 								        },
 								        .{ // too long size of the second key-value pair
 								            .data =
 								            \\13 path=name
 								            \\6 k=1
 								            \\19 linkpath=value
 								            \\
 								            ,
 								            .attrs = &[_]Attr{
 								                .{ .kind = .path, .value = "name" },
 								            },
 								            .err = error.UnexpectedEndOfStream,
-												tar: add pax file reader tests

											
										
										
											2023-12-05 16:08:45 +00:00
+								        },
-												tar: refactor pax attribute

Make it a little more readable.

											
										
										
											2023-12-11 16:47:19 +00:00
-												tar: add pax file reader tests

											
										
										
											2023-12-05 16:08:45 +00:00
+								        .{ // too long size of the second key-value pair
 								            .data =
 								            \\13 path=name
 								            \\19 linkpath=value
-												tar: replace custom buffered reader with std.io

											
										
										
											2023-12-11 14:48:43 +00:00
+								            \\6 k=1
-												tar: add pax file reader tests

											
										
										
											2023-12-05 16:08:45 +00:00
+								            \\
 								            ,
-												tar: replace custom buffered reader with std.io

											
										
										
											2023-12-11 14:48:43 +00:00
+								            .attrs = &[_]Attr{
 								                .{ .kind = .path, .value = "name" },
-												tar: refactor pax attribute

Make it a little more readable.

											
										
										
											2023-12-11 16:47:19 +00:00
+								                .{ .kind = .linkpath, .err = error.PaxInvalidAttributeEnd },
-												tar: replace custom buffered reader with std.io

											
										
										
											2023-12-11 14:48:43 +00:00
+								            },
 								        },
 								        .{ // null in keyword is not valid
 								            .data = "13 path=name\n" ++ "7 k\x00b=1\n",
 								            .attrs = &[_]Attr{
 								                .{ .kind = .path, .value = "name" },
-												tar: add pax file reader tests

											
										
										
											2023-12-05 16:08:45 +00:00
+								            },
-												tar: refactor pax attribute

Make it a little more readable.

											
										
										
											2023-12-11 16:47:19 +00:00
+								            .err = error.PaxNullInKeyword,
-												tar: add pax file reader tests

											
										
										
											2023-12-05 16:08:45 +00:00
+								        },
-												tar: replace custom buffered reader with std.io

											
										
										
											2023-12-11 14:48:43 +00:00
+								        .{ // null in value is not valid
 								            .data = "23 path=name\x00with null\n",
 								            .attrs = &[_]Attr{
-												tar: refactor pax attribute

Make it a little more readable.

											
										
										
											2023-12-11 16:47:19 +00:00
+								                .{ .kind = .path, .err = error.PaxNullInValue },
-												tar: replace custom buffered reader with std.io

											
										
										
											2023-12-11 14:48:43 +00:00
+								            },
 								        },
 								        .{ // 1000 characters path
 								            .data = "1011 path=" ++ "0123456789" ** 100 ++ "\n",
 								            .attrs = &[_]Attr{
 								                .{ .kind = .path, .value = "0123456789" ** 100 },
 								            },
 								        },
-												tar: add pax file reader tests

											
										
										
											2023-12-05 16:08:45 +00:00
+								    };
 								    var buffer: [1024]u8 = undefined;
-												tar: replace custom buffered reader with std.io

											
										
										
											2023-12-11 14:48:43 +00:00
+								    outer: for (cases) |case| {
-												tar: add pax file reader tests

											
										
										
											2023-12-05 16:08:45 +00:00
+								        var stream = std.io.fixedBufferStream(case.data);
-												tar: replace custom buffered reader with std.io

											
										
										
											2023-12-11 14:48:43 +00:00
+								        var rdr = paxReader(stream.reader(), case.data.len);
-												tar: add pax file reader tests

											
										
										
											2023-12-05 16:08:45 +00:00
 								        var i: usize = 0;
 								        while (rdr.next() catch |err| {
 								            if (case.err) |e| {
 								                try std.testing.expectEqual(e, err);
 								                continue;
 								            }
-												tar: replace custom buffered reader with std.io

											
										
										
											2023-12-11 14:48:43 +00:00
+								            return err;
-												tar: add pax file reader tests

											
										
										
											2023-12-05 16:08:45 +00:00
+								        }) |attr| : (i += 1) {
-												tar: replace custom buffered reader with std.io

											
										
										
											2023-12-11 14:48:43 +00:00
+								            const exp = case.attrs[i];
 								            try std.testing.expectEqual(exp.kind, attr.kind);
 								            const value = attr.value(&buffer) catch |err| {
 								                if (exp.err) |e| {
 								                    try std.testing.expectEqual(e, err);
 								                    break :outer;
 								                }
 								                return err;
 								            };
 								            try std.testing.expectEqualStrings(exp.value, value);
-												tar: add pax file reader tests

											
										
										
											2023-12-05 16:08:45 +00:00
+								        }
 								        try std.testing.expectEqual(case.attrs.len, i);
 								        try std.testing.expect(case.err == null);
 								    }
 								}
-												tar: move test cases to std/tar/testdata

Create std/tar/test.zig for test which uses cases from testdata.

											
										
										
											2023-12-11 22:55:07 +00:00
 								// References tar/test.zig from this file; presumably so the tests
 								// declared there are picked up together with this file's tests.
 								test {
 								    _ = @import("tar/test.zig");
 								}