zig/lib/std/tar.zig

const std = @import("std.zig");
const assert = std.debug.assert;

pub const Options = struct {
    /// How many leading path components are removed from every entry name
    /// before it is extracted.
    strip_components: u32 = 0,
    /// Policy for the "mode" property stored with files in the tar file.
    /// Note that `pipeToFileSystem` currently panics for `.executable_bit_only`.
    mode_mode: ModeMode = .executable_bit_only,
    /// When true, directory entries found in the archive are not created.
    exclude_empty_directories: bool = false,
    /// Optional sink for detailed error messages.
    /// When set, some errors which would otherwise be returned immediately
    /// are recorded here instead. The API user must then inspect the recorded
    /// errors to find out whether the operation succeeded or failed.
    diagnostics: ?*Diagnostics = null,

    pub const ModeMode = enum {
        /// The mode stored in the tar file is not looked at. Files are
        /// created with the default mode.
        ignore,
        /// Only the owner executable bit of the stored mode is inspected.
        /// That bit is copied to the group and other executable bits; all
        /// remaining bits keep the default used when creating files.
        executable_bit_only,
    };

    pub const Diagnostics = struct {
        allocator: std.mem.Allocator,
        errors: std.ArrayListUnmanaged(Error) = .{},

        pub const Error = union(enum) {
            unable_to_create_sym_link: struct {
                code: anyerror,
                file_name: []const u8,
                link_name: []const u8,
            },
            unable_to_create_file: struct {
                code: anyerror,
                file_name: []const u8,
            },
            unsupported_file_type: struct {
                file_name: []const u8,
                file_type: Header.FileType,
            },
        };

        /// Frees every string owned by the recorded errors, then the error
        /// list itself, and finally invalidates `d`.
        pub fn deinit(d: *Diagnostics) void {
            const gpa = d.allocator;
            for (d.errors.items) |entry| {
                switch (entry) {
                    .unable_to_create_sym_link => |payload| {
                        gpa.free(payload.file_name);
                        gpa.free(payload.link_name);
                    },
                    .unable_to_create_file => |payload| gpa.free(payload.file_name),
                    .unsupported_file_type => |payload| gpa.free(payload.file_name),
                }
            }
            d.errors.deinit(gpa);
            d.* = undefined;
        }
    };
};

// Size in bytes of one tar block. A header occupies exactly one block and
// file content is padded up to a multiple of this size.
const BLOCK_SIZE = 512;
// Upper bound for the name produced by `Header.fullName`.
const MAX_HEADER_NAME_SIZE = 100 + 1 + 155; // name(100) + separator(1) + prefix(155)

/// Read-only view over one 512-byte tar header block.
pub const Header = struct {
    bytes: *const [BLOCK_SIZE]u8,

    pub const FileType = enum(u8) {
        normal_alias = 0,
        normal = '0',
        hard_link = '1',
        symbolic_link = '2',
        character_special = '3',
        block_special = '4',
        directory = '5',
        fifo = '6',
        contiguous = '7',
        global_extended_header = 'g',
        extended_header = 'x',
        // Types 'L' and 'K' are used by the GNU format for a meta file
        // used to store the path or link name for the next file.
        gnu_long_name = 'L',
        gnu_long_link = 'K',
        _,
    };

    /// Writes the entry name into `buffer` and returns the written part.
    /// For ustar headers with a non-empty prefix the result is
    /// `prefix ++ "/" ++ name`, otherwise just `name`.
    /// TODO: check against "../" and other nefarious things
    pub fn fullName(header: Header, buffer: *[MAX_HEADER_NAME_SIZE]u8) ![]const u8 {
        const n = name(header);
        const p = prefix(header);
        if (!is_ustar(header) or p.len == 0) {
            @memcpy(buffer[0..n.len], n);
            return buffer[0..n.len];
        }
        @memcpy(buffer[0..p.len], p);
        buffer[p.len] = '/';
        @memcpy(buffer[p.len + 1 ..][0..n.len], n);
        return buffer[0 .. p.len + 1 + n.len];
    }

    /// Name field (offset 0, 100 bytes), cut at the first zero byte.
    pub fn name(header: Header) []const u8 {
        return header.str(0, 100);
    }

    /// Mode field (offset 100, 8 bytes).
    /// Returns `error.TarNumericValueTooBig` when the stored value does not
    /// fit into `u32` instead of invoking an unchecked cast on archive data.
    pub fn mode(header: Header) !u32 {
        const value = try header.numeric(100, 8);
        return std.math.cast(u32, value) orelse return error.TarNumericValueTooBig;
    }

    /// Size field (offset 124, 12 bytes).
    pub fn fileSize(header: Header) !u64 {
        return header.numeric(124, 12);
    }

    /// Checksum field (offset 148, 8 bytes), parsed as octal.
    pub fn chksum(header: Header) !u64 {
        return header.octal(148, 8);
    }

    /// Link name field (offset 157, 100 bytes), cut at the first zero byte.
    pub fn linkName(header: Header) []const u8 {
        return header.str(157, 100);
    }

    /// True when the magic field at offset 257 is "ustar" followed by a
    /// zero byte or a space.
    pub fn is_ustar(header: Header) bool {
        const magic = header.bytes[257..][0..6];
        return std.mem.eql(u8, magic[0..5], "ustar") and (magic[5] == 0 or magic[5] == ' ');
    }

    /// Prefix field (offset 345, 155 bytes), cut at the first zero byte.
    pub fn prefix(header: Header) []const u8 {
        return header.str(345, 155);
    }

    /// Type flag at offset 156; a zero byte is reported as `.normal`.
    pub fn fileType(header: Header) FileType {
        const result: FileType = @enumFromInt(header.bytes[156]);
        if (result == .normal_alias) return .normal;
        return result;
    }

    // String field of `len` bytes at `start`, terminated by the first zero byte.
    fn str(header: Header, start: usize, len: usize) []const u8 {
        return nullStr(header.bytes[start .. start + len]);
    }

    // Numeric field that is either base-256 (binary) or octal ASCII encoded.
    fn numeric(header: Header, start: usize, len: usize) !u64 {
        const raw = header.bytes[start..][0..len];
        //  If the leading byte is 0xff (255), all the bytes of the field
        //  (including the leading byte) are concatenated in big-endian order,
        //  with the result being a negative number expressed in two’s
        //  complement form.
        if (raw[0] == 0xff) return error.TarNumericValueNegative;
        // If the leading byte is 0x80 (128), the non-leading bytes of the
        // field are concatenated in big-endian order.
        if (raw[0] == 0x80) {
            // Accumulate byte by byte so that the field may have any width
            // (8 bytes for mode, 12 for size) and so that a value which does
            // not fit into u64 is reported as an error instead of trapping
            // on integer overflow or reading outside of the field.
            var value: u64 = 0;
            for (raw[1..]) |byte| {
                if ((value >> 56) != 0) return error.TarNumericValueTooBig;
                value = (value << 8) | byte;
            }
            return value;
        }
        return try header.octal(start, len);
    }

    // Octal ASCII field of `len` bytes at `start`.
    fn octal(header: Header, start: usize, len: usize) !u64 {
        const raw = header.bytes[start..][0..len];
        // Zero-filled octal number in ASCII. Each numeric field of width w
        // contains w minus 1 digits, and a null
        const ltrimmed = std.mem.trimLeft(u8, raw, "0 ");
        const rtrimmed = std.mem.trimRight(u8, ltrimmed, " \x00");
        if (rtrimmed.len == 0) return 0;
        return std.fmt.parseInt(u64, rtrimmed, 8) catch return error.TarHeader;
    }

    // Sum of all bytes in the header block. The chksum field is treated as if
    // it were filled with spaces (ASCII 32).
    fn computeChksum(header: Header) u64 {
        var sum: u64 = 0;
        for (header.bytes, 0..) |b, i| {
            if (148 <= i and i < 156) continue; // skip chksum field bytes
            sum += b;
        }
        // Treating chksum bytes as spaces. 256 = 8 * 32, 8 spaces.
        return if (sum > 0) sum + 256 else 0;
    }

    // Checks calculated chksum with value of chksum field.
    // Returns error or valid chksum value.
    // Zero value indicates empty block.
    pub fn checkChksum(header: Header) !u64 {
        const field = try header.chksum();
        const computed = header.computeChksum();
        if (field != computed) return error.TarHeaderChksum;
        return field;
    }
};

// Returns the part of `str` that precedes its first zero byte, or all of
// `str` when it contains no zero byte.
fn nullStr(str: []const u8) []const u8 {
    const end = std.mem.indexOfScalar(u8, str, 0) orelse str.len;
    return str[0..end];
}

// Number of padding bytes in the last file block, i.e. the distance from
// `size` to the next multiple of BLOCK_SIZE (0 when already aligned).
// Computed from the remainder rather than by rounding `size` up, so sizes
// close to the maximum of usize cannot overflow.
inline fn blockPadding(size: usize) usize {
    const used_in_last_block = size % BLOCK_SIZE;
    return if (used_in_last_block == 0) 0 else BLOCK_SIZE - used_in_last_block;
}

// Reader with a fixed internal buffer of 8 blocks, layered over
// `ReaderType`. `buffer[start..end]` holds bytes that were read from the
// underlying reader but not yet consumed.
fn BufferedReader(comptime ReaderType: type) type {
    return struct {
        unbuffered_reader: ReaderType,
        buffer: [BLOCK_SIZE * 8]u8 = undefined,
        start: usize = 0,
        end: usize = 0,

        const Self = @This();

        // Fills buffer from the underlying reader with a single read call.
        // Returns the number of bytes added; zero means the underlying
        // reader had nothing more to deliver.
        fn fillBuffer(self: *Self) !usize {
            self.removeUsed();
            const n = try self.unbuffered_reader.read(self.buffer[self.end..]);
            self.end += n;
            return n;
        }

        // Returns slice of size count or how much fits into buffer.
        // Fails with error.UnexpectedEndOfStream when no bytes are available.
        pub fn readSlice(self: *Self, count: usize) ![]const u8 {
            if (count <= self.end - self.start) {
                return self.buffer[self.start .. self.start + count];
            }
            _ = try self.fillBuffer();
            const buf = self.buffer[self.start..self.end];
            if (buf.len == 0) return error.UnexpectedEndOfStream;
            return buf[0..@min(count, buf.len)];
        }

        // Returns tar header block, 512 bytes, or null if eof. Before reading
        // advances buffer for padding of the previous block, to position reader
        // at the start of new block. After reading advances for block size, to
        // position reader at the start of the file content.
        //
        // Keeps reading until a whole block is buffered, so a reader that
        // delivers data in small pieces is not mistaken for a truncated
        // archive. Errors of the underlying reader are returned to the
        // caller; only a clean end of stream yields null.
        pub fn readHeader(self: *Self, padding: usize) !?[]const u8 {
            try self.skip(padding);
            while (self.end - self.start < BLOCK_SIZE) {
                if ((try self.fillBuffer()) == 0) break; // end of stream
            }
            const available = self.end - self.start;
            if (available == 0) return null;
            if (available < BLOCK_SIZE) return error.UnexpectedEndOfStream;
            const block: []const u8 = self.buffer[self.start..][0..BLOCK_SIZE];
            self.advance(BLOCK_SIZE);
            return block;
        }

        // Returns byte at current position in buffer. The buffer must not be
        // empty.
        pub fn readByte(self: *@This()) u8 {
            assert(self.start < self.end);
            return self.buffer[self.start];
        }

        // Advances reader for count bytes, assumes that we have that number of
        // bytes in buffer.
        pub fn advance(self: *Self, count: usize) void {
            self.start += count;
            assert(self.start <= self.end);
        }

        // Advances reader without assuming that count bytes are in the buffer.
        // Bytes beyond the buffered ones are skipped in the underlying reader.
        pub fn skip(self: *Self, count: usize) !void {
            // Compare against the buffered amount instead of computing
            // start + count, which could overflow for a huge count.
            const buffered = self.end - self.start;
            if (count > buffered) {
                try self.unbuffered_reader.skipBytes(count - buffered, .{});
                self.start = self.end;
            } else {
                self.advance(count);
            }
        }

        // Removes used part of the buffer by moving the unread bytes to the
        // front. Does nothing when source and destination would overlap.
        inline fn removeUsed(self: *Self) void {
            const dest_end = self.end - self.start;
            if (self.start == 0 or dest_end > self.start) return;
            @memcpy(self.buffer[0..dest_end], self.buffer[self.start..self.end]);
            self.end = dest_end;
            self.start = 0;
        }

        // Writes count bytes to the writer. Advances reader.
        pub fn write(self: *Self, writer: anytype, count: usize) !void {
            var pos: usize = 0;
            while (pos < count) {
                const slice = try self.readSlice(count - pos);
                try writer.writeAll(slice);
                self.advance(slice.len);
                pos += slice.len;
            }
        }

        // Copies dst.len bytes into dst buffer. Advances reader.
        pub fn copy(self: *Self, dst: []u8) ![]const u8 {
            var pos: usize = 0;
            while (pos < dst.len) {
                const slice = try self.readSlice(dst.len - pos);
                @memcpy(dst[pos .. pos + slice.len], slice);
                self.advance(slice.len);
                pos += slice.len;
            }
            return dst;
        }

        // Creates a reader for the attributes of a pax extended header body
        // of `size` bytes located at the current position.
        pub fn paxFileReader(self: *Self, size: usize) PaxFileReader {
            return .{
                .size = size,
                .reader = self,
                .offset = 0,
            };
        }

        const PaxFileReader = struct {
            size: usize,
            offset: usize = 0,
            reader: *Self,

            const PaxKey = enum {
                path,
                linkpath,
                size,
            };

            const PaxAttribute = struct {
                key: PaxKey,
                value_len: usize,
                parent: *PaxFileReader,

                // Copies pax attribute value into destination buffer.
                // Must be called with destination buffer of size at least value_len.
                pub fn value(self: PaxAttribute, dst: []u8) ![]u8 {
                    assert(dst.len >= self.value_len);
                    const buf = dst[0..self.value_len];
                    _ = try self.parent.reader.copy(buf);
                    self.parent.offset += buf.len;
                    try self.parent.checkAttributeEnding();
                    return buf;
                }
            };

            // Returns the next attribute with a key of interest, or null when
            // the body is exhausted. Attributes with other keys are skipped.
            // Caller of the next has to call value in PaxAttribute, to advance
            // reader across value.
            pub fn next(self: *PaxFileReader) !?PaxAttribute {
                while (true) {
                    const remaining_size = self.size - self.offset;
                    if (remaining_size == 0) return null;

                    const inf = try parsePaxAttribute(
                        try self.reader.readSlice(remaining_size),
                        remaining_size,
                    );
                    const key: PaxKey = if (inf.is("path"))
                        .path
                    else if (inf.is("linkpath"))
                        .linkpath
                    else if (inf.is("size"))
                        .size
                    else {
                        try self.advance(inf.value_off + inf.value_len);
                        try self.checkAttributeEnding();
                        continue;
                    };
                    try self.advance(inf.value_off); // position reader at the start of the value
                    return PaxAttribute{ .key = key, .value_len = inf.value_len, .parent = self };
                }
            }

            // Consumes the newline that terminates an attribute record.
            // Goes through readSlice so the byte is fetched from the
            // underlying reader when the buffer has just been drained, which
            // happens when a value ends exactly at the end of the buffered
            // data or when a skipped attribute extended past it.
            fn checkAttributeEnding(self: *PaxFileReader) !void {
                const terminator = try self.reader.readSlice(1);
                if (terminator[0] != '\n') return error.InvalidPaxAttribute;
                try self.advance(1);
            }

            // Moves both the attribute offset and the reader forward by len.
            fn advance(self: *PaxFileReader, len: usize) !void {
                self.offset += len;
                try self.reader.skip(len);
            }
        };
    };
}

// Iterator over the files of a tar archive read from `ReaderType`.
fn Iterator(comptime ReaderType: type) type {
    const BufferedReaderType = BufferedReader(ReaderType);
    return struct {
        // scratch buffer for file attributes
        scratch: struct {
            // size: two paths (name and link_name) and files size bytes (24 in pax attribute)
            buffer: [std.fs.MAX_PATH_BYTES * 2 + 24]u8 = undefined,
            tail: usize = 0,

            name: []const u8 = undefined,
            link_name: []const u8 = undefined,
            size: usize = 0,

            // Allocate size of the buffer for some attribute.
            fn alloc(self: *@This(), size: usize) ![]u8 {
                const free_size = self.buffer.len - self.tail;
                if (size > free_size) return error.TarScratchBufferOverflow;
                const head = self.tail;
                self.tail += size;
                assert(self.tail <= self.buffer.len);
                return self.buffer[head..self.tail];
            }

            // Reset buffer and all fields.
            fn reset(self: *@This()) void {
                self.tail = 0;
                self.name = self.buffer[0..0];
                self.link_name = self.buffer[0..0];
                self.size = 0;
            }

            // Fills every attribute that is still empty from the header.
            // Attributes already set by preceding meta headers are kept.
            fn append(self: *@This(), header: Header) !void {
                if (self.size == 0) {
                    self.size = std.math.cast(usize, try header.fileSize()) orelse
                        return error.TarNumericValueTooBig;
                }
                if (self.link_name.len == 0) {
                    const link_name = header.linkName();
                    if (link_name.len > 0) {
                        const buf = try self.alloc(link_name.len);
                        @memcpy(buf, link_name);
                        self.link_name = buf;
                    }
                }
                if (self.name.len == 0) {
                    self.name = try header.fullName((try self.alloc(MAX_HEADER_NAME_SIZE))[0..MAX_HEADER_NAME_SIZE]);
                }
            }
        } = .{},

        reader: BufferedReaderType,
        diagnostics: ?*Options.Diagnostics,
        padding: usize = 0, // bytes of padding to the end of the block

        const Self = @This();

        pub const File = struct {
            name: []const u8, // name of file, symlink or directory
            link_name: []const u8, // target name of symlink
            size: usize, // size of the file in bytes
            mode: u32,
            file_type: Header.FileType,

            reader: *BufferedReaderType,

            // Writes file content to writer.
            pub fn write(self: File, writer: anytype) !void {
                try self.reader.write(writer, self.size);
            }

            // Skips file content. Advances reader.
            pub fn skip(self: File) !void {
                try self.reader.skip(self.size);
            }
        };

        // Externally, `next` iterates through the tar archive as if it is a
        // series of files. Internally, the tar format often uses fake "files"
        // to add meta data that describes the next file. These meta data
        // "files" should not normally be visible to the outside. As such, this
        // loop iterates through one or more "header files" until it finds a
        // "normal file".
        //
        // Returns null at the end of the stream or at an all-zero block.
        pub fn next(self: *Self) !?File {
            self.scratch.reset();

            while (try self.reader.readHeader(self.padding)) |block_bytes| {
                const header = Header{ .bytes = block_bytes[0..BLOCK_SIZE] };
                if (try header.checkChksum() == 0) return null; // zero block found

                const file_type = header.fileType();
                // The size comes from the archive; use a checked conversion
                // so a value that does not fit into usize becomes an error.
                const size = std.math.cast(usize, try header.fileSize()) orelse
                    return error.TarNumericValueTooBig;
                self.padding = blockPadding(size);

                switch (file_type) {
                    // File types to return upstream
                    .directory, .normal, .symbolic_link => {
                        try self.scratch.append(header);
                        const file = File{
                            .file_type = file_type,
                            .name = self.scratch.name,
                            .link_name = self.scratch.link_name,
                            .size = self.scratch.size,
                            .reader = &self.reader,
                            .mode = try header.mode(),
                        };
                        self.padding = blockPadding(file.size);
                        return file;
                    },
                    // Prefix header types
                    .gnu_long_name => {
                        self.scratch.name = nullStr(try self.reader.copy(try self.scratch.alloc(size)));
                    },
                    .gnu_long_link => {
                        self.scratch.link_name = nullStr(try self.reader.copy(try self.scratch.alloc(size)));
                    },
                    .extended_header => {
                        if (size == 0) continue;
                        // Use just attributes from last extended header.
                        self.scratch.reset();

                        var rdr = self.reader.paxFileReader(size);
                        while (try rdr.next()) |attr| {
                            switch (attr.key) {
                                .path => {
                                    self.scratch.name = try noNull(try attr.value(try self.scratch.alloc(attr.value_len)));
                                },
                                .linkpath => {
                                    self.scratch.link_name = try noNull(try attr.value(try self.scratch.alloc(attr.value_len)));
                                },
                                .size => {
                                    self.scratch.size = try std.fmt.parseInt(usize, try attr.value(try self.scratch.alloc(attr.value_len)), 10);
                                },
                            }
                        }
                    },
                    // Ignored header type
                    .global_extended_header => {
                        self.reader.skip(size) catch return error.TarHeadersTooBig;
                    },
                    // All other are unsupported header types
                    else => {
                        const d = self.diagnostics orelse return error.TarUnsupportedFileType;
                        const file_name = try d.allocator.dupe(u8, header.name());
                        // Do not leak the copy if recording the error fails.
                        errdefer d.allocator.free(file_name);
                        try d.errors.append(d.allocator, .{ .unsupported_file_type = .{
                            .file_name = file_name,
                            .file_type = file_type,
                        } });
                        // Step over the content of the unsupported entry so
                        // that the next iteration starts at a header block.
                        try self.reader.skip(size);
                        // Long name, long link and pax attributes collected
                        // so far described this entry, not the following one.
                        self.scratch.reset();
                    },
                }
            }
            return null;
        }
    };
}

/// Creates a tar iterator that reads from `reader`. `diagnostics` is handed
/// to the iterator unchanged and may be null.
pub fn iterator(reader: anytype, diagnostics: ?*Options.Diagnostics) Iterator(@TypeOf(reader)) {
    const Buffered = BufferedReader(@TypeOf(reader));
    return .{
        .diagnostics = diagnostics,
        .reader = Buffered{ .unbuffered_reader = reader },
    };
}

/// Extracts the tar archive read from `reader` into `dir`.
///
/// Directories, regular files and symbolic links are created; missing
/// parent directories are created on demand. When `options.diagnostics` is
/// set, failures to create a file or symbolic link are recorded there and
/// extraction continues; otherwise `error.UnableToCreateFile` or
/// `error.UnableToCreateSymLink` is returned.
///
/// NOTE(review): entry names are used as found in the archive; names that
/// contain "../" are not rejected here.
pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: Options) !void {
    switch (options.mode_mode) {
        .ignore => {},
        .executable_bit_only => {
            // This code does not look at the mode bits yet. To implement this feature,
            // the implementation must be adjusted to look at the mode, and check the
            // user executable bit, then call fchmod on newly created files when
            // the executable bit is supposed to be set.
            // It also needs to properly deal with ACLs on Windows.
            @panic("TODO: unimplemented: tar ModeMode.executable_bit_only");
        },
    }

    var iter = iterator(reader, options.diagnostics);

    while (try iter.next()) |file| {
        switch (file.file_type) {
            .directory => {
                const file_name = try stripComponents(file.name, options.strip_components);
                if (file_name.len != 0 and !options.exclude_empty_directories) {
                    try dir.makePath(file_name);
                }
            },
            .normal => {
                if (file.size == 0 and file.name.len == 0) return;
                const file_name = try stripComponents(file.name, options.strip_components);

                const fs_file = dir.createFile(file_name, .{}) catch |err| switch (err) {
                    error.FileNotFound => again: {
                        // Retry once after creating the parent directory.
                        const code = code: {
                            if (std.fs.path.dirname(file_name)) |dir_name| {
                                dir.makePath(dir_name) catch |code| break :code code;
                                break :again dir.createFile(file_name, .{}) catch |code| {
                                    break :code code;
                                };
                            }
                            break :code err;
                        };
                        const d = options.diagnostics orelse return error.UnableToCreateFile;
                        const owned_file_name = try d.allocator.dupe(u8, file_name);
                        // Release the copy if it cannot be handed over to the list.
                        errdefer d.allocator.free(owned_file_name);
                        try d.errors.append(d.allocator, .{ .unable_to_create_file = .{
                            .code = code,
                            .file_name = owned_file_name,
                        } });
                        break :again null;
                    },
                    else => |e| return e,
                };
                defer if (fs_file) |f| f.close();

                if (fs_file) |f| {
                    try file.write(f);
                } else {
                    // File was not created; step over its content.
                    try file.skip();
                }
            },
            .symbolic_link => {
                // The file system path of the symbolic link.
                const file_name = try stripComponents(file.name, options.strip_components);
                // The data inside the symbolic link.
                const link_name = file.link_name;

                dir.symLink(link_name, file_name, .{}) catch |err| again: {
                    // Retry once after creating the parent directory.
                    const code = code: {
                        if (err == error.FileNotFound) {
                            if (std.fs.path.dirname(file_name)) |dir_name| {
                                dir.makePath(dir_name) catch |code| break :code code;
                                break :again dir.symLink(link_name, file_name, .{}) catch |code| {
                                    break :code code;
                                };
                            }
                        }
                        break :code err;
                    };
                    const d = options.diagnostics orelse return error.UnableToCreateSymLink;
                    // Release the copies if a later allocation fails.
                    const owned_file_name = try d.allocator.dupe(u8, file_name);
                    errdefer d.allocator.free(owned_file_name);
                    const owned_link_name = try d.allocator.dupe(u8, link_name);
                    errdefer d.allocator.free(owned_link_name);
                    try d.errors.append(d.allocator, .{ .unable_to_create_sym_link = .{
                        .code = code,
                        .file_name = owned_file_name,
                        .link_name = owned_link_name,
                    } });
                };
            },
            // The iterator returns only the three file types handled above.
            else => unreachable,
        }
    }
}

// Drops the first `count` '/'-terminated components from `path` and returns
// the rest. Fails with error.TarComponentsOutsideStrippedPrefix when `path`
// contains fewer than `count` separators.
fn stripComponents(path: []const u8, count: u32) ![]const u8 {
    var rest = path;
    for (0..count) |_| {
        const slash = std.mem.indexOfScalar(u8, rest, '/') orelse
            return error.TarComponentsOutsideStrippedPrefix;
        rest = rest[slash + 1 ..];
    }
    return rest;
}

test "tar stripComponents" {
    const testing = std.testing;
    const path = "a/b/c";
    // Stripping zero components leaves the path as it is.
    try testing.expectEqualStrings("a/b/c", try stripComponents(path, 0));
    // Each stripped component removes one leading directory.
    try testing.expectEqualStrings("b/c", try stripComponents(path, 1));
    try testing.expectEqualStrings("c", try stripComponents(path, 2));
}

// Layout of one parsed pax attribute record.
const PaxAttributeInfo = struct {
    size: usize,
    key: []const u8,
    value_off: usize,
    value_len: usize,

    // True when the attribute key is exactly `key`.
    inline fn is(self: @This(), key: []const u8) bool {
        const same = std.mem.eql(u8, key, self.key);
        return same;
    }
};

// Parses the leading part of a pax attribute record of the form
// "<length> <key>=<value>\n", where <length> is the decimal size of the
// whole record including the length digits and the trailing newline.
//
// `data` has to contain at least the record up to and including '='.
// `max_size` is the number of bytes left in the extended header; a record
// that claims to be longer is rejected.
//
// Returns error.InvalidPaxAttribute when the separators are missing, when
// the declared length is inconsistent, or when the key contains a zero byte.
// Errors from parsing the length digits are returned as is.
fn parsePaxAttribute(data: []const u8, max_size: usize) !PaxAttributeInfo {
    const pos_space = std.mem.indexOfScalar(u8, data, ' ') orelse return error.InvalidPaxAttribute;
    const pos_equals = std.mem.indexOfScalarPos(u8, data, pos_space, '=') orelse return error.InvalidPaxAttribute;
    const kv_size = try std.fmt.parseInt(usize, data[0..pos_space], 10);
    if (kv_size > max_size) {
        return error.InvalidPaxAttribute;
    }
    // The record must cover at least everything up to '=' plus the
    // terminating newline. Without this check a too small declared length
    // would make the value length computation below underflow.
    if (kv_size < pos_equals + 2) {
        return error.InvalidPaxAttribute;
    }
    const key = data[pos_space + 1 .. pos_equals];
    return .{
        .size = kv_size,
        .key = try noNull(key),
        .value_off = pos_equals + 1,
        .value_len = kv_size - pos_equals - 2,
    };
}

// Returns `str` unchanged, or error.InvalidPaxAttribute when it contains a
// zero byte.
fn noNull(str: []const u8) ![]const u8 {
    for (str) |byte| {
        if (byte == 0) return error.InvalidPaxAttribute;
    }
    return str;
}

test "tar parsePaxAttribute" {
    const testing = std.testing;

    // One record: length prefix, key, 1000 byte value and trailing newline.
    const record_prefix = "1011 path=";
    const long_name = "0123456789" ** 100;
    const record = record_prefix ++ long_name ++ "\n";

    const parsed = try parsePaxAttribute(record, 1011);
    try testing.expectEqual(@as(usize, 1011), parsed.size);
    try testing.expectEqualStrings("path", parsed.key);
    try testing.expectEqual(record_prefix.len, parsed.value_off);
    try testing.expectEqual(long_name.len, parsed.value_len);

    // A bigger limit gives the same result, a smaller one is rejected.
    try testing.expectEqual(parsed, try parsePaxAttribute(record, 1012));
    try testing.expectError(error.InvalidPaxAttribute, parsePaxAttribute(record, 1010));

    // Malformed records.
    try testing.expectError(error.InvalidPaxAttribute, parsePaxAttribute("", 0));
    try testing.expectError(error.InvalidPaxAttribute, parsePaxAttribute("13 pa\x00th=abc\n", 1024)); // null in key
}

/// Expectations for iterating over one tar archive in the tests.
const TestCase = struct {
    /// Expected properties of a single entry of the archive.
    const File = struct {
        name: []const u8,
        size: usize = 0,
        mode: u32 = 0,
        link_name: []const u8 = "",
        file_type: Header.FileType = .normal,
        /// Set when there is no file body, just the header; useful for huge files.
        truncated: bool = false,
    };

    /// Path to the tar archive file on disk.
    path: []const u8,
    /// Files expected to be found in the archive.
    files: []const File = &.{},
    /// Checksums of the files' content.
    chksums: []const []const u8 = &.{},
    /// When set, parsing should fail with this error.
    err: ?anyerror = null,
};

// Iterates over a set of tar archives and compares every entry reported by
// the iterator with the expectations listed in `cases`.
//
// The archives are looked up in the directory named by the
// GO_TAR_TESTDATA_PATH environment variable; the test is skipped when that
// variable is not set.
// NOTE(review): presumably this is the testdata directory of Go's archive/tar
// package - confirm.
test "tar run Go test cases" {
    const test_dir = if (std.os.getenv("GO_TAR_TESTDATA_PATH")) |path|
        try std.fs.openDirAbsolute(path, .{})
    else
        return error.SkipZigTest;

    const cases = [_]TestCase{
        .{
            .path = "gnu.tar",
            .files = &[_]TestCase.File{
                .{
                    .name = "small.txt",
                    .size = 5,
                    .mode = 0o640,
                },
                .{
                    .name = "small2.txt",
                    .size = 11,
                    .mode = 0o640,
                },
            },
            .chksums = &[_][]const u8{
                "e38b27eaccb4391bdec553a7f3ae6b2f",
                "c65bd2e50a56a2138bf1716f2fd56fe9",
            },
        },
        .{
            .path = "sparse-formats.tar",
            .err = error.TarUnsupportedFileType,
        },
        .{
            .path = "star.tar",
            .files = &[_]TestCase.File{
                .{
                    .name = "small.txt",
                    .size = 5,
                    .mode = 0o640,
                },
                .{
                    .name = "small2.txt",
                    .size = 11,
                    .mode = 0o640,
                },
            },
            .chksums = &[_][]const u8{
                "e38b27eaccb4391bdec553a7f3ae6b2f",
                "c65bd2e50a56a2138bf1716f2fd56fe9",
            },
        },
        .{
            .path = "v7.tar",
            .files = &[_]TestCase.File{
                .{
                    .name = "small.txt",
                    .size = 5,
                    .mode = 0o444,
                },
                .{
                    .name = "small2.txt",
                    .size = 11,
                    .mode = 0o444,
                },
            },
            .chksums = &[_][]const u8{
                "e38b27eaccb4391bdec553a7f3ae6b2f",
                "c65bd2e50a56a2138bf1716f2fd56fe9",
            },
        },
        .{
            .path = "pax.tar",
            .files = &[_]TestCase.File{
                .{
                    .name = "a/123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100",
                    .size = 7,
                    .mode = 0o664,
                },
                .{
                    .name = "a/b",
                    .size = 0,
                    .file_type = .symbolic_link,
                    .mode = 0o777,
                    .link_name = "123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100",
                },
            },
            .chksums = &[_][]const u8{
                "3c382e8f5b6631aa2db52643912ffd4a",
            },
        },
        .{
            // pax attribute doesn't end with \n
            .path = "pax-bad-hdr-file.tar",
            .err = error.InvalidPaxAttribute,
        },
        .{
            // file size is given in a pax attribute
            .path = "pax-pos-size-file.tar",
            .files = &[_]TestCase.File{
                .{
                    .name = "foo",
                    .size = 999,
                    .file_type = .normal,
                    .mode = 0o640,
                },
            },
            .chksums = &[_][]const u8{
                "0afb597b283fe61b5d4879669a350556",
            },
        },
        .{
            // has pax records which we are not interested in
            .path = "pax-records.tar",
            .files = &[_]TestCase.File{
                .{
                    .name = "file",
                },
            },
        },
        .{
            // has global records, which we ignore
            .path = "pax-global-records.tar",
            .files = &[_]TestCase.File{
                .{
                    .name = "file1",
                },
                .{
                    .name = "file2",
                },
                .{
                    .name = "file3",
                },
                .{
                    .name = "file4",
                },
            },
        },
        .{
            .path = "nil-uid.tar",
            .files = &[_]TestCase.File{
                .{
                    .name = "P1050238.JPG.log",
                    .size = 14,
                    .file_type = .normal,
                    .mode = 0o664,
                },
            },
            .chksums = &[_][]const u8{
                "08d504674115e77a67244beac19668f5",
            },
        },
        .{
            // has xattrs and pax records, which we ignore
            .path = "xattrs.tar",
            .files = &[_]TestCase.File{
                .{
                    .name = "small.txt",
                    .size = 5,
                    .file_type = .normal,
                    .mode = 0o644,
                },
                .{
                    .name = "small2.txt",
                    .size = 11,
                    .file_type = .normal,
                    .mode = 0o644,
                },
            },
            .chksums = &[_][]const u8{
                "e38b27eaccb4391bdec553a7f3ae6b2f",
                "c65bd2e50a56a2138bf1716f2fd56fe9",
            },
        },
        .{
            .path = "gnu-multi-hdrs.tar",
            .files = &[_]TestCase.File{
                .{
                    .name = "GNU2/GNU2/long-path-name",
                    .link_name = "GNU4/GNU4/long-linkpath-name",
                    .file_type = .symbolic_link,
                },
            },
        },
        .{
            // has gnu type D (directory) and S (sparse) blocks
            .path = "gnu-incremental.tar",
            .err = error.TarUnsupportedFileType,
        },
        .{
            // should use values only from the last pax header
            .path = "pax-multi-hdrs.tar",
            .files = &[_]TestCase.File{
                .{
                    .name = "bar",
                    .link_name = "PAX4/PAX4/long-linkpath-name",
                    .file_type = .symbolic_link,
                },
            },
        },
        .{
            .path = "gnu-long-nul.tar",
            .files = &[_]TestCase.File{
                .{
                    .name = "0123456789",
                    .mode = 0o644,
                },
            },
        },
        .{
            .path = "gnu-utf8.tar",
            .files = &[_]TestCase.File{
                .{
                    .name = "☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹",
                    .mode = 0o644,
                },
            },
        },
        .{
            .path = "gnu-not-utf8.tar",
            .files = &[_]TestCase.File{
                .{
                    .name = "hi\x80\x81\x82\x83bye",
                    .mode = 0o644,
                },
            },
        },
        .{
            // null byte in a pax key
            .path = "pax-nul-xattrs.tar",
            .err = error.InvalidPaxAttribute,
        },
        .{
            .path = "pax-nul-path.tar",
            .err = error.InvalidPaxAttribute,
        },
        .{
            .path = "neg-size.tar",
            .err = error.TarHeader,
        },
        .{
            .path = "issue10968.tar",
            .err = error.TarHeader,
        },
        .{
            .path = "issue11169.tar",
            .err = error.TarHeader,
        },
        .{
            .path = "issue12435.tar",
            .err = error.TarHeaderChksum,
        },
        .{
            // has magic with a space at the end instead of a null
            .path = "invalid-go17.tar",
            .files = &[_]TestCase.File{
                .{
                    .name = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/foo",
                },
            },
        },
        .{
            .path = "ustar-file-devs.tar",
            .files = &[_]TestCase.File{
                .{
                    .name = "file",
                    .mode = 0o644,
                },
            },
        },
        .{
            .path = "trailing-slash.tar",
            .files = &[_]TestCase.File{
                .{
                    .name = "123456789/" ** 30,
                    .file_type = .directory,
                },
            },
        },
        .{
            // Has size in gnu extended format, used to represent sizes bigger than 8 GB.
            .path = "writer-big.tar",
            .files = &[_]TestCase.File{
                .{
                    .name = "tmp/16gig.txt",
                    .size = 16 * 1024 * 1024 * 1024,
                    .truncated = true,
                    .mode = 0o640,
                },
            },
        },
        .{
            // Size in gnu extended format, and name in pax attribute.
            .path = "writer-big-long.tar",
            .files = &[_]TestCase.File{
                .{
                    .name = "longname/" ** 15 ++ "16gig.txt",
                    .size = 16 * 1024 * 1024 * 1024,
                    .mode = 0o644,
                    .truncated = true,
                },
            },
        },
    };

    for (cases) |case| {
        var fs_file = try test_dir.openFile(case.path, .{});
        defer fs_file.close();

        var iter = iterator(fs_file.reader(), null);
        // Index of the current entry; also the number of entries seen so far.
        var i: usize = 0;
        while (iter.next() catch |err| {
            // An iteration error is acceptable only when the case expects
            // exactly this error; anything else fails the test.
            if (case.err) |e| {
                try std.testing.expectEqual(e, err);
                // NOTE(review): this `continue` sits in the `while` condition,
                // not in its body, so it appears to target the enclosing
                // `for (cases)` loop and thereby skips the file count check
                // below for this case - confirm.
                continue;
            } else {
                return err;
            }
        }) |actual| : (i += 1) {
            const expected = case.files[i];
            try std.testing.expectEqualStrings(expected.name, actual.name);
            try std.testing.expectEqual(expected.size, actual.size);
            try std.testing.expectEqual(expected.file_type, actual.file_type);
            try std.testing.expectEqual(expected.mode, actual.mode);
            try std.testing.expectEqualStrings(expected.link_name, actual.link_name);

            if (case.chksums.len > i) {
                // A checksum is listed for this entry: feed the file content
                // through the MD5 writer and compare the hex digests.
                var md5writer = Md5Writer{};
                try actual.write(&md5writer);
                const chksum = md5writer.chksum();
                try std.testing.expectEqualStrings(case.chksums[i], &chksum);
            } else {
                // No checksum listed: skip the file content, unless the case
                // marks the entry as truncated (header only, no body).
                if (!expected.truncated) try actual.skip(); // skip file content
            }
        }
        // Every expected file must have been seen.
        try std.testing.expectEqual(case.files.len, i);
    }
}

/// Minimal writer used by the tests to compute the MD5 checksum of file content.
const Md5Writer = struct {
    const Md5 = std.crypto.hash.Md5;

    h: Md5 = Md5.init(.{}),

    /// Feeds `buf` into the running hash.
    pub fn writeAll(self: *Md5Writer, buf: []const u8) !void {
        self.h.update(buf);
    }

    /// Finalizes the hash and returns the digest as 32 lowercase hex characters.
    pub fn chksum(self: *Md5Writer) [32]u8 {
        var digest = [_]u8{0} ** 16;
        self.h.final(&digest);
        const hex = std.fmt.bytesToHex(digest, .lower);
        return hex;
    }
};
-												tar: fix import path

											
										
										
											2023-11-29 14:31:22 +00:00
+								const std = @import("std.zig");
 								const assert = std.debug.assert;
-												std.tar: add strip_components option

											
										
										
											2023-01-10 07:38:18 +00:00
+								pub const Options = struct {
 								    /// Number of directory levels to skip when extracting files.
 								    strip_components: u32 = 0,
-												zig build: add executable bit and file path to package hash

Unfortunately, due to the Windows equivalent of executable permissions
being a bit tricky, there is follow-up work to be done.

What is done in this commit is the hash modifications. At the fetch
layer, executable bits inside packages are ignored. In the hash
computation layer, executable bit is implemented for POSIX but not yet
for Windows. This means that the hash will not break again in the future
for packages that do not have any executable files, but it will break
for packages that do.

This is a hash-breaking change.

Closes #14308

											
										
										
											2023-02-02 01:42:29 +00:00
+								    /// How to handle the "mode" property of files from within the tar file.
 								    mode_mode: ModeMode = .executable_bit_only,
-												std.tar: add option for omitting empty directories

											
										
										
											2023-10-04 06:25:04 +00:00
+								    /// Prevents creation of empty directories.
 								    exclude_empty_directories: bool = false,
-												std.tar: support symlinks

closes #16678

											
										
										
											2023-10-01 06:00:39 +00:00
+								    /// Provide this to receive detailed error messages.
 								    /// When this is provided, some errors which would otherwise be returned immediately
 								    /// will instead be added to this structure. The API user must check the errors
 								    /// in diagnostics to know whether the operation succeeded or failed.
 								    diagnostics: ?*Diagnostics = null,
-												zig build: add executable bit and file path to package hash

Unfortunately, due to the Windows equivalent of executable permissions
being a bit tricky, there is follow-up work to be done.

What is done in this commit is the hash modifications. At the fetch
layer, executable bits inside packages are ignored. In the hash
computation layer, executable bit is implemented for POSIX but not yet
for Windows. This means that the hash will not break again in the future
for packages that do not have any executable files, but it will break
for packages that do.

This is a hash-breaking change.

Closes #14308

											
										
										
											2023-02-02 01:42:29 +00:00
-												std.tar: support symlinks

closes #16678

											
										
										
											2023-10-01 06:00:39 +00:00
+								    pub const ModeMode = enum {
-												zig build: add executable bit and file path to package hash

Unfortunately, due to the Windows equivalent of executable permissions
being a bit tricky, there is follow-up work to be done.

What is done in this commit is the hash modifications. At the fetch
layer, executable bits inside packages are ignored. In the hash
computation layer, executable bit is implemented for POSIX but not yet
for Windows. This means that the hash will not break again in the future
for packages that do not have any executable files, but it will break
for packages that do.

This is a hash-breaking change.

Closes #14308

											
										
										
											2023-02-02 01:42:29 +00:00
+								        /// The mode from the tar file is completely ignored. Files are created
 								        /// with the default mode when creating files.
 								        ignore,
 								        /// The mode from the tar file is inspected for the owner executable bit
 								        /// only. This bit is copied to the group and other executable bits.
 								        /// Other bits of the mode are left as the default when creating files.
 								        executable_bit_only,
 								    };
-												std.tar: support symlinks

closes #16678

											
										
										
											2023-10-01 06:00:39 +00:00
 								    pub const Diagnostics = struct {
 								        allocator: std.mem.Allocator,
 								        errors: std.ArrayListUnmanaged(Error) = .{},
 								        pub const Error = union(enum) {
 								            unable_to_create_sym_link: struct {
 								                code: anyerror,
 								                file_name: []const u8,
 								                link_name: []const u8,
 								            },
-												std.tar: fix creation of symlinks with omit_empty_directories

											
										
										
											2023-10-08 06:09:39 +00:00
+								            unable_to_create_file: struct {
 								                code: anyerror,
 								                file_name: []const u8,
 								            },
-												zig fetch: enhanced error reporting

* Package: use std.tar diagnostics to give detailed error messages
* std.tar: add diagnostic for unsupported file type

											
										
										
											2023-10-03 00:00:45 +00:00
+								            unsupported_file_type: struct {
 								                file_name: []const u8,
 								                file_type: Header.FileType,
 								            },
-												std.tar: support symlinks

closes #16678

											
										
										
											2023-10-01 06:00:39 +00:00
+								        };
 								        pub fn deinit(d: *Diagnostics) void {
 								            for (d.errors.items) |item| {
 								                switch (item) {
 								                    .unable_to_create_sym_link => |info| {
 								                        d.allocator.free(info.file_name);
 								                        d.allocator.free(info.link_name);
 								                    },
-												std.tar: fix creation of symlinks with omit_empty_directories

											
										
										
											2023-10-08 06:09:39 +00:00
+								                    .unable_to_create_file => |info| {
 								                        d.allocator.free(info.file_name);
 								                    },
-												zig fetch: enhanced error reporting

* Package: use std.tar diagnostics to give detailed error messages
* std.tar: add diagnostic for unsupported file type

											
										
										
											2023-10-03 00:00:45 +00:00
+								                    .unsupported_file_type => |info| {
 								                        d.allocator.free(info.file_name);
 								                    },
-												std.tar: support symlinks

closes #16678

											
										
										
											2023-10-01 06:00:39 +00:00
+								                }
 								            }
 								            d.errors.deinit(d.allocator);
 								            d.* = undefined;
 								        }
 								    };
-												std.tar: add strip_components option

											
										
										
											2023-01-10 07:38:18 +00:00
+								};
-												add std.tar for tar file unpacking

											
										
										
											2023-01-10 05:36:35 +00:00
-												tar: add parsing size in gnu extended format

Reference:
https://www.gnu.org/software/tar/manual/html_node/Extensions.html#Extensions

If the leading byte is 0x80 (128), the non-leading bytes of the field
are concatenated in big-endian order, with the result being a positive
number expressed in binary form.

											
										
										
											2023-11-29 14:28:38 +00:00
+								const BLOCK_SIZE = 512;
-												tar: use scratch buffer for file names

That makes names strings stable during the iteration. Otherwise string
buffers can be overwritten while reading file content.

											
										
										
											2023-12-01 17:26:31 +00:00
+								const MAX_HEADER_NAME_SIZE = 100 + 1 + 155; // name(100) + separator(1) + prefix(155)
-												tar: refactor reader and iterator

Make it more readable.

											
										
										
											2023-11-28 22:07:37 +00:00
-												add std.tar for tar file unpacking

											
										
										
											2023-01-10 05:36:35 +00:00
+								pub const Header = struct {
-												tar: add parsing size in gnu extended format

Reference:
https://www.gnu.org/software/tar/manual/html_node/Extensions.html#Extensions

If the leading byte is 0x80 (128), the non-leading bytes of the field
are concatenated in big-endian order, with the result being a positive
number expressed in binary form.

											
										
										
											2023-11-29 14:28:38 +00:00
+								    bytes: *const [BLOCK_SIZE]u8,
-												add std.tar for tar file unpacking

											
										
										
											2023-01-10 05:36:35 +00:00
 								    pub const FileType = enum(u8) {
-												zig fetch: enhanced error reporting

* Package: use std.tar diagnostics to give detailed error messages
* std.tar: add diagnostic for unsupported file type

											
										
										
											2023-10-03 00:00:45 +00:00
+								        normal_alias = 0,
-												add std.tar for tar file unpacking

											
										
										
											2023-01-10 05:36:35 +00:00
+								        normal = '0',
 								        hard_link = '1',
 								        symbolic_link = '2',
 								        character_special = '3',
 								        block_special = '4',
 								        directory = '5',
 								        fifo = '6',
 								        contiguous = '7',
 								        global_extended_header = 'g',
 								        extended_header = 'x',
-												tar: add gnu path and link extensions handling

											
										
										
											2023-11-29 20:37:13 +00:00
+								        // Types 'L' and 'K' are used by the GNU format for a meta file
 								        // used to store the path or link name for the next file.
 								        gnu_long_name = 'L',
 								        gnu_long_link = 'K',
-												add std.tar for tar file unpacking

											
										
										
											2023-01-10 05:36:35 +00:00
+								        _,
 								    };
 								    /// Includes prefix concatenated, if any.
 								    /// TODO: check against "../" and other nefarious things
-												tar: use scratch buffer for file names

That makes names strings stable during the iteration. Otherwise string
buffers can be overwritten while reading file content.

											
										
										
											2023-12-01 17:26:31 +00:00
+								    pub fn fullName(header: Header, buffer: *[MAX_HEADER_NAME_SIZE]u8) ![]const u8 {
-												add std.tar for tar file unpacking

											
										
										
											2023-01-10 05:36:35 +00:00
+								        const n = name(header);
 								        const p = prefix(header);
-												tar: use scratch buffer for file names

That makes names strings stable during the iteration. Otherwise string
buffers can be overwritten while reading file content.

											
										
										
											2023-12-01 17:26:31 +00:00
+								        if (!is_ustar(header) or p.len == 0) {
 								            @memcpy(buffer[0..n.len], n);
 								            return buffer[0..n.len];
 								        }
-												std: update to use `@memcpy` directly

											
										
										
											2023-04-27 22:16:01 +00:00
+								        @memcpy(buffer[0..p.len], p);
-												add std.tar for tar file unpacking

											
										
										
											2023-01-10 05:36:35 +00:00
+								        buffer[p.len] = '/';
-												std: update to use `@memcpy` directly

											
										
										
											2023-04-27 22:16:01 +00:00
+								        @memcpy(buffer[p.len + 1 ..][0..n.len], n);
-												add std.tar for tar file unpacking

											
										
										
											2023-01-10 05:36:35 +00:00
+								        return buffer[0 .. p.len + 1 + n.len];
 								    }
 								    pub fn name(header: Header) []const u8 {
-												tar: add header chksum checking

											
										
										
											2023-11-29 16:17:20 +00:00
+								        return header.str(0, 100);
 								    }
-												tar: add file mode to result of tarbal iteration

So we have information to set executable bit on write to file system.

											
										
										
											2023-12-02 14:00:42 +00:00
+								    pub fn mode(header: Header) !u32 {
 								        return @intCast(try header.numeric(100, 8));
 								    }
-												tar: add header chksum checking

											
										
										
											2023-11-29 16:17:20 +00:00
+								    pub fn fileSize(header: Header) !u64 {
 								        return header.numeric(124, 12);
 								    }
 								    pub fn chksum(header: Header) !u64 {
 								        return header.octal(148, 8);
-												add std.tar for tar file unpacking

											
										
										
											2023-01-10 05:36:35 +00:00
+								    }
-												std.tar: support symlinks

closes #16678

											
										
										
											2023-10-01 06:00:39 +00:00
+								    pub fn linkName(header: Header) []const u8 {
-												tar: add header chksum checking

											
										
										
											2023-11-29 16:17:20 +00:00
+								        return header.str(157, 100);
 								    }
 								    pub fn is_ustar(header: Header) bool {
-												tar: add gnu path and link extensions handling

											
										
										
											2023-11-29 20:37:13 +00:00
+								        const magic = header.bytes[257..][0..6];
 								        return std.mem.eql(u8, magic[0..5], "ustar") and (magic[5] == 0 or magic[5] == ' ');
-												std.tar: support symlinks

closes #16678

											
										
										
											2023-10-01 06:00:39 +00:00
+								    }
-												add std.tar for tar file unpacking

											
										
										
											2023-01-10 05:36:35 +00:00
+								    pub fn prefix(header: Header) []const u8 {
-												tar: add header chksum checking

											
										
										
											2023-11-29 16:17:20 +00:00
+								        return header.str(345, 155);
-												add std.tar for tar file unpacking

											
										
										
											2023-01-10 05:36:35 +00:00
+								    }
 								    pub fn fileType(header: Header) FileType {
-												zig fetch: enhanced error reporting

* Package: use std.tar diagnostics to give detailed error messages
* std.tar: add diagnostic for unsupported file type

											
										
										
											2023-10-03 00:00:45 +00:00
+								        const result: FileType = @enumFromInt(header.bytes[156]);
 								        if (result == .normal_alias) return .normal;
 								        return result;
-												add std.tar for tar file unpacking

											
										
										
											2023-01-10 05:36:35 +00:00
+								    }
-												tar: add header chksum checking

											
										
										
											2023-11-29 16:17:20 +00:00
+								    fn str(header: Header, start: usize, len: usize) []const u8 {
-												tar: add gnu path and link extensions handling

											
										
										
											2023-11-29 20:37:13 +00:00
+								        return nullStr(header.bytes[start .. start + len]);
-												add std.tar for tar file unpacking

											
										
										
											2023-01-10 05:36:35 +00:00
+								    }
-												tar: add initial test cases

Just adding tests, without changing functionality.

											
										
										
											2023-11-27 20:37:30 +00:00
-												tar: add header chksum checking

											
										
										
											2023-11-29 16:17:20 +00:00
+								    fn numeric(header: Header, start: usize, len: usize) !u64 {
 								        const raw = header.bytes[start..][0..len];
 								        //  If the leading byte is 0xff (255), all the bytes of the field
 								        //  (including the leading byte) are concatenated in big-endian order,
 								        //  with the result being a negative number expressed in two’s
 								        //  complement form.
 								        if (raw[0] == 0xff) return error.TarNumericValueNegative;
 								        // If the leading byte is 0x80 (128), the non-leading bytes of the
 								        // field are concatenated in big-endian order.
 								        if (raw[0] == 0x80) {
 								            if (raw[1] + raw[2] + raw[3] != 0) return error.TarNumericValueTooBig;
 								            return std.mem.readInt(u64, raw[4..12], .big);
-												tar: add initial test cases

Just adding tests, without changing functionality.

											
										
										
											2023-11-27 20:37:30 +00:00
+								        }
-												tar: add header chksum checking

											
										
										
											2023-11-29 16:17:20 +00:00
+								        return try header.octal(start, len);
 								    }
 								    fn octal(header: Header, start: usize, len: usize) !u64 {
 								        const raw = header.bytes[start..][0..len];
 								        // Zero-filled octal number in ASCII. Each numeric field of width w
 								        // contains w minus 1 digits, and a null
 								        const ltrimmed = std.mem.trimLeft(u8, raw, "0 ");
 								        const rtrimmed = std.mem.trimRight(u8, ltrimmed, " \x00");
 								        if (rtrimmed.len == 0) return 0;
-												tar: handle pax null attrs and pax attr ending

											
										
										
											2023-11-29 19:30:08 +00:00
+								        return std.fmt.parseInt(u64, rtrimmed, 8) catch return error.TarHeader;
-												tar: add header chksum checking

											
										
										
											2023-11-29 16:17:20 +00:00
+								    }
 								    // Sum of all bytes in the header block. The chksum field is treated as if
 								    // it were filled with spaces (ASCII 32).
 								    fn computeChksum(header: Header) u64 {
 								        var sum: u64 = 0;
 								        for (header.bytes, 0..) |b, i| {
 								            if (148 <= i and i < 156) continue; // skip chksum field bytes
 								            sum += b;
 								        }
 								        // Treating chksum bytes as spaces. 256 = 8 * 32, 8 spaces.
 								        return if (sum > 0) sum + 256 else 0;
 								    }
 								    // Checks calculated chksum with value of chksum field.
-												tar: use scratch buffer for file names

That makes names strings stable during the iteration. Otherwise string
buffers can be overwritten while reading file content.

											
										
										
											2023-12-01 17:26:31 +00:00
+								    // Returns error or valid chksum value.
-												tar: add header chksum checking

											
										
										
											2023-11-29 16:17:20 +00:00
+								    // Zero value indicates empty block.
 								    pub fn checkChksum(header: Header) !u64 {
 								        const field = try header.chksum();
 								        const computed = header.computeChksum();
 								        if (field != computed) return error.TarHeaderChksum;
 								        return field;
-												tar: add initial test cases

Just adding tests, without changing functionality.

											
										
										
											2023-11-27 20:37:30 +00:00
+								    }
-												add std.tar for tar file unpacking

											
										
										
											2023-01-10 05:36:35 +00:00
+								};
-												tar: use scratch buffer for file names

That makes names strings stable during the iteration. Otherwise string
buffers can be overwritten while reading file content.

											
										
										
											2023-12-01 17:26:31 +00:00
+								// Breaks string on first null char.
-												tar: add gnu path and link extensions handling

											
										
										
											2023-11-29 20:37:13 +00:00
+								fn nullStr(str: []const u8) []const u8 {
 								    for (str, 0..) |c, i| {
 								        if (c == 0) return str[0..i];
 								    }
 								    return str;
 								}
-												tar: refactor reading pax attributes

											
										
										
											2023-11-30 20:28:10 +00:00
+								// Number of padding bytes in the last file block.
-												tar: use scratch buffer for file names

That makes names strings stable during the iteration. Otherwise string
buffers can be overwritten while reading file content.

											
										
										
											2023-12-01 17:26:31 +00:00
+								inline fn blockPadding(size: usize) usize {
 								    const block_rounded = std.mem.alignForward(usize, size, BLOCK_SIZE); // size rounded to te block boundary
 								    return block_rounded - size;
-												tar: refactor reading pax attributes

											
										
										
											2023-11-30 20:28:10 +00:00
+								}
-												tar: refactor Buffer

Move reader into Buffer and make it BufferedReader. This doesn't
introduce any new functionality just grouping similar things.

											
										
										
											2023-11-27 16:17:28 +00:00
+								fn BufferedReader(comptime ReaderType: type) type {
 								    return struct {
 								        unbuffered_reader: ReaderType,
-												tar: add parsing size in gnu extended format

Reference:
https://www.gnu.org/software/tar/manual/html_node/Extensions.html#Extensions

If the leading byte is 0x80 (128), the non-leading bytes of the field
are concatenated in big-endian order, with the result being a positive
number expressed in binary form.

											
										
										
											2023-11-29 14:28:38 +00:00
+								        buffer: [BLOCK_SIZE * 8]u8 = undefined,
-												tar: refactor Buffer

Move reader into Buffer and make it BufferedReader. This doesn't
introduce any new functionality just grouping similar things.

											
										
										
											2023-11-27 16:17:28 +00:00
+								        start: usize = 0,
 								        end: usize = 0,
-												std.tar: add support for file path in pax attributes

Handles .extended_header type to parse PAX attributes and check if they override
the path of the next file. Increases file path limit to std.fs.MAX_PATH_BYTES.

Fixes #15342

											
										
										
											2023-08-27 17:35:33 +00:00
-												tar: refactor Buffer

Move reader into Buffer and make it BufferedReader. This doesn't
introduce any new functionality just grouping similar things.

											
										
										
											2023-11-27 16:17:28 +00:00
+								        const Self = @This();
-												std.tar: add support for file path in pax attributes

Handles .extended_header type to parse PAX attributes and check if they override
the path of the next file. Increases file path limit to std.fs.MAX_PATH_BYTES.

Fixes #15342

											
										
										
											2023-08-27 17:35:33 +00:00
-												tar: use scratch buffer for file names

That makes names strings stable during the iteration. Otherwise string
buffers can be overwritten while reading file content.

											
										
										
											2023-12-01 17:26:31 +00:00
+								        // Fills buffer from underlying reader.
 								        fn fillBuffer(self: *Self) !void {
 								            self.removeUsed();
 								            self.end += try self.unbuffered_reader.read(self.buffer[self.end..]);
-												tar: refactor Buffer

Move reader into Buffer and make it BufferedReader. This doesn't
introduce any new functionality just grouping similar things.

											
										
										
											2023-11-27 16:17:28 +00:00
+								        }
-												std.tar: add support for file path in pax attributes

Handles .extended_header type to parse PAX attributes and check if they override
the path of the next file. Increases file path limit to std.fs.MAX_PATH_BYTES.

Fixes #15342

											
										
										
											2023-08-27 17:35:33 +00:00
-												tar: use scratch buffer for file names

That makes names strings stable during the iteration. Otherwise string
buffers can be overwritten while reading file content.

											
										
										
											2023-12-01 17:26:31 +00:00
+								        // Returns slice of size count or how much fits into buffer.
-												tar: refactor reading pax attributes

											
										
										
											2023-11-30 20:28:10 +00:00
+								        pub fn readSlice(self: *Self, count: usize) ![]const u8 {
 								            if (count <= self.end - self.start) {
 								                return self.buffer[self.start .. self.start + count];
 								            }
-												tar: use scratch buffer for file names

That makes names strings stable during the iteration. Otherwise string
buffers can be overwritten while reading file content.

											
										
										
											2023-12-01 17:26:31 +00:00
+								            try self.fillBuffer();
 								            const buf = self.buffer[self.start..self.end];
 								            if (buf.len == 0) return error.UnexpectedEndOfStream;
 								            return buf[0..@min(count, buf.len)];
-												tar: refactor reading pax attributes

											
										
										
											2023-11-30 20:28:10 +00:00
+								        }
-												tar: use scratch buffer for file names

That makes names strings stable during the iteration. Otherwise string
buffers can be overwritten while reading file content.

											
										
										
											2023-12-01 17:26:31 +00:00
+								        // Returns tar header block, 512 bytes, or null if eof. Before reading
 								        // advances buffer for padding of the previous block, to position reader
 								        // at the start of new block. After reading advances for block size, to
 								        // position reader at the start of the file content.
 								        pub fn readHeader(self: *Self, padding: usize) !?[]const u8 {
-												tar: refactor reading pax attributes

											
										
										
											2023-11-30 20:28:10 +00:00
+								            try self.skip(padding);
-												tar: use scratch buffer for file names

That makes names strings stable during the iteration. Otherwise string
buffers can be overwritten while reading file content.

											
										
										
											2023-12-01 17:26:31 +00:00
+								            const buf = self.readSlice(BLOCK_SIZE) catch return null;
 								            if (buf.len < BLOCK_SIZE) return error.UnexpectedEndOfStream;
-												tar: add parsing size in gnu extended format

Reference:
https://www.gnu.org/software/tar/manual/html_node/Extensions.html#Extensions

If the leading byte is 0x80 (128), the non-leading bytes of the field
are concatenated in big-endian order, with the result being a positive
number expressed in binary form.

											
										
										
											2023-11-29 14:28:38 +00:00
+								            self.advance(BLOCK_SIZE);
-												tar: use scratch buffer for file names

That makes names strings stable during the iteration. Otherwise string
buffers can be overwritten while reading file content.

											
										
										
											2023-12-01 17:26:31 +00:00
+								            return buf[0..BLOCK_SIZE];
-												tar: refactor reader and iterator

Make it more readable.

											
										
										
											2023-11-28 22:07:37 +00:00
+								        }
-												tar: use scratch buffer for file names

That makes names strings stable during the iteration. Otherwise string
buffers can be overwritten while reading file content.

											
										
										
											2023-12-01 17:26:31 +00:00
+								        // Returns byte at current position in buffer.
-												tar: refactor reading pax attributes

											
										
										
											2023-11-30 20:28:10 +00:00
+								        pub fn readByte(self: *@This()) u8 {
-												tar: use scratch buffer for file names

That makes names strings stable during the iteration. Otherwise string
buffers can be overwritten while reading file content.

											
										
										
											2023-12-01 17:26:31 +00:00
+								            assert(self.start < self.end);
-												tar: refactor reading pax attributes

											
										
										
											2023-11-30 20:28:10 +00:00
+								            return self.buffer[self.start];
 								        }
 								        // Advances reader for count bytes, assumes that we have that number of
 								        // bytes in buffer.
-												tar: refactor Buffer

Move reader into Buffer and make it BufferedReader. This doesn't
introduce any new functionality just grouping similar things.

											
										
										
											2023-11-27 16:17:28 +00:00
+								        pub fn advance(self: *Self, count: usize) void {
 								            self.start += count;
 								            // Checked after the increment: the read position may reach, but
 								            // never pass, the end of the buffered data.
 								            assert(self.start <= self.end);
 								        }
-												tar: refactor reading pax attributes

											
										
										
											2023-11-30 20:28:10 +00:00
+								        // Advances reader without assuming that count bytes are in the buffer.
-												tar: refactor Buffer

Move reader into Buffer and make it BufferedReader. This doesn't
introduce any new functionality just grouping similar things.

											
										
										
											2023-11-27 16:17:28 +00:00
+								        pub fn skip(self: *Self, count: usize) !void {
 								            if (self.start + count > self.end) {
 								                try self.unbuffered_reader.skipBytes(self.start + count - self.end, .{});
 								                self.start = self.end;
 								            } else {
 								                self.advance(count);
 								            }
-												std.tar: add support for file path in pax attributes

Handles .extended_header type to parse PAX attributes and check if they override
the path of the next file. Increases file path limit to std.fs.MAX_PATH_BYTES.

Fixes #15342

											
										
										
											2023-08-27 17:35:33 +00:00
+								        }
-												tar: use scratch buffer for file names

That makes names strings stable during the iteration. Otherwise string
buffers can be overwritten while reading file content.

											
										
										
											2023-12-01 17:26:31 +00:00
+								        // Removes used part of the buffer.
 								        inline fn removeUsed(self: *Self) void {
 								            const dest_end = self.end - self.start;
 								            if (self.start == 0 or dest_end > self.start) return;
 								            @memcpy(self.buffer[0..dest_end], self.buffer[self.start..self.end]);
 								            self.end = dest_end;
 								            self.start = 0;
-												std.tar: add support for file path in pax attributes

Handles .extended_header type to parse PAX attributes and check if they override
the path of the next file. Increases file path limit to std.fs.MAX_PATH_BYTES.

Fixes #15342

											
										
										
											2023-08-27 17:35:33 +00:00
+								        }
-												tar: refactor Buffer

Move reader into Buffer and make it BufferedReader. This doesn't
introduce any new functionality just grouping similar things.

											
										
										
											2023-11-27 16:17:28 +00:00
-												tar: use scratch buffer for file names

That makes names strings stable during the iteration. Otherwise string
buffers can be overwritten while reading file content.

											
										
										
											2023-12-01 17:26:31 +00:00
+								        // Writes count bytes to the writer. Advances reader.
-												tar: refactor reading pax attributes

											
										
										
											2023-11-30 20:28:10 +00:00
+								        pub fn write(self: *Self, writer: anytype, count: usize) !void {
-												tar: use scratch buffer for file names

That makes names strings stable during the iteration. Otherwise string
buffers can be overwritten while reading file content.

											
										
										
											2023-12-01 17:26:31 +00:00
+								            var pos: usize = 0;
 								            while (pos < count) {
 								                const slice = try self.readSlice(count - pos);
-												tar: refactor Buffer

Move reader into Buffer and make it BufferedReader. This doesn't
introduce any new functionality just grouping similar things.

											
										
										
											2023-11-27 16:17:28 +00:00
+								                try writer.writeAll(slice);
-												tar: use scratch buffer for file names

That makes names strings stable during the iteration. Otherwise string
buffers can be overwritten while reading file content.

											
										
										
											2023-12-01 17:26:31 +00:00
+								                self.advance(slice.len);
 								                pos += slice.len;
-												tar: refactor reader and iterator

Make it more readable.

											
										
										
											2023-11-28 22:07:37 +00:00
+								            }
 								        }
-												tar: refactor Buffer

Move reader into Buffer and make it BufferedReader. This doesn't
introduce any new functionality just grouping similar things.

											
										
										
											2023-11-27 16:17:28 +00:00
-												tar: use scratch buffer for file names

That makes names strings stable during the iteration. Otherwise string
buffers can be overwritten while reading file content.

											
										
										
											2023-12-01 17:26:31 +00:00
+								        // Copies dst.len bytes into dst buffer. Advances reader.
-												tar: refactor reader and iterator

Make it more readable.

											
										
										
											2023-11-28 22:07:37 +00:00
+								        pub fn copy(self: *Self, dst: []u8) ![]const u8 {
 								            var pos: usize = 0;
-												tar: use scratch buffer for file names

That makes names strings stable during the iteration. Otherwise string
buffers can be overwritten while reading file content.

											
										
										
											2023-12-01 17:26:31 +00:00
+								            while (pos < dst.len) {
 								                const slice = try self.readSlice(dst.len - pos);
-												tar: refactor reader and iterator

Make it more readable.

											
										
										
											2023-11-28 22:07:37 +00:00
+								                @memcpy(dst[pos .. pos + slice.len], slice);
-												tar: refactor reading pax attributes

											
										
										
											2023-11-30 20:28:10 +00:00
+								                self.advance(slice.len);
-												tar: use scratch buffer for file names

That makes names strings stable during the iteration. Otherwise string
buffers can be overwritten while reading file content.

											
										
										
											2023-12-01 17:26:31 +00:00
+								                pos += slice.len;
-												tar: refactor Buffer

Move reader into Buffer and make it BufferedReader. This doesn't
introduce any new functionality just grouping similar things.

											
										
										
											2023-11-27 16:17:28 +00:00
+								            }
-												tar: use scratch buffer for file names

That makes names strings stable during the iteration. Otherwise string
buffers can be overwritten while reading file content.

											
										
										
											2023-12-01 17:26:31 +00:00
+								            return dst;
-												tar: refactor reading pax attributes

											
										
										
											2023-11-30 20:28:10 +00:00
+								        }
 								        pub fn paxFileReader(self: *Self, size: usize) PaxFileReader {
 								            return .{
 								                .size = size,
-												tar: refactor reader and iterator

Make it more readable.

											
										
										
											2023-11-28 22:07:37 +00:00
+								                .reader = self,
-												tar: refactor reading pax attributes

											
										
										
											2023-11-30 20:28:10 +00:00
+								                .offset = 0,
-												tar: refactor reader and iterator

Make it more readable.

											
										
										
											2023-11-28 22:07:37 +00:00
+								            };
-												tar: refactor Buffer

Move reader into Buffer and make it BufferedReader. This doesn't
introduce any new functionality just grouping similar things.

											
										
										
											2023-11-27 16:17:28 +00:00
+								        }
-												std.tar: add support for file path in pax attributes

Handles .extended_header type to parse PAX attributes and check if they override
the path of the next file. Increases file path limit to std.fs.MAX_PATH_BYTES.

Fixes #15342

											
										
										
											2023-08-27 17:35:33 +00:00
-												tar: refactor reading pax attributes

											
										
										
											2023-11-30 20:28:10 +00:00
+								        const PaxFileReader = struct {
 								            size: usize,
 								            offset: usize = 0,
 								            reader: *Self,
-												tar: refactor reader and iterator

Make it more readable.

											
										
										
											2023-11-28 22:07:37 +00:00
-												tar: refactor reading pax attributes

											
										
										
											2023-11-30 20:28:10 +00:00
+								            const PaxKey = enum {
 								                // Selected by `next` when the attribute keyword is "path".
 								                path,
 								                // Selected by `next` when the attribute keyword is "linkpath".
 								                linkpath,
 								                // Selected by `next` when the attribute keyword is "size".
 								                size,
 								            };
 								            // A recognized pax attribute whose value has not been consumed yet.
 								            const PaxAttribute = struct {
 								                key: PaxKey,
 								                value_len: usize,
 								                parent: *PaxFileReader,

 								                // Reads the attribute value into the front of `dst` and returns
 								                // that part of `dst`. `dst` has to hold at least `value_len`
 								                // bytes. The trailing newline of the attribute is checked and
 								                // consumed as well.
 								                pub fn value(self: PaxAttribute, dst: []u8) ![]u8 {
 								                    const len = self.value_len;
 								                    assert(dst.len >= len);
 								                    const pax = self.parent;
 								                    const out = dst[0..len];
 								                    _ = try pax.reader.copy(out);
 								                    pax.offset += out.len;
 								                    try pax.checkAttributeEnding();
 								                    return out;
 								                }
 								            };
 								            // Caller of the next has to call value in PaxAttribute, to advance
 								            // reader across value.
 								            pub fn next(self: *PaxFileReader) !?PaxAttribute {
 								                while (true) {
 								                    // Bytes of the pax file that have not been consumed yet.
 								                    const left = self.size - self.offset;
 								                    if (left == 0) return null;

 								                    // Peek at the buffered bytes and parse the attribute header.
 								                    const peeked = try self.reader.readSlice(left);
 								                    const inf = try parsePaxAttribute(peeked, left);

 								                    // Map the keyword to a known key, or null for keys we ignore.
 								                    const known: ?PaxKey = if (inf.is("path"))
 								                        .path
 								                    else if (inf.is("linkpath"))
 								                        .linkpath
 								                    else if (inf.is("size"))
 								                        .size
 								                    else
 								                        null;

 								                    if (known) |key| {
 								                        // Position reader at the start of the value; the caller
 								                        // consumes the value through PaxAttribute.value.
 								                        try self.advance(inf.value_off);
 								                        return PaxAttribute{ .key = key, .value_len = inf.value_len, .parent = self };
 								                    }

 								                    // Unknown key: step over the value and its terminator, then
 								                    // look at the next attribute.
 								                    try self.advance(inf.value_off + inf.value_len);
 								                    try self.checkAttributeEnding();
 								                }
 								            }
 								            // Verifies that the attribute is terminated by a newline and consumes it.
 								            // Returns error.InvalidPaxAttribute when the next byte is not '\n', and
 								            // propagates the reader error when no further byte is available.
 								            fn checkAttributeEnding(self: *PaxFileReader) !void {
 								                // The callers may have drained the buffer (copy of a value, or skip
 								                // past the buffered data). readByte asserts that a byte is buffered,
 								                // so peek with readSlice instead: it refills the buffer when needed
 								                // and returns at least one byte or an error.
 								                const terminator = try self.reader.readSlice(1);
 								                if (terminator[0] != '\n') return error.InvalidPaxAttribute;
 								                try self.advance(1);
 								            }
 								            fn advance(self: *PaxFileReader, len: usize) !void {
 								                // Account for the consumed bytes of the pax file first, ...
 								                self.offset += len;
 								                // ... then move the reader; skip does not need the bytes to be buffered.
 								                try self.reader.skip(len);
 								            }
 								        };
 								    };
-												tar: refactor reader and iterator

Make it more readable.

											
										
										
											2023-11-28 22:07:37 +00:00
+								}
-												tar: refactor code to be more testable

Split reading/parsing tar file and writing results to the disk in two
separate steps. So we can later test parsing part without need to write
everything to the disk.

											
										
										
											2023-11-27 13:51:51 +00:00
+								fn Iterator(comptime ReaderType: type) type {
-												tar: refactor reader and iterator

Make it more readable.

											
										
										
											2023-11-28 22:07:37 +00:00
+								    const BufferedReaderType = BufferedReader(ReaderType);
-												tar: refactor code to be more testable

Split reading/parsing tar file and writing results to the disk in two
separate steps. So we can later test parsing part without need to write
everything to the disk.

											
										
										
											2023-11-27 13:51:51 +00:00
+								    return struct {
-												tar: refactor reading pax attributes

											
										
										
											2023-11-30 20:28:10 +00:00
+								        // scratch buffer for file attributes
 								        scratch: struct {
-												tar: use scratch buffer for file names

That makes names strings stable during the iteration. Otherwise string
buffers can be overwritten while reading file content.

											
										
										
											2023-12-01 17:26:31 +00:00
+								            // size: two paths (name and link_name) and files size bytes (24 in pax attribute)
-												tar: refactor reading pax attributes

											
										
										
											2023-11-30 20:28:10 +00:00
+								            buffer: [std.fs.MAX_PATH_BYTES * 2 + 24]u8 = undefined,
-												tar: refactor reader and iterator

Make it more readable.

											
										
										
											2023-11-28 22:07:37 +00:00
+								            tail: usize = 0,
-												tar: use scratch buffer for file names

That makes names strings stable during the iteration. Otherwise string
buffers can be overwritten while reading file content.

											
										
										
											2023-12-01 17:26:31 +00:00
+								            name: []const u8 = undefined,
 								            link_name: []const u8 = undefined,
 								            size: usize = 0,
-												tar: refactor reading pax attributes

											
										
										
											2023-11-30 20:28:10 +00:00
+								            // Allocate size of the buffer for some attribute.
-												tar: refactor reader and iterator

Make it more readable.

											
										
										
											2023-11-28 22:07:37 +00:00
+								            fn alloc(self: *@This(), size: usize) ![]u8 {
-												tar: refactor reading pax attributes

											
										
										
											2023-11-30 20:28:10 +00:00
+								                const free_size = self.buffer.len - self.tail;
 								                if (size > free_size) return error.TarScratchBufferOverflow;
-												tar: refactor reader and iterator

Make it more readable.

											
										
										
											2023-11-28 22:07:37 +00:00
+								                const head = self.tail;
 								                self.tail += size;
 								                assert(self.tail <= self.buffer.len);
 								                return self.buffer[head..self.tail];
 								            }
-												tar: use scratch buffer for file names

That makes names strings stable during the iteration. Otherwise string
buffers can be overwritten while reading file content.

											
										
										
											2023-12-01 17:26:31 +00:00
+								            // Reset buffer and all fields.
 								            fn reset(self: *@This()) void {
-												tar: refactor reader and iterator

Make it more readable.

											
										
										
											2023-11-28 22:07:37 +00:00
+								                self.tail = 0;
-												tar: use scratch buffer for file names

That makes names strings stable during the iteration. Otherwise string
buffers can be overwritten while reading file content.

											
										
										
											2023-12-01 17:26:31 +00:00
+								                self.name = self.buffer[0..0];
 								                self.link_name = self.buffer[0..0];
 								                self.size = 0;
 								            }
 								            fn append(self: *@This(), header: Header) !void {
 								                if (self.size == 0) self.size = try header.fileSize();
 								                if (self.link_name.len == 0) {
 								                    const link_name = header.linkName();
 								                    if (link_name.len > 0) {
 								                        const buf = try self.alloc(link_name.len);
 								                        @memcpy(buf, link_name);
 								                        self.link_name = buf;
 								                    }
 								                }
 								                if (self.name.len == 0) {
 								                    self.name = try header.fullName((try self.alloc(MAX_HEADER_NAME_SIZE))[0..MAX_HEADER_NAME_SIZE]);
 								                }
-												tar: refactor reader and iterator

Make it more readable.

											
										
										
											2023-11-28 22:07:37 +00:00
+								            }
 								        } = .{},
-												tar: add pax linkpath attribute parsing

Name of symbolic link can be also found in pax attribute.

											
										
										
											2023-11-27 21:23:16 +00:00
-												tar: refactor reader and iterator

Make it more readable.

											
										
										
											2023-11-28 22:07:37 +00:00
+								        reader: BufferedReaderType,
-												tar: refactor code to be more testable

Split reading/parsing tar file and writing results to the disk in two
separate steps. So we can later test parsing part without need to write
everything to the disk.

											
										
										
											2023-11-27 13:51:51 +00:00
+								        diagnostics: ?*Options.Diagnostics,
-												tar: use scratch buffer for file names

That makes names strings stable during the iteration. Otherwise string
buffers can be overwritten while reading file content.

											
										
										
											2023-12-01 17:26:31 +00:00
+								        padding: usize = 0, // bytes of padding to the end of the block
-												tar: refactor code to be more testable

Split reading/parsing tar file and writing results to the disk in two
separate steps. So we can later test parsing part without need to write
everything to the disk.

											
										
										
											2023-11-27 13:51:51 +00:00
 								        const Self = @This();
-												tar: use scratch buffer for file names

That makes names strings stable during the iteration. Otherwise string
buffers can be overwritten while reading file content.

											
										
										
											2023-12-01 17:26:31 +00:00
+								        pub const File = struct {
 								            name: []const u8, // name of file, symlink or directory
 								            link_name: []const u8, // target name of symlink
 								            size: usize, // size of the file in bytes
-												tar: add file mode to result of tarbal iteration

So we have information to set executable bit on write to file system.

											
										
										
											2023-12-02 14:00:42 +00:00
+								            mode: u32,
-												tar: use scratch buffer for file names

That makes names strings stable during the iteration. Otherwise string
buffers can be overwritten while reading file content.

											
										
										
											2023-12-01 17:26:31 +00:00
+								            file_type: Header.FileType,
-												tar: refactor reader and iterator

Make it more readable.

											
										
										
											2023-11-28 22:07:37 +00:00
+								            reader: *BufferedReaderType,
-												tar: refactor code to be more testable

Split reading/parsing tar file and writing results to the disk in two
separate steps. So we can later test parsing part without need to write
everything to the disk.

											
										
										
											2023-11-27 13:51:51 +00:00
-												tar: use scratch buffer for file names

That makes names strings stable during the iteration. Otherwise string
buffers can be overwritten while reading file content.

											
										
										
											2023-12-01 17:26:31 +00:00
+								            // Writes file content to writer.
-												tar: refactor code to be more testable

Split reading/parsing tar file and writing results to the disk in two
separate steps. So we can later test parsing part without need to write
everything to the disk.

											
										
										
											2023-11-27 13:51:51 +00:00
+								            pub fn write(self: File, writer: anytype) !void {
-												tar: refactor reader and iterator

Make it more readable.

											
										
										
											2023-11-28 22:07:37 +00:00
+								                try self.reader.write(writer, self.size);
-												tar: refactor code to be more testable

Split reading/parsing tar file and writing results to the disk in two
separate steps. So we can later test parsing part without need to write
everything to the disk.

											
										
										
											2023-11-27 13:51:51 +00:00
+								            }
-												tar: use scratch buffer for file names

That makes names strings stable during the iteration. Otherwise string
buffers can be overwritten while reading file content.

											
										
										
											2023-12-01 17:26:31 +00:00
+								            // Skips file content. Advances reader.
-												tar: refactor Buffer

Move reader into Buffer and make it BufferedReader. This doesn't
introduce any new functionality just grouping similar things.

											
										
										
											2023-11-27 16:17:28 +00:00
+								            pub fn skip(self: File) !void {
-												tar: refactor reading pax attributes

											
										
										
											2023-11-30 20:28:10 +00:00
+								                try self.reader.skip(self.size);
-												tar: refactor code to be more testable

Split reading/parsing tar file and writing results to the disk in two
separate steps. So we can later test parsing part without need to write
everything to the disk.

											
										
										
											2023-11-27 13:51:51 +00:00
+								            }
 								        };
-												tar: add parsing size in gnu extended format

Reference:
https://www.gnu.org/software/tar/manual/html_node/Extensions.html#Extensions

If the leading byte is 0x80 (128), the non-leading bytes of the field
are concatenated in big-endian order, with the result being a positive
number expressed in binary form.

											
										
										
											2023-11-29 14:28:38 +00:00
+								        // Externally, `next` iterates through the tar archive as if it is a
 								        // series of files. Internally, the tar format often uses fake "files"
 								        // to add meta data that describes the next file. These meta data
 								        // "files" should not normally be visible to the outside. As such, this
 								        // loop iterates through one or more "header files" until it finds a
 								        // "normal file".
-												tar: refactor code to be more testable

Split reading/parsing tar file and writing results to the disk in two
separate steps. So we can later test parsing part without need to write
everything to the disk.

											
										
										
											2023-11-27 13:51:51 +00:00
+								        pub fn next(self: *Self) !?File {
-												tar: use scratch buffer for file names

That makes names strings stable during the iteration. Otherwise string
buffers can be overwritten while reading file content.

											
										
										
											2023-12-01 17:26:31 +00:00
+								            self.scratch.reset();
-												tar: refactor reader and iterator

Make it more readable.

											
										
										
											2023-11-28 22:07:37 +00:00
-												tar: use scratch buffer for file names

That makes names strings stable during the iteration. Otherwise string
buffers can be overwritten while reading file content.

											
										
										
											2023-12-01 17:26:31 +00:00
+								            while (try self.reader.readHeader(self.padding)) |block_bytes| {
-												tar: refactor reading pax attributes

											
										
										
											2023-11-30 20:28:10 +00:00
+								                const header = Header{ .bytes = block_bytes[0..BLOCK_SIZE] };
 								                if (try header.checkChksum() == 0) return null; // zero block found
 								                const file_type = header.fileType();
-												tar: use scratch buffer for file names

That makes names strings stable during the iteration. Otherwise string
buffers can be overwritten while reading file content.

											
										
										
											2023-12-01 17:26:31 +00:00
+								                const size: usize = @intCast(try header.fileSize());
 								                self.padding = blockPadding(size);
-												tar: refactor code to be more testable

Split reading/parsing tar file and writing results to the disk in two
separate steps. So we can later test parsing part without need to write
everyting to the disk.

											
										
										
											2023-11-27 13:51:51 +00:00
 								                switch (file_type) {
-												tar: use scratch buffer for file names

That makes names strings stable during the iteration. Otherwise string
buffers can be overwritten while reading file content.

											
										
										
											2023-12-01 17:26:31 +00:00
+								                    // File types to retrun upstream
-												tar: refactor code to be more testable

Split reading/parsing tar file and writing results to the disk in two
separate steps. So we can later test parsing part without need to write
everyting to the disk.

											
										
										
											2023-11-27 13:51:51 +00:00
+								                    .directory, .normal, .symbolic_link => {
-												tar: use scratch buffer for file names

That makes names strings stable during the iteration. Otherwise string
buffers can be overwritten while reading file content.

											
										
										
											2023-12-01 17:26:31 +00:00
+								                        try self.scratch.append(header);
 								                        const file = File{
 								                            .file_type = file_type,
 								                            .name = self.scratch.name,
 								                            .link_name = self.scratch.link_name,
 								                            .size = self.scratch.size,
 								                            .reader = &self.reader,
-												tar: add file mode to result of tarbal iteration

So we have information to set executable bit on write to file system.

											
										
										
											2023-12-02 14:00:42 +00:00
+								                            .mode = try header.mode(),
-												tar: use scratch buffer for file names

That makes names strings stable during the iteration. Otherwise string
buffers can be overwritten while reading file content.

											
										
										
											2023-12-01 17:26:31 +00:00
+								                        };
 								                        self.padding = blockPadding(file.size);
-												tar: refactor reader and iterator

Make it more readable.

											
										
										
											2023-11-28 22:07:37 +00:00
+								                        return file;
-												tar: refactor code to be more testable

Split reading/parsing tar file and writing results to the disk in two
separate steps. So we can later test parsing part without need to write
everyting to the disk.

											
										
										
											2023-11-27 13:51:51 +00:00
+								                    },
-												tar: use scratch buffer for file names

That makes names strings stable during the iteration. Otherwise string
buffers can be overwritten while reading file content.

											
										
										
											2023-12-01 17:26:31 +00:00
+								                    // Prefix header types
-												tar: refactor reading pax attributes

											
										
										
											2023-11-30 20:28:10 +00:00
+								                    .gnu_long_name => {
-												tar: use scratch buffer for file names

That makes names strings stable during the iteration. Otherwise string
buffers can be overwritten while reading file content.

											
										
										
											2023-12-01 17:26:31 +00:00
+								                        self.scratch.name = nullStr(try self.reader.copy(try self.scratch.alloc(size)));
-												tar: refactor reading pax attributes

											
										
										
											2023-11-30 20:28:10 +00:00
+								                    },
 								                    .gnu_long_link => {
-												tar: use scratch buffer for file names

That makes names strings stable during the iteration. Otherwise string
buffers can be overwritten while reading file content.

											
										
										
											2023-12-01 17:26:31 +00:00
+								                        self.scratch.link_name = nullStr(try self.reader.copy(try self.scratch.alloc(size)));
-												tar: refactor code to be more testable

Split reading/parsing tar file and writing results to the disk in two
separate steps. So we can later test parsing part without need to write
everyting to the disk.

											
										
										
											2023-11-27 13:51:51 +00:00
+								                    },
 								                    .extended_header => {
-												tar: use scratch buffer for file names

That makes names strings stable during the iteration. Otherwise string
buffers can be overwritten while reading file content.

											
										
										
											2023-12-01 17:26:31 +00:00
+								                        if (size == 0) continue;
 								                        // Use just attributes from last extended header.
 								                        self.scratch.reset();
-												tar: refactor code to be more testable

Split reading/parsing tar file and writing results to the disk in two
separate steps. So we can later test parsing part without need to write
everyting to the disk.

											
										
										
											2023-11-27 13:51:51 +00:00
-												tar: use scratch buffer for file names

That makes names strings stable during the iteration. Otherwise string
buffers can be overwritten while reading file content.

											
										
										
											2023-12-01 17:26:31 +00:00
+								                        var rdr = self.reader.paxFileReader(size);
-												tar: refactor reading pax attributes

											
										
										
											2023-11-30 20:28:10 +00:00
+								                        while (try rdr.next()) |attr| {
 								                            switch (attr.key) {
 								                                .path => {
-												tar: use scratch buffer for file names

That makes names strings stable during the iteration. Otherwise string
buffers can be overwritten while reading file content.

											
										
										
											2023-12-01 17:26:31 +00:00
+								                                    self.scratch.name = try noNull(try attr.value(try self.scratch.alloc(attr.value_len)));
-												tar: refactor reading pax attributes

											
										
										
											2023-11-30 20:28:10 +00:00
+								                                },
 								                                .linkpath => {
-												tar: use scratch buffer for file names

That makes names strings stable during the iteration. Otherwise string
buffers can be overwritten while reading file content.

											
										
										
											2023-12-01 17:26:31 +00:00
+								                                    self.scratch.link_name = try noNull(try attr.value(try self.scratch.alloc(attr.value_len)));
-												tar: refactor reading pax attributes

											
										
										
											2023-11-30 20:28:10 +00:00
+								                                },
 								                                .size => {
-												tar: use scratch buffer for file names

That makes names strings stable during the iteration. Otherwise string
buffers can be overwritten while reading file content.

											
										
										
											2023-12-01 17:26:31 +00:00
+								                                    self.scratch.size = try std.fmt.parseInt(usize, try attr.value(try self.scratch.alloc(attr.value_len)), 10);
-												tar: refactor reading pax attributes

											
										
										
											2023-11-30 20:28:10 +00:00
+								                                },
-												tar: refactor code to be more testable

Split reading/parsing tar file and writing results to the disk in two
separate steps. So we can later test parsing part without need to write
everyting to the disk.

											
										
										
											2023-11-27 13:51:51 +00:00
+								                            }
-												tar: add pax linkpath attribute parsing

Name of symbolic link can be also found in pax attribute.

											
										
										
											2023-11-27 21:23:16 +00:00
+								                        }
-												tar: add gnu path and link extensions handling

											
										
										
											2023-11-29 20:37:13 +00:00
+								                    },
-												tar: use scratch buffer for file names

That makes names strings stable during the iteration. Otherwise string
buffers can be overwritten while reading file content.

											
										
										
											2023-12-01 17:26:31 +00:00
+								                    // Ignored header type
-												tar: refactor reading pax attributes

											
										
										
											2023-11-30 20:28:10 +00:00
+								                    .global_extended_header => {
-												tar: use scratch buffer for file names

That makes names strings stable during the iteration. Otherwise string
buffers can be overwritten while reading file content.

											
										
										
											2023-12-01 17:26:31 +00:00
+								                        self.reader.skip(size) catch return error.TarHeadersTooBig;
-												tar: add gnu path and link extensions handling

											
										
										
											2023-11-29 20:37:13 +00:00
+								                    },
-												tar: use scratch buffer for file names

That makes names strings stable during the iteration. Otherwise string
buffers can be overwritten while reading file content.

											
										
										
											2023-12-01 17:26:31 +00:00
+								                    // All other are unsupported header types
-												tar: refactor code to be more testable

Split reading/parsing tar file and writing results to the disk in two
separate steps. So we can later test parsing part without need to write
everyting to the disk.

											
										
										
											2023-11-27 13:51:51 +00:00
+								                    else => {
 								                        const d = self.diagnostics orelse return error.TarUnsupportedFileType;
 								                        try d.errors.append(d.allocator, .{ .unsupported_file_type = .{
-												tar: refactor reading pax attributes

											
										
										
											2023-11-30 20:28:10 +00:00
+								                            .file_name = try d.allocator.dupe(u8, header.name()),
-												tar: refactor code to be more testable

Split reading/parsing tar file and writing results to the disk in two
separate steps. So we can later test parsing part without need to write
everyting to the disk.

											
										
										
											2023-11-27 13:51:51 +00:00
+								                            .file_type = file_type,
 								                        } });
 								                    },
 								                }
 								            }
-												tar: refactor reader and iterator

Make it more readable.

											
										
										
											2023-11-28 22:07:37 +00:00
+								            return null;
-												tar: refactor code to be more testable

Split reading/parsing tar file and writing results to the disk in two
separate steps. So we can later test parsing part without need to write
everyting to the disk.

											
										
										
											2023-11-27 13:51:51 +00:00
+								        }
 								    };
 								}
 								pub fn iterator(reader: anytype, diagnostics: ?*Options.Diagnostics) Iterator(@TypeOf(reader)) {
-												tar: refactor Buffer

Move reader into Buffer and make it BufferedReader. This doesn't
introduce any new functionality just grouping similar things.

											
										
										
											2023-11-27 16:17:28 +00:00
+								    const ReaderType = @TypeOf(reader);
 								    return .{
 								        .reader = BufferedReader(ReaderType){ .unbuffered_reader = reader },
 								        .diagnostics = diagnostics,
 								    };
-												tar: refactor code to be more testable

Split reading/parsing tar file and writing results to the disk in two
separate steps. So we can later test parsing part without need to write
everyting to the disk.

											
										
										
											2023-11-27 13:51:51 +00:00
+								}
-												add std.tar for tar file unpacking

											
										
										
											2023-01-10 05:36:35 +00:00
/// Extracts the tar archive read from `reader` into `dir`.
///
/// Directories, regular files and symbolic links are created below `dir`
/// after removing `options.strip_components` leading path components from
/// each entry name. Missing parent directories are created on demand when a
/// file or symlink cannot be created because of `error.FileNotFound`.
///
/// When `options.diagnostics` is set, failures to create a file or symlink are
/// appended there and extraction continues; otherwise
/// `error.UnableToCreateFile` / `error.UnableToCreateSymLink` is returned.
///
/// Note: `Options.mode_mode` defaults to `.executable_bit_only`, which is not
/// implemented and panics. Callers currently have to pass `.ignore`.
pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: Options) !void {
    switch (options.mode_mode) {
        .ignore => {},
        .executable_bit_only => {
            // This code does not look at the mode bits yet. To implement this feature,
            // the implementation must be adjusted to look at the mode, and check the
            // user executable bit, then call fchmod on newly created files when
            // the executable bit is supposed to be set.
            // It also needs to properly deal with ACLs on Windows.
            @panic("TODO: unimplemented: tar ModeMode.executable_bit_only");
        },
    }

    var iter = iterator(reader, options.diagnostics);

    while (try iter.next()) |file| {
        switch (file.file_type) {
            .directory => {
                const file_name = try stripComponents(file.name, options.strip_components);
                if (file_name.len != 0 and !options.exclude_empty_directories) {
                    try dir.makePath(file_name);
                }
            },
            .normal => {
                // A nameless, empty entry ends the extraction.
                if (file.size == 0 and file.name.len == 0) return;
                const file_name = try stripComponents(file.name, options.strip_components);

                const fs_file = dir.createFile(file_name, .{}) catch |err| switch (err) {
                    error.FileNotFound => again: {
                        // Try once more after creating the parent directory.
                        const code = code: {
                            if (std.fs.path.dirname(file_name)) |dir_name| {
                                dir.makePath(dir_name) catch |code| break :code code;
                                break :again dir.createFile(file_name, .{}) catch |code| {
                                    break :code code;
                                };
                            }
                            break :code err;
                        };
                        const d = options.diagnostics orelse return error.UnableToCreateFile;
                        // Free the copied name again if the diagnostic cannot
                        // be recorded, so the copy is never leaked.
                        const file_name_copy = try d.allocator.dupe(u8, file_name);
                        errdefer d.allocator.free(file_name_copy);
                        try d.errors.append(d.allocator, .{ .unable_to_create_file = .{
                            .code = code,
                            .file_name = file_name_copy,
                        } });
                        break :again null;
                    },
                    else => |e| return e,
                };
                defer if (fs_file) |f| f.close();

                if (fs_file) |f| {
                    try file.write(f);
                } else {
                    // The file could not be created: still consume its
                    // content so the iterator can continue with the next entry.
                    try file.skip();
                }
            },
            .symbolic_link => {
                // The file system path of the symbolic link.
                const file_name = try stripComponents(file.name, options.strip_components);
                // The data inside the symbolic link.
                const link_name = file.link_name;

                dir.symLink(link_name, file_name, .{}) catch |err| again: {
                    const code = code: {
                        if (err == error.FileNotFound) {
                            if (std.fs.path.dirname(file_name)) |dir_name| {
                                dir.makePath(dir_name) catch |code| break :code code;
                                break :again dir.symLink(link_name, file_name, .{}) catch |code| {
                                    break :code code;
                                };
                            }
                        }
                        break :code err;
                    };
                    const d = options.diagnostics orelse return error.UnableToCreateSymLink;
                    // Both copies are released again if a later allocation or
                    // the append fails.
                    const file_name_copy = try d.allocator.dupe(u8, file_name);
                    errdefer d.allocator.free(file_name_copy);
                    const link_name_copy = try d.allocator.dupe(u8, link_name);
                    errdefer d.allocator.free(link_name_copy);
                    try d.errors.append(d.allocator, .{ .unable_to_create_sym_link = .{
                        .code = code,
                        .file_name = file_name_copy,
                        .link_name = link_name_copy,
                    } });
                };
            },
            // `next` only returns the three file types handled above.
            else => unreachable,
        }
    }
}
-												std.tar: add strip_components option

											
										
										
											2023-01-10 07:38:18 +00:00
/// Returns `path` with its first `count` '/'-terminated components removed.
///
/// The result is a sub-slice of `path`; nothing is allocated. Returns
/// `error.TarComponentsOutsideStrippedPrefix` when `path` contains fewer than
/// `count` '/' separators.
fn stripComponents(path: []const u8, count: u32) ![]const u8 {
    var rest = path;
    var remaining = count;
    while (remaining != 0) : (remaining -= 1) {
        const slash = std.mem.indexOfScalar(u8, rest, '/') orelse
            return error.TarComponentsOutsideStrippedPrefix;
        rest = rest[slash + 1 ..];
    }
    return rest;
}
-												tar: prefix test cases with 'tar'

To make it a little easier to filter from all stdlib tests.

											
										
										
											2023-12-01 18:03:32 +00:00
test "tar stripComponents" {
    // Each case strips `count` leading components from "a/b/c".
    const cases = [_]struct { expected: []const u8, count: u32 }{
        .{ .expected = "a/b/c", .count = 0 },
        .{ .expected = "b/c", .count = 1 },
        .{ .expected = "c", .count = 2 },
    };
    for (cases) |case| {
        try std.testing.expectEqualStrings(case.expected, try stripComponents("a/b/c", case.count));
    }
}
-												std.tar: add support for file path in pax attributes

Handles .extended_header type to parse PAX attributes and check if they override
the path of the next file. Increases file path limit to std.fs.MAX_PATH_BYTES.

Fixes #15342

											
										
										
											2023-08-27 17:35:33 +00:00
/// Position of a single key/value record inside a pax attribute buffer, as
/// produced by `parsePaxAttribute`.
const PaxAttributeInfo = struct {
    /// Record length as declared by the decimal number that starts the record.
    size: usize,
    /// Key text found between the first space and the following '='.
    key: []const u8,
    /// Offset of the first value byte within the parsed buffer.
    value_off: usize,
    /// Number of value bytes (see `parsePaxAttribute` for how it is derived).
    value_len: usize,

    /// Reports whether this attribute's key equals `key` byte for byte.
    inline fn is(self: @This(), key: []const u8) bool {
        return (std.mem.eql(u8, self.key, key));
    }
};

/// Parses the prefix of one pax record of the form `<size> <key>=<value>\n`
/// from `data`.
///
/// `size` is the decimal record length; `value_len` is computed as `size`
/// minus everything up to and including '=' and minus one more byte (the
/// trailing newline in the records exercised by the tests).
///
/// Returns `error.InvalidPaxAttribute` when the space or '=' separator is
/// missing, when `size` exceeds `max_size`, when `size` is too small to cover
/// the `<size> <key>=` prefix, or when the key contains a 0 byte. Errors from
/// parsing the decimal size are returned as-is.
fn parsePaxAttribute(data: []const u8, max_size: usize) !PaxAttributeInfo {
    const pos_space = std.mem.indexOfScalar(u8, data, ' ') orelse return error.InvalidPaxAttribute;
    const pos_equals = std.mem.indexOfScalarPos(u8, data, pos_space, '=') orelse return error.InvalidPaxAttribute;
    const kv_size = try std.fmt.parseInt(usize, data[0..pos_space], 10);
    if (kv_size > max_size) {
        return error.InvalidPaxAttribute;
    }
    // Checked subtraction: a declared size shorter than the prefix would
    // otherwise underflow on attacker-controlled input.
    const value_len = std.math.sub(usize, kv_size, pos_equals + 2) catch
        return error.InvalidPaxAttribute;
    const key = data[pos_space + 1 .. pos_equals];
    return .{
        .size = kv_size,
        .key = try noNull(key),
        .value_off = pos_equals + 1,
        .value_len = value_len,
    };
}

/// Returns `str` unchanged, or `error.InvalidPaxAttribute` if it contains a
/// 0 byte.
fn noNull(str: []const u8) ![]const u8 {
    if (std.mem.indexOfScalar(u8, str, 0)) |_| return error.InvalidPaxAttribute;
    return str;
}
-												tar: prefix test cases with 'tar'

To make it a little easier to filter from all stdlib tests.

											
										
										
											2023-12-01 18:03:32 +00:00
test "tar parsePaxAttribute" {
    const testing = std.testing;

    // A single `path` record with a 1000 byte value: 1011 bytes in total.
    const prefix = "1011 path=";
    const file_name = "0123456789" ** 100;
    const header = prefix ++ file_name ++ "\n";

    const attr_info = try parsePaxAttribute(header, 1011);
    try testing.expectEqual(@as(usize, 1011), attr_info.size);
    try testing.expectEqualStrings("path", attr_info.key);
    try testing.expectEqual(prefix.len, attr_info.value_off);
    try testing.expectEqual(file_name.len, attr_info.value_len);
    // A larger limit does not change the result.
    try testing.expectEqual(attr_info, try parsePaxAttribute(header, 1012));

    // Inputs that must be rejected.
    const rejected = [_]struct { data: []const u8, max_size: usize }{
        .{ .data = header, .max_size = 1010 }, // record larger than the limit
        .{ .data = "", .max_size = 0 }, // no separators at all
        .{ .data = "13 pa\x00th=abc\n", .max_size = 1024 }, // null in key
    };
    for (rejected) |case| {
        try testing.expectError(error.InvalidPaxAttribute, parsePaxAttribute(case.data, case.max_size));
    }
}
-												tar: add initial test cases

Just adding tests, without changing functionality.

											
										
										
											2023-11-27 20:37:30 +00:00
/// Expectations for a single tar archive used by the table-driven tests.
const TestCase = struct {
    /// One entry that is expected to be found in the archive.
    const File = struct {
        name: []const u8,
        size: usize = 0,
        mode: u32 = 0,
        link_name: []const u8 = "",
        file_type: Header.FileType = .normal,
        /// Set when the archive holds only the header and no file body;
        /// useful for huge files.
        truncated: bool = false,
    };

    /// Path to the tar archive file on disk.
    path: []const u8,
    /// Files expected to be found in the archive.
    files: []const File = &.{},
    /// Checksums of the files' content.
    chksums: []const []const u8 = &.{},
    /// When set, parsing should fail with this error.
    err: ?anyerror = null,
};
-												tar: prefix test cases with 'tar'

To make it a little easier to filter them from all the stdlib tests.

											
										
										
											2023-12-01 18:03:32 +00:00
+								test "tar run Go test cases" {
-												tar: use Go test cases path from env variable

Skip tests if env is not set.

											
										
										
											2023-12-01 17:50:48 +00:00
+								    const test_dir = if (std.os.getenv("GO_TAR_TESTDATA_PATH")) |path|
 								        try std.fs.openDirAbsolute(path, .{})
 								    else
 								        return error.SkipZigTest;
-												tar: add initial test cases

Just adding tests, without changing functionality.

											
										
										
											2023-11-27 20:37:30 +00:00
+								    const cases = [_]TestCase{
 								        .{
 								            .path = "gnu.tar",
 								            .files = &[_]TestCase.File{
 								                .{
 								                    .name = "small.txt",
 								                    .size = 5,
-												tar: add file mode to result of tarbal iteration

So we have information to set executable bit on write to file system.

											
										
										
											2023-12-02 14:00:42 +00:00
+								                    .mode = 0o640,
-												tar: add initial test cases

Just adding tests, without changing functionality.

											
										
										
											2023-11-27 20:37:30 +00:00
+								                },
 								                .{
 								                    .name = "small2.txt",
 								                    .size = 11,
-												tar: add file mode to result of tarbal iteration

So we have information to set executable bit on write to file system.

											
										
										
											2023-12-02 14:00:42 +00:00
+								                    .mode = 0o640,
-												tar: add initial test cases

Just adding tests, without changing functionality.

											
										
										
											2023-11-27 20:37:30 +00:00
+								                },
 								            },
 								            .chksums = &[_][]const u8{
 								                "e38b27eaccb4391bdec553a7f3ae6b2f",
 								                "c65bd2e50a56a2138bf1716f2fd56fe9",
 								            },
 								        },
 								        .{
 								            .path = "sparse-formats.tar",
 								            .err = error.TarUnsupportedFileType,
 								        },
 								        .{
 								            .path = "star.tar",
 								            .files = &[_]TestCase.File{
 								                .{
 								                    .name = "small.txt",
 								                    .size = 5,
-												tar: add file mode to result of tarbal iteration

So we have information to set executable bit on write to file system.

											
										
										
											2023-12-02 14:00:42 +00:00
+								                    .mode = 0o640,
-												tar: add initial test cases

Just adding tests, without changing functionality.

											
										
										
											2023-11-27 20:37:30 +00:00
+								                },
 								                .{
 								                    .name = "small2.txt",
 								                    .size = 11,
-												tar: add file mode to result of tarbal iteration

So we have information to set executable bit on write to file system.

											
										
										
											2023-12-02 14:00:42 +00:00
+								                    .mode = 0o640,
-												tar: add initial test cases

Just adding tests, without changing functionality.

											
										
										
											2023-11-27 20:37:30 +00:00
+								                },
 								            },
 								            .chksums = &[_][]const u8{
 								                "e38b27eaccb4391bdec553a7f3ae6b2f",
 								                "c65bd2e50a56a2138bf1716f2fd56fe9",
 								            },
 								        },
 								        .{
 								            .path = "v7.tar",
 								            .files = &[_]TestCase.File{
 								                .{
 								                    .name = "small.txt",
 								                    .size = 5,
-												tar: add file mode to result of tarbal iteration

So we have information to set executable bit on write to file system.

											
										
										
											2023-12-02 14:00:42 +00:00
+								                    .mode = 0o444,
-												tar: add initial test cases

Just adding tests, without changing functionality.

											
										
										
											2023-11-27 20:37:30 +00:00
+								                },
 								                .{
 								                    .name = "small2.txt",
 								                    .size = 11,
-												tar: add file mode to result of tarbal iteration

So we have information to set executable bit on write to file system.

											
										
										
											2023-12-02 14:00:42 +00:00
+								                    .mode = 0o444,
-												tar: add initial test cases

Just adding tests, without changing functionality.

											
										
										
											2023-11-27 20:37:30 +00:00
+								                },
 								            },
 								            .chksums = &[_][]const u8{
 								                "e38b27eaccb4391bdec553a7f3ae6b2f",
 								                "c65bd2e50a56a2138bf1716f2fd56fe9",
 								            },
 								        },
 								        .{
 								            .path = "pax.tar",
 								            .files = &[_]TestCase.File{
 								                .{
 								                    .name = "a/123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100",
 								                    .size = 7,
-												tar: add file mode to result of tarbal iteration

So we have information to set executable bit on write to file system.

											
										
										
											2023-12-02 14:00:42 +00:00
+								                    .mode = 0o664,
-												tar: add initial test cases

Just adding tests, without changing functionality.

											
										
										
											2023-11-27 20:37:30 +00:00
+								                },
 								                .{
 								                    .name = "a/b",
 								                    .size = 0,
 								                    .file_type = .symbolic_link,
-												tar: add file mode to result of tarbal iteration

So we have information to set executable bit on write to file system.

											
										
										
											2023-12-02 14:00:42 +00:00
+								                    .mode = 0o777,
-												tar: add pax linkpath attribute parsing

Name of symbolic link can be also found in pax attribute.

											
										
										
											2023-11-27 21:23:16 +00:00
+								                    .link_name = "123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100",
-												tar: add initial test cases

Just adding tests, without changing functionality.

											
										
										
											2023-11-27 20:37:30 +00:00
+								                },
 								            },
 								            .chksums = &[_][]const u8{
 								                "3c382e8f5b6631aa2db52643912ffd4a",
 								            },
 								        },
-												tar: handle pax null attrs and pax attr ending

											
										
										
											2023-11-29 19:30:08 +00:00
+								        .{
 								            // pax attribute don't end with \n
 								            .path = "pax-bad-hdr-file.tar",
 								            .err = error.InvalidPaxAttribute,
 								        },
-												tar: refactor reader and iterator

Make it more readable.

											
										
										
											2023-11-28 22:07:37 +00:00
+								        .{
-												tar: handle pax null attrs and pax attr ending

											
										
										
											2023-11-29 19:30:08 +00:00
+								            // size is in pax attribute
-												tar: refactor reader and iterator

Make it more readable.

											
										
										
											2023-11-28 22:07:37 +00:00
+								            .path = "pax-pos-size-file.tar",
 								            .files = &[_]TestCase.File{
 								                .{
 								                    .name = "foo",
 								                    .size = 999,
 								                    .file_type = .normal,
-												tar: add file mode to result of tarbal iteration

So we have information to set executable bit on write to file system.

											
										
										
											2023-12-02 14:00:42 +00:00
+								                    .mode = 0o640,
-												tar: refactor reader and iterator

Make it more readable.

											
										
										
											2023-11-28 22:07:37 +00:00
+								                },
 								            },
 								            .chksums = &[_][]const u8{
 								                "0afb597b283fe61b5d4879669a350556",
 								            },
 								        },
-												tar: add initial test cases

Just adding tests, without changing functionality.

											
										
										
											2023-11-27 20:37:30 +00:00
+								        .{
 								            // has pax records which we are not interested in
 								            .path = "pax-records.tar",
 								            .files = &[_]TestCase.File{
 								                .{
 								                    .name = "file",
 								                },
 								            },
 								        },
 								        .{
 								            // has global records which we are ignoring
 								            .path = "pax-global-records.tar",
 								            .files = &[_]TestCase.File{
 								                .{
 								                    .name = "file1",
 								                },
 								                .{
 								                    .name = "file2",
 								                },
 								                .{
 								                    .name = "file3",
 								                },
 								                .{
 								                    .name = "file4",
 								                },
 								            },
 								        },
 								        .{
 								            .path = "nil-uid.tar",
 								            .files = &[_]TestCase.File{
 								                .{
 								                    .name = "P1050238.JPG.log",
 								                    .size = 14,
 								                    .file_type = .normal,
-												tar: add file mode to result of tarbal iteration

So we have information to set executable bit on write to file system.

											
										
										
											2023-12-02 14:00:42 +00:00
+								                    .mode = 0o664,
-												tar: add initial test cases

Just adding tests, without changing functionality.

											
										
										
											2023-11-27 20:37:30 +00:00
+								                },
 								            },
 								            .chksums = &[_][]const u8{
 								                "08d504674115e77a67244beac19668f5",
 								            },
 								        },
 								        .{
 								            // has xattrs and pax records which we are ignoring
 								            .path = "xattrs.tar",
 								            .files = &[_]TestCase.File{
 								                .{
 								                    .name = "small.txt",
 								                    .size = 5,
 								                    .file_type = .normal,
-												tar: add file mode to result of tarbal iteration

So we have information to set executable bit on write to file system.

											
										
										
											2023-12-02 14:00:42 +00:00
+								                    .mode = 0o644,
-												tar: add initial test cases

Just adding tests, without changing functionality.

											
										
										
											2023-11-27 20:37:30 +00:00
+								                },
 								                .{
 								                    .name = "small2.txt",
 								                    .size = 11,
 								                    .file_type = .normal,
-												tar: add file mode to result of tarbal iteration

So we have information to set executable bit on write to file system.

											
										
										
											2023-12-02 14:00:42 +00:00
+								                    .mode = 0o644,
-												tar: add initial test cases

Just adding tests, without changing functionality.

											
										
										
											2023-11-27 20:37:30 +00:00
+								                },
 								            },
 								            .chksums = &[_][]const u8{
 								                "e38b27eaccb4391bdec553a7f3ae6b2f",
 								                "c65bd2e50a56a2138bf1716f2fd56fe9",
 								            },
 								        },
 								        .{
 								            .path = "gnu-multi-hdrs.tar",
-												tar: add gnu path and link extensions handling

											
										
										
											2023-11-29 20:37:13 +00:00
+								            .files = &[_]TestCase.File{
 								                .{
 								                    .name = "GNU2/GNU2/long-path-name",
 								                    .link_name = "GNU4/GNU4/long-linkpath-name",
 								                    .file_type = .symbolic_link,
 								                },
 								            },
-												tar: add initial test cases

Just adding tests, without changing functionality.

											
										
										
											2023-11-27 20:37:30 +00:00
+								        },
 								        .{
-												tar: add gnu path and link extensions handling

											
										
										
											2023-11-29 20:37:13 +00:00
+								            // has gnu type D (directory) and S (sparse) blocks
-												tar: add initial test cases

Just adding tests, without changing functionality.

											
										
										
											2023-11-27 20:37:30 +00:00
+								            .path = "gnu-incremental.tar",
 								            .err = error.TarUnsupportedFileType,
 								        },
-												tar: handle pax null attrs and pax attr ending

											
										
										
											2023-11-29 19:30:08 +00:00
+								        .{
 								            // should use values only from last pax header
 								            .path = "pax-multi-hdrs.tar",
 								            .files = &[_]TestCase.File{
 								                .{
 								                    .name = "bar",
 								                    .link_name = "PAX4/PAX4/long-linkpath-name",
 								                    .file_type = .symbolic_link,
 								                },
 								            },
 								        },
-												tar: add gnu path and link extensions handling

											
										
										
											2023-11-29 20:37:13 +00:00
+								        .{
 								            .path = "gnu-long-nul.tar",
 								            .files = &[_]TestCase.File{
 								                .{
 								                    .name = "0123456789",
-												tar: add file mode to result of tarbal iteration

So we have information to set executable bit on write to file system.

											
										
										
											2023-12-02 14:00:42 +00:00
+								                    .mode = 0o644,
-												tar: add gnu path and link extensions handling

											
										
										
											2023-11-29 20:37:13 +00:00
+								                },
 								            },
 								        },
 								        .{
 								            .path = "gnu-utf8.tar",
 								            .files = &[_]TestCase.File{
 								                .{
 								                    .name = "☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹☺☻☹",
-												tar: add file mode to result of tarbal iteration

So we have information to set executable bit on write to file system.

											
										
										
											2023-12-02 14:00:42 +00:00
+								                    .mode = 0o644,
-												tar: add gnu path and link extensions handling

											
										
										
											2023-11-29 20:37:13 +00:00
+								                },
 								            },
 								        },
-												tar: add initial test cases

Just adding tests, without changing functionality.

											
										
										
											2023-11-27 20:37:30 +00:00
+								        .{
 								            .path = "gnu-not-utf8.tar",
 								            .files = &[_]TestCase.File{
 								                .{
 								                    .name = "hi\x80\x81\x82\x83bye",
-												tar: add file mode to result of tarbal iteration

So we have information to set executable bit on write to file system.

											
										
										
											2023-12-02 14:00:42 +00:00
+								                    .mode = 0o644,
-												tar: add initial test cases

Just adding tests, without changing functionality.

											
										
										
											2023-11-27 20:37:30 +00:00
+								                },
 								            },
 								        },
-												tar: handle pax null attrs and pax attr ending

											
										
										
											2023-11-29 19:30:08 +00:00
+								        .{
-												tar: add gnu path and link extensions handling

											
										
										
											2023-11-29 20:37:13 +00:00
+								            // null in pax key
 								            .path = "pax-nul-xattrs.tar",
 								            .err = error.InvalidPaxAttribute,
-												tar: handle pax null attrs and pax attr ending

											
										
										
											2023-11-29 19:30:08 +00:00
+								        },
 								        .{
 								            .path = "pax-nul-path.tar",
 								            .err = error.InvalidPaxAttribute,
 								        },
 								        .{
-												tar: add gnu path and link extensions handling

											
										
										
											2023-11-29 20:37:13 +00:00
+								            .path = "neg-size.tar",
 								            .err = error.TarHeader,
 								        },
 								        .{
 								            .path = "issue10968.tar",
 								            .err = error.TarHeader,
 								        },
 								        .{
 								            .path = "issue11169.tar",
 								            .err = error.TarHeader,
 								        },
 								        .{
 								            .path = "issue12435.tar",
 								            .err = error.TarHeaderChksum,
 								        },
 								        .{
 								            // has magic with space at end instead of null
 								            .path = "invalid-go17.tar",
 								            .files = &[_]TestCase.File{
 								                .{
 								                    .name = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/foo",
 								                },
 								            },
 								        },
 								        .{
 								            .path = "ustar-file-devs.tar",
 								            .files = &[_]TestCase.File{
 								                .{
 								                    .name = "file",
-												tar: add file mode to result of tarbal iteration

So we have information to set executable bit on write to file system.

											
										
										
											2023-12-02 14:00:42 +00:00
+								                    .mode = 0o644,
-												tar: add gnu path and link extensions handling

											
										
										
											2023-11-29 20:37:13 +00:00
+								                },
 								            },
-												tar: handle pax null attrs and pax attr ending

											
										
										
											2023-11-29 19:30:08 +00:00
+								        },
-												tar: add initial test cases

Just adding tests, without changing functionality.

											
										
										
											2023-11-27 20:37:30 +00:00
+								        .{
 								            .path = "trailing-slash.tar",
 								            .files = &[_]TestCase.File{
 								                .{
 								                    .name = "123456789/" ** 30,
 								                    .file_type = .directory,
 								                },
 								            },
 								        },
-												tar: add parsing size in gnu extended format

Reference:
https://www.gnu.org/software/tar/manual/html_node/Extensions.html#Extensions

If the leading byte is 0x80 (128), the non-leading bytes of the field
are concatenated in big-endian order, with the result being a positive
number expressed in binary form.

											
										
										
											2023-11-29 14:28:38 +00:00
+								        .{
 								            // Has size in gnu extended format. To represent size bigger than 8 GB.
 								            .path = "writer-big.tar",
 								            .files = &[_]TestCase.File{
 								                .{
 								                    .name = "tmp/16gig.txt",
 								                    .size = 16 * 1024 * 1024 * 1024,
 								                    .truncated = true,
-												tar: add file mode to result of tarbal iteration

So we have information to set executable bit on write to file system.

											
										
										
											2023-12-02 14:00:42 +00:00
+								                    .mode = 0o640,
-												tar: add parsing size in gnu extended format

Reference:
https://www.gnu.org/software/tar/manual/html_node/Extensions.html#Extensions

If the leading byte is 0x80 (128), the non-leading bytes of the field
are concatenated in big-endian order, with the result being a positive
number expressed in binary form.

											
										
										
											2023-11-29 14:28:38 +00:00
+								                },
 								            },
 								        },
 								        .{
 								            // Size in gnu extended format, and name in pax attribute.
 								            .path = "writer-big-long.tar",
 								            .files = &[_]TestCase.File{
 								                .{
 								                    .name = "longname/" ** 15 ++ "16gig.txt",
 								                    .size = 16 * 1024 * 1024 * 1024,
-												tar: add file mode to result of tarbal iteration

So we have information to set executable bit on write to file system.

											
										
										
											2023-12-02 14:00:42 +00:00
+								                    .mode = 0o644,
-												tar: add parsing size in gnu extended format

Reference:
https://www.gnu.org/software/tar/manual/html_node/Extensions.html#Extensions

If the leading byte is 0x80 (128), the non-leading bytes of the field
are concatenated in big-endian order, with the result being a positive
number expressed in binary form.

											
										
										
											2023-11-29 14:28:38 +00:00
+								                    .truncated = true,
 								                },
 								            },
 								        },
-												tar: add initial test cases

Just adding tests, without changing functionality.

											
										
										
											2023-11-27 20:37:30 +00:00
+								    };
 								    for (cases) |case| {
 								        var fs_file = try test_dir.openFile(case.path, .{});
 								        defer fs_file.close();
 								        var iter = iterator(fs_file.reader(), null);
 								        var i: usize = 0;
 								        while (iter.next() catch |err| {
 								            if (case.err) |e| {
 								                try std.testing.expectEqual(e, err);
 								                continue;
 								            } else {
 								                return err;
 								            }
-												tar: use Go test cases path from env variable

Skip tests if env is not set.

											
										
										
											2023-12-01 17:50:48 +00:00
+								        }) |actual| : (i += 1) {
-												tar: add initial test cases

Just adding tests, without changing functionality.

											
										
										
											2023-11-27 20:37:30 +00:00
+								            const expected = case.files[i];
 								            try std.testing.expectEqualStrings(expected.name, actual.name);
 								            try std.testing.expectEqual(expected.size, actual.size);
 								            try std.testing.expectEqual(expected.file_type, actual.file_type);
-												tar: add file mode to result of tarbal iteration

So we have information to set executable bit on write to file system.

											
										
										
											2023-12-02 14:00:42 +00:00
+								            try std.testing.expectEqual(expected.mode, actual.mode);
-												tar: add initial test cases

Just adding tests, without changing functionality.

											
										
										
											2023-11-27 20:37:30 +00:00
+								            try std.testing.expectEqualStrings(expected.link_name, actual.link_name);
 								            if (case.chksums.len > i) {
-												tar: use scratch buffer for file names

That makes names strings stable during the iteration. Otherwise string
buffers can be overwritten while reading file content.

											
										
										
											2023-12-01 17:26:31 +00:00
+								                var md5writer = Md5Writer{};
 								                try actual.write(&md5writer);
 								                const chksum = md5writer.chksum();
 								                try std.testing.expectEqualStrings(case.chksums[i], &chksum);
-												tar: add initial test cases

Just adding tests, without changing functionality.

											
										
										
											2023-11-27 20:37:30 +00:00
+								            } else {
-												tar: add parsing size in gnu extended format

Reference:
https://www.gnu.org/software/tar/manual/html_node/Extensions.html#Extensions

If the leading byte is 0x80 (128), the non-leading bytes of the field
are concatenated in big-endian order, with the result being a positive
number expressed in binary form.

											
										
										
											2023-11-29 14:28:38 +00:00
+								                if (!expected.truncated) try actual.skip(); // skip file content
-												tar: add initial test cases

Just adding tests, without changing functionality.

											
										
										
											2023-11-27 20:37:30 +00:00
+								            }
 								        }
 								        try std.testing.expectEqual(case.files.len, i);
 								    }
 								}
-												tar: use scratch buffer for file names

That keeps the name strings stable during iteration. Otherwise the string
buffers can be overwritten while reading file content.

											
										
										
											2023-12-01 17:26:31 +00:00
 								/// Writer used in tests to calculate the checksum of file content: every
 								/// chunk passed to `writeAll` is fed into an MD5 hash.
 								const Md5Writer = struct {
 								    h: std.crypto.hash.Md5 = std.crypto.hash.Md5.init(.{}),

 								    const Self = @This();
 								    const Md5 = std.crypto.hash.Md5;

 								    /// Adds `buf` to the running hash.
 								    pub fn writeAll(self: *Self, buf: []const u8) !void {
 								        self.h.update(buf);
 								    }

 								    /// Finalizes the hash and returns the digest as 32 lowercase hex characters.
 								    pub fn chksum(self: *Self) [32]u8 {
 								        // `final` fills the whole digest buffer, so no initial value is needed.
 								        var digest: [Md5.digest_length]u8 = undefined;
 								        self.h.final(&digest);
 								        return std.fmt.bytesToHex(digest, .lower);
 								    }
 								};