zig/lib/std/tar.zig

/// A tar archive is a single ordinary file which can contain many files (or
/// directories, symlinks, ...). It is built from a series of blocks, each 512
/// bytes in size. The first block of each entry is a header which defines type,
/// name, size, permissions and other attributes. The header is followed by a
/// series of blocks of file content, if the entry has any content. Content is
/// padded to the block size, so the next header always starts at a block boundary.
///
/// This simple format is extended by GNU and POSIX pax extensions to support
/// file names longer than 256 bytes and additional attributes.
///
/// This is not a comprehensive tar parser. Here we only handle the file types
/// needed to support the Zig package manager: normal file, directory, symbolic
/// link. And a subset of attributes: name, size, permissions.
///
/// GNU tar reference: https://www.gnu.org/software/tar/manual/html_node/Standard.html
/// pax reference: https://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13
///
const std = @import("std.zig");
const assert = std.debug.assert;

/// Settings that control how `pipeToFileSystem` extracts an archive.
pub const Options = struct {
    /// How many leading path components are removed from each entry name
    /// before it is created on disk.
    strip_components: u32 = 0,
    /// Policy for the "mode" property stored in the tar file.
    mode_mode: ModeMode = .executable_bit_only,
    /// When set, directory entries are not created on their own.
    exclude_empty_directories: bool = false,
    /// Optional sink for detailed error messages.
    /// When this is provided, some errors which would otherwise be returned
    /// immediately are recorded here instead. The API user must inspect the
    /// recorded errors to know whether the operation succeeded or failed.
    diagnostics: ?*Diagnostics = null,

    pub const ModeMode = enum {
        /// The mode from the tar file is completely ignored. Files are created
        /// with the default mode when creating files.
        ignore,
        /// Only the owner executable bit of the tar mode is inspected. That
        /// bit is copied to the group and other executable bits; every other
        /// mode bit is left as the default when creating files.
        executable_bit_only,
    };

    /// Collection of non-fatal errors gathered while processing an archive.
    /// Every string stored in `errors` is owned by `allocator` and released
    /// by `deinit`.
    pub const Diagnostics = struct {
        allocator: std.mem.Allocator,
        errors: std.ArrayListUnmanaged(Error) = .{},

        pub const Error = union(enum) {
            unable_to_create_sym_link: struct {
                code: anyerror,
                file_name: []const u8,
                link_name: []const u8,
            },
            unable_to_create_file: struct {
                code: anyerror,
                file_name: []const u8,
            },
            unsupported_file_type: struct {
                file_name: []const u8,
                file_type: Header.Kind,
            },
        };

        /// Frees every recorded string, then the error list itself, and
        /// finally invalidates `d`.
        pub fn deinit(d: *Diagnostics) void {
            const gpa = d.allocator;
            for (d.errors.items) |item| switch (item) {
                .unable_to_create_sym_link => |info| {
                    gpa.free(info.file_name);
                    gpa.free(info.link_name);
                },
                .unable_to_create_file => |info| gpa.free(info.file_name),
                .unsupported_file_type => |info| gpa.free(info.file_name),
            };
            d.errors.deinit(gpa);
            d.* = undefined;
        }
    };
};

/// Read-only view over one 512 byte tar header block.
/// All accessors decode fields at fixed byte offsets of `bytes`.
pub const Header = struct {
    const SIZE = 512;
    const MAX_NAME_SIZE = 100 + 1 + 155; // name(100) + separator(1) + prefix(155)
    const LINK_NAME_SIZE = 100;

    bytes: *const [SIZE]u8,

    /// Value of the type flag byte (offset 156).
    pub const Kind = enum(u8) {
        normal_alias = 0,
        normal = '0',
        hard_link = '1',
        symbolic_link = '2',
        character_special = '3',
        block_special = '4',
        directory = '5',
        fifo = '6',
        contiguous = '7',
        global_extended_header = 'g',
        extended_header = 'x',
        // Types 'L' and 'K' are used by the GNU format for a meta file
        // used to store the path or link name for the next file.
        gnu_long_name = 'L',
        gnu_long_link = 'K',
        gnu_sparse = 'S',
        solaris_extended_header = 'X',
        _,
    };

    /// Writes the entry name into `buffer` and returns the written part.
    /// For ustar headers with a non-empty prefix the result is
    /// "<prefix>/<name>", otherwise just the name field.
    /// TODO: check against "../" and other nefarious things
    pub fn fullName(header: Header, buffer: *[MAX_NAME_SIZE]u8) ![]const u8 {
        const base = header.name();
        const dir = header.prefix();
        if (header.is_ustar() and dir.len > 0) {
            const total = dir.len + 1 + base.len;
            @memcpy(buffer[0..dir.len], dir);
            buffer[dir.len] = '/';
            @memcpy(buffer[dir.len + 1 .. total], base);
            return buffer[0..total];
        }
        @memcpy(buffer[0..base.len], base);
        return buffer[0..base.len];
    }

    /// Copies the link name field (offset 157, 100 bytes, cut at the first
    /// null byte) into `buffer` and returns the copied part.
    pub fn linkName(header: Header, buffer: *[LINK_NAME_SIZE]u8) []const u8 {
        const target = header.str(157, 100);
        const dst = buffer[0..target.len];
        @memcpy(dst, target);
        return dst;
    }

    /// Name field (offset 0, 100 bytes) up to the first null byte.
    pub fn name(header: Header) []const u8 {
        return header.str(0, 100);
    }

    /// Mode field (offset 100, 8 bytes) decoded as octal.
    pub fn mode(header: Header) !u32 {
        const value = try header.octal(100, 8);
        return @intCast(value);
    }

    /// Size field (offset 124, 12 bytes). Accepts both the octal text form
    /// and the base-256 form introduced by a leading 0x80 byte.
    pub fn size(header: Header) !u64 {
        const start = 124;
        const len = 12;
        const raw = header.bytes[start..][0..len];
        switch (raw[0]) {
            // If the leading byte is 0xff (255), all the bytes of the field
            // (including the leading byte) are concatenated in big-endian
            // order, with the result being a negative number expressed in
            // two's complement form.
            0xff => return error.TarNumericValueNegative,
            // If the leading byte is 0x80 (128), the non-leading bytes of the
            // field are concatenated in big-endian order.
            0x80 => {
                // Only the low 8 bytes fit into u64; the 3 bytes above them
                // must be zero.
                const high_bytes_clear = raw[1] == 0 and raw[2] == 0 and raw[3] == 0;
                if (!high_bytes_clear) return error.TarNumericValueTooBig;
                return std.mem.readInt(u64, raw[4..12], .big);
            },
            else => return try header.octal(start, len),
        }
    }

    /// Checksum field (offset 148, 8 bytes) decoded as octal.
    pub fn chksum(header: Header) !u64 {
        return header.octal(148, 8);
    }

    /// True when the magic field (offset 257) is "ustar" followed by either
    /// a null byte or a space.
    pub fn is_ustar(header: Header) bool {
        const magic = header.bytes[257..][0..6];
        if (!std.mem.eql(u8, magic[0..5], "ustar")) return false;
        return switch (magic[5]) {
            0, ' ' => true,
            else => false,
        };
    }

    /// Prefix field (offset 345, 155 bytes) up to the first null byte.
    pub fn prefix(header: Header) []const u8 {
        return header.str(345, 155);
    }

    /// Entry type from the type flag byte; `.normal_alias` is reported as
    /// `.normal`.
    pub fn kind(header: Header) Kind {
        const flag: Kind = @enumFromInt(header.bytes[156]);
        return if (flag == .normal_alias) .normal else flag;
    }

    // Returns the field `[start, start + len)` cut at the first null byte,
    // or the whole field when it contains no null byte.
    fn str(header: Header, start: usize, len: usize) []const u8 {
        const field = header.bytes[start .. start + len];
        const end = std.mem.indexOfScalar(u8, field, 0) orelse field.len;
        return field[0..end];
    }

    // Decodes a zero-filled octal number in ASCII. Each numeric field of
    // width w contains w minus 1 digits, and a null. Leading zeros/spaces
    // and trailing spaces/nulls are ignored; an empty remainder is 0.
    fn octal(header: Header, start: usize, len: usize) !u64 {
        const field = header.bytes[start..][0..len];
        const without_leading = std.mem.trimLeft(u8, field, "0 ");
        const digits = std.mem.trimRight(u8, without_leading, " \x00");
        if (digits.len == 0) return 0;
        return std.fmt.parseInt(u64, digits, 8) catch return error.TarHeader;
    }

    const Chksums = struct {
        unsigned: u64,
        signed: i64,
    };

    // Sum of all bytes in the header block, computed once with bytes treated
    // as unsigned and once as signed. The chksum field itself (offsets
    // 148..156) is treated as if it were filled with spaces (ASCII 32).
    fn computeChksum(header: Header) Chksums {
        var sums: Chksums = .{ .unsigned = 0, .signed = 0 };
        for (header.bytes, 0..) |byte, idx| {
            const in_chksum_field = idx >= 148 and idx < 156;
            const b: u8 = if (in_chksum_field) ' ' else byte;
            sums.unsigned += b;
            sums.signed += @as(i8, @bitCast(b));
        }
        return sums;
    }

    /// Compares the computed checksum with the value of the chksum field.
    /// Returns the field value when it matches either the unsigned or the
    /// signed sum, `error.TarHeaderChksum` otherwise.
    /// A zero return value indicates an empty block.
    pub fn checkChksum(header: Header) !u64 {
        const stored = try header.chksum();
        const computed = header.computeChksum();
        // 256 == 8 * 32: nothing but the space-substituted chksum field
        // contributed to the sum.
        if (stored == 0 and computed.unsigned == 256) return 0;
        const matches = stored == computed.unsigned or stored == computed.signed;
        if (!matches) return error.TarHeaderChksum;
        return stored;
    }
};

// Returns the part of `str` that precedes its first null byte, or all of
// `str` when it contains no null byte.
fn nullStr(str: []const u8) []const u8 {
    const end = std.mem.indexOfScalar(u8, str, 0) orelse str.len;
    return str[0..end];
}

/// Creates an iterator over the files of the tar archive read from `reader`.
/// Each call to `next` on the result yields the next file of the archive.
/// `diagnostics`, when non-null, receives errors for unsupported entries.
pub fn iterator(reader: anytype, diagnostics: ?*Options.Diagnostics) Iterator(@TypeOf(reader)) {
    const Iter = Iterator(@TypeOf(reader));
    return Iter{
        .reader = reader,
        .diagnostics = diagnostics,
    };
}

/// Returns the tar file iterator type for the given reader type.
/// The iterator owns fixed-size buffers for the current header, file name and
/// link name; slices in a returned `File` point into those buffers and are
/// overwritten by the following call to `next`.
fn Iterator(comptime ReaderType: type) type {
    return struct {
        reader: ReaderType,
        diagnostics: ?*Options.Diagnostics,

        // buffers for header and file attributes
        header_buffer: [Header.SIZE]u8 = undefined,
        file_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined,
        link_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined,

        // bytes of padding to the end of the block
        padding: usize = 0,
        // current tar file
        file: File = undefined,

        pub const File = struct {
            name: []const u8, // name of file, symlink or directory
            link_name: []const u8, // target name of symlink
            size: u64, // size of the file in bytes
            mode: u32,
            kind: Header.Kind,

            reader: ReaderType,

            // Writes file content to writer.
            // Reads exactly `size` bytes from the reader in chunks of up to
            // 4096 bytes.
            pub fn write(self: File, writer: anytype) !void {
                var buffer: [4096]u8 = undefined;

                var n: u64 = 0;
                while (n < self.size) {
                    const buf = buffer[0..@min(buffer.len, self.size - n)];
                    try self.reader.readNoEof(buf);
                    try writer.writeAll(buf);
                    n += buf.len;
                }
            }

            // Skips file content. Advances reader.
            pub fn skip(self: File) !void {
                try self.reader.skipBytes(self.size, .{});
            }
        };

        const Self = @This();

        // Skips the padding left over from the previous entry, then reads the
        // next header block. Returns null at end of stream or on an empty
        // (all zero) block.
        fn readHeader(self: *Self) !?Header {
            if (self.padding > 0) {
                try self.reader.skipBytes(self.padding, .{});
            }
            const n = try self.reader.readAll(&self.header_buffer);
            if (n == 0) return null;
            if (n < Header.SIZE) return error.UnexpectedEndOfStream;
            const header = Header{ .bytes = self.header_buffer[0..Header.SIZE] };
            if (try header.checkChksum() == 0) return null;
            return header;
        }

        // Reads `size` bytes into `buffer` and returns them cut at the first
        // null byte. `size` comes straight from the archive, so it is
        // validated against the buffer before being narrowed to usize.
        fn readString(self: *Self, size: u64, buffer: []u8) ![]const u8 {
            if (size > buffer.len) return error.TarCorruptInput;
            const len: usize = @intCast(size); // safe: size <= buffer.len
            const buf = buffer[0..len];
            try self.reader.readNoEof(buf);
            return nullStr(buf);
        }

        // Resets the current file to an empty normal file.
        fn initFile(self: *Self) void {
            self.file = File{
                .name = self.file_name_buffer[0..0],
                .link_name = self.link_name_buffer[0..0],
                .size = 0,
                .kind = .normal,
                .mode = 0,
                .reader = self.reader,
            };
        }

        // Number of padding bytes in the last file block.
        fn blockPadding(size: u64) usize {
            const block_rounded = std.mem.alignForward(u64, size, Header.SIZE); // size rounded to the block boundary
            return @intCast(block_rounded - size);
        }

        /// Iterates through the tar archive as if it is a series of files.
        /// Internally, the tar format often uses entries (header with optional
        /// content) to add meta data that describes the next file. These
        /// entries should not normally be visible to the outside. As such, this
        /// loop iterates through one or more entries until it collects all
        /// file attributes.
        ///
        /// Returns null when there are no more entries. The caller is expected
        /// to consume the content of the returned file (`File.write` or
        /// `File.skip`) before calling `next` again.
        pub fn next(self: *Self) !?File {
            self.initFile();

            while (try self.readHeader()) |header| {
                const kind = header.kind();
                const size: u64 = try header.size();
                self.padding = blockPadding(size);

                switch (kind) {
                    // File types to return upstream
                    .directory, .normal, .symbolic_link => {
                        self.file.kind = kind;
                        self.file.mode = try header.mode();

                        // set file attributes if not already set by prefix/extended headers
                        if (self.file.size == 0) {
                            self.file.size = size;
                        }
                        if (self.file.link_name.len == 0) {
                            self.file.link_name = header.linkName(self.link_name_buffer[0..Header.LINK_NAME_SIZE]);
                        }
                        if (self.file.name.len == 0) {
                            self.file.name = try header.fullName(self.file_name_buffer[0..Header.MAX_NAME_SIZE]);
                        }

                        // The effective size may come from an extended header.
                        self.padding = blockPadding(self.file.size);
                        return self.file;
                    },
                    // Prefix header types
                    .gnu_long_name => {
                        self.file.name = try self.readString(size, &self.file_name_buffer);
                    },
                    .gnu_long_link => {
                        self.file.link_name = try self.readString(size, &self.link_name_buffer);
                    },
                    .extended_header => {
                        // Use just attributes from last extended header.
                        self.initFile();

                        // Checked narrowing: on targets where usize is smaller
                        // than u64 an oversized header must be an error, not
                        // illegal behavior.
                        const pax_size = std.math.cast(usize, size) orelse return error.TarCorruptInput;
                        var rdr = paxIterator(self.reader, pax_size);
                        while (try rdr.next()) |attr| {
                            switch (attr.kind) {
                                .path => {
                                    self.file.name = try attr.value(&self.file_name_buffer);
                                },
                                .linkpath => {
                                    self.file.link_name = try attr.value(&self.link_name_buffer);
                                },
                                .size => {
                                    var buf: [64]u8 = undefined;
                                    self.file.size = try std.fmt.parseInt(u64, try attr.value(&buf), 10);
                                },
                            }
                        }
                    },
                    // Ignored header type
                    .global_extended_header => {
                        self.reader.skipBytes(size, .{}) catch return error.TarHeadersTooBig;
                    },
                    // All other are unsupported header types
                    else => {
                        const d = self.diagnostics orelse return error.TarUnsupportedHeader;
                        {
                            // Scoped so that the errdefer only covers the
                            // window before `errors` takes ownership of the
                            // duplicated name.
                            const file_name = try d.allocator.dupe(u8, header.name());
                            errdefer d.allocator.free(file_name);
                            try d.errors.append(d.allocator, .{ .unsupported_file_type = .{
                                .file_name = file_name,
                                .file_type = kind,
                            } });
                        }
                        if (kind == .gnu_sparse) {
                            try self.skipGnuSparseExtendedHeaders(header);
                        }
                        self.reader.skipBytes(size, .{}) catch return error.TarHeadersTooBig;
                    },
                }
            }
            return null;
        }

        // Consumes the chain of extension blocks following a GNU sparse
        // header. Byte 482 of the header and byte 504 of each extension block
        // flag whether another extension block follows.
        fn skipGnuSparseExtendedHeaders(self: *Self, header: Header) !void {
            var is_extended = header.bytes[482] > 0;
            while (is_extended) {
                var buf: [Header.SIZE]u8 = undefined;
                const n = try self.reader.readAll(&buf);
                if (n < Header.SIZE) return error.UnexpectedEndOfStream;
                is_extended = buf[504] > 0;
            }
        }
    };
}

/// Creates an iterator over the attributes of a pax extended header.
/// `size` is the length in bytes of the extended header content available
/// in `reader`.
fn paxIterator(reader: anytype, size: usize) PaxIterator(@TypeOf(reader)) {
    return .{
        .reader = reader,
        .size = size,
    };
}

/// Pax keywords that the iterator reports; all other keywords are skipped.
const PaxAttributeKind = enum {
    path,
    linkpath,
    size,
};

/// Returns the pax attribute iterator type for the given reader type.
fn PaxIterator(comptime ReaderType: type) type {
    return struct {
        size: usize, // cumulative size of all pax attributes
        reader: ReaderType,
        // scratch buffer used for reading attribute length and keyword
        scratch: [128]u8 = undefined,

        const Self = @This();

        const Attribute = struct {
            kind: PaxAttributeKind,
            len: usize, // length of the attribute value
            reader: ReaderType, // reader positioned at value start

            // Copies pax attribute value into destination buffer.
            // Returns error.TarInsufficientBuffer, without reading anything,
            // when `dst` is smaller than Attribute.len. The length comes from
            // the archive, so it must be rejected with an error rather than
            // an assertion.
            // Also consumes and validates the '\n' that terminates the record
            // and rejects values containing a null byte.
            pub fn value(self: Attribute, dst: []u8) ![]const u8 {
                if (self.len > dst.len) return error.TarInsufficientBuffer;
                const buf = dst[0..self.len];
                const n = try self.reader.readAll(buf);
                if (n < self.len) return error.UnexpectedEndOfStream;
                try validateAttributeEnding(self.reader);
                if (hasNull(buf)) return error.PaxNullInValue;
                return buf;
            }
        };

        // Iterates over pax attributes. Returns only known attributes.
        // Caller has to call value in Attribute, to advance reader across value.
        pub fn next(self: *Self) !?Attribute {
            // Pax extended header consists of one or more attributes, each constructed as follows:
            // "%d %s=%s\n", <length>, <keyword>, <value>
            while (self.size > 0) {
                const length_buf = try self.readUntil(' ');
                const length = try std.fmt.parseInt(usize, length_buf, 10); // record length in bytes

                const keyword = try self.readUntil('=');
                if (hasNull(keyword)) return error.PaxNullInKeyword;

                // calculate value_len
                const value_start = length_buf.len + keyword.len + 2; // 2 separators
                // The record must have room for the trailing '\n' and must
                // not extend past the remaining header content.
                if (length < value_start + 1 or self.size < length) return error.UnexpectedEndOfStream;
                const value_len = length - value_start - 1; // \n separator at end
                self.size -= length;

                const kind: PaxAttributeKind = if (eql(keyword, "path"))
                    .path
                else if (eql(keyword, "linkpath"))
                    .linkpath
                else if (eql(keyword, "size"))
                    .size
                else {
                    // Unknown keyword: consume its value and line ending here.
                    try self.reader.skipBytes(value_len, .{});
                    try validateAttributeEnding(self.reader);
                    continue;
                };
                return Attribute{
                    .kind = kind,
                    .len = value_len,
                    .reader = self.reader,
                };
            }

            return null;
        }

        // Reads bytes into the scratch buffer up to (not including) the
        // delimiter and returns them. The result is only valid until the
        // next call.
        fn readUntil(self: *Self, delimiter: u8) ![]const u8 {
            var fbs = std.io.fixedBufferStream(&self.scratch);
            try self.reader.streamUntilDelimiter(fbs.writer(), delimiter, null);
            return fbs.getWritten();
        }

        fn eql(a: []const u8, b: []const u8) bool {
            return std.mem.eql(u8, a, b);
        }

        fn hasNull(str: []const u8) bool {
            return (std.mem.indexOfScalar(u8, str, 0)) != null;
        }

        // Checks that each record ends with new line.
        fn validateAttributeEnding(reader: ReaderType) !void {
            if (try reader.readByte() != '\n') return error.PaxInvalidAttributeEnd;
        }
    };
}

/// Extracts the tar archive read from `reader` into `dir`.
///
/// Directories, normal files and symbolic links are created; entry names are
/// first shortened by `options.strip_components` leading path components.
/// When `options.diagnostics` is provided, failures to create a file or
/// symlink are recorded there (with strings owned by the diagnostics
/// allocator) and extraction continues; otherwise the error is returned.
///
/// Note: `options.mode_mode == .executable_bit_only` is not implemented and
/// panics.
pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: Options) !void {
    switch (options.mode_mode) {
        .ignore => {},
        .executable_bit_only => {
            // This code does not look at the mode bits yet. To implement this feature,
            // the implementation must be adjusted to look at the mode, and check the
            // user executable bit, then call fchmod on newly created files when
            // the executable bit is supposed to be set.
            // It also needs to properly deal with ACLs on Windows.
            @panic("TODO: unimplemented: tar ModeMode.executable_bit_only");
        },
    }

    var iter = iterator(reader, options.diagnostics);
    while (try iter.next()) |file| {
        switch (file.kind) {
            .directory => {
                const file_name = stripComponents(file.name, options.strip_components);
                if (file_name.len != 0 and !options.exclude_empty_directories) {
                    try dir.makePath(file_name);
                }
            },
            .normal => {
                if (file.size == 0 and file.name.len == 0) return;
                const file_name = stripComponents(file.name, options.strip_components);
                if (file_name.len == 0) return error.BadFileName;

                if (createDirAndFile(dir, file_name)) |fs_file| {
                    defer fs_file.close();
                    try file.write(fs_file);
                } else |err| {
                    const d = options.diagnostics orelse return err;
                    {
                        // Scoped so the errdefer is no longer armed once
                        // `errors` owns the duplicated name; a later failure
                        // of `file.skip` must not free it again.
                        const owned_name = try d.allocator.dupe(u8, file_name);
                        errdefer d.allocator.free(owned_name);
                        try d.errors.append(d.allocator, .{ .unable_to_create_file = .{
                            .code = err,
                            .file_name = owned_name,
                        } });
                    }
                    // The content still has to be consumed to reach the next header.
                    try file.skip();
                }
            },
            .symbolic_link => {
                // The file system path of the symbolic link.
                const file_name = stripComponents(file.name, options.strip_components);
                if (file_name.len == 0) return error.BadFileName;
                // The data inside the symbolic link.
                const link_name = file.link_name;

                createDirAndSymlink(dir, link_name, file_name) catch |err| {
                    const d = options.diagnostics orelse return err;
                    // Release the copies if a later allocation fails, so
                    // nothing leaks on the out-of-memory path.
                    const owned_file_name = try d.allocator.dupe(u8, file_name);
                    errdefer d.allocator.free(owned_file_name);
                    const owned_link_name = try d.allocator.dupe(u8, link_name);
                    errdefer d.allocator.free(owned_link_name);
                    try d.errors.append(d.allocator, .{ .unable_to_create_sym_link = .{
                        .code = err,
                        .file_name = owned_file_name,
                        .link_name = owned_link_name,
                    } });
                };
            },
            // The iterator only yields the three kinds handled above.
            else => unreachable,
        }
    }
}

// Creates `file_name` inside `dir`, failing if it already exists.
// When creation fails with FileNotFound and `file_name` has a directory
// part, that directory path is created and the creation is retried once.
// Any other error is returned unchanged.
fn createDirAndFile(dir: std.fs.Dir, file_name: []const u8) !std.fs.File {
    return dir.createFile(file_name, .{ .exclusive = true }) catch |err| {
        if (err != error.FileNotFound) return err;
        const parent = std.fs.path.dirname(file_name) orelse return err;
        try dir.makePath(parent);
        return try dir.createFile(file_name, .{ .exclusive = true });
    };
}

// Creates the symbolic link `file_name` inside `dir`, pointing at `link_name`.
// When creation fails with FileNotFound and `file_name` has a directory part,
// that directory path is created and the creation is retried once.
// Every other failure (and a failed retry) is returned to the caller instead
// of being silently dropped.
fn createDirAndSymlink(dir: std.fs.Dir, link_name: []const u8, file_name: []const u8) !void {
    dir.symLink(link_name, file_name, .{}) catch |err| {
        if (err == error.FileNotFound) {
            if (std.fs.path.dirname(file_name)) |dir_name| {
                try dir.makePath(dir_name);
                return dir.symLink(link_name, file_name, .{});
            }
        }
        return err;
    };
}

// Removes up to `count` leading '/'-separated components from `path`.
// Returns the empty tail of `path` when it has fewer than `count` separators.
fn stripComponents(path: []const u8, count: u32) []const u8 {
    var rest = path;
    var remaining = count;
    while (remaining > 0) : (remaining -= 1) {
        const sep = std.mem.indexOfScalar(u8, rest, '/') orelse return path[path.len..];
        rest = rest[sep + 1 ..];
    }
    return rest;
}

// Stripping 0..4 components from "a/b/c", including counts beyond the
// number of components present.
test "tar stripComponents" {
    const cases = [_]struct { strip: u32, want: []const u8 }{
        .{ .strip = 0, .want = "a/b/c" },
        .{ .strip = 1, .want = "b/c" },
        .{ .strip = 2, .want = "c" },
        .{ .strip = 3, .want = "" },
        .{ .strip = 4, .want = "" },
    };
    for (cases) |case| {
        try std.testing.expectEqualStrings(case.want, stripComponents("a/b/c", case.strip));
    }
}

// Table-driven test of the pax attribute iterator. Each case feeds `data` to
// the iterator and lists the attributes expected to be reported, in order.
test "tar PaxIterator" {
    // Expected attribute: either a `value`, or the error `err` expected from
    // reading its value.
    const Attr = struct {
        kind: PaxAttributeKind,
        value: []const u8 = undefined,
        err: ?anyerror = null,
    };
    const cases = [_]struct {
        data: []const u8,
        attrs: []const Attr,
        // error expected from `iter.next()` after all `attrs` were reported
        err: ?anyerror = null,
    }{
        .{ // valid but unknown keys
            .data =
            \\30 mtime=1350244992.023960108
            \\6 k=1
            \\13 key1=val1
            \\10 a=name
            \\9 a=name
            \\
            ,
            .attrs = &[_]Attr{},
        },
        .{ // mix of known and unknown keys
            .data =
            \\6 k=1
            \\13 path=name
            \\17 linkpath=link
            \\13 key1=val1
            \\12 size=123
            \\13 key2=val2
            \\
            ,
            .attrs = &[_]Attr{
                .{ .kind = .path, .value = "name" },
                .{ .kind = .linkpath, .value = "link" },
                .{ .kind = .size, .value = "123" },
            },
        },
        .{ // too short size of the second key-value pair
            .data =
            \\13 path=name
            \\10 linkpath=value
            \\
            ,
            .attrs = &[_]Attr{
                .{ .kind = .path, .value = "name" },
            },
            .err = error.UnexpectedEndOfStream,
        },
        .{ // too long size of the last key-value pair: exceeds remaining data
            .data =
            \\13 path=name
            \\6 k=1
            \\19 linkpath=value
            \\
            ,
            .attrs = &[_]Attr{
                .{ .kind = .path, .value = "name" },
            },
            .err = error.UnexpectedEndOfStream,
        },

        .{ // too long size of the second key-value pair: record end is not a new line
            .data =
            \\13 path=name
            \\19 linkpath=value
            \\6 k=1
            \\
            ,
            .attrs = &[_]Attr{
                .{ .kind = .path, .value = "name" },
                .{ .kind = .linkpath, .err = error.PaxInvalidAttributeEnd },
            },
        },
        .{ // null in keyword is not valid
            .data = "13 path=name\n" ++ "7 k\x00b=1\n",
            .attrs = &[_]Attr{
                .{ .kind = .path, .value = "name" },
            },
            .err = error.PaxNullInKeyword,
        },
        .{ // null in value is not valid
            .data = "23 path=name\x00with null\n",
            .attrs = &[_]Attr{
                .{ .kind = .path, .err = error.PaxNullInValue },
            },
        },
        .{ // 1000 characters path
            .data = "1011 path=" ++ "0123456789" ** 100 ++ "\n",
            .attrs = &[_]Attr{
                .{ .kind = .path, .value = "0123456789" ** 100 },
            },
        },
    };
    // Destination for attribute values; large enough for the 1000 character path.
    var buffer: [1024]u8 = undefined;

    outer: for (cases) |case| {
        var stream = std.io.fixedBufferStream(case.data);
        var iter = paxIterator(stream.reader(), case.data.len);

        var i: usize = 0;
        while (iter.next() catch |err| {
            // An error from `next` must match the case-level expectation.
            if (case.err) |e| {
                try std.testing.expectEqual(e, err);
                continue;
            }
            return err;
        }) |attr| : (i += 1) {
            const exp = case.attrs[i];
            try std.testing.expectEqual(exp.kind, attr.kind);
            const value = attr.value(&buffer) catch |err| {
                // An error from `value` must match the attribute-level expectation.
                if (exp.err) |e| {
                    try std.testing.expectEqual(e, err);
                    break :outer;
                }
                return err;
            };
            try std.testing.expectEqualStrings(exp.value, value);
        }
        // Reached only when iteration ended without an error.
        try std.testing.expectEqual(case.attrs.len, i);
        try std.testing.expect(case.err == null);
    }
}

// References tar/test.zig from an unnamed test block.
test {
    _ = @import("tar/test.zig");
}

// Decoding of the header size field (offset 124) for both the base-256 and
// the octal text encodings.
test "tar header parse size" {
    const cases = [_]struct {
        in: []const u8,
        want: u64 = 0,
        err: ?anyerror = null,
    }{
        // Test base-256 (binary) encoded values.
        .{ .in = "", .want = 0 },
        .{ .in = "\x80", .want = 0 },
        .{ .in = "\x80\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01", .want = 1 },
        .{ .in = "\x80\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x02", .want = 0x0102 },
        .{ .in = "\x80\x00\x00\x00\x01\x02\x03\x04\x05\x06\x07\x08", .want = 0x0102030405060708 },
        .{ .in = "\x80\x00\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09", .err = error.TarNumericValueTooBig },
        .{ .in = "\x80\x00\x00\x00\x07\x76\xa2\x22\xeb\x8a\x72\x61", .want = 537795476381659745 },
        .{ .in = "\x80\x80\x80\x00\x01\x02\x03\x04\x05\x06\x07\x08", .err = error.TarNumericValueTooBig },

        // Test base-8 (octal) encoded values.
        .{ .in = "00000000227\x00", .want = 0o227 },
        .{ .in = "  000000227\x00", .want = 0o227 },
        .{ .in = "00000000228\x00", .err = error.TarHeader },
        .{ .in = "11111111111\x00", .want = 0o11111111111 },
    };

    for (cases) |case| {
        // Zeroed header block with the case input placed in the size field.
        var block = [_]u8{0} ** Header.SIZE;
        @memcpy(block[124..][0..case.in.len], case.in);
        const header = Header{ .bytes = &block };
        const got = header.size();
        if (case.err) |expected_err| {
            try std.testing.expectError(expected_err, got);
            continue;
        }
        try std.testing.expectEqual(case.want, try got);
    }
}

// Decoding of the header mode field (offset 100, 8 bytes of octal text).
test "tar header parse mode" {
    const cases = [_]struct {
        in: []const u8,
        want: u64 = 0,
        err: ?anyerror = null,
    }{
        .{ .in = "0000644\x00", .want = 0o644 },
        .{ .in = "0000777\x00", .want = 0o777 },
        .{ .in = "7777777\x00", .want = 0o7777777 },
        .{ .in = "7777778\x00", .err = error.TarHeader },
        .{ .in = "77777777", .want = 0o77777777 },
        // Only the first 8 bytes belong to the mode field.
        .{ .in = "777777777777", .want = 0o77777777 },
    };
    for (cases) |case| {
        // Zeroed header block with the case input placed at the mode offset.
        var block = [_]u8{0} ** Header.SIZE;
        @memcpy(block[100..][0..case.in.len], case.in);
        const header = Header{ .bytes = &block };
        const got = header.mode();
        if (case.err) |expected_err| {
            try std.testing.expectError(expected_err, got);
            continue;
        }
        try std.testing.expectEqual(case.want, try got);
    }
}
-												tar: add module comment and references

											
										
										
											2023-12-06 14:35:29 +00:00
+								/// Tar archive is single ordinary file which can contain many files (or
 								/// directories, symlinks, ...). It's build by series of blocks each size of 512
 								/// bytes. First block of each entry is header which defines type, name, size
 								/// permissions and other attributes. Header is followed by series of blocks of
 								/// file content, if any that entry has content. Content is padded to the block
 								/// size, so next header always starts at block boundary.
 								///
 								/// This simple format is extended by GNU and POSIX pax extensions to support
 								/// file names longer than 256 bytes and additional attributes.
 								///
 								/// This is not comprehensive tar parser. Here we are only file types needed to
 								/// support Zig package manager; normal file, directory, symbolic link. And
 								/// subset of attributes: name, size, permissions.
 								///
 								/// GNU tar reference: https://www.gnu.org/software/tar/manual/html_node/Standard.html
 								/// pax reference: https://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13
-												tar: replace custom buffered reader with std.io

											
										
										
											2023-12-11 14:48:43 +00:00
+								///
-												tar: reorganize file, functions before tests

											
										
										
											2023-12-11 21:00:49 +00:00
+								const std = @import("std.zig");
-												tar: fix import path

											
										
										
											2023-11-29 14:31:22 +00:00
+								const assert = std.debug.assert;
-												std.tar: add strip_components option

											
										
										
											2023-01-10 07:38:18 +00:00
+								pub const Options = struct {
 								    /// Number of directory levels to skip when extracting files.
 								    strip_components: u32 = 0,
-												zig build: add executable bit and file path to package hash

Unfortunately, due to the Windows equivalent of executable permissions
being a bit tricky, there is follow-up work to be done.

What is done in this commit is the hash modifications. At the fetch
layer, executable bits inside packages are ignored. In the hash
computation layer, executable bit is implemented for POSIX but not yet
for Windows. This means that the hash will not break again in the future
for packages that do not have any executable files, but it will break
for packages that do.

This is a hash-breaking change.

Closes #14308

											
										
										
											2023-02-02 01:42:29 +00:00
+								    /// How to handle the "mode" property of files from within the tar file.
 								    mode_mode: ModeMode = .executable_bit_only,
-												std.tar: add option for omitting empty directories

											
										
										
											2023-10-04 06:25:04 +00:00
+								    /// Prevents creation of empty directories.
 								    exclude_empty_directories: bool = false,
-												std.tar: support symlinks

closes #16678

											
										
										
											2023-10-01 06:00:39 +00:00
+								    /// Provide this to receive detailed error messages.
 								    /// When this is provided, some errors which would otherwise be returned immediately
 								    /// will instead be added to this structure. The API user must check the errors
 								    /// in diagnostics to know whether the operation succeeded or failed.
 								    diagnostics: ?*Diagnostics = null,
-												zig build: add executable bit and file path to package hash

Unfortunately, due to the Windows equivalent of executable permissions
being a bit tricky, there is follow-up work to be done.

What is done in this commit is the hash modifications. At the fetch
layer, executable bits inside packages are ignored. In the hash
computation layer, executable bit is implemented for POSIX but not yet
for Windows. This means that the hash will not break again in the future
for packages that do not have any executable files, but it will break
for packages that do.

This is a hash-breaking change.

Closes #14308

											
										
										
											2023-02-02 01:42:29 +00:00
-												std.tar: support symlinks

closes #16678

											
										
										
											2023-10-01 06:00:39 +00:00
+								    pub const ModeMode = enum {
-												zig build: add executable bit and file path to package hash

Unfortunately, due to the Windows equivalent of executable permissions
being a bit tricky, there is follow-up work to be done.

What is done in this commit is the hash modifications. At the fetch
layer, executable bits inside packages are ignored. In the hash
computation layer, executable bit is implemented for POSIX but not yet
for Windows. This means that the hash will not break again in the future
for packages that do not have any executable files, but it will break
for packages that do.

This is a hash-breaking change.

Closes #14308

											
										
										
											2023-02-02 01:42:29 +00:00
+								        /// The mode from the tar file is completely ignored. Files are created
 								        /// with the default mode when creating files.
 								        ignore,
 								        /// The mode from the tar file is inspected for the owner executable bit
 								        /// only. This bit is copied to the group and other executable bits.
 								        /// Other bits of the mode are left as the default when creating files.
 								        executable_bit_only,
 								    };
-												std.tar: support symlinks

closes #16678

											
										
										
											2023-10-01 06:00:39 +00:00
 								    pub const Diagnostics = struct {
 								        allocator: std.mem.Allocator,
 								        errors: std.ArrayListUnmanaged(Error) = .{},
 								        pub const Error = union(enum) {
 								            unable_to_create_sym_link: struct {
 								                code: anyerror,
 								                file_name: []const u8,
 								                link_name: []const u8,
 								            },
-												std.tar: fix creation of symlinks with omit_empty_directories

											
										
										
											2023-10-08 06:09:39 +00:00
+								            unable_to_create_file: struct {
 								                code: anyerror,
 								                file_name: []const u8,
 								            },
-												zig fetch: enhanced error reporting

* Package: use std.tar diagnostics to give detailed error messages
* std.tar: add diagnostic for unsupported file type

											
										
										
											2023-10-03 00:00:45 +00:00
+								            unsupported_file_type: struct {
 								                file_name: []const u8,
-												tar: use file word in less places

											
										
										
											2023-12-11 19:46:27 +00:00
+								                file_type: Header.Kind,
-												zig fetch: enhanced error reporting

* Package: use std.tar diagnostics to give detailed error messages
* std.tar: add diagnostic for unsupported file type

											
										
										
											2023-10-03 00:00:45 +00:00
+								            },
-												std.tar: support symlinks

closes #16678

											
										
										
											2023-10-01 06:00:39 +00:00
+								        };
 								        pub fn deinit(d: *Diagnostics) void {
 								            for (d.errors.items) |item| {
 								                switch (item) {
 								                    .unable_to_create_sym_link => |info| {
 								                        d.allocator.free(info.file_name);
 								                        d.allocator.free(info.link_name);
 								                    },
-												std.tar: fix creation of symlinks with omit_empty_directories

											
										
										
											2023-10-08 06:09:39 +00:00
+								                    .unable_to_create_file => |info| {
 								                        d.allocator.free(info.file_name);
 								                    },
-												zig fetch: enhanced error reporting

* Package: use std.tar diagnostics to give detailed error messages
* std.tar: add diagnostic for unsupported file type

											
										
										
											2023-10-03 00:00:45 +00:00
+								                    .unsupported_file_type => |info| {
 								                        d.allocator.free(info.file_name);
 								                    },
-												std.tar: support symlinks

closes #16678

											
										
										
											2023-10-01 06:00:39 +00:00
+								                }
 								            }
 								            d.errors.deinit(d.allocator);
 								            d.* = undefined;
 								        }
 								    };
-												std.tar: add strip_components option

											
										
										
											2023-01-10 07:38:18 +00:00
+								};
-												add std.tar for tar file unpacking

											
										
										
											2023-01-10 05:36:35 +00:00
 								pub const Header = struct {
-												tar: remove stratch from  tar reader

Use explicit buffers for name, link_name instead.
It is cleaner that way.

											
										
										
											2023-12-11 19:18:59 +00:00
+								    const SIZE = 512;
 								    const MAX_NAME_SIZE = 100 + 1 + 155; // name(100) + separator(1) + prefix(155)
 								    const LINK_NAME_SIZE = 100;
 								    bytes: *const [SIZE]u8,
-												add std.tar for tar file unpacking

											
										
										
											2023-01-10 05:36:35 +00:00
-												tar: use file word in less places

											
										
										
											2023-12-11 19:46:27 +00:00
+								    pub const Kind = enum(u8) {
-												zig fetch: enhanced error reporting

* Package: use std.tar diagnostics to give detailed error messages
* std.tar: add diagnostic for unsupported file type

											
										
										
											2023-10-03 00:00:45 +00:00
+								        normal_alias = 0,
-												add std.tar for tar file unpacking

											
										
										
											2023-01-10 05:36:35 +00:00
+								        normal = '0',
 								        hard_link = '1',
 								        symbolic_link = '2',
 								        character_special = '3',
 								        block_special = '4',
 								        directory = '5',
 								        fifo = '6',
 								        contiguous = '7',
 								        global_extended_header = 'g',
 								        extended_header = 'x',
-												tar: add gnu path and link extensions handling

											
										
										
											2023-11-29 20:37:13 +00:00
+								        // Types 'L' and 'K' are used by the GNU format for a meta file
 								        // used to store the path or link name for the next file.
 								        gnu_long_name = 'L',
 								        gnu_long_link = 'K',
-												tar: improve diagnostic reporting

Using Python testtar file (mentioned in #14310) to test diagnostic
reporting.
Added computing checksum by using both unsigned and signed header bytes
values.
Added skipping gnu exteneded sparse headers while reporting unsupported
header in diagnostic.

Note on testing:

wget https://github.com/python/cpython/raw/3.11/Lib/test/testtar.tar -O
/tmp/testtar.tar

```
test "Python testtar.tar file" {
    const file_name = "testtar.tar";

    var file = try std.fs.cwd().openFile("/tmp/" ++ file_name, .{});
    defer file.close();

    var diag = Options.Diagnostics{ .allocator = std.testing.allocator };
    defer diag.deinit();

    var iter = iterator(file.reader(), &diag);
    while (try iter.next()) |f| {
        std.debug.print("supported: {} {s} {d}\n", .{ f.kind, f.name, f.size });
        try f.skip();
    }
    for (diag.errors.items) |e| {
        switch (e) {
            .unsupported_file_type => |u| {
                std.debug.print("unsupported: {} {s}\n", .{ u.file_type, u.file_name });
            },
            else => unreachable,
        }
    }
}
```

											
										
										
											2023-12-18 20:39:07 +00:00
+								        gnu_sparse = 'S',
 								        solaris_extended_header = 'X',
-												add std.tar for tar file unpacking

											
										
										
											2023-01-10 05:36:35 +00:00
+								        _,
 								    };
 								    /// Includes prefix concatenated, if any.
 								    /// TODO: check against "../" and other nefarious things
-												tar: remove stratch from  tar reader

Use explicit buffers for name, link_name instead.
It is cleaner that way.

											
										
										
											2023-12-11 19:18:59 +00:00
+								    pub fn fullName(header: Header, buffer: *[MAX_NAME_SIZE]u8) ![]const u8 {
-												add std.tar for tar file unpacking

											
										
										
											2023-01-10 05:36:35 +00:00
+								        const n = name(header);
 								        const p = prefix(header);
-												tar: use scratch buffer for file names

That makes names strings stable during the iteration. Otherwise string
buffers can be overwritten while reading file content.

											
										
										
											2023-12-01 17:26:31 +00:00
+								        if (!is_ustar(header) or p.len == 0) {
 								            @memcpy(buffer[0..n.len], n);
 								            return buffer[0..n.len];
 								        }
-												std: update to use `@memcpy` directly

											
										
										
											2023-04-27 22:16:01 +00:00
+								        @memcpy(buffer[0..p.len], p);
-												add std.tar for tar file unpacking

											
										
										
											2023-01-10 05:36:35 +00:00
+								        buffer[p.len] = '/';
-												std: update to use `@memcpy` directly

											
										
										
											2023-04-27 22:16:01 +00:00
+								        @memcpy(buffer[p.len + 1 ..][0..n.len], n);
-												add std.tar for tar file unpacking

											
										
										
											2023-01-10 05:36:35 +00:00
+								        return buffer[0 .. p.len + 1 + n.len];
 								    }
-												tar: remove stratch from  tar reader

Use explicit buffers for name, link_name instead.
It is cleaner that way.

											
										
										
											2023-12-11 19:18:59 +00:00
/// Copies the link name stored in the header into `buffer` and returns the
/// copied bytes as a slice of `buffer`.
///
/// The link name is read from the field at byte offset 157 and ends at the
/// first null byte, or at the end of the field when no null is present.
/// An empty link name yields an empty slice.
pub fn linkName(header: Header, buffer: *[LINK_NAME_SIZE]u8) []const u8 {
    // The field width equals the buffer size, so the copy below always fits.
    const link_name = header.str(157, LINK_NAME_SIZE);
    const copied = buffer[0..link_name.len];
    @memcpy(copied, link_name);
    return copied;
}
-												add std.tar for tar file unpacking

											
										
										
											2023-01-10 05:36:35 +00:00
+								    pub fn name(header: Header) []const u8 {
-												tar: add header chksum checking

											
										
										
											2023-11-29 16:17:20 +00:00
+								        return header.str(0, 100);
 								    }
-												tar: add file mode to result of tarbal iteration

So we have information to set executable bit on write to file system.

											
										
										
											2023-12-02 14:00:42 +00:00
+								    pub fn mode(header: Header) !u32 {
-												std.tar fix parsing mode field in tar header

Found by fuzzing. Previous numeric function assumed that is is getting
buffer of size 12, mode is size 8. Fuzzing found overflow.
Fixing and adding test cases.

											
										
										
											2024-02-23 20:57:15 +00:00
+								        return @intCast(try header.octal(100, 8));
-												tar: add file mode to result of tarbal iteration

So we have information to set executable bit on write to file system.

											
										
										
											2023-12-02 14:00:42 +00:00
+								    }
-												tar: use file word in less places

											
										
										
											2023-12-11 19:46:27 +00:00
+								    pub fn size(header: Header) !u64 {
-												std.tar fix parsing mode field in tar header

Found by fuzzing. Previous numeric function assumed that is is getting
buffer of size 12, mode is size 8. Fuzzing found overflow.
Fixing and adding test cases.

											
										
										
											2024-02-23 20:57:15 +00:00
+								        const start = 124;
 								        const len = 12;
 								        const raw = header.bytes[start..][0..len];
 								        //  If the leading byte is 0xff (255), all the bytes of the field
 								        //  (including the leading byte) are concatenated in big-endian order,
 								        //  with the result being a negative number expressed in two’s
 								        //  complement form.
 								        if (raw[0] == 0xff) return error.TarNumericValueNegative;
 								        // If the leading byte is 0x80 (128), the non-leading bytes of the
 								        // field are concatenated in big-endian order.
 								        if (raw[0] == 0x80) {
-												std.tar fix integer overflow in header size parse

Found by fuzzing. Fixing code and adding test.

											
										
										
											2024-02-23 20:57:40 +00:00
+								            if (raw[1] != 0 or raw[2] != 0 or raw[3] != 0) return error.TarNumericValueTooBig;
-												std.tar fix parsing mode field in tar header

Found by fuzzing. Previous numeric function assumed that is is getting
buffer of size 12, mode is size 8. Fuzzing found overflow.
Fixing and adding test cases.

											
										
										
											2024-02-23 20:57:15 +00:00
+								            return std.mem.readInt(u64, raw[4..12], .big);
 								        }
 								        return try header.octal(start, len);
-												tar: add header chksum checking

											
										
										
											2023-11-29 16:17:20 +00:00
+								    }
 								    pub fn chksum(header: Header) !u64 {
 								        return header.octal(148, 8);
-												add std.tar for tar file unpacking

											
										
										
											2023-01-10 05:36:35 +00:00
+								    }
-												tar: add header chksum checking

											
										
										
											2023-11-29 16:17:20 +00:00
+								    pub fn is_ustar(header: Header) bool {
-												tar: add gnu path and link extensions handling

											
										
										
											2023-11-29 20:37:13 +00:00
+								        const magic = header.bytes[257..][0..6];
 								        return std.mem.eql(u8, magic[0..5], "ustar") and (magic[5] == 0 or magic[5] == ' ');
-												std.tar: support symlinks

closes #16678

											
										
										
											2023-10-01 06:00:39 +00:00
+								    }
-												add std.tar for tar file unpacking

											
										
										
											2023-01-10 05:36:35 +00:00
+								    pub fn prefix(header: Header) []const u8 {
-												tar: add header chksum checking

											
										
										
											2023-11-29 16:17:20 +00:00
+								        return header.str(345, 155);
-												add std.tar for tar file unpacking

											
										
										
											2023-01-10 05:36:35 +00:00
+								    }
-												tar: use file word in less places

											
										
										
											2023-12-11 19:46:27 +00:00
+								    pub fn kind(header: Header) Kind {
 								        const result: Kind = @enumFromInt(header.bytes[156]);
-												zig fetch: enhanced error reporting

* Package: use std.tar diagnostics to give detailed error messages
* std.tar: add diagnostic for unsupported file type

											
										
										
											2023-10-03 00:00:45 +00:00
+								        if (result == .normal_alias) return .normal;
 								        return result;
-												add std.tar for tar file unpacking

											
										
										
											2023-01-10 05:36:35 +00:00
+								    }
-												tar: add header chksum checking

											
										
										
											2023-11-29 16:17:20 +00:00
+								    fn str(header: Header, start: usize, len: usize) []const u8 {
-												tar: add gnu path and link extensions handling

											
										
										
											2023-11-29 20:37:13 +00:00
+								        return nullStr(header.bytes[start .. start + len]);
-												add std.tar for tar file unpacking

											
										
										
											2023-01-10 05:36:35 +00:00
+								    }
-												tar: add initial test cases

Just adding tests, without changing functionality.

											
										
										
											2023-11-27 20:37:30 +00:00
-												tar: add header chksum checking

											
										
										
											2023-11-29 16:17:20 +00:00
+								    fn octal(header: Header, start: usize, len: usize) !u64 {
 								        const raw = header.bytes[start..][0..len];
 								        // Zero-filled octal number in ASCII. Each numeric field of width w
 								        // contains w minus 1 digits, and a null
 								        const ltrimmed = std.mem.trimLeft(u8, raw, "0 ");
 								        const rtrimmed = std.mem.trimRight(u8, ltrimmed, " \x00");
 								        if (rtrimmed.len == 0) return 0;
-												tar: handle pax null attrs and pax attr ending

											
										
										
											2023-11-29 19:30:08 +00:00
+								        return std.fmt.parseInt(u64, rtrimmed, 8) catch return error.TarHeader;
-												tar: add header chksum checking

											
										
										
											2023-11-29 16:17:20 +00:00
+								    }
-												tar: improve diagnostic reporting

Using Python testtar file (mentioned in #14310) to test diagnostic
reporting.
Added computing checksum by using both unsigned and signed header bytes
values.
Added skipping gnu exteneded sparse headers while reporting unsupported
header in diagnostic.

Note on testing:

wget https://github.com/python/cpython/raw/3.11/Lib/test/testtar.tar -O
/tmp/testtar.tar

```
test "Python testtar.tar file" {
    const file_name = "testtar.tar";

    var file = try std.fs.cwd().openFile("/tmp/" ++ file_name, .{});
    defer file.close();

    var diag = Options.Diagnostics{ .allocator = std.testing.allocator };
    defer diag.deinit();

    var iter = iterator(file.reader(), &diag);
    while (try iter.next()) |f| {
        std.debug.print("supported: {} {s} {d}\n", .{ f.kind, f.name, f.size });
        try f.skip();
    }
    for (diag.errors.items) |e| {
        switch (e) {
            .unsupported_file_type => |u| {
                std.debug.print("unsupported: {} {s}\n", .{ u.file_type, u.file_name });
            },
            else => unreachable,
        }
    }
}
```

											
										
										
											2023-12-18 20:39:07 +00:00
// Pair of header checksums: the same header bytes summed once as unsigned
// and once as signed values.
const Chksums = struct {
    // Sum in which every header byte is taken as an unsigned value.
    unsigned: u64,
    // Sum in which every header byte is reinterpreted as a signed 8-bit value.
    signed: i64,
};
-												tar: add header chksum checking

											
										
										
											2023-11-29 16:17:20 +00:00
+								    // Sum of all bytes in the header block. The chksum field is treated as if
 								    // it were filled with spaces (ASCII 32).
-												tar: improve diagnostic reporting

Using Python testtar file (mentioned in #14310) to test diagnostic
reporting.
Added computing checksum by using both unsigned and signed header bytes
values.
Added skipping gnu exteneded sparse headers while reporting unsupported
header in diagnostic.

Note on testing:

wget https://github.com/python/cpython/raw/3.11/Lib/test/testtar.tar -O
/tmp/testtar.tar

```
test "Python testtar.tar file" {
    const file_name = "testtar.tar";

    var file = try std.fs.cwd().openFile("/tmp/" ++ file_name, .{});
    defer file.close();

    var diag = Options.Diagnostics{ .allocator = std.testing.allocator };
    defer diag.deinit();

    var iter = iterator(file.reader(), &diag);
    while (try iter.next()) |f| {
        std.debug.print("supported: {} {s} {d}\n", .{ f.kind, f.name, f.size });
        try f.skip();
    }
    for (diag.errors.items) |e| {
        switch (e) {
            .unsupported_file_type => |u| {
                std.debug.print("unsupported: {} {s}\n", .{ u.file_type, u.file_name });
            },
            else => unreachable,
        }
    }
}
```

											
										
										
											2023-12-18 20:39:07 +00:00
+								    fn computeChksum(header: Header) Chksums {
 								        var cs: Chksums = .{ .signed = 0, .unsigned = 0 };
 								        for (header.bytes, 0..) |v, i| {
 								            const b = if (148 <= i and i < 156) 32 else v; // Treating chksum bytes as spaces.
 								            cs.unsigned += b;
 								            cs.signed += @as(i8, @bitCast(b));
-												tar: add header chksum checking

											
										
										
											2023-11-29 16:17:20 +00:00
+								        }
-												tar: improve diagnostic reporting

Using Python testtar file (mentioned in #14310) to test diagnostic
reporting.
Added computing checksum by using both unsigned and signed header bytes
values.
Added skipping gnu exteneded sparse headers while reporting unsupported
header in diagnostic.

Note on testing:

wget https://github.com/python/cpython/raw/3.11/Lib/test/testtar.tar -O
/tmp/testtar.tar

```
test "Python testtar.tar file" {
    const file_name = "testtar.tar";

    var file = try std.fs.cwd().openFile("/tmp/" ++ file_name, .{});
    defer file.close();

    var diag = Options.Diagnostics{ .allocator = std.testing.allocator };
    defer diag.deinit();

    var iter = iterator(file.reader(), &diag);
    while (try iter.next()) |f| {
        std.debug.print("supported: {} {s} {d}\n", .{ f.kind, f.name, f.size });
        try f.skip();
    }
    for (diag.errors.items) |e| {
        switch (e) {
            .unsupported_file_type => |u| {
                std.debug.print("unsupported: {} {s}\n", .{ u.file_type, u.file_name });
            },
            else => unreachable,
        }
    }
}
```

											
										
										
											2023-12-18 20:39:07 +00:00
+								        return cs;
-												tar: add header chksum checking

											
										
										
											2023-11-29 16:17:20 +00:00
+								    }
 								    // Checks calculated chksum with value of chksum field.
-												tar: use scratch buffer for file names

That makes names strings stable during the iteration. Otherwise string
buffers can be overwritten while reading file content.

											
										
										
											2023-12-01 17:26:31 +00:00
+								    // Returns error or valid chksum value.
-												tar: add header chksum checking

											
										
										
											2023-11-29 16:17:20 +00:00
+								    // Zero value indicates empty block.
 								    pub fn checkChksum(header: Header) !u64 {
 								        const field = try header.chksum();
-												tar: improve diagnostic reporting

Using Python testtar file (mentioned in #14310) to test diagnostic
reporting.
Added computing checksum by using both unsigned and signed header bytes
values.
Added skipping gnu exteneded sparse headers while reporting unsupported
header in diagnostic.

Note on testing:

wget https://github.com/python/cpython/raw/3.11/Lib/test/testtar.tar -O
/tmp/testtar.tar

```
test "Python testtar.tar file" {
    const file_name = "testtar.tar";

    var file = try std.fs.cwd().openFile("/tmp/" ++ file_name, .{});
    defer file.close();

    var diag = Options.Diagnostics{ .allocator = std.testing.allocator };
    defer diag.deinit();

    var iter = iterator(file.reader(), &diag);
    while (try iter.next()) |f| {
        std.debug.print("supported: {} {s} {d}\n", .{ f.kind, f.name, f.size });
        try f.skip();
    }
    for (diag.errors.items) |e| {
        switch (e) {
            .unsupported_file_type => |u| {
                std.debug.print("unsupported: {} {s}\n", .{ u.file_type, u.file_name });
            },
            else => unreachable,
        }
    }
}
```

											
										
										
											2023-12-18 20:39:07 +00:00
+								        const cs = header.computeChksum();
 								        if (field == 0 and cs.unsigned == 256) return 0;
 								        if (field != cs.unsigned and field != cs.signed) return error.TarHeaderChksum;
-												tar: add header chksum checking

											
										
										
											2023-11-29 16:17:20 +00:00
+								        return field;
-												tar: add initial test cases

Just adding tests, without changing functionality.

											
										
										
											2023-11-27 20:37:30 +00:00
+								    }
-												add std.tar for tar file unpacking

											
										
										
											2023-01-10 05:36:35 +00:00
+								};
-												tar: add module comment and references

											
										
										
											2023-12-06 14:35:29 +00:00
+								// Breaks string on first null character.
-												tar: add gnu path and link extensions handling

											
										
										
											2023-11-29 20:37:13 +00:00
// Returns the part of `str` that precedes its first null byte.
// When `str` contains no null byte the whole slice is returned unchanged.
// The result aliases `str`; nothing is copied.
fn nullStr(str: []const u8) []const u8 {
    const end = std.mem.indexOfScalar(u8, str, 0) orelse str.len;
    return str[0..end];
}
-												tar: rename reader to iterator

Itarator has `next` function, iterates over tar files. When using from
outside of module with `tar.` prefix makes more sense.

var iter = tar.iterator(reader, null);
while (try iter.next()) |file| {
...
}

											
										
										
											2023-12-12 17:50:25 +00:00
+								/// Iterates over files in tar archive.
 								/// `next` returns each file in `reader` tar archive.
 								pub fn iterator(reader: anytype, diagnostics: ?*Options.Diagnostics) Iterator(@TypeOf(reader)) {
-												tar: reorganize file, functions before tests

											
										
										
											2023-12-11 21:00:49 +00:00
+								    return .{
 								        .reader = reader,
 								        .diagnostics = diagnostics,
 								    };
 								}
-												tar: rename reader to iterator

Itarator has `next` function, iterates over tar files. When using from
outside of module with `tar.` prefix makes more sense.

var iter = tar.iterator(reader, null);
while (try iter.next()) |file| {
...
}

											
										
										
											2023-12-12 17:50:25 +00:00
+								fn Iterator(comptime ReaderType: type) type {
-												tar: reorganize file, functions before tests

											
										
										
											2023-12-11 21:00:49 +00:00
+								    return struct {
 								        reader: ReaderType,
 								        diagnostics: ?*Options.Diagnostics,
 								        // buffers for header and file attributes
 								        header_buffer: [Header.SIZE]u8 = undefined,
 								        file_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined,
 								        link_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined,
 								        // bytes of padding to the end of the block
 								        padding: usize = 0,
 								        // current tar file
 								        file: File = undefined,
 								        pub const File = struct {
 								            name: []const u8, // name of file, symlink or directory
 								            link_name: []const u8, // target name of symlink
-												tar: fix tests on 32-bit platforms

											
										
										
											2023-12-12 13:18:20 +00:00
+								            size: u64, // size of the file in bytes
-												tar: reorganize file, functions before tests

											
										
										
											2023-12-11 21:00:49 +00:00
+								            mode: u32,
 								            kind: Header.Kind,
 								            reader: ReaderType,
 								            // Writes file content to writer.
 								            pub fn write(self: File, writer: anytype) !void {
 								                var buffer: [4096]u8 = undefined;
-												tar: fix tests on 32-bit platforms

											
										
										
											2023-12-12 13:18:20 +00:00
+								                var n: u64 = 0;
-												tar: reorganize file, functions before tests

											
										
										
											2023-12-11 21:00:49 +00:00
+								                while (n < self.size) {
 								                    const buf = buffer[0..@min(buffer.len, self.size - n)];
 								                    try self.reader.readNoEof(buf);
 								                    try writer.writeAll(buf);
 								                    n += buf.len;
 								                }
 								            }
 								            // Discards `size` bytes of file content from the reader
 								            // without copying them anywhere. Advances reader.
 								            pub fn skip(self: File) !void {
 								                return self.reader.skipBytes(self.size, .{});
 								            }
 								        };
 								        const Self = @This();
 								        // Reads the next header block into `header_buffer`, first skipping
 								        // any padding recorded for the previous entry.
 								        // Returns null when the reader yields no more bytes or when the
 								        // header checksum evaluates to zero (presumably the all-zero
 								        // end-of-archive block). A partially read block is an error.
 								        fn readHeader(self: *Self) !?Header {
 								            if (self.padding != 0) try self.reader.skipBytes(self.padding, .{});

 								            const read_len = try self.reader.readAll(&self.header_buffer);
 								            if (read_len == 0) return null; // nothing left in the stream
 								            if (read_len < Header.SIZE) return error.UnexpectedEndOfStream; // truncated block

 								            const header = Header{ .bytes = self.header_buffer[0..Header.SIZE] };
 								            const checksum = try header.checkChksum();
 								            return if (checksum == 0) null else header;
 								        }
-												std.tar: remove abuse of inline fn

In general, any `inline fn` should document why it is using `inline`
because the rule of thumb is: don't use inline.

											
										
										
											2024-02-22 22:45:07 +00:00
+								        fn readString(self: *Self, size: usize, buffer: []u8) ![]const u8 {
-												fix crash in tar found by fuzzing

Running fuzzing tar test with [zig std lib
fuzzing](https://github.com/squeek502/zig-std-lib-fuzzing) reached and
assert in tar implementation. Assert (in std lib) should not be
reachable by external input, so I'm fixing this to return error.

											
										
										
											2024-02-22 20:24:30 +00:00
+								            if (size > buffer.len) return error.TarCorruptInput;
-												tar: reorganize file, functions before tests

											
										
										
											2023-12-11 21:00:49 +00:00
+								            const buf = buffer[0..size];
 								            try self.reader.readNoEof(buf);
 								            return nullStr(buf);
 								        }
-												std.tar: remove abuse of inline fn

In general, any `inline fn` should document why it is using `inline`
because the rule of thumb is: don't use inline.

											
										
										
											2024-02-22 22:45:07 +00:00
+								        fn initFile(self: *Self) void {
-												tar: reorganize file, functions before tests

											
										
										
											2023-12-11 21:00:49 +00:00
+								            self.file = File{
 								                .name = self.file_name_buffer[0..0],
 								                .link_name = self.link_name_buffer[0..0],
 								                .size = 0,
 								                .kind = .normal,
 								                .mode = 0,
 								                .reader = self.reader,
 								            };
 								        }
 								        // Number of padding bytes in the last file block.
-												std.tar: remove abuse of inline fn

In general, any `inline fn` should document why it is using `inline`
because the rule of thumb is: don't use inline.

											
										
										
											2024-02-22 22:45:07 +00:00
+								        fn blockPadding(size: u64) usize {
-												tar: fix tests on 32-bit platforms

											
										
										
											2023-12-12 13:18:20 +00:00
+								            const block_rounded = std.mem.alignForward(u64, size, Header.SIZE); // size rounded to te block boundary
 								            return @intCast(block_rounded - size);
-												tar: reorganize file, functions before tests

											
										
										
											2023-12-11 21:00:49 +00:00
+								        }
 								        /// Iterates through the tar archive as if it is a series of files.
 								        /// Internally, the tar format often uses entries (header with optional
 								        /// content) to add meta data that describes the next file. These
 								        /// entries should not normally be visible to the outside. As such, this
 								        /// loop iterates through one or more entries until it collects all
 								        /// file attributes.
 								        pub fn next(self: *Self) !?File {
 								            self.initFile();
 								            while (try self.readHeader()) |header| {
 								                const kind = header.kind();
-												tar: fix tests on 32-bit platforms

											
										
										
											2023-12-12 13:18:20 +00:00
+								                const size: u64 = try header.size();
-												tar: reorganize file, functions before tests

											
										
										
											2023-12-11 21:00:49 +00:00
+								                self.padding = blockPadding(size);
 								                switch (kind) {
 								                    // File types to return upstream
 								                    .directory, .normal, .symbolic_link => {
 								                        self.file.kind = kind;
 								                        self.file.mode = try header.mode();
 								                        // set file attributes if not already set by prefix/extended headers
 								                        if (self.file.size == 0) {
 								                            self.file.size = size;
 								                        }
 								                        if (self.file.link_name.len == 0) {
 								                            self.file.link_name = header.linkName(self.link_name_buffer[0..Header.LINK_NAME_SIZE]);
 								                        }
 								                        if (self.file.name.len == 0) {
 								                            self.file.name = try header.fullName(self.file_name_buffer[0..Header.MAX_NAME_SIZE]);
 								                        }
 								                        self.padding = blockPadding(self.file.size);
 								                        return self.file;
 								                    },
 								                    // Prefix header types
 								                    .gnu_long_name => {
-												tar: fix tests on 32-bit platforms

											
										
										
											2023-12-12 13:18:20 +00:00
+								                        self.file.name = try self.readString(@intCast(size), &self.file_name_buffer);
-												tar: reorganize file, functions before tests

											
										
										
											2023-12-11 21:00:49 +00:00
+								                    },
 								                    .gnu_long_link => {
-												tar: fix tests on 32-bit platforms

											
										
										
											2023-12-12 13:18:20 +00:00
+								                        self.file.link_name = try self.readString(@intCast(size), &self.link_name_buffer);
-												tar: reorganize file, functions before tests

											
										
										
											2023-12-11 21:00:49 +00:00
+								                    },
 								                    .extended_header => {
 								                        // Use just attributes from last extended header.
 								                        self.initFile();
-												tar: rename reader to iterator

Itarator has `next` function, iterates over tar files. When using from
outside of module with `tar.` prefix makes more sense.

var iter = tar.iterator(reader, null);
while (try iter.next()) |file| {
...
}

											
										
										
											2023-12-12 17:50:25 +00:00
+								                        var rdr = paxIterator(self.reader, @intCast(size));
-												tar: reorganize file, functions before tests

											
										
										
											2023-12-11 21:00:49 +00:00
+								                        while (try rdr.next()) |attr| {
 								                            switch (attr.kind) {
 								                                .path => {
 								                                    self.file.name = try attr.value(&self.file_name_buffer);
 								                                },
 								                                .linkpath => {
 								                                    self.file.link_name = try attr.value(&self.link_name_buffer);
 								                                },
 								                                .size => {
 								                                    var buf: [64]u8 = undefined;
-												tar: fix tests on 32-bit platforms

											
										
										
											2023-12-12 13:18:20 +00:00
+								                                    self.file.size = try std.fmt.parseInt(u64, try attr.value(&buf), 10);
-												tar: reorganize file, functions before tests

											
										
										
											2023-12-11 21:00:49 +00:00
+								                                },
 								                            }
 								                        }
 								                    },
 								                    // Ignored header type
 								                    .global_extended_header => {
 								                        self.reader.skipBytes(size, .{}) catch return error.TarHeadersTooBig;
 								                    },
 								                    // All other are unsupported header types
 								                    else => {
 								                        const d = self.diagnostics orelse return error.TarUnsupportedHeader;
 								                        try d.errors.append(d.allocator, .{ .unsupported_file_type = .{
 								                            .file_name = try d.allocator.dupe(u8, header.name()),
 								                            .file_type = kind,
 								                        } });
-												tar: improve diagnostic reporting

Using Python testtar file (mentioned in #14310) to test diagnostic
reporting.
Added computing checksum by using both unsigned and signed header bytes
values.
Added skipping gnu exteneded sparse headers while reporting unsupported
header in diagnostic.

Note on testing:

wget https://github.com/python/cpython/raw/3.11/Lib/test/testtar.tar -O
/tmp/testtar.tar

```
test "Python testtar.tar file" {
    const file_name = "testtar.tar";

    var file = try std.fs.cwd().openFile("/tmp/" ++ file_name, .{});
    defer file.close();

    var diag = Options.Diagnostics{ .allocator = std.testing.allocator };
    defer diag.deinit();

    var iter = iterator(file.reader(), &diag);
    while (try iter.next()) |f| {
        std.debug.print("supported: {} {s} {d}\n", .{ f.kind, f.name, f.size });
        try f.skip();
    }
    for (diag.errors.items) |e| {
        switch (e) {
            .unsupported_file_type => |u| {
                std.debug.print("unsupported: {} {s}\n", .{ u.file_type, u.file_name });
            },
            else => unreachable,
        }
    }
}
```

											
										
										
											2023-12-18 20:39:07 +00:00
+								                        if (kind == .gnu_sparse) {
 								                            try self.skipGnuSparseExtendedHeaders(header);
 								                        }
 								                        self.reader.skipBytes(size, .{}) catch return error.TarHeadersTooBig;
-												tar: reorganize file, functions before tests

											
										
										
											2023-12-11 21:00:49 +00:00
+								                    },
 								                }
 								            }
 								            return null;
 								        }
-												tar: improve diagnostic reporting

Using Python testtar file (mentioned in #14310) to test diagnostic
reporting.
Added computing checksum by using both unsigned and signed header bytes
values.
Added skipping gnu exteneded sparse headers while reporting unsupported
header in diagnostic.

Note on testing:

wget https://github.com/python/cpython/raw/3.11/Lib/test/testtar.tar -O
/tmp/testtar.tar

```
test "Python testtar.tar file" {
    const file_name = "testtar.tar";

    var file = try std.fs.cwd().openFile("/tmp/" ++ file_name, .{});
    defer file.close();

    var diag = Options.Diagnostics{ .allocator = std.testing.allocator };
    defer diag.deinit();

    var iter = iterator(file.reader(), &diag);
    while (try iter.next()) |f| {
        std.debug.print("supported: {} {s} {d}\n", .{ f.kind, f.name, f.size });
        try f.skip();
    }
    for (diag.errors.items) |e| {
        switch (e) {
            .unsupported_file_type => |u| {
                std.debug.print("unsupported: {} {s}\n", .{ u.file_type, u.file_name });
            },
            else => unreachable,
        }
    }
}
```

											
										
										
											2023-12-18 20:39:07 +00:00
 								        // Consumes the chain of extra header blocks that may follow a GNU
 								        // sparse header. Byte 482 of the main header and byte 504 of every
 								        // following block act as "another block follows" flags; reading
 								        // stops at the first block whose flag byte is zero.
 								        fn skipGnuSparseExtendedHeaders(self: *Self, header: Header) !void {
 								            // No continuation flag in the main header: nothing to skip.
 								            if (header.bytes[482] == 0) return;

 								            var block: [Header.SIZE]u8 = undefined;
 								            while (true) {
 								                const read_len = try self.reader.readAll(&block);
 								                if (read_len < Header.SIZE) return error.UnexpectedEndOfStream;
 								                if (block[504] == 0) return;
 								            }
 								        }
-												tar: reorganize file, functions before tests

											
										
										
											2023-12-11 21:00:49 +00:00
+								    };
 								}
-												tar: rename reader to iterator

Itarator has `next` function, iterates over tar files. When using from
outside of module with `tar.` prefix makes more sense.

var iter = tar.iterator(reader, null);
while (try iter.next()) |file| {
...
}

											
										
										
											2023-12-12 17:50:25 +00:00
+								/// Pax attributes iterator.
 								/// Size is length of pax extended header in reader.
 								fn paxIterator(reader: anytype, size: usize) PaxIterator(@TypeOf(reader)) {
 								    return PaxIterator(@TypeOf(reader)){
-												tar: reorganize file, functions before tests

											
										
										
											2023-12-11 21:00:49 +00:00
+								        .reader = reader,
 								        .size = size,
 								    };
 								}
 								// Pax extended header keywords that PaxIterator reports to its caller.
 								// Records with any other keyword are skipped by PaxIterator.next.
 								const PaxAttributeKind = enum {
 								    // "path" keyword; Iterator.next stores its value as the file name.
 								    path,
 								    // "linkpath" keyword; Iterator.next stores its value as the link name.
 								    linkpath,
 								    // "size" keyword; Iterator.next parses its value as the decimal file size.
 								    size,
 								};
-												tar: rename reader to iterator

Itarator has `next` function, iterates over tar files. When using from
outside of module with `tar.` prefix makes more sense.

var iter = tar.iterator(reader, null);
while (try iter.next()) |file| {
...
}

											
										
										
											2023-12-12 17:50:25 +00:00
+								fn PaxIterator(comptime ReaderType: type) type {
-												tar: reorganize file, functions before tests

											
										
										
											2023-12-11 21:00:49 +00:00
+								    return struct {
 								        size: usize, // cumulative size of all pax attributes
 								        reader: ReaderType,
 								        // scratch buffer used for reading attribute length and keyword
 								        scratch: [128]u8 = undefined,
 								        const Self = @This();
 								        const Attribute = struct {
 								            kind: PaxAttributeKind,
 								            len: usize, // length of the attribute value
 								            reader: ReaderType, // reader positioned at value start
 								            // Copies pax attribute value into destination buffer and consumes
 								            // the newline that terminates the record.
 								            // The destination buffer should be at least Attribute.len bytes.
 								            // The length comes from the archive, so a value that does not fit
 								            // is reported as error.TarCorruptInput rather than asserted on.
 								            // Other errors: error.UnexpectedEndOfStream when the reader ends
 								            // before the whole value is read, error.PaxInvalidAttributeEnd when
 								            // the record is not newline terminated, error.PaxNullInValue when
 								            // the value contains a zero byte.
 								            pub fn value(self: Attribute, dst: []u8) ![]const u8 {
 								                if (self.len > dst.len) return error.TarCorruptInput;
 								                const buf = dst[0..self.len];
 								                const n = try self.reader.readAll(buf);
 								                if (n < self.len) return error.UnexpectedEndOfStream;
 								                try validateAttributeEnding(self.reader);
 								                if (hasNull(buf)) return error.PaxNullInValue;
 								                return buf;
 								            }
 								        };
 								        // Iterates over pax attributes. Returns only known attributes
 								        // (path, linkpath, size); records with other keywords are skipped.
 								        // Caller has to call value in Attribute, to advance reader across value.
 								        pub fn next(self: *Self) !?Attribute {
 								            // Each record of a pax extended header has the form
 								            // "%d %s=%s\n", <length>, <keyword>, <value>
 								            // where <length> counts the whole record, including itself.
 								            while (self.size != 0) {
 								                // The length text lives in the scratch buffer, so it must be
 								                // parsed before the keyword read overwrites that buffer.
 								                const length_text = try self.readUntil(' ');
 								                const record_len = try std.fmt.parseInt(usize, length_text, 10);
 								                const keyword = try self.readUntil('=');
 								                if (hasNull(keyword)) return error.PaxNullInKeyword;

 								                // Bytes consumed so far: length digits, keyword and the two separators.
 								                const prefix_len = length_text.len + keyword.len + 2;
 								                if (record_len < prefix_len + 1 or self.size < record_len) return error.UnexpectedEndOfStream;
 								                // The remainder is the value plus its trailing newline.
 								                const value_len = record_len - prefix_len - 1;
 								                self.size -= record_len;

 								                const kind = std.meta.stringToEnum(PaxAttributeKind, keyword) orelse {
 								                    // Unknown keyword: consume value and newline, then try the next record.
 								                    try self.reader.skipBytes(value_len, .{});
 								                    try validateAttributeEnding(self.reader);
 								                    continue;
 								                };
 								                return Attribute{
 								                    .kind = kind,
 								                    .len = value_len,
 								                    .reader = self.reader,
 								                };
 								            }
 								            return null;
 								        }
-												std.tar: remove abuse of inline fn

In general, any `inline fn` should document why it is using `inline`
because the rule of thumb is: don't use inline.

											
										
										
											2024-02-22 22:45:07 +00:00
+								        fn readUntil(self: *Self, delimiter: u8) ![]const u8 {
-												tar: reorganize file, functions before tests

											
										
										
											2023-12-11 21:00:49 +00:00
+								            var fbs = std.io.fixedBufferStream(&self.scratch);
 								            try self.reader.streamUntilDelimiter(fbs.writer(), delimiter, null);
 								            return fbs.getWritten();
 								        }
-												std.tar: remove abuse of inline fn

In general, any `inline fn` should document why it is using `inline`
because the rule of thumb is: don't use inline.

											
										
										
											2024-02-22 22:45:07 +00:00
+								        fn eql(a: []const u8, b: []const u8) bool {
-												tar: reorganize file, functions before tests

											
										
										
											2023-12-11 21:00:49 +00:00
+								            return std.mem.eql(u8, a, b);
 								        }
-												std.tar: remove abuse of inline fn

In general, any `inline fn` should document why it is using `inline`
because the rule of thumb is: don't use inline.

											
										
										
											2024-02-22 22:45:07 +00:00
+								        fn hasNull(str: []const u8) bool {
-												tar: reorganize file, functions before tests

											
										
										
											2023-12-11 21:00:49 +00:00
+								            return (std.mem.indexOfScalar(u8, str, 0)) != null;
 								        }
 								        // Checks that each record ends with new line.
-												std.tar: remove abuse of inline fn

In general, any `inline fn` should document why it is using `inline`
because the rule of thumb is: don't use inline.

											
										
										
											2024-02-22 22:45:07 +00:00
+								        fn validateAttributeEnding(reader: ReaderType) !void {
-												tar: reorganize file, functions before tests

											
										
										
											2023-12-11 21:00:49 +00:00
+								            if (try reader.readByte() != '\n') return error.PaxInvalidAttributeEnd;
 								        }
 								    };
 								}
-												add std.tar for tar file unpacking

											
										
										
											2023-01-10 05:36:35 +00:00
+								pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: Options) !void {
-												zig build: add executable bit and file path to package hash

Unfortunately, due to the Windows equivalent of executable permissions
being a bit tricky, there is follow-up work to be done.

What is done in this commit is the hash modifications. At the fetch
layer, executable bits inside packages are ignored. In the hash
computation layer, executable bit is implemented for POSIX but not yet
for Windows. This means that the hash will not break again in the future
for packages that do not have any executable files, but it will break
for packages that do.

This is a hash-breaking change.

Closes #14308

											
										
										
											2023-02-02 01:42:29 +00:00
+								    switch (options.mode_mode) {
 								        .ignore => {},
 								        .executable_bit_only => {
 								            // This code does not look at the mode bits yet. To implement this feature,
 								            // the implementation must be adjusted to look at the mode, and check the
 								            // user executable bit, then call fchmod on newly created files when
 								            // the executable bit is supposed to be set.
 								            // It also needs to properly deal with ACLs on Windows.
 								            @panic("TODO: unimplemented: tar ModeMode.executable_bit_only");
 								        },
 								    }
-												tar: refactor code to be more testable

Split reading/parsing tar file and writing results to the disk in two
separate steps. So we can later test parsing part without need to write
everyting to the disk.

											
										
										
											2023-11-27 13:51:51 +00:00
-												tar: rename reader to iterator

Itarator has `next` function, iterates over tar files. When using from
outside of module with `tar.` prefix makes more sense.

var iter = tar.iterator(reader, null);
while (try iter.next()) |file| {
...
}

											
										
										
											2023-12-12 17:50:25 +00:00
+								    var iter = iterator(reader, options.diagnostics);
-												tar: add initial test cases

Just adding tests, without changing functionality.

											
										
										
											2023-11-27 20:37:30 +00:00
+								    while (try iter.next()) |file| {
-												tar: use file word in less places

											
										
										
											2023-12-11 19:46:27 +00:00
+								        switch (file.kind) {
-												add std.tar for tar file unpacking

											
										
										
											2023-01-10 05:36:35 +00:00
+								            .directory => {
-												Permits tar directory path without trailing slash

											
										
										
											2023-11-30 15:51:29 +00:00
+								                const file_name = stripComponents(file.name, options.strip_components);
-												std.tar: add option for omitting empty directories

											
										
										
											2023-10-04 06:25:04 +00:00
+								                if (file_name.len != 0 and !options.exclude_empty_directories) {
-												std.tar: make sub dirs + trim spaces

closes #15222. these changes allow the .tgz from this issue to
decompress and the test code to succeed.

											
										
										
											2023-04-09 23:05:17 +00:00
+								                    try dir.makePath(file_name);
-												std.tar: add strip_components option

											
										
										
											2023-01-10 07:38:18 +00:00
+								                }
-												add std.tar for tar file unpacking

											
										
										
											2023-01-10 05:36:35 +00:00
+								            },
 								            .normal => {
-												tar: add initial test cases

Just adding tests, without changing functionality.

											
										
										
											2023-11-27 20:37:30 +00:00
+								                if (file.size == 0 and file.name.len == 0) return;
-												Permits tar directory path without trailing slash

											
										
										
											2023-11-30 15:51:29 +00:00
+								                const file_name = stripComponents(file.name, options.strip_components);
 								                if (file_name.len == 0) return error.BadFileName;
-												add std.tar for tar file unpacking

											
										
										
											2023-01-10 05:36:35 +00:00
-												std.tar improve error reporting

Report file name which failed to create in all cases.

											
										
										
											2024-02-25 11:03:23 +00:00
+								                if (createDirAndFile(dir, file_name)) |fs_file| {
 								                    defer fs_file.close();
 								                    try file.write(fs_file);
 								                } else |err| {
 								                    const d = options.diagnostics orelse return err;
 								                    try d.errors.append(d.allocator, .{ .unable_to_create_file = .{
 								                        .code = err,
 								                        .file_name = try d.allocator.dupe(u8, file_name),
 								                    } });
-												tar: add initial test cases

Just adding tests, without changing functionality.

											
										
										
											2023-11-27 20:37:30 +00:00
+								                    try file.skip();
-												add std.tar for tar file unpacking

											
										
										
											2023-01-10 05:36:35 +00:00
+								                }
 								            },
-												std.tar: support symlinks

closes #16678

											
										
										
											2023-10-01 06:00:39 +00:00
+								            .symbolic_link => {
-												std.tar: fix creation of symlinks with omit_empty_directories

											
										
										
											2023-10-08 06:09:39 +00:00
+								                // The file system path of the symbolic link.
-												Permits tar directory path without trailing slash

											
										
										
											2023-11-30 15:51:29 +00:00
+								                const file_name = stripComponents(file.name, options.strip_components);
 								                if (file_name.len == 0) return error.BadFileName;
-												std.tar: fix creation of symlinks with omit_empty_directories

											
										
										
											2023-10-08 06:09:39 +00:00
+								                // The data inside the symbolic link.
-												tar: add initial test cases

Just adding tests, without changing functionality.

											
										
										
											2023-11-27 20:37:30 +00:00
+								                const link_name = file.link_name;
-												std.tar: support symlinks

closes #16678

											
										
										
											2023-10-01 06:00:39 +00:00
-												std.tar improve error reporting

Report file name which failed to create in all cases.

											
										
										
											2024-02-25 11:03:23 +00:00
+								                createDirAndSymlink(dir, link_name, file_name) catch |err| {
 								                    const d = options.diagnostics orelse return err;
-												zig fetch: enhanced error reporting

* Package: use std.tar diagnostics to give detailed error messages
* std.tar: add diagnostic for unsupported file type

											
										
										
											2023-10-03 00:00:45 +00:00
+								                    try d.errors.append(d.allocator, .{ .unable_to_create_sym_link = .{
-												std.tar improve error reporting

Report file name which failed to create in all cases.

											
										
										
											2024-02-25 11:03:23 +00:00
+								                        .code = err,
-												zig fetch: enhanced error reporting

* Package: use std.tar diagnostics to give detailed error messages
* std.tar: add diagnostic for unsupported file type

											
										
										
											2023-10-03 00:00:45 +00:00
+								                        .file_name = try d.allocator.dupe(u8, file_name),
 								                        .link_name = try d.allocator.dupe(u8, link_name),
 								                    } });
-												std.tar: support symlinks

closes #16678

											
										
										
											2023-10-01 06:00:39 +00:00
+								                };
 								            },
-												tar: refactor code to be more testable

Split reading/parsing tar file and writing results to the disk in two
separate steps. So we can later test parsing part without need to write
everyting to the disk.

											
										
										
											2023-11-27 13:51:51 +00:00
+								            else => unreachable,
-												add std.tar for tar file unpacking

											
										
										
											2023-01-10 05:36:35 +00:00
+								        }
 								    }
 								}
-												std.tar improve error reporting

Report file name which failed to create in all cases.

											
										
										
											2024-02-25 11:03:23 +00:00
+								fn createDirAndFile(dir: std.fs.Dir, file_name: []const u8) !std.fs.File {
 								    // Creates `file_name` exclusively inside `dir`. When that fails with
 								    // FileNotFound and the name has a directory part, the directories are
 								    // created and the file creation is retried once. Any other failure is
 								    // returned unchanged.
 								    return dir.createFile(file_name, .{ .exclusive = true }) catch |err| {
 								        if (err != error.FileNotFound) return err;
 								        const parent = std.fs.path.dirname(file_name) orelse return err;
 								        try dir.makePath(parent);
 								        return dir.createFile(file_name, .{ .exclusive = true });
 								    };
 								}
 								// Creates symbolic link `file_name` inside `dir` whose content is `link_name`.
 								// When the first attempt fails with FileNotFound and `file_name` has a
 								// directory part, the directories are created and the link creation is
 								// retried once. Every other failure is returned to the caller, so that
 								// it can be reported instead of being silently dropped.
 								fn createDirAndSymlink(dir: std.fs.Dir, link_name: []const u8, file_name: []const u8) !void {
 								    dir.symLink(link_name, file_name, .{}) catch |err| {
 								        if (err == error.FileNotFound) {
 								            if (std.fs.path.dirname(file_name)) |dir_name| {
 								                try dir.makePath(dir_name);
 								                return try dir.symLink(link_name, file_name, .{});
 								            }
 								        }
 								        return err;
 								    };
 								}
-												Permits tar directory path without trailing slash

											
										
										
											2023-11-30 15:51:29 +00:00
+								fn stripComponents(path: []const u8, count: u32) []const u8 {
-												std.tar: add strip_components option

											
										
										
											2023-01-10 07:38:18 +00:00
+								    var i: usize = 0;
 								    var c = count;
 								    while (c > 0) : (c -= 1) {
 								        if (std.mem.indexOfScalarPos(u8, path, i, '/')) |pos| {
 								            i = pos + 1;
 								        } else {
-												Permits tar directory path without trailing slash

											
										
										
											2023-11-30 15:51:29 +00:00
+								            i = path.len;
 								            break;
-												std.tar: add strip_components option

											
										
										
											2023-01-10 07:38:18 +00:00
+								        }
 								    }
 								    return path[i..];
 								}
-												tar: prefix test cases with 'tar'

To make it little easier to filter from all stdlib tests.

											
										
										
											2023-12-01 18:03:32 +00:00
+								test "tar stripComponents" {
-												std.tar: add strip_components option

											
										
										
											2023-01-10 07:38:18 +00:00
+								    const expectEqualStrings = std.testing.expectEqualStrings;
-												Permits tar directory path without trailing slash

											
										
										
											2023-11-30 15:51:29 +00:00
+								    try expectEqualStrings("a/b/c", stripComponents("a/b/c", 0));
 								    try expectEqualStrings("b/c", stripComponents("a/b/c", 1));
 								    try expectEqualStrings("c", stripComponents("a/b/c", 2));
 								    try expectEqualStrings("", stripComponents("a/b/c", 3));
 								    try expectEqualStrings("", stripComponents("a/b/c", 4));
-												std.tar: add strip_components option

											
										
										
											2023-01-10 07:38:18 +00:00
+								}
-												tar: rename reader to iterator

Itarator has `next` function, iterates over tar files. When using from
outside of module with `tar.` prefix makes more sense.

var iter = tar.iterator(reader, null);
while (try iter.next()) |file| {
...
}

											
										
										
											2023-12-12 17:50:25 +00:00
test "tar PaxIterator" {
    // Expectation for a single attribute yielded by the iterator.
    const Attr = struct {
        kind: PaxAttributeKind,
        // Expected attribute value. Left undefined for attributes whose value
        // read is expected to fail with `err`.
        value: []const u8 = undefined,
        // Error expected from reading the attribute value, if any.
        err: ?anyerror = null,
    };
    const cases = [_]struct {
        // Raw content of the pax extended header.
        data: []const u8,
        // Attributes the iterator is expected to yield, in order.
        attrs: []const Attr,
        // Error expected from `next` itself, if any.
        err: ?anyerror = null,
    }{
        .{ // valid but unknown keys
            .data =
            \\30 mtime=1350244992.023960108
            \\6 k=1
            \\13 key1=val1
            \\10 a=name
            \\9 a=name
            \\
            ,
            .attrs = &[_]Attr{},
        },
        .{ // mix of known and unknown keys
            .data =
            \\6 k=1
            \\13 path=name
            \\17 linkpath=link
            \\13 key1=val1
            \\12 size=123
            \\13 key2=val2
            \\
            ,
            .attrs = &[_]Attr{
                .{ .kind = .path, .value = "name" },
                .{ .kind = .linkpath, .value = "link" },
                .{ .kind = .size, .value = "123" },
            },
        },
        .{ // too short size of the second key-value pair
            .data =
            \\13 path=name
            \\10 linkpath=value
            \\
            ,
            .attrs = &[_]Attr{
                .{ .kind = .path, .value = "name" },
            },
            .err = error.UnexpectedEndOfStream,
        },
        .{ // too long size of the last key-value pair
            .data =
            \\13 path=name
            \\6 k=1
            \\19 linkpath=value
            \\
            ,
            .attrs = &[_]Attr{
                .{ .kind = .path, .value = "name" },
            },
            .err = error.UnexpectedEndOfStream,
        },
        .{ // too long size of the second key-value pair, more data follows
            .data =
            \\13 path=name
            \\19 linkpath=value
            \\6 k=1
            \\
            ,
            .attrs = &[_]Attr{
                .{ .kind = .path, .value = "name" },
                .{ .kind = .linkpath, .err = error.PaxInvalidAttributeEnd },
            },
        },
        .{ // null in keyword is not valid
            .data = "13 path=name\n" ++ "7 k\x00b=1\n",
            .attrs = &[_]Attr{
                .{ .kind = .path, .value = "name" },
            },
            .err = error.PaxNullInKeyword,
        },
        .{ // null in value is not valid
            .data = "23 path=name\x00with null\n",
            .attrs = &[_]Attr{
                .{ .kind = .path, .err = error.PaxNullInValue },
            },
        },
        .{ // 1000 characters path
            .data = "1011 path=" ++ "0123456789" ** 100 ++ "\n",
            .attrs = &[_]Attr{
                .{ .kind = .path, .value = "0123456789" ** 100 },
            },
        },
    };

    // Scratch space handed to `attr.value`; reused across all cases.
    var buffer: [1024]u8 = undefined;

    outer: for (cases) |case| {
        var stream = std.io.fixedBufferStream(case.data);
        var iter = paxIterator(stream.reader(), case.data.len);

        var i: usize = 0;
        while (iter.next() catch |err| {
            if (case.err) |e| {
                try std.testing.expectEqual(e, err);
                // The iterator failed as expected; this case is finished.
                // The checks after the loop are skipped for such cases.
                continue :outer;
            }
            return err;
        }) |attr| : (i += 1) {
            const exp = case.attrs[i];
            try std.testing.expectEqual(exp.kind, attr.kind);
            const value = attr.value(&buffer) catch |err| {
                if (exp.err) |e| {
                    try std.testing.expectEqual(e, err);
                    // Proceed with the next case. Using `break :outer` here
                    // would leave the `for` loop entirely and silently skip
                    // every case that follows.
                    continue :outer;
                }
                return err;
            };
            try std.testing.expectEqualStrings(exp.value, value);
        }
        // The iterator ran to completion without an error: every expected
        // attribute must have been seen and no error may have been expected.
        try std.testing.expectEqual(case.attrs.len, i);
        try std.testing.expect(case.err == null);
    }
}
-												tar: move test cases to std/tar/testdata

Create std/tar/test.zig for test which uses cases from testdata.

											
										
										
											2023-12-11 22:55:07 +00:00
 								// Reference tar/test.zig from a test block so that the tests declared
 								// in that file are included when this file's tests are run.
 								test {
 								    _ = @import("tar/test.zig");
 								}
-												std.tar fix parsing mode field in tar header

Found by fuzzing. The previous numeric function assumed that it is getting
a buffer of size 12, but mode is size 8. Fuzzing found an overflow.
Fixing and adding test cases.

											
										
										
											2024-02-23 20:57:15 +00:00
 								test "tar header parse size" {
 								    // `in` is written at byte offset 124 of an otherwise zeroed header
 								    // block and then parsed with `Header.size`. A case expects either
 								    // the value `want` or the error `err`.
 								    const cases = [_]struct {
 								        in: []const u8,
 								        want: u64 = 0,
 								        err: ?anyerror = null,
 								    }{
 								        // Test base-256 (binary) encoded values.
 								        .{ .in = "", .want = 0 },
 								        .{ .in = "\x80", .want = 0 },
 								        .{ .in = "\x80\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01", .want = 1 },
 								        .{ .in = "\x80\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x02", .want = 0x0102 },
 								        .{ .in = "\x80\x00\x00\x00\x01\x02\x03\x04\x05\x06\x07\x08", .want = 0x0102030405060708 },
 								        .{ .in = "\x80\x00\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09", .err = error.TarNumericValueTooBig },
 								        .{ .in = "\x80\x00\x00\x00\x07\x76\xa2\x22\xeb\x8a\x72\x61", .want = 537795476381659745 },
 								        .{ .in = "\x80\x80\x80\x00\x01\x02\x03\x04\x05\x06\x07\x08", .err = error.TarNumericValueTooBig },
 								        // Test base-8 (octal) encoded values.
 								        .{ .in = "00000000227\x00", .want = 0o227 },
 								        .{ .in = "  000000227\x00", .want = 0o227 },
 								        .{ .in = "00000000228\x00", .err = error.TarHeader },
 								        .{ .in = "11111111111\x00", .want = 0o11111111111 },
 								    };
 								    for (cases) |case| {
 								        var bytes = [_]u8{0} ** Header.SIZE;
 								        @memcpy(bytes[124..][0..case.in.len], case.in);
 								        var header = Header{ .bytes = &bytes };
 								        // No error expected: the parsed size must equal `want`.
 								        const expected_err = case.err orelse {
 								            try std.testing.expectEqual(case.want, try header.size());
 								            continue;
 								        };
 								        try std.testing.expectError(expected_err, header.size());
 								    }
 								}
 								test "tar header parse mode" {
 								    // `in` is written at byte offset 100 of an otherwise zeroed header
 								    // block and then parsed with `Header.mode`. A case expects either
 								    // the value `want` or the error `err`.
 								    const cases = [_]struct {
 								        in: []const u8,
 								        want: u64 = 0,
 								        err: ?anyerror = null,
 								    }{
 								        .{ .in = "0000644\x00", .want = 0o644 },
 								        .{ .in = "0000777\x00", .want = 0o777 },
 								        .{ .in = "7777777\x00", .want = 0o7777777 },
 								        .{ .in = "7777778\x00", .err = error.TarHeader },
 								        .{ .in = "77777777", .want = 0o77777777 },
 								        // Twelve digits are written, but only eight are expected back.
 								        .{ .in = "777777777777", .want = 0o77777777 },
 								    };
 								    for (cases) |case| {
 								        var bytes = [_]u8{0} ** Header.SIZE;
 								        @memcpy(bytes[100..][0..case.in.len], case.in);
 								        var header = Header{ .bytes = &bytes };
 								        // No error expected: the parsed mode must equal `want`.
 								        const expected_err = case.err orelse {
 								            try std.testing.expectEqual(case.want, try header.mode());
 								            continue;
 								        };
 								        try std.testing.expectError(expected_err, header.mode());
 								    }
 								}