2024-03-07 04:15:36 +00:00
|
|
|
|
//! Tar archive is single ordinary file which can contain many files (or
|
|
|
|
|
//! directories, symlinks, ...). It is built from a series of blocks, each of size 512
|
|
|
|
|
//! bytes. First block of each entry is header which defines type, name, size
|
|
|
|
|
//! permissions and other attributes. Header is followed by series of blocks of
|
|
|
|
|
//! file content, if any that entry has content. Content is padded to the block
|
|
|
|
|
//! size, so next header always starts at block boundary.
|
|
|
|
|
//!
|
|
|
|
|
//! This simple format is extended by GNU and POSIX pax extensions to support
|
|
|
|
|
//! file names longer than 256 bytes and additional attributes.
|
|
|
|
|
//!
|
|
|
|
|
//! This is not a comprehensive tar parser. It supports only the file types needed to
|
|
|
|
|
//! support Zig package manager; normal file, directory, symbolic link. And
|
|
|
|
|
//! subset of attributes: name, size, permissions.
|
|
|
|
|
//!
|
|
|
|
|
//! GNU tar reference: https://www.gnu.org/software/tar/manual/html_node/Standard.html
|
|
|
|
|
//! pax reference: https://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13
|
|
|
|
|
|
2024-03-02 17:08:32 +00:00
|
|
|
|
const std = @import("std");
|
2023-11-29 14:31:22 +00:00
|
|
|
|
const assert = std.debug.assert;
|
2024-03-10 15:31:10 +00:00
|
|
|
|
const testing = std.testing;
|
2023-11-29 14:31:22 +00:00
|
|
|
|
|
2024-03-07 04:15:36 +00:00
|
|
|
|
pub const output = @import("tar/output.zig");
|
|
|
|
|
|
2024-03-10 17:13:47 +00:00
|
|
|
|
/// Provide this to receive detailed error messages.
/// When this is provided, some errors which would otherwise be returned
/// immediately will instead be added to this structure. The API user must check
/// the errors in diagnostics to know whether the operation succeeded or failed.
pub const Diagnostics = struct {
    allocator: std.mem.Allocator,
    errors: std.ArrayListUnmanaged(Error) = .{},

    /// One collected unpacking failure. All string payloads are owned by
    /// `allocator` and released in `deinit`.
    pub const Error = union(enum) {
        unable_to_create_sym_link: struct {
            code: anyerror,
            file_name: []const u8,
            link_name: []const u8,
        },
        unable_to_create_file: struct {
            code: anyerror,
            file_name: []const u8,
        },
        unsupported_file_type: struct {
            file_name: []const u8,
            file_type: Header.Kind,
        },
    };

    /// Frees every duplicated name held by the collected errors, then the
    /// error list itself. The Diagnostics value must not be used afterwards.
    pub fn deinit(d: *Diagnostics) void {
        for (d.errors.items) |entry| switch (entry) {
            .unable_to_create_sym_link => |e| {
                d.allocator.free(e.file_name);
                d.allocator.free(e.link_name);
            },
            .unable_to_create_file => |e| d.allocator.free(e.file_name),
            .unsupported_file_type => |e| d.allocator.free(e.file_name),
        };
        d.errors.deinit(d.allocator);
        d.* = undefined;
    }
};
|
|
|
|
|
|
2024-03-10 15:31:10 +00:00
|
|
|
|
/// pipeToFileSystem options
pub const PipeOptions = struct {
    /// Number of directory levels to skip when extracting files.
    /// Entries whose full path is consumed by the stripping are skipped
    /// (directories) or rejected (files/symlinks) by pipeToFileSystem.
    strip_components: u32 = 0,
    /// How to handle the "mode" property of files from within the tar file.
    mode_mode: ModeMode = .executable_bit_only,
    /// Prevents creation of empty directories.
    exclude_empty_directories: bool = false,
    /// Collects error messages during unpacking.
    /// When set, file/symlink creation failures are recorded here instead of
    /// aborting the whole extraction.
    diagnostics: ?*Diagnostics = null,

    pub const ModeMode = enum {
        /// The mode from the tar file is completely ignored. Files are created
        /// with the default mode when creating files.
        ignore,
        /// The mode from the tar file is inspected for the owner executable bit
        /// only. This bit is copied to the group and other executable bits.
        /// Other bits of the mode are left as the default when creating files.
        executable_bit_only,
    };
};
|
2023-01-10 05:36:35 +00:00
|
|
|
|
|
2024-03-10 15:31:10 +00:00
|
|
|
|
/// View over one 512-byte tar header block. Accessors decode the fixed-offset
/// fields defined by the ustar/GNU layout (see the GNU tar "Standard" docs).
const Header = struct {
    // Size of each tar block (header and content blocks alike).
    const SIZE = 512;
    const MAX_NAME_SIZE = 100 + 1 + 155; // name(100) + separator(1) + prefix(155)
    const LINK_NAME_SIZE = 100;

    // Borrowed pointer to the raw header block; not owned by this struct.
    bytes: *const [SIZE]u8,

    // Entry type flag stored at byte offset 156.
    const Kind = enum(u8) {
        // Some writers use a NUL type flag as an alias for a normal file.
        normal_alias = 0,
        normal = '0',
        hard_link = '1',
        symbolic_link = '2',
        character_special = '3',
        block_special = '4',
        directory = '5',
        fifo = '6',
        contiguous = '7',
        global_extended_header = 'g',
        extended_header = 'x',
        // Types 'L' and 'K' are used by the GNU format for a meta file
        // used to store the path or link name for the next file.
        gnu_long_name = 'L',
        gnu_long_link = 'K',
        gnu_sparse = 'S',
        solaris_extended_header = 'X',
        // Non-exhaustive: any other flag byte is representable (and treated
        // as unsupported by the iterator).
        _,
    };

    /// Includes prefix concatenated, if any.
    /// Returns error.TarInsufficientBuffer when `buffer` cannot hold the result.
    /// TODO: check against "../" and other nefarious things
    pub fn fullName(header: Header, buffer: []u8) ![]const u8 {
        const n = name(header);
        const p = prefix(header);
        if (buffer.len < n.len + p.len + 1) return error.TarInsufficientBuffer;
        // Prefix is only meaningful for ustar-format headers.
        if (!is_ustar(header) or p.len == 0) {
            @memcpy(buffer[0..n.len], n);
            return buffer[0..n.len];
        }
        // ustar full name is "<prefix>/<name>".
        @memcpy(buffer[0..p.len], p);
        buffer[p.len] = '/';
        @memcpy(buffer[p.len + 1 ..][0..n.len], n);
        return buffer[0 .. p.len + 1 + n.len];
    }

    /// When kind is symbolic_link the linked-to name (target_path) is specified
    /// in the linkname field (offset 157, 100 bytes). Returns an empty slice
    /// when the field is empty.
    pub fn linkName(header: Header, buffer: []u8) ![]const u8 {
        const link_name = header.str(157, 100);
        if (link_name.len == 0) {
            return buffer[0..0];
        }
        if (buffer.len < link_name.len) return error.TarInsufficientBuffer;
        const buf = buffer[0..link_name.len];
        @memcpy(buf, link_name);
        return buf;
    }

    /// Name field only (offset 0, 100 bytes); does not include the ustar prefix.
    pub fn name(header: Header) []const u8 {
        return header.str(0, 100);
    }

    /// File permission bits, parsed from the octal mode field (offset 100).
    pub fn mode(header: Header) !u32 {
        return @intCast(try header.octal(100, 8));
    }

    /// Content size in bytes (offset 124, 12 bytes). Handles both octal ASCII
    /// and the GNU base-256 (binary) extension.
    pub fn size(header: Header) !u64 {
        const start = 124;
        const len = 12;
        const raw = header.bytes[start..][0..len];
        // If the leading byte is 0xff (255), all the bytes of the field
        // (including the leading byte) are concatenated in big-endian order,
        // with the result being a negative number expressed in two's
        // complement form.
        if (raw[0] == 0xff) return error.TarNumericValueNegative;
        // If the leading byte is 0x80 (128), the non-leading bytes of the
        // field are concatenated in big-endian order.
        if (raw[0] == 0x80) {
            // Only the low 8 bytes fit into u64; a nonzero high byte overflows.
            if (raw[1] != 0 or raw[2] != 0 or raw[3] != 0) return error.TarNumericValueTooBig;
            return std.mem.readInt(u64, raw[4..12], .big);
        }
        return try header.octal(start, len);
    }

    /// Stored checksum field (offset 148, 8 octal bytes).
    pub fn chksum(header: Header) !u64 {
        return header.octal(148, 8);
    }

    /// True when the magic field (offset 257) reads "ustar" followed by NUL or
    /// space (the latter covers the old GNU "ustar  " variant).
    pub fn is_ustar(header: Header) bool {
        const magic = header.bytes[257..][0..6];
        return std.mem.eql(u8, magic[0..5], "ustar") and (magic[5] == 0 or magic[5] == ' ');
    }

    /// ustar prefix field (offset 345, 155 bytes).
    pub fn prefix(header: Header) []const u8 {
        return header.str(345, 155);
    }

    /// Entry type flag; normalizes the NUL alias to `.normal`.
    pub fn kind(header: Header) Kind {
        const result: Kind = @enumFromInt(header.bytes[156]);
        if (result == .normal_alias) return .normal;
        return result;
    }

    // Null-terminated string field at [start..start+len].
    fn str(header: Header, start: usize, len: usize) []const u8 {
        return nullStr(header.bytes[start .. start + len]);
    }

    // Parses an octal ASCII numeric field at [start..start+len].
    // Returns 0 for an all-padding field, error.TarHeader on malformed digits.
    fn octal(header: Header, start: usize, len: usize) !u64 {
        const raw = header.bytes[start..][0..len];
        // Zero-filled octal number in ASCII. Each numeric field of width w
        // contains w minus 1 digits, and a null
        const ltrimmed = std.mem.trimLeft(u8, raw, "0 ");
        const rtrimmed = std.mem.trimRight(u8, ltrimmed, " \x00");
        if (rtrimmed.len == 0) return 0;
        return std.fmt.parseInt(u64, rtrimmed, 8) catch return error.TarHeader;
    }

    // Checksum computed both ways: some historic writers summed signed bytes.
    const Chksums = struct {
        unsigned: u64,
        signed: i64,
    };

    // Sum of all bytes in the header block. The chksum field is treated as if
    // it were filled with spaces (ASCII 32).
    fn computeChksum(header: Header) Chksums {
        var cs: Chksums = .{ .signed = 0, .unsigned = 0 };
        for (header.bytes, 0..) |v, i| {
            const b = if (148 <= i and i < 156) 32 else v; // Treating chksum bytes as spaces.
            cs.unsigned += b;
            cs.signed += @as(i8, @bitCast(b));
        }
        return cs;
    }

    // Checks calculated chksum with value of chksum field.
    // Returns error or valid chksum value.
    // Zero value indicates empty block.
    pub fn checkChksum(header: Header) !u64 {
        const field = try header.chksum();
        const cs = header.computeChksum();
        // An all-zero block sums to 256 (the 8 space-valued chksum bytes).
        if (field == 0 and cs.unsigned == 256) return 0;
        // Accept either signed or unsigned interpretation for compatibility.
        if (field != cs.unsigned and field != cs.signed) return error.TarHeaderChksum;
        return field;
    }
};
|
|
|
|
|
|
2023-12-06 14:35:29 +00:00
|
|
|
|
// Returns the prefix of `str` up to (but not including) the first null byte,
// or the whole slice when no null byte is present.
fn nullStr(str: []const u8) []const u8 {
    const end = std.mem.indexOfScalar(u8, str, 0) orelse str.len;
    return str[0..end];
}
|
|
|
|
|
|
2024-03-10 15:31:10 +00:00
|
|
|
|
/// Options for iterator.
/// Buffers should be provided by the caller; they back the `name` and
/// `link_name` slices of every returned File, so they must outlive the use
/// of each returned entry.
pub const IteratorOptions = struct {
    /// Use a buffer with length `std.fs.MAX_PATH_BYTES` to match file system capabilities.
    file_name_buffer: []u8,
    /// Use a buffer with length `std.fs.MAX_PATH_BYTES` to match file system capabilities.
    link_name_buffer: []u8,
    /// Collects error messages during unpacking.
    diagnostics: ?*Diagnostics = null,
};
|
|
|
|
|
|
2023-12-12 17:50:25 +00:00
|
|
|
|
/// Iterates over files in tar archive.
/// `next` returns each file in tar archive.
pub fn iterator(reader: anytype, options: IteratorOptions) Iterator(@TypeOf(reader)) {
    const Iter = Iterator(@TypeOf(reader));
    return Iter{
        .reader = reader,
        .file_name_buffer = options.file_name_buffer,
        .link_name_buffer = options.link_name_buffer,
        .diagnostics = options.diagnostics,
    };
}
|
|
|
|
|
|
2024-03-10 15:31:10 +00:00
|
|
|
|
/// Type of the file returned by iterator `next` method.
pub const FileKind = enum {
    /// Directory entry; has no content.
    directory,
    /// Symbolic link; target is in File.link_name.
    sym_link,
    /// Regular file; content is File.size bytes.
    file,
};
|
|
|
|
|
|
2024-03-10 15:31:10 +00:00
|
|
|
|
/// Iterator over entries in the tar file represented by reader.
pub fn Iterator(comptime ReaderType: type) type {
    return struct {
        reader: ReaderType,
        diagnostics: ?*Diagnostics = null,

        // buffers for header and file attributes
        header_buffer: [Header.SIZE]u8 = undefined,
        file_name_buffer: []u8,
        link_name_buffer: []u8,

        // bytes of padding to the end of the block
        padding: usize = 0,
        // not consumed bytes of file from last next iteration
        unread_file_bytes: u64 = 0,

        /// One logical entry of the archive. Name/link_name slices point into
        /// the iterator's caller-provided buffers and are invalidated by the
        /// next call to `next`.
        pub const File = struct {
            name: []const u8, // name of file, symlink or directory
            link_name: []const u8, // target name of symlink
            size: u64 = 0, // size of the file in bytes
            mode: u32 = 0,
            kind: FileKind = .file,

            // Shared with the iterator so reads here advance its bookkeeping.
            unread_bytes: *u64,
            parent_reader: ReaderType,

            pub const Reader = std.io.Reader(File, ReaderType.Error, File.read);

            /// std.io.Reader over this entry's remaining content.
            pub fn reader(self: File) Reader {
                return .{ .context = self };
            }

            /// Reads up to dest.len bytes of the entry's remaining content.
            /// Returns 0 once the content is exhausted.
            pub fn read(self: File, dest: []u8) ReaderType.Error!usize {
                const buf = dest[0..@min(dest.len, self.unread_bytes.*)];
                const n = try self.parent_reader.read(buf);
                self.unread_bytes.* -= n;
                return n;
            }

            // Writes file content to writer.
            pub fn writeAll(self: File, writer: anytype) !void {
                var buffer: [4096]u8 = undefined;

                while (self.unread_bytes.* > 0) {
                    const buf = buffer[0..@min(buffer.len, self.unread_bytes.*)];
                    try self.parent_reader.readNoEof(buf);
                    try writer.writeAll(buf);
                    self.unread_bytes.* -= buf.len;
                }
            }
        };

        const Self = @This();

        // Skips pending block padding, then reads the next 512-byte header.
        // Returns null at clean end of archive (no bytes, or an all-zero block).
        fn readHeader(self: *Self) !?Header {
            if (self.padding > 0) {
                try self.reader.skipBytes(self.padding, .{});
            }
            const n = try self.reader.readAll(&self.header_buffer);
            if (n == 0) return null;
            if (n < Header.SIZE) return error.UnexpectedEndOfStream;
            const header = Header{ .bytes = self.header_buffer[0..Header.SIZE] };
            // checkChksum returns 0 only for an all-zero (terminator) block.
            if (try header.checkChksum() == 0) return null;
            return header;
        }

        // Reads `size` bytes into `buffer` and trims at the first null byte.
        fn readString(self: *Self, size: usize, buffer: []u8) ![]const u8 {
            if (size > buffer.len) return error.TarInsufficientBuffer;
            const buf = buffer[0..size];
            try self.reader.readNoEof(buf);
            return nullStr(buf);
        }

        // Fresh File with empty attributes, wired to this iterator's state.
        fn newFile(self: *Self) File {
            return .{
                .name = self.file_name_buffer[0..0],
                .link_name = self.link_name_buffer[0..0],
                .parent_reader = self.reader,
                .unread_bytes = &self.unread_file_bytes,
            };
        }

        // Number of padding bytes in the last file block.
        fn blockPadding(size: u64) usize {
            const block_rounded = std.mem.alignForward(u64, size, Header.SIZE); // size rounded to the block boundary
            return @intCast(block_rounded - size);
        }

        /// Iterates through the tar archive as if it is a series of files.
        /// Internally, the tar format often uses entries (header with optional
        /// content) to add meta data that describes the next file. These
        /// entries should not normally be visible to the outside. As such, this
        /// loop iterates through one or more entries until it collects all
        /// file attributes.
        pub fn next(self: *Self) !?File {
            if (self.unread_file_bytes > 0) {
                // If file content was not consumed by caller
                try self.reader.skipBytes(self.unread_file_bytes, .{});
                self.unread_file_bytes = 0;
            }
            var file: File = self.newFile();

            while (try self.readHeader()) |header| {
                const kind = header.kind();
                const size: u64 = try header.size();
                self.padding = blockPadding(size);

                switch (kind) {
                    // File types to return upstream
                    .directory, .normal, .symbolic_link => {
                        file.kind = switch (kind) {
                            .directory => .directory,
                            .normal => .file,
                            .symbolic_link => .sym_link,
                            else => unreachable,
                        };
                        file.mode = try header.mode();

                        // set file attributes if not already set by prefix/extended headers
                        if (file.size == 0) {
                            file.size = size;
                        }
                        if (file.link_name.len == 0) {
                            file.link_name = try header.linkName(self.link_name_buffer);
                        }
                        if (file.name.len == 0) {
                            file.name = try header.fullName(self.file_name_buffer);
                        }

                        // Padding is based on the effective size, which a pax
                        // "size" attribute may have overridden.
                        self.padding = blockPadding(file.size);
                        self.unread_file_bytes = file.size;
                        return file;
                    },
                    // Prefix header types
                    .gnu_long_name => {
                        file.name = try self.readString(@intCast(size), self.file_name_buffer);
                    },
                    .gnu_long_link => {
                        file.link_name = try self.readString(@intCast(size), self.link_name_buffer);
                    },
                    .extended_header => {
                        // Use just attributes from last extended header.
                        file = self.newFile();

                        var rdr = paxIterator(self.reader, @intCast(size));
                        while (try rdr.next()) |attr| {
                            switch (attr.kind) {
                                .path => {
                                    file.name = try attr.value(self.file_name_buffer);
                                },
                                .linkpath => {
                                    file.link_name = try attr.value(self.link_name_buffer);
                                },
                                .size => {
                                    var buf: [pax_max_size_attr_len]u8 = undefined;
                                    file.size = try std.fmt.parseInt(u64, try attr.value(&buf), 10);
                                },
                            }
                        }
                    },
                    // Ignored header type
                    .global_extended_header => {
                        self.reader.skipBytes(size, .{}) catch return error.TarHeadersTooBig;
                    },
                    // All other are unsupported header types
                    else => {
                        const d = self.diagnostics orelse return error.TarUnsupportedHeader;
                        try d.errors.append(d.allocator, .{ .unsupported_file_type = .{
                            .file_name = try d.allocator.dupe(u8, header.name()),
                            .file_type = kind,
                        } });
                        if (kind == .gnu_sparse) {
                            try self.skipGnuSparseExtendedHeaders(header);
                        }
                        self.reader.skipBytes(size, .{}) catch return error.TarHeadersTooBig;
                    },
                }
            }
            return null;
        }

        // Consumes the chain of GNU sparse extension blocks that follow a
        // sparse header; the isextended flag (byte 482 of the header, byte 504
        // of each extension block) marks whether another block follows.
        fn skipGnuSparseExtendedHeaders(self: *Self, header: Header) !void {
            var is_extended = header.bytes[482] > 0;
            while (is_extended) {
                var buf: [Header.SIZE]u8 = undefined;
                const n = try self.reader.readAll(&buf);
                if (n < Header.SIZE) return error.UnexpectedEndOfStream;
                is_extended = buf[504] > 0;
            }
        }
    };
}
|
|
|
|
|
|
2023-12-12 17:50:25 +00:00
|
|
|
|
/// Pax attributes iterator.
/// `size` is the total length of the pax extended header in `reader`.
fn paxIterator(reader: anytype, size: usize) PaxIterator(@TypeOf(reader)) {
    return .{
        .size = size,
        .reader = reader,
    };
}
|
|
|
|
|
|
|
|
|
|
// Pax attribute keywords recognized by PaxIterator; all other keywords are
// skipped. Each overrides the corresponding fixed-width header field.
const PaxAttributeKind = enum {
    // "path" keyword: the file name.
    path,
    // "linkpath" keyword: the symlink target.
    linkpath,
    // "size" keyword: the content size in decimal.
    size,
};
|
|
|
|
|
|
2024-02-24 15:22:54 +00:00
|
|
|
|
// Upper bound on the length of a pax "size" attribute value. maxInt(u64) is
// 20 decimal digits; 64 leaves headroom over the longer values (around 24
// chars) seen in practice.
const pax_max_size_attr_len = 64;
|
|
|
|
|
|
2023-12-12 17:50:25 +00:00
|
|
|
|
// Streaming parser for the records of a pax extended header. Each record has
// the form "%d %s=%s\n" (length, keyword, value); `next` yields only the
// recognized keywords and leaves the value unread until `Attribute.value`.
fn PaxIterator(comptime ReaderType: type) type {
    return struct {
        size: usize, // cumulative size of all pax attributes
        reader: ReaderType,
        // scratch buffer used for reading attribute length and keyword
        scratch: [128]u8 = undefined,

        const Self = @This();

        const Attribute = struct {
            kind: PaxAttributeKind,
            len: usize, // length of the attribute value
            reader: ReaderType, // reader positioned at value start

            // Copies pax attribute value into destination buffer.
            // Must be called with destination buffer of size at least Attribute.len.
            // Also consumes and validates the trailing record newline.
            pub fn value(self: Attribute, dst: []u8) ![]const u8 {
                if (self.len > dst.len) return error.TarInsufficientBuffer;
                const buf = dst[0..self.len];
                const n = try self.reader.readAll(buf);
                if (n < self.len) return error.UnexpectedEndOfStream;
                try validateAttributeEnding(self.reader);
                if (hasNull(buf)) return error.PaxNullInValue;
                return buf;
            }
        };

        // Iterates over pax attributes. Returns only known attributes.
        // Caller has to call value in Attribute, to advance reader across value.
        pub fn next(self: *Self) !?Attribute {
            // Pax extended header consists of one or more attributes, each constructed as follows:
            // "%d %s=%s\n", <length>, <keyword>, <value>
            while (self.size > 0) {
                const length_buf = try self.readUntil(' ');
                const length = try std.fmt.parseInt(usize, length_buf, 10); // record length in bytes

                const keyword = try self.readUntil('=');
                if (hasNull(keyword)) return error.PaxNullInKeyword;

                // calculate value_len
                const value_start = length_buf.len + keyword.len + 2; // 2 separators
                if (length < value_start + 1 or self.size < length) return error.UnexpectedEndOfStream;
                const value_len = length - value_start - 1; // \n separator at end
                self.size -= length;

                const kind: PaxAttributeKind = if (eql(keyword, "path"))
                    .path
                else if (eql(keyword, "linkpath"))
                    .linkpath
                else if (eql(keyword, "size"))
                    .size
                else {
                    // Unknown keyword: skip its value (and newline), try next record.
                    try self.reader.skipBytes(value_len, .{});
                    try validateAttributeEnding(self.reader);
                    continue;
                };
                if (kind == .size and value_len > pax_max_size_attr_len) {
                    return error.PaxSizeAttrOverflow;
                }
                return Attribute{
                    .kind = kind,
                    .len = value_len,
                    .reader = self.reader,
                };
            }

            return null;
        }

        // Reads bytes into the scratch buffer up to (excluding) `delimiter`.
        fn readUntil(self: *Self, delimiter: u8) ![]const u8 {
            var fbs = std.io.fixedBufferStream(&self.scratch);
            try self.reader.streamUntilDelimiter(fbs.writer(), delimiter, null);
            return fbs.getWritten();
        }

        fn eql(a: []const u8, b: []const u8) bool {
            return std.mem.eql(u8, a, b);
        }

        fn hasNull(str: []const u8) bool {
            return (std.mem.indexOfScalar(u8, str, 0)) != null;
        }

        // Checks that each record ends with new line.
        fn validateAttributeEnding(reader: ReaderType) !void {
            if (try reader.readByte() != '\n') return error.PaxInvalidAttributeEnd;
        }
    };
}
|
|
|
|
|
|
2024-03-10 15:31:10 +00:00
|
|
|
|
/// Saves tar file content to the file system.
/// Creates directories, regular files and symlinks under `dir` for each
/// archive entry, honoring options.strip_components. When
/// options.diagnostics is set, file/symlink creation failures are collected
/// there instead of aborting extraction.
pub fn pipeToFileSystem(dir: std.fs.Dir, reader: anytype, options: PipeOptions) !void {
    switch (options.mode_mode) {
        .ignore => {},
        .executable_bit_only => {
            // This code does not look at the mode bits yet. To implement this feature,
            // the implementation must be adjusted to look at the mode, and check the
            // user executable bit, then call fchmod on newly created files when
            // the executable bit is supposed to be set.
            // It also needs to properly deal with ACLs on Windows.
            @panic("TODO: unimplemented: tar ModeMode.executable_bit_only");
        },
    }

    var file_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined;
    var link_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined;
    var iter = iterator(reader, .{
        .file_name_buffer = &file_name_buffer,
        .link_name_buffer = &link_name_buffer,
        .diagnostics = options.diagnostics,
    });
    while (try iter.next()) |file| {
        switch (file.kind) {
            .directory => {
                const file_name = stripComponents(file.name, options.strip_components);
                if (file_name.len != 0 and !options.exclude_empty_directories) {
                    try dir.makePath(file_name);
                }
            },
            .file => {
                // An entry with neither content nor name marks end of data.
                if (file.size == 0 and file.name.len == 0) return;
                const file_name = stripComponents(file.name, options.strip_components);
                if (file_name.len == 0) return error.BadFileName;

                if (createDirAndFile(dir, file_name)) |fs_file| {
                    defer fs_file.close();
                    try file.writeAll(fs_file);
                } else |err| {
                    const d = options.diagnostics orelse return err;
                    try d.errors.append(d.allocator, .{ .unable_to_create_file = .{
                        .code = err,
                        .file_name = try d.allocator.dupe(u8, file_name),
                    } });
                }
            },
            .sym_link => {
                // The file system path of the symbolic link.
                const file_name = stripComponents(file.name, options.strip_components);
                if (file_name.len == 0) return error.BadFileName;
                // The data inside the symbolic link.
                const link_name = file.link_name;

                createDirAndSymlink(dir, link_name, file_name) catch |err| {
                    const d = options.diagnostics orelse return error.UnableToCreateSymLink;
                    try d.errors.append(d.allocator, .{ .unable_to_create_sym_link = .{
                        .code = err,
                        .file_name = try d.allocator.dupe(u8, file_name),
                        .link_name = try d.allocator.dupe(u8, link_name),
                    } });
                };
            },
        }
    }
}
|
|
|
|
|
|
2024-02-25 11:03:23 +00:00
|
|
|
|
// Exclusively creates `file_name` inside `dir`; if creation fails because a
// parent directory is missing, creates the parent path and retries once.
// All other errors (including an already-existing file) propagate unchanged.
fn createDirAndFile(dir: std.fs.Dir, file_name: []const u8) !std.fs.File {
    return dir.createFile(file_name, .{ .exclusive = true }) catch |err| {
        if (err != error.FileNotFound) return err;
        const parent = std.fs.path.dirname(file_name) orelse return err;
        try dir.makePath(parent);
        return try dir.createFile(file_name, .{ .exclusive = true });
    };
}
|
|
|
|
|
|
2024-03-01 20:37:47 +00:00
|
|
|
|
// Creates a symbolic link at path `file_name` which points to `link_name`.
// If creation fails because a parent directory is missing, creates the
// parent path and retries once; other errors propagate unchanged.
fn createDirAndSymlink(dir: std.fs.Dir, link_name: []const u8, file_name: []const u8) !void {
    return dir.symLink(link_name, file_name, .{}) catch |err| {
        if (err != error.FileNotFound) return err;
        const parent = std.fs.path.dirname(file_name) orelse return err;
        try dir.makePath(parent);
        return try dir.symLink(link_name, file_name, .{});
    };
}
|
|
|
|
|
|
2023-11-30 15:51:29 +00:00
|
|
|
|
/// Returns `path` with its first `count` '/'-separated components removed.
/// If `path` has `count` or fewer components the empty suffix of `path` is
/// returned. The result is always a sub-slice of `path`; nothing is copied.
fn stripComponents(path: []const u8, count: u32) []const u8 {
    var rest: []const u8 = path;
    var remaining = count;
    while (remaining > 0) : (remaining -= 1) {
        // No separator left: fewer components than requested, strip everything.
        const slash = std.mem.indexOfScalar(u8, rest, '/') orelse return rest[rest.len..];
        rest = rest[slash + 1 ..];
    }
    return rest;
}
|
|
|
|
|
|
2024-03-13 22:56:09 +00:00
|
|
|
|
// Covers zero strips, partial strips, and counts beyond the number of
// components (which must yield the empty string, not an error).
test stripComponents {
    const cases = [_]struct {
        count: u32,
        want: []const u8,
    }{
        .{ .count = 0, .want = "a/b/c" },
        .{ .count = 1, .want = "b/c" },
        .{ .count = 2, .want = "c" },
        .{ .count = 3, .want = "" },
        .{ .count = 4, .want = "" },
    };
    for (cases) |case| {
        try testing.expectEqualStrings(case.want, stripComponents("a/b/c", case.count));
    }
}
|
|
|
|
|
|
2024-03-13 22:56:09 +00:00
|
|
|
|
// Exercises PaxIterator against hand-crafted pax extended-header records.
// Each record has the form "<len> <keyword>=<value>\n" where <len> counts the
// entire line including the length digits, the space, and the trailing
// newline. Cases cover unknown keywords (silently skipped), the known
// keywords (path, linkpath, size), malformed length fields, embedded null
// bytes, and a long (1000 byte) value.
test PaxIterator {
    // One expected attribute: its kind, and either the value the iterator
    // should yield or the error expected when reading that value.
    const Attr = struct {
        kind: PaxAttributeKind,
        value: []const u8 = undefined,
        err: ?anyerror = null,
    };
    const cases = [_]struct {
        data: []const u8, // raw pax records fed to the iterator
        attrs: []const Attr, // attributes expected from iteration, in order
        err: ?anyerror = null, // error expected from iter.next(), if any
    }{
        .{ // valid but unknown keys
            .data =
            \\30 mtime=1350244992.023960108
            \\6 k=1
            \\13 key1=val1
            \\10 a=name
            \\9 a=name
            \\
            ,
            .attrs = &[_]Attr{},
        },
        .{ // mix of known and unknown keys
            .data =
            \\6 k=1
            \\13 path=name
            \\17 linkpath=link
            \\13 key1=val1
            \\12 size=123
            \\13 key2=val2
            \\
            ,
            .attrs = &[_]Attr{
                .{ .kind = .path, .value = "name" },
                .{ .kind = .linkpath, .value = "link" },
                .{ .kind = .size, .value = "123" },
            },
        },
        .{ // too short size of the second key-value pair
            .data =
            \\13 path=name
            \\10 linkpath=value
            \\
            ,
            .attrs = &[_]Attr{
                .{ .kind = .path, .value = "name" },
            },
            .err = error.UnexpectedEndOfStream,
        },
        .{ // too long size of the second key-value pair; stream ends early
            .data =
            \\13 path=name
            \\6 k=1
            \\19 linkpath=value
            \\
            ,
            .attrs = &[_]Attr{
                .{ .kind = .path, .value = "name" },
            },
            .err = error.UnexpectedEndOfStream,
        },

        .{ // too long size of the second key-value pair; a following record
            // exists, so the error surfaces when reading the attribute value
            .data =
            \\13 path=name
            \\19 linkpath=value
            \\6 k=1
            \\
            ,
            .attrs = &[_]Attr{
                .{ .kind = .path, .value = "name" },
                .{ .kind = .linkpath, .err = error.PaxInvalidAttributeEnd },
            },
        },
        .{ // null in keyword is not valid
            .data = "13 path=name\n" ++ "7 k\x00b=1\n",
            .attrs = &[_]Attr{
                .{ .kind = .path, .value = "name" },
            },
            .err = error.PaxNullInKeyword,
        },
        .{ // null in value is not valid
            .data = "23 path=name\x00with null\n",
            .attrs = &[_]Attr{
                .{ .kind = .path, .err = error.PaxNullInValue },
            },
        },
        .{ // 1000 characters path
            .data = "1011 path=" ++ "0123456789" ** 100 ++ "\n",
            .attrs = &[_]Attr{
                .{ .kind = .path, .value = "0123456789" ** 100 },
            },
        },
    };
    // Scratch buffer for attribute values; large enough for the 1000 byte case.
    var buffer: [1024]u8 = undefined;

    outer: for (cases) |case| {
        var stream = std.io.fixedBufferStream(case.data);
        var iter = paxIterator(stream.reader(), case.data.len);

        var i: usize = 0;
        while (iter.next() catch |err| {
            // Iteration-level failure: must match the case's expected error.
            if (case.err) |e| {
                try testing.expectEqual(e, err);
                continue;
            }
            return err;
        }) |attr| : (i += 1) {
            const exp = case.attrs[i];
            try testing.expectEqual(exp.kind, attr.kind);
            const value = attr.value(&buffer) catch |err| {
                // Attribute-level failure: must match the attribute's expected error.
                if (exp.err) |e| {
                    try testing.expectEqual(e, err);
                    // NOTE(review): `break :outer` aborts ALL remaining cases
                    // after the first expected attribute-level error, so the
                    // cases listed after it never run — confirm whether
                    // `continue :outer` was intended here.
                    break :outer;
                }
                return err;
            };
            try testing.expectEqualStrings(exp.value, value);
        }
        // Every expected attribute was seen and no error was pending.
        try testing.expectEqual(case.attrs.len, i);
        try testing.expect(case.err == null);
    }
}
|
2023-12-11 22:55:07 +00:00
|
|
|
|
|
|
|
|
|
// Reference the integration tests in tar/test.zig so that `zig test` on this
// module also runs them.
test {
    _ = @import("tar/test.zig");
}
|
2024-02-23 20:57:15 +00:00
|
|
|
|
|
2024-03-01 23:50:39 +00:00
|
|
|
|
// Exercises Header.size() with both encodings a tar size field may use:
// base-256 (first byte has the high bit set) and base-8 (ASCII octal digits,
// optionally space/null padded). The size field lives at byte offset 124 of
// the 512-byte header, which is where each case's input is copied below.
test "header parse size" {
    const cases = [_]struct {
        in: []const u8, // raw bytes of the size field
        want: u64 = 0, // expected parsed value
        err: ?anyerror = null, // expected error, if parsing must fail
    }{
        // Test base-256 (binary) encoded values.
        .{ .in = "", .want = 0 },
        .{ .in = "\x80", .want = 0 },
        .{ .in = "\x80\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01", .want = 1 },
        .{ .in = "\x80\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x02", .want = 0x0102 },
        .{ .in = "\x80\x00\x00\x00\x01\x02\x03\x04\x05\x06\x07\x08", .want = 0x0102030405060708 },
        // Value does not fit in u64.
        .{ .in = "\x80\x00\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09", .err = error.TarNumericValueTooBig },
        .{ .in = "\x80\x00\x00\x00\x07\x76\xa2\x22\xeb\x8a\x72\x61", .want = 537795476381659745 },
        .{ .in = "\x80\x80\x80\x00\x01\x02\x03\x04\x05\x06\x07\x08", .err = error.TarNumericValueTooBig },

        // Test base-8 (octal) encoded values.
        .{ .in = "00000000227\x00", .want = 0o227 },
        .{ .in = " 000000227\x00", .want = 0o227 },
        // '8' is not a valid octal digit.
        .{ .in = "00000000228\x00", .err = error.TarHeader },
        .{ .in = "11111111111\x00", .want = 0o11111111111 },
    };

    for (cases) |case| {
        // Build a zeroed header and splice the case's bytes into the size field.
        var bytes = [_]u8{0} ** Header.SIZE;
        @memcpy(bytes[124 .. 124 + case.in.len], case.in);
        var header = Header{ .bytes = &bytes };
        if (case.err) |err| {
            try testing.expectError(err, header.size());
        } else {
            try testing.expectEqual(case.want, try header.size());
        }
    }
}
|
|
|
|
|
|
2024-03-01 23:50:39 +00:00
|
|
|
|
// Exercises Header.mode() on the octal mode field, which starts at byte
// offset 100 of the 512-byte header. Inputs longer than 8 bytes spill into
// the bytes after the mode field; the expected values below show that only
// the leading 8 bytes contribute to the parsed mode.
test "header parse mode" {
    const cases = [_]struct {
        in: []const u8, // raw bytes copied starting at the mode field
        want: u64 = 0, // expected parsed value
        err: ?anyerror = null, // expected error, if parsing must fail
    }{
        .{ .in = "0000644\x00", .want = 0o644 },
        .{ .in = "0000777\x00", .want = 0o777 },
        .{ .in = "7777777\x00", .want = 0o7777777 },
        // '8' is not a valid octal digit.
        .{ .in = "7777778\x00", .err = error.TarHeader },
        .{ .in = "77777777", .want = 0o77777777 },
        // 12 bytes written, but only the 8-byte mode field is parsed.
        .{ .in = "777777777777", .want = 0o77777777 },
    };
    for (cases) |case| {
        // Build a zeroed header and splice the case's bytes into the mode field.
        var bytes = [_]u8{0} ** Header.SIZE;
        @memcpy(bytes[100 .. 100 + case.in.len], case.in);
        var header = Header{ .bytes = &bytes };
        if (case.err) |err| {
            try testing.expectError(err, header.mode());
        } else {
            try testing.expectEqual(case.want, try header.mode());
        }
    }
}
|
2024-03-01 20:37:47 +00:00
|
|
|
|
|
|
|
|
|
// Smoke test for createDirAndFile/createDirAndSymlink: files and symlinks at
// the root and in not-yet-existing nested directories, plus a dangling
// symlink whose target appears only afterwards.
test "create file and symlink" {
    var root = testing.tmpDir(.{});
    defer root.cleanup();

    // Plain file at the root, then one requiring parent directory creation.
    var file = try createDirAndFile(root.dir, "file1");
    file.close();
    file = try createDirAndFile(root.dir, "a/b/c/file2");
    file.close();

    createDirAndSymlink(root.dir, "a/b/c/file2", "symlink1") catch |err| {
        // On Windows when developer mode is not enabled
        if (err == error.AccessDenied) return error.SkipZigTest;
        return err;
    };
    // Symlink in a nested directory pointing back up to an existing file.
    try createDirAndSymlink(root.dir, "../../../file1", "d/e/f/symlink2");

    // Dangling symlink: the target file is created afterwards.
    try createDirAndSymlink(root.dir, "../../../g/h/i/file4", "j/k/l/symlink3");
    file = try createDirAndFile(root.dir, "g/h/i/file4");
    file.close();
}
|
2024-03-10 15:31:10 +00:00
|
|
|
|
|
|
|
|
|
// Usage example for `iterator`: walks a small checked-in archive and verifies
// each entry's kind, name, link target, and file content.
test iterator {
    // Example tar file is created from this tree structure:
    // $ tree example
    // example
    // ├── a
    // │ └── file
    // ├── b
    // │ └── symlink -> ../a/file
    // └── empty
    // $ cat example/a/file
    // content
    // $ tar -cf example.tar example
    // $ tar -tvf example.tar
    // example/
    // example/b/
    // example/b/symlink -> ../a/file
    // example/a/
    // example/a/file
    // example/empty/

    const data = @embedFile("tar/testdata/example.tar");
    var fbs = std.io.fixedBufferStream(data);

    // User provided buffers to the iterator; entry names/link targets are
    // copied into these, so they must outlive each yielded entry.
    var file_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined;
    var link_name_buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined;
    // Create iterator
    var iter = iterator(fbs.reader(), .{
        .file_name_buffer = &file_name_buffer,
        .link_name_buffer = &link_name_buffer,
    });
    // Iterate over files in example.tar; file_no tracks archive order so the
    // directory names can be matched positionally.
    var file_no: usize = 0;
    while (try iter.next()) |file| : (file_no += 1) {
        switch (file.kind) {
            .directory => {
                switch (file_no) {
                    0 => try testing.expectEqualStrings("example/", file.name),
                    1 => try testing.expectEqualStrings("example/b/", file.name),
                    3 => try testing.expectEqualStrings("example/a/", file.name),
                    5 => try testing.expectEqualStrings("example/empty/", file.name),
                    else => unreachable,
                }
            },
            .file => {
                try testing.expectEqualStrings("example/a/file", file.name);
                // Read file content
                var buf: [16]u8 = undefined;
                const n = try file.reader().readAll(&buf);
                try testing.expectEqualStrings("content\n", buf[0..n]);
            },
            .sym_link => {
                try testing.expectEqualStrings("example/b/symlink", file.name);
                try testing.expectEqualStrings("../a/file", file.link_name);
            },
        }
    }
}
|
|
|
|
|
|
|
|
|
|
// Usage example for `pipeToFileSystem`: extracts a small checked-in archive
// into a temporary directory, exercising strip_components,
// exclude_empty_directories, and symlink creation.
test pipeToFileSystem {
    // Example tar file is created from this tree structure:
    // $ tree example
    // example
    // ├── a
    // │ └── file
    // ├── b
    // │ └── symlink -> ../a/file
    // └── empty
    // $ cat example/a/file
    // content
    // $ tar -cf example.tar example
    // $ tar -tvf example.tar
    // example/
    // example/b/
    // example/b/symlink -> ../a/file
    // example/a/
    // example/a/file
    // example/empty/

    const data = @embedFile("tar/testdata/example.tar");
    var fbs = std.io.fixedBufferStream(data);
    const reader = fbs.reader();

    var tmp = testing.tmpDir(.{ .no_follow = true });
    defer tmp.cleanup();
    const dir = tmp.dir;

    // Save tar from `reader` to the file system `dir`
    pipeToFileSystem(dir, reader, .{
        .mode_mode = .ignore,
        // Drops the leading "example/" component from every entry.
        .strip_components = 1,
        .exclude_empty_directories = true,
    }) catch |err| {
        // Skip on platforms which don't support symlinks.
        if (err == error.UnableToCreateSymLink) return error.SkipZigTest;
        return err;
    };

    // "empty" was excluded by exclude_empty_directories.
    try testing.expectError(error.FileNotFound, dir.statFile("empty"));
    try testing.expect((try dir.statFile("a/file")).kind == .file);
    try testing.expect((try dir.statFile("b/symlink")).kind == .file); // statFile follows symlink

    var buf: [32]u8 = undefined;
    try testing.expectEqualSlices(
        u8,
        "../a/file",
        // normalizePath converts native separators so the comparison also
        // holds on Windows.
        normalizePath(try dir.readLink("b/symlink", &buf)),
    );
}
|
|
|
|
|
|
|
|
|
|
/// Rewrites, in place, every native path separator in `bytes` to the POSIX
/// separator '/'. A no-op on POSIX platforms. Returns `bytes` for convenient
/// use in expressions.
fn normalizePath(bytes: []u8) []u8 {
    const native_sep = std.fs.path.sep;
    // Only Windows-style separators need rewriting.
    if (native_sep != std.fs.path.sep_posix) {
        std.mem.replaceScalar(u8, bytes, native_sep, std.fs.path.sep_posix);
    }
    return bytes;
}
|