Uri: propagate per-component encoding

This allows `std.Uri.resolve_inplace` to properly preserve the fact
that `new` is already escaped but `base` may not be.  I originally tried
just moving `raw_uri` around, but it made URI resolution unmanageably
complicated, so I instead added per-component information to `Uri`. This
allows extra allocations to be avoided when constructing URIs with
components from different sources and, in some cases, defers the work
all the way to when the URI is printed, where an allocator may not even
be needed.

Closes #19587
This commit is contained in:
Jacob Young 2024-04-09 22:22:08 -04:00 committed by Andrew Kelley
parent 215de3ee67
commit c4587dc9f4
7 changed files with 522 additions and 512 deletions

View File

@ -1,156 +1,157 @@
//! Uniform Resource Identifier (URI) parsing roughly adhering to <https://tools.ietf.org/html/rfc3986>.
//! Does not do perfect grammar and character class checking, but should be robust against URIs in the wild.
const Uri = @This();
const std = @import("std.zig");
const testing = std.testing;
const Allocator = std.mem.Allocator;
scheme: []const u8,
user: ?[]const u8 = null,
password: ?[]const u8 = null,
host: ?[]const u8 = null,
user: ?Component = null,
password: ?Component = null,
host: ?Component = null,
port: ?u16 = null,
path: []const u8,
query: ?[]const u8 = null,
fragment: ?[]const u8 = null,
path: Component = Component.empty,
query: ?Component = null,
fragment: ?Component = null,
/// Percent-encodes `input`, replacing every byte rejected by `isUnreserved`
/// with its `%XX` code. The result is allocated with `allocator` and is
/// owned by the caller.
pub fn escapeString(allocator: Allocator, input: []const u8) error{OutOfMemory}![]u8 {
    const escaped = try escapeStringWithFn(allocator, input, isUnreserved);
    return escaped;
}
pub const Component = union(enum) {
/// Invalid characters in this component must be percent encoded
/// before being printed as part of a URI.
raw: []const u8,
/// This component is already percent-encoded, it can be printed
/// directly as part of a URI.
percent_encoded: []const u8,
/// Percent-encodes `input` for use as a path: bytes accepted by `isPathChar`
/// are copied, everything else becomes `%XX`. The result is allocated with
/// `allocator` and is owned by the caller.
pub fn escapePath(allocator: Allocator, input: []const u8) error{OutOfMemory}![]u8 {
    const escaped = try escapeStringWithFn(allocator, input, isPathChar);
    return escaped;
}
pub const empty: Component = .{ .percent_encoded = "" };
/// Percent-encodes `input` for use as a query: bytes accepted by `isQueryChar`
/// are copied, everything else becomes `%XX`. The result is allocated with
/// `allocator` and is owned by the caller.
pub fn escapeQuery(allocator: Allocator, input: []const u8) error{OutOfMemory}![]u8 {
    const escaped = try escapeStringWithFn(allocator, input, isQueryChar);
    return escaped;
}
/// Streams `input` to `writer`, percent-encoding every byte rejected by
/// `isUnreserved`. No allocation is performed.
pub fn writeEscapedString(writer: anytype, input: []const u8) !void {
    try writeEscapedStringWithFn(writer, input, isUnreserved);
}
/// Streams `input` to `writer`, percent-encoding every byte rejected by
/// `isPathChar`. No allocation is performed.
pub fn writeEscapedPath(writer: anytype, input: []const u8) !void {
    try writeEscapedStringWithFn(writer, input, isPathChar);
}
/// Streams `input` to `writer`, percent-encoding every byte rejected by
/// `isQueryChar`. No allocation is performed.
pub fn writeEscapedQuery(writer: anytype, input: []const u8) !void {
    try writeEscapedStringWithFn(writer, input, isQueryChar);
}
pub fn escapeStringWithFn(allocator: Allocator, input: []const u8, comptime keepUnescaped: fn (c: u8) bool) Allocator.Error![]u8 {
var outsize: usize = 0;
for (input) |c| {
outsize += if (keepUnescaped(c)) @as(usize, 1) else 3;
/// Reports whether the component holds a zero-length string, regardless of
/// whether it is stored raw or percent-encoded.
pub fn isEmpty(component: Component) bool {
    const bytes = switch (component) {
        .raw => |raw| raw,
        .percent_encoded => |percent_encoded| percent_encoded,
    };
    return bytes.len == 0;
}
var output = try allocator.alloc(u8, outsize);
var outptr: usize = 0;
for (input) |c| {
if (keepUnescaped(c)) {
output[outptr] = c;
outptr += 1;
} else {
var buf: [2]u8 = undefined;
_ = std.fmt.bufPrint(&buf, "{X:0>2}", .{c}) catch unreachable;
/// Returns the raw (percent-decoded) bytes of this component.
///
/// The component's own slice is returned when no decoding is required: it is
/// already `.raw`, or it is `.percent_encoded` but contains no `%`. Otherwise
/// the `{raw}` rendering is allocated from `arena`. Because the result is only
/// sometimes allocated, it should not be freed.
pub fn toRawMaybeAlloc(
    component: Component,
    arena: std.mem.Allocator,
) std.mem.Allocator.Error![]const u8 {
    const encoded = switch (component) {
        .raw => |raw| return raw,
        .percent_encoded => |percent_encoded| percent_encoded,
    };
    // Without a '%' there is nothing to decode, so the slice can be reused.
    if (std.mem.indexOfScalar(u8, encoded, '%') == null) return encoded;
    return try std.fmt.allocPrint(arena, "{raw}", .{component});
}
output[outptr + 0] = '%';
output[outptr + 1] = buf[0];
output[outptr + 2] = buf[1];
outptr += 3;
/// Formats this component according to `fmt_str`:
///
/// * `""`  - debug form: `std.Uri.Component{ .<tag> = "<escaped string>" }`.
/// * `"raw"` - percent-decoded bytes; a `.raw` component is written verbatim.
/// * `"%"` - percent-encoded, keeping only bytes accepted by `isUnreserved`.
/// * `"user"`, `"password"`, `"host"`, `"path"`, `"query"`, `"fragment"` -
///   percent-encoded using the matching `is*Char` predicate.
///
/// For every encoding specifier a `.percent_encoded` component is written
/// verbatim. Any other format string is a compile error.
pub fn format(
    component: Component,
    comptime fmt_str: []const u8,
    _: std.fmt.FormatOptions,
    writer: anytype,
) @TypeOf(writer).Error!void {
    if (fmt_str.len == 0) {
        try writer.print("std.Uri.Component{{ .{s} = \"{}\" }}", .{
            @tagName(component),
            std.zig.fmtEscapes(switch (component) {
                .raw, .percent_encoded => |string| string,
            }),
        });
    } else if (comptime std.mem.eql(u8, fmt_str, "raw")) switch (component) {
        .raw => |raw| try writer.writeAll(raw),
        .percent_encoded => |percent_encoded| {
            // `start` is the beginning of the not-yet-written literal run,
            // `index` is where the search for the next '%' resumes.
            var start: usize = 0;
            var index: usize = 0;
            while (std.mem.indexOfScalarPos(u8, percent_encoded, index, '%')) |percent| {
                index = percent + 1;
                // A '%' with fewer than two bytes after it is kept literally.
                if (percent_encoded.len - index < 2) continue;
                // Each of the two bytes must be a hex digit. Validating them
                // individually rejects inputs such as "%+A" or "%-0", which
                // an integer parser would accept because of the sign.
                const high = std.fmt.charToDigit(percent_encoded[index], 16) catch continue;
                const low = std.fmt.charToDigit(percent_encoded[index + 1], 16) catch continue;
                const percent_encoded_char: u8 = (high << 4) | low;
                try writer.print("{s}{c}", .{
                    percent_encoded[start..percent],
                    percent_encoded_char,
                });
                start = percent + 3;
                index = percent + 3;
            }
            // Flush whatever follows the last decoded escape.
            try writer.writeAll(percent_encoded[start..]);
        },
    } else if (comptime std.mem.eql(u8, fmt_str, "%")) switch (component) {
        .raw => |raw| try percentEncode(writer, raw, isUnreserved),
        .percent_encoded => |percent_encoded| try writer.writeAll(percent_encoded),
    } else if (comptime std.mem.eql(u8, fmt_str, "user")) switch (component) {
        .raw => |raw| try percentEncode(writer, raw, isUserChar),
        .percent_encoded => |percent_encoded| try writer.writeAll(percent_encoded),
    } else if (comptime std.mem.eql(u8, fmt_str, "password")) switch (component) {
        .raw => |raw| try percentEncode(writer, raw, isPasswordChar),
        .percent_encoded => |percent_encoded| try writer.writeAll(percent_encoded),
    } else if (comptime std.mem.eql(u8, fmt_str, "host")) switch (component) {
        .raw => |raw| try percentEncode(writer, raw, isHostChar),
        .percent_encoded => |percent_encoded| try writer.writeAll(percent_encoded),
    } else if (comptime std.mem.eql(u8, fmt_str, "path")) switch (component) {
        .raw => |raw| try percentEncode(writer, raw, isPathChar),
        .percent_encoded => |percent_encoded| try writer.writeAll(percent_encoded),
    } else if (comptime std.mem.eql(u8, fmt_str, "query")) switch (component) {
        .raw => |raw| try percentEncode(writer, raw, isQueryChar),
        .percent_encoded => |percent_encoded| try writer.writeAll(percent_encoded),
    } else if (comptime std.mem.eql(u8, fmt_str, "fragment")) switch (component) {
        .raw => |raw| try percentEncode(writer, raw, isFragmentChar),
        .percent_encoded => |percent_encoded| try writer.writeAll(percent_encoded),
    } else @compileError("invalid format string '" ++ fmt_str ++ "'");
}
/// Writes `raw` to `writer`, replacing every byte for which `isValidChar`
/// returns false with `%` followed by two uppercase hex digits. Runs of
/// valid bytes are written unchanged.
pub fn percentEncode(
    writer: anytype,
    raw: []const u8,
    comptime isValidChar: fn (u8) bool,
) @TypeOf(writer).Error!void {
    // Beginning of the run of valid bytes that has not been written yet.
    var pending_start: usize = 0;
    var cursor: usize = 0;
    while (cursor < raw.len) : (cursor += 1) {
        const byte = raw[cursor];
        if (!isValidChar(byte)) {
            try writer.writeAll(raw[pending_start..cursor]);
            try writer.print("%{X:0>2}", .{byte});
            pending_start = cursor + 1;
        }
    }
    try writer.writeAll(raw[pending_start..]);
}
return output;
}
};
/// Writes `input` to `writer` one byte at a time: bytes accepted by
/// `keepUnescaped` are written as-is, all others as `%` followed by two
/// uppercase hex digits.
pub fn writeEscapedStringWithFn(writer: anytype, input: []const u8, comptime keepUnescaped: fn (c: u8) bool) @TypeOf(writer).Error!void {
    var position: usize = 0;
    while (position < input.len) : (position += 1) {
        const byte = input[position];
        if (!keepUnescaped(byte)) {
            try writer.print("%{X:0>2}", .{byte});
            continue;
        }
        try writer.writeByte(byte);
    }
}
/// Parses a URI string and unescapes all %XX where XX is a valid hex number. Otherwise, verbatim copies
/// them to the output.
pub fn unescapeString(allocator: Allocator, input: []const u8) error{OutOfMemory}![]u8 {
var outsize: usize = 0;
var inptr: usize = 0;
while (inptr < input.len) {
if (input[inptr] == '%') {
inptr += 1;
if (inptr + 2 <= input.len) {
_ = std.fmt.parseInt(u8, input[inptr..][0..2], 16) catch {
outsize += 3;
inptr += 2;
/// Percent decodes all %XX where XX is a valid hex number.
/// `output` may alias `input` if `output.ptr <= input.ptr`.
/// Mutates and returns a subslice of `output`.
pub fn percentDecodeBackwards(output: []u8, input: []const u8) []u8 {
var input_index = input.len;
var output_index = output.len;
while (input_index > 0) {
if (input_index >= 3) {
const maybe_percent_encoded = input[input_index - 3 ..][0..3];
if (maybe_percent_encoded[0] == '%') {
if (std.fmt.parseInt(u8, maybe_percent_encoded[1..], 16)) |percent_encoded_char| {
input_index -= maybe_percent_encoded.len;
output_index -= 1;
output[output_index] = percent_encoded_char;
continue;
};
inptr += 2;
outsize += 1;
} else {
outsize += 1;
} else |_| {}
}
} else {
inptr += 1;
outsize += 1;
}
input_index -= 1;
output_index -= 1;
output[output_index] = input[input_index];
}
return output[output_index..];
}
var output = try allocator.alloc(u8, outsize);
var outptr: usize = 0;
inptr = 0;
while (inptr < input.len) {
if (input[inptr] == '%') {
inptr += 1;
if (inptr + 2 <= input.len) {
const value = std.fmt.parseInt(u8, input[inptr..][0..2], 16) catch {
output[outptr + 0] = input[inptr + 0];
output[outptr + 1] = input[inptr + 1];
inptr += 2;
outptr += 2;
continue;
};
output[outptr] = value;
inptr += 2;
outptr += 1;
} else {
output[outptr] = input[inptr - 1];
outptr += 1;
}
} else {
output[outptr] = input[inptr];
inptr += 1;
outptr += 1;
}
}
return output;
/// Percent-decodes every `%XX` escape (where `XX` is a valid hex number)
/// inside `buffer`, overwriting its contents. This passes `buffer` as both
/// the output and the input of `percentDecodeBackwards`.
/// Returns the subslice of `buffer` that holds the decoded bytes.
pub fn percentDecodeInPlace(buffer: []u8) []u8 {
    const decoded = percentDecodeBackwards(buffer, buffer);
    return decoded;
}
pub const ParseError = error{ UnexpectedCharacter, InvalidFormat, InvalidPort };
/// Parses the URI or returns an error. This function is not compliant, but is required to parse
/// some forms of URIs in the wild, such as HTTP Location headers.
/// The return value will contain unescaped strings pointing into the
/// original `text`. Each component that is provided, will be non-`null`.
pub fn parseWithoutScheme(text: []const u8) ParseError!Uri {
/// The return value will contain strings pointing into the original `text`.
/// Each component that is provided, will be non-`null`.
pub fn parseAfterScheme(scheme: []const u8, text: []const u8) ParseError!Uri {
var reader = SliceReader{ .slice = text };
var uri = Uri{
.scheme = "",
.user = null,
.password = null,
.host = null,
.port = null,
.path = "", // path is always set, but empty by default.
.query = null,
.fragment = null,
};
var uri: Uri = .{ .scheme = scheme, .path = undefined };
if (reader.peekPrefix("//")) a: { // authority part
std.debug.assert(reader.get().? == '/');
@ -167,12 +168,12 @@ pub fn parseWithoutScheme(text: []const u8) ParseError!Uri {
const user_info = authority[0..index];
if (std.mem.indexOf(u8, user_info, ":")) |idx| {
uri.user = user_info[0..idx];
uri.user = .{ .percent_encoded = user_info[0..idx] };
if (idx < user_info.len - 1) { // empty password is also "no password"
uri.password = user_info[idx + 1 ..];
uri.password = .{ .percent_encoded = user_info[idx + 1 ..] };
}
} else {
uri.user = user_info;
uri.user = .{ .percent_encoded = user_info };
uri.password = null;
}
}
@ -205,19 +206,19 @@ pub fn parseWithoutScheme(text: []const u8) ParseError!Uri {
}
if (start_of_host >= end_of_host) return error.InvalidFormat;
uri.host = authority[start_of_host..end_of_host];
uri.host = .{ .percent_encoded = authority[start_of_host..end_of_host] };
}
uri.path = reader.readUntil(isPathSeparator);
uri.path = .{ .percent_encoded = reader.readUntil(isPathSeparator) };
if ((reader.peek() orelse 0) == '?') { // query part
std.debug.assert(reader.get().? == '?');
uri.query = reader.readUntil(isQuerySeparator);
uri.query = .{ .percent_encoded = reader.readUntil(isQuerySeparator) };
}
if ((reader.peek() orelse 0) == '#') { // fragment part
std.debug.assert(reader.get().? == '#');
uri.fragment = reader.readUntilEof();
uri.fragment = .{ .percent_encoded = reader.readUntilEof() };
}
return uri;
@ -241,9 +242,6 @@ pub const WriteToStreamOptions = struct {
/// When true, include the fragment part of the URI. Ignored when `path` is false.
fragment: bool = false,
/// When true, do not escape any part of the URI.
raw: bool = false,
};
pub fn writeToStream(
@ -252,80 +250,51 @@ pub fn writeToStream(
writer: anytype,
) @TypeOf(writer).Error!void {
if (options.scheme) {
try writer.writeAll(uri.scheme);
try writer.writeAll(":");
try writer.print("{s}:", .{uri.scheme});
if (options.authority and uri.host != null) {
try writer.writeAll("//");
}
}
if (options.authority) {
if (options.authentication and uri.host != null) {
if (uri.user) |user| {
try writer.writeAll(user);
try writer.print("{user}", .{user});
if (uri.password) |password| {
try writer.writeAll(":");
try writer.writeAll(password);
try writer.print(":{password}", .{password});
}
try writer.writeAll("@");
try writer.writeByte('@');
}
}
if (uri.host) |host| {
try writer.writeAll(host);
if (uri.port) |port| {
try writer.writeAll(":");
try std.fmt.formatInt(port, 10, .lower, .{}, writer);
}
try writer.print("{host}", .{host});
if (uri.port) |port| try writer.print(":{d}", .{port});
}
}
if (options.path) {
if (uri.path.len == 0) {
try writer.writeAll("/");
} else if (options.raw) {
try writer.writeAll(uri.path);
} else {
try writeEscapedPath(writer, uri.path);
try writer.print("{path}", .{
if (uri.path.isEmpty()) Uri.Component{ .percent_encoded = "/" } else uri.path,
});
if (options.query) {
if (uri.query) |query| try writer.print("?{query}", .{query});
}
if (options.fragment) {
if (uri.fragment) |fragment| try writer.print("#{fragment}", .{fragment});
}
if (options.query) if (uri.query) |q| {
try writer.writeAll("?");
if (options.raw) {
try writer.writeAll(q);
} else {
try writeEscapedQuery(writer, q);
}
};
if (options.fragment) if (uri.fragment) |f| {
try writer.writeAll("#");
if (options.raw) {
try writer.writeAll(f);
} else {
try writeEscapedQuery(writer, f);
}
};
}
}
pub fn format(
uri: Uri,
comptime fmt: []const u8,
options: std.fmt.FormatOptions,
comptime fmt_str: []const u8,
_: std.fmt.FormatOptions,
writer: anytype,
) @TypeOf(writer).Error!void {
_ = options;
const scheme = comptime std.mem.indexOf(u8, fmt, ";") != null or fmt.len == 0;
const authentication = comptime std.mem.indexOf(u8, fmt, "@") != null or fmt.len == 0;
const authority = comptime std.mem.indexOf(u8, fmt, "+") != null or fmt.len == 0;
const path = comptime std.mem.indexOf(u8, fmt, "/") != null or fmt.len == 0;
const query = comptime std.mem.indexOf(u8, fmt, "?") != null or fmt.len == 0;
const fragment = comptime std.mem.indexOf(u8, fmt, "#") != null or fmt.len == 0;
const raw = comptime std.mem.indexOf(u8, fmt, "r") != null or fmt.len == 0;
const scheme = comptime std.mem.indexOfScalar(u8, fmt_str, ';') != null or fmt_str.len == 0;
const authentication = comptime std.mem.indexOfScalar(u8, fmt_str, '@') != null or fmt_str.len == 0;
const authority = comptime std.mem.indexOfScalar(u8, fmt_str, '+') != null or fmt_str.len == 0;
const path = comptime std.mem.indexOfScalar(u8, fmt_str, '/') != null or fmt_str.len == 0;
const query = comptime std.mem.indexOfScalar(u8, fmt_str, '?') != null or fmt_str.len == 0;
const fragment = comptime std.mem.indexOfScalar(u8, fmt_str, '#') != null or fmt_str.len == 0;
return writeToStream(uri, .{
.scheme = scheme,
@ -334,12 +303,11 @@ pub fn format(
.path = path,
.query = query,
.fragment = fragment,
.raw = raw,
}, writer);
}
/// Parses the URI or returns an error.
/// The return value will contain unescaped strings pointing into the
/// The return value will contain strings pointing into the
/// original `text`. Each component that is provided, will be non-`null`.
pub fn parse(text: []const u8) ParseError!Uri {
var reader: SliceReader = .{ .slice = text };
@ -353,42 +321,32 @@ pub fn parse(text: []const u8) ParseError!Uri {
return error.InvalidFormat;
}
var uri = try parseWithoutScheme(reader.readUntilEof());
uri.scheme = scheme;
return uri;
return parseAfterScheme(scheme, reader.readUntilEof());
}
pub const ResolveInplaceError = ParseError || error{OutOfMemory};
pub const ResolveInPlaceError = ParseError || error{NoSpaceLeft};
/// Resolves a URI against a base URI, conforming to RFC 3986, Section 5.
/// Copies `new` to the beginning of `aux_buf`, allowing the slices to overlap,
/// Copies `new` to the beginning of `aux_buf.*`, allowing the slices to overlap,
/// then parses `new` as a URI, and then resolves the path in place.
/// If a merge needs to take place, the newly constructed path will be stored
/// in `aux_buf` just after the copied `new`.
pub fn resolve_inplace(base: Uri, new: []const u8, aux_buf: []u8) ResolveInplaceError!Uri {
std.mem.copyForwards(u8, aux_buf, new);
/// in `aux_buf.*` just after the copied `new`, and `aux_buf.*` will be modified
/// to only contain the remaining unused space.
pub fn resolve_inplace(base: Uri, new: []const u8, aux_buf: *[]u8) ResolveInPlaceError!Uri {
std.mem.copyForwards(u8, aux_buf.*, new);
// At this point, new is an invalid pointer.
const new_mut = aux_buf[0..new.len];
const new_parsed, const has_scheme = p: {
break :p .{
parse(new_mut) catch |first_err| {
break :p .{
parseWithoutScheme(new_mut) catch return first_err,
false,
};
},
true,
};
};
const new_mut = aux_buf.*[0..new.len];
aux_buf.* = aux_buf.*[new.len..];
const new_parsed = parse(new_mut) catch |err|
(parseAfterScheme("", new_mut) catch return err);
// As you can see above, `new_mut` is not a const pointer.
const new_path: []u8 = @constCast(new_parsed.path);
const new_path: []u8 = @constCast(new_parsed.path.percent_encoded);
if (has_scheme) return .{
if (new_parsed.scheme.len > 0) return .{
.scheme = new_parsed.scheme,
.user = new_parsed.user,
.password = new_parsed.password,
.host = new_parsed.host,
.port = new_parsed.port,
.path = remove_dot_segments(new_path),
@ -399,6 +357,7 @@ pub fn resolve_inplace(base: Uri, new: []const u8, aux_buf: []u8) ResolveInplace
if (new_parsed.host) |host| return .{
.scheme = base.scheme,
.user = new_parsed.user,
.password = new_parsed.password,
.host = host,
.port = new_parsed.port,
.path = remove_dot_segments(new_path),
@ -406,28 +365,21 @@ pub fn resolve_inplace(base: Uri, new: []const u8, aux_buf: []u8) ResolveInplace
.fragment = new_parsed.fragment,
};
const path, const query = b: {
if (new_path.len == 0)
break :b .{
base.path,
new_parsed.query orelse base.query,
};
if (new_path[0] == '/')
break :b .{
remove_dot_segments(new_path),
new_parsed.query,
};
break :b .{
try merge_paths(base.path, new_path, aux_buf[new_mut.len..]),
new_parsed.query,
};
const path, const query = if (new_path.len == 0) .{
base.path,
new_parsed.query orelse base.query,
} else if (new_path[0] == '/') .{
remove_dot_segments(new_path),
new_parsed.query,
} else .{
try merge_paths(base.path, new_path, aux_buf),
new_parsed.query,
};
return .{
.scheme = base.scheme,
.user = base.user,
.password = base.password,
.host = base.host,
.port = base.port,
.path = path,
@ -437,7 +389,7 @@ pub fn resolve_inplace(base: Uri, new: []const u8, aux_buf: []u8) ResolveInplace
}
/// In-place implementation of RFC 3986, Section 5.2.4.
fn remove_dot_segments(path: []u8) []u8 {
fn remove_dot_segments(path: []u8) Component {
var in_i: usize = 0;
var out_i: usize = 0;
while (in_i < path.len) {
@ -476,28 +428,28 @@ fn remove_dot_segments(path: []u8) []u8 {
}
}
}
return path[0..out_i];
return .{ .percent_encoded = path[0..out_i] };
}
test remove_dot_segments {
{
var buffer = "/a/b/c/./../../g".*;
try std.testing.expectEqualStrings("/a/g", remove_dot_segments(&buffer));
try std.testing.expectEqualStrings("/a/g", remove_dot_segments(&buffer).percent_encoded);
}
}
/// 5.2.3. Merge Paths
fn merge_paths(base: []const u8, new: []u8, aux: []u8) error{OutOfMemory}![]u8 {
if (aux.len < base.len + 1 + new.len) return error.OutOfMemory;
if (base.len == 0) {
aux[0] = '/';
@memcpy(aux[1..][0..new.len], new);
return remove_dot_segments(aux[0 .. new.len + 1]);
fn merge_paths(base: Component, new: []u8, aux_buf: *[]u8) error{NoSpaceLeft}!Component {
var aux = std.io.fixedBufferStream(aux_buf.*);
if (!base.isEmpty()) {
try aux.writer().print("{path}", .{base});
aux.pos = std.mem.lastIndexOfScalar(u8, aux.getWritten(), '/') orelse
return remove_dot_segments(new);
}
const pos = std.mem.lastIndexOfScalar(u8, base, '/') orelse return remove_dot_segments(new);
@memcpy(aux[0 .. pos + 1], base[0 .. pos + 1]);
@memcpy(aux[pos + 1 ..][0..new.len], new);
return remove_dot_segments(aux[0 .. pos + 1 + new.len]);
try aux.writer().print("/{s}", .{new});
const merged_path = remove_dot_segments(aux.getWritten());
aux_buf.* = aux_buf.*[merged_path.percent_encoded.len..];
return merged_path;
}
const SliceReader = struct {
@ -561,13 +513,6 @@ fn isSchemeChar(c: u8) bool {
};
}
/// Reports whether `c` is one of `/`, `?` or `#`.
fn isAuthoritySeparator(c: u8) bool {
    return c == '/' or c == '?' or c == '#';
}
/// reserved = gen-delims / sub-delims
fn isReserved(c: u8) bool {
return isGenLimit(c) or isSubLimit(c);
@ -598,6 +543,35 @@ fn isUnreserved(c: u8) bool {
};
}
/// Reports whether `c` may be written without percent-encoding in the user
/// component: any byte accepted by `isUnreserved` or `isSubLimit`.
fn isUserChar(c: u8) bool {
    if (isUnreserved(c)) return true;
    return isSubLimit(c);
}
/// Reports whether `c` may be written without percent-encoding in the
/// password component: everything `isUserChar` accepts, plus `:`.
fn isPasswordChar(c: u8) bool {
    return c == ':' or isUserChar(c);
}
/// Reports whether `c` may be written without percent-encoding in the host
/// component: everything `isPasswordChar` accepts, plus `[` and `]`.
fn isHostChar(c: u8) bool {
    return switch (c) {
        '[', ']' => true,
        else => isPasswordChar(c),
    };
}
/// Reports whether `c` may be written without percent-encoding in the path
/// component: everything `isUserChar` accepts, plus `/`, `:` and `@`.
fn isPathChar(c: u8) bool {
    return switch (c) {
        '/', ':', '@' => true,
        else => isUserChar(c),
    };
}
/// Reports whether `c` may be written without percent-encoding in the query
/// component: everything `isPathChar` accepts, plus `?`.
fn isQueryChar(c: u8) bool {
    return c == '?' or isPathChar(c);
}
const isFragmentChar = isQueryChar;
/// Reports whether `c` is one of `/`, `?` or `#`.
fn isAuthoritySeparator(c: u8) bool {
    return c == '/' or c == '?' or c == '#';
}
fn isPathSeparator(c: u8) bool {
return switch (c) {
'?', '#' => true,
@ -605,14 +579,6 @@ fn isPathSeparator(c: u8) bool {
};
}
/// Reports whether `c` is kept unescaped in a path: any byte accepted by
/// `isUnreserved` or `isSubLimit`, plus `/`, `:` and `@`.
fn isPathChar(c: u8) bool {
    return switch (c) {
        '/', ':', '@' => true,
        else => isUnreserved(c) or isSubLimit(c),
    };
}
/// Reports whether `c` is kept unescaped in a query: everything `isPathChar`
/// accepts, plus `?` and `%`.
fn isQueryChar(c: u8) bool {
    return switch (c) {
        '?', '%' => true,
        else => isPathChar(c),
    };
}
fn isQuerySeparator(c: u8) bool {
return switch (c) {
'#' => true,
@ -623,92 +589,92 @@ fn isQuerySeparator(c: u8) bool {
test "basic" {
const parsed = try parse("https://ziglang.org/download");
try testing.expectEqualStrings("https", parsed.scheme);
try testing.expectEqualStrings("ziglang.org", parsed.host orelse return error.UnexpectedNull);
try testing.expectEqualStrings("/download", parsed.path);
try testing.expectEqualStrings("ziglang.org", parsed.host.?.percent_encoded);
try testing.expectEqualStrings("/download", parsed.path.percent_encoded);
try testing.expectEqual(@as(?u16, null), parsed.port);
}
test "with port" {
const parsed = try parse("http://example:1337/");
try testing.expectEqualStrings("http", parsed.scheme);
try testing.expectEqualStrings("example", parsed.host orelse return error.UnexpectedNull);
try testing.expectEqualStrings("/", parsed.path);
try testing.expectEqualStrings("example", parsed.host.?.percent_encoded);
try testing.expectEqualStrings("/", parsed.path.percent_encoded);
try testing.expectEqual(@as(?u16, 1337), parsed.port);
}
test "should fail gracefully" {
try std.testing.expectEqual(@as(ParseError!Uri, error.InvalidFormat), parse("foobar://"));
try std.testing.expectError(error.InvalidFormat, parse("foobar://"));
}
test "file" {
const parsed = try parse("file:///");
try std.testing.expectEqualSlices(u8, "file", parsed.scheme);
try std.testing.expectEqual(@as(?[]const u8, null), parsed.host);
try std.testing.expectEqualSlices(u8, "/", parsed.path);
try std.testing.expectEqualStrings("file", parsed.scheme);
try std.testing.expectEqual(@as(?Component, null), parsed.host);
try std.testing.expectEqualStrings("/", parsed.path.percent_encoded);
const parsed2 = try parse("file:///an/absolute/path/to/something");
try std.testing.expectEqualSlices(u8, "file", parsed2.scheme);
try std.testing.expectEqual(@as(?[]const u8, null), parsed2.host);
try std.testing.expectEqualSlices(u8, "/an/absolute/path/to/something", parsed2.path);
try std.testing.expectEqualStrings("file", parsed2.scheme);
try std.testing.expectEqual(@as(?Component, null), parsed2.host);
try std.testing.expectEqualStrings("/an/absolute/path/to/something", parsed2.path.percent_encoded);
const parsed3 = try parse("file://localhost/an/absolute/path/to/another/thing/");
try std.testing.expectEqualSlices(u8, "file", parsed3.scheme);
try std.testing.expectEqualSlices(u8, "localhost", parsed3.host.?);
try std.testing.expectEqualSlices(u8, "/an/absolute/path/to/another/thing/", parsed3.path);
try std.testing.expectEqualStrings("file", parsed3.scheme);
try std.testing.expectEqualStrings("localhost", parsed3.host.?.percent_encoded);
try std.testing.expectEqualStrings("/an/absolute/path/to/another/thing/", parsed3.path.percent_encoded);
}
test "scheme" {
try std.testing.expectEqualSlices(u8, "http", (try parse("http:_")).scheme);
try std.testing.expectEqualSlices(u8, "scheme-mee", (try parse("scheme-mee:_")).scheme);
try std.testing.expectEqualSlices(u8, "a.b.c", (try parse("a.b.c:_")).scheme);
try std.testing.expectEqualSlices(u8, "ab+", (try parse("ab+:_")).scheme);
try std.testing.expectEqualSlices(u8, "X+++", (try parse("X+++:_")).scheme);
try std.testing.expectEqualSlices(u8, "Y+-.", (try parse("Y+-.:_")).scheme);
try std.testing.expectEqualStrings("http", (try parse("http:_")).scheme);
try std.testing.expectEqualStrings("scheme-mee", (try parse("scheme-mee:_")).scheme);
try std.testing.expectEqualStrings("a.b.c", (try parse("a.b.c:_")).scheme);
try std.testing.expectEqualStrings("ab+", (try parse("ab+:_")).scheme);
try std.testing.expectEqualStrings("X+++", (try parse("X+++:_")).scheme);
try std.testing.expectEqualStrings("Y+-.", (try parse("Y+-.:_")).scheme);
}
test "authority" {
try std.testing.expectEqualSlices(u8, "hostname", (try parse("scheme://hostname")).host.?);
try std.testing.expectEqualStrings("hostname", (try parse("scheme://hostname")).host.?.percent_encoded);
try std.testing.expectEqualSlices(u8, "hostname", (try parse("scheme://userinfo@hostname")).host.?);
try std.testing.expectEqualSlices(u8, "userinfo", (try parse("scheme://userinfo@hostname")).user.?);
try std.testing.expectEqual(@as(?[]const u8, null), (try parse("scheme://userinfo@hostname")).password);
try std.testing.expectEqual(@as(?[]const u8, null), (try parse("scheme://userinfo@")).host);
try std.testing.expectEqualStrings("hostname", (try parse("scheme://userinfo@hostname")).host.?.percent_encoded);
try std.testing.expectEqualStrings("userinfo", (try parse("scheme://userinfo@hostname")).user.?.percent_encoded);
try std.testing.expectEqual(@as(?Component, null), (try parse("scheme://userinfo@hostname")).password);
try std.testing.expectEqual(@as(?Component, null), (try parse("scheme://userinfo@")).host);
try std.testing.expectEqualSlices(u8, "hostname", (try parse("scheme://user:password@hostname")).host.?);
try std.testing.expectEqualSlices(u8, "user", (try parse("scheme://user:password@hostname")).user.?);
try std.testing.expectEqualSlices(u8, "password", (try parse("scheme://user:password@hostname")).password.?);
try std.testing.expectEqualStrings("hostname", (try parse("scheme://user:password@hostname")).host.?.percent_encoded);
try std.testing.expectEqualStrings("user", (try parse("scheme://user:password@hostname")).user.?.percent_encoded);
try std.testing.expectEqualStrings("password", (try parse("scheme://user:password@hostname")).password.?.percent_encoded);
try std.testing.expectEqualSlices(u8, "hostname", (try parse("scheme://hostname:0")).host.?);
try std.testing.expectEqualStrings("hostname", (try parse("scheme://hostname:0")).host.?.percent_encoded);
try std.testing.expectEqual(@as(u16, 1234), (try parse("scheme://hostname:1234")).port.?);
try std.testing.expectEqualSlices(u8, "hostname", (try parse("scheme://userinfo@hostname:1234")).host.?);
try std.testing.expectEqualStrings("hostname", (try parse("scheme://userinfo@hostname:1234")).host.?.percent_encoded);
try std.testing.expectEqual(@as(u16, 1234), (try parse("scheme://userinfo@hostname:1234")).port.?);
try std.testing.expectEqualSlices(u8, "userinfo", (try parse("scheme://userinfo@hostname:1234")).user.?);
try std.testing.expectEqual(@as(?[]const u8, null), (try parse("scheme://userinfo@hostname:1234")).password);
try std.testing.expectEqualStrings("userinfo", (try parse("scheme://userinfo@hostname:1234")).user.?.percent_encoded);
try std.testing.expectEqual(@as(?Component, null), (try parse("scheme://userinfo@hostname:1234")).password);
try std.testing.expectEqualSlices(u8, "hostname", (try parse("scheme://user:password@hostname:1234")).host.?);
try std.testing.expectEqualStrings("hostname", (try parse("scheme://user:password@hostname:1234")).host.?.percent_encoded);
try std.testing.expectEqual(@as(u16, 1234), (try parse("scheme://user:password@hostname:1234")).port.?);
try std.testing.expectEqualSlices(u8, "user", (try parse("scheme://user:password@hostname:1234")).user.?);
try std.testing.expectEqualSlices(u8, "password", (try parse("scheme://user:password@hostname:1234")).password.?);
try std.testing.expectEqualStrings("user", (try parse("scheme://user:password@hostname:1234")).user.?.percent_encoded);
try std.testing.expectEqualStrings("password", (try parse("scheme://user:password@hostname:1234")).password.?.percent_encoded);
}
test "authority.password" {
try std.testing.expectEqualSlices(u8, "username", (try parse("scheme://username@a")).user.?);
try std.testing.expectEqual(@as(?[]const u8, null), (try parse("scheme://username@a")).password);
try std.testing.expectEqualStrings("username", (try parse("scheme://username@a")).user.?.percent_encoded);
try std.testing.expectEqual(@as(?Component, null), (try parse("scheme://username@a")).password);
try std.testing.expectEqualSlices(u8, "username", (try parse("scheme://username:@a")).user.?);
try std.testing.expectEqual(@as(?[]const u8, null), (try parse("scheme://username:@a")).password);
try std.testing.expectEqualStrings("username", (try parse("scheme://username:@a")).user.?.percent_encoded);
try std.testing.expectEqual(@as(?Component, null), (try parse("scheme://username:@a")).password);
try std.testing.expectEqualSlices(u8, "username", (try parse("scheme://username:password@a")).user.?);
try std.testing.expectEqualSlices(u8, "password", (try parse("scheme://username:password@a")).password.?);
try std.testing.expectEqualStrings("username", (try parse("scheme://username:password@a")).user.?.percent_encoded);
try std.testing.expectEqualStrings("password", (try parse("scheme://username:password@a")).password.?.percent_encoded);
try std.testing.expectEqualSlices(u8, "username", (try parse("scheme://username::@a")).user.?);
try std.testing.expectEqualSlices(u8, ":", (try parse("scheme://username::@a")).password.?);
try std.testing.expectEqualStrings("username", (try parse("scheme://username::@a")).user.?.percent_encoded);
try std.testing.expectEqualStrings(":", (try parse("scheme://username::@a")).password.?.percent_encoded);
}
fn testAuthorityHost(comptime hostlist: anytype) !void {
inline for (hostlist) |hostname| {
try std.testing.expectEqualSlices(u8, hostname, (try parse("scheme://" ++ hostname)).host.?);
try std.testing.expectEqualStrings(hostname, (try parse("scheme://" ++ hostname)).host.?.percent_encoded);
}
}
@ -761,11 +727,11 @@ test "RFC example 1" {
.scheme = uri[0..3],
.user = null,
.password = null,
.host = uri[6..17],
.host = .{ .percent_encoded = uri[6..17] },
.port = 8042,
.path = uri[22..33],
.query = uri[34..45],
.fragment = uri[46..50],
.path = .{ .percent_encoded = uri[22..33] },
.query = .{ .percent_encoded = uri[34..45] },
.fragment = .{ .percent_encoded = uri[46..50] },
}, try parse(uri));
}
@ -777,7 +743,7 @@ test "RFC example 2" {
.password = null,
.host = null,
.port = null,
.path = uri[4..],
.path = .{ .percent_encoded = uri[4..] },
.query = null,
.fragment = null,
}, try parse(uri));
@ -838,55 +804,60 @@ test "Special test" {
_ = try parse("https://www.youtube.com/watch?v=dQw4w9WgXcQ&feature=youtu.be&t=0");
}
test "URI escaping" {
const input = "\\ö/ äöß ~~.adas-https://canvas:123/#ads&&sad";
const expected = "%5C%C3%B6%2F%20%C3%A4%C3%B6%C3%9F%20~~.adas-https%3A%2F%2Fcanvas%3A123%2F%23ads%26%26sad";
const actual = try escapeString(std.testing.allocator, input);
defer std.testing.allocator.free(actual);
try std.testing.expectEqualSlices(u8, expected, actual);
test "URI percent encoding" {
try std.testing.expectFmt(
"%5C%C3%B6%2F%20%C3%A4%C3%B6%C3%9F%20~~.adas-https%3A%2F%2Fcanvas%3A123%2F%23ads%26%26sad",
"{%}",
.{Component{ .raw = "\\ö/ äöß ~~.adas-https://canvas:123/#ads&&sad" }},
);
}
// NOTE(review): this span carries two renditions of the decoding test — an
// allocator-based `unescapeString` form and a `Component` / in-place form —
// interleaved; confirm which rendition is current.
test "URI unescaping" {
const input = "%5C%C3%B6%2F%20%C3%A4%C3%B6%C3%9F%20~~.adas-https%3A%2F%2Fcanvas%3A123%2F%23ads%26%26sad";
const expected = "\\ö/ äöß ~~.adas-https://canvas:123/#ads&&sad";
test "URI percent decoding" {
// Case 1: every escape in the input is well-formed and must be decoded.
{
const expected = "\\ö/ äöß ~~.adas-https://canvas:123/#ads&&sad";
// `.*` copies the literal into a mutable array so it can be handed to the
// decoders below by mutable pointer.
var input = "%5C%C3%B6%2F%20%C3%A4%C3%B6%C3%9F%20~~.adas-https%3A%2F%2Fcanvas%3A123%2F%23ads%26%26sad".*;
const actual = try unescapeString(std.testing.allocator, input);
defer std.testing.allocator.free(actual);
// Printing a percent-encoded component with `{raw}` must yield the decoded text.
try std.testing.expectFmt(expected, "{raw}", .{Component{ .percent_encoded = &input }});
try std.testing.expectEqualSlices(u8, expected, actual);
var output: [expected.len]u8 = undefined;
// Expected value goes first so a failure report labels the two strings correctly.
try std.testing.expectEqualStrings(expected, percentDecodeBackwards(&output, &input));
const decoded = try unescapeString(std.testing.allocator, "/abc%");
defer std.testing.allocator.free(decoded);
try std.testing.expectEqualStrings("/abc%", decoded);
// Checked last: presumably this decoder rewrites `input` itself — TODO confirm.
try std.testing.expectEqualStrings(expected, percentDecodeInPlace(&input));
}
// Case 2: a trailing '%' with no hex digits must come back unchanged.
{
const expected = "/abc%";
var input = expected.*;
try std.testing.expectFmt(expected, "{raw}", .{Component{ .percent_encoded = &input }});
var output: [expected.len]u8 = undefined;
// Expected value goes first, matching the other assertions in this test.
try std.testing.expectEqualStrings(expected, percentDecodeBackwards(&output, &input));
try std.testing.expectEqualStrings(expected, percentDecodeInPlace(&input));
}
}
// NOTE(review): two test headers and two renditions of the assertion
// (allocPrint + compare vs. `expectFmt`) are interleaved here — confirm which is current.
test "URI query escaping" {
test "URI query encoding" {
// The query already contains an escape (`%2F`); formatting path and query with
// `{/?}` must reproduce it as-is rather than encoding the '%' a second time.
const address = "https://objects.githubusercontent.com/?response-content-type=application%2Foctet-stream";
const parsed = try Uri.parse(address);
// format the URI to escape it
const formatted_uri = try std.fmt.allocPrint(std.testing.allocator, "{/?}", .{parsed});
defer std.testing.allocator.free(formatted_uri);
try std.testing.expectEqualStrings("/?response-content-type=application%2Foctet-stream", formatted_uri);
// format the URI to percent encode it
try std.testing.expectFmt("/?response-content-type=application%2Foctet-stream", "{/?}", .{parsed});
}
// Formats a URI that has only a scheme and a path with `{;/?#}` and expects
// `file:/foo/bar/baz`, i.e. no `//` authority part is printed when host is null.
test "format" {
// NOTE(review): `uri` and `.path` each appear twice (plain-slice form and
// `Component` form); these look like before/after renditions — confirm which is current.
const uri = Uri{
const uri: Uri = .{
.scheme = "file",
.user = null,
.password = null,
.host = null,
.port = null,
.path = "/foo/bar/baz",
.path = .{ .raw = "/foo/bar/baz" },
.query = null,
.fragment = null,
};
// Buffer-based rendition: print into a growable list, then compare its bytes.
var buf = std.ArrayList(u8).init(std.testing.allocator);
defer buf.deinit();
try buf.writer().print("{;/?#}", .{uri});
try std.testing.expectEqualSlices(u8, "file:/foo/bar/baz", buf.items);
// `expectFmt` rendition of the same check, without a caller-managed buffer.
try std.testing.expectFmt("file:/foo/bar/baz", "{;/?#}", .{uri});
}
test "URI malformed input" {
@ -894,3 +865,7 @@ test "URI malformed input" {
try std.testing.expectError(error.InvalidFormat, std.Uri.parse("http://]@["));
try std.testing.expectError(error.InvalidFormat, std.Uri.parse("http://lo]s\x85hc@[/8\x10?0Q"));
}
const std = @import("std.zig");
const testing = std.testing;
const Uri = @This();

View File

@ -771,17 +771,41 @@ pub const Request = struct {
req.client.connection_pool.release(req.client.allocator, req.connection.?);
req.connection = null;
const protocol = protocol_map.get(uri.scheme) orelse return error.UnsupportedUrlScheme;
var server_header = std.heap.FixedBufferAllocator.init(req.response.parser.header_bytes_buffer);
defer req.response.parser.header_bytes_buffer = server_header.buffer[server_header.end_index..];
const protocol, const valid_uri = try validateUri(uri, server_header.allocator());
const port: u16 = uri.port orelse switch (protocol) {
.plain => 80,
.tls => 443,
};
const new_host = valid_uri.host.?.raw;
const prev_host = req.uri.host.?.raw;
const keep_privileged_headers =
std.ascii.eqlIgnoreCase(valid_uri.scheme, req.uri.scheme) and
std.ascii.endsWithIgnoreCase(new_host, prev_host) and
(new_host.len == prev_host.len or new_host[new_host.len - prev_host.len - 1] == '.');
if (!keep_privileged_headers) {
// When redirecting to a different domain, strip privileged headers.
req.privileged_headers = &.{};
}
const host = uri.host orelse return error.UriMissingHost;
if (switch (req.response.status) {
.see_other => true,
.moved_permanently, .found => req.method == .POST,
else => false,
}) {
// A redirect to a GET must change the method and remove the body.
req.method = .GET;
req.transfer_encoding = .none;
req.headers.content_type = .omit;
}
req.uri = uri;
req.connection = try req.client.connect(host, port, protocol);
if (req.transfer_encoding != .none) {
// The request body has already been sent. The request is
// still in a valid state, but the redirect must be handled
// manually.
return error.RedirectRequiresResend;
}
req.uri = valid_uri;
req.connection = try req.client.connect(new_host, valid_uri.port.?, protocol);
req.redirect_behavior.subtractOne();
req.response.parser.reset();
@ -796,13 +820,8 @@ pub const Request = struct {
pub const SendError = Connection.WriteError || error{ InvalidContentLength, UnsupportedTransferEncoding };
pub const SendOptions = struct {
/// Specifies that the uri is already escaped.
raw_uri: bool = false,
};
/// Send the HTTP request headers to the server.
pub fn send(req: *Request, options: SendOptions) SendError!void {
pub fn send(req: *Request) SendError!void {
if (!req.method.requestHasBody() and req.transfer_encoding != .none)
return error.UnsupportedTransferEncoding;
@ -821,7 +840,6 @@ pub const Request = struct {
.authority = connection.proxied,
.path = true,
.query = true,
.raw = options.raw_uri,
}, w);
}
try w.writeByte(' ');
@ -1038,55 +1056,19 @@ pub const Request = struct {
const location = req.response.location orelse
return error.HttpRedirectLocationMissing;
// This mutates the beginning of header_buffer and uses that
// for the backing memory of the returned new_uri.
const header_buffer = req.response.parser.header_bytes_buffer;
const new_uri = req.uri.resolve_inplace(location, header_buffer) catch
return error.HttpRedirectLocationInvalid;
// The new URI references the beginning of header_bytes_buffer memory.
// That memory will be kept, but everything after it will be
// reused by the subsequent request. In other words,
// header_bytes_buffer must be large enough to store all
// redirect locations as well as the final request header.
const path_end = new_uri.path.ptr + new_uri.path.len;
// https://github.com/ziglang/zig/issues/1738
const path_offset = @intFromPtr(path_end) - @intFromPtr(header_buffer.ptr);
const end_offset = @max(path_offset, location.len);
req.response.parser.header_bytes_buffer = header_buffer[end_offset..];
const is_same_domain_or_subdomain =
std.ascii.endsWithIgnoreCase(new_uri.host.?, req.uri.host.?) and
(new_uri.host.?.len == req.uri.host.?.len or
new_uri.host.?[new_uri.host.?.len - req.uri.host.?.len - 1] == '.');
if (new_uri.host == null or !is_same_domain_or_subdomain or
!std.ascii.eqlIgnoreCase(new_uri.scheme, req.uri.scheme))
{
// When redirecting to a different domain, strip privileged headers.
req.privileged_headers = &.{};
}
if (switch (req.response.status) {
.see_other => true,
.moved_permanently, .found => req.method == .POST,
else => false,
}) {
// A redirect to a GET must change the method and remove the body.
req.method = .GET;
req.transfer_encoding = .none;
req.headers.content_type = .omit;
}
if (req.transfer_encoding != .none) {
// The request body has already been sent. The request is
// still in a valid state, but the redirect must be handled
// manually.
return error.RedirectRequiresResend;
}
try req.redirect(new_uri);
try req.send(.{});
// This mutates the beginning of header_bytes_buffer and uses that
// for the backing memory of the returned Uri.
try req.redirect(req.uri.resolve_inplace(
location,
&req.response.parser.header_bytes_buffer,
) catch |err| switch (err) {
error.UnexpectedCharacter,
error.InvalidFormat,
error.InvalidPort,
=> return error.HttpRedirectLocationInvalid,
error.NoSpaceLeft => return error.HttpHeadersOversize,
});
try req.send();
} else {
req.response.skip = false;
if (!req.response.parser.done) {
@ -1264,30 +1246,25 @@ fn createProxyFromEnvVar(arena: Allocator, env_var_names: []const []const u8) !?
};
} else return null;
const uri = Uri.parse(content) catch try Uri.parseWithoutScheme(content);
const uri = Uri.parse(content) catch try Uri.parseAfterScheme("http", content);
const protocol, const valid_uri = validateUri(uri, arena) catch |err| switch (err) {
error.UnsupportedUriScheme => return null,
error.UriMissingHost => return error.HttpProxyMissingHost,
error.OutOfMemory => |e| return e,
};
const protocol = if (uri.scheme.len == 0)
.plain // No scheme, assume http://
else
protocol_map.get(uri.scheme) orelse return null; // Unknown scheme, ignore
const host = uri.host orelse return error.HttpProxyMissingHost;
const authorization: ?[]const u8 = if (uri.user != null or uri.password != null) a: {
const authorization = try arena.alloc(u8, basic_authorization.valueLengthFromUri(uri));
assert(basic_authorization.value(uri, authorization).len == authorization.len);
const authorization: ?[]const u8 = if (valid_uri.user != null or valid_uri.password != null) a: {
const authorization = try arena.alloc(u8, basic_authorization.valueLengthFromUri(valid_uri));
assert(basic_authorization.value(valid_uri, authorization).len == authorization.len);
break :a authorization;
} else null;
const proxy = try arena.create(Proxy);
proxy.* = .{
.protocol = protocol,
.host = host,
.host = valid_uri.host.?.raw,
.authorization = authorization,
.port = uri.port orelse switch (protocol) {
.plain => 80,
.tls => 443,
},
.port = valid_uri.port.?,
.supports_connect = true,
};
return proxy;
@ -1305,24 +1282,26 @@ pub const basic_authorization = struct {
}
/// Returns what `valueLength` reports for the user and password of `uri`;
/// a missing component is measured as an empty one.
pub fn valueLengthFromUri(uri: Uri) usize {
// NOTE(review): this leading `return` measures the components with `.len` and
// makes everything below it unreachable; it appears to be the previous
// implementation left next to its replacement — confirm which is current.
return valueLength(
if (uri.user) |user| user.len else 0,
if (uri.password) |password| password.len else 0,
);
// Measure the formatted length without allocating: the output itself is
// discarded by the null writer and only the byte count is kept.
var stream = std.io.countingWriter(std.io.null_writer);
// NOTE(review): `try` is used although the return type is plain `usize`;
// presumably the writer's error set is empty so nothing can propagate — confirm.
try stream.writer().print("{user}", .{uri.user orelse Uri.Component.empty});
const user_len = stream.bytes_written;
// Reset the counter so the password is measured on its own.
stream.bytes_written = 0;
try stream.writer().print("{password}", .{uri.password orelse Uri.Component.empty});
const password_len = stream.bytes_written;
return valueLength(@intCast(user_len), @intCast(password_len));
}
/// Writes `prefix` followed by the base64 encoding of `user:password` into `out`
/// and returns the written part of `out`. A missing user or password is treated
/// as empty. `out` must be large enough for the result; no bounds handling is
/// done here beyond slicing.
pub fn value(uri: Uri, out: []u8) []u8 {
// NOTE(review): `buf` and `base64` are each declared twice below (a
// `bufPrint`-based form and a `fixedBufferStream`-based form); these look like
// before/after renditions of the same steps — confirm which is current.
assert(uri.user == null or uri.user.?.len <= max_user_len);
assert(uri.password == null or uri.password.?.len <= max_password_len);
// Scratch space for the unencoded "user:password" text.
var buf: [max_user_len + ":".len + max_password_len]u8 = undefined;
var stream = std.io.fixedBufferStream(&buf);
// A failed write into the fixed buffer is treated as impossible here.
stream.writer().print("{user}", .{uri.user orelse Uri.Component.empty}) catch
unreachable;
// The formatted user must fit within its share of the buffer.
assert(stream.pos <= max_user_len);
// NOTE(review): the password write has no matching length assertion in this
// form; overflowing `buf` would reach `unreachable` — confirm the bound is
// guaranteed elsewhere.
stream.writer().print(":{password}", .{uri.password orelse Uri.Component.empty}) catch
unreachable;
@memcpy(out[0..prefix.len], prefix);
var buf: [max_user_len + ":".len + max_password_len]u8 = undefined;
const unencoded = std.fmt.bufPrint(&buf, "{s}:{s}", .{
uri.user orelse "", uri.password orelse "",
}) catch unreachable;
const base64 = std.base64.standard.Encoder.encode(out[prefix.len..], unencoded);
// Encode directly behind the prefix; `base64` is the encoded slice inside `out`.
const base64 = std.base64.standard.Encoder.encode(out[prefix.len..], stream.getWritten());
return out[0 .. prefix.len + base64.len];
}
};
@ -1337,8 +1316,7 @@ pub fn connectTcp(client: *Client, host: []const u8, port: u16, protocol: Connec
.host = host,
.port = port,
.protocol = protocol,
})) |node|
return node;
})) |node| return node;
if (disable_tls and protocol == .tls)
return error.TlsInitializationFailed;
@ -1449,19 +1427,12 @@ pub fn connectTunnel(
client.connection_pool.release(client.allocator, conn);
}
const uri: Uri = .{
.scheme = "http",
.user = null,
.password = null,
.host = tunnel_host,
.port = tunnel_port,
.path = "",
.query = null,
.fragment = null,
};
var buffer: [8096]u8 = undefined;
var req = client.open(.CONNECT, uri, .{
var req = client.open(.CONNECT, .{
.scheme = "http",
.host = .{ .raw = tunnel_host },
.port = tunnel_port,
}, .{
.redirect_behavior = .unhandled,
.connection = conn,
.server_header_buffer = &buffer,
@ -1471,7 +1442,7 @@ pub fn connectTunnel(
};
defer req.deinit();
req.send(.{ .raw_uri = true }) catch |err| break :tunnel err;
req.send() catch |err| break :tunnel err;
req.wait() catch |err| break :tunnel err;
if (req.response.status.class() == .server_error) {
@ -1500,7 +1471,7 @@ pub fn connectTunnel(
}
// Prevents a dependency loop in open()
const ConnectErrorPartial = ConnectTcpError || error{ UnsupportedUrlScheme, ConnectionRefused };
const ConnectErrorPartial = ConnectTcpError || error{ UnsupportedUriScheme, ConnectionRefused };
pub const ConnectError = ConnectErrorPartial || RequestError;
/// Connect to `host:port` using the specified protocol. This will reuse a
@ -1548,7 +1519,7 @@ pub fn connect(
pub const RequestError = ConnectTcpError || ConnectErrorPartial || Request.SendError ||
std.fmt.ParseIntError || Connection.WriteError ||
error{ // TODO: file a zig fmt issue for this bad indentation
UnsupportedUrlScheme,
UnsupportedUriScheme,
UriMissingHost,
CertificateBundleLoadFailure,
@ -1598,12 +1569,25 @@ pub const RequestOptions = struct {
privileged_headers: []const http.Header = &.{},
};
/// Comptime lookup from URI scheme to the connection protocol used for it:
/// `http` and `ws` map to `.plain`, `https` and `wss` map to `.tls`.
pub const protocol_map = std.ComptimeStringMap(Connection.Protocol, .{
.{ "http", .plain },
.{ "ws", .plain },
.{ "https", .tls },
.{ "wss", .tls },
});
/// Checks that `uri` can be used for a connection and returns the protocol
/// implied by its scheme together with a copy of `uri` in which
/// - `host` is a `.raw` component (obtained through `toRawMaybeAlloc`, which
///   may allocate from `arena`), and
/// - `port` is always set, defaulting to 80 for `.plain` and 443 for `.tls`.
///
/// Fails with `error.UnsupportedUriScheme` when the scheme is not one of
/// `http`, `https`, `ws`, `wss`, with `error.UriMissingHost` when `uri.host`
/// is null, and with whatever `toRawMaybeAlloc` returns.
fn validateUri(uri: Uri, arena: Allocator) !struct { Connection.Protocol, Uri } {
    const scheme_protocols = std.ComptimeStringMap(Connection.Protocol, .{
        .{ "http", .plain },
        .{ "https", .tls },
        .{ "ws", .plain },
        .{ "wss", .tls },
    });
    const protocol = scheme_protocols.get(uri.scheme) orelse
        return error.UnsupportedUriScheme;

    // Hostname resolution needs the host as a raw string in any case, so
    // convert it once here.
    const host = uri.host orelse return error.UriMissingHost;
    const raw_host = try host.toRawMaybeAlloc(arena);

    const port: u16 = if (uri.port) |explicit_port| explicit_port else switch (protocol) {
        .plain => 80,
        .tls => 443,
    };

    var result = uri;
    result.host = .{ .raw = raw_host };
    result.port = port;
    return .{ protocol, result };
}
/// Open a connection to the host specified by `uri` and prepare to send a HTTP request.
///
@ -1633,14 +1617,8 @@ pub fn open(
}
}
const protocol = protocol_map.get(uri.scheme) orelse return error.UnsupportedUrlScheme;
const port: u16 = uri.port orelse switch (protocol) {
.plain => 80,
.tls => 443,
};
const host = uri.host orelse return error.UriMissingHost;
var server_header = std.heap.FixedBufferAllocator.init(options.server_header_buffer);
const protocol, const valid_uri = try validateUri(uri, server_header.allocator());
if (protocol == .tls and @atomicLoad(bool, &client.next_https_rescan_certs, .acquire)) {
if (disable_tls) unreachable;
@ -1649,15 +1627,17 @@ pub fn open(
defer client.ca_bundle_mutex.unlock();
if (client.next_https_rescan_certs) {
client.ca_bundle.rescan(client.allocator) catch return error.CertificateBundleLoadFailure;
client.ca_bundle.rescan(client.allocator) catch
return error.CertificateBundleLoadFailure;
@atomicStore(bool, &client.next_https_rescan_certs, false, .release);
}
}
const conn = options.connection orelse try client.connect(host, port, protocol);
const conn = options.connection orelse
try client.connect(valid_uri.host.?.raw, valid_uri.port.?, protocol);
var req: Request = .{
.uri = uri,
.uri = valid_uri,
.client = client,
.connection = conn,
.keep_alive = options.keep_alive,
@ -1671,7 +1651,7 @@ pub fn open(
.status = undefined,
.reason = undefined,
.keep_alive = undefined,
.parser = proto.HeadersParser.init(options.server_header_buffer),
.parser = proto.HeadersParser.init(server_header.buffer[server_header.end_index..]),
},
.headers = options.headers,
.extra_headers = options.extra_headers,
@ -1751,7 +1731,7 @@ pub fn fetch(client: *Client, options: FetchOptions) !FetchResult {
if (options.payload) |payload| req.transfer_encoding = .{ .content_length = payload.len };
try req.send(.{ .raw_uri = options.raw_uri });
try req.send();
if (options.payload) |payload| try req.writeAll(payload);

View File

@ -64,7 +64,7 @@ test "trailers" {
});
defer req.deinit();
try req.send(.{});
try req.send();
try req.wait();
const body = try req.reader().readAllAlloc(gpa, 8192);
@ -474,6 +474,15 @@ test "general client/server API coverage" {
.{ .name = "location", .value = "/redirect/3" },
},
});
} else if (mem.eql(u8, request.head.target, "/redirect/5")) {
try request.respond("Hello, Redirected!\n", .{
.status = .found,
.extra_headers = &.{
.{ .name = "location", .value = "/%2525" },
},
});
} else if (mem.eql(u8, request.head.target, "/%2525")) {
try request.respond("Encoded redirect successful!\n", .{});
} else if (mem.eql(u8, request.head.target, "/redirect/invalid")) {
const invalid_port = try getUnusedTcpPort();
const location = try std.fmt.allocPrint(gpa, "http://127.0.0.1:{d}", .{invalid_port});
@ -529,7 +538,7 @@ test "general client/server API coverage" {
});
defer req.deinit();
try req.send(.{});
try req.send();
try req.wait();
const body = try req.reader().readAllAlloc(gpa, 8192);
@ -554,7 +563,7 @@ test "general client/server API coverage" {
});
defer req.deinit();
try req.send(.{});
try req.send();
try req.wait();
const body = try req.reader().readAllAlloc(gpa, 8192 * 1024);
@ -578,7 +587,7 @@ test "general client/server API coverage" {
});
defer req.deinit();
try req.send(.{});
try req.send();
try req.wait();
const body = try req.reader().readAllAlloc(gpa, 8192);
@ -604,7 +613,7 @@ test "general client/server API coverage" {
});
defer req.deinit();
try req.send(.{});
try req.send();
try req.wait();
const body = try req.reader().readAllAlloc(gpa, 8192);
@ -629,7 +638,7 @@ test "general client/server API coverage" {
});
defer req.deinit();
try req.send(.{});
try req.send();
try req.wait();
const body = try req.reader().readAllAlloc(gpa, 8192);
@ -656,7 +665,7 @@ test "general client/server API coverage" {
});
defer req.deinit();
try req.send(.{});
try req.send();
try req.wait();
const body = try req.reader().readAllAlloc(gpa, 8192);
@ -684,7 +693,7 @@ test "general client/server API coverage" {
});
defer req.deinit();
try req.send(.{});
try req.send();
try req.wait();
try std.testing.expectEqual(.ok, req.response.status);
@ -725,7 +734,7 @@ test "general client/server API coverage" {
});
defer req.deinit();
try req.send(.{});
try req.send();
try req.wait();
const body = try req.reader().readAllAlloc(gpa, 8192);
@ -749,7 +758,7 @@ test "general client/server API coverage" {
});
defer req.deinit();
try req.send(.{});
try req.send();
try req.wait();
const body = try req.reader().readAllAlloc(gpa, 8192);
@ -773,7 +782,7 @@ test "general client/server API coverage" {
});
defer req.deinit();
try req.send(.{});
try req.send();
try req.wait();
const body = try req.reader().readAllAlloc(gpa, 8192);
@ -797,13 +806,34 @@ test "general client/server API coverage" {
});
defer req.deinit();
try req.send(.{});
try req.send();
req.wait() catch |err| switch (err) {
error.TooManyHttpRedirects => {},
else => return err,
};
}
{ // redirect to encoded url
const location = try std.fmt.allocPrint(gpa, "http://127.0.0.1:{d}/redirect/5", .{port});
defer gpa.free(location);
const uri = try std.Uri.parse(location);
log.info("{s}", .{location});
var server_header_buffer: [1024]u8 = undefined;
var req = try client.open(.GET, uri, .{
.server_header_buffer = &server_header_buffer,
});
defer req.deinit();
try req.send();
try req.wait();
const body = try req.reader().readAllAlloc(gpa, 8192);
defer gpa.free(body);
try expectEqualStrings("Encoded redirect successful!\n", body);
}
// connection has been kept alive
try expect(client.http_proxy != null or client.connection_pool.free_len == 1);
@ -819,7 +849,7 @@ test "general client/server API coverage" {
});
defer req.deinit();
try req.send(.{});
try req.send();
const result = req.wait();
// a proxy without an upstream is likely to return a 5xx status.
@ -913,16 +943,16 @@ test "Server streams both reading and writing" {
var server_header_buffer: [555]u8 = undefined;
var req = try client.open(.POST, .{
.scheme = "http",
.host = "127.0.0.1",
.host = .{ .raw = "127.0.0.1" },
.port = test_server.port(),
.path = "/",
.path = .{ .percent_encoded = "/" },
}, .{
.server_header_buffer = &server_header_buffer,
});
defer req.deinit();
req.transfer_encoding = .chunked;
try req.send(.{});
try req.send();
try req.wait();
try req.writeAll("one ");
@ -956,7 +986,7 @@ fn echoTests(client: *http.Client, port: u16) !void {
req.transfer_encoding = .{ .content_length = 14 };
try req.send(.{});
try req.send();
try req.writeAll("Hello, ");
try req.writeAll("World!\n");
try req.finish();
@ -990,7 +1020,7 @@ fn echoTests(client: *http.Client, port: u16) !void {
req.transfer_encoding = .chunked;
try req.send(.{});
try req.send();
try req.writeAll("Hello, ");
try req.writeAll("World!\n");
try req.finish();
@ -1044,7 +1074,7 @@ fn echoTests(client: *http.Client, port: u16) !void {
req.transfer_encoding = .chunked;
try req.send(.{});
try req.send();
try req.writeAll("Hello, ");
try req.writeAll("World!\n");
try req.finish();
@ -1075,7 +1105,7 @@ fn echoTests(client: *http.Client, port: u16) !void {
req.transfer_encoding = .chunked;
try req.send(.{});
try req.send();
try req.wait();
try expectEqual(.expectation_failed, req.response.status);
}
@ -1180,7 +1210,7 @@ test "redirect to different connection" {
});
defer req.deinit();
try req.send(.{});
try req.send();
try req.wait();
const body = try req.reader().readAllAlloc(gpa, 8192);

View File

@ -413,7 +413,7 @@ pub const StreamSource = @import("io/stream_source.zig").StreamSource;
pub const tty = @import("io/tty.zig");
/// A Writer that doesn't write to anything.
pub const null_writer = @as(NullWriter, .{ .context = {} });
pub const null_writer: NullWriter = .{ .context = {} };
const NullWriter = Writer(void, error{}, dummyWrite);
fn dummyWrite(context: void, data: []const u8) error{}!usize {

View File

@ -339,12 +339,12 @@ pub fn run(f: *Fetch) RunError!void {
.path_or_url => |path_or_url| {
if (fs.cwd().openDir(path_or_url, .{ .iterate = true })) |dir| {
var resource: Resource = .{ .dir = dir };
return runResource(f, path_or_url, &resource, null);
return f.runResource(path_or_url, &resource, null);
} else |dir_err| {
const file_err = if (dir_err == error.NotDir) e: {
if (fs.cwd().openFile(path_or_url, .{})) |file| {
var resource: Resource = .{ .file = file };
return runResource(f, path_or_url, &resource, null);
return f.runResource(path_or_url, &resource, null);
} else |err| break :e err;
} else dir_err;
@ -356,7 +356,7 @@ pub fn run(f: *Fetch) RunError!void {
};
var server_header_buffer: [header_buffer_size]u8 = undefined;
var resource = try f.initResource(uri, &server_header_buffer);
return runResource(f, uri.path, &resource, null);
return f.runResource(try uri.path.toRawMaybeAlloc(arena), &resource, null);
}
},
};
@ -418,7 +418,7 @@ pub fn run(f: *Fetch) RunError!void {
);
var server_header_buffer: [header_buffer_size]u8 = undefined;
var resource = try f.initResource(uri, &server_header_buffer);
return runResource(f, uri.path, &resource, remote.hash);
return f.runResource(try uri.path.toRawMaybeAlloc(arena), &resource, remote.hash);
}
pub fn deinit(f: *Fetch) void {
@ -897,13 +897,14 @@ fn initResource(f: *Fetch, uri: std.Uri, server_header_buffer: []u8) RunError!Re
const arena = f.arena.allocator();
const eb = &f.error_bundle;
if (ascii.eqlIgnoreCase(uri.scheme, "file")) return .{
.file = f.parent_package_root.openFile(uri.path, .{}) catch |err| {
if (ascii.eqlIgnoreCase(uri.scheme, "file")) {
const path = try uri.path.toRawMaybeAlloc(arena);
return .{ .file = f.parent_package_root.openFile(path, .{}) catch |err| {
return f.fail(f.location_tok, try eb.printString("unable to open '{}{s}': {s}", .{
f.parent_package_root, uri.path, @errorName(err),
f.parent_package_root, path, @errorName(err),
}));
},
};
} };
}
const http_client = f.job_queue.http_client;
@ -920,7 +921,7 @@ fn initResource(f: *Fetch, uri: std.Uri, server_header_buffer: []u8) RunError!Re
};
errdefer req.deinit(); // releases more than memory
req.send(.{}) catch |err| {
req.send() catch |err| {
return f.fail(f.location_tok, try eb.printString(
"HTTP request failed: {s}",
.{@errorName(err)},
@ -967,7 +968,8 @@ fn initResource(f: *Fetch, uri: std.Uri, server_header_buffer: []u8) RunError!Re
};
const want_oid = want_oid: {
const want_ref = uri.fragment orelse "HEAD";
const want_ref =
if (uri.fragment) |fragment| try fragment.toRawMaybeAlloc(arena) else "HEAD";
if (git.parseOid(want_ref)) |oid| break :want_oid oid else |_| {}
const want_ref_head = try std.fmt.allocPrint(arena, "refs/heads/{s}", .{want_ref});

View File

@ -540,9 +540,13 @@ pub const Session = struct {
http_headers_buffer: []u8,
) !CapabilityIterator {
var info_refs_uri = session.uri;
info_refs_uri.path = try std.fs.path.resolvePosix(allocator, &.{ "/", session.uri.path, "info/refs" });
defer allocator.free(info_refs_uri.path);
info_refs_uri.query = "service=git-upload-pack";
{
const session_uri_path = try std.fmt.allocPrint(allocator, "{path}", .{session.uri.path});
defer allocator.free(session_uri_path);
info_refs_uri.path = .{ .percent_encoded = try std.fs.path.resolvePosix(allocator, &.{ "/", session_uri_path, "info/refs" }) };
}
defer allocator.free(info_refs_uri.path.percent_encoded);
info_refs_uri.query = .{ .percent_encoded = "service=git-upload-pack" };
info_refs_uri.fragment = null;
const max_redirects = 3;
@ -554,16 +558,18 @@ pub const Session = struct {
},
});
errdefer request.deinit();
try request.send(.{});
try request.send();
try request.finish();
try request.wait();
if (request.response.status != .ok) return error.ProtocolError;
const any_redirects_occurred = request.redirect_behavior.remaining() < max_redirects;
if (any_redirects_occurred) {
if (!mem.endsWith(u8, request.uri.path, "/info/refs")) return error.UnparseableRedirect;
const request_uri_path = try std.fmt.allocPrint(allocator, "{path}", .{request.uri.path});
defer allocator.free(request_uri_path);
if (!mem.endsWith(u8, request_uri_path, "/info/refs")) return error.UnparseableRedirect;
var new_uri = request.uri;
new_uri.path = new_uri.path[0 .. new_uri.path.len - "/info/refs".len];
new_uri.path = .{ .percent_encoded = request_uri_path[0 .. request_uri_path.len - "/info/refs".len] };
new_uri.query = null;
redirect_uri.* = try std.fmt.allocPrint(allocator, "{+/}", .{new_uri});
return error.Redirected;
@ -645,8 +651,12 @@ pub const Session = struct {
/// Returns an iterator over refs known to the server.
pub fn listRefs(session: Session, allocator: Allocator, options: ListRefsOptions) !RefIterator {
var upload_pack_uri = session.uri;
upload_pack_uri.path = try std.fs.path.resolvePosix(allocator, &.{ "/", session.uri.path, "git-upload-pack" });
defer allocator.free(upload_pack_uri.path);
{
const session_uri_path = try std.fmt.allocPrint(allocator, "{path}", .{session.uri.path});
defer allocator.free(session_uri_path);
upload_pack_uri.path = .{ .percent_encoded = try std.fs.path.resolvePosix(allocator, &.{ "/", session_uri_path, "git-upload-pack" }) };
}
defer allocator.free(upload_pack_uri.path.percent_encoded);
upload_pack_uri.query = null;
upload_pack_uri.fragment = null;
@ -681,7 +691,7 @@ pub const Session = struct {
});
errdefer request.deinit();
request.transfer_encoding = .{ .content_length = body.items.len };
try request.send(.{});
try request.send();
try request.writeAll(body.items);
try request.finish();
@ -748,8 +758,12 @@ pub const Session = struct {
http_headers_buffer: []u8,
) !FetchStream {
var upload_pack_uri = session.uri;
upload_pack_uri.path = try std.fs.path.resolvePosix(allocator, &.{ "/", session.uri.path, "git-upload-pack" });
defer allocator.free(upload_pack_uri.path);
{
const session_uri_path = try std.fmt.allocPrint(allocator, "{path}", .{session.uri.path});
defer allocator.free(session_uri_path);
upload_pack_uri.path = .{ .percent_encoded = try std.fs.path.resolvePosix(allocator, &.{ "/", session_uri_path, "git-upload-pack" }) };
}
defer allocator.free(upload_pack_uri.path.percent_encoded);
upload_pack_uri.query = null;
upload_pack_uri.fragment = null;
@ -786,7 +800,7 @@ pub const Session = struct {
});
errdefer request.deinit();
request.transfer_encoding = .{ .content_length = body.items.len };
try request.send(.{});
try request.send();
try request.writeAll(body.items);
try request.finish();

View File

@ -233,9 +233,18 @@ pub fn flushModule(self: *SpirV, arena: Allocator, prog_node: *std.Progress.Node
// name if it contains no strange characters is nice for debugging. URI encoding fits the bill.
// We're using : as separator, which is a reserved character.
const escaped_name = try std.Uri.escapeString(gpa, name.toSlice(&mod.intern_pool));
defer gpa.free(escaped_name);
try error_info.writer().print(":{s}", .{escaped_name});
try std.Uri.Component.percentEncode(
error_info.writer(),
name.toSlice(&mod.intern_pool),
struct {
fn isValidChar(c: u8) bool {
    // Rejects NUL, '%' and ':'; every other byte is reported as valid.
    return c != 0 and c != '%' and c != ':';
}
}.isValidChar,
);
}
try spv.sections.debug_strings.emit(gpa, .OpSourceExtension, .{
.extension = error_info.items,