mirror of
https://github.com/ziglang/zig.git
synced 2024-11-16 09:03:12 +00:00
ceb0a632cf
closes #13535
2689 lines
100 KiB
Zig
2689 lines
100 KiB
Zig
// JSON parser conforming to RFC8259.
|
|
//
|
|
// https://tools.ietf.org/html/rfc8259
|
|
|
|
const builtin = @import("builtin");
|
|
const std = @import("std.zig");
|
|
const debug = std.debug;
|
|
const assert = debug.assert;
|
|
const testing = std.testing;
|
|
const mem = std.mem;
|
|
const maxInt = std.math.maxInt;
|
|
|
|
pub const WriteStream = @import("json/write_stream.zig").WriteStream;
|
|
pub const writeStream = @import("json/write_stream.zig").writeStream;
|
|
|
|
/// Records whether a parsed string token contained escape sequences and, if
/// so, how the decoded length differs from the raw (encoded) length.
const StringEscapes = union(enum) {
    /// No escapes present; the raw input slice can be used as-is (zero-copy).
    None,

    /// At least one escape sequence is present; decoding is required.
    Some: struct {
        /// decoded_length - encoded_length. Stored signed since escape
        /// sequences usually shrink when decoded.
        size_diff: isize,
    },
};
|
|
|
|
/// Checks to see if a string matches what it would be as a json-encoded string
/// Assumes that `encoded` is a well-formed json string
fn encodesTo(decoded: []const u8, encoded: []const u8) bool {
    var i: usize = 0; // index into `decoded`
    var j: usize = 0; // index into `encoded`
    while (i < decoded.len) {
        // `encoded` ran out before `decoded` did.
        if (j >= encoded.len) return false;
        if (encoded[j] != '\\') {
            // Plain (unescaped) character: must match byte-for-byte.
            if (decoded[i] != encoded[j]) return false;
            j += 1;
            i += 1;
        } else {
            const escape_type = encoded[j + 1];
            if (escape_type != 'u') {
                // Two-character escape sequence (\n, \t, ...).
                const t: u8 = switch (escape_type) {
                    '\\' => '\\',
                    '/' => '/',
                    'n' => '\n',
                    'r' => '\r',
                    't' => '\t',
                    'f' => 12, // form feed
                    'b' => 8, // backspace
                    '"' => '"',
                    // Guaranteed by the "well-formed json" precondition.
                    else => unreachable,
                };
                if (decoded[i] != t) return false;
                j += 2;
                i += 1;
            } else {
                // \uXXXX escape, possibly the first half of a UTF-16
                // surrogate pair.
                var codepoint = std.fmt.parseInt(u21, encoded[j + 2 .. j + 6], 16) catch unreachable;
                j += 6;
                if (codepoint >= 0xD800 and codepoint < 0xDC00) {
                    // surrogate pair: a low surrogate escape must follow
                    assert(encoded[j] == '\\');
                    assert(encoded[j + 1] == 'u');
                    const low_surrogate = std.fmt.parseInt(u21, encoded[j + 2 .. j + 6], 16) catch unreachable;
                    codepoint = 0x10000 + (((codepoint & 0x03ff) << 10) | (low_surrogate & 0x03ff));
                    j += 6;
                }
                // Compare the UTF-8 encoding of the codepoint against the
                // decoded bytes.
                var buf: [4]u8 = undefined;
                const len = std.unicode.utf8Encode(codepoint, &buf) catch unreachable;
                if (i + len > decoded.len) return false;
                if (!mem.eql(u8, decoded[i .. i + len], buf[0..len])) return false;
                i += len;
            }
        }
    }
    // `i == decoded.len` is guaranteed by the loop condition. If `encoded`
    // still has bytes left it encodes a longer string than `decoded`, so the
    // strings do not match. (Previously this was an `assert`, which crashed
    // on inputs such as encodesTo("a", "ab") instead of returning false.)
    return j == encoded.len;
}
|
|
|
|
/// A single token slice into the parent string.
///
/// Use `token.slice()` on the input at the current position to get the current slice.
pub const Token = union(enum) {
    ObjectBegin,
    ObjectEnd,
    ArrayBegin,
    ArrayEnd,
    String: struct {
        /// How many bytes the token is.
        count: usize,

        /// Whether string contains an escape sequence and cannot be zero-copied
        escapes: StringEscapes,

        /// Length of the string once escape sequences are decoded.
        /// Uses wrapping add (+%) because `size_diff` is a negative isize
        /// reinterpreted as usize via @bitCast.
        pub fn decodedLength(self: @This()) usize {
            return self.count +% switch (self.escapes) {
                .None => 0,
                .Some => |s| @bitCast(usize, s.size_diff),
            };
        }

        /// Slice into the underlying input string.
        /// `i` is the current parser position (one past the token's last byte).
        pub fn slice(self: @This(), input: []const u8, i: usize) []const u8 {
            return input[i - self.count .. i];
        }
    },
    Number: struct {
        /// How many bytes the token is.
        count: usize,

        /// Whether number is simple and can be represented by an integer (i.e. no `.` or `e`)
        is_integer: bool,

        /// Slice into the underlying input string.
        /// `i` is the current parser position (one past the token's last byte).
        pub fn slice(self: @This(), input: []const u8, i: usize) []const u8 {
            return input[i - self.count .. i];
        }
    },
    True,
    False,
    Null,
};
|
|
|
|
/// Discriminates the two aggregate JSON container kinds. Fits in a single bit
/// so that a nesting history can be stored compactly in a bit-stack.
const AggregateContainerType = enum(u1) { object, array };

// A LIFO bit-stack. Tracks which container-types have been entered during parse.
fn AggregateContainerStack(comptime n: usize) type {
    return struct {
        const Self = @This();

        const element_bitcount = 8 * @sizeOf(usize);
        const element_count = n / element_bitcount;
        const ElementType = @Type(.{ .Int = .{ .signedness = .unsigned, .bits = element_bitcount } });
        const ElementShiftAmountType = std.math.Log2Int(ElementType);

        comptime {
            // `n` must be a whole number of machine words; partial trailing
            // words are not supported.
            std.debug.assert(n % element_bitcount == 0);
        }

        // One bit per nesting level: 0 = object, 1 = array
        // (matches AggregateContainerType's backing values).
        memory: [element_count]ElementType,
        // Number of bits (nesting levels) currently in use.
        len: usize,

        /// Clear the stack. Must be called before first use.
        pub fn init(self: *Self) void {
            self.memory = [_]ElementType{0} ** element_count;
            self.len = 0;
        }

        /// Push a container type. Returns null (without modifying the stack)
        /// when the maximum of `n` nestings is already reached.
        pub fn push(self: *Self, ty: AggregateContainerType) ?void {
            if (self.len >= n) {
                return null;
            }

            const index = self.len / element_bitcount;
            const sub_index = @intCast(ElementShiftAmountType, self.len % element_bitcount);
            const clear_mask = ~(@as(ElementType, 1) << sub_index);
            const set_bits = @as(ElementType, @enumToInt(ty)) << sub_index;

            // Clear the target bit, then OR in the new value.
            self.memory[index] &= clear_mask;
            self.memory[index] |= set_bits;
            self.len += 1;
        }

        /// Return the innermost container type without removing it, or null
        /// when the stack is empty.
        pub fn peek(self: *Self) ?AggregateContainerType {
            if (self.len == 0) {
                return null;
            }

            const bit_to_extract = self.len - 1;
            const index = bit_to_extract / element_bitcount;
            const sub_index = @intCast(ElementShiftAmountType, bit_to_extract % element_bitcount);
            const bit = @intCast(u1, (self.memory[index] >> sub_index) & 1);
            return @intToEnum(AggregateContainerType, bit);
        }

        /// Remove and return the innermost container type, or null when the
        /// stack is empty.
        pub fn pop(self: *Self) ?AggregateContainerType {
            if (self.peek()) |ty| {
                self.len -= 1;
                return ty;
            }

            return null;
        }
    };
}
|
|
|
|
/// A small streaming JSON parser. This accepts input one byte at a time and returns tokens as
/// they are encountered. No copies or allocations are performed during parsing and the entire
/// parsing state requires ~40-50 bytes of stack space.
///
/// Conforms strictly to RFC8259.
///
/// For a non-byte based wrapper, consider using TokenStream instead.
pub const StreamingParser = struct {
    const default_max_nestings = 256;

    // Current state
    state: State,
    // How many bytes we have counted for the current token
    count: usize,
    // What state to follow after parsing a string (either property or value string)
    after_string_state: State,
    // What state to follow after parsing a value (either top-level or value end)
    after_value_state: State,
    // If we stopped now, would the complete parsed string to now be a valid json string
    complete: bool,
    // Current token flags to pass through to the next generated, see Token.
    string_escapes: StringEscapes,
    // When in .String states, was the previous character a high surrogate?
    string_last_was_high_surrogate: bool,
    // Used inside of StringEscapeHexUnicode* states
    string_unicode_codepoint: u21,
    // The first byte needs to be stored to validate 3- and 4-byte sequences.
    sequence_first_byte: u8 = undefined,
    // When in .Number states, is the number a (still) valid integer?
    number_is_integer: bool,
    // Bit-stack for nested object/map literals (max 256 nestings).
    stack: AggregateContainerStack(default_max_nestings),

    /// Create a parser in its initial state.
    pub fn init() StreamingParser {
        var p: StreamingParser = undefined;
        p.reset();
        return p;
    }

    /// Reset to the initial state, ready to parse a fresh top-level value.
    pub fn reset(p: *StreamingParser) void {
        p.state = .TopLevelBegin;
        p.count = 0;
        // Set before ever read in main transition function
        p.after_string_state = undefined;
        p.after_value_state = .ValueEnd; // handle end of values normally
        p.stack.init();
        p.complete = false;
        p.string_escapes = undefined;
        p.string_last_was_high_surrogate = undefined;
        p.string_unicode_codepoint = undefined;
        p.number_is_integer = undefined;
    }

    pub const State = enum(u8) {
        // These must be first with these explicit values as we rely on them for indexing the
        // bit-stack directly and avoiding a branch.
        ObjectSeparator = 0,
        ValueEnd = 1,

        TopLevelBegin,
        TopLevelEnd,

        ValueBegin,
        ValueBeginNoClosing,

        String,
        StringUtf8Byte2Of2,
        StringUtf8Byte2Of3,
        StringUtf8Byte3Of3,
        StringUtf8Byte2Of4,
        StringUtf8Byte3Of4,
        StringUtf8Byte4Of4,
        StringEscapeCharacter,
        StringEscapeHexUnicode4,
        StringEscapeHexUnicode3,
        StringEscapeHexUnicode2,
        StringEscapeHexUnicode1,

        Number,
        NumberMaybeDotOrExponent,
        NumberMaybeDigitOrDotOrExponent,
        NumberFractionalRequired,
        NumberFractional,
        NumberMaybeExponent,
        NumberExponent,
        NumberExponentDigitsRequired,
        NumberExponentDigits,

        TrueLiteral1,
        TrueLiteral2,
        TrueLiteral3,

        FalseLiteral1,
        FalseLiteral2,
        FalseLiteral3,
        FalseLiteral4,

        NullLiteral1,
        NullLiteral2,
        NullLiteral3,

        // Given an aggregate container type, return the state which should be entered after
        // processing a complete value type.
        pub fn fromAggregateContainerType(ty: AggregateContainerType) State {
            comptime {
                // The enum values of AggregateContainerType and the first two
                // State members are kept in lock-step; verify at compile time.
                std.debug.assert(@enumToInt(AggregateContainerType.object) == @enumToInt(State.ObjectSeparator));
                std.debug.assert(@enumToInt(AggregateContainerType.array) == @enumToInt(State.ValueEnd));
            }

            return @intToEnum(State, @enumToInt(ty));
        }
    };

    pub const Error = error{
        InvalidTopLevel,
        TooManyNestedItems,
        TooManyClosingItems,
        InvalidValueBegin,
        InvalidValueEnd,
        UnbalancedBrackets,
        UnbalancedBraces,
        UnexpectedClosingBracket,
        UnexpectedClosingBrace,
        InvalidNumber,
        InvalidSeparator,
        InvalidLiteral,
        InvalidEscapeCharacter,
        InvalidUnicodeHexSymbol,
        InvalidUtf8Byte,
        InvalidTopLevelTrailing,
        InvalidControlCharacter,
    };

    /// Give another byte to the parser and obtain any new tokens. This may (rarely) return two
    /// tokens. token2 is always null if token1 is null.
    ///
    /// There is currently no error recovery on a bad stream.
    pub fn feed(p: *StreamingParser, c: u8, token1: *?Token, token2: *?Token) Error!void {
        token1.* = null;
        token2.* = null;
        p.count += 1;

        // unlikely
        // A `true` return from transition means the byte terminated a token
        // (e.g. a digit run) and must be re-fed to be consumed by the next
        // state; that second pass may emit a second token.
        if (try p.transition(c, token1)) {
            _ = try p.transition(c, token2);
        }
    }

    // Perform a single transition on the state machine and return any possible token.
    // Returns true when the byte `c` was NOT consumed and must be processed again.
    fn transition(p: *StreamingParser, c: u8, token: *?Token) Error!bool {
        switch (p.state) {
            .TopLevelBegin => switch (c) {
                '{' => {
                    p.stack.push(.object) orelse return error.TooManyNestedItems;
                    p.state = .ValueBegin;
                    p.after_string_state = .ObjectSeparator;

                    token.* = Token.ObjectBegin;
                },
                '[' => {
                    p.stack.push(.array) orelse return error.TooManyNestedItems;
                    p.state = .ValueBegin;
                    p.after_string_state = .ValueEnd;

                    token.* = Token.ArrayBegin;
                },
                '-' => {
                    p.number_is_integer = true;
                    p.state = .Number;
                    p.after_value_state = .TopLevelEnd;
                    p.count = 0;
                },
                '0' => {
                    p.number_is_integer = true;
                    p.state = .NumberMaybeDotOrExponent;
                    p.after_value_state = .TopLevelEnd;
                    p.count = 0;
                },
                '1'...'9' => {
                    p.number_is_integer = true;
                    p.state = .NumberMaybeDigitOrDotOrExponent;
                    p.after_value_state = .TopLevelEnd;
                    p.count = 0;
                },
                '"' => {
                    p.state = .String;
                    p.after_value_state = .TopLevelEnd;
                    // We don't actually need the following since after_value_state should override.
                    p.after_string_state = .ValueEnd;
                    p.string_escapes = .None;
                    p.string_last_was_high_surrogate = false;
                    p.count = 0;
                },
                't' => {
                    p.state = .TrueLiteral1;
                    p.after_value_state = .TopLevelEnd;
                    p.count = 0;
                },
                'f' => {
                    p.state = .FalseLiteral1;
                    p.after_value_state = .TopLevelEnd;
                    p.count = 0;
                },
                'n' => {
                    p.state = .NullLiteral1;
                    p.after_value_state = .TopLevelEnd;
                    p.count = 0;
                },
                0x09, 0x0A, 0x0D, 0x20 => {
                    // whitespace
                },
                else => {
                    return error.InvalidTopLevel;
                },
            },

            .TopLevelEnd => switch (c) {
                0x09, 0x0A, 0x0D, 0x20 => {
                    // whitespace
                },
                else => {
                    return error.InvalidTopLevelTrailing;
                },
            },

            .ValueBegin => switch (c) {
                // NOTE: These are shared in ValueEnd as well, think we can reorder states to
                // be a bit clearer and avoid this duplication.
                '}' => {
                    const last_type = p.stack.peek() orelse return error.TooManyClosingItems;

                    if (last_type != .object) {
                        return error.UnexpectedClosingBrace;
                    }

                    _ = p.stack.pop();
                    p.state = .ValueBegin;
                    p.after_string_state = State.fromAggregateContainerType(last_type);

                    switch (p.stack.len) {
                        0 => {
                            p.complete = true;
                            p.state = .TopLevelEnd;
                        },
                        else => {
                            p.state = .ValueEnd;
                        },
                    }

                    token.* = Token.ObjectEnd;
                },
                ']' => {
                    const last_type = p.stack.peek() orelse return error.TooManyClosingItems;

                    if (last_type != .array) {
                        return error.UnexpectedClosingBracket;
                    }

                    _ = p.stack.pop();
                    p.state = .ValueBegin;
                    p.after_string_state = State.fromAggregateContainerType(last_type);

                    switch (p.stack.len) {
                        0 => {
                            p.complete = true;
                            p.state = .TopLevelEnd;
                        },
                        else => {
                            p.state = .ValueEnd;
                        },
                    }

                    token.* = Token.ArrayEnd;
                },
                '{' => {
                    p.stack.push(.object) orelse return error.TooManyNestedItems;

                    p.state = .ValueBegin;
                    p.after_string_state = .ObjectSeparator;

                    token.* = Token.ObjectBegin;
                },
                '[' => {
                    p.stack.push(.array) orelse return error.TooManyNestedItems;

                    p.state = .ValueBegin;
                    p.after_string_state = .ValueEnd;

                    token.* = Token.ArrayBegin;
                },
                '-' => {
                    p.number_is_integer = true;
                    p.state = .Number;
                    p.count = 0;
                },
                '0' => {
                    p.number_is_integer = true;
                    p.state = .NumberMaybeDotOrExponent;
                    p.count = 0;
                },
                '1'...'9' => {
                    p.number_is_integer = true;
                    p.state = .NumberMaybeDigitOrDotOrExponent;
                    p.count = 0;
                },
                '"' => {
                    p.state = .String;
                    p.string_escapes = .None;
                    p.string_last_was_high_surrogate = false;
                    p.count = 0;
                },
                't' => {
                    p.state = .TrueLiteral1;
                    p.count = 0;
                },
                'f' => {
                    p.state = .FalseLiteral1;
                    p.count = 0;
                },
                'n' => {
                    p.state = .NullLiteral1;
                    p.count = 0;
                },
                0x09, 0x0A, 0x0D, 0x20 => {
                    // whitespace
                },
                else => {
                    return error.InvalidValueBegin;
                },
            },

            // TODO: A bit of duplication here and in the following state, redo.
            // Entered after a ',' or ':'; identical to ValueBegin except that
            // closing brackets/braces are rejected (no trailing commas).
            .ValueBeginNoClosing => switch (c) {
                '{' => {
                    p.stack.push(.object) orelse return error.TooManyNestedItems;

                    p.state = .ValueBegin;
                    p.after_string_state = .ObjectSeparator;

                    token.* = Token.ObjectBegin;
                },
                '[' => {
                    p.stack.push(.array) orelse return error.TooManyNestedItems;

                    p.state = .ValueBegin;
                    p.after_string_state = .ValueEnd;

                    token.* = Token.ArrayBegin;
                },
                '-' => {
                    p.number_is_integer = true;
                    p.state = .Number;
                    p.count = 0;
                },
                '0' => {
                    p.number_is_integer = true;
                    p.state = .NumberMaybeDotOrExponent;
                    p.count = 0;
                },
                '1'...'9' => {
                    p.number_is_integer = true;
                    p.state = .NumberMaybeDigitOrDotOrExponent;
                    p.count = 0;
                },
                '"' => {
                    p.state = .String;
                    p.string_escapes = .None;
                    p.string_last_was_high_surrogate = false;
                    p.count = 0;
                },
                't' => {
                    p.state = .TrueLiteral1;
                    p.count = 0;
                },
                'f' => {
                    p.state = .FalseLiteral1;
                    p.count = 0;
                },
                'n' => {
                    p.state = .NullLiteral1;
                    p.count = 0;
                },
                0x09, 0x0A, 0x0D, 0x20 => {
                    // whitespace
                },
                else => {
                    return error.InvalidValueBegin;
                },
            },

            .ValueEnd => switch (c) {
                ',' => {
                    // ValueEnd is only reachable while inside a container, so
                    // the stack cannot be empty here.
                    const last_type = p.stack.peek() orelse unreachable;
                    p.after_string_state = State.fromAggregateContainerType(last_type);
                    p.state = .ValueBeginNoClosing;
                },
                ']' => {
                    const last_type = p.stack.peek() orelse return error.TooManyClosingItems;

                    if (last_type != .array) {
                        return error.UnexpectedClosingBracket;
                    }

                    _ = p.stack.pop();
                    p.state = .ValueEnd;
                    p.after_string_state = State.fromAggregateContainerType(last_type);

                    if (p.stack.len == 0) {
                        p.complete = true;
                        p.state = .TopLevelEnd;
                    }

                    token.* = Token.ArrayEnd;
                },
                '}' => {
                    const last_type = p.stack.peek() orelse return error.TooManyClosingItems;

                    if (last_type != .object) {
                        return error.UnexpectedClosingBrace;
                    }

                    _ = p.stack.pop();
                    p.state = .ValueEnd;
                    p.after_string_state = State.fromAggregateContainerType(last_type);

                    if (p.stack.len == 0) {
                        p.complete = true;
                        p.state = .TopLevelEnd;
                    }

                    token.* = Token.ObjectEnd;
                },
                0x09, 0x0A, 0x0D, 0x20 => {
                    // whitespace
                },
                else => {
                    return error.InvalidValueEnd;
                },
            },

            // Between an object key and its value; only ':' (or whitespace) is valid.
            .ObjectSeparator => switch (c) {
                ':' => {
                    p.state = .ValueBeginNoClosing;
                    p.after_string_state = .ValueEnd;
                },
                0x09, 0x0A, 0x0D, 0x20 => {
                    // whitespace
                },
                else => {
                    return error.InvalidSeparator;
                },
            },

            .String => switch (c) {
                0x00...0x1F => {
                    // RFC8259 forbids unescaped control characters in strings.
                    return error.InvalidControlCharacter;
                },
                '"' => {
                    p.state = p.after_string_state;
                    if (p.after_value_state == .TopLevelEnd) {
                        p.state = .TopLevelEnd;
                        p.complete = true;
                    }

                    token.* = .{
                        .String = .{
                            // -1 excludes the closing quote just consumed.
                            .count = p.count - 1,
                            .escapes = p.string_escapes,
                        },
                    };
                    p.string_escapes = undefined;
                    p.string_last_was_high_surrogate = undefined;
                },
                '\\' => {
                    p.state = .StringEscapeCharacter;
                    switch (p.string_escapes) {
                        .None => {
                            p.string_escapes = .{ .Some = .{ .size_diff = 0 } };
                        },
                        .Some => {},
                    }
                },
                0x20, 0x21, 0x23...0x5B, 0x5D...0x7F => {
                    // non-control ascii
                    p.string_last_was_high_surrogate = false;
                },
                0xC2...0xDF => {
                    p.state = .StringUtf8Byte2Of2;
                },
                0xE0...0xEF => {
                    p.state = .StringUtf8Byte2Of3;
                    p.sequence_first_byte = c;
                },
                0xF0...0xF4 => {
                    p.state = .StringUtf8Byte2Of4;
                    p.sequence_first_byte = c;
                },
                else => {
                    return error.InvalidUtf8Byte;
                },
            },

            // Continuation bytes must have the 0b10xxxxxx form.
            .StringUtf8Byte2Of2 => switch (c >> 6) {
                0b10 => p.state = .String,
                else => return error.InvalidUtf8Byte,
            },
            .StringUtf8Byte2Of3 => {
                // The valid range of the second byte depends on the first byte
                // (rejects overlong encodings, per the Unicode well-formedness
                // table).
                switch (p.sequence_first_byte) {
                    0xE0 => switch (c) {
                        0xA0...0xBF => {},
                        else => return error.InvalidUtf8Byte,
                    },
                    0xE1...0xEF => switch (c) {
                        0x80...0xBF => {},
                        else => return error.InvalidUtf8Byte,
                    },
                    else => return error.InvalidUtf8Byte,
                }
                p.state = .StringUtf8Byte3Of3;
            },
            .StringUtf8Byte3Of3 => switch (c) {
                0x80...0xBF => p.state = .String,
                else => return error.InvalidUtf8Byte,
            },
            .StringUtf8Byte2Of4 => {
                // Second-byte range depends on the first byte (rejects overlong
                // encodings and codepoints above U+10FFFF).
                switch (p.sequence_first_byte) {
                    0xF0 => switch (c) {
                        0x90...0xBF => {},
                        else => return error.InvalidUtf8Byte,
                    },
                    0xF1...0xF3 => switch (c) {
                        0x80...0xBF => {},
                        else => return error.InvalidUtf8Byte,
                    },
                    0xF4 => switch (c) {
                        0x80...0x8F => {},
                        else => return error.InvalidUtf8Byte,
                    },
                    else => return error.InvalidUtf8Byte,
                }
                p.state = .StringUtf8Byte3Of4;
            },
            .StringUtf8Byte3Of4 => switch (c) {
                0x80...0xBF => p.state = .StringUtf8Byte4Of4,
                else => return error.InvalidUtf8Byte,
            },
            .StringUtf8Byte4Of4 => switch (c) {
                0x80...0xBF => p.state = .String,
                else => return error.InvalidUtf8Byte,
            },

            .StringEscapeCharacter => switch (c) {
                // NOTE: '/' is allowed as an escaped character but it also is allowed
                // as unescaped according to the RFC. There is a reported errata which suggests
                // removing the non-escaped variant but it makes more sense to simply disallow
                // it as an escape code here.
                //
                // The current JSONTestSuite tests rely on both of this behaviour being present
                // however, so we default to the status quo where both are accepted until this
                // is further clarified.
                '"', '\\', '/', 'b', 'f', 'n', 'r', 't' => {
                    // Two raw bytes decode to one: decoded string is 1 shorter.
                    p.string_escapes.Some.size_diff -= 1;
                    p.state = .String;
                    p.string_last_was_high_surrogate = false;
                },
                'u' => {
                    p.state = .StringEscapeHexUnicode4;
                },
                else => {
                    return error.InvalidEscapeCharacter;
                },
            },

            // The four HexUnicode states accumulate one nibble each of a
            // \uXXXX escape, most significant first.
            .StringEscapeHexUnicode4 => {
                var codepoint: u21 = undefined;
                switch (c) {
                    else => return error.InvalidUnicodeHexSymbol,
                    '0'...'9' => {
                        codepoint = c - '0';
                    },
                    'A'...'F' => {
                        codepoint = c - 'A' + 10;
                    },
                    'a'...'f' => {
                        codepoint = c - 'a' + 10;
                    },
                }
                p.state = .StringEscapeHexUnicode3;
                p.string_unicode_codepoint = codepoint << 12;
            },

            .StringEscapeHexUnicode3 => {
                var codepoint: u21 = undefined;
                switch (c) {
                    else => return error.InvalidUnicodeHexSymbol,
                    '0'...'9' => {
                        codepoint = c - '0';
                    },
                    'A'...'F' => {
                        codepoint = c - 'A' + 10;
                    },
                    'a'...'f' => {
                        codepoint = c - 'a' + 10;
                    },
                }
                p.state = .StringEscapeHexUnicode2;
                p.string_unicode_codepoint |= codepoint << 8;
            },

            .StringEscapeHexUnicode2 => {
                var codepoint: u21 = undefined;
                switch (c) {
                    else => return error.InvalidUnicodeHexSymbol,
                    '0'...'9' => {
                        codepoint = c - '0';
                    },
                    'A'...'F' => {
                        codepoint = c - 'A' + 10;
                    },
                    'a'...'f' => {
                        codepoint = c - 'a' + 10;
                    },
                }
                p.state = .StringEscapeHexUnicode1;
                p.string_unicode_codepoint |= codepoint << 4;
            },

            .StringEscapeHexUnicode1 => {
                var codepoint: u21 = undefined;
                switch (c) {
                    else => return error.InvalidUnicodeHexSymbol,
                    '0'...'9' => {
                        codepoint = c - '0';
                    },
                    'A'...'F' => {
                        codepoint = c - 'A' + 10;
                    },
                    'a'...'f' => {
                        codepoint = c - 'a' + 10;
                    },
                }
                p.state = .String;
                p.string_unicode_codepoint |= codepoint;
                if (p.string_unicode_codepoint < 0xD800 or p.string_unicode_codepoint >= 0xE000) {
                    // not part of surrogate pair
                    // 6 raw bytes (\uXXXX) become 1-3 UTF-8 bytes.
                    p.string_escapes.Some.size_diff -= @as(isize, 6 - (std.unicode.utf8CodepointSequenceLength(p.string_unicode_codepoint) catch unreachable));
                    p.string_last_was_high_surrogate = false;
                } else if (p.string_unicode_codepoint < 0xDC00) {
                    // 'high' surrogate
                    // takes 3 bytes to encode a half surrogate pair into wtf8
                    p.string_escapes.Some.size_diff -= 6 - 3;
                    p.string_last_was_high_surrogate = true;
                } else {
                    // 'low' surrogate
                    p.string_escapes.Some.size_diff -= 6;
                    if (p.string_last_was_high_surrogate) {
                        // takes 4 bytes to encode a full surrogate pair into utf8
                        // 3 bytes are already reserved by high surrogate
                        p.string_escapes.Some.size_diff -= -1;
                    } else {
                        // takes 3 bytes to encode a half surrogate pair into wtf8
                        p.string_escapes.Some.size_diff -= -3;
                    }
                    p.string_last_was_high_surrogate = false;
                }
                p.string_unicode_codepoint = undefined;
            },

            // Number states: each sets `complete` because a top-level number
            // has no terminator; the token is only emitted once a non-number
            // byte arrives (and that byte is re-fed, see `feed`).
            .Number => {
                p.complete = p.after_value_state == .TopLevelEnd;
                switch (c) {
                    '0' => {
                        p.state = .NumberMaybeDotOrExponent;
                    },
                    '1'...'9' => {
                        p.state = .NumberMaybeDigitOrDotOrExponent;
                    },
                    else => {
                        return error.InvalidNumber;
                    },
                }
            },

            .NumberMaybeDotOrExponent => {
                p.complete = p.after_value_state == .TopLevelEnd;
                switch (c) {
                    '.' => {
                        p.number_is_integer = false;
                        p.state = .NumberFractionalRequired;
                    },
                    'e', 'E' => {
                        p.number_is_integer = false;
                        p.state = .NumberExponent;
                    },
                    else => {
                        p.state = p.after_value_state;
                        token.* = .{
                            .Number = .{
                                .count = p.count,
                                .is_integer = p.number_is_integer,
                            },
                        };
                        p.number_is_integer = undefined;
                        return true;
                    },
                }
            },

            .NumberMaybeDigitOrDotOrExponent => {
                p.complete = p.after_value_state == .TopLevelEnd;
                switch (c) {
                    '.' => {
                        p.number_is_integer = false;
                        p.state = .NumberFractionalRequired;
                    },
                    'e', 'E' => {
                        p.number_is_integer = false;
                        p.state = .NumberExponent;
                    },
                    '0'...'9' => {
                        // another digit
                    },
                    else => {
                        p.state = p.after_value_state;
                        token.* = .{
                            .Number = .{
                                .count = p.count,
                                .is_integer = p.number_is_integer,
                            },
                        };
                        return true;
                    },
                }
            },

            .NumberFractionalRequired => {
                p.complete = p.after_value_state == .TopLevelEnd;
                switch (c) {
                    '0'...'9' => {
                        p.state = .NumberFractional;
                    },
                    else => {
                        return error.InvalidNumber;
                    },
                }
            },

            .NumberFractional => {
                p.complete = p.after_value_state == .TopLevelEnd;
                switch (c) {
                    '0'...'9' => {
                        // another digit
                    },
                    'e', 'E' => {
                        p.number_is_integer = false;
                        p.state = .NumberExponent;
                    },
                    else => {
                        p.state = p.after_value_state;
                        token.* = .{
                            .Number = .{
                                .count = p.count,
                                .is_integer = p.number_is_integer,
                            },
                        };
                        return true;
                    },
                }
            },

            .NumberMaybeExponent => {
                p.complete = p.after_value_state == .TopLevelEnd;
                switch (c) {
                    'e', 'E' => {
                        p.number_is_integer = false;
                        p.state = .NumberExponent;
                    },
                    else => {
                        p.state = p.after_value_state;
                        token.* = .{
                            .Number = .{
                                .count = p.count,
                                .is_integer = p.number_is_integer,
                            },
                        };
                        return true;
                    },
                }
            },

            .NumberExponent => switch (c) {
                '-', '+' => {
                    p.complete = false;
                    p.state = .NumberExponentDigitsRequired;
                },
                '0'...'9' => {
                    p.complete = p.after_value_state == .TopLevelEnd;
                    p.state = .NumberExponentDigits;
                },
                else => {
                    return error.InvalidNumber;
                },
            },

            .NumberExponentDigitsRequired => switch (c) {
                '0'...'9' => {
                    p.complete = p.after_value_state == .TopLevelEnd;
                    p.state = .NumberExponentDigits;
                },
                else => {
                    return error.InvalidNumber;
                },
            },

            .NumberExponentDigits => {
                p.complete = p.after_value_state == .TopLevelEnd;
                switch (c) {
                    '0'...'9' => {
                        // another digit
                    },
                    else => {
                        p.state = p.after_value_state;
                        token.* = .{
                            .Number = .{
                                .count = p.count,
                                .is_integer = p.number_is_integer,
                            },
                        };
                        return true;
                    },
                }
            },

            // Literal states match the remaining letters of true/false/null
            // one byte at a time.
            .TrueLiteral1 => switch (c) {
                'r' => p.state = .TrueLiteral2,
                else => return error.InvalidLiteral,
            },

            .TrueLiteral2 => switch (c) {
                'u' => p.state = .TrueLiteral3,
                else => return error.InvalidLiteral,
            },

            .TrueLiteral3 => switch (c) {
                'e' => {
                    p.state = p.after_value_state;
                    p.complete = p.state == .TopLevelEnd;
                    token.* = Token.True;
                },
                else => {
                    return error.InvalidLiteral;
                },
            },

            .FalseLiteral1 => switch (c) {
                'a' => p.state = .FalseLiteral2,
                else => return error.InvalidLiteral,
            },

            .FalseLiteral2 => switch (c) {
                'l' => p.state = .FalseLiteral3,
                else => return error.InvalidLiteral,
            },

            .FalseLiteral3 => switch (c) {
                's' => p.state = .FalseLiteral4,
                else => return error.InvalidLiteral,
            },

            .FalseLiteral4 => switch (c) {
                'e' => {
                    p.state = p.after_value_state;
                    p.complete = p.state == .TopLevelEnd;
                    token.* = Token.False;
                },
                else => {
                    return error.InvalidLiteral;
                },
            },

            .NullLiteral1 => switch (c) {
                'u' => p.state = .NullLiteral2,
                else => return error.InvalidLiteral,
            },

            .NullLiteral2 => switch (c) {
                'l' => p.state = .NullLiteral3,
                else => return error.InvalidLiteral,
            },

            .NullLiteral3 => switch (c) {
                'l' => {
                    p.state = p.after_value_state;
                    p.complete = p.state == .TopLevelEnd;
                    token.* = Token.Null;
                },
                else => {
                    return error.InvalidLiteral;
                },
            },
        }

        return false;
    }
};
|
|
|
|
/// A small wrapper over a StreamingParser for full slices. Returns a stream of json Tokens.
pub const TokenStream = struct {
    // Current byte offset into `slice`.
    i: usize,
    // The full JSON input being tokenized.
    slice: []const u8,
    parser: StreamingParser,
    // A second token produced by a single `feed` call, buffered so the next
    // call to `next` can return it.
    token: ?Token,

    pub const Error = StreamingParser.Error || error{UnexpectedEndOfJson};

    pub fn init(slice: []const u8) TokenStream {
        return TokenStream{
            .i = 0,
            .slice = slice,
            .parser = StreamingParser.init(),
            .token = null,
        };
    }

    // Nesting depth currently in use, counting a buffered token as one level.
    fn stackUsed(self: *TokenStream) usize {
        return self.parser.stack.len + if (self.token != null) @as(usize, 1) else 0;
    }

    /// Return the next token, null at end of input, or an error for
    /// malformed/truncated JSON.
    pub fn next(self: *TokenStream) Error!?Token {
        // Serve a token buffered by a previous double-token `feed` first.
        if (self.token) |token| {
            self.token = null;
            return token;
        }

        var t1: ?Token = undefined;
        var t2: ?Token = undefined;

        while (self.i < self.slice.len) {
            try self.parser.feed(self.slice[self.i], &t1, &t2);
            self.i += 1;

            if (t1) |token| {
                // Buffer the (rare) second token for the next call.
                self.token = t2;
                return token;
            }
        }

        // Without this a bare number fails, the streaming parser doesn't know the input ended
        try self.parser.feed(' ', &t1, &t2);
        self.i += 1;

        if (t1) |token| {
            return token;
        } else if (self.parser.complete) {
            return null;
        } else {
            return error.UnexpectedEndOfJson;
        }
    }
};
|
|
|
|
/// Validate a JSON string. This does not limit number precision so a decoder may not necessarily
/// be able to decode the string even if this returns true.
pub fn validate(s: []const u8) bool {
    var parser = StreamingParser.init();
    var first_token: ?Token = undefined;
    var second_token: ?Token = undefined;

    // Feed every byte through the streaming parser; any parse error means
    // the input is not valid JSON. Emitted tokens are ignored.
    for (s) |byte| {
        parser.feed(byte, &first_token, &second_token) catch return false;
    }

    // The input must also form a *complete* value (e.g. "[1" feeds cleanly
    // but is not valid).
    return parser.complete;
}
|
|
|
|
const Allocator = std.mem.Allocator;
|
|
const ArenaAllocator = std.heap.ArenaAllocator;
|
|
const ArrayList = std.ArrayList;
|
|
const StringArrayHashMap = std.StringArrayHashMap;
|
|
|
|
/// Result of parsing into a dynamically-typed `Value`. Every node of the tree
/// is allocated from `arena`, so a single `deinit` releases the whole tree.
pub const ValueTree = struct {
    arena: ArenaAllocator,
    root: Value,

    /// Free all memory owned by the tree.
    pub fn deinit(self: *ValueTree) void {
        self.arena.deinit();
    }
};
|
|
|
|
/// Map backing JSON objects; array-hash-map preserves key insertion order.
pub const ObjectMap = StringArrayHashMap(Value);

/// List backing JSON arrays.
pub const Array = ArrayList(Value);
|
|
|
|
/// Represents a JSON value
/// Currently only supports numbers that fit into i64 or f64.
pub const Value = union(enum) {
    Null,
    Bool: bool,
    Integer: i64,
    Float: f64,
    // Number that did not fit i64/f64; kept verbatim as its source text.
    NumberString: []const u8,
    String: []const u8,
    Array: Array,
    Object: ObjectMap,

    /// Serialize this value as JSON to `out_stream`, honoring the whitespace
    /// settings in `options`.
    pub fn jsonStringify(
        value: @This(),
        options: StringifyOptions,
        out_stream: anytype,
    ) @TypeOf(out_stream).Error!void {
        switch (value) {
            .Null => try stringify(null, options, out_stream),
            .Bool => |inner| try stringify(inner, options, out_stream),
            .Integer => |inner| try stringify(inner, options, out_stream),
            .Float => |inner| try stringify(inner, options, out_stream),
            // Already valid JSON number text; emit verbatim.
            .NumberString => |inner| try out_stream.writeAll(inner),
            .String => |inner| try stringify(inner, options, out_stream),
            .Array => |inner| try stringify(inner.items, options, out_stream),
            .Object => |inner| {
                // Objects are emitted manually so keys and values can be
                // stringified individually with nested indentation.
                try out_stream.writeByte('{');
                var field_output = false;
                var child_options = options;
                if (child_options.whitespace) |*child_whitespace| {
                    child_whitespace.indent_level += 1;
                }
                var it = inner.iterator();
                while (it.next()) |entry| {
                    // Comma before every entry except the first.
                    if (!field_output) {
                        field_output = true;
                    } else {
                        try out_stream.writeByte(',');
                    }
                    if (child_options.whitespace) |child_whitespace| {
                        try child_whitespace.outputIndent(out_stream);
                    }

                    try stringify(entry.key_ptr.*, options, out_stream);
                    try out_stream.writeByte(':');
                    if (child_options.whitespace) |child_whitespace| {
                        if (child_whitespace.separator) {
                            try out_stream.writeByte(' ');
                        }
                    }
                    try stringify(entry.value_ptr.*, child_options, out_stream);
                }
                // Closing brace goes on its own (outer-indented) line only
                // when the object was non-empty.
                if (field_output) {
                    if (options.whitespace) |whitespace| {
                        try whitespace.outputIndent(out_stream);
                    }
                }
                try out_stream.writeByte('}');
            },
        }
    }

    /// Debug helper: write this value as compact JSON to stderr.
    /// Serialization errors are silently ignored.
    pub fn dump(self: Value) void {
        std.debug.getStderrMutex().lock();
        defer std.debug.getStderrMutex().unlock();

        const stderr = std.io.getStdErr().writer();
        std.json.stringify(self, std.json.StringifyOptions{ .whitespace = null }, stderr) catch return;
    }
};
|
|
|
|
/// parse tokens from a stream, returning `false` if they do not decode to `value`
fn parsesTo(comptime T: type, value: T, tokens: *TokenStream, options: ParseOptions) !bool {
    // TODO: should be able to write this function to not require an allocator
    const parsed = try parse(T, tokens, options);
    defer parseFree(T, parsed, options);
    return parsedEqual(parsed, value);
}
|
|
|
|
/// Returns if a value returned by `parse` is deep-equal to another value
/// of the same type. Recurses through optionals, tagged unions, arrays,
/// structs and (single/slice) pointers; all other types use `==`.
fn parsedEqual(a: anytype, b: @TypeOf(a)) bool {
    switch (@typeInfo(@TypeOf(a))) {
        .Optional => {
            // Equal only if both are null, or both hold equal payloads.
            if (a == null and b == null) return true;
            if (a == null or b == null) return false;
            return parsedEqual(a.?, b.?);
        },
        .Union => |info| {
            if (info.tag_type) |UnionTag| {
                const tag_a = std.meta.activeTag(a);
                const tag_b = std.meta.activeTag(b);
                if (tag_a != tag_b) return false;

                // Tags match: compare the payload of the active field.
                inline for (info.fields) |field_info| {
                    if (@field(UnionTag, field_info.name) == tag_a) {
                        return parsedEqual(@field(a, field_info.name), @field(b, field_info.name));
                    }
                }
                return false;
            } else {
                // `parse` never produces untagged unions.
                unreachable;
            }
        },
        .Array => {
            // Fixed-size arrays: lengths are equal by type; compare elements.
            for (a) |e, i|
                if (!parsedEqual(e, b[i])) return false;
            return true;
        },
        .Struct => |info| {
            // All fields must compare equal.
            inline for (info.fields) |field_info| {
                if (!parsedEqual(@field(a, field_info.name), @field(b, field_info.name))) return false;
            }
            return true;
        },
        .Pointer => |ptrInfo| switch (ptrInfo.size) {
            // Single pointers compare pointees, not addresses.
            .One => return parsedEqual(a.*, b.*),
            .Slice => {
                if (a.len != b.len) return false;
                for (a) |e, i|
                    if (!parsedEqual(e, b[i])) return false;
                return true;
            },
            // `parse` never produces many-item or C pointers.
            .Many, .C => unreachable,
        },
        // Scalars (ints, floats, bools, enums, ...) support `==` directly.
        else => return a == b,
    }
    // Every switch arm above returns; this is never reached.
    unreachable;
}
|
|
|
|
/// Options controlling the behavior of `parse` and `parseFree`.
pub const ParseOptions = struct {
    /// Allocator used whenever the target type requires allocation
    /// (pointers, slices); without one such parses fail with
    /// `error.AllocatorRequired`.
    allocator: ?Allocator = null,

    /// Behaviour when a duplicate field is encountered.
    duplicate_field_behavior: enum {
        UseFirst,
        Error,
        UseLast,
    } = .Error,

    /// If false, finding an unknown field returns an error.
    ignore_unknown_fields: bool = false,

    /// If false, `parse` asserts that the whole input was consumed after
    /// the value; set to true when more JSON follows the parsed value.
    allow_trailing_data: bool = false,
};
|
|
|
|
/// Errors from `skipValue`: the token stream's own errors plus detecting
/// that the stream closed more scopes than it opened.
const SkipValueError = error{UnexpectedJsonDepth} || TokenStream.Error;

/// Consume and discard exactly one JSON value (scalar, object or array)
/// from `tokens`, leaving the stream positioned after it.
fn skipValue(tokens: *TokenStream) SkipValueError!void {
    // Nesting depth before we start; we are done once we return to it.
    const original_depth = tokens.stackUsed();

    // Return an error if no value is found
    _ = try tokens.next();
    // The token closed a scope we did not open: malformed nesting.
    if (tokens.stackUsed() < original_depth) return error.UnexpectedJsonDepth;
    // Depth unchanged means it was a scalar: nothing more to skip.
    if (tokens.stackUsed() == original_depth) return;

    // Container value: consume tokens until its matching close brings the
    // depth back to where we started.
    // NOTE(review): if the input ends before the container closes, this
    // relies on `tokens.next()` reporting the error — confirm TokenStream
    // errors on truncated input rather than returning null.
    while (try tokens.next()) |_| {
        if (tokens.stackUsed() == original_depth) return;
    }
}
|
|
|
|
/// Computes the full error set `parseInternal` can produce for `T`.
fn ParseInternalError(comptime T: type) type {
    // The visited-type list guards `ParseInternalErrorImpl` against infinite
    // recursion on self-referential types; the recursion starts empty.
    const no_types_seen = [_]type{};
    return ParseInternalErrorImpl(T, &no_types_seen);
}
|
|
|
|
/// Recursive worker for `ParseInternalError`.
/// `inferred_types` lists types already being expanded; when `T` recurses
/// into itself we return the empty error set to break the cycle (the outer
/// expansion already contributes T's errors).
fn ParseInternalErrorImpl(comptime T: type, comptime inferred_types: []const type) type {
    for (inferred_types) |ty| {
        if (T == ty) return error{};
    }

    switch (@typeInfo(T)) {
        .Bool => return error{UnexpectedToken},
        .Float, .ComptimeFloat => return error{UnexpectedToken} || std.fmt.ParseFloatError,
        .Int, .ComptimeInt => {
            // Integers may be parsed from integer or float syntax, so both
            // parser error sets apply.
            return error{ UnexpectedToken, InvalidNumber, Overflow } ||
                std.fmt.ParseIntError || std.fmt.ParseFloatError;
        },
        .Optional => |optionalInfo| {
            return ParseInternalErrorImpl(optionalInfo.child, inferred_types ++ [_]type{T});
        },
        // Fix: `std.meta.IntToEnumError` was previously merged twice here;
        // the duplicate union was redundant (error-set union is idempotent).
        .Enum => return error{ UnexpectedToken, InvalidEnumTag } || std.fmt.ParseIntError ||
            std.meta.IntToEnumError,
        .Union => |unionInfo| {
            if (unionInfo.tag_type) |_| {
                // Union of every member's error set.
                var errors = error{NoUnionMembersMatched};
                for (unionInfo.fields) |u_field| {
                    errors = errors || ParseInternalErrorImpl(u_field.field_type, inferred_types ++ [_]type{T});
                }
                return errors;
            } else {
                @compileError("Unable to parse into untagged union '" ++ @typeName(T) ++ "'");
            }
        },
        .Struct => |structInfo| {
            var errors = error{
                DuplicateJSONField,
                UnexpectedEndOfJson,
                UnexpectedToken,
                UnexpectedValue,
                UnknownField,
                MissingField,
            } || SkipValueError || TokenStream.Error;
            for (structInfo.fields) |field| {
                errors = errors || ParseInternalErrorImpl(field.field_type, inferred_types ++ [_]type{T});
            }
            return errors;
        },
        .Array => |arrayInfo| {
            return error{ UnexpectedEndOfJson, UnexpectedToken } || TokenStream.Error ||
                UnescapeValidStringError ||
                ParseInternalErrorImpl(arrayInfo.child, inferred_types ++ [_]type{T});
        },
        .Pointer => |ptrInfo| {
            // All pointer parses need an allocator and may fail to allocate.
            var errors = error{AllocatorRequired} || std.mem.Allocator.Error;
            switch (ptrInfo.size) {
                .One => {
                    return errors || ParseInternalErrorImpl(ptrInfo.child, inferred_types ++ [_]type{T});
                },
                .Slice => {
                    return errors || error{ UnexpectedEndOfJson, UnexpectedToken } ||
                        ParseInternalErrorImpl(ptrInfo.child, inferred_types ++ [_]type{T}) ||
                        UnescapeValidStringError || TokenStream.Error;
                },
                else => @compileError("Unable to parse into type '" ++ @typeName(T) ++ "'"),
            }
        },
        // Types with no parse support contribute nothing; `parseInternal`
        // rejects them with a compile error instead.
        else => return error{},
    }
    // Every switch arm returns or emits a compile error.
    unreachable;
}
|
|
|
|
/// Deserialize `token` (consuming further tokens from `tokens` as needed)
/// into a value of type `T`. On failure every partially-constructed result
/// is freed before returning, so the caller never owns a half-built value.
fn parseInternal(
    comptime T: type,
    token: Token,
    tokens: *TokenStream,
    options: ParseOptions,
) ParseInternalError(T)!T {
    switch (@typeInfo(T)) {
        .Bool => {
            return switch (token) {
                .True => true,
                .False => false,
                else => error.UnexpectedToken,
            };
        },
        .Float, .ComptimeFloat => {
            switch (token) {
                // Accept both a bare JSON number and a number quoted in a string.
                .Number => |numberToken| return try std.fmt.parseFloat(T, numberToken.slice(tokens.slice, tokens.i - 1)),
                .String => |stringToken| return try std.fmt.parseFloat(T, stringToken.slice(tokens.slice, tokens.i - 1)),
                else => return error.UnexpectedToken,
            }
        },
        .Int, .ComptimeInt => {
            switch (token) {
                .Number => |numberToken| {
                    if (numberToken.is_integer)
                        return try std.fmt.parseInt(T, numberToken.slice(tokens.slice, tokens.i - 1), 10);
                    // Float syntax (e.g. 1e2) is accepted for integers as long
                    // as the value is whole and in range for T.
                    const float = try std.fmt.parseFloat(f128, numberToken.slice(tokens.slice, tokens.i - 1));
                    if (@round(float) != float) return error.InvalidNumber;
                    if (float > std.math.maxInt(T) or float < std.math.minInt(T)) return error.Overflow;
                    return @floatToInt(T, float);
                },
                .String => |stringToken| {
                    // Quoted numbers: try integer syntax first, then fall back
                    // to float syntax with the same whole-number/range checks.
                    return std.fmt.parseInt(T, stringToken.slice(tokens.slice, tokens.i - 1), 10) catch |err| {
                        switch (err) {
                            error.Overflow => return err,
                            error.InvalidCharacter => {
                                const float = try std.fmt.parseFloat(f128, stringToken.slice(tokens.slice, tokens.i - 1));
                                if (@round(float) != float) return error.InvalidNumber;
                                if (float > std.math.maxInt(T) or float < std.math.minInt(T)) return error.Overflow;
                                return @floatToInt(T, float);
                            },
                        }
                    };
                },
                else => return error.UnexpectedToken,
            }
        },
        .Optional => |optionalInfo| {
            if (token == .Null) {
                return null;
            } else {
                return try parseInternal(optionalInfo.child, token, tokens, options);
            }
        },
        .Enum => |enumInfo| {
            switch (token) {
                .Number => |numberToken| {
                    // Numeric form must be an integer matching a declared tag.
                    if (!numberToken.is_integer) return error.UnexpectedToken;
                    const n = try std.fmt.parseInt(enumInfo.tag_type, numberToken.slice(tokens.slice, tokens.i - 1), 10);
                    return try std.meta.intToEnum(T, n);
                },
                .String => |stringToken| {
                    const source_slice = stringToken.slice(tokens.slice, tokens.i - 1);
                    switch (stringToken.escapes) {
                        .None => return std.meta.stringToEnum(T, source_slice) orelse return error.InvalidEnumTag,
                        .Some => {
                            // Escaped name: compare each field name against the
                            // still-encoded source without allocating.
                            inline for (enumInfo.fields) |field| {
                                if (field.name.len == stringToken.decodedLength() and encodesTo(field.name, source_slice)) {
                                    return @field(T, field.name);
                                }
                            }
                            return error.InvalidEnumTag;
                        },
                    }
                },
                else => return error.UnexpectedToken,
            }
        },
        .Union => |unionInfo| {
            if (unionInfo.tag_type) |_| {
                // try each of the union fields until we find one that matches
                inline for (unionInfo.fields) |u_field| {
                    // take a copy of tokens so we can withhold mutations until success
                    var tokens_copy = tokens.*;
                    if (parseInternal(u_field.field_type, token, &tokens_copy, options)) |value| {
                        tokens.* = tokens_copy;
                        return @unionInit(T, u_field.name, value);
                    } else |err| {
                        // Bubble up error.OutOfMemory
                        // Parsing some types won't have OutOfMemory in their
                        // error-sets, for the condition to be valid, merge it in.
                        if (@as(@TypeOf(err) || error{OutOfMemory}, err) == error.OutOfMemory) return err;
                        // Bubble up AllocatorRequired, as it indicates missing option
                        if (@as(@TypeOf(err) || error{AllocatorRequired}, err) == error.AllocatorRequired) return err;
                        // otherwise continue through the `inline for`
                    }
                }
                return error.NoUnionMembersMatched;
            } else {
                @compileError("Unable to parse into untagged union '" ++ @typeName(T) ++ "'");
            }
        },
        .Struct => |structInfo| {
            switch (token) {
                .ObjectBegin => {},
                else => return error.UnexpectedToken,
            }
            var r: T = undefined;
            var fields_seen = [_]bool{false} ** structInfo.fields.len;
            errdefer {
                // Free any fields already assigned before the failure.
                inline for (structInfo.fields) |field, i| {
                    if (fields_seen[i] and !field.is_comptime) {
                        parseFree(field.field_type, @field(r, field.name), options);
                    }
                }
            }

            while (true) {
                switch ((try tokens.next()) orelse return error.UnexpectedEndOfJson) {
                    .ObjectEnd => break,
                    .String => |stringToken| {
                        const key_source_slice = stringToken.slice(tokens.slice, tokens.i - 1);
                        var child_options = options;
                        child_options.allow_trailing_data = true;
                        var found = false;
                        inline for (structInfo.fields) |field, i| {
                            // TODO: using switches here segfault the compiler (#2727?)
                            if ((stringToken.escapes == .None and mem.eql(u8, field.name, key_source_slice)) or (stringToken.escapes == .Some and (field.name.len == stringToken.decodedLength() and encodesTo(field.name, key_source_slice)))) {
                                // if (switch (stringToken.escapes) {
                                //     .None => mem.eql(u8, field.name, key_source_slice),
                                //     .Some => (field.name.len == stringToken.decodedLength() and encodesTo(field.name, key_source_slice)),
                                // }) {
                                if (fields_seen[i]) {
                                    // switch (options.duplicate_field_behavior) {
                                    //     .UseFirst => {},
                                    //     .Error => {},
                                    //     .UseLast => {},
                                    // }
                                    if (options.duplicate_field_behavior == .UseFirst) {
                                        // unconditionally ignore value. for comptime fields, this skips check against default_value
                                        parseFree(field.field_type, try parse(field.field_type, tokens, child_options), child_options);
                                        found = true;
                                        break;
                                    } else if (options.duplicate_field_behavior == .Error) {
                                        return error.DuplicateJSONField;
                                    } else if (options.duplicate_field_behavior == .UseLast) {
                                        // Drop the earlier value and fall
                                        // through to re-parse this field.
                                        if (!field.is_comptime) {
                                            parseFree(field.field_type, @field(r, field.name), child_options);
                                        }
                                        fields_seen[i] = false;
                                    }
                                }
                                if (field.is_comptime) {
                                    // Comptime fields cannot be assigned at
                                    // runtime; the input must equal the default.
                                    if (!try parsesTo(field.field_type, @ptrCast(*align(1) const field.field_type, field.default_value.?).*, tokens, child_options)) {
                                        return error.UnexpectedValue;
                                    }
                                } else {
                                    @field(r, field.name) = try parse(field.field_type, tokens, child_options);
                                }
                                fields_seen[i] = true;
                                found = true;
                                break;
                            }
                        }
                        if (!found) {
                            if (options.ignore_unknown_fields) {
                                try skipValue(tokens);
                                continue;
                            } else {
                                return error.UnknownField;
                            }
                        }
                    },
                    else => return error.UnexpectedToken,
                }
            }
            // Fields absent from the input must have declared defaults.
            inline for (structInfo.fields) |field, i| {
                if (!fields_seen[i]) {
                    if (field.default_value) |default_ptr| {
                        if (!field.is_comptime) {
                            const default = @ptrCast(*align(1) const field.field_type, default_ptr).*;
                            @field(r, field.name) = default;
                        }
                    } else {
                        return error.MissingField;
                    }
                }
            }
            return r;
        },
        .Array => |arrayInfo| {
            switch (token) {
                .ArrayBegin => {
                    var r: T = undefined;
                    var i: usize = 0;
                    var child_options = options;
                    child_options.allow_trailing_data = true;
                    errdefer {
                        // Free exactly the fully-parsed elements: indices 0..i-1.
                        // Fix: the previous version started freeing at `r[i]`,
                        // which is uninitialized when the element parse at
                        // index `i` fails, and out of bounds (`i == r.len`)
                        // when the missing-ArrayEnd check below fails.
                        while (i > 0) {
                            i -= 1;
                            parseFree(arrayInfo.child, r[i], options);
                        }
                    }
                    while (i < r.len) : (i += 1) {
                        r[i] = try parse(arrayInfo.child, tokens, child_options);
                    }
                    // A fixed-size array must be followed immediately by ']'.
                    const tok = (try tokens.next()) orelse return error.UnexpectedEndOfJson;
                    switch (tok) {
                        .ArrayEnd => {},
                        else => return error.UnexpectedToken,
                    }
                    return r;
                },
                .String => |stringToken| {
                    // A fixed-size u8 array may be filled from a JSON string
                    // of exactly the right decoded length.
                    if (arrayInfo.child != u8) return error.UnexpectedToken;
                    var r: T = undefined;
                    const source_slice = stringToken.slice(tokens.slice, tokens.i - 1);
                    switch (stringToken.escapes) {
                        .None => mem.copy(u8, &r, source_slice),
                        .Some => try unescapeValidString(&r, source_slice),
                    }
                    return r;
                },
                else => return error.UnexpectedToken,
            }
        },
        .Pointer => |ptrInfo| {
            const allocator = options.allocator orelse return error.AllocatorRequired;
            switch (ptrInfo.size) {
                .One => {
                    const r: T = try allocator.create(ptrInfo.child);
                    errdefer allocator.destroy(r);
                    r.* = try parseInternal(ptrInfo.child, token, tokens, options);
                    return r;
                },
                .Slice => {
                    switch (token) {
                        .ArrayBegin => {
                            var arraylist = std.ArrayList(ptrInfo.child).init(allocator);
                            errdefer {
                                // Free parsed elements before the list itself.
                                while (arraylist.popOrNull()) |v| {
                                    parseFree(ptrInfo.child, v, options);
                                }
                                arraylist.deinit();
                            }

                            while (true) {
                                const tok = (try tokens.next()) orelse return error.UnexpectedEndOfJson;
                                switch (tok) {
                                    .ArrayEnd => break,
                                    else => {},
                                }

                                // Reserve first so append cannot fail after
                                // the element has been parsed (no leak).
                                try arraylist.ensureUnusedCapacity(1);
                                const v = try parseInternal(ptrInfo.child, tok, tokens, options);
                                arraylist.appendAssumeCapacity(v);
                            }

                            if (ptrInfo.sentinel) |some| {
                                const sentinel_value = @ptrCast(*align(1) const ptrInfo.child, some).*;
                                return try arraylist.toOwnedSliceSentinel(sentinel_value);
                            }

                            return try arraylist.toOwnedSlice();
                        },
                        .String => |stringToken| {
                            // []u8 (and [:x]u8) may be parsed from a string.
                            if (ptrInfo.child != u8) return error.UnexpectedToken;
                            const source_slice = stringToken.slice(tokens.slice, tokens.i - 1);
                            const len = stringToken.decodedLength();
                            const output = try allocator.alloc(u8, len + @boolToInt(ptrInfo.sentinel != null));
                            errdefer allocator.free(output);
                            switch (stringToken.escapes) {
                                .None => mem.copy(u8, output, source_slice),
                                .Some => try unescapeValidString(output, source_slice),
                            }

                            if (ptrInfo.sentinel) |some| {
                                const char = @ptrCast(*const u8, some).*;
                                output[len] = char;
                                return output[0..len :char];
                            }

                            return output;
                        },
                        else => return error.UnexpectedToken,
                    }
                },
                else => @compileError("Unable to parse into type '" ++ @typeName(T) ++ "'"),
            }
        },
        else => @compileError("Unable to parse into type '" ++ @typeName(T) ++ "'"),
    }
    // Every switch arm returns or emits a compile error.
    unreachable;
}
|
|
|
|
/// The complete error set `parse` can return for target type `T`:
/// the per-type parsing errors plus premature-end and tokenizer errors.
pub fn ParseError(comptime T: type) type {
    return ParseInternalError(T) || error{UnexpectedEndOfJson} || TokenStream.Error;
}
|
|
|
|
/// Parse one JSON value from `tokens` into a value of type `T`.
/// The result must be released with `parseFree` using the same `options`.
pub fn parse(comptime T: type, tokens: *TokenStream, options: ParseOptions) ParseError(T)!T {
    const token = (try tokens.next()) orelse return error.UnexpectedEndOfJson;
    const r = try parseInternal(T, token, tokens, options);
    // Protects `r` if the trailing-data check below errors.
    errdefer parseFree(T, r, options);
    if (!options.allow_trailing_data) {
        // NOTE(review): assumes the token stream reports trailing tokens as
        // an error from `next()` rather than returning them, which makes the
        // non-null case impossible — confirm against TokenStream.
        if ((try tokens.next()) != null) unreachable;
        assert(tokens.i >= tokens.slice.len);
    }
    return r;
}
|
|
|
|
/// Releases resources created by `parse`.
/// Should be called with the same type and `ParseOptions` that were passed to `parse`
pub fn parseFree(comptime T: type, value: T, options: ParseOptions) void {
    switch (@typeInfo(T)) {
        // Scalars own no resources.
        .Bool, .Float, .ComptimeFloat, .Int, .ComptimeInt, .Enum => {},
        .Optional => {
            // Only a non-null payload can own anything.
            if (value) |v| {
                return parseFree(@TypeOf(v), v, options);
            }
        },
        .Union => |unionInfo| {
            if (unionInfo.tag_type) |UnionTagType| {
                // Free only the active field's payload.
                inline for (unionInfo.fields) |u_field| {
                    if (value == @field(UnionTagType, u_field.name)) {
                        parseFree(u_field.field_type, @field(value, u_field.name), options);
                        break;
                    }
                }
            } else {
                // `parse` cannot produce untagged unions.
                unreachable;
            }
        },
        .Struct => |structInfo| {
            // Comptime fields were never runtime-assigned; skip them.
            inline for (structInfo.fields) |field| {
                if (!field.is_comptime) {
                    parseFree(field.field_type, @field(value, field.name), options);
                }
            }
        },
        .Array => |arrayInfo| {
            for (value) |v| {
                parseFree(arrayInfo.child, v, options);
            }
        },
        .Pointer => |ptrInfo| {
            // Pointers can only have been produced with an allocator present
            // (same `options` as the original `parse` call).
            const allocator = options.allocator orelse unreachable;
            switch (ptrInfo.size) {
                .One => {
                    // Free the pointee's contents before the pointee itself.
                    parseFree(ptrInfo.child, value.*, options);
                    allocator.destroy(value);
                },
                .Slice => {
                    for (value) |v| {
                        parseFree(ptrInfo.child, v, options);
                    }
                    allocator.free(value);
                },
                else => unreachable,
            }
        },
        else => unreachable,
    }
}
|
|
|
|
/// A non-stream JSON parser which constructs a tree of Value's.
pub const Parser = struct {
    allocator: Allocator,
    // Current position in the state machine driven by `transition`.
    state: State,
    // When true, unescaped string contents are duplicated into the tree's
    // arena; when false they alias the caller's input buffer.
    copy_strings: bool,
    // Stores parent nodes and un-combined Values.
    stack: Array,

    const State = enum {
        // Expecting an object key (or the closing '}').
        ObjectKey,
        // Expecting the value for the key currently on top of the stack.
        ObjectValue,
        // Expecting an array element (or the closing ']').
        ArrayValue,
        // Expecting a single top-level value.
        Simple,
    };

    pub fn init(allocator: Allocator, copy_strings: bool) Parser {
        return Parser{
            .allocator = allocator,
            .state = .Simple,
            .copy_strings = copy_strings,
            .stack = Array.init(allocator),
        };
    }

    pub fn deinit(p: *Parser) void {
        p.stack.deinit();
    }

    /// Reset the parser so it can be reused for another document.
    pub fn reset(p: *Parser) void {
        p.state = .Simple;
        p.stack.shrinkRetainingCapacity(0);
    }

    /// Parse `input` into a tree of Values. All tree allocations live in the
    /// returned tree's arena. Unless `copy_strings` was set, string values
    /// alias `input`, which must then outlive the tree.
    pub fn parse(p: *Parser, input: []const u8) !ValueTree {
        var s = TokenStream.init(input);

        var arena = ArenaAllocator.init(p.allocator);
        errdefer arena.deinit();
        const allocator = arena.allocator();

        while (try s.next()) |token| {
            try p.transition(allocator, input, s.i - 1, token);
        }

        // A well-formed document leaves exactly the root value on the stack.
        debug.assert(p.stack.items.len == 1);

        return ValueTree{
            .arena = arena,
            .root = p.stack.items[0],
        };
    }

    // Even though p.allocator exists, we take an explicit allocator so that allocation state
    // can be cleaned up on error correctly during a `parse` on call.
    fn transition(p: *Parser, allocator: Allocator, input: []const u8, i: usize, token: Token) !void {
        switch (p.state) {
            .ObjectKey => switch (token) {
                .ObjectEnd => {
                    // Root object: leave it on the stack as the result.
                    if (p.stack.items.len == 1) {
                        return;
                    }

                    // Nested object is complete; attach it to its parent.
                    var value = p.stack.pop();
                    try p.pushToParent(&value);
                },
                .String => |s| {
                    // Push the key; its value arrives in the next transition.
                    try p.stack.append(try p.parseString(allocator, s, input, i));
                    p.state = .ObjectValue;
                },
                else => {
                    // The streaming parser would return an error eventually.
                    // To prevent invalid state we return an error now.
                    // TODO make the streaming parser return an error as soon as it encounters an invalid object key
                    return error.InvalidLiteral;
                },
            },
            .ObjectValue => {
                // Stack layout here: [ ..., object, key ].
                var object = &p.stack.items[p.stack.items.len - 2].Object;
                var key = p.stack.items[p.stack.items.len - 1].String;

                switch (token) {
                    .ObjectBegin => {
                        // Nested container: push it; the key stays below it
                        // until `pushToParent` consumes both.
                        try p.stack.append(Value{ .Object = ObjectMap.init(allocator) });
                        p.state = .ObjectKey;
                    },
                    .ArrayBegin => {
                        try p.stack.append(Value{ .Array = Array.init(allocator) });
                        p.state = .ArrayValue;
                    },
                    .String => |s| {
                        // Scalar value: insert directly and pop the key.
                        try object.put(key, try p.parseString(allocator, s, input, i));
                        _ = p.stack.pop();
                        p.state = .ObjectKey;
                    },
                    .Number => |n| {
                        try object.put(key, try p.parseNumber(n, input, i));
                        _ = p.stack.pop();
                        p.state = .ObjectKey;
                    },
                    .True => {
                        try object.put(key, Value{ .Bool = true });
                        _ = p.stack.pop();
                        p.state = .ObjectKey;
                    },
                    .False => {
                        try object.put(key, Value{ .Bool = false });
                        _ = p.stack.pop();
                        p.state = .ObjectKey;
                    },
                    .Null => {
                        try object.put(key, Value.Null);
                        _ = p.stack.pop();
                        p.state = .ObjectKey;
                    },
                    .ObjectEnd, .ArrayEnd => {
                        // The tokenizer never emits a close token while a
                        // value is expected.
                        unreachable;
                    },
                }
            },
            .ArrayValue => {
                var array = &p.stack.items[p.stack.items.len - 1].Array;

                switch (token) {
                    .ArrayEnd => {
                        // Root array: leave it on the stack as the result.
                        if (p.stack.items.len == 1) {
                            return;
                        }

                        var value = p.stack.pop();
                        try p.pushToParent(&value);
                    },
                    .ObjectBegin => {
                        try p.stack.append(Value{ .Object = ObjectMap.init(allocator) });
                        p.state = .ObjectKey;
                    },
                    .ArrayBegin => {
                        try p.stack.append(Value{ .Array = Array.init(allocator) });
                        p.state = .ArrayValue;
                    },
                    .String => |s| {
                        try array.append(try p.parseString(allocator, s, input, i));
                    },
                    .Number => |n| {
                        try array.append(try p.parseNumber(n, input, i));
                    },
                    .True => {
                        try array.append(Value{ .Bool = true });
                    },
                    .False => {
                        try array.append(Value{ .Bool = false });
                    },
                    .Null => {
                        try array.append(Value.Null);
                    },
                    .ObjectEnd => {
                        // '}' cannot appear while an array element is expected.
                        unreachable;
                    },
                }
            },
            .Simple => switch (token) {
                .ObjectBegin => {
                    try p.stack.append(Value{ .Object = ObjectMap.init(allocator) });
                    p.state = .ObjectKey;
                },
                .ArrayBegin => {
                    try p.stack.append(Value{ .Array = Array.init(allocator) });
                    p.state = .ArrayValue;
                },
                .String => |s| {
                    try p.stack.append(try p.parseString(allocator, s, input, i));
                },
                .Number => |n| {
                    try p.stack.append(try p.parseNumber(n, input, i));
                },
                .True => {
                    try p.stack.append(Value{ .Bool = true });
                },
                .False => {
                    try p.stack.append(Value{ .Bool = false });
                },
                .Null => {
                    try p.stack.append(Value.Null);
                },
                .ObjectEnd, .ArrayEnd => {
                    // No container is open in the Simple state.
                    unreachable;
                },
            },
        }
    }

    // Attach a completed value to the container below it on the stack and
    // restore the state appropriate for that container.
    fn pushToParent(p: *Parser, value: *const Value) !void {
        switch (p.stack.items[p.stack.items.len - 1]) {
            // Object Parent -> [ ..., object, <key>, value ]
            Value.String => |key| {
                _ = p.stack.pop();

                var object = &p.stack.items[p.stack.items.len - 1].Object;
                try object.put(key, value.*);
                p.state = .ObjectKey;
            },
            // Array Parent -> [ ..., <array>, value ]
            Value.Array => |*array| {
                try array.append(value.*);
                p.state = .ArrayValue;
            },
            else => {
                unreachable;
            },
        }
    }

    // Build a `Value.String` from a string token: unescape into the arena if
    // needed, otherwise alias the input (or dupe it when `copy_strings`).
    fn parseString(p: *Parser, allocator: Allocator, s: std.meta.TagPayload(Token, Token.String), input: []const u8, i: usize) !Value {
        const slice = s.slice(input, i);
        switch (s.escapes) {
            .None => return Value{ .String = if (p.copy_strings) try allocator.dupe(u8, slice) else slice },
            .Some => {
                const output = try allocator.alloc(u8, s.decodedLength());
                errdefer allocator.free(output);
                try unescapeValidString(output, slice);
                return Value{ .String = output };
            },
        }
    }

    // Build a numeric Value. Integers that overflow i64 are preserved
    // verbatim as `.NumberString`; non-integers become f64 `.Float`.
    fn parseNumber(p: *Parser, n: std.meta.TagPayload(Token, Token.Number), input: []const u8, i: usize) !Value {
        _ = p;
        return if (n.is_integer)
            Value{
                .Integer = std.fmt.parseInt(i64, n.slice(input, i), 10) catch |e| switch (e) {
                    error.Overflow => return Value{ .NumberString = n.slice(input, i) },
                    error.InvalidCharacter => |err| return err,
                },
            }
        else
            Value{ .Float = try std.fmt.parseFloat(f64, n.slice(input, i)) };
    }
};
|
|
|
|
pub const UnescapeValidStringError = error{InvalidUnicodeHexSymbol};

/// Unescape a JSON string
/// Only to be used on strings already validated by the parser
/// (note the unreachable statements and lack of bounds checking)
/// `output` must be exactly the token's decoded length.
pub fn unescapeValidString(output: []u8, input: []const u8) UnescapeValidStringError!void {
    var in_i: usize = 0;
    var out_i: usize = 0;

    while (in_i < input.len) {
        const c = input[in_i];

        if (c != '\\') {
            // Plain character: copy through unchanged.
            output[out_i] = c;
            in_i += 1;
            out_i += 1;
            continue;
        }

        const escape_kind = input[in_i + 1];
        if (escape_kind != 'u') {
            // One of the single-character escapes.
            output[out_i] = @as(u8, switch (escape_kind) {
                '"' => '"',
                '\\' => '\\',
                '/' => '/',
                'b' => 8,
                'f' => 12,
                'n' => '\n',
                'r' => '\r',
                't' => '\t',
                else => unreachable,
            });
            in_i += 2;
            out_i += 1;
            continue;
        }

        // A \uXXXX escape; it may be the high half of a UTF-16 surrogate pair.
        const first_code_unit = std.fmt.parseInt(u16, input[in_i + 2 .. in_i + 6], 16) catch unreachable;

        // Guess optimistically that it is not a surrogate pair.
        if (std.unicode.utf8Encode(first_code_unit, output[out_i..])) |byte_count| {
            out_i += byte_count;
            in_i += 6;
        } else |err| {
            // Any failure other than a lone surrogate half means bad input.
            if (err != error.Utf8CannotEncodeSurrogateHalf) {
                return error.InvalidUnicodeHexSymbol;
            }
            // High surrogate: a second \uXXXX code unit must follow.
            if (in_i + 7 >= input.len or input[in_i + 6] != '\\' or input[in_i + 7] != 'u') {
                return error.InvalidUnicodeHexSymbol;
            }

            const second_code_unit = std.fmt.parseInt(u16, input[in_i + 8 .. in_i + 12], 16) catch unreachable;

            const utf16le_seq = [2]u16{
                std.mem.nativeToLittle(u16, first_code_unit),
                std.mem.nativeToLittle(u16, second_code_unit),
            };
            if (std.unicode.utf16leToUtf8(output[out_i..], &utf16le_seq)) |byte_count| {
                out_i += byte_count;
                in_i += 12;
            } else |_| {
                return error.InvalidUnicodeHexSymbol;
            }
        }
    }
    // The caller sized `output` from `decodedLength`, so we must land exactly
    // at the end of the buffer.
    std.debug.assert(out_i == output.len);
}
|
|
|
|
/// Options controlling how `stringify` renders values.
pub const StringifyOptions = struct {
    pub const Whitespace = struct {
        /// How many indentation levels deep are we?
        indent_level: usize = 0,

        /// What character(s) should be used for indentation?
        indent: union(enum) {
            // Number of spaces per indentation level.
            Space: u8,
            Tab: void,
            // No indentation and no newlines (minified output).
            None: void,
        } = .{ .Space = 4 },

        /// After a colon, should whitespace be inserted?
        separator: bool = true,

        /// Write a newline followed by `indent_level` repetitions of the
        /// configured indent sequence (nothing at all for `.None`).
        pub fn outputIndent(
            whitespace: @This(),
            out_stream: anytype,
        ) @TypeOf(out_stream).Error!void {
            var char: u8 = undefined;
            var n_chars: usize = undefined;
            switch (whitespace.indent) {
                .Space => |n_spaces| {
                    char = ' ';
                    n_chars = n_spaces;
                },
                .Tab => {
                    char = '\t';
                    n_chars = 1;
                },
                // Minified: suppress the newline as well.
                .None => return,
            }
            try out_stream.writeByte('\n');
            // Total fill = per-level count times current depth.
            n_chars *= whitespace.indent_level;
            try out_stream.writeByteNTimes(char, n_chars);
        }
    };

    /// Controls the whitespace emitted
    whitespace: ?Whitespace = null,

    /// Should optional fields with null value be written?
    emit_null_optional_fields: bool = true,

    // How []u8 / string-like data is serialized; defaults to JSON strings
    // with default escaping.
    string: StringOptions = StringOptions{ .String = .{} },

    /// Should []u8 be serialised as a string? or an array?
    pub const StringOptions = union(enum) {
        Array,
        String: StringOutputOptions,

        /// String output options
        const StringOutputOptions = struct {
            /// Should '/' be escaped in strings?
            escape_solidus: bool = false,

            /// Should unicode characters be escaped in strings?
            escape_unicode: bool = false,
        };
    };
};
|
|
|
|
/// Emit `codepoint` as a JSON \uXXXX escape sequence; codepoints outside the
/// Basic Multilingual Plane are written as a \uXXXX\uXXXX UTF-16 surrogate
/// pair (12 characters), per RFC 8259.
fn outputUnicodeEscape(
    codepoint: u21,
    out_stream: anytype,
) !void {
    if (codepoint > 0xFFFF) {
        std.debug.assert(codepoint <= 0x10FFFF);
        // Outside the BMP: encode the UTF-16 surrogate pair.
        const reduced = codepoint - 0x10000;
        const high = @intCast(u16, reduced >> 10) + 0xD800;
        const low = @intCast(u16, reduced & 0x3FF) + 0xDC00;
        try out_stream.writeAll("\\u");
        try std.fmt.formatIntValue(high, "x", std.fmt.FormatOptions{ .width = 4, .fill = '0' }, out_stream);
        try out_stream.writeAll("\\u");
        try std.fmt.formatIntValue(low, "x", std.fmt.FormatOptions{ .width = 4, .fill = '0' }, out_stream);
    } else {
        // BMP codepoint (U+0000..U+FFFF): a single six-character \uXXXX
        // sequence with four lowercase hex digits.
        try out_stream.writeAll("\\u");
        try std.fmt.formatIntValue(codepoint, "x", std.fmt.FormatOptions{ .width = 4, .fill = '0' }, out_stream);
    }
}
|
|
|
|
/// Write `string` to `writer` as a JSON encoded string.
/// Wraps the escaped characters (see `encodeJsonStringChars`) in '"' quotes.
pub fn encodeJsonString(string: []const u8, options: StringifyOptions, writer: anytype) !void {
    try writer.writeByte('\"');
    try encodeJsonStringChars(string, options, writer);
    try writer.writeByte('\"');
}
|
|
|
|
/// Write `chars` to `writer` as JSON encoded string characters.
/// `chars` is treated as UTF-8; escaping of '/' and of non-ASCII codepoints
/// is controlled by `options.string.String`. No surrounding quotes are
/// written (see `encodeJsonString`).
pub fn encodeJsonStringChars(chars: []const u8, options: StringifyOptions, writer: anytype) !void {
    var index: usize = 0;
    while (index < chars.len) : (index += 1) {
        const byte = chars[index];
        switch (byte) {
            // Printable ASCII except '"' (0x22), '/' (0x2F) and '\' (0x5C):
            // passed through unchanged.
            0x20...0x21, 0x23...0x2E, 0x30...0x5B, 0x5D...0x7F => try writer.writeByte(byte),
            // The only two characters that *must* be escaped.
            '\\' => try writer.writeAll("\\\\"),
            '\"' => try writer.writeAll("\\\""),
            // Escaping the solidus is optional in JSON.
            '/' => if (options.string.String.escape_solidus)
                try writer.writeAll("\\/")
            else
                try writer.writeByte('/'),
            // Control characters with dedicated short escapes.
            // TODO: option to switch between unicode and 'short' forms?
            0x8 => try writer.writeAll("\\b"),
            0xC => try writer.writeAll("\\f"),
            '\n' => try writer.writeAll("\\n"),
            '\r' => try writer.writeAll("\\r"),
            '\t' => try writer.writeAll("\\t"),
            else => {
                const seq_len = std.unicode.utf8ByteSequenceLength(byte) catch unreachable;
                // A remaining single-byte value here is a control character,
                // which must always be unicode-escaped; multi-byte sequences
                // are escaped only when the option requests it.
                if (seq_len == 1 or options.string.String.escape_unicode) {
                    const codepoint = std.unicode.utf8Decode(chars[index .. index + seq_len]) catch unreachable;
                    try outputUnicodeEscape(codepoint, writer);
                } else {
                    try writer.writeAll(chars[index .. index + seq_len]);
                }
                // Skip the continuation bytes of the consumed sequence.
                index += seq_len - 1;
            },
        }
    }
}
|
|
|
|
/// Serializes `value` as JSON and writes the result to `out_stream`.
///
/// Dispatches at compile time on `@TypeOf(value)`:
///  - ints/floats (incl. comptime) -> JSON numbers (floats in scientific form)
///  - bool / null / optionals -> `true`/`false`/`null` or the unwrapped payload
///  - enums, unions, structs: if the type declares `jsonStringify`, that
///    method takes over entirely
///  - tagged unions -> the active payload only; structs -> a JSON object
///  - error sets -> the error name as a JSON string
///  - slices/arrays/vectors -> JSON arrays (with `[]const u8` emitted as a
///    string when `options.string == .String` and the bytes are valid UTF-8)
///  - single-item pointers -> the pointee
/// Any other type is a compile error.
///
/// `options` controls whitespace/indentation, string encoding, and whether
/// null optional struct fields are emitted (`emit_null_optional_fields`).
/// Errors are exactly those of `out_stream`.
pub fn stringify(
    value: anytype,
    options: StringifyOptions,
    out_stream: anytype,
) @TypeOf(out_stream).Error!void {
    const T = @TypeOf(value);
    switch (@typeInfo(T)) {
        .Float, .ComptimeFloat => {
            return std.fmt.formatFloatScientific(value, std.fmt.FormatOptions{}, out_stream);
        },
        .Int, .ComptimeInt => {
            return std.fmt.formatIntValue(value, "", std.fmt.FormatOptions{}, out_stream);
        },
        .Bool => {
            return out_stream.writeAll(if (value) "true" else "false");
        },
        .Null => {
            return out_stream.writeAll("null");
        },
        .Optional => {
            if (value) |payload| {
                return try stringify(payload, options, out_stream);
            } else {
                return try stringify(null, options, out_stream);
            }
        },
        .Enum => {
            if (comptime std.meta.trait.hasFn("jsonStringify")(T)) {
                return value.jsonStringify(options, out_stream);
            }

            // There is no default enum encoding; the type must provide one.
            @compileError("Unable to stringify enum '" ++ @typeName(T) ++ "'");
        },
        .Union => {
            if (comptime std.meta.trait.hasFn("jsonStringify")(T)) {
                return value.jsonStringify(options, out_stream);
            }

            const info = @typeInfo(T).Union;
            if (info.tag_type) |UnionTagType| {
                // Emit only the payload of the currently-active field.
                inline for (info.fields) |u_field| {
                    if (value == @field(UnionTagType, u_field.name)) {
                        return try stringify(@field(value, u_field.name), options, out_stream);
                    }
                }
            } else {
                @compileError("Unable to stringify untagged union '" ++ @typeName(T) ++ "'");
            }
        },
        .Struct => |S| {
            if (comptime std.meta.trait.hasFn("jsonStringify")(T)) {
                return value.jsonStringify(options, out_stream);
            }

            try out_stream.writeByte('{');
            // Tracks whether any field has been written, so commas are only
            // placed between emitted fields.
            var field_output = false;
            var child_options = options;
            if (child_options.whitespace) |*child_whitespace| {
                child_whitespace.indent_level += 1;
            }
            inline for (S.fields) |Field| {
                // don't include void fields
                if (Field.field_type == void) continue;

                var emit_field = true;

                // don't include optional fields that are null when emit_null_optional_fields is set to false
                if (@typeInfo(Field.field_type) == .Optional) {
                    if (options.emit_null_optional_fields == false) {
                        if (@field(value, Field.name) == null) {
                            emit_field = false;
                        }
                    }
                }

                if (emit_field) {
                    if (!field_output) {
                        field_output = true;
                    } else {
                        try out_stream.writeByte(',');
                    }
                    if (child_options.whitespace) |child_whitespace| {
                        try child_whitespace.outputIndent(out_stream);
                    }
                    // Field names go through the same escaping as string values.
                    try encodeJsonString(Field.name, options, out_stream);
                    try out_stream.writeByte(':');
                    if (child_options.whitespace) |child_whitespace| {
                        if (child_whitespace.separator) {
                            try out_stream.writeByte(' ');
                        }
                    }
                    try stringify(@field(value, Field.name), child_options, out_stream);
                }
            }
            // The closing brace is indented at the parent level, but only if
            // at least one field was printed (otherwise emit a compact `{}`).
            if (field_output) {
                if (options.whitespace) |whitespace| {
                    try whitespace.outputIndent(out_stream);
                }
            }
            try out_stream.writeByte('}');
            return;
        },
        .ErrorSet => return stringify(@as([]const u8, @errorName(value)), options, out_stream),
        .Pointer => |ptr_info| switch (ptr_info.size) {
            .One => switch (@typeInfo(ptr_info.child)) {
                .Array => {
                    // Pointer-to-array is serialized as the equivalent slice.
                    const Slice = []const std.meta.Elem(ptr_info.child);
                    return stringify(@as(Slice, value), options, out_stream);
                },
                else => {
                    // TODO: avoid loops?
                    return stringify(value.*, options, out_stream);
                },
            },
            // TODO: .Many when there is a sentinel (waiting for https://github.com/ziglang/zig/pull/3972)
            .Slice => {
                // Byte slices become JSON strings when requested and when the
                // bytes are valid UTF-8; otherwise fall through to an array.
                if (ptr_info.child == u8 and options.string == .String and std.unicode.utf8ValidateSlice(value)) {
                    try encodeJsonString(value, options, out_stream);
                    return;
                }

                try out_stream.writeByte('[');
                var child_options = options;
                if (child_options.whitespace) |*whitespace| {
                    whitespace.indent_level += 1;
                }
                for (value) |x, i| {
                    if (i != 0) {
                        try out_stream.writeByte(',');
                    }
                    if (child_options.whitespace) |child_whitespace| {
                        try child_whitespace.outputIndent(out_stream);
                    }
                    try stringify(x, child_options, out_stream);
                }
                // As with structs: indent the closing bracket only when the
                // array is non-empty, so `[]` stays compact.
                if (value.len != 0) {
                    if (options.whitespace) |whitespace| {
                        try whitespace.outputIndent(out_stream);
                    }
                }
                try out_stream.writeByte(']');
                return;
            },
            else => @compileError("Unable to stringify type '" ++ @typeName(T) ++ "'"),
        },
        .Array => return stringify(&value, options, out_stream),
        .Vector => |info| {
            // Vectors are copied into an array and serialized like one.
            const array: [info.len]info.child = value;
            return stringify(&array, options, out_stream);
        },
        else => @compileError("Unable to stringify type '" ++ @typeName(T) ++ "'"),
    }
    unreachable;
}
|
|
|
|
/// Same as `stringify`, but renders into freshly allocated memory instead of
/// writing to a caller-supplied writer.
/// Caller owns the returned slice and must free it with `allocator`.
pub fn stringifyAlloc(allocator: std.mem.Allocator, value: anytype, options: StringifyOptions) ![]const u8 {
    var buffer = std.ArrayList(u8).init(allocator);
    // If serialization fails partway, release whatever was accumulated.
    errdefer buffer.deinit();
    try stringify(value, options, buffer.writer());
    return buffer.toOwnedSlice();
}
|
|
|
|
// Reference the companion files so their `test` blocks run together with
// this module's tests.
test {
    if (builtin.zig_backend != .stage1) {
        // json/test.zig is excluded on the stage1 backend; see
        // https://github.com/ziglang/zig/issues/8442
        _ = @import("json/test.zig");
    }
    _ = @import("json/write_stream.zig");
}
|
|
|
|
test "stringify null optional fields" {
    const MyStruct = struct {
        optional: ?[]const u8 = null,
        required: []const u8 = "something",
        another_optional: ?[]const u8 = null,
        another_required: []const u8 = "something else",
    };
    // Default behavior: null optionals are serialized explicitly as `null`.
    try teststringify(
        \\{"optional":null,"required":"something","another_optional":null,"another_required":"something else"}
    ,
        MyStruct{},
        StringifyOptions{},
    );
    // With emit_null_optional_fields disabled, null optionals are omitted
    // from the object entirely.
    try teststringify(
        \\{"required":"something","another_required":"something else"}
    ,
        MyStruct{},
        StringifyOptions{ .emit_null_optional_fields = false },
    );

    // Round-trip: the reduced output must still parse back into a value
    // equal to the default-initialized struct.
    var ts = TokenStream.init(
        \\{"required":"something","another_required":"something else"}
    );
    try std.testing.expect(try parsesTo(MyStruct, MyStruct{}, &ts, .{
        .allocator = std.testing.allocator,
    }));
}
|
|
|
|
test "skipValue" {
    // Every primitive and container value must be skippable in one call.
    var ts = TokenStream.init("false");
    try skipValue(&ts);
    ts = TokenStream.init("true");
    try skipValue(&ts);
    ts = TokenStream.init("null");
    try skipValue(&ts);
    ts = TokenStream.init("42");
    try skipValue(&ts);
    ts = TokenStream.init("42.0");
    try skipValue(&ts);
    ts = TokenStream.init("\"foo\"");
    try skipValue(&ts);
    ts = TokenStream.init("[101, 111, 121]");
    try skipValue(&ts);
    ts = TokenStream.init("{}");
    try skipValue(&ts);
    ts = TokenStream.init("{\"foo\": \"bar\"}");
    try skipValue(&ts);

    { // An absurd number of nestings
        const nestings = StreamingParser.default_max_nestings + 1;

        ts = TokenStream.init("[" ** nestings ++ "]" ** nestings);
        try testing.expectError(error.TooManyNestedItems, skipValue(&ts));
    }

    { // Would a number token cause problems in a deeply-nested array?
        const nestings = StreamingParser.default_max_nestings;
        const deeply_nested_array = "[" ** nestings ++ "0.118, 999, 881.99, 911.9, 725, 3" ++ "]" ** nestings;

        // At exactly the maximum nesting depth, skipping must still succeed...
        ts = TokenStream.init(deeply_nested_array);
        try skipValue(&ts);

        // ...and one level beyond it must be rejected.
        ts = TokenStream.init("[" ++ deeply_nested_array ++ "]");
        try testing.expectError(error.TooManyNestedItems, skipValue(&ts));
    }

    // Mismatched brace/square bracket
    ts = TokenStream.init("[102, 111, 111}");
    try testing.expectError(error.UnexpectedClosingBrace, skipValue(&ts));

    { // should fail if no value found (e.g. immediate close of object)
        var empty_object = TokenStream.init("{}");
        assert(.ObjectBegin == (try empty_object.next()).?);
        try testing.expectError(error.UnexpectedJsonDepth, skipValue(&empty_object));

        var empty_array = TokenStream.init("[]");
        assert(.ArrayBegin == (try empty_array.next()).?);
        try testing.expectError(error.UnexpectedJsonDepth, skipValue(&empty_array));
    }
}
|
|
|
|
test "stringify basic types" {
    try teststringify("false", false, StringifyOptions{});
    try teststringify("true", true, StringifyOptions{});
    // Null optionals serialize as `null`, for both value and pointer payloads.
    try teststringify("null", @as(?u8, null), StringifyOptions{});
    try teststringify("null", @as(?*u32, null), StringifyOptions{});
    try teststringify("42", 42, StringifyOptions{});
    // Floats are emitted in scientific notation.
    try teststringify("4.2e+01", 42.0, StringifyOptions{});
    try teststringify("42", @as(u8, 42), StringifyOptions{});
    try teststringify("42", @as(u128, 42), StringifyOptions{});
    try teststringify("4.2e+01", @as(f32, 42), StringifyOptions{});
    try teststringify("4.2e+01", @as(f64, 42), StringifyOptions{});
    // Error values serialize as their name, quoted.
    try teststringify("\"ItBroke\"", @as(anyerror, error.ItBroke), StringifyOptions{});
}
|
|
|
|
test "stringify string" {
    try teststringify("\"hello\"", "hello", StringifyOptions{});
    // Control characters are always escaped, regardless of escape_unicode.
    try teststringify("\"with\\nescapes\\r\"", "with\nescapes\r", StringifyOptions{});
    try teststringify("\"with\\nescapes\\r\"", "with\nescapes\r", StringifyOptions{ .string = .{ .String = .{ .escape_unicode = true } } });
    try teststringify("\"with unicode\\u0001\"", "with unicode\u{1}", StringifyOptions{});
    try teststringify("\"with unicode\\u0001\"", "with unicode\u{1}", StringifyOptions{ .string = .{ .String = .{ .escape_unicode = true } } });
    // Non-ASCII codepoints pass through verbatim by default and become
    // \uXXXX escapes when escape_unicode is set. Pairs below cover the UTF-8
    // length boundaries (2, 3, and 4 byte sequences).
    try teststringify("\"with unicode\u{80}\"", "with unicode\u{80}", StringifyOptions{});
    try teststringify("\"with unicode\\u0080\"", "with unicode\u{80}", StringifyOptions{ .string = .{ .String = .{ .escape_unicode = true } } });
    try teststringify("\"with unicode\u{FF}\"", "with unicode\u{FF}", StringifyOptions{});
    try teststringify("\"with unicode\\u00ff\"", "with unicode\u{FF}", StringifyOptions{ .string = .{ .String = .{ .escape_unicode = true } } });
    try teststringify("\"with unicode\u{100}\"", "with unicode\u{100}", StringifyOptions{});
    try teststringify("\"with unicode\\u0100\"", "with unicode\u{100}", StringifyOptions{ .string = .{ .String = .{ .escape_unicode = true } } });
    try teststringify("\"with unicode\u{800}\"", "with unicode\u{800}", StringifyOptions{});
    try teststringify("\"with unicode\\u0800\"", "with unicode\u{800}", StringifyOptions{ .string = .{ .String = .{ .escape_unicode = true } } });
    try teststringify("\"with unicode\u{8000}\"", "with unicode\u{8000}", StringifyOptions{});
    try teststringify("\"with unicode\\u8000\"", "with unicode\u{8000}", StringifyOptions{ .string = .{ .String = .{ .escape_unicode = true } } });
    try teststringify("\"with unicode\u{D799}\"", "with unicode\u{D799}", StringifyOptions{});
    try teststringify("\"with unicode\\ud799\"", "with unicode\u{D799}", StringifyOptions{ .string = .{ .String = .{ .escape_unicode = true } } });
    // Codepoints above U+FFFF must escape to a UTF-16 surrogate pair.
    try teststringify("\"with unicode\u{10000}\"", "with unicode\u{10000}", StringifyOptions{});
    try teststringify("\"with unicode\\ud800\\udc00\"", "with unicode\u{10000}", StringifyOptions{ .string = .{ .String = .{ .escape_unicode = true } } });
    try teststringify("\"with unicode\u{10FFFF}\"", "with unicode\u{10FFFF}", StringifyOptions{});
    try teststringify("\"with unicode\\udbff\\udfff\"", "with unicode\u{10FFFF}", StringifyOptions{ .string = .{ .String = .{ .escape_unicode = true } } });
    // '/' is only escaped when escape_solidus is requested.
    try teststringify("\"/\"", "/", StringifyOptions{});
    try teststringify("\"\\/\"", "/", StringifyOptions{ .string = .{ .String = .{ .escape_solidus = true } } });
}
|
|
|
|
test "stringify tagged unions" {
    // A tagged union serializes as its active payload only; with `Foo`
    // active, the output is just the number.
    const Tagged = union(enum) {
        Foo: u32,
        Bar: bool,
    };
    try teststringify("42", Tagged{ .Foo = 42 }, StringifyOptions{});
}
|
|
|
|
test "stringify struct" {
    // A plain struct becomes a JSON object keyed by its field names.
    const S = struct { foo: u32 };
    try teststringify("{\"foo\":42}", S{ .foo = 42 }, StringifyOptions{});
}
|
|
|
|
test "stringify struct with string as array" {
    // With the default .String option, []const u8 is emitted as a JSON
    // string; with .Array it falls back to an array of byte values.
    try teststringify("{\"foo\":\"bar\"}", .{ .foo = "bar" }, StringifyOptions{});
    try teststringify("{\"foo\":[98,97,114]}", .{ .foo = "bar" }, StringifyOptions{ .string = .Array });
}
|
|
|
|
test "stringify struct with indentation" {
    // Default whitespace: 4-space indentation, newline per element, and a
    // space after each ':'.
    try teststringify(
        \\{
        \\    "foo": 42,
        \\    "bar": [
        \\        1,
        \\        2,
        \\        3
        \\    ]
        \\}
    ,
        struct {
            foo: u32,
            bar: [3]u32,
        }{
            .foo = 42,
            .bar = .{ 1, 2, 3 },
        },
        StringifyOptions{
            .whitespace = .{},
        },
    );
    // Tab indentation with the ':' separator space disabled.
    try teststringify(
        "{\n\t\"foo\":42,\n\t\"bar\":[\n\t\t1,\n\t\t2,\n\t\t3\n\t]\n}",
        struct {
            foo: u32,
            bar: [3]u32,
        }{
            .foo = 42,
            .bar = .{ 1, 2, 3 },
        },
        StringifyOptions{
            .whitespace = .{
                .indent = .Tab,
                .separator = false,
            },
        },
    );
    // .indent = .None with no separator yields fully compact output.
    try teststringify(
        \\{"foo":42,"bar":[1,2,3]}
    ,
        struct {
            foo: u32,
            bar: [3]u32,
        }{
            .foo = 42,
            .bar = .{ 1, 2, 3 },
        },
        StringifyOptions{
            .whitespace = .{
                .indent = .None,
                .separator = false,
            },
        },
    );
}
|
|
|
|
test "stringify struct with void field" {
    // void fields carry no data and must be left out of the object.
    const S = struct {
        foo: u32,
        bar: void = {},
    };
    try teststringify("{\"foo\":42}", S{ .foo = 42 }, StringifyOptions{});
}
|
|
|
|
test "stringify array of structs" {
    // An array of structs becomes a JSON array of objects, in order.
    const MyStruct = struct {
        foo: u32,
    };
    const values = [_]MyStruct{
        .{ .foo = 42 },
        .{ .foo = 100 },
        .{ .foo = 1000 },
    };
    try teststringify("[{\"foo\":42},{\"foo\":100},{\"foo\":1000}]", values, StringifyOptions{});
}
|
|
|
|
test "stringify struct with custom stringifier" {
    // A type-provided `jsonStringify` method completely overrides the
    // default struct serialization; the `foo` field never appears.
    try teststringify("[\"something special\",42]", struct {
        foo: u32,
        const Self = @This();
        pub fn jsonStringify(
            value: Self,
            options: StringifyOptions,
            out_stream: anytype,
        ) !void {
            _ = value;
            try out_stream.writeAll("[\"something special\",");
            // Nested calls back into stringify are allowed from here.
            try stringify(42, options, out_stream);
            try out_stream.writeByte(']');
        }
    }{ .foo = 42 }, StringifyOptions{});
}
|
|
|
|
test "stringify vector" {
    // Vectors are serialized exactly like arrays of the same length.
    const ones: @Vector(2, u32) = @splat(2, @as(u32, 1));
    try teststringify("[1,1]", ones, StringifyOptions{});
}
|
|
|
|
/// Serializes `value` with `options` and verifies that the produced bytes
/// match `expected` exactly, printing a diagnostic dump on any divergence.
fn teststringify(expected: []const u8, value: anytype, options: StringifyOptions) !void {
    const ValidationWriter = struct {
        const Self = @This();
        pub const Writer = std.io.Writer(*Self, Error, write);
        pub const Error = error{
            TooMuchData,
            DifferentData,
        };

        // Suffix of the expected output not yet matched by writes.
        expected_remaining: []const u8,

        fn init(exp: []const u8) Self {
            return .{ .expected_remaining = exp };
        }

        pub fn writer(self: *Self) Writer {
            return .{ .context = self };
        }

        // Dumps the expected span next to what was actually produced.
        fn dumpMismatch(expected_part: []const u8, actual: []const u8) void {
            std.debug.print(
                \\====== expected this output: =========
                \\{s}
                \\======== instead found this: =========
                \\{s}
                \\======================================
            , .{ expected_part, actual });
        }

        fn write(self: *Self, bytes: []const u8) Error!usize {
            // More bytes than remain expected: over-long output.
            if (self.expected_remaining.len < bytes.len) {
                dumpMismatch(self.expected_remaining, bytes);
                return error.TooMuchData;
            }
            // Same length budget but different content.
            if (!mem.eql(u8, self.expected_remaining[0..bytes.len], bytes)) {
                dumpMismatch(self.expected_remaining[0..bytes.len], bytes);
                return error.DifferentData;
            }
            self.expected_remaining = self.expected_remaining[bytes.len..];
            return bytes.len;
        }
    };

    var validator = ValidationWriter.init(expected);
    try stringify(value, options, validator.writer());
    // Everything written matched; also require that nothing is missing.
    if (validator.expected_remaining.len > 0) return error.NotEnoughData;
}
|
|
|
|
test "encodesTo" {
    // same
    try testing.expectEqual(true, encodesTo("false", "false"));
    // totally different
    try testing.expectEqual(false, encodesTo("false", "true"));
    // different lengths
    try testing.expectEqual(false, encodesTo("false", "other"));
    // with escape
    try testing.expectEqual(true, encodesTo("\\", "\\\\"));
    try testing.expectEqual(true, encodesTo("with\nescape", "with\\nescape"));
    // with unicode: \uXXXX escapes, including a surrogate pair for the
    // emoji (a codepoint above U+FFFF).
    try testing.expectEqual(true, encodesTo("ą", "\\u0105"));
    try testing.expectEqual(true, encodesTo("😂", "\\ud83d\\ude02"));
    try testing.expectEqual(true, encodesTo("withąunicode😂", "with\\u0105unicode\\ud83d\\ude02"));
}
|