const std = @import("std"); const mem = std.mem; pub const Source = struct { buffer: []const u8, file_name: []const u8, tokens: TokenList, pub const TokenList = std.SegmentedList(Token, 64); }; pub const Token = struct { id: Id, start: usize, end: usize, source: *Source, pub const Id = union(enum) { Invalid, Eof, Nl, Identifier, /// special case for #include <...> MacroString, StringLiteral: StrKind, CharLiteral: StrKind, IntegerLiteral: NumSuffix, FloatLiteral: NumSuffix, Bang, BangEqual, Pipe, PipePipe, PipeEqual, Equal, EqualEqual, LParen, RParen, LBrace, RBrace, LBracket, RBracket, Period, Ellipsis, Caret, CaretEqual, Plus, PlusPlus, PlusEqual, Minus, MinusMinus, MinusEqual, Asterisk, AsteriskEqual, Percent, PercentEqual, Arrow, Colon, Semicolon, Slash, SlashEqual, Comma, Ampersand, AmpersandAmpersand, AmpersandEqual, QuestionMark, AngleBracketLeft, AngleBracketLeftEqual, AngleBracketAngleBracketLeft, AngleBracketAngleBracketLeftEqual, AngleBracketRight, AngleBracketRightEqual, AngleBracketAngleBracketRight, AngleBracketAngleBracketRightEqual, Tilde, LineComment, MultiLineComment, Hash, HashHash, Keyword_auto, Keyword_break, Keyword_case, Keyword_char, Keyword_const, Keyword_continue, Keyword_default, Keyword_do, Keyword_double, Keyword_else, Keyword_enum, Keyword_extern, Keyword_float, Keyword_for, Keyword_goto, Keyword_if, Keyword_int, Keyword_long, Keyword_register, Keyword_return, Keyword_short, Keyword_signed, Keyword_sizeof, Keyword_static, Keyword_struct, Keyword_switch, Keyword_typedef, Keyword_union, Keyword_unsigned, Keyword_void, Keyword_volatile, Keyword_while, // ISO C99 Keyword_bool, Keyword_complex, Keyword_imaginary, Keyword_inline, Keyword_restrict, // ISO C11 Keyword_alignas, Keyword_alignof, Keyword_atomic, Keyword_generic, Keyword_noreturn, Keyword_static_assert, Keyword_thread_local, // Preprocessor directives Keyword_include, Keyword_define, Keyword_ifdef, Keyword_ifndef, Keyword_error, Keyword_pragma, pub fn symbol(id: @TagType(Id)) []const u8 { return switch (id) { .Invalid => "Invalid", .Eof => "Eof", .Nl => "NewLine", .Identifier => "Identifier", .MacroString => "MacroString", .StringLiteral => "StringLiteral", .CharLiteral => "CharLiteral", .IntegerLiteral => "IntegerLiteral", .FloatLiteral => "FloatLiteral", .LineComment => "LineComment", .MultiLineComment => "MultiLineComment", .Bang => "!", .BangEqual => "!=", .Pipe => "|", .PipePipe => "||", .PipeEqual => "|=", .Equal => "=", .EqualEqual => "==", .LParen => "(", .RParen => ")", .LBrace => "{", .RBrace => "}", .LBracket => "[", .RBracket => "]", .Period => ".", .Ellipsis => "...", .Caret => "^", .CaretEqual => "^=", .Plus => "+", .PlusPlus => "++", .PlusEqual => "+=", .Minus => "-", .MinusMinus => "--", .MinusEqual => "-=", .Asterisk => "*", .AsteriskEqual => "*=", .Percent => "%", .PercentEqual => "%=", .Arrow => "->", .Colon => ":", .Semicolon => ";", .Slash => "/", .SlashEqual => "/=", .Comma => ",", .Ampersand => "&", .AmpersandAmpersand => "&&", .AmpersandEqual => "&=", .QuestionMark => "?", .AngleBracketLeft => "<", .AngleBracketLeftEqual => "<=", .AngleBracketAngleBracketLeft => "<<", .AngleBracketAngleBracketLeftEqual => "<<=", .AngleBracketRight => ">", .AngleBracketRightEqual => ">=", .AngleBracketAngleBracketRight => ">>", .AngleBracketAngleBracketRightEqual => ">>=", .Tilde => "~", .Hash => "#", .HashHash => "##", .Keyword_auto => "auto", .Keyword_break => "break", .Keyword_case => "case", .Keyword_char => "char", .Keyword_const => "const", .Keyword_continue => "continue", .Keyword_default => "default", .Keyword_do => "do", .Keyword_double => "double", .Keyword_else => "else", .Keyword_enum => "enum", .Keyword_extern => "extern", .Keyword_float => "float", .Keyword_for => "for", .Keyword_goto => "goto", .Keyword_if => "if", .Keyword_int => "int", .Keyword_long => "long", .Keyword_register => "register", .Keyword_return => "return", .Keyword_short => "short", .Keyword_signed => "signed", .Keyword_sizeof => "sizeof", .Keyword_static => "static", .Keyword_struct => "struct", .Keyword_switch => "switch", .Keyword_typedef => "typedef", .Keyword_union => "union", .Keyword_unsigned => "unsigned", .Keyword_void => "void", .Keyword_volatile => "volatile", .Keyword_while => "while", .Keyword_bool => "_Bool", .Keyword_complex => "_Complex", .Keyword_imaginary => "_Imaginary", .Keyword_inline => "inline", .Keyword_restrict => "restrict", .Keyword_alignas => "_Alignas", .Keyword_alignof => "_Alignof", .Keyword_atomic => "_Atomic", .Keyword_generic => "_Generic", .Keyword_noreturn => "_Noreturn", .Keyword_static_assert => "_Static_assert", .Keyword_thread_local => "_Thread_local", .Keyword_include => "include", .Keyword_define => "define", .Keyword_ifdef => "ifdef", .Keyword_ifndef => "ifndef", .Keyword_error => "error", .Keyword_pragma => "pragma", }; } }; pub fn eql(a: Token, b: Token) bool { // do we really need this cast here if (@as(@TagType(Id), a.id) != b.id) return false; return mem.eql(u8, a.slice(), b.slice()); } pub fn slice(tok: Token) []const u8 { return tok.source.buffer[tok.start..tok.end]; } pub const Keyword = struct { bytes: []const u8, id: Id, hash: u32, fn init(bytes: []const u8, id: Id) Keyword { @setEvalBranchQuota(2000); return .{ .bytes = bytes, .id = id, .hash = std.hash_map.hashString(bytes), }; } }; // TODO extensions pub const keywords = [_]Keyword{ Keyword.init("auto", .Keyword_auto), Keyword.init("break", .Keyword_break), Keyword.init("case", .Keyword_case), Keyword.init("char", .Keyword_char), Keyword.init("const", .Keyword_const), Keyword.init("continue", .Keyword_continue), Keyword.init("default", .Keyword_default), Keyword.init("do", .Keyword_do), Keyword.init("double", .Keyword_double), Keyword.init("else", .Keyword_else), Keyword.init("enum", .Keyword_enum), Keyword.init("extern", .Keyword_extern), Keyword.init("float", .Keyword_float), Keyword.init("for", .Keyword_for), Keyword.init("goto", .Keyword_goto), Keyword.init("if", .Keyword_if), Keyword.init("int", .Keyword_int), Keyword.init("long", .Keyword_long), Keyword.init("register", .Keyword_register), Keyword.init("return", .Keyword_return), Keyword.init("short", .Keyword_short), Keyword.init("signed", .Keyword_signed), Keyword.init("sizeof", .Keyword_sizeof), Keyword.init("static", .Keyword_static), Keyword.init("struct", .Keyword_struct), Keyword.init("switch", .Keyword_switch), Keyword.init("typedef", .Keyword_typedef), Keyword.init("union", .Keyword_union), Keyword.init("unsigned", .Keyword_unsigned), Keyword.init("void", .Keyword_void), Keyword.init("volatile", .Keyword_volatile), Keyword.init("while", .Keyword_while), // ISO C99 Keyword.init("_Bool", .Keyword_bool), Keyword.init("_Complex", .Keyword_complex), Keyword.init("_Imaginary", .Keyword_imaginary), Keyword.init("inline", .Keyword_inline), Keyword.init("restrict", .Keyword_restrict), // ISO C11 Keyword.init("_Alignas", .Keyword_alignas), Keyword.init("_Alignof", .Keyword_alignof), Keyword.init("_Atomic", .Keyword_atomic), Keyword.init("_Generic", .Keyword_generic), Keyword.init("_Noreturn", .Keyword_noreturn), Keyword.init("_Static_assert", .Keyword_static_assert), Keyword.init("_Thread_local", .Keyword_thread_local), // Preprocessor directives Keyword.init("include", .Keyword_include), Keyword.init("define", .Keyword_define), Keyword.init("ifdef", .Keyword_ifdef), Keyword.init("ifndef", .Keyword_ifndef), Keyword.init("error", .Keyword_error), Keyword.init("pragma", .Keyword_pragma), }; // TODO perfect hash at comptime // TODO do this in the preprocessor pub fn getKeyword(bytes: []const u8, pp_directive: bool) ?Id { var hash = std.hash_map.hashString(bytes); for (keywords) |kw| { if (kw.hash == hash and mem.eql(u8, kw.bytes, bytes)) { switch (kw.id) { .Keyword_include, .Keyword_define, .Keyword_ifdef, .Keyword_ifndef, .Keyword_error, .Keyword_pragma, => if (!pp_directive) return null, else => {}, } return kw.id; } } return null; } pub const NumSuffix = enum { None, F, L, U, LU, LL, LLU, }; pub const StrKind = enum { None, Wide, Utf8, Utf16, Utf32, }; }; pub const Tokenizer = struct { source: *Source, index: usize = 0, prev_tok_id: @TagType(Token.Id) = .Invalid, pp_directive: bool = false, pub fn next(self: *Tokenizer) Token { const start_index = self.index; var result = Token{ .id = .Eof, .start = self.index, .end = undefined, .source = self.source, }; var state: enum { Start, Cr, BackSlash, BackSlashCr, u, u8, U, L, StringLiteral, CharLiteralStart, CharLiteral, EscapeSequence, CrEscape, OctalEscape, HexEscape, UnicodeEscape, Identifier, Equal, Bang, Pipe, Percent, Asterisk, Plus, /// special case for #include <...> MacroString, AngleBracketLeft, AngleBracketAngleBracketLeft, AngleBracketRight, AngleBracketAngleBracketRight, Caret, Period, Period2, Minus, Slash, Ampersand, Hash, LineComment, MultiLineComment, MultiLineCommentAsterisk, Zero, IntegerLiteralOct, IntegerLiteralBinary, IntegerLiteralHex, IntegerLiteral, IntegerSuffix, IntegerSuffixU, IntegerSuffixL, IntegerSuffixLL, IntegerSuffixUL, FloatFraction, FloatFractionHex, FloatExponent, FloatExponentDigits, FloatSuffix, } = .Start; var string = false; var counter: u32 = 0; while (self.index < self.source.buffer.len) : (self.index += 1) { const c = self.source.buffer[self.index]; switch (state) { .Start => switch (c) { '\n' => { self.pp_directive = false; result.id = .Nl; self.index += 1; break; }, '\r' => { state = .Cr; }, '"' => { result.id = .{ .StringLiteral = .None }; state = .StringLiteral; }, '\'' => { result.id = .{ .CharLiteral = .None }; state = .CharLiteralStart; }, 'u' => { state = .u; }, 'U' => { state = .U; }, 'L' => { state = .L; }, 'a'...'t', 'v'...'z', 'A'...'K', 'M'...'T', 'V'...'Z', '_' => { state = .Identifier; }, '=' => { state = .Equal; }, '!' => { state = .Bang; }, '|' => { state = .Pipe; }, '(' => { result.id = .LParen; self.index += 1; break; }, ')' => { result.id = .RParen; self.index += 1; break; }, '[' => { result.id = .LBracket; self.index += 1; break; }, ']' => { result.id = .RBracket; self.index += 1; break; }, ';' => { result.id = .Semicolon; self.index += 1; break; }, ',' => { result.id = .Comma; self.index += 1; break; }, '?' => { result.id = .QuestionMark; self.index += 1; break; }, ':' => { result.id = .Colon; self.index += 1; break; }, '%' => { state = .Percent; }, '*' => { state = .Asterisk; }, '+' => { state = .Plus; }, '<' => { if (self.prev_tok_id == .Keyword_include) state = .MacroString else state = .AngleBracketLeft; }, '>' => { state = .AngleBracketRight; }, '^' => { state = .Caret; }, '{' => { result.id = .LBrace; self.index += 1; break; }, '}' => { result.id = .RBrace; self.index += 1; break; }, '~' => { result.id = .Tilde; self.index += 1; break; }, '.' => { state = .Period; }, '-' => { state = .Minus; }, '/' => { state = .Slash; }, '&' => { state = .Ampersand; }, '#' => { state = .Hash; }, '0' => { state = .Zero; }, '1'...'9' => { state = .IntegerLiteral; }, '\\' => { state = .BackSlash; }, '\t', '\x0B', '\x0C', ' ' => { result.start = self.index + 1; }, else => { // TODO handle invalid bytes better result.id = .Invalid; self.index += 1; break; }, }, .Cr => switch (c) { '\n' => { self.pp_directive = false; result.id = .Nl; self.index += 1; break; }, else => { result.id = .Invalid; break; }, }, .BackSlash => switch (c) { '\n' => { result.start = self.index + 1; state = .Start; }, '\r' => { state = .BackSlashCr; }, '\t', '\x0B', '\x0C', ' ' => { // TODO warn }, else => { result.id = .Invalid; break; }, }, .BackSlashCr => switch (c) { '\n' => { result.start = self.index + 1; state = .Start; }, else => { result.id = .Invalid; break; }, }, .u => switch (c) { '8' => { state = .u8; }, '\'' => { result.id = .{ .CharLiteral = .Utf16 }; state = .CharLiteralStart; }, '\"' => { result.id = .{ .StringLiteral = .Utf16 }; state = .StringLiteral; }, else => { self.index -= 1; state = .Identifier; }, }, .u8 => switch (c) { '\"' => { result.id = .{ .StringLiteral = .Utf8 }; state = .StringLiteral; }, else => { self.index -= 1; state = .Identifier; }, }, .U => switch (c) { '\'' => { result.id = .{ .CharLiteral = .Utf32 }; state = .CharLiteralStart; }, '\"' => { result.id = .{ .StringLiteral = .Utf32 }; state = .StringLiteral; }, else => { self.index -= 1; state = .Identifier; }, }, .L => switch (c) { '\'' => { result.id = .{ .CharLiteral = .Wide }; state = .CharLiteralStart; }, '\"' => { result.id = .{ .StringLiteral = .Wide }; state = .StringLiteral; }, else => { self.index -= 1; state = .Identifier; }, }, .StringLiteral => switch (c) { '\\' => { string = true; state = .EscapeSequence; }, '"' => { self.index += 1; break; }, '\n', '\r' => { result.id = .Invalid; break; }, else => {}, }, .CharLiteralStart => switch (c) { '\\' => { string = false; state = .EscapeSequence; }, '\'', '\n' => { result.id = .Invalid; break; }, else => { state = .CharLiteral; }, }, .CharLiteral => switch (c) { '\\' => { string = false; state = .EscapeSequence; }, '\'' => { self.index += 1; break; }, '\n' => { result.id = .Invalid; break; }, else => {}, }, .EscapeSequence => switch (c) { '\'', '"', '?', '\\', 'a', 'b', 'f', 'n', 'r', 't', 'v', '\n' => { state = if (string) .StringLiteral else .CharLiteral; }, '\r' => { state = .CrEscape; }, '0'...'7' => { counter = 1; state = .OctalEscape; }, 'x' => { state = .HexEscape; }, 'u' => { counter = 4; state = .OctalEscape; }, 'U' => { counter = 8; state = .OctalEscape; }, else => { result.id = .Invalid; break; }, }, .CrEscape => switch (c) { '\n' => { state = if (string) .StringLiteral else .CharLiteral; }, else => { result.id = .Invalid; break; }, }, .OctalEscape => switch (c) { '0'...'7' => { counter += 1; if (counter == 3) { state = if (string) .StringLiteral else .CharLiteral; } }, else => { self.index -= 1; state = if (string) .StringLiteral else .CharLiteral; }, }, .HexEscape => switch (c) { '0'...'9', 'a'...'f', 'A'...'F' => {}, else => { self.index -= 1; state = if (string) .StringLiteral else .CharLiteral; }, }, .UnicodeEscape => switch (c) { '0'...'9', 'a'...'f', 'A'...'F' => { counter -= 1; if (counter == 0) { state = if (string) .StringLiteral else .CharLiteral; } }, else => { if (counter != 0) { result.id = .Invalid; break; } self.index -= 1; state = if (string) .StringLiteral else .CharLiteral; }, }, .Identifier => switch (c) { 'a'...'z', 'A'...'Z', '_', '0'...'9' => {}, else => { result.id = Token.getKeyword(self.source.buffer[result.start..self.index], self.prev_tok_id == .Hash and !self.pp_directive) orelse .Identifier; if (self.prev_tok_id == .Hash) self.pp_directive = true; break; }, }, .Equal => switch (c) { '=' => { result.id = .EqualEqual; self.index += 1; break; }, else => { result.id = .Equal; break; }, }, .Bang => switch (c) { '=' => { result.id = .BangEqual; self.index += 1; break; }, else => { result.id = .Bang; break; }, }, .Pipe => switch (c) { '=' => { result.id = .PipeEqual; self.index += 1; break; }, '|' => { result.id = .PipePipe; self.index += 1; break; }, else => { result.id = .Pipe; break; }, }, .Percent => switch (c) { '=' => { result.id = .PercentEqual; self.index += 1; break; }, else => { result.id = .Percent; break; }, }, .Asterisk => switch (c) { '=' => { result.id = .AsteriskEqual; self.index += 1; break; }, else => { result.id = .Asterisk; break; }, }, .Plus => switch (c) { '=' => { result.id = .PlusEqual; self.index += 1; break; }, '+' => { result.id = .PlusPlus; self.index += 1; break; }, else => { result.id = .Plus; break; }, }, .MacroString => switch (c) { '>' => { result.id = .MacroString; self.index += 1; break; }, else => {}, }, .AngleBracketLeft => switch (c) { '<' => { state = .AngleBracketAngleBracketLeft; }, '=' => { result.id = .AngleBracketLeftEqual; self.index += 1; break; }, else => { result.id = .AngleBracketLeft; break; }, }, .AngleBracketAngleBracketLeft => switch (c) { '=' => { result.id = .AngleBracketAngleBracketLeftEqual; self.index += 1; break; }, else => { result.id = .AngleBracketAngleBracketLeft; break; }, }, .AngleBracketRight => switch (c) { '>' => { state = .AngleBracketAngleBracketRight; }, '=' => { result.id = .AngleBracketRightEqual; self.index += 1; break; }, else => { result.id = .AngleBracketRight; break; }, }, .AngleBracketAngleBracketRight => switch (c) { '=' => { result.id = .AngleBracketAngleBracketRightEqual; self.index += 1; break; }, else => { result.id = .AngleBracketAngleBracketRight; break; }, }, .Caret => switch (c) { '=' => { result.id = .CaretEqual; self.index += 1; break; }, else => { result.id = .Caret; break; }, }, .Period => switch (c) { '.' => { state = .Period2; }, '0'...'9' => { state = .FloatFraction; }, else => { result.id = .Period; break; }, }, .Period2 => switch (c) { '.' => { result.id = .Ellipsis; self.index += 1; break; }, else => { result.id = .Period; self.index -= 1; break; }, }, .Minus => switch (c) { '>' => { result.id = .Arrow; self.index += 1; break; }, '=' => { result.id = .MinusEqual; self.index += 1; break; }, '-' => { result.id = .MinusMinus; self.index += 1; break; }, else => { result.id = .Minus; break; }, }, .Slash => switch (c) { '/' => { state = .LineComment; }, '*' => { state = .MultiLineComment; }, '=' => { result.id = .SlashEqual; self.index += 1; break; }, else => { result.id = .Slash; break; }, }, .Ampersand => switch (c) { '&' => { result.id = .AmpersandAmpersand; self.index += 1; break; }, '=' => { result.id = .AmpersandEqual; self.index += 1; break; }, else => { result.id = .Ampersand; break; }, }, .Hash => switch (c) { '#' => { result.id = .HashHash; self.index += 1; break; }, else => { result.id = .Hash; break; }, }, .LineComment => switch (c) { '\n' => { result.id = .LineComment; break; }, else => {}, }, .MultiLineComment => switch (c) { '*' => { state = .MultiLineCommentAsterisk; }, else => {}, }, .MultiLineCommentAsterisk => switch (c) { '/' => { result.id = .MultiLineComment; self.index += 1; break; }, else => { state = .MultiLineComment; }, }, .Zero => switch (c) { '0'...'9' => { state = .IntegerLiteralOct; }, 'b', 'B' => { state = .IntegerLiteralBinary; }, 'x', 'X' => { state = .IntegerLiteralHex; }, '.' => { state = .FloatFraction; }, else => { state = .IntegerSuffix; self.index -= 1; }, }, .IntegerLiteralOct => switch (c) { '0'...'7' => {}, else => { state = .IntegerSuffix; self.index -= 1; }, }, .IntegerLiteralBinary => switch (c) { '0', '1' => {}, else => { state = .IntegerSuffix; self.index -= 1; }, }, .IntegerLiteralHex => switch (c) { '0'...'9', 'a'...'f', 'A'...'F' => {}, '.' => { state = .FloatFractionHex; }, 'p', 'P' => { state = .FloatExponent; }, else => { state = .IntegerSuffix; self.index -= 1; }, }, .IntegerLiteral => switch (c) { '0'...'9' => {}, '.' => { state = .FloatFraction; }, 'e', 'E' => { state = .FloatExponent; }, else => { state = .IntegerSuffix; self.index -= 1; }, }, .IntegerSuffix => switch (c) { 'u', 'U' => { state = .IntegerSuffixU; }, 'l', 'L' => { state = .IntegerSuffixL; }, else => { result.id = .{ .IntegerLiteral = .None }; break; }, }, .IntegerSuffixU => switch (c) { 'l', 'L' => { state = .IntegerSuffixUL; }, else => { result.id = .{ .IntegerLiteral = .U }; break; }, }, .IntegerSuffixL => switch (c) { 'l', 'L' => { state = .IntegerSuffixLL; }, 'u', 'U' => { result.id = .{ .IntegerLiteral = .LU }; self.index += 1; break; }, else => { result.id = .{ .IntegerLiteral = .L }; break; }, }, .IntegerSuffixLL => switch (c) { 'u', 'U' => { result.id = .{ .IntegerLiteral = .LLU }; self.index += 1; break; }, else => { result.id = .{ .IntegerLiteral = .LL }; break; }, }, .IntegerSuffixUL => switch (c) { 'l', 'L' => { result.id = .{ .IntegerLiteral = .LLU }; self.index += 1; break; }, else => { result.id = .{ .IntegerLiteral = .LU }; break; }, }, .FloatFraction => switch (c) { '0'...'9' => {}, 'e', 'E' => { state = .FloatExponent; }, else => { self.index -= 1; state = .FloatSuffix; }, }, .FloatFractionHex => switch (c) { '0'...'9', 'a'...'f', 'A'...'F' => {}, 'p', 'P' => { state = .FloatExponent; }, else => { result.id = .Invalid; break; }, }, .FloatExponent => switch (c) { '+', '-' => { state = .FloatExponentDigits; }, else => { self.index -= 1; state = .FloatExponentDigits; }, }, .FloatExponentDigits => switch (c) { '0'...'9' => { counter += 1; }, else => { if (counter == 0) { result.id = .Invalid; break; } self.index -= 1; state = .FloatSuffix; }, }, .FloatSuffix => switch (c) { 'l', 'L' => { result.id = .{ .FloatLiteral = .L }; self.index += 1; break; }, 'f', 'F' => { result.id = .{ .FloatLiteral = .F }; self.index += 1; break; }, else => { result.id = .{ .FloatLiteral = .None }; break; }, }, } } else if (self.index == self.source.buffer.len) { switch (state) { .Start => {}, .u, .u8, .U, .L, .Identifier => { result.id = Token.getKeyword(self.source.buffer[result.start..self.index], self.prev_tok_id == .Hash and !self.pp_directive) orelse .Identifier; }, .Cr, .BackSlash, .BackSlashCr, .Period2, .StringLiteral, .CharLiteralStart, .CharLiteral, .EscapeSequence, .CrEscape, .OctalEscape, .HexEscape, .UnicodeEscape, .MultiLineComment, .MultiLineCommentAsterisk, .FloatExponent, .MacroString, => result.id = .Invalid, .FloatExponentDigits => result.id = if (counter == 0) .Invalid else .{ .FloatLiteral = .None }, .FloatFraction, .FloatFractionHex, => result.id = .{ .FloatLiteral = .None }, .IntegerLiteralOct, .IntegerLiteralBinary, .IntegerLiteralHex, .IntegerLiteral, .IntegerSuffix, .Zero, => result.id = .{ .IntegerLiteral = .None }, .IntegerSuffixU => result.id = .{ .IntegerLiteral = .U }, .IntegerSuffixL => result.id = .{ .IntegerLiteral = .L }, .IntegerSuffixLL => result.id = .{ .IntegerLiteral = .LL }, .IntegerSuffixUL => result.id = .{ .IntegerLiteral = .LU }, .FloatSuffix => result.id = .{ .FloatLiteral = .None }, .Equal => result.id = .Equal, .Bang => result.id = .Bang, .Minus => result.id = .Minus, .Slash => result.id = .Slash, .Ampersand => result.id = .Ampersand, .Hash => result.id = .Hash, .Period => result.id = .Period, .Pipe => result.id = .Pipe, .AngleBracketAngleBracketRight => result.id = .AngleBracketAngleBracketRight, .AngleBracketRight => result.id = .AngleBracketRight, .AngleBracketAngleBracketLeft => result.id = .AngleBracketAngleBracketLeft, .AngleBracketLeft => result.id = .AngleBracketLeft, .Plus => result.id = .Plus, .Percent => result.id = .Percent, .Caret => result.id = .Caret, .Asterisk => result.id = .Asterisk, .LineComment => result.id = .LineComment, } } self.prev_tok_id = result.id; result.end = self.index; return result; } }; test "operators" { expectTokens( \\ ! != | || |= = == \\ ( ) { } [ ] . .. ... \\ ^ ^= + ++ += - -- -= \\ * *= % %= -> : ; / /= \\ , & && &= ? < <= << \\ <<= > >= >> >>= ~ # ## \\ , &[_]Token.Id{ .Bang, .BangEqual, .Pipe, .PipePipe, .PipeEqual, .Equal, .EqualEqual, .Nl, .LParen, .RParen, .LBrace, .RBrace, .LBracket, .RBracket, .Period, .Period, .Period, .Ellipsis, .Nl, .Caret, .CaretEqual, .Plus, .PlusPlus, .PlusEqual, .Minus, .MinusMinus, .MinusEqual, .Nl, .Asterisk, .AsteriskEqual, .Percent, .PercentEqual, .Arrow, .Colon, .Semicolon, .Slash, .SlashEqual, .Nl, .Comma, .Ampersand, .AmpersandAmpersand, .AmpersandEqual, .QuestionMark, .AngleBracketLeft, .AngleBracketLeftEqual, .AngleBracketAngleBracketLeft, .Nl, .AngleBracketAngleBracketLeftEqual, .AngleBracketRight, .AngleBracketRightEqual, .AngleBracketAngleBracketRight, .AngleBracketAngleBracketRightEqual, .Tilde, .Hash, .HashHash, .Nl, }); } test "keywords" { expectTokens( \\auto break case char const continue default do \\double else enum extern float for goto if int \\long register return short signed sizeof static \\struct switch typedef union unsigned void volatile \\while _Bool _Complex _Imaginary inline restrict _Alignas \\_Alignof _Atomic _Generic _Noreturn _Static_assert _Thread_local \\ , &[_]Token.Id{ .Keyword_auto, .Keyword_break, .Keyword_case, .Keyword_char, .Keyword_const, .Keyword_continue, .Keyword_default, .Keyword_do, .Nl, .Keyword_double, .Keyword_else, .Keyword_enum, .Keyword_extern, .Keyword_float, .Keyword_for, .Keyword_goto, .Keyword_if, .Keyword_int, .Nl, .Keyword_long, .Keyword_register, .Keyword_return, .Keyword_short, .Keyword_signed, .Keyword_sizeof, .Keyword_static, .Nl, .Keyword_struct, .Keyword_switch, .Keyword_typedef, .Keyword_union, .Keyword_unsigned, .Keyword_void, .Keyword_volatile, .Nl, .Keyword_while, .Keyword_bool, .Keyword_complex, .Keyword_imaginary, .Keyword_inline, .Keyword_restrict, .Keyword_alignas, .Nl, .Keyword_alignof, .Keyword_atomic, .Keyword_generic, .Keyword_noreturn, .Keyword_static_assert, .Keyword_thread_local, .Nl, }); } test "preprocessor keywords" { expectTokens( \\#include \\#define #include <1 \\#ifdef \\#ifndef \\#error \\#pragma \\ , &[_]Token.Id{ .Hash, .Keyword_include, .MacroString, .Nl, .Hash, .Keyword_define, .Hash, .Identifier, .AngleBracketLeft, .{ .IntegerLiteral = .None }, .Nl, .Hash, .Keyword_ifdef, .Nl, .Hash, .Keyword_ifndef, .Nl, .Hash, .Keyword_error, .Nl, .Hash, .Keyword_pragma, .Nl, }); } test "line continuation" { expectTokens( \\#define foo \ \\ bar \\"foo\ \\ bar" \\#define "foo" \\ "bar" \\#define "foo" \ \\ "bar" , &[_]Token.Id{ .Hash, .Keyword_define, .Identifier, .Identifier, .Nl, .{ .StringLiteral = .None }, .Nl, .Hash, .Keyword_define, .{ .StringLiteral = .None }, .Nl, .{ .StringLiteral = .None }, .Nl, .Hash, .Keyword_define, .{ .StringLiteral = .None }, .{ .StringLiteral = .None }, }); } test "string prefix" { expectTokens( \\"foo" \\u"foo" \\u8"foo" \\U"foo" \\L"foo" \\'foo' \\u'foo' \\U'foo' \\L'foo' \\ , &[_]Token.Id{ .{ .StringLiteral = .None }, .Nl, .{ .StringLiteral = .Utf16 }, .Nl, .{ .StringLiteral = .Utf8 }, .Nl, .{ .StringLiteral = .Utf32 }, .Nl, .{ .StringLiteral = .Wide }, .Nl, .{ .CharLiteral = .None }, .Nl, .{ .CharLiteral = .Utf16 }, .Nl, .{ .CharLiteral = .Utf32 }, .Nl, .{ .CharLiteral = .Wide }, .Nl, }); } test "num suffixes" { expectTokens( \\ 1.0f 1.0L 1.0 .0 1. \\ 0l 0lu 0ll 0llu 0 \\ 1u 1ul 1ull 1 \\ , &[_]Token.Id{ .{ .FloatLiteral = .F }, .{ .FloatLiteral = .L }, .{ .FloatLiteral = .None }, .{ .FloatLiteral = .None }, .{ .FloatLiteral = .None }, .Nl, .{ .IntegerLiteral = .L }, .{ .IntegerLiteral = .LU }, .{ .IntegerLiteral = .LL }, .{ .IntegerLiteral = .LLU }, .{ .IntegerLiteral = .None }, .Nl, .{ .IntegerLiteral = .U }, .{ .IntegerLiteral = .LU }, .{ .IntegerLiteral = .LLU }, .{ .IntegerLiteral = .None }, .Nl, }); } fn expectTokens(source: []const u8, expected_tokens: []const Token.Id) void { var tokenizer = Tokenizer{ .source = &Source{ .buffer = source, .file_name = undefined, .tokens = undefined, }, }; for (expected_tokens) |expected_token_id| { const token = tokenizer.next(); if (!std.meta.eql(token.id, expected_token_id)) { std.debug.panic("expected {}, found {}\n", .{ @tagName(expected_token_id), @tagName(token.id) }); } } const last_token = tokenizer.next(); std.testing.expect(last_token.id == .Eof); }