mirror of
https://github.com/ziglang/zig.git
synced 2024-11-14 16:13:24 +00:00
generated parser understands tuples
This commit is contained in:
parent
4ecb37a8a4
commit
72be61fc0a
@ -26,6 +26,8 @@ readable, safe, optimal, and concise code to solve any computing problem.
|
||||
* Resilient to parsing errors to make IDE integration work well.
|
||||
* Source code is UTF-8.
|
||||
* Shebang line OK so language can be used for "scripting" as well.
|
||||
* Ability to mark functions as test and automatically run them in test mode.
|
||||
* Memory zeroed by default, unless you initialize with "uninitialized".
|
||||
|
||||
## Roadmap
|
||||
|
||||
|
@ -36,7 +36,7 @@ void buf_appendf(Buf *buf, const char *format, ...) {
|
||||
|
||||
int orig_len = buf_len(buf);
|
||||
|
||||
buf_resize(buf, orig_len + required_size);
|
||||
buf_resize(buf, orig_len + len1);
|
||||
|
||||
int len2 = vsnprintf(buf_ptr(buf) + orig_len, required_size, format, ap2);
|
||||
assert(len2 == len1);
|
||||
|
@ -55,6 +55,14 @@ static inline void buf_init_from_mem(Buf *buf, const char *ptr, int len) {
|
||||
buf->list.at(buf_len(buf)) = 0;
|
||||
}
|
||||
|
||||
static inline void buf_init_from_str(Buf *buf, const char *str) {
|
||||
buf_init_from_mem(buf, str, strlen(str));
|
||||
}
|
||||
|
||||
static inline void buf_init_from_buf(Buf *buf, Buf *other) {
|
||||
buf_init_from_mem(buf, buf_ptr(other), buf_len(other));
|
||||
}
|
||||
|
||||
static inline Buf *buf_create_from_mem(const char *ptr, int len) {
|
||||
Buf *buf = allocate<Buf>(1);
|
||||
buf_init_from_mem(buf, ptr, len);
|
||||
|
@ -28,7 +28,6 @@ static int usage(const char *arg0) {
|
||||
fprintf(stderr, "Usage: %s [command] [options] target\n"
|
||||
"Commands:\n"
|
||||
" build create an executable from target\n"
|
||||
" link turn a .o file into an executable\n"
|
||||
"Options:\n"
|
||||
" --output output file\n"
|
||||
" --version print version number and exit\n"
|
||||
|
@ -78,3 +78,7 @@ void ast_print(AstNode *node, int indent) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
AstNode *ast_create_root(Token *token) {
|
||||
return nullptr;
|
||||
}
|
||||
|
@ -89,4 +89,6 @@ const char *node_type_str(NodeType node_type);
|
||||
|
||||
void ast_print(AstNode *node, int indent);
|
||||
|
||||
AstNode *ast_create_root(Token *token);
|
||||
|
||||
#endif
|
||||
|
@ -187,29 +187,39 @@ struct RuleNode {
|
||||
};
|
||||
|
||||
|
||||
enum ParserStateType {
|
||||
ParserStateTypeError,
|
||||
ParserStateTypeOk,
|
||||
ParserStateTypeCapture,
|
||||
enum CodeGenType {
|
||||
CodeGenTypeTransition,
|
||||
CodeGenTypeError,
|
||||
CodeGenTypeSave,
|
||||
CodeGenTypePushNode,
|
||||
CodeGenTypeCapture,
|
||||
CodeGenTypePopNode,
|
||||
CodeGenTypeEatToken,
|
||||
};
|
||||
|
||||
struct ParserStateError {
|
||||
struct CodeGenError {
|
||||
Buf *msg;
|
||||
};
|
||||
|
||||
struct ParserStateCapture {
|
||||
struct CodeGenCapture {
|
||||
Buf *body;
|
||||
bool is_root;
|
||||
Buf *field_names;
|
||||
};
|
||||
|
||||
struct CodeGen {
|
||||
CodeGenType type;
|
||||
union {
|
||||
CodeGenError error;
|
||||
CodeGenCapture capture;
|
||||
};
|
||||
};
|
||||
|
||||
struct ParserState {
|
||||
ParserStateType type;
|
||||
ZigList<CodeGen *> code_gen_list;
|
||||
// One for each token ID.
|
||||
ParserState **transition;
|
||||
int index;
|
||||
union {
|
||||
ParserStateError error;
|
||||
ParserStateCapture capture;
|
||||
};
|
||||
};
|
||||
|
||||
enum LexState {
|
||||
@ -234,6 +244,7 @@ struct Gen {
|
||||
ZigList<ParserState *> transition_table;
|
||||
ZigList<Token *> tokens;
|
||||
RuleNode *root;
|
||||
int biggest_tuple_len;
|
||||
|
||||
Buf *in_buf;
|
||||
LexState lex_state;
|
||||
@ -249,9 +260,8 @@ struct Gen {
|
||||
int lex_body_end;
|
||||
};
|
||||
|
||||
static ParserState *create_state(Gen *g, ParserStateType type) {
|
||||
static ParserState *create_state(Gen *g) {
|
||||
ParserState *state = allocate<ParserState>(1);
|
||||
state->type = type;
|
||||
state->index = g->transition_table.length;
|
||||
state->transition = allocate<ParserState*>(g->tokens.length);
|
||||
g->transition_table.append(state);
|
||||
@ -264,28 +274,93 @@ static void fill_state_with_transition(Gen *g, ParserState *source, ParserState
|
||||
}
|
||||
}
|
||||
|
||||
static void gen(Gen *g, RuleNode *node) {
|
||||
static void state_add_code(ParserState *state, CodeGen *code) {
|
||||
state->code_gen_list.append(code);
|
||||
}
|
||||
|
||||
static void state_add_save_token(ParserState *state) {
|
||||
CodeGen *code = allocate<CodeGen>(1);
|
||||
code->type = CodeGenTypeSave;
|
||||
state_add_code(state, code);
|
||||
}
|
||||
|
||||
static void state_add_error(ParserState *state, Buf *msg) {
|
||||
CodeGen *code = allocate<CodeGen>(1);
|
||||
code->type = CodeGenTypeError;
|
||||
code->error.msg = msg;
|
||||
state_add_code(state, code);
|
||||
}
|
||||
|
||||
static void state_add_transition(ParserState *state) {
|
||||
CodeGen *code = allocate<CodeGen>(1);
|
||||
code->type = CodeGenTypeTransition;
|
||||
state_add_code(state, code);
|
||||
}
|
||||
|
||||
static void state_add_push_node(ParserState *state) {
|
||||
CodeGen *code = allocate<CodeGen>(1);
|
||||
code->type = CodeGenTypePushNode;
|
||||
state_add_code(state, code);
|
||||
}
|
||||
|
||||
static CodeGen *codegen_create_capture(Buf *body, bool is_root, int field_name_count) {
|
||||
CodeGen *code = allocate<CodeGen>(1);
|
||||
code->type = CodeGenTypeCapture;
|
||||
code->capture.body = body;
|
||||
code->capture.is_root = is_root;
|
||||
code->capture.field_names = allocate<Buf>(field_name_count);
|
||||
return code;
|
||||
}
|
||||
|
||||
static void state_add_pop_node(ParserState *state) {
|
||||
CodeGen *code = allocate<CodeGen>(1);
|
||||
code->type = CodeGenTypePopNode;
|
||||
state_add_code(state, code);
|
||||
}
|
||||
|
||||
static void state_add_eat_token(ParserState *state) {
|
||||
CodeGen *code = allocate<CodeGen>(1);
|
||||
code->type = CodeGenTypeEatToken;
|
||||
state_add_code(state, code);
|
||||
}
|
||||
|
||||
static void gen(Gen *g, RuleNode *node, Buf *out_field_name) {
|
||||
switch (node->type) {
|
||||
case RuleNodeTypeToken:
|
||||
{
|
||||
ParserState *ok_state = create_state(g, ParserStateTypeOk);
|
||||
ParserState *err_state = create_state(g, ParserStateTypeError);
|
||||
buf_init_from_str(out_field_name, "token");
|
||||
|
||||
err_state->error.msg = buf_sprintf("expected token '%s'", buf_ptr(&node->token.token->name));
|
||||
state_add_save_token(g->cur_state);
|
||||
|
||||
ParserState *ok_state = create_state(g);
|
||||
ParserState *err_state = create_state(g);
|
||||
state_add_error(err_state, buf_sprintf("expected token '%s'", buf_ptr(&node->token.token->name)));
|
||||
|
||||
fill_state_with_transition(g, g->cur_state, err_state);
|
||||
g->cur_state->transition[node->token.token->id] = ok_state;
|
||||
state_add_transition(g->cur_state);
|
||||
state_add_eat_token(g->cur_state);
|
||||
|
||||
g->cur_state = ok_state;
|
||||
}
|
||||
break;
|
||||
case RuleNodeTypeTuple:
|
||||
{
|
||||
buf_init_from_str(out_field_name, "node");
|
||||
|
||||
state_add_push_node(g->cur_state);
|
||||
|
||||
bool is_root = (node == g->root);
|
||||
int field_name_count = node->tuple.children.length;
|
||||
CodeGen *code = codegen_create_capture(&node->tuple.body, is_root, field_name_count);
|
||||
|
||||
for (int i = 0; i < node->tuple.children.length; i += 1) {
|
||||
RuleNode *child = node->tuple.children.at(i);
|
||||
gen(g, child);
|
||||
gen(g, child, &code->capture.field_names[i]);
|
||||
}
|
||||
g->cur_state->type = ParserStateTypeCapture;
|
||||
g->cur_state->capture.body = &node->tuple.body;
|
||||
state_add_code(g->cur_state, code);
|
||||
|
||||
state_add_pop_node(g->cur_state);
|
||||
}
|
||||
break;
|
||||
case RuleNodeTypeMany:
|
||||
@ -301,7 +376,10 @@ static void gen(Gen *g, RuleNode *node) {
|
||||
zig_panic("TODO");
|
||||
break;
|
||||
case RuleNodeTypeSubRule:
|
||||
zig_panic("TODO");
|
||||
{
|
||||
RuleNode *child = node->sub_rule.child;
|
||||
gen(g, child, out_field_name);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -407,6 +485,9 @@ static void end_token_name(Gen *g) {
|
||||
assert(g->lex_cur_rule->type == RuleNodeTypeTuple);
|
||||
g->lex_cur_rule->tuple.children.append(node);
|
||||
|
||||
g->biggest_tuple_len = max(g->biggest_tuple_len, g->lex_cur_rule->tuple.children.length);
|
||||
|
||||
|
||||
lex_pop_stack(g);
|
||||
}
|
||||
|
||||
@ -571,6 +652,76 @@ static void initialize_rules(Gen *g) {
|
||||
}
|
||||
}
|
||||
|
||||
enum TemplateState {
|
||||
TemplateStateStart,
|
||||
TemplateStateDollar,
|
||||
TemplateStateNumber,
|
||||
};
|
||||
|
||||
static Buf *fill_template(Buf *body, const char *result_name, Buf *field_names) {
|
||||
//fprintf(stderr, "fill template input:\n%s\n", buf_ptr(body));
|
||||
Buf *result = buf_alloc();
|
||||
TemplateState state = TemplateStateStart;
|
||||
int digit_start;
|
||||
for (int i = 0; i < buf_len(body); i += 1) {
|
||||
uint8_t c = buf_ptr(body)[i];
|
||||
switch (state) {
|
||||
case TemplateStateStart:
|
||||
switch (c) {
|
||||
case '$':
|
||||
state = TemplateStateDollar;
|
||||
break;
|
||||
default:
|
||||
buf_append_char(result, c);
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case TemplateStateDollar:
|
||||
switch (c) {
|
||||
case '$':
|
||||
buf_append_str(result, result_name);
|
||||
state = TemplateStateStart;
|
||||
break;
|
||||
case DIGIT:
|
||||
digit_start = i;
|
||||
state = TemplateStateNumber;
|
||||
break;
|
||||
default:
|
||||
buf_append_char(result, '$');
|
||||
buf_append_char(result, c);
|
||||
state = TemplateStateStart;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case TemplateStateNumber:
|
||||
switch (c) {
|
||||
case DIGIT:
|
||||
// nothing
|
||||
break;
|
||||
default:
|
||||
{
|
||||
Buf *num_buf = buf_create_from_mem(&buf_ptr(body)[digit_start], i - digit_start);
|
||||
int index = atoi(buf_ptr(num_buf)) - 1;
|
||||
buf_appendf(result, "(top_node->data[%d].%s)%c",
|
||||
index, buf_ptr(&field_names[index]), c);
|
||||
|
||||
state = TemplateStateStart;
|
||||
}
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
switch (state) {
|
||||
case TemplateStateStart:
|
||||
// OK
|
||||
break;
|
||||
default:
|
||||
zig_panic("unable to fill grammar template");
|
||||
}
|
||||
//fprintf(stderr, "fill template output:\n%s\n", buf_ptr(result));
|
||||
return result;
|
||||
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
const char *in_filename = argv[1];
|
||||
@ -603,8 +754,9 @@ int main(int argc, char **argv) {
|
||||
|
||||
g.root = g.rules.at(0);
|
||||
|
||||
g.cur_state = create_state(&g, ParserStateTypeOk);
|
||||
gen(&g, g.root);
|
||||
g.cur_state = create_state(&g);
|
||||
Buf root_field_name = {0};
|
||||
gen(&g, g.root, &root_field_name);
|
||||
|
||||
fprintf(out_f, "/* This file is generated by parsergen.cpp */\n");
|
||||
fprintf(out_f, "\n");
|
||||
@ -627,15 +779,14 @@ int main(int argc, char **argv) {
|
||||
}
|
||||
fprintf(out_f, "\n");
|
||||
|
||||
/* TODO
|
||||
fprintf(out_f, "struct ParserGenNode{\n");
|
||||
fprintf(out_f, "struct ParserGenNode {\n");
|
||||
fprintf(out_f, " int next_index;\n");
|
||||
fprintf(out_f, " union {\n");
|
||||
fprintf(out_f, " [%d];\n", biggest_tuple_len);
|
||||
fprintf(out_f, " Token *token;\n");
|
||||
fprintf(out_f, " };\n");
|
||||
fprintf(out_f, " AstNode *node;\n");
|
||||
fprintf(out_f, " } data[%d];\n", g.biggest_tuple_len);
|
||||
fprintf(out_f, "};\n");
|
||||
fprintf(out_f, "\n");
|
||||
*/
|
||||
|
||||
fprintf(out_f, "AstNode * ast_parse(Buf *buf, ZigList<Token> *tokens) {\n");
|
||||
|
||||
@ -655,39 +806,66 @@ int main(int argc, char **argv) {
|
||||
|
||||
|
||||
fprintf(out_f, " int state = 0;\n");
|
||||
fprintf(out_f, " int token_index = 0;\n");
|
||||
fprintf(out_f, " Token *token = &tokens->at(token_index);\n");
|
||||
fprintf(out_f, " AstNode *root = nullptr;\n");
|
||||
fprintf(out_f, " ZigList<ParserGenNode *> stack = {0};\n");
|
||||
fprintf(out_f, " ParserGenNode *top_node = nullptr;\n");
|
||||
|
||||
fprintf(out_f, " for (int i = 0; i < tokens->length; i += 1) {\n");
|
||||
fprintf(out_f, " Token *token = &tokens->at(i);\n");
|
||||
fprintf(out_f, " for (;;) {\n");
|
||||
fprintf(out_f, " switch (state) {\n");
|
||||
|
||||
for (int i = 0; i < g.transition_table.length; i += 1) {
|
||||
ParserState *state = g.transition_table.at(i);
|
||||
fprintf(out_f, " case %d:\n", i);
|
||||
switch (state->type) {
|
||||
case ParserStateTypeError:
|
||||
fprintf(out_f, " ast_error(token, \"%s\");\n", buf_ptr(state->error.msg));
|
||||
break;
|
||||
case ParserStateTypeOk:
|
||||
fprintf(out_f, " assert(transition[%d][token->id] >= 0);\n", state->index);
|
||||
fprintf(out_f, " assert(transition[%d][token->id] < %d);\n",
|
||||
state->index, g.transition_table.length);
|
||||
fprintf(out_f, " state = transition[%d][token->id];\n", state->index);
|
||||
break;
|
||||
case ParserStateTypeCapture:
|
||||
// TODO fprintf(out_f, " %s\n", buf_ptr(state->capture.body));
|
||||
fprintf(out_f, " state = transition[%d][token->id];\n", state->index);
|
||||
break;
|
||||
for (int state_i = 0; state_i < g.transition_table.length; state_i += 1) {
|
||||
ParserState *state = g.transition_table.at(state_i);
|
||||
fprintf(out_f, " case %d: {\n", state_i);
|
||||
for (int code_i = 0; code_i < state->code_gen_list.length; code_i += 1) {
|
||||
CodeGen *code = state->code_gen_list.at(code_i);
|
||||
switch (code->type) {
|
||||
case CodeGenTypeTransition:
|
||||
fprintf(out_f, " assert(transition[%d][token->id] >= 0);\n", state->index);
|
||||
fprintf(out_f, " assert(transition[%d][token->id] < %d);\n",
|
||||
state->index, g.transition_table.length);
|
||||
fprintf(out_f, " state = transition[%d][token->id];\n", state->index);
|
||||
break;
|
||||
case CodeGenTypeError:
|
||||
fprintf(out_f, " ast_error(token, \"%s\");\n", buf_ptr(code->error.msg));
|
||||
break;
|
||||
case CodeGenTypeSave:
|
||||
fprintf(out_f, " top_node->data[top_node->next_index++].token = token;\n");
|
||||
break;
|
||||
case CodeGenTypePushNode:
|
||||
fprintf(out_f, " top_node = allocate<ParserGenNode>(1);\n");
|
||||
fprintf(out_f, " stack.append(top_node);\n");
|
||||
break;
|
||||
case CodeGenTypeCapture:
|
||||
if (code->capture.is_root) {
|
||||
Buf *code_text = fill_template(code->capture.body, "root", code->capture.field_names);
|
||||
fprintf(out_f, "%s\n", buf_ptr(code_text));
|
||||
fprintf(out_f, " return root;\n");
|
||||
} else {
|
||||
zig_panic("TODO capture non-root");
|
||||
}
|
||||
break;
|
||||
case CodeGenTypePopNode:
|
||||
fprintf(out_f, " stack.pop();\n");
|
||||
fprintf(out_f, " top_node = stack.length ? stack.last() : nullptr;\n");
|
||||
break;
|
||||
case CodeGenTypeEatToken:
|
||||
fprintf(out_f, " token_index += 1;\n");
|
||||
fprintf(out_f, " token = (token_index < tokens->length) ? &tokens->at(token_index) : nullptr;\n");
|
||||
break;
|
||||
}
|
||||
}
|
||||
fprintf(out_f, " break;\n");
|
||||
fprintf(out_f, " }\n");
|
||||
}
|
||||
fprintf(out_f, " default:\n");
|
||||
fprintf(out_f, " zig_panic(\"unreachable\");\n");
|
||||
|
||||
fprintf(out_f, " }\n");
|
||||
fprintf(out_f, " }\n");
|
||||
fprintf(out_f, " return root;\n");
|
||||
fprintf(out_f, " zig_panic(\"unreachable\");\n");
|
||||
fprintf(out_f, "}\n");
|
||||
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user