// A lexical token: its kind plus the slice of the source text it covers.
Token :: struct {
    kind: Kind;
    str:  string; // points into Parser.source (see consume_token); empty for end_of_file

    // Keyword/class kinds are numbered from zero; single-character punctuation
    // uses the character's own value so the lexer can cast a byte directly.
    Kind :: enum {
        invalid;
        end_of_file;

        symbol;
        number;
        string;

        kw_var;
        kw_def;
        kw_type;
        kw_fn;
        kw_do;
        kw_end;
        kw_if;
        kw_else;
        kw_switch;
        kw_case;
        kw_for;
        kw_in;
        kw_loop;
        kw_return;
        kw_break;
        kw_continue;
        kw_goto;
        kw_true;
        kw_false;
        kw_print;

        equal     :: #char "=";
        plus      :: #char "+";
        minus     :: #char "-";
        star      :: #char "*";
        percent   :: #char "%";
        bang      :: #char "!";
        and       :: #char "&";
        f_slash   :: #char "/";
        b_slash   :: #char "\\";
        l_paren   :: #char "(";
        r_paren   :: #char ")";
        l_square  :: #char "[";
        r_square  :: #char "]";
        l_brace   :: #char "{";
        r_brace   :: #char "}";
        comma     :: #char ",";
        dot       :: #char ".";
        colon     :: #char ":";
        semicolon :: #char ";";
    }
}

// Common header embedded (via `#as using`) at the start of every AST node.
Node :: struct {
    kind: Kind;
    type: *Type_Info;

    // The *_start / *_end entries bracket the statement, declaration and
    // expression kinds respectively.
    Kind :: enum {
        invalid;

        block;

        stmt_start;
        print;
        return_;
        stmt_end;

        decl_start;
        variable;
        procedure;
        decl_end;

        expr_start;
        type;
        unary;
        binary;
        procedure_call;
        symbol;
        literal;
        expr_end;
    }
}

// `var` / `def` declaration.
Node_Var :: struct {
    #as using n: Node;
    n.kind = .variable;

    symbol:     *Node; // always *Node_Symbol
    type_expr:  *Node; // always *Node_Type
    value_expr: *Node;
    var_flags:  Var_Flag;

    Var_Flag :: enum_flags {
        immutable; // def
    }
}

// Prefix operator applied to `right`.
Node_Unary :: struct {
    #as using n: Node;
    n.kind = .unary;

    op:    Token;
    right: *Node;
}

// Infix operator applied to `left` and `right`.
Node_Binary :: struct {
    #as using n: Node;
    n.kind = .binary;

    op:    Token;
    left:  *Node;
    right: *Node;
}

// Identifier reference.
Node_Symbol :: struct {
    #as using n: Node;
    n.kind = .symbol;

    str: string;
}

// Literal value; `value_kind` says which union member is meaningful.
Node_Literal :: struct {
    #as using n: Node;
    n.kind = .literal;

    value_kind:  Value_Kind;
    value_flags: Value_Flag;

    union {
        i: s64;
        u: u64;
        f: float64;
        b: bool;
        s: string;
    }

    Value_Kind :: enum {
        int;
        float;
        bool;
        string;
    }

    Value_Flag :: enum_flags {
        can_be_unsigned;
    }
}

// Type expression; `type_kind` says which union member is meaningful.
Node_Type :: struct {
    #as using n: Node;
    n.kind = .type;

    resolved_type: *Type_Info;
    type_kind:     Type_Kind;

    union {
        alias_target:   *Node;
        pointer_target: *Node_Type;
        struct {
            array_element: *Node_Type;
            array_count:   *Node; // can be null
        };
    }

    Type_Kind :: enum {
        alias;
        pointer;
        array;
    }
}

Node_Procedure :: struct {
    #as using n: Node;
    n.kind = .procedure;

    header: *Node_Procedure_Header;
    args:   [..]Node_Parameter;
    rets:   [..]Node_Parameter;
    body:   *Node_Block;
    flags:  Flag;

    Flag :: enum_flags {
        must_inline;
    }
}

Node_Parameter :: struct {
    symbol: *Node_Symbol;
    type:   *Node_Type;
    value:  *Node; // always an expression, can be null
}

Node_Procedure_Header :: struct {
    args: [..]*Node_Type;
    rets: [..]*Node_Type;
}

Node_Print :: struct {
    #as using n: Node;
    n.kind = .print;

    expr: *Node;
}

// Call expression. Every argument value (named or positional) is in
// `all_arguments`; named ones are additionally recorded in `named_arguments`,
// keyed by the name's symbol node.
Node_Procedure_Call :: struct {
    #as using n: Node;
    n.kind = .procedure_call;

    call_expr:       *Node;
    named_arguments: kv.Kv(*Node, *Node);
    all_arguments:   [..]*Node;
}

Node_Block :: struct {
    #as using n: Node;
    n.kind = .block;

    body: [..]*Node;
}

Node_Return :: struct {
    #as using n: Node;
    n.kind = .return_;

    values: [..]*Node;
}

// Parser state: the source being read, the read cursor, and the nodes
// produced so far.
Parser :: struct {
    allocator: Allocator;
    toplevel:  [..]*Node;

    previous: Token;
    filename: string;
    source:   string;
    offset:   int; // byte index of the next unread character in `source`
}

// Sets the allocator used for nodes and for the toplevel node list.
init :: (p: *Parser, allocator: Allocator) {
    p.allocator          = allocator;
    p.toplevel.allocator = allocator;
}

// Parses `source` from the beginning, appending each toplevel node to
// `p.toplevel` (which is not cleared first).
//
// Returns true when the whole source was consumed, false when parsing stopped
// at a token the lexer could not classify. Other syntax errors currently
// trip an assertion instead of being reported through the return value.
parse_string :: (p: *Parser, source: string) -> bool {
    p.source = source;
    p.offset = 0;

    while !at_end(p) {
        t := peek_token(p);
        if t.kind == .invalid     return false;
        if t.kind == .end_of_file break; // only whitespace/comments remained

        node := parse_toplevel(p);
        if node != null array.append(*p.toplevel, node);
    }

    return true;
}

#scope_file;

// Parses one statement or declaration. Returns null when nothing was
// produced (currently: `fn` declarations, see the note in that case).
parse_toplevel :: (p: *Parser) -> *Node {
    t, ok := expect_token(p, .kw_var, .kw_def, .kw_fn, .kw_print, .kw_do, .kw_return);
    basic.assert(ok, "var, def, print, found '%'", t.str); // @errors

    if t.kind == {
        // var sym type_expr
        // var sym type_expr = expr
        // var sym = expr
        case .kw_var; #through;
        case .kw_def;
            s:, ok = expect_token(p, .symbol);
            basic.assert(ok, "symbol"); // @errors

            type_expr:  *Node;
            value_expr: *Node;

            // Must be read before `t` is reused for lookahead below
            is_const := t.kind == .kw_def;

            t = peek_token(p);
            if t.kind == .equal {
                consume_token(p);
                value_expr = parse_expression(p);
                basic.assert(value_expr != null, "value expr"); // @errors
            }
            else {
                type_expr = parse_type_expression(p);
                basic.assert(type_expr != null, "type expr"); // @errors

                if peek_token(p).kind == .equal {
                    consume_token(p);
                    value_expr = parse_expression(p);
                    basic.assert(value_expr != null, "value expr"); // @errors
                }
            }

            symbol := make_node(p, Node_Symbol);
            symbol.str = s.str;

            node := make_node(p, Node_Var);
            node.symbol     = symbol;
            node.type_expr  = type_expr;
            node.value_expr = value_expr;

            if is_const {
                node.var_flags |= .immutable;
            }

            return node;

        // return
        // return expr0, ..exprN
        case .kw_return;
            node := make_node(p, Node_Return);
            array.init(*node.values, p.allocator);

            // parse_expression asserts rather than returning null, so a bare
            // `return` has to be detected by lookahead.
            if !starts_expression(peek_token(p)) return node;

            while true {
                expr := parse_expression(p);
                basic.assert(expr != null, "expected expression after 'return'"); // @errors
                array.append(*node.values, expr);

                if peek_token(p).kind != .comma break;
                consume_token(p);
            }

            return node;

        // print(expr)
        // print expr
        case .kw_print;
            expr := parse_expression(p);
            basic.assert(expr != null, "expected expression"); // @errors

            node := make_node(p, Node_Print);
            node.expr = expr;
            return node;

        // do stmt0 ..stmtN end
        case .kw_do;
            // `do` has already been consumed by expect_token above
            return parse_block_body(p);

        // fn symbol(arg0, ..argN) do end
        case .kw_fn;
            symbol, ok := expect_token(p, .symbol);
            basic.assert(ok, "expected name for procedure"); // @errors @todo(judah): lambdas

            t, ok = expect_token(p, .l_paren);
            basic.assert(ok, "expected '(' but found '%'", t.str); // @errors

            while !at_end(p) {
                t = peek_token(p);
                if t.kind == .r_paren break;

                expr := parse_expression(p);
                basic.assert(expr != null); // @errors

                t = peek_token(p);
                if t.kind == {
                    case .comma;
                        consume_token(p);
                        continue;
                    case .r_paren;
                        break;
                    case;
                        basic.assert(false, "expected ',' or ')' but found '%'", t.str);
                }
            }

            _, ok = expect_token(p, .r_paren);
            basic.assert(ok, "expected ')'"); // @errors

            block := parse_block(p);
            basic.assert(block != null, "expected block"); // @errors

            // NOTE(review): the name, parameters and body parsed above are not
            // attached to the node and the node itself is discarded - procedure
            // declarations are validated syntactically but produce no AST yet.
            node := make_node(p, Node_Procedure);
            return null;
    }

    return null;
}

// True when `t` can begin an expression: the tokens accepted by
// parse_expression_base plus the prefix operators handled by
// parse_expression_unary.
starts_expression :: (t: Token) -> bool {
    if t.kind == {
        case .kw_true;  #through;
        case .kw_false; #through;
        case .number;   #through;
        case .symbol;   #through;
        case .l_paren;  #through;
        case .plus;     #through;
        case .minus;
            return true;
    }

    return false;
}

// Parses `do stmt0 ..stmtN end`.
parse_block :: (p: *Parser) -> *Node_Block {
    t, ok := expect_token(p, .kw_do);
    basic.assert(ok, "expected 'do' found '%'", t.str); // @errors

    return parse_block_body(p);
}

// Parses `stmt0 ..stmtN end`, i.e. a block whose opening `do` has already
// been consumed by the caller.
parse_block_body :: (p: *Parser) -> *Node_Block {
    block := make_node(p, Node_Block);
    array.init(*block.body, p.allocator);

    while !at_end(p) {
        if peek_token(p).kind == .kw_end break;

        node := parse_toplevel(p);
        basic.assert(node != null); // @errors
        array.append(*block.body, node);
    }

    t, ok := expect_token(p, .kw_end);
    basic.assert(ok, "expected 'end' found '%'", t.str); // @errors

    return block;
}

// Parses a type expression:
//   sym          alias
//   *type        pointer
//   []type       array without a count
//   [expr]type   array with a count
parse_type_expression :: (p: *Parser) -> *Node_Type {
    t, ok := expect_token(p, .symbol, .star, .l_square);
    basic.assert(ok, "type expression"); // @errors

    if t.kind == {
        case .star;
            target := parse_type_expression(p);
            basic.assert(target != null, "pointer target"); // @errors

            node := make_node(p, Node_Type);
            node.type_kind      = .pointer;
            node.pointer_target = target;
            return node;

        case .l_square;
            node := make_node(p, Node_Type);
            node.type_kind = .array;

            // slice
            if peek_token(p).kind == .r_square {
                consume_token(p);

                element := parse_type_expression(p);
                basic.assert(element != null, "array element"); // @errors

                node.array_element = element;
            }
            else {
                count := parse_expression(p);
                basic.assert(count != null, "array count"); // @errors

                _, ok := expect_token(p, .r_square);
                basic.assert(ok, "end of array type");

                element := parse_type_expression(p);
                basic.assert(element != null, "array element"); // @errors

                node.array_count   = count;
                node.array_element = element;
            }

            return node;

        case .symbol;
            symbol := make_node(p, Node_Symbol);
            symbol.str = t.str;

            node := make_node(p, Node_Type);
            node.type_kind    = .alias;
            node.alias_target = symbol;
            return node;
    }

    return null;
}

// Parses a binary expression by precedence climbing. Only operators whose
// precedence is strictly greater than `min_precedence` are consumed at this
// level; the right operand is parsed with the operator's own precedence as
// the new minimum, so operators of equal precedence group to the left.
parse_expression :: (p: *Parser, min_precedence := 1) -> *Node {
    // Returns 0 for anything that is not a binary operator, which always
    // terminates the loop below.
    get_precedence :: inline (t: Token) -> int {
        if t.kind == {
            case .star;    #through;
            case .f_slash; #through;
            case .percent; #through;
            case .and;
                return 4;

            case .plus; #through;
            case .minus;
                return 3;

            // case .equal_equal; #through;
            // case .bang_equal;  #through;
            // case .less;        #through;
            // case .less_equal;  #through;
            // case .more;        #through;
            // case .more_equal;
            //     return 2;
        }

        return 0;
    }

    node := parse_expression_unary(p);
    basic.assert(node != null, "expected expression"); // @errors

    while !at_end(p) {
        op   := peek_token(p);
        prec := get_precedence(op);
        if prec <= min_precedence break;

        op = consume_token(p);

        lhs := node;
        rhs := parse_expression(p, prec);
        basic.assert(rhs != null, "expected rhs"); // @errors

        new := make_node(p, Node_Binary);
        new.op    = op;
        new.left  = lhs;
        new.right = rhs;

        node = new;
    }

    return node;
}

// Parses any number of prefix `+` / `-` operators followed by a postfix
// expression.
parse_expression_unary :: (p: *Parser) -> *Node {
    op := peek_token(p);
    if op.kind == {
        case .plus; #through;
        case .minus;
            op = consume_token(p);

            node := parse_expression_unary(p);
            basic.assert(node != null, "expected expr"); // @errors

            unary := make_node(p, Node_Unary);
            unary.op    = op;
            unary.right = node;
            return unary;
    }

    return parse_expression_postfix(p);
}

// Parses a base expression optionally followed by one call suffix:
//   base
//   base(arg0, name: arg1, ..argN)
parse_expression_postfix :: (p: *Parser) -> *Node {
    // @TODO
    base := parse_expression_base(p);
    basic.assert(base != null, "expected expression"); // @errors

    t := peek_token(p);
    if t.kind == {
        case .l_paren; // procedure calls
            consume_token(p);

            node := make_node(p, Node_Procedure_Call);
            node.call_expr = base;

            array.init(*node.all_arguments, p.allocator);
            kv.init(*node.named_arguments, p.allocator);

            // named_arguments is keyed by node pointer and every name gets a
            // freshly allocated node, so duplicates are detected by comparing
            // the names seen so far instead.
            seen_names: [..]string;
            array.init(*seen_names, p.allocator);

            while !at_end(p) {
                t = peek_token(p);
                if t.kind == .r_paren break;

                arg_or_name := parse_expression(p);
                basic.assert(arg_or_name != null, "expected expression in procedure call"); // @errors

                if peek_token(p).kind == .colon {
                    consume_token(p);
                    basic.assert(arg_or_name.kind == .symbol, "expected symbol for named argument"); // @errors

                    name := arg_or_name.(*Node_Symbol).str;
                    for seen_names {
                        basic.assert(it != name, "duplicate named argument '%'", name); // @errors
                    }
                    array.append(*seen_names, name);

                    value := parse_expression(p);
                    basic.assert(value != null, "expected expression after ':'"); // @errors

                    array.append(*node.all_arguments, value);
                    kv.set(*node.named_arguments, arg_or_name, value);
                }
                else {
                    array.append(*node.all_arguments, arg_or_name);
                }

                t = peek_token(p);
                if t.kind == {
                    case .comma;
                        consume_token(p);
                        continue;
                    case .r_paren;
                        break;
                    case;
                        basic.assert(false, "expected ',' or ')' but found '%'", t.str);
                }
            }

            _, ok := expect_token(p, .r_paren);
            basic.assert(ok, "expected ')'"); // @errors

            return node;
    }

    return base;
}

// Parses a primary expression: `true`, `false`, a number, a symbol, or a
// parenthesised expression.
parse_expression_base :: (p: *Parser) -> *Node {
    t, ok := expect_token(p, .kw_true, .kw_false, .number, .symbol, .l_paren);
    basic.assert(ok, "expected expression, found '%'", t.str); // @errors

    if t.kind == {
        case .kw_true; #through;
        case .kw_false;
            node := make_node(p, Node_Literal);
            node.b          = t.kind == .kw_true;
            node.value_kind = .bool;
            return node;

        case .symbol;
            node := make_node(p, Node_Symbol);
            node.str = t.str;
            return node;

        case .number;
            node := make_node(p, Node_Literal);

            // The parse helpers take a pointer to the string; hand them a
            // copy so t.str stays intact for error messages.
            copy := t.str;
            if strings.contains(t.str, ".") {
                node.value_kind = .float;

                value, ok := strings.parse_float(*copy);
                basic.assert(ok, "malformed float '%'", t.str); // @errors

                node.f = value;
            }
            else {
                node.value_kind = .int;

                // Only a literal without a leading minus sign may be treated
                // as unsigned.
                if t.str[0] != "-" {
                    node.value_flags |= .can_be_unsigned;
                }

                value, ok := strings.parse_int(*copy);
                basic.assert(ok, "malformed integer '%'", t.str); // @errors

                node.i = value;
            }

            return node;

        case .l_paren;
            node := parse_expression(p);
            basic.assert(node != null, "expected expression"); // @errors

            _, ok := expect_token(p, .r_paren);
            basic.assert(ok, "expected ')'"); // @errors

            return node;
    }

    return null;
}

// Allocates a node of type T from the parser's allocator.
make_node :: (p: *Parser, $T: Type) -> *T {
    return mem.request_memory(T,, allocator = p.allocator);
}

// Returns the next token without advancing: the lexer is run on a copy of
// the parser, so `p.offset` is left untouched.
peek_token :: (p: *Parser) -> Token {
    copy := p.*;
    return consume_token(*copy);
}

at_end :: (p: *Parser) -> bool {
    return p.offset >= p.source.count;
}

starts_symbol :: (c: u8) -> bool {
    return (c >= "a" && c <= "z") || (c >= "A" && c <= "Z") || (c == "_");
}

continues_symbol :: (c: u8) -> bool {
    return starts_symbol(c) || (c >= "0" && c <= "9");
}

starts_number :: (c: u8) -> bool {
    return (c >= "0" && c <= "9");
}

continues_number :: (c: u8) -> bool {
    return starts_number(c) || c == ".";
}

// Lexes and returns the next token, advancing `p.offset` past it.
//
// Whitespace and `//` line comments are skipped. At the end of the source an
// `.end_of_file` token is returned. A byte that starts no known token yields
// an `.invalid` token and is NOT consumed, so the caller must stop or skip it.
consume_token :: (p: *Parser) -> Token {
    if at_end(p) return .{ kind = .end_of_file };

    c := p.source[p.offset];

    // skip whitespace
    while !at_end(p) {
        c = p.source[p.offset];
        if c == {
            case " ";  #through;
            case "\n"; #through;
            case "\r"; #through;
            case "\t";
                p.offset += 1;
            case;
                break;
        }
    }

    // The source may end in whitespace; without this check the code below
    // would classify the last whitespace byte and return a token pointing
    // one byte past the end of the source.
    if at_end(p) return .{ kind = .end_of_file };

    // line comments
    // @todo(judah): don't ignore these
    if c == "/" && p.offset + 1 < p.source.count && p.source[p.offset + 1] == "/" {
        p.offset += 2;
        while !at_end(p) {
            c = p.source[p.offset];
            if c == "\n" break;
            p.offset += 1;
        }

        // @todo(judah): don't recurse
        return consume_token(p);
    }

    if starts_symbol(c) {
        t := Token.{ str = .{ data = p.source.data + p.offset } };
        while !at_end(p) {
            c = p.source[p.offset];
            if !continues_symbol(c) break;
            p.offset += 1;
        }

        t.str.count = (p.source.data + p.offset) - t.str.data;

        if t.str == {
            case "var";      t.kind = .kw_var;
            case "def";      t.kind = .kw_def;
            case "type";     t.kind = .kw_type;
            case "fn";       t.kind = .kw_fn;
            case "do";       t.kind = .kw_do;
            case "end";      t.kind = .kw_end;
            case "if";       t.kind = .kw_if;
            case "else";     t.kind = .kw_else;
            case "switch";   t.kind = .kw_switch;
            case "case";     t.kind = .kw_case;
            case "for";      t.kind = .kw_for;
            case "in";       t.kind = .kw_in;
            case "loop";     t.kind = .kw_loop;
            case "return";   t.kind = .kw_return;
            case "break";    t.kind = .kw_break;
            case "continue"; t.kind = .kw_continue;
            case "goto";     t.kind = .kw_goto;
            case "true";     t.kind = .kw_true;
            case "false";    t.kind = .kw_false;
            case "print";    t.kind = .kw_print;
            case;            t.kind = .symbol;
        }

        return t;
    }

    if starts_number(c) {
        t := Token.{ kind = .number, str = .{ data = p.source.data + p.offset } };
        while !at_end(p) {
            c = p.source[p.offset];
            if !continues_number(c) break;
            p.offset += 1;
        }

        t.str.count = (p.source.data + p.offset) - t.str.data;
        return t;
    }

    // Single-character punctuation: the token kind is the character itself
    if c == {
        case "+"; #through;
        case "-"; #through;
        case "*"; #through;
        case "/"; #through;
        case "="; #through;
        case "%"; #through;
        case "!"; #through;
        case "&"; #through;
        case ","; #through;
        case "."; #through;
        case ":"; #through;
        case ";"; #through;
        case "("; #through;
        case ")"; #through;
        case "["; #through;
        case "]"; #through;
        case "{"; #through;
        case "}";
            s := string.{ data = p.source.data + p.offset, count = 1 };
            p.offset += 1;
            return .{ kind = xx c, str = s };
    }

    // Unrecognised byte: reported but deliberately not consumed
    s := string.{ data = p.source.data + p.offset, count = 1 };
    return .{ kind = .invalid, str = s };
}

// Consumes the next token and reports whether its kind is one of `kinds`.
// The token is consumed (and returned) even when it does not match.
expect_token :: (p: *Parser, kinds: ..Token.Kind) -> Token, bool {
    t := consume_token(p);
    for kinds if it == t.kind {
        return t, true;
    }

    return t, false;
}