jc/vm/parser.jai

794 lines
18 KiB
Text

// A lexical token: its classification plus the slice of source text it covers.
Token :: struct {
kind: Kind;
// Text of the token. consume_token builds this as a view into the parser's
// source (no copy); it is left empty for end_of_file tokens.
str: string;
Kind :: enum {
invalid;
end_of_file;
symbol;
number;
// NOTE(review): nothing visible in this file produces a `string` token yet.
string;
// Keywords. consume_token maps identifier text ("var", "def", ...) onto these.
kw_var;
kw_def;
kw_type;
kw_fn;
kw_do;
kw_end;
kw_if;
kw_else;
kw_switch;
kw_case;
kw_for;
kw_in;
kw_loop;
kw_return;
kw_break;
kw_continue;
kw_goto;
kw_true;
kw_false;
kw_print;
// Single-character tokens. Each member's value is the character's own byte
// value, which lets consume_token produce the kind by casting the source
// byte directly.
equal :: #char "=";
plus :: #char "+";
minus :: #char "-";
star :: #char "*";
percent :: #char "%";
bang :: #char "!";
and :: #char "&";
f_slash :: #char "/";
b_slash :: #char "\\";
l_paren :: #char "(";
r_paren :: #char ")";
l_square :: #char "[";
r_square :: #char "]";
l_brace :: #char "{";
r_brace :: #char "}";
comma :: #char ",";
dot :: #char ".";
colon :: #char ":";
semicolon :: #char ";";
}
}
// Common header shared by the AST node structs that embed it with
// `#as using n: Node`.
Node :: struct {
// Identifies which concrete Node_* struct this header belongs to.
kind: Kind;
// NOTE(review): presumably the resolved type filled in by a later pass; the
// parser code in this file never writes it. Confirm with the consumers.
type: *Type_Info;
Kind :: enum {
invalid;
block;
// NOTE(review): the *_start / *_end members look like range markers that
// bracket the statement, declaration and expression kinds. Nothing in this
// file compares against them, so confirm before relying on that.
stmt_start;
print;
return_;
stmt_end;
decl_start;
variable;
procedure;
decl_end;
expr_start;
type;
unary;
binary;
procedure_call;
symbol;
literal;
expr_end;
}
}
// `print expr` / `print(expr)` statement.
Node_Print :: struct {
#as using n: Node;
n.kind = .print;
// The expression to print; parse_toplevel asserts it is non-null.
expr: *Node;
}
// A call expression: `callee(arg0, name: arg1, ...)`.
Node_Procedure_Call :: struct {
#as using n: Node;
n.kind = .procedure_call;
// The expression being called (whatever preceded the '(').
call_expr: *Node;
// Maps the name node of each `name: value` argument to its value node.
named_arguments: kv.Kv(*Node, *Node);
// Every argument value in source order, positional and named alike.
all_arguments: [..]*Node;
}
// A `var` or `def` declaration.
Node_Var :: struct {
#as using n: Node;
n.kind = .variable;
symbol: *Node; // always *Node_Symbol
// Null when the declaration has no explicit type (`var x = expr`).
type_expr: *Node; // always *Node_Type
// Null when the declaration has no initializer (`var x T`).
value_expr: *Node;
var_flags: Var_Flag;
Var_Flag :: enum_flags {
// Set by parse_toplevel when the declaration was introduced with `def`.
immutable; // def
}
}
// A prefix operator applied to one operand (the parser emits these for
// leading '+' and '-').
Node_Unary :: struct {
#as using n: Node;
n.kind = .unary;
// The operator token.
op: Token;
// The operand the operator applies to.
right: *Node;
}
// An infix operator with a left and right operand, built by parse_expression.
Node_Binary :: struct {
#as using n: Node;
n.kind = .binary;
// The operator token.
op: Token;
left: *Node;
right: *Node;
}
// An identifier reference.
Node_Symbol :: struct {
#as using n: Node;
n.kind = .symbol;
// The identifier text, copied from the token's `str` (a view into the source).
str: string;
}
// A literal value. `value_kind` says which member of the union is meaningful.
Node_Literal :: struct {
#as using n: Node;
n.kind = .literal;
value_kind: Value_Kind;
value_flags: Value_Flag;
// Storage for the value. parse_expression_base writes `b` for .bool, `f` for
// .float and `i` for .int; `u` and `s` are not written anywhere in this file.
union {
i: s64;
u: u64;
f: float64;
b: bool;
s: string;
}
Value_Kind :: enum {
int;
float;
bool;
string;
}
Value_Flag :: enum_flags {
// NOTE(review): presumably marks an integer literal that may also be typed
// as unsigned. Confirm with the code that consumes this flag.
can_be_unsigned;
}
}
// A type expression as written in source: a named type, `*T`, `[]T` or `[N]T`.
Node_Type :: struct {
#as using n: Node;
n.kind = .type;
// NOTE(review): not written by the parser in this file; presumably filled in
// by a later resolution pass.
resolved_type: *Type_Info;
// Selects which member of the union below is in use.
type_kind: Type_Kind;
union {
// .alias: the named type (parse_type_expression stores a Node_Symbol here).
alias_target: *Node;
// .pointer: the pointed-to type.
pointer_target: *Node_Type;
// .array: element type plus optional count expression.
struct {
array_element: *Node_Type;
array_count: *Node; // can be null
};
}
Type_Kind :: enum {
alias;
pointer;
array;
}
}
/*
Example of the procedure syntax this node describes:
fn add(x int, y int) int do return x + y end
*/
// A procedure declaration.
Node_Procedure :: struct {
#as using n: Node;
n.kind = .procedure;
// Type-only view of the signature (see Node_Procedure_Header).
header: *Node_Procedure_Header;
// Declared parameters.
args: [..]Node_Argument;
// Declared return values.
rets: [..]Node_Argument;
// The `do ... end` body.
body: *Node_Block;
flags: Flag;
Flag :: enum_flags {
inline_;
}
}
// One parameter or return slot of a procedure. Unlike the Node_* structs
// above it, this does not embed a Node header.
Node_Argument :: struct {
symbol: *Node_Symbol;
type: *Node_Type;
value: *Node; // always an expression, can be null
}
// The types of a procedure's signature, without names or default values.
// Does not embed a Node header.
Node_Procedure_Header :: struct {
// Parameter types.
args: [..]*Node_Type;
// Return types.
rets: [..]*Node_Type;
}
// A `do ... end` block.
Node_Block :: struct {
#as using n: Node;
n.kind = .block;
// The statements inside the block, in source order.
body: [..]*Node;
}
// A `return` statement.
Node_Return :: struct {
#as using n: Node;
n.kind = .return_;
// The returned expressions; empty for a bare `return`.
values: [..]*Node;
}
// Parser state. Set up with init, then feed source text through parse_string.
Parser :: struct {
// Allocator used for every node and node array the parser creates.
allocator: Allocator;
// Top-level nodes collected by parse_string, in source order.
toplevel: [..]*Node;
// NOTE(review): not read or written by any code visible in this file.
previous: Token;
// NOTE(review): not read or written by any code visible in this file.
filename: string;
// The text currently being parsed.
source: string;
// Byte index into `source` of the next unread character.
offset: int;
}
// Prepares a parser for use by recording the allocator that nodes and the
// top-level node list will draw from.
init :: (p: *Parser, allocator: Allocator) {
    // The top-level list grows out of the same allocator as the nodes it holds.
    p.toplevel.allocator = allocator;
    p.allocator          = allocator;
}
// Parses `source` from the beginning, appending every top-level node to
// `p.toplevel`.
//
// Returns true when the whole input was consumed, and false when parsing
// stopped at a character the lexer cannot tokenize. (Previously this returned
// false on every path, so the result carried no information.)
parse_string :: (p: *Parser, source: string) -> bool {
    p.source = source;
    p.offset = 0;

    source_end := source.data + source.count;

    while !at_end(p) {
        t := peek_token(p);
        if t.kind == .end_of_file break;

        if t.kind == .invalid {
            // An invalid token positioned at or past the end of the input can
            // only come from the lexer skipping trailing whitespace and running
            // out of text; that is a normal end of file, not an error.
            if t.str.data >= source_end break;
            return false;
        }

        node := parse_toplevel(p);
        if node != null array.append(*p.toplevel, node);
    }

    return true;
}
#scope_file;
// Parses one statement or declaration:
//
//   var sym type_expr            var sym type_expr = expr        var sym = expr
//   def ...                      (same forms as var, marked immutable)
//   return                       return expr0, ..exprN
//   print expr                   print(expr)
//   fn name(arg0 type0, ..argN typeN) ret0, ..retN do ... end
//   do ... end                   (nested block)
//
// Returns the parsed node. Malformed input currently trips an assert (@errors).
parse_toplevel :: (p: *Parser) -> *Node {
    // True when `t` can begin an expression; mirrors the tokens accepted by
    // parse_expression_unary and parse_expression_base.
    starts_expression :: inline (t: Token) -> bool {
        if t.kind == {
            case .kw_true;  #through;
            case .kw_false; #through;
            case .number;   #through;
            case .symbol;   #through;
            case .l_paren;  #through;
            case .plus;     #through;
            case .minus;
                return true;
        }
        return false;
    }

    // Remembered so that a leading `do` can be handed back to parse_block,
    // which expects to consume that token itself.
    start_offset := p.offset;

    t, ok := expect_token(p, .kw_var, .kw_def, .kw_fn, .kw_print, .kw_do, .kw_return);
    basic.assert(ok, "var, def, print, found '%'", t.str); // @errors

    if t.kind == {
        // var sym type_expr
        // var sym type_expr = expr
        // var sym = expr
        case .kw_var; #through;
        case .kw_def;
            s:, ok = expect_token(p, .symbol);
            basic.assert(ok, "symbol"); // @errors

            type_expr:  *Node;
            value_expr: *Node;
            is_const := t.kind == .kw_def;

            t = peek_token(p);
            if t.kind == .equal {
                consume_token(p);
                value_expr = parse_expression(p);
                basic.assert(value_expr != null, "value expr"); // @errors
            }
            else {
                type_expr = parse_type_expression(p);
                basic.assert(type_expr != null, "type expr"); // @errors

                if peek_token(p).kind == .equal {
                    consume_token(p);
                    value_expr = parse_expression(p);
                    basic.assert(value_expr != null, "value expr"); // @errors
                }
            }

            symbol := make_node(p, Node_Symbol);
            symbol.str = s.str;

            node := make_node(p, Node_Var);
            node.symbol     = symbol;
            node.type_expr  = type_expr;
            node.value_expr = value_expr;
            if is_const {
                node.var_flags |= .immutable;
            }
            return node;

        // return
        // return expr0, ..exprN
        case .kw_return;
            node := make_node(p, Node_Return);
            array.init(*node.values, p.allocator);

            // Bare `return`: decide by looking ahead instead of calling
            // parse_expression, which asserts when no expression is present.
            if !starts_expression(peek_token(p)) return node;

            while true {
                expr := parse_expression(p);
                basic.assert(expr != null, "expected expression after 'return'"); // @errors
                array.append(*node.values, expr);

                if peek_token(p).kind != .comma break;
                consume_token(p);
            }
            return node;

        // print(expr)
        // print expr
        case .kw_print;
            expr := parse_expression(p);
            basic.assert(expr != null, "expected expression"); // @errors

            node := make_node(p, Node_Print);
            node.expr = expr;
            return node;

        // fn symbol(arg0, ..argN) ret0, ..retN do end
        case .kw_fn;
            // @todo(judah): Node_Procedure has no field for the name yet, so it
            // is validated here but not stored.
            name, name_ok := expect_token(p, .symbol);
            basic.assert(name_ok, "expected name for procedure"); // @errors @todo(judah): lambdas

            t, ok = expect_token(p, .l_paren);
            basic.assert(ok, "expected '(' but found '%'", t.str); // @errors

            node := make_node(p, Node_Procedure);
            array.init(*node.args, p.allocator);
            array.init(*node.rets, p.allocator);

            // Arguments: `name`, `name type`, each optionally followed by `= default`.
            while !at_end(p) {
                t = peek_token(p);
                if t.kind == .r_paren break;

                arg: Node_Argument;

                arg_name:, ok = expect_token(p, .symbol);
                basic.assert(ok, "expected argument name but found '%'", arg_name.str); // @errors
                arg.symbol = make_node(p, Node_Symbol);
                arg.symbol.str = arg_name.str;

                // The type is optional so that bare names keep parsing.
                t = peek_token(p);
                if t.kind != .comma && t.kind != .r_paren && t.kind != .equal {
                    arg.type = parse_type_expression(p);
                    basic.assert(arg.type != null, "expected argument type"); // @errors
                }

                if peek_token(p).kind == .equal {
                    consume_token(p);
                    arg.value = parse_expression(p);
                    basic.assert(arg.value != null, "expected default value after '='"); // @errors
                }

                array.append(*node.args, arg);

                t = peek_token(p);
                if t.kind == {
                    case .comma;
                        consume_token(p);
                        continue;
                    case .r_paren;
                        break;
                    case;
                        basic.assert(false, "expected ',' or ')' but found '%'", t.str);
                }
            }

            _, ok = expect_token(p, .r_paren);
            basic.assert(ok, "expected ')'"); // @errors

            // Optional comma-separated return types between ')' and 'do'.
            while !at_end(p) {
                if peek_token(p).kind == .kw_do break;

                ret: Node_Argument;
                ret.type = parse_type_expression(p);
                basic.assert(ret.type != null, "expected return type"); // @errors
                array.append(*node.rets, ret);

                if peek_token(p).kind != .comma break;
                consume_token(p);
            }

            block := parse_block(p);
            basic.assert(block != null, "expected block"); // @errors

            node.body = block;
            return node;

        // do ... end
        case .kw_do;
            // Rewind so parse_block sees the `do` it expects to consume.
            p.offset = start_offset;
            block := parse_block(p);
            return block;
    }

    return null;
}
// Parses `do <statements> end` and returns the statements as a Node_Block.
// Each statement is parsed with parse_toplevel and must be non-null.
parse_block :: (p: *Parser) -> *Node_Block {
    opener, opened := expect_token(p, .kw_do);
    basic.assert(opened, "expected 'do' found '%'", opener.str); // @errors

    result := make_node(p, Node_Block);
    array.init(*result.body, p.allocator);

    // Collect statements until the closing `end` (or the input runs out).
    while !at_end(p) && peek_token(p).kind != .kw_end {
        stmt := parse_toplevel(p);
        basic.assert(stmt != null); // @errors
        array.append(*result.body, stmt);
    }

    closer, closed := expect_token(p, .kw_end);
    basic.assert(closed, "expected 'end' found '%'", closer.str); // @errors

    return result;
}
// Parses a type expression:
//   name      -> .alias   (alias_target is a Node_Symbol)
//   *T        -> .pointer
//   []T       -> .array with a null count
//   [count]T  -> .array with a count expression
parse_type_expression :: (p: *Parser) -> *Node_Type {
    first, found := expect_token(p, .symbol, .star, .l_square);
    basic.assert(found, "type expression"); // @errors

    if first.kind == .symbol {
        name := make_node(p, Node_Symbol);
        name.str = first.str;

        result := make_node(p, Node_Type);
        result.type_kind    = .alias;
        result.alias_target = name;
        return result;
    }

    if first.kind == .star {
        pointee := parse_type_expression(p);
        basic.assert(pointee != null, "pointer target"); // @errors

        result := make_node(p, Node_Type);
        result.type_kind      = .pointer;
        result.pointer_target = pointee;
        return result;
    }

    if first.kind == .l_square {
        result := make_node(p, Node_Type);
        result.type_kind = .array;

        if peek_token(p).kind == .r_square {
            // slice: nothing between the brackets
            consume_token(p);
        }
        else {
            length := parse_expression(p);
            basic.assert(length != null, "array count"); // @errors

            _, closed := expect_token(p, .r_square);
            basic.assert(closed, "end of array type");

            result.array_count = length;
        }

        // Both forms are followed by the element type.
        item := parse_type_expression(p);
        basic.assert(item != null, "array element"); // @errors
        result.array_element = item;
        return result;
    }

    return null;
}
// Parses a binary expression by precedence climbing. Only operators that bind
// tighter than `min_precedence` are consumed at this level, which makes
// operators of equal precedence group to the left.
parse_expression :: (p: *Parser, min_precedence := 1) -> *Node {
    // Binding strength of an infix operator token; 0 means "not an operator".
    precedence_of :: inline (t: Token) -> int {
        k := t.kind;
        if k == .star || k == .f_slash || k == .percent || k == .and return 4;
        if k == .plus || k == .minus return 3;
        // Reserved for comparisons (precedence 2) once the lexer has them:
        // equal_equal, bang_equal, less, less_equal, more, more_equal
        return 0;
    }

    left := parse_expression_unary(p);
    basic.assert(left != null, "expected expression"); // @errors

    while !at_end(p) {
        strength := precedence_of(peek_token(p));
        if strength <= min_precedence break;

        operator := consume_token(p);

        right := parse_expression(p, strength);
        basic.assert(right != null, "expected rhs"); // @errors

        combined := make_node(p, Node_Binary);
        combined.op    = operator;
        combined.left  = left;
        combined.right = right;

        // The node built so far becomes the left operand of the next operator.
        left = combined;
    }

    return left;
}
// Parses any number of leading '+' / '-' operators followed by a postfix
// expression. Each prefix operator becomes a Node_Unary wrapping its operand.
parse_expression_unary :: (p: *Parser) -> *Node {
    next := peek_token(p);
    is_prefix := next.kind == .plus || next.kind == .minus;
    if !is_prefix return parse_expression_postfix(p);

    operator := consume_token(p);

    // Recurse so that chains such as `- - x` nest right-to-left.
    operand := parse_expression_unary(p);
    basic.assert(operand != null, "expected expr"); // @errors

    result := make_node(p, Node_Unary);
    result.op    = operator;
    result.right = operand;
    return result;
}
// Parses a base expression optionally followed by one call suffix:
//   base
//   base(arg0, name: arg1, ..)
//
// For a call, every argument value goes into `all_arguments` in source order;
// `name: value` arguments are additionally recorded in `named_arguments`.
parse_expression_postfix :: (p: *Parser) -> *Node {
    // @TODO
    base := parse_expression_base(p);
    basic.assert(base != null, "expected expression"); // @errors

    t := peek_token(p);
    if t.kind == {
        case .l_paren; // procedure calls
            consume_token(p);

            node := make_node(p, Node_Procedure_Call);
            node.call_expr = base;
            array.init(*node.all_arguments, p.allocator);
            kv.init(*node.named_arguments, p.allocator);

            // Names used so far in this call. `named_arguments` is keyed by node
            // pointer and every parsed name is a freshly allocated node, so a
            // lookup there can never detect a repeated name; compare the text.
            seen_names: [..]string;
            array.init(*seen_names, p.allocator);

            while !at_end(p) {
                t = peek_token(p);
                if t.kind == .r_paren break;

                arg_or_name := parse_expression(p);
                basic.assert(arg_or_name != null, "expected expression in procedure call"); // @errors

                if peek_token(p).kind == .colon {
                    consume_token(p);
                    basic.assert(arg_or_name.kind == .symbol, "expected symbol for named argument"); // @errors

                    name := arg_or_name.(*Node_Symbol).str;
                    for seen_names {
                        basic.assert(it != name, "duplicate named argument '%'", name); // @errors
                    }
                    array.append(*seen_names, name);

                    value := parse_expression(p);
                    basic.assert(value != null, "expected expression after ':'"); // @errors

                    array.append(*node.all_arguments, value);
                    kv.set(*node.named_arguments, arg_or_name, value);
                }
                else {
                    array.append(*node.all_arguments, arg_or_name);
                }

                t = peek_token(p);
                if t.kind == {
                    case .comma;
                        consume_token(p);
                        continue;
                    case .r_paren;
                        break;
                    case;
                        basic.assert(false, "expected ',' or ')' but found '%'", t.str);
                }
            }

            _, closed := expect_token(p, .r_paren);
            basic.assert(closed, "expected ')'"); // @errors
            return node;
    }

    return base;
}
// Parses the innermost unit of an expression:
//   true / false   -> Node_Literal (.bool)
//   number         -> Node_Literal (.float when the text contains '.', else .int)
//   symbol         -> Node_Symbol
//   ( expr )       -> the inner expression itself (no wrapper node)
parse_expression_base :: (p: *Parser) -> *Node {
    t, ok := expect_token(p, .kw_true, .kw_false, .number, .symbol, .l_paren);
    basic.assert(ok, "expected expression, found '%'", t.str); // @errors

    if t.kind == {
        case .kw_true; #through;
        case .kw_false;
            node := make_node(p, Node_Literal);
            node.b = t.kind == .kw_true;
            node.value_kind = .bool;
            return node;

        case .symbol;
            node := make_node(p, Node_Symbol);
            node.str = t.str;
            return node;

        case .number;
            node := make_node(p, Node_Literal);

            // The parse helpers take a pointer to the string they read from, so
            // give them a copy and keep `t.str` intact for error messages.
            copy := t.str;

            if strings.contains(t.str, ".") {
                node.value_kind = .float;
                value, parsed := strings.parse_float(*copy);
                basic.assert(parsed, "malformed float '%'", t.str); // @errors
                node.f = value;
            }
            else {
                node.value_kind = .int;

                // Only a literal without a leading '-' can be unsigned. The
                // original test was inverted (it flagged literals that DO start
                // with '-'), and since number tokens always start with a digit
                // the flag was never set.
                if t.str[0] != "-" {
                    node.value_flags |= .can_be_unsigned;
                }

                value, parsed := strings.parse_int(*copy);
                basic.assert(parsed, "malformed integer '%'", t.str); // @errors
                node.i = value;
            }
            return node;

        case .l_paren;
            node := parse_expression(p);
            basic.assert(node != null, "expected expression"); // @errors

            _, closed := expect_token(p, .r_paren);
            basic.assert(closed, "expected ')'"); // @errors
            return node;
    }

    return null;
}
// Allocates one node of type T from the parser's allocator and returns it.
make_node :: (p: *Parser, $T: Type) -> *T {
    node := mem.request_memory(T,, allocator = p.allocator);
    return node;
}
// Returns the next token without consuming it.
peek_token :: (p: *Parser) -> Token {
    // consume_token only ever moves `offset`, so saving and restoring that one
    // field leaves the parser exactly as it was.
    saved_offset := p.offset;
    next := consume_token(p);
    p.offset = saved_offset;
    return next;
}
// True once the read offset has reached (or passed) the end of the source.
at_end :: (p: *Parser) -> bool {
    remaining := p.source.count - p.offset;
    return remaining <= 0;
}
// True when `c` may begin an identifier: an ASCII letter or underscore.
starts_symbol :: (c: u8) -> bool {
    if c == "_" return true;
    if c >= "a" && c <= "z" return true;
    return c >= "A" && c <= "Z";
}
// True when `c` may appear after the first character of an identifier:
// anything that can start one, plus ASCII digits.
continues_symbol :: (c: u8) -> bool {
    is_digit := c >= "0" && c <= "9";
    return is_digit || starts_symbol(c);
}
// True when `c` is an ASCII digit, the only thing a number token may start with.
starts_number :: (c: u8) -> bool {
    if c < "0" return false;
    return c <= "9";
}
// True when `c` may appear after the first character of a number token:
// a digit or a '.'.
continues_number :: (c: u8) -> bool {
    if c == "." return true;
    return starts_number(c);
}
// Lexes and consumes the next token, advancing `p.offset` past it.
//
// Whitespace (space, tab, "\n", "\r") and `//` line comments are skipped first.
// Returns:
//   .end_of_file  when nothing but whitespace/comments remains (empty `str`);
//   a keyword kind or .symbol for identifiers;
//   .number for a run of digits and '.';
//   the character's own kind for single-character punctuation;
//   .invalid for any other character. The offset is NOT advanced past it, and
//   `str` is the one offending byte.
//
// `str` of every token is a view into `p.source`, not a copy.
consume_token :: (p: *Parser) -> Token {
    c: u8;

    // Skip any mix of whitespace and line comments. This is a loop rather than
    // a recursive call so that long runs of comments cannot deepen the stack.
    while true {
        while !at_end(p) {
            c = p.source[p.offset];
            if c == {
                case " ";  #through;
                case "\n"; #through;
                case "\r"; #through;
                case "\t";
                    p.offset += 1;
                case;
                    break;
            }
        }

        // Input that ends in whitespace or a comment is a normal end of file.
        // Without this check `c` would still hold the last skipped character
        // and fall through to the .invalid return with a `str` that points
        // past the end of the source.
        if at_end(p) return .{ kind = .end_of_file };

        // line comments
        // @todo(judah): don't ignore these
        is_comment := c == "/" && p.offset + 1 < p.source.count && p.source[p.offset + 1] == "/";
        if !is_comment break;

        p.offset += 2;
        while !at_end(p) {
            if p.source[p.offset] == "\n" break;
            p.offset += 1;
        }
    }

    if starts_symbol(c) {
        t := Token.{ str = .{ data = p.source.data + p.offset } };
        while !at_end(p) {
            c = p.source[p.offset];
            if !continues_symbol(c) break;
            p.offset += 1;
        }
        t.str.count = (p.source.data + p.offset) - t.str.data;

        if t.str == {
            case "var";      t.kind = .kw_var;
            case "def";      t.kind = .kw_def;
            case "type";     t.kind = .kw_type;
            case "fn";       t.kind = .kw_fn;
            case "do";       t.kind = .kw_do;
            case "end";      t.kind = .kw_end;
            case "if";       t.kind = .kw_if;
            case "else";     t.kind = .kw_else;
            case "switch";   t.kind = .kw_switch;
            case "case";     t.kind = .kw_case;
            case "for";      t.kind = .kw_for;
            case "in";       t.kind = .kw_in;
            case "loop";     t.kind = .kw_loop;
            case "return";   t.kind = .kw_return;
            case "break";    t.kind = .kw_break;
            case "continue"; t.kind = .kw_continue;
            case "goto";     t.kind = .kw_goto;
            case "true";     t.kind = .kw_true;
            case "false";    t.kind = .kw_false;
            case "print";    t.kind = .kw_print;
            case;            t.kind = .symbol;
        }
        return t;
    }

    if starts_number(c) {
        t := Token.{ kind = .number, str = .{ data = p.source.data + p.offset } };
        while !at_end(p) {
            c = p.source[p.offset];
            if !continues_number(c) break;
            p.offset += 1;
        }
        t.str.count = (p.source.data + p.offset) - t.str.data;
        return t;
    }

    if c == {
        case "+"; #through;
        case "-"; #through;
        case "*"; #through;
        case "/"; #through;
        case "="; #through;
        case "%"; #through;
        case "!"; #through;
        case "&"; #through;
        case ","; #through;
        case "."; #through;
        case ":"; #through;
        case ";"; #through;
        case "("; #through;
        case ")"; #through;
        case "["; #through;
        case "]"; #through;
        case "{"; #through;
        case "}";
            s := string.{ data = p.source.data + p.offset, count = 1 };
            p.offset += 1;
            // Punctuation kinds are declared with the character's byte value,
            // so the byte converts straight to its Token.Kind.
            return .{ kind = xx c, str = s };
    }

    s := string.{ data = p.source.data + p.offset, count = 1 };
    return .{ kind = .invalid, str = s };
}
// Consumes the next token and reports whether its kind is one of `kinds`.
// The token is consumed and returned either way, so callers can show it in
// an error message.
expect_token :: (p: *Parser, kinds: ..Token.Kind) -> Token, bool {
    t := consume_token(p);

    matched := false;
    for kinds {
        if it == t.kind {
            matched = true;
            break;
        }
    }

    return t, matched;
}