Token :: struct {
    kind: Kind;
    str: string;

    Kind :: enum {
        invalid;
        end_of_file;

        symbol;
        number;
        string;

        kw_var;
        kw_def;
        kw_type;
        kw_fn;
        kw_do;
        kw_end;
        kw_if;
        kw_else;
        kw_switch;
        kw_case;
        kw_for;
        kw_in;
        kw_loop;
        kw_return;
        kw_break;
        kw_continue;
        kw_goto;
        kw_true;
        kw_false;

        kw_print;

        equal     :: #char "=";
        plus      :: #char "+";
        minus     :: #char "-";
        star      :: #char "*";
        percent   :: #char "%";
        bang      :: #char "!";
        and       :: #char "&";
        f_slash   :: #char "/";
        b_slash   :: #char "\\";

        l_paren   :: #char "(";
        r_paren   :: #char ")";
        l_square  :: #char "[";
        r_square  :: #char "]";
        l_brace   :: #char "{";
        r_brace   :: #char "}";
        comma     :: #char ",";
        dot       :: #char ".";
        colon     :: #char ":";
        semicolon :: #char ";";
    }
}

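// Base of every AST node. Kind uses the stmt_start/stmt_end, decl_start/decl_end, and
// expr_start/expr_end members to bracket the statement, declaration, and expression
// kinds, so a node's broad category can be checked with a range comparison.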
Node :: struct {
    kind: Kind;
    type: *Type_Info;

    Kind :: enum {
        invalid;

        block;

        stmt_start;
        print;
        return_;
        stmt_end;

        decl_start;
        variable;
        procedure;
        decl_end;

        expr_start;
        type;
        unary;
        binary;
        procedure_call;
        symbol;
        literal;
        expr_end;
    }
}

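// Every concrete node below embeds the base Node with `#as using`, so a pointer to the
// subtype can be passed where a *Node is expected; the `n.kind = ...;` line sets that
// subtype's default kind.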
Node_Print :: struct {
    #as using n: Node;
    n.kind = .print;

    expr: *Node;
}

Node_Procedure_Call :: struct {
    #as using n: Node;
    n.kind = .procedure_call;

    call_expr: *Node;
    named_arguments: kv.Kv(*Node, *Node);
    all_arguments: [..]*Node;
}

Node_Var :: struct {
    #as using n: Node;
    n.kind = .variable;

    symbol: *Node;     // always *Node_Symbol
    type_expr: *Node;  // always *Node_Type
    value_expr: *Node;
    var_flags: Var_Flag;

    Var_Flag :: enum_flags {
        immutable; // def
    }
}

Node_Unary :: struct {
    #as using n: Node;
    n.kind = .unary;

    op: Token;
    right: *Node;
}

Node_Binary :: struct {
    #as using n: Node;
    n.kind = .binary;

    op: Token;
    left: *Node;
    right: *Node;
}

Node_Symbol :: struct {
    #as using n: Node;
    n.kind = .symbol;

    str: string;
}

Node_Literal :: struct {
    #as using n: Node;
    n.kind = .literal;

    value_kind: Value_Kind;
    value_flags: Value_Flag;

    union {
        i: s64;
        u: u64;
        f: float64;
        b: bool;
        s: string;
    }

    Value_Kind :: enum {
        int;
        float;
        bool;
        string;
    }

    Value_Flag :: enum_flags {
        can_be_unsigned;
    }
}

Node_Type :: struct {
    #as using n: Node;
    n.kind = .type;

    resolved_type: *Type_Info;
    type_kind: Type_Kind;

    union {
        alias_target: *Node;
        pointer_target: *Node_Type;
        struct {
            array_element: *Node_Type;
            array_count: *Node; // can be null
        };
    }

    Type_Kind :: enum {
        alias;
        pointer;
        array;
    }
}

/*
    fn add(x int, y int) int do return x + y end
*/

Node_Procedure :: struct {
    #as using n: Node;
    n.kind = .procedure;

    header: *Node_Procedure_Header;

    args: [..]Node_Argument;
    rets: [..]Node_Argument;
    body: *Node_Block;
    flags: Flag;

    Flag :: enum_flags {
        inline_;
    }
}

Node_Argument :: struct {
    symbol: *Node_Symbol;
    type: *Node_Type;
    value: *Node; // always an expression, can be null
}

Node_Procedure_Header :: struct {
    args: [..]*Node_Type;
    rets: [..]*Node_Type;
}

Node_Block :: struct {
    #as using n: Node;
    n.kind = .block;

    body: [..]*Node;
}

Node_Return :: struct {
    #as using n: Node;
    n.kind = .return_;

    values: [..]*Node;
}

Parser :: struct {
    allocator: Allocator;
    toplevel: [..]*Node;

    previous: Token;
    filename: string;
    source: string;
    offset: int;
}

init :: (p: *Parser, allocator: Allocator) {
    p.allocator = allocator;
    p.toplevel.allocator = allocator;
}

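// Entry point. Typical usage (assuming a backing allocator is available):
//
//     p: Parser;
//     init(*p, allocator);
//     parse_string(*p, source);
//
// parse_string tokenizes and parses `source` until end_of_file or an invalid token,
// appending each top-level node to p.toplevel. Failures currently assert (see @errors).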
parse_string :: (p: *Parser, source: string) -> bool {
    p.source = source;
    p.offset = 0;

    while !at_end(p) {
        t := peek_token(p);
        if t.kind == .invalid || t.kind == .end_of_file {
            break;
        }

        node := parse_toplevel(p);
        if node != null array.append(*p.toplevel, node);
    }

    // @errors: all failures currently assert, so reaching here means the parse succeeded.
    return true;
}

#scope_file;

parse_toplevel :: (p: *Parser) -> *Node {
    t, ok := expect_token(p, .kw_var, .kw_def, .kw_fn, .kw_print, .kw_do, .kw_return);
    basic.assert(ok, "expected 'var', 'def', 'fn', 'print', 'do', or 'return', found '%'", t.str); // @errors

    if t.kind == {
        // var sym type_expr
        // var sym type_expr = expr
        // var sym = expr
        case .kw_var; #through;
        case .kw_def;
            s:, ok = expect_token(p, .symbol);
            basic.assert(ok, "symbol"); // @errors

            type_expr: *Node;
            value_expr: *Node;
            is_const := t.kind == .kw_def;

            t = peek_token(p);
            if t.kind == .equal {
                consume_token(p);
                value_expr = parse_expression(p);
                basic.assert(value_expr != null, "value expr"); // @errors
            }
            else {
                type_expr = parse_type_expression(p);
                basic.assert(type_expr != null, "type expr"); // @errors

                if peek_token(p).kind == .equal {
                    consume_token(p);
                    value_expr = parse_expression(p);
                    basic.assert(value_expr != null, "value expr"); // @errors
                }
            }

            symbol := make_node(p, Node_Symbol);
            symbol.str = s.str;

            node := make_node(p, Node_Var);
            node.symbol = symbol;
            node.type_expr = type_expr;
            node.value_expr = value_expr;

            if is_const {
                node.var_flags |= .immutable;
            }

            return node;

        // return
        // return expr0, ..exprN
        case .kw_return;
            node := make_node(p, Node_Return);
            array.init(*node.values, p.allocator);

            prev_offset := p.offset;
            expr := parse_expression(p);
            if expr == null {
                p.offset = prev_offset;
                return node;
            }

            array.append(*node.values, expr);
            return node;

        // print(expr)
        // print expr
        case .kw_print;
            expr := parse_expression(p);
            basic.assert(expr != null, "expected expression"); // @errors

            node := make_node(p, Node_Print);
            node.expr = expr;
            return node;

        // fn symbol(arg0, ..argN) do end
        case .kw_fn;
            symbol, ok := expect_token(p, .symbol);
            basic.assert(ok, "expected name for procedure"); // @errors @todo(judah): lambdas

            t, ok = expect_token(p, .l_paren);
            basic.assert(ok, "expected '(' but found '%'", t.str); // @errors

            while !at_end(p) {
                t = peek_token(p);
                if t.kind == .r_paren break;

                expr := parse_expression(p);
                basic.assert(expr != null); // @errors

                t = peek_token(p);
                if t.kind == {
                    case .comma;
                        consume_token(p);
                        continue;
                    case .r_paren;
                        break;
                    case;
                        basic.assert(false, "expected ',' or ')' but found '%'", t.str);
                }
            }

            _, ok = expect_token(p, .r_paren);
            basic.assert(ok, "expected ')'"); // @errors

            block := parse_block(p);
            basic.assert(block != null, "expected block"); // @errors

            node := make_node(p, Node_Procedure);

            // @todo: the procedure node is not filled in or returned yet; the parsed
            // arguments and block are currently discarded.
            return null;
    }

    return null;
}

parse_block :: (p: *Parser) -> *Node_Block {
    t, ok := expect_token(p, .kw_do);
    basic.assert(ok, "expected 'do' found '%'", t.str); // @errors

    block := make_node(p, Node_Block);
    array.init(*block.body, p.allocator);

    while !at_end(p) {
        t = peek_token(p);
        if t.kind == .kw_end break;

        node := parse_toplevel(p);
        basic.assert(node != null); // @errors

        array.append(*block.body, node);
    }

    t, ok = expect_token(p, .kw_end);
    basic.assert(ok, "expected 'end' found '%'", t.str); // @errors

    return block;
}

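// Type expressions:
//     symbol         named type (alias)
//     *type          pointer
//     []type         slice
//     [count]type    fixed-size array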
parse_type_expression :: (p: *Parser) -> *Node_Type {
    t, ok := expect_token(p, .symbol, .star, .l_square);
    basic.assert(ok, "type expression"); // @errors

    if t.kind == {
        case .star;
            target := parse_type_expression(p);
            basic.assert(target != null, "pointer target"); // @errors

            node := make_node(p, Node_Type);
            node.type_kind = .pointer;
            node.pointer_target = target;
            return node;

        case .l_square;
            node := make_node(p, Node_Type);
            node.type_kind = .array;

            // slice
            if peek_token(p).kind == .r_square {
                consume_token(p);

                element := parse_type_expression(p);
                basic.assert(element != null, "array element"); // @errors

                node.array_element = element;
            }
            else {
                count := parse_expression(p);
                basic.assert(count != null, "array count"); // @errors

                _, ok := expect_token(p, .r_square);
                basic.assert(ok, "end of array type");

                element := parse_type_expression(p);
                basic.assert(element != null, "array element"); // @errors

                node.array_count = count;
                node.array_element = element;
            }

            return node;

        case .symbol;
            symbol := make_node(p, Node_Symbol);
            symbol.str = t.str;

            node := make_node(p, Node_Type);
            node.type_kind = .alias;
            node.alias_target = symbol;
            return node;
    }

    return null;
}

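// Binary expressions are parsed with precedence climbing: an operator only extends the
// current expression while its precedence is greater than min_precedence, and the
// recursive call passes that operator's own precedence, which makes operators of equal
// precedence left-associative. Current levels: '*', '/', '%', '&' bind tighter (4) than
// '+' and '-' (3); comparison operators are not handled yet.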
parse_expression :: (p: *Parser, min_precedence := 1) -> *Node {
    get_precedence :: inline (t: Token) -> int {
        if t.kind == {
            case .star;    #through;
            case .f_slash; #through;
            case .percent; #through;
            case .and;
                return 4;

            case .plus;  #through;
            case .minus;
                return 3;

            // case .equal_equal; #through;
            // case .bang_equal;  #through;
            // case .less;        #through;
            // case .less_equal;  #through;
            // case .more;        #through;
            // case .more_equal;
            //     return 2;
        }

        return 0;
    }

    node := parse_expression_unary(p);
    basic.assert(node != null, "expected expression"); // @errors

    while !at_end(p) {
        op := peek_token(p);
        prec := get_precedence(op);
        if prec <= min_precedence break;

        op = consume_token(p);

        lhs := node;
        rhs := parse_expression(p, prec);
        basic.assert(rhs != null, "expected rhs"); // @errors

        new := make_node(p, Node_Binary);
        new.op = op;
        new.left = lhs;
        new.right = rhs;

        node = new;
    }

    return node;
}

parse_expression_unary :: (p: *Parser) -> *Node {
    op := peek_token(p);
    if op.kind == {
        case .plus;  #through;
        case .minus;
            op = consume_token(p);

            node := parse_expression_unary(p);
            basic.assert(node != null, "expected expr"); // @errors

            unary := make_node(p, Node_Unary);
            unary.op = op;
            unary.right = node;
            return unary;
    }

    return parse_expression_postfix(p);
}

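// Postfix expressions; currently only procedure calls:
//     expr(arg0, .., argN)
//     expr(name: value, ..)    named and positional arguments can be mixed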
parse_expression_postfix :: (p: *Parser) -> *Node {
    // @TODO
    base := parse_expression_base(p);
    basic.assert(base != null, "expected expression"); // @errors

    t := peek_token(p);
    if t.kind == {
        case .l_paren; // procedure calls
            consume_token(p);

            node := make_node(p, Node_Procedure_Call);
            node.call_expr = base;

            array.init(*node.all_arguments, p.allocator);
            kv.init(*node.named_arguments, p.allocator);

            while !at_end(p) {
                t = peek_token(p);
                if t.kind == .r_paren break;

                arg_or_name := parse_expression(p);
                basic.assert(arg_or_name != null, "expected expression in procedure call"); // @errors

                if peek_token(p).kind == .colon {
                    consume_token(p);
                    basic.assert(arg_or_name.kind == .symbol, "expected symbol for named argument"); // @errors
                    basic.assert(!kv.exists(*node.named_arguments, arg_or_name), "duplicate named argument '%'", arg_or_name.(*Node_Symbol).str); // @errors

                    value := parse_expression(p);
                    basic.assert(value != null, "expected expression after ':'"); // @errors
                    array.append(*node.all_arguments, value);
                    kv.set(*node.named_arguments, arg_or_name, value);
                }
                else {
                    array.append(*node.all_arguments, arg_or_name);
                }

                t = peek_token(p);
                if t.kind == {
                    case .comma;
                        consume_token(p);
                        continue;
                    case .r_paren;
                        break;
                    case;
                        basic.assert(false, "expected ',' or ')' but found '%'", t.str);
                }
            }

            _, ok := expect_token(p, .r_paren);
            basic.assert(ok, "expected ')'"); // @errors
            return node;
    }

    return base;
}

parse_expression_base :: (p: *Parser) -> *Node {
    t, ok := expect_token(p, .kw_true, .kw_false, .number, .symbol, .l_paren);
    basic.assert(ok, "expected expression, found '%'", t.str); // @errors

    if t.kind == {
        case .kw_true; #through;
        case .kw_false;
            node := make_node(p, Node_Literal);
            node.b = t.kind == .kw_true;
            node.value_kind = .bool;
            return node;

        case .symbol;
            node := make_node(p, Node_Symbol);
            node.str = t.str;
            return node;

        case .number;
            node := make_node(p, Node_Literal);
            copy := t.str;

            if strings.contains(t.str, ".") {
                node.value_kind = .float;

                value, ok := strings.parse_float(*copy);
                basic.assert(ok, "malformed float '%'", t.str); // @errors

                node.f = value;
            }
            else {
                node.value_kind = .int;
                // a literal without a leading '-' can also be represented as unsigned
                if t.str[0] != "-" {
                    node.value_flags |= .can_be_unsigned;
                }

                value, ok := strings.parse_int(*copy);
                basic.assert(ok, "malformed integer '%'", t.str); // @errors

                node.i = value;
            }

            return node;

        case .l_paren;
            node := parse_expression(p);
            basic.assert(node != null, "expected expression"); // @errors

            _, ok := expect_token(p, .r_paren);
            basic.assert(ok, "expected ')'"); // @errors

            return node;
    }

    return null;
}

make_node :: (p: *Parser, $T: Type) -> *T {
    return mem.request_memory(T,, allocator = p.allocator);
}

peek_token :: (p: *Parser) -> Token {
    copy := p.*;
    return consume_token(*copy);
}

at_end :: (p: *Parser) -> bool {
    return p.offset >= p.source.count;
}

starts_symbol :: (c: u8) -> bool {
    return (c >= "a" && c <= "z") ||
           (c >= "A" && c <= "Z") ||
           (c == "_");
}
continues_symbol :: (c: u8) -> bool {
    return starts_symbol(c) || (c >= "0" && c <= "9");
}

starts_number :: (c: u8) -> bool {
    return (c >= "0" && c <= "9");
}
continues_number :: (c: u8) -> bool {
    return starts_number(c) || c == ".";
}

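// The lexer. Skips whitespace and '//' line comments, then returns the next keyword,
// symbol, number, or single-character punctuation token. Punctuation token kinds are
// the ASCII codes of the characters themselves (see Token.Kind).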
consume_token :: (p: *Parser) -> Token {
    if at_end(p) return .{ kind = .end_of_file };

    c := p.source[p.offset];

    // skip whitespace
    while !at_end(p) {
        c = p.source[p.offset];
        if c == {
            case " ";  #through;
            case "\n"; #through;
            case "\t";
                p.offset += 1;
            case;
                break;
        }
    }

    // only trailing whitespace remained: nothing left to tokenize
    if at_end(p) return .{ kind = .end_of_file };

    // line comments
    // @todo(judah): don't ignore these
    if c == "/" && p.offset + 1 < p.source.count && p.source[p.offset + 1] == "/" {
        p.offset += 2;

        while !at_end(p) {
            c = p.source[p.offset];
            if c == "\n" break;
            p.offset += 1;
        }

        // @todo(judah): don't recurse
        return consume_token(p);
    }

    if starts_symbol(c) {
        t := Token.{ str = .{ data = p.source.data + p.offset } };
        while !at_end(p) {
            c = p.source[p.offset];
            if !continues_symbol(c) break;
            p.offset += 1;
        }

        t.str.count = (p.source.data + p.offset) - t.str.data;
        if t.str == {
            case "var";      t.kind = .kw_var;
            case "def";      t.kind = .kw_def;
            case "type";     t.kind = .kw_type;
            case "fn";       t.kind = .kw_fn;
            case "do";       t.kind = .kw_do;
            case "end";      t.kind = .kw_end;
            case "if";       t.kind = .kw_if;
            case "else";     t.kind = .kw_else;
            case "switch";   t.kind = .kw_switch;
            case "case";     t.kind = .kw_case;
            case "for";      t.kind = .kw_for;
            case "in";       t.kind = .kw_in;
            case "loop";     t.kind = .kw_loop;
            case "return";   t.kind = .kw_return;
            case "break";    t.kind = .kw_break;
            case "continue"; t.kind = .kw_continue;
            case "goto";     t.kind = .kw_goto;
            case "true";     t.kind = .kw_true;
            case "false";    t.kind = .kw_false;

            case "print";    t.kind = .kw_print;
            case;            t.kind = .symbol;
        }

        return t;
    }

    if starts_number(c) {
        t := Token.{ kind = .number, str = .{ data = p.source.data + p.offset } };
        while !at_end(p) {
            c = p.source[p.offset];
            if !continues_number(c) break;
            p.offset += 1;
        }

        t.str.count = (p.source.data + p.offset) - t.str.data;
        return t;
    }

    if c == {
        case "+"; #through;
        case "-"; #through;
        case "*"; #through;
        case "/"; #through;
        case "="; #through;
        case "%"; #through;
        case "!"; #through;
        case "&"; #through;
        case ","; #through;
        case "."; #through;
        case ":"; #through;
        case ";"; #through;
        case "("; #through;
        case ")"; #through;
        case "["; #through;
        case "]"; #through;
        case "{"; #through;
        case "}";
            s := string.{ data = p.source.data + p.offset, count = 1 };
            p.offset += 1;
            return .{ kind = xx c, str = s };
    }

    s := string.{ data = p.source.data + p.offset, count = 1 };
    return .{ kind = .invalid, str = s };
}

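// Consumes one token and reports whether its kind is one of `kinds`. The token is
// consumed even on a mismatch; callers assert on the returned bool.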
expect_token :: (p: *Parser, kinds: ..Token.Kind) -> Token, bool {
    t := consume_token(p);
    for kinds if it == t.kind {
        return t, true;
    }

    return t, false;
}