From aa066f87d0c49719cfbb61319a00b22e183a2635 Mon Sep 17 00:00:00 2001 From: Emin Arslan <53535669+haxala1r@users.noreply.github.com> Date: Sun, 12 Oct 2025 21:33:57 +0300 Subject: [PATCH] Initial state - basic lexer + parser + interpreter --- .gitignore | 1 + bin/dune | 6 +++ bin/main.ml | 17 +++++++ dune-project | 2 + lib/ast.ml | 37 +++++++++++++++ lib/dune | 9 ++++ lib/eval.ml | 100 +++++++++++++++++++++++++++++++++++++++ lib/interpreterStdlib.ml | 46 ++++++++++++++++++ lib/lexer.mll | 36 ++++++++++++++ lib/parser.mly | 34 +++++++++++++ lib/read.ml | 13 +++++ main.opam | 0 12 files changed, 301 insertions(+) create mode 100644 .gitignore create mode 100644 bin/dune create mode 100644 bin/main.ml create mode 100644 dune-project create mode 100644 lib/ast.ml create mode 100644 lib/dune create mode 100644 lib/eval.ml create mode 100644 lib/interpreterStdlib.ml create mode 100644 lib/lexer.mll create mode 100644 lib/parser.mly create mode 100644 lib/read.ml create mode 100644 main.opam diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..9c5f578 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +_build \ No newline at end of file diff --git a/bin/dune b/bin/dune new file mode 100644 index 0000000..5abb7b8 --- /dev/null +++ b/bin/dune @@ -0,0 +1,6 @@ +(executable + (name main) + (public_name main) + (libraries str lisp unix)) +(include_subdirs unqualified) + diff --git a/bin/main.ml b/bin/main.ml new file mode 100644 index 0000000..f4df5b3 --- /dev/null +++ b/bin/main.ml @@ -0,0 +1,17 @@ +open Lisp.Ast;; +open Printf;; +open Lisp;; +open Eval;; +open Read;; +let rec repl c = + let () = printf ">>> "; Out_channel.flush Out_channel.stdout; in + match In_channel.input_line c with + | None -> () + | Some l -> + let vals = (parse_str l) in + (* dbg_print_all vals; *) + dbg_print_all (eval_all vals); + Out_channel.flush Out_channel.stdout; + repl c;; + +let _ = repl (In_channel.stdin) \ No newline at end of file diff --git a/dune-project b/dune-project new file mode 100644 index 0000000..c48cb49 --- /dev/null +++ b/dune-project @@ -0,0 +1,2 @@ +(lang dune 3.7) +(using menhir 2.1) \ No newline at end of file diff --git a/lib/ast.ml b/lib/ast.ml new file mode 100644 index 0000000..65104d4 --- /dev/null +++ b/lib/ast.ml @@ -0,0 +1,37 @@ +type lisp_val = + | LInt of int + | LDouble of float + | LCons of lisp_val * lisp_val + | LNil + | LSymbol of string + | LString of string + + (* a builtin function is expressed as a name and the ocaml function + that performs the operation. The function should take a list of arguments. + generally, builtin functions should handle their arguments directly, + and eval forms in the environment as necessary. *) + | LBuiltinFunction of string * (environment -> lisp_val -> lisp_val) + (* a function is a name, a parameter list, and function body. *) + | LFunction of string * lisp_val * lisp_val + | LQuoted of lisp_val +and environment = (string, lisp_val) Hashtbl.t list + + +let rec dbg_print_one v = + let pf = Printf.sprintf in + match v with + | LInt x -> pf "" x + | LSymbol s -> pf "" s + | LString s -> pf "" s + | LNil -> pf "()" + | LCons (a, b) -> pf "(%s . %s)" (dbg_print_one a) (dbg_print_one b) + | LDouble d -> pf "" d + | LBuiltinFunction (name, _) -> pf "" name + | LFunction (name, args, _) -> pf "" + name ((dbg_print_one args)) + | LQuoted v -> pf "" (dbg_print_one v) + (*| _ -> ""*) + +let dbg_print_all vs = + let pr v = Printf.printf "%s\n" (dbg_print_one v) in + List.iter pr vs diff --git a/lib/dune b/lib/dune new file mode 100644 index 0000000..2c9d859 --- /dev/null +++ b/lib/dune @@ -0,0 +1,9 @@ +(library + (name lisp) + ;(modules ast read lexer parser) + ) + +(include_subdirs unqualified) + +(menhir (modules parser)) +(ocamllex lexer) \ No newline at end of file diff --git a/lib/eval.ml b/lib/eval.ml new file mode 100644 index 0000000..e62e624 --- /dev/null +++ b/lib/eval.ml @@ -0,0 +1,100 @@ +open Ast;; +open InterpreterStdlib;; + + +let default_env: environment = [Hashtbl.create 1024];; + +let env_add env s v = + match env with + | [] -> () + | e1 :: _ -> Hashtbl.add e1 s v + +let env_new_lexical env = + let h = Hashtbl.create 16 in + h :: env + +let add_builtin s f = + env_add default_env s (LBuiltinFunction (s, f)) +let () = add_builtin "+" iadd +let () = add_builtin "-" isub +let () = add_builtin "car" car +let () = add_builtin "cdr" cdr + +let make_env () = [Hashtbl.copy (List.hd default_env)] + +(* the type annotations are unnecessary, but help constrain us from a +potentially more general function here *) +let rec eval_sym (env: environment) (s: string) = + match env with + | [] -> raise (Invalid_argument (Printf.sprintf "eval_sym: symbol %s has no value in current scope" s)) + | e :: rest -> + match Hashtbl.find_opt e s with + | None -> eval_sym rest s + | Some v -> v + +let rec eval_one env v = + match v with + | LInt x -> LInt x + | LDouble x -> LDouble x + | LString s -> LString s + | LSymbol s -> eval_sym env s + | LNil -> LNil + | LCons (func, args) -> eval_call env func args + | LBuiltinFunction (n, f) -> LBuiltinFunction (n, f) + | LFunction (n, l, f) -> LFunction (n, l, f) + | LQuoted v -> v + +(* Evaluate a list of values, without evaluating the resulting +function or macro call. Since macros and functions inherently +look similar, they share a lot of code, which is extracted here *) +and eval_list env l = + match l with + | LNil -> LNil + | LCons (a, b) -> LCons (eval_one env a, eval_list env b) + | _ -> raise (Invalid_argument "eval_list: cannot process non-list") + +and eval_body env body = + match body with + | LNil -> LNil + | LCons (form, LNil) -> eval_one env form + | LCons (form, next) -> + ignore (eval_one env form); eval_body env next + | _ -> LNil + +and eval_apply env func args = + let lexical_env = env_new_lexical env in + let rec bind_one s a = + match a with + | LNil -> raise (Invalid_argument "not enough arguments supplied to function") + | LCons (value, resta) -> + env_add lexical_env s value; resta + | _ -> raise (Invalid_argument "invalid argument list") + and bind_args l a = + match l with + | LNil -> (match a with + | LNil -> () + | _ -> raise (Invalid_argument "too many arguments supplied to function")) + | LCons (LSymbol sym, LSymbol restl)-> + env_add lexical_env restl (bind_one sym a) + | LCons (LSymbol sym, restl) -> + bind_args restl (bind_one sym a) + | _ -> raise (Invalid_argument "Failure while binding arguments") + in match func with + | LFunction (_, l, b) -> + bind_args l args; + eval_body lexical_env b + | _ -> LNil + + + +and eval_call env func args = + match eval_one env func with + | LBuiltinFunction (_, f) -> f env (eval_list env args) + | LFunction (n, l, b) -> eval_apply env (LFunction (n, l, b)) (eval_list env args) + | v -> raise (Invalid_argument + (Printf.sprintf "eval_apply: cannot call non-function object %s" (dbg_print_one v)));; + +let eval_all vs = + let env = make_env () in + let ev v = eval_one env v in + List.map ev vs diff --git a/lib/interpreterStdlib.ml b/lib/interpreterStdlib.ml new file mode 100644 index 0000000..3d0658d --- /dev/null +++ b/lib/interpreterStdlib.ml @@ -0,0 +1,46 @@ +open Ast;; + +let iadd _ vs : lisp_val = + let rec auxi vs accum = + match vs with + | LCons (LInt a, b) -> (auxi b (accum + a)) + | LCons (LDouble a, b) -> (auxf b ((float_of_int accum) +. a)) + | _ -> LInt accum + and auxf vs accum = + match vs with + | LCons (LInt a, b) -> (auxf b (accum +. (float_of_int a))) + | LCons (LDouble a, b) -> (auxf b (accum +. a)) + | _ -> LDouble accum + in (auxi vs 0);; +let isub _ vs = + let rec auxi vs accum = + match vs with + | LNil -> LInt accum + | LCons (LInt a, b) -> auxi b (accum - a) + | LCons (LDouble a, b) -> auxf b ((float_of_int accum) -. a) + | _ -> raise (Invalid_argument "-: invalid argument to subtraction operator") + and auxf vs accum = + match vs with + | LNil -> LDouble accum + | LCons (LInt a, b) -> auxf b (accum -. (float_of_int a)) + | LCons (LDouble a, b) -> auxf b (accum -. a) + | _ -> raise (Invalid_argument "-: invalid argument to subtraction operator") + in + match vs with + | LCons (LInt a, LNil) -> LInt (-a) + | LCons (LInt a, b) -> auxi b a + | LCons (LDouble a, LNil) -> LDouble (-. a) + | LCons (LDouble a, b) -> auxf b a + | _ -> auxi vs 0;; + + +let car _ vs = + match vs with + | LCons (LCons (a, _), LNil) -> a + | _ -> raise (Invalid_argument "car: invalid argument") + +let cdr _ vs = + match vs with + | LCons (LCons (_, b), LNil) -> b + | _ -> raise (Invalid_argument "cdr: invalid argument") + diff --git a/lib/lexer.mll b/lib/lexer.mll new file mode 100644 index 0000000..7e2407c --- /dev/null +++ b/lib/lexer.mll @@ -0,0 +1,36 @@ +{ +open Lexing +open Parser +exception SyntaxError of string + +let strip_quotes s = String.sub s 1 (String.length s - 2);; +} + +let digit = ['0'-'9'] +let number_sign = '-' | '+' +let int = number_sign? digit+ +let double = digit* '.' digit+ | digit+ '.' digit* + + +let white = [' ' '\t']+ +let newline = '\r' | '\n' | "\r\n" + +let sym_char = ['a'-'z' 'A'-'Z' '!' '\\' '+' '-' '*' '/' '_' '?'] +let sym = sym_char sym_char* + +let str = '"' [^'"']* '"' + +rule read = + parse + | white { read lexbuf } + | newline { new_line lexbuf; read lexbuf} + | int { INT (int_of_string (Lexing.lexeme lexbuf))} + | double { DOUBLE (float_of_string (Lexing.lexeme lexbuf))} + | sym { SYM (Lexing.lexeme lexbuf)} + | str { STR (strip_quotes (Lexing.lexeme lexbuf))} + | '(' { LPAREN } + | ')' { RPAREN } + | '\'' { QUOTE } + | _ { raise (SyntaxError ("Unexpected char: " ^ Lexing.lexeme lexbuf))} + | eof { EOF } + diff --git a/lib/parser.mly b/lib/parser.mly new file mode 100644 index 0000000..77e51cc --- /dev/null +++ b/lib/parser.mly @@ -0,0 +1,34 @@ +%{ + open Ast +%} + +%token INT +%token DOUBLE +%token SYM +%token STR +%token LPAREN +%token RPAREN +%token QUOTE +%token EOF + +%start prog +%% + +prog: + | EOF { None } + | e = expr { Some e } + ; + +expr: + | i = INT { LInt i } + | d = DOUBLE {LDouble d} + | s = SYM { LSymbol s } + | s = STR { LString (String.uppercase_ascii s) } + | LPAREN; l = lisp_list_rest { l } + | QUOTE; e = expr { LQuoted e} + ; + +lisp_list_rest: + | RPAREN { LNil } + | e = expr; lr = lisp_list_rest { LCons (e, lr) } + ; diff --git a/lib/read.ml b/lib/read.ml new file mode 100644 index 0000000..4955f4a --- /dev/null +++ b/lib/read.ml @@ -0,0 +1,13 @@ +let parse_one lb = Parser.prog (Lexer.read) lb + +let parse lb = + let rec helper () = + match parse_one lb with + | None -> [] + | Some (t) -> t :: helper () + in + helper () + +let parse_str s = + parse (Lexing.from_string s) + diff --git a/main.opam b/main.opam new file mode 100644 index 0000000..e69de29