diff --git a/CMakeLists.txt b/CMakeLists.txt index 0450cd5..694c1f3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -9,8 +9,8 @@ set(CMAKE_CXX_STANDARD 23) set(CMAKE_CXX_STANDARD_REQUIRED ON) -set(HEADER_FILES src/include/lex.hpp) -set(SOURCE_FILES src/lex.cpp) +set(HEADER_FILES src/include/lex.hpp src/include/value.hpp src/include/parse.hpp) +set(SOURCE_FILES src/lex.cpp src/parse.cpp src/value.cpp) set(CXX_WARNING_FLAGS -Wall -Wextra -Wpedantic -pedantic) # we're not actually shipping a library yet, diff --git a/src/include/lex.hpp b/src/include/lex.hpp index 957e370..c24e998 100644 --- a/src/include/lex.hpp +++ b/src/include/lex.hpp @@ -1,12 +1,11 @@ #pragma once #include #include -#include #include #include #include -enum TokenType { +enum class TokenType { OpenParen, CloseParen, Dollar, diff --git a/src/include/parse.hpp b/src/include/parse.hpp new file mode 100644 index 0000000..d78f509 --- /dev/null +++ b/src/include/parse.hpp @@ -0,0 +1,32 @@ +#pragma once +#include +#include + + + + +// The Parser produces a regular lisp value. +// lisp code is made of lisp lists and atoms. +class Parser { +private: + // the token stream. + std::deque ts; + Token get_token(); + void unget_token(Token); + + // these may need to be interned later + String make_string(std::string); + Symbol make_symbol(std::string); + + + std::optional parse_one(); + LispValue parse_list(); + +public: + Parser(Lexer); + + void feed(Lexer); + + std::optional next(); +}; + diff --git a/src/include/value.hpp b/src/include/value.hpp new file mode 100644 index 0000000..4cba57c --- /dev/null +++ b/src/include/value.hpp @@ -0,0 +1,33 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +// we're using a pure variant as our value type. +struct Integer {int64_t value;}; +struct Double {double value;}; +struct String {std::string value;}; // might be a good idea to intern strings +struct Symbol {std::string value;}; +struct List; +struct Nil {}; + + +using LispValue = std::variant; +struct List {std::vector list;}; +// during compilation, we don't really care for cyclical lists etc. +// during compilation we'll mostly be dealing with regular, flat lists +// that form function calls. +// We will have a different set of values during runtime +// as the runtime will be a bytecode interpreter anyhow. + + + +void print_val(LispValue); + +String make_string(std::string); +Symbol make_symbol(std::string); + diff --git a/src/lex.cpp b/src/lex.cpp index e4011fd..e85e47a 100644 --- a/src/lex.cpp +++ b/src/lex.cpp @@ -9,14 +9,14 @@ using namespace std; std::ostream &operator<<(std::ostream &os, Token const &t) { os << "Token("; switch (t.type) { - case OpenParen: os << "OpenParen)"; break; - case CloseParen: os << "CloseParen)"; break; - case Dollar: os << "Dollar)"; break; - case Symbol: os << "Symbol, " << get(*t.value) << ")"; break; - case String: os << "String, \"" << get(*t.value) << "\")"; break; - case Int: os << "Int, " << get(*t.value) << ")"; break; - case Double: os << "Double, " << get(*t.value) << ")"; break; - case End: os << "END)"; break; + case TokenType::OpenParen: os << "OpenParen)"; break; + case TokenType::CloseParen: os << "CloseParen)"; break; + case TokenType::Dollar: os << "Dollar)"; break; + case TokenType::Symbol: os << "Symbol, " << get(*t.value) << ")"; break; + case TokenType::String: os << "String, \"" << get(*t.value) << "\")"; break; + case TokenType::Int: os << "Int, " << get(*t.value) << ")"; break; + case TokenType::Double: os << "Double, " << get(*t.value) << ")"; break; + case TokenType::End: os << "END)"; break; default: os << ")"; } diff --git a/src/main.cpp b/src/main.cpp index 9e20c1a..41ae12b 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -1,5 +1,7 @@ +#include "value.hpp" #include #include +#include #include using namespace std; @@ -8,9 +10,9 @@ int main() { string s; getline(cin, s); cout << s << endl; - for (auto t : lex(s)) { - cout << t << " "; - } + Parser p(s); + print_val(*p.next()); + cout << endl; return 0; } \ No newline at end of file diff --git a/src/parse.cpp b/src/parse.cpp new file mode 100644 index 0000000..8e13357 --- /dev/null +++ b/src/parse.cpp @@ -0,0 +1,74 @@ +#include + +#include + +#include + +using namespace std; + +Parser::Parser(Lexer l) : ts(l.collect()) {} + +void Parser::feed(Lexer l) { + ts.append_range(l.collect()); +} + +Token Parser::get_token() { + Token t = ts.front(); + ts.pop_front(); + return t; +} +void Parser::unget_token(Token t) { + ts.push_front(t); +} + +String Parser::make_string(string s) { + return String {s}; +} + +Symbol Parser::make_symbol(string s) { + return Symbol {s}; +} + +optional Parser::parse_one() { + Token t = get_token(); + switch (t.type) { + case TokenType::Int: return Integer {get(*t.value)}; + case TokenType::Double: return Double {get(*t.value)}; + case TokenType::String: return make_string(get(*t.value)); + case TokenType::Symbol: return make_symbol(get(*t.value)); + case TokenType::OpenParen: return parse_list(); + case TokenType::CloseParen: throw "whatever"; + + // I don't know what this will actually do, in theory maybe just like the OpenParen, + // but parses things in a different namespace? unimplemented for now. + case TokenType::Dollar: return parse_one(); + case TokenType::End : return nullopt; + } + return nullopt; +} + +LispValue Parser::parse_list() { + // assumes that we have read the OpenParen, and are reading elements until + // we find the CloseParen + List l; + Token t = get_token(); + + while (true) { + if (t.type == TokenType::End) { + // this is clearly an error! + cerr << "Parser::parse_list: Input ended before list ended." << endl; + break; + } + if (t.type == TokenType::CloseParen) + break; + unget_token(t); + l.list.push_back(*parse_one()); + t = get_token(); + } + return l; +} + + +optional Parser::next() { + return parse_one(); +} diff --git a/src/value.cpp b/src/value.cpp new file mode 100644 index 0000000..7426314 --- /dev/null +++ b/src/value.cpp @@ -0,0 +1,35 @@ +#include +#include + + +template +requires std::convertible_to +void value_printer(T t) { + if constexpr (std::same_as) { + print_val(t); + } else if constexpr (requires { std::cout << t.value;}) { + std::cout << t.value; + } else { + std::cout << "{UNKNOWN}" << std::endl; + } +} + +template <> +void value_printer(List l) { + std::cout << "("; + for (auto i : l.list) { + value_printer(i); + std::cout << " "; + } + std::cout << ")"; +} + +template <> +void value_printer(String s) { + std::cout << '"' << s.value << '"'; +} + +void print_val(LispValue v) { + std::visit([](auto arg) {value_printer(arg);}, v); +} +