Initial parser attempt added - seems to parse correctly
All checks were successful
ci/woodpecker/push/workflow Pipeline was successful

This commit is contained in:
2025-10-03 22:07:20 +03:00
committed by Emin Arslan
parent 561c76b6d7
commit 7746fdda6f
8 changed files with 190 additions and 15 deletions

View File

@@ -9,8 +9,8 @@ set(CMAKE_CXX_STANDARD 23)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(HEADER_FILES src/include/lex.hpp)
set(SOURCE_FILES src/lex.cpp)
set(HEADER_FILES src/include/lex.hpp src/include/value.hpp src/include/parse.hpp)
set(SOURCE_FILES src/lex.cpp src/parse.cpp src/value.cpp)
set(CXX_WARNING_FLAGS -Wall -Wextra -Wpedantic -pedantic)
# we're not actually shipping a library yet,

View File

@@ -1,12 +1,11 @@
#pragma once
#include <deque>
#include <sstream>
#include <vector>
#include <stdint.h>
#include <variant>
#include <optional>
enum TokenType {
enum class TokenType {
OpenParen,
CloseParen,
Dollar,

32
src/include/parse.hpp Normal file
View File

@@ -0,0 +1,32 @@
#pragma once
#include <value.hpp>
#include <lex.hpp>
// The Parser produces a regular lisp value.
// lisp code is made of lisp lists and atoms.
// Parser owns a queue of tokens and turns them into LispValues one
// top-level form at a time (see next()).
// NOTE(review): the single-argument constructor is not `explicit`, so a Lexer
// converts to a Parser implicitly - confirm that this is intended.
class Parser {
private:
// the token stream. Tokens are consumed from the front; unget_token()
// puts one back at the front.
std::deque<Token> ts;
// Removes and returns the token at the front of the stream.
Token get_token();
// Pushes the given token back onto the front of the stream.
void unget_token(Token);
// Wrap a std::string in the String / Symbol value structs.
// these may need to be interned later
String make_string(std::string);
Symbol make_symbol(std::string);
// Parses a single value; yields an empty optional when the End token is read.
std::optional<LispValue> parse_one();
// Parses the remainder of a list; the opening paren must already have been consumed.
LispValue parse_list();
public:
// Creates a parser over all tokens collected from the lexer.
Parser(Lexer);
// Appends all tokens collected from another lexer to the end of the stream.
void feed(Lexer);
// Parses and returns the next value in the stream (forwards to parse_one()).
std::optional<LispValue> next();
};

33
src/include/value.hpp Normal file
View File

@@ -0,0 +1,33 @@
#pragma once
#include <concepts>
#include <vector>
#include <cstdint>
#include <string>
#include <variant>
#include <iostream>
// Values are modelled as a plain std::variant over the small structs below.
struct Integer {
    int64_t value;
};

struct Double {
    double value;
};

// might be a good idea to intern strings
struct String {
    std::string value;
};

struct Symbol {
    std::string value;
};

// Nil is declared here but is not one of LispValue's alternatives.
struct Nil {};

// List has to be forward-declared: it is an alternative of LispValue and, at
// the same time, stores LispValues itself.
struct List;

using LispValue = std::variant<Integer, Double, String, Symbol, List>;

struct List {
    std::vector<LispValue> list;
};
// During compilation we don't need to care about cyclical lists and the like:
// we'll mostly be dealing with regular, flat lists that form function calls.
// The runtime will use a different set of values, since it will be a
// bytecode interpreter anyhow.
// Writes a textual form of the value to std::cout: lists are wrapped in
// parentheses and strings in double quotes. No trailing newline is written.
void print_val(LispValue);
// Free-function helpers that wrap a std::string in a String / Symbol.
// NOTE(review): Parser declares private members with the same names, and no
// definition of these free functions is visible in value.cpp - confirm they
// are defined somewhere before calling them.
String make_string(std::string);
Symbol make_symbol(std::string);

View File

@@ -9,14 +9,14 @@ using namespace std;
std::ostream &operator<<(std::ostream &os, Token const &t) {
os << "Token(";
switch (t.type) {
case OpenParen: os << "OpenParen)"; break;
case CloseParen: os << "CloseParen)"; break;
case Dollar: os << "Dollar)"; break;
case Symbol: os << "Symbol, " << get<string>(*t.value) << ")"; break;
case String: os << "String, \"" << get<string>(*t.value) << "\")"; break;
case Int: os << "Int, " << get<int64_t>(*t.value) << ")"; break;
case Double: os << "Double, " << get<double>(*t.value) << ")"; break;
case End: os << "END)"; break;
case TokenType::OpenParen: os << "OpenParen)"; break;
case TokenType::CloseParen: os << "CloseParen)"; break;
case TokenType::Dollar: os << "Dollar)"; break;
case TokenType::Symbol: os << "Symbol, " << get<string>(*t.value) << ")"; break;
case TokenType::String: os << "String, \"" << get<string>(*t.value) << "\")"; break;
case TokenType::Int: os << "Int, " << get<int64_t>(*t.value) << ")"; break;
case TokenType::Double: os << "Double, " << get<double>(*t.value) << ")"; break;
case TokenType::End: os << "END)"; break;
default:
os << ")";
}

View File

@@ -1,5 +1,7 @@
#include "value.hpp"
#include <iostream>
#include <lex.hpp>
#include <parse.hpp>
#include <string>
using namespace std;
@@ -8,9 +10,9 @@ int main() {
string s;
getline(cin, s);
cout << s << endl;
for (auto t : lex(s)) {
cout << t << " ";
}
Parser p(s);
print_val(*p.next());
cout << endl;
return 0;
}

74
src/parse.cpp Normal file
View File

@@ -0,0 +1,74 @@
#include <lex.hpp>
#include <parse.hpp>
#include <iostream>
#include <stdexcept>
#include <utility>
using namespace std;
// Builds a parser whose token stream starts out holding everything the
// lexer collects.
Parser::Parser(Lexer l)
    : ts(l.collect())
{
}
// Adds the tokens collected from another lexer to the back of the stream;
// tokens that are already queued stay in front of them.
void Parser::feed(Lexer l)
{
    this->ts.append_range(l.collect());
}
// Removes the token at the front of the stream and returns it.
//
// Throws std::out_of_range when the stream is empty. Calling front() or
// pop_front() on an empty deque is undefined behaviour, and the stream does
// become empty, e.g. when next() is called again after the End token has
// already been consumed.
Token Parser::get_token() {
    if (ts.empty()) {
        throw std::out_of_range("Parser::get_token: token stream is empty");
    }
    // The front element is popped right away, so its contents can be moved
    // out rather than copied.
    Token t = std::move(ts.front());
    ts.pop_front();
    return t;
}
// Puts a token back at the front of the stream, so that the next call to
// get_token() returns it.
void Parser::unget_token(Token t) {
    // t was taken by value and is not used afterwards: move it into the
    // deque instead of copying it a second time.
    ts.push_front(std::move(t));
}
// Wraps the given text in a String value.
// Kept as a separate step because strings may need to be interned later.
String Parser::make_string(string s) {
    // s is a by-value parameter owned by this call: move it into the result
    // instead of copying the characters again.
    return String {std::move(s)};
}
// Wraps the given name in a Symbol value.
// Kept as a separate step because symbols may need to be interned later.
Symbol Parser::make_symbol(string s) {
    // s is a by-value parameter owned by this call: move it into the result
    // instead of copying the characters again.
    return Symbol {std::move(s)};
}
// Parses exactly one value starting at the front of the token stream.
//
// Returns the parsed value, or nullopt once the End token is read (the End
// token is consumed). An OpenParen starts a list, whose elements are read by
// parse_list() up to the matching CloseParen.
//
// Throws a C string (const char*) when a CloseParen appears where a value was
// expected. The thrown type is unchanged so existing handlers still match;
// only the placeholder text was replaced with an actual diagnostic.
optional<LispValue> Parser::parse_one() {
    Token t = get_token();
    switch (t.type) {
    case TokenType::Int:
        return Integer {get<int64_t>(*t.value)};
    case TokenType::Double:
        return Double {get<double>(*t.value)};
    // t is a local token that is discarded on return, so the string payload
    // can be moved out of it instead of being copied.
    case TokenType::String:
        return make_string(get<string>(std::move(*t.value)));
    case TokenType::Symbol:
        return make_symbol(get<string>(std::move(*t.value)));
    case TokenType::OpenParen:
        return parse_list();
    case TokenType::CloseParen:
        throw "Parser::parse_one: unexpected ')'";
    // I don't know what this will actually do, in theory maybe just like the OpenParen,
    // but parses things in a different namespace? unimplemented for now.
    // For now the Dollar token is skipped and the value after it is returned.
    case TokenType::Dollar:
        return parse_one();
    case TokenType::End:
        return nullopt;
    }
    // Reached only for a token type that is not listed above.
    return nullopt;
}
// Parses the elements of a list.
//
// Assumes the OpenParen has already been read; elements are parsed until the
// matching CloseParen is found. If the input ends first, an error is written
// to cerr and the elements gathered so far are returned.
LispValue Parser::parse_list() {
    List l;
    while (true) {
        Token t = get_token();
        if (t.type == TokenType::End) {
            // this is clearly an error!
            cerr << "Parser::parse_list: Input ended before list ended." << endl;
            break;
        }
        if (t.type == TokenType::CloseParen)
            break;
        // Not the end of the list: hand the token back so parse_one() sees it.
        unget_token(t);
        optional<LispValue> element = parse_one();
        if (!element) {
            // parse_one() can still hit the end of input even though the
            // token we peeked at was not End (e.g. the input "($"). The
            // empty optional must not be dereferenced, and there is nothing
            // left to read, so stop here.
            cerr << "Parser::parse_list: Input ended before list ended." << endl;
            break;
        }
        l.list.push_back(std::move(*element));
    }
    return l;
}
// Public entry point: parses the next value in the stream.
// The result is empty when parse_one() reports the end of the input.
optional<LispValue> Parser::next()
{
    optional<LispValue> parsed = parse_one();
    return parsed;
}

35
src/value.cpp Normal file
View File

@@ -0,0 +1,35 @@
#include <concepts>
#include <value.hpp>
// Generic printer, chosen at compile time by the kind of argument:
//  - a whole LispValue is handed to print_val;
//  - a type with a `value` member that can be streamed to std::cout has that
//    member printed;
//  - anything else prints a placeholder followed by a newline.
template <typename Value>
requires std::convertible_to<Value, LispValue>
void value_printer(Value item)
{
    if constexpr (std::same_as<Value, LispValue>) {
        print_val(item);
    }
    else if constexpr (requires { std::cout << item.value; }) {
        std::cout << item.value;
    }
    else {
        std::cout << "{UNKNOWN}" << std::endl;
    }
}
// Prints a list as "(a b c)": the elements are separated by single spaces
// and enclosed in parentheses. An empty list prints as "()".
template <>
void value_printer(List l) {
    std::cout << "(";
    bool first = true;
    // Iterate by reference: copying an element would deep-copy nested lists.
    for (const auto &element : l.list) {
        // Separator goes between elements only, so there is no stray space
        // before the closing paren.
        if (!first) {
            std::cout << " ";
        }
        first = false;
        value_printer(element);
    }
    std::cout << ")";
}
// Prints a string value surrounded by double quotes.
// The contents are written as they are; nothing is escaped.
template <>
void value_printer(String s)
{
    std::cout << '"';
    std::cout << s.value;
    std::cout << '"';
}
// Prints a value to std::cout by visiting the variant and passing whichever
// alternative is active to value_printer.
void print_val(LispValue v)
{
    auto print_alternative = [](auto arg) {
        value_printer(arg);
    };
    std::visit(print_alternative, v);
}