Compare commits
27 Commits
f93b2deda2
...
main
Author | SHA1 | Date | |
---|---|---|---|
![]() |
82a606e80b | ||
b3c2bcb92d | |||
a0124b791d | |||
fddbd9b03b | |||
0fc3ab2ea8 | |||
fd79376cfe | |||
75eb879993 | |||
01246cc0e1 | |||
a13dbcaa77 | |||
ed791d96f2 | |||
8a9655cdd5 | |||
b6c095caf1 | |||
7746fdda6f | |||
561c76b6d7 | |||
00fdc84d22 | |||
ec59b49c32 | |||
15176cdbf6 | |||
2dd10e08d9 | |||
a40487f84d | |||
47f33f3dc0 | |||
d0eae97771 | |||
e1580755e9 | |||
8fc3e82173 | |||
34d35d6039 | |||
a476d1b9e9 | |||
acc9b94c1f | |||
8d3cc2181e |
4
.gitignore
vendored
4
.gitignore
vendored
@@ -1,2 +1,4 @@
|
||||
build/*
|
||||
.cache
|
||||
.cache
|
||||
.idea
|
||||
compile_commands.json
|
@@ -6,13 +6,15 @@ steps:
|
||||
- name: build
|
||||
image: gcc:latest
|
||||
commands: # we probably shouldn't install cmake every time
|
||||
- apt update && apt install -y cmake
|
||||
- apt update && apt install -y cmake catch2
|
||||
- mkdir -p build/ && cd build
|
||||
- cmake ..
|
||||
- cmake .. --toolchain ../toolchain/sanitize.cmake
|
||||
- make
|
||||
- name: test
|
||||
image: ubuntu
|
||||
image: gcc:latest
|
||||
commands:
|
||||
# TODO: Probably make actual tests at some point
|
||||
# Automated tests, this should not fail
|
||||
- ./build/test
|
||||
# Manual test, you can see the output of this in woodpecker
|
||||
- echo "(print 42)" | ./build/main
|
||||
# TODO: add publish step, when we're at a working state.
|
@@ -1,10 +1,31 @@
|
||||
cmake_minimum_required(VERSION 3.16)
|
||||
project(lispy_stuff)
|
||||
# we'll use catch2 as testing library.
|
||||
# Catch2 version 3 or above needs to be installed on your system.
|
||||
find_package(Catch2 3 REQUIRED)
|
||||
|
||||
set(HEADER_FILES src/include/lex.hpp)
|
||||
set(SOURCE_FILES src/main.cpp src/lex.cpp)
|
||||
# we'll use a recent c++ standard.
|
||||
set(CMAKE_CXX_STANDARD 20)
|
||||
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||
|
||||
|
||||
add_executable(main ${SOURCE_FILES} ${HEADER_FILES})
|
||||
set(HEADER_FILES src/include/lex.hpp src/include/value.hpp src/include/parse.hpp)
|
||||
set(SOURCE_FILES src/lex.cpp src/parse.cpp src/value.cpp)
|
||||
set(CXX_WARNING_FLAGS -Wall -Wextra -Wpedantic -pedantic)
|
||||
|
||||
# we're not actually shipping a library yet,
|
||||
# this is so we don't have to compile twice for main and tests.
|
||||
add_library(libmash STATIC ${SOURCE_FILES} ${HEADER_FILES})
|
||||
target_include_directories(libmash PUBLIC src/include/)
|
||||
target_compile_options(libmash PRIVATE ${CXX_WARNING_FLAGS})
|
||||
|
||||
# Main target
|
||||
add_executable(main src/main.cpp)
|
||||
target_link_libraries(main libmash)
|
||||
target_compile_options(main PRIVATE ${CXX_WARNING_FLAGS})
|
||||
|
||||
# tests
|
||||
add_executable(test src/tests/test.cpp)
|
||||
target_link_libraries(test PRIVATE libmash Catch2::Catch2WithMain)
|
||||
target_compile_options(test PRIVATE ${CXX_WARNING_FLAGS})
|
||||
|
||||
target_include_directories(main PRIVATE src/include/)
|
50
README.md
50
README.md
@@ -1,14 +1,21 @@
|
||||
# Lispy stuff
|
||||
|
||||
[](https://ci.emin.software/repos/2)
|
||||
|
||||
Simple lisp-ish language compiler written in C++.
|
||||
|
||||
Right now it doesn't compile much - it's just a parser.
|
||||
The goal is to emit bytecode. The bytecode format is not decided yet.
|
||||
The end goal is to emit bytecode. The bytecode format is not decided yet.
|
||||
|
||||
## Build
|
||||
|
||||
I use cmake for the build system. I prefer to build out-of-tree,
|
||||
here's how to build if you've never used cmake:
|
||||
All you need is:
|
||||
|
||||
- CMake
|
||||
- A modern C++ compiler
|
||||
- The [Catch2](https://github.com/catchorg/Catch2) library v3 or higher installed on your system
|
||||
|
||||
Once you have these, you can build with:
|
||||
|
||||
```bash
|
||||
cd build
|
||||
@@ -16,11 +23,14 @@ cmake ..
|
||||
make
|
||||
```
|
||||
|
||||
This will build two executables, `main` and `test`. `test` runs all tests
|
||||
on the compiler itself.
|
||||
|
||||
## Development
|
||||
|
||||
I use clangd as the language server. Appropriate `compile_commands.json`
|
||||
(required for clangd, otherwise it can not find include files) is provided.
|
||||
If you'd like to generate them yourself, just use cmake:
|
||||
I use clangd as the language server. If you want your include files to be handled
|
||||
correctly, you'll need to generate `compile_commands.json` yourself. You can
|
||||
do this using cmake:
|
||||
|
||||
```bash
|
||||
cd build
|
||||
@@ -28,4 +38,32 @@ cmake -DCMAKE_EXPORT_COMPILE_COMMANDS=1 ..
|
||||
cp compile_commands.json ../
|
||||
```
|
||||
|
||||
After this, clangd should not give you errors on every included header.
|
||||
|
||||
### Toolchain
|
||||
|
||||
C++ can be somewhat error-prone, so in order to reduce the possibility of
|
||||
certain bugs, cmake toolchain options are provided that build the project
|
||||
with the undefined behaviour sanitizer and the address sanitizer.
|
||||
|
||||
You can build a sanitized version like this:
|
||||
|
||||
```
|
||||
cd build
|
||||
cmake .. --toolchain ../toolchain/sanitize.cmake
|
||||
make
|
||||
```
|
||||
|
||||
Catch2 testing framework is used to maintain code correctness.
|
||||
|
||||
|
||||
## Progress
|
||||
|
||||
Woodpecker CI/CD system is integrated.
|
||||
Currently using Catch2 for unit testing.
|
||||
|
||||
- Lexing - complete, more token types may be added as necessary
|
||||
- Parsing - the main steps are done, but things may change as time progresses
|
||||
- Optimizations -
|
||||
- Emitting Bytecode - the goal.
|
||||
|
||||
|
@@ -1,8 +0,0 @@
|
||||
[
|
||||
{
|
||||
"directory": "/home/haxala1r/Desktop/Programming/C++/lispy-stuff/build",
|
||||
"command": "/usr/bin/c++ -I/home/haxala1r/Desktop/Programming/C++/lispy-stuff/src/include -o CMakeFiles/main.dir/src/main.cpp.o -c /home/haxala1r/Desktop/Programming/C++/lispy-stuff/src/main.cpp",
|
||||
"file": "/home/haxala1r/Desktop/Programming/C++/lispy-stuff/src/main.cpp",
|
||||
"output": "CMakeFiles/main.dir/src/main.cpp.o"
|
||||
}
|
||||
]
|
109
docs/doc.md
Normal file
109
docs/doc.md
Normal file
@@ -0,0 +1,109 @@
|
||||
## Mash
|
||||
|
||||
### Base design:
|
||||
|
||||
#### Paradigm:
|
||||
|
||||
Dynamically typed, functional-first.
|
||||
|
||||
#### Syntax:
|
||||
|
||||
A clean subset of Lisp. It's the simplest to parse and will get us to a working state fastest.
|
||||
|
||||
- Atoms: 123, 4.56, "hello", my-var, true
|
||||
- Lists: (func arg1 arg2)
|
||||
- Almost everything else is syntax sugar that can be added as macros in the standard library. That is also probably easier to implement, and more modular.
|
||||
|
||||
#### Data Types:
|
||||
|
||||
Integer, Float, String, Boolean, List, Symbol, Function. That's it.
|
||||
|
||||
#### Memory:
|
||||
|
||||
Garbage Collected. Let's forget custom allocators for now, and just use Boehm or
|
||||
something.
|
||||
|
||||
#### Standard Library:
|
||||
|
||||
Basic math ops (+, -, *, /), comparison (=, <, >), cons, car, cdr, list, some form of arrays, and I/O functions. Keep it small, at least for the beginning.
|
||||
|
||||
## Basic operations
|
||||
|
||||
### Defining functions and variables
|
||||
|
||||
Let's just copy Scheme here, it has a very simple special form:
|
||||
|
||||
```
|
||||
(define x 5)
|
||||
(define (addTwo x y) (+ x y))
|
||||
```
|
||||
|
||||
This simple syntax is fairly readable and math-like.
|
||||
|
||||
It's a dynamic language, no type inference or funny business,
|
||||
no PhDs required.
|
||||
|
||||
Every form ("expression") evaluates to something. Variable definitions
|
||||
evaluate to the value assigned to the variable, or nil if it's
|
||||
an empty definition.
|
||||
|
||||
Function definitions evaluate to the value of their function.
|
||||
|
||||
Nil is the empty list, (). Common Lisp treats
|
||||
it as the canonical falsy value; Scheme has #t and #f, and treats
|
||||
nil as truthy. We can go with either choice but I lean towards
|
||||
Common Lisp here.
|
||||
|
||||
### Macros
|
||||
|
||||
We use CL-style macros, a macro is a function that receives its parameters
|
||||
unevaluated and runs completely at compile time, producing lisp code that
|
||||
will be compiled. Of course, it will produce a list.
|
||||
|
||||
I.e. using a macro `(foo (1 2 3) arg2)` is equivalent to doing
|
||||
`(eval (bar '(1 2 3) 'arg2))` assuming the function bar does the
|
||||
same transformations that foo would have done, except that the
|
||||
macro foo is evaluated at compile time.
|
||||
|
||||
## Special syntax
|
||||
|
||||
Let's NOT add too much syntax to the core.
|
||||
|
||||
The idea is, if we make a small core that has access to lisp macros,
|
||||
we can effectively add *any* syntax sugar we want by simply defining
|
||||
it as part of the standard library.
|
||||
|
||||
That's usually what Common Lisp does actually, most language constructs
|
||||
are actually functions or macros defined in the standard library.
|
||||
|
||||
The greatest power of a Lisp is its ability to extend syntax.
|
||||
Adding too much syntax too early defeats the purpose.
|
||||
Let's keep it small.
|
||||
|
||||
|
||||
#### Dollar sign
|
||||
|
||||
```
|
||||
(define x $(1/3 + 2^60))
|
||||
(define y (map (lambda (n) $(n * n)) (range 0 10)))
|
||||
```
|
||||
|
||||
This can be implemented later as a reader macro,
|
||||
e.g. $(1/3 + 2^60) expands to (math-syntax (1/3 + 2^60))
|
||||
math-syntax is a macro that expands this further to
|
||||
(+ (/ 1 3) (^ 2 60)).
|
||||
|
||||
So at the start, we don't need much syntax at all.
|
||||
|
||||
#### SQL sub-language
|
||||
|
||||
This can trivially be done as a library of functions,
|
||||
and macros can add whatever syntax sugar is desired.
|
||||
|
||||
## Evaluation strategy
|
||||
|
||||
In order for macros to be possible, the compiler must be able to execute
|
||||
code during compilation time. This is fine, we can simply keep a running
|
||||
"image" of all lisp forms compiled so far, and run code there.
|
||||
|
||||
We need a byte code VM for this. Lua bytecode is perfectly acceptable.
|
425
src/include/int.hpp
Normal file
425
src/include/int.hpp
Normal file
@@ -0,0 +1,425 @@
|
||||
#include <gmp.h>
|
||||
#include <gmpxx.h>
|
||||
#include <string>
|
||||
|
||||
/**
|
||||
* @brief Options for `base` parameter used in GMP's `mpz_set_str` and
|
||||
* `mpz_sizeinbase`.
|
||||
* @see https://gmplib.org/manual/Assigning-Integers
|
||||
*
|
||||
* `base_N`: digits only (bases 2-9); no letters are used, so case does not apply.
|
||||
* `ci_N`: case-insensitive (`A`=`10`, `a`=`10`)
|
||||
* `cs_N`: case-sensitive (`A`=`10`, `a`=`36`)
|
||||
*/
|
||||
enum class base {
|
||||
base_2 = 2,
|
||||
base_3 = 3,
|
||||
base_4 = 4,
|
||||
base_5 = 5,
|
||||
base_6 = 6,
|
||||
base_7 = 7,
|
||||
base_8 = 8,
|
||||
base_9 = 9,
|
||||
ci_10 = 10,
|
||||
ci_11 = 11,
|
||||
ci_12 = 12,
|
||||
ci_13 = 13,
|
||||
ci_14 = 14,
|
||||
ci_15 = 15,
|
||||
ci_16 = 16,
|
||||
ci_17 = 17,
|
||||
ci_18 = 18,
|
||||
ci_19 = 19,
|
||||
ci_20 = 20,
|
||||
ci_21 = 21,
|
||||
ci_22 = 22,
|
||||
ci_23 = 23,
|
||||
ci_24 = 24,
|
||||
ci_25 = 25,
|
||||
ci_26 = 26,
|
||||
ci_27 = 27,
|
||||
ci_28 = 28,
|
||||
ci_29 = 29,
|
||||
ci_30 = 30,
|
||||
ci_31 = 31,
|
||||
ci_32 = 32,
|
||||
ci_33 = 33,
|
||||
ci_34 = 34,
|
||||
ci_35 = 35,
|
||||
ci_36 = 36,
|
||||
cs_37 = 37,
|
||||
cs_38 = 38,
|
||||
cs_39 = 39,
|
||||
cs_40 = 40,
|
||||
cs_41 = 41,
|
||||
cs_42 = 42,
|
||||
cs_43 = 43,
|
||||
cs_44 = 44,
|
||||
cs_45 = 45,
|
||||
cs_46 = 46,
|
||||
cs_47 = 47,
|
||||
cs_48 = 48,
|
||||
cs_49 = 49,
|
||||
cs_50 = 50,
|
||||
cs_51 = 51,
|
||||
cs_52 = 52,
|
||||
cs_53 = 53,
|
||||
cs_54 = 54,
|
||||
cs_55 = 55,
|
||||
cs_56 = 56,
|
||||
cs_57 = 57,
|
||||
cs_58 = 58,
|
||||
cs_59 = 59,
|
||||
cs_60 = 60,
|
||||
cs_61 = 61,
|
||||
cs_62 = 62,
|
||||
binary = base_2,
|
||||
octal = base_8,
|
||||
decimal = ci_10,
|
||||
hexadecimal = ci_16,
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Options for `base` parameter used in GMP's `mpz_get_str`.
|
||||
* @see https://gmplib.org/manual/Converting-Integers
|
||||
*
|
||||
* `base_N`: bases 37-62, which are case-sensitive and therefore have no separate upper- and lower-case variants.
|
||||
*
|
||||
* `lc_N`: lower-case, base N. All letters are lower case.
|
||||
*
|
||||
* `uc_N`: upper-case, base N. All letters are upper case.
|
||||
*/
|
||||
enum class print_base {
|
||||
lc_2 = 2,
|
||||
lc_3 = 3,
|
||||
lc_4 = 4,
|
||||
lc_5 = 5,
|
||||
lc_6 = 6,
|
||||
lc_7 = 7,
|
||||
lc_8 = 8,
|
||||
lc_9 = 9,
|
||||
lc_10 = 10,
|
||||
lc_11 = 11,
|
||||
lc_12 = 12,
|
||||
lc_13 = 13,
|
||||
lc_14 = 14,
|
||||
lc_15 = 15,
|
||||
lc_16 = 16,
|
||||
lc_17 = 17,
|
||||
lc_18 = 18,
|
||||
lc_19 = 19,
|
||||
lc_20 = 20,
|
||||
lc_21 = 21,
|
||||
lc_22 = 22,
|
||||
lc_23 = 23,
|
||||
lc_24 = 24,
|
||||
lc_25 = 25,
|
||||
lc_26 = 26,
|
||||
lc_27 = 27,
|
||||
lc_28 = 28,
|
||||
lc_29 = 29,
|
||||
lc_30 = 30,
|
||||
lc_31 = 31,
|
||||
lc_32 = 32,
|
||||
lc_33 = 33,
|
||||
lc_34 = 34,
|
||||
lc_35 = 35,
|
||||
lc_36 = 36,
|
||||
uc_2 = -2,
|
||||
uc_3 = -3,
|
||||
uc_4 = -4,
|
||||
uc_5 = -5,
|
||||
uc_6 = -6,
|
||||
uc_7 = -7,
|
||||
uc_8 = -8,
|
||||
uc_9 = -9,
|
||||
uc_10 = -10,
|
||||
uc_11 = -11,
|
||||
uc_12 = -12,
|
||||
uc_13 = -13,
|
||||
uc_14 = -14,
|
||||
uc_15 = -15,
|
||||
uc_16 = -16,
|
||||
uc_17 = -17,
|
||||
uc_18 = -18,
|
||||
uc_19 = -19,
|
||||
uc_20 = -20,
|
||||
uc_21 = -21,
|
||||
uc_22 = -22,
|
||||
uc_23 = -23,
|
||||
uc_24 = -24,
|
||||
uc_25 = -25,
|
||||
uc_26 = -26,
|
||||
uc_27 = -27,
|
||||
uc_28 = -28,
|
||||
uc_29 = -29,
|
||||
uc_30 = -30,
|
||||
uc_31 = -31,
|
||||
uc_32 = -32,
|
||||
uc_33 = -33,
|
||||
uc_34 = -34,
|
||||
uc_35 = -35,
|
||||
uc_36 = -36,
|
||||
base_37 = 37,
|
||||
base_38 = 38,
|
||||
base_39 = 39,
|
||||
base_40 = 40,
|
||||
base_41 = 41,
|
||||
base_42 = 42,
|
||||
base_43 = 43,
|
||||
base_44 = 44,
|
||||
base_45 = 45,
|
||||
base_46 = 46,
|
||||
base_47 = 47,
|
||||
base_48 = 48,
|
||||
base_49 = 49,
|
||||
base_50 = 50,
|
||||
base_51 = 51,
|
||||
base_52 = 52,
|
||||
base_53 = 53,
|
||||
base_54 = 54,
|
||||
base_55 = 55,
|
||||
base_56 = 56,
|
||||
base_57 = 57,
|
||||
base_58 = 58,
|
||||
base_59 = 59,
|
||||
base_60 = 60,
|
||||
base_61 = 61,
|
||||
base_62 = 62,
|
||||
octal_lower_case = lc_8,
|
||||
hex_lower_case = lc_16,
|
||||
decimal = lc_10,
|
||||
binary = lc_2,
|
||||
octal_upper_case = uc_8,
|
||||
hex_upper_case = uc_16,
|
||||
};
|
||||
|
||||
base print_base_to_base(print_base);
|
||||
|
||||
/**
|
||||
* @brief is an high-performance, arbitrary precision Integer class for mash
|
||||
*/
|
||||
class Int {
|
||||
private:
|
||||
mpz_t inner;
|
||||
|
||||
public:
|
||||
Int();
|
||||
Int(const Int &);
|
||||
Int(unsigned long);
|
||||
Int(long);
|
||||
Int(double);
|
||||
Int(const std::string &, base);
|
||||
|
||||
unsigned long to_ul();
|
||||
long to_l();
|
||||
double to_d();
|
||||
std::string to_string(print_base);
|
||||
|
||||
void swap(Int &);
|
||||
void operator+(const Int &);
|
||||
void operator+(unsigned long);
|
||||
void operator+(long);
|
||||
|
||||
void operator-(const Int &);
|
||||
void operator-(unsigned long);
|
||||
void operator-(long);
|
||||
|
||||
void operator*(const Int &);
|
||||
void operator*(unsigned long);
|
||||
void operator*(long);
|
||||
|
||||
void operator/(const Int &);
|
||||
void operator/(unsigned long);
|
||||
void operator/(long);
|
||||
|
||||
void operator%(const Int &);
|
||||
void operator%(unsigned long);
|
||||
|
||||
void add_mul(Int &);
|
||||
void add_mul(unsigned long);
|
||||
void add_mul(long);
|
||||
|
||||
void sub_mul(Int &);
|
||||
void sub_mul(unsigned long);
|
||||
void sub_mul(long);
|
||||
|
||||
void mul_2exp(unsigned long);
|
||||
|
||||
void operator~();
|
||||
|
||||
void abs();
|
||||
|
||||
void ceil_div_quotient(Int &);
|
||||
void ceil_div_remainder(Int &);
|
||||
void ceil_div_both(Int &);
|
||||
void ceil_div(unsigned long);
|
||||
void ceil_div_both(unsigned long);
|
||||
void ceil_div_quotient(unsigned long);
|
||||
void ceil_div_remainder(unsigned long);
|
||||
void ceil_div(long);
|
||||
void ceil_div_both(long);
|
||||
void ceil_div_quotient(long);
|
||||
void ceil_div_remainder(long);
|
||||
void ceil_div_quotient_2exp(unsigned long);
|
||||
void ceil_div_remainder_2exp(unsigned long);
|
||||
|
||||
void floor_div_quotient(Int &);
|
||||
void floor_div_remainder(Int &);
|
||||
void floor_div_both(Int &);
|
||||
void floor_div(unsigned long);
|
||||
void floor_div_both(unsigned long);
|
||||
void floor_div_quotient(unsigned long);
|
||||
void floor_div_remainder(unsigned long);
|
||||
void floor_div(long);
|
||||
void floor_div_both(long);
|
||||
void floor_div_quotient(long);
|
||||
void floor_div_remainder(long);
|
||||
void floor_div_quotient_2exp(unsigned long);
|
||||
void floor_div_remainder_2exp(unsigned long);
|
||||
|
||||
void truncate_div_quotient(Int &);
|
||||
void truncate_div_remainder(Int &);
|
||||
void truncate_div_both(Int &);
|
||||
void truncate_div(unsigned long);
|
||||
void truncate_div_both(unsigned long);
|
||||
void truncate_div_quotient(unsigned long);
|
||||
void truncate_div_remainder(unsigned long);
|
||||
void truncate_div(long);
|
||||
void truncate_div_both(long);
|
||||
void truncate_div_quotient(long);
|
||||
void truncate_div_remainder(long);
|
||||
void truncate_div_quotient_2exp(unsigned long);
|
||||
void truncate_div_remainder_2exp(unsigned long);
|
||||
|
||||
void mod(Int &);
|
||||
void mod(unsigned long);
|
||||
void mod(long);
|
||||
|
||||
void div_exact(Int &);
|
||||
void div_exact(unsigned long);
|
||||
void div_exact(long);
|
||||
|
||||
int divisible(Int &);
|
||||
int divisible(unsigned long);
|
||||
int divisible(long);
|
||||
int divisible_2exp(unsigned long);
|
||||
|
||||
int congruent(Int &);
|
||||
int congruent(unsigned long);
|
||||
int congruent(long);
|
||||
int congruent_2exp(unsigned long);
|
||||
|
||||
void mod_pow(Int &);
|
||||
void mod_pow(unsigned long);
|
||||
void mod_pow(long);
|
||||
|
||||
void powm_sec(Int &);
|
||||
|
||||
void pow(Int &);
|
||||
void pow(unsigned long, unsigned long);
|
||||
void pow(long, unsigned long);
|
||||
|
||||
~Int() { mpz_clear(inner); };
|
||||
};
|
||||
|
||||
/*
|
||||
///
|
||||
/// Enum for the number's sign.
|
||||
///
|
||||
/// `Zero` is for when number is zero.
|
||||
///
|
||||
pub const Sign = enum(i8) {
|
||||
Negative = -1,
|
||||
Positive = 1,
|
||||
Zero = 0,
|
||||
|
||||
pub fn toChar(self: @This()) u8 {
|
||||
return switch (self) {
|
||||
.Negative => '-',
|
||||
.Positive => '+',
|
||||
.Zero => '0',
|
||||
};
|
||||
}
|
||||
|
||||
pub fn toString(self: @This()) []const u8 {
|
||||
return switch (self) {
|
||||
.Negative => "negative",
|
||||
.Positive => "positive",
|
||||
.Zero => "zero",
|
||||
};
|
||||
}
|
||||
};
|
||||
|
||||
///
|
||||
/// Enum for the comparison of two numbers.
|
||||
///
|
||||
pub const Ordering = enum(i8) {
|
||||
Less = -1,
|
||||
Greater = 1,
|
||||
Equals = 0,
|
||||
|
||||
const Self = @This();
|
||||
|
||||
pub fn toChar(self: Self) u8 {
|
||||
return switch (self) {
|
||||
.Less => '<',
|
||||
.Greater => '>',
|
||||
.Equals => '=',
|
||||
};
|
||||
}
|
||||
|
||||
pub fn toString(self: Self) []const u8 {
|
||||
return switch (self) {
|
||||
.Less => "less",
|
||||
.Greater => "greater",
|
||||
.Equals => "equals",
|
||||
};
|
||||
}
|
||||
|
||||
pub fn fromC(num: c_int) Self {
|
||||
if (num < 0) return .Less;
|
||||
if (num > 0) return .Greater;
|
||||
return .Equals;
|
||||
}
|
||||
};
|
||||
|
||||
/// Enum for the number's parity.
|
||||
pub const Parity = enum(i8) {
|
||||
Even = 0,
|
||||
Odd = 1,
|
||||
|
||||
pub fn toString(self: @This()) []const u8 {
|
||||
return switch (self) {
|
||||
.Even => "even",
|
||||
.Odd => "odd",
|
||||
};
|
||||
}
|
||||
};
|
||||
|
||||
////
|
||||
/// Rounding methods for division.
|
||||
///
|
||||
/// `Truncate` rounds the integer towards 0.
|
||||
/// `Ceil` rounds the integer towards positive infinity.
|
||||
/// `Floor` rounds the integer towards negative infinity.
|
||||
///
|
||||
/// |`Number`|`Truncate`|`Ceil`|`Floor`|
|
||||
/// |--------|----------|------|-------|
|
||||
/// |`4.5` |`4` |`5` |`4` |
|
||||
/// |`-4.5` |`-4` |`-4` |`-5` |
|
||||
///
|
||||
pub const RoundingMethod = enum(i8) {
|
||||
Truncate = 0,
|
||||
Ceil = 1,
|
||||
Floor = -1,
|
||||
};
|
||||
|
||||
///
|
||||
/// Option for which result will be written to the integer.
|
||||
///
|
||||
pub const Output = enum(i8) {
|
||||
Quotient = 0,
|
||||
Remainder = 1,
|
||||
};
|
||||
*/
|
@@ -1,10 +1,11 @@
|
||||
#pragma once
|
||||
#include <deque>
|
||||
#include <sstream>
|
||||
#include <vector>
|
||||
#include <stdint.h>
|
||||
#include <variant>
|
||||
#include <optional>
|
||||
|
||||
enum TokenType {
|
||||
enum class TokenType {
|
||||
OpenParen,
|
||||
CloseParen,
|
||||
Dollar,
|
||||
@@ -12,14 +13,16 @@ enum TokenType {
|
||||
String,
|
||||
Int,
|
||||
Double,
|
||||
Quote,
|
||||
End
|
||||
};
|
||||
|
||||
// Plain Old Data
|
||||
struct Token {
|
||||
enum TokenType type;
|
||||
std::variant<int64_t, double, std::string> value;
|
||||
std::optional<std::variant<int64_t, double, std::string>> value;
|
||||
};
|
||||
bool operator==(Token const& one, Token const& other);
|
||||
std::ostream &operator<<(std::ostream &os, Token const &t);
|
||||
|
||||
class Lexer {
|
||||
@@ -38,8 +41,8 @@ public:
|
||||
void feed(std::string);
|
||||
|
||||
Token next();
|
||||
std::vector<Token> collect();
|
||||
std::deque<Token> collect();
|
||||
};
|
||||
|
||||
// when you don't want to construct the object
|
||||
std::vector<Token> lex(std::string);
|
||||
std::deque<Token> lex(std::string);
|
||||
|
33
src/include/parse.hpp
Normal file
33
src/include/parse.hpp
Normal file
@@ -0,0 +1,33 @@
|
||||
#pragma once
|
||||
#include <value.hpp>
|
||||
#include <lex.hpp>
|
||||
|
||||
|
||||
|
||||
|
||||
// The Parser produces a regular lisp value.
|
||||
// lisp code is made of lisp lists and atoms.
|
||||
class Parser {
|
||||
private:
|
||||
// the token stream.
|
||||
std::deque<Token> ts;
|
||||
Token get_token();
|
||||
void unget_token(Token);
|
||||
|
||||
// these may need to be interned later
|
||||
String make_string(std::string);
|
||||
Symbol make_symbol(std::string);
|
||||
|
||||
|
||||
std::optional<LispValue> parse_one();
|
||||
LispValue parse_quote();
|
||||
LispValue parse_list();
|
||||
|
||||
public:
|
||||
Parser(Lexer);
|
||||
|
||||
void feed(Lexer);
|
||||
|
||||
std::optional<LispValue> next();
|
||||
};
|
||||
|
31
src/include/value.hpp
Normal file
31
src/include/value.hpp
Normal file
@@ -0,0 +1,31 @@
|
||||
#pragma once
|
||||
|
||||
#include <deque>
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
#include <variant>
|
||||
|
||||
// we're using a pure variant as our value type.
|
||||
struct Integer {int64_t value;};
|
||||
struct Double {double value;};
|
||||
struct String {std::string value;}; // might be a good idea to intern strings
|
||||
struct Symbol {std::string value;};
|
||||
struct List;
|
||||
struct Nil {};
|
||||
|
||||
|
||||
using LispValue = std::variant<Integer, Double, String, Symbol, List>;
|
||||
struct List {std::deque<LispValue> list;};
|
||||
// during compilation, we don't really care for cyclical lists etc.
|
||||
// during compilation we'll mostly be dealing with regular, flat lists
|
||||
// that form function calls.
|
||||
// We will have a different set of values during runtime
|
||||
// as the runtime will be a bytecode interpreter anyhow.
|
||||
|
||||
|
||||
|
||||
void print_val(LispValue);
|
||||
|
||||
String make_string(std::string);
|
||||
Symbol make_symbol(std::string);
|
||||
|
342
src/int.cpp
Normal file
342
src/int.cpp
Normal file
@@ -0,0 +1,342 @@
|
||||
#include <cmath>
|
||||
#include <cstdlib>
|
||||
#include <gmp.h>
|
||||
#include <int.hpp>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
|
||||
Int::Int() { mpz_init(this->inner); };
|
||||
Int::Int(const Int &big) { mpz_init_set(this->inner, big.inner); };
|
||||
Int::Int(unsigned long ul) { mpz_init_set_ui(this->inner, ul); };
|
||||
Int::Int(long l) { mpz_init_set_si(this->inner, l); };
|
||||
Int::Int(double d) { mpz_init_set_d(this->inner, d); };
|
||||
/**
 * @brief Constructs an Int by parsing `str` as a number written in base `b`.
 *
 * @param str Textual representation of the number.
 * @param b   Radix passed to GMP's `mpz_init_set_str`.
 * @throws std::exception if GMP reports that `str` is not a valid number in
 *         base `b`.
 */
Int::Int(const std::string &str, base b) {
  // mpz_init_set_str returns 0 on success and -1 on failure (never 1).
  const int status =
      mpz_init_set_str(this->inner, str.c_str(), static_cast<int>(b));

  if (status != 0) {
    // GMP initialises `inner` even when parsing fails, and the destructor
    // does not run for an object whose constructor throws, so release it
    // here to avoid a leak.
    mpz_clear(this->inner);
    throw std::exception();
  }
}
|
||||
|
||||
/**
 * @brief Converts an output base (`print_base`) to the matching input `base`.
 *
 * `print_base` encodes the radix in the enumerator's integer value: `lc_N`
 * and `base_N` are `N`, while `uc_N` is `-N`. Every `base` enumerator has
 * its radix as its value, so the magnitude of the `print_base` value is
 * exactly the `base` enumerator to return.
 *
 * @param p_base Output base to convert; expected to be a declared
 *               `print_base` enumerator.
 * @return The `base` enumerator with the same radix.
 */
base print_base_to_base(print_base p_base) {
  const int raw = static_cast<int>(p_base);

  // Upper-case variants are stored as negative radices; strip the sign.
  const int radix = raw < 0 ? -raw : raw;

  return static_cast<base>(radix);
}
|
||||
|
||||
unsigned long Int::to_ul() { return mpz_get_ui(this->inner); };
|
||||
long Int::to_l() { return mpz_get_si(this->inner); };
|
||||
double Int::to_d() { return mpz_get_d(this->inner); };
|
||||
/**
 * @brief Renders the value as text in the requested base.
 *
 * @param base Output base; its integer value is forwarded unchanged to
 *             `mpz_get_str` (negative values select upper-case digits).
 * @return The digits (with a leading '-' for negative values) as a string.
 */
std::string Int::to_string(print_base base) {
  // Passing a null buffer makes GMP allocate one with its current allocator.
  char *digits = mpz_get_str(nullptr, static_cast<int>(base), this->inner);

  std::string text(digits);

  // The buffer must be returned through GMP's current deallocator, which is
  // only `free` when no custom allocation functions were installed. GMP
  // documents the block as exactly strlen + 1 bytes long.
  void (*release)(void *, size_t) = nullptr;
  mp_get_memory_functions(nullptr, nullptr, &release);
  release(digits, text.size() + 1);

  return text;
}
|
||||
|
||||
void Int::swap(Int &rhs) { mpz_swap(this->inner, rhs.inner); }
|
||||
|
||||
void Int::operator+(const Int &rhs) {
|
||||
mpz_add(this->inner, this->inner, rhs.inner);
|
||||
};
|
||||
|
||||
void Int::operator+(unsigned long rhs) {
|
||||
mpz_add_ui(this->inner, this->inner, rhs);
|
||||
};
|
||||
|
||||
/**
 * @brief Adds a signed machine integer to this value in place.
 *
 * @param rhs Amount to add; may be negative.
 */
void Int::operator+(long rhs) {
  if (rhs < 0) {
    // Negate in unsigned arithmetic: std::abs(LONG_MIN) would overflow,
    // whereas 0UL - (unsigned long)rhs is well defined for every long.
    const unsigned long magnitude = 0UL - static_cast<unsigned long>(rhs);
    mpz_sub_ui(this->inner, this->inner, magnitude);
    return;
  }

  mpz_add_ui(this->inner, this->inner, static_cast<unsigned long>(rhs));
}
|
||||
|
||||
void Int::operator-(const Int &rhs) {
|
||||
mpz_sub(this->inner, this->inner, rhs.inner);
|
||||
};
|
||||
|
||||
void Int::operator-(unsigned long rhs) {
|
||||
mpz_sub_ui(this->inner, this->inner, rhs);
|
||||
};
|
||||
|
||||
/**
 * @brief Subtracts a signed machine integer from this value in place.
 *
 * @param rhs Amount to subtract; may be negative.
 */
void Int::operator-(long rhs) {
  if (rhs < 0) {
    // Negate in unsigned arithmetic: std::abs(LONG_MIN) would overflow,
    // whereas 0UL - (unsigned long)rhs is well defined for every long.
    const unsigned long magnitude = 0UL - static_cast<unsigned long>(rhs);
    mpz_add_ui(this->inner, this->inner, magnitude);
    return;
  }

  mpz_sub_ui(this->inner, this->inner, static_cast<unsigned long>(rhs));
}
|
||||
|
||||
void Int::operator*(const Int &rhs) {
|
||||
mpz_mul(this->inner, this->inner, rhs.inner);
|
||||
};
|
||||
|
||||
void Int::operator*(unsigned long rhs) {
|
||||
mpz_mul_ui(this->inner, this->inner, rhs);
|
||||
};
|
||||
|
||||
/**
 * @brief Multiplies this value in place by a signed machine integer.
 *
 * @param rhs Factor; may be negative.
 */
void Int::operator*(long rhs) {
  // GMP handles signed factors directly, which avoids the std::abs(LONG_MIN)
  // overflow of the manual "multiply by magnitude, then negate" approach.
  mpz_mul_si(this->inner, this->inner, rhs);
}
|
||||
|
||||
void Int::operator/(const Int &rhs) {
|
||||
mpz_tdiv_q(this->inner, this->inner, rhs.inner);
|
||||
};
|
||||
|
||||
void Int::operator/(unsigned long rhs) {
|
||||
mpz_tdiv_q_ui(this->inner, this->inner, rhs);
|
||||
};
|
||||
|
||||
/**
 * @brief Divides this value in place by a signed machine integer, using
 *        GMP's `tdiv` (truncating) quotient.
 *
 * @param rhs Divisor; may be negative.
 */
void Int::operator/(long rhs) {
  if (rhs < 0) {
    // Negate in unsigned arithmetic: std::abs(LONG_MIN) would overflow,
    // whereas 0UL - (unsigned long)rhs is well defined for every long.
    const unsigned long magnitude = 0UL - static_cast<unsigned long>(rhs);
    mpz_tdiv_q_ui(this->inner, this->inner, magnitude);
    // Dividing by |rhs| and flipping the sign equals dividing by rhs.
    mpz_neg(this->inner, this->inner);
    return;
  }

  mpz_tdiv_q_ui(this->inner, this->inner, static_cast<unsigned long>(rhs));
}
|
||||
|
||||
void Int::operator%(const Int &rhs) {
|
||||
mpz_mod(this->inner, this->inner, rhs.inner);
|
||||
};
|
||||
|
||||
void Int::operator%(unsigned long rhs) {
|
||||
mpz_mod_ui(this->inner, this->inner, rhs);
|
||||
};
|
||||
|
||||
void add_mul(Int &);
|
||||
void add_mul(unsigned long);
|
||||
void add_mul(long);
|
||||
|
||||
void sub_mul(Int &);
|
||||
void sub_mul(unsigned long);
|
||||
void sub_mul(long);
|
||||
|
||||
void mul_2exp(unsigned long);
|
||||
|
||||
// Negates this value in place. Note that this class uses `~` for arithmetic
// negation (mpz_neg), not for a bitwise complement.
void Int::operator~()
{
    mpz_neg(inner, inner);
}
|
||||
|
||||
void abs();
|
||||
|
||||
void ceil_div_quotient(Int &);
|
||||
void ceil_div_remainder(Int &);
|
||||
void ceil_div_both(Int &);
|
||||
void ceil_div(unsigned long);
|
||||
void ceil_div_both(unsigned long);
|
||||
void ceil_div_quotient(unsigned long);
|
||||
void ceil_div_remainder(unsigned long);
|
||||
void ceil_div(long);
|
||||
void ceil_div_both(long);
|
||||
void ceil_div_quotient(long);
|
||||
void ceil_div_remainder(long);
|
||||
void ceil_div_quotient_2exp(unsigned long);
|
||||
void ceil_div_remainder_2exp(unsigned long);
|
||||
|
||||
void floor_div_quotient(Int &);
|
||||
void floor_div_remainder(Int &);
|
||||
void floor_div_both(Int &);
|
||||
void floor_div(unsigned long);
|
||||
void floor_div_both(unsigned long);
|
||||
void floor_div_quotient(unsigned long);
|
||||
void floor_div_remainder(unsigned long);
|
||||
void floor_div(long);
|
||||
void floor_div_both(long);
|
||||
void floor_div_quotient(long);
|
||||
void floor_div_remainder(long);
|
||||
void floor_div_quotient_2exp(unsigned long);
|
||||
void floor_div_remainder_2exp(unsigned long);
|
||||
|
||||
void truncate_div_quotient(Int &);
|
||||
void truncate_div_remainder(Int &);
|
||||
void truncate_div_both(Int &);
|
||||
void truncate_div(unsigned long);
|
||||
void truncate_div_both(unsigned long);
|
||||
void truncate_div_quotient(unsigned long);
|
||||
void truncate_div_remainder(unsigned long);
|
||||
void truncate_div(long);
|
||||
void truncate_div_both(long);
|
||||
void truncate_div_quotient(long);
|
||||
void truncate_div_remainder(long);
|
||||
void truncate_div_quotient_2exp(unsigned long);
|
||||
void truncate_div_remainder_2exp(unsigned long);
|
||||
|
||||
void mod(Int &);
|
||||
void mod(unsigned long);
|
||||
void mod(long);
|
||||
|
||||
void div_exact(Int &);
|
||||
void div_exact(unsigned long);
|
||||
void div_exact(long);
|
||||
|
||||
int divisible(Int &);
|
||||
int divisible(unsigned long);
|
||||
int divisible(long);
|
||||
int divisible_2exp(unsigned long);
|
||||
|
||||
int congruent(Int &);
|
||||
int congruent(unsigned long);
|
||||
int congruent(long);
|
||||
int congruent_2exp(unsigned long);
|
39
src/lex.cpp
39
src/lex.cpp
@@ -12,10 +12,11 @@ std::ostream &operator<<(std::ostream &os, Token const &t) {
|
||||
case TokenType::OpenParen: os << "OpenParen)"; break;
|
||||
case TokenType::CloseParen: os << "CloseParen)"; break;
|
||||
case TokenType::Dollar: os << "Dollar)"; break;
|
||||
case TokenType::Symbol: os << "Symbol, " << get<string>(t.value) << ")"; break;
|
||||
case TokenType::String: os << "String, \"" << get<string>(t.value) << "\")"; break;
|
||||
case TokenType::Int: os << "Int, " << get<int64_t>(t.value) << ")"; break;
|
||||
case TokenType::Double: os << "Double, " << get<double>(t.value) << ")"; break;
|
||||
case TokenType::Quote: os << "QUOTE)"; break;
|
||||
case TokenType::Symbol: os << "Symbol, " << get<string>(*t.value) << ")"; break;
|
||||
case TokenType::String: os << "String, \"" << get<string>(*t.value) << "\")"; break;
|
||||
case TokenType::Int: os << "Int, " << get<int64_t>(*t.value) << ")"; break;
|
||||
case TokenType::Double: os << "Double, " << get<double>(*t.value) << ")"; break;
|
||||
case TokenType::End: os << "END)"; break;
|
||||
default:
|
||||
os << ")";
|
||||
@@ -23,6 +24,10 @@ std::ostream &operator<<(std::ostream &os, Token const &t) {
|
||||
return os;
|
||||
}
|
||||
|
||||
// Two tokens are equal when both their type and their payload match.
bool operator==(Token const& one, Token const& other) {
    if (one.type != other.type)
        return false;
    return one.value == other.value;
}
|
||||
|
||||
bool ispunct(char c) {
|
||||
for (char i : "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~") {
|
||||
if (i == c) return true;
|
||||
@@ -61,9 +66,10 @@ Token Lexer::lexNumOrSym() {
|
||||
// ... this will almost certainly change, won't it?
|
||||
string s = acc.str();
|
||||
string iterate_over = (s.at(0) == '-') ? s.substr(1) : s;
|
||||
bool is_number = true;
|
||||
bool is_number = false;
|
||||
bool dot_seen = false;
|
||||
for (char c : s) {
|
||||
for (char c : iterate_over) {
|
||||
|
||||
if (c == '.') {
|
||||
if (dot_seen) {
|
||||
is_number = false;
|
||||
@@ -72,14 +78,16 @@ Token Lexer::lexNumOrSym() {
|
||||
dot_seen = true;
|
||||
continue;
|
||||
}
|
||||
if (!isdigit(c)) {
|
||||
if (isdigit(c)) {
|
||||
is_number = true;
|
||||
} else {
|
||||
is_number = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (is_number && dot_seen) {
|
||||
if (s == ".")
|
||||
if (iterate_over == ".")
|
||||
return {TokenType::Symbol, s};
|
||||
return {TokenType::Double, stod(s)};
|
||||
} else if (is_number) {
|
||||
@@ -124,14 +132,15 @@ Token Lexer::next() {
|
||||
// character while at EOF, even if we have exhausted the stream.
|
||||
char c = ss.get();
|
||||
if (ss.eof())
|
||||
return {TokenType::End};
|
||||
return {TokenType::End, nullopt};
|
||||
|
||||
if (isspace(c))
|
||||
continue;
|
||||
switch (c) {
|
||||
case '(': return {TokenType::OpenParen};
|
||||
case ')': return {TokenType::CloseParen};
|
||||
case '$': return {TokenType::Dollar};
|
||||
case '(': return {TokenType::OpenParen, nullopt};
|
||||
case ')': return {TokenType::CloseParen, nullopt};
|
||||
case '$': return {TokenType::Dollar, nullopt};
|
||||
case '\'': return {TokenType::Quote, nullopt};
|
||||
default:
|
||||
ss.unget();
|
||||
return lexNonSpecial();
|
||||
@@ -139,8 +148,8 @@ Token Lexer::next() {
|
||||
}
|
||||
}
|
||||
|
||||
vector<Token> Lexer::collect() {
|
||||
vector<Token> v;
|
||||
deque<Token> Lexer::collect() {
|
||||
deque<Token> v;
|
||||
while (true) {
|
||||
Token t = next();
|
||||
if (t.type == TokenType::End)
|
||||
@@ -151,7 +160,7 @@ vector<Token> Lexer::collect() {
|
||||
return v;
|
||||
}
|
||||
|
||||
std::vector<Token> lex(std::string s) {
|
||||
std::deque<Token> lex(std::string s) {
|
||||
Lexer l(s);
|
||||
return l.collect();
|
||||
}
|
||||
|
@@ -1,5 +1,7 @@
|
||||
#include "value.hpp"
|
||||
#include <iostream>
|
||||
#include <lex.hpp>
|
||||
#include <parse.hpp>
|
||||
#include <string>
|
||||
|
||||
using namespace std;
|
||||
@@ -8,10 +10,9 @@ int main() {
|
||||
string s;
|
||||
getline(cin, s);
|
||||
cout << s << endl;
|
||||
|
||||
for (auto t : lex(s)) {
|
||||
cout << t << " ";
|
||||
}
|
||||
Parser p(s);
|
||||
print_val(*p.next());
|
||||
|
||||
cout << endl;
|
||||
return 0;
|
||||
}
|
100
src/parse.cpp
Normal file
100
src/parse.cpp
Normal file
@@ -0,0 +1,100 @@
|
||||
#include <cctype>
|
||||
#include <exception>
|
||||
#include <lex.hpp>
|
||||
|
||||
#include <algorithm>
|
||||
#include <parse.hpp>
|
||||
|
||||
#include <iostream>
|
||||
|
||||
using namespace std;
|
||||
|
||||
// Builds a parser whose token queue holds everything the lexer collects.
Parser::Parser(Lexer l)
    : ts(l.collect())
{
}
|
||||
|
||||
// Collects the tokens of `lexer` and appends them, in order, to the back of
// the parser's pending token queue.
void Parser::feed(Lexer lexer) {
    for (const Token &tok : lexer.collect())
        ts.push_back(tok);
}
|
||||
|
||||
// Removes and returns the token at the front of the queue.
// If the queue is empty, logs a message to stderr and throws std::exception.
Token Parser::get_token() {
    if (!ts.empty()) {
        Token head = ts.front();
        ts.pop_front();
        return head;
    }

    cerr << "Parser::get_token: Token requested at input end." << endl;
    throw exception();
}
|
||||
// Puts a token back at the front of the queue so that the next get_token()
// call returns it again.
void Parser::unget_token(Token t)
{
    ts.emplace_front(t);
}
|
||||
|
||||
// Wraps the given text in a String value, unchanged.
String Parser::make_string(string s)
{
    String wrapped {s};
    return wrapped;
}
|
||||
|
||||
// Builds a Symbol from the given name, upper-casing it first so that symbols
// compare case-insensitively (e.g. "print" becomes "PRINT").
Symbol Parser::make_symbol(string s) {
    // ::toupper takes an int that must be representable as unsigned char (or
    // EOF). Passing a plain, possibly negative char is undefined behaviour,
    // so every character goes through unsigned char first.
    transform(s.begin(), s.end(), s.begin(),
              [](unsigned char c) { return static_cast<char>(::toupper(c)); });
    return Symbol {s};
}
|
||||
|
||||
// Parses the value following a quote token and wraps it in a (QUOTE <value>)
// list.
//
// In regular lisps a quote gets expanded to a (quote) form, i.e. 'a is
// actually (QUOTE A), which prevents the symbol from being evaluated. Quotes
// are handled the same way here for now.
//
// Throws std::exception if no value follows the quote.
LispValue Parser::parse_quote() {
    optional<LispValue> quoted = parse_one();
    // get_token() throws when the queue runs dry, but parse_one() can still
    // return nullopt (e.g. for an End token), so the optional must be checked
    // before it is dereferenced.
    if (!quoted) {
        cerr << "Parser::parse_quote: Quote is not followed by a value." << endl;
        throw exception();
    }

    List l;
    l.list.push_back(make_symbol("QUOTE"));
    l.list.push_back(*quoted);
    return l;
}
|
||||
|
||||
// Consumes tokens from the queue and parses exactly one value.
// Returns nullopt for an End token (or any token type not handled below).
optional<LispValue> Parser::parse_one() {
    const Token tok = get_token();

    switch (tok.type) {
    // Compound forms.
    case TokenType::OpenParen:
        return parse_list();
    case TokenType::Quote:
        return parse_quote();

    // I don't know what this will actually do, in theory maybe just like the
    // OpenParen, but parses things in a different namespace? Unimplemented
    // for now: the dollar is skipped and the following value is parsed.
    case TokenType::Dollar:
        return parse_one();

    // A closing paren with no list open.
    // NOTE(review): this throws a string literal, not a std::exception.
    case TokenType::CloseParen:
        throw "whatever";

    // Atoms.
    case TokenType::Int:
        return Integer {get<int64_t>(*tok.value)};
    case TokenType::Double:
        return Double {get<double>(*tok.value)};
    case TokenType::String:
        return make_string(get<string>(*tok.value));
    case TokenType::Symbol:
        return make_symbol(get<string>(*tok.value));

    case TokenType::End:
        break;
    }

    return nullopt;
}
|
||||
|
||||
// Parses the elements of a list. Assumes the OpenParen has already been read
// and keeps reading elements until the matching CloseParen is found.
//
// If an End token is met first, a message is logged to stderr and the
// elements read so far are returned. Throws std::exception if an element
// does not parse to a value.
LispValue Parser::parse_list() {
    List l;

    for (Token t = get_token(); t.type != TokenType::CloseParen; t = get_token()) {
        if (t.type == TokenType::End) {
            // this is clearly an error!
            cerr << "Parser::parse_list: Input ended before list ended." << endl;
            break;
        }

        // The token belongs to the next element: hand it back to parse_one().
        unget_token(t);
        optional<LispValue> element = parse_one();
        // parse_one() may return nullopt (e.g. a '$' followed by End), so the
        // optional must be checked before it is dereferenced.
        if (!element) {
            cerr << "Parser::parse_list: List element did not parse to a value." << endl;
            throw exception();
        }
        l.list.push_back(*element);
    }

    return l;
}
|
||||
|
||||
|
||||
// Public entry point: parses and returns the next value from the token queue.
optional<LispValue> Parser::next()
{
    optional<LispValue> parsed = parse_one();
    return parsed;
}
|
87
src/tests/test.cpp
Normal file
87
src/tests/test.cpp
Normal file
@@ -0,0 +1,87 @@
|
||||
#include "value.hpp"
|
||||
#include <catch2/catch_test_macros.hpp>
|
||||
#include <lex.hpp>
|
||||
#include <parse.hpp>
|
||||
using namespace std;
|
||||
|
||||
// Test helper: removes the first element of `dq` and returns a copy of it.
// The deque must not be empty.
template <typename T>
T pop_and_front(std::deque<T> &dq)
{
    T head(dq.front());
    dq.pop_front();
    return head;
}
|
||||
|
||||
TEST_CASE("Lexer lexes doubles correctly", "[Lexer]") {

    SECTION("double and negative syntax") {
        Lexer l("(1.0 0.1 -.1 -1. . - -. .-)");

        // Expected token stream, in source order. A lone dot or minus, and
        // combinations of only those, must lex as symbols and not as numbers.
        const Token expected[] = {
            Token({TokenType::OpenParen, nullopt}),
            Token({TokenType::Double, 1.0}),
            Token({TokenType::Double, 0.1}),
            Token({TokenType::Double, -0.1}),
            Token({TokenType::Double, -1.0}),
            Token({TokenType::Symbol, "."}),
            Token({TokenType::Symbol, "-"}),
            Token({TokenType::Symbol, "-."}),
            Token({TokenType::Symbol, ".-"}),
            Token({TokenType::CloseParen, nullopt}),
        };

        for (const Token &want : expected) {
            REQUIRE(l.next() == want);
        }
    }
}
|
||||
|
||||
TEST_CASE("Parser parses correctly", "[Parser]") {
    SECTION("hello world") {
        Parser p (Lexer("(print \"hello world\")"));
        auto items = get<List>(*p.next()).list;

        // Symbols are upper-cased by the parser; strings are kept verbatim.
        const auto head = get<Symbol>(pop_and_front(items));
        REQUIRE(head.value == "PRINT");
        const auto arg = get<String>(pop_and_front(items));
        REQUIRE(arg.value == "hello world");
    }
    SECTION("doubles") {
        Parser p (Lexer("(1.0 0.1 -.1 -1. . - -. .-)"));
        auto items = get<List>(*p.next()).list;

        // The first four elements are numbers...
        for (double want : {1.0, 0.1, -0.1, -1.0}) {
            REQUIRE(get<Double>(pop_and_front(items)).value == want);
        }
        // ...and the remaining four are symbols made of dots and minus signs.
        for (const char *want : {".", "-", "-.", ".-"}) {
            REQUIRE(get<Symbol>(pop_and_front(items)).value == want);
        }
    }
    SECTION("Nested lists") {
        Parser p(Lexer("((((0) (1) (2) (3))))"));
        // Peel off the three enclosing lists to reach ((0) (1) (2) (3)).
        auto l0 = get<List>(*p.next()).list;
        auto l1 = get<List>(pop_and_front(l0)).list;
        auto l2 = get<List>(pop_and_front(l1)).list;

        // Each element is a one-element list holding its own index.
        for (int want = 0; want < 4; ++want) {
            auto inner = get<List>(pop_and_front(l2)).list;
            REQUIRE(get<Integer>(pop_and_front(inner)).value == want);
        }
    }
    SECTION("Nested quotes") {
        Parser p(Lexer("((('a 'b 'c 'd)))"));
        // Peel off the three enclosing lists to reach ('a 'b 'c 'd).
        auto l0 = get<List>(*p.next()).list;
        auto l1 = get<List>(pop_and_front(l0)).list;
        auto l2 = get<List>(pop_and_front(l1)).list;

        // Each quoted symbol must have been expanded to (QUOTE <SYMBOL>).
        for (const char *want : {"A", "B", "C", "D"}) {
            auto quoted = get<List>(pop_and_front(l2)).list;
            REQUIRE(get<Symbol>(pop_and_front(quoted)).value == "QUOTE");
            REQUIRE(get<Symbol>(pop_and_front(quoted)).value == want);
        }
    }
}
|
||||
|
36
src/value.cpp
Normal file
36
src/value.cpp
Normal file
@@ -0,0 +1,36 @@
|
||||
#include <concepts>
|
||||
#include <value.hpp>
|
||||
#include <iostream>
|
||||
|
||||
|
||||
template <typename T>
|
||||
requires std::convertible_to<T, LispValue>
|
||||
void value_printer(T t) {
|
||||
if constexpr (std::same_as<T, LispValue>) {
|
||||
print_val(t);
|
||||
} else if constexpr (requires { std::cout << t.value;}) {
|
||||
std::cout << t.value;
|
||||
} else {
|
||||
std::cout << "{UNKNOWN}" << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
template <>
|
||||
void value_printer(List l) {
|
||||
std::cout << "(";
|
||||
for (auto i : l.list) {
|
||||
value_printer(i);
|
||||
std::cout << " ";
|
||||
}
|
||||
std::cout << ")";
|
||||
}
|
||||
|
||||
template <>
|
||||
void value_printer(String s) {
|
||||
std::cout << '"' << s.value << '"';
|
||||
}
|
||||
|
||||
// Prints a LispValue to stdout by dispatching to the value_printer overload
// that matches the alternative currently held.
void print_val(LispValue v) {
    const auto print_alternative = [](auto alternative) {
        value_printer(alternative);
    };
    std::visit(print_alternative, v);
}
|
||||
|
5
toolchain/sanitize.cmake
Normal file
5
toolchain/sanitize.cmake
Normal file
@@ -0,0 +1,5 @@
|
||||
# Toolchain file enabling AddressSanitizer and UndefinedBehaviorSanitizer.
# The flags below assume clang or gcc as the compiler; adjust them if you are
# using msvc or something else.
string(JOIN " " SANITIZERS_FLAGS
    "-fsanitize=address"
    "-fsanitize=undefined"
    "-fno-sanitize-recover=all"
    "-fno-omit-frame-pointer")

# The sanitizer flags are needed both when compiling and when linking.
set(CMAKE_CXX_FLAGS_INIT "${SANITIZERS_FLAGS}")
set(CMAKE_EXE_LINKER_FLAGS_INIT "${SANITIZERS_FLAGS}")
|
Reference in New Issue
Block a user