Compare commits

...

25 Commits

Author SHA1 Message Date
Emin Arslan
82a606e80b Added proposed spec for the language
All checks were successful
ci/woodpecker/push/workflow Pipeline was successful
2025-10-10 12:06:40 +03:00
b3c2bcb92d Int class is added
All checks were successful
ci/woodpecker/push/workflow Pipeline was successful
`Parser::feed` method is redesigned for compatibility
2025-10-07 21:14:07 +03:00
a0124b791d Change C++ version
All checks were successful
ci/woodpecker/push/workflow Pipeline was successful
2025-10-07 19:52:52 +03:00
fddbd9b03b Change a function to remove C++23 dependancy
All checks were successful
ci/woodpecker/push/workflow Pipeline was successful
2025-10-07 14:08:44 +03:00
0fc3ab2ea8 update Readme.md 2025-10-07 14:08:44 +03:00
fd79376cfe .gitignore is updated
All checks were successful
ci/woodpecker/push/workflow Pipeline was successful
2025-10-07 13:31:59 +03:00
75eb879993 Added extra test cases for parsing quotes and symbols, also change symbols to always be uppercase
All checks were successful
ci/woodpecker/push/workflow Pipeline was successful
2025-10-03 22:57:28 +03:00
01246cc0e1 Added lexing and parsing for single quote
All checks were successful
ci/woodpecker/push/workflow Pipeline was successful
2025-10-03 22:51:46 +03:00
a13dbcaa77 update README.md 2025-10-03 22:39:46 +03:00
ed791d96f2 Added nested lists test
All checks were successful
ci/woodpecker/push/workflow Pipeline was successful
2025-10-03 22:29:40 +03:00
8a9655cdd5 Added tests for the parser
All checks were successful
ci/woodpecker/push/workflow Pipeline was successful
2025-10-03 22:22:38 +03:00
b6c095caf1 Changed List value to deque for easier testing and use, 2025-10-03 22:22:25 +03:00
7746fdda6f Initial parser attempt added - seems to parse correctly
All checks were successful
ci/woodpecker/push/workflow Pipeline was successful
2025-10-03 22:07:20 +03:00
561c76b6d7 fix tests to obey new rules
All checks were successful
ci/woodpecker/push/workflow Pipeline was successful
2025-10-03 22:06:10 +03:00
00fdc84d22 Minor changes - change vector to deque in lexer as this needs to behave like a queue 2025-10-03 21:08:43 +03:00
ec59b49c32 Update README.md to mention new library dependancy
All checks were successful
ci/woodpecker/push/workflow Pipeline was successful
2025-10-02 18:20:04 +03:00
15176cdbf6 Change CMakeLists.txt to expect Catch2 in system path to speed up builds
All checks were successful
ci/woodpecker/push/workflow Pipeline was successful
2025-10-02 18:08:22 +03:00
2dd10e08d9 Added toolchain file for sanitized builds
Some checks failed
ci/woodpecker/push/workflow Pipeline failed
2025-10-02 17:56:30 +03:00
a40487f84d Change build code to be maximally pedantic
All checks were successful
ci/woodpecker/push/workflow Pipeline was successful
2025-10-02 17:20:14 +03:00
47f33f3dc0 Change lexer code to use an option instead of ignoring the variant when no value is needed 2025-10-02 17:19:51 +03:00
d0eae97771 Update README.md
All checks were successful
ci/woodpecker/push/workflow Pipeline was successful
2025-09-30 22:32:44 +03:00
e1580755e9 Use correct path for test executable
All checks were successful
ci/woodpecker/push/workflow Pipeline was successful
2025-09-30 21:49:37 +03:00
8fc3e82173 Added testing with Catch2
Some checks failed
ci/woodpecker/push/workflow Pipeline failed
2025-09-30 21:46:26 +03:00
34d35d6039 compile_commands.json should be generated per person
All checks were successful
ci/woodpecker/push/workflow Pipeline was successful
2025-09-30 21:44:14 +03:00
a476d1b9e9 Updated README.md, added status badge for continuous integration 2025-09-30 21:39:59 +03:00
17 changed files with 1271 additions and 43 deletions

4
.gitignore vendored
View File

@@ -1,2 +1,4 @@
build/*
.cache
.cache
.idea
compile_commands.json

View File

@@ -6,13 +6,15 @@ steps:
- name: build
image: gcc:latest
commands: # we probably shouldn't install cmake every time
- apt update && apt install -y cmake
- apt update && apt install -y cmake catch2
- mkdir -p build/ && cd build
- cmake ..
- cmake .. --toolchain ../toolchain/sanitize.cmake
- make
- name: test
image: ubuntu
image: gcc:latest
commands:
# TODO: Probably make actual tests at some point
# Automated tests, this should not fail
- ./build/test
# Manual test, you can see the output of this in woodpecker
- echo "(print 42)" | ./build/main
# TODO: add publish step, when we're at a working state.

View File

@@ -1,10 +1,31 @@
# NOTE(review): this span is a diff hunk in which removed and added lines
# appear together without +/- markers. The first HEADER_FILES/SOURCE_FILES pair
# and the first add_executable(main ...) look like the pre-change lines --
# confirm against the repository before treating this as a single file.
cmake_minimum_required(VERSION 3.16)
project(lispy_stuff)
# we'll use catch2 as testing library.
# Catch2 version 3 or above needs to be installed on your system.
find_package(Catch2 3 REQUIRED)
# Presumably the removed (pre-change) file lists; both variables are
# reassigned further down before any target uses them.
set(HEADER_FILES src/include/lex.hpp)
set(SOURCE_FILES src/main.cpp src/lex.cpp)
# we'll use a recent c++ standard.
set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
# Presumably the removed (pre-change) definition of `main`. If this line and
# the later add_executable(main src/main.cpp) were both present in one file,
# CMake would stop with a duplicate-target error.
add_executable(main ${SOURCE_FILES} ${HEADER_FILES})
# NOTE(review): src/int.cpp and src/include/int.hpp are not listed here and GMP
# is neither located nor linked, so the Int class is not compiled by any target.
set(HEADER_FILES src/include/lex.hpp src/include/value.hpp src/include/parse.hpp)
set(SOURCE_FILES src/lex.cpp src/parse.cpp src/value.cpp)
# Warning flags shared by every target below.
set(CXX_WARNING_FLAGS -Wall -Wextra -Wpedantic -pedantic)
# we're not actually shipping a library yet,
# this is so we don't have to compile twice for main and tests.
# NOTE(review): CMake adds its own `lib` prefix on Unix-like platforms, so this
# target produces `liblibmash.a`; a target name of `mash` would avoid that.
add_library(libmash STATIC ${SOURCE_FILES} ${HEADER_FILES})
# PUBLIC: targets that link libmash inherit src/include/ as an include path.
target_include_directories(libmash PUBLIC src/include/)
target_compile_options(libmash PRIVATE ${CXX_WARNING_FLAGS})
# Main target
add_executable(main src/main.cpp)
# NOTE(review): keyword-less signature; the `test` target below uses PRIVATE.
# Prefer an explicit PRIVATE here for consistency.
target_link_libraries(main libmash)
target_compile_options(main PRIVATE ${CXX_WARNING_FLAGS})
# tests
# NOTE(review): `test` is a target name CMake reserves once testing is enabled
# (enable_testing() / include(CTest), see policy CMP0037). Registering this
# executable with CTest later will require renaming it.
add_executable(test src/tests/test.cpp)
target_link_libraries(test PRIVATE libmash Catch2::Catch2WithMain)
target_compile_options(test PRIVATE ${CXX_WARNING_FLAGS})
# Redundant with the PUBLIC include directory that `main` already inherits
# from libmash.
target_include_directories(main PRIVATE src/include/)

View File

@@ -1,14 +1,21 @@
# Lispy stuff
[![status-badge](https://ci.emin.software/api/badges/2/status.svg)](https://ci.emin.software/repos/2)
Simple lisp-ish language compiler written in C++.
Right now it doesn't compile much - it's just a parser.
The goal is to emit bytecode. The bytecode format is not decided yet.
The end goal is to emit bytecode. The bytecode format is not decided yet.
## Build
I use cmake for the build system. I prefer to build out-of-tree,
here's how to build if you've never used cmake:
All you need is:
- CMake
- A modern C++ compiler
- The [Catch2](https://github.com/catchorg/Catch2) library v3 or higher installed on your system
Once you have these, you can build with:
```bash
cd build
@@ -16,11 +23,14 @@ cmake ..
make
```
This will build two executables, `main` and `test`. `test` runs all tests
on the compiler itself.
## Development
I use clangd as the language server. Appropriate `compile_commands.json`
(required for clangd, otherwise it can not find include files) is provided.
If you'd like to generate them yourself, just use cmake:
I use clangd as the language server. If you want your include files to be handled
correctly, you'll need to generate `compile_commands.json` yourself. You can
do this using cmake:
```bash
cd build
@@ -28,4 +38,32 @@ cmake -DCMAKE_EXPORT_COMPILE_COMMANDS=1 ..
cp compile_commands.json ../
```
After this, clangd should not give you errors on every included header.
### Toolchain
C++ can be somewhat error-prone, so in order to reduce the possibility of
certain bugs, cmake toolchain options are provided that build the project
with the undefined behaviour sanitizer and the address sanitizer.
You can build a sanitized version like this:
```
cd build
cmake .. --toolchain ../toolchain/sanitize.cmake
make
```
Catch2 testing framework is used to maintain code correctness.
## Progress
Woodpecker CI/CD system is integrated.
Currently using Catch2 for unit testing.
- Lexing - complete, more token types may be added as necessary
- Parsing - the main steps are done, but things may change as time progresses
- Optimizations -
- Emitting Bytecode - the goal.

View File

@@ -1,8 +0,0 @@
[
{
"directory": "/home/haxala1r/Desktop/Programming/C++/lispy-stuff/build",
"command": "/usr/bin/c++ -I/home/haxala1r/Desktop/Programming/C++/lispy-stuff/src/include -o CMakeFiles/main.dir/src/main.cpp.o -c /home/haxala1r/Desktop/Programming/C++/lispy-stuff/src/main.cpp",
"file": "/home/haxala1r/Desktop/Programming/C++/lispy-stuff/src/main.cpp",
"output": "CMakeFiles/main.dir/src/main.cpp.o"
}
]

109
docs/doc.md Normal file
View File

@@ -0,0 +1,109 @@
## Mash
### Base design:
#### Paradigm:
Dynamically typed, functional-first.
#### Syntax:
A clean subset of Lisp. It's the simplest to parse and will get us to a working state fastest.
- Atoms: 123, 4.56, "hello", my-var, true
- Lists: (func arg1 arg2)
- Almost everything else is syntax sugar that can be added as macros in the standard library. That is also probably easier to implement, and more modular.
#### Data Types:
Integer, Float, String, Boolean, List, Symbol, Function. That's it.
#### Memory:
Garbage Collected. Let's forget custom allocators for now, and just use Boehm or
something.
#### Standard Library:
Basic math ops (+, -, *, /), comparison (=, <, >), cons, car, cdr, list, some form of arrays, and I/O functions. Keep it small, at least for the beginning.
## Basic operations
### Defining functions and variables
Let's just copy Scheme here, it has a very simple special form:
```
(define x 5)
(define (addTwo x y) (+ x y))
```
This simple syntax is fairly readable and math-like.
It's a dynamic language, no type inference or funny business,
no PhDs required.
Every form ("expression") evaluates to something. Variable definitions
evaluate to the value assigned to the variable, or nil if it's
an empty definition.
Function definitions evaluate to the value of their function.
Nil is the empty list, `()`. Common Lisp treats it as the canonical
falsy value; Scheme has dedicated `#t` and `#f` values and treats
the empty list as truthy. We can go with either choice, but I lean
towards Common Lisp here.
### Macros
We use CL-style macros, a macro is a function that receives its parameters
unevaluated and runs completely at compile time, producing lisp code that
will be compiled. Of course, it will produce a list.
I.e. using a macro `(foo (1 2 3) arg2)` is equivalent to doing
`(eval (bar '(1 2 3) 'arg2))` assuming the function bar does the
same transformations that foo would have done, except that the
macro foo is evaluated at compile time.
## Special syntax
Let's NOT add too much syntax to the core.
The idea is, if we make a small core that has access to lisp macros,
we can effectively add *any* syntax sugar we want by simply defining
it as part of the standard library.
That's usually what Common Lisp does actually, most language constructs
are actually functions or macros defined in the standard library.
The greatest power of a Lisp is its ability to extend syntax.
Adding too much syntax too early defeats the purpose.
Let's keep it small.
#### Dollar sign
```
(define x $(1/3 + 2^60))
(define y (map (lambda (n) $(n * n)) (range 0 10)))
```
This can be implemented later as a reader macro,
e.g. $(1/3 + 2^60) expands to (math-syntax (1/3 + 2^60))
math-syntax is a macro that expands this further to
(+ (/ 1 3) (^ 2 60)).
So at the start, we don't need much syntax at all.
#### SQL sub-language
This can trivially be done as a library of functions,
and macros can add whatever syntax sugar is desired.
## Evaluation strategy
In order for macros to be possible, the compiler must be able to execute
code during compilation time. This is fine, we can simply keep a running
"image" of all lisp forms compiled so far, and run code there.
We need a byte code VM for this. Lua bytecode is perfectly acceptable.

425
src/include/int.hpp Normal file
View File

@@ -0,0 +1,425 @@
#include <gmp.h>
#include <gmpxx.h>
#include <string>
/**
 * @brief Valid values for the `base` argument of GMP's `mpz_set_str` and
 * `mpz_sizeinbase`.
 * @see https://gmplib.org/manual/Assigning-Integers
 *
 * `base_N`: bases 2-9, written with digits only, so letter case never matters.
 * `ci_N`: bases 10-36, case-insensitive (`A`=`10`, `a`=`10`).
 * `cs_N`: bases 37-62, case-sensitive (`A`=`10`, `a`=`36`).
 */
enum class base {
base_2 = 2,
base_3 = 3,
base_4 = 4,
base_5 = 5,
base_6 = 6,
base_7 = 7,
base_8 = 8,
base_9 = 9,
ci_10 = 10,
ci_11 = 11,
ci_12 = 12,
ci_13 = 13,
ci_14 = 14,
ci_15 = 15,
ci_16 = 16,
ci_17 = 17,
ci_18 = 18,
ci_19 = 19,
ci_20 = 20,
ci_21 = 21,
ci_22 = 22,
ci_23 = 23,
ci_24 = 24,
ci_25 = 25,
ci_26 = 26,
ci_27 = 27,
ci_28 = 28,
ci_29 = 29,
ci_30 = 30,
ci_31 = 31,
ci_32 = 32,
ci_33 = 33,
ci_34 = 34,
ci_35 = 35,
ci_36 = 36,
cs_37 = 37,
cs_38 = 38,
cs_39 = 39,
cs_40 = 40,
cs_41 = 41,
cs_42 = 42,
cs_43 = 43,
cs_44 = 44,
cs_45 = 45,
cs_46 = 46,
cs_47 = 47,
cs_48 = 48,
cs_49 = 49,
cs_50 = 50,
cs_51 = 51,
cs_52 = 52,
cs_53 = 53,
cs_54 = 54,
cs_55 = 55,
cs_56 = 56,
cs_57 = 57,
cs_58 = 58,
cs_59 = 59,
cs_60 = 60,
cs_61 = 61,
cs_62 = 62,
binary = base_2,
octal = base_8,
decimal = ci_10,
hexadecimal = ci_16,
};
/**
 * @brief Valid values for the `base` argument of GMP's `mpz_get_str`.
 * @see https://gmplib.org/manual/Converting-Integers
 *
 * `base_N`: base N (37-62). Upper- and lower-case letters are distinct digits
 * here, so there is no separate upper- or lower-case variant.
 *
 * `lc_N`: base N (2-36), printed with lower-case letters.
 *
 * `uc_N`: base N (2-36), printed with upper-case letters; encoded as `-N`.
 */
enum class print_base {
lc_2 = 2,
lc_3 = 3,
lc_4 = 4,
lc_5 = 5,
lc_6 = 6,
lc_7 = 7,
lc_8 = 8,
lc_9 = 9,
lc_10 = 10,
lc_11 = 11,
lc_12 = 12,
lc_13 = 13,
lc_14 = 14,
lc_15 = 15,
lc_16 = 16,
lc_17 = 17,
lc_18 = 18,
lc_19 = 19,
lc_20 = 20,
lc_21 = 21,
lc_22 = 22,
lc_23 = 23,
lc_24 = 24,
lc_25 = 25,
lc_26 = 26,
lc_27 = 27,
lc_28 = 28,
lc_29 = 29,
lc_30 = 30,
lc_31 = 31,
lc_32 = 32,
lc_33 = 33,
lc_34 = 34,
lc_35 = 35,
lc_36 = 36,
uc_2 = -2,
uc_3 = -3,
uc_4 = -4,
uc_5 = -5,
uc_6 = -6,
uc_7 = -7,
uc_8 = -8,
uc_9 = -9,
uc_10 = -10,
uc_11 = -11,
uc_12 = -12,
uc_13 = -13,
uc_14 = -14,
uc_15 = -15,
uc_16 = -16,
uc_17 = -17,
uc_18 = -18,
uc_19 = -19,
uc_20 = -20,
uc_21 = -21,
uc_22 = -22,
uc_23 = -23,
uc_24 = -24,
uc_25 = -25,
uc_26 = -26,
uc_27 = -27,
uc_28 = -28,
uc_29 = -29,
uc_30 = -30,
uc_31 = -31,
uc_32 = -32,
uc_33 = -33,
uc_34 = -34,
uc_35 = -35,
uc_36 = -36,
base_37 = 37,
base_38 = 38,
base_39 = 39,
base_40 = 40,
base_41 = 41,
base_42 = 42,
base_43 = 43,
base_44 = 44,
base_45 = 45,
base_46 = 46,
base_47 = 47,
base_48 = 48,
base_49 = 49,
base_50 = 50,
base_51 = 51,
base_52 = 52,
base_53 = 53,
base_54 = 54,
base_55 = 55,
base_56 = 56,
base_57 = 57,
base_58 = 58,
base_59 = 59,
base_60 = 60,
base_61 = 61,
base_62 = 62,
octal_lower_case = lc_8,
hex_lower_case = lc_16,
decimal = lc_10,
binary = lc_2,
octal_upper_case = uc_8,
hex_upper_case = uc_16,
};
base print_base_to_base(print_base);
/**
* @brief is an high-performance, arbitrary precision Integer class for mash
*/
class Int {
private:
mpz_t inner;
public:
Int();
Int(const Int &);
Int(unsigned long);
Int(long);
Int(double);
Int(const std::string &, base);
unsigned long to_ul();
long to_l();
double to_d();
std::string to_string(print_base);
void swap(Int &);
void operator+(const Int &);
void operator+(unsigned long);
void operator+(long);
void operator-(const Int &);
void operator-(unsigned long);
void operator-(long);
void operator*(const Int &);
void operator*(unsigned long);
void operator*(long);
void operator/(const Int &);
void operator/(unsigned long);
void operator/(long);
void operator%(const Int &);
void operator%(unsigned long);
void add_mul(Int &);
void add_mul(unsigned long);
void add_mul(long);
void sub_mul(Int &);
void sub_mul(unsigned long);
void sub_mul(long);
void mul_2exp(unsigned long);
void operator~();
void abs();
void ceil_div_quotient(Int &);
void ceil_div_remainder(Int &);
void ceil_div_both(Int &);
void ceil_div(unsigned long);
void ceil_div_both(unsigned long);
void ceil_div_quotient(unsigned long);
void ceil_div_remainder(unsigned long);
void ceil_div(long);
void ceil_div_both(long);
void ceil_div_quotient(long);
void ceil_div_remainder(long);
void ceil_div_quotient_2exp(unsigned long);
void ceil_div_remainder_2exp(unsigned long);
void floor_div_quotient(Int &);
void floor_div_remainder(Int &);
void floor_div_both(Int &);
void floor_div(unsigned long);
void floor_div_both(unsigned long);
void floor_div_quotient(unsigned long);
void floor_div_remainder(unsigned long);
void floor_div(long);
void floor_div_both(long);
void floor_div_quotient(long);
void floor_div_remainder(long);
void floor_div_quotient_2exp(unsigned long);
void floor_div_remainder_2exp(unsigned long);
void truncate_div_quotient(Int &);
void truncate_div_remainder(Int &);
void truncate_div_both(Int &);
void truncate_div(unsigned long);
void truncate_div_both(unsigned long);
void truncate_div_quotient(unsigned long);
void truncate_div_remainder(unsigned long);
void truncate_div(long);
void truncate_div_both(long);
void truncate_div_quotient(long);
void truncate_div_remainder(long);
void truncate_div_quotient_2exp(unsigned long);
void truncate_div_remainder_2exp(unsigned long);
void mod(Int &);
void mod(unsigned long);
void mod(long);
void div_exact(Int &);
void div_exact(unsigned long);
void div_exact(long);
int divisible(Int &);
int divisible(unsigned long);
int divisible(long);
int divisible_2exp(unsigned long);
int congruent(Int &);
int congruent(unsigned long);
int congruent(long);
int congruent_2exp(unsigned long);
void mod_pow(Int &);
void mod_pow(unsigned long);
void mod_pow(long);
void powm_sec(Int &);
void pow(Int &);
void pow(unsigned long, unsigned long);
void pow(long, unsigned long);
~Int() { mpz_clear(inner); };
};
/*
///
/// Enum for the number's sign.
///
/// `Zero` is for when number is zero.
///
pub const Sign = enum(i8) {
Negative = -1,
Positive = 1,
Zero = 0,
pub fn toChar(self: @This()) u8 {
return switch (self) {
.Negative => '-',
.Positive => '+',
.Zero => '0',
};
}
pub fn toString(self: @This()) []const u8 {
return switch (self) {
.Negative => "negative",
.Positive => "positive",
.Zero => "zero",
};
}
};
///
/// Enum for the comparison of two numbers.
///
pub const Ordering = enum(i8) {
Less = -1,
Greater = 1,
Equals = 0,
const Self = @This();
pub fn toChar(self: Self) u8 {
return switch (self) {
.Less => '<',
.Greater => '>',
.Equals => '=',
};
}
pub fn toString(self: Self) []const u8 {
return switch (self) {
.Less => "less",
.Greater => "greater",
.Equals => "equals",
};
}
pub fn fromC(num: c_int) Self {
if (num < 0) return .Less;
if (num > 0) return .Greater;
return .Equals;
}
};
/// Enum for the number's parity.
pub const Parity = enum(i8) {
Even = 0,
Odd = 1,
pub fn toString(self: @This()) []const u8 {
return switch (self) {
.Even => "even",
.Odd => "odd",
};
}
};
////
/// Rounding methods for division.
///
/// `Truncate` rounds the integer towards 0.
/// `Ceil` rounds the integer towards positive infinity.
/// `Floor` rounds the integer towards negative infinity.
///
/// |`Number`|`Truncate`|`Ceil`|`Floor`|
/// |--------|----------|------|-------|
/// |`4.5` |`4` |`5` |`4` |
/// |`-4.5` |`-4` |`-4` |`-5` |
///
pub const RoundingMethod = enum(i8) {
Truncate = 0,
Ceil = 1,
Floor = -1,
};
///
/// Option for which result will be written to the integer.
///
pub const Output = enum(i8) {
Quotient = 0,
Remainder = 1,
};
*/

View File

@@ -1,10 +1,11 @@
#pragma once
#include <deque>
#include <sstream>
#include <vector>
#include <stdint.h>
#include <variant>
#include <optional>
enum TokenType {
enum class TokenType {
OpenParen,
CloseParen,
Dollar,
@@ -12,13 +13,14 @@ enum TokenType {
String,
Int,
Double,
Quote,
End
};
// Plain Old Data
struct Token {
enum TokenType type;
std::variant<int64_t, double, std::string> value;
std::optional<std::variant<int64_t, double, std::string>> value;
};
bool operator==(Token const& one, Token const& other);
std::ostream &operator<<(std::ostream &os, Token const &t);
@@ -39,8 +41,8 @@ public:
void feed(std::string);
Token next();
std::vector<Token> collect();
std::deque<Token> collect();
};
// when you don't want to construct the object
std::vector<Token> lex(std::string);
std::deque<Token> lex(std::string);

33
src/include/parse.hpp Normal file
View File

@@ -0,0 +1,33 @@
#pragma once
#include <value.hpp>
#include <lex.hpp>
// The Parser produces a regular lisp value.
// lisp code is made of lisp lists and atoms.
//
// Tokens are taken from a Lexer, buffered in a deque, and consumed from the
// front as values are parsed.
class Parser {
private:
// the token stream.
std::deque<Token> ts;
// Removes and returns the front token; parse.cpp throws std::exception when
// the buffer is empty.
Token get_token();
// Pushes a token back onto the front of the buffer so it is read again.
void unget_token(Token);
// these may need to be interned later
String make_string(std::string);
// parse.cpp upper-cases the name before wrapping it in a Symbol.
Symbol make_symbol(std::string);
// Parses a single value starting at the front token. parse.cpp returns
// nullopt when that token is End.
std::optional<LispValue> parse_one();
// Called after a Quote token; builds the two-element list (QUOTE <value>).
LispValue parse_quote();
// Called after an OpenParen token; reads elements up to the CloseParen.
LispValue parse_list();
public:
// Collects every token the lexer can produce into the buffer.
Parser(Lexer);
// Appends the tokens of another lexer to the end of the buffer.
void feed(Lexer);
// NOTE(review): definition not visible in this change set; presumably returns
// the next parsed value, or nullopt once input is exhausted -- confirm.
std::optional<LispValue> next();
};

31
src/include/value.hpp Normal file
View File

@@ -0,0 +1,31 @@
#pragma once
#include <deque>
#include <cstdint>
#include <string>
#include <variant>
// we're using a pure variant as our value type.
// Each alternative is a thin wrapper struct so that values with the same
// underlying C++ type (String and Symbol both hold a std::string) remain
// distinct alternatives of the variant.
struct Integer {int64_t value;};
struct Double {double value;};
struct String {std::string value;}; // might be a good idea to intern strings
struct Symbol {std::string value;};
// Forward declaration: List holds LispValues, and LispValue names List.
struct List;
// NOTE(review): Nil is declared but is not one of the LispValue alternatives
// below, so no LispValue can currently hold it.
struct Nil {};
// The order of alternatives determines std::variant::index(); keep it stable.
using LispValue = std::variant<Integer, Double, String, Symbol, List>;
struct List {std::deque<LispValue> list;};
// during compilation, we don't really care for cyclical lists etc.
// during compilation we'll mostly be dealing with regular, flat lists
// that form function calls.
// We will have a different set of values during runtime
// as the runtime will be a bytecode interpreter anyhow.
// Prints a value; the argument is taken by value.
void print_val(LispValue);
// Free-function counterparts of the Parser members with the same names.
// NOTE(review): their definitions are not visible in this change set.
String make_string(std::string);
Symbol make_symbol(std::string);

342
src/int.cpp Normal file
View File

@@ -0,0 +1,342 @@
#include <cmath>
#include <cstdlib>
#include <stdexcept>
#include <string>
#include <utility>

#include <gmp.h>
#include <int.hpp>
// Constructs the value zero.
Int::Int() { mpz_init(inner); }

// Deep copy: allocates fresh storage and copies the value of `big`.
Int::Int(const Int &big) { mpz_init_set(inner, big.inner); }

// Constructs from an unsigned machine integer.
Int::Int(unsigned long ul) { mpz_init_set_ui(inner, ul); }

// Constructs from a signed machine integer.
Int::Int(long l) { mpz_init_set_si(inner, l); }

// Constructs from a double.
// NOTE(review): per the GMP manual the fractional part is truncated, and
// NaN/infinity are not valid inputs -- callers should check first.
Int::Int(double d) { mpz_init_set_d(inner, d); }

/**
 * @brief Parses `str` as an integer written in base `b`.
 * @throws std::invalid_argument (a std::exception) when `str` is not a valid
 *         number in that base.
 *
 * mpz_init_set_str returns 0 on success and -1 on failure (never 1), and it
 * initialises `inner` in both cases. Because the destructor does not run when
 * a constructor throws, the storage is released here before throwing.
 */
Int::Int(const std::string &str, base b) {
    if (mpz_init_set_str(inner, str.c_str(), static_cast<int>(b)) != 0) {
        mpz_clear(inner);
        throw std::invalid_argument("Int: \"" + str +
                                    "\" is not a valid integer in the requested base");
    }
}
/**
 * @brief Maps an output base (`print_base`) to the matching input base (`base`).
 *
 * The two enums use the same numeric encoding for the radix: `lc_N` and
 * `base_N` are `N`, `uc_N` is `-N`, and every `base` enumerator equals its
 * radix. The conversion is therefore the absolute value of the underlying
 * integer. This gives the same result as a case-by-case switch for every
 * enumerator, without `std::unreachable()`, which requires C++23.
 *
 * @param p_base any enumerator of `print_base`.
 * @return the `base` enumerator with the same radix.
 */
base print_base_to_base(print_base p_base) {
    const int raw = static_cast<int>(p_base);
    const int radix = raw < 0 ? -raw : raw; // uc_N is stored as -N
    return static_cast<base>(radix);
}
unsigned long Int::to_ul() { return mpz_get_ui(this->inner); };
long Int::to_l() { return mpz_get_si(this->inner); };
double Int::to_d() { return mpz_get_d(this->inner); };
/**
 * @brief Renders the value as text in the requested output base.
 * @param base radix and letter case, as accepted by `mpz_get_str`.
 * @return the digits, with a leading '-' for negative values.
 *
 * GMP writes straight into a buffer owned by the returned string. This avoids
 * pairing GMP's allocator with `free()` (wrong if custom GMP memory functions
 * are installed) and cannot leak if building the string throws.
 */
std::string Int::to_string(print_base base) {
    const int radix = static_cast<int>(base);
    // mpz_sizeinbase only accepts positive bases; upper-case bases are negative.
    const int magnitude = radix < 0 ? -radix : radix;
    // The GMP manual requires room for the digits plus a sign and a terminator.
    std::string digits(mpz_sizeinbase(inner, magnitude) + 2, '\0');
    mpz_get_str(&digits[0], radix, inner);
    // Trim at the terminator GMP wrote (the size estimate may be one too big).
    digits.resize(digits.find('\0'));
    return digits;
}
// Exchanges the values of *this and rhs via mpz_swap.
void Int::swap(Int &rhs) {
    mpz_swap(inner, rhs.inner);
}
namespace {
// Absolute value of a long as an unsigned long. Unlike std::abs this is well
// defined for LONG_MIN, whose magnitude does not fit in a long.
unsigned long magnitude_of(long value) {
    const unsigned long bits = static_cast<unsigned long>(value);
    return value < 0 ? 0UL - bits : bits;
}
} // namespace

// NOTE(review): every operator below updates *this in place and returns
// nothing, so `a + b` behaves like `a += b`.

// *this = *this + rhs
void Int::operator+(const Int &rhs) {
    mpz_add(inner, inner, rhs.inner);
}

void Int::operator+(unsigned long rhs) {
    mpz_add_ui(inner, inner, rhs);
}

// GMP has no signed add; a negative rhs is subtracted by its magnitude.
void Int::operator+(long rhs) {
    if (rhs < 0)
        mpz_sub_ui(inner, inner, magnitude_of(rhs));
    else
        mpz_add_ui(inner, inner, magnitude_of(rhs));
}

// *this = *this - rhs
void Int::operator-(const Int &rhs) {
    mpz_sub(inner, inner, rhs.inner);
}

void Int::operator-(unsigned long rhs) {
    mpz_sub_ui(inner, inner, rhs);
}

// Subtracting a negative rhs adds its magnitude.
void Int::operator-(long rhs) {
    if (rhs < 0)
        mpz_add_ui(inner, inner, magnitude_of(rhs));
    else
        mpz_sub_ui(inner, inner, magnitude_of(rhs));
}

// *this = *this * rhs
void Int::operator*(const Int &rhs) {
    mpz_mul(inner, inner, rhs.inner);
}

void Int::operator*(unsigned long rhs) {
    mpz_mul_ui(inner, inner, rhs);
}

// mpz_mul_si handles the sign of rhs directly.
void Int::operator*(long rhs) {
    mpz_mul_si(inner, inner, rhs);
}

// *this = *this / rhs, quotient rounded towards zero.
// NOTE(review): GMP treats division by zero as a fatal error; callers must
// rule out a zero divisor.
void Int::operator/(const Int &rhs) {
    mpz_tdiv_q(inner, inner, rhs.inner);
}

void Int::operator/(unsigned long rhs) {
    mpz_tdiv_q_ui(inner, inner, rhs);
}

// Truncating division is symmetric in sign: divide by the magnitude, then
// negate the quotient when the divisor was negative.
void Int::operator/(long rhs) {
    mpz_tdiv_q_ui(inner, inner, magnitude_of(rhs));
    if (rhs < 0)
        mpz_neg(inner, inner);
}

// *this = *this mod rhs (mpz_mod: the result is never negative).
void Int::operator%(const Int &rhs) {
    mpz_mod(inner, inner, rhs.inner);
}

void Int::operator%(unsigned long rhs) {
    mpz_mod_ui(inner, inner, rhs);
}
// NOTE(review): apart from Int::operator~ below, every line in this section is
// a bare declaration of a namespace-scope function: there is no `Int::`
// qualifier and no body. They declare free functions that merely share names
// with members declared in int.hpp, and do not define those members. They read
// like placeholders for implementations still to be written -- confirm.
void add_mul(Int &);
void add_mul(unsigned long);
void add_mul(long);
void sub_mul(Int &);
void sub_mul(unsigned long);
void sub_mul(long);
void mul_2exp(unsigned long);
// Negates the value in place with mpz_neg. Note that `~` conventionally denotes
// bitwise complement; here it is used as arithmetic negation.
void Int::operator~() { mpz_neg(this->inner, this->inner); };
void abs();
// Placeholders: division rounding towards +infinity.
void ceil_div_quotient(Int &);
void ceil_div_remainder(Int &);
void ceil_div_both(Int &);
void ceil_div(unsigned long);
void ceil_div_both(unsigned long);
void ceil_div_quotient(unsigned long);
void ceil_div_remainder(unsigned long);
void ceil_div(long);
void ceil_div_both(long);
void ceil_div_quotient(long);
void ceil_div_remainder(long);
void ceil_div_quotient_2exp(unsigned long);
void ceil_div_remainder_2exp(unsigned long);
// Placeholders: division rounding towards -infinity.
void floor_div_quotient(Int &);
void floor_div_remainder(Int &);
void floor_div_both(Int &);
void floor_div(unsigned long);
void floor_div_both(unsigned long);
void floor_div_quotient(unsigned long);
void floor_div_remainder(unsigned long);
void floor_div(long);
void floor_div_both(long);
void floor_div_quotient(long);
void floor_div_remainder(long);
void floor_div_quotient_2exp(unsigned long);
void floor_div_remainder_2exp(unsigned long);
// Placeholders: division rounding towards zero.
void truncate_div_quotient(Int &);
void truncate_div_remainder(Int &);
void truncate_div_both(Int &);
void truncate_div(unsigned long);
void truncate_div_both(unsigned long);
void truncate_div_quotient(unsigned long);
void truncate_div_remainder(unsigned long);
void truncate_div(long);
void truncate_div_both(long);
void truncate_div_quotient(long);
void truncate_div_remainder(long);
void truncate_div_quotient_2exp(unsigned long);
void truncate_div_remainder_2exp(unsigned long);
// Placeholders: modulus, exact division, divisibility and congruence tests.
void mod(Int &);
void mod(unsigned long);
void mod(long);
void div_exact(Int &);
void div_exact(unsigned long);
void div_exact(long);
int divisible(Int &);
int divisible(unsigned long);
int divisible(long);
int divisible_2exp(unsigned long);
int congruent(Int &);
int congruent(unsigned long);
int congruent(long);
int congruent_2exp(unsigned long);

View File

@@ -12,10 +12,11 @@ std::ostream &operator<<(std::ostream &os, Token const &t) {
case TokenType::OpenParen: os << "OpenParen)"; break;
case TokenType::CloseParen: os << "CloseParen)"; break;
case TokenType::Dollar: os << "Dollar)"; break;
case TokenType::Symbol: os << "Symbol, " << get<string>(t.value) << ")"; break;
case TokenType::String: os << "String, \"" << get<string>(t.value) << "\")"; break;
case TokenType::Int: os << "Int, " << get<int64_t>(t.value) << ")"; break;
case TokenType::Double: os << "Double, " << get<double>(t.value) << ")"; break;
case TokenType::Quote: os << "QUOTE)"; break;
case TokenType::Symbol: os << "Symbol, " << get<string>(*t.value) << ")"; break;
case TokenType::String: os << "String, \"" << get<string>(*t.value) << "\")"; break;
case TokenType::Int: os << "Int, " << get<int64_t>(*t.value) << ")"; break;
case TokenType::Double: os << "Double, " << get<double>(*t.value) << ")"; break;
case TokenType::End: os << "END)"; break;
default:
os << ")";
@@ -131,14 +132,15 @@ Token Lexer::next() {
// character while at EOF, even if we have exhausted the stream.
char c = ss.get();
if (ss.eof())
return {TokenType::End};
return {TokenType::End, nullopt};
if (isspace(c))
continue;
switch (c) {
case '(': return {TokenType::OpenParen};
case ')': return {TokenType::CloseParen};
case '$': return {TokenType::Dollar};
case '(': return {TokenType::OpenParen, nullopt};
case ')': return {TokenType::CloseParen, nullopt};
case '$': return {TokenType::Dollar, nullopt};
case '\'': return {TokenType::Quote, nullopt};
default:
ss.unget();
return lexNonSpecial();
@@ -146,8 +148,8 @@ Token Lexer::next() {
}
}
vector<Token> Lexer::collect() {
vector<Token> v;
deque<Token> Lexer::collect() {
deque<Token> v;
while (true) {
Token t = next();
if (t.type == TokenType::End)
@@ -158,7 +160,7 @@ vector<Token> Lexer::collect() {
return v;
}
std::vector<Token> lex(std::string s) {
std::deque<Token> lex(std::string s) {
Lexer l(s);
return l.collect();
}

View File

@@ -1,5 +1,7 @@
#include "value.hpp"
#include <iostream>
#include <lex.hpp>
#include <parse.hpp>
#include <string>
using namespace std;
@@ -8,10 +10,9 @@ int main() {
string s;
getline(cin, s);
cout << s << endl;
for (auto t : lex(s)) {
cout << t << " ";
}
Parser p(s);
print_val(*p.next());
cout << endl;
return 0;
}

100
src/parse.cpp Normal file
View File

@@ -0,0 +1,100 @@
#include <cctype>
#include <exception>
#include <lex.hpp>
#include <algorithm>
#include <parse.hpp>
#include <iostream>
using namespace std;
// Constructs a parser from a lexer: the lexer's tokens are gathered with
// Lexer::collect() and stored as the parser's pending token queue `ts`.
Parser::Parser(Lexer l)
    : ts(l.collect())
{
}
// Appends every token collected from `lexer` to the back of the pending
// token queue, after any tokens that are still waiting to be parsed.
void Parser::feed(Lexer lexer) {
    for (const Token &tok : lexer.collect()) {
        ts.push_back(tok);
    }
}
// Removes the token at the front of the pending queue and returns it.
//
// Throws std::runtime_error (which derives from std::exception) when the
// queue is empty. The diagnostic is written to stderr and is also available
// through what(), so callers that catch the exception can report it.
Token Parser::get_token() {
    if (ts.empty()) {
        const char *message = "Parser::get_token: Token requested at input end.";
        cerr << message << endl;
        throw runtime_error(message);
    }
    Token t = ts.front();
    ts.pop_front();
    return t;
}
// Puts `t` back at the front of the pending queue so that the next call to
// get_token() returns it again.
void Parser::unget_token(Token t) {
    ts.insert(ts.begin(), t);
}
// Wraps the raw text `s` in a String value; the text is stored unchanged.
String Parser::make_string(string s) {
    String wrapped {s};
    return wrapped;
}
// Builds a Symbol from `s`, upper-casing every character first so that
// symbols compare the same regardless of how they were typed.
Symbol Parser::make_symbol(string s) {
    // ::toupper takes an int that must be representable as unsigned char (or
    // be EOF). Passing a plain `char` with a negative value (any non-ASCII
    // byte where char is signed) is undefined behaviour, so each character is
    // routed through unsigned char before the call.
    transform(s.begin(), s.end(), s.begin(),
              [](unsigned char c) { return static_cast<char>(::toupper(c)); });
    return Symbol {s};
}
// Parses the value that follows a quote character and wraps it in a
// two-element list: 'a becomes (QUOTE A).
//
// This mirrors how regular lisps expand a quote into a (quote) form, which
// prevents the quoted value from being evaluated.
//
// Throws if there is no value to quote: get_token() throws when the token
// queue is exhausted, and this function throws std::runtime_error when
// parse_one() reports that no value could be produced.
LispValue Parser::parse_quote() {
    List l;
    l.list.push_back(make_symbol("QUOTE"));

    auto quoted = parse_one();
    // parse_one() may legitimately return nullopt; dereferencing an empty
    // optional is undefined behaviour, so fail loudly instead.
    if (!quoted) {
        const char *message = "Parser::parse_quote: Quote is not followed by a value.";
        cerr << message << endl;
        throw runtime_error(message);
    }

    l.list.push_back(*quoted);
    return l;
}
optional<LispValue> Parser::parse_one() {
Token t = get_token();
switch (t.type) {
case TokenType::Int: return Integer {get<int64_t>(*t.value)};
case TokenType::Double: return Double {get<double>(*t.value)};
case TokenType::String: return make_string(get<string>(*t.value));
case TokenType::Symbol: return make_symbol(get<string>(*t.value));
case TokenType::OpenParen: return parse_list();
case TokenType::CloseParen: throw "whatever";
case TokenType::Quote: return parse_quote();
// I don't know what this will actually do, in theory maybe just like the OpenParen,
// but parses things in a different namespace? unimplemented for now.
case TokenType::Dollar: return parse_one();
case TokenType::End : return nullopt;
}
return nullopt;
}
// Parses the elements of a list. Assumes the OpenParen has already been
// consumed and reads values until the matching CloseParen is found.
//
// If an End token is met before the CloseParen, an error is printed to
// stderr and the elements read so far are returned. If an element cannot be
// parsed at all, std::runtime_error is thrown. get_token() throws when the
// token queue runs dry.
LispValue Parser::parse_list() {
    List l;
    for (Token t = get_token(); t.type != TokenType::CloseParen; t = get_token()) {
        if (t.type == TokenType::End) {
            // this is clearly an error!
            cerr << "Parser::parse_list: Input ended before list ended." << endl;
            break;
        }

        // The token belongs to the next element: hand it back so that
        // parse_one() sees it.
        unget_token(t);
        optional<LispValue> element = parse_one();
        // parse_one() may return nullopt; dereferencing an empty optional is
        // undefined behaviour, so report the problem instead.
        if (!element) {
            const char *message = "Parser::parse_list: Could not parse a list element.";
            cerr << message << endl;
            throw runtime_error(message);
        }
        l.list.push_back(*element);
    }
    return l;
}
// Public entry point: parses and returns the next value from the pending
// tokens by delegating to parse_one().
optional<LispValue> Parser::next() {
    optional<LispValue> parsed = parse_one();
    return parsed;
}

87
src/tests/test.cpp Normal file
View File

@@ -0,0 +1,87 @@
#include "value.hpp"
#include <catch2/catch_test_macros.hpp>
#include <lex.hpp>
#include <parse.hpp>
using namespace std;
// Test helper: removes the first element of `dq` and returns a copy of it.
// The deque must not be empty.
template <typename T>
T pop_and_front(deque<T> &dq) {
    auto first = dq.begin();
    T head = *first;
    dq.erase(first);
    return head;
}
// Checks how the lexer separates doubles from symbols when '.' and '-'
// appear in various positions: forms with at least one digit are doubles,
// while bare combinations of '.' and '-' are symbols.
TEST_CASE("Lexer lexes doubles correctly", "[Lexer]") {
    SECTION("double and negative syntax") {
        Lexer l("(1.0 0.1 -.1 -1. . - -. .-)");

        // Tokens expected from the input above, in order.
        const Token expected[] = {
            Token({TokenType::OpenParen, nullopt}),
            Token({TokenType::Double, 1.0}),
            Token({TokenType::Double, 0.1}),
            Token({TokenType::Double, -0.1}),
            Token({TokenType::Double, -1.0}),
            Token({TokenType::Symbol, "."}),
            Token({TokenType::Symbol, "-"}),
            Token({TokenType::Symbol, "-."}),
            Token({TokenType::Symbol, ".-"}),
            Token({TokenType::CloseParen, nullopt}),
        };

        for (const Token &want : expected) {
            REQUIRE(l.next() == want);
        }
    }
}
// Exercises the parser end to end: each section feeds source text through a
// Lexer into a Parser and inspects the resulting value tree.
TEST_CASE("Parser parses correctly", "[Parser]") {
    // A symbol followed by a string; the symbol comes back upper-cased.
    SECTION("hello world") {
        Parser p (Lexer("(print \"hello world\")"));
        auto items = get<List>(*p.next()).list;
        REQUIRE(get<Symbol>(pop_and_front(items)).value == "PRINT");
        REQUIRE(get<String>(pop_and_front(items)).value == "hello world");
    }

    // Same input as the lexer test: four doubles followed by four symbols.
    SECTION("doubles") {
        Parser p (Lexer("(1.0 0.1 -.1 -1. . - -. .-)"));
        auto items = get<List>(*p.next()).list;

        const double wantNumbers[] = {1.0, 0.1, -0.1, -1.0};
        for (double want : wantNumbers) {
            REQUIRE(get<Double>(pop_and_front(items)).value == want);
        }

        const char *wantSymbols[] = {".", "-", "-.", ".-"};
        for (const char *want : wantSymbols) {
            REQUIRE(get<Symbol>(pop_and_front(items)).value == want);
        }
    }

    // Three levels of wrapping around four single-integer lists.
    SECTION("Nested lists") {
        Parser p(Lexer("((((0) (1) (2) (3))))"));
        auto level0 = get<List>(*p.next()).list;
        auto level1 = get<List>(pop_and_front(level0)).list;
        auto level2 = get<List>(pop_and_front(level1)).list;

        for (int want = 0; want < 4; ++want) {
            auto leaf = get<List>(pop_and_front(level2)).list;
            REQUIRE(get<Integer>(pop_and_front(leaf)).value == want);
        }
    }

    // Each 'x inside the nested lists must expand to the list (QUOTE X).
    SECTION("Nested quotes") {
        Parser p(Lexer("((('a 'b 'c 'd)))"));
        auto level0 = get<List>(*p.next()).list;
        auto level1 = get<List>(pop_and_front(level0)).list;
        auto level2 = get<List>(pop_and_front(level1)).list;

        const char *wantNames[] = {"A", "B", "C", "D"};
        for (const char *want : wantNames) {
            auto quoteForm = get<List>(pop_and_front(level2)).list;
            REQUIRE(get<Symbol>(pop_and_front(quoteForm)).value == "QUOTE");
            REQUIRE(get<Symbol>(pop_and_front(quoteForm)).value == want);
        }
    }
}

36
src/value.cpp Normal file
View File

@@ -0,0 +1,36 @@
#include <concepts>
#include <value.hpp>
#include <iostream>
// Generic printer for anything convertible to LispValue.
//  - A LispValue itself is handed to print_val().
//  - A type with a `value` member that can be streamed to std::cout has that
//    member printed as-is.
//  - Anything else prints the placeholder "{UNKNOWN}" followed by a newline.
template <typename T>
requires std::convertible_to<T, LispValue>
void value_printer(T t) {
    if constexpr (std::same_as<T, LispValue>) {
        print_val(t);
    } else {
        if constexpr (requires { std::cout << t.value; }) {
            std::cout << t.value;
        } else {
            std::cout << "{UNKNOWN}" << std::endl;
        }
    }
}
// List printer: writes "(", then every element followed by a single space,
// then ")". Elements are printed through value_printer().
template <>
void value_printer(List l) {
    std::cout << "(";
    for (auto it = l.list.begin(); it != l.list.end(); ++it) {
        value_printer(*it);
        std::cout << " ";
    }
    std::cout << ")";
}
// String printer: writes the text surrounded by double quotes. The text is
// emitted verbatim, without escaping.
template <>
void value_printer(String s) {
    std::cout << '"';
    std::cout << s.value;
    std::cout << '"';
}
// Prints `v` to std::cout by visiting the variant and dispatching to the
// value_printer() overload that matches the alternative it currently holds.
void print_val(LispValue v) {
    auto print_alternative = [](auto arg) {
        value_printer(arg);
    };
    std::visit(print_alternative, v);
}

5
toolchain/sanitize.cmake Normal file
View File

@@ -0,0 +1,5 @@
# Toolchain fragment that enables AddressSanitizer and
# UndefinedBehaviorSanitizer for the whole build.
#
# The flags use GCC/Clang spelling; adjust them if you build with MSVC or
# another compiler.
string(CONCAT SANITIZERS_FLAGS
  "-fsanitize=address "
  "-fsanitize=undefined "
  "-fno-sanitize-recover=all "
  "-fno-omit-frame-pointer"
)

# The same flags seed both the C++ compile flags and the executable link
# flags.
set(CMAKE_EXE_LINKER_FLAGS_INIT "${SANITIZERS_FLAGS}")
set(CMAKE_CXX_FLAGS_INIT "${SANITIZERS_FLAGS}")