commit 3bed67c2b6fe6b4f42666e36c0b77fc8cf3a776a Author: Yandrik Date: Sun Nov 28 23:08:27 2021 +0100 Initial commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..5afb444 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +/target +/utils \ No newline at end of file diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..26d3352 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,3 @@ +# Default ignored files +/shelf/ +/workspace.xml diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..639900d --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..0ebb7da --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..35eb1dd --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..18e4f4b --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,65 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 3 + +[[package]] +name = "cb-calculator" +version = "0.1.0" +dependencies = [ + "thiserror", +] + +[[package]] +name = "proc-macro2" +version = "1.0.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba508cc11742c0dc5c1659771673afbab7a0efab23aa17e854cbab0837ed0b43" +dependencies = [ + "unicode-xid", +] + +[[package]] +name = "quote" +version = "1.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38bc8cc6a5f2e3655e0899c1b848643b2562f853f114bfec7be120678e3ace05" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "syn" +version = "1.0.82" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8daf5dd0bb60cbd4137b1b587d2fc0ae729bc07cf01cd70b36a1ed5ade3b9d59" +dependencies = [ + "proc-macro2", + "quote", + "unicode-xid", +] + +[[package]] +name = "thiserror" +version = "1.0.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "854babe52e4df1653706b98fcfc05843010039b406875930a70e4d9644e5c417" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa32fd3f627f367fe16f893e2597ae3c05020f8bba2666a4e6ea73d377e5714b" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "unicode-xid" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..468a390 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "cb-calculator" +version = "0.1.0" +edition = "2018" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +thiserror = "1.0" diff --git a/Makefile.toml b/Makefile.toml new file mode 100644 index 0000000..e5789bb --- /dev/null +++ 
b/Makefile.toml @@ -0,0 +1,83 @@ +[config] +skip_core_tasks = true + +[env] +PLANTUML_FILE = "utils/plantuml.jar" +MODEL_PATH = "model" + + +# ================================================ +# META TASKS +# ================================================ + +[tasks.default] +alias = "tasks" + +[tasks.tasks] +command = "cargo" +description = "Lists all tasks available" +args = ["make", "--list-all-steps"] + + + +# ================================================ +# MODEL TASKS +# ================================================ + +[tasks.get_plantuml] +private = true +category = "diagrams" +script_runner = "@duckscript" +script = ''' +if not is_file ${PLANTUML_FILE} + echo plantuml not found, downloading to ${PLANTUML_FILE} from sourceforge... + wget -O ${PLANTUML_FILE} https://sourceforge.net/projects/plantuml/files/plantuml.jar/download + echo plantuml was downloaded successfully +else + echo plantuml already in utils directory, no download required +end +''' + +[tasks.diagrams] +category = "diagrams" +description = "Compiles all diagrams in the ${MODEL_PATH} folder to png images. Downloads plantuml if needed." 
+dependencies = ["get_plantuml"] +command = "java" +args = ["-jar", "${PLANTUML_FILE}", "-tpng", "${MODEL_PATH}/*.puml"] + + +# ================================================ +# BUILD TASKS +# ================================================ + +[tasks.build] +category = "build" +description = "builds all artifacts in the project" +env = { "CARGO_CMD_ARGS" = "build" } +run_task = { name = [ + "execute_for_all_rust" +] } + +[tasks.test] +category = "test" +description = "executes all tests in the project" +run_task = { name = [ + "rust_test" +] } + +[tasks.rust_test] +category = "test" +description = "executes all tests in the rust part of the project" +command = "cargo" +args = ["test"] + +[tasks.check] +category = "check" +description = "execute the cargo check command in all Rust crates" +run_task = { name = [ "rust_check" ] } + +[tasks.run_check] +category = "check" +description = "executes cargo check in the rust part of the project" +command = "cargo" +args = ["check"] \ No newline at end of file diff --git a/grammar.md b/grammar.md new file mode 100644 index 0000000..47bbfca --- /dev/null +++ b/grammar.md @@ -0,0 +1,150 @@ +# Grammar + +The grammar for the calculator. It is segmented into two parts: +1. **Regular Grammar** for the Lexer +2. **Context-free Grammar** for the Parser + +## Lexer Grammar + +```regexp + ID: \d+ +OBR: \( +CBR: \) + OP: \*|/|\^|-|\+ +``` + +### Lexer DFA +```mermaid +graph LR; +init["s0"] -- "(" --> f0((f0)) +init -- ")" --> f1((f1)) +init -- "\d" --> f2((f2)) +f2 -- "\d" --> f2 +init -- "*, /, ^, -, +" --> f3((f3)) +init -- "[ ]" --> init +init -- "otherwise" --> error[[error]] +``` + +## Parser Grammar + +``` +S -> A +A -> A + A | A - A | M +M -> M * M | M / M | G +G -> ( A ) | P ^ G | ID +P -> ( A ) | ID +``` + +Die Grammatik ist eindeutig und nicht linksrekursiv. Außerdem hat sie eine weitere interessante +Eigenschaft: +Wenn die Grammatik mehrere Potenzen parst, expandieren diese nach rechts. 
Beispiel: +``` +S +A +M +G +P ^ G +ID ^ G +ID ^ P ^ G +ID ^ ID ^ G +ID ^ ID ^ P ^ G +ID ^ ID ^ ID ^ ID +5 ^ 4 ^ 3 ^ 2 +``` + +Dabei sieht der Syntax Tree so aus: +``` +S +| +A +| +M +| +G +| \ +P G +| | \ +5 P G + | | \ + 4 P G + | | + 3 P + | + 2 +``` +Wenn also per Recursive Descent immer zuerst das "tiefste" Ergebnis ausgewertet wird, heißt +5^4^3^2 5^(4^(3^(2))), ohne weitere Berechnungen auszuführen. + +```mermaid +graph LR; +s1("[S->.A]") --> s2(["[S->A.]"]) +``` + +```mermaid +graph LR; +a1("[A->.A+A] | [A->.A-A] | [A->.M]") +a2(["[A->M.]"]) +a3("[A->A.+A] | [A->A.-A]") +a4("[A->A+.A]") +a5(["[A->A+A.]"]) +a6("[A->A-.A]") +a7(["[A->A-A.]"]) + +a1 -- M --> a2 +a1 -- A --> a3 +a3 -- + --> a4 +a4 -- A --> a5 +a3 -- - --> a6 +a6 -- A --> a7 +``` + +```mermaid +graph LR; +m1("[M->.M*M] | [M->.M/M] | [M->.G]") +m2(["[M->G.]"]) +m3("[M->M.*M] | [M->M./M]") +m4("[M->M*.M]") +m5(["[M->M*M.]"]) +m6("[M->M/.M]") +m7(["[M->M/M.]"]) + +m1 -- G --> m2 +m1 -- M --> m3 +m3 -- * --> m4 +m4 -- M --> m5 +m3 -- / --> m6 +m6 -- M --> m7 +``` + +```mermaid +graph LR; +g1("[G->.(A)] | [G->.P^G] | [G->.ID]") +g2("[G->(.A)]") +g3("[G->(A.)]") +g4(["[G->(A).]"]) +g5("[G->P.^G]") +g6("[G->P^.G]") +g7(["[G->P^G.]"]) +g8(["[G->ID.]"]) + +g1 -- "(" --> g2 +g2 -- A --> g3 +g3 -- ")" --> g4 +g1 -- P --> g5 +g5 -- ^ --> g6 +g6 -- G --> g7 +g1 -- ID --> g8 +``` + +```mermaid +graph LR; +p1("[P->.(A)] | [P->.ID]") +p2("[P->(.A)]") +p3("[P->(A.)]") +p4(["[P->(A).]"]) +p5(["[P->ID.]"]) +p1 -- "(" --> p2 +p2 -- A --> p3 +p3 -- ")" --> p4 +p1 -- ID --> p5 +``` diff --git a/s5-cb-calculator.iml b/s5-cb-calculator.iml new file mode 100644 index 0000000..2fecef3 --- /dev/null +++ b/s5-cb-calculator.iml @@ -0,0 +1,12 @@ + + + + + + + + + + + + \ No newline at end of file diff --git a/src/errors.rs b/src/errors.rs new file mode 100644 index 0000000..39dd0e2 --- /dev/null +++ b/src/errors.rs @@ -0,0 +1,7 @@ +use thiserror::Error; + +#[derive(Error, Debug)] +pub enum CalculatorErrors { + 
#[error("IOError")] + IOError, +} diff --git a/src/lexer/errors.rs b/src/lexer/errors.rs new file mode 100644 index 0000000..c50f827 --- /dev/null +++ b/src/lexer/errors.rs @@ -0,0 +1,13 @@ +use thiserror::Error; + +#[derive(Error, Debug)] +pub enum LexerErrors { + #[error("unexpected character {char} at position {pos} in context {context}")] + UnexpectedCharacter { + char: char, + pos: u32, + context: String, + }, + #[error("cannot lex an empty text sequence")] + EmptyTextSequenceError, +} diff --git a/src/lexer/mod.rs b/src/lexer/mod.rs new file mode 100644 index 0000000..a6febd8 --- /dev/null +++ b/src/lexer/mod.rs @@ -0,0 +1,42 @@ +mod errors; +mod tokens; + +use std::collections::VecDeque; +use std::fs::File; +use std::path::Path; +use std::str::Chars; +use tokens::Token; +use crate::lexer::errors::LexerErrors; +use crate::lexer::errors::LexerErrors::EmptyTextSequenceError; + +pub type Result = std::result::Result; + +pub struct Lexer<'a> { + input: String, + chars: Chars<'a>, +} + +impl Lexer<'_> { + + /// Create a new Lexer for the given String + /// Example: + /// ``` + /// use cb_calculator::lexer; + /// let text = "some text"; + /// lexer::Lexer::new(String::from(text)); + /// ``` + #[inline] + pub fn new(input: String) -> Lexer { + Lexer { input, chars: input.chars() } + } + + // Get the next token + pub fn next(&mut self) -> Result> { + let mut buffer: Vec = Vec::new(); + loop { + if let Some(c) = self.chars.next() { + buffer.push(c); + } + } + } +} diff --git a/src/lexer/tokens.rs b/src/lexer/tokens.rs new file mode 100644 index 0000000..1e5c708 --- /dev/null +++ b/src/lexer/tokens.rs @@ -0,0 +1,42 @@ + +/// # Token Metadata +/// Data contained is: +/// * File that the token was parsed in +/// * Line that the token was parsed in +/// * Position of the *first character making up the token* in said line +#[derive(Debug)] +pub struct TokenMeta { + file: String, + line: u32, + pos: u32, +} + +#[derive(Debug)] +pub enum OpType { + MUL, + DIV, + ADD, + SUB, 
+ POW, +} + +/// Bracket types, either OPEN or CLOSE. +#[derive(Debug)] +pub enum BrType { + OPEN, + CLOSE, +} + +/// # Tokens +/// The tokens all contain [metadata](TokenMeta). +/// 1. `ID`: A number, parsed into 64 bit floating-point. +/// 1. `OBR`: An opening bracket (`(`). +/// 1. `CBR`: A closing bracket (`)`). +/// 1. `OP`: An operation. Containing an [Operation Type](OpType). +#[derive(Debug)] +pub enum Token { + ID (TokenMeta, f64), + OBR (TokenMeta), + CBR (TokenMeta), + OP (TokenMeta, OpType), +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..0e5d4cb --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,6 @@ +pub mod lexer; +pub mod errors; + +pub fn test() { + println!("{}", "hi there"); +} diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..17212d4 --- /dev/null +++ b/src/main.rs @@ -0,0 +1,6 @@ +use cb_calculator::lexer::Lexer; + +fn main() { + let lexer = Lexer::new("15+(30^2-5)*2/4".chars().collect()); + println!("Hello, world!"); +}