feat: completed lexer
This commit is contained in:
parent
3bed67c2b6
commit
e1ae39199b
14
Cargo.lock
generated
14
Cargo.lock
generated
@ -2,13 +2,6 @@
|
|||||||
# It is not intended for manual editing.
|
# It is not intended for manual editing.
|
||||||
version = 3
|
version = 3
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "cb-calculator"
|
|
||||||
version = "0.1.0"
|
|
||||||
dependencies = [
|
|
||||||
"thiserror",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "proc-macro2"
|
name = "proc-macro2"
|
||||||
version = "1.0.32"
|
version = "1.0.32"
|
||||||
@ -27,6 +20,13 @@ dependencies = [
|
|||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "s5-cb-calculator"
|
||||||
|
version = "0.1.0"
|
||||||
|
dependencies = [
|
||||||
|
"thiserror",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "syn"
|
name = "syn"
|
||||||
version = "1.0.82"
|
version = "1.0.82"
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "cb-calculator"
|
name = "s5-cb-calculator"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
edition = "2018"
|
edition = "2018"
|
||||||
|
|
||||||
|
@ -2,11 +2,10 @@ use thiserror::Error;
|
|||||||
|
|
||||||
#[derive(Error, Debug)]
|
#[derive(Error, Debug)]
|
||||||
pub enum LexerErrors {
|
pub enum LexerErrors {
|
||||||
#[error("unexpected character {char} at position {pos} in context {context}")]
|
#[error("unexpected character {char} at position {pos}")]
|
||||||
UnexpectedCharacter {
|
UnexpectedCharacter {
|
||||||
char: char,
|
char: char,
|
||||||
pos: u32,
|
pos: usize,
|
||||||
context: String,
|
|
||||||
},
|
},
|
||||||
#[error("cannot lex an empty text sequence")]
|
#[error("cannot lex an empty text sequence")]
|
||||||
EmptyTextSequenceError,
|
EmptyTextSequenceError,
|
||||||
|
85
src/lexer/fsm.rs
Normal file
85
src/lexer/fsm.rs
Normal file
@ -0,0 +1,85 @@
|
|||||||
|
use std::str::FromStr;
|
||||||
|
|
||||||
|
use crate::lexer::tokens::{OpType, Token, TokenMeta};
|
||||||
|
|
||||||
|
/// Every state of the lexer automaton.
///
/// * `1` – start state (nothing recognised yet)
/// * `2` – opening bracket
/// * `3` – closing bracket
/// * `4` – number (one or more digits)
/// * `5` – operator
/// * `6` – error state
const STATES: [i32; 6] = [1, 2, 3, 4, 5, 6];

/// States in which the characters read so far form a complete token.
const FINAL_STATES: [i32; 4] = [2, 3, 4, 5];

/// Sink state entered on any character that cannot extend the current token.
const ERROR_STATE: i32 = 6;

/// State a freshly created automaton starts in.
const START_STATE: i32 = 1;

/// Transition table, indexed by `[state - 1][character class]`.
///
/// ```text
/// \CHR  (   )   0..=9   *|/|^|-|+   _   SPACE
/// ST
/// 1     2   3   4       5           6   1
/// 2     6   6   6       6           6   6
/// 3     6   6   6       6           6   6
/// 4     6   6   4       6           6   6
/// 5     6   6   6       6           6   6
/// 6     6   6   6       6           6   6
/// ```
///
/// The `_` column stands for every character not covered by another column.
const TRANSITIONS: [[i32; 6]; 6] = [
    [2, 3, 4, 5, 6, 1],
    [6, 6, 6, 6, 6, 6],
    [6, 6, 6, 6, 6, 6],
    [6, 6, 4, 6, 6, 6],
    [6, 6, 6, 6, 6, 6],
    [6, 6, 6, 6, 6, 6],
];

/// Table-driven finite state machine that recognises a single token.
///
/// Besides the current state it remembers the state it was in before the most
/// recent transition, so that one step into the error state can be undone.
#[derive(Debug, Copy, Clone)]
pub struct FSM {
    /// Current state, always one of [`STATES`].
    state: i32,
    /// State before the most recent call to [`FSM::transition`].
    last: i32,
}

impl FSM {
    /// Create an automaton positioned in the start state.
    #[inline]
    pub fn new() -> FSM {
        FSM {
            state: START_STATE,
            last: START_STATE,
        }
    }

    /// Whether the characters consumed so far form a complete token.
    pub fn is_final(&self) -> bool {
        FINAL_STATES.contains(&self.state)
    }

    /// Whether the automaton has entered the error state.
    pub fn is_error(&self) -> bool {
        self.state == ERROR_STATE
    }

    /// Revert to the state held before the most recent transition.
    ///
    /// Only a single step is remembered: calling this twice in a row has the
    /// same effect as calling it once.
    pub fn revert(&mut self) {
        self.state = self.last;
    }

    /// Current state number.
    pub fn get_state(&self) -> i32 {
        self.state
    }

    /// Consume `c`, remembering the previous state so it can be restored with
    /// [`FSM::revert`].
    pub fn transition(&mut self, c: char) {
        let new_state = self.get_transition(c);
        self.last = self.state;
        self.state = new_state;
    }

    /// Look up the successor of the current state for character `c`.
    fn get_transition(&self, c: char) -> i32 {
        // Column of `TRANSITIONS` for the character class of `c`.
        let lut_col = match c {
            '(' => 0,
            ')' => 1,
            '0'..='9' => 2,
            '*' | '/' | '^' | '-' | '+' => 3,
            ' ' => 5,
            _ => 4,
        };
        debug_assert!(
            STATES.contains(&self.state),
            "state {} is outside the transition table",
            self.state
        );
        // States are numbered from 1 while the table rows start at 0; the cast
        // cannot wrap because `state` only ever holds values from `STATES`.
        TRANSITIONS[(self.state - 1) as usize][lut_col]
    }
}

impl Default for FSM {
    /// Equivalent to [`FSM::new`].
    fn default() -> Self {
        Self::new()
    }
}
|
||||||
|
|
||||||
|
pub fn get_token(fsm: &FSM, cbuf: &Vec<char>, meta: TokenMeta) -> Option<Token> {
|
||||||
|
match fsm.state {
|
||||||
|
1 | 6 => None,
|
||||||
|
2 => Some(Token::OBR(meta)),
|
||||||
|
3 => Some(Token::CBR(meta)),
|
||||||
|
4 => Some(Token::ID(meta, f64::from_str(&cbuf.iter().collect::<String>()).unwrap())),
|
||||||
|
5 => Some(Token::OP(meta, OpType::from_char(*cbuf.last().unwrap()).unwrap())),
|
||||||
|
_ => panic!("Invalid State {}!", fsm.state),
|
||||||
|
}
|
||||||
|
}
|
@ -1,23 +1,29 @@
|
|||||||
mod errors;
|
|
||||||
mod tokens;
|
|
||||||
|
|
||||||
use std::collections::VecDeque;
|
use std::collections::VecDeque;
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
use std::str::Chars;
|
use std::str::{Chars, FromStr};
|
||||||
|
|
||||||
use tokens::Token;
|
use tokens::Token;
|
||||||
|
|
||||||
use crate::lexer::errors::LexerErrors;
|
use crate::lexer::errors::LexerErrors;
|
||||||
use crate::lexer::errors::LexerErrors::EmptyTextSequenceError;
|
use crate::lexer::errors::LexerErrors::EmptyTextSequenceError;
|
||||||
|
use crate::lexer::fsm::{FSM, get_token};
|
||||||
|
use crate::lexer::tokens::{OpType, TokenMeta};
|
||||||
|
|
||||||
|
mod errors;
|
||||||
|
mod tokens;
|
||||||
|
mod fsm;
|
||||||
|
|
||||||
pub type Result<T> = std::result::Result<T, errors::LexerErrors>;
|
pub type Result<T> = std::result::Result<T, errors::LexerErrors>;
|
||||||
|
|
||||||
pub struct Lexer<'a> {
|
pub struct Lexer {
|
||||||
input: String,
|
input: Vec<char>,
|
||||||
chars: Chars<'a>,
|
pos: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Lexer<'_> {
|
|
||||||
|
|
||||||
|
|
||||||
|
impl Lexer {
|
||||||
/// Create a new Lexer for the given String
|
/// Create a new Lexer for the given String
|
||||||
/// Example:
|
/// Example:
|
||||||
/// ```
|
/// ```
|
||||||
@ -26,17 +32,77 @@ impl Lexer<'_> {
|
|||||||
/// lexer::Lexer::new(String::from(text));
|
/// lexer::Lexer::new(String::from(text));
|
||||||
/// ```
|
/// ```
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn new(input: String) -> Lexer {
|
pub fn new(input: &str) -> Lexer {
|
||||||
Lexer { input, chars: input.chars() }
|
Lexer { input: input.chars().collect(), pos: 0 }
|
||||||
}
|
}
|
||||||
|
|
||||||
// Get the next token
|
// Get the next token
|
||||||
pub fn next(&mut self) -> Result<Option<Token>> {
|
pub fn next(&mut self) -> Result<Option<Token>> {
|
||||||
let mut buffer: Vec<char> = Vec::new();
|
if let Some((fsm, cbuf)) = self.longest_token_prefix() {
|
||||||
loop {
|
if let Some(token) = fsm::get_token(&fsm, &cbuf, TokenMeta{ pos: self.pos }) {
|
||||||
if let Some(c) = self.chars.next() {
|
self.pos += cbuf.len();
|
||||||
buffer.push(c);
|
Ok(Some(token))
|
||||||
|
} else {
|
||||||
|
// TODO: handling of end of stream
|
||||||
|
Err(LexerErrors::UnexpectedCharacter { char: self.input.get(self.pos), pos: self.pos })
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
// if no more tokens are there
|
||||||
|
Ok(None)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn longest_token_prefix(&mut self) -> Option<(FSM, Vec<char>)> {
|
||||||
|
let mut fsm = FSM::new();
|
||||||
|
let mut chars = match self.input.get(self.pos..) {
|
||||||
|
Some(slice) => slice.iter(),
|
||||||
|
None => return None,
|
||||||
|
};
|
||||||
|
let mut cbuf: Vec<char> = vec![];
|
||||||
|
|
||||||
|
while let Some(cur) = chars.next() {
|
||||||
|
fsm.transition(*cur);
|
||||||
|
if fsm.is_final() {
|
||||||
|
cbuf.push(*cur);
|
||||||
|
} else if fsm.is_error() {
|
||||||
|
fsm.revert();
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Some((fsm, cbuf))
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod test {
    use super::*;

    /// A complete expression is split into the expected tokens, each carrying
    /// the index of its first character, followed by `Ok(None)`.
    #[test]
    fn lexes_full_expression() {
        let mut lexer = Lexer::new("15/3^2+20-(5*60)");
        assert!(matches!(lexer.next(), Ok(Some(Token::ID(TokenMeta { pos: 0 }, 15.0)))));
        assert!(matches!(lexer.next(), Ok(Some(Token::OP(TokenMeta { pos: 2 }, OpType::DIV)))));
        assert!(matches!(lexer.next(), Ok(Some(Token::ID(TokenMeta { pos: 3 }, 3.0)))));
        assert!(matches!(lexer.next(), Ok(Some(Token::OP(TokenMeta { pos: 4 }, OpType::POW)))));
        assert!(matches!(lexer.next(), Ok(Some(Token::ID(TokenMeta { pos: 5 }, 2.0)))));
        assert!(matches!(lexer.next(), Ok(Some(Token::OP(TokenMeta { pos: 6 }, OpType::ADD)))));
        assert!(matches!(lexer.next(), Ok(Some(Token::ID(TokenMeta { pos: 7 }, 20.0)))));
        assert!(matches!(lexer.next(), Ok(Some(Token::OP(TokenMeta { pos: 9 }, OpType::SUB)))));
        assert!(matches!(lexer.next(), Ok(Some(Token::OBR(TokenMeta { pos: 10 })))));
        assert!(matches!(lexer.next(), Ok(Some(Token::ID(TokenMeta { pos: 11 }, 5.0)))));
        assert!(matches!(lexer.next(), Ok(Some(Token::OP(TokenMeta { pos: 12 }, OpType::MUL)))));
        assert!(matches!(lexer.next(), Ok(Some(Token::ID(TokenMeta { pos: 13 }, 60.0)))));
        assert!(matches!(lexer.next(), Ok(Some(Token::CBR(TokenMeta { pos: 15 })))));
        assert!(matches!(lexer.next(), Ok(None)));
    }

    /// A character that cannot start a token is reported together with its
    /// position instead of being skipped.
    #[test]
    fn reports_unexpected_character() {
        let mut lexer = Lexer::new("15+@");
        assert!(matches!(lexer.next(), Ok(Some(Token::ID(TokenMeta { pos: 0 }, 15.0)))));
        assert!(matches!(lexer.next(), Ok(Some(Token::OP(TokenMeta { pos: 2 }, OpType::ADD)))));
        assert!(matches!(
            lexer.next(),
            Err(LexerErrors::UnexpectedCharacter { char: '@', pos: 3 })
        ));
    }
}
|
||||||
|
@ -6,9 +6,7 @@
|
|||||||
/// * Position of the *first character making up the token* in said line
|
/// * Position of the *first character making up the token* in said line
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct TokenMeta {
|
pub struct TokenMeta {
|
||||||
file: String,
|
pub pos: usize,
|
||||||
line: u32,
|
|
||||||
pos: u32,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
@ -20,6 +18,20 @@ pub enum OpType {
|
|||||||
POW,
|
POW,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl OpType {
|
||||||
|
#[inline]
|
||||||
|
pub fn from_char(c: char) -> Option<OpType> {
|
||||||
|
match c {
|
||||||
|
'*' => Some(OpType::MUL),
|
||||||
|
'/' => Some(OpType::DIV),
|
||||||
|
'+' => Some(OpType::ADD),
|
||||||
|
'-' => Some(OpType::SUB),
|
||||||
|
'^' => Some(OpType::POW),
|
||||||
|
_ => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Bracket types, either OPEN or CLOSE.
|
/// Bracket types, either OPEN or CLOSE.
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub enum BrType {
|
pub enum BrType {
|
||||||
|
@ -1,6 +1,9 @@
|
|||||||
use cb_calculator::lexer::Lexer;
|
use s5_cb_calculator::lexer::Lexer;
|
||||||
|
|
||||||
fn main() {
|
fn main() {
|
||||||
let lexer = Lexer::new("15+(30^2-5)*2/4".chars().collect());
|
let mut lexer = Lexer::new("15+(30^2-5)*2/4".chars().collect());
|
||||||
|
while let Ok(Some(token)) = lexer.next() {
|
||||||
|
println!("Token: {:?}", token)
|
||||||
|
}
|
||||||
println!("Hello, world!");
|
println!("Hello, world!");
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user