feat: completed lexer
This commit is contained in:
parent
3bed67c2b6
commit
e1ae39199b
14
Cargo.lock
generated
14
Cargo.lock
generated
@ -2,13 +2,6 @@
|
||||
# It is not intended for manual editing.
|
||||
version = 3
|
||||
|
||||
[[package]]
|
||||
name = "cb-calculator"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"thiserror",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "1.0.32"
|
||||
@ -27,6 +20,13 @@ dependencies = [
|
||||
"proc-macro2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "s5-cb-calculator"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"thiserror",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "1.0.82"
|
||||
|
@ -1,5 +1,5 @@
|
||||
[package]
|
||||
name = "cb-calculator"
|
||||
name = "s5-cb-calculator"
|
||||
version = "0.1.0"
|
||||
edition = "2018"
|
||||
|
||||
|
@ -2,11 +2,10 @@ use thiserror::Error;
|
||||
|
||||
#[derive(Error, Debug)]
|
||||
pub enum LexerErrors {
|
||||
#[error("unexpected character {char} at position {pos} in context {context}")]
|
||||
#[error("unexpected character {char} at position {pos}")]
|
||||
UnexpectedCharacter {
|
||||
char: char,
|
||||
pos: u32,
|
||||
context: String,
|
||||
pos: usize,
|
||||
},
|
||||
#[error("cannot lex an empty text sequence")]
|
||||
EmptyTextSequenceError,
|
||||
|
85
src/lexer/fsm.rs
Normal file
85
src/lexer/fsm.rs
Normal file
@ -0,0 +1,85 @@
|
||||
use std::str::FromStr;
|
||||
|
||||
use crate::lexer::tokens::{OpType, Token, TokenMeta};
|
||||
|
||||
/// Every state of the lexer automaton.
///
/// State 1 is the start state (see `FSM::new`), states 2 to 5 each accept one
/// kind of token (see `get_token`) and state 6 is the error state.
const STATES: [i32; 6] = [1, 2, 3, 4, 5, 6];

/// States in which the characters read so far form a complete token.
const FINAL_STATES: [i32; 4] = [2, 3, 4, 5];

/// State entered when a character cannot continue the current token.
const ERROR_STATE: i32 = 6;

/// Transition table, indexed as `TRANSITIONS[state - 1][column]`.
///
/// The column is selected by the input character (see `FSM::get_transition`);
/// `_` stands for every character that has no column of its own.
///
/// ```text
/// state \ char |  (   )   0..=9   * / ^ - +   _   SPACE
/// -------------+----------------------------------------
///       1      |  2   3     4         5       6     1
///       2      |  6   6     6         6       6     6
///       3      |  6   6     6         6       6     6
///       4      |  6   6     4         6       6     6
///       5      |  6   6     6         6       6     6
///       6      |  6   6     6         6       6     6
/// ```
const TRANSITIONS: [[i32; 6]; 6] = [
    [2, 3, 4, 5, 6, 1],
    [6, 6, 6, 6, 6, 6],
    [6, 6, 6, 6, 6, 6],
    [6, 6, 4, 6, 6, 6],
    [6, 6, 6, 6, 6, 6],
    [6, 6, 6, 6, 6, 6],
];
|
||||
|
||||
|
||||
/// Finite state machine used by the lexer to recognise tokens.
///
/// It stores the current state together with the state that was active before
/// the most recent transition, so that a single transition can be undone.
#[derive(Clone, Copy, Debug)]
pub struct FSM {
    /// State the machine is currently in.
    state: i32,
    /// State the machine was in before the latest transition.
    last: i32,
}
|
||||
|
||||
impl FSM {
|
||||
#[inline]
|
||||
pub fn new() -> FSM {
|
||||
FSM { state: 1, last: 1 }
|
||||
}
|
||||
|
||||
pub fn is_final(&self) -> bool {
|
||||
FINAL_STATES.contains(&self.state)
|
||||
}
|
||||
|
||||
pub fn is_error(&self) -> bool {
|
||||
ERROR_STATE == self.state
|
||||
}
|
||||
|
||||
/// revert to last state
|
||||
pub fn revert(&mut self) {
|
||||
self.state = self.last;
|
||||
}
|
||||
|
||||
pub fn get_state(&self) -> i32 {
|
||||
self.state
|
||||
}
|
||||
|
||||
pub fn transition(&mut self, c: char) {
|
||||
let new_state = self.get_transition(c);
|
||||
self.last = self.state;
|
||||
self.state = new_state;
|
||||
}
|
||||
|
||||
fn get_transition(&self, c: char) -> i32 {
|
||||
let lut_col = match c {
|
||||
'(' => 0,
|
||||
')' => 1,
|
||||
'0'..='9' => 2,
|
||||
'*' | '/' | '^' | '-' | '+' => 3,
|
||||
' ' => 5,
|
||||
_ => 4,
|
||||
};
|
||||
TRANSITIONS[(&self.state - 1) as usize][lut_col]
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_token(fsm: &FSM, cbuf: &Vec<char>, meta: TokenMeta) -> Option<Token> {
|
||||
match fsm.state {
|
||||
1 | 6 => None,
|
||||
2 => Some(Token::OBR(meta)),
|
||||
3 => Some(Token::CBR(meta)),
|
||||
4 => Some(Token::ID(meta, f64::from_str(&cbuf.iter().collect::<String>()).unwrap())),
|
||||
5 => Some(Token::OP(meta, OpType::from_char(*cbuf.last().unwrap()).unwrap())),
|
||||
_ => panic!("Invalid State {}!", fsm.state),
|
||||
}
|
||||
}
|
@ -1,23 +1,29 @@
|
||||
mod errors;
|
||||
mod tokens;
|
||||
|
||||
use std::collections::VecDeque;
|
||||
use std::fs::File;
|
||||
use std::path::Path;
|
||||
use std::str::Chars;
|
||||
use std::str::{Chars, FromStr};
|
||||
|
||||
use tokens::Token;
|
||||
|
||||
use crate::lexer::errors::LexerErrors;
|
||||
use crate::lexer::errors::LexerErrors::EmptyTextSequenceError;
|
||||
use crate::lexer::fsm::{FSM, get_token};
|
||||
use crate::lexer::tokens::{OpType, TokenMeta};
|
||||
|
||||
mod errors;
|
||||
mod tokens;
|
||||
mod fsm;
|
||||
|
||||
pub type Result<T> = std::result::Result<T, errors::LexerErrors>;
|
||||
|
||||
pub struct Lexer<'a> {
|
||||
input: String,
|
||||
chars: Chars<'a>,
|
||||
pub struct Lexer {
|
||||
input: Vec<char>,
|
||||
pos: usize,
|
||||
}
|
||||
|
||||
impl Lexer<'_> {
|
||||
|
||||
|
||||
impl Lexer {
|
||||
/// Create a new Lexer for the given String
|
||||
/// Example:
|
||||
/// ```
|
||||
@ -26,17 +32,77 @@ impl Lexer<'_> {
|
||||
/// lexer::Lexer::new(String::from(text));
|
||||
/// ```
|
||||
#[inline]
|
||||
pub fn new(input: String) -> Lexer {
|
||||
Lexer { input, chars: input.chars() }
|
||||
pub fn new(input: &str) -> Lexer {
|
||||
Lexer { input: input.chars().collect(), pos: 0 }
|
||||
}
|
||||
|
||||
// Get the next token
|
||||
pub fn next(&mut self) -> Result<Option<Token>> {
|
||||
let mut buffer: Vec<char> = Vec::new();
|
||||
loop {
|
||||
if let Some(c) = self.chars.next() {
|
||||
buffer.push(c);
|
||||
if let Some((fsm, cbuf)) = self.longest_token_prefix() {
|
||||
if let Some(token) = fsm::get_token(&fsm, &cbuf, TokenMeta{ pos: self.pos }) {
|
||||
self.pos += cbuf.len();
|
||||
Ok(Some(token))
|
||||
} else {
|
||||
// TODO: handling of end of stream
|
||||
Err(LexerErrors::UnexpectedCharacter { char: self.input.get(self.pos), pos: self.pos })
|
||||
}
|
||||
} else {
|
||||
// if no more tokens are there
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
|
||||
fn longest_token_prefix(&mut self) -> Option<(FSM, Vec<char>)> {
|
||||
let mut fsm = FSM::new();
|
||||
let mut chars = match self.input.get(self.pos..) {
|
||||
Some(slice) => slice.iter(),
|
||||
None => return None,
|
||||
};
|
||||
let mut cbuf: Vec<char> = vec![];
|
||||
|
||||
while let Some(cur) = chars.next() {
|
||||
fsm.transition(*cur);
|
||||
if fsm.is_final() {
|
||||
cbuf.push(*cur);
|
||||
} else if fsm.is_error() {
|
||||
fsm.revert();
|
||||
break
|
||||
}
|
||||
}
|
||||
Some((fsm, cbuf))
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use super::*;

    /// Asserts that the next result produced by the lexer matches a pattern.
    macro_rules! assert_next {
        ($lexer:ident, $expected:pat) => {
            assert!(matches!($lexer.next(), $expected))
        };
    }

    /// Lexes a complete expression and checks every token and its position.
    #[test]
    fn parser_test() {
        let mut lexer = Lexer::new("15/3^2+20-(5*60)");
        assert_next!(lexer, Ok(Some(Token::ID(TokenMeta { pos: 0 }, 15.0))));
        assert_next!(lexer, Ok(Some(Token::OP(TokenMeta { pos: 2 }, OpType::DIV))));
        assert_next!(lexer, Ok(Some(Token::ID(TokenMeta { pos: 3 }, 3.0))));
        assert_next!(lexer, Ok(Some(Token::OP(TokenMeta { pos: 4 }, OpType::POW))));
        assert_next!(lexer, Ok(Some(Token::ID(TokenMeta { pos: 5 }, 2.0))));
        assert_next!(lexer, Ok(Some(Token::OP(TokenMeta { pos: 6 }, OpType::ADD))));
        assert_next!(lexer, Ok(Some(Token::ID(TokenMeta { pos: 7 }, 20.0))));
        assert_next!(lexer, Ok(Some(Token::OP(TokenMeta { pos: 9 }, OpType::SUB))));
        assert_next!(lexer, Ok(Some(Token::OBR(TokenMeta { pos: 10 }))));
        assert_next!(lexer, Ok(Some(Token::ID(TokenMeta { pos: 11 }, 5.0))));
        assert_next!(lexer, Ok(Some(Token::OP(TokenMeta { pos: 12 }, OpType::MUL))));
        assert_next!(lexer, Ok(Some(Token::ID(TokenMeta { pos: 13 }, 60.0))));
        assert_next!(lexer, Ok(Some(Token::CBR(TokenMeta { pos: 15 }))));
        // After the last token the lexer reports the end of the input.
        assert_next!(lexer, Ok(None));
    }

    /// A character outside the token alphabet is reported as an error.
    #[test]
    fn parser_error_test() {
        let mut lexer = Lexer::new("15+@");
        assert_next!(lexer, Ok(Some(Token::ID(TokenMeta { pos: 0 }, 15.0))));
        assert_next!(lexer, Ok(Some(Token::OP(TokenMeta { pos: 2 }, OpType::ADD))));
        assert_next!(lexer, Err(LexerErrors::UnexpectedCharacter { char: _, pos: _ }));
    }
}
|
||||
|
@ -6,9 +6,7 @@
|
||||
/// * Position of the *first character making up the token* in said line
|
||||
#[derive(Debug)]
|
||||
pub struct TokenMeta {
|
||||
file: String,
|
||||
line: u32,
|
||||
pos: u32,
|
||||
pub pos: usize,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
@ -20,6 +18,20 @@ pub enum OpType {
|
||||
POW,
|
||||
}
|
||||
|
||||
impl OpType {
|
||||
#[inline]
|
||||
pub fn from_char(c: char) -> Option<OpType> {
|
||||
match c {
|
||||
'*' => Some(OpType::MUL),
|
||||
'/' => Some(OpType::DIV),
|
||||
'+' => Some(OpType::ADD),
|
||||
'-' => Some(OpType::SUB),
|
||||
'^' => Some(OpType::POW),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Bracket types, either OPEN or CLOSE.
|
||||
#[derive(Debug)]
|
||||
pub enum BrType {
|
||||
|
@ -1,6 +1,9 @@
|
||||
use cb_calculator::lexer::Lexer;
|
||||
use s5_cb_calculator::lexer::Lexer;
|
||||
|
||||
fn main() {
|
||||
let lexer = Lexer::new("15+(30^2-5)*2/4".chars().collect());
|
||||
let mut lexer = Lexer::new("15+(30^2-5)*2/4".chars().collect());
|
||||
while let Ok(Some(token)) = lexer.next() {
|
||||
println!("Token: {:?}", token)
|
||||
}
|
||||
println!("Hello, world!");
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user