109 lines
3.7 KiB
Rust
109 lines
3.7 KiB
Rust
use std::collections::VecDeque;
|
|
use std::fs::File;
|
|
use std::path::Path;
|
|
use std::str::{Chars, FromStr};
|
|
|
|
use tokens::Token;
|
|
|
|
use crate::lexer::errors::LexerErrors;
|
|
use crate::lexer::errors::LexerErrors::EmptyTextSequenceError;
|
|
use crate::lexer::fsm::{FSM, get_token};
|
|
use crate::lexer::tokens::{OpType, TokenMeta};
|
|
|
|
mod errors;
|
|
mod tokens;
|
|
mod fsm;
|
|
|
|
/// Crate-local result alias: every fallible lexer operation reports `LexerErrors`.
pub type Result<T> = std::result::Result<T, errors::LexerErrors>;
|
|
|
|
/// A tokenizer over a fixed input string.
///
/// The input is materialized as a `Vec<char>` up front; `pos` tracks
/// how far lexing has progressed.
pub struct Lexer {
    /// The full input, one element per `char`, indexed by `pos`.
    input: Vec<char>,
    /// Index into `input` of the next character to be lexed.
    pos: usize,
}
|
|
|
|
|
|
|
|
impl Lexer {
|
|
/// Create a new Lexer for the given String
|
|
/// Example:
|
|
/// ```
|
|
/// use cb_calculator::lexer;
|
|
/// let text = "some text";
|
|
/// lexer::Lexer::new(String::from(text));
|
|
/// ```
|
|
#[inline]
|
|
pub fn new(input: &str) -> Lexer {
|
|
Lexer { input: input.chars().collect(), pos: 0 }
|
|
}
|
|
|
|
// Get the next token
|
|
pub fn next(&mut self) -> Result<Option<Token>> {
|
|
if let Some((fsm, cbuf)) = self.longest_token_prefix() {
|
|
if let Some(token) = fsm::get_token(&fsm, &cbuf, TokenMeta{ pos: self.pos }) {
|
|
self.pos += cbuf.len();
|
|
Ok(Some(token))
|
|
} else {
|
|
// TODO: handling of end of stream
|
|
Err(LexerErrors::UnexpectedCharacter { char: self.input.get(self.pos), pos: self.pos })
|
|
}
|
|
} else {
|
|
// if no more tokens are there
|
|
Ok(None)
|
|
}
|
|
}
|
|
|
|
fn longest_token_prefix(&mut self) -> Option<(FSM, Vec<char>)> {
|
|
let mut fsm = FSM::new();
|
|
let mut chars = match self.input.get(self.pos..) {
|
|
Some(slice) => slice.iter(),
|
|
None => return None,
|
|
};
|
|
let mut cbuf: Vec<char> = vec![];
|
|
|
|
while let Some(cur) = chars.next() {
|
|
fsm.transition(*cur);
|
|
if fsm.is_final() {
|
|
cbuf.push(*cur);
|
|
} else if fsm.is_error() {
|
|
fsm.revert();
|
|
break
|
|
}
|
|
}
|
|
Some((fsm, cbuf))
|
|
}
|
|
|
|
}
|
|
|
|
#[cfg(test)]
mod test {
    use super::*;

    /// Lex a complete expression covering every token kind (numbers,
    /// all five operators, parentheses) and verify each token's type,
    /// payload, and source position, followed by a clean end of stream.
    #[test]
    fn parser_test() {
        let mut lexer = Lexer::new("15/3^2+20-(5*60)");
        assert!(matches!(lexer.next(), Ok(Some(Token::ID(TokenMeta { pos: 0 }, 15.0)))));
        assert!(matches!(lexer.next(), Ok(Some(Token::OP(TokenMeta { pos: 2 }, OpType::DIV)))));
        assert!(matches!(lexer.next(), Ok(Some(Token::ID(TokenMeta { pos: 3 }, 3.0)))));
        assert!(matches!(lexer.next(), Ok(Some(Token::OP(TokenMeta { pos: 4 }, OpType::POW)))));
        assert!(matches!(lexer.next(), Ok(Some(Token::ID(TokenMeta { pos: 5 }, 2.0)))));
        assert!(matches!(lexer.next(), Ok(Some(Token::OP(TokenMeta { pos: 6 }, OpType::ADD)))));
        assert!(matches!(lexer.next(), Ok(Some(Token::ID(TokenMeta { pos: 7 }, 20.0)))));
        assert!(matches!(lexer.next(), Ok(Some(Token::OP(TokenMeta { pos: 9 }, OpType::SUB)))));
        assert!(matches!(lexer.next(), Ok(Some(Token::OBR(TokenMeta { pos: 10 })))));
        assert!(matches!(lexer.next(), Ok(Some(Token::ID(TokenMeta { pos: 11 }, 5.0)))));
        assert!(matches!(lexer.next(), Ok(Some(Token::OP(TokenMeta { pos: 12 }, OpType::MUL)))));
        assert!(matches!(lexer.next(), Ok(Some(Token::ID(TokenMeta { pos: 13 }, 60.0)))));
        assert!(matches!(lexer.next(), Ok(Some(Token::CBR(TokenMeta { pos: 15 })))));
        // End of input must be reported as Ok(None), not an error.
        assert!(matches!(lexer.next(), Ok(None)));
    }

    /// A character no token can start ('@') must surface as an
    /// UnexpectedCharacter error after the valid leading tokens.
    #[test]
    fn parser_error_test() {
        let mut lexer = Lexer::new("15+@");
        assert!(matches!(lexer.next(), Ok(Some(Token::ID(TokenMeta{ pos: 0 }, 15.0)))));
        assert!(matches!(lexer.next(), Ok(Some(Token::OP(TokenMeta{ pos: 2 }, OpType::ADD)))));
        assert!(matches!(lexer.next(), Err(LexerErrors::UnexpectedCharacter { char: _, pos: _ })))
    }
}
|