diff --git a/src/lexer/errors.rs b/src/lexer/errors.rs index a08118c..457fe14 100644 --- a/src/lexer/errors.rs +++ b/src/lexer/errors.rs @@ -2,9 +2,9 @@ use thiserror::Error; #[derive(Error, Debug)] pub enum LexerErrors { - #[error("unexpected character {char} at position {pos}")] - UnexpectedCharacter { - char: char, + #[error("unexpected character {token} at position {pos}")] + InvalidTokenError { + token: String, pos: usize, }, #[error("cannot lex an empty text sequence")] diff --git a/src/lexer/mod.rs b/src/lexer/mod.rs index 988871c..b238760 100644 --- a/src/lexer/mod.rs +++ b/src/lexer/mod.rs @@ -19,10 +19,10 @@ pub type Result = std::result::Result; pub struct Lexer { input: Vec, pos: usize, + read_pointer: usize, } - impl Lexer { /// Create a new Lexer for the given String /// Example: @@ -33,18 +33,30 @@ impl Lexer { /// ``` #[inline] pub fn new(input: &str) -> Lexer { - Lexer { input: input.chars().collect(), pos: 0 } + Lexer { input: input.chars().collect(), pos: 0, read_pointer: 0 } } // Get the next token pub fn next(&mut self) -> Result> { if let Some((fsm, cbuf)) = self.longest_token_prefix() { - if let Some(token) = fsm::get_token(&fsm, &cbuf, TokenMeta{ pos: self.pos }) { - self.pos += cbuf.len(); + if let Some(token) = fsm::get_token(&fsm, &cbuf, TokenMeta { pos: self.pos }) { + // if a token could be derived + self.pos = self.read_pointer; Ok(Some(token)) } else { - // TODO: handling of end of stream - Err(LexerErrors::UnexpectedCharacter { char: self.input.get(self.pos), pos: self.pos }) + // If no token could be found + if self.input.len() > self.read_pointer { + // If not at the end of the input + Err(LexerErrors::InvalidTokenError { + token: self.input.get(self.pos..).unwrap() + .iter() + .take(5) + .collect(), + pos: self.pos }) + } else { + // If at the end of the input + Ok(None) + } } } else { // if no more tokens are there @@ -66,14 +78,15 @@ impl Lexer { cbuf.push(*cur); } else if fsm.is_error() { fsm.revert(); - break + break; } + self.read_pointer += 1; } Some((fsm, cbuf)) } - } +#[warn(dead_code)] #[cfg(test)] mod test { use super::*; @@ -94,7 +107,6 @@ mod test { assert!(matches!(lexer.next(), Ok(Some(Token::OP(TokenMeta { pos: 12 }, OpType::MUL))))); assert!(matches!(lexer.next(), Ok(Some(Token::ID(TokenMeta { pos: 13 }, 60.0))))); assert!(matches!(lexer.next(), Ok(Some(Token::CBR(TokenMeta { pos: 15 }))))); - // println!("{:?}", lexer.next()); assert!(matches!(lexer.next(), Ok(None))); } @@ -103,6 +115,25 @@ mod test { let mut lexer = Lexer::new("15+@"); assert!(matches!(lexer.next(), Ok(Some(Token::ID(TokenMeta{ pos: 0 }, 15.0))))); assert!(matches!(lexer.next(), Ok(Some(Token::OP(TokenMeta{ pos: 2 }, OpType::ADD))))); - assert!(matches!(lexer.next(), Err(LexerErrors::UnexpectedCharacter { char: _, pos: _ }))) + assert!(matches!(lexer.next(), Err(LexerErrors::InvalidTokenError { token: _, pos: _ }))) } -} + + #[test] + fn spaces_are_ignored() { + let mut lexer = Lexer::new(" 15 / 3 ^ 2 + 20 -( 5 * 60 ) "); + assert!(matches!(lexer.next(), Ok(Some(Token::ID(TokenMeta { pos: 0 }, 15.0))))); + assert!(matches!(lexer.next(), Ok(Some(Token::OP(TokenMeta { pos: 4 }, OpType::DIV))))); + assert!(matches!(lexer.next(), Ok(Some(Token::ID(TokenMeta { pos: 6 }, 3.0))))); + assert!(matches!(lexer.next(), Ok(Some(Token::OP(TokenMeta { pos: 8 }, OpType::POW))))); + assert!(matches!(lexer.next(), Ok(Some(Token::ID(TokenMeta { pos: 11 }, 2.0))))); + assert!(matches!(lexer.next(), Ok(Some(Token::OP(TokenMeta { pos: 13}, OpType::ADD))))); + assert!(matches!(lexer.next(), Ok(Some(Token::ID(TokenMeta { pos: 15 }, 20.0))))); + assert!(matches!(lexer.next(), Ok(Some(Token::OP(TokenMeta { pos: 22 }, OpType::SUB))))); + assert!(matches!(lexer.next(), Ok(Some(Token::OBR(TokenMeta { pos: 24 }))))); + assert!(matches!(lexer.next(), Ok(Some(Token::ID(TokenMeta { pos: 25 }, 5.0))))); + assert!(matches!(lexer.next(), Ok(Some(Token::OP(TokenMeta { pos: 30 }, OpType::MUL))))); + assert!(matches!(lexer.next(), Ok(Some(Token::ID(TokenMeta { pos: 33 }, 60.0))))); + assert!(matches!(lexer.next(), Ok(Some(Token::CBR(TokenMeta { pos: 36 }))))); + assert!(matches!(lexer.next(), Ok(None))); + } + }