fix: resolve issues with error reporting and token positions while lexing

This commit is contained in:
Yandrik 2021-11-29 20:45:54 +01:00
parent e1ae39199b
commit 8df8689ca4
2 changed files with 45 additions and 14 deletions

View File

@ -2,9 +2,9 @@ use thiserror::Error;
#[derive(Error, Debug)] #[derive(Error, Debug)]
pub enum LexerErrors { pub enum LexerErrors {
#[error("unexpected character {char} at position {pos}")] #[error("unexpected character {token} at position {pos}")]
UnexpectedCharacter { InvalidTokenError {
char: char, token: String,
pos: usize, pos: usize,
}, },
#[error("cannot lex an empty text sequence")] #[error("cannot lex an empty text sequence")]

View File

@ -19,10 +19,10 @@ pub type Result<T> = std::result::Result<T, errors::LexerErrors>;
pub struct Lexer { pub struct Lexer {
input: Vec<char>, input: Vec<char>,
pos: usize, pos: usize,
read_pointer: usize,
} }
impl Lexer { impl Lexer {
/// Create a new Lexer for the given String /// Create a new Lexer for the given String
/// Example: /// Example:
@ -33,18 +33,30 @@ impl Lexer {
/// ``` /// ```
#[inline] #[inline]
pub fn new(input: &str) -> Lexer { pub fn new(input: &str) -> Lexer {
Lexer { input: input.chars().collect(), pos: 0 } Lexer { input: input.chars().collect(), pos: 0, read_pointer: 0 }
} }
// Get the next token // Get the next token
pub fn next(&mut self) -> Result<Option<Token>> { pub fn next(&mut self) -> Result<Option<Token>> {
if let Some((fsm, cbuf)) = self.longest_token_prefix() { if let Some((fsm, cbuf)) = self.longest_token_prefix() {
if let Some(token) = fsm::get_token(&fsm, &cbuf, TokenMeta{ pos: self.pos }) { if let Some(token) = fsm::get_token(&fsm, &cbuf, TokenMeta { pos: self.pos }) {
self.pos += cbuf.len(); // if a token could be derived
self.pos = self.read_pointer;
Ok(Some(token)) Ok(Some(token))
} else { } else {
// TODO: handling of end of stream // If no token could be found
Err(LexerErrors::UnexpectedCharacter { char: self.input.get(self.pos), pos: self.pos }) if self.input.len() > self.read_pointer {
// If not at the end of the input
Err(LexerErrors::InvalidTokenError {
token: self.input.get(self.pos..).unwrap()
.iter()
.take(5)
.collect(),
pos: self.pos })
} else {
// If at the end of the input
Ok(None)
}
} }
} else { } else {
// if no more tokens are there // if no more tokens are there
@ -66,14 +78,15 @@ impl Lexer {
cbuf.push(*cur); cbuf.push(*cur);
} else if fsm.is_error() { } else if fsm.is_error() {
fsm.revert(); fsm.revert();
break break;
} }
self.read_pointer += 1;
} }
Some((fsm, cbuf)) Some((fsm, cbuf))
} }
} }
#[warn(dead_code)]
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use super::*; use super::*;
@ -94,7 +107,6 @@ mod test {
assert!(matches!(lexer.next(), Ok(Some(Token::OP(TokenMeta { pos: 12 }, OpType::MUL))))); assert!(matches!(lexer.next(), Ok(Some(Token::OP(TokenMeta { pos: 12 }, OpType::MUL)))));
assert!(matches!(lexer.next(), Ok(Some(Token::ID(TokenMeta { pos: 13 }, 60.0))))); assert!(matches!(lexer.next(), Ok(Some(Token::ID(TokenMeta { pos: 13 }, 60.0)))));
assert!(matches!(lexer.next(), Ok(Some(Token::CBR(TokenMeta { pos: 15 }))))); assert!(matches!(lexer.next(), Ok(Some(Token::CBR(TokenMeta { pos: 15 })))));
// println!("{:?}", lexer.next());
assert!(matches!(lexer.next(), Ok(None))); assert!(matches!(lexer.next(), Ok(None)));
} }
@ -103,6 +115,25 @@ mod test {
let mut lexer = Lexer::new("15+@"); let mut lexer = Lexer::new("15+@");
assert!(matches!(lexer.next(), Ok(Some(Token::ID(TokenMeta{ pos: 0 }, 15.0))))); assert!(matches!(lexer.next(), Ok(Some(Token::ID(TokenMeta{ pos: 0 }, 15.0)))));
assert!(matches!(lexer.next(), Ok(Some(Token::OP(TokenMeta{ pos: 2 }, OpType::ADD))))); assert!(matches!(lexer.next(), Ok(Some(Token::OP(TokenMeta{ pos: 2 }, OpType::ADD)))));
assert!(matches!(lexer.next(), Err(LexerErrors::UnexpectedCharacter { char: _, pos: _ }))) assert!(matches!(lexer.next(), Err(LexerErrors::InvalidTokenError { token: _, pos: _ })))
} }
}
#[test]
fn spaces_are_ignored() {
let mut lexer = Lexer::new(" 15 / 3 ^ 2 + 20 -( 5 * 60 ) ");
assert!(matches!(lexer.next(), Ok(Some(Token::ID(TokenMeta { pos: 0 }, 15.0)))));
assert!(matches!(lexer.next(), Ok(Some(Token::OP(TokenMeta { pos: 4 }, OpType::DIV)))));
assert!(matches!(lexer.next(), Ok(Some(Token::ID(TokenMeta { pos: 6 }, 3.0)))));
assert!(matches!(lexer.next(), Ok(Some(Token::OP(TokenMeta { pos: 8 }, OpType::POW)))));
assert!(matches!(lexer.next(), Ok(Some(Token::ID(TokenMeta { pos: 11 }, 2.0)))));
assert!(matches!(lexer.next(), Ok(Some(Token::OP(TokenMeta { pos: 13}, OpType::ADD)))));
assert!(matches!(lexer.next(), Ok(Some(Token::ID(TokenMeta { pos: 15 }, 20.0)))));
assert!(matches!(lexer.next(), Ok(Some(Token::OP(TokenMeta { pos: 22 }, OpType::SUB)))));
assert!(matches!(lexer.next(), Ok(Some(Token::OBR(TokenMeta { pos: 24 })))));
assert!(matches!(lexer.next(), Ok(Some(Token::ID(TokenMeta { pos: 25 }, 5.0)))));
assert!(matches!(lexer.next(), Ok(Some(Token::OP(TokenMeta { pos: 30 }, OpType::MUL)))));
assert!(matches!(lexer.next(), Ok(Some(Token::ID(TokenMeta { pos: 33 }, 60.0)))));
assert!(matches!(lexer.next(), Ok(Some(Token::CBR(TokenMeta { pos: 36 })))));
assert!(matches!(lexer.next(), Ok(None)));
}
}