fix: fixed issues with errors and positions while lexing
This commit is contained in:
parent
e1ae39199b
commit
8df8689ca4
@ -2,9 +2,9 @@ use thiserror::Error;
|
|||||||
|
|
||||||
#[derive(Error, Debug)]
|
#[derive(Error, Debug)]
|
||||||
pub enum LexerErrors {
|
pub enum LexerErrors {
|
||||||
#[error("unexpected character {char} at position {pos}")]
|
#[error("unexpected character {token} at position {pos}")]
|
||||||
UnexpectedCharacter {
|
InvalidTokenError {
|
||||||
char: char,
|
token: String,
|
||||||
pos: usize,
|
pos: usize,
|
||||||
},
|
},
|
||||||
#[error("cannot lex an empty text sequence")]
|
#[error("cannot lex an empty text sequence")]
|
||||||
|
@ -19,10 +19,10 @@ pub type Result<T> = std::result::Result<T, errors::LexerErrors>;
|
|||||||
// Lexer state: the text being lexed plus the cursors into it.
// Both sides of the diff are shown; the right-hand side adds `read_pointer`.
pub struct Lexer {
|
pub struct Lexer {
|
||||||
// The text to lex, held as individual `char`s (filled via `input.chars().collect()` in `new`).
input: Vec<char>,
|
input: Vec<char>,
|
||||||
// Index into `input` that `next` reports as the `TokenMeta` position of the token it returns.
pos: usize,
|
pos: usize,
|
||||||
|
// Scan cursor: starts at 0, is incremented by the scanning loop, is copied into `pos`
// after a token is produced, and is compared with `input.len()` in `next` to tell
// "end of input" apart from "invalid token".
read_pointer: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
impl Lexer {
|
impl Lexer {
|
||||||
/// Create a new Lexer for the given String
|
/// Create a new Lexer for the given String
|
||||||
/// Example:
|
/// Example:
|
||||||
@ -33,18 +33,30 @@ impl Lexer {
|
|||||||
/// ```
|
/// ```
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn new(input: &str) -> Lexer {
|
pub fn new(input: &str) -> Lexer {
|
||||||
// The input is collected into a `Vec<char>`, so the cursors index characters
// rather than bytes of the original `&str`.
Lexer { input: input.chars().collect(), pos: 0 }
|
// Right-hand side: the new `read_pointer` cursor also starts at the beginning.
Lexer { input: input.chars().collect(), pos: 0, read_pointer: 0 }
|
||||||
}
|
}
|
||||||
|
|
||||||
// Get the next token
|
// Get the next token
|
||||||
pub fn next(&mut self) -> Result<Option<Token>> {
|
pub fn next(&mut self) -> Result<Option<Token>> {
|
||||||
if let Some((fsm, cbuf)) = self.longest_token_prefix() {
|
if let Some((fsm, cbuf)) = self.longest_token_prefix() {
|
||||||
if let Some(token) = fsm::get_token(&fsm, &cbuf, TokenMeta{ pos: self.pos }) {
|
if let Some(token) = fsm::get_token(&fsm, &cbuf, TokenMeta { pos: self.pos }) {
|
||||||
self.pos += cbuf.len();
|
// if a token could be derived
|
||||||
|
self.pos = self.read_pointer;
|
||||||
Ok(Some(token))
|
Ok(Some(token))
|
||||||
} else {
|
} else {
|
||||||
// TODO: handling of end of stream
|
// If no token could be found
|
||||||
Err(LexerErrors::UnexpectedCharacter { char: self.input.get(self.pos), pos: self.pos })
|
if self.input.len() > self.read_pointer {
|
||||||
|
// If not at the end of the input
|
||||||
|
Err(LexerErrors::InvalidTokenError {
|
||||||
|
token: self.input.get(self.pos..).unwrap()
|
||||||
|
.iter()
|
||||||
|
.take(5)
|
||||||
|
.collect(),
|
||||||
|
pos: self.pos })
|
||||||
|
} else {
|
||||||
|
// If at the end of the input
|
||||||
|
Ok(None)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// if no more tokens are there
|
// if no more tokens are there
|
||||||
@ -66,14 +78,15 @@ impl Lexer {
|
|||||||
cbuf.push(*cur);
|
cbuf.push(*cur);
|
||||||
} else if fsm.is_error() {
|
} else if fsm.is_error() {
|
||||||
fsm.revert();
|
fsm.revert();
|
||||||
break
|
break;
|
||||||
}
|
}
|
||||||
|
self.read_pointer += 1;
|
||||||
}
|
}
|
||||||
Some((fsm, cbuf))
|
Some((fsm, cbuf))
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[warn(dead_code)]
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod test {
|
mod test {
|
||||||
use super::*;
|
use super::*;
|
||||||
@ -94,7 +107,6 @@ mod test {
|
|||||||
assert!(matches!(lexer.next(), Ok(Some(Token::OP(TokenMeta { pos: 12 }, OpType::MUL)))));
|
assert!(matches!(lexer.next(), Ok(Some(Token::OP(TokenMeta { pos: 12 }, OpType::MUL)))));
|
||||||
assert!(matches!(lexer.next(), Ok(Some(Token::ID(TokenMeta { pos: 13 }, 60.0)))));
|
assert!(matches!(lexer.next(), Ok(Some(Token::ID(TokenMeta { pos: 13 }, 60.0)))));
|
||||||
assert!(matches!(lexer.next(), Ok(Some(Token::CBR(TokenMeta { pos: 15 })))));
|
assert!(matches!(lexer.next(), Ok(Some(Token::CBR(TokenMeta { pos: 15 })))));
|
||||||
// println!("{:?}", lexer.next());
|
|
||||||
assert!(matches!(lexer.next(), Ok(None)));
|
assert!(matches!(lexer.next(), Ok(None)));
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -103,6 +115,25 @@ mod test {
|
|||||||
let mut lexer = Lexer::new("15+@");
|
let mut lexer = Lexer::new("15+@");
|
||||||
assert!(matches!(lexer.next(), Ok(Some(Token::ID(TokenMeta{ pos: 0 }, 15.0)))));
|
assert!(matches!(lexer.next(), Ok(Some(Token::ID(TokenMeta{ pos: 0 }, 15.0)))));
|
||||||
assert!(matches!(lexer.next(), Ok(Some(Token::OP(TokenMeta{ pos: 2 }, OpType::ADD)))));
|
assert!(matches!(lexer.next(), Ok(Some(Token::OP(TokenMeta{ pos: 2 }, OpType::ADD)))));
|
||||||
assert!(matches!(lexer.next(), Err(LexerErrors::UnexpectedCharacter { char: _, pos: _ })))
|
assert!(matches!(lexer.next(), Err(LexerErrors::InvalidTokenError { token: _, pos: _ })))
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
// Lexes an expression with spaces around its tokens and asserts that only the
// numbers, operators and brackets come back, each with its expected position,
// followed by `Ok(None)` once the input is exhausted.
// NOTE(review): the asserted positions run up to 36, but the literal as shown
// here is only 29 characters long — presumably the original contains longer
// runs of spaces that this rendering collapsed; confirm against the repository.
#[test]
|
||||||
|
fn spaces_are_ignored() {
|
||||||
|
let mut lexer = Lexer::new(" 15 / 3 ^ 2 + 20 -( 5 * 60 ) ");
|
||||||
|
assert!(matches!(lexer.next(), Ok(Some(Token::ID(TokenMeta { pos: 0 }, 15.0)))));
|
||||||
|
assert!(matches!(lexer.next(), Ok(Some(Token::OP(TokenMeta { pos: 4 }, OpType::DIV)))));
|
||||||
|
assert!(matches!(lexer.next(), Ok(Some(Token::ID(TokenMeta { pos: 6 }, 3.0)))));
|
||||||
|
assert!(matches!(lexer.next(), Ok(Some(Token::OP(TokenMeta { pos: 8 }, OpType::POW)))));
|
||||||
|
assert!(matches!(lexer.next(), Ok(Some(Token::ID(TokenMeta { pos: 11 }, 2.0)))));
|
||||||
|
assert!(matches!(lexer.next(), Ok(Some(Token::OP(TokenMeta { pos: 13}, OpType::ADD)))));
|
||||||
|
assert!(matches!(lexer.next(), Ok(Some(Token::ID(TokenMeta { pos: 15 }, 20.0)))));
|
||||||
|
assert!(matches!(lexer.next(), Ok(Some(Token::OP(TokenMeta { pos: 22 }, OpType::SUB)))));
|
||||||
|
assert!(matches!(lexer.next(), Ok(Some(Token::OBR(TokenMeta { pos: 24 })))));
|
||||||
|
assert!(matches!(lexer.next(), Ok(Some(Token::ID(TokenMeta { pos: 25 }, 5.0)))));
|
||||||
|
assert!(matches!(lexer.next(), Ok(Some(Token::OP(TokenMeta { pos: 30 }, OpType::MUL)))));
|
||||||
|
assert!(matches!(lexer.next(), Ok(Some(Token::ID(TokenMeta { pos: 33 }, 60.0)))));
|
||||||
|
assert!(matches!(lexer.next(), Ok(Some(Token::CBR(TokenMeta { pos: 36 })))));
|
||||||
|
// After the last token the lexer must report a clean end of input, not an error.
assert!(matches!(lexer.next(), Ok(None)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user