diff --git a/examples/sq.t b/examples/sq.t
index fc720bca57de64c745bbff6e678b37f850037011..e62eead68a769163eb60397008fbf120aa2f26b2 100644
--- a/examples/sq.t
+++ b/examples/sq.t
@@ -8,7 +8,7 @@ fun init() {
sprint("Table of squares:\n")
i : 1
loop {
- until .i > 10
+ until .i >= 10
iprint(.i)
sprint(" squared equals ")
iprint(sq(.i))
diff --git a/src/lex/lexer.rs b/src/lex/lexer.rs
index cc2c9b9e78d8159450f0895a4257fa9497330547..c77cdcd24e5c4c94b711bf4a1f11fd95c5117336 100644
--- a/src/lex/lexer.rs
+++ b/src/lex/lexer.rs
@@ -33,6 +33,8 @@ pub enum LexerError {
InvalidEOF(usize, usize),
#[error("Unexpected EOL at {0}:{1}")]
InvalidEOL(usize, usize),
+ #[error("Invalid token {0:?} at {2}:{3}[{1}]")]
+ UnknownToken(Token, usize, usize, usize), // fields: token, index, line, column
#[error("Unknown lexer error at {0}:{1}")]
Unknown(usize, usize),
}
@@ -227,6 +229,31 @@ impl Lexer {
Ok(c)
}
+ /// Returns the current character and advances past it, or `None` when `has_next()` is false.
+ fn try_consume(&mut self) -> Result<Option<char>> {
+     if !self.has_next() {
+         return Ok(None);
+     }
+     let consumed = self.curr_char;
+     self.advance()?;
+     Ok(Some(consumed))
+ }
+
+ /// Looks at the character after the current one without advancing the lexer.
+ ///
+ /// Returns `Ok(None)` when `has_next()` is false or `source` has no character at `index + 1`.
+ fn peek(&self) -> Result<Option<char>> {
+     if !self.has_next() {
+         return Ok(None);
+     }
+     // `nth` already yields `None` past the end, so pass that on instead of panicking in `unwrap`.
+     let next = self.source.chars().nth(self.index + 1);
+     if let Some(c) = next {
+         trace!("Peeked character: {:?} at {}:{}[{}]", c, self.line_no, self.col_no, self.index);
+     }
+     Ok(next)
+ }
+
fn collect(&mut self, n: usize) -> Result<&str> {
let start = self.index;
let end = self.index + n;
@@ -284,6 +311,7 @@ impl Lexer {
}
fn get_next_token(&mut self) -> Result<Token> {
+ trace!("Getting token at {}:{}[{}]", self.line_no, self.col_no, self.index);
let start = (self.index, self.line_no, self.col_no);
let token = match self.curr_char {
n if self.is_whitespace() => {
@@ -376,9 +404,18 @@ impl Lexer {
self.advance_eol()?;
return self.get_next_token();
}
- _ => TokenType::from_char(self.consume()?),
+ _ => TokenType::from_char(self.consume()?, Some(self.curr_char))
+ }.at(start.0, start.1, start.2);
+
+ if let TokenType::Unknown(_) = token.token_type() {
+ bail!(LexerError::UnknownToken(token, self.index, self.line_no, self.col_no))
}
- .at(start.0, start.1, start.2);
+
+ if let TokenType::Syntax(_) = token.token_type() {
+ self.advance_n(token.length() - 1)?;
+ }
+
+ trace!("Returning token {:?}", token);
Ok(token)
}
}
diff --git a/src/lex/token.rs b/src/lex/token.rs
index a02a20801c7efbd1548d51638899fab17c95985f..d7f934145d174478d86994fbc35d57aa45e2cf5c 100644
--- a/src/lex/token.rs
+++ b/src/lex/token.rs
@@ -21,8 +21,8 @@ impl TokenType {
}
}
- pub fn from_char(c: char) -> Self {
- SyntaxToken::from_char(c)
+ pub fn from_char(c: char, next: Option<char>) -> Self {
+ SyntaxToken::from_char(c, next)
.map(TokenType::Syntax)
.unwrap_or_else(|| TokenType::Unknown(c))
}
@@ -66,6 +66,20 @@ impl Token {
TokenType::Literal(LiteralToken::Character(_))
)
}
+
+ /// Length of the token; each variant derives it from its payload or uses a fixed size.
+ pub fn length(&self) -> usize {
+     match &self.token_type {
+         TokenType::Eof => 0,
+         TokenType::NL => 1,
+         TokenType::IdentifierToken(name) => name.len(),
+         TokenType::Keyword(keyword) => keyword.length(),
+         TokenType::Syntax(symbol) => symbol.length(),
+         TokenType::Literal(LiteralToken::String(text)) => text.len(),
+         TokenType::Literal(LiteralToken::Integer(value)) => value.to_string().len(),
+         TokenType::Literal(LiteralToken::Character(c)) | TokenType::Unknown(c) => c.len_utf8(),
+     }
+ }
pub fn index(&self) -> usize {
self.index
diff --git a/src/lex/types.rs b/src/lex/types.rs
index cd7930566964bcb1ab19c5cb67ad577fa1aa6fd4..45724d65f60bf67654374b7969bf16d38576cd2e 100644
--- a/src/lex/types.rs
+++ b/src/lex/types.rs
@@ -22,6 +22,18 @@ impl KeywordToken {
_ => None,
}
}
+
+ /// Fixed length assigned to each keyword.
+ /// Every value equals the number of letters in the variant's name.
+ pub fn length(&self) -> usize {
+     match self {
+         KeywordToken::If => 2,
+         KeywordToken::Var | KeywordToken::Fun => 3,
+         KeywordToken::Else | KeywordToken::Loop => 4,
+         KeywordToken::Until => 5,
+         KeywordToken::Return => 6,
+     }
+ }
}
#[derive(Debug, Eq, PartialEq, Clone)]
@@ -38,10 +50,10 @@ pub enum SyntaxToken {
Plus,
Times,
Slash,
+ Mod,
And,
Or,
Xor,
- Mod,
Eq,
Neq,
Lt,
@@ -53,28 +65,69 @@ pub enum SyntaxToken {
}
impl SyntaxToken {
- pub fn from_char(c: char) -> Option<Self> {
- match c {
- '{' => Some(SyntaxToken::LBrace),
- '}' => Some(SyntaxToken::RBrace),
- '(' => Some(SyntaxToken::LParen),
- ')' => Some(SyntaxToken::RParen),
- ':' => Some(SyntaxToken::Assign),
- ',' => Some(SyntaxToken::Comma),
- '.' => Some(SyntaxToken::Dot),
- '-' => Some(SyntaxToken::Minus),
- '!' => Some(SyntaxToken::Not),
- '+' => Some(SyntaxToken::Plus),
- '*' => Some(SyntaxToken::Times),
- '/' => Some(SyntaxToken::Slash),
- '&' => Some(SyntaxToken::And),
- '|' => Some(SyntaxToken::Or),
- '^' => Some(SyntaxToken::Xor),
- '%' => Some(SyntaxToken::Mod),
- '=' => Some(SyntaxToken::Eq),
- '<' => Some(SyntaxToken::Lt),
- '>' => Some(SyntaxToken::Gt),
+ /// Maps `c`, together with the character that follows it, to a syntax token if one matches.
+ pub fn from_char(c: char, next: Option<char>) -> Option<Self> {
+     trace!("SyntaxToken::from_char: c = {:?}, next = {:?}", c, next);
+     let token = match c {
+         '{' => Some(SyntaxToken::LBrace),
+         '}' => Some(SyntaxToken::RBrace),
+         '(' => Some(SyntaxToken::LParen),
+         ')' => Some(SyntaxToken::RParen),
+         ':' => Some(SyntaxToken::Assign),
+         ',' => Some(SyntaxToken::Comma),
+         '.' => Some(SyntaxToken::Dot),
+         '-' => Some(SyntaxToken::Minus),
+         '+' => Some(SyntaxToken::Plus),
+         '*' => Some(SyntaxToken::Times),
+         '/' => Some(SyntaxToken::Slash),
+         '%' => Some(SyntaxToken::Mod),
+         '&' => Some(SyntaxToken::And),
+         '|' => Some(SyntaxToken::Or),
+         '^' => Some(SyntaxToken::Xor),
+         '!' if next == Some('=') => Some(SyntaxToken::Neq),
+         '!' => Some(SyntaxToken::Not),
+         '=' if next == Some('=') => Some(SyntaxToken::Eq),
+         '<' if next == Some('=') => Some(SyntaxToken::Leq),
+         '<' if next == Some('<') => Some(SyntaxToken::LShift),
+         '<' => Some(SyntaxToken::Lt),
+         '>' if next == Some('=') => Some(SyntaxToken::Geq),
+         '>' if next == Some('>') => Some(SyntaxToken::RShift),
+         '>' => Some(SyntaxToken::Gt),
_ => None,
+     };
+     match &token {
+         Some(_) => trace!("Found token {:?}", token),
+         None => trace!("No token found"),
+     }
+     token
+ }
+ /// Character count of the token: the six operators in the first arm take two, the rest one.
+ pub fn length(&self) -> usize {
+     match self {
+         SyntaxToken::Eq
+         | SyntaxToken::Neq
+         | SyntaxToken::Leq
+         | SyntaxToken::Geq
+         | SyntaxToken::LShift
+         | SyntaxToken::RShift => 2,
+         SyntaxToken::LBrace
+         | SyntaxToken::RBrace
+         | SyntaxToken::LParen
+         | SyntaxToken::RParen
+         | SyntaxToken::Assign
+         | SyntaxToken::Comma
+         | SyntaxToken::Dot
+         | SyntaxToken::Minus
+         | SyntaxToken::Not
+         | SyntaxToken::Plus
+         | SyntaxToken::Times
+         | SyntaxToken::Slash
+         | SyntaxToken::Mod
+         | SyntaxToken::And
+         | SyntaxToken::Or
+         | SyntaxToken::Xor
+         | SyntaxToken::Lt
+         | SyntaxToken::Gt => 1,
}
}
}