Skip to content
Snippets Groups Projects
Commit 4d7584c5 authored by MarvelousAnything's avatar MarvelousAnything
Browse files

Made it so lexer does lookahead for syntax token

parent 0f8f1650
Branches
No related tags found
No related merge requests found
......@@ -8,7 +8,7 @@ fun init() {
sprint("Table of squares:\n")
i : 1
loop {
until .i > 10
until .i >= 10
iprint(.i)
sprint(" squared equals ")
iprint(sq(.i))
......
......@@ -33,6 +33,8 @@ pub enum LexerError {
InvalidEOF(usize, usize),
#[error("Unexpected EOL at {0}:{1}")]
InvalidEOL(usize, usize),
/// The lexer produced a token it does not recognise.
///
/// Fields, in order: the offending token, then the lexer's index, line number
/// and column number at the point the token was rejected. The message is
/// rendered as `line:col[index]`, like the lexer's trace output.
#[error("Invalid token {0:?} at {2}:{3}[{1}]")]
UnknownToken(Token, usize, usize, usize),
#[error("Unknown lexer error at {0}:{1}")]
Unknown(usize, usize),
}
......@@ -227,6 +229,31 @@ impl Lexer {
Ok(c)
}
/// Consumes the current character when `has_next()` reports more input.
///
/// Returns `Ok(Some(c))` with the character that was current before the
/// lexer advanced, or `Ok(None)` when there is nothing left to consume.
/// Any error raised by `advance()` is propagated.
fn try_consume(&mut self) -> Result<Option<char>> {
    if !self.has_next() {
        return Ok(None);
    }
    let consumed = self.curr_char;
    self.advance()?;
    Ok(Some(consumed))
}
/// Looks at the character one position past the current index without
/// advancing the lexer.
///
/// Returns `Ok(None)` when `has_next()` reports no further input or when the
/// source has no character at `index + 1`; otherwise returns `Ok(Some(c))`.
fn peek(&self) -> Result<Option<char>> {
    if !self.has_next() {
        return Ok(None);
    }
    // `has_next()` alone does not prove that `index + 1` is in range, so a
    // missing character is reported as end of input instead of panicking
    // through `unwrap()`.
    let c = match self.source.chars().nth(self.index + 1) {
        Some(c) => c,
        None => return Ok(None),
    };
    trace!(
        "Peeked character: {:?} at {}:{}[{}]",
        c,
        self.line_no,
        self.col_no,
        self.index
    );
    Ok(Some(c))
}
fn collect(&mut self, n: usize) -> Result<&str> {
let start = self.index;
let end = self.index + n;
......@@ -284,6 +311,7 @@ impl Lexer {
}
fn get_next_token(&mut self) -> Result<Token> {
trace!("Getting token at {}:{}[{}]", self.line_no, self.col_no, self.index);
let start = (self.index, self.line_no, self.col_no);
let token = match self.curr_char {
n if self.is_whitespace() => {
......@@ -376,9 +404,18 @@ impl Lexer {
self.advance_eol()?;
return self.get_next_token();
}
_ => TokenType::from_char(self.consume()?),
_ => TokenType::from_char(self.consume()?, Some(self.curr_char))
}.at(start.0, start.1, start.2);
if let TokenType::Unknown(_) = token.token_type() {
bail!(LexerError::UnknownToken(token, self.index, self.line_no, self.col_no))
}
.at(start.0, start.1, start.2);
if let TokenType::Syntax(_) = token.token_type() {
self.advance_n(token.length() - 1)?;
}
trace!("Returning token {:?}", token);
Ok(token)
}
}
......
......@@ -21,8 +21,8 @@ impl TokenType {
}
}
pub fn from_char(c: char) -> Self {
SyntaxToken::from_char(c)
/// Classifies `c`, with one character of lookahead in `next`, as a token type.
///
/// Delegates to `SyntaxToken::from_char`; a match becomes
/// `TokenType::Syntax`, anything else becomes `TokenType::Unknown(c)`.
pub fn from_char(c: char, next: Option<char>) -> Self {
    match SyntaxToken::from_char(c, next) {
        Some(syntax) => TokenType::Syntax(syntax),
        None => TokenType::Unknown(c),
    }
}
......@@ -67,6 +67,20 @@ impl Token {
)
}
/// Returns the length of this token, computed per token type.
///
/// Keywords and syntax tokens report their own `length()`; `Eof` is 0 and
/// `NL` is 1. Integer literals are measured by their `to_string()` form.
// NOTE(review): identifier/string `len()` and `len_utf8()` presumably count
// bytes rather than characters — confirm this is intended for non-ASCII input.
pub fn length(&self) -> usize {
    match &self.token_type {
        TokenType::Eof => 0,
        TokenType::NL => 1,
        TokenType::Unknown(ch) => ch.len_utf8(),
        TokenType::Keyword(keyword) => keyword.length(),
        TokenType::Syntax(syntax) => syntax.length(),
        TokenType::IdentifierToken(name) => name.len(),
        TokenType::Literal(literal) => match literal {
            LiteralToken::Integer(value) => value.to_string().len(),
            LiteralToken::String(text) => text.len(),
            LiteralToken::Character(ch) => ch.len_utf8(),
        },
    }
}
pub fn index(&self) -> usize {
self.index
}
......
......@@ -22,6 +22,18 @@ impl KeywordToken {
_ => None,
}
}
/// Returns the fixed length associated with each keyword
/// (presumably the character count of its spelling, e.g. `fun` is 3).
pub fn length(&self) -> usize {
    match self {
        KeywordToken::If => 2,
        KeywordToken::Var | KeywordToken::Fun => 3,
        KeywordToken::Else | KeywordToken::Loop => 4,
        KeywordToken::Until => 5,
        KeywordToken::Return => 6,
    }
}
}
#[derive(Debug, Eq, PartialEq, Clone)]
......@@ -38,10 +50,10 @@ pub enum SyntaxToken {
Plus,
Times,
Slash,
Mod,
And,
Or,
Xor,
Mod,
Eq,
Neq,
Lt,
......@@ -53,28 +65,69 @@ pub enum SyntaxToken {
}
impl SyntaxToken {
pub fn from_char(c: char) -> Option<Self> {
match c {
'{' => Some(SyntaxToken::LBrace),
'}' => Some(SyntaxToken::RBrace),
'(' => Some(SyntaxToken::LParen),
')' => Some(SyntaxToken::RParen),
':' => Some(SyntaxToken::Assign),
',' => Some(SyntaxToken::Comma),
'.' => Some(SyntaxToken::Dot),
'-' => Some(SyntaxToken::Minus),
'!' => Some(SyntaxToken::Not),
'+' => Some(SyntaxToken::Plus),
'*' => Some(SyntaxToken::Times),
'/' => Some(SyntaxToken::Slash),
'&' => Some(SyntaxToken::And),
'|' => Some(SyntaxToken::Or),
'^' => Some(SyntaxToken::Xor),
'%' => Some(SyntaxToken::Mod),
'=' => Some(SyntaxToken::Eq),
'<' => Some(SyntaxToken::Lt),
'>' => Some(SyntaxToken::Gt),
/// Maps a character, plus one character of lookahead, to a syntax token.
///
/// `next` is only consulted for `!`, `=`, `<` and `>`, where it selects the
/// two-character operators (`!=`, `==`, `<=`, `>=`, `<<`, `>>`). A lone `=`
/// and any character not listed here yield `None`.
pub fn from_char(c: char, next: Option<char>) -> Option<Self> {
    trace!("SyntaxToken::from_char: c = {:?}, next = {:?}", c, next);
    let token = match c {
        // Single-character tokens: the lookahead is irrelevant.
        '{' => Some(SyntaxToken::LBrace),
        '}' => Some(SyntaxToken::RBrace),
        '(' => Some(SyntaxToken::LParen),
        ')' => Some(SyntaxToken::RParen),
        ':' => Some(SyntaxToken::Assign),
        ',' => Some(SyntaxToken::Comma),
        '.' => Some(SyntaxToken::Dot),
        '-' => Some(SyntaxToken::Minus),
        '+' => Some(SyntaxToken::Plus),
        '*' => Some(SyntaxToken::Times),
        '/' => Some(SyntaxToken::Slash),
        '%' => Some(SyntaxToken::Mod),
        '&' => Some(SyntaxToken::And),
        '|' => Some(SyntaxToken::Or),
        '^' => Some(SyntaxToken::Xor),
        // Tokens whose meaning depends on the following character.
        '!' => Some(match next {
            Some('=') => SyntaxToken::Neq,
            _ => SyntaxToken::Not,
        }),
        '=' => match next {
            Some('=') => Some(SyntaxToken::Eq),
            _ => None,
        },
        '<' => Some(match next {
            Some('=') => SyntaxToken::Leq,
            Some('<') => SyntaxToken::LShift,
            _ => SyntaxToken::Lt,
        }),
        '>' => Some(match next {
            Some('=') => SyntaxToken::Geq,
            Some('>') => SyntaxToken::RShift,
            _ => SyntaxToken::Gt,
        }),
        _ => None,
    };
    match &token {
        Some(_) => trace!("Found token {:?}", token),
        None => trace!("No token found"),
    }
    token
}
/// Returns how many characters the token spans: 2 for the two-character
/// operators, 1 for every other syntax token.
pub fn length(&self) -> usize {
    match self {
        SyntaxToken::Eq
        | SyntaxToken::Neq
        | SyntaxToken::Leq
        | SyntaxToken::Geq
        | SyntaxToken::LShift
        | SyntaxToken::RShift => 2,
        SyntaxToken::LBrace
        | SyntaxToken::RBrace
        | SyntaxToken::LParen
        | SyntaxToken::RParen
        | SyntaxToken::Assign
        | SyntaxToken::Comma
        | SyntaxToken::Dot
        | SyntaxToken::Minus
        | SyntaxToken::Not
        | SyntaxToken::Plus
        | SyntaxToken::Times
        | SyntaxToken::Slash
        | SyntaxToken::Mod
        | SyntaxToken::And
        | SyntaxToken::Or
        | SyntaxToken::Xor
        | SyntaxToken::Lt
        | SyntaxToken::Gt => 1,
    }
}
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment