Skip to content
Snippets Groups Projects
Commit 24a472be authored by MarvelousAnything's avatar MarvelousAnything
Browse files

Separated token types.

parent ba8589a6
No related branches found
No related tags found
No related merge requests found
/// Reserved words of the language.
///
/// `Copy` and `Hash` are derived in addition to the comparison traits:
/// the enum is fieldless, so copies are free, and deriving `Hash` lets
/// keywords be used as map/set keys.
#[derive(Debug, Eq, PartialEq, Clone, Copy, Hash)]
pub enum Keyword {
    Var,
    Fun,
    If,
    Else,
    Until,
    Loop,
    Return,
}

impl Keyword {
    /// Returns the keyword matching `s`, or `None` when `s` is not a
    /// reserved word (the lexer then treats it as a plain identifier).
    pub fn from_str(s: &str) -> Option<Self> {
        match s {
            "var" => Some(Keyword::Var),
            "fun" => Some(Keyword::Fun),
            "if" => Some(Keyword::If),
            "else" => Some(Keyword::Else),
            "until" => Some(Keyword::Until),
            "loop" => Some(Keyword::Loop),
            "return" => Some(Keyword::Return),
            _ => None,
        }
    }
}
...@@ -3,7 +3,7 @@ use crate::lex::token::{Token, TokenType}; ...@@ -3,7 +3,7 @@ use crate::lex::token::{Token, TokenType};
use anyhow::{bail, ensure, Result}; use anyhow::{bail, ensure, Result};
use log::{debug}; use log::{debug};
use thiserror::Error; use thiserror::Error;
use crate::lex::keyword::Keyword; use crate::lex::types::{KeywordToken, LiteralToken};
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub struct Lexer { pub struct Lexer {
...@@ -39,7 +39,7 @@ pub enum LexerError { ...@@ -39,7 +39,7 @@ pub enum LexerError {
#[derive(Debug)] #[derive(Debug)]
pub struct TokenStream { pub struct TokenStream {
tokens: Vec<Token>, pub tokens: Vec<Token>,
} }
impl Display for TokenStream { impl Display for TokenStream {
...@@ -110,6 +110,10 @@ impl Lexer { ...@@ -110,6 +110,10 @@ impl Lexer {
while self.has_next() { while self.has_next() {
tokens.push(self.get_next_token()?); tokens.push(self.get_next_token()?);
} }
if !self.has_next() {
debug!("No more tokens to lex");
tokens.push(TokenType::Eof.at(self.index, self.line_no, self.col_no));
}
info!("Lexed {} tokens", tokens.len()); info!("Lexed {} tokens", tokens.len());
...@@ -257,9 +261,6 @@ impl Lexer { ...@@ -257,9 +261,6 @@ impl Lexer {
} }
fn get_next_token(&mut self) -> Result<Token> { fn get_next_token(&mut self) -> Result<Token> {
if !self.has_next() {
return Ok(TokenType::Eof.at(self.index, self.line_no, self.col_no));
}
let start = (self.index, self.line_no, self.col_no); let start = (self.index, self.line_no, self.col_no);
let token = match self.curr_char { let token = match self.curr_char {
n if self.is_whitespace() => { n if self.is_whitespace() => {
...@@ -271,7 +272,7 @@ impl Lexer { ...@@ -271,7 +272,7 @@ impl Lexer {
'\n' => { '\n' => {
trace!("Found newline at {}:{}[{}]", self.line_no, self.col_no, self.index); trace!("Found newline at {}:{}[{}]", self.line_no, self.col_no, self.index);
// fold newlines into a single token // fold newlines into a single token
self.col_no = 1; self.col_no = 0;
self.line_no += 1; self.line_no += 1;
self.advance()?; self.advance()?;
TokenType::NL TokenType::NL
...@@ -280,31 +281,31 @@ impl Lexer { ...@@ -280,31 +281,31 @@ impl Lexer {
// TODO: Look into this. // TODO: Look into this.
n if n.is_alphabetic() => { n if n.is_alphabetic() => {
let identifier = self.collect_identifier()?; let identifier = self.collect_identifier()?;
if let Some(keyword) = Keyword::from_str(&identifier) { if let Some(keyword) = KeywordToken::from_str(&identifier) {
debug!("Found keyword {:?} at {}:{}[{}]", keyword, self.line_no, self.col_no, self.index); debug!("Found keyword {:?} at {}:{}[{}]", keyword, self.line_no, self.col_no, self.index);
TokenType::Keyword(keyword) TokenType::Keyword(keyword)
} else { } else {
debug!("Found identifier {:?} at {}:{}[{}]", identifier, self.line_no, self.col_no, self.index); debug!("Found identifier {:?} at {}:{}[{}]", identifier, self.line_no, self.col_no, self.index);
TokenType::Identifier(identifier) TokenType::IdentifierToken(identifier)
} }
} }
n if n.is_numeric() => { n if n.is_numeric() => {
let integer = self.collect_integer()?; let integer = self.collect_integer()?;
debug!("Collected integer: {} at {}:{}[{}] to {}:{}[{}]", integer, start.1, start.2, start.0, self.line_no, self.col_no, self.index); debug!("Collected integer: {} at {}:{}[{}] to {}:{}[{}]", integer, start.1, start.2, start.0, self.line_no, self.col_no, self.index);
TokenType::IntegerLiteral(integer) TokenType::Literal(LiteralToken::IntegerLiteral(integer))
} }
'"' => { '"' => {
self.advance()?; self.advance()?;
let string = self.collect_string()?; let string = self.collect_string()?;
debug!("Collected string: {} at {}:{}[{}] to {}:{}[{}]", string, start.1, start.2, start.0, self.line_no, self.col_no, self.index); debug!("Collected string: \"{}\" at {}:{}[{}] to {}:{}[{}]", string, start.1, start.2, start.0, self.line_no, self.col_no, self.index);
TokenType::StringLiteral(string) TokenType::Literal(LiteralToken::StringLiteral(string))
} }
'\'' => { '\'' => {
self.advance()?; self.advance()?;
let character = self.consume()?; let character = self.consume()?;
ensure!(self.consume()? == '\'', LexerError::InvalidCharacterLiteral(self.line_no, self.col_no)); ensure!(self.consume()? == '\'', LexerError::InvalidCharacterLiteral(self.line_no, self.col_no));
debug!("Collected character literal: {:?} at {}:{}[{}] to {}:{}[{}]", character, start.1, start.2, start.0, self.line_no, self.col_no, self.index); debug!("Collected character literal: {:?} at {}:{}[{}] to {}:{}[{}]", character, start.1, start.2, start.0, self.line_no, self.col_no, self.index);
TokenType::CharacterLiteral(character) TokenType::Literal(LiteralToken::CharacterLiteral(character))
} }
'#' => { '#' => {
debug!("Found comment at {}:{}[{}]", self.line_no, self.col_no, self.index); debug!("Found comment at {}:{}[{}]", self.line_no, self.col_no, self.index);
... ...
......
mod consts; pub(crate) mod consts;
mod keyword; pub(crate) mod types;
mod lexer; pub(crate) mod lexer;
mod token; pub(crate) mod token;
pub use lexer::Lexer; pub use lexer::Lexer;
\ No newline at end of file
use crate::lex::keyword::Keyword; use crate::lex::types::{KeywordToken, LiteralToken, SyntaxToken};
#[derive(Debug, Eq, PartialEq, Clone)] #[derive(Debug, Eq, PartialEq, Clone)]
pub enum TokenType { pub enum TokenType {
Keyword(Keyword), Keyword(KeywordToken),
LBrace, Syntax(SyntaxToken),
RBrace, IdentifierToken(String),
LParen, Literal(LiteralToken),
RParen,
Assign,
Comma,
Dot,
Minus,
Not,
Plus,
Times,
Slash,
And,
Or,
Xor,
Mod,
Eq,
Neq,
Lt,
Leq,
Gt,
Geq,
LShift,
RShift,
CharacterLiteral(char),
Identifier(String),
IntegerLiteral(i64),
StringLiteral(String),
Unknown(char), Unknown(char),
Eof, Eof,
NL NL
...@@ -47,27 +22,9 @@ impl TokenType { ...@@ -47,27 +22,9 @@ impl TokenType {
} }
pub fn from_char(c: char) -> Self { pub fn from_char(c: char) -> Self {
match c { SyntaxToken::from_char(c)
'{' => TokenType::LBrace, .map(TokenType::Syntax)
'}' => TokenType::RBrace, .unwrap_or_else(|| TokenType::Unknown(c))
'(' => TokenType::LParen,
')' => TokenType::RParen,
':' => TokenType::Assign,
',' => TokenType::Comma,
'.' => TokenType::Dot,
'-' => TokenType::Minus,
'!' => TokenType::Not,
'+' => TokenType::Plus,
'*' => TokenType::Times,
'/' => TokenType::Slash,
'&' => TokenType::And,
'|' => TokenType::Or,
'^' => TokenType::Xor,
'%' => TokenType::Mod,
'<' => TokenType::Lt,
'>' => TokenType::Gt,
_ => TokenType::Unknown(c),
}
} }
} }
...@@ -89,19 +46,19 @@ impl Token { ...@@ -89,19 +46,19 @@ impl Token {
} }
pub fn is_identifier(&self) -> bool { pub fn is_identifier(&self) -> bool {
matches!(self.token_type, TokenType::Identifier(_)) matches!(self.token_type, TokenType::IdentifierToken(_))
} }
pub fn is_integer_literal(&self) -> bool { pub fn is_integer_literal(&self) -> bool {
matches!(self.token_type, TokenType::IntegerLiteral(_)) matches!(self.token_type, TokenType::Literal(LiteralToken::IntegerLiteral(_)))
} }
pub fn is_string_literal(&self) -> bool { pub fn is_string_literal(&self) -> bool {
matches!(self.token_type, TokenType::StringLiteral(_)) matches!(self.token_type, TokenType::Literal(LiteralToken::StringLiteral(_)))
} }
pub fn is_character_literal(&self) -> bool { pub fn is_character_literal(&self) -> bool {
matches!(self.token_type, TokenType::CharacterLiteral(_)) matches!(self.token_type, TokenType::Literal(LiteralToken::CharacterLiteral(_)))
} }
pub fn index(&self) -> usize { pub fn index(&self) -> usize {
... ...
......
/// Reserved words of the language, produced by the lexer when an
/// identifier matches one of the entries in [`KeywordToken::from_str`].
///
/// `Copy` and `Hash` are derived in addition to the comparison traits:
/// the enum is fieldless, so copies are free, and deriving `Hash` lets
/// keyword tokens be used as map/set keys.
#[derive(Debug, Eq, PartialEq, Clone, Copy, Hash)]
pub enum KeywordToken {
    Var,
    Fun,
    If,
    Else,
    Until,
    Loop,
    Return,
}

impl KeywordToken {
    /// Returns the keyword matching `s`, or `None` when `s` is not a
    /// reserved word (the lexer then treats it as a plain identifier).
    pub fn from_str(s: &str) -> Option<Self> {
        match s {
            "var" => Some(KeywordToken::Var),
            "fun" => Some(KeywordToken::Fun),
            "if" => Some(KeywordToken::If),
            "else" => Some(KeywordToken::Else),
            "until" => Some(KeywordToken::Until),
            "loop" => Some(KeywordToken::Loop),
            "return" => Some(KeywordToken::Return),
            _ => None,
        }
    }
}
/// Punctuation and operator tokens.
///
/// `Copy` and `Hash` are derived in addition to the comparison traits:
/// the enum is fieldless, so copies are free, and deriving `Hash` lets
/// syntax tokens be used as map/set keys.
#[derive(Debug, Eq, PartialEq, Clone, Copy, Hash)]
pub enum SyntaxToken {
    LBrace,
    RBrace,
    LParen,
    RParen,
    Assign,
    Comma,
    Dot,
    Minus,
    Not,
    Plus,
    Times,
    Slash,
    And,
    Or,
    Xor,
    Mod,
    Eq,
    Neq,
    Lt,
    Leq,
    Gt,
    Geq,
    LShift,
    RShift,
}

impl SyntaxToken {
    /// Maps a single character to its syntax token, or `None` when the
    /// character is not an operator/punctuation character.
    ///
    /// Multi-character operators (`Neq`, `Leq`, `Geq`, `LShift`,
    /// `RShift`) have no single-char spelling and are never returned
    /// here; they are presumably assembled elsewhere in the lexer.
    pub fn from_char(c: char) -> Option<Self> {
        match c {
            '{' => Some(SyntaxToken::LBrace),
            '}' => Some(SyntaxToken::RBrace),
            '(' => Some(SyntaxToken::LParen),
            ')' => Some(SyntaxToken::RParen),
            ':' => Some(SyntaxToken::Assign),
            ',' => Some(SyntaxToken::Comma),
            '.' => Some(SyntaxToken::Dot),
            '-' => Some(SyntaxToken::Minus),
            '!' => Some(SyntaxToken::Not),
            '+' => Some(SyntaxToken::Plus),
            '*' => Some(SyntaxToken::Times),
            '/' => Some(SyntaxToken::Slash),
            '&' => Some(SyntaxToken::And),
            '|' => Some(SyntaxToken::Or),
            '^' => Some(SyntaxToken::Xor),
            '%' => Some(SyntaxToken::Mod),
            '=' => Some(SyntaxToken::Eq),
            '<' => Some(SyntaxToken::Lt),
            '>' => Some(SyntaxToken::Gt),
            _ => None,
        }
    }
}
/// Literal values recognized by the lexer.
///
/// `Hash` is derived in addition to the existing traits — every payload
/// (`char`, `i64`, `String`) is hashable, so literal tokens can be used
/// as map/set keys. Integer literals are stored as `i64`.
#[derive(Debug, Eq, PartialEq, Clone, Hash)]
pub enum LiteralToken {
    CharacterLiteral(char),
    IntegerLiteral(i64),
    StringLiteral(String),
}
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please to comment