2023-02-21 09:42:41 +11:00
|
|
|
use lazy_static::lazy_static;
|
|
|
|
use regex::Regex;
|
|
|
|
use serde::{Deserialize, Serialize};
|
2023-07-20 12:38:05 +10:00
|
|
|
use wasm_bindgen::prelude::*;
|
2023-02-21 09:42:41 +11:00
|
|
|
|
|
|
|
#[wasm_bindgen]
|
|
|
|
#[derive(Debug, PartialEq, Eq, Copy, Clone, Deserialize, Serialize)]
|
|
|
|
#[serde(rename_all = "lowercase")]
|
|
|
|
pub enum TokenType {
|
|
|
|
Number,
|
|
|
|
Word,
|
|
|
|
Operator,
|
|
|
|
String,
|
|
|
|
Brace,
|
|
|
|
Whitespace,
|
|
|
|
Comma,
|
|
|
|
Colon,
|
|
|
|
Period,
|
|
|
|
LineComment,
|
|
|
|
BlockComment,
|
|
|
|
}
|
|
|
|
|
|
|
|
#[wasm_bindgen]
|
|
|
|
#[derive(Debug, PartialEq, Eq, Deserialize, Serialize, Clone)]
|
|
|
|
pub struct Token {
|
|
|
|
#[serde(rename = "type")]
|
|
|
|
pub token_type: TokenType,
|
|
|
|
pub start: usize,
|
|
|
|
pub end: usize,
|
|
|
|
#[wasm_bindgen(skip)]
|
|
|
|
pub value: String,
|
|
|
|
}
|
|
|
|
#[wasm_bindgen]
|
|
|
|
impl Token {
|
|
|
|
#[wasm_bindgen(constructor)]
|
|
|
|
pub fn new(token_type: TokenType, value: String, start: usize, end: usize) -> Token {
|
2023-07-20 12:38:05 +10:00
|
|
|
Token {
|
|
|
|
token_type,
|
|
|
|
value,
|
|
|
|
start,
|
|
|
|
end,
|
|
|
|
}
|
2023-02-21 09:42:41 +11:00
|
|
|
}
|
|
|
|
|
|
|
|
#[wasm_bindgen(getter)]
|
|
|
|
pub fn value(&self) -> String {
|
|
|
|
self.value.clone()
|
|
|
|
}
|
|
|
|
|
|
|
|
#[wasm_bindgen(setter)]
|
|
|
|
pub fn set_value(&mut self, value: String) {
|
|
|
|
self.value = value;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Token patterns, compiled once on first use.
//
// Every pattern except WHITESPACE (and the single-quoted half of STRING) is
// anchored with `^`, so it only matches at the very start of the text it is
// given.
lazy_static! {
    // Integer or decimal literal with an optional leading minus sign.
    static ref NUMBER: Regex = Regex::new(r"^-?\d+(\.\d+)?").unwrap();
    // NOTE(review): not anchored, so this matches whitespace anywhere in the
    // input. `is_whitespace_test` asserts that behaviour, so it is kept as is.
    static ref WHITESPACE: Regex = Regex::new(r"\s+").unwrap();
    // ASCII identifier: a letter or underscore, then letters, digits or underscores.
    static ref WORD: Regex = Regex::new(r"^[a-zA-Z_][a-zA-Z0-9_]*").unwrap();
    // Double- or single-quoted string with backslash escapes. The `^` binds to
    // the double-quoted alternative only; `is_string` checks the match offset
    // to cover the single-quoted one.
    static ref STRING: Regex = Regex::new(r#"^"([^"\\]|\\.)*"|'([^'\\]|\\.)*'"#).unwrap();
    // Two-character operators are listed before their one-character prefixes
    // so the longer one wins. `!=` used to be spelled `!= ` (trailing space),
    // which only matched when a space followed and put that space in the token.
    static ref OPERATOR: Regex =
        Regex::new(r"^(>=|<=|==|=>|!=|\|>|\*|\+|-|/|%|=|<|>|\||\^)").unwrap();
    static ref BLOCK_START: Regex = Regex::new(r"^\{").unwrap();
    static ref BLOCK_END: Regex = Regex::new(r"^\}").unwrap();
    static ref PARAN_START: Regex = Regex::new(r"^\(").unwrap();
    static ref PARAN_END: Regex = Regex::new(r"^\)").unwrap();
    static ref ARRAY_START: Regex = Regex::new(r"^\[").unwrap();
    static ref ARRAY_END: Regex = Regex::new(r"^\]").unwrap();
    static ref COMMA: Regex = Regex::new(r"^,").unwrap();
    static ref COLON: Regex = Regex::new(r"^:").unwrap();
    static ref PERIOD: Regex = Regex::new(r"^\.").unwrap();
    // `//` up to, but not including, the end of the line.
    static ref LINECOMMENT: Regex = Regex::new(r"^//.*").unwrap();
    // `/*` through the first following `*/`; `[\s\S]` lets it span newlines.
    static ref BLOCKCOMMENT: Regex = Regex::new(r"^/\*[\s\S]*?\*/").unwrap();
}
|
|
|
|
|
|
|
|
fn is_number(character: &str) -> bool {
|
|
|
|
NUMBER.is_match(character)
|
|
|
|
}
|
|
|
|
fn is_whitespace(character: &str) -> bool {
|
|
|
|
WHITESPACE.is_match(character)
|
|
|
|
}
|
|
|
|
fn is_word(character: &str) -> bool {
|
|
|
|
WORD.is_match(character)
|
|
|
|
}
|
|
|
|
fn is_string(character: &str) -> bool {
|
|
|
|
match STRING.find(character) {
|
|
|
|
Some(m) => m.start() == 0,
|
|
|
|
None => false,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
fn is_operator(character: &str) -> bool {
|
|
|
|
OPERATOR.is_match(character)
|
|
|
|
}
|
|
|
|
fn is_block_start(character: &str) -> bool {
|
|
|
|
BLOCK_START.is_match(character)
|
|
|
|
}
|
|
|
|
fn is_block_end(character: &str) -> bool {
|
|
|
|
BLOCK_END.is_match(character)
|
|
|
|
}
|
|
|
|
fn is_paran_start(character: &str) -> bool {
|
|
|
|
PARAN_START.is_match(character)
|
|
|
|
}
|
|
|
|
fn is_paran_end(character: &str) -> bool {
|
|
|
|
PARAN_END.is_match(character)
|
|
|
|
}
|
|
|
|
fn is_array_start(character: &str) -> bool {
|
|
|
|
ARRAY_START.is_match(character)
|
|
|
|
}
|
|
|
|
fn is_array_end(character: &str) -> bool {
|
|
|
|
ARRAY_END.is_match(character)
|
|
|
|
}
|
|
|
|
fn is_comma(character: &str) -> bool {
|
|
|
|
COMMA.is_match(character)
|
|
|
|
}
|
|
|
|
fn is_colon(character: &str) -> bool {
|
|
|
|
COLON.is_match(character)
|
|
|
|
}
|
|
|
|
fn is_period(character: &str) -> bool {
|
|
|
|
PERIOD.is_match(character)
|
|
|
|
}
|
|
|
|
fn is_line_comment(character: &str) -> bool {
|
|
|
|
LINECOMMENT.is_match(character)
|
|
|
|
}
|
|
|
|
fn is_block_comment(character: &str) -> bool {
|
|
|
|
BLOCKCOMMENT.is_match(character)
|
|
|
|
}
|
|
|
|
|
2023-08-07 23:04:28 -05:00
|
|
|
fn match_first(str: &str, regex: &Regex) -> Option<String> {
|
|
|
|
regex
|
|
|
|
.find(str)
|
|
|
|
.map(|the_match| the_match.as_str().to_string())
|
2023-02-21 09:42:41 +11:00
|
|
|
}
|
|
|
|
|
|
|
|
fn make_token(token_type: TokenType, value: &str, start: usize) -> Token {
|
|
|
|
Token {
|
|
|
|
token_type,
|
|
|
|
value: value.to_string(),
|
|
|
|
start,
|
|
|
|
end: start + value.len(),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fn return_token_at_index(str: &str, start_index: usize) -> Option<Token> {
|
|
|
|
let str_from_index = &str[start_index..];
|
|
|
|
if is_string(str_from_index) {
|
|
|
|
return Some(make_token(
|
|
|
|
TokenType::String,
|
2023-08-07 23:04:28 -05:00
|
|
|
&match_first(str_from_index, &STRING)?,
|
2023-02-21 09:42:41 +11:00
|
|
|
start_index,
|
|
|
|
));
|
|
|
|
}
|
|
|
|
let is_line_comment_bool = is_line_comment(str_from_index);
|
|
|
|
if is_line_comment_bool || is_block_comment(str_from_index) {
|
|
|
|
return Some(make_token(
|
|
|
|
if is_line_comment_bool {
|
|
|
|
TokenType::LineComment
|
|
|
|
} else {
|
|
|
|
TokenType::BlockComment
|
|
|
|
},
|
|
|
|
&match_first(
|
|
|
|
str_from_index,
|
|
|
|
if is_line_comment_bool {
|
|
|
|
&LINECOMMENT
|
|
|
|
} else {
|
|
|
|
&BLOCKCOMMENT
|
|
|
|
},
|
2023-08-07 23:04:28 -05:00
|
|
|
)?,
|
2023-02-21 09:42:41 +11:00
|
|
|
start_index,
|
|
|
|
));
|
|
|
|
}
|
|
|
|
if is_paran_end(str_from_index) {
|
|
|
|
return Some(make_token(
|
|
|
|
TokenType::Brace,
|
2023-08-07 23:04:28 -05:00
|
|
|
&match_first(str_from_index, &PARAN_END)?,
|
2023-02-21 09:42:41 +11:00
|
|
|
start_index,
|
|
|
|
));
|
|
|
|
}
|
|
|
|
if is_paran_start(str_from_index) {
|
|
|
|
return Some(make_token(
|
|
|
|
TokenType::Brace,
|
2023-08-07 23:04:28 -05:00
|
|
|
&match_first(str_from_index, &PARAN_START)?,
|
2023-02-21 09:42:41 +11:00
|
|
|
start_index,
|
|
|
|
));
|
|
|
|
}
|
|
|
|
if is_block_start(str_from_index) {
|
|
|
|
return Some(make_token(
|
|
|
|
TokenType::Brace,
|
2023-08-07 23:04:28 -05:00
|
|
|
&match_first(str_from_index, &BLOCK_START)?,
|
2023-02-21 09:42:41 +11:00
|
|
|
start_index,
|
|
|
|
));
|
|
|
|
}
|
|
|
|
if is_block_end(str_from_index) {
|
|
|
|
return Some(make_token(
|
|
|
|
TokenType::Brace,
|
2023-08-07 23:04:28 -05:00
|
|
|
&match_first(str_from_index, &BLOCK_END)?,
|
2023-02-21 09:42:41 +11:00
|
|
|
start_index,
|
|
|
|
));
|
|
|
|
}
|
|
|
|
if is_array_start(str_from_index) {
|
|
|
|
return Some(make_token(
|
|
|
|
TokenType::Brace,
|
2023-08-07 23:04:28 -05:00
|
|
|
&match_first(str_from_index, &ARRAY_START)?,
|
2023-02-21 09:42:41 +11:00
|
|
|
start_index,
|
|
|
|
));
|
|
|
|
}
|
|
|
|
if is_array_end(str_from_index) {
|
|
|
|
return Some(make_token(
|
|
|
|
TokenType::Brace,
|
2023-08-07 23:04:28 -05:00
|
|
|
&match_first(str_from_index, &ARRAY_END)?,
|
2023-02-21 09:42:41 +11:00
|
|
|
start_index,
|
|
|
|
));
|
|
|
|
}
|
|
|
|
if is_comma(str_from_index) {
|
|
|
|
return Some(make_token(
|
|
|
|
TokenType::Comma,
|
2023-08-07 23:04:28 -05:00
|
|
|
&match_first(str_from_index, &COMMA)?,
|
2023-02-21 09:42:41 +11:00
|
|
|
start_index,
|
|
|
|
));
|
|
|
|
}
|
|
|
|
if is_number(str_from_index) {
|
|
|
|
return Some(make_token(
|
|
|
|
TokenType::Number,
|
2023-08-07 23:04:28 -05:00
|
|
|
&match_first(str_from_index, &NUMBER)?,
|
2023-02-21 09:42:41 +11:00
|
|
|
start_index,
|
|
|
|
));
|
|
|
|
}
|
|
|
|
if is_operator(str_from_index) {
|
|
|
|
return Some(make_token(
|
|
|
|
TokenType::Operator,
|
2023-08-07 23:04:28 -05:00
|
|
|
&match_first(str_from_index, &OPERATOR)?,
|
2023-02-21 09:42:41 +11:00
|
|
|
start_index,
|
|
|
|
));
|
|
|
|
}
|
|
|
|
if is_word(str_from_index) {
|
|
|
|
return Some(make_token(
|
|
|
|
TokenType::Word,
|
2023-08-07 23:04:28 -05:00
|
|
|
&match_first(str_from_index, &WORD)?,
|
2023-02-21 09:42:41 +11:00
|
|
|
start_index,
|
|
|
|
));
|
|
|
|
}
|
|
|
|
if is_colon(str_from_index) {
|
|
|
|
return Some(make_token(
|
|
|
|
TokenType::Colon,
|
2023-08-07 23:04:28 -05:00
|
|
|
&match_first(str_from_index, &COLON)?,
|
2023-02-21 09:42:41 +11:00
|
|
|
start_index,
|
|
|
|
));
|
|
|
|
}
|
|
|
|
if is_period(str_from_index) {
|
|
|
|
return Some(make_token(
|
|
|
|
TokenType::Period,
|
2023-08-07 23:04:28 -05:00
|
|
|
&match_first(str_from_index, &PERIOD)?,
|
2023-02-21 09:42:41 +11:00
|
|
|
start_index,
|
|
|
|
));
|
|
|
|
}
|
|
|
|
if is_whitespace(str_from_index) {
|
|
|
|
return Some(make_token(
|
|
|
|
TokenType::Whitespace,
|
2023-08-07 23:04:28 -05:00
|
|
|
&match_first(str_from_index, &WHITESPACE)?,
|
2023-02-21 09:42:41 +11:00
|
|
|
start_index,
|
|
|
|
));
|
|
|
|
}
|
|
|
|
None
|
|
|
|
}
|
|
|
|
|
2023-08-18 19:37:52 +10:00
|
|
|
pub fn lexer(str: &str) -> Vec<Token> {
|
2023-07-20 12:38:05 +10:00
|
|
|
fn recursively_tokenise(
|
|
|
|
str: &str,
|
|
|
|
current_index: usize,
|
|
|
|
previous_tokens: Vec<Token>,
|
|
|
|
) -> Vec<Token> {
|
2023-02-21 09:42:41 +11:00
|
|
|
if current_index >= str.len() {
|
|
|
|
return previous_tokens;
|
|
|
|
}
|
|
|
|
let token = return_token_at_index(str, current_index);
|
2023-08-07 23:04:28 -05:00
|
|
|
let Some(token) = token else {
|
2023-07-20 12:38:05 +10:00
|
|
|
return recursively_tokenise(str, current_index + 1, previous_tokens);
|
2023-08-07 23:04:28 -05:00
|
|
|
};
|
2023-02-21 09:42:41 +11:00
|
|
|
let mut new_tokens = previous_tokens;
|
|
|
|
let token_length = token.value.len();
|
|
|
|
new_tokens.push(token);
|
|
|
|
recursively_tokenise(str, current_index + token_length, new_tokens)
|
|
|
|
}
|
|
|
|
recursively_tokenise(str, 0, Vec::new())
|
|
|
|
}
|
|
|
|
|
|
|
|
// wasm_bindgen wrapper for lexer
|
|
|
|
// test for this function and by extension lexer are done in javascript land src/lang/tokeniser.test.ts
|
|
|
|
#[wasm_bindgen]
|
2023-08-07 23:04:28 -05:00
|
|
|
pub fn lexer_js(str: &str) -> Result<JsValue, JsError> {
|
2023-02-21 09:42:41 +11:00
|
|
|
let tokens = lexer(str);
|
2023-08-18 19:37:52 +10:00
|
|
|
Ok(serde_wasm_bindgen::to_value(&tokens)?)
|
2023-02-21 09:42:41 +11:00
|
|
|
}
|
|
|
|
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;

    /// Asserts that `predicate` returns `expected` for every input in `inputs`.
    fn assert_all(predicate: fn(&str) -> bool, expected: bool, inputs: &[&str]) {
        for input in inputs {
            assert_eq!(predicate(input), expected, "input: {:?}", input);
        }
    }

    /// Shorthand for building an expected token.
    fn token(token_type: TokenType, value: &str, start: usize, end: usize) -> Token {
        Token {
            token_type,
            value: value.to_string(),
            start,
            end,
        }
    }

    #[test]
    fn is_number_test() {
        assert_all(
            is_number,
            true,
            &[
                "1", "1 abc", "1abc", "1.1", "1.1 abc", "5?", "5 + 6", "5 + a", "-5", "5.5",
                "-5.5",
            ],
        );
        assert_all(is_number, false, &["a", "?", "?5"]);
    }

    #[test]
    fn is_whitespace_test() {
        // "a " is accepted because the whitespace pattern is not anchored.
        assert_all(is_whitespace, true, &[" ", "  ", " a", "a "]);
        assert_all(is_whitespace, false, &["a", "?"]);
    }

    #[test]
    fn is_word_test() {
        assert_all(is_word, true, &["a", "a ", "a5", "a5a"]);
        assert_all(is_word, false, &["5", "5a", "5a5"]);
    }

    #[test]
    fn is_string_test() {
        assert_all(
            is_string,
            true,
            &[
                "\"\"",
                "\"a\"",
                "\"a\" ",
                "\"a\"5",
                "'a'5",
                "\"with escaped \\\" backslash\"",
            ],
        );
        assert_all(
            is_string,
            false,
            &["\"", "\"a", "a\"", " \"a\"", "5\"a\"", "a + 'str'"],
        );
    }

    #[test]
    fn is_operator_test() {
        assert_all(
            is_operator,
            true,
            &[
                "+", "+ ", "-", "<=", "<= ", ">=", ">= ", "> ", "< ", "| ", "|> ", "^ ", "% ",
                "+* ",
            ],
        );
        assert_all(
            is_operator,
            false,
            &["5 + 5", "a", "a+", "a+5", "5a+5", ", newVar", ","],
        );
    }

    #[test]
    fn is_block_start_test() {
        assert_all(is_block_start, true, &["{", "{ ", "{5", "{a", "{5 "]);
        assert_all(
            is_block_start,
            false,
            &["5", "5 + 5", "5{ + 5", "a{ + 5", " { + 5"],
        );
    }

    #[test]
    fn is_block_end_test() {
        assert_all(is_block_end, true, &["}", "} ", "}5", "}5 "]);
        assert_all(is_block_end, false, &["5", "5 + 5", "5} + 5", " } + 5"]);
    }

    #[test]
    fn is_paran_start_test() {
        assert_all(
            is_paran_start,
            true,
            &["(", "( ", "(5", "(5 ", "(5 + 5", "(5 + 5)", "(5 + 5) "],
        );
        assert_all(
            is_paran_start,
            false,
            &["5", "5 + 5", "5( + 5)", " ( + 5)"],
        );
    }

    #[test]
    fn is_paran_end_test() {
        assert_all(is_paran_end, true, &[")", ") ", ")5", ")5 "]);
        assert_all(is_paran_end, false, &["5", "5 + 5", "5) + 5", " ) + 5"]);
    }

    #[test]
    fn is_comma_test() {
        assert_all(is_comma, true, &[",", ", ", ",5", ",5 "]);
        assert_all(is_comma, false, &["5", "5 + 5", "5, + 5", " , + 5"]);
    }

    #[test]
    fn is_line_comment_test() {
        assert_all(is_line_comment, true, &["//", "// ", "//5", "//5 "]);
        assert_all(
            is_line_comment,
            false,
            &["5", "5 + 5", "5// + 5", " // + 5"],
        );
    }

    #[test]
    fn is_block_comment_test() {
        assert_all(
            is_block_comment,
            true,
            &["/* */", "/***/", "/*5*/", "/*5 */"],
        );
        assert_all(
            is_block_comment,
            false,
            &["/*", "5", "5 + 5", "5/* + 5", " /* + 5"],
        );
    }

    #[test]
    fn make_token_test() {
        assert_eq!(
            make_token(TokenType::Word, "const", 56),
            token(TokenType::Word, "const", 56, 61)
        );
    }

    #[test]
    fn return_token_at_index_test() {
        assert_eq!(
            return_token_at_index("const", 0),
            Some(token(TokenType::Word, "const", 0, 5))
        );
        // Two leading spaces, so the number really starts at byte offset 2 and
        // the expected span 2..6 is consistent with the input.
        assert_eq!(
            return_token_at_index("  4554", 2),
            Some(token(TokenType::Number, "4554", 2, 6))
        );
    }

    #[test]
    fn lexer_test() {
        assert_eq!(
            lexer("const a=5"),
            vec![
                token(TokenType::Word, "const", 0, 5),
                token(TokenType::Whitespace, " ", 5, 6),
                token(TokenType::Word, "a", 6, 7),
                token(TokenType::Operator, "=", 7, 8),
                token(TokenType::Number, "5", 8, 9),
            ]
        );
        assert_eq!(
            lexer("54 + 22500 + 6"),
            vec![
                token(TokenType::Number, "54", 0, 2),
                token(TokenType::Whitespace, " ", 2, 3),
                token(TokenType::Operator, "+", 3, 4),
                token(TokenType::Whitespace, " ", 4, 5),
                token(TokenType::Number, "22500", 5, 10),
                token(TokenType::Whitespace, " ", 10, 11),
                token(TokenType::Operator, "+", 11, 12),
                token(TokenType::Whitespace, " ", 12, 13),
                token(TokenType::Number, "6", 13, 14),
            ]
        );
    }
}
|