use std::str::FromStr;

use anyhow::Result;
use lazy_static::lazy_static;
use parse_display::{Display, FromStr};
use regex::Regex;
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use tower_lsp::lsp_types::SemanticTokenType;

/// The types of tokens.
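///
/// With `parse_display`, variants round-trip through their camelCase names.
/// A hedged doctest sketch (marked `ignore` because the crate path is assumed,
/// not taken from this file):
///
/// ```ignore
/// use std::str::FromStr;
/// assert_eq!(TokenType::LineComment.to_string(), "lineComment");
/// assert_eq!(TokenType::from_str("lineComment").unwrap(), TokenType::LineComment);
/// ```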
#[derive(Debug, PartialEq, Eq, Copy, Clone, Deserialize, Serialize, ts_rs::TS, JsonSchema, FromStr, Display)]
#[ts(export)]
#[serde(rename_all = "camelCase")]
#[display(style = "camelCase")]
pub enum TokenType {
    /// A number.
    Number,
    /// A word.
    Word,
    /// An operator.
    Operator,
    /// A string.
    String,
    /// A keyword.
    Keyword,
    /// A brace.
    Brace,
    /// Whitespace.
    Whitespace,
    /// A comma.
    Comma,
    /// A colon.
    Colon,
    /// A period.
    Period,
    /// A double period: `..`.
    DoublePeriod,
    /// A line comment.
    LineComment,
    /// A block comment.
    BlockComment,
    /// A function name.
    Function,
}

impl TryFrom<TokenType> for SemanticTokenType {
    type Error = anyhow::Error;

    fn try_from(token_type: TokenType) -> Result<Self> {
        Ok(match token_type {
            TokenType::Number => Self::NUMBER,
            TokenType::Word => Self::VARIABLE,
            TokenType::Keyword => Self::KEYWORD,
            TokenType::Operator => Self::OPERATOR,
            TokenType::String => Self::STRING,
            TokenType::LineComment => Self::COMMENT,
            TokenType::BlockComment => Self::COMMENT,
            TokenType::Function => Self::FUNCTION,
            TokenType::Whitespace
            | TokenType::Brace
            | TokenType::Comma
            | TokenType::Colon
            | TokenType::Period
            | TokenType::DoublePeriod => {
                anyhow::bail!("unsupported token type: {:?}", token_type)
            }
        })
    }
}

impl TokenType {
    // This is for the LSP server: we walk the generated JSON schema to list
    // every variant, so this can never silently drift out of sync with the enum.
    pub fn to_semantic_token_types() -> Result<Vec<SemanticTokenType>> {
        let mut settings = schemars::gen::SchemaSettings::openapi3();
        settings.inline_subschemas = true;
        let mut generator = schemars::gen::SchemaGenerator::new(settings);

        let schema = TokenType::json_schema(&mut generator);
        let schemars::schema::Schema::Object(o) = &schema else {
            anyhow::bail!("expected object schema: {:#?}", schema);
        };
        let Some(subschemas) = &o.subschemas else {
            anyhow::bail!("expected subschemas: {:#?}", schema);
        };
        let Some(one_ofs) = &subschemas.one_of else {
            anyhow::bail!("expected one_of: {:#?}", schema);
        };

        let mut semantic_tokens = vec![];
        for one_of in one_ofs {
            let schemars::schema::Schema::Object(o) = one_of else {
                anyhow::bail!("expected object one_of: {:#?}", one_of);
            };

            let Some(enum_values) = o.enum_values.as_ref() else {
                anyhow::bail!("expected enum values: {:#?}", o);
            };

            if enum_values.len() > 1 {
                anyhow::bail!("expected only one enum value: {:#?}", o);
            }

            if enum_values.is_empty() {
                anyhow::bail!("expected at least one enum value: {:#?}", o);
            }

            let label = TokenType::from_str(&enum_values[0].to_string().replace('"', ""))?;
            if let Ok(semantic_token_type) = SemanticTokenType::try_from(label) {
                semantic_tokens.push(semantic_token_type);
            }
        }

        Ok(semantic_tokens)
    }
}

#[derive(Debug, PartialEq, Eq, Deserialize, Serialize, Clone, ts_rs::TS)]
#[ts(export)]
pub struct Token {
    #[serde(rename = "type")]
    pub token_type: TokenType,
    pub start: usize,
    pub end: usize,
    pub value: String,
}

impl From<Token> for crate::executor::SourceRange {
    fn from(token: Token) -> Self {
        Self([token.start, token.end])
    }
}

impl From<&Token> for crate::executor::SourceRange {
    fn from(token: &Token) -> Self {
        Self([token.start, token.end])
    }
}

lazy_static! {
    static ref NUMBER: Regex = Regex::new(r"^(\d+(\.\d*)?|\.\d+)\b").unwrap();
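    // Note: unlike the other patterns here, WHITESPACE is not anchored with `^`;
    // is_whitespace reports whitespace anywhere in its input (the tests rely on
    // this), and the tokenizer only tries it after every other rule has failed.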
    static ref WHITESPACE: Regex = Regex::new(r"\s+").unwrap();
    static ref WORD: Regex = Regex::new(r"^[a-zA-Z_][a-zA-Z0-9_]*").unwrap();
    // TODO: generate these from our struct types instead of hand-maintaining the list.
    static ref KEYWORD: Regex =
        Regex::new(r"^(if|else|for|while|return|break|continue|fn|let|mut|loop|true|false|nil|and|or|not|var|const)\b").unwrap();
    static ref OPERATOR: Regex = Regex::new(r"^(>=|<=|==|=>|!=|\|>|\*|\+|-|/|%|=|<|>|\||\^)").unwrap();
    static ref STRING: Regex = Regex::new(r#"^"([^"\\]|\\.)*"|'([^'\\]|\\.)*'"#).unwrap();
    static ref BLOCK_START: Regex = Regex::new(r"^\{").unwrap();
    static ref BLOCK_END: Regex = Regex::new(r"^\}").unwrap();
    static ref PARAN_START: Regex = Regex::new(r"^\(").unwrap();
    static ref PARAN_END: Regex = Regex::new(r"^\)").unwrap();
    static ref ARRAY_START: Regex = Regex::new(r"^\[").unwrap();
    static ref ARRAY_END: Regex = Regex::new(r"^\]").unwrap();
    static ref COMMA: Regex = Regex::new(r"^,").unwrap();
    static ref COLON: Regex = Regex::new(r"^:").unwrap();
    static ref PERIOD: Regex = Regex::new(r"^\.").unwrap();
    static ref DOUBLE_PERIOD: Regex = Regex::new(r"^\.\.").unwrap();
    static ref LINECOMMENT: Regex = Regex::new(r"^//.*").unwrap();
    static ref BLOCKCOMMENT: Regex = Regex::new(r"^/\*[\s\S]*?\*/").unwrap();
}

fn is_number(character: &str) -> bool {
    NUMBER.is_match(character)
}

fn is_whitespace(character: &str) -> bool {
    WHITESPACE.is_match(character)
}

fn is_word(character: &str) -> bool {
    WORD.is_match(character)
}

fn is_keyword(character: &str) -> bool {
    KEYWORD.is_match(character)
}

fn is_string(character: &str) -> bool {
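    // STRING's `^` anchor only applies to its first (double-quote) alternative,
    // so we must check that the match starts at offset 0 ourselves.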
    match STRING.find(character) {
        Some(m) => m.start() == 0,
        None => false,
    }
}

fn is_operator(character: &str) -> bool {
    OPERATOR.is_match(character)
}

fn is_block_start(character: &str) -> bool {
    BLOCK_START.is_match(character)
}

fn is_block_end(character: &str) -> bool {
    BLOCK_END.is_match(character)
}

fn is_paran_start(character: &str) -> bool {
    PARAN_START.is_match(character)
}

fn is_paran_end(character: &str) -> bool {
    PARAN_END.is_match(character)
}

fn is_array_start(character: &str) -> bool {
    ARRAY_START.is_match(character)
}

fn is_array_end(character: &str) -> bool {
    ARRAY_END.is_match(character)
}

fn is_comma(character: &str) -> bool {
    COMMA.is_match(character)
}

fn is_colon(character: &str) -> bool {
    COLON.is_match(character)
}

fn is_double_period(character: &str) -> bool {
    DOUBLE_PERIOD.is_match(character)
}

fn is_period(character: &str) -> bool {
    PERIOD.is_match(character)
}

fn is_line_comment(character: &str) -> bool {
    LINECOMMENT.is_match(character)
}

fn is_block_comment(character: &str) -> bool {
    BLOCKCOMMENT.is_match(character)
}

fn match_first(s: &str, regex: &Regex) -> Option<String> {
    regex.find(s).map(|the_match| the_match.as_str().to_string())
}

fn make_token(token_type: TokenType, value: &str, start: usize) -> Token {
    Token {
        token_type,
        value: value.to_string(),
        start,
        end: start + value.len(),
    }
}

fn return_token_at_index(s: &str, start_index: usize) -> Option<Token> {
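    // The checks below are ordered: strings and comments are matched first so
    // their contents are never re-tokenized, keywords are tried before words,
    // and `..` before `.`, so the longer or more specific token wins. Note that
    // start_index is a char index while the token length is the match's byte
    // length; the two agree for ASCII source.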
    let str_from_index = &s.chars().skip(start_index).collect::<String>();
    if is_string(str_from_index) {
        return Some(make_token(
            TokenType::String,
            &match_first(str_from_index, &STRING)?,
            start_index,
        ));
    }
    let is_line_comment_bool = is_line_comment(str_from_index);
    if is_line_comment_bool || is_block_comment(str_from_index) {
        return Some(make_token(
            if is_line_comment_bool {
                TokenType::LineComment
            } else {
                TokenType::BlockComment
            },
            &match_first(
                str_from_index,
                if is_line_comment_bool { &LINECOMMENT } else { &BLOCKCOMMENT },
            )?,
            start_index,
        ));
    }
    if is_paran_end(str_from_index) {
        return Some(make_token(
            TokenType::Brace,
            &match_first(str_from_index, &PARAN_END)?,
            start_index,
        ));
    }
    if is_paran_start(str_from_index) {
        return Some(make_token(
            TokenType::Brace,
            &match_first(str_from_index, &PARAN_START)?,
            start_index,
        ));
    }
    if is_block_start(str_from_index) {
        return Some(make_token(
            TokenType::Brace,
            &match_first(str_from_index, &BLOCK_START)?,
            start_index,
        ));
    }
    if is_block_end(str_from_index) {
        return Some(make_token(
            TokenType::Brace,
            &match_first(str_from_index, &BLOCK_END)?,
            start_index,
        ));
    }
    if is_array_start(str_from_index) {
        return Some(make_token(
            TokenType::Brace,
            &match_first(str_from_index, &ARRAY_START)?,
            start_index,
        ));
    }
    if is_array_end(str_from_index) {
        return Some(make_token(
            TokenType::Brace,
            &match_first(str_from_index, &ARRAY_END)?,
            start_index,
        ));
    }
    if is_comma(str_from_index) {
        return Some(make_token(
            TokenType::Comma,
            &match_first(str_from_index, &COMMA)?,
            start_index,
        ));
    }
    if is_operator(str_from_index) {
        return Some(make_token(
            TokenType::Operator,
            &match_first(str_from_index, &OPERATOR)?,
            start_index,
        ));
    }
    if is_number(str_from_index) {
        return Some(make_token(
            TokenType::Number,
            &match_first(str_from_index, &NUMBER)?,
            start_index,
        ));
    }
    if is_keyword(str_from_index) {
        return Some(make_token(
            TokenType::Keyword,
            &match_first(str_from_index, &KEYWORD)?,
            start_index,
        ));
    }
    if is_word(str_from_index) {
        return Some(make_token(
            TokenType::Word,
            &match_first(str_from_index, &WORD)?,
            start_index,
        ));
    }
    if is_colon(str_from_index) {
        return Some(make_token(
            TokenType::Colon,
            &match_first(str_from_index, &COLON)?,
            start_index,
        ));
    }
    if is_double_period(str_from_index) {
        return Some(make_token(
            TokenType::DoublePeriod,
            &match_first(str_from_index, &DOUBLE_PERIOD)?,
            start_index,
        ));
    }
    if is_period(str_from_index) {
        return Some(make_token(
            TokenType::Period,
            &match_first(str_from_index, &PERIOD)?,
            start_index,
        ));
    }
    if is_whitespace(str_from_index) {
        return Some(make_token(
            TokenType::Whitespace,
            &match_first(str_from_index, &WHITESPACE)?,
            start_index,
        ));
    }
    None
}

fn recursively_tokenise(s: &str, current_index: usize, previous_tokens: Vec<Token>) -> Vec<Token> {
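    // If nothing matches at the current index, that character is skipped and
    // tokenising resumes at the next one, so unknown input never loops forever.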
    if current_index >= s.len() {
        return previous_tokens;
    }
    let token = return_token_at_index(s, current_index);
    let Some(token) = token else {
        return recursively_tokenise(s, current_index + 1, previous_tokens);
    };
    let mut new_tokens = previous_tokens;
    let token_length = token.value.len();
    new_tokens.push(token);
    recursively_tokenise(s, current_index + token_length, new_tokens)
}

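/// Lexes a source string into a flat list of tokens, including whitespace and
/// comments.
///
/// A hedged doctest sketch (marked `ignore` because the crate path is assumed):
///
/// ```ignore
/// let values: Vec<String> = lexer("a + 1").into_iter().map(|t| t.value).collect();
/// assert_eq!(values, ["a", " ", "+", " ", "1"]);
/// ```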
pub fn lexer(s: &str) -> Vec<Token> {
    recursively_tokenise(s, 0, Vec::new())
}

#[cfg(test)]
mod tests {
    use pretty_assertions::assert_eq;

    use super::*;

    #[test]
    fn is_number_test() {
        assert!(is_number("1"));
        assert!(is_number("1 abc"));
        assert!(is_number("1.1"));
        assert!(is_number("1.1 abc"));
        assert!(is_number(".1"));
        assert!(is_number("5?"));
        assert!(is_number("5 + 6"));
        assert!(is_number("5 + a"));
        assert!(is_number("5.5"));

        assert!(!is_number("1abc"));
        assert!(!is_number("a"));
        assert!(!is_number("?"));
        assert!(!is_number("?5"));
    }

    #[test]
    fn is_whitespace_test() {
        assert!(is_whitespace(" "));
        assert!(is_whitespace("  "));
        assert!(is_whitespace(" a"));
        assert!(is_whitespace("a "));

        assert!(!is_whitespace("a"));
        assert!(!is_whitespace("?"));
    }

    #[test]
    fn is_word_test() {
        assert!(is_word("a"));
        assert!(is_word("a "));
        assert!(is_word("a5"));
        assert!(is_word("a5a"));

        assert!(!is_word("5"));
        assert!(!is_word("5a"));
        assert!(!is_word("5a5"));
    }

    #[test]
    fn is_string_test() {
        assert!(is_string("\"\""));
        assert!(is_string("\"a\""));
        assert!(is_string("\"a\" "));
        assert!(is_string("\"a\"5"));
        assert!(is_string("'a'5"));
        assert!(is_string("\"with escaped \\\" backslash\""));
        assert!(is_string("'c'"));

        assert!(!is_string("\""));
        assert!(!is_string("\"a"));
        assert!(!is_string("a\""));
        assert!(!is_string(" \"a\""));
        assert!(!is_string("5\"a\""));
        assert!(!is_string("a + 'str'"));
    }

    #[test]
    fn is_operator_test() {
        assert!(is_operator("+"));
        assert!(is_operator("+ "));
        assert!(is_operator("-"));
        assert!(is_operator("<="));
        assert!(is_operator("<= "));
        assert!(is_operator(">="));
        assert!(is_operator(">= "));
        assert!(is_operator("> "));
        assert!(is_operator("< "));
        assert!(is_operator("| "));
        assert!(is_operator("|> "));
        assert!(is_operator("^ "));
        assert!(is_operator("% "));
        assert!(is_operator("+* "));

        assert!(!is_operator("5 + 5"));
        assert!(!is_operator("a"));
        assert!(!is_operator("a+"));
        assert!(!is_operator("a+5"));
        assert!(!is_operator("5a+5"));
        assert!(!is_operator(", newVar"));
        assert!(!is_operator(","));
    }

    #[test]
    fn is_block_start_test() {
        assert!(is_block_start("{"));
        assert!(is_block_start("{ "));
        assert!(is_block_start("{5"));
        assert!(is_block_start("{a"));
        assert!(is_block_start("{5 "));

        assert!(!is_block_start("5"));
        assert!(!is_block_start("5 + 5"));
        assert!(!is_block_start("5{ + 5"));
        assert!(!is_block_start("a{ + 5"));
        assert!(!is_block_start(" { + 5"));
    }

    #[test]
    fn is_block_end_test() {
        assert!(is_block_end("}"));
        assert!(is_block_end("} "));
        assert!(is_block_end("}5"));
        assert!(is_block_end("}5 "));

        assert!(!is_block_end("5"));
        assert!(!is_block_end("5 + 5"));
        assert!(!is_block_end("5} + 5"));
        assert!(!is_block_end(" } + 5"));
    }

    #[test]
    fn is_paran_start_test() {
        assert!(is_paran_start("("));
        assert!(is_paran_start("( "));
        assert!(is_paran_start("(5"));
        assert!(is_paran_start("(5 "));
        assert!(is_paran_start("(5 + 5"));
        assert!(is_paran_start("(5 + 5)"));
        assert!(is_paran_start("(5 + 5) "));

        assert!(!is_paran_start("5"));
        assert!(!is_paran_start("5 + 5"));
        assert!(!is_paran_start("5( + 5)"));
        assert!(!is_paran_start(" ( + 5)"));
    }

    #[test]
    fn is_paran_end_test() {
        assert!(is_paran_end(")"));
        assert!(is_paran_end(") "));
        assert!(is_paran_end(")5"));
        assert!(is_paran_end(")5 "));

        assert!(!is_paran_end("5"));
        assert!(!is_paran_end("5 + 5"));
        assert!(!is_paran_end("5) + 5"));
        assert!(!is_paran_end(" ) + 5"));
    }

    #[test]
    fn is_comma_test() {
        assert!(is_comma(","));
        assert!(is_comma(", "));
        assert!(is_comma(",5"));
        assert!(is_comma(",5 "));

        assert!(!is_comma("5"));
        assert!(!is_comma("5 + 5"));
        assert!(!is_comma("5, + 5"));
        assert!(!is_comma(" , + 5"));
    }

    #[test]
    fn is_line_comment_test() {
        assert!(is_line_comment("//"));
        assert!(is_line_comment("// "));
        assert!(is_line_comment("//5"));
        assert!(is_line_comment("//5 "));

        assert!(!is_line_comment("5"));
        assert!(!is_line_comment("5 + 5"));
        assert!(!is_line_comment("5// + 5"));
        assert!(!is_line_comment(" // + 5"));
    }

    #[test]
    fn is_block_comment_test() {
        assert!(is_block_comment("/* */"));
        assert!(is_block_comment("/***/"));
        assert!(is_block_comment("/*5*/"));
        assert!(is_block_comment("/*5 */"));

        assert!(!is_block_comment("/*"));
        assert!(!is_block_comment("5"));
        assert!(!is_block_comment("5 + 5"));
        assert!(!is_block_comment("5/* + 5"));
        assert!(!is_block_comment(" /* + 5"));
        assert!(!is_block_comment(
            r#" /* and
here
*/
"#
        ));
    }

    #[test]
    fn make_token_test() {
        assert_eq!(
            make_token(TokenType::Keyword, "const", 56),
            Token {
                token_type: TokenType::Keyword,
                value: "const".to_string(),
                start: 56,
                end: 61,
            }
        );
    }

    #[test]
    fn return_token_at_index_test() {
        assert_eq!(
            return_token_at_index("const", 0),
            Some(Token {
                token_type: TokenType::Keyword,
                value: "const".to_string(),
                start: 0,
                end: 5,
            })
        );
        assert_eq!(
            return_token_at_index("  4554", 2),
            Some(Token {
                token_type: TokenType::Number,
                value: "4554".to_string(),
                start: 2,
                end: 6,
            })
        );
    }

    #[test]
    fn lexer_test() {
        assert_eq!(
            lexer("const a=5"),
            vec![
                Token {
                    token_type: TokenType::Keyword,
                    value: "const".to_string(),
                    start: 0,
                    end: 5,
                },
                Token {
                    token_type: TokenType::Whitespace,
                    value: " ".to_string(),
                    start: 5,
                    end: 6,
                },
                Token {
                    token_type: TokenType::Word,
                    value: "a".to_string(),
                    start: 6,
                    end: 7,
                },
                Token {
                    token_type: TokenType::Operator,
                    value: "=".to_string(),
                    start: 7,
                    end: 8,
                },
                Token {
                    token_type: TokenType::Number,
                    value: "5".to_string(),
                    start: 8,
                    end: 9,
                },
            ]
        );
        assert_eq!(
            lexer("54 + 22500 + 6"),
            vec![
                Token {
                    token_type: TokenType::Number,
                    value: "54".to_string(),
                    start: 0,
                    end: 2,
                },
                Token {
                    token_type: TokenType::Whitespace,
                    value: " ".to_string(),
                    start: 2,
                    end: 3,
                },
                Token {
                    token_type: TokenType::Operator,
                    value: "+".to_string(),
                    start: 3,
                    end: 4,
                },
                Token {
                    token_type: TokenType::Whitespace,
                    value: " ".to_string(),
                    start: 4,
                    end: 5,
                },
                Token {
                    token_type: TokenType::Number,
                    value: "22500".to_string(),
                    start: 5,
                    end: 10,
                },
                Token {
                    token_type: TokenType::Whitespace,
                    value: " ".to_string(),
                    start: 10,
                    end: 11,
                },
                Token {
                    token_type: TokenType::Operator,
                    value: "+".to_string(),
                    start: 11,
                    end: 12,
                },
                Token {
                    token_type: TokenType::Whitespace,
                    value: " ".to_string(),
                    start: 12,
                    end: 13,
                },
                Token {
                    token_type: TokenType::Number,
                    value: "6".to_string(),
                    start: 13,
                    end: 14,
                },
            ]
        );
    }

    // We have this as a test so we can ensure it never panics when the server
    // unwraps it.
    #[test]
    fn test_token_type_to_semantic_token_type() {
        let semantic_types = TokenType::to_semantic_token_types().unwrap();
        assert!(!semantic_types.is_empty());
    }

    #[test]
    fn test_lexer_negative_word() {
        assert_eq!(
            lexer("-legX"),
            vec![
                Token {
                    token_type: TokenType::Operator,
                    value: "-".to_string(),
                    start: 0,
                    end: 1,
                },
                Token {
                    token_type: TokenType::Word,
                    value: "legX".to_string(),
                    start: 1,
                    end: 5,
                },
            ]
        );
    }
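
    // An added sketch (not from the original tests): `..` should lex as one
    // DoublePeriod token, since is_double_period is checked before is_period
    // in return_token_at_index.
    #[test]
    fn test_lexer_double_period() {
        let tokens = lexer("1..5");
        assert_eq!(tokens[1].token_type, TokenType::DoublePeriod);
        assert_eq!(tokens[1].value, "..".to_string());
        assert_eq!((tokens[1].start, tokens[1].end), (1, 3));
    }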
}