Restructure tokenizer module (#700)

* Remove duplicated tests

These tests had already been copied to tokeniser2.rs, so removing them doesn't affect code coverage.

* Move tokeniser to its own module

Now there's a single token module, and the tokenizer/lexer implementation is a private submodule within it.
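
A minimal sketch of the resulting layout (file paths are assumed from the diff below, not stated in it):

    // src/lib.rs
    pub mod token; // replaces `pub mod tokeniser;` and `pub mod tokeniser2;`

    // src/token/mod.rs
    mod tokeniser; // the winnow-based lexer implementation, now private

    pub fn lexer(s: &str) -> Vec<Token> {
        // Delegate to the private submodule; input that fails to lex
        // yields an empty token list rather than surfacing the error.
        tokeniser::lexer(s).unwrap_or_default()
    }

Token, TokenType, and lexer keep public paths under crate::token, so callers only need a path update.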
Author: Adam Chalmers
Committed: 2023-09-24 20:01:17 -05:00 (via GitHub)
Parent: 2971b7752b
Commit: a03d09b41d
13 changed files with 145 additions and 248 deletions
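
For call sites the migration is mechanical: lexer keeps its signature and only its module path changes, as the repeated one-line replacements in the diffs below show. A typical caller before and after:

    // before
    let tokens = kcl_lib::tokeniser::lexer(code);
    // after
    let tokens = kcl_lib::token::lexer(code);
    let parser = kcl_lib::parser::Parser::new(tokens);
    let program = parser.ast()?;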

@ -13,11 +13,11 @@ pub fn bench_lex_parse(c: &mut Criterion) {
}
fn lex(program: &str) {
black_box(kcl_lib::tokeniser::lexer(program));
black_box(kcl_lib::token::lexer(program));
}
fn lex_and_parse(program: &str) {
let tokens = kcl_lib::tokeniser::lexer(program);
let tokens = kcl_lib::token::lexer(program);
let parser = kcl_lib::parser::Parser::new(tokens);
black_box(parser.ast().unwrap());
}

@ -166,7 +166,7 @@ pub async fn modify_ast_for_sketch(
let recasted = program.recast(&FormatOptions::default(), 0);
// Re-parse the ast so we get the correct source ranges.
let tokens = crate::tokeniser::lexer(&recasted);
let tokens = crate::token::lexer(&recasted);
let parser = crate::parser::Parser::new(tokens);
*program = parser.ast()?;

@ -2691,7 +2691,7 @@ fn ghi = (x) => {
}
show(part001)"#;
let tokens = crate::tokeniser::lexer(code);
let tokens = crate::token::lexer(code);
let parser = crate::parser::Parser::new(tokens);
let program = parser.ast().unwrap();
let symbols = program.get_lsp_symbols(code);
@ -2719,7 +2719,7 @@ show(part001)
let some_program_string = r#"const part001 = startSketchAt([0.0, 5.0])
|> line([0.4900857016, -0.0240763666], %)
|> line([0.6804562304, 0.9087880491], %)"#;
let tokens = crate::tokeniser::lexer(some_program_string);
let tokens = crate::token::lexer(some_program_string);
let parser = crate::parser::Parser::new(tokens);
let program = parser.ast().unwrap();
@ -2738,7 +2738,7 @@ show(part001)
let some_program_string = r#"const part001 = startSketchAt([0.0, 5.0])
|> line([0.4900857016, -0.0240763666], %) // hello world
|> line([0.6804562304, 0.9087880491], %)"#;
let tokens = crate::tokeniser::lexer(some_program_string);
let tokens = crate::token::lexer(some_program_string);
let parser = crate::parser::Parser::new(tokens);
let program = parser.ast().unwrap();
@ -2757,7 +2757,7 @@ show(part001)
|> line([0.4900857016, -0.0240763666], %)
// hello world
|> line([0.6804562304, 0.9087880491], %)"#;
let tokens = crate::tokeniser::lexer(some_program_string);
let tokens = crate::token::lexer(some_program_string);
let parser = crate::parser::Parser::new(tokens);
let program = parser.ast().unwrap();
@ -2783,7 +2783,7 @@ show(part001)
// this is also a comment
return things
}"#;
let tokens = crate::tokeniser::lexer(some_program_string);
let tokens = crate::token::lexer(some_program_string);
let parser = crate::parser::Parser::new(tokens);
let program = parser.ast().unwrap();
@ -2820,7 +2820,7 @@ const mySk1 = startSketchAt([0, 0])
|> ry(45, %)
|> rx(45, %)
// one more for good measure"#;
let tokens = crate::tokeniser::lexer(some_program_string);
let tokens = crate::token::lexer(some_program_string);
let parser = crate::parser::Parser::new(tokens);
let program = parser.ast().unwrap();
@ -2859,7 +2859,7 @@ a comment between pipe expression statements */
|> line([-0.42, -1.72], %)
show(part001)"#;
let tokens = crate::tokeniser::lexer(some_program_string);
let tokens = crate::token::lexer(some_program_string);
let parser = crate::parser::Parser::new(tokens);
let program = parser.ast().unwrap();
@ -2885,7 +2885,7 @@ const yo = [
" hey oooooo really long long long"
]
"#;
let tokens = crate::tokeniser::lexer(some_program_string);
let tokens = crate::token::lexer(some_program_string);
let parser = crate::parser::Parser::new(tokens);
let program = parser.ast().unwrap();
@ -2903,7 +2903,7 @@ const key = 'c'
const things = "things"
// this is also a comment"#;
let tokens = crate::tokeniser::lexer(some_program_string);
let tokens = crate::token::lexer(some_program_string);
let parser = crate::parser::Parser::new(tokens);
let program = parser.ast().unwrap();
@ -2921,7 +2921,7 @@ const things = "things"
// a comment
"
}"#;
let tokens = crate::tokeniser::lexer(some_program_string);
let tokens = crate::token::lexer(some_program_string);
let parser = crate::parser::Parser::new(tokens);
let program = parser.ast().unwrap();
@ -2946,7 +2946,7 @@ const part001 = startSketchAt([0, 0])
-angleToMatchLengthY('seg01', myVar, %),
myVar
], %) // ln-lineTo-yAbsolute should use angleToMatchLengthY helper"#;
let tokens = crate::tokeniser::lexer(some_program_string);
let tokens = crate::token::lexer(some_program_string);
let parser = crate::parser::Parser::new(tokens);
let program = parser.ast().unwrap();
@ -2972,7 +2972,7 @@ const part001 = startSketchAt([0, 0])
myVar
], %) // ln-lineTo-yAbsolute should use angleToMatchLengthY helper
"#;
let tokens = crate::tokeniser::lexer(some_program_string);
let tokens = crate::token::lexer(some_program_string);
let parser = crate::parser::Parser::new(tokens);
let program = parser.ast().unwrap();
@ -3003,7 +3003,7 @@ fn ghi = (part001) => {
}
show(part001)"#;
let tokens = crate::tokeniser::lexer(some_program_string);
let tokens = crate::token::lexer(some_program_string);
let parser = crate::parser::Parser::new(tokens);
let mut program = parser.ast().unwrap();
program.rename_symbol("mySuperCoolPart", 6);
@ -3034,7 +3034,7 @@ show(mySuperCoolPart)
let some_program_string = r#"fn ghi = (x, y, z) => {
return x
}"#;
let tokens = crate::tokeniser::lexer(some_program_string);
let tokens = crate::token::lexer(some_program_string);
let parser = crate::parser::Parser::new(tokens);
let mut program = parser.ast().unwrap();
program.rename_symbol("newName", 10);
@ -3063,7 +3063,7 @@ const firstExtrude = startSketchAt([0,0])
|> extrude(h, %)
show(firstExtrude)"#;
let tokens = crate::tokeniser::lexer(some_program_string);
let tokens = crate::token::lexer(some_program_string);
let parser = crate::parser::Parser::new(tokens);
let program = parser.ast().unwrap();
@ -3089,7 +3089,7 @@ show(firstExtrude)
#[tokio::test(flavor = "multi_thread")]
async fn test_recast_math_start_negative() {
let some_program_string = r#"const myVar = -5 + 6"#;
let tokens = crate::tokeniser::lexer(some_program_string);
let tokens = crate::token::lexer(some_program_string);
let parser = crate::parser::Parser::new(tokens);
let program = parser.ast().unwrap();
@ -3105,7 +3105,7 @@ const FOS = 2
const sigmaAllow = 8
const width = 20
const thickness = sqrt(distance * p * FOS * 6 / (sigmaAllow * width))"#;
let tokens = crate::tokeniser::lexer(some_program_string);
let tokens = crate::token::lexer(some_program_string);
let parser = crate::parser::Parser::new(tokens);
let program = parser.ast().unwrap();

@ -804,7 +804,7 @@ mod tests {
use super::*;
pub async fn parse_execute(code: &str) -> Result<ProgramMemory> {
let tokens = crate::tokeniser::lexer(code);
let tokens = crate::token::lexer(code);
let parser = crate::parser::Parser::new(tokens);
let program = parser.ast()?;
let mut mem: ProgramMemory = Default::default();

@ -9,5 +9,4 @@ pub mod math_parser;
pub mod parser;
pub mod server;
pub mod std;
pub mod tokeniser;
pub mod tokeniser2;
pub mod token;

@ -11,7 +11,7 @@ use crate::{
errors::{KclError, KclErrorDetails},
executor::SourceRange,
parser::Parser,
tokeniser::{Token, TokenType},
token::{Token, TokenType},
};
#[derive(Debug, PartialEq, Eq, Deserialize, Serialize, Clone, ts_rs::TS)]
@ -704,7 +704,7 @@ mod test {
#[test]
fn test_parse_expression() {
let tokens = crate::tokeniser::lexer("1 + 2");
let tokens = crate::token::lexer("1 + 2");
let mut parser = MathParser::new(&tokens);
let result = parser.parse().unwrap();
assert_eq!(
@ -731,7 +731,7 @@ mod test {
#[test]
fn test_parse_expression_add_no_spaces() {
let tokens = crate::tokeniser::lexer("1+2");
let tokens = crate::token::lexer("1+2");
let mut parser = MathParser::new(&tokens);
let result = parser.parse().unwrap();
assert_eq!(
@ -758,7 +758,7 @@ mod test {
#[test]
fn test_parse_expression_sub_no_spaces() {
let tokens = crate::tokeniser::lexer("1 -2");
let tokens = crate::token::lexer("1 -2");
let mut parser = MathParser::new(&tokens);
let result = parser.parse().unwrap();
assert_eq!(
@ -785,7 +785,7 @@ mod test {
#[test]
fn test_parse_expression_plus_followed_by_star() {
let tokens = crate::tokeniser::lexer("1 + 2 * 3");
let tokens = crate::token::lexer("1 + 2 * 3");
let mut parser = MathParser::new(&tokens);
let result = parser.parse().unwrap();
assert_eq!(
@ -823,7 +823,7 @@ mod test {
#[test]
fn test_parse_expression_with_parentheses() {
let tokens = crate::tokeniser::lexer("1 * ( 2 + 3 )");
let tokens = crate::token::lexer("1 * ( 2 + 3 )");
let mut parser = MathParser::new(&tokens);
let result = parser.parse().unwrap();
assert_eq!(
@ -861,7 +861,7 @@ mod test {
#[test]
fn test_parse_expression_parens_in_middle() {
let tokens = crate::tokeniser::lexer("1 * ( 2 + 3 ) / 4");
let tokens = crate::token::lexer("1 * ( 2 + 3 ) / 4");
let mut parser = MathParser::new(&tokens);
let result = parser.parse().unwrap();
assert_eq!(
@ -910,7 +910,7 @@ mod test {
#[test]
fn test_parse_expression_parans_and_predence() {
let tokens = crate::tokeniser::lexer("1 + ( 2 + 3 ) / 4");
let tokens = crate::token::lexer("1 + ( 2 + 3 ) / 4");
let mut parser = MathParser::new(&tokens);
let result = parser.parse().unwrap();
assert_eq!(
@ -958,7 +958,7 @@ mod test {
}
#[test]
fn test_parse_expression_nested() {
let tokens = crate::tokeniser::lexer("1 * (( 2 + 3 ) / 4 + 5 )");
let tokens = crate::token::lexer("1 * (( 2 + 3 ) / 4 + 5 )");
let mut parser = MathParser::new(&tokens);
let result = parser.parse().unwrap();
assert_eq!(
@ -1017,7 +1017,7 @@ mod test {
}
#[test]
fn test_parse_expression_redundant_braces() {
let tokens = crate::tokeniser::lexer("1 * ((( 2 + 3 )))");
let tokens = crate::token::lexer("1 * ((( 2 + 3 )))");
let mut parser = MathParser::new(&tokens);
let result = parser.parse().unwrap();
assert_eq!(
@ -1055,7 +1055,7 @@ mod test {
#[test]
fn test_reverse_polish_notation_simple() {
let parser = ReversePolishNotation::new(&crate::tokeniser::lexer("1 + 2"), &[], &[]);
let parser = ReversePolishNotation::new(&crate::token::lexer("1 + 2"), &[], &[]);
let result = parser.parse().unwrap();
assert_eq!(
result,
@ -1084,7 +1084,7 @@ mod test {
#[test]
fn test_reverse_polish_notation_complex() {
let parser = ReversePolishNotation::new(&crate::tokeniser::lexer("1 + 2 * 3"), &[], &[]);
let parser = ReversePolishNotation::new(&crate::token::lexer("1 + 2 * 3"), &[], &[]);
let result = parser.parse().unwrap();
assert_eq!(
result,
@ -1125,7 +1125,7 @@ mod test {
#[test]
fn test_reverse_polish_notation_complex_with_parentheses() {
let parser = ReversePolishNotation::new(&crate::tokeniser::lexer("1 * ( 2 + 3 )"), &[], &[]);
let parser = ReversePolishNotation::new(&crate::token::lexer("1 * ( 2 + 3 )"), &[], &[]);
let result = parser.parse().unwrap();
assert_eq!(
result,
@ -1179,7 +1179,7 @@ mod test {
#[test]
fn test_parse_expression_redundant_braces_around_literal() {
let code = "2 + (((3)))";
let tokens = crate::tokeniser::lexer(code);
let tokens = crate::token::lexer(code);
let mut parser = MathParser::new(&tokens);
let result = parser.parse().unwrap();
assert_eq!(
@ -1274,7 +1274,7 @@ mod test {
#[test]
fn test_parse_expression_braces_around_lots_of_math() {
let code = "(distance * p * FOS * 6 / (sigmaAllow * width))";
let tokens = crate::tokeniser::lexer(code);
let tokens = crate::token::lexer(code);
let mut parser = MathParser::new(&tokens);
let result = parser.parse();
assert!(result.is_ok());
@ -1283,7 +1283,7 @@ mod test {
#[test]
fn test_parse_expression_braces_around_internals_lots_of_math() {
let code = "distance * p * FOS * 6 / (sigmaAllow * width)";
let tokens = crate::tokeniser::lexer(code);
let tokens = crate::token::lexer(code);
let mut parser = MathParser::new(&tokens);
let result = parser.parse();
assert!(result.is_ok());

@ -10,7 +10,7 @@ use crate::{
},
errors::{KclError, KclErrorDetails},
math_parser::MathParser,
tokeniser::{Token, TokenType},
token::{Token, TokenType},
};
pub const PIPE_SUBSTITUTION_OPERATOR: &str = "%";
@ -1797,7 +1797,7 @@ mod tests {
#[test]
fn test_make_identifier() {
let tokens = crate::tokeniser::lexer("a");
let tokens = crate::token::lexer("a");
let parser = Parser::new(tokens);
let identifier = parser.make_identifier(0).unwrap();
assert_eq!(
@ -1812,7 +1812,7 @@ mod tests {
#[test]
fn test_make_identifier_with_const_myvar_equals_5_and_index_2() {
let tokens = crate::tokeniser::lexer("const myVar = 5");
let tokens = crate::token::lexer("const myVar = 5");
let parser = Parser::new(tokens);
let identifier = parser.make_identifier(2).unwrap();
assert_eq!(
@ -1827,7 +1827,7 @@ mod tests {
#[test]
fn test_make_identifier_multiline() {
let tokens = crate::tokeniser::lexer("const myVar = 5\nconst newVar = myVar + 1");
let tokens = crate::token::lexer("const myVar = 5\nconst newVar = myVar + 1");
let parser = Parser::new(tokens);
let identifier = parser.make_identifier(2).unwrap();
assert_eq!(
@ -1851,7 +1851,7 @@ mod tests {
#[test]
fn test_make_identifier_call_expression() {
let tokens = crate::tokeniser::lexer("log(5, \"hello\", aIdentifier)");
let tokens = crate::token::lexer("log(5, \"hello\", aIdentifier)");
let parser = Parser::new(tokens);
let identifier = parser.make_identifier(0).unwrap();
assert_eq!(
@ -1874,7 +1874,7 @@ mod tests {
}
#[test]
fn test_make_non_code_node() {
let tokens = crate::tokeniser::lexer("log(5, \"hello\", aIdentifier)");
let tokens = crate::token::lexer("log(5, \"hello\", aIdentifier)");
let parser = Parser::new(tokens);
let index = 4;
let expected_output = (None, 4);
@ -1883,7 +1883,7 @@ mod tests {
let index = 7;
let expected_output = (None, 7);
assert_eq!(parser.make_non_code_node(index).unwrap(), expected_output);
let tokens = crate::tokeniser::lexer(
let tokens = crate::token::lexer(
r#"
const yo = { a: { b: { c: '123' } } }
// this is a comment
@ -1914,7 +1914,7 @@ const key = 'c'"#,
31,
);
assert_eq!(parser.make_non_code_node(index).unwrap(), expected_output);
let tokens = crate::tokeniser::lexer(
let tokens = crate::token::lexer(
r#"const mySketch = startSketchAt([0,0])
|> lineTo({ to: [0, 1], tag: 'myPath' }, %)
|> lineTo([1, 1], %) /* this is
@ -1940,7 +1940,7 @@ const key = 'c'"#,
#[test]
fn test_collect_object_keys() {
let tokens = crate::tokeniser::lexer("const prop = yo.one[\"two\"]");
let tokens = crate::token::lexer("const prop = yo.one[\"two\"]");
let parser = Parser::new(tokens);
let keys_info = parser.collect_object_keys(6, None, false).unwrap();
assert_eq!(keys_info.len(), 2);
@ -1960,7 +1960,7 @@ const key = 'c'"#,
#[test]
fn test_make_literal_call_expression() {
let tokens = crate::tokeniser::lexer("log(5, \"hello\", aIdentifier)");
let tokens = crate::token::lexer("log(5, \"hello\", aIdentifier)");
let parser = Parser::new(tokens);
let literal = parser.make_literal(2).unwrap();
assert_eq!(
@ -2065,7 +2065,7 @@ const key = 'c'"#,
#[test]
fn test_next_meaningful_token() {
let _offset = 1;
let tokens = crate::tokeniser::lexer(
let tokens = crate::token::lexer(
r#"const mySketch = startSketchAt([0,0])
|> lineTo({ to: [0, 1], tag: 'myPath' }, %)
|> lineTo([1, 1], %) /* this is
@ -2451,7 +2451,7 @@ const key = 'c'"#,
#[test]
fn test_find_closing_brace() {
let tokens = crate::tokeniser::lexer(
let tokens = crate::token::lexer(
r#"const mySketch = startSketchAt([0,0])
|> lineTo({ to: [0, 1], tag: 'myPath' }, %)
|> lineTo([1, 1], %) /* this is
@ -2468,16 +2468,16 @@ const key = 'c'"#,
assert_eq!(parser.find_closing_brace(90, 0, "").unwrap(), 92);
let basic = "( hey )";
let parser = Parser::new(crate::tokeniser::lexer(basic));
let parser = Parser::new(crate::token::lexer(basic));
assert_eq!(parser.find_closing_brace(0, 0, "").unwrap(), 4);
let handles_non_zero_index = "(indexForBracketToRightOfThisIsTwo(shouldBeFour)AndNotThisSix)";
let parser = Parser::new(crate::tokeniser::lexer(handles_non_zero_index));
let parser = Parser::new(crate::token::lexer(handles_non_zero_index));
assert_eq!(parser.find_closing_brace(2, 0, "").unwrap(), 4);
assert_eq!(parser.find_closing_brace(0, 0, "").unwrap(), 6);
let handles_nested = "{a{b{c(}d]}eathou athoeu tah u} thatOneToTheLeftIsLast }";
let parser = Parser::new(crate::tokeniser::lexer(handles_nested));
let parser = Parser::new(crate::token::lexer(handles_nested));
assert_eq!(parser.find_closing_brace(0, 0, "").unwrap(), 18);
// TODO expect error when not started on a brace
@ -2485,7 +2485,7 @@ const key = 'c'"#,
#[test]
fn test_is_call_expression() {
let tokens = crate::tokeniser::lexer(
let tokens = crate::token::lexer(
r#"const mySketch = startSketchAt([0,0])
|> lineTo({ to: [0, 1], tag: 'myPath' }, %)
|> lineTo([1, 1], %) /* this is
@ -2506,7 +2506,7 @@ const key = 'c'"#,
#[test]
fn test_find_next_declaration_keyword() {
let tokens = crate::tokeniser::lexer(
let tokens = crate::token::lexer(
r#"const mySketch = startSketchAt([0,0])
|> lineTo({ to: [0, 1], tag: 'myPath' }, %)
|> lineTo([1, 1], %) /* this is
@ -2521,7 +2521,7 @@ const key = 'c'"#,
TokenReturn { token: None, index: 92 }
);
let tokens = crate::tokeniser::lexer(
let tokens = crate::token::lexer(
r#"const myVar = 5
const newVar = myVar + 1
"#,
@ -2551,7 +2551,7 @@ const newVar = myVar + 1
lineTo(2, 3)
} |> rx(45, %)
"#;
let tokens = crate::tokeniser::lexer(code);
let tokens = crate::token::lexer(code);
let parser = Parser::new(tokens);
assert_eq!(
parser.has_pipe_operator(0, None).unwrap(),
@ -2570,7 +2570,7 @@ const newVar = myVar + 1
lineTo(2, 3)
} |> rx(45, %) |> rx(45, %)
"#;
let tokens = crate::tokeniser::lexer(code);
let tokens = crate::token::lexer(code);
let parser = Parser::new(tokens);
assert_eq!(
parser.has_pipe_operator(0, None).unwrap(),
@ -2592,7 +2592,7 @@ const newVar = myVar + 1
const yo = myFunc(9()
|> rx(45, %)
"#;
let tokens = crate::tokeniser::lexer(code);
let tokens = crate::token::lexer(code);
let parser = Parser::new(tokens);
assert_eq!(
parser.has_pipe_operator(0, None).unwrap(),
@ -2604,7 +2604,7 @@ const yo = myFunc(9()
);
let code = "const myVar2 = 5 + 1 |> myFn(%)";
let tokens = crate::tokeniser::lexer(code);
let tokens = crate::token::lexer(code);
let parser = Parser::new(tokens);
assert_eq!(
parser.has_pipe_operator(1, None).unwrap(),
@ -2626,7 +2626,7 @@ const yo = myFunc(9()
lineTo(1,1)
} |> rx(90, %)
show(mySk1)"#;
let tokens = crate::tokeniser::lexer(code);
let tokens = crate::token::lexer(code);
let parser = Parser::new(tokens.clone());
let token_with_my_path_index = tokens.iter().position(|token| token.value == "myPath").unwrap();
// loop through getting the token and its index
@ -2666,7 +2666,7 @@ show(mySk1)"#;
#[test]
fn test_make_member_expression() {
let tokens = crate::tokeniser::lexer("const prop = yo.one[\"two\"]");
let tokens = crate::token::lexer("const prop = yo.one[\"two\"]");
let parser = Parser::new(tokens);
let member_expression_return = parser.make_member_expression(6).unwrap();
let member_expression = member_expression_return.expression;
@ -2708,13 +2708,13 @@ show(mySk1)"#;
#[test]
fn test_find_end_of_binary_expression() {
let code = "1 + 2 * 3\nconst yo = 5";
let tokens = crate::tokeniser::lexer(code);
let tokens = crate::token::lexer(code);
let parser = Parser::new(tokens.clone());
let end = parser.find_end_of_binary_expression(0).unwrap();
assert_eq!(tokens[end].value, "3");
let code = "(1 + 25) / 5 - 3\nconst yo = 5";
let tokens = crate::tokeniser::lexer(code);
let tokens = crate::token::lexer(code);
let parser = Parser::new(tokens.clone());
let end = parser.find_end_of_binary_expression(0).unwrap();
assert_eq!(tokens[end].value, "3");
@ -2723,48 +2723,48 @@ show(mySk1)"#;
assert_eq!(end_starting_at_the_5, end);
// whole thing wrapped
let code = "((1 + 2) / 5 - 3)\nconst yo = 5";
let tokens = crate::tokeniser::lexer(code);
let tokens = crate::token::lexer(code);
let parser = Parser::new(tokens.clone());
let end = parser.find_end_of_binary_expression(0).unwrap();
assert_eq!(tokens[end].end, code.find("3)").unwrap() + 2);
// whole thing wrapped but given index after the first brace
let code = "((1 + 2) / 5 - 3)\nconst yo = 5";
let tokens = crate::tokeniser::lexer(code);
let tokens = crate::token::lexer(code);
let parser = Parser::new(tokens.clone());
let end = parser.find_end_of_binary_expression(1).unwrap();
assert_eq!(tokens[end].value, "3");
// given the index of a small wrapped section i.e. `1 + 2` in ((1 + 2) / 5 - 3)'
let code = "((1 + 2) / 5 - 3)\nconst yo = 5";
let tokens = crate::tokeniser::lexer(code);
let tokens = crate::token::lexer(code);
let parser = Parser::new(tokens.clone());
let end = parser.find_end_of_binary_expression(2).unwrap();
assert_eq!(tokens[end].value, "2");
// lots of silly nesting
let code = "(1 + 2) / (5 - (3))\nconst yo = 5";
let tokens = crate::tokeniser::lexer(code);
let tokens = crate::token::lexer(code);
let parser = Parser::new(tokens.clone());
let end = parser.find_end_of_binary_expression(0).unwrap();
assert_eq!(tokens[end].end, code.find("))").unwrap() + 2);
// with pipe operator at the end
let code = "(1 + 2) / (5 - (3))\n |> fn(%)";
let tokens = crate::tokeniser::lexer(code);
let tokens = crate::token::lexer(code);
let parser = Parser::new(tokens.clone());
let end = parser.find_end_of_binary_expression(0).unwrap();
assert_eq!(tokens[end].end, code.find("))").unwrap() + 2);
// with call expression at the start of binary expression
let code = "yo(2) + 3\n |> fn(%)";
let tokens = crate::tokeniser::lexer(code);
let tokens = crate::token::lexer(code);
let parser = Parser::new(tokens.clone());
let end = parser.find_end_of_binary_expression(0).unwrap();
assert_eq!(tokens[end].value, "3");
// with call expression at the end of binary expression
let code = "3 + yo(2)\n |> fn(%)";
let tokens = crate::tokeniser::lexer(code);
let tokens = crate::token::lexer(code);
let parser = Parser::new(tokens);
let _end = parser.find_end_of_binary_expression(0).unwrap();
// with call expression at the end of binary expression
let code = "-legX + 2, ";
let tokens = crate::tokeniser::lexer(code);
let tokens = crate::token::lexer(code);
let parser = Parser::new(tokens.clone());
let end = parser.find_end_of_binary_expression(0).unwrap();
assert_eq!(tokens[end].value, "2");
@ -2773,7 +2773,7 @@ show(mySk1)"#;
#[test]
fn test_make_array_expression() {
// input_index: 6, output_index: 14, output: {"type":"ArrayExpression","start":11,"end":26,"elements":[{"type":"Literal","start":12,"end":15,"value":"1","raw":"\"1\""},{"type":"Literal","start":17,"end":18,"value":2,"raw":"2"},{"type":"Identifier","start":20,"end":25,"name":"three"}]}
let tokens = crate::tokeniser::lexer("const yo = [\"1\", 2, three]");
let tokens = crate::token::lexer("const yo = [\"1\", 2, three]");
let parser = Parser::new(tokens);
let array_expression = parser.make_array_expression(6).unwrap();
let expression = array_expression.expression;
@ -2812,7 +2812,7 @@ show(mySk1)"#;
#[test]
fn test_make_call_expression() {
let tokens = crate::tokeniser::lexer("foo(\"a\", a, 3)");
let tokens = crate::token::lexer("foo(\"a\", a, 3)");
let parser = Parser::new(tokens);
let result = parser.make_call_expression(0).unwrap();
assert_eq!(result.last_index, 9);
@ -2846,7 +2846,7 @@ show(mySk1)"#;
#[test]
fn test_make_variable_declaration() {
let tokens = crate::tokeniser::lexer(
let tokens = crate::token::lexer(
r#"const yo = startSketch([0, 0])
|> lineTo([1, myVar], %)
|> foo(myVar2, %)
@ -2916,7 +2916,7 @@ show(mySk1)"#;
#[test]
fn test_make_body() {
let tokens = crate::tokeniser::lexer("const myVar = 5");
let tokens = crate::token::lexer("const myVar = 5");
let parser = Parser::new(tokens);
let body = parser
.make_body(
@ -2934,7 +2934,7 @@ show(mySk1)"#;
#[test]
fn test_abstract_syntax_tree() {
let code = "5 +6";
let parser = Parser::new(crate::tokeniser::lexer(code));
let parser = Parser::new(crate::token::lexer(code));
let result = parser.ast().unwrap();
let expected_result = Program {
start: 0,
@ -2972,7 +2972,7 @@ show(mySk1)"#;
#[test]
fn test_empty_file() {
let some_program_string = r#""#;
let tokens = crate::tokeniser::lexer(some_program_string);
let tokens = crate::token::lexer(some_program_string);
let parser = Parser::new(tokens);
let result = parser.ast();
assert!(result.is_err());
@ -2981,7 +2981,7 @@ show(mySk1)"#;
#[test]
fn test_parse_half_pipe_small() {
let tokens = crate::tokeniser::lexer(
let tokens = crate::token::lexer(
"const secondExtrude = startSketchAt([0,0])
|",
);
@ -2993,14 +2993,14 @@ show(mySk1)"#;
#[test]
fn test_parse_member_expression_double_nested_braces() {
let tokens = crate::tokeniser::lexer(r#"const prop = yo["one"][two]"#);
let tokens = crate::token::lexer(r#"const prop = yo["one"][two]"#);
let parser = Parser::new(tokens);
parser.ast().unwrap();
}
#[test]
fn test_parse_member_expression_binary_expression_period_number_first() {
let tokens = crate::tokeniser::lexer(
let tokens = crate::token::lexer(
r#"const obj = { a: 1, b: 2 }
const height = 1 - obj.a"#,
);
@ -3010,7 +3010,7 @@ const height = 1 - obj.a"#,
#[test]
fn test_parse_member_expression_binary_expression_brace_number_first() {
let tokens = crate::tokeniser::lexer(
let tokens = crate::token::lexer(
r#"const obj = { a: 1, b: 2 }
const height = 1 - obj["a"]"#,
);
@ -3020,7 +3020,7 @@ const height = 1 - obj["a"]"#,
#[test]
fn test_parse_member_expression_binary_expression_brace_number_second() {
let tokens = crate::tokeniser::lexer(
let tokens = crate::token::lexer(
r#"const obj = { a: 1, b: 2 }
const height = obj["a"] - 1"#,
);
@ -3030,7 +3030,7 @@ const height = obj["a"] - 1"#,
#[test]
fn test_parse_member_expression_binary_expression_in_array_number_first() {
let tokens = crate::tokeniser::lexer(
let tokens = crate::token::lexer(
r#"const obj = { a: 1, b: 2 }
const height = [1 - obj["a"], 0]"#,
);
@ -3040,7 +3040,7 @@ const height = [1 - obj["a"], 0]"#,
#[test]
fn test_parse_member_expression_binary_expression_in_array_number_second() {
let tokens = crate::tokeniser::lexer(
let tokens = crate::token::lexer(
r#"const obj = { a: 1, b: 2 }
const height = [obj["a"] - 1, 0]"#,
);
@ -3050,7 +3050,7 @@ const height = [obj["a"] - 1, 0]"#,
#[test]
fn test_parse_member_expression_binary_expression_in_array_number_second_missing_space() {
let tokens = crate::tokeniser::lexer(
let tokens = crate::token::lexer(
r#"const obj = { a: 1, b: 2 }
const height = [obj["a"] -1, 0]"#,
);
@ -3060,7 +3060,7 @@ const height = [obj["a"] -1, 0]"#,
#[test]
fn test_parse_half_pipe() {
let tokens = crate::tokeniser::lexer(
let tokens = crate::token::lexer(
"const height = 10
const firstExtrude = startSketchAt([0,0])
@ -3083,7 +3083,7 @@ const secondExtrude = startSketchAt([0,0])
#[test]
fn test_parse_greater_bang() {
let tokens = crate::tokeniser::lexer(">!");
let tokens = crate::token::lexer(">!");
let parser = Parser::new(tokens);
let err = parser.ast().unwrap_err();
// TODO: Better errors when program cannot tokenize.
@ -3093,7 +3093,7 @@ const secondExtrude = startSketchAt([0,0])
#[test]
fn test_parse_z_percent_parens() {
let tokens = crate::tokeniser::lexer("z%)");
let tokens = crate::token::lexer("z%)");
let parser = Parser::new(tokens);
let result = parser.ast();
assert!(result.is_err());
@ -3102,7 +3102,7 @@ const secondExtrude = startSketchAt([0,0])
#[test]
fn test_parse_parens_unicode() {
let tokens = crate::tokeniser::lexer("");
let tokens = crate::token::lexer("");
let parser = Parser::new(tokens);
let result = parser.ast();
// TODO: Better errors when program cannot tokenize.
@ -3112,7 +3112,7 @@ const secondExtrude = startSketchAt([0,0])
#[test]
fn test_parse_negative_in_array_binary_expression() {
let tokens = crate::tokeniser::lexer(
let tokens = crate::token::lexer(
r#"const leg1 = 5
const thickness = 0.56
@ -3126,7 +3126,7 @@ const bracket = [-leg2 + thickness, 0]
#[test]
fn test_parse_nested_open_brackets() {
let tokens = crate::tokeniser::lexer(
let tokens = crate::token::lexer(
r#"
z(-[["#,
);
@ -3141,7 +3141,7 @@ z(-[["#,
#[test]
fn test_parse_weird_new_line_function() {
let tokens = crate::tokeniser::lexer(
let tokens = crate::token::lexer(
r#"z
(--#"#,
);
@ -3158,7 +3158,7 @@ z(-[["#,
#[test]
fn test_parse_weird_lots_of_fancy_brackets() {
let tokens = crate::tokeniser::lexer(r#"zz({{{{{{{{)iegAng{{{{{{{##"#);
let tokens = crate::token::lexer(r#"zz({{{{{{{{)iegAng{{{{{{{##"#);
let parser = Parser::new(tokens);
let result = parser.ast();
assert!(result.is_err());
@ -3172,7 +3172,7 @@ z(-[["#,
#[test]
fn test_parse_weird_close_before_open() {
let tokens = crate::tokeniser::lexer(
let tokens = crate::token::lexer(
r#"fn)n
e
["#,
@ -3189,7 +3189,7 @@ e
#[test]
fn test_parse_weird_close_before_nada() {
let tokens = crate::tokeniser::lexer(r#"fn)n-"#);
let tokens = crate::token::lexer(r#"fn)n-"#);
let parser = Parser::new(tokens);
let result = parser.ast();
assert!(result.is_err());
@ -3198,7 +3198,7 @@ e
#[test]
fn test_parse_weird_lots_of_slashes() {
let tokens = crate::tokeniser::lexer(
let tokens = crate::token::lexer(
r#"J///////////o//+///////////P++++*++++++P///////˟
++4"#,
);
@ -3215,7 +3215,7 @@ e
#[test]
fn test_parse_expand_array() {
let code = "const myArray = [0..10]";
let parser = Parser::new(crate::tokeniser::lexer(code));
let parser = Parser::new(crate::token::lexer(code));
let result = parser.ast().unwrap();
let expected_result = Program {
start: 0,
@ -3318,7 +3318,7 @@ e
#[test]
fn test_error_keyword_in_variable() {
let some_program_string = r#"const let = "thing""#;
let tokens = crate::tokeniser::lexer(some_program_string);
let tokens = crate::token::lexer(some_program_string);
let parser = Parser::new(tokens);
let result = parser.ast();
assert!(result.is_err());
@ -3331,7 +3331,7 @@ e
#[test]
fn test_error_keyword_in_fn_name() {
let some_program_string = r#"fn let = () {}"#;
let tokens = crate::tokeniser::lexer(some_program_string);
let tokens = crate::token::lexer(some_program_string);
let parser = Parser::new(tokens);
let result = parser.ast();
assert!(result.is_err());
@ -3344,7 +3344,7 @@ e
#[test]
fn test_error_stdlib_in_fn_name() {
let some_program_string = r#"fn cos = () {}"#;
let tokens = crate::tokeniser::lexer(some_program_string);
let tokens = crate::token::lexer(some_program_string);
let parser = Parser::new(tokens);
let result = parser.ast();
assert!(result.is_err());
@ -3359,7 +3359,7 @@ e
let some_program_string = r#"fn thing = (let) => {
return 1
}"#;
let tokens = crate::tokeniser::lexer(some_program_string);
let tokens = crate::token::lexer(some_program_string);
let parser = Parser::new(tokens);
let result = parser.ast();
assert!(result.is_err());
@ -3374,7 +3374,7 @@ e
let some_program_string = r#"fn thing = (cos) => {
return 1
}"#;
let tokens = crate::tokeniser::lexer(some_program_string);
let tokens = crate::token::lexer(some_program_string);
let parser = Parser::new(tokens);
let result = parser.ast();
assert!(result.is_err());
@ -3392,7 +3392,7 @@ e
}
firstPrimeNumber()
"#;
let tokens = crate::tokeniser::lexer(program);
let tokens = crate::token::lexer(program);
let parser = Parser::new(tokens);
let _ast = parser.ast().unwrap();
}
@ -3405,7 +3405,7 @@ e
thing(false)
"#;
let tokens = crate::tokeniser::lexer(some_program_string);
let tokens = crate::token::lexer(some_program_string);
let parser = Parser::new(tokens);
parser.ast().unwrap();
}
@ -3422,7 +3422,7 @@ thing(false)
"#,
name
);
let tokens = crate::tokeniser::lexer(&some_program_string);
let tokens = crate::token::lexer(&some_program_string);
let parser = Parser::new(tokens);
let result = parser.ast();
assert!(result.is_err());
@ -3440,7 +3440,7 @@ thing(false)
#[test]
fn test_error_define_var_as_function() {
let some_program_string = r#"fn thing = "thing""#;
let tokens = crate::tokeniser::lexer(some_program_string);
let tokens = crate::token::lexer(some_program_string);
let parser = Parser::new(tokens);
let result = parser.ast();
assert!(result.is_err());
@ -3469,7 +3469,7 @@ const pt2 = b2[0]
show(b1)
show(b2)"#;
let tokens = crate::tokeniser::lexer(some_program_string);
let tokens = crate::token::lexer(some_program_string);
let parser = Parser::new(tokens);
parser.ast().unwrap();
}
@ -3478,7 +3478,7 @@ show(b2)"#;
fn test_math_with_stdlib() {
let some_program_string = r#"const d2r = pi() / 2
let other_thing = 2 * cos(3)"#;
let tokens = crate::tokeniser::lexer(some_program_string);
let tokens = crate::token::lexer(some_program_string);
let parser = Parser::new(tokens);
parser.ast().unwrap();
}
@ -3488,7 +3488,7 @@ let other_thing = 2 * cos(3)"#;
fn test_parse_pipes_on_pipes() {
let code = include_str!("../../tests/executor/inputs/pipes_on_pipes.kcl");
let tokens = crate::tokeniser::lexer(code);
let tokens = crate::token::lexer(code);
let parser = Parser::new(tokens);
parser.ast().unwrap();
}

@ -34,7 +34,7 @@ pub struct Backend {
/// The types of tokens the server supports.
pub token_types: Vec<SemanticTokenType>,
/// Token maps.
pub token_map: DashMap<String, Vec<crate::tokeniser::Token>>,
pub token_map: DashMap<String, Vec<crate::token::Token>>,
/// AST maps.
pub ast_map: DashMap<String, crate::ast::types::Program>,
/// Current code.
@ -56,7 +56,7 @@ impl Backend {
// Let's update the tokens.
self.current_code_map
.insert(params.uri.to_string(), params.text.clone());
let tokens = crate::tokeniser::lexer(&params.text);
let tokens = crate::token::lexer(&params.text);
self.token_map.insert(params.uri.to_string(), tokens.clone());
// Update the semantic tokens map.
@ -69,9 +69,7 @@ impl Backend {
continue;
};
if token.token_type == crate::tokeniser::TokenType::Word
&& self.stdlib_completions.contains_key(&token.value)
{
if token.token_type == crate::token::TokenType::Word && self.stdlib_completions.contains_key(&token.value) {
// This is a stdlib function.
token_type = SemanticTokenType::FUNCTION;
}
@ -549,7 +547,7 @@ impl LanguageServer for Backend {
// Parse the ast.
// I don't know if we need to do this again since it should be updated in the context.
// But I figure better safe than sorry since this will write back out to the file.
let tokens = crate::tokeniser::lexer(&current_code);
let tokens = crate::token::lexer(&current_code);
let parser = crate::parser::Parser::new(tokens);
let Ok(ast) = parser.ast() else {
return Ok(None);
@ -581,7 +579,7 @@ impl LanguageServer for Backend {
// Parse the ast.
// I don't know if we need to do this again since it should be updated in the context.
// But I figure better safe than sorry since this will write back out to the file.
let tokens = crate::tokeniser::lexer(&current_code);
let tokens = crate::token::lexer(&current_code);
let parser = crate::parser::Parser::new(tokens);
let Ok(mut ast) = parser.ast() else {
return Ok(None);

@ -6,6 +6,8 @@ use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use tower_lsp::lsp_types::SemanticTokenType;
mod tokeniser;
/// The types of tokens.
#[derive(Debug, PartialEq, Eq, Copy, Clone, Deserialize, Serialize, ts_rs::TS, JsonSchema, FromStr, Display)]
#[ts(export)]
@ -155,138 +157,17 @@ impl From<&Token> for crate::executor::SourceRange {
}
pub fn lexer(s: &str) -> Vec<Token> {
super::tokeniser2::lexer(s).unwrap_or_default()
tokeniser::lexer(s).unwrap_or_default()
}
#[cfg(test)]
mod tests {
use pretty_assertions::assert_eq;
use super::*;
#[test]
fn lexer_test() {
assert_eq!(
lexer("const a=5"),
vec![
Token {
token_type: TokenType::Keyword,
value: "const".to_string(),
start: 0,
end: 5,
},
Token {
token_type: TokenType::Whitespace,
value: " ".to_string(),
start: 5,
end: 6,
},
Token {
token_type: TokenType::Word,
value: "a".to_string(),
start: 6,
end: 7,
},
Token {
token_type: TokenType::Operator,
value: "=".to_string(),
start: 7,
end: 8,
},
Token {
token_type: TokenType::Number,
value: "5".to_string(),
start: 8,
end: 9,
},
]
);
assert_eq!(
lexer("54 + 22500 + 6"),
vec![
Token {
token_type: TokenType::Number,
value: "54".to_string(),
start: 0,
end: 2,
},
Token {
token_type: TokenType::Whitespace,
value: " ".to_string(),
start: 2,
end: 3,
},
Token {
token_type: TokenType::Operator,
value: "+".to_string(),
start: 3,
end: 4,
},
Token {
token_type: TokenType::Whitespace,
value: " ".to_string(),
start: 4,
end: 5,
},
Token {
token_type: TokenType::Number,
value: "22500".to_string(),
start: 5,
end: 10,
},
Token {
token_type: TokenType::Whitespace,
value: " ".to_string(),
start: 10,
end: 11,
},
Token {
token_type: TokenType::Operator,
value: "+".to_string(),
start: 11,
end: 12,
},
Token {
token_type: TokenType::Whitespace,
value: " ".to_string(),
start: 12,
end: 13,
},
Token {
token_type: TokenType::Number,
value: "6".to_string(),
start: 13,
end: 14,
},
]
);
}
// We have this as a test so we can ensure it never panics with an unwrap in the server.
#[test]
fn test_token_type_to_semantic_token_type() {
let semantic_types = TokenType::all_semantic_token_types().unwrap();
assert!(!semantic_types.is_empty());
}
#[test]
fn test_lexer_negative_word() {
assert_eq!(
lexer("-legX"),
vec![
Token {
token_type: TokenType::Operator,
value: "-".to_string(),
start: 0,
end: 1,
},
Token {
token_type: TokenType::Word,
value: "legX".to_string(),
start: 1,
end: 5,
},
]
);
}
}

@ -7,7 +7,7 @@ use winnow::{
Located,
};
use crate::tokeniser::{Token, TokenType};
use crate::token::{Token, TokenType};
pub fn lexer(i: &str) -> Result<Vec<Token>, ParseError<Located<&str>, ContextError>> {
repeat(0.., token).parse(Located::new(i))
@ -1432,14 +1432,33 @@ const things = "things"
#[test]
fn test_kitt() {
let program = include_str!("../../tests/executor/inputs/kittycad_svg.kcl");
let program = include_str!("../../../tests/executor/inputs/kittycad_svg.kcl");
let actual = lexer(program).unwrap();
assert_eq!(actual.len(), 5088);
}
#[test]
fn test_pipes_on_pipes() {
let program = include_str!("../../tests/executor/inputs/pipes_on_pipes.kcl");
let program = include_str!("../../../tests/executor/inputs/pipes_on_pipes.kcl");
let actual = lexer(program).unwrap();
assert_eq!(actual.len(), 17836);
}
#[test]
fn test_lexer_negative_word() {
let actual = lexer("-legX").unwrap();
let expected = vec![
Token {
token_type: TokenType::Operator,
value: "-".to_string(),
start: 0,
end: 1,
},
Token {
token_type: TokenType::Word,
value: "legX".to_string(),
start: 1,
end: 5,
},
];
assert_tokens(expected, actual);
}
}

@ -84,13 +84,13 @@ pub fn deserialize_files(data: &[u8]) -> Result<JsValue, JsError> {
// Tests for this function (and, by extension, the lexer) live in JavaScript land: src/lang/tokeniser.test.ts
#[wasm_bindgen]
pub fn lexer_js(js: &str) -> Result<JsValue, JsError> {
let tokens = kcl_lib::tokeniser::lexer(js);
let tokens = kcl_lib::token::lexer(js);
Ok(JsValue::from_serde(&tokens)?)
}
#[wasm_bindgen]
pub fn parse_js(js: &str) -> Result<JsValue, String> {
let tokens = kcl_lib::tokeniser::lexer(js);
let tokens = kcl_lib::token::lexer(js);
let parser = kcl_lib::parser::Parser::new(tokens);
let program = parser.ast().map_err(String::from)?;
// The serde-wasm-bindgen does not work here because of weird HashMap issues so we use the
@ -149,7 +149,7 @@ pub async fn lsp_run(config: ServerConfig) -> Result<(), JsValue> {
let stdlib_signatures = get_signatures_from_stdlib(&stdlib).map_err(|e| e.to_string())?;
// We can unwrap here because we know the tokeniser is valid, since
// we have a test for it.
let token_types = kcl_lib::tokeniser::TokenType::all_semantic_token_types().unwrap();
let token_types = kcl_lib::token::TokenType::all_semantic_token_types().unwrap();
let (service, socket) = LspService::new(|client| Backend {
client,

@ -32,7 +32,7 @@ async fn execute_and_snapshot(code: &str) -> Result<image::DynamicImage> {
// Create a temporary file to write the output to.
let output_file = std::env::temp_dir().join(format!("kcl_output_{}.png", uuid::Uuid::new_v4()));
let tokens = kcl_lib::tokeniser::lexer(code);
let tokens = kcl_lib::token::lexer(code);
let parser = kcl_lib::parser::Parser::new(tokens);
let program = parser.ast()?;
let mut mem: kcl_lib::executor::ProgramMemory = Default::default();

@ -33,7 +33,7 @@ async fn setup(code: &str, name: &str) -> Result<(EngineConnection, Program, uui
.commands_ws(None, None, None, None, Some(false))
.await?;
let tokens = kcl_lib::tokeniser::lexer(code);
let tokens = kcl_lib::token::lexer(code);
let parser = kcl_lib::parser::Parser::new(tokens);
let program = parser.ast()?;
let mut mem: kcl_lib::executor::ProgramMemory = Default::default();