From c943a3f1923dafe3f89377d27bda73f6e69809bc Mon Sep 17 00:00:00 2001 From: Nick Cameron Date: Tue, 10 Dec 2024 14:26:53 +1300 Subject: [PATCH] Refactor TokenStream (and some minor changes to Token) (#4695) * Refactor TokenStream (and some minor changes to Token) Signed-off-by: Nick Cameron * Tidy up lexer tests Signed-off-by: Nick Cameron --------- Signed-off-by: Nick Cameron --- src/wasm-lib/kcl/src/lib.rs | 4 +- src/wasm-lib/kcl/src/lsp/kcl/mod.rs | 59 +- src/wasm-lib/kcl/src/lsp/tests.rs | 22 +- src/wasm-lib/kcl/src/parsing/mod.rs | 12 +- src/wasm-lib/kcl/src/parsing/parser.rs | 486 ++--- src/wasm-lib/kcl/src/parsing/token/mod.rs | 281 ++- ...ng__token__tokeniser__tests__program2.snap | 1014 +++++++++++ .../kcl/src/parsing/token/tokeniser.rs | 1566 ++--------------- src/wasm-lib/kcl/src/simulation_tests.rs | 2 +- src/wasm-lib/kcl/src/unparser.rs | 18 +- 10 files changed, 1625 insertions(+), 1839 deletions(-) create mode 100644 src/wasm-lib/kcl/src/parsing/token/snapshots/kcl_lib__parsing__token__tokeniser__tests__program2.snap diff --git a/src/wasm-lib/kcl/src/lib.rs b/src/wasm-lib/kcl/src/lib.rs index 35f190b54..c6d8dbb5d 100644 --- a/src/wasm-lib/kcl/src/lib.rs +++ b/src/wasm-lib/kcl/src/lib.rs @@ -137,7 +137,7 @@ pub use lsp::test_util::kcl_lsp_server; impl Program { pub fn parse(input: &str) -> Result<(Option, Vec), KclError> { let module_id = ModuleId::default(); - let tokens = parsing::token::lexer(input, module_id)?; + let tokens = parsing::token::lex(input, module_id)?; let (ast, errs) = parsing::parse_tokens(tokens).0?; Ok((ast.map(|ast| Program { ast }), errs)) @@ -145,7 +145,7 @@ impl Program { pub fn parse_no_errs(input: &str) -> Result { let module_id = ModuleId::default(); - let tokens = parsing::token::lexer(input, module_id)?; + let tokens = parsing::token::lex(input, module_id)?; let ast = parsing::parse_tokens(tokens).parse_errs_as_err()?; Ok(Program { ast }) diff --git a/src/wasm-lib/kcl/src/lsp/kcl/mod.rs b/src/wasm-lib/kcl/src/lsp/kcl/mod.rs index 53ce4baec..cbdd2f3fd 100644 --- a/src/wasm-lib/kcl/src/lsp/kcl/mod.rs +++ b/src/wasm-lib/kcl/src/lsp/kcl/mod.rs @@ -46,34 +46,31 @@ use crate::{ lsp::{backend::Backend as _, util::IntoDiagnostic}, parsing::{ ast::types::{Expr, Node, VariableKind}, - token::TokenType, + token::TokenStream, PIPE_OPERATOR, }, CacheInformation, ModuleId, OldAstState, Program, SourceRange, }; +const SEMANTIC_TOKEN_TYPES: [SemanticTokenType; 10] = [ + SemanticTokenType::NUMBER, + SemanticTokenType::VARIABLE, + SemanticTokenType::KEYWORD, + SemanticTokenType::TYPE, + SemanticTokenType::STRING, + SemanticTokenType::OPERATOR, + SemanticTokenType::COMMENT, + SemanticTokenType::FUNCTION, + SemanticTokenType::PARAMETER, + SemanticTokenType::PROPERTY, +]; -lazy_static::lazy_static! { - pub static ref SEMANTIC_TOKEN_TYPES: Vec = { - // This is safe to unwrap because we know all the token types are valid. - // And the test would fail if they were not. 
- let mut gen = TokenType::all_semantic_token_types().unwrap(); - gen.extend(vec![ - SemanticTokenType::PARAMETER, - SemanticTokenType::PROPERTY, - ]); - gen - }; - - pub static ref SEMANTIC_TOKEN_MODIFIERS: Vec = { - vec![ - SemanticTokenModifier::DECLARATION, - SemanticTokenModifier::DEFINITION, - SemanticTokenModifier::DEFAULT_LIBRARY, - SemanticTokenModifier::READONLY, - SemanticTokenModifier::STATIC, - ] - }; -} +const SEMANTIC_TOKEN_MODIFIERS: [SemanticTokenModifier; 5] = [ + SemanticTokenModifier::DECLARATION, + SemanticTokenModifier::DEFINITION, + SemanticTokenModifier::DEFAULT_LIBRARY, + SemanticTokenModifier::READONLY, + SemanticTokenModifier::STATIC, +]; /// A subcommand for running the server. #[derive(Clone, Debug)] @@ -102,7 +99,7 @@ pub struct Backend { /// The stdlib signatures for the language. pub stdlib_signatures: HashMap, /// Token maps. - pub token_map: DashMap>, + pub(super) token_map: DashMap, /// AST maps. pub ast_map: DashMap>, /// Last successful execution. @@ -281,7 +278,7 @@ impl crate::lsp::backend::Backend for Backend { // Lets update the tokens. let module_id = ModuleId::default(); - let tokens = match crate::parsing::token::lexer(¶ms.text, module_id) { + let tokens = match crate::parsing::token::lex(¶ms.text, module_id) { Ok(tokens) => tokens, Err(err) => { self.add_to_diagnostics(¶ms, &[err], true).await; @@ -407,11 +404,11 @@ impl Backend { self.executor_ctx.read().await } - async fn update_semantic_tokens(&self, tokens: &[crate::parsing::token::Token], params: &TextDocumentItem) { + async fn update_semantic_tokens(&self, tokens: &TokenStream, params: &TextDocumentItem) { // Update the semantic tokens map. let mut semantic_tokens = vec![]; let mut last_position = Position::new(0, 0); - for token in tokens { + for token in tokens.as_slice() { let Ok(token_type) = SemanticTokenType::try_from(token.token_type) else { // We continue here because not all tokens can be converted this way, we will get // the rest from the ast. @@ -563,7 +560,7 @@ impl Backend { let semantic_token = SemanticToken { delta_line: position.line - last_position.line + 1, delta_start: 0, - length: token.value.len() as u32, + length: (token.end - token.start) as u32, token_type: token_type_index, token_modifiers_bitset, }; @@ -582,7 +579,7 @@ impl Backend { } else { position.character - last_position.character }, - length: token.value.len() as u32, + length: (token.end - token.start) as u32, token_type: token_type_index, token_modifiers_bitset, }; @@ -963,8 +960,8 @@ impl LanguageServer for Backend { semantic_tokens_options: SemanticTokensOptions { work_done_progress_options: WorkDoneProgressOptions::default(), legend: SemanticTokensLegend { - token_types: SEMANTIC_TOKEN_TYPES.clone(), - token_modifiers: SEMANTIC_TOKEN_MODIFIERS.clone(), + token_types: SEMANTIC_TOKEN_TYPES.to_vec(), + token_modifiers: SEMANTIC_TOKEN_MODIFIERS.to_vec(), }, range: Some(false), full: Some(SemanticTokensFullOptions::Bool(true)), diff --git a/src/wasm-lib/kcl/src/lsp/tests.rs b/src/wasm-lib/kcl/src/lsp/tests.rs index 1be7aa17a..81dd275bf 100644 --- a/src/wasm-lib/kcl/src/lsp/tests.rs +++ b/src/wasm-lib/kcl/src/lsp/tests.rs @@ -1082,7 +1082,7 @@ fn myFn = (param1) => { // Get the token map. let token_map = server.token_map.get("file:///test.kcl").unwrap().clone(); - assert!(token_map != vec![]); + assert!(!token_map.is_empty()); // Get the ast. let ast = server.ast_map.get("file:///test.kcl").unwrap().clone(); @@ -2206,7 +2206,7 @@ part001 = cube([0,0], 20) // Get the tokens. 
let tokens = server.token_map.get("file:///test.kcl").unwrap().clone(); - assert_eq!(tokens.len(), 120); + assert_eq!(tokens.as_slice().len(), 120); // Get the ast. let ast = server.ast_map.get("file:///test.kcl").unwrap().clone(); @@ -3379,11 +3379,11 @@ part001 = startSketchOn('XY') // Get the symbols map. let symbols_map = server.symbols_map.get("file:///test.kcl").unwrap().clone(); - assert!(symbols_map != vec![]); + assert!(!symbols_map.is_empty()); // Get the semantic tokens map. let semantic_tokens_map = server.semantic_tokens_map.get("file:///test.kcl").unwrap().clone(); - assert!(semantic_tokens_map != vec![]); + assert!(!semantic_tokens_map.is_empty()); // Get the memory. let memory = server.memory_map.get("file:///test.kcl").unwrap().clone(); @@ -3422,7 +3422,7 @@ NEW_LINT = 1"# // Get the semantic tokens map. let semantic_tokens_map = server.semantic_tokens_map.get("file:///test.kcl").unwrap().clone(); - assert!(semantic_tokens_map != vec![]); + assert!(!semantic_tokens_map.is_empty()); // Get the memory. let memory = server.memory_map.get("file:///test.kcl"); @@ -3466,7 +3466,7 @@ part001 = startSketchOn('XY') // Get the token map. let token_map = server.token_map.get("file:///test.kcl").unwrap().clone(); - assert!(token_map != vec![]); + assert!(!token_map.is_empty()); // Get the ast. let ast = server.ast_map.get("file:///test.kcl").unwrap().clone(); @@ -3474,11 +3474,11 @@ part001 = startSketchOn('XY') // Get the symbols map. let symbols_map = server.symbols_map.get("file:///test.kcl").unwrap().clone(); - assert!(symbols_map != vec![]); + assert!(!symbols_map.is_empty()); // Get the semantic tokens map. let semantic_tokens_map = server.semantic_tokens_map.get("file:///test.kcl").unwrap().clone(); - assert!(semantic_tokens_map != vec![]); + assert!(!semantic_tokens_map.is_empty()); // Get the memory. let memory = server.memory_map.get("file:///test.kcl").unwrap().clone(); @@ -3509,7 +3509,7 @@ part001 = startSketchOn('XY') // Get the token map. let token_map = server.token_map.get("file:///test.kcl").unwrap().clone(); - assert!(token_map != vec![]); + assert!(!token_map.is_empty()); // Get the ast. let ast = server.ast_map.get("file:///test.kcl").unwrap().clone(); @@ -3517,11 +3517,11 @@ part001 = startSketchOn('XY') // Get the symbols map. let symbols_map = server.symbols_map.get("file:///test.kcl").unwrap().clone(); - assert!(symbols_map != vec![]); + assert!(!symbols_map.is_empty()); // Get the semantic tokens map. let semantic_tokens_map = server.semantic_tokens_map.get("file:///test.kcl").unwrap().clone(); - assert!(semantic_tokens_map != vec![]); + assert!(!semantic_tokens_map.is_empty()); // Get the memory. let memory = server.memory_map.get("file:///test.kcl"); diff --git a/src/wasm-lib/kcl/src/parsing/mod.rs b/src/wasm-lib/kcl/src/parsing/mod.rs index 3330fbe29..93ba33aac 100644 --- a/src/wasm-lib/kcl/src/parsing/mod.rs +++ b/src/wasm-lib/kcl/src/parsing/mod.rs @@ -2,7 +2,7 @@ use crate::{ errors::{CompilationError, KclError, KclErrorDetails}, parsing::{ ast::types::{Node, Program}, - token::{Token, TokenType}, + token::TokenStream, }, source_range::{ModuleId, SourceRange}, }; @@ -34,15 +34,13 @@ pub fn top_level_parse(code: &str) -> ParseResult { /// Parse the given KCL code into an AST. pub fn parse_str(code: &str, module_id: ModuleId) -> ParseResult { - let tokens = pr_try!(crate::parsing::token::lexer(code, module_id)); + let tokens = pr_try!(crate::parsing::token::lex(code, module_id)); parse_tokens(tokens) } /// Parse the supplied tokens into an AST. 
-pub fn parse_tokens(tokens: Vec) -> ParseResult { - let (tokens, unknown_tokens): (Vec, Vec) = tokens - .into_iter() - .partition(|token| token.token_type != TokenType::Unknown); +pub fn parse_tokens(mut tokens: TokenStream) -> ParseResult { + let unknown_tokens = tokens.remove_unknown(); if !unknown_tokens.is_empty() { let source_ranges = unknown_tokens.iter().map(SourceRange::from).collect(); @@ -69,7 +67,7 @@ pub fn parse_tokens(tokens: Vec) -> ParseResult { return Node::::default().into(); } - parser::run_parser(&mut tokens.as_slice()) + parser::run_parser(tokens.as_slice()) } /// Result of parsing. diff --git a/src/wasm-lib/kcl/src/parsing/parser.rs b/src/wasm-lib/kcl/src/parsing/parser.rs index e414cd270..9393ec2bc 100644 --- a/src/wasm-lib/kcl/src/parsing/parser.rs +++ b/src/wasm-lib/kcl/src/parsing/parser.rs @@ -26,7 +26,7 @@ use crate::{ UnaryExpression, UnaryOperator, VariableDeclaration, VariableDeclarator, VariableKind, }, math::BinaryExpressionToken, - token::{Token, TokenType}, + token::{Token, TokenSlice, TokenType}, PIPE_OPERATOR, PIPE_SUBSTITUTION_OPERATOR, }, unparser::ExprContext, @@ -38,8 +38,6 @@ thread_local! { static CTXT: RefCell> = const { RefCell::new(None) }; } -pub type TokenSlice<'slice, 'input> = &'slice mut &'input [Token]; - pub fn run_parser(i: TokenSlice) -> super::ParseResult { let _stats = crate::log::LogPerfStats::new("Parsing"); ParseContext::init(); @@ -122,13 +120,13 @@ pub(crate) struct ContextError { pub cause: Option, } -impl From> for CompilationError { - fn from(err: winnow::error::ParseError<&[Token], ContextError>) -> Self { +impl From, ContextError>> for CompilationError { + fn from(err: winnow::error::ParseError, ContextError>) -> Self { let Some(last_token) = err.input().last() else { return CompilationError::fatal(Default::default(), "file is empty"); }; - let (input, offset, err) = (err.input().to_vec(), err.offset(), err.into_inner()); + let (input, offset, err) = (err.input(), err.offset(), err.clone().into_inner()); if let Some(e) = err.cause { return e; @@ -146,7 +144,7 @@ impl From> for CompilationErro ); } - let bad_token = &input[offset]; + let bad_token = input.token(offset); // TODO: Add the Winnow parser context to the error. // See https://github.com/KittyCAD/modeling-app/issues/784 CompilationError::fatal( @@ -227,7 +225,7 @@ fn expected(what: &'static str) -> StrContext { StrContext::Expected(StrContextValue::Description(what)) } -fn program(i: TokenSlice) -> PResult> { +fn program(i: &mut TokenSlice) -> PResult> { let shebang = opt(shebang).parse_next(i)?; let mut out: Node = function_body.parse_next(i)?; out.shebang = shebang; @@ -239,7 +237,7 @@ fn program(i: TokenSlice) -> PResult> { Ok(out) } -fn pipe_surrounded_by_whitespace(i: TokenSlice) -> PResult<()> { +fn pipe_surrounded_by_whitespace(i: &mut TokenSlice) -> PResult<()> { ( repeat(0.., whitespace).map(|_: Vec<_>| ()), pipe_operator, @@ -255,10 +253,10 @@ fn count_in(target: char, s: &str) -> usize { } /// Matches all four cases of NonCodeValue -fn non_code_node(i: TokenSlice) -> PResult> { +fn non_code_node(i: &mut TokenSlice) -> PResult> { /// Matches one case of NonCodeValue /// See docstring on [NonCodeValue::NewLineBlockComment] for why that case is different to the others. 
- fn non_code_node_leading_whitespace(i: TokenSlice) -> PResult> { + fn non_code_node_leading_whitespace(i: &mut TokenSlice) -> PResult> { let leading_whitespace = one_of(TokenType::Whitespace) .context(expected("whitespace, with a newline")) .parse_next(i)?; @@ -288,7 +286,7 @@ fn non_code_node(i: TokenSlice) -> PResult> { } // Matches remaining three cases of NonCodeValue -fn non_code_node_no_leading_whitespace(i: TokenSlice) -> PResult> { +fn non_code_node_no_leading_whitespace(i: &mut TokenSlice) -> PResult> { any.verify_map(|token: Token| { if token.is_code_token() { None @@ -322,7 +320,7 @@ fn non_code_node_no_leading_whitespace(i: TokenSlice) -> PResult PResult> { +fn pipe_expression(i: &mut TokenSlice) -> PResult> { let mut non_code_meta = NonCodeMeta::default(); let (head, noncode): (_, Vec<_>) = terminated( ( @@ -394,7 +392,7 @@ fn pipe_expression(i: TokenSlice) -> PResult> { }) } -fn bool_value(i: TokenSlice) -> PResult> { +fn bool_value(i: &mut TokenSlice) -> PResult> { let (value, token) = any .try_map(|token: Token| match token.token_type { TokenType::Keyword if token.value == "true" => Ok((true, token)), @@ -418,7 +416,7 @@ fn bool_value(i: TokenSlice) -> PResult> { ))) } -fn literal(i: TokenSlice) -> PResult> { +fn literal(i: &mut TokenSlice) -> PResult> { alt((string_literal, unsigned_number_literal)) .map(Box::new) .context(expected("a KCL literal, like 'myPart' or 3")) @@ -426,7 +424,7 @@ fn literal(i: TokenSlice) -> PResult> { } /// Parse a KCL string literal -fn string_literal(i: TokenSlice) -> PResult> { +fn string_literal(i: &mut TokenSlice) -> PResult> { let (value, token) = any .try_map(|token: Token| match token.token_type { TokenType::String => { @@ -453,7 +451,7 @@ fn string_literal(i: TokenSlice) -> PResult> { } /// Parse a KCL literal number, with no - sign. -pub(crate) fn unsigned_number_literal(i: TokenSlice) -> PResult> { +pub(crate) fn unsigned_number_literal(i: &mut TokenSlice) -> PResult> { let (value, token) = any .try_map(|token: Token| match token.token_type { TokenType::Number => { @@ -480,7 +478,7 @@ pub(crate) fn unsigned_number_literal(i: TokenSlice) -> PResult> { } /// Parse a KCL operator that takes a left- and right-hand side argument. -fn binary_operator(i: TokenSlice) -> PResult { +fn binary_operator(i: &mut TokenSlice) -> PResult { any.try_map(|token: Token| { if !matches!(token.token_type, TokenType::Operator) { return Err(CompilationError::fatal( @@ -515,7 +513,7 @@ fn binary_operator(i: TokenSlice) -> PResult { } /// Parse a KCL operand that can be used with an operator. -fn operand(i: TokenSlice) -> PResult { +fn operand(i: &mut TokenSlice) -> PResult { const TODO_783: &str = "found a value, but this kind of value cannot be used as the operand to an operator yet (see https://github.com/KittyCAD/modeling-app/issues/783)"; let op = possible_operands .try_map(|part| { @@ -565,7 +563,7 @@ fn operand(i: TokenSlice) -> PResult { } impl TokenType { - fn parse_from(self, i: TokenSlice) -> PResult { + fn parse_from(self, i: &mut TokenSlice) -> PResult { any.try_map(|token: Token| { if token.token_type == self { Ok(token) @@ -585,7 +583,7 @@ impl TokenType { } /// Parse some whitespace (i.e. at least one whitespace token) -fn whitespace(i: TokenSlice) -> PResult> { +fn whitespace(i: &mut TokenSlice) -> PResult> { repeat( 1.., any.try_map(|token: Token| { @@ -609,7 +607,7 @@ fn whitespace(i: TokenSlice) -> PResult> { /// A shebang is a line at the start of a file that starts with `#!`. /// If the shebang is present it takes up the whole line. 
-fn shebang(i: TokenSlice) -> PResult> { +fn shebang(i: &mut TokenSlice) -> PResult> { // Parse the hash and the bang. hash.parse_next(i)?; let tok = bang.parse_next(i)?; @@ -642,7 +640,7 @@ pub enum NonCodeOr { } /// Parse a KCL array of elements. -fn array(i: TokenSlice) -> PResult { +fn array(i: &mut TokenSlice) -> PResult { alt(( array_empty.map(Box::new).map(Expr::ArrayExpression), array_elem_by_elem.map(Box::new).map(Expr::ArrayExpression), @@ -652,7 +650,7 @@ fn array(i: TokenSlice) -> PResult { } /// Match an empty array. -fn array_empty(i: TokenSlice) -> PResult> { +fn array_empty(i: &mut TokenSlice) -> PResult> { let open = open_bracket(i)?; let start = open.start; ignore_whitespace(i); @@ -670,7 +668,7 @@ fn array_empty(i: TokenSlice) -> PResult> { } /// Match something that separates elements of an array. -fn array_separator(i: TokenSlice) -> PResult<()> { +fn array_separator(i: &mut TokenSlice) -> PResult<()> { alt(( // Normally you need a comma. comma_sep, @@ -680,7 +678,7 @@ fn array_separator(i: TokenSlice) -> PResult<()> { .parse_next(i) } -pub(crate) fn array_elem_by_elem(i: TokenSlice) -> PResult> { +pub(crate) fn array_elem_by_elem(i: &mut TokenSlice) -> PResult> { let open = open_bracket(i)?; let start = open.start; ignore_whitespace(i); @@ -728,7 +726,7 @@ pub(crate) fn array_elem_by_elem(i: TokenSlice) -> PResult )) } -fn array_end_start(i: TokenSlice) -> PResult> { +fn array_end_start(i: &mut TokenSlice) -> PResult> { let open = open_bracket(i)?; let start = open.start; ignore_whitespace(i); @@ -752,7 +750,7 @@ fn array_end_start(i: TokenSlice) -> PResult> { )) } -fn object_property_same_key_and_val(i: TokenSlice) -> PResult> { +fn object_property_same_key_and_val(i: &mut TokenSlice) -> PResult> { let key = identifier.context(expected("the property's key (the name or identifier of the property), e.g. in 'height: 4', 'height' is the property key")).parse_next(i)?; ignore_whitespace(i); Ok(Node { @@ -767,7 +765,7 @@ fn object_property_same_key_and_val(i: TokenSlice) -> PResult PResult> { +fn object_property(i: &mut TokenSlice) -> PResult> { let key = identifier.context(expected("the property's key (the name or identifier of the property), e.g. in 'height = 4', 'height' is the property key")).parse_next(i)?; ignore_whitespace(i); // Temporarily accept both `:` and `=` for compatibility. @@ -808,7 +806,7 @@ fn object_property(i: TokenSlice) -> PResult> { } /// Match something that separates properties of an object. -fn property_separator(i: TokenSlice) -> PResult<()> { +fn property_separator(i: &mut TokenSlice) -> PResult<()> { alt(( // Normally you need a comma. comma_sep, @@ -819,7 +817,7 @@ fn property_separator(i: TokenSlice) -> PResult<()> { } /// Parse a KCL object value. -pub(crate) fn object(i: TokenSlice) -> PResult> { +pub(crate) fn object(i: &mut TokenSlice) -> PResult> { let open = open_brace(i)?; let start = open.start; ignore_whitespace(i); @@ -874,7 +872,7 @@ pub(crate) fn object(i: TokenSlice) -> PResult> { } /// Parse the % symbol, used to substitute a curried argument from a |> (pipe). 
-fn pipe_sub(i: TokenSlice) -> PResult> { +fn pipe_sub(i: &mut TokenSlice) -> PResult> { any.try_map(|token: Token| { if matches!(token.token_type, TokenType::Operator) && token.value == PIPE_SUBSTITUTION_OPERATOR { Ok(Node::new( @@ -897,7 +895,7 @@ fn pipe_sub(i: TokenSlice) -> PResult> { .parse_next(i) } -fn else_if(i: TokenSlice) -> PResult> { +fn else_if(i: &mut TokenSlice) -> PResult> { let else_ = any .try_map(|token: Token| { if matches!(token.token_type, TokenType::Keyword) && token.value == "else" { @@ -948,7 +946,7 @@ fn else_if(i: TokenSlice) -> PResult> { )) } -fn if_expr(i: TokenSlice) -> PResult> { +fn if_expr(i: &mut TokenSlice) -> PResult> { let if_ = any .try_map(|token: Token| { if matches!(token.token_type, TokenType::Keyword) && token.value == "if" { @@ -1016,7 +1014,7 @@ fn if_expr(i: TokenSlice) -> PResult> { )) } -fn function_expr(i: TokenSlice) -> PResult { +fn function_expr(i: &mut TokenSlice) -> PResult { let fn_tok = opt(fun).parse_next(i)?; ignore_whitespace(i); let (result, has_arrow) = function_decl.parse_next(i)?; @@ -1032,8 +1030,8 @@ fn function_expr(i: TokenSlice) -> PResult { // const x = arg0 + arg1; // return x // } -fn function_decl(i: TokenSlice) -> PResult<(Node, bool)> { - fn return_type(i: TokenSlice) -> PResult { +fn function_decl(i: &mut TokenSlice) -> PResult<(Node, bool)> { + fn return_type(i: &mut TokenSlice) -> PResult { colon(i)?; ignore_whitespace(i); argument_type(i) @@ -1081,7 +1079,7 @@ fn function_decl(i: TokenSlice) -> PResult<(Node, bool)> { } /// E.g. `person.name` -fn member_expression_dot(i: TokenSlice) -> PResult<(LiteralIdentifier, usize, bool)> { +fn member_expression_dot(i: &mut TokenSlice) -> PResult<(LiteralIdentifier, usize, bool)> { period.parse_next(i)?; let property = alt(( sketch_keyword.map(Box::new).map(LiteralIdentifier::Identifier), @@ -1093,7 +1091,7 @@ fn member_expression_dot(i: TokenSlice) -> PResult<(LiteralIdentifier, usize, bo } /// E.g. `people[0]` or `people[i]` or `people['adam']` -fn member_expression_subscript(i: TokenSlice) -> PResult<(LiteralIdentifier, usize, bool)> { +fn member_expression_subscript(i: &mut TokenSlice) -> PResult<(LiteralIdentifier, usize, bool)> { let _ = open_bracket.parse_next(i)?; let property = alt(( sketch_keyword.map(Box::new).map(LiteralIdentifier::Identifier), @@ -1109,7 +1107,7 @@ fn member_expression_subscript(i: TokenSlice) -> PResult<(LiteralIdentifier, usi /// Get a property of an object, or an index of an array, or a member of a collection. /// Can be arbitrarily nested, e.g. `people[i]['adam'].age`. -fn member_expression(i: TokenSlice) -> PResult> { +fn member_expression(i: &mut TokenSlice) -> PResult> { // This is an identifier, followed by a sequence of members (aka properties) // First, the identifier. let id = identifier.context(expected("the identifier of the object whose property you're trying to access, e.g. in 'shape.size.width', 'shape' is the identifier")).parse_next(i)?; @@ -1159,7 +1157,7 @@ fn member_expression(i: TokenSlice) -> PResult> { /// Find a noncode node which occurs just after a body item, /// such that if the noncode item is a comment, it might be an inline comment. -fn noncode_just_after_code(i: TokenSlice) -> PResult> { +fn noncode_just_after_code(i: &mut TokenSlice) -> PResult> { let ws = opt(whitespace).parse_next(i)?; // What is the preceding whitespace like? 
@@ -1233,7 +1231,7 @@ impl WithinFunction { } } -fn body_items_within_function(i: TokenSlice) -> PResult { +fn body_items_within_function(i: &mut TokenSlice) -> PResult { // Any of the body item variants, each of which can optionally be followed by a comment. // If there is a comment, it may be preceded by whitespace. let item = dispatch! {peek(any); @@ -1265,7 +1263,7 @@ fn body_items_within_function(i: TokenSlice) -> PResult { } /// Parse the body of a user-defined function. -fn function_body(i: TokenSlice) -> PResult> { +fn function_body(i: &mut TokenSlice) -> PResult> { let leading_whitespace_start = alt(( peek(non_code_node).map(|_| None), // Subtract 1 from `t.start` to match behaviour of the old parser. @@ -1407,19 +1405,19 @@ fn function_body(i: TokenSlice) -> PResult> { )) } -fn import_items(i: TokenSlice) -> PResult> { +fn import_items(i: &mut TokenSlice) -> PResult> { separated(1.., import_item, comma_sep) .parse_next(i) .map_err(|e| e.cut()) } -fn glob(i: TokenSlice) -> PResult { +fn glob(i: &mut TokenSlice) -> PResult { one_of((TokenType::Operator, "*")) .context(expected("the multiple import operator, *")) .parse_next(i) } -fn import_stmt(i: TokenSlice) -> PResult> { +fn import_stmt(i: &mut TokenSlice) -> PResult> { let (visibility, visibility_token) = opt(terminated(item_visibility, whitespace)) .parse_next(i)? .map_or((ItemVisibility::Default, None), |pair| (pair.0, Some(pair.1))); @@ -1551,7 +1549,7 @@ fn import_stmt(i: TokenSlice) -> PResult> { )) } -fn import_item(i: TokenSlice) -> PResult> { +fn import_item(i: &mut TokenSlice) -> PResult> { let name = identifier.context(expected("an identifier to import")).parse_next(i)?; let start = name.start; let module_id = name.module_id; @@ -1577,7 +1575,7 @@ fn import_item(i: TokenSlice) -> PResult> { )) } -fn import_as_keyword(i: TokenSlice) -> PResult { +fn import_as_keyword(i: &mut TokenSlice) -> PResult { any.try_map(|token: Token| { if matches!(token.token_type, TokenType::Keyword | TokenType::Word) && token.value == "as" { Ok(token) @@ -1593,7 +1591,7 @@ fn import_as_keyword(i: TokenSlice) -> PResult { } /// Parse a return statement of a user-defined function, e.g. `return x`. -fn return_stmt(i: TokenSlice) -> PResult> { +fn return_stmt(i: &mut TokenSlice) -> PResult> { let ret = any .try_map(|token: Token| { if matches!(token.token_type, TokenType::Keyword) && token.value == "return" { @@ -1620,7 +1618,7 @@ fn return_stmt(i: TokenSlice) -> PResult> { } /// Parse a KCL expression. 
-fn expression(i: TokenSlice) -> PResult { +fn expression(i: &mut TokenSlice) -> PResult { alt(( pipe_expression.map(Box::new).map(Expr::PipeExpression), expression_but_not_pipe, @@ -1629,7 +1627,7 @@ fn expression(i: TokenSlice) -> PResult { .parse_next(i) } -fn expression_but_not_pipe(i: TokenSlice) -> PResult { +fn expression_but_not_pipe(i: &mut TokenSlice) -> PResult { alt(( binary_expression.map(Box::new).map(Expr::BinaryExpression), unary_expression.map(Box::new).map(Expr::UnaryExpression), @@ -1639,7 +1637,7 @@ fn expression_but_not_pipe(i: TokenSlice) -> PResult { .parse_next(i) } -fn unnecessarily_bracketed(i: TokenSlice) -> PResult { +fn unnecessarily_bracketed(i: &mut TokenSlice) -> PResult { delimited( terminated(open_paren, opt(whitespace)), expression, @@ -1648,7 +1646,7 @@ fn unnecessarily_bracketed(i: TokenSlice) -> PResult { .parse_next(i) } -fn expr_allowed_in_pipe_expr(i: TokenSlice) -> PResult { +fn expr_allowed_in_pipe_expr(i: &mut TokenSlice) -> PResult { alt(( member_expression.map(Box::new).map(Expr::MemberExpression), bool_value.map(Expr::Literal), @@ -1668,7 +1666,7 @@ fn expr_allowed_in_pipe_expr(i: TokenSlice) -> PResult { .parse_next(i) } -fn possible_operands(i: TokenSlice) -> PResult { +fn possible_operands(i: &mut TokenSlice) -> PResult { alt(( unary_expression.map(Box::new).map(Expr::UnaryExpression), bool_value.map(Expr::Literal), @@ -1686,7 +1684,7 @@ fn possible_operands(i: TokenSlice) -> PResult { } /// Parse an item visibility specifier, e.g. export. -fn item_visibility(i: TokenSlice) -> PResult<(ItemVisibility, Token)> { +fn item_visibility(i: &mut TokenSlice) -> PResult<(ItemVisibility, Token)> { any.verify_map(|token: Token| { if token.token_type == TokenType::Keyword && token.value == "export" { Some((ItemVisibility::Export, token)) @@ -1698,7 +1696,7 @@ fn item_visibility(i: TokenSlice) -> PResult<(ItemVisibility, Token)> { .parse_next(i) } -fn declaration_keyword(i: TokenSlice) -> PResult<(VariableKind, Token)> { +fn declaration_keyword(i: &mut TokenSlice) -> PResult<(VariableKind, Token)> { let res = any .verify_map(|token: Token| token.declaration_keyword().map(|kw| (kw, token))) .parse_next(i)?; @@ -1706,7 +1704,7 @@ fn declaration_keyword(i: TokenSlice) -> PResult<(VariableKind, Token)> { } /// Parse a variable/constant declaration. -fn declaration(i: TokenSlice) -> PResult> { +fn declaration(i: &mut TokenSlice) -> PResult> { let (visibility, visibility_token) = opt(terminated(item_visibility, whitespace)) .parse_next(i)? .map_or((ItemVisibility::Default, None), |pair| (pair.0, Some(pair.1))); @@ -1845,13 +1843,13 @@ impl TryFrom for Node { } /// Parse a KCL identifier (name of a constant/variable/function) -fn identifier(i: TokenSlice) -> PResult> { +fn identifier(i: &mut TokenSlice) -> PResult> { any.try_map(Node::::try_from) .context(expected("an identifier, e.g. 'width' or 'myPart'")) .parse_next(i) } -fn sketch_keyword(i: TokenSlice) -> PResult> { +fn sketch_keyword(i: &mut TokenSlice) -> PResult> { any.try_map(|token: Token| { if token.token_type == TokenType::Type && token.value == "sketch" { Ok(Node::new( @@ -1949,7 +1947,7 @@ impl Node { } /// Parse a Kcl tag that starts with a `$`. -fn tag(i: TokenSlice) -> PResult> { +fn tag(i: &mut TokenSlice) -> PResult> { dollar.parse_next(i)?; let tag_declarator = any .try_map(Node::::try_from) @@ -1964,21 +1962,21 @@ fn tag(i: TokenSlice) -> PResult> { } /// Helper function. Matches any number of whitespace tokens and ignores them. 
-fn ignore_whitespace(i: TokenSlice) { +fn ignore_whitespace(i: &mut TokenSlice) { let _: PResult<()> = repeat(0.., whitespace).parse_next(i); } // A helper function to ignore a trailing comma. -fn ignore_trailing_comma(i: TokenSlice) { +fn ignore_trailing_comma(i: &mut TokenSlice) { let _ = opt(comma).parse_next(i); } /// Matches at least 1 whitespace. -fn require_whitespace(i: TokenSlice) -> PResult<()> { +fn require_whitespace(i: &mut TokenSlice) -> PResult<()> { repeat(1.., whitespace).parse_next(i) } -fn unary_expression(i: TokenSlice) -> PResult> { +fn unary_expression(i: &mut TokenSlice) -> PResult> { const EXPECTED: &str = "expected a unary operator (like '-', the negative-numeric operator),"; let (operator, op_token) = any .try_map(|token: Token| match token.token_type { @@ -2008,7 +2006,7 @@ fn unary_expression(i: TokenSlice) -> PResult> { /// Consume tokens that make up a binary expression, but don't actually return them. /// Why not? /// Because this is designed to be used with .take() within the `binary_expression` parser. -fn binary_expression_tokens(i: TokenSlice) -> PResult> { +fn binary_expression_tokens(i: &mut TokenSlice) -> PResult> { let first = operand.parse_next(i).map(BinaryExpressionToken::from)?; let remaining: Vec<_> = repeat( 1.., @@ -2028,7 +2026,7 @@ fn binary_expression_tokens(i: TokenSlice) -> PResult } /// Parse an infix binary expression. -fn binary_expression(i: TokenSlice) -> PResult> { +fn binary_expression(i: &mut TokenSlice) -> PResult> { // Find the slice of tokens which makes up the binary expression let tokens = binary_expression_tokens.parse_next(i)?; @@ -2038,10 +2036,9 @@ fn binary_expression(i: TokenSlice) -> PResult> { Ok(expr) } -fn binary_expr_in_parens(i: TokenSlice) -> PResult> { +fn binary_expr_in_parens(i: &mut TokenSlice) -> PResult> { let span_with_brackets = bracketed_section.take().parse_next(i)?; - let n = span_with_brackets.len(); - let mut span_no_brackets = &span_with_brackets[1..n - 1]; + let mut span_no_brackets = span_with_brackets.without_ends(); let expr = binary_expression.parse_next(&mut span_no_brackets)?; Ok(expr) } @@ -2049,7 +2046,7 @@ fn binary_expr_in_parens(i: TokenSlice) -> PResult> { /// Match a starting bracket, then match to the corresponding end bracket. /// Return the count of how many tokens are in that span /// (not including the bracket tokens). -fn bracketed_section(i: TokenSlice) -> PResult { +fn bracketed_section(i: &mut TokenSlice) -> PResult { // Find the start of this bracketed expression. let _ = open_paren.parse_next(i)?; let mut opened_braces = 1usize; @@ -2069,7 +2066,7 @@ fn bracketed_section(i: TokenSlice) -> PResult { } /// Parse a KCL expression statement. -fn expression_stmt(i: TokenSlice) -> PResult> { +fn expression_stmt(i: &mut TokenSlice) -> PResult> { let val = expression .context(expected( "an expression (i.e. a value, or an algorithm for calculating one), e.g. 'x + y' or '3' or 'width * 2'", @@ -2087,20 +2084,20 @@ fn expression_stmt(i: TokenSlice) -> PResult> { } /// Parse the given brace symbol. -fn some_brace(symbol: &'static str, i: TokenSlice) -> PResult { +fn some_brace(symbol: &'static str, i: &mut TokenSlice) -> PResult { one_of((TokenType::Brace, symbol)) .context(expected(symbol)) .parse_next(i) } /// Parse a => operator. -fn big_arrow(i: TokenSlice) -> PResult { +fn big_arrow(i: &mut TokenSlice) -> PResult { one_of((TokenType::Operator, "=>")) .context(expected("the => symbol, used for declaring functions")) .parse_next(i) } /// Parse a |> operator. 
-fn pipe_operator(i: TokenSlice) -> PResult { +fn pipe_operator(i: &mut TokenSlice) -> PResult { one_of((TokenType::Operator, PIPE_OPERATOR)) .context(expected( "the |> operator, used for 'piping' one function's output into another function's input", @@ -2108,7 +2105,7 @@ fn pipe_operator(i: TokenSlice) -> PResult { .parse_next(i) } -fn ws_with_newline(i: TokenSlice) -> PResult { +fn ws_with_newline(i: &mut TokenSlice) -> PResult { one_of(TokenType::Whitespace) .verify(|token: &Token| token.value.contains('\n')) .context(expected("a newline, possibly with whitespace")) @@ -2116,60 +2113,60 @@ fn ws_with_newline(i: TokenSlice) -> PResult { } /// ( -fn open_paren(i: TokenSlice) -> PResult { +fn open_paren(i: &mut TokenSlice) -> PResult { some_brace("(", i) } /// ) -fn close_paren(i: TokenSlice) -> PResult { +fn close_paren(i: &mut TokenSlice) -> PResult { some_brace(")", i) } /// [ -fn open_bracket(i: TokenSlice) -> PResult { +fn open_bracket(i: &mut TokenSlice) -> PResult { some_brace("[", i) } /// ] -fn close_bracket(i: TokenSlice) -> PResult { +fn close_bracket(i: &mut TokenSlice) -> PResult { some_brace("]", i) } /// { -fn open_brace(i: TokenSlice) -> PResult { +fn open_brace(i: &mut TokenSlice) -> PResult { some_brace("{", i) } /// } -fn close_brace(i: TokenSlice) -> PResult { +fn close_brace(i: &mut TokenSlice) -> PResult { some_brace("}", i) } -fn comma(i: TokenSlice) -> PResult<()> { +fn comma(i: &mut TokenSlice) -> PResult<()> { TokenType::Comma.parse_from(i)?; Ok(()) } -fn hash(i: TokenSlice) -> PResult<()> { +fn hash(i: &mut TokenSlice) -> PResult<()> { TokenType::Hash.parse_from(i)?; Ok(()) } -fn bang(i: TokenSlice) -> PResult { +fn bang(i: &mut TokenSlice) -> PResult { TokenType::Bang.parse_from(i) } -fn dollar(i: TokenSlice) -> PResult<()> { +fn dollar(i: &mut TokenSlice) -> PResult<()> { TokenType::Dollar.parse_from(i)?; Ok(()) } -fn period(i: TokenSlice) -> PResult<()> { +fn period(i: &mut TokenSlice) -> PResult<()> { TokenType::Period.parse_from(i)?; Ok(()) } -fn double_period(i: TokenSlice) -> PResult { +fn double_period(i: &mut TokenSlice) -> PResult { any.try_map(|token: Token| { if matches!(token.token_type, TokenType::DoublePeriod) { Ok(token) @@ -2188,27 +2185,27 @@ fn double_period(i: TokenSlice) -> PResult { .parse_next(i) } -fn colon(i: TokenSlice) -> PResult { +fn colon(i: &mut TokenSlice) -> PResult { TokenType::Colon.parse_from(i) } -fn equals(i: TokenSlice) -> PResult { +fn equals(i: &mut TokenSlice) -> PResult { one_of((TokenType::Operator, "=")) .context(expected("the equals operator, =")) .parse_next(i) } -fn question_mark(i: TokenSlice) -> PResult<()> { +fn question_mark(i: &mut TokenSlice) -> PResult<()> { TokenType::QuestionMark.parse_from(i)?; Ok(()) } -fn at_sign(i: TokenSlice) -> PResult<()> { +fn at_sign(i: &mut TokenSlice) -> PResult<()> { TokenType::At.parse_from(i)?; Ok(()) } -fn fun(i: TokenSlice) -> PResult { +fn fun(i: &mut TokenSlice) -> PResult { any.try_map(|token: Token| match token.token_type { TokenType::Keyword if token.value == "fn" => Ok(token), _ => Err(CompilationError::fatal( @@ -2220,7 +2217,7 @@ fn fun(i: TokenSlice) -> PResult { } /// Parse a comma, optionally followed by some whitespace. -fn comma_sep(i: TokenSlice) -> PResult<()> { +fn comma_sep(i: &mut TokenSlice) -> PResult<()> { (opt(whitespace), comma, opt(whitespace)) .context(expected("a comma, optionally followed by whitespace")) .parse_next(i)?; @@ -2228,13 +2225,13 @@ fn comma_sep(i: TokenSlice) -> PResult<()> { } /// Arguments are passed into a function. 
-fn arguments(i: TokenSlice) -> PResult> { +fn arguments(i: &mut TokenSlice) -> PResult> { separated(0.., expression, comma_sep) .context(expected("function arguments")) .parse_next(i) } -fn labeled_argument(i: TokenSlice) -> PResult { +fn labeled_argument(i: &mut TokenSlice) -> PResult { separated_pair(identifier, (one_of(TokenType::Colon), opt(whitespace)), expression) .map(|(label, arg)| LabeledArg { label: label.inner, @@ -2245,7 +2242,7 @@ fn labeled_argument(i: TokenSlice) -> PResult { /// Arguments are passed into a function, /// preceded by the name of the parameter (the label). -fn labeled_arguments(i: TokenSlice) -> PResult> { +fn labeled_arguments(i: &mut TokenSlice) -> PResult> { separated(0.., labeled_argument, comma_sep) .context(expected("function arguments")) .parse_next(i) @@ -2256,7 +2253,7 @@ fn labeled_arguments(i: TokenSlice) -> PResult> { /// - a primitive type, e.g. 'number' or 'string' or 'bool' /// - an array type, e.g. 'number[]' or 'string[]' or 'bool[]' /// - an object type, e.g. '{x: number, y: number}' or '{name: string, age: number}' -fn argument_type(i: TokenSlice) -> PResult { +fn argument_type(i: &mut TokenSlice) -> PResult { let type_ = alt(( // Object types // TODO it is buggy to treat object fields like parameters since the parameters parser assumes a terminating `)`. @@ -2286,7 +2283,7 @@ struct ParamDescription { is_optional: bool, } -fn parameter(i: TokenSlice) -> PResult { +fn parameter(i: &mut TokenSlice) -> PResult { let (found_at_sign, arg_name, optional, _, type_) = ( opt(at_sign), any.verify(|token: &Token| !matches!(token.token_type, TokenType::Brace) || token.value != ")"), @@ -2304,7 +2301,7 @@ fn parameter(i: TokenSlice) -> PResult { } /// Parameters are declared in a function signature, and used within a function. -fn parameters(i: TokenSlice) -> PResult> { +fn parameters(i: &mut TokenSlice) -> PResult> { // Get all tokens until the next ), because that ends the parameter list. let candidates: Vec<_> = separated(0.., parameter, comma_sep) .context(expected("function parameters")) @@ -2386,7 +2383,7 @@ impl Node { } /// Introduce a new name, which binds some value. 
-fn binding_name(i: TokenSlice) -> PResult> { +fn binding_name(i: &mut TokenSlice) -> PResult> { identifier .context(expected("an identifier, which will be the name of some value")) .try_map(Node::::into_valid_binding_name) @@ -2446,7 +2443,7 @@ fn typecheck(spec_arg: &crate::docs::StdLibFnArg, arg: &&Expr) -> PResult<()> { Ok(()) } -fn fn_call(i: TokenSlice) -> PResult> { +fn fn_call(i: &mut TokenSlice) -> PResult> { let fn_name = identifier(i)?; opt(whitespace).parse_next(i)?; let _ = terminated(open_paren, opt(whitespace)).parse_next(i)?; @@ -2487,7 +2484,7 @@ fn fn_call(i: TokenSlice) -> PResult> { }) } -fn fn_call_kw(i: TokenSlice) -> PResult> { +fn fn_call_kw(i: &mut TokenSlice) -> PResult> { let fn_name = identifier(i)?; opt(whitespace).parse_next(i)?; let _ = open_paren.parse_next(i)?; @@ -2553,8 +2550,8 @@ mod tests { #[test] fn parse_args() { for (i, (test, expected_len)) in [("someVar", 1), ("5, 3", 2), (r#""a""#, 1)].into_iter().enumerate() { - let tokens = crate::parsing::token::lexer(test, ModuleId::default()).unwrap(); - let actual = match arguments.parse(&tokens) { + let tokens = crate::parsing::token::lex(test, ModuleId::default()).unwrap(); + let actual = match arguments.parse(tokens.as_slice()) { Ok(x) => x, Err(e) => panic!("Failed test {i}, could not parse function arguments from \"{test}\": {e:?}"), }; @@ -2564,10 +2561,11 @@ mod tests { #[test] fn weird_program_unclosed_paren() { - let tokens = crate::parsing::token::lexer("fn firstPrime(", ModuleId::default()).unwrap(); - let last = tokens.last().unwrap(); - let err: CompilationError = program.parse(&tokens).unwrap_err().into(); - assert_eq!(vec![err.source_range], last.as_source_ranges()); + let tokens = crate::parsing::token::lex("fn firstPrime(", ModuleId::default()).unwrap(); + let tokens = tokens.as_slice(); + let last = tokens.last().unwrap().as_source_range(); + let err: CompilationError = program.parse(tokens).unwrap_err().into(); + assert_eq!(err.source_range, last); // TODO: Better comment. This should explain the compiler expected ) because the user had started declaring the function's parameters. // Part of https://github.com/KittyCAD/modeling-app/issues/784 assert_eq!(err.message, "Unexpected end of file. 
The compiler expected )"); @@ -2575,8 +2573,8 @@ mod tests { #[test] fn weird_program_just_a_pipe() { - let tokens = crate::parsing::token::lexer("|", ModuleId::default()).unwrap(); - let err: CompilationError = program.parse(&tokens).unwrap_err().into(); + let tokens = crate::parsing::token::lex("|", ModuleId::default()).unwrap(); + let err: CompilationError = program.parse(tokens.as_slice()).unwrap_err().into(); assert_eq!(err.source_range, SourceRange::new(0, 1, ModuleId::default())); assert_eq!(err.message, "Unexpected token: |"); } @@ -2584,9 +2582,8 @@ mod tests { #[test] fn parse_binary_expressions() { for (i, test_program) in ["1 + 2 + 3"].into_iter().enumerate() { - let tokens = crate::parsing::token::lexer(test_program, ModuleId::default()).unwrap(); - let mut slice = tokens.as_slice(); - let _actual = match binary_expression.parse_next(&mut slice) { + let tokens = crate::parsing::token::lex(test_program, ModuleId::default()).unwrap(); + let _actual = match binary_expression.parse_next(&mut tokens.as_slice()) { Ok(x) => x, Err(e) => panic!("Failed test {i}, could not parse binary expressions from \"{test_program}\": {e:?}"), }; @@ -2595,7 +2592,7 @@ mod tests { #[test] fn test_vardec_no_keyword() { - let tokens = crate::parsing::token::lexer("x = 4", ModuleId::default()).unwrap(); + let tokens = crate::parsing::token::lex("x = 4", ModuleId::default()).unwrap(); let vardec = declaration(&mut tokens.as_slice()).unwrap(); assert_eq!(vardec.inner.kind, VariableKind::Const); let vardec = &vardec.declaration; @@ -2608,7 +2605,7 @@ mod tests { #[test] fn test_negative_operands() { - let tokens = crate::parsing::token::lexer("-leg2", ModuleId::default()).unwrap(); + let tokens = crate::parsing::token::lex("-leg2", ModuleId::default()).unwrap(); let _s = operand.parse_next(&mut tokens.as_slice()).unwrap(); } @@ -2622,9 +2619,8 @@ mod tests { // comment 2 return 1 }"#; - let tokens = crate::parsing::token::lexer(test_program, ModuleId::default()).unwrap(); - let mut slice = tokens.as_slice(); - let expr = function_decl.map(|t| t.0).parse_next(&mut slice).unwrap(); + let tokens = crate::parsing::token::lex(test_program, ModuleId::default()).unwrap(); + let expr = function_decl.map(|t| t.0).parse_next(&mut tokens.as_slice()).unwrap(); assert_eq!(expr.params, vec![]); let comment_start = expr.body.non_code_meta.start_nodes.first().unwrap(); let comment0 = &expr.body.non_code_meta.non_code_nodes.get(&0).unwrap()[0]; @@ -2640,9 +2636,8 @@ mod tests { yo = { a = { b = { c = '123' } } } /* block comment */ }"#; - let tokens = crate::parsing::token::lexer(test_program, ModuleId::default()).unwrap(); - let mut slice = tokens.as_slice(); - let expr = function_decl.map(|t| t.0).parse_next(&mut slice).unwrap(); + let tokens = crate::parsing::token::lex(test_program, ModuleId::default()).unwrap(); + let expr = function_decl.map(|t| t.0).parse_next(&mut tokens.as_slice()).unwrap(); let comment0 = &expr.body.non_code_meta.non_code_nodes.get(&0).unwrap()[0]; assert_eq!(comment0.value(), "block\ncomment"); } @@ -2653,8 +2648,8 @@ comment */ /* comment at start */ mySk1 = startSketchAt([0, 0])"#; - let tokens = crate::parsing::token::lexer(test_program, ModuleId::default()).unwrap(); - let program = program.parse(&tokens).unwrap(); + let tokens = crate::parsing::token::lex(test_program, ModuleId::default()).unwrap(); + let program = program.parse(tokens.as_slice()).unwrap(); let mut starting_comments = program.inner.non_code_meta.start_nodes; assert_eq!(starting_comments.len(), 2); let start0 = 
starting_comments.remove(0); @@ -2671,8 +2666,8 @@ mySk1 = startSketchAt([0, 0])"#; #[test] fn test_comment_in_pipe() { - let tokens = crate::parsing::token::lexer(r#"x = y() |> /*hi*/ z(%)"#, ModuleId::default()).unwrap(); - let mut body = program.parse(&tokens).unwrap().inner.body; + let tokens = crate::parsing::token::lex(r#"x = y() |> /*hi*/ z(%)"#, ModuleId::default()).unwrap(); + let mut body = program.parse(tokens.as_slice()).unwrap().inner.body; let BodyItem::VariableDeclaration(item) = body.remove(0) else { panic!("expected vardec"); }; @@ -2698,9 +2693,8 @@ mySk1 = startSketchAt([0, 0])"#; return sg return sg }"#; - let tokens = crate::parsing::token::lexer(test_program, ModuleId::default()).unwrap(); - let mut slice = tokens.as_slice(); - let _expr = function_decl.parse_next(&mut slice).unwrap(); + let tokens = crate::parsing::token::lex(test_program, ModuleId::default()).unwrap(); + let _expr = function_decl.parse_next(&mut tokens.as_slice()).unwrap(); } #[test] @@ -2710,9 +2704,8 @@ mySk1 = startSketchAt([0, 0])"#; return 2 }"; let module_id = ModuleId::from_usize(1); - let tokens = crate::parsing::token::lexer(test_program, module_id).unwrap(); - let mut slice = tokens.as_slice(); - let expr = function_decl.map(|t| t.0).parse_next(&mut slice).unwrap(); + let tokens = crate::parsing::token::lex(test_program, module_id).unwrap(); + let expr = function_decl.map(|t| t.0).parse_next(&mut tokens.as_slice()).unwrap(); assert_eq!( expr, Node::new( @@ -2775,14 +2768,13 @@ mySk1 = startSketchAt([0, 0])"#; |> c(%) // inline-comment |> d(%)"#; - let tokens = crate::parsing::token::lexer(test_input, ModuleId::default()).unwrap(); - let mut slice = tokens.as_slice(); + let tokens = crate::parsing::token::lex(test_input, ModuleId::default()).unwrap(); let Node { inner: PipeExpression { body, non_code_meta, .. }, .. - } = pipe_expression.parse_next(&mut slice).unwrap(); + } = pipe_expression.parse_next(&mut tokens.as_slice()).unwrap(); assert_eq!(non_code_meta.non_code_nodes.len(), 1); assert_eq!( non_code_meta.non_code_nodes.get(&2).unwrap()[0].value, @@ -2806,8 +2798,8 @@ mySk1 = startSketchAt([0, 0])"#; "#; let module_id = ModuleId::default(); - let tokens = crate::parsing::token::lexer(test_program, module_id).unwrap(); - let Program { non_code_meta, .. } = function_body.parse(&tokens).unwrap().inner; + let tokens = crate::parsing::token::lex(test_program, module_id).unwrap(); + let Program { non_code_meta, .. 
} = function_body.parse(tokens.as_slice()).unwrap().inner; assert_eq!( vec![Node::new( NonCodeNode { @@ -2874,8 +2866,8 @@ mySk1 = startSketchAt([0, 0])"#; comment */ return 1"#; - let tokens = crate::parsing::token::lexer(test_program, ModuleId::default()).unwrap(); - let actual = program.parse(&tokens).unwrap(); + let tokens = crate::parsing::token::lex(test_program, ModuleId::default()).unwrap(); + let actual = program.parse(tokens.as_slice()).unwrap(); assert_eq!(actual.non_code_meta.non_code_nodes.len(), 1); assert_eq!( actual.non_code_meta.non_code_nodes.get(&0).unwrap()[0].value, @@ -2889,8 +2881,8 @@ mySk1 = startSketchAt([0, 0])"#; #[test] fn test_bracketed_binary_expression() { let input = "(2 - 3)"; - let tokens = crate::parsing::token::lexer(input, ModuleId::default()).unwrap(); - let actual = match binary_expr_in_parens.parse(&tokens) { + let tokens = crate::parsing::token::lex(input, ModuleId::default()).unwrap(); + let actual = match binary_expr_in_parens.parse(tokens.as_slice()) { Ok(x) => x, Err(e) => panic!("{e:?}"), }; @@ -2904,8 +2896,8 @@ mySk1 = startSketchAt([0, 0])"#; "6 / ( sigmaAllow * width )", "sqrt(distance * p * FOS * 6 / ( sigmaAllow * width ))", ] { - let tokens = crate::parsing::token::lexer(input, ModuleId::default()).unwrap(); - let _actual = match expression.parse(&tokens) { + let tokens = crate::parsing::token::lex(input, ModuleId::default()).unwrap(); + let _actual = match expression.parse(tokens.as_slice()) { Ok(x) => x, Err(e) => panic!("{e:?}"), }; @@ -2915,9 +2907,9 @@ mySk1 = startSketchAt([0, 0])"#; #[test] fn test_arithmetic() { let input = "1 * (2 - 3)"; - let tokens = crate::parsing::token::lexer(input, ModuleId::default()).unwrap(); + let tokens = crate::parsing::token::lex(input, ModuleId::default()).unwrap(); // The RHS should be a binary expression. - let actual = binary_expression.parse(&tokens).unwrap(); + let actual = binary_expression.parse(tokens.as_slice()).unwrap(); assert_eq!(actual.operator, BinaryOperator::Mul); let BinaryPart::BinaryExpression(rhs) = actual.inner.right else { panic!("Expected RHS to be another binary expression"); @@ -2943,8 +2935,8 @@ mySk1 = startSketchAt([0, 0])"#; .into_iter() .enumerate() { - let tokens = crate::parsing::token::lexer(test_input, ModuleId::default()).unwrap(); - let actual = match declaration.parse(&tokens) { + let tokens = crate::parsing::token::lex(test_input, ModuleId::default()).unwrap(); + let actual = match declaration.parse(tokens.as_slice()) { Err(e) => panic!("Could not parse test {i}: {e:#?}"), Ok(a) => a, }; @@ -2961,8 +2953,8 @@ mySk1 = startSketchAt([0, 0])"#; #[test] fn test_function_call() { for (i, test_input) in ["x = f(1)", "x = f( 1 )"].into_iter().enumerate() { - let tokens = crate::parsing::token::lexer(test_input, ModuleId::default()).unwrap(); - let _actual = match declaration.parse(&tokens) { + let tokens = crate::parsing::token::lex(test_input, ModuleId::default()).unwrap(); + let _actual = match declaration.parse(tokens.as_slice()) { Err(e) => panic!("Could not parse test {i}: {e:#?}"), Ok(a) => a, }; @@ -2972,9 +2964,9 @@ mySk1 = startSketchAt([0, 0])"#; #[test] fn test_nested_arithmetic() { let input = "1 * ((2 - 3) / 4)"; - let tokens = crate::parsing::token::lexer(input, ModuleId::default()).unwrap(); + let tokens = crate::parsing::token::lex(input, ModuleId::default()).unwrap(); // The RHS should be a binary expression. 
- let outer = binary_expression.parse(&tokens).unwrap(); + let outer = binary_expression.parse(tokens.as_slice()).unwrap(); assert_eq!(outer.operator, BinaryOperator::Mul); let BinaryPart::BinaryExpression(middle) = outer.inner.right else { panic!("Expected RHS to be another binary expression"); @@ -2991,8 +2983,8 @@ mySk1 = startSketchAt([0, 0])"#; fn binary_expression_ignores_whitespace() { let tests = ["1 - 2", "1- 2", "1 -2", "1-2"]; for test in tests { - let tokens = crate::parsing::token::lexer(test, ModuleId::default()).unwrap(); - let actual = binary_expression.parse(&tokens).unwrap(); + let tokens = crate::parsing::token::lex(test, ModuleId::default()).unwrap(); + let actual = binary_expression.parse(tokens.as_slice()).unwrap(); assert_eq!(actual.operator, BinaryOperator::Sub); let BinaryPart::Literal(left) = actual.inner.left else { panic!("should be expression"); @@ -3012,8 +3004,8 @@ mySk1 = startSketchAt([0, 0])"#; a comment spanning a few lines */ |> z(%)"#; - let tokens = crate::parsing::token::lexer(test_program, ModuleId::default()).unwrap(); - let actual = pipe_expression.parse(&tokens).unwrap(); + let tokens = crate::parsing::token::lex(test_program, ModuleId::default()).unwrap(); + let actual = pipe_expression.parse(tokens.as_slice()).unwrap(); let n = actual.non_code_meta.non_code_nodes.len(); assert_eq!(n, 1, "expected one comment in pipe expression but found {n}"); let nc = &actual.non_code_meta.non_code_nodes.get(&1).unwrap()[0]; @@ -3040,8 +3032,8 @@ mySk1 = startSketchAt([0, 0])"#; .into_iter() .enumerate() { - let tokens = crate::parsing::token::lexer(test_program, ModuleId::default()).unwrap(); - let actual = pipe_expression.parse(&tokens); + let tokens = crate::parsing::token::lex(test_program, ModuleId::default()).unwrap(); + let actual = pipe_expression.parse(tokens.as_slice()); assert!(actual.is_ok(), "could not parse test {i}, '{test_program}'"); let actual = actual.unwrap(); let n = actual.non_code_meta.non_code_nodes.len(); @@ -3185,8 +3177,8 @@ mySk1 = startSketchAt([0, 0])"#; .into_iter() .enumerate() { - let tokens = crate::parsing::token::lexer(test_program, module_id).unwrap(); - let actual = non_code_node.parse(&tokens); + let tokens = crate::parsing::token::lex(test_program, module_id).unwrap(); + let actual = non_code_node.parse(tokens.as_slice()); assert!(actual.is_ok(), "could not parse test {i}: {actual:#?}"); let actual = actual.unwrap(); assert_eq!(actual, expected, "failed test {i}"); @@ -3197,8 +3189,8 @@ mySk1 = startSketchAt([0, 0])"#; fn recognize_invalid_params() { let test_fn = "(let) => { return 1 }"; let module_id = ModuleId::from_usize(2); - let tokens = crate::parsing::token::lexer(test_fn, module_id).unwrap(); - let err = function_decl.parse(&tokens).unwrap_err().into_inner(); + let tokens = crate::parsing::token::lex(test_fn, module_id).unwrap(); + let err = function_decl.parse(tokens.as_slice()).unwrap_err().into_inner(); let cause = err.cause.unwrap(); // This is the token `let` assert_eq!(cause.source_range, SourceRange::new(1, 4, ModuleId::from_usize(2))); @@ -3210,8 +3202,8 @@ mySk1 = startSketchAt([0, 0])"#; let string_literal = r#"" // a comment ""#; - let tokens = crate::parsing::token::lexer(string_literal, ModuleId::default()).unwrap(); - let parsed_literal = literal.parse(&tokens).unwrap(); + let tokens = crate::parsing::token::lex(string_literal, ModuleId::default()).unwrap(); + let parsed_literal = literal.parse(tokens.as_slice()).unwrap(); assert_eq!( parsed_literal.value, " @@ -3227,24 +3219,24 @@ mySk1 = 
startSketchAt([0, 0])"#; |> lineTo([0, -0], %) // MoveRelative "#; - let tokens = crate::parsing::token::lexer(test_program, ModuleId::default()).unwrap(); - let mut slice = &tokens[..]; - let _actual = pipe_expression.parse_next(&mut slice).unwrap(); - assert_eq!(slice[0].token_type, TokenType::Whitespace); + let tokens = crate::parsing::token::lex(test_program, ModuleId::default()).unwrap(); + let tokens = &mut tokens.as_slice(); + let _actual = pipe_expression.parse_next(tokens).unwrap(); + assert_eq!(tokens.first().unwrap().token_type, TokenType::Whitespace); } #[test] fn test_pipes_on_pipes() { let test_program = include_str!("../../../tests/executor/inputs/pipes_on_pipes.kcl"); - let tokens = crate::parsing::token::lexer(test_program, ModuleId::default()).unwrap(); - let _ = run_parser(&mut &*tokens).unwrap(); + let tokens = crate::parsing::token::lex(test_program, ModuleId::default()).unwrap(); + let _ = run_parser(tokens.as_slice()).unwrap(); } #[test] fn test_cube() { let test_program = include_str!("../../../tests/executor/inputs/cube.kcl"); - let tokens = crate::parsing::token::lexer(test_program, ModuleId::default()).unwrap(); - match program.parse(&tokens) { + let tokens = crate::parsing::token::lex(test_program, ModuleId::default()).unwrap(); + match program.parse(tokens.as_slice()) { Ok(_) => {} Err(e) => { panic!("{e:#?}"); @@ -3261,8 +3253,8 @@ mySk1 = startSketchAt([0, 0])"#; ("a,b", vec!["a", "b"]), ]; for (i, (input, expected)) in tests.into_iter().enumerate() { - let tokens = crate::parsing::token::lexer(input, ModuleId::default()).unwrap(); - let actual = parameters.parse(&tokens); + let tokens = crate::parsing::token::lex(input, ModuleId::default()).unwrap(); + let actual = parameters.parse(tokens.as_slice()); assert!(actual.is_ok(), "could not parse test {i}"); let actual_ids: Vec<_> = actual.unwrap().into_iter().map(|p| p.identifier.inner.name).collect(); assert_eq!(actual_ids, expected); @@ -3275,8 +3267,8 @@ mySk1 = startSketchAt([0, 0])"#; return 2 }"; - let tokens = crate::parsing::token::lexer(input, ModuleId::default()).unwrap(); - let actual = function_decl.parse(&tokens); + let tokens = crate::parsing::token::lex(input, ModuleId::default()).unwrap(); + let actual = function_decl.parse(tokens.as_slice()); assert!(actual.is_ok(), "could not parse test function"); } @@ -3285,7 +3277,7 @@ mySk1 = startSketchAt([0, 0])"#; let tests = ["myVar = 5", "myVar=5", "myVar =5", "myVar= 5"]; for test in tests { // Run the original parser - let tokens = crate::parsing::token::lexer(test, ModuleId::default()).unwrap(); + let tokens = crate::parsing::token::lex(test, ModuleId::default()).unwrap(); let mut expected_body = crate::parsing::parse_tokens(tokens.clone()).unwrap().inner.body; assert_eq!(expected_body.len(), 1); let BodyItem::VariableDeclaration(expected) = expected_body.pop().unwrap() else { @@ -3293,7 +3285,7 @@ mySk1 = startSketchAt([0, 0])"#; }; // Run the second parser, check it matches the first parser. - let actual = declaration.parse(&tokens).unwrap(); + let actual = declaration.parse(tokens.as_slice()).unwrap(); assert_eq!(expected, actual); // Inspect its output in more detail. 
@@ -3354,96 +3346,6 @@ mySk1 = startSketchAt([0, 0])"#; assert_eq!(expected, actual); } - #[test] - fn test_is_code_token() { - let module_id = ModuleId::default(); - let tokens = [ - Token { - token_type: TokenType::Word, - start: 0, - end: 3, - module_id, - value: "log".to_string(), - }, - Token { - token_type: TokenType::Brace, - start: 3, - end: 4, - module_id, - value: "(".to_string(), - }, - Token { - token_type: TokenType::Number, - start: 4, - end: 5, - module_id, - value: "5".to_string(), - }, - Token { - token_type: TokenType::Comma, - start: 5, - end: 6, - module_id, - value: ",".to_string(), - }, - Token { - token_type: TokenType::String, - start: 7, - end: 14, - module_id, - value: "\"hello\"".to_string(), - }, - Token { - token_type: TokenType::Word, - start: 16, - end: 27, - module_id, - value: "aIdentifier".to_string(), - }, - Token { - token_type: TokenType::Brace, - start: 27, - end: 28, - module_id, - value: ")".to_string(), - }, - ]; - for (i, token) in tokens.iter().enumerate() { - assert!(token.is_code_token(), "failed test {i}: {token:?}") - } - } - - #[test] - fn test_is_not_code_token() { - let module_id = ModuleId::default(); - let tokens = [ - Token { - token_type: TokenType::Whitespace, - start: 6, - end: 7, - module_id, - value: " ".to_string(), - }, - Token { - token_type: TokenType::BlockComment, - start: 28, - end: 30, - module_id, - value: "/* abte */".to_string(), - }, - Token { - token_type: TokenType::LineComment, - start: 30, - end: 33, - module_id, - value: "// yoyo a line".to_string(), - }, - ]; - for (i, token) in tokens.iter().enumerate() { - assert!(!token.is_code_token(), "failed test {i}: {token:?}") - } - } - #[test] fn test_abstract_syntax_tree() { let code = "5 +6"; @@ -3927,9 +3829,8 @@ e fn array() { let program = r#"[1, 2, 3]"#; let module_id = ModuleId::default(); - let tokens = crate::parsing::token::lexer(program, module_id).unwrap(); - let mut sl: &[Token] = &tokens; - let _arr = array_elem_by_elem(&mut sl).unwrap(); + let tokens = crate::parsing::token::lex(program, module_id).unwrap(); + let _arr = array_elem_by_elem(&mut tokens.as_slice()).unwrap(); } #[test] @@ -3940,9 +3841,8 @@ e 3, ]"#; let module_id = ModuleId::default(); - let tokens = crate::parsing::token::lexer(program, module_id).unwrap(); - let mut sl: &[Token] = &tokens; - let _arr = array_elem_by_elem(&mut sl).unwrap(); + let tokens = crate::parsing::token::lex(program, module_id).unwrap(); + let _arr = array_elem_by_elem(&mut tokens.as_slice()).unwrap(); } #[allow(unused)] @@ -3954,9 +3854,8 @@ e 3 ]"#; let module_id = ModuleId::default(); - let tokens = crate::parsing::token::lexer(program, module_id).unwrap(); - let mut sl: &[Token] = &tokens; - let _arr = array_elem_by_elem(&mut sl).unwrap(); + let tokens = crate::parsing::token::lex(program, module_id).unwrap(); + let _arr = array_elem_by_elem(&mut tokens.as_slice()).unwrap(); } #[test] @@ -3967,9 +3866,8 @@ e 4 }"; let module_id = ModuleId::default(); - let tokens = crate::parsing::token::lexer(some_program_string, module_id).unwrap(); - let mut sl: &[Token] = &tokens; - let _res = if_expr(&mut sl).unwrap(); + let tokens = crate::parsing::token::lex(some_program_string, module_id).unwrap(); + let _res = if_expr(&mut tokens.as_slice()).unwrap(); } #[test] @@ -3978,9 +3876,8 @@ e 4 }"; let module_id = ModuleId::default(); - let tokens = crate::parsing::token::lexer(some_program_string, module_id).unwrap(); - let mut sl: &[Token] = &tokens; - let _res = else_if(&mut sl).unwrap(); + let tokens = 
crate::parsing::token::lex(some_program_string, module_id).unwrap(); + let _res = else_if(&mut tokens.as_slice()).unwrap(); } #[test] @@ -3993,9 +3890,8 @@ e 5 }"; let module_id = ModuleId::default(); - let tokens = crate::parsing::token::lexer(some_program_string, module_id).unwrap(); - let mut sl: &[Token] = &tokens; - let _res = if_expr(&mut sl).unwrap(); + let tokens = crate::parsing::token::lex(some_program_string, module_id).unwrap(); + let _res = if_expr(&mut tokens.as_slice()).unwrap(); } #[test] @@ -4113,18 +4009,16 @@ let myBox = box([0,0], -3, -16, -10) fn arg_labels() { let input = r#"length: 3"#; let module_id = ModuleId::default(); - let tokens = crate::parsing::token::lexer(input, module_id).unwrap(); - let mut sl: &[Token] = &tokens; - super::labeled_arguments(&mut sl).unwrap(); + let tokens = crate::parsing::token::lex(input, module_id).unwrap(); + super::labeled_arguments(&mut tokens.as_slice()).unwrap(); } #[test] fn kw_fn() { for input in ["val = foo(x, y: z)", "val = foo(y: z)"] { let module_id = ModuleId::default(); - let tokens = crate::parsing::token::lexer(input, module_id).unwrap(); - let sl = &tokens; - super::program.parse(sl).unwrap(); + let tokens = crate::parsing::token::lex(input, module_id).unwrap(); + super::program.parse(tokens.as_slice()).unwrap(); } } @@ -4317,10 +4211,10 @@ mod snapshot_math_tests { #[test] fn $func_name() { let module_id = crate::ModuleId::default(); - let tokens = crate::parsing::token::lexer($test_kcl_program, module_id).unwrap(); + let tokens = crate::parsing::token::lex($test_kcl_program, module_id).unwrap(); ParseContext::init(); - let actual = match binary_expression.parse(&tokens) { + let actual = match binary_expression.parse(tokens.as_slice()) { Ok(x) => x, Err(_e) => panic!("could not parse test"), }; @@ -4355,10 +4249,10 @@ mod snapshot_tests { #[test] fn $func_name() { let module_id = crate::ModuleId::default(); - let tokens = crate::parsing::token::lexer($test_kcl_program, module_id).unwrap(); - print_tokens(&tokens); + let tokens = crate::parsing::token::lex($test_kcl_program, module_id).unwrap(); + print_tokens(tokens.as_slice()); ParseContext::init(); - let actual = match program.parse(&tokens) { + let actual = match program.parse(tokens.as_slice()) { Ok(x) => x, Err(e) => panic!("could not parse test: {e:?}"), }; @@ -4569,7 +4463,7 @@ my14 = 4 ^ 2 - 3 ^ 2 * 2 #[allow(unused)] #[cfg(test)] -pub(crate) fn print_tokens(tokens: &[Token]) { +pub(crate) fn print_tokens(tokens: TokenSlice) { for (i, tok) in tokens.iter().enumerate() { println!("{i:.2}: ({:?}):) '{}'", tok.token_type, tok.value.replace("\n", "\\n")); } diff --git a/src/wasm-lib/kcl/src/parsing/token/mod.rs b/src/wasm-lib/kcl/src/parsing/token/mod.rs index 0f71000c9..316c45218 100644 --- a/src/wasm-lib/kcl/src/parsing/token/mod.rs +++ b/src/wasm-lib/kcl/src/parsing/token/mod.rs @@ -1,28 +1,221 @@ -use std::str::FromStr; +// Clippy does not agree with rustc here for some reason. 
+#![allow(clippy::needless_lifetimes)] + +use std::{fmt, iter::Enumerate, num::NonZeroUsize}; use anyhow::Result; -use parse_display::{Display, FromStr}; -use schemars::JsonSchema; -use serde::{Deserialize, Serialize}; +use parse_display::Display; use tower_lsp::lsp_types::SemanticTokenType; -use winnow::{error::ParseError, stream::ContainsToken}; +use winnow::{ + self, + error::ParseError, + stream::{ContainsToken, Stream}, +}; use crate::{ errors::KclError, parsing::ast::types::{ItemVisibility, VariableKind}, source_range::{ModuleId, SourceRange}, }; +use tokeniser::Input; mod tokeniser; -// Re-export -pub use tokeniser::Input; #[cfg(test)] pub(crate) use tokeniser::RESERVED_WORDS; +#[derive(Clone, Debug, PartialEq)] +pub(crate) struct TokenStream { + tokens: Vec, +} + +impl TokenStream { + fn new(tokens: Vec) -> Self { + Self { tokens } + } + + pub(super) fn remove_unknown(&mut self) -> Vec { + let tokens = std::mem::take(&mut self.tokens); + let (tokens, unknown_tokens): (Vec, Vec) = tokens + .into_iter() + .partition(|token| token.token_type != TokenType::Unknown); + self.tokens = tokens; + unknown_tokens + } + + pub fn iter(&self) -> impl Iterator { + self.tokens.iter() + } + + pub fn is_empty(&self) -> bool { + self.tokens.is_empty() + } + + pub fn as_slice(&self) -> TokenSlice { + TokenSlice::from(self) + } +} + +impl<'a> From<&'a TokenStream> for TokenSlice<'a> { + fn from(stream: &'a TokenStream) -> Self { + TokenSlice { + start: 0, + end: stream.tokens.len(), + stream, + } + } +} + +impl IntoIterator for TokenStream { + type Item = Token; + + type IntoIter = std::vec::IntoIter; + + fn into_iter(self) -> Self::IntoIter { + self.tokens.into_iter() + } +} + +#[derive(Debug, Clone)] +pub(crate) struct TokenSlice<'a> { + stream: &'a TokenStream, + start: usize, + end: usize, +} + +impl<'a> std::ops::Deref for TokenSlice<'a> { + type Target = [Token]; + + fn deref(&self) -> &Self::Target { + &self.stream.tokens[self.start..self.end] + } +} + +impl<'a> TokenSlice<'a> { + pub fn token(&self, i: usize) -> &Token { + &self.stream.tokens[i + self.start] + } + + pub fn iter(&self) -> impl Iterator { + (**self).iter() + } + + pub fn without_ends(&self) -> Self { + Self { + start: self.start + 1, + end: self.end - 1, + stream: self.stream, + } + } +} + +impl<'a> IntoIterator for TokenSlice<'a> { + type Item = &'a Token; + + type IntoIter = std::slice::Iter<'a, Token>; + + fn into_iter(self) -> Self::IntoIter { + self.stream.tokens[self.start..self.end].iter() + } +} + +impl<'a> Stream for TokenSlice<'a> { + type Token = Token; + type Slice = Self; + type IterOffsets = Enumerate>; + type Checkpoint = Checkpoint; + + fn iter_offsets(&self) -> Self::IterOffsets { + #[allow(clippy::unnecessary_to_owned)] + self.to_vec().into_iter().enumerate() + } + + fn eof_offset(&self) -> usize { + self.len() + } + + fn next_token(&mut self) -> Option { + let token = self.first()?.clone(); + self.start += 1; + Some(token) + } + + fn offset_for
(&self, predicate: P) -> Option + where + P: Fn(Self::Token) -> bool, + { + self.iter().position(|b| predicate(b.clone())) + } + + fn offset_at(&self, tokens: usize) -> Result { + if let Some(needed) = tokens.checked_sub(self.len()).and_then(NonZeroUsize::new) { + Err(winnow::error::Needed::Size(needed)) + } else { + Ok(tokens) + } + } + + fn next_slice(&mut self, offset: usize) -> Self::Slice { + assert!(self.start + offset <= self.end); + + let next = TokenSlice { + stream: self.stream, + start: self.start, + end: self.start + offset, + }; + self.start += offset; + next + } + + fn checkpoint(&self) -> Self::Checkpoint { + Checkpoint(self.start, self.end) + } + + fn reset(&mut self, checkpoint: &Self::Checkpoint) { + self.start = checkpoint.0; + self.end = checkpoint.1; + } + + fn raw(&self) -> &dyn fmt::Debug { + self + } +} + +impl<'a> winnow::stream::Offset for TokenSlice<'a> { + fn offset_from(&self, start: &Self) -> usize { + self.start - start.start + } +} + +impl<'a> winnow::stream::Offset for TokenSlice<'a> { + fn offset_from(&self, start: &Checkpoint) -> usize { + self.start - start.0 + } +} + +impl winnow::stream::Offset for Checkpoint { + fn offset_from(&self, start: &Self) -> usize { + self.0 - start.0 + } +} + +impl<'a> winnow::stream::StreamIsPartial for TokenSlice<'a> { + type PartialState = (); + + fn complete(&mut self) -> Self::PartialState {} + + fn restore_partial(&mut self, _: Self::PartialState) {} + + fn is_partial_supported() -> bool { + false + } +} + +#[derive(Clone, Debug)] +pub struct Checkpoint(usize, usize); + /// The types of tokens. -#[derive(Debug, PartialEq, Eq, Copy, Clone, Deserialize, Serialize, JsonSchema, FromStr, Display)] -#[serde(rename_all = "camelCase")] +#[derive(Debug, PartialEq, Eq, Copy, Clone, Display)] #[display(style = "camelCase")] pub enum TokenType { /// A number. @@ -73,6 +266,8 @@ pub enum TokenType { impl TryFrom for SemanticTokenType { type Error = anyhow::Error; fn try_from(token_type: TokenType) -> Result { + // If you return a new kind of `SemanticTokenType`, make sure to update `SEMANTIC_TOKEN_TYPES` + // in the LSP implementation. Ok(match token_type { TokenType::Number => Self::NUMBER, TokenType::Word => Self::VARIABLE, @@ -102,52 +297,6 @@ impl TryFrom for SemanticTokenType { } impl TokenType { - // This is for the lsp server. - // Don't call this function directly in the code use a lazy_static instead - // like we do in the lsp server. 
- pub fn all_semantic_token_types() -> Result> { - let mut settings = schemars::gen::SchemaSettings::openapi3(); - settings.inline_subschemas = true; - let mut generator = schemars::gen::SchemaGenerator::new(settings); - - let schema = TokenType::json_schema(&mut generator); - let schemars::schema::Schema::Object(o) = &schema else { - anyhow::bail!("expected object schema: {:#?}", schema); - }; - let Some(subschemas) = &o.subschemas else { - anyhow::bail!("expected subschemas: {:#?}", schema); - }; - let Some(one_ofs) = &subschemas.one_of else { - anyhow::bail!("expected one_of: {:#?}", schema); - }; - - let mut semantic_tokens = vec![]; - for one_of in one_ofs { - let schemars::schema::Schema::Object(o) = one_of else { - anyhow::bail!("expected object one_of: {:#?}", one_of); - }; - - let Some(enum_values) = o.enum_values.as_ref() else { - anyhow::bail!("expected enum values: {:#?}", o); - }; - - if enum_values.len() > 1 { - anyhow::bail!("expected only one enum value: {:#?}", o); - } - - if enum_values.is_empty() { - anyhow::bail!("expected at least one enum value: {:#?}", o); - } - - let label = TokenType::from_str(&enum_values[0].to_string().replace('"', ""))?; - if let Ok(semantic_token_type) = SemanticTokenType::try_from(label) { - semantic_tokens.push(semantic_token_type); - } - } - - Ok(semantic_tokens) - } - pub fn is_whitespace(&self) -> bool { matches!(self, Self::Whitespace) } @@ -157,17 +306,15 @@ impl TokenType { } } -#[derive(Debug, PartialEq, Eq, Deserialize, Serialize, Clone)] +#[derive(Debug, PartialEq, Eq, Clone)] pub struct Token { - #[serde(rename = "type")] pub token_type: TokenType, /// Offset in the source code where this token begins. pub start: usize, /// Offset in the source code where this token ends. pub end: usize, - #[serde(default, skip_serializing_if = "ModuleId::is_top_level")] - pub module_id: ModuleId, - pub value: String, + pub(super) module_id: ModuleId, + pub(super) value: String, } impl ContainsToken for (TokenType, &str) { @@ -249,7 +396,7 @@ impl From<&Token> for SourceRange { } } -pub fn lexer(s: &str, module_id: ModuleId) -> Result, KclError> { +pub fn lex(s: &str, module_id: ModuleId) -> Result { tokeniser::lex(s, module_id).map_err(From::from) } @@ -281,15 +428,3 @@ impl From, winnow::error::ContextError>> for KclError { }) } } - -#[cfg(test)] -mod tests { - use super::*; - - // We have this as a test so we can ensure it never panics with an unwrap in the server. 
- #[test] - fn test_token_type_to_semantic_token_type() { - let semantic_types = TokenType::all_semantic_token_types().unwrap(); - assert!(!semantic_types.is_empty()); - } -} diff --git a/src/wasm-lib/kcl/src/parsing/token/snapshots/kcl_lib__parsing__token__tokeniser__tests__program2.snap b/src/wasm-lib/kcl/src/parsing/token/snapshots/kcl_lib__parsing__token__tokeniser__tests__program2.snap new file mode 100644 index 000000000..4518b52c2 --- /dev/null +++ b/src/wasm-lib/kcl/src/parsing/token/snapshots/kcl_lib__parsing__token__tokeniser__tests__program2.snap @@ -0,0 +1,1014 @@ +--- +source: kcl/src/parsing/token/tokeniser.rs +expression: actual.tokens +--- +[ + Token { + token_type: Keyword, + start: 0, + end: 5, + module_id: ModuleId( + 1, + ), + value: "const", + }, + Token { + token_type: Whitespace, + start: 5, + end: 6, + module_id: ModuleId( + 1, + ), + value: " ", + }, + Token { + token_type: Word, + start: 6, + end: 13, + module_id: ModuleId( + 1, + ), + value: "part001", + }, + Token { + token_type: Whitespace, + start: 13, + end: 14, + module_id: ModuleId( + 1, + ), + value: " ", + }, + Token { + token_type: Operator, + start: 14, + end: 15, + module_id: ModuleId( + 1, + ), + value: "=", + }, + Token { + token_type: Whitespace, + start: 15, + end: 16, + module_id: ModuleId( + 1, + ), + value: " ", + }, + Token { + token_type: Word, + start: 16, + end: 29, + module_id: ModuleId( + 1, + ), + value: "startSketchAt", + }, + Token { + token_type: Brace, + start: 29, + end: 30, + module_id: ModuleId( + 1, + ), + value: "(", + }, + Token { + token_type: Brace, + start: 30, + end: 31, + module_id: ModuleId( + 1, + ), + value: "[", + }, + Token { + token_type: Number, + start: 31, + end: 43, + module_id: ModuleId( + 1, + ), + value: "0.0000000000", + }, + Token { + token_type: Comma, + start: 43, + end: 44, + module_id: ModuleId( + 1, + ), + value: ",", + }, + Token { + token_type: Whitespace, + start: 44, + end: 45, + module_id: ModuleId( + 1, + ), + value: " ", + }, + Token { + token_type: Number, + start: 45, + end: 57, + module_id: ModuleId( + 1, + ), + value: "5.0000000000", + }, + Token { + token_type: Brace, + start: 57, + end: 58, + module_id: ModuleId( + 1, + ), + value: "]", + }, + Token { + token_type: Brace, + start: 58, + end: 59, + module_id: ModuleId( + 1, + ), + value: ")", + }, + Token { + token_type: Whitespace, + start: 59, + end: 64, + module_id: ModuleId( + 1, + ), + value: "\n ", + }, + Token { + token_type: Operator, + start: 64, + end: 66, + module_id: ModuleId( + 1, + ), + value: "|>", + }, + Token { + token_type: Whitespace, + start: 66, + end: 67, + module_id: ModuleId( + 1, + ), + value: " ", + }, + Token { + token_type: Word, + start: 67, + end: 71, + module_id: ModuleId( + 1, + ), + value: "line", + }, + Token { + token_type: Brace, + start: 71, + end: 72, + module_id: ModuleId( + 1, + ), + value: "(", + }, + Token { + token_type: Brace, + start: 72, + end: 73, + module_id: ModuleId( + 1, + ), + value: "[", + }, + Token { + token_type: Number, + start: 73, + end: 85, + module_id: ModuleId( + 1, + ), + value: "0.4900857016", + }, + Token { + token_type: Comma, + start: 85, + end: 86, + module_id: ModuleId( + 1, + ), + value: ",", + }, + Token { + token_type: Whitespace, + start: 86, + end: 87, + module_id: ModuleId( + 1, + ), + value: " ", + }, + Token { + token_type: Operator, + start: 87, + end: 88, + module_id: ModuleId( + 1, + ), + value: "-", + }, + Token { + token_type: Number, + start: 88, + end: 100, + module_id: ModuleId( + 1, + ), + value: 
"0.0240763666", + }, + Token { + token_type: Brace, + start: 100, + end: 101, + module_id: ModuleId( + 1, + ), + value: "]", + }, + Token { + token_type: Comma, + start: 101, + end: 102, + module_id: ModuleId( + 1, + ), + value: ",", + }, + Token { + token_type: Whitespace, + start: 102, + end: 103, + module_id: ModuleId( + 1, + ), + value: " ", + }, + Token { + token_type: Operator, + start: 103, + end: 104, + module_id: ModuleId( + 1, + ), + value: "%", + }, + Token { + token_type: Brace, + start: 104, + end: 105, + module_id: ModuleId( + 1, + ), + value: ")", + }, + Token { + token_type: Whitespace, + start: 105, + end: 107, + module_id: ModuleId( + 1, + ), + value: "\n\n", + }, + Token { + token_type: Keyword, + start: 107, + end: 112, + module_id: ModuleId( + 1, + ), + value: "const", + }, + Token { + token_type: Whitespace, + start: 112, + end: 113, + module_id: ModuleId( + 1, + ), + value: " ", + }, + Token { + token_type: Word, + start: 113, + end: 120, + module_id: ModuleId( + 1, + ), + value: "part002", + }, + Token { + token_type: Whitespace, + start: 120, + end: 121, + module_id: ModuleId( + 1, + ), + value: " ", + }, + Token { + token_type: Operator, + start: 121, + end: 122, + module_id: ModuleId( + 1, + ), + value: "=", + }, + Token { + token_type: Whitespace, + start: 122, + end: 123, + module_id: ModuleId( + 1, + ), + value: " ", + }, + Token { + token_type: String, + start: 123, + end: 132, + module_id: ModuleId( + 1, + ), + value: "\"part002\"", + }, + Token { + token_type: Whitespace, + start: 132, + end: 133, + module_id: ModuleId( + 1, + ), + value: "\n", + }, + Token { + token_type: Keyword, + start: 133, + end: 138, + module_id: ModuleId( + 1, + ), + value: "const", + }, + Token { + token_type: Whitespace, + start: 138, + end: 139, + module_id: ModuleId( + 1, + ), + value: " ", + }, + Token { + token_type: Word, + start: 139, + end: 145, + module_id: ModuleId( + 1, + ), + value: "things", + }, + Token { + token_type: Whitespace, + start: 145, + end: 146, + module_id: ModuleId( + 1, + ), + value: " ", + }, + Token { + token_type: Operator, + start: 146, + end: 147, + module_id: ModuleId( + 1, + ), + value: "=", + }, + Token { + token_type: Whitespace, + start: 147, + end: 148, + module_id: ModuleId( + 1, + ), + value: " ", + }, + Token { + token_type: Brace, + start: 148, + end: 149, + module_id: ModuleId( + 1, + ), + value: "[", + }, + Token { + token_type: Word, + start: 149, + end: 156, + module_id: ModuleId( + 1, + ), + value: "part001", + }, + Token { + token_type: Comma, + start: 156, + end: 157, + module_id: ModuleId( + 1, + ), + value: ",", + }, + Token { + token_type: Whitespace, + start: 157, + end: 158, + module_id: ModuleId( + 1, + ), + value: " ", + }, + Token { + token_type: Number, + start: 158, + end: 161, + module_id: ModuleId( + 1, + ), + value: "0.0", + }, + Token { + token_type: Brace, + start: 161, + end: 162, + module_id: ModuleId( + 1, + ), + value: "]", + }, + Token { + token_type: Whitespace, + start: 162, + end: 163, + module_id: ModuleId( + 1, + ), + value: "\n", + }, + Token { + token_type: Keyword, + start: 163, + end: 166, + module_id: ModuleId( + 1, + ), + value: "let", + }, + Token { + token_type: Whitespace, + start: 166, + end: 167, + module_id: ModuleId( + 1, + ), + value: " ", + }, + Token { + token_type: Word, + start: 167, + end: 171, + module_id: ModuleId( + 1, + ), + value: "blah", + }, + Token { + token_type: Whitespace, + start: 171, + end: 172, + module_id: ModuleId( + 1, + ), + value: " ", + }, + Token { + token_type: 
Operator, + start: 172, + end: 173, + module_id: ModuleId( + 1, + ), + value: "=", + }, + Token { + token_type: Whitespace, + start: 173, + end: 174, + module_id: ModuleId( + 1, + ), + value: " ", + }, + Token { + token_type: Number, + start: 174, + end: 175, + module_id: ModuleId( + 1, + ), + value: "1", + }, + Token { + token_type: Whitespace, + start: 175, + end: 176, + module_id: ModuleId( + 1, + ), + value: "\n", + }, + Token { + token_type: Keyword, + start: 176, + end: 181, + module_id: ModuleId( + 1, + ), + value: "const", + }, + Token { + token_type: Whitespace, + start: 181, + end: 182, + module_id: ModuleId( + 1, + ), + value: " ", + }, + Token { + token_type: Word, + start: 182, + end: 185, + module_id: ModuleId( + 1, + ), + value: "foo", + }, + Token { + token_type: Whitespace, + start: 185, + end: 186, + module_id: ModuleId( + 1, + ), + value: " ", + }, + Token { + token_type: Operator, + start: 186, + end: 187, + module_id: ModuleId( + 1, + ), + value: "=", + }, + Token { + token_type: Whitespace, + start: 187, + end: 188, + module_id: ModuleId( + 1, + ), + value: " ", + }, + Token { + token_type: Keyword, + start: 188, + end: 193, + module_id: ModuleId( + 1, + ), + value: "false", + }, + Token { + token_type: Whitespace, + start: 193, + end: 194, + module_id: ModuleId( + 1, + ), + value: "\n", + }, + Token { + token_type: Keyword, + start: 194, + end: 197, + module_id: ModuleId( + 1, + ), + value: "let", + }, + Token { + token_type: Whitespace, + start: 197, + end: 198, + module_id: ModuleId( + 1, + ), + value: " ", + }, + Token { + token_type: Word, + start: 198, + end: 201, + module_id: ModuleId( + 1, + ), + value: "baz", + }, + Token { + token_type: Whitespace, + start: 201, + end: 202, + module_id: ModuleId( + 1, + ), + value: " ", + }, + Token { + token_type: Operator, + start: 202, + end: 203, + module_id: ModuleId( + 1, + ), + value: "=", + }, + Token { + token_type: Whitespace, + start: 203, + end: 204, + module_id: ModuleId( + 1, + ), + value: " ", + }, + Token { + token_type: Brace, + start: 204, + end: 205, + module_id: ModuleId( + 1, + ), + value: "{", + }, + Token { + token_type: Word, + start: 205, + end: 206, + module_id: ModuleId( + 1, + ), + value: "a", + }, + Token { + token_type: Colon, + start: 206, + end: 207, + module_id: ModuleId( + 1, + ), + value: ":", + }, + Token { + token_type: Whitespace, + start: 207, + end: 208, + module_id: ModuleId( + 1, + ), + value: " ", + }, + Token { + token_type: Number, + start: 208, + end: 209, + module_id: ModuleId( + 1, + ), + value: "1", + }, + Token { + token_type: Comma, + start: 209, + end: 210, + module_id: ModuleId( + 1, + ), + value: ",", + }, + Token { + token_type: Whitespace, + start: 210, + end: 211, + module_id: ModuleId( + 1, + ), + value: " ", + }, + Token { + token_type: Word, + start: 211, + end: 218, + module_id: ModuleId( + 1, + ), + value: "part001", + }, + Token { + token_type: Colon, + start: 218, + end: 219, + module_id: ModuleId( + 1, + ), + value: ":", + }, + Token { + token_type: Whitespace, + start: 219, + end: 220, + module_id: ModuleId( + 1, + ), + value: " ", + }, + Token { + token_type: String, + start: 220, + end: 227, + module_id: ModuleId( + 1, + ), + value: "\"thing\"", + }, + Token { + token_type: Brace, + start: 227, + end: 228, + module_id: ModuleId( + 1, + ), + value: "}", + }, + Token { + token_type: Whitespace, + start: 228, + end: 230, + module_id: ModuleId( + 1, + ), + value: "\n\n", + }, + Token { + token_type: Keyword, + start: 230, + end: 232, + module_id: ModuleId( + 1, 
+ ), + value: "fn", + }, + Token { + token_type: Whitespace, + start: 232, + end: 233, + module_id: ModuleId( + 1, + ), + value: " ", + }, + Token { + token_type: Word, + start: 233, + end: 236, + module_id: ModuleId( + 1, + ), + value: "ghi", + }, + Token { + token_type: Whitespace, + start: 236, + end: 237, + module_id: ModuleId( + 1, + ), + value: " ", + }, + Token { + token_type: Operator, + start: 237, + end: 238, + module_id: ModuleId( + 1, + ), + value: "=", + }, + Token { + token_type: Whitespace, + start: 238, + end: 239, + module_id: ModuleId( + 1, + ), + value: " ", + }, + Token { + token_type: Brace, + start: 239, + end: 240, + module_id: ModuleId( + 1, + ), + value: "(", + }, + Token { + token_type: Word, + start: 240, + end: 247, + module_id: ModuleId( + 1, + ), + value: "part001", + }, + Token { + token_type: Brace, + start: 247, + end: 248, + module_id: ModuleId( + 1, + ), + value: ")", + }, + Token { + token_type: Whitespace, + start: 248, + end: 249, + module_id: ModuleId( + 1, + ), + value: " ", + }, + Token { + token_type: Operator, + start: 249, + end: 251, + module_id: ModuleId( + 1, + ), + value: "=>", + }, + Token { + token_type: Whitespace, + start: 251, + end: 252, + module_id: ModuleId( + 1, + ), + value: " ", + }, + Token { + token_type: Brace, + start: 252, + end: 253, + module_id: ModuleId( + 1, + ), + value: "{", + }, + Token { + token_type: Whitespace, + start: 253, + end: 256, + module_id: ModuleId( + 1, + ), + value: "\n ", + }, + Token { + token_type: Keyword, + start: 256, + end: 262, + module_id: ModuleId( + 1, + ), + value: "return", + }, + Token { + token_type: Whitespace, + start: 262, + end: 263, + module_id: ModuleId( + 1, + ), + value: " ", + }, + Token { + token_type: Word, + start: 263, + end: 270, + module_id: ModuleId( + 1, + ), + value: "part001", + }, + Token { + token_type: Whitespace, + start: 270, + end: 271, + module_id: ModuleId( + 1, + ), + value: "\n", + }, + Token { + token_type: Brace, + start: 271, + end: 272, + module_id: ModuleId( + 1, + ), + value: "}", + }, + Token { + token_type: Whitespace, + start: 272, + end: 274, + module_id: ModuleId( + 1, + ), + value: "\n\n", + }, + Token { + token_type: Word, + start: 274, + end: 278, + module_id: ModuleId( + 1, + ), + value: "show", + }, + Token { + token_type: Brace, + start: 278, + end: 279, + module_id: ModuleId( + 1, + ), + value: "(", + }, + Token { + token_type: Word, + start: 279, + end: 286, + module_id: ModuleId( + 1, + ), + value: "part001", + }, + Token { + token_type: Brace, + start: 286, + end: 287, + module_id: ModuleId( + 1, + ), + value: ")", + }, +] diff --git a/src/wasm-lib/kcl/src/parsing/token/tokeniser.rs b/src/wasm-lib/kcl/src/parsing/token/tokeniser.rs index e6fbb4ad5..ae33243f5 100644 --- a/src/wasm-lib/kcl/src/parsing/token/tokeniser.rs +++ b/src/wasm-lib/kcl/src/parsing/token/tokeniser.rs @@ -15,6 +15,8 @@ use crate::{ source_range::ModuleId, }; +use super::TokenStream; + lazy_static! { pub(crate) static ref RESERVED_WORDS: FnvHashMap<&'static str, TokenType> = { let mut set = FnvHashMap::default(); @@ -62,19 +64,19 @@ lazy_static! 
{ }; } -pub fn lex(i: &str, module_id: ModuleId) -> Result, ParseError, ContextError>> { +pub(super) fn lex(i: &str, module_id: ModuleId) -> Result, ContextError>> { let state = State::new(module_id); let input = Input { input: Located::new(i), state, }; - repeat(0.., token).parse(input) + Ok(TokenStream::new(repeat(0.., token).parse(input)?)) } -pub type Input<'a> = Stateful, State>; +pub(super) type Input<'a> = Stateful, State>; #[derive(Debug, Clone)] -pub struct State { +pub(super) struct State { pub module_id: ModuleId, } @@ -84,7 +86,7 @@ impl State { } } -pub fn token(i: &mut Input<'_>) -> PResult { +pub(super) fn token(i: &mut Input<'_>) -> PResult { match winnow::combinator::dispatch! {peek(any); '"' | '\'' => string, '/' => alt((line_comment, block_comment, operator)), @@ -363,6 +365,8 @@ fn keyword_type_or_word(i: &mut Input<'_>) -> PResult { mod tests { use winnow::Located; + use crate::parsing::token::TokenSlice; + use super::*; fn assert_parse_err<'i, P, O, E>(mut p: P, s: &'i str) where @@ -457,23 +461,34 @@ mod tests { } } - fn assert_tokens(expected: Vec, actual: Vec) { - assert_eq!( - expected.len(), - actual.len(), - "\nexpected {} tokens, actually got {}", - expected.len(), - actual.len() - ); + #[track_caller] + fn assert_tokens(expected: &[(TokenType, usize, usize)], actual: TokenSlice) { + let mut e = 0; + let mut issues = vec![]; + for a in actual { + if expected[e].0 != a.token_type { + if a.token_type == TokenType::Whitespace { + continue; + } + issues.push(format!( + "Type mismatch: expected `{}`, found `{}` (`{a:?}`), at index {e}", + expected[e].0, a.token_type + )); + } - let n = expected.len(); - for i in 0..n { - assert_eq!( - expected[i], actual[i], - "token #{i} (of {n}) does not match.\nExpected:\n{:#?}\nActual:\n{:#?}", - expected[i], actual[i], - ) + if expected[e].1 != a.start || expected[e].2 != a.end { + issues.push(format!( + "Source range mismatch: expected {}-{}, found {}-{} (`{a:?}`), at index {e}", + expected[e].1, expected[e].2, a.start, a.end + )); + } + + e += 1; } + if e < expected.len() { + issues.push(format!("Expected `{}` tokens, found `{e}`", expected.len())); + } + assert!(issues.is_empty(), "{}", issues.join("\n")); } #[test] @@ -481,44 +496,12 @@ mod tests { let program = "const a=5"; let module_id = ModuleId::from_usize(1); let actual = lex(program, module_id).unwrap(); - let expected = vec![ - Token { - token_type: TokenType::Keyword, - value: "const".to_string(), - start: 0, - end: 5, - module_id, - }, - Token { - token_type: TokenType::Whitespace, - value: " ".to_string(), - start: 5, - end: 6, - module_id, - }, - Token { - token_type: TokenType::Word, - value: "a".to_string(), - start: 6, - end: 7, - module_id, - }, - Token { - token_type: TokenType::Operator, - value: "=".to_string(), - start: 7, - end: 8, - module_id, - }, - Token { - token_type: TokenType::Number, - value: "5".to_string(), - start: 8, - end: 9, - module_id, - }, - ]; - assert_tokens(expected, actual); + + use TokenType::*; + assert_tokens( + &[(Keyword, 0, 5), (Word, 6, 7), (Operator, 7, 8), (Number, 8, 9)], + actual.as_slice(), + ); } #[test] @@ -526,73 +509,20 @@ mod tests { let program = "54 + 22500 + 6"; let module_id = ModuleId::from_usize(1); let actual = lex(program, module_id).unwrap(); - let expected = vec![ - Token { - token_type: TokenType::Number, - value: "54".to_string(), - start: 0, - end: 2, - module_id, - }, - Token { - token_type: TokenType::Whitespace, - value: " ".to_string(), - start: 2, - end: 3, - module_id, - }, - Token { - token_type: 
TokenType::Operator, - value: "+".to_string(), - start: 3, - end: 4, - module_id, - }, - Token { - token_type: TokenType::Whitespace, - value: " ".to_string(), - start: 4, - end: 5, - module_id, - }, - Token { - token_type: TokenType::Number, - value: "22500".to_string(), - start: 5, - end: 10, - module_id, - }, - Token { - token_type: TokenType::Whitespace, - value: " ".to_string(), - start: 10, - end: 11, - module_id, - }, - Token { - token_type: TokenType::Operator, - value: "+".to_string(), - start: 11, - end: 12, - module_id, - }, - Token { - token_type: TokenType::Whitespace, - value: " ".to_string(), - start: 12, - end: 13, - module_id, - }, - Token { - token_type: TokenType::Number, - value: "6".to_string(), - start: 13, - end: 14, - module_id, - }, - ]; - assert_tokens(expected, actual); + + use TokenType::*; + assert_tokens( + &[ + (Number, 0, 2), + (Operator, 3, 4), + (Number, 5, 10), + (Operator, 11, 12), + (Number, 13, 14), + ], + actual.as_slice(), + ); } + #[test] fn test_program2() { let program = r#"const part001 = startSketchAt([0.0000000000, 5.0000000000]) @@ -610,797 +540,8 @@ fn ghi = (part001) => { show(part001)"#; let module_id = ModuleId::from_usize(1); - - use TokenType::*; - - let expected = vec![ - Token { - token_type: Keyword, - start: 0, - end: 5, - module_id, - value: "const".to_owned(), - }, - Token { - token_type: Whitespace, - start: 5, - end: 6, - module_id, - value: " ".to_owned(), - }, - Token { - token_type: Word, - start: 6, - end: 13, - module_id, - value: "part001".to_owned(), - }, - Token { - token_type: Whitespace, - start: 13, - end: 14, - module_id, - value: " ".to_owned(), - }, - Token { - token_type: Operator, - start: 14, - end: 15, - module_id, - value: "=".to_owned(), - }, - Token { - token_type: Whitespace, - start: 15, - end: 16, - module_id, - value: " ".to_owned(), - }, - Token { - token_type: Word, - start: 16, - end: 29, - module_id, - value: "startSketchAt".to_owned(), - }, - Token { - token_type: Brace, - start: 29, - end: 30, - module_id, - value: "(".to_owned(), - }, - Token { - token_type: Brace, - start: 30, - end: 31, - module_id, - value: "[".to_owned(), - }, - Token { - token_type: Number, - start: 31, - end: 43, - module_id, - value: "0.0000000000".to_owned(), - }, - Token { - token_type: Comma, - start: 43, - end: 44, - module_id, - value: ",".to_owned(), - }, - Token { - token_type: Whitespace, - start: 44, - end: 45, - module_id, - value: " ".to_owned(), - }, - Token { - token_type: Number, - start: 45, - end: 57, - module_id, - value: "5.0000000000".to_owned(), - }, - Token { - token_type: Brace, - start: 57, - end: 58, - module_id, - value: "]".to_owned(), - }, - Token { - token_type: Brace, - start: 58, - end: 59, - module_id, - value: ")".to_owned(), - }, - Token { - token_type: Whitespace, - start: 59, - end: 64, - module_id, - value: "\n ".to_owned(), - }, - Token { - token_type: Operator, - start: 64, - end: 66, - module_id, - value: "|>".to_owned(), - }, - Token { - token_type: Whitespace, - start: 66, - end: 67, - module_id, - value: " ".to_owned(), - }, - Token { - token_type: Word, - start: 67, - end: 71, - module_id, - value: "line".to_owned(), - }, - Token { - token_type: Brace, - start: 71, - end: 72, - module_id, - value: "(".to_owned(), - }, - Token { - token_type: Brace, - start: 72, - end: 73, - module_id, - value: "[".to_owned(), - }, - Token { - token_type: Number, - start: 73, - end: 85, - module_id, - value: "0.4900857016".to_owned(), - }, - Token { - token_type: Comma, - start: 85, - end: 86, - 
module_id, - value: ",".to_owned(), - }, - Token { - token_type: Whitespace, - start: 86, - end: 87, - module_id, - value: " ".to_owned(), - }, - Token { - token_type: Operator, - start: 87, - end: 88, - module_id, - value: "-".to_owned(), - }, - Token { - token_type: Number, - start: 88, - end: 100, - module_id, - value: "0.0240763666".to_owned(), - }, - Token { - token_type: Brace, - start: 100, - end: 101, - module_id, - value: "]".to_owned(), - }, - Token { - token_type: Comma, - start: 101, - end: 102, - module_id, - value: ",".to_owned(), - }, - Token { - token_type: Whitespace, - start: 102, - end: 103, - module_id, - value: " ".to_owned(), - }, - Token { - token_type: Operator, - start: 103, - end: 104, - module_id, - value: "%".to_owned(), - }, - Token { - token_type: Brace, - start: 104, - end: 105, - module_id, - value: ")".to_owned(), - }, - Token { - token_type: Whitespace, - start: 105, - end: 107, - module_id, - value: "\n\n".to_owned(), - }, - Token { - token_type: Keyword, - start: 107, - end: 112, - module_id, - value: "const".to_owned(), - }, - Token { - token_type: Whitespace, - start: 112, - end: 113, - module_id, - value: " ".to_owned(), - }, - Token { - token_type: Word, - start: 113, - end: 120, - module_id, - value: "part002".to_owned(), - }, - Token { - token_type: Whitespace, - start: 120, - end: 121, - module_id, - value: " ".to_owned(), - }, - Token { - token_type: Operator, - start: 121, - end: 122, - module_id, - value: "=".to_owned(), - }, - Token { - token_type: Whitespace, - start: 122, - end: 123, - module_id, - value: " ".to_owned(), - }, - Token { - token_type: String, - start: 123, - end: 132, - module_id, - value: "\"part002\"".to_owned(), - }, - Token { - token_type: Whitespace, - start: 132, - end: 133, - module_id, - value: "\n".to_owned(), - }, - Token { - token_type: Keyword, - start: 133, - end: 138, - module_id, - value: "const".to_owned(), - }, - Token { - token_type: Whitespace, - start: 138, - end: 139, - module_id, - value: " ".to_owned(), - }, - Token { - token_type: Word, - start: 139, - end: 145, - module_id, - value: "things".to_owned(), - }, - Token { - token_type: Whitespace, - start: 145, - end: 146, - module_id, - value: " ".to_owned(), - }, - Token { - token_type: Operator, - start: 146, - end: 147, - module_id, - value: "=".to_owned(), - }, - Token { - token_type: Whitespace, - start: 147, - end: 148, - module_id, - value: " ".to_owned(), - }, - Token { - token_type: Brace, - start: 148, - end: 149, - module_id, - value: "[".to_owned(), - }, - Token { - token_type: Word, - start: 149, - end: 156, - module_id, - value: "part001".to_owned(), - }, - Token { - token_type: Comma, - start: 156, - end: 157, - module_id, - value: ",".to_owned(), - }, - Token { - token_type: Whitespace, - start: 157, - end: 158, - module_id, - value: " ".to_owned(), - }, - Token { - token_type: Number, - start: 158, - end: 161, - module_id, - value: "0.0".to_owned(), - }, - Token { - token_type: Brace, - start: 161, - end: 162, - module_id, - value: "]".to_owned(), - }, - Token { - token_type: Whitespace, - start: 162, - end: 163, - module_id, - value: "\n".to_owned(), - }, - Token { - token_type: Keyword, - start: 163, - end: 166, - module_id, - value: "let".to_owned(), - }, - Token { - token_type: Whitespace, - start: 166, - end: 167, - module_id, - value: " ".to_owned(), - }, - Token { - token_type: Word, - start: 167, - end: 171, - module_id, - value: "blah".to_owned(), - }, - Token { - token_type: Whitespace, - start: 171, - end: 172, - module_id, - 
value: " ".to_owned(), - }, - Token { - token_type: Operator, - start: 172, - end: 173, - module_id, - value: "=".to_owned(), - }, - Token { - token_type: Whitespace, - start: 173, - end: 174, - module_id, - value: " ".to_owned(), - }, - Token { - token_type: Number, - start: 174, - end: 175, - module_id, - value: "1".to_owned(), - }, - Token { - token_type: Whitespace, - start: 175, - end: 176, - module_id, - value: "\n".to_owned(), - }, - Token { - token_type: Keyword, - start: 176, - end: 181, - module_id, - value: "const".to_owned(), - }, - Token { - token_type: Whitespace, - start: 181, - end: 182, - module_id, - value: " ".to_owned(), - }, - Token { - token_type: Word, - start: 182, - end: 185, - module_id, - value: "foo".to_owned(), - }, - Token { - token_type: Whitespace, - start: 185, - end: 186, - module_id, - value: " ".to_owned(), - }, - Token { - token_type: Operator, - start: 186, - end: 187, - module_id, - value: "=".to_owned(), - }, - Token { - token_type: Whitespace, - start: 187, - end: 188, - module_id, - value: " ".to_owned(), - }, - Token { - token_type: Keyword, - start: 188, - end: 193, - module_id, - value: "false".to_owned(), - }, - Token { - token_type: Whitespace, - start: 193, - end: 194, - module_id, - value: "\n".to_owned(), - }, - Token { - token_type: Keyword, - start: 194, - end: 197, - module_id, - value: "let".to_owned(), - }, - Token { - token_type: Whitespace, - start: 197, - end: 198, - module_id, - value: " ".to_owned(), - }, - Token { - token_type: Word, - start: 198, - end: 201, - module_id, - value: "baz".to_owned(), - }, - Token { - token_type: Whitespace, - start: 201, - end: 202, - module_id, - value: " ".to_owned(), - }, - Token { - token_type: Operator, - start: 202, - end: 203, - module_id, - value: "=".to_owned(), - }, - Token { - token_type: Whitespace, - start: 203, - end: 204, - module_id, - value: " ".to_owned(), - }, - Token { - token_type: Brace, - start: 204, - end: 205, - module_id, - value: "{".to_owned(), - }, - Token { - token_type: Word, - start: 205, - end: 206, - module_id, - value: "a".to_owned(), - }, - Token { - token_type: Colon, - start: 206, - end: 207, - module_id, - value: ":".to_owned(), - }, - Token { - token_type: Whitespace, - start: 207, - end: 208, - module_id, - value: " ".to_owned(), - }, - Token { - token_type: Number, - start: 208, - end: 209, - module_id, - value: "1".to_owned(), - }, - Token { - token_type: Comma, - start: 209, - end: 210, - module_id, - value: ",".to_owned(), - }, - Token { - token_type: Whitespace, - start: 210, - end: 211, - module_id, - value: " ".to_owned(), - }, - Token { - token_type: Word, - start: 211, - end: 218, - module_id, - value: "part001".to_owned(), - }, - Token { - token_type: Colon, - start: 218, - end: 219, - module_id, - value: ":".to_owned(), - }, - Token { - token_type: Whitespace, - start: 219, - end: 220, - module_id, - value: " ".to_owned(), - }, - Token { - token_type: String, - start: 220, - end: 227, - module_id, - value: "\"thing\"".to_owned(), - }, - Token { - token_type: Brace, - start: 227, - end: 228, - module_id, - value: "}".to_owned(), - }, - Token { - token_type: Whitespace, - start: 228, - end: 230, - module_id, - value: "\n\n".to_owned(), - }, - Token { - token_type: Keyword, - start: 230, - end: 232, - module_id, - value: "fn".to_owned(), - }, - Token { - token_type: Whitespace, - start: 232, - end: 233, - module_id, - value: " ".to_owned(), - }, - Token { - token_type: Word, - start: 233, - end: 236, - module_id, - value: "ghi".to_owned(), - }, - 
Token { - token_type: Whitespace, - start: 236, - end: 237, - module_id, - value: " ".to_owned(), - }, - Token { - token_type: Operator, - start: 237, - end: 238, - module_id, - value: "=".to_owned(), - }, - Token { - token_type: Whitespace, - start: 238, - end: 239, - module_id, - value: " ".to_owned(), - }, - Token { - token_type: Brace, - start: 239, - end: 240, - module_id, - value: "(".to_owned(), - }, - Token { - token_type: Word, - start: 240, - end: 247, - module_id, - value: "part001".to_owned(), - }, - Token { - token_type: Brace, - start: 247, - end: 248, - module_id, - value: ")".to_owned(), - }, - Token { - token_type: Whitespace, - start: 248, - end: 249, - module_id, - value: " ".to_owned(), - }, - Token { - token_type: Operator, - start: 249, - end: 251, - module_id, - value: "=>".to_owned(), - }, - Token { - token_type: Whitespace, - start: 251, - end: 252, - module_id, - value: " ".to_owned(), - }, - Token { - token_type: Brace, - start: 252, - end: 253, - module_id, - value: "{".to_owned(), - }, - Token { - token_type: Whitespace, - start: 253, - end: 256, - module_id, - value: "\n ".to_owned(), - }, - Token { - token_type: Keyword, - start: 256, - end: 262, - module_id, - value: "return".to_owned(), - }, - Token { - token_type: Whitespace, - start: 262, - end: 263, - module_id, - value: " ".to_owned(), - }, - Token { - token_type: Word, - start: 263, - end: 270, - module_id, - value: "part001".to_owned(), - }, - Token { - token_type: Whitespace, - start: 270, - end: 271, - module_id, - value: "\n".to_owned(), - }, - Token { - token_type: Brace, - start: 271, - end: 272, - module_id, - value: "}".to_owned(), - }, - Token { - token_type: Whitespace, - start: 272, - end: 274, - module_id, - value: "\n\n".to_owned(), - }, - Token { - token_type: Word, - start: 274, - end: 278, - module_id, - value: "show".to_owned(), - }, - Token { - token_type: Brace, - start: 278, - end: 279, - module_id, - value: "(".to_owned(), - }, - Token { - token_type: Word, - start: 279, - end: 286, - module_id, - value: "part001".to_owned(), - }, - Token { - token_type: Brace, - start: 286, - end: 287, - module_id, - value: ")".to_owned(), - }, - ]; let actual = lex(program, module_id).unwrap(); - assert_tokens(expected, actual); + insta::assert_debug_snapshot!(actual.tokens); } #[test] @@ -1415,476 +556,93 @@ const things = "things" // this is also a comment"#; let module_id = ModuleId::from_usize(1); let actual = lex(program, module_id).unwrap(); + use TokenType::*; - let expected = vec![ - Token { - token_type: Whitespace, - start: 0, - end: 1, - module_id, - value: "\n".to_owned(), - }, - Token { - token_type: LineComment, - start: 1, - end: 21, - module_id, - value: "// this is a comment".to_owned(), - }, - Token { - token_type: Whitespace, - start: 21, - end: 22, - module_id, - value: "\n".to_owned(), - }, - Token { - token_type: Keyword, - start: 22, - end: 27, - module_id, - value: "const".to_owned(), - }, - Token { - token_type: Whitespace, - start: 27, - end: 28, - module_id, - value: " ".to_owned(), - }, - Token { - token_type: Word, - start: 28, - end: 30, - module_id, - value: "yo".to_owned(), - }, - Token { - token_type: Whitespace, - start: 30, - end: 31, - module_id, - value: " ".to_owned(), - }, - Token { - token_type: Operator, - start: 31, - end: 32, - module_id, - value: "=".to_owned(), - }, - Token { - token_type: Whitespace, - start: 32, - end: 33, - module_id, - value: " ".to_owned(), - }, - Token { - token_type: Brace, - start: 33, - end: 34, - module_id, - value: 
"{".to_owned(), - }, - Token { - token_type: Whitespace, - start: 34, - end: 35, - module_id, - value: " ".to_owned(), - }, - Token { - token_type: Word, - start: 35, - end: 36, - module_id, - value: "a".to_owned(), - }, - Token { - token_type: Colon, - start: 36, - end: 37, - module_id, - value: ":".to_owned(), - }, - Token { - token_type: Whitespace, - start: 37, - end: 38, - module_id, - value: " ".to_owned(), - }, - Token { - token_type: Brace, - start: 38, - end: 39, - module_id, - value: "{".to_owned(), - }, - Token { - token_type: Whitespace, - start: 39, - end: 40, - module_id, - value: " ".to_owned(), - }, - Token { - token_type: Word, - start: 40, - end: 41, - module_id, - value: "b".to_owned(), - }, - Token { - token_type: Colon, - start: 41, - end: 42, - module_id, - value: ":".to_owned(), - }, - Token { - token_type: Whitespace, - start: 42, - end: 43, - module_id, - value: " ".to_owned(), - }, - Token { - token_type: Brace, - start: 43, - end: 44, - module_id, - value: "{".to_owned(), - }, - Token { - token_type: Whitespace, - start: 44, - end: 45, - module_id, - value: " ".to_owned(), - }, - Token { - token_type: Word, - start: 45, - end: 46, - module_id, - value: "c".to_owned(), - }, - Token { - token_type: Colon, - start: 46, - end: 47, - module_id, - value: ":".to_owned(), - }, - Token { - token_type: Whitespace, - start: 47, - end: 48, - module_id, - value: " ".to_owned(), - }, - Token { - token_type: String, - start: 48, - end: 53, - module_id, - value: "'123'".to_owned(), - }, - Token { - token_type: Whitespace, - start: 53, - end: 54, - module_id, - value: " ".to_owned(), - }, - Token { - token_type: Brace, - start: 54, - end: 55, - module_id, - value: "}".to_owned(), - }, - Token { - token_type: Whitespace, - start: 55, - end: 56, - module_id, - value: " ".to_owned(), - }, - Token { - token_type: Brace, - start: 56, - end: 57, - module_id, - value: "}".to_owned(), - }, - Token { - token_type: Whitespace, - start: 57, - end: 58, - module_id, - value: " ".to_owned(), - }, - Token { - token_type: Brace, - start: 58, - end: 59, - module_id, - value: "}".to_owned(), - }, - Token { - token_type: Whitespace, - start: 59, - end: 61, - module_id, - value: "\n\n".to_owned(), - }, - Token { - token_type: Keyword, - start: 61, - end: 66, - module_id, - value: "const".to_owned(), - }, - Token { - token_type: Whitespace, - start: 66, - end: 67, - module_id, - value: " ".to_owned(), - }, - Token { - token_type: Word, - start: 67, - end: 70, - module_id, - value: "key".to_owned(), - }, - Token { - token_type: Whitespace, - start: 70, - end: 71, - module_id, - value: " ".to_owned(), - }, - Token { - token_type: Operator, - start: 71, - end: 72, - module_id, - value: "=".to_owned(), - }, - Token { - token_type: Whitespace, - start: 72, - end: 73, - module_id, - value: " ".to_owned(), - }, - Token { - token_type: String, - start: 73, - end: 76, - module_id, - value: "'c'".to_owned(), - }, - Token { - token_type: Whitespace, - start: 76, - end: 77, - module_id, - value: "\n".to_owned(), - }, - Token { - token_type: Keyword, - start: 77, - end: 82, - module_id, - value: "const".to_owned(), - }, - Token { - token_type: Whitespace, - start: 82, - end: 83, - module_id, - value: " ".to_owned(), - }, - Token { - token_type: Word, - start: 83, - end: 89, - module_id, - value: "things".to_owned(), - }, - Token { - token_type: Whitespace, - start: 89, - end: 90, - module_id, - value: " ".to_owned(), - }, - Token { - token_type: Operator, - start: 90, - end: 91, - module_id, - value: 
"=".to_owned(), - }, - Token { - token_type: Whitespace, - start: 91, - end: 92, - module_id, - value: " ".to_owned(), - }, - Token { - token_type: String, - start: 92, - end: 100, - module_id, - value: "\"things\"".to_owned(), - }, - Token { - token_type: Whitespace, - start: 100, - end: 102, - module_id, - value: "\n\n".to_owned(), - }, - Token { - token_type: LineComment, - start: 102, - end: 127, - module_id, - value: "// this is also a comment".to_owned(), - }, - ]; - assert_tokens(expected, actual); + assert_tokens( + &[ + (Whitespace, 0, 1), + (LineComment, 1, 21), + (Whitespace, 21, 22), + (Keyword, 22, 27), + (Whitespace, 27, 28), + (Word, 28, 30), + (Whitespace, 30, 31), + (Operator, 31, 32), + (Whitespace, 32, 33), + (Brace, 33, 34), + (Whitespace, 34, 35), + (Word, 35, 36), + (Colon, 36, 37), + (Whitespace, 37, 38), + (Brace, 38, 39), + (Whitespace, 39, 40), + (Word, 40, 41), + (Colon, 41, 42), + (Whitespace, 42, 43), + (Brace, 43, 44), + (Whitespace, 44, 45), + (Word, 45, 46), + (Colon, 46, 47), + (Whitespace, 47, 48), + (String, 48, 53), + (Whitespace, 53, 54), + (Brace, 54, 55), + (Whitespace, 55, 56), + (Brace, 56, 57), + (Whitespace, 57, 58), + (Brace, 58, 59), + (Whitespace, 59, 61), + (Keyword, 61, 66), + (Whitespace, 66, 67), + (Word, 67, 70), + (Whitespace, 70, 71), + (Operator, 71, 72), + (Whitespace, 72, 73), + (String, 73, 76), + (Whitespace, 76, 77), + (Keyword, 77, 82), + (Whitespace, 82, 83), + (Word, 83, 89), + (Whitespace, 89, 90), + (Operator, 90, 91), + (Whitespace, 91, 92), + (String, 92, 100), + (Whitespace, 100, 102), + (LineComment, 102, 127), + ], + actual.as_slice(), + ); } #[test] fn test_program4() { let program = "const myArray = [0..10]"; let module_id = ModuleId::from_usize(1); - use TokenType::*; - let expected = vec![ - Token { - token_type: Keyword, - start: 0, - end: 5, - module_id, - value: "const".to_owned(), - }, - Token { - token_type: Whitespace, - start: 5, - end: 6, - module_id, - value: " ".to_owned(), - }, - Token { - token_type: Word, - start: 6, - end: 13, - module_id, - value: "myArray".to_owned(), - }, - Token { - token_type: Whitespace, - start: 13, - end: 14, - module_id, - value: " ".to_owned(), - }, - Token { - token_type: Operator, - start: 14, - end: 15, - module_id, - value: "=".to_owned(), - }, - Token { - token_type: Whitespace, - start: 15, - end: 16, - module_id, - value: " ".to_owned(), - }, - Token { - token_type: Brace, - start: 16, - end: 17, - module_id, - value: "[".to_owned(), - }, - Token { - token_type: Number, - start: 17, - end: 18, - module_id, - value: "0".to_owned(), - }, - Token { - token_type: DoublePeriod, - start: 18, - end: 20, - module_id, - value: "..".to_owned(), - }, - Token { - token_type: Number, - start: 20, - end: 22, - module_id, - value: "10".to_owned(), - }, - Token { - token_type: Brace, - start: 22, - end: 23, - module_id, - value: "]".to_owned(), - }, - ]; let actual = lex(program, module_id).unwrap(); - assert_tokens(expected, actual); + + use TokenType::*; + assert_tokens( + &[ + (Keyword, 0, 5), + (Word, 6, 13), + (Operator, 14, 15), + (Brace, 16, 17), + (Number, 17, 18), + (DoublePeriod, 18, 20), + (Number, 20, 22), + (Brace, 22, 23), + ], + actual.as_slice(), + ); } - #[test] - fn test_kitt() { - let program = include_str!("../../../../tests/executor/inputs/kittycad_svg.kcl"); - let actual = lex(program, ModuleId::default()).unwrap(); - assert_eq!(actual.len(), 5103); - } - #[test] - fn test_pipes_on_pipes() { - let program = 
include_str!("../../../../tests/executor/inputs/pipes_on_pipes.kcl"); - let actual = lex(program, ModuleId::default()).unwrap(); - assert_eq!(actual.len(), 17841); - } #[test] fn test_lexer_negative_word() { let module_id = ModuleId::from_usize(1); let actual = lex("-legX", module_id).unwrap(); - let expected = vec![ - Token { - token_type: TokenType::Operator, - value: "-".to_string(), - start: 0, - end: 1, - module_id, - }, - Token { - token_type: TokenType::Word, - value: "legX".to_string(), - start: 1, - end: 5, - module_id, - }, - ]; - assert_tokens(expected, actual); + + use TokenType::*; + assert_tokens(&[(Operator, 0, 1), (Word, 1, 5)], actual.as_slice()); } #[test] @@ -1898,52 +656,16 @@ const things = "things" end: 2, module_id, }]; - assert_eq!(actual, expected); + assert_eq!(actual.tokens, expected); } #[test] fn test_unrecognized_token() { let module_id = ModuleId::from_usize(1); let actual = lex("12 ; 8", module_id).unwrap(); - let expected = vec![ - Token { - token_type: TokenType::Number, - value: "12".to_string(), - start: 0, - end: 2, - module_id, - }, - Token { - token_type: TokenType::Whitespace, - value: " ".to_string(), - start: 2, - end: 3, - module_id, - }, - Token { - token_type: TokenType::Unknown, - value: ";".to_string(), - start: 3, - end: 4, - module_id, - }, - Token { - token_type: TokenType::Whitespace, - value: " ".to_string(), - start: 4, - end: 5, - module_id, - }, - Token { - token_type: TokenType::Number, - value: "8".to_string(), - start: 5, - end: 6, - module_id, - }, - ]; - assert_tokens(expected, actual); + use TokenType::*; + assert_tokens(&[(Number, 0, 2), (Unknown, 3, 4), (Number, 5, 6)], actual.as_slice()); } #[test] @@ -1957,7 +679,7 @@ const things = "things" end: 6, module_id, }; - assert_eq!(actual[0], expected); + assert_eq!(actual.tokens[0], expected); } #[test] @@ -1971,6 +693,28 @@ const things = "things" end: 6, module_id, }; - assert_eq!(actual[0], expected); + assert_eq!(actual.tokens[0], expected); + } + + #[test] + fn test_is_code_token() { + let module_id = ModuleId::default(); + let actual = lex("foo (4/* comment */ +,2,\"sdfsdf\") // comment", module_id).unwrap(); + let non_code = [1, 4, 5, 12, 13]; + for i in 0..14 { + if non_code.contains(&i) { + assert!( + !actual.tokens[i].is_code_token(), + "failed test {i}: {:?}", + &actual.tokens[i], + ); + } else { + assert!( + actual.tokens[i].is_code_token(), + "failed test {i}: {:?}", + &actual.tokens[i], + ); + } + } } } diff --git a/src/wasm-lib/kcl/src/simulation_tests.rs b/src/wasm-lib/kcl/src/simulation_tests.rs index 9225dcba6..d16d0c457 100644 --- a/src/wasm-lib/kcl/src/simulation_tests.rs +++ b/src/wasm-lib/kcl/src/simulation_tests.rs @@ -47,7 +47,7 @@ fn read(filename: &'static str, test_name: &str) -> String { fn parse(test_name: &str) { let input = read("input.kcl", test_name); - let tokens = crate::parsing::token::lexer(&input, ModuleId::default()).unwrap(); + let tokens = crate::parsing::token::lex(&input, ModuleId::default()).unwrap(); // Parse the tokens into an AST. 
let parse_res = Result::<_, KclError>::Ok(crate::parsing::parse_tokens(tokens).unwrap()); diff --git a/src/wasm-lib/kcl/src/unparser.rs b/src/wasm-lib/kcl/src/unparser.rs index b9eb695e2..354e8b0d6 100644 --- a/src/wasm-lib/kcl/src/unparser.rs +++ b/src/wasm-lib/kcl/src/unparser.rs @@ -2137,8 +2137,10 @@ fn f() { .into_iter() .enumerate() { - let tokens = crate::parsing::token::lexer(raw, ModuleId::default()).unwrap(); - let literal = crate::parsing::parser::unsigned_number_literal.parse(&tokens).unwrap(); + let tokens = crate::parsing::token::lex(raw, ModuleId::default()).unwrap(); + let literal = crate::parsing::parser::unsigned_number_literal + .parse(tokens.as_slice()) + .unwrap(); assert_eq!( literal.recast(), expected, @@ -2216,9 +2218,9 @@ sketch002 = startSketchOn({ .into_iter() .enumerate() { - let tokens = crate::parsing::token::lexer(input, ModuleId::default()).unwrap(); - crate::parsing::parser::print_tokens(&tokens); - let expr = crate::parsing::parser::object.parse(&tokens).unwrap(); + let tokens = crate::parsing::token::lex(input, ModuleId::default()).unwrap(); + crate::parsing::parser::print_tokens(tokens.as_slice()); + let expr = crate::parsing::parser::object.parse(tokens.as_slice()).unwrap(); assert_eq!( expr.recast(&FormatOptions::new(), 0, ExprContext::Other), expected, @@ -2314,8 +2316,10 @@ sketch002 = startSketchOn({ .into_iter() .enumerate() { - let tokens = crate::parsing::token::lexer(input, ModuleId::default()).unwrap(); - let expr = crate::parsing::parser::array_elem_by_elem.parse(&tokens).unwrap(); + let tokens = crate::parsing::token::lex(input, ModuleId::default()).unwrap(); + let expr = crate::parsing::parser::array_elem_by_elem + .parse(tokens.as_slice()) + .unwrap(); assert_eq!( expr.recast(&FormatOptions::new(), 0, ExprContext::Other), expected,