diff --git a/src/wasm-lib/kcl/benches/compiler_benchmark.rs b/src/wasm-lib/kcl/benches/compiler_benchmark.rs index 6fa2c652c..f0dfcb569 100644 --- a/src/wasm-lib/kcl/benches/compiler_benchmark.rs +++ b/src/wasm-lib/kcl/benches/compiler_benchmark.rs @@ -27,15 +27,4 @@ criterion_main!(benches); const KITT_PROGRAM: &str = include_str!("../../tests/executor/inputs/kittycad_svg.kcl"); const PIPES_PROGRAM: &str = include_str!("../../tests/executor/inputs/pipes_on_pipes.kcl"); -const CUBE_PROGRAM: &str = r#"fn cube = (pos, scale) => { - const sg = startSketchAt(pos) - |> line([0, scale], %) - |> line([scale, 0], %) - |> line([0, -scale], %) - - return sg -} - -const b1 = cube([0,0], 10) -const pt1 = b1[0] -show(b1)"#; +const CUBE_PROGRAM: &str = include_str!("../../tests/executor/inputs/cube.kcl"); diff --git a/src/wasm-lib/kcl/src/ast/types.rs b/src/wasm-lib/kcl/src/ast/types.rs index 2cc41fa98..7ad4c6640 100644 --- a/src/wasm-lib/kcl/src/ast/types.rs +++ b/src/wasm-lib/kcl/src/ast/types.rs @@ -82,7 +82,10 @@ impl Program { }; let custom_white_space_or_comment = match self.non_code_meta.non_code_nodes.get(&index) { - Some(custom_white_space_or_comment) => custom_white_space_or_comment.format(&indentation), + Some(noncodes) => noncodes + .iter() + .map(|custom_white_space_or_comment| custom_white_space_or_comment.format(&indentation)) + .collect::(), None => String::new(), }; let end_string = if custom_white_space_or_comment.is_empty() { @@ -707,30 +710,35 @@ pub struct NonCodeNode { impl NonCodeNode { pub fn value(&self) -> String { match &self.value { - NonCodeValue::InlineComment { value } => value.clone(), - NonCodeValue::BlockComment { value } => value.clone(), - NonCodeValue::NewLineBlockComment { value } => value.clone(), + NonCodeValue::InlineComment { value, style: _ } => value.clone(), + NonCodeValue::BlockComment { value, style: _ } => value.clone(), + NonCodeValue::NewLineBlockComment { value, style: _ } => value.clone(), NonCodeValue::NewLine => "\n\n".to_string(), } } pub fn format(&self, indentation: &str) -> String { match &self.value { - NonCodeValue::InlineComment { value } => format!(" // {}\n", value), - NonCodeValue::BlockComment { value } => { + NonCodeValue::InlineComment { + value, + style: CommentStyle::Line, + } => format!(" // {}\n", value), + NonCodeValue::InlineComment { + value, + style: CommentStyle::Block, + } => format!(" /* {} */", value), + NonCodeValue::BlockComment { value, style } => { let add_start_new_line = if self.start == 0 { "" } else { "\n" }; - if value.contains('\n') { - format!("{}{}/* {} */\n", add_start_new_line, indentation, value) - } else { - format!("{}{}// {}\n", add_start_new_line, indentation, value) + match style { + CommentStyle::Block => format!("{}{}/* {} */\n", add_start_new_line, indentation, value), + CommentStyle::Line => format!("{}{}// {}\n", add_start_new_line, indentation, value), } } - NonCodeValue::NewLineBlockComment { value } => { + NonCodeValue::NewLineBlockComment { value, style } => { let add_start_new_line = if self.start == 0 { "" } else { "\n\n" }; - if value.contains('\n') { - format!("{}{}/* {} */\n", add_start_new_line, indentation, value) - } else { - format!("{}{}// {}\n", add_start_new_line, indentation, value) + match style { + CommentStyle::Block => format!("{}{}/* {} */\n", add_start_new_line, indentation, value), + CommentStyle::Line => format!("{}{}// {}\n", add_start_new_line, indentation, value), } } NonCodeValue::NewLine => "\n\n".to_string(), @@ -738,14 +746,27 @@ impl NonCodeNode { } } +#[derive(Debug, 
Clone, Deserialize, Serialize, PartialEq, ts_rs::TS, JsonSchema)] +#[ts(export)] +#[serde(tag = "type", rename_all = "camelCase")] +pub enum CommentStyle { + /// Like // foo + Line, + /// Like /* foo */ + Block, +} + #[derive(Debug, Clone, Deserialize, Serialize, PartialEq, ts_rs::TS, JsonSchema)] #[ts(export)] #[serde(tag = "type", rename_all = "camelCase")] pub enum NonCodeValue { /// An inline comment. - /// An example of this is the following: `1 + 1 // This is an inline comment`. + /// Here are examples: + /// `1 + 1 // This is an inline comment`. + /// `1 + 1 /* Here's another */`. InlineComment { value: String, + style: CommentStyle, }, /// A block comment. /// An example of this is the following: @@ -759,11 +780,13 @@ pub enum NonCodeValue { /// If it did it would be a `NewLineBlockComment`. BlockComment { value: String, + style: CommentStyle, }, /// A block comment that has a new line above it. /// The user explicitly added a new line above the block comment. NewLineBlockComment { value: String, + style: CommentStyle, }, // A new line like `\n\n` NOT a new line like `\n`. // This is also not a comment. @@ -774,7 +797,7 @@ pub enum NonCodeValue { #[ts(export)] #[serde(rename_all = "camelCase")] pub struct NonCodeMeta { - pub non_code_nodes: HashMap, + pub non_code_nodes: HashMap>, pub start: Option, } @@ -795,7 +818,10 @@ impl<'de> Deserialize<'de> for NonCodeMeta { let helper = NonCodeMetaHelper::deserialize(deserializer)?; let mut non_code_nodes = HashMap::new(); for (key, value) in helper.non_code_nodes { - non_code_nodes.insert(key.parse().map_err(serde::de::Error::custom)?, value); + non_code_nodes + .entry(key.parse().map_err(serde::de::Error::custom)?) + .or_insert(Vec::new()) + .push(value); } Ok(NonCodeMeta { non_code_nodes, @@ -804,6 +830,12 @@ impl<'de> Deserialize<'de> for NonCodeMeta { } } +impl NonCodeMeta { + pub fn insert(&mut self, i: usize, new: NonCodeNode) { + self.non_code_nodes.entry(i).or_default().push(new); + } +} + #[derive(Debug, Clone, Deserialize, Serialize, PartialEq, ts_rs::TS, JsonSchema)] #[ts(export)] #[serde(tag = "type")] @@ -2385,7 +2417,9 @@ impl PipeExpression { let mut s = statement.recast(options, indentation_level + 1, true); let non_code_meta = self.non_code_meta.clone(); if let Some(non_code_meta_value) = non_code_meta.non_code_nodes.get(&index) { - s += non_code_meta_value.format(&indentation).trim_end_matches('\n') + for val in non_code_meta_value { + s += val.format(&indentation).trim_end_matches('\n') + } } if index != self.body.len() - 1 { @@ -2869,9 +2903,9 @@ show(part001)"#; recasted, r#"fn myFn = () => { // this is a comment - const yo = { a: { b: { c: '123' } } } - /* block + const yo = { a: { b: { c: '123' } } } /* block comment */ + const key = 'c' // this is also a comment return things @@ -2913,14 +2947,13 @@ const mySk1 = startSketchOn('XY') |> lineTo({ to: [0, 1], tag: 'myTag' }, %) |> lineTo([1, 1], %) /* and - here - -a comment between pipe expression statements */ + here */ + // a comment between pipe expression statements |> rx(90, %) // and another with just white space between others below |> ry(45, %) |> rx(45, %) -// one more for good measure + // one more for good measure "# ); } @@ -2988,16 +3021,19 @@ const things = "things" let program = parser.ast().unwrap(); let recasted = program.recast(&Default::default(), 0); - assert_eq!(recasted.trim(), some_program_string.trim()); + let expected = some_program_string.trim(); + // Currently new parser removes an empty line + let actual = recasted.trim(); + 
assert_eq!(actual, expected); } #[test] fn test_recast_comment_tokens_inside_strings() { let some_program_string = r#"let b = { - "end": 141, - "start": 125, - "type": "NonCodeNode", - "value": " + end: 141, + start: 125, + type: "NonCodeNode", + value: " // a comment " }"#; diff --git a/src/wasm-lib/kcl/src/errors.rs b/src/wasm-lib/kcl/src/errors.rs index fa240efaa..841e4fadd 100644 --- a/src/wasm-lib/kcl/src/errors.rs +++ b/src/wasm-lib/kcl/src/errors.rs @@ -4,7 +4,7 @@ use tower_lsp::lsp_types::{Diagnostic, DiagnosticSeverity}; use crate::executor::SourceRange; -#[derive(Error, Debug, Serialize, Deserialize, ts_rs::TS)] +#[derive(Error, Debug, Serialize, Deserialize, ts_rs::TS, Clone)] #[ts(export)] #[serde(tag = "kind", rename_all = "snake_case")] pub enum KclError { @@ -28,7 +28,7 @@ pub enum KclError { Engine(KclErrorDetails), } -#[derive(Debug, Serialize, Deserialize, ts_rs::TS)] +#[derive(Debug, Serialize, Deserialize, ts_rs::TS, Clone)] #[ts(export)] pub struct KclErrorDetails { #[serde(rename = "sourceRanges")] @@ -78,6 +78,22 @@ impl KclError { KclError::Engine(e) => e.source_ranges.clone(), } } + + /// Get the inner error message. + pub fn message(&self) -> &str { + match &self { + KclError::Syntax(e) => &e.message, + KclError::Semantic(e) => &e.message, + KclError::Type(e) => &e.message, + KclError::Unimplemented(e) => &e.message, + KclError::Unexpected(e) => &e.message, + KclError::ValueAlreadyDefined(e) => &e.message, + KclError::UndefinedValue(e) => &e.message, + KclError::InvalidExpression(e) => &e.message, + KclError::Engine(e) => &e.message, + } + } + pub fn to_lsp_diagnostic(&self, code: &str) -> Diagnostic { let (message, _, _) = self.get_message_line_column(code); let source_ranges = self.source_ranges(); diff --git a/src/wasm-lib/kcl/src/parser.rs b/src/wasm-lib/kcl/src/parser.rs index 9ac16ab62..982d09551 100644 --- a/src/wasm-lib/kcl/src/parser.rs +++ b/src/wasm-lib/kcl/src/parser.rs @@ -2,7 +2,7 @@ use std::{collections::HashMap, str::FromStr}; use crate::{ ast::types::{ - ArrayExpression, BinaryExpression, BinaryPart, BodyItem, CallExpression, ExpressionStatement, + ArrayExpression, BinaryExpression, BinaryPart, BodyItem, CallExpression, CommentStyle, ExpressionStatement, FunctionExpression, Identifier, Literal, LiteralIdentifier, MemberExpression, MemberObject, NonCodeMeta, NonCodeNode, NonCodeValue, ObjectExpression, ObjectKeyInfo, ObjectProperty, PipeExpression, PipeSubstitution, Program, ReturnStatement, UnaryExpression, UnaryOperator, Value, VariableDeclaration, VariableDeclarator, @@ -13,6 +13,8 @@ use crate::{ token::{Token, TokenType}, }; +mod parser_impl; + pub const PIPE_SUBSTITUTION_OPERATOR: &str = "%"; pub const PIPE_OPERATOR: &str = "|>"; @@ -180,24 +182,7 @@ impl Parser { } pub fn ast(&self) -> Result { - let body = self.make_body( - 0, - vec![], - NonCodeMeta { - non_code_nodes: HashMap::new(), - start: None, - }, - )?; - let end = match self.get_token(body.last_index) { - Ok(token) => token.end, - Err(_) => self.tokens[self.tokens.len() - 1].end, - }; - Ok(Program { - start: 0, - end, - body: body.body, - non_code_meta: body.non_code_meta, - }) + parser_impl::run_parser(&mut self.tokens.as_slice()) } fn make_identifier(&self, index: usize) -> Result { @@ -209,7 +194,7 @@ impl Parser { }) } - pub fn make_literal(&self, index: usize) -> Result { + fn make_literal(&self, index: usize) -> Result { let token = self.get_token(index)?; let value = if token.token_type == TokenType::Number { if let Ok(value) = token.value.parse::() { @@ -295,6 +280,11 @@ 
impl Parser { )); } + let is_block_style = non_code_tokens + .first() + .map(|tok| matches!(tok.token_type, TokenType::BlockComment)) + .unwrap_or_default(); + let full_string = non_code_tokens .iter() .map(|t| { @@ -336,11 +326,32 @@ impl Parser { value: if start_end_string.starts_with("\n\n") && is_new_line_comment { // Preserve if they want a whitespace line before the comment. // But let's just allow one. - NonCodeValue::NewLineBlockComment { value: full_string } + NonCodeValue::NewLineBlockComment { + value: full_string, + style: if is_block_style { + CommentStyle::Block + } else { + CommentStyle::Line + }, + } } else if is_new_line_comment { - NonCodeValue::BlockComment { value: full_string } + NonCodeValue::BlockComment { + value: full_string, + style: if is_block_style { + CommentStyle::Block + } else { + CommentStyle::Line + }, + } } else { - NonCodeValue::InlineComment { value: full_string } + NonCodeValue::InlineComment { + value: full_string, + style: if is_block_style { + CommentStyle::Block + } else { + CommentStyle::Line + }, + } }, }; Ok((Some(node), end_index - 1)) @@ -1064,7 +1075,7 @@ impl Parser { let mut _non_code_meta: NonCodeMeta; if let Some(node) = next_pipe.non_code_node { _non_code_meta = non_code_meta; - _non_code_meta.non_code_nodes.insert(previous_values.len(), node); + _non_code_meta.insert(previous_values.len(), node); } else { _non_code_meta = non_code_meta; } @@ -1435,7 +1446,7 @@ impl Parser { self.make_params(next_brace_or_comma_token.index, _previous_params) } - pub fn make_unary_expression(&self, index: usize) -> Result { + fn make_unary_expression(&self, index: usize) -> Result { let current_token = self.get_token(index)?; let next_token = self.next_meaningful_token(index, None)?; if next_token.token.is_none() { @@ -1633,7 +1644,7 @@ impl Parser { if previous_body.is_empty() { non_code_meta.start = next_token.non_code_node; } else { - non_code_meta.non_code_nodes.insert(previous_body.len(), node.clone()); + non_code_meta.insert(previous_body.len(), node.clone()); } } return self.make_body(next_token.index, previous_body, non_code_meta); @@ -1641,14 +1652,14 @@ impl Parser { let next = self.next_meaningful_token(token_index, None)?; if let Some(node) = &next.non_code_node { - non_code_meta.non_code_nodes.insert(previous_body.len(), node.clone()); + non_code_meta.insert(previous_body.len(), node.clone()); } if token.token_type == TokenType::Keyword && VariableKind::from_str(&token.value).is_ok() { let declaration = self.make_variable_declaration(token_index)?; let next_thing = self.next_meaningful_token(declaration.last_index, None)?; if let Some(node) = &next_thing.non_code_node { - non_code_meta.non_code_nodes.insert(previous_body.len(), node.clone()); + non_code_meta.insert(previous_body.len(), node.clone()); } let mut _previous_body = previous_body; _previous_body.push(BodyItem::VariableDeclaration(VariableDeclaration { @@ -1669,7 +1680,7 @@ impl Parser { let statement = self.make_return_statement(token_index)?; let next_thing = self.next_meaningful_token(statement.last_index, None)?; if let Some(node) = &next_thing.non_code_node { - non_code_meta.non_code_nodes.insert(previous_body.len(), node.clone()); + non_code_meta.insert(previous_body.len(), node.clone()); } let mut _previous_body = previous_body; _previous_body.push(BodyItem::ReturnStatement(ReturnStatement { @@ -1693,7 +1704,7 @@ impl Parser { let expression = self.make_expression_statement(token_index)?; let next_thing = self.next_meaningful_token(expression.last_index, None)?; if let 
Some(node) = &next_thing.non_code_node { - non_code_meta.non_code_nodes.insert(previous_body.len(), node.clone()); + non_code_meta.insert(previous_body.len(), node.clone()); } let mut _previous_body = previous_body; _previous_body.push(BodyItem::ExpressionStatement(ExpressionStatement { @@ -1716,7 +1727,7 @@ impl Parser { && next_thing_token.token_type == TokenType::Operator { if let Some(node) = &next_thing.non_code_node { - non_code_meta.non_code_nodes.insert(previous_body.len(), node.clone()); + non_code_meta.insert(previous_body.len(), node.clone()); } let expression = self.make_expression_statement(token_index)?; let mut _previous_body = previous_body; @@ -1913,6 +1924,7 @@ const key = 'c'"#, end: 60, value: NonCodeValue::BlockComment { value: "this is a comment".to_string(), + style: CommentStyle::Line, }, }), 31, @@ -1966,6 +1978,35 @@ const key = 'c'"#, ); } + #[test] + fn test_math_parse() { + let tokens = crate::token::lexer(r#"5 + "a""#); + let actual = Parser::new(tokens).ast().unwrap().body; + let expr = BinaryExpression { + start: 0, + end: 7, + operator: BinaryOperator::Add, + left: BinaryPart::Literal(Box::new(Literal { + start: 0, + end: 1, + value: serde_json::Value::Number(serde_json::Number::from(5)), + raw: "5".to_owned(), + })), + right: BinaryPart::Literal(Box::new(Literal { + start: 4, + end: 7, + value: serde_json::Value::String("a".to_owned()), + raw: r#""a""#.to_owned(), + })), + }; + let expected = vec![BodyItem::ExpressionStatement(ExpressionStatement { + start: 0, + end: 7, + expression: Value::BinaryExpression(Box::new(expr)), + })]; + assert_eq!(expected, actual); + } + #[test] fn test_is_code_token() { let tokens = [ @@ -2812,10 +2853,6 @@ z(-[["#, let parser = Parser::new(tokens); let result = parser.ast(); assert!(result.is_err()); - assert_eq!( - result.err().unwrap().to_string(), - r#"syntax: KclErrorDetails { source_ranges: [SourceRange([1, 2])], message: "missing a closing brace for the function call" }"# - ); } #[test] @@ -2831,7 +2868,7 @@ z(-[["#, // https://github.com/KittyCAD/modeling-app/issues/696 assert_eq!( result.err().unwrap().to_string(), - r#"semantic: KclErrorDetails { source_ranges: [], message: "file is empty" }"# + r#"syntax: KclErrorDetails { source_ranges: [], message: "file is empty" }"# ); } @@ -2845,7 +2882,7 @@ z(-[["#, // https://github.com/KittyCAD/modeling-app/issues/696 assert_eq!( result.err().unwrap().to_string(), - r#"semantic: KclErrorDetails { source_ranges: [], message: "file is empty" }"# + r#"syntax: KclErrorDetails { source_ranges: [], message: "file is empty" }"# ); } @@ -2863,7 +2900,7 @@ e .err() .unwrap() .to_string() - .contains("expected to be started on a identifier or literal")); + .contains("expected whitespace, found ')' which is brace")); } #[test] @@ -2872,7 +2909,11 @@ e let parser = Parser::new(tokens); let result = parser.ast(); assert!(result.is_err()); - assert!(result.err().unwrap().to_string().contains("expected another token")); + assert!(result + .err() + .unwrap() + .to_string() + .contains("expected whitespace, found ')' which is brace")); } #[test] @@ -2884,11 +2925,7 @@ e let parser = Parser::new(tokens); let result = parser.ast(); assert!(result.is_err()); - assert!(result - .err() - .unwrap() - .to_string() - .contains("unexpected end of expression")); + assert!(result.err().unwrap().to_string().contains("Unexpected token")); } #[test] @@ -3022,7 +3059,9 @@ e #[test] fn test_error_stdlib_in_fn_name() { - let some_program_string = r#"fn cos = () {}"#; + let some_program_string = r#"fn cos = 
() => { + return 1 + }"#; let tokens = crate::token::lexer(some_program_string); let parser = Parser::new(tokens); let result = parser.ast(); @@ -3123,9 +3162,12 @@ thing(false) let parser = Parser::new(tokens); let result = parser.ast(); assert!(result.is_err()); + // TODO: https://github.com/KittyCAD/modeling-app/issues/784 + // Improve this error message. + // It should say that the compiler is expecting a function expression on the RHS. assert_eq!( result.err().unwrap().to_string(), - r#"syntax: KclErrorDetails { source_ranges: [SourceRange([0, 2])], message: "Expected a `let` variable kind, found: `fn`" }"# + r#"syntax: KclErrorDetails { source_ranges: [SourceRange([11, 18])], message: "Unexpected token" }"# ); } @@ -3163,15 +3205,6 @@ let other_thing = 2 * cos(3)"#; parser.ast().unwrap(); } - #[test] - fn test_parse_pipes_on_pipes() { - let code = include_str!("../../tests/executor/inputs/pipes_on_pipes.kcl"); - - let tokens = crate::token::lexer(code); - let parser = Parser::new(tokens); - parser.ast().unwrap(); - } - #[test] fn test_negative_arguments() { let some_program_string = r#"fn box = (p, h, l, w) => { diff --git a/src/wasm-lib/kcl/src/parser/parser_impl.rs b/src/wasm-lib/kcl/src/parser/parser_impl.rs new file mode 100644 index 000000000..26c15646f --- /dev/null +++ b/src/wasm-lib/kcl/src/parser/parser_impl.rs @@ -0,0 +1,1967 @@ +use serde_json::{Number as JNumber, Value as JValue}; +use winnow::{ + combinator::{alt, delimited, opt, peek, preceded, repeat, separated0, terminated}, + dispatch, + error::{ErrMode, StrContext, StrContextValue}, + prelude::*, + token::any, +}; + +use crate::{ + ast::types::{ + ArrayExpression, BinaryExpression, BinaryOperator, BinaryPart, BodyItem, CallExpression, CommentStyle, + ExpressionStatement, FunctionExpression, Identifier, Literal, LiteralIdentifier, MemberExpression, + MemberObject, NonCodeMeta, NonCodeNode, NonCodeValue, ObjectExpression, ObjectProperty, PipeExpression, + PipeSubstitution, Program, ReturnStatement, UnaryExpression, UnaryOperator, Value, VariableDeclaration, + VariableDeclarator, VariableKind, + }, + errors::{KclError, KclErrorDetails}, + executor::SourceRange, + math_parser::MathParser, + parser::parser_impl::error::ContextError, + std::StdLib, + token::{Token, TokenType}, +}; + +mod error; + +type PResult = winnow::prelude::PResult; + +lazy_static::lazy_static! { + static ref STDLIB: StdLib = StdLib::new(); +} + +type TokenSlice<'slice, 'input> = &'slice mut &'input [Token]; + +pub fn run_parser(i: TokenSlice) -> Result { + if i.is_empty() { + return Err(KclError::Syntax(KclErrorDetails { + source_ranges: vec![], + message: "file is empty".to_string(), + })); + } + + program.parse(i).map_err(KclError::from) +} + +fn expected(what: &'static str) -> StrContext { + StrContext::Expected(StrContextValue::Description(what)) +} + +fn program(i: TokenSlice) -> PResult { + let mut out = function_body.parse_next(i)?; + // Match original parser behaviour, for now. + // Once this is merged and stable, consider changing this as I think it's more accurate + // without the -1. + out.end -= 1; + Ok(out) +} + +fn pipe_surrounded_by_whitespace(i: TokenSlice) -> PResult<()> { + ( + repeat(0.., whitespace).map(|_: Vec<_>| ()), + pipe_operator, + repeat(0.., whitespace).map(|_: Vec<_>| ()), + ) + .parse_next(i)?; + Ok(()) +} + +/// Note this is O(n). 
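+/// e.g. `count_in('\n', "a\nb\nc")` returns 2, and `count_in('x', "abc")` returns 0.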
+fn count_in(target: char, s: &str) -> usize { + s.chars().filter(|&c| c == target).count() +} + +/// Matches all four cases of NonCodeValue +fn non_code_node(i: TokenSlice) -> PResult { + /// Matches one case of NonCodeValue + /// See docstring on [NonCodeValue::NewLineBlockComment] for why that case is different to the others. + fn non_code_node_leading_whitespace(i: TokenSlice) -> PResult { + let leading_whitespace = any + .verify(|token: &Token| token.token_type == TokenType::Whitespace) + .context(expected("whitespace, with a newline")) + .parse_next(i)?; + let has_empty_line = count_in('\n', &leading_whitespace.value) >= 2; + non_code_node_no_leading_whitespace + .verify_map(|node: NonCodeNode| match node.value { + NonCodeValue::BlockComment { value, style } => Some(NonCodeNode { + start: leading_whitespace.start, + end: node.end + 1, + value: if has_empty_line { + NonCodeValue::NewLineBlockComment { value, style } + } else { + NonCodeValue::BlockComment { value, style } + }, + }), + _ => None, + }) + .context(expected("a comment or whitespace")) + .parse_next(i) + } + + alt((non_code_node_leading_whitespace, non_code_node_no_leading_whitespace)).parse_next(i) +} + +// Matches remaining three cases of NonCodeValue +fn non_code_node_no_leading_whitespace(i: TokenSlice) -> PResult { + any.verify_map(|token: Token| { + if token.is_code_token() { + None + } else { + let value = match token.token_type { + TokenType::Whitespace if token.value.contains("\n\n") => NonCodeValue::NewLine, + TokenType::LineComment => NonCodeValue::BlockComment { + value: token.value.trim_start_matches("//").trim().to_owned(), + style: CommentStyle::Line, + }, + TokenType::BlockComment => NonCodeValue::BlockComment { + style: CommentStyle::Block, + value: token + .value + .trim_start_matches("/*") + .trim_end_matches("*/") + .trim() + .to_owned(), + }, + _ => return None, + }; + Some(NonCodeNode { + start: token.start, + end: token.end, + value, + }) + } + }) + .context(expected("Non-code token (comments or whitespace)")) + .parse_next(i) +} + +fn pipe_expression(i: TokenSlice) -> PResult { + let mut non_code_meta = NonCodeMeta::default(); + let (head, noncode) = terminated( + (value_but_not_pipe, preceded(whitespace, opt(non_code_node))), + peek(pipe_surrounded_by_whitespace), + ) + .context(expected("an expression, followed by the |> (pipe) operator")) + .parse_next(i)?; + if let Some(nc) = noncode { + non_code_meta.insert(0, nc); + } + let mut values = vec![head]; + let value_surrounded_by_comments = ( + repeat(0.., preceded(opt(whitespace), non_code_node)), // Before the value + preceded(opt(whitespace), value_but_not_pipe), // The value + repeat(0.., noncode_just_after_code), // After the value + ); + let tail: Vec<(Vec<_>, _, Vec<_>)> = repeat( + 1.., + preceded(pipe_surrounded_by_whitespace, value_surrounded_by_comments), + ) + .context(expected( + "a sequence of at least one |> (pipe) operator, followed by an expression", + )) + .parse_next(i)?; + + // All child parsers have been run. Time to structure the return value. 
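+ // Illustrative sketch of the resulting layout (foo/bar/baz are placeholder calls):
+ //   foo(1)
+ //     |> bar(%) // inline comment
+ //     |> baz(%)
+ // Here foo(1) is body index 0 and bar(%) is body index 1; the inline comment gets
+ // inserted into non_code_meta at key 1, so the recaster prints it right after bar(%).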
+ let mut code_count = 0; + let mut max_noncode_end = 0; + for (noncode_before, code, noncode_after) in tail { + for nc in noncode_before { + max_noncode_end = nc.end.max(max_noncode_end); + non_code_meta.insert(code_count, nc); + } + values.push(code); + code_count += 1; + for nc in noncode_after { + max_noncode_end = nc.end.max(max_noncode_end); + non_code_meta.insert(code_count, nc); + } + } + Ok(PipeExpression { + start: values.first().unwrap().start(), + end: values.last().unwrap().end().max(max_noncode_end), + body: values, + non_code_meta, + }) +} + +fn bool_value(i: TokenSlice) -> PResult { + let (name, token) = any + .try_map(|token: Token| match token.token_type { + TokenType::Keyword if token.value == "true" => Ok(("true", token)), + TokenType::Keyword if token.value == "false" => Ok(("false", token)), + _ => Err(KclError::Syntax(KclErrorDetails { + source_ranges: token.as_source_ranges(), + message: "invalid boolean literal".to_owned(), + })), + }) + .context(expected("a boolean literal (either true or false)")) + .parse_next(i)?; + Ok(Identifier { + start: token.start, + end: token.end, + name: name.to_owned(), + }) +} + +pub fn literal(i: TokenSlice) -> PResult { + alt((string_literal, unsigned_number_literal)) + .context(expected("a KCL literal, like 'myPart' or 3")) + .parse_next(i) +} + +/// Parse a KCL string literal +pub fn string_literal(i: TokenSlice) -> PResult { + let (value, token) = any + .try_map(|token: Token| match token.token_type { + TokenType::String => { + let s = token.value[1..token.value.len() - 1].to_string(); + Ok((JValue::String(s), token)) + } + _ => Err(KclError::Syntax(KclErrorDetails { + source_ranges: token.as_source_ranges(), + message: "invalid string literal".to_owned(), + })), + }) + .context(expected("string literal (like \"myPart\"")) + .parse_next(i)?; + Ok(Literal { + start: token.start, + end: token.end, + value, + raw: token.value.clone(), + }) +} + +/// Parse a KCL literal number, with no - sign. +fn unsigned_number_literal(i: TokenSlice) -> PResult { + let (value, token) = any + .try_map(|token: Token| match token.token_type { + TokenType::Number => { + if let Ok(x) = token.value.parse::() { + return Ok((JValue::Number(JNumber::from(x)), token)); + } + let x: f64 = token.value.parse().map_err(|_| { + KclError::Syntax(KclErrorDetails { + source_ranges: token.as_source_ranges(), + message: format!("Invalid float: {}", token.value), + }) + })?; + + match JNumber::from_f64(x) { + Some(n) => Ok((JValue::Number(n), token)), + None => Err(KclError::Syntax(KclErrorDetails { + source_ranges: token.as_source_ranges(), + message: format!("Invalid float: {}", token.value), + })), + } + } + _ => Err(KclError::Syntax(KclErrorDetails { + source_ranges: token.as_source_ranges(), + message: "invalid literal".to_owned(), + })), + }) + .context(expected("an unsigned number literal (e.g. 3 or 12.5)")) + .parse_next(i)?; + Ok(Literal { + start: token.start, + end: token.end, + value, + raw: token.value.clone(), + }) +} + +/// Parse a KCL operator that takes a left- and right-hand side argument. 
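+/// e.g. the `+` in `1 + 2`, or the `*` in `2 * cos(3)`.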
+fn binary_operator(i: TokenSlice) -> PResult { + any.try_map(|token: Token| { + if !matches!(token.token_type, TokenType::Operator) { + return Err(KclError::Syntax(KclErrorDetails { + source_ranges: token.as_source_ranges(), + message: format!("unexpected token, should be an operator but was {}", token.token_type), + })); + } + let op = match token.value.as_str() { + "+" => BinaryOperator::Add, + "-" => BinaryOperator::Sub, + "/" => BinaryOperator::Div, + "*" => BinaryOperator::Mul, + "%" => BinaryOperator::Mod, + _ => { + return Err(KclError::Syntax(KclErrorDetails { + source_ranges: token.as_source_ranges(), + message: format!("{} is not a binary operator", token.value.as_str()), + })) + } + }; + Ok(op) + }) + .context(expected("a binary operator (like + or *)")) + .parse_next(i) +} + +/// Parse a KCL operand that can be used with an operator. +fn operand(i: TokenSlice) -> PResult { + const TODO_783: &str = "found a value, but this kind of value cannot be used as the operand to an operator yet (see https://github.com/KittyCAD/modeling-app/issues/783)"; + let op = possible_operands + .try_map(|part| { + let source_ranges = vec![SourceRange([part.start(), part.end()])]; + let val = match part { + // TODO: these should be valid operands eventually, + // users should be able to run "let x = f() + g()" + // see https://github.com/KittyCAD/modeling-app/issues/783 + Value::FunctionExpression(_) + | Value::PipeExpression(_) + | Value::PipeSubstitution(_) + | Value::ArrayExpression(_) + | Value::ObjectExpression(_) => { + return Err(KclError::Syntax(KclErrorDetails { + source_ranges, + message: TODO_783.to_owned(), + })) + } + Value::UnaryExpression(x) => BinaryPart::UnaryExpression(x), + Value::Literal(x) => BinaryPart::Literal(x), + Value::Identifier(x) => BinaryPart::Identifier(x), + Value::BinaryExpression(x) => BinaryPart::BinaryExpression(x), + Value::CallExpression(x) => BinaryPart::CallExpression(x), + Value::MemberExpression(x) => BinaryPart::MemberExpression(x), + }; + Ok(val) + }) + .context(expected("an operand (a value which can be used with an operator)")) + .parse_next(i)?; + Ok(op) +} + +impl TokenType { + fn parse_from(self, i: TokenSlice) -> PResult { + any.try_map(|token: Token| { + if token.token_type == self { + Ok(token) + } else { + Err(KclError::Syntax(KclErrorDetails { + source_ranges: token.as_source_ranges(), + message: format!( + "expected {self} but found {} which is a {}", + token.value.as_str(), + token.token_type + ), + })) + } + }) + .parse_next(i) + } +} + +/// Parse some whitespace (i.e. at least one whitespace token) +fn whitespace(i: TokenSlice) -> PResult> { + repeat( + 1.., + any.try_map(|token: Token| { + if token.token_type == TokenType::Whitespace { + Ok(token) + } else { + Err(KclError::Syntax(KclErrorDetails { + source_ranges: token.as_source_ranges(), + message: format!( + "expected whitespace, found '{}' which is {}", + token.value.as_str(), + token.token_type + ), + })) + } + }), + ) + .context(expected("some whitespace (e.g. spaces, tabs, new lines)")) + .parse_next(i) +} + +/// Parse the = operator. +fn equals(i: TokenSlice) -> PResult { + any.verify(|token: &Token| matches!(token.token_type, TokenType::Operator) && token.value == "=") + .context(expected("the equals operator, =")) + .parse_next(i) +} + +/// Parse a KCL array of elements. 
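+/// e.g. `[1, 2, 3]`, or an inclusive integer range like `[0..3]`, which expands to `[0, 1, 2, 3]`.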
+fn array(i: TokenSlice) -> PResult { + let start = open_bracket(i)?.start; + ignore_whitespace(i); + let elements = alt((integer_range, separated0(value, comma_sep))) + .context(expected( + "array contents, either a numeric range (like 0..10) or a list of elements (like [1, 2, 3])", + )) + .parse_next(i)?; + ignore_whitespace(i); + let end = close_bracket(i)?.end; + Ok(ArrayExpression { start, end, elements }) +} + +/// Parse n..m into a vec of numbers [n, n+1, ..., m] +fn integer_range(i: TokenSlice) -> PResult> { + let (token0, floor) = integer.parse_next(i)?; + double_period.parse_next(i)?; + let (_token1, ceiling) = integer.parse_next(i)?; + Ok((floor..=ceiling) + .map(|num| { + Value::Literal(Box::new(Literal { + start: token0.start, + end: token0.end, + value: JValue::Number(num.into()), + raw: num.to_string(), + })) + }) + .collect()) +} + +fn object_property(i: TokenSlice) -> PResult { + let key = identifier + .context(expected( + "the property's key (the name or identifier of the property), e.g. in 'height: 4', 'height' is the property key", + )) + .parse_next(i)?; + colon + .context(expected( + "a colon, which separates the property's key from the value you're setting it to, e.g. 'height: 4'", + )) + .parse_next(i)?; + ignore_whitespace(i); + let val = value + .context(expected( + "the value which you're setting the property to, e.g. in 'height: 4', the value is 4", + )) + .parse_next(i)?; + Ok(ObjectProperty { + start: key.start, + end: val.end(), + key, + value: val, + }) +} + +/// Parse a KCL object value. +fn object(i: TokenSlice) -> PResult { + let start = open_brace(i)?.start; + ignore_whitespace(i); + let properties = separated0(object_property, comma_sep) + .context(expected( + "a comma-separated list of key-value pairs, e.g. 'height: 4, width: 3'", + )) + .parse_next(i)?; + ignore_whitespace(i); + let end = close_brace(i)?.end; + Ok(ObjectExpression { start, end, properties }) +} + +/// Parse the % symbol, used to substitute a curried argument from a |> (pipe). +fn pipe_sub(i: TokenSlice) -> PResult { + any.try_map(|token: Token| { + if matches!(token.token_type, TokenType::Operator) && token.value == "%" { + Ok(PipeSubstitution { + start: token.start, + end: token.end, + }) + } else { + Err(KclError::Syntax(KclErrorDetails { + source_ranges: token.as_source_ranges(), + message: format!( + "expected a pipe substitution symbol (%) but found {}", + token.value.as_str() + ), + })) + } + }) + .context(expected("the substitution symbol, %")) + .parse_next(i) +} + +// Looks like +// (arg0, arg1) => { +// const x = arg0 + arg1; +// return x +// } +fn function_expression(i: TokenSlice) -> PResult { + let start = open_paren(i)?.start; + let params = parameters(i)?; + close_paren(i)?; + ignore_whitespace(i); + big_arrow(i)?; + ignore_whitespace(i); + open_brace(i)?; + let body = function_body(i)?; + let end = close_brace(i)?.end; + Ok(FunctionExpression { + start, + end, + params, + body, + }) +} + +/// E.g. `person.name` +fn member_expression_dot(i: TokenSlice) -> PResult<(LiteralIdentifier, usize, bool)> { + period.parse_next(i)?; + let property = identifier.parse_next(i)?; + let end = property.end; + Ok((LiteralIdentifier::Identifier(Box::new(property)), end, false)) +} + +/// E.g. 
`people[0]` or `people[i]` or `people['adam']` +fn member_expression_subscript(i: TokenSlice) -> PResult<(LiteralIdentifier, usize, bool)> { + let _ = open_bracket.parse_next(i)?; + let property = alt(( + literal.map(Box::new).map(LiteralIdentifier::Literal), + identifier.map(Box::new).map(LiteralIdentifier::Identifier), + )) + .parse_next(i)?; + let end = close_bracket.parse_next(i)?.end; + let computed = matches!(property, LiteralIdentifier::Identifier(_)); + Ok((property, end, computed)) +} + +/// Get a property of an object, or an index of an array, or a member of a collection. +/// Can be arbitrarily nested, e.g. `people[i]['adam'].age`. +fn member_expression(i: TokenSlice) -> PResult { + // This is an identifier, followed by a sequence of members (aka properties) + // First, the identifier. + let id = identifier + .context(expected("the identifier of the object whose property you're trying to access, e.g. in 'shape.size.width', 'shape' is the identifier")) + .parse_next(i)?; + // Now a sequence of members. + let member = alt((member_expression_dot, member_expression_subscript)) + .context(expected("a member/property, e.g. size.x and size['height'] and size[0] are all different ways to access a member/property of 'size'")); + let mut members: Vec<_> = repeat(1.., member) + .context(expected("a sequence of at least one members/properties")) + .parse_next(i)?; + + // Process the first member. + // It's safe to call remove(0), because the vec is created from repeat(1..), + // which is guaranteed to have >=1 elements. + let (property, end, computed) = members.remove(0); + let start = id.start; + let initial_member_expression = MemberExpression { + start, + end, + object: MemberObject::Identifier(Box::new(id)), + computed, + property, + }; + + // Each remaining member wraps the current member expression inside another member expression. + Ok(members + .into_iter() + // Take the accumulated member expression from the previous iteration, + // and use it as the `object` of a new, bigger member expression. + .fold(initial_member_expression, |accumulated, (property, end, computed)| { + MemberExpression { + start, + end, + object: MemberObject::MemberExpression(Box::new(accumulated)), + computed, + property, + } + })) +} + +/// Find a noncode node which occurs just after a body item, +/// such that if the noncode item is a comment, it might be an inline comment. +fn noncode_just_after_code(i: TokenSlice) -> PResult { + let ws = opt(whitespace).parse_next(i)?; + + // What is the preceding whitespace like? + let (has_newline, has_empty_line) = if let Some(ref ws) = ws { + ( + ws.iter().any(|token| token.value.contains('\n')), + ws.iter().any(|token| count_in('\n', &token.value) >= 2), + ) + } else { + (false, false) + }; + + // Look for a non-code node (e.g. comment) + let nc = non_code_node_no_leading_whitespace + .map(|nc| { + if has_empty_line { + // There's an empty line between the body item and the comment, + // This means the comment is a NewLineBlockComment! + let value = match nc.value { + // Change block comments to inline, as discussed above + NonCodeValue::BlockComment { value, style } => NonCodeValue::NewLineBlockComment { value, style }, + // Other variants don't need to change. + x @ NonCodeValue::InlineComment { .. } => x, + x @ NonCodeValue::NewLineBlockComment { .. } => x, + x @ NonCodeValue::NewLine => x, + }; + NonCodeNode { + value, + start: nc.start.saturating_sub(1), + ..nc + } + } else if has_newline { + // Nothing has to change, a single newline does not need preserving. 
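+ // e.g. a `// comment` on the very next line after a statement stays a BlockComment.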
+ nc + } else { + // There's no newline between the body item and comment, + // so if this is a comment, it must be inline with code. + let value = match nc.value { + // Change block comments to inline, as discussed above + NonCodeValue::BlockComment { value, style } => NonCodeValue::InlineComment { value, style }, + // Other variants don't need to change. + x @ NonCodeValue::InlineComment { .. } => x, + x @ NonCodeValue::NewLineBlockComment { .. } => x, + x @ NonCodeValue::NewLine => x, + }; + NonCodeNode { value, ..nc } + } + }) + .map(|nc| NonCodeNode { + start: nc.start.saturating_sub(1), + ..nc + }) + .parse_next(i)?; + Ok(nc) +} + +#[derive(Debug)] +enum WithinFunction { + BodyItem((BodyItem, Option)), + NonCode(NonCodeNode), +} + +fn body_items_within_function(i: TokenSlice) -> PResult { + // Any of the body item variants, each of which can optionally be followed by a comment. + // If there is a comment, it may be preceded by whitespace. + let item = dispatch! {peek(any); + token if token.declaration_keyword().is_some() => + (declaration.map(BodyItem::VariableDeclaration), opt(noncode_just_after_code)).map(WithinFunction::BodyItem), + Token { ref value, .. } if value == "return" => + (return_stmt.map(BodyItem::ReturnStatement), opt(noncode_just_after_code)).map(WithinFunction::BodyItem), + token if !token.is_code_token() => { + non_code_node.map(WithinFunction::NonCode) + }, + _ => + (expression.map(BodyItem::ExpressionStatement), opt(noncode_just_after_code)).map(WithinFunction::BodyItem), + } + .context(expected("a function body items (functions are made up of variable declarations, expressions, and return statements, each of those is a possible body item")) + .parse_next(i)?; + Ok(item) +} + +/// Parse the body of a user-defined function. +pub fn function_body(i: TokenSlice) -> PResult { + let leading_whitespace_start = alt(( + peek(non_code_node).map(|_| None), + // Subtract 1 from `t.start` to match behaviour of the old parser. + // Consider removing the -1 in the future because I think it's inaccurate, but for now, + // I prefer to match the old parser exactly when I can. + opt(whitespace).map(|tok| tok.and_then(|t| t.first().map(|t| t.start.saturating_sub(1)))), + )) + .parse_next(i)?; + + let mut things_within_body = Vec::new(); + // Parse the first item + things_within_body.push(body_items_within_function.parse_next(i)?); + + // This loop is complicated! I'm sorry! + // It's almost identical to the loop in `winnow::combinator::separated1`, + // see , + // where the "main" parser is body_items_within_function and the `sep` (separator) parser is + // ws_with_newline. + // + // Except for one thing. + // + // In this case, one of the body items being matched could be a whitespace with a newline, + // and that could _also_ be the separator. + // + // So, if both the main parser and the `sep` parser within `separated1` try to match the same + // token, the main parser will consume it and then the `sep` parser will fail. + // + // The solution is that this parser should check if the last matched body item was an empty line, + // and if so, then ignore the separator parser for the current iteration. + loop { + let last_match_was_empty_line = matches!( + things_within_body.last(), + Some(WithinFunction::NonCode(NonCodeNode { + value: NonCodeValue::NewLine, + .. 
+ })) + ); + + use winnow::stream::Stream; + + let start = i.checkpoint(); + let len = i.eof_offset(); + + let found_ws = ws_with_newline.parse_next(i); + + // The separator whitespace might be important: + // if it has an empty line, it should be considered a noncode token, because the user + // deliberately put an empty line there. We should track this and preserve it. + if let Ok(ref ws_token) = found_ws { + if ws_token.value.contains("\n\n") { + things_within_body.push(WithinFunction::NonCode(NonCodeNode { + start: ws_token.start, + end: ws_token.end, + value: NonCodeValue::NewLine, + })); + } + } + + match (found_ws, last_match_was_empty_line) { + (Ok(_), _) | (_, true) => { + // Infinite loop check: this loop must always consume tokens from the input. + // That can either happen through the `sep` parser (i.e. ws_with_newline) or through + // the main parser (body_items_within_function). + // LHS of this checks fht + if i.eof_offset() == len && !last_match_was_empty_line { + use winnow::error::ParserError; + return Err(ErrMode::assert(i, "sep parsers must always consume")); + } + + match body_items_within_function.parse_next(i) { + Err(ErrMode::Backtrack(_)) => { + i.reset(start); + break; + } + Err(e) => return Err(e), + Ok(o) => { + things_within_body.push(o); + } + } + } + (Err(ErrMode::Backtrack(_)), _) => { + i.reset(start); + break; + } + (Err(e), _) => return Err(e), + } + } + + let mut body = Vec::new(); + let mut non_code_meta = NonCodeMeta::default(); + let mut end = 0; + let mut start = leading_whitespace_start; + for thing_in_body in things_within_body { + match thing_in_body { + WithinFunction::BodyItem((b, maybe_noncode)) => { + if start.is_none() { + start = Some(b.start()); + } + end = b.end(); + body.push(b); + if let Some(nc) = maybe_noncode { + end = nc.end; + non_code_meta.insert(body.len() - 1, nc); + } + } + WithinFunction::NonCode(nc) => { + if start.is_none() { + start = Some(nc.start); + } + end = nc.end; + if body.is_empty() { + non_code_meta.start = Some(nc) + } else { + non_code_meta.insert(body.len() - 1, nc); + } + } + } + } + let start = start.expect( + "the `things_within_body` vec should have looped at least once, and each loop overwrites `start` if it is None", + ); + // Safe to unwrap `body.first()` because `body` is `separated1` therefore guaranteed + // to have len >= 1. + let end_ws = opt(whitespace) + .parse_next(i)? + .and_then(|ws| ws.first().map(|tok| tok.end)); + if let Some(end_ws) = end_ws { + end = end.max(end_ws); + } + end += 1; + Ok(Program { + start, + end, + body, + non_code_meta, + }) +} + +/// Parse a return statement of a user-defined function, e.g. `return x`. 
+pub fn return_stmt(i: TokenSlice) -> PResult { + let start = any + .try_map(|token: Token| { + if matches!(token.token_type, TokenType::Keyword) && token.value == "return" { + Ok(token.start) + } else { + Err(KclError::Syntax(KclErrorDetails { + source_ranges: token.as_source_ranges(), + message: format!("{} is not a return keyword", token.value.as_str()), + })) + } + }) + .context(expected( + "the 'return' keyword, which ends your function (and becomes this function's value when it's called)", + )) + .parse_next(i)?; + require_whitespace(i)?; + let argument = value(i)?; + Ok(ReturnStatement { + start, + end: argument.end(), + argument, + }) +} + +/// Parse a KCL value +fn value(i: TokenSlice) -> PResult { + alt(( + pipe_expression.map(Box::new).map(Value::PipeExpression), + value_but_not_pipe, + )) + .context(expected("a KCL value")) + .parse_next(i) +} + +fn unnecessarily_bracketed(i: TokenSlice) -> PResult { + delimited(open_paren, value_but_not_pipe, close_paren).parse_next(i) +} + +fn value_but_not_pipe(i: TokenSlice) -> PResult { + alt(( + binary_expression.map(Box::new).map(Value::BinaryExpression), + unary_expression.map(Box::new).map(Value::UnaryExpression), + member_expression.map(Box::new).map(Value::MemberExpression), + bool_value.map(Box::new).map(Value::Identifier), + literal.map(Box::new).map(Value::Literal), + fn_call.map(Box::new).map(Value::CallExpression), + identifier.map(Box::new).map(Value::Identifier), + array.map(Box::new).map(Value::ArrayExpression), + object.map(Box::new).map(Value::ObjectExpression), + pipe_sub.map(Box::new).map(Value::PipeSubstitution), + function_expression.map(Box::new).map(Value::FunctionExpression), + unnecessarily_bracketed, + )) + .context(expected("a KCL value (but not a pipe expression)")) + .parse_next(i) +} + +fn possible_operands(i: TokenSlice) -> PResult { + alt(( + unary_expression.map(Box::new).map(Value::UnaryExpression), + bool_value.map(Box::new).map(Value::Identifier), + member_expression.map(Box::new).map(Value::MemberExpression), + literal.map(Box::new).map(Value::Literal), + fn_call.map(Box::new).map(Value::CallExpression), + identifier.map(Box::new).map(Value::Identifier), + binary_expr_in_parens.map(Box::new).map(Value::BinaryExpression), + )) + .context(expected( + "a KCL value which can be used as an argument/operand to an operator", + )) + .parse_next(i) +} + +/// Parse a variable/constant declaration. +fn declaration(i: TokenSlice) -> PResult { + const EXPECTED: &str = "expected a variable declaration keyword (e.g. 'let') but found"; + let (kind, start, dec_end) = any + .try_map(|token: Token| { + let Some(kind) = token.declaration_keyword() else { + return Err(KclError::Syntax(KclErrorDetails { + source_ranges: token.as_source_ranges(), + message: format!("{EXPECTED} {}", token.value.as_str()), + })); + }; + + Ok((kind, token.start, token.end)) + }) + .context(expected("declaring a name, e.g. 'let width = 3'")) + .parse_next(i)?; + + // After this point, the parser is DEFINITELY parsing a variable declaration, because + // `fn`, `let`, `const` etc are all unambiguous. If you've parsed one of those tokens -- + // and we certainly have because `kind` was parsed above -- then the following tokens + // MUST continue the variable declaration, otherwise the program is invalid. + // + // This means, from here until this function returns, any errors should be ErrMode::Cut, + // not ErrMode::Backtrack. Because the parser is definitely parsing a variable declaration. 
+ // If there's an error, there's no point backtracking -- instead the parser should fail. + require_whitespace(i).map_err(|e| e.cut())?; + let id = binding_name + .context(expected( + "an identifier, which becomes name you're binding the value to", + )) + .parse_next(i) + .map_err(|e| e.cut())?; + + ignore_whitespace(i); + equals(i).map_err(|e| e.cut())?; + ignore_whitespace(i); + + let val = if kind == VariableKind::Fn { + function_expression + .map(Box::new) + .map(Value::FunctionExpression) + .context(expected("a KCL function expression, like () => { return 1 }")) + .parse_next(i) + } else { + value + .try_map(|val| { + // Function bodies can be used if and only if declaring a function. + // Check the 'if' direction: + if matches!(val, Value::FunctionExpression(_)) { + return Err(KclError::Syntax(KclErrorDetails { + source_ranges: vec![SourceRange([start, dec_end])], + message: format!("Expected a `fn` variable kind, found: `{}`", kind), + })); + } + Ok(val) + }) + .context(expected("a KCL value, which is being bound to a variable")) + .parse_next(i) + } + .map_err(|e| e.cut())?; + + let end = val.end(); + Ok(VariableDeclaration { + start, + end, + declarations: vec![VariableDeclarator { + start: id.start, + end, + id, + init: val, + }], + kind, + }) +} + +impl TryFrom for Identifier { + type Error = KclError; + + fn try_from(token: Token) -> Result { + if token.token_type == TokenType::Word { + Ok(Identifier { + start: token.start, + end: token.end, + name: token.value, + }) + } else { + Err(KclError::Syntax(KclErrorDetails { + source_ranges: token.as_source_ranges(), + message: format!( + "Cannot assign a variable to a reserved keyword: {}", + token.value.as_str() + ), + })) + } + } +} + +/// Parse a KCL identifier (name of a constant/variable/function) +fn identifier(i: TokenSlice) -> PResult { + any.try_map(Identifier::try_from) + .context(expected("an identifier, e.g. 'width' or 'myPart'")) + .parse_next(i) +} + +/// Helper function. Matches any number of whitespace tokens and ignores them. +fn ignore_whitespace(i: TokenSlice) { + let _: PResult<()> = repeat(0.., whitespace).parse_next(i); +} + +/// Matches at least 1 whitespace. +fn require_whitespace(i: TokenSlice) -> PResult<()> { + repeat(1.., whitespace).parse_next(i) +} + +fn unary_expression(i: TokenSlice) -> PResult { + const EXPECTED: &str = "expected a unary operator (like '-', the negative-numeric operator),"; + let (operator, op_token) = any + .try_map(|token: Token| match token.token_type { + TokenType::Operator if token.value == "-" => Ok((UnaryOperator::Neg, token)), + // TODO: negation. Original parser doesn't support `not` yet. + TokenType::Operator => Err(KclError::Syntax(KclErrorDetails { + source_ranges: token.as_source_ranges(), + message: format!( + "{EXPECTED} but found {} which is an operator, but not a unary one (unary operators apply to just a single operand, your operator applies to two or more operands)", + token.value.as_str(), + ), + })), + other => Err(KclError::Syntax(KclErrorDetails { + source_ranges: token.as_source_ranges(), + message: format!( + "{EXPECTED} but found {} which is {}", + token.value.as_str(), + other, + ), + })), + }) + .context(expected("a unary expression, e.g. -x or -3")) + .parse_next(i)?; + let argument = operand.parse_next(i)?; + Ok(UnaryExpression { + start: op_token.start, + end: argument.end(), + operator, + argument, + }) +} + +/// Consume tokens that make up a binary expression, but don't actually return them. +/// Why not? 
+/// Because this is designed to be used with .recognize() within the `binary_expression` parser. +fn binary_expression_tokens(i: TokenSlice) -> PResult<()> { + let _first = operand.parse_next(i)?; + let _remaining: Vec<_> = repeat( + 1.., + ( + preceded(opt(whitespace), binary_operator), + preceded(opt(whitespace), operand), + ), + ) + .context(expected( + "one or more binary operators (like + or -) and operands for them, e.g. 1 + 2 - 3", + )) + .parse_next(i)?; + Ok(()) +} + +/// Parse an infix binary expression. +fn binary_expression(i: TokenSlice) -> PResult { + // Find the slice of tokens which makes up the binary expression + let tokens = binary_expression_tokens.recognize().parse_next(i)?; + + // Pass the token slice into the specialized math parser, for things like + // precedence and converting infix operations to an AST. + let mut math_parser = MathParser::new(tokens); + let expr = math_parser + .parse() + .map_err(error::ContextError::from) + .map_err(ErrMode::Backtrack)?; + Ok(expr) +} + +fn binary_expr_in_parens(i: TokenSlice) -> PResult { + let span_with_brackets = bracketed_section.recognize().parse_next(i)?; + let n = span_with_brackets.len(); + let mut span_no_brackets = &span_with_brackets[1..n - 1]; + let expr = binary_expression.parse_next(&mut span_no_brackets)?; + Ok(expr) +} + +/// Match a starting bracket, then match to the corresponding end bracket. +/// Return the count of how many tokens are in that span +/// (not including the bracket tokens). +fn bracketed_section(i: TokenSlice) -> PResult { + // Find the start of this bracketed expression. + let _ = open_paren.parse_next(i)?; + let mut opened_braces = 1usize; + let mut tokens_examined = 0; + while opened_braces > 0 { + let tok = any.parse_next(i)?; + tokens_examined += 1; + if matches!(tok.token_type, TokenType::Brace) { + if tok.value == "(" { + opened_braces += 1; + } else if tok.value == ")" { + opened_braces -= 1; + } + } + } + Ok(tokens_examined) +} + +/// Parse a KCL expression. +fn expression(i: TokenSlice) -> PResult { + let val = value + .context(expected( + "an expression (i.e. a value, or an algorithm for calculating one), e.g. 'x + y' or '3' or 'width * 2'", + )) + .parse_next(i)?; + Ok(ExpressionStatement { + start: val.start(), + end: val.end(), + expression: val, + }) +} + +/// Parse a KCL integer, and the token that held it. +fn integer(i: TokenSlice) -> PResult<(Token, u64)> { + let num = any + .verify(|token: &Token| matches!(token.token_type, TokenType::Number)) + .context(expected("a number token e.g. 3")) + .try_map(|token: Token| { + let source_ranges = token.as_source_ranges(); + let value = token.value.clone(); + token.value.parse().map(|num| (token, num)).map_err(|e| { + KclError::Syntax(KclErrorDetails { + source_ranges, + message: format!("invalid integer {value}: {e}"), + }) + }) + }) + .context(expected("an integer e.g. 3 (but not 3.1)")) + .parse_next(i)?; + Ok(num) +} + +/// Parse the given brace symbol. +fn some_brace(symbol: &'static str, i: TokenSlice) -> PResult { + any.verify(|token: &Token| matches!(token.token_type, TokenType::Brace) && token.value == symbol) + .context(expected(symbol)) + .parse_next(i) +} + +/// Parse a => operator. +fn big_arrow(i: TokenSlice) -> PResult { + any.verify(|token: &Token| matches!(token.token_type, TokenType::Operator) && token.value == "=>") + .context(expected("the => symbol, used for declaring functions")) + .parse_next(i) +} +/// Parse a |> operator. 
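+/// e.g. both `|>` tokens in `startSketchAt([0, 0]) |> line([1, 0], %) |> rx(90, %)` (illustrative KCL).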
+fn pipe_operator(i: TokenSlice) -> PResult { + any.verify(|token: &Token| matches!(token.token_type, TokenType::Operator) && token.value == "|>") + .context(expected( + "the |> operator, used for 'piping' one function's output into another function's input", + )) + .parse_next(i) +} + +fn ws_with_newline(i: TokenSlice) -> PResult { + any.verify(|token: &Token| matches!(token.token_type, TokenType::Whitespace) && token.value.contains('\n')) + .context(expected("a newline, possibly with whitespace")) + .parse_next(i) +} + +/// ( +fn open_paren(i: TokenSlice) -> PResult { + some_brace("(", i) +} + +/// ) +fn close_paren(i: TokenSlice) -> PResult { + some_brace(")", i) +} + +/// [ +fn open_bracket(i: TokenSlice) -> PResult { + some_brace("[", i) +} + +/// ] +fn close_bracket(i: TokenSlice) -> PResult { + some_brace("]", i) +} + +/// { +fn open_brace(i: TokenSlice) -> PResult { + some_brace("{", i) +} + +/// } +fn close_brace(i: TokenSlice) -> PResult { + some_brace("}", i) +} + +fn comma(i: TokenSlice) -> PResult<()> { + TokenType::Comma.parse_from(i)?; + Ok(()) +} + +fn period(i: TokenSlice) -> PResult<()> { + TokenType::Period.parse_from(i)?; + Ok(()) +} + +fn double_period(i: TokenSlice) -> PResult { + any.try_map(|token: Token| { + if matches!(token.token_type, TokenType::DoublePeriod) { + Ok(token) + } else { + Err(KclError::Syntax(KclErrorDetails { + source_ranges: token.as_source_ranges(), + message: format!( + "expected a '..' (double period) found {} which is {}", + token.value.as_str(), + token.token_type + ), + })) + } + }) + .context(expected("the .. operator, used for array ranges like [0..10]")) + .parse_next(i) +} + +fn colon(i: TokenSlice) -> PResult<()> { + TokenType::Colon.parse_from(i)?; + Ok(()) +} + +/// Parse a comma, optionally followed by some whitespace. +fn comma_sep(i: TokenSlice) -> PResult<()> { + (comma, opt(whitespace)) + .context(expected("a comma, optionally followed by whitespace")) + .parse_next(i)?; + Ok(()) +} + +/// Arguments are passed into a function. +fn arguments(i: TokenSlice) -> PResult> { + separated0(value, comma_sep) + .context(expected("function arguments")) + .parse_next(i) +} + +fn not_close_paren(i: TokenSlice) -> PResult { + any.verify(|token: &Token| !matches!(token.token_type, TokenType::Brace) || token.value != ")") + .parse_next(i) +} + +/// Parameters are declared in a function signature, and used within a function. +fn parameters(i: TokenSlice) -> PResult> { + // Get all tokens until the next ), because that ends the parameter list. + let candidates: Vec = separated0(not_close_paren, comma_sep) + .context(expected("function parameters")) + .parse_next(i)?; + // Make sure all those tokens are valid parameters. + let params = candidates + .into_iter() + .map(|token| Identifier::try_from(token).and_then(Identifier::into_valid_binding_name)) + .collect::>() + .map_err(|e| ErrMode::Backtrack(ContextError::from(e)))?; + Ok(params) +} + +impl Identifier { + fn into_valid_binding_name(self) -> Result { + // Make sure they are not assigning a variable to a stdlib function. + if STDLIB.fns.contains_key(&self.name) { + return Err(KclError::Syntax(KclErrorDetails { + source_ranges: vec![SourceRange([self.start, self.end])], + message: format!("Cannot assign a variable to a reserved keyword: {}", self.name), + })); + } + Ok(self) + } +} + +/// Introduce a new name, which binds some value. 
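+/// e.g. in `const width = 3`, `width` is the binding name; stdlib function names (like `cos`) are rejected here.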
+fn binding_name(i: TokenSlice) -> PResult<Identifier> {
+    identifier
+        .context(expected("an identifier, which will be the name of some value"))
+        .try_map(Identifier::into_valid_binding_name)
+        .context(expected("an identifier, which will be the name of some value"))
+        .parse_next(i)
+}
+
+fn fn_call(i: TokenSlice) -> PResult<CallExpression> {
+    let fn_name = identifier(i)?;
+    let _ = open_paren(i)?;
+    let args = arguments(i)?;
+    let end = close_paren(i)?.end;
+    let function = if let Some(stdlib_fn) = STDLIB.get(&fn_name.name) {
+        crate::ast::types::Function::StdLib { func: stdlib_fn }
+    } else {
+        crate::ast::types::Function::InMemory
+    };
+    Ok(CallExpression {
+        start: fn_name.start,
+        end,
+        callee: fn_name,
+        arguments: args,
+        optional: false,
+        function,
+    })
+}
+
+#[cfg(test)]
+mod tests {
+    use pretty_assertions::assert_eq;
+
+    use super::*;
+    use crate::ast::types::{BodyItem, Value, VariableKind};
+
+    #[test]
+    fn parse_args() {
+        for (i, (test, expected_len)) in [("someVar", 1), ("5, 3", 2), (r#""a""#, 1)].into_iter().enumerate() {
+            let tokens = crate::token::lexer(test);
+            let actual = match arguments.parse(&tokens) {
+                Ok(x) => x,
+                Err(e) => panic!("Failed test {i}, could not parse function arguments from \"{test}\": {e:?}"),
+            };
+            assert_eq!(actual.len(), expected_len, "failed test {i}");
+        }
+    }
+
+    #[test]
+    fn weird_program_unclosed_paren() {
+        let tokens = crate::token::lexer("fn firstPrime=(");
+        let last = tokens.last().unwrap();
+        let err: KclError = program.parse(&tokens).unwrap_err().into();
+        assert_eq!(err.source_ranges(), last.as_source_ranges());
+        // TODO: Better comment. This should explain the compiler expected ) because the user had started declaring the function's parameters.
+        // Part of https://github.com/KittyCAD/modeling-app/issues/784
+        assert_eq!(err.message(), "Unexpected end of file. 
The compiler expected )"); + } + + #[test] + fn weird_program_just_a_pipe() { + let tokens = crate::token::lexer("|"); + let err: KclError = program.parse(&tokens).unwrap_err().into(); + assert_eq!(err.source_ranges(), vec![SourceRange([0, 1])]); + assert_eq!(err.message(), "Unexpected token"); + } + + #[test] + fn parse_binary_expressions() { + for (i, test_program) in ["1 + 2 + 3"].into_iter().enumerate() { + let tokens = crate::token::lexer(test_program); + let mut slice = tokens.as_slice(); + let _actual = match binary_expression.parse_next(&mut slice) { + Ok(x) => x, + Err(e) => panic!("Failed test {i}, could not parse binary expressions from \"{test_program}\": {e:?}"), + }; + } + } + + #[test] + fn test_negative_operands() { + let tokens = crate::token::lexer("-leg2"); + let _s = operand.parse_next(&mut tokens.as_slice()).unwrap(); + } + + #[test] + fn test_comments_in_function1() { + let test_program = r#"() => { + // comment 0 + const a = 1 + // comment 1 + const b = 2 + // comment 2 + return 1 + }"#; + let tokens = crate::token::lexer(test_program); + let mut slice = tokens.as_slice(); + let expr = function_expression.parse_next(&mut slice).unwrap(); + assert_eq!(expr.params, vec![]); + let comment_start = expr.body.non_code_meta.start.unwrap(); + let comment0 = &expr.body.non_code_meta.non_code_nodes.get(&0).unwrap()[0]; + let comment1 = &expr.body.non_code_meta.non_code_nodes.get(&1).unwrap()[0]; + assert_eq!(comment_start.value(), "comment 0"); + assert_eq!(comment0.value(), "comment 1"); + assert_eq!(comment1.value(), "comment 2"); + } + + #[test] + fn test_comments_in_function2() { + let test_program = r#"() => { + const yo = { a: { b: { c: '123' } } } /* block +comment */ +}"#; + let tokens = crate::token::lexer(test_program); + let mut slice = tokens.as_slice(); + let expr = function_expression.parse_next(&mut slice).unwrap(); + let comment0 = &expr.body.non_code_meta.non_code_nodes.get(&0).unwrap()[0]; + assert_eq!(comment0.value(), "block\ncomment"); + } + + #[test] + fn test_whitespace_in_function() { + let test_program = r#"() => { + return sg + return sg + }"#; + let tokens = crate::token::lexer(test_program); + let mut slice = tokens.as_slice(); + let _expr = function_expression.parse_next(&mut slice).unwrap(); + } + + #[test] + fn test_empty_lines_in_function() { + let test_program = "() => { + + return 2 + }"; + let tokens = crate::token::lexer(test_program); + let mut slice = tokens.as_slice(); + let expr = function_expression.parse_next(&mut slice).unwrap(); + assert_eq!( + expr, + FunctionExpression { + start: 0, + end: 47, + params: Default::default(), + body: Program { + start: 7, + end: 47, + body: vec![BodyItem::ReturnStatement(ReturnStatement { + start: 25, + end: 33, + argument: Value::Literal(Box::new(Literal { + start: 32, + end: 33, + value: JValue::Number(JNumber::from(2)), + raw: "2".to_owned(), + })), + })], + non_code_meta: NonCodeMeta { + non_code_nodes: Default::default(), + start: Some(NonCodeNode { + start: 7, + end: 25, + value: NonCodeValue::NewLine + }) + }, + } + } + ); + } + + #[test] + fn inline_comment_pipe_expression() { + let test_input = r#"a('XY') + |> b() + |> c(%) // inline-comment + |> d(%)"#; + + let tokens = crate::token::lexer(test_input); + let mut slice = tokens.as_slice(); + let PipeExpression { + body, non_code_meta, .. 
+ } = pipe_expression.parse_next(&mut slice).unwrap(); + assert_eq!(non_code_meta.non_code_nodes.len(), 1); + assert_eq!( + non_code_meta.non_code_nodes.get(&2).unwrap()[0].value, + NonCodeValue::InlineComment { + value: "inline-comment".to_owned(), + style: CommentStyle::Line, + } + ); + assert_eq!(body.len(), 4); + } + + #[test] + fn many_comments() { + let test_program = r#"// this is a comment + const yo = { a: { b: { c: '123' } } } /* block + comment */ + + const key = 'c' + // this is also a comment + return things +"#; + + let tokens = crate::token::lexer(test_program); + let Program { non_code_meta, .. } = function_body.parse(&tokens).unwrap(); + assert_eq!( + Some(NonCodeNode { + start: 0, + end: 20, + value: NonCodeValue::BlockComment { + value: "this is a comment".to_owned(), + style: CommentStyle::Line, + }, + }), + non_code_meta.start, + ); + assert_eq!( + Some(&vec![ + NonCodeNode { + start: 60, + end: 82, + value: NonCodeValue::InlineComment { + value: "block\n comment".to_owned(), + style: CommentStyle::Block, + }, + }, + NonCodeNode { + start: 82, + end: 86, + value: NonCodeValue::NewLine, + }, + ]), + non_code_meta.non_code_nodes.get(&0), + ); + assert_eq!( + Some(&vec![NonCodeNode { + start: 103, + end: 129, + value: NonCodeValue::BlockComment { + value: "this is also a comment".to_owned(), + style: CommentStyle::Line, + }, + }]), + non_code_meta.non_code_nodes.get(&1), + ); + } + + #[test] + fn inline_block_comments() { + let test_program = r#"const yo = 3 /* block + comment */ + return 1"#; + + let tokens = crate::token::lexer(test_program); + let actual = program.parse(&tokens).unwrap(); + assert_eq!(actual.non_code_meta.non_code_nodes.len(), 1); + assert_eq!( + actual.non_code_meta.non_code_nodes.get(&0).unwrap()[0].value, + NonCodeValue::InlineComment { + value: "block\n comment".to_owned(), + style: CommentStyle::Block, + } + ); + } + + #[test] + fn test_bracketed_binary_expression() { + let input = "(2 - 3)"; + let tokens = crate::token::lexer(input); + let actual = match binary_expr_in_parens.parse(&tokens) { + Ok(x) => x, + Err(e) => panic!("{e:?}"), + }; + assert_eq!(actual.operator, BinaryOperator::Sub); + } + + #[test] + fn test_arithmetic() { + let input = "1 * (2 - 3)"; + let tokens = crate::token::lexer(input); + // The RHS should be a binary expression. + let actual = binary_expression.parse(&tokens).unwrap(); + assert_eq!(actual.operator, BinaryOperator::Mul); + let BinaryPart::BinaryExpression(rhs) = actual.right else { + panic!("Expected RHS to be another binary expression"); + }; + assert_eq!(rhs.operator, BinaryOperator::Sub); + assert_eq!( + rhs.right, + BinaryPart::Literal(Box::new(Literal { + start: 9, + end: 10, + value: JValue::Number(JNumber::from(3)), + raw: "3".to_owned(), + })) + ); + } + + #[test] + fn assign_brackets() { + let test_input = "const thickness_squared = (1 + 1)"; + let tokens = crate::token::lexer(test_input); + let _decl = declaration.parse(&tokens).unwrap(); + } + + #[test] + fn test_nested_arithmetic() { + let input = "1 * ((2 - 3) / 4)"; + let tokens = crate::token::lexer(input); + // The RHS should be a binary expression. 
+ let outer = binary_expression.parse(&tokens).unwrap(); + assert_eq!(outer.operator, BinaryOperator::Mul); + let BinaryPart::BinaryExpression(middle) = outer.right else { + panic!("Expected RHS to be another binary expression"); + }; + + assert_eq!(middle.operator, BinaryOperator::Div); + let BinaryPart::BinaryExpression(inner) = middle.left else { + panic!("expected nested binary expression"); + }; + assert_eq!(inner.operator, BinaryOperator::Sub); + } + + #[test] + fn check_parsers_work_the_same() { + for (i, test_program) in [ + "let x = 1 * (3 - 4)", + r#" +// this is a comment +const yo = { a: { b: { c: '123' } } } + +const key = 'c' +const things = "things" + +// this is also a comment"#, + r#"const three = 3 + +const yo = 3 +"#, + r#"const x = 1 // this is an inline comment"#, + r#"fn x = () => { + return sg + return sg + }"#, + r#"const x = -leg2 + thickness"#, + r#"const obj = { a: 1, b: 2 } + const height = 1 - obj.a"#, + r#"const obj = { a: 1, b: 2 } + const height = 1 - obj["a"]"#, + r#"const obj = { a: 1, b: 2 } + const height = obj["a"] - 1"#, + r#"const obj = { a: 1, b: 2 } + const height = [1 - obj["a"], 0]"#, + r#"const obj = { a: 1, b: 2 } + const height = [obj["a"] - 1, 0]"#, + r#"const obj = { a: 1, b: 2 } + const height = [obj["a"] -1, 0]"#, + "const height = 1 - obj.a", + "const six = 1 + 2 + 3", + "const five = 3 * 1 + 2", + r#"const height = [ obj["a"], 0 ]"#, + r#"const obj = { a: 1, b: 2 } + const height = obj["a"]"#, + r#"const prop = yo["one"][two]"#, + r#"const pt1 = b1[x]"#, + "const prop = yo.one.two.three.four", + r#"const pt1 = b1[0]"#, + r#"const pt1 = b1['zero']"#, + r#"const pt1 = b1.zero"#, + "const sg = startSketchAt(pos)", + "const sg = startSketchAt(pos) |> line([0, -scale], %)", + r#"const sg = -scale"#, + "lineTo({ to: [0, -1] })", + "const myArray = [0..10]", + r#" + fn firstPrimeNumber = () => { + return 2 + } + firstPrimeNumber()"#, + r#"fn thing = (param) => { + return true + } + thing(false)"#, + r#"const mySketch = startSketchAt([0,0]) + |> lineTo({ to: [0, 1], tag: 'myPath' }, %) + |> lineTo([1, 1], %) + |> lineTo({ to: [1,0], tag: "rightPath" }, %) + |> close(%)"#, + "const mySketch = startSketchAt([0,0]) |> lineTo([1, 1], %) |> close(%)", + "const myBox = startSketchAt(p)", + r#"const myBox = f(1) |> g(2)"#, + r#"const myBox = startSketchAt(p) |> line([0, l], %)"#, + "lineTo({ to: [0, 1] })", + "lineTo({ to: [0, 1], from: [3, 3] })", + "lineTo({to:[0, 1]})", + "lineTo({ to: [0, 1], from: [3, 3]})", + "lineTo({ to: [0, 1],from: [3, 3] })", + "const mySketch = startSketchAt([0,0])", + "log(5, \"hello\", aIdentifier)", + r#"5 + "a""#, + "line([0, l], %)", + ] + .into_iter() + .enumerate() + { + // Run the original parser + let tokens = crate::token::lexer(test_program); + let expected = crate::parser::Parser::new(tokens.clone()) + .ast() + .expect("Old parser failed"); + + // Run the second parser, check it matches the first parser. 
+ let actual = match program.parse(&tokens) { + Ok(x) => x, + Err(_e) => panic!("could not parse test {i}"), + }; + assert_eq!( + expected, actual, + "old parser (left) and new parser (right) disagree on test {i}" + ); + } + } + + #[test] + fn binary_expression_ignores_whitespace() { + let tests = ["1 - 2", "1- 2", "1 -2", "1-2"]; + for test in tests { + let tokens = crate::token::lexer(test); + let actual = binary_expression.parse(&tokens).unwrap(); + assert_eq!(actual.operator, BinaryOperator::Sub); + let BinaryPart::Literal(left) = actual.left else { + panic!("should be expression"); + }; + assert_eq!(left.value, serde_json::Value::Number(1.into())); + let BinaryPart::Literal(right) = actual.right else { + panic!("should be expression"); + }; + assert_eq!(right.value, serde_json::Value::Number(2.into())); + } + } + + #[test] + fn some_pipe_expr() { + let test_program = r#"x() + |> y() /* this is + a comment + spanning a few lines */ + |> z()"#; + let tokens = crate::token::lexer(test_program); + let actual = pipe_expression.parse(&tokens).unwrap(); + let n = actual.non_code_meta.non_code_nodes.len(); + assert_eq!(n, 1, "expected one comment in pipe expression but found {n}"); + let nc = &actual.non_code_meta.non_code_nodes.get(&1).unwrap()[0]; + assert!(nc.value().starts_with("this")); + assert!(nc.value().ends_with("lines")); + } + + #[test] + fn comments_in_pipe_expr() { + for (i, test_program) in [ + r#"y() |> /*hi*/ z(%)"#, + "1 |>/*hi*/ f", + r#"y() |> /*hi*/ z(%)"#, + "1 /*hi*/ |> f", + "1 + // Hi + |> f", + "1 + /* Hi + there + */ + |> f", + ] + .into_iter() + .enumerate() + { + let tokens = crate::token::lexer(test_program); + let actual = pipe_expression.parse(&tokens); + assert!(actual.is_ok(), "could not parse test {i}, '{test_program}'"); + let actual = actual.unwrap(); + let n = actual.non_code_meta.non_code_nodes.len(); + assert_eq!(n, 1, "expected one comment in pipe expression but found {n}",) + } + } + + #[test] + fn comments() { + for (i, (test_program, expected)) in [ + ( + "//hi", + NonCodeNode { + start: 0, + end: 4, + value: NonCodeValue::BlockComment { + value: "hi".to_owned(), + style: CommentStyle::Line, + }, + }, + ), + ( + "/*hello*/", + NonCodeNode { + start: 0, + end: 9, + value: NonCodeValue::BlockComment { + value: "hello".to_owned(), + style: CommentStyle::Block, + }, + }, + ), + ( + "/* hello */", + NonCodeNode { + start: 0, + end: 11, + value: NonCodeValue::BlockComment { + value: "hello".to_owned(), + style: CommentStyle::Block, + }, + }, + ), + ( + "/* \nhello */", + NonCodeNode { + start: 0, + end: 12, + value: NonCodeValue::BlockComment { + value: "hello".to_owned(), + style: CommentStyle::Block, + }, + }, + ), + ( + " + /* hello */", + NonCodeNode { + start: 0, + end: 29, + value: NonCodeValue::BlockComment { + value: "hello".to_owned(), + style: CommentStyle::Block, + }, + }, + ), + ( + // Empty line with trailing whitespace + " + + /* hello */", + NonCodeNode { + start: 0, + end: 32, + value: NonCodeValue::NewLineBlockComment { + value: "hello".to_owned(), + style: CommentStyle::Block, + }, + }, + ), + ( + // Empty line, no trailing whitespace + " + + /* hello */", + NonCodeNode { + start: 0, + end: 30, + value: NonCodeValue::NewLineBlockComment { + value: "hello".to_owned(), + style: CommentStyle::Block, + }, + }, + ), + ( + r#"/* block + comment */"#, + NonCodeNode { + start: 0, + end: 39, + value: NonCodeValue::BlockComment { + value: "block\n comment".to_owned(), + style: CommentStyle::Block, + }, + }, + ), + ] + .into_iter() + .enumerate() + 
{ + let tokens = crate::token::lexer(test_program); + let actual = non_code_node.parse(&tokens); + assert!(actual.is_ok(), "could not parse test {i}: {actual:#?}"); + let actual = actual.unwrap(); + assert_eq!(actual, expected, "failed test {i}"); + } + } + + #[test] + fn recognize_invalid_params() { + let test_fn = "(let) => { return 1 }"; + let tokens = crate::token::lexer(test_fn); + let err = function_expression.parse(&tokens).unwrap_err().into_inner(); + let cause = err.cause.unwrap(); + // This is the token `let` + assert_eq!(cause.source_ranges(), vec![SourceRange([1, 4])]); + assert_eq!(cause.message(), "Cannot assign a variable to a reserved keyword: let"); + } + + #[test] + fn comment_in_string() { + let string_literal = r#"" + // a comment + ""#; + let tokens = crate::token::lexer(string_literal); + let parsed_literal = literal.parse(&tokens).unwrap(); + assert_eq!( + parsed_literal.value, + JValue::String( + " + // a comment + " + .to_owned() + ) + ); + } + + #[test] + fn pipes_on_pipes_minimal() { + let test_program = r#"startSketchAt([0, 0]) + |> lineTo([0, -0], %) // MoveRelative + + show(svg) + "#; + let tokens = crate::token::lexer(test_program); + let mut slice = &tokens[..]; + let _actual = pipe_expression.parse_next(&mut slice).unwrap(); + assert_eq!(slice[0].token_type, TokenType::Whitespace); + } + + #[test] + fn test_pipes_on_pipes() { + let test_program = include_str!("../../../tests/executor/inputs/pipes_on_pipes.kcl"); + let tokens = crate::token::lexer(test_program); + let _actual = program.parse(&tokens).unwrap(); + } + + #[test] + fn test_cube() { + let test_program = include_str!("../../../tests/executor/inputs/cube.kcl"); + let tokens = crate::token::lexer(test_program); + match program.parse(&tokens) { + Ok(_) => {} + Err(e) => { + panic!("{e:#?}"); + } + } + } + + #[test] + fn test_parameter_list() { + let tests = [ + ("", vec![]), + ("a", vec!["a"]), + ("a, b", vec!["a", "b"]), + ("a,b", vec!["a", "b"]), + ]; + for (i, (input, expected)) in tests.into_iter().enumerate() { + let tokens = crate::token::lexer(input); + let actual = parameters.parse(&tokens); + assert!(actual.is_ok(), "could not parse test {i}"); + let actual_ids: Vec<_> = actual.unwrap().into_iter().map(|id| id.name).collect(); + assert_eq!(actual_ids, expected); + } + } + + #[test] + fn test_user_function() { + let input = "() => { + return 2 + }"; + + let tokens = crate::token::lexer(input); + let actual = function_expression.parse(&tokens); + assert!(actual.is_ok(), "could not parse test function"); + } + + #[test] + fn test_declaration() { + let tests = ["const myVar = 5", "const myVar=5", "const myVar =5", "const myVar= 5"]; + for test in tests { + // Run the original parser + let tokens = crate::token::lexer(test); + let mut expected_body = crate::parser::Parser::new(tokens.clone()).ast().unwrap().body; + assert_eq!(expected_body.len(), 1); + let BodyItem::VariableDeclaration(expected) = expected_body.pop().unwrap() else { + panic!("Expected variable declaration"); + }; + + // Run the second parser, check it matches the first parser. + let mut actual = declaration.parse(&tokens).unwrap(); + assert_eq!(expected, actual); + + // Inspect its output in more detail. 
+            assert_eq!(actual.kind, VariableKind::Const);
+            assert_eq!(actual.start, 0);
+            assert_eq!(actual.declarations.len(), 1);
+            let decl = actual.declarations.pop().unwrap();
+            assert_eq!(decl.id.name, "myVar");
+            let Value::Literal(value) = decl.init else {
+                panic!("value should be a literal")
+            };
+            assert_eq!(value.end, test.len());
+            assert_eq!(value.raw, "5");
+        }
+    }
+}
diff --git a/src/wasm-lib/kcl/src/parser/parser_impl/error.rs b/src/wasm-lib/kcl/src/parser/parser_impl/error.rs
new file mode 100644
index 000000000..41d1e6a76
--- /dev/null
+++ b/src/wasm-lib/kcl/src/parser/parser_impl/error.rs
@@ -0,0 +1,107 @@
+use winnow::error::{ErrorKind, ParseError, StrContext};
+
+use crate::{
+    errors::{KclError, KclErrorDetails},
+    token::Token,
+};
+
+/// Accumulate context while backtracking errors
+/// Very similar to [`winnow::error::ContextError`] type,
+/// but the 'cause' field is always a [`KclError`],
+/// instead of a dynamic [`std::error::Error`] trait object.
+#[derive(Debug, Clone)]
+pub struct ContextError<C = StrContext> {
+    pub context: Vec<C>,
+    pub cause: Option<KclError>,
+}
+
+impl From<ParseError<&[Token], ContextError>> for KclError {
+    fn from(err: ParseError<&[Token], ContextError>) -> Self {
+        let Some(last_token) = err.input().last() else {
+            return KclError::Syntax(KclErrorDetails {
+                source_ranges: Default::default(),
+                message: "file is empty".to_owned(),
+            });
+        };
+
+        let (input, offset, err) = (err.input().to_vec(), err.offset(), err.into_inner());
+
+        if let Some(e) = err.cause {
+            return e;
+        }
+
+        // See docs on `offset`.
+        if offset >= input.len() {
+            let context = err.context.first();
+            return KclError::Syntax(KclErrorDetails {
+                source_ranges: last_token.as_source_ranges(),
+                message: match context {
+                    Some(what) => format!("Unexpected end of file. The compiler {what}"),
+                    None => "Unexpected end of file while still parsing".to_owned(),
+                },
+            });
+        }
+
+        let bad_token = &input[offset];
+        // TODO: Add the Winnow parser context to the error.
+        // See https://github.com/KittyCAD/modeling-app/issues/784
+        KclError::Syntax(KclErrorDetails {
+            source_ranges: bad_token.as_source_ranges(),
+            message: "Unexpected token".to_owned(),
+        })
+    }
+}
+
+impl<C> From<KclError> for ContextError<C> {
+    fn from(e: KclError) -> Self {
+        Self {
+            context: Default::default(),
+            cause: Some(e),
+        }
+    }
+}
+
+impl<C> std::default::Default for ContextError<C> {
+    fn default() -> Self {
+        Self {
+            context: Default::default(),
+            cause: None,
+        }
+    }
+}
+
+impl<I, C> winnow::error::ParserError<I> for ContextError<C> {
+    #[inline]
+    fn from_error_kind(_input: &I, _kind: ErrorKind) -> Self {
+        Self::default()
+    }
+
+    #[inline]
+    fn append(self, _input: &I, _kind: ErrorKind) -> Self {
+        self
+    }
+
+    #[inline]
+    fn or(self, other: Self) -> Self {
+        other
+    }
+}
+
+impl<C, I> winnow::error::AddContext<I, C> for ContextError<C> {
+    #[inline]
+    fn add_context(mut self, _input: &I, ctx: C) -> Self {
+        self.context.push(ctx);
+        self
+    }
+}
+
+impl<C, I> winnow::error::FromExternalError<I, KclError> for ContextError<C> {
+    #[inline]
+    fn from_external_error(_input: &I, _kind: ErrorKind, e: KclError) -> Self {
+        let mut err = Self::default();
+        {
+            err.cause = Some(e);
+        }
+        err
+    }
+}
diff --git a/src/wasm-lib/kcl/src/token.rs b/src/wasm-lib/kcl/src/token.rs
index c2acb6f34..7a912e312 100644
--- a/src/wasm-lib/kcl/src/token.rs
+++ b/src/wasm-lib/kcl/src/token.rs
@@ -6,6 +6,8 @@ use schemars::JsonSchema;
 use serde::{Deserialize, Serialize};
 use tower_lsp::lsp_types::SemanticTokenType;
 
+use crate::{ast::types::VariableKind, executor::SourceRange};
+
 mod tokeniser;
 
 /// The types of tokens.
@@ -142,15 +144,39 @@ impl Token {
             TokenType::Whitespace | TokenType::LineComment | TokenType::BlockComment
         )
     }
+
+    pub fn as_source_range(&self) -> SourceRange {
+        SourceRange([self.start, self.end])
+    }
+
+    pub fn as_source_ranges(&self) -> Vec<SourceRange> {
+        vec![self.as_source_range()]
+    }
+
+    /// Is this token the beginning of a variable/function declaration?
+    /// If so, what kind?
+    /// If not, returns None.
+    pub fn declaration_keyword(&self) -> Option<VariableKind> {
+        if !matches!(self.token_type, TokenType::Keyword) {
+            return None;
+        }
+        Some(match self.value.as_str() {
+            "var" => VariableKind::Var,
+            "let" => VariableKind::Let,
+            "fn" => VariableKind::Fn,
+            "const" => VariableKind::Const,
+            _ => return None,
+        })
+    }
 }
 
-impl From<Token> for crate::executor::SourceRange {
+impl From<Token> for SourceRange {
     fn from(token: Token) -> Self {
         Self([token.start, token.end])
     }
 }
 
-impl From<&Token> for crate::executor::SourceRange {
+impl From<&Token> for SourceRange {
     fn from(token: &Token) -> Self {
         Self([token.start, token.end])
     }
diff --git a/src/wasm-lib/tests/executor/inputs/cube.kcl b/src/wasm-lib/tests/executor/inputs/cube.kcl
new file mode 100644
index 000000000..d082e5116
--- /dev/null
+++ b/src/wasm-lib/tests/executor/inputs/cube.kcl
@@ -0,0 +1,12 @@
+fn cube = (pos, scale) => {
+  const sg = startSketchAt(pos)
+    |> line([0, scale], %)
+    |> line([scale, 0], %)
+    |> line([0, -scale], %)
+
+  return sg
+}
+
+const b1 = cube([0,0], 10)
+const pt1 = b1[0]
+show(b1)
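
For reference, a minimal sketch of how the pieces in this diff fit together, mirroring what the tests above do: lex the source with crate::token::lexer, run the top-level program parser, and let the From impl added in parser_impl/error.rs turn a winnow ParseError into a KclError. The wrapper name parse_kcl is hypothetical and the paths assume in-crate use exactly as the tests do; this is a sketch under those assumptions, not code from the change itself.

// Hypothetical helper (not part of this diff) showing the intended call pattern.
fn parse_kcl(source: &str) -> Result<crate::ast::types::Program, crate::errors::KclError> {
    // Lex the raw KCL source into tokens, as every test in this diff does.
    let tokens = crate::token::lexer(source);
    // Run the top-level winnow `program` parser over the token slice.
    // On failure, the ParseError<&[Token], ContextError> is converted into a
    // KclError via the From impl in parser_impl/error.rs.
    program.parse(&tokens).map_err(crate::errors::KclError::from)
}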