From 455fb49fb656961dae28f33eba1186f4439ef232 Mon Sep 17 00:00:00 2001 From: Nick Cameron Date: Fri, 22 Nov 2024 13:25:14 +1300 Subject: [PATCH] Support multiple errors and warnings in the parser (#4534) Signed-off-by: Nick Cameron --- src/wasm-lib/kcl/src/ast/modify.rs | 4 +- src/wasm-lib/kcl/src/ast/types.rs | 30 -- src/wasm-lib/kcl/src/errors.rs | 7 + src/wasm-lib/kcl/src/executor.rs | 3 +- src/wasm-lib/kcl/src/lib.rs | 3 +- src/wasm-lib/kcl/src/lsp/kcl/mod.rs | 7 +- src/wasm-lib/kcl/src/parser.rs | 97 +++++- src/wasm-lib/kcl/src/parser/parser_impl.rs | 305 +++++++++++------- .../kcl/src/parser/parser_impl/error.rs | 138 +++++--- src/wasm-lib/kcl/src/simulation_tests.rs | 2 +- src/wasm-lib/kcl/src/token.rs | 33 +- src/wasm-lib/kcl/src/token/tokeniser.rs | 26 +- src/wasm-lib/kcl/src/unparser.rs | 13 - 13 files changed, 436 insertions(+), 232 deletions(-) diff --git a/src/wasm-lib/kcl/src/ast/modify.rs b/src/wasm-lib/kcl/src/ast/modify.rs index 2a973da1b..5fbf31950 100644 --- a/src/wasm-lib/kcl/src/ast/modify.rs +++ b/src/wasm-lib/kcl/src/ast/modify.rs @@ -185,7 +185,9 @@ pub async fn modify_ast_for_sketch( let recasted = program.ast.recast(&FormatOptions::default(), 0); // Re-parse the ast so we get the correct source ranges. - *program = crate::parser::parse_str(&recasted, module_id)?.into(); + *program = crate::parser::parse_str(&recasted, module_id) + .parse_errs_as_err()? + .into(); Ok(recasted) } diff --git a/src/wasm-lib/kcl/src/ast/types.rs b/src/wasm-lib/kcl/src/ast/types.rs index 148003147..dfa694500 100644 --- a/src/wasm-lib/kcl/src/ast/types.rs +++ b/src/wasm-lib/kcl/src/ast/types.rs @@ -3491,36 +3491,6 @@ const cylinder = startSketchOn('-XZ') assert_eq!(l.raw, "false"); } - #[tokio::test(flavor = "multi_thread")] - async fn test_parse_tag_named_std_lib() { - let some_program_string = r#"startSketchOn('XY') - |> startProfileAt([0, 0], %) - |> line([5, 5], %, $xLine) -"#; - let result = crate::parser::top_level_parse(some_program_string); - - assert!(result.is_err()); - assert_eq!( - result.unwrap_err().to_string(), - r#"syntax: KclErrorDetails { source_ranges: [SourceRange([76, 82, 0])], message: "Cannot assign a tag to a reserved keyword: xLine" }"# - ); - } - - #[tokio::test(flavor = "multi_thread")] - async fn test_parse_empty_tag() { - let some_program_string = r#"startSketchOn('XY') - |> startProfileAt([0, 0], %) - |> line([5, 5], %, $) -"#; - let result = crate::parser::top_level_parse(some_program_string); - - assert!(result.is_err()); - assert_eq!( - result.unwrap_err().to_string(), - r#"syntax: KclErrorDetails { source_ranges: [SourceRange([57, 59, 0])], message: "Unexpected token: |>" }"# - ); - } - #[tokio::test(flavor = "multi_thread")] async fn test_parse_digest() { let prog1_string = r#"startSketchOn('XY') diff --git a/src/wasm-lib/kcl/src/errors.rs b/src/wasm-lib/kcl/src/errors.rs index b0caaeb3b..65c0cc2a6 100644 --- a/src/wasm-lib/kcl/src/errors.rs +++ b/src/wasm-lib/kcl/src/errors.rs @@ -44,6 +44,13 @@ pub struct KclErrorDetails { } impl KclError { + pub fn internal(message: String) -> KclError { + KclError::Internal(KclErrorDetails { + source_ranges: Default::default(), + message, + }) + } + /// Get the error message. pub fn get_message(&self) -> String { format!("{}: {}", self.error_type(), self.message()) diff --git a/src/wasm-lib/kcl/src/executor.rs b/src/wasm-lib/kcl/src/executor.rs index 1f88da71a..7b148eab9 100644 --- a/src/wasm-lib/kcl/src/executor.rs +++ b/src/wasm-lib/kcl/src/executor.rs @@ -1954,7 +1954,8 @@ impl ExecutorContext { } let module_id = exec_state.add_module(resolved_path.clone()); let source = self.fs.read_to_string(&resolved_path, source_range).await?; - let program = crate::parser::parse_str(&source, module_id)?; + // TODO handle parsing errors properly + let program = crate::parser::parse_str(&source, module_id).parse_errs_as_err()?; let (module_memory, module_exports) = { exec_state.import_stack.push(resolved_path.clone()); let original_execution = self.engine.replace_execution_kind(ExecutionKind::Isolated); diff --git a/src/wasm-lib/kcl/src/lib.rs b/src/wasm-lib/kcl/src/lib.rs index 32aa63777..e8212e203 100644 --- a/src/wasm-lib/kcl/src/lib.rs +++ b/src/wasm-lib/kcl/src/lib.rs @@ -89,7 +89,8 @@ impl Program { pub fn parse(input: &str) -> Result { let module_id = ModuleId::default(); let tokens = token::lexer(input, module_id)?; - let ast = parser::parse_tokens(tokens)?; + // TODO handle parsing errors properly + let ast = parser::parse_tokens(tokens).parse_errs_as_err()?; Ok(Program { ast }) } diff --git a/src/wasm-lib/kcl/src/lsp/kcl/mod.rs b/src/wasm-lib/kcl/src/lsp/kcl/mod.rs index 7136018ea..e5db11bc4 100644 --- a/src/wasm-lib/kcl/src/lsp/kcl/mod.rs +++ b/src/wasm-lib/kcl/src/lsp/kcl/mod.rs @@ -299,7 +299,8 @@ impl crate::lsp::backend::Backend for Backend { // Lets update the ast. let result = crate::parser::parse_tokens(tokens.clone()); - let mut ast = match result { + // TODO handle parse errors properly + let mut ast = match result.parse_errs_as_err() { Ok(ast) => ast, Err(err) => { self.add_to_diagnostics(¶ms, &[err], true).await; @@ -1301,7 +1302,7 @@ impl LanguageServer for Backend { // I don't know if we need to do this again since it should be updated in the context. // But I figure better safe than sorry since this will write back out to the file. let module_id = ModuleId::default(); - let Ok(ast) = crate::parser::parse_str(current_code, module_id) else { + let Ok(ast) = crate::parser::parse_str(current_code, module_id).parse_errs_as_err() else { return Ok(None); }; // Now recast it. @@ -1335,7 +1336,7 @@ impl LanguageServer for Backend { // I don't know if we need to do this again since it should be updated in the context. // But I figure better safe than sorry since this will write back out to the file. let module_id = ModuleId::default(); - let Ok(mut ast) = crate::parser::parse_str(current_code, module_id) else { + let Ok(mut ast) = crate::parser::parse_str(current_code, module_id).parse_errs_as_err() else { return Ok(None); }; diff --git a/src/wasm-lib/kcl/src/parser.rs b/src/wasm-lib/kcl/src/parser.rs index 2f78fbd80..dd91c48c9 100644 --- a/src/wasm-lib/kcl/src/parser.rs +++ b/src/wasm-lib/kcl/src/parser.rs @@ -1,3 +1,5 @@ +use parser_impl::ParseContext; + use crate::{ ast::types::{ModuleId, Node, Program}, errors::{KclError, KclErrorDetails}, @@ -12,20 +14,31 @@ pub(crate) mod parser_impl; pub const PIPE_SUBSTITUTION_OPERATOR: &str = "%"; pub const PIPE_OPERATOR: &str = "|>"; +// `?` like behavior for `Result`s to return a ParseResult if there is an error. +macro_rules! pr_try { + ($e: expr) => { + match $e { + Ok(a) => a, + Err(e) => return e.into(), + } + }; +} + #[cfg(test)] /// Parse the given KCL code into an AST. This is the top-level. -pub fn top_level_parse(code: &str) -> Result, KclError> { +pub fn top_level_parse(code: &str) -> ParseResult { let module_id = ModuleId::default(); parse_str(code, module_id) } /// Parse the given KCL code into an AST. -pub fn parse_str(code: &str, module_id: ModuleId) -> Result, KclError> { - let tokens = crate::token::lexer(code, module_id)?; +pub fn parse_str(code: &str, module_id: ModuleId) -> ParseResult { + let tokens = pr_try!(crate::token::lexer(code, module_id)); parse_tokens(tokens) } -pub fn parse_tokens(tokens: Vec) -> Result, KclError> { +/// Parse the supplied tokens into an AST. +pub fn parse_tokens(tokens: Vec) -> ParseResult { let (tokens, unknown_tokens): (Vec, Vec) = tokens .into_iter() .partition(|token| token.token_type != TokenType::Unknown); @@ -38,13 +51,13 @@ pub fn parse_tokens(tokens: Vec) -> Result, KclError> { } else { format!("found unknown tokens [{}]", token_list.join(", ")) }; - return Err(KclError::Lexical(KclErrorDetails { source_ranges, message })); + return KclError::Lexical(KclErrorDetails { source_ranges, message }).into(); } // Important, to not call this before the unknown tokens check. if tokens.is_empty() { // Empty file should just do nothing. - return Ok(Node::::default()); + return Node::::default().into(); } // Check all the tokens are whitespace or comments. @@ -52,8 +65,78 @@ pub fn parse_tokens(tokens: Vec) -> Result, KclError> { .iter() .all(|t| t.token_type.is_whitespace() || t.token_type.is_comment()) { - return Ok(Node::::default()); + return Node::::default().into(); } parser_impl::run_parser(&mut tokens.as_slice()) } + +/// Result of parsing. +/// +/// Will be a KclError if there was a lexing error or some unexpected error during parsing. +/// TODO - lexing errors should be included with the parse errors. +/// Will be Ok otherwise, including if there were parsing errors. Any errors or warnings will +/// be in the ParseContext. If an AST was produced, then that will be in the Option. +/// +/// Invariants: +/// - if there are no errors, then the Option will be Some +/// - if the Option is None, then there will be at least one error in the ParseContext. +pub(crate) struct ParseResult(pub Result<(Option>, ParseContext), KclError>); + +impl ParseResult { + #[cfg(test)] + pub fn unwrap(self) -> Node { + self.0.unwrap().0.unwrap() + } + + #[cfg(test)] + pub fn is_ok(&self) -> bool { + match &self.0 { + Ok((p, pc)) => p.is_some() && pc.errors.is_empty(), + Err(_) => false, + } + } + + #[cfg(test)] + #[track_caller] + pub fn unwrap_errs(&self) -> &[parser_impl::error::ParseError] { + &self.0.as_ref().unwrap().1.errors + } + + /// Treat parsing errors as an Error. + pub fn parse_errs_as_err(self) -> Result, KclError> { + let (p, errs) = self.0?; + if !errs.errors.is_empty() { + // TODO could summarise all errors rather than just the first one. + return Err(errs.errors.into_iter().next().unwrap().into()); + } + match p { + Some(p) => Ok(p), + None => Err(KclError::internal("Unknown parsing error".to_owned())), + } + } +} + +impl From>, ParseContext), KclError>> for ParseResult { + fn from(r: Result<(Option>, ParseContext), KclError>) -> ParseResult { + ParseResult(r) + } +} + +impl From<(Option>, ParseContext)> for ParseResult { + fn from(p: (Option>, ParseContext)) -> ParseResult { + ParseResult(Ok(p)) + } +} + +impl From> for ParseResult { + fn from(p: Node) -> ParseResult { + ParseResult(Ok((Some(p), ParseContext::default()))) + } +} + +impl From for ParseResult { + fn from(e: KclError) -> ParseResult { + ParseResult(Err(e)) + } +} diff --git a/src/wasm-lib/kcl/src/parser/parser_impl.rs b/src/wasm-lib/kcl/src/parser/parser_impl.rs index 93278c132..69b099729 100644 --- a/src/wasm-lib/kcl/src/parser/parser_impl.rs +++ b/src/wasm-lib/kcl/src/parser/parser_impl.rs @@ -1,4 +1,4 @@ -use std::{collections::HashMap, str::FromStr}; +use std::{cell::RefCell, collections::HashMap, str::FromStr}; use winnow::{ combinator::{alt, delimited, opt, peek, preceded, repeat, separated, terminated}, @@ -8,6 +8,7 @@ use winnow::{ token::{any, one_of, take_till}, }; +use self::error::ParseError; use crate::{ ast::types::{ ArrayExpression, ArrayRangeExpression, BinaryExpression, BinaryOperator, BinaryPart, BodyItem, BoxNode, @@ -25,14 +26,93 @@ use crate::{ token::{Token, TokenType}, }; -mod error; +pub(crate) mod error; + +thread_local! { + /// The current `ParseContext`. `None` if parsing is not currently happening on this thread. + static CTXT: RefCell> = const { RefCell::new(None) }; +} + +pub type TokenSlice<'slice, 'input> = &'slice mut &'input [Token]; + +pub fn run_parser(i: TokenSlice) -> super::ParseResult { + ParseContext::init(); + + let result = program.parse(i).save_err(); + let ctxt = ParseContext::take(); + result.map(|o| (o, ctxt)).into() +} + +/// Context built up while parsing a program. +/// +/// When returned from parsing contains the errors and warnings from the current parse. +#[derive(Debug, Clone, Default)] +pub(crate) struct ParseContext { + pub errors: Vec, + #[allow(dead_code)] + pub warnings: Vec, +} + +impl ParseContext { + fn new() -> Self { + ParseContext { + errors: Vec::new(), + warnings: Vec::new(), + } + } + + /// Set a new `ParseContext` in thread-local storage. Panics if one already exists. + fn init() { + assert!(CTXT.with_borrow(|ctxt| ctxt.is_none())); + CTXT.with_borrow_mut(|ctxt| *ctxt = Some(ParseContext::new())); + } + + /// Take the current `ParseContext` from thread-local storage, leaving `None`. Panics if a `ParseContext` + /// is not present. + fn take() -> ParseContext { + CTXT.with_borrow_mut(|ctxt| ctxt.take()).unwrap() + } + + /// Add an error to the current `ParseContext`, panics if there is none. + fn err(e: ParseError) { + CTXT.with_borrow_mut(|ctxt| ctxt.as_mut().unwrap().errors.push(e)); + } + + /// Add a warning to the current `ParseContext`, panics if there is none. + #[allow(dead_code)] + fn warn(mut e: ParseError) { + e.severity = error::Severity::Warning; + CTXT.with_borrow_mut(|ctxt| ctxt.as_mut().unwrap().warnings.push(e)); + } +} type PResult = winnow::prelude::PResult; -type TokenSlice<'slice, 'input> = &'slice mut &'input [Token]; +/// Helper trait for dealing with PResults and the `ParseContext`. +trait PResultEx { + type O; -pub fn run_parser(i: TokenSlice) -> Result, KclError> { - program.parse(i).map_err(KclError::from) + /// If self is Ok, then returns it wrapped in `Ok(Some())`. + /// If self is a parsing error, saves it to the current `ParseContext` and returns `Ok(None)`. + /// If self is some other kind of error, then returns it. + fn save_err(self) -> Result, KclError>; +} + +impl> PResultEx for Result { + type O = O; + + fn save_err(self) -> Result, KclError> { + match self { + Ok(o) => Ok(Some(o)), + Err(e) => match e.into() { + error::ErrorKind::Parse(e) => { + ParseContext::err(e); + Ok(None) + } + error::ErrorKind::Internal(e) => Err(e), + }, + } + } } fn expected(what: &'static str) -> StrContext { @@ -41,7 +121,7 @@ fn expected(what: &'static str) -> StrContext { fn program(i: TokenSlice) -> PResult> { let shebang = opt(shebang).parse_next(i)?; - let mut out = function_body.parse_next(i)?; + let mut out: Node = function_body.parse_next(i)?; // Add the shebang to the non-code meta. if let Some(shebang) = shebang { @@ -2069,16 +2149,16 @@ mod tests { // Try to use it as a variable name. let code = format!(r#"{} = 0"#, word); let result = crate::parser::top_level_parse(code.as_str()); - let err = result.unwrap_err(); + let err = &result.unwrap_errs()[0]; // Which token causes the error may change. In "return = 0", for // example, "return" is the problem. assert!( - err.message().starts_with("Unexpected token: ") + err.message.starts_with("Unexpected token: ") || err - .message() + .message .starts_with("Cannot assign a variable to a reserved keyword: "), "Error message is: {}", - err.message(), + err.message, ); } @@ -2108,19 +2188,21 @@ mod tests { fn weird_program_unclosed_paren() { let tokens = crate::token::lexer("fn firstPrime=(", ModuleId::default()).unwrap(); let last = tokens.last().unwrap(); - let err: KclError = program.parse(&tokens).unwrap_err().into(); - assert_eq!(err.source_ranges(), last.as_source_ranges()); + let err: super::error::ErrorKind = program.parse(&tokens).unwrap_err().into(); + let err = err.unwrap_parse_error(); + assert_eq!(vec![err.source_range], last.as_source_ranges()); // TODO: Better comment. This should explain the compiler expected ) because the user had started declaring the function's parameters. // Part of https://github.com/KittyCAD/modeling-app/issues/784 - assert_eq!(err.message(), "Unexpected end of file. The compiler expected )"); + assert_eq!(err.message, "Unexpected end of file. The compiler expected )"); } #[test] fn weird_program_just_a_pipe() { let tokens = crate::token::lexer("|", ModuleId::default()).unwrap(); - let err: KclError = program.parse(&tokens).unwrap_err().into(); - assert_eq!(err.source_ranges(), vec![SourceRange([0, 1, 0])]); - assert_eq!(err.message(), "Unexpected token: |"); + let err: super::error::ErrorKind = program.parse(&tokens).unwrap_err().into(); + let err = err.unwrap_parse_error(); + assert_eq!(vec![err.source_range], vec![SourceRange([0, 1, 0])]); + assert_eq!(err.message, "Unexpected token: |"); } #[test] @@ -3048,15 +3130,29 @@ const mySk1 = startSketchAt([0, 0])"#; assert!(result.is_ok()); } + #[track_caller] + fn assert_err(p: &str, msg: &str, src: [usize; 2]) { + let result = crate::parser::top_level_parse(p); + let err = &result.unwrap_errs()[0]; + assert_eq!(err.message, msg); + assert_eq!(&err.source_range.0[..2], &src); + } + + #[track_caller] + fn assert_err_contains(p: &str, expected: &str) { + let result = crate::parser::top_level_parse(p); + let err = &result.unwrap_errs()[0].message; + assert!(err.contains(expected), "actual='{err}'"); + } + #[test] fn test_parse_half_pipe_small() { - let code = "const secondExtrude = startSketchOn('XY') + assert_err_contains( + "const secondExtrude = startSketchOn('XY') |> startProfileAt([0,0], %) - |"; - let result = crate::parser::top_level_parse(code); - assert!(result.is_err()); - let actual = result.err().unwrap().to_string(); - assert!(actual.contains("Unexpected token: |"), "actual={actual:?}"); + |", + "Unexpected token: |", + ); } #[test] @@ -3130,41 +3226,29 @@ const firstExtrude = startSketchOn('XY') const secondExtrude = startSketchOn('XY') |> startProfileAt([0,0], %) |"; - let result = crate::parser::top_level_parse(code); - assert!(result.is_err()); - assert!(result.err().unwrap().to_string().contains("Unexpected token: |")); + assert_err_contains(code, "Unexpected token: |"); } #[test] fn test_parse_greater_bang() { - let module_id = ModuleId::default(); - let err = crate::parser::parse_str(">!", module_id).unwrap_err(); - assert_eq!( - err.to_string(), - r#"syntax: KclErrorDetails { source_ranges: [SourceRange([0, 1, 0])], message: "Unexpected token: >" }"# - ); + assert_err(">!", "Unexpected token: >", [0, 1]); } #[test] fn test_parse_z_percent_parens() { - let module_id = ModuleId::default(); - let err = crate::parser::parse_str("z%)", module_id).unwrap_err(); - assert_eq!( - err.to_string(), - r#"syntax: KclErrorDetails { source_ranges: [SourceRange([1, 2, 0])], message: "Unexpected token: %" }"# - ); + assert_err("z%)", "Unexpected token: %", [1, 2]); } #[test] fn test_parse_parens_unicode() { - let module_id = ModuleId::default(); - let err = crate::parser::parse_str("(ޜ", module_id).unwrap_err(); + let result = crate::parser::top_level_parse("(ޜ"); + let KclError::Lexical(details) = result.0.unwrap_err() else { + panic!(); + }; // TODO: Better errors when program cannot tokenize. // https://github.com/KittyCAD/modeling-app/issues/696 - assert_eq!( - err.to_string(), - r#"lexical: KclErrorDetails { source_ranges: [SourceRange([1, 2, 0])], message: "found unknown token 'ޜ'" }"# - ); + assert_eq!(details.message, "found unknown token 'ޜ'"); + assert_eq!(&details.source_ranges[0].0[..2], &[1, 2]); } #[test] @@ -3183,61 +3267,46 @@ const bracket = [-leg2 + thickness, 0] r#" z(-[["#, ) - .unwrap_err(); + .unwrap_errs(); } #[test] fn test_parse_weird_new_line_function() { - let err = crate::parser::top_level_parse( + assert_err( r#"z - (--#"#, - ) - .unwrap_err(); - assert_eq!( - err.to_string(), - r#"syntax: KclErrorDetails { source_ranges: [SourceRange([3, 4, 0])], message: "Unexpected token: (" }"# +(--#"#, + "Unexpected token: (", + [2, 3], ); } #[test] fn test_parse_weird_lots_of_fancy_brackets() { - let err = crate::parser::top_level_parse(r#"zz({{{{{{{{)iegAng{{{{{{{##"#).unwrap_err(); - assert_eq!( - err.to_string(), - r#"syntax: KclErrorDetails { source_ranges: [SourceRange([2, 3, 0])], message: "Unexpected token: (" }"# - ); + assert_err(r#"zz({{{{{{{{)iegAng{{{{{{{##"#, "Unexpected token: (", [2, 3]); } #[test] fn test_parse_weird_close_before_open() { - let err = crate::parser::top_level_parse( + assert_err_contains( r#"fn)n e ["#, - ) - .unwrap_err(); - assert!(err - .to_string() - .contains("expected whitespace, found ')' which is brace")); + "expected whitespace, found ')' which is brace", + ); } #[test] fn test_parse_weird_close_before_nada() { - let err = crate::parser::top_level_parse(r#"fn)n-"#).unwrap_err(); - assert!(err - .to_string() - .contains("expected whitespace, found ')' which is brace")); + assert_err_contains(r#"fn)n-"#, "expected whitespace, found ')' which is brace"); } #[test] fn test_parse_weird_lots_of_slashes() { - let err = crate::parser::top_level_parse( + assert_err_contains( r#"J///////////o//+///////////P++++*++++++P///////˟ ++4"#, - ) - .unwrap_err(); - let actual = err.to_string(); - assert!(actual.contains("Unexpected token: +"), "actual={actual:?}"); + "Unexpected token: +", + ); } #[test] @@ -3324,62 +3393,53 @@ e #[test] fn test_error_keyword_in_variable() { - let err = crate::parser::top_level_parse(r#"const let = "thing""#).unwrap_err(); - assert_eq!( - err.to_string(), - r#"syntax: KclErrorDetails { source_ranges: [SourceRange([6, 9, 0])], message: "Cannot assign a variable to a reserved keyword: let" }"# + assert_err( + r#"const let = "thing""#, + "Cannot assign a variable to a reserved keyword: let", + [6, 9], ); } #[test] fn test_error_keyword_in_fn_name() { - let err = crate::parser::top_level_parse(r#"fn let = () {}"#).unwrap_err(); - assert_eq!( - err.to_string(), - r#"syntax: KclErrorDetails { source_ranges: [SourceRange([3, 6, 0])], message: "Cannot assign a variable to a reserved keyword: let" }"# + assert_err( + r#"fn let = () {}"#, + "Cannot assign a variable to a reserved keyword: let", + [3, 6], ); } #[test] fn test_error_stdlib_in_fn_name() { - let err = crate::parser::top_level_parse( + assert_err( r#"fn cos = () => { return 1 }"#, - ) - .unwrap_err(); - assert_eq!( - err.to_string(), - r#"syntax: KclErrorDetails { source_ranges: [SourceRange([3, 6, 0])], message: "Cannot assign a variable to a reserved keyword: cos" }"# + "Cannot assign a variable to a reserved keyword: cos", + [3, 6], ); } #[test] fn test_error_keyword_in_fn_args() { - let err = crate::parser::top_level_parse( + assert_err( r#"fn thing = (let) => { return 1 }"#, + "Cannot assign a variable to a reserved keyword: let", + [12, 15], ) - .unwrap_err(); - assert_eq!( - err.to_string(), - r#"syntax: KclErrorDetails { source_ranges: [SourceRange([12, 15, 0])], message: "Cannot assign a variable to a reserved keyword: let" }"# - ); } #[test] fn test_error_stdlib_in_fn_args() { - let err = crate::parser::top_level_parse( + assert_err( r#"fn thing = (cos) => { return 1 }"#, + "Cannot assign a variable to a reserved keyword: cos", + [12, 15], ) - .unwrap_err(); - assert_eq!( - err.to_string(), - r#"syntax: KclErrorDetails { source_ranges: [SourceRange([12, 15, 0])], message: "Cannot assign a variable to a reserved keyword: cos" }"# - ); } #[test] @@ -3491,28 +3551,20 @@ thing(false) "#, name ); - let err = crate::parser::top_level_parse(&some_program_string).unwrap_err(); - assert_eq!( - err.to_string(), - format!( - r#"syntax: KclErrorDetails {{ source_ranges: [SourceRange([0, {}, 0])], message: "Expected a `fn` variable kind, found: `const`" }}"#, - name.len(), - ) + assert_err( + &some_program_string, + "Expected a `fn` variable kind, found: `const`", + [0, name.len()], ); } } #[test] fn test_error_define_var_as_function() { - let some_program_string = r#"fn thing = "thing""#; - let err = crate::parser::top_level_parse(some_program_string).unwrap_err(); // TODO: https://github.com/KittyCAD/modeling-app/issues/784 // Improve this error message. // It should say that the compiler is expecting a function expression on the RHS. - assert_eq!( - err.to_string(), - r#"syntax: KclErrorDetails { source_ranges: [SourceRange([11, 18, 0])], message: "Unexpected token: \"thing\"" }"# - ); + assert_err(r#"fn thing = "thing""#, "Unexpected token: \"thing\"", [11, 18]); } #[test] @@ -3525,7 +3577,7 @@ thing(false) |> line([-5.09, 12.33], %) asdasd "#; - crate::parser::top_level_parse(test_program).unwrap_err(); + crate::parser::top_level_parse(test_program).unwrap_errs(); } #[test] @@ -3573,18 +3625,41 @@ let myBox = box([0,0], -3, -16, -10) "#; crate::parser::top_level_parse(some_program_string).unwrap(); } + #[test] fn must_use_percent_in_pipeline_fn() { let some_program_string = r#" foo() |> bar(2) "#; - let err = crate::parser::top_level_parse(some_program_string).unwrap_err(); - assert_eq!( - err.to_string(), - r#"syntax: KclErrorDetails { source_ranges: [SourceRange([30, 36, 0])], message: "All expressions in a pipeline must use the % (substitution operator)" }"# + assert_err( + some_program_string, + "All expressions in a pipeline must use the % (substitution operator)", + [30, 36], ); } + + #[test] + fn test_parse_tag_named_std_lib() { + let some_program_string = r#"startSketchOn('XY') + |> startProfileAt([0, 0], %) + |> line([5, 5], %, $xLine) +"#; + assert_err( + some_program_string, + "Cannot assign a tag to a reserved keyword: xLine", + [76, 82], + ); + } + + #[test] + fn test_parse_empty_tag() { + let some_program_string = r#"startSketchOn('XY') + |> startProfileAt([0, 0], %) + |> line([5, 5], %, $) +"#; + assert_err(some_program_string, "Unexpected token: |>", [57, 59]); + } } #[cfg(test)] diff --git a/src/wasm-lib/kcl/src/parser/parser_impl/error.rs b/src/wasm-lib/kcl/src/parser/parser_impl/error.rs index a4697076b..e46f806db 100644 --- a/src/wasm-lib/kcl/src/parser/parser_impl/error.rs +++ b/src/wasm-lib/kcl/src/parser/parser_impl/error.rs @@ -1,12 +1,9 @@ -use winnow::{ - error::{ErrorKind, ParseError, StrContext}, - stream::Stream, -}; +use winnow::{error::StrContext, stream::Stream}; use crate::{ errors::{KclError, KclErrorDetails}, executor::SourceRange, - token::{Input, Token}, + token::Token, }; /// Accumulate context while backtracking errors @@ -19,69 +16,115 @@ pub struct ContextError { pub cause: Option, } -impl From, winnow::error::ContextError>> for KclError { - fn from(err: ParseError, winnow::error::ContextError>) -> Self { - let (input, offset): (Vec, usize) = (err.input().chars().collect(), err.offset()); - let module_id = err.input().state.module_id; +/// An error which occurred during parsing. +/// +/// In contrast to Winnow errors which may not be an actual error but just an attempted parse which +/// didn't work out, these are errors which are always a result of incorrect user code and which should +/// be presented to the user. +#[derive(Debug, Clone)] +pub(crate) struct ParseError { + pub source_range: SourceRange, + pub message: String, + #[allow(dead_code)] + pub suggestion: String, + pub severity: Severity, +} - if offset >= input.len() { - // From the winnow docs: - // - // This is an offset, not an index, and may point to - // the end of input (input.len()) on eof errors. - - return KclError::Lexical(KclErrorDetails { - source_ranges: vec![SourceRange([offset, offset, module_id.as_usize()])], - message: "unexpected EOF while parsing".to_string(), - }); +impl ParseError { + pub(super) fn err(source_range: SourceRange, message: impl ToString) -> ParseError { + ParseError { + source_range, + message: message.to_string(), + suggestion: String::new(), + severity: Severity::Error, } + } - // TODO: Add the Winnow tokenizer context to the error. - // See https://github.com/KittyCAD/modeling-app/issues/784 - let bad_token = &input[offset]; - // TODO: Add the Winnow parser context to the error. - // See https://github.com/KittyCAD/modeling-app/issues/784 - KclError::Lexical(KclErrorDetails { - source_ranges: vec![SourceRange([offset, offset + 1, module_id.as_usize()])], - message: format!("found unknown token '{}'", bad_token), + #[allow(dead_code)] + pub(super) fn with_suggestion( + source_range: SourceRange, + message: impl ToString, + suggestion: impl ToString, + ) -> ParseError { + ParseError { + source_range, + message: message.to_string(), + suggestion: suggestion.to_string(), + severity: Severity::Error, + } + } +} + +impl From for KclError { + fn from(err: ParseError) -> Self { + KclError::Syntax(KclErrorDetails { + source_ranges: vec![err.source_range], + message: err.message, }) } } -impl From> for KclError { - fn from(err: ParseError<&[Token], ContextError>) -> Self { +#[derive(Debug, Clone)] +pub(crate) enum Severity { + #[allow(dead_code)] + Warning, + Error, +} + +/// Helper enum for the below conversion of Winnow errors into either a parse error or an unexpected +/// error. +pub(super) enum ErrorKind { + Parse(ParseError), + Internal(KclError), +} + +impl ErrorKind { + #[cfg(test)] + pub fn unwrap_parse_error(self) -> ParseError { + match self { + ErrorKind::Parse(parse_error) => parse_error, + ErrorKind::Internal(_) => panic!(), + } + } +} + +impl From> for ErrorKind { + fn from(err: winnow::error::ParseError<&[Token], ContextError>) -> Self { let Some(last_token) = err.input().last() else { - return KclError::Syntax(KclErrorDetails { - source_ranges: Default::default(), - message: "file is empty".to_owned(), - }); + return ErrorKind::Parse(ParseError::err(Default::default(), "file is empty")); }; let (input, offset, err) = (err.input().to_vec(), err.offset(), err.into_inner()); if let Some(e) = err.cause { - return e; + return match e { + KclError::Syntax(details) => ErrorKind::Parse(ParseError::err( + details.source_ranges.into_iter().next().unwrap(), + details.message, + )), + e => ErrorKind::Internal(e), + }; } // See docs on `offset`. if offset >= input.len() { let context = err.context.first(); - return KclError::Syntax(KclErrorDetails { - source_ranges: last_token.as_source_ranges(), - message: match context { + return ErrorKind::Parse(ParseError::err( + last_token.as_source_range(), + match context { Some(what) => format!("Unexpected end of file. The compiler {what}"), None => "Unexpected end of file while still parsing".to_owned(), }, - }); + )); } let bad_token = &input[offset]; // TODO: Add the Winnow parser context to the error. // See https://github.com/KittyCAD/modeling-app/issues/784 - KclError::Syntax(KclErrorDetails { - source_ranges: bad_token.as_source_ranges(), - message: format!("Unexpected token: {}", bad_token.value), - }) + ErrorKind::Parse(ParseError::err( + bad_token.as_source_range(), + format!("Unexpected token: {}", bad_token.value), + )) } } @@ -108,12 +151,17 @@ where I: Stream, { #[inline] - fn from_error_kind(_input: &I, _kind: ErrorKind) -> Self { + fn from_error_kind(_input: &I, _kind: winnow::error::ErrorKind) -> Self { Self::default() } #[inline] - fn append(self, _input: &I, _input_checkpoint: &::Checkpoint, _kind: ErrorKind) -> Self { + fn append( + self, + _input: &I, + _input_checkpoint: &::Checkpoint, + _kind: winnow::error::ErrorKind, + ) -> Self { self } @@ -136,7 +184,7 @@ where impl winnow::error::FromExternalError for ContextError { #[inline] - fn from_external_error(_input: &I, _kind: ErrorKind, e: KclError) -> Self { + fn from_external_error(_input: &I, _kind: winnow::error::ErrorKind, e: KclError) -> Self { let mut err = Self::default(); { err.cause = Some(e); diff --git a/src/wasm-lib/kcl/src/simulation_tests.rs b/src/wasm-lib/kcl/src/simulation_tests.rs index d28bf41bb..854badab3 100644 --- a/src/wasm-lib/kcl/src/simulation_tests.rs +++ b/src/wasm-lib/kcl/src/simulation_tests.rs @@ -60,7 +60,7 @@ fn parse(test_name: &str) { }; // Parse the tokens into an AST. - let parse_res = crate::parser::parse_tokens(tokens); + let parse_res = Result::<_, KclError>::Ok(crate::parser::parse_tokens(tokens).unwrap()); assert_snapshot(test_name, "Result of parsing", || { insta::assert_json_snapshot!("ast", parse_res); }); diff --git a/src/wasm-lib/kcl/src/token.rs b/src/wasm-lib/kcl/src/token.rs index 490532e39..9775ce90c 100644 --- a/src/wasm-lib/kcl/src/token.rs +++ b/src/wasm-lib/kcl/src/token.rs @@ -5,7 +5,7 @@ use parse_display::{Display, FromStr}; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use tower_lsp::lsp_types::SemanticTokenType; -use winnow::stream::ContainsToken; +use winnow::{error::ParseError, stream::ContainsToken}; use crate::{ ast::types::{ItemVisibility, ModuleId, VariableKind}, @@ -251,7 +251,36 @@ impl From<&Token> for SourceRange { } pub fn lexer(s: &str, module_id: ModuleId) -> Result, KclError> { - tokeniser::lexer(s, module_id).map_err(From::from) + tokeniser::lex(s, module_id).map_err(From::from) +} + +impl From, winnow::error::ContextError>> for KclError { + fn from(err: ParseError, winnow::error::ContextError>) -> Self { + let (input, offset): (Vec, usize) = (err.input().chars().collect(), err.offset()); + let module_id = err.input().state.module_id; + + if offset >= input.len() { + // From the winnow docs: + // + // This is an offset, not an index, and may point to + // the end of input (input.len()) on eof errors. + + return KclError::Lexical(crate::errors::KclErrorDetails { + source_ranges: vec![SourceRange([offset, offset, module_id.as_usize()])], + message: "unexpected EOF while parsing".to_string(), + }); + } + + // TODO: Add the Winnow tokenizer context to the error. + // See https://github.com/KittyCAD/modeling-app/issues/784 + let bad_token = &input[offset]; + // TODO: Add the Winnow parser context to the error. + // See https://github.com/KittyCAD/modeling-app/issues/784 + KclError::Lexical(crate::errors::KclErrorDetails { + source_ranges: vec![SourceRange([offset, offset + 1, module_id.as_usize()])], + message: format!("found unknown token '{}'", bad_token), + }) + } } #[cfg(test)] diff --git a/src/wasm-lib/kcl/src/token/tokeniser.rs b/src/wasm-lib/kcl/src/token/tokeniser.rs index 48d324be7..ae0bbbc89 100644 --- a/src/wasm-lib/kcl/src/token/tokeniser.rs +++ b/src/wasm-lib/kcl/src/token/tokeniser.rs @@ -62,7 +62,7 @@ lazy_static! { }; } -pub fn lexer(i: &str, module_id: ModuleId) -> Result, ParseError, ContextError>> { +pub fn lex(i: &str, module_id: ModuleId) -> Result, ParseError, ContextError>> { let state = State::new(module_id); let input = Input { input: Located::new(i), @@ -469,7 +469,7 @@ mod tests { fn test_program0() { let program = "const a=5"; let module_id = ModuleId::from_usize(1); - let actual = lexer(program, module_id).unwrap(); + let actual = lex(program, module_id).unwrap(); let expected = vec![ Token { token_type: TokenType::Keyword, @@ -514,7 +514,7 @@ mod tests { fn test_program1() { let program = "54 + 22500 + 6"; let module_id = ModuleId::from_usize(1); - let actual = lexer(program, module_id).unwrap(); + let actual = lex(program, module_id).unwrap(); let expected = vec![ Token { token_type: TokenType::Number, @@ -1388,7 +1388,7 @@ show(part001)"#; value: ")".to_owned(), }, ]; - let actual = lexer(program, module_id).unwrap(); + let actual = lex(program, module_id).unwrap(); assert_tokens(expected, actual); } @@ -1403,7 +1403,7 @@ const things = "things" // this is also a comment"#; let module_id = ModuleId::from_usize(1); - let actual = lexer(program, module_id).unwrap(); + let actual = lex(program, module_id).unwrap(); use TokenType::*; let expected = vec![ Token { @@ -1837,26 +1837,26 @@ const things = "things" value: "]".to_owned(), }, ]; - let actual = lexer(program, module_id).unwrap(); + let actual = lex(program, module_id).unwrap(); assert_tokens(expected, actual); } #[test] fn test_kitt() { let program = include_str!("../../../tests/executor/inputs/kittycad_svg.kcl"); - let actual = lexer(program, ModuleId::default()).unwrap(); + let actual = lex(program, ModuleId::default()).unwrap(); assert_eq!(actual.len(), 5103); } #[test] fn test_pipes_on_pipes() { let program = include_str!("../../../tests/executor/inputs/pipes_on_pipes.kcl"); - let actual = lexer(program, ModuleId::default()).unwrap(); + let actual = lex(program, ModuleId::default()).unwrap(); assert_eq!(actual.len(), 17841); } #[test] fn test_lexer_negative_word() { let module_id = ModuleId::from_usize(1); - let actual = lexer("-legX", module_id).unwrap(); + let actual = lex("-legX", module_id).unwrap(); let expected = vec![ Token { token_type: TokenType::Operator, @@ -1879,7 +1879,7 @@ const things = "things" #[test] fn not_eq() { let module_id = ModuleId::from_usize(1); - let actual = lexer("!=", module_id).unwrap(); + let actual = lex("!=", module_id).unwrap(); let expected = vec![Token { token_type: TokenType::Operator, value: "!=".to_owned(), @@ -1893,7 +1893,7 @@ const things = "things" #[test] fn test_unrecognized_token() { let module_id = ModuleId::from_usize(1); - let actual = lexer("12 ; 8", module_id).unwrap(); + let actual = lex("12 ; 8", module_id).unwrap(); let expected = vec![ Token { token_type: TokenType::Number, @@ -1938,7 +1938,7 @@ const things = "things" #[test] fn import_keyword() { let module_id = ModuleId::from_usize(1); - let actual = lexer("import foo", module_id).unwrap(); + let actual = lex("import foo", module_id).unwrap(); let expected = Token { token_type: TokenType::Keyword, value: "import".to_owned(), @@ -1952,7 +1952,7 @@ const things = "things" #[test] fn import_function() { let module_id = ModuleId::from_usize(1); - let actual = lexer("import(3)", module_id).unwrap(); + let actual = lex("import(3)", module_id).unwrap(); let expected = Token { token_type: TokenType::Word, value: "import".to_owned(), diff --git a/src/wasm-lib/kcl/src/unparser.rs b/src/wasm-lib/kcl/src/unparser.rs index 91abb8fb0..a7a9f44f6 100644 --- a/src/wasm-lib/kcl/src/unparser.rs +++ b/src/wasm-lib/kcl/src/unparser.rs @@ -999,19 +999,6 @@ myNestedVar = [ assert_eq!(recasted, r#""#); } - #[test] - fn test_recast_shebang_only() { - let some_program_string = r#"#!/usr/local/env zoo kcl"#; - - let result = crate::parser::top_level_parse(some_program_string); - - assert!(result.is_err()); - assert_eq!( - result.unwrap_err().to_string(), - r#"syntax: KclErrorDetails { source_ranges: [SourceRange([21, 24, 0])], message: "Unexpected end of file. The compiler expected a function body items (functions are made up of variable declarations, expressions, and return statements, each of those is a possible body item" }"# - ); - } - #[test] fn test_recast_shebang() { let some_program_string = r#"#!/usr/local/env zoo kcl