diff --git a/src/lang/errors.ts b/src/lang/errors.ts index 73736c95a..c118b20ec 100644 --- a/src/lang/errors.ts +++ b/src/lang/errors.ts @@ -18,6 +18,13 @@ export class KCLError { } } +export class KCLLexicalError extends KCLError { + constructor(msg: string, sourceRanges: [number, number][]) { + super('lexical', msg, sourceRanges) + Object.setPrototypeOf(this, KCLLexicalError.prototype) + } +} + export class KCLSyntaxError extends KCLError { constructor(msg: string, sourceRanges: [number, number][]) { super('syntax', msg, sourceRanges) diff --git a/src/wasm-lib/kcl/src/errors.rs b/src/wasm-lib/kcl/src/errors.rs index 841e4fadd..a2a486475 100644 --- a/src/wasm-lib/kcl/src/errors.rs +++ b/src/wasm-lib/kcl/src/errors.rs @@ -8,6 +8,8 @@ use crate::executor::SourceRange; #[ts(export)] #[serde(tag = "kind", rename_all = "snake_case")] pub enum KclError { + #[error("lexical: {0:?}")] + Lexical(KclErrorDetails), #[error("syntax: {0:?}")] Syntax(KclErrorDetails), #[error("semantic: {0:?}")] @@ -41,6 +43,7 @@ impl KclError { /// Get the error message, line and column from the error and input code. pub fn get_message_line_column(&self, input: &str) -> (String, Option<usize>, Option<usize>) { let (type_, source_range, message) = match &self { + KclError::Lexical(e) => ("lexical", e.source_ranges.clone(), e.message.clone()), KclError::Syntax(e) => ("syntax", e.source_ranges.clone(), e.message.clone()), KclError::Semantic(e) => ("semantic", e.source_ranges.clone(), e.message.clone()), KclError::Type(e) => ("type", e.source_ranges.clone(), e.message.clone()), @@ -67,6 +70,7 @@ impl KclError { pub fn source_ranges(&self) -> Vec<SourceRange> { match &self { + KclError::Lexical(e) => e.source_ranges.clone(), KclError::Syntax(e) => e.source_ranges.clone(), KclError::Semantic(e) => e.source_ranges.clone(), KclError::Type(e) => e.source_ranges.clone(), @@ -82,6 +86,7 @@ impl KclError { /// Get the inner error message. 
pub fn message(&self) -> &str { match &self { + KclError::Lexical(e) => &e.message, KclError::Syntax(e) => &e.message, KclError::Semantic(e) => &e.message, KclError::Type(e) => &e.message, diff --git a/src/wasm-lib/kcl/src/parser.rs b/src/wasm-lib/kcl/src/parser.rs index a45919d97..3ffdfaa8b 100644 --- a/src/wasm-lib/kcl/src/parser.rs +++ b/src/wasm-lib/kcl/src/parser.rs @@ -1,4 +1,10 @@ -use crate::{ast::types::Program, errors::KclError, token::Token}; +use crate::{ + ast::types::Program, + errors::KclError, + errors::KclErrorDetails, + executor::SourceRange, + token::{Token, TokenType}, +}; mod math; pub(crate) mod parser_impl; @@ -8,15 +14,37 @@ pub const PIPE_OPERATOR: &str = "|>"; pub struct Parser { pub tokens: Vec<Token>, + pub unknown_tokens: Vec<Token>, } impl Parser { pub fn new(tokens: Vec<Token>) -> Self { - Self { tokens } + let (tokens, unknown_tokens): (Vec<Token>, Vec<Token>) = tokens + .into_iter() + .partition(|token| token.token_type != TokenType::Unknown); + Self { tokens, unknown_tokens } } /// Run the parser pub fn ast(&self) -> Result<Program, KclError> { + if self.tokens.is_empty() { + return Err(KclError::Syntax(KclErrorDetails { + source_ranges: vec![], + message: "file is empty".to_string(), + })); + } + + if !self.unknown_tokens.is_empty() { + let source_ranges = self.unknown_tokens.iter().map(SourceRange::from).collect(); + return Err(KclError::Lexical(KclErrorDetails { + source_ranges, + message: format!( + "found list of unknown tokens {:?}", + self.unknown_tokens.iter().map(|t| t.value.as_str()).collect::<Vec<_>>().join(" ") + ), + })); + } + parser_impl::run_parser(&mut self.tokens.as_slice()) } } diff --git a/src/wasm-lib/kcl/src/parser/parser_impl.rs b/src/wasm-lib/kcl/src/parser/parser_impl.rs index 2950eddd6..cc0d242e2 100644 --- a/src/wasm-lib/kcl/src/parser/parser_impl.rs +++ b/src/wasm-lib/kcl/src/parser/parser_impl.rs @@ -34,13 +34,6 @@ lazy_static::lazy_static! 
{ type TokenSlice<'slice, 'input> = &'slice mut &'input [Token]; pub fn run_parser(i: TokenSlice) -> Result { - if i.is_empty() { - return Err(KclError::Syntax(KclErrorDetails { - source_ranges: vec![], - message: "file is empty".to_string(), - })); - } - program.parse(i).map_err(KclError::from) } @@ -2223,7 +2216,7 @@ const secondExtrude = startSketchOn('XY') let err = parser.ast().unwrap_err(); // TODO: Better errors when program cannot tokenize. // https://github.com/KittyCAD/modeling-app/issues/696 - assert!(err.to_string().contains("file is empty")); + assert!(err.to_string().contains("found list of unknown tokens")); } #[test] @@ -2283,7 +2276,7 @@ z(-[["#, // https://github.com/KittyCAD/modeling-app/issues/696 assert_eq!( result.err().unwrap().to_string(), - r#"syntax: KclErrorDetails { source_ranges: [], message: "file is empty" }"# + r##"lexical: KclErrorDetails { source_ranges: [SourceRange([6, 7])], message: "found list of unknown tokens \"#\"" }"## ); } @@ -2297,7 +2290,7 @@ z(-[["#, // https://github.com/KittyCAD/modeling-app/issues/696 assert_eq!( result.err().unwrap().to_string(), - r#"syntax: KclErrorDetails { source_ranges: [], message: "file is empty" }"# + r##"lexical: KclErrorDetails { source_ranges: [SourceRange([25, 26]), SourceRange([26, 27])], message: "found list of unknown tokens \"# #\"" }"## ); } diff --git a/src/wasm-lib/kcl/src/token.rs b/src/wasm-lib/kcl/src/token.rs index 743fc32bf..04a5a24ee 100644 --- a/src/wasm-lib/kcl/src/token.rs +++ b/src/wasm-lib/kcl/src/token.rs @@ -45,6 +45,8 @@ pub enum TokenType { BlockComment, /// A function name. Function, + /// Unknown lexemes. + Unknown, } /// Most KCL tokens correspond to LSP semantic tokens (but not all). 
@@ -65,7 +67,8 @@ impl TryFrom for SemanticTokenType { | TokenType::Comma | TokenType::Colon | TokenType::Period - | TokenType::DoublePeriod => { + | TokenType::DoublePeriod + | TokenType::Unknown => { anyhow::bail!("unsupported token type: {:?}", token_type) } }) diff --git a/src/wasm-lib/kcl/src/token/tokeniser.rs b/src/wasm-lib/kcl/src/token/tokeniser.rs index 00c819b4f..c56d5c226 100644 --- a/src/wasm-lib/kcl/src/token/tokeniser.rs +++ b/src/wasm-lib/kcl/src/token/tokeniser.rs @@ -3,6 +3,7 @@ use winnow::{ combinator::{alt, opt, peek, preceded, repeat, terminated}, error::{ContextError, ParseError}, prelude::*, + stream::{Location, Stream}, token::{any, none_of, one_of, take_till1, take_until0}, Located, }; @@ -14,7 +15,7 @@ pub fn lexer(i: &str) -> Result, ParseError, ContextErr } pub fn token(i: &mut Located<&str>) -> PResult { - winnow::combinator::dispatch! {peek(any); + match winnow::combinator::dispatch! {peek(any); '"' | '\'' => string, '/' => alt((line_comment, block_comment, operator)), '{' | '(' | '[' => brace_start, @@ -27,6 +28,21 @@ pub fn token(i: &mut Located<&str>) -> PResult { _ => alt((operator, keyword, word)) } .parse_next(i) + { + Ok(token) => Ok(token), + Err(x) => { + // TODO: Handle non ascii cases + if i.len() == 0 || !i.is_ascii() { + return Err(x); + } + + Ok(Token::from_range( + i.location()..i.location() + 1, + TokenType::Unknown, + i.next_slice(1).to_string(), + )) + } + } } fn block_comment(i: &mut Located<&str>) -> PResult { @@ -234,6 +250,14 @@ mod tests { } fn assert_tokens(expected: Vec, actual: Vec) { + assert_eq!( + expected.len(), + actual.len(), + "\nexpected {} tokens, actually got {}", + expected.len(), + actual.len() + ); + let n = expected.len(); for i in 0..n { assert_eq!( @@ -242,7 +266,6 @@ mod tests { expected[i], actual[i], ) } - assert_eq!(n, actual.len(), "expected {} tokens, actually got {}", n, actual.len()); } #[test] @@ -1461,4 +1484,43 @@ const things = "things" ]; assert_tokens(expected, actual); } + + 
#[test] + fn test_unrecognized_token() { + let actual = lexer("12 ; 8").unwrap(); + let expected = vec![ + Token { + token_type: TokenType::Number, + value: "12".to_string(), + start: 0, + end: 2, + }, + Token { + token_type: TokenType::Whitespace, + value: " ".to_string(), + start: 2, + end: 3, + }, + Token { + token_type: TokenType::Unknown, + value: ";".to_string(), + start: 3, + end: 4, + }, + Token { + token_type: TokenType::Whitespace, + value: " ".to_string(), + start: 4, + end: 5, + }, + Token { + token_type: TokenType::Number, + value: "8".to_string(), + start: 5, + end: 6, + }, + ]; + + assert_tokens(expected, actual); + } }