Tokenizing fallibility (#883)
Tokenization tracks invalid tokens and produces a nice error about them.

Co-authored-by: Adam Chalmers <adam.chalmers@kittycad.io>

commit 2e419907e6 (parent 3d0c5c10b0)
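The change end to end: the tokenizer no longer aborts on an unrecognized character but emits an `Unknown` token and keeps going; the parser then sets those tokens aside and reports them all at once as a `lexical` error. A dependency-free sketch of that flow with stand-in types (not the real `kcl` crate API):

```rust
// Stand-in types; the real Token/TokenType live inside the kcl crate.
#[derive(Debug, Clone, PartialEq)]
enum TokenType { Number, Whitespace, Unknown }

#[derive(Debug, Clone)]
struct Token { token_type: TokenType, value: String }

fn main() {
    let tokens = vec![
        Token { token_type: TokenType::Number, value: "12".into() },
        Token { token_type: TokenType::Whitespace, value: " ".into() },
        Token { token_type: TokenType::Unknown, value: ";".into() },
    ];
    // Same partition idiom Parser::new uses below: good tokens keep flowing,
    // unknown ones are collected for a single aggregate error.
    let (good, unknown): (Vec<_>, Vec<_>) =
        tokens.into_iter().partition(|t| t.token_type != TokenType::Unknown);
    if !unknown.is_empty() {
        let list = unknown.iter().map(|t| t.value.as_str()).collect::<Vec<_>>().join(" ");
        eprintln!("found list of unknown tokens \"{list}\"");
    }
    println!("{} tokens remain for parsing", good.len());
}
```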
```diff
@@ -18,6 +18,13 @@ export class KCLError {
   }
 }
 
+export class KCLLexicalError extends KCLError {
+  constructor(msg: string, sourceRanges: [number, number][]) {
+    super('lexical', msg, sourceRanges)
+    Object.setPrototypeOf(this, KCLLexicalError.prototype)
+  }
+}
+
 export class KCLSyntaxError extends KCLError {
   constructor(msg: string, sourceRanges: [number, number][]) {
     super('syntax', msg, sourceRanges)
```
```diff
@@ -8,6 +8,8 @@ use crate::executor::SourceRange;
 #[ts(export)]
 #[serde(tag = "kind", rename_all = "snake_case")]
 pub enum KclError {
+    #[error("lexical: {0:?}")]
+    Lexical(KclErrorDetails),
     #[error("syntax: {0:?}")]
     Syntax(KclErrorDetails),
     #[error("semantic: {0:?}")]
@@ -41,6 +43,7 @@ impl KclError {
     /// Get the error message, line and column from the error and input code.
     pub fn get_message_line_column(&self, input: &str) -> (String, Option<usize>, Option<usize>) {
         let (type_, source_range, message) = match &self {
+            KclError::Lexical(e) => ("lexical", e.source_ranges.clone(), e.message.clone()),
             KclError::Syntax(e) => ("syntax", e.source_ranges.clone(), e.message.clone()),
             KclError::Semantic(e) => ("semantic", e.source_ranges.clone(), e.message.clone()),
             KclError::Type(e) => ("type", e.source_ranges.clone(), e.message.clone()),
@@ -67,6 +70,7 @@ impl KclError {
 
     pub fn source_ranges(&self) -> Vec<SourceRange> {
         match &self {
+            KclError::Lexical(e) => e.source_ranges.clone(),
             KclError::Syntax(e) => e.source_ranges.clone(),
             KclError::Semantic(e) => e.source_ranges.clone(),
             KclError::Type(e) => e.source_ranges.clone(),
@@ -82,6 +86,7 @@ impl KclError {
     /// Get the inner error message.
     pub fn message(&self) -> &str {
         match &self {
+            KclError::Lexical(e) => &e.message,
             KclError::Syntax(e) => &e.message,
             KclError::Semantic(e) => &e.message,
             KclError::Type(e) => &e.message,
```
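A note on the serde attributes above: `tag = "kind"` with `rename_all = "snake_case"` is what makes the new variant arrive in the frontend as `{"kind": "lexical", ...}`, which the `KCLLexicalError` class earlier keys off. A stand-in sketch (not the real `KclError`, which also carries source ranges; assumes `serde` and `serde_json` as dependencies):

```rust
use serde::Serialize;

// Illustrative enum only; the real one wraps KclErrorDetails.
#[derive(Serialize)]
#[serde(tag = "kind", rename_all = "snake_case")]
enum Error {
    Lexical { message: String },
    Syntax { message: String },
}

fn main() {
    let e = Error::Lexical { message: "found list of unknown tokens \"#\"".into() };
    // Prints {"kind":"lexical","message":"found list of unknown tokens \"#\""}
    println!("{}", serde_json::to_string(&e).unwrap());
}
```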
```diff
@@ -1,4 +1,10 @@
-use crate::{ast::types::Program, errors::KclError, token::Token};
+use crate::{
+    ast::types::Program,
+    errors::KclError,
+    errors::KclErrorDetails,
+    executor::SourceRange,
+    token::{Token, TokenType},
+};
 
 mod math;
 pub(crate) mod parser_impl;
@@ -8,15 +14,37 @@ pub const PIPE_OPERATOR: &str = "|>";
 
 pub struct Parser {
     pub tokens: Vec<Token>,
+    pub unknown_tokens: Vec<Token>,
 }
 
 impl Parser {
     pub fn new(tokens: Vec<Token>) -> Self {
-        Self { tokens }
+        let (tokens, unknown_tokens): (Vec<Token>, Vec<Token>) = tokens
+            .into_iter()
+            .partition(|token| token.token_type != TokenType::Unknown);
+        Self { tokens, unknown_tokens }
     }
 
     /// Run the parser
    pub fn ast(&self) -> Result<Program, KclError> {
+        if self.tokens.is_empty() {
+            return Err(KclError::Syntax(KclErrorDetails {
+                source_ranges: vec![],
+                message: "file is empty".to_string(),
+            }));
+        }
+
+        if !self.unknown_tokens.is_empty() {
+            let source_ranges = self.unknown_tokens.iter().map(SourceRange::from).collect();
+            return Err(KclError::Lexical(KclErrorDetails {
+                source_ranges,
+                message: format!(
+                    "found list of unknown tokens \"{}\"",
+                    self.unknown_tokens.iter().map(|t| t.value.as_str()).collect::<Vec<_>>().join(" ")
+                ),
+            }));
+        }
+
         parser_impl::run_parser(&mut self.tokens.as_slice())
     }
 }
```
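Usage, as the tests later in this diff exercise it: lex a source string, hand the tokens to `Parser::new`, and let `ast()` surface the lexical error. Note the empty-file check also moves here from `run_parser` (next hunk), so both failure modes are reported before parsing starts. A sketch in the style of the crate's own tests; the module paths are assumptions:

```rust
#[test]
fn surfaces_unknown_tokens_as_a_lexical_error() {
    // ";" is not a KCL lexeme, so the tokenizer marks it Unknown and
    // ast() refuses to parse, naming every offending token at once.
    let tokens = crate::token::lexer("12 ; 8").unwrap();
    let parser = crate::parser::Parser::new(tokens);
    let err = parser.ast().unwrap_err();
    assert!(err.to_string().contains("found list of unknown tokens"));
}
```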
```diff
@@ -34,13 +34,6 @@ lazy_static::lazy_static! {
 type TokenSlice<'slice, 'input> = &'slice mut &'input [Token];
 
 pub fn run_parser(i: TokenSlice) -> Result<Program, KclError> {
-    if i.is_empty() {
-        return Err(KclError::Syntax(KclErrorDetails {
-            source_ranges: vec![],
-            message: "file is empty".to_string(),
-        }));
-    }
-
     program.parse(i).map_err(KclError::from)
 }
```
```diff
@@ -2223,7 +2216,7 @@ const secondExtrude = startSketchOn('XY')
         let err = parser.ast().unwrap_err();
         // TODO: Better errors when program cannot tokenize.
         // https://github.com/KittyCAD/modeling-app/issues/696
-        assert!(err.to_string().contains("file is empty"));
+        assert!(err.to_string().contains("found list of unknown tokens"));
     }
 
     #[test]
@@ -2283,7 +2276,7 @@ z(-[["#,
         // https://github.com/KittyCAD/modeling-app/issues/696
         assert_eq!(
             result.err().unwrap().to_string(),
-            r#"syntax: KclErrorDetails { source_ranges: [], message: "file is empty" }"#
+            r##"lexical: KclErrorDetails { source_ranges: [SourceRange([6, 7])], message: "found list of unknown tokens \"#\"" }"##
         );
     }
 
@@ -2297,7 +2290,7 @@ z(-[["#,
         // https://github.com/KittyCAD/modeling-app/issues/696
         assert_eq!(
             result.err().unwrap().to_string(),
-            r#"syntax: KclErrorDetails { source_ranges: [], message: "file is empty" }"#
+            r##"lexical: KclErrorDetails { source_ranges: [SourceRange([25, 26]), SourceRange([26, 27])], message: "found list of unknown tokens \"# #\"" }"##
         );
     }
 
```
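Two details in these expected strings are easy to misread. The literals need `r##"…"##` because the message itself contains `"#`, which would close an ordinary `r#"…"#` raw string; and the `\"` sequences appear because the error's `Display` impl Debug-prints `KclErrorDetails`, and Debug re-escapes the quotes inside the message. A standalone demonstration of the second point:

```rust
fn main() {
    let message = "found list of unknown tokens \"#\"".to_string();
    // Display shows the inner quotes as-is; Debug escapes them, which is the
    // form the assertions above match against.
    println!("{message}");   // found list of unknown tokens "#"
    println!("{message:?}"); // "found list of unknown tokens \"#\""
}
```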
```diff
@@ -45,6 +45,8 @@ pub enum TokenType {
     BlockComment,
     /// A function name.
     Function,
+    /// Unknown lexemes.
+    Unknown,
 }
 
 /// Most KCL tokens correspond to LSP semantic tokens (but not all).
@@ -65,7 +67,8 @@ impl TryFrom<TokenType> for SemanticTokenType {
             | TokenType::Comma
             | TokenType::Colon
             | TokenType::Period
-            | TokenType::DoublePeriod => {
+            | TokenType::DoublePeriod
+            | TokenType::Unknown => {
                 anyhow::bail!("unsupported token type: {:?}", token_type)
             }
         })
```
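The extra arm encodes a deliberate rule: an `Unknown` lexeme has no honest LSP semantic-token mapping, so the conversion fails loudly instead of mislabeling it. The same rule in a dependency-free sketch (stand-in enum, plain `Result` instead of `anyhow`):

```rust
#[derive(Debug)]
enum TokenType { Word, Unknown }

fn semantic_token_name(token_type: TokenType) -> Result<&'static str, String> {
    match token_type {
        TokenType::Word => Ok("variable"),
        // Mirrors the anyhow::bail! arm: refuse to invent a mapping.
        TokenType::Unknown => Err(format!("unsupported token type: {:?}", token_type)),
    }
}

fn main() {
    assert!(semantic_token_name(TokenType::Word).is_ok());
    assert!(semantic_token_name(TokenType::Unknown).is_err());
    println!("ok");
}
```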
```diff
@@ -3,6 +3,7 @@ use winnow::{
     combinator::{alt, opt, peek, preceded, repeat, terminated},
     error::{ContextError, ParseError},
     prelude::*,
+    stream::{Location, Stream},
     token::{any, none_of, one_of, take_till1, take_until0},
     Located,
 };
@@ -14,7 +15,7 @@ pub fn lexer(i: &str) -> Result<Vec<Token>, ParseError<Located<&str>, ContextError>> {
 }
 
 pub fn token(i: &mut Located<&str>) -> PResult<Token> {
-    winnow::combinator::dispatch! {peek(any);
+    match winnow::combinator::dispatch! {peek(any);
         '"' | '\'' => string,
         '/' => alt((line_comment, block_comment, operator)),
         '{' | '(' | '[' => brace_start,
@@ -27,6 +28,21 @@ pub fn token(i: &mut Located<&str>) -> PResult<Token> {
         _ => alt((operator, keyword, word))
     }
     .parse_next(i)
+    {
+        Ok(token) => Ok(token),
+        Err(x) => {
+            // TODO: Handle non ascii cases
+            if i.len() == 0 || !i.is_ascii() {
+                return Err(x);
+            }
+
+            Ok(Token::from_range(
+                i.location()..i.location() + 1,
+                TokenType::Unknown,
+                i.next_slice(1).to_string(),
+            ))
+        }
+    }
 }
 
 fn block_comment(i: &mut Located<&str>) -> PResult<Token> {
```
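This is the heart of the commit: when the dispatch can't match, `token()` consumes exactly one ASCII character as an `Unknown` token and lexing continues, so a single stray symbol no longer kills the entire lex. A dependency-free sketch of that recovery strategy (ASCII-only, mirroring the `TODO` above; stand-in types, not the winnow-based implementation):

```rust
#[derive(Debug, PartialEq)]
enum Kind { Number, Whitespace, Unknown }

fn lex(src: &str) -> Vec<(Kind, &str)> {
    let mut out = Vec::new();
    let mut rest = src;
    while !rest.is_empty() {
        let (kind, len) = if rest.starts_with(|c: char| c.is_ascii_digit()) {
            (Kind::Number, rest.find(|c: char| !c.is_ascii_digit()).unwrap_or(rest.len()))
        } else if rest.starts_with(' ') {
            (Kind::Whitespace, rest.find(|c: char| c != ' ').unwrap_or(rest.len()))
        } else {
            // Fallback: one character becomes an Unknown token and lexing
            // goes on (ASCII assumed, as in the TODO above).
            (Kind::Unknown, 1)
        };
        out.push((kind, &rest[..len]));
        rest = &rest[len..];
    }
    out
}

fn main() {
    let toks = lex("12 ; 8");
    assert_eq!(toks[2], (Kind::Unknown, ";"));
    println!("{:?}", toks);
}
```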
```diff
@@ -234,6 +250,14 @@ mod tests {
     }
 
     fn assert_tokens(expected: Vec<Token>, actual: Vec<Token>) {
+        assert_eq!(
+            expected.len(),
+            actual.len(),
+            "\nexpected {} tokens, actually got {}",
+            expected.len(),
+            actual.len()
+        );
+
         let n = expected.len();
         for i in 0..n {
             assert_eq!(
@@ -242,7 +266,6 @@ mod tests {
                 expected[i], actual[i],
             )
         }
-        assert_eq!(n, actual.len(), "expected {} tokens, actually got {}", n, actual.len());
     }
 
     #[test]
```
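The reshuffled assertion is a small ergonomics fix: checking lengths before comparing elements means a short token stream fails with the readable count message instead of an out-of-bounds panic mid-loop. The same shape in miniature:

```rust
fn assert_tokens_sketch(expected: &[&str], actual: &[&str]) {
    // Length first: a friendlier failure than indexing past the end.
    assert_eq!(
        expected.len(),
        actual.len(),
        "\nexpected {} tokens, actually got {}",
        expected.len(),
        actual.len()
    );
    for (i, (e, a)) in expected.iter().zip(actual).enumerate() {
        assert_eq!(e, a, "token {} differs", i);
    }
}

fn main() {
    assert_tokens_sketch(&["12", " ", ";"], &["12", " ", ";"]);
    println!("ok");
}
```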
```diff
@@ -1461,4 +1484,43 @@ const things = "things"
         ];
         assert_tokens(expected, actual);
     }
+
+    #[test]
+    fn test_unrecognized_token() {
+        let actual = lexer("12 ; 8").unwrap();
+        let expected = vec![
+            Token {
+                token_type: TokenType::Number,
+                value: "12".to_string(),
+                start: 0,
+                end: 2,
+            },
+            Token {
+                token_type: TokenType::Whitespace,
+                value: " ".to_string(),
+                start: 2,
+                end: 3,
+            },
+            Token {
+                token_type: TokenType::Unknown,
+                value: ";".to_string(),
+                start: 3,
+                end: 4,
+            },
+            Token {
+                token_type: TokenType::Whitespace,
+                value: " ".to_string(),
+                start: 4,
+                end: 5,
+            },
+            Token {
+                token_type: TokenType::Number,
+                value: "8".to_string(),
+                start: 5,
+                end: 6,
+            },
+        ];
+
+        assert_tokens(expected, actual);
+    }
 }
```