Tokenizing fallibility (#883)
Tokenization now tracks invalid tokens and reports them in a descriptive lexical error.

Co-authored-by: Adam Chalmers <adam.chalmers@kittycad.io>
This commit is contained in:
committed by
GitHub
parent
3d0c5c10b0
commit
2e419907e6
@ -18,6 +18,13 @@ export class KCLError {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * KCL error of kind 'lexical'.
 *
 * @param msg - Human-readable description of the problem.
 * @param sourceRanges - `[start, end]` pairs locating the offending text.
 */
export class KCLLexicalError extends KCLError {
  constructor(msg: string, sourceRanges: [number, number][]) {
    super('lexical', msg, sourceRanges)
    // Pin the prototype to this class (not a sibling error class) so that
    // `instanceof KCLLexicalError` is true and `instanceof KCLSyntaxError`
    // is false for instances created here.
    Object.setPrototypeOf(this, KCLLexicalError.prototype)
  }
}
|
||||||
|
|
||||||
export class KCLSyntaxError extends KCLError {
|
export class KCLSyntaxError extends KCLError {
|
||||||
constructor(msg: string, sourceRanges: [number, number][]) {
|
constructor(msg: string, sourceRanges: [number, number][]) {
|
||||||
super('syntax', msg, sourceRanges)
|
super('syntax', msg, sourceRanges)
|
||||||
|
@ -8,6 +8,8 @@ use crate::executor::SourceRange;
|
|||||||
#[ts(export)]
|
#[ts(export)]
|
||||||
#[serde(tag = "kind", rename_all = "snake_case")]
|
#[serde(tag = "kind", rename_all = "snake_case")]
|
||||||
pub enum KclError {
|
pub enum KclError {
|
||||||
|
#[error("lexical: {0:?}")]
|
||||||
|
Lexical(KclErrorDetails),
|
||||||
#[error("syntax: {0:?}")]
|
#[error("syntax: {0:?}")]
|
||||||
Syntax(KclErrorDetails),
|
Syntax(KclErrorDetails),
|
||||||
#[error("semantic: {0:?}")]
|
#[error("semantic: {0:?}")]
|
||||||
@ -41,6 +43,7 @@ impl KclError {
|
|||||||
/// Get the error message, line and column from the error and input code.
|
/// Get the error message, line and column from the error and input code.
|
||||||
pub fn get_message_line_column(&self, input: &str) -> (String, Option<usize>, Option<usize>) {
|
pub fn get_message_line_column(&self, input: &str) -> (String, Option<usize>, Option<usize>) {
|
||||||
let (type_, source_range, message) = match &self {
|
let (type_, source_range, message) = match &self {
|
||||||
|
KclError::Lexical(e) => ("lexical", e.source_ranges.clone(), e.message.clone()),
|
||||||
KclError::Syntax(e) => ("syntax", e.source_ranges.clone(), e.message.clone()),
|
KclError::Syntax(e) => ("syntax", e.source_ranges.clone(), e.message.clone()),
|
||||||
KclError::Semantic(e) => ("semantic", e.source_ranges.clone(), e.message.clone()),
|
KclError::Semantic(e) => ("semantic", e.source_ranges.clone(), e.message.clone()),
|
||||||
KclError::Type(e) => ("type", e.source_ranges.clone(), e.message.clone()),
|
KclError::Type(e) => ("type", e.source_ranges.clone(), e.message.clone()),
|
||||||
@ -67,6 +70,7 @@ impl KclError {
|
|||||||
|
|
||||||
pub fn source_ranges(&self) -> Vec<SourceRange> {
|
pub fn source_ranges(&self) -> Vec<SourceRange> {
|
||||||
match &self {
|
match &self {
|
||||||
|
KclError::Lexical(e) => e.source_ranges.clone(),
|
||||||
KclError::Syntax(e) => e.source_ranges.clone(),
|
KclError::Syntax(e) => e.source_ranges.clone(),
|
||||||
KclError::Semantic(e) => e.source_ranges.clone(),
|
KclError::Semantic(e) => e.source_ranges.clone(),
|
||||||
KclError::Type(e) => e.source_ranges.clone(),
|
KclError::Type(e) => e.source_ranges.clone(),
|
||||||
@ -82,6 +86,7 @@ impl KclError {
|
|||||||
/// Get the inner error message.
|
/// Get the inner error message.
|
||||||
pub fn message(&self) -> &str {
|
pub fn message(&self) -> &str {
|
||||||
match &self {
|
match &self {
|
||||||
|
KclError::Lexical(e) => &e.message,
|
||||||
KclError::Syntax(e) => &e.message,
|
KclError::Syntax(e) => &e.message,
|
||||||
KclError::Semantic(e) => &e.message,
|
KclError::Semantic(e) => &e.message,
|
||||||
KclError::Type(e) => &e.message,
|
KclError::Type(e) => &e.message,
|
||||||
|
@ -1,4 +1,10 @@
|
|||||||
use crate::{ast::types::Program, errors::KclError, token::Token};
|
use crate::{
|
||||||
|
ast::types::Program,
|
||||||
|
errors::KclError,
|
||||||
|
errors::KclErrorDetails,
|
||||||
|
executor::SourceRange,
|
||||||
|
token::{Token, TokenType},
|
||||||
|
};
|
||||||
|
|
||||||
mod math;
|
mod math;
|
||||||
pub(crate) mod parser_impl;
|
pub(crate) mod parser_impl;
|
||||||
@ -8,15 +14,37 @@ pub const PIPE_OPERATOR: &str = "|>";
|
|||||||
|
|
||||||
pub struct Parser {
|
pub struct Parser {
|
||||||
pub tokens: Vec<Token>,
|
pub tokens: Vec<Token>,
|
||||||
|
pub unknown_tokens: Vec<Token>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Parser {
|
impl Parser {
|
||||||
pub fn new(tokens: Vec<Token>) -> Self {
|
pub fn new(tokens: Vec<Token>) -> Self {
|
||||||
Self { tokens }
|
let (tokens, unknown_tokens): (Vec<Token>, Vec<Token>) = tokens
|
||||||
|
.into_iter()
|
||||||
|
.partition(|token| token.token_type != TokenType::Unknown);
|
||||||
|
Self { tokens, unknown_tokens }
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Run the parser
|
/// Run the parser
|
||||||
pub fn ast(&self) -> Result<Program, KclError> {
|
pub fn ast(&self) -> Result<Program, KclError> {
|
||||||
|
if self.tokens.is_empty() {
|
||||||
|
return Err(KclError::Syntax(KclErrorDetails {
|
||||||
|
source_ranges: vec![],
|
||||||
|
message: "file is empty".to_string(),
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
|
||||||
|
if !self.unknown_tokens.is_empty() {
|
||||||
|
let source_ranges = self.unknown_tokens.iter().map(SourceRange::from).collect();
|
||||||
|
return Err(KclError::Lexical(KclErrorDetails {
|
||||||
|
source_ranges,
|
||||||
|
message: format!(
|
||||||
|
"found unknown tokens {:?}",
|
||||||
|
self.unknown_tokens.iter().map(|t| t.value.as_str()).collect::<Vec<_>>()
|
||||||
|
),
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
|
||||||
parser_impl::run_parser(&mut self.tokens.as_slice())
|
parser_impl::run_parser(&mut self.tokens.as_slice())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -34,13 +34,6 @@ lazy_static::lazy_static! {
|
|||||||
type TokenSlice<'slice, 'input> = &'slice mut &'input [Token];
|
type TokenSlice<'slice, 'input> = &'slice mut &'input [Token];
|
||||||
|
|
||||||
pub fn run_parser(i: TokenSlice) -> Result<Program, KclError> {
|
pub fn run_parser(i: TokenSlice) -> Result<Program, KclError> {
|
||||||
if i.is_empty() {
|
|
||||||
return Err(KclError::Syntax(KclErrorDetails {
|
|
||||||
source_ranges: vec![],
|
|
||||||
message: "file is empty".to_string(),
|
|
||||||
}));
|
|
||||||
}
|
|
||||||
|
|
||||||
program.parse(i).map_err(KclError::from)
|
program.parse(i).map_err(KclError::from)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2223,7 +2216,7 @@ const secondExtrude = startSketchOn('XY')
|
|||||||
let err = parser.ast().unwrap_err();
|
let err = parser.ast().unwrap_err();
|
||||||
// TODO: Better errors when program cannot tokenize.
|
// TODO: Better errors when program cannot tokenize.
|
||||||
// https://github.com/KittyCAD/modeling-app/issues/696
|
// https://github.com/KittyCAD/modeling-app/issues/696
|
||||||
assert!(err.to_string().contains("file is empty"));
|
assert!(err.to_string().contains("found list of unknown tokens"));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
@ -2283,7 +2276,7 @@ z(-[["#,
|
|||||||
// https://github.com/KittyCAD/modeling-app/issues/696
|
// https://github.com/KittyCAD/modeling-app/issues/696
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
result.err().unwrap().to_string(),
|
result.err().unwrap().to_string(),
|
||||||
r#"syntax: KclErrorDetails { source_ranges: [], message: "file is empty" }"#
|
r##"lexical: KclErrorDetails { source_ranges: [SourceRange([6, 7])], message: "found list of unknown tokens \"#\"" }"##
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2297,7 +2290,7 @@ z(-[["#,
|
|||||||
// https://github.com/KittyCAD/modeling-app/issues/696
|
// https://github.com/KittyCAD/modeling-app/issues/696
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
result.err().unwrap().to_string(),
|
result.err().unwrap().to_string(),
|
||||||
r#"syntax: KclErrorDetails { source_ranges: [], message: "file is empty" }"#
|
r##"lexical: KclErrorDetails { source_ranges: [SourceRange([25, 26]), SourceRange([26, 27])], message: "found list of unknown tokens \"# #\"" }"##
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -45,6 +45,8 @@ pub enum TokenType {
|
|||||||
BlockComment,
|
BlockComment,
|
||||||
/// A function name.
|
/// A function name.
|
||||||
Function,
|
Function,
|
||||||
|
/// Unknown lexemes.
|
||||||
|
Unknown,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Most KCL tokens correspond to LSP semantic tokens (but not all).
|
/// Most KCL tokens correspond to LSP semantic tokens (but not all).
|
||||||
@ -65,7 +67,8 @@ impl TryFrom<TokenType> for SemanticTokenType {
|
|||||||
| TokenType::Comma
|
| TokenType::Comma
|
||||||
| TokenType::Colon
|
| TokenType::Colon
|
||||||
| TokenType::Period
|
| TokenType::Period
|
||||||
| TokenType::DoublePeriod => {
|
| TokenType::DoublePeriod
|
||||||
|
| TokenType::Unknown => {
|
||||||
anyhow::bail!("unsupported token type: {:?}", token_type)
|
anyhow::bail!("unsupported token type: {:?}", token_type)
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
@ -3,6 +3,7 @@ use winnow::{
|
|||||||
combinator::{alt, opt, peek, preceded, repeat, terminated},
|
combinator::{alt, opt, peek, preceded, repeat, terminated},
|
||||||
error::{ContextError, ParseError},
|
error::{ContextError, ParseError},
|
||||||
prelude::*,
|
prelude::*,
|
||||||
|
stream::{Location, Stream},
|
||||||
token::{any, none_of, one_of, take_till1, take_until0},
|
token::{any, none_of, one_of, take_till1, take_until0},
|
||||||
Located,
|
Located,
|
||||||
};
|
};
|
||||||
@ -14,7 +15,7 @@ pub fn lexer(i: &str) -> Result<Vec<Token>, ParseError<Located<&str>, ContextErr
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn token(i: &mut Located<&str>) -> PResult<Token> {
|
pub fn token(i: &mut Located<&str>) -> PResult<Token> {
|
||||||
winnow::combinator::dispatch! {peek(any);
|
match winnow::combinator::dispatch! {peek(any);
|
||||||
'"' | '\'' => string,
|
'"' | '\'' => string,
|
||||||
'/' => alt((line_comment, block_comment, operator)),
|
'/' => alt((line_comment, block_comment, operator)),
|
||||||
'{' | '(' | '[' => brace_start,
|
'{' | '(' | '[' => brace_start,
|
||||||
@ -27,6 +28,21 @@ pub fn token(i: &mut Located<&str>) -> PResult<Token> {
|
|||||||
_ => alt((operator, keyword, word))
|
_ => alt((operator, keyword, word))
|
||||||
}
|
}
|
||||||
.parse_next(i)
|
.parse_next(i)
|
||||||
|
{
|
||||||
|
Ok(token) => Ok(token),
|
||||||
|
Err(x) => {
|
||||||
|
// TODO: Handle non ascii cases
|
||||||
|
if i.len() == 0 || !i.is_ascii() {
|
||||||
|
return Err(x);
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(Token::from_range(
|
||||||
|
i.location()..i.location() + 1,
|
||||||
|
TokenType::Unknown,
|
||||||
|
i.next_slice(1).to_string(),
|
||||||
|
))
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn block_comment(i: &mut Located<&str>) -> PResult<Token> {
|
fn block_comment(i: &mut Located<&str>) -> PResult<Token> {
|
||||||
@ -234,6 +250,14 @@ mod tests {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn assert_tokens(expected: Vec<Token>, actual: Vec<Token>) {
|
fn assert_tokens(expected: Vec<Token>, actual: Vec<Token>) {
|
||||||
|
assert_eq!(
|
||||||
|
expected.len(),
|
||||||
|
actual.len(),
|
||||||
|
"\nexpected {} tokens, actually got {}",
|
||||||
|
expected.len(),
|
||||||
|
actual.len()
|
||||||
|
);
|
||||||
|
|
||||||
let n = expected.len();
|
let n = expected.len();
|
||||||
for i in 0..n {
|
for i in 0..n {
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
@ -242,7 +266,6 @@ mod tests {
|
|||||||
expected[i], actual[i],
|
expected[i], actual[i],
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
assert_eq!(n, actual.len(), "expected {} tokens, actually got {}", n, actual.len());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
@ -1461,4 +1484,43 @@ const things = "things"
|
|||||||
];
|
];
|
||||||
assert_tokens(expected, actual);
|
assert_tokens(expected, actual);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
fn test_unrecognized_token() {
    // Local shorthand so each expected token fits on a single line.
    let tok = |token_type, value: &str, start, end| Token {
        token_type,
        value: value.to_string(),
        start,
        end,
    };

    let actual = lexer("12 ; 8").unwrap();

    // The `;` is not a recognised lexeme, so it must surface as a one-character
    // `Unknown` token while the tokens around it are lexed as usual.
    let expected = vec![
        tok(TokenType::Number, "12", 0, 2),
        tok(TokenType::Whitespace, " ", 2, 3),
        tok(TokenType::Unknown, ";", 3, 4),
        tok(TokenType::Whitespace, " ", 4, 5),
        tok(TokenType::Number, "8", 5, 6),
    ];

    assert_tokens(expected, actual);
}
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user