fix: add better errors for missing commas in arrays and objects (#5210)

* fix: add better errors for missing commas in arrays and objects

* chore: add object prop shorthand missing comma test

* fix: wording on unexpected character in arrays and objects

* fix: don't eagerly evaluate whether there is a closing brace/bracket

* feat: exit early when detecting missing commas if encountering invalid tokens

* A snapshot a day keeps the bugs away! 📷🐛 (OS: namespace-profile-ubuntu-8-cores)

* A snapshot a day keeps the bugs away! 📷🐛 (OS: namespace-profile-ubuntu-8-cores)

* fix: updated reserved word in test to replace removed one

---------

Co-authored-by: Tom Pridham <pridham.tom@gmail.com>
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
This commit is contained in:
Jonathan Tran
2025-03-14 12:52:10 -04:00
committed by GitHub
parent 9e37e13b6b
commit 0229105158
3 changed files with 257 additions and 29 deletions

Binary file not shown.

Before

Width:  |  Height:  |  Size: 75 KiB

After

Width:  |  Height:  |  Size: 74 KiB

View File

@ -4,17 +4,17 @@
use std::{cell::RefCell, collections::BTreeMap}; use std::{cell::RefCell, collections::BTreeMap};
use winnow::{ use winnow::{
combinator::{alt, delimited, opt, peek, preceded, repeat, separated, separated_pair, terminated}, combinator::{alt, delimited, opt, peek, preceded, repeat, repeat_till, separated, separated_pair, terminated},
dispatch, dispatch,
error::{ErrMode, StrContext, StrContextValue}, error::{ErrMode, StrContext, StrContextValue},
prelude::*, prelude::*,
stream::Stream, stream::Stream,
token::{any, one_of, take_till}, token::{any, none_of, one_of, take_till},
}; };
use super::{ use super::{
ast::types::{Ascription, ImportPath, LabelledExpression}, ast::types::{Ascription, ImportPath, LabelledExpression},
token::NumericSuffix, token::{NumericSuffix, RESERVED_WORDS},
}; };
use crate::{ use crate::{
docs::StdLibFn, docs::StdLibFn,
@ -746,21 +746,58 @@ pub(crate) fn array_elem_by_elem(i: &mut TokenSlice) -> PResult<Node<ArrayExpres
) )
.context(expected("array contents, a list of elements (like [1, 2, 3])")) .context(expected("array contents, a list of elements (like [1, 2, 3])"))
.parse_next(i)?; .parse_next(i)?;
ignore_trailing_comma(i);
ignore_whitespace(i); ignore_whitespace(i);
let end = close_bracket(i)
.map_err(|e| { let maybe_end = close_bracket(i).map_err(|e| {
if let Some(mut err) = e.clone().into_inner() { if let Some(mut err) = e.clone().into_inner() {
err.cause = Some(CompilationError::fatal( let start_range = open.as_source_range();
open.as_source_range(), let end_range = i.as_source_range();
"Array is missing a closing bracket(`]`)", err.cause = Some(CompilationError::fatal(
)); SourceRange::from([start_range.start(), end_range.start(), end_range.module_id().as_usize()]),
ErrMode::Cut(err) "Encountered an unexpected character(s) before finding a closing bracket(`]`) for the array",
} else { ));
// ErrMode::Incomplete, not sure if it's actually possible to end up with this here ErrMode::Cut(err)
e } else {
} // ErrMode::Incomplete, not sure if it's actually possible to end up with this here
})? e
.end; }
});
if maybe_end.is_err() {
// if there is a closing bracket at some point, but it wasn't the next token, it's likely that they forgot a comma between some
// of the elements
let maybe_closing_bracket: PResult<((), Token)> = peek(repeat_till(
0..,
none_of(|token: Token| {
// bail out early if we encounter something that is for sure not allowed in an
// array, otherwise we could seek to find a closing bracket until the end of the
// file
RESERVED_WORDS
.keys()
.chain([",,", "{", "}", "["].iter())
.any(|word| *word == token.value)
})
.void(),
one_of(|term: Token| term.value == "]"),
))
.parse_next(i);
let has_closing_bracket = maybe_closing_bracket.is_ok();
if has_closing_bracket {
let start_range = i.as_source_range();
// safe to unwrap here because we checked it was Ok above
let end_range = maybe_closing_bracket.unwrap().1.as_source_range();
let e = ContextError {
context: vec![],
cause: Some(CompilationError::fatal(
SourceRange::from([start_range.start(), end_range.end(), end_range.module_id().as_usize()]),
"Unexpected character encountered. You might be missing a comma in between elements.",
)),
};
return Err(ErrMode::Cut(e));
}
}
let end = maybe_end?.end;
// Sort the array's elements (i.e. expression nodes) from the noncode nodes. // Sort the array's elements (i.e. expression nodes) from the noncode nodes.
let (elements, non_code_nodes): (Vec<_>, BTreeMap<usize, _>) = elements.into_iter().enumerate().fold( let (elements, non_code_nodes): (Vec<_>, BTreeMap<usize, _>) = elements.into_iter().enumerate().fold(
@ -819,7 +856,7 @@ fn array_end_start(i: &mut TokenSlice) -> PResult<Node<ArrayRangeExpression>> {
} }
fn object_property_same_key_and_val(i: &mut TokenSlice) -> PResult<Node<ObjectProperty>> { fn object_property_same_key_and_val(i: &mut TokenSlice) -> PResult<Node<ObjectProperty>> {
let key = nameable_identifier.context(expected("the property's key (the name or identifier of the property), e.g. in 'height: 4', 'height' is the property key")).parse_next(i)?; let key = nameable_identifier.context(expected("the property's key (the name or identifier of the property), e.g. in 'height = 4', 'height' is the property key")).parse_next(i)?;
ignore_whitespace(i); ignore_whitespace(i);
Ok(Node { Ok(Node {
start: key.start, start: key.start,
@ -846,7 +883,7 @@ fn object_property(i: &mut TokenSlice) -> PResult<Node<ObjectProperty>> {
ignore_whitespace(i); ignore_whitespace(i);
let expr = match expression let expr = match expression
.context(expected( .context(expected(
"the value which you're setting the property to, e.g. in 'height: 4', the value is 4", "the value which you're setting the property to, e.g. in 'height = 4', the value is 4",
)) ))
.parse_next(i) .parse_next(i)
{ {
@ -892,7 +929,7 @@ fn property_separator(i: &mut TokenSlice) -> PResult<()> {
alt(( alt((
// Normally you need a comma. // Normally you need a comma.
comma_sep, comma_sep,
// But, if the array is ending, no need for a comma. // But, if the object is ending, no need for a comma.
peek(preceded(opt(whitespace), close_brace)).void(), peek(preceded(opt(whitespace), close_brace)).void(),
)) ))
.parse_next(i) .parse_next(i)
@ -926,10 +963,62 @@ pub(crate) fn object(i: &mut TokenSlice) -> PResult<Node<ObjectExpression>> {
)), )),
) )
.context(expected( .context(expected(
"a comma-separated list of key-value pairs, e.g. 'height: 4, width: 3'", "a comma-separated list of key-value pairs, e.g. 'height = 4, width = 3'",
)) ))
.parse_next(i)?; .parse_next(i)?;
ignore_trailing_comma(i);
ignore_whitespace(i);
let maybe_end = close_brace(i).map_err(|e| {
if let Some(mut err) = e.clone().into_inner() {
let start_range = open.as_source_range();
let end_range = i.as_source_range();
err.cause = Some(CompilationError::fatal(
SourceRange::from([start_range.start(), end_range.start(), end_range.module_id().as_usize()]),
"Encountered an unexpected character(s) before finding a closing brace(`}`) for the object",
));
ErrMode::Cut(err)
} else {
// ErrMode::Incomplete, not sure if it's actually possible to end up with this here
e
}
});
if maybe_end.is_err() {
// if there is a closing brace at some point, but it wasn't the next token, it's likely that they forgot a comma between some
// of the properties
let maybe_closing_brace: PResult<((), Token)> = peek(repeat_till(
0..,
none_of(|token: Token| {
// bail out early if we encounter something that is for sure not allowed in an
// object, otherwise we could seek to find a closing brace until the end of the
// file
RESERVED_WORDS
.keys()
.chain([",,", "[", "]", "{"].iter())
.any(|word| *word == token.value)
})
.void(),
one_of(|c: Token| c.value == "}"),
))
.parse_next(i);
let has_closing_brace = maybe_closing_brace.is_ok();
if has_closing_brace {
let start_range = i.as_source_range();
// okay to unwrap here because we checked it was Ok above
let end_range = maybe_closing_brace.unwrap().1.as_source_range();
let e = ContextError {
context: vec![],
cause: Some(CompilationError::fatal(
SourceRange::from([start_range.start(), end_range.end(), end_range.module_id().as_usize()]),
"Unexpected character encountered. You might be missing a comma in between properties.",
)),
};
return Err(ErrMode::Cut(e));
}
}
let end = maybe_end?.end;
// Sort the object's properties from the noncode nodes. // Sort the object's properties from the noncode nodes.
let (properties, non_code_nodes): (Vec<_>, BTreeMap<usize, _>) = properties.into_iter().enumerate().fold( let (properties, non_code_nodes): (Vec<_>, BTreeMap<usize, _>) = properties.into_iter().enumerate().fold(
(Vec::new(), BTreeMap::new()), (Vec::new(), BTreeMap::new()),
@ -945,9 +1034,7 @@ pub(crate) fn object(i: &mut TokenSlice) -> PResult<Node<ObjectExpression>> {
(properties, non_code_nodes) (properties, non_code_nodes)
}, },
); );
ignore_trailing_comma(i);
ignore_whitespace(i);
let end = close_brace(i)?.end;
let non_code_meta = NonCodeMeta { let non_code_meta = NonCodeMeta {
non_code_nodes, non_code_nodes,
..Default::default() ..Default::default()
@ -3869,7 +3956,7 @@ mySk1 = startSketchOn(XY)
assert_eq!( assert_eq!(
src_expected, src_expected,
src_actual, src_actual,
"expected error would highlight {} but it actually highlighted {}", "expected error would highlight `{}` but it actually highlighted `{}`",
&p[src_expected[0]..src_expected[1]], &p[src_expected[0]..src_expected[1]],
&p[src_actual[0]..src_actual[1]], &p[src_actual[0]..src_actual[1]],
); );
@ -4060,7 +4147,11 @@ z(-[["#,
#[test] #[test]
fn test_parse_weird_lots_of_fancy_brackets() { fn test_parse_weird_lots_of_fancy_brackets() {
assert_err(r#"zz({{{{{{{{)iegAng{{{{{{{##"#, "Unexpected token: (", [2, 3]); assert_err(
r#"zz({{{{{{{{)iegAng{{{{{{{##"#,
"Encountered an unexpected character(s) before finding a closing brace(`}`) for the object",
[3, 4],
);
} }
#[test] #[test]
@ -4601,10 +4692,123 @@ let myBox = box([0,0], -3, -16, -10)
} }
#[test] #[test]
fn test_parse_missing_closing_bracket() { fn test_parse_array_missing_closing_bracket() {
let some_program_string = r#" let some_program_string = r#"
sketch001 = startSketchOn('XZ') |> startProfileAt([90.45, 119.09, %)"#; sketch001 = startSketchOn('XZ') |> startProfileAt([90.45, 119.09, %)"#;
assert_err(some_program_string, "Array is missing a closing bracket(`]`)", [51, 52]); assert_err(
some_program_string,
"Encountered an unexpected character(s) before finding a closing bracket(`]`) for the array",
[51, 67],
);
}
#[test]
fn test_parse_array_missing_comma() {
let some_program_string = r#"
sketch001 = startSketchOn('XZ') |> startProfileAt([90.45 119.09], %)"#;
assert_err(
some_program_string,
"Unexpected character encountered. You might be missing a comma in between elements.",
[52, 65],
);
}
#[test]
fn test_parse_array_reserved_word_early_exit() {
// since there is an early exit if encountering a reserved word, the error should be about
// that and not the missing comma
let some_program_string = r#"
sketch001 = startSketchOn('XZ') |> startProfileAt([90.45 $struct], %)"#;
assert_err(
some_program_string,
"Encountered an unexpected character(s) before finding a closing bracket(`]`) for the array",
[51, 52],
);
}
#[test]
fn test_parse_array_random_brace() {
let some_program_string = r#"
sketch001 = startSketchOn('XZ') |> startProfileAt([}], %)"#;
assert_err(
some_program_string,
"Encountered an unexpected character(s) before finding a closing bracket(`]`) for the array",
[51, 52],
);
}
#[test]
fn test_parse_object_missing_closing_brace() {
let some_program_string = r#"{
foo = bar,"#;
assert_err(
some_program_string,
"Encountered an unexpected character(s) before finding a closing brace(`}`) for the object",
[0, 23],
);
}
#[test]
fn test_parse_object_reserved_word_early_exit() {
// since there is an early exit if encountering a reserved word, the error should be about
// that and not the missing comma
let some_program_string = r#"{bar = foo struct = man}"#;
assert_err(
some_program_string,
"Encountered an unexpected character(s) before finding a closing brace(`}`) for the object",
[0, 1],
);
}
#[test]
fn test_parse_object_missing_comma() {
let some_program_string = r#"{
foo = bar,
bar = foo
bat = man
}"#;
assert_err(
some_program_string,
"Unexpected character encountered. You might be missing a comma in between properties.",
[37, 78],
);
}
#[test]
fn test_parse_object_missing_comma_one_line() {
let some_program_string = r#"{bar = foo bat = man}"#;
assert_err(
some_program_string,
"Unexpected character encountered. You might be missing a comma in between properties.",
[1, 21],
);
}
#[test]
fn test_parse_object_random_bracket() {
let some_program_string = r#"{]}"#;
assert_err(
some_program_string,
"Encountered an unexpected character(s) before finding a closing brace(`}`) for the object",
[0, 1],
);
}
#[test]
fn test_parse_object_shorthand_missing_comma() {
let some_program_string = r#"
bar = 1
{
foo = bar,
bar
bat = man
}"#;
assert_err(
some_program_string,
"Unexpected character encountered. You might be missing a comma in between properties.",
[54, 89],
);
} }
#[test] #[test]

View File

@ -24,7 +24,6 @@ use crate::{
mod tokeniser; mod tokeniser;
#[cfg(test)]
pub(crate) use tokeniser::RESERVED_WORDS; pub(crate) use tokeniser::RESERVED_WORDS;
// Note the ordering, it's important that `m` comes after `mm` and `cm`. // Note the ordering, it's important that `m` comes after `mm` and `cm`.
@ -162,7 +161,9 @@ impl IntoIterator for TokenStream {
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub(crate) struct TokenSlice<'a> { pub(crate) struct TokenSlice<'a> {
stream: &'a TokenStream, stream: &'a TokenStream,
/// Current position of the leading Token in the stream
start: usize, start: usize,
/// The number of total Tokens in the stream
end: usize, end: usize,
} }
@ -190,6 +191,21 @@ impl<'a> TokenSlice<'a> {
stream: self.stream, stream: self.stream,
} }
} }
pub fn as_source_range(&self) -> SourceRange {
let stream_len = self.stream.tokens.len();
let first_token = if stream_len == self.start {
&self.stream.tokens[self.start - 1]
} else {
self.token(0)
};
let last_token = if stream_len == self.end {
&self.stream.tokens[stream_len - 1]
} else {
self.token(self.end - self.start)
};
SourceRange::new(first_token.start, last_token.end, last_token.module_id)
}
} }
impl<'a> IntoIterator for TokenSlice<'a> { impl<'a> IntoIterator for TokenSlice<'a> {
@ -294,6 +310,14 @@ impl<'a> winnow::stream::StreamIsPartial for TokenSlice<'a> {
} }
} }
impl<'a> winnow::stream::FindSlice<&str> for TokenSlice<'a> {
fn find_slice(&self, substr: &str) -> Option<std::ops::Range<usize>> {
self.iter()
.enumerate()
.find_map(|(i, b)| if b.value == substr { Some(i..self.end) } else { None })
}
}
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
pub struct Checkpoint(usize, usize); pub struct Checkpoint(usize, usize);