Factor away the Parser struct (#4520)

Signed-off-by: Nick Cameron <nrc@ncameron.org>
This commit is contained in:
Nick Cameron
2024-11-21 07:52:10 +13:00
committed by GitHub
parent 6fb493f209
commit c17cb1067f
9 changed files with 91 additions and 191 deletions

View File

@ -185,7 +185,7 @@ pub async fn modify_ast_for_sketch(
let recasted = program.ast.recast(&FormatOptions::default(), 0);
// Re-parse the ast so we get the correct source ranges.
*program = crate::parser::parse(&recasted, module_id)?.into();
*program = crate::parser::parse_str(&recasted, module_id)?.into();
Ok(recasted)
}

View File

@ -3194,7 +3194,7 @@ const cylinder = startSketchOn('-XZ')
return arg0
}"#;
let module_id = ModuleId::default();
let program = crate::parser::parse(some_program_string, module_id).unwrap();
let program = crate::parser::parse_str(some_program_string, module_id).unwrap();
// Check the program output for the types of the parameters.
let function = program.body.first().unwrap();
@ -3265,7 +3265,7 @@ const cylinder = startSketchOn('-XZ')
return 1
}"#;
let module_id = ModuleId::default();
let program = crate::parser::parse(some_program_string, module_id).unwrap();
let program = crate::parser::parse_str(some_program_string, module_id).unwrap();
// Check the program output for the types of the parameters.
let function = program.body.first().unwrap();

View File

@ -2413,7 +2413,7 @@ impl ExecutorContext {
}
let module_id = exec_state.add_module(resolved_path.clone());
let source = self.fs.read_to_string(&resolved_path, source_range).await?;
let program = crate::parser::parse(&source, module_id)?;
let program = crate::parser::parse_str(&source, module_id)?;
let (module_memory, module_exports) = {
exec_state.import_stack.push(resolved_path.clone());
let original_execution = self.engine.replace_execution_kind(ExecutionKind::Isolated);

View File

@ -88,13 +88,11 @@ impl Program {
pub fn parse(input: &str) -> Result<Program, KclError> {
let module_id = ModuleId::default();
let tokens = token::lexer(input, module_id)?;
let parser = parser::Parser::new(tokens);
let ast = parser.ast()?;
let ast = parser::parse_tokens(tokens)?;
Ok(Program { ast })
}
/// Compute the digest of this program's AST.
///
/// This is a thin wrapper that forwards to `compute_digest` on the wrapped
/// `ast` and returns the resulting `Digest` unchanged.
pub fn compute_digest(&mut self) -> ast::types::digest::Digest {
self.ast.compute_digest()
}

View File

@ -298,8 +298,7 @@ impl crate::lsp::backend::Backend for Backend {
}
// Let's update the AST.
let parser = crate::parser::Parser::new(tokens.clone());
let result = parser.ast();
let result = crate::parser::parse_tokens(tokens.clone());
let mut ast = match result {
Ok(ast) => ast,
Err(err) => {
@ -1302,11 +1301,7 @@ impl LanguageServer for Backend {
// I don't know if we need to do this again since it should be updated in the context.
// But I figure better safe than sorry since this will write back out to the file.
let module_id = ModuleId::default();
let Ok(tokens) = crate::token::lexer(current_code, module_id) else {
return Ok(None);
};
let parser = crate::parser::Parser::new(tokens);
let Ok(ast) = parser.ast() else {
let Ok(ast) = crate::parser::parse_str(current_code, module_id) else {
return Ok(None);
};
// Now recast it.
@ -1340,11 +1335,7 @@ impl LanguageServer for Backend {
// I don't know if we need to do this again since it should be updated in the context.
// But I figure better safe than sorry since this will write back out to the file.
let module_id = ModuleId::default();
let Ok(tokens) = crate::token::lexer(current_code, module_id) else {
return Ok(None);
};
let parser = crate::parser::Parser::new(tokens);
let Ok(mut ast) = parser.ast() else {
let Ok(mut ast) = crate::parser::parse_str(current_code, module_id) else {
return Ok(None);
};

View File

@ -16,57 +16,44 @@ pub const PIPE_OPERATOR: &str = "|>";
/// Parse the given KCL code into an AST. This is the top-level.
pub fn top_level_parse(code: &str) -> Result<Node<Program>, KclError> {
let module_id = ModuleId::default();
parse(code, module_id)
parse_str(code, module_id)
}
/// Parse the given KCL code into an AST.
pub fn parse(code: &str, module_id: ModuleId) -> Result<Node<Program>, KclError> {
pub fn parse_str(code: &str, module_id: ModuleId) -> Result<Node<Program>, KclError> {
let tokens = crate::token::lexer(code, module_id)?;
let parser = Parser::new(tokens);
parser.ast()
parse_tokens(tokens)
}
pub struct Parser {
pub tokens: Vec<Token>,
pub unknown_tokens: Vec<Token>,
}
pub fn parse_tokens(tokens: Vec<Token>) -> Result<Node<Program>, KclError> {
let (tokens, unknown_tokens): (Vec<Token>, Vec<Token>) = tokens
.into_iter()
.partition(|token| token.token_type != TokenType::Unknown);
impl Parser {
pub fn new(tokens: Vec<Token>) -> Self {
let (tokens, unknown_tokens): (Vec<Token>, Vec<Token>) = tokens
.into_iter()
.partition(|token| token.token_type != TokenType::Unknown);
Self { tokens, unknown_tokens }
if !unknown_tokens.is_empty() {
let source_ranges = unknown_tokens.iter().map(SourceRange::from).collect();
let token_list = unknown_tokens.iter().map(|t| t.value.as_str()).collect::<Vec<_>>();
let message = if token_list.len() == 1 {
format!("found unknown token '{}'", token_list[0])
} else {
format!("found unknown tokens [{}]", token_list.join(", "))
};
return Err(KclError::Lexical(KclErrorDetails { source_ranges, message }));
}
/// Run the parser
pub fn ast(&self) -> Result<Node<Program>, KclError> {
if !self.unknown_tokens.is_empty() {
let source_ranges = self.unknown_tokens.iter().map(SourceRange::from).collect();
let token_list = self.unknown_tokens.iter().map(|t| t.value.as_str()).collect::<Vec<_>>();
let message = if token_list.len() == 1 {
format!("found unknown token '{}'", token_list[0])
} else {
format!("found unknown tokens [{}]", token_list.join(", "))
};
return Err(KclError::Lexical(KclErrorDetails { source_ranges, message }));
}
// Important, to not call this before the unknown tokens check.
if self.tokens.is_empty() {
// Empty file should just do nothing.
return Ok(Node::<Program>::default());
}
// Check all the tokens are whitespace or comments.
if self
.tokens
.iter()
.all(|t| t.token_type.is_whitespace() || t.token_type.is_comment())
{
return Ok(Node::<Program>::default());
}
parser_impl::run_parser(&mut self.tokens.as_slice())
// Important, to not call this before the unknown tokens check.
if tokens.is_empty() {
// Empty file should just do nothing.
return Ok(Node::<Program>::default());
}
// Check all the tokens are whitespace or comments.
if tokens
.iter()
.all(|t| t.token_type.is_whitespace() || t.token_type.is_comment())
{
return Ok(Node::<Program>::default());
}
parser_impl::run_parser(&mut tokens.as_slice())
}

View File

@ -1,14 +1,10 @@
#[cfg(test)]
mod tests {
macro_rules! parse_and_lex {
($func_name:ident, $test_kcl_program:expr) => {
#[test]
fn $func_name() {
let module_id = $crate::parser::ModuleId::default();
if let Ok(v) = $crate::token::lexer($test_kcl_program, module_id) {
let _ = $crate::parser::Parser::new(v).ast();
}
let _ = crate::parser::top_level_parse($test_kcl_program);
}
};
}

View File

@ -2827,7 +2827,7 @@ const mySk1 = startSketchAt([0, 0])"#;
for test in tests {
// Run the original parser
let tokens = crate::token::lexer(test, ModuleId::default()).unwrap();
let mut expected_body = crate::parser::Parser::new(tokens.clone()).ast().unwrap().inner.body;
let mut expected_body = crate::parser::parse_tokens(tokens.clone()).unwrap().inner.body;
assert_eq!(expected_body.len(), 1);
let BodyItem::VariableDeclaration(expected) = expected_body.pop().unwrap() else {
panic!("Expected variable declaration");
@ -2854,8 +2854,7 @@ const mySk1 = startSketchAt([0, 0])"#;
#[test]
fn test_math_parse() {
let module_id = ModuleId::default();
let tokens = crate::token::lexer(r#"5 + "a""#, module_id).unwrap();
let actual = crate::parser::Parser::new(tokens).ast().unwrap().inner.body;
let actual = crate::parser::parse_str(r#"5 + "a""#, module_id).unwrap().inner.body;
let expr = Node::boxed(
BinaryExpression {
operator: BinaryOperator::Add,
@ -2991,8 +2990,7 @@ const mySk1 = startSketchAt([0, 0])"#;
fn test_abstract_syntax_tree() {
let code = "5 +6";
let module_id = ModuleId::default();
let parser = crate::parser::Parser::new(crate::token::lexer(code, module_id).unwrap());
let result = parser.ast().unwrap();
let result = crate::parser::parse_str(code, module_id).unwrap();
let expected_result = Node::new(
Program {
body: vec![BodyItem::ExpressionStatement(Node::new(
@ -3140,9 +3138,7 @@ const secondExtrude = startSketchOn('XY')
#[test]
fn test_parse_greater_bang() {
let module_id = ModuleId::default();
let tokens = crate::token::lexer(">!", module_id).unwrap();
let parser = crate::parser::Parser::new(tokens);
let err = parser.ast().unwrap_err();
let err = crate::parser::parse_str(">!", module_id).unwrap_err();
assert_eq!(
err.to_string(),
r#"syntax: KclErrorDetails { source_ranges: [SourceRange([0, 1, 0])], message: "Unexpected token: >" }"#
@ -3152,12 +3148,9 @@ const secondExtrude = startSketchOn('XY')
#[test]
fn test_parse_z_percent_parens() {
let module_id = ModuleId::default();
let tokens = crate::token::lexer("z%)", module_id).unwrap();
let parser = crate::parser::Parser::new(tokens);
let result = parser.ast();
assert!(result.is_err());
let err = crate::parser::parse_str("z%)", module_id).unwrap_err();
assert_eq!(
result.err().unwrap().to_string(),
err.to_string(),
r#"syntax: KclErrorDetails { source_ranges: [SourceRange([1, 2, 0])], message: "Unexpected token: %" }"#
);
}
@ -3165,12 +3158,11 @@ const secondExtrude = startSketchOn('XY')
#[test]
fn test_parse_parens_unicode() {
let module_id = ModuleId::default();
let result = crate::token::lexer("", module_id);
let err = crate::parser::parse_str("", module_id).unwrap_err();
// TODO: Better errors when program cannot tokenize.
// https://github.com/KittyCAD/modeling-app/issues/696
assert!(result.is_err());
assert_eq!(
result.err().unwrap().to_string(),
err.to_string(),
r#"lexical: KclErrorDetails { source_ranges: [SourceRange([1, 2, 0])], message: "found unknown token 'ޜ'" }"#
);
}
@ -3187,96 +3179,64 @@ const bracket = [-leg2 + thickness, 0]
#[test]
fn test_parse_nested_open_brackets() {
let module_id = ModuleId::default();
let tokens = crate::token::lexer(
crate::parser::top_level_parse(
r#"
z(-[["#,
module_id,
)
.unwrap();
let parser = crate::parser::Parser::new(tokens);
let result = parser.ast();
assert!(result.is_err());
.unwrap_err();
}
#[test]
fn test_parse_weird_new_line_function() {
let module_id = ModuleId::default();
let tokens = crate::token::lexer(
let err = crate::parser::top_level_parse(
r#"z
(--#"#,
module_id,
)
.unwrap();
let parser = crate::parser::Parser::new(tokens);
let result = parser.ast();
assert!(result.is_err());
.unwrap_err();
assert_eq!(
result.err().unwrap().to_string(),
err.to_string(),
r#"syntax: KclErrorDetails { source_ranges: [SourceRange([3, 4, 0])], message: "Unexpected token: (" }"#
);
}
#[test]
fn test_parse_weird_lots_of_fancy_brackets() {
let module_id = ModuleId::default();
let tokens = crate::token::lexer(r#"zz({{{{{{{{)iegAng{{{{{{{##"#, module_id).unwrap();
let parser = crate::parser::Parser::new(tokens);
let result = parser.ast();
assert!(result.is_err());
let err = crate::parser::top_level_parse(r#"zz({{{{{{{{)iegAng{{{{{{{##"#).unwrap_err();
assert_eq!(
result.err().unwrap().to_string(),
err.to_string(),
r#"syntax: KclErrorDetails { source_ranges: [SourceRange([2, 3, 0])], message: "Unexpected token: (" }"#
);
}
#[test]
fn test_parse_weird_close_before_open() {
let module_id = ModuleId::default();
let tokens = crate::token::lexer(
let err = crate::parser::top_level_parse(
r#"fn)n
e
["#,
module_id,
)
.unwrap();
let parser = crate::parser::Parser::new(tokens);
let result = parser.ast();
assert!(result.is_err());
assert!(result
.err()
.unwrap()
.unwrap_err();
assert!(err
.to_string()
.contains("expected whitespace, found ')' which is brace"));
}
#[test]
fn test_parse_weird_close_before_nada() {
let module_id = ModuleId::default();
let tokens = crate::token::lexer(r#"fn)n-"#, module_id).unwrap();
let parser = crate::parser::Parser::new(tokens);
let result = parser.ast();
assert!(result.is_err());
assert!(result
.err()
.unwrap()
let err = crate::parser::top_level_parse(r#"fn)n-"#).unwrap_err();
assert!(err
.to_string()
.contains("expected whitespace, found ')' which is brace"));
}
#[test]
fn test_parse_weird_lots_of_slashes() {
let module_id = ModuleId::default();
let tokens = crate::token::lexer(
let err = crate::parser::top_level_parse(
r#"J///////////o//+///////////P++++*++++++P///////˟
++4"#,
module_id,
)
.unwrap();
let parser = crate::parser::Parser::new(tokens);
let result = parser.ast();
assert!(result.is_err());
let actual = result.err().unwrap().to_string();
.unwrap_err();
let actual = err.to_string();
assert!(actual.contains("Unexpected token: +"), "actual={actual:?}");
}
@ -3364,76 +3324,60 @@ e
#[test]
fn test_error_keyword_in_variable() {
let some_program_string = r#"const let = "thing""#;
let module_id = ModuleId::default();
let tokens = crate::token::lexer(some_program_string, module_id).unwrap();
let parser = crate::parser::Parser::new(tokens);
let result = parser.ast();
assert!(result.is_err());
let err = crate::parser::top_level_parse(r#"const let = "thing""#).unwrap_err();
assert_eq!(
result.err().unwrap().to_string(),
err.to_string(),
r#"syntax: KclErrorDetails { source_ranges: [SourceRange([6, 9, 0])], message: "Cannot assign a variable to a reserved keyword: let" }"#
);
}
#[test]
fn test_error_keyword_in_fn_name() {
let some_program_string = r#"fn let = () {}"#;
let module_id = ModuleId::default();
let tokens = crate::token::lexer(some_program_string, module_id).unwrap();
let parser = crate::parser::Parser::new(tokens);
let result = parser.ast();
assert!(result.is_err());
let err = crate::parser::top_level_parse(r#"fn let = () {}"#).unwrap_err();
assert_eq!(
result.err().unwrap().to_string(),
err.to_string(),
r#"syntax: KclErrorDetails { source_ranges: [SourceRange([3, 6, 0])], message: "Cannot assign a variable to a reserved keyword: let" }"#
);
}
#[test]
fn test_error_stdlib_in_fn_name() {
let some_program_string = r#"fn cos = () => {
let err = crate::parser::top_level_parse(
r#"fn cos = () => {
return 1
}"#;
let module_id = ModuleId::default();
let tokens = crate::token::lexer(some_program_string, module_id).unwrap();
let parser = crate::parser::Parser::new(tokens);
let result = parser.ast();
assert!(result.is_err());
}"#,
)
.unwrap_err();
assert_eq!(
result.err().unwrap().to_string(),
err.to_string(),
r#"syntax: KclErrorDetails { source_ranges: [SourceRange([3, 6, 0])], message: "Cannot assign a variable to a reserved keyword: cos" }"#
);
}
#[test]
fn test_error_keyword_in_fn_args() {
let some_program_string = r#"fn thing = (let) => {
let err = crate::parser::top_level_parse(
r#"fn thing = (let) => {
return 1
}"#;
let module_id = ModuleId::default();
let tokens = crate::token::lexer(some_program_string, module_id).unwrap();
let parser = crate::parser::Parser::new(tokens);
let result = parser.ast();
assert!(result.is_err());
}"#,
)
.unwrap_err();
assert_eq!(
result.err().unwrap().to_string(),
err.to_string(),
r#"syntax: KclErrorDetails { source_ranges: [SourceRange([12, 15, 0])], message: "Cannot assign a variable to a reserved keyword: let" }"#
);
}
#[test]
fn test_error_stdlib_in_fn_args() {
let some_program_string = r#"fn thing = (cos) => {
let err = crate::parser::top_level_parse(
r#"fn thing = (cos) => {
return 1
}"#;
let module_id = ModuleId::default();
let tokens = crate::token::lexer(some_program_string, module_id).unwrap();
let parser = crate::parser::Parser::new(tokens);
let result = parser.ast();
assert!(result.is_err());
}"#,
)
.unwrap_err();
assert_eq!(
result.err().unwrap().to_string(),
err.to_string(),
r#"syntax: KclErrorDetails { source_ranges: [SourceRange([12, 15, 0])], message: "Cannot assign a variable to a reserved keyword: cos" }"#
);
}
@ -3547,13 +3491,9 @@ thing(false)
"#,
name
);
let module_id = ModuleId::default();
let tokens = crate::token::lexer(&some_program_string, module_id).unwrap();
let parser = crate::parser::Parser::new(tokens);
let result = parser.ast();
assert!(result.is_err());
let err = crate::parser::top_level_parse(&some_program_string).unwrap_err();
assert_eq!(
result.err().unwrap().to_string(),
err.to_string(),
format!(
r#"syntax: KclErrorDetails {{ source_ranges: [SourceRange([0, {}, 0])], message: "Expected a `fn` variable kind, found: `const`" }}"#,
name.len(),
@ -3565,16 +3505,12 @@ thing(false)
#[test]
fn test_error_define_var_as_function() {
let some_program_string = r#"fn thing = "thing""#;
let module_id = ModuleId::default();
let tokens = crate::token::lexer(some_program_string, module_id).unwrap();
let parser = crate::parser::Parser::new(tokens);
let result = parser.ast();
assert!(result.is_err());
let err = crate::parser::top_level_parse(some_program_string).unwrap_err();
// TODO: https://github.com/KittyCAD/modeling-app/issues/784
// Improve this error message.
// It should say that the compiler is expecting a function expression on the RHS.
assert_eq!(
result.err().unwrap().to_string(),
err.to_string(),
r#"syntax: KclErrorDetails { source_ranges: [SourceRange([11, 18, 0])], message: "Unexpected token: \"thing\"" }"#
);
}
@ -3589,11 +3525,7 @@ thing(false)
|> line([-5.09, 12.33], %)
asdasd
"#;
let module_id = ModuleId::default();
let tokens = crate::token::lexer(test_program, module_id).unwrap();
let parser = crate::parser::Parser::new(tokens);
let result = parser.ast();
let _e = result.unwrap_err();
crate::parser::top_level_parse(test_program).unwrap_err();
}
#[test]
@ -3647,10 +3579,7 @@ let myBox = box([0,0], -3, -16, -10)
foo()
|> bar(2)
"#;
let module_id = ModuleId::default();
let tokens = crate::token::lexer(some_program_string, module_id).unwrap();
let parser = crate::parser::Parser::new(tokens);
let err = parser.ast().unwrap_err();
let err = crate::parser::top_level_parse(some_program_string).unwrap_err();
assert_eq!(
err.to_string(),
r#"syntax: KclErrorDetails { source_ranges: [SourceRange([30, 36, 0])], message: "All expressions in a pipeline must use the % (substitution operator)" }"#

View File

@ -3,7 +3,6 @@ use insta::rounded_redaction;
use crate::{
ast::types::{ModuleId, Node, Program},
errors::KclError,
parser::Parser,
token::Token,
};
@ -61,7 +60,7 @@ fn parse(test_name: &str) {
};
// Parse the tokens into an AST.
let parse_res = Parser::new(tokens).ast();
let parse_res = crate::parser::parse_tokens(tokens);
assert_snapshot(test_name, "Result of parsing", || {
insta::assert_json_snapshot!("ast", parse_res);
});