From d7369d8a9523bb74e836321608c8e0e31e8a0a9a Mon Sep 17 00:00:00 2001 From: Nick Cameron Date: Thu, 13 Mar 2025 14:56:30 +1300 Subject: [PATCH] parse union and fancy array types Signed-off-by: Nick Cameron --- rust/kcl-lib/src/docs/kcl_doc.rs | 4 +- rust/kcl-lib/src/execution/kcl_value.rs | 11 ++- rust/kcl-lib/src/lsp/kcl/hover.rs | 6 +- rust/kcl-lib/src/lsp/tests.rs | 2 +- rust/kcl-lib/src/parsing/ast/digest.rs | 16 ++- rust/kcl-lib/src/parsing/ast/types/mod.rs | 65 ++++++++++-- rust/kcl-lib/src/parsing/parser.rs | 103 +++++++++++++++----- rust/kcl-lib/src/parsing/token/mod.rs | 15 +++ rust/kcl-lib/src/parsing/token/tokeniser.rs | 13 ++- rust/kcl-lib/src/unparser.rs | 51 ++++------ 10 files changed, 212 insertions(+), 74 deletions(-) diff --git a/rust/kcl-lib/src/docs/kcl_doc.rs b/rust/kcl-lib/src/docs/kcl_doc.rs index a0cda868a..3d28fd129 100644 --- a/rust/kcl-lib/src/docs/kcl_doc.rs +++ b/rust/kcl-lib/src/docs/kcl_doc.rs @@ -335,7 +335,7 @@ impl FnData { name, qual_name, args: expr.params.iter().map(ArgData::from_ast).collect(), - return_type: expr.return_type.as_ref().map(|t| t.recast(&Default::default(), 0)), + return_type: expr.return_type.as_ref().map(|t| t.to_string()), properties: Properties { exported: !var.visibility.is_default(), deprecated: false, @@ -496,7 +496,7 @@ impl ArgData { fn from_ast(arg: &crate::parsing::ast::types::Parameter) -> Self { ArgData { name: arg.identifier.name.clone(), - ty: arg.type_.as_ref().map(|t| t.recast(&Default::default(), 0)), + ty: arg.type_.as_ref().map(|t| t.to_string()), // Doc comments are not yet supported on parameters. docs: None, kind: if arg.labeled { diff --git a/rust/kcl-lib/src/execution/kcl_value.rs b/rust/kcl-lib/src/execution/kcl_value.rs index 9605f0fa2..5b86583e1 100644 --- a/rust/kcl-lib/src/execution/kcl_value.rs +++ b/rust/kcl-lib/src/execution/kcl_value.rs @@ -939,9 +939,14 @@ impl RuntimeType { Type::Primitive(pt) => { PrimitiveType::from_parsed(pt, exec_state, source_range)?.map(RuntimeType::Primitive) } - Type::Array(pt) => { - PrimitiveType::from_parsed(pt, exec_state, source_range)?.map(|t| RuntimeType::Array(t, ArrayLen::None)) + Type::Array { ty, len } => { + PrimitiveType::from_parsed(ty, exec_state, source_range)?.map(|t| RuntimeType::Array(t, len)) } + Type::Union { tys } => tys + .into_iter() + .map(|t| PrimitiveType::from_parsed(t.inner, exec_state, source_range)) + .collect::>, CompilationError>>()? + .map(RuntimeType::Union), Type::Object { properties } => properties .into_iter() .map(|p| { @@ -1034,7 +1039,7 @@ impl fmt::Display for RuntimeType { } } -#[derive(Debug, Clone, Copy, PartialEq)] +#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize, ts_rs::TS, JsonSchema)] pub enum ArrayLen { None, NonEmpty, diff --git a/rust/kcl-lib/src/lsp/kcl/hover.rs b/rust/kcl-lib/src/lsp/kcl/hover.rs index 2bb754459..4904419d7 100644 --- a/rust/kcl-lib/src/lsp/kcl/hover.rs +++ b/rust/kcl-lib/src/lsp/kcl/hover.rs @@ -344,8 +344,8 @@ impl Node { let range = self.as_source_range(); if range.contains(pos) { match &self.inner { - Type::Array(t) | Type::Primitive(t) => { - let mut name = t.to_string(); + Type::Array { ty, .. } | Type::Primitive(ty) => { + let mut name = ty.to_string(); if name.ends_with(')') { name.truncate(name.find('(').unwrap()); } @@ -379,7 +379,7 @@ impl FunctionExpression { if let Some(value) = self.body.get_expr_for_position(pos) { let mut vars = opts.vars.clone().unwrap_or_default(); for arg in &self.params { - let ty = arg.type_.as_ref().map(|ty| ty.recast(&FormatOptions::default(), 0)); + let ty = arg.type_.as_ref().map(|ty| ty.to_string()); vars.insert(arg.identifier.inner.name.clone(), ty); } return value.get_hover_value_for_position( diff --git a/rust/kcl-lib/src/lsp/tests.rs b/rust/kcl-lib/src/lsp/tests.rs index 43e869d7a..ab2be8210 100644 --- a/rust/kcl-lib/src/lsp/tests.rs +++ b/rust/kcl-lib/src/lsp/tests.rs @@ -1900,7 +1900,7 @@ async fn test_kcl_lsp_diagnostic_has_errors() { assert_eq!(diagnostics.full_document_diagnostic_report.items.len(), 1); assert_eq!( diagnostics.full_document_diagnostic_report.items[0].message, - "lexical: found unknown token ';'" + "Unexpected token: ;" ); } else { panic!("Expected full diagnostics"); diff --git a/rust/kcl-lib/src/parsing/ast/digest.rs b/rust/kcl-lib/src/parsing/ast/digest.rs index aa7031022..1597096de 100644 --- a/rust/kcl-lib/src/parsing/ast/digest.rs +++ b/rust/kcl-lib/src/parsing/ast/digest.rs @@ -194,9 +194,21 @@ impl Type { hasher.update(b"FnArgType::Primitive"); hasher.update(prim.compute_digest()) } - Type::Array(prim) => { + Type::Array { ty, len } => { hasher.update(b"FnArgType::Array"); - hasher.update(prim.compute_digest()) + hasher.update(ty.compute_digest()); + match len { + crate::execution::kcl_value::ArrayLen::None => {} + crate::execution::kcl_value::ArrayLen::NonEmpty => hasher.update(usize::MAX.to_ne_bytes()), + crate::execution::kcl_value::ArrayLen::Known(n) => hasher.update(n.to_ne_bytes()), + } + } + Type::Union { tys } => { + hasher.update(b"FnArgType::Union"); + hasher.update(tys.len().to_ne_bytes()); + for t in tys.iter_mut() { + hasher.update(t.compute_digest()); + } } Type::Object { properties } => { hasher.update(b"FnArgType::Object"); diff --git a/rust/kcl-lib/src/parsing/ast/types/mod.rs b/rust/kcl-lib/src/parsing/ast/types/mod.rs index 79e0aa221..ab415e523 100644 --- a/rust/kcl-lib/src/parsing/ast/types/mod.rs +++ b/rust/kcl-lib/src/parsing/ast/types/mod.rs @@ -25,7 +25,7 @@ pub use crate::parsing::ast::types::{ use crate::{ docs::StdLibFn, errors::KclError, - execution::{annotations, KclValue, Metadata, TagIdentifier}, + execution::{annotations, kcl_value::ArrayLen, KclValue, Metadata, TagIdentifier}, parsing::{ast::digest::Digest, token::NumericSuffix, PIPE_OPERATOR}, source_range::SourceRange, ModuleId, @@ -150,7 +150,7 @@ impl Node { self.start <= pos && pos <= self.end } - pub fn map(self, f: fn(T) -> U) -> Node { + pub fn map(self, f: impl Fn(T) -> U) -> Node { Node { inner: f(self.inner), start: self.start, @@ -3024,7 +3024,14 @@ pub enum Type { /// A primitive type. Primitive(PrimitiveType), // An array of a primitive type. - Array(PrimitiveType), + Array { + ty: PrimitiveType, + len: ArrayLen, + }, + // Union/enum types + Union { + tys: NodeList, + }, // An object type. Object { properties: Vec, @@ -3035,7 +3042,22 @@ impl fmt::Display for Type { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { Type::Primitive(primitive_type) => primitive_type.fmt(f), - Type::Array(primitive_type) => write!(f, "[{primitive_type}]"), + Type::Array { ty, len } => { + write!(f, "[{ty}")?; + match len { + ArrayLen::None => {} + ArrayLen::NonEmpty => write!(f, "; 1+")?, + ArrayLen::Known(n) => write!(f, "; {n}")?, + } + write!(f, "]") + } + Type::Union { tys } => { + write!( + f, + "{}", + tys.iter().map(|t| t.to_string()).collect::>().join(" | ") + ) + } Type::Object { properties } => { write!(f, "{{")?; let mut first = true; @@ -3624,11 +3646,17 @@ const cylinder = startSketchOn('-XZ') assert_eq!(params.len(), 3); assert_eq!( params[0].type_.as_ref().unwrap().inner, - Type::Array(PrimitiveType::Number(NumericSuffix::None)) + Type::Array { + ty: PrimitiveType::Number(NumericSuffix::None), + len: ArrayLen::None + } ); assert_eq!( params[1].type_.as_ref().unwrap().inner, - Type::Array(PrimitiveType::String) + Type::Array { + ty: PrimitiveType::String, + len: ArrayLen::None + } ); assert_eq!( params[2].type_.as_ref().unwrap().inner, @@ -3656,7 +3684,10 @@ const cylinder = startSketchOn('-XZ') assert_eq!(params.len(), 3); assert_eq!( params[0].type_.as_ref().unwrap().inner, - Type::Array(PrimitiveType::Number(NumericSuffix::None)) + Type::Array { + ty: PrimitiveType::Number(NumericSuffix::None), + len: ArrayLen::None + } ); assert_eq!( params[1].type_.as_ref().unwrap().inner, @@ -3692,7 +3723,15 @@ const cylinder = startSketchOn('-XZ') 56, module_id, ), - type_: Some(Node::new(Type::Array(PrimitiveType::String), 59, 65, module_id)), + type_: Some(Node::new( + Type::Array { + ty: PrimitiveType::String, + len: ArrayLen::None + }, + 59, + 65, + module_id + )), default_value: None, labeled: true, digest: None @@ -3773,7 +3812,15 @@ const cylinder = startSketchOn('-XZ') 34, module_id, ), - type_: Some(Node::new(Type::Array(PrimitiveType::String), 37, 43, module_id)), + type_: Some(Node::new( + Type::Array { + ty: PrimitiveType::String, + len: ArrayLen::None + }, + 37, + 43, + module_id + )), default_value: None, labeled: true, digest: None diff --git a/rust/kcl-lib/src/parsing/parser.rs b/rust/kcl-lib/src/parsing/parser.rs index fdd7ec1d1..29bf592e9 100644 --- a/rust/kcl-lib/src/parsing/parser.rs +++ b/rust/kcl-lib/src/parsing/parser.rs @@ -19,6 +19,7 @@ use super::{ use crate::{ docs::StdLibFn, errors::{CompilationError, Severity, Tag}, + execution::kcl_value::ArrayLen, parsing::{ ast::types::{ Annotation, ArrayExpression, ArrayRangeExpression, BinaryExpression, BinaryOperator, BinaryPart, BodyItem, @@ -2336,21 +2337,7 @@ impl TryFrom for Node { format!("Cannot assign a tag to a reserved keyword: {}", token.value.as_str()), )), - TokenType::Bang - | TokenType::At - | TokenType::Hash - | TokenType::Colon - | TokenType::Period - | TokenType::Operator - | TokenType::DoublePeriod - | TokenType::QuestionMark - | TokenType::BlockComment - | TokenType::Function - | TokenType::String - | TokenType::Dollar - | TokenType::Keyword - | TokenType::Unknown - | TokenType::LineComment => Err(CompilationError::fatal( + _ => Err(CompilationError::fatal( token.as_source_range(), // this is `start with` because if most of these cases are in the middle, it ends // up hitting a different error path(e.g. including a bang) or being valid(e.g. including a comment) since it will get broken up into @@ -2617,6 +2604,14 @@ fn colon(i: &mut TokenSlice) -> PResult { TokenType::Colon.parse_from(i) } +fn semi_colon(i: &mut TokenSlice) -> PResult { + TokenType::SemiColon.parse_from(i) +} + +fn plus(i: &mut TokenSlice) -> PResult { + one_of((TokenType::Operator, "+")).parse_next(i) +} + fn equals(i: &mut TokenSlice) -> PResult { one_of((TokenType::Operator, "=")) .context(expected("the equals operator, =")) @@ -2659,6 +2654,12 @@ fn comma_sep(i: &mut TokenSlice) -> PResult<()> { Ok(()) } +/// Parse a `|`, optionally followed by some whitespace. +fn pipe_sep(i: &mut TokenSlice) -> PResult<()> { + (opt(whitespace), one_of((TokenType::Operator, "|")), opt(whitespace)).parse_next(i)?; + Ok(()) +} + /// Arguments are passed into a function. fn arguments(i: &mut TokenSlice) -> PResult> { separated(0.., expression, comma_sep) @@ -2686,20 +2687,29 @@ fn argument_type(i: &mut TokenSlice) -> PResult> { // Object types // TODO it is buggy to treat object fields like parameters since the parameters parser assumes a terminating `)`. (open_brace, parameters, close_brace).map(|(open, params, close)| { - Ok(Node::new( + Node::new( Type::Object { properties: params }, open.start, close.end, open.module_id, - )) + ) }), // Array types - (open_bracket, primitive_type, close_bracket).map(|(_, t, _)| Ok(t.map(Type::Array))), - // Primitive types - primitive_type.map(|t| Ok(t.map(Type::Primitive))), + array_type, + // Primitive or union types + separated(1.., primitive_type, pipe_sep).map(|mut tys: Vec<_>| { + if tys.len() == 1 { + tys.pop().unwrap().map(Type::Primitive) + } else { + let start = tys[0].start; + let module_id = tys[0].module_id; + let end = tys.last().unwrap().end; + Node::new(Type::Union { tys }, start, end, module_id) + } + }), )) - .parse_next(i)? - .map_err(|e: CompilationError| ErrMode::Backtrack(ContextError::from(e)))?; + .parse_next(i)?; + Ok(type_) } @@ -2721,6 +2731,55 @@ fn primitive_type(i: &mut TokenSlice) -> PResult> { Ok(result) } +fn array_type(i: &mut TokenSlice) -> PResult> { + fn opt_whitespace(i: &mut TokenSlice) -> PResult<()> { + ignore_whitespace(i); + Ok(()) + } + + open_bracket(i)?; + let ty = primitive_type(i)?; + let len = opt(( + semi_colon, + opt_whitespace, + any.try_map(|token: Token| match token.token_type { + TokenType::Number => { + let value = token.uint_value().ok_or_else(|| { + CompilationError::fatal( + token.as_source_range(), + format!("Expected unsigned integer literal, found: {}", token.value), + ) + })?; + + Ok(value as usize) + } + _ => Err(CompilationError::fatal(token.as_source_range(), "invalid array length")), + }), + opt(plus), + )) + .parse_next(i)?; + close_bracket(i)?; + + let len = if let Some((tok, _, n, plus)) = len { + if plus.is_some() { + if n != 1 { + return Err(ErrMode::Cut(ContextError::from(CompilationError::fatal( + tok.as_source_range(), + "Non-empty arrays are specified using `1+`, for a fixed-size array use just an integer", + )))); + } else { + ArrayLen::NonEmpty + } + } else { + ArrayLen::Known(n) + } + } else { + ArrayLen::None + }; + + Ok(ty.map(|ty| Type::Array { ty, len })) +} + fn uom_for_type(i: &mut TokenSlice) -> PResult { any.try_map(|t: Token| t.value.parse()).parse_next(i) } diff --git a/rust/kcl-lib/src/parsing/token/mod.rs b/rust/kcl-lib/src/parsing/token/mod.rs index 3c26bee06..28c3f325d 100644 --- a/rust/kcl-lib/src/parsing/token/mod.rs +++ b/rust/kcl-lib/src/parsing/token/mod.rs @@ -367,6 +367,8 @@ pub enum TokenType { QuestionMark, /// The @ symbol. At, + /// `;` + SemiColon, } /// Most KCL tokens correspond to LSP semantic tokens (but not all). @@ -396,6 +398,7 @@ impl TryFrom for SemanticTokenType { | TokenType::Hash | TokenType::Dollar | TokenType::At + | TokenType::SemiColon | TokenType::Unknown => { anyhow::bail!("unsupported token type: {:?}", token_type) } @@ -488,6 +491,18 @@ impl Token { value.parse().ok() } + pub fn uint_value(&self) -> Option { + if self.token_type != TokenType::Number { + return None; + } + let value = &self.value; + let value = value + .split_once(|c: char| c == '_' || c.is_ascii_alphabetic()) + .map(|(s, _)| s) + .unwrap_or(value); + value.parse().ok() + } + pub fn numeric_suffix(&self) -> NumericSuffix { if self.token_type != TokenType::Number { return NumericSuffix::None; diff --git a/rust/kcl-lib/src/parsing/token/tokeniser.rs b/rust/kcl-lib/src/parsing/token/tokeniser.rs index 1caaf70e5..8e46014e0 100644 --- a/rust/kcl-lib/src/parsing/token/tokeniser.rs +++ b/rust/kcl-lib/src/parsing/token/tokeniser.rs @@ -88,6 +88,7 @@ pub(super) fn token(i: &mut Input<'_>) -> PResult { '@' => at, '0'..='9' => number, ':' => colon, + ';' => semi_colon, '.' => alt((number, double_period, period)), '#' => hash, '$' => dollar, @@ -282,6 +283,16 @@ fn colon(i: &mut Input<'_>) -> PResult { )) } +fn semi_colon(i: &mut Input<'_>) -> PResult { + let (value, range) = ';'.with_span().parse_next(i)?; + Ok(Token::from_range( + range, + i.state.module_id, + TokenType::SemiColon, + value.to_string(), + )) +} + fn period(i: &mut Input<'_>) -> PResult { let (value, range) = '.'.with_span().parse_next(i)?; Ok(Token::from_range( @@ -689,7 +700,7 @@ const things = "things" #[test] fn test_unrecognized_token() { let module_id = ModuleId::from_usize(1); - let actual = lex("12 ; 8", module_id).unwrap(); + let actual = lex("12 ~ 8", module_id).unwrap(); use TokenType::*; assert_tokens(&[(Number, 0, 2), (Unknown, 3, 4), (Number, 5, 6)], actual.as_slice()); diff --git a/rust/kcl-lib/src/unparser.rs b/rust/kcl-lib/src/unparser.rs index 2c7257e4d..b1ea02213 100644 --- a/rust/kcl-lib/src/unparser.rs +++ b/rust/kcl-lib/src/unparser.rs @@ -6,8 +6,7 @@ use crate::parsing::{ CallExpression, CallExpressionKw, CommentStyle, DefaultParamVal, Expr, FormatOptions, FunctionExpression, IfExpression, ImportSelector, ImportStatement, ItemVisibility, LabeledArg, Literal, LiteralIdentifier, LiteralValue, MemberExpression, MemberObject, Node, NonCodeNode, NonCodeValue, ObjectExpression, Parameter, - PipeExpression, Program, TagDeclarator, Type, TypeDeclaration, UnaryExpression, VariableDeclaration, - VariableKind, + PipeExpression, Program, TagDeclarator, TypeDeclaration, UnaryExpression, VariableDeclaration, VariableKind, }, token::NumericSuffix, PIPE_OPERATOR, @@ -308,7 +307,7 @@ impl Expr { Expr::AscribedExpression(e) => { let mut result = e.expr.recast(options, indentation_level, ctxt); result += ": "; - result += &e.ty.recast(options, indentation_level); + result += &e.ty.to_string(); result } Expr::None(_) => { @@ -812,7 +811,7 @@ impl FunctionExpression { let tab0 = options.get_indentation(indentation_level); let tab1 = options.get_indentation(indentation_level + 1); let return_type = match &self.return_type { - Some(rt) => format!(": {}", rt.recast(&new_options, indentation_level)), + Some(rt) => format!(": {}", rt.to_string()), None => String::new(), }; let body = self.body.recast(&new_options, indentation_level + 1); @@ -822,14 +821,14 @@ impl FunctionExpression { } impl Parameter { - pub fn recast(&self, options: &FormatOptions, indentation_level: usize) -> String { + pub fn recast(&self, _options: &FormatOptions, _indentation_level: usize) -> String { let at_sign = if self.labeled { "" } else { "@" }; let identifier = &self.identifier.name; let question_mark = if self.default_value.is_some() { "?" } else { "" }; let mut result = format!("{at_sign}{identifier}{question_mark}"); if let Some(ty) = &self.type_ { result += ": "; - result += &ty.recast(options, indentation_level); + result += &ty.to_string(); } if let Some(DefaultParamVal::Literal(ref literal)) = self.default_value { let lit = literal.recast(); @@ -840,31 +839,6 @@ impl Parameter { } } -impl Type { - pub fn recast(&self, options: &FormatOptions, indentation_level: usize) -> String { - match self { - Type::Primitive(t) => t.to_string(), - Type::Array(t) => format!("[{t}]"), - Type::Object { properties } => { - let mut result = "{".to_owned(); - for p in properties { - result += " "; - result += &p.recast(options, indentation_level); - result += ","; - } - - if result.ends_with(',') { - result.pop(); - result += " "; - } - result += "}"; - - result - } - } - } -} - /// Collect all the kcl files in a directory, recursively. #[cfg(not(target_arch = "wasm32"))] #[async_recursion::async_recursion] @@ -1414,6 +1388,21 @@ thing(1) assert_eq!(recasted, some_program_string); } + #[test] + fn test_recast_typed_consts() { + let some_program_string = r#"a = 42: number +export b = 3.2: number(ft) +c = "dsfds": A | B | C +d = [1]: [number] +e = foo: [number; 3] +f = [1, 2, 3]: [number; 1+] +"#; + let program = crate::parsing::top_level_parse(some_program_string).unwrap(); + + let recasted = program.recast(&Default::default(), 0); + assert_eq!(recasted, some_program_string); + } + #[test] fn test_recast_object_fn_in_array_weird_bracket() { let some_program_string = r#"bing = { yo = 55 }