Reserve syntax for units of measure (#4783)

* Allow underscores but only for un-referenced names Signed-off-by: Nick Cameron <nrc@ncameron.org> * Support numeric suffixes for UoM types Signed-off-by: Nick Cameron <nrc@ncameron.org> * UoM type arguments Signed-off-by: Nick Cameron <nrc@ncameron.org> * warnings -> non-fatal errors Signed-off-by: Nick Cameron <nrc@ncameron.org> * type ascription Signed-off-by: Nick Cameron <nrc@ncameron.org> --------- Signed-off-by: Nick Cameron <nrc@ncameron.org>
2024-12-17 09:01:51 +13:00
parent 1d39983b08
commit fa22c14723
6 changed files with 249 additions and 47 deletions
--- a/src/wasm-lib/kcl/src/errors.rs
+++ b/src/wasm-lib/kcl/src/errors.rs
@ -353,7 +353,6 @@ pub struct CompilationError {
 }

 impl CompilationError {
-    #[allow(dead_code)]
    pub(crate) fn err(source_range: SourceRange, message: impl ToString) -> CompilationError {
        CompilationError {
            source_range,
--- a/src/wasm-lib/kcl/src/parsing/ast/types/mod.rs
+++ b/src/wasm-lib/kcl/src/parsing/ast/types/mod.rs
@ -1928,6 +1928,10 @@ impl Identifier {
        })
    }

+    pub fn is_nameable(&self) -> bool {
+        !self.name.starts_with('_')
+    }
+
    /// Rename all identifiers that have the old name to the new given name.
    fn rename(&mut self, old_name: &str, new_name: &str) {
        if self.name == old_name {
--- a/src/wasm-lib/kcl/src/parsing/parser.rs
+++ b/src/wasm-lib/kcl/src/parsing/parser.rs
@ -33,7 +33,7 @@ use crate::{
    SourceRange,
 };

-use super::ast::types::LabelledExpression;
+use super::{ast::types::LabelledExpression, token::NumericSuffix};

 thread_local! {
    /// The current `ParseContext`. `None` if parsing is not currently happening on this thread.
@ -96,10 +96,6 @@ impl ParseContext {
                    *e = err;
                    return;
                }
-
-                if e.source_range.start() > err.source_range.end() {
-                    break;
-                }
            }
            errors.push(err);
        });
@ -457,10 +453,17 @@ pub(crate) fn unsigned_number_literal(i: &mut TokenSlice) -> PResult<Node<Litera
    let (value, token) = any
        .try_map(|token: Token| match token.token_type {
            TokenType::Number => {
-                let x: f64 = token.value.parse().map_err(|_| {
+                let x: f64 = token.numeric_value().ok_or_else(|| {
                    CompilationError::fatal(token.as_source_range(), format!("Invalid float: {}", token.value))
                })?;

+                if token.numeric_suffix().is_some() {
+                    ParseContext::err(CompilationError::err(
+                        (&token).into(),
+                        "Unit of Measure suffixes are experimental and currently do nothing.",
+                    ));
+                }
+
                Ok((LiteralValue::Number(x), token))
            }
            _ => Err(CompilationError::fatal(token.as_source_range(), "invalid literal")),
@ -754,7 +757,7 @@ fn array_end_start(i: &mut TokenSlice) -> PResult<Node<ArrayRangeExpression>> {
 }

 fn object_property_same_key_and_val(i: &mut TokenSlice) -> PResult<Node<ObjectProperty>> {
-    let key = identifier.context(expected("the property's key (the name or identifier of the property), e.g. in 'height: 4', 'height' is the property key")).parse_next(i)?;
+    let key = nameable_identifier.context(expected("the property's key (the name or identifier of the property), e.g. in 'height: 4', 'height' is the property key")).parse_next(i)?;
    ignore_whitespace(i);
    Ok(Node {
        start: key.start,
@ -778,7 +781,7 @@ fn object_property(i: &mut TokenSlice) -> PResult<Node<ObjectProperty>> {
        ))
        .parse_next(i)?;
    ignore_whitespace(i);
-    let expr = expression
+    let expr = expression_but_not_ascription
        .context(expected(
            "the value which you're setting the property to, e.g. in 'height: 4', the value is 4",
        ))
@ -1086,7 +1089,7 @@ fn member_expression_dot(i: &mut TokenSlice) -> PResult<(LiteralIdentifier, usiz
    period.parse_next(i)?;
    let property = alt((
        sketch_keyword.map(Box::new).map(LiteralIdentifier::Identifier),
-        identifier.map(Box::new).map(LiteralIdentifier::Identifier),
+        nameable_identifier.map(Box::new).map(LiteralIdentifier::Identifier),
    ))
    .parse_next(i)?;
    let end = property.end();
@ -1099,7 +1102,7 @@ fn member_expression_subscript(i: &mut TokenSlice) -> PResult<(LiteralIdentifier
    let property = alt((
        sketch_keyword.map(Box::new).map(LiteralIdentifier::Identifier),
        literal.map(LiteralIdentifier::Literal),
-        identifier.map(Box::new).map(LiteralIdentifier::Identifier),
+        nameable_identifier.map(Box::new).map(LiteralIdentifier::Identifier),
    ))
    .parse_next(i)?;

@ -1113,7 +1116,7 @@ fn member_expression_subscript(i: &mut TokenSlice) -> PResult<(LiteralIdentifier
 fn member_expression(i: &mut TokenSlice) -> PResult<Node<MemberExpression>> {
    // This is an identifier, followed by a sequence of members (aka properties)
    // First, the identifier.
-    let id = identifier.context(expected("the identifier of the object whose property you're trying to access, e.g. in 'shape.size.width', 'shape' is the identifier")).parse_next(i)?;
+    let id = nameable_identifier.context(expected("the identifier of the object whose property you're trying to access, e.g. in 'shape.size.width', 'shape' is the identifier")).parse_next(i)?;
    // Now a sequence of members.
    let member = alt((member_expression_dot, member_expression_subscript)).context(expected("a member/property, e.g. size.x and size['height'] and size[0] are all different ways to access a member/property of 'size'"));
    let mut members: Vec<_> = repeat(1.., member)
@ -1553,7 +1556,9 @@ fn import_stmt(i: &mut TokenSlice) -> PResult<BoxNode<ImportStatement>> {
 }

 fn import_item(i: &mut TokenSlice) -> PResult<Node<ImportItem>> {
-    let name = identifier.context(expected("an identifier to import")).parse_next(i)?;
+    let name = nameable_identifier
+        .context(expected("an identifier to import"))
+        .parse_next(i)?;
    let start = name.start;
    let module_id = name.module_id;
    let alias = opt(preceded(
@ -1622,6 +1627,24 @@ fn return_stmt(i: &mut TokenSlice) -> PResult<Node<ReturnStatement>> {

 /// Parse a KCL expression.
 fn expression(i: &mut TokenSlice) -> PResult<Expr> {
+    let expr = expression_but_not_ascription.parse_next(i)?;
+    let ty = opt((colon, opt(whitespace), argument_type)).parse_next(i)?;
+
+    // TODO this is probably not giving ascription the right precedence, but I have no idea how Winnow is handling that.
+    // Since we're not creating AST nodes for ascription, I don't think it matters right now.
+    if let Some((colon, _, _)) = ty {
+        ParseContext::err(CompilationError::err(
+            // Sadly there is no SourceRange for the type itself
+            colon.into(),
+            "Type ascription is experimental and currently does nothing.",
+        ));
+    }
+
+    Ok(expr)
+}
+
+// TODO once we remove the old record instantiation syntax, we can accept types ascription anywhere.
+fn expression_but_not_ascription(i: &mut TokenSlice) -> PResult<Expr> {
    alt((
        pipe_expression.map(Box::new).map(Expr::PipeExpression),
        expression_but_not_pipe,
@ -1678,7 +1701,7 @@ fn expr_allowed_in_pipe_expr(i: &mut TokenSlice) -> PResult<Expr> {
        literal.map(Expr::Literal),
        fn_call.map(Box::new).map(Expr::CallExpression),
        fn_call_kw.map(Box::new).map(Expr::CallExpressionKw),
-        identifier.map(Box::new).map(Expr::Identifier),
+        nameable_identifier.map(Box::new).map(Expr::Identifier),
        array,
        object.map(Box::new).map(Expr::ObjectExpression),
        pipe_sub.map(Box::new).map(Expr::PipeSubstitution),
@ -1697,7 +1720,7 @@ fn possible_operands(i: &mut TokenSlice) -> PResult<Expr> {
        member_expression.map(Box::new).map(Expr::MemberExpression),
        literal.map(Expr::Literal),
        fn_call.map(Box::new).map(Expr::CallExpression),
-        identifier.map(Box::new).map(Expr::Identifier),
+        nameable_identifier.map(Box::new).map(Expr::Identifier),
        binary_expr_in_parens.map(Box::new).map(Expr::BinaryExpression),
        unnecessarily_bracketed,
    ))
@ -1873,6 +1896,24 @@ fn identifier(i: &mut TokenSlice) -> PResult<Node<Identifier>> {
        .parse_next(i)
 }

+fn nameable_identifier(i: &mut TokenSlice) -> PResult<Node<Identifier>> {
+    let result = identifier.parse_next(i)?;
+
+    if !result.is_nameable() {
+        let desc = if result.name == "_" {
+            "Underscores"
+        } else {
+            "Names with a leading underscore"
+        };
+        ParseContext::err(CompilationError::err(
+            SourceRange::new(result.start, result.end, result.module_id),
+            format!("{desc} cannot be referred to, only declared."),
+        ));
+    }
+
+    Ok(result)
+}
+
 fn sketch_keyword(i: &mut TokenSlice) -> PResult<Node<Identifier>> {
    any.try_map(|token: Token| {
        if token.token_type == TokenType::Type && token.value == "sketch" {
@ -2257,7 +2298,7 @@ fn arguments(i: &mut TokenSlice) -> PResult<Vec<Expr>> {

 fn labeled_argument(i: &mut TokenSlice) -> PResult<LabeledArg> {
    separated_pair(
-        terminated(identifier, opt(whitespace)),
+        terminated(nameable_identifier, opt(whitespace)),
        terminated(one_of((TokenType::Operator, "=")), opt(whitespace)),
        expression,
    )
@ -2293,7 +2334,17 @@ fn argument_type(i: &mut TokenSlice) -> PResult<FnArgType> {
                .map_err(|err| CompilationError::fatal(token.as_source_range(), format!("Invalid type: {}", err)))
        }),
        // Primitive types
-        one_of(TokenType::Type).map(|token: Token| {
+        (
+            one_of(TokenType::Type),
+            opt(delimited(open_paren, uom_for_type, close_paren)),
+        )
+            .map(|(token, suffix)| {
+                if suffix.is_some() {
+                    ParseContext::err(CompilationError::err(
+                        (&token).into(),
+                        "Unit of Measure types are experimental and currently do nothing.",
+                    ));
+                }
                FnArgPrimitive::from_str(&token.value)
                    .map(FnArgType::Primitive)
                    .map_err(|err| CompilationError::fatal(token.as_source_range(), format!("Invalid type: {}", err)))
@ -2304,6 +2355,10 @@ fn argument_type(i: &mut TokenSlice) -> PResult<FnArgType> {
    Ok(type_)
 }

+fn uom_for_type(i: &mut TokenSlice) -> PResult<NumericSuffix> {
+    any.try_map(|t: Token| t.value.parse()).parse_next(i)
+}
+
 struct ParamDescription {
    labeled: bool,
    arg_name: Token,
@ -2490,7 +2545,7 @@ fn labelled_fn_call(i: &mut TokenSlice) -> PResult<Expr> {
 }

 fn fn_call(i: &mut TokenSlice) -> PResult<Node<CallExpression>> {
-    let fn_name = identifier(i)?;
+    let fn_name = nameable_identifier(i)?;
    opt(whitespace).parse_next(i)?;
    let _ = terminated(open_paren, opt(whitespace)).parse_next(i)?;
    let args = arguments(i)?;
@ -2531,7 +2586,7 @@ fn fn_call(i: &mut TokenSlice) -> PResult<Node<CallExpression>> {
 }

 fn fn_call_kw(i: &mut TokenSlice) -> PResult<Node<CallExpressionKw>> {
-    let fn_name = identifier(i)?;
+    let fn_name = nameable_identifier(i)?;
    opt(whitespace).parse_next(i)?;
    let _ = open_paren.parse_next(i)?;
    ignore_whitespace(i);
@ -3464,6 +3519,18 @@ mySk1 = startSketchAt([0, 0])"#;
        (result.0.unwrap(), result.1)
    }

+    #[track_caller]
+    fn assert_no_fatal(p: &str) -> (Node<Program>, Vec<CompilationError>) {
+        let result = crate::parsing::top_level_parse(p);
+        let result = result.0.unwrap();
+        assert!(
+            result.1.iter().all(|e| e.severity != Severity::Fatal),
+            "found: {:#?}",
+            result.1
+        );
+        (result.0.unwrap(), result.1)
+    }
+
    #[track_caller]
    fn assert_err(p: &str, msg: &str, src_expected: [usize; 2]) {
        let result = crate::parsing::top_level_parse(p);
@ -3861,6 +3928,25 @@ e
        assert_eq!(errs.len(), 1);
    }

+    #[test]
+    fn fn_decl_uom_ty() {
+        let some_program_string = r#"fn foo(x: number(mm)): number(_) { return 1 }"#;
+        let (_, errs) = assert_no_fatal(some_program_string);
+        assert_eq!(errs.len(), 2);
+    }
+
+    #[test]
+    fn error_underscore() {
+        let (_, errs) = assert_no_fatal("_foo(_blah, _)");
+        assert_eq!(errs.len(), 3, "found: {:#?}", errs);
+    }
+
+    #[test]
+    fn error_type_ascription() {
+        let (_, errs) = assert_no_fatal("a + b: number");
+        assert_eq!(errs.len(), 1, "found: {:#?}", errs);
+    }
+
    #[test]
    fn zero_param_function() {
        let code = r#"
--- a/src/wasm-lib/kcl/src/parsing/token/mod.rs
+++ b/src/wasm-lib/kcl/src/parsing/token/mod.rs
@ -1,7 +1,7 @@
 // Clippy does not agree with rustc here for some reason.
 #![allow(clippy::needless_lifetimes)]

-use std::{fmt, iter::Enumerate, num::NonZeroUsize};
+use std::{fmt, iter::Enumerate, num::NonZeroUsize, str::FromStr};

 use anyhow::Result;
 use parse_display::Display;
@ -17,6 +17,7 @@ use crate::{
    errors::KclError,
    parsing::ast::types::{ItemVisibility, VariableKind},
    source_range::{ModuleId, SourceRange},
+    CompilationError,
 };

 mod tokeniser;
@ -24,6 +25,54 @@ mod tokeniser;
 #[cfg(test)]
 pub(crate) use tokeniser::RESERVED_WORDS;

+// Note the ordering, it's important that `m` comes after `mm` and `cm`.
+pub const NUM_SUFFIXES: [&str; 9] = ["mm", "cm", "m", "inch", "in", "ft", "yd", "deg", "rad"];
+
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+pub enum NumericSuffix {
+    None,
+    Count,
+    Mm,
+    Cm,
+    M,
+    Inch,
+    Ft,
+    Yd,
+    Deg,
+    Rad,
+}
+
+impl NumericSuffix {
+    #[allow(dead_code)]
+    pub fn is_none(self) -> bool {
+        self == Self::None
+    }
+
+    pub fn is_some(self) -> bool {
+        self != Self::None
+    }
+}
+
+impl FromStr for NumericSuffix {
+    type Err = CompilationError;
+
+    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
+        match s {
+            "_" => Ok(NumericSuffix::Count),
+            "mm" => Ok(NumericSuffix::Mm),
+            "cm" => Ok(NumericSuffix::Cm),
+            "m" => Ok(NumericSuffix::M),
+            "inch" => Ok(NumericSuffix::Inch),
+            "in" => Ok(NumericSuffix::Inch),
+            "ft" => Ok(NumericSuffix::Ft),
+            "yd" => Ok(NumericSuffix::Yd),
+            "deg" => Ok(NumericSuffix::Deg),
+            "rad" => Ok(NumericSuffix::Rad),
+            _ => Err(CompilationError::err(SourceRange::default(), "invalid unit of measure")),
+        }
+    }
+}
+
 #[derive(Clone, Debug, PartialEq)]
 pub(crate) struct TokenStream {
    tokens: Vec<Token>,
@ -369,6 +418,36 @@ impl Token {
        }
    }

+    pub fn numeric_value(&self) -> Option<f64> {
+        if self.token_type != TokenType::Number {
+            return None;
+        }
+        let value = &self.value;
+        let value = value
+            .split_once(|c: char| c == '_' || c.is_ascii_alphabetic())
+            .map(|(s, _)| s)
+            .unwrap_or(value);
+        value.parse().ok()
+    }
+
+    pub fn numeric_suffix(&self) -> NumericSuffix {
+        if self.token_type != TokenType::Number {
+            return NumericSuffix::None;
+        }
+
+        if self.value.ends_with('_') {
+            return NumericSuffix::Count;
+        }
+
+        for suffix in NUM_SUFFIXES {
+            if self.value.ends_with(suffix) {
+                return suffix.parse().unwrap();
+            }
+        }
+
+        NumericSuffix::None
+    }
+
    /// Is this token the beginning of a variable/function declaration?
    /// If so, what kind?
    /// If not, returns None.
--- a/src/wasm-lib/kcl/src/parsing/token/tokeniser.rs
+++ b/src/wasm-lib/kcl/src/parsing/token/tokeniser.rs
@ -50,7 +50,6 @@ lazy_static! {
        set.insert("record", TokenType::Keyword);
        set.insert("struct", TokenType::Keyword);
        set.insert("object", TokenType::Keyword);
-        set.insert("_", TokenType::Keyword);

        set.insert("string", TokenType::Type);
        set.insert("number", TokenType::Type);
@ -147,9 +146,9 @@ fn line_comment(i: &mut Input<'_>) -> PResult<Token> {
 fn number(i: &mut Input<'_>) -> PResult<Token> {
    let number_parser = alt((
        // Digits before the decimal point.
-        (digit1, opt(('.', digit1))).map(|_| ()),
+        (digit1, opt(('.', digit1)), opt('_'), opt(alt(super::NUM_SUFFIXES))).map(|_| ()),
        // No digits before the decimal point.
-        ('.', digit1).map(|_| ()),
+        ('.', digit1, opt('_'), opt(alt(super::NUM_SUFFIXES))).map(|_| ()),
    ));
    let (value, range) = number_parser.take().with_span().parse_next(i)?;
    Ok(Token::from_range(
@ -379,7 +378,8 @@ mod tests {
        assert!(p.parse_next(&mut input).is_err(), "parsed {s} but should have failed");
    }

-    fn assert_parse_ok<'i, P, O, E>(mut p: P, s: &'i str)
+    // Returns the token and whether any more input is remaining to tokenize.
+    fn assert_parse_ok<'i, P, O, E>(mut p: P, s: &'i str) -> (O, bool)
    where
        E: std::fmt::Debug,
        O: std::fmt::Debug,
@ -392,14 +392,27 @@ mod tests {
        };
        let res = p.parse_next(&mut input);
        assert!(res.is_ok(), "failed to parse {s}, got {}", res.unwrap_err());
+        (res.unwrap(), !input.is_empty())
    }

    #[test]
    fn test_number() {
-        for valid in [
-            "1", "1 abc", "1.1", "1.1 abv", "1.1 abv", "1", ".1", "5?", "5 + 6", "5 + a", "5.5", "1abc",
+        for (valid, expected) in [
+            ("1", false),
+            ("1 abc", true),
+            ("1.1", false),
+            ("1.1 abv", true),
+            ("1.1 abv", true),
+            ("1", false),
+            (".1", false),
+            ("5?", true),
+            ("5 + 6", true),
+            ("5 + a", true),
+            ("5.5", false),
+            ("1abc", true),
        ] {
-            assert_parse_ok(number, valid);
+            let (_, remaining) = assert_parse_ok(number, valid);
+            assert_eq!(expected, remaining, "`{valid}` expected another token to be {expected}");
        }

        for invalid in ["a", "?", "?5"] {
@ -415,6 +428,27 @@ mod tests {
        assert_eq!(number.parse(input).unwrap().value, "0.0000000000");
    }

+    #[test]
+    fn test_number_suffix() {
+        for (valid, expected_val, expected_next) in [
+            ("1_", 1.0, false),
+            ("1_mm", 1.0, false),
+            ("1_yd", 1.0, false),
+            ("1m", 1.0, false),
+            ("1inch", 1.0, false),
+            ("1toot", 1.0, true),
+            ("1.4inch t", 1.4, true),
+        ] {
+            let (t, remaining) = assert_parse_ok(number, valid);
+            assert_eq!(expected_next, remaining);
+            assert_eq!(
+                Some(expected_val),
+                t.numeric_value(),
+                "{valid} has incorrect numeric value, expected {expected_val} {t:?}"
+            );
+        }
+    }
+
    #[test]
    fn test_word() {
        for valid in ["a", "a ", "a5", "a5a"] {
--- a/src/wasm-lib/tests/executor/inputs/lsystem.kcl
+++ b/src/wasm-lib/tests/executor/inputs/lsystem.kcl
@ -27,7 +27,7 @@ fn Gte = (a, b) => { return Not(Lt(a, b)) }

 deg = pi()*2 / 360

-fn setSketch = (state, _q) => {
+fn setSketch = (state, q) => {
  return {
    depthMax: state.depthMax,
    depth: state.depth + 1,
@ -35,43 +35,43 @@ fn setSketch = (state, _q) => {
    factor: state.factor,
    currentAngle: state.currentAngle,
    angle: state.angle,
-    _q: _q
+    q
  }
 }

-fn setDepth = (state, _q) => {
+fn setDepth = (state, q) => {
  return {
    depthMax: state.depthMax,
-    depth: _q,
+    depth: q,
    currentLength: state.currentLength,
    factor: state.factor,
    currentAngle: state.currentAngle,
    angle: state.angle,
-    _q:  state._q
+    q: state.q
  }
 }

-fn setAngle = (state, _q) => {
+fn setAngle = (state, q) => {
  return {
    depthMax: state.depthMax,
    depth: state.depth,
    currentLength: state.currentLength,
    factor: state.factor,
-    currentAngle: _q,
+    currentAngle: q,
    angle: state.angle,
-    _q:  state._q
+    q: state.q
  }
 }

-fn setLength = (state, _q) => {
+fn setLength = (state, q) => {
  return {
    depthMax: state.depthMax,
    depth: state.depth,
-    currentLength: _q,
+    currentLength: q,
    factor: state.factor,
    currentAngle: state.currentAngle,
    angle: state.angle,
-    _q:  state._q
+    q: state.q
  }
 }

@ -95,7 +95,7 @@ fn F = (state, F) => {

  } else {
    // Pass onto the next instruction
-    state |> setSketch(%, angledLine({ angle: state.currentAngle, length: state.currentLength }, state._q))
+    state |> setSketch(%, angledLine({ angle: state.currentAngle, length: state.currentLength }, state.q))
  }
 }

@ -107,7 +107,7 @@ fn LSystem = (args, axioms) => {
    factor: args.factor,
    currentAngle: 0,
    angle: args.angle,
-    _q: startSketchAt([0, 0]),
+    q: startSketchAt([0, 0]),
  })
 }

@ -115,7 +115,7 @@ LSystem({
  iterations: 1,
  factor: 1.36,
  angle: 60,
-}, (_q) => {
-  result = _q |> F(%, F) |> Add(%) |> Add(%) |> F(%, F) |> Add(%) |> Add(%) |> F(%, F) 
-  return result._q
+}, (q) => {
+  result = q |> F(%, F) |> Add(%) |> Add(%) |> F(%, F) |> Add(%) |> Add(%) |> F(%, F) 
+  return result.q
 })