KCL: Support non-ASCII identifiers (#7525)

Both humans and LLMs want to write KCL code in non-English languages. This is important and we should support it.

Note that errors are currently a bit broken with non-ASCII identifiers; see #4327.
This commit is contained in:
Adam Chalmers
2025-06-19 09:10:21 -05:00
committed by GitHub
parent 9eaacc2a51
commit 9dd6e3e852
12 changed files with 655 additions and 11 deletions

View File

@ -3334,7 +3334,7 @@ mod tests {
use super::*; use super::*;
use crate::{ use crate::{
parsing::ast::types::{BodyItem, Expr, VariableKind}, parsing::ast::types::{BodyItem, Expr, VariableKind},
KclError, ModuleId, ModuleId,
}; };
fn assert_reserved(word: &str) { fn assert_reserved(word: &str) {
@ -4398,14 +4398,10 @@ secondExtrude = startSketchOn(XY)
#[test] #[test]
fn test_parse_parens_unicode() { fn test_parse_parens_unicode() {
let result = crate::parsing::top_level_parse(""); let result = crate::parsing::top_level_parse("");
let KclError::Lexical { details } = result.0.unwrap_err() else { let details = result.0.unwrap().1.pop().unwrap();
panic!(); // TODO: Highlight where the unmatched open parenthesis is.
};
// TODO: Better errors when program cannot tokenize.
// https://github.com/KittyCAD/modeling-app/issues/696 // https://github.com/KittyCAD/modeling-app/issues/696
assert_eq!(details.message, "found unknown token 'ޜ'"); assert_eq!(details.message, "Unexpected end of file. The compiler expected )");
assert_eq!(details.source_ranges[0].start(), 1);
assert_eq!(details.source_ranges[0].end(), 2);
} }
#[test] #[test]

View File

@ -6,7 +6,7 @@ use winnow::{
error::{ContextError, ParseError}, error::{ContextError, ParseError},
prelude::*, prelude::*,
stream::{Location, Stream}, stream::{Location, Stream},
token::{any, none_of, one_of, take_till, take_until}, token::{any, none_of, take_till, take_until, take_while},
LocatingSlice, Stateful, LocatingSlice, Stateful,
}; };
@ -163,8 +163,8 @@ fn whitespace(i: &mut Input<'_>) -> ModalResult<Token> {
} }
fn inner_word(i: &mut Input<'_>) -> ModalResult<()> { fn inner_word(i: &mut Input<'_>) -> ModalResult<()> {
one_of(('a'..='z', 'A'..='Z', '_')).parse_next(i)?; take_while(1.., |c: char| c.is_alphabetic() || c == '_').parse_next(i)?;
repeat::<_, _, (), _, _>(0.., one_of(('a'..='z', 'A'..='Z', '0'..='9', '_'))).parse_next(i)?; take_while(0.., |c: char| c.is_alphabetic() || c.is_ascii_digit() || c == '_').parse_next(i)?;
Ok(()) Ok(())
} }
@ -786,6 +786,7 @@ const things = "things"
}; };
assert_eq!(actual.tokens[0], expected); assert_eq!(actual.tokens[0], expected);
} }
#[test] #[test]
fn test_word_starting_with_keyword() { fn test_word_starting_with_keyword() {
let module_id = ModuleId::default(); let module_id = ModuleId::default();
@ -799,4 +800,18 @@ const things = "things"
}; };
assert_eq!(actual.tokens[0], expected); assert_eq!(actual.tokens[0], expected);
} }
/// A word made entirely of non-ASCII alphabetic characters must lex as a
/// single `Word` token.
#[test]
fn non_english_identifiers() {
    let source = "亞當";
    let id = ModuleId::default();
    let lexed = lex(source, id).unwrap();
    // The input is two characters but six UTF-8 bytes, and the expected token
    // ends at 6.
    let want = Token {
        token_type: TokenType::Word,
        value: source.to_owned(),
        start: 0,
        end: 6,
        module_id: id,
    };
    assert_eq!(lexed.tokens[0], want);
}
} }

View File

@ -3605,3 +3605,24 @@ mod user_reported_union_2_bug {
super::execute(TEST_NAME, false).await super::execute(TEST_NAME, false).await
} }
} }
/// Tests for the `non_english_identifiers` KCL test case: parse, unparse and execute.
/// Each test forwards `TEST_NAME` to the helper of the same name in the parent module.
mod non_english_identifiers {
// Name of the test case handed to every helper below.
const TEST_NAME: &str = "non_english_identifiers";
/// Test parsing KCL.
#[test]
fn parse() {
super::parse(TEST_NAME)
}
/// Test that parsing and unparsing KCL produces the original KCL input.
#[tokio::test(flavor = "multi_thread")]
async fn unparse() {
super::unparse(TEST_NAME).await
}
/// Test that KCL is executed correctly.
// NOTE(review): the meaning of the `true` flag is defined by `super::execute`,
// which is not visible here — confirm before changing it.
#[tokio::test(flavor = "multi_thread")]
async fn kcl_test_execute() {
super::execute(TEST_NAME, true).await
}
}

View File

@ -0,0 +1,18 @@
---
source: kcl-lib/src/simulation_tests.rs
description: Artifact commands non_english_identifiers.kcl
---
{
"rust/kcl-lib/tests/non_english_identifiers/input.kcl": [],
"std::appearance": [],
"std::array": [],
"std::math": [],
"std::prelude": [],
"std::sketch": [],
"std::solid": [],
"std::sweep": [],
"std::transform": [],
"std::turns": [],
"std::types": [],
"std::units": []
}

View File

@ -0,0 +1,6 @@
---
source: kcl-lib/src/simulation_tests.rs
description: Artifact graph flowchart non_english_identifiers.kcl
extension: md
snapshot_kind: binary
---

View File

@ -0,0 +1,3 @@
```mermaid
flowchart LR
```

View File

@ -0,0 +1,284 @@
---
source: kcl-lib/src/simulation_tests.rs
description: Result of parsing non_english_identifiers.kcl
---
{
"Ok": {
"body": [
{
"commentStart": 0,
"declaration": {
"commentStart": 0,
"end": 0,
"id": {
"commentStart": 0,
"end": 0,
"name": "comprimentoTotal",
"start": 0,
"type": "Identifier"
},
"init": {
"commentStart": 0,
"end": 0,
"raw": "100",
"start": 0,
"type": "Literal",
"type": "Literal",
"value": {
"value": 100.0,
"suffix": "None"
}
},
"start": 0,
"type": "VariableDeclarator"
},
"end": 0,
"kind": "const",
"start": 0,
"type": "VariableDeclaration",
"type": "VariableDeclaration"
},
{
"commentStart": 0,
"declaration": {
"commentStart": 0,
"end": 0,
"id": {
"commentStart": 0,
"end": 0,
"name": "亞當",
"start": 0,
"type": "Identifier"
},
"init": {
"commentStart": 0,
"end": 0,
"raw": "100",
"start": 0,
"type": "Literal",
"type": "Literal",
"value": {
"value": 100.0,
"suffix": "None"
}
},
"start": 0,
"type": "VariableDeclarator"
},
"end": 0,
"kind": "const",
"start": 0,
"type": "VariableDeclaration",
"type": "VariableDeclaration"
},
{
"commentStart": 0,
"declaration": {
"commentStart": 0,
"end": 0,
"id": {
"commentStart": 0,
"end": 0,
"name": "comprimentoRosca",
"start": 0,
"type": "Identifier"
},
"init": {
"commentStart": 0,
"end": 0,
"left": {
"abs_path": false,
"commentStart": 0,
"end": 0,
"name": {
"commentStart": 0,
"end": 0,
"name": "亞當",
"start": 0,
"type": "Identifier"
},
"path": [],
"start": 0,
"type": "Name",
"type": "Name"
},
"operator": "*",
"right": {
"commentStart": 0,
"end": 0,
"raw": "0.8",
"start": 0,
"type": "Literal",
"type": "Literal",
"value": {
"value": 0.8,
"suffix": "None"
}
},
"start": 0,
"type": "BinaryExpression",
"type": "BinaryExpression"
},
"start": 0,
"type": "VariableDeclarator"
},
"end": 0,
"kind": "const",
"start": 0,
"type": "VariableDeclaration",
"type": "VariableDeclaration"
},
{
"commentStart": 0,
"declaration": {
"commentStart": 0,
"end": 0,
"id": {
"commentStart": 0,
"end": 0,
"name": "comprimentoCabeça",
"start": 0,
"type": "Identifier"
},
"init": {
"commentStart": 0,
"end": 0,
"left": {
"abs_path": false,
"commentStart": 0,
"end": 0,
"name": {
"commentStart": 0,
"end": 0,
"name": "comprimentoTotal",
"start": 0,
"type": "Identifier"
},
"path": [],
"start": 0,
"type": "Name",
"type": "Name"
},
"operator": "-",
"right": {
"abs_path": false,
"commentStart": 0,
"end": 0,
"name": {
"commentStart": 0,
"end": 0,
"name": "comprimentoRosca",
"start": 0,
"type": "Identifier"
},
"path": [],
"start": 0,
"type": "Name",
"type": "Name"
},
"start": 0,
"type": "BinaryExpression",
"type": "BinaryExpression"
},
"start": 0,
"type": "VariableDeclarator"
},
"end": 0,
"kind": "const",
"start": 0,
"type": "VariableDeclaration",
"type": "VariableDeclaration"
},
{
"commentStart": 0,
"end": 0,
"expression": {
"arguments": [
{
"type": "LabeledArg",
"label": {
"commentStart": 0,
"end": 0,
"name": "isEqualTo",
"start": 0,
"type": "Identifier"
},
"arg": {
"commentStart": 0,
"end": 0,
"raw": "20",
"start": 0,
"type": "Literal",
"type": "Literal",
"value": {
"value": 20.0,
"suffix": "None"
}
}
}
],
"callee": {
"abs_path": false,
"commentStart": 0,
"end": 0,
"name": {
"commentStart": 0,
"end": 0,
"name": "assert",
"start": 0,
"type": "Identifier"
},
"path": [],
"start": 0,
"type": "Name"
},
"commentStart": 0,
"end": 0,
"start": 0,
"type": "CallExpressionKw",
"type": "CallExpressionKw",
"unlabeled": {
"abs_path": false,
"commentStart": 0,
"end": 0,
"name": {
"commentStart": 0,
"end": 0,
"name": "comprimentoCabeça",
"start": 0,
"type": "Identifier"
},
"path": [],
"start": 0,
"type": "Name",
"type": "Name"
}
},
"start": 0,
"type": "ExpressionStatement",
"type": "ExpressionStatement"
}
],
"commentStart": 0,
"end": 0,
"nonCodeMeta": {
"nonCodeNodes": {
"2": [
{
"commentStart": 0,
"end": 0,
"start": 0,
"type": "NonCodeNode",
"value": {
"type": "inlineComment",
"value": "80% do comprimento total é roscado",
"style": "line"
}
}
]
},
"startNodes": []
},
"start": 0
}
}

View File

@ -0,0 +1,5 @@
comprimentoTotal = 100
亞當 = 100
comprimentoRosca = 亞當 * 0.8 // 80% do comprimento total é roscado
comprimentoCabeça = comprimentoTotal - comprimentoRosca
assert(comprimentoCabeça, isEqualTo = 20)

View File

@ -0,0 +1,229 @@
---
source: kcl-lib/src/simulation_tests.rs
description: Operations executed non_english_identifiers.kcl
---
{
"rust/kcl-lib/tests/non_english_identifiers/input.kcl": [
{
"type": "VariableDeclaration",
"name": "comprimentoTotal",
"value": {
"type": "Number",
"value": 100.0,
"ty": {
"type": "Default",
"len": {
"type": "Mm"
},
"angle": {
"type": "Degrees"
}
}
},
"visibility": "default",
"nodePath": {
"steps": [
{
"type": "ProgramBodyItem",
"index": 0
},
{
"type": "VariableDeclarationDeclaration"
},
{
"type": "VariableDeclarationInit"
}
]
},
"sourceRange": []
},
{
"type": "VariableDeclaration",
"name": "亞當",
"value": {
"type": "Number",
"value": 100.0,
"ty": {
"type": "Default",
"len": {
"type": "Mm"
},
"angle": {
"type": "Degrees"
}
}
},
"visibility": "default",
"nodePath": {
"steps": [
{
"type": "ProgramBodyItem",
"index": 1
},
{
"type": "VariableDeclarationDeclaration"
},
{
"type": "VariableDeclarationInit"
}
]
},
"sourceRange": []
},
{
"type": "VariableDeclaration",
"name": "comprimentoRosca",
"value": {
"type": "Number",
"value": 80.0,
"ty": {
"type": "Default",
"len": {
"type": "Mm"
},
"angle": {
"type": "Degrees"
}
}
},
"visibility": "default",
"nodePath": {
"steps": [
{
"type": "ProgramBodyItem",
"index": 2
},
{
"type": "VariableDeclarationDeclaration"
},
{
"type": "VariableDeclarationInit"
}
]
},
"sourceRange": []
},
{
"type": "VariableDeclaration",
"name": "comprimentoCabeça",
"value": {
"type": "Number",
"value": 20.0,
"ty": {
"type": "Default",
"len": {
"type": "Mm"
},
"angle": {
"type": "Degrees"
}
}
},
"visibility": "default",
"nodePath": {
"steps": [
{
"type": "ProgramBodyItem",
"index": 3
},
{
"type": "VariableDeclarationDeclaration"
},
{
"type": "VariableDeclarationInit"
}
]
},
"sourceRange": []
}
],
"std::appearance": [],
"std::array": [],
"std::math": [
{
"type": "VariableDeclaration",
"name": "PI",
"value": {
"type": "Number",
"value": 3.141592653589793,
"ty": {
"type": "Unknown"
}
},
"visibility": "export",
"nodePath": {
"steps": []
},
"sourceRange": []
},
{
"type": "VariableDeclaration",
"name": "E",
"value": {
"type": "Number",
"value": 2.718281828459045,
"ty": {
"type": "Known",
"type": "Count"
}
},
"visibility": "export",
"nodePath": {
"steps": []
},
"sourceRange": []
},
{
"type": "VariableDeclaration",
"name": "TAU",
"value": {
"type": "Number",
"value": 6.283185307179586,
"ty": {
"type": "Known",
"type": "Count"
}
},
"visibility": "export",
"nodePath": {
"steps": []
},
"sourceRange": []
}
],
"std::prelude": [
{
"type": "VariableDeclaration",
"name": "START",
"value": {
"type": "String",
"value": "start"
},
"visibility": "export",
"nodePath": {
"steps": []
},
"sourceRange": []
},
{
"type": "VariableDeclaration",
"name": "END",
"value": {
"type": "String",
"value": "end"
},
"visibility": "export",
"nodePath": {
"steps": []
},
"sourceRange": []
}
],
"std::sketch": [],
"std::solid": [],
"std::sweep": [],
"std::transform": [],
"std::turns": [],
"std::types": [],
"std::units": []
}

View File

@ -0,0 +1,58 @@
---
source: kcl-lib/src/simulation_tests.rs
description: Variables in memory after executing non_english_identifiers.kcl
---
{
"comprimentoCabeça": {
"type": "Number",
"value": 20.0,
"ty": {
"type": "Default",
"len": {
"type": "Mm"
},
"angle": {
"type": "Degrees"
}
}
},
"comprimentoRosca": {
"type": "Number",
"value": 80.0,
"ty": {
"type": "Default",
"len": {
"type": "Mm"
},
"angle": {
"type": "Degrees"
}
}
},
"comprimentoTotal": {
"type": "Number",
"value": 100.0,
"ty": {
"type": "Default",
"len": {
"type": "Mm"
},
"angle": {
"type": "Degrees"
}
}
},
"亞當": {
"type": "Number",
"value": 100.0,
"ty": {
"type": "Default",
"len": {
"type": "Mm"
},
"angle": {
"type": "Degrees"
}
}
}
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 19 KiB

View File

@ -0,0 +1,9 @@
---
source: kcl-lib/src/simulation_tests.rs
description: Result of unparsing non_english_identifiers.kcl
---
comprimentoTotal = 100
亞當 = 100
comprimentoRosca = 亞當 * 0.8 // 80% do comprimento total é roscado
comprimentoCabeça = comprimentoTotal - comprimentoRosca
assert(comprimentoCabeça, isEqualTo = 20)