KCL: Support non-ASCII identifiers (#7525)
Both humans and LLMs want to write KCL code in non-English languages. This is important and we should support it. Note that errors are currently a bit broken with non-ASCII identifiers; see #4327.
This commit is contained in:
@ -3334,7 +3334,7 @@ mod tests {
|
||||
use super::*;
|
||||
use crate::{
|
||||
parsing::ast::types::{BodyItem, Expr, VariableKind},
|
||||
KclError, ModuleId,
|
||||
ModuleId,
|
||||
};
|
||||
|
||||
fn assert_reserved(word: &str) {
|
||||
@ -4398,14 +4398,10 @@ secondExtrude = startSketchOn(XY)
|
||||
#[test]
|
||||
fn test_parse_parens_unicode() {
|
||||
let result = crate::parsing::top_level_parse("(ޜ");
|
||||
let KclError::Lexical { details } = result.0.unwrap_err() else {
|
||||
panic!();
|
||||
};
|
||||
// TODO: Better errors when program cannot tokenize.
|
||||
let details = result.0.unwrap().1.pop().unwrap();
|
||||
// TODO: Highlight where the unmatched open parenthesis is.
|
||||
// https://github.com/KittyCAD/modeling-app/issues/696
|
||||
assert_eq!(details.message, "found unknown token 'ޜ'");
|
||||
assert_eq!(details.source_ranges[0].start(), 1);
|
||||
assert_eq!(details.source_ranges[0].end(), 2);
|
||||
assert_eq!(details.message, "Unexpected end of file. The compiler expected )");
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -6,7 +6,7 @@ use winnow::{
|
||||
error::{ContextError, ParseError},
|
||||
prelude::*,
|
||||
stream::{Location, Stream},
|
||||
token::{any, none_of, one_of, take_till, take_until},
|
||||
token::{any, none_of, take_till, take_until, take_while},
|
||||
LocatingSlice, Stateful,
|
||||
};
|
||||
|
||||
@ -163,8 +163,8 @@ fn whitespace(i: &mut Input<'_>) -> ModalResult<Token> {
|
||||
}
|
||||
|
||||
fn inner_word(i: &mut Input<'_>) -> ModalResult<()> {
|
||||
one_of(('a'..='z', 'A'..='Z', '_')).parse_next(i)?;
|
||||
repeat::<_, _, (), _, _>(0.., one_of(('a'..='z', 'A'..='Z', '0'..='9', '_'))).parse_next(i)?;
|
||||
take_while(1.., |c: char| c.is_alphabetic() || c == '_').parse_next(i)?;
|
||||
take_while(0.., |c: char| c.is_alphabetic() || c.is_ascii_digit() || c == '_').parse_next(i)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@ -786,6 +786,7 @@ const things = "things"
|
||||
};
|
||||
assert_eq!(actual.tokens[0], expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_word_starting_with_keyword() {
|
||||
let module_id = ModuleId::default();
|
||||
@ -799,4 +800,18 @@ const things = "things"
|
||||
};
|
||||
assert_eq!(actual.tokens[0], expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn non_english_identifiers() {
|
||||
let module_id = ModuleId::default();
|
||||
let actual = lex("亞當", module_id).unwrap();
|
||||
let expected = Token {
|
||||
token_type: TokenType::Word,
|
||||
value: "亞當".to_owned(),
|
||||
start: 0,
|
||||
end: 6,
|
||||
module_id,
|
||||
};
|
||||
assert_eq!(actual.tokens[0], expected);
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user