Move the wasm lib, and cleanup rust directory and all references (#5585)

* git mv src/wasm-lib rust

Signed-off-by: Jess Frazelle <github@jessfraz.com>

* mv wasm-lib to workspace

Signed-off-by: Jess Frazelle <github@jessfraz.com>

* mv kcl-lib

Signed-off-by: Jess Frazelle <github@jessfraz.com>

* mv derive docs

Signed-off-by: Jess Frazelle <github@jessfraz.com>

* resolve file paths

Signed-off-by: Jess Frazelle <github@jessfraz.com>

* clippy

Signed-off-by: Jess Frazelle <github@jessfraz.com>

* move more files

Signed-off-by: Jess Frazelle <github@jessfraz.com>

* fix more paths

Signed-off-by: Jess Frazelle <github@jessfraz.com>

* make yarn build:wasm work

Signed-off-by: Jess Frazelle <github@jessfraz.com>

* fix scripts

Signed-off-by: Jess Frazelle <github@jessfraz.com>

* fixups

Signed-off-by: Jess Frazelle <github@jessfraz.com>

* better references

Signed-off-by: Jess Frazelle <github@jessfraz.com>

* fix cargo ci

Signed-off-by: Jess Frazelle <github@jessfraz.com>

* fix reference

Signed-off-by: Jess Frazelle <github@jessfraz.com>

* fix more ci

Signed-off-by: Jess Frazelle <github@jessfraz.com>

* fix tests

Signed-off-by: Jess Frazelle <github@jessfraz.com>

* cargo sort

Signed-off-by: Jess Frazelle <github@jessfraz.com>

* fix script

Signed-off-by: Jess Frazelle <github@jessfraz.com>

* fix

Signed-off-by: Jess Frazelle <github@jessfraz.com>

* fmt

Signed-off-by: Jess Frazelle <github@jessfraz.com>

* fix a dep

Signed-off-by: Jess Frazelle <github@jessfraz.com>

* sort

Signed-off-by: Jess Frazelle <github@jessfraz.com>

* remove unused deps

Signed-off-by: Jess Frazelle <github@jessfraz.com>

* Revert "remove unused deps"

This reverts commit fbabdb062e275fd5cbc1476f8480a1afee15d972.

* updates

Signed-off-by: Jess Frazelle <github@jessfraz.com>

* deps

Signed-off-by: Jess Frazelle <github@jessfraz.com>

* fixes

Signed-off-by: Jess Frazelle <github@jessfraz.com>

* updates

Signed-off-by: Jess Frazelle <github@jessfraz.com>

---------

Signed-off-by: Jess Frazelle <github@jessfraz.com>
This commit is contained in:
Jess Frazelle
2025-03-01 13:59:01 -08:00
committed by GitHub
parent 0a2bf4b55f
commit c3bdc6f106
1443 changed files with 509 additions and 4274 deletions

View File

@ -0,0 +1,543 @@
// Clippy does not agree with rustc here for some reason.
#![allow(clippy::needless_lifetimes)]
use std::{fmt, iter::Enumerate, num::NonZeroUsize, str::FromStr};
use anyhow::Result;
use parse_display::Display;
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use tokeniser::Input;
use tower_lsp::lsp_types::SemanticTokenType;
use winnow::{
self,
error::ParseError,
stream::{ContainsToken, Stream},
};
use crate::{
errors::KclError,
parsing::ast::types::{ItemVisibility, VariableKind},
source_range::SourceRange,
CompilationError, ModuleId,
};
mod tokeniser;
#[cfg(test)]
pub(crate) use tokeniser::RESERVED_WORDS;
/// Unit suffixes that may directly follow a numeric literal (e.g. `1mm`, `1.4inch`).
///
/// Note the ordering, it's important that `m` comes after `mm` and `cm`: consumers scan this
/// list front to back and stop at the first match, so a shorter suffix placed before a longer
/// one that contains it (`m` before `mm`/`cm`, `in` before `inch`) would win incorrectly.
pub const NUM_SUFFIXES: [&str; 9] = ["mm", "cm", "m", "inch", "in", "ft", "yd", "deg", "rad"];
/// The unit suffix attached to a numeric literal.
///
/// The enum is `#[repr(u32)]` without explicit discriminants, so variants are numbered in
/// declaration order; reordering them changes those values.
#[derive(Clone, Copy, Debug, Eq, PartialEq, Serialize, Deserialize, ts_rs::TS, JsonSchema)]
#[repr(u32)]
pub enum NumericSuffix {
/// No suffix at all.
None,
/// The bare `_` suffix.
Count,
/// `mm` (millimeters).
Mm,
/// `cm` (centimeters).
Cm,
/// `m` (meters).
M,
/// `inch` / `in`.
Inch,
/// `ft` (feet).
Ft,
/// `yd` (yards).
Yd,
/// `deg` (degrees).
Deg,
/// `rad` (radians).
Rad,
}
impl NumericSuffix {
#[allow(dead_code)]
pub fn is_none(self) -> bool {
self == Self::None
}
pub fn is_some(self) -> bool {
self != Self::None
}
pub fn digestable_id(&self) -> &[u8] {
match self {
NumericSuffix::None => &[],
NumericSuffix::Count => b"_",
NumericSuffix::Mm => b"mm",
NumericSuffix::Cm => b"cm",
NumericSuffix::M => b"m",
NumericSuffix::Inch => b"in",
NumericSuffix::Ft => b"ft",
NumericSuffix::Yd => b"yd",
NumericSuffix::Deg => b"deg",
NumericSuffix::Rad => b"rad",
}
}
}
impl FromStr for NumericSuffix {
    type Err = CompilationError;

    /// Parses a unit name (short or long spelling) into its suffix.
    ///
    /// # Errors
    /// Returns a [`CompilationError`] with a default source range when `s` is not a known unit.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let suffix = match s {
            "_" => NumericSuffix::Count,
            "mm" | "millimeters" => NumericSuffix::Mm,
            "cm" | "centimeters" => NumericSuffix::Cm,
            "m" | "meters" => NumericSuffix::M,
            "inch" | "in" => NumericSuffix::Inch,
            "ft" | "feet" => NumericSuffix::Ft,
            "yd" | "yards" => NumericSuffix::Yd,
            "deg" | "degrees" => NumericSuffix::Deg,
            "rad" | "radians" => NumericSuffix::Rad,
            _ => {
                return Err(CompilationError::err(
                    SourceRange::default(),
                    "invalid unit of measure",
                ))
            }
        };
        Ok(suffix)
    }
}
impl fmt::Display for NumericSuffix {
    /// Writes the short spelling of the suffix; [`NumericSuffix::None`] writes nothing.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let text = match self {
            NumericSuffix::None => return Ok(()),
            NumericSuffix::Count => "_",
            NumericSuffix::Mm => "mm",
            NumericSuffix::Cm => "cm",
            NumericSuffix::M => "m",
            NumericSuffix::Inch => "in",
            NumericSuffix::Ft => "ft",
            NumericSuffix::Yd => "yd",
            NumericSuffix::Deg => "deg",
            NumericSuffix::Rad => "rad",
        };
        f.write_str(text)
    }
}
/// An owned sequence of lexed tokens.
#[derive(Clone, Debug, PartialEq)]
pub(crate) struct TokenStream {
// The tokens, in the order they are stored by `TokenStream::new`.
tokens: Vec<Token>,
}
impl TokenStream {
    /// Wraps an already-lexed list of tokens.
    fn new(tokens: Vec<Token>) -> Self {
        Self { tokens }
    }

    /// Removes every [`TokenType::Unknown`] token from the stream and returns them.
    ///
    /// Relative order is preserved both for the tokens that stay and for those returned.
    pub(super) fn remove_unknown(&mut self) -> Vec<Token> {
        let mut kept = Vec::new();
        let mut unknown = Vec::new();
        for token in std::mem::take(&mut self.tokens) {
            if token.token_type == TokenType::Unknown {
                unknown.push(token);
            } else {
                kept.push(token);
            }
        }
        self.tokens = kept;
        unknown
    }

    /// Iterates over the tokens by reference.
    pub fn iter(&self) -> impl Iterator<Item = &Token> {
        self.tokens.iter()
    }

    /// Returns `true` when the stream holds no tokens.
    pub fn is_empty(&self) -> bool {
        self.tokens.is_empty()
    }

    /// Borrows the whole stream as a [`TokenSlice`].
    pub fn as_slice(&self) -> TokenSlice {
        self.into()
    }
}
impl<'a> From<&'a TokenStream> for TokenSlice<'a> {
fn from(stream: &'a TokenStream) -> Self {
TokenSlice {
start: 0,
end: stream.tokens.len(),
stream,
}
}
}
impl IntoIterator for TokenStream {
    type Item = Token;
    type IntoIter = std::vec::IntoIter<Token>;

    /// Consumes the stream, yielding its tokens by value.
    fn into_iter(self) -> Self::IntoIter {
        let Self { tokens } = self;
        tokens.into_iter()
    }
}
/// A borrowed window (`start..end`) into a [`TokenStream`].
#[derive(Debug, Clone)]
pub(crate) struct TokenSlice<'a> {
// The stream this window indexes into.
stream: &'a TokenStream,
// Index in `stream.tokens` of the first token in the window.
start: usize,
// Index in `stream.tokens` one past the last token in the window.
end: usize,
}
impl<'a> std::ops::Deref for TokenSlice<'a> {
    type Target = [Token];

    /// Exposes the window as a plain slice of tokens.
    fn deref(&self) -> &Self::Target {
        let Self { stream, start, end } = self;
        &stream.tokens[*start..*end]
    }
}
impl<'a> TokenSlice<'a> {
    /// Returns the token `i` positions after the start of this slice.
    ///
    /// The lookup indexes the underlying stream directly and is not checked against this
    /// slice's `end`.
    ///
    /// # Panics
    /// Panics if `i + start` is outside the underlying stream.
    pub fn token(&self, i: usize) -> &Token {
        &self.stream.tokens[i + self.start]
    }

    /// Iterates over the tokens in this slice by reference.
    pub fn iter(&self) -> impl Iterator<Item = &Token> {
        (**self).iter()
    }

    /// Returns this slice with its first and last token dropped.
    ///
    /// Slices holding fewer than two tokens yield an empty slice. Both bounds are clamped so
    /// the result never has `start > end` and `end - 1` never underflows; previously such
    /// slices panicked on the subtraction or on the next dereference.
    pub fn without_ends(&self) -> Self {
        let start = (self.start + 1).min(self.end);
        let end = self.end.saturating_sub(1).max(start);
        Self {
            start,
            end,
            stream: self.stream,
        }
    }
}
impl<'a> IntoIterator for TokenSlice<'a> {
    type Item = &'a Token;
    type IntoIter = std::slice::Iter<'a, Token>;

    /// Iterates over the tokens in the window, borrowing from the underlying stream.
    fn into_iter(self) -> Self::IntoIter {
        let Self { stream, start, end } = self;
        stream.tokens[start..end].iter()
    }
}
impl<'a> Stream for TokenSlice<'a> {
type Token = Token;
type Slice = Self;
type IterOffsets = Enumerate<std::vec::IntoIter<Token>>;
type Checkpoint = Checkpoint;
fn iter_offsets(&self) -> Self::IterOffsets {
#[allow(clippy::unnecessary_to_owned)]
self.to_vec().into_iter().enumerate()
}
fn eof_offset(&self) -> usize {
self.len()
}
fn next_token(&mut self) -> Option<Self::Token> {
let token = self.first()?.clone();
self.start += 1;
Some(token)
}
fn offset_for<P>(&self, predicate: P) -> Option<usize>
where
P: Fn(Self::Token) -> bool,
{
self.iter().position(|b| predicate(b.clone()))
}
fn offset_at(&self, tokens: usize) -> Result<usize, winnow::error::Needed> {
if let Some(needed) = tokens.checked_sub(self.len()).and_then(NonZeroUsize::new) {
Err(winnow::error::Needed::Size(needed))
} else {
Ok(tokens)
}
}
fn next_slice(&mut self, offset: usize) -> Self::Slice {
assert!(self.start + offset <= self.end);
let next = TokenSlice {
stream: self.stream,
start: self.start,
end: self.start + offset,
};
self.start += offset;
next
}
fn checkpoint(&self) -> Self::Checkpoint {
Checkpoint(self.start, self.end)
}
fn reset(&mut self, checkpoint: &Self::Checkpoint) {
self.start = checkpoint.0;
self.end = checkpoint.1;
}
fn raw(&self) -> &dyn fmt::Debug {
self
}
}
impl<'a> winnow::stream::Offset for TokenSlice<'a> {
    /// Number of tokens between the start of `start` and the start of `self`.
    fn offset_from(&self, start: &Self) -> usize {
        let (here, origin) = (self.start, start.start);
        here - origin
    }
}
impl<'a> winnow::stream::Offset<Checkpoint> for TokenSlice<'a> {
fn offset_from(&self, start: &Checkpoint) -> usize {
self.start - start.0
}
}
impl winnow::stream::Offset for Checkpoint {
fn offset_from(&self, start: &Self) -> usize {
self.0 - start.0
}
}
/// Token slices are always complete input; partial parsing is not supported.
impl<'a> winnow::stream::StreamIsPartial for TokenSlice<'a> {
    type PartialState = ();

    /// No-op: there is no partial state to leave.
    fn complete(&mut self) -> Self::PartialState {}

    /// No-op: there is no partial state to restore.
    fn restore_partial(&mut self, _state: Self::PartialState) {}

    /// Always `false`.
    fn is_partial_supported() -> bool {
        false
    }
}
/// A saved `(start, end)` pair of a [`TokenSlice`] window, used to rewind the slice.
#[derive(Clone, Debug)]
pub struct Checkpoint(usize, usize);
/// The types of tokens.
///
/// The derived `Display` implementation is configured with the `camelCase` style.
#[derive(Debug, PartialEq, Eq, Copy, Clone, Display)]
#[display(style = "camelCase")]
pub enum TokenType {
/// A number.
Number,
/// A word.
Word,
/// An operator.
Operator,
/// A string.
String,
/// A keyword.
Keyword,
/// A type.
Type,
/// A brace.
Brace,
/// A hash.
Hash,
/// A bang.
Bang,
/// A dollar sign.
Dollar,
/// Whitespace.
Whitespace,
/// A comma.
Comma,
/// A colon.
Colon,
/// A period.
Period,
/// A double period: `..`.
DoublePeriod,
/// A line comment.
LineComment,
/// A block comment.
BlockComment,
/// A function name.
Function,
/// Unknown lexemes.
Unknown,
/// The ? symbol, used for optional values.
QuestionMark,
/// The @ symbol.
At,
}
/// Most KCL tokens correspond to LSP semantic tokens (but not all).
impl TryFrom<TokenType> for SemanticTokenType {
    type Error = anyhow::Error;

    /// Maps a KCL token type to its LSP semantic token type.
    ///
    /// # Errors
    /// Fails for token types that have no semantic counterpart (punctuation, whitespace and
    /// unknown lexemes).
    fn try_from(token_type: TokenType) -> Result<Self> {
        // If you return a new kind of `SemanticTokenType`, make sure to update `SEMANTIC_TOKEN_TYPES`
        // in the LSP implementation.
        let semantic = match token_type {
            TokenType::Number => Self::NUMBER,
            TokenType::Word => Self::VARIABLE,
            TokenType::Keyword => Self::KEYWORD,
            TokenType::Type => Self::TYPE,
            TokenType::String => Self::STRING,
            TokenType::Function => Self::FUNCTION,
            TokenType::Operator | TokenType::QuestionMark | TokenType::Bang => Self::OPERATOR,
            TokenType::LineComment | TokenType::BlockComment => Self::COMMENT,
            TokenType::Whitespace
            | TokenType::Brace
            | TokenType::Comma
            | TokenType::Colon
            | TokenType::Period
            | TokenType::DoublePeriod
            | TokenType::Hash
            | TokenType::Dollar
            | TokenType::At
            | TokenType::Unknown => {
                anyhow::bail!("unsupported token type: {:?}", token_type)
            }
        };
        Ok(semantic)
    }
}
impl TokenType {
    /// Returns `true` for [`TokenType::Whitespace`].
    pub fn is_whitespace(&self) -> bool {
        *self == Self::Whitespace
    }

    /// Returns `true` for line and block comments.
    pub fn is_comment(&self) -> bool {
        [Self::LineComment, Self::BlockComment].contains(self)
    }
}
/// A single lexed token: its type, its span in the source and its text.
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct Token {
/// What kind of token this is.
pub token_type: TokenType,
/// Offset in the source code where this token begins.
pub start: usize,
/// Offset in the source code where this token ends.
pub end: usize,
// Identifier of the module this token belongs to; used when building source ranges.
pub(super) module_id: ModuleId,
// The token's text.
pub(super) value: String,
}
/// Matches a token by both its type and its exact text.
impl ContainsToken<Token> for (TokenType, &str) {
    fn contains_token(&self, token: Token) -> bool {
        let (wanted_type, wanted_text) = *self;
        wanted_type == token.token_type && wanted_text == token.value
    }
}
/// Matches any token of this type, whatever its text.
impl ContainsToken<Token> for TokenType {
    fn contains_token(&self, token: Token) -> bool {
        let wanted = *self;
        wanted == token.token_type
    }
}
impl Token {
    /// Builds a token covering `range` with the given module, type and text.
    pub fn from_range(
        range: std::ops::Range<usize>,
        module_id: ModuleId,
        token_type: TokenType,
        value: String,
    ) -> Self {
        let std::ops::Range { start, end } = range;
        Self {
            token_type,
            start,
            end,
            module_id,
            value,
        }
    }

    /// Returns `false` for whitespace and comments, `true` for everything else.
    pub fn is_code_token(&self) -> bool {
        let kind = self.token_type;
        !(kind.is_whitespace() || kind.is_comment())
    }

    /// The span of this token as a [`SourceRange`].
    pub fn as_source_range(&self) -> SourceRange {
        let Self {
            start, end, module_id, ..
        } = self;
        SourceRange::new(*start, *end, *module_id)
    }

    /// The span of this token as a one-element list of source ranges.
    pub fn as_source_ranges(&self) -> Vec<SourceRange> {
        Vec::from([self.as_source_range()])
    }

    /// If this token is the `export` keyword, returns the visibility it denotes.
    pub fn visibility_keyword(&self) -> Option<ItemVisibility> {
        match (self.token_type, self.value.as_str()) {
            (TokenType::Keyword, "export") => Some(ItemVisibility::Export),
            _ => None,
        }
    }

    /// Parses the numeric part of a number token, ignoring any `_` or unit suffix.
    ///
    /// Returns `None` for non-number tokens or when the digits fail to parse.
    pub fn numeric_value(&self) -> Option<f64> {
        if self.token_type != TokenType::Number {
            return None;
        }
        let text = self.value.as_str();
        // Everything from the first `_` or ASCII letter onwards is the suffix.
        let digits = match text.find(|c: char| c == '_' || c.is_ascii_alphabetic()) {
            Some(suffix_start) => &text[..suffix_start],
            None => text,
        };
        digits.parse().ok()
    }

    /// Returns the unit suffix of a number token, or [`NumericSuffix::None`] if there is none
    /// or this is not a number token.
    pub fn numeric_suffix(&self) -> NumericSuffix {
        if self.token_type != TokenType::Number {
            return NumericSuffix::None;
        }
        if self.value.ends_with('_') {
            return NumericSuffix::Count;
        }
        // `NUM_SUFFIXES` is scanned in order; the first suffix the text ends with wins.
        NUM_SUFFIXES
            .iter()
            .copied()
            .find(|suffix| self.value.ends_with(*suffix))
            .map(|suffix| suffix.parse::<NumericSuffix>().unwrap())
            .unwrap_or(NumericSuffix::None)
    }

    /// Is this token the beginning of a variable/function declaration?
    /// If so, what kind?
    /// If not, returns None.
    pub fn declaration_keyword(&self) -> Option<VariableKind> {
        match (self.token_type, self.value.as_str()) {
            (TokenType::Keyword, "fn") => Some(VariableKind::Fn),
            (TokenType::Keyword, "var" | "let" | "const") => Some(VariableKind::Const),
            _ => None,
        }
    }
}
impl From<Token> for SourceRange {
fn from(token: Token) -> Self {
Self::new(token.start, token.end, token.module_id)
}
}
impl From<&Token> for SourceRange {
fn from(token: &Token) -> Self {
Self::new(token.start, token.end, token.module_id)
}
}
pub fn lex(s: &str, module_id: ModuleId) -> Result<TokenStream, KclError> {
tokeniser::lex(s, module_id).map_err(From::from)
}
impl From<ParseError<Input<'_>, winnow::error::ContextError>> for KclError {
    /// Converts a tokeniser failure into a lexical [`KclError`].
    ///
    /// The error's offset is a byte offset into the input, so the offending character is
    /// looked up by byte position (not by character index). The reported range therefore
    /// stays correct when non-ASCII text precedes the failure, and covers the full UTF-8
    /// width of the offending character.
    fn from(err: ParseError<Input<'_>, winnow::error::ContextError>) -> Self {
        let source: &str = err.input();
        let offset = err.offset();
        let module_id = err.input().state.module_id;

        // From the winnow docs:
        //
        // This is an offset, not an index, and may point to
        // the end of input (input.len()) on eof errors.
        let offending = if offset < source.len() {
            // Step back to the nearest character boundary in case the offset lands inside a
            // multi-byte character; index 0 is always a boundary, so this always finds one.
            (0..=offset)
                .rev()
                .find(|&idx| source.is_char_boundary(idx))
                .and_then(|start| source[start..].chars().next().map(|ch| (start, ch)))
        } else {
            None
        };

        match offending {
            None => KclError::Lexical(crate::errors::KclErrorDetails {
                source_ranges: vec![SourceRange::new(offset, offset, module_id)],
                message: "unexpected EOF while parsing".to_string(),
            }),
            // TODO: Add the Winnow tokenizer/parser context to the error.
            // See https://github.com/KittyCAD/modeling-app/issues/784
            Some((start, bad_token)) => KclError::Lexical(crate::errors::KclErrorDetails {
                source_ranges: vec![SourceRange::new(start, start + bad_token.len_utf8(), module_id)],
                message: format!("found unknown token '{}'", bad_token),
            }),
        }
    }
}

View File

@ -0,0 +1,780 @@
use fnv::FnvHashMap;
use lazy_static::lazy_static;
use winnow::{
ascii::{digit1, multispace1},
combinator::{alt, opt, peek, preceded, repeat},
error::{ContextError, ParseError},
prelude::*,
stream::{Location, Stream},
token::{any, none_of, one_of, take_till, take_until},
LocatingSlice, Stateful,
};
use super::TokenStream;
use crate::{
parsing::token::{Token, TokenType},
ModuleId,
};
lazy_static! {
    /// Words that are lexed as keywords or type names rather than as plain words.
    pub(crate) static ref RESERVED_WORDS: FnvHashMap<&'static str, TokenType> = {
        let mut words = FnvHashMap::default();
        for keyword in [
            "if",
            "else",
            "for",
            "while",
            "return",
            "break",
            "continue",
            "fn",
            "let",
            "mut",
            "as",
            "loop",
            "true",
            "false",
            "nil",
            // This isn't a type because brackets are used for the type.
            "array",
            "and",
            "or",
            "not",
            "var",
            "const",
            // "import" is special because of import().
            "export",
            "type",
            "interface",
            "new",
            "self",
            "record",
            "struct",
            "object",
        ] {
            words.insert(keyword, TokenType::Keyword);
        }
        for type_name in [
            "string",
            "number",
            "bool",
            "Sketch",
            "SketchSurface",
            "Solid",
            "Plane",
        ] {
            words.insert(type_name, TokenType::Type);
        }
        words
    };
}
pub(super) fn lex(i: &str, module_id: ModuleId) -> Result<TokenStream, ParseError<Input<'_>, ContextError>> {
let state = State::new(module_id);
let input = Input {
input: LocatingSlice::new(i),
state,
};
Ok(TokenStream::new(repeat(0.., token).parse(input)?))
}
/// The tokeniser's input: source text with location tracking, plus the lexer [`State`].
pub(super) type Input<'a> = Stateful<LocatingSlice<&'a str>, State>;
/// State carried alongside the input while lexing.
#[derive(Debug, Clone)]
pub(super) struct State {
/// Module id stamped onto every token produced from this input.
pub module_id: ModuleId,
}
impl State {
fn new(module_id: ModuleId) -> Self {
Self { module_id }
}
}
/// Lexes the next token, choosing a sub-lexer from the next character.
///
/// When no sub-lexer matches and the remaining input is non-empty ASCII, the next byte is
/// returned as a [`TokenType::Unknown`] token instead of failing.
pub(super) fn token(i: &mut Input<'_>) -> PResult<Token> {
    let attempt: PResult<Token> = winnow::combinator::dispatch! {peek(any);
        '"' | '\'' => string,
        '/' => alt((line_comment, block_comment, operator)),
        '{' | '(' | '[' => brace_start,
        '}' | ')' | ']' => brace_end,
        ',' => comma,
        '?' => question_mark,
        '@' => at,
        '0'..='9' => number,
        ':' => colon,
        '.' => alt((number, double_period, period)),
        '#' => hash,
        '$' => dollar,
        '!' => alt((operator, bang)),
        ' ' | '\t' | '\n' | '\r' => whitespace,
        _ => alt((operator, keyword_type_or_word))
    }
    .parse_next(i);

    let failure = match attempt {
        Ok(recognised) => return Ok(recognised),
        Err(failure) => failure,
    };

    // TODO: Handle non ascii cases
    if i.is_empty() || !i.is_ascii() {
        return Err(failure);
    }

    // Record the position before consuming the unrecognised byte.
    let start = i.location();
    let module_id = i.state.module_id;
    let lexeme = i.next_slice(1).to_string();
    Ok(Token::from_range(
        start..start + 1,
        module_id,
        TokenType::Unknown,
        lexeme,
    ))
}
fn block_comment(i: &mut Input<'_>) -> PResult<Token> {
let inner = ("/*", take_until(0.., "*/"), "*/").take();
let (value, range) = inner.with_span().parse_next(i)?;
Ok(Token::from_range(
range,
i.state.module_id,
TokenType::BlockComment,
value.to_string(),
))
}
fn line_comment(i: &mut Input<'_>) -> PResult<Token> {
let inner = (r#"//"#, take_till(0.., ['\n', '\r'])).take();
let (value, range) = inner.with_span().parse_next(i)?;
Ok(Token::from_range(
range,
i.state.module_id,
TokenType::LineComment,
value.to_string(),
))
}
fn number(i: &mut Input<'_>) -> PResult<Token> {
let number_parser = alt((
// Digits before the decimal point.
(digit1, opt(('.', digit1)), opt('_'), opt(alt(super::NUM_SUFFIXES))).map(|_| ()),
// No digits before the decimal point.
('.', digit1, opt('_'), opt(alt(super::NUM_SUFFIXES))).map(|_| ()),
));
let (value, range) = number_parser.take().with_span().parse_next(i)?;
Ok(Token::from_range(
range,
i.state.module_id,
TokenType::Number,
value.to_string(),
))
}
fn whitespace(i: &mut Input<'_>) -> PResult<Token> {
let (value, range) = multispace1.with_span().parse_next(i)?;
Ok(Token::from_range(
range,
i.state.module_id,
TokenType::Whitespace,
value.to_string(),
))
}
/// Consumes one identifier: an ASCII letter or `_`, then any number of ASCII letters, digits
/// or `_`.
fn inner_word(i: &mut Input<'_>) -> PResult<()> {
    let first_char = ('a'..='z', 'A'..='Z', '_');
    let later_chars = ('a'..='z', 'A'..='Z', '0'..='9', '_');
    one_of(first_char).parse_next(i)?;
    repeat::<_, _, (), _, _>(0.., one_of(later_chars)).parse_next(i)
}
fn word(i: &mut Input<'_>) -> PResult<Token> {
let (value, range) = inner_word.take().with_span().parse_next(i)?;
Ok(Token::from_range(
range,
i.state.module_id,
TokenType::Word,
value.to_string(),
))
}
fn operator(i: &mut Input<'_>) -> PResult<Token> {
let (value, range) = alt((
">=", "<=", "==", "=>", "!=", "|>", "*", "+", "-", "/", "%", "=", "<", ">", r"\", "^", "|", "&",
))
.with_span()
.parse_next(i)?;
Ok(Token::from_range(
range,
i.state.module_id,
TokenType::Operator,
value.to_string(),
))
}
fn brace_start(i: &mut Input<'_>) -> PResult<Token> {
let (value, range) = alt(('{', '(', '[')).with_span().parse_next(i)?;
Ok(Token::from_range(
range,
i.state.module_id,
TokenType::Brace,
value.to_string(),
))
}
fn brace_end(i: &mut Input<'_>) -> PResult<Token> {
let (value, range) = alt(('}', ')', ']')).with_span().parse_next(i)?;
Ok(Token::from_range(
range,
i.state.module_id,
TokenType::Brace,
value.to_string(),
))
}
fn comma(i: &mut Input<'_>) -> PResult<Token> {
let (value, range) = ','.with_span().parse_next(i)?;
Ok(Token::from_range(
range,
i.state.module_id,
TokenType::Comma,
value.to_string(),
))
}
fn hash(i: &mut Input<'_>) -> PResult<Token> {
let (value, range) = '#'.with_span().parse_next(i)?;
Ok(Token::from_range(
range,
i.state.module_id,
TokenType::Hash,
value.to_string(),
))
}
fn bang(i: &mut Input<'_>) -> PResult<Token> {
let (value, range) = '!'.with_span().parse_next(i)?;
Ok(Token::from_range(
range,
i.state.module_id,
TokenType::Bang,
value.to_string(),
))
}
fn dollar(i: &mut Input<'_>) -> PResult<Token> {
let (value, range) = '$'.with_span().parse_next(i)?;
Ok(Token::from_range(
range,
i.state.module_id,
TokenType::Dollar,
value.to_string(),
))
}
fn question_mark(i: &mut Input<'_>) -> PResult<Token> {
let (value, range) = '?'.with_span().parse_next(i)?;
Ok(Token::from_range(
range,
i.state.module_id,
TokenType::QuestionMark,
value.to_string(),
))
}
fn at(i: &mut Input<'_>) -> PResult<Token> {
let (value, range) = '@'.with_span().parse_next(i)?;
Ok(Token::from_range(
range,
i.state.module_id,
TokenType::At,
value.to_string(),
))
}
fn colon(i: &mut Input<'_>) -> PResult<Token> {
let (value, range) = ':'.with_span().parse_next(i)?;
Ok(Token::from_range(
range,
i.state.module_id,
TokenType::Colon,
value.to_string(),
))
}
fn period(i: &mut Input<'_>) -> PResult<Token> {
let (value, range) = '.'.with_span().parse_next(i)?;
Ok(Token::from_range(
range,
i.state.module_id,
TokenType::Period,
value.to_string(),
))
}
fn double_period(i: &mut Input<'_>) -> PResult<Token> {
let (value, range) = "..".with_span().parse_next(i)?;
Ok(Token::from_range(
range,
i.state.module_id,
TokenType::DoublePeriod,
value.to_string(),
))
}
/// Zero or more of either:
/// 1. Any character except " or \
/// 2. Any character preceded by \
fn inner_double_quote(i: &mut Input<'_>) -> PResult<()> {
repeat(0.., alt((none_of(('"', '\\')), preceded('\\', winnow::token::any)))).parse_next(i)
}
/// Zero or more of either:
/// 1. Any character except ' or \
/// 2. Any character preceded by \
fn inner_single_quote(i: &mut Input<'_>) -> PResult<()> {
repeat(0.., alt((none_of(('\'', '\\')), preceded('\\', winnow::token::any)))).parse_next(i)
}
fn string(i: &mut Input<'_>) -> PResult<Token> {
let single_quoted_string = ('\'', inner_single_quote.take(), '\'');
let double_quoted_string = ('"', inner_double_quote.take(), '"');
let either_quoted_string = alt((single_quoted_string.take(), double_quoted_string.take()));
let (value, range): (&str, _) = either_quoted_string.with_span().parse_next(i)?;
Ok(Token::from_range(
range,
i.state.module_id,
TokenType::String,
value.to_string(),
))
}
fn import_keyword(i: &mut Input<'_>) -> PResult<Token> {
let (value, range) = "import".with_span().parse_next(i)?;
let token_type = peek(alt((' '.map(|_| TokenType::Keyword), '('.map(|_| TokenType::Word)))).parse_next(i)?;
Ok(Token::from_range(
range,
i.state.module_id,
token_type,
value.to_owned(),
))
}
fn unambiguous_keyword_type_or_word(i: &mut Input<'_>) -> PResult<Token> {
let mut w = word.parse_next(i)?;
if let Some(token_type) = RESERVED_WORDS.get(w.value.as_str()) {
w.token_type = *token_type;
}
Ok(w)
}
/// Lexes a keyword, type name or plain word, trying the special-cased `import` first.
fn keyword_type_or_word(i: &mut Input<'_>) -> PResult<Token> {
    let mut either = alt((import_keyword, unambiguous_keyword_type_or_word));
    either.parse_next(i)
}
#[cfg(test)]
mod tests {
use winnow::LocatingSlice;
use super::*;
use crate::parsing::token::TokenSlice;
/// Asserts that parser `p` fails on input `s`.
fn assert_parse_err<'i, P, O, E>(mut p: P, s: &'i str)
where
    O: std::fmt::Debug,
    P: Parser<Input<'i>, O, E>,
{
    let mut input = Input {
        input: LocatingSlice::new(s),
        state: State::new(ModuleId::default()),
    };
    let outcome = p.parse_next(&mut input);
    assert!(outcome.is_err(), "parsed {s} but should have failed");
}
// Asserts that parser `p` succeeds on `s`.
// Returns the token and whether any more input is remaining to tokenize.
fn assert_parse_ok<'i, P, O, E>(mut p: P, s: &'i str) -> (O, bool)
where
    E: std::fmt::Debug,
    O: std::fmt::Debug,
    P: Parser<Input<'i>, O, E>,
{
    let mut input = Input {
        input: LocatingSlice::new(s),
        state: State::new(ModuleId::default()),
    };
    match p.parse_next(&mut input) {
        Ok(output) => (output, !input.is_empty()),
        Err(err) => panic!("failed to parse {s}, got {}", err),
    }
}
#[test]
fn test_number() {
    // (input, whether any input is left over once the number has been lexed)
    let cases = [
        ("1", false),
        ("1 abc", true),
        ("1.1", false),
        ("1.1 abv", true),
        ("1.1 abv", true),
        ("1", false),
        (".1", false),
        ("5?", true),
        ("5 + 6", true),
        ("5 + a", true),
        ("5.5", false),
        ("1abc", true),
    ];
    for (valid, expected) in cases {
        let (_, remaining) = assert_parse_ok(number, valid);
        assert_eq!(expected, remaining, "`{valid}` expected another token to be {expected}");
    }

    let rejected = ["a", "?", "?5"];
    for invalid in rejected {
        assert_parse_err(number, invalid);
    }

    // Trailing zeros must be kept verbatim in the token text.
    let input = Input {
        input: LocatingSlice::new("0.0000000000"),
        state: State::new(ModuleId::from_usize(1)),
    };
    let token = number.parse(input).unwrap();
    assert_eq!(token.value, "0.0000000000");
}
#[test]
fn test_number_suffix() {
    // (input, expected numeric value, whether any input is left over afterwards)
    let cases = [
        ("1_", 1.0, false),
        ("1_mm", 1.0, false),
        ("1_yd", 1.0, false),
        ("1m", 1.0, false),
        ("1inch", 1.0, false),
        ("1toot", 1.0, true),
        ("1.4inch t", 1.4, true),
    ];
    for (valid, expected_val, expected_next) in cases {
        let (t, remaining) = assert_parse_ok(number, valid);
        assert_eq!(expected_next, remaining);
        let parsed = t.numeric_value();
        assert_eq!(
            Some(expected_val),
            parsed,
            "{valid} has incorrect numeric value, expected {expected_val} {t:?}"
        );
    }
}
#[test]
fn test_word() {
    // Words start with a letter; digits are only allowed afterwards.
    let accepted = ["a", "a ", "a5", "a5a"];
    let rejected = ["5", "5a", "5a5"];
    for valid in accepted {
        assert_parse_ok(word, valid);
    }
    for invalid in rejected {
        assert_parse_err(word, invalid);
    }
}
#[test]
fn test_operator() {
    // Inputs that begin with an operator.
    let accepted = [
        "+", "+ ", "-", "<=", "<= ", ">=", ">= ", "> ", "< ", "|> ", "^ ", "% ", "+* ", "| ", "& ",
    ];
    // Inputs that do not begin with an operator.
    let rejected = ["5 + 5", "a", "a+", "a+5", "5a+5", ", newVar", ","];
    for valid in accepted {
        assert_parse_ok(operator, valid);
    }
    for invalid in rejected {
        assert_parse_err(operator, invalid);
    }
}
#[test]
fn test_string() {
    // Inputs that begin with a complete quoted string.
    let accepted = [
        "\"\"",
        "\"a\"",
        "\"a\" ",
        "\"a\"5",
        "'a'5",
        "\"with escaped \\\" backslash\"",
        "\'with escaped \\\' backslash\'",
        "'c'",
    ];
    // Unterminated strings, and inputs that do not begin with a quote.
    let rejected = ["\"", "\"a", "a\"", " \"a\"", "5\"a\"", "a + 'str'"];
    for valid in accepted {
        assert_parse_ok(string, valid);
    }
    for invalid in rejected {
        assert_parse_err(string, invalid);
    }
}
/// Compares lexed tokens against `expected` `(type, start, end)` triples and panics with a
/// list of every mismatch.
///
/// Whitespace tokens in `actual` that are not listed in `expected` are skipped. Tokens left
/// over once `expected` is exhausted are reported as issues rather than causing an
/// out-of-bounds index into `expected`.
#[track_caller]
fn assert_tokens(expected: &[(TokenType, usize, usize)], actual: TokenSlice) {
    let mut e = 0;
    let mut issues = vec![];
    for a in actual {
        let (expected_type, expected_start, expected_end) = match expected.get(e) {
            Some(&entry) => entry,
            None => {
                // Ran out of expectations: unlisted whitespace is fine, anything else is not.
                if a.token_type != TokenType::Whitespace {
                    issues.push(format!(
                        "Unexpected extra token (`{a:?}`) after the {} expected tokens",
                        expected.len()
                    ));
                }
                continue;
            }
        };
        if expected_type != a.token_type {
            if a.token_type == TokenType::Whitespace {
                continue;
            }
            issues.push(format!(
                "Type mismatch: expected `{}`, found `{}` (`{a:?}`), at index {e}",
                expected_type, a.token_type
            ));
        }
        if expected_start != a.start || expected_end != a.end {
            issues.push(format!(
                "Source range mismatch: expected {}-{}, found {}-{} (`{a:?}`), at index {e}",
                expected_start, expected_end, a.start, a.end
            ));
        }
        e += 1;
    }
    if e < expected.len() {
        issues.push(format!("Expected `{}` tokens, found `{e}`", expected.len()));
    }
    assert!(issues.is_empty(), "{}", issues.join("\n"));
}
#[test]
fn test_program0() {
    use TokenType::*;
    let tokens = lex("const a=5", ModuleId::from_usize(1)).unwrap();
    // The whitespace between `const` and `a` is not listed; `assert_tokens` skips it.
    let expected = [(Keyword, 0, 5), (Word, 6, 7), (Operator, 7, 8), (Number, 8, 9)];
    assert_tokens(&expected, tokens.as_slice());
}
#[test]
fn test_program1() {
    use TokenType::*;
    let tokens = lex("54 + 22500 + 6", ModuleId::from_usize(1)).unwrap();
    let expected = [
        (Number, 0, 2),
        (Operator, 3, 4),
        (Number, 5, 10),
        (Operator, 11, 12),
        (Number, 13, 14),
    ];
    assert_tokens(&expected, tokens.as_slice());
}
// Lexes a multi-statement program and compares the full token list against a stored
// `insta` debug snapshot.
// NOTE(review): token offsets in the snapshot depend on the exact whitespace inside the raw
// string below — confirm the literal has not been reflowed before accepting snapshot changes.
#[test]
fn test_program2() {
let program = r#"const part001 = startSketchAt([0.0000000000, 5.0000000000])
|> line([0.4900857016, -0.0240763666], %)
const part002 = "part002"
const things = [part001, 0.0]
let blah = 1
const foo = false
let baz = {a: 1, part001: "thing"}
fn ghi = (part001) => {
return part001
}
show(part001)"#;
let module_id = ModuleId::from_usize(1);
let actual = lex(program, module_id).unwrap();
insta::assert_debug_snapshot!(actual.tokens);
}
// Lexes a program with comments and nested object literals, checking every token —
// whitespace included — by type and source offsets.
// NOTE(review): the expected spans include two-character whitespace runs at 59..61 and
// 100..102, which implies blank lines in the program text at those points — confirm the raw
// string below still contains them.
#[test]
fn test_program3() {
let program = r#"
// this is a comment
const yo = { a: { b: { c: '123' } } }
const key = 'c'
const things = "things"
// this is also a comment"#;
let module_id = ModuleId::from_usize(1);
let actual = lex(program, module_id).unwrap();
use TokenType::*;
assert_tokens(
&[
(Whitespace, 0, 1),
(LineComment, 1, 21),
(Whitespace, 21, 22),
(Keyword, 22, 27),
(Whitespace, 27, 28),
(Word, 28, 30),
(Whitespace, 30, 31),
(Operator, 31, 32),
(Whitespace, 32, 33),
(Brace, 33, 34),
(Whitespace, 34, 35),
(Word, 35, 36),
(Colon, 36, 37),
(Whitespace, 37, 38),
(Brace, 38, 39),
(Whitespace, 39, 40),
(Word, 40, 41),
(Colon, 41, 42),
(Whitespace, 42, 43),
(Brace, 43, 44),
(Whitespace, 44, 45),
(Word, 45, 46),
(Colon, 46, 47),
(Whitespace, 47, 48),
(String, 48, 53),
(Whitespace, 53, 54),
(Brace, 54, 55),
(Whitespace, 55, 56),
(Brace, 56, 57),
(Whitespace, 57, 58),
(Brace, 58, 59),
(Whitespace, 59, 61),
(Keyword, 61, 66),
(Whitespace, 66, 67),
(Word, 67, 70),
(Whitespace, 70, 71),
(Operator, 71, 72),
(Whitespace, 72, 73),
(String, 73, 76),
(Whitespace, 76, 77),
(Keyword, 77, 82),
(Whitespace, 82, 83),
(Word, 83, 89),
(Whitespace, 89, 90),
(Operator, 90, 91),
(Whitespace, 91, 92),
(String, 92, 100),
(Whitespace, 100, 102),
(LineComment, 102, 127),
],
actual.as_slice(),
);
}
#[test]
fn test_program4() {
    use TokenType::*;
    let tokens = lex("const myArray = [0..10]", ModuleId::from_usize(1)).unwrap();
    // `0..10` must lex as number, double period, number — not as `0.` followed by `.10`.
    let expected = [
        (Keyword, 0, 5),
        (Word, 6, 13),
        (Operator, 14, 15),
        (Brace, 16, 17),
        (Number, 17, 18),
        (DoublePeriod, 18, 20),
        (Number, 20, 22),
        (Brace, 22, 23),
    ];
    assert_tokens(&expected, tokens.as_slice());
}
#[test]
fn test_lexer_negative_word() {
    use TokenType::*;
    let tokens = lex("-legX", ModuleId::from_usize(1)).unwrap();
    // The minus sign is its own operator token, followed by the word.
    let expected = [(Operator, 0, 1), (Word, 1, 5)];
    assert_tokens(&expected, tokens.as_slice());
}
#[test]
fn not_eq() {
    let module_id = ModuleId::from_usize(1);
    let lexed = lex("!=", module_id).unwrap();
    // `!=` is a single operator token, not a bang followed by `=`.
    let expected = vec![Token::from_range(
        0..2,
        module_id,
        TokenType::Operator,
        "!=".to_owned(),
    )];
    assert_eq!(lexed.tokens, expected);
}
#[test]
fn test_unrecognized_token() {
    use TokenType::*;
    let tokens = lex("12 ; 8", ModuleId::from_usize(1)).unwrap();
    // `;` is not a known lexeme, so it comes back as an `Unknown` token rather than an error.
    let expected = [(Number, 0, 2), (Unknown, 3, 4), (Number, 5, 6)];
    assert_tokens(&expected, tokens.as_slice());
}
#[test]
fn import_keyword() {
    let module_id = ModuleId::from_usize(1);
    let lexed = lex("import foo", module_id).unwrap();
    // Followed by a space, `import` is a keyword.
    let expected = Token::from_range(0..6, module_id, TokenType::Keyword, "import".to_owned());
    assert_eq!(lexed.tokens[0], expected);
}
#[test]
fn import_function() {
    let module_id = ModuleId::from_usize(1);
    let lexed = lex("import(3)", module_id).unwrap();
    // Followed by `(`, `import` is an ordinary word (a function call).
    let expected = Token::from_range(0..6, module_id, TokenType::Word, "import".to_owned());
    assert_eq!(lexed.tokens[0], expected);
}
#[test]
fn test_is_code_token() {
    let module_id = ModuleId::default();
    let actual = lex("foo (4/* comment */ +,2,\"sdfsdf\") // comment", module_id).unwrap();
    // Indices of the whitespace and comment tokens in the lexed output.
    let non_code = [1, 4, 5, 12, 13];
    for i in 0..14 {
        let token = &actual.tokens[i];
        let expect_code = !non_code.contains(&i);
        assert!(token.is_code_token() == expect_code, "failed test {i}: {:?}", token);
    }
}
#[test]
fn test_boolean_literal() {
    let module_id = ModuleId::default();
    let lexed = lex("true", module_id).unwrap();
    // `true` is reserved, so it lexes as a keyword.
    let expected = Token::from_range(0..4, module_id, TokenType::Keyword, "true".to_owned());
    assert_eq!(lexed.tokens[0], expected);
}
#[test]
fn test_word_starting_with_keyword() {
    let module_id = ModuleId::default();
    let lexed = lex("truee", module_id).unwrap();
    // A word that merely starts with a keyword stays a single plain word.
    let expected = Token::from_range(0..5, module_id, TokenType::Word, "truee".to_owned());
    assert_eq!(lexed.tokens[0], expected);
}
}