// Clippy does not agree with rustc here for some reason.
#![allow(clippy::needless_lifetimes)]

use std::{fmt, iter::Enumerate, num::NonZeroUsize, str::FromStr};

use anyhow::Result;
use parse_display::Display;
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use tokeniser::Input;
use tower_lsp::lsp_types::SemanticTokenType;
use winnow::{
    self,
    error::ParseError,
    stream::{ContainsToken, Stream},
};

use crate::{
    errors::KclError,
    parsing::ast::types::{ItemVisibility, VariableKind},
    source_range::SourceRange,
    CompilationError, ModuleId,
};

mod tokeniser;

pub(crate) use tokeniser::RESERVED_WORDS;

// Note the ordering: it's important that `m` comes after `mm` and `cm`.
pub const NUM_SUFFIXES: [&str; 10] = ["mm", "cm", "m", "inch", "in", "ft", "yd", "deg", "rad", "?"];

#[derive(Clone, Copy, Debug, Eq, PartialEq, Serialize, Deserialize, ts_rs::TS, JsonSchema)]
#[repr(u32)]
pub enum NumericSuffix {
    None,
    Count,
    Length,
    Angle,
    Mm,
    Cm,
    M,
    Inch,
    Ft,
    Yd,
    Deg,
    Rad,
    Unknown,
}

impl NumericSuffix {
    #[allow(dead_code)]
    pub fn is_none(self) -> bool {
        self == Self::None
    }

    pub fn is_some(self) -> bool {
        self != Self::None
    }

    pub fn digestable_id(&self) -> &[u8] {
        match self {
            NumericSuffix::None => &[],
            NumericSuffix::Count => b"_",
            NumericSuffix::Unknown => b"?",
            NumericSuffix::Length => b"Length",
            NumericSuffix::Angle => b"Angle",
            NumericSuffix::Mm => b"mm",
            NumericSuffix::Cm => b"cm",
            NumericSuffix::M => b"m",
            NumericSuffix::Inch => b"in",
            NumericSuffix::Ft => b"ft",
            NumericSuffix::Yd => b"yd",
            NumericSuffix::Deg => b"deg",
            NumericSuffix::Rad => b"rad",
        }
    }
}

impl FromStr for NumericSuffix {
    type Err = CompilationError;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        match s {
            "_" | "Count" => Ok(NumericSuffix::Count),
            "Length" => Ok(NumericSuffix::Length),
            "Angle" => Ok(NumericSuffix::Angle),
            "mm" | "millimeters" => Ok(NumericSuffix::Mm),
            "cm" | "centimeters" => Ok(NumericSuffix::Cm),
            "m" | "meters" => Ok(NumericSuffix::M),
            "inch" | "in" => Ok(NumericSuffix::Inch),
            "ft" | "feet" => Ok(NumericSuffix::Ft),
            "yd" | "yards" => Ok(NumericSuffix::Yd),
            "deg" | "degrees" => Ok(NumericSuffix::Deg),
            "rad" | "radians" => Ok(NumericSuffix::Rad),
            "?" => Ok(NumericSuffix::Unknown),
            _ => Err(CompilationError::err(SourceRange::default(), "invalid unit of measure")),
        }
    }
}
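// A minimal round-trip sketch for `NumericSuffix` (an illustrative test
// module, not part of the original file): long-form unit names parse to the
// same variant as the short ones, and `Display` (below) renders the
// canonical short form.
#[cfg(test)]
mod numeric_suffix_examples {
    use super::*;

    #[test]
    fn parse_and_display_round_trip() {
        assert_eq!("millimeters".parse::<NumericSuffix>().unwrap(), NumericSuffix::Mm);
        assert_eq!("mm".parse::<NumericSuffix>().unwrap(), NumericSuffix::Mm);
        assert_eq!(NumericSuffix::Mm.to_string(), "mm");
        // Unrecognised units are rejected with a `CompilationError`.
        assert!("furlong".parse::<NumericSuffix>().is_err());
    }
}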
impl fmt::Display for NumericSuffix {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            NumericSuffix::None => Ok(()),
            NumericSuffix::Count => write!(f, "_"),
            NumericSuffix::Unknown => write!(f, "_?"),
            NumericSuffix::Length => write!(f, "Length"),
            NumericSuffix::Angle => write!(f, "Angle"),
            NumericSuffix::Mm => write!(f, "mm"),
            NumericSuffix::Cm => write!(f, "cm"),
            NumericSuffix::M => write!(f, "m"),
            NumericSuffix::Inch => write!(f, "in"),
            NumericSuffix::Ft => write!(f, "ft"),
            NumericSuffix::Yd => write!(f, "yd"),
            NumericSuffix::Deg => write!(f, "deg"),
            NumericSuffix::Rad => write!(f, "rad"),
        }
    }
}

#[derive(Clone, Debug, PartialEq)]
pub(crate) struct TokenStream {
    tokens: Vec<Token>,
}

impl TokenStream {
    fn new(tokens: Vec<Token>) -> Self {
        Self { tokens }
    }

    pub(super) fn remove_unknown(&mut self) -> Vec<Token> {
        let tokens = std::mem::take(&mut self.tokens);
        let (tokens, unknown_tokens): (Vec<Token>, Vec<Token>) = tokens
            .into_iter()
            .partition(|token| token.token_type != TokenType::Unknown);
        self.tokens = tokens;
        unknown_tokens
    }

    pub fn iter(&self) -> impl Iterator<Item = &Token> {
        self.tokens.iter()
    }

    pub fn is_empty(&self) -> bool {
        self.tokens.is_empty()
    }

    pub fn as_slice(&self) -> TokenSlice {
        TokenSlice::from(self)
    }
}

impl<'a> From<&'a TokenStream> for TokenSlice<'a> {
    fn from(stream: &'a TokenStream) -> Self {
        TokenSlice {
            start: 0,
            end: stream.tokens.len(),
            stream,
        }
    }
}

impl IntoIterator for TokenStream {
    type Item = Token;
    type IntoIter = std::vec::IntoIter<Token>;

    fn into_iter(self) -> Self::IntoIter {
        self.tokens.into_iter()
    }
}

#[derive(Debug, Clone)]
pub(crate) struct TokenSlice<'a> {
    stream: &'a TokenStream,
    /// The position of the first token of this slice within the stream.
    start: usize,
    /// One past the position of the last token of this slice within the stream.
    end: usize,
}

impl<'a> std::ops::Deref for TokenSlice<'a> {
    type Target = [Token];

    fn deref(&self) -> &Self::Target {
        &self.stream.tokens[self.start..self.end]
    }
}

impl<'a> TokenSlice<'a> {
    pub fn token(&self, i: usize) -> &Token {
        &self.stream.tokens[i + self.start]
    }

    pub fn iter(&self) -> impl Iterator<Item = &Token> {
        (**self).iter()
    }

    pub fn without_ends(&self) -> Self {
        Self {
            start: self.start + 1,
            end: self.end - 1,
            stream: self.stream,
        }
    }

    pub fn as_source_range(&self) -> SourceRange {
        let stream_len = self.stream.tokens.len();
        let first_token = if stream_len == self.start {
            &self.stream.tokens[self.start - 1]
        } else {
            self.token(0)
        };
        let last_token = if stream_len == self.end {
            &self.stream.tokens[stream_len - 1]
        } else {
            self.token(self.end - self.start)
        };
        SourceRange::new(first_token.start, last_token.end, last_token.module_id)
    }
}

impl<'a> IntoIterator for TokenSlice<'a> {
    type Item = &'a Token;
    type IntoIter = std::slice::Iter<'a, Token>;

    fn into_iter(self) -> Self::IntoIter {
        self.stream.tokens[self.start..self.end].iter()
    }
}

impl<'a> Stream for TokenSlice<'a> {
    type Token = Token;
    type Slice = Self;
    type IterOffsets = Enumerate<std::vec::IntoIter<Token>>;
    type Checkpoint = Checkpoint;

    fn iter_offsets(&self) -> Self::IterOffsets {
        #[allow(clippy::unnecessary_to_owned)]
        self.to_vec().into_iter().enumerate()
    }

    fn eof_offset(&self) -> usize {
        self.len()
    }

    fn next_token(&mut self) -> Option<Self::Token> {
        let token = self.first()?.clone();
        self.start += 1;
        Some(token)
    }

    fn offset_for<P>(&self, predicate: P) -> Option<usize>
    where
        P: Fn(Self::Token) -> bool,
    {
        self.iter().position(|b| predicate(b.clone()))
    }

    fn offset_at(&self, tokens: usize) -> Result<usize, winnow::error::Needed> {
        if let Some(needed) = tokens.checked_sub(self.len()).and_then(NonZeroUsize::new) {
            Err(winnow::error::Needed::Size(needed))
        } else {
            Ok(tokens)
        }
    }

    fn next_slice(&mut self, offset: usize) -> Self::Slice {
        assert!(self.start + offset <= self.end);

        let next = TokenSlice {
            stream: self.stream,
            start: self.start,
            end: self.start + offset,
        };
        self.start += offset;
        next
    }

    fn checkpoint(&self) -> Self::Checkpoint {
        Checkpoint(self.start, self.end)
    }

    fn reset(&mut self, checkpoint: &Self::Checkpoint) {
        self.start = checkpoint.0;
        self.end = checkpoint.1;
    }

    fn raw(&self) -> &dyn fmt::Debug {
        self
    }
}

impl<'a> winnow::stream::Offset for TokenSlice<'a> {
    fn offset_from(&self, start: &Self) -> usize {
        self.start - start.start
    }
}

impl<'a> winnow::stream::Offset<Checkpoint> for TokenSlice<'a> {
    fn offset_from(&self, start: &Checkpoint) -> usize {
        self.start - start.0
    }
}

impl winnow::stream::Offset for Checkpoint {
    fn offset_from(&self, start: &Self) -> usize {
        self.0 - start.0
    }
}

impl<'a> winnow::stream::StreamIsPartial for TokenSlice<'a> {
    type PartialState = ();

    fn complete(&mut self) -> Self::PartialState {}

    fn restore_partial(&mut self, _: Self::PartialState) {}

    fn is_partial_supported() -> bool {
        false
    }
}

impl<'a> winnow::stream::FindSlice<&str> for TokenSlice<'a> {
    fn find_slice(&self, substr: &str) -> Option<std::ops::Range<usize>> {
        self.iter()
            .enumerate()
            .find_map(|(i, b)| if b.value == substr { Some(i..self.end) } else { None })
    }
}

#[derive(Clone, Debug)]
pub struct Checkpoint(usize, usize);

/// The types of tokens.
#[derive(Debug, PartialEq, Eq, Copy, Clone, Display)]
#[display(style = "camelCase")]
pub enum TokenType {
    /// A number.
    Number,
    /// A word.
    Word,
    /// An operator.
    Operator,
    /// A string.
    String,
    /// A keyword.
    Keyword,
    /// A type.
    Type,
    /// A brace.
    Brace,
    /// A hash.
    Hash,
    /// A bang.
    Bang,
    /// A dollar sign.
    Dollar,
    /// Whitespace.
    Whitespace,
    /// A comma.
    Comma,
    /// A colon.
    Colon,
    /// A double colon: `::`
    DoubleColon,
    /// A period.
    Period,
    /// A double period: `..`.
    DoublePeriod,
    /// A double period and a less than: `..<`.
    DoublePeriodLessThan,
    /// A line comment.
    LineComment,
    /// A block comment.
    BlockComment,
    /// A function name.
    Function,
    /// Unknown lexemes.
    Unknown,
    /// The ? symbol, used for optional values.
    QuestionMark,
    /// The @ symbol.
    At,
    /// `;`
    SemiColon,
}
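// A minimal sketch (illustrative, not part of the original file) of how the
// `Stream` impl above is driven by winnow: `next_token` advances the slice's
// `start`, and `reset` rewinds to a `Checkpoint`. This assumes `ModuleId`
// implements `Default`; if not, construct the module ID however this crate
// normally does.
#[cfg(test)]
mod token_slice_examples {
    use super::*;

    #[test]
    fn next_token_advances_and_reset_rewinds() {
        let module_id = ModuleId::default(); // assumed constructor
        let stream = TokenStream::new(vec![
            Token::from_range(0..3, module_id, TokenType::Word, "foo".to_owned()),
            Token::from_range(3..4, module_id, TokenType::Comma, ",".to_owned()),
        ]);
        let mut slice = stream.as_slice();

        let checkpoint = slice.checkpoint();
        assert_eq!(slice.next_token().unwrap().value, "foo");
        assert_eq!(slice.eof_offset(), 1);

        // Rewinding restores the slice to where the checkpoint was taken.
        slice.reset(&checkpoint);
        assert_eq!(slice.eof_offset(), 2);
    }
}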
/// Most KCL tokens correspond to LSP semantic tokens (but not all).
impl TryFrom<TokenType> for SemanticTokenType {
    type Error = anyhow::Error;

    fn try_from(token_type: TokenType) -> Result<Self> {
        // If you return a new kind of `SemanticTokenType`, make sure to update `SEMANTIC_TOKEN_TYPES`
        // in the LSP implementation.
        Ok(match token_type {
            TokenType::Number => Self::NUMBER,
            TokenType::Word => Self::VARIABLE,
            TokenType::Keyword => Self::KEYWORD,
            TokenType::Type => Self::TYPE,
            TokenType::Operator => Self::OPERATOR,
            TokenType::QuestionMark => Self::OPERATOR,
            TokenType::String => Self::STRING,
            TokenType::Bang => Self::OPERATOR,
            TokenType::LineComment => Self::COMMENT,
            TokenType::BlockComment => Self::COMMENT,
            TokenType::Function => Self::FUNCTION,
            TokenType::Whitespace
            | TokenType::Brace
            | TokenType::Comma
            | TokenType::Colon
            | TokenType::DoubleColon
            | TokenType::Period
            | TokenType::DoublePeriod
            | TokenType::DoublePeriodLessThan
            | TokenType::Hash
            | TokenType::Dollar
            | TokenType::At
            | TokenType::SemiColon
            | TokenType::Unknown => {
                anyhow::bail!("unsupported token type: {:?}", token_type)
            }
        })
    }
}

impl TokenType {
    pub fn is_whitespace(&self) -> bool {
        matches!(self, Self::Whitespace)
    }

    pub fn is_comment(&self) -> bool {
        matches!(self, Self::LineComment | Self::BlockComment)
    }
}

#[derive(Debug, PartialEq, Eq, Clone)]
pub struct Token {
    pub token_type: TokenType,
    /// Offset in the source code where this token begins.
    pub start: usize,
    /// Offset in the source code where this token ends.
    pub end: usize,
    pub(super) module_id: ModuleId,
    pub(super) value: String,
}

impl ContainsToken<Token> for (TokenType, &str) {
    fn contains_token(&self, token: Token) -> bool {
        self.0 == token.token_type && self.1 == token.value
    }
}

impl ContainsToken<Token> for TokenType {
    fn contains_token(&self, token: Token) -> bool {
        *self == token.token_type
    }
}

impl Token {
    pub fn from_range(
        range: std::ops::Range<usize>,
        module_id: ModuleId,
        token_type: TokenType,
        value: String,
    ) -> Self {
        Self {
            start: range.start,
            end: range.end,
            module_id,
            value,
            token_type,
        }
    }

    pub fn is_code_token(&self) -> bool {
        !matches!(
            self.token_type,
            TokenType::Whitespace | TokenType::LineComment | TokenType::BlockComment
        )
    }

    pub fn as_source_range(&self) -> SourceRange {
        SourceRange::new(self.start, self.end, self.module_id)
    }

    pub fn as_source_ranges(&self) -> Vec<SourceRange> {
        vec![self.as_source_range()]
    }

    pub fn visibility_keyword(&self) -> Option<ItemVisibility> {
        if !matches!(self.token_type, TokenType::Keyword) {
            return None;
        }
        match self.value.as_str() {
            "export" => Some(ItemVisibility::Export),
            _ => None,
        }
    }

    pub fn numeric_value(&self) -> Option<f64> {
        if self.token_type != TokenType::Number {
            return None;
        }
        let value = &self.value;
        let value = value
            .split_once(|c: char| c == '_' || c.is_ascii_alphabetic())
            .map(|(s, _)| s)
            .unwrap_or(value);
        value.parse().ok()
    }

    pub fn uint_value(&self) -> Option<u32> {
        if self.token_type != TokenType::Number {
            return None;
        }
        let value = &self.value;
        let value = value
            .split_once(|c: char| c == '_' || c.is_ascii_alphabetic())
            .map(|(s, _)| s)
            .unwrap_or(value);
        value.parse().ok()
    }

    pub fn numeric_suffix(&self) -> NumericSuffix {
        if self.token_type != TokenType::Number {
            return NumericSuffix::None;
        }

        if self.value.ends_with('_') {
            return NumericSuffix::Count;
        }

        for suffix in NUM_SUFFIXES {
            if self.value.ends_with(suffix) {
                return suffix.parse().unwrap();
            }
        }

        NumericSuffix::None
    }

    /// Is this token the beginning of a variable/function declaration?
    /// If so, what kind?
    /// If not, returns None.
    pub fn declaration_keyword(&self) -> Option<VariableKind> {
        if !matches!(self.token_type, TokenType::Keyword) {
            return None;
        }
        Some(match self.value.as_str() {
            "fn" => VariableKind::Fn,
            "var" | "let" | "const" => VariableKind::Const,
            _ => return None,
        })
    }
}
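// A small sketch (illustrative, not part of the original file) of how a
// number token splits into value and suffix: `numeric_value` strips the
// suffix before parsing, and `numeric_suffix` matches it against
// `NUM_SUFFIXES`. Assumes `ModuleId: Default`, as in the sketch above.
#[cfg(test)]
mod number_token_examples {
    use super::*;

    #[test]
    fn splits_value_from_suffix() {
        let token = Token::from_range(0..4, ModuleId::default(), TokenType::Number, "42mm".to_owned());
        assert_eq!(token.numeric_value(), Some(42.0));
        assert_eq!(token.numeric_suffix(), NumericSuffix::Mm);
    }
}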
impl From<Token> for SourceRange {
    fn from(token: Token) -> Self {
        Self::new(token.start, token.end, token.module_id)
    }
}

impl From<&Token> for SourceRange {
    fn from(token: &Token) -> Self {
        Self::new(token.start, token.end, token.module_id)
    }
}

pub fn lex(s: &str, module_id: ModuleId) -> Result<TokenStream, KclError> {
    tokeniser::lex(s, module_id).map_err(From::from)
}

impl From<ParseError<Input<'_>, winnow::error::ContextError>> for KclError {
    fn from(err: ParseError<Input<'_>, winnow::error::ContextError>) -> Self {
        let (input, offset): (Vec<char>, usize) = (err.input().chars().collect(), err.offset());
        let module_id = err.input().state.module_id;

        if offset >= input.len() {
            // From the winnow docs:
            //
            // This is an offset, not an index, and may point to
            // the end of input (input.len()) on eof errors.
            return KclError::Lexical(crate::errors::KclErrorDetails::new(
                "unexpected EOF while parsing".to_owned(),
                vec![SourceRange::new(offset, offset, module_id)],
            ));
        }

        // TODO: Add the Winnow tokenizer context to the error.
        // See https://github.com/KittyCAD/modeling-app/issues/784
        let bad_token = &input[offset];
        // TODO: Add the Winnow parser context to the error.
        // See https://github.com/KittyCAD/modeling-app/issues/784
        KclError::Lexical(crate::errors::KclErrorDetails::new(
            format!("found unknown token '{}'", bad_token),
            vec![SourceRange::new(offset, offset + 1, module_id)],
        ))
    }
}
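// An end-to-end sketch (illustrative, not part of the original file) of
// driving `lex`. Exact token counts depend on the tokeniser, so this only
// asserts properties implied by the code above: lexing succeeds, and `1mm`
// yields a number token whose suffix `numeric_suffix` recovers. Assumes
// `ModuleId: Default`, as in the earlier sketches.
#[cfg(test)]
mod lex_examples {
    use super::*;

    #[test]
    fn lexes_a_simple_fragment() {
        let tokens = lex("x = 1mm", ModuleId::default()).expect("tokenising should succeed");
        assert!(!tokens.is_empty());

        let number = tokens
            .iter()
            .find(|t| t.token_type == TokenType::Number)
            .expect("expected a number token");
        assert_eq!(number.numeric_suffix(), NumericSuffix::Mm);
    }
}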