Move the wasm lib, and clean up the rust directory and all references (#5585)
* git mv src/wasm-lib rust Signed-off-by: Jess Frazelle <github@jessfraz.com> * mv wasm-lib to workspace Signed-off-by: Jess Frazelle <github@jessfraz.com> * mv kcl-lib Signed-off-by: Jess Frazelle <github@jessfraz.com> * mv derive docs Signed-off-by: Jess Frazelle <github@jessfraz.com> * resolve file paths Signed-off-by: Jess Frazelle <github@jessfraz.com> * clippy Signed-off-by: Jess Frazelle <github@jessfraz.com> * move more shit Signed-off-by: Jess Frazelle <github@jessfraz.com> * fix more paths Signed-off-by: Jess Frazelle <github@jessfraz.com> * make yarn build:wasm work Signed-off-by: Jess Frazelle <github@jessfraz.com> * fix scripts Signed-off-by: Jess Frazelle <github@jessfraz.com> * fixups Signed-off-by: Jess Frazelle <github@jessfraz.com> * better references Signed-off-by: Jess Frazelle <github@jessfraz.com> * fix cargo ci Signed-off-by: Jess Frazelle <github@jessfraz.com> * fix reference Signed-off-by: Jess Frazelle <github@jessfraz.com> * fix more ci Signed-off-by: Jess Frazelle <github@jessfraz.com> * fix tests Signed-off-by: Jess Frazelle <github@jessfraz.com> * cargo sort Signed-off-by: Jess Frazelle <github@jessfraz.com> * fix script Signed-off-by: Jess Frazelle <github@jessfraz.com> * fix Signed-off-by: Jess Frazelle <github@jessfraz.com> * fmt Signed-off-by: Jess Frazelle <github@jessfraz.com> * fix a dep Signed-off-by: Jess Frazelle <github@jessfraz.com> * sort Signed-off-by: Jess Frazelle <github@jessfraz.com> * remove unused deps Signed-off-by: Jess Frazelle <github@jessfraz.com> * Revert "remove unused deps" This reverts commit fbabdb062e275fd5cbc1476f8480a1afee15d972. * updates Signed-off-by: Jess Frazelle <github@jessfraz.com> * deps; Signed-off-by: Jess Frazelle <github@jessfraz.com> * fixes Signed-off-by: Jess Frazelle <github@jessfraz.com> * updates Signed-off-by: Jess Frazelle <github@jessfraz.com> --------- Signed-off-by: Jess Frazelle <github@jessfraz.com>
This commit is contained in:
543
rust/kcl-lib/src/parsing/token/mod.rs
Normal file
543
rust/kcl-lib/src/parsing/token/mod.rs
Normal file
@ -0,0 +1,543 @@
|
||||
// Clippy does not agree with rustc here for some reason.
|
||||
#![allow(clippy::needless_lifetimes)]
|
||||
|
||||
use std::{fmt, iter::Enumerate, num::NonZeroUsize, str::FromStr};
|
||||
|
||||
use anyhow::Result;
|
||||
use parse_display::Display;
|
||||
use schemars::JsonSchema;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tokeniser::Input;
|
||||
use tower_lsp::lsp_types::SemanticTokenType;
|
||||
use winnow::{
|
||||
self,
|
||||
error::ParseError,
|
||||
stream::{ContainsToken, Stream},
|
||||
};
|
||||
|
||||
use crate::{
|
||||
errors::KclError,
|
||||
parsing::ast::types::{ItemVisibility, VariableKind},
|
||||
source_range::SourceRange,
|
||||
CompilationError, ModuleId,
|
||||
};
|
||||
|
||||
mod tokeniser;
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) use tokeniser::RESERVED_WORDS;
|
||||
|
||||
/// Textual unit suffixes that `Token::numeric_suffix` looks for at the end of
/// a number token.
///
/// Note the ordering, it's important that `m` comes after `mm` and `cm`.
pub const NUM_SUFFIXES: [&str; 9] = [
    "mm", "cm", "m", "inch", "in", "ft", "yd", "deg", "rad",
];
|
||||
|
||||
#[derive(Clone, Copy, Debug, Eq, PartialEq, Serialize, Deserialize, ts_rs::TS, JsonSchema)]
|
||||
#[repr(u32)]
|
||||
pub enum NumericSuffix {
|
||||
None,
|
||||
Count,
|
||||
Mm,
|
||||
Cm,
|
||||
M,
|
||||
Inch,
|
||||
Ft,
|
||||
Yd,
|
||||
Deg,
|
||||
Rad,
|
||||
}
|
||||
|
||||
impl NumericSuffix {
|
||||
#[allow(dead_code)]
|
||||
pub fn is_none(self) -> bool {
|
||||
self == Self::None
|
||||
}
|
||||
|
||||
pub fn is_some(self) -> bool {
|
||||
self != Self::None
|
||||
}
|
||||
|
||||
pub fn digestable_id(&self) -> &[u8] {
|
||||
match self {
|
||||
NumericSuffix::None => &[],
|
||||
NumericSuffix::Count => b"_",
|
||||
NumericSuffix::Mm => b"mm",
|
||||
NumericSuffix::Cm => b"cm",
|
||||
NumericSuffix::M => b"m",
|
||||
NumericSuffix::Inch => b"in",
|
||||
NumericSuffix::Ft => b"ft",
|
||||
NumericSuffix::Yd => b"yd",
|
||||
NumericSuffix::Deg => b"deg",
|
||||
NumericSuffix::Rad => b"rad",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl FromStr for NumericSuffix {
|
||||
type Err = CompilationError;
|
||||
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
match s {
|
||||
"_" => Ok(NumericSuffix::Count),
|
||||
"mm" | "millimeters" => Ok(NumericSuffix::Mm),
|
||||
"cm" | "centimeters" => Ok(NumericSuffix::Cm),
|
||||
"m" | "meters" => Ok(NumericSuffix::M),
|
||||
"inch" | "in" => Ok(NumericSuffix::Inch),
|
||||
"ft" | "feet" => Ok(NumericSuffix::Ft),
|
||||
"yd" | "yards" => Ok(NumericSuffix::Yd),
|
||||
"deg" | "degrees" => Ok(NumericSuffix::Deg),
|
||||
"rad" | "radians" => Ok(NumericSuffix::Rad),
|
||||
_ => Err(CompilationError::err(SourceRange::default(), "invalid unit of measure")),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for NumericSuffix {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
NumericSuffix::None => Ok(()),
|
||||
NumericSuffix::Count => write!(f, "_"),
|
||||
NumericSuffix::Mm => write!(f, "mm"),
|
||||
NumericSuffix::Cm => write!(f, "cm"),
|
||||
NumericSuffix::M => write!(f, "m"),
|
||||
NumericSuffix::Inch => write!(f, "in"),
|
||||
NumericSuffix::Ft => write!(f, "ft"),
|
||||
NumericSuffix::Yd => write!(f, "yd"),
|
||||
NumericSuffix::Deg => write!(f, "deg"),
|
||||
NumericSuffix::Rad => write!(f, "rad"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq)]
|
||||
pub(crate) struct TokenStream {
|
||||
tokens: Vec<Token>,
|
||||
}
|
||||
|
||||
impl TokenStream {
|
||||
fn new(tokens: Vec<Token>) -> Self {
|
||||
Self { tokens }
|
||||
}
|
||||
|
||||
pub(super) fn remove_unknown(&mut self) -> Vec<Token> {
|
||||
let tokens = std::mem::take(&mut self.tokens);
|
||||
let (tokens, unknown_tokens): (Vec<Token>, Vec<Token>) = tokens
|
||||
.into_iter()
|
||||
.partition(|token| token.token_type != TokenType::Unknown);
|
||||
self.tokens = tokens;
|
||||
unknown_tokens
|
||||
}
|
||||
|
||||
pub fn iter(&self) -> impl Iterator<Item = &Token> {
|
||||
self.tokens.iter()
|
||||
}
|
||||
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.tokens.is_empty()
|
||||
}
|
||||
|
||||
pub fn as_slice(&self) -> TokenSlice {
|
||||
TokenSlice::from(self)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> From<&'a TokenStream> for TokenSlice<'a> {
    /// Builds a slice spanning every token of `stream`.
    fn from(stream: &'a TokenStream) -> Self {
        let end = stream.tokens.len();
        Self { stream, start: 0, end }
    }
}
|
||||
|
||||
impl IntoIterator for TokenStream {
|
||||
type Item = Token;
|
||||
|
||||
type IntoIter = std::vec::IntoIter<Token>;
|
||||
|
||||
fn into_iter(self) -> Self::IntoIter {
|
||||
self.tokens.into_iter()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub(crate) struct TokenSlice<'a> {
|
||||
stream: &'a TokenStream,
|
||||
start: usize,
|
||||
end: usize,
|
||||
}
|
||||
|
||||
impl<'a> std::ops::Deref for TokenSlice<'a> {
|
||||
type Target = [Token];
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.stream.tokens[self.start..self.end]
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> TokenSlice<'a> {
|
||||
pub fn token(&self, i: usize) -> &Token {
|
||||
&self.stream.tokens[i + self.start]
|
||||
}
|
||||
|
||||
pub fn iter(&self) -> impl Iterator<Item = &Token> {
|
||||
(**self).iter()
|
||||
}
|
||||
|
||||
pub fn without_ends(&self) -> Self {
|
||||
Self {
|
||||
start: self.start + 1,
|
||||
end: self.end - 1,
|
||||
stream: self.stream,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> IntoIterator for TokenSlice<'a> {
|
||||
type Item = &'a Token;
|
||||
|
||||
type IntoIter = std::slice::Iter<'a, Token>;
|
||||
|
||||
fn into_iter(self) -> Self::IntoIter {
|
||||
self.stream.tokens[self.start..self.end].iter()
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Stream for TokenSlice<'a> {
|
||||
type Token = Token;
|
||||
type Slice = Self;
|
||||
type IterOffsets = Enumerate<std::vec::IntoIter<Token>>;
|
||||
type Checkpoint = Checkpoint;
|
||||
|
||||
fn iter_offsets(&self) -> Self::IterOffsets {
|
||||
#[allow(clippy::unnecessary_to_owned)]
|
||||
self.to_vec().into_iter().enumerate()
|
||||
}
|
||||
|
||||
fn eof_offset(&self) -> usize {
|
||||
self.len()
|
||||
}
|
||||
|
||||
fn next_token(&mut self) -> Option<Self::Token> {
|
||||
let token = self.first()?.clone();
|
||||
self.start += 1;
|
||||
Some(token)
|
||||
}
|
||||
|
||||
fn offset_for<P>(&self, predicate: P) -> Option<usize>
|
||||
where
|
||||
P: Fn(Self::Token) -> bool,
|
||||
{
|
||||
self.iter().position(|b| predicate(b.clone()))
|
||||
}
|
||||
|
||||
fn offset_at(&self, tokens: usize) -> Result<usize, winnow::error::Needed> {
|
||||
if let Some(needed) = tokens.checked_sub(self.len()).and_then(NonZeroUsize::new) {
|
||||
Err(winnow::error::Needed::Size(needed))
|
||||
} else {
|
||||
Ok(tokens)
|
||||
}
|
||||
}
|
||||
|
||||
fn next_slice(&mut self, offset: usize) -> Self::Slice {
|
||||
assert!(self.start + offset <= self.end);
|
||||
|
||||
let next = TokenSlice {
|
||||
stream: self.stream,
|
||||
start: self.start,
|
||||
end: self.start + offset,
|
||||
};
|
||||
self.start += offset;
|
||||
next
|
||||
}
|
||||
|
||||
fn checkpoint(&self) -> Self::Checkpoint {
|
||||
Checkpoint(self.start, self.end)
|
||||
}
|
||||
|
||||
fn reset(&mut self, checkpoint: &Self::Checkpoint) {
|
||||
self.start = checkpoint.0;
|
||||
self.end = checkpoint.1;
|
||||
}
|
||||
|
||||
fn raw(&self) -> &dyn fmt::Debug {
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> winnow::stream::Offset for TokenSlice<'a> {
    /// How many tokens this slice's start lies past the start of `start`.
    fn offset_from(&self, start: &Self) -> usize {
        let (here, origin) = (self.start, start.start);
        here - origin
    }
}
|
||||
|
||||
impl<'a> winnow::stream::Offset<Checkpoint> for TokenSlice<'a> {
|
||||
fn offset_from(&self, start: &Checkpoint) -> usize {
|
||||
self.start - start.0
|
||||
}
|
||||
}
|
||||
|
||||
impl winnow::stream::Offset for Checkpoint {
    /// Difference between the start positions recorded in two checkpoints.
    fn offset_from(&self, start: &Self) -> usize {
        let (here, origin) = (self.0, start.0);
        here - origin
    }
}
|
||||
|
||||
impl<'a> winnow::stream::StreamIsPartial for TokenSlice<'a> {
    /// No state is tracked for partial parsing.
    type PartialState = ();

    /// No-op.
    fn complete(&mut self) -> Self::PartialState {}

    /// No-op.
    fn restore_partial(&mut self, _: Self::PartialState) {}

    /// Always reports that partial parsing is unsupported.
    fn is_partial_supported() -> bool {
        false
    }
}
|
||||
|
||||
/// A saved `(start, end)` pair of [`TokenSlice`] bounds, as produced by
/// `TokenSlice::checkpoint` and consumed by `TokenSlice::reset`.
#[derive(Debug, Clone)]
pub struct Checkpoint(usize, usize);
|
||||
|
||||
/// The types of tokens.
|
||||
#[derive(Debug, PartialEq, Eq, Copy, Clone, Display)]
|
||||
#[display(style = "camelCase")]
|
||||
pub enum TokenType {
|
||||
/// A number.
|
||||
Number,
|
||||
/// A word.
|
||||
Word,
|
||||
/// An operator.
|
||||
Operator,
|
||||
/// A string.
|
||||
String,
|
||||
/// A keyword.
|
||||
Keyword,
|
||||
/// A type.
|
||||
Type,
|
||||
/// A brace.
|
||||
Brace,
|
||||
/// A hash.
|
||||
Hash,
|
||||
/// A bang.
|
||||
Bang,
|
||||
/// A dollar sign.
|
||||
Dollar,
|
||||
/// Whitespace.
|
||||
Whitespace,
|
||||
/// A comma.
|
||||
Comma,
|
||||
/// A colon.
|
||||
Colon,
|
||||
/// A period.
|
||||
Period,
|
||||
/// A double period: `..`.
|
||||
DoublePeriod,
|
||||
/// A line comment.
|
||||
LineComment,
|
||||
/// A block comment.
|
||||
BlockComment,
|
||||
/// A function name.
|
||||
Function,
|
||||
/// Unknown lexemes.
|
||||
Unknown,
|
||||
/// The ? symbol, used for optional values.
|
||||
QuestionMark,
|
||||
/// The @ symbol.
|
||||
At,
|
||||
}
|
||||
|
||||
/// Most KCL tokens correspond to LSP semantic tokens (but not all).
|
||||
impl TryFrom<TokenType> for SemanticTokenType {
|
||||
type Error = anyhow::Error;
|
||||
fn try_from(token_type: TokenType) -> Result<Self> {
|
||||
// If you return a new kind of `SemanticTokenType`, make sure to update `SEMANTIC_TOKEN_TYPES`
|
||||
// in the LSP implementation.
|
||||
Ok(match token_type {
|
||||
TokenType::Number => Self::NUMBER,
|
||||
TokenType::Word => Self::VARIABLE,
|
||||
TokenType::Keyword => Self::KEYWORD,
|
||||
TokenType::Type => Self::TYPE,
|
||||
TokenType::Operator => Self::OPERATOR,
|
||||
TokenType::QuestionMark => Self::OPERATOR,
|
||||
TokenType::String => Self::STRING,
|
||||
TokenType::Bang => Self::OPERATOR,
|
||||
TokenType::LineComment => Self::COMMENT,
|
||||
TokenType::BlockComment => Self::COMMENT,
|
||||
TokenType::Function => Self::FUNCTION,
|
||||
TokenType::Whitespace
|
||||
| TokenType::Brace
|
||||
| TokenType::Comma
|
||||
| TokenType::Colon
|
||||
| TokenType::Period
|
||||
| TokenType::DoublePeriod
|
||||
| TokenType::Hash
|
||||
| TokenType::Dollar
|
||||
| TokenType::At
|
||||
| TokenType::Unknown => {
|
||||
anyhow::bail!("unsupported token type: {:?}", token_type)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl TokenType {
|
||||
pub fn is_whitespace(&self) -> bool {
|
||||
matches!(self, Self::Whitespace)
|
||||
}
|
||||
|
||||
pub fn is_comment(&self) -> bool {
|
||||
matches!(self, Self::LineComment | Self::BlockComment)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, Clone)]
|
||||
pub struct Token {
|
||||
pub token_type: TokenType,
|
||||
/// Offset in the source code where this token begins.
|
||||
pub start: usize,
|
||||
/// Offset in the source code where this token ends.
|
||||
pub end: usize,
|
||||
pub(super) module_id: ModuleId,
|
||||
pub(super) value: String,
|
||||
}
|
||||
|
||||
impl ContainsToken<Token> for (TokenType, &str) {
    /// Matches a token that has both the given type and the given text.
    fn contains_token(&self, token: Token) -> bool {
        let (wanted_type, wanted_text) = *self;
        wanted_type == token.token_type && wanted_text == token.value
    }
}
|
||||
|
||||
impl ContainsToken<Token> for TokenType {
    /// Matches any token of this type, whatever its text.
    fn contains_token(&self, token: Token) -> bool {
        let wanted = *self;
        wanted == token.token_type
    }
}
|
||||
|
||||
impl Token {
|
||||
pub fn from_range(
|
||||
range: std::ops::Range<usize>,
|
||||
module_id: ModuleId,
|
||||
token_type: TokenType,
|
||||
value: String,
|
||||
) -> Self {
|
||||
Self {
|
||||
start: range.start,
|
||||
end: range.end,
|
||||
module_id,
|
||||
value,
|
||||
token_type,
|
||||
}
|
||||
}
|
||||
pub fn is_code_token(&self) -> bool {
|
||||
!matches!(
|
||||
self.token_type,
|
||||
TokenType::Whitespace | TokenType::LineComment | TokenType::BlockComment
|
||||
)
|
||||
}
|
||||
|
||||
pub fn as_source_range(&self) -> SourceRange {
|
||||
SourceRange::new(self.start, self.end, self.module_id)
|
||||
}
|
||||
|
||||
pub fn as_source_ranges(&self) -> Vec<SourceRange> {
|
||||
vec![self.as_source_range()]
|
||||
}
|
||||
|
||||
pub fn visibility_keyword(&self) -> Option<ItemVisibility> {
|
||||
if !matches!(self.token_type, TokenType::Keyword) {
|
||||
return None;
|
||||
}
|
||||
match self.value.as_str() {
|
||||
"export" => Some(ItemVisibility::Export),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn numeric_value(&self) -> Option<f64> {
|
||||
if self.token_type != TokenType::Number {
|
||||
return None;
|
||||
}
|
||||
let value = &self.value;
|
||||
let value = value
|
||||
.split_once(|c: char| c == '_' || c.is_ascii_alphabetic())
|
||||
.map(|(s, _)| s)
|
||||
.unwrap_or(value);
|
||||
value.parse().ok()
|
||||
}
|
||||
|
||||
pub fn numeric_suffix(&self) -> NumericSuffix {
|
||||
if self.token_type != TokenType::Number {
|
||||
return NumericSuffix::None;
|
||||
}
|
||||
|
||||
if self.value.ends_with('_') {
|
||||
return NumericSuffix::Count;
|
||||
}
|
||||
|
||||
for suffix in NUM_SUFFIXES {
|
||||
if self.value.ends_with(suffix) {
|
||||
return suffix.parse().unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
NumericSuffix::None
|
||||
}
|
||||
|
||||
/// Is this token the beginning of a variable/function declaration?
|
||||
/// If so, what kind?
|
||||
/// If not, returns None.
|
||||
pub fn declaration_keyword(&self) -> Option<VariableKind> {
|
||||
if !matches!(self.token_type, TokenType::Keyword) {
|
||||
return None;
|
||||
}
|
||||
Some(match self.value.as_str() {
|
||||
"fn" => VariableKind::Fn,
|
||||
"var" | "let" | "const" => VariableKind::Const,
|
||||
_ => return None,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Token> for SourceRange {
|
||||
fn from(token: Token) -> Self {
|
||||
Self::new(token.start, token.end, token.module_id)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<&Token> for SourceRange {
    /// Converts a borrowed token into the source range it spans.
    fn from(token: &Token) -> Self {
        let (start, end, module_id) = (token.start, token.end, token.module_id);
        Self::new(start, end, module_id)
    }
}
|
||||
|
||||
pub fn lex(s: &str, module_id: ModuleId) -> Result<TokenStream, KclError> {
|
||||
tokeniser::lex(s, module_id).map_err(From::from)
|
||||
}
|
||||
|
||||
impl From<ParseError<Input<'_>, winnow::error::ContextError>> for KclError {
|
||||
fn from(err: ParseError<Input<'_>, winnow::error::ContextError>) -> Self {
|
||||
let (input, offset): (Vec<char>, usize) = (err.input().chars().collect(), err.offset());
|
||||
let module_id = err.input().state.module_id;
|
||||
|
||||
if offset >= input.len() {
|
||||
// From the winnow docs:
|
||||
//
|
||||
// This is an offset, not an index, and may point to
|
||||
// the end of input (input.len()) on eof errors.
|
||||
|
||||
return KclError::Lexical(crate::errors::KclErrorDetails {
|
||||
source_ranges: vec![SourceRange::new(offset, offset, module_id)],
|
||||
message: "unexpected EOF while parsing".to_string(),
|
||||
});
|
||||
}
|
||||
|
||||
// TODO: Add the Winnow tokenizer context to the error.
|
||||
// See https://github.com/KittyCAD/modeling-app/issues/784
|
||||
let bad_token = &input[offset];
|
||||
// TODO: Add the Winnow parser context to the error.
|
||||
// See https://github.com/KittyCAD/modeling-app/issues/784
|
||||
KclError::Lexical(crate::errors::KclErrorDetails {
|
||||
source_ranges: vec![SourceRange::new(offset, offset + 1, module_id)],
|
||||
message: format!("found unknown token '{}'", bad_token),
|
||||
})
|
||||
}
|
||||
}
|
Reference in New Issue
Block a user