Move the wasm lib, and cleanup rust directory and all references (#5585)

* git mv src/wasm-lib rust Signed-off-by: Jess Frazelle <github@jessfraz.com> * mv wasm-lib to workspace Signed-off-by: Jess Frazelle <github@jessfraz.com> * mv kcl-lib Signed-off-by: Jess Frazelle <github@jessfraz.com> * mv derive docs Signed-off-by: Jess Frazelle <github@jessfraz.com> * resolve file paths Signed-off-by: Jess Frazelle <github@jessfraz.com> * clippy Signed-off-by: Jess Frazelle <github@jessfraz.com> * move more shit Signed-off-by: Jess Frazelle <github@jessfraz.com> * fix more paths Signed-off-by: Jess Frazelle <github@jessfraz.com> * make yarn build:wasm work Signed-off-by: Jess Frazelle <github@jessfraz.com> * fix scripts Signed-off-by: Jess Frazelle <github@jessfraz.com> * fixups Signed-off-by: Jess Frazelle <github@jessfraz.com> * better references Signed-off-by: Jess Frazelle <github@jessfraz.com> * fix cargo ci Signed-off-by: Jess Frazelle <github@jessfraz.com> * fix reference Signed-off-by: Jess Frazelle <github@jessfraz.com> * fix more ci Signed-off-by: Jess Frazelle <github@jessfraz.com> * fix tests Signed-off-by: Jess Frazelle <github@jessfraz.com> * cargo sort Signed-off-by: Jess Frazelle <github@jessfraz.com> * fix script Signed-off-by: Jess Frazelle <github@jessfraz.com> * fix Signed-off-by: Jess Frazelle <github@jessfraz.com> * fmt Signed-off-by: Jess Frazelle <github@jessfraz.com> * fix a dep Signed-off-by: Jess Frazelle <github@jessfraz.com> * sort Signed-off-by: Jess Frazelle <github@jessfraz.com> * remove unused deps Signed-off-by: Jess Frazelle <github@jessfraz.com> * Revert "remove unused deps" This reverts commit fbabdb062e275fd5cbc1476f8480a1afee15d972. * updates Signed-off-by: Jess Frazelle <github@jessfraz.com> * deps; Signed-off-by: Jess Frazelle <github@jessfraz.com> * fixes Signed-off-by: Jess Frazelle <github@jessfraz.com> * updates Signed-off-by: Jess Frazelle <github@jessfraz.com> --------- Signed-off-by: Jess Frazelle <github@jessfraz.com>
2025-03-01 13:59:01 -08:00
parent 0a2bf4b55f
commit c3bdc6f106
1443 changed files with 509 additions and 4274 deletions
--- a/rust/kcl-lib/src/parsing/token/mod.rs
+++ b/rust/kcl-lib/src/parsing/token/mod.rs
@ -0,0 +1,543 @@
+// Clippy does not agree with rustc here for some reason.
+#![allow(clippy::needless_lifetimes)]
+
+use std::{fmt, iter::Enumerate, num::NonZeroUsize, str::FromStr};
+
+use anyhow::Result;
+use parse_display::Display;
+use schemars::JsonSchema;
+use serde::{Deserialize, Serialize};
+use tokeniser::Input;
+use tower_lsp::lsp_types::SemanticTokenType;
+use winnow::{
+    self,
+    error::ParseError,
+    stream::{ContainsToken, Stream},
+};
+
+use crate::{
+    errors::KclError,
+    parsing::ast::types::{ItemVisibility, VariableKind},
+    source_range::SourceRange,
+    CompilationError, ModuleId,
+};
+
+mod tokeniser;
+
+#[cfg(test)]
+pub(crate) use tokeniser::RESERVED_WORDS;
+
+// Unit suffixes that may follow a numeric literal.
+//
+// Note the ordering: the tokeniser tries these front to back with `alt`, and
+// `Token::numeric_suffix` scans them front to back with `ends_with`, so `m` must come
+// after `mm` and `cm`, and `in` must come after `inch`.
+pub const NUM_SUFFIXES: [&str; 9] = ["mm", "cm", "m", "inch", "in", "ft", "yd", "deg", "rad"];
+
+// The unit suffix attached to a numeric literal.
+// NOTE(review): kept as plain `//` comments; `///` docs may surface in the derived
+// schema / TS output — confirm before converting.
+#[derive(Clone, Copy, Debug, Eq, PartialEq, Serialize, Deserialize, ts_rs::TS, JsonSchema)]
+#[repr(u32)]
+pub enum NumericSuffix {
+    // No suffix at all.
+    None,
+    // The `_` suffix.
+    Count,
+    // `mm` / `millimeters`.
+    Mm,
+    // `cm` / `centimeters`.
+    Cm,
+    // `m` / `meters`.
+    M,
+    // `inch` / `in`.
+    Inch,
+    // `ft` / `feet`.
+    Ft,
+    // `yd` / `yards`.
+    Yd,
+    // `deg` / `degrees`.
+    Deg,
+    // `rad` / `radians`.
+    Rad,
+}
+
+impl NumericSuffix {
+    /// True when no suffix is present.
+    #[allow(dead_code)]
+    pub fn is_none(self) -> bool {
+        matches!(self, Self::None)
+    }
+
+    /// True when any suffix (including `_`) is present.
+    pub fn is_some(self) -> bool {
+        !matches!(self, Self::None)
+    }
+
+    /// Byte string identifying this suffix; empty for `None`.
+    pub fn digestable_id(&self) -> &[u8] {
+        let id: &[u8] = match self {
+            Self::None => &[],
+            Self::Count => b"_",
+            Self::Mm => b"mm",
+            Self::Cm => b"cm",
+            Self::M => b"m",
+            Self::Inch => b"in",
+            Self::Ft => b"ft",
+            Self::Yd => b"yd",
+            Self::Deg => b"deg",
+            Self::Rad => b"rad",
+        };
+        id
+    }
+}
+
+impl FromStr for NumericSuffix {
+    type Err = CompilationError;
+
+    /// Parses a short or long unit name (or `_`) into a suffix.
+    ///
+    /// Unrecognised text is reported as a `CompilationError` with a default source range.
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        let suffix = match s {
+            "_" => Self::Count,
+            "mm" | "millimeters" => Self::Mm,
+            "cm" | "centimeters" => Self::Cm,
+            "m" | "meters" => Self::M,
+            "inch" | "in" => Self::Inch,
+            "ft" | "feet" => Self::Ft,
+            "yd" | "yards" => Self::Yd,
+            "deg" | "degrees" => Self::Deg,
+            "rad" | "radians" => Self::Rad,
+            _ => {
+                return Err(CompilationError::err(SourceRange::default(), "invalid unit of measure"));
+            }
+        };
+        Ok(suffix)
+    }
+}
+
+impl fmt::Display for NumericSuffix {
+    /// Writes the short form of the suffix; `None` writes nothing.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let text = match self {
+            Self::None => return Ok(()),
+            Self::Count => "_",
+            Self::Mm => "mm",
+            Self::Cm => "cm",
+            Self::M => "m",
+            Self::Inch => "in",
+            Self::Ft => "ft",
+            Self::Yd => "yd",
+            Self::Deg => "deg",
+            Self::Rad => "rad",
+        };
+        f.write_str(text)
+    }
+}
+
+/// An owned sequence of lexed tokens.
+#[derive(Clone, Debug, PartialEq)]
+pub(crate) struct TokenStream {
+    /// The tokens, in the order they were lexed.
+    tokens: Vec<Token>,
+}
+
+impl TokenStream {
+    /// Wraps an already-lexed list of tokens.
+    fn new(tokens: Vec<Token>) -> Self {
+        TokenStream { tokens }
+    }
+
+    /// Strips every `Unknown` token out of the stream and returns the removed tokens.
+    pub(super) fn remove_unknown(&mut self) -> Vec<Token> {
+        let (known, unknown): (Vec<Token>, Vec<Token>) = std::mem::take(&mut self.tokens)
+            .into_iter()
+            .partition(|candidate| !matches!(candidate.token_type, TokenType::Unknown));
+        self.tokens = known;
+        unknown
+    }
+
+    /// Borrowing iterator over all tokens.
+    pub fn iter(&self) -> impl Iterator<Item = &Token> {
+        self.tokens.iter()
+    }
+
+    /// True when the stream holds no tokens.
+    pub fn is_empty(&self) -> bool {
+        self.tokens.is_empty()
+    }
+
+    /// A slice view spanning the whole stream.
+    pub fn as_slice(&self) -> TokenSlice {
+        self.into()
+    }
+}
+
+impl<'a> From<&'a TokenStream> for TokenSlice<'a> {
+    /// Builds a slice covering every token of `stream`.
+    fn from(stream: &'a TokenStream) -> Self {
+        let end = stream.tokens.len();
+        Self { stream, start: 0, end }
+    }
+}
+
+impl IntoIterator for TokenStream {
+    type Item = Token;
+
+    type IntoIter = std::vec::IntoIter<Token>;
+
+    /// Consumes the stream, yielding its tokens by value.
+    fn into_iter(self) -> Self::IntoIter {
+        let Self { tokens } = self;
+        tokens.into_iter()
+    }
+}
+
+/// A borrowed window (`start..end`) into a `TokenStream`.
+#[derive(Debug, Clone)]
+pub(crate) struct TokenSlice<'a> {
+    /// The stream being viewed.
+    stream: &'a TokenStream,
+    /// Index into `stream.tokens` of the first token in the window.
+    start: usize,
+    /// Index into `stream.tokens` one past the last token in the window.
+    end: usize,
+}
+
+impl<'a> std::ops::Deref for TokenSlice<'a> {
+    type Target = [Token];
+
+    /// Exposes the window as a plain token slice.
+    fn deref(&self) -> &Self::Target {
+        let window = self.start..self.end;
+        &self.stream.tokens[window]
+    }
+}
+
+impl<'a> TokenSlice<'a> {
+    /// Returns the token at index `i`, counted from the start of this slice.
+    ///
+    /// The lookup is bounds-checked against the underlying stream only, not against this
+    /// slice's `end`; it panics if `i + start` is outside the stream.
+    pub fn token(&self, i: usize) -> &Token {
+        &self.stream.tokens[i + self.start]
+    }
+
+    /// Iterates over the tokens inside this slice.
+    pub fn iter(&self) -> impl Iterator<Item = &Token> {
+        (**self).iter()
+    }
+
+    /// Returns this slice with its first and last tokens dropped.
+    ///
+    /// A slice holding fewer than two tokens yields an empty slice, rather than
+    /// underflowing `end - 1` or producing an inverted range that panics on the next
+    /// dereference.
+    pub fn without_ends(&self) -> Self {
+        let start = (self.start + 1).min(self.end);
+        let end = self.end.saturating_sub(1).max(start);
+        Self {
+            start,
+            end,
+            stream: self.stream,
+        }
+    }
+}
+
+impl<'a> IntoIterator for TokenSlice<'a> {
+    type Item = &'a Token;
+
+    type IntoIter = std::slice::Iter<'a, Token>;
+
+    /// Iterates by reference over the tokens in the window.
+    fn into_iter(self) -> Self::IntoIter {
+        let Self { stream, start, end } = self;
+        stream.tokens[start..end].iter()
+    }
+}
+
+/// Lets winnow parsers consume a `TokenSlice` one token at a time.
+impl<'a> Stream for TokenSlice<'a> {
+    type Token = Token;
+    type Slice = Self;
+    type IterOffsets = Enumerate<std::iter::Cloned<std::slice::Iter<'a, Token>>>;
+    type Checkpoint = Checkpoint;
+
+    /// Yields `(offset, token)` pairs for the remaining tokens.
+    ///
+    /// Tokens are cloned lazily as the iterator advances, instead of copying the whole
+    /// remaining window into a temporary `Vec` up front.
+    fn iter_offsets(&self) -> Self::IterOffsets {
+        // Copy the `&'a` reference out so the iterator borrows the stream, not `self`.
+        let stream: &'a TokenStream = self.stream;
+        stream.tokens[self.start..self.end].iter().cloned().enumerate()
+    }
+
+    /// Offset of the end of input: the number of tokens left in the window.
+    fn eof_offset(&self) -> usize {
+        self.len()
+    }
+
+    /// Pops the first token of the window, or returns `None` when the window is empty.
+    fn next_token(&mut self) -> Option<Self::Token> {
+        let token = self.first()?.clone();
+        self.start += 1;
+        Some(token)
+    }
+
+    /// Offset of the first token satisfying `predicate`, if any.
+    fn offset_for<P>(&self, predicate: P) -> Option<usize>
+    where
+        P: Fn(Self::Token) -> bool,
+    {
+        self.iter().position(|b| predicate(b.clone()))
+    }
+
+    /// Validates that `tokens` tokens are available, reporting how many are missing otherwise.
+    fn offset_at(&self, tokens: usize) -> Result<usize, winnow::error::Needed> {
+        if let Some(needed) = tokens.checked_sub(self.len()).and_then(NonZeroUsize::new) {
+            Err(winnow::error::Needed::Size(needed))
+        } else {
+            Ok(tokens)
+        }
+    }
+
+    /// Splits off the first `offset` tokens as their own slice and advances past them.
+    ///
+    /// Panics if `offset` reaches beyond the end of the window.
+    fn next_slice(&mut self, offset: usize) -> Self::Slice {
+        assert!(self.start + offset <= self.end);
+
+        let next = TokenSlice {
+            stream: self.stream,
+            start: self.start,
+            end: self.start + offset,
+        };
+        self.start += offset;
+        next
+    }
+
+    /// Records the current window bounds.
+    fn checkpoint(&self) -> Self::Checkpoint {
+        Checkpoint(self.start, self.end)
+    }
+
+    /// Restores window bounds previously recorded by `checkpoint`.
+    fn reset(&mut self, checkpoint: &Self::Checkpoint) {
+        self.start = checkpoint.0;
+        self.end = checkpoint.1;
+    }
+
+    /// Debug view of the slice itself.
+    fn raw(&self) -> &dyn fmt::Debug {
+        self
+    }
+}
+
+impl<'a> winnow::stream::Offset for TokenSlice<'a> {
+    /// Distance, in tokens, between this slice's start index and `start`'s start index.
+    fn offset_from(&self, start: &Self) -> usize {
+        self.start - start.start
+    }
+}
+
+impl<'a> winnow::stream::Offset<Checkpoint> for TokenSlice<'a> {
+    /// Distance, in tokens, between this slice's start index and the checkpoint's start index.
+    fn offset_from(&self, start: &Checkpoint) -> usize {
+        self.start - start.0
+    }
+}
+
+impl winnow::stream::Offset for Checkpoint {
+    /// Distance, in tokens, between the start indices of two checkpoints.
+    fn offset_from(&self, start: &Self) -> usize {
+        self.0 - start.0
+    }
+}
+
+/// Partial (streaming) input is not supported for token slices, so there is no state to track.
+impl<'a> winnow::stream::StreamIsPartial for TokenSlice<'a> {
+    type PartialState = ();
+
+    // Nothing to do: there is no partial state.
+    fn complete(&mut self) -> Self::PartialState {}
+
+    // Nothing to restore.
+    fn restore_partial(&mut self, _: Self::PartialState) {}
+
+    fn is_partial_supported() -> bool {
+        false
+    }
+}
+
+/// Saved `(start, end)` bounds of a `TokenSlice`, used to rewind it.
+#[derive(Clone, Debug)]
+pub struct Checkpoint(usize, usize);
+
+/// The types of tokens.
+#[derive(Debug, PartialEq, Eq, Copy, Clone, Display)]
+#[display(style = "camelCase")]
+pub enum TokenType {
+    /// A number, optionally followed by `_` and/or a unit suffix.
+    Number,
+    /// A word.
+    Word,
+    /// An operator.
+    Operator,
+    /// A single- or double-quoted string, including its quotes.
+    String,
+    /// A keyword.
+    Keyword,
+    /// A type.
+    Type,
+    /// A brace: any of `{`, `}`, `(`, `)`, `[`, `]`.
+    Brace,
+    /// A hash: `#`.
+    Hash,
+    /// A bang: `!`.
+    Bang,
+    /// A dollar sign: `$`.
+    Dollar,
+    /// Whitespace.
+    Whitespace,
+    /// A comma.
+    Comma,
+    /// A colon.
+    Colon,
+    /// A period.
+    Period,
+    /// A double period: `..`.
+    DoublePeriod,
+    /// A line comment: `//` up to the end of the line.
+    LineComment,
+    /// A block comment: `/* ... */`.
+    BlockComment,
+    /// A function name.
+    Function,
+    /// Unknown lexemes.
+    Unknown,
+    /// The ? symbol, used for optional values.
+    QuestionMark,
+    /// The @ symbol.
+    At,
+}
+
+/// Most KCL tokens correspond to LSP semantic tokens (but not all).
+impl TryFrom<TokenType> for SemanticTokenType {
+    type Error = anyhow::Error;
+
+    /// Maps a KCL token type to its LSP semantic token type, failing for token types
+    /// that have no semantic counterpart.
+    fn try_from(token_type: TokenType) -> Result<Self> {
+        // If you return a new kind of `SemanticTokenType`, make sure to update `SEMANTIC_TOKEN_TYPES`
+        // in the LSP implementation.
+        let semantic = match token_type {
+            TokenType::Number => Self::NUMBER,
+            TokenType::Word => Self::VARIABLE,
+            TokenType::Keyword => Self::KEYWORD,
+            TokenType::Type => Self::TYPE,
+            TokenType::String => Self::STRING,
+            TokenType::Function => Self::FUNCTION,
+            TokenType::Operator | TokenType::QuestionMark | TokenType::Bang => Self::OPERATOR,
+            TokenType::LineComment | TokenType::BlockComment => Self::COMMENT,
+            TokenType::Whitespace
+            | TokenType::Brace
+            | TokenType::Comma
+            | TokenType::Colon
+            | TokenType::Period
+            | TokenType::DoublePeriod
+            | TokenType::Hash
+            | TokenType::Dollar
+            | TokenType::At
+            | TokenType::Unknown => {
+                anyhow::bail!("unsupported token type: {:?}", token_type)
+            }
+        };
+        Ok(semantic)
+    }
+}
+
+impl TokenType {
+    /// True for `Whitespace`.
+    pub fn is_whitespace(&self) -> bool {
+        *self == Self::Whitespace
+    }
+
+    /// True for either kind of comment.
+    pub fn is_comment(&self) -> bool {
+        match self {
+            Self::LineComment | Self::BlockComment => true,
+            _ => false,
+        }
+    }
+}
+
+/// A single lexed token: its kind, where it sits in the source, and its text.
+#[derive(Debug, PartialEq, Eq, Clone)]
+pub struct Token {
+    /// What kind of token this is.
+    pub token_type: TokenType,
+    /// Offset in the source code where this token begins.
+    pub start: usize,
+    /// Offset in the source code where this token ends.
+    pub end: usize,
+    /// The module whose source this token was lexed from.
+    pub(super) module_id: ModuleId,
+    /// The source text of the token.
+    pub(super) value: String,
+}
+
+impl ContainsToken<Token> for (TokenType, &str) {
+    /// Matches a token with exactly this type and this text.
+    fn contains_token(&self, token: Token) -> bool {
+        let (wanted_type, wanted_text) = *self;
+        wanted_type == token.token_type && wanted_text == token.value
+    }
+}
+
+impl ContainsToken<Token> for TokenType {
+    /// Matches any token of this type, whatever its text.
+    fn contains_token(&self, token: Token) -> bool {
+        let Token { token_type, .. } = token;
+        token_type == *self
+    }
+}
+
+impl Token {
+    /// Builds a token spanning `range` of the source of module `module_id`.
+    pub fn from_range(
+        range: std::ops::Range<usize>,
+        module_id: ModuleId,
+        token_type: TokenType,
+        value: String,
+    ) -> Self {
+        let std::ops::Range { start, end } = range;
+        Self {
+            token_type,
+            start,
+            end,
+            module_id,
+            value,
+        }
+    }
+
+    /// True unless the token is whitespace or a comment.
+    pub fn is_code_token(&self) -> bool {
+        let kind = self.token_type;
+        !(kind.is_whitespace() || kind.is_comment())
+    }
+
+    /// The source range covered by this token.
+    pub fn as_source_range(&self) -> SourceRange {
+        SourceRange::new(self.start, self.end, self.module_id)
+    }
+
+    /// The token's source range, wrapped in a one-element vector.
+    pub fn as_source_ranges(&self) -> Vec<SourceRange> {
+        let range = self.as_source_range();
+        vec![range]
+    }
+
+    /// If this token is a visibility keyword (`export`), returns the visibility it denotes.
+    pub fn visibility_keyword(&self) -> Option<ItemVisibility> {
+        let is_export = self.token_type == TokenType::Keyword && self.value == "export";
+        if is_export {
+            Some(ItemVisibility::Export)
+        } else {
+            None
+        }
+    }
+
+    /// Numeric value of a `Number` token, ignoring any `_` or unit suffix.
+    ///
+    /// Returns `None` for other token types, or if the digits do not parse.
+    pub fn numeric_value(&self) -> Option<f64> {
+        if self.token_type != TokenType::Number {
+            return None;
+        }
+        // Everything from the first `_` or ASCII letter onwards is the suffix.
+        let digits_end = self
+            .value
+            .find(|c: char| c == '_' || c.is_ascii_alphabetic())
+            .unwrap_or(self.value.len());
+        self.value[..digits_end].parse().ok()
+    }
+
+    /// Unit suffix of a `Number` token; `NumericSuffix::None` if it has none or is not a number.
+    pub fn numeric_suffix(&self) -> NumericSuffix {
+        if self.token_type != TokenType::Number {
+            return NumericSuffix::None;
+        }
+
+        if self.value.ends_with('_') {
+            return NumericSuffix::Count;
+        }
+
+        // `NUM_SUFFIXES` is ordered so that the first match is the right one.
+        NUM_SUFFIXES
+            .iter()
+            .find(|suffix| self.value.ends_with(**suffix))
+            .map_or(NumericSuffix::None, |suffix| {
+                suffix.parse::<NumericSuffix>().unwrap()
+            })
+    }
+
+    /// Is this token the beginning of a variable/function declaration?
+    /// If so, what kind?
+    /// If not, returns None.
+    pub fn declaration_keyword(&self) -> Option<VariableKind> {
+        if self.token_type != TokenType::Keyword {
+            return None;
+        }
+        match self.value.as_str() {
+            "fn" => Some(VariableKind::Fn),
+            "var" | "let" | "const" => Some(VariableKind::Const),
+            _ => None,
+        }
+    }
+}
+
+impl From<Token> for SourceRange {
+    /// The source range an owned token covers.
+    fn from(token: Token) -> Self {
+        token.as_source_range()
+    }
+}
+
+impl From<&Token> for SourceRange {
+    /// The source range a borrowed token covers.
+    fn from(token: &Token) -> Self {
+        token.as_source_range()
+    }
+}
+
+/// Lexes `s` (the source of module `module_id`) into a token stream, converting
+/// tokeniser failures into `KclError`.
+pub fn lex(s: &str, module_id: ModuleId) -> Result<TokenStream, KclError> {
+    match tokeniser::lex(s, module_id) {
+        Ok(stream) => Ok(stream),
+        Err(parse_error) => Err(KclError::from(parse_error)),
+    }
+}
+
+impl From<ParseError<Input<'_>, winnow::error::ContextError>> for KclError {
+    /// Converts a tokeniser failure into a lexical `KclError` pointing at the offending character.
+    ///
+    /// `err.offset()` is a byte offset into the source, so the character is looked up by
+    /// slicing the `str` at that offset. Indexing a `Vec<char>` with it picks the wrong
+    /// character (or falsely reports EOF) once the source contains multi-byte characters.
+    fn from(err: ParseError<Input<'_>, winnow::error::ContextError>) -> Self {
+        let offset = err.offset();
+        let module_id = err.input().state.module_id;
+        let source: &str = err.input();
+
+        let bad_token = match source.get(offset..).and_then(|rest| rest.chars().next()) {
+            Some(c) => c,
+            None => {
+                // From the winnow docs:
+                //
+                // This is an offset, not an index, and may point to
+                // the end of input (input.len()) on eof errors.
+
+                return KclError::Lexical(crate::errors::KclErrorDetails {
+                    source_ranges: vec![SourceRange::new(offset, offset, module_id)],
+                    message: "unexpected EOF while parsing".to_string(),
+                });
+            }
+        };
+
+        // TODO: Add the Winnow tokenizer context to the error.
+        // See https://github.com/KittyCAD/modeling-app/issues/784
+        // TODO: Add the Winnow parser context to the error.
+        // See https://github.com/KittyCAD/modeling-app/issues/784
+        KclError::Lexical(crate::errors::KclErrorDetails {
+            // Cover the whole character, which may be more than one byte wide.
+            source_ranges: vec![SourceRange::new(offset, offset + bad_token.len_utf8(), module_id)],
+            message: format!("found unknown token '{}'", bad_token),
+        })
+    }
+}
--- a/rust/kcl-lib/src/parsing/token/snapshots/kcl_libparsingtokentokenisertests__program2.snap
+++ b/rust/kcl-lib/src/parsing/token/snapshots/kcl_libparsingtokentokenisertests__program2.snap
--- a/rust/kcl-lib/src/parsing/token/tokeniser.rs
+++ b/rust/kcl-lib/src/parsing/token/tokeniser.rs
@ -0,0 +1,780 @@
+use fnv::FnvHashMap;
+use lazy_static::lazy_static;
+use winnow::{
+    ascii::{digit1, multispace1},
+    combinator::{alt, opt, peek, preceded, repeat},
+    error::{ContextError, ParseError},
+    prelude::*,
+    stream::{Location, Stream},
+    token::{any, none_of, one_of, take_till, take_until},
+    LocatingSlice, Stateful,
+};
+
+use super::TokenStream;
+use crate::{
+    parsing::token::{Token, TokenType},
+    ModuleId,
+};
+
+lazy_static! {
+    // Reserved words, mapped to the token type the lexer assigns them.
+    pub(crate) static ref RESERVED_WORDS: FnvHashMap<&'static str, TokenType> = {
+        let keywords = [
+            "if",
+            "else",
+            "for",
+            "while",
+            "return",
+            "break",
+            "continue",
+            "fn",
+            "let",
+            "mut",
+            "as",
+            "loop",
+            "true",
+            "false",
+            "nil",
+            // This isn't a type because brackets are used for the type.
+            "array",
+            "and",
+            "or",
+            "not",
+            "var",
+            "const",
+            // "import" is special because of import().
+            "export",
+            "type",
+            "interface",
+            "new",
+            "self",
+            "record",
+            "struct",
+            "object",
+        ];
+        let types = ["string", "number", "bool", "Sketch", "SketchSurface", "Solid", "Plane"];
+
+        keywords
+            .iter()
+            .map(|word| (*word, TokenType::Keyword))
+            .chain(types.iter().map(|word| (*word, TokenType::Type)))
+            .collect()
+    };
+}
+
+/// Tokenises the whole of `i`, tagging every token with `module_id`.
+pub(super) fn lex(i: &str, module_id: ModuleId) -> Result<TokenStream, ParseError<Input<'_>, ContextError>> {
+    let input = Input {
+        input: LocatingSlice::new(i),
+        state: State::new(module_id),
+    };
+    let tokens: Vec<Token> = repeat(0.., token).parse(input)?;
+    Ok(TokenStream::new(tokens))
+}
+
+/// Tokeniser input: the source text with location tracking, plus the tokeniser `State`.
+pub(super) type Input<'a> = Stateful<LocatingSlice<&'a str>, State>;
+
+/// State carried alongside the source text while tokenising.
+#[derive(Debug, Clone)]
+pub(super) struct State {
+    /// The module being tokenised; copied into every token produced.
+    pub module_id: ModuleId,
+}
+
+impl State {
+    /// Creates the tokeniser state for the given module.
+    fn new(module_id: ModuleId) -> Self {
+        Self { module_id }
+    }
+}
+
+/// Lexes the next token, choosing the sub-lexer from the next character.
+///
+/// When no sub-lexer accepts the input and the remaining input is non-empty ASCII, one
+/// byte is consumed and returned as an `Unknown` token; otherwise the error is returned.
+pub(super) fn token(i: &mut Input<'_>) -> PResult<Token> {
+    let attempt = winnow::combinator::dispatch! {peek(any);
+        '"' | '\'' => string,
+        '/' => alt((line_comment, block_comment, operator)),
+        '{' | '(' | '[' => brace_start,
+        '}' | ')' | ']' => brace_end,
+        ',' => comma,
+        '?' => question_mark,
+        '@' => at,
+        '0'..='9' => number,
+        ':' => colon,
+        '.' => alt((number, double_period, period)),
+        '#' => hash,
+        '$' => dollar,
+        '!' => alt((operator, bang)),
+        ' ' | '\t' | '\n' | '\r' => whitespace,
+        _ => alt((operator, keyword_type_or_word))
+    }
+    .parse_next(i);
+
+    let err = match attempt {
+        Ok(token) => return Ok(token),
+        Err(err) => err,
+    };
+
+    // TODO: Handle non ascii cases
+    if i.len() == 0 || !i.is_ascii() {
+        return Err(err);
+    }
+
+    // Swallow a single byte as an unknown token so lexing can carry on.
+    let start = i.location();
+    let module_id = i.state.module_id;
+    let text = i.next_slice(1).to_string();
+    Ok(Token::from_range(start..start + 1, module_id, TokenType::Unknown, text))
+}
+
+/// Lexes a `/* ... */` comment, delimiters included.
+fn block_comment(i: &mut Input<'_>) -> PResult<Token> {
+    let module_id = i.state.module_id;
+    let (text, span) = ("/*", take_until(0.., "*/"), "*/")
+        .take()
+        .with_span()
+        .parse_next(i)?;
+    let token = Token::from_range(span, module_id, TokenType::BlockComment, text.to_string());
+    Ok(token)
+}
+
+/// Lexes a `//` comment up to (not including) the next `\n` or `\r`.
+fn line_comment(i: &mut Input<'_>) -> PResult<Token> {
+    let module_id = i.state.module_id;
+    let (text, span) = ("//", take_till(0.., ['\n', '\r']))
+        .take()
+        .with_span()
+        .parse_next(i)?;
+    let token = Token::from_range(span, module_id, TokenType::LineComment, text.to_string());
+    Ok(token)
+}
+
+/// Lexes a numeric literal: digits with an optional fraction (or a bare `.digits`),
+/// optionally followed by `_` and/or one of `NUM_SUFFIXES`.
+fn number(i: &mut Input<'_>) -> PResult<Token> {
+    let module_id = i.state.module_id;
+    // Digits before the decimal point.
+    let with_integer_part = (digit1, opt(('.', digit1)), opt('_'), opt(alt(super::NUM_SUFFIXES))).map(|_| ());
+    // No digits before the decimal point.
+    let fraction_only = ('.', digit1, opt('_'), opt(alt(super::NUM_SUFFIXES))).map(|_| ());
+
+    let (text, span) = alt((with_integer_part, fraction_only))
+        .take()
+        .with_span()
+        .parse_next(i)?;
+    let token = Token::from_range(span, module_id, TokenType::Number, text.to_string());
+    Ok(token)
+}
+
+/// Lexes a run of one or more whitespace characters as a single token.
+fn whitespace(i: &mut Input<'_>) -> PResult<Token> {
+    let module_id = i.state.module_id;
+    let (text, span) = multispace1.with_span().parse_next(i)?;
+    let token = Token::from_range(span, module_id, TokenType::Whitespace, text.to_string());
+    Ok(token)
+}
+
+/// Recognises an identifier: an ASCII letter or `_`, then any number of ASCII letters,
+/// digits or `_`.
+fn inner_word(i: &mut Input<'_>) -> PResult<()> {
+    let first_char = ('a'..='z', 'A'..='Z', '_');
+    let later_chars = ('a'..='z', 'A'..='Z', '0'..='9', '_');
+
+    one_of(first_char).parse_next(i)?;
+    repeat(0.., one_of(later_chars)).parse_next(i)
+}
+
+/// Lexes an identifier as a `Word` token.
+fn word(i: &mut Input<'_>) -> PResult<Token> {
+    let module_id = i.state.module_id;
+    let (text, span) = inner_word.take().with_span().parse_next(i)?;
+    let token = Token::from_range(span, module_id, TokenType::Word, text.to_string());
+    Ok(token)
+}
+
+/// Lexes an operator. Two-character operators are listed first so they win over their
+/// one-character prefixes.
+fn operator(i: &mut Input<'_>) -> PResult<Token> {
+    let module_id = i.state.module_id;
+    let symbols = alt((
+        ">=", "<=", "==", "=>", "!=", "|>", "*", "+", "-", "/", "%", "=", "<", ">", r"\", "^", "|", "&",
+    ));
+    let (text, span) = symbols.with_span().parse_next(i)?;
+    let token = Token::from_range(span, module_id, TokenType::Operator, text.to_string());
+    Ok(token)
+}
+
+/// Lexes an opening `{`, `(` or `[` as a `Brace` token.
+fn brace_start(i: &mut Input<'_>) -> PResult<Token> {
+    let module_id = i.state.module_id;
+    let (symbol, span) = alt(('{', '(', '[')).with_span().parse_next(i)?;
+    let token = Token::from_range(span, module_id, TokenType::Brace, symbol.to_string());
+    Ok(token)
+}
+
+/// Lexes a closing `}`, `)` or `]` as a `Brace` token.
+fn brace_end(i: &mut Input<'_>) -> PResult<Token> {
+    let module_id = i.state.module_id;
+    let (symbol, span) = alt(('}', ')', ']')).with_span().parse_next(i)?;
+    let token = Token::from_range(span, module_id, TokenType::Brace, symbol.to_string());
+    Ok(token)
+}
+
+/// Lexes a single `,` as a `Comma` token.
+fn comma(i: &mut Input<'_>) -> PResult<Token> {
+    let module_id = i.state.module_id;
+    let (symbol, span) = ','.with_span().parse_next(i)?;
+    let token = Token::from_range(span, module_id, TokenType::Comma, symbol.to_string());
+    Ok(token)
+}
+
+/// Lexes a single `#` as a `Hash` token.
+fn hash(i: &mut Input<'_>) -> PResult<Token> {
+    let module_id = i.state.module_id;
+    let (symbol, span) = '#'.with_span().parse_next(i)?;
+    let token = Token::from_range(span, module_id, TokenType::Hash, symbol.to_string());
+    Ok(token)
+}
+
+/// Lexes a single `!` as a `Bang` token.
+fn bang(i: &mut Input<'_>) -> PResult<Token> {
+    let module_id = i.state.module_id;
+    let (symbol, span) = '!'.with_span().parse_next(i)?;
+    let token = Token::from_range(span, module_id, TokenType::Bang, symbol.to_string());
+    Ok(token)
+}
+
+/// Lexes a single `$` as a `Dollar` token.
+fn dollar(i: &mut Input<'_>) -> PResult<Token> {
+    let module_id = i.state.module_id;
+    let (symbol, span) = '$'.with_span().parse_next(i)?;
+    let token = Token::from_range(span, module_id, TokenType::Dollar, symbol.to_string());
+    Ok(token)
+}
+
+/// Lexes a single `?` as a `QuestionMark` token.
+fn question_mark(i: &mut Input<'_>) -> PResult<Token> {
+    let module_id = i.state.module_id;
+    let (symbol, span) = '?'.with_span().parse_next(i)?;
+    let token = Token::from_range(span, module_id, TokenType::QuestionMark, symbol.to_string());
+    Ok(token)
+}
+
+/// Lexes a single `@` as an `At` token.
+fn at(i: &mut Input<'_>) -> PResult<Token> {
+    let module_id = i.state.module_id;
+    let (symbol, span) = '@'.with_span().parse_next(i)?;
+    let token = Token::from_range(span, module_id, TokenType::At, symbol.to_string());
+    Ok(token)
+}
+
+/// Lexes a single `:` as a `Colon` token.
+fn colon(i: &mut Input<'_>) -> PResult<Token> {
+    let module_id = i.state.module_id;
+    let (symbol, span) = ':'.with_span().parse_next(i)?;
+    let token = Token::from_range(span, module_id, TokenType::Colon, symbol.to_string());
+    Ok(token)
+}
+
+/// Lexes a single `.` as a `Period` token.
+fn period(i: &mut Input<'_>) -> PResult<Token> {
+    let module_id = i.state.module_id;
+    let (symbol, span) = '.'.with_span().parse_next(i)?;
+    let token = Token::from_range(span, module_id, TokenType::Period, symbol.to_string());
+    Ok(token)
+}
+
+/// Lexes `..` as a `DoublePeriod` token.
+fn double_period(i: &mut Input<'_>) -> PResult<Token> {
+    let module_id = i.state.module_id;
+    let (text, span) = "..".with_span().parse_next(i)?;
+    let token = Token::from_range(span, module_id, TokenType::DoublePeriod, text.to_string());
+    Ok(token)
+}
+
+/// Body of a double-quoted string. Zero or more of either:
+/// 1. Any character except " or \
+/// 2. Any character preceded by \
+fn inner_double_quote(i: &mut Input<'_>) -> PResult<()> {
+    let plain = none_of(('"', '\\'));
+    let escaped = preceded('\\', any);
+    repeat(0.., alt((plain, escaped))).parse_next(i)
+}
+
+/// Body of a single-quoted string. Zero or more of either:
+/// 1. Any character except ' or \
+/// 2. Any character preceded by \
+fn inner_single_quote(i: &mut Input<'_>) -> PResult<()> {
+    let plain = none_of(('\'', '\\'));
+    let escaped = preceded('\\', any);
+    repeat(0.., alt((plain, escaped))).parse_next(i)
+}
+
+/// Lexes a single- or double-quoted string; the token text includes the quotes.
+fn string(i: &mut Input<'_>) -> PResult<Token> {
+    let module_id = i.state.module_id;
+    let single_quoted = ('\'', inner_single_quote.take(), '\'').take();
+    let double_quoted = ('"', inner_double_quote.take(), '"').take();
+    let (text, span): (&str, _) = alt((single_quoted, double_quoted)).with_span().parse_next(i)?;
+    let token = Token::from_range(span, module_id, TokenType::String, text.to_string());
+    Ok(token)
+}
+
+/// Lexes `import`, typed by what follows it: a space makes it a `Keyword`, an opening
+/// parenthesis makes it a `Word`. Anything else is an error. The following character is
+/// only peeked, not consumed.
+fn import_keyword(i: &mut Input<'_>) -> PResult<Token> {
+    let module_id = i.state.module_id;
+    let (text, span) = "import".with_span().parse_next(i)?;
+
+    let before_space = ' '.map(|_| TokenType::Keyword);
+    let before_paren = '('.map(|_| TokenType::Word);
+    let token_type = peek(alt((before_space, before_paren))).parse_next(i)?;
+
+    Ok(Token::from_range(span, module_id, token_type, text.to_owned()))
+}
+
+/// Lexes a word, then retags it with the type listed in `RESERVED_WORDS` if it is reserved.
+fn unambiguous_keyword_type_or_word(i: &mut Input<'_>) -> PResult<Token> {
+    let mut token = word.parse_next(i)?;
+    let reserved = RESERVED_WORDS.get(token.value.as_str()).copied();
+    match reserved {
+        Some(token_type) => token.token_type = token_type,
+        None => {}
+    }
+    Ok(token)
+}
+
+/// Lexes a keyword, type name or plain word, trying the `import` special case first.
+fn keyword_type_or_word(i: &mut Input<'_>) -> PResult<Token> {
+    let mut lexer = alt((import_keyword, unambiguous_keyword_type_or_word));
+    lexer.parse_next(i)
+}
+
+#[cfg(test)]
+mod tests {
+    use winnow::LocatingSlice;
+
+    use super::*;
+    use crate::parsing::token::TokenSlice;
+
+    fn assert_parse_err<'i, P, O, E>(mut p: P, s: &'i str)
+    where
+        O: std::fmt::Debug,
+        P: Parser<Input<'i>, O, E>,
+    {
+        let state = State::new(ModuleId::default());
+        let mut input = Input {
+            input: LocatingSlice::new(s),
+            state,
+        };
+        assert!(p.parse_next(&mut input).is_err(), "parsed {s} but should have failed");
+    }
+
+    // Returns the token and whether any more input is remaining to tokenize.
+    fn assert_parse_ok<'i, P, O, E>(mut p: P, s: &'i str) -> (O, bool)
+    where
+        E: std::fmt::Debug,
+        O: std::fmt::Debug,
+        P: Parser<Input<'i>, O, E>,
+    {
+        let state = State::new(ModuleId::default());
+        let mut input = Input {
+            input: LocatingSlice::new(s),
+            state,
+        };
+        let res = p.parse_next(&mut input);
+        assert!(res.is_ok(), "failed to parse {s}, got {}", res.unwrap_err());
+        (res.unwrap(), !input.is_empty())
+    }
+
+    #[test]
+    fn test_number() {
+        for (valid, expected) in [
+            ("1", false),
+            ("1 abc", true),
+            ("1.1", false),
+            ("1.1 abv", true),
+            ("1.1 abv", true),
+            ("1", false),
+            (".1", false),
+            ("5?", true),
+            ("5 + 6", true),
+            ("5 + a", true),
+            ("5.5", false),
+            ("1abc", true),
+        ] {
+            let (_, remaining) = assert_parse_ok(number, valid);
+            assert_eq!(expected, remaining, "`{valid}` expected another token to be {expected}");
+        }
+
+        for invalid in ["a", "?", "?5"] {
+            assert_parse_err(number, invalid);
+        }
+
+        let module_id = ModuleId::from_usize(1);
+        let input = Input {
+            input: LocatingSlice::new("0.0000000000"),
+            state: State::new(module_id),
+        };
+
+        assert_eq!(number.parse(input).unwrap().value, "0.0000000000");
+    }
+
+    #[test]
+    fn test_number_suffix() {
+        for (valid, expected_val, expected_next) in [
+            ("1_", 1.0, false),
+            ("1_mm", 1.0, false),
+            ("1_yd", 1.0, false),
+            ("1m", 1.0, false),
+            ("1inch", 1.0, false),
+            ("1toot", 1.0, true),
+            ("1.4inch t", 1.4, true),
+        ] {
+            let (t, remaining) = assert_parse_ok(number, valid);
+            assert_eq!(expected_next, remaining);
+            assert_eq!(
+                Some(expected_val),
+                t.numeric_value(),
+                "{valid} has incorrect numeric value, expected {expected_val} {t:?}"
+            );
+        }
+    }
+
+    #[test]
+    fn test_word() {
+        for valid in ["a", "a ", "a5", "a5a"] {
+            assert_parse_ok(word, valid);
+        }
+
+        for invalid in ["5", "5a", "5a5"] {
+            assert_parse_err(word, invalid);
+        }
+    }
+
+    #[test]
+    fn test_operator() {
+        // Inputs the `operator` parser must accept.
+        let accepted = [
+            "+", "+ ", "-", "<=", "<= ", ">=", ">= ", "> ", "< ", "|> ", "^ ", "% ", "+* ", "| ", "& ",
+        ];
+        // Inputs the `operator` parser must reject.
+        let rejected = ["5 + 5", "a", "a+", "a+5", "5a+5", ", newVar", ","];
+
+        for source in accepted {
+            assert_parse_ok(operator, source);
+        }
+        for source in rejected {
+            assert_parse_err(operator, source);
+        }
+    }
+
+    #[test]
+    fn test_string() {
+        // Inputs the `string` parser must accept: both quote styles, escaped quotes, and
+        // literals followed by further input.
+        let accepted = [
+            "\"\"",
+            "\"a\"",
+            "\"a\" ",
+            "\"a\"5",
+            "'a'5",
+            "\"with escaped \\\" backslash\"",
+            "\'with escaped \\\' backslash\'",
+            "'c'",
+        ];
+        // Inputs the `string` parser must reject: unterminated literals and inputs that do
+        // not start with a quote.
+        let rejected = ["\"", "\"a", "a\"", " \"a\"", "5\"a\"", "a + 'str'"];
+
+        for source in accepted {
+            assert_parse_ok(string, source);
+        }
+        for source in rejected {
+            assert_parse_err(string, source);
+        }
+    }
+
+    /// Asserts that the tokens in `actual` match `expected`, where each expected entry is
+    /// `(token type, start offset, end offset)`.
+    ///
+    /// An actual `Whitespace` token whose type differs from the current expectation is
+    /// skipped, so callers may leave whitespace out of `expected`. Every mismatch is collected
+    /// and reported together in a single assertion failure.
+    ///
+    /// # Panics
+    ///
+    /// Panics (via `assert!`) with one line per problem if any token type or source range
+    /// differs, if `actual` holds non-whitespace tokens beyond the end of `expected`, or if
+    /// fewer tokens than expected were matched.
+    #[track_caller]
+    fn assert_tokens(expected: &[(TokenType, usize, usize)], actual: TokenSlice) {
+        // Index of the next expectation to compare against.
+        let mut e = 0;
+        let mut issues = vec![];
+        for a in actual {
+            let (expected_type, expected_start, expected_end) = match expected.get(e) {
+                Some(entry) => (&entry.0, entry.1, entry.2),
+                None => {
+                    // More tokens than expectations. Whitespace is tolerated like any other
+                    // unlisted whitespace; anything else is reported as an issue rather than
+                    // panicking on an out-of-bounds index.
+                    if a.token_type != TokenType::Whitespace {
+                        issues.push(format!(
+                            "Unexpected extra token `{a:?}` after the {} expected tokens",
+                            expected.len()
+                        ));
+                    }
+                    continue;
+                }
+            };
+
+            if *expected_type != a.token_type {
+                // Unlisted whitespace is not an error; retry the same expectation against
+                // the next token.
+                if a.token_type == TokenType::Whitespace {
+                    continue;
+                }
+                issues.push(format!(
+                    "Type mismatch: expected `{}`, found `{}` (`{a:?}`), at index {e}",
+                    expected_type, a.token_type
+                ));
+            }
+
+            if expected_start != a.start || expected_end != a.end {
+                issues.push(format!(
+                    "Source range mismatch: expected {}-{}, found {}-{} (`{a:?}`), at index {e}",
+                    expected_start, expected_end, a.start, a.end
+                ));
+            }
+
+            e += 1;
+        }
+        // Not every expectation was consumed: the input produced too few tokens.
+        if e < expected.len() {
+            issues.push(format!("Expected `{}` tokens, found `{e}`", expected.len()));
+        }
+        assert!(issues.is_empty(), "{}", issues.join("\n"));
+    }
+
+    #[test]
+    fn test_program0() {
+        use TokenType::*;
+
+        // Whitespace tokens are left out; `assert_tokens` skips them.
+        let expected: &[(TokenType, usize, usize)] =
+            &[(Keyword, 0, 5), (Word, 6, 7), (Operator, 7, 8), (Number, 8, 9)];
+
+        let tokens = lex("const a=5", ModuleId::from_usize(1)).unwrap();
+        assert_tokens(expected, tokens.as_slice());
+    }
+
+    #[test]
+    fn test_program1() {
+        use TokenType::*;
+
+        // Whitespace tokens are left out; `assert_tokens` skips them.
+        let expected: &[(TokenType, usize, usize)] = &[
+            (Number, 0, 2),
+            (Operator, 3, 4),
+            (Number, 5, 10),
+            (Operator, 11, 12),
+            (Number, 13, 14),
+        ];
+
+        let tokens = lex("54 + 22500 + 6", ModuleId::from_usize(1)).unwrap();
+        assert_tokens(expected, tokens.as_slice());
+    }
+
+    #[test]
+    fn test_program2() {
+        let module_id = ModuleId::from_usize(1);
+        // A multi-statement program; its full token stream is checked against a stored
+        // snapshot.
+        let source = r#"const part001 = startSketchAt([0.0000000000, 5.0000000000])
+    |> line([0.4900857016, -0.0240763666], %)
+
+const part002 = "part002"
+const things = [part001, 0.0]
+let blah = 1
+const foo = false
+let baz = {a: 1, part001: "thing"}
+
+fn ghi = (part001) => {
+  return part001
+}
+
+show(part001)"#;
+
+        let actual = lex(source, module_id).unwrap();
+        // The expression is deliberately kept as `actual.tokens`: insta stores the expression
+        // text in the snapshot metadata.
+        insta::assert_debug_snapshot!(actual.tokens);
+    }
+
+    #[test]
+    fn test_program3() {
+        use TokenType::*;
+
+        let source = r#"
+// this is a comment
+const yo = { a: { b: { c: '123' } } }
+
+const key = 'c'
+const things = "things"
+
+// this is also a comment"#;
+
+        // Every token is listed, whitespace included, as (type, start, end).
+        let expected: &[(TokenType, usize, usize)] = &[
+            // Leading newline, then `// this is a comment`.
+            (Whitespace, 0, 1),
+            (LineComment, 1, 21),
+            (Whitespace, 21, 22),
+            // `const yo = { a: { b: { c: '123' } } }`
+            (Keyword, 22, 27),
+            (Whitespace, 27, 28),
+            (Word, 28, 30),
+            (Whitespace, 30, 31),
+            (Operator, 31, 32),
+            (Whitespace, 32, 33),
+            (Brace, 33, 34),
+            (Whitespace, 34, 35),
+            (Word, 35, 36),
+            (Colon, 36, 37),
+            (Whitespace, 37, 38),
+            (Brace, 38, 39),
+            (Whitespace, 39, 40),
+            (Word, 40, 41),
+            (Colon, 41, 42),
+            (Whitespace, 42, 43),
+            (Brace, 43, 44),
+            (Whitespace, 44, 45),
+            (Word, 45, 46),
+            (Colon, 46, 47),
+            (Whitespace, 47, 48),
+            (String, 48, 53),
+            (Whitespace, 53, 54),
+            (Brace, 54, 55),
+            (Whitespace, 55, 56),
+            (Brace, 56, 57),
+            (Whitespace, 57, 58),
+            (Brace, 58, 59),
+            // The blank line is a single two-character whitespace token.
+            (Whitespace, 59, 61),
+            // `const key = 'c'`
+            (Keyword, 61, 66),
+            (Whitespace, 66, 67),
+            (Word, 67, 70),
+            (Whitespace, 70, 71),
+            (Operator, 71, 72),
+            (Whitespace, 72, 73),
+            (String, 73, 76),
+            (Whitespace, 76, 77),
+            // `const things = "things"`
+            (Keyword, 77, 82),
+            (Whitespace, 82, 83),
+            (Word, 83, 89),
+            (Whitespace, 89, 90),
+            (Operator, 90, 91),
+            (Whitespace, 91, 92),
+            (String, 92, 100),
+            (Whitespace, 100, 102),
+            // `// this is also a comment`
+            (LineComment, 102, 127),
+        ];
+
+        let tokens = lex(source, ModuleId::from_usize(1)).unwrap();
+        assert_tokens(expected, tokens.as_slice());
+    }
+
+    #[test]
+    fn test_program4() {
+        use TokenType::*;
+
+        // `0..10` must lex as Number, DoublePeriod, Number. Whitespace tokens are left out;
+        // `assert_tokens` skips them.
+        let expected: &[(TokenType, usize, usize)] = &[
+            (Keyword, 0, 5),
+            (Word, 6, 13),
+            (Operator, 14, 15),
+            (Brace, 16, 17),
+            (Number, 17, 18),
+            (DoublePeriod, 18, 20),
+            (Number, 20, 22),
+            (Brace, 22, 23),
+        ];
+
+        let tokens = lex("const myArray = [0..10]", ModuleId::from_usize(1)).unwrap();
+        assert_tokens(expected, tokens.as_slice());
+    }
+
+    #[test]
+    fn test_lexer_negative_word() {
+        use TokenType::*;
+
+        // The leading `-` is its own Operator token, followed by the Word `legX`.
+        let expected: &[(TokenType, usize, usize)] = &[(Operator, 0, 1), (Word, 1, 5)];
+
+        let tokens = lex("-legX", ModuleId::from_usize(1)).unwrap();
+        assert_tokens(expected, tokens.as_slice());
+    }
+
+    #[test]
+    fn not_eq() {
+        let module_id = ModuleId::from_usize(1);
+        // `!=` must lex as exactly one Operator token spanning both characters.
+        let expected = vec![Token {
+            token_type: TokenType::Operator,
+            value: "!=".to_string(),
+            start: 0,
+            end: 2,
+            module_id,
+        }];
+
+        let lexed = lex("!=", module_id).unwrap();
+        assert_eq!(lexed.tokens, expected);
+    }
+
+    #[test]
+    fn test_unrecognized_token() {
+        use TokenType::*;
+
+        // `;` is expected to come through as an Unknown token rather than a lexing error.
+        let expected: &[(TokenType, usize, usize)] = &[(Number, 0, 2), (Unknown, 3, 4), (Number, 5, 6)];
+
+        let tokens = lex("12 ; 8", ModuleId::from_usize(1)).unwrap();
+        assert_tokens(expected, tokens.as_slice());
+    }
+
+    #[test]
+    fn import_keyword() {
+        let module_id = ModuleId::from_usize(1);
+        // `import` followed by a space and a name: the first token must be a Keyword.
+        let expected_first = Token {
+            token_type: TokenType::Keyword,
+            value: "import".to_string(),
+            start: 0,
+            end: 6,
+            module_id,
+        };
+
+        let lexed = lex("import foo", module_id).unwrap();
+        assert_eq!(lexed.tokens[0], expected_first);
+    }
+
+    #[test]
+    fn import_function() {
+        let module_id = ModuleId::from_usize(1);
+        // `import` immediately followed by `(`: the first token must be a plain Word.
+        let expected_first = Token {
+            token_type: TokenType::Word,
+            value: "import".to_string(),
+            start: 0,
+            end: 6,
+            module_id,
+        };
+
+        let lexed = lex("import(3)", module_id).unwrap();
+        assert_eq!(lexed.tokens[0], expected_first);
+    }
+
+    #[test]
+    fn test_is_code_token() {
+        let source = "foo (4/* comment */ +,2,\"sdfsdf\") // comment";
+        let lexed = lex(source, ModuleId::default()).unwrap();
+
+        // Token indices expected NOT to be code tokens; every other index in 0..14 must be
+        // a code token.
+        let non_code = [1, 4, 5, 12, 13];
+        for i in 0..14 {
+            let token = &lexed.tokens[i];
+            let should_be_code = !non_code.contains(&i);
+            assert!(
+                token.is_code_token() == should_be_code,
+                "failed test {i}: {:?}",
+                token,
+            );
+        }
+    }
+    #[test]
+    fn test_boolean_literal() {
+        let module_id = ModuleId::default();
+        // `true` on its own must lex as a Keyword token.
+        let expected_first = Token {
+            token_type: TokenType::Keyword,
+            value: "true".to_string(),
+            start: 0,
+            end: 4,
+            module_id,
+        };
+
+        let lexed = lex("true", module_id).unwrap();
+        assert_eq!(lexed.tokens[0], expected_first);
+    }
+    #[test]
+    fn test_word_starting_with_keyword() {
+        let module_id = ModuleId::default();
+        // `truee` merely starts with the keyword `true`; it must lex as one Word token.
+        let expected_first = Token {
+            token_type: TokenType::Word,
+            value: "truee".to_string(),
+            start: 0,
+            end: 5,
+            module_id,
+        };
+
+        let lexed = lex("truee", module_id).unwrap();
+        assert_eq!(lexed.tokens[0], expected_first);
+    }
+}