// Clippy does not agree with rustc here for some reason.
#![allow(clippy::needless_lifetimes)]

use std::{fmt, iter::Enumerate, num::NonZeroUsize, str::FromStr};

use anyhow::Result;
use parse_display::Display;
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use tokeniser::Input;
use tower_lsp::lsp_types::SemanticTokenType;
use winnow::{
    self,
    error::ParseError,
    stream::{ContainsToken, Stream},
};

use crate::{
    errors::KclError,
    parsing::ast::types::{ItemVisibility, VariableKind},
    source_range::SourceRange,
    CompilationError, ModuleId,
};

mod tokeniser;

pub(crate) use tokeniser::RESERVED_WORDS;

// Note the ordering: it's important that `m` comes after `mm` and `cm`.
pub const NUM_SUFFIXES: [&str; 9] = ["mm", "cm", "m", "inch", "in", "ft", "yd", "deg", "rad"];

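// For example, `"5mm"` ends with both `"mm"` and `"m"`; because `Token::numeric_suffix`
// below tries these suffixes in array order with `ends_with`, that literal resolves to
// `NumericSuffix::Mm` rather than `NumericSuffix::M`.
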
#[derive(Clone, Copy, Debug, Eq, PartialEq, Serialize, Deserialize, ts_rs::TS, JsonSchema)]
#[repr(u32)]
pub enum NumericSuffix {
    None,
    Count,
    Length,
    Angle,
    Mm,
    Cm,
    M,
    Inch,
    Ft,
    Yd,
    Deg,
    Rad,
}

impl NumericSuffix {
    #[allow(dead_code)]
    pub fn is_none(self) -> bool {
        self == Self::None
    }

    pub fn is_some(self) -> bool {
        self != Self::None
    }

    pub fn digestable_id(&self) -> &[u8] {
        match self {
            NumericSuffix::None => &[],
            NumericSuffix::Count => b"_",
            NumericSuffix::Length => b"Length",
            NumericSuffix::Angle => b"Angle",
            NumericSuffix::Mm => b"mm",
            NumericSuffix::Cm => b"cm",
            NumericSuffix::M => b"m",
            NumericSuffix::Inch => b"in",
            NumericSuffix::Ft => b"ft",
            NumericSuffix::Yd => b"yd",
            NumericSuffix::Deg => b"deg",
            NumericSuffix::Rad => b"rad",
        }
    }
}

impl FromStr for NumericSuffix {
    type Err = CompilationError;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        match s {
            "_" | "Count" => Ok(NumericSuffix::Count),
            "Length" => Ok(NumericSuffix::Length),
            "Angle" => Ok(NumericSuffix::Angle),
            "mm" | "millimeters" => Ok(NumericSuffix::Mm),
            "cm" | "centimeters" => Ok(NumericSuffix::Cm),
            "m" | "meters" => Ok(NumericSuffix::M),
            "inch" | "in" => Ok(NumericSuffix::Inch),
            "ft" | "feet" => Ok(NumericSuffix::Ft),
            "yd" | "yards" => Ok(NumericSuffix::Yd),
            "deg" | "degrees" => Ok(NumericSuffix::Deg),
            "rad" | "radians" => Ok(NumericSuffix::Rad),
            _ => Err(CompilationError::err(SourceRange::default(), "invalid unit of measure")),
        }
    }
}

impl fmt::Display for NumericSuffix {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            NumericSuffix::None => Ok(()),
            NumericSuffix::Count => write!(f, "_"),
            NumericSuffix::Length => write!(f, "Length"),
            NumericSuffix::Angle => write!(f, "Angle"),
            NumericSuffix::Mm => write!(f, "mm"),
            NumericSuffix::Cm => write!(f, "cm"),
            NumericSuffix::M => write!(f, "m"),
            NumericSuffix::Inch => write!(f, "in"),
            NumericSuffix::Ft => write!(f, "ft"),
            NumericSuffix::Yd => write!(f, "yd"),
            NumericSuffix::Deg => write!(f, "deg"),
            NumericSuffix::Rad => write!(f, "rad"),
        }
    }
}

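// A small illustrative sketch (not exhaustive) of the conversions above: long and short
// unit spellings parse to the same suffix, and `Display` prints the canonical short form.
#[cfg(test)]
#[test]
fn numeric_suffix_parse_display_sketch() {
    assert_eq!("mm".parse::<NumericSuffix>().ok(), Some(NumericSuffix::Mm));
    assert_eq!("millimeters".parse::<NumericSuffix>().ok(), Some(NumericSuffix::Mm));
    assert_eq!(NumericSuffix::Deg.to_string(), "deg");
    assert!("lightyears".parse::<NumericSuffix>().is_err());
}
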
#[derive(Clone, Debug, PartialEq)]
pub(crate) struct TokenStream {
    tokens: Vec<Token>,
}

impl TokenStream {
    fn new(tokens: Vec<Token>) -> Self {
        Self { tokens }
    }

    pub(super) fn remove_unknown(&mut self) -> Vec<Token> {
        let tokens = std::mem::take(&mut self.tokens);
        let (tokens, unknown_tokens): (Vec<Token>, Vec<Token>) = tokens
            .into_iter()
            .partition(|token| token.token_type != TokenType::Unknown);
        self.tokens = tokens;
        unknown_tokens
    }

    pub fn iter(&self) -> impl Iterator<Item = &Token> {
        self.tokens.iter()
    }

    pub fn is_empty(&self) -> bool {
        self.tokens.is_empty()
    }

    pub fn as_slice(&self) -> TokenSlice {
        TokenSlice::from(self)
    }
}

impl<'a> From<&'a TokenStream> for TokenSlice<'a> {
    fn from(stream: &'a TokenStream) -> Self {
        TokenSlice {
            start: 0,
            end: stream.tokens.len(),
            stream,
        }
    }
}

impl IntoIterator for TokenStream {
    type Item = Token;

    type IntoIter = std::vec::IntoIter<Token>;

    fn into_iter(self) -> Self::IntoIter {
        self.tokens.into_iter()
    }
}

#[derive(Debug, Clone)]
pub(crate) struct TokenSlice<'a> {
    stream: &'a TokenStream,
    /// Current position of the leading Token in the stream
    start: usize,
    /// The exclusive end position of this slice in the stream (initially the total
    /// number of Tokens in the stream).
    end: usize,
}

impl<'a> std::ops::Deref for TokenSlice<'a> {
    type Target = [Token];

    fn deref(&self) -> &Self::Target {
        &self.stream.tokens[self.start..self.end]
    }
}

impl<'a> TokenSlice<'a> {
    pub fn token(&self, i: usize) -> &Token {
        &self.stream.tokens[i + self.start]
    }

    pub fn iter(&self) -> impl Iterator<Item = &Token> {
        (**self).iter()
    }

    pub fn without_ends(&self) -> Self {
        Self {
            start: self.start + 1,
            end: self.end - 1,
            stream: self.stream,
        }
    }

    pub fn as_source_range(&self) -> SourceRange {
        let stream_len = self.stream.tokens.len();
        let first_token = if stream_len == self.start {
            &self.stream.tokens[self.start - 1]
        } else {
            self.token(0)
        };
        let last_token = if stream_len == self.end {
            &self.stream.tokens[stream_len - 1]
        } else {
            self.token(self.end - self.start)
        };
        SourceRange::new(first_token.start, last_token.end, last_token.module_id)
    }
}

impl<'a> IntoIterator for TokenSlice<'a> {
    type Item = &'a Token;

    type IntoIter = std::slice::Iter<'a, Token>;

    fn into_iter(self) -> Self::IntoIter {
        self.stream.tokens[self.start..self.end].iter()
    }
}

impl<'a> Stream for TokenSlice<'a> {
    type Token = Token;
    type Slice = Self;
    type IterOffsets = Enumerate<std::vec::IntoIter<Token>>;
    type Checkpoint = Checkpoint;

    fn iter_offsets(&self) -> Self::IterOffsets {
        #[allow(clippy::unnecessary_to_owned)]
        self.to_vec().into_iter().enumerate()
    }

    fn eof_offset(&self) -> usize {
        self.len()
    }

    fn next_token(&mut self) -> Option<Self::Token> {
        let token = self.first()?.clone();
        self.start += 1;
        Some(token)
    }

    fn offset_for<P>(&self, predicate: P) -> Option<usize>
    where
        P: Fn(Self::Token) -> bool,
    {
        self.iter().position(|b| predicate(b.clone()))
    }

    fn offset_at(&self, tokens: usize) -> Result<usize, winnow::error::Needed> {
        if let Some(needed) = tokens.checked_sub(self.len()).and_then(NonZeroUsize::new) {
            Err(winnow::error::Needed::Size(needed))
        } else {
            Ok(tokens)
        }
    }

    fn next_slice(&mut self, offset: usize) -> Self::Slice {
        assert!(self.start + offset <= self.end);

        let next = TokenSlice {
            stream: self.stream,
            start: self.start,
            end: self.start + offset,
        };
        self.start += offset;
        next
    }

    fn checkpoint(&self) -> Self::Checkpoint {
        Checkpoint(self.start, self.end)
    }

    fn reset(&mut self, checkpoint: &Self::Checkpoint) {
        self.start = checkpoint.0;
        self.end = checkpoint.1;
    }

    fn raw(&self) -> &dyn fmt::Debug {
        self
    }
}

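// The `Stream` impl above is what lets winnow parsers consume a `TokenSlice` directly:
// `next_token` advances `start` one token at a time, while `checkpoint`/`reset` give
// backtracking combinators (e.g. `alt`) a saved position to restore.
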
impl<'a> winnow::stream::Offset for TokenSlice<'a> {
    fn offset_from(&self, start: &Self) -> usize {
        self.start - start.start
    }
}

impl<'a> winnow::stream::Offset<Checkpoint> for TokenSlice<'a> {
    fn offset_from(&self, start: &Checkpoint) -> usize {
        self.start - start.0
    }
}

impl winnow::stream::Offset for Checkpoint {
    fn offset_from(&self, start: &Self) -> usize {
        self.0 - start.0
    }
}

impl<'a> winnow::stream::StreamIsPartial for TokenSlice<'a> {
    type PartialState = ();

    fn complete(&mut self) -> Self::PartialState {}

    fn restore_partial(&mut self, _: Self::PartialState) {}

    fn is_partial_supported() -> bool {
        false
    }
}

impl<'a> winnow::stream::FindSlice<&str> for TokenSlice<'a> {
    fn find_slice(&self, substr: &str) -> Option<std::ops::Range<usize>> {
        self.iter()
            .enumerate()
            .find_map(|(i, b)| if b.value == substr { Some(i..self.end) } else { None })
    }
}

#[derive(Clone, Debug)]
pub struct Checkpoint(usize, usize);

/// The types of tokens.
#[derive(Debug, PartialEq, Eq, Copy, Clone, Display)]
#[display(style = "camelCase")]
pub enum TokenType {
    /// A number.
    Number,
    /// A word.
    Word,
    /// An operator.
    Operator,
    /// A string.
    String,
    /// A keyword.
    Keyword,
    /// A type.
    Type,
    /// A brace.
    Brace,
    /// A hash.
    Hash,
    /// A bang.
    Bang,
    /// A dollar sign.
    Dollar,
    /// Whitespace.
    Whitespace,
    /// A comma.
    Comma,
    /// A colon.
    Colon,
    /// A double colon: `::`
    DoubleColon,
    /// A period.
    Period,
    /// A double period: `..`.
    DoublePeriod,
    /// A line comment.
    LineComment,
    /// A block comment.
    BlockComment,
    /// A function name.
    Function,
    /// Unknown lexemes.
    Unknown,
    /// The ? symbol, used for optional values.
    QuestionMark,
    /// The @ symbol.
    At,
    /// `;`
    SemiColon,
}

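// With the `Display` derive above, variant names format in camelCase, e.g.
// `TokenType::LineComment` displays as `lineComment` (controlled by `parse_display`'s
// `style = "camelCase"` attribute).
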
/// Most KCL tokens correspond to LSP semantic tokens (but not all).
impl TryFrom<TokenType> for SemanticTokenType {
    type Error = anyhow::Error;

    fn try_from(token_type: TokenType) -> Result<Self> {
        // If you return a new kind of `SemanticTokenType`, make sure to update `SEMANTIC_TOKEN_TYPES`
        // in the LSP implementation.
        Ok(match token_type {
            TokenType::Number => Self::NUMBER,
            TokenType::Word => Self::VARIABLE,
            TokenType::Keyword => Self::KEYWORD,
            TokenType::Type => Self::TYPE,
            TokenType::Operator => Self::OPERATOR,
            TokenType::QuestionMark => Self::OPERATOR,
            TokenType::String => Self::STRING,
            TokenType::Bang => Self::OPERATOR,
            TokenType::LineComment => Self::COMMENT,
            TokenType::BlockComment => Self::COMMENT,
            TokenType::Function => Self::FUNCTION,
            TokenType::Whitespace
            | TokenType::Brace
            | TokenType::Comma
            | TokenType::Colon
            | TokenType::DoubleColon
            | TokenType::Period
            | TokenType::DoublePeriod
            | TokenType::Hash
            | TokenType::Dollar
            | TokenType::At
            | TokenType::SemiColon
            | TokenType::Unknown => {
                anyhow::bail!("unsupported token type: {:?}", token_type)
            }
        })
    }
}

impl TokenType {
    pub fn is_whitespace(&self) -> bool {
        matches!(self, Self::Whitespace)
    }

    pub fn is_comment(&self) -> bool {
        matches!(self, Self::LineComment | Self::BlockComment)
    }
}

#[derive(Debug, PartialEq, Eq, Clone)]
pub struct Token {
    pub token_type: TokenType,
    /// Offset in the source code where this token begins.
    pub start: usize,
    /// Offset in the source code where this token ends.
    pub end: usize,
    pub(super) module_id: ModuleId,
    pub(super) value: String,
}

impl ContainsToken<Token> for (TokenType, &str) {
    fn contains_token(&self, token: Token) -> bool {
        self.0 == token.token_type && self.1 == token.value
    }
}

impl ContainsToken<Token> for TokenType {
    fn contains_token(&self, token: Token) -> bool {
        *self == token.token_type
    }
}

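// These impls let winnow combinators that accept a `ContainsToken` (such as `one_of`)
// match a token either by its type alone or by an exact (type, value) pair.
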
impl Token {
    pub fn from_range(
        range: std::ops::Range<usize>,
        module_id: ModuleId,
        token_type: TokenType,
        value: String,
    ) -> Self {
        Self {
            start: range.start,
            end: range.end,
            module_id,
            value,
            token_type,
        }
    }

    pub fn is_code_token(&self) -> bool {
        !matches!(
            self.token_type,
            TokenType::Whitespace | TokenType::LineComment | TokenType::BlockComment
        )
    }

    pub fn as_source_range(&self) -> SourceRange {
        SourceRange::new(self.start, self.end, self.module_id)
    }

    pub fn as_source_ranges(&self) -> Vec<SourceRange> {
        vec![self.as_source_range()]
    }

    pub fn visibility_keyword(&self) -> Option<ItemVisibility> {
        if !matches!(self.token_type, TokenType::Keyword) {
            return None;
        }
        match self.value.as_str() {
            "export" => Some(ItemVisibility::Export),
            _ => None,
        }
    }

    pub fn numeric_value(&self) -> Option<f64> {
        if self.token_type != TokenType::Number {
            return None;
        }
        let value = &self.value;
        let value = value
            .split_once(|c: char| c == '_' || c.is_ascii_alphabetic())
            .map(|(s, _)| s)
            .unwrap_or(value);
        value.parse().ok()
    }

    pub fn uint_value(&self) -> Option<u32> {
        if self.token_type != TokenType::Number {
            return None;
        }
        let value = &self.value;
        let value = value
            .split_once(|c: char| c == '_' || c.is_ascii_alphabetic())
            .map(|(s, _)| s)
            .unwrap_or(value);
        value.parse().ok()
    }

    pub fn numeric_suffix(&self) -> NumericSuffix {
        if self.token_type != TokenType::Number {
            return NumericSuffix::None;
        }

        if self.value.ends_with('_') {
            return NumericSuffix::Count;
        }

        for suffix in NUM_SUFFIXES {
            if self.value.ends_with(suffix) {
                return suffix.parse().unwrap();
            }
        }

        NumericSuffix::None
    }

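    // For example (illustrative): a `Number` token whose text is `"4.5mm"` reports
    // `numeric_value() == Some(4.5)`, `uint_value() == None`, and
    // `numeric_suffix() == NumericSuffix::Mm`, while a bare `"42"` has suffix `NumericSuffix::None`.
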
    /// Is this token the beginning of a variable/function declaration?
    /// If so, what kind?
    /// If not, returns None.
    pub fn declaration_keyword(&self) -> Option<VariableKind> {
        if !matches!(self.token_type, TokenType::Keyword) {
            return None;
        }
        Some(match self.value.as_str() {
            "fn" => VariableKind::Fn,
            "var" | "let" | "const" => VariableKind::Const,
            _ => return None,
        })
    }
}

impl From<Token> for SourceRange {
    fn from(token: Token) -> Self {
        Self::new(token.start, token.end, token.module_id)
    }
}

impl From<&Token> for SourceRange {
    fn from(token: &Token) -> Self {
        Self::new(token.start, token.end, token.module_id)
    }
}

pub fn lex(s: &str, module_id: ModuleId) -> Result<TokenStream, KclError> {
    tokeniser::lex(s, module_id).map_err(From::from)
}

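// For example (roughly): lexing `x = 1` yields a Word, Whitespace, an Operator, more
// Whitespace, and a Number token; lexemes the tokeniser cannot classify come back as
// `TokenType::Unknown` tokens, which `TokenStream::remove_unknown` can split off.
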
impl From<ParseError<Input<'_>, winnow::error::ContextError>> for KclError {
    fn from(err: ParseError<Input<'_>, winnow::error::ContextError>) -> Self {
        let (input, offset): (Vec<char>, usize) = (err.input().chars().collect(), err.offset());
        let module_id = err.input().state.module_id;

        if offset >= input.len() {
            // From the winnow docs:
            //
            // This is an offset, not an index, and may point to
            // the end of input (input.len()) on eof errors.

            return KclError::Lexical(crate::errors::KclErrorDetails {
                source_ranges: vec![SourceRange::new(offset, offset, module_id)],
                message: "unexpected EOF while parsing".to_string(),
            });
        }

        // TODO: Add the Winnow tokenizer context to the error.
        // See https://github.com/KittyCAD/modeling-app/issues/784
        let bad_token = &input[offset];
        // TODO: Add the Winnow parser context to the error.
        // See https://github.com/KittyCAD/modeling-app/issues/784
        KclError::Lexical(crate::errors::KclErrorDetails {
            source_ranges: vec![SourceRange::new(offset, offset + 1, module_id)],
            message: format!("found unknown token '{}'", bad_token),
        })
    }
}