Files
modeling-app/src/lang/abstractSyntaxTree.ts

1641 lines
45 KiB
TypeScript
Raw Normal View History

import { PathToNode } from './executor'
2022-11-26 08:34:23 +11:00
import { Token } from './tokeniser'
import { parseExpression } from './astMathExpressions'
2022-11-13 11:14:30 +11:00
/**
 * Discriminator tags used in the `type` field of the AST nodes declared in
 * this file.
 *
 * The commented-out names at the bottom are NOT part of the union; they are
 * kept only as a checklist of node kinds that have no tag yet.
 */
type syntaxType =
  | 'Program'
  | 'ExpressionStatement'
  | 'BinaryExpression'
  | 'CallExpression'
  | 'Identifier'
  | 'BlockStatement'
  | 'ReturnStatement'
  | 'VariableDeclaration'
  | 'VariableDeclarator'
  | 'MemberExpression'
  | 'ArrayExpression'
  | 'ObjectExpression'
  | 'ObjectProperty'
  | 'FunctionExpression'
  | 'SketchExpression'
  | 'PipeExpression'
  | 'PipeSubstitution'
  | 'Literal'
  | 'NoneCodeNode'
// | 'NumberLiteral'
// | 'StringLiteral'
// | 'IfStatement'
// | 'WhileStatement'
// | 'FunctionDeclaration'
// | 'AssignmentExpression'
// | 'UnaryExpression'
// | 'Property'
// | 'LogicalExpression'
// | 'ConditionalExpression'
// | 'ForStatement'
// | 'ForInStatement'
// | 'ForOfStatement'
// | 'BreakStatement'
// | 'ContinueStatement'
// | 'SwitchStatement'
// | 'SwitchCase'
// | 'ThrowStatement'
// | 'TryStatement'
// | 'CatchClause'
// | 'ClassDeclaration'
// | 'ClassBody'
// | 'MethodDefinition'
// | 'NewExpression'
// | 'ThisExpression'
// | 'UpdateExpression'
// | 'YieldExpression'
// | 'AwaitExpression'
// | 'ImportDeclaration'
// | 'ImportSpecifier'
// | 'ImportDefaultSpecifier'
// | 'ImportNamespaceSpecifier'
// | 'ExportNamedDeclaration'
// | 'ExportDefaultDeclaration'
// | 'ExportAllDeclaration'
// | 'ExportSpecifier'
// | 'TaggedTemplateExpression'
// | 'TemplateLiteral'
// | 'TemplateElement'
// | 'SpreadElement'
// | 'RestElement'
// | 'SequenceExpression'
// | 'DebuggerStatement'
// | 'LabeledStatement'
// | 'DoWhileStatement'
// | 'WithStatement'
// | 'EmptyStatement'
// | 'ArrayPattern'
// | 'ObjectPattern'
// | 'AssignmentPattern'
// | 'MetaProperty'
// | 'Super'
// | 'Import'
// | 'RegExpLiteral'
// | 'BooleanLiteral'
// | 'NullLiteral'
// | 'TypeAnnotation'
2022-11-13 11:14:30 +11:00
/**
 * Top-level AST node: a list of body items together with the non-code
 * (whitespace/comment) metadata recorded between them.
 */
export interface Program {
  type: syntaxType
  /** Source offset where the node begins. */
  start: number
  /** Source offset where the node ends. */
  end: number
  body: BodyItem[]
  /** Whitespace/comments keyed by the index of the body item they follow. */
  nonCodeMeta: NoneCodeMeta
}
/**
 * Fields shared by every AST node in this file: a discriminator tag and the
 * `start`/`end` positions copied from the tokens the node was built from.
 */
interface GeneralStatement {
  type: syntaxType
  start: number
  end: number
}
/**
 * A run of consecutive non-code tokens collapsed into a single node.
 * `value` is the concatenated text of those tokens.
 */
interface NoneCodeNode extends GeneralStatement {
  type: 'NoneCodeNode'
  value: string
}
/**
 * Non-code (whitespace/comment) nodes attached to a list of statements.
 */
interface NoneCodeMeta {
  // Numeric keys hold the whitespace/comments that come AFTER the statement
  // whose index is used as the key.
  [statementIndex: number]: NoneCodeNode
  // Because of that, a separate `start` slot is needed for any whitespace or
  // comments at the very start of the file/block.
  start?: NoneCodeNode
}
/**
 * Collapses the run of non-code tokens beginning at `index` into one
 * `NoneCodeNode`.
 *
 * @param tokens - full token list
 * @param index - index of the first token of the run
 * @returns the node plus `lastIndex`, the index of the last token consumed
 *
 * NOTE(review): presumably callers only pass an index that points at a
 * non-code token; otherwise the run is empty and `end` is read from the
 * token before `index` — confirm against callers.
 */
function makeNoneCodeNode(
  tokens: Token[],
  index: number
): { node?: NoneCodeNode; lastIndex: number } {
  const firstToken = tokens[index]
  // Exclusive upper bound: index of the first code token after the run.
  const runEnd = findEndOfNonCodeNode(tokens, index)
  const lastIndex = runEnd - 1
  const text = tokens
    .slice(index, runEnd)
    .map((token) => token.value)
    .join('')
  return {
    node: {
      type: 'NoneCodeNode',
      start: firstToken.start,
      end: tokens[lastIndex].end,
      value: text,
    },
    lastIndex,
  }
}
/**
 * Walks forward from `index` while `isNotCodeToken` accepts the token.
 *
 * @param tokens - full token list
 * @param index - position to start scanning from
 * @returns the index of the first token that is not a non-code token
 *          (`index` itself when the token there is already code)
 */
export function findEndOfNonCodeNode(tokens: Token[], index: number): number {
  let cursor = index
  while (isNotCodeToken(tokens[cursor])) {
    cursor += 1
  }
  return cursor
}
/** A statement that consists solely of one expression. */
export interface ExpressionStatement extends GeneralStatement {
  type: 'ExpressionStatement'
  expression: Value
}
/**
 * Builds an `ExpressionStatement` starting at `index`.
 *
 * If the next meaningful token is `(` the statement is parsed as a call
 * expression; otherwise it is parsed as a binary expression.
 *
 * @param tokens - full token list
 * @param index - index of the first token of the statement
 * @returns the statement and the index of the last token it consumed
 */
function makeExpressionStatement(
  tokens: Token[],
  index: number
): { expression: ExpressionStatement; lastIndex: number } {
  const currentToken = tokens[index]
  const { token: nextToken } = nextMeaningfulToken(tokens, index)
  // nextToken can be missing at the end of the input, so guard the access
  // instead of crashing with a TypeError.
  const isCall = nextToken?.type === 'brace' && nextToken.value === '('
  const { expression, lastIndex } = isCall
    ? makeCallExpression(tokens, index)
    : makeBinaryExpression(tokens, index)
  return {
    expression: {
      type: 'ExpressionStatement',
      start: currentToken.start,
      end: expression.end,
      expression,
    },
    lastIndex,
  }
}
2022-11-26 19:03:09 +11:00
/**
 * A function call: `callee(arguments...)`.
 * `optional` is always written as `false` by `makeCallExpression`.
 */
export interface CallExpression extends GeneralStatement {
  type: 'CallExpression'
  callee: Identifier
  arguments: Value[]
  optional: boolean
}
/**
 * Builds a `CallExpression` whose callee identifier is at `index`.
 *
 * @param tokens - full token list
 * @param index - index of the callee token; the next meaningful token is
 *                taken as the opening `(`
 * @returns the call expression and `lastIndex`, the index at which argument
 *          parsing stopped (the closing `)`)
 */
function makeCallExpression(
  tokens: Token[],
  index: number
): {
  expression: CallExpression
  lastIndex: number
} {
  const currentToken = tokens[index]
  const braceToken = nextMeaningfulToken(tokens, index)
  const callee = makeIdentifier(tokens, index)
  const args = makeArguments(tokens, braceToken.index)
  // makeArguments stops on the closing brace, so its lastIndex is that token.
  const closingBraceToken = tokens[args.lastIndex]
  return {
    expression: {
      type: 'CallExpression',
      start: currentToken.start,
      end: closingBraceToken.end,
      callee,
      arguments: args.arguments,
      optional: false,
    },
    lastIndex: args.lastIndex,
  }
}
/**
 * Recursively collects the arguments of a call.
 *
 * Each invocation sits on a separator token (`(` or `,`) at `index`, parses
 * the argument that follows it, and recurses on the next separator. The
 * recursion ends when `index` points at `)`.
 *
 * @param tokens - full token list
 * @param index - index of the `(`, `,` or `)` token to continue from
 * @param previousArgs - arguments collected so far
 * @returns every argument plus `lastIndex`, the index of the closing `)`
 * @throws Error when the argument matches none of the supported shapes
 */
function makeArguments(
  tokens: Token[],
  index: number,
  previousArgs: Value[] = []
): {
  arguments: Value[]
  lastIndex: number
} {
  const braceOrCommaToken = tokens[index]
  const argumentToken = nextMeaningfulToken(tokens, index)
  const shouldFinishRecursion =
    braceOrCommaToken.type === 'brace' && braceOrCommaToken.value === ')'
  if (shouldFinishRecursion) {
    return {
      arguments: previousArgs,
      lastIndex: index,
    }
  }
  const nextBraceOrCommaToken = nextMeaningfulToken(tokens, argumentToken.index)
  // True when the argument is a single token directly followed by ',' or a
  // brace, i.e. it is not the start of a longer expression.
  const isIdentifierOrLiteral =
    nextBraceOrCommaToken.token.type === 'comma' ||
    nextBraceOrCommaToken.token.type === 'brace'

  const argumentIsBrace = (value: string): boolean =>
    argumentToken.token.type === 'brace' && argumentToken.token.value === value
  // Multi-token argument: resume on the separator after its last token.
  const continueAfter = (expression: Value, lastIndex: number) =>
    makeArguments(tokens, nextMeaningfulToken(tokens, lastIndex).index, [
      ...previousArgs,
      expression,
    ])
  // Single-token argument: the separator has already been located.
  const continueAtSeparator = (argument: Value) =>
    makeArguments(tokens, nextBraceOrCommaToken.index, [
      ...previousArgs,
      argument,
    ])

  if (argumentIsBrace('[')) {
    const { expression, lastIndex } = makeArrayExpression(
      tokens,
      argumentToken.index
    )
    return continueAfter(expression, lastIndex)
  }
  if (argumentIsBrace('{')) {
    const { expression, lastIndex } = makeObjectExpression(
      tokens,
      argumentToken.index
    )
    return continueAfter(expression, lastIndex)
  }
  if (!isIdentifierOrLiteral) {
    // NOTE(review): the scan starts at `index` (the '(' / ',' separator),
    // not at the argument token, and recursion resumes at `lastIndex`
    // itself — confirm this is intended for multi-argument calls.
    const { expression, lastIndex } = makeBinaryExpression(tokens, index)
    return makeArguments(tokens, lastIndex, [...previousArgs, expression])
  }
  if (
    argumentToken.token.type === 'operator' &&
    argumentToken.token.value === '%'
  ) {
    const value: PipeSubstitution = {
      type: 'PipeSubstitution',
      start: argumentToken.token.start,
      end: argumentToken.token.end,
    }
    return continueAtSeparator(value)
  }
  if (
    argumentToken.token.type === 'word' &&
    nextBraceOrCommaToken.token.type === 'brace' &&
    nextBraceOrCommaToken.token.value === '('
  ) {
    // A word directly followed by '(' is a nested call.
    const { expression, lastIndex } = makeCallExpression(
      tokens,
      argumentToken.index
    )
    return continueAfter(expression, lastIndex)
  }
  if (argumentToken.token.type === 'word') {
    return continueAtSeparator(makeIdentifier(tokens, argumentToken.index))
  }
  if (
    argumentToken.token.type === 'number' ||
    argumentToken.token.type === 'string'
  ) {
    return continueAtSeparator(makeLiteral(tokens, argumentToken.index))
  }
  if (argumentIsBrace(')')) {
    // Nothing between the separator and ')': let the next call finish.
    return makeArguments(tokens, argumentToken.index, previousArgs)
  }
  throw new Error('Expected a previous Argument if statement to match')
}
/**
 * A declaration statement. `kind` records the keyword that introduced it;
 * `'unknown'` is used when the keyword is none of the recognised ones.
 */
export interface VariableDeclaration extends GeneralStatement {
  type: 'VariableDeclaration'
  declarations: VariableDeclarator[]
  kind: 'const' | 'unknown' | 'fn' | 'sketch' | 'path' //| "solid" | "surface" | "face"
}
/**
 * Builds a `VariableDeclaration`.
 *
 * @param tokens - full token list
 * @param index - index of the declaration keyword (const, fn, sketch, path)
 * @returns the declaration and the index of the last token it consumed
 */
function makeVariableDeclaration(
  tokens: Token[],
  index: number
): { declaration: VariableDeclaration; lastIndex: number } {
  // token index should point to a declaration keyword i.e. const, fn, sketch, path
  const currentToken = tokens[index]
  const declarationStartToken = nextMeaningfulToken(tokens, index)
  const { declarations, lastIndex } = makeVariableDeclarators(
    tokens,
    declarationStartToken.index
  )
  const keyword = currentToken.value
  // Any keyword outside the recognised set is recorded as 'unknown'.
  const kind: VariableDeclaration['kind'] =
    keyword === 'const' ||
    keyword === 'fn' ||
    keyword === 'sketch' ||
    keyword === 'path'
      ? keyword
      : 'unknown'
  return {
    declaration: {
      type: 'VariableDeclaration',
      start: currentToken.start,
      end: declarations[declarations.length - 1].end,
      kind,
      declarations,
    },
    lastIndex,
  }
}
2022-11-26 19:03:09 +11:00
/** Every node kind that can appear where a value is expected. */
export type Value =
  | Literal
  | Identifier
  | BinaryExpression
  | FunctionExpression
  | CallExpression
  | SketchExpression
  | PipeExpression
  | PipeSubstitution
  | ArrayExpression
  | ObjectExpression
  | MemberExpression
/**
 * Parses the value that starts at `index`, choosing the node kind from the
 * current token and the next meaningful token.
 *
 * Checks run in this order: call expression, binary expression, object
 * literal, array literal, member expression, identifier, literal, arrow
 * function.
 *
 * @param tokens - full token list
 * @param index - index of the first token of the value
 * @returns the parsed value and the index of the last token it consumed
 * @throws Error for a parenthesised expression that is not an arrow
 *         function, or when no branch matches
 */
function makeValue(
  tokens: Token[],
  index: number
): { value: Value; lastIndex: number } {
  const currentToken = tokens[index]
  const { token: nextToken } = nextMeaningfulToken(tokens, index)
  // nextToken might be empty if it's at the end of the file, so every access
  // below goes through optional chaining.
  if (nextToken?.type === 'brace' && nextToken.value === '(') {
    const { expression, lastIndex } = makeCallExpression(tokens, index)
    return {
      value: expression,
      lastIndex,
    }
  }
  if (
    (currentToken.type === 'word' ||
      currentToken.type === 'number' ||
      currentToken.type === 'string') &&
    nextToken?.type === 'operator'
  ) {
    const { expression, lastIndex } = makeBinaryExpression(tokens, index)
    return {
      value: expression,
      lastIndex,
    }
  }
  if (currentToken.type === 'brace' && currentToken.value === '{') {
    const objExp = makeObjectExpression(tokens, index)
    return {
      value: objExp.expression,
      lastIndex: objExp.lastIndex,
    }
  }
  if (currentToken.type === 'brace' && currentToken.value === '[') {
    const arrExp = makeArrayExpression(tokens, index)
    return {
      value: arrExp.expression,
      lastIndex: arrExp.lastIndex,
    }
  }
  if (
    currentToken.type === 'word' &&
    // Guarded: a bare identifier that is the last token must fall through to
    // the Identifier branch instead of throwing a TypeError here.
    (nextToken?.type === 'period' ||
      (nextToken?.type === 'brace' && nextToken.value === '['))
  ) {
    const memberExpression = makeMemberExpression(tokens, index)
    return {
      value: memberExpression.expression,
      lastIndex: memberExpression.lastIndex,
    }
  }
  if (currentToken.type === 'word') {
    const identifier = makeIdentifier(tokens, index)
    return {
      value: identifier,
      lastIndex: index,
    }
  }
  if (currentToken.type === 'number' || currentToken.type === 'string') {
    const literal = makeLiteral(tokens, index)
    return {
      value: literal,
      lastIndex: index,
    }
  }
  if (currentToken.type === 'brace' && currentToken.value === '(') {
    const closingBraceIndex = findClosingBrace(tokens, index)
    const arrowToken = nextMeaningfulToken(tokens, closingBraceIndex)
    if (
      arrowToken.token?.type === 'operator' &&
      arrowToken.token.value === '=>'
    ) {
      const { expression, lastIndex: arrowFunctionLastIndex } =
        makeFunctionExpression(tokens, index)
      return {
        value: expression,
        lastIndex: arrowFunctionLastIndex,
      }
    } else {
      throw new Error('TODO - handle expression with braces')
    }
  }
  throw new Error('Expected a previous Value if statement to match')
}
2023-01-04 01:28:26 +11:00
/** One `id = init` binding inside a `VariableDeclaration`. */
export interface VariableDeclarator extends GeneralStatement {
  type: 'VariableDeclarator'
  id: Identifier
  init: Value
}
/**
 * Builds the declarator whose identifier token is at `index` and appends it
 * to `previousDeclarators`.
 *
 * The initialiser is parsed as a pipe expression when a pipe operator is
 * found ahead, as a sketch expression when the preceding keyword is
 * `sketch`, and as a plain value otherwise.
 *
 * @param tokens - full token list
 * @param index - index of the identifier being declared
 * @param previousDeclarators - declarators collected so far
 * @returns all declarators and the index of the last token consumed
 */
function makeVariableDeclarators(
  tokens: Token[],
  index: number,
  previousDeclarators: VariableDeclarator[] = []
): {
  declarations: VariableDeclarator[]
  lastIndex: number
} {
  const currentToken = tokens[index]
  const assignmentToken = nextMeaningfulToken(tokens, index)
  const declarationToken = previousMeaningfulToken(tokens, index)
  const contentsStartToken = nextMeaningfulToken(tokens, assignmentToken.index)
  // When the token after the identifier is an operator, look for a pipe
  // from the token after it; otherwise from that token itself.
  const pipeStartIndex =
    assignmentToken?.token?.type === 'operator'
      ? contentsStartToken.index
      : assignmentToken.index
  const nextPipeOperator = hasPipeOperator(tokens, pipeStartIndex)
  let init: Value
  let lastIndex: number
  if (nextPipeOperator) {
    const { expression, lastIndex: pipeLastIndex } = makePipeExpression(
      tokens,
      assignmentToken.index
    )
    init = expression
    lastIndex = pipeLastIndex
  } else if (
    declarationToken.token.type === 'word' &&
    declarationToken.token.value === 'sketch'
  ) {
    const sketchExp = makeSketchExpression(tokens, assignmentToken.index)
    init = sketchExp.expression
    lastIndex = sketchExp.lastIndex
  } else {
    const { value, lastIndex: valueLastIndex } = makeValue(
      tokens,
      contentsStartToken.index
    )
    init = value
    lastIndex = valueLastIndex
  }
  const currentDeclarator: VariableDeclarator = {
    type: 'VariableDeclarator',
    start: currentToken.start,
    end: tokens[lastIndex].end,
    id: makeIdentifier(tokens, index),
    init,
  }
  return {
    declarations: [...previousDeclarators, currentDeclarator],
    lastIndex,
  }
}
/**
 * Node kinds allowed as an operand of a `BinaryExpression`.
 * The commented-out kinds below are not part of the union.
 */
export type BinaryPart = Literal | Identifier | BinaryExpression
// | CallExpression
// | MemberExpression
// | ArrayExpression
// | ObjectExpression
// | UnaryExpression
// | LogicalExpression
// | ConditionalExpression
2022-11-26 19:03:09 +11:00
/**
 * A literal value. `raw` is the token text as written; `value` is the
 * interpreted value.
 */
export interface Literal extends GeneralStatement {
  type: 'Literal'
  value: string | number | boolean | null
  raw: string
}
/** A name reference; `name` is the text of the token it was built from. */
export interface Identifier extends GeneralStatement {
  type: 'Identifier'
  name: string
}
/**
 * Builds an `Identifier` from the token at `index`.
 *
 * @param token - full token list (named `token` for historical reasons)
 * @param index - index of the token to convert
 * @returns an identifier carrying the token's text and start/end positions
 */
function makeIdentifier(token: Token[], index: number): Identifier {
  const currentToken = token[index]
  return {
    type: 'Identifier',
    start: currentToken.start,
    end: currentToken.end,
    name: currentToken.value,
  }
}
/**
 * Placeholder node emitted for a `%` operator token used as a call argument.
 * It carries no data beyond its position.
 */
interface PipeSubstitution extends GeneralStatement {
  type: 'PipeSubstitution'
}
2022-11-13 11:14:30 +11:00
/**
 * Builds a `Literal` from the token at `index`.
 *
 * Number tokens are converted with `Number()`. Any other token has its first
 * and last character removed (the surrounding quotes of a string token).
 *
 * @param tokens - full token list
 * @param index - index of the number or string token
 * @returns a literal with the interpreted `value` and the untouched `raw` text
 */
function makeLiteral(tokens: Token[], index: number): Literal {
  const token = tokens[index]
  const value =
    token.type === 'number' ? Number(token.value) : token.value.slice(1, -1)
  return {
    type: 'Literal',
    start: token.start,
    end: token.end,
    value,
    raw: token.value,
  }
}
2022-12-30 21:53:50 +11:00
/** An array literal: `[element, element, ...]`. */
export interface ArrayExpression extends GeneralStatement {
  type: 'ArrayExpression'
  elements: Value[]
}
/**
 * Recursively gathers the elements of an array literal.
 *
 * @param tokens - full token list
 * @param index - index of the first token after the opening `[` (or, on
 *                recursive calls, the next element / the closing `]`)
 * @param previousElements - elements gathered so far
 * @returns the elements and `lastIndex`, the index of the closing `]`
 * @throws Error when an element is followed by neither `,` nor `]`
 */
function makeArrayElements(
  tokens: Token[],
  index: number,
  previousElements: Value[] = []
): { elements: ArrayExpression['elements']; lastIndex: number } {
  const elementStart = tokens[index]
  const atEnd = elementStart.type === 'brace' && elementStart.value === ']'
  if (atEnd) {
    return { elements: previousElements, lastIndex: index }
  }

  const { value: element, lastIndex: elementLastIndex } = makeValue(
    tokens,
    index
  )
  const separator = nextMeaningfulToken(tokens, elementLastIndex)
  const closesArray =
    separator.token.type === 'brace' && separator.token.value === ']'
  if (!(closesArray || separator.token.type === 'comma')) {
    throw new Error('Expected a comma or closing brace')
  }

  // After a comma, continue at the next element; on ']' let the next call
  // terminate the recursion.
  let resumeIndex = separator.index
  if (!closesArray) {
    resumeIndex = nextMeaningfulToken(tokens, separator.index).index
  }
  return makeArrayElements(tokens, resumeIndex, [...previousElements, element])
}
/**
 * Builds an `ArrayExpression`.
 *
 * @param tokens - full token list
 * @param index - index of the opening `[`
 * @returns the array node and `lastIndex`, the index of the closing `]`
 */
function makeArrayExpression(
  tokens: Token[],
  index: number
): {
  expression: ArrayExpression
  lastIndex: number
} {
  const opener = tokens[index]
  const firstElementIndex = nextMeaningfulToken(tokens, index).index
  const parsed = makeArrayElements(tokens, firstElementIndex)
  const expression: ArrayExpression = {
    type: 'ArrayExpression',
    start: opener.start,
    end: tokens[parsed.lastIndex].end,
    elements: parsed.elements,
  }
  return { expression, lastIndex: parsed.lastIndex }
}
2023-01-01 21:48:30 +11:00
/** An object literal: `{ key: value, ... }`. */
export interface ObjectExpression extends GeneralStatement {
  type: 'ObjectExpression'
  properties: ObjectProperty[]
}
/** A single `key: value` entry of an `ObjectExpression`. */
interface ObjectProperty extends GeneralStatement {
  type: 'ObjectProperty'
  key: Identifier
  value: Value
}
/**
 * Builds an `ObjectExpression`.
 *
 * @param tokens - full token list
 * @param index - index of the opening `{`
 * @returns the object node and `lastIndex`, the index of the closing `}`
 */
function makeObjectExpression(
  tokens: Token[],
  index: number
): {
  expression: ObjectExpression
  lastIndex: number
} {
  const opener = tokens[index]
  const firstKeyIndex = nextMeaningfulToken(tokens, index).index
  const parsed = makeObjectProperties(tokens, firstKeyIndex)
  const expression: ObjectExpression = {
    type: 'ObjectExpression',
    start: opener.start,
    end: tokens[parsed.lastIndex].end,
    properties: parsed.properties,
  }
  return { expression, lastIndex: parsed.lastIndex }
}
/**
 * Recursively gathers the properties of an object literal.
 *
 * @param tokens - full token list
 * @param index - index of a property key, or of the closing `}`
 * @param previousProperties - properties gathered so far
 * @returns the properties and `lastIndex`, the index of the closing `}`
 */
function makeObjectProperties(
  tokens: Token[],
  index: number,
  previousProperties: ObjectProperty[] = []
): { properties: ObjectProperty[]; lastIndex: number } {
  // should be called with the key after the opening brace '{'
  const propertyKeyToken = tokens[index]
  if (propertyKeyToken.type === 'brace' && propertyKeyToken.value === '}') {
    return {
      properties: previousProperties,
      lastIndex: index,
    }
  }
  // Layout is: key, ':', value, then ',' or '}'.
  const colonToken = nextMeaningfulToken(tokens, index)
  const valueStartToken = nextMeaningfulToken(tokens, colonToken.index)
  const val = makeValue(tokens, valueStartToken.index)
  const value = val.value
  const valueLastIndex = val.lastIndex
  const commaOrClosingBraceToken = nextMeaningfulToken(tokens, valueLastIndex)
  const objectProperty: ObjectProperty = {
    type: 'ObjectProperty',
    start: propertyKeyToken.start,
    end: value.end,
    key: makeIdentifier(tokens, index),
    value,
  }
  const nextKeyToken = nextMeaningfulToken(
    tokens,
    commaOrClosingBraceToken.index
  )
  // On '}' recurse onto the brace itself so the next call terminates;
  // after ',' recurse onto the following key.
  const nextKeyIndex =
    commaOrClosingBraceToken.token.type === 'brace' &&
    commaOrClosingBraceToken.token.value === '}'
      ? commaOrClosingBraceToken.index
      : nextKeyToken.index
  return makeObjectProperties(tokens, nextKeyIndex, [
    ...previousProperties,
    objectProperty,
  ])
}
2023-01-03 19:41:27 +11:00
/**
 * Property access such as `a.b` or `a[b]`. Chained accesses nest to the
 * left: the `object` of `a.b.c` is the member expression `a.b`.
 */
export interface MemberExpression extends GeneralStatement {
  type: 'MemberExpression'
  object: MemberExpression | Identifier
  property: Identifier | Literal
  computed: boolean
}
/**
 * Builds a (possibly chained) `MemberExpression` rooted at the identifier at
 * `index`.
 *
 * @param tokens - full token list
 * @param index - index of the root identifier
 * @returns the outermost member expression and `lastIndex`, the end index
 *          recorded for the final key
 * @throws Error when no key follows the root identifier
 */
function makeMemberExpression(
  tokens: Token[],
  index: number
): { expression: MemberExpression; lastIndex: number } {
  const rootToken = tokens[index]
  const keys = collectObjectKeys(tokens, index)
  const finalKey = keys[keys.length - 1]
  const [headKey, ...tailKeys] = keys
  if (!headKey) throw new Error('Expected a key')

  // Innermost access: root identifier plus the first key.
  const innermost: MemberExpression = {
    type: 'MemberExpression',
    start: rootToken.start,
    end: tokens[headKey.index].end,
    object: makeIdentifier(tokens, index),
    property: headKey.key,
    computed: headKey.computed,
  }
  // Each further key wraps the expression built so far.
  const expression = tailKeys.reduce<MemberExpression>(
    (object, { key, computed, index: keyEndIndex }) => ({
      type: 'MemberExpression',
      start: rootToken.start,
      end: tokens[keyEndIndex].end,
      object,
      property: key,
      computed,
    }),
    innermost
  )
  return { expression, lastIndex: finalKey.index }
}
/** One key of a member-access chain as gathered by `collectObjectKeys`. */
interface ObjectKeyInfo {
  // The key itself: an identifier for word tokens, otherwise a literal.
  key: Identifier | Literal
  // Index of the last token belonging to this access (the `]` for bracketed
  // keys, the key token otherwise).
  index: number
  // True only for a bracketed key that is a word.
  computed: boolean
}
/**
 * Recursively collects the keys of a member-access chain (`.key` / `[key]`).
 *
 * @param tokens - full token list
 * @param index - index of the root identifier, or of the previously
 *                collected key on recursive calls
 * @param previousKeys - keys collected so far
 * @returns the keys in source order; empty when no access follows `index`
 */
function collectObjectKeys(
  tokens: Token[],
  index: number,
  previousKeys: ObjectKeyInfo[] = []
): ObjectKeyInfo[] {
  const nextToken = nextMeaningfulToken(tokens, index)
  // Step over the ']' that closes the previous bracketed key, if any.
  const periodOrOpeningBracketToken =
    nextToken?.token?.type === 'brace' && nextToken.token.value === ']'
      ? nextMeaningfulToken(tokens, nextToken.index)
      : nextToken
  const accessorToken = periodOrOpeningBracketToken?.token
  // Only '.' or '[' continues the chain. Accepting every brace would treat
  // ')' or '}' after the last key as another access and keep consuming
  // tokens beyond the member expression.
  const startsAccess =
    accessorToken?.type === 'period' ||
    (accessorToken?.type === 'brace' && accessorToken.value === '[')
  if (!startsAccess) {
    return previousKeys
  }
  const keyToken = nextMeaningfulToken(
    tokens,
    periodOrOpeningBracketToken.index
  )
  const nextPeriodOrOpeningBracketToken = nextMeaningfulToken(
    tokens,
    keyToken.index
  )
  const isBraced =
    nextPeriodOrOpeningBracketToken?.token?.type === 'brace' &&
    nextPeriodOrOpeningBracketToken?.token?.value === ']'
  // A bracketed access ends on its ']', a dotted access on the key itself.
  const endIndex = isBraced
    ? nextPeriodOrOpeningBracketToken.index
    : keyToken.index
  const key =
    keyToken.token.type === 'word'
      ? makeIdentifier(tokens, keyToken.index)
      : makeLiteral(tokens, keyToken.index)
  const computed = isBraced && keyToken.token.type === 'word'
  return collectObjectKeys(tokens, keyToken.index, [
    ...previousKeys,
    {
      key,
      index: endIndex,
      computed,
    },
  ])
}
/** A binary operation: `left operator right`. */
export interface BinaryExpression extends GeneralStatement {
  type: 'BinaryExpression'
  operator: string
  left: BinaryPart
  right: BinaryPart
}
/**
 * Finds the index of the last token of the binary expression that starts at
 * `index`.
 *
 * An operand is either a single token or a parenthesised group. The scan
 * continues while an operand is followed by an operator other than `|>`.
 *
 * @param tokens - full token list
 * @param index - index of the first token of an operand
 * @returns index of the final token of the expression
 */
export function findEndOfBinaryExpression(
  tokens: Token[],
  index: number
): number {
  const operandStart = tokens[index]
  const opensGroup = operandStart.type === 'brace' && operandStart.value === '('
  // A parenthesised operand ends at its matching ')'; anything else is one
  // token long.
  const operandEnd = opensGroup ? findClosingBrace(tokens, index) : index
  const following = nextMeaningfulToken(tokens, operandEnd)
  if (
    !(
      following?.token?.type === 'operator' &&
      following?.token?.value !== '|>'
    )
  ) {
    return operandEnd
  }
  const rightOperand = nextMeaningfulToken(tokens, following.index)
  return findEndOfBinaryExpression(tokens, rightOperand.index)
}
/**
 * Builds a `BinaryExpression` by locating the end of the expression that
 * starts at `index` and handing that token slice to `parseExpression`.
 *
 * @param tokens - full token list
 * @param index - index of the first token of the expression
 * @returns the parsed expression and the index of its last token
 */
function makeBinaryExpression(
  tokens: Token[],
  index: number
): { expression: BinaryExpression; lastIndex: number } {
  const endIndex = findEndOfBinaryExpression(tokens, index)
  // slice's end is exclusive, hence the + 1 to include the last token.
  const expression = parseExpression(tokens.slice(index, endIndex + 1))
  return {
    expression,
    lastIndex: endIndex,
  }
}
2022-11-26 19:03:09 +11:00
/** A sketch expression; its contents are held as a block statement. */
export interface SketchExpression extends GeneralStatement {
  type: 'SketchExpression'
  body: BlockStatement
}
/**
 * Builds a `SketchExpression` by parsing a block statement from `index`.
 *
 * @param tokens - full token list
 * @param index - index handed straight to `makeBlockStatement`
 * @returns the sketch node and the index of the last token of its block
 */
function makeSketchExpression(
  tokens: Token[],
  index: number
): { expression: SketchExpression; lastIndex: number } {
  const currentToken = tokens[index]
  const { block, lastIndex: bodyLastIndex } = makeBlockStatement(tokens, index)
  const endToken = tokens[bodyLastIndex]
  return {
    expression: {
      type: 'SketchExpression',
      start: currentToken.start,
      end: endToken.end,
      body: block,
    },
    lastIndex: bodyLastIndex,
  }
}
/**
 * A chain of values joined by pipe operators. `nonCodeMeta` keeps the
 * non-code nodes found between the piped values.
 */
export interface PipeExpression extends GeneralStatement {
  type: 'PipeExpression'
  body: Value[]
  nonCodeMeta: NoneCodeMeta
}
/**
 * Builds a `PipeExpression` from the pipe body that starts at `index`.
 *
 * @param tokens - full token list
 * @param index - index passed on to `makePipeBody`
 * @returns the pipe node and the index of the last token of its body
 */
function makePipeExpression(
  tokens: Token[],
  index: number
): { expression: PipeExpression; lastIndex: number } {
  const firstToken = tokens[index]
  const pipe = makePipeBody(tokens, index)
  const expression: PipeExpression = {
    type: 'PipeExpression',
    start: firstToken.start,
    end: tokens[pipe.lastIndex].end,
    body: pipe.body,
    nonCodeMeta: pipe.nonCodeMeta,
  }
  return { expression, lastIndex: pipe.lastIndex }
}
/**
 * Recursively collects the values of a pipe chain.
 *
 * @param tokens - full token list
 * @param index - index of an operator token (the value follows it) or of a
 *                `{` that opens a sketch block
 * @param previousValues - values collected so far
 * @param previousNonCodeMeta - non-code nodes collected so far
 * @returns the piped values, the index of the last token consumed, and the
 *          non-code nodes keyed by the position of the value they follow
 * @throws Error when the token at `index` is neither an operator nor `{`
 */
function makePipeBody(
  tokens: Token[],
  index: number,
  previousValues: Value[] = [],
  previousNonCodeMeta: NoneCodeMeta = {}
): { body: Value[]; lastIndex: number; nonCodeMeta: NoneCodeMeta } {
  // Copy so the caller's metadata object is never mutated.
  const nonCodeMeta = { ...previousNonCodeMeta }
  const currentToken = tokens[index]
  const expressionStart = nextMeaningfulToken(tokens, index)
  let value: Value
  let lastIndex: number
  if (currentToken.type === 'operator') {
    const val = makeValue(tokens, expressionStart.index)
    value = val.value
    lastIndex = val.lastIndex
  } else if (currentToken.type === 'brace' && currentToken.value === '{') {
    const sketch = makeSketchExpression(tokens, index)
    value = sketch.expression
    lastIndex = sketch.lastIndex
  } else {
    throw new Error('Expected a previous PipeValue if statement to match')
  }
  const nextPipeToken = hasPipeOperator(tokens, index)
  if (!nextPipeToken) {
    return {
      body: [...previousValues, value],
      lastIndex,
      nonCodeMeta,
    }
  }
  if (nextPipeToken.bonusNonCodeNode) {
    // previousValues.length is the position `value` will take in the body.
    nonCodeMeta[previousValues.length] = nextPipeToken.bonusNonCodeNode
  }
  return makePipeBody(
    tokens,
    nextPipeToken.index,
    [...previousValues, value],
    nonCodeMeta
  )
}
2022-11-26 19:03:09 +11:00
/**
 * An arrow-style function value: `(params) => { body }`.
 * `id` is always written as `null` by `makeFunctionExpression`.
 */
export interface FunctionExpression extends GeneralStatement {
  type: 'FunctionExpression'
  id: Identifier | null
  params: Identifier[]
  body: BlockStatement
}
/**
 * Builds a `FunctionExpression`.
 *
 * @param tokens - full token list
 * @param index - index of the `(` that opens the parameter list
 * @returns the function node and the index of the last token of its body
 */
function makeFunctionExpression(
  tokens: Token[],
  index: number
): { expression: FunctionExpression; lastIndex: number } {
  const currentToken = tokens[index]
  // Layout is: '(' params ')' arrow-token body.
  const closingBraceIndex = findClosingBrace(tokens, index)
  const arrowToken = nextMeaningfulToken(tokens, closingBraceIndex)
  const bodyStartToken = nextMeaningfulToken(tokens, arrowToken.index)
  const { params } = makeParams(tokens, index)
  const { block, lastIndex: bodyLastIndex } = makeBlockStatement(
    tokens,
    bodyStartToken.index
  )
  return {
    expression: {
      type: 'FunctionExpression',
      start: currentToken.start,
      end: tokens[bodyLastIndex].end,
      id: null,
      params,
      body: block,
    },
    lastIndex: bodyLastIndex,
  }
}
/**
 * Recursively collects the parameter identifiers of a function expression.
 *
 * @param tokens - full token list
 * @param index - index of the `(` or `,` token before the next parameter
 * @param previousParams - parameters collected so far
 * @returns the parameters and `lastIndex`, the index at which the recursion
 *          stopped
 */
function makeParams(
  tokens: Token[],
  index: number,
  previousParams: Identifier[] = []
): { params: Identifier[]; lastIndex: number } {
  const braceOrCommaToken = tokens[index]
  const argumentToken = nextMeaningfulToken(tokens, index)
  // Stop when the next token or the current token is the closing ')'.
  const shouldFinishRecursion =
    (argumentToken.token.type === 'brace' &&
      argumentToken.token.value === ')') ||
    (braceOrCommaToken.type === 'brace' && braceOrCommaToken.value === ')')
  if (shouldFinishRecursion) {
    return { params: previousParams, lastIndex: index }
  }
  const nextBraceOrCommaToken = nextMeaningfulToken(tokens, argumentToken.index)
  const identifier = makeIdentifier(tokens, argumentToken.index)
  return makeParams(tokens, nextBraceOrCommaToken.index, [
    ...previousParams,
    identifier,
  ])
}
/** AST node for a `{ ... }` block of statements. */
export interface BlockStatement extends GeneralStatement {
  type: 'BlockStatement'
  /** Statements contained in the block, in the order they were parsed. */
  body: BodyItem[]
  /**
   * Non-code nodes (whitespace / comments) found around the statements of
   * `body`, as collected by `makeBody`.
   */
  nonCodeMeta: NoneCodeMeta
}
/**
 * Builds a `BlockStatement` node.
 *
 * @param tokens - The full token stream.
 * @param index - Index of the block's opening `{`.
 * @returns The node plus `lastIndex`: the index of the `}` for an immediately
 *   closed block, otherwise whatever `makeBody` reports.
 */
function makeBlockStatement(
  tokens: Token[],
  index: number
): { block: BlockStatement; lastIndex: number } {
  const openingCurly = tokens[index]
  const firstInnerIndex = index + 1
  // Only a `}` directly after the `{` takes the shortcut; a block holding
  // just whitespace or comments goes through makeBody.
  const closesImmediately = tokens[firstInnerIndex].value === '}'
  const { body, lastIndex, nonCodeMeta } = closesImmediately
    ? { body: [], lastIndex: firstInnerIndex, nonCodeMeta: {} }
    : makeBody({ tokens, tokenIndex: firstInnerIndex })
  return {
    block: {
      type: 'BlockStatement',
      start: openingCurly.start,
      // lastIndex may point past the end of the stream; fall back to 0 then.
      end: tokens[lastIndex]?.end || 0,
      body,
      nonCodeMeta,
    },
    lastIndex,
  }
}
/** AST node for a `return` statement. */
interface ReturnStatement extends GeneralStatement {
  type: 'ReturnStatement'
  /** The value being returned. */
  argument: Value
}
/**
 * Builds a `ReturnStatement` node.
 *
 * @param tokens - The full token stream.
 * @param index - Index of the `return` keyword token.
 * @returns The node plus `lastIndex`, the last index reported by `makeValue`
 *   for the returned value.
 */
function makeReturnStatement(
  tokens: Token[],
  index: number
): { statement: ReturnStatement; lastIndex: number } {
  const returnKeyword = tokens[index]
  const valueStart = nextMeaningfulToken(tokens, index)
  const { value, lastIndex } = makeValue(tokens, valueStart.index)
  return {
    statement: {
      type: 'ReturnStatement',
      start: returnKeyword.start,
      end: tokens[lastIndex].end,
      argument: value,
    },
    lastIndex,
  }
}
/** Union of the top-level AST shapes exposed by this module. */
export type All =
  | Program
  | ExpressionStatement[]
  | BinaryExpression
  | Literal
/**
 * Looks ahead in the token stream, stepping over a non-code node
 * (whitespace or a comment) if one sits at the target position.
 *
 * @param tokens - The full token stream.
 * @param index - Index to look ahead from.
 * @param offset - How far ahead of `index` to look; `0` inspects
 *   `tokens[index]` itself.
 * @returns
 *   - past the end of the stream: `token` is `undefined` and `index` is
 *     `tokens.length`;
 *   - on a code token: that token and its index;
 *   - on a non-code token: the token right after the span reported by
 *     `makeNoneCodeNode` (possibly `undefined`), its index, and the non-code
 *     node as `bonusNonCodeNode` when that node has a truthy `value`.
 */
export function nextMeaningfulToken(
  tokens: Token[],
  index: number,
  offset: number = 1
): { token: Token; index: number; bonusNonCodeNode?: NoneCodeNode } {
  const newIndex = index + offset
  const token = tokens[newIndex]
  if (!token) {
    return { token, index: tokens.length }
  }
  if (!isNotCodeToken(token)) {
    return { token, index: newIndex }
  }
  const nonCodeNode = makeNoneCodeNode(tokens, newIndex)
  const indexAfterNonCode = nonCodeNode.lastIndex + 1
  return {
    token: tokens[indexAfterNonCode],
    index: indexAfterNonCode,
    bonusNonCodeNode: nonCodeNode?.node?.value ? nonCodeNode.node : undefined,
  }
}
/**
 * Walks backwards from `index` to the closest token that is neither
 * whitespace nor a comment.
 *
 * @param tokens - The full token stream.
 * @param index - Index to search backwards from.
 * @param offset - How far before `index` to start looking.
 * @returns The token found and its index; when the search runs off the
 *   start of the stream, `token` is `undefined` and `index` is `0`.
 */
function previousMeaningfulToken(
  tokens: Token[],
  index: number,
  offset: number = 1
): { token: Token; index: number } {
  let distance = offset
  while (true) {
    const newIndex = index - distance
    const token = tokens[newIndex]
    if (!token) {
      return { token, index: 0 }
    }
    if (!isNotCodeToken(token)) {
      return { token, index: newIndex }
    }
    // Non-code token: look one position further back.
    distance += 1
  }
}
/** Statement kinds that `makeBody` can place in a program or block body. */
type BodyItem =
  | ExpressionStatement
  | VariableDeclaration
  | ReturnStatement
/**
 * Parses a run of statements into body items, recording the non-code nodes
 * (whitespace / comments) found around them.
 *
 * Parsing stops at the end of the stream or at a `}` token.
 *
 * Non-code nodes are stored in `nonCodeMeta` under `start` when they appear
 * before the first statement, otherwise under the number of statements
 * parsed so far at the moment they are encountered.
 *
 * @param tokens - The full token stream.
 * @param tokenIndex - Index to start parsing at (defaults to 0).
 * @param previousBody - Items already parsed; never mutated.
 * @param previousNonCodeMeta - Non-code metadata already collected; copied,
 *   never mutated.
 * @returns The body items, the index parsing stopped at, and the metadata.
 * @throws Error('Unexpected token') when a token starts no known statement.
 */
function makeBody(
  {
    tokens,
    tokenIndex = 0,
  }: {
    tokens: Token[]
    tokenIndex?: number
  },
  previousBody: BodyItem[] = [],
  previousNonCodeMeta: NoneCodeMeta = {}
): { body: BodyItem[]; lastIndex: number; nonCodeMeta: NoneCodeMeta } {
  const nonCodeMeta = { ...previousNonCodeMeta }
  let body = previousBody
  let cursor = tokenIndex

  while (cursor < tokens.length) {
    const token = tokens[cursor]
    if (token.type === 'brace' && token.value === '}') {
      break
    }

    if (isNotCodeToken(token)) {
      // Offset 0 makes nextMeaningfulToken inspect the current token, so the
      // non-code node starting here is captured before it is skipped.
      const afterNonCode = nextMeaningfulToken(tokens, cursor, 0)
      if (afterNonCode.bonusNonCodeNode) {
        if (body.length === 0) {
          nonCodeMeta.start = afterNonCode.bonusNonCodeNode
        } else {
          nonCodeMeta[body.length] = afterNonCode.bonusNonCodeNode
        }
      }
      cursor = afterNonCode.index
      continue
    }

    const lookahead = nextMeaningfulToken(tokens, cursor)
    if (lookahead.bonusNonCodeNode) {
      nonCodeMeta[body.length] = lookahead.bonusNonCodeNode
    }

    // Statement kinds after which parsing carries on with the next statement.
    let parsed: { item: BodyItem; lastIndex: number } | undefined
    if (
      token.type === 'word' &&
      ['const', 'fn', 'sketch', 'path'].includes(token.value)
    ) {
      const { declaration, lastIndex } = makeVariableDeclaration(tokens, cursor)
      parsed = { item: declaration, lastIndex }
    } else if (token.type === 'word' && token.value === 'return') {
      const { statement, lastIndex } = makeReturnStatement(tokens, cursor)
      parsed = { item: statement, lastIndex }
    } else if (
      token.type === 'word' &&
      lookahead.token.type === 'brace' &&
      lookahead.token.value === '('
    ) {
      const { expression, lastIndex } = makeExpressionStatement(tokens, cursor)
      parsed = { item: expression, lastIndex }
    }

    if (parsed) {
      const following = nextMeaningfulToken(tokens, parsed.lastIndex)
      if (following.bonusNonCodeNode) {
        // Keyed by the count *before* the new item is appended.
        nonCodeMeta[body.length] = following.bonusNonCodeNode
      }
      body = [...body, parsed.item]
      cursor = following.index
      continue
    }

    if (
      (token.type === 'number' || token.type === 'word') &&
      lookahead.token.type === 'operator'
    ) {
      const { expression, lastIndex } = makeExpressionStatement(tokens, cursor)
      // NOTE(review): unlike the statement kinds above, this returns straight
      // away and does not parse anything that follows - confirm intended.
      return {
        body: [...body, expression],
        nonCodeMeta,
        lastIndex,
      }
    }

    throw new Error('Unexpected token')
  }

  return { body, lastIndex: cursor, nonCodeMeta }
}
/**
 * Parses a token stream into a `Program` AST.
 *
 * @param tokens - The full token stream.
 * @returns The program node. `end` is the end offset of the last body item,
 *   or `0` when there are no body items (empty or comment-only input).
 */
export const abstractSyntaxTree = (tokens: Token[]): Program => {
  const { body, nonCodeMeta } = makeBody({ tokens })
  // An empty or comment-only source yields no body items; guard the lookup
  // instead of dereferencing `undefined`.
  const lastItem = body[body.length - 1]
  const program: Program = {
    type: 'Program',
    start: 0,
    end: lastItem ? lastItem.end : 0,
    body,
    nonCodeMeta,
  }
  return program
}
/**
 * Scans forward from `index` for the next token that starts a declaration:
 * one of the words `const`, `fn`, `sketch`, `path`, or a `(` whose matching
 * closing brace is followed by `=>`.
 *
 * @param tokens - The full token stream.
 * @param index - Index to start scanning after.
 * @returns The matching lookahead result, or `{ token: null, index:
 *   tokens.length - 1 }` when the stream ends first.
 */
export function findNextDeclarationKeyword(
  tokens: Token[],
  index: number
): { token: Token | null; index: number } {
  let cursor = index
  while (true) {
    const candidate = nextMeaningfulToken(tokens, cursor)
    if (candidate.index >= tokens.length) {
      return { token: null, index: tokens.length - 1 }
    }
    const { type, value } = candidate.token
    const isKeyword =
      type === 'word' &&
      (value === 'const' ||
        value === 'fn' ||
        value === 'sketch' ||
        value === 'path')
    if (isKeyword) {
      return candidate
    }
    if (type === 'brace' && value === '(') {
      const closingBraceIndex = findClosingBrace(tokens, candidate.index)
      const afterBraces = nextMeaningfulToken(tokens, closingBraceIndex)
      if (
        afterBraces?.token?.type === 'operator' &&
        afterBraces.token.value === '=>'
      ) {
        return candidate
      }
      // probably should do something else here
      // throw new Error('Unexpected token')
    }
    cursor = candidate.index
  }
}
/**
 * Scans forward from `index` for the next word token that is immediately
 * followed (no whitespace in between) by `(`.
 *
 * @param tokens - The full token stream.
 * @param index - Index to start scanning after.
 * @returns The lookahead result for the word token, or `{ token: null,
 *   index: tokens.length - 1 }` when the stream ends first.
 */
export function findNextCallExpression(
  tokens: Token[],
  index: number
): { token: Token | null; index: number } {
  let cursor = index
  while (true) {
    const candidate = nextMeaningfulToken(tokens, cursor)
    const adjacentToken = tokens[candidate.index + 1] // i.e. without whitespace
    if (candidate.index >= tokens.length) {
      return { token: null, index: tokens.length - 1 }
    }
    const startsCall =
      candidate.token.type === 'word' &&
      adjacentToken?.type === 'brace' &&
      adjacentToken?.value === '('
    if (startsCall) {
      return candidate
    }
    cursor = candidate.index
  }
}
/**
 * Scans forward from `index` for the next `}` at the current nesting level,
 * skipping over complete nested `{ ... }` blocks.
 *
 * @param tokens - The full token stream.
 * @param index - Index to start scanning after.
 * @returns The lookahead result for the `}`, or `{ token: null, index:
 *   tokens.length - 1 }` when the stream ends first.
 */
export function findNextClosingCurlyBrace(
  tokens: Token[],
  index: number
): { token: Token | null; index: number } {
  let cursor = index
  while (true) {
    const candidate = nextMeaningfulToken(tokens, cursor)
    if (candidate.index >= tokens.length) {
      return { token: null, index: tokens.length - 1 }
    }
    const { type, value } = candidate.token
    if (type === 'brace' && value === '}') {
      return candidate
    }
    if (type === 'brace' && value === '{') {
      // Resume from the nested block's own closing brace, so that the token
      // right after it is examined on the next pass. Resuming from that
      // following token instead would skip it, and miss a `}` that directly
      // follows a nested block.
      cursor = findClosingBrace(tokens, candidate.index)
      continue
    }
    cursor = candidate.index
  }
}
/**
 * Reports whether a `|>` operator follows the construct starting at `index`.
 *
 * This probably still needs some work. It should be called on expression
 * statements (i.e. "lineTo" for lineTo(10, 10)) or on "{" for sketch
 * declarations.
 *
 * When `_limitIndex` is left at `-1`:
 * - for a call expression, only the token after its closing brace is checked;
 * - for a `{`, only the token after the matching closing brace is checked;
 * - otherwise the scan is bounded by the next declaration keyword.
 *
 * @param tokens - The full token stream.
 * @param index - Index of the token the construct starts at.
 * @param _limitIndex - Index at which scanning gives up; `-1` means "work it
 *   out".
 * @returns The lookahead result for the `|>` token, or `false`.
 */
export function hasPipeOperator(
  tokens: Token[],
  index: number,
  _limitIndex = -1
): ReturnType<typeof nextMeaningfulToken> | false {
  type Lookahead = ReturnType<typeof nextMeaningfulToken>
  const pipeOrFalse = (candidate: Lookahead): Lookahead | false =>
    candidate?.token?.type === 'operator' && candidate.token.value === '|>'
      ? candidate
      : false

  let limitIndex = _limitIndex
  if (limitIndex === -1) {
    const callExpressionEnd = isCallExpression(tokens, index)
    if (callExpressionEnd !== -1) {
      return pipeOrFalse(nextMeaningfulToken(tokens, callExpressionEnd))
    }
    const startToken = tokens[index]
    if (startToken?.type === 'brace' && startToken?.value === '{') {
      const closingBraceIndex = findClosingBrace(tokens, index)
      return pipeOrFalse(nextMeaningfulToken(tokens, closingBraceIndex))
    }
    limitIndex = findNextDeclarationKeyword(tokens, index).index
  }

  let cursor = index
  while (true) {
    const candidate = nextMeaningfulToken(tokens, cursor)
    if (candidate.index >= limitIndex) {
      return false
    }
    if (candidate.token.type === 'operator' && candidate.token.value === '|>') {
      return candidate
    }
    cursor = candidate.index
  }
}
/**
 * Finds the index of the brace that closes the opening brace at `index`,
 * taking nested braces of the same kind into account.
 *
 * Should be called with the index of the opening brace. The scan is
 * iterative, so long token streams cannot exhaust the call stack.
 *
 * @param tokens - The full token stream.
 * @param index - Index of the opening `(`, `{` or `[`.
 * @param _braceCount - Nesting depth already seen; leave at the default
 *   unless resuming a scan.
 * @param _searchOpeningBrace - Opening brace being matched; leave at the
 *   default unless resuming a scan.
 * @returns Index of the matching closing brace.
 * @throws Error when a fresh scan does not start on an opening brace, or
 *   when the stream ends before the matching closing brace is found.
 */
export function findClosingBrace(
  tokens: Token[],
  index: number,
  _braceCount: number = 0,
  _searchOpeningBrace: string = ''
): number {
  const closingBraceMap: { [key: string]: string } = {
    '(': ')',
    '{': '}',
    '[': ']',
  }
  let searchOpeningBrace = _searchOpeningBrace
  let braceCount = _braceCount
  const isFirstCall = !searchOpeningBrace && braceCount === 0
  if (isFirstCall) {
    const firstValue = tokens[index]?.value
    if (firstValue !== '(' && firstValue !== '{' && firstValue !== '[') {
      throw new Error(
        `expected to be started on a opening brace ( { [, instead found '${firstValue}'`
      )
    }
    searchOpeningBrace = firstValue
  }
  const closingBrace = closingBraceMap[searchOpeningBrace]
  for (let cursor = index; cursor < tokens.length; cursor++) {
    const { value } = tokens[cursor]
    if (value === closingBrace) {
      if (braceCount === 1) {
        return cursor
      }
      braceCount -= 1
    } else if (value === searchOpeningBrace) {
      braceCount += 1
    }
    // any other token: keep scanning
  }
  throw new Error(
    `no closing '${closingBrace}' found for '${searchOpeningBrace}' when scanning from token index ${index}`
  )
}
// function findOpeningBrace(
// tokens: Token[],
// index: number,
// _braceCount: number = 0,
// _searchClosingBrace: string = ''
// ): number {
// // should be called with the index of the closing brace
// const closingBraceMap: { [key: string]: string } = {
// ')': '(',
// '}': '{',
// ']': '[',
// }
// const currentToken = tokens[index]
// let searchClosingBrace = _searchClosingBrace
// const isFirstCall = !searchClosingBrace && _braceCount === 0
// if (isFirstCall) {
// searchClosingBrace = currentToken.value
// if (![')', '}', ']'].includes(searchClosingBrace)) {
// throw new Error(
// `expected to be started on a opening brace ( { [, instead found '${searchClosingBrace}'`
// )
// }
// }
// const foundOpeningBrace =
// _braceCount === 1 &&
// currentToken.value === closingBraceMap[searchClosingBrace]
// const foundAnotherClosingBrace = currentToken.value === searchClosingBrace
// const foundAnotherOpeningBrace =
// currentToken.value === closingBraceMap[searchClosingBrace]
// if (foundOpeningBrace) {
// return index
// }
// if (foundAnotherClosingBrace) {
// return findOpeningBrace(
// tokens,
// index - 1,
// _braceCount + 1,
// searchClosingBrace
// )
// }
// if (foundAnotherOpeningBrace) {
// return findOpeningBrace(
// tokens,
// index - 1,
// _braceCount - 1,
// searchClosingBrace
// )
// }
// // non-brace token, increment and continue
// return findOpeningBrace(tokens, index - 1, _braceCount, searchClosingBrace)
// }
/**
 * Checks whether the token at `index` starts a call expression, i.e. a word
 * immediately followed (no whitespace) by `(`.
 *
 * @param tokens - The full token stream.
 * @param index - Index of the candidate callee token.
 * @returns Index of the call's closing `)`, or `-1` when the token does not
 *   start a call (including when `index` or `index + 1` is out of range).
 */
function isCallExpression(tokens: Token[], index: number): number {
  const currentToken = tokens[index]
  const veryNextToken = tokens[index + 1] // i.e. no whitespace
  // Optional chaining: either token is undefined at the end of the stream.
  const startsCall =
    currentToken?.type === 'word' &&
    veryNextToken?.type === 'brace' &&
    veryNextToken?.value === '('
  return startsCall ? findClosingBrace(tokens, index + 1) : -1
}
/**
 * Debug helper: logs and returns a three-line string showing the token
 * values around the given indexes, with a `^` marker line underneath the
 * tokens at those indexes.
 *
 * The window runs from one token before the smallest index (clamped to 0)
 * up to and including the token at the largest index. With no indexes the
 * whole stream is shown.
 *
 * @param tokens - The full token stream.
 * @param indexes - Token indexes to highlight.
 * @param msg - Optional label prefixed to the first line.
 * @returns The text that was logged.
 */
function debuggerr(tokens: Token[], indexes: number[], msg = ''): string {
  const sortedIndexes = [...indexes].sort((a, b) => a - b)
  const min = Math.min(...indexes)
  // One token of leading context, never below 0.
  const start = indexes.length > 0 ? Math.max(min - 1, 0) : 0
  const max = Math.max(...indexes)
  const end = Math.min(Math.abs(max + 1), tokens.length)
  const debugTokens = tokens.slice(start, end)
  const debugIndexes = indexes.map((i) => i - start)

  let topString = ''
  let bottomString = ''
  debugTokens.forEach((token, index) => {
    const isMarked = debugIndexes.includes(index)
    // Newlines are flattened so the marker line stays aligned.
    topString += token.value.replace(/\n/g, ' ')
    bottomString += isMarked
      ? '^'.padEnd(token.value.length, '_')
      : ' '.padEnd(token.value.length, ' ')
  })

  const debugResult = [
    `${msg} - debuggerr: ${sortedIndexes}`,
    topString,
    bottomString,
  ].join('\n')
  console.log(debugResult)
  return debugResult
}
/**
 * Follows `path` down from `node` and returns the node reached.
 *
 * When `stopAt` names a node type, the returned node is the deepest node of
 * that type along the path (or the first one when `returnEarly` is set); if
 * no node of that type is met, the node at the end of the path is returned.
 *
 * NOTE(review): the returned `path` stops growing at the *first* `stopAt`
 * match, while the returned `node` is the *deepest* match - the two can
 * disagree when the type occurs more than once along the path. Confirm
 * whether callers rely on this.
 *
 * @param node - Root to start from.
 * @param path - Property names / array indexes to follow, in order.
 * @param stopAt - Node `type` to stop at; `''` means "follow the whole path".
 * @param returnEarly - Return as soon as the first `stopAt` node is reached.
 * @returns The node found and the path explored up to the first match.
 * @throws Error when a step of the path does not lead to an object.
 */
export function getNodeFromPath<T>(
  node: Program,
  path: (string | number)[],
  stopAt: string = '',
  returnEarly = false
): {
  node: T
  path: PathToNode
} {
  let currentNode = node as any
  let stopAtNode = null
  const successfulPaths: PathToNode = []
  const pathsExplored: PathToNode = []
  for (const pathItem of path) {
    try {
      if (typeof currentNode[pathItem] !== 'object') {
        throw new Error('not an object')
      }
      currentNode = currentNode[pathItem]
      successfulPaths.push(pathItem)
      if (!stopAtNode) {
        pathsExplored.push(pathItem)
      }
      if (currentNode.type === stopAt) {
        // it will match the deepest node of the type
        // instead of returning at the first match
        stopAtNode = currentNode
        if (returnEarly) {
          return {
            node: stopAtNode,
            path: pathsExplored,
          }
        }
      }
    } catch (e) {
      // Covers both the explicit throw above and a TypeError from stepping
      // into `null`; either way the path cannot be followed further.
      throw new Error(
        `Could not find path ${pathItem} in node ${JSON.stringify(
          currentNode,
          null,
          2
        )}, successful path was ${successfulPaths}`
      )
    }
  }
  return {
    node: stopAtNode || currentNode,
    path: pathsExplored,
  }
}
/** Sequence of property names and array indexes leading to a node. */
type Path = Array<string | number>
/**
 * Builds the path from `node` down to the innermost supported node whose
 * `start`/`end` enclose `sourceRange`.
 *
 * Descends through expression statements (into a call's callee) and through
 * variable declarations (into sketch bodies, pipe bodies and call callees).
 * Sketch block bodies are searched recursively.
 *
 * @param node - Program (or block) whose `body` is searched.
 * @param sourceRange - `[start, end]` offsets to locate.
 * @param previousPath - Path prefix leading to `node`; never mutated.
 * @returns The prefix, followed by `'body'` and the steps that were found.
 */
export function getNodePathFromSourceRange(
  node: Program,
  sourceRange: [number, number],
  previousPath: Path = []
): Path {
  const [rangeStart, rangeEnd] = sourceRange
  const encloses = (candidate: { start: number; end: number }): boolean =>
    candidate.start <= rangeStart && candidate.end >= rangeEnd

  let path: Path = [...previousPath, 'body']

  for (let statementIndex = 0; statementIndex < node.body.length; statementIndex++) {
    const statement = node.body[statementIndex]
    if (!encloses(statement)) continue
    path.push(statementIndex)

    if (statement.type === 'ExpressionStatement') {
      const { expression } = statement
      if (!encloses(expression)) continue
      path.push('expression')
      if (expression.type === 'CallExpression' && encloses(expression.callee)) {
        path.push('callee')
      }
      continue
    }

    if (statement.type !== 'VariableDeclaration') continue

    const { declarations } = statement
    for (let decIndex = 0; decIndex < declarations.length; decIndex++) {
      const declaration = declarations[decIndex]
      if (!encloses(declaration)) continue
      path.push('declarations', decIndex)

      const { init } = declaration
      if (!encloses(init)) continue
      path.push('init')

      if (init.type === 'SketchExpression') {
        const block = init.body
        if (!encloses(block)) continue
        path.push('body')
        if (block.type === 'BlockStatement') {
          path = getNodePathFromSourceRange(block, sourceRange, path)
        }
      } else if (init.type === 'PipeExpression') {
        const pipes = init.body
        for (let pipeIndex = 0; pipeIndex < pipes.length; pipeIndex++) {
          const pipe = pipes[pipeIndex]
          if (!encloses(pipe)) continue
          path.push('body', pipeIndex)
          if (pipe.type !== 'SketchExpression') continue
          const block = pipe.body
          if (!encloses(block)) continue
          path.push('body')
          if (block.type === 'BlockStatement') {
            path = getNodePathFromSourceRange(block, sourceRange, path)
          }
        }
      } else if (init.type === 'CallExpression' && encloses(init.callee)) {
        path.push('callee')
      }
    }
  }

  return path
}
/**
 * Tells whether a token carries no executable meaning: whitespace, a line
 * comment or a block comment.
 *
 * @param token - Token to classify; a missing token yields `false`.
 * @returns `true` for whitespace and comment tokens, `false` otherwise.
 */
export function isNotCodeToken(token: Token): boolean {
  const tokenType = token?.type
  return (
    tokenType === 'whitespace' ||
    tokenType === 'linecomment' ||
    tokenType === 'blockcomment'
  )
}