2023-01-13 17:58:37 +11:00
|
|
|
import { PathToNode } from './executor'
|
2022-11-26 08:34:23 +11:00
|
|
|
import { Token } from './tokeniser'
|
2023-01-21 21:23:01 +11:00
|
|
|
import { parseExpression } from './astMathExpressions'
|
2023-03-02 21:19:11 +11:00
|
|
|
import { Range } from '../useStore'
|
2022-11-13 11:14:30 +11:00
|
|
|
|
|
|
|
/**
 * String tags used for the `type` field of the AST nodes declared in this
 * file.
 */
type syntaxType =
  | 'Program'
  | 'ExpressionStatement'
  | 'BinaryExpression'
  | 'CallExpression'
  | 'Identifier'
  | 'BlockStatement'
  | 'ReturnStatement'
  | 'VariableDeclaration'
  | 'VariableDeclarator'
  | 'MemberExpression'
  | 'ArrayExpression'
  | 'ObjectExpression'
  | 'ObjectProperty'
  | 'FunctionExpression'
  | 'PipeExpression'
  | 'PipeSubstitution'
  | 'Literal'
  | 'NoneCodeNode'
  | 'UnaryExpression'
|
2023-01-13 17:58:37 +11:00
|
|
|
// | 'NumberLiteral'
|
|
|
|
// | 'StringLiteral'
|
|
|
|
// | 'IfStatement'
|
|
|
|
// | 'WhileStatement'
|
|
|
|
// | 'FunctionDeclaration'
|
|
|
|
// | 'AssignmentExpression'
|
|
|
|
// | 'Property'
|
|
|
|
// | 'LogicalExpression'
|
|
|
|
// | 'ConditionalExpression'
|
|
|
|
// | 'ForStatement'
|
|
|
|
// | 'ForInStatement'
|
|
|
|
// | 'ForOfStatement'
|
|
|
|
// | 'BreakStatement'
|
|
|
|
// | 'ContinueStatement'
|
|
|
|
// | 'SwitchStatement'
|
|
|
|
// | 'SwitchCase'
|
|
|
|
// | 'ThrowStatement'
|
|
|
|
// | 'TryStatement'
|
|
|
|
// | 'CatchClause'
|
|
|
|
// | 'ClassDeclaration'
|
|
|
|
// | 'ClassBody'
|
|
|
|
// | 'MethodDefinition'
|
|
|
|
// | 'NewExpression'
|
|
|
|
// | 'ThisExpression'
|
|
|
|
// | 'UpdateExpression'
|
|
|
|
// | 'YieldExpression'
|
|
|
|
// | 'AwaitExpression'
|
|
|
|
// | 'ImportDeclaration'
|
|
|
|
// | 'ImportSpecifier'
|
|
|
|
// | 'ImportDefaultSpecifier'
|
|
|
|
// | 'ImportNamespaceSpecifier'
|
|
|
|
// | 'ExportNamedDeclaration'
|
|
|
|
// | 'ExportDefaultDeclaration'
|
|
|
|
// | 'ExportAllDeclaration'
|
|
|
|
// | 'ExportSpecifier'
|
|
|
|
// | 'TaggedTemplateExpression'
|
|
|
|
// | 'TemplateLiteral'
|
|
|
|
// | 'TemplateElement'
|
|
|
|
// | 'SpreadElement'
|
|
|
|
// | 'RestElement'
|
|
|
|
// | 'SequenceExpression'
|
|
|
|
// | 'DebuggerStatement'
|
|
|
|
// | 'LabeledStatement'
|
|
|
|
// | 'DoWhileStatement'
|
|
|
|
// | 'WithStatement'
|
|
|
|
// | 'EmptyStatement'
|
|
|
|
// | 'ArrayPattern'
|
|
|
|
// | 'ObjectPattern'
|
|
|
|
// | 'AssignmentPattern'
|
|
|
|
// | 'MetaProperty'
|
|
|
|
// | 'Super'
|
|
|
|
// | 'Import'
|
|
|
|
// | 'RegExpLiteral'
|
|
|
|
// | 'BooleanLiteral'
|
|
|
|
// | 'NullLiteral'
|
|
|
|
// | 'TypeAnnotation'
|
2022-11-13 11:14:30 +11:00
|
|
|
|
|
|
|
/**
 * Shape of a `Program` node: a list of body items plus the non-code
 * (whitespace/comment) information recorded alongside them.
 */
export interface Program {
  type: syntaxType
  start: number
  end: number
  /** The items that make up the program body. */
  body: BodyItem[]
  /**
   * Whitespace and comments are not needed for execution and are not stored
   * as regular body items; what is found between statements is kept here so
   * that recasting can re-insert blank lines and comments.
   * Only non-code found between statements is recorded, so inline comments
   * such as `myFunctionCall('a', /* inline comment *\/ b)` are not preserved.
   */
  nonCodeMeta: NoneCodeMeta
}
|
|
|
|
/**
 * Fields shared by every node interface in this file that extends it:
 * a `type` tag plus `start`/`end` positions (copied from token
 * `start`/`end` values by the `make*` functions).
 */
interface GeneralStatement {
  type: syntaxType
  start: number
  end: number
}
|
|
|
|
|
2023-02-01 07:30:55 +11:00
|
|
|
/**
 * A run of consecutive non-code tokens collapsed into a single node.
 */
interface NoneCodeNode extends GeneralStatement {
  type: 'NoneCodeNode'
  /** The values of the covered tokens joined together, in order. */
  value: string
}
|
|
|
|
|
|
|
|
/**
 * Lookup of non-code nodes (whitespace/comments) keyed by statement index.
 */
interface NoneCodeMeta {
  // Whitespace/comments that come AFTER the statement whose index is used
  // as the key.
  [statementIndex: number]: NoneCodeNode
  // Because entries are keyed by the preceding statement, a separate slot is
  // needed for any whitespace/comments at the very start of the file/block.
  start?: NoneCodeNode
}
|
|
|
|
|
|
|
|
/**
 * Builds a single `NoneCodeNode` from the run of non-code tokens that begins
 * at `index`.
 *
 * @param tokens the full token list
 * @param index index of the first token of the run
 * @returns the node and `lastIndex`, the index of the last token covered
 *          (one before the index reported by `findEndOfNonCodeNode`)
 */
function makeNoneCodeNode(
  tokens: Token[],
  index: number
): { node?: NoneCodeNode; lastIndex: number } {
  // `runEnd` is exclusive: it is the first index that is code again.
  const runEnd = findEndOfNonCodeNode(tokens, index)
  const lastIndex = runEnd - 1
  const value = tokens
    .slice(index, runEnd)
    .map((t) => t.value)
    .join('')

  return {
    node: {
      type: 'NoneCodeNode',
      start: tokens[index].start,
      end: tokens[lastIndex].end,
      value,
    },
    lastIndex,
  }
}
|
|
|
|
|
2023-03-03 20:35:48 +11:00
|
|
|
/**
 * Walks forward from `index` while `isNotCodeToken` accepts the token and
 * returns the index of the first token it rejects.
 *
 * @param tokens the full token list
 * @param index index to start scanning from
 * @returns `index` itself when the token there is already rejected
 */
function findEndOfNonCodeNode(tokens: Token[], index: number): number {
  let cursor = index
  while (isNotCodeToken(tokens[cursor])) {
    cursor += 1
  }
  return cursor
}
|
|
|
|
|
2023-01-06 09:29:26 +11:00
|
|
|
/**
 * A statement that consists of a single expression value.
 */
export interface ExpressionStatement extends GeneralStatement {
  type: 'ExpressionStatement'
  /** The wrapped expression. */
  expression: Value
}
|
|
|
|
|
|
|
|
/**
 * Parses the expression statement starting at `index`.
 *
 * If the next meaningful token is an opening `(` the statement is parsed as
 * a call expression, otherwise as a binary expression.
 *
 * @param tokens the full token list
 * @param index index of the statement's first token
 * @returns the statement node and the index of the last token it consumed
 */
function makeExpressionStatement(
  tokens: Token[],
  index: number
): { expression: ExpressionStatement; lastIndex: number } {
  const firstToken = tokens[index]
  const following = nextMeaningfulToken(tokens, index).token
  const looksLikeCall = following.type === 'brace' && following.value === '('

  const { expression, lastIndex } = looksLikeCall
    ? makeCallExpression(tokens, index)
    : makeBinaryExpression(tokens, index)

  return {
    expression: {
      type: 'ExpressionStatement',
      start: firstToken.start,
      end: expression.end,
      expression,
    },
    lastIndex,
  }
}
|
|
|
|
|
2022-11-26 19:03:09 +11:00
|
|
|
/**
 * A function call: `callee(arguments...)`.
 */
export interface CallExpression extends GeneralStatement {
  type: 'CallExpression'
  /** Identifier naming the function being called. */
  callee: Identifier
  /** Argument values in source order. */
  arguments: Value[]
  /** `makeCallExpression` always sets this to `false`. */
  optional: boolean
}
|
|
|
|
|
2023-03-02 21:19:11 +11:00
|
|
|
/**
 * Parses a call expression whose callee identifier is at `index`.
 *
 * @param tokens the full token list
 * @param index index of the callee token
 * @returns the call node and `lastIndex`, the index `makeArguments` stopped
 *          on (the token whose `end` becomes the node's `end`)
 */
export function makeCallExpression(
  tokens: Token[],
  index: number
): {
  expression: CallExpression
  lastIndex: number
} {
  // The next meaningful token after the callee is where argument parsing starts.
  const openingBrace = nextMeaningfulToken(tokens, index)
  const callee = makeIdentifier(tokens, index)
  const { arguments: callArguments, lastIndex } = makeArguments(
    tokens,
    openingBrace.index
  )

  const expression: CallExpression = {
    type: 'CallExpression',
    start: tokens[index].start,
    end: tokens[lastIndex].end,
    callee,
    arguments: callArguments,
    optional: false,
  }
  return { expression, lastIndex }
}
|
|
|
|
|
|
|
|
/**
 * Recursively collects the arguments of a call expression.
 *
 * Each invocation is positioned on a delimiter token (`index`): when that
 * token is `)` the accumulated arguments are returned, otherwise the next
 * meaningful token is taken as the start of one argument, that argument is
 * parsed, and the function recurses from the delimiter that follows it.
 *
 * @param tokens the full token list
 * @param index index of the current delimiter token
 * @param previousArgs arguments gathered by earlier recursion steps
 * @returns all arguments plus `lastIndex`, the index of the closing `)`
 * @throws Error when no argument form matches the tokens
 */
function makeArguments(
  tokens: Token[],
  index: number,
  previousArgs: Value[] = []
): {
  arguments: Value[]
  lastIndex: number
} {
  const isBrace = (t: Token, symbol: string): boolean =>
    t.type === 'brace' && t.value === symbol

  const delimiter = tokens[index]
  const argument = nextMeaningfulToken(tokens, index)
  if (isBrace(delimiter, ')')) {
    return { arguments: previousArgs, lastIndex: index }
  }

  const following = nextMeaningfulToken(tokens, argument.index)
  const followedByDelimiter =
    following.token.type === 'comma' || following.token.type === 'brace'
  const arg = argument.token
  const next = following.token

  // For multi-token arguments: append the parsed expression, then resume
  // from the next meaningful token after the expression's last token.
  const continueAfter = (parsed: { expression: Value; lastIndex: number }) =>
    makeArguments(tokens, nextMeaningfulToken(tokens, parsed.lastIndex).index, [
      ...previousArgs,
      parsed.expression,
    ])
  // For single-token arguments: append and resume from the token right
  // after the argument.
  const appendSingle = (value: Value) =>
    makeArguments(tokens, following.index, [...previousArgs, value])

  if (isBrace(arg, '[')) {
    return continueAfter(makeArrayExpression(tokens, argument.index))
  }
  if (arg.type === 'operator' && arg.value === '-') {
    return continueAfter(makeUnaryExpression(tokens, argument.index))
  }
  if (isBrace(arg, '{')) {
    return continueAfter(makeObjectExpression(tokens, argument.index))
  }

  const isWord = arg.type === 'word'
  const isLiteralToken = arg.type === 'number' || arg.type === 'string'
  if ((isWord || isLiteralToken) && next.type === 'operator') {
    return continueAfter(makeBinaryExpression(tokens, argument.index))
  }

  if (!followedByDelimiter) {
    // I think this if statement might be dead code
    const { expression, lastIndex } = makeBinaryExpression(
      tokens,
      following.index
    )
    return makeArguments(tokens, lastIndex, [...previousArgs, expression])
  }

  if (arg.type === 'operator' && arg.value === '%') {
    const substitution: PipeSubstitution = {
      type: 'PipeSubstitution',
      start: arg.start,
      end: arg.end,
    }
    return appendSingle(substitution)
  }

  if (isWord && isBrace(next, '(')) {
    // A call used as an argument: if an operator other than `|>` follows
    // the call's closing brace, the call is the left side of a binary
    // expression.
    const closingBrace = findClosingBrace(tokens, following.index)
    const afterCall = nextMeaningfulToken(tokens, closingBrace).token
    const startsBinary = afterCall.type === 'operator' && afterCall.value !== '|>'
    return continueAfter(
      startsBinary
        ? makeBinaryExpression(tokens, argument.index)
        : makeCallExpression(tokens, argument.index)
    )
  }

  if (isWord) {
    return appendSingle(makeIdentifier(tokens, argument.index))
  }
  if (isLiteralToken) {
    return appendSingle(makeLiteral(tokens, argument.index))
  }
  if (isBrace(arg, ')')) {
    // Nothing to parse: step onto the `)` so the next call terminates.
    return makeArguments(tokens, argument.index, previousArgs)
  }

  throw new Error('Expected a previous Argument if statement to match')
}
|
|
|
|
|
2023-01-06 09:29:26 +11:00
|
|
|
/**
 * A declaration statement introduced by a keyword such as `const` or `fn`.
 */
export interface VariableDeclaration extends GeneralStatement {
  type: 'VariableDeclaration'
  /** The declarators parsed for this declaration. */
  declarations: VariableDeclarator[]
  /** The declaring keyword, or `'unknown'` when it is neither `const` nor `fn`. */
  kind: 'const' | 'unknown' | 'fn' //| "solid" | "surface" | "face"
}
|
|
|
|
|
|
|
|
/**
 * Parses a variable declaration.
 *
 * @param tokens the full token list
 * @param index index of the declaration keyword token (i.e. `const`, `fn`)
 * @returns the declaration node and the index of the last token consumed
 */
function makeVariableDeclaration(
  tokens: Token[],
  index: number
): { declaration: VariableDeclaration; lastIndex: number } {
  const keyword = tokens[index]
  // The declarators begin at the first meaningful token after the keyword.
  const firstDeclarator = nextMeaningfulToken(tokens, index)
  const { declarations, lastIndex } = makeVariableDeclarators(
    tokens,
    firstDeclarator.index
  )

  let kind: VariableDeclaration['kind']
  switch (keyword.value) {
    case 'const':
      kind = 'const'
      break
    case 'fn':
      kind = 'fn'
      break
    default:
      kind = 'unknown'
  }

  const declaration: VariableDeclaration = {
    type: 'VariableDeclaration',
    start: keyword.start,
    end: declarations[declarations.length - 1].end,
    kind,
    declarations,
  }
  return { declaration, lastIndex }
}
|
|
|
|
|
2022-11-26 19:03:09 +11:00
|
|
|
/**
 * Any node that can appear where a value is expected (initialisers, call
 * arguments, array elements, object property values, ...).
 */
export type Value =
  | Literal
  | Identifier
  | BinaryExpression
  | FunctionExpression
  | CallExpression
  | PipeExpression
  | PipeSubstitution
  | ArrayExpression
  | ObjectExpression
  | MemberExpression
  | UnaryExpression
|
2022-11-18 08:20:18 +11:00
|
|
|
|
|
|
|
/**
 * Parses the value that starts at `index` and reports which node kind it is.
 *
 * The checks run in order and the first match wins:
 *  1. next token is `(`   -> call expression, or a binary expression when an
 *                            operator other than `|>` follows the call
 *  2. word/number/string followed by an operator -> binary expression
 *  3. `{`                 -> object expression
 *  4. `[`                 -> array expression
 *  5. word followed by `.` or `[` -> member expression
 *  6. word                -> identifier
 *  7. number/string       -> literal
 *  8. `(` ... `)` followed by `=>` -> function expression
 *  9. `-`                 -> unary expression
 *
 * The look-ahead token may be missing (the value can be the last token), so
 * every access to it goes through optional chaining.
 *
 * @param tokens the full token list
 * @param index index of the first token of the value
 * @returns the parsed node and the index of the last token it covers
 * @throws Error when a parenthesised expression is not a function
 *         expression, or when no form matches
 */
function makeValue(
  tokens: Token[],
  index: number
): { value: Value; lastIndex: number } {
  const currentToken = tokens[index]
  const { token: nextToken, index: nextTokenIndex } = nextMeaningfulToken(
    tokens,
    index
  )
  if (nextToken?.type === 'brace' && nextToken.value === '(') {
    const endIndex = findClosingBrace(tokens, nextTokenIndex)
    const tokenAfterCallExpression = nextMeaningfulToken(tokens, endIndex)
    if (
      tokenAfterCallExpression?.token?.type === 'operator' &&
      tokenAfterCallExpression.token.value !== '|>'
    ) {
      // The call is only the left operand of a larger binary expression.
      const { expression, lastIndex } = makeBinaryExpression(tokens, index)
      return {
        value: expression,
        lastIndex,
      }
    }
    const { expression, lastIndex } = makeCallExpression(tokens, index)
    return {
      value: expression,
      lastIndex,
    }
  }
  if (
    (currentToken.type === 'word' ||
      currentToken.type === 'number' ||
      currentToken.type === 'string') &&
    nextToken?.type === 'operator'
  ) {
    const { expression, lastIndex } = makeBinaryExpression(tokens, index)
    return {
      value: expression,
      lastIndex,
    }
  }
  if (currentToken.type === 'brace' && currentToken.value === '{') {
    const objExp = makeObjectExpression(tokens, index)
    return {
      value: objExp.expression,
      lastIndex: objExp.lastIndex,
    }
  }
  if (currentToken.type === 'brace' && currentToken.value === '[') {
    const arrExp = makeArrayExpression(tokens, index)
    return {
      value: arrExp.expression,
      lastIndex: arrExp.lastIndex,
    }
  }
  if (
    currentToken.type === 'word' &&
    // `nextToken` can be absent when the word is the final token; without
    // the optional chaining this check would throw instead of falling
    // through to the plain identifier case below.
    (nextToken?.type === 'period' ||
      (nextToken?.type === 'brace' && nextToken.value === '['))
  ) {
    const memberExpression = makeMemberExpression(tokens, index)
    return {
      value: memberExpression.expression,
      lastIndex: memberExpression.lastIndex,
    }
  }
  if (currentToken.type === 'word') {
    const identifier = makeIdentifier(tokens, index)
    return {
      value: identifier,
      lastIndex: index,
    }
  }
  if (currentToken.type === 'number' || currentToken.type === 'string') {
    const literal = makeLiteral(tokens, index)
    return {
      value: literal,
      lastIndex: index,
    }
  }
  if (currentToken.type === 'brace' && currentToken.value === '(') {
    const closingBraceIndex = findClosingBrace(tokens, index)
    const arrowToken = nextMeaningfulToken(tokens, closingBraceIndex)
    if (
      // Guarded for the same reason as above: nothing may follow the `)`.
      arrowToken?.token?.type === 'operator' &&
      arrowToken.token.value === '=>'
    ) {
      const { expression, lastIndex: arrowFunctionLastIndex } =
        makeFunctionExpression(tokens, index)
      return {
        value: expression,
        lastIndex: arrowFunctionLastIndex,
      }
    } else {
      throw new Error('TODO - handle expression with braces')
    }
  }
  if (currentToken.type === 'operator' && currentToken.value === '-') {
    const { expression, lastIndex } = makeUnaryExpression(tokens, index)
    return { value: expression, lastIndex }
  }
  throw new Error('Expected a previous Value if statement to match')
}
|
|
|
|
|
2023-01-04 01:28:26 +11:00
|
|
|
/**
 * One `name = value` pair inside a variable declaration.
 */
export interface VariableDeclarator extends GeneralStatement {
  type: 'VariableDeclarator'
  /** The name being declared. */
  id: Identifier
  /** The value the name is initialised with. */
  init: Value
}
|
|
|
|
|
|
|
|
/**
 * Parses a single declarator (`name = value`) whose name token is at `index`
 * and appends it to `previousDeclarators`.
 *
 * When `hasPipeOperator` reports a pipe for the initialiser, the initialiser
 * is parsed with `makePipeExpression`; otherwise with `makeValue`.
 *
 * @param tokens the full token list
 * @param index index of the declarator's name token
 * @param previousDeclarators declarators that should precede the new one
 * @returns the combined declarator list and the index of the last token of
 *          the initialiser
 */
function makeVariableDeclarators(
  tokens: Token[],
  index: number,
  previousDeclarators: VariableDeclarator[] = []
): {
  declarations: VariableDeclarator[]
  lastIndex: number
} {
  const currentToken = tokens[index]
  const assignmentToken = nextMeaningfulToken(tokens, index)
  const contentsStartToken = nextMeaningfulToken(tokens, assignmentToken.index)
  // Start looking for a pipe from the first token of the initialiser when the
  // token after the name is an operator; otherwise from that token itself.
  const pipeStartIndex =
    assignmentToken?.token?.type === 'operator'
      ? contentsStartToken.index
      : assignmentToken.index
  const nextPipeOperator = hasPipeOperator(tokens, pipeStartIndex)

  let init: Value
  let lastIndex: number
  if (nextPipeOperator) {
    const { expression, lastIndex: pipeLastIndex } = makePipeExpression(
      tokens,
      assignmentToken.index
    )
    init = expression
    lastIndex = pipeLastIndex
  } else {
    const { value, lastIndex: valueLastIndex } = makeValue(
      tokens,
      contentsStartToken.index
    )
    init = value
    lastIndex = valueLastIndex
  }

  const currentDeclarator: VariableDeclarator = {
    type: 'VariableDeclarator',
    start: currentToken.start,
    end: tokens[lastIndex].end,
    id: makeIdentifier(tokens, index),
    init,
  }
  return {
    declarations: [...previousDeclarators, currentDeclarator],
    lastIndex,
  }
}
|
|
|
|
|
2023-03-02 21:19:11 +11:00
|
|
|
/**
 * Node kinds listed as possible parts of a binary expression.
 */
export type BinaryPart =
  | Literal
  | Identifier
  | BinaryExpression
  | CallExpression
  | UnaryExpression
|
2022-11-13 11:14:30 +11:00
|
|
|
// | MemberExpression
|
|
|
|
// | ArrayExpression
|
|
|
|
// | ObjectExpression
|
|
|
|
// | LogicalExpression
|
|
|
|
// | ConditionalExpression
|
|
|
|
|
2022-11-26 19:03:09 +11:00
|
|
|
/**
 * A literal value taken from a single token.
 */
export interface Literal extends GeneralStatement {
  type: 'Literal'
  /** The interpreted value (e.g. a number, or a string without its quotes). */
  value: string | number | boolean | null
  /** The token text exactly as it appeared. */
  raw: string
}
|
|
|
|
|
2023-01-08 16:37:31 +11:00
|
|
|
/**
 * A name taken from a single token.
 */
export interface Identifier extends GeneralStatement {
  type: 'Identifier'
  /** The token's text. */
  name: string
}
|
|
|
|
|
|
|
|
/**
 * Turns the token at `index` into an `Identifier` node.
 *
 * @param token the full token list
 * @param index index of the token to convert
 * @returns an identifier with the token's `start`, `end` and `value` as name
 */
function makeIdentifier(token: Token[], index: number): Identifier {
  const { start, end, value: name } = token[index]
  return { type: 'Identifier', start, end, name }
}
|
|
|
|
|
2023-02-12 10:56:45 +11:00
|
|
|
/**
 * Node created for a `%` operator token used as a call argument.
 */
export interface PipeSubstitution extends GeneralStatement {
  type: 'PipeSubstitution'
}
|
|
|
|
|
2022-11-13 11:14:30 +11:00
|
|
|
/**
 * Turns the token at `index` into a `Literal` node.
 *
 * @param tokens the full token list
 * @param index index of the token to convert
 * @returns a literal whose `raw` is the token text and whose `value` is the
 *          interpreted form
 */
function makeLiteral(tokens: Token[], index: number): Literal {
  const { type, value: raw, start, end } = tokens[index]

  let value: Literal['value']
  if (type === 'number') {
    value = Number(raw)
  } else {
    // Anything else is treated as quoted text: drop the first and last
    // character (the quotes).
    value = raw.slice(1, -1)
  }

  return { type: 'Literal', start, end, value, raw }
}
|
|
|
|
|
2022-12-30 21:53:50 +11:00
|
|
|
/**
 * An array literal: `[a, b, ...]`.
 */
export interface ArrayExpression extends GeneralStatement {
  type: 'ArrayExpression'
  /** Element values in source order. */
  elements: Value[]
}
|
|
|
|
|
|
|
|
/**
 * Collects the elements of an array literal.
 *
 * Should be called with the index of the first token after the opening
 * brace.
 *
 * @param tokens the full token list
 * @param index index of the first element token (or of `]` for an empty array)
 * @param previousElements elements that should precede the parsed ones
 * @returns the elements and `lastIndex`, the index of the closing `]`
 * @throws Error when an element is followed by neither `,` nor `]`
 */
function makeArrayElements(
  tokens: Token[],
  index: number,
  previousElements: Value[] = []
): { elements: ArrayExpression['elements']; lastIndex: number } {
  let cursor = index
  let elements = previousElements

  for (;;) {
    const candidate = tokens[cursor]
    if (candidate.type === 'brace' && candidate.value === ']') {
      return { elements, lastIndex: cursor }
    }

    const { value, lastIndex: elementEnd } = makeValue(tokens, cursor)
    const separator = nextMeaningfulToken(tokens, elementEnd)
    const closesArray =
      separator.token.type === 'brace' && separator.token.value === ']'
    if (!closesArray && separator.token.type !== 'comma') {
      throw new Error('Expected a comma or closing brace')
    }

    // On `]` stay on it so the next pass returns; on `,` move to the token
    // that starts the next element.
    cursor = closesArray
      ? separator.index
      : nextMeaningfulToken(tokens, separator.index).index
    elements = [...elements, value]
  }
}
|
|
|
|
|
|
|
|
/**
 * Parses an array literal.
 *
 * Should be called with the index of the array's opening brace `[`.
 *
 * @param tokens the full token list
 * @param index index of the `[` token
 * @returns the array node and `lastIndex`, the index of the closing `]`
 */
function makeArrayExpression(
  tokens: Token[],
  index: number
): {
  expression: ArrayExpression
  lastIndex: number
} {
  const { index: firstElementIndex } = nextMeaningfulToken(tokens, index)
  const { elements, lastIndex } = makeArrayElements(tokens, firstElementIndex)

  const expression: ArrayExpression = {
    type: 'ArrayExpression',
    start: tokens[index].start,
    end: tokens[lastIndex].end,
    elements,
  }
  return { expression, lastIndex }
}
|
|
|
|
|
2023-01-01 21:48:30 +11:00
|
|
|
/**
 * An object literal: `{ key: value, ... }`.
 */
export interface ObjectExpression extends GeneralStatement {
  type: 'ObjectExpression'
  /** Properties in source order. */
  properties: ObjectProperty[]
}
|
|
|
|
|
|
|
|
/**
 * A single `key: value` entry of an object literal.
 */
interface ObjectProperty extends GeneralStatement {
  type: 'ObjectProperty'
  /** The property name. */
  key: Identifier
  /** The property value. */
  value: Value
}
|
|
|
|
|
|
|
|
/**
 * Parses an object literal.
 *
 * Should be called with the index of the opening brace `{`.
 *
 * @param tokens the full token list
 * @param index index of the `{` token
 * @returns the object node and `lastIndex`, the index of the closing `}`
 */
function makeObjectExpression(
  tokens: Token[],
  index: number
): {
  expression: ObjectExpression
  lastIndex: number
} {
  const { index: firstPropertyIndex } = nextMeaningfulToken(tokens, index)
  const { properties, lastIndex } = makeObjectProperties(
    tokens,
    firstPropertyIndex
  )

  const expression: ObjectExpression = {
    type: 'ObjectExpression',
    start: tokens[index].start,
    end: tokens[lastIndex].end,
    properties,
  }
  return { expression, lastIndex }
}
|
|
|
|
|
|
|
|
/**
 * Collects the properties of an object literal.
 *
 * Should be called with the index of the first key after the opening
 * brace `{`.
 *
 * @param tokens the full token list
 * @param index index of the first key token (or of `}` for an empty object)
 * @param previousProperties properties that should precede the parsed ones
 * @returns the properties and `lastIndex`, the index of the closing `}`
 */
function makeObjectProperties(
  tokens: Token[],
  index: number,
  previousProperties: ObjectProperty[] = []
): { properties: ObjectProperty[]; lastIndex: number } {
  let cursor = index
  let properties = previousProperties

  for (;;) {
    const keyToken = tokens[cursor]
    if (keyToken.type === 'brace' && keyToken.value === '}') {
      return { properties, lastIndex: cursor }
    }

    // key, then the colon, then the first token of the value
    const colon = nextMeaningfulToken(tokens, cursor)
    const valueStart = nextMeaningfulToken(tokens, colon.index)
    const { value, lastIndex: valueEnd } = makeValue(tokens, valueStart.index)
    const separator = nextMeaningfulToken(tokens, valueEnd)

    const property: ObjectProperty = {
      type: 'ObjectProperty',
      start: keyToken.start,
      end: value.end,
      key: makeIdentifier(tokens, cursor),
      value,
    }

    // Looked up unconditionally; only used when the separator is not `}`.
    const afterSeparator = nextMeaningfulToken(tokens, separator.index)
    const closesObject =
      separator.token.type === 'brace' && separator.token.value === '}'

    // On `}` stay on it so the next pass returns; otherwise move to the
    // next key.
    cursor = closesObject ? separator.index : afterSeparator.index
    properties = [...properties, property]
  }
}
|
|
|
|
|
2023-01-03 19:41:27 +11:00
|
|
|
/**
 * A property access. Chained accesses nest to the left: the `object` of the
 * outer node is the member expression for everything before the last key.
 */
export interface MemberExpression extends GeneralStatement {
  type: 'MemberExpression'
  /** What is being accessed: a plain name or another member expression. */
  object: MemberExpression | Identifier
  /** The key being read. */
  property: Identifier | Literal
  /** Copied from the key info's `computed` flag. */
  computed: boolean
}
|
|
|
|
|
|
|
|
/**
 * Parses a member expression whose root identifier is at `index`.
 *
 * The keys reported by `collectObjectKeys` are folded left-to-right so that
 * each additional key wraps the expression built so far.
 *
 * @param tokens the full token list
 * @param index index of the root identifier token
 * @returns the outermost member expression and `lastIndex`, the `index`
 *          recorded for the final key
 * @throws Error when no key follows the root identifier
 */
function makeMemberExpression(
  tokens: Token[],
  index: number
): { expression: MemberExpression; lastIndex: number } {
  const keys = collectObjectKeys(tokens, index)
  const lastKey = keys[keys.length - 1]
  const [firstKey, ...remainingKeys] = keys
  if (!firstKey) throw new Error('Expected a key')

  const root = makeIdentifier(tokens, index)
  // Every nesting level starts at the root token.
  const start = tokens[index].start

  const innermost: MemberExpression = {
    type: 'MemberExpression',
    start,
    end: tokens[firstKey.index].end,
    object: root,
    property: firstKey.key,
    computed: firstKey.computed,
  }

  const expression = remainingKeys.reduce<MemberExpression>(
    (object, { key, computed, index: keyIndex }) => ({
      type: 'MemberExpression',
      start,
      end: tokens[keyIndex].end,
      object,
      property: key,
      computed,
    }),
    innermost
  )

  return { expression, lastIndex: lastKey.index }
}
|
|
|
|
|
|
|
|
/** One key of a member-expression chain, as gathered by collectObjectKeys. */
interface ObjectKeyInfo {
  /** The parsed key node. */
  key: Identifier | Literal
  /** Token index where this access ends: the closing `]` for bracket access, otherwise the key token. */
  index: number
  /** True when the key is a word token wrapped in square brackets. */
  computed: boolean
}
|
|
|
|
|
|
|
|
/**
 * Recursively gathers the keys of a member-expression chain.
 *
 * @param tokens - the full token stream
 * @param index - index of the token the next accessor should follow
 *   (the root identifier on the first call, the previous key token afterwards)
 * @param previousKeys - keys gathered by earlier recursion steps
 * @returns every key found, in source order
 */
function collectObjectKeys(
  tokens: Token[],
  index: number,
  previousKeys: ObjectKeyInfo[] = []
): ObjectKeyInfo[] {
  const following = nextMeaningfulToken(tokens, index)

  // Step over the `]` that closes the previous bracket access, if there is one.
  let accessor = following
  if (following?.token?.type === 'brace' && following.token.value === ']') {
    accessor = nextMeaningfulToken(tokens, following.index)
  }

  // NOTE(review): any 'brace' token continues the chain here, not only '[' — confirm this is intended.
  const accessorType = accessor?.token?.type
  const continuesChain = accessorType === 'period' || accessorType === 'brace'
  if (!continuesChain) {
    return previousKeys
  }

  const keyToken = nextMeaningfulToken(tokens, accessor.index)
  const afterKey = nextMeaningfulToken(tokens, keyToken.index)
  const isBraced =
    afterKey?.token?.type === 'brace' && afterKey?.token?.value === ']'
  const isWordKey = keyToken.token.type === 'word'

  const key = isWordKey
    ? makeIdentifier(tokens, keyToken.index)
    : makeLiteral(tokens, keyToken.index)

  const keyInfo: ObjectKeyInfo = {
    key,
    index: isBraced ? afterKey.index : keyToken.index,
    computed: isBraced && isWordKey,
  }

  return collectObjectKeys(tokens, keyToken.index, [...previousKeys, keyInfo])
}
|
|
|
|
|
2022-11-20 09:41:21 +11:00
|
|
|
/** AST node for an infix operation between two operands. */
export interface BinaryExpression extends GeneralStatement {
  type: 'BinaryExpression'
  /** The operator's source text. */
  operator: string
  /** Left-hand operand. */
  left: BinaryPart
  /** Right-hand operand. */
  right: BinaryPart
}
|
|
|
|
|
2023-01-21 21:23:01 +11:00
|
|
|
/**
 * Finds the index of the last token of the binary expression starting at `index`.
 *
 * The operand at `index` may be a parenthesised group, a call (`word` directly
 * followed by `(`), or a single token. While the operand is followed by an
 * operator other than `|>`, the search continues with the next operand.
 *
 * @param tokens - the full token stream
 * @param index - index of the first token of an operand
 * @returns index of the final token belonging to the expression
 */
export function findEndOfBinaryExpression(
  tokens: Token[],
  index: number
): number {
  const currentToken = tokens[index]
  const adjacentToken = tokens?.[index + 1]

  // Work out where the current operand ends.
  let operandEnd = index
  if (currentToken.type === 'brace' && currentToken.value === '(') {
    operandEnd = findClosingBrace(tokens, index)
  } else if (
    currentToken.type === 'word' &&
    adjacentToken?.type === 'brace' &&
    adjacentToken.value === '('
  ) {
    operandEnd = findClosingBrace(tokens, index + 1)
  }

  // The pipe operator ends a binary expression; it never joins operands.
  const maybeOperator = nextMeaningfulToken(tokens, operandEnd)
  const joinsAnotherOperand =
    maybeOperator?.token?.type === 'operator' &&
    maybeOperator?.token?.value !== '|>'
  if (!joinsAnotherOperand) {
    return operandEnd
  }

  const nextOperand = nextMeaningfulToken(tokens, maybeOperator.index)
  return findEndOfBinaryExpression(tokens, nextOperand.index)
}
|
|
|
|
|
2022-11-18 08:20:18 +11:00
|
|
|
/**
 * Parses the binary expression starting at `index`.
 *
 * @param tokens - the full token stream
 * @param index - index of the expression's first token
 * @returns the parsed expression and the index of its last token
 */
function makeBinaryExpression(
  tokens: Token[],
  index: number
): { expression: BinaryExpression; lastIndex: number } {
  const lastIndex = findEndOfBinaryExpression(tokens, index)
  // Only the tokens of this expression (end inclusive) go to the math-expression parser.
  const expressionTokens = tokens.slice(index, lastIndex + 1)
  return {
    expression: parseExpression(expressionTokens),
    lastIndex,
  }
}
|
|
|
|
|
2023-03-02 21:19:11 +11:00
|
|
|
/** AST node for a prefix operator applied to a single operand. */
export interface UnaryExpression extends GeneralStatement {
  type: 'UnaryExpression'
  /** The prefix operator. */
  operator: '-' | '!'
  /** The operand the operator applies to. */
  argument: BinaryPart
}
|
|
|
|
|
|
|
|
/**
 * Parses a unary expression whose operator token is at `index`.
 *
 * @param tokens - the full token stream
 * @param index - index of the operator token
 * @returns the UnaryExpression and the index of the operand's last token
 */
function makeUnaryExpression(
  tokens: Token[],
  index: number
): { expression: UnaryExpression; lastIndex: number } {
  const operatorToken = tokens[index]
  const operandStart = nextMeaningfulToken(tokens, index)
  const operand = makeValue(tokens, operandStart.index)

  const expression: UnaryExpression = {
    type: 'UnaryExpression',
    // Any operator value other than '!' is recorded as '-'.
    operator: operatorToken.value === '!' ? '!' : '-',
    start: operatorToken.start,
    end: tokens[operand.lastIndex].end,
    argument: operand.value as BinaryPart,
  }

  return { expression, lastIndex: operand.lastIndex }
}
|
|
|
|
|
2022-12-02 21:00:57 +11:00
|
|
|
/** AST node for a chain of values joined by the `|>` operator. */
export interface PipeExpression extends GeneralStatement {
  type: 'PipeExpression'
  /** The piped values, in source order. */
  body: Value[]
  /** Non-code nodes (comments/whitespace) found between pipe steps. */
  nonCodeMeta: NoneCodeMeta
}
|
|
|
|
|
|
|
|
/**
 * Parses a pipe expression starting at `index`.
 *
 * @param tokens - the full token stream
 * @param index - index handed to makePipeBody as the start of the pipe
 * @returns the PipeExpression and the index of its last token
 */
function makePipeExpression(
  tokens: Token[],
  index: number
): { expression: PipeExpression; lastIndex: number } {
  const startToken = tokens[index]
  const pipeBody = makePipeBody(tokens, index)
  const lastIndex = pipeBody.lastIndex

  const expression: PipeExpression = {
    type: 'PipeExpression',
    start: startToken.start,
    end: tokens[lastIndex].end,
    body: pipeBody.body,
    nonCodeMeta: pipeBody.nonCodeMeta,
  }

  return { expression, lastIndex }
}
|
|
|
|
|
|
|
|
/**
 * Recursively gathers the values of a pipe chain.
 *
 * @param tokens - the full token stream
 * @param index - index of an operator token; the value parsed is the one that follows it
 * @param previousValues - values gathered by earlier recursion steps
 * @param previousNonCodeMeta - non-code nodes gathered by earlier recursion steps
 * @returns all piped values, the index of the last token consumed, and the non-code metadata
 * @throws Error when the token at `index` is not an operator
 */
function makePipeBody(
  tokens: Token[],
  index: number,
  previousValues: Value[] = [],
  previousNonCodeMeta: NoneCodeMeta = {}
): { body: Value[]; lastIndex: number; nonCodeMeta: NoneCodeMeta } {
  const nonCodeMeta = { ...previousNonCodeMeta }
  const currentToken = tokens[index]
  const expressionStart = nextMeaningfulToken(tokens, index)

  if (currentToken.type !== 'operator') {
    throw new Error('Expected a previous PipeValue if statement to match')
  }

  const { value, lastIndex } = makeValue(tokens, expressionStart.index)
  const body: Value[] = [...previousValues, value]

  const nextPipeToken = hasPipeOperator(tokens, index)
  if (!nextPipeToken) {
    return { body, lastIndex, nonCodeMeta }
  }

  // Comments/whitespace before the next pipe are keyed by the number of values gathered before this one.
  if (nextPipeToken.bonusNonCodeNode) {
    nonCodeMeta[previousValues.length] = nextPipeToken.bonusNonCodeNode
  }

  return makePipeBody(tokens, nextPipeToken.index, body, nonCodeMeta)
}
|
|
|
|
|
2022-11-26 19:03:09 +11:00
|
|
|
/** AST node for a function expression of the form `(params) => { body }`. */
export interface FunctionExpression extends GeneralStatement {
  type: 'FunctionExpression'
  /** The function's own name, or null when it has none. */
  id: Identifier | null
  /** Parameter identifiers, in declaration order. */
  params: Identifier[]
  /** The function body. */
  body: BlockStatement
}
|
|
|
|
|
|
|
|
/**
 * Parses a function expression whose parameter list opens at `index`.
 *
 * @param tokens - the full token stream
 * @param index - index of the opening brace of the parameter list
 * @returns the FunctionExpression (always with `id: null`) and the index of the body's last token
 */
function makeFunctionExpression(
  tokens: Token[],
  index: number
): { expression: FunctionExpression; lastIndex: number } {
  const openingToken = tokens[index]

  // Layout: <params> <arrow> <block>; locate the block by walking past the params and the arrow.
  const paramsEndIndex = findClosingBrace(tokens, index)
  const arrowToken = nextMeaningfulToken(tokens, paramsEndIndex)
  const bodyStartToken = nextMeaningfulToken(tokens, arrowToken.index)

  const { params } = makeParams(tokens, index)
  const parsedBody = makeBlockStatement(tokens, bodyStartToken.index)
  const lastIndex = parsedBody.lastIndex

  const expression: FunctionExpression = {
    type: 'FunctionExpression',
    start: openingToken.start,
    end: tokens[lastIndex].end,
    id: null,
    params,
    body: parsedBody.block,
  }

  return { expression, lastIndex }
}
|
|
|
|
|
2022-11-18 08:20:18 +11:00
|
|
|
/**
 * Recursively gathers the parameter identifiers of a function expression.
 *
 * @param tokens - the full token stream
 * @param index - index of the opening brace on the first call, then of each following separator
 * @param previousParams - identifiers gathered by earlier recursion steps
 * @returns the parameter identifiers and the index recursion stopped at
 */
function makeParams(
  tokens: Token[],
  index: number,
  previousParams: Identifier[] = []
): { params: Identifier[]; lastIndex: number } {
  const isClosingParen = (candidate: Token): boolean =>
    candidate.type === 'brace' && candidate.value === ')'

  const braceOrCommaToken = tokens[index]
  const argumentToken = nextMeaningfulToken(tokens, index)

  // Stop when the list closes right after this token, or this token is itself the closing brace.
  if (isClosingParen(argumentToken.token) || isClosingParen(braceOrCommaToken)) {
    return { params: previousParams, lastIndex: index }
  }

  const nextBraceOrCommaToken = nextMeaningfulToken(tokens, argumentToken.index)
  const identifier = makeIdentifier(tokens, argumentToken.index)

  return makeParams(tokens, nextBraceOrCommaToken.index, [
    ...previousParams,
    identifier,
  ])
}
|
|
|
|
|
2023-01-06 09:29:26 +11:00
|
|
|
/** AST node for a `{ ... }` block of statements. */
export interface BlockStatement extends GeneralStatement {
  type: 'BlockStatement'
  /** The statements inside the block, in source order. */
  body: BodyItem[]
  /** Non-code nodes (comments/whitespace) found around the block's statements. */
  nonCodeMeta: NoneCodeMeta
}
|
|
|
|
|
|
|
|
/**
 * Parses a block statement whose opening curly brace is at `index`.
 *
 * @param tokens - the full token stream
 * @param index - index of the opening curly brace
 * @returns the BlockStatement and the index parsing stopped at
 */
function makeBlockStatement(
  tokens: Token[],
  index: number
): { block: BlockStatement; lastIndex: number } {
  const openingCurly = tokens[index]
  const firstInnerIndex = index + 1
  // A closing brace directly after the opening one is an empty block; makeBody is not needed.
  const isEmptyBlock = tokens[firstInnerIndex].value === '}'

  const { body, lastIndex, nonCodeMeta } = isEmptyBlock
    ? { body: [], lastIndex: firstInnerIndex, nonCodeMeta: {} }
    : makeBody({ tokens, tokenIndex: firstInnerIndex })

  const block: BlockStatement = {
    type: 'BlockStatement',
    start: openingCurly.start,
    // Falls back to 0 when lastIndex is past the end of the token stream.
    end: tokens[lastIndex]?.end || 0,
    body,
    nonCodeMeta,
  }

  return { block, lastIndex }
}
|
|
|
|
|
2022-11-18 08:20:18 +11:00
|
|
|
/** AST node for a `return <value>` statement. */
interface ReturnStatement extends GeneralStatement {
  type: 'ReturnStatement'
  /** The value being returned. */
  argument: Value
}
|
|
|
|
|
|
|
|
/**
 * Parses a return statement whose `return` keyword is at `index`.
 *
 * @param tokens - the full token stream
 * @param index - index of the `return` keyword token
 * @returns the ReturnStatement and the index of the returned value's last token
 */
function makeReturnStatement(
  tokens: Token[],
  index: number
): { statement: ReturnStatement; lastIndex: number } {
  const returnKeyword = tokens[index]
  const valueStart = nextMeaningfulToken(tokens, index)
  const parsedValue = makeValue(tokens, valueStart.index)
  const lastIndex = parsedValue.lastIndex

  const statement: ReturnStatement = {
    type: 'ReturnStatement',
    start: returnKeyword.start,
    end: tokens[lastIndex].end,
    argument: parsedValue.value,
  }

  return { statement, lastIndex }
}
|
|
|
|
|
2022-11-26 08:34:23 +11:00
|
|
|
/** Union of a whole program, a list of expression statements, a binary expression, or a literal. */
export type All =
  | Program
  | ExpressionStatement[]
  | BinaryExpression
  | Literal
|
2022-11-13 11:14:30 +11:00
|
|
|
|
2023-03-03 20:35:48 +11:00
|
|
|
/**
 * Returns the token `offset` positions after `index`, skipping over a run of
 * non-code tokens if that position holds one.
 *
 * @param tokens - the full token stream
 * @param index - index to search from
 * @param offset - how far past `index` to start looking (default 1)
 * @returns the token found and its index. When the stream is exhausted,
 *   `token` is undefined and `index` is `tokens.length`. When non-code tokens
 *   were skipped and produced a node with a truthy value, that node is
 *   returned as `bonusNonCodeNode`.
 */
function nextMeaningfulToken(
  tokens: Token[],
  index: number,
  offset: number = 1
): { token: Token; index: number; bonusNonCodeNode?: NoneCodeNode } {
  const candidateIndex = index + offset
  const candidate = tokens[candidateIndex]

  if (!candidate) {
    return { token: candidate, index: tokens.length }
  }

  if (!isNotCodeToken(candidate)) {
    return { token: candidate, index: candidateIndex }
  }

  // Consume the non-code run and resume at the token right after it.
  const nonCodeNode = makeNoneCodeNode(tokens, candidateIndex)
  const resumeIndex = nonCodeNode.lastIndex + 1
  return {
    token: tokens[resumeIndex],
    index: resumeIndex,
    bonusNonCodeNode: nonCodeNode?.node?.value ? nonCodeNode.node : undefined,
  }
}
|
|
|
|
|
2022-11-20 17:43:21 +11:00
|
|
|
/**
 * Walks backwards from `index` to the nearest token that is not a non-code token.
 *
 * @param tokens - the full token stream
 * @param index - index to search backwards from
 * @param offset - how far before `index` to start looking (default 1)
 * @returns the token found and its index; when the search runs off the start
 *   of the stream, `token` is undefined and `index` is 0
 */
function previousMeaningfulToken(
  tokens: Token[],
  index: number,
  offset: number = 1
): { token: Token; index: number } {
  let distance = offset
  for (;;) {
    const candidateIndex = index - distance
    const candidate = tokens[candidateIndex]
    if (!candidate) {
      return { token: candidate, index: 0 }
    }
    if (!isNotCodeToken(candidate)) {
      return { token: candidate, index: candidateIndex }
    }
    // Non-code token: look one position further back.
    distance += 1
  }
}
|
|
|
|
|
2023-02-01 07:30:55 +11:00
|
|
|
/** A statement that can appear in a program or block body. */
type BodyItem =
  | ExpressionStatement
  | VariableDeclaration
  | ReturnStatement
|
2022-11-13 11:14:30 +11:00
|
|
|
|
2022-11-17 20:17:00 +11:00
|
|
|
/**
 * Recursively parses statements starting at `tokenIndex` until the token
 * stream ends or a closing curly brace is reached.
 *
 * Recognised statements:
 * - `const` / `fn` declarations (makeVariableDeclaration)
 * - `return` statements (makeReturnStatement)
 * - a word followed by `(` (makeExpressionStatement)
 * - a number or word followed by an operator (makeExpressionStatement); this
 *   form ends the body and parsing does not continue past it
 *
 * Non-code nodes (comments/whitespace) met along the way are stored in
 * `nonCodeMeta`: under `start` when they come before the first statement,
 * otherwise under the number of statements parsed so far.
 *
 * @param tokens - the full token stream
 * @param tokenIndex - index to start parsing at (default 0)
 * @param previousBody - statements gathered by earlier recursion steps
 * @param previousNonCodeMeta - non-code nodes gathered by earlier recursion steps
 * @returns the statements, the index parsing stopped at, and the non-code metadata
 * @throws Error('Unexpected token') when the token does not start a recognised statement
 */
function makeBody(
  {
    tokens,
    tokenIndex = 0,
  }: {
    tokens: Token[]
    tokenIndex?: number
  },
  previousBody: BodyItem[] = [],
  previousNonCodeMeta: NoneCodeMeta = {}
): { body: BodyItem[]; lastIndex: number; nonCodeMeta: NoneCodeMeta } {
  const nonCodeMeta = { ...previousNonCodeMeta }
  if (tokenIndex >= tokens.length) {
    return { body: previousBody, lastIndex: tokenIndex, nonCodeMeta }
  }

  const token = tokens[tokenIndex]
  if (token.type === 'brace' && token.value === '}') {
    return { body: previousBody, lastIndex: tokenIndex, nonCodeMeta }
  }

  if (isNotCodeToken(token)) {
    // Offset 0 makes nextMeaningfulToken consume the non-code run starting at this very token.
    const afterNonCode = nextMeaningfulToken(tokens, tokenIndex, 0)
    if (afterNonCode.bonusNonCodeNode) {
      if (previousBody.length === 0) {
        nonCodeMeta.start = afterNonCode.bonusNonCodeNode
      } else {
        nonCodeMeta[previousBody.length] = afterNonCode.bonusNonCodeNode
      }
    }
    return makeBody(
      { tokens, tokenIndex: afterNonCode.index },
      previousBody,
      nonCodeMeta
    )
  }

  const nextToken = nextMeaningfulToken(tokens, tokenIndex)
  if (nextToken.bonusNonCodeNode) {
    nonCodeMeta[previousBody.length] = nextToken.bonusNonCodeNode
  }

  // Appends a parsed statement, records non-code nodes that follow it, and parses the rest.
  const appendAndContinue = (
    item: BodyItem,
    itemLastIndex: number
  ): { body: BodyItem[]; lastIndex: number; nonCodeMeta: NoneCodeMeta } => {
    const nextThing = nextMeaningfulToken(tokens, itemLastIndex)
    if (nextThing.bonusNonCodeNode) {
      nonCodeMeta[previousBody.length] = nextThing.bonusNonCodeNode
    }
    return makeBody(
      { tokens, tokenIndex: nextThing.index },
      [...previousBody, item],
      nonCodeMeta
    )
  }

  if (
    token.type === 'word' &&
    (token.value === 'const' || token.value === 'fn')
  ) {
    const { declaration, lastIndex } = makeVariableDeclaration(
      tokens,
      tokenIndex
    )
    return appendAndContinue(declaration, lastIndex)
  }

  if (token.type === 'word' && token.value === 'return') {
    const { statement, lastIndex } = makeReturnStatement(tokens, tokenIndex)
    return appendAndContinue(statement, lastIndex)
  }

  // nextToken.token is undefined when `token` is the last meaningful token.
  // Optional chaining lets that case reach the 'Unexpected token' error
  // below instead of failing with a TypeError.
  if (
    token.type === 'word' &&
    nextToken.token?.type === 'brace' &&
    nextToken.token?.value === '('
  ) {
    const { expression, lastIndex } = makeExpressionStatement(
      tokens,
      tokenIndex
    )
    return appendAndContinue(expression, lastIndex)
  }

  if (
    (token.type === 'number' || token.type === 'word') &&
    nextToken.token?.type === 'operator'
  ) {
    // Any non-code node between this token and the operator was already recorded above.
    const { expression, lastIndex } = makeExpressionStatement(
      tokens,
      tokenIndex
    )
    return {
      body: [...previousBody, expression],
      nonCodeMeta,
      lastIndex,
    }
  }

  throw new Error('Unexpected token')
}
|
2022-11-13 11:14:30 +11:00
|
|
|
/**
 * Builds the Program AST for a token stream.
 *
 * @param tokens - the tokens to parse
 * @returns the Program node. `end` is the end of the last body item, or 0
 *   when the body is empty (no tokens, or only non-code tokens).
 */
export const abstractSyntaxTree = (tokens: Token[]): Program => {
  const { body, nonCodeMeta } = makeBody({ tokens })
  // An empty body has no last statement; fall back to 0 rather than dereferencing undefined.
  const lastStatement = body[body.length - 1]
  const program: Program = {
    type: 'Program',
    start: 0,
    end: lastStatement?.end ?? 0,
    body,
    nonCodeMeta,
  }
  return program
}
|
2022-11-17 16:06:38 +11:00
|
|
|
|
2023-03-03 20:35:48 +11:00
|
|
|
/**
 * Scans forward from `index` for the start of the next declaration: a `const`
 * or `fn` keyword, or a `(` whose matching closing brace is followed by `=>`.
 *
 * @param tokens - the full token stream
 * @param index - index to search after
 * @returns the matching token and its index, or `{ token: null, index: tokens.length - 1 }`
 *   when the stream is exhausted first
 */
function findNextDeclarationKeyword(
  tokens: Token[],
  index: number
): { token: Token | null; index: number } {
  let cursor = index
  for (;;) {
    const candidate = nextMeaningfulToken(tokens, cursor)
    if (candidate.index >= tokens.length) {
      return { token: null, index: tokens.length - 1 }
    }

    const { type, value } = candidate.token
    if (type === 'word' && (value === 'const' || value === 'fn')) {
      return candidate
    }

    if (type === 'brace' && value === '(') {
      const closingBraceIndex = findClosingBrace(tokens, candidate.index)
      const arrowToken = nextMeaningfulToken(tokens, closingBraceIndex)
      if (
        arrowToken?.token?.type === 'operator' &&
        arrowToken.token.value === '=>'
      ) {
        return candidate
      }
      // probably should do something else here
      // throw new Error('Unexpected token')
    }

    cursor = candidate.index
  }
}
|
|
|
|
|
2023-03-03 20:35:48 +11:00
|
|
|
/**
 * Scans forward from `index` for the next word token that is immediately
 * followed (with no whitespace) by `(`.
 *
 * @param tokens - the full token stream
 * @param index - index to search after
 * @returns the word token and its index, or `{ token: null, index: tokens.length - 1 }`
 *   when the stream is exhausted first
 */
function findNextCallExpression(
  tokens: Token[],
  index: number
): { token: Token | null; index: number } {
  let cursor = index
  for (;;) {
    const candidate = nextMeaningfulToken(tokens, cursor)
    // Raw neighbour, i.e. without skipping whitespace.
    const adjacentToken = tokens[candidate.index + 1]
    if (candidate.index >= tokens.length) {
      return { token: null, index: tokens.length - 1 }
    }

    const startsCall =
      candidate.token.type === 'word' &&
      adjacentToken?.type === 'brace' &&
      adjacentToken?.value === '('
    if (startsCall) {
      return candidate
    }

    cursor = candidate.index
  }
}
|
|
|
|
|
2023-03-03 20:35:48 +11:00
|
|
|
/**
 * Scans forward from `index` for the next `}` that does not close a nested
 * `{ ... }` group opened after `index`.
 *
 * Nested groups are skipped as a whole. The scan resumes from the nested
 * group's own closing brace, so the token right after it is examined too
 * (it may be the enclosing `}` being searched for).
 *
 * @param tokens - the full token stream
 * @param index - index to search after
 * @returns the closing-brace token and its index, or
 *   `{ token: null, index: tokens.length - 1 }` when the stream is exhausted first
 */
function findNextClosingCurlyBrace(
  tokens: Token[],
  index: number
): { token: Token | null; index: number } {
  let cursor = index
  for (;;) {
    const nextToken = nextMeaningfulToken(tokens, cursor)
    if (nextToken.index >= tokens.length) {
      return { token: null, index: tokens.length - 1 }
    }

    const { type, value } = nextToken.token
    if (type === 'brace' && value === '}') {
      return nextToken
    }

    // For a nested group, continue from its closing brace; otherwise from this token.
    cursor =
      type === 'brace' && value === '{'
        ? findClosingBrace(tokens, nextToken.index)
        : nextToken.index
  }
}
|
|
|
|
|
2022-12-02 21:00:57 +11:00
|
|
|
/**
 * Checks whether the expression at `index` is followed by a `|>` operator.
 *
 * On the initial call (`_limitIndex === -1`):
 * - if `index` starts a call expression, only the token after the call is checked;
 * - if `index` is an opening `{`, only the token after its closing brace is checked;
 * - otherwise tokens are scanned up to the next declaration keyword.
 *
 * @param tokens - the full token stream
 * @param index - index of the token to search from
 * @param _limitIndex - index the scan must stop before; -1 means "work it out"
 * @returns the `|>` token (with its index) when found, otherwise false
 */
export function hasPipeOperator(
  tokens: Token[],
  index: number,
  _limitIndex = -1
): ReturnType<typeof nextMeaningfulToken> | false {
  // this probably still needs some work
  // should be called on expression statements (i.e "lineTo" for lineTo(10, 10))
  const pipeOrFalse = (
    candidate: ReturnType<typeof nextMeaningfulToken>
  ): ReturnType<typeof nextMeaningfulToken> | false =>
    candidate?.token?.type === 'operator' && candidate.token.value === '|>'
      ? candidate
      : false

  let limitIndex = _limitIndex
  if (limitIndex === -1) {
    const callExpressionEnd = isCallExpression(tokens, index)
    if (callExpressionEnd !== -1) {
      return pipeOrFalse(nextMeaningfulToken(tokens, callExpressionEnd))
    }

    const currentToken = tokens[index]
    if (currentToken?.type === 'brace' && currentToken?.value === '{') {
      const closingBraceIndex = findClosingBrace(tokens, index)
      return pipeOrFalse(nextMeaningfulToken(tokens, closingBraceIndex))
    }

    limitIndex = findNextDeclarationKeyword(tokens, index).index
  }

  // Scan token by token until a pipe operator or the limit is reached.
  let cursor = index
  for (;;) {
    const nextToken = nextMeaningfulToken(tokens, cursor)
    if (nextToken.index >= limitIndex) {
      return false
    }
    if (nextToken.token.type === 'operator' && nextToken.token.value === '|>') {
      return nextToken
    }
    cursor = nextToken.index
  }
}
|
|
|
|
|
2022-11-17 16:06:38 +11:00
|
|
|
/**
 * Returns the index of the brace that closes the opening brace at `index`.
 *
 * Only braces of the same kind as the opening one are counted, so `[ { ] }`
 * started on `[` matches the `]`. Tokens are compared by `value` alone.
 *
 * The scan is iterative: it uses constant stack space regardless of how many
 * tokens sit between the braces.
 *
 * @param tokens - the token list being scanned
 * @param index - should be called with the index of the opening brace
 * @param _braceCount - nesting depth already accumulated; leave at 0 unless
 *   resuming a scan part-way through
 * @param _searchOpeningBrace - opening brace being matched when resuming a
 *   scan; leave empty to read it from `tokens[index]`
 * @returns index of the matching closing brace
 * @throws Error when a fresh scan does not start on `(`, `{` or `[`, or when
 *   the token list ends before the matching closing brace is found
 */
export function findClosingBrace(
  tokens: Token[],
  index: number,
  _braceCount: number = 0,
  _searchOpeningBrace: string = ''
): number {
  const closingBraceMap: { [key: string]: string } = {
    '(': ')',
    '{': '}',
    '[': ']',
  }
  let searchOpeningBrace = _searchOpeningBrace
  let braceCount = _braceCount

  const isFirstCall = !searchOpeningBrace && braceCount === 0
  if (isFirstCall) {
    const startToken = tokens[index]
    if (!startToken) {
      throw new Error(
        `expected to be started on a opening brace ( { [, instead found no token at index ${index}`
      )
    }
    searchOpeningBrace = startToken.value
    if (!['(', '{', '['].includes(searchOpeningBrace)) {
      throw new Error(
        `expected to be started on a opening brace ( { [, instead found '${searchOpeningBrace}'`
      )
    }
  }

  const closingBrace = closingBraceMap[searchOpeningBrace]
  for (let i = index; i < tokens.length; i++) {
    const value = tokens[i].value
    if (value === closingBrace) {
      if (braceCount === 1) {
        // depth returns to zero here: this is the matching brace
        return i
      }
      braceCount -= 1
    } else if (value === searchOpeningBrace) {
      braceCount += 1
    }
    // any other token is irrelevant to the nesting depth
  }

  throw new Error(
    `no closing '${closingBrace}' found for the '${searchOpeningBrace}' at index ${index}`
  )
}
|
2022-11-28 09:37:46 +11:00
|
|
|
|
2022-12-04 15:50:52 +11:00
|
|
|
/**
 * Checks whether the token at `index` starts a call expression, i.e. a `word`
 * token immediately followed (no whitespace in between) by `(`.
 *
 * @param tokens - the token list being scanned
 * @param index - index of the candidate callee token
 * @returns the index of the call's closing `)` as found by `findClosingBrace`,
 *   or -1 when the token does not start a call expression (including when
 *   `index` or `index + 1` is outside the token list)
 */
function isCallExpression(tokens: Token[], index: number): number {
  const currentToken = tokens[index]
  const veryNextToken = tokens[index + 1] // i.e. no whitespace
  // either token can be missing at the end of the list; that is simply
  // "not a call expression" rather than an error
  if (
    currentToken?.type === 'word' &&
    veryNextToken?.type === 'brace' &&
    veryNextToken.value === '('
  ) {
    return findClosingBrace(tokens, index + 1)
  }
  return -1
}
|
|
|
|
|
2022-12-04 15:50:52 +11:00
|
|
|
/**
 * Debug helper: renders the tokens around the given indexes on one line and
 * underlines each indexed token with `^___` markers on the line below, logs
 * the result and returns it.
 *
 * The rendered window runs from one token before the smallest index up to and
 * including the largest index (clamped to the token list). With no indexes
 * the whole token list is rendered.
 *
 * @param tokens - the token list being inspected
 * @param indexes - indexes of the tokens to highlight
 * @param msg - label prefixed to the first output line
 * @returns three lines joined with `\n`: header, token text, markers
 */
function debuggerr(tokens: Token[], indexes: number[], msg = ''): string {
  // return ''
  const sortedIndexes = [...indexes].sort((a, b) => a - b)
  const hasIndexes = sortedIndexes.length > 0
  // one token of leading context, never before the start of the list
  const start = hasIndexes ? Math.max(sortedIndexes[0] - 1, 0) : 0
  // slice end is exclusive, so +1 keeps the last highlighted token in view
  const end = hasIndexes
    ? Math.min(
        Math.max(sortedIndexes[sortedIndexes.length - 1] + 1, 0),
        tokens.length
      )
    : tokens.length

  // positions of the highlighted tokens relative to the rendered window
  const highlighted = new Set(indexes.map((i) => i - start))

  let topString = ''
  let bottomString = ''
  tokens.slice(start, end).forEach((token, offset) => {
    // newlines are flattened so both output lines stay aligned
    topString += token.value.replace(/\n/g, ' ')
    bottomString += highlighted.has(offset)
      ? '^'.padEnd(token.value.length, '_')
      : ' '.padEnd(token.value.length, ' ')
  })

  const debugResult = [
    `${msg} - debuggerr: ${sortedIndexes.join(',')}`,
    topString,
    bottomString,
  ].join('\n')
  console.log(debugResult)
  return debugResult
}
|
|
|
|
|
Add the ability to recast comments and some whitespace (#10)
* Add the ability to recast comments and some whitespace
Currently because whitespace or anything that's not needed for execution is not stored in the AST, it's hard to respect things like user formatting when recasting.
I think having a by-default-opinioned formatter is a good thing, but where this becomes problematic is when users wants to simply leave a blank space between some lines for a bit of breathing room, a code paragraph if you will, but maybe more importantly comments have not been implemented for the same reason, there wasn't a way with the current setup to insert them back in.
In some ways the most straightforward way to do this is to put whitespace and comments into the AST. Even though they are not crucial for execution, code-gen/recasting needs to be a first-class citizen in this lang so that's probably the long-term solution. However I'm trying to draw inspiration from other languages, and since it's not the norm to put comments et-al into the AST I haven't done so.
Because whitespace is tokenised already if not transformed into the AST, there is somewhat of a map of these things without going back to source code, so atm I'm experimenting with using this to insert extra linebreaks and comments back in between statements. I think this is a good compromise for the time being for what is a nice to have feature atm.
Because it's only going to respect non-code parts in between statements this will mean that you can't format objects or function params how you like (but I think this is good to have an opinioned fmt out of the box) and comments like myFunctionCall('a', /* inline comment */ b) will not work either.
* clean up
2023-01-23 14:50:58 +11:00
|
|
|
/**
 * Tells whether a token carries no executable meaning: whitespace, a line
 * comment or a block comment. A missing token counts as "is code" (false).
 *
 * @param token - the token to classify
 * @returns `true` for whitespace / comment tokens, `false` otherwise
 */
export function isNotCodeToken(token: Token): boolean {
  switch (token?.type) {
    case 'whitespace':
    case 'linecomment':
    case 'blockcomment':
      return true
    default:
      return false
  }
}
|