Files
modeling-app/src/lang/abstractSyntaxTree.ts

693 lines
18 KiB
TypeScript
Raw Normal View History

2022-11-13 11:14:30 +11:00
import { Token } from "./tokeniser";
/**
 * String tags that may appear in the `type` field of an AST node.
 *
 * Only a subset is produced by the builders in this file; the remaining
 * members are accepted by the type but not yet emitted here.
 */
type syntaxType =
  | "Program"
  | "ExpressionStatement"
  | "BinaryExpression"
  | "NumberLiteral"
  | "StringLiteral"
  | "CallExpression"
  | "Identifier"
  | "BlockStatement"
  | "IfStatement"
  | "WhileStatement"
  | "FunctionDeclaration"
  | "ReturnStatement"
  | "VariableDeclaration"
  | "VariableDeclarator"
  | "AssignmentExpression"
  | "UnaryExpression"
  | "MemberExpression"
  | "ArrayExpression"
  | "ObjectExpression"
  | "Property"
  | "LogicalExpression"
  | "ConditionalExpression"
  | "ForStatement"
  | "ForInStatement"
  | "ForOfStatement"
  | "BreakStatement"
  | "ContinueStatement"
  | "SwitchStatement"
  | "SwitchCase"
  | "ThrowStatement"
  | "TryStatement"
  | "CatchClause"
  | "ClassDeclaration"
  | "ClassBody"
  | "MethodDefinition"
  | "NewExpression"
  | "ThisExpression"
  | "UpdateExpression"
  // | "ArrowFunctionExpression"
  | "FunctionExpression"
  | "YieldExpression"
  | "AwaitExpression"
  | "ImportDeclaration"
  | "ImportSpecifier"
  | "ImportDefaultSpecifier"
  | "ImportNamespaceSpecifier"
  | "ExportNamedDeclaration"
  | "ExportDefaultDeclaration"
  | "ExportAllDeclaration"
  | "ExportSpecifier"
  | "TaggedTemplateExpression"
  | "TemplateLiteral"
  | "TemplateElement"
  | "SpreadElement"
  | "RestElement"
  | "SequenceExpression"
  | "DebuggerStatement"
  | "LabeledStatement"
  | "DoWhileStatement"
  | "WithStatement"
  | "EmptyStatement"
  | "Literal"
  | "ArrayPattern"
  | "ObjectPattern"
  | "AssignmentPattern"
  | "MetaProperty"
  | "Super"
  | "Import"
  | "RegExpLiteral"
  | "BooleanLiteral"
  | "NullLiteral"
  | "TypeAnnotation";
/**
 * Root node of a parsed token list.
 *
 * `body` holds the top-level statements in the order they were parsed.
 */
export interface Program {
  /** Node tag; the builder in this file always sets it to "Program". */
  type: syntaxType;
  /** Start position of the program (the builder in this file uses 0). */
  start: number;
  /** End position, taken from the last statement in `body`. */
  end: number;
  body: Body[];
}
/**
 * Fields shared by every AST node built in this file: a type tag plus the
 * `start`/`end` positions copied from the tokens the node was built from.
 */
interface GeneralStatement {
  type: syntaxType;
  start: number;
  end: number;
}
/** A statement that consists of a single expression (a call or a binary expression). */
interface ExpressionStatement extends GeneralStatement {
  type: "ExpressionStatement";
  /** The wrapped expression node. */
  expression: Value;
}
/**
 * Builds an `ExpressionStatement` starting at `tokens[index]`.
 *
 * If the next meaningful token is an opening `(` the statement wraps a call
 * expression; otherwise it is parsed as a binary expression.
 *
 * @param tokens - Full token list being parsed.
 * @param index - Index of the first token of the expression.
 * @returns The statement node and the index of the last token it consumed.
 */
function makeExpressionStatement(
  tokens: Token[],
  index: number
): { expression: ExpressionStatement; lastIndex: number } {
  const currentToken = tokens[index];
  const { token: nextToken } = nextMeaningfulToken(tokens, index);
  // `nextToken` is undefined when `index` is the last meaningful token.
  const isCall = nextToken?.type === "brace" && nextToken.value === "(";
  const { expression, lastIndex } = isCall
    ? makeCallExpression(tokens, index)
    : makeBinaryExpression(tokens, index);
  return {
    expression: {
      type: "ExpressionStatement",
      start: currentToken.start,
      end: expression.end,
      expression,
    },
    lastIndex,
  };
}
2022-11-14 13:28:16 +11:00
/** A function call such as `callee(arg1, arg2)`. */
interface CallExpression extends GeneralStatement {
  type: "CallExpression";
  /** Identifier naming the function being called. */
  callee: Identifier;
  /** Parsed arguments in source order. */
  arguments: Value[];
  /** The builder in this file always sets this to `false`. */
  optional: boolean;
}
/**
 * Builds a `CallExpression` for a call whose callee identifier is at
 * `tokens[index]` and whose opening brace is the next meaningful token.
 *
 * @param tokens - Full token list being parsed.
 * @param index - Index of the callee token.
 * @returns The call node and the index reported by `makeArguments` as the
 *   end of the argument list.
 */
function makeCallExpression(
  tokens: Token[],
  index: number
): {
  expression: CallExpression;
  lastIndex: number;
} {
  const calleeToken = tokens[index];
  const { index: openingBraceIndex } = nextMeaningfulToken(tokens, index);
  const callee = makeIdentifier(tokens, index);
  const { arguments: callArguments, lastIndex } = makeArguments(
    tokens,
    openingBraceIndex
  );
  const callNode: CallExpression = {
    type: "CallExpression",
    start: calleeToken.start,
    // The node ends where the token at the end of the argument list ends.
    end: tokens[lastIndex].end,
    callee,
    arguments: callArguments,
    optional: false,
  };
  return { expression: callNode, lastIndex };
}
/**
 * Recursively collects the arguments of a call expression.
 *
 * Each step looks at the token at `index` (the opening `(`, a `,` separator
 * or the closing `)`), parses the argument that follows it, and recurses on
 * the next separator until the closing `)` is reached.
 *
 * @param tokens - Full token list being parsed.
 * @param index - Index of the `(`, `,` or `)` token preceding the next argument.
 * @param previousArgs - Arguments gathered by earlier recursion steps.
 * @returns The collected arguments and the index of the closing `)`.
 * @throws Error when the tokens end before the closing `)` or when an
 *   argument is not an identifier, a literal or a binary expression.
 */
function makeArguments(
  tokens: Token[],
  index: number,
  previousArgs: Value[] = []
): {
  arguments: Value[];
  lastIndex: number;
} {
  const braceOrCommaToken = tokens[index];
  if (!braceOrCommaToken) {
    throw new Error("Unexpected end of input while parsing call arguments");
  }
  if (braceOrCommaToken.type === "brace" && braceOrCommaToken.value === ")") {
    return { arguments: previousArgs, lastIndex: index };
  }

  const argumentToken = nextMeaningfulToken(tokens, index);
  if (!argumentToken.token) {
    throw new Error("Unexpected end of input while parsing call arguments");
  }
  // `f()` (or a trailing comma): the closing brace follows immediately.
  if (
    argumentToken.token.type === "brace" &&
    argumentToken.token.value === ")"
  ) {
    return { arguments: previousArgs, lastIndex: argumentToken.index };
  }

  const nextBraceOrCommaToken = nextMeaningfulToken(
    tokens,
    argumentToken.index
  );
  if (!nextBraceOrCommaToken.token) {
    throw new Error("Unexpected end of input while parsing call arguments");
  }
  const isIdentifierOrLiteral =
    nextBraceOrCommaToken.token.type === "comma" ||
    nextBraceOrCommaToken.token.type === "brace";

  if (!isIdentifierOrLiteral) {
    // The argument is followed by something other than `,` or a brace, so
    // treat it as a binary expression that starts at the argument token.
    const { expression, lastIndex } = makeBinaryExpression(
      tokens,
      argumentToken.index
    );
    // Resume on the separator after the expression, not on its last operand.
    const separatorAfterExpression = nextMeaningfulToken(tokens, lastIndex);
    return makeArguments(tokens, separatorAfterExpression.index, [
      ...previousArgs,
      expression,
    ]);
  }
  if (argumentToken.token.type === "word") {
    const identifier = makeIdentifier(tokens, argumentToken.index);
    return makeArguments(tokens, nextBraceOrCommaToken.index, [
      ...previousArgs,
      identifier,
    ]);
  }
  if (
    argumentToken.token.type === "number" ||
    argumentToken.token.type === "string"
  ) {
    const literal = makeLiteral(tokens, argumentToken.index);
    return makeArguments(tokens, nextBraceOrCommaToken.index, [
      ...previousArgs,
      literal,
    ]);
  }
  throw new Error("Expected a previous if statement to match");
}
2022-11-13 11:14:30 +11:00
/** A declaration statement introduced by a keyword such as `const` or `fn`. */
interface VariableDeclaration extends GeneralStatement {
  type: "VariableDeclaration";
  declarations: VariableDeclarator[];
  /** Keyword that introduced the declaration; "unknown" for anything else. */
  kind: "const" | "unknown" | "fn"; //| "solid" | "surface" | "face"
}
/**
 * Builds a `VariableDeclaration` whose keyword token is at `tokens[index]`.
 *
 * @param tokens - Full token list being parsed.
 * @param index - Index of the declaration keyword (`const`, `fn`, ...).
 * @returns The declaration node and the index of the last token consumed by
 *   its declarators.
 */
function makeVariableDeclaration(
  tokens: Token[],
  index: number
): { declaration: VariableDeclaration; lastIndex: number } {
  const currentToken = tokens[index];
  const declarationStartToken = nextMeaningfulToken(tokens, index);
  const { declarations, lastIndex } = makeVariableDeclarators(
    tokens,
    declarationStartToken.index
  );

  // Any keyword other than `const` / `fn` is recorded as "unknown".
  let kind: VariableDeclaration["kind"] = "unknown";
  if (currentToken.value === "const") {
    kind = "const";
  } else if (currentToken.value === "fn") {
    kind = "fn";
  }

  return {
    declaration: {
      type: "VariableDeclaration",
      start: currentToken.start,
      end: declarations[declarations.length - 1].end,
      kind,
      declarations,
    },
    lastIndex,
  };
}
/**
 * Any node that can appear where a value is expected: a variable
 * initialiser, a call argument, a return argument or a statement expression.
 */
type Value =
  | Literal
  | Identifier
  | BinaryExpression
  | FunctionExpression
  | CallExpression;
/**
 * Parses the value that starts at `tokens[index]`.
 *
 * The next meaningful token decides the shape:
 * - an opening `(` makes it a call expression,
 * - an operator after a word, number or string makes it a binary expression,
 * - otherwise a lone word is an identifier and a lone number/string a literal.
 *
 * @param tokens - Full token list being parsed.
 * @param index - Index of the first token of the value.
 * @returns The value node and the index of the last token it consumed.
 * @throws Error when the token at `index` cannot start a value.
 */
function makeValue(
  tokens: Token[],
  index: number
): { value: Value; lastIndex: number } {
  const currentToken = tokens[index];
  // `nextToken` is undefined when the value is the last meaningful token.
  const { token: nextToken } = nextMeaningfulToken(tokens, index);
  if (nextToken?.type === "brace" && nextToken.value === "(") {
    const { expression, lastIndex } = makeCallExpression(tokens, index);
    return {
      value: expression,
      lastIndex,
    };
  }
  const canStartBinaryExpression =
    currentToken.type === "word" ||
    currentToken.type === "number" ||
    currentToken.type === "string";
  if (canStartBinaryExpression && nextToken?.type === "operator") {
    const { expression, lastIndex } = makeBinaryExpression(tokens, index);
    return {
      value: expression,
      lastIndex,
    };
  }
  if (currentToken.type === "word") {
    const identifier = makeIdentifier(tokens, index);
    return {
      value: identifier,
      lastIndex: index,
    };
  }
  if (currentToken.type === "number" || currentToken.type === "string") {
    const literal = makeLiteral(tokens, index);
    return {
      value: literal,
      lastIndex: index,
    };
  }
  throw new Error("Expected a previous if statement to match");
}
2022-11-13 11:14:30 +11:00
/** A single `name = value` binding inside a `VariableDeclaration`. */
interface VariableDeclarator extends GeneralStatement {
  type: "VariableDeclarator";
  /** Name being declared. */
  id: Identifier;
  /** Value the name is initialised with. */
  init: Value;
}
/**
 * Builds the declarator for `name = init`, where `tokens[index]` is the name.
 *
 * The initialiser is chosen from the tokens after the assignment token:
 * - `( ... ) =>` is parsed as a function expression,
 * - a token followed by an operator is parsed as a binary expression,
 * - a token followed by `(` is parsed as a call expression,
 * - a lone word becomes an identifier, anything else a literal.
 *
 * @param tokens - Full token list being parsed.
 * @param index - Index of the token holding the declared name.
 * @param previousDeclarators - Declarators to prepend to the result.
 * @returns All declarators and the index of the last token consumed.
 * @throws Error when there is no initialiser, or when the initialiser starts
 *   with `(` but is not an arrow function (not supported yet).
 */
function makeVariableDeclarators(
  tokens: Token[],
  index: number,
  previousDeclarators: VariableDeclarator[] = []
): {
  declarations: VariableDeclarator[];
  lastIndex: number;
} {
  const currentToken = tokens[index];
  const assignmentToken = nextMeaningfulToken(tokens, index);
  const contentsStartToken = nextMeaningfulToken(tokens, assignmentToken.index);
  if (!contentsStartToken.token) {
    throw new Error("Expected an initializer for the variable declaration");
  }
  // `nextAfterInit.token` is undefined when the initialiser is the last token.
  const nextAfterInit = nextMeaningfulToken(tokens, contentsStartToken.index);
  let init: Value;
  let lastIndex = contentsStartToken.index;
  if (
    contentsStartToken.token.type === "brace" &&
    contentsStartToken.token.value === "("
  ) {
    const closingBraceIndex = findClosingBrace(
      tokens,
      contentsStartToken.index
    );
    const arrowToken = nextMeaningfulToken(tokens, closingBraceIndex);
    if (
      arrowToken.token?.type === "operator" &&
      arrowToken.token.value === "=>"
    ) {
      const functionInfo = makeFunctionExpression(
        tokens,
        contentsStartToken.index
      );
      init = functionInfo.expression;
      lastIndex = functionInfo.lastIndex;
    } else {
      throw new Error("TODO - handle expression with braces");
    }
  } else if (nextAfterInit.token?.type === "operator") {
    const binExp = makeBinaryExpression(tokens, contentsStartToken.index);
    init = binExp.expression;
    lastIndex = binExp.lastIndex;
  } else if (
    nextAfterInit.token?.type === "brace" &&
    nextAfterInit.token.value === "("
  ) {
    const callExInfo = makeCallExpression(tokens, contentsStartToken.index);
    init = callExInfo.expression;
    lastIndex = callExInfo.lastIndex;
  } else if (contentsStartToken.token.type === "word") {
    // A bare word refers to another binding; it is not a quoted string, so it
    // must not go through makeLiteral (which strips the first and last char).
    init = makeIdentifier(tokens, contentsStartToken.index);
  } else {
    init = makeLiteral(tokens, contentsStartToken.index);
  }
  const currentDeclarator: VariableDeclarator = {
    type: "VariableDeclarator",
    start: currentToken.start,
    end: tokens[lastIndex].end,
    id: makeIdentifier(tokens, index),
    init,
  };
  return {
    declarations: [...previousDeclarators, currentDeclarator],
    lastIndex,
  };
}
2022-11-14 13:28:16 +11:00
/**
 * Operand of a binary expression. Only literals and identifiers are
 * supported for now; candidate future operand kinds are listed below.
 */
export type BinaryPart = Literal | Identifier;
// | BinaryExpression
// | CallExpression
// | MemberExpression
// | ArrayExpression
// | ObjectExpression
// | UnaryExpression
// | LogicalExpression
// | ConditionalExpression
/** A literal value taken from a single number or string token. */
interface Literal extends GeneralStatement {
  type: "Literal";
  /** Interpreted value of the literal. */
  value: string | number | boolean | null;
  /** Token text exactly as it appeared in the token list. */
  raw: string;
}
/** A name reference taken from a single token. */
interface Identifier extends GeneralStatement {
  type: "Identifier";
  /** The token's text. */
  name: string;
}
/**
 * Wraps the token at `index` in an `Identifier` node.
 *
 * @param token - Full token list being parsed.
 * @param index - Index of the token to convert.
 * @returns An identifier whose name is the token's value and whose
 *   `start`/`end` are copied from the token.
 */
function makeIdentifier(token: Token[], index: number): Identifier {
  const { start, end, value: name } = token[index];
  return { type: "Identifier", start, end, name };
}
/**
 * Wraps the token at `index` in a `Literal` node.
 *
 * Number tokens are converted with `Number(...)`; every other token is
 * treated as a quoted string and has its first and last character removed.
 *
 * @param tokens - Full token list being parsed.
 * @param index - Index of the token to convert.
 * @returns A literal carrying both the interpreted value and the raw text.
 */
function makeLiteral(tokens: Token[], index: number): Literal {
  const token = tokens[index];
  const value =
    token.type === "number" ? Number(token.value) : token.value.slice(1, -1);
  return {
    type: "Literal",
    start: token.start,
    end: token.end,
    value,
    raw: token.value,
  };
}
/** Two operands joined by an operator, e.g. `a + 1`. */
export interface BinaryExpression extends GeneralStatement {
  type: "BinaryExpression";
  /** Text of the operator token. */
  operator: string;
  left: BinaryPart;
  right: BinaryPart;
}
/**
 * Builds one operand of a binary expression from the token at `index`.
 *
 * @param tokens - Full token list being parsed.
 * @param index - Index of the operand token.
 * @returns An identifier for word tokens or a literal for number/string
 *   tokens, together with `index` as the last consumed token.
 * @throws Error when the token is none of word, number or string.
 */
function makeBinaryPart(
  tokens: Token[],
  index: number
): { part: BinaryPart; lastIndex: number } {
  const currentToken = tokens[index];
  if (currentToken.type === "word") {
    const identifier = makeIdentifier(tokens, index);
    return {
      part: identifier,
      lastIndex: index,
    };
  }
  if (currentToken.type === "number" || currentToken.type === "string") {
    const literal = makeLiteral(tokens, index);
    return {
      part: literal,
      lastIndex: index,
    };
  }
  throw new Error("Expected a previous if statement to match");
}
/**
 * Builds a `BinaryExpression` of the form `left operator right`, where
 * `tokens[index]` is the left operand and the operator and right operand are
 * the next two meaningful tokens.
 *
 * @param tokens - Full token list being parsed.
 * @param index - Index of the left operand token.
 * @returns The expression node and the index of the right operand token.
 * @throws Error when the tokens end before the operator or right operand, or
 *   when an operand is not a word, number or string token.
 */
function makeBinaryExpression(
  tokens: Token[],
  index: number
): { expression: BinaryExpression; lastIndex: number } {
  const currentToken = tokens[index];
  const { part: left } = makeBinaryPart(tokens, index);
  const { token: operatorToken, index: operatorIndex } = nextMeaningfulToken(
    tokens,
    index
  );
  if (!operatorToken) {
    throw new Error(
      "Expected an operator but reached the end of the tokens"
    );
  }
  const rightToken = nextMeaningfulToken(tokens, operatorIndex);
  if (!rightToken.token) {
    throw new Error(
      "Expected a right-hand operand but reached the end of the tokens"
    );
  }
  const { part: right } = makeBinaryPart(tokens, rightToken.index);
  return {
    expression: {
      type: "BinaryExpression",
      start: currentToken.start,
      end: right.end,
      left,
      operator: operatorToken.value,
      right,
    },
    lastIndex: rightToken.index,
  };
}
2022-11-17 20:17:00 +11:00
/** A function value of the form `(params) => { body }`. */
interface FunctionExpression extends GeneralStatement {
  type: "FunctionExpression";
  /** Function name; the builder in this file always sets it to `null`. */
  id: Identifier | null;
  /** Parameter names in declaration order. */
  params: Identifier[];
  body: BlockStatement;
}
/**
 * Builds a `FunctionExpression` from `(params) => { body }`.
 *
 * @param tokens - Full token list being parsed.
 * @param index - Index of the opening `(` of the parameter list
 *   (`findClosingBrace` throws if it is not an opening brace).
 * @returns The function node and the index of the last token of its body.
 */
function makeFunctionExpression(
  tokens: Token[],
  index: number
): { expression: FunctionExpression; lastIndex: number } {
  const currentToken = tokens[index];
  // Skip over the parameter list to locate the arrow and the body after it.
  const closingBraceIndex = findClosingBrace(tokens, index);
  const arrowToken = nextMeaningfulToken(tokens, closingBraceIndex);
  const bodyStartToken = nextMeaningfulToken(tokens, arrowToken.index);
  const { params } = makeParams(tokens, index);
  const { block, lastIndex: bodyLastIndex } = makeBlockStatement(
    tokens,
    bodyStartToken.index
  );
  return {
    expression: {
      type: "FunctionExpression",
      start: currentToken.start,
      end: tokens[bodyLastIndex].end,
      id: null,
      params,
      body: block,
    },
    lastIndex: bodyLastIndex,
  };
}
/**
 * Recursively collects the parameter identifiers of a function expression.
 *
 * @param tokens - Full token list being parsed.
 * @param index - Index of the `(` or `,` preceding the next parameter, or of
 *   the closing `)`.
 * @param previousParams - Parameters gathered by earlier recursion steps.
 * @returns The collected parameters and the index at which collection stopped.
 */
function makeParams(
  tokens: Token[],
  index: number,
  previousParams: Identifier[] = []
): { params: Identifier[]; lastIndex: number } {
  const separatorToken = tokens[index];
  const upcoming = nextMeaningfulToken(tokens, index);

  // Stop when the next token closes the list (covers `()`) ...
  if (upcoming.token.type === "brace" && upcoming.token.value === ")") {
    return { params: previousParams, lastIndex: index };
  }
  // ... or when we are already standing on the closing brace.
  if (separatorToken.type === "brace" && separatorToken.value === ")") {
    return { params: previousParams, lastIndex: index };
  }

  const followingSeparator = nextMeaningfulToken(tokens, upcoming.index);
  const param = makeIdentifier(tokens, upcoming.index);
  return makeParams(tokens, followingSeparator.index, [
    ...previousParams,
    param,
  ]);
}
2022-11-17 20:17:00 +11:00
/** A block of statements built from the tokens following an opening token. */
interface BlockStatement extends GeneralStatement {
  type: "BlockStatement";
  /** Statements inside the block, in source order. */
  body: Body[];
}
/**
 * Builds a `BlockStatement` whose opening token is at `tokens[index]`.
 *
 * An immediately following `}` yields an empty block; otherwise the body is
 * delegated to `makeBody`, starting at the first meaningful token inside.
 *
 * @param tokens - Full token list being parsed.
 * @param index - Index of the block's opening token.
 * @returns The block node and the index where the block's content ended.
 */
function makeBlockStatement(
  tokens: Token[],
  index: number
): { block: BlockStatement; lastIndex: number } {
  const blockStart = tokens[index];
  const firstInside = nextMeaningfulToken(tokens, index);

  let body: Body[];
  let lastIndex: number;
  if (firstInside.token.value === "}") {
    body = [];
    lastIndex = firstInside.index;
  } else {
    const parsed = makeBody(tokens, firstInside.index);
    body = parsed.body;
    lastIndex = parsed.lastIndex;
  }

  const block: BlockStatement = {
    type: "BlockStatement",
    start: blockStart.start,
    end: tokens[lastIndex].end,
    body,
  };
  return { block, lastIndex };
}
/** A `return <value>` statement. */
interface ReturnStatement extends GeneralStatement {
  type: "ReturnStatement";
  /** The value being returned. */
  argument: Value;
}
/**
 * Builds a `ReturnStatement` whose `return` keyword is at `tokens[index]`.
 *
 * @param tokens - Full token list being parsed.
 * @param index - Index of the `return` keyword token.
 * @returns The statement node and the index of the last token of the
 *   returned value.
 */
function makeReturnStatement(
  tokens: Token[],
  index: number
): { statement: ReturnStatement; lastIndex: number } {
  const returnKeyword = tokens[index];
  const { index: argumentIndex } = nextMeaningfulToken(tokens, index);
  const { value: argument, lastIndex } = makeValue(tokens, argumentIndex);
  const statement: ReturnStatement = {
    type: "ReturnStatement",
    start: returnKeyword.start,
    end: tokens[lastIndex].end,
    argument,
  };
  return { statement, lastIndex };
}
2022-11-13 11:14:30 +11:00
/** Union of the node shapes exported for consumers of this module. */
export type All =
  | Program
  | ExpressionStatement[]
  | BinaryExpression
  | Literal;
/**
 * Finds the first non-whitespace token located at or after `index + offset`.
 *
 * @param tokens - Full token list being parsed.
 * @param index - Index of the token to look past.
 * @param offset - How far past `index` to start looking (defaults to 1).
 * @returns The token found and its index. When the search runs off the list
 *   the returned `token` is undefined and `index` is `tokens.length`.
 */
function nextMeaningfulToken(
  tokens: Token[],
  index: number,
  offset: number = 1
): { token: Token; index: number } {
  let cursor = index + offset;
  for (;;) {
    const candidate = tokens[cursor];
    if (!candidate) {
      return { token: candidate, index: tokens.length };
    }
    if (candidate.type !== "whitespace") {
      return { token: candidate, index: cursor };
    }
    cursor += 1;
  }
}
/** Statement kinds that may appear in a program or block body. */
type Body =
  | ExpressionStatement
  | VariableDeclaration
  | ReturnStatement;
2022-11-13 11:14:30 +11:00
2022-11-17 20:17:00 +11:00
/**
 * Recursively parses statements starting at `tokenIndex` until the tokens
 * run out or a closing `}` is reached.
 *
 * Recognised statements are `const`/`fn` declarations, `return` statements,
 * call expressions and binary expressions. After each statement, parsing
 * continues with the next meaningful token so that every statement in the
 * body is collected.
 *
 * @param tokens - Full token list being parsed.
 * @param tokenIndex - Index of the token to start from.
 * @param previousBody - Statements gathered by earlier recursion steps.
 * @returns The statements and the index where parsing stopped: the closing
 *   `}` of the enclosing block, or an index at or past `tokens.length`.
 * @throws Error("Unexpected token") when a token cannot start a statement.
 */
function makeBody(
  tokens: Token[],
  tokenIndex: number = 0,
  previousBody: Body[] = []
): { body: Body[]; lastIndex: number } {
  if (tokenIndex >= tokens.length) {
    return { body: previousBody, lastIndex: tokenIndex };
  }
  const token = tokens[tokenIndex];
  if (token.type === "brace" && token.value === "}") {
    return { body: previousBody, lastIndex: tokenIndex };
  }
  if (token.type === "whitespace") {
    return makeBody(tokens, tokenIndex + 1, previousBody);
  }
  // `nextToken.token` is undefined when `token` is the last meaningful token.
  const nextToken = nextMeaningfulToken(tokens, tokenIndex);
  if (
    token.type === "word" &&
    (token.value === "const" || token.value === "fn")
  ) {
    const { declaration, lastIndex } = makeVariableDeclaration(
      tokens,
      tokenIndex
    );
    const nextThing = nextMeaningfulToken(tokens, lastIndex);
    return makeBody(tokens, nextThing.index, [...previousBody, declaration]);
  }
  if (token.type === "word" && token.value === "return") {
    const { statement, lastIndex } = makeReturnStatement(tokens, tokenIndex);
    const nextThing = nextMeaningfulToken(tokens, lastIndex);
    return makeBody(tokens, nextThing.index, [...previousBody, statement]);
  }
  const startsCallExpression =
    token.type === "word" &&
    nextToken.token?.type === "brace" &&
    nextToken.token.value === "(";
  const startsBinaryExpression =
    (token.type === "number" || token.type === "word") &&
    nextToken.token?.type === "operator";
  if (startsCallExpression || startsBinaryExpression) {
    const { expression, lastIndex } = makeExpressionStatement(
      tokens,
      tokenIndex
    );
    // Keep going: returning here would drop every following statement and
    // leave `lastIndex` short of the enclosing block's closing brace.
    const nextThing = nextMeaningfulToken(tokens, lastIndex);
    return makeBody(tokens, nextThing.index, [...previousBody, expression]);
  }
  throw new Error("Unexpected token");
}
2022-11-13 11:14:30 +11:00
/**
 * Parses a token list into a `Program` node.
 *
 * @param tokens - Tokens to parse.
 * @returns A program starting at 0 and ending where its last statement ends;
 *   a token list that yields no statements produces an empty program whose
 *   `end` is 0.
 */
export const abstractSyntaxTree = (tokens: Token[]): Program => {
  const { body } = makeBody(tokens);
  // `body` may be empty (no tokens, or whitespace only).
  const lastStatement = body[body.length - 1];
  const program: Program = {
    type: "Program",
    start: 0,
    end: lastStatement ? lastStatement.end : 0,
    body: body,
  };
  return program;
};
2022-11-17 16:06:38 +11:00
/**
 * Finds the index of the brace that closes the opening brace at `index`,
 * taking nested braces of the same kind into account.
 *
 * @param tokens - Full token list being searched.
 * @param index - Index of the opening `(`, `{` or `[` token.
 * @param _braceCount - Internal: nesting depth already open before `index`.
 *   Leave at the default when calling from outside.
 * @param _searchOpeningBrace - Internal: the opening brace being matched.
 *   Leave at the default when calling from outside.
 * @returns Index of the matching closing brace.
 * @throws Error when `index` is not on an opening brace, or when the tokens
 *   end before the matching closing brace is found.
 */
export function findClosingBrace(
  tokens: Token[],
  index: number,
  _braceCount: number = 0,
  _searchOpeningBrace: string = ""
): number {
  const closingBraceMap: { [key: string]: string } = {
    "(": ")",
    "{": "}",
    "[": "]",
  };
  let searchOpeningBrace = _searchOpeningBrace;
  const isFirstCall = !searchOpeningBrace && _braceCount === 0;
  if (isFirstCall) {
    const startToken = tokens[index];
    searchOpeningBrace = startToken ? startToken.value : "";
    if (!["(", "{", "["].includes(searchOpeningBrace)) {
      throw new Error(
        `expected to be started on a opening brace ( { [, instead found '${searchOpeningBrace}'`
      );
    }
  }
  const closingBrace = closingBraceMap[searchOpeningBrace];

  // Walk forward tracking nesting depth; the brace that closes depth 1 is the
  // match. A loop avoids one stack frame per token and lets running off the
  // end be reported explicitly.
  let braceCount = _braceCount;
  for (let cursor = index; cursor < tokens.length; cursor++) {
    const { value } = tokens[cursor];
    if (value === closingBrace) {
      if (braceCount === 1) {
        return cursor;
      }
      braceCount -= 1;
    } else if (value === searchOpeningBrace) {
      braceCount += 1;
    }
  }
  throw new Error(
    `no matching closing brace '${closingBrace}' found for the '${searchOpeningBrace}' at token index ${index}`
  );
}