This commit is contained in:
Kurt Hutten IrevDev
2022-11-13 11:14:30 +11:00
parent bbdaf2e6fb
commit 300e7c4118
2 changed files with 462 additions and 0 deletions

View File

@ -0,0 +1,146 @@
import { abstractSyntaxTree } from "./abstractSyntaxTree";
import { lexer } from "./tokeniser";
// Checks the tree that abstractSyntaxTree builds from lexer output for a few small programs.
describe("testing AST", () => {
// One binary expression; the input has a space before "+" but none after it.
test("test 5 + 6", () => {
const tokens = lexer("5 +6");
const result = abstractSyntaxTree(tokens);
// The complete Program node is compared, including its own start/end offsets.
expect(result).toEqual({
type: "Program",
start: 0,
end: 4,
body: [
{
type: "ExpressionStatement",
start: 0,
end: 4,
expression: {
type: "BinaryExpression",
start: 0,
end: 4,
left: {
type: "Literal",
start: 0,
end: 1,
value: 5,
raw: "5",
},
operator: "+",
// "6" is expected at offset 3: "5" is at 0, the space at 1 and "+" at 2.
right: {
type: "Literal",
start: 3,
end: 4,
value: 6,
raw: "6",
},
},
},
],
});
});
// A single const declaration; only the Program's body is compared here.
test("test const myVar = 5", () => {
const tokens = lexer("const myVar = 5");
const { body } = abstractSyntaxTree(tokens);
expect(body).toEqual([
{
type: "VariableDeclaration",
start: 0,
end: 15,
kind: "const",
declarations: [
{
// The declarator is expected to start at the name (offset 6), not at the keyword.
type: "VariableDeclarator",
start: 6,
end: 15,
id: {
type: "Identifier",
start: 6,
end: 11,
name: "myVar",
},
init: {
type: "Literal",
start: 14,
end: 15,
value: 5,
raw: "5",
},
},
],
},
]);
});
// Two declarations on separate lines; the second initialiser is a binary expression.
test("test multi-line", () => {
const code = `const myVar = 5
const newVar = myVar + 1
`;
const tokens = lexer(code);
const { body } = abstractSyntaxTree(tokens);
expect(body).toEqual([
{
type: "VariableDeclaration",
start: 0,
end: 15,
kind: "const",
declarations: [
{
type: "VariableDeclarator",
start: 6,
end: 15,
id: {
type: "Identifier",
start: 6,
end: 11,
name: "myVar",
},
init: {
type: "Literal",
start: 14,
end: 15,
value: 5,
raw: "5",
},
},
],
},
{
// Offsets keep counting across the newline at offset 15, so this statement starts at 16.
type: "VariableDeclaration",
start: 16,
end: 40,
kind: "const",
declarations: [
{
type: "VariableDeclarator",
start: 22,
end: 40,
id: {
type: "Identifier",
start: 22,
end: 28,
name: "newVar",
},
init: {
type: "BinaryExpression",
start: 31,
end: 40,
// A name on the left-hand side is expected as an Identifier node.
left: {
type: "Identifier",
start: 31,
end: 36,
name: "myVar",
},
operator: "+",
right: {
type: "Literal",
start: 39,
end: 40,
value: 1,
raw: "1",
},
},
},
],
},
]);
});
});

316
src/abstractSyntaxTree.ts Normal file
View File

@ -0,0 +1,316 @@
import { Token } from "./tokeniser";
/**
 * String-literal union of the values a node's `type` field may hold.
 *
 * The builders visible in this file only emit "Program", "ExpressionStatement",
 * "BinaryExpression", "VariableDeclaration", "VariableDeclarator", "Identifier"
 * and "Literal"; none of the other names is produced by the code shown here.
 */
type syntaxType =
| "Program"
| "ExpressionStatement"
| "BinaryExpression"
| "NumberLiteral"
| "StringLiteral"
| "CallExpression"
| "Identifier"
| "BlockStatement"
| "IfStatement"
| "WhileStatement"
| "FunctionDeclaration"
| "ReturnStatement"
| "VariableDeclaration"
| "VariableDeclarator"
| "AssignmentExpression"
| "UnaryExpression"
| "MemberExpression"
| "ArrayExpression"
| "ObjectExpression"
| "Property"
| "LogicalExpression"
| "ConditionalExpression"
| "ForStatement"
| "ForInStatement"
| "ForOfStatement"
| "BreakStatement"
| "ContinueStatement"
| "SwitchStatement"
| "SwitchCase"
| "ThrowStatement"
| "TryStatement"
| "CatchClause"
| "ClassDeclaration"
| "ClassBody"
| "MethodDefinition"
| "NewExpression"
| "ThisExpression"
| "UpdateExpression"
| "ArrowFunctionExpression"
| "YieldExpression"
| "AwaitExpression"
| "ImportDeclaration"
| "ImportSpecifier"
| "ImportDefaultSpecifier"
| "ImportNamespaceSpecifier"
| "ExportNamedDeclaration"
| "ExportDefaultDeclaration"
| "ExportAllDeclaration"
| "ExportSpecifier"
| "TaggedTemplateExpression"
| "TemplateLiteral"
| "TemplateElement"
| "SpreadElement"
| "RestElement"
| "SequenceExpression"
| "DebuggerStatement"
| "LabeledStatement"
| "DoWhileStatement"
| "WithStatement"
| "EmptyStatement"
| "Literal"
| "ArrayPattern"
| "ObjectPattern"
| "AssignmentPattern"
| "MetaProperty"
| "Super"
| "Import"
| "RegExpLiteral"
| "BooleanLiteral"
| "NullLiteral"
| "TypeAnnotation";
/** Root node of the tree returned by `abstractSyntaxTree`. */
export interface Program {
// Declared as the full union, although `abstractSyntaxTree` always writes "Program".
type: syntaxType;
// `abstractSyntaxTree` always sets this to 0.
start: number;
// Taken from the `end` of the last statement in `body` when there is one.
end: number;
// Top-level statements, in the order they were built from the tokens.
body: Body[];
}
/**
 * Fields shared by every node interface that extends this one: a type tag
 * plus the node's start/end positions, which the builders copy from tokens.
 */
interface GeneralStatement {
type: syntaxType;
start: number;
end: number;
}
/** Statement that wraps one expression; only a BinaryExpression is allowed by this type. */
interface ExpressionStatement extends GeneralStatement {
type: "ExpressionStatement";
expression: BinaryExpression;
}
/**
 * Wraps the binary expression that begins at `tokens[index]` in an
 * ExpressionStatement node.
 *
 * @param tokens Full token list.
 * @param index Position of the expression's first token.
 * @returns A statement spanning from the first token's `start` to the
 * expression's `end`. The index of the last consumed token is not reported.
 */
function makeExpressionStatement(
  tokens: Token[],
  index: number
): ExpressionStatement {
  const firstToken = tokens[index];
  const parsed = makeBinaryExpression(tokens, index);
  const statement: ExpressionStatement = {
    type: "ExpressionStatement",
    start: firstToken.start,
    end: parsed.expression.end,
    expression: parsed.expression,
  };
  return statement;
}
/** Declaration statement introduced by a keyword and holding a list of declarators. */
interface VariableDeclaration extends GeneralStatement {
type: "VariableDeclaration";
declarations: VariableDeclarator[];
// "unknown" is what makeVariableDeclaration writes when the keyword's value is not "const".
kind: "const" | "unknown"; //| "solid" | "surface" | "face"
}
/**
 * Builds a VariableDeclaration node for the declaration whose keyword token
 * sits at `tokens[index]`.
 *
 * @param tokens Full token list.
 * @param index Position of the declaration keyword.
 * @returns The declaration node together with `lastIndex`, the index reported
 * by `makeVariableDeclarators` for the last token it consumed.
 */
function makeVariableDeclaration(
  tokens: Token[],
  index: number
): { declaration: VariableDeclaration; lastIndex: number } {
  const keywordToken = tokens[index];
  // The declarators start at the first non-whitespace token after the keyword.
  const { index: firstDeclaratorIndex } = nextMeaningfulToken(tokens, index);
  const parsed = makeVariableDeclarators(tokens, firstDeclaratorIndex);
  const finalDeclarator = parsed.declarations[parsed.declarations.length - 1];

  let kind: VariableDeclaration["kind"] = "unknown";
  if (keywordToken.value === "const") {
    kind = "const";
  }

  const declaration: VariableDeclaration = {
    type: "VariableDeclaration",
    start: keywordToken.start,
    // The statement ends where its last declarator ends.
    end: finalDeclarator.end,
    kind,
    declarations: parsed.declarations,
  };
  return { declaration, lastIndex: parsed.lastIndex };
}
/** One `name = initialiser` pair inside a VariableDeclaration. */
interface VariableDeclarator extends GeneralStatement {
type: "VariableDeclarator";
// The declared name.
id: Identifier;
// The initialiser: a literal, a reference to another name, or a binary expression.
init: Literal | Identifier | BinaryExpression;
}
/**
 * Builds the declarator for a `name <assignment> initialiser` sequence whose
 * name token sits at `tokens[index]`.
 *
 * The token between the name and the initialiser is skipped without being
 * inspected. The initialiser becomes:
 * - a BinaryExpression when the token after its first token is an operator,
 * - an Identifier when its first token is a word (e.g. `const a = b`),
 * - a Literal otherwise.
 *
 * @param tokens Full token list.
 * @param index Position of the declared name.
 * @param previousDeclarators Declarators to place before the new one in the result.
 * @returns `previousDeclarators` followed by the new declarator, and
 * `lastIndex`, the index of the last token belonging to the initialiser.
 * @throws Error when the name or the initialiser is missing from the input.
 */
function makeVariableDeclarators(
  tokens: Token[],
  index: number,
  previousDeclarators: VariableDeclarator[] = []
): {
  declarations: VariableDeclarator[];
  lastIndex: number;
} {
  const nameToken = tokens[index];
  if (!nameToken) {
    throw new Error(
      "Expected a variable name but reached the end of the input"
    );
  }

  const assignmentToken = nextMeaningfulToken(tokens, index);
  const contentsStartToken = nextMeaningfulToken(tokens, assignmentToken.index);
  // nextMeaningfulToken hands back an undefined token once the input runs out.
  if (!contentsStartToken.token) {
    throw new Error(
      `Expected an initialiser for "${nameToken.value}" but reached the end of the input`
    );
  }
  const nextAfterInit = nextMeaningfulToken(tokens, contentsStartToken.index);

  let init: VariableDeclarator["init"];
  let lastIndex = contentsStartToken.index;
  if (nextAfterInit.token?.type === "operator") {
    const binExp = makeBinaryExpression(tokens, contentsStartToken.index);
    init = binExp.expression;
    lastIndex = binExp.lastIndex;
  } else if (contentsStartToken.token.type === "word") {
    // A bare name refers to another variable, so it is an Identifier rather
    // than a Literal; this matches how makeBinaryExpression treats words.
    init = makeIdentifier(tokens, contentsStartToken.index);
  } else {
    init = makeLiteral(tokens, contentsStartToken.index);
  }

  const currentDeclarator: VariableDeclarator = {
    type: "VariableDeclarator",
    start: nameToken.start,
    end: tokens[lastIndex].end,
    id: makeIdentifier(tokens, index),
    init,
  };
  return {
    declarations: [...previousDeclarators, currentDeclarator],
    lastIndex,
  };
}
/** Node types allowed as the left or right operand of a BinaryExpression. */
type BinaryPart = Literal | Identifier;
// Candidate operand types that are commented out, i.e. not part of the union:
// | BinaryExpression
// | CallExpression
// | MemberExpression
// | ArrayExpression
// | ObjectExpression
// | UnaryExpression
// | LogicalExpression
// | ConditionalExpression
/** A literal value taken from a single token. */
interface Literal extends GeneralStatement {
type: "Literal";
// makeLiteral stores a number for "number" tokens and the token's text otherwise.
value: string | number | boolean | null;
// The token's text exactly as it appeared.
raw: string;
}
/** A name, such as a declared variable or a reference to one. */
interface Identifier extends GeneralStatement {
type: "Identifier";
// The token's text.
name: string;
}
/**
 * Builds an Identifier node from the token at `token[index]`.
 *
 * @param token Full token list (despite the singular name).
 * @param index Position of the token to convert.
 * @returns A node carrying the token's `start`/`end` and its text as `name`.
 */
function makeIdentifier(token: Token[], index: number): Identifier {
  const { start, end, value: name } = token[index];
  return { type: "Identifier", start, end, name };
}
/**
 * Builds a Literal node from the token at `tokens[index]`.
 *
 * @param tokens Full token list.
 * @param index Position of the token to convert.
 * @returns A node whose `raw` is the token's text and whose `value` is that
 * text converted with `Number(...)` for "number" tokens, or the text itself
 * for every other token type.
 */
function makeLiteral(tokens: Token[], index: number): Literal {
  const { type: tokenType, value: raw, start, end } = tokens[index];
  let value: Literal["value"] = raw;
  if (tokenType === "number") {
    value = Number(raw);
  }
  return { type: "Literal", start, end, value, raw };
}
/** Two operands joined by one operator, e.g. `5 + 6`. */
interface BinaryExpression extends GeneralStatement {
type: "BinaryExpression";
// The operator token's text.
operator: string;
left: BinaryPart;
right: BinaryPart;
}
/**
 * Builds a BinaryExpression from the `left operator right` sequence that
 * starts at `tokens[index]`. Whitespace between the three parts is skipped.
 *
 * Each operand becomes an Identifier when its token is a word and a Literal
 * otherwise, on both sides of the operator. Only a single operation is
 * handled; longer chains such as `a + b + c` are not parsed here. The middle
 * token's text is used as the operator without checking its type.
 *
 * @param tokens Full token list.
 * @param index Position of the left operand.
 * @returns The expression node and `lastIndex`, the index of the right operand.
 * @throws Error when the left operand, the operator or the right operand is
 * missing from the input.
 */
function makeBinaryExpression(
  tokens: Token[],
  index: number
): { expression: BinaryExpression; lastIndex: number } {
  // Words are names (Identifier); every other token type is treated as a Literal.
  const toOperand = (operandIndex: number): BinaryPart =>
    tokens[operandIndex].type === "word"
      ? makeIdentifier(tokens, operandIndex)
      : makeLiteral(tokens, operandIndex);

  const leftToken = tokens[index];
  if (!leftToken) {
    throw new Error(
      "Expected the left operand of a binary expression but reached the end of the input"
    );
  }

  const { token: operatorToken, index: operatorIndex } = nextMeaningfulToken(
    tokens,
    index
  );
  // nextMeaningfulToken hands back an undefined token once the input runs out.
  if (!operatorToken) {
    throw new Error(
      `Expected an operator after "${leftToken.value}" but reached the end of the input`
    );
  }

  const { token: rightToken, index: rightIndex } = nextMeaningfulToken(
    tokens,
    operatorIndex
  );
  if (!rightToken) {
    throw new Error(
      `Expected a right operand after "${operatorToken.value}" but reached the end of the input`
    );
  }

  const left = toOperand(index);
  const right = toOperand(rightIndex);
  return {
    expression: {
      type: "BinaryExpression",
      start: leftToken.start,
      end: right.end,
      left,
      operator: operatorToken.value,
      right,
    },
    lastIndex: rightIndex,
  };
}
/** Exported union of node shapes; nothing in the code shown here refers to it. */
export type All = Program | ExpressionStatement[] | BinaryExpression | Literal;
/**
 * Finds the first non-whitespace token at or after `index + offset`.
 *
 * @param tokens Full token list.
 * @param index Position to search from; with the default offset the token at
 * `index` itself is not considered.
 * @param offset Distance from `index` at which the search begins (default 1).
 * @returns The token found and its index. When the search runs past the end
 * of `tokens`, `token` is `undefined` (despite the declared type) and `index`
 * is `tokens.length`.
 */
function nextMeaningfulToken(
  tokens: Token[],
  index: number,
  offset: number = 1
): { token: Token; index: number } {
  for (let position = index + offset; ; position += 1) {
    const candidate = tokens[position];
    if (!candidate) {
      // Ran out of tokens: report the one-past-the-end index.
      return { token: candidate, index: tokens.length };
    }
    if (candidate.type !== "whitespace") {
      return { token: candidate, index: position };
    }
  }
}
/** Statement kinds that may appear in a Program's `body`. */
type Body = ExpressionStatement | VariableDeclaration;
/**
 * Builds a Program node from a flat token list.
 *
 * Statements are recognised one after another:
 * - whitespace between statements (including at the very start) is skipped;
 * - a `const` word starts a variable declaration;
 * - a number or word followed by an operator starts an expression statement.
 *   Parsing stops after the first expression statement, because
 *   `makeExpressionStatement` does not report where the expression ended.
 *
 * @param tokens Tokens to parse.
 * @returns A Program whose `start` is 0 and whose `end` is the `end` of the
 * last statement, or 0 when no statement was found (empty or whitespace-only
 * input).
 * @throws Error with a message starting "Unexpected token" when a token
 * cannot begin any supported statement.
 */
export const abstractSyntaxTree = (tokens: Token[]): Program => {
  const body: Body[] = [];
  let tokenIndex = 0;

  while (tokenIndex < tokens.length) {
    const token = tokens[tokenIndex];
    if (!token) {
      // Only reachable for a list with holes; fail loudly instead of crashing later.
      throw new Error(`Missing token at index ${tokenIndex}`);
    }

    if (token.type === "whitespace") {
      tokenIndex += 1;
      continue;
    }

    if (token.type === "word" && token.value === "const") {
      const { declaration, lastIndex } = makeVariableDeclaration(
        tokens,
        tokenIndex
      );
      body.push(declaration);
      // Resume at the first meaningful token after the declaration.
      tokenIndex = nextMeaningfulToken(tokens, lastIndex).index;
      continue;
    }

    if (
      (token.type === "number" || token.type === "word") &&
      // `token` is undefined when nothing follows, hence the optional chain.
      nextMeaningfulToken(tokens, tokenIndex).token?.type === "operator"
    ) {
      body.push(makeExpressionStatement(tokens, tokenIndex));
      break;
    }

    throw new Error(
      `Unexpected token "${token.value}" (${token.type}) at index ${tokenIndex} of ${tokens.length}`
    );
  }

  const lastStatement = body[body.length - 1];
  const program: Program = {
    type: "Program",
    start: 0,
    end: lastStatement ? lastStatement.end : 0,
    body,
  };
  return program;
};