Add callee expression parsing

This commit is contained in:
Kurt Hutten IrevDev
2022-11-14 13:28:16 +11:00
parent 24189282ec
commit aed0763770
5 changed files with 233 additions and 68 deletions

View File

@ -143,4 +143,50 @@ const newVar = myVar + 1
}, },
]); ]);
}); });
// Parses one call to the std function `log` that mixes a number literal, a
// string literal and an identifier, then compares the resulting program body
// against the expected ESTree-style nodes. Offsets are character positions
// inside the source string below.
test('test using std function "log"', () => {
  const source = `log(5, "hello", aIdentifier)`;

  const expectedCallee = {
    type: "Identifier",
    start: 0,
    end: 3,
    name: "log",
  };
  const expectedArguments = [
    { type: "Literal", start: 4, end: 5, value: 5, raw: "5" },
    { type: "Literal", start: 7, end: 14, value: "hello", raw: "\"hello\"" },
    { type: "Identifier", start: 16, end: 27, name: "aIdentifier" },
  ];
  const expectedCall = {
    type: "CallExpression",
    start: 0,
    end: 28,
    callee: expectedCallee,
    arguments: expectedArguments,
    optional: false,
  };

  const program = abstractSyntaxTree(lexer(source));

  // The statement wrapper spans exactly the same range as the call it holds.
  expect(program.body).toEqual([
    {
      type: "ExpressionStatement",
      start: 0,
      end: 28,
      expression: expectedCall,
    },
  ]);
});
}); });

View File

@ -87,7 +87,7 @@ interface GeneralStatement {
interface ExpressionStatement extends GeneralStatement { interface ExpressionStatement extends GeneralStatement {
type: "ExpressionStatement"; type: "ExpressionStatement";
expression: BinaryExpression; expression: BinaryExpression | CallExpression;
} }
function makeExpressionStatement( function makeExpressionStatement(
@ -95,8 +95,17 @@ function makeExpressionStatement(
index: number index: number
): ExpressionStatement { ): ExpressionStatement {
const currentToken = tokens[index]; const currentToken = tokens[index];
// if (nextToken.type === "operator") { const { token: nextToken } = nextMeaningfulToken(tokens, index);
// } if (nextToken.type === "brace" && nextToken.value === "(") {
const { expression } = makeCallExpression(tokens, index);
return {
type: "ExpressionStatement",
start: currentToken.start,
end: expression.end,
expression,
};
}
const { expression } = makeBinaryExpression(tokens, index); const { expression } = makeBinaryExpression(tokens, index);
return { return {
type: "ExpressionStatement", type: "ExpressionStatement",
@ -106,6 +115,79 @@ function makeExpressionStatement(
}; };
} }
/**
 * AST node describing a function call such as `log(5, "hello", aIdentifier)`.
 */
interface CallExpression extends GeneralStatement {
  type: "CallExpression";
  /** Identifier node naming the function being called. */
  callee: Identifier;
  /** Argument nodes; each is any node kind a variable initialiser may hold. */
  arguments: Array<VariableDeclarator["init"]>;
  /** Optional-call flag; makeCallExpression always sets it to `false`. */
  optional: boolean;
}
/**
 * Builds a CallExpression node for a call whose callee token sits at `index`.
 *
 * @param tokens - Token list being parsed.
 * @param index - Index of the callee token.
 * @returns The call node plus `lastIndex`, the index makeArguments stopped on
 *   (the token whose `end` closes the call).
 */
function makeCallExpression(
  tokens: Token[],
  index: number
): {
  expression: CallExpression;
  lastIndex: number;
} {
  const calleeToken = tokens[index];
  // The next meaningful token after the callee is where argument parsing starts.
  const { index: argumentsStartIndex } = nextMeaningfulToken(tokens, index);
  const callee = makeIdentifier(tokens, index);
  const parsedArguments = makeArguments(tokens, argumentsStartIndex);
  const { lastIndex } = parsedArguments;

  const expression: CallExpression = {
    type: "CallExpression",
    start: calleeToken.start,
    // The call ends where the token makeArguments finished on ends.
    end: tokens[lastIndex].end,
    callee,
    arguments: parsedArguments.arguments,
    optional: false,
  };
  return { expression, lastIndex };
}
/**
 * Recursively collects the arguments of a call expression.
 *
 * Each step starts on a delimiter token (the opening "(", a "," between
 * arguments, or the closing ")"), parses the argument that follows it and
 * recurses from the next delimiter until the closing ")" is reached.
 *
 * @param tokens - Token list being parsed.
 * @param index - Index of the delimiter token to resume from.
 * @param previousArgs - Arguments gathered by earlier steps; never mutated.
 * @returns The collected argument nodes and `lastIndex`, the index of the
 *   closing ")".
 * @throws Error when the argument token is not a word, number or string and
 *   is not the start of a binary expression.
 */
function makeArguments(
  tokens: Token[],
  index: number,
  previousArgs: VariableDeclarator["init"][] = []
): {
  arguments: VariableDeclarator["init"][];
  lastIndex: number;
} {
  const braceOrCommaToken = tokens[index];
  if (braceOrCommaToken.type === "brace" && braceOrCommaToken.value === ")") {
    return {
      arguments: previousArgs,
      lastIndex: index,
    };
  }

  const argumentToken = nextMeaningfulToken(tokens, index);
  // A ")" directly after the delimiter means there is nothing left to parse:
  // either an empty argument list such as `log()` or a trailing comma.
  if (
    argumentToken.token.type === "brace" &&
    argumentToken.token.value === ")"
  ) {
    return {
      arguments: previousArgs,
      lastIndex: argumentToken.index,
    };
  }

  const nextBraceOrCommaToken = nextMeaningfulToken(
    tokens,
    argumentToken.index
  );
  // A lone identifier/literal is immediately followed by "," or a brace;
  // anything else is treated as the start of a binary expression.
  const isIdentifierOrLiteral =
    nextBraceOrCommaToken.token.type === "comma" ||
    nextBraceOrCommaToken.token.type === "brace";
  if (!isIdentifierOrLiteral) {
    // Start at the first operand, not at the delimiter stored in `index`,
    // matching how makeExpressionStatement calls makeBinaryExpression.
    const { expression, lastIndex } = makeBinaryExpression(
      tokens,
      argumentToken.index
    );
    // NOTE(review): assumes makeBinaryExpression's lastIndex is the last token
    // of the expression (the convention makeCallExpression uses) — confirm.
    // Step to the delimiter that follows so the recursion resumes on "," / ")".
    const followingDelimiter = nextMeaningfulToken(tokens, lastIndex);
    return makeArguments(tokens, followingDelimiter.index, [
      ...previousArgs,
      expression,
    ]);
  }
  if (argumentToken.token.type === "word") {
    const identifier = makeIdentifier(tokens, argumentToken.index);
    return makeArguments(tokens, nextBraceOrCommaToken.index, [
      ...previousArgs,
      identifier,
    ]);
  } else if (
    argumentToken.token.type === "number" ||
    argumentToken.token.type === "string"
  ) {
    const literal = makeLiteral(tokens, argumentToken.index);
    return makeArguments(tokens, nextBraceOrCommaToken.index, [
      ...previousArgs,
      literal,
    ]);
  }
  throw new Error("Expected a previous if statement to match");
}
interface VariableDeclaration extends GeneralStatement { interface VariableDeclaration extends GeneralStatement {
type: "VariableDeclaration"; type: "VariableDeclaration";
declarations: VariableDeclarator[]; declarations: VariableDeclarator[];
@ -174,7 +256,7 @@ function makeVariableDeclarators(
}; };
} }
type BinaryPart = Literal | Identifier; export type BinaryPart = Literal | Identifier;
// | BinaryExpression // | BinaryExpression
// | CallExpression // | CallExpression
// | MemberExpression // | MemberExpression
@ -207,7 +289,8 @@ function makeIdentifier(token: Token[], index: number): Identifier {
function makeLiteral(tokens: Token[], index: number): Literal { function makeLiteral(tokens: Token[], index: number): Literal {
const token = tokens[index]; const token = tokens[index];
const value = token.type === "number" ? Number(token.value) : token.value; const value =
token.type === "number" ? Number(token.value) : token.value.slice(1, -1);
return { return {
type: "Literal", type: "Literal",
start: token.start, start: token.start,
@ -295,6 +378,9 @@ export const abstractSyntaxTree = (tokens: Token[]): Program => {
const nextThing = nextMeaningfulToken(tokens, lastIndex); const nextThing = nextMeaningfulToken(tokens, lastIndex);
return startTree(tokens, nextThing.index, [...previousBody, declaration]); return startTree(tokens, nextThing.index, [...previousBody, declaration]);
} }
if (token.type === "word" && token.value === "log") {
return [...previousBody, makeExpressionStatement(tokens, tokenIndex)];
}
if ( if (
(token.type === "number" || token.type === "word") && (token.type === "number" || token.type === "word") &&
nextMeaningfulToken(tokens, tokenIndex).token.type === "operator" nextMeaningfulToken(tokens, tokenIndex).token.type === "operator"
@ -302,7 +388,6 @@ export const abstractSyntaxTree = (tokens: Token[]): Program => {
// return startTree(tokens, tokenIndex, [...previousBody, makeExpressionStatement(tokens, tokenIndex)]); // return startTree(tokens, tokenIndex, [...previousBody, makeExpressionStatement(tokens, tokenIndex)]);
return [...previousBody, makeExpressionStatement(tokens, tokenIndex)]; return [...previousBody, makeExpressionStatement(tokens, tokenIndex)];
} }
console.log(tokenIndex, tokens.length, token);
throw new Error("Unexpected token"); throw new Error("Unexpected token");
}; };
const body = startTree(tokens); const body = startTree(tokens);

View File

@ -0,0 +1 @@
const myVar = "a str" + " another str"

View File

@ -8,6 +8,7 @@ import {
isString, isString,
isWhitespace, isWhitespace,
isWord, isWord,
isComma,
lexer, lexer,
} from "./tokeniser"; } from "./tokeniser";
@ -126,87 +127,114 @@ describe("testing helpers", () => {
expect(isBlockEnd("5} + 5")).toBe(false); expect(isBlockEnd("5} + 5")).toBe(false);
expect(isBlockEnd(" } + 5")).toBe(false); expect(isBlockEnd(" } + 5")).toBe(false);
}); });
// isComma should only match when the comma is the very first character of
// the input; anything before it (even whitespace) makes the check fail.
it("test is comma", () => {
  const startingWithComma = [",", ", ", ",5", ",5 "];
  const notStartingWithComma = ["5", "5 + 5", "5, + 5", " , + 5"];

  for (const input of startingWithComma) {
    expect(isComma(input)).toBe(true);
  }
  for (const input of notStartingWithComma) {
    expect(isComma(input)).toBe(false);
  }
});
}); });
describe("testing lexer", () => { describe("testing lexer", () => {
it("test lexer", () => { it("test lexer", () => {
expect(stringSummaryLexer("1 + 2")).toEqual([ expect(stringSummaryLexer("1 + 2")).toEqual([
"number '1' from 0 to 1", "number '1' from 0 to 1",
"whitespace ' ' from 1 to 3", "whitespace ' ' from 1 to 3",
"operator '+' from 3 to 4", "operator '+' from 3 to 4",
"whitespace ' ' from 4 to 5", "whitespace ' ' from 4 to 5",
"number '2' from 5 to 6", "number '2' from 5 to 6",
]); ]);
expect(stringSummaryLexer("54 + 22500 + 6")).toEqual([ expect(stringSummaryLexer("54 + 22500 + 6")).toEqual([
"number '54' from 0 to 2", "number '54' from 0 to 2",
"whitespace ' ' from 2 to 3", "whitespace ' ' from 2 to 3",
"operator '+' from 3 to 4", "operator '+' from 3 to 4",
"whitespace ' ' from 4 to 5", "whitespace ' ' from 4 to 5",
"number '22500' from 5 to 10", "number '22500' from 5 to 10",
"whitespace ' ' from 10 to 11", "whitespace ' ' from 10 to 11",
"operator '+' from 11 to 12", "operator '+' from 11 to 12",
"whitespace ' ' from 12 to 13", "whitespace ' ' from 12 to 13",
"number '6' from 13 to 14", "number '6' from 13 to 14",
]); ]);
expect(stringSummaryLexer("a + bo + t5 - 6")).toEqual([ expect(stringSummaryLexer("a + bo + t5 - 6")).toEqual([
"word 'a' from 0 to 1", "word 'a' from 0 to 1",
"whitespace ' ' from 1 to 2", "whitespace ' ' from 1 to 2",
"operator '+' from 2 to 3", "operator '+' from 2 to 3",
"whitespace ' ' from 3 to 4", "whitespace ' ' from 3 to 4",
"word 'bo' from 4 to 6", "word 'bo' from 4 to 6",
"whitespace ' ' from 6 to 7", "whitespace ' ' from 6 to 7",
"operator '+' from 7 to 8", "operator '+' from 7 to 8",
"whitespace ' ' from 8 to 9", "whitespace ' ' from 8 to 9",
"word 't5' from 9 to 11", "word 't5' from 9 to 11",
"whitespace ' ' from 11 to 12", "whitespace ' ' from 11 to 12",
"operator '-' from 12 to 13", "operator '-' from 12 to 13",
"whitespace ' ' from 13 to 14", "whitespace ' ' from 13 to 14",
"number '6' from 14 to 15", "number '6' from 14 to 15",
]); ]);
expect(stringSummaryLexer('a + "a str" - 6')).toEqual([ expect(stringSummaryLexer('a + "a str" - 6')).toEqual([
"word 'a' from 0 to 1", "word 'a' from 0 to 1",
"whitespace ' ' from 1 to 2", "whitespace ' ' from 1 to 2",
"operator '+' from 2 to 3", "operator '+' from 2 to 3",
"whitespace ' ' from 3 to 4", "whitespace ' ' from 3 to 4",
"string '\"a str\"' from 4 to 11", "string '\"a str\"' from 4 to 11",
"whitespace ' ' from 11 to 12", "whitespace ' ' from 11 to 12",
"operator '-' from 12 to 13", "operator '-' from 12 to 13",
"whitespace ' ' from 13 to 14", "whitespace ' ' from 13 to 14",
"number '6' from 14 to 15", "number '6' from 14 to 15",
]); ]);
expect(stringSummaryLexer("a + 'str'")).toEqual([ expect(stringSummaryLexer("a + 'str'")).toEqual([
"word 'a' from 0 to 1", "word 'a' from 0 to 1",
"whitespace ' ' from 1 to 2", "whitespace ' ' from 1 to 2",
"operator '+' from 2 to 3", "operator '+' from 2 to 3",
"whitespace ' ' from 3 to 4", "whitespace ' ' from 3 to 4",
"string ''str'' from 4 to 9", "string ''str'' from 4 to 9",
]); ]);
expect(stringSummaryLexer("a +'str'")).toEqual([ expect(stringSummaryLexer("a +'str'")).toEqual([
"word 'a' from 0 to 1", "word 'a' from 0 to 1",
"whitespace ' ' from 1 to 2", "whitespace ' ' from 1 to 2",
"operator '+' from 2 to 3", "operator '+' from 2 to 3",
"string ''str'' from 3 to 8", "string ''str'' from 3 to 8",
]); ]);
expect(stringSummaryLexer("a + (sick)")).toEqual([ expect(stringSummaryLexer("a + (sick)")).toEqual([
"word 'a' from 0 to 1", "word 'a' from 0 to 1",
"whitespace ' ' from 1 to 2", "whitespace ' ' from 1 to 2",
"operator '+' from 2 to 3", "operator '+' from 2 to 3",
"whitespace ' ' from 3 to 4", "whitespace ' ' from 3 to 4",
"brace '(' from 4 to 5", "brace '(' from 4 to 5",
"word 'sick' from 5 to 9", "word 'sick' from 5 to 9",
"brace ')' from 9 to 10", "brace ')' from 9 to 10",
]); ]);
expect(stringSummaryLexer("a + { sick}")).toEqual([ expect(stringSummaryLexer("a + { sick}")).toEqual([
"word 'a' from 0 to 1", "word 'a' from 0 to 1",
"whitespace ' ' from 1 to 2", "whitespace ' ' from 1 to 2",
"operator '+' from 2 to 3", "operator '+' from 2 to 3",
"whitespace ' ' from 3 to 4", "whitespace ' ' from 3 to 4",
"brace '{' from 4 to 5", "brace '{' from 4 to 5",
"whitespace ' ' from 5 to 6", "whitespace ' ' from 5 to 6",
"word 'sick' from 6 to 10", "word 'sick' from 6 to 10",
"brace '}' from 10 to 11", "brace '}' from 10 to 11",
]);
expect(stringSummaryLexer("log('hi')")).toEqual([
"word 'log' from 0 to 3",
"brace '(' from 3 to 4",
"string ''hi'' from 4 to 8",
"brace ')' from 8 to 9",
]);
expect(stringSummaryLexer("log('hi', 'hello')")).toEqual([
"word 'log' from 0 to 3",
"brace '(' from 3 to 4",
"string ''hi'' from 4 to 8",
"comma ',' from 8 to 9",
"whitespace ' ' from 9 to 10",
"string ''hello'' from 10 to 17",
"brace ')' from 17 to 18",
]); ]);
}); });
}); });
@ -219,5 +247,5 @@ const stringSummaryLexer = (input: string) =>
`${type.padEnd(12, " ")} ${`'${value}'`.padEnd( `${type.padEnd(12, " ")} ${`'${value}'`.padEnd(
10, 10,
" " " "
)} from ${start} to ${end}` )} from ${String(start).padEnd(3, ' ')} to ${end}`
); );

View File

@ -9,6 +9,7 @@ const BLOCK_START = /^\{/;
const BLOCK_END = /^\}/; const BLOCK_END = /^\}/;
const PARAN_START = /^\(/; const PARAN_START = /^\(/;
const PARAN_END = /^\)/; const PARAN_END = /^\)/;
const COMMA = /^,/;
export const isNumber = (character: string) => NUMBER.test(character); export const isNumber = (character: string) => NUMBER.test(character);
export const isWhitespace = (character: string) => WHITESPACE.test(character); export const isWhitespace = (character: string) => WHITESPACE.test(character);
@ -19,6 +20,7 @@ export const isBlockStart = (character: string) => BLOCK_START.test(character);
export const isBlockEnd = (character: string) => BLOCK_END.test(character); export const isBlockEnd = (character: string) => BLOCK_END.test(character);
export const isParanStart = (character: string) => PARAN_START.test(character); export const isParanStart = (character: string) => PARAN_START.test(character);
export const isParanEnd = (character: string) => PARAN_END.test(character); export const isParanEnd = (character: string) => PARAN_END.test(character);
/** Returns true when `character` matches the COMMA pattern. */
export const isComma = (character: string): boolean => {
  return COMMA.test(character);
};
function matchFirst(str: string, regex: RegExp) { function matchFirst(str: string, regex: RegExp) {
const theMatch = str.match(regex); const theMatch = str.match(regex);
@ -29,7 +31,7 @@ function matchFirst(str: string, regex: RegExp) {
} }
export interface Token { export interface Token {
type: "number" | "word" | "operator" | "string" | "brace" | "whitespace"; type: "number" | "word" | "operator" | "string" | "brace" | "whitespace" | "comma";
value: string; value: string;
start: number; start: number;
end: number; end: number;
@ -62,6 +64,9 @@ const returnTokenAtIndex = (str: string, startIndex: number): Token | null => {
if (isBlockEnd(strFromIndex)) { if (isBlockEnd(strFromIndex)) {
return makeToken("brace", matchFirst(strFromIndex, BLOCK_END), startIndex); return makeToken("brace", matchFirst(strFromIndex, BLOCK_END), startIndex);
} }
if (isComma(strFromIndex)) {
return makeToken("comma", matchFirst(strFromIndex, COMMA), startIndex);
}
if (isNumber(strFromIndex)) { if (isNumber(strFromIndex)) {
return makeToken("number", matchFirst(strFromIndex, NUMBER), startIndex); return makeToken("number", matchFirst(strFromIndex, NUMBER), startIndex);
} }