Add callee expression parsing

2022-11-14 13:28:16 +11:00
parent 24189282ec
commit aed0763770
5 changed files with 233 additions and 68 deletions
--- a/src/lang/abstractSyntaxTree.test.ts
+++ b/src/lang/abstractSyntaxTree.test.ts
@ -143,4 +143,50 @@ const newVar = myVar + 1
      },
    ]);
  });
+  // Parses a single call statement and checks the full AST shape.
+  // `start`/`end` are character offsets into `code`: the whole call spans
+  // 0-28, the callee `log` 0-3, and each argument carries its own range.
+  // For the string argument, `value` has the quotes stripped while `raw`
+  // keeps them exactly as written in the source.
+  test('test using std function "log"', () => {
+    const code = `log(5, "hello", aIdentifier)`;
+    const tokens = lexer(code);
+    const { body } = abstractSyntaxTree(tokens);
+    expect(body).toEqual([
+      {
+        "type": "ExpressionStatement",
+        "start": 0,
+        "end": 28,
+        "expression": {
+          "type": "CallExpression",
+          "start": 0,
+          "end": 28,
+          "callee": {
+            "type": "Identifier",
+            "start": 0,
+            "end": 3,
+            "name": "log"
+          },
+          "arguments": [
+            {
+              "type": "Literal",
+              "start": 4,
+              "end": 5,
+              "value": 5,
+              "raw": "5"
+            },
+            {
+              "type": "Literal",
+              "start": 7,
+              "end": 14,
+              "value": "hello",
+              "raw": "\"hello\""
+            },
+            {
+              "type": "Identifier",
+              "start": 16,
+              "end": 27,
+              "name": "aIdentifier"
+            }
+          ],
+          "optional": false
+        }
+      }
+    ]);
+  });
 });
--- a/src/lang/abstractSyntaxTree.ts
+++ b/src/lang/abstractSyntaxTree.ts
@ -87,7 +87,7 @@ interface GeneralStatement {

 interface ExpressionStatement extends GeneralStatement {
  type: "ExpressionStatement";
-  expression: BinaryExpression;
+  expression: BinaryExpression | CallExpression;
 }

 function makeExpressionStatement(
@ -95,8 +95,17 @@ function makeExpressionStatement(
  index: number
 ): ExpressionStatement {
  const currentToken = tokens[index];
-  // if (nextToken.type === "operator") {
-  // }
+  const { token: nextToken } = nextMeaningfulToken(tokens, index);
+  if (nextToken.type === "brace" && nextToken.value === "(") {
+    const { expression } = makeCallExpression(tokens, index);
+    return {
+      type: "ExpressionStatement",
+      start: currentToken.start,
+      end: expression.end,
+      expression,
+    };
+  }
+
  const { expression } = makeBinaryExpression(tokens, index);
  return {
    type: "ExpressionStatement",
@ -106,6 +115,79 @@ function makeExpressionStatement(
  };
 }

+/**
+ * AST node for a function call such as `log(5, "hello", aIdentifier)`.
+ */
+interface CallExpression extends GeneralStatement {
+  type: "CallExpression";
+  /** The function being called; only a plain identifier is supported. */
+  callee: Identifier;
+  /** Parsed arguments, in source order; same node kinds a variable initialiser allows. */
+  arguments: VariableDeclarator["init"][];
+  /** `makeCallExpression` always sets this to `false`. */
+  optional: boolean;
+}
+
+/**
+ * Builds a `CallExpression` node for a call whose callee identifier sits at
+ * `tokens[index]` and is followed by a parenthesised argument list.
+ *
+ * @param tokens - the full token list produced by the lexer
+ * @param index - index of the callee token
+ * @returns the call node plus `lastIndex`, the index of the closing brace
+ *   token that ends the argument list
+ */
+function makeCallExpression(
+  tokens: Token[],
+  index: number
+): {
+  expression: CallExpression;
+  lastIndex: number;
+} {
+  const calleeToken = tokens[index];
+  const { index: openingBraceIndex } = nextMeaningfulToken(tokens, index);
+  const callee = makeIdentifier(tokens, index);
+  const { arguments: callArguments, lastIndex } = makeArguments(
+    tokens,
+    openingBraceIndex
+  );
+  // makeArguments stops on the closing brace, so its end offset closes the call.
+  const expression: CallExpression = {
+    type: "CallExpression",
+    start: calleeToken.start,
+    end: tokens[lastIndex].end,
+    callee,
+    arguments: callArguments,
+    optional: false,
+  };
+  return { expression, lastIndex };
+}
+
+/**
+ * Recursively collects the arguments of a call expression.
+ *
+ * Each step looks at the separator at `tokens[index]` (the opening "(", a
+ * "," between arguments, or the closing ")"), parses the argument that
+ * follows it, and recurses on the next separator until ")" is reached.
+ *
+ * @param tokens - the full token list produced by the lexer
+ * @param index - index of the "(" or "," that precedes the next argument, or
+ *   of the closing ")" when the list is finished
+ * @param previousArgs - arguments gathered by earlier recursion steps
+ * @returns the parsed arguments in source order and `lastIndex`, the index
+ *   of the closing ")" token
+ * @throws Error if the tokens run out before ")" or an argument is neither a
+ *   word, number, string nor a binary expression
+ */
+function makeArguments(
+  tokens: Token[],
+  index: number,
+  previousArgs: VariableDeclarator["init"][] = []
+): {
+  arguments: VariableDeclarator["init"][];
+  lastIndex: number;
+} {
+  const isClosingBrace = (token: Token) =>
+    token.type === "brace" && token.value === ")";
+  const unterminated = () =>
+    new Error("Unexpected end of input while parsing call arguments");
+
+  const braceOrCommaToken = tokens[index];
+  if (!braceOrCommaToken) {
+    throw unterminated();
+  }
+  if (isClosingBrace(braceOrCommaToken)) {
+    return {
+      arguments: previousArgs,
+      lastIndex: index,
+    };
+  }
+
+  const argumentToken = nextMeaningfulToken(tokens, index);
+  if (!argumentToken.token) {
+    throw unterminated();
+  }
+  // A ")" directly after the separator means there is no further argument:
+  // this covers the empty list `log()` (and a trailing comma).
+  if (isClosingBrace(argumentToken.token)) {
+    return {
+      arguments: previousArgs,
+      lastIndex: argumentToken.index,
+    };
+  }
+
+  const nextBraceOrCommaToken = nextMeaningfulToken(tokens, argumentToken.index);
+  if (!nextBraceOrCommaToken.token) {
+    throw unterminated();
+  }
+  // A lone identifier/literal is immediately followed by "," or a brace;
+  // anything else (e.g. an operator) means the argument is a binary expression.
+  const isIdentifierOrLiteral =
+    nextBraceOrCommaToken.token.type === "comma" ||
+    nextBraceOrCommaToken.token.type === "brace";
+  if (!isIdentifierOrLiteral) {
+    // The binary expression starts at the argument's first operand, not at
+    // the separator before it.
+    const { expression, lastIndex } = makeBinaryExpression(
+      tokens,
+      argumentToken.index
+    );
+    // NOTE(review): assumes `lastIndex` is the last token of the expression,
+    // as with the other make* helpers here - step past it to the separator.
+    const { index: separatorIndex } = nextMeaningfulToken(tokens, lastIndex);
+    return makeArguments(tokens, separatorIndex, [...previousArgs, expression]);
+  }
+  if (argumentToken.token.type === "word") {
+    const identifier = makeIdentifier(tokens, argumentToken.index);
+    return makeArguments(tokens, nextBraceOrCommaToken.index, [
+      ...previousArgs,
+      identifier,
+    ]);
+  } else if (
+    argumentToken.token.type === "number" ||
+    argumentToken.token.type === "string"
+  ) {
+    const literal = makeLiteral(tokens, argumentToken.index);
+    return makeArguments(tokens, nextBraceOrCommaToken.index, [
+      ...previousArgs,
+      literal,
+    ]);
+  }
+  throw new Error("Expected a previous if statement to match");
+}
+
 interface VariableDeclaration extends GeneralStatement {
  type: "VariableDeclaration";
  declarations: VariableDeclarator[];
@ -174,7 +256,7 @@ function makeVariableDeclarators(
  };
 }

-type BinaryPart = Literal | Identifier;
+export type BinaryPart = Literal | Identifier;
 // | BinaryExpression
 // | CallExpression
 // | MemberExpression
@ -207,7 +289,8 @@ function makeIdentifier(token: Token[], index: number): Identifier {

 function makeLiteral(tokens: Token[], index: number): Literal {
  const token = tokens[index];
-  const value = token.type === "number" ? Number(token.value) : token.value;
+  const value =
+    token.type === "number" ? Number(token.value) : token.value.slice(1, -1);
  return {
    type: "Literal",
    start: token.start,
@ -295,6 +378,9 @@ export const abstractSyntaxTree = (tokens: Token[]): Program => {
      const nextThing = nextMeaningfulToken(tokens, lastIndex);
      return startTree(tokens, nextThing.index, [...previousBody, declaration]);
    }
+    if (token.type === "word" && token.value === "log") {
+      return [...previousBody, makeExpressionStatement(tokens, tokenIndex)];
+    }
    if (
      (token.type === "number" || token.type === "word") &&
      nextMeaningfulToken(tokens, tokenIndex).token.type === "operator"
@ -302,7 +388,6 @@ export const abstractSyntaxTree = (tokens: Token[]): Program => {
      // return startTree(tokens, tokenIndex, [...previousBody, makeExpressionStatement(tokens, tokenIndex)]);
      return [...previousBody, makeExpressionStatement(tokens, tokenIndex)];
    }
-    console.log(tokenIndex, tokens.length, token);
    throw new Error("Unexpected token");
  };
  const body = startTree(tokens);
--- a/src/lang/testExamples/variableDeclaration.cado
+++ b/src/lang/testExamples/variableDeclaration.cado
@ -0,0 +1 @@
+const myVar = "a str" + " another str"
--- a/src/lang/tokeniser.test.ts
+++ b/src/lang/tokeniser.test.ts
@ -8,6 +8,7 @@ import {
  isString,
  isWhitespace,
  isWord,
+  isComma,
  lexer,
 } from "./tokeniser";

@ -126,87 +127,114 @@ describe("testing helpers", () => {
    expect(isBlockEnd("5} + 5")).toBe(false);
    expect(isBlockEnd(" } + 5")).toBe(false);
  });
+  // isComma tests its input against a regex anchored at the start of the
+  // string, so only inputs that begin with "," match; a comma after other
+  // characters (including leading whitespace) does not.
+  it("test is comma", () => {
+    expect(isComma(",")).toBe(true);
+    expect(isComma(", ")).toBe(true);
+    expect(isComma(",5")).toBe(true);
+    expect(isComma(",5 ")).toBe(true);
+
+    expect(isComma("5")).toBe(false);
+    expect(isComma("5 + 5")).toBe(false);
+    expect(isComma("5, + 5")).toBe(false);
+    expect(isComma(" , + 5")).toBe(false);
+  })
 });

 describe("testing lexer", () => {
  it("test lexer", () => {
    expect(stringSummaryLexer("1  + 2")).toEqual([
-      "number       '1'        from 0 to 1",
-      "whitespace   '  '       from 1 to 3",
-      "operator     '+'        from 3 to 4",
-      "whitespace   ' '        from 4 to 5",
-      "number       '2'        from 5 to 6",
+      "number       '1'        from 0   to 1",
+      "whitespace   '  '       from 1   to 3",
+      "operator     '+'        from 3   to 4",
+      "whitespace   ' '        from 4   to 5",
+      "number       '2'        from 5   to 6",
    ]);
    expect(stringSummaryLexer("54 + 22500 + 6")).toEqual([
-      "number       '54'       from 0 to 2",
-      "whitespace   ' '        from 2 to 3",
-      "operator     '+'        from 3 to 4",
-      "whitespace   ' '        from 4 to 5",
-      "number       '22500'    from 5 to 10",
-      "whitespace   ' '        from 10 to 11",
-      "operator     '+'        from 11 to 12",
-      "whitespace   ' '        from 12 to 13",
-      "number       '6'        from 13 to 14",
+      "number       '54'       from 0   to 2",
+      "whitespace   ' '        from 2   to 3",
+      "operator     '+'        from 3   to 4",
+      "whitespace   ' '        from 4   to 5",
+      "number       '22500'    from 5   to 10",
+      "whitespace   ' '        from 10  to 11",
+      "operator     '+'        from 11  to 12",
+      "whitespace   ' '        from 12  to 13",
+      "number       '6'        from 13  to 14",
    ]);
    expect(stringSummaryLexer("a + bo + t5 - 6")).toEqual([
-      "word         'a'        from 0 to 1",
-      "whitespace   ' '        from 1 to 2",
-      "operator     '+'        from 2 to 3",
-      "whitespace   ' '        from 3 to 4",
-      "word         'bo'       from 4 to 6",
-      "whitespace   ' '        from 6 to 7",
-      "operator     '+'        from 7 to 8",
-      "whitespace   ' '        from 8 to 9",
-      "word         't5'       from 9 to 11",
-      "whitespace   ' '        from 11 to 12",
-      "operator     '-'        from 12 to 13",
-      "whitespace   ' '        from 13 to 14",
-      "number       '6'        from 14 to 15",
+      "word         'a'        from 0   to 1",
+      "whitespace   ' '        from 1   to 2",
+      "operator     '+'        from 2   to 3",
+      "whitespace   ' '        from 3   to 4",
+      "word         'bo'       from 4   to 6",
+      "whitespace   ' '        from 6   to 7",
+      "operator     '+'        from 7   to 8",
+      "whitespace   ' '        from 8   to 9",
+      "word         't5'       from 9   to 11",
+      "whitespace   ' '        from 11  to 12",
+      "operator     '-'        from 12  to 13",
+      "whitespace   ' '        from 13  to 14",
+      "number       '6'        from 14  to 15",
    ]);
    expect(stringSummaryLexer('a + "a str" - 6')).toEqual([
-      "word         'a'        from 0 to 1",
-      "whitespace   ' '        from 1 to 2",
-      "operator     '+'        from 2 to 3",
-      "whitespace   ' '        from 3 to 4",
-      "string       '\"a str\"'  from 4 to 11",
-      "whitespace   ' '        from 11 to 12",
-      "operator     '-'        from 12 to 13",
-      "whitespace   ' '        from 13 to 14",
-      "number       '6'        from 14 to 15",
+      "word         'a'        from 0   to 1",
+      "whitespace   ' '        from 1   to 2",
+      "operator     '+'        from 2   to 3",
+      "whitespace   ' '        from 3   to 4",
+      "string       '\"a str\"'  from 4   to 11",
+      "whitespace   ' '        from 11  to 12",
+      "operator     '-'        from 12  to 13",
+      "whitespace   ' '        from 13  to 14",
+      "number       '6'        from 14  to 15",
    ]);
    expect(stringSummaryLexer("a + 'str'")).toEqual([
-      "word         'a'        from 0 to 1",
-      "whitespace   ' '        from 1 to 2",
-      "operator     '+'        from 2 to 3",
-      "whitespace   ' '        from 3 to 4",
-      "string       ''str''    from 4 to 9",
+      "word         'a'        from 0   to 1",
+      "whitespace   ' '        from 1   to 2",
+      "operator     '+'        from 2   to 3",
+      "whitespace   ' '        from 3   to 4",
+      "string       ''str''    from 4   to 9",
    ]);
    expect(stringSummaryLexer("a +'str'")).toEqual([
-      "word         'a'        from 0 to 1",
-      "whitespace   ' '        from 1 to 2",
-      "operator     '+'        from 2 to 3",
-      "string       ''str''    from 3 to 8",
+      "word         'a'        from 0   to 1",
+      "whitespace   ' '        from 1   to 2",
+      "operator     '+'        from 2   to 3",
+      "string       ''str''    from 3   to 8",
    ]);

    expect(stringSummaryLexer("a + (sick)")).toEqual([
-      "word         'a'        from 0 to 1",
-      "whitespace   ' '        from 1 to 2",
-      "operator     '+'        from 2 to 3",
-      "whitespace   ' '        from 3 to 4",
-      "brace        '('        from 4 to 5",
-      "word         'sick'     from 5 to 9",
-      "brace        ')'        from 9 to 10",
+      "word         'a'        from 0   to 1",
+      "whitespace   ' '        from 1   to 2",
+      "operator     '+'        from 2   to 3",
+      "whitespace   ' '        from 3   to 4",
+      "brace        '('        from 4   to 5",
+      "word         'sick'     from 5   to 9",
+      "brace        ')'        from 9   to 10",
    ]);

    expect(stringSummaryLexer("a + { sick}")).toEqual([
-      "word         'a'        from 0 to 1",
-      "whitespace   ' '        from 1 to 2",
-      "operator     '+'        from 2 to 3",
-      "whitespace   ' '        from 3 to 4",
-      "brace        '{'        from 4 to 5",
-      "whitespace   ' '        from 5 to 6",
-      "word         'sick'     from 6 to 10",
-      "brace        '}'        from 10 to 11",
+      "word         'a'        from 0   to 1",
+      "whitespace   ' '        from 1   to 2",
+      "operator     '+'        from 2   to 3",
+      "whitespace   ' '        from 3   to 4",
+      "brace        '{'        from 4   to 5",
+      "whitespace   ' '        from 5   to 6",
+      "word         'sick'     from 6   to 10",
+      "brace        '}'        from 10  to 11",
+    ]);
+
+    expect(stringSummaryLexer("log('hi')")).toEqual([
+      "word         'log'      from 0   to 3",
+      "brace        '('        from 3   to 4",
+      "string       ''hi''     from 4   to 8",
+      "brace        ')'        from 8   to 9",
+    ]);
+    expect(stringSummaryLexer("log('hi', 'hello')")).toEqual([
+      "word         'log'      from 0   to 3",
+      "brace        '('        from 3   to 4",
+      "string       ''hi''     from 4   to 8",
+      "comma        ','        from 8   to 9",
+      "whitespace   ' '        from 9   to 10",
+      "string       ''hello''  from 10  to 17",
+      "brace        ')'        from 17  to 18",
    ]);
  });
 });
@ -219,5 +247,5 @@ const stringSummaryLexer = (input: string) =>
      `${type.padEnd(12, " ")} ${`'${value}'`.padEnd(
        10,
        " "
-      )} from ${start} to ${end}`
+      )} from ${String(start).padEnd(3, ' ')} to ${end}`
  );
--- a/src/lang/tokeniser.ts
+++ b/src/lang/tokeniser.ts
@ -9,6 +9,7 @@ const BLOCK_START = /^\{/;
 const BLOCK_END = /^\}/;
 const PARAN_START = /^\(/;
 const PARAN_END = /^\)/;
+const COMMA = /^,/;

 export const isNumber = (character: string) => NUMBER.test(character);
 export const isWhitespace = (character: string) => WHITESPACE.test(character);
@ -19,6 +20,7 @@ export const isBlockStart = (character: string) => BLOCK_START.test(character);
 export const isBlockEnd = (character: string) => BLOCK_END.test(character);
 export const isParanStart = (character: string) => PARAN_START.test(character);
 export const isParanEnd = (character: string) => PARAN_END.test(character);
+export const isComma = (character: string) => COMMA.test(character);

 function matchFirst(str: string, regex: RegExp) {
  const theMatch = str.match(regex);
@ -29,7 +31,7 @@ function matchFirst(str: string, regex: RegExp) {
 }

 export interface Token {
-  type: "number" | "word" | "operator" | "string" | "brace" | "whitespace";
+  type: "number" | "word" | "operator" | "string" | "brace" | "whitespace" | "comma";
  value: string;
  start: number;
  end: number;
@ -62,6 +64,9 @@ const returnTokenAtIndex = (str: string, startIndex: number): Token | null => {
  if (isBlockEnd(strFromIndex)) {
    return makeToken("brace", matchFirst(strFromIndex, BLOCK_END), startIndex);
  }
+  if (isComma(strFromIndex)) {
+    return makeToken("comma", matchFirst(strFromIndex, COMMA), startIndex);
+  }
  if (isNumber(strFromIndex)) {
    return makeToken("number", matchFirst(strFromIndex, NUMBER), startIndex);
  }