add whitespace token

2022-11-12 17:47:41 +11:00
parent 8ccb18fd54
commit bbdaf2e6fb
2 changed files with 104 additions and 79 deletions
--- a/src/tokeniser.test.ts
+++ b/src/tokeniser.test.ts
@@ -126,61 +126,98 @@ describe("testing helpers", () => {
    expect(isBlockEnd("5} + 5")).toBe(false);
    expect(isBlockEnd(" } + 5")).toBe(false);
  });
-
 });

 describe("testing lexer", () => {
  it("test lexer", () => {
-    expect(lexer("1 + 2")).toEqual([
-      { type: "number", value: "1" },
-      { type: "operator", value: "+" },
-      { type: "number", value: "2" },
+    expect(stringSummaryLexer("1  + 2")).toEqual([
+      "number       '1'        from 0 to 1",
+      "whitespace   '  '       from 1 to 3",
+      "operator     '+'        from 3 to 4",
+      "whitespace   ' '        from 4 to 5",
+      "number       '2'        from 5 to 6",
    ]);
-    expect(lexer("54 + 22500 + 6")).toEqual([
-      { type: "number", value: "54" },
-      { type: "operator", value: "+" },
-      { type: "number", value: "22500" },
-      { type: "operator", value: "+" },
-      { type: "number", value: "6" },
+    expect(stringSummaryLexer("54 + 22500 + 6")).toEqual([
+      "number       '54'       from 0 to 2",
+      "whitespace   ' '        from 2 to 3",
+      "operator     '+'        from 3 to 4",
+      "whitespace   ' '        from 4 to 5",
+      "number       '22500'    from 5 to 10",
+      "whitespace   ' '        from 10 to 11",
+      "operator     '+'        from 11 to 12",
+      "whitespace   ' '        from 12 to 13",
+      "number       '6'        from 13 to 14",
    ]);
-    expect(lexer("a + bo + t5 - 6")).toEqual([
-      { type: "word", value: "a" },
-      { type: "operator", value: "+" },
-      { type: "word", value: "bo" },
-      { type: "operator", value: "+" },
-      { type: "word", value: "t5" },
-      { type: "operator", value: "-" },
-      { type: "number", value: "6" },
+    expect(stringSummaryLexer("a + bo + t5 - 6")).toEqual([
+      "word         'a'        from 0 to 1",
+      "whitespace   ' '        from 1 to 2",
+      "operator     '+'        from 2 to 3",
+      "whitespace   ' '        from 3 to 4",
+      "word         'bo'       from 4 to 6",
+      "whitespace   ' '        from 6 to 7",
+      "operator     '+'        from 7 to 8",
+      "whitespace   ' '        from 8 to 9",
+      "word         't5'       from 9 to 11",
+      "whitespace   ' '        from 11 to 12",
+      "operator     '-'        from 12 to 13",
+      "whitespace   ' '        from 13 to 14",
+      "number       '6'        from 14 to 15",
    ]);
-    expect(lexer('a + "a str" - 6')).toEqual([
-      { type: "word", value: "a" },
-      { type: "operator", value: "+" },
-      { type: "string", value: '"a str"' },
-      { type: "operator", value: "-" },
-      { type: "number", value: "6" },
+    expect(stringSummaryLexer('a + "a str" - 6')).toEqual([
+      "word         'a'        from 0 to 1",
+      "whitespace   ' '        from 1 to 2",
+      "operator     '+'        from 2 to 3",
+      "whitespace   ' '        from 3 to 4",
+      "string       '\"a str\"'  from 4 to 11",
+      "whitespace   ' '        from 11 to 12",
+      "operator     '-'        from 12 to 13",
+      "whitespace   ' '        from 13 to 14",
+      "number       '6'        from 14 to 15",
    ]);
-    const sameWithOrWithoutWhiteSpaces = [
-      { type: "word", value: "a" },
-      { type: "operator", value: "+" },
-      { type: "string", value: "'str'" },
-    ];
-    expect(lexer("a + 'str'")).toEqual(sameWithOrWithoutWhiteSpaces);
-    expect(lexer("a +'str'")).toEqual(sameWithOrWithoutWhiteSpaces);
-
-    expect(lexer("a + (sick)")).toEqual([
-      { type: "word", value: "a" },
-      { type: "operator", value: "+" },
-      { type: "brace", value: "(" },
-      { type: "word", value: "sick" },
-      { type: "brace", value: ")" },
+    expect(stringSummaryLexer("a + 'str'")).toEqual([
+      "word         'a'        from 0 to 1",
+      "whitespace   ' '        from 1 to 2",
+      "operator     '+'        from 2 to 3",
+      "whitespace   ' '        from 3 to 4",
+      "string       ''str''    from 4 to 9",
+    ]);
+    expect(stringSummaryLexer("a +'str'")).toEqual([
+      "word         'a'        from 0 to 1",
+      "whitespace   ' '        from 1 to 2",
+      "operator     '+'        from 2 to 3",
+      "string       ''str''    from 3 to 8",
    ]);

-    expect(lexer("a + {sick}")).toEqual([
-      { type: "word", value: "a" },
-      { type: "operator", value: "+" },
-      { type: "brace", value: "{" },
-      { type: "word", value: "sick" },
-      { type: "brace", value: "}" },
+    expect(stringSummaryLexer("a + (sick)")).toEqual([
+      "word         'a'        from 0 to 1",
+      "whitespace   ' '        from 1 to 2",
+      "operator     '+'        from 2 to 3",
+      "whitespace   ' '        from 3 to 4",
+      "brace        '('        from 4 to 5",
+      "word         'sick'     from 5 to 9",
+      "brace        ')'        from 9 to 10",
+    ]);
+
+    expect(stringSummaryLexer("a + { sick}")).toEqual([
+      "word         'a'        from 0 to 1",
+      "whitespace   ' '        from 1 to 2",
+      "operator     '+'        from 2 to 3",
+      "whitespace   ' '        from 3 to 4",
+      "brace        '{'        from 4 to 5",
+      "whitespace   ' '        from 5 to 6",
+      "word         'sick'     from 6 to 10",
+      "brace        '}'        from 10 to 11",
    ]);
  });
 });
+
+// helpers
+
+const stringSummaryLexer = (input: string) =>
+  lexer(input).map(
+    ({ type, value, start, end }) =>
+      `${type.padEnd(12, " ")} ${`'${value}'`.padEnd(
+        10,
+        " "
+      )} from ${start} to ${end}`
+  );
--- a/src/tokeniser.ts
+++ b/src/tokeniser.ts
@@ -30,60 +30,48 @@ function matchFirst(str: string, regex: RegExp) {
  return theMatch[0];
 }

-interface Token {
-  type: "number" | "word" | "operator" | "string" | "brace";
+export interface Token {
+  type: "number" | "word" | "operator" | "string" | "brace" | "whitespace";
  value: string;
+  start: number;
+  end: number;
 }

+const makeToken = (type: Token["type"], value: string, start: number): Token => ({
+  type,
+  value,
+  start,
+  end: start + value.length,
+})
+
 const returnTokenAtIndex = (str: string, startIndex: number): Token | null => {
  const strFromIndex = str.slice(startIndex);
  if (isOperator(strFromIndex)) {
-    return {
-      type: "operator",
-      value: matchFirst(strFromIndex, OPERATOR),
-    };
+    return makeToken("operator", matchFirst(strFromIndex, OPERATOR), startIndex);
  }
  if (isString(strFromIndex)) {
-    return {
-      type: "string",
-      value: matchFirst(strFromIndex, STRING),
-    };
+    return makeToken("string", matchFirst(strFromIndex, STRING), startIndex);
  }
  if (isParanEnd(strFromIndex)) {
-    return {
-      type: "brace",
-      value: matchFirst(strFromIndex, PARAN_END),
-    };
+    return makeToken("brace", matchFirst(strFromIndex, PARAN_END), startIndex);
  }
  if (isParanStart(strFromIndex)) {
-    return {
-      type: "brace",
-      value: matchFirst(strFromIndex, PARAN_START),
-    };
+    return makeToken("brace", matchFirst(strFromIndex, PARAN_START), startIndex);
  }
  if (isBlockStart(strFromIndex)) {
-    return {
-      type: "brace",
-      value: matchFirst(strFromIndex, BLOCK_START),
-    };
+    return makeToken("brace", matchFirst(strFromIndex, BLOCK_START), startIndex);
  }
  if (isBlockEnd(strFromIndex)) {
-    return {
-      type: "brace",
-      value: matchFirst(strFromIndex, BLOCK_END),
-    };
+    return makeToken("brace", matchFirst(strFromIndex, BLOCK_END), startIndex);
  }
  if (isNumber(strFromIndex)) {
-    return {
-      type: "number",
-      value: matchFirst(strFromIndex, NUMBER),
-    };
+    return makeToken("number", matchFirst(strFromIndex, NUMBER), startIndex);
  }
  if (isWord(strFromIndex)) {
-    return {
-      type: "word",
-      value: matchFirst(strFromIndex, WORD),
-    };
+    return makeToken("word", matchFirst(strFromIndex, WORD), startIndex);
+  }
+  if (isWhitespace(strFromIndex)) {
+    return makeToken("whitespace", matchFirst(strFromIndex, WHITESPACE), startIndex);
  }
  return null;
 };