Refactor lexer to remove mutation

2022-11-12 13:33:35 +11:00
parent 3b095c25d7
commit 8ccb18fd54
1 changed file with 72 additions and 81 deletions
--- a/src/tokeniser.ts
+++ b/src/tokeniser.ts
@@ -7,12 +7,11 @@ const WORD = /^[a-zA-Z_][a-zA-Z0-9_]*/;
 const STRING = /^(["'])(?:(?=(\\?))\2.)*?\1/;
 // regex for operators
 const OPERATOR = /^[>=|<=|+|\-|*|/|>|<|^|%]/;
-const BLOCK_START = /^\{/
-const BLOCK_END = /^\}/
+const BLOCK_START = /^\{/;
+const BLOCK_END = /^\}/;
 const PARAN_START = /^\(/;
 const PARAN_END = /^\)/;

-
 export const isNumber = (character: string) => NUMBER.test(character);
 export const isWhitespace = (character: string) => WHITESPACE.test(character);
 export const isWord = (character: string) => WORD.test(character);
@@ -23,7 +22,6 @@ export const isBlockEnd = (character: string) => BLOCK_END.test(character);
 export const isParanStart = (character: string) => PARAN_START.test(character);
 export const isParanEnd = (character: string) => PARAN_END.test(character);

-
 function matchFirst(str: string, regex: RegExp) {
  const theMatch = str.match(regex);
  if (!theMatch) {
@@ -32,88 +30,81 @@ function matchFirst(str: string, regex: RegExp) {
  return theMatch[0];
 }

-// type TokenTypes = 
 interface Token {
-  type: 'number' | 'word' | 'operator' | 'string' | 'brace'
+  type: "number" | "word" | "operator" | "string" | "brace";
  value: string;
 }

-export const lexer = (str: string): Token[] => {
-  const tokens: Token[] = [];
-  let currentIndex = 0;
-
-  const returnTokenAtIndex = (
-    str: string,
-    startIndex: number
-  ): Token | null => {
-    const strFromIndex = str.slice(startIndex);
-    if (isOperator(strFromIndex)) {
-      return {
-        type: "operator",
-        value: matchFirst(strFromIndex, OPERATOR),
-      };
-    }
-    if (isString(strFromIndex)) {
-      return {
-        type: "string",
-        value: matchFirst(strFromIndex, STRING),
-      };
-    }
-    if(isParanEnd(strFromIndex)){
-      return {
-        type: "brace",
-        value: matchFirst(strFromIndex, PARAN_END),
-      };
-    }
-    if(isParanStart(strFromIndex)){
-      return {
-        type: "brace",
-        value: matchFirst(strFromIndex, PARAN_START),
-      };
-    }
-    if(isBlockStart(strFromIndex)){
-      return {
-        type: "brace",
-        value: matchFirst(strFromIndex, BLOCK_START),
-      };
-    }
-    if(isBlockEnd(strFromIndex)){
-      return {
-        type: "brace",
-        value: matchFirst(strFromIndex, BLOCK_END),
-      };
-    }
-    if (isNumber(strFromIndex)) {
-      return {
-        type: "number",
-        value: matchFirst(strFromIndex, NUMBER),
-      };
-    }
-    if(isWord(strFromIndex)) {
-      return {
-        type: "word",
-        value: matchFirst(strFromIndex, WORD),
-      };
-    }
-    return null;
-  };
-  while (currentIndex < str.length) {
-    const token = returnTokenAtIndex(str, currentIndex);
-    if (token) {
-      tokens.push(token);
-      currentIndex += token.value.length;
-    } else {
-      currentIndex++;
-    }
+const returnTokenAtIndex = (str: string, startIndex: number): Token | null => {
+  const strFromIndex = str.slice(startIndex);
+  if (isOperator(strFromIndex)) {
+    return {
+      type: "operator",
+      value: matchFirst(strFromIndex, OPERATOR),
+    };
  }
-
-  return tokens
+  if (isString(strFromIndex)) {
+    return {
+      type: "string",
+      value: matchFirst(strFromIndex, STRING),
+    };
+  }
+  if (isParanEnd(strFromIndex)) {
+    return {
+      type: "brace",
+      value: matchFirst(strFromIndex, PARAN_END),
+    };
+  }
+  if (isParanStart(strFromIndex)) {
+    return {
+      type: "brace",
+      value: matchFirst(strFromIndex, PARAN_START),
+    };
+  }
+  if (isBlockStart(strFromIndex)) {
+    return {
+      type: "brace",
+      value: matchFirst(strFromIndex, BLOCK_START),
+    };
+  }
+  if (isBlockEnd(strFromIndex)) {
+    return {
+      type: "brace",
+      value: matchFirst(strFromIndex, BLOCK_END),
+    };
+  }
+  if (isNumber(strFromIndex)) {
+    return {
+      type: "number",
+      value: matchFirst(strFromIndex, NUMBER),
+    };
+  }
+  if (isWord(strFromIndex)) {
+    return {
+      type: "word",
+      value: matchFirst(strFromIndex, WORD),
+    };
+  }
+  return null;
 };

-async function main() {
-  const example1 = await fsp.readFile("./examples/addition.cado", "ascii");
-  const parsed = lexer(example1);
-  console.log(parsed);
-}
+export const lexer = (str: string): Token[] => {
+  const recursivelyTokenise = (
+    str: string,
+    currentIndex: number = 0,
+    previousTokens: Token[] = []
+  ): Token[] => {
+    if (currentIndex >= str.length) {
+      return previousTokens;
+    }
+    const token = returnTokenAtIndex(str, currentIndex);
+    if (!token) {
+      return recursivelyTokenise(str, currentIndex + 1, previousTokens);
+    }
+    const nextIndex = currentIndex + token.value.length;
+    return recursivelyTokenise(str, nextIndex, [...previousTokens, token]);
+  };
+  return recursivelyTokenise(str);
+};

-// main()
+// const example1 = await fsp.readFile("./examples/addition.cado", "ascii");