refactor to remove mutation
This commit is contained in:
		
							
								
								
									
										153
									
								
								src/tokeniser.ts
									
									
									
									
									
								
							
							
						
						
									
										153
									
								
								src/tokeniser.ts
									
									
									
									
									
								
							@ -7,12 +7,11 @@ const WORD = /^[a-zA-Z_][a-zA-Z0-9_]*/;
 | 
			
		||||
const STRING = /^(["'])(?:(?=(\\?))\2.)*?\1/;
 | 
			
		||||
// regex for operators
 | 
			
		||||
// Matches a single operator at the start of the input.
// The two-character comparisons `>=` and `<=` are tried first so that each is
// returned as one token; the previous pattern was one character class, which
// can only ever match a single character and therefore split `>=` into `>`
// followed by `=`.
// NOTE(review): `=` and `|` are kept only because the old character class
// accepted them — confirm whether they are meant to be operators.
const OPERATOR = /^(?:>=|<=|[>=|<+\-*\/^%])/;
 | 
			
		||||
const BLOCK_START = /^\{/
 | 
			
		||||
const BLOCK_END = /^\}/
 | 
			
		||||
const BLOCK_START = /^\{/;
 | 
			
		||||
const BLOCK_END = /^\}/;
 | 
			
		||||
const PARAN_START = /^\(/;
 | 
			
		||||
const PARAN_END = /^\)/;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/**
 * Reports whether the NUMBER pattern matches `character`.
 * Despite the parameter name, the tokeniser passes the remaining input text.
 */
export const isNumber = (character: string) => {
  return NUMBER.test(character);
};
 | 
			
		||||
/**
 * Reports whether the WHITESPACE pattern matches `character`.
 */
export const isWhitespace = (character: string) => {
  return WHITESPACE.test(character);
};
 | 
			
		||||
/**
 * Reports whether the WORD pattern matches `character`.
 * Despite the parameter name, the tokeniser passes the remaining input text.
 */
export const isWord = (character: string) => {
  return WORD.test(character);
};
 | 
			
		||||
@ -23,7 +22,6 @@ export const isBlockEnd = (character: string) => BLOCK_END.test(character);
 | 
			
		||||
/**
 * Reports whether `character` starts with an opening parenthesis
 * (the PARAN_START pattern).
 */
export const isParanStart = (character: string) => {
  return PARAN_START.test(character);
};
 | 
			
		||||
/**
 * Reports whether `character` starts with a closing parenthesis
 * (the PARAN_END pattern).
 */
export const isParanEnd = (character: string) => {
  return PARAN_END.test(character);
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
function matchFirst(str: string, regex: RegExp) {
 | 
			
		||||
  const theMatch = str.match(regex);
 | 
			
		||||
  if (!theMatch) {
 | 
			
		||||
@ -32,88 +30,81 @@ function matchFirst(str: string, regex: RegExp) {
 | 
			
		||||
  return theMatch[0];
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// type TokenTypes = 
 | 
			
		||||
/**
 * A single lexical token produced by the tokeniser.
 */
interface Token {
  /** Category of the token; parentheses and curly braces both use "brace". */
  type: "number" | "word" | "operator" | "string" | "brace";
  /** The source text that the token's pattern matched. */
  value: string;
}
 | 
			
		||||
 | 
			
		||||
export const lexer = (str: string): Token[] => {
 | 
			
		||||
  const tokens: Token[] = [];
 | 
			
		||||
  let currentIndex = 0;
 | 
			
		||||
 | 
			
		||||
  const returnTokenAtIndex = (
 | 
			
		||||
    str: string,
 | 
			
		||||
    startIndex: number
 | 
			
		||||
  ): Token | null => {
 | 
			
		||||
    const strFromIndex = str.slice(startIndex);
 | 
			
		||||
    if (isOperator(strFromIndex)) {
 | 
			
		||||
      return {
 | 
			
		||||
        type: "operator",
 | 
			
		||||
        value: matchFirst(strFromIndex, OPERATOR),
 | 
			
		||||
      };
 | 
			
		||||
    }
 | 
			
		||||
    if (isString(strFromIndex)) {
 | 
			
		||||
      return {
 | 
			
		||||
        type: "string",
 | 
			
		||||
        value: matchFirst(strFromIndex, STRING),
 | 
			
		||||
      };
 | 
			
		||||
    }
 | 
			
		||||
    if(isParanEnd(strFromIndex)){
 | 
			
		||||
      return {
 | 
			
		||||
        type: "brace",
 | 
			
		||||
        value: matchFirst(strFromIndex, PARAN_END),
 | 
			
		||||
      };
 | 
			
		||||
    }
 | 
			
		||||
    if(isParanStart(strFromIndex)){
 | 
			
		||||
      return {
 | 
			
		||||
        type: "brace",
 | 
			
		||||
        value: matchFirst(strFromIndex, PARAN_START),
 | 
			
		||||
      };
 | 
			
		||||
    }
 | 
			
		||||
    if(isBlockStart(strFromIndex)){
 | 
			
		||||
      return {
 | 
			
		||||
        type: "brace",
 | 
			
		||||
        value: matchFirst(strFromIndex, BLOCK_START),
 | 
			
		||||
      };
 | 
			
		||||
    }
 | 
			
		||||
    if(isBlockEnd(strFromIndex)){
 | 
			
		||||
      return {
 | 
			
		||||
        type: "brace",
 | 
			
		||||
        value: matchFirst(strFromIndex, BLOCK_END),
 | 
			
		||||
      };
 | 
			
		||||
    }
 | 
			
		||||
    if (isNumber(strFromIndex)) {
 | 
			
		||||
      return {
 | 
			
		||||
        type: "number",
 | 
			
		||||
        value: matchFirst(strFromIndex, NUMBER),
 | 
			
		||||
      };
 | 
			
		||||
    }
 | 
			
		||||
    if(isWord(strFromIndex)) {
 | 
			
		||||
      return {
 | 
			
		||||
        type: "word",
 | 
			
		||||
        value: matchFirst(strFromIndex, WORD),
 | 
			
		||||
      };
 | 
			
		||||
    }
 | 
			
		||||
    return null;
 | 
			
		||||
  };
 | 
			
		||||
  while (currentIndex < str.length) {
 | 
			
		||||
    const token = returnTokenAtIndex(str, currentIndex);
 | 
			
		||||
    if (token) {
 | 
			
		||||
      tokens.push(token);
 | 
			
		||||
      currentIndex += token.value.length;
 | 
			
		||||
    } else {
 | 
			
		||||
      currentIndex++;
 | 
			
		||||
    }
 | 
			
		||||
/**
 * Ordered list of token matchers: a predicate that decides whether the
 * remaining text starts with this kind of token, the pattern used to extract
 * the matched text, and the token type to report.
 *
 * The order is significant: the first predicate that accepts the text wins.
 */
const TOKEN_MATCHERS: ReadonlyArray<
  readonly [(text: string) => boolean, RegExp, Token["type"]]
> = [
  [isOperator, OPERATOR, "operator"],
  [isString, STRING, "string"],
  [isParanEnd, PARAN_END, "brace"],
  [isParanStart, PARAN_START, "brace"],
  [isBlockStart, BLOCK_START, "brace"],
  [isBlockEnd, BLOCK_END, "brace"],
  [isNumber, NUMBER, "number"],
  [isWord, WORD, "word"],
];

/**
 * Reads the token that starts at `startIndex` in `str`.
 *
 * @param str - The full source text being tokenised.
 * @param startIndex - Offset in `str` at which to look for a token.
 * @returns The token found at that offset, or `null` when no matcher accepts
 *   the text there (the caller is expected to skip that character).
 */
const returnTokenAtIndex = (str: string, startIndex: number): Token | null => {
  const strFromIndex = str.slice(startIndex);

  for (const [accepts, pattern, type] of TOKEN_MATCHERS) {
    if (accepts(strFromIndex)) {
      return { type, value: matchFirst(strFromIndex, pattern) };
    }
  }

  return null;
};
 | 
			
		||||
 | 
			
		||||
/**
 * Ad-hoc driver: reads ./examples/addition.cado, runs it through `lexer`,
 * and prints the resulting token list to the console.
 */
async function main() {
  const source = await fsp.readFile("./examples/addition.cado", "ascii");
  console.log(lexer(source));
}
 | 
			
		||||
/**
 * Splits `str` into a list of tokens.
 *
 * Walks the input from left to right, asking `returnTokenAtIndex` for the
 * token at the current offset. When a token is found it is collected and the
 * offset moves past its text; when nothing matches, that one character is
 * skipped.
 *
 * The accumulator is local to this call, so the function stays pure from the
 * caller's point of view. A loop is used instead of self-recursion because
 * the recursive form needs one stack frame per token and per skipped
 * character, which overflows the call stack on long inputs, and copying the
 * accumulator on every token made the work quadratic.
 *
 * @param str - The source text to tokenise.
 * @returns The tokens in the order they appear in `str`.
 */
export const lexer = (str: string): Token[] => {
  const tokens: Token[] = [];
  let currentIndex = 0;

  while (currentIndex < str.length) {
    const token = returnTokenAtIndex(str, currentIndex);
    if (!token) {
      currentIndex += 1;
      continue;
    }
    tokens.push(token);
    // Always advance by at least one character so an empty match cannot
    // stall the scan.
    currentIndex += Math.max(1, token.value.length);
  }

  return tokens;
};
 | 
			
		||||
 | 
			
		||||
// main()
 | 
			
		||||
// const example1 = await fsp.readFile("./examples/addition.cado", "ascii");
 | 
			
		||||
 | 
			
		||||
		Reference in New Issue
	
	Block a user