diff --git a/src/tokeniser.ts b/src/tokeniser.ts index 44aef8c3b..0c2861de8 100644 --- a/src/tokeniser.ts +++ b/src/tokeniser.ts @@ -7,12 +7,11 @@ const WORD = /^[a-zA-Z_][a-zA-Z0-9_]*/; const STRING = /^(["'])(?:(?=(\\?))\2.)*?\1/; // regex for operators const OPERATOR = /^[>=|<=|+|\-|*|/|>|<|^|%]/; -const BLOCK_START = /^\{/ -const BLOCK_END = /^\}/ +const BLOCK_START = /^\{/; +const BLOCK_END = /^\}/; const PARAN_START = /^\(/; const PARAN_END = /^\)/; - export const isNumber = (character: string) => NUMBER.test(character); export const isWhitespace = (character: string) => WHITESPACE.test(character); export const isWord = (character: string) => WORD.test(character); @@ -23,7 +22,6 @@ export const isBlockEnd = (character: string) => BLOCK_END.test(character); export const isParanStart = (character: string) => PARAN_START.test(character); export const isParanEnd = (character: string) => PARAN_END.test(character); - function matchFirst(str: string, regex: RegExp) { const theMatch = str.match(regex); if (!theMatch) { @@ -32,88 +30,81 @@ function matchFirst(str: string, regex: RegExp) { return theMatch[0]; } -// type TokenTypes = interface Token { - type: 'number' | 'word' | 'operator' | 'string' | 'brace' + type: "number" | "word" | "operator" | "string" | "brace"; value: string; } -export const lexer = (str: string): Token[] => { - const tokens: Token[] = []; - let currentIndex = 0; - - const returnTokenAtIndex = ( - str: string, - startIndex: number - ): Token | null => { - const strFromIndex = str.slice(startIndex); - if (isOperator(strFromIndex)) { - return { - type: "operator", - value: matchFirst(strFromIndex, OPERATOR), - }; - } - if (isString(strFromIndex)) { - return { - type: "string", - value: matchFirst(strFromIndex, STRING), - }; - } - if(isParanEnd(strFromIndex)){ - return { - type: "brace", - value: matchFirst(strFromIndex, PARAN_END), - }; - } - if(isParanStart(strFromIndex)){ - return { - type: "brace", - value: matchFirst(strFromIndex, PARAN_START), - }; - } - if(isBlockStart(strFromIndex)){ - return { - type: "brace", - value: matchFirst(strFromIndex, BLOCK_START), - }; - } - if(isBlockEnd(strFromIndex)){ - return { - type: "brace", - value: matchFirst(strFromIndex, BLOCK_END), - }; - } - if (isNumber(strFromIndex)) { - return { - type: "number", - value: matchFirst(strFromIndex, NUMBER), - }; - } - if(isWord(strFromIndex)) { - return { - type: "word", - value: matchFirst(strFromIndex, WORD), - }; - } - return null; - }; - while (currentIndex < str.length) { - const token = returnTokenAtIndex(str, currentIndex); - if (token) { - tokens.push(token); - currentIndex += token.value.length; - } else { - currentIndex++; - } +const returnTokenAtIndex = (str: string, startIndex: number): Token | null => { + const strFromIndex = str.slice(startIndex); + if (isOperator(strFromIndex)) { + return { + type: "operator", + value: matchFirst(strFromIndex, OPERATOR), + }; } - - return tokens + if (isString(strFromIndex)) { + return { + type: "string", + value: matchFirst(strFromIndex, STRING), + }; + } + if (isParanEnd(strFromIndex)) { + return { + type: "brace", + value: matchFirst(strFromIndex, PARAN_END), + }; + } + if (isParanStart(strFromIndex)) { + return { + type: "brace", + value: matchFirst(strFromIndex, PARAN_START), + }; + } + if (isBlockStart(strFromIndex)) { + return { + type: "brace", + value: matchFirst(strFromIndex, BLOCK_START), + }; + } + if (isBlockEnd(strFromIndex)) { + return { + type: "brace", + value: matchFirst(strFromIndex, BLOCK_END), + }; + } + if (isNumber(strFromIndex)) { + return { + type: "number", + value: matchFirst(strFromIndex, NUMBER), + }; + } + if (isWord(strFromIndex)) { + return { + type: "word", + value: matchFirst(strFromIndex, WORD), + }; + } + return null; }; -async function main() { - const example1 = await fsp.readFile("./examples/addition.cado", "ascii"); - const parsed = lexer(example1); - console.log(parsed); -} +export const lexer = (str: string): Token[] => { + const recursivelyTokenise = ( + str: string, + currentIndex: number = 0, + previousTokens: Token[] = [] + ): Token[] => { + if (currentIndex >= str.length) { + return previousTokens; + } + const token = returnTokenAtIndex(str, currentIndex); + if (!token) { + return recursivelyTokenise(str, currentIndex + 1, previousTokens); + } + const nextIndex = currentIndex + token.value.length; + return recursivelyTokenise(str, nextIndex, [...previousTokens, token]); + }; + return recursivelyTokenise(str); +}; -// main() +// const example1 = await fsp.readFile("./examples/addition.cado", "ascii");