refactor to remove mutation
This commit is contained in:
153
src/tokeniser.ts
153
src/tokeniser.ts
@ -7,12 +7,11 @@ const WORD = /^[a-zA-Z_][a-zA-Z0-9_]*/;
|
|||||||
const STRING = /^(["'])(?:(?=(\\?))\2.)*?\1/;
|
// Matches a single- or double-quoted string literal at the start of the input.
// Group 1 captures the opening quote and `\1` requires the same quote to close it.
// Inside, `(?=(\\?))\2.` consumes one character together with an optional
// preceding backslash, so an escaped quote does not end the match; the lazy
// `*?` stops at the first unescaped closing quote.
const STRING = /^(["'])(?:(?=(\\?))\2.)*?\1/;
|
||||||
// regex for operators
|
// regex for operators
|
||||||
const OPERATOR = /^[>=|<=|+|\-|*|/|>|<|^|%]/;
|
// Matches one operator at the start of the input.
// The two-character comparisons `>=` and `<=` are listed before the
// single-character class so they lex as one token; a character class on its
// own can only ever match a single character. The class keeps every character
// that was accepted before (including `=` and `|`), so input that already
// tokenised still does.
const OPERATOR = /^(?:>=|<=|[>=|<+\-*\/^%])/;
|
||||||
const BLOCK_START = /^\{/
|
// Matches an opening curly brace `{` at the start of the input.
const BLOCK_START = /^\{/;
|
||||||
const BLOCK_END = /^\}/
|
// Matches a closing curly brace `}` at the start of the input.
const BLOCK_END = /^\}/;
|
||||||
const PARAN_START = /^\(/;
|
// Matches an opening parenthesis `(` at the start of the input.
const PARAN_START = /^\(/;
|
||||||
const PARAN_END = /^\)/;
|
// Matches a closing parenthesis `)` at the start of the input.
const PARAN_END = /^\)/;
|
||||||
|
|
||||||
|
|
||||||
export const isNumber = (character: string) => NUMBER.test(character);
|
/** Reports whether `character` matches the NUMBER pattern. */
export function isNumber(character: string): boolean {
  return NUMBER.test(character);
}
|
||||||
export const isWhitespace = (character: string) => WHITESPACE.test(character);
|
/** Reports whether `character` matches the WHITESPACE pattern. */
export function isWhitespace(character: string): boolean {
  return WHITESPACE.test(character);
}
|
||||||
export const isWord = (character: string) => WORD.test(character);
|
/** Reports whether `character` matches the WORD pattern. */
export function isWord(character: string): boolean {
  return WORD.test(character);
}
|
||||||
@ -23,7 +22,6 @@ export const isBlockEnd = (character: string) => BLOCK_END.test(character);
|
|||||||
export const isParanStart = (character: string) => PARAN_START.test(character);
|
/** Reports whether `character` matches PARAN_START, i.e. begins with `(`. */
export function isParanStart(character: string): boolean {
  return PARAN_START.test(character);
}
|
||||||
export const isParanEnd = (character: string) => PARAN_END.test(character);
|
/** Reports whether `character` matches PARAN_END, i.e. begins with `)`. */
export function isParanEnd(character: string): boolean {
  return PARAN_END.test(character);
}
|
||||||
|
|
||||||
|
|
||||||
function matchFirst(str: string, regex: RegExp) {
|
function matchFirst(str: string, regex: RegExp) {
|
||||||
const theMatch = str.match(regex);
|
const theMatch = str.match(regex);
|
||||||
if (!theMatch) {
|
if (!theMatch) {
|
||||||
@ -32,88 +30,81 @@ function matchFirst(str: string, regex: RegExp) {
|
|||||||
return theMatch[0];
|
return theMatch[0];
|
||||||
}
|
}
|
||||||
|
|
||||||
// type TokenTypes =
|
|
||||||
interface Token {
|
interface Token {
|
||||||
type: 'number' | 'word' | 'operator' | 'string' | 'brace'
|
type: "number" | "word" | "operator" | "string" | "brace";
|
||||||
value: string;
|
value: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
export const lexer = (str: string): Token[] => {
|
const returnTokenAtIndex = (str: string, startIndex: number): Token | null => {
|
||||||
const tokens: Token[] = [];
|
const strFromIndex = str.slice(startIndex);
|
||||||
let currentIndex = 0;
|
if (isOperator(strFromIndex)) {
|
||||||
|
return {
|
||||||
const returnTokenAtIndex = (
|
type: "operator",
|
||||||
str: string,
|
value: matchFirst(strFromIndex, OPERATOR),
|
||||||
startIndex: number
|
};
|
||||||
): Token | null => {
|
|
||||||
const strFromIndex = str.slice(startIndex);
|
|
||||||
if (isOperator(strFromIndex)) {
|
|
||||||
return {
|
|
||||||
type: "operator",
|
|
||||||
value: matchFirst(strFromIndex, OPERATOR),
|
|
||||||
};
|
|
||||||
}
|
|
||||||
if (isString(strFromIndex)) {
|
|
||||||
return {
|
|
||||||
type: "string",
|
|
||||||
value: matchFirst(strFromIndex, STRING),
|
|
||||||
};
|
|
||||||
}
|
|
||||||
if(isParanEnd(strFromIndex)){
|
|
||||||
return {
|
|
||||||
type: "brace",
|
|
||||||
value: matchFirst(strFromIndex, PARAN_END),
|
|
||||||
};
|
|
||||||
}
|
|
||||||
if(isParanStart(strFromIndex)){
|
|
||||||
return {
|
|
||||||
type: "brace",
|
|
||||||
value: matchFirst(strFromIndex, PARAN_START),
|
|
||||||
};
|
|
||||||
}
|
|
||||||
if(isBlockStart(strFromIndex)){
|
|
||||||
return {
|
|
||||||
type: "brace",
|
|
||||||
value: matchFirst(strFromIndex, BLOCK_START),
|
|
||||||
};
|
|
||||||
}
|
|
||||||
if(isBlockEnd(strFromIndex)){
|
|
||||||
return {
|
|
||||||
type: "brace",
|
|
||||||
value: matchFirst(strFromIndex, BLOCK_END),
|
|
||||||
};
|
|
||||||
}
|
|
||||||
if (isNumber(strFromIndex)) {
|
|
||||||
return {
|
|
||||||
type: "number",
|
|
||||||
value: matchFirst(strFromIndex, NUMBER),
|
|
||||||
};
|
|
||||||
}
|
|
||||||
if(isWord(strFromIndex)) {
|
|
||||||
return {
|
|
||||||
type: "word",
|
|
||||||
value: matchFirst(strFromIndex, WORD),
|
|
||||||
};
|
|
||||||
}
|
|
||||||
return null;
|
|
||||||
};
|
|
||||||
while (currentIndex < str.length) {
|
|
||||||
const token = returnTokenAtIndex(str, currentIndex);
|
|
||||||
if (token) {
|
|
||||||
tokens.push(token);
|
|
||||||
currentIndex += token.value.length;
|
|
||||||
} else {
|
|
||||||
currentIndex++;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
if (isString(strFromIndex)) {
|
||||||
return tokens
|
return {
|
||||||
|
type: "string",
|
||||||
|
value: matchFirst(strFromIndex, STRING),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
if (isParanEnd(strFromIndex)) {
|
||||||
|
return {
|
||||||
|
type: "brace",
|
||||||
|
value: matchFirst(strFromIndex, PARAN_END),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
if (isParanStart(strFromIndex)) {
|
||||||
|
return {
|
||||||
|
type: "brace",
|
||||||
|
value: matchFirst(strFromIndex, PARAN_START),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
if (isBlockStart(strFromIndex)) {
|
||||||
|
return {
|
||||||
|
type: "brace",
|
||||||
|
value: matchFirst(strFromIndex, BLOCK_START),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
if (isBlockEnd(strFromIndex)) {
|
||||||
|
return {
|
||||||
|
type: "brace",
|
||||||
|
value: matchFirst(strFromIndex, BLOCK_END),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
if (isNumber(strFromIndex)) {
|
||||||
|
return {
|
||||||
|
type: "number",
|
||||||
|
value: matchFirst(strFromIndex, NUMBER),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
if (isWord(strFromIndex)) {
|
||||||
|
return {
|
||||||
|
type: "word",
|
||||||
|
value: matchFirst(strFromIndex, WORD),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
return null;
|
||||||
};
|
};
|
||||||
|
|
||||||
async function main() {
|
export const lexer = (str: string): Token[] => {
|
||||||
const example1 = await fsp.readFile("./examples/addition.cado", "ascii");
|
const recursivelyTokenise = (
|
||||||
const parsed = lexer(example1);
|
str: string,
|
||||||
console.log(parsed);
|
currentIndex: number = 0,
|
||||||
}
|
previousTokens: Token[] = []
|
||||||
|
): Token[] => {
|
||||||
|
if (currentIndex >= str.length) {
|
||||||
|
return previousTokens;
|
||||||
|
}
|
||||||
|
const token = returnTokenAtIndex(str, currentIndex);
|
||||||
|
if (!token) {
|
||||||
|
return recursivelyTokenise(str, currentIndex + 1, previousTokens);
|
||||||
|
}
|
||||||
|
const nextIndex = currentIndex + token.value.length;
|
||||||
|
return recursivelyTokenise(str, nextIndex, [...previousTokens, token]);
|
||||||
|
};
|
||||||
|
return recursivelyTokenise(str);
|
||||||
|
};
|
||||||
|
|
||||||
// main()
|
// const example1 = await fsp.readFile("./examples/addition.cado", "ascii");
|
||||||
|
Reference in New Issue
Block a user