start of tokeniser
119
src/tokeniser.ts
Normal file
@@ -0,0 +1,119 @@
import fsp from "node:fs/promises";

const NUMBER = /^[0-9]+/;
const WHITESPACE = /^\s+/;
const WORD = /^[a-zA-Z_][a-zA-Z0-9_]*/;
// matches a quoted string ('…' or "…") at the start of the input, stepping over
// escaped quotes; the surrounding quotes are included in the match
const STRING = /^(["'])(?:(?=(\\?))\2.)*?\1/;
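// Illustrative check (a sketch, not part of the committed file): the match stops
// at the first unescaped closing quote, e.g.
//   STRING.exec('"a \\" b" + 1')?.[0]   // => '"a \\" b"'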

// regex for operators; multi-character operators (>=, <=) must be listed before
// the single-character ones
const OPERATOR = /^(>=|<=|[+\-*\/><^%=])/;

const BLOCK_START = /^\{/;
const BLOCK_END = /^\}/;

const PARAN_START = /^\(/;
const PARAN_END = /^\)/;

export const isNumber = (character: string) => NUMBER.test(character);
export const isWhitespace = (character: string) => WHITESPACE.test(character);
export const isWord = (character: string) => WORD.test(character);
export const isString = (character: string) => STRING.test(character);
export const isOperator = (character: string) => OPERATOR.test(character);
export const isBlockStart = (character: string) => BLOCK_START.test(character);
export const isBlockEnd = (character: string) => BLOCK_END.test(character);
export const isParanStart = (character: string) => PARAN_START.test(character);
export const isParanEnd = (character: string) => PARAN_END.test(character);
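
// Illustrative behaviour (a sketch, not part of the committed file): each predicate
// tests the start of the remaining input rather than a single character, e.g.
//   isNumber("42)")    // true
//   isOperator("+ 1")  // true
//   isParanEnd(") {")  // true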

function matchFirst(str: string, regex: RegExp) {
  const theMatch = str.match(regex);
  if (!theMatch) {
    throw new Error("Should always be a match:" + str);
  }
  return theMatch[0];
}
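
// Illustrative usage (a sketch, not part of the committed file):
//   matchFirst("123 + 4", NUMBER)   // => "123"
//   matchFirst("foo(1)", WORD)      // => "foo"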

// type TokenTypes =
interface Token {
  type: 'number' | 'word' | 'operator' | 'string' | 'brace';
  value: string;
}

export const lexer = (str: string): Token[] => {
  const tokens: Token[] = [];
  let currentIndex = 0;

  const returnTokenAtIndex = (str: string, startIndex: number): Token | null => {
    const strFromIndex = str.slice(startIndex);
    if (isOperator(strFromIndex)) {
      return {
        type: "operator",
        value: matchFirst(strFromIndex, OPERATOR),
      };
    }
    if (isString(strFromIndex)) {
      return {
        type: "string",
        value: matchFirst(strFromIndex, STRING),
      };
    }
    if (isParanEnd(strFromIndex)) {
      return {
        type: "brace",
        value: matchFirst(strFromIndex, PARAN_END),
      };
    }
    if (isParanStart(strFromIndex)) {
      return {
        type: "brace",
        value: matchFirst(strFromIndex, PARAN_START),
      };
    }
    if (isBlockStart(strFromIndex)) {
      return {
        type: "brace",
        value: matchFirst(strFromIndex, BLOCK_START),
      };
    }
    if (isBlockEnd(strFromIndex)) {
      return {
        type: "brace",
        value: matchFirst(strFromIndex, BLOCK_END),
      };
    }
    if (isNumber(strFromIndex)) {
      return {
        type: "number",
        value: matchFirst(strFromIndex, NUMBER),
      };
    }
    if (isWord(strFromIndex)) {
      return {
        type: "word",
        value: matchFirst(strFromIndex, WORD),
      };
    }
    return null;
  };

  while (currentIndex < str.length) {
    const token = returnTokenAtIndex(str, currentIndex);
    if (token) {
      tokens.push(token);
      currentIndex += token.value.length;
    } else {
      // nothing matched at this position (e.g. whitespace), so skip one character
      currentIndex++;
    }
  }

  return tokens;
};
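
// Example output (a sketch based on the Token interface above; whitespace between
// tokens is skipped rather than emitted):
//   lexer("1 + 2")
//   // => [
//   //      { type: "number", value: "1" },
//   //      { type: "operator", value: "+" },
//   //      { type: "number", value: "2" },
//   //    ]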

async function main() {
  const example1 = await fsp.readFile("./examples/addition.cado", "ascii");
  const parsed = lexer(example1);
  console.log(parsed);
}

// main()