2023-02-21 09:42:41 +11:00
|
|
|
import { lexer_js } from '../wasm-lib/pkg/wasm_lib'
|
|
|
|
import { initPromise } from './rust'
|
2022-11-12 13:12:20 +11:00
|
|
|
|
2022-11-12 17:47:41 +11:00
|
|
|
/**
 * A single lexical token as returned by the lexer.
 *
 * Whitespace and comments are represented as tokens too
 * (`'whitespace'`, `'linecomment'`, `'blockcomment'`).
 */
export interface Token {
  /** Lexical category of the token. */
  type:
    | 'number'
    | 'word'
    | 'operator'
    | 'string'
    | 'brace'
    | 'whitespace'
    | 'comma'
    | 'colon'
    | 'period'
    | 'linecomment'
    | 'blockcomment'
  /** Text of the token. */
  value: string
  /**
   * Start position of the token.
   * NOTE(review): presumably an offset into the lexed source string — confirm
   * the unit (UTF-16 index vs. byte offset) against the wasm lexer.
   */
  start: number
  /**
   * End position of the token.
   * NOTE(review): presumably exclusive and in the same unit as `start` — confirm.
   */
  end: number
}
|
|
|
|
|
2023-02-21 09:42:41 +11:00
|
|
|
/**
 * Tokenises `str` after awaiting `initPromise`.
 *
 * Delegates to {@link lexer} once `initPromise` has settled, so both entry
 * points share the same logging and rethrow behaviour instead of duplicating it.
 *
 * @param str - Source text to tokenise.
 * @returns The tokens produced by the lexer.
 * @throws Rejects with whatever `initPromise` rejects with, or with the error
 *   thrown by the underlying lexer (logged to the console before rethrowing).
 */
export async function asyncLexer(str: string): Promise<Token[]> {
  await initPromise
  return lexer(str)
}
|
2022-11-12 13:12:20 +11:00
|
|
|
|
2023-02-21 09:42:41 +11:00
|
|
|
/**
 * Tokenises `str` synchronously by calling the `lexer_js` wasm binding.
 *
 * NOTE(review): unlike `asyncLexer`, this does not await `initPromise`;
 * presumably callers must ensure the wasm module is already initialised —
 * confirm against call sites.
 *
 * @param str - Source text to tokenise.
 * @returns The tokens produced by `lexer_js`.
 * @throws Rethrows any error raised by `lexer_js` after logging it.
 */
export function lexer(str: string): Token[] {
  try {
    const tokens: Token[] = lexer_js(str)
    return tokens
  } catch (e) {
    // TODO: do something real with the error.
    console.log('lexer', e)
    throw e
  }
}
|