Add the ability to recast comments and some whitespace (#10)

* Add the ability to recast comments and some whitespace

Currently because whitespace or anything that's not needed for execution is not stored in the AST, it's hard to respect things like user formatting when recasting.

I think having a by-default-opinionated formatter is a good thing, but this becomes problematic when users want to simply leave a blank space between some lines for a bit of breathing room — a code paragraph, if you will. Maybe more importantly, comments have not been implemented for the same reason: there wasn't a way with the current setup to insert them back in.

In some ways the most straightforward way to do this is to put whitespace and comments into the AST. Even though they are not crucial for execution, code-gen/recasting needs to be a first-class citizen in this lang so that's probably the long-term solution. However I'm trying to draw inspiration from other languages, and since it's not the norm to put comments et-al into the AST I haven't done so.

Because whitespace is already tokenised, even though it is not transformed into the AST, there is somewhat of a map of these things without going back to the source code. So at the moment I'm experimenting with using this to insert extra linebreaks and comments back in between statements. I think this is a good compromise for the time being for what is a nice-to-have feature.

Because it's only going to respect non-code parts in between statements, this means that you can't format objects or function params how you like (though I think it is good to have an opinionated formatter out of the box), and comments like myFunctionCall('a', /* inline comment */ b) will not work either.

* clean up
This commit is contained in:
Kurt Hutten
2023-01-23 14:50:58 +11:00
committed by GitHub
parent 18629ea50c
commit dd5022b38e
9 changed files with 720 additions and 58 deletions

View File

@ -1853,3 +1853,91 @@ describe('testing findEndofBinaryExpression', () => {
expect(end).toBe(code.indexOf('))') + 1) expect(end).toBe(code.indexOf('))') + 1)
}) })
}) })
describe('testing code with comments', () => {
it('should ignore line comments', () => {
const comment = '// this is a comment'
const codeWithComment = `const yo = 5
${comment}
const yo2 = 6`
// filling with extra whitespace to make the source start end numbers match
const codeWithoutComment = `const yo = 5
${comment
.split('')
.map(() => ' ')
.join('')}
const yo2 = 6`
const { body } = abstractSyntaxTree(lexer(codeWithComment))
const { body: bodyWithoutComment } = abstractSyntaxTree(
lexer(codeWithoutComment)
)
expect(body).toEqual(bodyWithoutComment)
})
it('should ignore block comments', () => {
const comment = `/* this is a
multi line
comment */`
const codeWithComment = `const yo = 5${comment}
const yo2 = 6`
// filling with extra whitespace to make the source start end numbers match
const codeWithoutComment = `const yo = 5${comment
.split('')
.map(() => ' ')
.join('')}
const yo2 = 6`
const { body } = abstractSyntaxTree(lexer(codeWithComment))
const { body: bodyWithoutComment } = abstractSyntaxTree(
lexer(codeWithoutComment)
)
expect(body).toEqual(bodyWithoutComment)
})
it('comment in function declaration', () => {
const code = `const yo=(a)=>{
// this is a comment
return a
}`
const { body } = abstractSyntaxTree(lexer(code))
const yo = [
{
type: 'VariableDeclaration',
start: 0,
end: 51,
kind: 'const',
declarations: [
{
type: 'VariableDeclarator',
start: 6,
end: 51,
id: { type: 'Identifier', start: 6, end: 8, name: 'yo' },
init: {
type: 'FunctionExpression',
start: 9,
end: 51,
id: null,
params: [{ type: 'Identifier', start: 10, end: 11, name: 'a' }],
body: {
type: 'BlockStatement',
start: 14,
end: 51,
body: [
{
type: 'ReturnStatement',
start: 41,
end: 49,
argument: {
type: 'Identifier',
start: 48,
end: 49,
name: 'a',
},
},
],
},
},
},
],
},
]
expect(body).toEqual(yo)
})
})

View File

@ -83,7 +83,7 @@ export interface Program {
type: syntaxType type: syntaxType
start: number start: number
end: number end: number
body: Body[] body: BodyItem[]
} }
interface GeneralStatement { interface GeneralStatement {
type: syntaxType type: syntaxType
@ -937,7 +937,7 @@ function makeParams(
export interface BlockStatement extends GeneralStatement { export interface BlockStatement extends GeneralStatement {
type: 'BlockStatement' type: 'BlockStatement'
body: Body[] body: BodyItem[]
} }
function makeBlockStatement( function makeBlockStatement(
@ -996,7 +996,7 @@ function nextMeaningfulToken(
if (!token) { if (!token) {
return { token, index: tokens.length } return { token, index: tokens.length }
} }
if (token.type === 'whitespace') { if (isNotCodeToken(token)) {
return nextMeaningfulToken(tokens, index, offset + 1) return nextMeaningfulToken(tokens, index, offset + 1)
} }
return { token, index: newIndex } return { token, index: newIndex }
@ -1012,13 +1012,16 @@ function previousMeaningfulToken(
if (!token) { if (!token) {
return { token, index: 0 } return { token, index: 0 }
} }
if (token.type === 'whitespace') { if (isNotCodeToken(token)) {
return previousMeaningfulToken(tokens, index, offset + 1) return previousMeaningfulToken(tokens, index, offset + 1)
} }
return { token, index: newIndex } return { token, index: newIndex }
} }
type Body = ExpressionStatement | VariableDeclaration | ReturnStatement export type BodyItem =
| ExpressionStatement
| VariableDeclaration
| ReturnStatement
function makeBody( function makeBody(
{ {
@ -1028,8 +1031,8 @@ function makeBody(
tokens: Token[] tokens: Token[]
tokenIndex?: number tokenIndex?: number
}, },
previousBody: Body[] = [] previousBody: BodyItem[] = []
): { body: Body[]; lastIndex: number } { ): { body: BodyItem[]; lastIndex: number } {
if (tokenIndex >= tokens.length) { if (tokenIndex >= tokens.length) {
return { body: previousBody, lastIndex: tokenIndex } return { body: previousBody, lastIndex: tokenIndex }
} }
@ -1041,7 +1044,7 @@ function makeBody(
if (typeof token === 'undefined') { if (typeof token === 'undefined') {
console.log('probably should throw') console.log('probably should throw')
} }
if (token.type === 'whitespace') { if (isNotCodeToken(token)) {
return makeBody({ tokens, tokenIndex: tokenIndex + 1 }, previousBody) return makeBody({ tokens, tokenIndex: tokenIndex + 1 }, previousBody)
} }
const nextToken = nextMeaningfulToken(tokens, tokenIndex) const nextToken = nextMeaningfulToken(tokens, tokenIndex)
@ -1532,3 +1535,11 @@ export function getNodePathFromSourceRange(
} }
return path return path
} }
export function isNotCodeToken(token: Token): boolean {
return (
token.type === 'whitespace' ||
token.type === 'linecomment' ||
token.type === 'blockcomment'
)
}

View File

@ -1,4 +1,9 @@
import { BinaryExpression, Literal, Identifier } from './abstractSyntaxTree' import {
BinaryExpression,
Literal,
Identifier,
isNotCodeToken,
} from './abstractSyntaxTree'
import { Token } from './tokeniser' import { Token } from './tokeniser'
export function reversePolishNotation( export function reversePolishNotation(
@ -45,7 +50,7 @@ export function reversePolishNotation(
) )
} else if (currentToken.value === ')') { } else if (currentToken.value === ')') {
if (operators[operators.length - 1]?.value !== '(') { if (operators[operators.length - 1]?.value !== '(') {
// pop operators off the stack and pust them to postFix until we find the matching '(' // pop operators off the stack and push them to postFix until we find the matching '('
return reversePolishNotation( return reversePolishNotation(
tokens, tokens,
[...previousPostfix, operators[operators.length - 1]], [...previousPostfix, operators[operators.length - 1]],
@ -58,7 +63,7 @@ export function reversePolishNotation(
operators.slice(0, -1) operators.slice(0, -1)
) )
} }
if (currentToken.type === 'whitespace') { if (isNotCodeToken(currentToken)) {
return reversePolishNotation(tokens.slice(1), previousPostfix, operators) return reversePolishNotation(tokens.slice(1), previousPostfix, operators)
} }
throw new Error('Unknown token') throw new Error('Unknown token')

View File

@ -0,0 +1,210 @@
import { findTokensBetweenStatements } from './nonAstTokenHelpers'
import { Token } from './tokeniser'
import { BodyItem } from './abstractSyntaxTree'
describe('verify code', () => {
it('should find tokens between statements', () => {
const statement1 = {
type: 'yoyo',
start: 105,
end: 111,
}
const statement2 = {
type: 'yoyo',
start: 150,
end: 156,
}
const tokens: Token[] = [
{
type: 'word',
value: 'yoyo',
start: 100,
end: 104,
},
{
type: 'whitespace',
value: ' ',
start: 111,
end: 115,
},
{
type: 'linecomment',
value: '// this is a comment',
start: 115,
end: 119,
},
{
type: 'whitespace',
value: ' ',
start: 157,
end: 161,
},
]
const result = findTokensBetweenStatements(statement1, statement2, tokens)
// should grab the middle two tokens an the start and end tokens are less than the first statement
// and greater than the second statement respectively
expect(result).toEqual([
{ type: 'whitespace', value: ' ', start: 111, end: 115 },
{
type: 'linecomment',
value: '// this is a comment',
start: 115,
end: 119,
},
])
})
it('propert test with our types', () => {
const tokens: Token[] = [
{
type: 'whitespace',
value: '\n',
start: 37,
end: 38,
},
{
type: 'linecomment',
value: '// this is a comment',
start: 38,
end: 58,
},
{
type: 'whitespace',
value: '\n',
start: 58,
end: 59,
},
]
const statement1: BodyItem = {
type: 'VariableDeclaration',
start: 0,
end: 37,
kind: 'const',
declarations: [
{
type: 'VariableDeclarator',
start: 6,
end: 37,
id: {
type: 'Identifier',
start: 6,
end: 8,
name: 'yo',
},
init: {
type: 'ObjectExpression',
start: 11,
end: 37,
properties: [
{
type: 'ObjectProperty',
start: 13,
end: 35,
key: {
type: 'Identifier',
start: 13,
end: 14,
name: 'a',
},
value: {
type: 'ObjectExpression',
start: 16,
end: 35,
properties: [
{
type: 'ObjectProperty',
start: 18,
end: 33,
key: {
type: 'Identifier',
start: 18,
end: 19,
name: 'b',
},
value: {
type: 'ObjectExpression',
start: 21,
end: 33,
properties: [
{
type: 'ObjectProperty',
start: 23,
end: 31,
key: {
type: 'Identifier',
start: 23,
end: 24,
name: 'c',
},
value: {
type: 'Literal',
start: 26,
end: 31,
value: '123',
raw: "'123'",
},
},
],
},
},
],
},
},
],
},
},
],
}
const statement2: BodyItem = {
type: 'VariableDeclaration',
start: 59,
end: 74,
kind: 'const',
declarations: [
{
type: 'VariableDeclarator',
start: 65,
end: 74,
id: {
type: 'Identifier',
start: 65,
end: 68,
name: 'key',
},
init: {
type: 'Literal',
start: 71,
end: 74,
value: 'c',
raw: "'c'",
},
},
],
}
const result = findTokensBetweenStatements(statement1, statement2, tokens)
expect(result).toEqual([
{
type: 'whitespace',
value: '\n',
start: 37,
end: 38,
},
{
type: 'linecomment',
value: '// this is a comment',
start: 38,
end: 58,
},
{
type: 'whitespace',
value: '\n',
start: 58,
end: 59,
},
])
})
})

View File

@ -0,0 +1,123 @@
import { Token } from './tokeniser'
import { Program, BodyItem } from './abstractSyntaxTree'
/**
 * Returns every token that lies between the end of `statement1` and the end
 * of `statement2` (tokens are assumed sorted by offset). Used to recover
 * comments/whitespace that the AST does not store.
 */
export function findTokensBetweenStatements(
  statement1: { start: number; end: number },
  statement2: { start: number; end: number },
  tokens: Token[]
): Token[] {
  // Convert a negative "not found" result into an insertion index.
  const toIndex = (searchResult: number): number =>
    searchResult < 0 ? ~searchResult : searchResult

  // First token that begins at or after the end of statement1 …
  const sliceFrom = toIndex(
    firstGreaterThanBinarySearch(tokens, statement1.end, 'start')
  )
  // … up to (exclusive) the first token beginning at or after statement2's end.
  const sliceTo = toIndex(
    firstGreaterThanBinarySearch(tokens, statement2.end, 'start')
  )
  return tokens.slice(sliceFrom, sliceTo)
}
/**
 * Lower-bound binary search: returns the index of the first token whose
 * `property` offset is >= `target`. Returns `tokens.length` when every token
 * sits before `target` (so callers can `slice` with it directly), and -1 for
 * an empty token list (callers treat non-positive results as "nothing found").
 *
 * The previous implementation padded the array with sentinel tokens, which
 * (a) built a NaN-valued end sentinel for empty input and (b) returned -1
 * instead of 0 whenever `target <= 0`, because the search converged on the
 * leading zero sentinel and probed `paddedTokens[-1]`. A plain lower-bound
 * loop has neither edge case and returns the same values for all other
 * inputs.
 */
function firstGreaterThanBinarySearch(
  tokens: { start: number; end: number }[],
  target: number,
  property: 'start' | 'end'
): number {
  if (!tokens.length) return -1
  let left = 0
  let right = tokens.length - 1
  // Past-the-end default mirrors the old end-sentinel behaviour.
  let firstAtOrAfter = tokens.length
  while (left <= right) {
    const middle = left + Math.floor((right - left) / 2)
    if (tokens[middle][property] >= target) {
      // candidate answer; keep searching to the left for an earlier one
      firstAtOrAfter = middle
      right = middle - 1
    } else {
      left = middle + 1
    }
  }
  return firstAtOrAfter
}
/**
 * Returns the raw text of the non-code tokens (comments / blank lines) that
 * sit between the statement at `index` and the following statement — or, for
 * the last statement, everything after it. A single trailing line break
 * (plus trailing spaces) is dropped because the recaster always separates
 * statements with its own '\n'.
 */
export function getNonCodeString(
  body: Program['body'],
  index: number,
  tokens: Token[]
): string {
  const currentStatement = body[index]
  const nextStatement = body[index + 1]

  let between: Token[] = []
  if (nextStatement && nextStatement.start && currentStatement.end) {
    between = findTokensBetweenStatements(currentStatement, nextStatement, tokens)
  } else if (index === body.length - 1) {
    // last statement: pick up any trailing comments/whitespace after it
    const firstAfter = firstGreaterThanBinarySearch(
      tokens,
      currentStatement?.end,
      'start'
    )
    if (firstAfter > 0) {
      between = tokens.slice(firstAfter)
    }
  }

  if (!between.length) return ''
  const nonCodeString = between.map((token) => token.value).join('')
  // If the string ends with a line break followed only by spaces, drop that
  // final break: every statement is emitted on its own line anyway, so
  // keeping it would add an extra blank line. Extra user blank lines still
  // survive because only the last break is removed.
  const trailing = nonCodeString.match(/(\n *)$/)
  return trailing ? nonCodeString.slice(0, -trailing[0].length) : nonCodeString
}
/**
 * Returns the text of any comments that appear before the first statement,
 * normalised to end with exactly one newline, or '' when there is nothing
 * (or only whitespace) in front of it.
 */
export function getStartNonCodeString(
  firstStatement: BodyItem,
  tokens: Token[]
): string {
  if (!firstStatement) return ''
  // Index of the first token ending at/after the statement's start; every
  // token before that index sits entirely in front of the first statement.
  const cutoff = tokens.length
    ? firstGreaterThanBinarySearch(tokens, firstStatement.start, 'end')
    : 0
  let leading = ''
  if (cutoff > 0) {
    leading = tokens
      .slice(0, cutoff)
      .map((token) => token.value)
      .join('')
  }
  const trimmed = leading.trim()
  return trimmed ? trimmed + '\n' : ''
}

View File

@ -1,4 +1,4 @@
import { recast } from './recast' import { recast, processTokens } from './recast'
import { Program, abstractSyntaxTree } from './abstractSyntaxTree' import { Program, abstractSyntaxTree } from './abstractSyntaxTree'
import { lexer, Token } from './tokeniser' import { lexer, Token } from './tokeniser'
import fs from 'node:fs' import fs from 'node:fs'
@ -180,6 +180,90 @@ const myVar2 = yo['a'][key2].c`
const recasted = recast(ast) const recasted = recast(ast)
expect(recasted).toBe(code.trim()) expect(recasted).toBe(code.trim())
}) })
// The tests below all assert a parse -> recast round trip: source containing
// comments and deliberate blank lines, when parsed and recast with the
// processed (non-code) tokens, should reproduce the original source exactly
// (modulo the leading newline stripped by .trim()).
it('code with comments', () => {
const code = `
const yo = { a: { b: { c: '123' } } }
// this is a comment
const key = 'c'`
const { ast, tokens } = code2ast(code)
const processedTokens = processTokens(tokens)
const recasted = recast(ast, processedTokens)
expect(recasted).toBe(code.trim())
})
it('code with extra whitespace should be respected when recasted', () => {
const withExtraEmptylLineBetween = `
const yo = { a: { b: { c: '123' } } }
const key = 'c'`
const { ast, tokens } = code2ast(withExtraEmptylLineBetween)
const processedTokens = processTokens(tokens)
const recasted = recast(ast, processedTokens)
expect(recasted).toBe(withExtraEmptylLineBetween.trim())
})
// block comments between statements should survive the round trip too
it('code with block comment in between', () => {
const withExtraEmptylLineBetween = `
const yo = { a: { b: { c: '123' } } }
/* hi there
yo yo yo
*/
const key = 'c'`
const { ast, tokens } = code2ast(withExtraEmptylLineBetween)
const processedTokens = processTokens(tokens)
const recasted = recast(ast, processedTokens)
expect(recasted).toBe(withExtraEmptylLineBetween.trim())
})
// mixed case: block comment, blank line and line comment in sequence
it('code with block comment line comment and empty line', () => {
const withExtraEmptylLineBetween = `
const yo = { a: { b: { c: '123' } } }
/* hi there
yo yo yo
*/
// empty line above and line comment here
const key = 'c'`
const { ast, tokens } = code2ast(withExtraEmptylLineBetween)
const processedTokens = processTokens(tokens)
const recasted = recast(ast, processedTokens)
expect(recasted).toBe(withExtraEmptylLineBetween.trim())
})
// comments before the first statement and after the last one are handled by
// getStartNonCodeString / the last-statement branch of getNonCodeString
it('code comment at the start and end', () => {
const withExtraEmptylLineBetween = `
// comment at the start
const yo = { a: { b: { c: '123' } } }
const key = 'c'
// comment at the end`
const { ast, tokens } = code2ast(withExtraEmptylLineBetween)
const processedTokens = processTokens(tokens)
const recasted = recast(ast, processedTokens)
expect(recasted).toBe(withExtraEmptylLineBetween.trim())
})
// same round trip inside a nested body (function declaration block)
it('comments and random new lines between statements within function declarations are fine', () => {
const withExtraEmptylLineBetween = `
const fn = (a) => {
  const yo = 5
  // a comment
  return a + yo
}`
const { ast, tokens } = code2ast(withExtraEmptylLineBetween)
const processedTokens = processTokens(tokens)
const recasted = recast(ast, processedTokens)
expect(recasted).toBe(withExtraEmptylLineBetween.trim())
})
}) })
// helpers // helpers

View File

@ -12,46 +12,70 @@ import {
MemberExpression, MemberExpression,
} from './abstractSyntaxTree' } from './abstractSyntaxTree'
import { precedence } from './astMathExpressions' import { precedence } from './astMathExpressions'
import { Token } from './tokeniser'
import { getNonCodeString, getStartNonCodeString } from './nonAstTokenHelpers'
export const processTokens = (tokens: Token[]): Token[] => {
return tokens.filter((token) => {
if (token.type === 'linecomment' || token.type === 'blockcomment')
return true
if (token.type === 'whitespace') {
if (token.value.includes('\n')) return true
}
return false
})
}
export function recast( export function recast(
ast: Program, ast: Program,
tokens: Token[] = [],
previousWrittenCode = '', previousWrittenCode = '',
indentation = '' indentation = ''
): string { ): string {
return ast.body let startComments = getStartNonCodeString(ast?.body?.[0], tokens)
.map((statement) => { return (
if (statement.type === 'ExpressionStatement') { startComments +
if (statement.expression.type === 'BinaryExpression') { ast.body
return indentation + recastBinaryExpression(statement.expression) .map((statement) => {
} else if (statement.expression.type === 'ArrayExpression') { if (statement.type === 'ExpressionStatement') {
return indentation + recastArrayExpression(statement.expression) if (statement.expression.type === 'BinaryExpression') {
} else if (statement.expression.type === 'ObjectExpression') { return recastBinaryExpression(statement.expression)
return indentation + recastObjectExpression(statement.expression) } else if (statement.expression.type === 'ArrayExpression') {
} else if (statement.expression.type === 'CallExpression') { return recastArrayExpression(statement.expression)
return indentation + recastCallExpression(statement.expression) } else if (statement.expression.type === 'ObjectExpression') {
} return recastObjectExpression(statement.expression)
} else if (statement.type === 'VariableDeclaration') { } else if (statement.expression.type === 'CallExpression') {
return statement.declarations return recastCallExpression(statement.expression, tokens)
.map((declaration) => { }
const isSketchOrFirstPipeExpressionIsSketch = } else if (statement.type === 'VariableDeclaration') {
declaration.init.type === 'SketchExpression' || return statement.declarations
(declaration.init.type === 'PipeExpression' && .map((declaration) => {
declaration.init.body[0].type === 'SketchExpression') const isSketchOrFirstPipeExpressionIsSketch =
declaration.init.type === 'SketchExpression' ||
(declaration.init.type === 'PipeExpression' &&
declaration.init.body[0].type === 'SketchExpression')
const assignmentString = isSketchOrFirstPipeExpressionIsSketch const assignmentString = isSketchOrFirstPipeExpressionIsSketch
? ' ' ? ' '
: ' = ' : ' = '
return `${indentation}${statement.kind} ${ return `${statement.kind} ${
declaration.id.name declaration.id.name
}${assignmentString}${recastValue(declaration.init)}` }${assignmentString}${recastValue(declaration.init, '', tokens)}`
}) })
.join('') .join('')
} else if (statement.type === 'ReturnStatement') { } else if (statement.type === 'ReturnStatement') {
return `${indentation}return ${recastArgument(statement.argument)}` return `return ${recastArgument(statement.argument, tokens)}`
} }
return statement.type return statement.type
}) })
.join('\n') .map(
(statementString, index) =>
indentation +
statementString +
getNonCodeString(ast.body, index, tokens)
)
.join('\n')
)
} }
function recastBinaryExpression(expression: BinaryExpression): string { function recastBinaryExpression(expression: BinaryExpression): string {
@ -127,13 +151,16 @@ function recastLiteral(literal: Literal): string {
return String(literal?.value) return String(literal?.value)
} }
function recastCallExpression(expression: CallExpression): string { function recastCallExpression(
expression: CallExpression,
tokens: Token[] = []
): string {
return `${expression.callee.name}(${expression.arguments return `${expression.callee.name}(${expression.arguments
.map(recastArgument) .map((arg) => recastArgument(arg, tokens))
.join(', ')})` .join(', ')})`
} }
function recastArgument(argument: Value): string { function recastArgument(argument: Value, tokens: Token[] = []): string {
if (argument.type === 'Literal') { if (argument.type === 'Literal') {
return recastLiteral(argument) return recastLiteral(argument)
} else if (argument.type === 'Identifier') { } else if (argument.type === 'Identifier') {
@ -145,27 +172,32 @@ function recastArgument(argument: Value): string {
} else if (argument.type === 'ObjectExpression') { } else if (argument.type === 'ObjectExpression') {
return recastObjectExpression(argument) return recastObjectExpression(argument)
} else if (argument.type === 'CallExpression') { } else if (argument.type === 'CallExpression') {
return recastCallExpression(argument) return recastCallExpression(argument, tokens)
} else if (argument.type === 'FunctionExpression') { } else if (argument.type === 'FunctionExpression') {
return recastFunction(argument) return recastFunction(argument, tokens)
} else if (argument.type === 'PipeSubstitution') { } else if (argument.type === 'PipeSubstitution') {
return '%' return '%'
} }
throw new Error(`Cannot recast argument ${argument}`) throw new Error(`Cannot recast argument ${argument}`)
} }
function recastFunction(expression: FunctionExpression): string { function recastFunction(
expression: FunctionExpression,
tokens: Token[] = [],
indentation = ''
): string {
return `(${expression.params.map((param) => param.name).join(', ')}) => { return `(${expression.params.map((param) => param.name).join(', ')}) => {
${recast(expression.body)} ${recast(expression.body, tokens, '', indentation + ' ')}
}` }`
} }
function recastSketchExpression( function recastSketchExpression(
expression: SketchExpression, expression: SketchExpression,
indentation: string indentation: string,
tokens: Token[] = []
): string { ): string {
return `{ return `{
${recast(expression.body, '', indentation + ' ')} ${recast(expression.body, tokens, '', indentation + ' ')}
}` }`
} }
@ -186,7 +218,11 @@ function recastMemberExpression(
return expression.object.name + keyString return expression.object.name + keyString
} }
function recastValue(node: Value, indentation = ''): string { function recastValue(
node: Value,
indentation = '',
tokens: Token[] = []
): string {
if (node.type === 'BinaryExpression') { if (node.type === 'BinaryExpression') {
return recastBinaryExpression(node) return recastBinaryExpression(node)
} else if (node.type === 'ArrayExpression') { } else if (node.type === 'ArrayExpression') {
@ -198,16 +234,16 @@ function recastValue(node: Value, indentation = ''): string {
} else if (node.type === 'Literal') { } else if (node.type === 'Literal') {
return recastLiteral(node) return recastLiteral(node)
} else if (node.type === 'FunctionExpression') { } else if (node.type === 'FunctionExpression') {
return recastFunction(node) return recastFunction(node, tokens)
} else if (node.type === 'CallExpression') { } else if (node.type === 'CallExpression') {
return recastCallExpression(node) return recastCallExpression(node, tokens)
} else if (node.type === 'Identifier') { } else if (node.type === 'Identifier') {
return node.name return node.name
} else if (node.type === 'SketchExpression') { } else if (node.type === 'SketchExpression') {
return recastSketchExpression(node, indentation) return recastSketchExpression(node, indentation)
} else if (node.type === 'PipeExpression') { } else if (node.type === 'PipeExpression') {
return node.body return node.body
.map((statement): string => recastValue(statement, indentation)) .map((statement): string => recastValue(statement, indentation, tokens))
.join('\n |> ') .join('\n |> ')
} }
return '' return ''

View File

@ -10,6 +10,8 @@ import {
isWord, isWord,
isComma, isComma,
lexer, lexer,
isLineComment,
isBlockComment,
} from './tokeniser' } from './tokeniser'
describe('testing helpers', () => { describe('testing helpers', () => {
@ -143,6 +145,29 @@ describe('testing helpers', () => {
expect(isComma('5, + 5')).toBe(false) expect(isComma('5, + 5')).toBe(false)
expect(isComma(' , + 5')).toBe(false) expect(isComma(' , + 5')).toBe(false)
}) })
it('test it matches line comments', () => {
expect(isLineComment('//')).toBe(true)
expect(isLineComment('// ')).toBe(true)
expect(isLineComment('//5')).toBe(true)
expect(isLineComment('//5 ')).toBe(true)
expect(isLineComment('5')).toBe(false)
expect(isLineComment('5 + 5')).toBe(false)
expect(isLineComment('5// + 5')).toBe(false)
expect(isLineComment(' // + 5')).toBe(false)
})
it('test it matches block comments', () => {
expect(isBlockComment('/* */')).toBe(true)
expect(isBlockComment('/**/')).toBe(true)
expect(isBlockComment('/*5*/')).toBe(true)
expect(isBlockComment('/*5 */')).toBe(true)
expect(isBlockComment('/*')).toBe(false)
expect(isBlockComment('5')).toBe(false)
expect(isBlockComment('5 + 5')).toBe(false)
expect(isBlockComment('5/* + 5')).toBe(false)
expect(isBlockComment(' /* + 5')).toBe(false)
})
}) })
describe('testing lexer', () => { describe('testing lexer', () => {
@ -427,6 +452,71 @@ const prop3 = yo[key]`)
"brace ']' from 108 to 109", "brace ']' from 108 to 109",
]) ])
}) })
// Lexer tests: comments must come out as single `linecomment` /
// `blockcomment` tokens with correct start/end offsets, while the
// surrounding code tokenises exactly as before.
it('testing tokenising line comments', () => {
const result = stringSummaryLexer(`const yo = 45 // this is a comment
const yo = 6`)
expect(result).toEqual([
"word 'const' from 0 to 5",
"whitespace ' ' from 5 to 6",
"word 'yo' from 6 to 8",
"whitespace ' ' from 8 to 9",
"operator '=' from 9 to 10",
"whitespace ' ' from 10 to 11",
"number '45' from 11 to 13",
"whitespace ' ' from 13 to 14",
"linecomment '// this is a comment' from 14 to 34",
"whitespace '\n' from 34 to 35",
"word 'const' from 35 to 40",
"whitespace ' ' from 40 to 41",
"word 'yo' from 41 to 43",
"whitespace ' ' from 43 to 44",
"operator '=' from 44 to 45",
"whitespace ' ' from 45 to 46",
"number '6' from 46 to 47",
])
})
// NOTE(review): duplicate test title — this one covers a comment on its own
// line rather than a trailing comment; consider renaming for clarity
it('testing tokenising line comments', () => {
const result = stringSummaryLexer(`log('hi')
// comment on a line by itself
const yo=45`)
expect(result).toEqual([
"word 'log' from 0 to 3",
"brace '(' from 3 to 4",
"string ''hi'' from 4 to 8",
"brace ')' from 8 to 9",
"whitespace '\n' from 9 to 10",
"linecomment '// comment on a line by itself' from 10 to 40",
"whitespace '\n' from 40 to 41",
"word 'const' from 41 to 46",
"whitespace ' ' from 46 to 47",
"word 'yo' from 47 to 49",
"operator '=' from 49 to 50",
"number '45' from 50 to 52",
])
})
it('testing tokenising block comments', () => {
// the block comment spans a line break, so the whole multi-line span must
// become one token (offsets 14..50)
const result = stringSummaryLexer(`const yo = 45 /* this is a comment
const ya = 6 */
const yi=45`)
expect(result).toEqual([
"word 'const' from 0 to 5",
"whitespace ' ' from 5 to 6",
"word 'yo' from 6 to 8",
"whitespace ' ' from 8 to 9",
"operator '=' from 9 to 10",
"whitespace ' ' from 10 to 11",
"number '45' from 11 to 13",
"whitespace ' ' from 13 to 14",
`blockcomment '/* this is a comment
const ya = 6 */' from 14 to 50`,
"whitespace '\n' from 50 to 51",
"word 'const' from 51 to 56",
"whitespace ' ' from 56 to 57",
"word 'yi' from 57 to 59",
"operator '=' from 59 to 60",
"number '45' from 60 to 62",
])
})
}) })
// helpers // helpers

View File

@ -17,6 +17,8 @@ const ARRAY_END = /^\]/
const COMMA = /^,/ const COMMA = /^,/
const COLON = /^:/ const COLON = /^:/
const PERIOD = /^\./ const PERIOD = /^\./
const LINECOMMENT = /^\/\/.*/
const BLOCKCOMMENT = /^\/\*[\s\S]*?\*\//
export const isNumber = (character: string) => NUMBER.test(character) export const isNumber = (character: string) => NUMBER.test(character)
export const isWhitespace = (character: string) => WHITESPACE.test(character) export const isWhitespace = (character: string) => WHITESPACE.test(character)
@ -32,6 +34,9 @@ export const isArrayEnd = (character: string) => ARRAY_END.test(character)
export const isComma = (character: string) => COMMA.test(character) export const isComma = (character: string) => COMMA.test(character)
export const isColon = (character: string) => COLON.test(character) export const isColon = (character: string) => COLON.test(character)
export const isPeriod = (character: string) => PERIOD.test(character) export const isPeriod = (character: string) => PERIOD.test(character)
export const isLineComment = (character: string) => LINECOMMENT.test(character)
export const isBlockComment = (character: string) =>
BLOCKCOMMENT.test(character)
function matchFirst(str: string, regex: RegExp) { function matchFirst(str: string, regex: RegExp) {
const theMatch = str.match(regex) const theMatch = str.match(regex)
@ -52,6 +57,8 @@ export interface Token {
| 'comma' | 'comma'
| 'colon' | 'colon'
| 'period' | 'period'
| 'linecomment'
| 'blockcomment'
value: string value: string
start: number start: number
end: number end: number
@ -73,6 +80,14 @@ const returnTokenAtIndex = (str: string, startIndex: number): Token | null => {
if (isString(strFromIndex)) { if (isString(strFromIndex)) {
return makeToken('string', matchFirst(strFromIndex, STRING), startIndex) return makeToken('string', matchFirst(strFromIndex, STRING), startIndex)
} }
const isLineCommentBool = isLineComment(strFromIndex)
if (isLineCommentBool || isBlockComment(strFromIndex)) {
return makeToken(
isLineCommentBool ? 'linecomment' : 'blockcomment',
matchFirst(strFromIndex, isLineCommentBool ? LINECOMMENT : BLOCKCOMMENT),
startIndex
)
}
if (isParanEnd(strFromIndex)) { if (isParanEnd(strFromIndex)) {
return makeToken('brace', matchFirst(strFromIndex, PARAN_END), startIndex) return makeToken('brace', matchFirst(strFromIndex, PARAN_END), startIndex)
} }