UTF-8 vs UTF-16 mismatch in KCL diagnostic SourceRanges (#7537)
# Background The KCL interpreter (written in Rust) reports errors, lints and other diagnostics as UTF-8 source range offsets. JavaScript, including CodeMirror, represents source code as UTF-16 strings. # Problem This means the UTF-8 source ranges sent from Rust don't correspond to the same text in the JS UTF-16 GUI. At best, this means the source ranges highlight the wrong thing if you use non-ASCII characters. At worst, it can cause exceptions by trying to highlight a range that doesn't even exist. Here's the problem, on main: <img width="178" alt="Screenshot 2025-06-20 at 11 52 03 AM" src="https://github.com/user-attachments/assets/9a4e75bf-965f-49d8-b238-8868b35912a1" /> # Solution We define a KCL SourceRange as _always_ being UTF-8. This means if a source range is constructed in JS, it should be converted to UTF-8. When these ranges are converted into CodeMirror diagnostics, they should be converted to UTF-16. Here's the same code as above, but on this branch: <img width="170" alt="Screenshot 2025-06-20 at 11 50 55 AM" src="https://github.com/user-attachments/assets/a5b971c0-0b02-4acd-8fcf-5a133331682b" /> Closes https://github.com/KittyCAD/modeling-app/issues/4327
This commit is contained in:
@ -111,7 +111,8 @@ commaSep1NoTrailingComma<term> { term ("," term)* }
|
|||||||
|
|
||||||
PipeSubstitution { "%" }
|
PipeSubstitution { "%" }
|
||||||
|
|
||||||
identifier { (@asciiLetter | "_") (@asciiLetter | @digit | "_")* }
|
// Includes non-whitespace unicode characters.
|
||||||
|
identifier { $[a-zA-Z_\u{a1}-\u{167f}\u{1681}-\u{1fff}\u{200e}-\u{2027}\u{202a}-\u{202e}\u{2030}-\u{205e}\u{2061}-\u{2fff}\u{3001}-\u{fefe}\u{ff00}-\u{10ffff}] $[a-zA-Z0-9_\u{a1}-\u{167f}\u{1681}-\u{1fff}\u{200e}-\u{2027}\u{202a}-\u{202e}\u{2030}-\u{205e}\u{2061}-\u{2fff}\u{3001}-\u{fefe}\u{ff00}-\u{10ffff}]* }
|
||||||
AnnotationName { "@" identifier? }
|
AnnotationName { "@" identifier? }
|
||||||
PropertyName { identifier }
|
PropertyName { identifier }
|
||||||
TagDeclarator { "$" identifier }
|
TagDeclarator { "$" identifier }
|
||||||
|
@ -389,7 +389,7 @@ export class KclManager extends EventTarget {
|
|||||||
|
|
||||||
if (err(result)) {
|
if (err(result)) {
|
||||||
const kclError: KCLError = result as KCLError
|
const kclError: KCLError = result as KCLError
|
||||||
this.diagnostics = kclErrorsToDiagnostics([kclError])
|
this.diagnostics = kclErrorsToDiagnostics([kclError], code)
|
||||||
this._astParseFailed = true
|
this._astParseFailed = true
|
||||||
|
|
||||||
await this.checkIfSwitchedFilesShouldClear()
|
await this.checkIfSwitchedFilesShouldClear()
|
||||||
@ -403,8 +403,8 @@ export class KclManager extends EventTarget {
|
|||||||
this._kclErrorsCallBack([])
|
this._kclErrorsCallBack([])
|
||||||
this._logsCallBack([])
|
this._logsCallBack([])
|
||||||
|
|
||||||
this.addDiagnostics(compilationErrorsToDiagnostics(result.errors))
|
this.addDiagnostics(compilationErrorsToDiagnostics(result.errors, code))
|
||||||
this.addDiagnostics(compilationErrorsToDiagnostics(result.warnings))
|
this.addDiagnostics(compilationErrorsToDiagnostics(result.warnings, code))
|
||||||
if (result.errors.length > 0) {
|
if (result.errors.length > 0) {
|
||||||
this._astParseFailed = true
|
this._astParseFailed = true
|
||||||
|
|
||||||
@ -486,11 +486,16 @@ export class KclManager extends EventTarget {
|
|||||||
|
|
||||||
this.logs = logs
|
this.logs = logs
|
||||||
this.errors = errors
|
this.errors = errors
|
||||||
|
const code = this.singletons.codeManager.code
|
||||||
// Do not add the errors since the program was interrupted and the error is not a real KCL error
|
// Do not add the errors since the program was interrupted and the error is not a real KCL error
|
||||||
this.addDiagnostics(isInterrupted ? [] : kclErrorsToDiagnostics(errors))
|
this.addDiagnostics(
|
||||||
|
isInterrupted ? [] : kclErrorsToDiagnostics(errors, code)
|
||||||
|
)
|
||||||
// Add warnings and non-fatal errors
|
// Add warnings and non-fatal errors
|
||||||
this.addDiagnostics(
|
this.addDiagnostics(
|
||||||
isInterrupted ? [] : compilationErrorsToDiagnostics(execState.errors)
|
isInterrupted
|
||||||
|
? []
|
||||||
|
: compilationErrorsToDiagnostics(execState.errors, code)
|
||||||
)
|
)
|
||||||
this.execState = execState
|
this.execState = execState
|
||||||
if (!errors.length) {
|
if (!errors.length) {
|
||||||
|
@ -1,8 +1,35 @@
|
|||||||
import type { KCLError } from '@src/lang/errors'
|
import type { KCLError } from '@src/lang/errors'
|
||||||
import { kclErrorsToDiagnostics } from '@src/lang/errors'
|
import { kclErrorsToDiagnostics, toUtf16, toUtf8 } from '@src/lang/errors'
|
||||||
import { defaultArtifactGraph } from '@src/lang/std/artifactGraph'
|
import { defaultArtifactGraph } from '@src/lang/std/artifactGraph'
|
||||||
import { topLevelRange } from '@src/lang/util'
|
import { topLevelRange } from '@src/lang/util'
|
||||||
|
|
||||||
|
describe('test UTF conversions', () => {
|
||||||
|
it('Converts UTF-8 to UTF-16', () => {
|
||||||
|
// This KCL program has an error. The variable `亞當` cannot be +3 because
|
||||||
|
// it holds a string. So that variable, on line 2, should be highlighted by
|
||||||
|
// a source range.
|
||||||
|
const sourceCode = "亞當 = 'adam'\nx = 亞當 + 3"
|
||||||
|
// Start with a SourceRange from the KCL interpreter,
|
||||||
|
// which is a UTF-8 range, on where the variable is used on the second line.
|
||||||
|
const utf8SourceRange = [20, 26, 0]
|
||||||
|
|
||||||
|
// JS string of the program uses UTF-16, so check we can correctly find the
|
||||||
|
// source range offset in UTF-16.
|
||||||
|
const actualStart = toUtf16(utf8SourceRange[0], sourceCode)
|
||||||
|
const actualEnd = toUtf16(utf8SourceRange[1], sourceCode)
|
||||||
|
const textInSourceRange = sourceCode.slice(actualStart, actualEnd)
|
||||||
|
expect(actualStart).toBe(16)
|
||||||
|
expect(actualEnd).toBe(18)
|
||||||
|
expect(textInSourceRange).toBe('亞當')
|
||||||
|
|
||||||
|
// Test we can convert the UTF-16 source range back to UTF-8,
|
||||||
|
// getting the original source range back.
|
||||||
|
const utf16Range: [number, number, number] = [actualStart, actualEnd, 0]
|
||||||
|
const actualUtf8Range = toUtf8(utf16Range, sourceCode)
|
||||||
|
expect(actualUtf8Range).toStrictEqual(utf8SourceRange)
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
describe('test kclErrToDiagnostic', () => {
|
describe('test kclErrToDiagnostic', () => {
|
||||||
it('converts KCL errors to CodeMirror diagnostics', () => {
|
it('converts KCL errors to CodeMirror diagnostics', () => {
|
||||||
const errors: KCLError[] = [
|
const errors: KCLError[] = [
|
||||||
@ -33,7 +60,7 @@ describe('test kclErrToDiagnostic', () => {
|
|||||||
defaultPlanes: null,
|
defaultPlanes: null,
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
const diagnostics = kclErrorsToDiagnostics(errors)
|
const diagnostics = kclErrorsToDiagnostics(errors, 'TEST PROGRAM')
|
||||||
expect(diagnostics).toEqual([
|
expect(diagnostics).toEqual([
|
||||||
{
|
{
|
||||||
from: 0,
|
from: 0,
|
||||||
|
@ -290,6 +290,38 @@ export class KCLUndefinedValueError extends KCLError {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
Convert this UTF-16 source range offset to UTF-8 as SourceRange is always a UTF-8
|
||||||
|
*/
|
||||||
|
export function toUtf8(
|
||||||
|
utf16SourceRange: SourceRange,
|
||||||
|
sourceCode: string
|
||||||
|
): SourceRange {
|
||||||
|
const moduleId = utf16SourceRange[2]
|
||||||
|
const textEncoder = new TextEncoder()
|
||||||
|
const prefixUtf16 = sourceCode.slice(0, utf16SourceRange[0])
|
||||||
|
const prefixUtf8 = textEncoder.encode(prefixUtf16)
|
||||||
|
const prefixLen = prefixUtf8.length
|
||||||
|
const toHighlightUtf16 = sourceCode.slice(
|
||||||
|
utf16SourceRange[0],
|
||||||
|
utf16SourceRange[1]
|
||||||
|
)
|
||||||
|
const toHighlightUtf8 = textEncoder.encode(toHighlightUtf16)
|
||||||
|
const toHighlightLen = toHighlightUtf8.length
|
||||||
|
return [prefixLen, prefixLen + toHighlightLen, moduleId]
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
Convert this UTF-8 source range offset to UTF-16 for display in CodeMirror,
|
||||||
|
as it relies on JS-style string encoding which is UTF-16.
|
||||||
|
*/
|
||||||
|
export function toUtf16(utf8Offset: number, sourceCode: string): number {
|
||||||
|
const sourceUtf8 = new TextEncoder().encode(sourceCode)
|
||||||
|
const prefix = sourceUtf8.slice(0, utf8Offset)
|
||||||
|
const backTo16 = new TextDecoder().decode(prefix)
|
||||||
|
return backTo16.length
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Maps the lsp diagnostic to an array of KclErrors.
|
* Maps the lsp diagnostic to an array of KclErrors.
|
||||||
* Currently the diagnostics are all errors, but in the future they could include lints.
|
* Currently the diagnostics are all errors, but in the future they could include lints.
|
||||||
@ -299,12 +331,15 @@ export function lspDiagnosticsToKclErrors(
|
|||||||
diagnostics: LspDiagnostic[]
|
diagnostics: LspDiagnostic[]
|
||||||
): KCLError[] {
|
): KCLError[] {
|
||||||
return diagnostics
|
return diagnostics
|
||||||
.flatMap(
|
.flatMap(({ range, message }) => {
|
||||||
({ range, message }) =>
|
const sourceRange = toUtf8(
|
||||||
new KCLError(
|
[posToOffset(doc, range.start)!, posToOffset(doc, range.end)!, 0],
|
||||||
|
doc.toString()
|
||||||
|
)
|
||||||
|
return new KCLError(
|
||||||
'unexpected',
|
'unexpected',
|
||||||
message,
|
message,
|
||||||
[posToOffset(doc, range.start)!, posToOffset(doc, range.end)!, 0],
|
sourceRange,
|
||||||
[],
|
[],
|
||||||
[],
|
[],
|
||||||
[],
|
[],
|
||||||
@ -312,7 +347,7 @@ export function lspDiagnosticsToKclErrors(
|
|||||||
{},
|
{},
|
||||||
null
|
null
|
||||||
)
|
)
|
||||||
)
|
})
|
||||||
.sort((a, b) => {
|
.sort((a, b) => {
|
||||||
const c = a.sourceRange[0]
|
const c = a.sourceRange[0]
|
||||||
const d = b.sourceRange[0]
|
const d = b.sourceRange[0]
|
||||||
@ -331,7 +366,8 @@ export function lspDiagnosticsToKclErrors(
|
|||||||
* Currently the diagnostics are all errors, but in the future they could include lints.
|
* Currently the diagnostics are all errors, but in the future they could include lints.
|
||||||
* */
|
* */
|
||||||
export function kclErrorsToDiagnostics(
|
export function kclErrorsToDiagnostics(
|
||||||
errors: KCLError[]
|
errors: KCLError[],
|
||||||
|
sourceCode: string
|
||||||
): CodeMirrorDiagnostic[] {
|
): CodeMirrorDiagnostic[] {
|
||||||
let nonFatal: CodeMirrorDiagnostic[] = []
|
let nonFatal: CodeMirrorDiagnostic[] = []
|
||||||
const errs = errors
|
const errs = errors
|
||||||
@ -350,8 +386,8 @@ export function kclErrorsToDiagnostics(
|
|||||||
item.sourceRange[1] !== err.sourceRange[1]
|
item.sourceRange[1] !== err.sourceRange[1]
|
||||||
) {
|
) {
|
||||||
diagnostics.push({
|
diagnostics.push({
|
||||||
from: item.sourceRange[0],
|
from: toUtf16(item.sourceRange[0], sourceCode),
|
||||||
to: item.sourceRange[1],
|
to: toUtf16(item.sourceRange[1], sourceCode),
|
||||||
message: 'Part of the error backtrace',
|
message: 'Part of the error backtrace',
|
||||||
severity: 'hint',
|
severity: 'hint',
|
||||||
})
|
})
|
||||||
@ -365,11 +401,13 @@ export function kclErrorsToDiagnostics(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (err.nonFatal.length > 0) {
|
if (err.nonFatal.length > 0) {
|
||||||
nonFatal = nonFatal.concat(compilationErrorsToDiagnostics(err.nonFatal))
|
nonFatal = nonFatal.concat(
|
||||||
|
compilationErrorsToDiagnostics(err.nonFatal, sourceCode)
|
||||||
|
)
|
||||||
}
|
}
|
||||||
diagnostics.push({
|
diagnostics.push({
|
||||||
from: err.sourceRange[0],
|
from: toUtf16(err.sourceRange[0], sourceCode),
|
||||||
to: err.sourceRange[1],
|
to: toUtf16(err.sourceRange[1], sourceCode),
|
||||||
message,
|
message,
|
||||||
severity: 'error',
|
severity: 'error',
|
||||||
})
|
})
|
||||||
@ -379,7 +417,8 @@ export function kclErrorsToDiagnostics(
|
|||||||
}
|
}
|
||||||
|
|
||||||
export function compilationErrorsToDiagnostics(
|
export function compilationErrorsToDiagnostics(
|
||||||
errors: CompilationError[]
|
errors: CompilationError[],
|
||||||
|
sourceCode: string
|
||||||
): CodeMirrorDiagnostic[] {
|
): CodeMirrorDiagnostic[] {
|
||||||
return errors
|
return errors
|
||||||
?.filter((err) => isTopLevelModule(err.sourceRange))
|
?.filter((err) => isTopLevelModule(err.sourceRange))
|
||||||
@ -397,8 +436,8 @@ export function compilationErrorsToDiagnostics(
|
|||||||
apply: (view: EditorView, from: number, to: number) => {
|
apply: (view: EditorView, from: number, to: number) => {
|
||||||
view.dispatch({
|
view.dispatch({
|
||||||
changes: {
|
changes: {
|
||||||
from: suggestion.source_range[0],
|
from: toUtf16(suggestion.source_range[0], sourceCode),
|
||||||
to: suggestion.source_range[1],
|
to: toUtf16(suggestion.source_range[1], sourceCode),
|
||||||
insert: suggestion.insert,
|
insert: suggestion.insert,
|
||||||
},
|
},
|
||||||
annotations: [lspCodeActionEvent],
|
annotations: [lspCodeActionEvent],
|
||||||
@ -408,8 +447,8 @@ export function compilationErrorsToDiagnostics(
|
|||||||
]
|
]
|
||||||
}
|
}
|
||||||
return {
|
return {
|
||||||
from: err.sourceRange[0],
|
from: toUtf16(err.sourceRange[0], sourceCode),
|
||||||
to: err.sourceRange[1],
|
to: toUtf16(err.sourceRange[1], sourceCode),
|
||||||
message: err.message,
|
message: err.message,
|
||||||
severity,
|
severity,
|
||||||
actions,
|
actions,
|
||||||
|
Reference in New Issue
Block a user