I’m noticing this commit introduces an off-by-one tokenizing error, at least when reading keywords.
The following results in the first character of a keyword being skipped. For example, for “CREATE”, String.fromCharCode(next) yields “C”, but the result from readWord
would be “REATE”,
since input.advance()
was called between destructuring input and calling readWord.
// tokensFor
let {next} = input
input.advance()
} else if (isAlpha(next)) {
let word = readWord(input) // 'REATE'
input.acceptToken(d.words[word.toLowerCase()] ?? Identifier)
}
// Reads a word (underscore/alphabetic characters) from `input` and returns it
// as a string, advancing the stream past it.
//
// The caller has typically already consumed the word's first character via
// input.advance() before calling this (it needed `next` to decide the token
// kind). Pass that char code as `first` so it is included in the result —
// this fixes the off-by-one where "CREATE" came back as "REATE". Omitting
// `first` preserves the old (first-char-dropping) behavior, so existing
// call sites keep compiling.
function readWord(input: InputStream, first?: number) {
  // Seed the result with the character the caller already consumed, if any.
  let result = first != null ? String.fromCharCode(first) : ""
  for (;;) {
    // Stop at the first character that is neither '_' nor alphabetic.
    if (input.next != Ch.Underscore && !isAlpha(input.next)) break
    result += String.fromCharCode(input.next)
    input.advance()
  }
  return result
}
Previously, we had
let pos = token.start, next = input.get(pos++)
} else if (isAlpha(next)) {
pos = readWord(input, pos) // skip to after end of 'CREATE'
token.accept(d.words[input.read(token.start, pos).toLowerCase()] ?? Identifier, pos) // get word between token.start and pos which is 'CREATE'
}
// (Previous implementation.) Scans forward from `pos` while the character at
// `pos` is '_' or alphabetic, and returns the index just past the word's end.
// It never consumes the stream itself — the caller re-reads the word text as
// input.read(token.start, pos) — so the first character could not be lost,
// regardless of how far the caller had already peeked.
function readWord(input: Input, pos: number) {
  for (;; pos++) {
    let next = input.get(pos)
    // Stop at the first character that is neither '_' nor alphabetic.
    if (next != Ch.Underscore && !isAlpha(next)) break
  }
  return pos
}