mirror of
https://github.com/apple/pkl.git
synced 2026-04-14 12:39:44 +02:00
Fix Lexer EOF sentinel collision with valid Unicode code points (#1251)
Fixes an issue where the EOF sentinel value (U+7FFF), when it occurred literally in the source, could cause premature termination of parsing, leading to potential EOF injection attacks.

Co-authored-by: Dan Chao <dan.chao@apple.com>
This commit is contained in:
committed by
GitHub
parent
a8f76d6209
commit
fdb2bd8c75
@@ -30,7 +30,7 @@ public class Lexer {
|
||||
private int sLine = 1;
|
||||
private int col = 1;
|
||||
private int sCol = 1;
|
||||
private char lookahead;
|
||||
private int lookahead;
|
||||
private State state = State.DEFAULT;
|
||||
private final Deque<InterpolationScope> interpolationStack = new ArrayDeque<>();
|
||||
private boolean stringEnded = false;
|
||||
@@ -38,7 +38,7 @@ public class Lexer {
|
||||
// how many newlines exist between two subsequent tokens
|
||||
protected int newLinesBetween = 0;
|
||||
|
||||
private static final char EOF = Short.MAX_VALUE;
|
||||
private static final int EOF = -1;
|
||||
|
||||
public Lexer(String input) {
|
||||
source = input.toCharArray();
|
||||
@@ -248,7 +248,7 @@ public class Lexer {
|
||||
yield lexNumber(ch);
|
||||
} else if (isIdentifierStart(ch)) {
|
||||
yield lexIdentifier();
|
||||
} else throw lexError(ErrorMessages.create("invalidCharacter", ch), cursor - 1, 1);
|
||||
} else throw lexError(ErrorMessages.create("invalidCharacter", (char) ch), cursor - 1, 1);
|
||||
}
|
||||
};
|
||||
}
|
||||
@@ -450,7 +450,7 @@ public class Lexer {
|
||||
case 'u' -> lexUnicodeEscape();
|
||||
default ->
|
||||
throw lexError(
|
||||
ErrorMessages.create("invalidCharacterEscapeSequence", "\\" + ch, "\\"),
|
||||
ErrorMessages.create("invalidCharacterEscapeSequence", "\\" + (char) ch, "\\"),
|
||||
cursor - 2,
|
||||
2);
|
||||
};
|
||||
@@ -513,7 +513,7 @@ public class Lexer {
|
||||
}
|
||||
}
|
||||
|
||||
private Token lexNumber(char start) {
|
||||
private Token lexNumber(int start) {
|
||||
if (start == '0') {
|
||||
if (lookahead == 'x' || lookahead == 'X') {
|
||||
nextChar();
|
||||
@@ -626,9 +626,9 @@ public class Lexer {
|
||||
if (lookahead == '_') {
|
||||
throw lexError("invalidSeparatorPosition");
|
||||
}
|
||||
var ch = (int) lookahead;
|
||||
var ch = lookahead;
|
||||
if (!(ch >= 48 && ch <= 55)) {
|
||||
throw unexpectedChar((char) ch, "octal number");
|
||||
throw unexpectedChar(ch, "octal number");
|
||||
}
|
||||
while ((ch >= 48 && ch <= 55) || ch == '_') {
|
||||
nextChar();
|
||||
@@ -671,20 +671,19 @@ public class Lexer {
|
||||
return Token.SHEBANG;
|
||||
}
|
||||
|
||||
private boolean isHex(char ch) {
|
||||
var code = (int) ch;
|
||||
private boolean isHex(int code) {
|
||||
return (code >= 48 && code <= 57) || (code >= 97 && code <= 102) || (code >= 65 && code <= 70);
|
||||
}
|
||||
|
||||
private static boolean isIdentifierStart(char c) {
|
||||
private static boolean isIdentifierStart(int c) {
|
||||
return c == '_' || c == '$' || Character.isUnicodeIdentifierStart(c);
|
||||
}
|
||||
|
||||
private static boolean isIdentifierPart(char c) {
|
||||
private static boolean isIdentifierPart(int c) {
|
||||
return c != EOF && (c == '$' || Character.isUnicodeIdentifierPart(c));
|
||||
}
|
||||
|
||||
private char nextChar() {
|
||||
private int nextChar() {
|
||||
var tmp = lookahead;
|
||||
cursor++;
|
||||
if (cursor >= size) {
|
||||
@@ -726,11 +725,11 @@ public class Lexer {
|
||||
return new ParserError(msg, span);
|
||||
}
|
||||
|
||||
private ParserError unexpectedChar(char got, String didYouMean) {
|
||||
private ParserError unexpectedChar(int got, String didYouMean) {
|
||||
if (got == EOF) {
|
||||
return unexpectedChar("EOF", didYouMean);
|
||||
}
|
||||
return lexError("unexpectedCharacter", got, didYouMean);
|
||||
return lexError("unexpectedCharacter", (char) got, didYouMean);
|
||||
}
|
||||
|
||||
private ParserError unexpectedChar(String got, String didYouMean) {
|
||||
|
||||
Reference in New Issue
Block a user