Fix CRLF handling in line continuation escapes (#1564)

This commit is contained in:
Kushal Pisavadia
2026-04-29 21:53:55 +01:00
committed by GitHub
parent 39c01c24ba
commit d3a3a14aaa
2 changed files with 58 additions and 2 deletions
@@ -461,11 +461,17 @@ public final class Lexer {
}
case 'u' -> lexUnicodeEscape();
case '\n' -> Token.STRING_ESCAPE_CONTINUATION;
case '\r' -> {
if (lookahead == '\n') {
nextChar();
}
yield Token.STRING_ESCAPE_CONTINUATION;
}
case ' ', '\t' -> {
var c = cursor;
var next = nextChar();
while (next == ' ' || next == '\t') next = nextChar();
if (next == '\n')
if (next == '\n' || next == '\r')
throw lexError(
ErrorMessages.create("invalidLineContinuationEscapeSequenceWhitespace"),
c - 2,
@@ -1,5 +1,5 @@
/*
* Copyright © 2025 Apple Inc. and the Pkl project authors. All rights reserved.
* Copyright © 2025-2026 Apple Inc. and the Pkl project authors. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -63,6 +63,56 @@ class LexerTest {
assertThat(thrown).hasMessageContaining("Invalid identifier")
}
@Test
fun lineContinuationWithCRLF() {
  // A backslash directly followed by \r\n must lex as a single line continuation,
  // exactly as a backslash followed by \n does.
  val source = "x = \"\"\"\n hello \\\r\n world\r\n \"\"\""
  val expectedTokens =
    listOf(
      Token.IDENTIFIER, // x
      Token.ASSIGN, // =
      Token.STRING_MULTI_START, // """
      Token.STRING_NEWLINE, // \n after the opening delimiter
      Token.STRING_PART, // " hello "
      Token.STRING_ESCAPE_CONTINUATION, // \<CRLF> consumed as one escape
      Token.STRING_PART, // " world"
      Token.STRING_NEWLINE, // \r\n after "world"
      Token.STRING_PART, // " "
      Token.STRING_END, // """
      Token.EOF,
    )

  val lexer = Lexer(source)
  // Tokens are pulled one at a time so the first mismatch fails at its position.
  for (expected in expectedTokens) {
    assertThat(lexer.next()).isEqualTo(expected)
  }
}
@Test
fun lineContinuationWithCR() {
  // A backslash followed by a lone \r (no \n after it) is also a line continuation.
  val source = "x = \"\"\"\n hello \\\r world\n \"\"\""
  val tokenizer = Lexer(source)

  listOf(
      Token.IDENTIFIER, // x
      Token.ASSIGN, // =
      Token.STRING_MULTI_START, // """
      Token.STRING_NEWLINE, // \n after the opening delimiter
      Token.STRING_PART, // " hello "
      Token.STRING_ESCAPE_CONTINUATION, // \<CR> consumed
      Token.STRING_PART, // " world"
      Token.STRING_NEWLINE, // \n after "world"
      Token.STRING_PART, // " "
      Token.STRING_END, // """
      Token.EOF,
    )
    .forEach { expected -> assertThat(tokenizer.next()).isEqualTo(expected) }
}
@Test
fun lineContinuationWhitespaceErrorWithCRLF() {
  // A space between the backslash and \r\n must be rejected with the same
  // whitespace error that is reported for a space between the backslash and \n.
  val source = "x = \"\"\"\n hello \\ \r\n world\n \"\"\""

  val failure =
    assertThrows<ParserError> {
      val lexer = Lexer(source)
      // Drain the token stream; the lexer is expected to throw before EOF is reached.
      do {
        val token = lexer.next()
      } while (token != Token.EOF)
    }

  assertThat(failure).hasMessageContaining("Whitespace")
}
@Test
fun acceptsAllUnicodeCodepointsInComments() {
// Test valid Unicode codepoints can appear literally