From d3a3a14aaa079e3f972bee09ce03b248081d82b8 Mon Sep 17 00:00:00 2001
From: Kushal Pisavadia
Date: Wed, 29 Apr 2026 21:53:55 +0100
Subject: [PATCH] Fix CRLF handling in line continuation escapes (#1564)

A backslash followed by "\r\n" or by a bare "\r" now lexes as
STRING_ESCAPE_CONTINUATION, the same as a backslash followed by "\n".
Whitespace between the backslash and a "\r" now raises the same
invalidLineContinuationEscapeSequenceWhitespace error as whitespace
before "\n".
---
 .../src/main/java/org/pkl/parser/Lexer.java   |  8 ++-
 .../test/kotlin/org/pkl/parser/LexerTest.kt   | 52 ++++++++++++++++++-
 2 files changed, 58 insertions(+), 2 deletions(-)

diff --git a/pkl-parser/src/main/java/org/pkl/parser/Lexer.java b/pkl-parser/src/main/java/org/pkl/parser/Lexer.java
index 8fe7fe88..b67c3094 100644
--- a/pkl-parser/src/main/java/org/pkl/parser/Lexer.java
+++ b/pkl-parser/src/main/java/org/pkl/parser/Lexer.java
@@ -461,11 +461,17 @@ public final class Lexer {
       }
       case 'u' -> lexUnicodeEscape();
       case '\n' -> Token.STRING_ESCAPE_CONTINUATION;
+      case '\r' -> {
+        if (lookahead == '\n') {
+          nextChar();
+        }
+        yield Token.STRING_ESCAPE_CONTINUATION;
+      }
       case ' ', '\t' -> {
         var c = cursor;
         var next = nextChar();
         while (next == ' ' || next == '\t') next = nextChar();
-        if (next == '\n')
+        if (next == '\n' || next == '\r')
           throw lexError(
               ErrorMessages.create("invalidLineContinuationEscapeSequenceWhitespace"),
               c - 2,
diff --git a/pkl-parser/src/test/kotlin/org/pkl/parser/LexerTest.kt b/pkl-parser/src/test/kotlin/org/pkl/parser/LexerTest.kt
index c9c19841..f416d194 100644
--- a/pkl-parser/src/test/kotlin/org/pkl/parser/LexerTest.kt
+++ b/pkl-parser/src/test/kotlin/org/pkl/parser/LexerTest.kt
@@ -1,5 +1,5 @@
 /*
- * Copyright © 2025 Apple Inc. and the Pkl project authors. All rights reserved.
+ * Copyright © 2025-2026 Apple Inc. and the Pkl project authors. All rights reserved.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -63,6 +63,56 @@ class LexerTest {
     assertThat(thrown).hasMessageContaining("Invalid identifier")
   }
 
+  @Test
+  fun lineContinuationWithCRLF() {
+    // \r\n line endings must be handled the same as \n for line continuations
+    val input = "x = \"\"\"\n hello \\\r\n world\r\n \"\"\""
+    val lexer = Lexer(input)
+    assertThat(lexer.next()).isEqualTo(Token.IDENTIFIER) // x
+    assertThat(lexer.next()).isEqualTo(Token.ASSIGN)
+    assertThat(lexer.next()).isEqualTo(Token.STRING_MULTI_START) // """
+    assertThat(lexer.next()).isEqualTo(Token.STRING_NEWLINE)
+    assertThat(lexer.next()).isEqualTo(Token.STRING_PART) // " hello "
+    assertThat(lexer.next()).isEqualTo(Token.STRING_ESCAPE_CONTINUATION) // \ consumed
+    assertThat(lexer.next()).isEqualTo(Token.STRING_PART) // " world"
+    assertThat(lexer.next()).isEqualTo(Token.STRING_NEWLINE)
+    assertThat(lexer.next()).isEqualTo(Token.STRING_PART) // " "
+    assertThat(lexer.next()).isEqualTo(Token.STRING_END) // """
+    assertThat(lexer.next()).isEqualTo(Token.EOF)
+  }
+
+  @Test
+  fun lineContinuationWithCR() {
+    // bare \r should also work as a line continuation
+    val input = "x = \"\"\"\n hello \\\r world\n \"\"\""
+    val lexer = Lexer(input)
+    assertThat(lexer.next()).isEqualTo(Token.IDENTIFIER) // x
+    assertThat(lexer.next()).isEqualTo(Token.ASSIGN)
+    assertThat(lexer.next()).isEqualTo(Token.STRING_MULTI_START) // """
+    assertThat(lexer.next()).isEqualTo(Token.STRING_NEWLINE)
+    assertThat(lexer.next()).isEqualTo(Token.STRING_PART) // " hello "
+    assertThat(lexer.next()).isEqualTo(Token.STRING_ESCAPE_CONTINUATION) // \ consumed
+    assertThat(lexer.next()).isEqualTo(Token.STRING_PART) // " world"
+    assertThat(lexer.next()).isEqualTo(Token.STRING_NEWLINE)
+    assertThat(lexer.next()).isEqualTo(Token.STRING_PART) // " "
+    assertThat(lexer.next()).isEqualTo(Token.STRING_END) // """
+    assertThat(lexer.next()).isEqualTo(Token.EOF)
+  }
+
+  @Test
+  fun lineContinuationWhitespaceErrorWithCRLF() {
+    // whitespace between \ and \r\n should give the same error as \ and \n
+    val input = "x = \"\"\"\n hello \\ \r\n world\n \"\"\""
+    val thrown =
+      assertThrows<ParserError> {
+        val lexer = Lexer(input)
+        while (lexer.next() != Token.EOF) {
+          /* consume all tokens */
+        }
+      }
+    assertThat(thrown.message).contains("Whitespace")
+  }
+
   @Test
   fun acceptsAllUnicodeCodepointsInComments() {
     // Test valid Unicode codepoints can appear literally