diff --git a/pkl-core/src/main/java/org/pkl/core/ast/expression/binary/SubscriptNode.java b/pkl-core/src/main/java/org/pkl/core/ast/expression/binary/SubscriptNode.java index 204ebcb69..87dcf2fba 100644 --- a/pkl-core/src/main/java/org/pkl/core/ast/expression/binary/SubscriptNode.java +++ b/pkl-core/src/main/java/org/pkl/core/ast/expression/binary/SubscriptNode.java @@ -1,5 +1,5 @@ /* - * Copyright © 2024-2025 Apple Inc. and the Pkl project authors. All rights reserved. + * Copyright © 2024-2026 Apple Inc. and the Pkl project authors. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -44,7 +44,9 @@ public abstract class SubscriptNode extends BinaryExpressionNode { .build(); } - if (Character.isHighSurrogate(receiver.charAt(charIndex))) { + if (Character.isHighSurrogate(receiver.charAt(charIndex)) + && charIndex < receiver.length() - 1 + && Character.isLowSurrogate(receiver.charAt(charIndex + 1))) { return receiver.substring(charIndex, charIndex + 2); } return receiver.substring(charIndex, charIndex + 1); diff --git a/pkl-core/src/main/java/org/pkl/core/runtime/VmUtils.java b/pkl-core/src/main/java/org/pkl/core/runtime/VmUtils.java index 4649bb4e9..155cd5fad 100644 --- a/pkl-core/src/main/java/org/pkl/core/runtime/VmUtils.java +++ b/pkl-core/src/main/java/org/pkl/core/runtime/VmUtils.java @@ -675,13 +675,14 @@ public final class VmUtils { var charOffset = startIndex; while (charOffset < length && codePointOffset > 0) { - if (Character.isHighSurrogate(string.charAt(charOffset++)) + var ch = string.charAt(charOffset); + charOffset++; + if (Character.isHighSurrogate(ch) && charOffset < length - && !Character.isLowSurrogate(string.charAt(charOffset++))) { - codePointOffset -= 2; - } else { - codePointOffset -= 1; + && Character.isLowSurrogate(string.charAt(charOffset))) { + charOffset++; } + codePointOffset--; } return codePointOffset != 0 ? -1 : charOffset; @@ -692,13 +693,14 @@ public final class VmUtils { var charOffset = string.length(); while (charOffset > 0 && codePointOffset > 0) { - if (Character.isLowSurrogate(string.charAt(--charOffset)) + charOffset--; + char ch = string.charAt(charOffset); + if (Character.isLowSurrogate(ch) && charOffset > 0 - && !Character.isHighSurrogate(string.charAt(--charOffset))) { - codePointOffset -= 2; - } else { - codePointOffset -= 1; + && Character.isHighSurrogate(string.charAt(charOffset - 1))) { + charOffset--; } + codePointOffset--; } return codePointOffset != 0 ? -1 : charOffset; diff --git a/pkl-core/src/main/java/org/pkl/core/stdlib/base/StringNodes.java b/pkl-core/src/main/java/org/pkl/core/stdlib/base/StringNodes.java index cf8d0976a..c21cb1387 100644 --- a/pkl-core/src/main/java/org/pkl/core/stdlib/base/StringNodes.java +++ b/pkl-core/src/main/java/org/pkl/core/stdlib/base/StringNodes.java @@ -220,7 +220,9 @@ public final class StringNodes { var charIndex = VmUtils.codePointOffsetToCharOffset(self, index); if (charIndex == -1 || charIndex == self.length()) return VmNull.withoutDefault(); - if (Character.isHighSurrogate(self.charAt(charIndex))) { + if (Character.isHighSurrogate(self.charAt(charIndex)) + && charIndex < self.length() - 1 + && Character.isLowSurrogate(self.charAt(charIndex + 1))) { return self.substring(charIndex, charIndex + 2); } return self.substring(charIndex, charIndex + 1); diff --git a/pkl-core/src/test/files/LanguageSnippetTests/input/api/string.pkl b/pkl-core/src/test/files/LanguageSnippetTests/input/api/string.pkl index afb766426..6c1228694 100644 --- a/pkl-core/src/test/files/LanguageSnippetTests/input/api/string.pkl +++ b/pkl-core/src/test/files/LanguageSnippetTests/input/api/string.pkl @@ -415,6 +415,7 @@ examples { str1.substring(2, 3) str1.substring(2, 4) str1.substring(0, 7) + "\u{D800}hello".substring(0, 2) module.catch(() -> str1.substring(-1, 4)) module.catch(() -> str1.substring(1, 8)) module.catch(() -> str1.substring(3, 2)) @@ -436,6 +437,15 @@ examples { str1.getOrNull(3) str1.getOrNull(6) str1.getOrNull(7) + "🏀".getOrNull(0) + "🏀".getOrNull(1) + "\u{D800}".getOrNull(0) + "\u{D800}".getOrNull(1) + "\u{D800}h".getOrNull(0) + "\u{D800}h".getOrNull(1) + "\u{D800}h".getOrNull(2) + "h\u{D800}".getOrNull(0) + "h\u{D800}".getOrNull(1) } ["toCodePoints()"] { diff --git a/pkl-core/src/test/files/LanguageSnippetTests/input/basic/string.pkl b/pkl-core/src/test/files/LanguageSnippetTests/input/basic/string.pkl index 39777b9f5..b1f6ffc57 100644 --- a/pkl-core/src/test/files/LanguageSnippetTests/input/basic/string.pkl +++ b/pkl-core/src/test/files/LanguageSnippetTests/input/basic/string.pkl @@ -59,6 +59,13 @@ examples { str3[2] module.catch(() -> str3[-1]) module.catch(() -> str3[4]) + + "🏀"[0] + "\u{D800}"[0] + "\u{D800}h"[0] + "\u{D800}h"[1] + "h\u{D800}"[0] + "h\u{D800}"[1] } ["dollar sign has no special meaning"] { diff --git a/pkl-core/src/test/files/LanguageSnippetTests/output/api/string.pcf b/pkl-core/src/test/files/LanguageSnippetTests/output/api/string.pcf index f8b5588cd..deb0a782b 100644 --- a/pkl-core/src/test/files/LanguageSnippetTests/output/api/string.pcf +++ b/pkl-core/src/test/files/LanguageSnippetTests/output/api/string.pcf @@ -353,6 +353,7 @@ examples { "c" "cd" "abcdefg" + "?h" "Character index `-1` is out of range `0`..`7`. String: \"abcdefg\"" "Character index `8` is out of range `1`..`7`. String: \"abcdefg\"" "Character index `2` is out of range `3`..`7`. String: \"abcdefg\"" @@ -372,6 +373,15 @@ examples { "d" "g" null + "🏀" + null + "?" + null + "?" + "h" + null + "h" + "?" } ["toCodePoints()"] { List(97, 98, 99, 100, 101, 102, 103) diff --git a/pkl-core/src/test/files/LanguageSnippetTests/output/basic/string.pcf b/pkl-core/src/test/files/LanguageSnippetTests/output/basic/string.pcf index 65e380bd3..ac4e7874f 100644 --- a/pkl-core/src/test/files/LanguageSnippetTests/output/basic/string.pcf +++ b/pkl-core/src/test/files/LanguageSnippetTests/output/basic/string.pcf @@ -51,6 +51,12 @@ examples { "i" "Character index `-1` is out of range `0`..`3`. String: \"this\"" "Character index `4` is out of range `0`..`3`. String: \"this\"" + "🏀" + "?" + "?" + "h" + "h" + "?" } ["dollar sign has no special meaning"] { "123$"