mirror of
https://github.com/apple/pkl.git
synced 2026-06-11 00:02:47 +02:00
Fix errors around strings with lone high or low surrogates (#1673)
This fixes several errors in the handling of strings that contain lone (unpaired) high or low surrogates.
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright © 2024-2025 Apple Inc. and the Pkl project authors. All rights reserved.
|
||||
* Copyright © 2024-2026 Apple Inc. and the Pkl project authors. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
@@ -44,7 +44,9 @@ public abstract class SubscriptNode extends BinaryExpressionNode {
|
||||
.build();
|
||||
}
|
||||
|
||||
if (Character.isHighSurrogate(receiver.charAt(charIndex))) {
|
||||
if (Character.isHighSurrogate(receiver.charAt(charIndex))
|
||||
&& charIndex < receiver.length() - 1
|
||||
&& Character.isLowSurrogate(receiver.charAt(charIndex + 1))) {
|
||||
return receiver.substring(charIndex, charIndex + 2);
|
||||
}
|
||||
return receiver.substring(charIndex, charIndex + 1);
|
||||
|
||||
@@ -675,13 +675,14 @@ public final class VmUtils {
|
||||
var charOffset = startIndex;
|
||||
|
||||
while (charOffset < length && codePointOffset > 0) {
|
||||
if (Character.isHighSurrogate(string.charAt(charOffset++))
|
||||
var ch = string.charAt(charOffset);
|
||||
charOffset++;
|
||||
if (Character.isHighSurrogate(ch)
|
||||
&& charOffset < length
|
||||
&& !Character.isLowSurrogate(string.charAt(charOffset++))) {
|
||||
codePointOffset -= 2;
|
||||
} else {
|
||||
codePointOffset -= 1;
|
||||
&& Character.isLowSurrogate(string.charAt(charOffset))) {
|
||||
charOffset++;
|
||||
}
|
||||
codePointOffset--;
|
||||
}
|
||||
|
||||
return codePointOffset != 0 ? -1 : charOffset;
|
||||
@@ -692,13 +693,14 @@ public final class VmUtils {
|
||||
var charOffset = string.length();
|
||||
|
||||
while (charOffset > 0 && codePointOffset > 0) {
|
||||
if (Character.isLowSurrogate(string.charAt(--charOffset))
|
||||
charOffset--;
|
||||
char ch = string.charAt(charOffset);
|
||||
if (Character.isLowSurrogate(ch)
|
||||
&& charOffset > 0
|
||||
&& !Character.isHighSurrogate(string.charAt(--charOffset))) {
|
||||
codePointOffset -= 2;
|
||||
} else {
|
||||
codePointOffset -= 1;
|
||||
&& Character.isHighSurrogate(string.charAt(charOffset - 1))) {
|
||||
charOffset--;
|
||||
}
|
||||
codePointOffset--;
|
||||
}
|
||||
|
||||
return codePointOffset != 0 ? -1 : charOffset;
|
||||
|
||||
@@ -220,7 +220,9 @@ public final class StringNodes {
|
||||
var charIndex = VmUtils.codePointOffsetToCharOffset(self, index);
|
||||
if (charIndex == -1 || charIndex == self.length()) return VmNull.withoutDefault();
|
||||
|
||||
if (Character.isHighSurrogate(self.charAt(charIndex))) {
|
||||
if (Character.isHighSurrogate(self.charAt(charIndex))
|
||||
&& charIndex < self.length() - 1
|
||||
&& Character.isLowSurrogate(self.charAt(charIndex + 1))) {
|
||||
return self.substring(charIndex, charIndex + 2);
|
||||
}
|
||||
return self.substring(charIndex, charIndex + 1);
|
||||
|
||||
@@ -415,6 +415,7 @@ examples {
|
||||
str1.substring(2, 3)
|
||||
str1.substring(2, 4)
|
||||
str1.substring(0, 7)
|
||||
"\u{D800}hello".substring(0, 2)
|
||||
module.catch(() -> str1.substring(-1, 4))
|
||||
module.catch(() -> str1.substring(1, 8))
|
||||
module.catch(() -> str1.substring(3, 2))
|
||||
@@ -436,6 +437,15 @@ examples {
|
||||
str1.getOrNull(3)
|
||||
str1.getOrNull(6)
|
||||
str1.getOrNull(7)
|
||||
"🏀".getOrNull(0)
|
||||
"🏀".getOrNull(1)
|
||||
"\u{D800}".getOrNull(0)
|
||||
"\u{D800}".getOrNull(1)
|
||||
"\u{D800}h".getOrNull(0)
|
||||
"\u{D800}h".getOrNull(1)
|
||||
"\u{D800}h".getOrNull(2)
|
||||
"h\u{D800}".getOrNull(0)
|
||||
"h\u{D800}".getOrNull(1)
|
||||
}
|
||||
|
||||
["toCodePoints()"] {
|
||||
|
||||
@@ -59,6 +59,13 @@ examples {
|
||||
str3[2]
|
||||
module.catch(() -> str3[-1])
|
||||
module.catch(() -> str3[4])
|
||||
|
||||
"🏀"[0]
|
||||
"\u{D800}"[0]
|
||||
"\u{D800}h"[0]
|
||||
"\u{D800}h"[1]
|
||||
"h\u{D800}"[0]
|
||||
"h\u{D800}"[1]
|
||||
}
|
||||
|
||||
["dollar sign has no special meaning"] {
|
||||
|
||||
@@ -353,6 +353,7 @@ examples {
|
||||
"c"
|
||||
"cd"
|
||||
"abcdefg"
|
||||
"?h"
|
||||
"Character index `-1` is out of range `0`..`7`. String: \"abcdefg\""
|
||||
"Character index `8` is out of range `1`..`7`. String: \"abcdefg\""
|
||||
"Character index `2` is out of range `3`..`7`. String: \"abcdefg\""
|
||||
@@ -372,6 +373,15 @@ examples {
|
||||
"d"
|
||||
"g"
|
||||
null
|
||||
"🏀"
|
||||
null
|
||||
"?"
|
||||
null
|
||||
"?"
|
||||
"h"
|
||||
null
|
||||
"h"
|
||||
"?"
|
||||
}
|
||||
["toCodePoints()"] {
|
||||
List(97, 98, 99, 100, 101, 102, 103)
|
||||
|
||||
@@ -51,6 +51,12 @@ examples {
|
||||
"i"
|
||||
"Character index `-1` is out of range `0`..`3`. String: \"this\""
|
||||
"Character index `4` is out of range `0`..`3`. String: \"this\""
|
||||
"🏀"
|
||||
"?"
|
||||
"?"
|
||||
"h"
|
||||
"h"
|
||||
"?"
|
||||
}
|
||||
["dollar sign has no special meaning"] {
|
||||
"123$"
|
||||
|
||||
Reference in New Issue
Block a user