move handling of strings to parser (#962)

This commit is contained in:
Islon Scherer
2025-02-19 17:19:48 +01:00
committed by GitHub
parent ee23a8c3f4
commit 2ffd201172
25 changed files with 408 additions and 543 deletions

View File

@@ -18,18 +18,12 @@ package org.pkl.core.ast.builder;
import com.oracle.truffle.api.source.Source;
import com.oracle.truffle.api.source.SourceSection;
import java.util.List;
import org.pkl.core.PklBugException;
import org.pkl.core.parser.BaseParserVisitor;
import org.pkl.core.parser.Span;
import org.pkl.core.parser.ast.DocComment;
import org.pkl.core.parser.ast.Modifier;
import org.pkl.core.parser.ast.Modifier.ModifierValue;
import org.pkl.core.parser.ast.Node;
import org.pkl.core.parser.ast.StringConstant;
import org.pkl.core.parser.ast.StringConstantPart;
import org.pkl.core.parser.ast.StringConstantPart.ConstantPart;
import org.pkl.core.parser.ast.StringConstantPart.StringEscape;
import org.pkl.core.parser.ast.StringConstantPart.StringUnicodeEscape;
import org.pkl.core.runtime.VmExceptionBuilder;
import org.pkl.core.util.Nullable;
@@ -43,53 +37,6 @@ public abstract class AbstractAstBuilder<T> extends BaseParserVisitor<T> {
this.source = source;
}
/**
 * Renders a string constant node to its text by delegating to the list-based overload with the
 * node's constant parts.
 *
 * @param expr the string constant node to render
 * @return the concatenated text of all parts
 */
protected String doVisitStringConstant(StringConstant expr) {
  var constantParts = expr.getStrParts().getParts();
  return doVisitStringConstant(constantParts);
}
/**
 * Concatenates the rendered text of every constant part, in list order.
 *
 * @param strs the constant parts to render
 * @return the joined text; empty when {@code strs} is empty
 */
protected String doVisitStringConstant(List<StringConstantPart> strs) {
  var rendered = new StringBuilder();
  strs.forEach(piece -> rendered.append(doVisitStringConstantPart(piece)));
  return rendered.toString();
}
/**
 * Renders one constant part of a string to the text it denotes.
 *
 * <ul>
 *   <li>a simple escape becomes the character it stands for,
 *   <li>a unicode escape becomes the string for its parsed code point,
 *   <li>a plain constant part yields its stored text.
 * </ul>
 *
 * @param part the part to render
 * @return the text denoted by {@code part}
 * @throws PklBugException for any other kind of part
 */
protected String doVisitStringConstantPart(StringConstantPart part) {
  if (part instanceof StringEscape simpleEscape) {
    return switch (simpleEscape.getType()) {
      case RETURN -> "\r";
      case TAB -> "\t";
      case BACKSLASH -> "\\";
      case QUOTE -> "\"";
      case NEWLINE -> "\n";
    };
  }
  if (part instanceof StringUnicodeEscape unicodeEscape) {
    return Character.toString(parseUnicodeEscapeSequence(unicodeEscape));
  }
  if (part instanceof ConstantPart literal) {
    return literal.getStr();
  }
  throw PklBugException.unreachableCode();
}
/**
 * Parses the hexadecimal digits between the braces of a unicode escape and returns the code
 * point they denote.
 *
 * <p>The value is validated with {@link Character#isValidCodePoint(int)}: a sequence such as
 * {@code FFFFFFF} fits in an {@code int} but is not a Unicode code point, and handing it to
 * {@code Character.toString(int)} would raise an unhandled {@link IllegalArgumentException}
 * instead of a proper evaluation error.
 *
 * @param escape the unicode escape node; its text is expected to contain a <code>{</code> at
 *     index 2 or later and to end with the closing brace (guaranteed by the lexer)
 * @return a valid Unicode code point
 * @throws RuntimeException the "invalidUnicodeEscapeSequence" evaluation error built by {@code
 *     exceptionBuilder()} when the digits are not valid hexadecimal, overflow an {@code int}, or
 *     denote a value outside the Unicode range
 */
protected int parseUnicodeEscapeSequence(StringUnicodeEscape escape) {
  var text = escape.getEscape();
  var lastIndex = text.length() - 1;
  var startIndex = text.indexOf('{', 2);
  assert startIndex != -1; // guaranteed by lexer
  try {
    var codePoint = Integer.parseInt(text.substring(startIndex + 1, lastIndex), 16);
    if (Character.isValidCodePoint(codePoint)) {
      return codePoint;
    }
  } catch (NumberFormatException ignored) {
    // not a hexadecimal int; reported below together with out-of-range values
  }
  throw exceptionBuilder()
      .evalError("invalidUnicodeEscapeSequence", text, text.substring(0, startIndex))
      .withSourceSection(createSourceSection(escape))
      .build();
}
protected final @Nullable SourceSection createSourceSection(@Nullable Node node) {
return node == null
? null
@@ -133,16 +80,4 @@ public abstract class AbstractAstBuilder<T> extends BaseParserVisitor<T> {
protected static SourceSection createSourceSection(Source source, Span span) {
return source.createSection(span.charIndex(), span.length());
}
/** Returns the section produced by {@code startOf(Span)} for the span of {@code node}. */
protected SourceSection startOf(Node node) {
  var nodeSpan = node.span();
  return startOf(nodeSpan);
}
/** Creates a one-character source section located at the first character of {@code span}. */
protected SourceSection startOf(Span span) {
  var firstCharIndex = span.charIndex();
  return source.createSection(firstCharIndex, 1);
}
/**
 * Creates a section that drops the first {@code length} characters of {@code section}: the
 * start index moves right by {@code length} and the character length shrinks by the same amount.
 */
protected SourceSection shrinkLeft(SourceSection section, int length) {
  var shiftedStart = section.getCharIndex() + length;
  var remainingLength = section.getCharLength() - length;
  return source.createSection(shiftedStart, remainingLength);
}
}

View File

@@ -215,13 +215,8 @@ import org.pkl.core.parser.ast.Parameter.TypedIdentifier;
import org.pkl.core.parser.ast.ParameterList;
import org.pkl.core.parser.ast.QualifiedIdentifier;
import org.pkl.core.parser.ast.StringConstant;
import org.pkl.core.parser.ast.StringConstantPart;
import org.pkl.core.parser.ast.StringConstantPart.ConstantPart;
import org.pkl.core.parser.ast.StringConstantPart.StringEscape;
import org.pkl.core.parser.ast.StringConstantPart.StringNewline;
import org.pkl.core.parser.ast.StringConstantPart.StringUnicodeEscape;
import org.pkl.core.parser.ast.StringPart;
import org.pkl.core.parser.ast.StringPart.StringConstantParts;
import org.pkl.core.parser.ast.StringPart.StringChars;
import org.pkl.core.parser.ast.StringPart.StringInterpolation;
import org.pkl.core.parser.ast.Type;
import org.pkl.core.parser.ast.Type.ConstrainedType;
@@ -353,7 +348,7 @@ public class AstBuilder extends AbstractAstBuilder<Object> {
@Override
public UnresolvedTypeNode visitStringConstantType(StringConstantType type) {
return new UnresolvedTypeNode.StringLiteral(
createSourceSection(type), doVisitStringConstant(type.getStr()));
createSourceSection(type), type.getStr().getString());
}
@Override
@@ -428,7 +423,7 @@ public class AstBuilder extends AbstractAstBuilder<Object> {
createSourceSection(type),
defaultIndex,
elementTypes.stream()
.map(it -> doVisitStringConstant(((StringConstantType) it).getStr()))
.map(it -> ((StringConstantType) it).getStr().getString())
.collect(Collectors.toCollection(LinkedHashSet::new)));
}
@@ -609,7 +604,7 @@ public class AstBuilder extends AbstractAstBuilder<Object> {
private AbstractImportNode doVisitImport(
boolean isGlobImport, Node node, StringConstant importUriNode) {
var section = createSourceSection(node);
var importUri = doVisitStringConstant(importUriNode);
var importUri = importUriNode.getString();
if (isGlobImport && importUri.startsWith("...")) {
throw exceptionBuilder().evalError("cannotGlobTripleDots").withSourceSection(section).build();
}
@@ -670,7 +665,7 @@ public class AstBuilder extends AbstractAstBuilder<Object> {
@Override
public ExpressionNode visitStringConstant(StringConstant expr) {
return new ConstantValueNode(createSourceSection(expr), doVisitStringConstant(expr));
return new ConstantValueNode(createSourceSection(expr), expr.getString());
}
@Override
@@ -682,12 +677,8 @@ public class AstBuilder extends AbstractAstBuilder<Object> {
if (spart instanceof StringInterpolation si) {
return ToStringNodeGen.create(createSourceSection(span), visitExpr(si.getExpr()));
}
if (spart instanceof StringConstantParts sparts) {
var builder = new StringBuilder();
for (var part : sparts.getParts()) {
builder.append(doVisitStringConstantPart(part));
}
return new ConstantValueNode(createSourceSection(span), builder.toString());
if (spart instanceof StringChars sparts) {
return new ConstantValueNode(createSourceSection(span), sparts.getString());
}
throw exceptionBuilder().unreachableCode().build();
}
@@ -712,116 +703,18 @@ public class AstBuilder extends AbstractAstBuilder<Object> {
@Override
public ExpressionNode visitMultiLineStringLiteralExpr(MultiLineStringLiteralExpr expr) {
var parts = expr.getParts();
if (parts.isEmpty()) {
throw exceptionBuilder()
.evalError("stringContentMustBeginOnNewLine")
.withSourceSection(createSourceSection(expr))
.build();
}
var firstPart = parts.get(0);
var newLineStart =
firstPart instanceof StringConstantParts str
&& str.getParts().get(0) instanceof StringNewline;
if (!newLineStart) {
throw exceptionBuilder()
.evalError("stringContentMustBeginOnNewLine")
.withSourceSection(startOf(firstPart))
.build();
}
var lastPart = parts.get(parts.size() - 1);
var commonIndent = getCommonIndent(lastPart, expr.getEndDelimiterSpan());
if (parts.size() == 1) {
StringConstantParts sc = (StringConstantParts) firstPart;
return new ConstantValueNode(
createSourceSection(expr),
doVisitMultiLineStringParts(sc.getParts(), commonIndent, true, true));
return doVisitStringPart(parts.get(0), expr.span());
}
var nodes = new ExpressionNode[parts.size()];
var lastIndex = nodes.length - 1;
for (int i = 0; i <= lastIndex; i++) {
nodes[i] = doVisitMultiLineStringPart(parts.get(i), commonIndent, i == 0, i == lastIndex);
for (int i = 0; i < nodes.length; i++) {
nodes[i] = visitStringPart(parts.get(i));
}
return new InterpolatedStringLiteralNode(createSourceSection(expr), nodes);
}
/**
 * Converts one part of a multi-line string literal into an expression node.
 *
 * @param spart the part to convert: either constant text or an interpolation
 * @param commonIndent the indentation to strip from constant text
 * @param isStringStart whether {@code spart} is the first part of the literal
 * @param isStringEnd whether {@code spart} is the last part of the literal
 * @return a constant node for constant text, or a to-string node wrapping the interpolated
 *     expression
 * @throws PklBugException for any other kind of part
 */
public ExpressionNode doVisitMultiLineStringPart(
    StringPart spart, String commonIndent, boolean isStringStart, boolean isStringEnd) {
  if (spart instanceof StringConstantParts constants) {
    var section = createSourceSection(spart);
    var rendered =
        doVisitMultiLineStringParts(constants.getParts(), commonIndent, isStringStart, isStringEnd);
    return new ConstantValueNode(section, rendered);
  }
  if (spart instanceof StringInterpolation interpolation) {
    var section = createSourceSection(interpolation);
    var interpolated = visitExpr(interpolation.getExpr());
    return ToStringNodeGen.create(section, interpolated);
  }
  throw PklBugException.unreachableCode();
}
/**
 * Renders the constant parts of one segment of a multi-line string literal, stripping
 * {@code commonIndent} from the text that begins each line.
 *
 * @param parts the constant parts of the segment
 * @param commonIndent the indentation every line of text is required to start with
 * @param isStringStart whether this segment opens the literal; if so the first part is skipped
 *     and the segment starts at a line start
 * @param isStringEnd whether this segment closes the literal; if so the trailing part(s) are
 *     skipped
 * @return the rendered text of the segment
 */
private String doVisitMultiLineStringParts(
List<StringConstantPart> parts,
String commonIndent,
boolean isStringStart,
boolean isStringEnd) {
// NOTE(review): the skipped first part is presumably the newline that follows the opening
// delimiter -- confirm against the caller's validation.
var starIndex = isStringStart ? 1 : 0;
var endIndex = parts.size() - 1;
if (isStringEnd) {
if (parts.get(endIndex) instanceof StringNewline) {
// skip trailing newline token
endIndex -= 1;
} else {
// skip trailing newline and whitespace (common indent) tokens
endIndex -= 2;
}
}
var builder = new StringBuilder();
// A segment that does not open the literal starts mid-line, so its first text part is not
// subject to the indentation check.
var isLineStart = isStringStart;
for (var i = starIndex; i <= endIndex; i++) {
var part = parts.get(i);
if (part instanceof StringNewline) {
builder.append('\n');
isLineStart = true;
} else if (part instanceof ConstantPart cp) {
var text = cp.getStr();
if (isLineStart) {
if (text.startsWith(commonIndent)) {
// drop the common indentation, keep the rest of the text
builder.append(text, commonIndent.length(), text.length());
} else {
// Report the error at the text following the (capped) leading indentation that is
// actually present.
String actualIndent = getLeadingIndent(text);
if (actualIndent.length() > commonIndent.length()) {
actualIndent = actualIndent.substring(0, commonIndent.length());
}
throw exceptionBuilder()
.evalError("stringIndentationMustMatchLastLine")
.withSourceSection(shrinkLeft(createSourceSection(cp), actualIndent.length()))
.build();
}
} else {
builder.append(text);
}
isLineStart = false;
} else if (part instanceof StringEscape || part instanceof StringUnicodeEscape) {
// An escape directly at a line start means the line has no indentation at all, which is
// only acceptable when no common indentation is required.
if (isLineStart && !commonIndent.isEmpty()) {
throw exceptionBuilder()
.evalError("stringIndentationMustMatchLastLine")
.withSourceSection(createSourceSection(part))
.build();
}
builder.append(doVisitStringConstantPart(part));
isLineStart = false;
} else {
throw PklBugException.unreachableCode();
}
}
return builder.toString();
}
@Override
public ExpressionNode visitNewExpr(NewExpr expr) {
var type = expr.getType();
@@ -2827,48 +2720,6 @@ public class AstBuilder extends AbstractAstBuilder<Object> {
scope.getConstDepth());
}
/**
 * Determines the indentation shared by all lines of a multi-line string literal from the last
 * part of the literal.
 *
 * <p>The indentation is empty when the last constant part is a newline, and it is the text of
 * the last constant part when that part consists only of indent characters and directly follows
 * a newline.
 *
 * @param lastParts the last part of the literal
 * @param endQuoteSpan span of the closing delimiter, used for error reporting
 * @return the common indentation, possibly empty
 * @throws RuntimeException the "closingStringDelimiterMustBeginOnNewLine" evaluation error built
 *     by {@code exceptionBuilder()} in every other case
 */
private String getCommonIndent(Node lastParts, Span endQuoteSpan) {
  if (lastParts instanceof StringConstantParts constants) {
    var pieces = constants.getParts();
    assert !pieces.isEmpty();
    var count = pieces.size();
    var tail = pieces.get(count - 1);
    if (tail instanceof StringNewline) {
      return "";
    }
    var followsNewline = count > 1 && pieces.get(count - 2) instanceof StringNewline;
    if (followsNewline && isIndentChars(tail)) {
      return ((ConstantPart) tail).getStr();
    }
  }
  throw exceptionBuilder()
      .evalError("closingStringDelimiterMustBeginOnNewLine")
      .withSourceSection(startOf(endQuoteSpan))
      .build();
}
/**
 * Tells whether {@code node} is a constant text part made up exclusively of spaces and tabs. An
 * empty constant part counts as indentation; any other kind of node does not.
 */
private static boolean isIndentChars(Node node) {
  if (node instanceof ConstantPart constant) {
    return constant.getStr().chars().allMatch(c -> c == ' ' || c == '\t');
  }
  return false;
}
private URI resolveImport(String importUri, StringConstant ctx) {
URI parsedUri;
try {
@@ -2950,15 +2801,4 @@ public class AstBuilder extends AbstractAstBuilder<Object> {
private static SourceSection unavailableSourceSection() {
return VmUtils.unavailableSourceSection();
}
/**
 * Returns the leading run of spaces and tabs of {@code text}; the whole text when it contains
 * nothing else.
 */
private static String getLeadingIndent(String text) {
  var end = 0;
  while (end < text.length() && (text.charAt(end) == ' ' || text.charAt(end) == '\t')) {
    end++;
  }
  return end == text.length() ? text : text.substring(0, end);
}
}

View File

@@ -34,7 +34,7 @@ import org.pkl.core.parser.ast.Expr.SingleLineStringLiteralExpr;
import org.pkl.core.parser.ast.ExtendsOrAmendsClause;
import org.pkl.core.parser.ast.ExtendsOrAmendsClause.Type;
import org.pkl.core.parser.ast.ImportClause;
import org.pkl.core.parser.ast.StringPart.StringConstantParts;
import org.pkl.core.parser.ast.StringPart.StringChars;
import org.pkl.core.runtime.VmExceptionBuilder;
import org.pkl.core.runtime.VmUtils;
import org.pkl.core.util.IoUtils;
@@ -63,8 +63,8 @@ public class ImportsAndReadsParser extends AbstractAstBuilder<@Nullable List<Ent
SourceSection sourceSection) {}
/** Parses a module, and collects all imports and reads. */
public static @Nullable List<Entry> parse(
ModuleKey moduleKey, ResolvedModuleKey resolvedModuleKey) throws IOException {
public static List<Entry> parse(ModuleKey moduleKey, ResolvedModuleKey resolvedModuleKey)
throws IOException {
var parser = new Parser();
var text = resolvedModuleKey.loadSource();
var source = VmUtils.createSource(moduleKey, text);
@@ -88,9 +88,8 @@ public class ImportsAndReadsParser extends AbstractAstBuilder<@Nullable List<Ent
@Override
public @Nullable List<Entry> visitExtendsOrAmendsClause(ExtendsOrAmendsClause decl) {
var importStr = doVisitStringConstant(decl.getUrl());
var importStr = decl.getUrl().getString();
var sourceSection = createSourceSection(decl.getUrl());
assert sourceSection != null;
return Collections.singletonList(
new Entry(
true,
@@ -103,18 +102,16 @@ public class ImportsAndReadsParser extends AbstractAstBuilder<@Nullable List<Ent
@Override
public List<Entry> visitImportClause(ImportClause imp) {
var importStr = doVisitStringConstant(imp.getImportStr());
var importStr = imp.getImportStr().getString();
var sourceSection = createSourceSection(imp.getImportStr());
assert sourceSection != null;
return Collections.singletonList(
new Entry(true, imp.isGlob(), false, false, importStr, sourceSection));
}
@Override
public List<Entry> visitImportExpr(ImportExpr expr) {
var importStr = doVisitStringConstant(expr.getImportStr());
var importStr = expr.getImportStr().getString();
var sourceSection = createSourceSection(expr.getImportStr());
assert sourceSection != null;
return Collections.singletonList(
new Entry(true, expr.isGlob(), false, false, importStr, sourceSection));
}
@@ -124,7 +121,6 @@ public class ImportsAndReadsParser extends AbstractAstBuilder<@Nullable List<Ent
return doVisitReadExpr(expr.getExpr(), expr.getReadType() == ReadType.GLOB);
}
@SuppressWarnings("DataFlowIssue")
public List<Entry> doVisitReadExpr(Expr expr, boolean isGlob) {
if (!(expr instanceof SingleLineStringLiteralExpr slStr)) {
return Collections.emptyList();
@@ -134,10 +130,8 @@ public class ImportsAndReadsParser extends AbstractAstBuilder<@Nullable List<Ent
var singleParts = slStr.getParts();
if (singleParts.isEmpty()) {
importString = "";
} else if (singleParts.size() == 1
&& singleParts.get(0) instanceof StringConstantParts cparts
&& !cparts.getParts().isEmpty()) {
importString = doVisitStringConstant(cparts.getParts());
} else if (singleParts.size() == 1 && singleParts.get(0) instanceof StringChars cparts) {
importString = cparts.getString();
} else {
return Collections.emptyList();
}

View File

@@ -73,7 +73,6 @@ import org.pkl.core.parser.ast.ParameterList;
import org.pkl.core.parser.ast.QualifiedIdentifier;
import org.pkl.core.parser.ast.ReplInput;
import org.pkl.core.parser.ast.StringConstant;
import org.pkl.core.parser.ast.StringConstantPart;
import org.pkl.core.parser.ast.StringPart;
import org.pkl.core.parser.ast.Type.ConstrainedType;
import org.pkl.core.parser.ast.Type.DeclaredType;
@@ -418,11 +417,6 @@ public abstract class BaseParserVisitor<T> implements ParserVisitor<T> {
return visitChildren(part);
}
@Override
public T visitStringConstantPart(StringConstantPart part) {
return defaultValue();
}
@Override
public T visitClassBody(ClassBody classBody) {
return visitChildren(classBody);

View File

@@ -74,12 +74,8 @@ import org.pkl.core.parser.ast.ParameterList;
import org.pkl.core.parser.ast.QualifiedIdentifier;
import org.pkl.core.parser.ast.ReplInput;
import org.pkl.core.parser.ast.StringConstant;
import org.pkl.core.parser.ast.StringConstantPart;
import org.pkl.core.parser.ast.StringConstantPart.EscapeType;
import org.pkl.core.parser.ast.StringConstantPart.StringEscape;
import org.pkl.core.parser.ast.StringConstantPart.StringNewline;
import org.pkl.core.parser.ast.StringPart;
import org.pkl.core.parser.ast.StringPart.StringConstantParts;
import org.pkl.core.parser.ast.StringPart.StringChars;
import org.pkl.core.parser.ast.Type;
import org.pkl.core.parser.ast.Type.DeclaredType;
import org.pkl.core.parser.ast.Type.ParenthesizedType;
@@ -1022,64 +1018,8 @@ public class Parser {
var tk = next();
yield new FloatLiteralExpr(tk.text(lexer), tk.span);
}
case STRING_START, STRING_MULTI_START -> {
var start = next();
var parts = new ArrayList<StringPart>();
var temp = new ArrayList<StringConstantPart>();
while (lookahead != Token.STRING_END) {
switch (lookahead) {
case STRING_PART -> {
var tk = next();
var text = tk.text(lexer);
if (!text.isEmpty()) {
temp.add(new StringConstantPart.ConstantPart(text, tk.span));
}
}
// lexer makes sure we don't get newlines in single quoted strings
case STRING_NEWLINE -> temp.add(new StringNewline(next().span));
case STRING_ESCAPE_NEWLINE ->
temp.add(new StringEscape(EscapeType.NEWLINE, next().span));
case STRING_ESCAPE_TAB -> temp.add(new StringEscape(EscapeType.TAB, next().span));
case STRING_ESCAPE_QUOTE ->
temp.add(new StringEscape(EscapeType.QUOTE, next().span));
case STRING_ESCAPE_BACKSLASH ->
temp.add(new StringEscape(EscapeType.BACKSLASH, next().span));
case STRING_ESCAPE_RETURN ->
temp.add(new StringEscape(EscapeType.RETURN, next().span));
case STRING_ESCAPE_UNICODE -> {
var tk = next();
var text = tk.text(lexer);
temp.add(new StringConstantPart.StringUnicodeEscape(text, tk.span));
}
case INTERPOLATION_START -> {
var istart = next().span;
if (!temp.isEmpty()) {
var span = temp.get(0).span().endWith(temp.get(temp.size() - 1).span());
parts.add(new StringPart.StringConstantParts(temp, span));
temp = new ArrayList<>();
}
var exp = parseExpr(")");
var end = expect(Token.RPAREN, "unexpectedToken", ")").span;
parts.add(new StringPart.StringInterpolation(exp, istart.endWith(end)));
}
case EOF -> throw parserError("unexpectedEndOfFile");
// the lexer makes sure we only get the above tokens inside a string
default -> throw PklBugException.unreachableCode();
}
}
if (!temp.isEmpty()) {
var span = temp.get(0).span().endWith(temp.get(temp.size() - 1).span());
parts.add(new StringPart.StringConstantParts(temp, span));
}
var expectedDelimiter = start.token == Token.STRING_START ? "\"" : "\"\"\"";
var end = expect(Token.STRING_END, "missingDelimiter", expectedDelimiter).span;
if (start.token == Token.STRING_START) {
yield new SingleLineStringLiteralExpr(
parts, start.span, end, start.span.endWith(end));
} else {
yield new MultiLineStringLiteralExpr(parts, start.span, end, start.span.endWith(end));
}
}
case STRING_START -> parseSingleLineStringLiteralExpr();
case STRING_MULTI_START -> parseMultiLineStringLiteralExpr();
case IDENTIFIER -> {
var identifier = parseIdentifier();
if (lookahead == Token.LPAREN
@@ -1149,6 +1089,180 @@ public class Parser {
return expr;
}
/**
 * Parses a single-line string literal, starting at its opening delimiter.
 *
 * <p>Literal text and escapes are resolved while parsing and accumulated into {@link StringChars}
 * parts; every interpolation becomes its own {@code StringInterpolation} part and flushes the
 * text collected so far.
 *
 * @return the parsed {@code SingleLineStringLiteralExpr}
 * @throws RuntimeException the "missingDelimiter" parser error when the input ends before the
 *     closing delimiter; the expected delimiter is the reversed text of the opening one
 * @throws PklBugException if a token shows up that is not expected inside a string (without this
 *     the loop would never consume the token and spin forever)
 */
private Expr parseSingleLineStringLiteralExpr() {
  var start = next();
  var parts = new ArrayList<StringPart>();
  var builder = new StringBuilder();
  // span of the first token of the run of characters currently being collected
  var startSpan = spanLookahead;
  // span of the most recently consumed token of that run
  var end = spanLookahead;
  while (lookahead != Token.STRING_END) {
    switch (lookahead) {
      case STRING_PART -> {
        var tk = next();
        end = tk.span;
        builder.append(tk.text(lexer));
      }
      case STRING_ESCAPE_NEWLINE,
          STRING_ESCAPE_TAB,
          STRING_ESCAPE_QUOTE,
          STRING_ESCAPE_BACKSLASH,
          STRING_ESCAPE_RETURN,
          STRING_ESCAPE_UNICODE -> {
        var tk = next();
        end = tk.span;
        builder.append(getEscapeText(tk));
      }
      case INTERPOLATION_START -> {
        var istart = next().span;
        if (!builder.isEmpty()) {
          assert startSpan != null;
          parts.add(new StringChars(builder.toString(), startSpan.endWith(end)));
          builder = new StringBuilder();
        }
        var exp = parseExpr(")");
        end = expect(Token.RPAREN, "unexpectedToken", ")").span;
        parts.add(new StringPart.StringInterpolation(exp, istart.endWith(end)));
        // the next run of characters starts right after the interpolation
        startSpan = spanLookahead;
      }
      case EOF -> {
        var delimiter = new StringBuilder(start.text(lexer)).reverse().toString();
        throw parserError("missingDelimiter", delimiter);
      }
      // the lexer makes sure we only get the above tokens inside a string
      default -> throw PklBugException.unreachableCode();
    }
  }
  if (!builder.isEmpty()) {
    parts.add(new StringChars(builder.toString(), startSpan.endWith(end)));
  }
  end = next().span;
  return new SingleLineStringLiteralExpr(parts, start.span, end, start.span.endWith(end));
}
/**
 * Parses a multi-line string literal, starting at its opening delimiter.
 *
 * <p>All string tokens and interpolations up to the closing delimiter are first collected as
 * {@code TempNode}s; {@code validateMultiLineString} then checks the layout and renders them
 * into the final parts.
 *
 * @return the parsed {@code MultiLineStringLiteralExpr}
 * @throws RuntimeException the "missingDelimiter" parser error when the input ends before the
 *     closing delimiter; the expected delimiter is the reversed text of the opening one
 * @throws PklBugException if a token shows up that is not expected inside a string (without this
 *     the loop would never consume the token and spin forever)
 */
private Expr parseMultiLineStringLiteralExpr() {
  var start = next();
  var stringTokens = new ArrayList<TempNode>();
  while (lookahead != Token.STRING_END) {
    switch (lookahead) {
      case STRING_PART,
          STRING_NEWLINE,
          STRING_ESCAPE_NEWLINE,
          STRING_ESCAPE_TAB,
          STRING_ESCAPE_QUOTE,
          STRING_ESCAPE_BACKSLASH,
          STRING_ESCAPE_RETURN,
          STRING_ESCAPE_UNICODE ->
          stringTokens.add(new TempNode(next(), null));
      case INTERPOLATION_START -> {
        var istart = next();
        var exp = parseExpr(")");
        var end = expect(Token.RPAREN, "unexpectedToken", ")").span;
        var interpolation = new StringPart.StringInterpolation(exp, istart.span.endWith(end));
        stringTokens.add(new TempNode(null, interpolation));
      }
      case EOF -> {
        var delimiter = new StringBuilder(start.text(lexer)).reverse().toString();
        throw parserError("missingDelimiter", delimiter);
      }
      // the lexer makes sure we only get the above tokens inside a string
      default -> throw PklBugException.unreachableCode();
    }
  }
  var end = next().span;
  var fullSpan = start.span.endWith(end);
  var parts = validateMultiLineString(stringTokens, fullSpan);
  return new MultiLineStringLiteralExpr(parts, start.span, end, fullSpan);
}
/**
 * Checks the layout of a multi-line string and renders its collected tokens into parts.
 *
 * @param nodes the tokens and interpolations found between the delimiters
 * @param span the span of the whole literal, used for errors that have no better location
 * @return the rendered parts; a single empty {@code StringChars} when the content is only a
 *     newline
 * @throws ParserError "stringContentMustBeginOnNewLine" when the content is empty or does not
 *     begin with a newline token
 */
private List<StringPart> validateMultiLineString(List<TempNode> nodes, Span span) {
  if (nodes.isEmpty()) {
    throw new ParserError(ErrorMessages.create("stringContentMustBeginOnNewLine"), span);
  }
  var head = nodes.get(0);
  var headToken = head.token;
  if (headToken == null || headToken.token != Token.STRING_NEWLINE) {
    throw new ParserError(ErrorMessages.create("stringContentMustBeginOnNewLine"), head.span());
  }
  // only contains a newline
  if (nodes.size() == 1) {
    return List.of(new StringChars("", head.span()));
  }
  return renderString(nodes, getCommonIndent(nodes, span));
}
/**
 * Renders the collected tokens of a multi-line string into its final parts, resolving escapes
 * and stripping {@code commonIndent} from the text that begins each line.
 *
 * <p>The first node (the newline after the opening delimiter) and the trailing newline, plus the
 * indentation of the closing delimiter when present, are not part of the content and are
 * skipped.
 *
 * <p>An interpolation always ends the current run of characters: the run's start span is reset
 * so the following {@code StringChars} cannot span across the interpolation, and the text after
 * the interpolation is no longer treated as the beginning of a line (otherwise its leading
 * whitespace would be stripped as if it were indentation). An interpolation that sits directly
 * at a line start while indentation is required is reported like an escape in that position.
 *
 * @param nodes the collected nodes; expected to start with a newline token and to end with a
 *     token (both checked by the callers' validation)
 * @param commonIndent the indentation every non-empty line has to start with
 * @return the rendered parts in source order
 * @throws ParserError "stringIndentationMustMatchLastLine" when a line is not indented by
 *     {@code commonIndent}
 */
@SuppressWarnings("DataFlowIssue")
private List<StringPart> renderString(List<TempNode> nodes, String commonIndent) {
  var parts = new ArrayList<StringPart>();
  var builder = new StringBuilder();
  // skip the trailing newline, and the closing delimiter's indentation if there is any
  var endOffset = nodes.get(nodes.size() - 1).token.token == Token.STRING_NEWLINE ? 1 : 2;
  var isNewLine = true;
  Span start = null;
  Span end = null;
  for (var i = 1; i < nodes.size() - endOffset; i++) {
    var node = nodes.get(i);
    if (node.node != null) {
      if (isNewLine && !commonIndent.isEmpty()) {
        // the interpolation starts the line, so the required indentation is missing
        throw new ParserError(
            ErrorMessages.create("stringIndentationMustMatchLastLine"), node.span());
      }
      if (!builder.isEmpty()) {
        parts.add(new StringChars(builder.toString(), start.endWith(end)));
        builder = new StringBuilder();
      }
      // Always reset: a stripped indentation token may have set `start` without adding text.
      start = null;
      isNewLine = false;
      parts.add(node.node);
    } else {
      var token = node.token;
      assert token != null;
      if (start == null) {
        start = token.span;
      }
      end = token.span;
      switch (token.token) {
        case STRING_NEWLINE -> {
          builder.append('\n');
          isNewLine = true;
        }
        case STRING_PART -> {
          var text = token.text(lexer);
          if (isNewLine) {
            if (text.startsWith(commonIndent)) {
              builder.append(text, commonIndent.length(), text.length());
            } else {
              // point the error at the text following the indentation that is present
              var actualIndent = getLeadingIndentCount(text);
              var textSpan = token.span.move(actualIndent).grow(-actualIndent);
              throw new ParserError(
                  ErrorMessages.create("stringIndentationMustMatchLastLine"), textSpan);
            }
          } else {
            builder.append(text);
          }
          isNewLine = false;
        }
        default -> {
          // an escape at a line start means the line is not indented at all
          if (isNewLine && !commonIndent.isEmpty()) {
            throw new ParserError(
                ErrorMessages.create("stringIndentationMustMatchLastLine"), token.span);
          }
          builder.append(getEscapeText(token));
          isNewLine = false;
        }
      }
    }
  }
  if (!builder.isEmpty()) {
    parts.add(new StringChars(builder.toString(), start.endWith(end)));
  }
  return parts;
}
@SuppressWarnings("DuplicatedCode")
private Expr parseFunctionLiteralOrParenthesized(Span start) {
var identifier = parseIdentifier();
@@ -1471,36 +1585,117 @@ public class Parser {
private StringConstant parseStringConstant() {
var start = spanLookahead;
expect(Token.STRING_START, "unexpectedToken", "\"");
var parts = new ArrayList<StringConstantPart>();
var startTk = expect(Token.STRING_START, "unexpectedToken", "\"");
var builder = new StringBuilder();
while (lookahead != Token.STRING_END) {
switch (lookahead) {
case STRING_PART -> {
var tk = next();
var text = tk.text(lexer);
parts.add(new StringConstantPart.ConstantPart(text, tk.span));
case STRING_PART -> builder.append(next().text(lexer));
case STRING_ESCAPE_NEWLINE -> {
next();
builder.append('\n');
}
case STRING_ESCAPE_NEWLINE -> parts.add(new StringEscape(EscapeType.NEWLINE, next().span));
case STRING_ESCAPE_TAB -> parts.add(new StringEscape(EscapeType.TAB, next().span));
case STRING_ESCAPE_QUOTE -> parts.add(new StringEscape(EscapeType.QUOTE, next().span));
case STRING_ESCAPE_BACKSLASH ->
parts.add(new StringEscape(EscapeType.BACKSLASH, next().span));
case STRING_ESCAPE_RETURN -> parts.add(new StringEscape(EscapeType.RETURN, next().span));
case STRING_ESCAPE_UNICODE -> {
var tk = next();
var text = tk.text(lexer);
parts.add(new StringConstantPart.StringUnicodeEscape(text, tk.span));
case STRING_ESCAPE_TAB -> {
next();
builder.append('\t');
}
case STRING_ESCAPE_QUOTE -> {
next();
builder.append('"');
}
case STRING_ESCAPE_BACKSLASH -> {
next();
builder.append('\\');
}
case STRING_ESCAPE_RETURN -> {
next();
builder.append('\r');
}
case STRING_ESCAPE_UNICODE -> builder.append(parseUnicodeEscape(next()));
case EOF -> {
var delimiter = new StringBuilder(startTk.text(lexer)).reverse().toString();
throw parserError("missingDelimiter", delimiter);
}
case EOF -> throw parserError("unexpectedEndOfFile");
case INTERPOLATION_START -> throw parserError("interpolationInConstant");
// the lexer makes sure we only get the above tokens inside a string
default -> throw PklBugException.unreachableCode();
}
}
var end = expect(Token.STRING_END, "missingDelimiter", "\"").span;
assert !parts.isEmpty();
var constSpan = parts.get(0).span().endWith(parts.get(parts.size() - 1).span());
return new StringConstant(new StringConstantParts(parts, constSpan), start.endWith(end));
var end = next().span;
return new StringConstant(builder.toString(), start.endWith(end));
}
/**
 * Maps a string escape token to the text it denotes. Unicode escapes are resolved through
 * {@code parseUnicodeEscape}.
 *
 * @param tk an escape token
 * @return the text the escape stands for
 * @throws PklBugException when {@code tk} is not one of the string escape tokens
 */
private String getEscapeText(FullToken tk) {
  switch (tk.token) {
    case STRING_ESCAPE_UNICODE:
      return parseUnicodeEscape(tk);
    case STRING_ESCAPE_RETURN:
      return "\r";
    case STRING_ESCAPE_TAB:
      return "\t";
    case STRING_ESCAPE_BACKSLASH:
      return "\\";
    case STRING_ESCAPE_QUOTE:
      return "\"";
    case STRING_ESCAPE_NEWLINE:
      return "\n";
    default:
      throw PklBugException.unreachableCode();
  }
}
/**
 * Resolves a unicode escape token to the string for the code point it denotes.
 *
 * <p>The hexadecimal digits are taken from between the first <code>{</code> found at index 2 or
 * later and the last character of the token text.
 *
 * @param tk the unicode escape token
 * @return the string consisting of the denoted code point
 * @throws ParserError "invalidUnicodeEscapeSequence" when the digits are not valid hexadecimal,
 *     overflow an {@code int}, or denote a value that is not a valid Unicode code point
 */
private String parseUnicodeEscape(FullToken tk) {
  var text = tk.text(lexer);
  var lastIndex = text.length() - 1;
  var startIndex = text.indexOf('{', 2);
  try {
    var codepoint = Integer.parseInt(text.substring(startIndex + 1, lastIndex), 16);
    return Character.toString(codepoint);
  } catch (IllegalArgumentException e) {
    // Covers both NumberFormatException from parseInt (a subclass) and the
    // IllegalArgumentException that Character.toString(int) throws for values that are not
    // valid code points, e.g. anything above U+10FFFF.
    throw new ParserError(
        ErrorMessages.create("invalidUnicodeEscapeSequence", text, text.substring(0, startIndex)),
        tk.span);
  }
}
/**
 * Determines the indentation shared by all lines of a multi-line string from its last nodes.
 *
 * <p>The indentation is empty when the last node is a newline token, and it is the text of the
 * last node when that node is whitespace-only text directly following a newline token.
 *
 * @param nodes the collected nodes of the string; callers pass at least two nodes
 * @param span the span of the whole literal, used for error reporting
 * @return the common indentation, possibly empty
 * @throws ParserError "closingStringDelimiterMustBeginOnNewLine" when the closing delimiter is
 *     preceded by an interpolation or by anything other than a newline and optional indentation
 */
private String getCommonIndent(List<TempNode> nodes, Span span) {
  var count = nodes.size();
  var last = nodes.get(count - 1);
  var lastToken = last.token;
  if (lastToken == null) {
    throw new ParserError(
        ErrorMessages.create("closingStringDelimiterMustBeginOnNewLine"), last.span());
  }
  if (lastToken.token == Token.STRING_NEWLINE) {
    return "";
  }
  var previousToken = nodes.get(count - 2).token;
  var followsNewline = previousToken != null && previousToken.token == Token.STRING_NEWLINE;
  var indent = followsNewline ? getTrailingIndent(last) : null;
  if (indent == null) {
    throw new ParserError(ErrorMessages.create("closingStringDelimiterMustBeginOnNewLine"), span);
  }
  return indent;
}
/**
 * Returns the text of {@code node} when it is a {@code STRING_PART} token consisting only of
 * spaces and tabs (possibly none), and {@code null} otherwise.
 */
private @Nullable String getTrailingIndent(TempNode node) {
  var candidate = node.token;
  if (candidate == null || candidate.token != Token.STRING_PART) {
    return null;
  }
  var text = candidate.text(lexer);
  var onlyIndentChars = text.chars().allMatch(c -> c == ' ' || c == '\t');
  return onlyIndentChars ? text : null;
}
/**
 * Counts the spaces and tabs at the beginning of {@code text}.
 *
 * @return the number of leading indent characters; the text length when it contains nothing else
 */
private int getLeadingIndentCount(String text) {
  var count = 0;
  while (count < text.length() && (text.charAt(count) == ' ' || text.charAt(count) == '\t')) {
    count++;
  }
  return count;
}
/**
 * Intermediate element of a multi-line string while it is being parsed: either a raw string
 * token or an already parsed interpolation. The parser sets exactly one of the two components.
 */
private record TempNode(
    @Nullable FullToken token, @Nullable StringPart.StringInterpolation node) {
  /** Returns the span of the token if there is one, otherwise the span of the interpolation. */
  Span span() {
    if (token == null) {
      assert node != null;
      return node.span();
    }
    return token.span;
  }
}
private FullToken expect(Token type, String errorKey, Object... messageArgs) {

View File

@@ -66,7 +66,6 @@ import org.pkl.core.parser.ast.ParameterList;
import org.pkl.core.parser.ast.QualifiedIdentifier;
import org.pkl.core.parser.ast.ReplInput;
import org.pkl.core.parser.ast.StringConstant;
import org.pkl.core.parser.ast.StringConstantPart;
import org.pkl.core.parser.ast.StringPart;
import org.pkl.core.parser.ast.Type;
import org.pkl.core.parser.ast.TypeAlias;
@@ -211,8 +210,6 @@ public interface ParserVisitor<Result> {
Result visitStringPart(StringPart part);
Result visitStringConstantPart(StringConstantPart part);
Result visitDocComment(DocComment docComment);
Result visitIdentifier(Identifier identifier);

View File

@@ -38,4 +38,8 @@ public record Span(int charIndex, int length) {
/**
 * Returns a span of the same length whose start index is shifted by {@code amount} characters.
 */
public Span move(int amount) {
  var shiftedStart = charIndex + amount;
  return new Span(shiftedStart, length);
}
/**
 * Returns a span with the same start index whose length is changed by {@code amount}; a
 * negative amount shrinks the span.
 */
public Span grow(int amount) {
  var resizedLength = length + amount;
  return new Span(charIndex, resizedLength);
}
}

View File

@@ -15,15 +15,17 @@
*/
package org.pkl.core.parser.ast;
import java.util.List;
import java.util.Objects;
import org.pkl.core.parser.ParserVisitor;
import org.pkl.core.parser.Span;
import org.pkl.core.parser.ast.StringPart.StringConstantParts;
import org.pkl.core.util.Nullable;
public class StringConstant extends AbstractNode {
public StringConstant(StringConstantParts strParts, Span span) {
super(span, List.of(strParts));
private final String string;
public StringConstant(String string, Span span) {
super(span, null);
this.string = string;
}
@Override
@@ -31,8 +33,33 @@ public class StringConstant extends AbstractNode {
return visitor.visitStringConstant(this);
}
public StringConstantParts getStrParts() {
assert children != null;
return (StringConstantParts) children.get(0);
public String getString() {
return string;
}
@Override
public String toString() {
return "StringConstant{string='" + string + '\'' + ", span=" + span + '}';
}
@SuppressWarnings("ConstantValue")
@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
if (!super.equals(o)) {
return false;
}
StringConstant that = (StringConstant) o;
return Objects.equals(string, that.string);
}
@Override
public int hashCode() {
return Objects.hash(super.hashCode(), string);
}
}

View File

@@ -1,156 +0,0 @@
/*
* Copyright © 2025 Apple Inc. and the Pkl project authors. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.pkl.core.parser.ast;
import java.util.List;
import java.util.Objects;
import org.pkl.core.parser.ParserVisitor;
import org.pkl.core.parser.Span;
import org.pkl.core.util.Nullable;
/**
 * One piece of the constant (non-interpolated) content of a string literal: a newline, plain
 * text, a unicode escape, or a simple escape.
 */
public abstract sealed class StringConstantPart extends AbstractNode {
public StringConstantPart(Span span, @Nullable List<? extends @Nullable Node> children) {
super(span, children);
}
@Override
public <T> @Nullable T accept(ParserVisitor<? extends T> visitor) {
return visitor.visitStringConstantPart(this);
}
/** A newline inside a string literal; carries no data besides its span. */
public static final class StringNewline extends StringConstantPart {
public StringNewline(Span span) {
super(span, null);
}
}
/** A run of plain text inside a string literal. */
public static final class ConstantPart extends StringConstantPart {
private final String str;
public ConstantPart(String str, Span span) {
super(span, null);
this.str = str;
}
/** Returns the text of this part. */
public String getStr() {
return str;
}
@Override
public String toString() {
return "ConstantPart{str='" + str + '\'' + ", span=" + span + '}';
}
// Two constant parts are equal when both their text and their span are equal.
@SuppressWarnings("ConstantValue")
@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
ConstantPart that = (ConstantPart) o;
return Objects.equals(str, that.str) && Objects.equals(span, that.span);
}
@Override
public int hashCode() {
return Objects.hash(str, span);
}
}
/** A unicode escape inside a string literal, stored as the unparsed escape text. */
public static final class StringUnicodeEscape extends StringConstantPart {
private final String escape;
public StringUnicodeEscape(String escape, Span span) {
super(span, null);
this.escape = escape;
}
/** Returns the escape text as passed to the constructor. */
public String getEscape() {
return escape;
}
@Override
public String toString() {
return "StringUnicodeEscape{escape='" + escape + '\'' + ", span=" + span + '}';
}
// Two unicode escapes are equal when both their escape text and their span are equal.
@SuppressWarnings("ConstantValue")
@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
StringUnicodeEscape that = (StringUnicodeEscape) o;
return Objects.equals(escape, that.escape) && Objects.equals(span, that.span);
}
@Override
public int hashCode() {
return Objects.hash(escape, span);
}
}
/** A simple escape inside a string literal, identified by its {@link EscapeType}. */
public static final class StringEscape extends StringConstantPart {
private final EscapeType type;
public StringEscape(EscapeType type, Span span) {
super(span, null);
this.type = type;
}
/** Returns which escape this part represents. */
public EscapeType getType() {
return type;
}
@Override
public String toString() {
return "StringEscape{type=" + type + ", span=" + span + '}';
}
// Two escapes are equal when both their type and their span are equal.
@SuppressWarnings("ConstantValue")
@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
StringEscape that = (StringEscape) o;
return type == that.type && Objects.equals(span, that.span);
}
@Override
public int hashCode() {
return Objects.hash(type, span);
}
}
/** The kinds of simple escapes a {@link StringEscape} can represent. */
public enum EscapeType {
NEWLINE,
TAB,
RETURN,
QUOTE,
BACKSLASH
}
}

View File

@@ -16,6 +16,7 @@
package org.pkl.core.parser.ast;
import java.util.List;
import java.util.Objects;
import org.pkl.core.parser.ParserVisitor;
import org.pkl.core.parser.Span;
import org.pkl.core.util.Nullable;
@@ -31,15 +32,42 @@ public abstract sealed class StringPart extends AbstractNode {
return visitor.visitStringPart(this);
}
public static final class StringConstantParts extends StringPart {
public StringConstantParts(List<StringConstantPart> parts, Span span) {
super(span, parts);
public static final class StringChars extends StringPart {
private final String string;
public StringChars(String string, Span span) {
super(span, null);
this.string = string;
}
@SuppressWarnings("unchecked")
public List<StringConstantPart> getParts() {
assert children != null;
return (List<StringConstantPart>) children;
public String getString() {
return string;
}
@Override
public String toString() {
return "StringChars{string='" + string + '\'' + ", span=" + span + '}';
}
@SuppressWarnings("ConstantValue")
@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
if (!super.equals(o)) {
return false;
}
StringChars that = (StringChars) o;
return Objects.equals(string, that.string);
}
@Override
public int hashCode() {
return Objects.hash(super.hashCode(), string);
}
}

View File

@@ -1,5 +1,5 @@
Pkl Error
Unexpected end of file.
Missing `"""` delimiter.
x | res2 = 42
^

View File

@@ -1,5 +1,5 @@
Pkl Error
Unexpected end of file.
Missing `"""` delimiter.
x | res1 = """
^

View File

@@ -1,5 +1,5 @@
Pkl Error
Unexpected end of file.
Missing `"` delimiter.
x | res1 = "
^

View File

@@ -1,5 +1,5 @@
Pkl Error
Unexpected end of file.
Missing `"""` delimiter.
x |
^

View File

@@ -1,5 +1,5 @@
Pkl Error
Unexpected end of file.
Missing `"""#` delimiter.
x |
^

View File

@@ -1,8 +1,8 @@
Pkl Error
Invalid Unicode escape sequence `\u{12x}`.
Valid Unicode escape sequences are \u{0} to \u{10FFFF} (1-6 hexadecimal characters).
x | res1 = "\u{12x}"
^^^^^^^
at invalidUnicodeEscape#res1 (file:///$snippetsDir/input/errors/invalidUnicodeEscape.pkl)
Valid Unicode escape sequences are \u{0} to \u{10FFFF} (1-6 hexadecimal characters).
at invalidUnicodeEscape (file:///$snippetsDir/input/errors/invalidUnicodeEscape.pkl)

View File

@@ -1,5 +1,5 @@
Pkl Error
Unexpected end of file.
Missing `"""` delimiter.
x | res1 = """some string
^

View File

@@ -1,5 +1,5 @@
Pkl Error
Unexpected end of file.
Missing `"""` delimiter.
x | res2 = 2
^

View File

@@ -1,5 +1,5 @@
Pkl Error
Unexpected end of file.
Missing `"""` delimiter.
x | res1 = """some string"
^

View File

@@ -3,4 +3,4 @@ Line must match or exceed indentation of the String's last line.
x | mismatched indent
^^^^^^^^^^^^^^^^^
at parser8#res1 (file:///$snippetsDir/input/errors/parser8.pkl)
at parser8 (file:///$snippetsDir/input/errors/parser8.pkl)

View File

@@ -3,4 +3,4 @@ Line must match or exceed indentation of the String's last line.
x | mismatched indent
^^^^^^^^^^^^^^^^^
at parser9#res1 (file:///$snippetsDir/input/errors/parser9.pkl)
at parser9 (file:///$snippetsDir/input/errors/parser9.pkl)

View File

@@ -791,7 +791,7 @@ class ANTLRSexpRenderer {
fun renderSingleLineStringExpr(expr: SingleLineStringLiteralContext) {
buf.append(tab)
buf.append("(interpolatedStringExpr")
buf.append("(singleLineStringLiteralExpr")
val oldTab = increaseTab()
for (part in expr.singleLineStringPart()) {
if (part.expr() != null) {
@@ -808,15 +808,14 @@ class ANTLRSexpRenderer {
fun renderMultiLineStringExpr(expr: MultiLineStringLiteralContext) {
buf.append(tab)
buf.append("(interpolatedMultiStringExpr")
buf.append("(multiLineStringLiteralExpr")
val oldTab = increaseTab()
// render only interpolated expressions because
// the new parser parses string differently
for (part in expr.multiLineStringPart()) {
if (part.expr() != null) {
buf.append('\n')
renderExpr(part.expr())
} else {
buf.append('\n').append(tab)
buf.append("(stringConstantExpr)")
}
}
buf.append(')')

View File

@@ -42,10 +42,18 @@ class ParserComparisonTest : ParserComparisonTestInterface {
compare(
"""
prop = ${"\"\"\""}\(bar)${"\"\"\""}
prop2 = ${"\"\"\""}foo \(bar)${"\"\"\""}
prop3 = ${"\"\"\""}\(bar) foo${"\"\"\""}
prop4 = ${"\"\"\""}foo \(bar + baz) foo${"\"\"\""}
prop = ""${'"'}
\(bar)
""${'"'}
prop2 = ""${'"'}
foo \(bar)
""${'"'}
prop3 = ""${'"'}
\(bar) foo
""${'"'}
prop4 = ""${'"'}
foo \(bar + baz) foo
""${'"'}
"""
.trimIndent()
)

View File

@@ -419,7 +419,7 @@ class SexpRenderer {
fun renderSingleLineStringLiteral(expr: SingleLineStringLiteralExpr) {
buf.append(tab)
buf.append("(interpolatedStringExpr")
buf.append("(singleLineStringLiteralExpr")
val oldTab = increaseTab()
for (part in expr.parts) {
if (part is StringPart.StringInterpolation) {
@@ -436,15 +436,14 @@ class SexpRenderer {
fun renderMultiLineStringLiteral(expr: MultiLineStringLiteralExpr) {
buf.append(tab)
buf.append("(interpolatedMultiStringExpr")
buf.append("(multiLineStringLiteralExpr")
val oldTab = increaseTab()
// render only interpolated expressions because
// the new parser parses string differently
for (part in expr.parts) {
if (part is StringPart.StringInterpolation) {
buf.append('\n')
renderExpr(part.expr)
} else {
buf.append('\n').append(tab)
buf.append("(stringConstantExpr)")
}
}
buf.append(')')

View File

@@ -302,10 +302,11 @@ class SpanComparison(val path: String, private val softly: SoftAssertions) {
}
}
is MultiLineStringLiteralExpr -> {
node.parts.zip((ctx as MultiLineStringLiteralContext).multiLineStringPart()).forEach {
(s1, s2) ->
compareSpan(s1, s2)
}
// only compare interpolated expressions
val exprs = node.parts.filterIsInstance<StringPart.StringInterpolation>()
val antlrExprs =
(ctx as MultiLineStringLiteralContext).multiLineStringPart().mapNotNull { it.expr() }
exprs.zip(antlrExprs).forEach { (s1, s2) -> compareExpr(s1.expr, s2) }
}
is ThrowExpr -> compareExpr(node.expr, (ctx as ThrowExprContext).expr())
is TraceExpr -> compareExpr(node.expr, (ctx as TraceExprContext).expr())