Simplify precedence and associativity parsing (#1066)

This commit is contained in:
Islon Scherer
2025-05-12 14:59:26 +02:00
committed by GitHub
parent 948a20ad0c
commit fe2e4aa1a4
7 changed files with 110 additions and 322 deletions

View File

@@ -16,7 +16,6 @@
package org.pkl.parser; package org.pkl.parser;
import java.util.ArrayDeque; import java.util.ArrayDeque;
import java.util.Arrays;
import java.util.Deque; import java.util.Deque;
import org.pkl.parser.util.ErrorMessages; import org.pkl.parser.util.ErrorMessages;
@@ -734,70 +733,57 @@ public class Lexer {
return isRegularIdentifier(identifier) ? identifier : "`" + identifier + "`"; return isRegularIdentifier(identifier) ? identifier : "`" + identifier + "`";
} }
@SuppressWarnings("SuspiciousArrayMethodCall")
private static boolean isKeyword(String text) { private static boolean isKeyword(String text) {
var index = Arrays.binarySearch(KEYWORDS, text); return getKeywordOrIdentifier(text) != Token.IDENTIFIER;
return index >= 0;
} }
@SuppressWarnings("SuspiciousArrayMethodCall") private static Token getKeywordOrIdentifier(String keyword) {
private static Token getKeywordOrIdentifier(String text) { return switch (keyword) {
var index = Arrays.binarySearch(KEYWORDS, text); case "_" -> Token.UNDERSCORE;
if (index < 0) return Token.IDENTIFIER; case "abstract" -> Token.ABSTRACT;
return KEYWORDS[index].token; case "amends" -> Token.AMENDS;
} case "as" -> Token.AS;
case "case" -> Token.CASE;
protected static final KeywordEntry[] KEYWORDS = { case "class" -> Token.CLASS;
new KeywordEntry("_", Token.UNDERSCORE), case "const" -> Token.CONST;
new KeywordEntry("abstract", Token.ABSTRACT), case "delete" -> Token.DELETE;
new KeywordEntry("amends", Token.AMENDS), case "else" -> Token.ELSE;
new KeywordEntry("as", Token.AS), case "extends" -> Token.EXTENDS;
new KeywordEntry("case", Token.CASE), case "external" -> Token.EXTERNAL;
new KeywordEntry("class", Token.CLASS), case "false" -> Token.FALSE;
new KeywordEntry("const", Token.CONST), case "fixed" -> Token.FIXED;
new KeywordEntry("delete", Token.DELETE), case "for" -> Token.FOR;
new KeywordEntry("else", Token.ELSE), case "function" -> Token.FUNCTION;
new KeywordEntry("extends", Token.EXTENDS), case "hidden" -> Token.HIDDEN;
new KeywordEntry("external", Token.EXTERNAL), case "if" -> Token.IF;
new KeywordEntry("false", Token.FALSE), case "import" -> Token.IMPORT;
new KeywordEntry("fixed", Token.FIXED), case "in" -> Token.IN;
new KeywordEntry("for", Token.FOR), case "is" -> Token.IS;
new KeywordEntry("function", Token.FUNCTION), case "let" -> Token.LET;
new KeywordEntry("hidden", Token.HIDDEN), case "local" -> Token.LOCAL;
new KeywordEntry("if", Token.IF), case "module" -> Token.MODULE;
new KeywordEntry("import", Token.IMPORT), case "new" -> Token.NEW;
new KeywordEntry("in", Token.IN), case "nothing" -> Token.NOTHING;
new KeywordEntry("is", Token.IS), case "null" -> Token.NULL;
new KeywordEntry("let", Token.LET), case "open" -> Token.OPEN;
new KeywordEntry("local", Token.LOCAL), case "out" -> Token.OUT;
new KeywordEntry("module", Token.MODULE), case "outer" -> Token.OUTER;
new KeywordEntry("new", Token.NEW), case "override" -> Token.OVERRIDE;
new KeywordEntry("nothing", Token.NOTHING), case "protected" -> Token.PROTECTED;
new KeywordEntry("null", Token.NULL), case "read" -> Token.READ;
new KeywordEntry("open", Token.OPEN), case "record" -> Token.RECORD;
new KeywordEntry("out", Token.OUT), case "super" -> Token.SUPER;
new KeywordEntry("outer", Token.OUTER), case "switch" -> Token.SWITCH;
new KeywordEntry("override", Token.OVERRIDE), case "this" -> Token.THIS;
new KeywordEntry("protected", Token.PROTECTED), case "throw" -> Token.THROW;
new KeywordEntry("read", Token.READ), case "trace" -> Token.TRACE;
new KeywordEntry("record", Token.RECORD), case "true" -> Token.TRUE;
new KeywordEntry("super", Token.SUPER), case "typealias" -> Token.TYPE_ALIAS;
new KeywordEntry("switch", Token.SWITCH), case "unknown" -> Token.UNKNOWN;
new KeywordEntry("this", Token.THIS), case "vararg" -> Token.VARARG;
new KeywordEntry("throw", Token.THROW), case "when" -> Token.WHEN;
new KeywordEntry("trace", Token.TRACE), default -> Token.IDENTIFIER;
new KeywordEntry("true", Token.TRUE), };
new KeywordEntry("typealias", Token.TYPE_ALIAS),
new KeywordEntry("unknown", Token.UNKNOWN),
new KeywordEntry("vararg", Token.VARARG),
new KeywordEntry("when", Token.WHEN)
};
protected record KeywordEntry(String name, Token token) implements Comparable<String> {
@Override
public int compareTo(String o) {
return name.compareTo(o);
}
} }
private static class InterpolationScope { private static class InterpolationScope {

View File

@@ -1,140 +0,0 @@
/*
* Copyright © 2024-2025 Apple Inc. and the Pkl project authors. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.pkl.parser;
import java.util.ArrayList;
import java.util.List;
import org.pkl.parser.syntax.Expr;
import org.pkl.parser.syntax.Expr.BinaryOperatorExpr;
import org.pkl.parser.syntax.Expr.OperatorExpr;
import org.pkl.parser.syntax.Expr.TypeCastExpr;
import org.pkl.parser.syntax.Expr.TypeCheckExpr;
import org.pkl.parser.syntax.Expr.TypeExpr;
import org.pkl.parser.syntax.Operator;
import org.pkl.parser.util.Nullable;
/**
 * Turns a flat, alternating sequence of operands and {@link OperatorExpr} markers into nested
 * expression nodes, honoring operator precedence and associativity.
 *
 * <p>All members are static; the class cannot be instantiated.
 */
class OperatorResolver {
  private OperatorResolver() {}

  /** Direction in which operators of equal precedence are grouped. */
  private enum Associativity {
    LEFT,
    RIGHT
  }

  /**
   * Returns the binding strength of {@code op}. Operators with a larger value are folded before
   * operators with a smaller one.
   */
  public static int getPrecedence(Operator op) {
    return switch (op) {
      case DOT, QDOT -> 10;
      case POW -> 9;
      case MULT, DIV, INT_DIV, MOD -> 8;
      case PLUS, MINUS -> 7;
      case LT, LTE, GT, GTE -> 6;
      case IS, AS -> 5;
      case EQ_EQ, NOT_EQ -> 4;
      case AND -> 3;
      case OR -> 2;
      case PIPE -> 1;
      case NULL_COALESCE -> 0;
    };
  }

  /** {@code POW} and {@code NULL_COALESCE} group to the right; every other operator to the left. */
  private static Associativity getAssociativity(Operator op) {
    var groupsRight = op == Operator.POW || op == Operator.NULL_COALESCE;
    return groupsRight ? Associativity.RIGHT : Associativity.LEFT;
  }

  /**
   * Finds the operator with the greatest precedence that is at least {@code min}.
   *
   * <p>On ties the operator that appears first in {@code exprs} wins, because a later candidate
   * must be strictly stronger to replace the current one.
   *
   * @return the selected operator, or {@code null} if no operator reaches {@code min}
   */
  private static @Nullable Operator getHighestPrecedence(List<Expr> exprs, int min) {
    Operator strongest = null;
    var strongestPrecedence = -1;
    for (var element : exprs) {
      if (!(element instanceof OperatorExpr marker)) {
        continue;
      }
      var candidate = marker.getOp();
      var candidatePrecedence = getPrecedence(candidate);
      if (candidatePrecedence < min || candidatePrecedence <= strongestPrecedence) {
        continue;
      }
      strongestPrecedence = candidatePrecedence;
      strongest = candidate;
    }
    return strongest;
  }

  /**
   * Locates {@code op} inside {@code exprs}: the first occurrence for left-associative operators,
   * the last occurrence for right-associative ones.
   *
   * @return the position of the operator marker, or {@code -1} if it is absent
   */
  private static int index(List<Expr> exprs, Associativity associativity, Operator op) {
    var scanForward = associativity == Associativity.LEFT;
    var size = exprs.size();
    for (var step = 0; step < size; step++) {
      var position = scanForward ? step : size - 1 - step;
      if (exprs.get(position) instanceof OperatorExpr marker && marker.getOp() == op) {
        return position;
      }
    }
    return -1;
  }

  /**
   * Folds one occurrence of {@code op} together with its two neighbouring operands into a single
   * node and returns a new list in which those three elements are replaced by that node.
   *
   * <p>The input list is not modified. For {@code IS} and {@code AS} the right neighbour is cast
   * to {@link TypeExpr}.
   */
  private static List<Expr> resolveOperator(
      List<Expr> exprs, Associativity associativity, Operator op) {
    var working = new ArrayList<>(exprs);
    var at = index(working, associativity, op);
    var lhs = working.get(at - 1);
    var rhs = working.get(at + 1);
    var span = lhs.span().endWith(rhs.span());

    Expr folded;
    if (op == Operator.IS) {
      folded = new TypeCheckExpr(lhs, ((TypeExpr) rhs).getType(), span);
    } else if (op == Operator.AS) {
      folded = new TypeCastExpr(lhs, ((TypeExpr) rhs).getType(), span);
    } else {
      folded = new BinaryOperatorExpr(lhs, rhs, op, span);
    }

    // Swap the `lhs op rhs` triple for the single folded node.
    working.subList(at - 1, at + 2).clear();
    working.add(at - 1, folded);
    return working;
  }

  /**
   * Resolve all operators based on their precedence and associativity. This requires that the list
   * has a valid form: `expr` `op` `expr` ...
   *
   * @throws ParserError if more than one element remains after every operator has been folded
   */
  public static Expr resolveOperators(List<Expr> exprs) {
    if (exprs.size() == 1) {
      return exprs.get(0);
    }
    var resolved = resolveOperatorsHigherThan(exprs, 0);
    if (resolved.size() <= 1) {
      return resolved.get(0);
    }
    var first = exprs.get(0);
    var last = exprs.get(exprs.size() - 1);
    throw new ParserError("Malformed expression", first.span().endWith(last.span()));
  }

  /**
   * Repeatedly folds the strongest remaining operator until no operator with precedence of at
   * least {@code minPrecedence} is left. Despite the method name, the bound is inclusive.
   *
   * @return the reduced list; this is {@code exprs} itself when nothing had to be folded
   */
  public static List<Expr> resolveOperatorsHigherThan(List<Expr> exprs, int minPrecedence) {
    var current = exprs;
    for (var next = getHighestPrecedence(current, minPrecedence);
        next != null;
        next = getHighestPrecedence(current, minPrecedence)) {
      current = resolveOperator(current, getAssociativity(next), next);
    }
    return current;
  }
}

View File

@@ -29,6 +29,7 @@ import org.pkl.parser.syntax.ClassProperty;
import org.pkl.parser.syntax.DocComment; import org.pkl.parser.syntax.DocComment;
import org.pkl.parser.syntax.Expr; import org.pkl.parser.syntax.Expr;
import org.pkl.parser.syntax.Expr.AmendsExpr; import org.pkl.parser.syntax.Expr.AmendsExpr;
import org.pkl.parser.syntax.Expr.BinaryOperatorExpr;
import org.pkl.parser.syntax.Expr.BoolLiteralExpr; import org.pkl.parser.syntax.Expr.BoolLiteralExpr;
import org.pkl.parser.syntax.Expr.FloatLiteralExpr; import org.pkl.parser.syntax.Expr.FloatLiteralExpr;
import org.pkl.parser.syntax.Expr.FunctionLiteralExpr; import org.pkl.parser.syntax.Expr.FunctionLiteralExpr;
@@ -41,7 +42,6 @@ import org.pkl.parser.syntax.Expr.MultiLineStringLiteralExpr;
import org.pkl.parser.syntax.Expr.NewExpr; import org.pkl.parser.syntax.Expr.NewExpr;
import org.pkl.parser.syntax.Expr.NonNullExpr; import org.pkl.parser.syntax.Expr.NonNullExpr;
import org.pkl.parser.syntax.Expr.NullLiteralExpr; import org.pkl.parser.syntax.Expr.NullLiteralExpr;
import org.pkl.parser.syntax.Expr.OperatorExpr;
import org.pkl.parser.syntax.Expr.OuterExpr; import org.pkl.parser.syntax.Expr.OuterExpr;
import org.pkl.parser.syntax.Expr.ParenthesizedExpr; import org.pkl.parser.syntax.Expr.ParenthesizedExpr;
import org.pkl.parser.syntax.Expr.QualifiedAccessExpr; import org.pkl.parser.syntax.Expr.QualifiedAccessExpr;
@@ -802,52 +802,50 @@ public class Parser {
@SuppressWarnings("DuplicatedCode") @SuppressWarnings("DuplicatedCode")
private Expr parseExpr(@Nullable String expectation) { private Expr parseExpr(@Nullable String expectation) {
List<Expr> exprs = new ArrayList<>(); return parseExpr(expectation, 1);
exprs.add(parseExprAtom(expectation)); }
private Expr parseExpr(@Nullable String expectation, int minPrecedence) {
var expr = parseExprAtom(expectation);
var op = getOperator(); var op = getOperator();
loop:
while (op != null) { while (op != null) {
if (op.getPrec() < minPrecedence) break;
// `-` must be on the same line as the left operand, with no semicolon in between
if (op == Operator.MINUS && (precededBySemicolon || _lookahead.newLinesBetween > 0)) break;
next(); // operator
switch (op) { switch (op) {
case IS, AS -> { case IS -> {
exprs.add(new OperatorExpr(op, next().span)); var type = parseType();
exprs.add(new Expr.TypeExpr(parseType())); expr = new Expr.TypeCheckExpr(expr, type, expr.span().endWith(type.span()));
var precedence = OperatorResolver.getPrecedence(op);
exprs = OperatorResolver.resolveOperatorsHigherThan(exprs, precedence);
} }
case MINUS -> { case AS -> {
if (!precededBySemicolon && _lookahead.newLinesBetween == 0) { var type = parseType();
exprs.add(new OperatorExpr(op, next().span)); expr = new Expr.TypeCastExpr(expr, type, expr.span().endWith(type.span()));
exprs.add(parseExprAtom(expectation));
} else {
break loop;
}
} }
case DOT, QDOT -> { case DOT, QDOT -> {
// this exists just to keep backward compatibility with code as `x + y as List.distinct` var rhs = parseIdentifier();
// which should be removed at some point
next();
var expr = exprs.remove(exprs.size() - 1);
var isNullable = op == Operator.QDOT; var isNullable = op == Operator.QDOT;
var identifier = parseIdentifier();
ArgumentList argumentList = null; ArgumentList argumentList = null;
if (lookahead == Token.LPAREN if (lookahead == Token.LPAREN
&& !precededBySemicolon && !precededBySemicolon
&& _lookahead.newLinesBetween == 0) { && _lookahead.newLinesBetween == 0) {
argumentList = parseArgumentList(); argumentList = parseArgumentList();
} }
var lastSpan = argumentList != null ? argumentList.span() : identifier.span(); var lastSpan = argumentList != null ? argumentList.span() : rhs.span();
exprs.add( expr =
new QualifiedAccessExpr( new QualifiedAccessExpr(
expr, identifier, isNullable, argumentList, expr.span().endWith(lastSpan))); expr, rhs, isNullable, argumentList, expr.span().endWith(lastSpan));
} }
default -> { default -> {
exprs.add(new OperatorExpr(op, next().span)); var nextMinPrec = op.isLeftAssoc() ? op.getPrec() + 1 : op.getPrec();
exprs.add(parseExprAtom(expectation)); var rhs = parseExpr(expectation, nextMinPrec);
expr = new BinaryOperatorExpr(expr, rhs, op, expr.span().endWith(rhs.span()));
} }
} }
op = getOperator(); op = getOperator();
} }
return OperatorResolver.resolveOperators(exprs); return expr;
} }
private @Nullable Operator getOperator() { private @Nullable Operator getOperator() {

View File

@@ -667,65 +667,4 @@ public abstract sealed class Expr extends AbstractNode {
return (Type) children.get(1); return (Type) children.get(1);
} }
} }
/**
 * Synthetic node that stands for a bare operator while an expression is being parsed. It is only
 * used at parse time, and {@link #accept} always throws.
 */
public static final class OperatorExpr extends Expr {
  private final Operator op;

  public OperatorExpr(Operator op, Span span) {
    super(span, null);
    this.op = op;
  }

  @Override
  public <T> T accept(ParserVisitor<? extends T> visitor) {
    // Visitors are never expected to see this node.
    throw new RuntimeException("Unreacheable code");
  }

  /** Returns the operator this marker stands for. */
  public Operator getOp() {
    return op;
  }

  @Override
  public String toString() {
    return new StringBuilder("OperatorExpr{op=")
        .append(op)
        .append(", span=")
        .append(span)
        .append('}')
        .toString();
  }

  /** Two markers are equal when both their operator and their span are equal. */
  @Override
  public boolean equals(Object o) {
    // The class is final, so the instanceof pattern matches exactly the same objects as a
    // getClass() comparison and also rejects null.
    return o instanceof OperatorExpr other && op == other.op && Objects.equals(span, other.span);
  }

  @Override
  public int hashCode() {
    return Objects.hash(op, span);
  }
}
/**
 * Synthetic node that wraps a {@link Type} so it can sit in a list of expressions while an
 * expression is being parsed. It is only used at parse time, and {@link #accept} always throws.
 */
public static final class TypeExpr extends Expr {
  public TypeExpr(Type type) {
    super(type.span(), List.of(type));
  }

  @Override
  public <T> T accept(ParserVisitor<? extends T> visitor) {
    // Visitors are never expected to see this node.
    throw new RuntimeException("Unreacheable code");
  }

  /** Returns the wrapped type, which is stored as the only child of this node. */
  public Type getType() {
    assert children != null;
    var wrapped = children.get(0);
    return (Type) wrapped;
  }
}
} }

View File

@@ -16,25 +16,41 @@
package org.pkl.parser.syntax; package org.pkl.parser.syntax;
public enum Operator { public enum Operator {
POW, NULL_COALESCE(1, false),
MULT, PIPE(2, true),
DIV, OR(3, true),
INT_DIV, AND(4, true),
MOD, EQ_EQ(5, true),
PLUS, NOT_EQ(5, true),
MINUS, IS(6, true),
LT, AS(6, true),
GT, LT(7, true),
LTE, GT(7, true),
GTE, LTE(7, true),
IS, GTE(7, true),
AS, PLUS(8, true),
EQ_EQ, MINUS(8, true),
NOT_EQ, MULT(9, true),
AND, DIV(9, true),
OR, INT_DIV(9, true),
PIPE, MOD(9, true),
NULL_COALESCE, POW(10, false),
DOT, DOT(11, true),
QDOT, QDOT(11, true);
private final int prec;
private final boolean isLeftAssoc;
Operator(int prec, boolean isLeftAssoc) {
this.prec = prec;
this.isLeftAssoc = isLeftAssoc;
}
public int getPrec() {
return prec;
}
public boolean isLeftAssoc() {
return isLeftAssoc;
}
} }

View File

@@ -46,9 +46,4 @@ class LexerTest {
assertThat(Lexer.maybeQuoteIdentifier("this")).isEqualTo("`this`") assertThat(Lexer.maybeQuoteIdentifier("this")).isEqualTo("`this`")
assertThat(Lexer.maybeQuoteIdentifier("😀")).isEqualTo("`😀`") assertThat(Lexer.maybeQuoteIdentifier("😀")).isEqualTo("`😀`")
} }
@Test
fun `lexer keywords are sorted`() {
assertThat(Lexer.KEYWORDS).isSortedAccordingTo { a, b -> a.compareTo(b.name) }
}
} }

View File

@@ -381,10 +381,6 @@ class SexpRenderer {
buf.append(tab) buf.append(tab)
buf.append("(floatLiteralExpr)") buf.append("(floatLiteralExpr)")
} }
is StringConstant -> {
buf.append(tab)
buf.append("(stringConstantExpr)")
}
is SingleLineStringLiteralExpr -> renderSingleLineStringLiteral(expr) is SingleLineStringLiteralExpr -> renderSingleLineStringLiteral(expr)
is MultiLineStringLiteralExpr -> renderMultiLineStringLiteral(expr) is MultiLineStringLiteralExpr -> renderMultiLineStringLiteral(expr)
is ThrowExpr -> renderThrowExpr(expr) is ThrowExpr -> renderThrowExpr(expr)
@@ -412,8 +408,6 @@ class SexpRenderer {
is BinaryOperatorExpr -> renderBinaryOpExpr(expr) is BinaryOperatorExpr -> renderBinaryOpExpr(expr)
is TypeCheckExpr -> renderTypeCheckExpr(expr) is TypeCheckExpr -> renderTypeCheckExpr(expr)
is TypeCastExpr -> renderTypeCastExpr(expr) is TypeCastExpr -> renderTypeCastExpr(expr)
is OperatorExpr -> throw RuntimeException("Operator expr should not exist after parsing")
is TypeExpr -> throw RuntimeException("Type expr should not exist after parsing")
} }
} }