Simplify precedence and associativity parsing (#1066)

This commit is contained in:
Islon Scherer
2025-05-12 14:59:26 +02:00
committed by GitHub
parent 948a20ad0c
commit fe2e4aa1a4
7 changed files with 110 additions and 322 deletions

View File

@@ -16,7 +16,6 @@
package org.pkl.parser;
import java.util.ArrayDeque;
import java.util.Arrays;
import java.util.Deque;
import org.pkl.parser.util.ErrorMessages;
@@ -734,70 +733,57 @@ public class Lexer {
return isRegularIdentifier(identifier) ? identifier : "`" + identifier + "`";
}
@SuppressWarnings("SuspiciousArrayMethodCall")
private static boolean isKeyword(String text) {
var index = Arrays.binarySearch(KEYWORDS, text);
return index >= 0;
return getKeywordOrIdentifier(text) != Token.IDENTIFIER;
}
/**
 * Looks {@code text} up in {@code KEYWORDS} with a binary search.
 *
 * @param text the word to classify
 * @return the token of the matching keyword entry, or {@code Token.IDENTIFIER} when the search
 *     finds no entry
 */
@SuppressWarnings("SuspiciousArrayMethodCall")
private static Token getKeywordOrIdentifier(String text) {
  var position = Arrays.binarySearch(KEYWORDS, text);
  return position >= 0 ? KEYWORDS[position].token : Token.IDENTIFIER;
}
/**
 * Table pairing each keyword's text with the token it lexes to.
 *
 * <p>Entries are listed in ascending order of their text. {@code getKeywordOrIdentifier} searches
 * this array with {@code Arrays.binarySearch}, which requires a sorted array, so new entries must
 * be inserted at their sorted position.
 */
protected static final KeywordEntry[] KEYWORDS = {
new KeywordEntry("_", Token.UNDERSCORE),
new KeywordEntry("abstract", Token.ABSTRACT),
new KeywordEntry("amends", Token.AMENDS),
new KeywordEntry("as", Token.AS),
new KeywordEntry("case", Token.CASE),
new KeywordEntry("class", Token.CLASS),
new KeywordEntry("const", Token.CONST),
new KeywordEntry("delete", Token.DELETE),
new KeywordEntry("else", Token.ELSE),
new KeywordEntry("extends", Token.EXTENDS),
new KeywordEntry("external", Token.EXTERNAL),
new KeywordEntry("false", Token.FALSE),
new KeywordEntry("fixed", Token.FIXED),
new KeywordEntry("for", Token.FOR),
new KeywordEntry("function", Token.FUNCTION),
new KeywordEntry("hidden", Token.HIDDEN),
new KeywordEntry("if", Token.IF),
new KeywordEntry("import", Token.IMPORT),
new KeywordEntry("in", Token.IN),
new KeywordEntry("is", Token.IS),
new KeywordEntry("let", Token.LET),
new KeywordEntry("local", Token.LOCAL),
new KeywordEntry("module", Token.MODULE),
new KeywordEntry("new", Token.NEW),
new KeywordEntry("nothing", Token.NOTHING),
new KeywordEntry("null", Token.NULL),
new KeywordEntry("open", Token.OPEN),
new KeywordEntry("out", Token.OUT),
new KeywordEntry("outer", Token.OUTER),
new KeywordEntry("override", Token.OVERRIDE),
new KeywordEntry("protected", Token.PROTECTED),
new KeywordEntry("read", Token.READ),
new KeywordEntry("record", Token.RECORD),
new KeywordEntry("super", Token.SUPER),
new KeywordEntry("switch", Token.SWITCH),
new KeywordEntry("this", Token.THIS),
new KeywordEntry("throw", Token.THROW),
new KeywordEntry("trace", Token.TRACE),
new KeywordEntry("true", Token.TRUE),
new KeywordEntry("typealias", Token.TYPE_ALIAS),
new KeywordEntry("unknown", Token.UNKNOWN),
new KeywordEntry("vararg", Token.VARARG),
new KeywordEntry("when", Token.WHEN)
};
protected record KeywordEntry(String name, Token token) implements Comparable<String> {
@Override
public int compareTo(String o) {
return name.compareTo(o);
}
/**
 * Classifies a word as a keyword or a plain identifier.
 *
 * @param keyword the word to classify
 * @return the token of the case label that equals {@code keyword}, or {@code Token.IDENTIFIER}
 *     when no label matches
 */
private static Token getKeywordOrIdentifier(String keyword) {
return switch (keyword) {
case "_" -> Token.UNDERSCORE;
case "abstract" -> Token.ABSTRACT;
case "amends" -> Token.AMENDS;
case "as" -> Token.AS;
case "case" -> Token.CASE;
case "class" -> Token.CLASS;
case "const" -> Token.CONST;
case "delete" -> Token.DELETE;
case "else" -> Token.ELSE;
case "extends" -> Token.EXTENDS;
case "external" -> Token.EXTERNAL;
case "false" -> Token.FALSE;
case "fixed" -> Token.FIXED;
case "for" -> Token.FOR;
case "function" -> Token.FUNCTION;
case "hidden" -> Token.HIDDEN;
case "if" -> Token.IF;
case "import" -> Token.IMPORT;
case "in" -> Token.IN;
case "is" -> Token.IS;
case "let" -> Token.LET;
case "local" -> Token.LOCAL;
case "module" -> Token.MODULE;
case "new" -> Token.NEW;
case "nothing" -> Token.NOTHING;
case "null" -> Token.NULL;
case "open" -> Token.OPEN;
case "out" -> Token.OUT;
case "outer" -> Token.OUTER;
case "override" -> Token.OVERRIDE;
case "protected" -> Token.PROTECTED;
case "read" -> Token.READ;
case "record" -> Token.RECORD;
case "super" -> Token.SUPER;
case "switch" -> Token.SWITCH;
case "this" -> Token.THIS;
case "throw" -> Token.THROW;
case "trace" -> Token.TRACE;
case "true" -> Token.TRUE;
case "typealias" -> Token.TYPE_ALIAS;
case "unknown" -> Token.UNKNOWN;
case "vararg" -> Token.VARARG;
case "when" -> Token.WHEN;
// any word without a case label above is treated as an ordinary identifier
default -> Token.IDENTIFIER;
};
}
private static class InterpolationScope {

View File

@@ -1,140 +0,0 @@
/*
* Copyright © 2024-2025 Apple Inc. and the Pkl project authors. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.pkl.parser;
import java.util.ArrayList;
import java.util.List;
import org.pkl.parser.syntax.Expr;
import org.pkl.parser.syntax.Expr.BinaryOperatorExpr;
import org.pkl.parser.syntax.Expr.OperatorExpr;
import org.pkl.parser.syntax.Expr.TypeCastExpr;
import org.pkl.parser.syntax.Expr.TypeCheckExpr;
import org.pkl.parser.syntax.Expr.TypeExpr;
import org.pkl.parser.syntax.Operator;
import org.pkl.parser.util.Nullable;
/**
 * Folds a flat list of alternating operands and {@link OperatorExpr} markers into nested
 * expressions, one operator at a time, according to operator precedence and associativity.
 */
class OperatorResolver {
  private OperatorResolver() {}

  private enum Associativity {
    LEFT,
    RIGHT
  }

  /**
   * Returns the precedence level of {@code op}. Operators with a larger level are folded before
   * operators with a smaller one.
   */
  public static int getPrecedence(Operator op) {
    return switch (op) {
      case DOT, QDOT -> 10;
      case POW -> 9;
      case MULT, DIV, INT_DIV, MOD -> 8;
      case PLUS, MINUS -> 7;
      case LT, LTE, GT, GTE -> 6;
      case IS, AS -> 5;
      case EQ_EQ, NOT_EQ -> 4;
      case AND -> 3;
      case OR -> 2;
      case PIPE -> 1;
      case NULL_COALESCE -> 0;
    };
  }

  /** Returns {@code RIGHT} for {@code POW} and {@code NULL_COALESCE}, {@code LEFT} otherwise. */
  private static Associativity getAssociativity(Operator op) {
    return switch (op) {
      case NULL_COALESCE -> Associativity.RIGHT;
      case POW -> Associativity.RIGHT;
      default -> Associativity.LEFT;
    };
  }

  /**
   * Scans {@code exprs} for the operator with the greatest precedence that is at least {@code
   * min}. On a tie the operator that appears first in the list wins.
   *
   * @return the selected operator, or {@code null} when no operator in the list qualifies
   */
  private static @Nullable Operator getHighestPrecedence(List<Expr> exprs, int min) {
    Operator best = null;
    var bestPrecedence = -1;
    for (var candidate : exprs) {
      if (!(candidate instanceof OperatorExpr marker)) {
        continue;
      }
      var current = marker.getOp();
      var precedence = getPrecedence(current);
      if (precedence < min || precedence <= bestPrecedence) {
        continue;
      }
      bestPrecedence = precedence;
      best = current;
    }
    return best;
  }

  /**
   * Finds the position of an {@link OperatorExpr} carrying {@code op}: the first occurrence for
   * left associativity, the last occurrence for right associativity.
   *
   * @return the position in {@code exprs}, or {@code -1} when {@code op} does not occur
   */
  private static int index(List<Expr> exprs, Associativity associativity, Operator op) {
    var leftToRight = associativity == Associativity.LEFT;
    var size = exprs.size();
    for (var step = 0; step < size; step++) {
      var position = leftToRight ? step : size - 1 - step;
      if (exprs.get(position) instanceof OperatorExpr marker && marker.getOp() == op) {
        return position;
      }
    }
    return -1;
  }

  /**
   * Returns a copy of {@code exprs} in which one occurrence of {@code op}, together with the
   * operand on each side of it, is replaced by the single expression that combines them.
   */
  private static List<Expr> resolveOperator(
      List<Expr> exprs, Associativity associativity, Operator op) {
    var folded = new ArrayList<>(exprs);
    var opIndex = index(folded, associativity, op);
    var start = opIndex - 1;
    var lhs = folded.get(start);
    var rhs = folded.get(opIndex + 1);
    var span = lhs.span().endWith(rhs.span());

    Expr combined;
    if (op == Operator.IS) {
      // the right-hand side of `is` is a type wrapped in a TypeExpr
      combined = new TypeCheckExpr(lhs, ((TypeExpr) rhs).getType(), span);
    } else if (op == Operator.AS) {
      // the right-hand side of `as` is a type wrapped in a TypeExpr
      combined = new TypeCastExpr(lhs, ((TypeExpr) rhs).getType(), span);
    } else {
      combined = new BinaryOperatorExpr(lhs, rhs, op, span);
    }

    // drop `lhs op rhs` and put the combined node where `lhs` used to be
    folded.subList(start, start + 3).clear();
    folded.add(start, combined);
    return folded;
  }

  /**
   * Folds every operator in {@code exprs} and returns the resulting expression. The list is
   * expected to have the form `expr` `op` `expr` ...
   *
   * @throws ParserError if more than one element remains once all operators have been folded
   */
  public static Expr resolveOperators(List<Expr> exprs) {
    if (exprs.size() == 1) {
      return exprs.get(0);
    }
    var resolved = resolveOperatorsHigherThan(exprs, 0);
    if (resolved.size() <= 1) {
      return resolved.get(0);
    }
    var first = exprs.get(0);
    var last = exprs.get(exprs.size() - 1);
    throw new ParserError("Malformed expression", first.span().endWith(last.span()));
  }

  /**
   * Folds operators until none with a precedence of at least {@code minPrecedence} is left.
   * Despite the method name, operators whose precedence equals {@code minPrecedence} are folded
   * too.
   *
   * @return the reduced list, or {@code exprs} itself when there was nothing to fold
   */
  public static List<Expr> resolveOperatorsHigherThan(List<Expr> exprs, int minPrecedence) {
    var current = exprs;
    for (var op = getHighestPrecedence(current, minPrecedence);
        op != null;
        op = getHighestPrecedence(current, minPrecedence)) {
      current = resolveOperator(current, getAssociativity(op), op);
    }
    return current;
  }
}

View File

@@ -29,6 +29,7 @@ import org.pkl.parser.syntax.ClassProperty;
import org.pkl.parser.syntax.DocComment;
import org.pkl.parser.syntax.Expr;
import org.pkl.parser.syntax.Expr.AmendsExpr;
import org.pkl.parser.syntax.Expr.BinaryOperatorExpr;
import org.pkl.parser.syntax.Expr.BoolLiteralExpr;
import org.pkl.parser.syntax.Expr.FloatLiteralExpr;
import org.pkl.parser.syntax.Expr.FunctionLiteralExpr;
@@ -41,7 +42,6 @@ import org.pkl.parser.syntax.Expr.MultiLineStringLiteralExpr;
import org.pkl.parser.syntax.Expr.NewExpr;
import org.pkl.parser.syntax.Expr.NonNullExpr;
import org.pkl.parser.syntax.Expr.NullLiteralExpr;
import org.pkl.parser.syntax.Expr.OperatorExpr;
import org.pkl.parser.syntax.Expr.OuterExpr;
import org.pkl.parser.syntax.Expr.ParenthesizedExpr;
import org.pkl.parser.syntax.Expr.QualifiedAccessExpr;
@@ -802,52 +802,50 @@ public class Parser {
@SuppressWarnings("DuplicatedCode")
private Expr parseExpr(@Nullable String expectation) {
List<Expr> exprs = new ArrayList<>();
exprs.add(parseExprAtom(expectation));
return parseExpr(expectation, 1);
}
private Expr parseExpr(@Nullable String expectation, int minPrecedence) {
var expr = parseExprAtom(expectation);
var op = getOperator();
loop:
while (op != null) {
if (op.getPrec() < minPrecedence) break;
// `-` must be on the same line as the left operand, with no semicolons in between
if (op == Operator.MINUS && (precededBySemicolon || _lookahead.newLinesBetween > 0)) break;
next(); // operator
switch (op) {
case IS, AS -> {
exprs.add(new OperatorExpr(op, next().span));
exprs.add(new Expr.TypeExpr(parseType()));
var precedence = OperatorResolver.getPrecedence(op);
exprs = OperatorResolver.resolveOperatorsHigherThan(exprs, precedence);
case IS -> {
var type = parseType();
expr = new Expr.TypeCheckExpr(expr, type, expr.span().endWith(type.span()));
}
case MINUS -> {
if (!precededBySemicolon && _lookahead.newLinesBetween == 0) {
exprs.add(new OperatorExpr(op, next().span));
exprs.add(parseExprAtom(expectation));
} else {
break loop;
}
case AS -> {
var type = parseType();
expr = new Expr.TypeCastExpr(expr, type, expr.span().endWith(type.span()));
}
case DOT, QDOT -> {
// this exists only to keep backward compatibility with code such as `x + y as List.distinct`;
// it should be removed at some point
next();
var expr = exprs.remove(exprs.size() - 1);
var rhs = parseIdentifier();
var isNullable = op == Operator.QDOT;
var identifier = parseIdentifier();
ArgumentList argumentList = null;
if (lookahead == Token.LPAREN
&& !precededBySemicolon
&& _lookahead.newLinesBetween == 0) {
argumentList = parseArgumentList();
}
var lastSpan = argumentList != null ? argumentList.span() : identifier.span();
exprs.add(
var lastSpan = argumentList != null ? argumentList.span() : rhs.span();
expr =
new QualifiedAccessExpr(
expr, identifier, isNullable, argumentList, expr.span().endWith(lastSpan)));
expr, rhs, isNullable, argumentList, expr.span().endWith(lastSpan));
}
default -> {
exprs.add(new OperatorExpr(op, next().span));
exprs.add(parseExprAtom(expectation));
var nextMinPrec = op.isLeftAssoc() ? op.getPrec() + 1 : op.getPrec();
var rhs = parseExpr(expectation, nextMinPrec);
expr = new BinaryOperatorExpr(expr, rhs, op, expr.span().endWith(rhs.span()));
}
}
op = getOperator();
}
return OperatorResolver.resolveOperators(exprs);
return expr;
}
private @Nullable Operator getOperator() {

View File

@@ -667,65 +667,4 @@ public abstract sealed class Expr extends AbstractNode {
return (Type) children.get(1);
}
}
/**
 * Synthetic node that is only used at parse time. It carries an {@link Operator} and the span it
 * was given, and has no children.
 */
public static final class OperatorExpr extends Expr {
  private final Operator op;

  public OperatorExpr(Operator op, Span span) {
    super(span, null);
    this.op = op;
  }

  /** Always throws; this node should never be visited. */
  @Override
  public <T> T accept(ParserVisitor<? extends T> visitor) {
    throw new RuntimeException("Unreacheable code");
  }

  /** Returns the operator carried by this node. */
  public Operator getOp() {
    return op;
  }

  @Override
  public String toString() {
    return "OperatorExpr{op=" + op + ", span=" + span + '}';
  }

  /** Two instances are equal when both their operator and their span are equal. */
  @SuppressWarnings("ConstantValue")
  @Override
  public boolean equals(Object o) {
    if (o == this) {
      return true;
    }
    // the class is final, so an instanceof test accepts exactly the same objects as comparing
    // getClass() results, and it rejects null as well
    if (!(o instanceof OperatorExpr other)) {
      return false;
    }
    return op == other.op && Objects.equals(span, other.span);
  }

  @Override
  public int hashCode() {
    return Objects.hash(op, span);
  }
}
/**
 * Synthetic node that is only used at parse time. It holds a {@link Type} as its single child
 * and takes over that type's span.
 */
public static final class TypeExpr extends Expr {
  public TypeExpr(Type type) {
    super(type.span(), List.of(type));
  }

  /** Always throws; this node should never be visited. */
  @Override
  public <T> T accept(ParserVisitor<? extends T> visitor) {
    throw new RuntimeException("Unreacheable code");
  }

  /** Returns the wrapped type, which is stored as the first child. */
  public Type getType() {
    assert children != null;
    var wrapped = children.get(0);
    return (Type) wrapped;
  }
}
}

View File

@@ -16,25 +16,41 @@
package org.pkl.parser.syntax;
public enum Operator {
POW,
MULT,
DIV,
INT_DIV,
MOD,
PLUS,
MINUS,
LT,
GT,
LTE,
GTE,
IS,
AS,
EQ_EQ,
NOT_EQ,
AND,
OR,
PIPE,
NULL_COALESCE,
DOT,
QDOT,
NULL_COALESCE(1, false),
PIPE(2, true),
OR(3, true),
AND(4, true),
EQ_EQ(5, true),
NOT_EQ(5, true),
IS(6, true),
AS(6, true),
LT(7, true),
GT(7, true),
LTE(7, true),
GTE(7, true),
PLUS(8, true),
MINUS(8, true),
MULT(9, true),
DIV(9, true),
INT_DIV(9, true),
MOD(9, true),
POW(10, false),
DOT(11, true),
QDOT(11, true);
private final int prec;
private final boolean isLeftAssoc;
Operator(int prec, boolean isLeftAssoc) {
this.prec = prec;
this.isLeftAssoc = isLeftAssoc;
}
public int getPrec() {
return prec;
}
public boolean isLeftAssoc() {
return isLeftAssoc;
}
}

View File

@@ -46,9 +46,4 @@ class LexerTest {
assertThat(Lexer.maybeQuoteIdentifier("this")).isEqualTo("`this`")
assertThat(Lexer.maybeQuoteIdentifier("😀")).isEqualTo("`😀`")
}
// Checks that each entry of Lexer.KEYWORDS compares in sorted order against the name of the next.
@Test
fun `lexer keywords are sorted`() {
  val keywords = Lexer.KEYWORDS
  assertThat(keywords).isSortedAccordingTo { left, right -> left.compareTo(right.name) }
}
}

View File

@@ -381,10 +381,6 @@ class SexpRenderer {
buf.append(tab)
buf.append("(floatLiteralExpr)")
}
is StringConstant -> {
buf.append(tab)
buf.append("(stringConstantExpr)")
}
is SingleLineStringLiteralExpr -> renderSingleLineStringLiteral(expr)
is MultiLineStringLiteralExpr -> renderMultiLineStringLiteral(expr)
is ThrowExpr -> renderThrowExpr(expr)
@@ -412,8 +408,6 @@ class SexpRenderer {
is BinaryOperatorExpr -> renderBinaryOpExpr(expr)
is TypeCheckExpr -> renderTypeCheckExpr(expr)
is TypeCastExpr -> renderTypeCastExpr(expr)
is OperatorExpr -> throw RuntimeException("Operator expr should not exist after parsing")
is TypeExpr -> throw RuntimeException("Type expr should not exist after parsing")
}
}