Initial commit

This commit is contained in:
Peter Niederwieser
2016-01-19 14:51:19 +01:00
committed by Dan Chao
commit ecad035dca
2972 changed files with 211653 additions and 0 deletions
+170
View File
@@ -0,0 +1,170 @@
ABSTRACT=1
AMENDS=2
AS=3
CLASS=4
ELSE=5
EXTENDS=6
EXTERNAL=7
FALSE=8
FINAL=9
FOR=10
FUNCTION=11
HIDDEN_=12
IF=13
IMPORT=14
IMPORT_GLOB=15
IN=16
IS=17
LET=18
LOCAL=19
MODULE=20
NEW=21
NOTHING=22
NULL=23
OPEN=24
OUT=25
OUTER=26
READ=27
READ_GLOB=28
READ_OR_NULL=29
SUPER=30
THIS=31
THROW=32
TRACE=33
TRUE=34
TYPE_ALIAS=35
UNKNOWN=36
WHEN=37
LPAREN=38
RPAREN=39
LBRACE=40
RBRACE=41
LBRACK=42
RBRACK=43
LPRED=44
COMMA=45
DOT=46
QDOT=47
COALESCE=48
NON_NULL=49
AT=50
ASSIGN=51
GT=52
LT=53
NOT=54
QUESTION=55
COLON=56
ARROW=57
EQUAL=58
NOT_EQUAL=59
LTE=60
GTE=61
AND=62
OR=63
PLUS=64
MINUS=65
POW=66
MUL=67
DIV=68
INT_DIV=69
MOD=70
UNION=71
PIPE=72
SPREAD=73
QSPREAD=74
SLQuote=75
MLQuote=76
IntLiteral=77
FloatLiteral=78
Identifier=79
NewlineSemicolon=80
Whitespace=81
DocComment=82
BlockComment=83
LineComment=84
ShebangComment=85
SLEndQuote=86
SLInterpolation=87
SLUnicodeEscape=88
SLCharacterEscape=89
SLCharacters=90
MLEndQuote=91
MLInterpolation=92
MLUnicodeEscape=93
MLCharacterEscape=94
MLNewline=95
MLCharacters=96
'abstract'=1
'amends'=2
'as'=3
'class'=4
'else'=5
'extends'=6
'external'=7
'false'=8
'final'=9
'for'=10
'function'=11
'hidden'=12
'if'=13
'import'=14
'import*'=15
'in'=16
'is'=17
'let'=18
'local'=19
'module'=20
'new'=21
'nothing'=22
'null'=23
'open'=24
'out'=25
'outer'=26
'read'=27
'read*'=28
'read?'=29
'super'=30
'this'=31
'throw'=32
'trace'=33
'true'=34
'typealias'=35
'unknown'=36
'when'=37
'('=38
')'=39
'{'=40
'}'=41
'['=42
']'=43
'[['=44
','=45
'.'=46
'?.'=47
'??'=48
'!!'=49
'@'=50
'='=51
'>'=52
'<'=53
'!'=54
'?'=55
':'=56
'->'=57
'=='=58
'!='=59
'<='=60
'>='=61
'&&'=62
'||'=63
'+'=64
'-'=65
'**'=66
'*'=67
'/'=68
'~/'=69
'%'=70
'|'=71
'|>'=72
'...'=73
'...?'=74
+387
View File
@@ -0,0 +1,387 @@
/**
* Copyright © 2024 Apple Inc. and the Pkl project authors. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
lexer grammar PklLexer;
@header {
package org.pkl.core.parser.antlr;
}
@members {
/**
 * Mutable lexer state for one nesting level: one instance is created when the lexer is
 * constructed and another each time a string interpolation is entered.
 */
class StringInterpolationScope {
  /** Number of '(' seen in this scope that have not yet been closed by a ')'. */
  int parenLevel;

  /** Number of '#' characters that preceded the most recent opening quote in this scope. */
  int poundLength;
}
// Stack of active scopes; push() and peek() operate on the head, which is the innermost scope.
java.util.Deque<StringInterpolationScope> interpolationScopes = new java.util.ArrayDeque<>();
// Cached head of interpolationScopes; updated by pushInterpolationScope() and decParenLevel().
StringInterpolationScope interpolationScope;
// Instance initializer: installs the bottom scope, which decParenLevel() never pops.
{ pushInterpolationScope(); }
/** Opens a fresh scope, puts it on top of the scope stack, and makes it the current scope. */
void pushInterpolationScope() {
  StringInterpolationScope fresh = new StringInterpolationScope();
  interpolationScopes.push(fresh);
  interpolationScope = fresh;
}
/** Records one more open parenthesis in the current scope (invoked by the LPAREN rule). */
void incParenLevel() {
  interpolationScope.parenLevel++;
}
/**
 * Handles a closing parenthesis (invoked by the RPAREN rule).
 *
 * If the current scope still has open parentheses, one of them is closed. Otherwise the
 * parenthesis ends the current interpolation: the scope is discarded, the enclosing scope
 * becomes current again, and the lexer mode that was pushed for the interpolation is popped.
 */
void decParenLevel() {
  if (interpolationScope.parenLevel != 0) {
    interpolationScope.parenLevel--;
    return;
  }
  // guard against syntax errors: a stray ')' at the outermost level must not pop the bottom scope
  if (interpolationScopes.size() <= 1) {
    return;
  }
  interpolationScopes.pop();
  interpolationScope = interpolationScopes.peek();
  popMode();
}
/**
 * Looks ahead, without consuming input, for the pound run required by the current scope.
 *
 * @return true if the next poundLength characters of input are all '#'
 */
boolean isPounds() {
  int required = interpolationScope.poundLength;
  // fast paths for the common cases (0, 1)
  if (required == 0) {
    return true;
  }
  if (required == 1) {
    return _input.LA(1) == '#';
  }
  int offset = 1;
  while (offset <= required) {
    if (_input.LA(offset) != '#') {
      return false;
    }
    offset++;
  }
  return true;
}
/** Returns true if the next input character, which is not consumed, is a double quote. */
boolean isQuote() {
  int next = _input.LA(1);
  return next == '"';
}
/**
 * Checks whether the given text ends with the pound run required by the current scope.
 *
 * @param text text matched so far by the current token; expected to hold at least two characters
 * @return true if the last poundLength characters of text are all '#'
 */
boolean endsWithPounds(String text) {
  assert text.length() >= 2;
  int required = interpolationScope.poundLength;
  // fast paths for the common cases (0, 1)
  if (required == 0) {
    return true;
  }
  int length = text.length();
  if (required == 1) {
    return text.charAt(length - 1) == '#';
  }
  if (length < required) {
    return false;
  }
  for (int pos = length - required; pos < length; pos++) {
    if (text.charAt(pos) != '#') {
      return false;
    }
  }
  return true;
}
/** Replaces the current token text with itself minus its first and last character. */
void removeBackTicks() {
  String quoted = getText();
  int lastIndex = quoted.length() - 1;
  setText(quoted.substring(1, lastIndex));
}
// Peeks at the next character instead of consuming it in the grammar, so that the newline
// itself still ends up on NewlineSemicolonChannel, which consumers of that channel rely on.
boolean isNewlineOrEof() {
  int next = _input.LA(1);
  if (next == IntStream.EOF) {
    return true;
  }
  return next == '\r' || next == '\n';
}
}
// Custom token channels. Rules below route newlines/semicolons, whitespace, comments and the
// shebang line to these channels via `-> channel(...)` instead of the default channel.
// The parser predicate noNewlineOrSemicolon() inspects NewlineSemicolonChannel.
channels {
NewlineSemicolonChannel,
WhitespaceChannel,
CommentsChannel,
ShebangChannel
}
// Keywords. These rules are defined before Identifier: ANTLR picks the longest match and, on a
// tie, the rule defined first, so `class` lexes as CLASS while `classes` lexes as Identifier.
// `import*`, `read*` and `read?` are single tokens, so the suffix must directly follow the word.
// NOTE(review): the trailing underscore in HIDDEN_ presumably avoids a clash with ANTLR's
// predefined HIDDEN channel name -- confirm.
ABSTRACT : 'abstract';
AMENDS : 'amends';
AS : 'as';
CLASS : 'class';
CONST : 'const';
ELSE : 'else';
EXTENDS : 'extends';
EXTERNAL : 'external';
FALSE : 'false';
FIXED : 'fixed';
FOR : 'for';
FUNCTION : 'function';
HIDDEN_ : 'hidden';
IF : 'if';
IMPORT : 'import';
IMPORT_GLOB : 'import*';
IN : 'in';
IS : 'is';
LET : 'let';
LOCAL : 'local';
MODULE : 'module';
NEW : 'new';
NOTHING : 'nothing';
NULL : 'null';
OPEN : 'open';
OUT : 'out';
OUTER : 'outer';
READ : 'read';
READ_GLOB : 'read*';
READ_OR_NULL : 'read?';
SUPER : 'super';
THIS : 'this';
THROW : 'throw';
TRACE : 'trace';
TRUE : 'true';
TYPE_ALIAS : 'typealias';
UNKNOWN : 'unknown';
WHEN : 'when';
// reserved for future use, but not used today
// (still tokenized as keywords, so these words never lex as Identifier)
PROTECTED : 'protected';
OVERRIDE : 'override';
RECORD : 'record';
DELETE : 'delete';
CASE : 'case';
SWITCH : 'switch';
VARARG : 'vararg';
// Punctuation and operators.
// LPAREN/RPAREN additionally track parenthesis nesting in the current interpolation scope, so
// that the `)` which closes a string interpolation can be told apart from an ordinary `)`.
LPAREN : '(' { incParenLevel(); };
RPAREN : ')' { decParenLevel(); };
LBRACE : '{';
RBRACE : '}';
LBRACK : '[';
RBRACK : ']';
LPRED : '[['; // No RPRED, because `]]` would lex too eagerly to allow nested index expressions, e.g. foo[bar[baz]]
COMMA : ',';
DOT : '.';
QDOT : '?.';
COALESCE : '??';
NON_NULL : '!!';
AT : '@';
ASSIGN : '=';
GT : '>';
LT : '<';
NOT : '!';
QUESTION : '?';
COLON : ':';
ARROW : '->';
EQUAL : '==';
NOT_EQUAL : '!=';
LTE : '<=';
GTE : '>=';
AND : '&&';
OR : '||';
PLUS : '+';
MINUS : '-';
POW : '**';
STAR : '*';
DIV : '/';
INT_DIV : '~/';
MOD : '%';
UNION : '|';
PIPE : '|>';
SPREAD : '...';
QSPREAD : '...?';
// Defined before Identifier, so a lone `_` lexes as UNDERSCORE (same length, earlier rule wins),
// while longer names such as `_foo` still lex as Identifier.
UNDERSCORE : '_';
// Opening string delimiters: any number of '#' followed by `"` (single-line) or `"""` (multi-line).
// The action stores the number of leading '#' in the current scope (token length minus the
// quote characters); the Pounds fragment and isPounds() later compare against that count.
// Each rule then switches the lexer into the corresponding string mode.
SLQuote : '#'* '"' { interpolationScope.poundLength = getText().length() - 1; } -> pushMode(SLString);
MLQuote : '#'* '"""' { interpolationScope.poundLength = getText().length() - 3; } -> pushMode(MLString);
// Integer literal in decimal, hexadecimal (`0x`), binary (`0b`) or octal (`0o`) notation.
IntLiteral
: DecimalLiteral
| HexadecimalLiteral
| BinaryLiteral
| OctalLiteral
;
// leading zeros are allowed (cf. Swift)
// Must start with a digit; the remaining characters may be digits or underscores.
fragment DecimalLiteral
: DecimalDigit DecimalDigitCharacters?
;
fragment DecimalDigitCharacters
: DecimalDigitCharacter+
;
fragment DecimalDigitCharacter
: DecimalDigit
| '_'
;
fragment DecimalDigit
: [0-9]
;
// Only the lowercase prefix `0x` is accepted; at least one character must follow it.
fragment HexadecimalLiteral
: '0x' HexadecimalCharacter+ // intentionally allow underscore after '0x'; e.g. `0x_ab`. We will throw an error in AstBuilder.
;
fragment HexadecimalCharacter
: [0-9a-fA-F_]
;
// Only the lowercase prefix `0b` is accepted; at least one character must follow it.
fragment BinaryLiteral
: '0b' BinaryCharacter+ // intentionally allow underscore after '0b'; e.g. `0b_11`. We will throw an error in AstBuilder.
;
fragment BinaryCharacter
: [01_]
;
// Only the lowercase prefix `0o` is accepted; at least one character must follow it.
fragment OctalLiteral
: '0o' OctalCharacter+ // intentionally allow underscore after '0o'; e.g. `0o_34`. We will throw an error in AstBuilder.
;
fragment OctalCharacter
: [0-7_]
;
// Floating point literal. Two shapes are accepted:
//   1. optional integer part, a dot, a fractional part and an optional exponent (`1.5`, `.5`, `1.5e3`)
//   2. an integer part directly followed by an exponent (`1e3`)
// A dot without a following fractional part (`1.`) is not a FloatLiteral.
FloatLiteral
: DecimalLiteral? '.' '_'? DecimalLiteral Exponent? // intentionally allow underscore. We will throw an error in AstBuilder.
| DecimalLiteral Exponent
;
// `e` or `E`, an optional sign, then the exponent digits.
fragment Exponent
: [eE] [+-]? '_'? DecimalLiteral // intentionally allow underscore. We will throw an error in AstBuilder.
;
// Identifier: either a regular identifier or a backtick-quoted one.
// For the quoted form the action strips the surrounding backticks from the token text,
// so both forms reach the parser as the bare name.
Identifier
: RegularIdentifier
| QuotedIdentifier { removeBackTicks(); }
;
// Note: Keep in sync with Lexer.isRegularIdentifier()
fragment RegularIdentifier
: IdentifierStart IdentifierPart*
;
// At least one character is required between the backticks; any character except a backtick
// is accepted there.
fragment QuotedIdentifier
: '`' (~'`')+ '`'
;
// The second alternative consumes any character and then validates it: `_input.LA(-1)` is the
// character that `.` has just consumed.
fragment
IdentifierStart
: [a-zA-Z$_] // handle common cases without a predicate
| . {Character.isUnicodeIdentifierStart(_input.LA(-1))}?
;
// Same scheme as IdentifierStart, but digits are also allowed and the Unicode check is
// isUnicodeIdentifierPart.
fragment
IdentifierPart
: [a-zA-Z0-9$_] // handle common cases without a predicate
| . {Character.isUnicodeIdentifierPart(_input.LA(-1))}?
;
// A run of line breaks and/or semicolons becomes a single token on NewlineSemicolonChannel.
// The parser never matches it directly; its predicate noNewlineOrSemicolon() looks for it.
NewlineSemicolon
: [\r\n;]+ -> channel(NewlineSemicolonChannel)
;
// Note: Java, Scala, and Swift treat \f as whitespace; Dart doesn't.
// Python and C also include vertical tab.
// C# also includes Unicode class Zs (separator, space).
// Here whitespace is space, tab and form feed only; line breaks are handled by NewlineSemicolon.
Whitespace
: [ \t\f]+ -> channel(WhitespaceChannel)
;
// Doc comment: one or more consecutive `///` lines, each optionally indented, merged into a
// single token. Every line includes its terminating newline (or ends at EOF).
// Unlike the other comment kinds this token stays on the default channel, because the parser
// matches it explicitly (`t=DocComment?`).
DocComment
: ([ \t\f]* '///' .*? (Newline|EOF))+
;
// Block comments may nest: the rule refers to itself inside the comment body.
BlockComment
: '/*' (BlockComment | .)*? '*/' -> channel(CommentsChannel)
;
// Runs up to, but not including, the next line break or EOF (checked by lookahead only).
LineComment
: '//' .*? {isNewlineOrEof()}? -> channel(CommentsChannel)
;
// `#!` line; like LineComment it stops before the line break and goes to its own channel.
ShebangComment
: '#!' .*? {isNewlineOrEof()}? -> channel(ShebangChannel)
;
// Shared fragments for the string modes. The `strict:` comments show the intended syntax; the
// rules themselves are deliberately more lenient.
// NOTE(review): presumably the leniency lets malformed escapes be reported later with a better
// message, as the literal rules above do via AstBuilder -- confirm.
// strict: '\\' Pounds 'u{' HexDigit (HexDigit (HexDigit (HexDigit (HexDigit (HexDigit (HexDigit HexDigit? )?)?)?)?)?)? '}'
// Lenient form: after `u{` accept anything up to a `}`, line break, space or quote; the closing
// `}` itself is optional.
fragment UnicodeEscape
: '\\' Pounds 'u{' ~[}\r\n "]* '}'?
;
// strict: '\\' Pounds [tnr"\\]
// Lenient form: a backslash, the pounds, then any single character.
fragment CharacterEscape
: '\\' Pounds .
;
// Matches the '#' run that belongs to a delimiter or escape inside the current string, checked
// against interpolationScope.poundLength. The first two alternatives cover zero and one pound
// without calling a helper; longer runs are validated by endsWithPounds() on the text matched so far.
fragment Pounds
: { interpolationScope.poundLength == 0 }?
| '#' { interpolationScope.poundLength == 1 }?
| '#'+ { endsWithPounds(getText()) }?
;
// LF, CRLF, or a lone CR.
fragment Newline
: '\n' | '\r' '\n'?
;
// Lexer mode for the contents of a single-line string; entered from SLQuote.
mode SLString;
// strict: '"' Pounds
// A line break also ends the string and pops the mode, so an unterminated single-line string
// does not swallow the following lines.
SLEndQuote
: ('"' Pounds | Newline ) -> popMode
;
// Start of an interpolation, `\(` with the required pounds in between. Opens a new scope and
// lexes the embedded expression in DEFAULT_MODE; the matching `)` returns here via decParenLevel().
// Defined before SLCharacterEscape, which would otherwise match the same text.
SLInterpolation
: '\\' Pounds '(' { pushInterpolationScope(); } -> pushMode(DEFAULT_MODE)
;
SLUnicodeEscape
: UnicodeEscape
;
SLCharacterEscape
: CharacterEscape
;
// Literal string content. The first alternative takes everything that cannot start a delimiter,
// escape or line break. The second takes a quote or backslash that is NOT followed by the
// required pounds and is therefore ordinary content. Both continue recursively so that
// adjacent content forms a single token.
SLCharacters
: ~["\\\r\n]+ SLCharacters?
| ["\\] {!isPounds()}? SLCharacters?
;
// Lexer mode for the contents of a multi-line string; entered from MLQuote.
mode MLString;
// Closing delimiter: three quotes followed by the required pounds.
MLEndQuote
: '"""' Pounds -> popMode
;
// Start of an interpolation; same mechanism as SLInterpolation. Defined before
// MLCharacterEscape, which would otherwise match the same text.
MLInterpolation
: '\\' Pounds '(' { pushInterpolationScope(); } -> pushMode(DEFAULT_MODE)
;
MLUnicodeEscape
: UnicodeEscape
;
MLCharacterEscape
: CharacterEscape
;
// Line breaks inside the string are emitted as separate tokens rather than as part of MLCharacters.
MLNewline
: Newline
;
// Literal string content:
//   1. anything that is not a quote, backslash or line break
//   2. a backslash or a triple quote that is NOT followed by the required pounds
//   3. one or two quotes that are not followed by another quote
// Each alternative continues recursively so that adjacent content forms a single token.
MLCharacters
: ~["\\\r\n]+ MLCharacters?
| ('\\' | '"""') {!isPounds()}? MLCharacters?
| '"' '"'? {!isQuote()}? MLCharacters?
;
+255
View File
@@ -0,0 +1,255 @@
/**
* Copyright © 2024 Apple Inc. and the Pkl project authors. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
parser grammar PklParser;
@header {
package org.pkl.core.parser.antlr;
}
@members {
/**
 * Tells whether the next token to be consumed sits directly after the previous default-channel
 * token, with no newline or semicolon token in between.
 *
 * Scans backwards from the token before the current position. Returns false as soon as a token
 * on NewlineSemicolonChannel is found, true as soon as a default-channel token is found, and
 * true if the start of the input is reached first. Tokens on other channels are skipped.
 */
boolean noNewlineOrSemicolon() {
  int cursor = _input.index();
  while (--cursor >= 0) {
    int channel = _input.get(cursor).getChannel();
    if (channel == PklLexer.NewlineSemicolonChannel) {
      return false;
    }
    if (channel == PklLexer.DEFAULT_TOKEN_CHANNEL) {
      return true;
    }
  }
  return true;
}
}
options {
tokenVocab = PklLexer;
}
// Entry rule that accepts any mix, in any order and any number, of module declarations,
// imports, classes, type aliases, properties, methods and bare expressions, up to EOF.
// NOTE(review): presumably the entry point used by the REPL -- confirm.
replInput
: ((moduleDecl
| importClause
| clazz
| typeAlias
| classProperty
| classMethod
| expr))* EOF
;
// Entry rule for input consisting of exactly one expression.
exprInput
: expr EOF
;
// Entry rule for a module: an optional module declaration, then all imports, then any mix of
// classes, type aliases, properties and methods. Each kind of member is collected in its own
// list label (is, cs, ts, ps, ms).
module
: moduleDecl? (is+=importClause)* ((cs+=clazz | ts+=typeAlias | ps+=classProperty | ms+=classMethod))* EOF
;
// Module declaration: optional doc comment and annotations followed by the module header.
moduleDecl
: t=DocComment? annotation* moduleHeader
;
// Either a named module (`module a.b.c`, optionally with an extends/amends clause), or just an
// extends/amends clause without a module name.
moduleHeader
: modifier* 'module' qualifiedIdentifier moduleExtendsOrAmendsClause?
| moduleExtendsOrAmendsClause
;
// `t` records which of the two keywords was used; the target is a string constant.
moduleExtendsOrAmendsClause
: t=('extends' | 'amends') stringConstant
;
// `import "uri"` or `import* "uri"`, optionally renamed with `as Name`.
importClause
: t=('import' | 'import*') stringConstant ('as' Identifier)?
;
// Class definition; the body is optional.
clazz
: t=DocComment? annotation* classHeader classBody?
;
classHeader
: modifier* 'class' Identifier typeParameterList? ('extends' type)?
;
// The grammar accepts every modifier in every position where `modifier*` appears.
// NOTE(review): presumably AstBuilder rejects modifiers that are invalid for a given declaration -- confirm.
modifier
: t=('external' | 'abstract' | 'open' | 'local' | 'hidden' | 'fixed' | 'const')
;
// The closing brace is optional in the grammar and labelled `err`.
// NOTE(review): presumably this lets AstBuilder report a missing delimiter itself -- confirm.
// The same `err=...?` pattern is used for closing delimiters throughout this grammar.
classBody
: '{' ((ps+=classProperty | ms+=classMethod))* err='}'?
;
typeAlias
: t=DocComment? annotation* typeAliasHeader '=' type
;
typeAliasHeader
: modifier* 'typealias' Identifier typeParameterList?
;
// allows `foo: Bar { ... }` s.t. AstBuilder can provide better error message
// Accepted shapes after the name: a type annotation alone, or an optional type annotation
// followed by either `= expr` or one or more object bodies.
classProperty
: t=DocComment? annotation* modifier* Identifier (typeAnnotation | typeAnnotation? ('=' expr | objectBody+))
;
// Method definition; the `= expr` body is optional in the grammar.
classMethod
: t=DocComment? annotation* methodHeader ('=' expr)?
;
// Shared by classMethod and objectMethod.
methodHeader
: modifier* 'function' Identifier typeParameterList? parameterList typeAnnotation?
;
// In the list rules below, separators are optional in the grammar and collected under `errs`,
// and the closing delimiter is optional and labelled `err`.
// NOTE(review): presumably so that AstBuilder can report missing commas/delimiters -- confirm.
parameterList
: '(' (ts+=parameter (errs+=','? ts+=parameter)*)? err=')'?
;
// The opening parenthesis must not be preceded by a newline or semicolon, so a `(` at the
// start of a new line is not parsed as an argument list of the preceding expression.
argumentList
: {noNewlineOrSemicolon()}? '(' (es+=expr (errs+=','? es+=expr)*)? err=')'?
;
// `@Type` optionally followed by an object body.
annotation
: '@' type objectBody?
;
// One or more identifiers separated by dots.
qualifiedIdentifier
: ts+=Identifier ('.' ts+=Identifier)*
;
typeAnnotation
: ':' type
;
// At least one type parameter is required between the angle brackets.
typeParameterList
: '<' ts+=typeParameter (errs+=','? ts+=typeParameter)* err='>'?
;
// Optional `in`/`out` keyword (recorded in `t`) followed by the parameter name.
typeParameter
: t=('in' | 'out')? Identifier
;
// At least one type argument is required between the angle brackets.
typeArgumentList
: '<' ts+=type (errs+=','? ts+=type)* err='>'?
;
// Type expressions. This rule is left-recursive; in ANTLR the order of alternatives determines
// their relative precedence (earlier binds tighter), so do not reorder.
// - constrainedType: the `(` must not be preceded by a newline or semicolon, and at least one
//   constraint expression is required.
// - defaultUnionType: a type prefixed with `*`.
// - functionType: a parenthesized, possibly empty, parameter type list followed by `->` and the
//   result type.
type
: 'unknown' # unknownType
| 'nothing' # nothingType
| 'module' # moduleType
| stringConstant # stringLiteralType
| qualifiedIdentifier typeArgumentList? # declaredType
| '(' type err=')'? # parenthesizedType
| type '?' # nullableType
| type {noNewlineOrSemicolon()}? t='(' es+=expr (errs+=','? es+=expr)* err=')'? # constrainedType
| '*' u=type # defaultUnionType
| l=type '|' r=type # unionType
| t='(' (ps+=type (errs+=','? ps+=type)*)? err=')'? '->' r=type # functionType
;
// A name with an optional type annotation.
typedIdentifier
: Identifier typeAnnotation?
;
// A parameter is either the `_` placeholder or a (possibly typed) name.
parameter
: '_'
| typedIdentifier
;
// Many languages (e.g., Python) give `**` higher precedence than unary minus.
// The reason is that in Math, `-a^2` means `-(a^2)`.
// To avoid confusion, JS rejects `-a**2` and requires explicit parens.
// `-3.abs()` is a similar problem, handled differently by different languages.
//
// Expressions. This rule is left-recursive; in ANTLR the order of alternatives determines
// operator precedence (earlier binds tighter), so do not reorder. `**` and `??` are
// right-associative via <assoc=right>; the other binary operators use the default (left).
// Here unary minus and logical not are listed before `**`.
//
// Predicates:
// - subscriptExpr: the `[` must not be preceded by a newline or semicolon.
// - additiveExpr: a binary `-` must not be preceded by a newline or semicolon; `+` has no such
//   restriction.
// - argument lists carry the same restriction on their `(` (see argumentList).
expr
: 'this' # thisExpr
| 'outer' # outerExpr
| 'module' # moduleExpr
| 'null' # nullLiteral
| 'true' # trueLiteral
| 'false' # falseLiteral
| IntLiteral # intLiteral
| FloatLiteral # floatLiteral
| 'throw' '(' expr err=')'? # throwExpr
| 'trace' '(' expr err=')'? # traceExpr
| t=('import' | 'import*') '(' stringConstant err=')'? # importExpr
| t=('read' | 'read?' | 'read*') '(' expr err=')'? # readExpr
| Identifier argumentList? # unqualifiedAccessExpr
| t=SLQuote singleLineStringPart* t2=SLEndQuote # singleLineStringLiteral
| t=MLQuote multiLineStringPart* t2=MLEndQuote # multiLineStringLiteral
| t='new' type? objectBody # newExpr
| expr objectBody # amendExpr
| 'super' '.' Identifier argumentList? # superAccessExpr
| 'super' t='[' e=expr err=']'? # superSubscriptExpr
| expr t=('.' | '?.') Identifier argumentList? # qualifiedAccessExpr
| l=expr {noNewlineOrSemicolon()}? t='[' r=expr err=']'? # subscriptExpr
| expr '!!' # nonNullExpr
| '-' expr # unaryMinusExpr
| '!' expr # logicalNotExpr
| <assoc=right> l=expr t='**' r=expr # exponentiationExpr
// for some reason, moving rhs of rules starting with `l=expr` into a
// separate rule (to avoid repeated parsing of `expr`) messes up precedence
| l=expr t=('*' | '/' | '~/' | '%') r=expr # multiplicativeExpr
| l=expr (t='+' | {noNewlineOrSemicolon()}? t='-') r=expr # additiveExpr
| l=expr t=('<' | '>' | '<=' | '>=') r=expr # comparisonExpr
| l=expr t=('is' | 'as') r=type # typeTestExpr
| l=expr t=('==' | '!=') r=expr # equalityExpr
| l=expr t='&&' r=expr # logicalAndExpr
| l=expr t='||' r=expr # logicalOrExpr
| l=expr t='|>' r=expr # pipeExpr
| <assoc=right> l=expr t='??' r=expr # nullCoalesceExpr
| 'if' '(' c=expr err=')'? l=expr 'else' r=expr # ifExpr
| 'let' '(' parameter '=' l=expr err=')'? r=expr # letExpr
| parameterList '->' expr # functionLiteral
| '(' expr err=')'? # parenthesizedExpr
;
// Object body: optional parameters terminated by `->`, followed by any number of members.
// As elsewhere in this grammar, the closing brace is optional and labelled `err`.
objectBody
: '{' (ps+=parameter (errs+=','? ps+=parameter)* '->')? objectMember* err='}'?
;
// Members of an object body. Alternative order matters where several alternatives could match
// the same input (e.g. a property starts with an Identifier, which is also a valid expression).
// memberPredicate and objectEntry each accept up to two optional closing brackets
// (err1, err2), since the lexer has no `]]` token.
// whenGenerator: the `else` branch is optional. forGenerator: one or two parameters before `in`.
objectMember
: modifier* Identifier (typeAnnotation? '=' expr | objectBody+) # objectProperty
| methodHeader '=' expr # objectMethod
| t='[[' k=expr err1=']'? err2=']'? ('=' v=expr | objectBody+) # memberPredicate
| t='[' k=expr err1=']'? err2=']'? ('=' v=expr | objectBody+) # objectEntry
| expr # objectElement
| ('...' | '...?') expr # objectSpread
| 'when' '(' e=expr err=')'? (b1=objectBody ('else' b2=objectBody)?) # whenGenerator
| 'for' '(' t1=parameter (',' t2=parameter)? 'in' e=expr err=')'? objectBody # forGenerator
;
// A single-line string without interpolation: only literal characters and escapes are allowed
// between the quotes. Used where a constant is required (imports, extends/amends, string
// literal types).
stringConstant
: t=SLQuote (ts+=SLCharacters | ts+=SLCharacterEscape | ts+=SLUnicodeEscape)* t2=SLEndQuote
;
// One part of a single-line string literal: either an interpolated expression, whose closing
// `)` is the ordinary RPAREN token, or a non-empty run of literal and escape tokens.
singleLineStringPart
: SLInterpolation e=expr ')'
| (ts+=SLCharacters | ts+=SLCharacterEscape | ts+=SLUnicodeEscape)+
;
// Same as singleLineStringPart for multi-line strings; line breaks arrive as separate
// MLNewline tokens.
multiLineStringPart
: MLInterpolation e=expr ')'
| (ts+=MLCharacters | ts+=MLNewline | ts+=MLCharacterEscape | ts+=MLUnicodeEscape)+
;
// Keywords that are reserved for future use but have no meaning today; the list mirrors the
// reserved tokens declared in PklLexer.
// `const` is not listed here: it is an active keyword that the `modifier` rule already accepts.
// intentionally unused
//TODO: we get a "Mismatched Input" error unless we introduce this parser rule. Why?
reservedKeyword
: 'protected'
| 'override'
| 'record'
| 'delete'
| 'case'
| 'switch'
| 'vararg'
;