Skip to content

Commit a026ae0

Browse files
authored
feat: complete Query statements of FlinkSQL (#93)
* feat: add inlineDataValueClause rule * test: update tests of select statements * feat: support flinksql window TVF grammar * test: flink sql window TVF statement test * feat: support grouping sets grammar * test: window TVF Aggregation and Group Window Aggregation tests * test: supplemental selectAggregation with test cases * test: add Having statement test case * feat: support flinkSql over aggregation grammar * test: add over aggregation grammar test cases * test: flink sql join statement test cases * test: flink sql set Operations grammar test cases * test: flink sql limit clause test case * feat: remove allPlusUid and replace with uid * feat: support flink sql pattern recognition grammar * test: flink sql pattern recognition tests * feat: add flink sql with clause rule * test: flink sql with clause select tests * feat: rebuild flink sql parser
1 parent fbee70c commit a026ae0

19 files changed

Lines changed: 11790 additions & 8088 deletions

src/grammar/flinksql/FlinkSqlLexer.g4

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -273,6 +273,24 @@ ENFORCED: 'ENFORCED';
273273
METADATA: 'METADATA';
274274
VIRTUAL: 'VIRTUAL';
275275
ZONE: 'ZONE';
276+
TUMBLE: 'TUMBLE';
277+
HOP: 'HOP';
278+
CUMULATE: 'CUMULATE';
279+
DESCRIPTOR: 'DESCRIPTOR';
280+
TIMECOL: 'TIMECOL';
281+
SIZE: 'SIZE';
282+
OFFSET: 'OFFSET';
283+
STEP: 'STEP';
284+
SLIDE: 'SLIDE';
285+
SESSION: 'SESSION';
286+
MATCH_RECOGNIZE: 'MATCH_RECOGNIZE';
287+
MEASURES: 'MEASURES';
288+
PATTERN: 'PATTERN';
289+
ONE: 'ONE';
290+
PER: 'PER';
291+
KW_SKIP: 'SKIP';
292+
PAST: 'PAST';
293+
DEFINE: 'DEFINE';
276294

277295
// DATA TYPE Keywords
278296

@@ -330,6 +348,8 @@ LS_BRACKET: '[';
330348
RS_BRACKET: ']';
331349
LR_BRACKET: '(';
332350
RR_BRACKET: ')';
351+
LB_BRACKET: '{';
352+
RB_BRACKET: '}';
333353
COMMA: ',';
334354
SEMICOLON: ';';
335355
AT_SIGN: '@';
@@ -345,8 +365,8 @@ PENCENT_SIGN: '%';
345365
DOUBLE_VERTICAL_SIGN: '||';
346366
DOUBLE_HYPNEN_SIGN: '--';
347367
SLASH_SIGN: '/';
368+
QUESTION_MARK_SIGN: '?';
348369
DOT_ID: '.' ID_LITERAL_FRAG;
349-
PLUS_DOT_ID: (':' | '.') PLUS_ID_LITERAL;
350370
STRING_LITERAL: DQUOTA_STRING | SQUOTA_STRING | BQUOTA_STRING;
351371
DIG_LITERAL: DEC_DIGIT+;
352372
REAL_LITERAL: (DEC_DIGIT+)? '.' DEC_DIGIT+
@@ -355,14 +375,13 @@ REAL_LITERAL: (DEC_DIGIT+)? '.' DEC_DIGIT+
355375
| DEC_DIGIT+ EXPONENT_NUM_PART;
356376
BIT_STRING: BIT_STRING_L;
357377
ID_LITERAL: ID_LITERAL_FRAG;
358-
PLUS_ID_LITERAL: PLUS_ID_LITERAL_FRAG;
359378
FILE_PATH: FILE_PATH_STRING;
379+
DOUBLE_ARROW: '=>';
360380

361381
fragment FILE_PATH_STRING: ([/\\] (~([/\\ ]))*)+;
362382
fragment JAR_FILE_PARTTARN: '`' ( '\\'. | '``' | ~('`'|'\\'))* '`';
363383
fragment EXPONENT_NUM_PART: 'E' [-+]? DEC_DIGIT+;
364384
fragment ID_LITERAL_FRAG: [A-Z_0-9a-z]*?[A-Z_a-z]+?[A-Z_0-9a-z]*;
365-
fragment PLUS_ID_LITERAL_FRAG: [A-Z_0-9a-z*@#^$%&{}]*?[A-Z_a-z*@#^$%&{}]+?[A-Z_0-9a-z*@#^$%&{}]*;
366385
fragment DEC_DIGIT: [0-9];
367386
fragment DEC_LETTER: [A-Za-z];
368387
fragment DQUOTA_STRING: '"' ( '\\'. | '""' | ~('"'| '\\') )* '"';

src/grammar/flinksql/FlinkSqlParser.g4

Lines changed: 159 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,7 @@ physicalColumnDefinition
134134
;
135135

136136
columnName
137-
: plusUid | expression
137+
: uid | expression
138138
;
139139

140140
columnNameList
@@ -380,7 +380,7 @@ insertMulStatement
380380

381381
queryStatement
382382
: valuesCaluse
383-
| WITH withItem (COMMA withItem)* queryStatement
383+
| withClause queryStatement
384384
| '(' queryStatement ')'
385385
| left=queryStatement operator=(INTERSECT | UNION | EXCEPT) ALL? right=queryStatement orderByCaluse? limitClause?
386386
| selectClause orderByCaluse? limitClause?
@@ -391,6 +391,10 @@ valuesCaluse
391391
: VALUES expression (COMMA expression )*
392392
;
393393

394+
withClause
395+
: WITH withItem (COMMA withItem)*
396+
;
397+
394398
withItem
395399
: withItemName (LR_BRACKET columnName (COMMA columnName)* RR_BRACKET)? AS LR_BRACKET queryStatement RR_BRACKET
396400
;
@@ -401,14 +405,21 @@ withItemName
401405

402406
selectStatement
403407
: selectClause fromClause whereClause? groupByClause? havingClause? windowClause?
408+
| selectClause fromClause matchRecognizeClause
404409
;
405410

406411
selectClause
407412
: SELECT setQuantifier? (ASTERISK_SIGN | projectItemDefinition (COMMA projectItemDefinition)*)
408413
;
409414

410415
projectItemDefinition
411-
: expression (AS? expression)?
416+
: overWindowItem
417+
| expression (AS? expression)?
418+
;
419+
420+
overWindowItem
421+
: primaryExpression OVER windowSpec AS strictIdentifier
422+
| primaryExpression OVER errorCapturingIdentifier AS strictIdentifier
412423
;
413424

414425
fromClause
@@ -419,6 +430,8 @@ tableExpression
419430
: tableReference (COMMA tableReference)*
420431
| tableExpression NATURAL? (LEFT | RIGHT | FULL | INNER)? OUTER? JOIN tableExpression joinCondition?
421432
| tableExpression CROSS JOIN tableExpression
433+
| inlineDataValueClause
434+
| windoTVFClause
422435
;
423436

424437
tableReference
@@ -444,6 +457,46 @@ dateTimeExpression
444457
: expression
445458
;
446459

460+
inlineDataValueClause
461+
: LR_BRACKET valuesDefinition RR_BRACKET tableAlias
462+
;
463+
464+
windoTVFClause
465+
: TABLE LR_BRACKET windowTVFExression RR_BRACKET
466+
;
467+
468+
windowTVFExression
469+
: windoTVFName LR_BRACKET windowTVFParam (COMMA windowTVFParam)* RR_BRACKET
470+
;
471+
472+
windoTVFName
473+
: TUMBLE
474+
| HOP
475+
| CUMULATE
476+
;
477+
478+
windowTVFParam
479+
: TABLE timeAttrColumn
480+
| columnDescriptor
481+
| timeIntervalExpression
482+
| DATA DOUBLE_ARROW TABLE timeAttrColumn
483+
| TIMECOL DOUBLE_ARROW columnDescriptor
484+
| timeIntervalParamName DOUBLE_ARROW timeIntervalExpression
485+
;
486+
487+
timeIntervalParamName
488+
: DATA
489+
| TIMECOL
490+
| SIZE
491+
| OFFSET
492+
| STEP
493+
| SLIDE
494+
;
495+
496+
columnDescriptor
497+
: DESCRIPTOR LR_BRACKET uid RR_BRACKET
498+
;
499+
447500
joinCondition
448501
: ON booleanExpression
449502
| USING LR_BRACKET uid (COMMA uid)* RR_BRACKET
@@ -459,27 +512,38 @@ groupByClause
459512

460513
groupItemDefinition
461514
: expression
515+
| groupWindowFunction
462516
| LR_BRACKET RR_BRACKET
463517
| LR_BRACKET expression (COMMA expression)* RR_BRACKET
464-
| CUBE LR_BRACKET expression (COMMA expression)* RR_BRACKET
465-
| ROLLUP LR_BRACKET expression (COMMA expression)* RR_BRACKET
466-
| GROUPING SETS LR_BRACKET groupItemDefinition (COMMA groupItemDefinition)* RR_BRACKET
518+
| groupingSetsNotaionName LR_BRACKET expression (COMMA expression)* RR_BRACKET
519+
| groupingSets LR_BRACKET groupItemDefinition (COMMA groupItemDefinition)* RR_BRACKET
467520
;
468521

469-
havingClause
470-
: HAVING booleanExpression
522+
groupingSets
523+
: GROUPING SETS
471524
;
472525

473-
orderByCaluse
474-
: ORDER BY orderItemDefition (COMMA orderItemDefition)*
526+
groupingSetsNotaionName
527+
: CUBE
528+
| ROLLUP
475529
;
476530

477-
orderItemDefition
478-
: expression (ASC | DESC)?
531+
groupWindowFunction
532+
: groupWindowFunctionName LR_BRACKET timeAttrColumn COMMA timeIntervalExpression RR_BRACKET
479533
;
480534

481-
limitClause
482-
: LIMIT (ALL | limit=expression)
535+
groupWindowFunctionName
536+
: TUMBLE
537+
| HOP
538+
| SESSION
539+
;
540+
541+
timeAttrColumn
542+
: uid
543+
;
544+
545+
havingClause
546+
: HAVING booleanExpression
483547
;
484548

485549
windowClause
@@ -492,26 +556,99 @@ namedWindow
492556

493557
windowSpec
494558
: name=errorCapturingIdentifier?
495-
'('
496-
(ORDER BY sortItem (',' sortItem)*)?
497-
(PARTITION BY expression (',' expression)*)?
559+
LR_BRACKET
560+
partitionByClause?
561+
orderByCaluse?
498562
windowFrame?
499-
')'
563+
RR_BRACKET
500564
;
501565

502-
sortItem
566+
matchRecognizeClause
567+
: MATCH_RECOGNIZE
568+
LR_BRACKET
569+
partitionByClause?
570+
orderByCaluse?
571+
measuresClause?
572+
outputMode?
573+
afterMatchStrategy?
574+
patternDefination?
575+
patternVariablesDefination
576+
RR_BRACKET ( AS? strictIdentifier )?
577+
;
578+
579+
orderByCaluse
580+
: ORDER BY orderItemDefition (COMMA orderItemDefition)*
581+
;
582+
583+
orderItemDefition
503584
: expression ordering=(ASC | DESC)? (NULLS nullOrder=(LAST | FIRST))?
504585
;
505586

587+
limitClause
588+
: LIMIT (ALL | limit=expression)
589+
;
590+
591+
partitionByClause
592+
: PARTITION BY expression (COMMA expression)*
593+
;
594+
595+
quantifiers
596+
: (ASTERISK_SIGN)
597+
| (ADD_SIGN)
598+
| (QUESTION_MARK_SIGN)
599+
| (LB_BRACKET DIG_LITERAL COMMA DIG_LITERAL RB_BRACKET)
600+
| (LB_BRACKET DIG_LITERAL COMMA RB_BRACKET)
601+
| (LB_BRACKET COMMA DIG_LITERAL RB_BRACKET)
602+
;
603+
604+
measuresClause
605+
: MEASURES projectItemDefinition (COMMA projectItemDefinition)*
606+
;
607+
608+
patternDefination
609+
: PATTERN
610+
LR_BRACKET
611+
patternVariable+
612+
RR_BRACKET
613+
withinClause?
614+
;
615+
616+
patternVariable
617+
: unquotedIdentifier quantifiers?
618+
;
619+
620+
outputMode
621+
: ALL ROWS PER MATCH
622+
| ONE ROW PER MATCH
623+
;
624+
625+
afterMatchStrategy
626+
: AFTER MATCH KW_SKIP PAST LAST ROW
627+
| AFTER MATCH KW_SKIP TO NEXT ROW
628+
| AFTER MATCH KW_SKIP TO LAST unquotedIdentifier
629+
| AFTER MATCH KW_SKIP TO FIRST unquotedIdentifier
630+
;
631+
632+
patternVariablesDefination
633+
: DEFINE projectItemDefinition (COMMA projectItemDefinition)*
634+
;
635+
506636
windowFrame
507-
: RANGE frameBound
508-
| ROWS frameBound
637+
: RANGE BETWEEN timeIntervalExpression frameBound
638+
| ROWS BETWEEN DIG_LITERAL frameBound
509639
;
510640

511641
frameBound
512-
: expression PRECEDING
642+
: PRECEDING AND CURRENT ROW
643+
;
644+
645+
withinClause
646+
: WITHIN timeIntervalExpression
513647
;
514648

649+
timeIntervalExpression
650+
: INTERVAL STRING_LITERAL ID_LITERAL
651+
;
515652

516653
// expression
517654

@@ -686,10 +823,6 @@ uid
686823
: ID_LITERAL DOT_ID*?
687824
;
688825

689-
plusUid // matches identifiers such as xxx.$xx, xx:xxxx, etc.
690-
: (ID_LITERAL | PLUS_ID_LITERAL) (DOT_ID | PLUS_DOT_ID)*?
691-
;
692-
693826
withOption
694827
: WITH tablePropertyList
695828
;

0 commit comments

Comments
 (0)