2016-10-17 07:14:01 +03:00
|
|
|
package ast
|
|
|
|
|
|
|
|
import (
|
|
|
|
"fmt"
|
2023-08-12 21:54:14 +03:00
|
|
|
"log/slog"
|
2020-08-23 14:16:16 +03:00
|
|
|
"strings"
|
2016-10-17 07:14:01 +03:00
|
|
|
|
2023-11-20 04:06:36 +03:00
|
|
|
antlr "github.com/antlr4-go/antlr/v4"
|
2023-07-27 07:19:11 +03:00
|
|
|
|
2023-11-20 04:06:36 +03:00
|
|
|
"github.com/neilotoole/sq/libsq/ast/internal/slq"
|
|
|
|
"github.com/neilotoole/sq/libsq/core/errz"
|
2023-04-02 22:49:45 +03:00
|
|
|
"github.com/neilotoole/sq/libsq/core/lg/lga"
|
|
|
|
"github.com/neilotoole/sq/libsq/core/stringz"
|
2020-08-06 20:58:47 +03:00
|
|
|
)
|
2016-10-17 07:14:01 +03:00
|
|
|
|
2020-08-06 20:58:47 +03:00
|
|
|
// parseSLQ processes SLQ input text according to the rules of the SQL grammar,
|
|
|
|
// and returns a parse tree. It executes both lexer and parser phases.
|
2023-04-02 22:49:45 +03:00
|
|
|
func parseSLQ(log *slog.Logger, input string) (*slq.QueryContext, error) {
|
2020-08-06 20:58:47 +03:00
|
|
|
lex := slq.NewSLQLexer(antlr.NewInputStream(input))
|
2016-10-17 07:14:01 +03:00
|
|
|
lex.RemoveErrorListeners() // the generated lexer has default listeners we don't want
|
2020-08-06 20:58:47 +03:00
|
|
|
lexErrs := &antlrErrorListener{name: "lexer", log: log}
|
2016-10-17 07:14:01 +03:00
|
|
|
lex.AddErrorListener(lexErrs)
|
|
|
|
|
2020-08-06 20:58:47 +03:00
|
|
|
p := slq.NewSLQParser(antlr.NewCommonTokenStream(lex, 0))
|
2016-10-17 07:14:01 +03:00
|
|
|
p.RemoveErrorListeners() // the generated parser has default listeners we don't want
|
2020-08-06 20:58:47 +03:00
|
|
|
parseErrs := &antlrErrorListener{name: "parser", log: log}
|
2016-10-17 07:14:01 +03:00
|
|
|
p.AddErrorListener(parseErrs)
|
|
|
|
|
2023-07-27 07:34:45 +03:00
|
|
|
qCtx := p.Query()
|
2023-07-27 07:19:11 +03:00
|
|
|
if err := lexErrs.error(); err != nil {
|
|
|
|
return nil, errz.Err(err)
|
2016-10-17 07:14:01 +03:00
|
|
|
}
|
2020-08-06 20:58:47 +03:00
|
|
|
|
2023-07-27 07:19:11 +03:00
|
|
|
if err := parseErrs.error(); err != nil {
|
|
|
|
return nil, errz.Err(err)
|
2016-10-17 07:14:01 +03:00
|
|
|
}
|
|
|
|
|
2020-08-06 20:58:47 +03:00
|
|
|
return qCtx.(*slq.QueryContext), nil
|
2016-10-17 07:14:01 +03:00
|
|
|
}
|
|
|
|
|
2023-08-12 22:26:25 +03:00
|
|
|
var _ antlr.ErrorListener = (*antlrErrorListener)(nil)
|
|
|
|
|
2016-10-17 07:14:01 +03:00
|
|
|
type antlrErrorListener struct {
|
2024-01-27 10:11:24 +03:00
|
|
|
err error
|
2023-04-02 22:49:45 +03:00
|
|
|
log *slog.Logger
|
2016-10-17 07:14:01 +03:00
|
|
|
name string
|
|
|
|
errs []string
|
|
|
|
warnings []string
|
|
|
|
}
|
|
|
|
|
2021-09-13 01:14:30 +03:00
|
|
|
// SyntaxError implements antlr.ErrorListener.
|
2023-04-01 11:38:32 +03:00
|
|
|
//
|
|
|
|
//nolint:revive
|
2023-08-12 22:26:25 +03:00
|
|
|
func (el *antlrErrorListener) SyntaxError(recognizer antlr.Recognizer, offendingSymbol interface{},
|
2023-06-18 04:28:11 +03:00
|
|
|
line, column int, msg string, e antlr.RecognitionException,
|
2022-12-18 11:35:59 +03:00
|
|
|
) {
|
2016-10-17 07:14:01 +03:00
|
|
|
text := fmt.Sprintf("%s: syntax error: [%d:%d] %s", el.name, line, column, msg)
|
2020-08-06 20:58:47 +03:00
|
|
|
el.errs = append(el.errs, text)
|
2016-10-17 07:14:01 +03:00
|
|
|
}
|
|
|
|
|
2021-09-13 01:14:30 +03:00
|
|
|
// ReportAmbiguity implements antlr.ErrorListener.
|
2023-04-01 11:38:32 +03:00
|
|
|
//
|
|
|
|
//nolint:revive
|
2023-06-18 04:28:11 +03:00
|
|
|
func (el *antlrErrorListener) ReportAmbiguity(recognizer antlr.Parser, dfa *antlr.DFA,
|
2023-08-12 22:26:25 +03:00
|
|
|
startIndex, stopIndex int, exact bool, ambigAlts *antlr.BitSet, configs *antlr.ATNConfigSet,
|
2022-12-18 11:35:59 +03:00
|
|
|
) {
|
2020-08-06 20:58:47 +03:00
|
|
|
tok := recognizer.GetCurrentToken()
|
2016-10-17 07:14:01 +03:00
|
|
|
text := fmt.Sprintf("%s: syntax ambiguity: [%d:%d]", el.name, startIndex, stopIndex)
|
2020-08-06 20:58:47 +03:00
|
|
|
text = text + " >>" + tok.GetText() + "<<"
|
|
|
|
el.warnings = append(el.warnings, text)
|
2016-10-17 07:14:01 +03:00
|
|
|
}
|
|
|
|
|
2021-09-13 01:14:30 +03:00
|
|
|
// ReportAttemptingFullContext implements antlr.ErrorListener.
|
2023-04-01 11:38:32 +03:00
|
|
|
//
|
|
|
|
//nolint:revive
|
2023-06-18 04:28:11 +03:00
|
|
|
func (el *antlrErrorListener) ReportAttemptingFullContext(recognizer antlr.Parser, dfa *antlr.DFA,
|
2023-08-12 22:26:25 +03:00
|
|
|
startIndex, stopIndex int, conflictingAlts *antlr.BitSet, configs *antlr.ATNConfigSet,
|
2022-12-18 11:35:59 +03:00
|
|
|
) {
|
2016-10-17 07:14:01 +03:00
|
|
|
text := fmt.Sprintf("%s: attempting full context: [%d:%d]", el.name, startIndex, stopIndex)
|
2020-08-06 20:58:47 +03:00
|
|
|
el.warnings = append(el.warnings, text)
|
2016-10-17 07:14:01 +03:00
|
|
|
}
|
|
|
|
|
2021-09-13 01:14:30 +03:00
|
|
|
// ReportContextSensitivity implements antlr.ErrorListener.
|
2023-04-01 11:38:32 +03:00
|
|
|
//
|
|
|
|
//nolint:revive
|
2023-08-12 22:26:25 +03:00
|
|
|
func (el *antlrErrorListener) ReportContextSensitivity(recognizer antlr.Parser, dfa *antlr.DFA,
|
|
|
|
startIndex, stopIndex, prediction int, configs *antlr.ATNConfigSet,
|
2022-12-18 11:35:59 +03:00
|
|
|
) {
|
2016-10-17 07:14:01 +03:00
|
|
|
text := fmt.Sprintf("%s: context sensitivity: [%d:%d]", el.name, startIndex, stopIndex)
|
2020-08-06 20:58:47 +03:00
|
|
|
el.warnings = append(el.warnings, text)
|
2016-10-17 07:14:01 +03:00
|
|
|
}
|
|
|
|
|
2023-08-12 22:26:25 +03:00
|
|
|
func (el *antlrErrorListener) error() error {
|
|
|
|
if el.err == nil && len(el.errs) > 0 {
|
|
|
|
msg := strings.Join(el.errs, "\n")
|
|
|
|
el.err = &parseError{msg: msg}
|
|
|
|
}
|
|
|
|
return el.err
|
|
|
|
}
|
|
|
|
|
|
|
|
func (el *antlrErrorListener) String() string {
|
|
|
|
if len(el.errs)+len(el.warnings) == 0 {
|
|
|
|
return fmt.Sprintf("%s: no issues", el.name)
|
|
|
|
}
|
|
|
|
|
|
|
|
strs := make([]string, 0, len(el.errs)+len(el.warnings))
|
|
|
|
strs = append(strs, el.errs...)
|
|
|
|
strs = append(strs, el.warnings...)
|
|
|
|
|
|
|
|
return strings.Join(strs, "\n")
|
|
|
|
}
|
|
|
|
|
2021-01-04 04:20:05 +03:00
|
|
|
// parseError represents an error in lexing/parsing input.
type parseError struct {
	// msg is the complete error text, returned verbatim by Error.
	msg string
	// TODO: parse error should include more detail, such as
	// the offending token, position, etc.
}

// Error implements error. It returns the stored message unchanged.
func (e *parseError) Error() string {
	return e.msg
}
|
2020-08-06 20:58:47 +03:00
|
|
|
|
|
|
|
var _ slq.SLQVisitor = (*parseTreeVisitor)(nil)
|
|
|
|
|
|
|
|
// parseTreeVisitor implements slq.SLQVisitor to
|
|
|
|
// generate the preliminary AST.
|
|
|
|
type parseTreeVisitor struct {
|
2023-04-02 22:49:45 +03:00
|
|
|
log *slog.Logger
|
2023-03-19 07:58:00 +03:00
|
|
|
|
2020-08-06 20:58:47 +03:00
|
|
|
// cur is the currently-active node of the AST.
|
2023-03-22 09:17:34 +03:00
|
|
|
// This value is modified as the tree is descended.
|
2020-08-06 20:58:47 +03:00
|
|
|
cur Node
|
2023-03-19 07:58:00 +03:00
|
|
|
|
2023-03-22 09:17:34 +03:00
|
|
|
ast *AST
|
2020-08-06 20:58:47 +03:00
|
|
|
}
|
|
|
|
|
2023-03-19 07:58:00 +03:00
|
|
|
// using is a convenience function that sets v.cur to cur,
|
|
|
|
// executes fn, and then restores v.cur to its previous value.
|
|
|
|
// The type of the returned value is declared as "any" instead of
|
|
|
|
// error, because that's the generated antlr code returns "any".
|
2023-03-26 04:20:53 +03:00
|
|
|
func (v *parseTreeVisitor) using(node Node, fn func() any) any {
|
2023-03-19 07:58:00 +03:00
|
|
|
prev := v.cur
|
2023-03-26 04:20:53 +03:00
|
|
|
v.cur = node
|
2023-03-19 07:58:00 +03:00
|
|
|
defer func() { v.cur = prev }()
|
|
|
|
return fn()
|
|
|
|
}
|
|
|
|
|
2021-09-13 01:14:30 +03:00
|
|
|
// Visit implements antlr.ParseTreeVisitor.
|
2022-12-17 02:34:33 +03:00
|
|
|
func (v *parseTreeVisitor) Visit(ctx antlr.ParseTree) any {
|
2023-04-02 22:49:45 +03:00
|
|
|
v.log.Debug("Visit",
|
|
|
|
lga.Type, stringz.Type(ctx),
|
|
|
|
lga.Text, ctx.GetText(),
|
|
|
|
)
|
2020-08-06 20:58:47 +03:00
|
|
|
|
|
|
|
switch ctx := ctx.(type) {
|
|
|
|
case *slq.SegmentContext:
|
|
|
|
return v.VisitSegment(ctx)
|
|
|
|
case *slq.ElementContext:
|
|
|
|
return v.VisitElement(ctx)
|
2023-03-22 09:17:34 +03:00
|
|
|
case *slq.HandleContext:
|
|
|
|
return v.VisitHandle(ctx)
|
|
|
|
case *slq.HandleTableContext:
|
|
|
|
return v.VisitHandleTable(ctx)
|
|
|
|
case *slq.SelectorContext:
|
|
|
|
return v.VisitSelector(ctx)
|
2023-03-27 05:03:40 +03:00
|
|
|
case *slq.FuncElementContext:
|
|
|
|
return v.VisitFuncElement(ctx)
|
|
|
|
case *slq.FuncContext:
|
|
|
|
return v.VisitFunc(ctx)
|
|
|
|
case *slq.FuncNameContext:
|
|
|
|
return v.VisitFuncName(ctx)
|
2020-08-06 20:58:47 +03:00
|
|
|
case *slq.JoinContext:
|
|
|
|
return v.VisitJoin(ctx)
|
2023-03-19 07:58:00 +03:00
|
|
|
case *slq.AliasContext:
|
|
|
|
return v.VisitAlias(ctx)
|
2023-07-03 18:34:19 +03:00
|
|
|
case *slq.JoinTableContext:
|
|
|
|
return v.VisitJoinTable(ctx)
|
2020-08-06 20:58:47 +03:00
|
|
|
case *slq.RowRangeContext:
|
|
|
|
return v.VisitRowRange(ctx)
|
2023-06-17 07:54:25 +03:00
|
|
|
case *slq.ExprElementContext:
|
|
|
|
return v.VisitExprElement(ctx)
|
2020-08-06 20:58:47 +03:00
|
|
|
case *slq.ExprContext:
|
|
|
|
return v.VisitExpr(ctx)
|
2023-03-27 05:03:40 +03:00
|
|
|
case *slq.GroupByContext:
|
|
|
|
return v.VisitGroupBy(ctx)
|
|
|
|
case *slq.GroupByTermContext:
|
|
|
|
return v.VisitGroupByTerm(ctx)
|
2023-11-22 20:56:19 +03:00
|
|
|
case *slq.HavingContext:
|
|
|
|
return v.VisitHaving(ctx)
|
2023-03-26 04:20:53 +03:00
|
|
|
case *slq.OrderByContext:
|
|
|
|
return v.VisitOrderBy(ctx)
|
|
|
|
case *slq.OrderByTermContext:
|
|
|
|
return v.VisitOrderByTerm(ctx)
|
2020-08-06 20:58:47 +03:00
|
|
|
case *slq.LiteralContext:
|
|
|
|
return v.VisitLiteral(ctx)
|
|
|
|
case *antlr.TerminalNodeImpl:
|
|
|
|
return v.VisitTerminal(ctx)
|
2023-03-22 09:17:34 +03:00
|
|
|
case *slq.SelectorElementContext:
|
|
|
|
return v.VisitSelectorElement(ctx)
|
2023-03-28 09:48:24 +03:00
|
|
|
case *slq.UniqueFuncContext:
|
|
|
|
return v.VisitUniqueFunc(ctx)
|
|
|
|
case *slq.CountFuncContext:
|
|
|
|
return v.VisitCountFunc(ctx)
|
2023-06-17 07:54:25 +03:00
|
|
|
case *slq.WhereContext:
|
|
|
|
return v.VisitWhere(ctx)
|
2023-04-07 11:00:49 +03:00
|
|
|
case *slq.ArgContext:
|
|
|
|
return v.VisitArg(ctx)
|
2020-08-06 20:58:47 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
// should never be reached
|
|
|
|
return errorf("unknown node type: %T", ctx)
|
|
|
|
}
|
|
|
|
|
2021-09-13 01:14:30 +03:00
|
|
|
// VisitChildren implements antlr.ParseTreeVisitor.
|
2022-12-17 02:34:33 +03:00
|
|
|
func (v *parseTreeVisitor) VisitChildren(ctx antlr.RuleNode) any {
|
2020-08-06 20:58:47 +03:00
|
|
|
for _, child := range ctx.GetChildren() {
|
|
|
|
tree, ok := child.(antlr.ParseTree)
|
|
|
|
if !ok {
|
2023-04-02 22:49:45 +03:00
|
|
|
return errorf("unknown child node type: %T(%s)", child, child.GetPayload())
|
2020-08-06 20:58:47 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
err := v.Visit(tree)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2021-09-13 01:14:30 +03:00
|
|
|
// VisitQuery implements slq.SLQVisitor.
|
2022-12-17 02:34:33 +03:00
|
|
|
func (v *parseTreeVisitor) VisitQuery(ctx *slq.QueryContext) any {
|
2023-03-22 09:17:34 +03:00
|
|
|
v.ast = &AST{}
|
|
|
|
v.ast.ctx = ctx
|
|
|
|
v.ast.text = ctx.GetText()
|
|
|
|
v.cur = v.ast
|
2020-08-06 20:58:47 +03:00
|
|
|
|
|
|
|
for _, seg := range ctx.AllSegment() {
|
|
|
|
err := v.VisitSegment(seg.(*slq.SegmentContext))
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2021-09-13 01:14:30 +03:00
|
|
|
// VisitElement implements slq.SLQVisitor.
|
2022-12-17 02:34:33 +03:00
|
|
|
func (v *parseTreeVisitor) VisitElement(ctx *slq.ElementContext) any {
|
2020-08-06 20:58:47 +03:00
|
|
|
return v.VisitChildren(ctx)
|
|
|
|
}
|
|
|
|
|
2021-09-13 01:14:30 +03:00
|
|
|
// VisitStmtList implements slq.SLQVisitor.
|
2023-04-01 11:38:32 +03:00
|
|
|
func (v *parseTreeVisitor) VisitStmtList(_ *slq.StmtListContext) any {
|
2020-08-06 20:58:47 +03:00
|
|
|
return nil // not using StmtList just yet
|
|
|
|
}
|
|
|
|
|
2021-09-13 01:14:30 +03:00
|
|
|
// VisitUnaryOperator implements slq.SLQVisitor.
|
2023-04-01 11:38:32 +03:00
|
|
|
func (v *parseTreeVisitor) VisitUnaryOperator(_ *slq.UnaryOperatorContext) any {
|
2020-08-06 20:58:47 +03:00
|
|
|
return nil
|
|
|
|
}
|
2021-09-13 01:14:30 +03:00
|
|
|
|
|
|
|
// VisitTerminal implements slq.SLQVisitor.
|
2022-12-17 02:34:33 +03:00
|
|
|
func (v *parseTreeVisitor) VisitTerminal(ctx antlr.TerminalNode) any {
|
2020-08-06 20:58:47 +03:00
|
|
|
val := ctx.GetText()
|
|
|
|
|
|
|
|
if isOperator(val) {
|
2023-03-26 04:20:53 +03:00
|
|
|
op := &OperatorNode{}
|
2020-08-06 20:58:47 +03:00
|
|
|
op.ctx = ctx
|
2023-06-17 07:54:25 +03:00
|
|
|
op.text = ctx.GetText()
|
2020-08-06 20:58:47 +03:00
|
|
|
|
|
|
|
err := op.SetParent(v.cur)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
err = v.cur.AddChild(op)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2023-03-19 07:58:00 +03:00
|
|
|
// Unknown terminal, but that's not a problem.
|
2020-08-06 20:58:47 +03:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2021-09-13 01:14:30 +03:00
|
|
|
// VisitErrorNode implements slq.SLQVisitor.
|
2022-12-17 02:34:33 +03:00
|
|
|
func (v *parseTreeVisitor) VisitErrorNode(ctx antlr.ErrorNode) any {
|
2023-08-12 21:54:14 +03:00
|
|
|
v.log.Debug("Error node", lga.Val, ctx.GetText())
|
2020-08-06 20:58:47 +03:00
|
|
|
return nil
|
|
|
|
}
|