refactor order-by fd check#14
refactor order-by fd check#14AilinKid wants to merge 13 commits intowinoros:functional-dependencyfrom
Conversation
Signed-off-by: AilinKid <314806019@qq.com>
Signed-off-by: ailinkid <314806019@qq.com>
Signed-off-by: ailinkid <314806019@qq.com>
planner/core/logical_plan_builder.go
Outdated
|
|
||
| // check whether ORDER BY items show up in SELECT DISTINCT fields, see #12442 | ||
| //if hasDistinct && projExprs != nil { | ||
| // err = b.checkOrderByInDistinct(item, i, it, p, projExprs, oldLen) |
There was a problem hiding this comment.
This undoes what pingcap#21286 did for DISTINCT with ORDER BY, leaving that case to be checked by the new FD logic (passed).
|
this also revealed ORM's only full group check for |
Signed-off-by: ailinkid <314806019@qq.com>
Signed-off-by: ailinkid <314806019@qq.com>
Signed-off-by: ailinkid <314806019@qq.com>
Signed-off-by: arenatlx <arenatlx@arenatlxdeMacBook-Pro.local>
Signed-off-by: arenatlx <arenatlx@arenatlxdeMacBook-Pro.local>
| } | ||
|
|
||
| // basic order-by checking, nothing to do with only-full-group-by mode | ||
| if err := b.checkOrderByPart1(ctx, sel, p); err != nil { |
There was a problem hiding this comment.
Or we can change the method name to basicOrderByCheck to avoid the part1.
| // Since here doesn't require FD to check, this can take place as soon as possible. | ||
| // Step1: | ||
| isNonAggregatedQuery := false | ||
| resolver4AggDetect := colResolverForOnlyFullGroupBy{ |
There was a problem hiding this comment.
Oh, why do we need the colResolverForOnlyFullGroupBy?
This one should be removed once we fully remove the old only_full_group_by check.
| if !b.isCreateView { | ||
| // check only-full-group-by 4 order-by clause with group-by clause | ||
| fds := proj.ExtractFD() | ||
| if sel.OrderBy != nil { | ||
| selectExprsUniqueIDs := fd.NewFastIntSet() | ||
| for _, expr := range proj.Exprs[:oldLen] { | ||
| switch x := expr.(type) { | ||
| case *expression.Column: | ||
| selectExprsUniqueIDs.Insert(int(x.UniqueID)) | ||
| case *expression.ScalarFunction: | ||
| scalarUniqueID, ok := fds.IsHashCodeRegistered(string(hack.String(x.HashCode(p.SCtx().GetSessionVars().StmtCtx)))) | ||
| if !ok { | ||
| panic("selected expr must have been registered, shouldn't be here") | ||
| } | ||
| selectExprsUniqueIDs.Insert(scalarUniqueID) | ||
| default: | ||
| } | ||
| } | ||
| if sel.GroupBy != nil { | ||
| for offset, odrItem := range orderByItemExprs { | ||
| item := fd.NewFastIntSet() | ||
| switch x := odrItem.Expr.(type) { | ||
| case *expression.Column: | ||
| item.Insert(int(x.UniqueID)) | ||
| case *expression.ScalarFunction: | ||
| // order by item may not be projected as a column in projection, allocated a new one | ||
| scalarUniqueID, ok := fds.IsHashCodeRegistered(string(hack.String(x.HashCode(p.SCtx().GetSessionVars().StmtCtx)))) | ||
| if !ok { | ||
| // panic("selected expr must have been registered, shouldn't be here") | ||
| scalarUniqueID = int(b.ctx.GetSessionVars().AllocPlanColumnID()) | ||
| } | ||
| item.Insert(scalarUniqueID) | ||
| case *expression.Constant: | ||
| // order by null/false/true can always be ok, let item be empty. | ||
| default: | ||
| } | ||
| // Step#1: whether order by item is in the origin select field list | ||
| if item.SubsetOf(selectExprsUniqueIDs) { | ||
| continue | ||
| } | ||
| // Step#2: whether order by item is in the group by list | ||
| if item.SubsetOf(fds.GroupByCols) { | ||
| continue | ||
| } | ||
| // Step#3: whether order by item is in the FD closure of group-by & select list items. | ||
| if item.SubsetOf(fds.ConstantCols()) { | ||
| continue | ||
| } | ||
| strictClosureOfGroupByCols := fds.ClosureOfStrict(fds.GroupByCols) | ||
| if item.SubsetOf(strictClosureOfGroupByCols) { | ||
| continue | ||
| } | ||
| strictClosureOfSelectCols := fds.ClosureOfStrict(selectExprsUniqueIDs) | ||
| if item.SubsetOf(strictClosureOfSelectCols) { | ||
| continue | ||
| } | ||
| // locate the base col that are not in (constant list / group by list / strict fd closure) for error show. | ||
| baseCols := expression.ExtractColumns(odrItem.Expr) | ||
| if len(baseCols) == 0 { | ||
| // order by item has no reference of base col, like order by abs(1) and rand() | ||
| continue | ||
| } | ||
| // GROUP BY t1.a, t2.b ORDER BY COALESCE(MIN(t.c), t2.b), agg min is determined by group-col definitely. | ||
| baseColsUniqueIDs := fd.NewFastIntSet() | ||
| for _, bc := range baseCols { | ||
| baseColsUniqueIDs.Insert(int(bc.UniqueID)) | ||
| } | ||
| if baseColsUniqueIDs.SubsetOf(strictClosureOfGroupByCols) { | ||
| continue | ||
| } | ||
| if baseColsUniqueIDs.SubsetOf(strictClosureOfSelectCols) { | ||
| continue | ||
| } | ||
| errShowCol := baseCols[0] | ||
| for _, col := range baseCols { | ||
| colSet := fd.NewFastIntSet(int(col.UniqueID)) | ||
| if !colSet.SubsetOf(strictClosureOfGroupByCols) && !colSet.SubsetOf(strictClosureOfSelectCols) { | ||
| errShowCol = col | ||
| break | ||
| } | ||
| } | ||
| // better use the schema alias name firstly if any. | ||
| name := "" | ||
| for idx, schemaCol := range proj.Schema().Columns { | ||
| if schemaCol.UniqueID == errShowCol.UniqueID { | ||
| name = proj.names[idx].String() | ||
| break | ||
| } | ||
| } | ||
| if name == "" { | ||
| name = errShowCol.OrigName | ||
| } | ||
| return nil, ErrFieldNotInGroupBy.GenWithStackByArgs(offset+1, ErrExprInOrderBy, name) | ||
| } | ||
| } | ||
| if sel.Distinct { | ||
| // order-by with distinct case | ||
| // Rule #1: order by item should be in the select field list | ||
| // Rule #2: the base col that order by item dependent on should be in the select field list | ||
| for offset, odrItem := range orderByItemExprs { | ||
| item := fd.NewFastIntSet() | ||
| switch x := odrItem.Expr.(type) { | ||
| case *expression.Column: | ||
| item.Insert(int(x.UniqueID)) | ||
| case *expression.ScalarFunction: | ||
| // order by item may not be projected as a column in projection, allocated a new one | ||
| scalarUniqueID, ok := fds.IsHashCodeRegistered(string(hack.String(x.HashCode(p.SCtx().GetSessionVars().StmtCtx)))) | ||
| if !ok { | ||
| // panic("selected expr must have been registered, shouldn't be here") | ||
| scalarUniqueID = int(b.ctx.GetSessionVars().AllocPlanColumnID()) | ||
| } | ||
| item.Insert(scalarUniqueID) | ||
| default: | ||
| } | ||
| // Rule #1 | ||
| if item.SubsetOf(selectExprsUniqueIDs) { | ||
| continue | ||
| } | ||
| // Rule #2 | ||
| baseCols := expression.ExtractColumns(odrItem.Expr) | ||
| if len(baseCols) == 0 { | ||
| // order by item has no reference of base col, like order by abs(1) and rand() | ||
| continue | ||
| } | ||
| colSet := fd.NewFastIntSet() | ||
| for _, col := range baseCols { | ||
| colSet.Insert(int(col.UniqueID)) | ||
| } | ||
| if colSet.SubsetOf(selectExprsUniqueIDs) { | ||
| continue | ||
| } | ||
| // find that error base col | ||
| errShowCol := baseCols[0] | ||
| for _, col := range baseCols { | ||
| colSet := fd.NewFastIntSet() | ||
| colSet.Insert(int(col.UniqueID)) | ||
| if !colSet.SubsetOf(selectExprsUniqueIDs) { | ||
| errShowCol = col | ||
| break | ||
| } | ||
| } | ||
| // better use the schema alias name firstly if any. | ||
| name := "" | ||
| for idx, schemaCol := range proj.Schema().Columns { | ||
| if schemaCol.UniqueID == errShowCol.UniqueID { | ||
| name = proj.names[idx].String() | ||
| break | ||
| } | ||
| } | ||
| if name == "" { | ||
| name = errShowCol.OrigName | ||
| } | ||
| if _, ok := sel.OrderBy.Items[offset].Expr.(*ast.AggregateFuncExpr); ok { | ||
| return nil, ErrAggregateInOrderNotSelect.GenWithStackByArgs(offset+1, "DISTINCT") | ||
| } | ||
| // select distinct count(a) from t group by b order by sum(a); ✗ | ||
| return nil, ErrFieldInOrderNotSelect.GenWithStackByArgs(offset+1, name, "DISTINCT") | ||
| } | ||
| } | ||
| } |
There was a problem hiding this comment.
Seems that now we can just add one more condition, becoming !b.isCreateView && b.ctx.GetSessionVars().SQLMode.HasOnlyFullGroupBy().
Then we can merge the buildSort and buildSortWithCheck.
Going further, we can extract this if block into a separate method.
| case *expression.ScalarFunction: | ||
| scalarUniqueID, ok := fds.IsHashCodeRegistered(string(hack.String(x.HashCode(p.SCtx().GetSessionVars().StmtCtx)))) | ||
| if !ok { | ||
| panic("selected expr must have been registered, shouldn't be here") |
| panic("selected expr must have been registered, shouldn't be here") | ||
| } | ||
| selectExprsUniqueIDs.Insert(scalarUniqueID) | ||
| default: |
There was a problem hiding this comment.
If it's empty, we can just remove it.
| // order by item may not be projected as a column in projection, allocated a new one | ||
| scalarUniqueID, ok := fds.IsHashCodeRegistered(string(hack.String(x.HashCode(p.SCtx().GetSessionVars().StmtCtx)))) | ||
| if !ok { | ||
| // panic("selected expr must have been registered, shouldn't be here") |
| item.Insert(scalarUniqueID) | ||
| case *expression.Constant: | ||
| // order by null/false/true can always be ok, let item be empty. | ||
| default: |
| // Step#3: whether order by item is in the FD closure of group-by & select list items. | ||
| // Since FD hasn't been built yet for now, let's delay this logic to checkOrderByPart2. | ||
| } else { | ||
| // check only-full-group-by 4 order-by clause without group-by clause |
There was a problem hiding this comment.
The following check isn't used for only_full_group_by, just for basic name resolving checks?
Signed-off-by: AilinKid <314806019@qq.com>
2d18cc8 to
2e1c49f
Compare
Signed-off-by: AilinKid <314806019@qq.com>
| conditionNC *FastIntSet | ||
| } | ||
|
|
||
| // ncEdge is quite simple for remarking the null value relationship between cols, storing it as fdEdge will add complexity of traverse of a closure. |
| // The value of the strict and eq bool forms the four kind of edges: | ||
| // functional dependency, lax functional dependency, strict equivalence constraint, lax equivalence constraint. | ||
| // And if there's a functional dependency `const` -> `column` exists. We would let the from side be empty. | ||
| // Adjustment: when strict is true and equiv is false, it means the edge is a Lax equivalence; when both true, |
| // LAX FD: | ||
| // 1: A ~~> C is stronger than AB ~~> C. --- YES | ||
| // 2: A ~~> BC is stronger than A ~~> C. --- NO | ||
| // |
| // --------------------------------- | ||
| // 1 2 1 2 1 1 1 | ||
| // 2 2 null null null null null | ||
| // 3 3 3 3 null null null |
There was a problem hiding this comment.
L134 should be
3 3 null null null null null
Co-authored-by: Yiding Cui <winoros@gmail.com>
Signed-off-by: AilinKid 314806019@qq.com
What problem does this PR solve?
Order-by checking can be divided into two parts.
1: One is with no order by clause, which should collect info to identify whether this query is an aggregated query at all, and then judge whether an agg func in the order by clause is suitable. (This can be done much earlier.)
2: One is with an order by clause, which requires FD to check whether the item in the order by clause is suitable. (This can only be done after the projection is built and the order item has been rewritten as an expression.)
We need more tests to cover this; do not merge before that.
What is changed and how it works?
Check List
Tests
Code changes
Side effects
Related changes
tidb-ansiblerepository