这是我参与「第四届青训营」笔记创作活动的第22天
这次继续记录一下关于项目中SQL解析与验证部分的学习笔记,这是第三部分
- TableScan
TableScan 算子以 Batch 方式实现,通过向量化计算来加速执行
// PhysicalTableScan describes the physical TableScan operator: the table
// being read, the columns and ranges involved, and the ordering flags.
type PhysicalTableScan struct {
physicalSchemaProducer
// AccessCondition is used to calculate range.
AccessCondition []expression.Expression
// filterCondition: presumably the conditions that cannot contribute to
// range calculation and are evaluated as ordinary filters — TODO confirm.
filterCondition []expression.Expression
// Table is the metadata of the scanned table.
Table *model.TableInfo
// Columns is the column metadata associated with this scan.
Columns []*model.ColumnInfo
// DBName is the name of the database the table belongs to.
DBName model.CIStr
// Ranges holds the scan ranges (see AccessCondition).
Ranges []*ranger.Range
// pkCol: presumably the primary-key column of the table — TODO confirm.
pkCol *expression.Column
// TableAsName: presumably the alias given to the table in the query, if any.
TableAsName *model.CIStr
// HandleIdx: presumably the position of the handle column — TODO confirm.
HandleIdx int
// KeepOrder: presumably whether the scan must return rows in order — verify.
KeepOrder bool
// Desc: presumably whether the scan runs in descending order — verify.
Desc bool
}
- IndexScan
全表扫描(索引数据)
// PhysicalIndexScan describes the physical IndexScan operator: the table and
// index being read, the index columns, the scan ranges, and ordering flags.
type PhysicalIndexScan struct {
physicalSchemaProducer
// AccessCondition is used to calculate range.
AccessCondition []expression.Expression
// Table is the metadata of the table that owns the index.
Table *model.TableInfo
// Index is the metadata of the index being scanned.
Index *model.IndexInfo
// IdxCols are the columns of the index, as expression columns.
IdxCols []*expression.Column
// IdxColLens: presumably the (prefix) length of each index column,
// parallel to IdxCols — TODO confirm.
IdxColLens []int
// Ranges holds the scan ranges (see AccessCondition).
Ranges []*ranger.Range
// Columns is the column metadata associated with this scan.
Columns []*model.ColumnInfo
// DBName is the name of the database the table belongs to.
DBName model.CIStr
// TableAsName: presumably the alias given to the table in the query, if any.
TableAsName *model.CIStr
// dataSourceSchema is the original schema of DataSource. The schema of index scan in KV and index reader in TiDB
// will be different. The schema of index scan will decode all columns of index but the TiDB only need some of them.
dataSourceSchema *expression.Schema
// Desc: presumably whether the scan runs in descending order — verify.
Desc bool
// KeepOrder: presumably whether the scan must return rows in order — verify.
KeepOrder bool
// DoubleRead means if the index executor will read kv two times.
// If the query requires the columns that don't belong to index, DoubleRead will be true.
DoubleRead bool
}
- Projection
// ProjectionExec is the executor for the Projection operator. Its fields
// (workers, channels, a WaitGroup) suggest a multi-worker implementation;
// the exact protocol is not visible in this excerpt.
type ProjectionExec struct {
baseExecutor
// evaluatorSuit: presumably evaluates the projection expressions — TODO confirm.
evaluatorSuit *expression.EvaluatorSuite
// prepared: presumably set once one-time initialization has run — verify.
prepared bool
// finishCh: presumably signals the workers to stop — verify against Close.
finishCh chan struct{}
// outputCh carries *projectionOutput values (producer/consumer not shown here).
outputCh chan *projectionOutput
// fetcher: presumably reads input from the child executor — TODO confirm.
fetcher projectionInputFetcher
// numWorkers: presumably the number of projection workers, i.e. len(workers) — verify.
numWorkers int64
workers []*projectionWorker
// childResult: presumably a chunk holding the child's latest output — verify.
childResult *chunk.Chunk
// wg: presumably waits for the background goroutines to exit — verify.
wg sync.WaitGroup
// parentReqRows: presumably the number of rows requested by the parent — TODO confirm.
parentReqRows int64
}
- Filter
// Filter appends to result every expression of input for which the filter
// predicate returns true, preserving input order, and returns the extended
// slice. Expressions rejected by the predicate are skipped.
func Filter(result []Expression, input []Expression, filter func(Expression) bool) []Expression {
	for i := 0; i < len(input); i++ {
		expr := input[i]
		if !filter(expr) {
			continue
		}
		result = append(result, expr)
	}
	return result
}
- Aggregate
这里的聚合算法为 Hash Aggregate
在 Hash Aggregate 的计算过程中,Hash 表的键为聚合计算的 Group-By 列,值为聚合函数的中间结果(例如 sum 和 count)。输入数据全部读取完毕之后,扫描 Hash 表并计算,便可得到最终结果
由于对分布式计算的需要,聚合函数有以下计算模式
| AggFunctionMode | 输入值 | 输出值 |
|---|---|---|
| CompleteMode | 原始数据 | 最终结果 |
| FinalMode | 中间结果 | 最终结果 |
| Partial1Mode | 原始数据 | 中间结果 |
| Partial2Mode | 中间结果 | 进一步聚合后的中间结果 |
- HashJoin
HashJoin 的执行由以下几类任务协作完成
Main Thread:
读取所有的 Inner 表数据
根据 Inner 表数据构造哈希表
启动 Outer Fetcher 和 Join Worker 开始后台工作,生成 Join 结果,各个 goroutine 的启动过程由 fetchAndProbeHashTable 这个函数完成
将 Join Worker 计算出的 Join 结果返回给 NextChunk 接口的调用方
Outer Fetcher: 负责读取 Outer 表的数据并分发给各个 Join Worker
Join Worker: 负责查哈希表、Join 匹配的 Inner 和 Outer 表的数据,并把结果传递给 Main Thread
// HashJoinExec is the executor for the HashJoin operator. It holds the two
// child executors (inner and outer side), the join keys of each side, and the
// channels and wait group used by its concurrent join workers.
type HashJoinExec struct {
baseExecutor
// outerSideExec and innerSideExec are the child executors for the outer
// and inner side of the join.
outerSideExec Executor
innerSideExec Executor
// innerSideEstCount: presumably the estimated row count of the inner side — TODO confirm.
innerSideEstCount float64
// outerSideFilter: presumably filter conditions applied to outer-side rows — verify.
outerSideFilter expression.CNFExprs
// outerKeys and innerKeys are the join key columns of each side.
outerKeys []*expression.Column
innerKeys []*expression.Column
// concurrency is the number of partition, build and join workers.
concurrency uint
// rowContainer: presumably the hash table built from inner-side rows — TODO confirm.
rowContainer *hashRowContainer
// joinWorkerWaitGroup is for sync multiple join workers.
joinWorkerWaitGroup sync.WaitGroup
// closeCh adds a lock for closing the executor.
closeCh chan struct{}
joinType plannercore.JoinType
// We build individual joiner for each join worker when use chunk-based
// execution, to avoid the concurrency of joiner.chk and joiner.selected.
joiners []joiner
// The channels below pass chunks and results between goroutines; the
// exact producer/consumer of each is not visible in this excerpt.
outerChkResourceCh chan *outerChkResource
outerResultChs []chan *chunk.Chunk
joinChkResourceCh []chan *chunk.Chunk
joinResultCh chan *hashjoinWorkerResult
// prepared: presumably set once one-time initialization has run — verify.
prepared bool
}
支持三种 Join 方式
- leftOuterJoiner
- rightOuterJoiner
- innerJoiner
- MergeJoin
// MergeJoinExec is the executor for the MergeJoin operator. It keeps the
// comparison functions for the join keys, a joiner, and the per-side state
// (inner/outer tables and the buffered inner rows).
type MergeJoinExec struct {
baseExecutor
stmtCtx *stmtctx.StatementContext
// compareFuncs: presumably one comparison function per join key pair — TODO confirm.
compareFuncs []expression.CompareFunc
joiner joiner
// isOuterJoin reports whether this is an outer join (as opposed to inner).
isOuterJoin bool
// prepared: presumably set once one-time initialization has run — verify.
prepared bool
// outerIdx: presumably the index of the outer side among the children — TODO confirm.
outerIdx int
innerTable *mergeJoinInnerTable
outerTable *mergeJoinOuterTable
// innerRows and innerIter4Row: presumably the current group of inner rows
// and an iterator over it — verify.
innerRows []chunk.Row
innerIter4Row chunk.Iterator
// childrenResults: presumably one result chunk per child executor — verify.
childrenResults []*chunk.Chunk
}
- Limit
// LimitExec is the executor for the Limit operator.
type LimitExec struct {
baseExecutor
// begin and end: presumably the row window to return, i.e. offset and
// offset+count — TODO confirm whether end is exclusive.
begin uint64
end uint64
// cursor: presumably the number of child rows consumed so far — verify.
cursor uint64
// meetFirstBatch represents whether we have met the first valid Chunk from child.
meetFirstBatch bool
// childResult: presumably a chunk holding the child's latest output — verify.
childResult *chunk.Chunk
}
- Selection
// selectionExec is the executor for the Selection operator; it holds the
// conditions to evaluate and the source executor it reads from.
type selectionExec struct {
// conditions are the expressions evaluated for the selection.
conditions []expression.Expression
// relatedColOffsets: presumably the offsets of the columns referenced by
// conditions — TODO confirm.
relatedColOffsets []int
// row: presumably a reusable buffer for the row being evaluated — verify.
row []types.Datum
evalCtx *evalContext
// src is the source executor (presumably the one supplying input rows).
src executor
}
- Sort
// SortExec is the executor for the Sort operator. It stores the rows in
// chunks and keeps a row-pointer slice plus per-key comparison functions.
type SortExec struct {
baseExecutor
// ByItems are the items to sort by.
ByItems []*plannercore.ByItems
// Idx: presumably the position of the next row to return from the sorted
// result — TODO confirm.
Idx int
// fetched: presumably set once all child rows have been read — verify.
fetched bool
schema *expression.Schema
// keyColumns is the column index of the by items.
keyColumns []int
// keyCmpFuncs is used to compare each ByItem.
keyCmpFuncs []chunk.CompareFunc
// rowChunks is the chunks to store row values.
rowChunks *chunk.List
// rowPtrs stores the chunk index and row index for each row.
rowPtrs []chunk.RowPtr
}
- TopN
将相邻的 Limit 算子和 Sort 算子组合成 TopN 算子节点,表示按某个排序规则提取记录的前 N 项
// TopNExec is the executor for the TopN operator. It embeds SortExec and adds
// the limit information and a chunk heap.
type TopNExec struct {
SortExec
// limit is the physical Limit plan associated with this TopN.
limit *plannercore.PhysicalLimit
// totalLimit: presumably offset + count of limit — TODO confirm.
totalLimit uint64
// chkHeap: presumably the heap used to keep the current top rows — verify.
chkHeap *topNChunkHeap
}
- TableReader
将 TiKV 上底层扫表算子 TableFullScan 或 TableRangeScan 得到的数据进行汇总
// TableReaderExecutor is the executor for the TableReader operator. It holds
// the target table, the ranges to read, the DAG request, and the ordering
// flags for the read.
type TableReaderExecutor struct {
baseExecutor
table table.Table
ranges []*ranger.Range
// kvRanges are only used for union scan.
kvRanges []kv.KeyRange
// dagPB is the DAG request (presumably the one sent to the storage layer — verify).
dagPB *tipb.DAGRequest
// startTS: presumably the start timestamp of the reading transaction — TODO confirm.
startTS uint64
// columns are only required by union scan and virtual column.
columns []*model.ColumnInfo
// resultHandler handles the order of the result, since (MaxInt64, MaxUint64] is stored
// before [0, MaxInt64] physically for unsigned int.
resultHandler *tableResultHandler
// plans: presumably the physical plans that make up the pushed-down request — verify.
plans []plannercore.PhysicalPlan
// keepOrder and desc: presumably whether results must stay ordered and
// whether the read is descending — verify.
keepOrder bool
desc bool
}
- IndexReader
将 TiKV 上底层扫表算子 IndexFullScan 或 IndexRangeScan 得到的数据进行汇总
// IndexReaderExecutor is the executor for the IndexReader operator. It holds
// the target table and index, the ranges to read, the DAG request, and the
// select result.
type IndexReaderExecutor struct {
baseExecutor
// For a partitioned table, the IndexReaderExecutor works on a partition, so
// the type of this table field is actually `table.PhysicalTable`.
table table.Table
index *model.IndexInfo
// physicalTableID: presumably the ID of the physical table (partition)
// being read — TODO confirm.
physicalTableID int64
// keepOrder and desc: presumably whether results must stay ordered and
// whether the read is descending — verify.
keepOrder bool
desc bool
ranges []*ranger.Range
// kvRanges are only used for union scan.
kvRanges []kv.KeyRange
// dagPB is the DAG request (presumably the one sent to the storage layer — verify).
dagPB *tipb.DAGRequest
// startTS: presumably the start timestamp of the reading transaction — TODO confirm.
startTS uint64
// result returns one or more distsql.PartialResult and each PartialResult is returned by one region.
result distsql.SelectResult
// columns are only required by union scan.
columns []*model.ColumnInfo
// outputColumns are only required by union scan.
outputColumns []*expression.Column
// idxCols and colLens: presumably the index columns and the length of
// each one, kept in parallel — TODO confirm.
idxCols []*expression.Column
colLens []int
// plans: presumably the physical plans that make up the pushed-down request — verify.
plans []plannercore.PhysicalPlan
}
- IndexLookUpReader
汇总 Build 端 TiKV 扫描上来的 RowID,再去 Probe 端上根据这些 RowID 精确地读取 TiKV 上的数据。Build 端是 IndexFullScan 或 IndexRangeScan 类型的算子,Probe 端是 TableRowIDScan 类型的算子。
// IndexLookUpExecutor is the executor for the IndexLookUpReader operator. It
// carries separate state for an index side and a table side: two DAG
// requests, two plan lists, and two worker wait groups.
type IndexLookUpExecutor struct {
baseExecutor
table table.Table
index *model.IndexInfo
// keepOrder and desc: presumably whether results must stay ordered and
// whether the read is descending — verify.
keepOrder bool
desc bool
ranges []*ranger.Range
// dagPB: presumably the DAG request for the index side, with tableRequest
// below being the table-side counterpart — TODO confirm.
dagPB *tipb.DAGRequest
// startTS: presumably the start timestamp of the reading transaction — TODO confirm.
startTS uint64
// handleIdx is the index of handle, which is only used for case of keeping order.
handleIdx int
tableRequest *tipb.DAGRequest
// columns are only required by union scan.
columns []*model.ColumnInfo
*dataReaderBuilder
// All fields above are immutable.
// idxWorkerWg and tblWorkerWg: presumably wait for the index-side and
// table-side workers respectively — verify.
idxWorkerWg sync.WaitGroup
tblWorkerWg sync.WaitGroup
// finished: presumably closed to tell the workers to stop — verify against Close.
finished chan struct{}
kvRanges []kv.KeyRange
// workerStarted: presumably guards against starting the workers twice — verify.
workerStarted bool
// resultCh carries lookup tasks; resultCurr is presumably the task
// currently being drained — TODO confirm.
resultCh chan *lookupTableTask
resultCurr *lookupTableTask
// idxPlans and tblPlans: presumably the physical plans of the index side
// and the table side — verify.
idxPlans []plannercore.PhysicalPlan
tblPlans []plannercore.PhysicalPlan
// idxCols and colLens: presumably the index columns and the length of
// each one, kept in parallel — TODO confirm.
idxCols []*expression.Column
colLens []int
}