// prepare runs the first time when 'Next' is called, it starts one worker goroutine to fetch rows from the big table, // and reads all data from the small table to build a hash table, then starts multiple join worker goroutines. func (e *HashJoinExec) prepare() error { e.finished = false e.bigTableRows = make([]chan []*Row, e.concurrency) for i := 0; i < e.concurrency; i++ { e.bigTableRows[i] = make(chan []*Row, e.concurrency*batchSize) } e.bigTableErr = make(chan error, 1) // Start a worker to fetch big table rows. go e.fetchBigExec() e.hashTable = make(map[string][]*Row) e.cursor = 0 for { row, err := e.smallExec.Next() if err != nil { return errors.Trace(err) } if row == nil { e.smallExec.Close() break } matched := true if e.smallFilter != nil { matched, err = expression.EvalBool(e.smallFilter, row.Data, e.ctx) if err != nil { return errors.Trace(err) } if !matched { continue } } hasNull, hashcode, err := getHashKey(e.smallHashKey, row, e.targetTypes, e.hashJoinContexts[0].datumBuffer, nil) if err != nil { return errors.Trace(err) } if hasNull { continue } if rows, ok := e.hashTable[string(hashcode)]; !ok { e.hashTable[string(hashcode)] = []*Row{row} } else { e.hashTable[string(hashcode)] = append(rows, row) } } e.resultRows = make(chan *Row, e.concurrency*1000) e.resultErr = make(chan error, 1) e.wg = sync.WaitGroup{} for i := 0; i < e.concurrency; i++ { e.wg.Add(1) go e.runJoinWorker(i) } go e.waitJoinWorkersAndCloseResultChan() e.prepared = true return nil }
func (e *HashSemiJoinExec) rowIsMatched(bigRow *Row) (matched bool, hasNull bool, err error) { sc := e.ctx.GetSessionVars().StmtCtx hasNull, hashcode, err := getHashKey(sc, e.bigHashKey, bigRow, e.targetTypes, make([]types.Datum, len(e.smallHashKey)), nil) if err != nil { return false, false, errors.Trace(err) } if hasNull { return false, true, nil } rows, ok := e.hashTable[string(hashcode)] if !ok { return } // match eq condition for _, smallRow := range rows { matched = true if e.otherFilter != nil { var matchedRow *Row matchedRow = makeJoinRow(bigRow, smallRow) matched, err = expression.EvalBool(e.otherFilter, matchedRow.Data, e.ctx) if err != nil { return false, false, errors.Trace(err) } } if matched { return } } return }
// joinOneBigRow creates result rows from a row in a big table and sends them to resultRows channel. // Every matching row generates a result row. // If there are no matching rows and it is outer join, a null filled result row is created. func (e *HashJoinExec) joinOneBigRow(ctx *hashJoinCtx, bigRow *Row) bool { var ( matchedRows []*Row err error ) bigMatched := true if e.bigFilter != nil { bigMatched, err = expression.EvalBool(ctx.bigFilter, bigRow.Data, e.ctx) if err != nil { e.resultErr <- errors.Trace(err) return false } } if bigMatched { matchedRows, err = e.constructMatchedRows(ctx, bigRow) if err != nil { e.resultErr <- errors.Trace(err) return false } } for _, r := range matchedRows { e.resultRows <- r } if len(matchedRows) == 0 && e.outer { r := e.fillRowWithDefaultValues(bigRow) e.resultRows <- r } return true }
// constructMatchedRows creates matching result rows from a row in the big table. func (e *HashJoinExec) constructMatchedRows(ctx *hashJoinCtx, bigRow *Row) (matchedRows []*Row, err error) { hasNull, hashcode, err := getHashKey(e.bigHashKey, bigRow, e.targetTypes, ctx.datumBuffer, ctx.hashKeyBuffer[0:0:cap(ctx.hashKeyBuffer)]) if err != nil { return nil, errors.Trace(err) } if hasNull { return } rows, ok := e.hashTable[string(hashcode)] if !ok { return } // match eq condition for _, smallRow := range rows { otherMatched := true var matchedRow *Row if e.leftSmall { matchedRow = makeJoinRow(smallRow, bigRow) } else { matchedRow = makeJoinRow(bigRow, smallRow) } if e.otherFilter != nil { otherMatched, err = expression.EvalBool(ctx.otherFilter, matchedRow.Data, e.ctx) if err != nil { return nil, errors.Trace(err) } } if otherMatched { matchedRows = append(matchedRows, matchedRow) } } return matchedRows, nil }
func (e *HashJoinExec) constructMatchedRows(bigRow *Row) (matchedRows []*Row, err error) { hashcode, err := e.getHashKey(e.bigHashKey, bigRow) if err != nil { return nil, errors.Trace(err) } rows, ok := e.hashTable[string(hashcode)] if !ok { return } // match eq condition for _, smallRow := range rows { //TODO: remove result fields in order to reduce memory copy cost. otherMatched := true var matchedRow *Row if e.leftSmall { matchedRow = joinTwoRow(smallRow, bigRow) } else { matchedRow = joinTwoRow(bigRow, smallRow) } if e.otherFilter != nil { otherMatched, err = expression.EvalBool(e.otherFilter, matchedRow.Data, e.ctx) if err != nil { return nil, errors.Trace(err) } } if otherMatched { matchedRows = append(matchedRows, matchedRow) } } return matchedRows, nil }
func (e *HashSemiJoinExec) rowIsMatched(bigRow *Row) (matched bool, hasNull bool, err error) { hasNull, hashcode, err := getHashKey(e.bigHashKey, bigRow, e.targetTypes) if err != nil { return false, false, errors.Trace(err) } if hasNull { return false, true, nil } rows, ok := e.hashTable[string(hashcode)] if !ok { return } // match eq condition for _, smallRow := range rows { matched = true if e.otherFilter != nil { var matchedRow *Row matchedRow = joinTwoRow(bigRow, smallRow) matched, err = expression.EvalBool(e.otherFilter, matchedRow.Data, e.ctx) if err != nil { return false, false, errors.Trace(err) } } if matched { return } } return }
func (us *UnionScanExec) newBuildAndSortAddedRows(t table.Table, asName *model.CIStr) error { us.addedRows = make([]*Row, 0, len(us.dirty.addedRows)) for h, data := range us.dirty.addedRows { var newData []types.Datum if len(us.Src.Schema()) == len(data) { newData = data } else { newData = make([]types.Datum, 0, len(us.Src.Schema())) for _, col := range us.Src.(*NewXSelectTableExec).Columns { newData = append(newData, data[col.Offset]) } } if us.newCondition != nil { matched, err := expression.EvalBool(us.newCondition, newData, us.ctx) if err != nil { return errors.Trace(err) } if !matched { continue } } rowKeyEntry := &RowKeyEntry{Handle: h, Tbl: t, TableAsName: asName} row := &Row{Data: newData, RowKeys: []*RowKeyEntry{rowKeyEntry}} us.addedRows = append(us.addedRows, row) } if us.desc { sort.Sort(sort.Reverse(us)) } else { sort.Sort(us) } if us.sortErr != nil { return errors.Trace(us.sortErr) } return nil }
// tryToConvert2DummyScan is an optimization which checks if its parent is a selection with a constant condition // that evaluates to false. If it is, there is no need for a real physical scan, a dummy scan will do. func (p *DataSource) tryToConvert2DummyScan(prop *requiredProperty) (*physicalPlanInfo, error) { sel, isSel := p.GetParentByIndex(0).(*Selection) if !isSel { return nil, nil } for _, cond := range sel.Conditions { if con, ok := cond.(*expression.Constant); ok { result, err := expression.EvalBool(con, nil, p.ctx) if err != nil { return nil, errors.Trace(err) } if !result { dummy := &PhysicalDummyScan{} dummy.tp = "Dummy" dummy.allocator = p.allocator dummy.initIDAndContext(p.ctx) dummy.SetSchema(p.schema) info := &physicalPlanInfo{p: dummy} p.storePlanInfo(prop, info) return info, nil } } } return nil, nil }
// Next implements the Executor Next interface. func (e *HashSemiJoinExec) Next() (*Row, error) { if !e.prepared { if err := e.prepare(); err != nil { return nil, errors.Trace(err) } } for { bigRow, err := e.bigExec.Next() if err != nil { return nil, errors.Trace(err) } if bigRow == nil { e.bigExec.Close() return nil, nil } matched := true if e.bigFilter != nil { matched, err = expression.EvalBool(e.bigFilter, bigRow.Data, e.ctx) if err != nil { return nil, errors.Trace(err) } } isNull := false if matched { matched, isNull, err = e.rowIsMatched(bigRow) if err != nil { return nil, errors.Trace(err) } } if !matched && e.smallTableHasNull { isNull = true } if e.anti && !isNull { matched = !matched } // For the auxMode subquery, we return the row with a Datum indicating if it's a match, // For the non-auxMode subquery, we return the matching row only. if e.auxMode { if isNull { bigRow.Data = append(bigRow.Data, types.NewDatum(nil)) } else { bigRow.Data = append(bigRow.Data, types.NewDatum(matched)) } return bigRow, nil } if matched { return bigRow, nil } } }
func (c *conditionChecker) Exec(row *Row) (*Row, error) { var err error c.matched, err = expression.EvalBool(c.cond, row.Data, c.ctx) if err != nil { return nil, errors.Trace(err) } row.Data = row.Data[:c.trimLen] if c.matched != c.all { row.Data = append(row.Data, types.NewDatum(c.matched)) return row, nil } return nil, nil }
// Next implements Executor Next interface. func (e *HashSemiJoinExec) Next() (*Row, error) { if !e.prepared { if err := e.prepare(); err != nil { return nil, errors.Trace(err) } } for { bigRow, err := e.bigExec.Next() if err != nil { return nil, errors.Trace(err) } if bigRow == nil { e.bigExec.Close() return nil, nil } matched := true if e.bigFilter != nil { matched, err = expression.EvalBool(e.bigFilter, bigRow.Data, e.ctx) if err != nil { return nil, errors.Trace(err) } } isNull := false if matched { matched, isNull, err = e.rowIsMatched(bigRow) if err != nil { return nil, errors.Trace(err) } } if !matched && e.smallTableHasNull { isNull = true } if e.anti && !isNull { matched = !matched } if e.withAux { if isNull { bigRow.Data = append(bigRow.Data, types.NewDatum(nil)) } else { bigRow.Data = append(bigRow.Data, types.NewDatum(matched)) } return bigRow, nil } else if matched { return bigRow, nil } } }
// convert2PhysicalPlan implements LogicalPlan convert2PhysicalPlan interface. func (p *DataSource) convert2PhysicalPlan(prop requiredProperty) (*physicalPlanInfo, *physicalPlanInfo, uint64, error) { sortedRes, unsortedRes, cnt := p.getPlanInfo(prop) if sortedRes != nil { return sortedRes, unsortedRes, cnt, nil } sel, isSel := p.GetParentByIndex(0).(*Selection) var err error if isSel { for _, cond := range sel.Conditions { if con, ok := cond.(*expression.Constant); ok { var result bool result, err = expression.EvalBool(con, nil, nil) if err != nil { return nil, nil, 0, errors.Trace(err) } if !result { dummy := &PhysicalDummyScan{} dummy.SetSchema(p.schema) info := &physicalPlanInfo{p: dummy} p.storePlanInfo(prop, info, info, 0) return info, info, 0, nil } } } } indices, includeTableScan := availableIndices(p.table) if includeTableScan { sortedRes, unsortedRes, err = p.handleTableScan(prop) if err != nil { return nil, nil, 0, errors.Trace(err) } } for _, index := range indices { sortedIsRes, unsortedIsRes, err := p.handleIndexScan(prop, index) if err != nil { return nil, nil, 0, errors.Trace(err) } if sortedRes == nil || sortedIsRes.cost < sortedRes.cost { sortedRes = sortedIsRes } if unsortedRes == nil || unsortedIsRes.cost < unsortedRes.cost { unsortedRes = unsortedIsRes } } statsTbl := p.statisticTable p.storePlanInfo(prop, sortedRes, unsortedRes, uint64(statsTbl.Count)) return sortedRes, unsortedRes, uint64(statsTbl.Count), nil }
// Next implements Executor Next interface. func (e *HashJoinExec) Next() (*Row, error) { if !e.prepared { if err := e.prepare(); err != nil { return nil, errors.Trace(err) } } row, ok := e.returnRecord() if ok { return row, nil } for { bigRow, err := e.bigExec.Next() if err != nil { return nil, errors.Trace(err) } if bigRow == nil { e.bigExec.Close() return nil, nil } var matchedRows []*Row bigMatched := true if e.bigFilter != nil { bigMatched, err = expression.EvalBool(e.bigFilter, bigRow.Data, e.ctx) if err != nil { return nil, errors.Trace(err) } } if bigMatched { matchedRows, err = e.constructMatchedRows(bigRow) if err != nil { return nil, errors.Trace(err) } } e.matchedRows = matchedRows e.cursor = 0 row, ok := e.returnRecord() if ok { return row, nil } else if e.outter { row = e.fillNullRow(bigRow) return row, nil } } }
// Next implements the Executor Next interface. func (e *SelectionExec) Next() (*Row, error) { for { srcRow, err := e.Src.Next() if err != nil { return nil, errors.Trace(err) } if srcRow == nil { return nil, nil } match, err := expression.EvalBool(e.Condition, srcRow.Data, e.ctx) if err != nil { return nil, errors.Trace(err) } if match { return srcRow, nil } } }
// Prepare runs the first time when 'Next' is called and it reads all data from the small table and stores // them in a hash table. func (e *HashSemiJoinExec) prepare() error { e.hashTable = make(map[string][]*Row) sc := e.ctx.GetSessionVars().StmtCtx for { row, err := e.smallExec.Next() if err != nil { return errors.Trace(err) } if row == nil { e.smallExec.Close() break } matched := true if e.smallFilter != nil { matched, err = expression.EvalBool(e.smallFilter, row.Data, e.ctx) if err != nil { return errors.Trace(err) } if !matched { continue } } hasNull, hashcode, err := getHashKey(sc, e.smallHashKey, row, e.targetTypes, make([]types.Datum, len(e.smallHashKey)), nil) if err != nil { return errors.Trace(err) } if hasNull { e.smallTableHasNull = true continue } if rows, ok := e.hashTable[string(hashcode)]; !ok { e.hashTable[string(hashcode)] = []*Row{row} } else { e.hashTable[string(hashcode)] = append(rows, row) } } e.prepared = true return nil }
func (e *HashJoinExec) prepare() error { e.hashTable = make(map[string][]*Row) e.cursor = 0 for { row, err := e.smallExec.Next() if err != nil { return errors.Trace(err) } if row == nil { e.smallExec.Close() break } matched := true if e.smallFilter != nil { matched, err = expression.EvalBool(e.smallFilter, row.Data, e.ctx) if err != nil { return errors.Trace(err) } if !matched { continue } } hasNull, hashcode, err := getHashKey(e.smallHashKey, row, e.targetTypes) if err != nil { return errors.Trace(err) } if hasNull { continue } if rows, ok := e.hashTable[string(hashcode)]; !ok { e.hashTable[string(hashcode)] = []*Row{row} } else { e.hashTable[string(hashcode)] = append(rows, row) } } e.prepared = true return nil }