Example #1
0
// prepare runs the first time when 'Next' is called, it starts one worker goroutine to fetch rows from the big table,
// and reads all data from the small table to build a hash table, then starts multiple join worker goroutines.
func (e *HashJoinExec) prepare() error {
	e.finished = false
	e.bigTableRows = make([]chan []*Row, e.concurrency)
	for i := 0; i < e.concurrency; i++ {
		e.bigTableRows[i] = make(chan []*Row, e.concurrency*batchSize)
	}
	e.bigTableErr = make(chan error, 1)

	// Start a worker to fetch big table rows.
	go e.fetchBigExec()

	e.hashTable = make(map[string][]*Row)
	e.cursor = 0
	for {
		row, err := e.smallExec.Next()
		if err != nil {
			return errors.Trace(err)
		}
		if row == nil {
			e.smallExec.Close()
			break
		}

		matched := true
		if e.smallFilter != nil {
			matched, err = expression.EvalBool(e.smallFilter, row.Data, e.ctx)
			if err != nil {
				return errors.Trace(err)
			}
			if !matched {
				continue
			}
		}
		hasNull, hashcode, err := getHashKey(e.smallHashKey, row, e.targetTypes, e.hashJoinContexts[0].datumBuffer, nil)
		if err != nil {
			return errors.Trace(err)
		}
		if hasNull {
			continue
		}
		if rows, ok := e.hashTable[string(hashcode)]; !ok {
			e.hashTable[string(hashcode)] = []*Row{row}
		} else {
			e.hashTable[string(hashcode)] = append(rows, row)
		}
	}

	e.resultRows = make(chan *Row, e.concurrency*1000)
	e.resultErr = make(chan error, 1)

	e.wg = sync.WaitGroup{}
	for i := 0; i < e.concurrency; i++ {
		e.wg.Add(1)
		go e.runJoinWorker(i)
	}
	go e.waitJoinWorkersAndCloseResultChan()

	e.prepared = true
	return nil
}
Example #2
0
func (e *HashSemiJoinExec) rowIsMatched(bigRow *Row) (matched bool, hasNull bool, err error) {
	sc := e.ctx.GetSessionVars().StmtCtx
	hasNull, hashcode, err := getHashKey(sc, e.bigHashKey, bigRow, e.targetTypes, make([]types.Datum, len(e.smallHashKey)), nil)
	if err != nil {
		return false, false, errors.Trace(err)
	}
	if hasNull {
		return false, true, nil
	}
	rows, ok := e.hashTable[string(hashcode)]
	if !ok {
		return
	}
	// match eq condition
	for _, smallRow := range rows {
		matched = true
		if e.otherFilter != nil {
			var matchedRow *Row
			matchedRow = makeJoinRow(bigRow, smallRow)
			matched, err = expression.EvalBool(e.otherFilter, matchedRow.Data, e.ctx)
			if err != nil {
				return false, false, errors.Trace(err)
			}
		}
		if matched {
			return
		}
	}
	return
}
Example #3
0
// joinOneBigRow creates result rows from a row in a big table and sends them to resultRows channel.
// Every matching row generates a result row.
// If there are no matching rows and it is outer join, a null filled result row is created.
func (e *HashJoinExec) joinOneBigRow(ctx *hashJoinCtx, bigRow *Row) bool {
	var (
		matchedRows []*Row
		err         error
	)
	bigMatched := true
	if e.bigFilter != nil {
		bigMatched, err = expression.EvalBool(ctx.bigFilter, bigRow.Data, e.ctx)
		if err != nil {
			e.resultErr <- errors.Trace(err)
			return false
		}
	}
	if bigMatched {
		matchedRows, err = e.constructMatchedRows(ctx, bigRow)
		if err != nil {
			e.resultErr <- errors.Trace(err)
			return false
		}
	}
	for _, r := range matchedRows {
		e.resultRows <- r
	}
	if len(matchedRows) == 0 && e.outer {
		r := e.fillRowWithDefaultValues(bigRow)
		e.resultRows <- r
	}
	return true
}
Example #4
0
// constructMatchedRows creates matching result rows from a row in the big table.
func (e *HashJoinExec) constructMatchedRows(ctx *hashJoinCtx, bigRow *Row) (matchedRows []*Row, err error) {
	hasNull, hashcode, err := getHashKey(e.bigHashKey, bigRow, e.targetTypes, ctx.datumBuffer, ctx.hashKeyBuffer[0:0:cap(ctx.hashKeyBuffer)])
	if err != nil {
		return nil, errors.Trace(err)
	}

	if hasNull {
		return
	}
	rows, ok := e.hashTable[string(hashcode)]
	if !ok {
		return
	}
	// match eq condition
	for _, smallRow := range rows {
		otherMatched := true
		var matchedRow *Row
		if e.leftSmall {
			matchedRow = makeJoinRow(smallRow, bigRow)
		} else {
			matchedRow = makeJoinRow(bigRow, smallRow)
		}
		if e.otherFilter != nil {
			otherMatched, err = expression.EvalBool(ctx.otherFilter, matchedRow.Data, e.ctx)
			if err != nil {
				return nil, errors.Trace(err)
			}
		}
		if otherMatched {
			matchedRows = append(matchedRows, matchedRow)
		}
	}

	return matchedRows, nil
}
Example #5
0
func (e *HashJoinExec) constructMatchedRows(bigRow *Row) (matchedRows []*Row, err error) {
	hashcode, err := e.getHashKey(e.bigHashKey, bigRow)
	if err != nil {
		return nil, errors.Trace(err)
	}

	rows, ok := e.hashTable[string(hashcode)]
	if !ok {
		return
	}
	// match eq condition
	for _, smallRow := range rows {
		//TODO: remove result fields in order to reduce memory copy cost.
		otherMatched := true
		var matchedRow *Row
		if e.leftSmall {
			matchedRow = joinTwoRow(smallRow, bigRow)
		} else {
			matchedRow = joinTwoRow(bigRow, smallRow)
		}
		if e.otherFilter != nil {
			otherMatched, err = expression.EvalBool(e.otherFilter, matchedRow.Data, e.ctx)
			if err != nil {
				return nil, errors.Trace(err)
			}
		}
		if otherMatched {
			matchedRows = append(matchedRows, matchedRow)
		}
	}

	return matchedRows, nil
}
Example #6
0
func (e *HashSemiJoinExec) rowIsMatched(bigRow *Row) (matched bool, hasNull bool, err error) {
	hasNull, hashcode, err := getHashKey(e.bigHashKey, bigRow, e.targetTypes)
	if err != nil {
		return false, false, errors.Trace(err)
	}
	if hasNull {
		return false, true, nil
	}
	rows, ok := e.hashTable[string(hashcode)]
	if !ok {
		return
	}
	// match eq condition
	for _, smallRow := range rows {
		matched = true
		if e.otherFilter != nil {
			var matchedRow *Row
			matchedRow = joinTwoRow(bigRow, smallRow)
			matched, err = expression.EvalBool(e.otherFilter, matchedRow.Data, e.ctx)
			if err != nil {
				return false, false, errors.Trace(err)
			}
		}
		if matched {
			return
		}
	}
	return
}
Example #7
0
func (us *UnionScanExec) newBuildAndSortAddedRows(t table.Table, asName *model.CIStr) error {
	us.addedRows = make([]*Row, 0, len(us.dirty.addedRows))
	for h, data := range us.dirty.addedRows {
		var newData []types.Datum
		if len(us.Src.Schema()) == len(data) {
			newData = data
		} else {
			newData = make([]types.Datum, 0, len(us.Src.Schema()))
			for _, col := range us.Src.(*NewXSelectTableExec).Columns {
				newData = append(newData, data[col.Offset])
			}
		}
		if us.newCondition != nil {
			matched, err := expression.EvalBool(us.newCondition, newData, us.ctx)
			if err != nil {
				return errors.Trace(err)
			}
			if !matched {
				continue
			}
		}
		rowKeyEntry := &RowKeyEntry{Handle: h, Tbl: t, TableAsName: asName}
		row := &Row{Data: newData, RowKeys: []*RowKeyEntry{rowKeyEntry}}
		us.addedRows = append(us.addedRows, row)
	}
	if us.desc {
		sort.Sort(sort.Reverse(us))
	} else {
		sort.Sort(us)
	}
	if us.sortErr != nil {
		return errors.Trace(us.sortErr)
	}
	return nil
}
Example #8
0
// tryToConvert2DummyScan is an optimization which checks if its parent is a selection with a constant condition
// that evaluates to false. If it is, there is no need for a real physical scan, a dummy scan will do.
func (p *DataSource) tryToConvert2DummyScan(prop *requiredProperty) (*physicalPlanInfo, error) {
	sel, isSel := p.GetParentByIndex(0).(*Selection)
	if !isSel {
		return nil, nil
	}

	for _, cond := range sel.Conditions {
		if con, ok := cond.(*expression.Constant); ok {
			result, err := expression.EvalBool(con, nil, p.ctx)
			if err != nil {
				return nil, errors.Trace(err)
			}
			if !result {
				dummy := &PhysicalDummyScan{}
				dummy.tp = "Dummy"
				dummy.allocator = p.allocator
				dummy.initIDAndContext(p.ctx)
				dummy.SetSchema(p.schema)
				info := &physicalPlanInfo{p: dummy}
				p.storePlanInfo(prop, info)
				return info, nil
			}
		}
	}
	return nil, nil
}
Example #9
0
// Next implements the Executor Next interface.
func (e *HashSemiJoinExec) Next() (*Row, error) {
	if !e.prepared {
		if err := e.prepare(); err != nil {
			return nil, errors.Trace(err)
		}
	}

	for {
		bigRow, err := e.bigExec.Next()
		if err != nil {
			return nil, errors.Trace(err)
		}
		if bigRow == nil {
			e.bigExec.Close()
			return nil, nil
		}

		matched := true
		if e.bigFilter != nil {
			matched, err = expression.EvalBool(e.bigFilter, bigRow.Data, e.ctx)
			if err != nil {
				return nil, errors.Trace(err)
			}
		}
		isNull := false
		if matched {
			matched, isNull, err = e.rowIsMatched(bigRow)
			if err != nil {
				return nil, errors.Trace(err)
			}
		}
		if !matched && e.smallTableHasNull {
			isNull = true
		}
		if e.anti && !isNull {
			matched = !matched
		}
		// For the auxMode subquery, we return the row with a Datum indicating if it's a match,
		// For the non-auxMode subquery, we return the matching row only.
		if e.auxMode {
			if isNull {
				bigRow.Data = append(bigRow.Data, types.NewDatum(nil))
			} else {
				bigRow.Data = append(bigRow.Data, types.NewDatum(matched))
			}
			return bigRow, nil
		}
		if matched {
			return bigRow, nil
		}
	}
}
Example #10
0
func (c *conditionChecker) Exec(row *Row) (*Row, error) {
	var err error
	c.matched, err = expression.EvalBool(c.cond, row.Data, c.ctx)
	if err != nil {
		return nil, errors.Trace(err)
	}
	row.Data = row.Data[:c.trimLen]
	if c.matched != c.all {
		row.Data = append(row.Data, types.NewDatum(c.matched))
		return row, nil
	}
	return nil, nil
}
Example #11
0
// Next implements Executor Next interface.
func (e *HashSemiJoinExec) Next() (*Row, error) {
	if !e.prepared {
		if err := e.prepare(); err != nil {
			return nil, errors.Trace(err)
		}
	}

	for {
		bigRow, err := e.bigExec.Next()
		if err != nil {
			return nil, errors.Trace(err)
		}
		if bigRow == nil {
			e.bigExec.Close()
			return nil, nil
		}

		matched := true
		if e.bigFilter != nil {
			matched, err = expression.EvalBool(e.bigFilter, bigRow.Data, e.ctx)
			if err != nil {
				return nil, errors.Trace(err)
			}
		}
		isNull := false
		if matched {
			matched, isNull, err = e.rowIsMatched(bigRow)
			if err != nil {
				return nil, errors.Trace(err)
			}
		}
		if !matched && e.smallTableHasNull {
			isNull = true
		}
		if e.anti && !isNull {
			matched = !matched
		}
		if e.withAux {
			if isNull {
				bigRow.Data = append(bigRow.Data, types.NewDatum(nil))
			} else {
				bigRow.Data = append(bigRow.Data, types.NewDatum(matched))
			}
			return bigRow, nil
		} else if matched {
			return bigRow, nil
		}
	}
}
Example #12
0
// convert2PhysicalPlan implements LogicalPlan convert2PhysicalPlan interface.
func (p *DataSource) convert2PhysicalPlan(prop requiredProperty) (*physicalPlanInfo, *physicalPlanInfo, uint64, error) {
	sortedRes, unsortedRes, cnt := p.getPlanInfo(prop)
	if sortedRes != nil {
		return sortedRes, unsortedRes, cnt, nil
	}
	sel, isSel := p.GetParentByIndex(0).(*Selection)
	var err error
	if isSel {
		for _, cond := range sel.Conditions {
			if con, ok := cond.(*expression.Constant); ok {
				var result bool
				result, err = expression.EvalBool(con, nil, nil)
				if err != nil {
					return nil, nil, 0, errors.Trace(err)
				}
				if !result {
					dummy := &PhysicalDummyScan{}
					dummy.SetSchema(p.schema)
					info := &physicalPlanInfo{p: dummy}
					p.storePlanInfo(prop, info, info, 0)
					return info, info, 0, nil
				}
			}
		}
	}
	indices, includeTableScan := availableIndices(p.table)
	if includeTableScan {
		sortedRes, unsortedRes, err = p.handleTableScan(prop)
		if err != nil {
			return nil, nil, 0, errors.Trace(err)
		}
	}
	for _, index := range indices {
		sortedIsRes, unsortedIsRes, err := p.handleIndexScan(prop, index)
		if err != nil {
			return nil, nil, 0, errors.Trace(err)
		}
		if sortedRes == nil || sortedIsRes.cost < sortedRes.cost {
			sortedRes = sortedIsRes
		}
		if unsortedRes == nil || unsortedIsRes.cost < unsortedRes.cost {
			unsortedRes = unsortedIsRes
		}
	}
	statsTbl := p.statisticTable
	p.storePlanInfo(prop, sortedRes, unsortedRes, uint64(statsTbl.Count))
	return sortedRes, unsortedRes, uint64(statsTbl.Count), nil
}
Example #13
0
// Next implements Executor Next interface.
func (e *HashJoinExec) Next() (*Row, error) {
	if !e.prepared {
		if err := e.prepare(); err != nil {
			return nil, errors.Trace(err)
		}
	}

	row, ok := e.returnRecord()
	if ok {
		return row, nil
	}

	for {
		bigRow, err := e.bigExec.Next()
		if err != nil {
			return nil, errors.Trace(err)
		}
		if bigRow == nil {
			e.bigExec.Close()
			return nil, nil
		}

		var matchedRows []*Row
		bigMatched := true
		if e.bigFilter != nil {
			bigMatched, err = expression.EvalBool(e.bigFilter, bigRow.Data, e.ctx)
			if err != nil {
				return nil, errors.Trace(err)
			}
		}
		if bigMatched {
			matchedRows, err = e.constructMatchedRows(bigRow)
			if err != nil {
				return nil, errors.Trace(err)
			}
		}
		e.matchedRows = matchedRows
		e.cursor = 0
		row, ok := e.returnRecord()
		if ok {
			return row, nil
		} else if e.outter {
			row = e.fillNullRow(bigRow)
			return row, nil
		}
	}
}
Example #14
0
// Next implements the Executor Next interface.
func (e *SelectionExec) Next() (*Row, error) {
	for {
		srcRow, err := e.Src.Next()
		if err != nil {
			return nil, errors.Trace(err)
		}
		if srcRow == nil {
			return nil, nil
		}
		match, err := expression.EvalBool(e.Condition, srcRow.Data, e.ctx)
		if err != nil {
			return nil, errors.Trace(err)
		}
		if match {
			return srcRow, nil
		}
	}
}
Example #15
0
// Prepare runs the first time when 'Next' is called and it reads all data from the small table and stores
// them in a hash table.
func (e *HashSemiJoinExec) prepare() error {
	e.hashTable = make(map[string][]*Row)
	sc := e.ctx.GetSessionVars().StmtCtx
	for {
		row, err := e.smallExec.Next()
		if err != nil {
			return errors.Trace(err)
		}
		if row == nil {
			e.smallExec.Close()
			break
		}

		matched := true
		if e.smallFilter != nil {
			matched, err = expression.EvalBool(e.smallFilter, row.Data, e.ctx)
			if err != nil {
				return errors.Trace(err)
			}
			if !matched {
				continue
			}
		}
		hasNull, hashcode, err := getHashKey(sc, e.smallHashKey, row, e.targetTypes, make([]types.Datum, len(e.smallHashKey)), nil)
		if err != nil {
			return errors.Trace(err)
		}
		if hasNull {
			e.smallTableHasNull = true
			continue
		}
		if rows, ok := e.hashTable[string(hashcode)]; !ok {
			e.hashTable[string(hashcode)] = []*Row{row}
		} else {
			e.hashTable[string(hashcode)] = append(rows, row)
		}
	}

	e.prepared = true
	return nil
}
Example #16
0
func (e *HashJoinExec) prepare() error {
	e.hashTable = make(map[string][]*Row)
	e.cursor = 0
	for {
		row, err := e.smallExec.Next()
		if err != nil {
			return errors.Trace(err)
		}
		if row == nil {
			e.smallExec.Close()
			break
		}

		matched := true
		if e.smallFilter != nil {
			matched, err = expression.EvalBool(e.smallFilter, row.Data, e.ctx)
			if err != nil {
				return errors.Trace(err)
			}
			if !matched {
				continue
			}
		}
		hasNull, hashcode, err := getHashKey(e.smallHashKey, row, e.targetTypes)
		if err != nil {
			return errors.Trace(err)
		}
		if hasNull {
			continue
		}
		if rows, ok := e.hashTable[string(hashcode)]; !ok {
			e.hashTable[string(hashcode)] = []*Row{row}
		} else {
			e.hashTable[string(hashcode)] = append(rows, row)
		}
	}

	e.prepared = true
	return nil
}