Beispiel #1
0
func (f *Firestorm) batchRows(writer store.KVWriter, rowsOfRows [][]index.IndexRow, deleteKeys [][]byte) (map[string]int64, error) {

	dictionaryDeltas := make(map[string]int64)

	// count up bytes needed for buffering.
	addNum := 0
	addKeyBytes := 0
	addValBytes := 0

	deleteNum := 0
	deleteKeyBytes := 0

	var kbuf []byte

	prepareBuf := func(buf []byte, sizeNeeded int) []byte {
		if cap(buf) < sizeNeeded {
			return make([]byte, sizeNeeded, sizeNeeded+128)
		}
		return buf[0:sizeNeeded]
	}

	for _, rows := range rowsOfRows {
		for _, row := range rows {
			tfr, ok := row.(*TermFreqRow)
			if ok {
				if tfr.Field() != 0 {
					kbuf = prepareBuf(kbuf, tfr.DictionaryRowKeySize())
					klen, err := tfr.DictionaryRowKeyTo(kbuf)
					if err != nil {
						return nil, err
					}

					dictionaryDeltas[string(kbuf[0:klen])] += 1
				}
			}

			addKeyBytes += row.KeySize()
			addValBytes += row.ValueSize()
		}
		addNum += len(rows)
	}

	for _, dk := range deleteKeys {
		deleteKeyBytes += len(dk)
	}
	deleteNum += len(deleteKeys)

	// prepare batch
	totBytes := addKeyBytes + addValBytes + deleteKeyBytes

	buf, wb, err := writer.NewBatchEx(store.KVBatchOptions{
		TotalBytes: totBytes,
		NumSets:    addNum,
		NumDeletes: deleteNum,
		NumMerges:  0,
	})
	if err != nil {
		return nil, err
	}
	defer func() {
		_ = wb.Close()
	}()

	for _, rows := range rowsOfRows {
		for _, row := range rows {
			klen, err := row.KeyTo(buf)
			if err != nil {
				return nil, err
			}

			vlen, err := row.ValueTo(buf[klen:])
			if err != nil {
				return nil, err
			}

			wb.Set(buf[0:klen], buf[klen:klen+vlen])

			buf = buf[klen+vlen:]
		}
	}

	for _, dk := range deleteKeys {
		dklen := copy(buf, dk)
		wb.Delete(buf[0:dklen])
		buf = buf[dklen:]
	}

	// write out the batch
	err = writer.ExecuteBatch(wb)
	if err != nil {
		return nil, err
	}
	return dictionaryDeltas, nil
}
Beispiel #2
0
func (udc *SmolderingCouch) batchRows(writer store.KVWriter, addRowsAll [][]SmolderingCouchRow, updateRowsAll [][]SmolderingCouchRow, deleteRowsAll [][]SmolderingCouchRow) (err error) {
	dictionaryDeltas := make(map[string]int64)

	// count up bytes needed for buffering.
	addNum := 0
	addKeyBytes := 0
	addValBytes := 0

	updateNum := 0
	updateKeyBytes := 0
	updateValBytes := 0

	deleteNum := 0
	deleteKeyBytes := 0

	rowBuf := GetRowBuffer()

	for _, addRows := range addRowsAll {
		for _, row := range addRows {
			tfr, ok := row.(*TermFrequencyRow)
			if ok {
				if tfr.DictionaryRowKeySize() > len(rowBuf) {
					rowBuf = make([]byte, tfr.DictionaryRowKeySize())
				}
				dictKeySize, err := tfr.DictionaryRowKeyTo(rowBuf)
				if err != nil {
					return err
				}
				dictionaryDeltas[string(rowBuf[:dictKeySize])] += 1
			}
			addKeyBytes += row.KeySize()
			addValBytes += row.ValueSize()
		}
		addNum += len(addRows)
	}

	for _, updateRows := range updateRowsAll {
		for _, row := range updateRows {
			updateKeyBytes += row.KeySize()
			updateValBytes += row.ValueSize()
		}
		updateNum += len(updateRows)
	}

	for _, deleteRows := range deleteRowsAll {
		for _, row := range deleteRows {
			tfr, ok := row.(*TermFrequencyRow)
			if ok {
				// need to decrement counter
				if tfr.DictionaryRowKeySize() > len(rowBuf) {
					rowBuf = make([]byte, tfr.DictionaryRowKeySize())
				}
				dictKeySize, err := tfr.DictionaryRowKeyTo(rowBuf)
				if err != nil {
					return err
				}
				dictionaryDeltas[string(rowBuf[:dictKeySize])] -= 1
			}
			deleteKeyBytes += row.KeySize()
		}
		deleteNum += len(deleteRows)
	}

	PutRowBuffer(rowBuf)

	mergeNum := len(dictionaryDeltas)
	mergeKeyBytes := 0
	mergeValBytes := mergeNum * DictionaryRowMaxValueSize

	for dictRowKey := range dictionaryDeltas {
		mergeKeyBytes += len(dictRowKey)
	}

	// prepare batch
	totBytes := addKeyBytes + addValBytes +
		updateKeyBytes + updateValBytes +
		deleteKeyBytes +
		2*(mergeKeyBytes+mergeValBytes)

	buf, wb, err := writer.NewBatchEx(store.KVBatchOptions{
		TotalBytes: totBytes,
		NumSets:    addNum + updateNum,
		NumDeletes: deleteNum,
		NumMerges:  mergeNum,
	})
	if err != nil {
		return err
	}
	defer func() {
		_ = wb.Close()
	}()

	// fill the batch
	for _, addRows := range addRowsAll {
		for _, row := range addRows {
			keySize, err := row.KeyTo(buf)
			if err != nil {
				return err
			}
			valSize, err := row.ValueTo(buf[keySize:])
			if err != nil {
				return err
			}
			wb.Set(buf[:keySize], buf[keySize:keySize+valSize])
			buf = buf[keySize+valSize:]
		}
	}

	for _, updateRows := range updateRowsAll {
		for _, row := range updateRows {
			keySize, err := row.KeyTo(buf)
			if err != nil {
				return err
			}
			valSize, err := row.ValueTo(buf[keySize:])
			if err != nil {
				return err
			}
			wb.Set(buf[:keySize], buf[keySize:keySize+valSize])
			buf = buf[keySize+valSize:]
		}
	}

	for _, deleteRows := range deleteRowsAll {
		for _, row := range deleteRows {
			keySize, err := row.KeyTo(buf)
			if err != nil {
				return err
			}
			wb.Delete(buf[:keySize])
			buf = buf[keySize:]
		}
	}

	for dictRowKey, delta := range dictionaryDeltas {
		dictRowKeyLen := copy(buf, dictRowKey)
		binary.LittleEndian.PutUint64(buf[dictRowKeyLen:], uint64(delta))
		wb.Merge(buf[:dictRowKeyLen], buf[dictRowKeyLen:dictRowKeyLen+DictionaryRowMaxValueSize])
		buf = buf[dictRowKeyLen+DictionaryRowMaxValueSize:]
	}

	// write out the batch
	return writer.ExecuteBatch(wb)
}