Example #1
func (udc *UpsideDownCouch) batchRows(writer store.KVWriter, addRows []UpsideDownCouchRow, updateRows []UpsideDownCouchRow, deleteRows []UpsideDownCouchRow) (err error) {

	// prepare batch
	wb := writer.NewBatch()

	// add
	for _, row := range addRows {
		tfr, ok := row.(*TermFrequencyRow)
		if ok {
			// need to increment counter
			dictionaryKey := tfr.DictionaryRowKey()
			wb.Merge(dictionaryKey, dictionaryTermIncr)
		}
		wb.Set(row.Key(), row.Value())
	}

	// update
	for _, row := range updateRows {
		wb.Set(row.Key(), row.Value())
	}

	// delete
	for _, row := range deleteRows {
		tfr, ok := row.(*TermFrequencyRow)
		if ok {
			// need to decrement counter
			dictionaryKey := tfr.DictionaryRowKey()
			wb.Merge(dictionaryKey, dictionaryTermDecr)
		}
		wb.Delete(row.Key())
	}

	// write out the batch
	return wb.Execute()
}
Example #2
func (f *Firestorm) storeVersion(writer store.KVWriter) error {
	vr := NewVersionRow(Version)
	wb := writer.NewBatch()
	wb.Set(vr.Key(), vr.Value())
	err := writer.ExecuteBatch(wb)
	return err
}
Example #3
func (udc *UpsideDownCouch) Update(doc *document.Document) (err error) {
	// do analysis before acquiring write lock
	analysisStart := time.Now()
	resultChan := make(chan *index.AnalysisResult)
	aw := index.NewAnalysisWork(udc, doc, resultChan)
	// put the work on the queue
	go func() {
		udc.analysisQueue.Queue(aw)
	}()

	// wait for the result
	result := <-resultChan
	close(resultChan)
	atomic.AddUint64(&udc.stats.analysisTime, uint64(time.Since(analysisStart)))

	// start a writer for this update
	indexStart := time.Now()
	var kvwriter store.KVWriter
	kvwriter, err = udc.store.Writer()
	if err != nil {
		return
	}
	defer func() {
		if cerr := kvwriter.Close(); err == nil && cerr != nil {
			err = cerr
		}
	}()

	// first we lookup the backindex row for the doc id if it exists
	// lookup the back index row
	var backIndexRow *BackIndexRow
	backIndexRow, err = udc.backIndexRowForDoc(kvwriter, doc.ID)
	if err != nil {
		atomic.AddUint64(&udc.stats.errors, 1)
		return
	}

	// prepare a list of rows
	addRows := make([]UpsideDownCouchRow, 0)
	updateRows := make([]UpsideDownCouchRow, 0)
	deleteRows := make([]UpsideDownCouchRow, 0)

	addRows, updateRows, deleteRows = udc.mergeOldAndNew(backIndexRow, result.Rows, addRows, updateRows, deleteRows)

	err = udc.batchRows(kvwriter, addRows, updateRows, deleteRows)
	if err == nil && backIndexRow == nil {
		udc.m.Lock()
		udc.docCount++
		udc.m.Unlock()
	}
	atomic.AddUint64(&udc.stats.indexTime, uint64(time.Since(indexStart)))
	if err == nil {
		atomic.AddUint64(&udc.stats.updates, 1)
	} else {
		atomic.AddUint64(&udc.stats.errors, 1)
	}
	return
}
Example #4
func (udc *UpsideDownCouch) DeleteInternal(key []byte) (err error) {
	internalRow := NewInternalRow(key, nil)
	var writer store.KVWriter
	writer, err = udc.store.Writer()
	if err != nil {
		return
	}
	defer func() {
		if cerr := writer.Close(); err == nil && cerr != nil {
			err = cerr
		}
	}()
	return writer.Delete(internalRow.Key())
}
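
The examples above share one recurring idiom: the KVWriter is closed in a deferred function that only overwrites the named return value when no earlier error occurred, so a Close failure is reported but never masks the real error. Below is a minimal, self-contained sketch of that idiom; fakeWriter and doWork are hypothetical stand-ins, not bleve's store.KVWriter.

package main

import (
	"errors"
	"fmt"
)

// fakeWriter is a toy closer used only to illustrate the pattern.
type fakeWriter struct{ closeErr error }

func (w *fakeWriter) Close() error { return w.closeErr }

// doWork shows how the named return value err lets the deferred Close
// surface a close failure without clobbering an earlier error.
func doWork(w *fakeWriter, workErr error) (err error) {
	defer func() {
		// only report the Close error if nothing else failed first
		if cerr := w.Close(); err == nil && cerr != nil {
			err = cerr
		}
	}()
	return workErr
}

func main() {
	fmt.Println(doWork(&fakeWriter{closeErr: errors.New("close failed")}, nil))                      // close failed
	fmt.Println(doWork(&fakeWriter{closeErr: errors.New("close failed")}, errors.New("work failed"))) // work failed
}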
Example #5
func (f *Firestorm) Update(doc *document.Document) (err error) {

	// assign this document a number
	doc.Number = atomic.AddUint64(&f.highDocNumber, 1)

	// do analysis before acquiring write lock
	analysisStart := time.Now()
	numPlainTextBytes := doc.NumPlainTextBytes()
	resultChan := make(chan *index.AnalysisResult)
	aw := index.NewAnalysisWork(f, doc, resultChan)

	// put the work on the queue
	f.analysisQueue.Queue(aw)

	// wait for the result
	result := <-resultChan
	close(resultChan)
	atomic.AddUint64(&f.stats.analysisTime, uint64(time.Since(analysisStart)))

	// start a writer for this update
	indexStart := time.Now()
	var kvwriter store.KVWriter
	kvwriter, err = f.store.Writer()
	if err != nil {
		return
	}
	defer func() {
		if cerr := kvwriter.Close(); err == nil && cerr != nil {
			err = cerr
		}
	}()

	var dictionaryDeltas map[string]int64
	dictionaryDeltas, err = f.batchRows(kvwriter, [][]index.IndexRow{result.Rows}, nil)
	if err != nil {
		_ = kvwriter.Close()
		atomic.AddUint64(&f.stats.errors, 1)
		return
	}

	f.compensator.Mutate([]byte(doc.ID), doc.Number)
	f.lookuper.NotifyBatch([]*InFlightItem{{[]byte(doc.ID), doc.Number}})
	f.dictUpdater.NotifyBatch(dictionaryDeltas)

	atomic.AddUint64(&f.stats.indexTime, uint64(time.Since(indexStart)))
	atomic.AddUint64(&f.stats.numPlainTextBytesIndexed, numPlainTextBytes)
	return
}
Example #6
func (udc *UpsideDownCouch) Open() (err error) {
	// install the merge operator
	udc.store.SetMergeOperator(&mergeOperator)

	// now open the kv store
	err = udc.store.Open()
	if err != nil {
		return
	}

	// start a writer for the open process
	var kvwriter store.KVWriter
	kvwriter, err = udc.store.Writer()
	if err != nil {
		return
	}
	defer func() {
		if cerr := kvwriter.Close(); err == nil && cerr != nil {
			err = cerr
		}
	}()

	var value []byte
	value, err = kvwriter.Get(VersionKey)
	if err != nil {
		return
	}

	// init new index OR load schema
	if value == nil {
		err = udc.init(kvwriter)
		if err != nil {
			return
		}
	} else {
		err = udc.loadSchema(kvwriter)
		if err != nil {
			return
		}
	}
	// set doc count
	udc.m.Lock()
	udc.docCount, err = udc.countDocs(kvwriter)
	udc.m.Unlock()
	return
}
Example #7
func (udc *UpsideDownCouch) Delete(id string) (err error) {
	indexStart := time.Now()
	// start a writer for this delete
	var kvwriter store.KVWriter
	kvwriter, err = udc.store.Writer()
	if err != nil {
		return
	}
	defer func() {
		if cerr := kvwriter.Close(); err == nil && cerr != nil {
			err = cerr
		}
	}()

	// lookup the back index row
	var backIndexRow *BackIndexRow
	backIndexRow, err = udc.backIndexRowForDoc(kvwriter, id)
	if err != nil {
		atomic.AddUint64(&udc.stats.errors, 1)
		return
	}
	if backIndexRow == nil {
		atomic.AddUint64(&udc.stats.deletes, 1)
		return
	}

	deleteRows := make([]UpsideDownCouchRow, 0)
	deleteRows = udc.deleteSingle(id, backIndexRow, deleteRows)

	err = udc.batchRows(kvwriter, nil, nil, deleteRows)
	if err == nil {
		udc.m.Lock()
		udc.docCount--
		udc.m.Unlock()
	}
	atomic.AddUint64(&udc.stats.indexTime, uint64(time.Since(indexStart)))
	if err == nil {
		atomic.AddUint64(&udc.stats.deletes, 1)
	} else {
		atomic.AddUint64(&udc.stats.errors, 1)
	}
	return
}
Example #8
func (udc *UpsideDownCouch) batchRows(writer store.KVWriter, addRows []UpsideDownCouchRow, updateRows []UpsideDownCouchRow, deleteRows []UpsideDownCouchRow) (err error) {

	// prepare batch
	wb := writer.NewBatch()

	// add
	for _, row := range addRows {
		tfr, ok := row.(*TermFrequencyRow)
		if ok {
			// need to increment counter
			summaryKey := tfr.SummaryKey()
			wb.Merge(summaryKey, newTermSummaryIncr())
		}
		wb.Set(row.Key(), row.Value())
	}

	// update
	for _, row := range updateRows {
		wb.Set(row.Key(), row.Value())
	}

	// delete
	for _, row := range deleteRows {
		tfr, ok := row.(*TermFrequencyRow)
		if ok {
			// need to decrement counter
			summaryKey := tfr.SummaryKey()
			wb.Merge(summaryKey, newTermSummaryDecr())
		}
		wb.Delete(row.Key())
	}

	// write out the batch
	return wb.Execute()
}
Example #9
func (f *Firestorm) DeleteInternal(key []byte) (err error) {
	internalRow := NewInternalRow(key, nil)
	var writer store.KVWriter
	writer, err = f.store.Writer()
	if err != nil {
		return
	}
	defer func() {
		if cerr := writer.Close(); err == nil && cerr != nil {
			err = cerr
		}
	}()

	wb := writer.NewBatch()
	wb.Delete(internalRow.Key())

	return writer.ExecuteBatch(wb)
}
Example #10
func (udc *SmolderingCouch) DeleteInternal(key []byte) (err error) {
	internalRow := NewInternalRow(key, nil)
	udc.writeMutex.Lock()
	defer udc.writeMutex.Unlock()
	var writer store.KVWriter
	writer, err = udc.store.Writer()
	if err != nil {
		return
	}
	defer func() {
		if cerr := writer.Close(); err == nil && cerr != nil {
			err = cerr
		}
	}()

	batch := writer.NewBatch()
	batch.Delete(internalRow.Key())
	return writer.ExecuteBatch(batch)
}
Example #11
func (udc *UpsideDownCouch) SetInternal(key, val []byte) (err error) {
	internalRow := NewInternalRow(key, val)
	udc.writeMutex.Lock()
	defer udc.writeMutex.Unlock()
	var writer store.KVWriter
	writer, err = udc.store.Writer()
	if err != nil {
		return
	}
	defer func() {
		if cerr := writer.Close(); err == nil && cerr != nil {
			err = cerr
		}
	}()

	batch := writer.NewBatch()
	batch.Set(internalRow.Key(), internalRow.Value())

	return writer.ExecuteBatch(batch)
}
Example #12
func (udc *SmolderingCouch) Update(doc *document.Document) (err error) {

	// get the next available doc number
	doc.Number = atomic.AddUint64(&udc.maxInternalDocID, 1)

	analysisStart := time.Now()
	numPlainTextBytes := doc.NumPlainTextBytes()
	resultChan := make(chan *index.AnalysisResult)
	aw := index.NewAnalysisWork(udc, doc, resultChan)

	// put the work on the queue
	udc.analysisQueue.Queue(aw)

	// wait for the result
	result := <-resultChan
	close(resultChan)
	atomic.AddUint64(&udc.stats.analysisTime, uint64(time.Since(analysisStart)))

	udc.writeMutex.Lock()
	defer udc.writeMutex.Unlock()

	indexReader, err := udc.reader()
	if err != nil {
		return
	}

	// first we lookup the backindex row for the doc id if it exists
	// lookup the back index row
	var backIndexRow *BackIndexRow
	if udc.cf.Lookup([]byte(doc.ID)) {
		backIndexRow, err = indexReader.backIndexRowForDoc(nil, doc.ID)
		if err != nil {
			_ = indexReader.Close()
			atomic.AddUint64(&udc.stats.errors, 1)
			return
		}
	}

	err = indexReader.Close()
	if err != nil {
		return
	}

	// start a writer for this update
	indexStart := time.Now()
	var kvwriter store.KVWriter
	kvwriter, err = udc.store.Writer()
	if err != nil {
		return
	}
	defer func() {
		if cerr := kvwriter.Close(); err == nil && cerr != nil {
			err = cerr
		}
	}()

	// prepare a list of rows
	var addRowsAll [][]SmolderingCouchRow
	var updateRowsAll [][]SmolderingCouchRow
	var deleteRowsAll [][]SmolderingCouchRow

	addRows, updateRows, deleteRows := udc.mergeOldAndNew(doc.ID, backIndexRow, result.Rows)
	if len(addRows) > 0 {
		addRowsAll = append(addRowsAll, addRows)
	}
	if len(updateRows) > 0 {
		updateRowsAll = append(updateRowsAll, updateRows)
	}
	if len(deleteRows) > 0 {
		deleteRowsAll = append(deleteRowsAll, deleteRows)
	}

	err = udc.batchRows(kvwriter, addRowsAll, updateRowsAll, deleteRowsAll)
	if err == nil && backIndexRow == nil {
		udc.m.Lock()
		udc.docCount++
		udc.m.Unlock()
	}
	atomic.AddUint64(&udc.stats.indexTime, uint64(time.Since(indexStart)))
	if err == nil {
		udc.cf.Insert([]byte(doc.ID))
		atomic.AddUint64(&udc.stats.updates, 1)
		atomic.AddUint64(&udc.stats.numPlainTextBytesIndexed, numPlainTextBytes)
	} else {
		atomic.AddUint64(&udc.stats.errors, 1)
	}
	return
}
Example #13
func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) {
	analysisStart := time.Now()

	resultChan := make(chan *index.AnalysisResult, len(batch.IndexOps))

	var numUpdates uint64
	var numPlainTextBytes uint64
	for _, doc := range batch.IndexOps {
		if doc != nil {
			numUpdates++
			numPlainTextBytes += doc.NumPlainTextBytes()
		}
	}

	go func() {
		for _, doc := range batch.IndexOps {
			if doc != nil {
				aw := index.NewAnalysisWork(udc, doc, resultChan)
				// put the work on the queue
				udc.analysisQueue.Queue(aw)
			}
		}
	}()

	// retrieve back index rows concurrent with analysis
	docBackIndexRowErr := error(nil)
	docBackIndexRowCh := make(chan *docBackIndexRow, len(batch.IndexOps))

	udc.writeMutex.Lock()
	defer udc.writeMutex.Unlock()

	go func() {
		defer close(docBackIndexRowCh)

		// open a reader for backindex lookup
		var kvreader store.KVReader
		kvreader, err = udc.store.Reader()
		if err != nil {
			docBackIndexRowErr = err
			return
		}

		for docID, doc := range batch.IndexOps {
			backIndexRow, err := backIndexRowForDoc(kvreader, index.IndexInternalID(docID))
			if err != nil {
				docBackIndexRowErr = err
				return
			}

			docBackIndexRowCh <- &docBackIndexRow{docID, doc, backIndexRow}
		}

		err = kvreader.Close()
		if err != nil {
			docBackIndexRowErr = err
			return
		}
	}()

	// wait for analysis result
	newRowsMap := make(map[string][]index.IndexRow)
	var itemsDeQueued uint64
	for itemsDeQueued < numUpdates {
		result := <-resultChan
		newRowsMap[result.DocID] = result.Rows
		itemsDeQueued++
	}
	close(resultChan)

	atomic.AddUint64(&udc.stats.analysisTime, uint64(time.Since(analysisStart)))

	docsAdded := uint64(0)
	docsDeleted := uint64(0)

	indexStart := time.Now()

	// prepare a list of rows
	var addRowsAll [][]UpsideDownCouchRow
	var updateRowsAll [][]UpsideDownCouchRow
	var deleteRowsAll [][]UpsideDownCouchRow

	// add the internal ops
	var updateRows []UpsideDownCouchRow
	var deleteRows []UpsideDownCouchRow

	for internalKey, internalValue := range batch.InternalOps {
		if internalValue == nil {
			// delete
			deleteInternalRow := NewInternalRow([]byte(internalKey), nil)
			deleteRows = append(deleteRows, deleteInternalRow)
		} else {
			updateInternalRow := NewInternalRow([]byte(internalKey), internalValue)
			updateRows = append(updateRows, updateInternalRow)
		}
	}

	if len(updateRows) > 0 {
		updateRowsAll = append(updateRowsAll, updateRows)
	}
	if len(deleteRows) > 0 {
		deleteRowsAll = append(deleteRowsAll, deleteRows)
	}

	// process back index rows as they arrive
	for dbir := range docBackIndexRowCh {
		if dbir.doc == nil && dbir.backIndexRow != nil {
			// delete
			deleteRows := udc.deleteSingle(dbir.docID, dbir.backIndexRow, nil)
			if len(deleteRows) > 0 {
				deleteRowsAll = append(deleteRowsAll, deleteRows)
			}
			docsDeleted++
		} else if dbir.doc != nil {
			addRows, updateRows, deleteRows := udc.mergeOldAndNew(dbir.backIndexRow, newRowsMap[dbir.docID])
			if len(addRows) > 0 {
				addRowsAll = append(addRowsAll, addRows)
			}
			if len(updateRows) > 0 {
				updateRowsAll = append(updateRowsAll, updateRows)
			}
			if len(deleteRows) > 0 {
				deleteRowsAll = append(deleteRowsAll, deleteRows)
			}
			if dbir.backIndexRow == nil {
				docsAdded++
			}
		}
	}

	if docBackIndexRowErr != nil {
		return docBackIndexRowErr
	}

	// start a writer for this batch
	var kvwriter store.KVWriter
	kvwriter, err = udc.store.Writer()
	if err != nil {
		return
	}

	err = udc.batchRows(kvwriter, addRowsAll, updateRowsAll, deleteRowsAll)
	if err != nil {
		_ = kvwriter.Close()
		atomic.AddUint64(&udc.stats.errors, 1)
		return
	}

	err = kvwriter.Close()

	atomic.AddUint64(&udc.stats.indexTime, uint64(time.Since(indexStart)))

	if err == nil {
		udc.m.Lock()
		udc.docCount += docsAdded
		udc.docCount -= docsDeleted
		udc.m.Unlock()
		atomic.AddUint64(&udc.stats.updates, numUpdates)
		atomic.AddUint64(&udc.stats.deletes, docsDeleted)
		atomic.AddUint64(&udc.stats.batches, 1)
		atomic.AddUint64(&udc.stats.numPlainTextBytesIndexed, numPlainTextBytes)
	} else {
		atomic.AddUint64(&udc.stats.errors, 1)
	}
	return
}
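
The Batch implementations above queue analysis work from a goroutine and then drain exactly as many results from the channel as were queued. A minimal, self-contained sketch of that fan-in pattern follows; the result type and the doc map are illustrative stand-ins, not bleve's AnalysisWork/AnalysisResult.

package main

import "fmt"

// result stands in for an analysis result keyed by document ID.
type result struct {
	docID string
	rows  int
}

func main() {
	docs := map[string]int{"doc1": 3, "doc2": 5, "doc3": 2}

	resultChan := make(chan *result, len(docs))

	// queue the work without blocking the caller
	go func() {
		for id, n := range docs {
			resultChan <- &result{docID: id, rows: n}
		}
	}()

	// drain exactly len(docs) results, then close the channel
	newRowsMap := make(map[string]int)
	for dequeued := 0; dequeued < len(docs); dequeued++ {
		r := <-resultChan
		newRowsMap[r.docID] = r.rows
	}
	close(resultChan)

	fmt.Println(newRowsMap)
}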
Example #14
func (f *Firestorm) Batch(batch *index.Batch) (err error) {

	// acquire enough doc numbers for all updates in the batch
	// FIXME we actually waste doc numbers because deletes are in the
	// same map and we don't need numbers for them
	lastDocNumber := atomic.AddUint64(&f.highDocNumber, uint64(len(batch.IndexOps)))
	firstDocNumber := lastDocNumber - uint64(len(batch.IndexOps)) + 1

	analysisStart := time.Now()
	resultChan := make(chan *index.AnalysisResult)

	var docsUpdated uint64
	var docsDeleted uint64
	for _, doc := range batch.IndexOps {
		if doc != nil {
			doc.Number = firstDocNumber // actually assign doc numbers here
			firstDocNumber++
			docsUpdated++
		} else {
			docsDeleted++
		}
	}

	var detectedUnsafeMutex sync.RWMutex
	detectedUnsafe := false

	go func() {
		sofar := uint64(0)
		for _, doc := range batch.IndexOps {
			if doc != nil {
				sofar++
				if sofar > docsUpdated {
					detectedUnsafeMutex.Lock()
					detectedUnsafe = true
					detectedUnsafeMutex.Unlock()
					return
				}
				aw := index.NewAnalysisWork(f, doc, resultChan)
				// put the work on the queue
				f.analysisQueue.Queue(aw)
			}
		}
	}()

	// extra 1 capacity for internal updates.
	collectRows := make([][]index.IndexRow, 0, docsUpdated+1)

	// wait for the result
	var itemsDeQueued uint64
	for itemsDeQueued < docsUpdated {
		result := <-resultChan
		collectRows = append(collectRows, result.Rows)
		itemsDeQueued++
	}
	close(resultChan)

	detectedUnsafeMutex.RLock()
	defer detectedUnsafeMutex.RUnlock()
	if detectedUnsafe {
		return UnsafeBatchUseDetected
	}

	atomic.AddUint64(&f.stats.analysisTime, uint64(time.Since(analysisStart)))

	var deleteKeys [][]byte
	if len(batch.InternalOps) > 0 {
		// add the internal ops
		updateInternalRows := make([]index.IndexRow, 0, len(batch.InternalOps))
		for internalKey, internalValue := range batch.InternalOps {
			if internalValue == nil {
				// delete
				deleteInternalRow := NewInternalRow([]byte(internalKey), nil)
				deleteKeys = append(deleteKeys, deleteInternalRow.Key())
			} else {
				updateInternalRow := NewInternalRow([]byte(internalKey), internalValue)
				updateInternalRows = append(updateInternalRows, updateInternalRow)
			}
		}
		collectRows = append(collectRows, updateInternalRows)
	}

	inflightItems := make([]*InFlightItem, 0, len(batch.IndexOps))
	for docID, doc := range batch.IndexOps {
		if doc != nil {
			inflightItems = append(inflightItems,
				&InFlightItem{[]byte(docID), doc.Number})
		} else {
			inflightItems = append(inflightItems,
				&InFlightItem{[]byte(docID), 0})
		}
	}

	indexStart := time.Now()

	// start a writer for this batch
	var kvwriter store.KVWriter
	kvwriter, err = f.store.Writer()
	if err != nil {
		return
	}

	var dictionaryDeltas map[string]int64
	dictionaryDeltas, err = f.batchRows(kvwriter, collectRows, deleteKeys)
	if err != nil {
		_ = kvwriter.Close()
		atomic.AddUint64(&f.stats.errors, 1)
		return
	}

	f.compensator.MutateBatch(inflightItems, lastDocNumber)

	err = kvwriter.Close()

	f.lookuper.NotifyBatch(inflightItems)
	f.dictUpdater.NotifyBatch(dictionaryDeltas)

	atomic.AddUint64(&f.stats.indexTime, uint64(time.Since(indexStart)))

	if err == nil {
		atomic.AddUint64(&f.stats.updates, docsUpdated)
		atomic.AddUint64(&f.stats.deletes, docsDeleted)
		atomic.AddUint64(&f.stats.batches, 1)
	} else {
		atomic.AddUint64(&f.stats.errors, 1)
	}

	return
}
Example #15
func (f *Firestorm) batchRows(writer store.KVWriter, rowsOfRows [][]index.IndexRow, deleteKeys [][]byte) (map[string]int64, error) {

	dictionaryDeltas := make(map[string]int64)

	// count up bytes needed for buffering.
	addNum := 0
	addKeyBytes := 0
	addValBytes := 0

	deleteNum := 0
	deleteKeyBytes := 0

	var kbuf []byte

	prepareBuf := func(buf []byte, sizeNeeded int) []byte {
		if cap(buf) < sizeNeeded {
			return make([]byte, sizeNeeded, sizeNeeded+128)
		}
		return buf[0:sizeNeeded]
	}

	for _, rows := range rowsOfRows {
		for _, row := range rows {
			tfr, ok := row.(*TermFreqRow)
			if ok {
				if tfr.Field() != 0 {
					kbuf = prepareBuf(kbuf, tfr.DictionaryRowKeySize())
					klen, err := tfr.DictionaryRowKeyTo(kbuf)
					if err != nil {
						return nil, err
					}

					dictionaryDeltas[string(kbuf[0:klen])] += 1
				}
			}

			addKeyBytes += row.KeySize()
			addValBytes += row.ValueSize()
		}
		addNum += len(rows)
	}

	for _, dk := range deleteKeys {
		deleteKeyBytes += len(dk)
	}
	deleteNum += len(deleteKeys)

	// prepare batch
	totBytes := addKeyBytes + addValBytes + deleteKeyBytes

	buf, wb, err := writer.NewBatchEx(store.KVBatchOptions{
		TotalBytes: totBytes,
		NumSets:    addNum,
		NumDeletes: deleteNum,
		NumMerges:  0,
	})
	if err != nil {
		return nil, err
	}
	defer func() {
		_ = wb.Close()
	}()

	for _, rows := range rowsOfRows {
		for _, row := range rows {
			klen, err := row.KeyTo(buf)
			if err != nil {
				return nil, err
			}

			vlen, err := row.ValueTo(buf[klen:])
			if err != nil {
				return nil, err
			}

			wb.Set(buf[0:klen], buf[klen:klen+vlen])

			buf = buf[klen+vlen:]
		}
	}

	for _, dk := range deleteKeys {
		dklen := copy(buf, dk)
		wb.Delete(buf[0:dklen])
		buf = buf[dklen:]
	}

	// write out the batch
	err = writer.ExecuteBatch(wb)
	if err != nil {
		return nil, err
	}
	return dictionaryDeltas, nil
}
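
The NewBatchEx variants above size everything in a first pass, get one shared buffer back, and then encode each key/value into the front of the remaining buffer before advancing the slice. Here is a minimal, self-contained sketch of that buffer-slicing pattern; fakeBatch is a stand-in, not bleve's store.KVBatch, and the rows map is made up for illustration.

package main

import "fmt"

type kv struct{ key, val []byte }

// fakeBatch records Set calls so the aliased sub-slices can be inspected.
type fakeBatch struct{ sets []kv }

func (b *fakeBatch) Set(key, val []byte) { b.sets = append(b.sets, kv{key, val}) }

func main() {
	rows := map[string]string{"a": "1", "bb": "22"}

	// first pass: count bytes so a single buffer can hold every entry
	total := 0
	for k, v := range rows {
		total += len(k) + len(v)
	}

	buf := make([]byte, total)
	wb := &fakeBatch{}

	// second pass: copy into the front of buf, hand sub-slices to the batch,
	// then advance buf past the bytes just consumed
	for k, v := range rows {
		klen := copy(buf, k)
		vlen := copy(buf[klen:], v)
		wb.Set(buf[:klen], buf[klen:klen+vlen])
		buf = buf[klen+vlen:]
	}

	for _, s := range wb.sets {
		fmt.Printf("%s=%s\n", s.key, s.val)
	}
}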
Example #16
func (udc *UpsideDownCouch) batchRows(writer store.KVWriter, addRowsAll [][]UpsideDownCouchRow, updateRowsAll [][]UpsideDownCouchRow, deleteRowsAll [][]UpsideDownCouchRow) (err error) {

	// prepare batch
	wb := writer.NewBatch()
	defer func() {
		_ = wb.Close()
	}()

	// buffer to work with
	rowBuf := GetRowBuffer()

	dictionaryDeltas := make(map[string]int64)

	// add
	for _, addRows := range addRowsAll {
		for _, row := range addRows {
			tfr, ok := row.(*TermFrequencyRow)
			if ok {
				if tfr.DictionaryRowKeySize() > len(rowBuf) {
					rowBuf = make([]byte, tfr.DictionaryRowKeySize())
				}
				dictKeySize, err := tfr.DictionaryRowKeyTo(rowBuf)
				if err != nil {
					return err
				}
				dictionaryDeltas[string(rowBuf[:dictKeySize])] += 1
			}
			if row.KeySize()+row.ValueSize() > len(rowBuf) {
				rowBuf = make([]byte, row.KeySize()+row.ValueSize())
			}
			keySize, err := row.KeyTo(rowBuf)
			if err != nil {
				return err
			}
			valSize, err := row.ValueTo(rowBuf[keySize:])
			if err != nil {
				return err
			}
			wb.Set(rowBuf[:keySize], rowBuf[keySize:keySize+valSize])
		}
	}

	// update
	for _, updateRows := range updateRowsAll {
		for _, row := range updateRows {
			if row.KeySize()+row.ValueSize() > len(rowBuf) {
				rowBuf = make([]byte, row.KeySize()+row.ValueSize())
			}
			keySize, err := row.KeyTo(rowBuf)
			if err != nil {
				return err
			}
			valSize, err := row.ValueTo(rowBuf[keySize:])
			if err != nil {
				return err
			}
			wb.Set(rowBuf[:keySize], rowBuf[keySize:keySize+valSize])
		}
	}

	// delete
	for _, deleteRows := range deleteRowsAll {
		for _, row := range deleteRows {
			tfr, ok := row.(*TermFrequencyRow)
			if ok {
				// need to decrement counter
				if tfr.DictionaryRowKeySize() > len(rowBuf) {
					rowBuf = make([]byte, tfr.DictionaryRowKeySize())
				}
				dictKeySize, err := tfr.DictionaryRowKeyTo(rowBuf)
				if err != nil {
					return err
				}
				dictionaryDeltas[string(rowBuf[:dictKeySize])] -= 1
			}
			if row.KeySize()+row.ValueSize() > len(rowBuf) {
				rowBuf = make([]byte, row.KeySize()+row.ValueSize())
			}
			keySize, err := row.KeyTo(rowBuf)
			if err != nil {
				return err
			}
			wb.Delete(rowBuf[:keySize])
		}
	}

	if 8 > len(rowBuf) {
		rowBuf = make([]byte, 8)
	}
	for dictRowKey, delta := range dictionaryDeltas {
		binary.LittleEndian.PutUint64(rowBuf, uint64(delta))
		wb.Merge([]byte(dictRowKey), rowBuf[0:8])
	}

	PutRowBuffer(rowBuf)

	// write out the batch
	return writer.ExecuteBatch(wb)
}
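
The example above accumulates per-term deltas and hands them to the store as merge operands encoded as 8 little-endian bytes. The short sketch below shows just that encoding and how an additive merge would apply a negative delta via two's-complement wraparound; applyDelta is an illustrative stand-in, not bleve's mergeOperator.

package main

import (
	"encoding/binary"
	"fmt"
)

// encodeDelta packs a signed delta into the 8-byte little-endian form used as a merge operand.
func encodeDelta(delta int64) []byte {
	buf := make([]byte, 8)
	binary.LittleEndian.PutUint64(buf, uint64(delta))
	return buf
}

// applyDelta mimics an additive merge: adding the two's-complement encoding
// of a negative delta decrements the counter.
func applyDelta(existing uint64, operand []byte) uint64 {
	return existing + binary.LittleEndian.Uint64(operand)
}

func main() {
	count := uint64(10)
	count = applyDelta(count, encodeDelta(+3)) // 13
	count = applyDelta(count, encodeDelta(-2)) // 11
	fmt.Println(count)
}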
Example #17
func (udc *SmolderingCouch) batchRows(writer store.KVWriter, addRowsAll [][]SmolderingCouchRow, updateRowsAll [][]SmolderingCouchRow, deleteRowsAll [][]SmolderingCouchRow) (err error) {
	dictionaryDeltas := make(map[string]int64)

	// count up bytes needed for buffering.
	addNum := 0
	addKeyBytes := 0
	addValBytes := 0

	updateNum := 0
	updateKeyBytes := 0
	updateValBytes := 0

	deleteNum := 0
	deleteKeyBytes := 0

	rowBuf := GetRowBuffer()

	for _, addRows := range addRowsAll {
		for _, row := range addRows {
			tfr, ok := row.(*TermFrequencyRow)
			if ok {
				if tfr.DictionaryRowKeySize() > len(rowBuf) {
					rowBuf = make([]byte, tfr.DictionaryRowKeySize())
				}
				dictKeySize, err := tfr.DictionaryRowKeyTo(rowBuf)
				if err != nil {
					return err
				}
				dictionaryDeltas[string(rowBuf[:dictKeySize])] += 1
			}
			addKeyBytes += row.KeySize()
			addValBytes += row.ValueSize()
		}
		addNum += len(addRows)
	}

	for _, updateRows := range updateRowsAll {
		for _, row := range updateRows {
			updateKeyBytes += row.KeySize()
			updateValBytes += row.ValueSize()
		}
		updateNum += len(updateRows)
	}

	for _, deleteRows := range deleteRowsAll {
		for _, row := range deleteRows {
			tfr, ok := row.(*TermFrequencyRow)
			if ok {
				// need to decrement counter
				if tfr.DictionaryRowKeySize() > len(rowBuf) {
					rowBuf = make([]byte, tfr.DictionaryRowKeySize())
				}
				dictKeySize, err := tfr.DictionaryRowKeyTo(rowBuf)
				if err != nil {
					return err
				}
				dictionaryDeltas[string(rowBuf[:dictKeySize])] -= 1
			}
			deleteKeyBytes += row.KeySize()
		}
		deleteNum += len(deleteRows)
	}

	PutRowBuffer(rowBuf)

	mergeNum := len(dictionaryDeltas)
	mergeKeyBytes := 0
	mergeValBytes := mergeNum * DictionaryRowMaxValueSize

	for dictRowKey := range dictionaryDeltas {
		mergeKeyBytes += len(dictRowKey)
	}

	// prepare batch
	totBytes := addKeyBytes + addValBytes +
		updateKeyBytes + updateValBytes +
		deleteKeyBytes +
		2*(mergeKeyBytes+mergeValBytes)

	buf, wb, err := writer.NewBatchEx(store.KVBatchOptions{
		TotalBytes: totBytes,
		NumSets:    addNum + updateNum,
		NumDeletes: deleteNum,
		NumMerges:  mergeNum,
	})
	if err != nil {
		return err
	}
	defer func() {
		_ = wb.Close()
	}()

	// fill the batch
	for _, addRows := range addRowsAll {
		for _, row := range addRows {
			keySize, err := row.KeyTo(buf)
			if err != nil {
				return err
			}
			valSize, err := row.ValueTo(buf[keySize:])
			if err != nil {
				return err
			}
			wb.Set(buf[:keySize], buf[keySize:keySize+valSize])
			buf = buf[keySize+valSize:]
		}
	}

	for _, updateRows := range updateRowsAll {
		for _, row := range updateRows {
			keySize, err := row.KeyTo(buf)
			if err != nil {
				return err
			}
			valSize, err := row.ValueTo(buf[keySize:])
			if err != nil {
				return err
			}
			wb.Set(buf[:keySize], buf[keySize:keySize+valSize])
			buf = buf[keySize+valSize:]
		}
	}

	for _, deleteRows := range deleteRowsAll {
		for _, row := range deleteRows {
			keySize, err := row.KeyTo(buf)
			if err != nil {
				return err
			}
			wb.Delete(buf[:keySize])
			buf = buf[keySize:]
		}
	}

	for dictRowKey, delta := range dictionaryDeltas {
		dictRowKeyLen := copy(buf, dictRowKey)
		binary.LittleEndian.PutUint64(buf[dictRowKeyLen:], uint64(delta))
		wb.Merge(buf[:dictRowKeyLen], buf[dictRowKeyLen:dictRowKeyLen+DictionaryRowMaxValueSize])
		buf = buf[dictRowKeyLen+DictionaryRowMaxValueSize:]
	}

	// write out the batch
	return writer.ExecuteBatch(wb)
}
Example #18
func (udc *SmolderingCouch) Open() (err error) {
	// acquire the write mutex for the duration of Open()
	udc.writeMutex.Lock()
	defer udc.writeMutex.Unlock()

	// open the kv store
	storeConstructor := registry.KVStoreConstructorByName(udc.storeName)
	if storeConstructor == nil {
		err = index.ErrorUnknownStorageType
		return
	}

	// now open the store
	udc.store, err = storeConstructor(&mergeOperator, udc.storeConfig)
	if err != nil {
		return
	}

	udc.cf = cuckoofilter.NewDefaultCuckooFilter()

	// start a reader to look at the index
	var kvreader store.KVReader
	kvreader, err = udc.store.Reader()
	if err != nil {
		return
	}

	var value []byte
	value, err = kvreader.Get(VersionKey)
	if err != nil {
		_ = kvreader.Close()
		return
	}

	if value != nil {
		err = udc.loadSchema(kvreader)
		if err != nil {
			_ = kvreader.Close()
			return
		}

		// set doc count
		udc.m.Lock()
		udc.docCount, udc.maxInternalDocID, err = udc.countDocs(kvreader)
		udc.m.Unlock()

		err = kvreader.Close()
	} else {
		// new index, close the reader and open writer to init
		err = kvreader.Close()
		if err != nil {
			return
		}

		var kvwriter store.KVWriter
		kvwriter, err = udc.store.Writer()
		if err != nil {
			return
		}
		defer func() {
			if cerr := kvwriter.Close(); err == nil && cerr != nil {
				err = cerr
			}
		}()

		// init the index
		err = udc.init(kvwriter)
	}

	return
}
Example #19
func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) {
	analysisStart := time.Now()
	resultChan := make(chan *index.AnalysisResult)

	var numUpdates uint64
	for _, doc := range batch.IndexOps {
		if doc != nil {
			numUpdates++
		}
	}

	var detectedUnsafeMutex sync.RWMutex
	detectedUnsafe := false

	go func() {
		sofar := uint64(0)
		for _, doc := range batch.IndexOps {
			if doc != nil {
				sofar++
				if sofar > numUpdates {
					detectedUnsafeMutex.Lock()
					detectedUnsafe = true
					detectedUnsafeMutex.Unlock()
					return
				}
				aw := index.NewAnalysisWork(udc, doc, resultChan)
				// put the work on the queue
				udc.analysisQueue.Queue(aw)
			}
		}
	}()

	newRowsMap := make(map[string][]index.IndexRow)
	// wait for the result
	var itemsDeQueued uint64
	for itemsDeQueued < numUpdates {
		result := <-resultChan
		newRowsMap[result.DocID] = result.Rows
		itemsDeQueued++
	}
	close(resultChan)

	detectedUnsafeMutex.RLock()
	defer detectedUnsafeMutex.RUnlock()
	if detectedUnsafe {
		return UnsafeBatchUseDetected
	}

	atomic.AddUint64(&udc.stats.analysisTime, uint64(time.Since(analysisStart)))

	indexStart := time.Now()
	// start a writer for this batch
	var kvwriter store.KVWriter
	kvwriter, err = udc.store.Writer()
	if err != nil {
		return
	}

	// first lookup all the back index rows
	var backIndexRows map[string]*BackIndexRow
	backIndexRows, err = udc.backIndexRowsForBatch(kvwriter, batch)
	if err != nil {
		_ = kvwriter.Close()
		return
	}

	// prepare a list of rows
	addRows := make([]UpsideDownCouchRow, 0)
	updateRows := make([]UpsideDownCouchRow, 0)
	deleteRows := make([]UpsideDownCouchRow, 0)

	docsAdded := uint64(0)
	docsDeleted := uint64(0)
	for docID, doc := range batch.IndexOps {
		backIndexRow := backIndexRows[docID]
		if doc == nil && backIndexRow != nil {
			// delete
			deleteRows = udc.deleteSingle(docID, backIndexRow, deleteRows)
			docsDeleted++
		} else if doc != nil {
			addRows, updateRows, deleteRows = udc.mergeOldAndNew(backIndexRow, newRowsMap[docID], addRows, updateRows, deleteRows)
			if backIndexRow == nil {
				docsAdded++
			}
		}
	}

	// add the internal ops
	for internalKey, internalValue := range batch.InternalOps {
		if internalValue == nil {
			// delete
			deleteInternalRow := NewInternalRow([]byte(internalKey), nil)
			deleteRows = append(deleteRows, deleteInternalRow)
		} else {
			updateInternalRow := NewInternalRow([]byte(internalKey), internalValue)
			updateRows = append(updateRows, updateInternalRow)
		}
	}

	err = udc.batchRows(kvwriter, addRows, updateRows, deleteRows)
	if err != nil {
		_ = kvwriter.Close()
		atomic.AddUint64(&udc.stats.errors, 1)
		return
	}

	err = kvwriter.Close()
	atomic.AddUint64(&udc.stats.indexTime, uint64(time.Since(indexStart)))

	if err == nil {
		udc.m.Lock()
		udc.docCount += docsAdded
		udc.docCount -= docsDeleted
		udc.m.Unlock()
		atomic.AddUint64(&udc.stats.updates, numUpdates)
		atomic.AddUint64(&udc.stats.deletes, docsDeleted)
		atomic.AddUint64(&udc.stats.batches, 1)
	} else {
		atomic.AddUint64(&udc.stats.errors, 1)
	}
	return
}
Example #20
func (udc *SmolderingCouch) Delete(id string) (err error) {
	indexStart := time.Now()

	udc.writeMutex.Lock()
	defer udc.writeMutex.Unlock()

	indexReader, err := udc.reader()
	if err != nil {
		return
	}

	// first we lookup the backindex row for the doc id if it exists
	// lookup the back index row
	var backIndexRow *BackIndexRow
	backIndexRow, err = indexReader.backIndexRowForDoc(nil, id)
	if err != nil {
		_ = indexReader.Close()
		atomic.AddUint64(&udc.stats.errors, 1)
		return
	}

	err = indexReader.Close()
	if err != nil {
		return
	}

	if backIndexRow == nil {
		atomic.AddUint64(&udc.stats.deletes, 1)
		return
	}

	// start a writer for this delete
	var kvwriter store.KVWriter
	kvwriter, err = udc.store.Writer()
	if err != nil {
		return
	}
	defer func() {
		if cerr := kvwriter.Close(); err == nil && cerr != nil {
			err = cerr
		}
	}()

	var deleteRowsAll [][]SmolderingCouchRow

	deleteRows := udc.deleteSingle(backIndexRow.docNumber, backIndexRow, nil)
	if len(deleteRows) > 0 {
		deleteRowsAll = append(deleteRowsAll, deleteRows)
	}

	err = udc.batchRows(kvwriter, nil, nil, deleteRowsAll)
	if err == nil {
		udc.m.Lock()
		udc.docCount--
		udc.m.Unlock()
	}
	atomic.AddUint64(&udc.stats.indexTime, uint64(time.Since(indexStart)))
	if err == nil {
		udc.cf.Delete([]byte(id))
		atomic.AddUint64(&udc.stats.deletes, 1)
	} else {
		atomic.AddUint64(&udc.stats.errors, 1)
	}
	return
}
Example #21
func (udc *SmolderingCouch) Batch(batch *index.Batch) (err error) {
	// acquire enough doc numbers for all updates in the batch
	// FIXME we actually waste doc numbers because deletes are in the
	// same map and we don't need numbers for them
	lastDocNumber := atomic.AddUint64(&udc.maxInternalDocID, uint64(len(batch.IndexOps)))
	nextDocNumber := lastDocNumber - uint64(len(batch.IndexOps)) + 1

	analysisStart := time.Now()

	resultChan := make(chan *index.AnalysisResult, len(batch.IndexOps))

	var numUpdates uint64
	var numPlainTextBytes uint64
	for _, doc := range batch.IndexOps {
		if doc != nil {
			doc.Number = nextDocNumber // actually assign doc numbers here
			nextDocNumber++
			numUpdates++
			numPlainTextBytes += doc.NumPlainTextBytes()
		}
	}

	go func() {
		for _, doc := range batch.IndexOps {
			if doc != nil {
				aw := index.NewAnalysisWork(udc, doc, resultChan)
				// put the work on the queue
				udc.analysisQueue.Queue(aw)
			}
		}
	}()

	// retrieve back index rows concurrent with analysis
	docBackIndexRowErr := error(nil)
	docBackIndexRowCh := make(chan *docBackIndexRow, len(batch.IndexOps))

	udc.writeMutex.Lock()
	defer udc.writeMutex.Unlock()

	go func() {
		defer close(docBackIndexRowCh)

		// open a reader for backindex lookup

		indexReader, err := udc.reader()
		if err != nil {
			docBackIndexRowErr = err
			return
		}

		for docID, doc := range batch.IndexOps {
			var backIndexRow *BackIndexRow
			if udc.cf.Lookup([]byte(docID)) {
				backIndexRow, err = indexReader.backIndexRowForDoc(nil, docID)
				if err != nil {
					docBackIndexRowErr = err
					return
				}
			}

			var docNumber []byte
			if backIndexRow != nil {
				docNumber = backIndexRow.docNumber
			}
			docBackIndexRowCh <- &docBackIndexRow{docNumber, doc, backIndexRow}
		}

		err = indexReader.Close()
		if err != nil {
			docBackIndexRowErr = err
			return
		}
	}()

	// wait for analysis result
	newRowsMap := make(map[string][]index.IndexRow)
	var itemsDeQueued uint64
	for itemsDeQueued < numUpdates {
		result := <-resultChan
		newRowsMap[result.DocID] = result.Rows
		itemsDeQueued++
	}
	close(resultChan)

	atomic.AddUint64(&udc.stats.analysisTime, uint64(time.Since(analysisStart)))

	docsAdded := uint64(0)
	docsDeleted := uint64(0)

	indexStart := time.Now()

	// prepare a list of rows
	var addRowsAll [][]SmolderingCouchRow
	var updateRowsAll [][]SmolderingCouchRow
	var deleteRowsAll [][]SmolderingCouchRow

	// add the internal ops
	var updateRows []SmolderingCouchRow
	var deleteRows []SmolderingCouchRow

	for internalKey, internalValue := range batch.InternalOps {
		if internalValue == nil {
			// delete
			deleteInternalRow := NewInternalRow([]byte(internalKey), nil)
			deleteRows = append(deleteRows, deleteInternalRow)
		} else {
			updateInternalRow := NewInternalRow([]byte(internalKey), internalValue)
			updateRows = append(updateRows, updateInternalRow)
		}
	}

	if len(updateRows) > 0 {
		updateRowsAll = append(updateRowsAll, updateRows)
	}
	if len(deleteRows) > 0 {
		deleteRowsAll = append(deleteRowsAll, deleteRows)
	}

	// process back index rows as they arrive
	for dbir := range docBackIndexRowCh {
		if dbir.doc == nil && dbir.backIndexRow != nil {
			// delete
			deleteRows := udc.deleteSingle(dbir.docID, dbir.backIndexRow, nil)
			if len(deleteRows) > 0 {
				deleteRowsAll = append(deleteRowsAll, deleteRows)
			}
			docsDeleted++
		} else if dbir.doc != nil {
			addRows, updateRows, deleteRows := udc.mergeOldAndNew(dbir.doc.ID, dbir.backIndexRow, newRowsMap[dbir.doc.ID])
			if len(addRows) > 0 {
				addRowsAll = append(addRowsAll, addRows)
			}
			if len(updateRows) > 0 {
				updateRowsAll = append(updateRowsAll, updateRows)
			}
			if len(deleteRows) > 0 {
				deleteRowsAll = append(deleteRowsAll, deleteRows)
			}
			if dbir.backIndexRow == nil {
				docsAdded++
			}
		}
	}

	if docBackIndexRowErr != nil {
		return docBackIndexRowErr
	}

	// start a writer for this batch
	var kvwriter store.KVWriter
	kvwriter, err = udc.store.Writer()
	if err != nil {
		return
	}

	err = udc.batchRows(kvwriter, addRowsAll, updateRowsAll, deleteRowsAll)
	if err != nil {
		_ = kvwriter.Close()
		atomic.AddUint64(&udc.stats.errors, 1)
		return
	}

	err = kvwriter.Close()

	atomic.AddUint64(&udc.stats.indexTime, uint64(time.Since(indexStart)))

	if err == nil {
		udc.m.Lock()
		udc.docCount += docsAdded
		udc.docCount -= docsDeleted
		udc.m.Unlock()
		for did, doc := range batch.IndexOps {
			if doc != nil {
				udc.cf.Insert([]byte(did))
			} else {
				udc.cf.Delete([]byte(did))
			}
		}
		atomic.AddUint64(&udc.stats.updates, numUpdates)
		atomic.AddUint64(&udc.stats.deletes, docsDeleted)
		atomic.AddUint64(&udc.stats.batches, 1)
		atomic.AddUint64(&udc.stats.numPlainTextBytesIndexed, numPlainTextBytes)
	} else {
		atomic.AddUint64(&udc.stats.errors, 1)
	}
	return
}
Example #22
func (udc *UpsideDownCouch) batchRows(writer store.KVWriter, addRows []UpsideDownCouchRow, updateRows []UpsideDownCouchRow, deleteRows []UpsideDownCouchRow) (err error) {

	// prepare batch
	wb := writer.NewBatch()

	// buffer to work with
	rowBuf := GetRowBuffer()

	// add
	for _, row := range addRows {
		tfr, ok := row.(*TermFrequencyRow)
		if ok {
			if tfr.DictionaryRowKeySize() > len(rowBuf) {
				rowBuf = make([]byte, tfr.DictionaryRowKeySize())
			}
			dictKeySize, err := tfr.DictionaryRowKeyTo(rowBuf)
			if err != nil {
				return err
			}
			wb.Merge(rowBuf[:dictKeySize], dictionaryTermIncr)
		}
		if row.KeySize()+row.ValueSize() > len(rowBuf) {
			rowBuf = make([]byte, row.KeySize()+row.ValueSize())
		}
		keySize, err := row.KeyTo(rowBuf)
		if err != nil {
			return err
		}
		valSize, err := row.ValueTo(rowBuf[keySize:])
		if err != nil {
			return err
		}
		wb.Set(rowBuf[:keySize], rowBuf[keySize:keySize+valSize])
	}

	// update
	for _, row := range updateRows {
		if row.KeySize()+row.ValueSize() > len(rowBuf) {
			rowBuf = make([]byte, row.KeySize()+row.ValueSize())
		}
		keySize, err := row.KeyTo(rowBuf)
		if err != nil {
			return err
		}
		valSize, err := row.ValueTo(rowBuf[keySize:])
		if err != nil {
			return err
		}
		wb.Set(rowBuf[:keySize], rowBuf[keySize:keySize+valSize])
	}

	// delete
	for _, row := range deleteRows {
		tfr, ok := row.(*TermFrequencyRow)
		if ok {
			// need to decrement counter
			if tfr.DictionaryRowKeySize() > len(rowBuf) {
				rowBuf = make([]byte, tfr.DictionaryRowKeySize())
			}
			dictKeySize, err := tfr.DictionaryRowKeyTo(rowBuf)
			if err != nil {
				return err
			}
			wb.Merge(rowBuf[:dictKeySize], dictionaryTermDecr)
		}
		if row.KeySize()+row.ValueSize() > len(rowBuf) {
			rowBuf = make([]byte, row.KeySize()+row.ValueSize())
		}
		keySize, err := row.KeyTo(rowBuf)
		if err != nil {
			return err
		}
		wb.Delete(rowBuf[:keySize])
	}

	PutRowBuffer(rowBuf)

	// write out the batch
	return writer.ExecuteBatch(wb)
}