func (udc *UpsideDownCouch) batchRows(writer store.KVWriter, addRows []UpsideDownCouchRow, updateRows []UpsideDownCouchRow, deleteRows []UpsideDownCouchRow) (err error) {
	// prepare batch
	wb := writer.NewBatch()

	// add
	for _, row := range addRows {
		tfr, ok := row.(*TermFrequencyRow)
		if ok {
			// need to increment counter
			dictionaryKey := tfr.DictionaryRowKey()
			wb.Merge(dictionaryKey, dictionaryTermIncr)
		}
		wb.Set(row.Key(), row.Value())
	}

	// update
	for _, row := range updateRows {
		wb.Set(row.Key(), row.Value())
	}

	// delete
	for _, row := range deleteRows {
		tfr, ok := row.(*TermFrequencyRow)
		if ok {
			// need to decrement counter
			dictionaryKey := tfr.DictionaryRowKey()
			wb.Merge(dictionaryKey, dictionaryTermDecr)
		}
		wb.Delete(row.Key())
	}

	// write out the batch
	return wb.Execute()
}
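The dictionary counters above are maintained through the KV store's merge operator: dictionaryTermIncr and dictionaryTermDecr are pre-encoded deltas, and the store folds them into the stored counter when the batch is applied, avoiding a read-modify-write per term. The following is a rough sketch of the idea only, not the library's actual mergeOperator; it assumes deltas and counters are 8-byte little-endian uint64 values (matching the binary.LittleEndian.PutUint64 encoding used later in this code) and that encoding/binary is imported.

// fullMergeCounter is a hypothetical FullMerge-style function for a
// counter merge operator: it folds little-endian uint64 deltas into an
// existing 8-byte counter value. Sketch only; not bleve's mergeOperator.
func fullMergeCounter(existingValue []byte, operands [][]byte) ([]byte, bool) {
	var total uint64
	if len(existingValue) == 8 {
		total = binary.LittleEndian.Uint64(existingValue)
	}
	for _, op := range operands {
		if len(op) != 8 {
			return nil, false // malformed delta
		}
		// negative deltas arrive two's-complement encoded, so unsigned
		// addition wraps to the correct result
		total += binary.LittleEndian.Uint64(op)
	}
	buf := make([]byte, 8)
	binary.LittleEndian.PutUint64(buf, total)
	return buf, true
}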
func (f *Firestorm) storeVersion(writer store.KVWriter) error {
	vr := NewVersionRow(Version)
	wb := writer.NewBatch()
	wb.Set(vr.Key(), vr.Value())
	return writer.ExecuteBatch(wb)
}
func (udc *UpsideDownCouch) Update(doc *document.Document) (err error) {
	// do analysis before acquiring write lock
	analysisStart := time.Now()
	resultChan := make(chan *index.AnalysisResult)
	aw := index.NewAnalysisWork(udc, doc, resultChan)

	// put the work on the queue
	go func() {
		udc.analysisQueue.Queue(aw)
	}()

	// wait for the result
	result := <-resultChan
	close(resultChan)
	atomic.AddUint64(&udc.stats.analysisTime, uint64(time.Since(analysisStart)))

	// start a writer for this update
	indexStart := time.Now()
	var kvwriter store.KVWriter
	kvwriter, err = udc.store.Writer()
	if err != nil {
		return
	}
	defer func() {
		if cerr := kvwriter.Close(); err == nil && cerr != nil {
			err = cerr
		}
	}()

	// lookup the back index row for the doc id if it exists
	var backIndexRow *BackIndexRow
	backIndexRow, err = udc.backIndexRowForDoc(kvwriter, doc.ID)
	if err != nil {
		atomic.AddUint64(&udc.stats.errors, 1)
		return
	}

	// prepare a list of rows
	addRows := make([]UpsideDownCouchRow, 0)
	updateRows := make([]UpsideDownCouchRow, 0)
	deleteRows := make([]UpsideDownCouchRow, 0)

	addRows, updateRows, deleteRows = udc.mergeOldAndNew(backIndexRow, result.Rows, addRows, updateRows, deleteRows)

	err = udc.batchRows(kvwriter, addRows, updateRows, deleteRows)
	if err == nil && backIndexRow == nil {
		udc.m.Lock()
		udc.docCount++
		udc.m.Unlock()
	}
	atomic.AddUint64(&udc.stats.indexTime, uint64(time.Since(indexStart)))
	if err == nil {
		atomic.AddUint64(&udc.stats.updates, 1)
	} else {
		atomic.AddUint64(&udc.stats.errors, 1)
	}
	return
}
func (udc *UpsideDownCouch) DeleteInternal(key []byte) (err error) {
	internalRow := NewInternalRow(key, nil)

	var writer store.KVWriter
	writer, err = udc.store.Writer()
	if err != nil {
		return
	}
	defer func() {
		if cerr := writer.Close(); err == nil && cerr != nil {
			err = cerr
		}
	}()

	return writer.Delete(internalRow.Key())
}
func (f *Firestorm) Update(doc *document.Document) (err error) {
	// assign this document a number
	doc.Number = atomic.AddUint64(&f.highDocNumber, 1)

	// do analysis before acquiring write lock
	analysisStart := time.Now()
	numPlainTextBytes := doc.NumPlainTextBytes()
	resultChan := make(chan *index.AnalysisResult)
	aw := index.NewAnalysisWork(f, doc, resultChan)

	// put the work on the queue
	f.analysisQueue.Queue(aw)

	// wait for the result
	result := <-resultChan
	close(resultChan)
	atomic.AddUint64(&f.stats.analysisTime, uint64(time.Since(analysisStart)))

	// start a writer for this update
	indexStart := time.Now()
	var kvwriter store.KVWriter
	kvwriter, err = f.store.Writer()
	if err != nil {
		return
	}
	defer func() {
		if cerr := kvwriter.Close(); err == nil && cerr != nil {
			err = cerr
		}
	}()

	var dictionaryDeltas map[string]int64
	dictionaryDeltas, err = f.batchRows(kvwriter, [][]index.IndexRow{result.Rows}, nil)
	if err != nil {
		// the deferred Close handles the writer; closing it here as well
		// would close it twice
		atomic.AddUint64(&f.stats.errors, 1)
		return
	}

	f.compensator.Mutate([]byte(doc.ID), doc.Number)
	f.lookuper.NotifyBatch([]*InFlightItem{{[]byte(doc.ID), doc.Number}})
	f.dictUpdater.NotifyBatch(dictionaryDeltas)

	atomic.AddUint64(&f.stats.indexTime, uint64(time.Since(indexStart)))
	atomic.AddUint64(&f.stats.numPlainTextBytesIndexed, numPlainTextBytes)
	return
}
func (udc *UpsideDownCouch) Open() (err error) {
	// install the merge operator
	udc.store.SetMergeOperator(&mergeOperator)

	// now open the kv store
	err = udc.store.Open()
	if err != nil {
		return
	}

	// start a writer for the open process
	var kvwriter store.KVWriter
	kvwriter, err = udc.store.Writer()
	if err != nil {
		return
	}
	defer func() {
		if cerr := kvwriter.Close(); err == nil && cerr != nil {
			err = cerr
		}
	}()

	var value []byte
	value, err = kvwriter.Get(VersionKey)
	if err != nil {
		return
	}

	// init new index OR load schema
	if value == nil {
		err = udc.init(kvwriter)
		if err != nil {
			return
		}
	} else {
		err = udc.loadSchema(kvwriter)
		if err != nil {
			return
		}
	}

	// set doc count
	udc.m.Lock()
	udc.docCount, err = udc.countDocs(kvwriter)
	udc.m.Unlock()
	return
}
func (udc *UpsideDownCouch) Delete(id string) (err error) {
	indexStart := time.Now()

	// start a writer for this delete
	var kvwriter store.KVWriter
	kvwriter, err = udc.store.Writer()
	if err != nil {
		return
	}
	defer func() {
		if cerr := kvwriter.Close(); err == nil && cerr != nil {
			err = cerr
		}
	}()

	// lookup the back index row
	var backIndexRow *BackIndexRow
	backIndexRow, err = udc.backIndexRowForDoc(kvwriter, id)
	if err != nil {
		atomic.AddUint64(&udc.stats.errors, 1)
		return
	}
	if backIndexRow == nil {
		atomic.AddUint64(&udc.stats.deletes, 1)
		return
	}

	deleteRows := make([]UpsideDownCouchRow, 0)
	deleteRows = udc.deleteSingle(id, backIndexRow, deleteRows)

	err = udc.batchRows(kvwriter, nil, nil, deleteRows)
	if err == nil {
		udc.m.Lock()
		udc.docCount--
		udc.m.Unlock()
	}
	atomic.AddUint64(&udc.stats.indexTime, uint64(time.Since(indexStart)))
	if err == nil {
		atomic.AddUint64(&udc.stats.deletes, 1)
	} else {
		atomic.AddUint64(&udc.stats.errors, 1)
	}
	return
}
func (udc *UpsideDownCouch) batchRows(writer store.KVWriter, addRows []UpsideDownCouchRow, updateRows []UpsideDownCouchRow, deleteRows []UpsideDownCouchRow) (err error) {
	// prepare batch
	wb := writer.NewBatch()

	// add
	for _, row := range addRows {
		tfr, ok := row.(*TermFrequencyRow)
		if ok {
			// need to increment counter
			summaryKey := tfr.SummaryKey()
			wb.Merge(summaryKey, newTermSummaryIncr())
		}
		wb.Set(row.Key(), row.Value())
	}

	// update
	for _, row := range updateRows {
		wb.Set(row.Key(), row.Value())
	}

	// delete
	for _, row := range deleteRows {
		tfr, ok := row.(*TermFrequencyRow)
		if ok {
			// need to decrement counter
			summaryKey := tfr.SummaryKey()
			wb.Merge(summaryKey, newTermSummaryDecr())
		}
		wb.Delete(row.Key())
	}

	// write out the batch
	return wb.Execute()
}
func (f *Firestorm) DeleteInternal(key []byte) (err error) {
	internalRow := NewInternalRow(key, nil)

	var writer store.KVWriter
	writer, err = f.store.Writer()
	if err != nil {
		return
	}
	defer func() {
		if cerr := writer.Close(); err == nil && cerr != nil {
			err = cerr
		}
	}()

	wb := writer.NewBatch()
	wb.Delete(internalRow.Key())
	return writer.ExecuteBatch(wb)
}
func (udc *SmolderingCouch) DeleteInternal(key []byte) (err error) {
	internalRow := NewInternalRow(key, nil)

	udc.writeMutex.Lock()
	defer udc.writeMutex.Unlock()

	var writer store.KVWriter
	writer, err = udc.store.Writer()
	if err != nil {
		return
	}
	defer func() {
		if cerr := writer.Close(); err == nil && cerr != nil {
			err = cerr
		}
	}()

	batch := writer.NewBatch()
	batch.Delete(internalRow.Key())
	return writer.ExecuteBatch(batch)
}
func (udc *UpsideDownCouch) SetInternal(key, val []byte) (err error) {
	internalRow := NewInternalRow(key, val)

	udc.writeMutex.Lock()
	defer udc.writeMutex.Unlock()

	var writer store.KVWriter
	writer, err = udc.store.Writer()
	if err != nil {
		return
	}
	defer func() {
		if cerr := writer.Close(); err == nil && cerr != nil {
			err = cerr
		}
	}()

	batch := writer.NewBatch()
	batch.Set(internalRow.Key(), internalRow.Value())
	return writer.ExecuteBatch(batch)
}
func (udc *SmolderingCouch) Update(doc *document.Document) (err error) {
	// get the next available doc number
	doc.Number = atomic.AddUint64(&udc.maxInternalDocID, 1)

	// do analysis before acquiring write lock
	analysisStart := time.Now()
	numPlainTextBytes := doc.NumPlainTextBytes()
	resultChan := make(chan *index.AnalysisResult)
	aw := index.NewAnalysisWork(udc, doc, resultChan)

	// put the work on the queue
	udc.analysisQueue.Queue(aw)

	// wait for the result
	result := <-resultChan
	close(resultChan)
	atomic.AddUint64(&udc.stats.analysisTime, uint64(time.Since(analysisStart)))

	udc.writeMutex.Lock()
	defer udc.writeMutex.Unlock()

	indexReader, err := udc.reader()
	if err != nil {
		return
	}

	// lookup the back index row for the doc id if it exists
	var backIndexRow *BackIndexRow
	if udc.cf.Lookup([]byte(doc.ID)) {
		backIndexRow, err = indexReader.backIndexRowForDoc(nil, doc.ID)
		if err != nil {
			_ = indexReader.Close()
			atomic.AddUint64(&udc.stats.errors, 1)
			return
		}
	}

	err = indexReader.Close()
	if err != nil {
		return
	}

	// start a writer for this update
	indexStart := time.Now()
	var kvwriter store.KVWriter
	kvwriter, err = udc.store.Writer()
	if err != nil {
		return
	}
	defer func() {
		if cerr := kvwriter.Close(); err == nil && cerr != nil {
			err = cerr
		}
	}()

	// prepare a list of rows
	var addRowsAll [][]SmolderingCouchRow
	var updateRowsAll [][]SmolderingCouchRow
	var deleteRowsAll [][]SmolderingCouchRow

	addRows, updateRows, deleteRows := udc.mergeOldAndNew(doc.ID, backIndexRow, result.Rows)
	if len(addRows) > 0 {
		addRowsAll = append(addRowsAll, addRows)
	}
	if len(updateRows) > 0 {
		updateRowsAll = append(updateRowsAll, updateRows)
	}
	if len(deleteRows) > 0 {
		deleteRowsAll = append(deleteRowsAll, deleteRows)
	}

	err = udc.batchRows(kvwriter, addRowsAll, updateRowsAll, deleteRowsAll)
	if err == nil && backIndexRow == nil {
		udc.m.Lock()
		udc.docCount++
		udc.m.Unlock()
	}
	atomic.AddUint64(&udc.stats.indexTime, uint64(time.Since(indexStart)))
	if err == nil {
		udc.cf.Insert([]byte(doc.ID))
		atomic.AddUint64(&udc.stats.updates, 1)
		atomic.AddUint64(&udc.stats.numPlainTextBytesIndexed, numPlainTextBytes)
	} else {
		atomic.AddUint64(&udc.stats.errors, 1)
	}
	return
}
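The udc.cf cuckoo filter is an in-memory approximate-membership structure: Lookup can return false positives but never false negatives, so a negative answer lets Update skip the back index read entirely, while a positive answer still has to be confirmed by backIndexRowForDoc. A minimal usage sketch of the filter, restricted to the calls that appear in this code:

// Sketch of the cuckoo filter's role. A false Lookup means "definitely
// never indexed"; a true Lookup means "maybe indexed, go confirm".
cf := cuckoofilter.NewDefaultCuckooFilter()
cf.Insert([]byte("doc-1"))
if cf.Lookup([]byte("doc-1")) {
	// possibly present: confirm with the authoritative back index lookup
}
cf.Delete([]byte("doc-1")) // remove when the document is deleted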
func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) {
	analysisStart := time.Now()

	resultChan := make(chan *index.AnalysisResult, len(batch.IndexOps))

	var numUpdates uint64
	var numPlainTextBytes uint64
	for _, doc := range batch.IndexOps {
		if doc != nil {
			numUpdates++
			numPlainTextBytes += doc.NumPlainTextBytes()
		}
	}

	go func() {
		for _, doc := range batch.IndexOps {
			if doc != nil {
				aw := index.NewAnalysisWork(udc, doc, resultChan)
				// put the work on the queue
				udc.analysisQueue.Queue(aw)
			}
		}
	}()

	// retrieve back index rows concurrent with analysis
	docBackIndexRowErr := error(nil)
	docBackIndexRowCh := make(chan *docBackIndexRow, len(batch.IndexOps))

	udc.writeMutex.Lock()
	defer udc.writeMutex.Unlock()

	go func() {
		defer close(docBackIndexRowCh)

		// open a reader for backindex lookup; use a goroutine-local err so
		// we do not race on the named return value
		kvreader, err := udc.store.Reader()
		if err != nil {
			docBackIndexRowErr = err
			return
		}

		for docID, doc := range batch.IndexOps {
			backIndexRow, err := backIndexRowForDoc(kvreader, index.IndexInternalID(docID))
			if err != nil {
				docBackIndexRowErr = err
				return
			}
			docBackIndexRowCh <- &docBackIndexRow{docID, doc, backIndexRow}
		}

		err = kvreader.Close()
		if err != nil {
			docBackIndexRowErr = err
			return
		}
	}()

	// wait for analysis result
	newRowsMap := make(map[string][]index.IndexRow)
	var itemsDeQueued uint64
	for itemsDeQueued < numUpdates {
		result := <-resultChan
		newRowsMap[result.DocID] = result.Rows
		itemsDeQueued++
	}
	close(resultChan)

	atomic.AddUint64(&udc.stats.analysisTime, uint64(time.Since(analysisStart)))

	docsAdded := uint64(0)
	docsDeleted := uint64(0)

	indexStart := time.Now()

	// prepare a list of rows
	var addRowsAll [][]UpsideDownCouchRow
	var updateRowsAll [][]UpsideDownCouchRow
	var deleteRowsAll [][]UpsideDownCouchRow

	// add the internal ops
	var updateRows []UpsideDownCouchRow
	var deleteRows []UpsideDownCouchRow
	for internalKey, internalValue := range batch.InternalOps {
		if internalValue == nil {
			// delete
			deleteInternalRow := NewInternalRow([]byte(internalKey), nil)
			deleteRows = append(deleteRows, deleteInternalRow)
		} else {
			updateInternalRow := NewInternalRow([]byte(internalKey), internalValue)
			updateRows = append(updateRows, updateInternalRow)
		}
	}
	if len(updateRows) > 0 {
		updateRowsAll = append(updateRowsAll, updateRows)
	}
	if len(deleteRows) > 0 {
		deleteRowsAll = append(deleteRowsAll, deleteRows)
	}

	// process back index rows as they arrive
	for dbir := range docBackIndexRowCh {
		if dbir.doc == nil && dbir.backIndexRow != nil {
			// delete
			deleteRows := udc.deleteSingle(dbir.docID, dbir.backIndexRow, nil)
			if len(deleteRows) > 0 {
				deleteRowsAll = append(deleteRowsAll, deleteRows)
			}
			docsDeleted++
		} else if dbir.doc != nil {
			addRows, updateRows, deleteRows := udc.mergeOldAndNew(dbir.backIndexRow, newRowsMap[dbir.docID])
			if len(addRows) > 0 {
				addRowsAll = append(addRowsAll, addRows)
			}
			if len(updateRows) > 0 {
				updateRowsAll = append(updateRowsAll, updateRows)
			}
			if len(deleteRows) > 0 {
				deleteRowsAll = append(deleteRowsAll, deleteRows)
			}
			if dbir.backIndexRow == nil {
				docsAdded++
			}
		}
	}

	if docBackIndexRowErr != nil {
		return docBackIndexRowErr
	}

	// start a writer for this batch
	var kvwriter store.KVWriter
	kvwriter, err = udc.store.Writer()
	if err != nil {
		return
	}

	err = udc.batchRows(kvwriter, addRowsAll, updateRowsAll, deleteRowsAll)
	if err != nil {
		_ = kvwriter.Close()
		atomic.AddUint64(&udc.stats.errors, 1)
		return
	}

	err = kvwriter.Close()

	atomic.AddUint64(&udc.stats.indexTime, uint64(time.Since(indexStart)))

	if err == nil {
		udc.m.Lock()
		udc.docCount += docsAdded
		udc.docCount -= docsDeleted
		udc.m.Unlock()
		atomic.AddUint64(&udc.stats.updates, numUpdates)
		atomic.AddUint64(&udc.stats.deletes, docsDeleted)
		atomic.AddUint64(&udc.stats.batches, 1)
		atomic.AddUint64(&udc.stats.numPlainTextBytesIndexed, numPlainTextBytes)
	} else {
		atomic.AddUint64(&udc.stats.errors, 1)
	}
	return
}
func (f *Firestorm) Batch(batch *index.Batch) (err error) {
	// acquire enough doc numbers for all updates in the batch
	// FIXME we actually waste doc numbers because deletes are in the
	// same map and we don't need numbers for them
	lastDocNumber := atomic.AddUint64(&f.highDocNumber, uint64(len(batch.IndexOps)))
	firstDocNumber := lastDocNumber - uint64(len(batch.IndexOps)) + 1

	analysisStart := time.Now()
	resultChan := make(chan *index.AnalysisResult)

	var docsUpdated uint64
	var docsDeleted uint64
	for _, doc := range batch.IndexOps {
		if doc != nil {
			doc.Number = firstDocNumber // actually assign doc numbers here
			firstDocNumber++
			docsUpdated++
		} else {
			docsDeleted++
		}
	}

	var detectedUnsafeMutex sync.RWMutex
	detectedUnsafe := false

	go func() {
		sofar := uint64(0)
		for _, doc := range batch.IndexOps {
			if doc != nil {
				sofar++
				if sofar > docsUpdated {
					detectedUnsafeMutex.Lock()
					detectedUnsafe = true
					detectedUnsafeMutex.Unlock()
					return
				}
				aw := index.NewAnalysisWork(f, doc, resultChan)
				// put the work on the queue
				f.analysisQueue.Queue(aw)
			}
		}
	}()

	// extra 1 capacity for internal updates
	collectRows := make([][]index.IndexRow, 0, docsUpdated+1)

	// wait for the result
	var itemsDeQueued uint64
	for itemsDeQueued < docsUpdated {
		result := <-resultChan
		collectRows = append(collectRows, result.Rows)
		itemsDeQueued++
	}
	close(resultChan)

	detectedUnsafeMutex.RLock()
	defer detectedUnsafeMutex.RUnlock()
	if detectedUnsafe {
		return UnsafeBatchUseDetected
	}

	atomic.AddUint64(&f.stats.analysisTime, uint64(time.Since(analysisStart)))

	var deleteKeys [][]byte
	if len(batch.InternalOps) > 0 {
		// add the internal ops
		updateInternalRows := make([]index.IndexRow, 0, len(batch.InternalOps))
		for internalKey, internalValue := range batch.InternalOps {
			if internalValue == nil {
				// delete
				deleteInternalRow := NewInternalRow([]byte(internalKey), nil)
				deleteKeys = append(deleteKeys, deleteInternalRow.Key())
			} else {
				updateInternalRow := NewInternalRow([]byte(internalKey), internalValue)
				updateInternalRows = append(updateInternalRows, updateInternalRow)
			}
		}
		collectRows = append(collectRows, updateInternalRows)
	}

	inflightItems := make([]*InFlightItem, 0, len(batch.IndexOps))
	for docID, doc := range batch.IndexOps {
		if doc != nil {
			inflightItems = append(inflightItems, &InFlightItem{[]byte(docID), doc.Number})
		} else {
			inflightItems = append(inflightItems, &InFlightItem{[]byte(docID), 0})
		}
	}

	indexStart := time.Now()

	// start a writer for this batch
	var kvwriter store.KVWriter
	kvwriter, err = f.store.Writer()
	if err != nil {
		return
	}

	var dictionaryDeltas map[string]int64
	dictionaryDeltas, err = f.batchRows(kvwriter, collectRows, deleteKeys)
	if err != nil {
		_ = kvwriter.Close()
		atomic.AddUint64(&f.stats.errors, 1)
		return
	}

	f.compensator.MutateBatch(inflightItems, lastDocNumber)

	err = kvwriter.Close()

	f.lookuper.NotifyBatch(inflightItems)
	f.dictUpdater.NotifyBatch(dictionaryDeltas)

	atomic.AddUint64(&f.stats.indexTime, uint64(time.Since(indexStart)))

	if err == nil {
		atomic.AddUint64(&f.stats.updates, docsUpdated)
		atomic.AddUint64(&f.stats.deletes, docsDeleted)
		atomic.AddUint64(&f.stats.batches, 1)
	} else {
		atomic.AddUint64(&f.stats.errors, 1)
	}
	return
}
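The doc-number bookkeeping at the top of Batch is a single atomic reservation: adding len(batch.IndexOps) to highDocNumber returns the last number of a freshly owned contiguous range, and subtracting the count (plus one) recovers the first. For example, if highDocNumber is 100 and the batch holds 5 ops, AddUint64 returns 105 and the batch owns 101..105. A standalone sketch of the pattern (names are illustrative, not from the source):

// reserveRange atomically claims n consecutive numbers from counter and
// returns the first and last values of the claimed range. Concurrent
// callers always receive disjoint ranges.
func reserveRange(counter *uint64, n uint64) (first, last uint64) {
	last = atomic.AddUint64(counter, n)
	first = last - n + 1
	return first, last
}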
func (f *Firestorm) batchRows(writer store.KVWriter, rowsOfRows [][]index.IndexRow, deleteKeys [][]byte) (map[string]int64, error) {
	dictionaryDeltas := make(map[string]int64)

	// count up bytes needed for buffering
	addNum := 0
	addKeyBytes := 0
	addValBytes := 0
	deleteNum := 0
	deleteKeyBytes := 0

	var kbuf []byte
	prepareBuf := func(buf []byte, sizeNeeded int) []byte {
		if cap(buf) < sizeNeeded {
			return make([]byte, sizeNeeded, sizeNeeded+128)
		}
		return buf[0:sizeNeeded]
	}

	for _, rows := range rowsOfRows {
		for _, row := range rows {
			tfr, ok := row.(*TermFreqRow)
			if ok {
				if tfr.Field() != 0 {
					kbuf = prepareBuf(kbuf, tfr.DictionaryRowKeySize())
					klen, err := tfr.DictionaryRowKeyTo(kbuf)
					if err != nil {
						return nil, err
					}
					dictionaryDeltas[string(kbuf[0:klen])]++
				}
			}
			addKeyBytes += row.KeySize()
			addValBytes += row.ValueSize()
		}
		addNum += len(rows)
	}

	for _, dk := range deleteKeys {
		deleteKeyBytes += len(dk)
	}
	deleteNum += len(deleteKeys)

	// prepare batch
	totBytes := addKeyBytes + addValBytes + deleteKeyBytes

	buf, wb, err := writer.NewBatchEx(store.KVBatchOptions{
		TotalBytes: totBytes,
		NumSets:    addNum,
		NumDeletes: deleteNum,
		NumMerges:  0,
	})
	if err != nil {
		return nil, err
	}
	defer func() {
		_ = wb.Close()
	}()

	for _, rows := range rowsOfRows {
		for _, row := range rows {
			klen, err := row.KeyTo(buf)
			if err != nil {
				return nil, err
			}
			vlen, err := row.ValueTo(buf[klen:])
			if err != nil {
				return nil, err
			}
			wb.Set(buf[0:klen], buf[klen:klen+vlen])
			buf = buf[klen+vlen:]
		}
	}

	for _, dk := range deleteKeys {
		dklen := copy(buf, dk)
		wb.Delete(buf[0:dklen])
		buf = buf[dklen:]
	}

	// write out the batch
	err = writer.ExecuteBatch(wb)
	if err != nil {
		return nil, err
	}
	return dictionaryDeltas, nil
}
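NewBatchEx is the allocation-saving variant of NewBatch: the caller declares the batch shape up front (total bytes plus the number of sets, deletes, and merges) and receives one contiguous buffer; each row is then encoded at the front of the buffer and the slice is advanced past it, so every key and value handed to the batch aliases the same backing array. A stripped-down sketch of that slicing discipline, using the KeyTo/ValueTo row methods seen above; it assumes wb is the store.KVBatch and buf the buffer returned by NewBatchEx, correctly pre-sized via KVBatchOptions:

// fillSets encodes rows into a pre-sized shared buffer, re-slicing past
// each key/value pair, and returns the unused remainder of the buffer.
func fillSets(buf []byte, wb store.KVBatch, rows []index.IndexRow) ([]byte, error) {
	for _, row := range rows {
		klen, err := row.KeyTo(buf)
		if err != nil {
			return nil, err
		}
		vlen, err := row.ValueTo(buf[klen:])
		if err != nil {
			return nil, err
		}
		wb.Set(buf[:klen], buf[klen:klen+vlen])
		buf = buf[klen+vlen:] // advance past the bytes now owned by the batch
	}
	return buf, nil
}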
func (udc *UpsideDownCouch) batchRows(writer store.KVWriter, addRowsAll [][]UpsideDownCouchRow, updateRowsAll [][]UpsideDownCouchRow, deleteRowsAll [][]UpsideDownCouchRow) (err error) {
	// prepare batch
	wb := writer.NewBatch()
	defer func() {
		_ = wb.Close()
	}()

	// buffer to work with
	rowBuf := GetRowBuffer()

	dictionaryDeltas := make(map[string]int64)

	// add
	for _, addRows := range addRowsAll {
		for _, row := range addRows {
			tfr, ok := row.(*TermFrequencyRow)
			if ok {
				// need to increment counter
				if tfr.DictionaryRowKeySize() > len(rowBuf) {
					rowBuf = make([]byte, tfr.DictionaryRowKeySize())
				}
				dictKeySize, err := tfr.DictionaryRowKeyTo(rowBuf)
				if err != nil {
					return err
				}
				dictionaryDeltas[string(rowBuf[:dictKeySize])]++
			}
			if row.KeySize()+row.ValueSize() > len(rowBuf) {
				rowBuf = make([]byte, row.KeySize()+row.ValueSize())
			}
			keySize, err := row.KeyTo(rowBuf)
			if err != nil {
				return err
			}
			valSize, err := row.ValueTo(rowBuf[keySize:])
			if err != nil {
				return err
			}
			wb.Set(rowBuf[:keySize], rowBuf[keySize:keySize+valSize])
		}
	}

	// update
	for _, updateRows := range updateRowsAll {
		for _, row := range updateRows {
			if row.KeySize()+row.ValueSize() > len(rowBuf) {
				rowBuf = make([]byte, row.KeySize()+row.ValueSize())
			}
			keySize, err := row.KeyTo(rowBuf)
			if err != nil {
				return err
			}
			valSize, err := row.ValueTo(rowBuf[keySize:])
			if err != nil {
				return err
			}
			wb.Set(rowBuf[:keySize], rowBuf[keySize:keySize+valSize])
		}
	}

	// delete
	for _, deleteRows := range deleteRowsAll {
		for _, row := range deleteRows {
			tfr, ok := row.(*TermFrequencyRow)
			if ok {
				// need to decrement counter
				if tfr.DictionaryRowKeySize() > len(rowBuf) {
					rowBuf = make([]byte, tfr.DictionaryRowKeySize())
				}
				dictKeySize, err := tfr.DictionaryRowKeyTo(rowBuf)
				if err != nil {
					return err
				}
				dictionaryDeltas[string(rowBuf[:dictKeySize])]--
			}
			if row.KeySize()+row.ValueSize() > len(rowBuf) {
				rowBuf = make([]byte, row.KeySize()+row.ValueSize())
			}
			keySize, err := row.KeyTo(rowBuf)
			if err != nil {
				return err
			}
			wb.Delete(rowBuf[:keySize])
		}
	}

	if 8 > len(rowBuf) {
		rowBuf = make([]byte, 8)
	}
	for dictRowKey, delta := range dictionaryDeltas {
		binary.LittleEndian.PutUint64(rowBuf, uint64(delta))
		wb.Merge([]byte(dictRowKey), rowBuf[0:8])
	}

	PutRowBuffer(rowBuf)

	// write out the batch
	return writer.ExecuteBatch(wb)
}
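GetRowBuffer and PutRowBuffer imply a recycled scratch-buffer pool, so each batch reuses a working buffer instead of reallocating one. A minimal sketch of how such helpers could be built on sync.Pool; this is hypothetical, and both the actual implementation and the size constant may differ:

// Hypothetical pool-backed implementations of the row buffer helpers.
const rowBufferSize = 4 * 1024 // assumed default scratch size

var rowBufferPool = sync.Pool{
	New: func() interface{} {
		return make([]byte, rowBufferSize)
	},
}

func GetRowBuffer() []byte {
	return rowBufferPool.Get().([]byte)
}

// PutRowBuffer returns a buffer to the pool; buffers grown by a caller
// are recycled too, so later batches benefit from the larger size.
func PutRowBuffer(buf []byte) {
	rowBufferPool.Put(buf)
}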
func (udc *SmolderingCouch) batchRows(writer store.KVWriter, addRowsAll [][]SmolderingCouchRow, updateRowsAll [][]SmolderingCouchRow, deleteRowsAll [][]SmolderingCouchRow) (err error) {
	dictionaryDeltas := make(map[string]int64)

	// count up bytes needed for buffering
	addNum := 0
	addKeyBytes := 0
	addValBytes := 0
	updateNum := 0
	updateKeyBytes := 0
	updateValBytes := 0
	deleteNum := 0
	deleteKeyBytes := 0

	rowBuf := GetRowBuffer()

	for _, addRows := range addRowsAll {
		for _, row := range addRows {
			tfr, ok := row.(*TermFrequencyRow)
			if ok {
				// need to increment counter
				if tfr.DictionaryRowKeySize() > len(rowBuf) {
					rowBuf = make([]byte, tfr.DictionaryRowKeySize())
				}
				dictKeySize, err := tfr.DictionaryRowKeyTo(rowBuf)
				if err != nil {
					return err
				}
				dictionaryDeltas[string(rowBuf[:dictKeySize])]++
			}
			addKeyBytes += row.KeySize()
			addValBytes += row.ValueSize()
		}
		addNum += len(addRows)
	}

	for _, updateRows := range updateRowsAll {
		for _, row := range updateRows {
			updateKeyBytes += row.KeySize()
			updateValBytes += row.ValueSize()
		}
		updateNum += len(updateRows)
	}

	for _, deleteRows := range deleteRowsAll {
		for _, row := range deleteRows {
			tfr, ok := row.(*TermFrequencyRow)
			if ok {
				// need to decrement counter
				if tfr.DictionaryRowKeySize() > len(rowBuf) {
					rowBuf = make([]byte, tfr.DictionaryRowKeySize())
				}
				dictKeySize, err := tfr.DictionaryRowKeyTo(rowBuf)
				if err != nil {
					return err
				}
				dictionaryDeltas[string(rowBuf[:dictKeySize])]--
			}
			deleteKeyBytes += row.KeySize()
		}
		deleteNum += len(deleteRows)
	}

	PutRowBuffer(rowBuf)

	mergeNum := len(dictionaryDeltas)
	mergeKeyBytes := 0
	mergeValBytes := mergeNum * DictionaryRowMaxValueSize
	for dictRowKey := range dictionaryDeltas {
		mergeKeyBytes += len(dictRowKey)
	}

	// prepare batch
	totBytes := addKeyBytes + addValBytes +
		updateKeyBytes + updateValBytes +
		deleteKeyBytes +
		2*(mergeKeyBytes+mergeValBytes)

	buf, wb, err := writer.NewBatchEx(store.KVBatchOptions{
		TotalBytes: totBytes,
		NumSets:    addNum + updateNum,
		NumDeletes: deleteNum,
		NumMerges:  mergeNum,
	})
	if err != nil {
		return err
	}
	defer func() {
		_ = wb.Close()
	}()

	// fill the batch
	for _, addRows := range addRowsAll {
		for _, row := range addRows {
			keySize, err := row.KeyTo(buf)
			if err != nil {
				return err
			}
			valSize, err := row.ValueTo(buf[keySize:])
			if err != nil {
				return err
			}
			wb.Set(buf[:keySize], buf[keySize:keySize+valSize])
			buf = buf[keySize+valSize:]
		}
	}

	for _, updateRows := range updateRowsAll {
		for _, row := range updateRows {
			keySize, err := row.KeyTo(buf)
			if err != nil {
				return err
			}
			valSize, err := row.ValueTo(buf[keySize:])
			if err != nil {
				return err
			}
			wb.Set(buf[:keySize], buf[keySize:keySize+valSize])
			buf = buf[keySize+valSize:]
		}
	}

	for _, deleteRows := range deleteRowsAll {
		for _, row := range deleteRows {
			keySize, err := row.KeyTo(buf)
			if err != nil {
				return err
			}
			wb.Delete(buf[:keySize])
			buf = buf[keySize:]
		}
	}

	for dictRowKey, delta := range dictionaryDeltas {
		dictRowKeyLen := copy(buf, dictRowKey)
		binary.LittleEndian.PutUint64(buf[dictRowKeyLen:], uint64(delta))
		wb.Merge(buf[:dictRowKeyLen], buf[dictRowKeyLen:dictRowKeyLen+DictionaryRowMaxValueSize])
		buf = buf[dictRowKeyLen+DictionaryRowMaxValueSize:]
	}

	// write out the batch
	return writer.ExecuteBatch(wb)
}
func (udc *SmolderingCouch) Open() (err error) {
	// acquire the write mutex for the duration of Open()
	udc.writeMutex.Lock()
	defer udc.writeMutex.Unlock()

	// open the kv store
	storeConstructor := registry.KVStoreConstructorByName(udc.storeName)
	if storeConstructor == nil {
		err = index.ErrorUnknownStorageType
		return
	}

	// now open the store
	udc.store, err = storeConstructor(&mergeOperator, udc.storeConfig)
	if err != nil {
		return
	}

	udc.cf = cuckoofilter.NewDefaultCuckooFilter()

	// start a reader to look at the index
	var kvreader store.KVReader
	kvreader, err = udc.store.Reader()
	if err != nil {
		return
	}

	var value []byte
	value, err = kvreader.Get(VersionKey)
	if err != nil {
		_ = kvreader.Close()
		return
	}

	if value != nil {
		err = udc.loadSchema(kvreader)
		if err != nil {
			_ = kvreader.Close()
			return
		}
		// set doc count
		udc.m.Lock()
		udc.docCount, udc.maxInternalDocID, err = udc.countDocs(kvreader)
		udc.m.Unlock()
		if err != nil {
			_ = kvreader.Close()
			return
		}
		err = kvreader.Close()
	} else {
		// new index, close the reader and open writer to init
		err = kvreader.Close()
		if err != nil {
			return
		}

		var kvwriter store.KVWriter
		kvwriter, err = udc.store.Writer()
		if err != nil {
			return
		}
		defer func() {
			if cerr := kvwriter.Close(); err == nil && cerr != nil {
				err = cerr
			}
		}()

		// init the index
		err = udc.init(kvwriter)
	}

	return
}
func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) {
	analysisStart := time.Now()
	resultChan := make(chan *index.AnalysisResult)

	var numUpdates uint64
	for _, doc := range batch.IndexOps {
		if doc != nil {
			numUpdates++
		}
	}

	var detectedUnsafeMutex sync.RWMutex
	detectedUnsafe := false

	go func() {
		sofar := uint64(0)
		for _, doc := range batch.IndexOps {
			if doc != nil {
				sofar++
				if sofar > numUpdates {
					detectedUnsafeMutex.Lock()
					detectedUnsafe = true
					detectedUnsafeMutex.Unlock()
					return
				}
				aw := index.NewAnalysisWork(udc, doc, resultChan)
				// put the work on the queue
				udc.analysisQueue.Queue(aw)
			}
		}
	}()

	newRowsMap := make(map[string][]index.IndexRow)

	// wait for the result
	var itemsDeQueued uint64
	for itemsDeQueued < numUpdates {
		result := <-resultChan
		newRowsMap[result.DocID] = result.Rows
		itemsDeQueued++
	}
	close(resultChan)

	detectedUnsafeMutex.RLock()
	defer detectedUnsafeMutex.RUnlock()
	if detectedUnsafe {
		return UnsafeBatchUseDetected
	}

	atomic.AddUint64(&udc.stats.analysisTime, uint64(time.Since(analysisStart)))

	indexStart := time.Now()

	// start a writer for this batch
	var kvwriter store.KVWriter
	kvwriter, err = udc.store.Writer()
	if err != nil {
		return
	}

	// first lookup all the back index rows
	var backIndexRows map[string]*BackIndexRow
	backIndexRows, err = udc.backIndexRowsForBatch(kvwriter, batch)
	if err != nil {
		_ = kvwriter.Close()
		return
	}

	// prepare a list of rows
	addRows := make([]UpsideDownCouchRow, 0)
	updateRows := make([]UpsideDownCouchRow, 0)
	deleteRows := make([]UpsideDownCouchRow, 0)

	docsAdded := uint64(0)
	docsDeleted := uint64(0)
	for docID, doc := range batch.IndexOps {
		backIndexRow := backIndexRows[docID]
		if doc == nil && backIndexRow != nil {
			// delete
			deleteRows = udc.deleteSingle(docID, backIndexRow, deleteRows)
			docsDeleted++
		} else if doc != nil {
			addRows, updateRows, deleteRows = udc.mergeOldAndNew(backIndexRow, newRowsMap[docID], addRows, updateRows, deleteRows)
			if backIndexRow == nil {
				docsAdded++
			}
		}
	}

	// add the internal ops
	for internalKey, internalValue := range batch.InternalOps {
		if internalValue == nil {
			// delete
			deleteInternalRow := NewInternalRow([]byte(internalKey), nil)
			deleteRows = append(deleteRows, deleteInternalRow)
		} else {
			updateInternalRow := NewInternalRow([]byte(internalKey), internalValue)
			updateRows = append(updateRows, updateInternalRow)
		}
	}

	err = udc.batchRows(kvwriter, addRows, updateRows, deleteRows)
	if err != nil {
		_ = kvwriter.Close()
		atomic.AddUint64(&udc.stats.errors, 1)
		return
	}

	err = kvwriter.Close()

	atomic.AddUint64(&udc.stats.indexTime, uint64(time.Since(indexStart)))

	if err == nil {
		udc.m.Lock()
		udc.docCount += docsAdded
		udc.docCount -= docsDeleted
		udc.m.Unlock()
		atomic.AddUint64(&udc.stats.updates, numUpdates)
		atomic.AddUint64(&udc.stats.deletes, docsDeleted)
		atomic.AddUint64(&udc.stats.batches, 1)
	} else {
		atomic.AddUint64(&udc.stats.errors, 1)
	}
	return
}
func (udc *SmolderingCouch) Delete(id string) (err error) {
	indexStart := time.Now()

	udc.writeMutex.Lock()
	defer udc.writeMutex.Unlock()

	indexReader, err := udc.reader()
	if err != nil {
		return
	}

	// lookup the back index row for the doc id if it exists
	var backIndexRow *BackIndexRow
	backIndexRow, err = indexReader.backIndexRowForDoc(nil, id)
	if err != nil {
		_ = indexReader.Close()
		atomic.AddUint64(&udc.stats.errors, 1)
		return
	}

	err = indexReader.Close()
	if err != nil {
		return
	}

	if backIndexRow == nil {
		atomic.AddUint64(&udc.stats.deletes, 1)
		return
	}

	// start a writer for this delete
	var kvwriter store.KVWriter
	kvwriter, err = udc.store.Writer()
	if err != nil {
		return
	}
	defer func() {
		if cerr := kvwriter.Close(); err == nil && cerr != nil {
			err = cerr
		}
	}()

	var deleteRowsAll [][]SmolderingCouchRow
	deleteRows := udc.deleteSingle(backIndexRow.docNumber, backIndexRow, nil)
	if len(deleteRows) > 0 {
		deleteRowsAll = append(deleteRowsAll, deleteRows)
	}

	err = udc.batchRows(kvwriter, nil, nil, deleteRowsAll)
	if err == nil {
		udc.m.Lock()
		udc.docCount--
		udc.m.Unlock()
	}
	atomic.AddUint64(&udc.stats.indexTime, uint64(time.Since(indexStart)))
	if err == nil {
		udc.cf.Delete([]byte(id))
		atomic.AddUint64(&udc.stats.deletes, 1)
	} else {
		atomic.AddUint64(&udc.stats.errors, 1)
	}
	return
}
func (udc *SmolderingCouch) Batch(batch *index.Batch) (err error) {
	// acquire enough doc numbers for all updates in the batch
	// FIXME we actually waste doc numbers because deletes are in the
	// same map and we don't need numbers for them
	lastDocNumber := atomic.AddUint64(&udc.maxInternalDocID, uint64(len(batch.IndexOps)))
	nextDocNumber := lastDocNumber - uint64(len(batch.IndexOps)) + 1

	analysisStart := time.Now()
	resultChan := make(chan *index.AnalysisResult, len(batch.IndexOps))

	var numUpdates uint64
	var numPlainTextBytes uint64
	for _, doc := range batch.IndexOps {
		if doc != nil {
			doc.Number = nextDocNumber // actually assign doc numbers here
			nextDocNumber++
			numUpdates++
			numPlainTextBytes += doc.NumPlainTextBytes()
		}
	}

	go func() {
		for _, doc := range batch.IndexOps {
			if doc != nil {
				aw := index.NewAnalysisWork(udc, doc, resultChan)
				// put the work on the queue
				udc.analysisQueue.Queue(aw)
			}
		}
	}()

	// retrieve back index rows concurrent with analysis
	docBackIndexRowErr := error(nil)
	docBackIndexRowCh := make(chan *docBackIndexRow, len(batch.IndexOps))

	udc.writeMutex.Lock()
	defer udc.writeMutex.Unlock()

	go func() {
		defer close(docBackIndexRowCh)

		// open a reader for backindex lookup
		indexReader, err := udc.reader()
		if err != nil {
			docBackIndexRowErr = err
			return
		}

		for docID, doc := range batch.IndexOps {
			var backIndexRow *BackIndexRow
			if udc.cf.Lookup([]byte(docID)) {
				backIndexRow, err = indexReader.backIndexRowForDoc(nil, docID)
				if err != nil {
					docBackIndexRowErr = err
					return
				}
			}
			var docNumber []byte
			if backIndexRow != nil {
				docNumber = backIndexRow.docNumber
			}
			docBackIndexRowCh <- &docBackIndexRow{docNumber, doc, backIndexRow}
		}

		err = indexReader.Close()
		if err != nil {
			docBackIndexRowErr = err
			return
		}
	}()

	// wait for analysis result
	newRowsMap := make(map[string][]index.IndexRow)
	var itemsDeQueued uint64
	for itemsDeQueued < numUpdates {
		result := <-resultChan
		newRowsMap[result.DocID] = result.Rows
		itemsDeQueued++
	}
	close(resultChan)

	atomic.AddUint64(&udc.stats.analysisTime, uint64(time.Since(analysisStart)))

	docsAdded := uint64(0)
	docsDeleted := uint64(0)

	indexStart := time.Now()

	// prepare a list of rows
	var addRowsAll [][]SmolderingCouchRow
	var updateRowsAll [][]SmolderingCouchRow
	var deleteRowsAll [][]SmolderingCouchRow

	// add the internal ops
	var updateRows []SmolderingCouchRow
	var deleteRows []SmolderingCouchRow
	for internalKey, internalValue := range batch.InternalOps {
		if internalValue == nil {
			// delete
			deleteInternalRow := NewInternalRow([]byte(internalKey), nil)
			deleteRows = append(deleteRows, deleteInternalRow)
		} else {
			updateInternalRow := NewInternalRow([]byte(internalKey), internalValue)
			updateRows = append(updateRows, updateInternalRow)
		}
	}
	if len(updateRows) > 0 {
		updateRowsAll = append(updateRowsAll, updateRows)
	}
	if len(deleteRows) > 0 {
		deleteRowsAll = append(deleteRowsAll, deleteRows)
	}

	// process back index rows as they arrive
	for dbir := range docBackIndexRowCh {
		if dbir.doc == nil && dbir.backIndexRow != nil {
			// delete
			deleteRows := udc.deleteSingle(dbir.docID, dbir.backIndexRow, nil)
			if len(deleteRows) > 0 {
				deleteRowsAll = append(deleteRowsAll, deleteRows)
			}
			docsDeleted++
		} else if dbir.doc != nil {
			addRows, updateRows, deleteRows := udc.mergeOldAndNew(dbir.doc.ID, dbir.backIndexRow, newRowsMap[dbir.doc.ID])
			if len(addRows) > 0 {
				addRowsAll = append(addRowsAll, addRows)
			}
			if len(updateRows) > 0 {
				updateRowsAll = append(updateRowsAll, updateRows)
			}
			if len(deleteRows) > 0 {
				deleteRowsAll = append(deleteRowsAll, deleteRows)
			}
			if dbir.backIndexRow == nil {
				docsAdded++
			}
		}
	}

	if docBackIndexRowErr != nil {
		return docBackIndexRowErr
	}

	// start a writer for this batch
	var kvwriter store.KVWriter
	kvwriter, err = udc.store.Writer()
	if err != nil {
		return
	}

	err = udc.batchRows(kvwriter, addRowsAll, updateRowsAll, deleteRowsAll)
	if err != nil {
		_ = kvwriter.Close()
		atomic.AddUint64(&udc.stats.errors, 1)
		return
	}

	err = kvwriter.Close()

	atomic.AddUint64(&udc.stats.indexTime, uint64(time.Since(indexStart)))

	if err == nil {
		udc.m.Lock()
		udc.docCount += docsAdded
		udc.docCount -= docsDeleted
		udc.m.Unlock()
		for did, doc := range batch.IndexOps {
			if doc != nil {
				udc.cf.Insert([]byte(did))
			} else {
				udc.cf.Delete([]byte(did))
			}
		}
		atomic.AddUint64(&udc.stats.updates, numUpdates)
		atomic.AddUint64(&udc.stats.deletes, docsDeleted)
		atomic.AddUint64(&udc.stats.batches, 1)
		atomic.AddUint64(&udc.stats.numPlainTextBytesIndexed, numPlainTextBytes)
	} else {
		atomic.AddUint64(&udc.stats.errors, 1)
	}
	return
}
func (udc *UpsideDownCouch) batchRows(writer store.KVWriter, addRows []UpsideDownCouchRow, updateRows []UpsideDownCouchRow, deleteRows []UpsideDownCouchRow) (err error) {
	// prepare batch
	wb := writer.NewBatch()

	// buffer to work with
	rowBuf := GetRowBuffer()

	// add
	for _, row := range addRows {
		tfr, ok := row.(*TermFrequencyRow)
		if ok {
			// need to increment counter
			if tfr.DictionaryRowKeySize() > len(rowBuf) {
				rowBuf = make([]byte, tfr.DictionaryRowKeySize())
			}
			dictKeySize, err := tfr.DictionaryRowKeyTo(rowBuf)
			if err != nil {
				return err
			}
			wb.Merge(rowBuf[:dictKeySize], dictionaryTermIncr)
		}
		if row.KeySize()+row.ValueSize() > len(rowBuf) {
			rowBuf = make([]byte, row.KeySize()+row.ValueSize())
		}
		keySize, err := row.KeyTo(rowBuf)
		if err != nil {
			return err
		}
		valSize, err := row.ValueTo(rowBuf[keySize:])
		if err != nil {
			return err
		}
		wb.Set(rowBuf[:keySize], rowBuf[keySize:keySize+valSize])
	}

	// update
	for _, row := range updateRows {
		if row.KeySize()+row.ValueSize() > len(rowBuf) {
			rowBuf = make([]byte, row.KeySize()+row.ValueSize())
		}
		keySize, err := row.KeyTo(rowBuf)
		if err != nil {
			return err
		}
		valSize, err := row.ValueTo(rowBuf[keySize:])
		if err != nil {
			return err
		}
		wb.Set(rowBuf[:keySize], rowBuf[keySize:keySize+valSize])
	}

	// delete
	for _, row := range deleteRows {
		tfr, ok := row.(*TermFrequencyRow)
		if ok {
			// need to decrement counter
			if tfr.DictionaryRowKeySize() > len(rowBuf) {
				rowBuf = make([]byte, tfr.DictionaryRowKeySize())
			}
			dictKeySize, err := tfr.DictionaryRowKeyTo(rowBuf)
			if err != nil {
				return err
			}
			wb.Merge(rowBuf[:dictKeySize], dictionaryTermDecr)
		}
		if row.KeySize()+row.ValueSize() > len(rowBuf) {
			rowBuf = make([]byte, row.KeySize()+row.ValueSize())
		}
		keySize, err := row.KeyTo(rowBuf)
		if err != nil {
			return err
		}
		wb.Delete(rowBuf[:keySize])
	}

	PutRowBuffer(rowBuf)

	// write out the batch
	return writer.ExecuteBatch(wb)
}