func backIndexRowForDoc(kvreader store.KVReader, docID index.IndexInternalID) (*BackIndexRow, error) { // use a temporary row structure to build key tempRow := &BackIndexRow{ doc: docID, } keyBuf := GetRowBuffer() if tempRow.KeySize() > len(keyBuf) { keyBuf = make([]byte, 2*tempRow.KeySize()) } defer PutRowBuffer(keyBuf) keySize, err := tempRow.KeyTo(keyBuf) if err != nil { return nil, err } value, err := kvreader.Get(keyBuf[:keySize]) if err != nil { return nil, err } if value == nil { return nil, nil } backIndexRow, err := NewBackIndexRowKV(keyBuf[:keySize], value) if err != nil { return nil, err } return backIndexRow, nil }
func dumpPrefix(kvreader store.KVReader, rv chan interface{}, prefix []byte) { start := prefix if start == nil { start = []byte{0} } it := kvreader.PrefixIterator(start) defer func() { cerr := it.Close() if cerr != nil { rv <- cerr } }() key, val, valid := it.Current() for valid { ck := make([]byte, len(key)) copy(ck, key) cv := make([]byte, len(val)) copy(cv, val) row, err := ParseFromKeyValue(ck, cv) if err != nil { rv <- err return } rv <- row it.Next() key, val, valid = it.Current() } }
func dumpRange(kvreader store.KVReader, rv chan interface{}, start, end []byte) { it := kvreader.RangeIterator(start, end) defer func() { cerr := it.Close() if cerr != nil { rv <- cerr } }() key, val, valid := it.Current() for valid { ck := make([]byte, len(key)) copy(ck, key) cv := make([]byte, len(val)) copy(cv, val) row, err := ParseFromKeyValue(ck, cv) if err != nil { rv <- err return } rv <- row it.Next() key, val, valid = it.Current() } }
func (f *Firestorm) checkVersion(reader store.KVReader) (newIndex bool, err error) { value, err := reader.Get(VersionKey) if err != nil { return } if value == nil { newIndex = true return } var vr *VersionRow vr, err = NewVersionRowV(value) if err != nil { return } // assert correct version if vr.Version() != Version { err = IncompatibleVersion return } return }
func (udc *UpsideDownCouch) dumpPrefix(kvreader store.KVReader, rv chan interface{}, prefix []byte) { start := prefix if start == nil { start = []byte{0} } it := kvreader.Iterator(start) defer it.Close() key, val, valid := it.Current() for valid { if prefix != nil && !bytes.HasPrefix(key, prefix) { break } row, err := ParseFromKeyValue(key, val) if err != nil { rv <- err return } rv <- row it.Next() key, val, valid = it.Current() } }
func visitPrefix(reader store.KVReader, prefix []byte, visitor KVVisitor) (err error) { start := prefix if start == nil { start = []byte{} } it := reader.PrefixIterator(start) defer func() { if cerr := it.Close(); err == nil && cerr != nil { err = cerr } }() k, v, valid := it.Current() for valid { var cont bool cont, err = visitor(k, v) if err != nil { // visitor encountered an error, stop and return it return } if !cont { // vistor has requested we stop iteration, return nil return } it.Next() k, v, valid = it.Current() } return }
func (udc *SmolderingCouch) countDocs(kvreader store.KVReader) (count, highDocNum uint64, err error) { k := TermFrequencyRowStartField(0) it := kvreader.PrefixIterator(k) defer func() { if cerr := it.Close(); err == nil && cerr != nil { err = cerr } }() var lastValidDocNum []byte k, _, valid := it.Current() for valid { tfr, err := NewTermFrequencyRowK(k) if err != nil { return 0, 0, err } if tfr.term != nil { udc.cf.Insert(tfr.term) } lastValidDocNum = lastValidDocNum[:0] lastValidDocNum = append(lastValidDocNum, tfr.docNumber...) count++ it.Next() k, _, valid = it.Current() } if lastValidDocNum != nil { _, highDocNum, err = DecodeUvarintAscending(lastValidDocNum) if err != nil { return 0, 0, err } } return }
func (udc *UpsideDownCouch) loadSchema(kvreader store.KVReader) (err error) { keyPrefix := []byte{'f'} it := kvreader.Iterator(keyPrefix) defer it.Close() it.Seek(keyPrefix) key, val, valid := it.Current() for valid { // stop when if !bytes.HasPrefix(key, keyPrefix) { break } fieldRow, err := NewFieldRowKV(key, val) if err != nil { return err } udc.fieldIndexes[fieldRow.name] = fieldRow.index if int(fieldRow.index) > udc.lastFieldIndex { udc.lastFieldIndex = int(fieldRow.index) } it.Next() key, val, valid = it.Current() } return }
func (udc *UpsideDownCouch) loadSchema(kvreader store.KVReader) (err error) { keyPrefix := []byte{'f'} it := kvreader.Iterator(keyPrefix) defer func() { if cerr := it.Close(); err == nil && cerr != nil { err = cerr } }() it.Seek(keyPrefix) key, val, valid := it.Current() for valid { // stop when if !bytes.HasPrefix(key, keyPrefix) { break } var fieldRow *FieldRow fieldRow, err = NewFieldRowKV(key, val) if err != nil { return } udc.fieldCache.AddExisting(fieldRow.name, fieldRow.index) it.Next() key, val, valid = it.Current() } keyPrefix = []byte{'v'} val, err = kvreader.Get(keyPrefix) if err != nil { return } var vr *VersionRow vr, err = NewVersionRowKV(keyPrefix, val) if err != nil { return } if vr.version != Version { err = IncompatibleVersion return } return }
func (udc *UpsideDownCouch) countDocs(kvreader store.KVReader) uint64 { it := kvreader.Iterator([]byte{'b'}) defer it.Close() var rv uint64 key, _, valid := it.Current() for valid { if !bytes.HasPrefix(key, []byte{'b'}) { break } rv++ it.Next() key, _, valid = it.Current() } return rv }
func (udc *UpsideDownCouch) countDocs(kvreader store.KVReader) (count uint64, err error) { it := kvreader.PrefixIterator([]byte{'b'}) defer func() { if cerr := it.Close(); err == nil && cerr != nil { err = cerr } }() _, _, valid := it.Current() for valid { count++ it.Next() _, _, valid = it.Current() } return }
func (udc *UpsideDownCouch) backIndexRowForDoc(kvreader store.KVReader, docID string) (*BackIndexRow, error) { // use a temporary row structure to build key tempRow := &BackIndexRow{ doc: []byte(docID), } key := tempRow.Key() value, err := kvreader.Get(key) if err != nil { return nil, err } if value == nil { return nil, nil } backIndexRow, err := NewBackIndexRowKV(key, value) if err != nil { return nil, err } return backIndexRow, nil }
func (udc *UpsideDownCouch) dumpRange(kvreader store.KVReader, rv chan interface{}, start, end []byte) { it := kvreader.RangeIterator(start, end) defer func() { cerr := it.Close() if cerr != nil { rv <- cerr } }() key, val, valid := it.Current() for valid { row, err := ParseFromKeyValue(key, val) if err != nil { rv <- err return } rv <- row it.Next() key, val, valid = it.Current() } }
func (udc *SmolderingCouch) loadSchema(kvreader store.KVReader) (err error) { it := kvreader.PrefixIterator([]byte{'f'}) defer func() { if cerr := it.Close(); err == nil && cerr != nil { err = cerr } }() key, val, valid := it.Current() for valid { var fieldRow *FieldRow fieldRow, err = NewFieldRowKV(key, val) if err != nil { return } udc.fieldCache.AddExisting(fieldRow.name, fieldRow.index) it.Next() key, val, valid = it.Current() } val, err = kvreader.Get([]byte{'v'}) if err != nil { return } var vr *VersionRow vr, err = NewVersionRowKV([]byte{'v'}, val) if err != nil { return } if vr.version != Version { err = IncompatibleVersion return } return }
func (udc *UpsideDownCouch) Delete(id string) (err error) { indexStart := time.Now() udc.writeMutex.Lock() defer udc.writeMutex.Unlock() // open a reader for backindex lookup var kvreader store.KVReader kvreader, err = udc.store.Reader() if err != nil { return } // first we lookup the backindex row for the doc id if it exists // lookup the back index row var backIndexRow *BackIndexRow backIndexRow, err = backIndexRowForDoc(kvreader, index.IndexInternalID(id)) if err != nil { _ = kvreader.Close() atomic.AddUint64(&udc.stats.errors, 1) return } err = kvreader.Close() if err != nil { return } if backIndexRow == nil { atomic.AddUint64(&udc.stats.deletes, 1) return } // start a writer for this delete var kvwriter store.KVWriter kvwriter, err = udc.store.Writer() if err != nil { return } defer func() { if cerr := kvwriter.Close(); err == nil && cerr != nil { err = cerr } }() var deleteRowsAll [][]UpsideDownCouchRow deleteRows := udc.deleteSingle(id, backIndexRow, nil) if len(deleteRows) > 0 { deleteRowsAll = append(deleteRowsAll, deleteRows) } err = udc.batchRows(kvwriter, nil, nil, deleteRowsAll) if err == nil { udc.m.Lock() udc.docCount-- udc.m.Unlock() } atomic.AddUint64(&udc.stats.indexTime, uint64(time.Since(indexStart))) if err == nil { atomic.AddUint64(&udc.stats.deletes, 1) } else { atomic.AddUint64(&udc.stats.errors, 1) } return }
func (udc *UpsideDownCouch) Update(doc *document.Document) (err error) { // do analysis before acquiring write lock analysisStart := time.Now() numPlainTextBytes := doc.NumPlainTextBytes() resultChan := make(chan *index.AnalysisResult) aw := index.NewAnalysisWork(udc, doc, resultChan) // put the work on the queue udc.analysisQueue.Queue(aw) // wait for the result result := <-resultChan close(resultChan) atomic.AddUint64(&udc.stats.analysisTime, uint64(time.Since(analysisStart))) udc.writeMutex.Lock() defer udc.writeMutex.Unlock() // open a reader for backindex lookup var kvreader store.KVReader kvreader, err = udc.store.Reader() if err != nil { return } // first we lookup the backindex row for the doc id if it exists // lookup the back index row var backIndexRow *BackIndexRow backIndexRow, err = backIndexRowForDoc(kvreader, index.IndexInternalID(doc.ID)) if err != nil { _ = kvreader.Close() atomic.AddUint64(&udc.stats.errors, 1) return } err = kvreader.Close() if err != nil { return } // start a writer for this update indexStart := time.Now() var kvwriter store.KVWriter kvwriter, err = udc.store.Writer() if err != nil { return } defer func() { if cerr := kvwriter.Close(); err == nil && cerr != nil { err = cerr } }() // prepare a list of rows var addRowsAll [][]UpsideDownCouchRow var updateRowsAll [][]UpsideDownCouchRow var deleteRowsAll [][]UpsideDownCouchRow addRows, updateRows, deleteRows := udc.mergeOldAndNew(backIndexRow, result.Rows) if len(addRows) > 0 { addRowsAll = append(addRowsAll, addRows) } if len(updateRows) > 0 { updateRowsAll = append(updateRowsAll, updateRows) } if len(deleteRows) > 0 { deleteRowsAll = append(deleteRowsAll, deleteRows) } err = udc.batchRows(kvwriter, addRowsAll, updateRowsAll, deleteRowsAll) if err == nil && backIndexRow == nil { udc.m.Lock() udc.docCount++ udc.m.Unlock() } atomic.AddUint64(&udc.stats.indexTime, uint64(time.Since(indexStart))) if err == nil { atomic.AddUint64(&udc.stats.updates, 1) atomic.AddUint64(&udc.stats.numPlainTextBytesIndexed, numPlainTextBytes) } else { atomic.AddUint64(&udc.stats.errors, 1) } return }
func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) { analysisStart := time.Now() resultChan := make(chan *index.AnalysisResult) var numUpdates uint64 for _, doc := range batch.IndexOps { if doc != nil { numUpdates++ } } var detectedUnsafeMutex sync.RWMutex detectedUnsafe := false go func() { sofar := uint64(0) for _, doc := range batch.IndexOps { if doc != nil { sofar++ if sofar > numUpdates { detectedUnsafeMutex.Lock() detectedUnsafe = true detectedUnsafeMutex.Unlock() return } aw := index.NewAnalysisWork(udc, doc, resultChan) // put the work on the queue udc.analysisQueue.Queue(aw) } } }() newRowsMap := make(map[string][]index.IndexRow) // wait for the result var itemsDeQueued uint64 for itemsDeQueued < numUpdates { result := <-resultChan newRowsMap[result.DocID] = result.Rows itemsDeQueued++ } close(resultChan) detectedUnsafeMutex.RLock() defer detectedUnsafeMutex.RUnlock() if detectedUnsafe { return UnsafeBatchUseDetected } atomic.AddUint64(&udc.stats.analysisTime, uint64(time.Since(analysisStart))) indexStart := time.Now() udc.writeMutex.Lock() defer udc.writeMutex.Unlock() // open a reader for backindex lookup var kvreader store.KVReader kvreader, err = udc.store.Reader() if err != nil { return } // first lookup all the back index rows var backIndexRows map[string]*BackIndexRow backIndexRows, err = udc.backIndexRowsForBatch(kvreader, batch) if err != nil { _ = kvreader.Close() return } err = kvreader.Close() if err != nil { return } // start a writer for this batch var kvwriter store.KVWriter kvwriter, err = udc.store.Writer() if err != nil { return } // prepare a list of rows addRows := make([]UpsideDownCouchRow, 0) updateRows := make([]UpsideDownCouchRow, 0) deleteRows := make([]UpsideDownCouchRow, 0) docsAdded := uint64(0) docsDeleted := uint64(0) for docID, doc := range batch.IndexOps { backIndexRow := backIndexRows[docID] if doc == nil && backIndexRow != nil { // delete deleteRows = udc.deleteSingle(docID, backIndexRow, deleteRows) docsDeleted++ } else if doc != nil { addRows, updateRows, deleteRows = udc.mergeOldAndNew(backIndexRow, newRowsMap[docID], addRows, updateRows, deleteRows) if backIndexRow == nil { docsAdded++ } } } // add the internal ops for internalKey, internalValue := range batch.InternalOps { if internalValue == nil { // delete deleteInternalRow := NewInternalRow([]byte(internalKey), nil) deleteRows = append(deleteRows, deleteInternalRow) } else { updateInternalRow := NewInternalRow([]byte(internalKey), internalValue) updateRows = append(updateRows, updateInternalRow) } } err = udc.batchRows(kvwriter, addRows, updateRows, deleteRows) if err != nil { _ = kvwriter.Close() atomic.AddUint64(&udc.stats.errors, 1) return } err = kvwriter.Close() atomic.AddUint64(&udc.stats.indexTime, uint64(time.Since(indexStart))) if err == nil { udc.m.Lock() udc.docCount += docsAdded udc.docCount -= docsDeleted udc.m.Unlock() atomic.AddUint64(&udc.stats.updates, numUpdates) atomic.AddUint64(&udc.stats.deletes, docsDeleted) atomic.AddUint64(&udc.stats.batches, 1) } else { atomic.AddUint64(&udc.stats.errors, 1) } return }
func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) { analysisStart := time.Now() resultChan := make(chan *index.AnalysisResult, len(batch.IndexOps)) var numUpdates uint64 var numPlainTextBytes uint64 for _, doc := range batch.IndexOps { if doc != nil { numUpdates++ numPlainTextBytes += doc.NumPlainTextBytes() } } go func() { for _, doc := range batch.IndexOps { if doc != nil { aw := index.NewAnalysisWork(udc, doc, resultChan) // put the work on the queue udc.analysisQueue.Queue(aw) } } }() // retrieve back index rows concurrent with analysis docBackIndexRowErr := error(nil) docBackIndexRowCh := make(chan *docBackIndexRow, len(batch.IndexOps)) udc.writeMutex.Lock() defer udc.writeMutex.Unlock() go func() { defer close(docBackIndexRowCh) // open a reader for backindex lookup var kvreader store.KVReader kvreader, err = udc.store.Reader() if err != nil { docBackIndexRowErr = err return } for docID, doc := range batch.IndexOps { backIndexRow, err := backIndexRowForDoc(kvreader, index.IndexInternalID(docID)) if err != nil { docBackIndexRowErr = err return } docBackIndexRowCh <- &docBackIndexRow{docID, doc, backIndexRow} } err = kvreader.Close() if err != nil { docBackIndexRowErr = err return } }() // wait for analysis result newRowsMap := make(map[string][]index.IndexRow) var itemsDeQueued uint64 for itemsDeQueued < numUpdates { result := <-resultChan newRowsMap[result.DocID] = result.Rows itemsDeQueued++ } close(resultChan) atomic.AddUint64(&udc.stats.analysisTime, uint64(time.Since(analysisStart))) docsAdded := uint64(0) docsDeleted := uint64(0) indexStart := time.Now() // prepare a list of rows var addRowsAll [][]UpsideDownCouchRow var updateRowsAll [][]UpsideDownCouchRow var deleteRowsAll [][]UpsideDownCouchRow // add the internal ops var updateRows []UpsideDownCouchRow var deleteRows []UpsideDownCouchRow for internalKey, internalValue := range batch.InternalOps { if internalValue == nil { // delete deleteInternalRow := NewInternalRow([]byte(internalKey), nil) deleteRows = append(deleteRows, deleteInternalRow) } else { updateInternalRow := NewInternalRow([]byte(internalKey), internalValue) updateRows = append(updateRows, updateInternalRow) } } if len(updateRows) > 0 { updateRowsAll = append(updateRowsAll, updateRows) } if len(deleteRows) > 0 { deleteRowsAll = append(deleteRowsAll, deleteRows) } // process back index rows as they arrive for dbir := range docBackIndexRowCh { if dbir.doc == nil && dbir.backIndexRow != nil { // delete deleteRows := udc.deleteSingle(dbir.docID, dbir.backIndexRow, nil) if len(deleteRows) > 0 { deleteRowsAll = append(deleteRowsAll, deleteRows) } docsDeleted++ } else if dbir.doc != nil { addRows, updateRows, deleteRows := udc.mergeOldAndNew(dbir.backIndexRow, newRowsMap[dbir.docID]) if len(addRows) > 0 { addRowsAll = append(addRowsAll, addRows) } if len(updateRows) > 0 { updateRowsAll = append(updateRowsAll, updateRows) } if len(deleteRows) > 0 { deleteRowsAll = append(deleteRowsAll, deleteRows) } if dbir.backIndexRow == nil { docsAdded++ } } } if docBackIndexRowErr != nil { return docBackIndexRowErr } // start a writer for this batch var kvwriter store.KVWriter kvwriter, err = udc.store.Writer() if err != nil { return } err = udc.batchRows(kvwriter, addRowsAll, updateRowsAll, deleteRowsAll) if err != nil { _ = kvwriter.Close() atomic.AddUint64(&udc.stats.errors, 1) return } err = kvwriter.Close() atomic.AddUint64(&udc.stats.indexTime, uint64(time.Since(indexStart))) if err == nil { udc.m.Lock() udc.docCount += docsAdded udc.docCount -= docsDeleted udc.m.Unlock() atomic.AddUint64(&udc.stats.updates, numUpdates) atomic.AddUint64(&udc.stats.deletes, docsDeleted) atomic.AddUint64(&udc.stats.batches, 1) atomic.AddUint64(&udc.stats.numPlainTextBytesIndexed, numPlainTextBytes) } else { atomic.AddUint64(&udc.stats.errors, 1) } return }
func (f *Firestorm) Open() (err error) { // open the kv store storeConstructor := registry.KVStoreConstructorByName(f.storeName) if storeConstructor == nil { err = index.ErrorUnknownStorageType return } // now open the store f.store, err = storeConstructor(&mergeOperator, f.storeConfig) if err != nil { return } // start a reader var kvreader store.KVReader kvreader, err = f.store.Reader() if err != nil { return } // assert correct version, and find out if this is new index var newIndex bool newIndex, err = f.checkVersion(kvreader) if err != nil { return } if !newIndex { // process existing index before opening err = f.warmup(kvreader) if err != nil { return } } err = kvreader.Close() if err != nil { return } if newIndex { // prepare a new index err = f.bootstrap() if err != nil { return } } // start the garbage collector f.garbageCollector.Start() // start the lookuper f.lookuper.Start() // start the dict updater f.dictUpdater.Start() return }
func (udc *SmolderingCouch) Open() (err error) { //acquire the write mutex for the duratin of Open() udc.writeMutex.Lock() defer udc.writeMutex.Unlock() // open the kv store storeConstructor := registry.KVStoreConstructorByName(udc.storeName) if storeConstructor == nil { err = index.ErrorUnknownStorageType return } // now open the store udc.store, err = storeConstructor(&mergeOperator, udc.storeConfig) if err != nil { return } udc.cf = cuckoofilter.NewDefaultCuckooFilter() // start a reader to look at the index var kvreader store.KVReader kvreader, err = udc.store.Reader() if err != nil { return } var value []byte value, err = kvreader.Get(VersionKey) if err != nil { _ = kvreader.Close() return } if value != nil { err = udc.loadSchema(kvreader) if err != nil { _ = kvreader.Close() return } // set doc count udc.m.Lock() udc.docCount, udc.maxInternalDocID, err = udc.countDocs(kvreader) udc.m.Unlock() err = kvreader.Close() } else { // new index, close the reader and open writer to init err = kvreader.Close() if err != nil { return } var kvwriter store.KVWriter kvwriter, err = udc.store.Writer() if err != nil { return } defer func() { if cerr := kvwriter.Close(); err == nil && cerr != nil { err = cerr } }() // init the index err = udc.init(kvwriter) } return }