func NewIOContext(r *rand.Rand, oldContext store.IOContext) store.IOContext { randomNumDocs := r.Intn(4192) size := r.Int63n(512) * int64(randomNumDocs) if oldContext.FlushInfo != nil { // Always return at least the estimatedSegmentSize of the // incoming IOContext: if size < oldContext.FlushInfo.EstimatedSegmentSize { size = oldContext.FlushInfo.EstimatedSegmentSize } return store.NewIOContextForFlush(&store.FlushInfo{randomNumDocs, size}) } else if oldContext.MergeInfo != nil { // Always return at least the estimatedMergeBytes of the // incoming IOContext: if size < oldContext.MergeInfo.EstimatedMergeBytes { size = oldContext.MergeInfo.EstimatedMergeBytes } return store.NewIOContextForMerge( &store.MergeInfo{randomNumDocs, size, r.Intn(2) == 0, NextInt(r, 1, 100)}) } else { // Make a totally random IOContext: switch r.Intn(5) { case 1: return store.IO_CONTEXT_READ case 2: return store.IO_CONTEXT_READONCE case 3: return store.NewIOContextForMerge(&store.MergeInfo{randomNumDocs, size, true, -1}) case 4: return store.NewIOContextForFlush(&store.FlushInfo{randomNumDocs, size}) default: return store.IO_CONTEXT_DEFAULT } } }
/* Flush all pending docs to a new segment */ func (dwpt *DocumentsWriterPerThread) flush() (fs *FlushedSegment, err error) { assert(dwpt.numDocsInRAM > 0) assert2(dwpt.deleteSlice.isEmpty(), "all deletes must be applied in prepareFlush") dwpt.segmentInfo.SetDocCount(dwpt.numDocsInRAM) numBytesUsed := dwpt.bytesUsed() flushState := newSegmentWriteState(dwpt.infoStream, dwpt.directory, dwpt.segmentInfo, dwpt.fieldInfos.Finish(), dwpt.indexWriterConfig.termIndexInterval, dwpt.pendingDeletes, store.NewIOContextForFlush(&store.FlushInfo{dwpt.numDocsInRAM, numBytesUsed})) startMBUsed := float64(numBytesUsed) / 1024 / 1024 // Apply delete-by-docID now (delete-byDocID only happens when an // error is hit processing that doc, e.g., if analyzer has some // problem with the text): if delCount := len(dwpt.pendingDeletes.docIDs); delCount > 0 { flushState.liveDocs = dwpt.codec.LiveDocsFormat().NewLiveDocs(dwpt.numDocsInRAM) for _, delDocID := range dwpt.pendingDeletes.docIDs { flushState.liveDocs.Clear(delDocID) } flushState.delCountOnFlush = delCount atomic.AddInt64(&dwpt.pendingDeletes.bytesUsed, -int64(delCount)*BYTES_PER_DEL_DOCID) dwpt.pendingDeletes.docIDs = nil } if dwpt.aborting { if dwpt.infoStream.IsEnabled("DWPT") { dwpt.infoStream.Message("DWPT", "flush: skip because aborting is set") } return nil, nil } if dwpt.infoStream.IsEnabled("DWPT") { dwpt.infoStream.Message("DWPT", "flush postings as segment %v numDocs=%v", flushState.segmentInfo.Name, dwpt.numDocsInRAM) } var success = false defer func() { if !success { dwpt.abort(dwpt.filesToDelete) } }() err = dwpt.consumer.flush(flushState) if err != nil { return nil, err } dwpt.pendingDeletes.terms = make(map[*Term]int) files := make(map[string]bool) dwpt.directory.EachCreatedFiles(func(name string) { files[name] = true }) dwpt.segmentInfo.SetFiles(files) info := NewSegmentInfoPerCommit(dwpt.segmentInfo, 0, -1) if dwpt.infoStream.IsEnabled("DWPT") { dwpt.infoStream.Message("DWPT", "new segment has %v deleted docs", 
check(flushState.liveDocs == nil, 0, flushState.segmentInfo.DocCount()-flushState.delCountOnFlush)) dwpt.infoStream.Message("DWPT", "new segment has %v; %v; %v; %v; %v", check(flushState.fieldInfos.HasVectors, "vectors", "no vectors"), check(flushState.fieldInfos.HasNorms, "norms", "no norms"), check(flushState.fieldInfos.HasDocValues, "docValues", "no docValues"), check(flushState.fieldInfos.HasProx, "prox", "no prox"), check(flushState.fieldInfos.HasFreq, "freqs", "no freqs")) dwpt.infoStream.Message("DWPT", "flushedFiles=%v", info.Files()) dwpt.infoStream.Message("DWPT", "flushed coded=%v", dwpt.codec) } var segmentDeletes *BufferedDeletes if len(dwpt.pendingDeletes.queries) > 0 { segmentDeletes = dwpt.pendingDeletes } if dwpt.infoStream.IsEnabled("DWPT") { numBytes, err := info.SizeInBytes() if err != nil { return nil, err } newSegmentSize := float64(numBytes) / 1024 / 1024 dwpt.infoStream.Message("DWPT", "flushed: segment=%v ramUsed=%v MB newFlushedSize(includes docstores)=%v MB docs/MB=%v", startMBUsed, newSegmentSize, float64(flushState.segmentInfo.DocCount())/newSegmentSize) } assert(dwpt.segmentInfo != nil) fs = newFlushedSegment(info, flushState.fieldInfos, segmentDeletes, flushState.liveDocs, flushState.delCountOnFlush) err = dwpt.sealFlushedSegment(fs) if err != nil { return nil, err } success = true return fs, nil }
/*
Seals the SegmentInfo for the new flushed segment and persists the
deleted documents MutableBits.

Ordering matters here: the compound file (CFS) is built first, then the
.si is written (so it is not slurped into the CFS and reflects the
useCompoundFile flag), and the live-docs/deletes file is written last
(so it too stays out of the CFS).
*/
func (dwpt *DocumentsWriterPerThread) sealFlushedSegment(flushedSegment *FlushedSegment) error {
	assert(flushedSegment != nil)
	newSegment := flushedSegment.segmentInfo
	setDiagnostics(newSegment.Info, SOURCE_FLUSH)

	// IOContext hints at the size of what is about to be written so the
	// directory implementation can plan accordingly.
	segSize, err := newSegment.SizeInBytes()
	if err != nil {
		return err
	}
	context := store.NewIOContextForFlush(&store.FlushInfo{
		newSegment.Info.DocCount(),
		segSize,
	})

	// On failure, only log; the caller (flush) is responsible for
	// aborting and deleting the partially written files.
	var success = false
	defer func() {
		if !success {
			if dwpt.infoStream.IsEnabled("DWPT") {
				dwpt.infoStream.Message(
					"DWPT", "hit error relating compound file for newly flushed segment %v",
					newSegment.Info.Name)
			}
		}
	}()

	if dwpt.indexWriterConfig.UseCompoundFile() {
		files, err := createCompoundFile(
			dwpt.infoStream, dwpt.directory, CheckAbortNone(0), newSegment.Info, context)
		if err != nil {
			return err
		}
		// The non-CFS originals are now redundant; schedule them for deletion.
		for _, file := range files {
			dwpt.filesToDelete[file] = true
		}
		newSegment.Info.SetUseCompoundFile(true)
	}

	// Have codec write SegmentInfo. Must do this after creating CFS so
	// that 1) .si isn't slurped into CFS, and 2) .si reflects
	// useCompoundFile=true change above:
	err = dwpt.codec.SegmentInfoFormat().SegmentInfoWriter().Write(
		dwpt.directory, newSegment.Info, flushedSegment.fieldInfos, context)
	if err != nil {
		return err
	}

	// TODO: ideally we would freeze newSegment here!!
	// because any changes after writing the .si will be lost...

	// Must write deleted docs after the CFS so we don't slurp the del
	// file into CFS:
	if flushedSegment.liveDocs != nil {
		delCount := flushedSegment.delCount
		assert(delCount > 0)
		if dwpt.infoStream.IsEnabled("DWPT") {
			dwpt.infoStream.Message("DWPT", "flush: write %v deletes gen=%v",
				delCount, flushedSegment.segmentInfo.DelGen())
		}

		// TODO: we should prune the segment if it's 100% deleted... but
		// merge will also catch it.

		// TODO: in the NRT case id'd be better to hand this del vector
		// over to the shortly-to-be-opened SegmentReader and let it
		// carry the changes; there's no reason to use filesystem as
		// intermediary here.
		info := flushedSegment.segmentInfo
		codec := info.Info.Codec().(Codec)
		err = codec.LiveDocsFormat().WriteLiveDocs(flushedSegment.liveDocs,
			dwpt.directory, info, delCount, context)
		if err != nil {
			return err
		}
		// Record the deletes on the commit point and bump the delete
		// generation so readers pick up the newly written del file.
		newSegment.SetDelCount(delCount)
		newSegment.AdvanceDelGen()
	}
	success = true
	return nil
}