Esempio n. 1
0
// NewIOContext returns a randomized store.IOContext derived from oldContext.
//
// Two random values are always drawn first: a document count in [0, 4192)
// and a size equal to a random factor in [0, 512) times that count.
//
//   - If oldContext carries FlushInfo, a flush context is returned whose size
//     is the larger of the random size and the incoming EstimatedSegmentSize.
//   - Otherwise, if oldContext carries MergeInfo, a merge context is returned
//     whose size is the larger of the random size and the incoming
//     EstimatedMergeBytes; its remaining two fields are drawn at random.
//   - Otherwise one of five contexts is picked uniformly at random.
func NewIOContext(r *rand.Rand, oldContext store.IOContext) store.IOContext {
	numDocs := r.Intn(4192)
	estimate := r.Int63n(512) * int64(numDocs)

	if flush := oldContext.FlushInfo; flush != nil {
		// Never report less than the incoming estimated segment size.
		if flush.EstimatedSegmentSize > estimate {
			estimate = flush.EstimatedSegmentSize
		}
		return store.NewIOContextForFlush(&store.FlushInfo{numDocs, estimate})
	}

	if merge := oldContext.MergeInfo; merge != nil {
		// Never report less than the incoming estimated merge bytes.
		if merge.EstimatedMergeBytes > estimate {
			estimate = merge.EstimatedMergeBytes
		}
		return store.NewIOContextForMerge(
			&store.MergeInfo{numDocs, estimate, r.Intn(2) == 0, NextInt(r, 1, 100)})
	}

	// Neither flush nor merge information was supplied: pick any context.
	switch pick := r.Intn(5); pick {
	case 4:
		return store.NewIOContextForFlush(&store.FlushInfo{numDocs, estimate})
	case 3:
		return store.NewIOContextForMerge(&store.MergeInfo{numDocs, estimate, true, -1})
	case 2:
		return store.IO_CONTEXT_READONCE
	case 1:
		return store.IO_CONTEXT_READ
	}
	return store.IO_CONTEXT_DEFAULT
}
Esempio n. 2
0
/*
flush writes all documents currently buffered in RAM to a new segment.

It returns the resulting FlushedSegment. When dwpt.aborting is set the
flush is skipped and (nil, nil) is returned. If flushing the consumer,
computing the segment size for logging, or sealing the segment fails, the
error is returned and the deferred handler calls
dwpt.abort(dwpt.filesToDelete).
*/
func (dwpt *DocumentsWriterPerThread) flush() (fs *FlushedSegment, err error) {
	assert(dwpt.numDocsInRAM > 0)
	assert2(dwpt.deleteSlice.isEmpty(), "all deletes must be applied in prepareFlush")
	dwpt.segmentInfo.SetDocCount(dwpt.numDocsInRAM)
	numBytesUsed := dwpt.bytesUsed()
	flushState := newSegmentWriteState(dwpt.infoStream, dwpt.directory,
		dwpt.segmentInfo, dwpt.fieldInfos.Finish(),
		dwpt.indexWriterConfig.termIndexInterval, dwpt.pendingDeletes,
		store.NewIOContextForFlush(&store.FlushInfo{dwpt.numDocsInRAM, numBytesUsed}))
	// RAM usage before flushing, in MB; only used for the summary log below.
	startMBUsed := float64(numBytesUsed) / 1024 / 1024

	// Apply delete-by-docID now (delete-byDocID only happens when an
	// error is hit processing that doc, e.g., if analyzer has some
	// problem with the text):
	if delCount := len(dwpt.pendingDeletes.docIDs); delCount > 0 {
		flushState.liveDocs = dwpt.codec.LiveDocsFormat().NewLiveDocs(dwpt.numDocsInRAM)
		for _, delDocID := range dwpt.pendingDeletes.docIDs {
			flushState.liveDocs.Clear(delDocID)
		}
		flushState.delCountOnFlush = delCount
		// The doc IDs are dropped below, so give back the bytes they accounted for.
		atomic.AddInt64(&dwpt.pendingDeletes.bytesUsed, -int64(delCount)*BYTES_PER_DEL_DOCID)
		dwpt.pendingDeletes.docIDs = nil
	}

	if dwpt.aborting {
		if dwpt.infoStream.IsEnabled("DWPT") {
			dwpt.infoStream.Message("DWPT", "flush: skip because aborting is set")
		}
		return nil, nil
	}

	if dwpt.infoStream.IsEnabled("DWPT") {
		dwpt.infoStream.Message("DWPT", "flush postings as segment %v numDocs=%v",
			flushState.segmentInfo.Name, dwpt.numDocsInRAM)
	}

	// From here on, any exit that does not reach "success = true" aborts
	// this writer.
	success := false
	defer func() {
		if !success {
			dwpt.abort(dwpt.filesToDelete)
		}
	}()

	err = dwpt.consumer.flush(flushState)
	if err != nil {
		return nil, err
	}
	dwpt.pendingDeletes.terms = make(map[*Term]int)
	files := make(map[string]bool)
	dwpt.directory.EachCreatedFiles(func(name string) {
		files[name] = true
	})
	dwpt.segmentInfo.SetFiles(files)

	info := NewSegmentInfoPerCommit(dwpt.segmentInfo, 0, -1)
	if dwpt.infoStream.IsEnabled("DWPT") {
		// delCountOnFlush is the number of doc IDs cleared from liveDocs
		// above, i.e. the number of deleted docs in the new segment.
		dwpt.infoStream.Message("DWPT", "new segment has %v deleted docs",
			check(flushState.liveDocs == nil, 0, flushState.delCountOnFlush))
		dwpt.infoStream.Message("DWPT", "new segment has %v; %v; %v; %v; %v",
			check(flushState.fieldInfos.HasVectors, "vectors", "no vectors"),
			check(flushState.fieldInfos.HasNorms, "norms", "no norms"),
			check(flushState.fieldInfos.HasDocValues, "docValues", "no docValues"),
			check(flushState.fieldInfos.HasProx, "prox", "no prox"),
			check(flushState.fieldInfos.HasFreq, "freqs", "no freqs"))
		dwpt.infoStream.Message("DWPT", "flushedFiles=%v", info.Files())
		dwpt.infoStream.Message("DWPT", "flushed codec=%v", dwpt.codec)
	}

	// Only hand the buffered deletes to the flushed segment when there are
	// pending delete queries; otherwise segmentDeletes stays nil.
	var segmentDeletes *BufferedDeletes
	if len(dwpt.pendingDeletes.queries) > 0 {
		segmentDeletes = dwpt.pendingDeletes
	}

	if dwpt.infoStream.IsEnabled("DWPT") {
		numBytes, sizeErr := info.SizeInBytes()
		if sizeErr != nil {
			return nil, sizeErr
		}
		newSegmentSize := float64(numBytes) / 1024 / 1024
		dwpt.infoStream.Message("DWPT",
			"flushed: segment=%v ramUsed=%v MB newFlushedSize(includes docstores)=%v MB docs/MB=%v",
			flushState.segmentInfo.Name, startMBUsed, newSegmentSize,
			float64(flushState.segmentInfo.DocCount())/newSegmentSize)
	}

	assert(dwpt.segmentInfo != nil)

	fs = newFlushedSegment(info, flushState.fieldInfos, segmentDeletes,
		flushState.liveDocs, flushState.delCountOnFlush)
	err = dwpt.sealFlushedSegment(fs)
	if err != nil {
		return nil, err
	}
	success = true

	return fs, nil
}
Esempio n. 3
0
/*
sealFlushedSegment seals the SegmentInfo for the newly flushed segment and
persists the deleted documents (live docs), if any.

Steps, in order:
 1. tag the segment's diagnostics with SOURCE_FLUSH;
 2. if the writer config asks for it, build the compound file and record
    the returned files in dwpt.filesToDelete;
 3. have the codec write the segment info;
 4. if flushedSegment.liveDocs is set, write the live docs and update the
    segment's delete count and delete generation.

The first error encountered is returned. On any non-successful exit a
message is written to the info stream when the "DWPT" component is enabled.
*/
func (dwpt *DocumentsWriterPerThread) sealFlushedSegment(flushedSegment *FlushedSegment) error {
	assert(flushedSegment != nil)

	newSegment := flushedSegment.segmentInfo

	setDiagnostics(newSegment.Info, SOURCE_FLUSH)

	segSize, err := newSegment.SizeInBytes()
	if err != nil {
		return err
	}
	// A single flush context, sized from the segment, is shared by every
	// write below.
	context := store.NewIOContextForFlush(&store.FlushInfo{
		newSegment.Info.DocCount(),
		segSize,
	})

	// Log on every exit path that does not reach "success = true" at the
	// bottom; note this covers failures in any step, not only the
	// compound-file step the message mentions.
	success := false
	defer func() {
		if !success {
			if dwpt.infoStream.IsEnabled("DWPT") {
				dwpt.infoStream.Message(
					"DWPT", "hit error creating compound file for newly flushed segment %v",
					newSegment.Info.Name)
			}
		}
	}()

	if dwpt.indexWriterConfig.UseCompoundFile() {
		files, err := createCompoundFile(
			dwpt.infoStream, dwpt.directory,
			CheckAbortNone(0), newSegment.Info, context)
		if err != nil {
			return err
		}
		for _, file := range files {
			dwpt.filesToDelete[file] = true
		}
		newSegment.Info.SetUseCompoundFile(true)
	}

	// Have codec write SegmentInfo. Must do this after creating CFS so
	// that 1) .si isn't slurped into CFS, and 2) .si reflects
	// useCompoundFile=true change above:
	err = dwpt.codec.SegmentInfoFormat().SegmentInfoWriter().Write(
		dwpt.directory,
		newSegment.Info,
		flushedSegment.fieldInfos,
		context)
	if err != nil {
		return err
	}

	// TODO: ideally we would freeze newSegment here!!
	// because any changes after writing the .si will be lost...

	// Must write deleted docs after the CFS so we don't slurp the del
	// file into CFS:
	if flushedSegment.liveDocs != nil {
		delCount := flushedSegment.delCount
		assert(delCount > 0)
		if dwpt.infoStream.IsEnabled("DWPT") {
			dwpt.infoStream.Message("DWPT", "flush: write %v deletes gen=%v",
				delCount, flushedSegment.segmentInfo.DelGen())
		}

		// TODO: we should prune the segment if it's 100% deleted... but
		// merge will also catch it.

		// TODO: in the NRT case it'd be better to hand this del vector
		// over to the shortly-to-be-opened SegmentReader and let it
		// carry the changes; there's no reason to use filesystem as
		// intermediary here.

		info := flushedSegment.segmentInfo
		// The live docs are written with the codec recorded on the segment
		// itself; the unchecked assertion panics if it is not a Codec.
		codec := info.Info.Codec().(Codec)
		err = codec.LiveDocsFormat().WriteLiveDocs(flushedSegment.liveDocs,
			dwpt.directory, info, delCount, context)
		if err != nil {
			return err
		}
		newSegment.SetDelCount(delCount)
		newSegment.AdvanceDelGen()
	}

	success = true
	return nil
}