Beispiel #1
0
func newDocumentsWriterPerThread(segmentName string,
	directory store.Directory, indexWriterConfig *LiveIndexWriterConfig,
	infoStream util.InfoStream, deleteQueue *DocumentsWriterDeleteQueue,
	fieldInfos *model.FieldInfosBuilder) *DocumentsWriterPerThread {

	counter := util.NewCounter()
	ans := &DocumentsWriterPerThread{
		directoryOrig:      directory,
		directory:          store.NewTrackingDirectoryWrapper(directory),
		fieldInfos:         fieldInfos,
		indexWriterConfig:  indexWriterConfig,
		infoStream:         infoStream,
		codec:              indexWriterConfig.codec,
		_bytesUsed:         counter,
		byteBlockAllocator: util.NewDirectTrackingAllocator(counter),
		pendingDeletes:     newBufferedDeletes(),
		intBlockAllocator:  newIntBlockAllocator(counter),
		deleteQueue:        deleteQueue,
		deleteSlice:        deleteQueue.newSlice(),
		segmentInfo:        model.NewSegmentInfo(directory, util.LUCENE_MAIN_VERSION, segmentName, -1, false, indexWriterConfig.codec, nil, nil),
		filesToDelete:      make(map[string]bool),
	}
	ans.docState = newDocState(ans, infoStream)
	ans.docState.similarity = indexWriterConfig.similarity
	assertn(ans.numDocsInRAM == 0, "num docs ", ans.numDocsInRAM)
	if DWPT_VERBOSE && infoStream.IsEnabled("DWPT") {
		infoStream.Message("DWPT", "init seg=%v delQueue=%v", segmentName, deleteQueue)
	}
	// this should be the last call in the ctor
	// it really sucks that we need to pull this within the ctor and pass this ref to the chain!
	ans.consumer = indexWriterConfig.indexingChain(ans)
	return ans
}
Beispiel #2
0
/*
Initialize the deleter: find all previous commits in the Directory,
incref the files they reference, call the policy to let it delete
commits. This will remove any files not referenced by any of the
commits.
*/
func newIndexFileDeleter(directory store.Directory, policy IndexDeletionPolicy,
	segmentInfos *SegmentInfos, infoStream util.InfoStream, writer *IndexWriter,
	initialIndexExists bool) (*IndexFileDeleter, error) {

	currentSegmentsFile := segmentInfos.SegmentsFileName()
	if infoStream.IsEnabled("IFD") {
		infoStream.Message("IFD", "init: current segments file is '%v'; deletePolicy=%v",
			currentSegmentsFile, policy)
	}

	fd := &IndexFileDeleter{
		infoStream: infoStream,
		writer:     writer,
		policy:     policy,
		directory:  directory,
		refCounts:  make(map[string]*RefCount),
	}

	// First pass: walk the files and initialize our ref counts:
	currentGen := segmentInfos.generation

	var currentCommitPoint *CommitPoint
	var files []string
	files, err := directory.ListAll()
	if _, ok := err.(*store.NoSuchDirectoryError); ok {
		// it means the directory is empty, so ignore it
		files = make([]string, 0)
	} else if err != nil {
		return nil, err
	}

	if currentSegmentsFile != "" {
		m := model.CODEC_FILE_PATTERN
		for _, filename := range files {
			if !strings.HasSuffix(filename, "write.lock") &&
				filename != INDEX_FILENAME_SEGMENTS_GEN &&
				(m.MatchString(filename) || strings.HasPrefix(filename, util.SEGMENTS)) {

				// Add this file to refCounts with initial count 0:
				fd.refCount(filename)

				if strings.HasPrefix(filename, util.SEGMENTS) {
					// This is a commit (segments or segments_N), and it's
					// valid (<= the max gen). Load it, then incref all files
					// it refers to:
					if infoStream.IsEnabled("IFD") {
						infoStream.Message("IFD", "init: load commit '%v'", filename)
					}
					sis := &SegmentInfos{}
					err := sis.Read(directory, filename)
					if os.IsNotExist(err) {
						// LUCENE-948: on NFS (and maybe others), if
						// you have writers switching back and forth
						// between machines, it's very likely that the
						// dir listing will be stale and will claim a
						// file segments_X exists when in fact it
						// doesn't.  So, we catch this and handle it
						// as if the file does not exist
						if infoStream.IsEnabled("IFD") {
							infoStream.Message("IFD",
								"init: hit FileNotFoundException when loading commit '%v'; skipping this commit point",
								filename)
						}
						sis = nil
					} else if err != nil {
						if GenerationFromSegmentsFileName(filename) <= currentGen {
							length, _ := directory.FileLength(filename)
							if length > 0 {
								return nil, err
							}
						}
						// Most likely we are opening an index that has an
						// aborted "future" commit, so suppress exc in this case
						sis = nil
					} else { // sis != nil
						commitPoint := newCommitPoint(fd.commitsToDelete, directory, sis)
						if sis.generation == segmentInfos.generation {
							currentCommitPoint = commitPoint
						}
						fd.commits = append(fd.commits, commitPoint)
						fd.incRef(sis, true)

						if fd.lastSegmentInfos == nil || sis.generation > fd.lastSegmentInfos.generation {
							fd.lastSegmentInfos = sis
						}
					}
				}
			}
		}
	}

	if currentCommitPoint == nil && currentSegmentsFile != "" && initialIndexExists {
		// We did not in fact see the segments_N file corresponding to
		// the segmentInfos that was passed in. Yet, it must exist,
		// because our caller holds the write lock. This can happen when
		// the directory listing was stale (e.g. when index accessed via
		// NFS client with stale directory listing cache). So we try now
		// to explicitly open this commit point:
		sis := &SegmentInfos{}
		err := sis.Read(directory, currentSegmentsFile)
		if err != nil {
			return nil, errors.New(fmt.Sprintf("failed to locate current segments_N file '%v'",
				currentSegmentsFile))
		}
		if infoStream.IsEnabled("IFD") {
			infoStream.Message("IFD", "forced open of current segments file %v",
				segmentInfos.SegmentsFileName())
		}
		currentCommitPoint = newCommitPoint(fd.commitsToDelete, directory, sis)
		fd.commits = append(fd.commits, currentCommitPoint)
		fd.incRef(sis, true)
	}

	// We keep commits list in sorted order (oldest to newest):
	util.TimSort(IndexCommits(fd.commits))

	// Now delete anyting with ref count at 0. These are presumably
	// abandoned files e.g. due to crash of IndexWriter.
	for filename, rc := range fd.refCounts {
		if rc.count == 0 {
			if infoStream.IsEnabled("IFD") {
				infoStream.Message("IFD", "init: removing unreferenced file '%v'",
					filename)
			}
			fd.deleteFile(filename)
		}
	}

	// Finally, give policy a chance to remove things on startup:
	err = policy.onInit(fd.commits)
	if err != nil {
		return nil, err
	}

	// Always protect the incoming segmentInfos since sometime it may
	// not be the most recent commit
	err = fd.checkpoint(segmentInfos, false)
	if err != nil {
		return nil, err
	}

	fd.startingCommitDeleted = (currentCommitPoint != nil && currentCommitPoint.IsDeleted())

	fd.deleteCommits()

	return fd, nil
}