func freezeBufferedDeletes(deletes *BufferedDeletes, isPrivate bool) *FrozenBufferedDeletes {
	assert2(!isPrivate || len(deletes.terms) == 0,
		"segment private package should only have del queries")
	var termsArray []*Term
	for k := range deletes.terms {
		termsArray = append(termsArray, k)
	}
	util.TimSort(TermSorter(termsArray))
	builder := newPrefixCodedTermsBuilder()
	for _, term := range termsArray {
		builder.add(term)
	}
	terms := builder.finish()

	queries := make([]Query, len(deletes.queries))
	queryLimits := make([]int, len(deletes.queries))
	upto := 0
	for k, v := range deletes.queries {
		queries[upto] = k
		queryLimits[upto] = v
		upto++
	}

	return &FrozenBufferedDeletes{
		gen:              -1,
		isSegmentPrivate: isPrivate,
		termCount:        len(termsArray),
		terms:            terms,
		_queries:         queries,
		queryLimits:      queryLimits,
		bytesUsed:        terms.sizeInBytes() + int64(len(queries))*BYTES_PER_DEL_QUERY,
		numTermDeletes:   int(atomic.LoadInt32(&deletes.numTermDeletes)),
	}
}
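The sort above goes through a TermSorter adapter. A minimal sketch of what that adapter plausibly looks like, assuming util.TimSort accepts the standard sort.Interface and that Term exposes Field and Bytes as in Lucene's term ordering (field first, then term bytes); this is illustrative, not golucene's actual source:

// Plausible sketch of the TermSorter adapter consumed by util.TimSort
// above, assuming util.TimSort takes a sort.Interface and Term has
// Field (string) and Bytes ([]byte) fields. Requires importing "bytes".
type TermSorter []*Term

func (s TermSorter) Len() int      { return len(s) }
func (s TermSorter) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
func (s TermSorter) Less(i, j int) bool {
	// Order by field name first, then by term bytes, matching the
	// order PrefixCodedTermsBuilder expects.
	if s[i].Field != s[j].Field {
		return s[i].Field < s[j].Field
	}
	return bytes.Compare(s[i].Bytes, s[j].Bytes) < 0
}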
Example #2
// sortTransitions sorts the outgoing transitions array in-place
// using the given comparator.
func (s *State) sortTransitions(f func(t1, t2 *Transition) bool) {
	// TimSort (a merge-sort hybrid) performs well on already sorted arrays
	if len(s.transitionsArray) > 1 {
		util.TimSort(TransitionArraySorter{s.transitionsArray, f})
	}
}
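TransitionArraySorter pairs the slice with the caller's comparator. A hedged sketch of that adapter, again assuming util.TimSort takes a sort.Interface; only the positional {slice, func} shape is taken from the call site above, the field names are invented:

// Hypothetical adapter pairing the transitions slice with the supplied
// comparator so util.TimSort can order them.
type TransitionArraySorter struct {
	arr  []*Transition
	less func(t1, t2 *Transition) bool
}

func (s TransitionArraySorter) Len() int           { return len(s.arr) }
func (s TransitionArraySorter) Swap(i, j int)      { s.arr[i], s.arr[j] = s.arr[j], s.arr[i] }
func (s TransitionArraySorter) Less(i, j int) bool { return s.less(s.arr[i], s.arr[j]) }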
Example #3
func (pts *PointTransitionSet) sort() {
	// TimSort performs well on already sorted arrays:
	if len(pts.points) > 0 {
		util.TimSort(PointTransitionsArray(pts.points))
	}
}
Example #4
/*
Initialize the deleter: find all previous commits in the Directory,
incref the files they reference, call the policy to let it delete
commits. This will remove any files not referenced by any of the
commits.
*/
func newIndexFileDeleter(directory store.Directory, policy IndexDeletionPolicy,
	segmentInfos *SegmentInfos, infoStream util.InfoStream, writer *IndexWriter,
	initialIndexExists bool) (*IndexFileDeleter, error) {

	currentSegmentsFile := segmentInfos.SegmentsFileName()
	if infoStream.IsEnabled("IFD") {
		infoStream.Message("IFD", "init: current segments file is '%v'; deletePolicy=%v",
			currentSegmentsFile, policy)
	}

	fd := &IndexFileDeleter{
		infoStream: infoStream,
		writer:     writer,
		policy:     policy,
		directory:  directory,
		refCounts:  make(map[string]*RefCount),
	}

	// First pass: walk the files and initialize our ref counts:
	currentGen := segmentInfos.generation

	var currentCommitPoint *CommitPoint
	files, err := directory.ListAll()
	if _, ok := err.(*store.NoSuchDirectoryError); ok {
		// it means the directory is empty, so ignore it
		files = make([]string, 0)
	} else if err != nil {
		return nil, err
	}

	if currentSegmentsFile != "" {
		m := model.CODEC_FILE_PATTERN
		for _, filename := range files {
			if !strings.HasSuffix(filename, "write.lock") &&
				filename != INDEX_FILENAME_SEGMENTS_GEN &&
				(m.MatchString(filename) || strings.HasPrefix(filename, util.SEGMENTS)) {

				// Add this file to refCounts with initial count 0:
				fd.refCount(filename)

				if strings.HasPrefix(filename, util.SEGMENTS) {
					// This is a commit (segments or segments_N), and it's
					// valid (<= the max gen). Load it, then incref all files
					// it refers to:
					if infoStream.IsEnabled("IFD") {
						infoStream.Message("IFD", "init: load commit '%v'", filename)
					}
					sis := &SegmentInfos{}
					err := sis.Read(directory, filename)
					if os.IsNotExist(err) {
						// LUCENE-948: on NFS (and maybe others), if
						// you have writers switching back and forth
						// between machines, it's very likely that the
						// dir listing will be stale and will claim a
						// file segments_X exists when in fact it
						// doesn't.  So, we catch this and handle it
						// as if the file does not exist
						if infoStream.IsEnabled("IFD") {
							infoStream.Message("IFD",
								"init: hit FileNotFoundException when loading commit '%v'; skipping this commit point",
								filename)
						}
						sis = nil
					} else if err != nil {
						if GenerationFromSegmentsFileName(filename) <= currentGen {
							length, _ := directory.FileLength(filename)
							if length > 0 {
								return nil, err
							}
						}
						// Most likely we are opening an index that has an
						// aborted "future" commit, so suppress exc in this case
						sis = nil
					} else { // sis != nil
						commitPoint := newCommitPoint(fd.commitsToDelete, directory, sis)
						if sis.generation == segmentInfos.generation {
							currentCommitPoint = commitPoint
						}
						fd.commits = append(fd.commits, commitPoint)
						fd.incRef(sis, true)

						if fd.lastSegmentInfos == nil || sis.generation > fd.lastSegmentInfos.generation {
							fd.lastSegmentInfos = sis
						}
					}
				}
			}
		}
	}

	if currentCommitPoint == nil && currentSegmentsFile != "" && initialIndexExists {
		// We did not in fact see the segments_N file corresponding to
		// the segmentInfos that was passed in. Yet, it must exist,
		// because our caller holds the write lock. This can happen when
		// the directory listing was stale (e.g. when index accessed via
		// NFS client with stale directory listing cache). So we try now
		// to explicitly open this commit point:
		sis := &SegmentInfos{}
		err := sis.Read(directory, currentSegmentsFile)
		if err != nil {
			return nil, fmt.Errorf("failed to locate current segments_N file '%v'",
				currentSegmentsFile)
		}
		if infoStream.IsEnabled("IFD") {
			infoStream.Message("IFD", "forced open of current segments file %v",
				segmentInfos.SegmentsFileName())
		}
		currentCommitPoint = newCommitPoint(fd.commitsToDelete, directory, sis)
		fd.commits = append(fd.commits, currentCommitPoint)
		fd.incRef(sis, true)
	}

	// We keep commits list in sorted order (oldest to newest):
	util.TimSort(IndexCommits(fd.commits))

	// Now delete anything with a ref count of 0. These are presumably
	// abandoned files, e.g. due to a crash of IndexWriter.
	for filename, rc := range fd.refCounts {
		if rc.count == 0 {
			if infoStream.IsEnabled("IFD") {
				infoStream.Message("IFD", "init: removing unreferenced file '%v'",
					filename)
			}
			fd.deleteFile(filename)
		}
	}

	// Finally, give policy a chance to remove things on startup:
	err = policy.onInit(fd.commits)
	if err != nil {
		return nil, err
	}

	// Always protect the incoming segmentInfos since sometimes it may
	// not be the most recent commit.
	err = fd.checkpoint(segmentInfos, false)
	if err != nil {
		return nil, err
	}

	fd.startingCommitDeleted = (currentCommitPoint != nil && currentCommitPoint.IsDeleted())

	fd.deleteCommits()

	return fd, nil
}
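The init sweep above deletes any file whose ref count is still 0, relying on refCount to create entries lazily. A minimal sketch of the bookkeeping this implies; the RefCount internals are assumed, not taken from golucene:

// Assumed sketch of the per-file reference counting the deleter relies
// on: refCount creates an entry lazily at count 0, so files that no
// commit ever incRef'd are removed by the init sweep above.
type RefCount struct {
	count int
}

func (fd *IndexFileDeleter) refCount(filename string) *RefCount {
	rc, ok := fd.refCounts[filename]
	if !ok {
		rc = &RefCount{}
		fd.refCounts[filename] = rc
	}
	return rc
}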
Example #5
func freezeBufferedUpdates(deletes *BufferedUpdates, isPrivate bool) *FrozenBufferedUpdates {
	assert2(!isPrivate || len(deletes.terms) == 0,
		"segment private package should only have del queries")
	var termsArray []*Term
	for k := range deletes.terms {
		termsArray = append(termsArray, k)
	}
	util.TimSort(TermSorter(termsArray))
	builder := newPrefixCodedTermsBuilder()
	for _, term := range termsArray {
		builder.add(term)
	}
	terms := builder.finish()

	queries := make([]Query, len(deletes.queries))
	queryLimits := make([]int, len(deletes.queries))
	upto := 0
	for k, v := range deletes.queries {
		queries[upto] = k
		queryLimits[upto] = v
		upto++
	}

	// TODO if a Term affects multiple fields, we could keep the updates key'd by Term
	// so that it maps to all fields it affects, sorted by their docUpto, and traverse
	// that Term only once, applying the update to all fields that still need to be
	// updated.
	var allNumericUpdates []*DocValuesUpdate
	numericUpdatesSize := 0
	for _, numericUpdates := range deletes.numericUpdates {
		for _, update := range numericUpdates {
			allNumericUpdates = append(allNumericUpdates, update)
			numericUpdatesSize += update.sizeInBytes()
		}
	}

	// TODO if a Term affects multiple fields, we could keep the updates key'd by Term
	// so that it maps to all fields it affects, sorted by their docUpto, and traverse
	// that Term only once, applying the update to all fields that still need to be
	// updated.
	var allBinaryUpdates []*DocValuesUpdate
	binaryUpdatesSize := 0
	for _, binaryUpdates := range deletes.binaryUpdates {
		for _, update := range binaryUpdates {
			allBinaryUpdates = append(allBinaryUpdates, update)
			binaryUpdatesSize += update.sizeInBytes()
		}
	}

	bytesUsed := int(terms.RamBytesUsed() +
		int64(len(queries))*BYTES_PER_DEL_QUERY +
		int64(numericUpdatesSize) + util.ShallowSizeOf(allNumericUpdates) +
		int64(binaryUpdatesSize) + util.ShallowSizeOf(allBinaryUpdates))

	return &FrozenBufferedUpdates{
		gen:              -1,
		isSegmentPrivate: isPrivate,
		termCount:        len(termsArray),
		terms:            terms,
		_queries:         queries,
		queryLimits:      queryLimits,
		numericDVUpdates: allNumericUpdates,
		binaryDVUpdates:  allBinaryUpdates,
		bytesUsed:        bytesUsed,
		numTermDeletes:   int(atomic.LoadInt32(&deletes.numTermDeletes)),
	}
}
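queries and queryLimits are parallel slices: in Lucene, each buffered delete-by-query records a docIDUpto limit so it only affects documents indexed before the delete was buffered. A hedged sketch of how a consumer might walk the frozen packet; applyQuery is a hypothetical callback, not golucene's API:

// Illustrative only: each frozen query pairs with its recorded limit
// (docIDUpto), so a delete-by-query applies just to documents whose
// docID is below that limit. applyQuery is a hypothetical callback.
func applyQueryDeletes(frozen *FrozenBufferedUpdates,
	applyQuery func(q Query, docIDUpto int)) {
	for i, q := range frozen._queries {
		applyQuery(q, frozen.queryLimits[i])
	}
}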