func freezeBufferedDeletes(deletes *BufferedDeletes, isPrivate bool) *FrozenBufferedDeletes { assert2(!isPrivate || len(deletes.terms) == 0, "segment private package should only have del queries") var termsArray []*Term for k, _ := range deletes.terms { termsArray = append(termsArray, k) } util.TimSort(TermSorter(termsArray)) builder := newPrefixCodedTermsBuilder() for _, term := range termsArray { builder.add(term) } terms := builder.finish() queries := make([]Query, len(deletes.queries)) queryLimits := make([]int, len(deletes.queries)) var upto = 0 for k, v := range deletes.queries { queries[upto] = k queryLimits[upto] = v upto++ } return &FrozenBufferedDeletes{ gen: -1, isSegmentPrivate: isPrivate, termCount: len(termsArray), terms: terms, _queries: queries, queryLimits: queryLimits, bytesUsed: terms.sizeInBytes() + int64(len(queries))*BYTES_PER_DEL_QUERY, numTermDeletes: int(atomic.LoadInt32(&deletes.numTermDeletes)), } }
// Returns sorted list of outgoing transitions. // Sorts transitions array in-place func (s *State) sortTransitions(f func(t1, t2 *Transition) bool) { // merge-sort seems to perform better on already sorted arrays if len(s.transitionsArray) > 1 { util.TimSort(TransitionArraySorter{s.transitionsArray, f}) } }
func (pts *PointTransitionSet) sort() { // Tim sort performs well on already sorted arrays: if len(pts.points) > 0 { util.TimSort(PointTransitionsArray(pts.points)) } }
/*
Initialize the deleter: find all previous commits in the Directory,
incref the files they reference, call the policy to let it delete
commits. This will remove any files not referenced by any of the
commits.
*/
func newIndexFileDeleter(directory store.Directory, policy IndexDeletionPolicy,
	segmentInfos *SegmentInfos, infoStream util.InfoStream, writer *IndexWriter,
	initialIndexExists bool) (*IndexFileDeleter, error) {

	currentSegmentsFile := segmentInfos.SegmentsFileName()
	if infoStream.IsEnabled("IFD") {
		infoStream.Message("IFD", "init: current segments file is '%v'; deletePolicy=%v", currentSegmentsFile, policy)
	}

	fd := &IndexFileDeleter{
		infoStream: infoStream,
		writer:     writer,
		policy:     policy,
		directory:  directory,
		refCounts:  make(map[string]*RefCount),
	}

	// First pass: walk the files and initialize our ref counts:
	currentGen := segmentInfos.generation

	var currentCommitPoint *CommitPoint
	var files []string
	files, err := directory.ListAll()
	if _, ok := err.(*store.NoSuchDirectoryError); ok {
		// it means the directory is empty, so ignore it
		files = make([]string, 0)
	} else if err != nil {
		return nil, err
	}

	if currentSegmentsFile != "" {
		m := model.CODEC_FILE_PATTERN
		for _, filename := range files {
			// Only consider index files: skip the write lock and the
			// legacy segments.gen file; everything else must either
			// match the codec file pattern or be a segments_N file.
			if !strings.HasSuffix(filename, "write.lock") && filename != INDEX_FILENAME_SEGMENTS_GEN && (m.MatchString(filename) || strings.HasPrefix(filename, util.SEGMENTS)) {
				// Add this file to refCounts with initial count 0:
				fd.refCount(filename)
				if strings.HasPrefix(filename, util.SEGMENTS) {
					// This is a commit (segments or segments_N), and it's
					// valid (<= the max gen). Load it, then incref all files
					// it refers to:
					if infoStream.IsEnabled("IFD") {
						infoStream.Message("IFD", "init: load commit '%v'", filename)
					}
					sis := &SegmentInfos{}
					err := sis.Read(directory, filename)
					if os.IsNotExist(err) {
						// LUCENE-948: on NFS (and maybe others), if
						// you have writers switching back and forth
						// between machines, it's very likely that the
						// dir listing will be stale and will claim a
						// file segments_X exists when in fact it
						// doesn't. So, we catch this and handle it
						// as if the file does not exist
						if infoStream.IsEnabled("IFD") {
							infoStream.Message("IFD", "init: hit FileNotFoundException when loading commit '%v'; skipping this commit point", filename)
						}
						sis = nil
					} else if err != nil {
						// Only propagate the read error when the commit is
						// not from the future (gen <= currentGen) and the
						// file has real content; otherwise suppress it.
						if GenerationFromSegmentsFileName(filename) <= currentGen {
							length, _ := directory.FileLength(filename)
							if length > 0 {
								return nil, err
							}
						}
						// Most likely we are opening an index that has an
						// aborted "future" commit, so suppress exc in this case
						sis = nil
					} else { // sis != nil
						commitPoint := newCommitPoint(fd.commitsToDelete, directory, sis)
						if sis.generation == segmentInfos.generation {
							currentCommitPoint = commitPoint
						}
						fd.commits = append(fd.commits, commitPoint)
						fd.incRef(sis, true)
						// Track the newest commit we have seen so far:
						if fd.lastSegmentInfos == nil || sis.generation > fd.lastSegmentInfos.generation {
							fd.lastSegmentInfos = sis
						}
					}
				}
			}
		}
	}

	if currentCommitPoint == nil && currentSegmentsFile != "" && initialIndexExists {
		// We did not in fact see the segments_N file corresponding to
		// the segmentInfos that was passed in. Yet, it must exist,
		// because our caller holds the write lock. This can happen when
		// the directory listing was stale (e.g. when index accessed via
		// NFS client with stale directory listing cache). So we try now
		// to explicitly open this commit point:
		sis := &SegmentInfos{}
		err := sis.Read(directory, currentSegmentsFile)
		if err != nil {
			return nil, errors.New(fmt.Sprintf("failed to locate current segments_N file '%v'", currentSegmentsFile))
		}
		if infoStream.IsEnabled("IFD") {
			infoStream.Message("IFD", "forced open of current segments file %v", segmentInfos.SegmentsFileName())
		}
		currentCommitPoint = newCommitPoint(fd.commitsToDelete, directory, sis)
		fd.commits = append(fd.commits, currentCommitPoint)
		fd.incRef(sis, true)
	}

	// We keep commits list in sorted order (oldest to newest):
	util.TimSort(IndexCommits(fd.commits))

	// Now delete anything with ref count at 0. These are presumably
	// abandoned files e.g. due to crash of IndexWriter.
	for filename, rc := range fd.refCounts {
		if rc.count == 0 {
			if infoStream.IsEnabled("IFD") {
				infoStream.Message("IFD", "init: removing unreferenced file '%v'", filename)
			}
			fd.deleteFile(filename)
		}
	}

	// Finally, give policy a chance to remove things on startup:
	err = policy.onInit(fd.commits)
	if err != nil {
		return nil, err
	}

	// Always protect the incoming segmentInfos since sometime it may
	// not be the most recent commit
	err = fd.checkpoint(segmentInfos, false)
	if err != nil {
		return nil, err
	}

	// Record whether the starting commit was itself deleted by the
	// policy, then flush any commits queued for deletion.
	fd.startingCommitDeleted = (currentCommitPoint != nil && currentCommitPoint.IsDeleted())
	fd.deleteCommits()

	return fd, nil
}
func freezeBufferedUpdates(deletes *BufferedUpdates, isPrivate bool) *FrozenBufferedUpdates { assert2(!isPrivate || len(deletes.terms) == 0, "segment private package should only have del queries") var termsArray []*Term for k, _ := range deletes.terms { termsArray = append(termsArray, k) } util.TimSort(TermSorter(termsArray)) builder := newPrefixCodedTermsBuilder() for _, term := range termsArray { builder.add(term) } terms := builder.finish() queries := make([]Query, len(deletes.queries)) queryLimits := make([]int, len(deletes.queries)) var upto = 0 for k, v := range deletes.queries { queries[upto] = k queryLimits[upto] = v upto++ } // TODO if a Term affects multiple fields, we could keep the updates key'd by Term // so that it maps to all fields it affects, sorted by their docUpto, and traverse // that Term only once, applying the update to all fields that still need to be // updated. var allNumericUpdates []*DocValuesUpdate numericUpdatesSize := 0 for _, numericUpdates := range deletes.numericUpdates { for _, update := range numericUpdates { allNumericUpdates = append(allNumericUpdates, update) numericUpdatesSize += update.sizeInBytes() } } // TODO if a Term affects multiple fields, we could keep the updates key'd by Term // so that it maps to all fields it affects, sorted by their docUpto, and traverse // that Term only once, applying the update to all fields that still need to be // updated. 
var allBinaryUpdates []*DocValuesUpdate binaryUpdatesSize := 0 for _, binaryUpdates := range deletes.binaryUpdates { for _, update := range binaryUpdates { allBinaryUpdates = append(allBinaryUpdates, update) binaryUpdatesSize += update.sizeInBytes() } } bytesUsed := int(terms.RamBytesUsed() + int64(len(queries))*BYTES_PER_DEL_QUERY + int64(numericUpdatesSize) + util.ShallowSizeOf(allNumericUpdates) + int64(binaryUpdatesSize) + util.ShallowSizeOf(allBinaryUpdates)) return &FrozenBufferedUpdates{ gen: -1, isSegmentPrivate: isPrivate, termCount: len(termsArray), terms: terms, _queries: queries, queryLimits: queryLimits, numericDVUpdates: allNumericUpdates, binaryDVUpdates: allBinaryUpdates, bytesUsed: bytesUsed, numTermDeletes: int(atomic.LoadInt32(&deletes.numTermDeletes)), } }