func newSegmentTermsEnum(r *FieldReader) *SegmentTermsEnum { ans := &SegmentTermsEnum{ FieldReader: r, stack: make([]*segmentTermsEnumFrame, 0), scratchReader: store.NewEmptyByteArrayDataInput(), term: newBytesRef(), arcs: make([]*fst.Arc, 1), fstOutputs: fst.ByteSequenceOutputsSingleton(), } ans.TermsEnumImpl = newTermsEnumImpl(ans) log.Printf("BTTR.init seg=%v", r.segment) // Used to hold seek by TermState, or cached seek ans.staticFrame = newFrame(ans, -1) if r.index != nil { ans.fstReader = r.index.BytesReader() } // Init w/ root block; don't use index since it may // not (and need not) have been loaded for i, _ := range ans.arcs { ans.arcs[i] = &fst.Arc{} } ans.currentFrame = ans.staticFrame var arc *fst.Arc if r.index != nil { arc = r.index.FirstArc(ans.arcs[0]) // Empty string prefix must have an output in the index! if !arc.IsFinal() { panic("assert fail") } } ans.currentFrame = ans.staticFrame ans.validIndexPrefix = 0 log.Printf("init frame state %v", ans.currentFrame.ord) ans.printSeekState() // ans.computeBlockStats() return ans }
func (e *SegmentTermsEnum) SeekExact(target []byte) (ok bool, err error) { if e.index == nil { panic("terms index was not loaded") } if cap(e.term.bytes) <= len(target) { e.term.ensureSize(1 + len(target)) } e.eof = false log.Printf("BTTR.seekExact seg=%v target=%v:%v current=%v (exists?=%v) validIndexPrefix=%v", e.segment, e.fieldInfo.Name, brToString(target), e.term, e.termExists, e.validIndexPrefix) e.printSeekState() var arc *fst.Arc var targetUpto int var output []byte e.targetBeforeCurrentLength = e.currentFrame.ord // if e.currentFrame != e.staticFrame { if e.currentFrame.ord != e.staticFrame.ord { // We are already seek'd; find the common // prefix of new seek term vs current term and // re-use the corresponding seek state. For // example, if app first seeks to foobar, then // seeks to foobaz, we can re-use the seek state // for the first 5 bytes. log.Printf(" re-use current seek state validIndexPrefix=%v", e.validIndexPrefix) arc = e.arcs[0] if !arc.IsFinal() { panic("assert fail") } output = arc.Output.([]byte) targetUpto = 0 lastFrame := e.stack[0] if e.validIndexPrefix > e.term.length { panic("assert fail") } targetLimit := len(target) if e.validIndexPrefix < targetLimit { targetLimit = e.validIndexPrefix } cmp := 0 // TODO: reverse vLong byte order for better FST // prefix output sharing noOutputs := e.fstOutputs.NoOutput() // First compare up to valid seek frames: for targetUpto < targetLimit { cmp = int(e.term.bytes[targetUpto]) - int(target[targetUpto]) log.Printf(" cycle targetUpto=%v (vs limit=%v) cmp=%v (targetLabel=%c vs termLabel=%c) arc.output=%v output=%v", targetUpto, targetLimit, cmp, target[targetUpto], e.term.bytes[targetUpto], arc.Output, output) if cmp != 0 { break } arc = e.arcs[1+targetUpto] if arc.Label != int(target[targetUpto]) { log.Printf("FAIL: arc.label=%c targetLabel=%c", arc.Label, target[targetUpto]) panic("assert fail") } if arc.Output != noOutputs { output = e.fstOutputs.Add(output, arc.Output).([]byte) } if arc.IsFinal() { lastFrame = e.stack[1+lastFrame.ord] } targetUpto++ } if cmp == 0 { targetUptoMid := targetUpto // Second compare the rest of the term, but // don't save arc/output/frame; we only do this // to find out if the target term is before, // equal or after the current term targetLimit2 := len(target) if e.term.length < targetLimit2 { targetLimit2 = e.term.length } for targetUpto < targetLimit2 { cmp = int(e.term.bytes[targetUpto]) - int(target[targetUpto]) log.Printf(" cycle2 targetUpto=%v (vs limit=%v) cmp=%v (targetLabel=%c vs termLabel=%c)", targetUpto, targetLimit, cmp, target[targetUpto], e.term.bytes[targetUpto]) if cmp != 0 { break } targetUpto++ } if cmp == 0 { cmp = e.term.length - len(target) } targetUpto = targetUptoMid } if cmp < 0 { // Common case: target term is after current // term, ie, app is seeking multiple terms // in sorted order log.Printf(" target is after current (shares prefixLen=%v); frame.ord=%v", targetUpto, lastFrame.ord) e.currentFrame = lastFrame } else if cmp > 0 { // Uncommon case: target term // is before current term; this means we can // keep the currentFrame but we must rewind it // (so we scan from the start) e.targetBeforeCurrentLength = 0 log.Printf(" target is before current (shares prefixLen=%v); rewind frame ord=%v", targetUpto, lastFrame.ord) e.currentFrame = lastFrame e.currentFrame.rewind() } else { // Target is exactly the same as current term if e.term.length != len(target) { panic("assert fail") } if e.termExists { log.Println(" target is same as current; return true") return true, nil } else { log.Println(" target is same as current but term doesn't exist") } } } else { e.targetBeforeCurrentLength = -1 arc = e.index.FirstArc(e.arcs[0]) // Empty string prefix must have an output (block) in the index! if !arc.IsFinal() || arc.Output == nil { panic("assert fail") } log.Println(" no seek state; push root frame") output = arc.Output.([]byte) e.currentFrame = e.staticFrame targetUpto = 0 e.currentFrame, err = e.pushFrame(arc, e.fstOutputs.Add(output, arc.NextFinalOutput).([]byte), 0) if err != nil { return false, err } } log.Printf(" start index loop targetUpto=%v output=%v currentFrame.ord=%v targetBeforeCurrentLength=%v", targetUpto, output, e.currentFrame.ord, e.targetBeforeCurrentLength) for targetUpto < len(target) { targetLabel := int(target[targetUpto]) nextArc, err := e.index.FindTargetArc(targetLabel, arc, e.getArc(1+targetUpto), e.fstReader) if err != nil { return false, err } if nextArc == nil { // Index is exhausted log.Printf(" index: index exhausted label=%c %x", targetLabel, targetLabel) e.validIndexPrefix = e.currentFrame.prefix e.currentFrame.scanToFloorFrame(target) if !e.currentFrame.hasTerms { e.termExists = false e.term.bytes[targetUpto] = byte(targetLabel) log.Printf(" FAST NOT_FOUND term=%v", e.term) return false, nil } e.currentFrame.loadBlock() status, err := e.currentFrame.scanToTerm(target, true) if err != nil { return false, err } if status == SEEK_STATUS_FOUND { log.Printf(" return FOUND term=%v", e.term) return true, nil } else { log.Printf(" got %v; return NOT_FOUND term=%v", status, e.term) return false, nil } } else { // Follow this arc arc = nextArc e.term.bytes[targetUpto] = byte(targetLabel) if arc.Output == nil { panic("assert fail") } noOutputs := e.fstOutputs.NoOutput() if !fst.CompareFSTValue(arc.Output, noOutputs) { output = e.fstOutputs.Add(output, arc.Output).([]byte) } log.Printf(" index: follow label=%x arc.output=%v arc.nfo=%v", target[targetUpto], arc.Output, arc.NextFinalOutput) targetUpto++ if arc.IsFinal() { log.Println(" arc is final!") e.currentFrame, err = e.pushFrame(arc, e.fstOutputs.Add(output, arc.NextFinalOutput).([]byte), targetUpto) if err != nil { return false, err } log.Printf(" curFrame.ord=%v hasTerms=%v", e.currentFrame.ord, e.currentFrame.hasTerms) } } } e.validIndexPrefix = e.currentFrame.prefix e.currentFrame.scanToFloorFrame(target) // Target term is entirely contained in the index: if !e.currentFrame.hasTerms { e.termExists = false e.term.length = targetUpto log.Printf(" FAST NOT_FOUND term=%v", e.term) return false, nil } e.currentFrame.loadBlock() status, err := e.currentFrame.scanToTerm(target, true) if err != nil { return false, err } if status == SEEK_STATUS_FOUND { log.Printf(" return FOUND term=%v", e.term) return true, nil } else { log.Printf(" got result %v; return NOT_FOUND term=%v", status, e.term) return false, nil } }