func newSegmentTermsEnum(r *FieldReader) *SegmentTermsEnum { ans := &SegmentTermsEnum{ FieldReader: r, stack: make([]*segmentTermsEnumFrame, 0), scratchReader: store.NewEmptyByteArrayDataInput(), term: make([]byte, 0), arcs: make([]*util.Arc, 1), fstOutputs: util.ByteSequenceOutputsSingleton(), } ans.TermsEnumImpl = newTermsEnumImpl(ans) log.Println("BTTR.init seg=%v", r.segment) // Used to hold seek by TermState, or cached seek ans.staticFrame = newFrame(ans, -1) if r.index != nil { ans.fstReader = r.index.BytesReader() } // Init w/ root block; don't use index since it may // not (and need not) have been loaded for i, _ := range ans.arcs { ans.arcs[i] = &util.Arc{} } ans.currentFrame = ans.staticFrame var arc *util.Arc if r.index != nil { arc = r.index.FirstArc(ans.arcs[0]) // Empty string prefix must have an output in the index! if !arc.IsFinal() { panic("assert fail") } } ans.currentFrame = ans.staticFrame ans.validIndexPrefix = 0 log.Printf("init frame state %v", ans.currentFrame.ord) ans.printSeekState() // ans.computeBlockStats() return ans }
func (e *SegmentTermsEnum) SeekExact(target []byte) (ok bool, err error) { if e.index == nil { panic("terms index was not loaded") } if cap(e.term) <= len(target) { next := make([]byte, len(e.term), len(target)) copy(next, e.term) e.term = next } e.eof = false log.Printf("BTTR.seekExact seg=%v target=%v:%v current=%v (exists?=%v) validIndexPrefix=%v", e.segment, e.fieldInfo.name, brToString(target), brToString(e.term), e.termExists, e.validIndexPrefix) e.printSeekState() var arc *util.Arc var targetUpto int var output []byte e.targetBeforeCurrentLength = e.currentFrame.ord // if e.currentFrame != e.staticFrame { if e.currentFrame.ord != e.staticFrame.ord { // We are already seek'd; find the common // prefix of new seek term vs current term and // re-use the corresponding seek state. For // example, if app first seeks to foobar, then // seeks to foobaz, we can re-use the seek state // for the first 5 bytes. log.Printf(" re-use current seek state validIndexPrefix=%v", e.validIndexPrefix) arc = e.arcs[0] if !arc.IsFinal() { panic("assert fail") } output = arc.Output.([]byte) targetUpto = 0 lastFrame := e.stack[0] if e.validIndexPrefix > len(e.term) { panic("assert fail") } targetLimit := len(target) if e.validIndexPrefix < targetLimit { targetLimit = e.validIndexPrefix } cmp := 0 // TODO: reverse vLong byte order for better FST // prefix output sharing noOutputs := e.fstOutputs.NoOutput() // First compare up to valid seek frames: for targetUpto < targetLimit { cmp = int(e.term[targetUpto] - target[targetUpto]) log.Printf(" cycle targetUpto=%v (vs limit=%v) cmp=%v (targetLabel=%c vs termLabel=%c) arc.output=%v output=%v", targetUpto, targetLimit, cmp, target[targetUpto], e.term[targetUpto], arc.Output, output) if cmp != 0 { break } arc = e.arcs[1+targetUpto] if arc.Label != int(target[targetUpto]) { log.Printf("FAIL: arc.label=%c targetLabel=%c", arc.Label, target[targetUpto]) panic("assert fail") } if arc.Output != noOutputs { output = e.fstOutputs.Add(output, arc.Output).([]byte) } if arc.IsFinal() { lastFrame = e.stack[1+lastFrame.ord] } targetUpto++ } if cmp == 0 { targetUptoMid := targetUpto // Second compare the rest of the term, but // don't save arc/output/frame; we only do this // to find out if the target term is before, // equal or after the current term targetLimit2 := len(target) if len(e.term) < targetLimit2 { targetLimit2 = len(e.term) } for targetUpto < targetLimit2 { cmp = int(e.term[targetUpto] - target[targetUpto]) log.Printf(" cycle2 targetUpto=%v (vs limit=%v) cmp=%v (targetLabel=%c vs termLabel=%c)", targetUpto, targetLimit, cmp, target[targetUpto], e.term[targetUpto]) if cmp != 0 { break } targetUpto++ } if cmp == 0 { cmp = len(e.term) - len(target) } targetUpto = targetUptoMid } if cmp < 0 { // Common case: target term is after current // term, ie, app is seeking multiple terms // in sorted order log.Printf(" target is after current (shares prefixLen=%v); frame.ord=%v", targetUpto, lastFrame.ord) e.currentFrame = lastFrame } else if cmp > 0 { // Uncommon case: target term // is before current term; this means we can // keep the currentFrame but we must rewind it // (so we scan from the start) e.targetBeforeCurrentLength = 0 log.Printf(" target is before current (shares prefixLen=%v); rewind frame ord=%v", targetUpto, lastFrame.ord) e.currentFrame = lastFrame e.currentFrame.rewind() } else { // Target is exactly the same as current term if len(e.term) != len(target) { panic("assert fail") } if e.termExists { log.Println(" target is same as current; return true") return true, nil } else { log.Println(" target is same as current but term doesn't exist") } } } else { e.targetBeforeCurrentLength = -1 arc = e.index.FirstArc(e.arcs[0]) // Empty string prefix must have an output (block) in the index! log.Println(arc) if !arc.IsFinal() || arc.Output == nil { panic("assert fail") } output = arc.Output.([]byte) e.currentFrame = e.staticFrame targetUpto = 0 e.currentFrame, err = e.pushFrame(arc, e.fstOutputs.Add(output, arc.NextFinalOutput).([]byte), 0) if err != nil { return false, err } } log.Printf(" start index loop targetUpto=%v output=%v currentFrame.ord=%v targetBeforeCurrentLength=%v", targetUpto, output, e.currentFrame.ord, e.targetBeforeCurrentLength) for targetUpto < len(target) { targetLabel := int(target[targetUpto]) nextArc, err := e.index.FindTargetArc(targetLabel, arc, e.getArc(1+targetUpto), e.fstReader) if err != nil { return false, err } if nextArc == nil { // Index is exhausted log.Printf(" index: index exhausted label=%c %x", targetLabel, targetLabel) e.validIndexPrefix = e.currentFrame.prefix e.currentFrame.scanToFloorFrame(target) if !e.currentFrame.hasTerms { e.termExists = false e.term = append(e.term, byte(targetLabel)) log.Printf(" FAST NOT_FOUND term=%v", brToString(e.term)) return false, nil } e.currentFrame.loadBlock() status, err := e.currentFrame.scanToTerm(target, true) if err != nil { return false, err } if status == SEEK_STATUS_FOUND { log.Printf(" return FOUND term=%v %v", utf8ToString(e.term), e.term) return true, nil } else { log.Printf(" got %v; return NOT_FOUND term=%v", status, brToString(e.term)) return false, nil } } else { // Follow this arc arc = nextArc e.term[targetUpto] = byte(targetLabel) // Aggregate output as we go: if arc.Output == nil { panic("assert fail") } noOutputs := e.fstOutputs.NoOutput() if arc.Output != noOutputs { output = e.fstOutputs.Add(output, arc.Output).([]byte) } log.Printf(" index: follow label=%x arc.output=%v arc.nfo=%v", target[targetUpto], arc.Output, arc.NextFinalOutput) targetUpto++ if arc.IsFinal() { log.Println(" arc is final!") e.currentFrame, err = e.pushFrame(arc, e.fstOutputs.Add(output, arc.NextFinalOutput).([]byte), targetUpto) if err != nil { return false, err } log.Printf(" curFrame.ord=%v hasTerms=%v", e.currentFrame.ord, e.currentFrame.hasTerms) } } } e.validIndexPrefix = e.currentFrame.prefix e.currentFrame.scanToFloorFrame(target) // Target term is entirely contained in the index: if !e.currentFrame.hasTerms { e.termExists = false e.term = e.term[0:targetUpto] log.Printf(" FAST NOT_FOUND term=%v", brToString(e.term)) return false, nil } e.currentFrame.loadBlock() status, err := e.currentFrame.scanToTerm(target, true) if err != nil { return false, err } if status == SEEK_STATUS_FOUND { log.Printf(" return FOUND term=%v %v", utf8ToString(e.term), e.term) return true, nil } else { log.Printf(" got result %v; return NOT_FOUND term=%v", status, utf8ToString(e.term)) return false, nil } }