示例#1
0
// newSegmentTermsEnum creates a SegmentTermsEnum bound to the field reader r.
//
// The returned enum starts out parked on its static frame (ord -1) with an
// empty frame stack and a single pre-allocated FST arc. When r has a terms
// index, an FST bytes reader is obtained from it and the index's first arc
// is checked to be final; the function panics with "assert fail" otherwise.
// The root block itself is not loaded here.
func newSegmentTermsEnum(r *FieldReader) *SegmentTermsEnum {
	enum := &SegmentTermsEnum{
		FieldReader:   r,
		stack:         make([]*segmentTermsEnumFrame, 0),
		scratchReader: store.NewEmptyByteArrayDataInput(),
		term:          newBytesRef(),
		arcs:          make([]*fst.Arc, 1),
		fstOutputs:    fst.ByteSequenceOutputsSingleton(),
	}
	enum.TermsEnumImpl = newTermsEnumImpl(enum)
	log.Printf("BTTR.init seg=%v", r.segment)

	// The static frame holds a seek by TermState, or a cached seek.
	enum.staticFrame = newFrame(enum, -1)
	enum.currentFrame = enum.staticFrame

	// Every arc slot gets its own reusable Arc value.
	for slot := range enum.arcs {
		enum.arcs[slot] = &fst.Arc{}
	}

	// Only touch the index when it has been loaded; it may not (and need
	// not) be available for this reader.
	if idx := r.index; idx != nil {
		enum.fstReader = idx.BytesReader()

		// The empty-string prefix must have an output in the index.
		if root := idx.FirstArc(enum.arcs[0]); !root.IsFinal() {
			panic("assert fail")
		}
	}

	enum.validIndexPrefix = 0
	log.Printf("init frame state %v", enum.currentFrame.ord)
	enum.printSeekState()

	// enum.computeBlockStats()

	return enum
}
示例#2
0
// SeekExact positions the enum on target and reports whether that exact
// term exists.
//
// If the enum is already positioned (the current frame is not the static
// frame), the arcs and frames cached for the prefix shared between target
// and the current term are re-used; otherwise the root frame is pushed
// first. The terms index is then followed one target byte at a time. Once
// the index has no arc for the next byte, or the whole target has been
// consumed, the current frame's block is scanned for the term.
//
// ok is true only when the term was found (or when target equals the
// current term and that term is known to exist). Errors returned by the
// index lookup, pushFrame or scanToTerm are passed through with ok == false.
//
// SeekExact panics if the terms index was not loaded, or with
// "assert fail" when an internal invariant does not hold.
func (e *SegmentTermsEnum) SeekExact(target []byte) (ok bool, err error) {
	if e.index == nil {
		panic("terms index was not loaded")
	}

	// Make room for the target plus one extra byte in the term buffer.
	if cap(e.term.bytes) <= len(target) {
		e.term.ensureSize(1 + len(target))
	}

	e.eof = false
	log.Printf("BTTR.seekExact seg=%v target=%v:%v current=%v (exists?=%v) validIndexPrefix=%v",
		e.segment, e.fieldInfo.Name, brToString(target), e.term, e.termExists, e.validIndexPrefix)
	e.printSeekState()

	var arc *fst.Arc
	var targetUpto int
	var output []byte

	e.targetBeforeCurrentLength = e.currentFrame.ord

	// if e.currentFrame != e.staticFrame {
	if e.currentFrame.ord != e.staticFrame.ord {
		// We are already seek'd; find the common
		// prefix of new seek term vs current term and
		// re-use the corresponding seek state.  For
		// example, if app first seeks to foobar, then
		// seeks to foobaz, we can re-use the seek state
		// for the first 5 bytes.

		log.Printf("  re-use current seek state validIndexPrefix=%v", e.validIndexPrefix)

		arc = e.arcs[0]
		if !arc.IsFinal() {
			panic("assert fail")
		}
		output = arc.Output.([]byte)
		targetUpto = 0

		lastFrame := e.stack[0]
		if e.validIndexPrefix > e.term.length {
			panic("assert fail")
		}

		targetLimit := len(target)
		if e.validIndexPrefix < targetLimit {
			targetLimit = e.validIndexPrefix
		}

		cmp := 0

		// TODO: reverse vLong byte order for better FST
		// prefix output sharing

		noOutputs := e.fstOutputs.NoOutput()

		// First compare up to valid seek frames:
		for targetUpto < targetLimit {
			cmp = int(e.term.bytes[targetUpto]) - int(target[targetUpto])
			log.Printf("    cycle targetUpto=%v (vs limit=%v) cmp=%v (targetLabel=%c vs termLabel=%c) arc.output=%v output=%v",
				targetUpto, targetLimit, cmp, target[targetUpto], e.term.bytes[targetUpto], arc.Output, output)
			if cmp != 0 {
				break
			}

			arc = e.arcs[1+targetUpto]
			if arc.Label != int(target[targetUpto]) {
				log.Printf("FAIL: arc.label=%c targetLabel=%c", arc.Label, target[targetUpto])
				panic("assert fail")
			}
			// Compare through the FST helper, exactly as the index loop
			// below does: the outputs are byte slices stored in interface
			// values, and comparing those with != panics at run time.
			if !fst.CompareFSTValue(arc.Output, noOutputs) {
				output = e.fstOutputs.Add(output, arc.Output).([]byte)
			}
			if arc.IsFinal() {
				lastFrame = e.stack[1+lastFrame.ord]
			}
			targetUpto++
		}

		if cmp == 0 {
			targetUptoMid := targetUpto

			// Second compare the rest of the term, but
			// don't save arc/output/frame; we only do this
			// to find out if the target term is before,
			// equal or after the current term
			targetLimit2 := len(target)
			if e.term.length < targetLimit2 {
				targetLimit2 = e.term.length
			}
			for targetUpto < targetLimit2 {
				cmp = int(e.term.bytes[targetUpto]) - int(target[targetUpto])
				log.Printf("    cycle2 targetUpto=%v (vs limit=%v) cmp=%v (targetLabel=%c vs termLabel=%c)",
					targetUpto, targetLimit2, cmp, target[targetUpto], e.term.bytes[targetUpto])
				if cmp != 0 {
					break
				}
				targetUpto++
			}

			// All shared bytes are equal: the shorter one sorts first.
			if cmp == 0 {
				cmp = e.term.length - len(target)
			}
			// Only the prefix covered by cached arcs/frames is re-usable.
			targetUpto = targetUptoMid
		}

		if cmp < 0 {
			// Common case: target term is after current
			// term, ie, app is seeking multiple terms
			// in sorted order
			log.Printf("  target is after current (shares prefixLen=%v); frame.ord=%v", targetUpto, lastFrame.ord)
			e.currentFrame = lastFrame
		} else if cmp > 0 {
			// Uncommon case: target term
			// is before current term; this means we can
			// keep the currentFrame but we must rewind it
			// (so we scan from the start)
			e.targetBeforeCurrentLength = 0
			log.Printf("  target is before current (shares prefixLen=%v); rewind frame ord=%v", targetUpto, lastFrame.ord)
			e.currentFrame = lastFrame
			e.currentFrame.rewind()
		} else {
			// Target is exactly the same as current term
			if e.term.length != len(target) {
				panic("assert fail")
			}
			if e.termExists {
				log.Println("  target is same as current; return true")
				return true, nil
			}
			log.Println("  target is same as current but term doesn't exist")
		}
	} else {
		e.targetBeforeCurrentLength = -1
		arc = e.index.FirstArc(e.arcs[0])

		// Empty string prefix must have an output (block) in the index!
		if !arc.IsFinal() || arc.Output == nil {
			panic("assert fail")
		}

		log.Println("    no seek state; push root frame")

		output = arc.Output.([]byte)

		e.currentFrame = e.staticFrame

		targetUpto = 0
		e.currentFrame, err = e.pushFrame(arc, e.fstOutputs.Add(output, arc.NextFinalOutput).([]byte), 0)
		if err != nil {
			return false, err
		}
	}

	log.Printf("  start index loop targetUpto=%v output=%v currentFrame.ord=%v targetBeforeCurrentLength=%v",
		targetUpto, output, e.currentFrame.ord, e.targetBeforeCurrentLength)

	for targetUpto < len(target) {
		targetLabel := int(target[targetUpto])
		nextArc, err := e.index.FindTargetArc(targetLabel, arc, e.getArc(1+targetUpto), e.fstReader)
		if err != nil {
			return false, err
		}
		if nextArc == nil {
			// Index is exhausted
			log.Printf("    index: index exhausted label=%c %x", targetLabel, targetLabel)

			e.validIndexPrefix = e.currentFrame.prefix

			e.currentFrame.scanToFloorFrame(target)

			if !e.currentFrame.hasTerms {
				e.termExists = false
				e.term.bytes[targetUpto] = byte(targetLabel)
				// Keep the term length in step with the byte just
				// written, as the other fast-path exit does; a stale
				// length would break the prefix re-use on the next seek.
				e.term.length = 1 + targetUpto
				log.Printf("  FAST NOT_FOUND term=%v", e.term)
				return false, nil
			}

			e.currentFrame.loadBlock()

			status, err := e.currentFrame.scanToTerm(target, true)
			if err != nil {
				return false, err
			}
			if status == SEEK_STATUS_FOUND {
				log.Printf("  return FOUND term=%v", e.term)
				return true, nil
			}
			log.Printf("  got %v; return NOT_FOUND term=%v", status, e.term)
			return false, nil
		}

		// Follow this arc
		arc = nextArc
		e.term.bytes[targetUpto] = byte(targetLabel)
		if arc.Output == nil {
			panic("assert fail")
		}
		noOutputs := e.fstOutputs.NoOutput()
		if !fst.CompareFSTValue(arc.Output, noOutputs) {
			output = e.fstOutputs.Add(output, arc.Output).([]byte)
		}
		log.Printf("    index: follow label=%x arc.output=%v arc.nfo=%v",
			target[targetUpto], arc.Output, arc.NextFinalOutput)
		targetUpto++

		if arc.IsFinal() {
			log.Println("    arc is final!")
			e.currentFrame, err = e.pushFrame(arc, e.fstOutputs.Add(output, arc.NextFinalOutput).([]byte), targetUpto)
			if err != nil {
				return false, err
			}
			log.Printf("    curFrame.ord=%v hasTerms=%v", e.currentFrame.ord, e.currentFrame.hasTerms)
		}
	}

	e.validIndexPrefix = e.currentFrame.prefix

	e.currentFrame.scanToFloorFrame(target)

	// Target term is entirely contained in the index:
	if !e.currentFrame.hasTerms {
		e.termExists = false
		e.term.length = targetUpto
		log.Printf("  FAST NOT_FOUND term=%v", e.term)
		return false, nil
	}

	e.currentFrame.loadBlock()

	status, err := e.currentFrame.scanToTerm(target, true)
	if err != nil {
		return false, err
	}
	if status == SEEK_STATUS_FOUND {
		log.Printf("  return FOUND term=%v", e.term)
		return true, nil
	}
	log.Printf("  got result %v; return NOT_FOUND term=%v", status, e.term)
	return false, nil
}