Пример #1
0
// incr advances e.upto by one, calls e.spi.grow(), and then makes sure both
// e.arcs and e.output are long enough to be indexed at e.upto. Existing
// elements are preserved when a slice is reallocated.
func (e *FSTEnum) incr() {
	e.upto++
	e.spi.grow()
	if len(e.arcs) <= e.upto {
		newArcs := make([]*Arc, util.Oversize(e.upto+1, util.NUM_BYTES_OBJECT_REF))
		copy(newArcs, e.arcs)
		e.arcs = newArcs
	}
	// The comparison must be <= (same as for arcs above): with a strict <
	// the slice is not grown when len(e.output) == e.upto, leaving index
	// e.upto out of range.
	if len(e.output) <= e.upto {
		newOutput := make([]interface{}, util.Oversize(e.upto+1, util.NUM_BYTES_OBJECT_REF))
		copy(newOutput, e.output)
		e.output = newOutput
	}
}
Пример #2
0
// ensureSize replaces br.bytes with a larger slice when its capacity is
// below minSize. The new slice has length util.Oversize(minSize, 1) and
// starts with a copy of the old contents. When the capacity already
// suffices, br.bytes is left untouched (its length is not changed).
func (br *bytesRef) ensureSize(minSize int) {
	assert(minSize >= 0)
	if cap(br.bytes) >= minSize {
		return
	}
	grown := make([]byte, util.Oversize(minSize, 1))
	copy(grown, br.bytes)
	br.bytes = grown
}
Пример #3
0
// StartDocument records numStoredFields for the next buffered document and
// increments w.numBufferedDocs. When w.numStoredFields is full, both
// w.numStoredFields and w.endOffsets are reallocated to the same oversized
// length, each keeping its own previous contents. It always returns nil.
func (w *CompressingStoredFieldsWriter) StartDocument(numStoredFields int) error {
	if w.numBufferedDocs == len(w.numStoredFields) {
		newLength := util.Oversize(w.numBufferedDocs+1, 4)

		// Each slice must be refilled from its own old data; copying
		// endOffsets into numStoredFields would corrupt the per-document
		// field counts of every document buffered so far.
		grownCounts := make([]int, newLength)
		copy(grownCounts, w.numStoredFields)
		w.numStoredFields = grownCounts

		grownOffsets := make([]int, newLength)
		copy(grownOffsets, w.endOffsets)
		w.endOffsets = grownOffsets
	}
	w.numStoredFields[w.numBufferedDocs] = numStoredFields
	w.numBufferedDocs++
	return nil
}
Пример #4
0
// Add appends l to this builder and returns the builder itself.
//
// When the pending buffer is full, b.pack() is called before l is stored;
// if b.valuesOff has also reached len(b.values), b.grow is called first
// with an oversized length. The builder asserts that b.pending is non-nil,
// i.e. that it has not already been built.
func (b *PackedLongValuesBuilderImpl) Add(l int64) PackedLongValuesBuilder {
	assert2(b.pending != nil, "Cannot be reused after build()")

	if pendingFull := b.pendingOff == len(b.pending); pendingFull {
		if valuesFull := b.valuesOff == len(b.values); valuesFull {
			b.grow(util.Oversize(b.valuesOff+1, 8))
		}
		b.pack()
	}

	b.pending[b.pendingOff] = l
	b.pendingOff++
	b.size++
	return b
}
Пример #5
0
// FinishDocument closes the current document: it stores the number of fields
// written for it (w.numStoredFieldsInDoc, which is then reset to 0) and the
// current w.bufferedDocs.length as its end offset, then increments
// w.numBufferedDocs. Both bookkeeping slices are grown together, keeping
// their contents, when w.numStoredFields is full. If w.triggerFlush()
// reports true the result of w.flush() is returned; otherwise nil.
func (w *CompressingStoredFieldsWriter) FinishDocument() error {
	if docs := w.numBufferedDocs; docs == len(w.numStoredFields) {
		capacity := util.Oversize(docs+1, 4)

		offsets := make([]int, capacity)
		copy(offsets, w.endOffsets)
		w.endOffsets = offsets

		counts := make([]int, capacity)
		copy(counts, w.numStoredFields)
		w.numStoredFields = counts
	}

	slot := w.numBufferedDocs
	w.numStoredFields[slot] = w.numStoredFieldsInDoc
	w.numStoredFieldsInDoc = 0
	w.endOffsets[slot] = w.bufferedDocs.length
	w.numBufferedDocs++

	if !w.triggerFlush() {
		return nil
	}
	return w.flush()
}
Пример #6
0
// addArc appends an arc with the given label and target to this node.
//
// The label must be non-negative and, when the node already has arcs,
// strictly greater than the label of the last arc (both are asserted).
// When n.Arcs is full it is replaced by an oversized slice whose unused
// slots are pre-filled with fresh builderArc values, so the arc at
// n.NumArcs can be reused in place. The arc's output and nextFinalOutput
// are set to n.owner.NO_OUTPUT and isFinal to false.
func (n *UnCompiledNode) addArc(label int, target Node) {
	assert(label >= 0)
	if n.NumArcs != 0 {
		prevLabel := n.Arcs[n.NumArcs-1].label
		assert2(label > prevLabel,
			"arc[-1].label=%v new label=%v numArcs=%v",
			prevLabel, label, n.NumArcs)
	}

	if n.NumArcs == len(n.Arcs) {
		grown := make([]*builderArc, util.Oversize(n.NumArcs+1, util.NUM_BYTES_OBJECT_REF))
		copy(grown, n.Arcs)
		// Slots beyond the existing arcs get fresh, reusable arc objects.
		for i := n.NumArcs; i < len(grown); i++ {
			grown[i] = &builderArc{}
		}
		n.Arcs = grown
	}

	arc := n.Arcs[n.NumArcs]
	n.NumArcs++
	arc.label, arc.Target = label, target
	noOutput := n.owner.NO_OUTPUT
	arc.output, arc.nextFinalOutput = noOutput, noOutput
	arc.isFinal = false
}
Пример #7
0
// growTermBuffer makes sure a.termBuffer has at least newSize elements.
// When it is too short it is replaced by a newly allocated, slightly
// over-sized slice; the previous contents are NOT copied over.
func (a *CharTermAttributeImpl) growTermBuffer(newSize int) {
	if len(a.termBuffer) >= newSize {
		return
	}
	capacity := util.Oversize(newSize, util.NUM_BYTES_CHAR)
	a.termBuffer = make([]rune, capacity)
}
Пример #8
0
// newCharTermAttributeImpl returns a CharTermAttributeImpl whose termBuffer
// is pre-allocated with util.Oversize(MIN_BUFFER_SIZE, util.NUM_BYTES_CHAR)
// runes and whose bytes field holds a new BytesRefBuilder.
func newCharTermAttributeImpl() *CharTermAttributeImpl {
	initialLen := util.Oversize(MIN_BUFFER_SIZE, util.NUM_BYTES_CHAR)
	attr := new(CharTermAttributeImpl)
	attr.termBuffer = make([]rune, initialLen)
	attr.bytes = util.NewBytesRefBuilder()
	return attr
}
// grow returns a new, larger postings array holding the first arr.size
// entries of arr. The new size is util.Oversize(arr.size+1, bytesPerPosting).
// The receiver itself is not modified here. It panics if newInstance returns
// something other than a *ParallelPostingsArray.
func (arr *ParallelPostingsArray) grow() *ParallelPostingsArray {
	bytesPer := arr.PostingsArray.bytesPerPosting()
	grown := arr.PostingsArray.newInstance(util.Oversize(arr.size+1, bytesPer))
	arr.PostingsArray.copyTo(grown, arr.size)
	return grown.(*ParallelPostingsArray)
}
Пример #10
0
// newGrowableByteArrayDataOutput builds a GrowableByteArrayDataOutput with an
// empty byte slice of capacity util.Oversize(cp, 1) and wires its
// DataOutputImpl to a util.NewDataOutput that wraps the new value itself.
func newGrowableByteArrayDataOutput(cp int) *GrowableByteArrayDataOutput {
	out := new(GrowableByteArrayDataOutput)
	out.bytes = make([]byte, 0, util.Oversize(cp, 1))
	out.DataOutputImpl = util.NewDataOutput(out)
	return out
}
Пример #11
0
// Add inserts one input/output pair into the FST under construction.
//
// Inputs must not sort before the previously added input (asserted below).
// An empty input is recorded directly on frontier[0] and on the FST's empty
// output. For any other input the method finds the prefix shared with the
// previous input, grows the frontier if needed, freezes the previous input's
// unshared tail, adds arcs for the new suffix, and pushes conflicting
// outputs forward along the shared prefix. An error from freezeTail is
// returned as-is; otherwise nil is returned.
//
// It's OK to add the same input twice in a row with different outputs,
// as long as outputs impls the merge method. Note that input is fully
// consumed after this method is returned (so caller is free to reuse),
// but output is not. So if your outputs are changeable (eg
// ByteSequenceOutputs or IntSequenceOutputs) then you cannot reuse
// across calls.
//
// NOTE(review): despite the paragraph above, adding the same input twice in
// a row currently panics with "not implemented yet" (see the end of this
// method).
func (b *Builder) Add(input *util.IntsRef, output interface{}) error {
	// { // debug
	// 	bytes := make([]byte, input.Length)
	// 	for i, _ := range bytes {
	// 		bytes[i] = byte(input.Ints[i])
	// 	}
	// 	if output == NO_OUTPUT {
	// 		fmt.Printf("\nFST ADD: input=%v %v\n", string(bytes), bytes)
	// 	} else {
	// 		panic("not implemented yet")
	// 		// fmt.Printf("\nFST ADD: input=%v %v output=%v", string(bytes), bytes, b.fst.outputs.outputToString(output)));
	// 	}
	// }

	// de-dup NO_OUTPUT since it must be a singleton:
	// NOTE(review): as written this compares and assigns the same value, so
	// it has no effect; it also uses the package-level NO_OUTPUT while other
	// places below use b.NO_OUTPUT. Confirm which one is intended.
	if output == NO_OUTPUT {
		output = NO_OUTPUT
	}

	// inputs must arrive in non-decreasing order
	assert2(b.lastInput.Length() == 0 || !input.Less(b.lastInput.Get()),
		"inputs are added out of order, lastInput=%v vs input=%v",
		b.lastInput.Get(), input)

	if input.Length == 0 {
		// empty input: only allowed as first input. We have to special
		// case this because the packed FST format cannot represent the
		// empty input since 'finalness' is stored on the incoming arc,
		// not on the node
		b.frontier[0].InputCount++
		b.frontier[0].IsFinal = true
		b.fst.setEmptyOutput(output)
		return nil
	}

	// compare shared prefix length
	// pos1 walks lastInput, pos2 walks input.Ints starting at input.Offset;
	// every frontier node visited on the way gets its InputCount bumped.
	pos1 := 0
	pos2 := input.Offset
	pos1Stop := b.lastInput.Length()
	if input.Length < pos1Stop {
		pos1Stop = input.Length
	}
	for {
		b.frontier[pos1].InputCount++
		if pos1 >= pos1Stop || b.lastInput.At(pos1) != input.Ints[pos2] {
			break
		}
		pos1++
		pos2++
	}
	// index of the first frontier node that is not shared with lastInput
	prefixLenPlus1 := pos1 + 1

	// make sure the frontier has one node per input position plus the root;
	// newly added slots are filled with fresh UnCompiledNodes
	if len(b.frontier) < input.Length+1 {
		next := make([]*UnCompiledNode, util.Oversize(input.Length+1, util.NUM_BYTES_OBJECT_REF))
		copy(next, b.frontier)
		for idx := len(b.frontier); idx < len(next); idx++ {
			next[idx] = NewUnCompiledNode(b, idx)
		}
		b.frontier = next
	}

	// minimize/compile states from previous input's orphan'd suffix
	err := b.freezeTail(prefixLenPlus1)
	if err != nil {
		return err
	}

	// init tail states for current input
	for idx := prefixLenPlus1; idx <= input.Length; idx++ {
		b.frontier[idx-1].addArc(input.Ints[input.Offset+idx-1], b.frontier[idx])
		b.frontier[idx].InputCount++
	}

	// unless this input is identical to the previous one, mark its last
	// node as final with no output
	lastNode := b.frontier[input.Length]
	if b.lastInput.Length() != input.Length || prefixLenPlus1 != input.Length+1 {
		lastNode.IsFinal = true
		lastNode.output = b.NO_OUTPUT
	}

	// push conflicting outputs forward, only as far as needed
	for idx := 1; idx < prefixLenPlus1; idx++ {
		node := b.frontier[idx]
		parentNode := b.frontier[idx-1]

		lastOutput := parentNode.lastOutput(input.Ints[input.Offset+idx-1])

		var commonOutputPrefix interface{}
		var wordSuffix interface{}

		if lastOutput != b.NO_OUTPUT {
			// keep only the common part on the shared arc and move the
			// remainder of the old output onto the child node
			commonOutputPrefix = b.fst.outputs.Common(output, lastOutput)
			wordSuffix = b.fst.outputs.Subtract(lastOutput, commonOutputPrefix)
			parentNode.setLastOutput(input.Ints[input.Offset+idx-1], commonOutputPrefix)
			node.prependOutput(wordSuffix)
		} else {
			commonOutputPrefix = NO_OUTPUT
		}

		// whatever was absorbed by the shared arc no longer needs to be
		// carried by the new input
		output = b.fst.outputs.Subtract(output, commonOutputPrefix)
	}

	if b.lastInput.Length() == input.Length && prefixLenPlus1 == 1+input.Length {
		// same input more than 1 time in a row, mapping to multiple outputs
		panic("not implemented yet")
	} else {
		// this new arc is private to this new input; set its arc output
		// to the leftover output:
		// NOTE(review): this uses input.At(...) while the loops above index
		// input.Ints with input.Offset added; confirm At applies the offset.
		b.frontier[prefixLenPlus1-1].setLastOutput(input.At(prefixLenPlus1-1), output)
	}

	// save last input
	b.lastInput.CopyInts(input)
	return nil
}
Пример #12
0
// newGrowableByteArrayDataOutput returns a GrowableByteArrayDataOutput whose
// first field is an empty byte slice with capacity util.Oversize(cp, 1) and
// whose second field is 0.
func newGrowableByteArrayDataOutput(cp int) *GrowableByteArrayDataOutput {
	buf := make([]byte, 0, util.Oversize(cp, 1))
	out := GrowableByteArrayDataOutput{buf, 0}
	return &out
}