示例#1
0
/*
Instantiates an FST/FSA builder with all the possible tuning and
construction tweaks. Read parameter documentation carefully.

...
*/
func NewBuilder(inputType InputType, minSuffixCount1, minSuffixCount2 int,
	doShareSuffix, doShareNonSingletonNodes bool, shareMaxTailLength int,
	outputs Outputs, doPackFST bool,
	acceptableOverheadRatio float32, allowArrayArcs bool, bytesPageBits int) *Builder {

	fst := newFST(inputType, outputs, doPackFST, acceptableOverheadRatio, allowArrayArcs, bytesPageBits)
	f := make([]*UnCompiledNode, 10)
	ans := &Builder{
		minSuffixCount1:          minSuffixCount1,
		minSuffixCount2:          minSuffixCount2,
		doShareNonSingletonNodes: doShareNonSingletonNodes,
		shareMaxTailLength:       shareMaxTailLength,
		doPackFST:                doPackFST,
		acceptableOverheadRatio:  acceptableOverheadRatio,
		fst:       fst,
		NO_OUTPUT: outputs.NoOutput(),
		frontier:  f,
		lastInput: util.NewIntsRefBuilder(),
	}
	if doShareSuffix {
		ans.dedupHash = newNodeHash(fst, fst.bytes.reverseReaderAllowSingle(false))
	}
	for i, _ := range f {
		f[i] = NewUnCompiledNode(ans, i)
	}
	return ans
}
示例#2
0
/*
Create a new writer. The number of items (terms or sub-blocks) per
block will aim tobe between minItermsPerBlock and maxItemsPerBlock,
though in some cases, the blocks may be smaller than the min.
*/
func NewBlockTreeTermsWriter(state *SegmentWriteState,
	postingsWriter PostingsWriterBase,
	minItemsInBlock, maxItemsInBlock int) (*BlockTreeTermsWriter, error) {
	assert2(minItemsInBlock >= 2, "minItemsInBlock must be >= 2; got %v", minItemsInBlock)
	assert2(maxItemsInBlock >= 1, "maxItemsInBlock must be >= 1; got %v", maxItemsInBlock)
	assert2(minItemsInBlock <= maxItemsInBlock,
		"maxItemsInBlock must be >= minItemsInBlock; got maxItemsInBlock=%v minItemsInBlock=%v",
		maxItemsInBlock, minItemsInBlock)
	assert2(2*(minItemsInBlock-1) <= maxItemsInBlock,
		"maxItemsInBlock must be at least 2*(minItemsInBlock-1; got maxItemsInBlock=%v minItemsInBlock=%v",
		maxItemsInBlock, minItemsInBlock)

	ans := &BlockTreeTermsWriter{
		maxDoc:          state.SegmentInfo.DocCount(),
		fieldInfos:      state.FieldInfos,
		minItemsInBlock: minItemsInBlock,
		maxItemsInBlock: maxItemsInBlock,
		postingsWriter:  postingsWriter,
		segment:         state.SegmentInfo.Name,
		scratchBytes:    store.NewRAMOutputStreamBuffer(),
		scratchIntsRef:  util.NewIntsRefBuilder(),
		// bytesWriter:     store.NewRAMOutputStreamBuffer(),
		// bytesWriter2:    store.NewRAMOutputStreamBuffer(),
	}
	ans.spi = ans
	var out, indexOut store.IndexOutput
	if err := func() error {
		var success = false
		defer func() {
			if !success {
				util.CloseWhileSuppressingError(out, indexOut)
			}
		}()

		var err error
		termsFileName := util.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, TERMS_EXTENSION)
		if out, err = state.Directory.CreateOutput(termsFileName, state.Context); err != nil {
			return err
		}
		if err = ans.spi.WriteHeader(out); err != nil {
			return err
		}

		termsIndexFileName := util.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, TERMS_INDEX_EXTENSION)
		if indexOut, err = state.Directory.CreateOutput(termsIndexFileName, state.Context); err != nil {
			return err
		}
		if err = ans.spi.WriteIndexHeader(indexOut); err != nil {
			return err
		}

		// have consumer write its format/header
		if err = postingsWriter.Init(out); err != nil {
			return err
		}
		success = true
		return nil
	}(); err != nil {
		return nil, err
	}
	ans.out = out
	ans.indexOut = indexOut
	return ans, nil
}