Example #1
func newLucene49NormsConsumer(state *SegmentWriteState,
	dataCodec, dataExtension, metaCodec, metaExtension string) (nc *NormsConsumer, err error) {

	assert(packed.PackedFormat(packed.PACKED_SINGLE_BLOCK).IsSupported(1))
	assert(packed.PackedFormat(packed.PACKED_SINGLE_BLOCK).IsSupported(2))
	assert(packed.PackedFormat(packed.PACKED_SINGLE_BLOCK).IsSupported(4))

	nc = &NormsConsumer{maxDoc: state.SegmentInfo.DocCount()}
	var success = false
	defer func() {
		if !success {
			util.CloseWhileSuppressingError(nc)
		}
	}()

	dataName := util.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, dataExtension)
	if nc.data, err = state.Directory.CreateOutput(dataName, state.Context); err != nil {
		return nil, err
	}
	if err = codec.WriteHeader(nc.data, dataCodec, VERSION_CURRENT); err != nil {
		return nil, err
	}
	metaName := util.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, metaExtension)
	if nc.meta, err = state.Directory.CreateOutput(metaName, state.Context); err != nil {
		return nil, err
	}
	if err = codec.WriteHeader(nc.meta, metaCodec, VERSION_CURRENT); err != nil {
		return nil, err
	}
	success = true
	return nc, nil
}
Example #2
func (w *CompressingStoredFieldsWriter) Abort() {
	assert(w != nil)
	util.CloseWhileSuppressingError(w)
	util.DeleteFilesIgnoringErrors(w.directory,
		util.SegmentFileName(w.segment, w.segmentSuffix, lucene40.FIELDS_EXTENSION),
		util.SegmentFileName(w.segment, w.segmentSuffix, lucene40.FIELDS_INDEX_EXTENSION))
}
Example #3
func (w *CompressingStoredFieldsWriter) Abort() {
	if w == nil { // tolerate early released pointer
		return
	}
	util.CloseWhileSuppressingError(w)
	util.DeleteFilesIgnoringErrors(w.directory,
		util.SegmentFileName(w.segment, w.segmentSuffix, lucene40.FIELDS_EXTENSION),
		util.SegmentFileName(w.segment, w.segmentSuffix, lucene40.FIELDS_INDEX_EXTENSION))
}
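Example #3 repeats Example #2's Abort with the assert swapped for a nil guard. A minimal, self-contained sketch of why that guard matters when cleanup is deferred before construction finishes (toy writer type, not the real CompressingStoredFieldsWriter):

package main

import "fmt"

type writer struct{ name string }

// Abort tolerates a nil receiver, like the guarded version above: calling
// a method on a nil pointer is legal in Go as long as the body never
// dereferences it, so the guard turns a would-be panic into a no-op.
func (w *writer) Abort() {
	if w == nil {
		return
	}
	fmt.Println("aborting", w.name)
}

func main() {
	var w *writer   // construction failed; w was never assigned
	defer w.Abort() // safe: the guard returns early instead of panicking
}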
Example #4
func NewLucene41PostingsReader(dir store.Directory,
	fis model.FieldInfos, si *model.SegmentInfo,
	ctx store.IOContext, segmentSuffix string) (r PostingsReaderBase, err error) {

	log.Print("Initializing Lucene41PostingsReader...")
	success := false
	var docIn, posIn, payIn store.IndexInput
	defer func() {
		if !success {
			log.Print("Failed to initialize Lucene41PostingsReader.")
			if err != nil {
				log.Print("DEBUG ", err)
			}
			util.CloseWhileSuppressingError(docIn, posIn, payIn)
		}
	}()

	docIn, err = dir.OpenInput(util.SegmentFileName(si.Name, segmentSuffix, LUCENE41_DOC_EXTENSION), ctx)
	if err != nil {
		return r, err
	}
	_, err = codec.CheckHeader(docIn, LUCENE41_DOC_CODEC, LUCENE41_VERSION_CURRENT, LUCENE41_VERSION_CURRENT)
	if err != nil {
		return r, err
	}
	forUtil, err := NewForUtil(docIn)
	if err != nil {
		return r, err
	}

	if fis.HasProx {
		posIn, err = dir.OpenInput(util.SegmentFileName(si.Name, segmentSuffix, LUCENE41_POS_EXTENSION), ctx)
		if err != nil {
			return r, err
		}
		_, err = codec.CheckHeader(posIn, LUCENE41_POS_CODEC, LUCENE41_VERSION_CURRENT, LUCENE41_VERSION_CURRENT)
		if err != nil {
			return r, err
		}

		if fis.HasPayloads || fis.HasOffsets {
			payIn, err = dir.OpenInput(util.SegmentFileName(si.Name, segmentSuffix, LUCENE41_PAY_EXTENSION), ctx)
			if err != nil {
				return r, err
			}
			_, err = codec.CheckHeader(payIn, LUCENE41_PAY_CODEC, LUCENE41_VERSION_CURRENT, LUCENE41_VERSION_CURRENT)
			if err != nil {
				return r, err
			}
		}
	}

	success = true
	return &Lucene41PostingsReader{docIn, posIn, payIn, forUtil}, nil
}
Example #5
func newLucene42DocValuesProducer(state SegmentReadState,
	dataCodec, dataExtension, metaCodec, metaExtension string) (dvp *Lucene42DocValuesProducer, err error) {
	dvp = &Lucene42DocValuesProducer{
		numericInstances: make(map[int]NumericDocValues),
	}
	dvp.maxDoc = state.segmentInfo.DocCount()
	metaName := util.SegmentFileName(state.segmentInfo.Name, state.segmentSuffix, metaExtension)
	// read in the entries from the metadata file.
	in, err := state.dir.OpenInput(metaName, state.context)
	if err != nil {
		return dvp, err
	}
	success := false
	defer func() {
		if success {
			err = util.Close(in)
		} else {
			util.CloseWhileSuppressingError(in)
		}
	}()

	version, err := codec.CheckHeader(in, metaCodec, LUCENE42_DV_VERSION_START, LUCENE42_DV_VERSION_CURRENT)
	if err != nil {
		return dvp, err
	}
	dvp.numerics = make(map[int]NumericEntry)
	dvp.binaries = make(map[int]BinaryEntry)
	dvp.fsts = make(map[int]FSTEntry)
	err = dvp.readFields(in)
	if err != nil {
		return dvp, err
	}
	dataName := util.SegmentFileName(state.segmentInfo.Name, state.segmentSuffix, dataExtension)
	dvp.data, err = state.dir.OpenInput(dataName, state.context)
	if err != nil {
		return dvp, err
	}
	// close the data file too if anything below fails
	defer func() {
		if !success {
			util.CloseWhileSuppressingError(dvp.data)
		}
	}()
	version2, err := codec.CheckHeader(dvp.data, dataCodec, LUCENE42_DV_VERSION_START, LUCENE42_DV_VERSION_CURRENT)
	if err != nil {
		return dvp, err
	}

	if version != version2 {
		return dvp, errors.New("Format versions mismatch")
	}
	success = true
	return dvp, nil
}
Example #6
/* Reads the most recent FieldInfos of the given segment info. */
func ReadFieldInfos(info *SegmentCommitInfo) (fis FieldInfos, err error) {
	var dir store.Directory
	var closeDir bool
	if info.FieldInfosGen() == -1 && info.Info.IsCompoundFile() {
		// no fieldInfos gen and segment uses a compound file
		if dir, err = store.NewCompoundFileDirectory(info.Info.Dir,
			util.SegmentFileName(info.Info.Name, "", store.COMPOUND_FILE_EXTENSION),
			store.IO_CONTEXT_READONCE, false); err != nil {
			return
		}
		closeDir = true
	} else {
		// gen'd FIS are read outside CFS, or the segment doesn't use a compound file
		dir = info.Info.Dir
		closeDir = false
	}

	defer func() {
		if closeDir {
			err = mergeError(err, dir.Close())
		}
	}()

	var segmentSuffix string
	if n := info.FieldInfosGen(); n != -1 {
		segmentSuffix = strconv.FormatInt(n, 36)
	}
	codec := info.Info.Codec().(Codec)
	fisFormat := codec.FieldInfosFormat()
	return fisFormat.FieldInfosReader()(dir, info.Info.Name, segmentSuffix, store.IO_CONTEXT_READONCE)
}
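The segment suffix above is just the field-infos generation rendered in base 36. A quick runnable illustration of strconv.FormatInt's encoding:

package main

import (
	"fmt"
	"strconv"
)

func main() {
	// A gen'd FieldInfos file carries its generation as a base-36 suffix.
	for _, gen := range []int64{1, 10, 35, 36, 42} {
		fmt.Printf("gen %2d -> suffix %q\n", gen, strconv.FormatInt(gen, 36))
	}
	// gen 1 -> "1", gen 10 -> "a", gen 35 -> "z", gen 36 -> "10", gen 42 -> "16"
}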
Example #7
/*
Create the compound stream in the specified file. The filename is the
entire name (no extensions are added).
*/
func newCompoundFileWriter(dir Directory, name string) *CompoundFileWriter {
	assert2(dir != nil, "directory cannot be nil")
	assert2(name != "", "name cannot be empty")
	return &CompoundFileWriter{
		Locker:         &sync.Mutex{},
		directory:      dir,
		entries:        make(map[string]*FileEntry),
		seenIDs:        make(map[string]bool),
		pendingEntries: list.New(),
		outputTaken:    NewAtomicBool(),
		entryTableName: util.SegmentFileName(
			util.StripExtension(name),
			"",
			COMPOUND_FILE_ENTRIES_EXTENSION,
		),
		dataFileName: name,
	}
}
Example #8
func TestReadingFromSlicedIndexInputOSX(t *testing.T) {
	t.Logf("TestReadingFromSlicedIndexInputOSX...")
	path := "../search/testdata/osx/belfrysample"
	d, err := OpenFSDirectory(path)
	if err != nil {
		t.Fatal(err)
	}
	ctx := NewIOContextBool(false)
	cd, err := NewCompoundFileDirectory(d, "_0.cfs", ctx, false)
	if err != nil {
		t.Fatal(err)
	}
	name := util.SegmentFileName("_0", "Lucene41_0", "pos")
	posIn, err := cd.OpenInput(name, ctx)
	if err != nil {
		t.Fatal(err)
	}
	t.Log(posIn)
	if _, err = codec.CheckHeader(posIn, "Lucene41PostingsWriterPos", 0, 0); err != nil {
		t.Log(err)
	}
	// codec header mismatch: actual header=0 vs expected header=1071082519 (resource: SlicedIndexInput(SlicedIndexInput(_0_Lucene41_0.pos in SimpleFSIndexInput(path='/private/tmp/kc/index/belfrysample/_0.cfs')) in SimpleFSIndexInput(path='/private/tmp/kc/index/belfrysample/_0.cfs') slice=1461:3426))
}
Example #9
func (w *Lucene40SegmentInfoWriter) Write(dir store.Directory,
	si *SegmentInfo, fis FieldInfos, ctx store.IOContext) (err error) {

	filename := util.SegmentFileName(si.Name, "", LUCENE40_SI_EXTENSION)
	si.AddFile(filename)

	var output store.IndexOutput
	output, err = dir.CreateOutput(filename, ctx)
	if err != nil {
		return err
	}

	var success = false
	defer func() {
		if !success {
			util.CloseWhileSuppressingError(output)
			si.Dir.DeleteFile(filename) // ignore error
		} else {
			err = mergeError(err, output.Close())
		}
	}()

	err = codec.WriteHeader(output, LUCENE40_CODEC_NAME, LUCENE40_VERSION_CURRENT)
	if err != nil {
		return err
	}
	// Write the Lucene version that created this segment, since 3.1
	err = store.Stream(output).WriteString(si.Version().String()).
		WriteInt(int32(si.DocCount())).
		WriteByte(func() byte {
			if si.IsCompoundFile() {
				return SEGMENT_INFO_YES
			}
			return byte((SEGMENT_INFO_NO + 256) % 256) // Go byte is non-negative, unlike Java
		}()).WriteStringStringMap(si.Diagnostics()).
		WriteStringStringMap(map[string]string{}).
		WriteStringSet(si.Files()).Close()
	if err != nil {
		return err
	}

	success = true
	return nil
}
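The WriteByte branch above works around Go's unsigned byte type. A minimal sketch of the mapping, assuming SEGMENT_INFO_NO is -1 as in Lucene's Java sources:

package main

import "fmt"

const SEGMENT_INFO_NO = -1 // assumed value, mirroring Lucene's Java constant

func main() {
	// Java writes the signed byte -1 directly; Go's byte is uint8, so the
	// writer first maps -1 into [0, 255]: (-1 + 256) % 256 == 255 == 0xFF,
	// the same bit pattern a Java DataOutput emits for -1.
	b := byte((SEGMENT_INFO_NO + 256) % 256)
	fmt.Printf("0x%X\n", b) // 0xFF
}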
Example #10
func newLucene49NormsProducer(state SegmentReadState,
	dataCodec, dataExtension, metaCodec, metaExtension string) (np *NormsProducer, err error) {

	np = &NormsProducer{
		Locker:       new(sync.Mutex),
		norms:        make(map[int]*NormsEntry),
		instances:    make(map[int]NumericDocValues),
		maxDoc:       state.SegmentInfo.DocCount(),
		ramBytesUsed: util.ShallowSizeOfInstance(reflect.TypeOf(np)),
	}
	metaName := util.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, metaExtension)
	// read in the entries from the metadata file.
	var in store.ChecksumIndexInput
	if in, err = state.Dir.OpenChecksumInput(metaName, state.Context); err != nil {
		return nil, err
	}

	if err = func() error {
		var success = false
		defer func() {
			if success {
				err = util.Close(in)
			} else {
				util.CloseWhileSuppressingError(in)
			}
		}()

		if np.version, err = codec.CheckHeader(in, metaCodec, VERSION_START, VERSION_CURRENT); err != nil {
			return err
		}
		if err = np.readFields(in, state.FieldInfos); err != nil {
			return err
		}
		if _, err = codec.CheckFooter(in); err != nil {
			return err
		}
		success = true
		return nil
	}(); err != nil {
		return nil, err
	}

	dataName := util.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, dataExtension)
	if np.data, err = state.Dir.OpenInput(dataName, state.Context); err != nil {
		return nil, err
	}
	var success = false
	defer func() {
		if !success {
			util.CloseWhileSuppressingError(np.data)
		}
	}()

	var version2 int32
	if version2, err = codec.CheckHeader(np.data, dataCodec, VERSION_START, VERSION_CURRENT); err != nil {
		return nil, err
	}
	if version2 != np.version {
		return nil, errors.New("Format versions mismatch")
	}

	// NOTE: data file is too costly to verify checksum against all the
	// bytes on open, but for now we at least verify proper structure
	// of the checksum footer: which looks for FOOTER_MAGIC +
	// algorithmID. This is cheap and can detect some forms of
	// corruption such as file truncation.
	if _, err = codec.RetrieveChecksum(np.data); err != nil {
		return nil, err
	}

	success = true

	return np, nil
}
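The NOTE above verifies only the structure of the checksum footer. A sketch of that footer's layout, assumed from Lucene's CodecUtil rather than taken from this port:

package main

import "fmt"

// Layout of the footer that RetrieveChecksum inspects, assuming Lucene's
// format: FOOTER_MAGIC (int32), checksum algorithm ID (int32, 0 = CRC32),
// then the checksum itself (int64). Checking these 16 bytes is cheap and
// still catches truncated files.
const (
	footerMagicLen = 4
	algorithmIDLen = 4
	checksumLen    = 8
	footerLength   = footerMagicLen + algorithmIDLen + checksumLen
)

func main() {
	fmt.Println("footer bytes:", footerLength) // 16
}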
Example #11
func newSegmentCoreReaders(owner *SegmentReader, dir store.Directory, si *SegmentInfoPerCommit,
	context store.IOContext, termsIndexDivisor int) (self SegmentCoreReaders, err error) {
	if termsIndexDivisor == 0 {
		panic("indexDivisor must be < 0 (don't load terms index) or greater than 0 (got 0)")
	}
	log.Printf("Initializing SegmentCoreReaders from directory: %v", dir)

	self = SegmentCoreReaders{
		refCount: 1,
		normsLocal: func() map[string]interface{} {
			return make(map[string]interface{})
		},
	}
	self.fieldsReaderLocal = func() StoredFieldsReader {
		return self.fieldsReaderOrig.Clone()
	}

	log.Print("Initializing listeners...")
	self.addListener = make(chan CoreClosedListener)
	self.removeListener = make(chan CoreClosedListener)
	self.notifyListener = make(chan *SegmentReader)
	// TODO re-enable later
	go func() { // ensure listeners are synchronized
		coreClosedListeners := make([]CoreClosedListener, 0)
		isRunning := true
		var listener CoreClosedListener
		for isRunning {
			log.Print("Listening for events...")
			select {
			case listener = <-self.addListener:
				coreClosedListeners = append(coreClosedListeners, listener)
			case listener = <-self.removeListener:
				n := len(coreClosedListeners)
				for i, v := range coreClosedListeners {
					if v == listener {
						newListeners := make([]CoreClosedListener, 0, n-1)
						newListeners = append(newListeners, coreClosedListeners[0:i]...)
						newListeners = append(newListeners, coreClosedListeners[i+1:]...)
						coreClosedListeners = newListeners
						break
					}
				}
			case owner := <-self.notifyListener:
				log.Print("Shutting down SegmentCoreReaders...")
				isRunning = false
				for _, v := range coreClosedListeners {
					v.onClose(owner)
				}
			}
		}
		log.Print("Listeners are done.")
	}()

	success := false
	defer func() {
		if !success {
			log.Print("Failed to initialize SegmentCoreReaders.")
			self.decRef()
		}
	}()

	codec := si.info.Codec().(Codec)
	log.Print("Obtaining CFS Directory...")
	var cfsDir store.Directory // confusing name: if (cfs) it's the cfsdir, otherwise it's the segment's directory.
	if si.info.IsCompoundFile() {
		log.Print("Detected CompoundFile.")
		name := util.SegmentFileName(si.info.Name, "", store.COMPOUND_FILE_EXTENSION)
		self.cfsReader, err = store.NewCompoundFileDirectory(dir, name, context, false)
		if err != nil {
			return self, err
		}
		log.Printf("CompoundFileDirectory: %v", self.cfsReader)
		cfsDir = self.cfsReader
	} else {
		cfsDir = dir
	}
	log.Printf("CFS Directory: %v", cfsDir)
	log.Print("Reading FieldInfos...")
	self.fieldInfos, err = codec.FieldInfosFormat().FieldInfosReader()(cfsDir, si.info.Name, store.IO_CONTEXT_READONCE)
	if err != nil {
		return self, err
	}

	self.termsIndexDivisor = termsIndexDivisor
	format := codec.PostingsFormat()

	log.Print("Obtaining SegmentReadState...")
	segmentReadState := newSegmentReadState(cfsDir, si.info, self.fieldInfos, context, termsIndexDivisor)
	// Ask codec for its Fields
	log.Print("Obtaining FieldsProducer...")
	self.fields, err = format.FieldsProducer(segmentReadState)
	if err != nil {
		return self, err
	}
	assert(self.fields != nil)
	// ask codec for its Norms:
	// TODO: since we don't write any norms file if there are no norms,
	// kinda janky to assume the codec handles the case of no norms file at all gracefully?!

	if self.fieldInfos.HasDocValues {
		log.Print("Obtaining DocValuesProducer...")
		self.dvProducer, err = codec.DocValuesFormat().FieldsProducer(segmentReadState)
		if err != nil {
			return self, err
		}
		assert(self.dvProducer != nil)
	} else {
		// self.dvProducer = nil
	}

	if self.fieldInfos.HasNorms {
		log.Print("Obtaining NormsDocValuesProducer...")
		self.normsProducer, err = codec.NormsFormat().NormsProducer(segmentReadState)
		if err != nil {
			return self, err
		}
		assert(self.normsProducer != nil)
	} else {
		// self.normsProducer = nil
	}

	log.Print("Obtaining StoredFieldsReader...")
	self.fieldsReaderOrig, err = si.info.Codec().(Codec).StoredFieldsFormat().FieldsReader(cfsDir, si.info, self.fieldInfos, context)
	if err != nil {
		return self, err
	}

	if self.fieldInfos.HasVectors { // open term vector files only as needed
		log.Print("Obtaining TermVectorsReader...")
		self.termVectorsReaderOrig, err = si.info.Codec().(Codec).TermVectorsFormat().VectorsReader(cfsDir, si.info, self.fieldInfos, context)
		if err != nil {
			return self, err
		}
	} else {
		// self.termVectorsReaderOrig = nil
	}

	log.Print("Success")
	success = true

	// Must assign this at the end -- if we hit an
	// exception above core, we don't want to attempt to
	// purge the FieldCache (will hit NPE because core is
	// not assigned yet).
	self.owner = owner
	return self, nil
}
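The goroutine above replaces Java's synchronized listener list with channel ownership: a single goroutine owns the slice, so add, remove, and notify never race. A stripped-down runnable sketch of the same pattern (toy listener type, not the real CoreClosedListener):

package main

import "fmt"

type listener func(owner string)

func main() {
	add := make(chan listener)
	notify := make(chan string)
	done := make(chan struct{})

	go func() { // sole owner of the listeners slice; no mutex needed
		var listeners []listener
		for {
			select {
			case l := <-add:
				listeners = append(listeners, l)
			case owner := <-notify:
				for _, l := range listeners {
					l(owner)
				}
				close(done)
				return
			}
		}
	}()

	add <- func(owner string) { fmt.Println("core closed:", owner) }
	notify <- "segment _0"
	<-done
}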
Example #12
// Sole constructor
func newCompressingStoredFieldsReader(d store.Directory,
	si *model.SegmentInfo, segmentSuffix string,
	fn model.FieldInfos, ctx store.IOContext, formatName string,
	compressionMode compressing.CompressionMode) (r *CompressingStoredFieldsReader, err error) {

	r = &CompressingStoredFieldsReader{}
	r.compressionMode = compressionMode
	segment := si.Name
	r.fieldInfos = fn
	r.numDocs = si.DocCount()

	var indexStream store.IndexInput
	success := false
	defer func() {
		if !success {
			log.Println("Failed to initialize CompressionStoredFieldsReader.")
			if err != nil {
				log.Print(err)
			}
			util.CloseWhileSuppressingError(r, indexStream)
		}
	}()

	// Load the index into memory
	indexStreamFN := util.SegmentFileName(segment, segmentSuffix, lucene40.FIELDS_INDEX_EXTENSION)
	indexStream, err = d.OpenInput(indexStreamFN, ctx)
	if err != nil {
		return nil, err
	}
	codecNameIdx := formatName + CODEC_SFX_IDX
	if _, err = codec.CheckHeader(indexStream, codecNameIdx, CODEC_SFX_VERSION_START, CODEC_SFX_VERSION_CURRENT); err != nil {
		return nil, err
	}
	if int64(codec.HeaderLength(codecNameIdx)) != indexStream.FilePointer() {
		panic("assert fail")
	}
	r.indexReader, err = newCompressingStoredFieldsIndexReader(indexStream, si)
	if err != nil {
		return nil, err
	}
	err = indexStream.Close()
	if err != nil {
		return nil, err
	}
	indexStream = nil

	// Open the data file and read metadata
	fieldsStreamFN := util.SegmentFileName(segment, segmentSuffix, lucene40.FIELDS_EXTENSION)
	r.fieldsStream, err = d.OpenInput(fieldsStreamFN, ctx)
	if err != nil {
		return nil, err
	}
	codecNameDat := formatName + CODEC_SFX_DAT
	if _, err = codec.CheckHeader(r.fieldsStream, codecNameDat, CODEC_SFX_VERSION_START, CODEC_SFX_VERSION_CURRENT); err != nil {
		return nil, err
	}
	if int64(codec.HeaderLength(codecNameDat)) != r.fieldsStream.FilePointer() {
		panic("assert fail")
	}

	n, err := r.fieldsStream.ReadVInt()
	if err != nil {
		return nil, err
	}
	r.packedIntsVersion = int(n)
	r.decompressor = compressionMode.NewDecompressor()
	r.bytes = make([]byte, 0)

	success = true
	return r, nil
}
Example #13
func NewBlockTreeTermsReader(dir store.Directory,
	fieldInfos FieldInfos, info *SegmentInfo,
	postingsReader PostingsReaderBase, ctx store.IOContext,
	segmentSuffix string, indexDivisor int) (p FieldsProducer, err error) {

	// log.Print("Initializing BlockTreeTermsReader...")
	fp := &BlockTreeTermsReader{
		postingsReader: postingsReader,
		fields:         make(map[string]FieldReader),
		segment:        info.Name,
	}
	fp.in, err = dir.OpenInput(util.SegmentFileName(info.Name, segmentSuffix, TERMS_EXTENSION), ctx)
	if err != nil {
		return nil, err
	}

	success := false
	var indexIn store.IndexInput
	defer func() {
		if !success {
			fmt.Println("Failed to initialize BlockTreeTermsReader.")
			if err != nil {
				fmt.Println("DEBUG ", err)
			}
			// this.close() will close in:
			util.CloseWhileSuppressingError(indexIn, fp)
		}
	}()

	fp.version, err = fp.readHeader(fp.in)
	if err != nil {
		return nil, err
	}
	// log.Printf("Version: %v", fp.version)

	if indexDivisor != -1 {
		filename := util.SegmentFileName(info.Name, segmentSuffix, TERMS_INDEX_EXTENSION)
		indexIn, err = dir.OpenInput(filename, ctx)
		if err != nil {
			return nil, err
		}

		indexVersion, err := fp.readIndexHeader(indexIn)
		if err != nil {
			return nil, err
		}
		// log.Printf("Index version: %v", indexVersion)
		if int(indexVersion) != fp.version {
			return nil, errors.New(fmt.Sprintf("mixmatched version files: %v=%v,%v=%v", fp.in, fp.version, indexIn, indexVersion))
		}
	}

	// verify
	if indexIn != nil && fp.version >= TERMS_VERSION_CURRENT {
		if _, err = store.ChecksumEntireFile(indexIn); err != nil {
			return nil, err
		}
	}

	// Have PostingsReader init itself
	postingsReader.Init(fp.in)

	if fp.version >= TERMS_VERSION_CHECKSUM {
		// NOTE: data file is too costly to verify checksum against all the
		// bytes on open, but for now we at least verify proper structure
		// of the checksum footer: which looks for FOOTER_MAGIC +
		// algorithmID. This is cheap and can detect some forms of
		// corruption such as file truncation.
		if _, err = codec.RetrieveChecksum(fp.in); err != nil {
			return nil, err
		}
	}

	// Read per-field details
	fp.seekDir(fp.in, fp.dirOffset)
	if indexDivisor != -1 {
		fp.seekDir(indexIn, fp.indexDirOffset)
	}

	numFields, err := fp.in.ReadVInt()
	if err != nil {
		return nil, err
	}
	// log.Printf("Fields number: %v", numFields)
	if numFields < 0 {
		return nil, errors.New(fmt.Sprintf("invalid numFields: %v (resource=%v)", numFields, fp.in))
	}

	for i := int32(0); i < numFields; i++ {
		// log.Printf("Next field...")
		field, err := fp.in.ReadVInt()
		if err != nil {
			return nil, err
		}
		// log.Printf("Field: %v", field)

		numTerms, err := fp.in.ReadVLong()
		if err != nil {
			return nil, err
		}
		assert2(numTerms > 0,
			"Illegal numTerms for field number: %v (resource=%v)", field, fp.in)
		// log.Printf("Terms number: %v", numTerms)

		numBytes, err := fp.in.ReadVInt()
		if err != nil {
			return nil, err
		}
		assert2(numBytes >= 0,
			"invalid rootCode for field number: %v, numBytes=%v (resource=%v)",
			field, numBytes, fp.in)
		// log.Printf("Bytes number: %v", numBytes)

		rootCode := make([]byte, numBytes)
		err = fp.in.ReadBytes(rootCode)
		if err != nil {
			return nil, err
		}
		fieldInfo := fieldInfos.FieldInfoByNumber(int(field))
		assert2(fieldInfo != nil, "invalid field numebr: %v (resource=%v)", field, fp.in)
		var sumTotalTermFreq int64
		if fieldInfo.IndexOptions() == INDEX_OPT_DOCS_ONLY {
			sumTotalTermFreq = -1
		} else {
			sumTotalTermFreq, err = fp.in.ReadVLong()
			if err != nil {
				return nil, err
			}
		}
		sumDocFreq, err := fp.in.ReadVLong()
		if err != nil {
			return nil, err
		}
		var docCount int
		if docCount, err = asInt(fp.in.ReadVInt()); err != nil {
			return nil, err
		}
		// fmt.Printf("DocCount: %v\n", docCount)
		var longsSize int
		if fp.version >= TERMS_VERSION_META_ARRAY {
			if longsSize, err = asInt(fp.in.ReadVInt()); err != nil {
				return nil, err
			}
		}
		assert2(longsSize >= 0,
			"invalid longsSize for field: %v, longsSize=%v (resource=%v)",
			fieldInfo.Name, longsSize, fp.in)
		var minTerm, maxTerm []byte
		if fp.version >= TERMS_VERSION_MIN_MAX_TERMS {
			if minTerm, err = readBytesRef(fp.in); err != nil {
				return nil, err
			}
			if maxTerm, err = readBytesRef(fp.in); err != nil {
				return nil, err
			}
		}
		if docCount < 0 || int(docCount) > info.DocCount() { // #docs with field must be <= #docs
			return nil, errors.New(fmt.Sprintf(
				"invalid docCount: %v maxDoc: %v (resource=%v)",
				docCount, info.DocCount(), fp.in))
		}
		if sumDocFreq < int64(docCount) { // #postings must be >= #docs with field
			return nil, errors.New(fmt.Sprintf(
				"invalid sumDocFreq: %v docCount: %v (resource=%v)",
				sumDocFreq, docCount, fp.in))
		}
		if sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq { // #positions must be >= #postings
			return nil, errors.New(fmt.Sprintf(
				"invalid sumTotalTermFreq: %v sumDocFreq: %v (resource=%v)",
				sumTotalTermFreq, sumDocFreq, fp.in))
		}

		var indexStartFP int64
		if indexDivisor != -1 {
			if indexStartFP, err = indexIn.ReadVLong(); err != nil {
				return nil, err
			}
		}
		// log.Printf("indexStartFP: %v", indexStartFP)
		if _, ok := fp.fields[fieldInfo.Name]; ok {
			return nil, errors.New(fmt.Sprintf(
				"duplicate field: %v (resource=%v)", fieldInfo.Name, fp.in))
		}
		if fp.fields[fieldInfo.Name], err = newFieldReader(fp,
			fieldInfo, numTerms, rootCode, sumTotalTermFreq,
			sumDocFreq, docCount, indexStartFP, longsSize,
			indexIn, minTerm, maxTerm); err != nil {
			return nil, err
		}
	}

	if indexDivisor != -1 {
		if err = indexIn.Close(); err != nil {
			return nil, err
		}
	}

	success = true

	return fp, nil
}
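The docCount, sumDocFreq, and sumTotalTermFreq checks above encode ordering invariants between index statistics. A small runnable restatement (the statsOK helper is mine, for illustration only):

package main

import "fmt"

func statsOK(docCount, maxDoc int, sumDocFreq, sumTotalTermFreq int64) bool {
	if docCount < 0 || docCount > maxDoc {
		return false // #docs with field must be <= #docs
	}
	if sumDocFreq < int64(docCount) {
		return false // #postings must be >= #docs with field
	}
	if sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq {
		return false // #positions must be >= #postings (-1 means "not tracked")
	}
	return true
}

func main() {
	fmt.Println(statsOK(10, 100, 25, 40)) // true
	fmt.Println(statsOK(10, 100, 5, 40))  // false: fewer postings than docs with the field
}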
Example #14
/* Creates a postings writer with the specified PackedInts overhead ratio */
func newLucene41PostingsWriter(state *SegmentWriteState,
	acceptableOverheadRatio float32) (*Lucene41PostingsWriter, error) {
	docOut, err := state.Directory.CreateOutput(
		util.SegmentFileName(state.SegmentInfo.Name,
			state.SegmentSuffix,
			LUCENE41_DOC_EXTENSION),
		state.Context)
	if err != nil {
		return nil, err
	}

	ans := new(Lucene41PostingsWriter)
	if err = func() error {
		var posOut store.IndexOutput
		var payOut store.IndexOutput
		var success = false
		defer func() {
			if !success {
				util.CloseWhileSuppressingError(docOut, posOut, payOut)
			}
		}()

		err := codec.WriteHeader(docOut, LUCENE41_DOC_CODEC, LUCENE41_VERSION_CURRENT)
		if err != nil {
			return err
		}
		ans.forUtil, err = NewForUtilInto(acceptableOverheadRatio, docOut)
		if err != nil {
			return err
		}
		if state.FieldInfos.HasProx {
			ans.posDeltaBuffer = make([]int, MAX_DATA_SIZE)
			posOut, err = state.Directory.CreateOutput(util.SegmentFileName(
				state.SegmentInfo.Name, state.SegmentSuffix, LUCENE41_POS_EXTENSION),
				state.Context)
			if err != nil {
				return err
			}

			err = codec.WriteHeader(posOut, LUCENE41_POS_CODEC, LUCENE41_VERSION_CURRENT)
			if err != nil {
				return err
			}

			if state.FieldInfos.HasPayloads {
				ans.payloadBytes = make([]byte, 128)
				ans.payloadLengthBuffer = make([]int, MAX_DATA_SIZE)
			}

			if state.FieldInfos.HasOffsets {
				ans.offsetStartDeltaBuffer = make([]int, MAX_DATA_SIZE)
				ans.offsetLengthBuffer = make([]int, MAX_DATA_SIZE)
			}

			if state.FieldInfos.HasPayloads || state.FieldInfos.HasOffsets {
				payOut, err = state.Directory.CreateOutput(util.SegmentFileName(
					state.SegmentInfo.Name, state.SegmentSuffix, LUCENE41_PAY_EXTENSION),
					state.Context)
				if err != nil {
					return err
				}
				if err = codec.WriteHeader(payOut, LUCENE41_PAY_CODEC, LUCENE41_VERSION_CURRENT); err != nil {
					return err
				}
			}
		}
		ans.payOut, ans.posOut = payOut, posOut
		ans.docOut = docOut
		success = true
		return nil
	}(); err != nil {
		return nil, err
	}

	ans.docDeltaBuffer = make([]int, MAX_DATA_SIZE)
	ans.freqBuffer = make([]int, MAX_DATA_SIZE)
	ans.encoded = make([]byte, MAX_ENCODED_SIZE)

	// TODO: should we try skipping every 2/4 blocks...?
	ans.skipWriter = NewSkipWriter(
		maxSkipLevels,
		LUCENE41_BLOCK_SIZE,
		state.SegmentInfo.DocCount(),
		ans.docOut,
		ans.posOut,
		ans.payOut)

	return ans, nil
}
Example #15
const (
	LUCENE40_SI_EXTENSION    = "si"
	LUCENE40_CODEC_NAME      = "Lucene40SegmentInfo"
	LUCENE40_VERSION_START   = 0
	LUCENE40_VERSION_CURRENT = LUCENE40_VERSION_START

	SEGMENT_INFO_YES = 1
)

// lucene40/Lucene40SegmentInfoReader.java

var Lucene40SegmentInfoReader = func(dir store.Directory,
	segment string, context store.IOContext) (si *model.SegmentInfo, err error) {

	si = new(model.SegmentInfo)
	fileName := util.SegmentFileName(segment, "", LUCENE40_SI_EXTENSION)
	input, err := dir.OpenInput(fileName, context)
	if err != nil {
		return si, err
	}

	success := false
	defer func() {
		if !success {
			util.CloseWhileSuppressingError(input)
		} else {
			input.Close()
		}
	}()

	_, err = codec.CheckHeader(input, LUCENE40_CODEC_NAME, LUCENE40_VERSION_START, LUCENE40_VERSION_CURRENT)
Example #16
		w: Lucene46FieldInfosWriter,
	}
}

func (f *Lucene46FieldInfosFormat) FieldInfosReader() FieldInfosReader {
	return f.r
}

func (f *Lucene46FieldInfosFormat) FieldInfosWriter() FieldInfosWriter {
	return f.w
}

var Lucene46FieldInfosReader = func(dir store.Directory,
	segment, suffix string, ctx store.IOContext) (fis FieldInfos, err error) {

	filename := util.SegmentFileName(segment, suffix, FI_EXTENSION)
	var input store.ChecksumIndexInput
	if input, err = dir.OpenChecksumInput(filename, ctx); err != nil {
		return
	}

	var success = false
	defer func() {
		if success {
			err = input.Close()
		} else {
			util.CloseWhileSuppressingError(input)
		}
	}()

	var codecVersion int
Example #17
/*
Create a new writer. The number of items (terms or sub-blocks) per
block will aim to be between minItemsInBlock and maxItemsInBlock,
though in some cases, the blocks may be smaller than the min.
*/
func NewBlockTreeTermsWriter(state *SegmentWriteState,
	postingsWriter PostingsWriterBase,
	minItemsInBlock, maxItemsInBlock int) (*BlockTreeTermsWriter, error) {
	assert2(minItemsInBlock >= 2, "minItemsInBlock must be >= 2; got %v", minItemsInBlock)
	assert2(maxItemsInBlock >= 1, "maxItemsInBlock must be >= 1; got %v", maxItemsInBlock)
	assert2(minItemsInBlock <= maxItemsInBlock,
		"maxItemsInBlock must be >= minItemsInBlock; got maxItemsInBlock=%v minItemsInBlock=%v",
		maxItemsInBlock, minItemsInBlock)
	assert2(2*(minItemsInBlock-1) <= maxItemsInBlock,
		"maxItemsInBlock must be at least 2*(minItemsInBlock-1; got maxItemsInBlock=%v minItemsInBlock=%v",
		maxItemsInBlock, minItemsInBlock)

	ans := &BlockTreeTermsWriter{
		maxDoc:          state.SegmentInfo.DocCount(),
		fieldInfos:      state.FieldInfos,
		minItemsInBlock: minItemsInBlock,
		maxItemsInBlock: maxItemsInBlock,
		postingsWriter:  postingsWriter,
		segment:         state.SegmentInfo.Name,
		scratchBytes:    store.NewRAMOutputStreamBuffer(),
		scratchIntsRef:  util.NewIntsRefBuilder(),
		// bytesWriter:     store.NewRAMOutputStreamBuffer(),
		// bytesWriter2:    store.NewRAMOutputStreamBuffer(),
	}
	ans.spi = ans
	var out, indexOut store.IndexOutput
	if err := func() error {
		var success = false
		defer func() {
			if !success {
				util.CloseWhileSuppressingError(out, indexOut)
			}
		}()

		var err error
		termsFileName := util.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, TERMS_EXTENSION)
		if out, err = state.Directory.CreateOutput(termsFileName, state.Context); err != nil {
			return err
		}
		if err = ans.spi.WriteHeader(out); err != nil {
			return err
		}

		termsIndexFileName := util.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, TERMS_INDEX_EXTENSION)
		if indexOut, err = state.Directory.CreateOutput(termsIndexFileName, state.Context); err != nil {
			return err
		}
		if err = ans.spi.WriteIndexHeader(indexOut); err != nil {
			return err
		}

		// have consumer write its format/header
		if err = postingsWriter.Init(out); err != nil {
			return err
		}
		success = true
		return nil
	}(); err != nil {
		return nil, err
	}
	ans.out = out
	ans.indexOut = indexOut
	return ans, nil
}
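The four assertions at the top of NewBlockTreeTermsWriter pin down which (min, max) block sizes are legal. A worked check using 25/48, which I believe are Lucene's defaults:

package main

import "fmt"

// validBlockSizes restates the constructor's assertions; as I read it, the
// 2*(min-1) <= max bound ensures two undersized runs, each at most min-1
// items, can always be merged without exceeding the max.
func validBlockSizes(min, max int) bool {
	return min >= 2 && max >= 1 && min <= max && 2*(min-1) <= max
}

func main() {
	fmt.Println(validBlockSizes(25, 48)) // true: 2*(25-1) = 48 <= 48
	fmt.Println(validBlockSizes(25, 47)) // false: 2*(25-1) = 48 > 47
}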
Example #18
func NewCompressingStoredFieldsWriter(dir store.Directory, si *model.SegmentInfo,
	segmentSuffix string, ctx store.IOContext, formatName string,
	compressionMode CompressionMode, chunkSize int) (*CompressingStoredFieldsWriter, error) {

	assert(dir != nil)
	ans := &CompressingStoredFieldsWriter{
		directory:       dir,
		segment:         si.Name,
		segmentSuffix:   segmentSuffix,
		compressionMode: compressionMode,
		compressor:      compressionMode.NewCompressor(),
		chunkSize:       chunkSize,
		docBase:         0,
		bufferedDocs:    newGrowableByteArrayDataOutput(chunkSize),
		numStoredFields: make([]int, 16),
		endOffsets:      make([]int, 16),
		numBufferedDocs: 0,
	}

	var success = false
	indexStream, err := dir.CreateOutput(util.SegmentFileName(si.Name, segmentSuffix,
		lucene40.FIELDS_INDEX_EXTENSION), ctx)
	if err != nil {
		return nil, err
	}
	assert(indexStream != nil)
	defer func() {
		if !success {
			util.CloseWhileSuppressingError(indexStream)
			ans.Abort()
		}
	}()

	ans.fieldsStream, err = dir.CreateOutput(util.SegmentFileName(si.Name, segmentSuffix,
		lucene40.FIELDS_EXTENSION), ctx)
	if err != nil {
		return nil, err
	}

	codecNameIdx := formatName + CODEC_SFX_IDX
	codecNameDat := formatName + CODEC_SFX_DAT
	err = codec.WriteHeader(indexStream, codecNameIdx, VERSION_CURRENT)
	if err != nil {
		return nil, err
	}
	err = codec.WriteHeader(ans.fieldsStream, codecNameDat, VERSION_CURRENT)
	if err != nil {
		return nil, err
	}
	assert(int64(codec.HeaderLength(codecNameIdx)) == indexStream.FilePointer())
	assert(int64(codec.HeaderLength(codecNameDat)) == ans.fieldsStream.FilePointer())

	ans.indexWriter, err = NewStoredFieldsIndexWriter(indexStream)
	if err != nil {
		return nil, err
	}
	assert(ans.indexWriter != nil)
	indexStream = nil

	err = ans.fieldsStream.WriteVInt(int32(chunkSize))
	if err != nil {
		return nil, err
	}
	err = ans.fieldsStream.WriteVInt(packed.VERSION_CURRENT)
	if err != nil {
		return nil, err
	}

	success = true
	return ans, nil
}
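Both FilePointer assertions above rely on the codec header having a fixed, computable length. A sketch assuming Lucene's header layout: CODEC_MAGIC (4 bytes), the codec name as a length-prefixed string (1 + len bytes for names under 128 characters), and a 4-byte version:

package main

import "fmt"

// headerLength mirrors what codec.HeaderLength must compute for short
// codec names under the assumed layout:
// 4 (magic) + 1 (name length byte) + len(name) + 4 (version).
func headerLength(codecName string) int {
	return 9 + len(codecName)
}

func main() {
	fmt.Println(headerLength("Lucene41StoredFieldsIdx")) // hypothetical name: 32 bytes
}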
Example #19
func newSegmentCoreReaders(owner *SegmentReader, dir store.Directory, si *SegmentCommitInfo,
	context store.IOContext, termsIndexDivisor int) (self *SegmentCoreReaders, err error) {

	assert2(termsIndexDivisor != 0,
		"indexDivisor must be < 0 (don't load terms index) or greater than 0 (got 0)")
	// fmt.Println("Initializing SegmentCoreReaders from directory:", dir)

	self = &SegmentCoreReaders{
		refCount: 1,
		normsLocal: func() map[string]interface{} {
			return make(map[string]interface{})
		},
	}
	self.fieldsReaderLocal = func() StoredFieldsReader {
		return self.fieldsReaderOrig.Clone()
	}

	// fmt.Println("Initializing listeners...")
	self.addListener = make(chan CoreClosedListener)
	self.removeListener = make(chan CoreClosedListener)
	self.notifyListener = make(chan bool)
	// TODO re-enable later
	go func() { // ensure listeners are synchronized
		coreClosedListeners := make([]CoreClosedListener, 0)
		isRunning := true
		var listener CoreClosedListener
		for isRunning {
			// fmt.Println("Listening for events...")
			select {
			case listener = <-self.addListener:
				coreClosedListeners = append(coreClosedListeners, listener)
			case listener = <-self.removeListener:
				n := len(coreClosedListeners)
				for i, v := range coreClosedListeners {
					if v == listener {
						newListeners := make([]CoreClosedListener, 0, n-1)
						newListeners = append(newListeners, coreClosedListeners[0:i]...)
						newListeners = append(newListeners, coreClosedListeners[i+1:]...)
						coreClosedListeners = newListeners
						break
					}
				}
			case <-self.notifyListener:
				fmt.Println("Shutting down SegmentCoreReaders...")
				isRunning = false
				for _, v := range coreClosedListeners {
					v.onClose(self)
				}
			}
		}
		fmt.Println("Listeners are done.")
	}()

	var success = false
	ans := self
	defer func() {
		if !success {
			fmt.Println("Failed to initialize SegmentCoreReaders.")
			ans.decRef()
		}
	}()

	codec := si.Info.Codec().(Codec)
	// fmt.Println("Obtaining CFS Directory...")
	var cfsDir store.Directory // confusing name: if (cfs) it's the cfsdir, otherwise it's the segment's directory.
	if si.Info.IsCompoundFile() {
		// fmt.Println("Detected CompoundFile.")
		name := util.SegmentFileName(si.Info.Name, "", store.COMPOUND_FILE_EXTENSION)
		if self.cfsReader, err = store.NewCompoundFileDirectory(dir, name, context, false); err != nil {
			return nil, err
		}
		// fmt.Println("CompoundFileDirectory: ", self.cfsReader)
		cfsDir = self.cfsReader
	} else {
		cfsDir = dir
	}
	// fmt.Println("CFS Directory:", cfsDir)

	// fmt.Println("Reading FieldInfos...")
	fieldInfos := owner.fieldInfos

	self.termsIndexDivisor = termsIndexDivisor
	format := codec.PostingsFormat()

	// fmt.Println("Obtaining SegmentReadState...")
	segmentReadState := NewSegmentReadState(cfsDir, si.Info, fieldInfos, context, termsIndexDivisor)
	// Ask codec for its Fields
	// fmt.Println("Obtaining FieldsProducer...")
	if self.fields, err = format.FieldsProducer(segmentReadState); err != nil {
		return nil, err
	}
	assert(self.fields != nil)
	// ask codec for its Norms:
	// TODO: since we don't write any norms file if there are no norms,
	// kinda janky to assume the codec handles the case of no norms file at all gracefully?!

	if fieldInfos.HasNorms {
		// fmt.Println("Obtaining NormsDocValuesProducer...")
		if self.normsProducer, err = codec.NormsFormat().NormsProducer(segmentReadState); err != nil {
			return nil, err
		}
		assert(self.normsProducer != nil)
	}

	// fmt.Println("Obtaining StoredFieldsReader...")
	if self.fieldsReaderOrig, err = si.Info.Codec().(Codec).StoredFieldsFormat().FieldsReader(cfsDir, si.Info, fieldInfos, context); err != nil {
		return nil, err
	}

	if fieldInfos.HasVectors { // open term vector files only as needed
		// fmt.Println("Obtaining TermVectorsReader...")
		if self.termVectorsReaderOrig, err = si.Info.Codec().(Codec).TermVectorsFormat().VectorsReader(cfsDir, si.Info, fieldInfos, context); err != nil {
			return nil, err
		}
	}

	// fmt.Println("Success")
	success = true

	return self, nil
}
Example #20
func (d *CompoundFileDirectory) readEntries(handle IndexInput, dir Directory, name string) (mapping map[string]FileSlice, err error) {
	var stream IndexInput = nil
	var entriesStream ChecksumIndexInput = nil
	// read the first VInt. If it is negative, it's the version number
	// otherwise it's the count (pre-3.1 indexes)
	var success = false
	defer func() {
		if success {
			err = util.Close(stream, entriesStream)
		} else {
			util.CloseWhileSuppressingError(stream, entriesStream)
		}
	}()

	stream = handle.Clone()
	// fmt.Printf("Reading from stream: %v\n", stream)
	firstInt, err := stream.ReadVInt()
	if err != nil {
		return nil, err
	}
	// impossible for 3.0 to have 63 files in a .cfs, CFS writer was not visible
	// and separate norms/etc are outside of cfs.
	if firstInt == int32(CODEC_MAGIC_BYTE1) {
		// read and verify the remaining three magic bytes, failing fast
		// on I/O errors
		secondByte, err := stream.ReadByte()
		if err != nil {
			return nil, err
		}
		thirdByte, err := stream.ReadByte()
		if err != nil {
			return nil, err
		}
		fourthByte, err := stream.ReadByte()
		if err != nil {
			return nil, err
		}
		if secondByte != CODEC_MAGIC_BYTE2 ||
			thirdByte != CODEC_MAGIC_BYTE3 ||
			fourthByte != CODEC_MAGIC_BYTE4 {
			return nil, errors.New(fmt.Sprintf(
				"Illegal/impossible header for CFS file: %v,%v,%v",
				secondByte, thirdByte, fourthByte))
		}

		d.version, err = int32ToInt(codec.CheckHeaderNoMagic(stream, CFD_DATA_CODEC, CFD_VERSION_START, CFD_VERSION_CURRENT))
		if err != nil {
			return nil, err
		}
		entriesFileName := util.SegmentFileName(util.StripExtension(name), "", COMPOUND_FILE_ENTRIES_EXTENSION)
		entriesStream, err = dir.OpenChecksumInput(entriesFileName, IO_CONTEXT_READONCE)
		if err != nil {
			return nil, err
		}
		_, err = codec.CheckHeader(entriesStream, CFD_ENTRY_CODEC, CFD_VERSION_START, CFD_VERSION_CURRENT)
		if err != nil {
			return nil, err
		}
		numEntries, err := entriesStream.ReadVInt()
		if err != nil {
			return nil, err
		}

		mapping = make(map[string]FileSlice)
		// fmt.Printf("Entries number: %v\n", numEntries)
		for i := int32(0); i < numEntries; i++ {
			id, err := entriesStream.ReadString()
			if err != nil {
				return nil, err
			}
			if _, ok := mapping[id]; ok {
				return nil, errors.New(fmt.Sprintf(
					"Duplicate cfs entry id=%v in CFS: %v", id, entriesStream))
			}
			// log.Printf("Found entry: %v", id)
			offset, err := entriesStream.ReadLong()
			if err != nil {
				return nil, err
			}
			length, err := entriesStream.ReadLong()
			if err != nil {
				return nil, err
			}
			mapping[id] = FileSlice{offset, length}
		}
		if d.version >= CFD_VERSION_CHECKSUM {
			_, err = codec.CheckFooter(entriesStream)
		} else {
			err = codec.CheckEOF(entriesStream)
		}
		if err != nil {
			return nil, err
		}
	} else {
		// TODO remove once 3.x is not supported anymore
		panic("not supported yet; will also be obsolete soon")
	}
	success = true
	return mapping, nil
}
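The first-VInt trick in readEntries works because Lucene's codec magic was chosen so its leading byte decodes as a small, impossible file count. A runnable check, assuming CODEC_MAGIC is 0x3FD76C17 as in Lucene:

package main

import "fmt"

func main() {
	const codecMagic = 0x3FD76C17 // assumed value of Lucene's CODEC_MAGIC
	first := byte(codecMagic >> 24)
	// The high bit of 0x3F is clear, so read back as a VInt this byte
	// decodes to 63; as readEntries notes, a pre-3.1 .cfs could never
	// hold 63 files, which makes the branch unambiguous.
	fmt.Println(first) // 63, i.e. CODEC_MAGIC_BYTE1
}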
Example #21
// Sole constructor
func newCompressingStoredFieldsReader(d store.Directory,
	si *model.SegmentInfo, segmentSuffix string,
	fn model.FieldInfos, ctx store.IOContext, formatName string,
	compressionMode CompressionMode) (r *CompressingStoredFieldsReader, err error) {

	r = &CompressingStoredFieldsReader{}
	r.compressionMode = compressionMode
	segment := si.Name
	r.fieldInfos = fn
	r.numDocs = si.DocCount()

	var indexStream store.ChecksumIndexInput
	success := false
	defer func() {
		if !success {
			util.CloseWhileSuppressingError(r, indexStream)
		}
	}()

	indexStreamFN := util.SegmentFileName(segment, segmentSuffix, lucene40.FIELDS_INDEX_EXTENSION)
	fieldsStreamFN := util.SegmentFileName(segment, segmentSuffix, lucene40.FIELDS_EXTENSION)
	// Load the index into memory
	if indexStream, err = d.OpenChecksumInput(indexStreamFN, ctx); err != nil {
		return nil, err
	}
	codecNameIdx := formatName + CODEC_SFX_IDX
	if r.version, err = int32AsInt(codec.CheckHeader(indexStream, codecNameIdx,
		VERSION_START, VERSION_CURRENT)); err != nil {
		return nil, err
	}
	assert(int64(codec.HeaderLength(codecNameIdx)) == indexStream.FilePointer())
	if r.indexReader, err = newCompressingStoredFieldsIndexReader(indexStream, si); err != nil {
		return nil, err
	}

	var maxPointer int64 = -1

	if r.version >= VERSION_CHECKSUM {
		if maxPointer, err = indexStream.ReadVLong(); err != nil {
			return nil, err
		}
		if _, err = codec.CheckFooter(indexStream); err != nil {
			return nil, err
		}
	} else {
		if err = codec.CheckEOF(indexStream); err != nil {
			return nil, err
		}
	}

	if err = indexStream.Close(); err != nil {
		return nil, err
	}
	indexStream = nil

	// Open the data file and read metadata
	if r.fieldsStream, err = d.OpenInput(fieldsStreamFN, ctx); err != nil {
		return nil, err
	}
	if r.version >= VERSION_CHECKSUM {
		if maxPointer+codec.FOOTER_LENGTH != r.fieldsStream.Length() {
			return nil, errors.New(fmt.Sprintf(
				"Invalid fieldsStream maxPointer (file truncated?): maxPointer=%v, length=%v",
				maxPointer, r.fieldsStream.Length()))
		}
	} else {
		maxPointer = r.fieldsStream.Length()
	}
	r.maxPointer = maxPointer
	codecNameDat := formatName + CODEC_SFX_DAT
	var fieldsVersion int
	if fieldsVersion, err = int32AsInt(codec.CheckHeader(r.fieldsStream,
		codecNameDat, VERSION_START, VERSION_CURRENT)); err != nil {
		return nil, err
	}
	assert2(r.version == fieldsVersion,
		"Version mismatch between stored fields index and data: %v != %v",
		r.version, fieldsVersion)
	assert(int64(codec.HeaderLength(codecNameDat)) == r.fieldsStream.FilePointer())

	r.chunkSize = -1
	if r.version >= VERSION_BIG_CHUNKS {
		if r.chunkSize, err = int32AsInt(r.fieldsStream.ReadVInt()); err != nil {
			return nil, err
		}
	}

	if r.packedIntsVersion, err = int32AsInt(r.fieldsStream.ReadVInt()); err != nil {
		return nil, err
	}
	r.decompressor = compressionMode.NewDecompressor()
	r.bytes = make([]byte, 0)

	if r.version >= VERSION_CHECKSUM {
		// NOTE: data file is too costly to verify checksum against all the
		// bytes on open, but for now we at least verify proper structure
		// of the checksum footer: which looks for FOOTER_MAGIC +
		// algorithmID. This is cheap and can detect some forms of
		// corruption such as file truncation.
		if _, err = codec.RetrieveChecksum(r.fieldsStream); err != nil {
			return nil, err
		}
	}

	success = true
	return r, nil
}
Example #22
func NewLucene41PostingsReader(dir store.Directory,
	fis FieldInfos, si *SegmentInfo,
	ctx store.IOContext, segmentSuffix string) (r PostingsReaderBase, err error) {

	// fmt.Println("Initializing Lucene41PostingsReader...")
	success := false
	var docIn, posIn, payIn store.IndexInput
	defer func() {
		if !success {
			fmt.Println("Failed to initialize Lucene41PostingsReader.")
			util.CloseWhileSuppressingError(docIn, posIn, payIn)
		}
	}()

	docIn, err = dir.OpenInput(util.SegmentFileName(si.Name, segmentSuffix, LUCENE41_DOC_EXTENSION), ctx)
	if err != nil {
		return nil, err
	}
	var version int32
	version, err = codec.CheckHeader(docIn, LUCENE41_DOC_CODEC, LUCENE41_VERSION_START, LUCENE41_VERSION_CURRENT)
	if err != nil {
		return nil, err
	}
	forUtil, err := NewForUtilFrom(docIn)
	if err != nil {
		return nil, err
	}

	if version >= LUCENE41_VERSION_CHECKSUM {
		// NOTE: data file is too costly to verify checksum against all the
		// bytes on open, but for now we at least verify proper structure
		// of the checksum footer: which looks for FOOTER_MAGIC +
		// algorithmID. This is cheap and can detect some forms of
		// corruption such as file truncation.
		if _, err = codec.RetrieveChecksum(docIn); err != nil {
			return nil, err
		}
	}

	if fis.HasProx {
		posIn, err = dir.OpenInput(util.SegmentFileName(si.Name, segmentSuffix, LUCENE41_POS_EXTENSION), ctx)
		if err != nil {
			return nil, err
		}
		_, err = codec.CheckHeader(posIn, LUCENE41_POS_CODEC, version, version)
		if err != nil {
			return nil, err
		}

		if version >= LUCENE41_VERSION_CHECKSUM {
			// NOTE: data file is too costly to verify checksum against all the
			// bytes on open, but for now we at least verify proper structure
			// of the checksum footer: which looks for FOOTER_MAGIC +
			// algorithmID. This is cheap and can detect some forms of
			// corruption such as file truncation.
			if _, err = codec.RetrieveChecksum(posIn); err != nil {
				return nil, err
			}
		}

		if fis.HasPayloads || fis.HasOffsets {
			payIn, err = dir.OpenInput(util.SegmentFileName(si.Name, segmentSuffix, LUCENE41_PAY_EXTENSION), ctx)
			if err != nil {
				return nil, err
			}
			_, err = codec.CheckHeader(payIn, LUCENE41_PAY_CODEC, version, version)
			if err != nil {
				return nil, err
			}

			if version >= LUCENE41_VERSION_CHECKSUM {
				// NOTE: data file is too costly to verify checksum against all the
				// bytes on open, but for now we at least verify proper structure
				// of the checksum footer: which looks for FOOTER_MAGIC +
				// algorithmID. This is cheap and can detect some forms of
				// corruption such as file truncation.
				if _, err = codec.RetrieveChecksum(payIn); err != nil {
					return nil, err
				}

			}
		}
	}

	success = true
	return &Lucene41PostingsReader{docIn, posIn, payIn, forUtil, int(version)}, nil
}
Example #23
func newBlockTreeTermsReader(dir store.Directory,
	fieldInfos model.FieldInfos, info *model.SegmentInfo,
	postingsReader PostingsReaderBase, ctx store.IOContext,
	segmentSuffix string, indexDivisor int) (p FieldsProducer, err error) {

	log.Print("Initializing BlockTreeTermsReader...")
	fp := &BlockTreeTermsReader{
		postingsReader: postingsReader,
		fields:         make(map[string]FieldReader),
		segment:        info.Name,
	}
	fp.in, err = dir.OpenInput(util.SegmentFileName(info.Name, segmentSuffix, BTT_EXTENSION), ctx)
	if err != nil {
		return fp, err
	}

	success := false
	var indexIn store.IndexInput
	defer func() {
		if !success {
			log.Print("Failed to initialize BlockTreeTermsReader.")
			if err != nil {
				log.Print("DEBUG ", err)
			}
			// this.close() will close in:
			util.CloseWhileSuppressingError(indexIn, fp)
		}
	}()

	fp.version, err = fp.readHeader(fp.in)
	if err != nil {
		return fp, err
	}
	log.Printf("Version: %v", fp.version)

	if indexDivisor != -1 {
		indexIn, err = dir.OpenInput(util.SegmentFileName(info.Name, segmentSuffix, BTT_INDEX_EXTENSION), ctx)
		if err != nil {
			return fp, err
		}

		indexVersion, err := fp.readIndexHeader(indexIn)
		if err != nil {
			return fp, err
		}
		log.Printf("Index version: %v", indexVersion)
		if int(indexVersion) != fp.version {
			return fp, errors.New(fmt.Sprintf("mixmatched version files: %v=%v,%v=%v", fp.in, fp.version, indexIn, indexVersion))
		}
	}

	// Have PostingsReader init itself
	postingsReader.Init(fp.in)

	// Read per-field details
	fp.seekDir(fp.in, fp.dirOffset)
	if indexDivisor != -1 {
		fp.seekDir(indexIn, fp.indexDirOffset)
	}

	numFields, err := fp.in.ReadVInt()
	if err != nil {
		return fp, err
	}
	log.Printf("Fields number: %v", numFields)
	if numFields < 0 {
		return fp, errors.New(fmt.Sprintf("invalid numFields: %v (resource=%v)", numFields, fp.in))
	}

	for i := int32(0); i < numFields; i++ {
		log.Printf("Next field...")
		field, err := fp.in.ReadVInt()
		if err != nil {
			return fp, err
		}
		log.Printf("Field: %v", field)

		numTerms, err := fp.in.ReadVLong()
		if err != nil {
			return fp, err
		}
		// assert numTerms >= 0
		log.Printf("Terms number: %v", numTerms)

		numBytes, err := fp.in.ReadVInt()
		if err != nil {
			return fp, err
		}
		log.Printf("Bytes number: %v", numBytes)

		rootCode := make([]byte, numBytes)
		err = fp.in.ReadBytes(rootCode)
		if err != nil {
			return fp, err
		}
		fieldInfo := fieldInfos.FieldInfoByNumber(int(field))
		// assert fieldInfo != nil
		var sumTotalTermFreq int64
		if fieldInfo.IndexOptions() == model.INDEX_OPT_DOCS_ONLY {
			sumTotalTermFreq = -1
		} else {
			sumTotalTermFreq, err = fp.in.ReadVLong()
			if err != nil {
				return fp, err
			}
		}
		sumDocFreq, err := fp.in.ReadVLong()
		if err != nil {
			return fp, err
		}
		docCount, err := fp.in.ReadVInt()
		if err != nil {
			return fp, err
		}
		log.Printf("DocCount: %v", docCount)
		if docCount < 0 || int(docCount) > info.DocCount() { // #docs with field must be <= #docs
			return fp, errors.New(fmt.Sprintf(
				"invalid docCount: %v maxDoc: %v (resource=%v)",
				docCount, info.DocCount(), fp.in))
		}
		if sumDocFreq < int64(docCount) { // #postings must be >= #docs with field
			return fp, errors.New(fmt.Sprintf(
				"invalid sumDocFreq: %v docCount: %v (resource=%v)",
				sumDocFreq, docCount, fp.in))
		}
		if sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq { // #positions must be >= #postings
			return fp, errors.New(fmt.Sprintf(
				"invalid sumTotalTermFreq: %v sumDocFreq: %v (resource=%v)",
				sumTotalTermFreq, sumDocFreq, fp.in))
		}

		var indexStartFP int64
		if indexDivisor != -1 {
			indexStartFP, err = indexIn.ReadVLong()
			if err != nil {
				return fp, err
			}
		}
		log.Printf("indexStartFP: %v", indexStartFP)
		if _, ok := fp.fields[fieldInfo.Name]; ok {
			return fp, errors.New(fmt.Sprintf(
				"duplicate field: %v (resource=%v)", fieldInfo.Name, fp.in))
		}
		fp.fields[fieldInfo.Name], err = newFieldReader(fp,
			fieldInfo, numTerms, rootCode, sumTotalTermFreq,
			sumDocFreq, docCount, indexStartFP, indexIn)
		if err != nil {
			return fp, err
		}
		log.Print("DEBUG field processed.")
	}

	if indexDivisor != -1 {
		err = indexIn.Close()
		if err != nil {
			return fp, err
		}
	}

	success = true

	return fp, nil
}
Example #24
func (r *Lucene40SegmentInfoReader) Read(dir store.Directory,
	segment string, context store.IOContext) (si *SegmentInfo, err error) {

	si = new(SegmentInfo)
	fileName := util.SegmentFileName(segment, "", LUCENE40_SI_EXTENSION)
	input, err := dir.OpenInput(fileName, context)
	if err != nil {
		return nil, err
	}

	success := false
	defer func() {
		if !success {
			util.CloseWhileSuppressingError(input)
		} else {
			input.Close()
		}
	}()

	_, err = codec.CheckHeader(input, LUCENE40_CODEC_NAME, LUCENE40_VERSION_START, LUCENE40_VERSION_CURRENT)
	if err != nil {
		return nil, err
	}
	versionStr, err := input.ReadString()
	if err != nil {
		return nil, err
	}
	version, err := util.ParseVersion(versionStr)
	if err != nil {
		return nil, err
	}

	docCount, err := input.ReadInt()
	if err != nil {
		return nil, err
	}
	if docCount < 0 {
		return nil, errors.New(fmt.Sprintf("invalid docCount: %v (resource=%v)", docCount, input))
	}
	sicf, err := input.ReadByte()
	if err != nil {
		return nil, err
	}
	isCompoundFile := (sicf == SEGMENT_INFO_YES)
	diagnostics, err := input.ReadStringStringMap()
	if err != nil {
		return nil, err
	}
	_, err = input.ReadStringStringMap() // read deprecated attributes
	if err != nil {
		return nil, err
	}
	files, err := input.ReadStringSet()
	if err != nil {
		return nil, err
	}

	if err = codec.CheckEOF(input); err != nil {
		return nil, err
	}

	si = NewSegmentInfo(dir, version, segment, int(docCount), isCompoundFile, nil, diagnostics)
	si.SetFiles(files)

	success = true
	return si, nil
}