// NewLucene41PostingsReader opens the Lucene 4.1 postings files for a
// segment: the .doc input (always), plus the .pos input when any field
// indexes proximity, and the .pay input when payloads or offsets are
// present as well. Each file's codec header is validated against
// LUCENE41_VERSION_CURRENT. On any failure, every input opened so far is
// closed before returning.
func NewLucene41PostingsReader(dir store.Directory, fis FieldInfos, si SegmentInfo,
	ctx store.IOContext, segmentSuffix string) (r PostingsReaderBase, err error) {
	log.Print("Initializing Lucene41PostingsReader...")
	success := false
	var docIn, posIn, payIn store.IndexInput = nil, nil, nil
	// Cleanup guard: until 'success' is flipped just before the final
	// return, every exit path closes the opened inputs while suppressing
	// secondary close errors, so the original error is the one returned.
	defer func() {
		if !success {
			log.Print("Failed to initialize Lucene41PostingsReader.")
			if err != nil {
				log.Print("DEBUG ", err)
			}
			util.CloseWhileSuppressingError(docIn, posIn, payIn)
		}
	}()
	// .doc file: present for every segment.
	docIn, err = dir.OpenInput(util.SegmentFileName(si.name, segmentSuffix, LUCENE41_DOC_EXTENSION), ctx)
	if err != nil {
		return r, err
	}
	_, err = codec.CheckHeader(docIn, LUCENE41_DOC_CODEC, LUCENE41_VERSION_CURRENT, LUCENE41_VERSION_CURRENT)
	if err != nil {
		return r, err
	}
	// The ForUtil decoder is initialized from the header of the .doc file.
	forUtil, err := NewForUtil(docIn)
	if err != nil {
		return r, err
	}
	if fis.hasProx {
		// .pos file: only written when some field indexes positions.
		posIn, err = dir.OpenInput(util.SegmentFileName(si.name, segmentSuffix, LUCENE41_POS_EXTENSION), ctx)
		if err != nil {
			return r, err
		}
		_, err = codec.CheckHeader(posIn, LUCENE41_POS_CODEC, LUCENE41_VERSION_CURRENT, LUCENE41_VERSION_CURRENT)
		if err != nil {
			return r, err
		}
		if fis.hasPayloads || fis.hasOffsets {
			// .pay file: payloads and/or offsets riding alongside positions.
			payIn, err = dir.OpenInput(util.SegmentFileName(si.name, segmentSuffix, LUCENE41_PAY_EXTENSION), ctx)
			if err != nil {
				return r, err
			}
			_, err = codec.CheckHeader(payIn, LUCENE41_PAY_CODEC, LUCENE41_VERSION_CURRENT, LUCENE41_VERSION_CURRENT)
			if err != nil {
				return r, err
			}
		}
	}
	success = true
	return &Lucene41PostingsReader{docIn, posIn, payIn, forUtil}, nil
}
func newLucene42DocValuesProducer(state SegmentReadState, dataCodec, dataExtension, metaCodec, metaExtension string) (dvp *Lucene42DocValuesProducer, err error) { dvp = &Lucene42DocValuesProducer{} dvp.maxDoc = int(state.segmentInfo.docCount) metaName := util.SegmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension) // read in the entries from the metadata file. in, err := state.dir.OpenInput(metaName, state.context) if err != nil { return dvp, err } success := false defer func() { if success { err = util.Close(in) } else { util.CloseWhileSuppressingError(in) } }() version, err := codec.CheckHeader(in, metaCodec, LUCENE42_DV_VERSION_START, LUCENE42_DV_VERSION_CURRENT) if err != nil { return dvp, err } dvp.numerics = make(map[int]NumericEntry) dvp.binaries = make(map[int]BinaryEntry) dvp.fsts = make(map[int]FSTEntry) err = dvp.readFields(in, state.fieldInfos) if err != nil { return dvp, err } success = true success = false dataName := util.SegmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension) dvp.data, err = state.dir.OpenInput(dataName, state.context) if err != nil { return dvp, err } version2, err := codec.CheckHeader(dvp.data, dataCodec, LUCENE42_DV_VERSION_START, LUCENE42_DV_VERSION_CURRENT) if err != nil { return dvp, err } if version != version2 { return dvp, errors.New("Format versions mismatch") } return dvp, nil }
func TestReadingFromSlicedIndexInputOSX(t *testing.T) { path := "../search/testdata/osx/belfrysample" d, err := OpenFSDirectory(path) if err != nil { t.Error(err) } ctx := NewIOContextBool(false) cd, err := NewCompoundFileDirectory(d, "_0.cfs", ctx, false) name := util.SegmentFileName("_0", "Lucene41_0", "pos") posIn, err := cd.OpenInput(name, ctx) if err != nil { t.Error(err) } t.Log(posIn) codec.CheckHeader(posIn, "Lucene41PostingsWriterPos", 0, 0) // codec header mismatch: actual header=0 vs expected header=1071082519 (resource: SlicedIndexInput(SlicedIndexInput(_0_Lucene41_0.pos in SimpleFSIndexInput(path='/private/tmp/kc/index/belfrysample/_0.cfs')) in SimpleFSIndexInput(path='/private/tmp/kc/index/belfrysample/_0.cfs') slice=1461:3426)) }
// newSegmentCoreReaders builds the shared, ref-counted core for a
// SegmentReader: it starts a goroutine that serializes core-close listener
// bookkeeping, resolves the segment's directory (compound file or plain),
// and asks the segment's codec for every per-segment producer/reader
// (fields, doc values, norms, stored fields, term vectors).
//
// termsIndexDivisor must be non-zero: negative means "don't load the terms
// index", positive values subsample it.
func newSegmentCoreReaders(owner *SegmentReader, dir store.Directory, si SegmentInfoPerCommit,
	context store.IOContext, termsIndexDivisor int) (self SegmentCoreReaders, err error) {
	if termsIndexDivisor == 0 {
		panic("indexDivisor must be < 0 (don't load terms index) or greater than 0 (got 0)")
	}
	log.Printf("Initializing SegmentCoreReaders from directory: %v", dir)
	self = SegmentCoreReaders{refCount: 1}
	log.Print("Initializing listeners...")
	// Unbuffered channels funnel all listener add/remove/notify traffic
	// through the single goroutine below, replacing a mutex-guarded slice.
	self.addListener = make(chan CoreClosedListener)
	self.removeListener = make(chan CoreClosedListener)
	self.notifyListener = make(chan *SegmentReader)
	// TODO re-enable later
	go func() { // ensure listeners are synchronized
		coreClosedListeners := make([]CoreClosedListener, 0)
		isRunning := true
		var listener CoreClosedListener
		for isRunning {
			log.Print("Listening for events...")
			select {
			case listener = <-self.addListener:
				coreClosedListeners = append(coreClosedListeners, listener)
			case listener = <-self.removeListener:
				// Remove the first matching listener, preserving order.
				n := len(coreClosedListeners)
				for i, v := range coreClosedListeners {
					if v == listener {
						newListeners := make([]CoreClosedListener, 0, n-1)
						newListeners = append(newListeners, coreClosedListeners[0:i]...)
						newListeners = append(newListeners, coreClosedListeners[i+1:]...)
						coreClosedListeners = newListeners
						break
					}
				}
			case owner := <-self.notifyListener:
				// Core is closing: notify every listener, then end this goroutine.
				log.Print("Shutting down SegmentCoreReaders...")
				isRunning = false
				for _, v := range coreClosedListeners {
					v.onClose(owner)
				}
			}
		}
		log.Print("Listeners are done.")
	}()
	success := false
	// Drop our initial reference on failure so partially acquired
	// resources are released through decRef.
	defer func() {
		if !success {
			log.Print("Failed to initialize SegmentCoreReaders.")
			self.decRef()
		}
	}()
	codec := si.info.codec
	log.Print("Obtaining CFS Directory...")
	var cfsDir store.Directory // confusing name: if (cfs) its the cfsdir, otherwise its the segment's directory.
	if si.info.isCompoundFile {
		log.Print("Detected CompoundFile.")
		name := util.SegmentFileName(si.info.name, "", store.COMPOUND_FILE_EXTENSION)
		self.cfsReader, err = store.NewCompoundFileDirectory(dir, name, context, false)
		if err != nil {
			return self, err
		}
		log.Printf("CompoundFileDirectory: %v", self.cfsReader)
		cfsDir = self.cfsReader
	} else {
		cfsDir = dir
	}
	log.Printf("CFS Directory: %v", cfsDir)
	log.Print("Reading FieldInfos...")
	self.fieldInfos, err = codec.ReadFieldInfos(cfsDir, si.info.name, store.IO_CONTEXT_READONCE)
	if err != nil {
		return self, err
	}
	self.termsIndexDivisor = termsIndexDivisor
	log.Print("Obtaining SegmentReadState...")
	segmentReadState := newSegmentReadState(cfsDir, si.info, self.fieldInfos, context, termsIndexDivisor)
	// Ask codec for its Fields
	log.Print("Obtaining FieldsProducer...")
	self.fields, err = codec.GetFieldsProducer(segmentReadState)
	if err != nil {
		return self, err
	}
	// assert fields != null;
	// ask codec for its Norms:
	// TODO: since we don't write any norms file if there are no norms,
	// kinda jaky to assume the codec handles the case of no norms file at all gracefully?!
	if self.fieldInfos.hasDocValues {
		log.Print("Obtaining DocValuesProducer...")
		self.dvProducer, err = codec.GetDocValuesProducer(segmentReadState)
		if err != nil {
			return self, err
		}
		// assert dvProducer != null;
	} else {
		// self.dvProducer = nil
	}
	if self.fieldInfos.hasNorms {
		log.Print("Obtaining NormsDocValuesProducer...")
		self.normsProducer, err = codec.GetNormsDocValuesProducer(segmentReadState)
		if err != nil {
			return self, err
		}
		// assert normsProducer != null;
	} else {
		// self.normsProducer = nil
	}
	log.Print("Obtaining StoredFieldsReader...")
	self.fieldsReaderOrig, err = si.info.codec.GetStoredFieldsReader(cfsDir, si.info, self.fieldInfos, context)
	if err != nil {
		return self, err
	}
	if self.fieldInfos.hasVectors { // open term vector files only as needed
		log.Print("Obtaining TermVectorsReader...")
		self.termVectorsReaderOrig, err = si.info.codec.GetTermVectorsReader(cfsDir, si.info, self.fieldInfos, context)
		if err != nil {
			return self, err
		}
	} else {
		// self.termVectorsReaderOrig = nil
	}
	log.Print("Success")
	success = true
	// Must assign this at the end -- if we hit an
	// exception above core, we don't want to attempt to
	// purge the FieldCache (will hit NPE because core is
	// not assigned yet).
	self.owner = owner
	return self, nil
}
// Field flags LUCENE42_FI_IS_INDEXED = 0x1 LUCENE42_FI_STORE_TERMVECTOR = 0x2 LUCENE42_FI_STORE_OFFSETS_IN_POSTINGS = 0x4 LUCENE42_FI_OMIT_NORMS = 0x10 LUCENE42_FI_STORE_PAYLOADS = 0x20 LUCENE42_FI_OMIT_TERM_FREQ_AND_POSITIONS = 0x40 LUCENE42_FI_OMIT_POSITIONS = 0x80 ) var ( Lucene42FieldInfosReader = func(dir store.Directory, segment string, context store.IOContext) (fi FieldInfos, err error) { log.Printf("Reading FieldInfos from %v...", dir) fi = FieldInfos{} fileName := util.SegmentFileName(segment, "", LUCENE42_FI_EXTENSION) log.Printf("Segment: %v", fileName) input, err := dir.OpenInput(fileName, context) if err != nil { return fi, err } log.Printf("Reading %v", input) success := false defer func() { if success { input.Close() } else { util.CloseWhileHandlingError(err, input) } }()
// CompressingStoredFieldsReader.java L90 func newCompressingStoredFieldsReader(d store.Directory, si SegmentInfo, segmentSuffix string, fn FieldInfos, ctx store.IOContext, formatName string, compressionMode codec.CompressionMode) (r *CompressingStoredFieldsReader, err error) { r = &CompressingStoredFieldsReader{} r.compressionMode = compressionMode segment := si.name r.fieldInfos = fn r.numDocs = int(si.docCount) var indexStream store.IndexInput success := false defer func() { if !success { log.Println("Failed to initialize CompressionStoredFieldsReader.") if err != nil { log.Print(err) } util.Close(r, indexStream) } }() // Load the index into memory indexStreamFN := util.SegmentFileName(segment, segmentSuffix, LUCENE40_SF_FIELDS_INDEX_EXTENSION) indexStream, err = d.OpenInput(indexStreamFN, ctx) if err != nil { return nil, err } codecNameIdx := formatName + CODEC_SFX_IDX codec.CheckHeader(indexStream, codecNameIdx, CODEC_SFX_VERSION_START, CODEC_SFX_VERSION_CURRENT) if int64(codec.HeaderLength(codecNameIdx)) != indexStream.FilePointer() { panic("assert fail") } r.indexReader, err = newCompressingStoredFieldsIndexReader(indexStream, si) if err != nil { return nil, err } err = indexStream.Close() if err != nil { return nil, err } indexStream = nil // Open the data file and read metadata fieldsStreamFN := util.SegmentFileName(segment, segmentSuffix, LUCENE40_SF_FIELDS_EXTENSION) r.fieldsStream, err = d.OpenInput(fieldsStreamFN, ctx) if err != nil { return nil, err } codecNameDat := formatName + CODEC_SFX_DAT codec.CheckHeader(r.fieldsStream, codecNameDat, CODEC_SFX_VERSION_START, CODEC_SFX_VERSION_CURRENT) if int64(codec.HeaderLength(codecNameDat)) != r.fieldsStream.FilePointer() { panic("assert fail") } n, err := r.fieldsStream.ReadVInt() if err != nil { return nil, err } r.packedIntsVersion = int(n) r.decompressor = compressionMode.NewDecompressor() r.bytes = make([]byte, 0) success = true return r, nil }
func readEntries(handle IndexInputSlicer, dir Directory, name string) (mapping map[string]FileEntry, err error) { var stream, entriesStream IndexInput = nil, nil defer func() { err = util.CloseWhileHandlingError(err, stream, entriesStream) }() // read the first VInt. If it is negative, it's the version number // otherwise it's the count (pre-3.1 indexes) mapping = make(map[string]FileEntry) stream = handle.openFullSlice() log.Printf("Reading from stream: %v", stream) firstInt, err := stream.ReadVInt() if err != nil { return mapping, err } // impossible for 3.0 to have 63 files in a .cfs, CFS writer was not visible // and separate norms/etc are outside of cfs. if firstInt == int32(CODEC_MAGIC_BYTE1) { if secondByte, err := stream.ReadByte(); err == nil { if thirdByte, err := stream.ReadByte(); err == nil { if fourthByte, err := stream.ReadByte(); err == nil { if secondByte != CODEC_MAGIC_BYTE2 || thirdByte != CODEC_MAGIC_BYTE3 || fourthByte != CODEC_MAGIC_BYTE4 { return mapping, errors.New(fmt.Sprintf( "Illegal/impossible header for CFS file: %v,%v,%v", secondByte, thirdByte, fourthByte)) } } } } if err != nil { return mapping, err } _, err = codec.CheckHeaderNoMagic(stream, CFD_DATA_CODEC, CFD_VERSION_START, CFD_VERSION_START) if err != nil { return mapping, err } entriesFileName := util.SegmentFileName(util.StripExtension(name), "", COMPOUND_FILE_ENTRIES_EXTENSION) entriesStream, err = dir.OpenInput(entriesFileName, IO_CONTEXT_READONCE) if err != nil { return mapping, err } _, err = codec.CheckHeader(entriesStream, CFD_ENTRY_CODEC, CFD_VERSION_START, CFD_VERSION_START) if err != nil { return mapping, err } numEntries, err := entriesStream.ReadVInt() if err != nil { return mapping, err } log.Printf("Entries number: %v", numEntries) for i := int32(0); i < numEntries; i++ { id, err := entriesStream.ReadString() if err != nil { return mapping, err } if _, ok := mapping[id]; ok { return mapping, errors.New(fmt.Sprintf( "Duplicate cfs entry id=%v in CFS: %v", id, 
entriesStream)) } log.Printf("Found entry: %v", id) offset, err := entriesStream.ReadLong() if err != nil { return mapping, err } length, err := entriesStream.ReadLong() if err != nil { return mapping, err } mapping[id] = FileEntry{offset, length} } } else { // TODO remove once 3.x is not supported anymore panic("not supported yet; will also be obsolete soon") } return mapping, nil }
// newBlockTreeTermsReader opens the BlockTree terms dictionary and, unless
// indexDivisor == -1, the terms index for a segment; verifies the two
// files' versions agree; lets the PostingsReader initialize itself from the
// terms input; and then reads per-field metadata (term/doc/posting
// statistics plus the serialized root code), building one FieldReader per
// field.
func newBlockTreeTermsReader(dir store.Directory, fieldInfos FieldInfos, info SegmentInfo,
	postingsReader PostingsReaderBase, ctx store.IOContext,
	segmentSuffix string, indexDivisor int) (p FieldsProducer, err error) {
	log.Print("Initializing BlockTreeTermsReader...")
	fp := &BlockTreeTermsReader{
		postingsReader: postingsReader,
		fields:         make(map[string]FieldReader),
		segment:        info.name,
	}
	fp.in, err = dir.OpenInput(util.SegmentFileName(info.name, segmentSuffix, BTT_EXTENSION), ctx)
	if err != nil {
		return fp, err
	}
	success := false
	var indexIn store.IndexInput
	// On failure, close the index input and the reader itself (which owns fp.in).
	defer func() {
		if !success {
			log.Print("Failed to initialize BlockTreeTermsReader.")
			if err != nil {
				log.Print("DEBUG ", err)
			}
			// this.close() will close in:
			util.CloseWhileSuppressingError(indexIn, fp)
		}
	}()
	fp.version, err = fp.readHeader(fp.in)
	if err != nil {
		return fp, err
	}
	log.Printf("Version: %v", fp.version)
	if indexDivisor != -1 {
		indexIn, err = dir.OpenInput(util.SegmentFileName(info.name, segmentSuffix, BTT_INDEX_EXTENSION), ctx)
		if err != nil {
			return fp, err
		}
		indexVersion, err := fp.readIndexHeader(indexIn)
		if err != nil {
			return fp, err
		}
		log.Printf("Index version: %v", indexVersion)
		// Terms file and index file must be written by the same version.
		if int(indexVersion) != fp.version {
			return fp, errors.New(fmt.Sprintf("mixmatched version files: %v=%v,%v=%v",
				fp.in, fp.version, indexIn, indexVersion))
		}
	}
	// Have PostingsReader init itself
	postingsReader.Init(fp.in)
	// Read per-field details
	fp.seekDir(fp.in, fp.dirOffset)
	if indexDivisor != -1 {
		fp.seekDir(indexIn, fp.indexDirOffset)
	}
	numFields, err := fp.in.ReadVInt()
	if err != nil {
		return fp, err
	}
	log.Printf("Fields number: %v", numFields)
	if numFields < 0 {
		return fp, errors.New(fmt.Sprintf("invalid numFields: %v (resource=%v)", numFields, fp.in))
	}
	for i := int32(0); i < numFields; i++ {
		log.Printf("Next field...")
		field, err := fp.in.ReadVInt()
		if err != nil {
			return fp, err
		}
		log.Printf("Field: %v", field)
		numTerms, err := fp.in.ReadVLong()
		if err != nil {
			return fp, err
		}
		// assert numTerms >= 0
		log.Printf("Terms number: %v", numTerms)
		numBytes, err := fp.in.ReadVInt()
		if err != nil {
			return fp, err
		}
		log.Printf("Bytes number: %v", numBytes)
		// rootCode: opaque serialized pointer to this field's root block.
		rootCode := make([]byte, numBytes)
		err = fp.in.ReadBytes(rootCode)
		if err != nil {
			return fp, err
		}
		fieldInfo := fieldInfos.byNumber[field]
		// assert fieldInfo != nil
		var sumTotalTermFreq int64
		// DOCS_ONLY fields store no total term frequency; -1 marks "absent".
		if fieldInfo.indexOptions == INDEX_OPT_DOCS_ONLY {
			sumTotalTermFreq = -1
		} else {
			sumTotalTermFreq, err = fp.in.ReadVLong()
			if err != nil {
				return fp, err
			}
		}
		sumDocFreq, err := fp.in.ReadVLong()
		if err != nil {
			return fp, err
		}
		docCount, err := fp.in.ReadVInt()
		if err != nil {
			return fp, err
		}
		log.Printf("DocCount: %v", docCount)
		// Sanity-check the statistics invariants before trusting them.
		if docCount < 0 || docCount > info.docCount { // #docs with field must be <= #docs
			return fp, errors.New(fmt.Sprintf(
				"invalid docCount: %v maxDoc: %v (resource=%v)", docCount, info.docCount, fp.in))
		}
		if sumDocFreq < int64(docCount) { // #postings must be >= #docs with field
			return fp, errors.New(fmt.Sprintf(
				"invalid sumDocFreq: %v docCount: %v (resource=%v)", sumDocFreq, docCount, fp.in))
		}
		if sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq { // #positions must be >= #postings
			return fp, errors.New(fmt.Sprintf(
				"invalid sumTotalTermFreq: %v sumDocFreq: %v (resource=%v)", sumTotalTermFreq, sumDocFreq, fp.in))
		}
		var indexStartFP int64
		if indexDivisor != -1 {
			indexStartFP, err = indexIn.ReadVLong()
			if err != nil {
				return fp, err
			}
		}
		log.Printf("indexStartFP: %v", indexStartFP)
		if _, ok := fp.fields[fieldInfo.name]; ok {
			return fp, errors.New(fmt.Sprintf(
				"duplicate field: %v (resource=%v)", fieldInfo.name, fp.in))
		}
		fp.fields[fieldInfo.name], err = newFieldReader(fp,
			fieldInfo, numTerms, rootCode, sumTotalTermFreq, sumDocFreq, docCount,
			indexStartFP, indexIn)
		if err != nil {
			return fp, err
		}
		log.Print("DEBUG field processed.")
	}
	if indexDivisor != -1 {
		err = indexIn.Close()
		if err != nil {
			return fp, err
		}
	}
	success = true
	return fp, nil
}