func newLucene49NormsConsumer(state *SegmentWriteState, dataCodec, dataExtension, metaCodec, metaExtension string) (nc *NormsConsumer, err error) { assert(packed.PackedFormat(packed.PACKED_SINGLE_BLOCK).IsSupported(1)) assert(packed.PackedFormat(packed.PACKED_SINGLE_BLOCK).IsSupported(2)) assert(packed.PackedFormat(packed.PACKED_SINGLE_BLOCK).IsSupported(4)) nc = &NormsConsumer{maxDoc: state.SegmentInfo.DocCount()} var success = false defer func() { if !success { util.CloseWhileSuppressingError(nc) } }() dataName := util.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, dataExtension) if nc.data, err = state.Directory.CreateOutput(dataName, state.Context); err != nil { return nil, err } if err = codec.WriteHeader(nc.data, dataCodec, VERSION_CURRENT); err != nil { return nil, err } metaName := util.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, metaExtension) if nc.meta, err = state.Directory.CreateOutput(metaName, state.Context); err != nil { return nil, err } if err = codec.WriteHeader(nc.meta, metaCodec, VERSION_CURRENT); err != nil { return nil, err } success = true return nc, nil }
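// newLucene49NormsConsumer above follows the success-flag idiom used by most constructors in
// these packages: open outputs one at a time and let a single deferred cleanup close whatever
// was opened if any step fails. A minimal, self-contained sketch of the idiom (openA/openB are
// illustrative parameters, not real APIs; only the standard io package is needed):
func openPair(openA, openB func() (io.Closer, error)) (a, b io.Closer, err error) {
	var success = false
	defer func() {
		if !success { // close partially opened resources, suppressing their close errors
			if a != nil {
				a.Close()
			}
			if b != nil {
				b.Close()
			}
		}
	}()
	if a, err = openA(); err != nil {
		return nil, nil, err
	}
	if b, err = openB(); err != nil {
		return nil, nil, err
	}
	success = true
	return a, b, nil
}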
func (w *CompressingStoredFieldsWriter) Abort() { assert(w != nil) util.CloseWhileSuppressingError(w) util.DeleteFilesIgnoringErrors(w.directory, util.SegmentFileName(w.segment, w.segmentSuffix, lucene40.FIELDS_EXTENSION), util.SegmentFileName(w.segment, w.segmentSuffix, lucene40.FIELDS_INDEX_EXTENSION)) }
func (w *CompressingStoredFieldsWriter) Abort() { if w == nil { // tolerate early released pointer return } util.CloseWhileSuppressingError(w) util.DeleteFilesIgnoringErrors(w.directory, util.SegmentFileName(w.segment, w.segmentSuffix, lucene40.FIELDS_EXTENSION), util.SegmentFileName(w.segment, w.segmentSuffix, lucene40.FIELDS_INDEX_EXTENSION)) }
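// The two Abort variants above differ only in how they treat a nil receiver: the first asserts,
// the second returns early. Go permits calling a method on a nil pointer receiver, so the early
// return lets a caller abort a writer whose construction never completed. A tiny self-contained
// illustration (writer/abort are made-up names, not the real types):
type writer struct{ segment string }

func (w *writer) abort() {
	if w == nil { // tolerate a nil receiver instead of dereferencing it
		return
	}
	fmt.Println("aborting", w.segment)
}

func demoNilAbort() {
	var w *writer // construction failed, so the pointer was never assigned
	w.abort()     // safe: the method observes a nil receiver and returns
}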
func NewLucene41PostingsReader(dir store.Directory, fis model.FieldInfos, si *model.SegmentInfo, ctx store.IOContext, segmentSuffix string) (r PostingsReaderBase, err error) { log.Print("Initializing Lucene41PostingsReader...") success := false var docIn, posIn, payIn store.IndexInput = nil, nil, nil defer func() { if !success { log.Print("Failed to initialize Lucene41PostingsReader.") if err != nil { log.Print("DEBUG ", err) } util.CloseWhileSuppressingError(docIn, posIn, payIn) } }() docIn, err = dir.OpenInput(util.SegmentFileName(si.Name, segmentSuffix, LUCENE41_DOC_EXTENSION), ctx) if err != nil { return r, err } _, err = codec.CheckHeader(docIn, LUCENE41_DOC_CODEC, LUCENE41_VERSION_CURRENT, LUCENE41_VERSION_CURRENT) if err != nil { return r, err } forUtil, err := NewForUtil(docIn) if err != nil { return r, err } if fis.HasProx { posIn, err = dir.OpenInput(util.SegmentFileName(si.Name, segmentSuffix, LUCENE41_POS_EXTENSION), ctx) if err != nil { return r, err } _, err = codec.CheckHeader(posIn, LUCENE41_POS_CODEC, LUCENE41_VERSION_CURRENT, LUCENE41_VERSION_CURRENT) if err != nil { return r, err } if fis.HasPayloads || fis.HasOffsets { payIn, err = dir.OpenInput(util.SegmentFileName(si.Name, segmentSuffix, LUCENE41_PAY_EXTENSION), ctx) if err != nil { return r, err } _, err = codec.CheckHeader(payIn, LUCENE41_PAY_CODEC, LUCENE41_VERSION_CURRENT, LUCENE41_VERSION_CURRENT) if err != nil { return r, err } } } success = true return &Lucene41PostingsReader{docIn, posIn, payIn, forUtil}, nil }
func newLucene42DocValuesProducer(state SegmentReadState, dataCodec, dataExtension, metaCodec, metaExtension string) (dvp *Lucene42DocValuesProducer, err error) { dvp = &Lucene42DocValuesProducer{ numericInstances: make(map[int]NumericDocValues), } dvp.maxDoc = state.segmentInfo.DocCount() metaName := util.SegmentFileName(state.segmentInfo.Name, state.segmentSuffix, metaExtension) // read in the entries from the metadata file. in, err := state.dir.OpenInput(metaName, state.context) if err != nil { return dvp, err } success := false defer func() { if success { err = util.Close(in) } else { util.CloseWhileSuppressingError(in) } }() version, err := codec.CheckHeader(in, metaCodec, LUCENE42_DV_VERSION_START, LUCENE42_DV_VERSION_CURRENT) if err != nil { return dvp, err } dvp.numerics = make(map[int]NumericEntry) dvp.binaries = make(map[int]BinaryEntry) dvp.fsts = make(map[int]FSTEntry) err = dvp.readFields(in) if err != nil { return dvp, err } success = true success = false dataName := util.SegmentFileName(state.segmentInfo.Name, state.segmentSuffix, dataExtension) dvp.data, err = state.dir.OpenInput(dataName, state.context) if err != nil { return dvp, err } version2, err := codec.CheckHeader(dvp.data, dataCodec, LUCENE42_DV_VERSION_START, LUCENE42_DV_VERSION_CURRENT) if err != nil { return dvp, err } if version != version2 { return dvp, errors.New("Format versions mismatch") } return dvp, nil }
/* Reads the most recent FieldInfos of the given segment info. */ func ReadFieldInfos(info *SegmentCommitInfo) (fis FieldInfos, err error) { var dir store.Directory var closeDir bool if info.FieldInfosGen() == -1 && info.Info.IsCompoundFile() { // no fieldInfos gen and segment uses a compound file if dir, err = store.NewCompoundFileDirectory(info.Info.Dir, util.SegmentFileName(info.Info.Name, "", store.COMPOUND_FILE_EXTENSION), store.IO_CONTEXT_READONCE, false); err != nil { return } closeDir = true } else { // gen'd FIS are read outside CFS, or the segment doesn't use a compound file dir = info.Info.Dir closeDir = false } defer func() { if closeDir { err = mergeError(err, dir.Close()) } }() var segmentSuffix string if n := info.FieldInfosGen(); n != -1 { segmentSuffix = strconv.FormatInt(n, 36) } codec := info.Info.Codec().(Codec) fisFormat := codec.FieldInfosFormat() return fisFormat.FieldInfosReader()(dir, info.Info.Name, segmentSuffix, store.IO_CONTEXT_READONCE) }
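// ReadFieldInfos above closes the compound directory in a defer and folds the Close error into
// the named return via mergeError. A minimal, self-contained sketch of that pattern
// (firstNonNil and withClose are illustrative stand-ins for the real helpers; only the standard
// io package is needed):
func firstNonNil(a, b error) error {
	if a != nil {
		return a
	}
	return b
}

func withClose(c io.Closer, body func() error) (err error) {
	// the deferred close can still surface an error through the named return value
	defer func() { err = firstNonNil(err, c.Close()) }()
	return body()
}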
/* Create the compound stream in the specified file. The filename is the entire name (no extensions are added). */ func newCompoundFileWriter(dir Directory, name string) *CompoundFileWriter { assert2(dir != nil, "directory cannot be nil") assert2(name != "", "name cannot be empty") return &CompoundFileWriter{ Locker: &sync.Mutex{}, directory: dir, entries: make(map[string]*FileEntry), seenIDs: make(map[string]bool), pendingEntries: list.New(), outputTaken: NewAtomicBool(), entryTableName: util.SegmentFileName( util.StripExtension(name), "", COMPOUND_FILE_ENTRIES_EXTENSION, ), dataFileName: name, } }
func TestReadingFromSlicedIndexInputOSX(t *testing.T) { t.Logf("TestReadingFromSlicedIndexInputOSX...") path := "../search/testdata/osx/belfrysample" d, err := OpenFSDirectory(path) if err != nil { t.Error(err) } ctx := NewIOContextBool(false) cd, err := NewCompoundFileDirectory(d, "_0.cfs", ctx, false) name := util.SegmentFileName("_0", "Lucene41_0", "pos") posIn, err := cd.OpenInput(name, ctx) if err != nil { t.Error(err) } t.Log(posIn) codec.CheckHeader(posIn, "Lucene41PostingsWriterPos", 0, 0) // codec header mismatch: actual header=0 vs expected header=1071082519 (resource: SlicedIndexInput(SlicedIndexInput(_0_Lucene41_0.pos in SimpleFSIndexInput(path='/private/tmp/kc/index/belfrysample/_0.cfs')) in SimpleFSIndexInput(path='/private/tmp/kc/index/belfrysample/_0.cfs') slice=1461:3426)) }
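// The test above resolves util.SegmentFileName("_0", "Lucene41_0", "pos") to "_0_Lucene41_0.pos"
// inside the _0.cfs compound file (see the header-mismatch comment). A rough sketch of that
// naming rule, simplified from the real helper and ignoring its edge cases:
func segmentFileName(name, suffix, ext string) string {
	s := name
	if suffix != "" {
		s += "_" + suffix
	}
	if ext != "" {
		s += "." + ext
	}
	return s
}

// segmentFileName("_0", "Lucene41_0", "pos") == "_0_Lucene41_0.pos"
// segmentFileName("_0", "", "si")            == "_0.si"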
func (w *Lucene40SegmentInfoWriter) Write(dir store.Directory, si *SegmentInfo, fis FieldInfos, ctx store.IOContext) (err error) { filename := util.SegmentFileName(si.Name, "", LUCENE40_SI_EXTENSION) si.AddFile(filename) var output store.IndexOutput output, err = dir.CreateOutput(filename, ctx) if err != nil { return err } var success = false defer func() { if !success { util.CloseWhileSuppressingError(output) si.Dir.DeleteFile(filename) // ignore error } else { err = mergeError(err, output.Close()) } }() err = codec.WriteHeader(output, LUCENE40_CODEC_NAME, LUCENE40_VERSION_CURRENT) if err != nil { return err } // Write the Lucene version that created this segment, since 3.1 err = store.Stream(output).WriteString(si.Version().String()). WriteInt(int32(si.DocCount())). WriteByte(func() byte { if si.IsCompoundFile() { return SEGMENT_INFO_YES } return byte((SEGMENT_INFO_NO + 256) % 256) // Go byte is non-negative, unlike Java }()).WriteStringStringMap(si.Diagnostics()). WriteStringStringMap(map[string]string{}). WriteStringSet(si.Files()).Close() if err != nil { return err } success = true return nil }
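// The WriteByte call above maps Java's SEGMENT_INFO_NO flag onto Go's unsigned byte type:
// assuming SEGMENT_INFO_NO is -1 (as in the Java format), (-1 + 256) % 256 yields 255, i.e. the
// same 0xFF bit pattern the Java writer emits, while SEGMENT_INFO_YES stays 1. A tiny check:
const segmentInfoNo = -1 // assumed value of SEGMENT_INFO_NO

func encodeCompoundFlag(isCompound bool) byte {
	if isCompound {
		return SEGMENT_INFO_YES // 1
	}
	return byte((segmentInfoNo + 256) % 256) // 0xFF
}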
func newLucene49NormsProducer(state SegmentReadState, dataCodec, dataExtension, metaCodec, metaExtension string) (np *NormsProducer, err error) { np = &NormsProducer{ Locker: new(sync.Mutex), norms: make(map[int]*NormsEntry), instances: make(map[int]NumericDocValues), maxDoc: state.SegmentInfo.DocCount(), ramBytesUsed: util.ShallowSizeOfInstance(reflect.TypeOf(np)), } metaName := util.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, metaExtension) // read in the entries from the metadata file. var in store.ChecksumIndexInput if in, err = state.Dir.OpenChecksumInput(metaName, state.Context); err != nil { return nil, err } if err = func() error { var success = false defer func() { if success { err = util.Close(in) } else { util.CloseWhileSuppressingError(in) } }() if np.version, err = codec.CheckHeader(in, metaCodec, VERSION_START, VERSION_CURRENT); err != nil { return err } if err = np.readFields(in, state.FieldInfos); err != nil { return err } if _, err = codec.CheckFooter(in); err != nil { return err } success = true return nil }(); err != nil { return nil, err } dataName := util.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, dataExtension) if np.data, err = state.Dir.OpenInput(dataName, state.Context); err != nil { return nil, err } var success = false defer func() { if !success { util.CloseWhileSuppressingError(np.data) } }() var version2 int32 if version2, err = codec.CheckHeader(np.data, dataCodec, VERSION_START, VERSION_CURRENT); err != nil { return nil, err } if version2 != np.version { return nil, errors.New("Format versions mismatch") } // NOTE: data file is too costly to verify checksum against all the // bytes on open, but for now we at least verify proper structure // of the checksum footer: which looks for FOOTER_MAGIC + // algorithmID. This is cheap and can detect some forms of // corruption such as file truncation. if _, err = codec.RetrieveChecksum(np.data); err != nil { return nil, err } success = true return np, nil }
func newSegmentCoreReaders(owner *SegmentReader, dir store.Directory, si *SegmentInfoPerCommit, context store.IOContext, termsIndexDivisor int) (self SegmentCoreReaders, err error) { if termsIndexDivisor == 0 { panic("indexDivisor must be < 0 (don't load terms index) or greater than 0 (got 0)") } log.Printf("Initializing SegmentCoreReaders from directory: %v", dir) self = SegmentCoreReaders{ refCount: 1, normsLocal: func() map[string]interface{} { return make(map[string]interface{}) }, } self.fieldsReaderLocal = func() StoredFieldsReader { return self.fieldsReaderOrig.Clone() } log.Print("Initializing listeners...") self.addListener = make(chan CoreClosedListener) self.removeListener = make(chan CoreClosedListener) self.notifyListener = make(chan *SegmentReader) // TODO re-enable later go func() { // ensure listeners are synchronized coreClosedListeners := make([]CoreClosedListener, 0) isRunning := true var listener CoreClosedListener for isRunning { log.Print("Listening for events...") select { case listener = <-self.addListener: coreClosedListeners = append(coreClosedListeners, listener) case listener = <-self.removeListener: n := len(coreClosedListeners) for i, v := range coreClosedListeners { if v == listener { newListeners := make([]CoreClosedListener, 0, n-1) newListeners = append(newListeners, coreClosedListeners[0:i]...) newListeners = append(newListeners, coreClosedListeners[i+1:]...) coreClosedListeners = newListeners break } } case owner := <-self.notifyListener: log.Print("Shutting down SegmentCoreReaders...") isRunning = false for _, v := range coreClosedListeners { v.onClose(owner) } } } log.Print("Listeners are done.") }() success := false defer func() { if !success { log.Print("Failed to initialize SegmentCoreReaders.") self.decRef() } }() codec := si.info.Codec().(Codec) log.Print("Obtaining CFS Directory...") var cfsDir store.Directory // confusing name: if (cfs) it's the cfsdir, otherwise it's the segment's directory. if si.info.IsCompoundFile() { log.Print("Detected CompoundFile.") name := util.SegmentFileName(si.info.Name, "", store.COMPOUND_FILE_EXTENSION) self.cfsReader, err = store.NewCompoundFileDirectory(dir, name, context, false) if err != nil { return self, err } log.Printf("CompoundFileDirectory: %v", self.cfsReader) cfsDir = self.cfsReader } else { cfsDir = dir } log.Printf("CFS Directory: %v", cfsDir) log.Print("Reading FieldInfos...") self.fieldInfos, err = codec.FieldInfosFormat().FieldInfosReader()(cfsDir, si.info.Name, store.IO_CONTEXT_READONCE) if err != nil { return self, err } self.termsIndexDivisor = termsIndexDivisor format := codec.PostingsFormat() log.Print("Obtaining SegmentReadState...") segmentReadState := newSegmentReadState(cfsDir, si.info, self.fieldInfos, context, termsIndexDivisor) // Ask codec for its Fields log.Print("Obtaining FieldsProducer...") self.fields, err = format.FieldsProducer(segmentReadState) if err != nil { return self, err } assert(self.fields != nil) // ask codec for its Norms: // TODO: since we don't write any norms file if there are no norms, // kinda shaky to assume the codec handles the case of no norms file at all gracefully?!
if self.fieldInfos.HasDocValues { log.Print("Obtaining DocValuesProducer...") self.dvProducer, err = codec.DocValuesFormat().FieldsProducer(segmentReadState) if err != nil { return self, err } assert(self.dvProducer != nil) } else { // self.dvProducer = nil } if self.fieldInfos.HasNorms { log.Print("Obtaining NormsDocValuesProducer...") self.normsProducer, err = codec.NormsFormat().NormsProducer(segmentReadState) if err != nil { return self, err } assert(self.normsProducer != nil) } else { // self.normsProducer = nil } log.Print("Obtaining StoredFieldsReader...") self.fieldsReaderOrig, err = si.info.Codec().(Codec).StoredFieldsFormat().FieldsReader(cfsDir, si.info, self.fieldInfos, context) if err != nil { return self, err } if self.fieldInfos.HasVectors { // open term vector files only as needed log.Print("Obtaining TermVectorsReader...") self.termVectorsReaderOrig, err = si.info.Codec().(Codec).TermVectorsFormat().VectorsReader(cfsDir, si.info, self.fieldInfos, context) if err != nil { return self, err } } else { // self.termVectorsReaderOrig = nil } log.Print("Success") success = true // Must assign this at the end -- if we hit an // exception above core, we don't want to attempt to // purge the FieldCache (will hit NPE because core is // not assigned yet). self.owner = owner return self, nil }
// Sole constructor func newCompressingStoredFieldsReader(d store.Directory, si *model.SegmentInfo, segmentSuffix string, fn model.FieldInfos, ctx store.IOContext, formatName string, compressionMode compressing.CompressionMode) (r *CompressingStoredFieldsReader, err error) { r = &CompressingStoredFieldsReader{} r.compressionMode = compressionMode segment := si.Name r.fieldInfos = fn r.numDocs = si.DocCount() var indexStream store.IndexInput success := false defer func() { if !success { log.Println("Failed to initialize CompressingStoredFieldsReader.") if err != nil { log.Print(err) } util.Close(r, indexStream) } }() // Load the index into memory indexStreamFN := util.SegmentFileName(segment, segmentSuffix, lucene40.FIELDS_INDEX_EXTENSION) indexStream, err = d.OpenInput(indexStreamFN, ctx) if err != nil { return nil, err } codecNameIdx := formatName + CODEC_SFX_IDX if _, err = codec.CheckHeader(indexStream, codecNameIdx, CODEC_SFX_VERSION_START, CODEC_SFX_VERSION_CURRENT); err != nil { return nil, err } if int64(codec.HeaderLength(codecNameIdx)) != indexStream.FilePointer() { panic("assert fail") } r.indexReader, err = newCompressingStoredFieldsIndexReader(indexStream, si) if err != nil { return nil, err } err = indexStream.Close() if err != nil { return nil, err } indexStream = nil // Open the data file and read metadata fieldsStreamFN := util.SegmentFileName(segment, segmentSuffix, lucene40.FIELDS_EXTENSION) r.fieldsStream, err = d.OpenInput(fieldsStreamFN, ctx) if err != nil { return nil, err } codecNameDat := formatName + CODEC_SFX_DAT if _, err = codec.CheckHeader(r.fieldsStream, codecNameDat, CODEC_SFX_VERSION_START, CODEC_SFX_VERSION_CURRENT); err != nil { return nil, err } if int64(codec.HeaderLength(codecNameDat)) != r.fieldsStream.FilePointer() { panic("assert fail") } n, err := r.fieldsStream.ReadVInt() if err != nil { return nil, err } r.packedIntsVersion = int(n) r.decompressor = compressionMode.NewDecompressor() r.bytes = make([]byte, 0) success = true return r, nil }
func NewBlockTreeTermsReader(dir store.Directory, fieldInfos FieldInfos, info *SegmentInfo, postingsReader PostingsReaderBase, ctx store.IOContext, segmentSuffix string, indexDivisor int) (p FieldsProducer, err error) { // log.Print("Initializing BlockTreeTermsReader...") fp := &BlockTreeTermsReader{ postingsReader: postingsReader, fields: make(map[string]FieldReader), segment: info.Name, } fp.in, err = dir.OpenInput(util.SegmentFileName(info.Name, segmentSuffix, TERMS_EXTENSION), ctx) if err != nil { return nil, err } success := false var indexIn store.IndexInput defer func() { if !success { fmt.Println("Failed to initialize BlockTreeTermsReader.") if err != nil { fmt.Println("DEBUG ", err) } // this.close() will close in: util.CloseWhileSuppressingError(indexIn, fp) } }() fp.version, err = fp.readHeader(fp.in) if err != nil { return nil, err } // log.Printf("Version: %v", fp.version) if indexDivisor != -1 { filename := util.SegmentFileName(info.Name, segmentSuffix, TERMS_INDEX_EXTENSION) indexIn, err = dir.OpenInput(filename, ctx) if err != nil { return nil, err } indexVersion, err := fp.readIndexHeader(indexIn) if err != nil { return nil, err } // log.Printf("Index version: %v", indexVersion) if int(indexVersion) != fp.version { return nil, errors.New(fmt.Sprintf("mismatched version files: %v=%v,%v=%v", fp.in, fp.version, indexIn, indexVersion)) } } // verify if indexIn != nil && fp.version >= TERMS_VERSION_CURRENT { if _, err = store.ChecksumEntireFile(indexIn); err != nil { return nil, err } } // Have PostingsReader init itself postingsReader.Init(fp.in) if fp.version >= TERMS_VERSION_CHECKSUM { // NOTE: data file is too costly to verify checksum against all the // bytes on open, but for now we at least verify proper structure // of the checksum footer: which looks for FOOTER_MAGIC + // algorithmID. This is cheap and can detect some forms of // corruption such as file truncation. 
if _, err = codec.RetrieveChecksum(fp.in); err != nil { return nil, err } } // Read per-field details fp.seekDir(fp.in, fp.dirOffset) if indexDivisor != -1 { fp.seekDir(indexIn, fp.indexDirOffset) } numFields, err := fp.in.ReadVInt() if err != nil { return nil, err } // log.Printf("Fields number: %v", numFields) if numFields < 0 { return nil, errors.New(fmt.Sprintf("invalid numFields: %v (resource=%v)", numFields, fp.in)) } for i := int32(0); i < numFields; i++ { // log.Printf("Next field...") field, err := fp.in.ReadVInt() if err != nil { return nil, err } // log.Printf("Field: %v", field) numTerms, err := fp.in.ReadVLong() if err != nil { return nil, err } assert2(numTerms > 0, "Illegal numTerms for field number: %v (resource=%v)", field, fp.in) // log.Printf("Terms number: %v", numTerms) numBytes, err := fp.in.ReadVInt() if err != nil { return nil, err } assert2(numBytes >= 0, "invalid rootCode for field number: %v, numBytes=%v (resource=%v)", field, numBytes, fp.in) // log.Printf("Bytes number: %v", numBytes) rootCode := make([]byte, numBytes) err = fp.in.ReadBytes(rootCode) if err != nil { return nil, err } fieldInfo := fieldInfos.FieldInfoByNumber(int(field)) assert2(fieldInfo != nil, "invalid field number: %v (resource=%v)", field, fp.in) var sumTotalTermFreq int64 if fieldInfo.IndexOptions() == INDEX_OPT_DOCS_ONLY { sumTotalTermFreq = -1 } else { sumTotalTermFreq, err = fp.in.ReadVLong() if err != nil { return nil, err } } sumDocFreq, err := fp.in.ReadVLong() if err != nil { return nil, err } var docCount int if docCount, err = asInt(fp.in.ReadVInt()); err != nil { return nil, err } // fmt.Printf("DocCount: %v\n", docCount) var longsSize int if fp.version >= TERMS_VERSION_META_ARRAY { if longsSize, err = asInt(fp.in.ReadVInt()); err != nil { return nil, err } } assert2(longsSize >= 0, "invalid longsSize for field: %v, longsSize=%v (resource=%v)", fieldInfo.Name, longsSize, fp.in) var minTerm, maxTerm []byte if fp.version >= TERMS_VERSION_MIN_MAX_TERMS { if minTerm, err = readBytesRef(fp.in); err != nil { return nil, err } if maxTerm, err = readBytesRef(fp.in); err != nil { return nil, err } } if docCount < 0 || int(docCount) > info.DocCount() { // #docs with field must be <= #docs return nil, errors.New(fmt.Sprintf( "invalid docCount: %v maxDoc: %v (resource=%v)", docCount, info.DocCount(), fp.in)) } if sumDocFreq < int64(docCount) { // #postings must be >= #docs with field return nil, errors.New(fmt.Sprintf( "invalid sumDocFreq: %v docCount: %v (resource=%v)", sumDocFreq, docCount, fp.in)) } if sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq { // #positions must be >= #postings return nil, errors.New(fmt.Sprintf( "invalid sumTotalTermFreq: %v sumDocFreq: %v (resource=%v)", sumTotalTermFreq, sumDocFreq, fp.in)) } var indexStartFP int64 if indexDivisor != -1 { if indexStartFP, err = indexIn.ReadVLong(); err != nil { return nil, err } } // log.Printf("indexStartFP: %v", indexStartFP) if _, ok := fp.fields[fieldInfo.Name]; ok { return nil, errors.New(fmt.Sprintf( "duplicate field: %v (resource=%v)", fieldInfo.Name, fp.in)) } if fp.fields[fieldInfo.Name], err = newFieldReader(fp, fieldInfo, numTerms, rootCode, sumTotalTermFreq, sumDocFreq, docCount, indexStartFP, longsSize, indexIn, minTerm, maxTerm); err != nil { return nil, err } } if indexDivisor != -1 { if err = indexIn.Close(); err != nil { return nil, err } } success = true return fp, nil }
/* Creates a postings writer with the specified PackedInts overhead ratio */ func newLucene41PostingsWriter(state *SegmentWriteState, acceptableOverheadRatio float32) (*Lucene41PostingsWriter, error) { docOut, err := state.Directory.CreateOutput( util.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, LUCENE41_DOC_EXTENSION), state.Context) if err != nil { return nil, err } ans := new(Lucene41PostingsWriter) if err = func() error { var posOut store.IndexOutput var payOut store.IndexOutput var success = false defer func() { if !success { util.CloseWhileSuppressingError(docOut, posOut, payOut) } }() err := codec.WriteHeader(docOut, LUCENE41_DOC_CODEC, LUCENE41_VERSION_CURRENT) if err != nil { return err } ans.forUtil, err = NewForUtilInto(acceptableOverheadRatio, docOut) if err != nil { return err } if state.FieldInfos.HasProx { ans.posDeltaBuffer = make([]int, MAX_DATA_SIZE) posOut, err = state.Directory.CreateOutput(util.SegmentFileName( state.SegmentInfo.Name, state.SegmentSuffix, LUCENE41_POS_EXTENSION), state.Context) if err != nil { return err } err = codec.WriteHeader(posOut, LUCENE41_POS_CODEC, LUCENE41_VERSION_CURRENT) if err != nil { return err } if state.FieldInfos.HasPayloads { ans.payloadBytes = make([]byte, 128) ans.payloadLengthBuffer = make([]int, MAX_DATA_SIZE) } if state.FieldInfos.HasOffsets { ans.offsetStartDeltaBuffer = make([]int, MAX_DATA_SIZE) ans.offsetLengthBuffer = make([]int, MAX_DATA_SIZE) } if state.FieldInfos.HasPayloads || state.FieldInfos.HasOffsets { payOut, err = state.Directory.CreateOutput(util.SegmentFileName( state.SegmentInfo.Name, state.SegmentSuffix, LUCENE41_PAY_EXTENSION), state.Context) if err != nil { return err } err = codec.WriteHeader(payOut, LUCENE41_PAY_CODEC, LUCENE41_VERSION_CURRENT) if err != nil { return err } } } ans.payOut, ans.posOut = payOut, posOut ans.docOut = docOut success = true return nil }(); err != nil { return nil, err } ans.docDeltaBuffer = make([]int, MAX_DATA_SIZE) ans.freqBuffer = make([]int, MAX_DATA_SIZE) ans.encoded = make([]byte, MAX_ENCODED_SIZE) // TODO: should we try skipping every 2/4 blocks...? ans.skipWriter = NewSkipWriter( maxSkipLevels, LUCENE41_BLOCK_SIZE, state.SegmentInfo.DocCount(), ans.docOut, ans.posOut, ans.payOut) return ans, nil }
const ( LUCENE40_SI_EXTENSION = "si" LUCENE40_CODEC_NAME = "Lucene40SegmentInfo" LUCENE40_VERSION_START = 0 LUCENE40_VERSION_CURRENT = LUCENE40_VERSION_START SEGMENT_INFO_YES = 1 ) // lucene40/Lucene40SegmentInfoReader.java var Lucene40SegmentInfoReader = func(dir store.Directory, segment string, context store.IOContext) (si *model.SegmentInfo, err error) { si = new(model.SegmentInfo) fileName := util.SegmentFileName(segment, "", LUCENE40_SI_EXTENSION) input, err := dir.OpenInput(fileName, context) if err != nil { return si, err } success := false defer func() { if !success { util.CloseWhileSuppressingError(input) } else { input.Close() } }() _, err = codec.CheckHeader(input, LUCENE40_CODEC_NAME, LUCENE40_VERSION_START, LUCENE40_VERSION_CURRENT)
w: Lucene46FieldInfosWriter, } } func (f *Lucene46FieldInfosFormat) FieldInfosReader() FieldInfosReader { return f.r } func (f *Lucene46FieldInfosFormat) FieldInfosWriter() FieldInfosWriter { return f.w } var Lucene46FieldInfosReader = func(dir store.Directory, segment, suffix string, ctx store.IOContext) (fis FieldInfos, err error) { filename := util.SegmentFileName(segment, suffix, FI_EXTENSION) var input store.ChecksumIndexInput if input, err = dir.OpenChecksumInput(filename, ctx); err != nil { return } var success = false defer func() { if success { err = input.Close() } else { util.CloseWhileSuppressingError(input) } }() var codecVersion int
/* Create a new writer. The number of items (terms or sub-blocks) per block will aim to be between minItemsPerBlock and maxItemsPerBlock, though in some cases, the blocks may be smaller than the min. */ func NewBlockTreeTermsWriter(state *SegmentWriteState, postingsWriter PostingsWriterBase, minItemsInBlock, maxItemsInBlock int) (*BlockTreeTermsWriter, error) { assert2(minItemsInBlock >= 2, "minItemsInBlock must be >= 2; got %v", minItemsInBlock) assert2(maxItemsInBlock >= 1, "maxItemsInBlock must be >= 1; got %v", maxItemsInBlock) assert2(minItemsInBlock <= maxItemsInBlock, "maxItemsInBlock must be >= minItemsInBlock; got maxItemsInBlock=%v minItemsInBlock=%v", maxItemsInBlock, minItemsInBlock) assert2(2*(minItemsInBlock-1) <= maxItemsInBlock, "maxItemsInBlock must be at least 2*(minItemsInBlock-1); got maxItemsInBlock=%v minItemsInBlock=%v", maxItemsInBlock, minItemsInBlock) ans := &BlockTreeTermsWriter{ maxDoc: state.SegmentInfo.DocCount(), fieldInfos: state.FieldInfos, minItemsInBlock: minItemsInBlock, maxItemsInBlock: maxItemsInBlock, postingsWriter: postingsWriter, segment: state.SegmentInfo.Name, scratchBytes: store.NewRAMOutputStreamBuffer(), scratchIntsRef: util.NewIntsRefBuilder(), // bytesWriter: store.NewRAMOutputStreamBuffer(), // bytesWriter2: store.NewRAMOutputStreamBuffer(), } ans.spi = ans var out, indexOut store.IndexOutput if err := func() error { var success = false defer func() { if !success { util.CloseWhileSuppressingError(out, indexOut) } }() var err error termsFileName := util.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, TERMS_EXTENSION) if out, err = state.Directory.CreateOutput(termsFileName, state.Context); err != nil { return err } if err = ans.spi.WriteHeader(out); err != nil { return err } termsIndexFileName := util.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, TERMS_INDEX_EXTENSION) if indexOut, err = state.Directory.CreateOutput(termsIndexFileName, state.Context); err != nil { return err } if err = ans.spi.WriteIndexHeader(indexOut); err != nil { return err } // have consumer write its format/header if err = postingsWriter.Init(out); err != nil { return err } success = true return nil }(); err != nil { return nil, err } ans.out = out ans.indexOut = indexOut return ans, nil }
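// The four assertions above pin down the legal block-size range: minItemsInBlock must be at
// least 2, at most maxItemsInBlock, and 2*(minItemsInBlock-1) must not exceed maxItemsInBlock.
// For example min=25, max=48 (the usual Lucene defaults) passes because 2*(25-1) = 48 <= 48,
// while min=25, max=40 fails the last check. A small predicate mirroring the constructor's
// validation (illustrative only):
func validBlockRange(minItemsInBlock, maxItemsInBlock int) bool {
	return minItemsInBlock >= 2 &&
		maxItemsInBlock >= 1 &&
		minItemsInBlock <= maxItemsInBlock &&
		2*(minItemsInBlock-1) <= maxItemsInBlock
}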
func NewCompressingStoredFieldsWriter(dir store.Directory, si *model.SegmentInfo, segmentSuffix string, ctx store.IOContext, formatName string, compressionMode CompressionMode, chunkSize int) (*CompressingStoredFieldsWriter, error) { assert(dir != nil) ans := &CompressingStoredFieldsWriter{ directory: dir, segment: si.Name, segmentSuffix: segmentSuffix, compressionMode: compressionMode, compressor: compressionMode.NewCompressor(), chunkSize: chunkSize, docBase: 0, bufferedDocs: newGrowableByteArrayDataOutput(chunkSize), numStoredFields: make([]int, 16), endOffsets: make([]int, 16), numBufferedDocs: 0, } var success = false indexStream, err := dir.CreateOutput(util.SegmentFileName(si.Name, segmentSuffix, lucene40.FIELDS_INDEX_EXTENSION), ctx) if err != nil { return nil, err } assert(indexStream != nil) defer func() { if !success { util.CloseWhileSuppressingError(indexStream) ans.Abort() } }() ans.fieldsStream, err = dir.CreateOutput(util.SegmentFileName(si.Name, segmentSuffix, lucene40.FIELDS_EXTENSION), ctx) if err != nil { return nil, err } codecNameIdx := formatName + CODEC_SFX_IDX codecNameDat := formatName + CODEC_SFX_DAT err = codec.WriteHeader(indexStream, codecNameIdx, VERSION_CURRENT) if err != nil { return nil, err } err = codec.WriteHeader(ans.fieldsStream, codecNameDat, VERSION_CURRENT) if err != nil { return nil, err } assert(int64(codec.HeaderLength(codecNameIdx)) == indexStream.FilePointer()) assert(int64(codec.HeaderLength(codecNameDat)) == ans.fieldsStream.FilePointer()) ans.indexWriter, err = NewStoredFieldsIndexWriter(indexStream) if err != nil { return nil, err } assert(ans.indexWriter != nil) indexStream = nil err = ans.fieldsStream.WriteVInt(int32(chunkSize)) if err != nil { return nil, err } err = ans.fieldsStream.WriteVInt(packed.VERSION_CURRENT) if err != nil { return nil, err } success = true return ans, nil }
func newSegmentCoreReaders(owner *SegmentReader, dir store.Directory, si *SegmentCommitInfo, context store.IOContext, termsIndexDivisor int) (self *SegmentCoreReaders, err error) { assert2(termsIndexDivisor != 0, "indexDivisor must be < 0 (don't load terms index) or greater than 0 (got 0)") // fmt.Println("Initializing SegmentCoreReaders from directory:", dir) self = &SegmentCoreReaders{ refCount: 1, normsLocal: func() map[string]interface{} { return make(map[string]interface{}) }, } self.fieldsReaderLocal = func() StoredFieldsReader { return self.fieldsReaderOrig.Clone() } // fmt.Println("Initializing listeners...") self.addListener = make(chan CoreClosedListener) self.removeListener = make(chan CoreClosedListener) self.notifyListener = make(chan bool) // TODO re-enable later go func() { // ensure listeners are synchronized coreClosedListeners := make([]CoreClosedListener, 0) isRunning := true var listener CoreClosedListener for isRunning { // fmt.Println("Listening for events...") select { case listener = <-self.addListener: coreClosedListeners = append(coreClosedListeners, listener) case listener = <-self.removeListener: n := len(coreClosedListeners) for i, v := range coreClosedListeners { if v == listener { newListeners := make([]CoreClosedListener, 0, n-1) newListeners = append(newListeners, coreClosedListeners[0:i]...) newListeners = append(newListeners, coreClosedListeners[i+1:]...) coreClosedListeners = newListeners break } } case <-self.notifyListener: fmt.Println("Shutting down SegmentCoreReaders...") isRunning = false for _, v := range coreClosedListeners { v.onClose(self) } } } fmt.Println("Listeners are done.") }() var success = false ans := self defer func() { if !success { fmt.Println("Failed to initialize SegmentCoreReaders.") ans.decRef() } }() codec := si.Info.Codec().(Codec) // fmt.Println("Obtaining CFS Directory...") var cfsDir store.Directory // confusing name: if (cfs) it's the cfsdir, otherwise it's the segment's directory. if si.Info.IsCompoundFile() { // fmt.Println("Detected CompoundFile.") name := util.SegmentFileName(si.Info.Name, "", store.COMPOUND_FILE_EXTENSION) if self.cfsReader, err = store.NewCompoundFileDirectory(dir, name, context, false); err != nil { return nil, err } // fmt.Println("CompoundFileDirectory: ", self.cfsReader) cfsDir = self.cfsReader } else { cfsDir = dir } // fmt.Println("CFS Directory:", cfsDir) // fmt.Println("Reading FieldInfos...") fieldInfos := owner.fieldInfos self.termsIndexDivisor = termsIndexDivisor format := codec.PostingsFormat() // fmt.Println("Obtaining SegmentReadState...") segmentReadState := NewSegmentReadState(cfsDir, si.Info, fieldInfos, context, termsIndexDivisor) // Ask codec for its Fields // fmt.Println("Obtaining FieldsProducer...") if self.fields, err = format.FieldsProducer(segmentReadState); err != nil { return nil, err } assert(self.fields != nil) // ask codec for its Norms: // TODO: since we don't write any norms file if there are no norms, // kinda shaky to assume the codec handles the case of no norms file at all gracefully?! 
if fieldInfos.HasNorms { // fmt.Println("Obtaining NormsDocValuesProducer...") if self.normsProducer, err = codec.NormsFormat().NormsProducer(segmentReadState); err != nil { return nil, err } assert(self.normsProducer != nil) } // fmt.Println("Obtaining StoredFieldsReader...") if self.fieldsReaderOrig, err = si.Info.Codec().(Codec).StoredFieldsFormat().FieldsReader(cfsDir, si.Info, fieldInfos, context); err != nil { return nil, err } if fieldInfos.HasVectors { // open term vector files only as needed // fmt.Println("Obtaining TermVectorsReader...") if self.termVectorsReaderOrig, err = si.Info.Codec().(Codec).TermVectorsFormat().VectorsReader(cfsDir, si.Info, fieldInfos, context); err != nil { return nil, err } } // fmt.Println("Success") success = true return self, nil }
func (d *CompoundFileDirectory) readEntries(handle IndexInput, dir Directory, name string) (mapping map[string]FileSlice, err error) { var stream IndexInput = nil var entriesStream ChecksumIndexInput = nil // read the first VInt. If it is negative, it's the version number // otherwise it's the count (pre-3.1 indexes) var success = false defer func() { if success { err = util.Close(stream, entriesStream) } else { util.CloseWhileSuppressingError(stream, entriesStream) } }() stream = handle.Clone() // fmt.Printf("Reading from stream: %v\n", stream) firstInt, err := stream.ReadVInt() if err != nil { return nil, err } // impossible for 3.0 to have 63 files in a .cfs, CFS writer was not visible // and separate norms/etc are outside of cfs. if firstInt == int32(CODEC_MAGIC_BYTE1) { if secondByte, err := stream.ReadByte(); err == nil { if thirdByte, err := stream.ReadByte(); err == nil { if fourthByte, err := stream.ReadByte(); err == nil { if secondByte != CODEC_MAGIC_BYTE2 || thirdByte != CODEC_MAGIC_BYTE3 || fourthByte != CODEC_MAGIC_BYTE4 { return nil, errors.New(fmt.Sprintf( "Illegal/impossible header for CFS file: %v,%v,%v", secondByte, thirdByte, fourthByte)) } } } } if err != nil { return nil, err } d.version, err = int32ToInt(codec.CheckHeaderNoMagic(stream, CFD_DATA_CODEC, CFD_VERSION_START, CFD_VERSION_CURRENT)) if err != nil { return nil, err } entriesFileName := util.SegmentFileName(util.StripExtension(name), "", COMPOUND_FILE_ENTRIES_EXTENSION) entriesStream, err = dir.OpenChecksumInput(entriesFileName, IO_CONTEXT_READONCE) if err != nil { return nil, err } _, err = codec.CheckHeader(entriesStream, CFD_ENTRY_CODEC, CFD_VERSION_START, CFD_VERSION_CURRENT) if err != nil { return nil, err } numEntries, err := entriesStream.ReadVInt() if err != nil { return nil, err } mapping = make(map[string]FileSlice) // fmt.Printf("Entries number: %v\n", numEntries) for i := int32(0); i < numEntries; i++ { id, err := entriesStream.ReadString() if err != nil { return nil, err } if _, ok := mapping[id]; ok { return nil, errors.New(fmt.Sprintf( "Duplicate cfs entry id=%v in CFS: %v", id, entriesStream)) } // log.Printf("Found entry: %v", id) offset, err := entriesStream.ReadLong() if err != nil { return nil, err } length, err := entriesStream.ReadLong() if err != nil { return nil, err } mapping[id] = FileSlice{offset, length} } if d.version >= CFD_VERSION_CHECKSUM { _, err = codec.CheckFooter(entriesStream) } else { err = codec.CheckEOF(entriesStream) } if err != nil { return nil, err } } else { // TODO remove once 3.x is not supported anymore panic("not supported yet; will also be obsolete soon") } success = true return mapping, nil }
// Sole constructor func newCompressingStoredFieldsReader(d store.Directory, si *model.SegmentInfo, segmentSuffix string, fn model.FieldInfos, ctx store.IOContext, formatName string, compressionMode CompressionMode) (r *CompressingStoredFieldsReader, err error) { r = &CompressingStoredFieldsReader{} r.compressionMode = compressionMode segment := si.Name r.fieldInfos = fn r.numDocs = si.DocCount() var indexStream store.ChecksumIndexInput success := false defer func() { if !success { util.CloseWhileSuppressingError(r, indexStream) } }() indexStreamFN := util.SegmentFileName(segment, segmentSuffix, lucene40.FIELDS_INDEX_EXTENSION) fieldsStreamFN := util.SegmentFileName(segment, segmentSuffix, lucene40.FIELDS_EXTENSION) // Load the index into memory if indexStream, err = d.OpenChecksumInput(indexStreamFN, ctx); err != nil { return nil, err } codecNameIdx := formatName + CODEC_SFX_IDX if r.version, err = int32AsInt(codec.CheckHeader(indexStream, codecNameIdx, VERSION_START, VERSION_CURRENT)); err != nil { return nil, err } assert(int64(codec.HeaderLength(codecNameIdx)) == indexStream.FilePointer()) if r.indexReader, err = newCompressingStoredFieldsIndexReader(indexStream, si); err != nil { return nil, err } var maxPointer int64 = -1 if r.version >= VERSION_CHECKSUM { if maxPointer, err = indexStream.ReadVLong(); err != nil { return nil, err } if _, err = codec.CheckFooter(indexStream); err != nil { return nil, err } } else { if err = codec.CheckEOF(indexStream); err != nil { return nil, err } } if err = indexStream.Close(); err != nil { return nil, err } indexStream = nil // Open the data file and read metadata if r.fieldsStream, err = d.OpenInput(fieldsStreamFN, ctx); err != nil { return nil, err } if r.version >= VERSION_CHECKSUM { if maxPointer+codec.FOOTER_LENGTH != r.fieldsStream.Length() { return nil, errors.New(fmt.Sprintf( "Invalid fieldsStream maxPointer (file truncated?): maxPointer=%v, length=%v", maxPointer, r.fieldsStream.Length())) } } else { maxPointer = r.fieldsStream.Length() } r.maxPointer = maxPointer codecNameDat := formatName + CODEC_SFX_DAT var fieldsVersion int if fieldsVersion, err = int32AsInt(codec.CheckHeader(r.fieldsStream, codecNameDat, VERSION_START, VERSION_CURRENT)); err != nil { return nil, err } assert2(r.version == fieldsVersion, "Version mismatch between stored fields index and data: %v != %v", r.version, fieldsVersion) assert(int64(codec.HeaderLength(codecNameDat)) == r.fieldsStream.FilePointer()) r.chunkSize = -1 if r.version >= VERSION_BIG_CHUNKS { if r.chunkSize, err = int32AsInt(r.fieldsStream.ReadVInt()); err != nil { return nil, err } } if r.packedIntsVersion, err = int32AsInt(r.fieldsStream.ReadVInt()); err != nil { return nil, err } r.decompressor = compressionMode.NewDecompressor() r.bytes = make([]byte, 0) if r.version >= VERSION_CHECKSUM { // NOTE: data file is too costly to verify checksum against all the // bytes on open, but for now we at least verify proper structure // of the checksum footer: which looks for FOOTER_MAGIC + // algorithmID. This is cheap and can detect some forms of // corruption such as file truncation. if _, err = codec.RetrieveChecksum(r.fieldsStream); err != nil { return nil, err } } success = true return r, nil }
func NewLucene41PostingsReader(dir store.Directory, fis FieldInfos, si *SegmentInfo, ctx store.IOContext, segmentSuffix string) (r PostingsReaderBase, err error) { // fmt.Println("Initializing Lucene41PostingsReader...") success := false var docIn, posIn, payIn store.IndexInput = nil, nil, nil defer func() { if !success { fmt.Println("Failed to initialize Lucene41PostingsReader.") util.CloseWhileSuppressingError(docIn, posIn, payIn) } }() docIn, err = dir.OpenInput(util.SegmentFileName(si.Name, segmentSuffix, LUCENE41_DOC_EXTENSION), ctx) if err != nil { return nil, err } var version int32 version, err = codec.CheckHeader(docIn, LUCENE41_DOC_CODEC, LUCENE41_VERSION_START, LUCENE41_VERSION_CURRENT) if err != nil { return nil, err } forUtil, err := NewForUtilFrom(docIn) if err != nil { return nil, err } if version >= LUCENE41_VERSION_CHECKSUM { // NOTE: data file is too costly to verify checksum against all the // bytes on open, but for now we at least verify proper structure // of the checksum footer: which looks for FOOTER_MAGIC + // algorithmID. This is cheap and can detect some forms of // corruption such as file truncation. if _, err = codec.RetrieveChecksum(docIn); err != nil { return nil, err } } if fis.HasProx { posIn, err = dir.OpenInput(util.SegmentFileName(si.Name, segmentSuffix, LUCENE41_POS_EXTENSION), ctx) if err != nil { return nil, err } _, err = codec.CheckHeader(posIn, LUCENE41_POS_CODEC, version, version) if err != nil { return nil, err } if version >= LUCENE41_VERSION_CHECKSUM { // NOTE: data file is too costly to verify checksum against all the // bytes on open, but for now we at least verify proper structure // of the checksum footer: which looks for FOOTER_MAGIC + // algorithmID. This is cheap and can detect some forms of // corruption such as file truncation. if _, err = codec.RetrieveChecksum(posIn); err != nil { return nil, err } } if fis.HasPayloads || fis.HasOffsets { payIn, err = dir.OpenInput(util.SegmentFileName(si.Name, segmentSuffix, LUCENE41_PAY_EXTENSION), ctx) if err != nil { return nil, err } _, err = codec.CheckHeader(payIn, LUCENE41_PAY_CODEC, version, version) if err != nil { return nil, err } if version >= LUCENE41_VERSION_CHECKSUM { // NOTE: data file is too costly to verify checksum against all the // bytes on open, but for now we at least verify proper structure // of the checksum footer: which looks for FOOTER_MAGIC + // algorithmID. This is cheap and can detect some forms of // corruption such as file truncation. if _, err = codec.RetrieveChecksum(payIn); err != nil { return nil, err } } } } success = true return &Lucene41PostingsReader{docIn, posIn, payIn, forUtil, int(version)}, nil }
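// RetrieveChecksum above does not verify every byte; it only checks that the file ends with a
// well-formed footer. In the format these readers expect, that footer is a fixed 16 bytes: a
// 4-byte FOOTER_MAGIC, a 4-byte algorithm id (currently always 0) and the 8-byte checksum, all
// big-endian. A self-contained sketch of the structural part of that check over a file's
// trailing bytes; the magic value is passed in because the real constant lives in the codec
// package, and this is an assumption-laden illustration, not the real routine (requires the
// standard encoding/binary package):
func footerLooksValid(tail []byte, footerMagic uint32) bool {
	const footerLength = 16 // 4 (magic) + 4 (algorithm id) + 8 (checksum)
	if len(tail) < footerLength {
		return false
	}
	tail = tail[len(tail)-footerLength:]
	magic := binary.BigEndian.Uint32(tail[0:4])
	algorithmID := binary.BigEndian.Uint32(tail[4:8])
	// the trailing 8 bytes hold the stored checksum; a structure-only check
	// does not recompute or compare it
	return magic == footerMagic && algorithmID == 0
}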
func newBlockTreeTermsReader(dir store.Directory, fieldInfos model.FieldInfos, info *model.SegmentInfo, postingsReader PostingsReaderBase, ctx store.IOContext, segmentSuffix string, indexDivisor int) (p FieldsProducer, err error) { log.Print("Initializing BlockTreeTermsReader...") fp := &BlockTreeTermsReader{ postingsReader: postingsReader, fields: make(map[string]FieldReader), segment: info.Name, } fp.in, err = dir.OpenInput(util.SegmentFileName(info.Name, segmentSuffix, BTT_EXTENSION), ctx) if err != nil { return fp, err } success := false var indexIn store.IndexInput defer func() { if !success { log.Print("Failed to initialize BlockTreeTermsReader.") if err != nil { log.Print("DEBUG ", err) } // this.close() will close in: util.CloseWhileSuppressingError(indexIn, fp) } }() fp.version, err = fp.readHeader(fp.in) if err != nil { return fp, err } log.Printf("Version: %v", fp.version) if indexDivisor != -1 { indexIn, err = dir.OpenInput(util.SegmentFileName(info.Name, segmentSuffix, BTT_INDEX_EXTENSION), ctx) if err != nil { return fp, err } indexVersion, err := fp.readIndexHeader(indexIn) if err != nil { return fp, err } log.Printf("Index version: %v", indexVersion) if int(indexVersion) != fp.version { return fp, errors.New(fmt.Sprintf("mismatched version files: %v=%v,%v=%v", fp.in, fp.version, indexIn, indexVersion)) } } // Have PostingsReader init itself postingsReader.Init(fp.in) // Read per-field details fp.seekDir(fp.in, fp.dirOffset) if indexDivisor != -1 { fp.seekDir(indexIn, fp.indexDirOffset) } numFields, err := fp.in.ReadVInt() if err != nil { return fp, err } log.Printf("Fields number: %v", numFields) if numFields < 0 { return fp, errors.New(fmt.Sprintf("invalid numFields: %v (resource=%v)", numFields, fp.in)) } for i := int32(0); i < numFields; i++ { log.Printf("Next field...") field, err := fp.in.ReadVInt() if err != nil { return fp, err } log.Printf("Field: %v", field) numTerms, err := fp.in.ReadVLong() if err != nil { return fp, err } // assert numTerms >= 0 log.Printf("Terms number: %v", numTerms) numBytes, err := fp.in.ReadVInt() if err != nil { return fp, err } log.Printf("Bytes number: %v", numBytes) rootCode := make([]byte, numBytes) err = fp.in.ReadBytes(rootCode) if err != nil { return fp, err } fieldInfo := fieldInfos.FieldInfoByNumber(int(field)) // assert fieldInfo != nil var sumTotalTermFreq int64 if fieldInfo.IndexOptions() == model.INDEX_OPT_DOCS_ONLY { sumTotalTermFreq = -1 } else { sumTotalTermFreq, err = fp.in.ReadVLong() if err != nil { return fp, err } } sumDocFreq, err := fp.in.ReadVLong() if err != nil { return fp, err } docCount, err := fp.in.ReadVInt() if err != nil { return fp, err } log.Printf("DocCount: %v", docCount) if docCount < 0 || int(docCount) > info.DocCount() { // #docs with field must be <= #docs return fp, errors.New(fmt.Sprintf( "invalid docCount: %v maxDoc: %v (resource=%v)", docCount, info.DocCount(), fp.in)) } if sumDocFreq < int64(docCount) { // #postings must be >= #docs with field return fp, errors.New(fmt.Sprintf( "invalid sumDocFreq: %v docCount: %v (resource=%v)", sumDocFreq, docCount, fp.in)) } if sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq { // #positions must be >= #postings return fp, errors.New(fmt.Sprintf( "invalid sumTotalTermFreq: %v sumDocFreq: %v (resource=%v)", sumTotalTermFreq, sumDocFreq, fp.in)) } var indexStartFP int64 if indexDivisor != -1 { indexStartFP, err = indexIn.ReadVLong() if err != nil { return fp, err } } log.Printf("indexStartFP: %v", indexStartFP) if _, ok := fp.fields[fieldInfo.Name]; ok { 
return fp, errors.New(fmt.Sprintf( "duplicate field: %v (resource=%v)", fieldInfo.Name, fp.in)) } fp.fields[fieldInfo.Name], err = newFieldReader(fp, fieldInfo, numTerms, rootCode, sumTotalTermFreq, sumDocFreq, docCount, indexStartFP, indexIn) if err != nil { return fp, err } log.Print("DEBUG field processed.") } if indexDivisor != -1 { err = indexIn.Close() if err != nil { return fp, err } } success = true return fp, nil }
func (r *Lucene40SegmentInfoReader) Read(dir store.Directory, segment string, context store.IOContext) (si *SegmentInfo, err error) { si = new(SegmentInfo) fileName := util.SegmentFileName(segment, "", LUCENE40_SI_EXTENSION) input, err := dir.OpenInput(fileName, context) if err != nil { return nil, err } success := false defer func() { if !success { util.CloseWhileSuppressingError(input) } else { input.Close() } }() _, err = codec.CheckHeader(input, LUCENE40_CODEC_NAME, LUCENE40_VERSION_START, LUCENE40_VERSION_CURRENT) if err != nil { return nil, err } versionStr, err := input.ReadString() if err != nil { return nil, err } version, err := util.ParseVersion(versionStr) if err != nil { return nil, err } docCount, err := input.ReadInt() if err != nil { return nil, err } if docCount < 0 { return nil, errors.New(fmt.Sprintf("invalid docCount: %v (resource=%v)", docCount, input)) } sicf, err := input.ReadByte() if err != nil { return nil, err } isCompoundFile := (sicf == SEGMENT_INFO_YES) diagnostics, err := input.ReadStringStringMap() if err != nil { return nil, err } _, err = input.ReadStringStringMap() // read deprecated attributes if err != nil { return nil, err } files, err := input.ReadStringSet() if err != nil { return nil, err } if err = codec.CheckEOF(input); err != nil { return nil, err } si = NewSegmentInfo(dir, version, segment, int(docCount), isCompoundFile, nil, diagnostics) si.SetFiles(files) success = true return si, nil }