func NewCompoundFileDirectory(directory Directory, fileName string, context IOContext, openForWrite bool) (d *CompoundFileDirectory, err error) { self := &CompoundFileDirectory{ Locker: &sync.Mutex{}, directory: directory, fileName: fileName, readBufferSize: bufferSize(context), openForWrite: openForWrite} self.DirectoryImpl = NewDirectoryImpl(self) self.BaseDirectory = NewBaseDirectory(self) if !openForWrite { // log.Printf("Open for read.") success := false defer func() { if !success { util.CloseWhileSuppressingError(self.handle) } }() self.handle, err = directory.OpenInput(fileName, context) if err != nil { return nil, err } self.entries, err = self.readEntries(self.handle, directory, fileName) if err != nil { return nil, err } if self.version >= CFD_VERSION_CHECKSUM { if _, err = codec.CheckHeader(self.handle, CFD_DATA_CODEC, int32(self.version), int32(self.version)); err != nil { return nil, err } // NOTE: data file is too costly to verify checksum against all the // bytes on open, but for now we at least verify proper structure // of the checksum footer: which looks for FOOTER_MAGIC + // algorithmID. This is cheap and can detect some forms of // corruption such as file trucation. if _, err = codec.RetrieveChecksum(self.handle); err != nil { return nil, err } } success = true self.BaseDirectory.IsOpen = true return self, nil } else { assert2(reflect.TypeOf(directory).Name() != "CompoundFileDirectory", "compound file inside of compound file: %v", fileName) self.entries = SENTINEL self.IsOpen = true self.writer = newCompoundFileWriter(directory, fileName) self.handle = nil return self, nil } }
// Sole constructor func newCompressingStoredFieldsReader(d store.Directory, si *model.SegmentInfo, segmentSuffix string, fn model.FieldInfos, ctx store.IOContext, formatName string, compressionMode CompressionMode) (r *CompressingStoredFieldsReader, err error) { r = &CompressingStoredFieldsReader{} r.compressionMode = compressionMode segment := si.Name r.fieldInfos = fn r.numDocs = si.DocCount() var indexStream store.ChecksumIndexInput success := false defer func() { if !success { util.CloseWhileSuppressingError(r, indexStream) } }() indexStreamFN := util.SegmentFileName(segment, segmentSuffix, lucene40.FIELDS_INDEX_EXTENSION) fieldsStreamFN := util.SegmentFileName(segment, segmentSuffix, lucene40.FIELDS_EXTENSION) // Load the index into memory if indexStream, err = d.OpenChecksumInput(indexStreamFN, ctx); err != nil { return nil, err } codecNameIdx := formatName + CODEC_SFX_IDX if r.version, err = int32AsInt(codec.CheckHeader(indexStream, codecNameIdx, VERSION_START, VERSION_CURRENT)); err != nil { return nil, err } assert(int64(codec.HeaderLength(codecNameIdx)) == indexStream.FilePointer()) if r.indexReader, err = newCompressingStoredFieldsIndexReader(indexStream, si); err != nil { return nil, err } var maxPointer int64 = -1 if r.version >= VERSION_CHECKSUM { if maxPointer, err = indexStream.ReadVLong(); err != nil { return nil, err } if _, err = codec.CheckFooter(indexStream); err != nil { return nil, err } } else { if err = codec.CheckEOF(indexStream); err != nil { return nil, err } } if err = indexStream.Close(); err != nil { return nil, err } indexStream = nil // Open the data file and read metadata if r.fieldsStream, err = d.OpenInput(fieldsStreamFN, ctx); err != nil { return nil, err } if r.version >= VERSION_CHECKSUM { if maxPointer+codec.FOOTER_LENGTH != r.fieldsStream.Length() { return nil, errors.New(fmt.Sprintf( "Invalid fieldsStream maxPointer (file truncated?): maxPointer=%v, length=%v", maxPointer, r.fieldsStream.Length())) } } else { maxPointer = r.fieldsStream.Length() } r.maxPointer = maxPointer codecNameDat := formatName + CODEC_SFX_DAT var fieldsVersion int if fieldsVersion, err = int32AsInt(codec.CheckHeader(r.fieldsStream, codecNameDat, VERSION_START, VERSION_CURRENT)); err != nil { return nil, err } assert2(r.version == fieldsVersion, "Version mismatch between stored fields index and data: %v != %v", r.version, fieldsVersion) assert(int64(codec.HeaderLength(codecNameDat)) == r.fieldsStream.FilePointer()) r.chunkSize = -1 if r.version >= VERSION_BIG_CHUNKS { if r.chunkSize, err = int32AsInt(r.fieldsStream.ReadVInt()); err != nil { return nil, err } } if r.packedIntsVersion, err = int32AsInt(r.fieldsStream.ReadVInt()); err != nil { return nil, err } r.decompressor = compressionMode.NewDecompressor() r.bytes = make([]byte, 0) if r.version >= VERSION_CHECKSUM { // NOTE: data file is too costly to verify checksum against all the // bytes on open, but fo rnow we at least verify proper structure // of the checksum footer: which looks for FOOTER_MATIC + // algorithmID. This is cheap and can detect some forms of // corruption such as file trucation. if _, err = codec.RetrieveChecksum(r.fieldsStream); err != nil { return nil, err } } success = true return r, nil }
func NewBlockTreeTermsReader(dir store.Directory, fieldInfos FieldInfos, info *SegmentInfo, postingsReader PostingsReaderBase, ctx store.IOContext, segmentSuffix string, indexDivisor int) (p FieldsProducer, err error) { // log.Print("Initializing BlockTreeTermsReader...") fp := &BlockTreeTermsReader{ postingsReader: postingsReader, fields: make(map[string]FieldReader), segment: info.Name, } fp.in, err = dir.OpenInput(util.SegmentFileName(info.Name, segmentSuffix, TERMS_EXTENSION), ctx) if err != nil { return nil, err } success := false var indexIn store.IndexInput defer func() { if !success { fmt.Println("Failed to initialize BlockTreeTermsReader.") if err != nil { fmt.Println("DEBUG ", err) } // this.close() will close in: util.CloseWhileSuppressingError(indexIn, fp) } }() fp.version, err = fp.readHeader(fp.in) if err != nil { return nil, err } // log.Printf("Version: %v", fp.version) if indexDivisor != -1 { filename := util.SegmentFileName(info.Name, segmentSuffix, TERMS_INDEX_EXTENSION) indexIn, err = dir.OpenInput(filename, ctx) if err != nil { return nil, err } indexVersion, err := fp.readIndexHeader(indexIn) if err != nil { return nil, err } // log.Printf("Index version: %v", indexVersion) if int(indexVersion) != fp.version { return nil, errors.New(fmt.Sprintf("mixmatched version files: %v=%v,%v=%v", fp.in, fp.version, indexIn, indexVersion)) } } // verify if indexIn != nil && fp.version >= TERMS_VERSION_CURRENT { if _, err = store.ChecksumEntireFile(indexIn); err != nil { return nil, err } } // Have PostingsReader init itself postingsReader.Init(fp.in) if fp.version >= TERMS_VERSION_CHECKSUM { // NOTE: data file is too costly to verify checksum against all the // bytes on open, but for now we at least verify proper structure // of the checksum footer: which looks for FOOTER_MAGIC + // algorithmID. This is cheap and can detect some forms of // corruption such as file trucation. if _, err = codec.RetrieveChecksum(fp.in); err != nil { return nil, err } } // Read per-field details fp.seekDir(fp.in, fp.dirOffset) if indexDivisor != -1 { fp.seekDir(indexIn, fp.indexDirOffset) } numFields, err := fp.in.ReadVInt() if err != nil { return nil, err } // log.Printf("Fields number: %v", numFields) if numFields < 0 { return nil, errors.New(fmt.Sprintf("invalid numFields: %v (resource=%v)", numFields, fp.in)) } for i := int32(0); i < numFields; i++ { // log.Printf("Next field...") field, err := fp.in.ReadVInt() if err != nil { return nil, err } // log.Printf("Field: %v", field) numTerms, err := fp.in.ReadVLong() if err != nil { return nil, err } assert2(numTerms > 0, "Illegal numTerms for field number: %v (resource=%v)", field, fp.in) // log.Printf("Terms number: %v", numTerms) numBytes, err := fp.in.ReadVInt() if err != nil { return nil, err } assert2(numBytes >= 0, "invalid rootCode for field number: %v, numBytes=%v (resource=%v)", field, numBytes, fp.in) // log.Printf("Bytes number: %v", numBytes) rootCode := make([]byte, numBytes) err = fp.in.ReadBytes(rootCode) if err != nil { return nil, err } fieldInfo := fieldInfos.FieldInfoByNumber(int(field)) assert2(fieldInfo != nil, "invalid field numebr: %v (resource=%v)", field, fp.in) var sumTotalTermFreq int64 if fieldInfo.IndexOptions() == INDEX_OPT_DOCS_ONLY { sumTotalTermFreq = -1 } else { sumTotalTermFreq, err = fp.in.ReadVLong() if err != nil { return nil, err } } sumDocFreq, err := fp.in.ReadVLong() if err != nil { return nil, err } var docCount int if docCount, err = asInt(fp.in.ReadVInt()); err != nil { return nil, err } // fmt.Printf("DocCount: %v\n", docCount) var longsSize int if fp.version >= TERMS_VERSION_META_ARRAY { if longsSize, err = asInt(fp.in.ReadVInt()); err != nil { return nil, err } } assert2(longsSize >= 0, "invalid longsSize for field: %v, longsSize=%v (resource=%v)", fieldInfo.Name, longsSize, fp.in) var minTerm, maxTerm []byte if fp.version >= TERMS_VERSION_MIN_MAX_TERMS { if minTerm, err = readBytesRef(fp.in); err != nil { return nil, err } if maxTerm, err = readBytesRef(fp.in); err != nil { return nil, err } } if docCount < 0 || int(docCount) > info.DocCount() { // #docs with field must be <= #docs return nil, errors.New(fmt.Sprintf( "invalid docCount: %v maxDoc: %v (resource=%v)", docCount, info.DocCount(), fp.in)) } if sumDocFreq < int64(docCount) { // #postings must be >= #docs with field return nil, errors.New(fmt.Sprintf( "invalid sumDocFreq: %v docCount: %v (resource=%v)", sumDocFreq, docCount, fp.in)) } if sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq { // #positions must be >= #postings return nil, errors.New(fmt.Sprintf( "invalid sumTotalTermFreq: %v sumDocFreq: %v (resource=%v)", sumTotalTermFreq, sumDocFreq, fp.in)) } var indexStartFP int64 if indexDivisor != -1 { if indexStartFP, err = indexIn.ReadVLong(); err != nil { return nil, err } } // log.Printf("indexStartFP: %v", indexStartFP) if _, ok := fp.fields[fieldInfo.Name]; ok { return nil, errors.New(fmt.Sprintf( "duplicate field: %v (resource=%v)", fieldInfo.Name, fp.in)) } if fp.fields[fieldInfo.Name], err = newFieldReader(fp, fieldInfo, numTerms, rootCode, sumTotalTermFreq, sumDocFreq, docCount, indexStartFP, longsSize, indexIn, minTerm, maxTerm); err != nil { return nil, err } } if indexDivisor != -1 { if err = indexIn.Close(); err != nil { return nil, err } } success = true return fp, nil }
func NewLucene41PostingsReader(dir store.Directory, fis FieldInfos, si *SegmentInfo, ctx store.IOContext, segmentSuffix string) (r PostingsReaderBase, err error) { // fmt.Println("Initializing Lucene41PostingsReader...") success := false var docIn, posIn, payIn store.IndexInput = nil, nil, nil defer func() { if !success { fmt.Println("Failed to initialize Lucene41PostingsReader.") util.CloseWhileSuppressingError(docIn, posIn, payIn) } }() docIn, err = dir.OpenInput(util.SegmentFileName(si.Name, segmentSuffix, LUCENE41_DOC_EXTENSION), ctx) if err != nil { return nil, err } var version int32 version, err = codec.CheckHeader(docIn, LUCENE41_DOC_CODEC, LUCENE41_VERSION_START, LUCENE41_VERSION_CURRENT) if err != nil { return nil, err } forUtil, err := NewForUtilFrom(docIn) if err != nil { return nil, err } if version >= LUCENE41_VERSION_CHECKSUM { // NOTE: data file is too costly to verify checksum against all the // bytes on open, but for now we at least verify proper structure // of the checksum footer: which looks for FOOTER_MAGIC + // algorithmID. This is cheap and can detect some forms of // corruption such as file trucation. if _, err = codec.RetrieveChecksum(docIn); err != nil { return nil, err } } if fis.HasProx { posIn, err = dir.OpenInput(util.SegmentFileName(si.Name, segmentSuffix, LUCENE41_POS_EXTENSION), ctx) if err != nil { return nil, err } _, err = codec.CheckHeader(posIn, LUCENE41_POS_CODEC, version, version) if err != nil { return nil, err } if version >= LUCENE41_VERSION_CHECKSUM { // NOTE: data file is too costly to verify checksum against all the // bytes on open, but for now we at least verify proper structure // of the checksum footer: which looks for FOOTER_MAGIC + // algorithmID. This is cheap and can detect some forms of // corruption such as file trucation. if _, err = codec.RetrieveChecksum(posIn); err != nil { return nil, err } } if fis.HasPayloads || fis.HasOffsets { payIn, err = dir.OpenInput(util.SegmentFileName(si.Name, segmentSuffix, LUCENE41_PAY_EXTENSION), ctx) if err != nil { return nil, err } _, err = codec.CheckHeader(payIn, LUCENE41_PAY_CODEC, version, version) if err != nil { return nil, err } if version >= LUCENE41_VERSION_CHECKSUM { // NOTE: data file is too costly to verify checksum against all the // bytes on open, but for now we at least verify proper structure // of the checksum footer: which looks for FOOTER_MAGIC + // algorithmID. This is cheap and can detect some forms of // corruption such as file trucation. if _, err = codec.RetrieveChecksum(payIn); err != nil { return nil, err } } } } success = true return &Lucene41PostingsReader{docIn, posIn, payIn, forUtil, int(version)}, nil }
func newLucene49NormsProducer(state SegmentReadState, dataCodec, dataExtension, metaCodec, metaExtension string) (np *NormsProducer, err error) { np = &NormsProducer{ Locker: new(sync.Mutex), norms: make(map[int]*NormsEntry), instances: make(map[int]NumericDocValues), maxDoc: state.SegmentInfo.DocCount(), ramBytesUsed: util.ShallowSizeOfInstance(reflect.TypeOf(np)), } metaName := util.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, metaExtension) // read in the entries from the metadta file. var in store.ChecksumIndexInput if in, err = state.Dir.OpenChecksumInput(metaName, state.Context); err != nil { return nil, err } if err = func() error { var success = false defer func() { if success { err = util.Close(in) } else { util.CloseWhileSuppressingError(in) } }() if np.version, err = codec.CheckHeader(in, metaCodec, VERSION_START, VERSION_CURRENT); err != nil { return err } if err = np.readFields(in, state.FieldInfos); err != nil { return err } if _, err = codec.CheckFooter(in); err != nil { return err } success = true return nil }(); err != nil { return nil, err } dataName := util.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, dataExtension) if np.data, err = state.Dir.OpenInput(dataName, state.Context); err != nil { return nil, err } var success = false defer func() { if !success { util.CloseWhileSuppressingError(np.data) } }() var version2 int32 if version2, err = codec.CheckHeader(np.data, dataCodec, VERSION_START, VERSION_CURRENT); err != nil { return nil, err } if version2 != np.version { return nil, errors.New("Format versions mismatch") } // NOTE: data file is too costly to verify checksum against all the // bytes on open, but fo rnow we at least verify proper structure // of the checksum footer: which looks for FOOTER_MATIC + // algorithmID. This is cheap and can detect some forms of // corruption such as file trucation. if _, err = codec.RetrieveChecksum(np.data); err != nil { return nil, err } success = true return np, nil }