// NewLucene41PostingsReader opens the .doc (and, when the segment has
// positions, the .pos and optional .pay) postings files for a segment,
// validates their codec headers, and returns a PostingsReaderBase over
// them. On any failure all opened inputs are closed (suppressing
// secondary close errors) before the error is returned.
func NewLucene41PostingsReader(dir store.Directory, fis model.FieldInfos, si *model.SegmentInfo, ctx store.IOContext, segmentSuffix string) (r PostingsReaderBase, err error) {
	log.Print("Initializing Lucene41PostingsReader...")
	// success stays false until every file is open and validated; the
	// deferred cleanup below keys off it.
	success := false
	var docIn, posIn, payIn store.IndexInput = nil, nil, nil
	defer func() {
		if !success {
			log.Print("Failed to initialize Lucene41PostingsReader.")
			if err != nil {
				log.Print("DEBUG ", err)
			}
			// Close whatever subset was opened; nil inputs are tolerated.
			util.CloseWhileSuppressingError(docIn, posIn, payIn)
		}
	}()
	docIn, err = dir.OpenInput(util.SegmentFileName(si.Name, segmentSuffix, LUCENE41_DOC_EXTENSION), ctx)
	if err != nil {
		return r, err
	}
	// NOTE(review): the minimum accepted version here is
	// LUCENE41_VERSION_CURRENT rather than LUCENE41_VERSION_START, so
	// segments written with an older (but normally supported) format
	// version would be rejected — compare the sibling reader that passes
	// VERSION_START. Confirm this is intentional.
	_, err = codec.CheckHeader(docIn, LUCENE41_DOC_CODEC, LUCENE41_VERSION_CURRENT, LUCENE41_VERSION_CURRENT)
	if err != nil {
		return r, err
	}
	forUtil, err := NewForUtil(docIn)
	if err != nil {
		return r, err
	}
	// Position/payload files exist only when some field indexes proximity data.
	if fis.HasProx {
		posIn, err = dir.OpenInput(util.SegmentFileName(si.Name, segmentSuffix, LUCENE41_POS_EXTENSION), ctx)
		if err != nil {
			return r, err
		}
		_, err = codec.CheckHeader(posIn, LUCENE41_POS_CODEC, LUCENE41_VERSION_CURRENT, LUCENE41_VERSION_CURRENT)
		if err != nil {
			return r, err
		}
		if fis.HasPayloads || fis.HasOffsets {
			payIn, err = dir.OpenInput(util.SegmentFileName(si.Name, segmentSuffix, LUCENE41_PAY_EXTENSION), ctx)
			if err != nil {
				return r, err
			}
			_, err = codec.CheckHeader(payIn, LUCENE41_PAY_CODEC, LUCENE41_VERSION_CURRENT, LUCENE41_VERSION_CURRENT)
			if err != nil {
				return r, err
			}
		}
	}
	success = true
	return &Lucene41PostingsReader{docIn, posIn, payIn, forUtil}, nil
}
func newLucene42DocValuesProducer(state SegmentReadState, dataCodec, dataExtension, metaCodec, metaExtension string) (dvp *Lucene42DocValuesProducer, err error) { dvp = &Lucene42DocValuesProducer{ numericInstances: make(map[int]NumericDocValues), } dvp.maxDoc = state.segmentInfo.DocCount() metaName := util.SegmentFileName(state.segmentInfo.Name, state.segmentSuffix, metaExtension) // read in the entries from the metadata file. in, err := state.dir.OpenInput(metaName, state.context) if err != nil { return dvp, err } success := false defer func() { if success { err = util.Close(in) } else { util.CloseWhileSuppressingError(in) } }() version, err := codec.CheckHeader(in, metaCodec, LUCENE42_DV_VERSION_START, LUCENE42_DV_VERSION_CURRENT) if err != nil { return dvp, err } dvp.numerics = make(map[int]NumericEntry) dvp.binaries = make(map[int]BinaryEntry) dvp.fsts = make(map[int]FSTEntry) err = dvp.readFields(in) if err != nil { return dvp, err } success = true success = false dataName := util.SegmentFileName(state.segmentInfo.Name, state.segmentSuffix, dataExtension) dvp.data, err = state.dir.OpenInput(dataName, state.context) if err != nil { return dvp, err } version2, err := codec.CheckHeader(dvp.data, dataCodec, LUCENE42_DV_VERSION_START, LUCENE42_DV_VERSION_CURRENT) if err != nil { return dvp, err } if version != version2 { return dvp, errors.New("Format versions mismatch") } return dvp, nil }
func TestClone(t *testing.T) { fmt.Println("Testing Loading FST...") path := "../search/testdata/belfrysample" d, err := OpenFSDirectory(path) if err != nil { t.Error(err) } ctx := NewIOContextBool(false) in, err := d.OpenInput("_0_Lucene41_0.tip", ctx) if err != nil { t.Error(err) } version, err := codec.CheckHeader(in, "BLOCK_TREE_TERMS_INDEX", 0, 1) var indexDirOffset int64 = 0 if version < 1 { indexDirOffset, err = in.ReadLong() if err != nil { t.Error(err) } } else { // >= 1 in.Seek(in.Length() - 8) indexDirOffset, err = in.ReadLong() if err != nil { t.Error(err) } } fmt.Println("indexDirOffset:", indexDirOffset) in.Seek(indexDirOffset) indexStartFP, err := in.ReadVLong() if err != nil { t.Error(err) } fmt.Println("indexStartFP:", indexStartFP) fmt.Println("Before clone", in) clone := in.Clone() fmt.Println("After clone", clone) if _, ok := clone.(*SimpleFSIndexInput); !ok { t.Error("Clone() should return *SimpleFSIndexInput.") } clone.Seek(indexStartFP) fmt.Println("After clone.Seek()", clone) _, err = codec.CheckHeader(clone, "FST", 3, 4) if err != nil { t.Error(err) } }
// NewCompoundFileDirectory opens (or, when openForWrite is true,
// prepares to write) a compound file (.cfs) inside the given directory.
// In read mode it opens the data file, parses the entry table, and —
// for checksum-era versions — verifies the data header and the
// structural integrity of the checksum footer. In write mode it only
// sets up a writer; nothing is read.
func NewCompoundFileDirectory(directory Directory, fileName string, context IOContext, openForWrite bool) (d *CompoundFileDirectory, err error) {
	self := &CompoundFileDirectory{
		Locker:         &sync.Mutex{},
		directory:      directory,
		fileName:       fileName,
		readBufferSize: bufferSize(context),
		openForWrite:   openForWrite}
	self.DirectoryImpl = NewDirectoryImpl(self)
	self.BaseDirectory = NewBaseDirectory(self)
	if !openForWrite {
		// log.Printf("Open for read.")
		// success gates the deferred cleanup: the data handle is closed
		// (suppressing secondary errors) on any failure below.
		success := false
		defer func() {
			if !success {
				util.CloseWhileSuppressingError(self.handle)
			}
		}()
		self.handle, err = directory.OpenInput(fileName, context)
		if err != nil {
			return nil, err
		}
		// readEntries also populates self.version as a side effect.
		self.entries, err = self.readEntries(self.handle, directory, fileName)
		if err != nil {
			return nil, err
		}
		if self.version >= CFD_VERSION_CHECKSUM {
			if _, err = codec.CheckHeader(self.handle, CFD_DATA_CODEC, int32(self.version), int32(self.version)); err != nil {
				return nil, err
			}
			// NOTE: data file is too costly to verify checksum against all the
			// bytes on open, but for now we at least verify proper structure
			// of the checksum footer: which looks for FOOTER_MAGIC +
			// algorithmID. This is cheap and can detect some forms of
			// corruption such as file trucation.
			if _, err = codec.RetrieveChecksum(self.handle); err != nil {
				return nil, err
			}
		}
		success = true
		self.BaseDirectory.IsOpen = true
		return self, nil
	} else {
		// Writing a compound file nested in another compound file is not allowed.
		assert2(reflect.TypeOf(directory).Name() != "CompoundFileDirectory", "compound file inside of compound file: %v", fileName)
		self.entries = SENTINEL
		self.IsOpen = true
		self.writer = newCompoundFileWriter(directory, fileName)
		self.handle = nil
		return self, nil
	}
}
func (r *BlockTreeTermsReader) readIndexHeader(input store.IndexInput) (version int, err error) { version, err = asInt(codec.CheckHeader(input, TERMS_INDEX_CODEC_NAME, TERMS_VERSION_START, TERMS_VERSION_CURRENT)) if err != nil { return version, err } if version < TERMS_VERSION_APPEND_ONLY { r.indexDirOffset, err = input.ReadLong() if err != nil { return version, err } } return version, nil }
func (r *BlockTreeTermsReader) readHeader(input store.IndexInput) (version int, err error) { version, err = asInt(codec.CheckHeader(input, BTT_CODEC_NAME, BTT_VERSION_START, BTT_VERSION_CURRENT)) if err != nil { return int(version), err } if version < BTT_VERSION_APPEND_ONLY { r.dirOffset, err = input.ReadLong() if err != nil { return int(version), err } } return int(version), nil }
func NewPackedReader(in DataInput) (r PackedIntsReader, err error) { if version, err := codec.CheckHeader(in, PACKED_CODEC_NAME, PACKED_VERSION_START, VERSION_CURRENT); err == nil { if bitsPerValue, err := asUint32(in.ReadVInt()); err == nil { // assert bitsPerValue > 0 && bitsPerValue <= 64 if valueCount, err := in.ReadVInt(); err == nil { if id, err := in.ReadVInt(); err == nil { format := PackedFormat(id) return ReaderNoHeader(in, format, version, valueCount, bitsPerValue) } } } } return }
func (r *Lucene41PostingsReader) Init(termsIn store.IndexInput) error { log.Printf("Initializing from: %v", termsIn) // Make sure we are talking to the matching postings writer _, err := codec.CheckHeader(termsIn, LUCENE41_TERMS_CODEC, LUCENE41_VERSION_START, LUCENE41_VERSION_CURRENT) if err != nil { return err } indexBlockSize, err := termsIn.ReadVInt() if err != nil { return err } log.Printf("Index block size: %v", indexBlockSize) if indexBlockSize != LUCENE41_BLOCK_SIZE { panic(fmt.Sprintf("index-time BLOCK_SIZE (%v) != read-time BLOCK_SIZE (%v)", indexBlockSize, LUCENE41_BLOCK_SIZE)) } return nil }
func TestReadingFromSlicedIndexInputOSX(t *testing.T) { t.Logf("TestReadingFromSlicedIndexInputOSX...") path := "../search/testdata/osx/belfrysample" d, err := OpenFSDirectory(path) if err != nil { t.Error(err) } ctx := NewIOContextBool(false) cd, err := NewCompoundFileDirectory(d, "_0.cfs", ctx, false) name := util.SegmentFileName("_0", "Lucene41_0", "pos") posIn, err := cd.OpenInput(name, ctx) if err != nil { t.Error(err) } t.Log(posIn) codec.CheckHeader(posIn, "Lucene41PostingsWriterPos", 0, 0) // codec header mismatch: actual header=0 vs expected header=1071082519 (resource: SlicedIndexInput(SlicedIndexInput(_0_Lucene41_0.pos in SimpleFSIndexInput(path='/private/tmp/kc/index/belfrysample/_0.cfs')) in SimpleFSIndexInput(path='/private/tmp/kc/index/belfrysample/_0.cfs') slice=1461:3426)) }
func TestCheckHeaderWin8(t *testing.T) { fmt.Println("TestCheckHeaderWin8...") path := "../search/testdata/win8/belfrysample" d, err := OpenFSDirectory(path) if err != nil { t.Error(err) } ctx := NewIOContextBool(false) cd, err := NewCompoundFileDirectory(d, "_0.cfs", ctx, false) if err != nil { t.Error(err) } r, err := cd.OpenInput("_0_Lucene41_0.pos", ctx) _, err = codec.CheckHeader(r, "Lucene41PostingsWriterPos", 0, 0) if err != nil { t.Error(err) } }
// Sole constructor func newCompressingStoredFieldsReader(d store.Directory, si *model.SegmentInfo, segmentSuffix string, fn model.FieldInfos, ctx store.IOContext, formatName string, compressionMode compressing.CompressionMode) (r *CompressingStoredFieldsReader, err error) { r = &CompressingStoredFieldsReader{} r.compressionMode = compressionMode segment := si.Name r.fieldInfos = fn r.numDocs = si.DocCount() var indexStream store.IndexInput success := false defer func() { if !success { log.Println("Failed to initialize CompressionStoredFieldsReader.") if err != nil { log.Print(err) } util.Close(r, indexStream) } }() // Load the index into memory indexStreamFN := util.SegmentFileName(segment, segmentSuffix, lucene40.FIELDS_INDEX_EXTENSION) indexStream, err = d.OpenInput(indexStreamFN, ctx) if err != nil { return nil, err } codecNameIdx := formatName + CODEC_SFX_IDX codec.CheckHeader(indexStream, codecNameIdx, CODEC_SFX_VERSION_START, CODEC_SFX_VERSION_CURRENT) if int64(codec.HeaderLength(codecNameIdx)) != indexStream.FilePointer() { panic("assert fail") } r.indexReader, err = newCompressingStoredFieldsIndexReader(indexStream, si) if err != nil { return nil, err } err = indexStream.Close() if err != nil { return nil, err } indexStream = nil // Open the data file and read metadata fieldsStreamFN := util.SegmentFileName(segment, segmentSuffix, lucene40.FIELDS_EXTENSION) r.fieldsStream, err = d.OpenInput(fieldsStreamFN, ctx) if err != nil { return nil, err } codecNameDat := formatName + CODEC_SFX_DAT codec.CheckHeader(r.fieldsStream, codecNameDat, CODEC_SFX_VERSION_START, CODEC_SFX_VERSION_CURRENT) if int64(codec.HeaderLength(codecNameDat)) != r.fieldsStream.FilePointer() { panic("assert fail") } n, err := r.fieldsStream.ReadVInt() if err != nil { return nil, err } r.packedIntsVersion = int(n) r.decompressor = compressionMode.NewDecompressor() r.bytes = make([]byte, 0) success = true return r, nil }
fileName := util.SegmentFileName(segment, "", LUCENE40_SI_EXTENSION) input, err := dir.OpenInput(fileName, context) if err != nil { return si, err } success := false defer func() { if !success { util.CloseWhileSuppressingError(input) } else { input.Close() } }() _, err = codec.CheckHeader(input, LUCENE40_CODEC_NAME, LUCENE40_VERSION_START, LUCENE40_VERSION_CURRENT) if err != nil { return si, err } version, err := input.ReadString() if err != nil { return si, err } docCount, err := input.ReadInt() if err != nil { return si, err } if docCount < 0 { return si, errors.New(fmt.Sprintf("invalid docCount: %v (resource=%v)", docCount, input)) } sicf, err := input.ReadByte()
func (d *CompoundFileDirectory) readEntries(handle IndexInput, dir Directory, name string) (mapping map[string]FileSlice, err error) { var stream IndexInput = nil var entriesStream ChecksumIndexInput = nil // read the first VInt. If it is negative, it's the version number // otherwise it's the count (pre-3.1 indexes) var success = false defer func() { if success { err = util.Close(stream, entriesStream) } else { util.CloseWhileSuppressingError(stream, entriesStream) } }() stream = handle.Clone() // fmt.Printf("Reading from stream: %v\n", stream) firstInt, err := stream.ReadVInt() if err != nil { return nil, err } // impossible for 3.0 to have 63 files in a .cfs, CFS writer was not visible // and separate norms/etc are outside of cfs. if firstInt == int32(CODEC_MAGIC_BYTE1) { if secondByte, err := stream.ReadByte(); err == nil { if thirdByte, err := stream.ReadByte(); err == nil { if fourthByte, err := stream.ReadByte(); err == nil { if secondByte != CODEC_MAGIC_BYTE2 || thirdByte != CODEC_MAGIC_BYTE3 || fourthByte != CODEC_MAGIC_BYTE4 { return nil, errors.New(fmt.Sprintf( "Illegal/impossible header for CFS file: %v,%v,%v", secondByte, thirdByte, fourthByte)) } } } } if err != nil { return nil, err } d.version, err = int32ToInt(codec.CheckHeaderNoMagic(stream, CFD_DATA_CODEC, CFD_VERSION_START, CFD_VERSION_CURRENT)) if err != nil { return nil, err } entriesFileName := util.SegmentFileName(util.StripExtension(name), "", COMPOUND_FILE_ENTRIES_EXTENSION) entriesStream, err = dir.OpenChecksumInput(entriesFileName, IO_CONTEXT_READONCE) if err != nil { return nil, err } _, err = codec.CheckHeader(entriesStream, CFD_ENTRY_CODEC, CFD_VERSION_START, CFD_VERSION_CURRENT) if err != nil { return nil, err } numEntries, err := entriesStream.ReadVInt() if err != nil { return nil, err } mapping = make(map[string]FileSlice) // fmt.Printf("Entries number: %v\n", numEntries) for i := int32(0); i < numEntries; i++ { id, err := entriesStream.ReadString() if err != nil { return nil, err 
} if _, ok := mapping[id]; ok { return nil, errors.New(fmt.Sprintf( "Duplicate cfs entry id=%v in CFS: %v", id, entriesStream)) } // log.Printf("Found entry: %v", id) offset, err := entriesStream.ReadLong() if err != nil { return nil, err } length, err := entriesStream.ReadLong() if err != nil { return nil, err } mapping[id] = FileSlice{offset, length} } if d.version >= CFD_VERSION_CHECKSUM { _, err = codec.CheckFooter(entriesStream) } else { err = codec.CheckEOF(entriesStream) } if err != nil { return nil, err } } else { // TODO remove once 3.x is not supported anymore panic("not supported yet; will also be obsolete soon") } success = true return mapping, nil }
/** Load a previously saved FST; maxBlockBits allows you to * control the size of the byte[] pages used to hold the FST bytes. */ func loadFST3(in util.DataInput, outputs Outputs, maxBlockBits uint32) (fst *FST, err error) { log.Printf("Loading FST from %v and output to %v...", in, outputs) defer func() { if err != nil { log.Print("Failed to load FST.") log.Printf("DEBUG ", err) } }() fst = &FST{outputs: outputs, startNode: -1} if maxBlockBits < 1 || maxBlockBits > 30 { panic(fmt.Sprintf("maxBlockBits should 1..30; got %v", maxBlockBits)) } // NOTE: only reads most recent format; we don't have // back-compat promise for FSTs (they are experimental): fst.version, err = codec.CheckHeader(in, FST_FILE_FORMAT_NAME, FST_VERSION_PACKED, FST_VERSION_VINT_TARGET) if err != nil { return fst, err } if b, err := in.ReadByte(); err == nil { fst.packed = (b == 1) } else { return fst, err } if b, err := in.ReadByte(); err == nil { if b == 1 { // accepts empty string // 1 KB blocks: emptyBytes := newBytesStoreFromBits(10) if numBytes, err := in.ReadVInt(); err == nil { log.Printf("Number of bytes: %v", numBytes) emptyBytes.CopyBytes(in, int64(numBytes)) // De-serialize empty-string output: var reader BytesReader if fst.packed { log.Printf("Forward reader.") reader = emptyBytes.forwardReader() } else { log.Printf("Reverse reader.") reader = emptyBytes.reverseReader() // NoOutputs uses 0 bytes when writing its output, // so we have to check here else BytesStore gets // angry: if numBytes > 0 { reader.setPosition(int64(numBytes - 1)) } } log.Printf("Reading final output from %v to %v...", reader, outputs) fst.emptyOutput, err = outputs.ReadFinalOutput(reader) } } // else emptyOutput = nil } if err != nil { return fst, err } if t, err := in.ReadByte(); err == nil { switch t { case 0: fst.inputType = INPUT_TYPE_BYTE1 case 1: fst.inputType = INPUT_TYPE_BYTE2 case 2: fst.inputType = INPUT_TYPE_BYTE4 default: panic(fmt.Sprintf("invalid input type %v", t)) } } if err != nil { return fst, 
err } if fst.packed { fst.nodeRefToAddress, err = packed.NewPackedReader(in) if err != nil { return fst, err } } // else nodeRefToAddress = nil if fst.startNode, err = in.ReadVLong(); err == nil { if fst.nodeCount, err = in.ReadVLong(); err == nil { if fst.arcCount, err = in.ReadVLong(); err == nil { if fst.arcWithOutputCount, err = in.ReadVLong(); err == nil { if numBytes, err := in.ReadVLong(); err == nil { if fst.bytes, err = newBytesStoreFromInput(in, numBytes, 1<<maxBlockBits); err == nil { fst.NO_OUTPUT = outputs.NoOutput() err = fst.cacheRootArcs() // NOTE: bogus because this is only used during // building; we need to break out mutable FST from // immutable // fst.allowArrayArcs = false } } } } } } return fst, err }
// NewLucene41PostingsReader opens the .doc (and, when the segment has
// positions, the .pos and optional .pay) postings files, validates
// their codec headers, and — for checksum-era versions — verifies the
// structure of each file's checksum footer. The .pos/.pay headers must
// carry exactly the same version the .doc header reported. On any
// failure all opened inputs are closed with errors suppressed.
func NewLucene41PostingsReader(dir store.Directory, fis FieldInfos, si *SegmentInfo, ctx store.IOContext, segmentSuffix string) (r PostingsReaderBase, err error) {
	// fmt.Println("Initializing Lucene41PostingsReader...")
	// success gates the deferred cleanup below.
	success := false
	var docIn, posIn, payIn store.IndexInput = nil, nil, nil
	defer func() {
		if !success {
			fmt.Println("Failed to initialize Lucene41PostingsReader.")
			util.CloseWhileSuppressingError(docIn, posIn, payIn)
		}
	}()
	docIn, err = dir.OpenInput(util.SegmentFileName(si.Name, segmentSuffix, LUCENE41_DOC_EXTENSION), ctx)
	if err != nil {
		return nil, err
	}
	var version int32
	version, err = codec.CheckHeader(docIn, LUCENE41_DOC_CODEC, LUCENE41_VERSION_START, LUCENE41_VERSION_CURRENT)
	if err != nil {
		return nil, err
	}
	forUtil, err := NewForUtilFrom(docIn)
	if err != nil {
		return nil, err
	}
	if version >= LUCENE41_VERSION_CHECKSUM {
		// NOTE: data file is too costly to verify checksum against all the
		// bytes on open, but for now we at least verify proper structure
		// of the checksum footer: which looks for FOOTER_MAGIC +
		// algorithmID. This is cheap and can detect some forms of
		// corruption such as file trucation.
		if _, err = codec.RetrieveChecksum(docIn); err != nil {
			return nil, err
		}
	}
	// Position/payload files exist only when some field indexes proximity data.
	if fis.HasProx {
		posIn, err = dir.OpenInput(util.SegmentFileName(si.Name, segmentSuffix, LUCENE41_POS_EXTENSION), ctx)
		if err != nil {
			return nil, err
		}
		// The .pos header must match the exact version read from .doc.
		_, err = codec.CheckHeader(posIn, LUCENE41_POS_CODEC, version, version)
		if err != nil {
			return nil, err
		}
		if version >= LUCENE41_VERSION_CHECKSUM {
			// NOTE: data file is too costly to verify checksum against all the
			// bytes on open, but for now we at least verify proper structure
			// of the checksum footer: which looks for FOOTER_MAGIC +
			// algorithmID. This is cheap and can detect some forms of
			// corruption such as file trucation.
			if _, err = codec.RetrieveChecksum(posIn); err != nil {
				return nil, err
			}
		}
		if fis.HasPayloads || fis.HasOffsets {
			payIn, err = dir.OpenInput(util.SegmentFileName(si.Name, segmentSuffix, LUCENE41_PAY_EXTENSION), ctx)
			if err != nil {
				return nil, err
			}
			_, err = codec.CheckHeader(payIn, LUCENE41_PAY_CODEC, version, version)
			if err != nil {
				return nil, err
			}
			if version >= LUCENE41_VERSION_CHECKSUM {
				// NOTE: data file is too costly to verify checksum against all the
				// bytes on open, but for now we at least verify proper structure
				// of the checksum footer: which looks for FOOTER_MAGIC +
				// algorithmID. This is cheap and can detect some forms of
				// corruption such as file trucation.
				if _, err = codec.RetrieveChecksum(payIn); err != nil {
					return nil, err
				}
			}
		}
	}
	success = true
	return &Lucene41PostingsReader{docIn, posIn, payIn, forUtil, int(version)}, nil
}
// newLucene49NormsProducer opens the norms metadata/data file pair for
// a segment, reads all norms entries from the metadata file (verifying
// its checksum footer), opens the data file, and verifies both files
// carry the same format version plus the structural integrity of the
// data file's checksum footer.
func newLucene49NormsProducer(state SegmentReadState, dataCodec, dataExtension, metaCodec, metaExtension string) (np *NormsProducer, err error) {
	np = &NormsProducer{
		Locker:       new(sync.Mutex),
		norms:        make(map[int]*NormsEntry),
		instances:    make(map[int]NumericDocValues),
		maxDoc:       state.SegmentInfo.DocCount(),
		ramBytesUsed: util.ShallowSizeOfInstance(reflect.TypeOf(np)),
	}
	metaName := util.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, metaExtension)
	// read in the entries from the metadata file.
	var in store.ChecksumIndexInput
	if in, err = state.Dir.OpenChecksumInput(metaName, state.Context); err != nil {
		return nil, err
	}
	// Metadata phase is scoped in a closure so `in` is always closed here.
	// NOTE(review): the deferred `err = util.Close(in)` assigns the OUTER
	// err, which is then overwritten by this closure's returned nil — a
	// Close error on the happy path appears to be lost; confirm intended.
	if err = func() error {
		var success = false
		defer func() {
			if success {
				err = util.Close(in)
			} else {
				util.CloseWhileSuppressingError(in)
			}
		}()
		if np.version, err = codec.CheckHeader(in, metaCodec, VERSION_START, VERSION_CURRENT); err != nil {
			return err
		}
		if err = np.readFields(in, state.FieldInfos); err != nil {
			return err
		}
		if _, err = codec.CheckFooter(in); err != nil {
			return err
		}
		success = true
		return nil
	}(); err != nil {
		return nil, err
	}
	dataName := util.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, dataExtension)
	if np.data, err = state.Dir.OpenInput(dataName, state.Context); err != nil {
		return nil, err
	}
	// Close the data handle if any of the validation below fails.
	var success = false
	defer func() {
		if !success {
			util.CloseWhileSuppressingError(np.data)
		}
	}()
	var version2 int32
	if version2, err = codec.CheckHeader(np.data, dataCodec, VERSION_START, VERSION_CURRENT); err != nil {
		return nil, err
	}
	if version2 != np.version {
		return nil, errors.New("Format versions mismatch")
	}
	// NOTE: data file is too costly to verify checksum against all the
	// bytes on open, but for now we at least verify proper structure
	// of the checksum footer: which looks for FOOTER_MAGIC +
	// algorithmID. This is cheap and can detect some forms of
	// corruption such as file truncation.
	if _, err = codec.RetrieveChecksum(np.data); err != nil {
		return nil, err
	}
	success = true
	return np, nil
}
var input store.ChecksumIndexInput if input, err = dir.OpenChecksumInput(filename, ctx); err != nil { return } var success = false defer func() { if success { err = input.Close() } else { util.CloseWhileSuppressingError(input) } }() var codecVersion int if codecVersion, err = asInt(codec.CheckHeader(input, FI_CODEC_NAME, FI_FORMAT_START, FI_FORMAT_CURRENT)); err != nil { return } var size int if size, err = asInt(input.ReadVInt()); err != nil { return } var infos []*FieldInfo var name string var fieldNumber int32 var bits, val byte var isIndexed, storeTermVector, omitNorms, storePayloads bool var indexOptions IndexOptions var docValuesType, normsType DocValuesType
if err != nil { return fi, err } log.Printf("Reading %v", input) success := false defer func() { if success { input.Close() } else { util.CloseWhileHandlingError(err, input) } }() _, err = codec.CheckHeader(input, LUCENE42_FI_CODEC_NAME, LUCENE42_FI_FORMAT_START, LUCENE42_FI_FORMAT_CURRENT) if err != nil { return fi, err } size, err := input.ReadVInt() //read in the size if err != nil { return fi, err } log.Printf("Found %v FieldInfos.", size) infos := make([]model.FieldInfo, size) for i, _ := range infos { name, err := input.ReadString() if err != nil {
// Sole constructor.
// newCompressingStoredFieldsReader loads the stored-fields index file
// into memory (verifying its footer or EOF depending on version), then
// opens the data file, cross-checks its length against the maxPointer
// recorded in the index, validates matching format versions, and reads
// the chunk-size / packed-ints metadata. The data stream stays open on
// the returned reader; the index stream is closed before returning.
func newCompressingStoredFieldsReader(d store.Directory, si *model.SegmentInfo, segmentSuffix string, fn model.FieldInfos, ctx store.IOContext, formatName string, compressionMode CompressionMode) (r *CompressingStoredFieldsReader, err error) {
	r = &CompressingStoredFieldsReader{}
	r.compressionMode = compressionMode
	segment := si.Name
	r.fieldInfos = fn
	r.numDocs = si.DocCount()
	var indexStream store.ChecksumIndexInput
	// success gates the deferred cleanup of the reader and index stream.
	success := false
	defer func() {
		if !success {
			util.CloseWhileSuppressingError(r, indexStream)
		}
	}()
	indexStreamFN := util.SegmentFileName(segment, segmentSuffix, lucene40.FIELDS_INDEX_EXTENSION)
	fieldsStreamFN := util.SegmentFileName(segment, segmentSuffix, lucene40.FIELDS_EXTENSION)
	// Load the index into memory
	if indexStream, err = d.OpenChecksumInput(indexStreamFN, ctx); err != nil {
		return nil, err
	}
	codecNameIdx := formatName + CODEC_SFX_IDX
	if r.version, err = int32AsInt(codec.CheckHeader(indexStream, codecNameIdx, VERSION_START, VERSION_CURRENT)); err != nil {
		return nil, err
	}
	assert(int64(codec.HeaderLength(codecNameIdx)) == indexStream.FilePointer())
	if r.indexReader, err = newCompressingStoredFieldsIndexReader(indexStream, si); err != nil {
		return nil, err
	}
	// Checksum-era index files record the data file's logical end (maxPointer)
	// followed by a footer; older files are just read to EOF.
	var maxPointer int64 = -1
	if r.version >= VERSION_CHECKSUM {
		if maxPointer, err = indexStream.ReadVLong(); err != nil {
			return nil, err
		}
		if _, err = codec.CheckFooter(indexStream); err != nil {
			return nil, err
		}
	} else {
		if err = codec.CheckEOF(indexStream); err != nil {
			return nil, err
		}
	}
	if err = indexStream.Close(); err != nil {
		return nil, err
	}
	// nil it so the deferred cleanup won't double-close on later failures.
	indexStream = nil
	// Open the data file and read metadata
	if r.fieldsStream, err = d.OpenInput(fieldsStreamFN, ctx); err != nil {
		return nil, err
	}
	if r.version >= VERSION_CHECKSUM {
		// maxPointer + footer must account for the whole file, else it was truncated.
		if maxPointer+codec.FOOTER_LENGTH != r.fieldsStream.Length() {
			return nil, errors.New(fmt.Sprintf(
				"Invalid fieldsStream maxPointer (file truncated?): maxPointer=%v, length=%v",
				maxPointer, r.fieldsStream.Length()))
		}
	} else {
		maxPointer = r.fieldsStream.Length()
	}
	r.maxPointer = maxPointer
	codecNameDat := formatName + CODEC_SFX_DAT
	var fieldsVersion int
	if fieldsVersion, err = int32AsInt(codec.CheckHeader(r.fieldsStream, codecNameDat, VERSION_START, VERSION_CURRENT)); err != nil {
		return nil, err
	}
	assert2(r.version == fieldsVersion, "Version mismatch between stored fields index and data: %v != %v", r.version, fieldsVersion)
	assert(int64(codec.HeaderLength(codecNameDat)) == r.fieldsStream.FilePointer())
	r.chunkSize = -1
	if r.version >= VERSION_BIG_CHUNKS {
		if r.chunkSize, err = int32AsInt(r.fieldsStream.ReadVInt()); err != nil {
			return nil, err
		}
	}
	if r.packedIntsVersion, err = int32AsInt(r.fieldsStream.ReadVInt()); err != nil {
		return nil, err
	}
	r.decompressor = compressionMode.NewDecompressor()
	r.bytes = make([]byte, 0)
	if r.version >= VERSION_CHECKSUM {
		// NOTE: data file is too costly to verify checksum against all the
		// bytes on open, but for now we at least verify proper structure
		// of the checksum footer: which looks for FOOTER_MAGIC +
		// algorithmID. This is cheap and can detect some forms of
		// corruption such as file truncation.
		if _, err = codec.RetrieveChecksum(r.fieldsStream); err != nil {
			return nil, err
		}
	}
	success = true
	return r, nil
}
func (r *Lucene40SegmentInfoReader) Read(dir store.Directory, segment string, context store.IOContext) (si *SegmentInfo, err error) { si = new(SegmentInfo) fileName := util.SegmentFileName(segment, "", LUCENE40_SI_EXTENSION) input, err := dir.OpenInput(fileName, context) if err != nil { return nil, err } success := false defer func() { if !success { util.CloseWhileSuppressingError(input) } else { input.Close() } }() _, err = codec.CheckHeader(input, LUCENE40_CODEC_NAME, LUCENE40_VERSION_START, LUCENE40_VERSION_CURRENT) if err != nil { return nil, err } versionStr, err := input.ReadString() if err != nil { return nil, err } version, err := util.ParseVersion(versionStr) if err != nil { return nil, err } docCount, err := input.ReadInt() if err != nil { return nil, err } if docCount < 0 { return nil, errors.New(fmt.Sprintf("invalid docCount: %v (resource=%v)", docCount, input)) } sicf, err := input.ReadByte() if err != nil { return nil, err } isCompoundFile := (sicf == SEGMENT_INFO_YES) diagnostics, err := input.ReadStringStringMap() if err != nil { return nil, err } _, err = input.ReadStringStringMap() // read deprecated attributes if err != nil { return nil, err } files, err := input.ReadStringSet() if err != nil { return nil, err } if err = codec.CheckEOF(input); err != nil { return nil, err } si = NewSegmentInfo(dir, version, segment, int(docCount), isCompoundFile, nil, diagnostics) si.SetFiles(files) success = true return si, nil }