/* Read a particular segmentFileName. Note that this may return IO error if a commit is in process. */
func (sis *SegmentInfos) Read(directory store.Directory, segmentFileName string) (err error) {
	// fmt.Printf("Reading segment info from %v...\n", segmentFileName)

	// Clear any previous segments:
	sis.Clear()
	// Both generations track the file we are reading now; lastGeneration
	// is what a subsequent commit would be based on.
	sis.generation = GenerationFromSegmentsFileName(segmentFileName)
	sis.lastGeneration = sis.generation

	var input store.ChecksumIndexInput
	if input, err = directory.OpenChecksumInput(segmentFileName, store.IO_CONTEXT_READ); err != nil {
		return
	}

	var success = false
	defer func() {
		if !success {
			// Clear any segment infos we had loaded so we
			// have a clean slate on retry:
			sis.Clear()
			util.CloseWhileSuppressingError(input)
		} else {
			// On success, propagate any Close error through the
			// named return value.
			err = input.Close()
		}
	}()

	// First int32 is either the 4.0+ codec magic or a pre-4.0 format tag.
	var format int
	if format, err = asInt(input.ReadInt()); err != nil {
		return
	}
	// actualFormat is the on-disk version within [VERSION_40, VERSION_49];
	// it gates which optional fields exist below.
	var actualFormat int
	if format == codec.CODEC_MAGIC { // 4.0+
		if actualFormat, err = asInt(codec.CheckHeaderNoMagic(input, "segments", VERSION_40, VERSION_49)); err != nil {
			return
		}
		if sis.version, err = input.ReadLong(); err != nil {
			return
		}
		if sis.counter, err = asInt(input.ReadInt()); err != nil {
			return
		}
		var numSegments int
		if numSegments, err = asInt(input.ReadInt()); err != nil {
			return
		} else if numSegments < 0 {
			return errors.New(fmt.Sprintf("invalid segment count: %v (resource: %v)", numSegments, input))
		}

		// Per-segment loop: each entry names a segment and the codec
		// used to write it; the codec's own reader loads the .si data.
		var segName, codecName string
		var fCodec Codec
		var delGen, fieldInfosGen, dvGen int64
		var delCount int
		for seg := 0; seg < numSegments; seg++ {
			if segName, err = input.ReadString(); err != nil {
				return
			}
			if codecName, err = input.ReadString(); err != nil {
				return
			}
			fCodec = LoadCodec(codecName)
			assert2(fCodec != nil, "Invalid codec name: %v", codecName)
			// fmt.Printf("SIS.read seg=%v codec=%v\n", seg, fCodec)
			var info *SegmentInfo
			if info, err = fCodec.SegmentInfoFormat().SegmentInfoReader().Read(directory, segName, store.IO_CONTEXT_READ); err != nil {
				return
			}
			info.SetCodec(fCodec)
			if delGen, err = input.ReadLong(); err != nil {
				return
			}
			// delCount must fit within the segment's document count.
			if delCount, err = asInt(input.ReadInt()); err != nil {
				return
			} else if delCount < 0 || delCount > info.DocCount() {
				return errors.New(fmt.Sprintf(
					"invalid deletion count: %v vs docCount=%v (resource: %v)",
					delCount, info.DocCount(), input))
			}
			// fieldInfosGen only exists from 4.6 on; -1 means "none".
			fieldInfosGen = -1
			if actualFormat >= VERSION_46 {
				if fieldInfosGen, err = input.ReadLong(); err != nil {
					return
				}
			}
			// docValues gen only exists from 4.9 on; older formats
			// fall back to fieldInfosGen.
			dvGen = -1
			if actualFormat >= VERSION_49 {
				if dvGen, err = input.ReadLong(); err != nil {
					return
				}
			} else {
				dvGen = fieldInfosGen
			}
			siPerCommit := NewSegmentCommitInfo(info, delCount, delGen, fieldInfosGen, dvGen)
			if actualFormat >= VERSION_46 {
				if actualFormat < VERSION_49 {
					// 4.6-4.8 layout of per-segment update files is
					// not ported yet.
					panic("not implemented yet")
				} else {
					var ss map[string]bool
					if ss, err = input.ReadStringSet(); err != nil {
						return err
					}
					siPerCommit.SetFieldInfosFiles(ss)
					var dvUpdatesFiles map[int]map[string]bool
					var numDVFields int
					if numDVFields, err = asInt(input.ReadInt()); err != nil {
						return err
					}
					if numDVFields == 0 {
						dvUpdatesFiles = make(map[int]map[string]bool)
					} else {
						// Per-field docValues update files not ported yet.
						panic("not implemented yet")
					}
					siPerCommit.SetDocValuesUpdatesFiles(dvUpdatesFiles)
				}
			}
			sis.Segments = append(sis.Segments, siPerCommit)
		}
		if sis.userData, err = input.ReadStringStringMap(); err != nil {
			return err
		}
	} else {
		// TODO support <4.0 index
		panic("Index format pre-4.0 not supported yet")
	}

	// Trailer: 4.8+ writes a checksum footer; older formats write the
	// running checksum as a plain long followed by EOF.
	if actualFormat >= VERSION_48 {
		if _, err = codec.CheckFooter(input); err != nil {
			return
		}
	} else {
		var checksumNow = int64(input.Checksum())
		var checksumThen int64
		if checksumThen, err = input.ReadLong(); err != nil {
			return
		}
		if checksumNow != checksumThen {
			return errors.New(fmt.Sprintf(
				"checksum mismatch in segments file: %v vs %v (resource: %v)",
				checksumNow, checksumThen, input))
		}
		if err = codec.CheckEOF(input); err != nil {
			return
		}
	}
	success = true
	return nil
}
// TODO support IndexCommit func (fsf *FindSegmentsFile) run(commit IndexCommit) (interface{}, error) { // fmt.Println("Finding segments file...") if commit != nil { if fsf.directory != commit.Directory() { return nil, errors.New("the specified commit does not match the specified Directory") } return fsf.doBody(commit.SegmentsFileName()) } lastGen := int64(-1) gen := int64(0) genLookaheadCount := 0 var exc error retryCount := 0 useFirstMethod := true // Loop until we succeed in calling doBody() without // hitting an IOException. An IOException most likely // means a commit was in process and has finished, in // the time it took us to load the now-old infos files // (and segments files). It's also possible it's a // true error (corrupt index). To distinguish these, // on each retry we must see "forward progress" on // which generation we are trying to load. If we // don't, then the original error is real and we throw // it. // We have three methods for determining the current // generation. We try the first two in parallel (when // useFirstMethod is true), and fall back to the third // when necessary. for { // fmt.Println("Trying...") if useFirstMethod { // fmt.Println("Trying first method...") // List the directory and use the highest // segments_N file. This method works well as long // as there is no stale caching on the directory // contents (NOTE: NFS clients often have such stale // caching): genA := int64(-1) files, err := fsf.directory.ListAll() if err != nil { return nil, err } if files != nil { genA = LastCommitGeneration(files) } // message("directory listing genA=%v", genA) // Also open segments.gen and read its // contents. Then we take the larger of the two // gens. This way, if either approach is hitting // a stale cache (NFS) we have a better chance of // getting the right generation. 
genB := int64(-1) genInput, err := fsf.directory.OpenChecksumInput(INDEX_FILENAME_SEGMENTS_GEN, store.IO_CONTEXT_READ) if err != nil { message("segments.gen open: %v", err) } else { defer genInput.Close() // fmt.Println("Reading segments info...") var version int32 if version, err = genInput.ReadInt(); err != nil { return nil, err } // fmt.Printf("Version: %v\n", version) if version == FORMAT_SEGMENTS_GEN_47 || version == FORMAT_SEGMENTS_GEN_CURRENT { // fmt.Println("Version is current.") var gen0, gen1 int64 if gen0, err = genInput.ReadLong(); err != nil { return nil, err } if gen1, err = genInput.ReadLong(); err != nil { return nil, err } message("fallback check: %v; %v", gen0, gen1) if version == FORMAT_SEGMENTS_GEN_CHECKSUM { if _, err = codec.CheckFooter(genInput); err != nil { return nil, err } } else { if err = codec.CheckEOF(genInput); err != nil { return nil, err } } if gen0 == gen1 { // The file is consistent. genB = gen0 } } else { return nil, codec.NewIndexFormatTooNewError(genInput, version, FORMAT_SEGMENTS_GEN_CURRENT, FORMAT_SEGMENTS_GEN_CURRENT) } } message("%v check: genB=%v", INDEX_FILENAME_SEGMENTS_GEN, genB) // Pick the larger of the two gen's: gen = genA if genB > gen { gen = genB } if gen == -1 { // Neither approach found a generation return nil, errors.New(fmt.Sprintf("no segments* file found in %v: files: %#v", fsf.directory, files)) } } if useFirstMethod && lastGen == gen && retryCount >= 2 { // Give up on first method -- this is 3rd cycle on // listing directory and checking gen file to // attempt to locate the segments file. useFirstMethod = false } // Second method: since both directory cache and // file contents cache seem to be stale, just // advance the generation. 
if !useFirstMethod { if genLookaheadCount < fsf.defaultGenLookaheadCount { gen++ genLookaheadCount++ message("look ahead increment gen to %v", gen) } else { // All attempts have failed -- throw first exc: return nil, exc } } else if lastGen == gen { // This means we're about to try the same // segments_N last tried. retryCount++ } else { // Segment file has advanced since our last loop // (we made "progress"), so reset retryCount: retryCount = 0 } lastGen = gen segmentFileName := util.FileNameFromGeneration(INDEX_FILENAME_SEGMENTS, "", gen) // fmt.Printf("SegmentFileName: %v\n", segmentFileName) var v interface{} var err error if v, err = fsf.doBody(segmentFileName); err == nil { message("success on %v", segmentFileName) return v, nil } // Save the original root cause: if exc == nil { exc = err } message("primary Exception on '%v': %v; will retry: retryCount = %v; gen = %v", segmentFileName, err, retryCount, gen) if gen > 1 && useFirstMethod && retryCount == 1 { // This is our second time trying this same segments // file (because retryCount is 1), and, there is // possibly a segments_(N-1) (because gen > 1). // So, check if the segments_(N-1) exists and // try it if so: prevSegmentFileName := util.FileNameFromGeneration(INDEX_FILENAME_SEGMENTS, "", gen-1) if prevExists := fsf.directory.FileExists(prevSegmentFileName); prevExists { message("fallback to prior segment file '%v'", prevSegmentFileName) if v, err = fsf.doBody(prevSegmentFileName); err != nil { message("secondary Exception on '%v': %v; will retry", prevSegmentFileName, err) } else { message("success on fallback %v", prevSegmentFileName) return v, nil } } } } }
// Sole constructor. Opens the stored-fields index (.fdx) and data (.fdt)
// files for the given segment, validates their headers/footers, and
// returns a reader positioned to serve document lookups. On any error the
// partially-initialized reader and streams are closed.
func newCompressingStoredFieldsReader(d store.Directory, si *model.SegmentInfo,
	segmentSuffix string, fn model.FieldInfos, ctx store.IOContext,
	formatName string, compressionMode CompressionMode) (r *CompressingStoredFieldsReader, err error) {

	r = &CompressingStoredFieldsReader{}
	r.compressionMode = compressionMode
	segment := si.Name
	r.fieldInfos = fn
	r.numDocs = si.DocCount()

	var indexStream store.ChecksumIndexInput
	success := false
	defer func() {
		if !success {
			// Close both the reader (and whatever it opened) and the
			// index stream, suppressing secondary errors.
			util.CloseWhileSuppressingError(r, indexStream)
		}
	}()

	indexStreamFN := util.SegmentFileName(segment, segmentSuffix, lucene40.FIELDS_INDEX_EXTENSION)
	fieldsStreamFN := util.SegmentFileName(segment, segmentSuffix, lucene40.FIELDS_EXTENSION)

	// Load the index into memory
	if indexStream, err = d.OpenChecksumInput(indexStreamFN, ctx); err != nil {
		return nil, err
	}
	codecNameIdx := formatName + CODEC_SFX_IDX
	if r.version, err = int32AsInt(codec.CheckHeader(indexStream, codecNameIdx, VERSION_START, VERSION_CURRENT)); err != nil {
		return nil, err
	}
	assert(int64(codec.HeaderLength(codecNameIdx)) == indexStream.FilePointer())
	if r.indexReader, err = newCompressingStoredFieldsIndexReader(indexStream, si); err != nil {
		return nil, err
	}

	// maxPointer bounds the valid data region of the fields stream. For
	// VERSION_CHECKSUM+ it is stored explicitly before the footer; for
	// older versions it is derived from the data file length below.
	var maxPointer int64 = -1
	if r.version >= VERSION_CHECKSUM {
		if maxPointer, err = indexStream.ReadVLong(); err != nil {
			return nil, err
		}
		if _, err = codec.CheckFooter(indexStream); err != nil {
			return nil, err
		}
	} else {
		if err = codec.CheckEOF(indexStream); err != nil {
			return nil, err
		}
	}
	if err = indexStream.Close(); err != nil {
		return nil, err
	}
	// Prevent the deferred cleanup from double-closing the stream.
	indexStream = nil

	// Open the data file and read metadata
	if r.fieldsStream, err = d.OpenInput(fieldsStreamFN, ctx); err != nil {
		return nil, err
	}
	if r.version >= VERSION_CHECKSUM {
		// Data region plus footer must account for the whole file.
		if maxPointer+codec.FOOTER_LENGTH != r.fieldsStream.Length() {
			return nil, errors.New(fmt.Sprintf(
				"Invalid fieldsStream maxPointer (file truncated?): maxPointer=%v, length=%v",
				maxPointer, r.fieldsStream.Length()))
		}
	} else {
		maxPointer = r.fieldsStream.Length()
	}
	r.maxPointer = maxPointer

	codecNameDat := formatName + CODEC_SFX_DAT
	var fieldsVersion int
	if fieldsVersion, err = int32AsInt(codec.CheckHeader(r.fieldsStream, codecNameDat, VERSION_START, VERSION_CURRENT)); err != nil {
		return nil, err
	}
	// Index and data files must have been written by the same version.
	assert2(r.version == fieldsVersion,
		"Version mismatch between stored fields index and data: %v != %v",
		r.version, fieldsVersion)
	assert(int64(codec.HeaderLength(codecNameDat)) == r.fieldsStream.FilePointer())

	// chunkSize only exists from VERSION_BIG_CHUNKS on; -1 means "unset".
	r.chunkSize = -1
	if r.version >= VERSION_BIG_CHUNKS {
		if r.chunkSize, err = int32AsInt(r.fieldsStream.ReadVInt()); err != nil {
			return nil, err
		}
	}

	if r.packedIntsVersion, err = int32AsInt(r.fieldsStream.ReadVInt()); err != nil {
		return nil, err
	}
	r.decompressor = compressionMode.NewDecompressor()
	r.bytes = make([]byte, 0)

	if r.version >= VERSION_CHECKSUM {
		// NOTE: data file is too costly to verify checksum against all the
		// bytes on open, but for now we at least verify proper structure
		// of the checksum footer: which looks for FOOTER_MAGIC +
		// algorithmID. This is cheap and can detect some forms of
		// corruption such as file truncation.
		if _, err = codec.RetrieveChecksum(r.fieldsStream); err != nil {
			return nil, err
		}
	}

	success = true
	return r, nil
}
return } if attributes, err = input.ReadStringStringMap(); err != nil { return } infos = append(infos, NewFieldInfo(name, isIndexed, fieldNumber, storeTermVector, omitNorms, storePayloads, indexOptions, docValuesType, normsType, dvGen, attributes)) } if codecVersion >= FI_FORMAT_CHECKSUM { if _, err = codec.CheckFooter(input); err != nil { return } } else { if err = codec.CheckEOF(input); err != nil { return } } fis = NewFieldInfos(infos) success = true return fis, nil } func getDocValuesType(input store.IndexInput, b byte) (t DocValuesType, err error) { switch b { case 0: return DocValuesType(0), nil case 1: return DOC_VALUES_TYPE_NUMERIC, nil case 2:
func (d *CompoundFileDirectory) readEntries(handle IndexInput, dir Directory, name string) (mapping map[string]FileSlice, err error) { var stream IndexInput = nil var entriesStream ChecksumIndexInput = nil // read the first VInt. If it is negative, it's the version number // otherwise it's the count (pre-3.1 indexes) var success = false defer func() { if success { err = util.Close(stream, entriesStream) } else { util.CloseWhileSuppressingError(stream, entriesStream) } }() stream = handle.Clone() // fmt.Printf("Reading from stream: %v\n", stream) firstInt, err := stream.ReadVInt() if err != nil { return nil, err } // impossible for 3.0 to have 63 files in a .cfs, CFS writer was not visible // and separate norms/etc are outside of cfs. if firstInt == int32(CODEC_MAGIC_BYTE1) { if secondByte, err := stream.ReadByte(); err == nil { if thirdByte, err := stream.ReadByte(); err == nil { if fourthByte, err := stream.ReadByte(); err == nil { if secondByte != CODEC_MAGIC_BYTE2 || thirdByte != CODEC_MAGIC_BYTE3 || fourthByte != CODEC_MAGIC_BYTE4 { return nil, errors.New(fmt.Sprintf( "Illegal/impossible header for CFS file: %v,%v,%v", secondByte, thirdByte, fourthByte)) } } } } if err != nil { return nil, err } d.version, err = int32ToInt(codec.CheckHeaderNoMagic(stream, CFD_DATA_CODEC, CFD_VERSION_START, CFD_VERSION_CURRENT)) if err != nil { return nil, err } entriesFileName := util.SegmentFileName(util.StripExtension(name), "", COMPOUND_FILE_ENTRIES_EXTENSION) entriesStream, err = dir.OpenChecksumInput(entriesFileName, IO_CONTEXT_READONCE) if err != nil { return nil, err } _, err = codec.CheckHeader(entriesStream, CFD_ENTRY_CODEC, CFD_VERSION_START, CFD_VERSION_CURRENT) if err != nil { return nil, err } numEntries, err := entriesStream.ReadVInt() if err != nil { return nil, err } mapping = make(map[string]FileSlice) // fmt.Printf("Entries number: %v\n", numEntries) for i := int32(0); i < numEntries; i++ { id, err := entriesStream.ReadString() if err != nil { return nil, err 
} if _, ok := mapping[id]; ok { return nil, errors.New(fmt.Sprintf( "Duplicate cfs entry id=%v in CFS: %v", id, entriesStream)) } // log.Printf("Found entry: %v", id) offset, err := entriesStream.ReadLong() if err != nil { return nil, err } length, err := entriesStream.ReadLong() if err != nil { return nil, err } mapping[id] = FileSlice{offset, length} } if d.version >= CFD_VERSION_CHECKSUM { _, err = codec.CheckFooter(entriesStream) } else { err = codec.CheckEOF(entriesStream) } if err != nil { return nil, err } } else { // TODO remove once 3.x is not supported anymore panic("not supported yet; will also be obsolete soon") } success = true return mapping, nil }
func (r *Lucene40SegmentInfoReader) Read(dir store.Directory, segment string, context store.IOContext) (si *SegmentInfo, err error) { si = new(SegmentInfo) fileName := util.SegmentFileName(segment, "", LUCENE40_SI_EXTENSION) input, err := dir.OpenInput(fileName, context) if err != nil { return nil, err } success := false defer func() { if !success { util.CloseWhileSuppressingError(input) } else { input.Close() } }() _, err = codec.CheckHeader(input, LUCENE40_CODEC_NAME, LUCENE40_VERSION_START, LUCENE40_VERSION_CURRENT) if err != nil { return nil, err } versionStr, err := input.ReadString() if err != nil { return nil, err } version, err := util.ParseVersion(versionStr) if err != nil { return nil, err } docCount, err := input.ReadInt() if err != nil { return nil, err } if docCount < 0 { return nil, errors.New(fmt.Sprintf("invalid docCount: %v (resource=%v)", docCount, input)) } sicf, err := input.ReadByte() if err != nil { return nil, err } isCompoundFile := (sicf == SEGMENT_INFO_YES) diagnostics, err := input.ReadStringStringMap() if err != nil { return nil, err } _, err = input.ReadStringStringMap() // read deprecated attributes if err != nil { return nil, err } files, err := input.ReadStringSet() if err != nil { return nil, err } if err = codec.CheckEOF(input); err != nil { return nil, err } si = NewSegmentInfo(dir, version, segment, int(docCount), isCompoundFile, nil, diagnostics) si.SetFiles(files) success = true return si, nil }