Exemplo n.º 1
0
// Read loads the commit stored in segmentFileName from directory into
// sis. Note that this may return an IO error if a commit is in
// progress.
//
// Segments held before the call are discarded first. On any failure sis
// is cleared again, so that a retry starts from a clean slate, and the
// input is closed with its close error suppressed. On success the
// returned error is whatever closing the input reports.
//
// Only the 4.0+ layout (a leading codec.CODEC_MAGIC) is handled. The
// function panics for older indexes, for formats in
// [VERSION_46, VERSION_49), and for a non-zero doc-values field count,
// none of which are implemented yet.
func (sis *SegmentInfos) Read(directory store.Directory, segmentFileName string) (err error) {
	// Drop whatever was loaded by an earlier call.
	sis.Clear()

	sis.generation = GenerationFromSegmentsFileName(segmentFileName)
	sis.lastGeneration = sis.generation

	var input store.ChecksumIndexInput
	input, err = directory.OpenChecksumInput(segmentFileName, store.IO_CONTEXT_READ)
	if err != nil {
		return err
	}

	loaded := false
	defer func() {
		if loaded {
			// Happy path: the close error becomes the result.
			err = input.Close()
			return
		}
		// Clear any segment infos we had loaded so we have a clean
		// slate on retry, and keep the original error.
		sis.Clear()
		util.CloseWhileSuppressingError(input)
	}()

	var format int
	format, err = asInt(input.ReadInt())
	if err != nil {
		return err
	}
	if format != codec.CODEC_MAGIC {
		// TODO support <4.0 index
		panic("Index format pre-4.0 not supported yet")
	}

	// 4.0+
	var actualFormat int
	actualFormat, err = asInt(codec.CheckHeaderNoMagic(input, "segments", VERSION_40, VERSION_49))
	if err != nil {
		return err
	}
	if sis.version, err = input.ReadLong(); err != nil {
		return err
	}
	if sis.counter, err = asInt(input.ReadInt()); err != nil {
		return err
	}

	var numSegments int
	numSegments, err = asInt(input.ReadInt())
	if err != nil {
		return err
	}
	if numSegments < 0 {
		return errors.New(fmt.Sprintf("invalid segment count: %v (resource: %v)", numSegments, input))
	}

	for remaining := numSegments; remaining > 0; remaining-- {
		var segName, codecName string
		if segName, err = input.ReadString(); err != nil {
			return err
		}
		if codecName, err = input.ReadString(); err != nil {
			return err
		}

		var fCodec Codec
		fCodec = LoadCodec(codecName)
		assert2(fCodec != nil, "Invalid codec name: %v", codecName)

		// The per-segment metadata is read through the segment's own codec.
		var info *SegmentInfo
		info, err = fCodec.SegmentInfoFormat().SegmentInfoReader().Read(directory, segName, store.IO_CONTEXT_READ)
		if err != nil {
			return err
		}
		info.SetCodec(fCodec)

		var delGen int64
		if delGen, err = input.ReadLong(); err != nil {
			return err
		}

		var delCount int
		delCount, err = asInt(input.ReadInt())
		if err != nil {
			return err
		}
		if delCount < 0 || delCount > info.DocCount() {
			return errors.New(fmt.Sprintf(
				"invalid deletion count: %v vs docCount=%v (resource: %v)",
				delCount, info.DocCount(), input))
		}

		// The field-infos generation is only stored from VERSION_46 on.
		var fieldInfosGen int64 = -1
		if actualFormat >= VERSION_46 {
			if fieldInfosGen, err = input.ReadLong(); err != nil {
				return err
			}
		}

		// Before VERSION_49 the doc-values generation is not stored
		// separately and mirrors the field-infos generation.
		dvGen := fieldInfosGen
		if actualFormat >= VERSION_49 {
			if dvGen, err = input.ReadLong(); err != nil {
				return err
			}
		}

		siPerCommit := NewSegmentCommitInfo(info, delCount, delGen, fieldInfosGen, dvGen)

		switch {
		case actualFormat < VERSION_46:
			// No per-commit file sets are stored in this format.
		case actualFormat < VERSION_49:
			panic("not implemented yet")
		default:
			var fieldInfosFiles map[string]bool
			if fieldInfosFiles, err = input.ReadStringSet(); err != nil {
				return err
			}
			siPerCommit.SetFieldInfosFiles(fieldInfosFiles)

			var numDVFields int
			if numDVFields, err = asInt(input.ReadInt()); err != nil {
				return err
			}
			if numDVFields != 0 {
				panic("not implemented yet")
			}
			siPerCommit.SetDocValuesUpdatesFiles(make(map[int]map[string]bool))
		}

		sis.Segments = append(sis.Segments, siPerCommit)
	}

	if sis.userData, err = input.ReadStringStringMap(); err != nil {
		return err
	}

	if actualFormat >= VERSION_48 {
		if _, err = codec.CheckFooter(input); err != nil {
			return err
		}
	} else {
		// Capture the running checksum before consuming the stored one.
		computed := int64(input.Checksum())
		var stored int64
		if stored, err = input.ReadLong(); err != nil {
			return err
		}
		if computed != stored {
			return errors.New(fmt.Sprintf(
				"checksum mismatch in segments file: %v vs %v (resource: %v)",
				computed, stored, input))
		}
		if err = codec.CheckEOF(input); err != nil {
			return err
		}
	}

	loaded = true
	return nil
}
Exemplo n.º 2
0
// newCompressingStoredFieldsReader is the sole constructor of
// CompressingStoredFieldsReader.
//
// It works in two phases:
//
//  1. The stored-fields index file is opened with checksum tracking,
//     its header is checked, the index is loaded through
//     newCompressingStoredFieldsIndexReader, and the file is then
//     verified (footer for version >= VERSION_CHECKSUM, plain EOF
//     otherwise) and closed.
//  2. The stored-fields data file is opened, its length is compared
//     with the maxPointer recorded in the index (checksum versions
//     only), its header is checked, and the chunk size (version >=
//     VERSION_BIG_CHUNKS) and packed-ints version are read.
//
// On success the fully initialised reader is returned. On any error the
// result is (nil, err), and everything opened so far (the index stream
// and the partially built reader) is closed with close errors
// suppressed.
func newCompressingStoredFieldsReader(d store.Directory,
	si *model.SegmentInfo, segmentSuffix string,
	fn model.FieldInfos, ctx store.IOContext, formatName string,
	compressionMode CompressionMode) (r *CompressingStoredFieldsReader, err error) {

	// Build the reader in a local variable, not in the named result r.
	// Every failure path below executes `return nil, err`, which
	// assigns nil to r before deferred functions run. If the cleanup
	// closed r, it would receive a nil pointer and the already opened
	// fieldsStream would never be released.
	reader := &CompressingStoredFieldsReader{}
	reader.compressionMode = compressionMode
	segment := si.Name
	reader.fieldInfos = fn
	reader.numDocs = si.DocCount()

	var indexStream store.ChecksumIndexInput
	success := false
	defer func() {
		if !success {
			util.CloseWhileSuppressingError(reader, indexStream)
		}
	}()

	indexStreamFN := util.SegmentFileName(segment, segmentSuffix, lucene40.FIELDS_INDEX_EXTENSION)
	fieldsStreamFN := util.SegmentFileName(segment, segmentSuffix, lucene40.FIELDS_EXTENSION)
	// Load the index into memory
	if indexStream, err = d.OpenChecksumInput(indexStreamFN, ctx); err != nil {
		return nil, err
	}
	codecNameIdx := formatName + CODEC_SFX_IDX
	if reader.version, err = int32AsInt(codec.CheckHeader(indexStream, codecNameIdx,
		VERSION_START, VERSION_CURRENT)); err != nil {
		return nil, err
	}
	assert(int64(codec.HeaderLength(codecNameIdx)) == indexStream.FilePointer())
	if reader.indexReader, err = newCompressingStoredFieldsIndexReader(indexStream, si); err != nil {
		return nil, err
	}

	// -1 is a placeholder; it is replaced below, either by the value
	// stored in the index (checksum versions) or by the data file length.
	var maxPointer int64 = -1

	if reader.version >= VERSION_CHECKSUM {
		if maxPointer, err = indexStream.ReadVLong(); err != nil {
			return nil, err
		}
		if _, err = codec.CheckFooter(indexStream); err != nil {
			return nil, err
		}
	} else {
		if err = codec.CheckEOF(indexStream); err != nil {
			return nil, err
		}
	}

	if err = indexStream.Close(); err != nil {
		return nil, err
	}
	// Already closed: make sure the deferred cleanup does not close it again.
	indexStream = nil

	// Open the data file and read metadata
	if reader.fieldsStream, err = d.OpenInput(fieldsStreamFN, ctx); err != nil {
		return nil, err
	}
	if reader.version >= VERSION_CHECKSUM {
		// The index records where the data ends; only the footer may
		// follow that position.
		if maxPointer+codec.FOOTER_LENGTH != reader.fieldsStream.Length() {
			return nil, errors.New(fmt.Sprintf(
				"Invalid fieldsStream maxPointer (file truncated?): maxPointer=%v, length=%v",
				maxPointer, reader.fieldsStream.Length()))
		}
	} else {
		maxPointer = reader.fieldsStream.Length()
	}
	reader.maxPointer = maxPointer
	codecNameDat := formatName + CODEC_SFX_DAT
	var fieldsVersion int
	if fieldsVersion, err = int32AsInt(codec.CheckHeader(reader.fieldsStream,
		codecNameDat, VERSION_START, VERSION_CURRENT)); err != nil {
		return nil, err
	}
	assert2(reader.version == fieldsVersion,
		"Version mismatch between stored fields index and data: %v != %v",
		reader.version, fieldsVersion)
	assert(int64(codec.HeaderLength(codecNameDat)) == reader.fieldsStream.FilePointer())

	// chunkSize stays at -1 when the format does not store it.
	reader.chunkSize = -1
	if reader.version >= VERSION_BIG_CHUNKS {
		if reader.chunkSize, err = int32AsInt(reader.fieldsStream.ReadVInt()); err != nil {
			return nil, err
		}
	}

	if reader.packedIntsVersion, err = int32AsInt(reader.fieldsStream.ReadVInt()); err != nil {
		return nil, err
	}
	reader.decompressor = compressionMode.NewDecompressor()
	reader.bytes = make([]byte, 0)

	if reader.version >= VERSION_CHECKSUM {
		// NOTE: the data file is too costly to verify the checksum
		// against all the bytes on open, but for now we at least verify
		// the proper structure of the checksum footer, which looks for
		// FOOTER_MAGIC + algorithmID. This is cheap and can detect some
		// forms of corruption such as file truncation.
		if _, err = codec.RetrieveChecksum(reader.fieldsStream); err != nil {
			return nil, err
		}
	}

	success = true
	return reader, nil
}