Ejemplo n.º 1
0
/*
Read a particular segmentFileName. Note that this may return IO error
if a commit is in process.
*/
func (sis *SegmentInfos) Read(directory store.Directory, segmentFileName string) (err error) {
	// fmt.Printf("Reading segment info from %v...\n", segmentFileName)

	// Clear any previous segments:
	sis.Clear()

	sis.generation = GenerationFromSegmentsFileName(segmentFileName)
	sis.lastGeneration = sis.generation

	var input store.ChecksumIndexInput
	if input, err = directory.OpenChecksumInput(segmentFileName, store.IO_CONTEXT_READ); err != nil {
		return
	}

	var success = false
	defer func() {
		if !success {
			// Clear any segment infos we had loaded so we
			// have a clean slate on retry:
			sis.Clear()
			util.CloseWhileSuppressingError(input)
		} else {
			err = input.Close()
		}
	}()

	var format int
	if format, err = asInt(input.ReadInt()); err != nil {
		return
	}

	var actualFormat int
	if format == codec.CODEC_MAGIC {
		// 4.0+
		if actualFormat, err = asInt(codec.CheckHeaderNoMagic(input, "segments", VERSION_40, VERSION_49)); err != nil {
			return
		}
		if sis.version, err = input.ReadLong(); err != nil {
			return
		}
		if sis.counter, err = asInt(input.ReadInt()); err != nil {
			return
		}
		var numSegments int
		if numSegments, err = asInt(input.ReadInt()); err != nil {
			return
		} else if numSegments < 0 {
			return errors.New(fmt.Sprintf("invalid segment count: %v (resource: %v)", numSegments, input))
		}
		var segName, codecName string
		var fCodec Codec
		var delGen, fieldInfosGen, dvGen int64
		var delCount int
		for seg := 0; seg < numSegments; seg++ {
			if segName, err = input.ReadString(); err != nil {
				return
			}
			if codecName, err = input.ReadString(); err != nil {
				return
			}
			fCodec = LoadCodec(codecName)
			assert2(fCodec != nil, "Invalid codec name: %v", codecName)
			// fmt.Printf("SIS.read seg=%v codec=%v\n", seg, fCodec)
			var info *SegmentInfo
			if info, err = fCodec.SegmentInfoFormat().SegmentInfoReader().Read(directory, segName, store.IO_CONTEXT_READ); err != nil {
				return
			}
			info.SetCodec(fCodec)
			if delGen, err = input.ReadLong(); err != nil {
				return
			}
			if delCount, err = asInt(input.ReadInt()); err != nil {
				return
			} else if delCount < 0 || delCount > info.DocCount() {
				return errors.New(fmt.Sprintf(
					"invalid deletion count: %v vs docCount=%v (resource: %v)",
					delCount, info.DocCount(), input))
			}
			fieldInfosGen = -1
			if actualFormat >= VERSION_46 {
				if fieldInfosGen, err = input.ReadLong(); err != nil {
					return
				}
			}
			dvGen = -1
			if actualFormat >= VERSION_49 {
				if dvGen, err = input.ReadLong(); err != nil {
					return
				}
			} else {
				dvGen = fieldInfosGen
			}
			siPerCommit := NewSegmentCommitInfo(info, delCount, delGen, fieldInfosGen, dvGen)
			if actualFormat >= VERSION_46 {
				if actualFormat < VERSION_49 {
					panic("not implemented yet")
				} else {
					var ss map[string]bool
					if ss, err = input.ReadStringSet(); err != nil {
						return err
					}
					siPerCommit.SetFieldInfosFiles(ss)
					var dvUpdatesFiles map[int]map[string]bool
					var numDVFields int
					if numDVFields, err = asInt(input.ReadInt()); err != nil {
						return err
					}
					if numDVFields == 0 {
						dvUpdatesFiles = make(map[int]map[string]bool)
					} else {
						panic("not implemented yet")
					}
					siPerCommit.SetDocValuesUpdatesFiles(dvUpdatesFiles)
				}
			}
			sis.Segments = append(sis.Segments, siPerCommit)
		}
		if sis.userData, err = input.ReadStringStringMap(); err != nil {
			return err
		}
	} else {
		// TODO support <4.0 index
		panic("Index format pre-4.0 not supported yet")
	}

	if actualFormat >= VERSION_48 {
		if _, err = codec.CheckFooter(input); err != nil {
			return
		}
	} else {
		var checksumNow = int64(input.Checksum())
		var checksumThen int64
		if checksumThen, err = input.ReadLong(); err != nil {
			return
		}
		if checksumNow != checksumThen {
			return errors.New(fmt.Sprintf(
				"checksum mismatch in segments file: %v vs %v (resource: %v)",
				checksumNow, checksumThen, input))
		}
		if err = codec.CheckEOF(input); err != nil {
			return
		}
	}

	success = true
	return nil
}
Ejemplo n.º 2
0
// TODO support IndexCommit
func (fsf *FindSegmentsFile) run(commit IndexCommit) (interface{}, error) {
	// fmt.Println("Finding segments file...")
	if commit != nil {
		if fsf.directory != commit.Directory() {
			return nil, errors.New("the specified commit does not match the specified Directory")
		}
		return fsf.doBody(commit.SegmentsFileName())
	}

	lastGen := int64(-1)
	gen := int64(0)
	genLookaheadCount := 0
	var exc error
	retryCount := 0

	useFirstMethod := true

	// Loop until we succeed in calling doBody() without
	// hitting an IOException.  An IOException most likely
	// means a commit was in process and has finished, in
	// the time it took us to load the now-old infos files
	// (and segments files).  It's also possible it's a
	// true error (corrupt index).  To distinguish these,
	// on each retry we must see "forward progress" on
	// which generation we are trying to load.  If we
	// don't, then the original error is real and we throw
	// it.

	// We have three methods for determining the current
	// generation.  We try the first two in parallel (when
	// useFirstMethod is true), and fall back to the third
	// when necessary.

	for {
		// fmt.Println("Trying...")
		if useFirstMethod {
			// fmt.Println("Trying first method...")
			// List the directory and use the highest
			// segments_N file.  This method works well as long
			// as there is no stale caching on the directory
			// contents (NOTE: NFS clients often have such stale
			// caching):
			genA := int64(-1)

			files, err := fsf.directory.ListAll()
			if err != nil {
				return nil, err
			}
			if files != nil {
				genA = LastCommitGeneration(files)
			}

			// message("directory listing genA=%v", genA)

			// Also open segments.gen and read its
			// contents.  Then we take the larger of the two
			// gens.  This way, if either approach is hitting
			// a stale cache (NFS) we have a better chance of
			// getting the right generation.
			genB := int64(-1)
			genInput, err := fsf.directory.OpenChecksumInput(INDEX_FILENAME_SEGMENTS_GEN, store.IO_CONTEXT_READ)
			if err != nil {
				message("segments.gen open: %v", err)
			} else {
				defer genInput.Close()
				// fmt.Println("Reading segments info...")

				var version int32
				if version, err = genInput.ReadInt(); err != nil {
					return nil, err
				}
				// fmt.Printf("Version: %v\n", version)
				if version == FORMAT_SEGMENTS_GEN_47 || version == FORMAT_SEGMENTS_GEN_CURRENT {
					// fmt.Println("Version is current.")
					var gen0, gen1 int64
					if gen0, err = genInput.ReadLong(); err != nil {
						return nil, err
					}
					if gen1, err = genInput.ReadLong(); err != nil {
						return nil, err
					}
					message("fallback check: %v; %v", gen0, gen1)
					if version == FORMAT_SEGMENTS_GEN_CHECKSUM {
						if _, err = codec.CheckFooter(genInput); err != nil {
							return nil, err
						}
					} else {
						if err = codec.CheckEOF(genInput); err != nil {
							return nil, err
						}
					}
					if gen0 == gen1 {
						// The file is consistent.
						genB = gen0
					}
				} else {
					return nil, codec.NewIndexFormatTooNewError(genInput, version,
						FORMAT_SEGMENTS_GEN_CURRENT, FORMAT_SEGMENTS_GEN_CURRENT)
				}
			}

			message("%v check: genB=%v", INDEX_FILENAME_SEGMENTS_GEN, genB)

			// Pick the larger of the two gen's:
			gen = genA
			if genB > gen {
				gen = genB
			}

			if gen == -1 {
				// Neither approach found a generation
				return nil, errors.New(fmt.Sprintf("no segments* file found in %v: files: %#v", fsf.directory, files))
			}
		}

		if useFirstMethod && lastGen == gen && retryCount >= 2 {
			// Give up on first method -- this is 3rd cycle on
			// listing directory and checking gen file to
			// attempt to locate the segments file.
			useFirstMethod = false
		}

		// Second method: since both directory cache and
		// file contents cache seem to be stale, just
		// advance the generation.
		if !useFirstMethod {
			if genLookaheadCount < fsf.defaultGenLookaheadCount {
				gen++
				genLookaheadCount++
				message("look ahead increment gen to %v", gen)
			} else {
				// All attempts have failed -- throw first exc:
				return nil, exc
			}
		} else if lastGen == gen {
			// This means we're about to try the same
			// segments_N last tried.
			retryCount++
		} else {
			// Segment file has advanced since our last loop
			// (we made "progress"), so reset retryCount:
			retryCount = 0
		}

		lastGen = gen
		segmentFileName := util.FileNameFromGeneration(INDEX_FILENAME_SEGMENTS, "", gen)
		// fmt.Printf("SegmentFileName: %v\n", segmentFileName)

		var v interface{}
		var err error
		if v, err = fsf.doBody(segmentFileName); err == nil {
			message("success on %v", segmentFileName)
			return v, nil
		}
		// Save the original root cause:
		if exc == nil {
			exc = err
		}

		message("primary Exception on '%v': %v; will retry: retryCount = %v; gen = %v",
			segmentFileName, err, retryCount, gen)

		if gen > 1 && useFirstMethod && retryCount == 1 {
			// This is our second time trying this same segments
			// file (because retryCount is 1), and, there is
			// possibly a segments_(N-1) (because gen > 1).
			// So, check if the segments_(N-1) exists and
			// try it if so:
			prevSegmentFileName := util.FileNameFromGeneration(INDEX_FILENAME_SEGMENTS, "", gen-1)

			if prevExists := fsf.directory.FileExists(prevSegmentFileName); prevExists {
				message("fallback to prior segment file '%v'", prevSegmentFileName)
				if v, err = fsf.doBody(prevSegmentFileName); err != nil {
					message("secondary Exception on '%v': %v; will retry", prevSegmentFileName, err)
				} else {
					message("success on fallback %v", prevSegmentFileName)
					return v, nil
				}
			}
		}
	}
}
Ejemplo n.º 3
0
// Sole constructor
func newCompressingStoredFieldsReader(d store.Directory,
	si *model.SegmentInfo, segmentSuffix string,
	fn model.FieldInfos, ctx store.IOContext, formatName string,
	compressionMode CompressionMode) (r *CompressingStoredFieldsReader, err error) {

	r = &CompressingStoredFieldsReader{}
	r.compressionMode = compressionMode
	segment := si.Name
	r.fieldInfos = fn
	r.numDocs = si.DocCount()

	var indexStream store.ChecksumIndexInput
	success := false
	defer func() {
		if !success {
			util.CloseWhileSuppressingError(r, indexStream)
		}
	}()

	indexStreamFN := util.SegmentFileName(segment, segmentSuffix, lucene40.FIELDS_INDEX_EXTENSION)
	fieldsStreamFN := util.SegmentFileName(segment, segmentSuffix, lucene40.FIELDS_EXTENSION)
	// Load the index into memory
	if indexStream, err = d.OpenChecksumInput(indexStreamFN, ctx); err != nil {
		return nil, err
	}
	codecNameIdx := formatName + CODEC_SFX_IDX
	if r.version, err = int32AsInt(codec.CheckHeader(indexStream, codecNameIdx,
		VERSION_START, VERSION_CURRENT)); err != nil {
		return nil, err
	}
	assert(int64(codec.HeaderLength(codecNameIdx)) == indexStream.FilePointer())
	if r.indexReader, err = newCompressingStoredFieldsIndexReader(indexStream, si); err != nil {
		return nil, err
	}

	var maxPointer int64 = -1

	if r.version >= VERSION_CHECKSUM {
		if maxPointer, err = indexStream.ReadVLong(); err != nil {
			return nil, err
		}
		if _, err = codec.CheckFooter(indexStream); err != nil {
			return nil, err
		}
	} else {
		if err = codec.CheckEOF(indexStream); err != nil {
			return nil, err
		}
	}

	if err = indexStream.Close(); err != nil {
		return nil, err
	}
	indexStream = nil

	// Open the data file and read metadata
	if r.fieldsStream, err = d.OpenInput(fieldsStreamFN, ctx); err != nil {
		return nil, err
	}
	if r.version >= VERSION_CHECKSUM {
		if maxPointer+codec.FOOTER_LENGTH != r.fieldsStream.Length() {
			return nil, errors.New(fmt.Sprintf(
				"Invalid fieldsStream maxPointer (file truncated?): maxPointer=%v, length=%v",
				maxPointer, r.fieldsStream.Length()))
		}
	} else {
		maxPointer = r.fieldsStream.Length()
	}
	r.maxPointer = maxPointer
	codecNameDat := formatName + CODEC_SFX_DAT
	var fieldsVersion int
	if fieldsVersion, err = int32AsInt(codec.CheckHeader(r.fieldsStream,
		codecNameDat, VERSION_START, VERSION_CURRENT)); err != nil {
		return nil, err
	}
	assert2(r.version == fieldsVersion,
		"Version mismatch between stored fields index and data: %v != %v",
		r.version, fieldsVersion)
	assert(int64(codec.HeaderLength(codecNameDat)) == r.fieldsStream.FilePointer())

	r.chunkSize = -1
	if r.version >= VERSION_BIG_CHUNKS {
		if r.chunkSize, err = int32AsInt(r.fieldsStream.ReadVInt()); err != nil {
			return nil, err
		}
	}

	if r.packedIntsVersion, err = int32AsInt(r.fieldsStream.ReadVInt()); err != nil {
		return nil, err
	}
	r.decompressor = compressionMode.NewDecompressor()
	r.bytes = make([]byte, 0)

	if r.version >= VERSION_CHECKSUM {
		// NOTE: data file is too costly to verify checksum against all the
		// bytes on open, but fo rnow we at least verify proper structure
		// of the checksum footer: which looks for FOOTER_MATIC +
		// algorithmID. This is cheap and can detect some forms of
		// corruption such as file trucation.
		if _, err = codec.RetrieveChecksum(r.fieldsStream); err != nil {
			return nil, err
		}
	}

	success = true
	return r, nil
}
Ejemplo n.º 4
0
			return
		}
		if attributes, err = input.ReadStringStringMap(); err != nil {
			return
		}
		infos = append(infos, NewFieldInfo(name, isIndexed, fieldNumber,
			storeTermVector, omitNorms, storePayloads, indexOptions,
			docValuesType, normsType, dvGen, attributes))
	}

	if codecVersion >= FI_FORMAT_CHECKSUM {
		if _, err = codec.CheckFooter(input); err != nil {
			return
		}
	} else {
		if err = codec.CheckEOF(input); err != nil {
			return
		}
	}
	fis = NewFieldInfos(infos)
	success = true
	return fis, nil
}

func getDocValuesType(input store.IndexInput, b byte) (t DocValuesType, err error) {
	switch b {
	case 0:
		return DocValuesType(0), nil
	case 1:
		return DOC_VALUES_TYPE_NUMERIC, nil
	case 2:
Ejemplo n.º 5
0
func (d *CompoundFileDirectory) readEntries(handle IndexInput, dir Directory, name string) (mapping map[string]FileSlice, err error) {
	var stream IndexInput = nil
	var entriesStream ChecksumIndexInput = nil
	// read the first VInt. If it is negative, it's the version number
	// otherwise it's the count (pre-3.1 indexes)
	var success = false
	defer func() {
		if success {
			err = util.Close(stream, entriesStream)
		} else {
			util.CloseWhileSuppressingError(stream, entriesStream)
		}
	}()

	stream = handle.Clone()
	// fmt.Printf("Reading from stream: %v\n", stream)
	firstInt, err := stream.ReadVInt()
	if err != nil {
		return nil, err
	}
	// impossible for 3.0 to have 63 files in a .cfs, CFS writer was not visible
	// and separate norms/etc are outside of cfs.
	if firstInt == int32(CODEC_MAGIC_BYTE1) {
		if secondByte, err := stream.ReadByte(); err == nil {
			if thirdByte, err := stream.ReadByte(); err == nil {
				if fourthByte, err := stream.ReadByte(); err == nil {
					if secondByte != CODEC_MAGIC_BYTE2 ||
						thirdByte != CODEC_MAGIC_BYTE3 ||
						fourthByte != CODEC_MAGIC_BYTE4 {
						return nil, errors.New(fmt.Sprintf(
							"Illegal/impossible header for CFS file: %v,%v,%v",
							secondByte, thirdByte, fourthByte))
					}
				}
			}
		}
		if err != nil {
			return nil, err
		}

		d.version, err = int32ToInt(codec.CheckHeaderNoMagic(stream, CFD_DATA_CODEC, CFD_VERSION_START, CFD_VERSION_CURRENT))
		if err != nil {
			return nil, err
		}
		entriesFileName := util.SegmentFileName(util.StripExtension(name), "", COMPOUND_FILE_ENTRIES_EXTENSION)
		entriesStream, err = dir.OpenChecksumInput(entriesFileName, IO_CONTEXT_READONCE)
		if err != nil {
			return nil, err
		}
		_, err = codec.CheckHeader(entriesStream, CFD_ENTRY_CODEC, CFD_VERSION_START, CFD_VERSION_CURRENT)
		if err != nil {
			return nil, err
		}
		numEntries, err := entriesStream.ReadVInt()
		if err != nil {
			return nil, err
		}

		mapping = make(map[string]FileSlice)
		// fmt.Printf("Entries number: %v\n", numEntries)
		for i := int32(0); i < numEntries; i++ {
			id, err := entriesStream.ReadString()
			if err != nil {
				return nil, err
			}
			if _, ok := mapping[id]; ok {
				return nil, errors.New(fmt.Sprintf(
					"Duplicate cfs entry id=%v in CFS: %v", id, entriesStream))
			}
			// log.Printf("Found entry: %v", id)
			offset, err := entriesStream.ReadLong()
			if err != nil {
				return nil, err
			}
			length, err := entriesStream.ReadLong()
			if err != nil {
				return nil, err
			}
			mapping[id] = FileSlice{offset, length}
		}
		if d.version >= CFD_VERSION_CHECKSUM {
			_, err = codec.CheckFooter(entriesStream)
		} else {
			err = codec.CheckEOF(entriesStream)
		}
		if err != nil {
			return nil, err
		}
	} else {
		// TODO remove once 3.x is not supported anymore
		panic("not supported yet; will also be obsolete soon")
	}
	success = true
	return mapping, nil
}
Ejemplo n.º 6
0
func (r *Lucene40SegmentInfoReader) Read(dir store.Directory,
	segment string, context store.IOContext) (si *SegmentInfo, err error) {

	si = new(SegmentInfo)
	fileName := util.SegmentFileName(segment, "", LUCENE40_SI_EXTENSION)
	input, err := dir.OpenInput(fileName, context)
	if err != nil {
		return nil, err
	}

	success := false
	defer func() {
		if !success {
			util.CloseWhileSuppressingError(input)
		} else {
			input.Close()
		}
	}()

	_, err = codec.CheckHeader(input, LUCENE40_CODEC_NAME, LUCENE40_VERSION_START, LUCENE40_VERSION_CURRENT)
	if err != nil {
		return nil, err
	}
	versionStr, err := input.ReadString()
	if err != nil {
		return nil, err
	}
	version, err := util.ParseVersion(versionStr)
	if err != nil {
		return nil, err
	}

	docCount, err := input.ReadInt()
	if err != nil {
		return nil, err
	}
	if docCount < 0 {
		return nil, errors.New(fmt.Sprintf("invalid docCount: %v (resource=%v)", docCount, input))
	}
	sicf, err := input.ReadByte()
	if err != nil {
		return nil, err
	}
	isCompoundFile := (sicf == SEGMENT_INFO_YES)
	diagnostics, err := input.ReadStringStringMap()
	if err != nil {
		return nil, err
	}
	_, err = input.ReadStringStringMap() // read deprecated attributes
	if err != nil {
		return nil, err
	}
	files, err := input.ReadStringSet()
	if err != nil {
		return nil, err
	}

	if err = codec.CheckEOF(input); err != nil {
		return nil, err
	}

	si = NewSegmentInfo(dir, version, segment, int(docCount), isCompoundFile, nil, diagnostics)
	si.SetFiles(files)

	success = true
	return si, nil
}