Esempio n. 1
0
// readEntries loads the entry table of the compound file called name and
// returns it as a map from entry id to the FileSlice (offset, length) stored
// for that id.
//
// A clone of handle is used to validate the header of the compound data
// file, and d.version is set from that header. The table itself is read from
// the companion entries file, which is opened through dir and verified with
// a footer check or an EOF check depending on d.version.
//
// Both streams are closed before returning: on success the result of
// util.Close becomes the returned error; on failure they are handed to
// util.CloseWhileSuppressingError and the original error is returned.
//
// A data file whose first VInt is not CODEC_MAGIC_BYTE1 is not supported and
// makes the function panic.
func (d *CompoundFileDirectory) readEntries(handle IndexInput, dir Directory, name string) (mapping map[string]FileSlice, err error) {
	var stream IndexInput
	var entriesStream ChecksumIndexInput
	success := false
	defer func() {
		if success {
			err = util.Close(stream, entriesStream)
		} else {
			util.CloseWhileSuppressingError(stream, entriesStream)
		}
	}()

	stream = handle.Clone()
	// The first VInt tells the layouts apart: a value equal to the first
	// codec magic byte starts a codec header, anything else would be the
	// entry count of a legacy (3.x) file.
	firstInt, err := stream.ReadVInt()
	if err != nil {
		return nil, err
	}
	// Per the original note: impossible for 3.0 to have 63 files in a .cfs,
	// CFS writer was not visible and separate norms/etc are outside of cfs.
	if firstInt != int32(CODEC_MAGIC_BYTE1) {
		// TODO remove once 3.x is not supported anymore
		panic("not supported yet; will also be obsolete soon")
	}

	// Read the remaining three magic bytes one at a time at function scope,
	// so a failed read is returned instead of being lost in a shadowed err.
	secondByte, err := stream.ReadByte()
	if err != nil {
		return nil, err
	}
	thirdByte, err := stream.ReadByte()
	if err != nil {
		return nil, err
	}
	fourthByte, err := stream.ReadByte()
	if err != nil {
		return nil, err
	}
	if secondByte != CODEC_MAGIC_BYTE2 ||
		thirdByte != CODEC_MAGIC_BYTE3 ||
		fourthByte != CODEC_MAGIC_BYTE4 {
		return nil, errors.New(fmt.Sprintf(
			"Illegal/impossible header for CFS file: %v,%v,%v",
			secondByte, thirdByte, fourthByte))
	}

	d.version, err = int32ToInt(codec.CheckHeaderNoMagic(stream, CFD_DATA_CODEC, CFD_VERSION_START, CFD_VERSION_CURRENT))
	if err != nil {
		return nil, err
	}

	// The entry table lives in a separate file derived from the compound
	// file's name.
	entriesFileName := util.SegmentFileName(util.StripExtension(name), "", COMPOUND_FILE_ENTRIES_EXTENSION)
	entriesStream, err = dir.OpenChecksumInput(entriesFileName, IO_CONTEXT_READONCE)
	if err != nil {
		return nil, err
	}
	_, err = codec.CheckHeader(entriesStream, CFD_ENTRY_CODEC, CFD_VERSION_START, CFD_VERSION_CURRENT)
	if err != nil {
		return nil, err
	}
	numEntries, err := entriesStream.ReadVInt()
	if err != nil {
		return nil, err
	}

	mapping = make(map[string]FileSlice)
	for i := int32(0); i < numEntries; i++ {
		id, err := entriesStream.ReadString()
		if err != nil {
			return nil, err
		}
		// Each id may appear only once in the table.
		if _, ok := mapping[id]; ok {
			return nil, errors.New(fmt.Sprintf(
				"Duplicate cfs entry id=%v in CFS: %v", id, entriesStream))
		}
		offset, err := entriesStream.ReadLong()
		if err != nil {
			return nil, err
		}
		length, err := entriesStream.ReadLong()
		if err != nil {
			return nil, err
		}
		mapping[id] = FileSlice{offset, length}
	}

	// Versions from CFD_VERSION_CHECKSUM on end with a footer; older ones
	// must simply end here.
	if d.version >= CFD_VERSION_CHECKSUM {
		_, err = codec.CheckFooter(entriesStream)
	} else {
		err = codec.CheckEOF(entriesStream)
	}
	if err != nil {
		return nil, err
	}

	success = true
	return mapping, nil
}
Esempio n. 2
0
// Read loads the commit stored in segmentFileName from directory into sis.
// Note that this may return an IO error if a commit is in process.
//
// Previously loaded segments are cleared first, and generation and
// lastGeneration are derived from the file name. If anything fails after the
// file has been opened, sis is cleared again and the input is closed with its
// close error suppressed. On success the result of closing the input is the
// returned error.
//
// The function panics for a pre-4.0 file, for formats from VERSION_46 up to
// (but excluding) VERSION_49, and for a non-zero doc-values field count,
// none of which are implemented.
func (sis *SegmentInfos) Read(directory store.Directory, segmentFileName string) (err error) {
	// Clear any previous segments:
	sis.Clear()

	sis.generation = GenerationFromSegmentsFileName(segmentFileName)
	sis.lastGeneration = sis.generation

	var input store.ChecksumIndexInput
	input, err = directory.OpenChecksumInput(segmentFileName, store.IO_CONTEXT_READ)
	if err != nil {
		return err
	}

	loaded := false
	defer func() {
		if loaded {
			err = input.Close()
			return
		}
		// Clear any segment infos we had loaded so we
		// have a clean slate on retry:
		sis.Clear()
		util.CloseWhileSuppressingError(input)
	}()

	var format int
	format, err = asInt(input.ReadInt())
	if err != nil {
		return err
	}
	if format != codec.CODEC_MAGIC {
		// TODO support <4.0 index
		panic("Index format pre-4.0 not supported yet")
	}

	// 4.0+
	var actualFormat int
	actualFormat, err = asInt(codec.CheckHeaderNoMagic(input, "segments", VERSION_40, VERSION_49))
	if err != nil {
		return err
	}
	sis.version, err = input.ReadLong()
	if err != nil {
		return err
	}
	sis.counter, err = asInt(input.ReadInt())
	if err != nil {
		return err
	}

	var numSegments int
	numSegments, err = asInt(input.ReadInt())
	if err != nil {
		return err
	}
	if numSegments < 0 {
		return errors.New(fmt.Sprintf("invalid segment count: %v (resource: %v)", numSegments, input))
	}

	for i := 0; i < numSegments; i++ {
		var segName, codecName string
		segName, err = input.ReadString()
		if err != nil {
			return err
		}
		codecName, err = input.ReadString()
		if err != nil {
			return err
		}

		var fCodec Codec = LoadCodec(codecName)
		assert2(fCodec != nil, "Invalid codec name: %v", codecName)

		// The per-segment info is read through the segment's own codec.
		var info *SegmentInfo
		info, err = fCodec.SegmentInfoFormat().SegmentInfoReader().Read(directory, segName, store.IO_CONTEXT_READ)
		if err != nil {
			return err
		}
		info.SetCodec(fCodec)

		var delGen int64
		delGen, err = input.ReadLong()
		if err != nil {
			return err
		}
		var delCount int
		delCount, err = asInt(input.ReadInt())
		if err != nil {
			return err
		}
		if delCount < 0 || delCount > info.DocCount() {
			return errors.New(fmt.Sprintf(
				"invalid deletion count: %v vs docCount=%v (resource: %v)",
				delCount, info.DocCount(), input))
		}

		// The field-infos generation is only stored from VERSION_46 on.
		var fieldInfosGen int64 = -1
		if actualFormat >= VERSION_46 {
			fieldInfosGen, err = input.ReadLong()
			if err != nil {
				return err
			}
		}
		// Before VERSION_49 the doc-values generation follows the
		// field-infos generation; from VERSION_49 on it is stored itself.
		dvGen := fieldInfosGen
		if actualFormat >= VERSION_49 {
			dvGen, err = input.ReadLong()
			if err != nil {
				return err
			}
		}

		siPerCommit := NewSegmentCommitInfo(info, delCount, delGen, fieldInfosGen, dvGen)
		if actualFormat >= VERSION_46 {
			if actualFormat < VERSION_49 {
				panic("not implemented yet")
			}
			var ss map[string]bool
			ss, err = input.ReadStringSet()
			if err != nil {
				return err
			}
			siPerCommit.SetFieldInfosFiles(ss)

			var numDVFields int
			numDVFields, err = asInt(input.ReadInt())
			if err != nil {
				return err
			}
			if numDVFields != 0 {
				panic("not implemented yet")
			}
			siPerCommit.SetDocValuesUpdatesFiles(make(map[int]map[string]bool))
		}
		sis.Segments = append(sis.Segments, siPerCommit)
	}

	sis.userData, err = input.ReadStringStringMap()
	if err != nil {
		return err
	}

	if actualFormat < VERSION_48 {
		// Older files end with a bare checksum: capture the running value
		// before reading the stored one, then require end of file.
		checksumNow := int64(input.Checksum())
		var checksumThen int64
		checksumThen, err = input.ReadLong()
		if err != nil {
			return err
		}
		if checksumNow != checksumThen {
			return errors.New(fmt.Sprintf(
				"checksum mismatch in segments file: %v vs %v (resource: %v)",
				checksumNow, checksumThen, input))
		}
		err = codec.CheckEOF(input)
		if err != nil {
			return err
		}
	} else {
		_, err = codec.CheckFooter(input)
		if err != nil {
			return err
		}
	}

	loaded = true
	return nil
}
Esempio n. 3
0
// Read loads the commit stored in segmentFileName from directory into sis.
// Note that this may return an IO error if a commit is in process.
//
// Previously loaded segments are cleared first, and generation and
// lastGeneration are derived from the file name. The file is read through a
// checksumming wrapper and the checksum stored at its end must match the one
// computed while reading.
//
// If anything fails after the file has been opened, sis is cleared again and
// the input is closed with its close error suppressed. On success the result
// of closing the input is the returned error.
//
// The function panics for a pre-4.0 file and for any codec other than
// "Lucene42", neither of which is supported.
func (sis *SegmentInfos) Read(directory store.Directory, segmentFileName string) (err error) {
	log.Printf("Reading segment info from %v...", segmentFileName)
	success := false

	// Clear any previous segments:
	sis.Clear()

	sis.generation = GenerationFromSegmentsFileName(segmentFileName)
	sis.lastGeneration = sis.generation

	raw, err := directory.OpenInput(segmentFileName, store.IO_CONTEXT_READ)
	if err != nil {
		return err
	}
	input := store.NewChecksumIndexInput(raw)
	defer func() {
		if !success {
			// Clear any segment infos we had loaded so we
			// have a clean slate on retry:
			sis.Clear()
			util.CloseWhileSuppressingError(input)
			return
		}
		// Report a failed close to the caller instead of discarding it.
		err = input.Close()
	}()

	format, err := input.ReadInt()
	if err != nil {
		return err
	}
	if format != codec.CODEC_MAGIC {
		// TODO support <4.0 index
		panic("Index format pre-4.0 not supported yet")
	}

	// 4.0+
	_, err = codec.CheckHeaderNoMagic(input, "segments", VERSION_40, VERSION_40)
	if err != nil {
		return err
	}
	sis.version, err = input.ReadLong()
	if err != nil {
		return err
	}
	sis.counter, err = asInt(input.ReadInt())
	if err != nil {
		return err
	}
	numSegments, err := asInt(input.ReadInt())
	if err != nil {
		return err
	}
	if numSegments < 0 {
		return errors.New(fmt.Sprintf("invalid segment count: %v (resource: %v)", numSegments, input))
	}

	for seg := 0; seg < numSegments; seg++ {
		// err is local to the loop body here; every return below passes it
		// explicitly, so the named result is still set correctly.
		segName, err := input.ReadString()
		if err != nil {
			return err
		}
		codecName, err := input.ReadString()
		if err != nil {
			return err
		}
		if codecName != "Lucene42" {
			log.Panicf("Not supported yet: %v", codecName)
		}
		fCodec := LoadCodec(codecName)
		log.Printf("SIS.read seg=%v codec=%v", seg, fCodec)
		// The per-segment info is read through the segment's own codec.
		info, err := fCodec.SegmentInfoFormat().SegmentInfoReader()(directory, segName, store.IO_CONTEXT_READ)
		if err != nil {
			return err
		}
		info.SetCodec(fCodec)
		delGen, err := input.ReadLong()
		if err != nil {
			return err
		}
		delCount, err := asInt(input.ReadInt())
		if err != nil {
			return err
		}
		if delCount < 0 || delCount > info.DocCount() {
			return errors.New(fmt.Sprintf("invalid deletion count: %v (resource: %v)", delCount, input))
		}
		sis.Segments = append(sis.Segments, NewSegmentInfoPerCommit(info, delCount, delGen))
	}

	sis.userData, err = input.ReadStringStringMap()
	if err != nil {
		return err
	}

	// Capture the running checksum before reading the stored one, since the
	// read itself goes through the checksumming input.
	checksumNow := int64(input.Checksum())
	checksumThen, err := input.ReadLong()
	if err != nil {
		return err
	}
	if checksumNow != checksumThen {
		return errors.New(fmt.Sprintf(
			"checksum mismatch in segments file: %v vs %v (resource: %v)",
			checksumNow, checksumThen, input))
	}

	success = true
	return nil
}