func (d *CompoundFileDirectory) readEntries(handle IndexInput, dir Directory, name string) (mapping map[string]FileSlice, err error) { var stream IndexInput = nil var entriesStream ChecksumIndexInput = nil // read the first VInt. If it is negative, it's the version number // otherwise it's the count (pre-3.1 indexes) var success = false defer func() { if success { err = util.Close(stream, entriesStream) } else { util.CloseWhileSuppressingError(stream, entriesStream) } }() stream = handle.Clone() // fmt.Printf("Reading from stream: %v\n", stream) firstInt, err := stream.ReadVInt() if err != nil { return nil, err } // impossible for 3.0 to have 63 files in a .cfs, CFS writer was not visible // and separate norms/etc are outside of cfs. if firstInt == int32(CODEC_MAGIC_BYTE1) { if secondByte, err := stream.ReadByte(); err == nil { if thirdByte, err := stream.ReadByte(); err == nil { if fourthByte, err := stream.ReadByte(); err == nil { if secondByte != CODEC_MAGIC_BYTE2 || thirdByte != CODEC_MAGIC_BYTE3 || fourthByte != CODEC_MAGIC_BYTE4 { return nil, errors.New(fmt.Sprintf( "Illegal/impossible header for CFS file: %v,%v,%v", secondByte, thirdByte, fourthByte)) } } } } if err != nil { return nil, err } d.version, err = int32ToInt(codec.CheckHeaderNoMagic(stream, CFD_DATA_CODEC, CFD_VERSION_START, CFD_VERSION_CURRENT)) if err != nil { return nil, err } entriesFileName := util.SegmentFileName(util.StripExtension(name), "", COMPOUND_FILE_ENTRIES_EXTENSION) entriesStream, err = dir.OpenChecksumInput(entriesFileName, IO_CONTEXT_READONCE) if err != nil { return nil, err } _, err = codec.CheckHeader(entriesStream, CFD_ENTRY_CODEC, CFD_VERSION_START, CFD_VERSION_CURRENT) if err != nil { return nil, err } numEntries, err := entriesStream.ReadVInt() if err != nil { return nil, err } mapping = make(map[string]FileSlice) // fmt.Printf("Entries number: %v\n", numEntries) for i := int32(0); i < numEntries; i++ { id, err := entriesStream.ReadString() if err != nil { return nil, err } if _, ok := mapping[id]; ok { return nil, errors.New(fmt.Sprintf( "Duplicate cfs entry id=%v in CFS: %v", id, entriesStream)) } // log.Printf("Found entry: %v", id) offset, err := entriesStream.ReadLong() if err != nil { return nil, err } length, err := entriesStream.ReadLong() if err != nil { return nil, err } mapping[id] = FileSlice{offset, length} } if d.version >= CFD_VERSION_CHECKSUM { _, err = codec.CheckFooter(entriesStream) } else { err = codec.CheckEOF(entriesStream) } if err != nil { return nil, err } } else { // TODO remove once 3.x is not supported anymore panic("not supported yet; will also be obsolete soon") } success = true return mapping, nil }
/* Read a particular segmentFileName. Note that this may return IO error if a commit is in process. */ func (sis *SegmentInfos) Read(directory store.Directory, segmentFileName string) (err error) { // fmt.Printf("Reading segment info from %v...\n", segmentFileName) // Clear any previous segments: sis.Clear() sis.generation = GenerationFromSegmentsFileName(segmentFileName) sis.lastGeneration = sis.generation var input store.ChecksumIndexInput if input, err = directory.OpenChecksumInput(segmentFileName, store.IO_CONTEXT_READ); err != nil { return } var success = false defer func() { if !success { // Clear any segment infos we had loaded so we // have a clean slate on retry: sis.Clear() util.CloseWhileSuppressingError(input) } else { err = input.Close() } }() var format int if format, err = asInt(input.ReadInt()); err != nil { return } var actualFormat int if format == codec.CODEC_MAGIC { // 4.0+ if actualFormat, err = asInt(codec.CheckHeaderNoMagic(input, "segments", VERSION_40, VERSION_49)); err != nil { return } if sis.version, err = input.ReadLong(); err != nil { return } if sis.counter, err = asInt(input.ReadInt()); err != nil { return } var numSegments int if numSegments, err = asInt(input.ReadInt()); err != nil { return } else if numSegments < 0 { return errors.New(fmt.Sprintf("invalid segment count: %v (resource: %v)", numSegments, input)) } var segName, codecName string var fCodec Codec var delGen, fieldInfosGen, dvGen int64 var delCount int for seg := 0; seg < numSegments; seg++ { if segName, err = input.ReadString(); err != nil { return } if codecName, err = input.ReadString(); err != nil { return } fCodec = LoadCodec(codecName) assert2(fCodec != nil, "Invalid codec name: %v", codecName) // fmt.Printf("SIS.read seg=%v codec=%v\n", seg, fCodec) var info *SegmentInfo if info, err = fCodec.SegmentInfoFormat().SegmentInfoReader().Read(directory, segName, store.IO_CONTEXT_READ); err != nil { return } info.SetCodec(fCodec) if delGen, err = input.ReadLong(); err != nil { return } if delCount, err = asInt(input.ReadInt()); err != nil { return } else if delCount < 0 || delCount > info.DocCount() { return errors.New(fmt.Sprintf( "invalid deletion count: %v vs docCount=%v (resource: %v)", delCount, info.DocCount(), input)) } fieldInfosGen = -1 if actualFormat >= VERSION_46 { if fieldInfosGen, err = input.ReadLong(); err != nil { return } } dvGen = -1 if actualFormat >= VERSION_49 { if dvGen, err = input.ReadLong(); err != nil { return } } else { dvGen = fieldInfosGen } siPerCommit := NewSegmentCommitInfo(info, delCount, delGen, fieldInfosGen, dvGen) if actualFormat >= VERSION_46 { if actualFormat < VERSION_49 { panic("not implemented yet") } else { var ss map[string]bool if ss, err = input.ReadStringSet(); err != nil { return err } siPerCommit.SetFieldInfosFiles(ss) var dvUpdatesFiles map[int]map[string]bool var numDVFields int if numDVFields, err = asInt(input.ReadInt()); err != nil { return err } if numDVFields == 0 { dvUpdatesFiles = make(map[int]map[string]bool) } else { panic("not implemented yet") } siPerCommit.SetDocValuesUpdatesFiles(dvUpdatesFiles) } } sis.Segments = append(sis.Segments, siPerCommit) } if sis.userData, err = input.ReadStringStringMap(); err != nil { return err } } else { // TODO support <4.0 index panic("Index format pre-4.0 not supported yet") } if actualFormat >= VERSION_48 { if _, err = codec.CheckFooter(input); err != nil { return } } else { var checksumNow = int64(input.Checksum()) var checksumThen int64 if checksumThen, err = input.ReadLong(); err != nil { return } if checksumNow != checksumThen { return errors.New(fmt.Sprintf( "checksum mismatch in segments file: %v vs %v (resource: %v)", checksumNow, checksumThen, input)) } if err = codec.CheckEOF(input); err != nil { return } } success = true return nil }
/* Read a particular segmentFileName. Note that this may return IO error if a commit is in process. */ func (sis *SegmentInfos) Read(directory store.Directory, segmentFileName string) error { log.Printf("Reading segment info from %v...", segmentFileName) success := false // Clear any previous segments: sis.Clear() sis.generation = GenerationFromSegmentsFileName(segmentFileName) sis.lastGeneration = sis.generation main, err := directory.OpenInput(segmentFileName, store.IO_CONTEXT_READ) if err != nil { return err } input := store.NewChecksumIndexInput(main) defer func() { if !success { // Clear any segment infos we had loaded so we // have a clean slate on retry: sis.Clear() util.CloseWhileSuppressingError(input) } else { input.Close() } }() format, err := input.ReadInt() if err != nil { return err } if format == codec.CODEC_MAGIC { // 4.0+ _, err = codec.CheckHeaderNoMagic(input, "segments", VERSION_40, VERSION_40) if err != nil { return err } sis.version, err = input.ReadLong() if err != nil { return err } sis.counter, err = asInt(input.ReadInt()) if err != nil { return err } numSegments, err := asInt(input.ReadInt()) if err != nil { return err } if numSegments < 0 { return errors.New(fmt.Sprintf("invalid segment count: %v (resource: %v)", numSegments, input)) } for seg := 0; seg < numSegments; seg++ { segName, err := input.ReadString() if err != nil { return err } codecName, err := input.ReadString() if err != nil { return err } if codecName != "Lucene42" { log.Panicf("Not supported yet: %v", codecName) } fCodec := LoadCodec(codecName) log.Printf("SIS.read seg=%v codec=%v", seg, fCodec) info, err := fCodec.SegmentInfoFormat().SegmentInfoReader()(directory, segName, store.IO_CONTEXT_READ) // method := NewLucene42Codec() // info, err := method.ReadSegmentInfo(directory, segName, store.IO_CONTEXT_READ) if err != nil { return err } // info.codec = method info.SetCodec(fCodec) delGen, err := input.ReadLong() if err != nil { return err } delCount, err := asInt(input.ReadInt()) if err != nil { return err } if delCount < 0 || delCount > info.DocCount() { return errors.New(fmt.Sprintf("invalid deletion count: %v (resource: %v)", delCount, input)) } sis.Segments = append(sis.Segments, NewSegmentInfoPerCommit(info, delCount, delGen)) } sis.userData, err = input.ReadStringStringMap() if err != nil { return err } } else { // TODO support <4.0 index panic("Index format pre-4.0 not supported yet") } checksumNow := int64(input.Checksum()) checksumThen, err := input.ReadLong() if err != nil { return err } if checksumNow != checksumThen { return errors.New(fmt.Sprintf( "checksum mismatch in segments file: %v vs %v (resource: %v)", checksumNow, checksumThen, input)) } success = true return nil }