/* Reads the most recent FieldInfos of the given segment info. */
func ReadFieldInfos(info *SegmentCommitInfo) (fis FieldInfos, err error) {
	var dir store.Directory
	var closeDir bool
	if info.FieldInfosGen() == -1 && info.Info.IsCompoundFile() {
		// no fieldInfos gen and segment uses a compound file
		if dir, err = store.NewCompoundFileDirectory(info.Info.Dir,
			util.SegmentFileName(info.Info.Name, "", store.COMPOUND_FILE_EXTENSION),
			store.IO_CONTEXT_READONCE, false); err != nil {
			return
		}
		closeDir = true
	} else {
		// gen'd FIS are read outside CFS, or the segment doesn't use a compound file
		dir = info.Info.Dir
		closeDir = false
	}
	defer func() {
		if closeDir {
			err = mergeError(err, dir.Close())
		}
	}()

	var segmentSuffix string
	if n := info.FieldInfosGen(); n != -1 {
		segmentSuffix = strconv.FormatInt(n, 36)
	}
	codec := info.Info.Codec().(Codec)
	fisFormat := codec.FieldInfosFormat()
	return fisFormat.FieldInfosReader()(dir, info.Info.Name, segmentSuffix, store.IO_CONTEXT_READONCE)
}
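// Note: mergeError is used throughout this section to fold a deferred
// Close() error into the primary return error, but its definition is not
// shown here. A minimal sketch, assuming it simply keeps the first non-nil
// error (hypothetical; the real helper may differ):
func mergeError(err, err2 error) error {
	if err != nil {
		return err
	}
	return err2
}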
func NewMockDirectoryWrapper(random *rand.Rand, delegate store.Directory) *MockDirectoryWrapper {
	ans := &MockDirectoryWrapper{
		noDeleteOpenFile:                 true,
		preventDoubleWrite:               true,
		trackDiskUsage:                   false,
		wrapLockFactory:                  true,
		openFilesForWrite:                make(map[string]bool),
		openLocks:                        make(map[string]bool),
		openLocksLock:                    &sync.Mutex{},
		throttling:                       THROTTLING_SOMETIMES,
		inputCloneCount:                  0,
		openFileHandles:                  make(map[io.Closer]error),
		failOnCreateOutput:               true,
		failOnOpenInput:                  true,
		assertNoUnreferencedFilesOnClose: true,
	}
	ans.BaseDirectoryWrapperImpl = NewBaseDirectoryWrapper(delegate)
	ans.Locker = &sync.Mutex{}
	// must make a private random since our methods are called from
	// different threads; else test failures may not be reproducible from
	// the original seed
	ans.randomState = rand.New(rand.NewSource(random.Int63()))
	ans.throttledOutput = NewThrottledIndexOutput(
		MBitsToBytes(40+ans.randomState.Intn(10)), 5+ans.randomState.Int63n(5), nil)
	// force wrapping of LockFactory
	ans.myLockFactory = newMockLockFactoryWrapper(ans, delegate.LockFactory())
	ans.init()
	return ans
}
/*
Writes this vector to the file name in Directory d, in a format that can be
read by the constructor BitVector(Directory, String, IOContext).
*/
func (bv *BitVector) Write(d store.Directory, name string, ctx store.IOContext) (err error) {
	assert(reflect.TypeOf(d).Name() != "CompoundFileDirectory")
	var output store.IndexOutput
	if output, err = d.CreateOutput(name, ctx); err != nil {
		return err
	}
	defer func() {
		err = mergeError(err, output.Close())
	}()

	if err = output.WriteInt(-2); err != nil {
		return err
	}
	if err = codec.WriteHeader(output, CODEC, BV_VERSION_CURRENT); err != nil {
		return err
	}
	if bv.isSparse() {
		// a sparse bit-set is more efficiently saved as d-gaps
		err = bv.writeClearedDgaps(output)
	} else {
		err = bv.writeBits(output)
	}
	if err != nil {
		return err
	}
	if err = codec.WriteFooter(output); err != nil {
		return err
	}
	bv.assertCount()
	return nil
}
/*
Commit live docs (writes new _X_N.del files) and field updates (writes new
_X_N updates files) to the directory; returns true if it wrote any file,
and false if there were no new deletes or updates to write:
*/
func (rld *ReadersAndUpdates) writeLiveDocs(dir store.Directory) (bool, error) {
	rld.Lock()
	defer rld.Unlock()
	log.Printf("rld.writeLiveDocs seg=%v pendingDelCount=%v", rld.info, rld._pendingDeleteCount)
	if rld._pendingDeleteCount != 0 {
		// We have new deletes
		assert(rld._liveDocs.Length() == rld.info.Info.DocCount())

		// Do this so we can delete any created files on error; this saves
		// all codecs from having to do it:
		trackingDir := store.NewTrackingDirectoryWrapper(dir)

		// We can write directly to the actual name (vs writing to a .tmp &
		// renaming it) because the file is not live until the segments file
		// is written:
		var success = false
		defer func() {
			if !success {
				// Advance only the nextWriteDelGen so that a 2nd attempt to
				// write will write to a new file
				rld.info.AdvanceNextWriteDelGen()
				// Delete any partially created file(s):
				trackingDir.EachCreatedFiles(func(filename string) {
					dir.DeleteFile(filename) // ignore error
				})
			}
		}()

		err := rld.info.Info.Codec().(Codec).LiveDocsFormat().WriteLiveDocs(
			rld._liveDocs.(util.MutableBits), trackingDir, rld.info,
			rld._pendingDeleteCount, store.IO_CONTEXT_DEFAULT)
		if err != nil {
			return false, err
		}
		success = true

		// If we hit an error in the line above (e.g. disk full) then info's
		// delGen remains pointing to the previous (successfully written)
		// del docs:
		rld.info.AdvanceDelGen()
		rld.info.SetDelCount(rld.info.DelCount() + rld._pendingDeleteCount)
		assert(rld.info.DelCount() <= rld.info.Info.DocCount())

		rld._pendingDeleteCount = 0
		return true, nil
	}
	return false, nil
}
func (w *Lucene40SegmentInfoWriter) Write(dir store.Directory, si *SegmentInfo,
	fis FieldInfos, ctx store.IOContext) (err error) {

	filename := util.SegmentFileName(si.Name, "", LUCENE40_SI_EXTENSION)
	si.AddFile(filename)

	var output store.IndexOutput
	output, err = dir.CreateOutput(filename, ctx)
	if err != nil {
		return err
	}

	var success = false
	defer func() {
		if !success {
			util.CloseWhileSuppressingError(output)
			si.Dir.DeleteFile(filename) // ignore error
		} else {
			err = mergeError(err, output.Close())
		}
	}()

	err = codec.WriteHeader(output, LUCENE40_CODEC_NAME, LUCENE40_VERSION_CURRENT)
	if err != nil {
		return err
	}
	// Write the Lucene version that created this segment, since 3.1
	err = store.Stream(output).WriteString(si.Version().String()).
		WriteInt(int32(si.DocCount())).
		WriteByte(func() byte {
			if si.IsCompoundFile() {
				return SEGMENT_INFO_YES
			}
			return byte((SEGMENT_INFO_NO + 256) % 256) // Go byte is non-negative, unlike Java
		}()).
		WriteStringStringMap(si.Diagnostics()).
		WriteStringStringMap(map[string]string{}).
		WriteStringSet(si.Files()).
		Close()
	if err != nil {
		return err
	}

	success = true
	return nil
}
/*
A utility for writing the SEGMENTS_GEN file to a Directory.

NOTE: this is an internal utility which is kept public so that it's
accessible by code from other packages. You should avoid calling this
method unless you're absolutely sure what you're doing!
*/
func writeSegmentsGen(dir store.Directory, generation int64) {
	if err := func() (err error) {
		var genOutput store.IndexOutput
		genOutput, err = dir.CreateOutput(INDEX_FILENAME_SEGMENTS_GEN, store.IO_CONTEXT_READONCE)
		if err != nil {
			return err
		}
		defer func() {
			err = mergeError(err, genOutput.Close())
			err = mergeError(err, dir.Sync([]string{INDEX_FILENAME_SEGMENTS_GEN}))
		}()

		if err = genOutput.WriteInt(FORMAT_SEGMENTS_GEN_CURRENT); err == nil {
			if err = genOutput.WriteLong(generation); err == nil {
				if err = genOutput.WriteLong(generation); err == nil {
					err = codec.WriteFooter(genOutput)
				}
			}
		}
		return err
	}(); err != nil {
		// It's OK if we fail to write this file since it's used only as one
		// of the retry fallbacks.
		dir.DeleteFile(INDEX_FILENAME_SEGMENTS_GEN) // ignore error; this file is only used in a retry fallback on init
	}
}
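// For illustration, the segments.gen layout written above is: the format
// constant, the generation written twice (the duplicate acts as a
// consistency check), then a codec footer. A hypothetical reader sketch,
// not part of this package (readers treat this file only as a fallback
// hint, and footer verification is elided here):
func readSegmentsGen(dir store.Directory) (gen int64, err error) {
	var in store.IndexInput
	if in, err = dir.OpenInput(INDEX_FILENAME_SEGMENTS_GEN, store.IO_CONTEXT_READONCE); err != nil {
		return -1, err
	}
	defer func() { err = mergeError(err, in.Close()) }()
	if _, err = in.ReadInt(); err != nil { // FORMAT_SEGMENTS_GEN_CURRENT
		return -1, err
	}
	gen0, err := in.ReadLong()
	if err != nil {
		return -1, err
	}
	gen1, err := in.ReadLong()
	if err != nil {
		return -1, err
	}
	if gen0 != gen1 {
		return -1, errors.New("segments.gen: generations do not match")
	}
	return gen0, nil
}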
/*
Returns true if an index likely exists at the specified directory. Note
that it may return true even if a corrupt index exists, or if an index is
only in the process of committing.
*/
func IsIndexExists(directory store.Directory) (ok bool, err error) {
	// LUCENE-2812, LUCENE-2727, LUCENE-4738: this logic will return true in
	// cases that should arguably be false, such as when only
	// IW.prepareCommit has been called, or on a corrupt first commit, but
	// it's too deadly to make this logic "smarter" and risk accidentally
	// returning false due to various cases like file descriptor exhaustion,
	// access denied, etc., because in that case IndexWriter may delete the
	// entire index. It's safer to err towards "index exists" than try to be
	// smart about detecting not-yet-fully-committed or corrupt indices.
	// This means that IndexWriter will throw an exception on such indices
	// and the app must resolve the situation manually:
	var files []string
	files, err = directory.ListAll()
	if _, ok := err.(*store.NoSuchDirectoryError); ok {
		// Directory does not exist --> no index exists
		return false, nil
	} else if err != nil {
		return false, err
	}
	return IsIndexFileExists(files), nil
}
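// IsIndexFileExists is referenced above but not defined in this section. A
// minimal sketch, assuming it only scans the listing for a segments_N file
// or the segments.gen fallback (hypothetical; the real check may differ):
func IsIndexFileExists(files []string) bool {
	for _, file := range files {
		if strings.HasPrefix(file, util.SEGMENTS) || file == INDEX_FILENAME_SEGMENTS_GEN {
			return true
		}
	}
	return false
}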
func NewLucene41PostingsReader(dir store.Directory, fis model.FieldInfos, si *model.SegmentInfo,
	ctx store.IOContext, segmentSuffix string) (r PostingsReaderBase, err error) {

	log.Print("Initializing Lucene41PostingsReader...")
	success := false
	var docIn, posIn, payIn store.IndexInput
	defer func() {
		if !success {
			log.Print("Failed to initialize Lucene41PostingsReader.")
			if err != nil {
				log.Print("DEBUG ", err)
			}
			util.CloseWhileSuppressingError(docIn, posIn, payIn)
		}
	}()

	docIn, err = dir.OpenInput(util.SegmentFileName(si.Name, segmentSuffix, LUCENE41_DOC_EXTENSION), ctx)
	if err != nil {
		return r, err
	}
	_, err = codec.CheckHeader(docIn, LUCENE41_DOC_CODEC, LUCENE41_VERSION_CURRENT, LUCENE41_VERSION_CURRENT)
	if err != nil {
		return r, err
	}
	forUtil, err := NewForUtil(docIn)
	if err != nil {
		return r, err
	}

	if fis.HasProx {
		posIn, err = dir.OpenInput(util.SegmentFileName(si.Name, segmentSuffix, LUCENE41_POS_EXTENSION), ctx)
		if err != nil {
			return r, err
		}
		_, err = codec.CheckHeader(posIn, LUCENE41_POS_CODEC, LUCENE41_VERSION_CURRENT, LUCENE41_VERSION_CURRENT)
		if err != nil {
			return r, err
		}

		if fis.HasPayloads || fis.HasOffsets {
			payIn, err = dir.OpenInput(util.SegmentFileName(si.Name, segmentSuffix, LUCENE41_PAY_EXTENSION), ctx)
			if err != nil {
				return r, err
			}
			_, err = codec.CheckHeader(payIn, LUCENE41_PAY_CODEC, LUCENE41_VERSION_CURRENT, LUCENE41_VERSION_CURRENT)
			if err != nil {
				return r, err
			}
		}
	}

	success = true
	return &Lucene41PostingsReader{docIn, posIn, payIn, forUtil}, nil
}
/*
Read a particular segmentFileName. Note that this may return an IO error if
a commit is in process.
*/
func (sis *SegmentInfos) Read(directory store.Directory, segmentFileName string) (err error) {
	// fmt.Printf("Reading segment info from %v...\n", segmentFileName)

	// Clear any previous segments:
	sis.Clear()

	sis.generation = GenerationFromSegmentsFileName(segmentFileName)
	sis.lastGeneration = sis.generation

	var input store.ChecksumIndexInput
	if input, err = directory.OpenChecksumInput(segmentFileName, store.IO_CONTEXT_READ); err != nil {
		return
	}

	var success = false
	defer func() {
		if !success {
			// Clear any segment infos we had loaded so we have a clean
			// slate on retry:
			sis.Clear()
			util.CloseWhileSuppressingError(input)
		} else {
			err = input.Close()
		}
	}()

	var format int
	if format, err = asInt(input.ReadInt()); err != nil {
		return
	}

	var actualFormat int
	if format == codec.CODEC_MAGIC { // 4.0+
		if actualFormat, err = asInt(codec.CheckHeaderNoMagic(input, "segments", VERSION_40, VERSION_49)); err != nil {
			return
		}
		if sis.version, err = input.ReadLong(); err != nil {
			return
		}
		if sis.counter, err = asInt(input.ReadInt()); err != nil {
			return
		}
		var numSegments int
		if numSegments, err = asInt(input.ReadInt()); err != nil {
			return
		} else if numSegments < 0 {
			return errors.New(fmt.Sprintf("invalid segment count: %v (resource: %v)", numSegments, input))
		}

		var segName, codecName string
		var fCodec Codec
		var delGen, fieldInfosGen, dvGen int64
		var delCount int
		for seg := 0; seg < numSegments; seg++ {
			if segName, err = input.ReadString(); err != nil {
				return
			}
			if codecName, err = input.ReadString(); err != nil {
				return
			}
			fCodec = LoadCodec(codecName)
			assert2(fCodec != nil, "Invalid codec name: %v", codecName)
			// fmt.Printf("SIS.read seg=%v codec=%v\n", seg, fCodec)
			var info *SegmentInfo
			if info, err = fCodec.SegmentInfoFormat().SegmentInfoReader().Read(directory, segName, store.IO_CONTEXT_READ); err != nil {
				return
			}
			info.SetCodec(fCodec)
			if delGen, err = input.ReadLong(); err != nil {
				return
			}
			if delCount, err = asInt(input.ReadInt()); err != nil {
				return
			} else if delCount < 0 || delCount > info.DocCount() {
				return errors.New(fmt.Sprintf(
					"invalid deletion count: %v vs docCount=%v (resource: %v)",
					delCount, info.DocCount(), input))
			}
			fieldInfosGen = -1
			if actualFormat >= VERSION_46 {
				if fieldInfosGen, err = input.ReadLong(); err != nil {
					return
				}
			}
			dvGen = -1
			if actualFormat >= VERSION_49 {
				if dvGen, err = input.ReadLong(); err != nil {
					return
				}
			} else {
				dvGen = fieldInfosGen
			}
			siPerCommit := NewSegmentCommitInfo(info, delCount, delGen, fieldInfosGen, dvGen)
			if actualFormat >= VERSION_46 {
				if actualFormat < VERSION_49 {
					panic("not implemented yet")
				} else {
					var ss map[string]bool
					if ss, err = input.ReadStringSet(); err != nil {
						return err
					}
					siPerCommit.SetFieldInfosFiles(ss)
					var dvUpdatesFiles map[int]map[string]bool
					var numDVFields int
					if numDVFields, err = asInt(input.ReadInt()); err != nil {
						return err
					}
					if numDVFields == 0 {
						dvUpdatesFiles = make(map[int]map[string]bool)
					} else {
						panic("not implemented yet")
					}
					siPerCommit.SetDocValuesUpdatesFiles(dvUpdatesFiles)
				}
			}
			sis.Segments = append(sis.Segments, siPerCommit)
		}
		if sis.userData, err = input.ReadStringStringMap(); err != nil {
			return err
		}
	} else {
		// TODO support <4.0 index
		panic("Index format pre-4.0 not supported yet")
	}

	if actualFormat >= VERSION_48 {
		if _, err = codec.CheckFooter(input); err != nil {
			return
		}
	} else {
		var checksumNow = int64(input.Checksum())
		var checksumThen int64
		if checksumThen, err = input.ReadLong(); err != nil {
			return
		}
		if checksumNow != checksumThen {
			return errors.New(fmt.Sprintf(
				"checksum mismatch in segments file: %v vs %v (resource: %v)",
				checksumNow, checksumThen, input))
		}
		if err = codec.CheckEOF(input); err != nil {
			return
		}
	}

	success = true
	return nil
}
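// asInt, used heavily above, adapts the (int32, error) pair returned by the
// store readers to a plain int. It is not defined in this section; a
// minimal sketch of the assumed helper:
func asInt(n int32, err error) (int, error) {
	return int(n), err
}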
/*
Constructs a new IndexWriter per the settings given in conf. If you want to
make "live" changes to this writer instance, use Config().

NOTE: after this writer is created, the given configuration instance cannot
be passed to another writer. If you intend to do so, you should clone it
beforehand.
*/
func NewIndexWriter(d store.Directory, conf *IndexWriterConfig) (w *IndexWriter, err error) {
	ans := &IndexWriter{
		Locker:                &sync.Mutex{},
		ClosingControl:        newClosingControl(),
		segmentsToMerge:       make(map[*SegmentInfoPerCommit]bool),
		mergeExceptions:       make([]*OneMerge, 0),
		doAfterFlush:          func() error { return nil },
		doBeforeFlush:         func() error { return nil },
		commitLock:            &sync.Mutex{},
		fullFlushLock:         &sync.Mutex{},
		config:                newLiveIndexWriterConfigFrom(conf),
		directory:             d,
		analyzer:              conf.analyzer,
		infoStream:            conf.infoStream,
		mergePolicy:           conf.mergePolicy,
		mergeScheduler:        conf.mergeScheduler,
		codec:                 conf.codec,
		bufferedDeletesStream: newBufferedDeletesStream(conf.infoStream),
		poolReaders:           conf.readerPooling,
		writeLock:             d.MakeLock(WRITE_LOCK_NAME),
	}
	ans.readerPool = newReaderPool(ans)
	ans.MergeControl = newMergeControl(conf.infoStream, ans.readerPool)
	conf.setIndexWriter(ans)
	ans.mergePolicy.SetIndexWriter(ans)

	// obtain write lock
	if ok, err := ans.writeLock.ObtainWithin(conf.writeLockTimeout); !ok || err != nil {
		if err != nil {
			return nil, err
		}
		return nil, errors.New(fmt.Sprintf("Index locked for write: %v", ans.writeLock))
	}

	var success bool = false
	defer func() {
		if !success {
			if ans.infoStream.IsEnabled("IW") {
				ans.infoStream.Message("IW", "init: hit exception on init; releasing write lock")
			}
			ans.writeLock.Release() // don't mask the original exception
			ans.writeLock = nil
		}
	}()

	var create bool
	switch conf.openMode {
	case OPEN_MODE_CREATE:
		create = true
	case OPEN_MODE_APPEND:
		create = false
	default:
		// CREATE_OR_APPEND - create only if an index does not exist
		ok, err := IsIndexExists(d)
		if err != nil {
			return nil, err
		}
		create = !ok
	}

	// If index is too old, reading the segments will return
	// IndexFormatTooOldError
	ans.segmentInfos = &SegmentInfos{}

	var initialIndexExists bool = true

	if create {
		// Try to read first. This is to allow create against an index
		// that's currently open for searching. In this case we write the
		// next segments_N file with no segments:
		err = ans.segmentInfos.ReadAll(d)
		if err == nil {
			ans.segmentInfos.Clear()
		} else {
			// Likely this means it's a fresh directory
			initialIndexExists = false
			err = nil
		}

		// Record that we have a change (zero out all segments) pending:
		ans.changed()
	} else {
		err = ans.segmentInfos.ReadAll(d)
		if err != nil {
			return
		}

		if commit := conf.commit; commit != nil {
			// Swap out all segments, but keep metadata in SegmentInfos,
			// like version & generation, to preserve write-once. This is
			// important if readers are open against the future commit
			// points.
			assert2(commit.Directory() == d, "IndexCommit's directory doesn't match my directory")
			oldInfos := &SegmentInfos{}
			ans.segmentInfos.replace(oldInfos)
			ans.changed()
			ans.infoStream.Message("IW", "init: loaded commit '%v'", commit.SegmentsFileName())
		}
	}

	ans.rollbackSegments = ans.segmentInfos.createBackupSegmentInfos()

	// start with previous field numbers, but new FieldInfos
	ans.globalFieldNumberMap, err = ans.fieldNumberMap()
	if err != nil {
		return
	}
	ans.config.flushPolicy.init(ans.config)
	ans.docWriter = newDocumentsWriter(ans, ans.config, d)
	ans.eventQueue = ans.docWriter.events

	// Default deleter (for backwards compatibility) is
	// KeepOnlyLastCommitDeleter:
	ans.deleter, err = newIndexFileDeleter(d, conf.delPolicy, ans.segmentInfos,
		ans.infoStream, ans, initialIndexExists)
	if err != nil {
		return
	}

	if ans.deleter.startingCommitDeleted {
		// Deletion policy deleted the "head" commit point. We have to mark
		// ourselves as changed so that if we are closed w/o any further
		// changes we write a new segments_N file.
		ans.changed()
	}

	if ans.infoStream.IsEnabled("IW") {
		ans.infoStream.Message("IW", "init: create=%v", create)
		ans.messageState()
	}

	success = true
	return ans, nil
}
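// Illustrative usage of the constructor above: open a writer, then close it
// to release the write lock. Only NewIndexWriter is confirmed by this
// section; Close() is an assumption about the surrounding API.
func exampleOpenWriter(d store.Directory, conf *IndexWriterConfig) error {
	w, err := NewIndexWriter(d, conf)
	if err != nil {
		return err
	}
	return w.Close() // assumed to flush pending changes and release the lock
}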
func (r *Lucene40SegmentInfoReader) Read(dir store.Directory, segment string,
	context store.IOContext) (si *SegmentInfo, err error) {

	si = new(SegmentInfo)
	fileName := util.SegmentFileName(segment, "", LUCENE40_SI_EXTENSION)
	input, err := dir.OpenInput(fileName, context)
	if err != nil {
		return nil, err
	}

	success := false
	defer func() {
		if !success {
			util.CloseWhileSuppressingError(input)
		} else {
			input.Close()
		}
	}()

	_, err = codec.CheckHeader(input, LUCENE40_CODEC_NAME, LUCENE40_VERSION_START, LUCENE40_VERSION_CURRENT)
	if err != nil {
		return nil, err
	}
	versionStr, err := input.ReadString()
	if err != nil {
		return nil, err
	}
	version, err := util.ParseVersion(versionStr)
	if err != nil {
		return nil, err
	}
	docCount, err := input.ReadInt()
	if err != nil {
		return nil, err
	}
	if docCount < 0 {
		return nil, errors.New(fmt.Sprintf("invalid docCount: %v (resource=%v)", docCount, input))
	}
	sicf, err := input.ReadByte()
	if err != nil {
		return nil, err
	}
	isCompoundFile := (sicf == SEGMENT_INFO_YES)
	diagnostics, err := input.ReadStringStringMap()
	if err != nil {
		return nil, err
	}
	_, err = input.ReadStringStringMap() // read deprecated attributes
	if err != nil {
		return nil, err
	}
	files, err := input.ReadStringSet()
	if err != nil {
		return nil, err
	}

	if err = codec.CheckEOF(input); err != nil {
		return nil, err
	}

	si = NewSegmentInfo(dir, version, segment, int(docCount), isCompoundFile, nil, diagnostics)
	si.SetFiles(files)

	success = true
	return si, nil
}
// Sole constructor
func newCompressingStoredFieldsReader(d store.Directory, si *model.SegmentInfo,
	segmentSuffix string, fn model.FieldInfos, ctx store.IOContext,
	formatName string, compressionMode CompressionMode) (r *CompressingStoredFieldsReader, err error) {

	r = &CompressingStoredFieldsReader{}
	r.compressionMode = compressionMode
	segment := si.Name
	r.fieldInfos = fn
	r.numDocs = si.DocCount()

	var indexStream store.ChecksumIndexInput
	success := false
	defer func() {
		if !success {
			util.CloseWhileSuppressingError(r, indexStream)
		}
	}()

	indexStreamFN := util.SegmentFileName(segment, segmentSuffix, lucene40.FIELDS_INDEX_EXTENSION)
	fieldsStreamFN := util.SegmentFileName(segment, segmentSuffix, lucene40.FIELDS_EXTENSION)

	// Load the index into memory
	if indexStream, err = d.OpenChecksumInput(indexStreamFN, ctx); err != nil {
		return nil, err
	}

	codecNameIdx := formatName + CODEC_SFX_IDX
	if r.version, err = int32AsInt(codec.CheckHeader(indexStream, codecNameIdx,
		VERSION_START, VERSION_CURRENT)); err != nil {
		return nil, err
	}
	assert(int64(codec.HeaderLength(codecNameIdx)) == indexStream.FilePointer())
	if r.indexReader, err = newCompressingStoredFieldsIndexReader(indexStream, si); err != nil {
		return nil, err
	}

	var maxPointer int64 = -1
	if r.version >= VERSION_CHECKSUM {
		if maxPointer, err = indexStream.ReadVLong(); err != nil {
			return nil, err
		}
		if _, err = codec.CheckFooter(indexStream); err != nil {
			return nil, err
		}
	} else {
		if err = codec.CheckEOF(indexStream); err != nil {
			return nil, err
		}
	}
	if err = indexStream.Close(); err != nil {
		return nil, err
	}
	indexStream = nil

	// Open the data file and read metadata
	if r.fieldsStream, err = d.OpenInput(fieldsStreamFN, ctx); err != nil {
		return nil, err
	}
	if r.version >= VERSION_CHECKSUM {
		if maxPointer+codec.FOOTER_LENGTH != r.fieldsStream.Length() {
			return nil, errors.New(fmt.Sprintf(
				"Invalid fieldsStream maxPointer (file truncated?): maxPointer=%v, length=%v",
				maxPointer, r.fieldsStream.Length()))
		}
	} else {
		maxPointer = r.fieldsStream.Length()
	}
	r.maxPointer = maxPointer

	codecNameDat := formatName + CODEC_SFX_DAT
	var fieldsVersion int
	if fieldsVersion, err = int32AsInt(codec.CheckHeader(r.fieldsStream,
		codecNameDat, VERSION_START, VERSION_CURRENT)); err != nil {
		return nil, err
	}
	assert2(r.version == fieldsVersion,
		"Version mismatch between stored fields index and data: %v != %v",
		r.version, fieldsVersion)
	assert(int64(codec.HeaderLength(codecNameDat)) == r.fieldsStream.FilePointer())

	r.chunkSize = -1
	if r.version >= VERSION_BIG_CHUNKS {
		if r.chunkSize, err = int32AsInt(r.fieldsStream.ReadVInt()); err != nil {
			return nil, err
		}
	}
	if r.packedIntsVersion, err = int32AsInt(r.fieldsStream.ReadVInt()); err != nil {
		return nil, err
	}
	r.decompressor = compressionMode.NewDecompressor()
	r.bytes = make([]byte, 0)

	if r.version >= VERSION_CHECKSUM {
		// NOTE: data file is too costly to verify checksum against all the
		// bytes on open, but for now we at least verify proper structure of
		// the checksum footer: which looks for FOOTER_MAGIC + algorithmID.
		// This is cheap and can detect some forms of corruption such as
		// file truncation.
		if _, err = codec.RetrieveChecksum(r.fieldsStream); err != nil {
			return nil, err
		}
	}

	success = true
	return r, nil
}
func NewLucene41PostingsReader(dir store.Directory, fis FieldInfos, si *SegmentInfo,
	ctx store.IOContext, segmentSuffix string) (r PostingsReaderBase, err error) {

	// fmt.Println("Initializing Lucene41PostingsReader...")
	success := false
	var docIn, posIn, payIn store.IndexInput
	defer func() {
		if !success {
			fmt.Println("Failed to initialize Lucene41PostingsReader.")
			util.CloseWhileSuppressingError(docIn, posIn, payIn)
		}
	}()

	docIn, err = dir.OpenInput(util.SegmentFileName(si.Name, segmentSuffix, LUCENE41_DOC_EXTENSION), ctx)
	if err != nil {
		return nil, err
	}
	var version int32
	version, err = codec.CheckHeader(docIn, LUCENE41_DOC_CODEC, LUCENE41_VERSION_START, LUCENE41_VERSION_CURRENT)
	if err != nil {
		return nil, err
	}
	forUtil, err := NewForUtilFrom(docIn)
	if err != nil {
		return nil, err
	}

	if version >= LUCENE41_VERSION_CHECKSUM {
		// NOTE: data file is too costly to verify checksum against all the
		// bytes on open, but for now we at least verify proper structure of
		// the checksum footer: which looks for FOOTER_MAGIC + algorithmID.
		// This is cheap and can detect some forms of corruption such as
		// file truncation.
		if _, err = codec.RetrieveChecksum(docIn); err != nil {
			return nil, err
		}
	}

	if fis.HasProx {
		posIn, err = dir.OpenInput(util.SegmentFileName(si.Name, segmentSuffix, LUCENE41_POS_EXTENSION), ctx)
		if err != nil {
			return nil, err
		}
		_, err = codec.CheckHeader(posIn, LUCENE41_POS_CODEC, version, version)
		if err != nil {
			return nil, err
		}

		if version >= LUCENE41_VERSION_CHECKSUM {
			// NOTE: as above, verify only the structure of the checksum
			// footer, not the checksum of the whole file.
			if _, err = codec.RetrieveChecksum(posIn); err != nil {
				return nil, err
			}
		}

		if fis.HasPayloads || fis.HasOffsets {
			payIn, err = dir.OpenInput(util.SegmentFileName(si.Name, segmentSuffix, LUCENE41_PAY_EXTENSION), ctx)
			if err != nil {
				return nil, err
			}
			_, err = codec.CheckHeader(payIn, LUCENE41_PAY_CODEC, version, version)
			if err != nil {
				return nil, err
			}

			if version >= LUCENE41_VERSION_CHECKSUM {
				// NOTE: as above, verify only the structure of the checksum
				// footer, not the checksum of the whole file.
				if _, err = codec.RetrieveChecksum(payIn); err != nil {
					return nil, err
				}
			}
		}
	}

	success = true
	return &Lucene41PostingsReader{docIn, posIn, payIn, forUtil, int(version)}, nil
}
func newBlockTreeTermsReader(dir store.Directory, fieldInfos model.FieldInfos,
	info *model.SegmentInfo, postingsReader PostingsReaderBase,
	ctx store.IOContext, segmentSuffix string, indexDivisor int) (p FieldsProducer, err error) {

	log.Print("Initializing BlockTreeTermsReader...")
	fp := &BlockTreeTermsReader{
		postingsReader: postingsReader,
		fields:         make(map[string]FieldReader),
		segment:        info.Name,
	}
	fp.in, err = dir.OpenInput(util.SegmentFileName(info.Name, segmentSuffix, BTT_EXTENSION), ctx)
	if err != nil {
		return fp, err
	}

	success := false
	var indexIn store.IndexInput
	defer func() {
		if !success {
			log.Print("Failed to initialize BlockTreeTermsReader.")
			if err != nil {
				log.Print("DEBUG ", err)
			}
			// this.close() will close in:
			util.CloseWhileSuppressingError(indexIn, fp)
		}
	}()

	fp.version, err = fp.readHeader(fp.in)
	if err != nil {
		return fp, err
	}
	log.Printf("Version: %v", fp.version)

	if indexDivisor != -1 {
		indexIn, err = dir.OpenInput(util.SegmentFileName(info.Name, segmentSuffix, BTT_INDEX_EXTENSION), ctx)
		if err != nil {
			return fp, err
		}
		indexVersion, err := fp.readIndexHeader(indexIn)
		if err != nil {
			return fp, err
		}
		log.Printf("Index version: %v", indexVersion)
		if int(indexVersion) != fp.version {
			return fp, errors.New(fmt.Sprintf("mismatched version files: %v=%v,%v=%v",
				fp.in, fp.version, indexIn, indexVersion))
		}
	}

	// Have PostingsReader init itself
	postingsReader.Init(fp.in)

	// Read per-field details
	fp.seekDir(fp.in, fp.dirOffset)
	if indexDivisor != -1 {
		fp.seekDir(indexIn, fp.indexDirOffset)
	}

	numFields, err := fp.in.ReadVInt()
	if err != nil {
		return fp, err
	}
	log.Printf("Fields number: %v", numFields)
	if numFields < 0 {
		return fp, errors.New(fmt.Sprintf("invalid numFields: %v (resource=%v)", numFields, fp.in))
	}

	for i := int32(0); i < numFields; i++ {
		log.Printf("Next field...")
		field, err := fp.in.ReadVInt()
		if err != nil {
			return fp, err
		}
		log.Printf("Field: %v", field)

		numTerms, err := fp.in.ReadVLong()
		if err != nil {
			return fp, err
		}
		// assert numTerms >= 0
		log.Printf("Terms number: %v", numTerms)

		numBytes, err := fp.in.ReadVInt()
		if err != nil {
			return fp, err
		}
		log.Printf("Bytes number: %v", numBytes)

		rootCode := make([]byte, numBytes)
		err = fp.in.ReadBytes(rootCode)
		if err != nil {
			return fp, err
		}
		fieldInfo := fieldInfos.FieldInfoByNumber(int(field))
		// assert fieldInfo != nil
		var sumTotalTermFreq int64
		if fieldInfo.IndexOptions() == model.INDEX_OPT_DOCS_ONLY {
			sumTotalTermFreq = -1
		} else {
			sumTotalTermFreq, err = fp.in.ReadVLong()
			if err != nil {
				return fp, err
			}
		}
		sumDocFreq, err := fp.in.ReadVLong()
		if err != nil {
			return fp, err
		}
		docCount, err := fp.in.ReadVInt()
		if err != nil {
			return fp, err
		}
		log.Printf("DocCount: %v", docCount)

		if docCount < 0 || int(docCount) > info.DocCount() { // #docs with field must be <= #docs
			return fp, errors.New(fmt.Sprintf(
				"invalid docCount: %v maxDoc: %v (resource=%v)",
				docCount, info.DocCount(), fp.in))
		}
		if sumDocFreq < int64(docCount) { // #postings must be >= #docs with field
			return fp, errors.New(fmt.Sprintf(
				"invalid sumDocFreq: %v docCount: %v (resource=%v)",
				sumDocFreq, docCount, fp.in))
		}
		if sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq { // #positions must be >= #postings
			return fp, errors.New(fmt.Sprintf(
				"invalid sumTotalTermFreq: %v sumDocFreq: %v (resource=%v)",
				sumTotalTermFreq, sumDocFreq, fp.in))
		}

		var indexStartFP int64
		if indexDivisor != -1 {
			indexStartFP, err = indexIn.ReadVLong()
			if err != nil {
				return fp, err
			}
		}
		log.Printf("indexStartFP: %v", indexStartFP)

		if _, ok := fp.fields[fieldInfo.Name]; ok {
			return fp, errors.New(fmt.Sprintf(
				"duplicate field: %v (resource=%v)", fieldInfo.Name, fp.in))
		}

		fp.fields[fieldInfo.Name], err = newFieldReader(fp, fieldInfo,
			numTerms, rootCode, sumTotalTermFreq, sumDocFreq, docCount,
			indexStartFP, indexIn)
		if err != nil {
			return fp, err
		}
		log.Print("DEBUG field processed.")
	}

	if indexDivisor != -1 {
		err = indexIn.Close()
		if err != nil {
			return fp, err
		}
	}

	success = true
	return fp, nil
}
func NewCompressingStoredFieldsWriter(dir store.Directory, si *model.SegmentInfo,
	segmentSuffix string, ctx store.IOContext, formatName string,
	compressionMode CompressionMode, chunkSize int) (*CompressingStoredFieldsWriter, error) {

	assert(dir != nil)
	ans := &CompressingStoredFieldsWriter{
		directory:       dir,
		segment:         si.Name,
		segmentSuffix:   segmentSuffix,
		compressionMode: compressionMode,
		compressor:      compressionMode.NewCompressor(),
		chunkSize:       chunkSize,
		docBase:         0,
		bufferedDocs:    newGrowableByteArrayDataOutput(chunkSize),
		numStoredFields: make([]int, 16),
		endOffsets:      make([]int, 16),
		numBufferedDocs: 0,
	}

	var success = false
	indexStream, err := dir.CreateOutput(util.SegmentFileName(si.Name, segmentSuffix,
		lucene40.FIELDS_INDEX_EXTENSION), ctx)
	if err != nil {
		return nil, err
	}
	assert(indexStream != nil)
	defer func() {
		if !success {
			util.CloseWhileSuppressingError(indexStream)
			ans.Abort()
		}
	}()

	ans.fieldsStream, err = dir.CreateOutput(util.SegmentFileName(si.Name, segmentSuffix,
		lucene40.FIELDS_EXTENSION), ctx)
	if err != nil {
		return nil, err
	}

	codecNameIdx := formatName + CODEC_SFX_IDX
	codecNameDat := formatName + CODEC_SFX_DAT
	err = codec.WriteHeader(indexStream, codecNameIdx, VERSION_CURRENT)
	if err != nil {
		return nil, err
	}
	err = codec.WriteHeader(ans.fieldsStream, codecNameDat, VERSION_CURRENT)
	if err != nil {
		return nil, err
	}
	assert(int64(codec.HeaderLength(codecNameIdx)) == indexStream.FilePointer())
	assert(int64(codec.HeaderLength(codecNameDat)) == ans.fieldsStream.FilePointer())

	ans.indexWriter, err = NewStoredFieldsIndexWriter(indexStream)
	if err != nil {
		return nil, err
	}
	assert(ans.indexWriter != nil)
	indexStream = nil

	err = ans.fieldsStream.WriteVInt(int32(chunkSize))
	if err != nil {
		return nil, err
	}
	err = ans.fieldsStream.WriteVInt(packed.VERSION_CURRENT)
	if err != nil {
		return nil, err
	}

	success = true
	return ans, nil
}
/*
Initialize the deleter: find all previous commits in the Directory, incref
the files they reference, and call the policy to let it delete commits.
This will remove any files not referenced by any of the commits.
*/
func newIndexFileDeleter(directory store.Directory, policy IndexDeletionPolicy,
	segmentInfos *SegmentInfos, infoStream util.InfoStream, writer *IndexWriter,
	initialIndexExists bool) (*IndexFileDeleter, error) {

	currentSegmentsFile := segmentInfos.SegmentsFileName()
	if infoStream.IsEnabled("IFD") {
		infoStream.Message("IFD", "init: current segments file is '%v'; deletePolicy=%v",
			currentSegmentsFile, policy)
	}

	fd := &IndexFileDeleter{
		infoStream: infoStream,
		writer:     writer,
		policy:     policy,
		directory:  directory,
		refCounts:  make(map[string]*RefCount),
	}

	// First pass: walk the files and initialize our ref counts:
	currentGen := segmentInfos.generation

	var currentCommitPoint *CommitPoint
	var files []string
	files, err := directory.ListAll()
	if _, ok := err.(*store.NoSuchDirectoryError); ok {
		// it means the directory is empty, so ignore it
		files = make([]string, 0)
	} else if err != nil {
		return nil, err
	}

	if currentSegmentsFile != "" {
		m := model.CODEC_FILE_PATTERN
		for _, filename := range files {
			if !strings.HasSuffix(filename, "write.lock") &&
				filename != INDEX_FILENAME_SEGMENTS_GEN &&
				(m.MatchString(filename) || strings.HasPrefix(filename, util.SEGMENTS)) {

				// Add this file to refCounts with initial count 0:
				fd.refCount(filename)

				if strings.HasPrefix(filename, util.SEGMENTS) {
					// This is a commit (segments or segments_N), and it's
					// valid (<= the max gen). Load it, then incref all files
					// it refers to:
					if infoStream.IsEnabled("IFD") {
						infoStream.Message("IFD", "init: load commit '%v'", filename)
					}
					sis := &SegmentInfos{}
					err := sis.Read(directory, filename)
					if os.IsNotExist(err) {
						// LUCENE-948: on NFS (and maybe others), if you have
						// writers switching back and forth between machines,
						// it's very likely that the dir listing will be
						// stale and will claim a file segments_X exists when
						// in fact it doesn't. So, we catch this and handle
						// it as if the file does not exist
						if infoStream.IsEnabled("IFD") {
							infoStream.Message("IFD",
								"init: hit FileNotFoundException when loading commit '%v'; skipping this commit point",
								filename)
						}
						sis = nil
					} else if err != nil {
						if GenerationFromSegmentsFileName(filename) <= currentGen {
							length, _ := directory.FileLength(filename)
							if length > 0 {
								return nil, err
							}
						}
						// Most likely we are opening an index that has an
						// aborted "future" commit, so suppress exc in this case
						sis = nil
					} else { // sis != nil
						commitPoint := newCommitPoint(fd.commitsToDelete, directory, sis)
						if sis.generation == segmentInfos.generation {
							currentCommitPoint = commitPoint
						}
						fd.commits = append(fd.commits, commitPoint)
						fd.incRef(sis, true)

						if fd.lastSegmentInfos == nil || sis.generation > fd.lastSegmentInfos.generation {
							fd.lastSegmentInfos = sis
						}
					}
				}
			}
		}
	}

	if currentCommitPoint == nil && currentSegmentsFile != "" && initialIndexExists {
		// We did not in fact see the segments_N file corresponding to the
		// segmentInfos that was passed in. Yet, it must exist, because our
		// caller holds the write lock. This can happen when the directory
		// listing was stale (e.g. when index accessed via NFS client with
		// stale directory listing cache). So we try now to explicitly open
		// this commit point:
		sis := &SegmentInfos{}
		err := sis.Read(directory, currentSegmentsFile)
		if err != nil {
			return nil, errors.New(fmt.Sprintf("failed to locate current segments_N file '%v'",
				currentSegmentsFile))
		}
		if infoStream.IsEnabled("IFD") {
			infoStream.Message("IFD", "forced open of current segments file %v",
				segmentInfos.SegmentsFileName())
		}
		currentCommitPoint = newCommitPoint(fd.commitsToDelete, directory, sis)
		fd.commits = append(fd.commits, currentCommitPoint)
		fd.incRef(sis, true)
	}

	// We keep commits list in sorted order (oldest to newest):
	util.TimSort(IndexCommits(fd.commits))

	// Now delete anything with ref count at 0. These are presumably
	// abandoned files, e.g. due to crash of IndexWriter.
	for filename, rc := range fd.refCounts {
		if rc.count == 0 {
			if infoStream.IsEnabled("IFD") {
				infoStream.Message("IFD", "init: removing unreferenced file '%v'", filename)
			}
			fd.deleteFile(filename)
		}
	}

	// Finally, give policy a chance to remove things on startup:
	err = policy.onInit(fd.commits)
	if err != nil {
		return nil, err
	}

	// Always protect the incoming segmentInfos since sometimes it may not
	// be the most recent commit
	err = fd.checkpoint(segmentInfos, false)
	if err != nil {
		return nil, err
	}

	fd.startingCommitDeleted = (currentCommitPoint != nil && currentCommitPoint.IsDeleted())

	fd.deleteCommits()
	return fd, nil
}
func (sis *SegmentInfos) write(directory store.Directory) error {
	segmentsFilename := sis.nextSegmentFilename()

	// Always advance the generation on write:
	if sis.generation == -1 {
		sis.generation = 1
	} else {
		sis.generation++
	}

	var segnOutput *store.ChecksumIndexOutput
	var success = false
	var upgradedSIFiles = make(map[string]bool)

	defer func() {
		if !success {
			// We hit an error above; try to close the file but suppress any
			// errors
			util.CloseWhileSuppressingError(segnOutput)
			for filename, _ := range upgradedSIFiles {
				directory.DeleteFile(filename) // ignore error
			}
			// Try not to leave a truncated segments_N file in the index:
			directory.DeleteFile(segmentsFilename) // ignore error
		}
	}()

	out, err := directory.CreateOutput(segmentsFilename, store.IO_CONTEXT_DEFAULT)
	if err != nil {
		return err
	}
	segnOutput = store.NewChecksumIndexOutput(out)

	err = codec.WriteHeader(segnOutput, "segments", VERSION_40)
	if err != nil {
		return err
	}
	err = segnOutput.WriteLong(sis.version)
	if err == nil {
		err = segnOutput.WriteInt(int32(sis.counter))
		if err == nil {
			err = segnOutput.WriteInt(int32(len(sis.Segments)))
		}
	}
	if err != nil {
		return err
	}

	for _, siPerCommit := range sis.Segments {
		si := siPerCommit.info
		err = segnOutput.WriteString(si.Name)
		if err == nil {
			err = segnOutput.WriteString(si.Codec().(Codec).Name())
			if err == nil {
				err = segnOutput.WriteLong(siPerCommit.delGen)
				if err == nil {
					err = segnOutput.WriteInt(int32(siPerCommit.delCount))
				}
			}
		}
		if err != nil {
			return err
		}
		assert(si.Dir == directory)
		assert(siPerCommit.delCount <= si.DocCount())

		// If this segment is pre-4.x, perform a one-time "upgrade" to write
		// the .si file for it:
		if version := si.Version(); version == "" || versionLess(version, "4.0") {
			panic("not implemented yet")
		}
	}

	err = segnOutput.WriteStringStringMap(sis.userData)
	if err != nil {
		return err
	}
	sis.pendingSegnOutput = segnOutput
	success = true
	return nil
}
func (sis *SegmentInfos) write(directory store.Directory) (err error) {
	segmentsFilename := sis.nextSegmentFilename()

	// Always advance the generation on write:
	if sis.generation == -1 {
		sis.generation = 1
	} else {
		sis.generation++
	}

	var segnOutput store.IndexOutput
	var success = false
	// var upgradedSIFiles = make(map[string]bool)

	defer func() {
		if !success {
			// We hit an error above; try to close the file but suppress any
			// errors
			util.CloseWhileSuppressingError(segnOutput)
			// for filename, _ := range upgradedSIFiles {
			// 	directory.DeleteFile(filename) // ignore error
			// }
			// Try not to leave a truncated segments_N file in the index:
			directory.DeleteFile(segmentsFilename) // ignore error
		}
	}()

	if segnOutput, err = directory.CreateOutput(segmentsFilename, store.IO_CONTEXT_DEFAULT); err != nil {
		return
	}
	if err = codec.WriteHeader(segnOutput, "segments", VERSION_49); err != nil {
		return
	}
	if err = segnOutput.WriteLong(sis.version); err == nil {
		if err = segnOutput.WriteInt(int32(sis.counter)); err == nil {
			err = segnOutput.WriteInt(int32(len(sis.Segments)))
		}
	}
	if err != nil {
		return
	}

	for _, siPerCommit := range sis.Segments {
		si := siPerCommit.Info
		if err = segnOutput.WriteString(si.Name); err == nil {
			if err = segnOutput.WriteString(si.Codec().(Codec).Name()); err == nil {
				if err = segnOutput.WriteLong(siPerCommit.DelGen()); err == nil {
					assert2(siPerCommit.DelCount() >= 0 && siPerCommit.DelCount() <= si.DocCount(),
						"cannot write segment: invalid docCount segment=%v docCount=%v delCount=%v",
						si.Name, si.DocCount(), siPerCommit.DelCount())
					if err = segnOutput.WriteInt(int32(siPerCommit.DelCount())); err == nil {
						if err = segnOutput.WriteLong(siPerCommit.FieldInfosGen()); err == nil {
							if err = segnOutput.WriteLong(siPerCommit.DocValuesGen()); err == nil {
								if err = segnOutput.WriteStringSet(siPerCommit.FieldInfosFiles()); err == nil {
									dvUpdatesFiles := siPerCommit.DocValuesUpdatesFiles()
									if err = segnOutput.WriteInt(int32(len(dvUpdatesFiles))); err == nil {
										for k, v := range dvUpdatesFiles {
											if err = segnOutput.WriteInt(int32(k)); err != nil {
												break
											}
											if err = segnOutput.WriteStringSet(v); err != nil {
												break
											}
										}
									}
								}
							}
						}
					}
				}
			}
		}
		if err != nil {
			return
		}
		assert(si.Dir == directory)

		// If this segment is pre-4.x, perform a one-time "upgrade" to write
		// the .si file for it:
		if version := si.Version(); len(version) == 0 || !version.OnOrAfter(util.VERSION_4_0) {
			panic("not implemented yet")
		}
	}

	if err = segnOutput.WriteStringStringMap(sis.userData); err != nil {
		return
	}
	sis.pendingSegnOutput = segnOutput
	success = true
	return nil
}
// Sole constructor
func newCompressingStoredFieldsReader(d store.Directory, si *model.SegmentInfo,
	segmentSuffix string, fn model.FieldInfos, ctx store.IOContext,
	formatName string, compressionMode compressing.CompressionMode) (r *CompressingStoredFieldsReader, err error) {

	r = &CompressingStoredFieldsReader{}
	r.compressionMode = compressionMode
	segment := si.Name
	r.fieldInfos = fn
	r.numDocs = si.DocCount()

	var indexStream store.IndexInput
	success := false
	defer func() {
		if !success {
			log.Println("Failed to initialize CompressingStoredFieldsReader.")
			if err != nil {
				log.Print(err)
			}
			util.Close(r, indexStream)
		}
	}()

	// Load the index into memory
	indexStreamFN := util.SegmentFileName(segment, segmentSuffix, lucene40.FIELDS_INDEX_EXTENSION)
	indexStream, err = d.OpenInput(indexStreamFN, ctx)
	if err != nil {
		return nil, err
	}
	codecNameIdx := formatName + CODEC_SFX_IDX
	if _, err = codec.CheckHeader(indexStream, codecNameIdx,
		CODEC_SFX_VERSION_START, CODEC_SFX_VERSION_CURRENT); err != nil {
		return nil, err
	}
	if int64(codec.HeaderLength(codecNameIdx)) != indexStream.FilePointer() {
		panic("assert fail")
	}
	r.indexReader, err = newCompressingStoredFieldsIndexReader(indexStream, si)
	if err != nil {
		return nil, err
	}
	err = indexStream.Close()
	if err != nil {
		return nil, err
	}
	indexStream = nil

	// Open the data file and read metadata
	fieldsStreamFN := util.SegmentFileName(segment, segmentSuffix, lucene40.FIELDS_EXTENSION)
	r.fieldsStream, err = d.OpenInput(fieldsStreamFN, ctx)
	if err != nil {
		return nil, err
	}
	codecNameDat := formatName + CODEC_SFX_DAT
	if _, err = codec.CheckHeader(r.fieldsStream, codecNameDat,
		CODEC_SFX_VERSION_START, CODEC_SFX_VERSION_CURRENT); err != nil {
		return nil, err
	}
	if int64(codec.HeaderLength(codecNameDat)) != r.fieldsStream.FilePointer() {
		panic("assert fail")
	}

	n, err := r.fieldsStream.ReadVInt()
	if err != nil {
		return nil, err
	}
	r.packedIntsVersion = int(n)
	r.decompressor = compressionMode.NewDecompressor()
	r.bytes = make([]byte, 0)

	success = true
	return r, nil
}
func NewBlockTreeTermsReader(dir store.Directory, fieldInfos FieldInfos,
	info *SegmentInfo, postingsReader PostingsReaderBase, ctx store.IOContext,
	segmentSuffix string, indexDivisor int) (p FieldsProducer, err error) {

	// log.Print("Initializing BlockTreeTermsReader...")
	fp := &BlockTreeTermsReader{
		postingsReader: postingsReader,
		fields:         make(map[string]FieldReader),
		segment:        info.Name,
	}
	fp.in, err = dir.OpenInput(util.SegmentFileName(info.Name, segmentSuffix, TERMS_EXTENSION), ctx)
	if err != nil {
		return nil, err
	}

	success := false
	var indexIn store.IndexInput
	defer func() {
		if !success {
			fmt.Println("Failed to initialize BlockTreeTermsReader.")
			if err != nil {
				fmt.Println("DEBUG ", err)
			}
			// this.close() will close in:
			util.CloseWhileSuppressingError(indexIn, fp)
		}
	}()

	fp.version, err = fp.readHeader(fp.in)
	if err != nil {
		return nil, err
	}
	// log.Printf("Version: %v", fp.version)

	if indexDivisor != -1 {
		filename := util.SegmentFileName(info.Name, segmentSuffix, TERMS_INDEX_EXTENSION)
		indexIn, err = dir.OpenInput(filename, ctx)
		if err != nil {
			return nil, err
		}
		indexVersion, err := fp.readIndexHeader(indexIn)
		if err != nil {
			return nil, err
		}
		// log.Printf("Index version: %v", indexVersion)
		if int(indexVersion) != fp.version {
			return nil, errors.New(fmt.Sprintf("mismatched version files: %v=%v,%v=%v",
				fp.in, fp.version, indexIn, indexVersion))
		}
	}

	// verify
	if indexIn != nil && fp.version >= TERMS_VERSION_CURRENT {
		if _, err = store.ChecksumEntireFile(indexIn); err != nil {
			return nil, err
		}
	}

	// Have PostingsReader init itself
	postingsReader.Init(fp.in)

	if fp.version >= TERMS_VERSION_CHECKSUM {
		// NOTE: data file is too costly to verify checksum against all the
		// bytes on open, but for now we at least verify proper structure of
		// the checksum footer: which looks for FOOTER_MAGIC + algorithmID.
		// This is cheap and can detect some forms of corruption such as
		// file truncation.
		if _, err = codec.RetrieveChecksum(fp.in); err != nil {
			return nil, err
		}
	}

	// Read per-field details
	fp.seekDir(fp.in, fp.dirOffset)
	if indexDivisor != -1 {
		fp.seekDir(indexIn, fp.indexDirOffset)
	}

	numFields, err := fp.in.ReadVInt()
	if err != nil {
		return nil, err
	}
	// log.Printf("Fields number: %v", numFields)
	if numFields < 0 {
		return nil, errors.New(fmt.Sprintf("invalid numFields: %v (resource=%v)", numFields, fp.in))
	}

	for i := int32(0); i < numFields; i++ {
		// log.Printf("Next field...")
		field, err := fp.in.ReadVInt()
		if err != nil {
			return nil, err
		}
		// log.Printf("Field: %v", field)

		numTerms, err := fp.in.ReadVLong()
		if err != nil {
			return nil, err
		}
		assert2(numTerms > 0, "Illegal numTerms for field number: %v (resource=%v)", field, fp.in)
		// log.Printf("Terms number: %v", numTerms)

		numBytes, err := fp.in.ReadVInt()
		if err != nil {
			return nil, err
		}
		assert2(numBytes >= 0, "invalid rootCode for field number: %v, numBytes=%v (resource=%v)",
			field, numBytes, fp.in)
		// log.Printf("Bytes number: %v", numBytes)

		rootCode := make([]byte, numBytes)
		err = fp.in.ReadBytes(rootCode)
		if err != nil {
			return nil, err
		}
		fieldInfo := fieldInfos.FieldInfoByNumber(int(field))
		assert2(fieldInfo != nil, "invalid field number: %v (resource=%v)", field, fp.in)

		var sumTotalTermFreq int64
		if fieldInfo.IndexOptions() == INDEX_OPT_DOCS_ONLY {
			sumTotalTermFreq = -1
		} else {
			sumTotalTermFreq, err = fp.in.ReadVLong()
			if err != nil {
				return nil, err
			}
		}
		sumDocFreq, err := fp.in.ReadVLong()
		if err != nil {
			return nil, err
		}
		var docCount int
		if docCount, err = asInt(fp.in.ReadVInt()); err != nil {
			return nil, err
		}
		// fmt.Printf("DocCount: %v\n", docCount)

		var longsSize int
		if fp.version >= TERMS_VERSION_META_ARRAY {
			if longsSize, err = asInt(fp.in.ReadVInt()); err != nil {
				return nil, err
			}
		}
		assert2(longsSize >= 0, "invalid longsSize for field: %v, longsSize=%v (resource=%v)",
			fieldInfo.Name, longsSize, fp.in)

		var minTerm, maxTerm []byte
		if fp.version >= TERMS_VERSION_MIN_MAX_TERMS {
			if minTerm, err = readBytesRef(fp.in); err != nil {
				return nil, err
			}
			if maxTerm, err = readBytesRef(fp.in); err != nil {
				return nil, err
			}
		}

		if docCount < 0 || int(docCount) > info.DocCount() { // #docs with field must be <= #docs
			return nil, errors.New(fmt.Sprintf(
				"invalid docCount: %v maxDoc: %v (resource=%v)",
				docCount, info.DocCount(), fp.in))
		}
		if sumDocFreq < int64(docCount) { // #postings must be >= #docs with field
			return nil, errors.New(fmt.Sprintf(
				"invalid sumDocFreq: %v docCount: %v (resource=%v)",
				sumDocFreq, docCount, fp.in))
		}
		if sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq { // #positions must be >= #postings
			return nil, errors.New(fmt.Sprintf(
				"invalid sumTotalTermFreq: %v sumDocFreq: %v (resource=%v)",
				sumTotalTermFreq, sumDocFreq, fp.in))
		}

		var indexStartFP int64
		if indexDivisor != -1 {
			if indexStartFP, err = indexIn.ReadVLong(); err != nil {
				return nil, err
			}
		}
		// log.Printf("indexStartFP: %v", indexStartFP)

		if _, ok := fp.fields[fieldInfo.Name]; ok {
			return nil, errors.New(fmt.Sprintf(
				"duplicate field: %v (resource=%v)", fieldInfo.Name, fp.in))
		}

		if fp.fields[fieldInfo.Name], err = newFieldReader(fp, fieldInfo,
			numTerms, rootCode, sumTotalTermFreq, sumDocFreq, docCount,
			indexStartFP, longsSize, indexIn, minTerm, maxTerm); err != nil {
			return nil, err
		}
	}

	if indexDivisor != -1 {
		if err = indexIn.Close(); err != nil {
			return nil, err
		}
	}

	success = true
	return fp, nil
}
func (sis *SegmentInfos) finishCommit(dir store.Directory) (fileName string, err error) {
	assert(dir != nil)
	assert2(sis.pendingSegnOutput != nil, "prepareCommit was not called")

	if err = func() error {
		var success = false
		defer func() {
			if !success {
				// Closes pendingSegnOutput & deletes partial segments_N:
				sis.rollbackCommit(dir)
			} else {
				err := func() error {
					var success = false
					defer func() {
						if !success {
							// Closes pendingSegnOutput & deletes partial segments_N:
							sis.rollbackCommit(dir)
						} else {
							sis.pendingSegnOutput = nil
						}
					}()
					err := sis.pendingSegnOutput.Close()
					success = err == nil
					return err
				}()
				assertn(err == nil, "%v", err) // no shadow
			}
		}()

		if err := codec.WriteFooter(sis.pendingSegnOutput); err != nil {
			return err
		}
		success = true
		return nil
	}(); err != nil {
		return
	}

	// NOTE: if we crash here, we have left a segments_N file in the
	// directory in a possibly corrupt state (if some bytes made it to
	// stable storage and others didn't). But, the segments_N file includes
	// a checksum at the end, which should catch this case. So when a reader
	// tries to read it, it will return an index corrupt error, which should
	// cause the retry logic in SegmentInfos to kick in and load the last
	// good (previous) segments_N-1 file.
	fileName = util.FileNameFromGeneration(INDEX_FILENAME_SEGMENTS, "", sis.generation)

	if err = func() error {
		var success = false
		defer func() {
			if !success {
				dir.DeleteFile(fileName) // suppress error so we keep returning the original error
			}
		}()
		err := dir.Sync([]string{fileName})
		success = err == nil
		return err
	}(); err != nil {
		return
	}

	sis.lastGeneration = sis.generation
	writeSegmentsGen(dir, sis.generation)
	return
}
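// A minimal sketch of the two-phase commit flow the functions above imply:
// write() stages segments_N and leaves pendingSegnOutput open, then
// finishCommit() writes the footer, closes the output, syncs the file, and
// records the generation. Illustrative only; the real IndexWriter path goes
// through prepareCommit and adds error handling around both phases.
func commitSegmentInfos(sis *SegmentInfos, dir store.Directory) (string, error) {
	if err := sis.write(dir); err != nil { // phase 1: stage segments_N
		return "", err
	}
	return sis.finishCommit(dir) // phase 2: footer, close, sync, record gen
}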
/*
Read a particular segmentFileName. Note that this may return an IO error if
a commit is in process.
*/
func (sis *SegmentInfos) Read(directory store.Directory, segmentFileName string) error {
	log.Printf("Reading segment info from %v...", segmentFileName)
	success := false

	// Clear any previous segments:
	sis.Clear()

	sis.generation = GenerationFromSegmentsFileName(segmentFileName)
	sis.lastGeneration = sis.generation

	main, err := directory.OpenInput(segmentFileName, store.IO_CONTEXT_READ)
	if err != nil {
		return err
	}
	input := store.NewChecksumIndexInput(main)
	defer func() {
		if !success {
			// Clear any segment infos we had loaded so we have a clean
			// slate on retry:
			sis.Clear()
			util.CloseWhileSuppressingError(input)
		} else {
			input.Close()
		}
	}()

	format, err := input.ReadInt()
	if err != nil {
		return err
	}
	if format == codec.CODEC_MAGIC { // 4.0+
		_, err = codec.CheckHeaderNoMagic(input, "segments", VERSION_40, VERSION_40)
		if err != nil {
			return err
		}
		sis.version, err = input.ReadLong()
		if err != nil {
			return err
		}
		sis.counter, err = asInt(input.ReadInt())
		if err != nil {
			return err
		}
		numSegments, err := asInt(input.ReadInt())
		if err != nil {
			return err
		}
		if numSegments < 0 {
			return errors.New(fmt.Sprintf("invalid segment count: %v (resource: %v)", numSegments, input))
		}
		for seg := 0; seg < numSegments; seg++ {
			segName, err := input.ReadString()
			if err != nil {
				return err
			}
			codecName, err := input.ReadString()
			if err != nil {
				return err
			}
			if codecName != "Lucene42" {
				log.Panicf("Not supported yet: %v", codecName)
			}
			fCodec := LoadCodec(codecName)
			log.Printf("SIS.read seg=%v codec=%v", seg, fCodec)
			info, err := fCodec.SegmentInfoFormat().SegmentInfoReader()(directory, segName, store.IO_CONTEXT_READ)
			// method := NewLucene42Codec()
			// info, err := method.ReadSegmentInfo(directory, segName, store.IO_CONTEXT_READ)
			if err != nil {
				return err
			}
			// info.codec = method
			info.SetCodec(fCodec)
			delGen, err := input.ReadLong()
			if err != nil {
				return err
			}
			delCount, err := asInt(input.ReadInt())
			if err != nil {
				return err
			}
			if delCount < 0 || delCount > info.DocCount() {
				return errors.New(fmt.Sprintf("invalid deletion count: %v (resource: %v)", delCount, input))
			}
			sis.Segments = append(sis.Segments, NewSegmentInfoPerCommit(info, delCount, delGen))
		}
		sis.userData, err = input.ReadStringStringMap()
		if err != nil {
			return err
		}
	} else {
		// TODO support <4.0 index
		panic("Index format pre-4.0 not supported yet")
	}

	checksumNow := int64(input.Checksum())
	checksumThen, err := input.ReadLong()
	if err != nil {
		return err
	}
	if checksumNow != checksumThen {
		return errors.New(fmt.Sprintf(
			"checksum mismatch in segments file: %v vs %v (resource: %v)",
			checksumNow, checksumThen, input))
	}

	success = true
	return nil
}