/* Closes all resources and writes the entry table */
func (w *CompoundFileWriter) Close() (err error) {
	if w.closed {
		fmt.Println("CompoundFileWriter is already closed.")
		return nil
	}

	// TODO this code should clean up after itself (remove partial .cfs/.cfe)
	if err = func() (err error) {
		var success = false
		defer func() {
			if success {
				util.Close(w.dataOut)
			} else {
				util.CloseWhileSuppressingError(w.dataOut)
			}
		}()

		assert2(w.pendingEntries.Len() == 0 && !w.outputTaken.Get(),
			"CFS has pending open files")
		w.closed = true
		// open the compound stream; we can safely use IO_CONTEXT_DEFAULT
		// here because this will only open the output if no file was
		// added to the CFS
		_, err = w.output(IO_CONTEXT_DEFAULT)
		if err != nil {
			return
		}
		assert(w.dataOut != nil)
		err = codec.WriteFooter(w.dataOut)
		if err != nil {
			return
		}
		success = true
		return nil
	}(); err != nil {
		return
	}

	var entryTableOut IndexOutput
	var success = false
	defer func() {
		if success {
			util.Close(entryTableOut)
		} else {
			util.CloseWhileSuppressingError(entryTableOut)
		}
	}()
	entryTableOut, err = w.directory.CreateOutput(w.entryTableName, IO_CONTEXT_DEFAULT)
	if err != nil {
		return
	}
	err = w.writeEntryTable(w.entries, entryTableOut)
	if err != nil {
		return
	}
	success = true
	return
}
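/*
Close above shows the cleanup idiom this port uses everywhere: a
success flag plus a deferred block that closes resources normally on
the happy path but suppresses close errors on failure, so the caller
sees the original error. As a rough sketch, the two util helpers can
be thought of as the following (a hypothetical simplification over
io.Closer; the real util package may be more careful, e.g. about
typed-nil closers):
*/

// Close closes every non-nil resource and reports the first close error.
func Close(objects ...io.Closer) error {
	var first error
	for _, obj := range objects {
		if obj == nil {
			continue
		}
		if err := obj.Close(); err != nil && first == nil {
			first = err
		}
	}
	return first
}

// CloseWhileSuppressingError closes every non-nil resource and drops
// all close errors, preserving whatever error the caller is already
// handling.
func CloseWhileSuppressingError(objects ...io.Closer) {
	for _, obj := range objects {
		if obj != nil {
			obj.Close() // ignore error
		}
	}
}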
func (f *Lucene41PostingsFormat) FieldsProducer(state SegmentReadState) (FieldsProducer, error) {
	postingsReader, err := NewLucene41PostingsReader(state.dir,
		state.fieldInfos, state.segmentInfo,
		state.context, state.segmentSuffix)
	if err != nil {
		return nil, err
	}
	success := false
	defer func() {
		if !success {
			log.Printf("Failed to load FieldsProducer for %v.", f.Name())
			if err != nil {
				log.Print("DEBUG ", err)
			}
			util.CloseWhileSuppressingError(postingsReader)
		}
	}()
	fp, err := newBlockTreeTermsReader(state.dir,
		state.fieldInfos, state.segmentInfo,
		postingsReader, state.context, state.segmentSuffix,
		state.termsIndexDivisor)
	if err != nil {
		log.Print("DEBUG: ", err)
		return fp, err
	}
	success = true
	return fp, nil
}
func NewCompoundFileDirectory(directory Directory, fileName string,
	context IOContext, openForWrite bool) (d *CompoundFileDirectory, err error) {

	self := &CompoundFileDirectory{
		Locker:         &sync.Mutex{},
		directory:      directory,
		fileName:       fileName,
		readBufferSize: bufferSize(context),
		openForWrite:   openForWrite,
	}
	self.DirectoryImpl = NewDirectoryImpl(self)

	if !openForWrite {
		log.Printf("Open for read.")
		success := false
		defer func() {
			if !success {
				util.CloseWhileSuppressingError(self.handle)
			}
		}()
		self.handle, err = directory.CreateSlicer(fileName, context)
		if err != nil {
			return self, err
		}
		self.entries, err = readEntries(self.handle, directory, fileName)
		if err != nil {
			return self, err
		}
		success = true
		self.DirectoryImpl.IsOpen = true
		return self, err
	} else {
		panic("not supported yet")
	}
}
func (nc *NormsConsumer) Close() (err error) {
	var success = false
	defer func() {
		if success {
			err = util.Close(nc.data, nc.meta)
		} else {
			util.CloseWhileSuppressingError(nc.data, nc.meta)
		}
	}()
	if nc.meta != nil {
		if err = nc.meta.WriteVInt(-1); err != nil { // write EOF marker
			return
		}
		if err = codec.WriteFooter(nc.meta); err != nil { // write checksum
			return
		}
	}
	if nc.data != nil {
		if err = codec.WriteFooter(nc.data); err != nil { // write checksum
			return
		}
	}
	success = true
	return nil
}
/*
Copies the file src to 'to' under the new file name dest. If you want
to copy the entire source directory to the destination one, you can do
so like this:

	var to Directory // the directory to copy to
	for _, file := range dir.ListAll() {
		dir.Copy(to, file, newFile, IO_CONTEXT_DEFAULT) // newFile can be either file, or a new name
	}

NOTE: this method does not check whether dest exists and will
overwrite it if it does.
*/
func (d *DirectoryImpl) Copy(to Directory, src, dest string, ctx IOContext) (err error) {
	var os IndexOutput
	var is IndexInput
	var success = false
	defer func() {
		if success {
			err = util.Close(os, is)
			return
		}
		util.CloseWhileSuppressingError(os, is)
		// the copy failed - remove the partial destination file,
		// ignoring any error (and any panic) while doing so:
		defer func() {
			recover() // ignore panic
		}()
		to.DeleteFile(dest) // ignore error
	}()

	os, err = to.CreateOutput(dest, ctx)
	if err != nil {
		return err
	}
	is, err = d.spi.OpenInput(src, ctx)
	if err != nil {
		return err
	}
	err = os.CopyBytes(is, is.Length())
	if err != nil {
		return err
	}
	success = true
	return nil
}
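/*
For illustration, here is the doc comment's directory-copy loop as a
complete function. This is a hedged usage sketch: copyAll is not part
of the package, and it assumes ListAll returns ([]string, error) as it
does elsewhere in this port.
*/
func copyAll(src *DirectoryImpl, to Directory) error {
	files, err := src.ListAll()
	if err != nil {
		return err
	}
	for _, file := range files {
		// reuse the source name; a different dest name would rename the copy
		if err := src.Copy(to, file, file, IO_CONTEXT_DEFAULT); err != nil {
			return err
		}
	}
	return nil
}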
func openStandardDirectoryReader(directory store.Directory,
	commit IndexCommit, termInfosIndexDivisor int) (r DirectoryReader, err error) {

	// log.Print("Initializing SegmentsFile...")
	obj, err := NewFindSegmentsFile(directory, func(segmentFileName string) (interface{}, error) {
		sis := &SegmentInfos{}
		err := sis.Read(directory, segmentFileName)
		if err != nil {
			return nil, err
		}
		// log.Printf("Found %v segments...", len(sis.Segments))
		readers := make([]AtomicReader, len(sis.Segments))
		for i := len(sis.Segments) - 1; i >= 0; i-- {
			sr, err := NewSegmentReader(sis.Segments[i], termInfosIndexDivisor, store.IO_CONTEXT_READ)
			if err != nil {
				for _, r := range readers {
					if r != nil {
						util.CloseWhileSuppressingError(r)
					}
				}
				return nil, err
			}
			readers[i] = sr
		}
		// log.Printf("Obtained %v SegmentReaders.", len(readers))
		return newStandardDirectoryReader(directory, readers, sis, termInfosIndexDivisor, false), nil
	}).run(commit)
	if err != nil {
		return nil, err
	}
	return obj.(*StandardDirectoryReader), err
}
func (w *CompressingStoredFieldsWriter) Abort() {
	assert(w != nil)
	util.CloseWhileSuppressingError(w)
	util.DeleteFilesIgnoringErrors(w.directory,
		util.SegmentFileName(w.segment, w.segmentSuffix, lucene40.FIELDS_EXTENSION),
		util.SegmentFileName(w.segment, w.segmentSuffix, lucene40.FIELDS_INDEX_EXTENSION))
}
func (f *Lucene41PostingsFormat) FieldsProducer(state SegmentReadState) (FieldsProducer, error) {
	postingsReader, err := NewLucene41PostingsReader(state.Dir,
		state.FieldInfos, state.SegmentInfo,
		state.Context, state.SegmentSuffix)
	if err != nil {
		return nil, err
	}
	success := false
	defer func() {
		if !success {
			fmt.Printf("Failed to load FieldsProducer for %v.\n", f.Name())
			util.CloseWhileSuppressingError(postingsReader)
		}
	}()
	fp, err := blocktree.NewBlockTreeTermsReader(state.Dir,
		state.FieldInfos, state.SegmentInfo,
		postingsReader, state.Context, state.SegmentSuffix,
		state.TermsIndexDivisor)
	if err != nil {
		return fp, err
	}
	success = true
	return fp, nil
}
func (w *Lucene41PostingsWriter) Close() (err error) {
	var success = false
	defer func() {
		if success {
			err = util.Close(w.docOut, w.posOut, w.payOut)
		} else {
			util.CloseWhileSuppressingError(w.docOut, w.posOut, w.payOut)
		}
		w.docOut = nil
		w.posOut = nil
		w.payOut = nil
	}()

	if err == nil && w.docOut != nil {
		err = codec.WriteFooter(w.docOut)
	}
	if err == nil && w.posOut != nil {
		err = codec.WriteFooter(w.posOut)
	}
	if err == nil && w.payOut != nil {
		err = codec.WriteFooter(w.payOut)
	}
	if err != nil {
		return
	}
	success = true
	return nil
}
func (p *StoredFieldsProcessor) flush(state SegmentWriteState) (err error) {
	numDocs := state.segmentInfo.DocCount()
	if numDocs > 0 {
		// It's possible that all documents seen in this segment hit
		// non-aborting errors, in which case we will not have yet init'd
		// the FieldsWriter:
		err = p.initFieldsWriter(state.context)
		if err == nil {
			err = p.fill(numDocs)
		}
	}
	if w := p.fieldsWriter; w != nil {
		var success = false
		defer func() {
			if success {
				err = util.CloseWhileHandlingError(err, w)
			} else {
				util.CloseWhileSuppressingError(w)
			}
		}()
		err = w.Finish(state.fieldInfos, numDocs)
		if err != nil {
			return err
		}
		success = true
	}
	return
}
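/*
flush uses a third helper, util.CloseWhileHandlingError, which merges
the error already in flight with any close error instead of dropping
the latter. A plausible sketch of its contract (assumed behavior
matching the call site above, not the verified util source):
*/
func CloseWhileHandlingError(priorErr error, objects ...io.Closer) error {
	err := priorErr
	for _, obj := range objects {
		if obj == nil {
			continue
		}
		if cerr := obj.Close(); cerr != nil && err == nil {
			err = cerr // surface the close error only if nothing failed earlier
		}
	}
	return err
}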
func newLucene49NormsConsumer(state *SegmentWriteState,
	dataCodec, dataExtension, metaCodec, metaExtension string) (nc *NormsConsumer, err error) {

	assert(packed.PackedFormat(packed.PACKED_SINGLE_BLOCK).IsSupported(1))
	assert(packed.PackedFormat(packed.PACKED_SINGLE_BLOCK).IsSupported(2))
	assert(packed.PackedFormat(packed.PACKED_SINGLE_BLOCK).IsSupported(4))

	nc = &NormsConsumer{maxDoc: state.SegmentInfo.DocCount()}
	var success = false
	defer func() {
		if !success {
			util.CloseWhileSuppressingError(nc)
		}
	}()

	dataName := util.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, dataExtension)
	if nc.data, err = state.Directory.CreateOutput(dataName, state.Context); err != nil {
		return nil, err
	}
	if err = codec.WriteHeader(nc.data, dataCodec, VERSION_CURRENT); err != nil {
		return nil, err
	}
	metaName := util.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, metaExtension)
	if nc.meta, err = state.Directory.CreateOutput(metaName, state.Context); err != nil {
		return nil, err
	}
	if err = codec.WriteHeader(nc.meta, metaCodec, VERSION_CURRENT); err != nil {
		return nil, err
	}
	success = true
	return nc, nil
}
func (w *CompressingStoredFieldsWriter) Abort() {
	if w == nil { // tolerate early released pointer
		return
	}
	util.CloseWhileSuppressingError(w)
	util.DeleteFilesIgnoringErrors(w.directory,
		util.SegmentFileName(w.segment, w.segmentSuffix, lucene40.FIELDS_EXTENSION),
		util.SegmentFileName(w.segment, w.segmentSuffix, lucene40.FIELDS_INDEX_EXTENSION))
}
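/*
Abort pairs the suppressed close with util.DeleteFilesIgnoringErrors so
partially written .fdt/.fdx files do not linger after a failed flush.
A minimal sketch of such a helper (hypothetical; assumes Directory
exposes DeleteFile(name string) error):
*/
func DeleteFilesIgnoringErrors(dir Directory, files ...string) {
	for _, name := range files {
		dir.DeleteFile(name) // best effort: errors are deliberately dropped
	}
}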
func (w *BlockTreeTermsWriter) Close() (err error) {
	var success = false
	defer func() {
		if success {
			util.Close(w.out, w.indexOut, w.postingsWriter)
		} else {
			util.CloseWhileSuppressingError(w.out, w.indexOut, w.postingsWriter)
		}
	}()

	dirStart := w.out.FilePointer()
	indexDirStart := w.indexOut.FilePointer()

	if err = w.out.WriteVInt(int32(len(w.fields))); err != nil {
		return
	}
	// return on the first write error, so a failure in one field is not
	// silently overwritten by the next iteration:
	for _, field := range w.fields {
		// fmt.Printf("  field %v %v terms\n", field.fieldInfo.Name, field.numTerms)
		if err = w.out.WriteVInt(field.fieldInfo.Number); err != nil {
			return
		}
		assert(field.numTerms > 0)
		if err = w.out.WriteVLong(field.numTerms); err != nil {
			return
		}
		if err = w.out.WriteVInt(int32(len(field.rootCode))); err != nil {
			return
		}
		if err = w.out.WriteBytes(field.rootCode); err != nil {
			return
		}
		if field.fieldInfo.IndexOptions() != INDEX_OPT_DOCS_ONLY {
			if err = w.out.WriteVLong(field.sumTotalTermFreq); err != nil {
				return
			}
		}
		if err = w.out.WriteVLong(field.sumDocFreq); err != nil {
			return
		}
		if err = w.out.WriteVInt(int32(field.docCount)); err != nil {
			return
		}
		if err = w.out.WriteVInt(int32(field.longsSize)); err != nil {
			return
		}
		if err = w.indexOut.WriteVLong(field.indexStartFP); err != nil {
			return
		}
		if err = writeBytesRef(w.out, field.minTerm); err != nil {
			return
		}
		if err = writeBytesRef(w.out, field.maxTerm); err != nil {
			return
		}
	}
	if err = w.writeTrailer(w.out, dirStart); err != nil {
		return
	}
	if err = codec.WriteFooter(w.out); err != nil {
		return
	}
	if err = w.writeIndexTrailer(w.indexOut, indexDirStart); err != nil {
		return
	}
	if err = codec.WriteFooter(w.indexOut); err != nil {
		return
	}
	success = true
	return
}
func newPerFieldPostingsReader(state SegmentReadState) (fp FieldsProducer, err error) {
	ans := PerFieldPostingsReader{
		make(map[string]FieldsProducer),
		make(map[string]FieldsProducer),
	}
	// Read _X.per and init each format:
	success := false
	defer func() {
		if !success {
			// log.Printf("Failed to initialize PerFieldPostingsReader.")
			// if err != nil {
			// 	log.Print("DEBUG ", err)
			// }
			fps := make([]FieldsProducer, 0)
			for _, v := range ans.formats {
				fps = append(fps, v)
			}
			items := make([]io.Closer, len(fps))
			for i, v := range fps {
				items[i] = v
			}
			util.CloseWhileSuppressingError(items...)
		}
	}()

	// Read field name -> format name
	for _, fi := range state.FieldInfos.Values {
		// log.Printf("Processing %v...", fi)
		if fi.IsIndexed() {
			fieldName := fi.Name
			// log.Printf("Name: %v", fieldName)
			if formatName := fi.Attribute(PER_FIELD_FORMAT_KEY); formatName != "" {
				// log.Printf("Format: %v", formatName)
				// null formatName means the field is in fieldInfos, but has no postings!
				suffix := fi.Attribute(PER_FIELD_SUFFIX_KEY)
				// log.Printf("Suffix: %v", suffix)
				assert(suffix != "")
				format := LoadPostingsFormat(formatName)
				segmentSuffix := formatName + "_" + suffix
				// log.Printf("Segment suffix: %v", segmentSuffix)
				if _, ok := ans.formats[segmentSuffix]; !ok {
					// log.Printf("Loading fields producer: %v", segmentSuffix)
					newReadState := state // clone
					newReadState.SegmentSuffix = formatName + "_" + suffix
					fp, err = format.FieldsProducer(newReadState)
					if err != nil {
						return fp, err
					}
					ans.formats[segmentSuffix] = fp
				}
				ans.fields[fieldName] = ans.formats[segmentSuffix]
			}
		}
	}
	success = true
	return &ans, nil
}
func NewLucene41PostingsReader(dir store.Directory, fis model.FieldInfos, si *model.SegmentInfo,
	ctx store.IOContext, segmentSuffix string) (r PostingsReaderBase, err error) {

	log.Print("Initializing Lucene41PostingsReader...")
	success := false
	var docIn, posIn, payIn store.IndexInput = nil, nil, nil
	defer func() {
		if !success {
			log.Print("Failed to initialize Lucene41PostingsReader.")
			if err != nil {
				log.Print("DEBUG ", err)
			}
			util.CloseWhileSuppressingError(docIn, posIn, payIn)
		}
	}()

	docIn, err = dir.OpenInput(util.SegmentFileName(si.Name, segmentSuffix, LUCENE41_DOC_EXTENSION), ctx)
	if err != nil {
		return r, err
	}
	_, err = codec.CheckHeader(docIn, LUCENE41_DOC_CODEC, LUCENE41_VERSION_CURRENT, LUCENE41_VERSION_CURRENT)
	if err != nil {
		return r, err
	}
	forUtil, err := NewForUtil(docIn)
	if err != nil {
		return r, err
	}

	if fis.HasProx {
		posIn, err = dir.OpenInput(util.SegmentFileName(si.Name, segmentSuffix, LUCENE41_POS_EXTENSION), ctx)
		if err != nil {
			return r, err
		}
		_, err = codec.CheckHeader(posIn, LUCENE41_POS_CODEC, LUCENE41_VERSION_CURRENT, LUCENE41_VERSION_CURRENT)
		if err != nil {
			return r, err
		}

		if fis.HasPayloads || fis.HasOffsets {
			payIn, err = dir.OpenInput(util.SegmentFileName(si.Name, segmentSuffix, LUCENE41_PAY_EXTENSION), ctx)
			if err != nil {
				return r, err
			}
			_, err = codec.CheckHeader(payIn, LUCENE41_PAY_CODEC, LUCENE41_VERSION_CURRENT, LUCENE41_VERSION_CURRENT)
			if err != nil {
				return r, err
			}
		}
	}

	success = true
	return &Lucene41PostingsReader{docIn, posIn, payIn, forUtil}, nil
}
func NewCompoundFileDirectory(directory Directory, fileName string,
	context IOContext, openForWrite bool) (d *CompoundFileDirectory, err error) {

	self := &CompoundFileDirectory{
		Locker:         &sync.Mutex{},
		directory:      directory,
		fileName:       fileName,
		readBufferSize: bufferSize(context),
		openForWrite:   openForWrite,
	}
	self.DirectoryImpl = NewDirectoryImpl(self)
	self.BaseDirectory = NewBaseDirectory(self)

	if !openForWrite {
		// log.Printf("Open for read.")
		success := false
		defer func() {
			if !success {
				util.CloseWhileSuppressingError(self.handle)
			}
		}()
		self.handle, err = directory.OpenInput(fileName, context)
		if err != nil {
			return nil, err
		}
		self.entries, err = self.readEntries(self.handle, directory, fileName)
		if err != nil {
			return nil, err
		}
		if self.version >= CFD_VERSION_CHECKSUM {
			if _, err = codec.CheckHeader(self.handle, CFD_DATA_CODEC,
				int32(self.version), int32(self.version)); err != nil {
				return nil, err
			}
			// NOTE: data file is too costly to verify checksum against all the
			// bytes on open, but for now we at least verify proper structure
			// of the checksum footer: which looks for FOOTER_MAGIC +
			// algorithmID. This is cheap and can detect some forms of
			// corruption such as file truncation.
			if _, err = codec.RetrieveChecksum(self.handle); err != nil {
				return nil, err
			}
		}
		success = true
		self.BaseDirectory.IsOpen = true
		return self, nil
	} else {
		assert2(reflect.TypeOf(directory).Name() != "CompoundFileDirectory",
			"compound file inside of compound file: %v", fileName)
		self.entries = SENTINEL
		self.IsOpen = true
		self.writer = newCompoundFileWriter(directory, fileName)
		self.handle = nil
		return self, nil
	}
}
func (sis *SegmentInfos) rollbackCommit(dir store.Directory) {
	if sis.pendingSegnOutput != nil {
		// Suppress so we keep throwing the original error in our caller
		util.CloseWhileSuppressingError(sis.pendingSegnOutput)
		sis.pendingSegnOutput = nil

		// Must carefully compute filename from "generation" since
		// lastGeneration isn't incremented:
		segmentFilename := util.FileNameFromGeneration(INDEX_FILENAME_SEGMENTS, "", sis.generation)

		// Suppress so we keep throwing the original error in our caller
		util.DeleteFilesIgnoringErrors(dir, segmentFilename)
	}
}
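/*
rollbackCommit is the failure leg of the two-phase commit around
write() further below: write() leaves pendingSegnOutput open, and the
caller either finishes the commit or rolls it back. A schematic of the
calling pattern (hypothetical caller; everything except write and
rollbackCommit is illustrative only):
*/
func commitSegmentInfos(sis *SegmentInfos, dir store.Directory) (err error) {
	if err = sis.write(dir); err != nil {
		return err // write cleans up after itself; nothing to roll back
	}
	defer func() {
		if err != nil {
			sis.rollbackCommit(dir) // drop segments_N, keep the original error
		}
	}()
	// ... sync the referenced files, then finish the commit by writing
	// the checksum and closing pendingSegnOutput ...
	return nil
}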
func newLucene42DocValuesProducer(state SegmentReadState,
	dataCodec, dataExtension, metaCodec, metaExtension string) (dvp *Lucene42DocValuesProducer, err error) {

	dvp = &Lucene42DocValuesProducer{
		numericInstances: make(map[int]NumericDocValues),
	}
	dvp.maxDoc = state.segmentInfo.DocCount()
	metaName := util.SegmentFileName(state.segmentInfo.Name, state.segmentSuffix, metaExtension)

	// read in the entries from the metadata file; the inner func scopes
	// the success flag so the meta input is closed (normally on success,
	// with errors suppressed on failure) before the data file is opened:
	in, err := state.dir.OpenInput(metaName, state.context)
	if err != nil {
		return dvp, err
	}
	var version int32
	if err = func() (err error) {
		success := false
		defer func() {
			if success {
				err = util.Close(in)
			} else {
				util.CloseWhileSuppressingError(in)
			}
		}()
		if version, err = codec.CheckHeader(in, metaCodec,
			LUCENE42_DV_VERSION_START, LUCENE42_DV_VERSION_CURRENT); err != nil {
			return err
		}
		dvp.numerics = make(map[int]NumericEntry)
		dvp.binaries = make(map[int]BinaryEntry)
		dvp.fsts = make(map[int]FSTEntry)
		if err = dvp.readFields(in); err != nil {
			return err
		}
		success = true
		return nil
	}(); err != nil {
		return dvp, err
	}

	success := false
	defer func() {
		if !success {
			util.CloseWhileSuppressingError(dvp.data)
		}
	}()
	dataName := util.SegmentFileName(state.segmentInfo.Name, state.segmentSuffix, dataExtension)
	dvp.data, err = state.dir.OpenInput(dataName, state.context)
	if err != nil {
		return dvp, err
	}
	var version2 int32
	if version2, err = codec.CheckHeader(dvp.data, dataCodec,
		LUCENE42_DV_VERSION_START, LUCENE42_DV_VERSION_CURRENT); err != nil {
		return dvp, err
	}
	if version != version2 {
		return dvp, errors.New("Format versions mismatch")
	}
	success = true
	return dvp, nil
}
func (f *Lucene41PostingsFormat) FieldsConsumer(state *SegmentWriteState) (FieldsConsumer, error) {
	postingsWriter, err := newLucene41PostingsWriterCompact(state)
	if err != nil {
		return nil, err
	}
	var success = false
	defer func() {
		if !success {
			util.CloseWhileSuppressingError(postingsWriter)
		}
	}()
	ret, err := blocktree.NewBlockTreeTermsWriter(state, postingsWriter,
		f.minTermBlockSize, f.maxTermBlockSize)
	if err != nil {
		return nil, err
	}
	success = true
	return ret, nil
}
func (w *Lucene40SegmentInfoWriter) Write(dir store.Directory, si *SegmentInfo,
	fis FieldInfos, ctx store.IOContext) (err error) {

	filename := util.SegmentFileName(si.Name, "", LUCENE40_SI_EXTENSION)
	si.AddFile(filename)

	var output store.IndexOutput
	output, err = dir.CreateOutput(filename, ctx)
	if err != nil {
		return err
	}
	var success = false
	defer func() {
		if !success {
			util.CloseWhileSuppressingError(output)
			si.Dir.DeleteFile(filename) // ignore error
		} else {
			err = mergeError(err, output.Close())
		}
	}()

	err = codec.WriteHeader(output, LUCENE40_CODEC_NAME, LUCENE40_VERSION_CURRENT)
	if err != nil {
		return err
	}
	// Write the Lucene version that created this segment, since 3.1
	err = store.Stream(output).WriteString(si.Version().String()).
		WriteInt(int32(si.DocCount())).
		WriteByte(func() byte {
			if si.IsCompoundFile() {
				return SEGMENT_INFO_YES
			}
			return byte((SEGMENT_INFO_NO + 256) % 256) // Go byte is non-negative, unlike Java
		}()).
		WriteStringStringMap(si.Diagnostics()).
		WriteStringStringMap(map[string]string{}).
		WriteStringSet(si.Files()).
		Close()
	if err != nil {
		return err
	}
	success = true
	return nil
}
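/*
The deferred block in Write combines the method's own error with the
one from output.Close() via mergeError. A minimal definition consistent
with that call site (assumed semantics: the first non-nil error wins):
*/
func mergeError(err, err2 error) error {
	if err != nil {
		return err
	}
	return err2
}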
func (w *IndexWriter) closeInternalCleanup(waitForMerges bool) error {
	defer func() {
		// shutdown policy, scheduler and all threads (this call is not
		// interruptible):
		util.CloseWhileSuppressingError(w.mergePolicy, w.mergeScheduler)
	}()

	// clean up merge scheduler in all cases, although flushing may have failed:
	if waitForMerges {
		err := w.mergeScheduler.Merge(w)
		if err != nil {
			return err
		}
		w.waitForMerges()
	} else {
		w.abortAllMerges()
	}
	w.stopMerges = true
	return nil
}
func newPerFieldDocValuesReader(state SegmentReadState) (dvp DocValuesProducer, err error) {
	ans := PerFieldDocValuesReader{
		make(map[string]DocValuesProducer),
		make(map[string]DocValuesProducer),
	}
	// Read _X.per and init each format:
	success := false
	defer func() {
		if !success {
			fps := make([]DocValuesProducer, 0)
			for _, v := range ans.formats {
				fps = append(fps, v)
			}
			items := make([]io.Closer, len(fps))
			for i, v := range fps {
				items[i] = v
			}
			util.CloseWhileSuppressingError(items...)
		}
	}()

	// Read field name -> format name
	for _, fi := range state.FieldInfos.Values {
		if fi.HasDocValues() {
			fieldName := fi.Name
			if formatName := fi.Attribute(PER_FIELD_FORMAT_KEY); formatName != "" {
				// null formatName means the field is in fieldInfos, but has no docvalues!
				suffix := fi.Attribute(PER_FIELD_SUFFIX_KEY)
				// assert suffix != nil
				segmentSuffix := dvFullSegmentSuffix(state.SegmentSuffix, dvSuffix(formatName, suffix))
				if _, ok := ans.formats[segmentSuffix]; !ok {
					newReadState := state // clone
					newReadState.SegmentSuffix = formatName + "_" + suffix
					// propagate the error rather than shadowing it, so a
					// failed producer load is not silently ignored:
					var p DocValuesProducer
					if p, err = LoadDocValuesProducer(formatName, newReadState); err != nil {
						return nil, err
					}
					ans.formats[segmentSuffix] = p
				}
				ans.fields[fieldName] = ans.formats[segmentSuffix]
			}
		}
	}
	success = true
	return &ans, nil
}
/*
Copy the contents of the file with the specified extension into the
provided output stream.
*/
func (w *CompoundFileWriter) copyFileEntry(dataOut IndexOutput, fileEntry *FileEntry) (n int64, err error) {
	var is IndexInput
	is, err = fileEntry.dir.OpenInput(fileEntry.file, IO_CONTEXT_READONCE)
	if err != nil {
		return 0, err
	}
	var success = false
	defer func() {
		if success {
			err = util.Close(is)
			// copy successful - delete file
			if err == nil {
				fileEntry.dir.DeleteFile(fileEntry.file) // ignore error
			}
		} else {
			util.CloseWhileSuppressingError(is)
		}
	}()

	startPtr := dataOut.FilePointer()
	length := fileEntry.length
	err = dataOut.CopyBytes(is, length)
	if err != nil {
		return 0, err
	}
	// verify that the output length diff is equal to original file
	endPtr := dataOut.FilePointer()
	diff := endPtr - startPtr
	if diff != length {
		return 0, fmt.Errorf(
			"Difference in the output file offsets %v does not match the original file length %v",
			diff, length)
	}
	fileEntry.offset = startPtr
	success = true
	return length, nil
}
func (w *CompoundFileWriter) output(ctx IOContext) (IndexOutput, error) {
	w.Lock()
	defer w.Unlock()

	if w.dataOut == nil {
		var success = false
		defer func() {
			if !success {
				util.CloseWhileSuppressingError(w.dataOut)
			}
		}()
		var err error
		w.dataOut, err = w.directory.CreateOutput(w.dataFileName, ctx)
		if err != nil {
			return nil, err
		}
		err = codec.WriteHeader(w.dataOut, CFD_DATA_CODEC, CFD_VERSION_CURRENT)
		if err != nil {
			return nil, err
		}
		success = true
	}
	return w.dataOut, nil
}
func NewBlockTreeTermsReader(dir store.Directory, fieldInfos FieldInfos,
	info *SegmentInfo, postingsReader PostingsReaderBase, ctx store.IOContext,
	segmentSuffix string, indexDivisor int) (p FieldsProducer, err error) {

	// log.Print("Initializing BlockTreeTermsReader...")
	fp := &BlockTreeTermsReader{
		postingsReader: postingsReader,
		fields:         make(map[string]FieldReader),
		segment:        info.Name,
	}
	fp.in, err = dir.OpenInput(util.SegmentFileName(info.Name, segmentSuffix, TERMS_EXTENSION), ctx)
	if err != nil {
		return nil, err
	}

	success := false
	var indexIn store.IndexInput
	defer func() {
		if !success {
			fmt.Println("Failed to initialize BlockTreeTermsReader.")
			if err != nil {
				fmt.Println("DEBUG ", err)
			}
			// this.close() will close in:
			util.CloseWhileSuppressingError(indexIn, fp)
		}
	}()

	fp.version, err = fp.readHeader(fp.in)
	if err != nil {
		return nil, err
	}
	// log.Printf("Version: %v", fp.version)

	if indexDivisor != -1 {
		filename := util.SegmentFileName(info.Name, segmentSuffix, TERMS_INDEX_EXTENSION)
		indexIn, err = dir.OpenInput(filename, ctx)
		if err != nil {
			return nil, err
		}
		indexVersion, err := fp.readIndexHeader(indexIn)
		if err != nil {
			return nil, err
		}
		// log.Printf("Index version: %v", indexVersion)
		if int(indexVersion) != fp.version {
			return nil, fmt.Errorf("mismatched version files: %v=%v,%v=%v",
				fp.in, fp.version, indexIn, indexVersion)
		}
	}

	// verify
	if indexIn != nil && fp.version >= TERMS_VERSION_CURRENT {
		if _, err = store.ChecksumEntireFile(indexIn); err != nil {
			return nil, err
		}
	}

	// Have PostingsReader init itself
	postingsReader.Init(fp.in)

	if fp.version >= TERMS_VERSION_CHECKSUM {
		// NOTE: data file is too costly to verify checksum against all the
		// bytes on open, but for now we at least verify proper structure
		// of the checksum footer: which looks for FOOTER_MAGIC +
		// algorithmID. This is cheap and can detect some forms of
		// corruption such as file truncation.
		if _, err = codec.RetrieveChecksum(fp.in); err != nil {
			return nil, err
		}
	}

	// Read per-field details
	fp.seekDir(fp.in, fp.dirOffset)
	if indexDivisor != -1 {
		fp.seekDir(indexIn, fp.indexDirOffset)
	}

	numFields, err := fp.in.ReadVInt()
	if err != nil {
		return nil, err
	}
	// log.Printf("Fields number: %v", numFields)
	if numFields < 0 {
		return nil, fmt.Errorf("invalid numFields: %v (resource=%v)", numFields, fp.in)
	}

	for i := int32(0); i < numFields; i++ {
		// log.Printf("Next field...")
		field, err := fp.in.ReadVInt()
		if err != nil {
			return nil, err
		}
		// log.Printf("Field: %v", field)

		numTerms, err := fp.in.ReadVLong()
		if err != nil {
			return nil, err
		}
		assert2(numTerms > 0, "Illegal numTerms for field number: %v (resource=%v)", field, fp.in)
		// log.Printf("Terms number: %v", numTerms)

		numBytes, err := fp.in.ReadVInt()
		if err != nil {
			return nil, err
		}
		assert2(numBytes >= 0, "invalid rootCode for field number: %v, numBytes=%v (resource=%v)",
			field, numBytes, fp.in)
		// log.Printf("Bytes number: %v", numBytes)

		rootCode := make([]byte, numBytes)
		err = fp.in.ReadBytes(rootCode)
		if err != nil {
			return nil, err
		}
		fieldInfo := fieldInfos.FieldInfoByNumber(int(field))
		assert2(fieldInfo != nil, "invalid field number: %v (resource=%v)", field, fp.in)

		var sumTotalTermFreq int64
		if fieldInfo.IndexOptions() == INDEX_OPT_DOCS_ONLY {
			sumTotalTermFreq = -1
		} else {
			sumTotalTermFreq, err = fp.in.ReadVLong()
			if err != nil {
				return nil, err
			}
		}
		sumDocFreq, err := fp.in.ReadVLong()
		if err != nil {
			return nil, err
		}
		var docCount int
		if docCount, err = asInt(fp.in.ReadVInt()); err != nil {
			return nil, err
		}
		// fmt.Printf("DocCount: %v\n", docCount)
		var longsSize int
		if fp.version >= TERMS_VERSION_META_ARRAY {
			if longsSize, err = asInt(fp.in.ReadVInt()); err != nil {
				return nil, err
			}
		}
		assert2(longsSize >= 0, "invalid longsSize for field: %v, longsSize=%v (resource=%v)",
			fieldInfo.Name, longsSize, fp.in)
		var minTerm, maxTerm []byte
		if fp.version >= TERMS_VERSION_MIN_MAX_TERMS {
			if minTerm, err = readBytesRef(fp.in); err != nil {
				return nil, err
			}
			if maxTerm, err = readBytesRef(fp.in); err != nil {
				return nil, err
			}
		}
		if docCount < 0 || int(docCount) > info.DocCount() { // #docs with field must be <= #docs
			return nil, fmt.Errorf("invalid docCount: %v maxDoc: %v (resource=%v)",
				docCount, info.DocCount(), fp.in)
		}
		if sumDocFreq < int64(docCount) { // #postings must be >= #docs with field
			return nil, fmt.Errorf("invalid sumDocFreq: %v docCount: %v (resource=%v)",
				sumDocFreq, docCount, fp.in)
		}
		if sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq { // #positions must be >= #postings
			return nil, fmt.Errorf("invalid sumTotalTermFreq: %v sumDocFreq: %v (resource=%v)",
				sumTotalTermFreq, sumDocFreq, fp.in)
		}

		var indexStartFP int64
		if indexDivisor != -1 {
			if indexStartFP, err = indexIn.ReadVLong(); err != nil {
				return nil, err
			}
		}
		// log.Printf("indexStartFP: %v", indexStartFP)
		if _, ok := fp.fields[fieldInfo.Name]; ok {
			return nil, fmt.Errorf("duplicate field: %v (resource=%v)", fieldInfo.Name, fp.in)
		}
		if fp.fields[fieldInfo.Name], err = newFieldReader(fp,
			fieldInfo, numTerms, rootCode, sumTotalTermFreq,
			sumDocFreq, docCount, indexStartFP, longsSize,
			indexIn, minTerm, maxTerm); err != nil {
			return nil, err
		}
	}

	if indexDivisor != -1 {
		if err = indexIn.Close(); err != nil {
			return nil, err
		}
	}
	success = true
	return fp, nil
}
func (sis *SegmentInfos) write(directory store.Directory) (err error) {
	segmentsFilename := sis.nextSegmentFilename()

	// Always advance the generation on write:
	if sis.generation == -1 {
		sis.generation = 1
	} else {
		sis.generation++
	}

	var segnOutput store.IndexOutput
	var success = false
	// var upgradedSIFiles = make(map[string]bool)
	defer func() {
		if !success {
			// We hit an error above; try to close the file but suppress
			// any errors
			util.CloseWhileSuppressingError(segnOutput)
			// for filename, _ := range upgradedSIFiles {
			// 	directory.DeleteFile(filename) // ignore error
			// }
			// Try not to leave a truncated segments_N file in the index:
			directory.DeleteFile(segmentsFilename) // ignore error
		}
	}()

	if segnOutput, err = directory.CreateOutput(segmentsFilename, store.IO_CONTEXT_DEFAULT); err != nil {
		return
	}
	if err = codec.WriteHeader(segnOutput, "segments", VERSION_49); err != nil {
		return
	}
	if err = segnOutput.WriteLong(sis.version); err != nil {
		return
	}
	if err = segnOutput.WriteInt(int32(sis.counter)); err != nil {
		return
	}
	if err = segnOutput.WriteInt(int32(len(sis.Segments))); err != nil {
		return
	}
	for _, siPerCommit := range sis.Segments {
		si := siPerCommit.Info
		if err = segnOutput.WriteString(si.Name); err != nil {
			return
		}
		if err = segnOutput.WriteString(si.Codec().(Codec).Name()); err != nil {
			return
		}
		if err = segnOutput.WriteLong(siPerCommit.DelGen()); err != nil {
			return
		}
		assert2(siPerCommit.DelCount() >= 0 && siPerCommit.DelCount() <= si.DocCount(),
			"cannot write segment: invalid docCount segment=%v docCount=%v delCount=%v",
			si.Name, si.DocCount(), siPerCommit.DelCount())
		if err = segnOutput.WriteInt(int32(siPerCommit.DelCount())); err != nil {
			return
		}
		if err = segnOutput.WriteLong(siPerCommit.FieldInfosGen()); err != nil {
			return
		}
		if err = segnOutput.WriteLong(siPerCommit.DocValuesGen()); err != nil {
			return
		}
		if err = segnOutput.WriteStringSet(siPerCommit.FieldInfosFiles()); err != nil {
			return
		}
		dvUpdatesFiles := siPerCommit.DocValuesUpdatesFiles()
		if err = segnOutput.WriteInt(int32(len(dvUpdatesFiles))); err != nil {
			return
		}
		for k, v := range dvUpdatesFiles {
			if err = segnOutput.WriteInt(int32(k)); err != nil {
				return
			}
			if err = segnOutput.WriteStringSet(v); err != nil {
				return
			}
		}
		assert(si.Dir == directory)

		// If this segment is pre-4.x, perform a one-time "upgrade" to
		// write the .si file for it:
		if version := si.Version(); len(version) == 0 || !version.OnOrAfter(util.VERSION_4_0) {
			panic("not implemented yet")
		}
	}
	if err = segnOutput.WriteStringStringMap(sis.userData); err != nil {
		return
	}
	sis.pendingSegnOutput = segnOutput
	success = true
	return nil
}
/*
Read a particular segmentFileName. Note that this may return an IO
error if a commit is in progress.
*/
func (sis *SegmentInfos) Read(directory store.Directory, segmentFileName string) (err error) {
	// fmt.Printf("Reading segment info from %v...\n", segmentFileName)

	// Clear any previous segments:
	sis.Clear()

	sis.generation = GenerationFromSegmentsFileName(segmentFileName)
	sis.lastGeneration = sis.generation

	var input store.ChecksumIndexInput
	if input, err = directory.OpenChecksumInput(segmentFileName, store.IO_CONTEXT_READ); err != nil {
		return
	}

	var success = false
	defer func() {
		if !success {
			// Clear any segment infos we had loaded so we
			// have a clean slate on retry:
			sis.Clear()
			util.CloseWhileSuppressingError(input)
		} else {
			err = input.Close()
		}
	}()

	var format int
	if format, err = asInt(input.ReadInt()); err != nil {
		return
	}

	var actualFormat int
	if format == codec.CODEC_MAGIC { // 4.0+
		if actualFormat, err = asInt(codec.CheckHeaderNoMagic(input, "segments", VERSION_40, VERSION_49)); err != nil {
			return
		}
		if sis.version, err = input.ReadLong(); err != nil {
			return
		}
		if sis.counter, err = asInt(input.ReadInt()); err != nil {
			return
		}
		var numSegments int
		if numSegments, err = asInt(input.ReadInt()); err != nil {
			return
		} else if numSegments < 0 {
			return fmt.Errorf("invalid segment count: %v (resource: %v)", numSegments, input)
		}

		var segName, codecName string
		var fCodec Codec
		var delGen, fieldInfosGen, dvGen int64
		var delCount int
		for seg := 0; seg < numSegments; seg++ {
			if segName, err = input.ReadString(); err != nil {
				return
			}
			if codecName, err = input.ReadString(); err != nil {
				return
			}
			fCodec = LoadCodec(codecName)
			assert2(fCodec != nil, "Invalid codec name: %v", codecName)
			// fmt.Printf("SIS.read seg=%v codec=%v\n", seg, fCodec)
			var info *SegmentInfo
			if info, err = fCodec.SegmentInfoFormat().SegmentInfoReader().Read(directory,
				segName, store.IO_CONTEXT_READ); err != nil {
				return
			}
			info.SetCodec(fCodec)
			if delGen, err = input.ReadLong(); err != nil {
				return
			}
			if delCount, err = asInt(input.ReadInt()); err != nil {
				return
			} else if delCount < 0 || delCount > info.DocCount() {
				return fmt.Errorf("invalid deletion count: %v vs docCount=%v (resource: %v)",
					delCount, info.DocCount(), input)
			}
			fieldInfosGen = -1
			if actualFormat >= VERSION_46 {
				if fieldInfosGen, err = input.ReadLong(); err != nil {
					return
				}
			}
			dvGen = -1
			if actualFormat >= VERSION_49 {
				if dvGen, err = input.ReadLong(); err != nil {
					return
				}
			} else {
				dvGen = fieldInfosGen
			}
			siPerCommit := NewSegmentCommitInfo(info, delCount, delGen, fieldInfosGen, dvGen)
			if actualFormat >= VERSION_46 {
				if actualFormat < VERSION_49 {
					panic("not implemented yet")
				} else {
					var ss map[string]bool
					if ss, err = input.ReadStringSet(); err != nil {
						return err
					}
					siPerCommit.SetFieldInfosFiles(ss)
					var dvUpdatesFiles map[int]map[string]bool
					var numDVFields int
					if numDVFields, err = asInt(input.ReadInt()); err != nil {
						return err
					}
					if numDVFields == 0 {
						dvUpdatesFiles = make(map[int]map[string]bool)
					} else {
						panic("not implemented yet")
					}
					siPerCommit.SetDocValuesUpdatesFiles(dvUpdatesFiles)
				}
			}
			sis.Segments = append(sis.Segments, siPerCommit)
		}
		if sis.userData, err = input.ReadStringStringMap(); err != nil {
			return err
		}
	} else {
		// TODO support <4.0 index
		panic("Index format pre-4.0 not supported yet")
	}

	if actualFormat >= VERSION_48 {
		if _, err = codec.CheckFooter(input); err != nil {
			return
		}
	} else {
		var checksumNow = int64(input.Checksum())
		var checksumThen int64
		if checksumThen, err = input.ReadLong(); err != nil {
			return
		}
		if checksumNow != checksumThen {
			return fmt.Errorf("checksum mismatch in segments file: %v vs %v (resource: %v)",
				checksumNow, checksumThen, input)
		}
		if err = codec.CheckEOF(input); err != nil {
			return
		}
	}
	success = true
	return nil
}
func NewCompressingStoredFieldsWriter(dir store.Directory, si *model.SegmentInfo,
	segmentSuffix string, ctx store.IOContext, formatName string,
	compressionMode CompressionMode, chunkSize int) (*CompressingStoredFieldsWriter, error) {

	assert(dir != nil)
	ans := &CompressingStoredFieldsWriter{
		directory:       dir,
		segment:         si.Name,
		segmentSuffix:   segmentSuffix,
		compressionMode: compressionMode,
		compressor:      compressionMode.NewCompressor(),
		chunkSize:       chunkSize,
		docBase:         0,
		bufferedDocs:    newGrowableByteArrayDataOutput(chunkSize),
		numStoredFields: make([]int, 16),
		endOffsets:      make([]int, 16),
		numBufferedDocs: 0,
	}

	var success = false
	indexStream, err := dir.CreateOutput(util.SegmentFileName(si.Name, segmentSuffix,
		lucene40.FIELDS_INDEX_EXTENSION), ctx)
	if err != nil {
		return nil, err
	}
	assert(indexStream != nil)
	defer func() {
		if !success {
			util.CloseWhileSuppressingError(indexStream)
			ans.Abort()
		}
	}()

	ans.fieldsStream, err = dir.CreateOutput(util.SegmentFileName(si.Name, segmentSuffix,
		lucene40.FIELDS_EXTENSION), ctx)
	if err != nil {
		return nil, err
	}

	codecNameIdx := formatName + CODEC_SFX_IDX
	codecNameDat := formatName + CODEC_SFX_DAT
	err = codec.WriteHeader(indexStream, codecNameIdx, VERSION_CURRENT)
	if err != nil {
		return nil, err
	}
	err = codec.WriteHeader(ans.fieldsStream, codecNameDat, VERSION_CURRENT)
	if err != nil {
		return nil, err
	}
	assert(int64(codec.HeaderLength(codecNameIdx)) == indexStream.FilePointer())
	assert(int64(codec.HeaderLength(codecNameDat)) == ans.fieldsStream.FilePointer())

	ans.indexWriter, err = NewStoredFieldsIndexWriter(indexStream)
	if err != nil {
		return nil, err
	}
	assert(ans.indexWriter != nil)
	// ownership of indexStream has passed to the index writer; clear the
	// local so the deferred cleanup doesn't close it a second time:
	indexStream = nil

	err = ans.fieldsStream.WriteVInt(int32(chunkSize))
	if err != nil {
		return nil, err
	}
	err = ans.fieldsStream.WriteVInt(packed.VERSION_CURRENT)
	if err != nil {
		return nil, err
	}
	success = true
	return ans, nil
}
/*
Create a new writer. The number of items (terms or sub-blocks) per
block will aim to be between minItemsInBlock and maxItemsInBlock,
though in some cases, the blocks may be smaller than the min.
*/
func NewBlockTreeTermsWriter(state *SegmentWriteState,
	postingsWriter PostingsWriterBase,
	minItemsInBlock, maxItemsInBlock int) (*BlockTreeTermsWriter, error) {

	assert2(minItemsInBlock >= 2, "minItemsInBlock must be >= 2; got %v", minItemsInBlock)
	assert2(maxItemsInBlock >= 1, "maxItemsInBlock must be >= 1; got %v", maxItemsInBlock)
	assert2(minItemsInBlock <= maxItemsInBlock,
		"maxItemsInBlock must be >= minItemsInBlock; got maxItemsInBlock=%v minItemsInBlock=%v",
		maxItemsInBlock, minItemsInBlock)
	assert2(2*(minItemsInBlock-1) <= maxItemsInBlock,
		"maxItemsInBlock must be at least 2*(minItemsInBlock-1); got maxItemsInBlock=%v minItemsInBlock=%v",
		maxItemsInBlock, minItemsInBlock)

	ans := &BlockTreeTermsWriter{
		maxDoc:          state.SegmentInfo.DocCount(),
		fieldInfos:      state.FieldInfos,
		minItemsInBlock: minItemsInBlock,
		maxItemsInBlock: maxItemsInBlock,
		postingsWriter:  postingsWriter,
		segment:         state.SegmentInfo.Name,
		scratchBytes:    store.NewRAMOutputStreamBuffer(),
		scratchIntsRef:  util.NewIntsRefBuilder(),
		// bytesWriter:  store.NewRAMOutputStreamBuffer(),
		// bytesWriter2: store.NewRAMOutputStreamBuffer(),
	}
	ans.spi = ans

	var out, indexOut store.IndexOutput
	if err := func() error {
		var success = false
		defer func() {
			if !success {
				util.CloseWhileSuppressingError(out, indexOut)
			}
		}()

		var err error
		termsFileName := util.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, TERMS_EXTENSION)
		if out, err = state.Directory.CreateOutput(termsFileName, state.Context); err != nil {
			return err
		}
		if err = ans.spi.WriteHeader(out); err != nil {
			return err
		}
		termsIndexFileName := util.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, TERMS_INDEX_EXTENSION)
		if indexOut, err = state.Directory.CreateOutput(termsIndexFileName, state.Context); err != nil {
			return err
		}
		if err = ans.spi.WriteIndexHeader(indexOut); err != nil {
			return err
		}
		// have consumer write its format/header
		if err = postingsWriter.Init(out); err != nil {
			return err
		}
		success = true
		return nil
	}(); err != nil {
		return nil, err
	}
	ans.out = out
	ans.indexOut = indexOut
	return ans, nil
}
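/*
Worked example of the block-size contract asserted above: with
minItemsInBlock=25 the smallest legal maxItemsInBlock is 2*(25-1) = 48,
so (25, 48) - Lucene's defaults - passes all four checks while (25, 47)
panics. The same predicate, written out:
*/
func validBlockSizes(min, max int) bool {
	return min >= 2 && max >= 1 && min <= max && 2*(min-1) <= max
}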
// Sole constructor
func newCompressingStoredFieldsReader(d store.Directory, si *model.SegmentInfo,
	segmentSuffix string, fn model.FieldInfos, ctx store.IOContext,
	formatName string, compressionMode CompressionMode) (r *CompressingStoredFieldsReader, err error) {

	r = &CompressingStoredFieldsReader{}
	r.compressionMode = compressionMode
	segment := si.Name
	r.fieldInfos = fn
	r.numDocs = si.DocCount()

	var indexStream store.ChecksumIndexInput
	success := false
	defer func() {
		if !success {
			util.CloseWhileSuppressingError(r, indexStream)
		}
	}()

	indexStreamFN := util.SegmentFileName(segment, segmentSuffix, lucene40.FIELDS_INDEX_EXTENSION)
	fieldsStreamFN := util.SegmentFileName(segment, segmentSuffix, lucene40.FIELDS_EXTENSION)

	// Load the index into memory
	if indexStream, err = d.OpenChecksumInput(indexStreamFN, ctx); err != nil {
		return nil, err
	}
	codecNameIdx := formatName + CODEC_SFX_IDX
	if r.version, err = int32AsInt(codec.CheckHeader(indexStream, codecNameIdx,
		VERSION_START, VERSION_CURRENT)); err != nil {
		return nil, err
	}
	assert(int64(codec.HeaderLength(codecNameIdx)) == indexStream.FilePointer())
	if r.indexReader, err = newCompressingStoredFieldsIndexReader(indexStream, si); err != nil {
		return nil, err
	}

	var maxPointer int64 = -1
	if r.version >= VERSION_CHECKSUM {
		if maxPointer, err = indexStream.ReadVLong(); err != nil {
			return nil, err
		}
		if _, err = codec.CheckFooter(indexStream); err != nil {
			return nil, err
		}
	} else {
		if err = codec.CheckEOF(indexStream); err != nil {
			return nil, err
		}
	}
	if err = indexStream.Close(); err != nil {
		return nil, err
	}
	indexStream = nil

	// Open the data file and read metadata
	if r.fieldsStream, err = d.OpenInput(fieldsStreamFN, ctx); err != nil {
		return nil, err
	}
	if r.version >= VERSION_CHECKSUM {
		if maxPointer+codec.FOOTER_LENGTH != r.fieldsStream.Length() {
			return nil, fmt.Errorf(
				"Invalid fieldsStream maxPointer (file truncated?): maxPointer=%v, length=%v",
				maxPointer, r.fieldsStream.Length())
		}
	} else {
		maxPointer = r.fieldsStream.Length()
	}
	r.maxPointer = maxPointer

	codecNameDat := formatName + CODEC_SFX_DAT
	var fieldsVersion int
	if fieldsVersion, err = int32AsInt(codec.CheckHeader(r.fieldsStream, codecNameDat,
		VERSION_START, VERSION_CURRENT)); err != nil {
		return nil, err
	}
	assert2(r.version == fieldsVersion,
		"Version mismatch between stored fields index and data: %v != %v",
		r.version, fieldsVersion)
	assert(int64(codec.HeaderLength(codecNameDat)) == r.fieldsStream.FilePointer())

	r.chunkSize = -1
	if r.version >= VERSION_BIG_CHUNKS {
		if r.chunkSize, err = int32AsInt(r.fieldsStream.ReadVInt()); err != nil {
			return nil, err
		}
	}
	if r.packedIntsVersion, err = int32AsInt(r.fieldsStream.ReadVInt()); err != nil {
		return nil, err
	}

	r.decompressor = compressionMode.NewDecompressor()
	r.bytes = make([]byte, 0)

	if r.version >= VERSION_CHECKSUM {
		// NOTE: data file is too costly to verify checksum against all the
		// bytes on open, but for now we at least verify proper structure
		// of the checksum footer: which looks for FOOTER_MAGIC +
		// algorithmID. This is cheap and can detect some forms of
		// corruption such as file truncation.
		if _, err = codec.RetrieveChecksum(r.fieldsStream); err != nil {
			return nil, err
		}
	}
	success = true
	return r, nil
}