// NewCompoundFileDirectory opens a compound file (CFS). Only read mode is
// implemented; opening for write panics.
func NewCompoundFileDirectory(directory Directory, fileName string,
    context IOContext, openForWrite bool) (d *CompoundFileDirectory, err error) {

    self := &CompoundFileDirectory{
        lock:           sync.Mutex{},
        directory:      directory,
        fileName:       fileName,
        readBufferSize: bufferSize(context),
        openForWriter:  openForWrite,
    }
    self.DirectoryImpl = newDirectoryImpl(self)

    if openForWrite {
        panic("not supported yet")
    }

    log.Printf("Open for read.")
    success := false
    defer func() {
        // On any failure below, release the underlying slicer without
        // masking the original error.
        if !success {
            util.CloseWhileSuppressingError(self.handle)
        }
    }()
    self.handle, err = directory.createSlicer(fileName, context)
    if err != nil {
        return self, err
    }
    self.entries, err = readEntries(self.handle, directory, fileName)
    if err != nil {
        return self, err
    }
    success = true
    self.DirectoryImpl.isOpen = true
    return self, nil
}
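// Usage sketch (added for illustration; not part of the original code): open
// an existing compound file read-only. The Directory and IOContext values are
// assumed to come from the caller, and "_0.cfs" is a hypothetical file name.
func exampleOpenCompoundFile(dir Directory, ctx IOContext) error {
    cfd, err := NewCompoundFileDirectory(dir, "_0.cfs", ctx, false)
    if err != nil {
        return err
    }
    // Files packed inside _0.cfs can now be opened through cfd; a Close
    // method is assumed to be provided by the embedded DirectoryImpl.
    _ = cfd
    return nil
}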
// newPerFieldPostingsReader reads the per-field format mapping recorded in
// each field's attributes and instantiates one FieldsProducer per distinct
// format/suffix pair, sharing producers between fields that use the same pair.
func newPerFieldPostingsReader(state SegmentReadState) (fp FieldsProducer, err error) {
    ans := PerFieldPostingsReader{
        make(map[string]FieldsProducer),
        make(map[string]FieldsProducer),
    }
    // Read _X.per and init each format:
    success := false
    defer func() {
        if !success {
            log.Printf("Failed to initialize PerFieldPostingsReader.")
            if err != nil {
                log.Print("DEBUG ", err)
            }
            // Close every producer created so far, suppressing close errors
            // so the original error is the one reported.
            items := make([]io.Closer, 0, len(ans.formats))
            for _, v := range ans.formats {
                items = append(items, v)
            }
            util.CloseWhileSuppressingError(items...)
        }
    }()
    // Read field name -> format name
    for _, fi := range state.fieldInfos.values {
        log.Printf("Processing %v...", fi)
        if !fi.indexed {
            continue
        }
        fieldName := fi.name
        log.Printf("Name: %v", fieldName)
        formatName, ok := fi.attributes[PER_FIELD_FORMAT_KEY]
        if !ok {
            // Missing formatName means the field is in fieldInfos, but has
            // no postings!
            continue
        }
        log.Printf("Format: %v", formatName)
        suffix := fi.attributes[PER_FIELD_SUFFIX_KEY]
        log.Printf("Suffix: %v", suffix)
        // assert suffix != nil
        segmentSuffix := formatName + "_" + suffix
        log.Printf("Segment suffix: %v", segmentSuffix)
        if _, ok := ans.formats[segmentSuffix]; !ok {
            log.Printf("Loading fields producer: %v", segmentSuffix)
            newReadState := state // clone
            newReadState.segmentSuffix = segmentSuffix
            fp, err = LoadFieldsProducer(formatName, newReadState)
            if err != nil {
                return fp, err
            }
            ans.formats[segmentSuffix] = fp
        }
        ans.fields[fieldName] = ans.formats[segmentSuffix]
    }
    success = true
    return &ans, nil
}
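// Note (added for illustration; the attribute values are hypothetical): for a
// field stored with attributes PER_FIELD_FORMAT_KEY="Lucene41" and
// PER_FIELD_SUFFIX_KEY="0", the producer above is keyed under the segment
// suffix "Lucene41_0", and every field sharing that format/suffix pair reuses
// the same FieldsProducer instance.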
// NewLucene41PostingsReader opens the Lucene41 postings files of a segment:
// the .doc file always, the .pos file if any field indexes positions, and the
// .pay file if payloads or offsets are present.
func NewLucene41PostingsReader(dir store.Directory, fis FieldInfos, si SegmentInfo,
    ctx store.IOContext, segmentSuffix string) (r PostingsReaderBase, err error) {

    log.Print("Initializing Lucene41PostingsReader...")
    success := false
    var docIn, posIn, payIn store.IndexInput = nil, nil, nil
    defer func() {
        if !success {
            log.Print("Failed to initialize Lucene41PostingsReader.")
            if err != nil {
                log.Print("DEBUG ", err)
            }
            util.CloseWhileSuppressingError(docIn, posIn, payIn)
        }
    }()

    docIn, err = dir.OpenInput(util.SegmentFileName(si.name, segmentSuffix, LUCENE41_DOC_EXTENSION), ctx)
    if err != nil {
        return r, err
    }
    _, err = codec.CheckHeader(docIn, LUCENE41_DOC_CODEC, LUCENE41_VERSION_CURRENT, LUCENE41_VERSION_CURRENT)
    if err != nil {
        return r, err
    }
    forUtil, err := NewForUtil(docIn)
    if err != nil {
        return r, err
    }

    if fis.hasProx {
        posIn, err = dir.OpenInput(util.SegmentFileName(si.name, segmentSuffix, LUCENE41_POS_EXTENSION), ctx)
        if err != nil {
            return r, err
        }
        _, err = codec.CheckHeader(posIn, LUCENE41_POS_CODEC, LUCENE41_VERSION_CURRENT, LUCENE41_VERSION_CURRENT)
        if err != nil {
            return r, err
        }

        if fis.hasPayloads || fis.hasOffsets {
            payIn, err = dir.OpenInput(util.SegmentFileName(si.name, segmentSuffix, LUCENE41_PAY_EXTENSION), ctx)
            if err != nil {
                return r, err
            }
            _, err = codec.CheckHeader(payIn, LUCENE41_PAY_CODEC, LUCENE41_VERSION_CURRENT, LUCENE41_VERSION_CURRENT)
            if err != nil {
                return r, err
            }
        }
    }

    success = true
    return &Lucene41PostingsReader{docIn, posIn, payIn, forUtil}, nil
}
// newLucene42DocValuesProducer reads the doc-values metadata file first, then
// opens the data file and verifies that both carry the same format version.
func newLucene42DocValuesProducer(state SegmentReadState, dataCodec, dataExtension,
    metaCodec, metaExtension string) (dvp *Lucene42DocValuesProducer, err error) {

    dvp = &Lucene42DocValuesProducer{}
    dvp.maxDoc = int(state.segmentInfo.docCount)
    metaName := util.SegmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension)
    // read in the entries from the metadata file.
    in, err := state.dir.OpenInput(metaName, state.context)
    if err != nil {
        return dvp, err
    }
    success := false
    defer func() {
        if !success {
            util.CloseWhileSuppressingError(in)
        }
    }()

    version, err := codec.CheckHeader(in, metaCodec, LUCENE42_DV_VERSION_START, LUCENE42_DV_VERSION_CURRENT)
    if err != nil {
        return dvp, err
    }
    dvp.numerics = make(map[int]NumericEntry)
    dvp.binaries = make(map[int]BinaryEntry)
    dvp.fsts = make(map[int]FSTEntry)
    err = dvp.readFields(in, state.fieldInfos)
    if err != nil {
        return dvp, err
    }
    success = true
    // The metadata is fully consumed; close it now and surface any close
    // error. The data file below gets its own cleanup flag, mirroring the
    // two try/finally blocks in Lucene's Java version.
    if err = util.Close(in); err != nil {
        return dvp, err
    }

    dataSuccess := false
    defer func() {
        if !dataSuccess {
            util.CloseWhileSuppressingError(dvp.data)
        }
    }()
    dataName := util.SegmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension)
    dvp.data, err = state.dir.OpenInput(dataName, state.context)
    if err != nil {
        return dvp, err
    }
    version2, err := codec.CheckHeader(dvp.data, dataCodec, LUCENE42_DV_VERSION_START, LUCENE42_DV_VERSION_CURRENT)
    if err != nil {
        return dvp, err
    }
    if version != version2 {
        return dvp, errors.New("Format versions mismatch")
    }
    dataSuccess = true
    return dvp, nil
}
// newPerFieldDocValuesReader mirrors newPerFieldPostingsReader: it maps each
// doc-values field to the producer for its per-field format/suffix pair.
func newPerFieldDocValuesReader(state SegmentReadState) (dvp DocValuesProducer, err error) {
    ans := PerFieldDocValuesReader{
        make(map[string]DocValuesProducer),
        make(map[string]DocValuesProducer),
    }
    // Read _X.per and init each format:
    success := false
    defer func() {
        if !success {
            // Close every producer created so far, suppressing close errors.
            items := make([]io.Closer, 0, len(ans.formats))
            for _, v := range ans.formats {
                items = append(items, v)
            }
            util.CloseWhileSuppressingError(items...)
        }
    }()
    // Read field name -> format name
    for _, fi := range state.fieldInfos.values {
        if fi.docValueType == 0 {
            continue
        }
        fieldName := fi.name
        formatName, ok := fi.attributes[PER_FIELD_FORMAT_KEY]
        if !ok {
            // Missing formatName means the field is in fieldInfos, but has
            // no docvalues!
            continue
        }
        suffix := fi.attributes[PER_FIELD_SUFFIX_KEY]
        // assert suffix != nil
        segmentSuffix := formatName + "_" + suffix
        if _, ok := ans.formats[segmentSuffix]; !ok {
            newReadState := state // clone
            newReadState.segmentSuffix = segmentSuffix
            p, err := LoadDocValuesProducer(formatName, newReadState)
            if err != nil {
                // A failed load must not leave a nil producer mapped to the
                // field; fail fast instead of swallowing the error.
                return nil, err
            }
            ans.formats[segmentSuffix] = p
        }
        ans.fields[fieldName] = ans.formats[segmentSuffix]
    }
    success = true
    return &ans, nil
}
// LoadFieldsProducer instantiates the FieldsProducer registered for the given
// format name. Only "Lucene41" is supported; any other name panics.
func LoadFieldsProducer(name string, state SegmentReadState) (fp FieldsProducer, err error) {
    switch name {
    case "Lucene41":
        var postingsReader PostingsReaderBase
        postingsReader, err = NewLucene41PostingsReader(state.dir, state.fieldInfos,
            state.segmentInfo, state.context, state.segmentSuffix)
        if err != nil {
            return nil, err
        }
        success := false
        defer func() {
            if !success {
                log.Printf("Failed to load FieldsProducer for %v.", name)
                if err != nil {
                    log.Print("DEBUG ", err)
                }
                util.CloseWhileSuppressingError(postingsReader)
            }
        }()
        // Assign to the named returns rather than shadowing them, so the
        // deferred cleanup above sees the real error.
        fp, err = newBlockTreeTermsReader(state.dir, state.fieldInfos, state.segmentInfo,
            postingsReader, state.context, state.segmentSuffix, state.termsIndexDivisor)
        if err != nil {
            log.Print("DEBUG: ", err)
            return fp, err
        }
        success = true
        return fp, nil
    }
    panic(fmt.Sprintf("Service '%v' not found.", name))
}
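// Sketch (added; hypothetical helper, not in the original code): since
// LoadFieldsProducer panics on unknown format names, callers that prefer an
// error can pre-check the name. Only "Lucene41" is wired up above.
func loadFieldsProducerSafe(name string, state SegmentReadState) (FieldsProducer, error) {
    if name != "Lucene41" {
        return nil, fmt.Errorf("codec '%v' not supported", name)
    }
    return LoadFieldsProducer(name, state)
}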
func (sis *SegmentInfos) Read(directory store.Directory, segmentFileName string) error {
    log.Printf("Reading segment info from %v...", segmentFileName)
    success := false

    // Clear any previous segments:
    sis.Clear()

    sis.generation = GenerationFromSegmentsFileName(segmentFileName)
    sis.lastGeneration = sis.generation

    main, err := directory.OpenInput(segmentFileName, store.IO_CONTEXT_READ)
    if err != nil {
        return err
    }

    input := store.NewChecksumIndexInput(main)
    defer func() {
        if !success {
            // Clear any segment infos we had loaded so we
            // have a clean slate on retry:
            sis.Clear()
            util.CloseWhileSuppressingError(input)
        } else {
            input.Close()
        }
    }()

    format, err := input.ReadInt()
    if err != nil {
        return err
    }
    if format == codec.CODEC_MAGIC { // 4.0+
        _, err = codec.CheckHeaderNoMagic(input, "segments", VERSION_40, VERSION_40)
        if err != nil {
            return err
        }
        sis.version, err = input.ReadLong()
        if err != nil {
            return err
        }
        sis.counter, err = asInt(input.ReadInt())
        if err != nil {
            return err
        }
        numSegments, err := asInt(input.ReadInt())
        if err != nil {
            return err
        }
        if numSegments < 0 {
            return fmt.Errorf("invalid segment count: %v (resource: %v)", numSegments, input)
        }
        for seg := 0; seg < numSegments; seg++ {
            segName, err := input.ReadString()
            if err != nil {
                return err
            }
            codecName, err := input.ReadString()
            if err != nil {
                return err
            }
            if codecName != "Lucene42" {
                log.Panicf("Not supported yet: %v", codecName)
            }
            // method := CodecForName(codecName)
            method := NewLucene42Codec()
            info, err := method.ReadSegmentInfo(directory, segName, store.IO_CONTEXT_READ)
            if err != nil {
                return err
            }
            info.codec = method
            delGen, err := input.ReadLong()
            if err != nil {
                return err
            }
            delCount, err := asInt(input.ReadInt())
            if err != nil {
                return err
            }
            if delCount < 0 || delCount > int(info.docCount) {
                return fmt.Errorf("invalid deletion count: %v (resource: %v)", delCount, input)
            }
            sis.Segments = append(sis.Segments, NewSegmentInfoPerCommit(info, delCount, delGen))
        }
        sis.userData, err = input.ReadStringStringMap()
        if err != nil {
            return err
        }
    } else {
        // TODO support <4.0 index
        panic("Index format pre-4.0 not supported yet")
    }

    checksumNow := int64(input.Checksum())
    checksumThen, err := input.ReadLong()
    if err != nil {
        return err
    }
    if checksumNow != checksumThen {
        return fmt.Errorf("checksum mismatch in segments file (resource: %v)", input)
    }

    success = true
    return nil
}
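// Usage sketch (added for illustration; "segments_1" is a placeholder, and
// real callers derive the latest generation's segments_N name first):
func exampleReadSegmentInfos(dir store.Directory) (*SegmentInfos, error) {
    sis := &SegmentInfos{}
    if err := sis.Read(dir, "segments_1"); err != nil {
        return nil, err
    }
    return sis, nil
}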
// newBlockTreeTermsReader opens the BlockTree terms dictionary and, unless
// indexDivisor is -1, its terms index, then reads the per-field metadata and
// sanity-checks each field's statistics.
func newBlockTreeTermsReader(dir store.Directory, fieldInfos FieldInfos, info SegmentInfo,
    postingsReader PostingsReaderBase, ctx store.IOContext, segmentSuffix string,
    indexDivisor int) (p FieldsProducer, err error) {

    log.Print("Initializing BlockTreeTermsReader...")
    fp := &BlockTreeTermsReader{
        postingsReader: postingsReader,
        fields:         make(map[string]FieldReader),
        segment:        info.name,
    }
    fp.in, err = dir.OpenInput(util.SegmentFileName(info.name, segmentSuffix, BTT_EXTENSION), ctx)
    if err != nil {
        return fp, err
    }

    success := false
    var indexIn store.IndexInput
    defer func() {
        if !success {
            log.Print("Failed to initialize BlockTreeTermsReader.")
            if err != nil {
                log.Print("DEBUG ", err)
            }
            // this.close() will close in:
            util.CloseWhileSuppressingError(indexIn, fp)
        }
    }()

    fp.version, err = fp.readHeader(fp.in)
    if err != nil {
        return fp, err
    }
    log.Printf("Version: %v", fp.version)

    if indexDivisor != -1 {
        indexIn, err = dir.OpenInput(util.SegmentFileName(info.name, segmentSuffix, BTT_INDEX_EXTENSION), ctx)
        if err != nil {
            return fp, err
        }
        indexVersion, err := fp.readIndexHeader(indexIn)
        if err != nil {
            return fp, err
        }
        log.Printf("Index version: %v", indexVersion)
        if int(indexVersion) != fp.version {
            return fp, fmt.Errorf("mismatched version files: %v=%v,%v=%v",
                fp.in, fp.version, indexIn, indexVersion)
        }
    }

    // Have PostingsReader init itself
    postingsReader.Init(fp.in)

    // Read per-field details
    fp.seekDir(fp.in, fp.dirOffset)
    if indexDivisor != -1 {
        fp.seekDir(indexIn, fp.indexDirOffset)
    }

    numFields, err := fp.in.ReadVInt()
    if err != nil {
        return fp, err
    }
    log.Printf("Fields number: %v", numFields)
    if numFields < 0 {
        return fp, fmt.Errorf("invalid numFields: %v (resource=%v)", numFields, fp.in)
    }

    for i := int32(0); i < numFields; i++ {
        log.Printf("Next field...")
        field, err := fp.in.ReadVInt()
        if err != nil {
            return fp, err
        }
        log.Printf("Field: %v", field)

        numTerms, err := fp.in.ReadVLong()
        if err != nil {
            return fp, err
        }
        // assert numTerms >= 0
        log.Printf("Terms number: %v", numTerms)

        numBytes, err := fp.in.ReadVInt()
        if err != nil {
            return fp, err
        }
        log.Printf("Bytes number: %v", numBytes)

        rootCode := make([]byte, numBytes)
        err = fp.in.ReadBytes(rootCode)
        if err != nil {
            return fp, err
        }
        fieldInfo := fieldInfos.byNumber[field]
        // assert fieldInfo != nil
        var sumTotalTermFreq int64
        if fieldInfo.indexOptions == INDEX_OPT_DOCS_ONLY {
            sumTotalTermFreq = -1
        } else {
            sumTotalTermFreq, err = fp.in.ReadVLong()
            if err != nil {
                return fp, err
            }
        }
        sumDocFreq, err := fp.in.ReadVLong()
        if err != nil {
            return fp, err
        }
        docCount, err := fp.in.ReadVInt()
        if err != nil {
            return fp, err
        }
        log.Printf("DocCount: %v", docCount)
        if docCount < 0 || docCount > info.docCount { // #docs with field must be <= #docs
            return fp, fmt.Errorf("invalid docCount: %v maxDoc: %v (resource=%v)",
                docCount, info.docCount, fp.in)
        }
        if sumDocFreq < int64(docCount) { // #postings must be >= #docs with field
            return fp, fmt.Errorf("invalid sumDocFreq: %v docCount: %v (resource=%v)",
                sumDocFreq, docCount, fp.in)
        }
        if sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq { // #positions must be >= #postings
            return fp, fmt.Errorf("invalid sumTotalTermFreq: %v sumDocFreq: %v (resource=%v)",
                sumTotalTermFreq, sumDocFreq, fp.in)
        }

        var indexStartFP int64
        if indexDivisor != -1 {
            indexStartFP, err = indexIn.ReadVLong()
            if err != nil {
                return fp, err
            }
        }
        log.Printf("indexStartFP: %v", indexStartFP)

        if _, ok := fp.fields[fieldInfo.name]; ok {
            return fp, fmt.Errorf("duplicate field: %v (resource=%v)", fieldInfo.name, fp.in)
        }
        fp.fields[fieldInfo.name], err = newFieldReader(fp, fieldInfo, numTerms, rootCode,
            sumTotalTermFreq, sumDocFreq, docCount, indexStartFP, indexIn)
        if err != nil {
            return fp, err
        }
        log.Print("DEBUG field processed.")
    }

    if indexDivisor != -1 {
        err = indexIn.Close()
        if err != nil {
            return fp, err
        }
    }
    success = true
    return fp, nil
}
    SEGMENT_INFO_YES = 1
)

var (
    Lucene40SegmentInfoReader = func(dir store.Directory, segment string,
        context store.IOContext) (si SegmentInfo, err error) {

        si = SegmentInfo{}
        fileName := util.SegmentFileName(segment, "", LUCENE40_SI_EXTENSION)
        input, err := dir.OpenInput(fileName, context)
        if err != nil {
            return si, err
        }
        success := false
        defer func() {
            if !success {
                util.CloseWhileSuppressingError(input)
            } else {
                input.Close()
            }
        }()

        _, err = codec.CheckHeader(input, LUCENE40_CODEC_NAME, LUCENE40_VERSION_START, LUCENE40_VERSION_CURRENT)
        if err != nil {
            return si, err
        }
        version, err := input.ReadString()
        if err != nil {
            return si, err
        }
        docCount, err := input.ReadInt()
        if err != nil {