Esempio n. 1
0
func NewCompoundFileDirectory(directory Directory, fileName string, context IOContext, openForWrite bool) (d *CompoundFileDirectory, err error) {
	self := &CompoundFileDirectory{
		lock:           sync.Mutex{},
		directory:      directory,
		fileName:       fileName,
		readBufferSize: bufferSize(context),
		openForWriter:  openForWrite}
	self.DirectoryImpl = newDirectoryImpl(self)

	if !openForWrite {
		log.Printf("Open for read.")
		success := false
		defer func() {
			if !success {
				util.CloseWhileSuppressingError(self.handle)
			}
		}()
		self.handle, err = directory.createSlicer(fileName, context)
		if err != nil {
			return self, err
		}
		self.entries, err = readEntries(self.handle, directory, fileName)
		if err != nil {
			return self, err
		}
		success = true
		self.DirectoryImpl.isOpen = true
		return self, err
	} else {
		panic("not supported yet")
	}
}
Esempio n. 2
0
func newPerFieldPostingsReader(state SegmentReadState) (fp FieldsProducer, err error) {
	ans := PerFieldPostingsReader{
		make(map[string]FieldsProducer),
		make(map[string]FieldsProducer),
	}
	// Read _X.per and init each format:
	success := false
	defer func() {
		if !success {
			log.Printf("Failed to initialize PerFieldPostingsReader.")
			if err != nil {
				log.Print("DEBUG ", err)
			}
			fps := make([]FieldsProducer, 0)
			for _, v := range ans.formats {
				fps = append(fps, v)
			}
			items := make([]io.Closer, len(fps))
			for i, v := range fps {
				items[i] = v
			}
			util.CloseWhileSuppressingError(items...)
		}
	}()
	// Read field name -> format name
	for _, fi := range state.fieldInfos.values {
		log.Printf("Processing %v...", fi)
		if fi.indexed {
			fieldName := fi.name
			log.Printf("Name: %v", fieldName)
			if formatName, ok := fi.attributes[PER_FIELD_FORMAT_KEY]; ok {
				log.Printf("Format: %v", formatName)
				// null formatName means the field is in fieldInfos, but has no postings!
				suffix := fi.attributes[PER_FIELD_SUFFIX_KEY]
				log.Printf("Suffix: %v", suffix)
				// assert suffix != nil
				segmentSuffix := formatName + "_" + suffix
				log.Printf("Segment suffix: %v", segmentSuffix)
				if _, ok := ans.formats[segmentSuffix]; !ok {
					log.Printf("Loading fields producer: %v", segmentSuffix)
					newReadState := state // clone
					newReadState.segmentSuffix = formatName + "_" + suffix
					fp, err = LoadFieldsProducer(formatName, newReadState)
					if err != nil {
						return fp, err
					}
					ans.formats[segmentSuffix] = fp
				}
				ans.fields[fieldName] = ans.formats[segmentSuffix]
			}
		}
	}
	success = true
	return &ans, nil
}
Esempio n. 3
0
func NewLucene41PostingsReader(dir store.Directory, fis FieldInfos, si SegmentInfo,
	ctx store.IOContext, segmentSuffix string) (r PostingsReaderBase, err error) {
	log.Print("Initializing Lucene41PostingsReader...")
	success := false
	var docIn, posIn, payIn store.IndexInput = nil, nil, nil
	defer func() {
		if !success {
			log.Print("Failed to initialize Lucene41PostingsReader.")
			if err != nil {
				log.Print("DEBUG ", err)
			}
			util.CloseWhileSuppressingError(docIn, posIn, payIn)
		}
	}()

	docIn, err = dir.OpenInput(util.SegmentFileName(si.name, segmentSuffix, LUCENE41_DOC_EXTENSION), ctx)
	if err != nil {
		return r, err
	}
	_, err = codec.CheckHeader(docIn, LUCENE41_DOC_CODEC, LUCENE41_VERSION_CURRENT, LUCENE41_VERSION_CURRENT)
	if err != nil {
		return r, err
	}
	forUtil, err := NewForUtil(docIn)
	if err != nil {
		return r, err
	}

	if fis.hasProx {
		posIn, err = dir.OpenInput(util.SegmentFileName(si.name, segmentSuffix, LUCENE41_POS_EXTENSION), ctx)
		if err != nil {
			return r, err
		}
		_, err = codec.CheckHeader(posIn, LUCENE41_POS_CODEC, LUCENE41_VERSION_CURRENT, LUCENE41_VERSION_CURRENT)
		if err != nil {
			return r, err
		}

		if fis.hasPayloads || fis.hasOffsets {
			payIn, err = dir.OpenInput(util.SegmentFileName(si.name, segmentSuffix, LUCENE41_PAY_EXTENSION), ctx)
			if err != nil {
				return r, err
			}
			_, err = codec.CheckHeader(payIn, LUCENE41_PAY_CODEC, LUCENE41_VERSION_CURRENT, LUCENE41_VERSION_CURRENT)
			if err != nil {
				return r, err
			}
		}
	}

	success = true
	return &Lucene41PostingsReader{docIn, posIn, payIn, forUtil}, nil
}
Esempio n. 4
0
func newLucene42DocValuesProducer(state SegmentReadState,
	dataCodec, dataExtension, metaCodec, metaExtension string) (dvp *Lucene42DocValuesProducer, err error) {
	dvp = &Lucene42DocValuesProducer{}
	dvp.maxDoc = int(state.segmentInfo.docCount)
	metaName := util.SegmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension)
	// read in the entries from the metadata file.
	in, err := state.dir.OpenInput(metaName, state.context)
	if err != nil {
		return dvp, err
	}
	success := false
	defer func() {
		if success {
			err = util.Close(in)
		} else {
			util.CloseWhileSuppressingError(in)
		}
	}()

	version, err := codec.CheckHeader(in, metaCodec, LUCENE42_DV_VERSION_START, LUCENE42_DV_VERSION_CURRENT)
	if err != nil {
		return dvp, err
	}
	dvp.numerics = make(map[int]NumericEntry)
	dvp.binaries = make(map[int]BinaryEntry)
	dvp.fsts = make(map[int]FSTEntry)
	err = dvp.readFields(in, state.fieldInfos)
	if err != nil {
		return dvp, err
	}
	success = true

	success = false
	dataName := util.SegmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension)
	dvp.data, err = state.dir.OpenInput(dataName, state.context)
	if err != nil {
		return dvp, err
	}
	version2, err := codec.CheckHeader(dvp.data, dataCodec, LUCENE42_DV_VERSION_START, LUCENE42_DV_VERSION_CURRENT)
	if err != nil {
		return dvp, err
	}

	if version != version2 {
		return dvp, errors.New("Format versions mismatch")
	}
	return dvp, nil
}
Esempio n. 5
0
func newPerFieldDocValuesReader(state SegmentReadState) (dvp DocValuesProducer, err error) {
	ans := PerFieldDocValuesReader{
		make(map[string]DocValuesProducer), make(map[string]DocValuesProducer)}
	// Read _X.per and init each format:
	success := false
	defer func() {
		if !success {
			fps := make([]DocValuesProducer, 0)
			for _, v := range ans.formats {
				fps = append(fps, v)
			}
			items := make([]io.Closer, len(fps))
			for i, v := range fps {
				items[i] = v
			}
			util.CloseWhileSuppressingError(items...)
		}
	}()
	// Read field name -> format name
	for _, fi := range state.fieldInfos.values {
		if fi.docValueType != 0 {
			fieldName := fi.name
			if formatName, ok := fi.attributes[PER_FIELD_FORMAT_KEY]; ok {
				// null formatName means the field is in fieldInfos, but has no docvalues!
				suffix := fi.attributes[PER_FIELD_SUFFIX_KEY]
				// assert suffix != nil
				segmentSuffix := formatName + "_" + suffix
				if _, ok := ans.formats[segmentSuffix]; !ok {
					newReadState := state // clone
					newReadState.segmentSuffix = formatName + "_" + suffix
					if p, err := LoadDocValuesProducer(formatName, newReadState); err == nil {
						ans.formats[segmentSuffix] = p
					}
				}
				ans.fields[fieldName] = ans.formats[segmentSuffix]
			}
		}
	}
	success = true
	return &ans, nil
}
Esempio n. 6
0
func LoadFieldsProducer(name string, state SegmentReadState) (fp FieldsProducer, err error) {
	switch name {
	case "Lucene41":
		postingsReader, err := NewLucene41PostingsReader(state.dir,
			state.fieldInfos,
			state.segmentInfo,
			state.context,
			state.segmentSuffix)
		if err != nil {
			return nil, err
		}
		success := false
		defer func() {
			if !success {
				log.Printf("Failed to load FieldsProducer for %v.", name)
				if err != nil {
					log.Print("DEBUG ", err)
				}
				util.CloseWhileSuppressingError(postingsReader)
			}
		}()

		fp, err := newBlockTreeTermsReader(state.dir,
			state.fieldInfos,
			state.segmentInfo,
			postingsReader,
			state.context,
			state.segmentSuffix,
			state.termsIndexDivisor)
		if err != nil {
			log.Print("DEBUG: ", err)
			return fp, err
		}
		success = true
		return fp, nil
	}
	panic(fmt.Sprintf("Service '%v' not found.", name))
}
Esempio n. 7
0
func (sis *SegmentInfos) Read(directory store.Directory, segmentFileName string) error {
	log.Printf("Reading segment info from %v...", segmentFileName)
	success := false

	// Clear any previous segments:
	sis.Clear()

	sis.generation = GenerationFromSegmentsFileName(segmentFileName)
	sis.lastGeneration = sis.generation

	main, err := directory.OpenInput(segmentFileName, store.IO_CONTEXT_READ)
	if err != nil {
		return err
	}
	input := store.NewChecksumIndexInput(main)
	defer func() {
		if !success {
			// Clear any segment infos we had loaded so we
			// have a clean slate on retry:
			sis.Clear()
			util.CloseWhileSuppressingError(input)
		} else {
			input.Close()
		}
	}()

	format, err := input.ReadInt()
	if err != nil {
		return err
	}
	if format == codec.CODEC_MAGIC {
		// 4.0+
		_, err = codec.CheckHeaderNoMagic(input, "segments", VERSION_40, VERSION_40)
		if err != nil {
			return err
		}
		sis.version, err = input.ReadLong()
		if err != nil {
			return err
		}
		sis.counter, err = asInt(input.ReadInt())
		if err != nil {
			return err
		}
		numSegments, err := asInt(input.ReadInt())
		if err != nil {
			return err
		}
		if numSegments < 0 {
			return errors.New(fmt.Sprintf("invalid segment count: %v (resource: %v)", numSegments, input))
		}
		for seg := 0; seg < numSegments; seg++ {
			segName, err := input.ReadString()
			if err != nil {
				return err
			}
			codecName, err := input.ReadString()
			if err != nil {
				return err
			}
			if codecName != "Lucene42" {
				log.Panicf("Not supported yet: %v", codecName)
			}
			// method := CodecForName(codecName)
			method := NewLucene42Codec()
			info, err := method.ReadSegmentInfo(directory, segName, store.IO_CONTEXT_READ)
			if err != nil {
				return err
			}
			info.codec = method
			delGen, err := input.ReadLong()
			if err != nil {
				return err
			}
			delCount, err := asInt(input.ReadInt())
			if err != nil {
				return err
			}
			if delCount < 0 || delCount > int(info.docCount) {
				return errors.New(fmt.Sprintf("invalid deletion count: %v (resource: %v)", delCount, input))
			}
			sis.Segments = append(sis.Segments, NewSegmentInfoPerCommit(info, delCount, delGen))
		}
		sis.userData, err = input.ReadStringStringMap()
		if err != nil {
			return err
		}
	} else {
		// TODO support <4.0 index
		panic("Index format pre-4.0 not supported yet")
	}

	checksumNow := int64(input.Checksum())
	checksumThen, err := input.ReadLong()
	if err != nil {
		return err
	}
	if checksumNow != checksumThen {
		return errors.New(fmt.Sprintf("checksum mismatch in segments file (resource: %v)", input))
	}

	success = true
	return nil
}
Esempio n. 8
0
func newBlockTreeTermsReader(dir store.Directory, fieldInfos FieldInfos, info SegmentInfo,
	postingsReader PostingsReaderBase, ctx store.IOContext,
	segmentSuffix string, indexDivisor int) (p FieldsProducer, err error) {
	log.Print("Initializing BlockTreeTermsReader...")
	fp := &BlockTreeTermsReader{
		postingsReader: postingsReader,
		fields:         make(map[string]FieldReader),
		segment:        info.name,
	}
	fp.in, err = dir.OpenInput(util.SegmentFileName(info.name, segmentSuffix, BTT_EXTENSION), ctx)
	if err != nil {
		return fp, err
	}

	success := false
	var indexIn store.IndexInput
	defer func() {
		if !success {
			log.Print("Failed to initialize BlockTreeTermsReader.")
			if err != nil {
				log.Print("DEBUG ", err)
			}
			// this.close() will close in:
			util.CloseWhileSuppressingError(indexIn, fp)
		}
	}()

	fp.version, err = fp.readHeader(fp.in)
	if err != nil {
		return fp, err
	}
	log.Printf("Version: %v", fp.version)

	if indexDivisor != -1 {
		indexIn, err = dir.OpenInput(util.SegmentFileName(info.name, segmentSuffix, BTT_INDEX_EXTENSION), ctx)
		if err != nil {
			return fp, err
		}

		indexVersion, err := fp.readIndexHeader(indexIn)
		if err != nil {
			return fp, err
		}
		log.Printf("Index version: %v", indexVersion)
		if int(indexVersion) != fp.version {
			return fp, errors.New(fmt.Sprintf("mixmatched version files: %v=%v,%v=%v", fp.in, fp.version, indexIn, indexVersion))
		}
	}

	// Have PostingsReader init itself
	postingsReader.Init(fp.in)

	// Read per-field details
	fp.seekDir(fp.in, fp.dirOffset)
	if indexDivisor != -1 {
		fp.seekDir(indexIn, fp.indexDirOffset)
	}

	numFields, err := fp.in.ReadVInt()
	if err != nil {
		return fp, err
	}
	log.Printf("Fields number: %v", numFields)
	if numFields < 0 {
		return fp, errors.New(fmt.Sprintf("invalid numFields: %v (resource=%v)", numFields, fp.in))
	}

	for i := int32(0); i < numFields; i++ {
		log.Printf("Next field...")
		field, err := fp.in.ReadVInt()
		if err != nil {
			return fp, err
		}
		log.Printf("Field: %v", field)

		numTerms, err := fp.in.ReadVLong()
		if err != nil {
			return fp, err
		}
		// assert numTerms >= 0
		log.Printf("Terms number: %v", numTerms)

		numBytes, err := fp.in.ReadVInt()
		if err != nil {
			return fp, err
		}
		log.Printf("Bytes number: %v", numBytes)

		rootCode := make([]byte, numBytes)
		err = fp.in.ReadBytes(rootCode)
		if err != nil {
			return fp, err
		}
		fieldInfo := fieldInfos.byNumber[field]
		// assert fieldInfo != nil
		var sumTotalTermFreq int64
		if fieldInfo.indexOptions == INDEX_OPT_DOCS_ONLY {
			sumTotalTermFreq = -1
		} else {
			sumTotalTermFreq, err = fp.in.ReadVLong()
			if err != nil {
				return fp, err
			}
		}
		sumDocFreq, err := fp.in.ReadVLong()
		if err != nil {
			return fp, err
		}
		docCount, err := fp.in.ReadVInt()
		if err != nil {
			return fp, err
		}
		log.Printf("DocCount: %v", docCount)
		if docCount < 0 || docCount > info.docCount { // #docs with field must be <= #docs
			return fp, errors.New(fmt.Sprintf(
				"invalid docCount: %v maxDoc: %v (resource=%v)",
				docCount, info.docCount, fp.in))
		}
		if sumDocFreq < int64(docCount) { // #postings must be >= #docs with field
			return fp, errors.New(fmt.Sprintf(
				"invalid sumDocFreq: %v docCount: %v (resource=%v)",
				sumDocFreq, docCount, fp.in))
		}
		if sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq { // #positions must be >= #postings
			return fp, errors.New(fmt.Sprintf(
				"invalid sumTotalTermFreq: %v sumDocFreq: %v (resource=%v)",
				sumTotalTermFreq, sumDocFreq, fp.in))
		}

		var indexStartFP int64
		if indexDivisor != -1 {
			indexStartFP, err = indexIn.ReadVLong()
			if err != nil {
				return fp, err
			}
		}
		log.Printf("indexStartFP: %v", indexStartFP)
		if _, ok := fp.fields[fieldInfo.name]; ok {
			return fp, errors.New(fmt.Sprintf(
				"duplicate field: %v (resource=%v)", fieldInfo.name, fp.in))
		}
		fp.fields[fieldInfo.name], err = newFieldReader(fp,
			fieldInfo, numTerms, rootCode, sumTotalTermFreq,
			sumDocFreq, docCount, indexStartFP, indexIn)
		if err != nil {
			return fp, err
		}
		log.Print("DEBUG field processed.")
	}

	if indexDivisor != -1 {
		err = indexIn.Close()
		if err != nil {
			return fp, err
		}
	}

	success = true

	return fp, nil
}
Esempio n. 9
0
	SEGMENT_INFO_YES = 1
)

var (
	Lucene40SegmentInfoReader = func(dir store.Directory, segment string, context store.IOContext) (si SegmentInfo, err error) {
		si = SegmentInfo{}
		fileName := util.SegmentFileName(segment, "", LUCENE40_SI_EXTENSION)
		input, err := dir.OpenInput(fileName, context)
		if err != nil {
			return si, err
		}

		success := false
		defer func() {
			if !success {
				util.CloseWhileSuppressingError(input)
			} else {
				input.Close()
			}
		}()

		_, err = codec.CheckHeader(input, LUCENE40_CODEC_NAME, LUCENE40_VERSION_START, LUCENE40_VERSION_CURRENT)
		if err != nil {
			return si, err
		}
		version, err := input.ReadString()
		if err != nil {
			return si, err
		}
		docCount, err := input.ReadInt()
		if err != nil {