Esempio n. 1
0
func newCompressingStoredFieldsIndexReader(fieldsIndexIn store.IndexInput, si SegmentInfo) (r *CompressingStoredFieldsIndexReader, err error) {
	r = &CompressingStoredFieldsIndexReader{}
	r.maxDoc = int(si.docCount)
	r.docBases = make([]int, 0, 16)
	r.startPointers = make([]int64, 0, 16)
	r.avgChunkDocs = make([]int, 0, 16)
	r.avgChunkSizes = make([]int64, 0, 16)
	r.docBasesDeltas = make([]util.PackedIntsReader, 0, 16)
	r.startPointersDeltas = make([]util.PackedIntsReader, 0, 16)

	packedIntsVersion, err := fieldsIndexIn.ReadVInt()
	if err != nil {
		return nil, err
	}

	for blockCount := 0; ; blockCount++ {
		numChunks, err := fieldsIndexIn.ReadVInt()
		if err != nil {
			return nil, err
		}
		if numChunks == 0 {
			break
		}

		{ // doc bases
			n, err := fieldsIndexIn.ReadVInt()
			if err != nil {
				return nil, err
			}
			r.docBases = append(r.docBases, int(n))
			n, err = fieldsIndexIn.ReadVInt()
			if err != nil {
				return nil, err
			}
			r.avgChunkDocs = append(r.avgChunkDocs, int(n))
			bitsPerDocBase, err := fieldsIndexIn.ReadVInt()
			if err != nil {
				return nil, err
			}
			if bitsPerDocBase > 32 {
				return nil, errors.New(fmt.Sprintf("Corrupted bitsPerDocBase (resource=%v)", fieldsIndexIn))
			}
			pr, err := util.NewPackedReaderNoHeader(fieldsIndexIn, util.PACKED, packedIntsVersion, numChunks, uint32(bitsPerDocBase))
			if err != nil {
				return nil, err
			}
			r.docBasesDeltas = append(r.docBasesDeltas, pr)
		}

		{ // start pointers
			n, err := fieldsIndexIn.ReadVLong()
			if err != nil {
				return nil, err
			}
			r.startPointers = append(r.startPointers, n)
			n, err = fieldsIndexIn.ReadVLong()
			if err != nil {
				return nil, err
			}
			r.avgChunkSizes = append(r.avgChunkSizes, n)
			bitsPerStartPointer, err := fieldsIndexIn.ReadVInt()
			if err != nil {
				return nil, err
			}
			if bitsPerStartPointer > 64 {
				return nil, errors.New(fmt.Sprintf("Corrupted bitsPerStartPonter (resource=%v)", fieldsIndexIn))
			}
			pr, err := util.NewPackedReaderNoHeader(fieldsIndexIn, util.PACKED, packedIntsVersion, numChunks, uint32(bitsPerStartPointer))
			if err != nil {
				return nil, err
			}
			r.startPointersDeltas = append(r.startPointersDeltas, pr)
		}
	}

	return r, nil
}
Esempio n. 2
0
func newBlockTreeTermsReader(dir store.Directory, fieldInfos FieldInfos, info SegmentInfo,
	postingsReader PostingsReaderBase, ctx store.IOContext,
	segmentSuffix string, indexDivisor int) (p FieldsProducer, err error) {
	log.Print("Initializing BlockTreeTermsReader...")
	fp := &BlockTreeTermsReader{
		postingsReader: postingsReader,
		fields:         make(map[string]FieldReader),
		segment:        info.name,
	}
	fp.in, err = dir.OpenInput(util.SegmentFileName(info.name, segmentSuffix, BTT_EXTENSION), ctx)
	if err != nil {
		return fp, err
	}

	success := false
	var indexIn store.IndexInput
	defer func() {
		if !success {
			log.Print("Failed to initialize BlockTreeTermsReader.")
			if err != nil {
				log.Print("DEBUG ", err)
			}
			// this.close() will close in:
			util.CloseWhileSuppressingError(indexIn, fp)
		}
	}()

	fp.version, err = fp.readHeader(fp.in)
	if err != nil {
		return fp, err
	}
	log.Printf("Version: %v", fp.version)

	if indexDivisor != -1 {
		indexIn, err = dir.OpenInput(util.SegmentFileName(info.name, segmentSuffix, BTT_INDEX_EXTENSION), ctx)
		if err != nil {
			return fp, err
		}

		indexVersion, err := fp.readIndexHeader(indexIn)
		if err != nil {
			return fp, err
		}
		log.Printf("Index version: %v", indexVersion)
		if int(indexVersion) != fp.version {
			return fp, errors.New(fmt.Sprintf("mixmatched version files: %v=%v,%v=%v", fp.in, fp.version, indexIn, indexVersion))
		}
	}

	// Have PostingsReader init itself
	postingsReader.Init(fp.in)

	// Read per-field details
	fp.seekDir(fp.in, fp.dirOffset)
	if indexDivisor != -1 {
		fp.seekDir(indexIn, fp.indexDirOffset)
	}

	numFields, err := fp.in.ReadVInt()
	if err != nil {
		return fp, err
	}
	log.Printf("Fields number: %v", numFields)
	if numFields < 0 {
		return fp, errors.New(fmt.Sprintf("invalid numFields: %v (resource=%v)", numFields, fp.in))
	}

	for i := int32(0); i < numFields; i++ {
		log.Printf("Next field...")
		field, err := fp.in.ReadVInt()
		if err != nil {
			return fp, err
		}
		log.Printf("Field: %v", field)

		numTerms, err := fp.in.ReadVLong()
		if err != nil {
			return fp, err
		}
		// assert numTerms >= 0
		log.Printf("Terms number: %v", numTerms)

		numBytes, err := fp.in.ReadVInt()
		if err != nil {
			return fp, err
		}
		log.Printf("Bytes number: %v", numBytes)

		rootCode := make([]byte, numBytes)
		err = fp.in.ReadBytes(rootCode)
		if err != nil {
			return fp, err
		}
		fieldInfo := fieldInfos.byNumber[field]
		// assert fieldInfo != nil
		var sumTotalTermFreq int64
		if fieldInfo.indexOptions == INDEX_OPT_DOCS_ONLY {
			sumTotalTermFreq = -1
		} else {
			sumTotalTermFreq, err = fp.in.ReadVLong()
			if err != nil {
				return fp, err
			}
		}
		sumDocFreq, err := fp.in.ReadVLong()
		if err != nil {
			return fp, err
		}
		docCount, err := fp.in.ReadVInt()
		if err != nil {
			return fp, err
		}
		log.Printf("DocCount: %v", docCount)
		if docCount < 0 || docCount > info.docCount { // #docs with field must be <= #docs
			return fp, errors.New(fmt.Sprintf(
				"invalid docCount: %v maxDoc: %v (resource=%v)",
				docCount, info.docCount, fp.in))
		}
		if sumDocFreq < int64(docCount) { // #postings must be >= #docs with field
			return fp, errors.New(fmt.Sprintf(
				"invalid sumDocFreq: %v docCount: %v (resource=%v)",
				sumDocFreq, docCount, fp.in))
		}
		if sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq { // #positions must be >= #postings
			return fp, errors.New(fmt.Sprintf(
				"invalid sumTotalTermFreq: %v sumDocFreq: %v (resource=%v)",
				sumTotalTermFreq, sumDocFreq, fp.in))
		}

		var indexStartFP int64
		if indexDivisor != -1 {
			indexStartFP, err = indexIn.ReadVLong()
			if err != nil {
				return fp, err
			}
		}
		log.Printf("indexStartFP: %v", indexStartFP)
		if _, ok := fp.fields[fieldInfo.name]; ok {
			return fp, errors.New(fmt.Sprintf(
				"duplicate field: %v (resource=%v)", fieldInfo.name, fp.in))
		}
		fp.fields[fieldInfo.name], err = newFieldReader(fp,
			fieldInfo, numTerms, rootCode, sumTotalTermFreq,
			sumDocFreq, docCount, indexStartFP, indexIn)
		if err != nil {
			return fp, err
		}
		log.Print("DEBUG field processed.")
	}

	if indexDivisor != -1 {
		err = indexIn.Close()
		if err != nil {
			return fp, err
		}
	}

	success = true

	return fp, nil
}