예제 #1
0
/* Reads but does not decode the byte[] blob holding
   metadata for the current terms block */
func (r *Lucene41PostingsReader) ReadTermsBlock(termsIn store.IndexInput,
	fieldInfo model.FieldInfo, _termState *BlockTermState) (err error) {

	termState := _termState.Self.(*intBlockTermState)
	numBytes, err := asInt(termsIn.ReadVInt())
	if err != nil {
		return err
	}

	if termState.bytes == nil {
		// TODO over-allocate
		termState.bytes = make([]byte, numBytes)
		termState.bytesReader = store.NewEmptyByteArrayDataInput()
	} else if len(termState.bytes) < numBytes {
		// TODO over-allocate
		termState.bytes = make([]byte, numBytes)
	}

	err = termsIn.ReadBytes(termState.bytes)
	if err != nil {
		return err
	}
	termState.bytesReader.Reset(termState.bytes)
	return nil
}
예제 #2
0
func readBytesRef(in store.IndexInput) ([]byte, error) {
	length, err := asInt(in.ReadVInt())
	if err != nil {
		return nil, err
	}
	bytes := make([]byte, length)
	if err = in.ReadBytes(bytes); err != nil {
		return nil, err
	}
	return bytes, nil
}
예제 #3
0
func (r *BlockTreeTermsReader) readHeader(input store.IndexInput) (version int, err error) {
	version, err = asInt(codec.CheckHeader(input, BTT_CODEC_NAME, BTT_VERSION_START, BTT_VERSION_CURRENT))
	if err != nil {
		return int(version), err
	}
	if version < BTT_VERSION_APPEND_ONLY {
		r.dirOffset, err = input.ReadLong()
		if err != nil {
			return int(version), err
		}
	}
	return int(version), nil
}
예제 #4
0
func (r *BlockTreeTermsReader) readIndexHeader(input store.IndexInput) (version int, err error) {
	version, err = asInt(codec.CheckHeader(input, TERMS_INDEX_CODEC_NAME, TERMS_VERSION_START, TERMS_VERSION_CURRENT))
	if err != nil {
		return version, err
	}
	if version < TERMS_VERSION_APPEND_ONLY {
		r.indexDirOffset, err = input.ReadLong()
		if err != nil {
			return version, err
		}
	}
	return version, nil
}
예제 #5
0
func (np *NormsProducer) readFields(meta store.IndexInput, infos FieldInfos) (err error) {
	var fieldNumber int32
	if fieldNumber, err = meta.ReadVInt(); err != nil {
		return err
	}
	for fieldNumber != -1 {
		info := infos.FieldInfoByNumber(int(fieldNumber))
		if info == nil {
			return errors.New(fmt.Sprintf("Invalid field number: %v (resource=%v)", fieldNumber, meta))
		} else if !info.HasNorms() {
			return errors.New(fmt.Sprintf("Invalid field: %v (resource=%v)", info.Name, meta))
		}
		var format byte
		if format, err = meta.ReadByte(); err != nil {
			return err
		}
		var offset int64
		if offset, err = meta.ReadLong(); err != nil {
			return err
		}
		entry := &NormsEntry{
			format: format,
			offset: offset,
		}
		if format > UNCOMPRESSED {
			return errors.New(fmt.Sprintf("Unknown format: %v, input=%v", format, meta))
		}
		np.norms[int(fieldNumber)] = entry
		if fieldNumber, err = meta.ReadVInt(); err != nil {
			return err
		}
	}
	return nil
}
예제 #6
0
func (r *Lucene41PostingsReader) Init(termsIn store.IndexInput) error {
	log.Printf("Initializing from: %v", termsIn)
	// Make sure we are talking to the matching postings writer
	_, err := codec.CheckHeader(termsIn, LUCENE41_TERMS_CODEC, LUCENE41_VERSION_START, LUCENE41_VERSION_CURRENT)
	if err != nil {
		return err
	}
	indexBlockSize, err := termsIn.ReadVInt()
	if err != nil {
		return err
	}
	log.Printf("Index block size: %v", indexBlockSize)
	if indexBlockSize != LUCENE41_BLOCK_SIZE {
		panic(fmt.Sprintf("index-time BLOCK_SIZE (%v) != read-time BLOCK_SIZE (%v)", indexBlockSize, LUCENE41_BLOCK_SIZE))
	}
	return nil
}
예제 #7
0
func (r *BlockTreeTermsReader) seekDir(input store.IndexInput, dirOffset int64) (err error) {
	log.Printf("Seeking to: %v", dirOffset)
	if r.version >= BTT_INDEX_VERSION_APPEND_ONLY {
		input.Seek(input.Length() - 8)
		if dirOffset, err = input.ReadLong(); err != nil {
			return err
		}
	}
	input.Seek(dirOffset)
	return nil
}
예제 #8
0
func newFieldReader(parent *BlockTreeTermsReader,
	fieldInfo *FieldInfo, numTerms int64, rootCode []byte,
	sumTotalTermFreq, sumDocFreq int64, docCount int,
	indexStartFP int64, longsSize int, indexIn store.IndexInput,
	minTerm, maxTerm []byte) (r FieldReader, err error) {

	// log.Print("Initializing FieldReader...")
	assert(numTerms > 0)
	r = FieldReader{
		parent:           parent,
		fieldInfo:        fieldInfo,
		numTerms:         numTerms,
		sumTotalTermFreq: sumTotalTermFreq,
		sumDocFreq:       sumDocFreq,
		docCount:         docCount,
		indexStartFP:     indexStartFP,
		rootCode:         rootCode,
		longsSize:        longsSize,
		minTerm:          minTerm,
		maxTerm:          maxTerm,
	}
	// log.Printf("BTTR: seg=%v field=%v rootBlockCode=%v divisor=",
	// 	parent.segment, fieldInfo.Name, rootCode)

	in := store.NewByteArrayDataInput(rootCode)
	n, err := in.ReadVLong()
	if err != nil {
		return r, err
	}
	r.rootBlockFP = int64(uint64(n) >> BTT_OUTPUT_FLAGS_NUM_BITS)

	if indexIn != nil {
		clone := indexIn.Clone()
		// log.Printf("start=%v field=%v", indexStartFP, fieldInfo.Name)
		clone.Seek(indexStartFP)
		r.index, err = fst.LoadFST(clone, fst.ByteSequenceOutputsSingleton())
	}

	return r, err
}
예제 #9
0
/**
 * Read values that have been written using variable-length encoding instead of bit-packing.
 */
func readVIntBlock(docIn store.IndexInput, docBuffer []int,
	freqBuffer []int, num int, indexHasFreq bool) (err error) {
	if indexHasFreq {
		for i := 0; i < num; i++ {
			code, err := asInt(docIn.ReadVInt())
			if err != nil {
				return err
			}
			docBuffer[i] = int(uint(code) >> 1)
			if (code & 1) != 0 {
				freqBuffer[i] = 1
			} else {
				freqBuffer[i], err = asInt(docIn.ReadVInt())
				if err != nil {
					return err
				}
			}
		}
	} else {
		for i := 0; i < num; i++ {
			docBuffer[i], err = asInt(docIn.ReadVInt())
			if err != nil {
				return err
			}
		}
	}
	return nil
}
예제 #10
0
func newFieldReader(owner *BlockTreeTermsReader,
	fieldInfo model.FieldInfo, numTerms int64, rootCode []byte,
	sumTotalTermFreq, sumDocFreq int64, docCount int32, indexStartFP int64,
	indexIn store.IndexInput) (r FieldReader, err error) {
	log.Print("Initializing FieldReader...")
	if numTerms <= 0 {
		panic("assert fail")
	}
	// assert numTerms > 0
	r = FieldReader{
		BlockTreeTermsReader: owner,
		fieldInfo:            fieldInfo,
		numTerms:             numTerms,
		sumTotalTermFreq:     sumTotalTermFreq,
		sumDocFreq:           sumDocFreq,
		docCount:             docCount,
		indexStartFP:         indexStartFP,
		rootCode:             rootCode,
	}
	log.Printf("BTTR: seg=%v field=%v rootBlockCode=%v divisor=",
		owner.segment, fieldInfo.Name, rootCode)

	in := store.NewByteArrayDataInput(rootCode)
	n, err := in.ReadVLong()
	if err != nil {
		return r, err
	}
	r.rootBlockFP = int64(uint64(n) >> BTT_OUTPUT_FLAGS_NUM_BITS)

	if indexIn != nil {
		clone := indexIn.Clone()
		log.Printf("start=%v field=%v", indexStartFP, fieldInfo.Name)
		clone.Seek(indexStartFP)
		r.index, err = fst.LoadFST(clone, fst.ByteSequenceOutputsSingleton())
	}

	return r, err
}
예제 #11
0
func (w *MockDirectoryWrapper) _crash() error {
	w.crashed = true
	w.openFiles = make(map[string]int)
	w.openFilesForWrite = make(map[string]bool)
	w.openFilesDeleted = make(map[string]bool)
	files := w.unSyncedFiles
	w.unSyncedFiles = make(map[string]bool)
	// first force-close all files, so we can corrupt on windows etc.
	// clone the file map, as these guys want to remove themselves on close.
	m := make(map[io.Closer]error)
	for k, v := range w.openFileHandles {
		m[k] = v
	}
	for f, _ := range m {
		f.Close() // ignore error
	}

	for name, _ := range files {
		var action string
		var err error
		switch w.randomState.Intn(5) {
		case 0:
			action = "deleted"
			err = w.deleteFile(name, true)
		case 1:
			action = "zeroes"
			// Zero out file entirely
			var length int64
			length, err = w.FileLength(name)
			if err == nil {
				zeroes := make([]byte, 256)
				var upto int64 = 0
				var out store.IndexOutput
				out, err = w.BaseDirectoryWrapperImpl.CreateOutput(name, NewDefaultIOContext(w.randomState))
				if err == nil {
					for upto < length && err == nil {
						limit := length - upto
						if int64(len(zeroes)) < limit {
							limit = int64(len(zeroes))
						}
						err = out.WriteBytes(zeroes[:limit])
						upto += limit
					}
					if err == nil {
						err = out.Close()
					}
				}
			}
		case 2:
			action = "partially truncated"
			// Partially Truncate the file:

			// First, make temp file and copy only half this file over:
			var tempFilename string
			for {
				tempFilename = fmt.Sprintf("%v", w.randomState.Int())
				if !w.BaseDirectoryWrapperImpl.FileExists(tempFilename) {
					break
				}
			}
			var tempOut store.IndexOutput
			if tempOut, err = w.BaseDirectoryWrapperImpl.CreateOutput(tempFilename, NewDefaultIOContext(w.randomState)); err == nil {
				var ii store.IndexInput
				if ii, err = w.BaseDirectoryWrapperImpl.OpenInput(name, NewDefaultIOContext(w.randomState)); err == nil {
					if err = tempOut.CopyBytes(ii, ii.Length()/2); err == nil {
						if err = tempOut.Close(); err == nil {
							if err = ii.Close(); err == nil {
								// Delete original and copy bytes back:
								if err = w.deleteFile(name, true); err == nil {
									var out store.IndexOutput
									if out, err = w.BaseDirectoryWrapperImpl.CreateOutput(name, NewDefaultIOContext(w.randomState)); err == nil {
										if ii, err = w.BaseDirectoryWrapperImpl.OpenInput(tempFilename, NewDefaultIOContext(w.randomState)); err == nil {
											if err = out.CopyBytes(ii, ii.Length()); err == nil {
												if err = out.Close(); err == nil {
													if err = ii.Close(); err == nil {
														err = w.deleteFile(tempFilename, true)
													}
												}
											}
										}
									}
								}
							}
						}
					}
				}
			}
		case 3:
			// the file survived intact:
			action = "didn't change"
		default:
			action = "fully truncated"
			// totally truncate the file to zero bytes
			if err = w.deleteFile(name, true); err == nil {
				var out store.IndexOutput
				if out, err = w.BaseDirectoryWrapperImpl.CreateOutput(name, NewDefaultIOContext(w.randomState)); err == nil {
					if err = out.SetLength(0); err == nil {
						err = out.Close()
					}
				}
			}
		}
		if err != nil {
			return err
		}
		if VERBOSE {
			log.Printf("MockDirectoryWrapper: %v unsynced file: %v", action, name)
		}
	}
	return nil
}
예제 #12
0
// Sole constructor
func newCompressingStoredFieldsReader(d store.Directory,
	si *model.SegmentInfo, segmentSuffix string,
	fn model.FieldInfos, ctx store.IOContext, formatName string,
	compressionMode compressing.CompressionMode) (r *CompressingStoredFieldsReader, err error) {

	r = &CompressingStoredFieldsReader{}
	r.compressionMode = compressionMode
	segment := si.Name
	r.fieldInfos = fn
	r.numDocs = si.DocCount()

	var indexStream store.IndexInput
	success := false
	defer func() {
		if !success {
			log.Println("Failed to initialize CompressionStoredFieldsReader.")
			if err != nil {
				log.Print(err)
			}
			util.Close(r, indexStream)
		}
	}()

	// Load the index into memory
	indexStreamFN := util.SegmentFileName(segment, segmentSuffix, lucene40.FIELDS_INDEX_EXTENSION)
	indexStream, err = d.OpenInput(indexStreamFN, ctx)
	if err != nil {
		return nil, err
	}
	codecNameIdx := formatName + CODEC_SFX_IDX
	codec.CheckHeader(indexStream, codecNameIdx, CODEC_SFX_VERSION_START, CODEC_SFX_VERSION_CURRENT)
	if int64(codec.HeaderLength(codecNameIdx)) != indexStream.FilePointer() {
		panic("assert fail")
	}
	r.indexReader, err = newCompressingStoredFieldsIndexReader(indexStream, si)
	if err != nil {
		return nil, err
	}
	err = indexStream.Close()
	if err != nil {
		return nil, err
	}
	indexStream = nil

	// Open the data file and read metadata
	fieldsStreamFN := util.SegmentFileName(segment, segmentSuffix, lucene40.FIELDS_EXTENSION)
	r.fieldsStream, err = d.OpenInput(fieldsStreamFN, ctx)
	if err != nil {
		return nil, err
	}
	codecNameDat := formatName + CODEC_SFX_DAT
	codec.CheckHeader(r.fieldsStream, codecNameDat, CODEC_SFX_VERSION_START, CODEC_SFX_VERSION_CURRENT)
	if int64(codec.HeaderLength(codecNameDat)) != r.fieldsStream.FilePointer() {
		panic("assert fail")
	}

	n, err := r.fieldsStream.ReadVInt()
	if err != nil {
		return nil, err
	}
	r.packedIntsVersion = int(n)
	r.decompressor = compressionMode.NewDecompressor()
	r.bytes = make([]byte, 0)

	success = true
	return r, nil
}
예제 #13
0
func newBlockTreeTermsReader(dir store.Directory,
	fieldInfos model.FieldInfos, info *model.SegmentInfo,
	postingsReader PostingsReaderBase, ctx store.IOContext,
	segmentSuffix string, indexDivisor int) (p FieldsProducer, err error) {

	log.Print("Initializing BlockTreeTermsReader...")
	fp := &BlockTreeTermsReader{
		postingsReader: postingsReader,
		fields:         make(map[string]FieldReader),
		segment:        info.Name,
	}
	fp.in, err = dir.OpenInput(util.SegmentFileName(info.Name, segmentSuffix, BTT_EXTENSION), ctx)
	if err != nil {
		return fp, err
	}

	success := false
	var indexIn store.IndexInput
	defer func() {
		if !success {
			log.Print("Failed to initialize BlockTreeTermsReader.")
			if err != nil {
				log.Print("DEBUG ", err)
			}
			// this.close() will close in:
			util.CloseWhileSuppressingError(indexIn, fp)
		}
	}()

	fp.version, err = fp.readHeader(fp.in)
	if err != nil {
		return fp, err
	}
	log.Printf("Version: %v", fp.version)

	if indexDivisor != -1 {
		indexIn, err = dir.OpenInput(util.SegmentFileName(info.Name, segmentSuffix, BTT_INDEX_EXTENSION), ctx)
		if err != nil {
			return fp, err
		}

		indexVersion, err := fp.readIndexHeader(indexIn)
		if err != nil {
			return fp, err
		}
		log.Printf("Index version: %v", indexVersion)
		if int(indexVersion) != fp.version {
			return fp, errors.New(fmt.Sprintf("mixmatched version files: %v=%v,%v=%v", fp.in, fp.version, indexIn, indexVersion))
		}
	}

	// Have PostingsReader init itself
	postingsReader.Init(fp.in)

	// Read per-field details
	fp.seekDir(fp.in, fp.dirOffset)
	if indexDivisor != -1 {
		fp.seekDir(indexIn, fp.indexDirOffset)
	}

	numFields, err := fp.in.ReadVInt()
	if err != nil {
		return fp, err
	}
	log.Printf("Fields number: %v", numFields)
	if numFields < 0 {
		return fp, errors.New(fmt.Sprintf("invalid numFields: %v (resource=%v)", numFields, fp.in))
	}

	for i := int32(0); i < numFields; i++ {
		log.Printf("Next field...")
		field, err := fp.in.ReadVInt()
		if err != nil {
			return fp, err
		}
		log.Printf("Field: %v", field)

		numTerms, err := fp.in.ReadVLong()
		if err != nil {
			return fp, err
		}
		// assert numTerms >= 0
		log.Printf("Terms number: %v", numTerms)

		numBytes, err := fp.in.ReadVInt()
		if err != nil {
			return fp, err
		}
		log.Printf("Bytes number: %v", numBytes)

		rootCode := make([]byte, numBytes)
		err = fp.in.ReadBytes(rootCode)
		if err != nil {
			return fp, err
		}
		fieldInfo := fieldInfos.FieldInfoByNumber(int(field))
		// assert fieldInfo != nil
		var sumTotalTermFreq int64
		if fieldInfo.IndexOptions() == model.INDEX_OPT_DOCS_ONLY {
			sumTotalTermFreq = -1
		} else {
			sumTotalTermFreq, err = fp.in.ReadVLong()
			if err != nil {
				return fp, err
			}
		}
		sumDocFreq, err := fp.in.ReadVLong()
		if err != nil {
			return fp, err
		}
		docCount, err := fp.in.ReadVInt()
		if err != nil {
			return fp, err
		}
		log.Printf("DocCount: %v", docCount)
		if docCount < 0 || int(docCount) > info.DocCount() { // #docs with field must be <= #docs
			return fp, errors.New(fmt.Sprintf(
				"invalid docCount: %v maxDoc: %v (resource=%v)",
				docCount, info.DocCount(), fp.in))
		}
		if sumDocFreq < int64(docCount) { // #postings must be >= #docs with field
			return fp, errors.New(fmt.Sprintf(
				"invalid sumDocFreq: %v docCount: %v (resource=%v)",
				sumDocFreq, docCount, fp.in))
		}
		if sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq { // #positions must be >= #postings
			return fp, errors.New(fmt.Sprintf(
				"invalid sumTotalTermFreq: %v sumDocFreq: %v (resource=%v)",
				sumTotalTermFreq, sumDocFreq, fp.in))
		}

		var indexStartFP int64
		if indexDivisor != -1 {
			indexStartFP, err = indexIn.ReadVLong()
			if err != nil {
				return fp, err
			}
		}
		log.Printf("indexStartFP: %v", indexStartFP)
		if _, ok := fp.fields[fieldInfo.Name]; ok {
			return fp, errors.New(fmt.Sprintf(
				"duplicate field: %v (resource=%v)", fieldInfo.Name, fp.in))
		}
		fp.fields[fieldInfo.Name], err = newFieldReader(fp,
			fieldInfo, numTerms, rootCode, sumTotalTermFreq,
			sumDocFreq, docCount, indexStartFP, indexIn)
		if err != nil {
			return fp, err
		}
		log.Print("DEBUG field processed.")
	}

	if indexDivisor != -1 {
		err = indexIn.Close()
		if err != nil {
			return fp, err
		}
	}

	success = true

	return fp, nil
}
예제 #14
0
func newCompressingStoredFieldsIndexReader(fieldsIndexIn store.IndexInput,
	si *model.SegmentInfo) (r *CompressingStoredFieldsIndexReader, err error) {

	r = &CompressingStoredFieldsIndexReader{}
	r.maxDoc = si.DocCount()
	r.docBases = make([]int, 0, 16)
	r.startPointers = make([]int64, 0, 16)
	r.avgChunkDocs = make([]int, 0, 16)
	r.avgChunkSizes = make([]int64, 0, 16)
	r.docBasesDeltas = make([]packed.PackedIntsReader, 0, 16)
	r.startPointersDeltas = make([]packed.PackedIntsReader, 0, 16)

	packedIntsVersion, err := fieldsIndexIn.ReadVInt()
	if err != nil {
		return nil, err
	}

	for blockCount := 0; ; blockCount++ {
		numChunks, err := fieldsIndexIn.ReadVInt()
		if err != nil {
			return nil, err
		}
		if numChunks == 0 {
			break
		}

		{ // doc bases
			n, err := fieldsIndexIn.ReadVInt()
			if err != nil {
				return nil, err
			}
			r.docBases = append(r.docBases, int(n))
			n, err = fieldsIndexIn.ReadVInt()
			if err != nil {
				return nil, err
			}
			r.avgChunkDocs = append(r.avgChunkDocs, int(n))
			bitsPerDocBase, err := fieldsIndexIn.ReadVInt()
			if err != nil {
				return nil, err
			}
			if bitsPerDocBase > 32 {
				return nil, errors.New(fmt.Sprintf("Corrupted bitsPerDocBase (resource=%v)", fieldsIndexIn))
			}
			pr, err := packed.NewPackedReaderNoHeader(fieldsIndexIn, packed.PACKED, packedIntsVersion, numChunks, uint32(bitsPerDocBase))
			if err != nil {
				return nil, err
			}
			r.docBasesDeltas = append(r.docBasesDeltas, pr)
		}

		{ // start pointers
			n, err := fieldsIndexIn.ReadVLong()
			if err != nil {
				return nil, err
			}
			r.startPointers = append(r.startPointers, n)
			n, err = fieldsIndexIn.ReadVLong()
			if err != nil {
				return nil, err
			}
			r.avgChunkSizes = append(r.avgChunkSizes, n)
			bitsPerStartPointer, err := fieldsIndexIn.ReadVInt()
			if err != nil {
				return nil, err
			}
			if bitsPerStartPointer > 64 {
				return nil, errors.New(fmt.Sprintf("Corrupted bitsPerStartPonter (resource=%v)", fieldsIndexIn))
			}
			pr, err := packed.NewPackedReaderNoHeader(fieldsIndexIn, packed.PACKED, packedIntsVersion, numChunks, uint32(bitsPerStartPointer))
			if err != nil {
				return nil, err
			}
			r.startPointersDeltas = append(r.startPointersDeltas, pr)
		}
	}

	return r, nil
}
예제 #15
0
func NewBlockTreeTermsReader(dir store.Directory,
	fieldInfos FieldInfos, info *SegmentInfo,
	postingsReader PostingsReaderBase, ctx store.IOContext,
	segmentSuffix string, indexDivisor int) (p FieldsProducer, err error) {

	// log.Print("Initializing BlockTreeTermsReader...")
	fp := &BlockTreeTermsReader{
		postingsReader: postingsReader,
		fields:         make(map[string]FieldReader),
		segment:        info.Name,
	}
	fp.in, err = dir.OpenInput(util.SegmentFileName(info.Name, segmentSuffix, TERMS_EXTENSION), ctx)
	if err != nil {
		return nil, err
	}

	success := false
	var indexIn store.IndexInput
	defer func() {
		if !success {
			fmt.Println("Failed to initialize BlockTreeTermsReader.")
			if err != nil {
				fmt.Println("DEBUG ", err)
			}
			// this.close() will close in:
			util.CloseWhileSuppressingError(indexIn, fp)
		}
	}()

	fp.version, err = fp.readHeader(fp.in)
	if err != nil {
		return nil, err
	}
	// log.Printf("Version: %v", fp.version)

	if indexDivisor != -1 {
		filename := util.SegmentFileName(info.Name, segmentSuffix, TERMS_INDEX_EXTENSION)
		indexIn, err = dir.OpenInput(filename, ctx)
		if err != nil {
			return nil, err
		}

		indexVersion, err := fp.readIndexHeader(indexIn)
		if err != nil {
			return nil, err
		}
		// log.Printf("Index version: %v", indexVersion)
		if int(indexVersion) != fp.version {
			return nil, errors.New(fmt.Sprintf("mixmatched version files: %v=%v,%v=%v", fp.in, fp.version, indexIn, indexVersion))
		}
	}

	// verify
	if indexIn != nil && fp.version >= TERMS_VERSION_CURRENT {
		if _, err = store.ChecksumEntireFile(indexIn); err != nil {
			return nil, err
		}
	}

	// Have PostingsReader init itself
	postingsReader.Init(fp.in)

	if fp.version >= TERMS_VERSION_CHECKSUM {
		// NOTE: data file is too costly to verify checksum against all the
		// bytes on open, but for now we at least verify proper structure
		// of the checksum footer: which looks for FOOTER_MAGIC +
		// algorithmID. This is cheap and can detect some forms of
		// corruption such as file trucation.
		if _, err = codec.RetrieveChecksum(fp.in); err != nil {
			return nil, err
		}
	}

	// Read per-field details
	fp.seekDir(fp.in, fp.dirOffset)
	if indexDivisor != -1 {
		fp.seekDir(indexIn, fp.indexDirOffset)
	}

	numFields, err := fp.in.ReadVInt()
	if err != nil {
		return nil, err
	}
	// log.Printf("Fields number: %v", numFields)
	if numFields < 0 {
		return nil, errors.New(fmt.Sprintf("invalid numFields: %v (resource=%v)", numFields, fp.in))
	}

	for i := int32(0); i < numFields; i++ {
		// log.Printf("Next field...")
		field, err := fp.in.ReadVInt()
		if err != nil {
			return nil, err
		}
		// log.Printf("Field: %v", field)

		numTerms, err := fp.in.ReadVLong()
		if err != nil {
			return nil, err
		}
		assert2(numTerms > 0,
			"Illegal numTerms for field number: %v (resource=%v)", field, fp.in)
		// log.Printf("Terms number: %v", numTerms)

		numBytes, err := fp.in.ReadVInt()
		if err != nil {
			return nil, err
		}
		assert2(numBytes >= 0,
			"invalid rootCode for field number: %v, numBytes=%v (resource=%v)",
			field, numBytes, fp.in)
		// log.Printf("Bytes number: %v", numBytes)

		rootCode := make([]byte, numBytes)
		err = fp.in.ReadBytes(rootCode)
		if err != nil {
			return nil, err
		}
		fieldInfo := fieldInfos.FieldInfoByNumber(int(field))
		assert2(fieldInfo != nil, "invalid field numebr: %v (resource=%v)", field, fp.in)
		var sumTotalTermFreq int64
		if fieldInfo.IndexOptions() == INDEX_OPT_DOCS_ONLY {
			sumTotalTermFreq = -1
		} else {
			sumTotalTermFreq, err = fp.in.ReadVLong()
			if err != nil {
				return nil, err
			}
		}
		sumDocFreq, err := fp.in.ReadVLong()
		if err != nil {
			return nil, err
		}
		var docCount int
		if docCount, err = asInt(fp.in.ReadVInt()); err != nil {
			return nil, err
		}
		// fmt.Printf("DocCount: %v\n", docCount)
		var longsSize int
		if fp.version >= TERMS_VERSION_META_ARRAY {
			if longsSize, err = asInt(fp.in.ReadVInt()); err != nil {
				return nil, err
			}
		}
		assert2(longsSize >= 0,
			"invalid longsSize for field: %v, longsSize=%v (resource=%v)",
			fieldInfo.Name, longsSize, fp.in)
		var minTerm, maxTerm []byte
		if fp.version >= TERMS_VERSION_MIN_MAX_TERMS {
			if minTerm, err = readBytesRef(fp.in); err != nil {
				return nil, err
			}
			if maxTerm, err = readBytesRef(fp.in); err != nil {
				return nil, err
			}
		}
		if docCount < 0 || int(docCount) > info.DocCount() { // #docs with field must be <= #docs
			return nil, errors.New(fmt.Sprintf(
				"invalid docCount: %v maxDoc: %v (resource=%v)",
				docCount, info.DocCount(), fp.in))
		}
		if sumDocFreq < int64(docCount) { // #postings must be >= #docs with field
			return nil, errors.New(fmt.Sprintf(
				"invalid sumDocFreq: %v docCount: %v (resource=%v)",
				sumDocFreq, docCount, fp.in))
		}
		if sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq { // #positions must be >= #postings
			return nil, errors.New(fmt.Sprintf(
				"invalid sumTotalTermFreq: %v sumDocFreq: %v (resource=%v)",
				sumTotalTermFreq, sumDocFreq, fp.in))
		}

		var indexStartFP int64
		if indexDivisor != -1 {
			if indexStartFP, err = indexIn.ReadVLong(); err != nil {
				return nil, err
			}
		}
		// log.Printf("indexStartFP: %v", indexStartFP)
		if _, ok := fp.fields[fieldInfo.Name]; ok {
			return nil, errors.New(fmt.Sprintf(
				"duplicate field: %v (resource=%v)", fieldInfo.Name, fp.in))
		}
		if fp.fields[fieldInfo.Name], err = newFieldReader(fp,
			fieldInfo, numTerms, rootCode, sumTotalTermFreq,
			sumDocFreq, docCount, indexStartFP, longsSize,
			indexIn, minTerm, maxTerm); err != nil {
			return nil, err
		}
	}

	if indexDivisor != -1 {
		if err = indexIn.Close(); err != nil {
			return nil, err
		}
	}

	success = true

	return fp, nil
}
예제 #16
0
func (r *BlockTreeTermsReader) seekDir(input store.IndexInput, dirOffset int64) (err error) {
	// log.Printf("Seeking to: %v", dirOffset)
	if r.version >= TERMS_VERSION_CHECKSUM {
		if err = input.Seek(input.Length() - codec.FOOTER_LENGTH - 8); err != nil {
			return
		}
		if dirOffset, err = input.ReadLong(); err != nil {
			return
		}
	} else if r.version >= TERMS_VERSION_APPEND_ONLY {
		if err = input.Seek(input.Length() - 8); err != nil {
			return
		}
		if dirOffset, err = input.ReadLong(); err != nil {
			return
		}
	}
	return input.Seek(dirOffset)
}
예제 #17
0
/*
Lucene42DocValuesProducer.java/4.5.1/L138
*/
func (dvp *Lucene42DocValuesProducer) readFields(meta store.IndexInput) (err error) {
	var fieldNumber int
	var fieldType byte
	fieldNumber, err = asInt(meta.ReadVInt())
	for fieldNumber != -1 && err == nil {
		fieldType, err = meta.ReadByte()
		if err != nil {
			break
		}
		switch fieldType {
		case LUCENE42_DV_NUMBER:
			entry := NumericEntry{}
			entry.offset, err = meta.ReadLong()
			if err != nil {
				return err
			}
			entry.format, err = meta.ReadByte()
			if err != nil {
				return err
			}
			switch entry.format {
			case LUCENE42_DV_DELTA_COMPRESSED:
			case LUCENE42_DV_TABLE_COMPRESSED:
			case LUCENE42_DV_GCD_COMPRESSED:
			case LUCENE42_DV_UNCOMPRESSED:
			default:
				return errors.New(fmt.Sprintf("Unknown format: %v, input=%v", entry.format, meta))
			}
			if entry.format != LUCENE42_DV_UNCOMPRESSED {
				entry.packedIntsVersion, err = asInt(meta.ReadVInt())
				if err != nil {
					return err
				}
			}
			dvp.numerics[fieldNumber] = entry
		case LUCENE42_DV_BYTES:
			panic("not implemented yet")
		case LUCENE42_DV_FST:
			panic("not implemented yet")
		default:
			return errors.New(fmt.Sprintf("invalid entry type: %v, input=%v", fieldType, meta))
		}
		fieldNumber, err = asInt(meta.ReadVInt())
	}
	return
}