Beispiel #1
0
func NewLucene41PostingsReader(dir store.Directory,
	fis model.FieldInfos, si *model.SegmentInfo,
	ctx store.IOContext, segmentSuffix string) (r PostingsReaderBase, err error) {

	log.Print("Initializing Lucene41PostingsReader...")
	success := false
	var docIn, posIn, payIn store.IndexInput = nil, nil, nil
	defer func() {
		if !success {
			log.Print("Failed to initialize Lucene41PostingsReader.")
			if err != nil {
				log.Print("DEBUG ", err)
			}
			util.CloseWhileSuppressingError(docIn, posIn, payIn)
		}
	}()

	docIn, err = dir.OpenInput(util.SegmentFileName(si.Name, segmentSuffix, LUCENE41_DOC_EXTENSION), ctx)
	if err != nil {
		return r, err
	}
	_, err = codec.CheckHeader(docIn, LUCENE41_DOC_CODEC, LUCENE41_VERSION_CURRENT, LUCENE41_VERSION_CURRENT)
	if err != nil {
		return r, err
	}
	forUtil, err := NewForUtil(docIn)
	if err != nil {
		return r, err
	}

	if fis.HasProx {
		posIn, err = dir.OpenInput(util.SegmentFileName(si.Name, segmentSuffix, LUCENE41_POS_EXTENSION), ctx)
		if err != nil {
			return r, err
		}
		_, err = codec.CheckHeader(posIn, LUCENE41_POS_CODEC, LUCENE41_VERSION_CURRENT, LUCENE41_VERSION_CURRENT)
		if err != nil {
			return r, err
		}

		if fis.HasPayloads || fis.HasOffsets {
			payIn, err = dir.OpenInput(util.SegmentFileName(si.Name, segmentSuffix, LUCENE41_PAY_EXTENSION), ctx)
			if err != nil {
				return r, err
			}
			_, err = codec.CheckHeader(payIn, LUCENE41_PAY_CODEC, LUCENE41_VERSION_CURRENT, LUCENE41_VERSION_CURRENT)
			if err != nil {
				return r, err
			}
		}
	}

	success = true
	return &Lucene41PostingsReader{docIn, posIn, payIn, forUtil}, nil
}
Beispiel #2
0
func newLucene42DocValuesProducer(state SegmentReadState,
	dataCodec, dataExtension, metaCodec, metaExtension string) (dvp *Lucene42DocValuesProducer, err error) {
	dvp = &Lucene42DocValuesProducer{
		numericInstances: make(map[int]NumericDocValues),
	}
	dvp.maxDoc = state.segmentInfo.DocCount()
	metaName := util.SegmentFileName(state.segmentInfo.Name, state.segmentSuffix, metaExtension)
	// read in the entries from the metadata file.
	in, err := state.dir.OpenInput(metaName, state.context)
	if err != nil {
		return dvp, err
	}
	success := false
	defer func() {
		if success {
			err = util.Close(in)
		} else {
			util.CloseWhileSuppressingError(in)
		}
	}()

	version, err := codec.CheckHeader(in, metaCodec, LUCENE42_DV_VERSION_START, LUCENE42_DV_VERSION_CURRENT)
	if err != nil {
		return dvp, err
	}
	dvp.numerics = make(map[int]NumericEntry)
	dvp.binaries = make(map[int]BinaryEntry)
	dvp.fsts = make(map[int]FSTEntry)
	err = dvp.readFields(in)
	if err != nil {
		return dvp, err
	}
	success = true

	success = false
	dataName := util.SegmentFileName(state.segmentInfo.Name, state.segmentSuffix, dataExtension)
	dvp.data, err = state.dir.OpenInput(dataName, state.context)
	if err != nil {
		return dvp, err
	}
	version2, err := codec.CheckHeader(dvp.data, dataCodec, LUCENE42_DV_VERSION_START, LUCENE42_DV_VERSION_CURRENT)
	if err != nil {
		return dvp, err
	}

	if version != version2 {
		return dvp, errors.New("Format versions mismatch")
	}
	return dvp, nil
}
Beispiel #3
0
func TestClone(t *testing.T) {
	fmt.Println("Testing Loading FST...")
	path := "../search/testdata/belfrysample"
	d, err := OpenFSDirectory(path)
	if err != nil {
		t.Error(err)
	}
	ctx := NewIOContextBool(false)
	in, err := d.OpenInput("_0_Lucene41_0.tip", ctx)
	if err != nil {
		t.Error(err)
	}
	version, err := codec.CheckHeader(in, "BLOCK_TREE_TERMS_INDEX", 0, 1)
	var indexDirOffset int64 = 0
	if version < 1 {
		indexDirOffset, err = in.ReadLong()
		if err != nil {
			t.Error(err)
		}
	} else { // >= 1
		in.Seek(in.Length() - 8)
		indexDirOffset, err = in.ReadLong()
		if err != nil {
			t.Error(err)
		}
	}
	fmt.Println("indexDirOffset:", indexDirOffset)
	in.Seek(indexDirOffset)

	indexStartFP, err := in.ReadVLong()
	if err != nil {
		t.Error(err)
	}
	fmt.Println("indexStartFP:", indexStartFP)

	fmt.Println("Before clone", in)
	clone := in.Clone()
	fmt.Println("After clone", clone)
	if _, ok := clone.(*SimpleFSIndexInput); !ok {
		t.Error("Clone() should return *SimpleFSIndexInput.")
	}
	clone.Seek(indexStartFP)
	fmt.Println("After clone.Seek()", clone)

	_, err = codec.CheckHeader(clone, "FST", 3, 4)
	if err != nil {
		t.Error(err)
	}
}
Beispiel #4
0
func NewCompoundFileDirectory(directory Directory, fileName string, context IOContext, openForWrite bool) (d *CompoundFileDirectory, err error) {
	self := &CompoundFileDirectory{
		Locker:         &sync.Mutex{},
		directory:      directory,
		fileName:       fileName,
		readBufferSize: bufferSize(context),
		openForWrite:   openForWrite}
	self.DirectoryImpl = NewDirectoryImpl(self)
	self.BaseDirectory = NewBaseDirectory(self)

	if !openForWrite {
		// log.Printf("Open for read.")
		success := false
		defer func() {
			if !success {
				util.CloseWhileSuppressingError(self.handle)
			}
		}()
		self.handle, err = directory.OpenInput(fileName, context)
		if err != nil {
			return nil, err
		}
		self.entries, err = self.readEntries(self.handle, directory, fileName)
		if err != nil {
			return nil, err
		}
		if self.version >= CFD_VERSION_CHECKSUM {
			if _, err = codec.CheckHeader(self.handle, CFD_DATA_CODEC,
				int32(self.version), int32(self.version)); err != nil {
				return nil, err
			}
			// NOTE: data file is too costly to verify checksum against all the
			// bytes on open, but for now we at least verify proper structure
			// of the checksum footer: which looks for FOOTER_MAGIC +
			// algorithmID. This is cheap and can detect some forms of
			// corruption such as file trucation.
			if _, err = codec.RetrieveChecksum(self.handle); err != nil {
				return nil, err
			}
		}
		success = true
		self.BaseDirectory.IsOpen = true
		return self, nil
	} else {
		assert2(reflect.TypeOf(directory).Name() != "CompoundFileDirectory",
			"compound file inside of compound file: %v", fileName)
		self.entries = SENTINEL
		self.IsOpen = true
		self.writer = newCompoundFileWriter(directory, fileName)
		self.handle = nil
		return self, nil
	}
}
Beispiel #5
0
func (r *BlockTreeTermsReader) readIndexHeader(input store.IndexInput) (version int, err error) {
	version, err = asInt(codec.CheckHeader(input, TERMS_INDEX_CODEC_NAME, TERMS_VERSION_START, TERMS_VERSION_CURRENT))
	if err != nil {
		return version, err
	}
	if version < TERMS_VERSION_APPEND_ONLY {
		r.indexDirOffset, err = input.ReadLong()
		if err != nil {
			return version, err
		}
	}
	return version, nil
}
Beispiel #6
0
func (r *BlockTreeTermsReader) readHeader(input store.IndexInput) (version int, err error) {
	version, err = asInt(codec.CheckHeader(input, BTT_CODEC_NAME, BTT_VERSION_START, BTT_VERSION_CURRENT))
	if err != nil {
		return int(version), err
	}
	if version < BTT_VERSION_APPEND_ONLY {
		r.dirOffset, err = input.ReadLong()
		if err != nil {
			return int(version), err
		}
	}
	return int(version), nil
}
Beispiel #7
0
func NewPackedReader(in DataInput) (r PackedIntsReader, err error) {
	if version, err := codec.CheckHeader(in, PACKED_CODEC_NAME, PACKED_VERSION_START, VERSION_CURRENT); err == nil {
		if bitsPerValue, err := asUint32(in.ReadVInt()); err == nil {
			// assert bitsPerValue > 0 && bitsPerValue <= 64
			if valueCount, err := in.ReadVInt(); err == nil {
				if id, err := in.ReadVInt(); err == nil {
					format := PackedFormat(id)
					return ReaderNoHeader(in, format, version, valueCount, bitsPerValue)
				}
			}
		}
	}
	return
}
Beispiel #8
0
func (r *Lucene41PostingsReader) Init(termsIn store.IndexInput) error {
	log.Printf("Initializing from: %v", termsIn)
	// Make sure we are talking to the matching postings writer
	_, err := codec.CheckHeader(termsIn, LUCENE41_TERMS_CODEC, LUCENE41_VERSION_START, LUCENE41_VERSION_CURRENT)
	if err != nil {
		return err
	}
	indexBlockSize, err := termsIn.ReadVInt()
	if err != nil {
		return err
	}
	log.Printf("Index block size: %v", indexBlockSize)
	if indexBlockSize != LUCENE41_BLOCK_SIZE {
		panic(fmt.Sprintf("index-time BLOCK_SIZE (%v) != read-time BLOCK_SIZE (%v)", indexBlockSize, LUCENE41_BLOCK_SIZE))
	}
	return nil
}
Beispiel #9
0
func TestReadingFromSlicedIndexInputOSX(t *testing.T) {
	t.Logf("TestReadingFromSlicedIndexInputOSX...")
	path := "../search/testdata/osx/belfrysample"
	d, err := OpenFSDirectory(path)
	if err != nil {
		t.Error(err)
	}
	ctx := NewIOContextBool(false)
	cd, err := NewCompoundFileDirectory(d, "_0.cfs", ctx, false)
	name := util.SegmentFileName("_0", "Lucene41_0", "pos")
	posIn, err := cd.OpenInput(name, ctx)
	if err != nil {
		t.Error(err)
	}
	t.Log(posIn)
	codec.CheckHeader(posIn, "Lucene41PostingsWriterPos", 0, 0)
	// codec header mismatch: actual header=0 vs expected header=1071082519 (resource: SlicedIndexInput(SlicedIndexInput(_0_Lucene41_0.pos in SimpleFSIndexInput(path='/private/tmp/kc/index/belfrysample/_0.cfs')) in SimpleFSIndexInput(path='/private/tmp/kc/index/belfrysample/_0.cfs') slice=1461:3426))
}
Beispiel #10
0
func TestCheckHeaderWin8(t *testing.T) {
	fmt.Println("TestCheckHeaderWin8...")
	path := "../search/testdata/win8/belfrysample"
	d, err := OpenFSDirectory(path)
	if err != nil {
		t.Error(err)
	}
	ctx := NewIOContextBool(false)
	cd, err := NewCompoundFileDirectory(d, "_0.cfs", ctx, false)
	if err != nil {
		t.Error(err)
	}
	r, err := cd.OpenInput("_0_Lucene41_0.pos", ctx)
	_, err = codec.CheckHeader(r, "Lucene41PostingsWriterPos", 0, 0)
	if err != nil {
		t.Error(err)
	}
}
Beispiel #11
0
// Sole constructor
func newCompressingStoredFieldsReader(d store.Directory,
	si *model.SegmentInfo, segmentSuffix string,
	fn model.FieldInfos, ctx store.IOContext, formatName string,
	compressionMode compressing.CompressionMode) (r *CompressingStoredFieldsReader, err error) {

	r = &CompressingStoredFieldsReader{}
	r.compressionMode = compressionMode
	segment := si.Name
	r.fieldInfos = fn
	r.numDocs = si.DocCount()

	var indexStream store.IndexInput
	success := false
	defer func() {
		if !success {
			log.Println("Failed to initialize CompressionStoredFieldsReader.")
			if err != nil {
				log.Print(err)
			}
			util.Close(r, indexStream)
		}
	}()

	// Load the index into memory
	indexStreamFN := util.SegmentFileName(segment, segmentSuffix, lucene40.FIELDS_INDEX_EXTENSION)
	indexStream, err = d.OpenInput(indexStreamFN, ctx)
	if err != nil {
		return nil, err
	}
	codecNameIdx := formatName + CODEC_SFX_IDX
	codec.CheckHeader(indexStream, codecNameIdx, CODEC_SFX_VERSION_START, CODEC_SFX_VERSION_CURRENT)
	if int64(codec.HeaderLength(codecNameIdx)) != indexStream.FilePointer() {
		panic("assert fail")
	}
	r.indexReader, err = newCompressingStoredFieldsIndexReader(indexStream, si)
	if err != nil {
		return nil, err
	}
	err = indexStream.Close()
	if err != nil {
		return nil, err
	}
	indexStream = nil

	// Open the data file and read metadata
	fieldsStreamFN := util.SegmentFileName(segment, segmentSuffix, lucene40.FIELDS_EXTENSION)
	r.fieldsStream, err = d.OpenInput(fieldsStreamFN, ctx)
	if err != nil {
		return nil, err
	}
	codecNameDat := formatName + CODEC_SFX_DAT
	codec.CheckHeader(r.fieldsStream, codecNameDat, CODEC_SFX_VERSION_START, CODEC_SFX_VERSION_CURRENT)
	if int64(codec.HeaderLength(codecNameDat)) != r.fieldsStream.FilePointer() {
		panic("assert fail")
	}

	n, err := r.fieldsStream.ReadVInt()
	if err != nil {
		return nil, err
	}
	r.packedIntsVersion = int(n)
	r.decompressor = compressionMode.NewDecompressor()
	r.bytes = make([]byte, 0)

	success = true
	return r, nil
}
Beispiel #12
0
	fileName := util.SegmentFileName(segment, "", LUCENE40_SI_EXTENSION)
	input, err := dir.OpenInput(fileName, context)
	if err != nil {
		return si, err
	}

	success := false
	defer func() {
		if !success {
			util.CloseWhileSuppressingError(input)
		} else {
			input.Close()
		}
	}()

	_, err = codec.CheckHeader(input, LUCENE40_CODEC_NAME, LUCENE40_VERSION_START, LUCENE40_VERSION_CURRENT)
	if err != nil {
		return si, err
	}
	version, err := input.ReadString()
	if err != nil {
		return si, err
	}
	docCount, err := input.ReadInt()
	if err != nil {
		return si, err
	}
	if docCount < 0 {
		return si, errors.New(fmt.Sprintf("invalid docCount: %v (resource=%v)", docCount, input))
	}
	sicf, err := input.ReadByte()
Beispiel #13
0
func (d *CompoundFileDirectory) readEntries(handle IndexInput, dir Directory, name string) (mapping map[string]FileSlice, err error) {
	var stream IndexInput = nil
	var entriesStream ChecksumIndexInput = nil
	// read the first VInt. If it is negative, it's the version number
	// otherwise it's the count (pre-3.1 indexes)
	var success = false
	defer func() {
		if success {
			err = util.Close(stream, entriesStream)
		} else {
			util.CloseWhileSuppressingError(stream, entriesStream)
		}
	}()

	stream = handle.Clone()
	// fmt.Printf("Reading from stream: %v\n", stream)
	firstInt, err := stream.ReadVInt()
	if err != nil {
		return nil, err
	}
	// impossible for 3.0 to have 63 files in a .cfs, CFS writer was not visible
	// and separate norms/etc are outside of cfs.
	if firstInt == int32(CODEC_MAGIC_BYTE1) {
		if secondByte, err := stream.ReadByte(); err == nil {
			if thirdByte, err := stream.ReadByte(); err == nil {
				if fourthByte, err := stream.ReadByte(); err == nil {
					if secondByte != CODEC_MAGIC_BYTE2 ||
						thirdByte != CODEC_MAGIC_BYTE3 ||
						fourthByte != CODEC_MAGIC_BYTE4 {
						return nil, errors.New(fmt.Sprintf(
							"Illegal/impossible header for CFS file: %v,%v,%v",
							secondByte, thirdByte, fourthByte))
					}
				}
			}
		}
		if err != nil {
			return nil, err
		}

		d.version, err = int32ToInt(codec.CheckHeaderNoMagic(stream, CFD_DATA_CODEC, CFD_VERSION_START, CFD_VERSION_CURRENT))
		if err != nil {
			return nil, err
		}
		entriesFileName := util.SegmentFileName(util.StripExtension(name), "", COMPOUND_FILE_ENTRIES_EXTENSION)
		entriesStream, err = dir.OpenChecksumInput(entriesFileName, IO_CONTEXT_READONCE)
		if err != nil {
			return nil, err
		}
		_, err = codec.CheckHeader(entriesStream, CFD_ENTRY_CODEC, CFD_VERSION_START, CFD_VERSION_CURRENT)
		if err != nil {
			return nil, err
		}
		numEntries, err := entriesStream.ReadVInt()
		if err != nil {
			return nil, err
		}

		mapping = make(map[string]FileSlice)
		// fmt.Printf("Entries number: %v\n", numEntries)
		for i := int32(0); i < numEntries; i++ {
			id, err := entriesStream.ReadString()
			if err != nil {
				return nil, err
			}
			if _, ok := mapping[id]; ok {
				return nil, errors.New(fmt.Sprintf(
					"Duplicate cfs entry id=%v in CFS: %v", id, entriesStream))
			}
			// log.Printf("Found entry: %v", id)
			offset, err := entriesStream.ReadLong()
			if err != nil {
				return nil, err
			}
			length, err := entriesStream.ReadLong()
			if err != nil {
				return nil, err
			}
			mapping[id] = FileSlice{offset, length}
		}
		if d.version >= CFD_VERSION_CHECKSUM {
			_, err = codec.CheckFooter(entriesStream)
		} else {
			err = codec.CheckEOF(entriesStream)
		}
		if err != nil {
			return nil, err
		}
	} else {
		// TODO remove once 3.x is not supported anymore
		panic("not supported yet; will also be obsolete soon")
	}
	success = true
	return mapping, nil
}
Beispiel #14
0
/** Load a previously saved FST; maxBlockBits allows you to
 *  control the size of the byte[] pages used to hold the FST bytes. */
func loadFST3(in util.DataInput, outputs Outputs, maxBlockBits uint32) (fst *FST, err error) {
	log.Printf("Loading FST from %v and output to %v...", in, outputs)
	defer func() {
		if err != nil {
			log.Print("Failed to load FST.")
			log.Printf("DEBUG ", err)
		}
	}()
	fst = &FST{outputs: outputs, startNode: -1}

	if maxBlockBits < 1 || maxBlockBits > 30 {
		panic(fmt.Sprintf("maxBlockBits should 1..30; got %v", maxBlockBits))
	}

	// NOTE: only reads most recent format; we don't have
	// back-compat promise for FSTs (they are experimental):
	fst.version, err = codec.CheckHeader(in, FST_FILE_FORMAT_NAME, FST_VERSION_PACKED, FST_VERSION_VINT_TARGET)
	if err != nil {
		return fst, err
	}
	if b, err := in.ReadByte(); err == nil {
		fst.packed = (b == 1)
	} else {
		return fst, err
	}
	if b, err := in.ReadByte(); err == nil {
		if b == 1 {
			// accepts empty string
			// 1 KB blocks:
			emptyBytes := newBytesStoreFromBits(10)
			if numBytes, err := in.ReadVInt(); err == nil {
				log.Printf("Number of bytes: %v", numBytes)
				emptyBytes.CopyBytes(in, int64(numBytes))

				// De-serialize empty-string output:
				var reader BytesReader
				if fst.packed {
					log.Printf("Forward reader.")
					reader = emptyBytes.forwardReader()
				} else {
					log.Printf("Reverse reader.")
					reader = emptyBytes.reverseReader()
					// NoOutputs uses 0 bytes when writing its output,
					// so we have to check here else BytesStore gets
					// angry:
					if numBytes > 0 {
						reader.setPosition(int64(numBytes - 1))
					}
				}
				log.Printf("Reading final output from %v to %v...", reader, outputs)
				fst.emptyOutput, err = outputs.ReadFinalOutput(reader)
			}
		} // else emptyOutput = nil
	}
	if err != nil {
		return fst, err
	}

	if t, err := in.ReadByte(); err == nil {
		switch t {
		case 0:
			fst.inputType = INPUT_TYPE_BYTE1
		case 1:
			fst.inputType = INPUT_TYPE_BYTE2
		case 2:
			fst.inputType = INPUT_TYPE_BYTE4
		default:
			panic(fmt.Sprintf("invalid input type %v", t))
		}
	}
	if err != nil {
		return fst, err
	}

	if fst.packed {
		fst.nodeRefToAddress, err = packed.NewPackedReader(in)
		if err != nil {
			return fst, err
		}
	} // else nodeRefToAddress = nil

	if fst.startNode, err = in.ReadVLong(); err == nil {
		if fst.nodeCount, err = in.ReadVLong(); err == nil {
			if fst.arcCount, err = in.ReadVLong(); err == nil {
				if fst.arcWithOutputCount, err = in.ReadVLong(); err == nil {
					if numBytes, err := in.ReadVLong(); err == nil {
						if fst.bytes, err = newBytesStoreFromInput(in, numBytes, 1<<maxBlockBits); err == nil {
							fst.NO_OUTPUT = outputs.NoOutput()

							err = fst.cacheRootArcs()

							// NOTE: bogus because this is only used during
							// building; we need to break out mutable FST from
							// immutable
							// fst.allowArrayArcs = false
						}
					}
				}
			}
		}
	}
	return fst, err
}
Beispiel #15
0
func NewLucene41PostingsReader(dir store.Directory,
	fis FieldInfos, si *SegmentInfo,
	ctx store.IOContext, segmentSuffix string) (r PostingsReaderBase, err error) {

	// fmt.Println("Initializing Lucene41PostingsReader...")
	success := false
	var docIn, posIn, payIn store.IndexInput = nil, nil, nil
	defer func() {
		if !success {
			fmt.Println("Failed to initialize Lucene41PostingsReader.")
			util.CloseWhileSuppressingError(docIn, posIn, payIn)
		}
	}()

	docIn, err = dir.OpenInput(util.SegmentFileName(si.Name, segmentSuffix, LUCENE41_DOC_EXTENSION), ctx)
	if err != nil {
		return nil, err
	}
	var version int32
	version, err = codec.CheckHeader(docIn, LUCENE41_DOC_CODEC, LUCENE41_VERSION_START, LUCENE41_VERSION_CURRENT)
	if err != nil {
		return nil, err
	}
	forUtil, err := NewForUtilFrom(docIn)
	if err != nil {
		return nil, err
	}

	if version >= LUCENE41_VERSION_CHECKSUM {
		// NOTE: data file is too costly to verify checksum against all the
		// bytes on open, but for now we at least verify proper structure
		// of the checksum footer: which looks for FOOTER_MAGIC +
		// algorithmID. This is cheap and can detect some forms of
		// corruption such as file trucation.
		if _, err = codec.RetrieveChecksum(docIn); err != nil {
			return nil, err
		}
	}

	if fis.HasProx {
		posIn, err = dir.OpenInput(util.SegmentFileName(si.Name, segmentSuffix, LUCENE41_POS_EXTENSION), ctx)
		if err != nil {
			return nil, err
		}
		_, err = codec.CheckHeader(posIn, LUCENE41_POS_CODEC, version, version)
		if err != nil {
			return nil, err
		}

		if version >= LUCENE41_VERSION_CHECKSUM {
			// NOTE: data file is too costly to verify checksum against all the
			// bytes on open, but for now we at least verify proper structure
			// of the checksum footer: which looks for FOOTER_MAGIC +
			// algorithmID. This is cheap and can detect some forms of
			// corruption such as file trucation.
			if _, err = codec.RetrieveChecksum(posIn); err != nil {
				return nil, err
			}
		}

		if fis.HasPayloads || fis.HasOffsets {
			payIn, err = dir.OpenInput(util.SegmentFileName(si.Name, segmentSuffix, LUCENE41_PAY_EXTENSION), ctx)
			if err != nil {
				return nil, err
			}
			_, err = codec.CheckHeader(payIn, LUCENE41_PAY_CODEC, version, version)
			if err != nil {
				return nil, err
			}

			if version >= LUCENE41_VERSION_CHECKSUM {
				// NOTE: data file is too costly to verify checksum against all the
				// bytes on open, but for now we at least verify proper structure
				// of the checksum footer: which looks for FOOTER_MAGIC +
				// algorithmID. This is cheap and can detect some forms of
				// corruption such as file trucation.
				if _, err = codec.RetrieveChecksum(payIn); err != nil {
					return nil, err
				}

			}
		}
	}

	success = true
	return &Lucene41PostingsReader{docIn, posIn, payIn, forUtil, int(version)}, nil
}
Beispiel #16
0
func newLucene49NormsProducer(state SegmentReadState,
	dataCodec, dataExtension, metaCodec, metaExtension string) (np *NormsProducer, err error) {

	np = &NormsProducer{
		Locker:       new(sync.Mutex),
		norms:        make(map[int]*NormsEntry),
		instances:    make(map[int]NumericDocValues),
		maxDoc:       state.SegmentInfo.DocCount(),
		ramBytesUsed: util.ShallowSizeOfInstance(reflect.TypeOf(np)),
	}
	metaName := util.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, metaExtension)
	// read in the entries from the metadta file.
	var in store.ChecksumIndexInput
	if in, err = state.Dir.OpenChecksumInput(metaName, state.Context); err != nil {
		return nil, err
	}

	if err = func() error {
		var success = false
		defer func() {
			if success {
				err = util.Close(in)
			} else {
				util.CloseWhileSuppressingError(in)
			}
		}()

		if np.version, err = codec.CheckHeader(in, metaCodec, VERSION_START, VERSION_CURRENT); err != nil {
			return err
		}
		if err = np.readFields(in, state.FieldInfos); err != nil {
			return err
		}
		if _, err = codec.CheckFooter(in); err != nil {
			return err
		}
		success = true
		return nil
	}(); err != nil {
		return nil, err
	}

	dataName := util.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, dataExtension)
	if np.data, err = state.Dir.OpenInput(dataName, state.Context); err != nil {
		return nil, err
	}
	var success = false
	defer func() {
		if !success {
			util.CloseWhileSuppressingError(np.data)
		}
	}()

	var version2 int32
	if version2, err = codec.CheckHeader(np.data, dataCodec, VERSION_START, VERSION_CURRENT); err != nil {
		return nil, err
	}
	if version2 != np.version {
		return nil, errors.New("Format versions mismatch")
	}

	// NOTE: data file is too costly to verify checksum against all the
	// bytes on open, but fo rnow we at least verify proper structure
	// of the checksum footer: which looks for FOOTER_MATIC +
	// algorithmID. This is cheap and can detect some forms of
	// corruption such as file trucation.
	if _, err = codec.RetrieveChecksum(np.data); err != nil {
		return nil, err
	}

	success = true

	return np, nil
}
Beispiel #17
0
	var input store.ChecksumIndexInput
	if input, err = dir.OpenChecksumInput(filename, ctx); err != nil {
		return
	}

	var success = false
	defer func() {
		if success {
			err = input.Close()
		} else {
			util.CloseWhileSuppressingError(input)
		}
	}()

	var codecVersion int
	if codecVersion, err = asInt(codec.CheckHeader(input, FI_CODEC_NAME, FI_FORMAT_START, FI_FORMAT_CURRENT)); err != nil {
		return
	}

	var size int
	if size, err = asInt(input.ReadVInt()); err != nil {
		return
	}

	var infos []*FieldInfo
	var name string
	var fieldNumber int32
	var bits, val byte
	var isIndexed, storeTermVector, omitNorms, storePayloads bool
	var indexOptions IndexOptions
	var docValuesType, normsType DocValuesType
Beispiel #18
0
	if err != nil {
		return fi, err
	}
	log.Printf("Reading %v", input)

	success := false
	defer func() {
		if success {
			input.Close()
		} else {
			util.CloseWhileHandlingError(err, input)
		}
	}()

	_, err = codec.CheckHeader(input,
		LUCENE42_FI_CODEC_NAME,
		LUCENE42_FI_FORMAT_START,
		LUCENE42_FI_FORMAT_CURRENT)
	if err != nil {
		return fi, err
	}

	size, err := input.ReadVInt() //read in the size
	if err != nil {
		return fi, err
	}
	log.Printf("Found %v FieldInfos.", size)

	infos := make([]model.FieldInfo, size)
	for i, _ := range infos {
		name, err := input.ReadString()
		if err != nil {
// Sole constructor
func newCompressingStoredFieldsReader(d store.Directory,
	si *model.SegmentInfo, segmentSuffix string,
	fn model.FieldInfos, ctx store.IOContext, formatName string,
	compressionMode CompressionMode) (r *CompressingStoredFieldsReader, err error) {

	r = &CompressingStoredFieldsReader{}
	r.compressionMode = compressionMode
	segment := si.Name
	r.fieldInfos = fn
	r.numDocs = si.DocCount()

	var indexStream store.ChecksumIndexInput
	success := false
	defer func() {
		if !success {
			util.CloseWhileSuppressingError(r, indexStream)
		}
	}()

	indexStreamFN := util.SegmentFileName(segment, segmentSuffix, lucene40.FIELDS_INDEX_EXTENSION)
	fieldsStreamFN := util.SegmentFileName(segment, segmentSuffix, lucene40.FIELDS_EXTENSION)
	// Load the index into memory
	if indexStream, err = d.OpenChecksumInput(indexStreamFN, ctx); err != nil {
		return nil, err
	}
	codecNameIdx := formatName + CODEC_SFX_IDX
	if r.version, err = int32AsInt(codec.CheckHeader(indexStream, codecNameIdx,
		VERSION_START, VERSION_CURRENT)); err != nil {
		return nil, err
	}
	assert(int64(codec.HeaderLength(codecNameIdx)) == indexStream.FilePointer())
	if r.indexReader, err = newCompressingStoredFieldsIndexReader(indexStream, si); err != nil {
		return nil, err
	}

	var maxPointer int64 = -1

	if r.version >= VERSION_CHECKSUM {
		if maxPointer, err = indexStream.ReadVLong(); err != nil {
			return nil, err
		}
		if _, err = codec.CheckFooter(indexStream); err != nil {
			return nil, err
		}
	} else {
		if err = codec.CheckEOF(indexStream); err != nil {
			return nil, err
		}
	}

	if err = indexStream.Close(); err != nil {
		return nil, err
	}
	indexStream = nil

	// Open the data file and read metadata
	if r.fieldsStream, err = d.OpenInput(fieldsStreamFN, ctx); err != nil {
		return nil, err
	}
	if r.version >= VERSION_CHECKSUM {
		if maxPointer+codec.FOOTER_LENGTH != r.fieldsStream.Length() {
			return nil, errors.New(fmt.Sprintf(
				"Invalid fieldsStream maxPointer (file truncated?): maxPointer=%v, length=%v",
				maxPointer, r.fieldsStream.Length()))
		}
	} else {
		maxPointer = r.fieldsStream.Length()
	}
	r.maxPointer = maxPointer
	codecNameDat := formatName + CODEC_SFX_DAT
	var fieldsVersion int
	if fieldsVersion, err = int32AsInt(codec.CheckHeader(r.fieldsStream,
		codecNameDat, VERSION_START, VERSION_CURRENT)); err != nil {
		return nil, err
	}
	assert2(r.version == fieldsVersion,
		"Version mismatch between stored fields index and data: %v != %v",
		r.version, fieldsVersion)
	assert(int64(codec.HeaderLength(codecNameDat)) == r.fieldsStream.FilePointer())

	r.chunkSize = -1
	if r.version >= VERSION_BIG_CHUNKS {
		if r.chunkSize, err = int32AsInt(r.fieldsStream.ReadVInt()); err != nil {
			return nil, err
		}
	}

	if r.packedIntsVersion, err = int32AsInt(r.fieldsStream.ReadVInt()); err != nil {
		return nil, err
	}
	r.decompressor = compressionMode.NewDecompressor()
	r.bytes = make([]byte, 0)

	if r.version >= VERSION_CHECKSUM {
		// NOTE: data file is too costly to verify checksum against all the
		// bytes on open, but fo rnow we at least verify proper structure
		// of the checksum footer: which looks for FOOTER_MATIC +
		// algorithmID. This is cheap and can detect some forms of
		// corruption such as file trucation.
		if _, err = codec.RetrieveChecksum(r.fieldsStream); err != nil {
			return nil, err
		}
	}

	success = true
	return r, nil
}
Beispiel #20
0
func (r *Lucene40SegmentInfoReader) Read(dir store.Directory,
	segment string, context store.IOContext) (si *SegmentInfo, err error) {

	si = new(SegmentInfo)
	fileName := util.SegmentFileName(segment, "", LUCENE40_SI_EXTENSION)
	input, err := dir.OpenInput(fileName, context)
	if err != nil {
		return nil, err
	}

	success := false
	defer func() {
		if !success {
			util.CloseWhileSuppressingError(input)
		} else {
			input.Close()
		}
	}()

	_, err = codec.CheckHeader(input, LUCENE40_CODEC_NAME, LUCENE40_VERSION_START, LUCENE40_VERSION_CURRENT)
	if err != nil {
		return nil, err
	}
	versionStr, err := input.ReadString()
	if err != nil {
		return nil, err
	}
	version, err := util.ParseVersion(versionStr)
	if err != nil {
		return nil, err
	}

	docCount, err := input.ReadInt()
	if err != nil {
		return nil, err
	}
	if docCount < 0 {
		return nil, errors.New(fmt.Sprintf("invalid docCount: %v (resource=%v)", docCount, input))
	}
	sicf, err := input.ReadByte()
	if err != nil {
		return nil, err
	}
	isCompoundFile := (sicf == SEGMENT_INFO_YES)
	diagnostics, err := input.ReadStringStringMap()
	if err != nil {
		return nil, err
	}
	_, err = input.ReadStringStringMap() // read deprecated attributes
	if err != nil {
		return nil, err
	}
	files, err := input.ReadStringSet()
	if err != nil {
		return nil, err
	}

	if err = codec.CheckEOF(input); err != nil {
		return nil, err
	}

	si = NewSegmentInfo(dir, version, segment, int(docCount), isCompoundFile, nil, diagnostics)
	si.SetFiles(files)

	success = true
	return si, nil
}