Example #1
/* Reads the most recent FieldInfos of the given segment info. */
func ReadFieldInfos(info *SegmentCommitInfo) (fis FieldInfos, err error) {
	var dir store.Directory
	var closeDir bool
	if info.FieldInfosGen() == -1 && info.Info.IsCompoundFile() {
		// no fieldInfos gen and segment uses a compound file
		if dir, err = store.NewCompoundFileDirectory(info.Info.Dir,
			util.SegmentFileName(info.Info.Name, "", store.COMPOUND_FILE_EXTENSION),
			store.IO_CONTEXT_READONCE, false); err != nil {
			return
		}
		closeDir = true
	} else {
		// gen'd FIS are read outside CFS, or the segment doesn't use a compound file
		dir = info.Info.Dir
		closeDir = false
	}

	defer func() {
		if closeDir {
			err = mergeError(err, dir.Close())
		}
	}()

	var segmentSuffix string
	if n := info.FieldInfosGen(); n != -1 {
		segmentSuffix = strconv.FormatInt(n, 36)
	}
	codec := info.Info.Codec().(Codec)
	fisFormat := codec.FieldInfosFormat()
	return fisFormat.FieldInfosReader()(dir, info.Info.Name, segmentSuffix, store.IO_CONTEXT_READONCE)
}
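The deferred mergeError(err, dir.Close()) pattern above is how these ported examples propagate a Close failure without masking an earlier error. Below is a minimal, self-contained sketch of the same idiom against the standard library; mergeError here is a stand-in with assumed keep-the-first-error semantics, not golucene's actual helper.

package main

import (
	"fmt"
	"os"
)

// mergeError stands in for golucene's helper; assumed behavior: keep the
// first non-nil error.
func mergeError(a, b error) error {
	if a != nil {
		return a
	}
	return b
}

func readSome(name string) (data []byte, err error) {
	f, err := os.Open(name)
	if err != nil {
		return nil, err
	}
	// Merge the Close error into the named return value, exactly as
	// ReadFieldInfos does with dir.Close above.
	defer func() { err = mergeError(err, f.Close()) }()

	buf := make([]byte, 16)
	n, err := f.Read(buf)
	if err != nil {
		return nil, err
	}
	return buf[:n], nil
}

func main() {
	if _, err := readSome("no-such-file"); err != nil {
		fmt.Println(err)
	}
}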
Example #2

func NewMockDirectoryWrapper(random *rand.Rand, delegate store.Directory) *MockDirectoryWrapper {
	ans := &MockDirectoryWrapper{
		noDeleteOpenFile:                 true,
		preventDoubleWrite:               true,
		trackDiskUsage:                   false,
		wrapLockFactory:                  true,
		openFilesForWrite:                make(map[string]bool),
		openLocks:                        make(map[string]bool),
		openLocksLock:                    &sync.Mutex{},
		throttling:                       THROTTLING_SOMETIMES,
		inputCloneCount:                  0,
		openFileHandles:                  make(map[io.Closer]error),
		failOnCreateOutput:               true,
		failOnOpenInput:                  true,
		assertNoUnreferencedFilesOnClose: true,
	}
	ans.BaseDirectoryWrapperImpl = NewBaseDirectoryWrapper(delegate)
	ans.Locker = &sync.Mutex{}
	// must make a private random since our methods are called from different
	// threads; else test failures may not be reproducible from the original
	// seed
	ans.randomState = rand.New(rand.NewSource(random.Int63()))
	ans.throttledOutput = NewThrottledIndexOutput(
		MBitsToBytes(40+ans.randomState.Intn(10)), 5+ans.randomState.Int63n(5), nil)
	// force wrapping of LockFactory
	ans.myLockFactory = newMockLockFactoryWrapper(ans, delegate.LockFactory())
	ans.init()
	return ans
}
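A small sketch of why the constructor derives a private PRNG from the caller's: seeding a child source from random.Int63() keeps the wrapper's behavior reproducible from the original seed even when other code also draws from the parent. The names below are illustrative, not golucene API.

package main

import (
	"fmt"
	"math/rand"
)

// newChild derives an independent, deterministic PRNG from the parent.
func newChild(parent *rand.Rand) *rand.Rand {
	return rand.New(rand.NewSource(parent.Int63()))
}

func main() {
	a := newChild(rand.New(rand.NewSource(42)))
	b := newChild(rand.New(rand.NewSource(42)))
	fmt.Println(a.Intn(10) == b.Intn(10)) // true: same seed, same sequence
}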
Example #3
/*
Writes this vector to the file name in Directory d, in a format that
can be read by the constructor BitVector(Directory, String, IOContext)
*/
func (bv *BitVector) Write(d store.Directory, name string, ctx store.IOContext) (err error) {
	assert(reflect.TypeOf(d).Name() != "CompoundFileDirectory")
	var output store.IndexOutput
	if output, err = d.CreateOutput(name, ctx); err != nil {
		return err
	}
	defer func() {
		err = mergeError(err, output.Close())
	}()

	if err = output.WriteInt(-2); err != nil {
		return err
	}
	if err = codec.WriteHeader(output, CODEC, BV_VERSION_CURRENT); err != nil {
		return err
	}
	if bv.isSparse() {
		// a sparse bit set is saved more efficiently as d-gaps.
		err = bv.writeClearedDgaps(output)
	} else {
		err = bv.writeBits(output)
	}
	if err != nil {
		return err
	}
	if err = codec.WriteFooter(output); err != nil {
		return err
	}
	bv.assertCount()
	return nil
}
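The isSparse branch above chooses d-gap encoding: a sparse bit set is stored as deltas between consecutive set bits, which are small numbers that compress well as varints. A minimal sketch of just the transformation (illustrative only; the real writeClearedDgaps also varint-encodes and tracks sizes):

package main

import "fmt"

// toDgaps converts positions of set bits into deltas between neighbors.
func toDgaps(setBits []int) []int {
	gaps := make([]int, len(setBits))
	prev := 0
	for i, b := range setBits {
		gaps[i] = b - prev // small numbers, which varint-encode compactly
		prev = b
	}
	return gaps
}

func main() {
	fmt.Println(toDgaps([]int{3, 700, 701, 90000})) // [3 697 1 89299]
}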
Example #4
/*
Commit live docs (writes new _X_N.del files) and field updates (writes
new field-updates files) to the directory; returns true if it wrote any
file, and false if there were no new deletes or updates to write.
*/
func (rld *ReadersAndUpdates) writeLiveDocs(dir store.Directory) (bool, error) {
	panic("not implemented yet")
	rld.Lock()
	defer rld.Unlock()

	log.Printf("rld.writeLiveDocs seg=%v pendingDelCount=%v", rld.info, rld._pendingDeleteCount)
	if rld._pendingDeleteCount != 0 {
		// We have new deletes
		assert(rld._liveDocs.Length() == rld.info.Info.DocCount())

		// Do this so we can delete any created files on error; this
		// saves all codecs from having to do it:
		trackingDir := store.NewTrackingDirectoryWrapper(dir)

		// We can write directly to the actual name (vs to a .tmp &
		// renaming it) because the file is not live until the segments file
		// is written:
		var success = false
		defer func() {
			if !success {
				// Advance only the nextWriteDelGen so that a 2nd attempt to
				// write will write to a new file
				rld.info.AdvanceNextWriteDelGen()

				// Delete any partially created file(s):
				trackingDir.EachCreatedFiles(func(filename string) {
					dir.DeleteFile(filename) // ignore error
				})
			}
		}()

		err := rld.info.Info.Codec().(Codec).LiveDocsFormat().WriteLiveDocs(rld._liveDocs.(util.MutableBits),
			trackingDir, rld.info, rld._pendingDeleteCount, store.IO_CONTEXT_DEFAULT)
		if err != nil {
			return false, err
		}
		success = true

		// If we hit an error in the line above (e.g. disk full) then
		// info's delGen remains pointing to the previous (successfully
		// written) del docs:
		rld.info.AdvanceDelGen()
		rld.info.SetDelCount(rld.info.DelCount() + rld._pendingDeleteCount)
		assert(rld.info.DelCount() <= rld.info.Info.DocCount())

		rld._pendingDeleteCount = 0
		return true, nil
	}
	return false, nil
}
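writeLiveDocs wraps the directory in a TrackingDirectoryWrapper so that a failure can delete exactly the files it created. A self-contained sketch of that pattern, with plain os calls standing in for store.Directory:

package main

import (
	"errors"
	"fmt"
	"os"
)

// trackingFS records every file it creates, like TrackingDirectoryWrapper.
type trackingFS struct{ created []string }

func (t *trackingFS) create(name string) (*os.File, error) {
	f, err := os.Create(name)
	if err == nil {
		t.created = append(t.created, name)
	}
	return f, err
}

func writeFiles() (err error) {
	t := &trackingFS{}
	success := false
	defer func() {
		if !success {
			// Delete any partially created file(s); codecs need not do it.
			for _, name := range t.created {
				os.Remove(name) // best effort, ignore error
			}
		}
	}()

	f, err := t.create("live-docs.tmp")
	if err != nil {
		return err
	}
	f.Close()
	return errors.New("disk full") // simulated mid-write failure
}

func main() { fmt.Println(writeFiles()) } // live-docs.tmp was cleaned up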
Example #5
func (w *Lucene40SegmentInfoWriter) Write(dir store.Directory,
	si *SegmentInfo, fis FieldInfos, ctx store.IOContext) (err error) {

	filename := util.SegmentFileName(si.Name, "", LUCENE40_SI_EXTENSION)
	si.AddFile(filename)

	var output store.IndexOutput
	output, err = dir.CreateOutput(filename, ctx)
	if err != nil {
		return err
	}

	var success = false
	defer func() {
		if !success {
			util.CloseWhileSuppressingError(output)
			si.Dir.DeleteFile(filename) // ignore error
		} else {
			err = mergeError(err, output.Close())
		}
	}()

	err = codec.WriteHeader(output, LUCENE40_CODEC_NAME, LUCENE40_VERSION_CURRENT)
	if err != nil {
		return err
	}
	// Write the Lucene version that created this segment, since 3.1
	err = store.Stream(output).WriteString(si.Version().String()).
		WriteInt(int32(si.DocCount())).
		WriteByte(func() byte {
			if si.IsCompoundFile() {
				return SEGMENT_INFO_YES
			}
			return byte((SEGMENT_INFO_NO + 256) % 256) // Go byte is non-negative, unlike Java
		}()).WriteStringStringMap(si.Diagnostics()).
		WriteStringStringMap(map[string]string{}).
		WriteStringSet(si.Files()).Close()
	if err != nil {
		return err
	}

	success = true
	return nil
}
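The (SEGMENT_INFO_NO + 256) % 256 expression above works around Go's unsigned byte: Java writes the byte -1, so the Go port must map it to 255 to stay wire-compatible. A tiny sketch, assuming the constant's value is -1 as the comment implies:

package main

import "fmt"

const segmentInfoNo = -1 // assumed value of SEGMENT_INFO_NO

func main() {
	b := byte((segmentInfoNo + 256) % 256)
	fmt.Println(b, int8(b)) // 255 -1: written as 255, read back as Java's -1
}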
Example #6
/*
A utility for writing the SEGMENTS_GEN file to a Directory.

NOTE: this is an internal utility which is kept public so that it's
accessible by code from other packages. You should avoid calling this
method unless you're absolutely sure what you're doing!
*/
func writeSegmentsGen(dir store.Directory, generation int64) {
	if err := func() (err error) {
		var genOutput store.IndexOutput
		genOutput, err = dir.CreateOutput(INDEX_FILENAME_SEGMENTS_GEN, store.IO_CONTEXT_READONCE)
		if err != nil {
			return err
		}

		defer func() {
			err = mergeError(err, genOutput.Close())
			err = mergeError(err, dir.Sync([]string{INDEX_FILENAME_SEGMENTS_GEN}))
		}()

		if err = genOutput.WriteInt(FORMAT_SEGMENTS_GEN_CURRENT); err == nil {
			if err = genOutput.WriteLong(generation); err == nil {
				if err = genOutput.WriteLong(generation); err == nil {
					err = codec.WriteFooter(genOutput)
				}
			}
		}
		return err
	}(); err != nil {
		// It's OK if we fail to write this file since it's used only as
		// one of the retry fallbacks.
		dir.DeleteFile(INDEX_FILENAME_SEGMENTS_GEN)
		// Ignore error; this file is only used in a retry fallback on init
	}
}
Example #7
/*
Returns true if an index likely exists at the specified directory. Note that
if a corrupt index exists, or if an index is in the process of committing,
this may still return true.
*/
func IsIndexExists(directory store.Directory) (ok bool, err error) {
	// LUCENE-2812, LUCENE-2727, LUCENE-4738: this logic will
	// return true in cases that should arguably be false,
	// such as only IW.prepareCommit has been called, or a
	// corrupt first commit, but it's too deadly to make
	// this logic "smarter" and risk accidentally returning
	// false due to various cases like file descriptor
	// exhaustion, access denied, etc., because in that
	// case IndexWriter may delete the entire index.  It's
	// safer to err towards "index exists" than try to be
	// smart about detecting not-yet-fully-committed or
	// corrupt indices.  This means that IndexWriter will
	// throw an exception on such indices and the app must
	// resolve the situation manually:
	var files []string
	files, err = directory.ListAll()
	if _, ok := err.(*store.NoSuchDirectoryError); ok {
		// Directory does not exist --> no index exists
		return false, nil
	} else if err != nil {
		return false, err
	}
	return IsIndexFileExists(files), nil
}
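The type assertion on *store.NoSuchDirectoryError turns one specific failure into a normal "no index" answer while still propagating everything else. A sketch of the same classification with a stand-in error type:

package main

import (
	"errors"
	"fmt"
)

// NoSuchDirectoryError stands in for store.NoSuchDirectoryError.
type NoSuchDirectoryError struct{ dir string }

func (e *NoSuchDirectoryError) Error() string { return "no such directory: " + e.dir }

func listAll() ([]string, error) { return nil, &NoSuchDirectoryError{"idx"} }

func indexExists() (bool, error) {
	_, err := listAll()
	var nsd *NoSuchDirectoryError
	if errors.As(err, &nsd) {
		return false, nil // a missing directory means "no index", not failure
	} else if err != nil {
		return false, err // everything else stays an error
	}
	return true, nil
}

func main() { fmt.Println(indexExists()) } // false <nil>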
Example #8
func NewLucene41PostingsReader(dir store.Directory,
	fis model.FieldInfos, si *model.SegmentInfo,
	ctx store.IOContext, segmentSuffix string) (r PostingsReaderBase, err error) {

	log.Print("Initializing Lucene41PostingsReader...")
	success := false
	var docIn, posIn, payIn store.IndexInput = nil, nil, nil
	defer func() {
		if !success {
			log.Print("Failed to initialize Lucene41PostingsReader.")
			if err != nil {
				log.Print("DEBUG ", err)
			}
			util.CloseWhileSuppressingError(docIn, posIn, payIn)
		}
	}()

	docIn, err = dir.OpenInput(util.SegmentFileName(si.Name, segmentSuffix, LUCENE41_DOC_EXTENSION), ctx)
	if err != nil {
		return r, err
	}
	_, err = codec.CheckHeader(docIn, LUCENE41_DOC_CODEC, LUCENE41_VERSION_CURRENT, LUCENE41_VERSION_CURRENT)
	if err != nil {
		return r, err
	}
	forUtil, err := NewForUtil(docIn)
	if err != nil {
		return r, err
	}

	if fis.HasProx {
		posIn, err = dir.OpenInput(util.SegmentFileName(si.Name, segmentSuffix, LUCENE41_POS_EXTENSION), ctx)
		if err != nil {
			return r, err
		}
		_, err = codec.CheckHeader(posIn, LUCENE41_POS_CODEC, LUCENE41_VERSION_CURRENT, LUCENE41_VERSION_CURRENT)
		if err != nil {
			return r, err
		}

		if fis.HasPayloads || fis.HasOffsets {
			payIn, err = dir.OpenInput(util.SegmentFileName(si.Name, segmentSuffix, LUCENE41_PAY_EXTENSION), ctx)
			if err != nil {
				return r, err
			}
			_, err = codec.CheckHeader(payIn, LUCENE41_PAY_CODEC, LUCENE41_VERSION_CURRENT, LUCENE41_VERSION_CURRENT)
			if err != nil {
				return r, err
			}
		}
	}

	success = true
	return &Lucene41PostingsReader{docIn, posIn, payIn, forUtil}, nil
}
Example #9
/*
Read a particular segmentFileName. Note that this may return an IO
error if a commit is in progress.
*/
func (sis *SegmentInfos) Read(directory store.Directory, segmentFileName string) (err error) {
	// fmt.Printf("Reading segment info from %v...\n", segmentFileName)

	// Clear any previous segments:
	sis.Clear()

	sis.generation = GenerationFromSegmentsFileName(segmentFileName)
	sis.lastGeneration = sis.generation

	var input store.ChecksumIndexInput
	if input, err = directory.OpenChecksumInput(segmentFileName, store.IO_CONTEXT_READ); err != nil {
		return
	}

	var success = false
	defer func() {
		if !success {
			// Clear any segment infos we had loaded so we
			// have a clean slate on retry:
			sis.Clear()
			util.CloseWhileSuppressingError(input)
		} else {
			err = input.Close()
		}
	}()

	var format int
	if format, err = asInt(input.ReadInt()); err != nil {
		return
	}

	var actualFormat int
	if format == codec.CODEC_MAGIC {
		// 4.0+
		if actualFormat, err = asInt(codec.CheckHeaderNoMagic(input, "segments", VERSION_40, VERSION_49)); err != nil {
			return
		}
		if sis.version, err = input.ReadLong(); err != nil {
			return
		}
		if sis.counter, err = asInt(input.ReadInt()); err != nil {
			return
		}
		var numSegments int
		if numSegments, err = asInt(input.ReadInt()); err != nil {
			return
		} else if numSegments < 0 {
			return errors.New(fmt.Sprintf("invalid segment count: %v (resource: %v)", numSegments, input))
		}
		var segName, codecName string
		var fCodec Codec
		var delGen, fieldInfosGen, dvGen int64
		var delCount int
		for seg := 0; seg < numSegments; seg++ {
			if segName, err = input.ReadString(); err != nil {
				return
			}
			if codecName, err = input.ReadString(); err != nil {
				return
			}
			fCodec = LoadCodec(codecName)
			assert2(fCodec != nil, "Invalid codec name: %v", codecName)
			// fmt.Printf("SIS.read seg=%v codec=%v\n", seg, fCodec)
			var info *SegmentInfo
			if info, err = fCodec.SegmentInfoFormat().SegmentInfoReader().Read(directory, segName, store.IO_CONTEXT_READ); err != nil {
				return
			}
			info.SetCodec(fCodec)
			if delGen, err = input.ReadLong(); err != nil {
				return
			}
			if delCount, err = asInt(input.ReadInt()); err != nil {
				return
			} else if delCount < 0 || delCount > info.DocCount() {
				return errors.New(fmt.Sprintf(
					"invalid deletion count: %v vs docCount=%v (resource: %v)",
					delCount, info.DocCount(), input))
			}
			fieldInfosGen = -1
			if actualFormat >= VERSION_46 {
				if fieldInfosGen, err = input.ReadLong(); err != nil {
					return
				}
			}
			dvGen = -1
			if actualFormat >= VERSION_49 {
				if dvGen, err = input.ReadLong(); err != nil {
					return
				}
			} else {
				dvGen = fieldInfosGen
			}
			siPerCommit := NewSegmentCommitInfo(info, delCount, delGen, fieldInfosGen, dvGen)
			if actualFormat >= VERSION_46 {
				if actualFormat < VERSION_49 {
					panic("not implemented yet")
				} else {
					var ss map[string]bool
					if ss, err = input.ReadStringSet(); err != nil {
						return err
					}
					siPerCommit.SetFieldInfosFiles(ss)
					var dvUpdatesFiles map[int]map[string]bool
					var numDVFields int
					if numDVFields, err = asInt(input.ReadInt()); err != nil {
						return err
					}
					if numDVFields == 0 {
						dvUpdatesFiles = make(map[int]map[string]bool)
					} else {
						panic("not implemented yet")
					}
					siPerCommit.SetDocValuesUpdatesFiles(dvUpdatesFiles)
				}
			}
			sis.Segments = append(sis.Segments, siPerCommit)
		}
		if sis.userData, err = input.ReadStringStringMap(); err != nil {
			return err
		}
	} else {
		// TODO support <4.0 index
		panic("Index format pre-4.0 not supported yet")
	}

	if actualFormat >= VERSION_48 {
		if _, err = codec.CheckFooter(input); err != nil {
			return
		}
	} else {
		var checksumNow = int64(input.Checksum())
		var checksumThen int64
		if checksumThen, err = input.ReadLong(); err != nil {
			return
		}
		if checksumNow != checksumThen {
			return errors.New(fmt.Sprintf(
				"checksum mismatch in segments file: %v vs %v (resource: %v)",
				checksumNow, checksumThen, input))
		}
		if err = codec.CheckEOF(input); err != nil {
			return
		}
	}

	success = true
	return nil
}
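Both the segments_N name parsed by GenerationFromSegmentsFileName and the FieldInfos gen suffix in Example #1 encode the generation in base 36 (note the strconv.FormatInt(n, 36) there). A sketch of that scheme; the helper names below are illustrative:

package main

import (
	"fmt"
	"strconv"
)

func segmentsFileName(gen int64) string {
	return "segments_" + strconv.FormatInt(gen, 36)
}

func generationFromName(name string) (int64, error) {
	return strconv.ParseInt(name[len("segments_"):], 36, 64)
}

func main() {
	name := segmentsFileName(71)
	gen, _ := generationFromName(name)
	fmt.Println(name, gen) // segments_1z 71
}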
Example #10
/*
Constructs a new IndexWriter per the settings given in conf. If you want to
make "live" changes to this writer instance, use Config().

NOTE: after this writer is created, the given configuration instance cannot be
passed to another writer. If you intend to do so, you should clone it
beforehand.
*/
func NewIndexWriter(d store.Directory, conf *IndexWriterConfig) (w *IndexWriter, err error) {
	ans := &IndexWriter{
		Locker:         &sync.Mutex{},
		ClosingControl: newClosingControl(),

		segmentsToMerge: make(map[*SegmentInfoPerCommit]bool),
		mergeExceptions: make([]*OneMerge, 0),
		doAfterFlush:    func() error { return nil },
		doBeforeFlush:   func() error { return nil },
		commitLock:      &sync.Mutex{},
		fullFlushLock:   &sync.Mutex{},

		config:         newLiveIndexWriterConfigFrom(conf),
		directory:      d,
		analyzer:       conf.analyzer,
		infoStream:     conf.infoStream,
		mergePolicy:    conf.mergePolicy,
		mergeScheduler: conf.mergeScheduler,
		codec:          conf.codec,

		bufferedDeletesStream: newBufferedDeletesStream(conf.infoStream),
		poolReaders:           conf.readerPooling,

		writeLock: d.MakeLock(WRITE_LOCK_NAME),
	}
	ans.readerPool = newReaderPool(ans)
	ans.MergeControl = newMergeControl(conf.infoStream, ans.readerPool)

	conf.setIndexWriter(ans)
	ans.mergePolicy.SetIndexWriter(ans)

	// obtain write lock
	if ok, err := ans.writeLock.ObtainWithin(conf.writeLockTimeout); !ok || err != nil {
		if err != nil {
			return nil, err
		}
		return nil, errors.New(fmt.Sprintf("Index locked for write: %v", ans.writeLock))
	}

	var success bool = false
	defer func() {
		if !success {
			if ans.infoStream.IsEnabled("IW") {
				ans.infoStream.Message("IW", "init: hit exception on init; releasing write lock")
			}
			ans.writeLock.Release() // don't mask the original exception
			ans.writeLock = nil
		}
	}()

	var create bool
	switch conf.openMode {
	case OPEN_MODE_CREATE:
		create = true
	case OPEN_MODE_APPEND:
		create = false
	default:
		// CREATE_OR_APPEND - create only if an index does not exist
		ok, err := IsIndexExists(d)
		if err != nil {
			return nil, err
		}
		create = !ok
	}

	// If index is too old, reading the segments will return
	// IndexFormatTooOldError
	ans.segmentInfos = &SegmentInfos{}

	var initialIndexExists bool = true

	if create {
		// Try to read first. This is to allow create against an index
		// that's currently open for searching. In this case we write the
		// next segments_N file with no segments:
		err = ans.segmentInfos.ReadAll(d)
		if err == nil {
			ans.segmentInfos.Clear()
		} else {
			// Likely this means it's a fresh directory
			initialIndexExists = false
			err = nil
		}

		// Record that we have a change (zero out all segments) pending:
		ans.changed()
	} else {
		err = ans.segmentInfos.ReadAll(d)
		if err != nil {
			return
		}

		if commit := conf.commit; commit != nil {
			// Swap out all segments, but keep metadata in SegmentInfos,
			// like version & generation, to preserve write-once. This is
			// important if readers are open against the future commit
			// points.
			assert2(commit.Directory() == d,
				"IndexCommit's directory doesn't match my directory")
			oldInfos := &SegmentInfos{}
			if err = oldInfos.Read(d, commit.SegmentsFileName()); err != nil {
				return
			}
			ans.segmentInfos.replace(oldInfos)
			ans.changed()
			ans.infoStream.Message("IW", "init: loaded commit '%v'",
				commit.SegmentsFileName())
		}
	}

	ans.rollbackSegments = ans.segmentInfos.createBackupSegmentInfos()

	// start with previous field numbers, but new FieldInfos
	ans.globalFieldNumberMap, err = ans.fieldNumberMap()
	if err != nil {
		return
	}
	ans.config.flushPolicy.init(ans.config)
	ans.docWriter = newDocumentsWriter(ans, ans.config, d)
	ans.eventQueue = ans.docWriter.events

	// Default deleter (for backwards compatibility) is
	// KeepOnlyLastCommitDeleter:
	ans.deleter, err = newIndexFileDeleter(d, conf.delPolicy,
		ans.segmentInfos, ans.infoStream, ans, initialIndexExists)
	if err != nil {
		return
	}

	if ans.deleter.startingCommitDeleted {
		// Deletion policy deleted the "head" commit point. We have to
		// mark ourselves as changed so that if we are closed w/o any
		// further changes we write a new segments_N file.
		ans.changed()
	}

	if ans.infoStream.IsEnabled("IW") {
		ans.infoStream.Message("IW", "init: create=%v", create)
		ans.messageState()
	}

	success = true
	return ans, nil
}
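A sketch of the open-mode decision in the middle of this constructor: CREATE always starts empty, APPEND never does, and CREATE_OR_APPEND creates only when IsIndexExists reports no index. The constants below are illustrative, not golucene's:

package main

import "fmt"

type openMode int

const (
	openModeCreate openMode = iota
	openModeAppend
	openModeCreateOrAppend
)

// shouldCreate mirrors the switch on conf.openMode above.
func shouldCreate(mode openMode, indexExists bool) bool {
	switch mode {
	case openModeCreate:
		return true // always start a new, empty index
	case openModeAppend:
		return false // never wipe; opening fails later if nothing is there
	default: // openModeCreateOrAppend
		return !indexExists // create only against a fresh directory
	}
}

func main() {
	fmt.Println(shouldCreate(openModeCreateOrAppend, false)) // true
}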
Example #11
func (r *Lucene40SegmentInfoReader) Read(dir store.Directory,
	segment string, context store.IOContext) (si *SegmentInfo, err error) {

	si = new(SegmentInfo)
	fileName := util.SegmentFileName(segment, "", LUCENE40_SI_EXTENSION)
	input, err := dir.OpenInput(fileName, context)
	if err != nil {
		return nil, err
	}

	success := false
	defer func() {
		if !success {
			util.CloseWhileSuppressingError(input)
		} else {
			input.Close()
		}
	}()

	_, err = codec.CheckHeader(input, LUCENE40_CODEC_NAME, LUCENE40_VERSION_START, LUCENE40_VERSION_CURRENT)
	if err != nil {
		return nil, err
	}
	versionStr, err := input.ReadString()
	if err != nil {
		return nil, err
	}
	version, err := util.ParseVersion(versionStr)
	if err != nil {
		return nil, err
	}

	docCount, err := input.ReadInt()
	if err != nil {
		return nil, err
	}
	if docCount < 0 {
		return nil, errors.New(fmt.Sprintf("invalid docCount: %v (resource=%v)", docCount, input))
	}
	sicf, err := input.ReadByte()
	if err != nil {
		return nil, err
	}
	isCompoundFile := (sicf == SEGMENT_INFO_YES)
	diagnostics, err := input.ReadStringStringMap()
	if err != nil {
		return nil, err
	}
	_, err = input.ReadStringStringMap() // read deprecated attributes
	if err != nil {
		return nil, err
	}
	files, err := input.ReadStringSet()
	if err != nil {
		return nil, err
	}

	if err = codec.CheckEOF(input); err != nil {
		return nil, err
	}

	si = NewSegmentInfo(dir, version, segment, int(docCount), isCompoundFile, nil, diagnostics)
	si.SetFiles(files)

	success = true
	return si, nil
}
Example #12
// Sole constructor
func newCompressingStoredFieldsReader(d store.Directory,
	si *model.SegmentInfo, segmentSuffix string,
	fn model.FieldInfos, ctx store.IOContext, formatName string,
	compressionMode CompressionMode) (r *CompressingStoredFieldsReader, err error) {

	r = &CompressingStoredFieldsReader{}
	r.compressionMode = compressionMode
	segment := si.Name
	r.fieldInfos = fn
	r.numDocs = si.DocCount()

	var indexStream store.ChecksumIndexInput
	success := false
	defer func() {
		if !success {
			util.CloseWhileSuppressingError(r, indexStream)
		}
	}()

	indexStreamFN := util.SegmentFileName(segment, segmentSuffix, lucene40.FIELDS_INDEX_EXTENSION)
	fieldsStreamFN := util.SegmentFileName(segment, segmentSuffix, lucene40.FIELDS_EXTENSION)
	// Load the index into memory
	if indexStream, err = d.OpenChecksumInput(indexStreamFN, ctx); err != nil {
		return nil, err
	}
	codecNameIdx := formatName + CODEC_SFX_IDX
	if r.version, err = int32AsInt(codec.CheckHeader(indexStream, codecNameIdx,
		VERSION_START, VERSION_CURRENT)); err != nil {
		return nil, err
	}
	assert(int64(codec.HeaderLength(codecNameIdx)) == indexStream.FilePointer())
	if r.indexReader, err = newCompressingStoredFieldsIndexReader(indexStream, si); err != nil {
		return nil, err
	}

	var maxPointer int64 = -1

	if r.version >= VERSION_CHECKSUM {
		if maxPointer, err = indexStream.ReadVLong(); err != nil {
			return nil, err
		}
		if _, err = codec.CheckFooter(indexStream); err != nil {
			return nil, err
		}
	} else {
		if err = codec.CheckEOF(indexStream); err != nil {
			return nil, err
		}
	}

	if err = indexStream.Close(); err != nil {
		return nil, err
	}
	indexStream = nil

	// Open the data file and read metadata
	if r.fieldsStream, err = d.OpenInput(fieldsStreamFN, ctx); err != nil {
		return nil, err
	}
	if r.version >= VERSION_CHECKSUM {
		if maxPointer+codec.FOOTER_LENGTH != r.fieldsStream.Length() {
			return nil, errors.New(fmt.Sprintf(
				"Invalid fieldsStream maxPointer (file truncated?): maxPointer=%v, length=%v",
				maxPointer, r.fieldsStream.Length()))
		}
	} else {
		maxPointer = r.fieldsStream.Length()
	}
	r.maxPointer = maxPointer
	codecNameDat := formatName + CODEC_SFX_DAT
	var fieldsVersion int
	if fieldsVersion, err = int32AsInt(codec.CheckHeader(r.fieldsStream,
		codecNameDat, VERSION_START, VERSION_CURRENT)); err != nil {
		return nil, err
	}
	assert2(r.version == fieldsVersion,
		"Version mismatch between stored fields index and data: %v != %v",
		r.version, fieldsVersion)
	assert(int64(codec.HeaderLength(codecNameDat)) == r.fieldsStream.FilePointer())

	r.chunkSize = -1
	if r.version >= VERSION_BIG_CHUNKS {
		if r.chunkSize, err = int32AsInt(r.fieldsStream.ReadVInt()); err != nil {
			return nil, err
		}
	}

	if r.packedIntsVersion, err = int32AsInt(r.fieldsStream.ReadVInt()); err != nil {
		return nil, err
	}
	r.decompressor = compressionMode.NewDecompressor()
	r.bytes = make([]byte, 0)

	if r.version >= VERSION_CHECKSUM {
		// NOTE: data file is too costly to verify checksum against all the
		// bytes on open, but for now we at least verify proper structure
		// of the checksum footer: which looks for FOOTER_MAGIC +
		// algorithmID. This is cheap and can detect some forms of
		// corruption such as file truncation.
		if _, err = codec.RetrieveChecksum(r.fieldsStream); err != nil {
			return nil, err
		}
	}

	success = true
	return r, nil
}
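codec.RetrieveChecksum above validates only the fixed-size footer rather than checksumming the whole data file on open. A sketch of that cheap structural check; the byte layout and magic value shown are illustrative of the idea, not the exact Lucene format:

package main

import (
	"encoding/binary"
	"errors"
	"fmt"
)

const footerMagic = 0xB01DFACE // illustrative value, not Lucene's constant

// checkFooterStructure validates only the trailing 12 bytes:
// magic (4) + algorithm id (4) + stored checksum (4).
func checkFooterStructure(file []byte) (uint32, error) {
	if len(file) < 12 {
		return 0, errors.New("file truncated: no room for codec footer")
	}
	foot := file[len(file)-12:]
	if binary.BigEndian.Uint32(foot[0:4]) != footerMagic {
		return 0, errors.New("codec footer mismatch (file truncated?)")
	}
	return binary.BigEndian.Uint32(foot[8:12]), nil // the stored checksum
}

func main() {
	file := []byte{1, 2, 3, 0xB0, 0x1D, 0xFA, 0xCE, 0, 0, 0, 0, 0, 0, 0, 42}
	fmt.Println(checkFooterStructure(file)) // 42 <nil>
}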
Example #13
func NewLucene41PostingsReader(dir store.Directory,
	fis FieldInfos, si *SegmentInfo,
	ctx store.IOContext, segmentSuffix string) (r PostingsReaderBase, err error) {

	// fmt.Println("Initializing Lucene41PostingsReader...")
	success := false
	var docIn, posIn, payIn store.IndexInput = nil, nil, nil
	defer func() {
		if !success {
			fmt.Println("Failed to initialize Lucene41PostingsReader.")
			util.CloseWhileSuppressingError(docIn, posIn, payIn)
		}
	}()

	docIn, err = dir.OpenInput(util.SegmentFileName(si.Name, segmentSuffix, LUCENE41_DOC_EXTENSION), ctx)
	if err != nil {
		return nil, err
	}
	var version int32
	version, err = codec.CheckHeader(docIn, LUCENE41_DOC_CODEC, LUCENE41_VERSION_START, LUCENE41_VERSION_CURRENT)
	if err != nil {
		return nil, err
	}
	forUtil, err := NewForUtilFrom(docIn)
	if err != nil {
		return nil, err
	}

	if version >= LUCENE41_VERSION_CHECKSUM {
		// NOTE: data file is too costly to verify checksum against all the
		// bytes on open, but for now we at least verify proper structure
		// of the checksum footer: which looks for FOOTER_MAGIC +
		// algorithmID. This is cheap and can detect some forms of
		// corruption such as file truncation.
		if _, err = codec.RetrieveChecksum(docIn); err != nil {
			return nil, err
		}
	}

	if fis.HasProx {
		posIn, err = dir.OpenInput(util.SegmentFileName(si.Name, segmentSuffix, LUCENE41_POS_EXTENSION), ctx)
		if err != nil {
			return nil, err
		}
		_, err = codec.CheckHeader(posIn, LUCENE41_POS_CODEC, version, version)
		if err != nil {
			return nil, err
		}

		if version >= LUCENE41_VERSION_CHECKSUM {
			// NOTE: data file is too costly to verify checksum against all the
			// bytes on open, but for now we at least verify proper structure
			// of the checksum footer: which looks for FOOTER_MAGIC +
			// algorithmID. This is cheap and can detect some forms of
			// corruption such as file truncation.
			if _, err = codec.RetrieveChecksum(posIn); err != nil {
				return nil, err
			}
		}

		if fis.HasPayloads || fis.HasOffsets {
			payIn, err = dir.OpenInput(util.SegmentFileName(si.Name, segmentSuffix, LUCENE41_PAY_EXTENSION), ctx)
			if err != nil {
				return nil, err
			}
			_, err = codec.CheckHeader(payIn, LUCENE41_PAY_CODEC, version, version)
			if err != nil {
				return nil, err
			}

			if version >= LUCENE41_VERSION_CHECKSUM {
				// NOTE: data file is too costly to verify checksum against all the
				// bytes on open, but for now we at least verify proper structure
				// of the checksum footer: which looks for FOOTER_MAGIC +
				// algorithmID. This is cheap and can detect some forms of
				// corruption such as file truncation.
				if _, err = codec.RetrieveChecksum(payIn); err != nil {
					return nil, err
				}

			}
		}
	}

	success = true
	return &Lucene41PostingsReader{docIn, posIn, payIn, forUtil, int(version)}, nil
}
Example #14
func newBlockTreeTermsReader(dir store.Directory,
	fieldInfos model.FieldInfos, info *model.SegmentInfo,
	postingsReader PostingsReaderBase, ctx store.IOContext,
	segmentSuffix string, indexDivisor int) (p FieldsProducer, err error) {

	log.Print("Initializing BlockTreeTermsReader...")
	fp := &BlockTreeTermsReader{
		postingsReader: postingsReader,
		fields:         make(map[string]FieldReader),
		segment:        info.Name,
	}
	fp.in, err = dir.OpenInput(util.SegmentFileName(info.Name, segmentSuffix, BTT_EXTENSION), ctx)
	if err != nil {
		return fp, err
	}

	success := false
	var indexIn store.IndexInput
	defer func() {
		if !success {
			log.Print("Failed to initialize BlockTreeTermsReader.")
			if err != nil {
				log.Print("DEBUG ", err)
			}
			// this.close() will close in:
			util.CloseWhileSuppressingError(indexIn, fp)
		}
	}()

	fp.version, err = fp.readHeader(fp.in)
	if err != nil {
		return fp, err
	}
	log.Printf("Version: %v", fp.version)

	if indexDivisor != -1 {
		indexIn, err = dir.OpenInput(util.SegmentFileName(info.Name, segmentSuffix, BTT_INDEX_EXTENSION), ctx)
		if err != nil {
			return fp, err
		}

		indexVersion, err := fp.readIndexHeader(indexIn)
		if err != nil {
			return fp, err
		}
		log.Printf("Index version: %v", indexVersion)
		if int(indexVersion) != fp.version {
			return fp, errors.New(fmt.Sprintf("mismatched version files: %v=%v,%v=%v", fp.in, fp.version, indexIn, indexVersion))
		}
	}

	// Have PostingsReader init itself
	postingsReader.Init(fp.in)

	// Read per-field details
	fp.seekDir(fp.in, fp.dirOffset)
	if indexDivisor != -1 {
		fp.seekDir(indexIn, fp.indexDirOffset)
	}

	numFields, err := fp.in.ReadVInt()
	if err != nil {
		return fp, err
	}
	log.Printf("Fields number: %v", numFields)
	if numFields < 0 {
		return fp, errors.New(fmt.Sprintf("invalid numFields: %v (resource=%v)", numFields, fp.in))
	}

	for i := int32(0); i < numFields; i++ {
		log.Printf("Next field...")
		field, err := fp.in.ReadVInt()
		if err != nil {
			return fp, err
		}
		log.Printf("Field: %v", field)

		numTerms, err := fp.in.ReadVLong()
		if err != nil {
			return fp, err
		}
		// assert numTerms >= 0
		log.Printf("Terms number: %v", numTerms)

		numBytes, err := fp.in.ReadVInt()
		if err != nil {
			return fp, err
		}
		log.Printf("Bytes number: %v", numBytes)

		rootCode := make([]byte, numBytes)
		err = fp.in.ReadBytes(rootCode)
		if err != nil {
			return fp, err
		}
		fieldInfo := fieldInfos.FieldInfoByNumber(int(field))
		// assert fieldInfo != nil
		var sumTotalTermFreq int64
		if fieldInfo.IndexOptions() == model.INDEX_OPT_DOCS_ONLY {
			sumTotalTermFreq = -1
		} else {
			sumTotalTermFreq, err = fp.in.ReadVLong()
			if err != nil {
				return fp, err
			}
		}
		sumDocFreq, err := fp.in.ReadVLong()
		if err != nil {
			return fp, err
		}
		docCount, err := fp.in.ReadVInt()
		if err != nil {
			return fp, err
		}
		log.Printf("DocCount: %v", docCount)
		if docCount < 0 || int(docCount) > info.DocCount() { // #docs with field must be <= #docs
			return fp, errors.New(fmt.Sprintf(
				"invalid docCount: %v maxDoc: %v (resource=%v)",
				docCount, info.DocCount(), fp.in))
		}
		if sumDocFreq < int64(docCount) { // #postings must be >= #docs with field
			return fp, errors.New(fmt.Sprintf(
				"invalid sumDocFreq: %v docCount: %v (resource=%v)",
				sumDocFreq, docCount, fp.in))
		}
		if sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq { // #positions must be >= #postings
			return fp, errors.New(fmt.Sprintf(
				"invalid sumTotalTermFreq: %v sumDocFreq: %v (resource=%v)",
				sumTotalTermFreq, sumDocFreq, fp.in))
		}

		var indexStartFP int64
		if indexDivisor != -1 {
			indexStartFP, err = indexIn.ReadVLong()
			if err != nil {
				return fp, err
			}
		}
		log.Printf("indexStartFP: %v", indexStartFP)
		if _, ok := fp.fields[fieldInfo.Name]; ok {
			return fp, errors.New(fmt.Sprintf(
				"duplicate field: %v (resource=%v)", fieldInfo.Name, fp.in))
		}
		fp.fields[fieldInfo.Name], err = newFieldReader(fp,
			fieldInfo, numTerms, rootCode, sumTotalTermFreq,
			sumDocFreq, docCount, indexStartFP, indexIn)
		if err != nil {
			return fp, err
		}
		log.Print("DEBUG field processed.")
	}

	if indexDivisor != -1 {
		err = indexIn.Close()
		if err != nil {
			return fp, err
		}
	}

	success = true

	return fp, nil
}
Example #15
func NewCompressingStoredFieldsWriter(dir store.Directory, si *model.SegmentInfo,
	segmentSuffix string, ctx store.IOContext, formatName string,
	compressionMode CompressionMode, chunkSize int) (*CompressingStoredFieldsWriter, error) {

	assert(dir != nil)
	ans := &CompressingStoredFieldsWriter{
		directory:       dir,
		segment:         si.Name,
		segmentSuffix:   segmentSuffix,
		compressionMode: compressionMode,
		compressor:      compressionMode.NewCompressor(),
		chunkSize:       chunkSize,
		docBase:         0,
		bufferedDocs:    newGrowableByteArrayDataOutput(chunkSize),
		numStoredFields: make([]int, 16),
		endOffsets:      make([]int, 16),
		numBufferedDocs: 0,
	}

	var success = false
	indexStream, err := dir.CreateOutput(util.SegmentFileName(si.Name, segmentSuffix,
		lucene40.FIELDS_INDEX_EXTENSION), ctx)
	if err != nil {
		return nil, err
	}
	assert(indexStream != nil)
	defer func() {
		if !success {
			util.CloseWhileSuppressingError(indexStream)
			ans.Abort()
		}
	}()

	ans.fieldsStream, err = dir.CreateOutput(util.SegmentFileName(si.Name, segmentSuffix,
		lucene40.FIELDS_EXTENSION), ctx)
	if err != nil {
		return nil, err
	}

	codecNameIdx := formatName + CODEC_SFX_IDX
	codecNameDat := formatName + CODEC_SFX_DAT
	err = codec.WriteHeader(indexStream, codecNameIdx, VERSION_CURRENT)
	if err != nil {
		return nil, err
	}
	err = codec.WriteHeader(ans.fieldsStream, codecNameDat, VERSION_CURRENT)
	if err != nil {
		return nil, err
	}
	assert(int64(codec.HeaderLength(codecNameIdx)) == indexStream.FilePointer())
	assert(int64(codec.HeaderLength(codecNameDat)) == ans.fieldsStream.FilePointer())

	ans.indexWriter, err = NewStoredFieldsIndexWriter(indexStream)
	if err != nil {
		return nil, err
	}
	assert(ans.indexWriter != nil)
	indexStream = nil

	err = ans.fieldsStream.WriteVInt(int32(chunkSize))
	if err != nil {
		return nil, err
	}
	err = ans.fieldsStream.WriteVInt(packed.VERSION_CURRENT)
	if err != nil {
		return nil, err
	}

	success = true
	return ans, nil
}
Example #16
/*
Initialize the deleter: find all previous commits in the Directory,
incref the files they reference, call the policy to let it delete
commits. This will remove any files not referenced by any of the
commits.
*/
func newIndexFileDeleter(directory store.Directory, policy IndexDeletionPolicy,
	segmentInfos *SegmentInfos, infoStream util.InfoStream, writer *IndexWriter,
	initialIndexExists bool) (*IndexFileDeleter, error) {

	currentSegmentsFile := segmentInfos.SegmentsFileName()
	if infoStream.IsEnabled("IFD") {
		infoStream.Message("IFD", "init: current segments file is '%v'; deletePolicy=%v",
			currentSegmentsFile, policy)
	}

	fd := &IndexFileDeleter{
		infoStream: infoStream,
		writer:     writer,
		policy:     policy,
		directory:  directory,
		refCounts:  make(map[string]*RefCount),
	}

	// First pass: walk the files and initialize our ref counts:
	currentGen := segmentInfos.generation

	var currentCommitPoint *CommitPoint
	var files []string
	files, err := directory.ListAll()
	if _, ok := err.(*store.NoSuchDirectoryError); ok {
		// it means the directory is empty, so ignore it
		files = make([]string, 0)
	} else if err != nil {
		return nil, err
	}

	if currentSegmentsFile != "" {
		m := model.CODEC_FILE_PATTERN
		for _, filename := range files {
			if !strings.HasSuffix(filename, "write.lock") &&
				filename != INDEX_FILENAME_SEGMENTS_GEN &&
				(m.MatchString(filename) || strings.HasPrefix(filename, util.SEGMENTS)) {

				// Add this file to refCounts with initial count 0:
				fd.refCount(filename)

				if strings.HasPrefix(filename, util.SEGMENTS) {
					// This is a commit (segments or segments_N), and it's
					// valid (<= the max gen). Load it, then incref all files
					// it refers to:
					if infoStream.IsEnabled("IFD") {
						infoStream.Message("IFD", "init: load commit '%v'", filename)
					}
					sis := &SegmentInfos{}
					err := sis.Read(directory, filename)
					if os.IsNotExist(err) {
						// LUCENE-948: on NFS (and maybe others), if
						// you have writers switching back and forth
						// between machines, it's very likely that the
						// dir listing will be stale and will claim a
						// file segments_X exists when in fact it
						// doesn't.  So, we catch this and handle it
						// as if the file does not exist
						if infoStream.IsEnabled("IFD") {
							infoStream.Message("IFD",
								"init: hit FileNotFoundException when loading commit '%v'; skipping this commit point",
								filename)
						}
						sis = nil
					} else if err != nil {
						if GenerationFromSegmentsFileName(filename) <= currentGen {
							length, _ := directory.FileLength(filename)
							if length > 0 {
								return nil, err
							}
						}
						// Most likely we are opening an index that has an
						// aborted "future" commit, so suppress exc in this case
						sis = nil
					} else { // sis != nil
						commitPoint := newCommitPoint(fd.commitsToDelete, directory, sis)
						if sis.generation == segmentInfos.generation {
							currentCommitPoint = commitPoint
						}
						fd.commits = append(fd.commits, commitPoint)
						fd.incRef(sis, true)

						if fd.lastSegmentInfos == nil || sis.generation > fd.lastSegmentInfos.generation {
							fd.lastSegmentInfos = sis
						}
					}
				}
			}
		}
	}

	if currentCommitPoint == nil && currentSegmentsFile != "" && initialIndexExists {
		// We did not in fact see the segments_N file corresponding to
		// the segmentInfos that was passed in. Yet, it must exist,
		// because our caller holds the write lock. This can happen when
		// the directory listing was stale (e.g. when index accessed via
		// NFS client with stale directory listing cache). So we try now
		// to explicitly open this commit point:
		sis := &SegmentInfos{}
		err := sis.Read(directory, currentSegmentsFile)
		if err != nil {
			return nil, errors.New(fmt.Sprintf("failed to locate current segments_N file '%v'",
				currentSegmentsFile))
		}
		if infoStream.IsEnabled("IFD") {
			infoStream.Message("IFD", "forced open of current segments file %v",
				segmentInfos.SegmentsFileName())
		}
		currentCommitPoint = newCommitPoint(fd.commitsToDelete, directory, sis)
		fd.commits = append(fd.commits, currentCommitPoint)
		fd.incRef(sis, true)
	}

	// We keep commits list in sorted order (oldest to newest):
	util.TimSort(IndexCommits(fd.commits))

	// Now delete anything with ref count at 0. These are presumably
	// abandoned files e.g. due to crash of IndexWriter.
	for filename, rc := range fd.refCounts {
		if rc.count == 0 {
			if infoStream.IsEnabled("IFD") {
				infoStream.Message("IFD", "init: removing unreferenced file '%v'",
					filename)
			}
			fd.deleteFile(filename)
		}
	}

	// Finally, give policy a chance to remove things on startup:
	err = policy.onInit(fd.commits)
	if err != nil {
		return nil, err
	}

	// Always protect the incoming segmentInfos since sometimes it may
	// not be the most recent commit
	err = fd.checkpoint(segmentInfos, false)
	if err != nil {
		return nil, err
	}

	fd.startingCommitDeleted = (currentCommitPoint != nil && currentCommitPoint.IsDeleted())

	fd.deleteCommits()

	return fd, nil
}
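The first pass above registers every index file at refcount 0, and each commit point then increfs the files it references; whatever is still at 0 afterwards is presumed abandoned. A minimal sketch of that scheme:

package main

import "fmt"

func main() {
	refCounts := map[string]int{}
	// First pass: register every file that looks like an index file at 0.
	for _, f := range []string{"_0.cfs", "_1.cfs", "segments_2"} {
		refCounts[f] = 0
	}
	// For each commit point found: incref everything it references.
	for _, f := range []string{"_1.cfs", "segments_2"} {
		refCounts[f]++
	}
	// Anything still at 0 is presumed abandoned (e.g. after a crash).
	for f, rc := range refCounts {
		if rc == 0 {
			fmt.Println("init: removing unreferenced file", f) // _0.cfs
		}
	}
}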
Example #17
func (sis *SegmentInfos) write(directory store.Directory) error {
	segmentsFilename := sis.nextSegmentFilename()

	// Always advance the generation on write:
	if sis.generation == -1 {
		sis.generation = 1
	} else {
		sis.generation++
	}

	var segnOutput *store.ChecksumIndexOutput
	var success = false
	var upgradedSIFiles = make(map[string]bool)

	defer func() {
		if !success {
			// We hit an error above; try to close the file but suppress
			// any errors
			util.CloseWhileSuppressingError(segnOutput)

			for filename, _ := range upgradedSIFiles {
				directory.DeleteFile(filename) // ignore error
			}

			// Try not to leave a truncated segments_N file in the index:
			directory.DeleteFile(segmentsFilename) // ignore error
		}
	}()

	out, err := directory.CreateOutput(segmentsFilename, store.IO_CONTEXT_DEFAULT)
	if err != nil {
		return err
	}
	segnOutput = store.NewChecksumIndexOutput(out)
	err = codec.WriteHeader(segnOutput, "segments", VERSION_40)
	if err != nil {
		return err
	}
	err = segnOutput.WriteLong(sis.version)
	if err == nil {
		err = segnOutput.WriteInt(int32(sis.counter))
		if err == nil {
			err = segnOutput.WriteInt(int32(len(sis.Segments)))
		}
	}
	if err != nil {
		return err
	}
	for _, siPerCommit := range sis.Segments {
		si := siPerCommit.info
		err = segnOutput.WriteString(si.Name)
		if err == nil {
			err = segnOutput.WriteString(si.Codec().(Codec).Name())
			if err == nil {
				err = segnOutput.WriteLong(siPerCommit.delGen)
				if err == nil {
					err = segnOutput.WriteInt(int32(siPerCommit.delCount))
				}
			}
		}
		if err != nil {
			return err
		}
		assert(si.Dir == directory)

		assert(siPerCommit.delCount <= si.DocCount())

		// If this segment is pre-4.x, perform a one-time "upgrade" to
		// write the .si file for it:
		if version := si.Version(); version == "" || versionLess(version, "4.0") {
			panic("not implemented yet")
		}
	}
	err = segnOutput.WriteStringStringMap(sis.userData)
	if err != nil {
		return err
	}
	sis.pendingSegnOutput = segnOutput
	success = true
	return nil
}
Example #18
func (sis *SegmentInfos) write(directory store.Directory) (err error) {
	segmentsFilename := sis.nextSegmentFilename()

	// Always advance the generation on write:
	if sis.generation == -1 {
		sis.generation = 1
	} else {
		sis.generation++
	}

	var segnOutput store.IndexOutput
	var success = false
	// var upgradedSIFiles = make(map[string]bool)

	defer func() {
		if !success {
			// We hit an error above; try to close the file but suppress
			// any errors
			util.CloseWhileSuppressingError(segnOutput)

			// for filename, _ := range upgradedSIFiles {
			// 	directory.DeleteFile(filename) // ignore error
			// }

			// Try not to leave a truncated segments_N file in the index:
			directory.DeleteFile(segmentsFilename) // ignore error
		}
	}()

	if segnOutput, err = directory.CreateOutput(segmentsFilename, store.IO_CONTEXT_DEFAULT); err != nil {
		return
	}
	if err = codec.WriteHeader(segnOutput, "segments", VERSION_49); err != nil {
		return
	}
	if err = segnOutput.WriteLong(sis.version); err == nil {
		if err = segnOutput.WriteInt(int32(sis.counter)); err == nil {
			err = segnOutput.WriteInt(int32(len(sis.Segments)))
		}
	}
	if err != nil {
		return
	}
	for _, siPerCommit := range sis.Segments {
		si := siPerCommit.Info
		if err = segnOutput.WriteString(si.Name); err == nil {
			if err = segnOutput.WriteString(si.Codec().(Codec).Name()); err == nil {
				if err = segnOutput.WriteLong(siPerCommit.DelGen()); err == nil {
					assert2(siPerCommit.DelCount() >= 0 && siPerCommit.DelCount() <= si.DocCount(),
						"cannot write segment: invalid docCount segment=%v docCount=%v delCount=%v",
						si.Name, si.DocCount(), siPerCommit.DelCount())
					if err = segnOutput.WriteInt(int32(siPerCommit.DelCount())); err == nil {
						if err = segnOutput.WriteLong(siPerCommit.FieldInfosGen()); err == nil {
							if err = segnOutput.WriteLong(siPerCommit.DocValuesGen()); err == nil {
								if err = segnOutput.WriteStringSet(siPerCommit.FieldInfosFiles()); err == nil {
									dvUpdatesFiles := siPerCommit.DocValuesUpdatesFiles()
									if err = segnOutput.WriteInt(int32(len(dvUpdatesFiles))); err == nil {
										for k, v := range dvUpdatesFiles {
											if err = segnOutput.WriteInt(int32(k)); err != nil {
												break
											}
											if err = segnOutput.WriteStringSet(v); err != nil {
												break
											}
										}
									}
								}
							}
						}
					}
				}
			}
		}
		if err != nil {
			return
		}
		assert(si.Dir == directory)

		// If this segment is pre-4.x, perform a one-time "upgrade" to
		// write the .si file for it:
		if version := si.Version(); len(version) == 0 || !version.OnOrAfter(util.VERSION_4_0) {
			panic("not implemented yet")
		}
	}
	if err = segnOutput.WriteStringStringMap(sis.userData); err != nil {
		return
	}
	sis.pendingSegnOutput = segnOutput
	success = true
	return nil
}
Example #19
// Sole constructor
func newCompressingStoredFieldsReader(d store.Directory,
	si *model.SegmentInfo, segmentSuffix string,
	fn model.FieldInfos, ctx store.IOContext, formatName string,
	compressionMode compressing.CompressionMode) (r *CompressingStoredFieldsReader, err error) {

	r = &CompressingStoredFieldsReader{}
	r.compressionMode = compressionMode
	segment := si.Name
	r.fieldInfos = fn
	r.numDocs = si.DocCount()

	var indexStream store.IndexInput
	success := false
	defer func() {
		if !success {
			log.Println("Failed to initialize CompressionStoredFieldsReader.")
			if err != nil {
				log.Print(err)
			}
			util.Close(r, indexStream)
		}
	}()

	// Load the index into memory
	indexStreamFN := util.SegmentFileName(segment, segmentSuffix, lucene40.FIELDS_INDEX_EXTENSION)
	indexStream, err = d.OpenInput(indexStreamFN, ctx)
	if err != nil {
		return nil, err
	}
	codecNameIdx := formatName + CODEC_SFX_IDX
	if _, err = codec.CheckHeader(indexStream, codecNameIdx, CODEC_SFX_VERSION_START, CODEC_SFX_VERSION_CURRENT); err != nil {
		return nil, err
	}
	if int64(codec.HeaderLength(codecNameIdx)) != indexStream.FilePointer() {
		panic("assert fail")
	}
	r.indexReader, err = newCompressingStoredFieldsIndexReader(indexStream, si)
	if err != nil {
		return nil, err
	}
	err = indexStream.Close()
	if err != nil {
		return nil, err
	}
	indexStream = nil

	// Open the data file and read metadata
	fieldsStreamFN := util.SegmentFileName(segment, segmentSuffix, lucene40.FIELDS_EXTENSION)
	r.fieldsStream, err = d.OpenInput(fieldsStreamFN, ctx)
	if err != nil {
		return nil, err
	}
	codecNameDat := formatName + CODEC_SFX_DAT
	if _, err = codec.CheckHeader(r.fieldsStream, codecNameDat, CODEC_SFX_VERSION_START, CODEC_SFX_VERSION_CURRENT); err != nil {
		return nil, err
	}
	if int64(codec.HeaderLength(codecNameDat)) != r.fieldsStream.FilePointer() {
		panic("assert fail")
	}

	n, err := r.fieldsStream.ReadVInt()
	if err != nil {
		return nil, err
	}
	r.packedIntsVersion = int(n)
	r.decompressor = compressionMode.NewDecompressor()
	r.bytes = make([]byte, 0)

	success = true
	return r, nil
}
Example #20
func NewBlockTreeTermsReader(dir store.Directory,
	fieldInfos FieldInfos, info *SegmentInfo,
	postingsReader PostingsReaderBase, ctx store.IOContext,
	segmentSuffix string, indexDivisor int) (p FieldsProducer, err error) {

	// log.Print("Initializing BlockTreeTermsReader...")
	fp := &BlockTreeTermsReader{
		postingsReader: postingsReader,
		fields:         make(map[string]FieldReader),
		segment:        info.Name,
	}
	fp.in, err = dir.OpenInput(util.SegmentFileName(info.Name, segmentSuffix, TERMS_EXTENSION), ctx)
	if err != nil {
		return nil, err
	}

	success := false
	var indexIn store.IndexInput
	defer func() {
		if !success {
			fmt.Println("Failed to initialize BlockTreeTermsReader.")
			if err != nil {
				fmt.Println("DEBUG ", err)
			}
			// this.close() will close in:
			util.CloseWhileSuppressingError(indexIn, fp)
		}
	}()

	fp.version, err = fp.readHeader(fp.in)
	if err != nil {
		return nil, err
	}
	// log.Printf("Version: %v", fp.version)

	if indexDivisor != -1 {
		filename := util.SegmentFileName(info.Name, segmentSuffix, TERMS_INDEX_EXTENSION)
		indexIn, err = dir.OpenInput(filename, ctx)
		if err != nil {
			return nil, err
		}

		indexVersion, err := fp.readIndexHeader(indexIn)
		if err != nil {
			return nil, err
		}
		// log.Printf("Index version: %v", indexVersion)
		if int(indexVersion) != fp.version {
			return nil, errors.New(fmt.Sprintf("mismatched version files: %v=%v,%v=%v", fp.in, fp.version, indexIn, indexVersion))
		}
	}

	// verify
	if indexIn != nil && fp.version >= TERMS_VERSION_CURRENT {
		if _, err = store.ChecksumEntireFile(indexIn); err != nil {
			return nil, err
		}
	}

	// Have PostingsReader init itself
	postingsReader.Init(fp.in)

	if fp.version >= TERMS_VERSION_CHECKSUM {
		// NOTE: data file is too costly to verify checksum against all the
		// bytes on open, but for now we at least verify proper structure
		// of the checksum footer: which looks for FOOTER_MAGIC +
		// algorithmID. This is cheap and can detect some forms of
		// corruption such as file truncation.
		if _, err = codec.RetrieveChecksum(fp.in); err != nil {
			return nil, err
		}
	}

	// Read per-field details
	fp.seekDir(fp.in, fp.dirOffset)
	if indexDivisor != -1 {
		fp.seekDir(indexIn, fp.indexDirOffset)
	}

	numFields, err := fp.in.ReadVInt()
	if err != nil {
		return nil, err
	}
	// log.Printf("Fields number: %v", numFields)
	if numFields < 0 {
		return nil, errors.New(fmt.Sprintf("invalid numFields: %v (resource=%v)", numFields, fp.in))
	}

	for i := int32(0); i < numFields; i++ {
		// log.Printf("Next field...")
		field, err := fp.in.ReadVInt()
		if err != nil {
			return nil, err
		}
		// log.Printf("Field: %v", field)

		numTerms, err := fp.in.ReadVLong()
		if err != nil {
			return nil, err
		}
		assert2(numTerms > 0,
			"Illegal numTerms for field number: %v (resource=%v)", field, fp.in)
		// log.Printf("Terms number: %v", numTerms)

		numBytes, err := fp.in.ReadVInt()
		if err != nil {
			return nil, err
		}
		assert2(numBytes >= 0,
			"invalid rootCode for field number: %v, numBytes=%v (resource=%v)",
			field, numBytes, fp.in)
		// log.Printf("Bytes number: %v", numBytes)

		rootCode := make([]byte, numBytes)
		err = fp.in.ReadBytes(rootCode)
		if err != nil {
			return nil, err
		}
		fieldInfo := fieldInfos.FieldInfoByNumber(int(field))
		assert2(fieldInfo != nil, "invalid field number: %v (resource=%v)", field, fp.in)
		var sumTotalTermFreq int64
		if fieldInfo.IndexOptions() == INDEX_OPT_DOCS_ONLY {
			sumTotalTermFreq = -1
		} else {
			sumTotalTermFreq, err = fp.in.ReadVLong()
			if err != nil {
				return nil, err
			}
		}
		sumDocFreq, err := fp.in.ReadVLong()
		if err != nil {
			return nil, err
		}
		var docCount int
		if docCount, err = asInt(fp.in.ReadVInt()); err != nil {
			return nil, err
		}
		// fmt.Printf("DocCount: %v\n", docCount)
		var longsSize int
		if fp.version >= TERMS_VERSION_META_ARRAY {
			if longsSize, err = asInt(fp.in.ReadVInt()); err != nil {
				return nil, err
			}
		}
		assert2(longsSize >= 0,
			"invalid longsSize for field: %v, longsSize=%v (resource=%v)",
			fieldInfo.Name, longsSize, fp.in)
		var minTerm, maxTerm []byte
		if fp.version >= TERMS_VERSION_MIN_MAX_TERMS {
			if minTerm, err = readBytesRef(fp.in); err != nil {
				return nil, err
			}
			if maxTerm, err = readBytesRef(fp.in); err != nil {
				return nil, err
			}
		}
		if docCount < 0 || int(docCount) > info.DocCount() { // #docs with field must be <= #docs
			return nil, errors.New(fmt.Sprintf(
				"invalid docCount: %v maxDoc: %v (resource=%v)",
				docCount, info.DocCount(), fp.in))
		}
		if sumDocFreq < int64(docCount) { // #postings must be >= #docs with field
			return nil, errors.New(fmt.Sprintf(
				"invalid sumDocFreq: %v docCount: %v (resource=%v)",
				sumDocFreq, docCount, fp.in))
		}
		if sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq { // #positions must be >= #postings
			return nil, errors.New(fmt.Sprintf(
				"invalid sumTotalTermFreq: %v sumDocFreq: %v (resource=%v)",
				sumTotalTermFreq, sumDocFreq, fp.in))
		}

		var indexStartFP int64
		if indexDivisor != -1 {
			if indexStartFP, err = indexIn.ReadVLong(); err != nil {
				return nil, err
			}
		}
		// log.Printf("indexStartFP: %v", indexStartFP)
		if _, ok := fp.fields[fieldInfo.Name]; ok {
			return nil, errors.New(fmt.Sprintf(
				"duplicate field: %v (resource=%v)", fieldInfo.Name, fp.in))
		}
		if fp.fields[fieldInfo.Name], err = newFieldReader(fp,
			fieldInfo, numTerms, rootCode, sumTotalTermFreq,
			sumDocFreq, docCount, indexStartFP, longsSize,
			indexIn, minTerm, maxTerm); err != nil {
			return nil, err
		}
	}

	if indexDivisor != -1 {
		if err = indexIn.Close(); err != nil {
			return nil, err
		}
	}

	success = true

	return fp, nil
}
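Both BlockTreeTermsReader variants (Examples #14 and #20) enforce the same per-field invariants: docs-with-field <= maxDoc, postings >= docs-with-field, and positions >= postings when positions are indexed. A sketch of just those checks:

package main

import "fmt"

// validateFieldStats applies the three sanity checks from the loops above.
func validateFieldStats(docCount, maxDoc int, sumDocFreq, sumTotalTermFreq int64) error {
	if docCount < 0 || docCount > maxDoc { // #docs with field must be <= #docs
		return fmt.Errorf("invalid docCount: %v maxDoc: %v", docCount, maxDoc)
	}
	if sumDocFreq < int64(docCount) { // #postings must be >= #docs with field
		return fmt.Errorf("invalid sumDocFreq: %v docCount: %v", sumDocFreq, docCount)
	}
	if sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq { // #positions >= #postings
		return fmt.Errorf("invalid sumTotalTermFreq: %v sumDocFreq: %v", sumTotalTermFreq, sumDocFreq)
	}
	return nil
}

func main() {
	fmt.Println(validateFieldStats(10, 5, 20, 30)) // invalid docCount: 10 maxDoc: 5
}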
Example #21
func (sis *SegmentInfos) finishCommit(dir store.Directory) (fileName string, err error) {
	assert(dir != nil)
	assert2(sis.pendingSegnOutput != nil, "prepareCommit was not called")
	if err = func() error {
		var success = false
		defer func() {
			if !success {
				// Closes pendingSegnOutput & deletes partial segments_N:
				sis.rollbackCommit(dir)
			} else {
				err := func() error {
					var success = false
					defer func() {
						if !success {
							// Closes pendingSegnOutput & deletes partial segments_N:
							sis.rollbackCommit(dir)
						} else {
							sis.pendingSegnOutput = nil
						}
					}()

					err := sis.pendingSegnOutput.Close()
					success = err == nil
					return err
				}()
				assertn(err == nil, "%v", err) // no shadow
			}
		}()

		if err := codec.WriteFooter(sis.pendingSegnOutput); err != nil {
			return err
		}
		success = true
		return nil
	}(); err != nil {
		return
	}

	// NOTE: if we crash here, we have left a segments_N file in the
	// directory in a possibly corrupt state (if some bytes made it to
	// stable storage and others didn't). But, the segments_N file
	// includes checksum at the end, which should catch this case. So
	// when a reader tries to read it, it will return an index corrupt
	// error, which should cause the retry logic in SegmentInfos to
	// kick in and load the last good (previous) segments_N-1 file.

	fileName = util.FileNameFromGeneration(INDEX_FILENAME_SEGMENTS, "", sis.generation)
	if err = func() error {
		var success = false
		defer func() {
			if !success {
				dir.DeleteFile(fileName)
				// suppress error so we keep returning the original error
			}
		}()

		err := dir.Sync([]string{fileName})
		success = err == nil
		return err
	}(); err != nil {
		return
	}

	sis.lastGeneration = sis.generation
	writeSegmentsGen(dir, sis.generation)
	return
}
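finishCommit completes a two-phase commit: write the footer, close, fsync the new segments_N, and only then advance lastGeneration. A sketch of that write-sync-then-publish ordering, with plain os calls standing in for the Directory API:

package main

import (
	"fmt"
	"os"
)

// commit writes a new segments_N-style file, fsyncs it, and removes the
// partial file on any failure, mirroring the defer blocks above.
func commit(name string, data []byte) (err error) {
	f, err := os.Create(name)
	if err != nil {
		return err
	}
	if _, err = f.Write(data); err != nil {
		f.Close()
		os.Remove(name) // don't leave a truncated segments_N behind
		return err
	}
	if err = f.Sync(); err != nil { // the dir.Sync step above
		f.Close()
		os.Remove(name)
		return err
	}
	return f.Close() // only after this would lastGeneration advance
}

func main() {
	fmt.Println(commit("segments_demo", []byte("demo")))
	os.Remove("segments_demo")
}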
Example #22
/*
Read a particular segmentFileName. Note that this may return an IO
error if a commit is in progress.
*/
func (sis *SegmentInfos) Read(directory store.Directory, segmentFileName string) error {
	log.Printf("Reading segment info from %v...", segmentFileName)
	success := false

	// Clear any previous segments:
	sis.Clear()

	sis.generation = GenerationFromSegmentsFileName(segmentFileName)
	sis.lastGeneration = sis.generation

	main, err := directory.OpenInput(segmentFileName, store.IO_CONTEXT_READ)
	if err != nil {
		return err
	}
	input := store.NewChecksumIndexInput(main)
	defer func() {
		if !success {
			// Clear any segment infos we had loaded so we
			// have a clean slate on retry:
			sis.Clear()
			util.CloseWhileSuppressingError(input)
		} else {
			input.Close()
		}
	}()

	format, err := input.ReadInt()
	if err != nil {
		return err
	}
	if format == codec.CODEC_MAGIC {
		// 4.0+
		_, err = codec.CheckHeaderNoMagic(input, "segments", VERSION_40, VERSION_40)
		if err != nil {
			return err
		}
		sis.version, err = input.ReadLong()
		if err != nil {
			return err
		}
		sis.counter, err = asInt(input.ReadInt())
		if err != nil {
			return err
		}
		numSegments, err := asInt(input.ReadInt())
		if err != nil {
			return err
		}
		if numSegments < 0 {
			return errors.New(fmt.Sprintf("invalid segment count: %v (resource: %v)", numSegments, input))
		}
		for seg := 0; seg < numSegments; seg++ {
			segName, err := input.ReadString()
			if err != nil {
				return err
			}
			codecName, err := input.ReadString()
			if err != nil {
				return err
			}
			if codecName != "Lucene42" {
				log.Panicf("Not supported yet: %v", codecName)
			}
			fCodec := LoadCodec(codecName)
			log.Printf("SIS.read seg=%v codec=%v", seg, fCodec)
			info, err := fCodec.SegmentInfoFormat().SegmentInfoReader()(directory, segName, store.IO_CONTEXT_READ)
			// method := NewLucene42Codec()
			// info, err := method.ReadSegmentInfo(directory, segName, store.IO_CONTEXT_READ)
			if err != nil {
				return err
			}
			// info.codec = method
			info.SetCodec(fCodec)
			delGen, err := input.ReadLong()
			if err != nil {
				return err
			}
			delCount, err := asInt(input.ReadInt())
			if err != nil {
				return err
			}
			if delCount < 0 || delCount > info.DocCount() {
				return errors.New(fmt.Sprintf("invalid deletion count: %v (resource: %v)", delCount, input))
			}
			sis.Segments = append(sis.Segments, NewSegmentInfoPerCommit(info, delCount, delGen))
		}
		sis.userData, err = input.ReadStringStringMap()
		if err != nil {
			return err
		}
	} else {
		// TODO support <4.0 index
		panic("Index format pre-4.0 not supported yet")
	}

	checksumNow := int64(input.Checksum())
	checksumThen, err := input.ReadLong()
	if err != nil {
		return err
	}
	if checksumNow != checksumThen {
		return errors.New(fmt.Sprintf(
			"checksum mismatch in segments file: %v vs %v (resource: %v)",
			checksumNow, checksumThen, input))
	}

	success = true
	return nil
}