Example #1
0
// compactionError tracks the most recent compaction error received on
// db.compErrSetC and offers it to whoever reads db.compErrC.
//
// It is a three-state loop, one label per state:
//   - noerr:   nothing to report; wait for an error to be set.
//   - haserr:  a non-corruption error is offered on compErrC until it is
//     cleared (nil) or replaced.
//   - hasperr: a corruption error is offered on both compErrC and
//     compPerErrC, and the write lock is taken. This state is only left
//     when closeC becomes ready.
//
// In every state the function returns once a receive on db.closeC succeeds.
func (db *DB) compactionError() {
	var (
		err            error
		holdsWriteLock bool
	)

noerr:
	// State: no error recorded.
	for {
		select {
		case err = <-db.compErrSetC:
			if err == nil {
				// Still no error; keep waiting.
				continue
			}
			if errors.IsCorrupted(err) {
				goto hasperr
			}
			goto haserr
		case <-db.closeC:
			return
		}
	}

haserr:
	// State: transient error.
	for {
		select {
		case db.compErrC <- err:
		case err = <-db.compErrSetC:
			if err == nil {
				goto noerr
			}
			if errors.IsCorrupted(err) {
				goto hasperr
			}
			// Any other error simply replaces the one being offered.
		case <-db.closeC:
			return
		}
	}

hasperr:
	// State: persistent error.
	for {
		select {
		case db.compErrC <- err:
		case db.compPerErrC <- err:
		case db.writeLockC <- struct{}{}:
			// Keep the write lock so that no write can pass through.
			holdsWriteLock = true
		case <-db.closeC:
			if holdsWriteLock {
				// Give the lock back, otherwise Close would hang.
				<-db.writeLockC
			}
			return
		}
	}
}
Example #2
0
// dataErr inspects the error state of the current data iterator.
//
// Any non-nil error is first passed to the error callback, if one is set.
// The error is then stored in i.err and true is returned, unless the
// iterator is non-strict and the error is a corruption error, in which case
// it is ignored and false is returned. It also returns false when the data
// iterator reports no error.
func (i *indexedIterator) dataErr() bool {
	err := i.data.Error()
	if err == nil {
		return false
	}
	if i.errf != nil {
		i.errf(err)
	}
	// Corruption is tolerated only when not running in strict mode.
	if !i.strict && errors.IsCorrupted(err) {
		return false
	}
	i.err = err
	return true
}
Example #3
0
// iterErr checks the given sub-iterator for an error.
//
// A non-nil error is reported to the error callback when one is set. It is
// then treated as fatal, meaning it is stored in i.err and true is returned,
// if the merged iterator is strict or the error is not a corruption error.
// Otherwise the error is dropped and false is returned.
func (i *mergedIterator) iterErr(iter Iterator) bool {
	err := iter.Error()
	if err == nil {
		return false
	}
	if report := i.errf; report != nil {
		report(err)
	}
	fatal := i.strict || !errors.IsCorrupted(err)
	if fatal {
		i.err = err
	}
	return fatal
}
Example #4
0
// recoverJournal replays the journal files that are still relevant and then
// switches the DB over to a freshly created journal.
//
// Only journals numbered at or above the session's current journal number
// (db.s.stJournalNum), plus the session's previous journal
// (db.s.stPrevJournalNum), are replayed. Each one is decoded batch by batch
// into a memdb, which is flushed through cm whenever it reaches the
// configured write buffer size. Once a later journal has been opened, the
// previously replayed one is committed and removed. db.seq is advanced as
// batches are applied.
//
// It returns the first error hit while listing, opening, replaying, flushing
// or committing. Corrupted batches are skipped with a log message unless
// StrictJournal is set.
func (db *DB) recoverJournal() error {
	// Get all journals and sort them by file number.
	journalFiles_, err := db.s.getFiles(storage.TypeJournal)
	if err != nil {
		return err
	}
	journalFiles := files(journalFiles_)
	journalFiles.sort()

	// Discard older journals: keep the previous journal and everything at or
	// after the current journal number. Filtering (instead of trimming only
	// when a current-or-newer journal is found) guarantees that obsolete
	// journals are never replayed, even when no such journal exists.
	// The filter reuses the backing array and preserves the sorted order.
	live := journalFiles[:0]
	for _, file := range journalFiles {
		if num := file.Num(); num >= db.s.stJournalNum || num == db.s.stPrevJournalNum {
			live = append(live, file)
		}
	}
	journalFiles = live

	var jr *journal.Reader
	var of storage.File
	var mem *memdb.DB
	batch := new(Batch)
	cm := newCMem(db.s)
	buf := new(util.Buffer)
	// Options.
	strict := db.s.o.GetStrict(opt.StrictJournal)
	checksum := db.s.o.GetStrict(opt.StrictJournalChecksum)
	writeBuffer := db.s.o.GetWriteBuffer()
	// recoverJournal replays a single journal file into mem. The journal
	// reader, memdb, batch and buffer are shared across calls.
	recoverJournal := func(file storage.File) error {
		db.logf("journal@recovery recovering @%d", file.Num())
		reader, err := file.Open()
		if err != nil {
			return err
		}
		defer reader.Close()

		// Create/reset journal reader instance.
		if jr == nil {
			jr = journal.NewReader(reader, dropper{db.s, file}, strict, checksum)
		} else {
			jr.Reset(reader, dropper{db.s, file}, strict, checksum)
		}

		// Flush memdb and remove obsolete journal file.
		// `of` is the journal replayed by the previous call; it is only
		// removed after its contents have been flushed and committed.
		if of != nil {
			if mem.Len() > 0 {
				if err := cm.flush(mem, 0); err != nil {
					return err
				}
			}
			if err := cm.commit(file.Num(), db.seq); err != nil {
				return err
			}
			cm.reset()
			of.Remove()
			of = nil
		}

		// Replay journal to memdb.
		mem.Reset()
		for {
			r, err := jr.Next()
			if err != nil {
				if err == io.EOF {
					break
				}
				return errors.SetFile(err, file)
			}

			buf.Reset()
			if _, err := buf.ReadFrom(r); err != nil {
				if err == io.ErrUnexpectedEOF {
					// This is error returned due to corruption, with strict == false.
					continue
				}
				return errors.SetFile(err, file)
			}
			if err := batch.memDecodeAndReplay(db.seq, buf.Bytes(), mem); err != nil {
				if strict || !errors.IsCorrupted(err) {
					return errors.SetFile(err, file)
				}
				db.s.logf("journal error: %v (skipped)", err)
				// We won't apply sequence number as it might be corrupted.
				continue
			}

			// Save sequence number.
			db.seq = batch.seq + uint64(batch.Len())

			// Flush it if large enough.
			if mem.Size() >= writeBuffer {
				if err := cm.flush(mem, 0); err != nil {
					return err
				}
				mem.Reset()
			}
		}

		// Remember this journal so the next call (or the epilogue below)
		// can remove it once its data is safely committed.
		of = file
		return nil
	}

	// Recover all journals.
	if len(journalFiles) > 0 {
		db.logf("journal@recovery F·%d", len(journalFiles))

		// Mark file number as used.
		db.s.markFileNum(journalFiles[len(journalFiles)-1].Num())

		mem = memdb.New(db.s.icmp, writeBuffer)
		for _, file := range journalFiles {
			if err := recoverJournal(file); err != nil {
				return err
			}
		}

		// Flush the last journal.
		if mem.Len() > 0 {
			if err := cm.flush(mem, 0); err != nil {
				return err
			}
		}
	}

	// Create a new journal.
	if _, err := db.newMem(0); err != nil {
		return err
	}

	// Commit.
	if err := cm.commit(db.journalFile.Num(), db.seq); err != nil {
		// Close journal.
		if db.journal != nil {
			db.journal.Close()
			db.journalWriter.Close()
		}
		return err
	}

	// Remove the last obsolete journal file.
	if of != nil {
		of.Remove()
	}

	return nil
}
Example #5
0
// recoverTable rebuilds the session's table list by scanning every table file
// found in storage and then writes a new manifest.
//
// Each table is iterated once to count good keys, corrupted keys and
// corrupted blocks, and to find its highest sequence number and key range.
// Depending on the outcome a table is:
//   - dropped, when StrictRecovery is set and any corruption was seen, or
//     when it holds no good key at all;
//   - rebuilt into a temporary file containing only valid keys and swapped
//     in place, when it is corrupted but still has good keys;
//   - kept as is otherwise.
//
// Kept and rebuilt tables are added to level 0 of a new session record, whose
// sequence number is set to the highest one seen. The function returns the
// first error from listing, opening, scanning or rebuilding a table, or from
// creating and committing the manifest.
func recoverTable(s *session, o *opt.Options) error {
	// Work on a copy so the caller's options are not modified.
	o = dupOptions(o)
	// Mask StrictReader, lets StrictRecovery doing its job.
	o.Strict &= ^opt.StrictReader

	// Get all tables and sort it by file number.
	tableFiles_, err := s.getFiles(storage.TypeTable)
	if err != nil {
		return err
	}
	tableFiles := files(tableFiles_)
	tableFiles.sort()

	var (
		// Totals accumulated across all tables by the recoverTable closure.
		maxSeq                                                            uint64
		recoveredKey, goodKey, corruptedKey, corruptedBlock, droppedTable int

		// We will drop corrupted table.
		strict = o.GetStrict(opt.StrictRecovery)

		rec   = &sessionRecord{numLevel: o.GetNumLevel()}
		// NOTE(review): the +5 presumably accounts for a per-block trailer — confirm.
		bpool = util.NewBufferPool(o.GetBlockSize() + 5)
	)
	// buildTable copies every entry with a valid internal key from iter into
	// a new temporary table file. On success it returns the file and the
	// table's byte length; on failure the temporary file is removed and tmp
	// is returned as nil.
	buildTable := func(iter iterator.Iterator) (tmp storage.File, size int64, err error) {
		tmp = s.newTemp()
		writer, err := tmp.Create()
		if err != nil {
			return
		}
		// Always close the writer; clean up the temp file if anything failed.
		defer func() {
			writer.Close()
			if err != nil {
				tmp.Remove()
				tmp = nil
			}
		}()

		// Copy entries.
		tw := table.NewWriter(writer, o)
		for iter.Next() {
			key := iter.Key()
			if validIkey(key) {
				err = tw.Append(key, iter.Value())
				if err != nil {
					return
				}
			}
		}
		err = iter.Error()
		if err != nil {
			return
		}
		err = tw.Close()
		if err != nil {
			return
		}
		err = writer.Sync()
		if err != nil {
			return
		}
		size = int64(tw.BytesLen())
		return
	}
	// recoverTable scans a single table file, updates the shared counters and
	// either registers the table in rec or drops it.
	recoverTable := func(file storage.File) error {
		s.logf("table@recovery recovering @%d", file.Num())
		reader, err := file.Open()
		if err != nil {
			return err
		}
		// The rebuild path closes the reader early (before replacing the
		// file); `closed` prevents the deferred function from closing it twice.
		var closed bool
		defer func() {
			if !closed {
				reader.Close()
			}
		}()

		// Get file size.
		// NOTE(review): whence 2 presumably means "relative to end" (io.Seeker
		// semantics) — confirm against the storage reader's contract.
		size, err := reader.Seek(0, 2)
		if err != nil {
			return err
		}

		var (
			// Per-table statistics; folded into the outer totals after the scan.
			tSeq                                     uint64
			tgoodKey, tcorruptedKey, tcorruptedBlock int
			imin, imax                               []byte
		)
		tr, err := table.NewReader(reader, size, storage.NewFileInfo(file), nil, bpool, o)
		if err != nil {
			return err
		}
		iter := tr.NewIterator(nil, nil)
		// Count corrupted blocks as the iterator reports them through the
		// error callback.
		iter.(iterator.ErrorCallbackSetter).SetErrorCallback(func(err error) {
			if errors.IsCorrupted(err) {
				s.logf("table@recovery block corruption @%d %q", file.Num(), err)
				tcorruptedBlock++
			}
		})

		// Scan the table.
		for iter.Next() {
			key := iter.Key()
			_, seq, _, kerr := parseIkey(key)
			if kerr != nil {
				tcorruptedKey++
				continue
			}
			tgoodKey++
			if seq > tSeq {
				tSeq = seq
			}
			// Keys are copied because they are kept beyond this iteration:
			// imin is the first good key seen, imax the last one.
			if imin == nil {
				imin = append([]byte{}, key...)
			}
			imax = append(imax[:0], key...)
		}
		if err := iter.Error(); err != nil {
			iter.Release()
			return err
		}
		iter.Release()

		goodKey += tgoodKey
		corruptedKey += tcorruptedKey
		corruptedBlock += tcorruptedBlock

		// In strict mode any corruption drops the whole table.
		if strict && (tcorruptedKey > 0 || tcorruptedBlock > 0) {
			droppedTable++
			s.logf("table@recovery dropped @%d Gk·%d Ck·%d Cb·%d S·%d Q·%d", file.Num(), tgoodKey, tcorruptedKey, tcorruptedBlock, size, tSeq)
			return nil
		}

		if tgoodKey > 0 {
			if tcorruptedKey > 0 || tcorruptedBlock > 0 {
				// Rebuild the table.
				s.logf("table@recovery rebuilding @%d", file.Num())
				iter := tr.NewIterator(nil, nil)
				tmp, newSize, err := buildTable(iter)
				iter.Release()
				if err != nil {
					return err
				}
				// Close the original before it is replaced by the rebuilt file.
				closed = true
				reader.Close()
				if err := file.Replace(tmp); err != nil {
					return err
				}
				size = newSize
			}
			if tSeq > maxSeq {
				maxSeq = tSeq
			}
			recoveredKey += tgoodKey
			// Add table to level 0.
			rec.addTable(0, file.Num(), uint64(size), imin, imax)
			s.logf("table@recovery recovered @%d Gk·%d Ck·%d Cb·%d S·%d Q·%d", file.Num(), tgoodKey, tcorruptedKey, tcorruptedBlock, size, tSeq)
		} else {
			// No good key at all: nothing worth keeping.
			droppedTable++
			s.logf("table@recovery unrecoverable @%d Ck·%d Cb·%d S·%d", file.Num(), tcorruptedKey, tcorruptedBlock, size)
		}

		return nil
	}

	// Recover all tables.
	if len(tableFiles) > 0 {
		s.logf("table@recovery F·%d", len(tableFiles))

		// Mark file number as used.
		// tableFiles is sorted, so the last entry is the one passed here.
		s.markFileNum(tableFiles[len(tableFiles)-1].Num())

		for _, file := range tableFiles {
			if err := recoverTable(file); err != nil {
				return err
			}
		}

		s.logf("table@recovery recovered F·%d N·%d Gk·%d Ck·%d Q·%d", len(tableFiles), recoveredKey, goodKey, corruptedKey, maxSeq)
	}

	// Set sequence number.
	rec.setSeqNum(maxSeq)

	// Create new manifest.
	if err := s.create(); err != nil {
		return err
	}

	// Commit.
	return s.commit(rec)
}
Example #6
0
// compactionTransact runs the compaction transaction t until it succeeds,
// retrying with an exponential backoff (1s, 2s, 4s, capped at 8s) after each
// failure unless backoff is disabled in the options.
//
// name is used only as a prefix for log messages. After every attempt the
// result is published on db.compErrSetC. The loop gives up by calling
// db.compactionExitTransact when the DB is closed, when a persistent error is
// pending while the attempt failed, or when the attempt failed with a
// corruption error.
//
// NOTE(review): compactionExitTransact presumably panics with
// errCompactionTransactExiting and never returns — confirm. The deferred
// handler below reverts t for exactly that panic value and then re-panics.
func (db *DB) compactionTransact(name string, t compactionTransactInterface) {
	defer func() {
		if x := recover(); x != nil {
			if x == errCompactionTransactExiting {
				if err := t.revert(); err != nil {
					db.logf("%s revert error %q", name, err)
				}
			}
			panic(x)
		}
	}()

	const (
		backoffMin = 1 * time.Second
		backoffMax = 8 * time.Second
		// backoffMul is a dimensionless factor. Multiplying two Durations
		// would square the nanosecond count and jump straight to the cap.
		backoffMul = 2
	)
	var (
		backoff  = backoffMin
		backoffT = time.NewTimer(backoff)
		lastCnt  = compactionTransactCounter(0)

		disableBackoff = db.s.o.GetDisableCompactionBackoff()
	)
	// Release the timer on every exit path, including panics.
	defer backoffT.Stop()

	for n := 0; ; n++ {
		// Check whether the DB is closed.
		if db.isClosed() {
			db.logf("%s exiting", name)
			db.compactionExitTransact()
		} else if n > 0 {
			db.logf("%s retrying N·%d", name, n)
		}

		// Execute.
		cnt := compactionTransactCounter(0)
		err := t.run(&cnt)
		if err != nil {
			db.logf("%s error I·%d %q", name, cnt, err)
		}

		// Set compaction error status.
		select {
		case db.compErrSetC <- err:
		case perr := <-db.compPerErrC:
			if err != nil {
				db.logf("%s exiting (persistent error %q)", name, perr)
				db.compactionExitTransact()
			}
		case <-db.closeC:
			db.logf("%s exiting", name)
			db.compactionExitTransact()
		}
		if err == nil {
			return
		}
		if errors.IsCorrupted(err) {
			db.logf("%s exiting (corruption detected)", name)
			db.compactionExitTransact()
		}

		if !disableBackoff {
			// Reset backoff duration if counter is advancing.
			if cnt > lastCnt {
				backoff = backoffMin
				lastCnt = cnt
			}

			// Backoff.
			// The timer may have fired while the attempt was running; stop
			// it and drain any stale tick so the wait below really lasts
			// for the full backoff duration.
			if !backoffT.Stop() {
				select {
				case <-backoffT.C:
				default:
				}
			}
			backoffT.Reset(backoff)
			if backoff < backoffMax {
				backoff *= backoffMul
				if backoff > backoffMax {
					backoff = backoffMax
				}
			}
			select {
			case <-backoffT.C:
			case <-db.closeC:
				db.logf("%s exiting", name)
				db.compactionExitTransact()
			}
		}
	}
}
Example #7
0
// recover restores the session state from the manifest held by the storage.
// Callers must provide their own synchronization.
//
// Every record in the manifest is decoded and committed to a version staging
// area, and compaction pointers are saved along the way. A record that fails
// to decode aborts recovery, unless StrictManifest is off and the failure is
// a corruption error, in which case the record is logged and skipped. After
// the last record, the accumulated record must carry a matching comparer
// name, a next-file number, a journal number and a sequence number; if so,
// the session's manifest file, version and next file number are updated.
//
// A not-exist error is turned into a corruption error (missing files) when
// the storage still holds other files.
func (s *session) recover() (err error) {
	defer func() {
		if !os.IsNotExist(err) {
			return
		}
		// Don't return os.ErrNotExist if the underlying storage contains
		// other files that belong to LevelDB. So the DB won't get trashed.
		files, _ := s.stor.GetFiles(storage.TypeAll)
		if len(files) > 0 {
			err = &errors.ErrCorrupted{File: &storage.FileInfo{Type: storage.TypeManifest}, Err: &errors.ErrMissingFiles{}}
		}
	}()

	m, err := s.stor.GetManifest()
	if err != nil {
		return
	}

	reader, err := m.Open()
	if err != nil {
		return
	}
	defer reader.Close()

	strict := s.o.GetStrict(opt.StrictManifest)
	jr := journal.NewReader(reader, dropper{s, m}, strict, true)

	staging := s.stVersion.newStaging()
	rec := &sessionRecord{numLevel: s.o.GetNumLevel()}
	for {
		var chunk io.Reader
		chunk, err = jr.Next()
		if err == io.EOF {
			// End of manifest is the normal way out of the loop.
			err = nil
			break
		}
		if err != nil {
			return errors.SetFile(err, m)
		}

		if err = rec.decode(chunk); err != nil {
			err = errors.SetFile(err, m)
			if strict || !errors.IsCorrupted(err) {
				return
			}
			s.logf("manifest error: %v (skipped)", errors.SetFile(err, m))
		} else {
			// Save compact pointers.
			for _, cp := range rec.compPtrs {
				s.stCompPtrs[cp.level] = iKey(cp.ikey)
			}
			// Commit record to version staging.
			staging.commit(rec)
		}

		// Clear the per-record lists before decoding the next record.
		rec.resetCompPtrs()
		rec.resetAddedTables()
		rec.resetDeletedTables()
	}

	// Validate the fields the manifest is required to provide.
	if !rec.has(recComparer) {
		return newErrManifestCorrupted(m, "comparer", "missing")
	}
	if rec.comparer != s.icmp.uName() {
		return newErrManifestCorrupted(m, "comparer", fmt.Sprintf("mismatch: want '%s', got '%s'", s.icmp.uName(), rec.comparer))
	}
	if !rec.has(recNextFileNum) {
		return newErrManifestCorrupted(m, "next-file-num", "missing")
	}
	if !rec.has(recJournalNum) {
		return newErrManifestCorrupted(m, "journal-file-num", "missing")
	}
	if !rec.has(recSeqNum) {
		return newErrManifestCorrupted(m, "seq-num", "missing")
	}

	s.manifestFile = m
	s.setVersion(staging.finish())
	s.setNextFileNum(rec.nextFileNum)
	s.recordCommited(rec)
	return nil
}