// compactionError is a long-running goroutine that tracks the current
// compaction error state and publishes it to readers. It is a three-state
// machine (no error / transient error / persistent error) implemented with
// labels and goto; state transitions are driven by errors received on
// db.compErrSetC. While an error is set it is continuously offered on
// db.compErrC (and, for persistent/corruption errors, on db.compPerErrC).
// In the persistent state it also grabs db.writeLockC so writes stop
// passing through. Exits when db.closeC is closed.
func (db *DB) compactionError() {
	var (
		err     error // last error received on compErrSetC
		wlocked bool  // true once we hold writeLockC in the persistent state
	)
noerr:
	// No error.
	for {
		select {
		case err = <-db.compErrSetC:
			switch {
			case err == nil:
				// Still healthy; stay in this state.
			case errors.IsCorrupted(err):
				// Corruption is never recoverable here.
				goto hasperr
			default:
				goto haserr
			}
		case _, _ = <-db.closeC:
			return
		}
	}
haserr:
	// Transient error.
	for {
		select {
		case db.compErrC <- err:
			// Keep offering the current error to any reader.
		case err = <-db.compErrSetC:
			switch {
			case err == nil:
				goto noerr
			case errors.IsCorrupted(err):
				goto hasperr
			default:
				// Replaced by another transient error; stay here.
			}
		case _, _ = <-db.closeC:
			return
		}
	}
hasperr:
	// Persistent error. Note there is no transition out of this state;
	// only close ends it.
	for {
		select {
		case db.compErrC <- err:
		case db.compPerErrC <- err:
		case db.writeLockC <- struct{}{}:
			// Hold write lock, so that write won't pass-through.
			wlocked = true
		case _, _ = <-db.closeC:
			if wlocked {
				// We should release the lock or Close will hang.
				<-db.writeLockC
			}
			return
		}
	}
}
func (i *indexedIterator) dataErr() bool { if err := i.data.Error(); err != nil { if i.errf != nil { i.errf(err) } if i.strict || !errors.IsCorrupted(err) { i.err = err return true } } return false }
func (i *mergedIterator) iterErr(iter Iterator) bool { if err := iter.Error(); err != nil { if i.errf != nil { i.errf(err) } if i.strict || !errors.IsCorrupted(err) { i.err = err return true } } return false }
// recoverJournal replays the write-ahead journal files into a fresh memdb
// on open, flushing to tables as the memdb fills, then commits the
// recovered state and starts a new journal. Journal files older than the
// session's current journal number are discarded (except the recorded
// previous journal, which is kept for replay).
func (db *DB) recoverJournal() error {
	// Get all tables and sort it by file number.
	journalFiles_, err := db.s.getFiles(storage.TypeJournal)
	if err != nil {
		return err
	}
	journalFiles := files(journalFiles_)
	journalFiles.sort()

	// Discard older journal. Keep everything from the current journal
	// number onward; if the previous journal was seen earlier in the list,
	// splice it in just before the current one so it is replayed too.
	prev := -1
	for i, file := range journalFiles {
		if file.Num() >= db.s.stJournalNum {
			if prev >= 0 {
				i--
				journalFiles[i] = journalFiles[prev]
			}
			journalFiles = journalFiles[i:]
			break
		} else if file.Num() == db.s.stPrevJournalNum {
			prev = i
		}
	}

	// State shared across the per-file closure below:
	// jr is reused between files, `of` is the previous (now obsolete)
	// journal file awaiting flush+removal, mem is the replay memdb.
	var jr *journal.Reader
	var of storage.File
	var mem *memdb.DB
	batch := new(Batch)
	cm := newCMem(db.s)
	buf := new(util.Buffer)
	// Options.
	strict := db.s.o.GetStrict(opt.StrictJournal)
	checksum := db.s.o.GetStrict(opt.StrictJournalChecksum)
	writeBuffer := db.s.o.GetWriteBuffer()
	recoverJournal := func(file storage.File) error {
		db.logf("journal@recovery recovering @%d", file.Num())
		reader, err := file.Open()
		if err != nil {
			return err
		}
		defer reader.Close()

		// Create/reset journal reader instance.
		if jr == nil {
			jr = journal.NewReader(reader, dropper{db.s, file}, strict, checksum)
		} else {
			jr.Reset(reader, dropper{db.s, file}, strict, checksum)
		}

		// Flush memdb and remove obsolete journal file.
		if of != nil {
			if mem.Len() > 0 {
				if err := cm.flush(mem, 0); err != nil {
					return err
				}
			}
			if err := cm.commit(file.Num(), db.seq); err != nil {
				return err
			}
			cm.reset()
			of.Remove()
			of = nil
		}

		// Replay journal to memdb.
		mem.Reset()
		for {
			r, err := jr.Next()
			if err != nil {
				if err == io.EOF {
					break
				}
				return errors.SetFile(err, file)
			}

			buf.Reset()
			if _, err := buf.ReadFrom(r); err != nil {
				if err == io.ErrUnexpectedEOF {
					// This is error returned due to corruption, with strict == false.
					continue
				} else {
					return errors.SetFile(err, file)
				}
			}
			if err := batch.memDecodeAndReplay(db.seq, buf.Bytes(), mem); err != nil {
				if strict || !errors.IsCorrupted(err) {
					return errors.SetFile(err, file)
				} else {
					db.s.logf("journal error: %v (skipped)", err)
					// We won't apply sequence number as it might be corrupted.
					continue
				}
			}

			// Save sequence number.
			db.seq = batch.seq + uint64(batch.Len())

			// Flush it if large enough.
			if mem.Size() >= writeBuffer {
				if err := cm.flush(mem, 0); err != nil {
					return err
				}
				mem.Reset()
			}
		}

		// Remember this file; it becomes obsolete once the next file's
		// replay (or the final commit) has persisted its contents.
		of = file
		return nil
	}

	// Recover all journals.
	if len(journalFiles) > 0 {
		db.logf("journal@recovery F·%d", len(journalFiles))

		// Mark file number as used.
		db.s.markFileNum(journalFiles[len(journalFiles)-1].Num())

		mem = memdb.New(db.s.icmp, writeBuffer)
		for _, file := range journalFiles {
			if err := recoverJournal(file); err != nil {
				return err
			}
		}

		// Flush the last journal.
		if mem.Len() > 0 {
			if err := cm.flush(mem, 0); err != nil {
				return err
			}
		}
	}

	// Create a new journal.
	if _, err := db.newMem(0); err != nil {
		return err
	}

	// Commit.
	if err := cm.commit(db.journalFile.Num(), db.seq); err != nil {
		// Close journal.
		if db.journal != nil {
			db.journal.Close()
			db.journalWriter.Close()
		}
		return err
	}

	// Remove the last obsolete journal file.
	if of != nil {
		of.Remove()
	}

	return nil
}
// recoverTable rebuilds a session's manifest from the table files found in
// storage. Every table is scanned; its good keys, corrupted keys/blocks and
// max sequence number are tallied. Partially-corrupted tables are rebuilt
// (unless StrictRecovery drops them), and every surviving table is added to
// level 0 of a new session record, which is then committed with a fresh
// manifest.
func recoverTable(s *session, o *opt.Options) error {
	o = dupOptions(o)
	// Mask StrictReader, lets StrictRecovery doing its job.
	o.Strict &= ^opt.StrictReader

	// Get all tables and sort it by file number.
	tableFiles_, err := s.getFiles(storage.TypeTable)
	if err != nil {
		return err
	}
	tableFiles := files(tableFiles_)
	tableFiles.sort()

	var (
		// Totals across all tables; t-prefixed per-table counters live in
		// the closure below.
		maxSeq                                                            uint64
		recoveredKey, goodKey, corruptedKey, corruptedBlock, droppedTable int

		// We will drop corrupted table.
		strict = o.GetStrict(opt.StrictRecovery)

		rec   = &sessionRecord{numLevel: o.GetNumLevel()}
		bpool = util.NewBufferPool(o.GetBlockSize() + 5)
	)
	// buildTable writes the entries of iter into a fresh temp table file,
	// skipping keys that fail validIkey. On error the temp file is removed
	// and tmp is returned nil.
	buildTable := func(iter iterator.Iterator) (tmp storage.File, size int64, err error) {
		tmp = s.newTemp()
		writer, err := tmp.Create()
		if err != nil {
			return
		}
		defer func() {
			writer.Close()
			if err != nil {
				tmp.Remove()
				tmp = nil
			}
		}()

		// Copy entries.
		tw := table.NewWriter(writer, o)
		for iter.Next() {
			key := iter.Key()
			if validIkey(key) {
				err = tw.Append(key, iter.Value())
				if err != nil {
					return
				}
			}
		}
		err = iter.Error()
		if err != nil {
			return
		}
		err = tw.Close()
		if err != nil {
			return
		}
		err = writer.Sync()
		if err != nil {
			return
		}
		size = int64(tw.BytesLen())
		return
	}
	// recoverTable scans a single table file, counting good/corrupted keys
	// and tracking its key range and max sequence, then either drops,
	// rebuilds, or records the table.
	recoverTable := func(file storage.File) error {
		s.logf("table@recovery recovering @%d", file.Num())
		reader, err := file.Open()
		if err != nil {
			return err
		}
		var closed bool
		defer func() {
			if !closed {
				reader.Close()
			}
		}()

		// Get file size.
		size, err := reader.Seek(0, 2)
		if err != nil {
			return err
		}

		var (
			tSeq                                     uint64
			tgoodKey, tcorruptedKey, tcorruptedBlock int
			imin, imax                               []byte // smallest/largest internal key seen
		)
		tr, err := table.NewReader(reader, size, storage.NewFileInfo(file), nil, bpool, o)
		if err != nil {
			return err
		}
		iter := tr.NewIterator(nil, nil)
		// Corrupted blocks are counted via the iterator's error callback
		// rather than aborting iteration (StrictReader was masked above).
		iter.(iterator.ErrorCallbackSetter).SetErrorCallback(func(err error) {
			if errors.IsCorrupted(err) {
				s.logf("table@recovery block corruption @%d %q", file.Num(), err)
				tcorruptedBlock++
			}
		})

		// Scan the table.
		for iter.Next() {
			key := iter.Key()
			_, seq, _, kerr := parseIkey(key)
			if kerr != nil {
				tcorruptedKey++
				continue
			}
			tgoodKey++
			if seq > tSeq {
				tSeq = seq
			}
			if imin == nil {
				imin = append([]byte{}, key...)
			}
			imax = append(imax[:0], key...)
		}
		if err := iter.Error(); err != nil {
			iter.Release()
			return err
		}
		iter.Release()

		goodKey += tgoodKey
		corruptedKey += tcorruptedKey
		corruptedBlock += tcorruptedBlock

		// Strict recovery drops any table with corruption instead of
		// rebuilding it.
		if strict && (tcorruptedKey > 0 || tcorruptedBlock > 0) {
			droppedTable++
			s.logf("table@recovery dropped @%d Gk·%d Ck·%d Cb·%d S·%d Q·%d", file.Num(), tgoodKey, tcorruptedKey, tcorruptedBlock, size, tSeq)
			return nil
		}

		if tgoodKey > 0 {
			if tcorruptedKey > 0 || tcorruptedBlock > 0 {
				// Rebuild the table.
				s.logf("table@recovery rebuilding @%d", file.Num())
				iter := tr.NewIterator(nil, nil)
				tmp, newSize, err := buildTable(iter)
				iter.Release()
				if err != nil {
					return err
				}
				closed = true
				reader.Close()
				if err := file.Replace(tmp); err != nil {
					return err
				}
				size = newSize
			}
			if tSeq > maxSeq {
				maxSeq = tSeq
			}
			recoveredKey += tgoodKey
			// Add table to level 0.
			rec.addTable(0, file.Num(), uint64(size), imin, imax)
			s.logf("table@recovery recovered @%d Gk·%d Ck·%d Cb·%d S·%d Q·%d", file.Num(), tgoodKey, tcorruptedKey, tcorruptedBlock, size, tSeq)
		} else {
			droppedTable++
			s.logf("table@recovery unrecoverable @%d Ck·%d Cb·%d S·%d", file.Num(), tcorruptedKey, tcorruptedBlock, size)
		}

		return nil
	}

	// Recover all tables.
	if len(tableFiles) > 0 {
		s.logf("table@recovery F·%d", len(tableFiles))

		// Mark file number as used.
		s.markFileNum(tableFiles[len(tableFiles)-1].Num())

		for _, file := range tableFiles {
			if err := recoverTable(file); err != nil {
				return err
			}
		}

		s.logf("table@recovery recovered F·%d N·%d Gk·%d Ck·%d Q·%d", len(tableFiles), recoveredKey, goodKey, corruptedKey, maxSeq)
	}

	// Set sequence number.
	rec.setSeqNum(maxSeq)

	// Create new manifest.
	if err := s.create(); err != nil {
		return err
	}

	// Commit.
	return s.commit(rec)
}
func (db *DB) compactionTransact(name string, t compactionTransactInterface) { defer func() { if x := recover(); x != nil { if x == errCompactionTransactExiting { if err := t.revert(); err != nil { db.logf("%s revert error %q", name, err) } } panic(x) } }() const ( backoffMin = 1 * time.Second backoffMax = 8 * time.Second backoffMul = 2 * time.Second ) var ( backoff = backoffMin backoffT = time.NewTimer(backoff) lastCnt = compactionTransactCounter(0) disableBackoff = db.s.o.GetDisableCompactionBackoff() ) for n := 0; ; n++ { // Check wether the DB is closed. if db.isClosed() { db.logf("%s exiting", name) db.compactionExitTransact() } else if n > 0 { db.logf("%s retrying N·%d", name, n) } // Execute. cnt := compactionTransactCounter(0) err := t.run(&cnt) if err != nil { db.logf("%s error I·%d %q", name, cnt, err) } // Set compaction error status. select { case db.compErrSetC <- err: case perr := <-db.compPerErrC: if err != nil { db.logf("%s exiting (persistent error %q)", name, perr) db.compactionExitTransact() } case _, _ = <-db.closeC: db.logf("%s exiting", name) db.compactionExitTransact() } if err == nil { return } if errors.IsCorrupted(err) { db.logf("%s exiting (corruption detected)", name) db.compactionExitTransact() } if !disableBackoff { // Reset backoff duration if counter is advancing. if cnt > lastCnt { backoff = backoffMin lastCnt = cnt } // Backoff. backoffT.Reset(backoff) if backoff < backoffMax { backoff *= backoffMul if backoff > backoffMax { backoff = backoffMax } } select { case <-backoffT.C: case _, _ = <-db.closeC: db.logf("%s exiting", name) db.compactionExitTransact() } } } }
// Recover a database session; need external synchronization.
//
// recover reads the current manifest journal and replays each session
// record into a version staging area, validating that the required fields
// (comparer, next-file-num, journal-file-num, seq-num) are all present
// before installing the finished version.
func (s *session) recover() (err error) {
	defer func() {
		if os.IsNotExist(err) {
			// Don't return os.ErrNotExist if the underlying storage contains
			// other files that belong to LevelDB. So the DB won't get trashed.
			if files, _ := s.stor.GetFiles(storage.TypeAll); len(files) > 0 {
				err = &errors.ErrCorrupted{File: &storage.FileInfo{Type: storage.TypeManifest}, Err: &errors.ErrMissingFiles{}}
			}
		}
	}()

	m, err := s.stor.GetManifest()
	if err != nil {
		return
	}
	reader, err := m.Open()
	if err != nil {
		return
	}
	defer reader.Close()
	strict := s.o.GetStrict(opt.StrictManifest)
	jr := journal.NewReader(reader, dropper{s, m}, strict, true)

	staging := s.stVersion.newStaging()
	// rec is reused across records; only compact pointers and table
	// add/delete lists are reset per iteration, so scalar fields (comparer,
	// seq-num, ...) accumulate across the whole manifest.
	rec := &sessionRecord{numLevel: s.o.GetNumLevel()}
	for {
		var r io.Reader
		r, err = jr.Next()
		if err != nil {
			if err == io.EOF {
				err = nil
				break
			}
			return errors.SetFile(err, m)
		}

		err = rec.decode(r)
		if err == nil {
			// save compact pointers
			for _, r := range rec.compPtrs {
				s.stCompPtrs[r.level] = iKey(r.ikey)
			}
			// commit record to version staging
			staging.commit(rec)
		} else {
			err = errors.SetFile(err, m)
			if strict || !errors.IsCorrupted(err) {
				return
			} else {
				// Non-strict mode: skip corrupted manifest records.
				s.logf("manifest error: %v (skipped)", errors.SetFile(err, m))
			}
		}
		rec.resetCompPtrs()
		rec.resetAddedTables()
		rec.resetDeletedTables()
	}

	// A usable manifest must have supplied all of these fields at least once.
	switch {
	case !rec.has(recComparer):
		return newErrManifestCorrupted(m, "comparer", "missing")
	case rec.comparer != s.icmp.uName():
		return newErrManifestCorrupted(m, "comparer", fmt.Sprintf("mismatch: want '%s', got '%s'", s.icmp.uName(), rec.comparer))
	case !rec.has(recNextFileNum):
		return newErrManifestCorrupted(m, "next-file-num", "missing")
	case !rec.has(recJournalNum):
		return newErrManifestCorrupted(m, "journal-file-num", "missing")
	case !rec.has(recSeqNum):
		return newErrManifestCorrupted(m, "seq-num", "missing")
	}

	s.manifestFile = m
	s.setVersion(staging.finish())
	s.setNextFileNum(rec.nextFileNum)
	s.recordCommited(rec)
	return nil
}