// NewReader creates a new initialized table reader for the file. // The fi, cache and bpool is optional and can be nil. // // The returned table reader instance is goroutine-safe. func NewReader(f io.ReaderAt, size int64, fd storage.FileDesc, cache *cache.NamespaceGetter, bpool *util.BufferPool, o *opt.Options) (*Reader, error) { if f == nil { return nil, errors.New("leveldb/table: nil file") } r := &Reader{ fd: fd, reader: f, cache: cache, bpool: bpool, o: o, cmp: o.GetComparer(), verifyChecksum: o.GetStrict(opt.StrictBlockChecksum), } if size < footerLen { r.err = r.newErrCorrupted(0, size, "table", "too small") return r, nil } footerPos := size - footerLen var footer [footerLen]byte if _, err := r.reader.ReadAt(footer[:], footerPos); err != nil && err != io.EOF { return nil, err } if string(footer[footerLen-len(magic):footerLen]) != magic { r.err = r.newErrCorrupted(footerPos, footerLen, "table-footer", "bad magic number") return r, nil } var n int // Decode the metaindex block handle. r.metaBH, n = decodeBlockHandle(footer[:]) if n == 0 { r.err = r.newErrCorrupted(footerPos, footerLen, "table-footer", "bad metaindex block handle") return r, nil } // Decode the index block handle. r.indexBH, n = decodeBlockHandle(footer[n:]) if n == 0 { r.err = r.newErrCorrupted(footerPos, footerLen, "table-footer", "bad index block handle") return r, nil } // Read metaindex block. metaBlock, err := r.readBlock(r.metaBH, true) if err != nil { if errors.IsCorrupted(err) { r.err = err return r, nil } else { return nil, err } } // Set data end. r.dataEnd = int64(r.metaBH.offset) // Read metaindex. metaIter := r.newBlockIter(metaBlock, nil, nil, true) for metaIter.Next() { key := string(metaIter.Key()) if !strings.HasPrefix(key, "filter.") { continue } fn := key[7:] if f0 := o.GetFilter(); f0 != nil && f0.Name() == fn { r.filter = f0 } else { for _, f0 := range o.GetAltFilters() { if f0.Name() == fn { r.filter = f0 break } } } if r.filter != nil { filterBH, n := decodeBlockHandle(metaIter.Value()) if n == 0 { continue } r.filterBH = filterBH // Update data end. r.dataEnd = int64(filterBH.offset) break } } metaIter.Release() metaBlock.Release() // Cache index and filter block locally, since we don't have global cache. if cache == nil { r.indexBlock, err = r.readBlock(r.indexBH, true) if err != nil { if errors.IsCorrupted(err) { r.err = err return r, nil } else { return nil, err } } if r.filter != nil { r.filterBlock, err = r.readFilterBlock(r.filterBH) if err != nil { if !errors.IsCorrupted(err) { return nil, err } // Don't use filter then. r.filter = nil } } } return r, nil }
func recoverTable(s *session, o *opt.Options) error { o = dupOptions(o) // Mask StrictReader, lets StrictRecovery doing its job. o.Strict &= ^opt.StrictReader // Get all tables and sort it by file number. fds, err := s.stor.List(storage.TypeTable) if err != nil { return err } sortFds(fds) var ( maxSeq uint64 recoveredKey, goodKey, corruptedKey, corruptedBlock, droppedTable int // We will drop corrupted table. strict = o.GetStrict(opt.StrictRecovery) noSync = o.GetNoSync() rec = &sessionRecord{} bpool = util.NewBufferPool(o.GetBlockSize() + 5) ) buildTable := func(iter iterator.Iterator) (tmpFd storage.FileDesc, size int64, err error) { tmpFd = s.newTemp() writer, err := s.stor.Create(tmpFd) if err != nil { return } defer func() { writer.Close() if err != nil { s.stor.Remove(tmpFd) tmpFd = storage.FileDesc{} } }() // Copy entries. tw := table.NewWriter(writer, o) for iter.Next() { key := iter.Key() if validInternalKey(key) { err = tw.Append(key, iter.Value()) if err != nil { return } } } err = iter.Error() if err != nil { return } err = tw.Close() if err != nil { return } if !noSync { err = writer.Sync() if err != nil { return } } size = int64(tw.BytesLen()) return } recoverTable := func(fd storage.FileDesc) error { s.logf("table@recovery recovering @%d", fd.Num) reader, err := s.stor.Open(fd) if err != nil { return err } var closed bool defer func() { if !closed { reader.Close() } }() // Get file size. size, err := reader.Seek(0, 2) if err != nil { return err } var ( tSeq uint64 tgoodKey, tcorruptedKey, tcorruptedBlock int imin, imax []byte ) tr, err := table.NewReader(reader, size, fd, nil, bpool, o) if err != nil { return err } iter := tr.NewIterator(nil, nil) if itererr, ok := iter.(iterator.ErrorCallbackSetter); ok { itererr.SetErrorCallback(func(err error) { if errors.IsCorrupted(err) { s.logf("table@recovery block corruption @%d %q", fd.Num, err) tcorruptedBlock++ } }) } // Scan the table. for iter.Next() { key := iter.Key() _, seq, _, kerr := parseInternalKey(key) if kerr != nil { tcorruptedKey++ continue } tgoodKey++ if seq > tSeq { tSeq = seq } if imin == nil { imin = append([]byte{}, key...) } imax = append(imax[:0], key...) } if err := iter.Error(); err != nil { iter.Release() return err } iter.Release() goodKey += tgoodKey corruptedKey += tcorruptedKey corruptedBlock += tcorruptedBlock if strict && (tcorruptedKey > 0 || tcorruptedBlock > 0) { droppedTable++ s.logf("table@recovery dropped @%d Gk·%d Ck·%d Cb·%d S·%d Q·%d", fd.Num, tgoodKey, tcorruptedKey, tcorruptedBlock, size, tSeq) return nil } if tgoodKey > 0 { if tcorruptedKey > 0 || tcorruptedBlock > 0 { // Rebuild the table. s.logf("table@recovery rebuilding @%d", fd.Num) iter := tr.NewIterator(nil, nil) tmpFd, newSize, err := buildTable(iter) iter.Release() if err != nil { return err } closed = true reader.Close() if err := s.stor.Rename(tmpFd, fd); err != nil { return err } size = newSize } if tSeq > maxSeq { maxSeq = tSeq } recoveredKey += tgoodKey // Add table to level 0. rec.addTable(0, fd.Num, size, imin, imax) s.logf("table@recovery recovered @%d Gk·%d Ck·%d Cb·%d S·%d Q·%d", fd.Num, tgoodKey, tcorruptedKey, tcorruptedBlock, size, tSeq) } else { droppedTable++ s.logf("table@recovery unrecoverable @%d Ck·%d Cb·%d S·%d", fd.Num, tcorruptedKey, tcorruptedBlock, size) } return nil } // Recover all tables. if len(fds) > 0 { s.logf("table@recovery F·%d", len(fds)) // Mark file number as used. s.markFileNum(fds[len(fds)-1].Num) for _, fd := range fds { if err := recoverTable(fd); err != nil { return err } } s.logf("table@recovery recovered F·%d N·%d Gk·%d Ck·%d Q·%d", len(fds), recoveredKey, goodKey, corruptedKey, maxSeq) } // Set sequence number. rec.setSeqNum(maxSeq) // Create new manifest. if err := s.create(); err != nil { return err } // Commit. return s.commit(rec) }