func (t *tOps) lookup(f *tFile) (c cache.Object, err error) {
	num := f.file.Num()
	c, ok := t.cacheNS.Get(num, func() (ok bool, value interface{}, charge int, fin cache.SetFin) {
		var r storage.Reader
		r, err = f.file.Open()
		if err != nil {
			return
		}

		o := t.s.o

		var cacheNS cache.Namespace
		if bc := o.GetBlockCache(); bc != nil {
			cacheNS = bc.GetNamespace(num)
		}

		ok = true
		value = table.NewReader(r, int64(f.size), cacheNS, o)
		charge = 1
		fin = func() {
			r.Close()
		}
		return
	})
	if !ok && err == nil {
		err = ErrClosed
	}
	return
}
// Opens table. It returns a cache handle, which should
// be released after use.
func (t *tOps) open(f *tFile) (ch *cache.Handle, err error) {
	ch = t.cache.Get(0, uint64(f.fd.Num), func() (size int, value cache.Value) {
		var r storage.Reader
		r, err = t.s.stor.Open(f.fd)
		if err != nil {
			return 0, nil
		}

		var bcache *cache.NamespaceGetter
		if t.bcache != nil {
			bcache = &cache.NamespaceGetter{Cache: t.bcache, NS: uint64(f.fd.Num)}
		}

		var tr *table.Reader
		tr, err = table.NewReader(r, f.size, f.fd, bcache, t.bpool, t.s.o.Options)
		if err != nil {
			r.Close()
			return 0, nil
		}
		return 1, tr
	})
	if ch == nil && err == nil {
		err = ErrClosed
	}
	return
}
// Opens table. It returns a cache handle, which should
// be released after use.
func (t *tOps) open(f *tFile) (ch *cache.Handle, err error) {
	num := f.file.Num()
	ch = t.cache.Get(0, num, func() (size int, value cache.Value) {
		var r storage.Reader
		r, err = f.file.Open()
		if err != nil {
			return 0, nil
		}

		var bcache *cache.CacheGetter
		if t.bcache != nil {
			bcache = &cache.CacheGetter{Cache: t.bcache, NS: num}
		}

		var tr *table.Reader
		tr, err = table.NewReader(r, int64(f.size), storage.NewFileInfo(f.file), bcache, t.bpool, t.s.o.Options)
		if err != nil {
			r.Close()
			return 0, nil
		}
		return 1, tr
	})
	if ch == nil && err == nil {
		err = ErrClosed
	}
	return
}
// Opens table. It returns a cache handle, which should
// be released after use.
func (t *tOps) open(f *tFile) (ch cache.Handle, err error) {
	t.mu.Lock()
	defer t.mu.Unlock()

	num := f.file.Num()
	ch = t.cacheNS.Get(num, func() (charge int, value interface{}) {
		var r storage.Reader
		r, err = f.file.Open()
		if err != nil {
			return 0, nil
		}

		var bcacheNS cache.Namespace
		if bc := t.s.o.GetBlockCache(); bc != nil {
			bcacheNS = bc.GetNamespace(num)
		}
		return 1, &trWrapper{table.NewReader(r, int64(f.size), bcacheNS, t.bpool, t.s.o), t, 0}
	})
	if ch == nil {
		// Return early so we never dereference a nil handle below.
		if err == nil {
			err = ErrClosed
		}
		return
	}
	ch.Value().(*trWrapper).ref++
	ch = &trCacheHandleWrapper{ch, t, false}
	return
}
// Opens table. It returns a cache handle, which should
// be released after use.
func (t *tOps) open(f *tFile) (ch cache.Handle, err error) {
	num := f.file.Num()
	ch = t.cacheNS.Get(num, func() (charge int, value interface{}) {
		var r storage.Reader
		r, err = f.file.Open()
		if err != nil {
			return 0, nil
		}

		var bcacheNS cache.Namespace
		if bc := t.s.o.GetBlockCache(); bc != nil {
			bcacheNS = bc.GetNamespace(num)
		}

		var tr *table.Reader
		tr, err = table.NewReader(r, int64(f.size), storage.NewFileInfo(f.file), bcacheNS, t.bpool, t.s.o.Options)
		if err != nil {
			r.Close()
			return 0, nil
		}
		return 1, tr
	})
	if ch == nil && err == nil {
		err = ErrClosed
	}
	return
}
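A minimal caller sketch for the handle-returning open variants above, assuming the surrounding tOps/tFile package context and an imported fmt; the helper name dumpTable is hypothetical and not part of the original code. It shows the intended lifecycle: the cached value is the *table.Reader built in the closure, and the handle is released once the reader is no longer in use.

// Hypothetical usage sketch (not from the original source): iterate a table
// file through the cache handle returned by open, then release the handle.
func dumpTable(t *tOps, f *tFile) error {
	ch, err := t.open(f)
	if err != nil {
		return err
	}
	defer ch.Release() // give back the reference acquired by open

	tr := ch.Value().(*table.Reader) // value stored by the open closure
	iter := tr.NewIterator(nil, nil)
	defer iter.Release()
	for iter.Next() {
		fmt.Printf("%x -> %x\n", iter.Key(), iter.Value())
	}
	return iter.Error()
}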
func (tc *stConstructor_Table) finish() (size int, err error) {
	err = tc.writer.Close()
	if err != nil {
		return
	}

	tc.t.Logf("table: contains %d entries and %d blocks", tc.writer.EntriesLen(), tc.writer.BlocksLen())

	size = tc.buf.Len()
	if csize := int(tc.writer.BytesLen()); csize != size {
		tc.t.Errorf("table: invalid calculated size, calculated=%d actual=%d", csize, size)
	}

	tc.reader = table.NewReader(bytes.NewReader(tc.buf.Bytes()), int64(size), nil, tc.o)
	return
}
func (p *stConstructor_Table) finish() (size int, err error) {
	p.t.Logf("table: contains %d entries and %d blocks", p.tw.Len(), p.tw.CountBlock())

	err = p.tw.Finish()
	if err != nil {
		return
	}
	p.w.Close()

	tsize := uint64(p.tw.Size())
	fsize, _ := p.file.Size()
	if fsize != tsize {
		p.t.Errorf("table: calculated size doesn't equal actual size, calculated=%d actual=%d", tsize, fsize)
	}

	p.r, _ = p.file.Open()
	o := &opt.Options{
		BlockRestartInterval: 3,
		Filter:               filter.NewBloomFilter(10),
	}
	p.tr, err = table.NewReader(p.r, fsize, o, nil)
	// Propagate any NewReader error instead of silently returning nil.
	return int(fsize), err
}
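For comparison with the two test constructors above, a stripped-down write-then-read round trip, assuming the older four-argument table.NewReader(r, size, cache, o) form from the first constructor and that table.NewWriter accepts any io.Writer; the helper name, keys, and panic-based error handling are illustrative only.

// Illustrative round-trip sketch (not from the original source), assuming the
// API revision in which NewReader does not return an error.
func buildAndOpen(o *opt.Options) *table.Reader {
	var buf bytes.Buffer

	// Append a few sorted entries, then finalize the table.
	tw := table.NewWriter(&buf, o)
	for _, k := range []string{"a", "b", "c"} {
		if err := tw.Append([]byte(k), []byte("v-"+k)); err != nil {
			panic(err)
		}
	}
	if err := tw.Close(); err != nil {
		panic(err)
	}

	// Re-open the serialized bytes through the table reader.
	return table.NewReader(bytes.NewReader(buf.Bytes()), int64(buf.Len()), nil, o)
}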
func (t *tOps) lookup(f *tFile) (c cache.Object, err error) {
	num := f.file.Num()
	c, _ = t.cachens.Get(num, func() (ok bool, value interface{}, charge int, fin func()) {
		var r storage.Reader
		r, err = f.file.Open()
		if err != nil {
			return
		}

		o := t.s.o

		var ns cache.Namespace
		bc := o.GetBlockCache()
		if bc != nil {
			ns = bc.GetNamespace(num)
		}

		var p *table.Reader
		p, err = table.NewReader(r, f.size, t.s.o, ns)
		if err != nil {
			// Close the file reader on failure so it doesn't leak.
			r.Close()
			return
		}

		ok = true
		value = p
		charge = 1
		fin = func() {
			r.Close()
		}
		return
	})
	return
}
func recoverTable(s *session, o *opt.Options) error {
	// Get all tables and sort them by file number.
	tableFiles_, err := s.getFiles(storage.TypeTable)
	if err != nil {
		return err
	}
	tableFiles := files(tableFiles_)
	tableFiles.sort()

	var mSeq uint64
	var good, corrupted int
	rec := new(sessionRecord)
	bpool := util.NewBufferPool(o.GetBlockSize() + 5)

	buildTable := func(iter iterator.Iterator) (tmp storage.File, size int64, err error) {
		tmp = s.newTemp()
		writer, err := tmp.Create()
		if err != nil {
			return
		}
		defer func() {
			writer.Close()
			if err != nil {
				tmp.Remove()
				tmp = nil
			}
		}()

		// Copy entries.
		tw := table.NewWriter(writer, o)
		for iter.Next() {
			key := iter.Key()
			if validIkey(key) {
				err = tw.Append(key, iter.Value())
				if err != nil {
					return
				}
			}
		}
		err = iter.Error()
		if err != nil {
			return
		}
		err = tw.Close()
		if err != nil {
			return
		}
		err = writer.Sync()
		if err != nil {
			return
		}
		size = int64(tw.BytesLen())
		return
	}

	recoverTable := func(file storage.File) error {
		s.logf("table@recovery recovering @%d", file.Num())
		reader, err := file.Open()
		if err != nil {
			return err
		}
		defer reader.Close()

		// Get file size.
		size, err := reader.Seek(0, 2)
		if err != nil {
			return err
		}

		var tSeq uint64
		var tgood, tcorrupted, blockerr int
		var imin, imax []byte
		tr := table.NewReader(reader, size, nil, bpool, o)
		iter := tr.NewIterator(nil, nil)
		iter.(iterator.ErrorCallbackSetter).SetErrorCallback(func(err error) {
			s.logf("table@recovery found error @%d %q", file.Num(), err)
			blockerr++
		})

		// Scan the table.
		for iter.Next() {
			key := iter.Key()
			_, seq, _, ok := parseIkey(key)
			if !ok {
				tcorrupted++
				continue
			}
			tgood++
			if seq > tSeq {
				tSeq = seq
			}
			if imin == nil {
				imin = append([]byte{}, key...)
			}
			imax = append(imax[:0], key...)
		}
		if err := iter.Error(); err != nil {
			iter.Release()
			return err
		}
		iter.Release()

		if tgood > 0 {
			if tcorrupted > 0 || blockerr > 0 {
				// Rebuild the table.
				s.logf("table@recovery rebuilding @%d", file.Num())
				iter := tr.NewIterator(nil, nil)
				tmp, newSize, err := buildTable(iter)
				iter.Release()
				if err != nil {
					return err
				}
				reader.Close()
				if err := file.Replace(tmp); err != nil {
					return err
				}
				size = newSize
			}
			if tSeq > mSeq {
				mSeq = tSeq
			}
			// Add table to level 0.
			rec.addTable(0, file.Num(), uint64(size), imin, imax)
			s.logf("table@recovery recovered @%d N·%d C·%d B·%d S·%d Q·%d", file.Num(), tgood, tcorrupted, blockerr, size, tSeq)
		} else {
			s.logf("table@recovery unrecoverable @%d C·%d B·%d S·%d", file.Num(), tcorrupted, blockerr, size)
		}

		good += tgood
		corrupted += tcorrupted

		return nil
	}

	// Recover all tables.
	if len(tableFiles) > 0 {
		s.logf("table@recovery F·%d", len(tableFiles))

		// Mark file number as used.
		s.markFileNum(tableFiles[len(tableFiles)-1].Num())

		for _, file := range tableFiles {
			if err := recoverTable(file); err != nil {
				return err
			}
		}

		s.logf("table@recovery recovered F·%d N·%d C·%d Q·%d", len(tableFiles), good, corrupted, mSeq)
	}

	// Set sequence number.
	rec.setSeq(mSeq + 1)

	// Create new manifest.
	if err := s.create(); err != nil {
		return err
	}

	// Commit.
	return s.commit(rec)
}
func (ts *testingStorage) scanTable(fd storage.FileDesc, checksum bool) (corrupted bool) {
	r, err := ts.Open(fd)
	if err != nil {
		log.Fatal(err)
	}
	defer r.Close()

	size, err := r.Seek(0, os.SEEK_END)
	if err != nil {
		log.Fatal(err)
	}

	o := &opt.Options{
		DisableLargeBatchTransaction: true,
		Strict:                       opt.NoStrict,
	}
	if checksum {
		o.Strict = opt.StrictBlockChecksum | opt.StrictReader
	}
	tr, err := table.NewReader(r, size, fd, nil, bpool, o)
	if err != nil {
		log.Fatal(err)
	}
	defer tr.Release()

	checkData := func(i int, t string, data []byte) bool {
		if len(data) == 0 {
			panic(fmt.Sprintf("[%v] nil data: i=%d t=%s", fd, i, t))
		}

		checksum0, checksum1 := dataChecksum(data)
		if checksum0 != checksum1 {
			atomic.StoreUint32(&fail, 1)
			atomic.StoreUint32(&done, 1)
			corrupted = true

			data0, data1 := dataSplit(data)
			data0c0, data0c1 := dataChecksum(data0)
			data1c0, data1c1 := dataChecksum(data1)
			log.Printf("FATAL: [%v] Corrupted data i=%d t=%s (%#x != %#x): %x(%v) vs %x(%v)",
				fd, i, t, checksum0, checksum1, data0, data0c0 == data0c1, data1, data1c0 == data1c1)
			return true
		}
		return false
	}

	iter := tr.NewIterator(nil, nil)
	defer iter.Release()
	for i := 0; iter.Next(); i++ {
		ukey, _, kt, kerr := parseIkey(iter.Key())
		if kerr != nil {
			atomic.StoreUint32(&fail, 1)
			atomic.StoreUint32(&done, 1)
			corrupted = true

			log.Printf("FATAL: [%v] Corrupted ikey i=%d: %v", fd, i, kerr)
			return
		}
		if checkData(i, "key", ukey) {
			return
		}
		if kt == ktVal && checkData(i, "value", iter.Value()) {
			return
		}
	}
	if err := iter.Error(); err != nil {
		if errors.IsCorrupted(err) {
			atomic.StoreUint32(&fail, 1)
			atomic.StoreUint32(&done, 1)
			corrupted = true

			log.Printf("FATAL: [%v] Corruption detected: %v", fd, err)
		} else {
			log.Fatal(err)
		}
	}

	return
}
func recoverTable(s *session, o *opt.Options) error {
	o = dupOptions(o)
	// Mask StrictReader, let StrictRecovery do its job.
	o.Strict &= ^opt.StrictReader

	// Get all tables and sort them by file number.
	tableFiles_, err := s.getFiles(storage.TypeTable)
	if err != nil {
		return err
	}
	tableFiles := files(tableFiles_)
	tableFiles.sort()

	var (
		maxSeq                                                            uint64
		recoveredKey, goodKey, corruptedKey, corruptedBlock, droppedTable int

		// We will drop corrupted table.
		strict = o.GetStrict(opt.StrictRecovery)

		rec   = &sessionRecord{numLevel: o.GetNumLevel()}
		bpool = util.NewBufferPool(o.GetBlockSize() + 5)
	)

	buildTable := func(iter iterator.Iterator) (tmp storage.File, size int64, err error) {
		tmp = s.newTemp()
		writer, err := tmp.Create()
		if err != nil {
			return
		}
		defer func() {
			writer.Close()
			if err != nil {
				tmp.Remove()
				tmp = nil
			}
		}()

		// Copy entries.
		tw := table.NewWriter(writer, o)
		for iter.Next() {
			key := iter.Key()
			if validIkey(key) {
				err = tw.Append(key, iter.Value())
				if err != nil {
					return
				}
			}
		}
		err = iter.Error()
		if err != nil {
			return
		}
		err = tw.Close()
		if err != nil {
			return
		}
		err = writer.Sync()
		if err != nil {
			return
		}
		size = int64(tw.BytesLen())
		return
	}

	recoverTable := func(file storage.File) error {
		s.logf("table@recovery recovering @%d", file.Num())
		reader, err := file.Open()
		if err != nil {
			return err
		}
		var closed bool
		defer func() {
			if !closed {
				reader.Close()
			}
		}()

		// Get file size.
		size, err := reader.Seek(0, 2)
		if err != nil {
			return err
		}

		var (
			tSeq                                     uint64
			tgoodKey, tcorruptedKey, tcorruptedBlock int
			imin, imax                               []byte
		)
		tr, err := table.NewReader(reader, size, storage.NewFileInfo(file), nil, bpool, o)
		if err != nil {
			return err
		}
		iter := tr.NewIterator(nil, nil)
		iter.(iterator.ErrorCallbackSetter).SetErrorCallback(func(err error) {
			if errors.IsCorrupted(err) {
				s.logf("table@recovery block corruption @%d %q", file.Num(), err)
				tcorruptedBlock++
			}
		})

		// Scan the table.
		for iter.Next() {
			key := iter.Key()
			_, seq, _, kerr := parseIkey(key)
			if kerr != nil {
				tcorruptedKey++
				continue
			}
			tgoodKey++
			if seq > tSeq {
				tSeq = seq
			}
			if imin == nil {
				imin = append([]byte{}, key...)
			}
			imax = append(imax[:0], key...)
		}
		if err := iter.Error(); err != nil {
			iter.Release()
			return err
		}
		iter.Release()

		goodKey += tgoodKey
		corruptedKey += tcorruptedKey
		corruptedBlock += tcorruptedBlock

		if strict && (tcorruptedKey > 0 || tcorruptedBlock > 0) {
			droppedTable++
			s.logf("table@recovery dropped @%d Gk·%d Ck·%d Cb·%d S·%d Q·%d", file.Num(), tgoodKey, tcorruptedKey, tcorruptedBlock, size, tSeq)
			return nil
		}

		if tgoodKey > 0 {
			if tcorruptedKey > 0 || tcorruptedBlock > 0 {
				// Rebuild the table.
				s.logf("table@recovery rebuilding @%d", file.Num())
				iter := tr.NewIterator(nil, nil)
				tmp, newSize, err := buildTable(iter)
				iter.Release()
				if err != nil {
					return err
				}
				closed = true
				reader.Close()
				if err := file.Replace(tmp); err != nil {
					return err
				}
				size = newSize
			}
			if tSeq > maxSeq {
				maxSeq = tSeq
			}
			recoveredKey += tgoodKey
			// Add table to level 0.
			rec.addTable(0, file.Num(), uint64(size), imin, imax)
			s.logf("table@recovery recovered @%d Gk·%d Ck·%d Cb·%d S·%d Q·%d", file.Num(), tgoodKey, tcorruptedKey, tcorruptedBlock, size, tSeq)
		} else {
			droppedTable++
			s.logf("table@recovery unrecoverable @%d Ck·%d Cb·%d S·%d", file.Num(), tcorruptedKey, tcorruptedBlock, size)
		}

		return nil
	}

	// Recover all tables.
	if len(tableFiles) > 0 {
		s.logf("table@recovery F·%d", len(tableFiles))

		// Mark file number as used.
		s.markFileNum(tableFiles[len(tableFiles)-1].Num())

		for _, file := range tableFiles {
			if err := recoverTable(file); err != nil {
				return err
			}
		}

		s.logf("table@recovery recovered F·%d N·%d Gk·%d Ck·%d Q·%d", len(tableFiles), recoveredKey, goodKey, corruptedKey, maxSeq)
	}

	// Set sequence number.
	rec.setSeqNum(maxSeq)

	// Create new manifest.
	if err := s.create(); err != nil {
		return err
	}

	// Commit.
	return s.commit(rec)
}
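The type assertion on iterator.ErrorCallbackSetter is what lets both recoverTable variants keep scanning past corrupted blocks instead of aborting on the first error. Below is a stand-alone sketch of that pattern; the helper name and counters are illustrative assumptions, not part of the original code.

// Sketch of the error-callback scanning pattern used during recovery
// (hypothetical helper, not from the original source).
func countCorruptedBlocks(tr *table.Reader) (keys, corruptedBlocks int, err error) {
	iter := tr.NewIterator(nil, nil)
	defer iter.Release()

	// Only iterators that implement the setter report block-level errors
	// through the callback; others simply stop with an error.
	if ecs, ok := iter.(iterator.ErrorCallbackSetter); ok {
		ecs.SetErrorCallback(func(cerr error) {
			if errors.IsCorrupted(cerr) {
				corruptedBlocks++
			}
		})
	}
	for iter.Next() {
		keys++
	}
	return keys, corruptedBlocks, iter.Error()
}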