// WriteAt writes len(b) bytes of b into the bitFiler starting at offset off.
//
// The bitFiler is a sparse, page-based copy-on-write overlay: a page touched
// for the first time is allocated lazily and, when a parent Filer exists,
// pre-seeded with the parent's content at the same offset, so unwritten bytes
// of a dirty page still read through from the parent. Every byte written is
// recorded in the page's per-byte bitmap (pg.flags) and the page is marked
// dirty. f.size grows to cover the written range. err is non-nil only when
// reading a page's backing data from the parent fails with a non-EOF error.
func (f *bitFiler) WriteAt(b []byte, off int64) (n int, err error) {
	off0 := off
	pgI := off >> bfBits     // index of the first page touched
	pgO := int(off & bfMask) // byte offset within that page
	n = len(b)
	rem := n
	var nc int
	for rem != 0 {
		pg := f.m[pgI]
		if pg == nil {
			// First touch of this page: allocate it and, if there is a
			// parent, seed it with the parent's content. EOF from the
			// parent is expected near its end and deliberately ignored.
			pg = &bitPage{}
			if f.parent != nil {
				_, err = f.parent.ReadAt(pg.data[:], off&^bfMask)
				if err != nil && !fileutil.IsEOF(err) {
					return
				}
				err = nil
			}
			f.m[pgI] = pg
		}
		// copy is bounded by the remaining space in this page.
		nc = copy(pg.data[pgO:], b)
		pgI++
		pg.dirty = true
		// Record each written byte in the page's dirty-byte bitmap.
		for i := pgO; i < pgO+nc; i++ {
			pg.flags[i>>3] |= bitmask[i&7]
		}
		pgO = 0 // all pages after the first are written from their start
		rem -= nc
		b = b[nc:]
		off += int64(nc)
	}
	f.size = mathutil.MaxInt64(f.size, off0+int64(n))
	return
}
func (f *bitFiler) ReadAt(b []byte, off int64) (n int, err error) { avail := f.size - off pgI := off >> bfBits pgO := int(off & bfMask) rem := len(b) if int64(rem) >= avail { rem = int(avail) err = io.EOF } for rem != 0 && avail > 0 { pg := f.m[pgI] if pg == nil { pg = &bitPage{} if f.parent != nil { _, err = f.parent.ReadAt(pg.data[:], off&^bfMask) if err != nil && !fileutil.IsEOF(err) { return } err = nil } f.m[pgI] = pg } nc := copy(b[:mathutil.Min(rem, bfSize)], pg.data[pgO:]) pgI++ pgO = 0 rem -= nc n += nc b = b[nc:] off += int64(nc) } return }
// WriteTo is a helper to copy/persist File's content to w. If w is also // an io.WriterAt then WriteTo may attempt to _not_ write any big, for some // value of big, runs of zeros, i.e. it will attempt to punch holes, where // possible, in `w` if that happens to be a freshly created or to zero length // truncated OS file. 'n' reports the number of bytes written to 'w'. func (f *File) WriteTo(w io.Writer) (n int64, err error) { var ( b [pgSize]byte wn, rn int off int64 rerr error ) if wa, ok := w.(io.WriterAt); ok { fsize, err := f.Size() if err != nil { return n, err } lastPgI := fsize >> pgBits for pgI := int64(0); pgI <= lastPgI; pgI++ { sz := pgSize if pgI == lastPgI { sz = int(fsize & pgMask) } v, err := (*Array)(f).Get(pgI) if err != nil { return n, err } pg, _ := v.([]byte) if len(pg) != 0 { wn, err = wa.WriteAt(pg[:sz], off) if err != nil { return n, err } n += int64(wn) off += int64(sz) if wn != sz { return n, io.ErrShortWrite } } } return n, err } var werr error for rerr == nil { if rn, rerr = f.ReadAt(b[:], off); rn != 0 { off += int64(rn) if wn, werr = w.Write(b[:rn]); werr != nil { return n, werr } n += int64(wn) } } if !fileutil.IsEOF(rerr) { err = rerr } return }
// Dump outputs a human readable dump of t to w. It is usable iff t keys and // values are encoded scalars (see EncodeScalars). Intended use is only for // examples or debugging. Some type information is lost in the rendering, for // example a float value '17.' and an integer value '17' may both output as // '17'. func (t *BTree) Dump(w io.Writer) (err error) { enum, err := t.seekFirst() if err != nil { return } for { bkey, bval, err := enum.current() if err != nil { return err } key, err := DecodeScalars(bkey) if err != nil { return err } val, err := DecodeScalars(bval) if err != nil { return err } kk := []string{} if key == nil { kk = []string{"null"} } for _, v := range key { kk = append(kk, elem(v)) } vv := []string{} if val == nil { vv = []string{"null"} } for _, v := range val { vv = append(vv, elem(v)) } skey := strings.Join(kk, ", ") sval := strings.Join(vv, ", ") if len(vv) > 1 { sval = fmt.Sprintf("[]interface{%s}", sval) } if _, err = fmt.Fprintf(w, "%s → %s\n", skey, sval); err != nil { return err } err = enum.next() if err != nil { if fileutil.IsEOF(err) { err = nil break } return err } } return }
// WriteTo is a helper to copy/persist MemFiler's content to w. If w is also // an io.WriterAt then WriteTo may attempt to _not_ write any big, for some // value of big, runs of zeros, i.e. it will attempt to punch holes, where // possible, in `w` if that happens to be a freshly created or to zero length // truncated OS file. 'n' reports the number of bytes written to 'w'. func (f *MemFiler) WriteTo(w io.Writer) (n int64, err error) { var ( b [pgSize]byte wn, rn int off int64 rerr error ) if wa, ok := w.(io.WriterAt); ok { lastPgI := f.size >> pgBits for pgI := int64(0); pgI <= lastPgI; pgI++ { sz := pgSize if pgI == lastPgI { sz = int(f.size & pgMask) } pg := f.m[pgI] if pg != nil { wn, err = wa.WriteAt(pg[:sz], off) if err != nil { return } n += int64(wn) off += int64(sz) if wn != sz { return n, io.ErrShortWrite } } } return } var werr error for rerr == nil { if rn, rerr = f.ReadAt(b[:], off); rn != 0 { off += int64(rn) if wn, werr = w.Write(b[:rn]); werr != nil { return n, werr } n += int64(wn) } } if !fileutil.IsEOF(rerr) { err = rerr } return }
// ReadFrom is a helper to populate File's content from r. 'n' reports the // number of bytes read from 'r'. func (f *File) ReadFrom(r io.Reader) (n int64, err error) { if err = f.Truncate(0); err != nil { return } var ( b [pgSize]byte rn int off int64 ) var rerr error for rerr == nil { if rn, rerr = r.Read(b[:]); rn != 0 { f.WriteAt(b[:rn], off) off += int64(rn) n += int64(rn) } } if !fileutil.IsEOF(rerr) { err = rerr } return }
// TestbTreePrev exercises backwards enumeration (enum.prev) of an in-memory
// BTree: first on an empty tree (everything must report EOF), then on a tree
// filled with N keys 10i -> 10i+1, checking prev() behavior at the first,
// last and one-past-last positions, and finally walking the whole tree in
// reverse verifying every key/value pair.
func TestbTreePrev(t *testing.T) {
	N := int64(*testN)
	tree := NewBTree(nil)
	enum, _, err := tree.seek(enc8(0))
	if err != nil {
		t.Fatal(err)
	}

	// Empty tree: current/next/prev must all report EOF.
	if _, _, err = enum.current(); !fileutil.IsEOF(err) {
		t.Fatal(err)
	}

	if err = enum.next(); !fileutil.IsEOF(err) {
		t.Fatal(err)
	}

	if err = enum.prev(); !fileutil.IsEOF(err) {
		t.Fatal(err)
	}

	// Fill
	for i := int64(1); i <= N; i++ {
		tree.Set(enc8(10*i), enc8(10*i+1))
	}

	// Seek before the first key: miss, positioned at index 0; prev is EOF.
	var eq bool
	enum, eq, err = tree.seek(enc8(0))
	if err != nil {
		t.Fatal(err)
	}

	if eq {
		t.Fatal(eq)
	}

	// index: 0
	if _, _, err = enum.current(); err != nil {
		t.Fatal(err)
	}

	if err = enum.prev(); !fileutil.IsEOF(err) {
		t.Fatal(err)
	}

	// Seek the last key exactly: hit at index N-1; prev works when N > 1.
	enum, eq, err = tree.seek(enc8(N * 10))
	if err != nil {
		t.Fatal(err)
	}

	if !eq {
		t.Fatal(eq)
	}

	// index: N-1
	if _, _, err = enum.current(); err != nil {
		t.Fatal(err)
	}

	if err = enum.prev(); N > 1 && err != nil {
		t.Fatal(err)
	}

	// Seek past the last key: miss at index N; current is EOF but prev
	// steps back onto the last key.
	enum, eq, err = tree.seek(enc8(N*10 + 1))
	if err != nil {
		t.Fatal(err)
	}

	if eq {
		t.Fatal(eq)
	}

	// index: N
	if _, _, err = enum.current(); !fileutil.IsEOF(err) {
		t.Fatal(err)
	}

	if err = enum.prev(); err != nil {
		t.Fatal(err)
	}

	// Full reverse walk from the last key down to the first.
	enum, _, err = tree.seek(enc8(N * 10))
	if err != nil {
		t.Fatal(err)
	}

	for i := N; i >= 1; i-- {
		expKey, expValue := enc8(10*i), enc8(10*i+1)
		k, v, err := enum.current()
		if err != nil {
			t.Fatal(err)
		}

		if !bytes.Equal(k, expKey) {
			t.Fatalf("%d k|% x| expK|% x| %s\n", i, k, expKey, tree.root.String(tree.store))
		}

		if !bytes.Equal(v, expValue) {
			t.Fatal(i)
		}

		switch {
		case i == 1:
			// Stepping back from the first key must report EOF.
			if err := enum.prev(); !fileutil.IsEOF(err) {
				t.Fatal(err)
			}
		default:
			if err := enum.prev(); err != nil {
				t.Fatal(i, err)
			}
		}
	}
}
// TestBTreeSeekPrev is a table-driven test of Enumerator.Prev after Seek on
// a BTree holding keys 10, 20, 30. For each seek target it checks the hit
// flag and the exact descending key sequence. The brokenSerial bitmask
// additionally mutates the tree (re-Set of key 20) between Prev calls to
// verify the enumerator stays consistent across concurrent modification.
func TestBTreeSeekPrev(t *testing.T) {
	// seeking within 3 keys: 10, 20, 30
	table := []struct {
		k    int
		hit  bool
		keys []int
	}{
		{5, false, []int{10}},
		{10, true, []int{10}},
		{15, false, []int{20, 10}},
		{20, true, []int{20, 10}},
		{25, false, []int{30, 20, 10}},
		{30, true, []int{30, 20, 10}},
		{35, false, []int{}},
	}

	for i, test := range table {
		down := test.keys

		// Fresh tree per test case with keys 10/20/30 -> 100/200/300.
		db := NewBTree(nil)
		if err := db.Set(n2b(10), n2b(100)); err != nil {
			t.Fatal(i, err)
		}
		if err := db.Set(n2b(20), n2b(200)); err != nil {
			t.Fatal(i, err)
		}
		if err := db.Set(n2b(30), n2b(300)); err != nil {
			t.Fatal(i, err)
		}

		// Each bit of brokenSerial selects a step at which the tree is
		// mutated mid-enumeration.
		for brokenSerial := 0; brokenSerial < 16; brokenSerial++ {
			en, hit, err := db.Seek(n2b(test.k))
			if err != nil {
				t.Fatal(err)
			}

			if g, e := hit, test.hit; g != e {
				t.Fatal(i, g, e)
			}

			j := 0
			for {
				if brokenSerial&(1<<uint(j)) != 0 {
					if err := db.Set(n2b(20), n2b(200)); err != nil {
						t.Fatal(i, err)
					}
				}

				k, v, err := en.Prev()
				if err != nil {
					if !fileutil.IsEOF(err) {
						t.Fatal(i, err)
					}

					break
				}

				if g, e := len(k), 8; g != e {
					t.Fatal(i, g, e)
				}

				if j >= len(down) {
					t.Fatal(i, j, brokenSerial)
				}

				if g, e := b2n(k), down[j]; g != e {
					t.Fatal(i, j, brokenSerial, g, e)
				}

				if g, e := len(v), 8; g != e {
					t.Fatal(i, g, e)
				}

				if g, e := b2n(v), 10*down[j]; g != e {
					t.Fatal(i, g, e)
				}

				j++
			}

			// The walk must have produced exactly the expected keys.
			if g, e := j, len(down); g != e {
				t.Fatal(i, j, g, e)
			}
		}
	}
}
// TestseekLast checks seekFirst/seekLast positioning on tiny trees: an empty
// tree reports EOF, a one-element tree is both first and last (next and prev
// are EOF), and in a two-element tree seekLast lands on the last pair with
// prev stepping back onto the first.
func TestseekLast(t *testing.T) {
	bt := NewBTree(nil)
	// Empty tree: seekFirst must report EOF.
	enum, err := bt.seekFirst()
	if !fileutil.IsEOF(err) {
		t.Fatal(err)
	}

	bt.Set([]byte("a"), []byte("b"))
	enum, err = bt.seekFirst()
	if err != nil {
		t.Fatal(err)
	}

	// Single element: both directions off the end are EOF...
	err = enum.prev()
	if !fileutil.IsEOF(err) {
		t.Fatal(err)
	}

	err = enum.next()
	if !fileutil.IsEOF(err) {
		t.Fatal(err)
	}

	// ...but current still returns the pair.
	k, v, err := enum.current()
	if err != nil {
		t.Fatal(err)
	}

	if string(k) != "a" || string(v) != "b" {
		t.Fatal(k, v)
	}

	bt.Set([]byte("c"), []byte("d"))
	enum, err = bt.seekLast()
	if err != nil {
		t.Fatal(err)
	}

	// seekLast: next is EOF, current is the last pair.
	err = enum.next()
	if !fileutil.IsEOF(err) {
		t.Fatal(err)
	}

	k, v, err = enum.current()
	if err != nil {
		t.Fatal(err)
	}

	if string(k) != "c" || string(v) != "d" {
		t.Fatal(k, v)
	}

	// prev from the last pair lands on the first.
	err = enum.prev()
	if err != nil {
		t.Fatal(err)
	}

	k, v, err = enum.current()
	if err != nil {
		t.Fatal(err)
	}

	if string(k) != "a" || string(v) != "b" {
		t.Fatal(k, v)
	}
}
// TestSeekNext is the forward counterpart of TestBTreeSeekPrev, run against
// a DB created with CreateMem: for each seek target among keys 10, 20, 30 it
// verifies the hit flag and the exact ascending key sequence from
// Enumerator.Next, with the brokenSerial bitmask injecting tree mutations
// (re-Set of key 20) between Next calls.
func TestSeekNext(t *testing.T) {
	// seeking within 3 keys: 10, 20, 30
	table := []struct {
		k    int
		hit  bool
		keys []int
	}{
		{5, false, []int{10, 20, 30}},
		{10, true, []int{10, 20, 30}},
		{15, false, []int{20, 30}},
		{20, true, []int{20, 30}},
		{25, false, []int{30}},
		{30, true, []int{30}},
		{35, false, []int{}},
	}

	for i, test := range table {
		up := test.keys

		// Fresh in-memory DB per case with keys 10/20/30 -> 100/200/300.
		db, err := CreateMem(&Options{noClone: true})
		if err != nil {
			t.Fatal(i, err)
		}

		if err := db.Set(n2b(10), n2b(100)); err != nil {
			t.Fatal(i, err)
		}
		if err := db.Set(n2b(20), n2b(200)); err != nil {
			t.Fatal(i, err)
		}
		if err := db.Set(n2b(30), n2b(300)); err != nil {
			t.Fatal(i, err)
		}

		// Each bit of brokenSerial selects a step at which the tree is
		// mutated mid-enumeration.
		for brokenSerial := 0; brokenSerial < 16; brokenSerial++ {
			en, hit, err := db.Seek(n2b(test.k))
			if err != nil {
				t.Fatal(err)
			}

			if g, e := hit, test.hit; g != e {
				t.Fatal(i, g, e)
			}

			j := 0
			for {
				if brokenSerial&(1<<uint(j)) != 0 {
					if err := db.Set(n2b(20), n2b(200)); err != nil {
						t.Fatal(i, err)
					}
				}

				k, v, err := en.Next()
				if err != nil {
					if !fileutil.IsEOF(err) {
						t.Fatal(i, err)
					}

					break
				}

				if g, e := len(k), 8; g != e {
					t.Fatal(i, g, e)
				}

				if j >= len(up) {
					t.Fatal(i, j, brokenSerial)
				}

				if g, e := b2n(k), up[j]; g != e {
					t.Fatal(i, j, brokenSerial, g, e)
				}

				if g, e := len(v), 8; g != e {
					t.Fatal(i, g, e)
				}

				if g, e := b2n(v), 10*up[j]; g != e {
					t.Fatal(i, g, e)
				}

				j++
			}

			// The walk must have produced exactly the expected keys.
			if g, e := j, len(up); g != e {
				t.Fatal(i, j, g, e)
			}
		}
	}
}
// TestRollbackFiler3 exercises RollbackFiler reads/writes/truncates across
// nested BeginUpdate levels backed by a MemFiler: reads on an empty filer
// report EOF, a sparse write at 1e6 zero-fills the gap before it, truncating
// below an offset makes reads there fail, and growing the filer again makes
// the (zeroed) range readable.
func TestRollbackFiler3(t *testing.T) {
	var r *RollbackFiler
	f := NewMemFiler()

	// Checkpoint simply truncates the backing MemFiler to the given size.
	checkpoint := func(sz int64) (err error) {
		return f.Truncate(sz)
	}

	r, err := NewRollbackFiler(f, checkpoint, f)
	if err != nil {
		t.Fatal(err)
	}

	// Empty filer: reads anywhere report 0, EOF.
	n, err := r.ReadAt([]byte{0}, 0)
	if n != 0 || !fileutil.IsEOF(err) {
		t.Fatal(n, err)
	}

	n, err = r.ReadAt([]byte{0}, 1e6)
	if n != 0 || !fileutil.IsEOF(err) {
		t.Fatal(n, err)
	}

	if err = r.BeginUpdate(); err != nil { // BeginUpdate: 0 -> 1
		t.Fatal(err)
	}

	// Sparse write at 1e6 — the gap before it must read back as zeros.
	rng := rand.New(rand.NewSource(42))
	buf := rndBytes(rng, 100)
	if n, err := r.WriteAt(buf, 1e6); n != 100 || err != nil {
		t.Fatal(err)
	}

	buf = make([]byte, 100)
	if n, err := r.ReadAt(buf, 1e6-200); n != 100 || err != nil {
		t.Fatal(err)
	}

	for i, v := range buf {
		if v != 0 {
			t.Fatal(i, v)
		}
	}

	// Shrink below 1e6: the written range is gone.
	if err := r.Truncate(1e5); err != nil {
		t.Fatal(err)
	}

	if err = r.BeginUpdate(); err != nil { // BeginUpdate: 1 -> 2
		t.Fatal(err)
	}

	if n, err := r.ReadAt(buf, 1e6); n != 0 || err == nil {
		t.Fatal(n, err)
	}

	// Grow past 1e6 again: the range reads as zeros.
	if err := r.Truncate(2e6); err != nil {
		t.Fatal(err)
	}

	if err = r.BeginUpdate(); err != nil { // BeginUpdate: 2 -> 3
		t.Fatal(err)
	}

	if n, err := r.ReadAt(buf, 1e6); n == 0 || err != nil {
		t.Fatal(n, err)
	}

	for i, v := range buf {
		if v != 0 {
			t.Fatal(i, v)
		}
	}
}
// recoverDb replays the write-ahead log into db. The WAL must start with a
// valid header packet; each subsequent packet is either a data write
// (collected into an in-memory BTree keyed by big-endian offset, so later
// writes to the same offset supersede earlier ones) or a checkpoint, which
// must be the WAL's final packet. On checkpoint the collected writes are
// applied to db in offset order, db is truncated to the committed size and
// synced, and the WAL is truncated and synced.
//
// NOTE(review): a checkpoint with no preceding data packets would make
// tr.seekFirst() report EOF, which is returned as an error here — confirm
// whether an empty transaction can legally reach the WAL.
func (a *ACIDFiler0) recoverDb(db Filer) (err error) {
	fi, err := a.wal.Stat()
	if err != nil {
		return &ErrILSEQ{Type: ErrInvalidWAL, Name: a.wal.Name(), More: err}
	}

	// Every WAL packet is 16-byte aligned; any remainder means corruption.
	if sz := fi.Size(); sz%16 != 0 {
		return &ErrILSEQ{Type: ErrFileSize, Name: a.wal.Name(), Arg: sz}
	}

	f := bufio.NewReader(a.wal)
	items, err := a.readPacket(f)
	if err != nil {
		return
	}

	// The first packet must be the WAL header with the expected type tag.
	if len(items) != 3 || items[0] != int64(wpt00Header) || items[1] != int64(walTypeACIDFiler0) {
		return &ErrILSEQ{Type: ErrInvalidWAL, Name: a.wal.Name(), More: fmt.Sprintf("invalid packet items %#v", items)}
	}

	tr := NewBTree(nil)

	for {
		items, err = a.readPacket(f)
		if err != nil {
			return
		}

		if len(items) < 2 {
			return &ErrILSEQ{Type: ErrInvalidWAL, Name: a.wal.Name(), More: fmt.Sprintf("too few packet items %#v", items)}
		}

		switch items[0] {
		case int64(wpt00WriteData):
			if len(items) != 3 {
				return &ErrILSEQ{Type: ErrInvalidWAL, Name: a.wal.Name(), More: fmt.Sprintf("invalid data packet items %#v", items)}
			}

			// Index the write by its big-endian offset so replay below
			// proceeds in ascending offset order.
			b, off := items[1].([]byte), items[2].(int64)
			var key [8]byte
			binary.BigEndian.PutUint64(key[:], uint64(off))
			if err = tr.Set(key[:], b); err != nil {
				return
			}
		case int64(wpt00Checkpoint):
			// The checkpoint must be the last packet: any trailing byte
			// in the WAL is a sequence error.
			var b1 [1]byte
			if n, err := f.Read(b1[:]); n != 0 || err == nil {
				return &ErrILSEQ{Type: ErrInvalidWAL, Name: a.wal.Name(), More: fmt.Sprintf("checkpoint n %d, err %v", n, err)}
			}

			if len(items) != 2 {
				return &ErrILSEQ{Type: ErrInvalidWAL, Name: a.wal.Name(), More: fmt.Sprintf("checkpoint packet invalid items %#v", items)}
			}

			sz := items[1].(int64)

			// Replay all collected writes into db in offset order.
			enum, err := tr.seekFirst()
			if err != nil {
				return err
			}

			for {
				k, v, err := enum.current()
				if err != nil {
					if fileutil.IsEOF(err) {
						break
					}

					return err
				}

				if _, err = db.WriteAt(v, int64(binary.BigEndian.Uint64(k))); err != nil {
					return err
				}

				if err = enum.next(); err != nil {
					if fileutil.IsEOF(err) {
						break
					}

					return err
				}
			}

			if err = db.Truncate(sz); err != nil {
				return err
			}

			if err = db.Sync(); err != nil {
				return err
			}

			// Recovery complete
			if err = a.wal.Truncate(0); err != nil {
				return err
			}

			return a.wal.Sync()
		default:
			return &ErrILSEQ{Type: ErrInvalidWAL, Name: a.wal.Name(), More: fmt.Sprintf("packet tag %v", items[0])}
		}
	}
}
func noEof(e error) (err error) { if !fileutil.IsEOF(e) { err = e } return }
// NewACIDFiler returns a newly created ACIDFiler0 with WAL in wal.
//
// If the WAL is zero sized then a previous clean shutdown of db is taken for
// granted and no recovery procedure is taken.
//
// If the WAL is of non zero size then it is checked for having a
// commited/fully finished transaction not yet been reflected in db. If such
// transaction exists it's committed to db. If the recovery process finishes
// successfully, the WAL is truncated to zero size and fsync'ed prior to return
// from NewACIDFiler.
func NewACIDFiler(db Filer, wal *os.File) (r *ACIDFiler0, err error) {
	fi, err := wal.Stat()
	if err != nil {
		return
	}

	r = &ACIDFiler0{wal: wal}

	// Non-empty WAL: a committed transaction may not yet be in db; replay it.
	if fi.Size() != 0 {
		if err = r.recoverDb(db); err != nil {
			return
		}
	}

	acidWriter := (*acidWriter0)(r)

	if r.RollbackFiler, err = NewRollbackFiler(
		db,
		// Checkpoint callback: implements the two-phase commit. Phase 1
		// makes the transaction durable in the WAL; phase 2 applies the
		// collected writes to db and resets the WAL.
		func(sz int64) (err error) {
			// Checkpoint
			if err = acidWriter.writePacket([]interface{}{wpt00Checkpoint, sz}); err != nil {
				return
			}

			if err = r.bwal.Flush(); err != nil {
				return
			}

			r.bwal = nil

			if err = r.wal.Sync(); err != nil {
				return
			}

			// Track the largest WAL size seen; a Stat failure here is
			// deliberately ignored (statistics only).
			wfi, err := r.wal.Stat()
			switch err != nil {
			case true:
				// unexpected, but ignored
			case false:
				r.peakWal = mathutil.MaxInt64(wfi.Size(), r.peakWal)
			}

			// Phase 1 commit complete

			// Apply the buffered writes (keyed by big-endian offset) to
			// db in ascending offset order.
			enum, err := r.data.seekFirst()
			if err != nil {
				return
			}

			for {
				k, v, err := enum.current()
				if err != nil {
					if fileutil.IsEOF(err) {
						break
					}

					return err
				}

				if _, err := db.WriteAt(v, int64(binary.BigEndian.Uint64(k))); err != nil {
					return err
				}

				if err = enum.next(); err != nil {
					if fileutil.IsEOF(err) {
						break
					}

					return err
				}
			}

			if err = db.Truncate(sz); err != nil {
				return
			}

			if err = db.Sync(); err != nil {
				return
			}

			// Phase 2 commit complete

			// Reset the WAL for the next transaction; testHook suppresses
			// this once so tests can inspect the WAL content.
			if !r.testHook {
				if err = r.wal.Truncate(0); err != nil {
					return
				}

				if _, err = r.wal.Seek(0, 0); err != nil {
					return
				}
			}

			r.testHook = false

			return r.wal.Sync()
		},
		acidWriter,
	); err != nil {
		return
	}

	return r, nil
}
// testFilerReadAtWriteAt is a shared randomized round-trip test for any
// Filer implementation produced by nf: it performs M random overlapping
// WriteAt calls mirrored into a reference buffer e, checks Size, re-reads
// random ranges (plus one full-range read) comparing against e, and — for a
// MemFiler — additionally round-trips the content through WriteTo/ReadFrom.
func testFilerReadAtWriteAt(t *testing.T, nf newFunc) {
	f := nf()
	t.Log(f.Name())
	defer func() {
		if err := f.Close(); err != nil {
			t.Error(err)
		}
	}()

	// RollbackFiler requires an open update transaction for writes.
	if _, ok := f.(*RollbackFiler); ok {
		if err := f.BeginUpdate(); err != nil {
			t.Fatal(err)
		}

		defer func() {
			if err := f.EndUpdate(); err != nil {
				t.Error(err)
			}
		}()
	}

	const (
		N = 1 << 16 // virtual file size
		M = 2e2     // number of random writes / reads
	)

	s := make([]byte, N) // scratch data written to f
	e := make([]byte, N) // reference image of what f should contain
	rnd := rand.New(rand.NewSource(42))
	for i := range e {
		s[i] = byte(rnd.Intn(256))
	}

	n2 := 0 // highest offset ever written; expected final size
	for i := 0; i < M; i++ {
		// Pick a random non-empty range [from, to).
		var from, to int
		for {
			from = rnd.Intn(N)
			to = rnd.Intn(N)
			if from != to {
				break
			}
		}
		if from > to {
			from, to = to, from
		}

		for i := range s[from:to] {
			s[from+i] = byte(rnd.Intn(256))
		}

		copy(e[from:to], s[from:to])
		if to > n2 {
			n2 = to
		}

		n, err := f.WriteAt(s[from:to], int64(from))
		if err != nil {
			t.Error(err)
			return
		}

		if g, e := n, to-from; g != e {
			t.Error(g, e)
			return
		}
	}

	fsz, err := f.Size()
	if err != nil {
		t.Error(err)
		return
	}

	if g, e := fsz, int64(n2); g != e {
		t.Error(g, e)
		return
	}

	// Read back random ranges; the final iteration reads everything.
	b := make([]byte, n2)
	for i := 0; i <= M; i++ {
		from := rnd.Intn(n2)
		to := rnd.Intn(n2)
		if from > to {
			from, to = to, from
		}

		if i == M {
			from, to = 0, n2
		}

		n, err := f.ReadAt(b[from:to], int64(from))
		if err != nil && (!fileutil.IsEOF(err) && n != 0) {
			fsz, err = f.Size()
			if err != nil {
				t.Error(err)
				return
			}

			t.Error(fsz, from, to, err)
			return
		}

		if g, e := n, to-from; g != e {
			t.Error(g, e)
			return
		}

		if g, e := b[from:to], e[from:to]; !bytes.Equal(g, e) {
			// On mismatch dump the first few MemFiler pages for debugging.
			if x, ok := f.(*MemFiler); ok {
				for i := int64(0); i <= 3; i++ {
					t.Logf("pg %d\n----\n%s", i, hex.Dump(x.m[i][:]))
				}
			}
			t.Errorf(
				"i %d from %d to %d len(g) %d len(e) %d\n---- got ----\n%s\n---- exp ----\n%s",
				i, from, to, len(g), len(e), hex.Dump(g), hex.Dump(e),
			)
			return
		}
	}

	// MemFiler only: round-trip the content through WriteTo/ReadFrom.
	mf, ok := f.(*MemFiler)
	if !ok {
		return
	}

	buf := &bytes.Buffer{}
	if _, err := mf.WriteTo(buf); err != nil {
		t.Error(err)
		return
	}

	if g, e := buf.Bytes(), e[:n2]; !bytes.Equal(g, e) {
		t.Errorf("\nlen %d\n%s\nlen %d\n%s", len(g), hex.Dump(g), len(e), hex.Dump(e))
		return
	}

	if err := mf.Truncate(0); err != nil {
		t.Error(err)
		return
	}

	if _, err := mf.ReadFrom(buf); err != nil {
		t.Error(err)
		return
	}

	roundTrip := make([]byte, n2)
	if n, err := mf.ReadAt(roundTrip, 0); err != nil && n == 0 {
		t.Error(err)
		return
	}

	if g, e := roundTrip, e[:n2]; !bytes.Equal(g, e) {
		t.Errorf("\nlen %d\n%s\nlen %d\n%s", len(g), hex.Dump(g), len(e), hex.Dump(e))
		return
	}
}