// createManifest creates a manifest file that contains a snapshot of vs. func (vs *versionSet) createManifest(dirname string) (err error) { var ( filename = dbFilename(dirname, fileTypeManifest, vs.manifestFileNumber) manifestFile db.File manifest *record.Writer ) defer func() { if manifest != nil { manifest.Close() } if manifestFile != nil { manifestFile.Close() } if err != nil { vs.fs.Remove(filename) } }() manifestFile, err = vs.fs.Create(filename) if err != nil { return err } manifest = record.NewWriter(manifestFile) snapshot := versionEdit{ comparatorName: vs.ucmp.Name(), } // TODO: save compaction pointers. for level, fileMetadata := range vs.currentVersion().files { for _, meta := range fileMetadata { snapshot.newFiles = append(snapshot.newFiles, newFileEntry{ level: level, meta: meta, }) } } w, err1 := manifest.Next() if err1 != nil { return err1 } err1 = snapshot.encode(w) if err1 != nil { return err1 } vs.manifest, manifest = manifest, nil vs.manifestFile, manifestFile = manifestFile, nil return nil }
// NewReader returns a new table reader for the file. Closing the reader will // close the file. func NewReader(f db.File, o *db.Options) *Reader { r := &Reader{ file: f, comparer: o.GetComparer(), verifyChecksums: o.GetVerifyChecksums(), } if f == nil { r.err = errors.New("leveldb/table: nil file") return r } stat, err := f.Stat() if err != nil { r.err = fmt.Errorf("leveldb/table: invalid table (could not stat file): %v", err) return r } var footer [footerLen]byte if stat.Size() < int64(len(footer)) { r.err = errors.New("leveldb/table: invalid table (file size is too small)") return r } _, err = f.ReadAt(footer[:], stat.Size()-int64(len(footer))) if err != nil && err != io.EOF { r.err = fmt.Errorf("leveldb/table: invalid table (could not read footer): %v", err) return r } if string(footer[footerLen-len(magic):footerLen]) != magic { r.err = errors.New("leveldb/table: invalid table (bad magic number)") return r } // Ignore the metaindex. _, n := decodeBlockHandle(footer[:]) if n == 0 { r.err = errors.New("leveldb/table: invalid table (bad metaindex block handle)") return r } // Read the index into memory. indexBH, n := decodeBlockHandle(footer[n:]) if n == 0 { r.err = errors.New("leveldb/table: invalid table (bad index block handle)") return r } r.index, r.err = r.readBlock(indexBH) return r }
func TestBasics(t *testing.T) { fs := New() testCases := []string{ // Create a top-level file. "1a: create /foo", // Create a child of that file. It should fail, since /foo is not a directory. "2a: create /foo/x fails", // Create a third-level file. It should fail, since /bar has not been created. // Similarly, opening that file should fail. "3a: create /bar/baz/y fails", "3b: open /bar/baz/y fails", // Make the /bar/baz directory; create a third-level file. Creation should now succeed. "4a: mkdirall /bar/baz", "4b: f = create /bar/baz/y", "4c: f.stat.name == y", // Write some data; read it back. "5a: f.write abcde", "5b: f.close", "5c: f = open /bar/baz/y", "5d: f.read 5 == abcde", "5e: f.readat 2 1 == bc", "5f: f.close", // Remove the file twice. The first should succeed, the second should fail. "6a: remove /bar/baz/y", "6b: remove /bar/baz/y fails", "6c: open /bar/baz/y fails", // Rename /foo to /goo. Trying to open /foo should succeed before the rename and // fail afterwards, and vice versa for /goo. "7a: open /foo", "7b: open /goo fails", "7c: rename /foo /goo", "7d: open /foo fails", "7e: open /goo", // Create /bar/baz/z and rename /bar/baz to /bar/caz. "8a: create /bar/baz/z", "8b: open /bar/baz/z", "8c: open /bar/caz/z fails", "8d: rename /bar/baz /bar/caz", "8e: open /bar/baz/z fails", "8f: open /bar/caz/z", } var f db.File for _, tc := range testCases { s := strings.Split(tc, " ")[1:] saveF := s[0] == "f" && s[1] == "=" if saveF { s = s[2:] } fails := s[len(s)-1] == "fails" if fails { s = s[:len(s)-1] } var ( fi os.FileInfo g db.File err error ) switch s[0] { case "create": g, err = fs.Create(normalize(s[1])) case "open": g, err = fs.Open(normalize(s[1])) case "mkdirall": err = fs.MkdirAll(normalize(s[1]), 0755) case "remove": err = fs.Remove(normalize(s[1])) case "rename": err = fs.Rename(normalize(s[1]), normalize(s[2])) case "f.write": _, err = f.Write([]byte(s[1])) case "f.read": n, _ := strconv.Atoi(s[1]) buf := make([]byte, n) _, err = io.ReadFull(f, buf) if err != nil { break } if got, want := string(buf), s[3]; got != want { t.Fatalf("%q: got %q, want %q", tc, got, want) } case "f.readat": n, _ := strconv.Atoi(s[1]) off, _ := strconv.Atoi(s[2]) buf := make([]byte, n) _, err = f.ReadAt(buf, int64(off)) if err != nil { break } if got, want := string(buf), s[4]; got != want { t.Fatalf("%q: got %q, want %q", tc, got, want) } case "f.close": f, err = nil, f.Close() case "f.stat.name": fi, err = f.Stat() if err != nil { break } if got, want := fi.Name(), s[2]; got != want { t.Fatalf("%q: got %q, want %q", tc, got, want) } default: t.Fatalf("bad test case: %q", tc) } if saveF { f, g = g, nil } else if g != nil { g.Close() } if fails { if err == nil { t.Fatalf("%q: got nil error, want non-nil", tc) } } else { if err != nil { t.Fatalf("%q: %v", tc, err) } } } }
// writeLevel0Table writes a memtable to a level-0 on-disk table. // // If no error is returned, it adds the file number of that on-disk table to // d.pendingOutputs. It is the caller's responsibility to remove that fileNum // from that set when it has been applied to d.versions. // // d.mu must be held when calling this, but the mutex may be dropped and // re-acquired during the course of this method. func (d *DB) writeLevel0Table(fs db.FileSystem, mem *memdb.MemDB) (meta fileMetadata, err error) { // meta用于记录新创建的level0 db文件的元信息 meta.fileNum = d.versions.nextFileNum() // filename:新db文件的文件名 filename := dbFilename(d.dirname, fileTypeTable, meta.fileNum) d.pendingOutputs[meta.fileNum] = struct{}{} defer func(fileNum uint64) { // 如果异常退出(err不为nil),则从d.pendingOutputs中删除新db文件的记录 // 否则d.pendingOutputs是用来干什么的呢? if err != nil { delete(d.pendingOutputs, fileNum) } }(meta.fileNum) // Release the d.mu lock while doing I/O. // Note the unusual order: Unlock and then Lock. d.mu.Unlock() defer d.mu.Lock() var ( file db.File tw *table.Writer iter db.Iterator ) defer func() { if iter != nil { err = firstError(err, iter.Close()) } if tw != nil { err = firstError(err, tw.Close()) } if file != nil { err = firstError(err, file.Close()) } if err != nil { fs.Remove(filename) meta = fileMetadata{} } }() file, err = fs.Create(filename) if err != nil { return fileMetadata{}, err } // table为磁盘db文件封装写入方式 tw = table.NewWriter(file, &db.Options{ Comparer: d.icmp, }) // Find返回一个迭代器,用于遍历mem(这里即是d.imm)中的数据 // memtable是以skiplist来组织数据的,有序 // 所以取到的第一个数据的key即为当前imm中最小的key iter = mem.Find(nil, nil) iter.Next() // meta.smallest记录新db文件中最小的内部key,在写入memtable时就已经将用户key封装成了内部key,那么为什么还要使用internalKey来做类型转换? // 下面的meta.largest就没有进行类型转换就调用clone()方法了。 meta.smallest = internalKey(iter.Key()).clone() for { // 最后一次循环中的key即为最大的key,但为什么不封装成内部key呢? meta.largest = iter.Key() // 将key、value写到新db文件中 if err1 := tw.Set(meta.largest, iter.Value(), nil); err1 != nil { return fileMetadata{}, err1 } // 如果imm中的数据已经遍历完,全部存入新db文件,则break if !iter.Next() { break } } meta.largest = meta.largest.clone() if err1 := iter.Close(); err1 != nil { iter = nil return fileMetadata{}, err1 } iter = nil if err1 := tw.Close(); err1 != nil { tw = nil return fileMetadata{}, err1 } tw = nil // TODO: currently, closing a table.Writer closes its underlying file. // We have to re-open the file to Sync or Stat it, which seems stupid. file, err = fs.Open(filename) if err != nil { return fileMetadata{}, err } if err1 := file.Sync(); err1 != nil { return fileMetadata{}, err1 } if stat, err1 := file.Stat(); err1 != nil { return fileMetadata{}, err1 } else { size := stat.Size() if size < 0 { return fileMetadata{}, fmt.Errorf("leveldb: table file %q has negative size %d", filename, size) } // 将文件的大小值存入meta.size meta.size = uint64(size) } // TODO: compaction stats. /* 此时,meta的四个成员: - filenum - smallest - largest - size 已经全部填上了 */ return meta, nil }
func (d *DB) writeLevel0Table(fs db.FileSystem, mem *memdb.MemDB) (meta fileMetadata, err error) { meta.fileNum = d.versions.nextFileNum() filename := dbFilename(d.dirname, fileTypeTable, meta.fileNum) // TODO: add meta.fileNum to a set of 'pending outputs' so that a // concurrent sweep of obsolete db files won't delete the fileNum file. // It is the caller's responsibility to remove that fileNum from the // set of pending outputs. var ( file db.File tw *table.Writer iter db.Iterator ) defer func() { if iter != nil { err = firstError(err, iter.Close()) } if tw != nil { err = firstError(err, tw.Close()) } if file != nil { err = firstError(err, file.Close()) } if err != nil { fs.Remove(filename) meta = fileMetadata{} } }() file, err = fs.Create(filename) if err != nil { return fileMetadata{}, err } tw = table.NewWriter(file, &db.Options{ Comparer: d.icmp, }) iter = mem.Find(nil, nil) iter.Next() meta.smallest = internalKey(iter.Key()).clone() for { meta.largest = iter.Key() if err1 := tw.Set(meta.largest, iter.Value(), nil); err1 != nil { return fileMetadata{}, err1 } if !iter.Next() { break } } meta.largest = meta.largest.clone() if err1 := iter.Close(); err1 != nil { iter = nil return fileMetadata{}, err1 } iter = nil if err1 := tw.Close(); err1 != nil { tw = nil return fileMetadata{}, err1 } tw = nil // TODO: currently, closing a table.Writer closes its underlying file. // We have to re-open the file to Sync or Stat it, which seems stupid. file, err = fs.Open(filename) if err != nil { return fileMetadata{}, err } if err1 := file.Sync(); err1 != nil { return fileMetadata{}, err1 } if stat, err1 := file.Stat(); err1 != nil { return fileMetadata{}, err1 } else { size := stat.Size() if size < 0 { return fileMetadata{}, fmt.Errorf("leveldb: table file %q has negative size %d", filename, size) } meta.size = uint64(size) } // TODO: compaction stats. return meta, nil }
// writeLevel0Table writes a memtable to a level-0 on-disk table. // // If no error is returned, it adds the file number of that on-disk table to // d.pendingOutputs. It is the caller's responsibility to remove that fileNum // from that set when it has been applied to d.versions. // // d.mu must be held when calling this, but the mutex may be dropped and // re-acquired during the course of this method. func (d *DB) writeLevel0Table(fs db.FileSystem, mem *memdb.MemDB) (meta fileMetadata, err error) { meta.fileNum = d.versions.nextFileNum() filename := dbFilename(d.dirname, fileTypeTable, meta.fileNum) d.pendingOutputs[meta.fileNum] = struct{}{} defer func(fileNum uint64) { if err != nil { delete(d.pendingOutputs, fileNum) } }(meta.fileNum) // Release the d.mu lock while doing I/O. // Note the unusual order: Unlock and then Lock. d.mu.Unlock() defer d.mu.Lock() var ( file db.File tw *table.Writer iter db.Iterator ) defer func() { if iter != nil { err = firstError(err, iter.Close()) } if tw != nil { err = firstError(err, tw.Close()) } if file != nil { err = firstError(err, file.Close()) } if err != nil { fs.Remove(filename) meta = fileMetadata{} } }() file, err = fs.Create(filename) if err != nil { return fileMetadata{}, err } tw = table.NewWriter(file, &db.Options{ Comparer: d.icmp, }) iter = mem.Find(nil, nil) iter.Next() meta.smallest = internalKey(iter.Key()).clone() for { meta.largest = iter.Key() if err1 := tw.Set(meta.largest, iter.Value(), nil); err1 != nil { return fileMetadata{}, err1 } if !iter.Next() { break } } meta.largest = meta.largest.clone() if err1 := iter.Close(); err1 != nil { iter = nil return fileMetadata{}, err1 } iter = nil if err1 := tw.Close(); err1 != nil { tw = nil return fileMetadata{}, err1 } tw = nil // TODO: currently, closing a table.Writer closes its underlying file. // We have to re-open the file to Sync or Stat it, which seems stupid. file, err = fs.Open(filename) if err != nil { return fileMetadata{}, err } if err1 := file.Sync(); err1 != nil { return fileMetadata{}, err1 } if stat, err1 := file.Stat(); err1 != nil { return fileMetadata{}, err1 } else { size := stat.Size() if size < 0 { return fileMetadata{}, fmt.Errorf("leveldb: table file %q has negative size %d", filename, size) } meta.size = uint64(size) } // TODO: compaction stats. return meta, nil }