// cloneFileSystem returns a new memory-backed file system whose root contains
// a copy of the directory dirname in the source file system srcFS. The copy
// is not recursive; directories under dirname are not copied.
//
// Changes to the resultant file system do not modify the source file system.
//
// For example, if srcFS contained:
// - /bar
// - /baz/0
// - /foo/x
// - /foo/y
// - /foo/z/A
// - /foo/z/B
// then calling cloneFileSystem(srcFS, "/foo") would result in a file system
// containing:
// - /x
// - /y
func cloneFileSystem(srcFS db.FileSystem, dirname string) (db.FileSystem, error) {
	if len(dirname) == 0 || dirname[len(dirname)-1] != os.PathSeparator {
		dirname += string(os.PathSeparator)
	}

	dstFS := memfs.New()
	list, err := srcFS.List(dirname)
	if err != nil {
		return nil, err
	}
	for _, name := range list {
		srcFile, err := srcFS.Open(dirname + name)
		if err != nil {
			return nil, err
		}
		stat, err := srcFile.Stat()
		if err != nil {
			return nil, err
		}
		if stat.IsDir() {
			err = srcFile.Close()
			if err != nil {
				return nil, err
			}
			continue
		}
		data := make([]byte, stat.Size())
		_, err = io.ReadFull(srcFile, data)
		if err != nil {
			return nil, err
		}
		err = srcFile.Close()
		if err != nil {
			return nil, err
		}
		dstFile, err := dstFS.Create(name)
		if err != nil {
			return nil, err
		}
		_, err = dstFile.Write(data)
		if err != nil {
			return nil, err
		}
		err = dstFile.Close()
		if err != nil {
			return nil, err
		}
	}
	return dstFS, nil
}
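// A hypothetical usage sketch (not from the repository) showing the
// copy-then-diverge behaviour documented above. It assumes only calls that
// already appear in this excerpt: memfs.New, plus Create, Write, Close,
// Remove and Open on a db.FileSystem.
func cloneFileSystemExample() error {
	srcFS := memfs.New()
	f, err := srcFS.Create("/bar")
	if err != nil {
		return err
	}
	if _, err := f.Write([]byte("contents")); err != nil {
		return err
	}
	if err := f.Close(); err != nil {
		return err
	}

	// Clone the root: dstFS now holds its own copy of /bar.
	dstFS, err := cloneFileSystem(srcFS, "/")
	if err != nil {
		return err
	}

	// Deleting /bar in the clone leaves the source untouched: the
	// subsequent srcFS.Open still succeeds.
	if err := dstFS.Remove("/bar"); err != nil {
		return err
	}
	if _, err := srcFS.Open("/bar"); err != nil {
		return err
	}
	return nil
}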
// writeLevel0Table writes a memtable to a level-0 on-disk table.
//
// If no error is returned, it adds the file number of that on-disk table to
// d.pendingOutputs. It is the caller's responsibility to remove that fileNum
// from that set when it has been applied to d.versions.
//
// d.mu must be held when calling this, but the mutex may be dropped and
// re-acquired during the course of this method.
func (d *DB) writeLevel0Table(fs db.FileSystem, mem *memdb.MemDB) (meta fileMetadata, err error) {
	// meta records the metadata of the newly created level-0 table file.
	meta.fileNum = d.versions.nextFileNum()
	// filename is the name of the new table file.
	filename := dbFilename(d.dirname, fileTypeTable, meta.fileNum)
	d.pendingOutputs[meta.fileNum] = struct{}{}
	defer func(fileNum uint64) {
		// On error (err != nil), drop the new file's entry from
		// d.pendingOutputs. On success the entry stays: per the doc comment
		// above, the caller removes it once the file has been applied to
		// d.versions. Until then the entry keeps a concurrent sweep of
		// obsolete db files from deleting the new table.
		if err != nil {
			delete(d.pendingOutputs, fileNum)
		}
	}(meta.fileNum)

	// Release the d.mu lock while doing I/O.
	// Note the unusual order: Unlock and then Lock.
	d.mu.Unlock()
	defer d.mu.Lock()

	var (
		file db.File
		tw   *table.Writer
		iter db.Iterator
	)
	defer func() {
		if iter != nil {
			err = firstError(err, iter.Close())
		}
		if tw != nil {
			err = firstError(err, tw.Close())
		}
		if file != nil {
			err = firstError(err, file.Close())
		}
		if err != nil {
			fs.Remove(filename)
			meta = fileMetadata{}
		}
	}()

	file, err = fs.Create(filename)
	if err != nil {
		return fileMetadata{}, err
	}
	// tw wraps the on-disk table file with the table-writing logic.
	tw = table.NewWriter(file, &db.Options{
		Comparer: d.icmp,
	})

	// Find returns an iterator over the data in mem (here, the immutable
	// memtable d.imm). The memtable keeps its entries sorted in a skiplist,
	// so the first key the iterator yields is the smallest key in mem.
	iter = mem.Find(nil, nil)
	iter.Next()
	// meta.smallest records the smallest internal key in the new table file.
	// User keys were already wrapped into internal keys when they were
	// written to the memtable; the explicit internalKey conversion is still
	// needed here because iter.Key() returns a plain []byte and the clone
	// method is defined on the internalKey type. The meta.largest assignment
	// below needs no conversion: internalKey's underlying type is []byte, so
	// a []byte value is directly assignable to it.
	meta.smallest = internalKey(iter.Key()).clone()
	for {
		// The key seen in the final iteration is the largest key. It is
		// assigned without a conversion (see the assignability note above)
		// and cloned only once, after the loop.
		meta.largest = iter.Key()
		// Write the key/value pair to the new table file.
		if err1 := tw.Set(meta.largest, iter.Value(), nil); err1 != nil {
			return fileMetadata{}, err1
		}
		// Break once every entry in mem has been written to the new file.
		if !iter.Next() {
			break
		}
	}
	meta.largest = meta.largest.clone()

	if err1 := iter.Close(); err1 != nil {
		iter = nil
		return fileMetadata{}, err1
	}
	iter = nil

	if err1 := tw.Close(); err1 != nil {
		tw = nil
		return fileMetadata{}, err1
	}
	tw = nil

	// TODO: currently, closing a table.Writer closes its underlying file.
	// We have to re-open the file to Sync or Stat it, which seems stupid.
	file, err = fs.Open(filename)
	if err != nil {
		return fileMetadata{}, err
	}
	if err1 := file.Sync(); err1 != nil {
		return fileMetadata{}, err1
	}
	if stat, err1 := file.Stat(); err1 != nil {
		return fileMetadata{}, err1
	} else {
		size := stat.Size()
		if size < 0 {
			return fileMetadata{}, fmt.Errorf("leveldb: table file %q has negative size %d", filename, size)
		}
		// Record the file size in meta.size.
		meta.size = uint64(size)
	}
	// TODO: compaction stats.

	/*
		At this point, all four fields of meta have been populated:
		- fileNum
		- smallest
		- largest
		- size
	*/
	return meta, nil
}
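// The conversion question raised in the comments above reduces to Go's
// assignability rules. A self-contained sketch; the key type below is a
// stand-in with the same shape as internalKey, not the repository's type:
package main

import "fmt"

// key is a defined type whose underlying type is []byte, with a method
// attached, mirroring internalKey.
type key []byte

func (k key) clone() key {
	c := make(key, len(k))
	copy(c, k)
	return c
}

func main() {
	raw := []byte("a-user-key") // what an iterator's Key() returns

	// No conversion is needed for assignment: []byte is an unnamed type
	// with the same underlying type as key, so it is assignable to key.
	var largest key = raw

	// Calling a method on the raw slice does require the conversion,
	// because clone is defined on key, not on []byte.
	smallest := key(raw).clone()

	fmt.Println(string(smallest), string(largest))
}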
func (d *DB) writeLevel0Table(fs db.FileSystem, mem *memdb.MemDB) (meta fileMetadata, err error) {
	meta.fileNum = d.versions.nextFileNum()
	filename := dbFilename(d.dirname, fileTypeTable, meta.fileNum)
	// TODO: add meta.fileNum to a set of 'pending outputs' so that a
	// concurrent sweep of obsolete db files won't delete the fileNum file.
	// It is the caller's responsibility to remove that fileNum from the
	// set of pending outputs.

	var (
		file db.File
		tw   *table.Writer
		iter db.Iterator
	)
	defer func() {
		if iter != nil {
			err = firstError(err, iter.Close())
		}
		if tw != nil {
			err = firstError(err, tw.Close())
		}
		if file != nil {
			err = firstError(err, file.Close())
		}
		if err != nil {
			fs.Remove(filename)
			meta = fileMetadata{}
		}
	}()

	file, err = fs.Create(filename)
	if err != nil {
		return fileMetadata{}, err
	}
	tw = table.NewWriter(file, &db.Options{
		Comparer: d.icmp,
	})

	iter = mem.Find(nil, nil)
	iter.Next()
	meta.smallest = internalKey(iter.Key()).clone()
	for {
		meta.largest = iter.Key()
		if err1 := tw.Set(meta.largest, iter.Value(), nil); err1 != nil {
			return fileMetadata{}, err1
		}
		if !iter.Next() {
			break
		}
	}
	meta.largest = meta.largest.clone()

	if err1 := iter.Close(); err1 != nil {
		iter = nil
		return fileMetadata{}, err1
	}
	iter = nil

	if err1 := tw.Close(); err1 != nil {
		tw = nil
		return fileMetadata{}, err1
	}
	tw = nil

	// TODO: currently, closing a table.Writer closes its underlying file.
	// We have to re-open the file to Sync or Stat it, which seems stupid.
	file, err = fs.Open(filename)
	if err != nil {
		return fileMetadata{}, err
	}
	if err1 := file.Sync(); err1 != nil {
		return fileMetadata{}, err1
	}
	if stat, err1 := file.Stat(); err1 != nil {
		return fileMetadata{}, err1
	} else {
		size := stat.Size()
		if size < 0 {
			return fileMetadata{}, fmt.Errorf("leveldb: table file %q has negative size %d", filename, size)
		}
		meta.size = uint64(size)
	}
	// TODO: compaction stats.

	return meta, nil
}
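// firstError is used by the deferred cleanup above but is not shown in this
// excerpt. A plausible implementation (an assumption, not the repository's
// code) keeps the first error, so a later Close error surfaces only when
// nothing failed earlier:
func firstError(err0, err1 error) error {
	if err0 != nil {
		return err0
	}
	return err1
}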
// replayLogFile replays the edits in the named log file.
//
// d.mu must be held when calling this, but the mutex may be dropped and
// re-acquired during the course of this method.
func (d *DB) replayLogFile(ve *versionEdit, fs db.FileSystem, filename string) (maxSeqNum uint64, err error) {
	file, err := fs.Open(filename)
	if err != nil {
		return 0, err
	}
	defer file.Close()

	var (
		mem      *memdb.MemDB
		batchBuf = new(bytes.Buffer)
		ikey     = make(internalKey, 512)
		rr       = record.NewReader(file)
	)
	for {
		r, err := rr.Next()
		if err == io.EOF {
			break
		}
		if err != nil {
			return 0, err
		}
		_, err = io.Copy(batchBuf, r)
		if err != nil {
			return 0, err
		}

		if batchBuf.Len() < batchHeaderLen {
			return 0, fmt.Errorf("leveldb: corrupt log file %q", filename)
		}
		b := Batch{batchBuf.Bytes()}
		seqNum := b.seqNum()
		seqNum1 := seqNum + uint64(b.count())
		if maxSeqNum < seqNum1 {
			maxSeqNum = seqNum1
		}

		if mem == nil {
			mem = memdb.New(&d.icmpOpts)
		}

		t := b.iter()
		for ; seqNum != seqNum1; seqNum++ {
			kind, ukey, value, ok := t.next()
			if !ok {
				return 0, fmt.Errorf("leveldb: corrupt log file %q", filename)
			}
			// Convert seqNum, kind and key into an internalKey, and add that ikey/value
			// pair to mem.
			//
			// TODO: instead of copying to an intermediate buffer (ikey), is it worth
			// adding a SetTwoPartKey(db.TwoPartKey{key0, key1}, value, opts) method to
			// memdb.MemDB? What effect does that have on the db.Comparer interface?
			//
			// The C++ LevelDB code does not need an intermediate copy because its memdb
			// implementation is a private implementation detail, and copies each internal
			// key component from the Batch format straight to the skiplist buffer.
			//
			// Go's LevelDB considers the memdb functionality to be useful in its own
			// right, and so leveldb/memdb is a separate package that is usable without
			// having to import the top-level leveldb package. That extra abstraction
			// means that we need to copy to an intermediate buffer here, to reconstruct
			// the complete internal key to pass to the memdb.
			ikey = makeInternalKey(ikey, ukey, kind, seqNum)
			mem.Set(ikey, value, nil)
		}
		if len(t) != 0 {
			return 0, fmt.Errorf("leveldb: corrupt log file %q", filename)
		}

		// TODO: if mem is large enough, write it to a level-0 table and set mem = nil.

		batchBuf.Reset()
	}

	if mem != nil && !mem.Empty() {
		// If the memtable holds any data after replay, flush it to an
		// on-disk level-0 table.
		meta, err := d.writeLevel0Table(fs, mem)
		if err != nil {
			return 0, err
		}
		ve.newFiles = append(ve.newFiles, newFileEntry{level: 0, meta: meta})
		// Strictly speaking, it's too early to delete meta.fileNum from
		// d.pendingOutputs, but we are replaying the log file, which happens
		// before Open returns, so there is no possibility of
		// deleteObsoleteFiles being called concurrently here.
		delete(d.pendingOutputs, meta.fileNum)
	}

	return maxSeqNum, nil
}
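// The inner loop above rebuilds each internal key with makeInternalKey
// before inserting it into the memtable. A sketch of the layout being
// produced, based on LevelDB's standard encoding (the user key followed by
// an 8-byte little-endian trailer packing (seqNum << 8) | kind); this is an
// illustration, not the repository's implementation:
package main

import (
	"encoding/binary"
	"fmt"
)

func makeInternalKeySketch(dst, ukey []byte, kind byte, seqNum uint64) []byte {
	// Reuse dst's backing array where possible, just as the replay loop
	// reuses its ikey buffer across iterations.
	dst = append(dst[:0], ukey...)
	var trailer [8]byte
	binary.LittleEndian.PutUint64(trailer[:], (seqNum<<8)|uint64(kind))
	return append(dst, trailer[:]...)
}

func main() {
	// Kind 1 is a "set" in LevelDB's encoding; 0 is a deletion.
	ikey := makeInternalKeySketch(nil, []byte("user-key"), 1, 42)
	fmt.Printf("%q\n", ikey) // "user-key" plus the 8-byte trailer
}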