// writeLevel0Table writes a memtable to a level-0 on-disk table. // // If no error is returned, it adds the file number of that on-disk table to // d.pendingOutputs. It is the caller's responsibility to remove that fileNum // from that set when it has been applied to d.versions. // // d.mu must be held when calling this, but the mutex may be dropped and // re-acquired during the course of this method. func (d *DB) writeLevel0Table(fs db.FileSystem, mem *memdb.MemDB) (meta fileMetadata, err error) { meta.fileNum = d.versions.nextFileNum() filename := dbFilename(d.dirname, fileTypeTable, meta.fileNum) d.pendingOutputs[meta.fileNum] = struct{}{} defer func(fileNum uint64) { if err != nil { delete(d.pendingOutputs, fileNum) } }(meta.fileNum) // Release the d.mu lock while doing I/O. // Note the unusual order: Unlock and then Lock. d.mu.Unlock() defer d.mu.Lock() var ( file db.File tw *table.Writer iter db.Iterator ) defer func() { if iter != nil { err = firstError(err, iter.Close()) } if tw != nil { err = firstError(err, tw.Close()) } if file != nil { err = firstError(err, file.Close()) } if err != nil { fs.Remove(filename) meta = fileMetadata{} } }() file, err = fs.Create(filename) if err != nil { return fileMetadata{}, err } tw = table.NewWriter(file, &db.Options{ Comparer: d.icmp, }) iter = mem.Find(nil, nil) iter.Next() meta.smallest = internalKey(iter.Key()).clone() for { meta.largest = iter.Key() if err1 := tw.Set(meta.largest, iter.Value(), nil); err1 != nil { return fileMetadata{}, err1 } if !iter.Next() { break } } meta.largest = meta.largest.clone() if err1 := iter.Close(); err1 != nil { iter = nil return fileMetadata{}, err1 } iter = nil if err1 := tw.Close(); err1 != nil { tw = nil return fileMetadata{}, err1 } tw = nil // TODO: currently, closing a table.Writer closes its underlying file. // We have to re-open the file to Sync or Stat it, which seems stupid. file, err = fs.Open(filename) if err != nil { return fileMetadata{}, err } if err1 := file.Sync(); err1 != nil { return fileMetadata{}, err1 } if stat, err1 := file.Stat(); err1 != nil { return fileMetadata{}, err1 } else { size := stat.Size() if size < 0 { return fileMetadata{}, fmt.Errorf("leveldb: table file %q has negative size %d", filename, size) } meta.size = uint64(size) } // TODO: compaction stats. return meta, nil }
// replayLogFile replays the edits in the named log file. // // d.mu must be held when calling this, but the mutex may be dropped and // re-acquired during the course of this method. func (d *DB) replayLogFile(ve *versionEdit, fs db.FileSystem, filename string) (maxSeqNum uint64, err error) { file, err := fs.Open(filename) if err != nil { return 0, err } defer file.Close() var ( mem *memdb.MemDB batchBuf = new(bytes.Buffer) ikey = make(internalKey, 512) rr = record.NewReader(file) ) for { r, err := rr.Next() if err == io.EOF { break } if err != nil { return 0, err } _, err = io.Copy(batchBuf, r) if err != nil { return 0, err } if batchBuf.Len() < batchHeaderLen { return 0, fmt.Errorf("leveldb: corrupt log file %q", filename) } b := Batch{batchBuf.Bytes()} seqNum := b.seqNum() seqNum1 := seqNum + uint64(b.count()) if maxSeqNum < seqNum1 { maxSeqNum = seqNum1 } if mem == nil { mem = memdb.New(&d.icmpOpts) } t := b.iter() for ; seqNum != seqNum1; seqNum++ { kind, ukey, value, ok := t.next() if !ok { return 0, fmt.Errorf("leveldb: corrupt log file %q", filename) } // Convert seqNum, kind and key into an internalKey, and add that ikey/value // pair to mem. // // TODO: instead of copying to an intermediate buffer (ikey), is it worth // adding a SetTwoPartKey(db.TwoPartKey{key0, key1}, value, opts) method to // memdb.MemDB? What effect does that have on the db.Comparer interface? // // The C++ LevelDB code does not need an intermediate copy because its memdb // implementation is a private implementation detail, and copies each internal // key component from the Batch format straight to the skiplist buffer. // // Go's LevelDB considers the memdb functionality to be useful in its own // right, and so leveldb/memdb is a separate package that is usable without // having to import the top-level leveldb package. That extra abstraction // means that we need to copy to an intermediate buffer here, to reconstruct // the complete internal key to pass to the memdb. ikey = makeInternalKey(ikey, ukey, kind, seqNum) mem.Set(ikey, value, nil) } if len(t) != 0 { return 0, fmt.Errorf("leveldb: corrupt log file %q", filename) } // TODO: if mem is large enough, write it to a level-0 table and set mem = nil. batchBuf.Reset() } if mem != nil && !mem.Empty() { meta, err := d.writeLevel0Table(fs, mem) if err != nil { return 0, err } ve.newFiles = append(ve.newFiles, newFileEntry{level: 0, meta: meta}) // Strictly speaking, it's too early to delete meta.fileNum from d.pendingOutputs, // but we are replaying the log file, which happens before Open returns, so there // is no possibility of deleteObsoleteFiles being called concurrently here. delete(d.pendingOutputs, meta.fileNum) } return maxSeqNum, nil }