// New returns a new MemDB. func New(o *db.Options) *MemDB { return &MemDB{ height: 1, cmp: o.GetComparer(), kvData: make([]byte, 0, 4096), // The first maxHeight values of nodeData are the next nodes after the // head node at each possible height. Their initial value is zeroNode. nodeData: make([]int, maxHeight, 256), } }
func createDB(dirname string, opts *db.Options) (retErr error) { const manifestFileNum = 1 ve := versionEdit{ comparatorName: opts.GetComparer().Name(), nextFileNumber: manifestFileNum + 1, } manifestFilename := dbFilename(dirname, fileTypeManifest, manifestFileNum) f, err := opts.GetFileSystem().Create(manifestFilename) if err != nil { return fmt.Errorf("leveldb: could not create %q: %v", manifestFilename, err) } defer func() { if retErr != nil { opts.GetFileSystem().Remove(manifestFilename) } }() defer f.Close() recWriter := record.NewWriter(f) w, err := recWriter.Next() if err != nil { return err } err = ve.encode(w) if err != nil { return err } err = recWriter.Close() if err != nil { return err } return setCurrentFile(dirname, opts.GetFileSystem(), manifestFileNum) }
// NewWriter returns a new table writer for the file. Closing the writer will // close the file. func NewWriter(f db.File, o *db.Options) *Writer { w := &Writer{ closer: f, blockRestartInterval: o.GetBlockRestartInterval(), blockSize: o.GetBlockSize(), cmp: o.GetComparer(), compression: o.GetCompression(), prevKey: make([]byte, 0, 256), restarts: make([]uint32, 0, 256), } if f == nil { w.err = errors.New("leveldb/table: nil file") return w } // If f does not have a Flush method, do our own buffering. type flusher interface { Flush() error } if _, ok := f.(flusher); ok { w.writer = f } else { w.bufWriter = bufio.NewWriter(f) w.writer = w.bufWriter } return w }
// NewReader returns a new table reader for the file. Closing the reader will // close the file. func NewReader(f db.File, o *db.Options) *Reader { r := &Reader{ file: f, comparer: o.GetComparer(), verifyChecksums: o.GetVerifyChecksums(), } if f == nil { r.err = errors.New("leveldb/table: nil file") return r } stat, err := f.Stat() if err != nil { r.err = fmt.Errorf("leveldb/table: invalid table (could not stat file): %v", err) return r } var footer [footerLen]byte if stat.Size() < int64(len(footer)) { r.err = errors.New("leveldb/table: invalid table (file size is too small)") return r } _, err = f.ReadAt(footer[:], stat.Size()-int64(len(footer))) if err != nil && err != io.EOF { r.err = fmt.Errorf("leveldb/table: invalid table (could not read footer): %v", err) return r } if string(footer[footerLen-len(magic):footerLen]) != magic { r.err = errors.New("leveldb/table: invalid table (bad magic number)") return r } // Ignore the metaindex. _, n := decodeBlockHandle(footer[:]) if n == 0 { r.err = errors.New("leveldb/table: invalid table (bad metaindex block handle)") return r } // Read the index into memory. indexBH, n := decodeBlockHandle(footer[n:]) if n == 0 { r.err = errors.New("leveldb/table: invalid table (bad index block handle)") return r } r.index, r.err = r.readBlock(indexBH) return r }
// 创建leveldb存储目录时,需要传入db.Options, 那么db.Options有哪些属性呢? func createDB(dirname string, opts *db.Options) (retErr error) { const manifestFileNum = 1 // versionEdit保存版本之间的差异,oldVersion _+ versionEdit = newVersion ve := versionEdit{ // 原始key比较器的名称 comparatorName: opts.GetComparer().Name(), // ? nextFileNumber: manifestFileNum + 1, } // 先创建manifest文件,往里面写一些元信息,如上面versionEdit的comparatorName、nextFileNumber字段 manifestFilename := dbFilename(dirname, fileTypeManifest, manifestFileNum) f, err := opts.GetFileSystem().Create(manifestFilename) if err != nil { return fmt.Errorf("leveldb: could not create %q: %v", manifestFilename, err) } defer func() { if retErr != nil { opts.GetFileSystem().Remove(manifestFilename) } }() defer f.Close() // 生成一个record.Writer对象 recWriter := record.NewWriter(f) // 返回一个record.SingleWriter对象w w, err := recWriter.Next() if err != nil { return err } // 将ve中的数据写入manifest文件中 err = ve.encode(w) if err != nil { return err } // 加入重启点, 压缩,crc校验? err = recWriter.Close() if err != nil { return err } // 创建current文件,其中写入了当前的manifest文件名 return setCurrentFile(dirname, opts.GetFileSystem(), manifestFileNum) }
// Open opens a LevelDB whose files live in the given directory.
//
// It creates the directory if needed, takes the LOCK file, creates a fresh
// database when no CURRENT file exists, loads the version set, replays the
// relevant log files, starts a new empty log, and writes a new manifest. d.mu
// is held for the whole call. On any error the file lock and the new log file
// are closed again and a nil *DB is returned.
func Open(dirname string, opts *db.Options) (*DB, error) {
	d := &DB{
		dirname:        dirname,
		opts:           opts,
		icmp:           internalKeyComparer{opts.GetComparer()},
		pendingOutputs: make(map[uint64]struct{}),
	}
	// icmpOpts is a copy of the caller's options (when given) with the
	// comparer replaced by the internal-key comparer.
	if opts != nil {
		d.icmpOpts = *opts
	}
	d.icmpOpts.Comparer = d.icmp
	tableCacheSize := opts.GetMaxOpenFiles() - numNonTableCacheFiles
	if tableCacheSize < minTableCacheSize {
		tableCacheSize = minTableCacheSize
	}
	// Initialize the table cache.
	d.tableCache.init(dirname, opts.GetFileSystem(), &d.icmpOpts, tableCacheSize)
	// Initialize a MemDB.
	d.mem = memdb.New(&d.icmpOpts)
	// sync.Cond adds notification on top of a Locker. It has three methods:
	// Wait blocks the calling goroutine, Signal wakes one waiting goroutine,
	// and Broadcast wakes all of them. Here the condition shares d.mu.
	d.compactionCond = sync.Cond{L: &d.mu}
	fs := opts.GetFileSystem()

	d.mu.Lock()
	defer d.mu.Unlock()

	// Lock the database directory.
	// If the directory already exists, MkdirAll does nothing and returns nil.
	err := fs.MkdirAll(dirname, 0755)
	if err != nil {
		return nil, err
	}
	// Create the LOCK file and take the file lock on it.
	fileLock, err := fs.Lock(dbFilename(dirname, fileTypeLock, 0))
	if err != nil {
		return nil, err
	}
	// Release the lock on every error path; the success path sets fileLock
	// to nil below so this becomes a no-op.
	defer func() {
		if fileLock != nil {
			fileLock.Close()
		}
	}()

	// If the CURRENT file does not exist, call createDB.
	if _, err := fs.Stat(dbFilename(dirname, fileTypeCurrent, 0)); os.IsNotExist(err) {
		// Create the DB if it did not already exist.
		if err := createDB(dirname, opts); err != nil {
			return nil, err
		}
	} else if err != nil {
		return nil, fmt.Errorf("leveldb: database %q: %v", dirname, err)
	} else if opts.GetErrorIfDBExists() {
		return nil, fmt.Errorf("leveldb: database %q already exists", dirname)
	}

	// Load the version set.
	// NOTE(review): per the original annotation, this reads CURRENT to find
	// the manifest name, reads the manifest record by record, and builds a
	// new version that is stored in d.versions.
	err = d.versions.load(dirname, opts)
	if err != nil {
		return nil, err
	}

	// Replay any newer log files than the ones named in the manifest.
	var ve versionEdit
	ls, err := fs.List(dirname)
	if err != nil {
		return nil, err
	}
	var logFiles fileNumAndNameSlice
	for _, filename := range ls {
		ft, fn, ok := parseDBFilename(filename)
		if ok && ft == fileTypeLog && (fn >= d.versions.logNumber || fn == d.versions.prevLogNumber) {
			logFiles = append(logFiles, fileNumAndName{fn, filename})
		}
	}
	sort.Sort(logFiles)
	for _, lf := range logFiles {
		// NOTE(review): per the original annotation, replaying a log redoes
		// its recorded operations into a temporary memtable and then dumps
		// that to a level-0 file on disk; confirm against replayLogFile.
		maxSeqNum, err := d.replayLogFile(&ve, fs, filepath.Join(dirname, lf.name))
		if err != nil {
			return nil, err
		}
		d.versions.markFileNumUsed(lf.num)
		// Keep lastSequence at the largest sequence number seen so far.
		if d.versions.lastSequence < maxSeqNum {
			d.versions.lastSequence = maxSeqNum
		}
	}

	// Create an empty .log file.
	ve.logNumber = d.versions.nextFileNum()
	d.logNumber = ve.logNumber
	logFile, err := fs.Create(dbFilename(dirname, fileTypeLog, ve.logNumber))
	if err != nil {
		return nil, err
	}
	// Close the new log on every error path; the success path sets logFile
	// to nil below so this becomes a no-op.
	defer func() {
		if logFile != nil {
			logFile.Close()
		}
	}()
	d.log = record.NewWriter(logFile)

	// Write a new manifest to disk.
	// NOTE(review): per the original annotation, this builds a new manifest
	// from the edit accumulated during replay and points CURRENT at it;
	// confirm against logAndApply.
	if err := d.versions.logAndApply(dirname, &ve); err != nil {
		return nil, err
	}

	d.deleteObsoleteFiles()
	// Try to schedule a compaction.
	d.maybeScheduleCompaction()

	// Hand ownership of the log file and the file lock to d, and clear the
	// locals so the deferred cleanups above do not close them.
	d.logFile, logFile = logFile, nil
	d.fileLock, fileLock = fileLock, nil
	return d, nil
}
// load loads the version set from the manifest file. func (vs *versionSet) load(dirname string, opts *db.Options) error { vs.dirname = dirname vs.opts = opts vs.fs = opts.GetFileSystem() vs.ucmp = opts.GetComparer() vs.icmp = internalKeyComparer{vs.ucmp} vs.dummyVersion.prev = &vs.dummyVersion vs.dummyVersion.next = &vs.dummyVersion // For historical reasons, the next file number is initialized to 2. vs.nextFileNumber = 2 // Read the CURRENT file to find the current manifest file. current, err := vs.fs.Open(dbFilename(dirname, fileTypeCurrent, 0)) if err != nil { return fmt.Errorf("leveldb: could not open CURRENT file for DB %q: %v", dirname, err) } defer current.Close() stat, err := current.Stat() if err != nil { return err } n := stat.Size() if n == 0 { return fmt.Errorf("leveldb: CURRENT file for DB %q is empty", dirname) } if n > 4096 { return fmt.Errorf("leveldb: CURRENT file for DB %q is too large", dirname) } b := make([]byte, n) _, err = current.ReadAt(b, 0) if err != nil { return err } if b[n-1] != '\n' { return fmt.Errorf("leveldb: CURRENT file for DB %q is malformed", dirname) } b = b[:n-1] // Read the versionEdits in the manifest file. 
var bve bulkVersionEdit manifest, err := vs.fs.Open(dirname + string(os.PathSeparator) + string(b)) if err != nil { return fmt.Errorf("leveldb: could not open manifest file %q for DB %q: %v", b, dirname, err) } defer manifest.Close() rr := record.NewReader(manifest) for { r, err := rr.Next() if err == io.EOF { break } if err != nil { return err } var ve versionEdit err = ve.decode(r) if err != nil { return err } if ve.comparatorName != "" { if ve.comparatorName != vs.ucmp.Name() { return fmt.Errorf("leveldb: manifest file %q for DB %q: "+ "comparer name from file %q != comparer name from db.Options %q", b, dirname, ve.comparatorName, vs.ucmp.Name()) } } bve.accumulate(&ve) if ve.logNumber != 0 { vs.logNumber = ve.logNumber } if ve.prevLogNumber != 0 { vs.prevLogNumber = ve.prevLogNumber } if ve.nextFileNumber != 0 { vs.nextFileNumber = ve.nextFileNumber } if ve.lastSequence != 0 { vs.lastSequence = ve.lastSequence } } if vs.logNumber == 0 || vs.nextFileNumber == 0 { if vs.nextFileNumber == 2 { // We have a freshly created DB. } else { return fmt.Errorf("leveldb: incomplete manifest file %q for DB %q", b, dirname) } } vs.markFileNumUsed(vs.logNumber) vs.markFileNumUsed(vs.prevLogNumber) vs.manifestFileNumber = vs.nextFileNum() newVersion, err := bve.apply(nil, vs.icmp) if err != nil { return err } vs.append(newVersion) return nil }
// Open opens a LevelDB whose files live in the given directory.
//
// It creates the directory if needed, takes the LOCK file, loads the version
// set, replays the relevant log files, starts a new empty log, and writes a
// new manifest. On any error the file lock and the new log file are closed
// again and a nil *DB is returned.
func Open(dirname string, opts *db.Options) (*DB, error) {
	d := &DB{
		dirname: dirname,
		opts:    opts,
		icmp:    internalKeyComparer{opts.GetComparer()},
	}
	fs := opts.GetFileSystem()

	// Lock the database directory.
	err := fs.MkdirAll(dirname, 0755)
	if err != nil {
		return nil, err
	}
	fileLock, err := fs.Lock(dbFilename(dirname, fileTypeLock, 0))
	if err != nil {
		return nil, err
	}
	// Release the lock on every error path; the success path sets fileLock
	// to nil below so this becomes a no-op.
	defer func() {
		if fileLock != nil {
			fileLock.Close()
		}
	}()

	// TODO: add options for CreateIfMissing and ErrorIfExists, and check them here.

	// Load the version set.
	err = d.versions.load(dirname, opts)
	if err != nil {
		return nil, err
	}

	// Replay any newer log files than the ones named in the manifest.
	var ve versionEdit
	ls, err := fs.List(dirname)
	if err != nil {
		return nil, err
	}
	var logFiles fileNumAndNameSlice
	for _, filename := range ls {
		// A result of 0 from logFileNum excludes the file from replay.
		n := logFileNum(filename)
		if n != 0 && (n >= d.versions.logNumber || n == d.versions.prevLogNumber) {
			logFiles = append(logFiles, fileNumAndName{n, filename})
		}
	}
	sort.Sort(logFiles)
	for _, lf := range logFiles {
		maxSeqNum, err := d.replayLogFile(&ve, fs, filepath.Join(dirname, lf.name))
		if err != nil {
			return nil, err
		}
		d.versions.markFileNumUsed(lf.num)
		// Keep lastSequence at the largest sequence number seen so far.
		if d.versions.lastSequence < maxSeqNum {
			d.versions.lastSequence = maxSeqNum
		}
	}

	// Create an empty .log file.
	ve.logNumber = d.versions.nextFileNum()
	logFile, err := fs.Create(dbFilename(dirname, fileTypeLog, ve.logNumber))
	if err != nil {
		return nil, err
	}
	// Close the new log on every error path; the success path sets logFile
	// to nil below so this becomes a no-op.
	defer func() {
		if logFile != nil {
			logFile.Close()
		}
	}()
	d.log = record.NewWriter(logFile)

	// Write a new manifest to disk.
	if err := d.versions.logAndApply(dirname, &ve); err != nil {
		return nil, err
	}

	// TODO: delete obsolete files.
	// TODO: maybe schedule compaction?

	// Hand ownership of the log file and the file lock to d, and clear the
	// locals so the deferred cleanups above do not close them.
	d.logFile, logFile = logFile, nil
	d.fileLock, fileLock = fileLock, nil
	return d, nil
}
// load loads the version set from the manifest file. // 先读取CURRENT文件内容,获取manifest文件名, func (vs *versionSet) load(dirname string, opts *db.Options) error { vs.dirname = dirname vs.opts = opts vs.fs = opts.GetFileSystem() vs.ucmp = opts.GetComparer() vs.icmp = internalKeyComparer{vs.ucmp} vs.dummyVersion.prev = &vs.dummyVersion vs.dummyVersion.next = &vs.dummyVersion // For historical reasons, the next file number is initialized to 2. vs.nextFileNumber = 2 // Read the CURRENT file to find the current manifest file. current, err := vs.fs.Open(dbFilename(dirname, fileTypeCurrent, 0)) if err != nil { return fmt.Errorf("leveldb: could not open CURRENT file for DB %q: %v", dirname, err) } defer current.Close() stat, err := current.Stat() if err != nil { return err } // 文件大小 n := stat.Size() if n == 0 { return fmt.Errorf("leveldb: CURRENT file for DB %q is empty", dirname) } if n > 4096 { return fmt.Errorf("leveldb: CURRENT file for DB %q is too large", dirname) } b := make([]byte, n) // 将文件数据读取到b中 _, err = current.ReadAt(b, 0) if err != nil { return err } if b[n-1] != '\n' { return fmt.Errorf("leveldb: CURRENT file for DB %q is malformed", dirname) } // 去除掉最后的换行符 // b即为CURRENT所指向的manifest文件的文件名 b = b[:n-1] // Read the versionEdits in the manifest file. var bve bulkVersionEdit manifest, err := vs.fs.Open(dirname + string(os.PathSeparator) + string(b)) if err != nil { return fmt.Errorf("leveldb: could not open manifest file %q for DB %q: %v", b, dirname, err) } defer manifest.Close() // 生成一个Reader对象 rr := record.NewReader(manifest) for { // Next returns a reader for the next record. 
r, err := rr.Next() if err == io.EOF { break } if err != nil { return err } var ve versionEdit // 将记录读入ve err = ve.decode(r) if err != nil { return err } if ve.comparatorName != "" { // 读和写的key比较方法必须一致 if ve.comparatorName != vs.ucmp.Name() { return fmt.Errorf("leveldb: manifest file %q for DB %q: "+ "comparer name from file %q != comparer name from db.Options %q", b, dirname, ve.comparatorName, vs.ucmp.Name()) } } // bve. bve.accumulate(&ve) if ve.logNumber != 0 { vs.logNumber = ve.logNumber } if ve.prevLogNumber != 0 { vs.prevLogNumber = ve.prevLogNumber } if ve.nextFileNumber != 0 { vs.nextFileNumber = ve.nextFileNumber } if ve.lastSequence != 0 { vs.lastSequence = ve.lastSequence } } if vs.logNumber == 0 || vs.nextFileNumber == 0 { if vs.nextFileNumber == 2 { // We have a freshly created DB. } else { return fmt.Errorf("leveldb: incomplete manifest file %q for DB %q", b, dirname) } } // 生成新版本version之前的准备工作 vs.markFileNumUsed(vs.logNumber) vs.markFileNumUsed(vs.prevLogNumber) vs.manifestFileNumber = vs.nextFileNum() // 根据bve的added和deleted字段,生成一个新的version(newVersion) // 并根据level0的文件数及更大level的文件所占磁盘空间大小来计算newVersion的compactionScore和compactionLevel // 这个compactionScore和compactionLevel是判断是否需要compaction的条件 newVersion, err := bve.apply(nil, vs.icmp) if err != nil { return err } vs.append(newVersion) return nil }
// Open opens a LevelDB whose files live in the given directory.
//
// It creates the directory if needed, takes the LOCK file, creates a fresh
// database when no CURRENT file exists, loads the version set, replays the
// relevant log files, starts a new empty log, and writes a new manifest. d.mu
// is held for the whole call. On any error the file lock and the new log file
// are closed again and a nil *DB is returned.
func Open(dirname string, opts *db.Options) (*DB, error) {
	d := &DB{
		dirname:        dirname,
		opts:           opts,
		icmp:           internalKeyComparer{opts.GetComparer()},
		pendingOutputs: make(map[uint64]struct{}),
	}
	// icmpOpts is a copy of the caller's options (when given) with the
	// comparer replaced by the internal-key comparer.
	if opts != nil {
		d.icmpOpts = *opts
	}
	d.icmpOpts.Comparer = d.icmp
	// Clamp the table cache size to at least minTableCacheSize.
	tableCacheSize := opts.GetMaxOpenFiles() - numNonTableCacheFiles
	if tableCacheSize < minTableCacheSize {
		tableCacheSize = minTableCacheSize
	}
	d.tableCache.init(dirname, opts.GetFileSystem(), &d.icmpOpts, tableCacheSize)
	d.mem = memdb.New(&d.icmpOpts)
	// The compaction condition variable shares d.mu as its Locker.
	d.compactionCond = sync.Cond{L: &d.mu}
	fs := opts.GetFileSystem()

	d.mu.Lock()
	defer d.mu.Unlock()

	// Lock the database directory.
	err := fs.MkdirAll(dirname, 0755)
	if err != nil {
		return nil, err
	}
	fileLock, err := fs.Lock(dbFilename(dirname, fileTypeLock, 0))
	if err != nil {
		return nil, err
	}
	// Release the lock on every error path; the success path sets fileLock
	// to nil below so this becomes a no-op.
	defer func() {
		if fileLock != nil {
			fileLock.Close()
		}
	}()

	if _, err := fs.Stat(dbFilename(dirname, fileTypeCurrent, 0)); os.IsNotExist(err) {
		// Create the DB if it did not already exist.
		if err := createDB(dirname, opts); err != nil {
			return nil, err
		}
	} else if err != nil {
		return nil, fmt.Errorf("leveldb: database %q: %v", dirname, err)
	} else if opts.GetErrorIfDBExists() {
		return nil, fmt.Errorf("leveldb: database %q already exists", dirname)
	}

	// Load the version set.
	err = d.versions.load(dirname, opts)
	if err != nil {
		return nil, err
	}

	// Replay any newer log files than the ones named in the manifest.
	var ve versionEdit
	ls, err := fs.List(dirname)
	if err != nil {
		return nil, err
	}
	var logFiles fileNumAndNameSlice
	for _, filename := range ls {
		ft, fn, ok := parseDBFilename(filename)
		if ok && ft == fileTypeLog && (fn >= d.versions.logNumber || fn == d.versions.prevLogNumber) {
			logFiles = append(logFiles, fileNumAndName{fn, filename})
		}
	}
	sort.Sort(logFiles)
	for _, lf := range logFiles {
		maxSeqNum, err := d.replayLogFile(&ve, fs, filepath.Join(dirname, lf.name))
		if err != nil {
			return nil, err
		}
		d.versions.markFileNumUsed(lf.num)
		// Keep lastSequence at the largest sequence number seen so far.
		if d.versions.lastSequence < maxSeqNum {
			d.versions.lastSequence = maxSeqNum
		}
	}

	// Create an empty .log file.
	ve.logNumber = d.versions.nextFileNum()
	d.logNumber = ve.logNumber
	logFile, err := fs.Create(dbFilename(dirname, fileTypeLog, ve.logNumber))
	if err != nil {
		return nil, err
	}
	// Close the new log on every error path; the success path sets logFile
	// to nil below so this becomes a no-op.
	defer func() {
		if logFile != nil {
			logFile.Close()
		}
	}()
	d.log = record.NewWriter(logFile)

	// Write a new manifest to disk.
	if err := d.versions.logAndApply(dirname, &ve); err != nil {
		return nil, err
	}

	d.deleteObsoleteFiles()
	d.maybeScheduleCompaction()

	// Hand ownership of the log file and the file lock to d, and clear the
	// locals so the deferred cleanups above do not close them.
	d.logFile, logFile = logFile, nil
	d.fileLock, fileLock = fileLock, nil
	return d, nil
}