Пример #1
0
// makeRoomForWrite ensures that there is room in d.mem for the next write.
//
// d.mu must be held when calling this, but the mutex may be dropped and
// re-acquired during the course of this method.
func (d *DB) makeRoomForWrite(force bool) error {
	allowDelay := !force
	for {
		// TODO: check any previous sticky error, if the paranoid option is set.

		if allowDelay && len(d.versions.currentVersion().files[0]) > l0SlowdownWritesTrigger {
			// We are getting close to hitting a hard limit on the number of
			// L0 files. Rather than delaying a single write by several
			// seconds when we hit the hard limit, start delaying each
			// individual write by 1ms to reduce latency variance.
			d.mu.Unlock()
			time.Sleep(1 * time.Millisecond)
			d.mu.Lock()
			allowDelay = false
			// TODO: how do we ensure we are still 'at the front of the writer queue'?
			continue
		}

		if !force && d.mem.ApproximateMemoryUsage() <= d.opts.GetWriteBufferSize() {
			// There is room in the current memtable.
			break
		}

		if d.imm != nil {
			// We have filled up the current memtable, but the previous
			// one is still being compacted, so we wait.
			d.compactionCond.Wait()
			continue
		}

		if len(d.versions.currentVersion().files[0]) > l0StopWritesTrigger {
			// There are too many level-0 files.
			d.compactionCond.Wait()
			continue
		}

		// Attempt to switch to a new memtable and trigger compaction of old
		// TODO: drop and re-acquire d.mu around the I/O.
		newLogNumber := d.versions.nextFileNum()
		newLogFile, err := d.opts.GetFileSystem().Create(dbFilename(d.dirname, fileTypeLog, newLogNumber))
		if err != nil {
			return err
		}
		newLog := record.NewWriter(newLogFile)
		if err := d.log.Close(); err != nil {
			newLogFile.Close()
			return err
		}
		if err := d.logFile.Close(); err != nil {
			newLog.Close()
			newLogFile.Close()
			return err
		}
		d.logNumber, d.logFile, d.log = newLogNumber, newLogFile, newLog
		d.imm, d.mem = d.mem, memdb.New(&d.icmpOpts)
		force = false
		d.maybeScheduleCompaction()
	}
	return nil
}
Пример #2
0
func createDB(dirname string, opts *db.Options) (retErr error) {
	const manifestFileNum = 1
	ve := versionEdit{
		comparatorName: opts.GetComparer().Name(),
		nextFileNumber: manifestFileNum + 1,
	}
	manifestFilename := dbFilename(dirname, fileTypeManifest, manifestFileNum)
	f, err := opts.GetFileSystem().Create(manifestFilename)
	if err != nil {
		return fmt.Errorf("leveldb: could not create %q: %v", manifestFilename, err)
	}
	defer func() {
		if retErr != nil {
			opts.GetFileSystem().Remove(manifestFilename)
		}
	}()
	defer f.Close()

	recWriter := record.NewWriter(f)
	w, err := recWriter.Next()
	if err != nil {
		return err
	}
	err = ve.encode(w)
	if err != nil {
		return err
	}
	err = recWriter.Close()
	if err != nil {
		return err
	}
	return setCurrentFile(dirname, opts.GetFileSystem(), manifestFileNum)
}
Пример #3
0
// createManifest creates a manifest file that contains a snapshot of vs.
func (vs *versionSet) createManifest(dirname string) (err error) {
	var (
		filename     = dbFilename(dirname, fileTypeManifest, vs.manifestFileNumber)
		manifestFile db.File
		manifest     *record.Writer
	)
	defer func() {
		if manifest != nil {
			manifest.Close()
		}
		if manifestFile != nil {
			manifestFile.Close()
		}
		if err != nil {
			vs.fs.Remove(filename)
		}
	}()
	manifestFile, err = vs.fs.Create(filename)
	if err != nil {
		return err
	}
	manifest = record.NewWriter(manifestFile)

	snapshot := versionEdit{
		comparatorName: vs.ucmp.Name(),
	}
	// TODO: save compaction pointers.
	for level, fileMetadata := range vs.currentVersion().files {
		for _, meta := range fileMetadata {
			snapshot.newFiles = append(snapshot.newFiles, newFileEntry{
				level: level,
				meta:  meta,
			})
		}
	}

	w, err1 := manifest.Next()
	if err1 != nil {
		return err1
	}
	err1 = snapshot.encode(w)
	if err1 != nil {
		return err1
	}

	vs.manifest, manifest = manifest, nil
	vs.manifestFile, manifestFile = manifestFile, nil
	return nil
}
Пример #4
0
// 创建leveldb存储目录时,需要传入db.Options, 那么db.Options有哪些属性呢?
func createDB(dirname string, opts *db.Options) (retErr error) {
	const manifestFileNum = 1
	// versionEdit保存版本之间的差异,oldVersion _+ versionEdit = newVersion
	ve := versionEdit{
		// 原始key比较器的名称
		comparatorName: opts.GetComparer().Name(),
		// ?
		nextFileNumber: manifestFileNum + 1,
	}
	// 先创建manifest文件,往里面写一些元信息,如上面versionEdit的comparatorName、nextFileNumber字段
	manifestFilename := dbFilename(dirname, fileTypeManifest, manifestFileNum)
	f, err := opts.GetFileSystem().Create(manifestFilename)
	if err != nil {
		return fmt.Errorf("leveldb: could not create %q: %v", manifestFilename, err)
	}
	defer func() {
		if retErr != nil {
			opts.GetFileSystem().Remove(manifestFilename)
		}
	}()
	defer f.Close()

	// 生成一个record.Writer对象
	recWriter := record.NewWriter(f)

	// 返回一个record.SingleWriter对象w
	w, err := recWriter.Next()
	if err != nil {
		return err
	}
	// 将ve中的数据写入manifest文件中
	err = ve.encode(w)
	if err != nil {
		return err
	}
	// 加入重启点, 压缩,crc校验?
	err = recWriter.Close()
	if err != nil {
		return err
	}
	// 创建current文件,其中写入了当前的manifest文件名
	return setCurrentFile(dirname, opts.GetFileSystem(), manifestFileNum)
}
Пример #5
0
// makeRoomForWrite ensures that there is room in d.mem for the next write.
//
// d.mu must be held when calling this, but the mutex may be dropped and
// re-acquired during the course of this method.
func (d *DB) makeRoomForWrite(force bool) error {
	// allowDelay:允许延迟写
	// force = true表示强制立即写?
	allowDelay := !force
	// 无条件循环
	for {
		// TODO: check any previous sticky error, if the paranoid option is set.

		// 若当前版本(即链表versions的最后一个元素)中level0的文件数大于l0SlowdownWritesTrigger(8),则对当前写操作延迟1毫秒
		if allowDelay && len(d.versions.currentVersion().files[0]) > l0SlowdownWritesTrigger {
			// We are getting close to hitting a hard limit on the number of
			// L0 files. Rather than delaying a single write by several
			// seconds when we hit the hard limit, **start delaying each
			// individual write by 1ms to reduce latency variance.**
			d.mu.Unlock()
			time.Sleep(1 * time.Millisecond)
			d.mu.Lock()
			allowDelay = false
			// TODO: how do we ensure we are still 'at the front of the writer queue'?
			continue
		}

		// 若当前memtable中仍有写入的空间,则直接break返回

		// d.mem.ApproximateMemoryUsage()返回len(d.mem.kvData)
		// d.opts.GetWriteBufferSize是memtable的临界大小,也是一个level0的db文件的临界大小
		// 但从 <= 可以看到,这个临界大小是可以被超过的。。。
		if !force && d.mem.ApproximateMemoryUsage() <= d.opts.GetWriteBufferSize() {
			// There is room in the current memtable.
			break
		}

		// 如果当前memtable(即d.mem)已写满,且immutable memtable(d.imm)存在,则需等待,因为说明上一次从imm到level0的compaction操作还没有结束
		// 这也意味着从imm到level0的compaction过程结束时会将d.imm置为nil
		// 而什么时候d.imm会变成非nil呢?见下面,当d.mem满时,会将d.imm指向d.mem,而d.mem则指向一个新申请的内存空间
		// 那么d.imm的存在时间最长为新d.mem从空到被写满的时间
		if d.imm != nil {
			// We have filled up the current memtable, but the previous
			// one is still being compacted, so we wait.
			// d.compactionCond.Wait()类似于在等待一个唤醒的信号
			d.compactionCond.Wait()
			continue
		}

		// 若当前版本(即链表versions的最后一个元素)中level0的文件数大于l0StopWritesTrigger(12),则需等待从level0到level1的compaction过程结束
		if len(d.versions.currentVersion().files[0]) > l0StopWritesTrigger {
			// There are too many level-0 files.
			d.compactionCond.Wait()
			continue
		}

		// Attempt to switch to a new memtable and trigger compaction of old
		// 因为d.mem已经写满,而d.imm也已经被置为nil,那么可以将d.imm指向d.mem,并为d.mem申请一块新的内存空间
		// 从以下代码中可以看到在从d.mem切换到d.imm之前,先创建了一个新的log文件,并打开。这说明一个log文件的生命周期和d.mem是一样的,
		// 即一个log记录的一个d.mem上的增删改操作
		// TODO: drop and re-acquire d.mu around the I/O.

		// 不同的的log文件,通过文件名中不同的数字序号来区分
		newLogNumber := d.versions.nextFileNum()
		newLogFile, err := d.opts.GetFileSystem().Create(dbFilename(d.dirname, fileTypeLog, newLogNumber))
		if err != nil {
			return err
		}
		newLog := record.NewWriter(newLogFile)
		if err := d.log.Close(); err != nil {
			newLogFile.Close()
			return err
		}
		if err := d.logFile.Close(); err != nil {
			newLog.Close()
			newLogFile.Close()
			return err
		}
		// 设置d(DB)的属性
		// 切换到新的日志,新的imm和mem
		d.logNumber, d.logFile, d.log = newLogNumber, newLogFile, newLog
		// memdb.New申请一个新的memtable内存空间
		d.imm, d.mem = d.mem, memdb.New(&d.icmpOpts)
		force = false

		// 由于这时d.imm不为nil,则应该调度一次compaction,将d.imm中的数据写到level0磁盘文件中
		d.maybeScheduleCompaction()
	}
	return nil
}
Пример #6
0
// Open opens a LevelDB whose files live in the given directory.
func Open(dirname string, opts *db.Options) (*DB, error) {
	d := &DB{
		dirname:        dirname,
		opts:           opts,
		icmp:           internalKeyComparer{opts.GetComparer()},
		pendingOutputs: make(map[uint64]struct{}),
	}
	if opts != nil {
		d.icmpOpts = *opts
	}
	d.icmpOpts.Comparer = d.icmp
	tableCacheSize := opts.GetMaxOpenFiles() - numNonTableCacheFiles
	if tableCacheSize < minTableCacheSize {
		tableCacheSize = minTableCacheSize
	}
	// tableCache初始化
	d.tableCache.init(dirname, opts.GetFileSystem(), &d.icmpOpts, tableCacheSize)
	// 初始化一个MemDB
	d.mem = memdb.New(&d.icmpOpts)

	// sync.Cond在Locker的基础上增加的一个消息通知的功能。
	// Cond有三个方法:Wait,Signal,Broadcast。
	// Wait添加一个计数,也就是添加一个阻塞的goroutine。
	// Signal解除一个goroutine的阻塞,计数减一。
	// Broadcast接触所有wait goroutine的阻塞。
	d.compactionCond = sync.Cond{L: &d.mu}
	fs := opts.GetFileSystem()

	d.mu.Lock()
	defer d.mu.Unlock()

	// Lock the database directory.
	// If the directory already exists, MkdirAll does nothing and returns nil.
	// 如果目录已经存在,则MkdirAll啥都不干
	err := fs.MkdirAll(dirname, 0755)
	if err != nil {
		return nil, err
	}
	// 创建LOCK文件,并加文件锁
	fileLock, err := fs.Lock(dbFilename(dirname, fileTypeLock, 0))
	if err != nil {
		return nil, err
	}
	defer func() {
		if fileLock != nil {
			fileLock.Close()
		}
	}()

	// 若CURRENT文件不存在,则调用createDB
	if _, err := fs.Stat(dbFilename(dirname, fileTypeCurrent, 0)); os.IsNotExist(err) {
		// Create the DB if it did not already exist.
		if err := createDB(dirname, opts); err != nil {
			return nil, err
		}
	} else if err != nil {
		return nil, fmt.Errorf("leveldb: database %q: %v", dirname, err)
	} else if opts.GetErrorIfDBExists() {
		return nil, fmt.Errorf("leveldb: database %q already exists", dirname)
	}

	// Load the version set.
	// 先读取CURRENT文件内容,获取manifest文件名,然后逐条记录读取manifest文件的内容,根据内容生成一个新version,放入d.versions中
	err = d.versions.load(dirname, opts)
	if err != nil {
		return nil, err
	}

	// Replay any newer log files than the ones named in the manifest.
	var ve versionEdit
	ls, err := fs.List(dirname)
	if err != nil {
		return nil, err
	}
	var logFiles fileNumAndNameSlice
	for _, filename := range ls {
		ft, fn, ok := parseDBFilename(filename)
		if ok && ft == fileTypeLog && (fn >= d.versions.logNumber || fn == d.versions.prevLogNumber) {
			logFiles = append(logFiles, fileNumAndName{fn, filename})
		}
	}
	sort.Sort(logFiles)
	for _, lf := range logFiles {
		// 根据日志文件重做日志中记录的操作,先将这些操作记录存入一个临时的memtable中,然后转存入磁盘上level0存储文件中
		maxSeqNum, err := d.replayLogFile(&ve, fs, filepath.Join(dirname, lf.name))
		if err != nil {
			return nil, err
		}
		d.versions.markFileNumUsed(lf.num)
		// 设置最新的操作序列号
		if d.versions.lastSequence < maxSeqNum {
			d.versions.lastSequence = maxSeqNum
		}
	}

	// Create an empty .log file.
	// 创建一个新的空log文件
	ve.logNumber = d.versions.nextFileNum()
	d.logNumber = ve.logNumber
	logFile, err := fs.Create(dbFilename(dirname, fileTypeLog, ve.logNumber))
	if err != nil {
		return nil, err
	}
	defer func() {
		if logFile != nil {
			logFile.Close()
		}
	}()
	d.log = record.NewWriter(logFile)

	// Write a new manifest to disk.
	// 根据前面重做日志得到的ve的信息创建一个新的manifest文件
	// 并在CURRENT文件中指向这个新manifest文件
	if err := d.versions.logAndApply(dirname, &ve); err != nil {
		return nil, err
	}

	d.deleteObsoleteFiles()
	// 尝试调度compaction
	d.maybeScheduleCompaction()

	d.logFile, logFile = logFile, nil
	d.fileLock, fileLock = fileLock, nil
	return d, nil
}
Пример #7
0
// Open opens a LevelDB whose files live in the given directory.
func Open(dirname string, opts *db.Options) (*DB, error) {
	d := &DB{
		dirname: dirname,
		opts:    opts,
		icmp:    internalKeyComparer{opts.GetComparer()},
	}
	fs := opts.GetFileSystem()

	// Lock the database directory.
	err := fs.MkdirAll(dirname, 0755)
	if err != nil {
		return nil, err
	}
	fileLock, err := fs.Lock(dbFilename(dirname, fileTypeLock, 0))
	if err != nil {
		return nil, err
	}
	defer func() {
		if fileLock != nil {
			fileLock.Close()
		}
	}()

	// TODO: add options for CreateIfMissing and ErrorIfExists, and check them here.

	// Load the version set.
	err = d.versions.load(dirname, opts)
	if err != nil {
		return nil, err
	}

	// Replay any newer log files than the ones named in the manifest.
	var ve versionEdit
	ls, err := fs.List(dirname)
	if err != nil {
		return nil, err
	}
	var logFiles fileNumAndNameSlice
	for _, filename := range ls {
		n := logFileNum(filename)
		if n != 0 && (n >= d.versions.logNumber || n == d.versions.prevLogNumber) {
			logFiles = append(logFiles, fileNumAndName{n, filename})
		}
	}
	sort.Sort(logFiles)
	for _, lf := range logFiles {
		maxSeqNum, err := d.replayLogFile(&ve, fs, filepath.Join(dirname, lf.name))
		if err != nil {
			return nil, err
		}
		d.versions.markFileNumUsed(lf.num)
		if d.versions.lastSequence < maxSeqNum {
			d.versions.lastSequence = maxSeqNum
		}
	}

	// Create an empty .log file.
	ve.logNumber = d.versions.nextFileNum()
	logFile, err := fs.Create(dbFilename(dirname, fileTypeLog, ve.logNumber))
	if err != nil {
		return nil, err
	}
	defer func() {
		if logFile != nil {
			logFile.Close()
		}
	}()
	d.log = record.NewWriter(logFile)

	// Write a new manifest to disk.
	if err := d.versions.logAndApply(dirname, &ve); err != nil {
		return nil, err
	}

	// TODO: delete obsolete files.
	// TODO: maybe schedule compaction?

	d.logFile, logFile = logFile, nil
	d.fileLock, fileLock = fileLock, nil
	return d, nil
}
Пример #8
0
// Open opens a LevelDB whose files live in the given directory.
func Open(dirname string, opts *db.Options) (*DB, error) {
	d := &DB{
		dirname:        dirname,
		opts:           opts,
		icmp:           internalKeyComparer{opts.GetComparer()},
		pendingOutputs: make(map[uint64]struct{}),
	}
	if opts != nil {
		d.icmpOpts = *opts
	}
	d.icmpOpts.Comparer = d.icmp
	tableCacheSize := opts.GetMaxOpenFiles() - numNonTableCacheFiles
	if tableCacheSize < minTableCacheSize {
		tableCacheSize = minTableCacheSize
	}
	d.tableCache.init(dirname, opts.GetFileSystem(), &d.icmpOpts, tableCacheSize)
	d.mem = memdb.New(&d.icmpOpts)
	d.compactionCond = sync.Cond{L: &d.mu}
	fs := opts.GetFileSystem()

	d.mu.Lock()
	defer d.mu.Unlock()

	// Lock the database directory.
	err := fs.MkdirAll(dirname, 0755)
	if err != nil {
		return nil, err
	}
	fileLock, err := fs.Lock(dbFilename(dirname, fileTypeLock, 0))
	if err != nil {
		return nil, err
	}
	defer func() {
		if fileLock != nil {
			fileLock.Close()
		}
	}()

	if _, err := fs.Stat(dbFilename(dirname, fileTypeCurrent, 0)); os.IsNotExist(err) {
		// Create the DB if it did not already exist.
		if err := createDB(dirname, opts); err != nil {
			return nil, err
		}
	} else if err != nil {
		return nil, fmt.Errorf("leveldb: database %q: %v", dirname, err)
	} else if opts.GetErrorIfDBExists() {
		return nil, fmt.Errorf("leveldb: database %q already exists", dirname)
	}

	// Load the version set.
	err = d.versions.load(dirname, opts)
	if err != nil {
		return nil, err
	}

	// Replay any newer log files than the ones named in the manifest.
	var ve versionEdit
	ls, err := fs.List(dirname)
	if err != nil {
		return nil, err
	}
	var logFiles fileNumAndNameSlice
	for _, filename := range ls {
		ft, fn, ok := parseDBFilename(filename)
		if ok && ft == fileTypeLog && (fn >= d.versions.logNumber || fn == d.versions.prevLogNumber) {
			logFiles = append(logFiles, fileNumAndName{fn, filename})
		}
	}
	sort.Sort(logFiles)
	for _, lf := range logFiles {
		maxSeqNum, err := d.replayLogFile(&ve, fs, filepath.Join(dirname, lf.name))
		if err != nil {
			return nil, err
		}
		d.versions.markFileNumUsed(lf.num)
		if d.versions.lastSequence < maxSeqNum {
			d.versions.lastSequence = maxSeqNum
		}
	}

	// Create an empty .log file.
	ve.logNumber = d.versions.nextFileNum()
	d.logNumber = ve.logNumber
	logFile, err := fs.Create(dbFilename(dirname, fileTypeLog, ve.logNumber))
	if err != nil {
		return nil, err
	}
	defer func() {
		if logFile != nil {
			logFile.Close()
		}
	}()
	d.log = record.NewWriter(logFile)

	// Write a new manifest to disk.
	if err := d.versions.logAndApply(dirname, &ve); err != nil {
		return nil, err
	}

	d.deleteObsoleteFiles()
	d.maybeScheduleCompaction()

	d.logFile, logFile = logFile, nil
	d.fileLock, fileLock = fileLock, nil
	return d, nil
}