示例#1
0
// New returns a new MemDB.
func New(o *db.Options) *MemDB {
	return &MemDB{
		height: 1,
		cmp:    o.GetComparer(),
		kvData: make([]byte, 0, 4096),
		// The first maxHeight values of nodeData are the next nodes after the
		// head node at each possible height. Their initial value is zeroNode.
		nodeData: make([]int, maxHeight, 256),
	}
}
示例#2
0
func createDB(dirname string, opts *db.Options) (retErr error) {
	const manifestFileNum = 1
	ve := versionEdit{
		comparatorName: opts.GetComparer().Name(),
		nextFileNumber: manifestFileNum + 1,
	}
	manifestFilename := dbFilename(dirname, fileTypeManifest, manifestFileNum)
	f, err := opts.GetFileSystem().Create(manifestFilename)
	if err != nil {
		return fmt.Errorf("leveldb: could not create %q: %v", manifestFilename, err)
	}
	defer func() {
		if retErr != nil {
			opts.GetFileSystem().Remove(manifestFilename)
		}
	}()
	defer f.Close()

	recWriter := record.NewWriter(f)
	w, err := recWriter.Next()
	if err != nil {
		return err
	}
	err = ve.encode(w)
	if err != nil {
		return err
	}
	err = recWriter.Close()
	if err != nil {
		return err
	}
	return setCurrentFile(dirname, opts.GetFileSystem(), manifestFileNum)
}
示例#3
0
// NewWriter returns a new table writer for the file. Closing the writer will
// close the file.
func NewWriter(f db.File, o *db.Options) *Writer {
	w := &Writer{
		closer:               f,
		blockRestartInterval: o.GetBlockRestartInterval(),
		blockSize:            o.GetBlockSize(),
		cmp:                  o.GetComparer(),
		compression:          o.GetCompression(),
		prevKey:              make([]byte, 0, 256),
		restarts:             make([]uint32, 0, 256),
	}
	if f == nil {
		w.err = errors.New("leveldb/table: nil file")
		return w
	}
	// If f does not have a Flush method, do our own buffering.
	type flusher interface {
		Flush() error
	}
	if _, ok := f.(flusher); ok {
		w.writer = f
	} else {
		w.bufWriter = bufio.NewWriter(f)
		w.writer = w.bufWriter
	}
	return w
}
示例#4
0
// NewReader returns a new table reader for the file. Closing the reader will
// close the file.
func NewReader(f db.File, o *db.Options) *Reader {
	r := &Reader{
		file:            f,
		comparer:        o.GetComparer(),
		verifyChecksums: o.GetVerifyChecksums(),
	}
	if f == nil {
		r.err = errors.New("leveldb/table: nil file")
		return r
	}
	stat, err := f.Stat()
	if err != nil {
		r.err = fmt.Errorf("leveldb/table: invalid table (could not stat file): %v", err)
		return r
	}
	var footer [footerLen]byte
	if stat.Size() < int64(len(footer)) {
		r.err = errors.New("leveldb/table: invalid table (file size is too small)")
		return r
	}
	_, err = f.ReadAt(footer[:], stat.Size()-int64(len(footer)))
	if err != nil && err != io.EOF {
		r.err = fmt.Errorf("leveldb/table: invalid table (could not read footer): %v", err)
		return r
	}
	if string(footer[footerLen-len(magic):footerLen]) != magic {
		r.err = errors.New("leveldb/table: invalid table (bad magic number)")
		return r
	}
	// Ignore the metaindex.
	_, n := decodeBlockHandle(footer[:])
	if n == 0 {
		r.err = errors.New("leveldb/table: invalid table (bad metaindex block handle)")
		return r
	}
	// Read the index into memory.
	indexBH, n := decodeBlockHandle(footer[n:])
	if n == 0 {
		r.err = errors.New("leveldb/table: invalid table (bad index block handle)")
		return r
	}
	r.index, r.err = r.readBlock(indexBH)
	return r
}
示例#5
0
// 创建leveldb存储目录时,需要传入db.Options, 那么db.Options有哪些属性呢?
func createDB(dirname string, opts *db.Options) (retErr error) {
	const manifestFileNum = 1
	// versionEdit保存版本之间的差异,oldVersion _+ versionEdit = newVersion
	ve := versionEdit{
		// 原始key比较器的名称
		comparatorName: opts.GetComparer().Name(),
		// ?
		nextFileNumber: manifestFileNum + 1,
	}
	// 先创建manifest文件,往里面写一些元信息,如上面versionEdit的comparatorName、nextFileNumber字段
	manifestFilename := dbFilename(dirname, fileTypeManifest, manifestFileNum)
	f, err := opts.GetFileSystem().Create(manifestFilename)
	if err != nil {
		return fmt.Errorf("leveldb: could not create %q: %v", manifestFilename, err)
	}
	defer func() {
		if retErr != nil {
			opts.GetFileSystem().Remove(manifestFilename)
		}
	}()
	defer f.Close()

	// 生成一个record.Writer对象
	recWriter := record.NewWriter(f)

	// 返回一个record.SingleWriter对象w
	w, err := recWriter.Next()
	if err != nil {
		return err
	}
	// 将ve中的数据写入manifest文件中
	err = ve.encode(w)
	if err != nil {
		return err
	}
	// 加入重启点, 压缩,crc校验?
	err = recWriter.Close()
	if err != nil {
		return err
	}
	// 创建current文件,其中写入了当前的manifest文件名
	return setCurrentFile(dirname, opts.GetFileSystem(), manifestFileNum)
}
示例#6
0
// Open opens a LevelDB whose files live in the given directory.
func Open(dirname string, opts *db.Options) (*DB, error) {
	d := &DB{
		dirname:        dirname,
		opts:           opts,
		icmp:           internalKeyComparer{opts.GetComparer()},
		pendingOutputs: make(map[uint64]struct{}),
	}
	if opts != nil {
		d.icmpOpts = *opts
	}
	d.icmpOpts.Comparer = d.icmp
	tableCacheSize := opts.GetMaxOpenFiles() - numNonTableCacheFiles
	if tableCacheSize < minTableCacheSize {
		tableCacheSize = minTableCacheSize
	}
	// tableCache初始化
	d.tableCache.init(dirname, opts.GetFileSystem(), &d.icmpOpts, tableCacheSize)
	// 初始化一个MemDB
	d.mem = memdb.New(&d.icmpOpts)

	// sync.Cond在Locker的基础上增加的一个消息通知的功能。
	// Cond有三个方法:Wait,Signal,Broadcast。
	// Wait添加一个计数,也就是添加一个阻塞的goroutine。
	// Signal解除一个goroutine的阻塞,计数减一。
	// Broadcast接触所有wait goroutine的阻塞。
	d.compactionCond = sync.Cond{L: &d.mu}
	fs := opts.GetFileSystem()

	d.mu.Lock()
	defer d.mu.Unlock()

	// Lock the database directory.
	// If the directory already exists, MkdirAll does nothing and returns nil.
	// 如果目录已经存在,则MkdirAll啥都不干
	err := fs.MkdirAll(dirname, 0755)
	if err != nil {
		return nil, err
	}
	// 创建LOCK文件,并加文件锁
	fileLock, err := fs.Lock(dbFilename(dirname, fileTypeLock, 0))
	if err != nil {
		return nil, err
	}
	defer func() {
		if fileLock != nil {
			fileLock.Close()
		}
	}()

	// 若CURRENT文件不存在,则调用createDB
	if _, err := fs.Stat(dbFilename(dirname, fileTypeCurrent, 0)); os.IsNotExist(err) {
		// Create the DB if it did not already exist.
		if err := createDB(dirname, opts); err != nil {
			return nil, err
		}
	} else if err != nil {
		return nil, fmt.Errorf("leveldb: database %q: %v", dirname, err)
	} else if opts.GetErrorIfDBExists() {
		return nil, fmt.Errorf("leveldb: database %q already exists", dirname)
	}

	// Load the version set.
	// 先读取CURRENT文件内容,获取manifest文件名,然后逐条记录读取manifest文件的内容,根据内容生成一个新version,放入d.versions中
	err = d.versions.load(dirname, opts)
	if err != nil {
		return nil, err
	}

	// Replay any newer log files than the ones named in the manifest.
	var ve versionEdit
	ls, err := fs.List(dirname)
	if err != nil {
		return nil, err
	}
	var logFiles fileNumAndNameSlice
	for _, filename := range ls {
		ft, fn, ok := parseDBFilename(filename)
		if ok && ft == fileTypeLog && (fn >= d.versions.logNumber || fn == d.versions.prevLogNumber) {
			logFiles = append(logFiles, fileNumAndName{fn, filename})
		}
	}
	sort.Sort(logFiles)
	for _, lf := range logFiles {
		// 根据日志文件重做日志中记录的操作,先将这些操作记录存入一个临时的memtable中,然后转存入磁盘上level0存储文件中
		maxSeqNum, err := d.replayLogFile(&ve, fs, filepath.Join(dirname, lf.name))
		if err != nil {
			return nil, err
		}
		d.versions.markFileNumUsed(lf.num)
		// 设置最新的操作序列号
		if d.versions.lastSequence < maxSeqNum {
			d.versions.lastSequence = maxSeqNum
		}
	}

	// Create an empty .log file.
	// 创建一个新的空log文件
	ve.logNumber = d.versions.nextFileNum()
	d.logNumber = ve.logNumber
	logFile, err := fs.Create(dbFilename(dirname, fileTypeLog, ve.logNumber))
	if err != nil {
		return nil, err
	}
	defer func() {
		if logFile != nil {
			logFile.Close()
		}
	}()
	d.log = record.NewWriter(logFile)

	// Write a new manifest to disk.
	// 根据前面重做日志得到的ve的信息创建一个新的manifest文件
	// 并在CURRENT文件中指向这个新manifest文件
	if err := d.versions.logAndApply(dirname, &ve); err != nil {
		return nil, err
	}

	d.deleteObsoleteFiles()
	// 尝试调度compaction
	d.maybeScheduleCompaction()

	d.logFile, logFile = logFile, nil
	d.fileLock, fileLock = fileLock, nil
	return d, nil
}
示例#7
0
// load loads the version set from the manifest file.
func (vs *versionSet) load(dirname string, opts *db.Options) error {
	vs.dirname = dirname
	vs.opts = opts
	vs.fs = opts.GetFileSystem()
	vs.ucmp = opts.GetComparer()
	vs.icmp = internalKeyComparer{vs.ucmp}
	vs.dummyVersion.prev = &vs.dummyVersion
	vs.dummyVersion.next = &vs.dummyVersion
	// For historical reasons, the next file number is initialized to 2.
	vs.nextFileNumber = 2

	// Read the CURRENT file to find the current manifest file.
	current, err := vs.fs.Open(dbFilename(dirname, fileTypeCurrent, 0))
	if err != nil {
		return fmt.Errorf("leveldb: could not open CURRENT file for DB %q: %v", dirname, err)
	}
	defer current.Close()
	stat, err := current.Stat()
	if err != nil {
		return err
	}
	n := stat.Size()
	if n == 0 {
		return fmt.Errorf("leveldb: CURRENT file for DB %q is empty", dirname)
	}
	if n > 4096 {
		return fmt.Errorf("leveldb: CURRENT file for DB %q is too large", dirname)
	}
	b := make([]byte, n)
	_, err = current.ReadAt(b, 0)
	if err != nil {
		return err
	}
	if b[n-1] != '\n' {
		return fmt.Errorf("leveldb: CURRENT file for DB %q is malformed", dirname)
	}
	b = b[:n-1]

	// Read the versionEdits in the manifest file.
	var bve bulkVersionEdit
	manifest, err := vs.fs.Open(dirname + string(os.PathSeparator) + string(b))
	if err != nil {
		return fmt.Errorf("leveldb: could not open manifest file %q for DB %q: %v", b, dirname, err)
	}
	defer manifest.Close()
	rr := record.NewReader(manifest)
	for {
		r, err := rr.Next()
		if err == io.EOF {
			break
		}
		if err != nil {
			return err
		}
		var ve versionEdit
		err = ve.decode(r)
		if err != nil {
			return err
		}
		if ve.comparatorName != "" {
			if ve.comparatorName != vs.ucmp.Name() {
				return fmt.Errorf("leveldb: manifest file %q for DB %q: "+
					"comparer name from file %q != comparer name from db.Options %q",
					b, dirname, ve.comparatorName, vs.ucmp.Name())
			}
		}
		bve.accumulate(&ve)
		if ve.logNumber != 0 {
			vs.logNumber = ve.logNumber
		}
		if ve.prevLogNumber != 0 {
			vs.prevLogNumber = ve.prevLogNumber
		}
		if ve.nextFileNumber != 0 {
			vs.nextFileNumber = ve.nextFileNumber
		}
		if ve.lastSequence != 0 {
			vs.lastSequence = ve.lastSequence
		}
	}
	if vs.logNumber == 0 || vs.nextFileNumber == 0 {
		if vs.nextFileNumber == 2 {
			// We have a freshly created DB.
		} else {
			return fmt.Errorf("leveldb: incomplete manifest file %q for DB %q", b, dirname)
		}
	}
	vs.markFileNumUsed(vs.logNumber)
	vs.markFileNumUsed(vs.prevLogNumber)
	vs.manifestFileNumber = vs.nextFileNum()

	newVersion, err := bve.apply(nil, vs.icmp)
	if err != nil {
		return err
	}
	vs.append(newVersion)
	return nil
}
示例#8
0
// Open opens a LevelDB whose files live in the given directory.
func Open(dirname string, opts *db.Options) (*DB, error) {
	d := &DB{
		dirname: dirname,
		opts:    opts,
		icmp:    internalKeyComparer{opts.GetComparer()},
	}
	fs := opts.GetFileSystem()

	// Lock the database directory.
	err := fs.MkdirAll(dirname, 0755)
	if err != nil {
		return nil, err
	}
	fileLock, err := fs.Lock(dbFilename(dirname, fileTypeLock, 0))
	if err != nil {
		return nil, err
	}
	defer func() {
		if fileLock != nil {
			fileLock.Close()
		}
	}()

	// TODO: add options for CreateIfMissing and ErrorIfExists, and check them here.

	// Load the version set.
	err = d.versions.load(dirname, opts)
	if err != nil {
		return nil, err
	}

	// Replay any newer log files than the ones named in the manifest.
	var ve versionEdit
	ls, err := fs.List(dirname)
	if err != nil {
		return nil, err
	}
	var logFiles fileNumAndNameSlice
	for _, filename := range ls {
		n := logFileNum(filename)
		if n != 0 && (n >= d.versions.logNumber || n == d.versions.prevLogNumber) {
			logFiles = append(logFiles, fileNumAndName{n, filename})
		}
	}
	sort.Sort(logFiles)
	for _, lf := range logFiles {
		maxSeqNum, err := d.replayLogFile(&ve, fs, filepath.Join(dirname, lf.name))
		if err != nil {
			return nil, err
		}
		d.versions.markFileNumUsed(lf.num)
		if d.versions.lastSequence < maxSeqNum {
			d.versions.lastSequence = maxSeqNum
		}
	}

	// Create an empty .log file.
	ve.logNumber = d.versions.nextFileNum()
	logFile, err := fs.Create(dbFilename(dirname, fileTypeLog, ve.logNumber))
	if err != nil {
		return nil, err
	}
	defer func() {
		if logFile != nil {
			logFile.Close()
		}
	}()
	d.log = record.NewWriter(logFile)

	// Write a new manifest to disk.
	if err := d.versions.logAndApply(dirname, &ve); err != nil {
		return nil, err
	}

	// TODO: delete obsolete files.
	// TODO: maybe schedule compaction?

	d.logFile, logFile = logFile, nil
	d.fileLock, fileLock = fileLock, nil
	return d, nil
}
// load loads the version set from the manifest file.
// 先读取CURRENT文件内容,获取manifest文件名,
func (vs *versionSet) load(dirname string, opts *db.Options) error {
	vs.dirname = dirname
	vs.opts = opts
	vs.fs = opts.GetFileSystem()
	vs.ucmp = opts.GetComparer()
	vs.icmp = internalKeyComparer{vs.ucmp}
	vs.dummyVersion.prev = &vs.dummyVersion
	vs.dummyVersion.next = &vs.dummyVersion
	// For historical reasons, the next file number is initialized to 2.
	vs.nextFileNumber = 2

	// Read the CURRENT file to find the current manifest file.
	current, err := vs.fs.Open(dbFilename(dirname, fileTypeCurrent, 0))
	if err != nil {
		return fmt.Errorf("leveldb: could not open CURRENT file for DB %q: %v", dirname, err)
	}
	defer current.Close()
	stat, err := current.Stat()
	if err != nil {
		return err
	}
	// 文件大小
	n := stat.Size()
	if n == 0 {
		return fmt.Errorf("leveldb: CURRENT file for DB %q is empty", dirname)
	}
	if n > 4096 {
		return fmt.Errorf("leveldb: CURRENT file for DB %q is too large", dirname)
	}
	b := make([]byte, n)
	// 将文件数据读取到b中
	_, err = current.ReadAt(b, 0)
	if err != nil {
		return err
	}
	if b[n-1] != '\n' {
		return fmt.Errorf("leveldb: CURRENT file for DB %q is malformed", dirname)
	}
	// 去除掉最后的换行符
	// b即为CURRENT所指向的manifest文件的文件名
	b = b[:n-1]

	// Read the versionEdits in the manifest file.
	var bve bulkVersionEdit
	manifest, err := vs.fs.Open(dirname + string(os.PathSeparator) + string(b))
	if err != nil {
		return fmt.Errorf("leveldb: could not open manifest file %q for DB %q: %v", b, dirname, err)
	}
	defer manifest.Close()
	// 生成一个Reader对象
	rr := record.NewReader(manifest)
	for {
		// Next returns a reader for the next record.
		r, err := rr.Next()
		if err == io.EOF {
			break
		}
		if err != nil {
			return err
		}
		var ve versionEdit
		// 将记录读入ve
		err = ve.decode(r)
		if err != nil {
			return err
		}
		if ve.comparatorName != "" {
			// 读和写的key比较方法必须一致
			if ve.comparatorName != vs.ucmp.Name() {
				return fmt.Errorf("leveldb: manifest file %q for DB %q: "+
					"comparer name from file %q != comparer name from db.Options %q",
					b, dirname, ve.comparatorName, vs.ucmp.Name())
			}
		}
		// bve.
		bve.accumulate(&ve)
		if ve.logNumber != 0 {
			vs.logNumber = ve.logNumber
		}
		if ve.prevLogNumber != 0 {
			vs.prevLogNumber = ve.prevLogNumber
		}
		if ve.nextFileNumber != 0 {
			vs.nextFileNumber = ve.nextFileNumber
		}
		if ve.lastSequence != 0 {
			vs.lastSequence = ve.lastSequence
		}
	}
	if vs.logNumber == 0 || vs.nextFileNumber == 0 {
		if vs.nextFileNumber == 2 {
			// We have a freshly created DB.
		} else {
			return fmt.Errorf("leveldb: incomplete manifest file %q for DB %q", b, dirname)
		}
	}
	// 生成新版本version之前的准备工作
	vs.markFileNumUsed(vs.logNumber)
	vs.markFileNumUsed(vs.prevLogNumber)
	vs.manifestFileNumber = vs.nextFileNum()

	// 根据bve的added和deleted字段,生成一个新的version(newVersion)
	// 并根据level0的文件数及更大level的文件所占磁盘空间大小来计算newVersion的compactionScore和compactionLevel
	// 这个compactionScore和compactionLevel是判断是否需要compaction的条件
	newVersion, err := bve.apply(nil, vs.icmp)
	if err != nil {
		return err
	}
	vs.append(newVersion)
	return nil
}
示例#10
0
// Open opens a LevelDB whose files live in the given directory.
func Open(dirname string, opts *db.Options) (*DB, error) {
	d := &DB{
		dirname:        dirname,
		opts:           opts,
		icmp:           internalKeyComparer{opts.GetComparer()},
		pendingOutputs: make(map[uint64]struct{}),
	}
	if opts != nil {
		d.icmpOpts = *opts
	}
	d.icmpOpts.Comparer = d.icmp
	tableCacheSize := opts.GetMaxOpenFiles() - numNonTableCacheFiles
	if tableCacheSize < minTableCacheSize {
		tableCacheSize = minTableCacheSize
	}
	d.tableCache.init(dirname, opts.GetFileSystem(), &d.icmpOpts, tableCacheSize)
	d.mem = memdb.New(&d.icmpOpts)
	d.compactionCond = sync.Cond{L: &d.mu}
	fs := opts.GetFileSystem()

	d.mu.Lock()
	defer d.mu.Unlock()

	// Lock the database directory.
	err := fs.MkdirAll(dirname, 0755)
	if err != nil {
		return nil, err
	}
	fileLock, err := fs.Lock(dbFilename(dirname, fileTypeLock, 0))
	if err != nil {
		return nil, err
	}
	defer func() {
		if fileLock != nil {
			fileLock.Close()
		}
	}()

	if _, err := fs.Stat(dbFilename(dirname, fileTypeCurrent, 0)); os.IsNotExist(err) {
		// Create the DB if it did not already exist.
		if err := createDB(dirname, opts); err != nil {
			return nil, err
		}
	} else if err != nil {
		return nil, fmt.Errorf("leveldb: database %q: %v", dirname, err)
	} else if opts.GetErrorIfDBExists() {
		return nil, fmt.Errorf("leveldb: database %q already exists", dirname)
	}

	// Load the version set.
	err = d.versions.load(dirname, opts)
	if err != nil {
		return nil, err
	}

	// Replay any newer log files than the ones named in the manifest.
	var ve versionEdit
	ls, err := fs.List(dirname)
	if err != nil {
		return nil, err
	}
	var logFiles fileNumAndNameSlice
	for _, filename := range ls {
		ft, fn, ok := parseDBFilename(filename)
		if ok && ft == fileTypeLog && (fn >= d.versions.logNumber || fn == d.versions.prevLogNumber) {
			logFiles = append(logFiles, fileNumAndName{fn, filename})
		}
	}
	sort.Sort(logFiles)
	for _, lf := range logFiles {
		maxSeqNum, err := d.replayLogFile(&ve, fs, filepath.Join(dirname, lf.name))
		if err != nil {
			return nil, err
		}
		d.versions.markFileNumUsed(lf.num)
		if d.versions.lastSequence < maxSeqNum {
			d.versions.lastSequence = maxSeqNum
		}
	}

	// Create an empty .log file.
	ve.logNumber = d.versions.nextFileNum()
	d.logNumber = ve.logNumber
	logFile, err := fs.Create(dbFilename(dirname, fileTypeLog, ve.logNumber))
	if err != nil {
		return nil, err
	}
	defer func() {
		if logFile != nil {
			logFile.Close()
		}
	}()
	d.log = record.NewWriter(logFile)

	// Write a new manifest to disk.
	if err := d.versions.logAndApply(dirname, &ve); err != nil {
		return nil, err
	}

	d.deleteObsoleteFiles()
	d.maybeScheduleCompaction()

	d.logFile, logFile = logFile, nil
	d.fileLock, fileLock = fileLock, nil
	return d, nil
}