Пример #1
0
// makeRoomForWrite ensures that there is room in d.mem for the next write.
//
// d.mu must be held when calling this, but the mutex may be dropped and
// re-acquired during the course of this method.
func (d *DB) makeRoomForWrite(force bool) error {
	allowDelay := !force
	for {
		// TODO: check any previous sticky error, if the paranoid option is set.

		if allowDelay && len(d.versions.currentVersion().files[0]) > l0SlowdownWritesTrigger {
			// We are getting close to hitting a hard limit on the number of
			// L0 files. Rather than delaying a single write by several
			// seconds when we hit the hard limit, start delaying each
			// individual write by 1ms to reduce latency variance.
			d.mu.Unlock()
			time.Sleep(1 * time.Millisecond)
			d.mu.Lock()
			allowDelay = false
			// TODO: how do we ensure we are still 'at the front of the writer queue'?
			continue
		}

		if !force && d.mem.ApproximateMemoryUsage() <= d.opts.GetWriteBufferSize() {
			// There is room in the current memtable.
			break
		}

		if d.imm != nil {
			// We have filled up the current memtable, but the previous
			// one is still being compacted, so we wait.
			d.compactionCond.Wait()
			continue
		}

		if len(d.versions.currentVersion().files[0]) > l0StopWritesTrigger {
			// There are too many level-0 files.
			d.compactionCond.Wait()
			continue
		}

		// Attempt to switch to a new memtable and trigger compaction of old
		// TODO: drop and re-acquire d.mu around the I/O.
		newLogNumber := d.versions.nextFileNum()
		newLogFile, err := d.opts.GetFileSystem().Create(dbFilename(d.dirname, fileTypeLog, newLogNumber))
		if err != nil {
			return err
		}
		newLog := record.NewWriter(newLogFile)
		if err := d.log.Close(); err != nil {
			newLogFile.Close()
			return err
		}
		if err := d.logFile.Close(); err != nil {
			newLog.Close()
			newLogFile.Close()
			return err
		}
		d.logNumber, d.logFile, d.log = newLogNumber, newLogFile, newLog
		d.imm, d.mem = d.mem, memdb.New(&d.icmpOpts)
		force = false
		d.maybeScheduleCompaction()
	}
	return nil
}
Пример #2
0
func main() {

	db := memdb.New(nil)
	db.Set([]byte("name"), []byte("axgle"), nil)

	p(db.Get([]byte("name"), nil))

}
Пример #3
0
// makeRoomForWrite ensures that there is room in d.mem for the next write.
//
// d.mu must be held when calling this, but the mutex may be dropped and
// re-acquired during the course of this method.
func (d *DB) makeRoomForWrite(force bool) error {
	// allowDelay:允许延迟写
	// force = true表示强制立即写?
	allowDelay := !force
	// 无条件循环
	for {
		// TODO: check any previous sticky error, if the paranoid option is set.

		// 若当前版本(即链表versions的最后一个元素)中level0的文件数大于l0SlowdownWritesTrigger(8),则对当前写操作延迟1毫秒
		if allowDelay && len(d.versions.currentVersion().files[0]) > l0SlowdownWritesTrigger {
			// We are getting close to hitting a hard limit on the number of
			// L0 files. Rather than delaying a single write by several
			// seconds when we hit the hard limit, **start delaying each
			// individual write by 1ms to reduce latency variance.**
			d.mu.Unlock()
			time.Sleep(1 * time.Millisecond)
			d.mu.Lock()
			allowDelay = false
			// TODO: how do we ensure we are still 'at the front of the writer queue'?
			continue
		}

		// 若当前memtable中仍有写入的空间,则直接break返回

		// d.mem.ApproximateMemoryUsage()返回len(d.mem.kvData)
		// d.opts.GetWriteBufferSize是memtable的临界大小,也是一个level0的db文件的临界大小
		// 但从 <= 可以看到,这个临界大小是可以被超过的。。。
		if !force && d.mem.ApproximateMemoryUsage() <= d.opts.GetWriteBufferSize() {
			// There is room in the current memtable.
			break
		}

		// 如果当前memtable(即d.mem)已写满,且immutable memtable(d.imm)存在,则需等待,因为说明上一次从imm到level0的compaction操作还没有结束
		// 这也意味着从imm到level0的compaction过程结束时会将d.imm置为nil
		// 而什么时候d.imm会变成非nil呢?见下面,当d.mem满时,会将d.imm指向d.mem,而d.mem则指向一个新申请的内存空间
		// 那么d.imm的存在时间最长为新d.mem从空到被写满的时间
		if d.imm != nil {
			// We have filled up the current memtable, but the previous
			// one is still being compacted, so we wait.
			// d.compactionCond.Wait()类似于在等待一个唤醒的信号
			d.compactionCond.Wait()
			continue
		}

		// 若当前版本(即链表versions的最后一个元素)中level0的文件数大于l0StopWritesTrigger(12),则需等待从level0到level1的compaction过程结束
		if len(d.versions.currentVersion().files[0]) > l0StopWritesTrigger {
			// There are too many level-0 files.
			d.compactionCond.Wait()
			continue
		}

		// Attempt to switch to a new memtable and trigger compaction of old
		// 因为d.mem已经写满,而d.imm也已经被置为nil,那么可以将d.imm指向d.mem,并为d.mem申请一块新的内存空间
		// 从以下代码中可以看到在从d.mem切换到d.imm之前,先创建了一个新的log文件,并打开。这说明一个log文件的生命周期和d.mem是一样的,
		// 即一个log记录的一个d.mem上的增删改操作
		// TODO: drop and re-acquire d.mu around the I/O.

		// 不同的的log文件,通过文件名中不同的数字序号来区分
		newLogNumber := d.versions.nextFileNum()
		newLogFile, err := d.opts.GetFileSystem().Create(dbFilename(d.dirname, fileTypeLog, newLogNumber))
		if err != nil {
			return err
		}
		newLog := record.NewWriter(newLogFile)
		if err := d.log.Close(); err != nil {
			newLogFile.Close()
			return err
		}
		if err := d.logFile.Close(); err != nil {
			newLog.Close()
			newLogFile.Close()
			return err
		}
		// 设置d(DB)的属性
		// 切换到新的日志,新的imm和mem
		d.logNumber, d.logFile, d.log = newLogNumber, newLogFile, newLog
		// memdb.New申请一个新的memtable内存空间
		d.imm, d.mem = d.mem, memdb.New(&d.icmpOpts)
		force = false

		// 由于这时d.imm不为nil,则应该调度一次compaction,将d.imm中的数据写到level0磁盘文件中
		d.maybeScheduleCompaction()
	}
	return nil
}
Пример #4
0
// replayLogFile replays the edits in the named log file.
//
// d.mu must be held when calling this, but the mutex may be dropped and
// re-acquired during the course of this method.
func (d *DB) replayLogFile(ve *versionEdit, fs db.FileSystem, filename string) (maxSeqNum uint64, err error) {
	file, err := fs.Open(filename)
	if err != nil {
		return 0, err
	}
	defer file.Close()

	var (
		mem      *memdb.MemDB
		batchBuf = new(bytes.Buffer)
		ikey     = make(internalKey, 512)
		rr       = record.NewReader(file)
	)
	for {
		r, err := rr.Next()
		if err == io.EOF {
			break
		}
		if err != nil {
			return 0, err
		}
		_, err = io.Copy(batchBuf, r)
		if err != nil {
			return 0, err
		}

		if batchBuf.Len() < batchHeaderLen {
			return 0, fmt.Errorf("leveldb: corrupt log file %q", filename)
		}
		b := Batch{batchBuf.Bytes()}
		seqNum := b.seqNum()
		seqNum1 := seqNum + uint64(b.count())
		if maxSeqNum < seqNum1 {
			maxSeqNum = seqNum1
		}

		if mem == nil {
			mem = memdb.New(&d.icmpOpts)
		}

		t := b.iter()
		for ; seqNum != seqNum1; seqNum++ {
			kind, ukey, value, ok := t.next()
			if !ok {
				return 0, fmt.Errorf("leveldb: corrupt log file %q", filename)
			}
			// Convert seqNum, kind and key into an internalKey, and add that ikey/value
			// pair to mem.
			//
			// TODO: instead of copying to an intermediate buffer (ikey), is it worth
			// adding a SetTwoPartKey(db.TwoPartKey{key0, key1}, value, opts) method to
			// memdb.MemDB? What effect does that have on the db.Comparer interface?
			//
			// The C++ LevelDB code does not need an intermediate copy because its memdb
			// implementation is a private implementation detail, and copies each internal
			// key component from the Batch format straight to the skiplist buffer.
			//
			// Go's LevelDB considers the memdb functionality to be useful in its own
			// right, and so leveldb/memdb is a separate package that is usable without
			// having to import the top-level leveldb package. That extra abstraction
			// means that we need to copy to an intermediate buffer here, to reconstruct
			// the complete internal key to pass to the memdb.
			ikey = makeInternalKey(ikey, ukey, kind, seqNum)
			mem.Set(ikey, value, nil)
		}
		if len(t) != 0 {
			return 0, fmt.Errorf("leveldb: corrupt log file %q", filename)
		}

		// TODO: if mem is large enough, write it to a level-0 table and set mem = nil.

		batchBuf.Reset()
	}

	if mem != nil && !mem.Empty() {
		// 如果初始化后memtable中有数据,则存入磁盘level0层级文件中
		meta, err := d.writeLevel0Table(fs, mem)
		if err != nil {
			return 0, err
		}
		ve.newFiles = append(ve.newFiles, newFileEntry{level: 0, meta: meta})
		// Strictly speaking, it's too early to delete meta.fileNum from d.pendingOutputs,
		// but we are replaying the log file, which happens before Open returns, so there
		// is no possibility of deleteObsoleteFiles being called concurrently here.
		delete(d.pendingOutputs, meta.fileNum)
	}

	return maxSeqNum, nil
}
Пример #5
0
// Open opens a LevelDB whose files live in the given directory.
func Open(dirname string, opts *db.Options) (*DB, error) {
	d := &DB{
		dirname:        dirname,
		opts:           opts,
		icmp:           internalKeyComparer{opts.GetComparer()},
		pendingOutputs: make(map[uint64]struct{}),
	}
	if opts != nil {
		d.icmpOpts = *opts
	}
	d.icmpOpts.Comparer = d.icmp
	tableCacheSize := opts.GetMaxOpenFiles() - numNonTableCacheFiles
	if tableCacheSize < minTableCacheSize {
		tableCacheSize = minTableCacheSize
	}
	// tableCache初始化
	d.tableCache.init(dirname, opts.GetFileSystem(), &d.icmpOpts, tableCacheSize)
	// 初始化一个MemDB
	d.mem = memdb.New(&d.icmpOpts)

	// sync.Cond在Locker的基础上增加的一个消息通知的功能。
	// Cond有三个方法:Wait,Signal,Broadcast。
	// Wait添加一个计数,也就是添加一个阻塞的goroutine。
	// Signal解除一个goroutine的阻塞,计数减一。
	// Broadcast接触所有wait goroutine的阻塞。
	d.compactionCond = sync.Cond{L: &d.mu}
	fs := opts.GetFileSystem()

	d.mu.Lock()
	defer d.mu.Unlock()

	// Lock the database directory.
	// If the directory already exists, MkdirAll does nothing and returns nil.
	// 如果目录已经存在,则MkdirAll啥都不干
	err := fs.MkdirAll(dirname, 0755)
	if err != nil {
		return nil, err
	}
	// 创建LOCK文件,并加文件锁
	fileLock, err := fs.Lock(dbFilename(dirname, fileTypeLock, 0))
	if err != nil {
		return nil, err
	}
	defer func() {
		if fileLock != nil {
			fileLock.Close()
		}
	}()

	// 若CURRENT文件不存在,则调用createDB
	if _, err := fs.Stat(dbFilename(dirname, fileTypeCurrent, 0)); os.IsNotExist(err) {
		// Create the DB if it did not already exist.
		if err := createDB(dirname, opts); err != nil {
			return nil, err
		}
	} else if err != nil {
		return nil, fmt.Errorf("leveldb: database %q: %v", dirname, err)
	} else if opts.GetErrorIfDBExists() {
		return nil, fmt.Errorf("leveldb: database %q already exists", dirname)
	}

	// Load the version set.
	// 先读取CURRENT文件内容,获取manifest文件名,然后逐条记录读取manifest文件的内容,根据内容生成一个新version,放入d.versions中
	err = d.versions.load(dirname, opts)
	if err != nil {
		return nil, err
	}

	// Replay any newer log files than the ones named in the manifest.
	var ve versionEdit
	ls, err := fs.List(dirname)
	if err != nil {
		return nil, err
	}
	var logFiles fileNumAndNameSlice
	for _, filename := range ls {
		ft, fn, ok := parseDBFilename(filename)
		if ok && ft == fileTypeLog && (fn >= d.versions.logNumber || fn == d.versions.prevLogNumber) {
			logFiles = append(logFiles, fileNumAndName{fn, filename})
		}
	}
	sort.Sort(logFiles)
	for _, lf := range logFiles {
		// 根据日志文件重做日志中记录的操作,先将这些操作记录存入一个临时的memtable中,然后转存入磁盘上level0存储文件中
		maxSeqNum, err := d.replayLogFile(&ve, fs, filepath.Join(dirname, lf.name))
		if err != nil {
			return nil, err
		}
		d.versions.markFileNumUsed(lf.num)
		// 设置最新的操作序列号
		if d.versions.lastSequence < maxSeqNum {
			d.versions.lastSequence = maxSeqNum
		}
	}

	// Create an empty .log file.
	// 创建一个新的空log文件
	ve.logNumber = d.versions.nextFileNum()
	d.logNumber = ve.logNumber
	logFile, err := fs.Create(dbFilename(dirname, fileTypeLog, ve.logNumber))
	if err != nil {
		return nil, err
	}
	defer func() {
		if logFile != nil {
			logFile.Close()
		}
	}()
	d.log = record.NewWriter(logFile)

	// Write a new manifest to disk.
	// 根据前面重做日志得到的ve的信息创建一个新的manifest文件
	// 并在CURRENT文件中指向这个新manifest文件
	if err := d.versions.logAndApply(dirname, &ve); err != nil {
		return nil, err
	}

	d.deleteObsoleteFiles()
	// 尝试调度compaction
	d.maybeScheduleCompaction()

	d.logFile, logFile = logFile, nil
	d.fileLock, fileLock = fileLock, nil
	return d, nil
}
Пример #6
0
func TestVersion(t *testing.T) {
	// testTable is a table to insert into a version.
	// Each element of data is a string of the form "internalKey value".
	type testTable struct {
		level   int
		fileNum uint64
		data    []string
	}

	testCases := []struct {
		description string
		// badOrdering is whether this test case has a table ordering violation.
		badOrdering bool
		// tables are the tables to populate the version with.
		tables []testTable
		// queries are the queries to run against the version. Each element has
		// the form "internalKey wantedValue". The internalKey is passed to the
		// version.get method, wantedValue may be "ErrNotFound" if the query
		// should return that error.
		queries []string
	}{
		{
			description: "empty: an empty version",
			queries: []string{
				"abc.MAX.101 ErrNotFound",
			},
		},

		{
			description: "single-0: one level-0 table",
			tables: []testTable{
				{
					level:   0,
					fileNum: 10,
					data: []string{
						"the.SET.101 a",
						"cat.SET.102 b",
						"on_.SET.103 c",
						"the.SET.104 d",
						"mat.SET.105 e",
						"the.DEL.106 ",
						"the.SET.107 g",
					},
				},
			},
			queries: []string{
				"aaa.MAX.105 ErrNotFound",
				"cat.MAX.105 b",
				"hat.MAX.105 ErrNotFound",
				"mat.MAX.105 e",
				"the.MAX.108 g",
				"the.MAX.107 g",
				"the.MAX.106 ErrNotFound",
				"the.MAX.105 d",
				"the.MAX.104 d",
				"the.MAX.104 d",
				"the.MAX.103 a",
				"the.MAX.102 a",
				"the.MAX.101 a",
				"the.MAX.100 ErrNotFound",
				"zzz.MAX.105 ErrNotFound",
			},
		},

		{
			description: "triple-0: three level-0 tables",
			tables: []testTable{
				{
					level:   0,
					fileNum: 10,
					data: []string{
						"the.SET.101 a",
						"cat.SET.102 b",
						"on_.SET.103 c",
						"the.SET.104 d",
						"mat.SET.105 e",
						"the.DEL.106 ",
						"the.SET.107 g",
					},
				},
				{
					level:   0,
					fileNum: 11,
					data: []string{
						"awk.SET.111 w",
						"cat.SET.112 x",
						"man.SET.113 y",
						"sed.SET.114 z",
					},
				},
				{
					level:   0,
					fileNum: 12,
					data: []string{
						"the.DEL.121 ",
						"cat.DEL.122 ",
						"man.DEL.123 ",
						"was.SET.124 D",
						"not.SET.125 E",
						"the.SET.126 F",
						"man.SET.127 G",
					},
				},
			},
			queries: []string{
				"aaa.MAX.105 ErrNotFound",
				"awk.MAX.135 w",
				"awk.MAX.125 w",
				"awk.MAX.115 w",
				"awk.MAX.105 ErrNotFound",
				"cat.MAX.135 ErrNotFound",
				"cat.MAX.125 ErrNotFound",
				"cat.MAX.115 x",
				"cat.MAX.105 b",
				"man.MAX.135 G",
				"man.MAX.125 ErrNotFound",
				"man.MAX.115 y",
				"man.MAX.105 ErrNotFound",
				"on_.MAX.135 c",
				"on_.MAX.125 c",
				"on_.MAX.115 c",
				"on_.MAX.105 c",
				"the.MAX.135 F",
				"the.MAX.127 F",
				"the.MAX.126 F",
				"the.MAX.125 ErrNotFound",
				"the.MAX.122 ErrNotFound",
				"the.MAX.121 ErrNotFound",
				"the.MAX.120 g",
				"the.MAX.115 g",
				"the.MAX.114 g",
				"the.MAX.111 g",
				"the.MAX.110 g",
				"the.MAX.108 g",
				"the.MAX.107 g",
				"the.MAX.106 ErrNotFound",
				"the.MAX.105 d",
				"the.MAX.104 d",
				"the.MAX.104 d",
				"the.MAX.103 a",
				"the.MAX.102 a",
				"the.MAX.101 a",
				"the.MAX.100 ErrNotFound",
				"zzz.MAX.105 ErrNotFound",
			},
		},

		{
			description: "quad-4: four level-4 tables",
			tables: []testTable{
				{
					level:   4,
					fileNum: 11,
					data: []string{
						"aardvark.SET.101 a1",
						"alpaca__.SET.201 a2",
						"anteater.SET.301 a3",
					},
				},
				{
					level:   4,
					fileNum: 22,
					data: []string{
						"baboon__.SET.102 b1",
						"baboon__.DEL.202 ",
						"baboon__.SET.302 b3",
						"bear____.SET.402 b4",
						"bear____.DEL.502 ",
						"buffalo_.SET.602 b6",
					},
				},
				{
					level:   4,
					fileNum: 33,
					data: []string{
						"buffalo_.SET.103 B1",
					},
				},
				{
					level:   4,
					fileNum: 44,
					data: []string{
						"chipmunk.SET.104 c1",
						"chipmunk.SET.204 c2",
					},
				},
			},
			queries: []string{
				"a_______.MAX.999 ErrNotFound",
				"aardvark.MAX.999 a1",
				"aardvark.MAX.102 a1",
				"aardvark.MAX.101 a1",
				"aardvark.MAX.100 ErrNotFound",
				"alpaca__.MAX.999 a2",
				"alpaca__.MAX.200 ErrNotFound",
				"anteater.MAX.999 a3",
				"anteater.MAX.302 a3",
				"anteater.MAX.301 a3",
				"anteater.MAX.300 ErrNotFound",
				"anteater.MAX.000 ErrNotFound",
				"b_______.MAX.999 ErrNotFound",
				"baboon__.MAX.999 b3",
				"baboon__.MAX.302 b3",
				"baboon__.MAX.301 ErrNotFound",
				"baboon__.MAX.202 ErrNotFound",
				"baboon__.MAX.201 b1",
				"baboon__.MAX.102 b1",
				"baboon__.MAX.101 ErrNotFound",
				"bear____.MAX.999 ErrNotFound",
				"bear____.MAX.500 b4",
				"bear____.MAX.000 ErrNotFound",
				"buffalo_.MAX.999 b6",
				"buffalo_.MAX.603 b6",
				"buffalo_.MAX.602 b6",
				"buffalo_.MAX.601 B1",
				"buffalo_.MAX.104 B1",
				"buffalo_.MAX.103 B1",
				"buffalo_.MAX.102 ErrNotFound",
				"buffalo_.MAX.000 ErrNotFound",
				"c_______.MAX.999 ErrNotFound",
				"chipmunk.MAX.999 c2",
				"chipmunk.MAX.205 c2",
				"chipmunk.MAX.204 c2",
				"chipmunk.MAX.203 c1",
				"chipmunk.MAX.105 c1",
				"chipmunk.MAX.104 c1",
				"chipmunk.MAX.103 ErrNotFound",
				"chipmunk.MAX.000 ErrNotFound",
				"d_______.MAX.999 ErrNotFound",
			},
		},

		{
			description: "complex: many tables at many levels",
			tables: []testTable{
				{
					level:   0,
					fileNum: 50,
					data: []string{
						"alfalfa__.SET.501 p1",
						"asparagus.SET.502 p2",
						"cabbage__.DEL.503 ",
					},
				},
				{
					level:   0,
					fileNum: 51,
					data: []string{
						"asparagus.SET.511 q1",
						"asparagus.SET.512 q2",
						"asparagus.SET.513 q3",
						"beans____.SET.514 q4",
						"broccoli_.SET.515 q5",
						"cabbage__.SET.516 q6",
						"celery___.SET.517 q7",
					},
				},
				{
					level:   1,
					fileNum: 40,
					data: []string{
						"alfalfa__.SET.410 r1",
						"asparagus.SET.420 r2",
						"arugula__.SET.430 r3",
					},
				},
				{
					level:   1,
					fileNum: 41,
					data: []string{
						"beans____.SET.411 s1",
						"beans____.SET.421 s2",
						"bokchoy__.DEL.431 ",
						"broccoli_.SET.441 s4",
					},
				},
				{
					level:   1,
					fileNum: 42,
					data: []string{
						"cabbage__.SET.412 t1",
						"corn_____.DEL.422 ",
					},
				},
				{
					level:   2,
					fileNum: 30,
					data: []string{
						"alfalfa__.SET.310 u1",
						"bokchoy__.SET.320 u2",
						"celery___.SET.330 u3",
						"corn_____.SET.340 u4",
					},
				},
			},
			queries: []string{
				"a________.MAX.999 ErrNotFound",
				"alfalfa__.MAX.520 p1",
				"alfalfa__.MAX.510 p1",
				"alfalfa__.MAX.500 r1",
				"alfalfa__.MAX.400 u1",
				"alfalfa__.MAX.300 ErrNotFound",
				"asparagus.MAX.520 q3",
				"asparagus.MAX.510 p2",
				"asparagus.MAX.500 r2",
				"asparagus.MAX.400 ErrNotFound",
				"asparagus.MAX.300 ErrNotFound",
				"arugula__.MAX.520 r3",
				"arugula__.MAX.510 r3",
				"arugula__.MAX.500 r3",
				"arugula__.MAX.400 ErrNotFound",
				"arugula__.MAX.300 ErrNotFound",
				"beans____.MAX.520 q4",
				"beans____.MAX.510 s2",
				"beans____.MAX.500 s2",
				"beans____.MAX.400 ErrNotFound",
				"beans____.MAX.300 ErrNotFound",
				"bokchoy__.MAX.520 ErrNotFound",
				"bokchoy__.MAX.510 ErrNotFound",
				"bokchoy__.MAX.500 ErrNotFound",
				"bokchoy__.MAX.400 u2",
				"bokchoy__.MAX.300 ErrNotFound",
				"broccoli_.MAX.520 q5",
				"broccoli_.MAX.510 s4",
				"broccoli_.MAX.500 s4",
				"broccoli_.MAX.400 ErrNotFound",
				"broccoli_.MAX.300 ErrNotFound",
				"cabbage__.MAX.520 q6",
				"cabbage__.MAX.510 ErrNotFound",
				"cabbage__.MAX.500 t1",
				"cabbage__.MAX.400 ErrNotFound",
				"cabbage__.MAX.300 ErrNotFound",
				"celery___.MAX.520 q7",
				"celery___.MAX.510 u3",
				"celery___.MAX.500 u3",
				"celery___.MAX.400 u3",
				"celery___.MAX.300 ErrNotFound",
				"corn_____.MAX.520 ErrNotFound",
				"corn_____.MAX.510 ErrNotFound",
				"corn_____.MAX.500 ErrNotFound",
				"corn_____.MAX.400 u4",
				"corn_____.MAX.300 ErrNotFound",
				"d________.MAX.999 ErrNotFound",
			},
		},

		{
			description: "broken invariants 0: non-increasing level 0 file numbers",
			badOrdering: true,
			tables: []testTable{
				{
					level:   0,
					fileNum: 20,
				},
				{
					level:   0,
					fileNum: 19,
				},
			},
		},

		{
			description: "broken invariants 1: non-increasing level 0 file numbers",
			badOrdering: true,
			tables: []testTable{
				{
					level:   0,
					fileNum: 20,
				},
				{
					level:   0,
					fileNum: 21,
				},
				{
					level:   0,
					fileNum: 21,
				},
				{
					level:   0,
					fileNum: 22,
				},
			},
		},

		{
			description: "broken invariants 2: level non-0 overlapping internal key ranges",
			badOrdering: true,
			tables: []testTable{
				{
					level:   5,
					fileNum: 11,
					data: []string{
						"bat.SET.101 xxx",
						"dog.SET.102 xxx",
					},
				},
				{
					level:   5,
					fileNum: 12,
					data: []string{
						"cow.SET.103 xxx",
						"pig.SET.104 xxx",
					},
				},
			},
		},
	}

	icmp := internalKeyComparer{db.DefaultComparer}
	for _, tc := range testCases {
		desc := tc.description[:strings.Index(tc.description, ":")]

		// m is a map from file numbers to DBs.
		m := map[uint64]db.DB{}
		tiFinder := tableIkeyFinderFunc(func(fileNum uint64, ikey internalKey) (db.Iterator, error) {
			d, ok := m[fileNum]
			if !ok {
				return nil, errors.New("no such file")
			}
			return d.Find(ikey, nil), nil
		})

		v := version{}
		for _, tt := range tc.tables {
			d := memdb.New(&db.Options{
				Comparer: icmp,
			})
			defer d.Close()
			m[tt.fileNum] = d

			var smallest, largest internalKey
			for i, datum := range tt.data {
				s := strings.Split(datum, " ")
				ikey := makeIkey(s[0])
				err := d.Set(ikey, []byte(s[1]), nil)
				if err != nil {
					t.Fatalf("desc=%q: memdb Set: %v", desc, err)
				}

				if i == 0 {
					smallest = ikey
					largest = ikey
				} else {
					if icmp.Compare(ikey, smallest) < 0 {
						smallest = ikey
					}
					if icmp.Compare(ikey, largest) > 0 {
						largest = ikey
					}
				}
			}

			v.files[tt.level] = append(v.files[tt.level], fileMetadata{
				fileNum:  tt.fileNum,
				smallest: smallest,
				largest:  largest,
			})
		}

		err := v.checkOrdering(icmp)
		if tc.badOrdering && err == nil {
			t.Errorf("desc=%q: want bad ordering, got nil error", desc)
			continue
		} else if !tc.badOrdering && err != nil {
			t.Errorf("desc=%q: bad ordering: %v", desc, err)
			continue
		}

		for _, query := range tc.queries {
			s := strings.Split(query, " ")
			value, err := v.get(makeIkey(s[0]), tiFinder, db.DefaultComparer, nil)
			got, want := "", s[1]
			if err != nil {
				if err != db.ErrNotFound {
					t.Errorf("desc=%q: query=%q: %v", desc, s[0], err)
					continue
				}
				got = "ErrNotFound"
			} else {
				got = string(value)
			}
			if got != want {
				t.Errorf("desc=%q: query=%q: got %q, want %q", desc, s[0], got, want)
			}
		}
	}
}
Пример #7
0
func NewStore() Store {
	options := &db.Options{}
	writeOptions := &db.WriteOptions{Sync: false}
	readOptions := &db.ReadOptions{}
	return leveldbStore{MemDB: memdb.New(nil), options: options, writeOptions: writeOptions, readOptions: readOptions}
}
Пример #8
0
// Open opens a LevelDB whose files live in the given directory.
func Open(dirname string, opts *db.Options) (*DB, error) {
	d := &DB{
		dirname:        dirname,
		opts:           opts,
		icmp:           internalKeyComparer{opts.GetComparer()},
		pendingOutputs: make(map[uint64]struct{}),
	}
	if opts != nil {
		d.icmpOpts = *opts
	}
	d.icmpOpts.Comparer = d.icmp
	tableCacheSize := opts.GetMaxOpenFiles() - numNonTableCacheFiles
	if tableCacheSize < minTableCacheSize {
		tableCacheSize = minTableCacheSize
	}
	d.tableCache.init(dirname, opts.GetFileSystem(), &d.icmpOpts, tableCacheSize)
	d.mem = memdb.New(&d.icmpOpts)
	d.compactionCond = sync.Cond{L: &d.mu}
	fs := opts.GetFileSystem()

	d.mu.Lock()
	defer d.mu.Unlock()

	// Lock the database directory.
	err := fs.MkdirAll(dirname, 0755)
	if err != nil {
		return nil, err
	}
	fileLock, err := fs.Lock(dbFilename(dirname, fileTypeLock, 0))
	if err != nil {
		return nil, err
	}
	defer func() {
		if fileLock != nil {
			fileLock.Close()
		}
	}()

	if _, err := fs.Stat(dbFilename(dirname, fileTypeCurrent, 0)); os.IsNotExist(err) {
		// Create the DB if it did not already exist.
		if err := createDB(dirname, opts); err != nil {
			return nil, err
		}
	} else if err != nil {
		return nil, fmt.Errorf("leveldb: database %q: %v", dirname, err)
	} else if opts.GetErrorIfDBExists() {
		return nil, fmt.Errorf("leveldb: database %q already exists", dirname)
	}

	// Load the version set.
	err = d.versions.load(dirname, opts)
	if err != nil {
		return nil, err
	}

	// Replay any newer log files than the ones named in the manifest.
	var ve versionEdit
	ls, err := fs.List(dirname)
	if err != nil {
		return nil, err
	}
	var logFiles fileNumAndNameSlice
	for _, filename := range ls {
		ft, fn, ok := parseDBFilename(filename)
		if ok && ft == fileTypeLog && (fn >= d.versions.logNumber || fn == d.versions.prevLogNumber) {
			logFiles = append(logFiles, fileNumAndName{fn, filename})
		}
	}
	sort.Sort(logFiles)
	for _, lf := range logFiles {
		maxSeqNum, err := d.replayLogFile(&ve, fs, filepath.Join(dirname, lf.name))
		if err != nil {
			return nil, err
		}
		d.versions.markFileNumUsed(lf.num)
		if d.versions.lastSequence < maxSeqNum {
			d.versions.lastSequence = maxSeqNum
		}
	}

	// Create an empty .log file.
	ve.logNumber = d.versions.nextFileNum()
	d.logNumber = ve.logNumber
	logFile, err := fs.Create(dbFilename(dirname, fileTypeLog, ve.logNumber))
	if err != nil {
		return nil, err
	}
	defer func() {
		if logFile != nil {
			logFile.Close()
		}
	}()
	d.log = record.NewWriter(logFile)

	// Write a new manifest to disk.
	if err := d.versions.logAndApply(dirname, &ve); err != nil {
		return nil, err
	}

	d.deleteObsoleteFiles()
	d.maybeScheduleCompaction()

	d.logFile, logFile = logFile, nil
	d.fileLock, fileLock = fileLock, nil
	return d, nil
}