// writeLevel0Table writes a memtable to a level-0 on-disk table.
//
// If no error is returned, it adds the file number of that on-disk table to
// d.pendingOutputs. It is the caller's responsibility to remove that fileNum
// from that set when it has been applied to d.versions.
//
// d.mu must be held when calling this, but the mutex may be dropped and
// re-acquired during the course of this method.
func (d *DB) writeLevel0Table(fs db.FileSystem, mem *memdb.MemDB) (meta fileMetadata, err error) {
	// meta用于记录新创建的level0 db文件的元信息
	meta.fileNum = d.versions.nextFileNum()
	// filename:新db文件的文件名
	filename := dbFilename(d.dirname, fileTypeTable, meta.fileNum)

	d.pendingOutputs[meta.fileNum] = struct{}{}
	defer func(fileNum uint64) {
		// 如果异常退出(err不为nil),则从d.pendingOutputs中删除新db文件的记录
		// 否则d.pendingOutputs是用来干什么的呢?
		if err != nil {
			delete(d.pendingOutputs, fileNum)
		}
	}(meta.fileNum)

	// Release the d.mu lock while doing I/O.
	// Note the unusual order: Unlock and then Lock.
	d.mu.Unlock()
	defer d.mu.Lock()

	var (
		file db.File
		tw   *table.Writer
		iter db.Iterator
	)
	defer func() {
		if iter != nil {
			err = firstError(err, iter.Close())
		}
		if tw != nil {
			err = firstError(err, tw.Close())
		}
		if file != nil {
			err = firstError(err, file.Close())
		}
		if err != nil {
			fs.Remove(filename)
			meta = fileMetadata{}
		}
	}()

	file, err = fs.Create(filename)
	if err != nil {
		return fileMetadata{}, err
	}

	// table为磁盘db文件封装写入方式
	tw = table.NewWriter(file, &db.Options{
		Comparer: d.icmp,
	})

	// Find返回一个迭代器,用于遍历mem(这里即是d.imm)中的数据
	// memtable是以skiplist来组织数据的,有序
	// 所以取到的第一个数据的key即为当前imm中最小的key
	iter = mem.Find(nil, nil)
	iter.Next()
	// meta.smallest记录新db文件中最小的内部key,在写入memtable时就已经将用户key封装成了内部key,那么为什么还要使用internalKey来做类型转换?
	// 下面的meta.largest就没有进行类型转换就调用clone()方法了。
	meta.smallest = internalKey(iter.Key()).clone()
	for {
		// 最后一次循环中的key即为最大的key,但为什么不封装成内部key呢?
		meta.largest = iter.Key()
		// 将key、value写到新db文件中
		if err1 := tw.Set(meta.largest, iter.Value(), nil); err1 != nil {
			return fileMetadata{}, err1
		}
		// 如果imm中的数据已经遍历完,全部存入新db文件,则break
		if !iter.Next() {
			break
		}
	}
	meta.largest = meta.largest.clone()

	if err1 := iter.Close(); err1 != nil {
		iter = nil
		return fileMetadata{}, err1
	}
	iter = nil

	if err1 := tw.Close(); err1 != nil {
		tw = nil
		return fileMetadata{}, err1
	}
	tw = nil

	// TODO: currently, closing a table.Writer closes its underlying file.
	// We have to re-open the file to Sync or Stat it, which seems stupid.
	file, err = fs.Open(filename)
	if err != nil {
		return fileMetadata{}, err
	}

	if err1 := file.Sync(); err1 != nil {
		return fileMetadata{}, err1
	}

	if stat, err1 := file.Stat(); err1 != nil {
		return fileMetadata{}, err1
	} else {
		size := stat.Size()
		if size < 0 {
			return fileMetadata{}, fmt.Errorf("leveldb: table file %q has negative size %d", filename, size)
		}
		// 将文件的大小值存入meta.size
		meta.size = uint64(size)
	}

	// TODO: compaction stats.

	/* 此时,meta的四个成员:
		- filenum
		- smallest
		- largest
		- size
	  已经全部填上了
	*/
	return meta, nil
}
Beispiel #2
0
func (d *DB) writeLevel0Table(fs db.FileSystem, mem *memdb.MemDB) (meta fileMetadata, err error) {
	meta.fileNum = d.versions.nextFileNum()
	filename := dbFilename(d.dirname, fileTypeTable, meta.fileNum)
	// TODO: add meta.fileNum to a set of 'pending outputs' so that a
	// concurrent sweep of obsolete db files won't delete the fileNum file.
	// It is the caller's responsibility to remove that fileNum from the
	// set of pending outputs.

	var (
		file db.File
		tw   *table.Writer
		iter db.Iterator
	)
	defer func() {
		if iter != nil {
			err = firstError(err, iter.Close())
		}
		if tw != nil {
			err = firstError(err, tw.Close())
		}
		if file != nil {
			err = firstError(err, file.Close())
		}
		if err != nil {
			fs.Remove(filename)
			meta = fileMetadata{}
		}
	}()

	file, err = fs.Create(filename)
	if err != nil {
		return fileMetadata{}, err
	}
	tw = table.NewWriter(file, &db.Options{
		Comparer: d.icmp,
	})

	iter = mem.Find(nil, nil)
	iter.Next()
	meta.smallest = internalKey(iter.Key()).clone()
	for {
		meta.largest = iter.Key()
		if err1 := tw.Set(meta.largest, iter.Value(), nil); err1 != nil {
			return fileMetadata{}, err1
		}
		if !iter.Next() {
			break
		}
	}
	meta.largest = meta.largest.clone()

	if err1 := iter.Close(); err1 != nil {
		iter = nil
		return fileMetadata{}, err1
	}
	iter = nil

	if err1 := tw.Close(); err1 != nil {
		tw = nil
		return fileMetadata{}, err1
	}
	tw = nil

	// TODO: currently, closing a table.Writer closes its underlying file.
	// We have to re-open the file to Sync or Stat it, which seems stupid.
	file, err = fs.Open(filename)
	if err != nil {
		return fileMetadata{}, err
	}

	if err1 := file.Sync(); err1 != nil {
		return fileMetadata{}, err1
	}

	if stat, err1 := file.Stat(); err1 != nil {
		return fileMetadata{}, err1
	} else {
		size := stat.Size()
		if size < 0 {
			return fileMetadata{}, fmt.Errorf("leveldb: table file %q has negative size %d", filename, size)
		}
		meta.size = uint64(size)
	}

	// TODO: compaction stats.

	return meta, nil
}
// internalGet looks up the first key/value pair whose (internal) key is >=
// ikey, according to the internal key ordering, and also returns whether or
// not that search was conclusive.
//
// If there is no such pair, or that pair's key and ikey do not share the same
// user key (according to ucmp), then conclusive will be false. Otherwise,
// conclusive will be true and:
//    * if that pair's key's kind is set, that pair's value will be returned,
//    * if that pair's key's kind is delete, db.ErrNotFound will be returned.
// If the returned error is non-nil then conclusive will be true.
func internalGet(t db.Iterator, ucmp db.Comparer, ukey []byte) (value []byte, conclusive bool, err error) {
	if !t.Next() {
		err = t.Close()
		return nil, err != nil, err
	}
	ikey0 := internalKey(t.Key())
	if !ikey0.valid() {
		t.Close()
		return nil, true, fmt.Errorf("leveldb: corrupt table: invalid internal key")
	}
	if ucmp.Compare(ukey, ikey0.ukey()) != 0 {
		err = t.Close()
		// 在memtable中没有找到,因为用户key不匹配
		// 之后再从磁盘db文件中查找
		return nil, err != nil, err
	}
	if ikey0.kind() == internalKeyKindDelete {
		t.Close()
		// 该key对应的数据项已经被删除了,所以也没找到
		return nil, true, db.ErrNotFound
	}
	// 找到,返回value
	return t.Value(), true, t.Close()
}
Beispiel #4
0
// internalGet looks up the first key/value pair whose (internal) key is >=
// ikey, according to the internal key ordering, and also returns whether or
// not that search was conclusive.
//
// If there is no such pair, or that pair's key and ikey do not share the same
// user key (according to ucmp), then conclusive will be false. Otherwise,
// conclusive will be true and:
//	* if that pair's key's kind is set, that pair's value will be returned,
//	* if that pair's key's kind is delete, db.ErrNotFound will be returned.
// If the returned error is non-nil then conclusive will be true.
func internalGet(t db.Iterator, ucmp db.Comparer, ukey []byte) (value []byte, conclusive bool, err error) {
	if !t.Next() {
		err = t.Close()
		return nil, err != nil, err
	}
	ikey0 := internalKey(t.Key())
	if !ikey0.valid() {
		t.Close()
		return nil, true, fmt.Errorf("leveldb: corrupt table: invalid internal key")
	}
	if ucmp.Compare(ukey, ikey0.ukey()) != 0 {
		err = t.Close()
		return nil, err != nil, err
	}
	if ikey0.kind() == internalKeyKindDelete {
		t.Close()
		return nil, true, db.ErrNotFound
	}
	return t.Value(), true, t.Close()
}
Beispiel #5
0
// writeLevel0Table writes a memtable to a level-0 on-disk table.
//
// If no error is returned, it adds the file number of that on-disk table to
// d.pendingOutputs. It is the caller's responsibility to remove that fileNum
// from that set when it has been applied to d.versions.
//
// d.mu must be held when calling this, but the mutex may be dropped and
// re-acquired during the course of this method.
func (d *DB) writeLevel0Table(fs db.FileSystem, mem *memdb.MemDB) (meta fileMetadata, err error) {
	meta.fileNum = d.versions.nextFileNum()
	filename := dbFilename(d.dirname, fileTypeTable, meta.fileNum)
	d.pendingOutputs[meta.fileNum] = struct{}{}
	defer func(fileNum uint64) {
		if err != nil {
			delete(d.pendingOutputs, fileNum)
		}
	}(meta.fileNum)

	// Release the d.mu lock while doing I/O.
	// Note the unusual order: Unlock and then Lock.
	d.mu.Unlock()
	defer d.mu.Lock()

	var (
		file db.File
		tw   *table.Writer
		iter db.Iterator
	)
	defer func() {
		if iter != nil {
			err = firstError(err, iter.Close())
		}
		if tw != nil {
			err = firstError(err, tw.Close())
		}
		if file != nil {
			err = firstError(err, file.Close())
		}
		if err != nil {
			fs.Remove(filename)
			meta = fileMetadata{}
		}
	}()

	file, err = fs.Create(filename)
	if err != nil {
		return fileMetadata{}, err
	}
	tw = table.NewWriter(file, &db.Options{
		Comparer: d.icmp,
	})

	iter = mem.Find(nil, nil)
	iter.Next()
	meta.smallest = internalKey(iter.Key()).clone()
	for {
		meta.largest = iter.Key()
		if err1 := tw.Set(meta.largest, iter.Value(), nil); err1 != nil {
			return fileMetadata{}, err1
		}
		if !iter.Next() {
			break
		}
	}
	meta.largest = meta.largest.clone()

	if err1 := iter.Close(); err1 != nil {
		iter = nil
		return fileMetadata{}, err1
	}
	iter = nil

	if err1 := tw.Close(); err1 != nil {
		tw = nil
		return fileMetadata{}, err1
	}
	tw = nil

	// TODO: currently, closing a table.Writer closes its underlying file.
	// We have to re-open the file to Sync or Stat it, which seems stupid.
	file, err = fs.Open(filename)
	if err != nil {
		return fileMetadata{}, err
	}

	if err1 := file.Sync(); err1 != nil {
		return fileMetadata{}, err1
	}

	if stat, err1 := file.Stat(); err1 != nil {
		return fileMetadata{}, err1
	} else {
		size := stat.Size()
		if size < 0 {
			return fileMetadata{}, fmt.Errorf("leveldb: table file %q has negative size %d", filename, size)
		}
		meta.size = uint64(size)
	}

	// TODO: compaction stats.

	return meta, nil
}