Exemplo n.º 1
0
func newTableCache() (*tableCache, *tableCacheTestFS, error) {
	xxx := bytes.Repeat([]byte("x"), tableCacheTestNumTables)
	fs := &tableCacheTestFS{
		FileSystem: memfs.New(),
	}
	for i := 0; i < tableCacheTestNumTables; i++ {
		f, err := fs.Create(dbFilename("", fileTypeTable, uint64(i)))
		if err != nil {
			return nil, nil, fmt.Errorf("fs.Create: %v", err)
		}
		tw := table.NewWriter(f, &db.Options{
			Comparer: internalKeyComparer{userCmp: db.DefaultComparer},
		})
		if err := tw.Set(makeIkey(fmt.Sprintf("k.SET.%d", i)), xxx[:i], nil); err != nil {
			return nil, nil, fmt.Errorf("tw.Set: %v", err)
		}
		if err := tw.Close(); err != nil {
			return nil, nil, fmt.Errorf("tw.Close: %v", err)
		}
	}

	fs.mu.Lock()
	fs.openCounts = map[string]int{}
	fs.closeCounts = map[string]int{}
	fs.mu.Unlock()

	c := &tableCache{}
	c.init("", fs, nil, tableCacheTestCacheSize)
	return c, fs, nil
}
Exemplo n.º 2
0
// writeLevel0Table writes a memtable to a level-0 on-disk table.
//
// If no error is returned, it adds the file number of that on-disk table to
// d.pendingOutputs. It is the caller's responsibility to remove that fileNum
// from that set when it has been applied to d.versions.
//
// d.mu must be held when calling this, but the mutex may be dropped and
// re-acquired during the course of this method.
func (d *DB) writeLevel0Table(fs db.FileSystem, mem *memdb.MemDB) (meta fileMetadata, err error) {
	meta.fileNum = d.versions.nextFileNum()
	filename := dbFilename(d.dirname, fileTypeTable, meta.fileNum)
	d.pendingOutputs[meta.fileNum] = struct{}{}
	defer func(fileNum uint64) {
		if err != nil {
			delete(d.pendingOutputs, fileNum)
		}
	}(meta.fileNum)

	// Release the d.mu lock while doing I/O.
	// Note the unusual order: Unlock and then Lock.
	d.mu.Unlock()
	defer d.mu.Lock()

	var (
		file db.File
		tw   *table.Writer
		iter db.Iterator
	)
	defer func() {
		if iter != nil {
			err = firstError(err, iter.Close())
		}
		if tw != nil {
			err = firstError(err, tw.Close())
		}
		if file != nil {
			err = firstError(err, file.Close())
		}
		if err != nil {
			fs.Remove(filename)
			meta = fileMetadata{}
		}
	}()

	file, err = fs.Create(filename)
	if err != nil {
		return fileMetadata{}, err
	}
	tw = table.NewWriter(file, &db.Options{
		Comparer: d.icmp,
	})

	iter = mem.Find(nil, nil)
	iter.Next()
	meta.smallest = internalKey(iter.Key()).clone()
	for {
		meta.largest = iter.Key()
		if err1 := tw.Set(meta.largest, iter.Value(), nil); err1 != nil {
			return fileMetadata{}, err1
		}
		if !iter.Next() {
			break
		}
	}
	meta.largest = meta.largest.clone()

	if err1 := iter.Close(); err1 != nil {
		iter = nil
		return fileMetadata{}, err1
	}
	iter = nil

	if err1 := tw.Close(); err1 != nil {
		tw = nil
		return fileMetadata{}, err1
	}
	tw = nil

	// TODO: currently, closing a table.Writer closes its underlying file.
	// We have to re-open the file to Sync or Stat it, which seems stupid.
	file, err = fs.Open(filename)
	if err != nil {
		return fileMetadata{}, err
	}

	if err1 := file.Sync(); err1 != nil {
		return fileMetadata{}, err1
	}

	if stat, err1 := file.Stat(); err1 != nil {
		return fileMetadata{}, err1
	} else {
		size := stat.Size()
		if size < 0 {
			return fileMetadata{}, fmt.Errorf("leveldb: table file %q has negative size %d", filename, size)
		}
		meta.size = uint64(size)
	}

	// TODO: compaction stats.

	return meta, nil
}
Exemplo n.º 3
0
// compactDiskTables runs a compaction that produces new on-disk tables from
// old on-disk tables.
//
// d.mu must be held when calling this, but the mutex may be dropped and
// re-acquired during the course of this method.
func (d *DB) compactDiskTables(c *compaction) (ve *versionEdit, pendingOutputs []uint64, retErr error) {
	defer func() {
		if retErr != nil {
			for _, fileNum := range pendingOutputs {
				delete(d.pendingOutputs, fileNum)
			}
			pendingOutputs = nil
		}
	}()

	// TODO: track snapshots.
	smallestSnapshot := d.versions.lastSequence

	// Release the d.mu lock while doing I/O.
	// Note the unusual order: Unlock and then Lock.
	d.mu.Unlock()
	defer d.mu.Lock()

	iter, err := compactionIterator(&d.tableCache, d.icmp, c)
	if err != nil {
		return nil, pendingOutputs, err
	}

	// TODO: output to more than one table, if it would otherwise be too large.
	var (
		fileNum  uint64
		filename string
		tw       *table.Writer
	)
	defer func() {
		if iter != nil {
			retErr = firstError(retErr, iter.Close())
		}
		if tw != nil {
			retErr = firstError(retErr, tw.Close())
		}
		if retErr != nil {
			d.opts.GetFileSystem().Remove(filename)
		}
	}()

	currentUkey := make([]byte, 0, 4096)
	hasCurrentUkey := false
	lastSeqNumForKey := internalKeySeqNumMax
	smallest, largest := internalKey(nil), internalKey(nil)
	for iter.Next() {
		// TODO: prioritize compacting d.imm.

		// TODO: support c.shouldStopBefore.

		ikey := internalKey(iter.Key())
		if !ikey.valid() {
			// Do not hide invalid keys.
			currentUkey = currentUkey[:0]
			hasCurrentUkey = false
			lastSeqNumForKey = internalKeySeqNumMax

		} else {
			ukey := ikey.ukey()
			if !hasCurrentUkey || d.icmp.userCmp.Compare(currentUkey, ukey) != 0 {
				// This is the first occurrence of this user key.
				currentUkey = append(currentUkey[:0], ukey...)
				hasCurrentUkey = true
				lastSeqNumForKey = internalKeySeqNumMax
			}

			drop, ikeySeqNum := false, ikey.seqNum()
			if lastSeqNumForKey <= smallestSnapshot {
				drop = true // Rule (A) referenced below.

			} else if ikey.kind() == internalKeyKindDelete &&
				ikeySeqNum <= smallestSnapshot &&
				c.isBaseLevelForUkey(d.icmp.userCmp, ukey) {

				// For this user key:
				// (1) there is no data in higher levels
				// (2) data in lower levels will have larger sequence numbers
				// (3) data in layers that are being compacted here and have
				//     smaller sequence numbers will be dropped in the next
				//     few iterations of this loop (by rule (A) above).
				// Therefore this deletion marker is obsolete and can be dropped.
				drop = true
			}

			lastSeqNumForKey = ikeySeqNum
			if drop {
				continue
			}
		}

		if tw == nil {
			d.mu.Lock()
			fileNum = d.versions.nextFileNum()
			d.pendingOutputs[fileNum] = struct{}{}
			pendingOutputs = append(pendingOutputs, fileNum)
			d.mu.Unlock()

			filename = dbFilename(d.dirname, fileTypeTable, fileNum)
			file, err := d.opts.GetFileSystem().Create(filename)
			if err != nil {
				return nil, pendingOutputs, err
			}
			tw = table.NewWriter(file, &d.icmpOpts)

			smallest = make(internalKey, len(ikey))
			copy(smallest, ikey)
			largest = make(internalKey, 0, 2*len(ikey))
		}
		largest = append(largest[:0], ikey...)
		if err := tw.Set(ikey, iter.Value(), nil); err != nil {
			return nil, pendingOutputs, err
		}
	}

	ve = &versionEdit{
		deletedFiles: map[deletedFileEntry]bool{},
		newFiles: []newFileEntry{
			{
				level: c.level + 1,
				meta: fileMetadata{
					fileNum:  fileNum,
					size:     1,
					smallest: smallest,
					largest:  largest,
				},
			},
		},
	}
	for i := 0; i < 2; i++ {
		for _, f := range c.inputs[i] {
			ve.deletedFiles[deletedFileEntry{
				level:   c.level + i,
				fileNum: f.fileNum,
			}] = true
		}
	}
	return ve, pendingOutputs, nil
}