Esempio n. 1
// flush flushes the database cache to persistent storage.  This involes syncing
// the block store and replaying all transactions that have been applied to the
// cache to the underlying database.
// This function MUST be called with the database write lock held.
func (c *dbCache) flush() error {
	c.lastFlush = time.Now()

	// Sync the current write file associated with the block store.  This is
	// necessary before writing the metadata to prevent the case where the
	// metadata contains information about a block which actually hasn't
	// been written yet in unexpected shutdown scenarios.
	if err :=; err != nil {
		return err

	// Nothing to do if there are no transactions to flush.
	if len(c.txLog) == 0 {
		return nil

	// Perform all leveldb updates using batches for atomicity.
	batchLen := 0
	batchTxns := 0
	batch := new(leveldb.Batch)
	for logTxNum, txLogEntries := range c.txLog {
		// Replay the transaction from the log into the current batch.
		for _, logEntry := range txLogEntries {
			switch logEntry.entryType {
			case entryTypeUpdate:
				batch.Put(logEntry.key, logEntry.value)
			case entryTypeRemove:

		// Write and reset the current batch when the number of items in
		// it exceeds the the batch threshold or this is the last
		// transaction in the log.
		batchLen += len(txLogEntries)
		if batchLen > batchThreshold || logTxNum == len(c.txLog)-1 {
			if err := c.ldb.Write(batch, nil); err != nil {
				return convertErr("failed to write batch", err)
			batchLen = 0

			// Clear the transactions that were written from the
			// log so the memory can be reclaimed.
			for i := logTxNum - (batchTxns - 1); i <= logTxNum; i++ {
				c.txLog[i] = nil
			batchTxns = 0
	c.txLog = c.txLog[:]

	// Clear the cache since it has been flushed.
	c.cachedKeys = treap.NewImmutable()
	c.cachedRemove = treap.NewImmutable()

	return nil
Esempio n. 2
// commitTx atomically adds all of the pending keys to add and remove into the
// database cache.  When adding the pending keys would cause the size of the
// cache to exceed the max cache size, or the time since the last flush exceeds
// the configured flush interval, the cache will be flushed to the underlying
// persistent database.
// This is an atomic operation with respect to the cache in that either all of
// the pending keys to add and remove in the transaction will be applied or none
// of them will.
// The database cache itself might be flushed to the underlying persistent
// database even if the transaction fails to apply, but it will only be the
// state of the cache without the transaction applied.
// This function MUST be called during a database write transaction which in
// turn implies the database write lock will be held.
func (c *dbCache) commitTx(tx *transaction) error {
	// Flush the cache and write directly to the database if a flush is
	// needed.
	if c.needsFlush(tx) {
		if err := c.flush(); err != nil {
			return err

		// Perform all leveldb update operations using a batch for
		// atomicity.
		batch := new(leveldb.Batch)
		tx.pendingKeys.ForEach(func(k, v []byte) bool {
			batch.Put(k, v)
			return true
		tx.pendingKeys = nil
		tx.pendingRemove.ForEach(func(k, v []byte) bool {
			return true
		tx.pendingRemove = nil
		if err := c.ldb.Write(batch, nil); err != nil {
			return convertErr("failed to commit transaction", err)

		return nil

	// At this point a database flush is not needed, so atomically commit
	// the transaction to the cache.

	// Create a slice of transaction log entries large enough to house all
	// of the updates and add it to the list of logged transactions to
	// replay on flush.
	numEntries := tx.pendingKeys.Len() + tx.pendingRemove.Len()
	txLogEntries := make([]txLogEntry, numEntries)
	c.txLog = append(c.txLog, txLogEntries)

	// Since the cached keys to be added and removed use an immutable treap,
	// a snapshot is simply obtaining the root of the tree under the lock
	// which is used to atomically swap the root.
	newCachedKeys := c.cachedKeys
	newCachedRemove := c.cachedRemove

	// Apply every key to add in the database transaction to the cache.
	// Also create a transaction log entry for each one at the same time so
	// the database transaction can be replayed during flush.
	logEntryNum := 0
	tx.pendingKeys.ForEach(func(k, v []byte) bool {
		newCachedRemove = newCachedRemove.Delete(k)
		newCachedKeys = newCachedKeys.Put(k, v)

		logEntry := &txLogEntries[logEntryNum]
		logEntry.entryType = entryTypeUpdate
		logEntry.key = k
		logEntry.value = v
		return true
	tx.pendingKeys = nil

	// Apply every key to remove in the database transaction to the cache.
	// Also create a transaction log entry for each one at the same time so
	// the database transaction can be replayed during flush.
	tx.pendingRemove.ForEach(func(k, v []byte) bool {
		newCachedKeys = newCachedKeys.Delete(k)
		newCachedRemove = newCachedRemove.Put(k, nil)

		logEntry := &txLogEntries[logEntryNum]
		logEntry.entryType = entryTypeRemove
		logEntry.key = k
		return true
	tx.pendingRemove = nil

	// Atomically replace the immutable treaps which hold the cached keys to
	// add and delete.
	c.cachedKeys = newCachedKeys
	c.cachedRemove = newCachedRemove
	return nil
Esempio n. 3
// main drives a stress test against a leveldb database.  It opens file storage
// at dbPath wrapped in testingStorage, opens the database on it, and starts:
// a goroutine that services batches sent on writeReq, a goroutine that logs
// statistics every three seconds, per-namespace writer/reader/scanner
// goroutines, and a goroutine that sets the done flag on a signal.
//
// NOTE(review): this listing is visibly truncated -- closing braces are
// missing throughout, several `if err != nil {` checks have no body, and the
// function has no visible end.  Restore it from the upstream file before
// building; the comments below describe only what the visible lines do.
func main() {

	// Allocate the package-level buffer pool when enabled.
	if enableBufferPool {
		bpool = util.NewBufferPool(opt.DefaultBlockSize + 128)

	log.Printf("Test DB stored at %q", dbPath)
	// When httpProf is set, serve HTTP on it in the background using the
	// default mux; a listen failure terminates the process via log.Fatalf.
	if httpProf != "" {
		log.Printf("HTTP pprof listening at %q", httpProf)
		go func() {
			if err := http.ListenAndServe(httpProf, nil); err != nil {
				log.Fatalf("HTTPPROF: %v", err)


	// Open the on-disk storage and wrap it in testingStorage.
	// NOTE(review): the error branch body is missing in this listing.
	stor, err := storage.OpenFile(dbPath, false)
	if err != nil {
	tstor := &testingStorage{stor}
	defer tstor.Close()

	// fatalf sets the fail and done flags, logs the formatted message, and,
	// when err is a corruption error that names a table file, scans that
	// table and logs if the scan does not succeed.
	fatalf := func(err error, format string, v ...interface{}) {
		atomic.StoreUint32(&fail, 1)
		atomic.StoreUint32(&done, 1)
		log.Printf("FATAL: "+format, v...)
		if err != nil && errors.IsCorrupted(err) {
			cerr := err.(*errors.ErrCorrupted)
			if !cerr.Fd.Nil() && cerr.Fd.Type == storage.TypeTable {
				log.Print("FATAL: corruption detected, scanning...")
				if !tstor.scanTable(storage.FileDesc{Type: storage.TypeTable, Num: cerr.Fd.Num}, false) {
					log.Printf("FATAL: unable to find corrupted key/value pair in table %v", cerr.Fd)

	// A capacity of 0 is mapped to -1.
	// NOTE(review): presumably 0 would select the library default while -1
	// means "no cache" -- confirm against the opt.Options documentation.
	if openFilesCacheCapacity == 0 {
		openFilesCacheCapacity = -1
	// Database options derived from the package-level settings.  Compression
	// starts disabled and is switched to the default only when enabled.
	o := &opt.Options{
		OpenFilesCacheCapacity: openFilesCacheCapacity,
		DisableBufferPool:      !enableBufferPool,
		DisableBlockCache:      !enableBlockCache,
		ErrorIfExist:           true,
		Compression:            opt.NoCompression,
	if enableCompression {
		o.Compression = opt.DefaultCompression

	// Open the database on top of the testing storage.
	// NOTE(review): the error branch body is missing in this listing.
	db, err := leveldb.Open(tstor, o)
	if err != nil {
	defer db.Close()

	// Global latency statistics and the channels used to hand batches to the
	// single write goroutine.  All three channels are unbuffered.
	var (
		mu              = &sync.Mutex{}
		gGetStat        = &latencyStats{}
		gIterStat       = &latencyStats{}
		gWriteStat      = &latencyStats{}
		gTrasactionStat = &latencyStats{}
		startTime       = time.Now()

		writeReq    = make(chan *leveldb.Batch)
		writeAck    = make(chan error)
		writeAckAck = make(chan struct{})

	// Write goroutine: receives batches from writeReq and reports the result
	// of each write on writeAck.  With probability transactionProb the batch
	// is written through a transaction (open, write, commit); otherwise it is
	// written with db.Write.
	// NOTE(review): the success branches and the transaction failure branch
	// are missing in this listing.
	go func() {
		for b := range writeReq {

			var err error
			if mrand.Float64() < transactionProb {
				log.Print("> Write using transaction")
				var tr *leveldb.Transaction
				if tr, err = db.OpenTransaction(); err == nil {
					if err = tr.Write(b, nil); err == nil {
						if err = tr.Commit(); err == nil {
					} else {
			} else {
				if err = db.Write(b, nil); err == nil {
			writeAck <- err

	// Reporting goroutine: every three seconds logs the elapsed time, the
	// global get/iter/write/transaction latency statistics and a set of
	// database properties.  Errors from GetProperty are ignored.
	go func() {
		for {
			time.Sleep(3 * time.Second)


			log.Printf("> Elapsed=%v", time.Now().Sub(startTime))
			log.Printf("> GetLatencyMin=%v GetLatencyMax=%v GetLatencyAvg=%v GetRatePerSec=%d",
				gGetStat.min, gGetStat.max, gGetStat.avg(), gGetStat.ratePerSec())
			log.Printf("> IterLatencyMin=%v IterLatencyMax=%v IterLatencyAvg=%v IterRatePerSec=%d",
				gIterStat.min, gIterStat.max, gIterStat.avg(), gIterStat.ratePerSec())
			log.Printf("> WriteLatencyMin=%v WriteLatencyMax=%v WriteLatencyAvg=%v WriteRatePerSec=%d",
				gWriteStat.min, gWriteStat.max, gWriteStat.avg(), gWriteStat.ratePerSec())
			log.Printf("> TransactionLatencyMin=%v TransactionLatencyMax=%v TransactionLatencyAvg=%v TransactionRatePerSec=%d",
				gTrasactionStat.min, gTrasactionStat.max, gTrasactionStat.avg(), gTrasactionStat.ratePerSec())

			cachedblock, _ := db.GetProperty("leveldb.cachedblock")
			openedtables, _ := db.GetProperty("leveldb.openedtables")
			alivesnaps, _ := db.GetProperty("leveldb.alivesnaps")
			aliveiters, _ := db.GetProperty("leveldb.aliveiters")
			blockpool, _ := db.GetProperty("leveldb.blockpool")
			log.Printf("> BlockCache=%s OpenedTables=%s AliveSnaps=%s AliveIter=%s BlockPool=%q",
				cachedblock, openedtables, alivesnaps, aliveiters, blockpool)


	// Start the workers for every namespace; ns is the index into numKeys and
	// numKey the number of keys for that namespace.  ns and numKey are passed
	// as arguments so each closure works on its own copies.
	for ns, numKey := range numKeys {
		func(ns, numKey int) {
			log.Printf("[%02d] STARTING: numKey=%d", ns, numKey)

			// Pre-generate the namespace's keys (prefix 1, indexed by i).
			keys := make([][]byte, numKey)
			for i := range keys {
				keys[i] = randomData(nil, byte(ns), 1, uint32(i), keyLen)

			// Writer goroutine: until done is set, each round wi builds a
			// batch that, for every pre-generated key k1, stores a fresh
			// k2 -> v2 pair and points k1 at k2, then hands the batch to
			// the write goroutine.
			go func() {
				var wi uint32
				defer func() {
					log.Printf("[%02d] WRITER DONE #%d", ns, wi)

				var (
					b       = new(leveldb.Batch)
					k2, v2  []byte
					nReader int32
				for atomic.LoadUint32(&done) == 0 {
					log.Printf("[%02d] WRITER #%d", ns, wi)

					for _, k1 := range keys {
						k2 = randomData(k2, byte(ns), 2, wi, keyLen)
						v2 = randomData(v2, byte(ns), 3, wi, valueLen)
						b.Put(k2, v2)
						b.Put(k1, k2)
					// Submit the batch and wait for its result.  writeAckAck
					// is signalled on every path, after the snapshot attempt.
					// NOTE(review): presumably the write goroutine waits on
					// writeAckAck so no other write lands before the snapshot
					// is taken -- that receive is not visible here; confirm.
					writeReq <- b
					if err := <-writeAck; err != nil {
						writeAckAck <- struct{}{}
						fatalf(err, "[%02d] WRITER #%d db.Write: %v", ns, wi, err)

					snap, err := db.GetSnapshot()
					if err != nil {
						writeAckAck <- struct{}{}
						fatalf(err, "[%02d] WRITER #%d db.GetSnapshot: %v", ns, wi, err)

					writeAckAck <- struct{}{}

					// Reader goroutine for this round's snapshot; nReader
					// tracks how many readers are alive.
					atomic.AddInt32(&nReader, 1)
					go func(snapwi uint32, snap *leveldb.Snapshot) {
						var (
							ri       int
							iterStat = &latencyStats{}
							getStat  = &latencyStats{}
						defer func() {

							atomic.AddInt32(&nReader, -1)
							log.Printf("[%02d] READER #%d.%d DONE Snap=%v Alive=%d IterLatency=%v GetLatency=%v", ns, snapwi, ri, snap, atomic.LoadInt32(&nReader), iterStat.avg(), getStat.avg())

						// Keep verifying the snapshot while fewer than three
						// passes have run or the writer has not yet reached
						// round snapwi+3, stopping early once done is set.
						stopi := snapwi + 3
						for (ri < 3 || atomic.LoadUint32(&wi) < stopi) && atomic.LoadUint32(&done) == 0 {
							var n int
							iter := snap.NewIterator(dataPrefixSlice(byte(ns), 1), nil)
							for iter.Next() {
								k1 := iter.Key()
								k2 := iter.Value()

								// The value stored under k1 is itself a key; it
								// must carry this namespace and this snapshot's
								// write round.
								if dataNS(k2) != byte(ns) {
									fatalf(nil, "[%02d] READER #%d.%d K%d invalid in-key NS: want=%d got=%d", ns, snapwi, ri, n, ns, dataNS(k2))

								kwritei := dataI(k2)
								if kwritei != snapwi {
									fatalf(nil, "[%02d] READER #%d.%d K%d invalid in-key iter num: %d", ns, snapwi, ri, n, kwritei)

								// Follow k2 to its value and verify the checksum.
								v2, err := snap.Get(k2, nil)
								if err != nil {
									fatalf(err, "[%02d] READER #%d.%d K%d snap.Get: %v\nk1: %x\n -> k2: %x", ns, snapwi, ri, n, err, k1, k2)

								if checksum0, checksum1 := dataChecksum(v2); checksum0 != checksum1 {
									err := &errors.ErrCorrupted{Fd: storage.FileDesc{0xff, 0}, Err: fmt.Errorf("v2: %x: checksum mismatch: %v vs %v", v2, checksum0, checksum1)}
									fatalf(err, "[%02d] READER #%d.%d K%d snap.Get: %v\nk1: %x\n -> k2: %x", ns, snapwi, ri, n, err, k1, k2)

							// NOTE(review): n is compared with numKey below but
							// is never incremented in the visible lines.
							if err := iter.Error(); err != nil {
								fatalf(err, "[%02d] READER #%d.%d K%d iter.Error: %v", ns, snapwi, ri, numKey, err)
							if n != numKey {
								fatalf(nil, "[%02d] READER #%d.%d missing keys: want=%d got=%d", ns, snapwi, ri, numKey, n)

					}(wi, snap)

					atomic.AddUint32(&wi, 1)

			// Scanner goroutine: after an initial two second delay it
			// repeatedly iterates the whole namespace on the live database,
			// verifies key and value checksums, and submits delB through the
			// write goroutine when it is non-empty.
			// NOTE(review): the lines that add deletions to delB are missing
			// in this listing (the body of the check at "dataPrefix(k) == 2").
			delB := new(leveldb.Batch)
			go func() {
				var (
					i        int
					iterStat = &latencyStats{}
				defer func() {
					log.Printf("[%02d] SCANNER DONE #%d", ns, i)

				time.Sleep(2 * time.Second)

				for atomic.LoadUint32(&done) == 0 {
					var n int
					iter := db.NewIterator(dataNsSlice(byte(ns)), nil)
					for iter.Next() && atomic.LoadUint32(&done) == 0 {
						k := iter.Key()
						v := iter.Value()

						// Check the key (ci == 0) and then the value.
						for ci, x := range [...][]byte{k, v} {
							checksum0, checksum1 := dataChecksum(x)
							if checksum0 != checksum1 {
								if ci == 0 {
									fatalf(nil, "[%02d] SCANNER %d.%d invalid key checksum: want %d, got %d\n%x -> %x", ns, i, n, checksum0, checksum1, k, v)
								} else {
									fatalf(nil, "[%02d] SCANNER %d.%d invalid value checksum: want %d, got %d\n%x -> %x", ns, i, n, checksum0, checksum1, k, v)

						if dataPrefix(k) == 2 || mrand.Int()%999 == 0 {

					if err := iter.Error(); err != nil {
						fatalf(err, "[%02d] SCANNER #%d.%d iter.Error: %v", ns, i, n, err)

					if n > 0 {
						log.Printf("[%02d] SCANNER #%d IterLatency=%v", ns, i, iterStat.avg())

					// Same request/ack/ack-ack exchange as the writer uses.
					if delB.Len() > 0 && atomic.LoadUint32(&done) == 0 {
						t := time.Now()
						writeReq <- delB
						if err := <-writeAck; err != nil {
							writeAckAck <- struct{}{}
							fatalf(err, "[%02d] SCANNER #%d db.Write: %v", ns, i, err)
						} else {
							writeAckAck <- struct{}{}
						log.Printf("[%02d] SCANNER #%d Deleted=%d Time=%v", ns, i, delB.Len(), time.Now().Sub(t))

		}(ns, numKey)

	// Signal goroutine: logs the first signal received and sets done so the
	// workers stop.
	// NOTE(review): the channel passed to signal.Notify is unbuffered, and the
	// os/signal documentation says Notify does not block when sending, so a
	// signal can be dropped if this goroutine is not yet receiving.  The same
	// documentation says os.Kill cannot be caught.  Consider
	// make(chan os.Signal, 1) and dropping os.Kill.
	go func() {
		sig := make(chan os.Signal)
		signal.Notify(sig, os.Interrupt, os.Kill)
		log.Printf("Got signal: %v, exiting...", <-sig)
		atomic.StoreUint32(&done, 1)
