func checkRangeReplication(t *testing.T, c cluster.Cluster, d time.Duration) {
	// Always talk to node 0.
	client, dbStopper := makeClient(t, c.ConnString(0))
	defer dbStopper.Stop()

	wantedReplicas := 3
	if c.NumNodes() < 3 {
		wantedReplicas = c.NumNodes()
	}

	log.Infof("waiting for first range to have %d replicas", wantedReplicas)

	util.SucceedsWithin(t, d, func() error {
		select {
		case <-stopper:
			t.Fatalf("interrupted")
			return nil
		case <-time.After(1 * time.Second):
		}

		foundReplicas, err := countRangeReplicas(client)
		if err != nil {
			return err
		}

		if log.V(1) {
			log.Infof("found %d replicas", foundReplicas)
		}
		if foundReplicas >= wantedReplicas {
			return nil
		}
		return fmt.Errorf("expected %d replicas, only found %d", wantedReplicas, foundReplicas)
	})
}
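
// countRangeReplicas is called above but defined elsewhere in this package.
// A minimal sketch of what it could look like, assuming the first range's
// descriptor is readable via db.GetProto with keys.RangeDescriptorKey, and
// that GetProto returns a *roachpb.Error as db.Put does in the tests below
// (these signatures are assumptions, not verified against this revision):
func countRangeReplicas(db *client.DB) (int, error) {
	desc := &roachpb.RangeDescriptor{}
	if pErr := db.GetProto(keys.RangeDescriptorKey(roachpb.RKeyMin), desc); pErr != nil {
		return 0, pErr.GoError()
	}
	return len(desc.Replicas), nil
}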
func testPutInner(t *testing.T, c cluster.Cluster, cfg cluster.TestConfig) {
	db, dbStopper := makeClient(t, c.ConnString(0))
	defer dbStopper.Stop()

	errs := make(chan error, c.NumNodes())
	start := time.Now()
	deadline := start.Add(cfg.Duration)
	var count int64
	for i := 0; i < c.NumNodes(); i++ {
		go func() {
			r, _ := randutil.NewPseudoRand()
			value := randutil.RandBytes(r, 8192)

			for time.Now().Before(deadline) {
				k := atomic.AddInt64(&count, 1)
				v := value[:r.Intn(len(value))]
				if pErr := db.Put(fmt.Sprintf("%08d", k), v); pErr != nil {
					errs <- pErr.GoError()
					return
				}
			}
			errs <- nil
		}()
	}

	for i := 0; i < c.NumNodes(); {
		baseCount := atomic.LoadInt64(&count)
		select {
		case <-stopper:
			t.Fatalf("interrupted")
		case err := <-errs:
			if err != nil {
				t.Fatal(err)
			}
			i++
		case <-time.After(1 * time.Second):
			// Periodically print out progress so that we know the test is
			// still running.
			loadedCount := atomic.LoadInt64(&count)
			log.Infof("%d (%d/s)", loadedCount, loadedCount-baseCount)
			c.Assert(t)
		}
	}

	elapsed := time.Since(start)
	log.Infof("%d %.1f/sec", count, float64(count)/elapsed.Seconds())
}
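
// An inner test like testPutInner is normally exposed as a regular Go test
// through the acceptance harness. A sketch of that wiring; the runner name
// (runTestOnConfigs) is an assumption about the surrounding package, not
// shown in this file:
func TestPut(t *testing.T) {
	runTestOnConfigs(t, testPutInner)
}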
func testSingleKeyInner(t *testing.T, c cluster.Cluster, cfg cluster.TestConfig) {
	num := c.NumNodes()

	// Initialize the value for our test key to zero.
	const key = "test-key"
	initDB, initDBStopper := makeClient(t, c.ConnString(0))
	defer initDBStopper.Stop()
	if err := initDB.Put(key, 0); err != nil {
		t.Fatal(err)
	}

	type result struct {
		err        error
		maxLatency time.Duration
	}

	resultCh := make(chan result, num)
	deadline := time.Now().Add(cfg.Duration)
	var expected int64

	// Start up num workers each reading and writing the same
	// key. Each worker is configured to talk to a different node in the
	// cluster.
	for i := 0; i < num; i++ {
		db, dbStopper := makeClient(t, c.ConnString(i))
		defer dbStopper.Stop()
		go func() {
			var r result
			for time.Now().Before(deadline) {
				start := time.Now()
				pErr := db.Txn(func(txn *client.Txn) *roachpb.Error {
					minExp := atomic.LoadInt64(&expected)
					r, pErr := txn.Get(key)
					if pErr != nil {
						return pErr
					}
					b := txn.NewBatch()
					v := r.ValueInt()
					b.Put(key, v+1)
					pErr = txn.CommitInBatch(b)
					// Atomic updates after the fact mean that we should read
					// exp or larger (since concurrent writers might have
					// committed but not yet performed their atomic update).
					if pErr == nil && v < minExp {
						return roachpb.NewErrorf("unexpected read: %d, expected >= %d", v, minExp)
					}
					return pErr
				})
				if pErr != nil {
					resultCh <- result{err: pErr.GoError()}
					return
				}
				atomic.AddInt64(&expected, 1)
				latency := time.Since(start)
				if r.maxLatency < latency {
					r.maxLatency = latency
				}
			}
			resultCh <- r
		}()
	}

	// Verify that none of the workers encountered an error.
	var results []result
	for len(results) < num {
		select {
		case <-stopper:
			t.Fatalf("interrupted")
		case r := <-resultCh:
			if r.err != nil {
				t.Fatal(r.err)
			}
			results = append(results, r)
		case <-time.After(1 * time.Second):
			// Periodically print out progress so that we know the test is
			// still running.
			log.Infof("%d", atomic.LoadInt64(&expected))
		}
	}

	// Verify the resulting value stored at the key is what we expect.
	r, err := initDB.Get(key)
	if err != nil {
		t.Fatal(err)
	}
	v := r.ValueInt()
	if expected != v {
		t.Fatalf("expected %d, but found %d", expected, v)
	}
	var maxLatency []time.Duration
	for _, r := range results {
		maxLatency = append(maxLatency, r.maxLatency)
	}
	log.Infof("%d increments: %s", v, maxLatency)
}
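
// Each worker iteration above reduces to a single read-modify-write
// transaction. The same pattern in isolation, using only the client calls
// that already appear above (the helper name incrementKey is ours, not part
// of the original file):
func incrementKey(db *client.DB, key string) (int64, *roachpb.Error) {
	var next int64
	pErr := db.Txn(func(txn *client.Txn) *roachpb.Error {
		gr, pErr := txn.Get(key)
		if pErr != nil {
			return pErr
		}
		next = gr.ValueInt() + 1
		b := txn.NewBatch()
		b.Put(key, next)
		return txn.CommitInBatch(b)
	})
	return next, pErr
}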
func testGossipRestartInner(t *testing.T, c cluster.Cluster, cfg cluster.TestConfig) {
	// This already replicates the first range (in the local setup).
	// The replication of the first range is important: as long as the
	// first range only exists on one node, that node can trivially
	// acquire the leader lease. Once the range is replicated, however,
	// nodes must be able to discover each other over gossip before the
	// lease can be acquired.
	num := c.NumNodes()

	deadline := time.Now().Add(cfg.Duration)

	waitTime := longWaitTime
	if cfg.Duration < waitTime {
		waitTime = shortWaitTime
	}

	for time.Now().Before(deadline) {
		log.Infof("waiting for initial gossip connections")
		checkGossip(t, c, waitTime, hasPeers(num))
		checkGossip(t, c, waitTime, hasClusterID)
		checkGossip(t, c, waitTime, hasSentinel)

		log.Infof("killing all nodes")
		for i := 0; i < num; i++ {
			if err := c.Kill(i); err != nil {
				t.Fatal(err)
			}
		}

		log.Infof("restarting all nodes")
		for i := 0; i < num; i++ {
			if err := c.Restart(i); err != nil {
				t.Fatal(err)
			}
		}

		log.Infof("waiting for gossip to be connected")
		checkGossip(t, c, waitTime, hasPeers(num))
		checkGossip(t, c, waitTime, hasClusterID)
		checkGossip(t, c, waitTime, hasSentinel)

		for i := 0; i < num; i++ {
			db, dbStopper := makeClient(t, c.ConnString(i))
			if i == 0 {
				if err := db.Del("count"); err != nil {
					t.Fatal(err)
				}
			}
			var kv client.KeyValue
			if pErr := db.Txn(func(txn *client.Txn) *roachpb.Error {
				var pErr *roachpb.Error
				kv, pErr = txn.Inc("count", 1)
				return pErr
			}); pErr != nil {
				t.Fatal(pErr)
			} else if v := kv.ValueInt(); v != int64(i+1) {
				t.Fatalf("unexpected value %d for write #%d (expected %d)", v, i, i+1)
			}
			dbStopper.Stop()
		}
	}
}
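
// checkGossip and its checkers (hasPeers, hasClusterID, hasSentinel) live
// elsewhere in this package. A sketch of hasPeers under the assumption that
// checkGossip hands each checker the gossiped infos keyed by info name; the
// checkGossipFunc signature and the "node:" key prefix are assumptions, not
// verified against this revision:
type checkGossipFunc func(infos map[string]interface{}) error

func hasPeers(expected int) checkGossipFunc {
	return func(infos map[string]interface{}) error {
		count := 0
		for k := range infos {
			if strings.HasPrefix(k, "node:") {
				count++
			}
		}
		if count != expected {
			return fmt.Errorf("expected %d peers, found %d", expected, count)
		}
		return nil
	}
}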