Example #1
0
// TestBuildInfo starts a single-node cluster and polls the first node's
// "/_status/details/local" endpoint, reporting a test error for every
// expected build info key that is absent or mapped to an empty string.
func TestBuildInfo(t *testing.T) {
	cluster := localcluster.Create(1, stopper)
	cluster.Start()
	defer cluster.AssertAndStop(t)

	requiredKeys := []string{"goVersion", "tag", "time", "dependencies"}

	// fetchAndVerify is one polling attempt. Only a failed GetJSON call is
	// returned as an error; missing or empty keys are reported through t
	// and the attempt still returns nil.
	fetchAndVerify := func() error {
		// Wait 200ms before each attempt, aborting the test if the stopper
		// channel fires first.
		select {
		case <-stopper:
			t.Fatalf("interrupted")
			return nil
		case <-time.After(200 * time.Millisecond):
		}

		var resp struct {
			BuildInfo map[string]string
		}
		err := cluster.Nodes[0].GetJSON("", "/_status/details/local", &resp)
		if err != nil {
			return err
		}

		for _, field := range requiredKeys {
			value, present := resp.BuildInfo[field]
			switch {
			case !present:
				t.Errorf("build info missing for \"%s\"", field)
			case value == "":
				t.Errorf("build info not set for \"%s\"", field)
			}
		}
		return nil
	}

	util.SucceedsWithin(t, 10*time.Second, fetchAndVerify)
}
Example #2
0
// TestRangeReplication starts a cluster of *numNodes nodes and runs
// checkRangeReplication against it with a 20 second duration argument.
func TestRangeReplication(t *testing.T) {
	const replicationTimeout = 20 * time.Second

	cluster := localcluster.Create(*numNodes, stopper)
	cluster.Start()
	defer cluster.Stop()

	checkRangeReplication(t, cluster, replicationTimeout)
}
Example #3
0
// TestGossipRestart verifies that the gossip network can be
// re-bootstrapped after a time when all nodes were down
// simultaneously.
//
// The test starts a cluster of *numNodes nodes, waits for the gossip
// checks and range replication to pass, stops every node, restarts every
// node and then repeats the gossip checks.
func TestGossipRestart(t *testing.T) {
	l := localcluster.Create(*numNodes, stopper)
	l.Start()
	defer l.Stop()

	log.Infof("waiting for initial gossip connections")
	checkGossip(t, l, 20*time.Second, hasPeers(len(l.Nodes)))
	checkGossip(t, l, time.Second, hasClusterID)
	checkGossip(t, l, time.Second, hasSentinel)

	// The replication of the first range is important: as long as the
	// first range only exists on one node, that node can trivially
	// acquire the leader lease. Once the range is replicated, however,
	// nodes must be able to discover each other over gossip before the
	// lease can be acquired.
	log.Infof("waiting for range replication")
	checkRangeReplication(t, l, 10*time.Second)

	log.Infof("stopping all nodes")
	for _, node := range l.Nodes {
		// NOTE(review): any result returned by Stop is discarded here;
		// confirm whether it reports an error worth checking.
		node.Stop(5)
	}

	log.Infof("restarting all nodes")
	for _, node := range l.Nodes {
		// A node that fails to restart makes the gossip checks below
		// meaningless, so fail immediately with the underlying error
		// rather than waiting for a later check to time out.
		if err := node.Restart(5); err != nil {
			t.Fatal(err)
		}
	}

	log.Infof("waiting for gossip to be connected")
	checkGossip(t, l, 20*time.Second, hasPeers(len(l.Nodes)))
	checkGossip(t, l, time.Second, hasClusterID)
	checkGossip(t, l, time.Second, hasSentinel)
}
Example #4
0
// TestPut brings up a cluster of *numNodes nodes and launches *numNodes
// writer goroutines that share one DB client. Every write uses a key
// formatted from a shared atomic counter, so no two writes target the same
// key. Writers run until *duration has elapsed; the first write error
// fails the test.
func TestPut(t *testing.T) {
	cluster := localcluster.Create(*numNodes, stopper)
	cluster.Start()
	defer cluster.Stop()

	db, dbStopper := makeDBClient(t, cluster, 0)
	defer dbStopper.Stop()
	if err := configutil.SetDefaultRangeMaxBytes(db, *rangeMaxBytes); err != nil {
		t.Fatal(err)
	}
	checkRangeReplication(t, cluster, 20*time.Second)

	// done is buffered with one slot per writer so that writers can always
	// deliver their final status without blocking.
	done := make(chan error, *numNodes)
	began := time.Now()
	deadline := began.Add(*duration)
	var count int64

	// writer puts random-length prefixes of a random 8192-byte payload
	// under fresh keys until the deadline passes, then reports nil. A
	// failed Put is reported instead and ends the writer early.
	writer := func() {
		rng, _ := randutil.NewPseudoRand()
		payload := randutil.RandBytes(rng, 8192)

		for time.Now().Before(deadline) {
			seq := atomic.AddInt64(&count, 1)
			size := rng.Intn(len(payload))
			if err := db.Put(fmt.Sprintf("%08d", seq), payload[:size]); err != nil {
				done <- err
				return
			}
		}
		done <- nil
	}
	for w := 0; w < *numNodes; w++ {
		go writer()
	}

	// Collect one status per writer, logging the running write count
	// whenever a full second passes without any other event.
	for finished := 0; finished < *numNodes; {
		select {
		case <-stopper:
			t.Fatalf("interrupted")
		case err := <-done:
			if err != nil {
				t.Fatal(err)
			}
			finished++
		case <-time.After(time.Second):
			log.Infof("%d", atomic.LoadInt64(&count))
		}
	}

	elapsed := time.Since(began)
	log.Infof("%d %.1f/sec", count, float64(count)/elapsed.Seconds())
}
Example #5
0
// TestStatusServer brings up a cluster of *numNodes nodes and exercises the
// status endpoints of every node through an HTTP client. The test is
// currently skipped unconditionally.
func TestStatusServer(t *testing.T) {
	t.Skipf("TODO(Bram): Test is flaky - fix it.")
	cluster := localcluster.Create(*numNodes, stopper)
	cluster.ForceLogging = true
	cluster.Start()
	defer cluster.Stop()
	checkRangeReplication(t, cluster, 20*time.Second)

	// The client skips TLS certificate verification and gives up on any
	// request after 200ms.
	tlsConfig := &tls.Config{InsecureSkipVerify: true}
	httpClient := &http.Client{
		Timeout:   200 * time.Millisecond,
		Transport: &http.Transport{TLSClientConfig: tlsConfig},
	}

	// Map each cluster node's ID to the node ID string reported by that
	// node's local details endpoint.
	reportedIDs := make(map[string]string)
	for _, node := range cluster.Nodes {
		var detail details
		raw := get(t, httpClient, node, "/_status/details/local")
		if err := json.Unmarshal(raw, &detail); err != nil {
			t.Fatalf("unable to parse details - %s", err)
		}
		reportedIDs[node.ID] = detail.NodeID.String()
	}

	// Check the local response of every node, and fetch its nodes and
	// stores endpoints.
	for _, node := range cluster.Nodes {
		id := reportedIDs[node.ID]
		checkNode(t, httpClient, node, id, "local", id)
		get(t, httpClient, node, "/_status/nodes")
		get(t, httpClient, node, "/_status/stores")
	}

	first := cluster.Nodes[0]
	last := cluster.Nodes[len(cluster.Nodes)-1]
	firstID, lastID := reportedIDs[first.ID], reportedIDs[last.ID]

	// Proxy from the first node to the last node.
	checkNode(t, httpClient, first, firstID, lastID, lastID)

	// Proxy from the last node to the first node.
	checkNode(t, httpClient, last, lastID, firstID, firstID)

	// Ask the last node about itself by its ID.
	checkNode(t, httpClient, last, lastID, lastID, lastID)
}
// TestGossipRestart verifies that the gossip network can be
// re-bootstrapped after a time when all nodes were down
// simultaneously.
//
// The test starts a cluster of *numNodes nodes, waits for the gossip
// checks and range replication to pass, kills every node, restarts every
// node, repeats the gossip checks and finally increments a shared counter
// once through a client for each node, expecting the values 1..N in order.
func TestGossipRestart(t *testing.T) {
	l := localcluster.Create(*numNodes, stopper)
	l.Start()
	defer l.AssertAndStop(t)

	log.Infof("waiting for initial gossip connections")
	checkGossip(t, l, 20*time.Second, hasPeers(len(l.Nodes)))
	checkGossip(t, l, time.Second, hasClusterID)
	checkGossip(t, l, time.Second, hasSentinel)

	// The replication of the first range is important: as long as the
	// first range only exists on one node, that node can trivially
	// acquire the leader lease. Once the range is replicated, however,
	// nodes must be able to discover each other over gossip before the
	// lease can be acquired.
	log.Infof("waiting for range replication")
	checkRangeReplication(t, l, 10*time.Second)

	log.Infof("killing all nodes")
	for _, node := range l.Nodes {
		// NOTE(review): any result returned by Kill is discarded here;
		// confirm whether it reports an error worth checking.
		node.Kill()
	}

	log.Infof("restarting all nodes")
	for _, node := range l.Nodes {
		// A node that fails to restart makes the checks below
		// meaningless, so fail immediately with the underlying error
		// rather than waiting for a later check to time out.
		if err := node.Restart(5); err != nil {
			t.Fatal(err)
		}
	}

	log.Infof("waiting for gossip to be connected")
	checkGossip(t, l, 20*time.Second, hasPeers(len(l.Nodes)))
	checkGossip(t, l, time.Second, hasClusterID)
	checkGossip(t, l, time.Second, hasSentinel)

	for i := range l.Nodes {
		// Each write runs in its own function so that the deferred Stop
		// fires at the end of the iteration and also when t.Fatal aborts
		// the test; previously a failure skipped the Stop call.
		func() {
			db, dbStopper := makeDBClient(t, l, i)
			defer dbStopper.Stop()

			kv, err := db.Inc("count", 1)
			if err != nil {
				t.Fatal(err)
			}
			if v := kv.ValueInt(); v != int64(i+1) {
				t.Fatalf("unexpected value %d for write #%d (expected %d)", v, i, i+1)
			}
		}()
	}
}
// TestStatusServer brings up a cluster of *numNodes nodes with forced
// logging and exercises the status endpoints of every node.
func TestStatusServer(t *testing.T) {
	cluster := localcluster.Create(*numNodes, stopper)
	cluster.ForceLogging = true
	cluster.Start()
	defer cluster.Stop()
	checkRangeReplication(t, cluster, 20*time.Second)

	// Map each cluster node's ID to the node ID string reported by that
	// node's local details endpoint.
	reportedIDs := make(map[string]string)
	for _, node := range cluster.Nodes {
		var detail details
		raw := get(t, node, "/_status/details/local")
		if err := json.Unmarshal(raw, &detail); err != nil {
			t.Fatalf("unable to parse details - %s", err)
		}
		reportedIDs[node.ID] = detail.NodeID.String()
	}

	// Check the local response of every node, and fetch its nodes and
	// stores endpoints.
	for _, node := range cluster.Nodes {
		id := reportedIDs[node.ID]
		checkNode(t, node, id, "local", id)
		get(t, node, "/_status/nodes")
		get(t, node, "/_status/stores")
	}

	first := cluster.Nodes[0]
	last := cluster.Nodes[len(cluster.Nodes)-1]
	firstID, lastID := reportedIDs[first.ID], reportedIDs[last.ID]

	// Proxy from the first node to the last node.
	checkNode(t, first, firstID, lastID, lastID)

	// Proxy from the last node to the first node.
	checkNode(t, last, lastID, firstID, firstID)

	// Ask the last node about itself by its ID.
	checkNode(t, last, lastID, lastID, lastID)
}
// TestGossipPeerings starts a cluster of *numNodes nodes and verifies that
// the hasPeers gossip check passes initially, after restarting node 0, and
// after restarting one randomly chosen other node.
func TestGossipPeerings(t *testing.T) {
	l := localcluster.Create(*numNodes, stopper)
	l.Start()
	defer l.AssertAndStop(t)

	checkGossip(t, l, 20*time.Second, hasPeers(len(l.Nodes)))

	// Restart the first node.
	log.Infof("restarting node 0")
	if err := l.Nodes[0].Restart(5); err != nil {
		t.Fatal(err)
	}
	checkGossip(t, l, 20*time.Second, hasPeers(len(l.Nodes)))

	// With fewer than two nodes there is no "other" node to restart, and
	// rand.Intn panics when its argument is not positive, so stop here.
	if len(l.Nodes) < 2 {
		log.Infof("fewer than two nodes; skipping restart of a second node")
		return
	}

	// Restart another node, chosen uniformly from indexes 1..len-1.
	rand.Seed(randutil.NewPseudoSeed())
	pickedNode := rand.Intn(len(l.Nodes)-1) + 1
	log.Infof("restarting node %d", pickedNode)
	if err := l.Nodes[pickedNode].Restart(5); err != nil {
		t.Fatal(err)
	}
	checkGossip(t, l, 20*time.Second, hasPeers(len(l.Nodes)))
}
Example #9
0
// TestChaos starts up a cluster and, for each node, a worker writing to
// independent keys, while nodes are being killed and restarted continuously.
// The test measures not write performance, but cluster recovery.
//
// The test is currently skipped unconditionally by the t.Skip call on its
// first line.
func TestChaos(t *testing.T) {
	t.Skip("TODO(tschottdorf): currently unstable")
	l := localcluster.Create(*numNodes, stopper)
	l.Start()
	defer l.AssertAndStop(t)

	checkRangeReplication(t, l, 20*time.Second)

	// errs has one buffered slot per worker so that each worker can deliver
	// its final status without blocking.
	errs := make(chan error, *numNodes)
	start := time.Now()
	deadline := start.Add(*duration)
	// count is the total number of attempted writes across all workers;
	// counts holds the per-worker totals. Both are updated atomically.
	var count int64
	counts := make([]int64, *numNodes)
	// clients holds one DB client (and its stopper) per node. Workers take
	// the read lock around each write; the chaos monkey takes the write lock
	// while it restarts the node and replaces the client.
	clients := make([]struct {
		sync.RWMutex
		db      *client.DB
		stopper *stop.Stopper
	}, *numNodes)

	// initClient creates a fresh client for node i, stopping the previous
	// client's stopper if there was one. It does no locking itself.
	initClient := func(i int) {
		db, dbStopper := makeDBClient(t, l, i)
		if clients[i].stopper != nil {
			clients[i].stopper.Stop()
		}
		clients[i].db, clients[i].stopper = db, dbStopper
	}

	// Start one worker per node. Each worker writes random-length prefixes
	// of a random 8192-byte value under keys formatted from the shared
	// counter until the deadline passes.
	for i := 0; i < *numNodes; i++ {
		initClient(i)
		go func(i int) {
			r, _ := randutil.NewPseudoRand()
			value := randutil.RandBytes(r, 8192)

			for time.Now().Before(deadline) {
				clients[i].RLock()
				k := atomic.AddInt64(&count, 1)
				atomic.AddInt64(&counts[i], 1)
				v := value[:r.Intn(len(value))]
				if err := clients[i].db.Put(fmt.Sprintf("%08d", k), v); err != nil {
					// These originate from DistSender when, for example, the
					// leader is down. With more realistic retry options, we
					// should probably not see them.
					if _, ok := err.(*roachpb.SendError); ok {
						log.Warning(err)
					} else {
						// Any other error ends this worker and is reported
						// to the main loop.
						errs <- err
						clients[i].RUnlock()
						return
					}
				}
				clients[i].RUnlock()
			}
			errs <- nil
		}(i)
	}

	// teardown is closed when the chaos monkey goroutine exits. The deferred
	// function waits for that before stopping the clients, so that it does
	// not run concurrently with the monkey's initClient calls.
	teardown := make(chan struct{})
	defer func() {
		<-teardown
		for i := range clients {
			clients[i].stopper.Stop()
			clients[i].stopper = nil
		}
	}()

	// Chaos monkey.
	go func() {
		defer close(teardown)
		rnd, seed := randutil.NewPseudoRand()
		log.Warningf("monkey starts (seed %d)", seed)
		for round := 1; time.Now().Before(deadline); round++ {
			// Exit without blocking if the stopper channel has fired.
			select {
			case <-stopper:
				return
			default:
			}
			// Pick a random prefix, of length 1 to *numNodes, of a random
			// permutation of the node indexes.
			nodes := rnd.Perm(*numNodes)[:rnd.Intn(*numNodes)+1]

			log.Infof("round %d: restarting nodes %v", round, nodes)
			// Take the write lock of every chosen node before touching any
			// of them, so that their workers are paused between writes.
			for _, i := range nodes {
				clients[i].Lock()
			}
			for _, i := range nodes {
				log.Infof("restarting %v", i)
				// NOTE(review): any results returned by Kill and Restart are
				// discarded here; confirm whether they report errors worth
				// checking.
				l.Nodes[i].Kill()
				l.Nodes[i].Restart(5)
				initClient(i)
				clients[i].Unlock()
			}
			// Wait, asserting on the cluster once per second, until the
			// shared write counter moves past its current value or the
			// deadline passes.
			for cur := atomic.LoadInt64(&count); time.Now().Before(deadline) &&
				atomic.LoadInt64(&count) == cur; time.Sleep(time.Second) {
				l.Assert(t)
				log.Warningf("monkey sleeping while cluster recovers...")
			}
		}
	}()

	// Collect one status per worker.
	for i := 0; i < *numNodes; {
		select {
		// NOTE(review): once teardown has been closed this case is always
		// ready and has an empty body, so the loop can spin without blocking
		// until the remaining workers report; confirm this is intended.
		case <-teardown:
		case <-stopper:
			t.Fatal("interrupted")
		case err := <-errs:
			if err != nil {
				t.Error(err)
			}
			i++
		case <-time.After(1 * time.Second):
			// Periodically print out progress so that we know the test is still
			// running.
			cur := make([]string, *numNodes)
			for i := range cur {
				cur[i] = fmt.Sprintf("%d", atomic.LoadInt64(&counts[i]))
			}
			log.Infof("%d (%s)", atomic.LoadInt64(&count), strings.Join(cur, ", "))
		}
	}

	elapsed := time.Since(start)
	log.Infof("%d %.1f/sec", count, float64(count)/elapsed.Seconds())
}
Example #10
0
// TestMultiuser brings up a cluster of *numNodes nodes, creates client
// certificates for the users "foo" and "other", installs permission
// configs for three key prefixes as "root", and then checks that writes
// and reads by each user succeed or fail as the tables below expect.
func TestMultiuser(t *testing.T) {
	cluster := localcluster.Create(*numNodes, stopper)
	cluster.Start()
	defer cluster.Stop()

	// Create client certificates for "foo" and "other".
	if err := security.RunCreateClientCert(cluster.CertsDir, 512, "foo"); err != nil {
		t.Fatal(err)
	}
	if err := security.RunCreateClientCert(cluster.CertsDir, 512, "other"); err != nil {
		t.Fatal(err)
	}

	checkRangeReplication(t, cluster, 20*time.Second)

	// One client per user.
	rootClient := makeDBClientForUser(t, cluster, "root", 0)
	fooClient := makeDBClientForUser(t, cluster, "foo", 0)
	otherClient := makeDBClientForUser(t, cluster, "other", 0)

	// Permission configs, written using the root client.
	// Good to know: "root" is always allowed to read and write.
	permConfigs := []struct {
		prefix  string
		readers []string
		writers []string
	}{
		{"foo", []string{"foo"}, []string{"foo"}},
		{"foo/public", []string{"foo", "other"}, []string{"foo"}},
		{"tmp", []string{"foo", "other"}, []string{"foo", "other"}},
	}
	for idx, pc := range permConfigs {
		protoConfig := &config.PermConfig{Read: pc.readers, Write: pc.writers}
		err := putPermConfig(rootClient, pc.prefix, protoConfig)
		if err != nil {
			t.Fatalf("#%d: failed to write config %+v for prefix %q: %v", idx, protoConfig, pc.prefix, err)
		}
	}

	// access describes one operation on key through db and whether it is
	// expected to succeed.
	type access struct {
		key     string
		db      *client.DB
		success bool
	}

	// Write some data. The value is just the key.
	writes := []access{
		{"some-file", rootClient, true},
		{"some-file", fooClient, false},
		{"some-file", otherClient, false},
		{"foo/a", rootClient, true},
		{"foo/a", fooClient, true},
		{"foo/a", otherClient, false},
		{"foo/public/b", rootClient, true},
		{"foo/public/b", fooClient, true},
		{"foo/public/b", otherClient, false},
		{"tmp/c", rootClient, true},
		{"tmp/c", fooClient, true},
		{"tmp/c", otherClient, true},
	}
	for idx, w := range writes {
		err := w.db.Put(w.key, w.key)
		if succeeded := err == nil; succeeded != w.success {
			t.Errorf("test case #%d: %+v, got err=%v", idx, w, err)
		}
	}

	// Read the previously-written files. They all succeeded at least once.
	reads := []access{
		{"some-file", rootClient, true},
		{"some-file", fooClient, false},
		{"some-file", otherClient, false},
		{"foo/a", rootClient, true},
		{"foo/a", fooClient, true},
		{"foo/a", otherClient, false},
		{"foo/public/b", rootClient, true},
		{"foo/public/b", fooClient, true},
		{"foo/public/b", otherClient, true},
		{"tmp/c", rootClient, true},
		{"tmp/c", fooClient, true},
		{"tmp/c", otherClient, true},
	}
	for idx, r := range reads {
		_, err := r.db.Get(r.key)
		if succeeded := err == nil; succeeded != r.success {
			t.Errorf("test case #%d: %+v, got err=%v", idx, r, err)
		}
	}
}
Example #11
0
// TestSingleKey stresses the transaction retry machinery: it brings up a
// cluster of *numNodes nodes and runs one worker per node, all of which
// repeatedly increment the value stored under a single key inside a
// transaction. After *duration has elapsed the stored value must equal the
// number of transactions that completed without error.
func TestSingleKey(t *testing.T) {
	cluster := localcluster.Create(*numNodes, stopper)
	cluster.Start()
	defer cluster.Stop()

	checkRangeReplication(t, cluster, 20*time.Second)

	// Seed the shared key with zero before any worker starts.
	const key = "test-key"
	db := makeDBClient(t, cluster, 0)
	if err := db.Put(key, testVal(0)); err != nil {
		t.Fatal(err)
	}

	// result is a worker's final report: either an error, or the number of
	// increments it performed and the slowest one it observed.
	type result struct {
		err        error
		count      int
		maxLatency time.Duration
	}

	resultCh := make(chan result, *numNodes)
	deadline := time.Now().Add(*duration)
	var expected int64

	// increment is the transaction body: read the current value, decode
	// it, and commit a batch that writes the value plus one.
	increment := func(txn *client.Txn) error {
		kv, err := txn.Get(key)
		if err != nil {
			return err
		}
		var cur testVal
		if err := cur.UnmarshalBinary(kv.ValueBytes()); err != nil {
			return err
		}
		batch := &client.Batch{}
		batch.Put(key, cur+1)
		return txn.CommitInBatch(batch)
	}

	// Launch *numNodes workers, each with its own client created for a
	// different node index, all incrementing the same key.
	for i := 0; i < *numNodes; i++ {
		workerDB := makeDBClient(t, cluster, i)
		go func() {
			var res result
			for time.Now().Before(deadline) {
				began := time.Now()
				if err := workerDB.Txn(increment); err != nil {
					resultCh <- result{err: err}
					return
				}
				atomic.AddInt64(&expected, 1)
				res.count++
				if took := time.Since(began); took > res.maxLatency {
					res.maxLatency = took
				}
			}
			resultCh <- res
		}()
	}

	// Gather one report per worker; any worker error fails the test. The
	// running total is logged whenever a second passes without an event.
	var results []result
	for len(results) < *numNodes {
		select {
		case <-stopper:
			t.Fatalf("interrupted")
		case res := <-resultCh:
			if res.err != nil {
				t.Fatal(res.err)
			}
			results = append(results, res)
		case <-time.After(time.Second):
			log.Infof("%d", atomic.LoadInt64(&expected))
		}
	}

	// The stored value must match the number of successful increments.
	stored, err := db.Get(key)
	if err != nil {
		t.Fatal(err)
	}
	var final testVal
	if err := final.UnmarshalBinary(stored.ValueBytes()); err != nil {
		t.Fatal(err)
	}
	if int64(final) != expected {
		t.Fatalf("expected %d, but found %d", expected, final)
	}

	var maxLatency []time.Duration
	for _, res := range results {
		maxLatency = append(maxLatency, res.maxLatency)
	}
	log.Infof("%d increments: %s", final, maxLatency)
}