// connectGossip connects to gossip network and reads cluster ID. If // this node is already part of a cluster, the cluster ID is verified // for a match. If not part of a cluster, the cluster ID is set. The // node's address is gossipped with node ID as the gossip key. func (n *Node) connectGossip() { log.Infof("connecting to gossip network to verify cluster ID...") <-n.gossip.Connected val, err := n.gossip.GetInfo(gossip.KeyClusterID) if err != nil || val == nil { log.Fatalf("unable to ascertain cluster ID from gossip network: %v", err) } gossipClusterID := val.(string) if n.ClusterID == "" { n.ClusterID = gossipClusterID } else if n.ClusterID != gossipClusterID { log.Fatalf("node %d belongs to cluster %q but is attempting to connect to a gossip network for cluster %q", n.Descriptor.NodeID, n.ClusterID, gossipClusterID) } log.Infof("node connected via gossip and verified as part of cluster %q", gossipClusterID) // Gossip node address keyed by node ID. if n.Descriptor.NodeID != 0 { nodeIDKey := gossip.MakeNodeIDGossipKey(n.Descriptor.NodeID) if err := n.gossip.AddInfo(nodeIDKey, n.Descriptor.Address, ttlNodeIDGossip); err != nil { log.Errorf("couldn't gossip address for node %d: %v", n.Descriptor.NodeID, err) } } }
// process synchronously invokes admin split for each proposed split key. func (sq *splitQueue) process(now roachpb.Timestamp, rng *Replica, sysCfg *config.SystemConfig) error { // First handle case of splitting due to zone config maps. desc := rng.Desc() splitKeys := sysCfg.ComputeSplitKeys(desc.StartKey, desc.EndKey) if len(splitKeys) > 0 { log.Infof("splitting %s at keys %v", rng, splitKeys) for _, splitKey := range splitKeys { if err := sq.db.AdminSplit(splitKey.AsRawKey()); err != nil { return util.Errorf("unable to split %s at key %q: %s", rng, splitKey, err) } } return nil } // Next handle case of splitting due to size. zone, err := sysCfg.GetZoneConfigForKey(desc.StartKey) if err != nil { return err } // FIXME: why is this implementation not the same as the one above? if float64(rng.stats.GetSize())/float64(zone.RangeMaxBytes) > 1 { log.Infof("splitting %s size=%d max=%d", rng, rng.stats.GetSize(), zone.RangeMaxBytes) if _, pErr := client.SendWrapped(rng, rng.context(), &roachpb.AdminSplitRequest{ Span: roachpb.Span{Key: desc.StartKey.AsRawKey()}, }); pErr != nil { return pErr.GoError() } } return nil }
// wrap the supplied planNode with the sortNode if sorting is required. func (n *sortNode) wrap(plan planNode) planNode { if n != nil { // Check to see if the requested ordering is compatible with the existing // ordering. existingOrdering := plan.Ordering() if log.V(2) { log.Infof("Sort: existing=%d desired=%d", existingOrdering, n.ordering) } match := computeOrderingMatch(n.ordering, existingOrdering, false) if match < len(n.ordering) { n.plan = plan n.needSort = true return n } if len(n.columns) < len(plan.Columns()) { // No sorting required, but we have to strip off the extra render // expressions we added. n.plan = plan return n } } if log.V(2) { log.Infof("Sort: no sorting required") } return plan }
// start dials the remote addr and commences gossip once connected. Upon exit, // the client is sent on the disconnected channel. This method starts client // processing in a goroutine and returns immediately. func (c *client) start(g *Gossip, disconnected chan *client, ctx *rpc.Context, stopper *stop.Stopper) { stopper.RunWorker(func() { defer func() { disconnected <- c }() // Note: avoid using `grpc.WithBlock` here. This code is already // asynchronous from the caller's perspective, so the only effect of // `WithBlock` here is blocking shutdown - at the time of this writing, // that ends ups up making `kv` tests take twice as long. conn, err := ctx.GRPCDial(c.addr.String()) if err != nil { log.Errorf("failed to dial: %v", err) return } // Start gossiping. if err := c.gossip(g, NewGossipClient(conn), stopper); err != nil { if !grpcutil.IsClosedConnection(err) { g.mu.Lock() peerID := c.peerID g.mu.Unlock() if peerID != 0 { log.Infof("closing client to node %d (%s): %s", peerID, c.addr, err) } else { log.Infof("closing client to %s: %s", c.addr, err) } } } }) }
// bootstrapStores bootstraps uninitialized stores once the cluster // and node IDs have been established for this node. Store IDs are // allocated via a sequence id generator stored at a system key per // node. func (n *Node) bootstrapStores(bootstraps *list.List, stopper *stop.Stopper) { log.Infof("bootstrapping %d store(s)", bootstraps.Len()) if n.ClusterID == "" { panic("ClusterID missing during store bootstrap of auxiliary store") } // Bootstrap all waiting stores by allocating a new store id for // each and invoking store.Bootstrap() to persist. inc := int64(bootstraps.Len()) firstID, err := allocateStoreIDs(n.Descriptor.NodeID, inc, n.ctx.DB) if err != nil { log.Fatal(err) } sIdent := roachpb.StoreIdent{ ClusterID: n.ClusterID, NodeID: n.Descriptor.NodeID, StoreID: firstID, } for e := bootstraps.Front(); e != nil; e = e.Next() { s := e.Value.(*storage.Store) if err := s.Bootstrap(sIdent, stopper); err != nil { log.Fatal(err) } if err := s.Start(stopper); err != nil { log.Fatal(err) } n.stores.AddStore(s) sIdent.StoreID++ log.Infof("bootstrapped store %s", s) // Done regularly in Node.startGossip, but this cuts down the time // until this store is used for range allocations. s.GossipStore() } }
// runExterminate destroys the data held in the specified stores. func runExterminate(cmd *cobra.Command, args []string) { err := Context.Init("exterminate") if err != nil { log.Errorf("failed to initialize context: %s", err) return } // First attempt to shutdown the server. Note that an error of EOF just // means the HTTP server shutdown before the request to quit returned. if err := server.SendQuit(Context); err != nil { log.Infof("shutdown node %s: %s", Context.Addr, err) } else { log.Infof("shutdown node in anticipation of data extermination") } // Exterminate all data held in specified stores. for _, e := range Context.Engines { if rocksdb, ok := e.(*engine.RocksDB); ok { log.Infof("exterminating data from store %s", e) if err := rocksdb.Destroy(); err != nil { log.Fatalf("unable to destroy store %s: %s", e, err) } } } log.Infof("exterminated all data from stores %s", Context.Engines) }
// waitAndProcess waits for the pace interval and processes the replica
// if repl is not nil. The method returns true when the scanner needs
// to be stopped. The method also removes a replica from queues when it
// is signaled via the removed channel.
func (rs *replicaScanner) waitAndProcess(start time.Time, clock *hlc.Clock, stopper *stop.Stopper, repl *Replica) bool {
	// Pace the scan so a full pass takes the configured interval.
	waitInterval := rs.paceInterval(start, timeutil.Now())
	rs.waitTimer.Reset(waitInterval)
	if log.V(6) {
		log.Infof("Wait time interval set to %s", waitInterval)
	}
	for {
		select {
		case <-rs.waitTimer.C:
			// Mark the timer as read — presumably required by the project's
			// timer wrapper before the next Reset; confirm against its contract.
			rs.waitTimer.Read = true
			if repl == nil {
				return false
			}
			// Enqueue the replica under a stopper task; if the task cannot be
			// run (stopper draining), report "stop" to the caller.
			return !stopper.RunTask(func() {
				// Try adding replica to all queues.
				for _, q := range rs.queues {
					q.MaybeAdd(repl, clock.Now())
				}
			})
		case repl := <-rs.removed:
			// A removed replica (note: shadows the method argument) is purged
			// from all queues; the loop then resumes waiting on the timer.
			for _, q := range rs.queues {
				q.MaybeRemove(repl)
			}
			if log.V(6) {
				log.Infof("removed replica %s", repl)
			}
		case <-stopper.ShouldStop():
			return true
		}
	}
}
// start dials the remote addr and commences gossip once connected. Upon exit, // the client is sent on the disconnected channel. This method starts client // processing in a goroutine and returns immediately. func (c *client) start(g *Gossip, disconnected chan *client, ctx *rpc.Context, stopper *stop.Stopper) { stopper.RunWorker(func() { defer func() { disconnected <- c }() conn, err := ctx.GRPCDial(c.addr.String(), grpc.WithBlock()) if err != nil { log.Errorf("failed to dial: %v", err) return } // Start gossiping. if err := c.gossip(g, NewGossipClient(conn), stopper); err != nil { if !grpcutil.IsClosedConnection(err) { g.mu.Lock() peerID := c.peerID g.mu.Unlock() if peerID != 0 { log.Infof("closing client to node %d (%s): %s", peerID, c.addr, err) } else { log.Infof("closing client to %s: %s", c.addr, err) } } } }) }
// runExterminate destroys the data held in the specified stores. func runExterminate(cmd *cobra.Command, args []string) { if err := context.InitStores(); err != nil { log.Errorf("failed to initialize context: %s", err) return } // First attempt to shutdown the server. Note that an error of EOF just // means the HTTP server shutdown before the request to quit returned. admin := client.NewAdminClient(&context.Context, context.Addr, client.Quit) body, err := admin.Get() if err != nil { log.Infof("shutdown node %s: %s", context.Addr, err) } else { log.Infof("shutdown node in anticipation of data extermination: %s", body) } // Exterminate all data held in specified stores. for _, e := range context.Engines { if rocksdb, ok := e.(*engine.RocksDB); ok { log.Infof("exterminating data from store %s", e) if err := rocksdb.Destroy(); err != nil { log.Errorf("unable to destroy store %s: %s", e, err) osExit(1) } } } log.Infof("exterminated all data from stores %s", context.Engines) }
// createHealthCheck creates the cockroach health check if it does not exist. // Returns its resource link. func (g *Google) createHealthCheck() (string, error) { if check, err := g.getHealthCheck(); err == nil { log.Infof("found HealthCheck %s: %s", healthCheckName, check.SelfLink) return check.SelfLink, nil } op, err := g.computeService.HttpHealthChecks.Insert(g.project, &compute.HttpHealthCheck{ Name: healthCheckName, Port: g.context.Port, RequestPath: healthCheckPath, CheckIntervalSec: 2, TimeoutSec: 1, HealthyThreshold: 2, UnhealthyThreshold: 2, }).Do() if err != nil { return "", err } if err = g.waitForOperation(op); err != nil { return "", err } log.Infof("created HealthCheck %s: %s", healthCheckName, op.TargetLink) return op.TargetLink, nil }
// createFirewallRule creates the cockroach firewall if it does not exist. // It returns its resource link. func (g *Google) createFirewallRule() (string, error) { if rule, err := g.getFirewallRule(); err == nil { log.Infof("found FirewallRule %s: %s", firewallRuleName, rule.SelfLink) return rule.SelfLink, nil } op, err := g.computeService.Firewalls.Insert(g.project, &compute.Firewall{ Name: firewallRuleName, Allowed: []*compute.FirewallAllowed{ { IPProtocol: cockroachProtocol, Ports: []string{ fmt.Sprintf("%d", g.context.Port), }, }, }, SourceRanges: []string{ allIPAddresses, }, }).Do() if err != nil { return "", err } if err = g.waitForOperation(op); err != nil { return "", err } log.Infof("created FirewallRule %s: %s", firewallRuleName, op.TargetLink) return op.TargetLink, nil }
// get performs an HTTPS GET to the specified path for a specific node. func get(t *testing.T, base, rel string) []byte { // TODO(bram) #2059: Remove retry logic. url := fmt.Sprintf("%s/%s", base, rel) for r := retry.Start(retryOptions); r.Next(); { resp, err := cluster.HTTPClient.Get(url) if err != nil { log.Infof("could not GET %s - %s", url, err) continue } defer resp.Body.Close() body, err := ioutil.ReadAll(resp.Body) if err != nil { log.Infof("could not read body for %s - %s", url, err) continue } if resp.StatusCode != http.StatusOK { log.Infof("could not GET %s - statuscode: %d - body: %s", url, resp.StatusCode, body) continue } if log.V(1) { log.Infof("OK response from %s", url) } return body } t.Fatalf("There was an error retrieving %s", url) return []byte("") }
// initStores initializes the Stores map from ID to Store. Stores are
// added to the local sender if already bootstrapped. A bootstrapped
// Store has a valid ident with cluster, node and Store IDs set. If
// the Store doesn't yet have a valid ident, it's added to the
// bootstraps list for initialization once the cluster and node IDs
// have been determined.
func (n *Node) initStores(engines []engine.Engine, stopper *stop.Stopper) error {
	bootstraps := list.New()

	if len(engines) == 0 {
		return util.Errorf("no engines")
	}
	for _, e := range engines {
		s := storage.NewStore(n.ctx, e, &n.Descriptor)
		// Initialize each store in turn, handling un-bootstrapped errors by
		// adding the store to the bootstraps list.
		if err := s.Start(stopper); err != nil {
			if _, ok := err.(*storage.NotBootstrappedError); ok {
				log.Infof("store %s not bootstrapped", s)
				bootstraps.PushBack(s)
				continue
			}
			return util.Errorf("failed to start store: %s", err)
		}
		// A successfully-started store must carry a fully-populated ident;
		// anything else indicates a partially-initialized engine.
		if s.Ident.ClusterID == "" || s.Ident.NodeID == 0 {
			return util.Errorf("unidentified store: %s", s)
		}
		capacity, err := s.Capacity()
		if err != nil {
			return util.Errorf("could not query store capacity: %s", err)
		}
		log.Infof("initialized store %s: %+v", s, capacity)
		n.stores.AddStore(s)
	}

	// Verify all initialized stores agree on cluster and node IDs.
	if err := n.validateStores(); err != nil {
		return err
	}

	// Set the stores map as the gossip persistent storage, so that
	// gossip can bootstrap using the most recently persisted set of
	// node addresses.
	if err := n.ctx.Gossip.SetStorage(n.stores); err != nil {
		return fmt.Errorf("failed to initialize the gossip interface: %s", err)
	}

	// Connect gossip before starting bootstrap. For new nodes, connecting
	// to the gossip network is necessary to get the cluster ID.
	n.connectGossip()

	// If no NodeID has been assigned yet, allocate a new node ID by
	// supplying 0 to initNodeID.
	if n.Descriptor.NodeID == 0 {
		n.initNodeID(0)
	}

	// Bootstrap any uninitialized stores asynchronously.
	if bootstraps.Len() > 0 {
		stopper.RunAsyncTask(func() {
			n.bootstrapStores(bootstraps, stopper)
		})
	}

	return nil
}
func checkRangeReplication(t *testing.T, cluster *localcluster.Cluster, d time.Duration) { // Always talk to node 0. client, dbStopper := makeDBClient(t, cluster, 0) defer dbStopper.Stop() wantedReplicas := 3 if len(cluster.Nodes) < 3 { wantedReplicas = len(cluster.Nodes) } log.Infof("waiting for first range to have %d replicas", wantedReplicas) util.SucceedsWithin(t, d, func() error { select { case <-stopper: t.Fatalf("interrupted") return nil case <-time.After(1 * time.Second): } foundReplicas, err := countRangeReplicas(client) if err != nil { return err } log.Infof("found %d replicas", foundReplicas) if foundReplicas >= wantedReplicas { return nil } return fmt.Errorf("expected %d replicas, only found %d", wantedReplicas, foundReplicas) }) }
// GetClientTLSConfig returns the context client TLS config, initializing it if needed. // If Insecure is true, return a nil config, otherwise load a config based // on the Certs directory. If Certs is empty, use a very permissive config. // TODO(marc): empty Certs dir should fail when client certificates are required. func (ctx *Context) GetClientTLSConfig() (*tls.Config, error) { // Early out. if ctx.Insecure { return nil, nil } ctx.tlsConfigMu.Lock() defer ctx.tlsConfigMu.Unlock() if ctx.clientTLSConfig != nil { return ctx.clientTLSConfig, nil } if ctx.Certs != "" { if log.V(1) { log.Infof("setting up TLS from certificates directory: %s", ctx.Certs) } cfg, err := security.LoadClientTLSConfig(ctx.Certs, ctx.User) if err != nil { return nil, util.Errorf("error setting up client TLS config: %s", err) } ctx.clientTLSConfig = cfg } else { if log.V(1) { log.Infof("no certificates directory specified: using insecure TLS") } ctx.clientTLSConfig = security.LoadInsecureClientTLSConfig() } return ctx.clientTLSConfig, nil }
// MaybeAdd adds the specified replica if bq.shouldQueue specifies it // should be queued. Replicas are added to the queue using the priority // returned by bq.shouldQueue. If the queue is too full, the replica may // not be added, as the replica with the lowest priority will be // dropped. func (bq *baseQueue) MaybeAdd(repl *Replica, now roachpb.Timestamp) { // Load the system config. cfg := bq.gossip.GetSystemConfig() if cfg == nil { log.Infof("no system config available. skipping...") return } desc := repl.Desc() if !bq.impl.acceptsUnsplitRanges() && cfg.NeedsSplit(desc.StartKey, desc.EndKey) { // Range needs to be split due to zone configs, but queue does // not accept unsplit ranges. if log.V(3) { log.Infof("range %s needs to be split; not adding", repl) } return } bq.Lock() defer bq.Unlock() should, priority := bq.impl.shouldQueue(now, repl, cfg) if err := bq.addInternal(repl, should, priority); err != nil && log.V(3) { log.Infof("couldn't add %s to queue %s: %s", repl, bq.name, err) } }
func testGossipPeeringsInner(t *testing.T, c cluster.Cluster, cfg cluster.TestConfig) { num := c.NumNodes() deadline := timeutil.Now().Add(cfg.Duration) waitTime := longWaitTime if cfg.Duration < waitTime { waitTime = shortWaitTime } for timeutil.Now().Before(deadline) { checkGossip(t, c, waitTime, hasPeers(num)) // Restart the first node. log.Infof(context.Background(), "restarting node 0") if err := c.Restart(0); err != nil { t.Fatal(err) } checkGossip(t, c, waitTime, hasPeers(num)) // Restart another node (if there is one). var pickedNode int if num > 1 { pickedNode = rand.Intn(num-1) + 1 } log.Infof(context.Background(), "restarting node %d", pickedNode) if err := c.Restart(pickedNode); err != nil { t.Fatal(err) } checkGossip(t, c, waitTime, hasPeers(num)) } }
// process synchronously invokes admin split for each proposed split key. func (sq *splitQueue) process(now proto.Timestamp, rng *Range) error { // First handle case of splitting due to accounting and zone config maps. splitKeys := computeSplitKeys(sq.gossip, rng) if len(splitKeys) > 0 { log.Infof("splitting %s at keys %v", rng, splitKeys) for _, splitKey := range splitKeys { if err := sq.db.AdminSplit(splitKey); err != nil { return util.Errorf("unable to split %s at key %q: %s", rng, splitKey, err) } } return nil } // Next handle case of splitting due to size. zone, err := lookupZoneConfig(sq.gossip, rng) if err != nil { return err } // FIXME: why is this implementation not the same as the one above? if float64(rng.stats.GetSize())/float64(zone.RangeMaxBytes) > 1 { log.Infof("splitting %s size=%d max=%d", rng, rng.stats.GetSize(), zone.RangeMaxBytes) if err = rng.AddCmd(rng.context(), proto.Call{ Args: &proto.AdminSplitRequest{ RequestHeader: proto.RequestHeader{Key: rng.Desc().StartKey}, }, Reply: &proto.AdminSplitResponse{}, }, true); err != nil { return err } } return nil }
// RunCommitters lists stargazers by commits to subscribed repos, from // most prolific committer to least. func RunCommitters(c *fetch.Context, sg []*fetch.Stargazer, rs map[string]*fetch.Repo) error { log.Infof("running committers analysis") // Open file and prepare. f, err := createFile(c, "committers.csv") if err != nil { return util.Errorf("failed to create file: %s", err) } defer f.Close() w := csv.NewWriter(f) if err := w.Write([]string{"Login", "Email", "Commits", "Additions", "Deletions"}); err != nil { return util.Errorf("failed to write to CSV: %s", err) } // Sort the stargazers. slice := Contributors(sg) sort.Sort(slice) // Now accumulate by days. for _, s := range slice { c, a, d := s.TotalCommits() if c == 0 { break } if err := w.Write([]string{s.Login, s.Email, strconv.Itoa(c), strconv.Itoa(a), strconv.Itoa(d)}); err != nil { return util.Errorf("failed to write to CSV: %s", err) } } w.Flush() log.Infof("wrote committers analysis to %s", f.Name()) return nil }
// TestRangeSplitsWithSameKeyTwice check that second range split // on the same splitKey should not cause infinite retry loop. func TestRangeSplitsWithSameKeyTwice(t *testing.T) { defer leaktest.AfterTest(t) s := createTestDB(t) defer s.Stop() splitKey := roachpb.Key("aa") log.Infof("starting split at key %q...", splitKey) if err := s.DB.AdminSplit(splitKey); err != nil { t.Fatal(err) } log.Infof("split at key %q first time complete", splitKey) ch := make(chan error) go func() { // should return error other than infinite loop ch <- s.DB.AdminSplit(splitKey) }() select { case err := <-ch: if err == nil { t.Error("range split on same splitKey should fail") } case <-time.After(500 * time.Millisecond): t.Error("range split on same splitKey timed out") } }
// clearOverlappingCachedRangeDescriptors looks up and clears any
// cache entries which overlap the specified descriptor.
func (rdc *rangeDescriptorCache) clearOverlappingCachedRangeDescriptors(desc *roachpb.RangeDescriptor) {
	key := desc.EndKey
	metaKey := meta(key)

	// Clear out any descriptors which subsume the key which we're going
	// to cache. For example, if an existing KeyMin->KeyMax descriptor
	// should be cleared out in favor of a KeyMin->"m" descriptor.
	k, v, ok := rdc.rangeCache.Ceil(rangeCacheKey(metaKey))
	if ok {
		descriptor := v.(*roachpb.RangeDescriptor)
		// Only delete when the cached descriptor actually straddles key,
		// i.e. StartKey < key <= EndKey.
		if descriptor.StartKey.Less(key) && !descriptor.EndKey.Less(key) {
			if log.V(1) {
				log.Infof("clearing overlapping descriptor: key=%s desc=%s", k, descriptor)
			}
			rdc.rangeCache.Del(k.(rangeCacheKey))
		}
	}

	// Also clear any descriptors which are subsumed by the one we're
	// going to cache. This could happen on a merge (and also happens
	// when there's a lot of concurrency). Iterate from the range meta key
	// after RangeMetaKey(desc.StartKey) to the range meta key for desc.EndKey.
	rdc.rangeCache.DoRange(func(k, v interface{}) {
		if log.V(1) {
			log.Infof("clearing subsumed descriptor: key=%s desc=%s", k, v.(*roachpb.RangeDescriptor))
		}
		rdc.rangeCache.Del(k.(rangeCacheKey))
	}, rangeCacheKey(meta(desc.StartKey).Next()), rangeCacheKey(meta(desc.EndKey)))
}
// TestRangeSplitMeta executes various splits (including at meta addressing) // and checks that all created intents are resolved. This includes both intents // which are resolved synchronously with EndTransaction and via RPC. func TestRangeSplitMeta(t *testing.T) { defer leaktest.AfterTest(t) s := createTestDB(t) defer s.Stop() splitKeys := []roachpb.Key{roachpb.Key("G"), keys.RangeMetaKey(roachpb.Key("F")), keys.RangeMetaKey(roachpb.Key("K")), keys.RangeMetaKey(roachpb.Key("H"))} // Execute the consecutive splits. for _, splitKey := range splitKeys { log.Infof("starting split at key %q...", splitKey) if err := s.DB.AdminSplit(splitKey); err != nil { t.Fatal(err) } log.Infof("split at key %q complete", splitKey) } if err := util.IsTrueWithin(func() bool { if _, _, err := engine.MVCCScan(s.Eng, keys.LocalMax, roachpb.KeyMax, 0, roachpb.MaxTimestamp, true, nil); err != nil { log.Infof("mvcc scan should be clean: %s", err) return false } return true }, 500*time.Millisecond); err != nil { t.Error("failed to verify no dangling intents within 500ms") } }
// initNodeID updates the internal NodeDescriptor with the given ID. If zero is // supplied, a new NodeID is allocated with the first invocation. For all other // values, the supplied ID is stored into the descriptor (unless one has been // set previously, in which case a fatal error occurs). // // Upon setting a new NodeID, the descriptor is gossiped and the NodeID is // stored into the gossip instance. func (n *Node) initNodeID(id roachpb.NodeID) { if id < 0 { log.Fatalf("NodeID must not be negative") } if o := n.Descriptor.NodeID; o > 0 { if id == 0 { return } log.Fatalf("cannot initialize NodeID to %d, already have %d", id, o) } var err error if id == 0 { id, err = allocateNodeID(n.ctx.DB) log.Infof("new node allocated ID %d", id) if err != nil { log.Fatal(err) } if id == 0 { log.Fatal("new node allocated illegal ID 0") } n.ctx.Gossip.SetNodeID(id) } else { log.Infof("node ID %d initialized", id) } // Gossip the node descriptor to make this node addressable by node ID. n.Descriptor.NodeID = id if err = n.ctx.Gossip.SetNodeDescriptor(&n.Descriptor); err != nil { log.Fatalf("couldn't gossip descriptor for node %d: %s", n.Descriptor.NodeID, err) } }
func TestGossipPeerings(t *testing.T) { t.Skip("#3611") c := StartCluster(t) defer c.AssertAndStop(t) num := c.NumNodes() deadline := time.Now().Add(*duration) waitTime := longWaitTime if *duration < waitTime { waitTime = shortWaitTime } for time.Now().Before(deadline) { checkGossip(t, c, waitTime, hasPeers(num)) // Restart the first node. log.Infof("restarting node 0") if err := c.Restart(0); err != nil { t.Fatal(err) } checkGossip(t, c, waitTime, hasPeers(num)) // Restart another node (if there is one). var pickedNode int if num > 1 { pickedNode = rand.Intn(num-1) + 1 } log.Infof("restarting node %d", pickedNode) if err := c.Restart(pickedNode); err != nil { t.Fatal(err) } checkGossip(t, c, waitTime, hasPeers(num)) } }
// writeSummaries retrieves status summaries from the supplied // NodeStatusRecorder and persists them to the cockroach data store. func (s *Server) writeSummaries() error { nodeStatus, storeStatuses := s.recorder.GetStatusSummaries() if nodeStatus != nil { key := keys.NodeStatusKey(int32(nodeStatus.Desc.NodeID)) if err := s.db.Put(key, nodeStatus); err != nil { return err } if log.V(1) { statusJSON, err := json.Marshal(nodeStatus) if err != nil { log.Errorf("error marshaling nodeStatus to json: %s", err) } log.Infof("node %d status: %s", nodeStatus.Desc.NodeID, statusJSON) } } for _, ss := range storeStatuses { key := keys.StoreStatusKey(int32(ss.Desc.StoreID)) if err := s.db.Put(key, &ss); err != nil { return err } if log.V(1) { statusJSON, err := json.Marshal(&ss) if err != nil { log.Errorf("error marshaling storeStatus to json: %s", err) } log.Infof("store %d status: %s", ss.Desc.StoreID, statusJSON) } } return nil }
// start dials the remote addr and commences gossip once connected. // Upon exit, signals client is done by pushing it onto the done // channel. If the client experienced an error, its err field will // be set. This method starts client processing in a goroutine and // returns immediately. func (c *client) start(g *Gossip, done chan *client, context *rpc.Context, stopper *stop.Stopper) { stopper.RunWorker(func() { var err error c.rpcClient = rpc.NewClient(c.addr, context) select { case <-c.rpcClient.Healthy(): // Start gossiping and wait for disconnect or error. err = c.gossip(g, stopper) if context.DisableCache { c.rpcClient.Close() } case <-c.rpcClient.Closed: err = util.Errorf("client closed") } done <- c if err != nil { if c.peerID != 0 { log.Infof("closing client to node %d (%s): %s", c.peerID, c.addr, err) } else { log.Infof("closing client to %s: %s", c.addr, err) } } }) }
// handleWriteReady converts a set of raft.Ready structs into a writeRequest // to be persisted, marks the group as writing and sends it to the writeTask. func (s *state) handleWriteReady(readyGroups map[uint64]raft.Ready) { if log.V(6) { log.Infof("node %v write ready, preparing request", s.nodeID) } writeRequest := newWriteRequest() for groupID, ready := range readyGroups { raftGroupID := proto.RaftID(groupID) g, ok := s.groups[raftGroupID] if !ok { if log.V(6) { log.Infof("dropping write request to group %d", groupID) } continue } g.writing = true gwr := &groupWriteRequest{} if !raft.IsEmptyHardState(ready.HardState) { gwr.state = ready.HardState } if !raft.IsEmptySnap(ready.Snapshot) { gwr.snapshot = ready.Snapshot } if len(ready.Entries) > 0 { gwr.entries = ready.Entries } writeRequest.groups[raftGroupID] = gwr } s.writeTask.in <- writeRequest }
func (l *LocalCluster) runDockerSpy() { l.panicOnStop() create := func() (*Container, error) { return createContainer(l, dockerclient.ContainerConfig{ Image: dockerspyImage, Cmd: []string{"--dns-domain=" + domain}, }) } c, err := create() if err == dockerclient.ErrImageNotFound { log.Infof("pulling %s", dockerspyImage) err = l.client.PullImage(dockerspyImage, nil) if err == nil { c, err = create() } } if err != nil { panic(err) } maybePanic(c.Start([]string{"/var/run/docker.sock:/var/run/docker.sock"}, nil, nil)) c.Name = "docker-spy" l.dns = c if ci, err := c.Inspect(); err != nil { log.Error(err) } else { log.Infof("started %s: %s", c.Name, ci.NetworkSettings.IPAddress) } }
func (bq *baseQueue) processOne(clock *hlc.Clock) { start := time.Now() bq.Lock() repl := bq.pop() bq.Unlock() if repl != nil { now := clock.Now() if log.V(1) { log.Infof("processing replica %s from %s queue...", repl, bq.name) } // If the queue requires a replica to have the range leader lease in // order to be processed, check whether this replica has leader lease // and renew or acquire if necessary. if bq.impl.needsLeaderLease() { // Create a "fake" get request in order to invoke redirectOnOrAcquireLease. args := &proto.GetRequest{RequestHeader: proto.RequestHeader{Timestamp: now}} if err := repl.redirectOnOrAcquireLeaderLease(nil /* Trace */, args.Header().Timestamp); err != nil { if log.V(1) { log.Infof("this replica of %s could not acquire leader lease; skipping...", repl) } return } } if err := bq.impl.process(now, repl); err != nil { log.Errorf("failure processing replica %s from %s queue: %s", repl, bq.name, err) } else if log.V(2) { log.Infof("processed replica %s from %s queue in %s", repl, bq.name, time.Now().Sub(start)) } } }
func pullImage(l *LocalCluster, options types.ImagePullOptions) error { log.Infof("ImagePull %s:%s starting", options.ImageID, options.Tag) defer log.Infof("ImagePull %s:%s complete", options.ImageID, options.Tag) rc, err := l.client.ImagePull(context.Background(), options, nil) if err != nil { return err } defer rc.Close() dec := json.NewDecoder(rc) for { // Using `interface{}` to avoid dependency on github.com/docker/docker. See // https://github.com/docker/engine-api/issues/89. var message interface{} if err := dec.Decode(&message); err != nil { if err == io.EOF { _, _ = fmt.Fprintln(os.Stderr) return nil } return err } // The message is a status bar. if log.V(2) { log.Infof("ImagePull response: %s", message) } else { _, _ = fmt.Fprintf(os.Stderr, ".") } } }