// runStart starts the cockroach node using --stores as the list of
// storage devices ("stores") on this machine and --gossip as the list
// of "well-known" hosts used to join this node to the cockroach
// cluster via the gossip network.
func runStart(cmd *cobra.Command, args []string) {
	info := util.GetBuildInfo()
	log.Infof("build Vers: %s", info.Vers)
	log.Infof("build Tag: %s", info.Tag)
	log.Infof("build Time: %s", info.Time)
	log.Infof("build Deps: %s", info.Deps)

	// Default user for servers.
	Context.User = security.NodeUser

	// First initialize the Context as it is used in other places.
	err := Context.Init("start")
	if err != nil {
		log.Errorf("failed to initialize context: %s", err)
		return
	}

	log.Info("starting cockroach cluster")
	stopper := util.NewStopper()
	stopper.AddWorker()
	s, err := server.NewServer(Context, stopper)
	if err != nil {
		log.Errorf("failed to start Cockroach server: %s", err)
		return
	}

	err = s.Start(false)
	if err != nil {
		log.Errorf("cockroach server exited with error: %s", err)
		return
	}

	signalCh := make(chan os.Signal, 1)
	signal.Notify(signalCh, os.Interrupt, os.Kill)
	// TODO(spencer): move this behind a build tag.
	signal.Notify(signalCh, syscall.SIGTERM)

	// Block until one of the signals above is received or the stopper
	// is stopped externally (for example, via the quit endpoint).
	select {
	case <-stopper.ShouldStop():
		stopper.SetStopped()
	case <-signalCh:
		log.Infof("initiating graceful shutdown of server")
		stopper.SetStopped()
		go func() {
			s.Stop()
		}()
	}

	select {
	case <-signalCh:
		log.Warningf("second signal received, initiating hard shutdown")
	case <-time.After(time.Minute):
		log.Warningf("time limit reached, initiating hard shutdown")
		return
	case <-stopper.IsStopped():
		log.Infof("server drained and shutdown completed")
	}
	log.Flush()
}
// runStart starts the cockroach node using -stores as the list of
// storage devices ("stores") on this machine and -gossip as the list
// of "well-known" hosts used to join this node to the cockroach
// cluster via the gossip network.
func runStart(cmd *commander.Command, args []string) {
	info := util.GetBuildInfo()
	log.Infof("Build Vers: %s", info.Vers)
	log.Infof("Build Tag: %s", info.Tag)
	log.Infof("Build Time: %s", info.Time)
	log.Infof("Build Deps: %s", info.Deps)

	log.Info("Starting cockroach cluster")
	s, err := server.NewServer(Context)
	if err != nil {
		log.Errorf("Failed to start Cockroach server: %v", err)
		return
	}

	err = Context.Init()
	if err != nil {
		log.Errorf("Failed to initialize context: %v", err)
		return
	}

	err = s.Start(false)
	defer s.Stop()
	if err != nil {
		log.Errorf("Cockroach server exited with error: %v", err)
		return
	}

	c := make(chan os.Signal, 1)
	signal.Notify(c, os.Interrupt, os.Kill)

	// Block until one of the signals above is received.
	<-c
}
func runGenManCmd(cmd *cobra.Command, args []string) error {
	info := util.GetBuildInfo()
	header := &doc.GenManHeader{
		Section: "1",
		Manual:  "CockroachDB Manual",
		Source:  fmt.Sprintf("CockroachDB %s", info.Tag),
	}

	if !strings.HasSuffix(manPath, string(os.PathSeparator)) {
		manPath += string(os.PathSeparator)
	}

	if _, err := os.Stat(manPath); err != nil {
		if os.IsNotExist(err) {
			if err := os.MkdirAll(manPath, 0755); err != nil {
				return err
			}
		} else {
			return err
		}
	}

	if err := doc.GenManTree(cmd.Root(), header, manPath); err != nil {
		return err
	}

	// TODO(cdo): The man page generated by the cobra package doesn't include a list of commands, so
	// one has to notice the "See Also" section at the bottom of the page to know which commands
	// are supported. I'd like to make this better somehow.

	fmt.Println("Generated CockroachDB man pages in", manPath)
	return nil
}
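runGenManCmd has the RunE signature that cobra expects, so it can be attached directly to a subcommand. Below is a minimal sketch of how such a command might be wired up; the "genman" command name, the flag name, and its default value are assumptions for illustration, not taken from the snippet above.

// Hypothetical wiring for runGenManCmd; command name, flag name, and default
// path are assumptions for illustration.
var manPath string

var genManCmd = &cobra.Command{
	Use:   "genman",
	Short: "generate man pages for CockroachDB",
	RunE:  runGenManCmd,
}

func init() {
	genManCmd.Flags().StringVar(&manPath, "path", "man/man1/",
		"directory where man pages are written")
	cockroachCmd.AddCommand(genManCmd)
}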
func TestCheckVersion(t *testing.T) {
	defer leaktest.AfterTest(t)()

	updateChecks := int32(0)
	uuid := ""
	version := ""

	recorder := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		defer r.Body.Close()
		atomic.AddInt32(&updateChecks, 1)
		uuid = r.URL.Query().Get("uuid")
		version = r.URL.Query().Get("version")
	}))

	s := StartTestServer(t)
	s.parsedUpdatesURL, _ = url.Parse(recorder.URL)
	s.checkForUpdates()
	recorder.Close()
	s.Stop()

	if expected, actual := int32(1), atomic.LoadInt32(&updateChecks); actual != expected {
		t.Fatalf("expected %v update checks, got %v", expected, actual)
	}

	if expected, actual := s.node.ClusterID.String(), uuid; expected != actual {
		t.Errorf("expected uuid %v, got %v", expected, actual)
	}

	if expected, actual := util.GetBuildInfo().Tag, version; expected != actual {
		t.Errorf("expected version tag %v, got %v", expected, actual)
	}
}
// GetStatusSummary returns a status summary message for the node. The summary
// includes the recent values of metrics for both the node and all of its
// component stores.
func (mr *MetricsRecorder) GetStatusSummary() *NodeStatus {
	mr.mu.Lock()
	defer mr.mu.Unlock()

	if mr.mu.nodeID == 0 {
		// We haven't yet processed initialization information; do nothing.
		if log.V(1) {
			log.Warning("MetricsRecorder.GetStatusSummary called before NodeID allocation.")
		}
		return nil
	}

	now := mr.mu.clock.PhysicalNow()

	// Generate a node status with no store data.
	nodeStat := &NodeStatus{
		Desc:          mr.mu.desc,
		BuildInfo:     util.GetBuildInfo(),
		UpdatedAt:     now,
		StartedAt:     mr.mu.startedAt,
		StoreStatuses: make([]StoreStatus, 0, mr.mu.lastSummaryCount),
		Metrics:       make(map[string]float64, mr.mu.lastNodeMetricCount),
	}

	eachRecordableValue(mr.nodeRegistry, func(name string, val float64) {
		nodeStat.Metrics[name] = val
	})

	// Generate status summaries for stores.
	for storeID, r := range mr.mu.storeRegistries {
		storeMetrics := make(map[string]float64, mr.mu.lastStoreMetricCount)
		eachRecordableValue(r, func(name string, val float64) {
			storeMetrics[name] = val
		})

		// Gather descriptor from store.
		descriptor, err := mr.mu.stores[storeID].Descriptor()
		if err != nil {
			log.Errorf("Could not record status summaries: Store %d could not return descriptor, error: %s", storeID, err)
		}

		nodeStat.StoreStatuses = append(nodeStat.StoreStatuses, StoreStatus{
			Desc:    *descriptor,
			Metrics: storeMetrics,
		})
	}

	mr.mu.lastSummaryCount = len(nodeStat.StoreStatuses)
	mr.mu.lastNodeMetricCount = len(nodeStat.Metrics)
	if len(nodeStat.StoreStatuses) > 0 {
		mr.mu.lastStoreMetricCount = len(nodeStat.StoreStatuses[0].Metrics)
	}

	return nodeStat
}
// handleLocalStatus handles GET requests for local-node status.
func (s *statusServer) handleLocalStatus(w http.ResponseWriter, r *http.Request) {
	w.Header().Set("Content-Type", "application/json")
	local := struct {
		BuildInfo util.BuildInfo `json:"buildInfo"`
	}{
		BuildInfo: util.GetBuildInfo(),
	}
	b, err := s.marshalJSON(r, local)
	if err != nil {
		log.Error(err)
		w.WriteHeader(http.StatusInternalServerError)
		return
	}
	w.Write(b)
}
// handleLocalStatus handles GET requests for local-node status.
func (s *statusServer) handleLocalStatus(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
	local := struct {
		Address   util.UnresolvedAddr `json:"address"`
		BuildInfo util.BuildInfo      `json:"buildInfo"`
	}{
		BuildInfo: util.GetBuildInfo(),
	}
	if addr, err := s.gossip.GetNodeIDAddress(s.gossip.GetNodeID()); err == nil {
		local.Address = addr.(util.UnresolvedAddr)
	}
	b, contentType, err := util.MarshalResponse(r, local, []util.EncodingType{util.JSONEncoding})
	if err != nil {
		log.Error(err)
		w.WriteHeader(http.StatusInternalServerError)
		return
	}
	w.Header().Set(util.ContentTypeHeader, contentType)
	w.Write(b)
}
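Since the handler serializes the anonymous struct above into JSON, a client can decode the buildInfo field directly. The following is a minimal sketch of such a client, assuming the handler is mounted at /_status/local/ (the route registration is not shown above) and that net/http and encoding/json are imported.

// Hypothetical client for the local status handler above. The endpoint path
// is an assumption; only the buildInfo field is decoded here.
func fetchLocalBuildInfo(baseURL string) (util.BuildInfo, error) {
	var local struct {
		BuildInfo util.BuildInfo `json:"buildInfo"`
	}
	resp, err := http.Get(baseURL + "/_status/local/")
	if err != nil {
		return util.BuildInfo{}, err
	}
	defer resp.Body.Close()
	if err := json.NewDecoder(resp.Body).Decode(&local); err != nil {
		return util.BuildInfo{}, err
	}
	return local.BuildInfo, nil
}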
func (s *Server) checkForUpdates() {
	// Don't phone home in tests (SetupReportingURLs is called in cli/start.go).
	if s.parsedUpdatesURL == nil {
		return
	}

	q := s.parsedUpdatesURL.Query()
	q.Set("version", util.GetBuildInfo().Tag)
	q.Set("uuid", s.node.ClusterID.String())
	s.parsedUpdatesURL.RawQuery = q.Encode()

	res, err := http.Get(s.parsedUpdatesURL.String())
	if err != nil {
		// This is probably going to be relatively common in production
		// environments where network access is usually curtailed.
		if log.V(2) {
			log.Warning("Error checking for updates: ", err)
		}
		return
	}
	defer res.Body.Close()

	decoder := json.NewDecoder(res.Body)
	r := struct {
		Details []versionInfo `json:"details"`
	}{}

	err = decoder.Decode(&r)
	if err != nil && err != io.EOF {
		log.Warning("Error decoding updates info: ", err)
		return
	}

	for _, v := range r.Details {
		log.Infof("A new version is available: %s\n\t%s", v.Version, v.Details)
	}
}
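checkForUpdates decodes a JSON document whose top-level "details" array carries one entry per available version. A minimal sketch of the shape it accepts is below; the JSON field tags on versionInfo are assumptions, since its definition is not shown above.

// Hypothetical definition of versionInfo matching the decode in
// checkForUpdates; the JSON tags are assumptions.
type versionInfo struct {
	Version string `json:"version"`
	Details string `json:"details"`
}

// Example payload checkForUpdates would accept (values are placeholders):
//   {"details": [{"version": "<tag>", "details": "<release notes>"}]}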
func (s *Server) reportUsage() {
	// Don't phone home in tests (SetupReportingURLs is called in cli/start.go).
	if s.parsedReportingURL == nil {
		return
	}

	b := new(bytes.Buffer)
	if err := json.NewEncoder(b).Encode(s.getReportingInfo()); err != nil {
		log.Warning(err)
		return
	}

	q := s.parsedReportingURL.Query()
	q.Set("version", util.GetBuildInfo().Tag)
	q.Set("uuid", s.node.ClusterID.String())
	s.parsedReportingURL.RawQuery = q.Encode()

	_, err := http.Post(s.parsedReportingURL.String(), "application/json", b)
	if err != nil && log.V(2) {
		// This is probably going to be relatively common in production
		// environments where network access is usually curtailed.
		log.Warning("Error reporting node usage metrics: ", err)
	}
}

	"github.com/spf13/cobra"

	"github.com/cockroachdb/cockroach/util"
)

// Proxy to allow overrides in tests.
var osStderr = os.Stderr

var versionCmd = &cobra.Command{
	Use:   "version",
	Short: "output version information",
	Long: `
Output build version information.
`,
	Run: func(cmd *cobra.Command, args []string) {
		info := util.GetBuildInfo()
		tw := tabwriter.NewWriter(os.Stdout, 2, 1, 2, ' ', 0)
		fmt.Fprintf(tw, "Build Vers: %s\n", info.Vers)
		fmt.Fprintf(tw, "Build Tag: %s\n", info.Tag)
		fmt.Fprintf(tw, "Build Time: %s\n", info.Time)
		fmt.Fprintf(tw, "Build Deps:\n\t%s\n",
			strings.Replace(strings.Replace(info.Deps, " ", "\n\t", -1), ":", "\t", -1))
		_ = tw.Flush()
	},
}

var cockroachCmd = &cobra.Command{
	Use:   "cockroach [command] (flags)",
	Short: "CockroachDB command-line interface and server",
	// TODO(cdo): Add a pointer to the docs in Long.
	Long: `CockroachDB command-line interface and server.`,
func checkNodeStatus(t *testing.T, c cliTest, output string, start time.Time) {
	buf := bytes.NewBufferString(output)
	s := bufio.NewScanner(buf)

	// Skip command line.
	if !s.Scan() {
		t.Fatalf("Couldn't skip command line: %s", s.Err())
	}

	checkSeparatorLine(t, s)

	// Check column names.
	if !s.Scan() {
		t.Fatalf("Error reading column names: %s", s.Err())
	}
	cols, err := extractFields(s.Text())
	if err != nil {
		t.Fatalf("%s", err)
	}
	if !reflect.DeepEqual(cols, nodesColumnHeaders) {
		t.Fatalf("columns (%s) don't match expected (%s)", cols, nodesColumnHeaders)
	}

	checkSeparatorLine(t, s)

	// Check node status.
	if !s.Scan() {
		t.Fatalf("error reading node status: %s", s.Err())
	}
	fields, err := extractFields(s.Text())
	if err != nil {
		t.Fatalf("%s", err)
	}

	nodeID := c.Gossip().GetNodeID()
	nodeIDStr := strconv.FormatInt(int64(nodeID), 10)
	if a, e := fields[0], nodeIDStr; a != e {
		t.Errorf("node id (%s) != expected (%s)", a, e)
	}

	nodeAddr, err := c.Gossip().GetNodeIDAddress(nodeID)
	if err != nil {
		t.Fatal(err)
	}
	if a, e := fields[1], nodeAddr.String(); a != e {
		t.Errorf("node address (%s) != expected (%s)", a, e)
	}

	// Verify Build Tag.
	if a, e := fields[2], util.GetBuildInfo().Tag; a != e {
		t.Errorf("build tag (%s) != expected (%s)", a, e)
	}

	// Verify that updated_at and started_at are reasonably recent.
	// CircleCI can be very slow. This was flaky at 5s.
	checkTimeElapsed(t, fields[3], 15*time.Second, start)
	checkTimeElapsed(t, fields[4], 15*time.Second, start)

	// Verify all byte/range metrics.
	testcases := []struct {
		name   string
		idx    int
		maxval int64
	}{
		{"live_bytes", 5, 30000},
		{"key_bytes", 6, 30000},
		{"value_bytes", 7, 30000},
		{"intent_bytes", 8, 30000},
		{"system_bytes", 9, 30000},
		{"leader_ranges", 10, 3},
		{"repl_ranges", 11, 3},
		{"avail_ranges", 12, 3},
	}
	for _, tc := range testcases {
		val, err := strconv.ParseInt(fields[tc.idx], 10, 64)
		if err != nil {
			t.Errorf("couldn't parse %s '%s': %v", tc.name, fields[tc.idx], err)
			continue
		}
		if val < 0 {
			t.Errorf("value for %s (%d) cannot be less than 0", tc.name, val)
			continue
		}
		if val > tc.maxval {
			t.Errorf("value for %s (%d) greater than max (%d)", tc.name, val, tc.maxval)
		}
	}

	checkSeparatorLine(t, s)
}
// runStart starts the cockroach node using --stores as the list of
// storage devices ("stores") on this machine and --gossip as the list
// of "well-known" hosts used to join this node to the cockroach
// cluster via the gossip network.
func runStart(cmd *cobra.Command, args []string) {
	info := util.GetBuildInfo()
	log.Infof("build Vers: %s", info.Vers)
	log.Infof("build Tag: %s", info.Tag)
	log.Infof("build Time: %s", info.Time)
	log.Infof("build Deps: %s", info.Deps)

	// Default user for servers.
	context.User = security.NodeUser

	if context.EphemeralSingleNode {
		context.Stores = "mem=1073741824"
		context.GossipBootstrap = server.SelfGossipAddr

		runInit(cmd, args)
	} else {
		if err := context.InitStores(); err != nil {
			log.Errorf("failed to initialize stores: %s", err)
			return
		}
	}

	if err := context.InitNode(); err != nil {
		log.Errorf("failed to initialize node: %s", err)
		return
	}

	log.Info("starting cockroach cluster")
	stopper := stop.NewStopper()
	s, err := server.NewServer(context, stopper)
	if err != nil {
		log.Errorf("failed to start Cockroach server: %s", err)
		return
	}

	if err := s.Start(false); err != nil {
		log.Errorf("cockroach server exited with error: %s", err)
		return
	}

	if context.EphemeralSingleNode {
		// TODO(tamird): pass this to BootstrapRange rather than doing it
		// at runtime. This was quicker, though.
		db, clientStopper := makeDBClient()

		if err := configutil.SetDefaultRangeReplicaNum(db, 1); err != nil {
			log.Errorf("failed to set default replica number: %s", err)
		}

		clientStopper.Stop()
	}

	signalCh := make(chan os.Signal, 1)
	signal.Notify(signalCh, os.Interrupt, os.Kill)
	// TODO(spencer): move this behind a build tag.
	signal.Notify(signalCh, syscall.SIGTERM)

	// Block until one of the signals above is received or the stopper
	// is stopped externally (for example, via the quit endpoint).
	select {
	case <-stopper.ShouldStop():
	case <-signalCh:
		go s.Stop()
	}

	log.Info("initiating graceful shutdown of server")

	go func() {
		ticker := time.NewTicker(5 * time.Second)
		defer ticker.Stop()
		for {
			select {
			case <-ticker.C:
				if log.V(1) {
					log.Infof("running tasks:\n%s", stopper.RunningTasks())
				}
				log.Infof("%d running tasks", stopper.NumTasks())
			case <-stopper.ShouldStop():
				return
			}
		}
	}()

	select {
	case <-signalCh:
		log.Warningf("second signal received, initiating hard shutdown")
	case <-time.After(time.Minute):
		log.Warningf("time limit reached, initiating hard shutdown")
	case <-stopper.IsStopped():
		log.Infof("server drained and shutdown completed")
	}
	log.Flush()
}
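The two-phase shutdown used by runStart above (one select that starts a graceful drain, a second select that escalates to a hard shutdown on another signal or a deadline) is a reusable pattern. The following is a minimal, self-contained sketch of the same idea using only the standard library; the drained channel and the simulated drain stand in for the stopper and are assumptions, not CockroachDB APIs.

package main

import (
	"log"
	"os"
	"os/signal"
	"syscall"
	"time"
)

func main() {
	// drained is closed once draining finishes; it plays the role of
	// stopper.IsStopped() in the snippet above.
	drained := make(chan struct{})

	signalCh := make(chan os.Signal, 1)
	signal.Notify(signalCh, os.Interrupt, syscall.SIGTERM)

	// Phase 1: wait for the first signal, then start a graceful drain.
	<-signalCh
	log.Print("initiating graceful shutdown")
	go func() {
		// Real drain work would happen here; a sleep simulates it.
		time.Sleep(2 * time.Second)
		close(drained)
	}()

	// Phase 2: escalate to a hard shutdown on a second signal or a deadline.
	select {
	case <-signalCh:
		log.Print("second signal received, initiating hard shutdown")
	case <-time.After(time.Minute):
		log.Print("time limit reached, initiating hard shutdown")
	case <-drained:
		log.Print("server drained and shutdown completed")
	}
}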
func main() {
	// Instruct Go to use all CPU cores.
	// TODO(spencer): this may be excessive and result in worse
	// performance. We should keep an eye on this as we move to
	// production workloads.
	numCPU := runtime.NumCPU()
	runtime.GOMAXPROCS(numCPU)
	rand.Seed(util.NewPseudoSeed())
	log.V(1).Infof("running using %d processor cores", numCPU)

	c := commander.Commander{
		Name: "cockroach",
		Commands: []*commander.Command{
			cli.CmdInit,
			cli.CmdGetZone,
			cli.CmdLsZones,
			cli.CmdRmZone,
			cli.CmdSetZone,
			cli.CmdStart,
			{
				UsageLine: "listparams",
				Short:     "list all available parameters and their default values",
				Long: `
List all available parameters and their default values.
Note that parameter parsing stops after the first non-
option after the command name. Hence, the options need
to precede any additional arguments,

  cockroach <command> [options] [arguments].`,
				Run: func(cmd *commander.Command, args []string) {
					flag.CommandLine.PrintDefaults()
				},
			},
			{
				UsageLine: "version",
				Short:     "output version information",
				Long: `
Output build version information.
`,
				Run: func(cmd *commander.Command, args []string) {
					info := util.GetBuildInfo()
					w := &tabwriter.Writer{}
					w.Init(os.Stdout, 2, 1, 2, ' ', 0)
					fmt.Fprintf(w, "Build Vers: %s\n", info.Vers)
					fmt.Fprintf(w, "Build Tag: %s\n", info.Tag)
					fmt.Fprintf(w, "Build Time: %s\n", info.Time)
					fmt.Fprintf(w, "Build Deps:\n\t%s\n",
						strings.Replace(strings.Replace(info.Deps, " ", "\n\t", -1), ":", "\t", -1))
					w.Flush()
				},
			},
		},
	}

	cli.InitFlags(cli.Context)

	if len(os.Args) == 1 {
		os.Args = append(os.Args, "help")
	}
	if err := c.Run(os.Args[1:]); err != nil {
		log.Fatalf("Failed running command %q: %v\n", os.Args[1:], err)
	}
}
// TestMetricsRecorder verifies that the metrics recorder properly formats the
// statistics from various registries, both for Time Series and for Status
// Summaries.
func TestMetricsRecorder(t *testing.T) {
	defer leaktest.AfterTest(t)()

	// ========================================
	// Construct a series of fake descriptors for use in test.
	// ========================================
	nodeDesc := roachpb.NodeDescriptor{
		NodeID: roachpb.NodeID(1),
	}
	storeDesc1 := roachpb.StoreDescriptor{
		StoreID: roachpb.StoreID(1),
		Capacity: roachpb.StoreCapacity{
			Capacity:  100,
			Available: 50,
		},
	}
	storeDesc2 := roachpb.StoreDescriptor{
		StoreID: roachpb.StoreID(2),
		Capacity: roachpb.StoreCapacity{
			Capacity:  200,
			Available: 75,
		},
	}

	// ========================================
	// Create registries and add them to the recorder (two node-level, two
	// store-level).
	// ========================================
	reg1 := metric.NewRegistry()
	reg2 := metric.NewRegistry()
	store1 := fakeStore{
		storeID:  roachpb.StoreID(1),
		desc:     storeDesc1,
		registry: metric.NewRegistry(),
	}
	store2 := fakeStore{
		storeID:  roachpb.StoreID(2),
		desc:     storeDesc2,
		registry: metric.NewRegistry(),
	}
	manual := hlc.NewManualClock(100)
	recorder := NewMetricsRecorder(hlc.NewClock(manual.UnixNano))
	recorder.AddNodeRegistry("one.%s", reg1)
	recorder.AddNodeRegistry("two.%s", reg1)
	recorder.AddStore(store1)
	recorder.AddStore(store2)
	recorder.NodeStarted(nodeDesc, 50)

	// Ensure the metric system's view of time does not advance during this test
	// as the test expects time to not advance too far which would age the actual
	// data (e.g. in histograms) unexpectedly.
	defer metric.TestingSetNow(func() time.Time {
		return time.Unix(0, manual.UnixNano()).UTC()
	})()

	// ========================================
	// Generate Metrics Data & Expected Results
	// ========================================

	// Flatten the four registries into an array for ease of use.
	regList := []struct {
		reg    *metric.Registry
		prefix string
		source int64
		isNode bool
	}{
		{
			reg:    reg1,
			prefix: "one.",
			source: 1,
			isNode: true,
		},
		{
			reg:    reg2,
			prefix: "two.",
			source: 1,
			isNode: true,
		},
		{
			reg:    store1.registry,
			prefix: "",
			source: int64(store1.storeID),
			isNode: false,
		},
		{
			reg:    store2.registry,
			prefix: "",
			source: int64(store2.storeID),
			isNode: false,
		},
	}

	// Every registry will have a copy of the following metrics.
	metricNames := []struct {
		name string
		typ  string
		val  int64
	}{
		{"testGauge", "gauge", 20},
		{"testGaugeFloat64", "floatgauge", 20},
		{"testCounter", "counter", 5},
		{"testRate", "rate", 2},
		{"testHistogram", "histogram", 10},
		{"testLatency", "latency", 10},

		// Stats needed for store summaries.
		{"ranges", "counter", 1},
		{"ranges.leader", "gauge", 1},
		{"ranges.replicated", "gauge", 1},
		{"ranges.available", "gauge", 1},
	}

	// Add the metrics to each registry and set their values. At the same time,
	// generate expected time series results and status summary metric values.
	var expected []ts.TimeSeriesData
	expectedNodeSummaryMetrics := make(map[string]float64)
	expectedStoreSummaryMetrics := make(map[string]float64)

	// addExpected generates expected data for a single metric data point.
	addExpected := func(prefix, name string, source, time, val int64, isNode bool) {
		// Generate time series data.
		tsPrefix := "cr.node."
		if !isNode {
			tsPrefix = "cr.store."
		}
		expect := ts.TimeSeriesData{
			Name:   tsPrefix + prefix + name,
			Source: strconv.FormatInt(source, 10),
			Datapoints: []*ts.TimeSeriesDatapoint{
				{
					TimestampNanos: time,
					Value:          float64(val),
				},
			},
		}
		expected = append(expected, expect)

		// Generate status summary data.
		if isNode {
			expectedNodeSummaryMetrics[prefix+name] = float64(val)
		} else {
			// This can overwrite the previous value, but this is expected as
			// all stores in our tests have identical values; when comparing
			// status summaries, the same map is used as expected data for all
			// stores.
			expectedStoreSummaryMetrics[prefix+name] = float64(val)
		}
	}

	for _, reg := range regList {
		for _, data := range metricNames {
			switch data.typ {
			case "gauge":
				reg.reg.Gauge(data.name).Update(data.val)
				addExpected(reg.prefix, data.name, reg.source, 100, data.val, reg.isNode)
			case "floatgauge":
				reg.reg.GaugeFloat64(data.name).Update(float64(data.val))
				addExpected(reg.prefix, data.name, reg.source, 100, data.val, reg.isNode)
			case "counter":
				reg.reg.Counter(data.name).Inc(data.val)
				addExpected(reg.prefix, data.name, reg.source, 100, data.val, reg.isNode)
			case "rate":
				reg.reg.Rates(data.name).Add(data.val)
				addExpected(reg.prefix, data.name+"-count", reg.source, 100, data.val, reg.isNode)
				for _, scale := range metric.DefaultTimeScales {
					// Rate data is subject to timing errors in tests. Zero out
					// these values.
					addExpected(reg.prefix, data.name+sep+scale.Name(), reg.source, 100, 0, reg.isNode)
				}
			case "histogram":
				reg.reg.Histogram(data.name, time.Second, 1000, 2).RecordValue(data.val)
				for _, q := range recordHistogramQuantiles {
					addExpected(reg.prefix, data.name+q.suffix, reg.source, 100, data.val, reg.isNode)
				}
			case "latency":
				reg.reg.Latency(data.name).RecordValue(data.val)
				// Latency is simply three histograms (at different resolution
				// time scales).
				for _, scale := range metric.DefaultTimeScales {
					for _, q := range recordHistogramQuantiles {
						addExpected(reg.prefix, data.name+sep+scale.Name()+q.suffix, reg.source, 100, data.val, reg.isNode)
					}
				}
			}
		}
	}

	// ========================================
	// Verify time series data
	// ========================================
	actual := recorder.GetTimeSeriesData()

	// Zero out timing-sensitive rate values from actual data.
	for _, act := range actual {
		match, err := regexp.MatchString(`testRate-\d+m`, act.Name)
		if err != nil {
			t.Fatal(err)
		}
		if match {
			act.Datapoints[0].Value = 0.0
		}
	}

	// Actual comparison is simple: sort the resulting arrays by time and name,
	// and use reflect.DeepEqual.
	sort.Sort(byTimeAndName(actual))
	sort.Sort(byTimeAndName(expected))
	if a, e := actual, expected; !reflect.DeepEqual(a, e) {
		t.Errorf("recorder did not yield expected time series collection; diff:\n %v", pretty.Diff(e, a))
	}

	// ========================================
	// Verify node summary generation
	// ========================================
	expectedNodeSummary := &NodeStatus{
		Desc:      nodeDesc,
		BuildInfo: util.GetBuildInfo(),
		StartedAt: 50,
		UpdatedAt: 100,
		Metrics:   expectedNodeSummaryMetrics,
		StoreStatuses: []StoreStatus{
			{
				Desc:    storeDesc1,
				Metrics: expectedStoreSummaryMetrics,
			},
			{
				Desc:    storeDesc2,
				Metrics: expectedStoreSummaryMetrics,
			},
		},
	}

	nodeSummary := recorder.GetStatusSummary()
	if nodeSummary == nil {
		t.Fatalf("recorder did not return nodeSummary.")
	}
	sort.Sort(byStoreDescID(nodeSummary.StoreStatuses))
	if a, e := nodeSummary, expectedNodeSummary; !reflect.DeepEqual(a, e) {
		t.Errorf("recorder did not produce expected NodeSummary; diff:\n %v", pretty.Diff(e, a))
	}
}