// Launch some Mesos tasks func launchTasks(masterHost string, agentHost string) { cmd := "mesos" id := time.Now().Unix() task1Args := []string{ "execute", fmt.Sprintf("--master=%s", masterHost), fmt.Sprintf("--name=sleep.%v", id), "--resources=cpus:0.5;mem:64;disk:32", "--command=date && sleep 60", } task2Args := []string{ "execute", fmt.Sprintf("--master=%s", masterHost), fmt.Sprintf("--name=sleep: %v", id), "--resources=cpus:0.5;mem:64;disk:32", "--command=date && sleep 60", } launch := func(cmd *exec.Cmd) { var stderr bytes.Buffer cmd.Stderr = &stderr err := cmd.Run() if err != nil { fmt.Println(stderr.String()) panic(err) } } go launch(exec.Command(cmd, task1Args...)) go launch(exec.Command(cmd, task2Args...)) // There is a delay until disk usage information is available for a newly-launched executor. See: // * https://github.com/apache/mesos/blob/0.28.1/src/slave/containerizer/mesos/isolators/posix/disk.cpp#L352-L357 // * https://github.com/apache/mesos/blob/0.28.1/src/tests/disk_quota_tests.cpp#L496-L514 // // Therefore, this function needs to fetch statistics for the new executors it just created and block until // the disk usage metrics are available. Otherwise, we'll see some flakiness in the integration tests: // // * /home/vagrant/work/src/github.com/intelsdi-x/snap-plugin-collector-mesos/mesos/mesos_integration_test.go // Line 157: // Expected: '8' // Actual: '6' // (Should be equal) // done := map[string]bool{} for len(done) != 2 { executors, err := agent.GetMonitoringStatistics(agentHost) if err != nil { panic(err) } if len(executors) != 2 { time.Sleep(1) continue } for _, exec := range executors { if done[exec.ID] != true { if exec.Statistics.DiskUsedBytes != nil { done[exec.ID] = true } else { time.Sleep(1) } } } } }
func (m *Mesos) CollectMetrics(mts []plugin.MetricType) ([]plugin.MetricType, error) { configItems, err := getConfig(mts[0]) if err != nil { return nil, err } requestedMaster := []core.Namespace{} requestedAgent := []core.Namespace{} for _, metricType := range mts { switch metricType.Namespace().Strings()[2] { case "master": requestedMaster = append(requestedMaster, metricType.Namespace()) case "agent": requestedAgent = append(requestedAgent, metricType.Namespace()) } } // Translate Mesos metrics into Snap PluginMetrics now := time.Now() metrics := []plugin.MetricType{} if configItems["master"] != "" && len(requestedMaster) > 0 { log.Info("Collecting ", len(requestedMaster), " metrics from the master") isLeader, err := master.IsLeader(configItems["master"]) if err != nil { log.Error(err) return nil, err } if isLeader { snapshot, err := master.GetMetricsSnapshot(configItems["master"]) if err != nil { log.Error(err) return nil, err } frameworks, err := master.GetFrameworks(configItems["master"]) if err != nil { log.Error(err) return nil, err } tags := map[string]string{"source": configItems["master"]} for _, requested := range requestedMaster { isDynamic, _ := requested.IsDynamic() if isDynamic { n := requested.Strings()[4:] // Iterate through the array of frameworks returned by GetFrameworks() for _, framework := range frameworks { val := ns.GetValueByNamespace(framework, n) if val == nil { log.Warn("Attempted to collect metric ", requested.String(), " but it returned nil!") continue } // substituting "framework" wildcard with particular framework id requested[3].Value = framework.ID // TODO(roger): units metrics = append(metrics, *plugin.NewMetricType(requested, now, tags, "", val)) } } else { n := requested.Strings()[3:] val, ok := snapshot[strings.Join(n, "/")] if !ok { e := fmt.Errorf("error: requested metric %s not found", requested.String()) log.Error(e) return nil, e } //TODO(kromar): is it possible to provide unit NewMetricType(ns, time, tags, unit, value)? // I'm leaving empty string for now... metrics = append(metrics, *plugin.NewMetricType(requested, now, tags, "", val)) } } } else { log.Info("Attempted CollectMetrics() on ", configItems["master"], "but it isn't the leader. Skipping...") } } if configItems["agent"] != "" && len(requestedAgent) > 0 { log.Info("Collecting ", len(requestedAgent), " metrics from the agent") snapshot, err := agent.GetMetricsSnapshot(configItems["agent"]) if err != nil { log.Error(err) return nil, err } executors, err := agent.GetMonitoringStatistics(configItems["agent"]) if err != nil { log.Error(err) return nil, err } tags := map[string]string{"source": configItems["agent"]} for _, requested := range requestedAgent { n := requested.Strings()[5:] isDynamic, _ := requested.IsDynamic() if isDynamic { // Iterate through the array of executors returned by GetMonitoringStatistics() for _, exec := range executors { val := ns.GetValueByNamespace(exec.Statistics, n) if val == nil { log.Warn("Attempted to collect metric ", requested.String(), " but it returned nil!") continue } // substituting "framework" wildcard with particular framework id requested[3].Value = exec.Framework // substituting "executor" wildcard with particular executor id requested[4].Value = exec.ID // TODO(roger): units metrics = append(metrics, *plugin.NewMetricType(requested, now, tags, "", val)) } } else { // Get requested metrics from the snapshot map n := requested.Strings()[3:] val, ok := snapshot[strings.Join(n, "/")] if !ok { e := fmt.Errorf("error: requested metric %v not found", requested.String()) log.Error(e) return nil, e } //TODO(kromar): units here also? metrics = append(metrics, *plugin.NewMetricType(requested, now, tags, "", val)) } } } log.Debug("Collected a total of ", len(metrics), " metrics.") return metrics, nil }