func analyzeGardenAUFSStressTests() { runs := []string{"diego-2.cell-z1.0"} //, "diego-1.cell-z1.0"} for _, run := range runs { data, err := ioutil.ReadFile(config.DataDir("garden-aufs-test", run+".unified")) say.ExitIfError("couldn't read log file", err) entries := util.ChugLagerEntries(data) significantEvents := analyzers.ExtractSignificantEvents(entries) // significantEvents.LogWithThreshold(0.2) allow := map[string]bool{ "garden-linux.loop-mounter.unmount.failed-to-unmount": true, "garden-linux.garden-server.create.creating": true, "rep.auction-fetch-state.handling": true, "rep.container-metrics-reporter.tick.started": true, "rep.depot-client.run-container.creating-container-in-garden": true, "rep.depot-client.delete-container.destroy.started": true, "rep.depot-client.run-container.run.action.download-step.fetch-starting": true, "rep.depot-client.run-container.run.monitor-run.run-step.running": true, "rep.depot-client.run-container.run.run-step-process.step-finished-with-error": true, "rep.depot-client.run-container.run.setup.download-step.fetch-starting": true, "rep.auction-delegate.auction-work.lrp-allocate-instances.requesting-container-allocation": true, "rep.event-consumer.operation-stream.executing-container-operation.task-processor.fetching-container-result": true, "rep.depot-client.run-container.run.action.run-step.running": true, } filteredSignificantEvents := analyzers.SignificantEvents{} filteredSignificantEvents.LogWithThreshold(0.2) for name, events := range significantEvents { if allow[name] { filteredSignificantEvents[name] = events } } filteredSignificantEvents.LogWithThreshold(0.2) limit := viz.NewHorizontalLine(256) limit.LineStyle = viz.LineStyle(viz.Red, 1) options := analyzers.SignificantEventsOptions{ LineOverlays: gardenStressTestContainerCountOverlays(entries), VerticalMarkers: gardenStressTestFailedContainerCreates(entries), OverlayPlots: []plot.Plotter{limit}, WidthStretch: 2, MaxY: 400, } analyzers.VisualizeSignificantEvents( filteredSignificantEvents, config.DataDir("garden-aufs-test", run+".png"), options, ) } }
func analyzePWSSlowEvacuation() { runs := []string{"cell_z1.16", "cell_z1.17", "cell_z2.23", "cell_z2.24", "cell_z2.25"} for _, run := range runs { data, err := ioutil.ReadFile(config.DataDir("pws-slow-evacuation", run+".unified")) say.ExitIfError("couldn't read log file", err) entries := util.ChugLagerEntries(data) significantEvents := analyzers.ExtractSignificantEvents(entries) // significantEvents.LogWithThreshold(0.2) allow := map[string]bool{ "garden-linux.loop-mounter.unmount.failed-to-unmount": true, "garden-linux.garden-server.create.creating": true, "rep.auction-fetch-state.handling": true, "rep.container-metrics-reporter.tick.started": true, "rep.depot-client.run-container.creating-container-in-garden": true, "rep.depot-client.delete-container.destroy.started": true, "rep.depot-client.run-container.run.action.download-step.fetch-starting": true, "rep.depot-client.run-container.run.monitor-run.run-step.running": true, "rep.depot-client.run-container.run.setup.download-step.fetch-starting": true, "rep.auction-delegate.auction-work.lrp-allocate-instances.requesting-container-allocation": true, "rep.event-consumer.operation-stream.executing-container-operation.task-processor.fetching-container-result": true, "rep.depot-client.run-container.run.action.run-step.running": true, } filteredSignificantEvents := analyzers.SignificantEvents{} filteredSignificantEvents.LogWithThreshold(0.2) for name, events := range significantEvents { if allow[name] { filteredSignificantEvents[name] = events } } filteredSignificantEvents.LogWithThreshold(0.2) options := analyzers.SignificantEventsOptions{ LineOverlays: gardenStressTestContainerCountOverlays(entries), VerticalMarkers: pwsSlowEvacuationFailedProcessOverlays(entries), WidthStretch: 2, MaxY: 400, } analyzers.VisualizeSignificantEvents( filteredSignificantEvents, config.DataDir("pws-slow-evacuation", run+".png"), options, ) } }
func analyzeSlowPWSTasks() { for _, run := range slowPWSTaskRuns { say.Println(0, say.Green(run.Name)) data, err := ioutil.ReadFile(config.DataDir("pws-slow-tasks", run.Name+".unified")) say.ExitIfError("couldn't read log file", err) entries := util.ChugLagerEntries(data) significantEvents := analyzers.ExtractSignificantEvents(entries) allow := map[string]bool{ "rep.auction-fetch-state.handling": true, "rep.container-metrics-reporter.tick.started": true, "rep.depot-client.run-container.creating-container-in-garden": true, "rep.depot-client.delete-container.destroy.started": true, "rep.depot-client.run-container.run.action.download-step.fetch-starting": true, "rep.depot-client.run-container.run.monitor-run.run-step.running": true, "rep.depot-client.run-container.run.run-step-process.step-finished-with-error": true, "rep.depot-client.run-container.run.setup.download-step.fetch-starting": true, } filteredSignificantEvents := analyzers.SignificantEvents{} for name, events := range significantEvents { if allow[name] { filteredSignificantEvents[name] = events } } filteredSignificantEvents.LogWithThreshold(0.2) options := analyzers.SignificantEventsOptions{ LineOverlays: containerCountOverlays(entries), } if !run.CliffTimestamp.IsZero() { options.MaxT = run.EndTimestamp.Add(time.Minute * 30) options.VerticalMarkers = []analyzers.VerticalMarker{ {T: run.CliffTimestamp, LineStyle: viz.LineStyle(viz.Red, 1, viz.Dash)}, {T: run.EndTimestamp, LineStyle: viz.LineStyle(viz.Black, 1, viz.Dash)}, } } analyzers.VisualizeSignificantEvents( filteredSignificantEvents, config.DataDir("pws-slow-tasks", run.Name+".png"), options, ) } }
func analyzeCPUWeightStresstest() { runs := []string{ "unmodified-run", "aufs-run", "2-conc-run", } for _, run := range runs { say.Println(0, say.Green(run)) data, err := ioutil.ReadFile(config.DataDir("cpu-wait-stress-test", run+".unified")) say.ExitIfError("couldn't read log file", err) entries := util.ChugLagerEntries(data) significantEvents := analyzers.ExtractSignificantEvents(entries) allow := map[string]bool{ "rep.auction-fetch-state.handling": true, "rep.container-metrics-reporter.tick.started": true, "rep.depot-client.run-container.creating-container-in-garden": true, "rep.depot-client.delete-container.destroy.started": true, "rep.depot-client.run-container.run.action.download-step.fetch-starting": true, "rep.depot-client.run-container.run.monitor-run.run-step.running": true, "rep.depot-client.run-container.run.run-step-process.step-finished-with-error": true, "rep.depot-client.run-container.run.setup.download-step.fetch-starting": true, } filteredSignificantEvents := analyzers.SignificantEvents{} for name, events := range significantEvents { if allow[name] { filteredSignificantEvents[name] = events } } filteredSignificantEvents.LogWithThreshold(0.2) options := analyzers.SignificantEventsOptions{ LineOverlays: cpuWeightStressTestContainerCountOverlays(entries), } analyzers.VisualizeSignificantEvents( filteredSignificantEvents, config.DataDir("cpu-wait-stress-test", run+".png"), options, ) } }
func analyzeHealthCheckTimeouts() { data, err := ioutil.ReadFile(config.DataDir("health-check-timeouts", "health-check-timeouts.log")) say.ExitIfError("couldn't read log file", err) entries := util.ChugLagerEntries(data) significantEvents := analyzers.ExtractSignificantEvents(entries) significantEvents.LogWithThreshold(0.2) allow := map[string]bool{ "rep.auction-fetch-state.handling": true, "rep.container-metrics-reporter.tick.started": true, "rep.event-consumer.operation-stream.executing-container-operation.ordinary-lrp-processor.process-reserved-container.run-container.containerstore-run.node-run.monitor-run.run-step.running": true, "rep.event-consumer.operation-stream.executing-container-operation.ordinary-lrp-processor.process-reserved-container.run-container.containerstore-create.starting": true, "rep.event-consumer.operation-stream.executing-container-operation.ordinary-lrp-processor.process-completed-container.deleting-container": true, "rep.event-consumer.operation-stream.executing-container-operation.ordinary-lrp-processor.process-reserved-container.run-container.containerstore-run.node-run.action.run-step.running": true, "rep.running-bulker.sync.starting": true, "garden-linux.garden-server.run.spawned": true, } filteredSignificantEvents := analyzers.SignificantEvents{} for name, events := range significantEvents { if allow[name] { filteredSignificantEvents[name] = events } } filteredSignificantEvents.LogWithThreshold(0.2) options := analyzers.SignificantEventsOptions{ LineOverlays: gardenStressTestContainerCountOverlays(entries), VerticalMarkers: healthCheckTimeoutsFailedHealthMonitor(entries), WidthStretch: 2, MaxY: 400, } analyzers.VisualizeSignificantEvents( filteredSignificantEvents, config.DataDir("health-check-timeouts", "health-check-timeouts.png"), options, ) }
func analyzeEventDurations(path string, options analyzers.SignificantEventsOptions, n int, skips []string, outFile string) { data, err := ioutil.ReadFile(path) say.ExitIfError("couldn't read log file", err) entries := util.ChugLagerEntries(data) significantEvents := analyzers.ExtractSignificantEventsWithThreshold(entries, n) significantEvents.LogWithThreshold(0.2) for _, skip := range skips { say.Println(0, "Skipping %s", skip) delete(significantEvents, skip) } analyzers.VisualizeSignificantEvents( significantEvents, outFile, options, ) }
func findTheApp() { allLrps := map[string][]string{} for _, run := range slowPWSTaskRuns { say.Println(0, say.Green(run.Name)) lrps := map[string]bool{} data, err := ioutil.ReadFile(config.DataDir("pws-slow-tasks", run.Name+".unified")) say.ExitIfError("couldn't read log file", err) entries := util.ChugLagerEntries(data) tMin := run.CliffTimestamp.Add(-2 * time.Minute) tCliff := run.CliffTimestamp for _, entry := range entries { if entry.Timestamp.After(tMin) && entry.Timestamp.Before(tCliff) && entry.Message == "garden-linux.garden-server.bulk_info.got-bulkinfo" { handles := reflect.ValueOf(entry.Data["handles"]) for i := 0; i < handles.Len(); i += 1 { handle := handles.Index(i).Interface().(string) if len(handle) == 110 { guid := handle[0:36] lrps[guid] = true } } } } say.Println(0, format.Object(lrps, 0)) for lrp := range lrps { allLrps[lrp] = append(allLrps[lrp], run.Name) } } say.Println(0, say.Green("Counts")) for lrp, runs := range allLrps { if len(runs) > 1 { say.Println(0, "%s: %s", lrp, say.Green("%s", strings.Join(runs, ", "))) } } }
func analyzeGardenStressTests() { data, err := ioutil.ReadFile(config.DataDir("garden-stress-test", "all.unified")) say.ExitIfError("couldn't read log file", err) entries := util.ChugLagerEntries(data) significantEvents := analyzers.ExtractSignificantEvents(entries) allow := map[string]bool{ "garden-linux.garden-server.create.creating": true, "rep.depot-client.delete-container.destroy.started": true, } filteredSignificantEvents := analyzers.SignificantEvents{} filteredSignificantEvents.LogWithThreshold(0.2) for name, events := range significantEvents { if allow[name] { filteredSignificantEvents[name] = events } } filteredSignificantEvents.LogWithThreshold(0.2) limit := viz.NewHorizontalLine(256) limit.LineStyle = viz.LineStyle(viz.Red, 1) options := analyzers.SignificantEventsOptions{ LineOverlays: gardenStressTestContainerCountOverlays(entries), VerticalMarkers: gardenStressTestFailedContainerCreates(entries), OverlayPlots: []plot.Plotter{limit}, WidthStretch: 3, MaxY: 400, } analyzers.VisualizeSignificantEvents( filteredSignificantEvents, config.DataDir("garden-stress-test", "out.png"), options, ) }
func analyzeGardenDT() { data, err := ioutil.ReadFile(config.DataDir("garden-dt", "garden-dt.logs")) say.ExitIfError("couldn't read log file", err) entries := util.ChugLagerEntries(data) significantEvents := analyzers.ExtractSignificantEvents(entries) significantEvents.LogWithThreshold(0.2) delete(significantEvents, "garden-linux.container.info-starting") options := analyzers.SignificantEventsOptions{ MarkedEvents: map[string]plot.LineStyle{ "garden-linux.garden-server.create.creating": viz.LineStyle(viz.Blue, 1, viz.Dot), "garden-linux.garden-server.destroy.destroying": viz.LineStyle(viz.Red, 1, viz.Dot), }, } analyzers.VisualizeSignificantEvents( significantEvents, config.DataDir("garden-dt", "many-garden-dt.svg"), options, ) }
func analyzeAuctioneerFetchStateDuration() { data, err := ioutil.ReadFile(config.DataDir("auctioneer-fetch-state-duration", "auctioneer-fetch-state-duration.logs")) say.ExitIfError("couldn't read log file", err) entries := util.ChugLagerEntries(data) fetchStateDurationEvents := ExtractEventsFromLagerData(entries, DurationExtractor("duration")) allDurations := fetchStateDurationEvents.Data() highDurations := allDurations.Filter(NumFilter(">", 0.2)) lowDurations := allDurations.Filter(NumFilter("<=", 0.2)) earlyAllDurations := fetchStateDurationEvents.FilterTimes(NumFilter("<", 1445274027070991039)).Data() earlyHighDurations := earlyAllDurations.Filter(NumFilter(">", 0.2)) earlyLowDurations := earlyAllDurations.Filter(NumFilter("<=", 0.2)) lateAllDurations := fetchStateDurationEvents.FilterTimes(NumFilter(">=", 1445274027070991039)).Data() lateHighDurations := lateAllDurations.Filter(NumFilter(">", 0.2)) lateLowDurations := lateAllDurations.Filter(NumFilter("<=", 0.2)) board := viz.NewUniformBoard(3, 1, 0) earlyScale := float64(len(allDurations)) / float64(len(earlyAllDurations)) lateScale := float64(len(allDurations)) / float64(len(lateAllDurations)) say.Println(0, "All Fetches: %s", allDurations.Stats()) say.Println(1, "> 0.2s: %s", highDurations.Stats()) say.Println(1, "<= 0.2s: %s", lowDurations.Stats()) say.Println(0, "Early Fetches: %s", earlyAllDurations.Stats()) say.Println(1, "> 0.2s: %s", earlyHighDurations.Stats()) say.Println(1, "<= 0.2s: %s", earlyLowDurations.Stats()) say.Println(0, "Late Fetches: %s", lateAllDurations.Stats()) say.Println(1, "> 0.2s: %s", lateHighDurations.Stats()) say.Println(1, "<= 0.2s: %s", lateLowDurations.Stats()) p, _ := plot.New() p.Title.Text = "All Auctioneer Fetch State Durations" p.Add(viz.NewHistogram(allDurations, 20, allDurations.Min(), allDurations.Max())) h := viz.NewScaledHistogram(earlyAllDurations, 20, allDurations.Min(), allDurations.Max(), earlyScale) h.LineStyle = viz.LineStyle(viz.Blue, 1) p.Add(h) h = viz.NewScaledHistogram(lateAllDurations, 20, allDurations.Min(), allDurations.Max(), lateScale) h.LineStyle = viz.LineStyle(viz.Red, 1) p.Add(h) board.AddNextSubPlot(p) p, _ = plot.New() p.Title.Text = "Auctioneer Fetch State Durations < 0.2s" p.Add(viz.NewHistogram(lowDurations, 100, lowDurations.Min(), lowDurations.Max())) h = viz.NewScaledHistogram(earlyLowDurations, 100, lowDurations.Min(), lowDurations.Max(), earlyScale) h.LineStyle = viz.LineStyle(viz.Blue, 1) p.Add(h) h = viz.NewScaledHistogram(lateLowDurations, 100, lowDurations.Min(), lowDurations.Max(), lateScale) h.LineStyle = viz.LineStyle(viz.Red, 1) p.Add(h) board.AddNextSubPlot(p) p, _ = plot.New() p.Title.Text = "Auctioneer Fetch State Durations > 0.2s" p.Add(viz.NewHistogram(highDurations, 40, highDurations.Min(), highDurations.Max())) h = viz.NewScaledHistogram(earlyHighDurations, 40, highDurations.Min(), highDurations.Max(), earlyScale) h.LineStyle = viz.LineStyle(viz.Blue, 1) p.Add(h) h = viz.NewScaledHistogram(lateHighDurations, 40, highDurations.Min(), highDurations.Max(), lateScale) h.LineStyle = viz.LineStyle(viz.Red, 1) p.Add(h) board.AddNextSubPlot(p) board.Save(12, 4, config.DataDir("auctioneer-fetch-state-duration", "auctioneer-fetch-state-duration.svg")) }