func LRPInstances( actualLRPGroups []*models.ActualLRPGroup, addInfo func(*cc_messages.LRPInstance, *models.ActualLRP), clk clock.Clock, ) []cc_messages.LRPInstance { instances := make([]cc_messages.LRPInstance, len(actualLRPGroups)) for i, actualLRPGroup := range actualLRPGroups { actual, _ := actualLRPGroup.Resolve() instance := cc_messages.LRPInstance{ ProcessGuid: actual.ProcessGuid, InstanceGuid: actual.InstanceGuid, Index: uint(actual.Index), Since: actual.Since / 1e9, Uptime: (clk.Now().UnixNano() - actual.Since) / 1e9, State: cc_conv.StateFor(actual.State), } if addInfo != nil { addInfo(&instance, actual) } instances[i] = instance } return instances }
// Until we get a successful response from garden, // periodically emit metrics saying how long we've been trying // while retrying the connection indefinitely. func waitForGarden(logger lager.Logger, gardenClient GardenClient.Client, clock clock.Clock) error { pingStart := clock.Now() logger = logger.Session("wait-for-garden", lager.Data{"initialTime:": pingStart}) pingRequest := clock.NewTimer(0) pingResponse := make(chan error) heartbeatTimer := clock.NewTimer(StalledMetricHeartbeatInterval) for { select { case <-pingRequest.C(): go func() { logger.Info("ping-garden", lager.Data{"wait-time-ns:": clock.Since(pingStart)}) pingResponse <- gardenClient.Ping() }() case err := <-pingResponse: switch err.(type) { case nil: logger.Info("ping-garden-success", lager.Data{"wait-time-ns:": clock.Since(pingStart)}) // send 0 to indicate ping responded successfully stalledDuration.Send(0) return nil case garden.UnrecoverableError: logger.Error("failed-to-ping-garden-with-unrecoverable-error", err) return err default: logger.Error("failed-to-ping-garden", err) pingRequest.Reset(PingGardenInterval) } case <-heartbeatTimer.C(): logger.Info("emitting-stalled-garden-heartbeat", lager.Data{"wait-time-ns:": clock.Since(pingStart)}) stalledDuration.Send(clock.Since(pingStart)) heartbeatTimer.Reset(StalledMetricHeartbeatInterval) } } }
func (sender *Sender) Send(clock clock.Clock) error { sender.currentTime = clock.Now() err := sender.store.VerifyFreshness(sender.currentTime) if err != nil { sender.logger.Error("Store is not fresh", err) return err } pendingStartMessages, err := sender.store.GetPendingStartMessages() if err != nil { sender.logger.Error("Failed to fetch pending start messages", err) return err } pendingStopMessages, err := sender.store.GetPendingStopMessages() if err != nil { sender.logger.Error("Failed to fetch pending stop messages", err) return err } sender.apps, err = sender.store.GetApps() if err != nil { sender.logger.Error("Failed to fetch apps", err) return err } sender.sendStartMessages(pendingStartMessages) sender.sendStopMessages(pendingStopMessages) err = sender.metricsAccountant.IncrementSentMessageMetrics(sender.sentStartMessages, sender.sentStopMessages) if err != nil { sender.logger.Error("Failed to increment metrics", err) sender.didSucceed = false } err = sender.store.SavePendingStartMessages(sender.startMessagesToSave...) if err != nil { sender.logger.Error("Failed to save start messages", err) sender.didSucceed = false } err = sender.store.DeletePendingStartMessages(sender.startMessagesToDelete...) if err != nil { sender.logger.Error("Failed to delete start messages", err) sender.didSucceed = false } err = sender.store.SavePendingStopMessages(sender.stopMessagesToSave...) if err != nil { sender.logger.Error("Failed to save stop messages", err) sender.didSucceed = false } err = sender.store.DeletePendingStopMessages(sender.stopMessagesToDelete...) if err != nil { sender.logger.Error("Failed to delete stop messages", err) sender.didSucceed = false } if !sender.didSucceed { return errors.New("Sender failed. See logs for details.") } return nil }
func CalculateConvergence( logger lager.Logger, clock clock.Clock, restartCalculator models.RestartCalculator, input *models.ConvergenceInput, ) *models.ConvergenceChanges { sess := logger.Session("calculate-convergence") var extraLRPCount, missingLRPCount int sess.Info("start") defer sess.Info("done") changes := &models.ConvergenceChanges{} now := clock.Now() for processGuid, _ := range input.AllProcessGuids { pLog := sess.WithData(lager.Data{ "process_guid": processGuid, }) desired, hasDesired := input.DesiredLRPs[processGuid] actualsByIndex := input.ActualLRPs[processGuid] if hasDesired { for i := int32(0); i < desired.Instances; i++ { if _, hasIndex := actualsByIndex[i]; !hasIndex { pLog.Info("missing", lager.Data{"index": i}) missingLRPCount++ lrpKey := models.NewActualLRPKey(desired.ProcessGuid, i, desired.Domain) changes.ActualLRPKeysForMissingIndices = append( changes.ActualLRPKeysForMissingIndices, &lrpKey, ) } } for i, actual := range actualsByIndex { if actual.CellIsMissing(input.Cells) { pLog.Info("missing-cell", lager.Data{"index": i, "cell_id": actual.CellId}) changes.ActualLRPsWithMissingCells = append(changes.ActualLRPsWithMissingCells, actual) continue } if actual.Index >= desired.Instances && input.Domains.Contains(desired.Domain) { pLog.Info("extra", lager.Data{"index": i}) extraLRPCount++ changes.ActualLRPsForExtraIndices = append(changes.ActualLRPsForExtraIndices, actual) continue } if actual.ShouldRestartCrash(now, restartCalculator) { pLog.Info("restart-crash", lager.Data{"index": i}) changes.RestartableCrashedActualLRPs = append(changes.RestartableCrashedActualLRPs, actual) continue } if actual.ShouldStartUnclaimed(now) { pLog.Info("stale-unclaimed", lager.Data{"index": i}) changes.StaleUnclaimedActualLRPs = append(changes.StaleUnclaimedActualLRPs, actual) continue } } } else { for i, actual := range actualsByIndex { if !input.Domains.Contains(actual.Domain) { pLog.Info("skipping-unfresh-domain") continue } pLog.Info("no-longer-desired", lager.Data{"index": i}) extraLRPCount++ changes.ActualLRPsForExtraIndices = append(changes.ActualLRPsForExtraIndices, actual) } } } missingLRPs.Send(missingLRPCount) extraLRPs.Send(extraLRPCount) return changes }
func dumpApp(app *models.App, starts map[string]models.PendingStartMessage, stops map[string]models.PendingStopMessage, clock clock.Clock) { fmt.Printf("\n") fmt.Printf("Guid: %s | Version: %s\n", app.AppGuid, app.AppVersion) if app.IsDesired() { fmt.Printf(" Desired: [%d] instances, (%s, %s)\n", app.Desired.NumberOfInstances, app.Desired.State, app.Desired.PackageState) } else { fmt.Printf(" Desired: NO\n") } if len(app.InstanceHeartbeats) == 0 { fmt.Printf(" Heartbeats: NONE\n") } else { fmt.Printf(" Heartbeats:\n") for _, heartbeat := range app.InstanceHeartbeats { fmt.Printf(" [%d %s] %s on %s\n", heartbeat.InstanceIndex, heartbeat.State, heartbeat.InstanceGuid, heartbeat.DeaGuid[0:5]) } } if len(app.CrashCounts) != 0 { fmt.Printf(" CrashCounts:") for _, crashCount := range app.CrashCounts { fmt.Printf(" [%d]:%d", crashCount.InstanceIndex, crashCount.CrashCount) } fmt.Printf("\n") } appStarts := []models.PendingStartMessage{} appStops := []models.PendingStopMessage{} for _, start := range starts { if start.AppGuid == app.AppGuid && start.AppVersion == app.AppVersion { appStarts = append(appStarts, start) } } for _, stop := range stops { if stop.AppGuid == app.AppGuid && stop.AppVersion == app.AppVersion { appStops = append(appStops, stop) } } if len(appStarts) > 0 { fmt.Printf(" Pending Starts:\n") for _, start := range appStarts { message := []string{} message = append(message, fmt.Sprintf("[%d]", start.IndexToStart)) message = append(message, fmt.Sprintf("priority:%.2f", start.Priority)) if start.SkipVerification { message = append(message, "NO VERIFICATION") } if start.SentOn != 0 { message = append(message, "send:SENT") message = append(message, fmt.Sprintf("delete:%s", time.Unix(start.SentOn+int64(start.KeepAlive), 0).Sub(clock.Now()))) } else { message = append(message, fmt.Sprintf("send:%s", time.Unix(start.SendOn, 0).Sub(clock.Now()))) } fmt.Printf(" %s\n", strings.Join(message, " ")) } } if len(appStops) > 0 { fmt.Printf(" Pending Stops:\n") for _, stop := range appStops { message := []string{} message = append(message, stop.InstanceGuid) if stop.SentOn != 0 { message = append(message, "send:SENT") message = append(message, fmt.Sprintf("delete:%s", time.Unix(stop.SentOn+int64(stop.KeepAlive), 0).Sub(clock.Now()))) } else { message = append(message, fmt.Sprintf("send:%s", time.Unix(stop.SendOn, 0).Sub(clock.Now()))) } fmt.Printf(" %s\n", strings.Join(message, " ")) } } }