// LRPInstances converts a slice of ActualLRPGroups into cc_messages.LRPInstances,
// translating Since and Uptime from nanoseconds to seconds and applying the
// optional addInfo callback to each instance.
func LRPInstances(
	actualLRPGroups []*models.ActualLRPGroup,
	addInfo func(*cc_messages.LRPInstance, *models.ActualLRP),
	clk clock.Clock,
) []cc_messages.LRPInstance {
	instances := make([]cc_messages.LRPInstance, len(actualLRPGroups))
	for i, actualLRPGroup := range actualLRPGroups {
		actual, _ := actualLRPGroup.Resolve()

		instance := cc_messages.LRPInstance{
			ProcessGuid:  actual.ProcessGuid,
			InstanceGuid: actual.InstanceGuid,
			Index:        uint(actual.Index),
			Since:        actual.Since / 1e9,
			Uptime:       (clk.Now().UnixNano() - actual.Since) / 1e9,
			State:        cc_conv.StateFor(actual.State),
		}

		if addInfo != nil {
			addInfo(&instance, actual)
		}

		instances[i] = instance
	}

	return instances
}
func newVolume(logger lager.Logger, bcVol baggageclaim.Volume, clock clock.Clock, db VolumeFactoryDB) (Volume, bool, error) {
	vol := &volume{
		Volume: bcVol,
		db:     db,

		heartbeating: new(sync.WaitGroup),
		release:      make(chan *time.Duration, 1),
	}

	ttl, found, err := vol.db.GetVolumeTTL(vol.Handle())
	if err != nil {
		logger.Error("failed-to-lookup-expiration-of-volume", err)
		return nil, false, err
	}

	if !found {
		return nil, false, nil
	}

	vol.heartbeat(logger.Session("initial-heartbeat"), ttl)

	vol.heartbeating.Add(1)
	go vol.heartbeatContinuously(
		logger.Session("continuous-heartbeat"),
		clock.NewTicker(volumeKeepalive),
		ttl,
	)

	return vol, true, nil
}
func newVolume(logger lager.Logger, bcVol baggageclaim.Volume, clock clock.Clock, db VolumeFactoryDB) Volume {
	vol := &volume{
		Volume: bcVol,
		db:     db,

		heartbeating: new(sync.WaitGroup),
		release:      make(chan time.Duration, 1),
	}

	ttl, err := vol.db.GetVolumeTTL(vol.Handle())
	if err != nil {
		logger.Info("failed-to-lookup-ttl", lager.Data{"error": err.Error()})

		// Fall back to the TTL reported by baggageclaim if the database lookup fails.
		ttl, _, err = bcVol.Expiration()
		if err != nil {
			logger.Error("failed-to-lookup-expiration-of-volume", err)
			return nil
		}
	}

	vol.heartbeat(logger.Session("initial-heartbeat"), ttl)

	vol.heartbeating.Add(1)
	go vol.heartbeatContinuously(
		logger.Session("continuous-heartbeat"),
		clock.NewTicker(volumeKeepalive),
		ttl,
	)

	return vol
}
// NewHardcoded returns an ifrit.RunFunc that registers a hard-coded Garden worker
// and then re-saves the worker record every 10 seconds until a signal is received.
func NewHardcoded(
	logger lager.Logger,
	workerDB SaveWorkerDB,
	clock c.Clock,
	gardenAddr string,
	baggageclaimURL string,
	resourceTypesNG []atc.WorkerResourceType,
) ifrit.RunFunc {
	return func(signals <-chan os.Signal, ready chan<- struct{}) error {
		workerInfo := db.WorkerInfo{
			GardenAddr:       gardenAddr,
			BaggageclaimURL:  baggageclaimURL,
			ActiveContainers: 0,
			ResourceTypes:    resourceTypesNG,
			Platform:         "linux",
			Tags:             []string{},
			Name:             gardenAddr,
		}

		err := workerDB.SaveWorker(workerInfo, 30*time.Second)
		if err != nil {
			logger.Error("could-not-save-garden-worker-provided", err)
			return err
		}

		ticker := clock.NewTicker(10 * time.Second)

		close(ready)

	dance:
		for {
			select {
			case <-ticker.C():
				err = workerDB.SaveWorker(workerInfo, 30*time.Second)
				if err != nil {
					logger.Error("could-not-save-garden-worker-provided", err)
				}
			case <-signals:
				ticker.Stop()
				break dance
			}
		}

		return nil
	}
}
func newGardenWorkerContainer(
	logger lager.Logger,
	container garden.Container,
	gardenClient garden.Client,
	baggageclaimClient baggageclaim.Client,
	db GardenWorkerDB,
	clock clock.Clock,
	volumeFactory VolumeFactory,
) (Container, error) {
	workerContainer := &gardenWorkerContainer{
		Container:    container,
		gardenClient: gardenClient,
		db:           db,
		clock:        clock,

		heartbeating: new(sync.WaitGroup),
		release:      make(chan *time.Duration, 1),
	}

	workerContainer.heartbeat(logger.Session("initial-heartbeat"), ContainerTTL)

	workerContainer.heartbeating.Add(1)
	go workerContainer.heartbeatContinuously(
		logger.Session("continuous-heartbeat"),
		clock.NewTicker(containerKeepalive),
	)

	metric.TrackedContainers.Inc()

	properties, err := workerContainer.Properties()
	if err != nil {
		workerContainer.Release(nil)
		return nil, err
	}

	err = workerContainer.initializeVolumes(logger, properties, baggageclaimClient, volumeFactory)
	if err != nil {
		workerContainer.Release(nil)
		return nil, err
	}

	if properties["user"] != "" {
		workerContainer.user = properties["user"]
	} else {
		workerContainer.user = "******"
	}

	return workerContainer, nil
}
// NewRunner returns an ifrit.Runner that, on every tick of the given interval,
// attempts to take the cache-invalidation lease and, if it gets it, runs the
// baggage collector before breaking the lease.
func NewRunner(
	logger lager.Logger,
	baggageCollector BaggageCollector,
	db RunnerDB,
	clock clock.Clock,
	interval time.Duration,
) ifrit.Runner {
	return ifrit.RunFunc(func(signals <-chan os.Signal, ready chan<- struct{}) error {
		close(ready)

		ticker := clock.NewTicker(interval)
		defer ticker.Stop()

		for {
			select {
			case <-ticker.C():
				leaseLogger := logger.Session("lease-invalidate-cache")
				leaseLogger.Info("tick")

				lease, leased, err := db.LeaseCacheInvalidation(leaseLogger, interval)
				if err != nil {
					leaseLogger.Error("failed-to-get-lease", err)
					break
				}

				if !leased {
					leaseLogger.Debug("did-not-get-lease")
					break
				}

				leaseLogger.Info("collecting-baggage")
				err = baggageCollector.Collect()
				if err != nil {
					leaseLogger.Error("failed-to-collect-baggage", err)
				}

				lease.Break()
			case <-signals:
				return nil
			}
		}
	})
}
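// A minimal usage sketch, not part of the original source: runnerExample and its
// arguments are hypothetical, and the sketch assumes only the public
// github.com/tedsuo/ifrit API (Invoke, Process.Signal, Process.Wait) plus
// clock.NewClock from the clock package used above. It shows how a runner like
// the one above is typically started and shut down.
func runnerExample(logger lager.Logger, collector BaggageCollector, db RunnerDB) {
	runner := NewRunner(logger, collector, db, clock.NewClock(), time.Minute)

	// Invoke starts the runner in its own goroutine and returns once ready is closed.
	process := ifrit.Invoke(runner)

	// Later, ask the run loop to exit and wait for it to return.
	process.Signal(os.Interrupt)
	<-process.Wait()
}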
func newGardenWorkerContainer(container garden.Container, gardenClient garden.Client, clock clock.Clock) Container {
	workerContainer := &gardenWorkerContainer{
		Container:    container,
		gardenClient: gardenClient,

		clock: clock,

		heartbeating:     new(sync.WaitGroup),
		stopHeartbeating: make(chan struct{}),
	}

	workerContainer.heartbeating.Add(1)
	go workerContainer.heartbeat(clock.NewTicker(containerKeepalive))

	trackedContainers.Add(1)

	return workerContainer
}
// NewFakeTicker builds a clock.Ticker out of a single resettable timer from the
// provided clock, forwarding each firing of the timer onto the ticker's channel.
func NewFakeTicker(clock clock.Clock, d time.Duration) clock.Ticker {
	channel := make(chan time.Time)

	timer := clock.NewTimer(d)

	go func() {
		for {
			tick := <-timer.C()
			timer.Reset(d)
			channel <- tick
		}
	}()

	return &fakeTicker{
		clock:    clock,
		duration: d,
		channel:  channel,
		timer:    timer,
	}
}
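// A minimal usage sketch, not part of the original source: fakeTickerExample is
// hypothetical and assumes the fakeTicker type satisfies clock.Ticker, i.e. that
// its C() method exposes the channel being fed by the goroutine above.
func fakeTickerExample() {
	ticker := NewFakeTicker(clock.NewClock(), 100*time.Millisecond)

	// Each receive corresponds to one firing of the underlying timer, which the
	// forwarding goroutine resets after every tick.
	for i := 0; i < 3; i++ {
		<-ticker.C()
	}
}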
// Until we get a successful response from garden,
// periodically emit metrics saying how long we've been trying
// while retrying the connection indefinitely.
func waitForGarden(logger lager.Logger, gardenClient GardenClient.Client, clock clock.Clock) error {
	pingStart := clock.Now()
	logger = logger.Session("wait-for-garden", lager.Data{"initialTime": pingStart})
	pingRequest := clock.NewTimer(0)
	pingResponse := make(chan error)
	heartbeatTimer := clock.NewTimer(StalledMetricHeartbeatInterval)

	for {
		select {
		case <-pingRequest.C():
			go func() {
				logger.Info("ping-garden", lager.Data{"wait-time-ns": clock.Since(pingStart)})
				pingResponse <- gardenClient.Ping()
			}()

		case err := <-pingResponse:
			switch err.(type) {
			case nil:
				logger.Info("ping-garden-success", lager.Data{"wait-time-ns": clock.Since(pingStart)})
				// send 0 to indicate ping responded successfully
				stalledDuration.Send(0)
				return nil
			case garden.UnrecoverableError:
				logger.Error("failed-to-ping-garden-with-unrecoverable-error", err)
				return err
			default:
				logger.Error("failed-to-ping-garden", err)
				pingRequest.Reset(PingGardenInterval)
			}

		case <-heartbeatTimer.C():
			logger.Info("emitting-stalled-garden-heartbeat", lager.Data{"wait-time-ns": clock.Since(pingStart)})
			stalledDuration.Send(clock.Since(pingStart))
			heartbeatTimer.Reset(StalledMetricHeartbeatInterval)
		}
	}
}
// CalculateConvergence compares desired and actual LRPs and collects the changes
// needed to converge them: missing indices to create, extra indices to stop,
// LRPs on missing cells, crashed LRPs to restart, and stale unclaimed LRPs.
func CalculateConvergence(
	logger lager.Logger,
	clock clock.Clock,
	restartCalculator models.RestartCalculator,
	input *models.ConvergenceInput,
) *models.ConvergenceChanges {
	sess := logger.Session("calculate-convergence")

	var extraLRPCount, missingLRPCount int

	sess.Info("start")
	defer sess.Info("done")

	changes := &models.ConvergenceChanges{}

	now := clock.Now()

	for processGuid := range input.AllProcessGuids {
		pLog := sess.WithData(lager.Data{
			"process_guid": processGuid,
		})

		desired, hasDesired := input.DesiredLRPs[processGuid]
		actualsByIndex := input.ActualLRPs[processGuid]

		if hasDesired {
			for i := int32(0); i < desired.Instances; i++ {
				if _, hasIndex := actualsByIndex[i]; !hasIndex {
					pLog.Info("missing", lager.Data{"index": i})
					missingLRPCount++
					lrpKey := models.NewActualLRPKey(desired.ProcessGuid, i, desired.Domain)
					changes.ActualLRPKeysForMissingIndices = append(
						changes.ActualLRPKeysForMissingIndices,
						&lrpKey,
					)
				}
			}

			for i, actual := range actualsByIndex {
				if actual.CellIsMissing(input.Cells) {
					pLog.Info("missing-cell", lager.Data{"index": i, "cell_id": actual.CellId})
					changes.ActualLRPsWithMissingCells = append(changes.ActualLRPsWithMissingCells, actual)
					continue
				}

				if actual.Index >= desired.Instances && input.Domains.Contains(desired.Domain) {
					pLog.Info("extra", lager.Data{"index": i})
					extraLRPCount++
					changes.ActualLRPsForExtraIndices = append(changes.ActualLRPsForExtraIndices, actual)
					continue
				}

				if actual.ShouldRestartCrash(now, restartCalculator) {
					pLog.Info("restart-crash", lager.Data{"index": i})
					changes.RestartableCrashedActualLRPs = append(changes.RestartableCrashedActualLRPs, actual)
					continue
				}

				if actual.ShouldStartUnclaimed(now) {
					pLog.Info("stale-unclaimed", lager.Data{"index": i})
					changes.StaleUnclaimedActualLRPs = append(changes.StaleUnclaimedActualLRPs, actual)
					continue
				}
			}
		} else {
			for i, actual := range actualsByIndex {
				if !input.Domains.Contains(actual.Domain) {
					pLog.Info("skipping-unfresh-domain")
					continue
				}

				pLog.Info("no-longer-desired", lager.Data{"index": i})
				extraLRPCount++
				changes.ActualLRPsForExtraIndices = append(changes.ActualLRPsForExtraIndices, actual)
			}
		}
	}

	missingLRPs.Send(missingLRPCount)
	extraLRPs.Send(extraLRPCount)

	return changes
}
func (sender *Sender) Send(clock clock.Clock) error {
	sender.currentTime = clock.Now()

	err := sender.store.VerifyFreshness(sender.currentTime)
	if err != nil {
		sender.logger.Error("Store is not fresh", err)
		return err
	}

	pendingStartMessages, err := sender.store.GetPendingStartMessages()
	if err != nil {
		sender.logger.Error("Failed to fetch pending start messages", err)
		return err
	}

	pendingStopMessages, err := sender.store.GetPendingStopMessages()
	if err != nil {
		sender.logger.Error("Failed to fetch pending stop messages", err)
		return err
	}

	sender.apps, err = sender.store.GetApps()
	if err != nil {
		sender.logger.Error("Failed to fetch apps", err)
		return err
	}

	sender.sendStartMessages(pendingStartMessages)
	sender.sendStopMessages(pendingStopMessages)

	err = sender.metricsAccountant.IncrementSentMessageMetrics(sender.sentStartMessages, sender.sentStopMessages)
	if err != nil {
		sender.logger.Error("Failed to increment metrics", err)
		sender.didSucceed = false
	}

	err = sender.store.SavePendingStartMessages(sender.startMessagesToSave...)
	if err != nil {
		sender.logger.Error("Failed to save start messages", err)
		sender.didSucceed = false
	}

	err = sender.store.DeletePendingStartMessages(sender.startMessagesToDelete...)
	if err != nil {
		sender.logger.Error("Failed to delete start messages", err)
		sender.didSucceed = false
	}

	err = sender.store.SavePendingStopMessages(sender.stopMessagesToSave...)
	if err != nil {
		sender.logger.Error("Failed to save stop messages", err)
		sender.didSucceed = false
	}

	err = sender.store.DeletePendingStopMessages(sender.stopMessagesToDelete...)
	if err != nil {
		sender.logger.Error("Failed to delete stop messages", err)
		sender.didSucceed = false
	}

	if !sender.didSucceed {
		return errors.New("Sender failed. See logs for details.")
	}

	return nil
}
func dumpApp(app *models.App, starts map[string]models.PendingStartMessage, stops map[string]models.PendingStopMessage, clock clock.Clock) {
	fmt.Printf("\n")
	fmt.Printf("Guid: %s | Version: %s\n", app.AppGuid, app.AppVersion)

	if app.IsDesired() {
		fmt.Printf(" Desired: [%d] instances, (%s, %s)\n", app.Desired.NumberOfInstances, app.Desired.State, app.Desired.PackageState)
	} else {
		fmt.Printf(" Desired: NO\n")
	}

	if len(app.InstanceHeartbeats) == 0 {
		fmt.Printf(" Heartbeats: NONE\n")
	} else {
		fmt.Printf(" Heartbeats:\n")
		for _, heartbeat := range app.InstanceHeartbeats {
			fmt.Printf(" [%d %s] %s on %s\n", heartbeat.InstanceIndex, heartbeat.State, heartbeat.InstanceGuid, heartbeat.DeaGuid[0:5])
		}
	}

	if len(app.CrashCounts) != 0 {
		fmt.Printf(" CrashCounts:")
		for _, crashCount := range app.CrashCounts {
			fmt.Printf(" [%d]:%d", crashCount.InstanceIndex, crashCount.CrashCount)
		}
		fmt.Printf("\n")
	}

	appStarts := []models.PendingStartMessage{}
	appStops := []models.PendingStopMessage{}

	for _, start := range starts {
		if start.AppGuid == app.AppGuid && start.AppVersion == app.AppVersion {
			appStarts = append(appStarts, start)
		}
	}

	for _, stop := range stops {
		if stop.AppGuid == app.AppGuid && stop.AppVersion == app.AppVersion {
			appStops = append(appStops, stop)
		}
	}

	if len(appStarts) > 0 {
		fmt.Printf(" Pending Starts:\n")
		for _, start := range appStarts {
			message := []string{}
			message = append(message, fmt.Sprintf("[%d]", start.IndexToStart))
			message = append(message, fmt.Sprintf("priority:%.2f", start.Priority))
			if start.SkipVerification {
				message = append(message, "NO VERIFICATION")
			}
			if start.SentOn != 0 {
				message = append(message, "send:SENT")
				message = append(message, fmt.Sprintf("delete:%s", time.Unix(start.SentOn+int64(start.KeepAlive), 0).Sub(clock.Now())))
			} else {
				message = append(message, fmt.Sprintf("send:%s", time.Unix(start.SendOn, 0).Sub(clock.Now())))
			}

			fmt.Printf(" %s\n", strings.Join(message, " "))
		}
	}

	if len(appStops) > 0 {
		fmt.Printf(" Pending Stops:\n")
		for _, stop := range appStops {
			message := []string{}
			message = append(message, stop.InstanceGuid)
			if stop.SentOn != 0 {
				message = append(message, "send:SENT")
				message = append(message, fmt.Sprintf("delete:%s", time.Unix(stop.SentOn+int64(stop.KeepAlive), 0).Sub(clock.Now())))
			} else {
				message = append(message, fmt.Sprintf("send:%s", time.Unix(stop.SendOn, 0).Sub(clock.Now())))
			}

			fmt.Printf(" %s\n", strings.Join(message, " "))
		}
	}
}