func fetchStates(cells map[string]rep.Client) map[string]rep.CellState {
	lock := &sync.Mutex{}
	states := map[string]rep.CellState{}

	works := []func(){}
	for repGuid, cell := range cells {
		repGuid := repGuid
		cell := cell
		works = append(works, func() {
			state, _ := cell.State()
			lock.Lock()
			states[repGuid] = state
			lock.Unlock()
		})
	}

	throttler, err := workpool.NewThrottler(500, works)
	if err != nil {
		panic(err) // should never happen
	}

	throttler.Work()
	return states
}
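Every snippet in this section leans on the same two calls from github.com/cloudfoundry/gunk/workpool: NewThrottler(maxWorkers, works) and Work(). As a point of reference only (a sketch, not the library's actual implementation), a minimal stand-in with the same surface could look like this, using a buffered channel as a counting semaphore:

package workpool

import (
	"errors"
	"sync"
)

// Throttler runs a fixed set of work funcs with at most maxWorkers
// executing at any one time.
type Throttler struct {
	maxWorkers int
	works      []func()
}

func NewThrottler(maxWorkers int, works []func()) (*Throttler, error) {
	if maxWorkers <= 0 {
		return nil, errors.New("max workers must be positive")
	}
	return &Throttler{maxWorkers: maxWorkers, works: works}, nil
}

// Work blocks until every work func has run. The buffered channel acts
// as a counting semaphore that bounds concurrency at maxWorkers.
func (t *Throttler) Work() {
	sem := make(chan struct{}, t.maxWorkers)
	var wg sync.WaitGroup
	for _, work := range t.works {
		work := work // capture the loop variable, as the snippets here do
		wg.Add(1)
		sem <- struct{}{} // blocks while maxWorkers works are in flight
		go func() {
			defer wg.Done()
			defer func() { <-sem }()
			work()
		}()
	}
	wg.Wait()
}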
func (db *ETCDDB) batchDeleteNodes(keys []string, logger lager.Logger) {
	if len(keys) == 0 {
		return
	}

	works := []func(){}

	for _, key := range keys {
		key := key
		works = append(works, func() {
			logger.Info("deleting", lager.Data{"key": key})
			_, err := db.client.Delete(key, true)
			if err != nil {
				logger.Error("failed-to-delete", err, lager.Data{
					"key": key,
				})
			}
		})
	}

	throttler, err := workpool.NewThrottler(db.convergenceWorkersSize, works)
	if err != nil {
		logger.Error("failed-to-create-throttler", err)
		return // avoid calling Work on a nil throttler
	}

	throttler.Work()
}
func (db *ETCDDB) batchDeleteTasks(taskGuids []string, logger lager.Logger) {
	if len(taskGuids) == 0 {
		return
	}

	works := []func(){}

	for _, taskGuid := range taskGuids {
		taskGuid := taskGuid
		works = append(works, func() {
			_, err := db.client.Delete(taskGuid, true)
			if err != nil {
				logger.Error("failed-to-delete", err, lager.Data{
					"task-guid": taskGuid,
				})
			}
		})
	}

	throttler, err := workpool.NewThrottler(db.convergenceWorkersSize, works)
	if err != nil {
		logger.Error("failed-to-create-throttler", err)
		return // avoid calling Work on a nil throttler
	}

	throttler.Work()
}
func (db *ETCDDB) DesiredLRPs(logger lager.Logger, filter models.DesiredLRPFilter) (*models.DesiredLRPs, *models.Error) {
	root, bbsErr := db.fetchRecursiveRaw(logger, DesiredLRPSchemaRoot)
	if bbsErr.Equal(models.ErrResourceNotFound) {
		return &models.DesiredLRPs{}, nil
	}
	if bbsErr != nil {
		return nil, bbsErr
	}
	if root.Nodes.Len() == 0 {
		return &models.DesiredLRPs{}, nil
	}

	desiredLRPs := models.DesiredLRPs{}

	lrpsLock := sync.Mutex{}
	var workErr atomic.Value
	works := []func(){}

	for _, node := range root.Nodes {
		node := node

		works = append(works, func() {
			var lrp models.DesiredLRP
			deserializeErr := models.FromJSON([]byte(node.Value), &lrp)
			if deserializeErr != nil {
				logger.Error("failed-parsing-desired-lrp", deserializeErr)
				workErr.Store(fmt.Errorf("cannot parse lrp JSON for key %s: %s", node.Key, deserializeErr.Error()))
				return
			}

			if filter.Domain == "" || lrp.GetDomain() == filter.Domain {
				lrpsLock.Lock()
				desiredLRPs.DesiredLrps = append(desiredLRPs.DesiredLrps, &lrp)
				lrpsLock.Unlock()
			}
		})
	}

	throttler, err := workpool.NewThrottler(maxDesiredLRPGetterWorkPoolSize, works)
	if err != nil {
		logger.Error("failed-constructing-throttler", err, lager.Data{"max-workers": maxDesiredLRPGetterWorkPoolSize, "num-works": len(works)})
		return &models.DesiredLRPs{}, models.ErrUnknownError
	}

	logger.Debug("performing-deserialization-work")
	throttler.Work()
	if err, ok := workErr.Load().(error); ok {
		logger.Error("failed-performing-deserialization-work", err)
		return &models.DesiredLRPs{}, models.ErrUnknownError
	}
	logger.Debug("succeeded-performing-deserialization-work", lager.Data{"num-desired-lrps": len(desiredLRPs.GetDesiredLrps())})

	return &desiredLRPs, nil
}
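The atomic.Value trick above (each work stores any error it hits; the caller type-asserts after Work() returns) is worth isolating. Here is a self-contained sketch of just that pattern, with illustrative names (runAll is not from the codebase). One caveat the sketch relies on: atomic.Value requires every stored value to share the same concrete type, which holds when all works store errors built the same way.

package main

import (
	"errors"
	"fmt"
	"sync"
	"sync/atomic"
)

// runAll runs every work func concurrently and reports the last error any
// of them stored. atomic.Value gives a race-free "did anything fail"
// signal without a dedicated error mutex; only one error survives, which
// is fine when callers only need to know the batch failed.
func runAll(works []func() error) error {
	var workErr atomic.Value
	var wg sync.WaitGroup
	for _, work := range works {
		work := work // capture the loop variable
		wg.Add(1)
		go func() {
			defer wg.Done()
			if err := work(); err != nil {
				workErr.Store(err)
			}
		}()
	}
	wg.Wait()
	if err, ok := workErr.Load().(error); ok {
		return err
	}
	return nil
}

func main() {
	err := runAll([]func() error{
		func() error { return nil },
		func() error { return errors.New("boom") },
	})
	fmt.Println(err) // boom
}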
func (db *ETCDDB) ResolveConvergence(logger lager.Logger, desiredLRPs map[string]*models.DesiredLRP, changes *models.ConvergenceChanges) ([]*auctioneer.LRPStartRequest, []*models.ActualLRPKeyWithSchedulingInfo, []*models.ActualLRPKey) {
	startRequests := newStartRequests(desiredLRPs)
	for _, actual := range changes.StaleUnclaimedActualLRPs {
		startRequests.Add(logger, &actual.ActualLRPKey)
	}

	works := []func(){}

	keysToRetire := make([]*models.ActualLRPKey, len(changes.ActualLRPsForExtraIndices))
	for i, actual := range changes.ActualLRPsForExtraIndices {
		keysToRetire[i] = &actual.ActualLRPKey
	}

	keysWithMissingCells := []*models.ActualLRPKeyWithSchedulingInfo{}
	for _, actual := range changes.ActualLRPsWithMissingCells {
		desiredLRP, ok := desiredLRPs[actual.ProcessGuid]
		if !ok {
			logger.Debug("actual-with-missing-cell-no-desired")
			continue
		}

		schedInfo := desiredLRP.DesiredLRPSchedulingInfo()
		key := &models.ActualLRPKeyWithSchedulingInfo{
			Key:            &actual.ActualLRPKey,
			SchedulingInfo: &schedInfo,
		}
		keysWithMissingCells = append(keysWithMissingCells, key)
	}

	for _, actualKey := range changes.ActualLRPKeysForMissingIndices {
		works = append(works, db.resolveActualsWithMissingIndices(logger, desiredLRPs[actualKey.ProcessGuid], actualKey, startRequests))
	}

	for _, actual := range changes.RestartableCrashedActualLRPs {
		works = append(works, db.resolveRestartableCrashedActualLRPS(logger, actual, startRequests))
	}

	throttler, err := workpool.NewThrottler(db.convergenceWorkersSize, works)
	if err != nil {
		logger.Error("failed-constructing-throttler", err, lager.Data{"max_workers": db.convergenceWorkersSize, "num_works": len(works)})
		return nil, nil, nil
	}

	logger.Debug("waiting-for-lrp-convergence-work")
	throttler.Work()
	logger.Debug("done-waiting-for-lrp-convergence-work")

	return startRequests.Slice(), keysWithMissingCells, keysToRetire
}
func (db *ETCDDB) ActualLRPGroups(logger lager.Logger, filter models.ActualLRPFilter) (*models.ActualLRPGroups, *models.Error) {
	node, bbsErr := db.fetchRecursiveRaw(logger, ActualLRPSchemaRoot)
	if bbsErr.Equal(models.ErrResourceNotFound) {
		return &models.ActualLRPGroups{}, nil
	}
	if bbsErr != nil {
		return nil, bbsErr
	}
	if node.Nodes.Len() == 0 {
		return &models.ActualLRPGroups{}, nil
	}

	groups := &models.ActualLRPGroups{}

	groupsLock := sync.Mutex{}
	var workErr atomic.Value
	works := []func(){}

	for _, node := range node.Nodes {
		node := node

		works = append(works, func() {
			g, err := parseActualLRPGroups(logger, node, filter)
			if err != nil {
				workErr.Store(err)
				return
			}

			groupsLock.Lock()
			groups.ActualLrpGroups = append(groups.ActualLrpGroups, g.ActualLrpGroups...)
			groupsLock.Unlock()
		})
	}

	throttler, err := workpool.NewThrottler(maxActualGroupGetterWorkPoolSize, works)
	if err != nil {
		logger.Error("failed-constructing-throttler", err, lager.Data{"max-workers": maxActualGroupGetterWorkPoolSize, "num-works": len(works)})
		return &models.ActualLRPGroups{}, models.ErrUnknownError
	}

	logger.Debug("performing-deserialization-work")
	throttler.Work()
	if err, ok := workErr.Load().(error); ok {
		logger.Error("failed-performing-deserialization-work", err)
		return &models.ActualLRPGroups{}, models.ErrUnknownError
	}
	logger.Debug("succeeded-performing-deserialization-work", lager.Data{"num-actual-lrp-groups": len(groups.ActualLrpGroups)})

	return groups, nil
}
func (db *ETCDDB) deleteLeaves(logger lager.Logger, keys []string) error {
	works := []func(){}

	for _, key := range keys {
		key := key
		works = append(works, func() {
			_, err := db.client.DeleteDir(key)
			if err != nil {
				logger.Error("failed-deleting-leaf-node", err, lager.Data{"key": key})
			}
		})
	}

	throttler, err := workpool.NewThrottler(db.convergenceWorkersSize, works)
	if err != nil {
		return err
	}

	throttler.Work()
	return nil
}
func (db *ETCDDB) ResolveConvergence(logger lager.Logger, desiredLRPs map[string]*models.DesiredLRP, changes *models.ConvergenceChanges) {
	startRequests := newStartRequests(desiredLRPs)
	for _, actual := range changes.StaleUnclaimedActualLRPs {
		startRequests.Add(logger, &actual.ActualLRPKey)
	}

	works := []func(){}

	for _, actual := range changes.ActualLRPsForExtraIndices {
		works = append(works, db.resolveActualsToBeRetired(logger, actual))
	}

	for _, actual := range changes.ActualLRPsWithMissingCells {
		works = append(works, db.resolveActualsWithMissingCells(logger, desiredLRPs[actual.ProcessGuid], actual, startRequests))
	}

	for _, actualKey := range changes.ActualLRPKeysForMissingIndices {
		works = append(works, db.resolveActualsWithMissingIndices(logger, desiredLRPs[actualKey.ProcessGuid], actualKey, startRequests))
	}

	for _, actual := range changes.RestartableCrashedActualLRPs {
		works = append(works, db.resolveRestartableCrashedActualLRPS(logger, actual, startRequests))
	}

	throttler, err := workpool.NewThrottler(db.convergenceWorkersSize, works)
	if err != nil {
		logger.Error("failed-constructing-throttler", err, lager.Data{"max-workers": db.convergenceWorkersSize, "num-works": len(works)})
		return
	}

	logger.Debug("waiting-for-lrp-convergence-work")
	throttler.Work()
	logger.Debug("done-waiting-for-lrp-convergence-work")

	logger.Debug("requesting-start-auctions", lager.Data{"start-requests-instance-count": startRequests.InstanceCount()})
	db.startActualLRPs(logger, startRequests)
	logger.Debug("done-requesting-start-auctions", lager.Data{"start-requests-instance-count": startRequests.InstanceCount()})
}
func (db *ETCDDB) batchCompareAndSwapTasks(tasksToCAS []compareAndSwappableTask, logger lager.Logger) error {
	if len(tasksToCAS) == 0 {
		return nil
	}

	works := []func(){}

	for _, taskToCAS := range tasksToCAS {
		task := taskToCAS.NewTask
		task.UpdatedAt = db.clock.Now().UnixNano()
		value, err := db.serializeModel(logger, task)
		if err != nil {
			logger.Error("failed-to-marshal", err, lager.Data{
				"task-guid": task.TaskGuid,
			})
			continue
		}

		index := taskToCAS.OldIndex
		works = append(works, func() {
			_, err := db.client.CompareAndSwap(TaskSchemaPathByGuid(task.TaskGuid), value, NO_TTL, index)
			if err != nil {
				logger.Error("failed-to-compare-and-swap", err, lager.Data{
					"task-guid": task.TaskGuid,
				})
			}
		})
	}

	throttler, err := workpool.NewThrottler(db.convergenceWorkersSize, works)
	if err != nil {
		return err
	}

	throttler.Work()
	return nil
}
func (db *ETCDDB) retireActualLRPs(logger lager.Logger, keys []*models.ActualLRPKey) {
	logger = logger.Session("retire-actual-lrps")

	works := make([]func(), len(keys))

	for i, key := range keys {
		key := key
		works[i] = func() {
			err := db.RetireActualLRP(logger, key)
			if err != nil {
				logger.Error("failed-to-retire", err, lager.Data{"lrp-key": key})
			}
		}
	}

	throttler, err := workpool.NewThrottler(db.updateWorkersSize, works)
	if err != nil {
		logger.Error("failed-constructing-throttler", err, lager.Data{"max-workers": db.updateWorkersSize, "num-works": len(works)})
		return
	}

	throttler.Work()
}
func (db *ETCDDB) createUnclaimedActualLRPs(logger lager.Logger, keys []*models.ActualLRPKey) []int {
	count := len(keys)
	createdIndicesChan := make(chan int, count)

	works := make([]func(), count)

	for i, key := range keys {
		key := key
		works[i] = func() {
			err := db.createUnclaimedActualLRP(logger, key)
			if err != nil {
				logger.Info("failed-creating-actual-lrp", lager.Data{"actual_lrp_key": key, "err-message": err.Error()})
			} else {
				createdIndicesChan <- int(key.Index)
			}
		}
	}

	throttler, err := workpool.NewThrottler(db.updateWorkersSize, works)
	if err != nil {
		logger.Error("failed-constructing-throttler", err, lager.Data{"max-workers": db.updateWorkersSize, "num-works": len(works)})
		return []int{}
	}

	go func() {
		throttler.Work()
		close(createdIndicesChan)
	}()

	createdIndices := make([]int, 0, count)
	for createdIndex := range createdIndicesChan {
		createdIndices = append(createdIndices, createdIndex)
	}

	return createdIndices
}
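createUnclaimedActualLRPs shows the result-collection variant of the pattern: a results channel buffered to the work count so no worker ever blocks on send, closed in a goroutine once Work() returns, and drained synchronously by the caller. The same shape, distilled into a standalone sketch (collect and the job signature are illustrative, not from the codebase):

package main

import (
	"fmt"
	"sync"
)

// collect fans out jobs, gathers successful results over a channel, and
// returns them once every job has finished. Closing the channel only
// after all sends complete is what lets the range loop terminate.
func collect(jobs []func() (int, error)) []int {
	results := make(chan int, len(jobs)) // buffered: senders never block
	var wg sync.WaitGroup
	for _, job := range jobs {
		job := job
		wg.Add(1)
		go func() {
			defer wg.Done()
			if v, err := job(); err == nil {
				results <- v
			}
		}()
	}
	go func() {
		wg.Wait()
		close(results)
	}()

	out := make([]int, 0, len(jobs))
	for v := range results {
		out = append(out, v)
	}
	return out
}

func main() {
	fmt.Println(collect([]func() (int, error){
		func() (int, error) { return 1, nil },
		func() (int, error) { return 2, nil },
	}))
}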
func (db *ETCDDB) gatherAndOptionallyPruneActualLRPs(logger lager.Logger, guids map[string]struct{}, doPrune bool, lmc *LRPMetricCounter) (map[string]map[int32]*models.ActualLRP, error) {
	response, modelErr := db.fetchRecursiveRaw(logger, ActualLRPSchemaRoot)

	if modelErr == models.ErrResourceNotFound {
		logger.Info("actual-lrp-schema-root-not-found")
		return map[string]map[int32]*models.ActualLRP{}, nil
	}

	if modelErr != nil {
		return nil, modelErr
	}

	actuals := map[string]map[int32]*models.ActualLRP{}
	var guidKeysToDelete, indexKeysToDelete []string
	var actualsToDelete []string
	var guidsLock, actualsLock, guidKeysToDeleteLock, indexKeysToDeleteLock, crashingDesiredsLock, actualsToDeleteLock sync.Mutex

	logger.Debug("walking-actual-lrp-tree")
	works := []func(){}
	crashingDesireds := map[string]struct{}{}

	for _, guidGroup := range response.Nodes {
		guidGroup := guidGroup
		works = append(works, func() {
			guidGroupWillBeEmpty := true

			for _, indexGroup := range guidGroup.Nodes {
				indexGroupWillBeEmpty := true

				for _, actualNode := range indexGroup.Nodes {
					actual := new(models.ActualLRP)
					err := db.deserializeModel(logger, actualNode, actual)
					if err != nil {
						actualsToDeleteLock.Lock()
						actualsToDelete = append(actualsToDelete, actualNode.Key)
						actualsToDeleteLock.Unlock()
						continue
					}

					err = actual.Validate()
					if err != nil {
						actualsToDeleteLock.Lock()
						actualsToDelete = append(actualsToDelete, actualNode.Key)
						actualsToDeleteLock.Unlock()
						continue
					}

					indexGroupWillBeEmpty = false
					guidGroupWillBeEmpty = false

					switch actual.State {
					case models.ActualLRPStateUnclaimed:
						atomic.AddInt32(&lmc.unclaimedLRPs, 1)
					case models.ActualLRPStateClaimed:
						atomic.AddInt32(&lmc.claimedLRPs, 1)
					case models.ActualLRPStateRunning:
						atomic.AddInt32(&lmc.runningLRPs, 1)
					case models.ActualLRPStateCrashed:
						crashingDesiredsLock.Lock()
						crashingDesireds[actual.ProcessGuid] = struct{}{}
						crashingDesiredsLock.Unlock()
						atomic.AddInt32(&lmc.crashedActualLRPs, 1)
					}

					guidsLock.Lock()
					guids[actual.ProcessGuid] = struct{}{}
					guidsLock.Unlock()

					if path.Base(actualNode.Key) == ActualLRPInstanceKey {
						actualsLock.Lock()
						if actuals[actual.ProcessGuid] == nil {
							actuals[actual.ProcessGuid] = map[int32]*models.ActualLRP{}
						}
						actuals[actual.ProcessGuid][actual.Index] = actual
						actualsLock.Unlock()
					}
				}

				if indexGroupWillBeEmpty {
					indexKeysToDeleteLock.Lock()
					indexKeysToDelete = append(indexKeysToDelete, indexGroup.Key)
					indexKeysToDeleteLock.Unlock()
				}
			}

			if guidGroupWillBeEmpty {
				guidKeysToDeleteLock.Lock()
				guidKeysToDelete = append(guidKeysToDelete, guidGroup.Key)
				guidKeysToDeleteLock.Unlock()
			}
		})
	}
	logger.Debug("done-walking-actual-lrp-tree")

	throttler, err := workpool.NewThrottler(db.convergenceWorkersSize, works)
	if err != nil {
		logger.Error("failed-to-create-throttler", err)
		return nil, err // avoid calling Work on a nil throttler
	}

	throttler.Work()

	if doPrune {
		logger.Info("deleting-invalid-actual-lrps", lager.Data{"num_lrps": len(actualsToDelete)})
		db.batchDeleteNodes(actualsToDelete, logger)
		actualLRPsDeleted.Add(uint64(len(actualsToDelete)))

		logger.Info("deleting-empty-actual-indices", lager.Data{"num_indices": len(indexKeysToDelete)})
		err = db.deleteLeaves(logger, indexKeysToDelete)
		if err != nil {
			logger.Error("failed-deleting-empty-actual-indices", err, lager.Data{"num_indices": len(indexKeysToDelete)})
		} else {
			logger.Info("succeeded-deleting-empty-actual-indices", lager.Data{"num_indices": len(indexKeysToDelete)})
		}

		logger.Info("deleting-empty-actual-guids", lager.Data{"num_guids": len(guidKeysToDelete)})
		err = db.deleteLeaves(logger, guidKeysToDelete)
		if err != nil {
			logger.Error("failed-deleting-empty-actual-guids", err, lager.Data{"num_guids": len(guidKeysToDelete)})
		} else {
			logger.Info("succeeded-deleting-empty-actual-guids", lager.Data{"num_guids": len(guidKeysToDelete)})
		}
	}

	lmc.crashingDesiredLRPs = int32(len(crashingDesireds))

	return actuals, nil
}
func (p *Processor) createMissingDesiredLRPs(
	logger lager.Logger,
	cancel <-chan struct{},
	missing <-chan []cc_messages.DesireAppRequestFromCC,
	invalidCount *int32,
) <-chan error {
	logger = logger.Session("create-missing-desired-lrps")

	errc := make(chan error, 1)

	go func() {
		defer close(errc)

		for {
			var desireAppRequests []cc_messages.DesireAppRequestFromCC

			select {
			case <-cancel:
				return

			case selected, open := <-missing:
				if !open {
					return
				}

				desireAppRequests = selected
			}

			works := make([]func(), len(desireAppRequests))

			for i, desireAppRequest := range desireAppRequests {
				desireAppRequest := desireAppRequest
				var builder recipebuilder.RecipeBuilder = p.builders["buildpack"]
				if desireAppRequest.DockerImageUrl != "" {
					builder = p.builders["docker"]
				}

				works[i] = func() {
					logger.Debug("building-create-desired-lrp-request", desireAppRequestDebugData(&desireAppRequest))
					desired, err := builder.Build(&desireAppRequest)
					if err != nil {
						logger.Error("failed-building-create-desired-lrp-request", err, lager.Data{"process-guid": desireAppRequest.ProcessGuid})
						errc <- err
						return
					}
					logger.Debug("succeeded-building-create-desired-lrp-request", desireAppRequestDebugData(&desireAppRequest))

					logger.Debug("creating-desired-lrp", createDesiredReqDebugData(desired))
					err = p.bbsClient.DesireLRP(desired)
					if err != nil {
						logger.Error("failed-creating-desired-lrp", err, lager.Data{"process-guid": desired.ProcessGuid})
						if models.ConvertError(err).Type == models.Error_InvalidRequest {
							atomic.AddInt32(invalidCount, int32(1))
						} else {
							errc <- err
						}
						return
					}
					logger.Debug("succeeded-creating-desired-lrp", createDesiredReqDebugData(desired))
				}
			}

			throttler, err := workpool.NewThrottler(p.updateLRPWorkPoolSize, works)
			if err != nil {
				errc <- err
				return
			}

			logger.Info("processing-batch", lager.Data{"size": len(desireAppRequests)})
			throttler.Work()
			logger.Info("done-processing-batch", lager.Data{"size": len(desireAppRequests)})
		}
	}()

	return errc
}
func (p *Processor) updateStaleDesiredLRPs(
	logger lager.Logger,
	cancel <-chan struct{},
	stale <-chan []cc_messages.DesireAppRequestFromCC,
	existingSchedulingInfoMap map[string]*models.DesiredLRPSchedulingInfo,
	invalidCount *int32,
) <-chan error {
	logger = logger.Session("update-stale-desired-lrps")

	errc := make(chan error, 1)

	go func() {
		defer close(errc)

		for {
			var staleAppRequests []cc_messages.DesireAppRequestFromCC

			select {
			case <-cancel:
				return

			case selected, open := <-stale:
				if !open {
					return
				}

				staleAppRequests = selected
			}

			works := make([]func(), len(staleAppRequests))

			for i, desireAppRequest := range staleAppRequests {
				desireAppRequest := desireAppRequest
				var builder recipebuilder.RecipeBuilder = p.builders["buildpack"]
				if desireAppRequest.DockerImageUrl != "" {
					builder = p.builders["docker"]
				}

				works[i] = func() {
					processGuid := desireAppRequest.ProcessGuid
					existingSchedulingInfo := existingSchedulingInfoMap[desireAppRequest.ProcessGuid]

					updateReq := &models.DesiredLRPUpdate{}
					instances := int32(desireAppRequest.NumInstances)
					updateReq.Instances = &instances
					updateReq.Annotation = &desireAppRequest.ETag

					exposedPorts, err := builder.ExtractExposedPorts(&desireAppRequest)
					if err != nil {
						logger.Error("failed-updating-stale-lrp", err, lager.Data{
							"process-guid":       processGuid,
							"execution-metadata": desireAppRequest.ExecutionMetadata,
						})
						errc <- err
						return
					}

					routes, err := helpers.CCRouteInfoToRoutes(desireAppRequest.RoutingInfo, exposedPorts)
					if err != nil {
						logger.Error("failed-to-marshal-routes", err)
						errc <- err
						return
					}

					updateReq.Routes = &routes

					for k, v := range existingSchedulingInfo.Routes {
						if k != cfroutes.CF_ROUTER {
							(*updateReq.Routes)[k] = v
						}
					}

					logger.Debug("updating-stale-lrp", updateDesiredRequestDebugData(processGuid, updateReq))
					err = p.bbsClient.UpdateDesiredLRP(processGuid, updateReq)
					if err != nil {
						logger.Error("failed-updating-stale-lrp", err, lager.Data{
							"process-guid": processGuid,
						})
						if models.ConvertError(err).Type == models.Error_InvalidRequest {
							atomic.AddInt32(invalidCount, int32(1))
						} else {
							errc <- err
						}
						return
					}
					logger.Debug("succeeded-updating-stale-lrp", updateDesiredRequestDebugData(processGuid, updateReq))
				}
			}

			throttler, err := workpool.NewThrottler(p.updateLRPWorkPoolSize, works)
			if err != nil {
				errc <- err
				return
			}

			logger.Info("processing-batch", lager.Data{"size": len(staleAppRequests)})
			throttler.Work()
			logger.Info("done-processing-batch", lager.Data{"size": len(staleAppRequests)})
		}
	}()

	return errc
}
package workpool_test

import (
	"github.com/cloudfoundry/gunk/workpool"

	. "github.com/onsi/ginkgo"
	. "github.com/onsi/gomega"
)

var _ = Describe("Throttler", func() {
	var throttler *workpool.Throttler

	Context("when max workers is non-positive", func() {
		It("errors", func() {
			_, err := workpool.NewThrottler(0, []func(){})
			Expect(err).To(HaveOccurred())
		})
	})

	Context("when max workers is positive", func() {
		var maxWorkers int
		var calledChan chan int
		var unblockChan chan struct{}
		var work func(int) func()

		BeforeEach(func() {
			maxWorkers = 2
			calledChan = make(chan int)
			unblockChan = make(chan struct{})

			work = func(i int) func() {
				return func() {
					calledChan <- i
					<-unblockChan
				}
			}
		})

		// ... (remaining specs truncated in the original excerpt)
	})
})
func (db *ETCDDB) GatherDesiredLRPs(logger lager.Logger, guids map[string]struct{}, lmc *LRPMetricCounter) (map[string]*models.DesiredLRP, error) {
	desiredLRPsRoot, modelErr := db.fetchRecursiveRaw(logger, DesiredLRPComponentsSchemaRoot)

	if modelErr == models.ErrResourceNotFound {
		logger.Info("desired-lrp-schema-root-not-found")
		return map[string]*models.DesiredLRP{}, nil
	}

	if modelErr != nil {
		return nil, modelErr
	}

	schedulingInfos := map[string]*models.DesiredLRPSchedulingInfo{}
	runInfos := map[string]*models.DesiredLRPRunInfo{}

	var malformedSchedulingInfos int32
	var malformedRunInfos int32
	var guidsLock, schedulingInfosLock, runInfosLock sync.Mutex

	works := []func(){}
	logger.Info("walking-desired-lrp-components-tree")

	for _, componentRoot := range desiredLRPsRoot.Nodes {
		switch componentRoot.Key {
		case DesiredLRPSchedulingInfoSchemaRoot:
			for _, node := range componentRoot.Nodes {
				node := node
				works = append(works, func() {
					var schedulingInfo models.DesiredLRPSchedulingInfo
					err := db.deserializeModel(logger, node, &schedulingInfo)
					if err != nil {
						logger.Error("failed-to-deserialize-scheduling-info", err)
						atomic.AddInt32(&malformedSchedulingInfos, 1)
					} else {
						schedulingInfosLock.Lock()
						schedulingInfos[schedulingInfo.ProcessGuid] = &schedulingInfo
						schedulingInfosLock.Unlock()
						atomic.AddInt32(&lmc.desiredLRPs, schedulingInfo.Instances)

						guidsLock.Lock()
						guids[schedulingInfo.ProcessGuid] = struct{}{}
						guidsLock.Unlock()
					}
				})
			}
		case DesiredLRPRunInfoSchemaRoot:
			for _, node := range componentRoot.Nodes {
				node := node
				works = append(works, func() {
					var runInfo models.DesiredLRPRunInfo
					err := db.deserializeModel(logger, node, &runInfo)
					if err != nil {
						logger.Error("failed-to-deserialize-run-info", err)
						atomic.AddInt32(&malformedRunInfos, 1)
					} else {
						runInfosLock.Lock()
						runInfos[runInfo.ProcessGuid] = &runInfo
						runInfosLock.Unlock()
					}
				})
			}
		default:
			err := fmt.Errorf("unrecognized node under desired LRPs root node: %s", componentRoot.Key)
			logger.Error("unrecognized-node", err)
			return nil, err
		}
	}

	throttler, err := workpool.NewThrottler(db.convergenceWorkersSize, works)
	if err != nil {
		logger.Error("failed-to-create-throttler", err)
		return nil, err // avoid calling Work on a nil throttler
	}

	throttler.Work()

	malformedSchedulingInfosMetric.Add(uint64(malformedSchedulingInfos))
	malformedRunInfosMetric.Add(uint64(malformedRunInfos))

	logger.Info("done-walking-desired-lrp-tree")

	desireds := make(map[string]*models.DesiredLRP)
	for guid, schedulingInfo := range schedulingInfos {
		// Assumes a matching runInfo exists for every schedulingInfo; the
		// pruning variant below handles the mismatched case explicitly.
		runInfo := runInfos[guid]
		desiredLRP := models.NewDesiredLRP(*schedulingInfo, *runInfo)
		desireds[guid] = &desiredLRP
	}

	return desireds, nil
}
func (db *ETCDDB) GatherAndPruneDesiredLRPs(logger lager.Logger, guids map[string]struct{}, lmc *LRPMetricCounter) (map[string]*models.DesiredLRP, error) {
	desiredLRPsRoot, modelErr := db.fetchRecursiveRaw(logger, DesiredLRPComponentsSchemaRoot)

	if modelErr == models.ErrResourceNotFound {
		logger.Info("desired-lrp-schema-root-not-found")
		return map[string]*models.DesiredLRP{}, nil
	}

	if modelErr != nil {
		return nil, modelErr
	}

	schedulingInfos := map[string]*models.DesiredLRPSchedulingInfo{}
	runInfos := map[string]*models.DesiredLRPRunInfo{}

	var malformedSchedulingInfos, malformedRunInfos []string

	var guidsLock, schedulingInfosLock, runInfosLock sync.Mutex

	works := []func(){}
	logger.Debug("walking-desired-lrp-components-tree")

	for _, componentRoot := range desiredLRPsRoot.Nodes {
		switch componentRoot.Key {
		case DesiredLRPSchedulingInfoSchemaRoot:
			for _, node := range componentRoot.Nodes {
				node := node
				works = append(works, func() {
					var schedulingInfo models.DesiredLRPSchedulingInfo
					err := db.deserializeModel(logger, node, &schedulingInfo)
					if err != nil || schedulingInfo.Validate() != nil {
						logger.Error("failed-to-deserialize-scheduling-info", err)
						schedulingInfosLock.Lock()
						malformedSchedulingInfos = append(malformedSchedulingInfos, node.Key)
						schedulingInfosLock.Unlock()
					} else {
						schedulingInfosLock.Lock()
						schedulingInfos[schedulingInfo.ProcessGuid] = &schedulingInfo
						schedulingInfosLock.Unlock()
						atomic.AddInt32(&lmc.desiredLRPs, schedulingInfo.Instances)

						guidsLock.Lock()
						guids[schedulingInfo.ProcessGuid] = struct{}{}
						guidsLock.Unlock()
					}
				})
			}
		case DesiredLRPRunInfoSchemaRoot:
			for _, node := range componentRoot.Nodes {
				node := node
				works = append(works, func() {
					var runInfo models.DesiredLRPRunInfo
					err := db.deserializeModel(logger, node, &runInfo)
					if err != nil || runInfo.Validate() != nil {
						runInfosLock.Lock()
						malformedRunInfos = append(malformedRunInfos, node.Key)
						runInfosLock.Unlock()
					} else {
						runInfosLock.Lock()
						runInfos[runInfo.ProcessGuid] = &runInfo
						runInfosLock.Unlock()
					}
				})
			}
		default:
			err := fmt.Errorf("unrecognized node under desired LRPs root node: %s", componentRoot.Key)
			logger.Error("unrecognized-node", err)
			return nil, err
		}
	}

	throttler, err := workpool.NewThrottler(db.convergenceWorkersSize, works)
	if err != nil {
		logger.Error("failed-to-create-throttler", err)
		return nil, err // avoid calling Work on a nil throttler
	}

	throttler.Work()

	db.batchDeleteNodes(malformedSchedulingInfos, logger)
	db.batchDeleteNodes(malformedRunInfos, logger)

	malformedSchedulingInfosMetric.Add(uint64(len(malformedSchedulingInfos)))
	malformedRunInfosMetric.Add(uint64(len(malformedRunInfos)))

	logger.Debug("done-walking-desired-lrp-tree")

	desireds := make(map[string]*models.DesiredLRP)
	var schedInfosToDelete []string
	for guid, schedulingInfo := range schedulingInfos {
		runInfo, ok := runInfos[guid]
		if !ok {
			err := fmt.Errorf("Missing runInfo for GUID %s", guid)
			logger.Error("runInfo-not-found-error", err)
			schedInfosToDelete = append(schedInfosToDelete, DesiredLRPSchedulingInfoSchemaPath(guid))
		} else {
			desiredLRP := models.NewDesiredLRP(*schedulingInfo, *runInfo)
			desireds[guid] = &desiredLRP
		}
	}
	db.batchDeleteNodes(schedInfosToDelete, logger)

	// Check to see if we have orphaned RunInfos.
	if len(runInfos) != len(schedulingInfos) {
		var runInfosToDelete []string
		for guid, runInfo := range runInfos {
			// If there is no corresponding SchedulingInfo and the RunInfo has
			// existed for longer than desiredLRPCreationTimeout, consider it
			// orphaned and delete it.
			_, ok := schedulingInfos[guid]
			if !ok && db.clock.Since(time.Unix(0, runInfo.CreatedAt)) > db.desiredLRPCreationTimeout {
				orphanedRunInfosMetric.Add(1)
				runInfosToDelete = append(runInfosToDelete, DesiredLRPRunInfoSchemaPath(guid))
			}
		}
		db.batchDeleteNodes(runInfosToDelete, logger)
	}

	return desireds, nil
}