func (db *ETCDDB) GatherAndPruneDesiredLRPs(logger lager.Logger, guids map[string]struct{}, lmc *LRPMetricCounter) (map[string]*models.DesiredLRP, error) { desiredLRPsRoot, modelErr := db.fetchRecursiveRaw(logger, DesiredLRPComponentsSchemaRoot) if modelErr == models.ErrResourceNotFound { logger.Info("actual-lrp-schema-root-not-found") return map[string]*models.DesiredLRP{}, nil } if modelErr != nil { return nil, modelErr } schedulingInfos := map[string]*models.DesiredLRPSchedulingInfo{} runInfos := map[string]*models.DesiredLRPRunInfo{} var malformedSchedulingInfos, malformedRunInfos []string var guidsLock, schedulingInfosLock, runInfosLock sync.Mutex works := []func(){} logger.Debug("walking-desired-lrp-components-tree") for _, componentRoot := range desiredLRPsRoot.Nodes { switch componentRoot.Key { case DesiredLRPSchedulingInfoSchemaRoot: for _, node := range componentRoot.Nodes { node := node works = append(works, func() { var schedulingInfo models.DesiredLRPSchedulingInfo err := db.deserializeModel(logger, node, &schedulingInfo) if err != nil || schedulingInfo.Validate() != nil { logger.Error("failed-to-deserialize-scheduling-info", err) schedulingInfosLock.Lock() malformedSchedulingInfos = append(malformedSchedulingInfos, node.Key) schedulingInfosLock.Unlock() } else { schedulingInfosLock.Lock() schedulingInfos[schedulingInfo.ProcessGuid] = &schedulingInfo schedulingInfosLock.Unlock() atomic.AddInt32(&lmc.desiredLRPs, schedulingInfo.Instances) guidsLock.Lock() guids[schedulingInfo.ProcessGuid] = struct{}{} guidsLock.Unlock() } }) } case DesiredLRPRunInfoSchemaRoot: for _, node := range componentRoot.Nodes { node := node works = append(works, func() { var runInfo models.DesiredLRPRunInfo err := db.deserializeModel(logger, node, &runInfo) if err != nil || runInfo.Validate() != nil { runInfosLock.Lock() malformedRunInfos = append(malformedRunInfos, node.Key) runInfosLock.Unlock() } else { runInfosLock.Lock() runInfos[runInfo.ProcessGuid] = &runInfo runInfosLock.Unlock() } }) } default: err := fmt.Errorf("unrecognized node under desired LRPs root node: %s", componentRoot.Key) logger.Error("unrecognized-node", err) return nil, err } } throttler, err := workpool.NewThrottler(db.convergenceWorkersSize, works) if err != nil { logger.Error("failed-to-create-throttler", err) } throttler.Work() db.batchDeleteNodes(malformedSchedulingInfos, logger) db.batchDeleteNodes(malformedRunInfos, logger) malformedSchedulingInfosMetric.Add(uint64(len(malformedSchedulingInfos))) malformedRunInfosMetric.Add(uint64(len(malformedRunInfos))) logger.Debug("done-walking-desired-lrp-tree") desireds := make(map[string]*models.DesiredLRP) var schedInfosToDelete []string for guid, schedulingInfo := range schedulingInfos { runInfo, ok := runInfos[guid] if !ok { err := fmt.Errorf("Missing runInfo for GUID %s", guid) logger.Error("runInfo-not-found-error", err) schedInfosToDelete = append(schedInfosToDelete, DesiredLRPSchedulingInfoSchemaPath(guid)) } else { desiredLRP := models.NewDesiredLRP(*schedulingInfo, *runInfo) desireds[guid] = &desiredLRP } } db.batchDeleteNodes(schedInfosToDelete, logger) // Check to see if we have orphaned RunInfos if len(runInfos) != len(schedulingInfos) { var runInfosToDelete []string for guid, runInfo := range runInfos { // If there is no corresponding SchedulingInfo and the RunInfo has // existed for longer than desiredLRPCreationTimeout, consider it orphaned // and delete it. _, ok := schedulingInfos[guid] if !ok && db.clock.Since(time.Unix(0, runInfo.CreatedAt)) > db.desiredLRPCreationTimeout { orphanedRunInfosMetric.Add(1) runInfosToDelete = append(runInfosToDelete, DesiredLRPRunInfoSchemaPath(guid)) } } db.batchDeleteNodes(runInfosToDelete, logger) } return desireds, nil }