Esempio n. 1
0
func (db *ETCDDB) GatherAndPruneDesiredLRPs(logger lager.Logger, guids map[string]struct{}, lmc *LRPMetricCounter) (map[string]*models.DesiredLRP, error) {
	desiredLRPsRoot, modelErr := db.fetchRecursiveRaw(logger, DesiredLRPComponentsSchemaRoot)

	if modelErr == models.ErrResourceNotFound {
		logger.Info("actual-lrp-schema-root-not-found")
		return map[string]*models.DesiredLRP{}, nil
	}

	if modelErr != nil {
		return nil, modelErr
	}

	schedulingInfos := map[string]*models.DesiredLRPSchedulingInfo{}
	runInfos := map[string]*models.DesiredLRPRunInfo{}

	var malformedSchedulingInfos, malformedRunInfos []string

	var guidsLock, schedulingInfosLock, runInfosLock sync.Mutex

	works := []func(){}
	logger.Debug("walking-desired-lrp-components-tree")

	for _, componentRoot := range desiredLRPsRoot.Nodes {
		switch componentRoot.Key {
		case DesiredLRPSchedulingInfoSchemaRoot:
			for _, node := range componentRoot.Nodes {
				node := node
				works = append(works, func() {
					var schedulingInfo models.DesiredLRPSchedulingInfo
					err := db.deserializeModel(logger, node, &schedulingInfo)
					if err != nil || schedulingInfo.Validate() != nil {
						logger.Error("failed-to-deserialize-scheduling-info", err)
						schedulingInfosLock.Lock()
						malformedSchedulingInfos = append(malformedSchedulingInfos, node.Key)
						schedulingInfosLock.Unlock()
					} else {
						schedulingInfosLock.Lock()
						schedulingInfos[schedulingInfo.ProcessGuid] = &schedulingInfo
						schedulingInfosLock.Unlock()
						atomic.AddInt32(&lmc.desiredLRPs, schedulingInfo.Instances)

						guidsLock.Lock()
						guids[schedulingInfo.ProcessGuid] = struct{}{}
						guidsLock.Unlock()
					}
				})
			}
		case DesiredLRPRunInfoSchemaRoot:
			for _, node := range componentRoot.Nodes {
				node := node
				works = append(works, func() {
					var runInfo models.DesiredLRPRunInfo
					err := db.deserializeModel(logger, node, &runInfo)
					if err != nil || runInfo.Validate() != nil {
						runInfosLock.Lock()
						malformedRunInfos = append(malformedRunInfos, node.Key)
						runInfosLock.Unlock()
					} else {
						runInfosLock.Lock()
						runInfos[runInfo.ProcessGuid] = &runInfo
						runInfosLock.Unlock()
					}
				})
			}
		default:
			err := fmt.Errorf("unrecognized node under desired LRPs root node: %s", componentRoot.Key)
			logger.Error("unrecognized-node", err)
			return nil, err
		}
	}

	throttler, err := workpool.NewThrottler(db.convergenceWorkersSize, works)
	if err != nil {
		logger.Error("failed-to-create-throttler", err)
	}

	throttler.Work()

	db.batchDeleteNodes(malformedSchedulingInfos, logger)
	db.batchDeleteNodes(malformedRunInfos, logger)

	malformedSchedulingInfosMetric.Add(uint64(len(malformedSchedulingInfos)))
	malformedRunInfosMetric.Add(uint64(len(malformedRunInfos)))

	logger.Debug("done-walking-desired-lrp-tree")

	desireds := make(map[string]*models.DesiredLRP)
	var schedInfosToDelete []string
	for guid, schedulingInfo := range schedulingInfos {
		runInfo, ok := runInfos[guid]
		if !ok {
			err := fmt.Errorf("Missing runInfo for GUID %s", guid)
			logger.Error("runInfo-not-found-error", err)
			schedInfosToDelete = append(schedInfosToDelete, DesiredLRPSchedulingInfoSchemaPath(guid))
		} else {
			desiredLRP := models.NewDesiredLRP(*schedulingInfo, *runInfo)
			desireds[guid] = &desiredLRP
		}
	}
	db.batchDeleteNodes(schedInfosToDelete, logger)

	// Check to see if we have orphaned RunInfos
	if len(runInfos) != len(schedulingInfos) {
		var runInfosToDelete []string
		for guid, runInfo := range runInfos {
			// If there is no corresponding SchedulingInfo and the RunInfo has
			// existed for longer than desiredLRPCreationTimeout, consider it orphaned
			// and delete it.
			_, ok := schedulingInfos[guid]
			if !ok && db.clock.Since(time.Unix(0, runInfo.CreatedAt)) > db.desiredLRPCreationTimeout {
				orphanedRunInfosMetric.Add(1)
				runInfosToDelete = append(runInfosToDelete, DesiredLRPRunInfoSchemaPath(guid))
			}
		}

		db.batchDeleteNodes(runInfosToDelete, logger)
	}

	return desireds, nil
}