Example #1
func fetchStates(cells map[string]rep.Client) map[string]rep.CellState {
	lock := &sync.Mutex{}
	states := map[string]rep.CellState{}
	works := []func(){}

	for repGuid, cell := range cells {
		repGuid := repGuid
		cell := cell
		works = append(works, func() {
			state, _ := cell.State()
			lock.Lock()
			states[repGuid] = state
			lock.Unlock()
		})
	}

	throttler, err := workpool.NewThrottler(500, works)
	if err != nil {
		panic(err) // should never happen
	}

	throttler.Work()

	return states
}
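
All of the examples in this listing follow the same pattern: build a slice of func() closures, hand it to workpool.NewThrottler together with a concurrency cap, and call Work(), which blocks until every closure has run. Below is a minimal self-contained sketch of that pattern, assuming only the NewThrottler/Work API visible in these examples; the map, the cap of 3, and the squares computation are illustrative.

package main

import (
	"fmt"
	"sync"

	"github.com/cloudfoundry/gunk/workpool"
)

func main() {
	squares := map[int]int{}
	lock := &sync.Mutex{}

	works := make([]func(), 10)
	for i := range works {
		i := i // capture the loop variable, as the examples above do
		works[i] = func() {
			lock.Lock()
			squares[i] = i * i
			lock.Unlock()
		}
	}

	// At most 3 of the works run concurrently.
	throttler, err := workpool.NewThrottler(3, works)
	if err != nil {
		panic(err) // only happens for a non-positive worker count
	}

	throttler.Work() // blocks until all works have finished

	fmt.Println(squares)
}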
Example #2
func (db *ETCDDB) batchDeleteNodes(keys []string, logger lager.Logger) {
	if len(keys) == 0 {
		return
	}

	works := []func(){}

	for _, key := range keys {
		key := key
		works = append(works, func() {
			logger.Info("deleting", lager.Data{"key": key})
			_, err := db.client.Delete(key, true)
			if err != nil {
				logger.Error("failed-to-delete", err, lager.Data{
					"key": key,
				})
			}
		})
	}

	throttler, err := workpool.NewThrottler(db.convergenceWorkersSize, works)
	if err != nil {
		logger.Error("failed-to-create-throttler", err)
	}

	throttler.Work()
	return
}
Example #3
func (db *ETCDDB) batchDeleteTasks(taskGuids []string, logger lager.Logger) {
	if len(taskGuids) == 0 {
		return
	}

	works := []func(){}

	for _, taskGuid := range taskGuids {
		taskGuid := taskGuid
		works = append(works, func() {
			_, err := db.client.Delete(taskGuid, true)
			if err != nil {
				logger.Error("failed-to-delete", err, lager.Data{
					"task-guid": taskGuid,
				})
			}
		})
	}

	throttler, err := workpool.NewThrottler(db.convergenceWorkersSize, works)
	if err != nil {
		logger.Error("failed-to-create-throttler", err)
	}

	throttler.Work()
	return
}
Example #4
func (db *ETCDDB) DesiredLRPs(logger lager.Logger, filter models.DesiredLRPFilter) (*models.DesiredLRPs, *models.Error) {
	root, bbsErr := db.fetchRecursiveRaw(logger, DesiredLRPSchemaRoot)
	if bbsErr.Equal(models.ErrResourceNotFound) {
		return &models.DesiredLRPs{}, nil
	}
	if bbsErr != nil {
		return nil, bbsErr
	}
	if root.Nodes.Len() == 0 {
		return &models.DesiredLRPs{}, nil
	}

	desiredLRPs := models.DesiredLRPs{}

	lrpsLock := sync.Mutex{}
	var workErr atomic.Value
	works := []func(){}

	for _, node := range root.Nodes {
		node := node

		works = append(works, func() {
			var lrp models.DesiredLRP
			deserializeErr := models.FromJSON([]byte(node.Value), &lrp)
			if deserializeErr != nil {
				logger.Error("failed-parsing-desired-lrp", deserializeErr)
				workErr.Store(fmt.Errorf("cannot parse lrp JSON for key %s: %s", node.Key, deserializeErr.Error()))
				return
			}

			if filter.Domain == "" || lrp.GetDomain() == filter.Domain {
				lrpsLock.Lock()
				desiredLRPs.DesiredLrps = append(desiredLRPs.DesiredLrps, &lrp)
				lrpsLock.Unlock()
			}
		})
	}

	throttler, err := workpool.NewThrottler(maxDesiredLRPGetterWorkPoolSize, works)
	if err != nil {
		logger.Error("failed-constructing-throttler", err, lager.Data{"max-workers": maxDesiredLRPGetterWorkPoolSize, "num-works": len(works)})
		return &models.DesiredLRPs{}, models.ErrUnknownError
	}

	logger.Debug("performing-deserialization-work")
	throttler.Work()
	if err, ok := workErr.Load().(error); ok {
		logger.Error("failed-performing-deserialization-work", err)
		return &models.DesiredLRPs{}, models.ErrUnknownError
	}
	logger.Debug("succeeded-performing-deserialization-work", lager.Data{"num-desired-lrps": len(desiredLRPs.GetDesiredLrps())})

	return &desiredLRPs, nil
}
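
Examples #4 and #6 also share an error-propagation idiom: each work stores any failure in an atomic.Value, and the caller inspects it once Work() returns; because a single value is stored, later failures overwrite earlier ones and only one error is reported. The sketch below is a compact version of that idiom; the item type, the process helper, and the worker cap are illustrative stand-ins, not anything from the original code.

package main

import (
	"errors"
	"fmt"
	"sync/atomic"

	"github.com/cloudfoundry/gunk/workpool"
)

// process stands in for per-item work such as deserialization.
func process(item string) error {
	if item == "" {
		return errors.New("empty item")
	}
	return nil
}

func processAll(items []string, maxWorkers int) error {
	var workErr atomic.Value

	works := make([]func(), len(items))
	for i, item := range items {
		item := item // capture the loop variable
		works[i] = func() {
			if err := process(item); err != nil {
				workErr.Store(err) // later stores overwrite earlier ones
			}
		}
	}

	throttler, err := workpool.NewThrottler(maxWorkers, works)
	if err != nil {
		return err
	}
	throttler.Work()

	if err, ok := workErr.Load().(error); ok {
		return err
	}
	return nil
}

func main() {
	fmt.Println(processAll([]string{"a", "", "c"}, 2))
}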
Example #5
func (db *ETCDDB) ResolveConvergence(logger lager.Logger, desiredLRPs map[string]*models.DesiredLRP, changes *models.ConvergenceChanges) ([]*auctioneer.LRPStartRequest, []*models.ActualLRPKeyWithSchedulingInfo, []*models.ActualLRPKey) {
	startRequests := newStartRequests(desiredLRPs)
	for _, actual := range changes.StaleUnclaimedActualLRPs {
		startRequests.Add(logger, &actual.ActualLRPKey)
	}

	works := []func(){}

	keysToRetire := make([]*models.ActualLRPKey, len(changes.ActualLRPsForExtraIndices))
	for i, actual := range changes.ActualLRPsForExtraIndices {
		keysToRetire[i] = &actual.ActualLRPKey
	}

	keysWithMissingCells := []*models.ActualLRPKeyWithSchedulingInfo{}
	for _, actual := range changes.ActualLRPsWithMissingCells {
		desiredLRP, ok := desiredLRPs[actual.ProcessGuid]
		if !ok {
			logger.Debug("actual-with-missing-cell-no-desired")
			continue
		}

		schedInfo := desiredLRP.DesiredLRPSchedulingInfo()

		key := &models.ActualLRPKeyWithSchedulingInfo{
			Key:            &actual.ActualLRPKey,
			SchedulingInfo: &schedInfo,
		}

		keysWithMissingCells = append(keysWithMissingCells, key)
	}

	for _, actualKey := range changes.ActualLRPKeysForMissingIndices {
		works = append(works, db.resolveActualsWithMissingIndices(logger, desiredLRPs[actualKey.ProcessGuid], actualKey, startRequests))
	}

	for _, actual := range changes.RestartableCrashedActualLRPs {
		works = append(works, db.resolveRestartableCrashedActualLRPS(logger, actual, startRequests))
	}

	throttler, err := workpool.NewThrottler(db.convergenceWorkersSize, works)
	if err != nil {
		logger.Error("failed-constructing-throttler", err, lager.Data{"max_workers": db.convergenceWorkersSize, "num_works": len(works)})
		return nil, nil, nil
	}

	logger.Debug("waiting-for-lrp-convergence-work")
	throttler.Work()
	logger.Debug("done-waiting-for-lrp-convergence-work")

	return startRequests.Slice(), keysWithMissingCells, keysToRetire
}
Example #6
func (db *ETCDDB) ActualLRPGroups(logger lager.Logger, filter models.ActualLRPFilter) (*models.ActualLRPGroups, *models.Error) {
	node, bbsErr := db.fetchRecursiveRaw(logger, ActualLRPSchemaRoot)
	if bbsErr.Equal(models.ErrResourceNotFound) {
		return &models.ActualLRPGroups{}, nil
	}
	if bbsErr != nil {
		return nil, bbsErr
	}
	if node.Nodes.Len() == 0 {
		return &models.ActualLRPGroups{}, nil
	}

	groups := &models.ActualLRPGroups{}

	groupsLock := sync.Mutex{}
	var workErr atomic.Value
	works := []func(){}

	for _, node := range node.Nodes {
		node := node

		works = append(works, func() {
			g, err := parseActualLRPGroups(logger, node, filter)
			if err != nil {
				workErr.Store(err)
				return
			}
			groupsLock.Lock()
			groups.ActualLrpGroups = append(groups.ActualLrpGroups, g.ActualLrpGroups...)
			groupsLock.Unlock()
		})
	}

	throttler, err := workpool.NewThrottler(maxActualGroupGetterWorkPoolSize, works)
	if err != nil {
		logger.Error("failed-constructing-throttler", err, lager.Data{"max-workers": maxActualGroupGetterWorkPoolSize, "num-works": len(works)})
		return &models.ActualLRPGroups{}, models.ErrUnknownError
	}

	logger.Debug("performing-deserialization-work")
	throttler.Work()
	if err, ok := workErr.Load().(error); ok {
		logger.Error("failed-performing-deserialization-work", err)
		return &models.ActualLRPGroups{}, models.ErrUnknownError
	}
	logger.Debug("succeeded-performing-deserialization-work", lager.Data{"num-actual-lrp-groups": len(groups.ActualLrpGroups)})

	return groups, nil
}
Example #7
func (db *ETCDDB) deleteLeaves(logger lager.Logger, keys []string) error {
	works := []func(){}

	for _, key := range keys {
		key := key
		works = append(works, func() {
			_, err := db.client.DeleteDir(key)
			if err != nil {
				logger.Error("failed-deleting-leaf-node", err, lager.Data{"key": key})
			}
		})
	}

	throttler, err := workpool.NewThrottler(db.convergenceWorkersSize, works)
	if err != nil {
		return err
	}

	throttler.Work()

	return nil
}
Example #8
func (db *ETCDDB) ResolveConvergence(logger lager.Logger, desiredLRPs map[string]*models.DesiredLRP, changes *models.ConvergenceChanges) {
	startRequests := newStartRequests(desiredLRPs)
	for _, actual := range changes.StaleUnclaimedActualLRPs {
		startRequests.Add(logger, &actual.ActualLRPKey)
	}

	works := []func(){}

	for _, actual := range changes.ActualLRPsForExtraIndices {
		works = append(works, db.resolveActualsToBeRetired(logger, actual))
	}

	for _, actual := range changes.ActualLRPsWithMissingCells {
		works = append(works, db.resolveActualsWithMissingCells(logger, desiredLRPs[actual.ProcessGuid], actual, startRequests))
	}

	for _, actualKey := range changes.ActualLRPKeysForMissingIndices {
		works = append(works, db.resolveActualsWithMissingIndices(logger, desiredLRPs[actualKey.ProcessGuid], actualKey, startRequests))
	}

	for _, actual := range changes.RestartableCrashedActualLRPs {
		works = append(works, db.resolveRestartableCrashedActualLRPS(logger, actual, startRequests))
	}

	throttler, err := workpool.NewThrottler(db.convergenceWorkersSize, works)
	if err != nil {
		logger.Error("failed-constructing-throttler", err, lager.Data{"max-workers": db.convergenceWorkersSize, "num-works": len(works)})
		return
	}

	logger.Debug("waiting-for-lrp-convergence-work")
	throttler.Work()
	logger.Debug("done-waiting-for-lrp-convergence-work")

	logger.Debug("requesting-start-auctions", lager.Data{"start-requests-instance-count": startRequests.InstanceCount()})
	db.startActualLRPs(logger, startRequests)
	logger.Debug("done-requesting-start-auctions", lager.Data{"start-requests-instance-count": startRequests.InstanceCount()})
}
Example #9
func (db *ETCDDB) batchCompareAndSwapTasks(tasksToCAS []compareAndSwappableTask, logger lager.Logger) error {
	if len(tasksToCAS) == 0 {
		return nil
	}

	works := []func(){}

	for _, taskToCAS := range tasksToCAS {
		task := taskToCAS.NewTask
		task.UpdatedAt = db.clock.Now().UnixNano()
		value, err := db.serializeModel(logger, task)
		if err != nil {
			logger.Error("failed-to-marshal", err, lager.Data{
				"task-guid": task.TaskGuid,
			})
			continue
		}

		index := taskToCAS.OldIndex
		works = append(works, func() {
			_, err := db.client.CompareAndSwap(TaskSchemaPathByGuid(task.TaskGuid), value, NO_TTL, index)
			if err != nil {
				logger.Error("failed-to-compare-and-swap", err, lager.Data{
					"task-guid": task.TaskGuid,
				})
			}
		})
	}

	throttler, err := workpool.NewThrottler(db.convergenceWorkersSize, works)
	if err != nil {
		return err
	}

	throttler.Work()
	return nil
}
Example #10
func (db *ETCDDB) retireActualLRPs(logger lager.Logger, keys []*models.ActualLRPKey) {
	logger = logger.Session("retire-actual-lrps")

	works := make([]func(), len(keys))

	for i, key := range keys {
		key := key

		works[i] = func() {
			err := db.RetireActualLRP(logger, key)
			if err != nil {
				logger.Error("failed-to-retire", err, lager.Data{"lrp-key": key})
			}
		}
	}

	throttler, err := workpool.NewThrottler(db.updateWorkersSize, works)
	if err != nil {
		logger.Error("failed-constructing-throttler", err, lager.Data{"max-workers": db.updateWorkersSize, "num-works": len(works)})
		return
	}

	throttler.Work()
}
Example #11
func (db *ETCDDB) createUnclaimedActualLRPs(logger lager.Logger, keys []*models.ActualLRPKey) []int {
	count := len(keys)
	createdIndicesChan := make(chan int, count)

	works := make([]func(), count)

	for i, key := range keys {
		key := key
		works[i] = func() {
			err := db.createUnclaimedActualLRP(logger, key)
			if err != nil {
				logger.Info("failed-creating-actual-lrp", lager.Data{"actual_lrp_key": key, "err-message": err.Error()})
			} else {
				createdIndicesChan <- int(key.Index)
			}
		}
	}

	throttler, err := workpool.NewThrottler(db.updateWorkersSize, works)
	if err != nil {
		logger.Error("failed-constructing-throttler", err, lager.Data{"max-workers": db.updateWorkersSize, "num-works": len(works)})
		return []int{}
	}

	go func() {
		throttler.Work()
		close(createdIndicesChan)
	}()

	createdIndices := make([]int, 0, count)
	for createdIndex := range createdIndicesChan {
		createdIndices = append(createdIndices, createdIndex)
	}

	return createdIndices
}
Example #12
func (db *ETCDDB) gatherAndOptionallyPruneActualLRPs(logger lager.Logger, guids map[string]struct{}, doPrune bool, lmc *LRPMetricCounter) (map[string]map[int32]*models.ActualLRP, error) {
	response, modelErr := db.fetchRecursiveRaw(logger, ActualLRPSchemaRoot)

	if modelErr == models.ErrResourceNotFound {
		logger.Info("actual-lrp-schema-root-not-found")
		return map[string]map[int32]*models.ActualLRP{}, nil
	}

	if modelErr != nil {
		return nil, modelErr
	}

	actuals := map[string]map[int32]*models.ActualLRP{}
	var guidKeysToDelete, indexKeysToDelete []string
	var actualsToDelete []string
	var guidsLock, actualsLock, guidKeysToDeleteLock, indexKeysToDeleteLock,
		crashingDesiredsLock, actualsToDeleteLock sync.Mutex

	logger.Debug("walking-actual-lrp-tree")
	works := []func(){}
	crashingDesireds := map[string]struct{}{}

	for _, guidGroup := range response.Nodes {
		guidGroup := guidGroup
		works = append(works, func() {
			guidGroupWillBeEmpty := true

			for _, indexGroup := range guidGroup.Nodes {
				indexGroupWillBeEmpty := true

				for _, actualNode := range indexGroup.Nodes {
					actual := new(models.ActualLRP)
					err := db.deserializeModel(logger, actualNode, actual)
					if err != nil {
						actualsToDeleteLock.Lock()
						actualsToDelete = append(actualsToDelete, actualNode.Key)
						actualsToDeleteLock.Unlock()

						continue
					}

					err = actual.Validate()
					if err != nil {
						actualsToDeleteLock.Lock()
						actualsToDelete = append(actualsToDelete, actualNode.Key)
						actualsToDeleteLock.Unlock()

						continue
					}

					indexGroupWillBeEmpty = false
					guidGroupWillBeEmpty = false

					switch actual.State {
					case models.ActualLRPStateUnclaimed:
						atomic.AddInt32(&lmc.unclaimedLRPs, 1)
					case models.ActualLRPStateClaimed:
						atomic.AddInt32(&lmc.claimedLRPs, 1)
					case models.ActualLRPStateRunning:
						atomic.AddInt32(&lmc.runningLRPs, 1)
					case models.ActualLRPStateCrashed:
						crashingDesiredsLock.Lock()
						crashingDesireds[actual.ProcessGuid] = struct{}{}
						crashingDesiredsLock.Unlock()
						atomic.AddInt32(&lmc.crashedActualLRPs, 1)
					}

					guidsLock.Lock()
					guids[actual.ProcessGuid] = struct{}{}
					guidsLock.Unlock()

					if path.Base(actualNode.Key) == ActualLRPInstanceKey {
						actualsLock.Lock()
						if actuals[actual.ProcessGuid] == nil {
							actuals[actual.ProcessGuid] = map[int32]*models.ActualLRP{}
						}
						actuals[actual.ProcessGuid][actual.Index] = actual
						actualsLock.Unlock()
					}
				}

				if indexGroupWillBeEmpty {
					indexKeysToDeleteLock.Lock()
					indexKeysToDelete = append(indexKeysToDelete, indexGroup.Key)
					indexKeysToDeleteLock.Unlock()
				}
			}

			if guidGroupWillBeEmpty {
				guidKeysToDeleteLock.Lock()
				guidKeysToDelete = append(guidKeysToDelete, guidGroup.Key)
				guidKeysToDeleteLock.Unlock()
			}
		})
	}
	logger.Debug("done-walking-actual-lrp-tree")

	throttler, err := workpool.NewThrottler(db.convergenceWorkersSize, works)
	if err != nil {
		logger.Error("failed-to-create-throttler", err)
	}

	throttler.Work()

	if doPrune {
		logger.Info("deleting-invalid-actual-lrps", lager.Data{"num_lrps": len(actualsToDelete)})
		db.batchDeleteNodes(actualsToDelete, logger)
		actualLRPsDeleted.Add(uint64(len(actualsToDelete)))

		logger.Info("deleting-empty-actual-indices", lager.Data{"num_indices": len(indexKeysToDelete)})
		err = db.deleteLeaves(logger, indexKeysToDelete)
		if err != nil {
			logger.Error("failed-deleting-empty-actual-indices", err, lager.Data{"num_indices": len(indexKeysToDelete)})
		} else {
			logger.Info("succeeded-deleting-empty-actual-indices", lager.Data{"num_indices": len(indexKeysToDelete)})
		}

		logger.Info("deleting-empty-actual-guids", lager.Data{"num_guids": len(guidKeysToDelete)})
		err = db.deleteLeaves(logger, guidKeysToDelete)
		if err != nil {
			logger.Error("failed-deleting-empty-actual-guids", err, lager.Data{"num_guids": len(guidKeysToDelete)})
		} else {
			logger.Info("succeeded-deleting-empty-actual-guids", lager.Data{"num_guids": len(guidKeysToDelete)})
		}
	}

	lmc.crashingDesiredLRPs = int32(len(crashingDesireds))

	return actuals, nil
}
Example #13
func (p *Processor) createMissingDesiredLRPs(
	logger lager.Logger,
	cancel <-chan struct{},
	missing <-chan []cc_messages.DesireAppRequestFromCC,
	invalidCount *int32,
) <-chan error {
	logger = logger.Session("create-missing-desired-lrps")

	errc := make(chan error, 1)

	go func() {
		defer close(errc)

		for {
			var desireAppRequests []cc_messages.DesireAppRequestFromCC

			select {
			case <-cancel:
				return

			case selected, open := <-missing:
				if !open {
					return
				}

				desireAppRequests = selected
			}

			works := make([]func(), len(desireAppRequests))

			for i, desireAppRequest := range desireAppRequests {
				desireAppRequest := desireAppRequest
				var builder recipebuilder.RecipeBuilder = p.builders["buildpack"]
				if desireAppRequest.DockerImageUrl != "" {
					builder = p.builders["docker"]
				}

				works[i] = func() {
					logger.Debug("building-create-desired-lrp-request", desireAppRequestDebugData(&desireAppRequest))
					desired, err := builder.Build(&desireAppRequest)
					if err != nil {
						logger.Error("failed-building-create-desired-lrp-request", err, lager.Data{"process-guid": desireAppRequest.ProcessGuid})
						errc <- err
						return
					}
					logger.Debug("succeeded-building-create-desired-lrp-request", desireAppRequestDebugData(&desireAppRequest))

					logger.Debug("creating-desired-lrp", createDesiredReqDebugData(desired))
					err = p.bbsClient.DesireLRP(desired)
					if err != nil {
						logger.Error("failed-creating-desired-lrp", err, lager.Data{"process-guid": desired.ProcessGuid})
						if models.ConvertError(err).Type == models.Error_InvalidRequest {
							atomic.AddInt32(invalidCount, int32(1))
						} else {
							errc <- err
						}
						return
					}
					logger.Debug("succeeded-creating-desired-lrp", createDesiredReqDebugData(desired))
				}
			}

			throttler, err := workpool.NewThrottler(p.updateLRPWorkPoolSize, works)
			if err != nil {
				errc <- err
				return
			}

			logger.Info("processing-batch", lager.Data{"size": len(desireAppRequests)})
			throttler.Work()
			logger.Info("done-processing-batch", lager.Data{"size": len(desireAppRequests)})
		}
	}()

	return errc
}
Example #14
func (p *Processor) updateStaleDesiredLRPs(
	logger lager.Logger,
	cancel <-chan struct{},
	stale <-chan []cc_messages.DesireAppRequestFromCC,
	existingSchedulingInfoMap map[string]*models.DesiredLRPSchedulingInfo,
	invalidCount *int32,
) <-chan error {
	logger = logger.Session("update-stale-desired-lrps")

	errc := make(chan error, 1)

	go func() {
		defer close(errc)

		for {
			var staleAppRequests []cc_messages.DesireAppRequestFromCC

			select {
			case <-cancel:
				return

			case selected, open := <-stale:
				if !open {
					return
				}

				staleAppRequests = selected
			}

			works := make([]func(), len(staleAppRequests))

			for i, desireAppRequest := range staleAppRequests {
				desireAppRequest := desireAppRequest
				var builder recipebuilder.RecipeBuilder = p.builders["buildpack"]
				if desireAppRequest.DockerImageUrl != "" {
					builder = p.builders["docker"]
				}

				works[i] = func() {
					processGuid := desireAppRequest.ProcessGuid
					existingSchedulingInfo := existingSchedulingInfoMap[desireAppRequest.ProcessGuid]

					updateReq := &models.DesiredLRPUpdate{}
					instances := int32(desireAppRequest.NumInstances)
					updateReq.Instances = &instances
					updateReq.Annotation = &desireAppRequest.ETag

					exposedPorts, err := builder.ExtractExposedPorts(&desireAppRequest)
					if err != nil {
						logger.Error("failed-updating-stale-lrp", err, lager.Data{
							"process-guid":       processGuid,
							"execution-metadata": desireAppRequest.ExecutionMetadata,
						})
						errc <- err
						return
					}

					routes, err := helpers.CCRouteInfoToRoutes(desireAppRequest.RoutingInfo, exposedPorts)
					if err != nil {
						logger.Error("failed-to-marshal-routes", err)
						errc <- err
						return
					}

					updateReq.Routes = &routes

					for k, v := range existingSchedulingInfo.Routes {
						if k != cfroutes.CF_ROUTER {
							(*updateReq.Routes)[k] = v
						}
					}

					logger.Debug("updating-stale-lrp", updateDesiredRequestDebugData(processGuid, updateReq))
					err = p.bbsClient.UpdateDesiredLRP(processGuid, updateReq)
					if err != nil {
						logger.Error("failed-updating-stale-lrp", err, lager.Data{
							"process-guid": processGuid,
						})

						if models.ConvertError(err).Type == models.Error_InvalidRequest {
							atomic.AddInt32(invalidCount, int32(1))
						} else {
							errc <- err
						}
						return
					}
					logger.Debug("succeeded-updating-stale-lrp", updateDesiredRequestDebugData(processGuid, updateReq))
				}
			}

			throttler, err := workpool.NewThrottler(p.updateLRPWorkPoolSize, works)
			if err != nil {
				errc <- err
				return
			}

			logger.Info("processing-batch", lager.Data{"size": len(staleAppRequests)})
			throttler.Work()
			logger.Info("done-processing-batch", lager.Data{"size": len(staleAppRequests)})
		}
	}()

	return errc
}
Example #15
package workpool_test

import (
	"github.com/cloudfoundry/gunk/workpool"

	. "github.com/onsi/ginkgo"
	. "github.com/onsi/gomega"
)

var _ = Describe("Throttler", func() {
	var throttler *workpool.Throttler

	Context("when max workers is non-positive", func() {
		It("errors", func() {
			_, err := workpool.NewThrottler(0, []func(){})
			Expect(err).To(HaveOccurred())
		})
	})

	Context("when max workers is positive", func() {
		var maxWorkers int
		var calledChan chan int
		var unblockChan chan struct{}
		var work func(int) func()

		BeforeEach(func() {
			maxWorkers = 2
			calledChan = make(chan int)
			unblockChan = make(chan struct{})
			work = func(i int) func() {
				return func() {
Example #16
func (db *ETCDDB) GatherDesiredLRPs(logger lager.Logger, guids map[string]struct{}, lmc *LRPMetricCounter) (map[string]*models.DesiredLRP, error) {
	desiredLRPsRoot, modelErr := db.fetchRecursiveRaw(logger, DesiredLRPComponentsSchemaRoot)

	if modelErr == models.ErrResourceNotFound {
		logger.Info("actual-lrp-schema-root-not-found")
		return map[string]*models.DesiredLRP{}, nil
	}

	if modelErr != nil {
		return nil, modelErr
	}

	schedulingInfos := map[string]*models.DesiredLRPSchedulingInfo{}
	runInfos := map[string]*models.DesiredLRPRunInfo{}

	var malformedSchedulingInfos int32
	var malformedRunInfos int32
	var guidsLock, schedulingInfosLock, runInfosLock sync.Mutex

	works := []func(){}
	logger.Info("walking-desired-lrp-components-tree")

	for _, componentRoot := range desiredLRPsRoot.Nodes {
		switch componentRoot.Key {
		case DesiredLRPSchedulingInfoSchemaRoot:
			for _, node := range componentRoot.Nodes {
				node := node
				works = append(works, func() {
					var schedulingInfo models.DesiredLRPSchedulingInfo
					err := db.deserializeModel(logger, node, &schedulingInfo)
					if err != nil {
						logger.Error("failed-to-deserialize-scheduling-info", err)
						atomic.AddInt32(&malformedSchedulingInfos, 1)
					} else {
						schedulingInfosLock.Lock()
						schedulingInfos[schedulingInfo.ProcessGuid] = &schedulingInfo
						schedulingInfosLock.Unlock()
						atomic.AddInt32(&lmc.desiredLRPs, schedulingInfo.Instances)

						guidsLock.Lock()
						guids[schedulingInfo.ProcessGuid] = struct{}{}
						guidsLock.Unlock()
					}
				})
			}
		case DesiredLRPRunInfoSchemaRoot:
			for _, node := range componentRoot.Nodes {
				node := node
				works = append(works, func() {
					var runInfo models.DesiredLRPRunInfo
					err := db.deserializeModel(logger, node, &runInfo)
					if err != nil {
						logger.Error("failed-to-deserialize-run-info", err)
						atomic.AddInt32(&malformedRunInfos, 1)
					} else {
						runInfosLock.Lock()
						runInfos[runInfo.ProcessGuid] = &runInfo
						runInfosLock.Unlock()
					}
				})
			}
		default:
			err := fmt.Errorf("unrecognized node under desired LRPs root node: %s", componentRoot.Key)
			logger.Error("unrecognized-node", err)
			return nil, err
		}
	}

	throttler, err := workpool.NewThrottler(db.convergenceWorkersSize, works)
	if err != nil {
		logger.Error("failed-to-create-throttler", err)
	}

	throttler.Work()

	malformedSchedulingInfosMetric.Add(uint64(malformedSchedulingInfos))
	malformedRunInfosMetric.Add(uint64(malformedRunInfos))

	logger.Info("done-walking-desired-lrp-tree")

	desireds := make(map[string]*models.DesiredLRP)
	for guid, schedulingInfo := range schedulingInfos {
		runInfo := runInfos[guid]
		desiredLRP := models.NewDesiredLRP(*schedulingInfo, *runInfo)
		desireds[guid] = &desiredLRP
	}

	return desireds, nil
}
Example #17
func (db *ETCDDB) GatherAndPruneDesiredLRPs(logger lager.Logger, guids map[string]struct{}, lmc *LRPMetricCounter) (map[string]*models.DesiredLRP, error) {
	desiredLRPsRoot, modelErr := db.fetchRecursiveRaw(logger, DesiredLRPComponentsSchemaRoot)

	if modelErr == models.ErrResourceNotFound {
		logger.Info("actual-lrp-schema-root-not-found")
		return map[string]*models.DesiredLRP{}, nil
	}

	if modelErr != nil {
		return nil, modelErr
	}

	schedulingInfos := map[string]*models.DesiredLRPSchedulingInfo{}
	runInfos := map[string]*models.DesiredLRPRunInfo{}

	var malformedSchedulingInfos, malformedRunInfos []string

	var guidsLock, schedulingInfosLock, runInfosLock sync.Mutex

	works := []func(){}
	logger.Debug("walking-desired-lrp-components-tree")

	for _, componentRoot := range desiredLRPsRoot.Nodes {
		switch componentRoot.Key {
		case DesiredLRPSchedulingInfoSchemaRoot:
			for _, node := range componentRoot.Nodes {
				node := node
				works = append(works, func() {
					var schedulingInfo models.DesiredLRPSchedulingInfo
					err := db.deserializeModel(logger, node, &schedulingInfo)
					if err != nil || schedulingInfo.Validate() != nil {
						logger.Error("failed-to-deserialize-scheduling-info", err)
						schedulingInfosLock.Lock()
						malformedSchedulingInfos = append(malformedSchedulingInfos, node.Key)
						schedulingInfosLock.Unlock()
					} else {
						schedulingInfosLock.Lock()
						schedulingInfos[schedulingInfo.ProcessGuid] = &schedulingInfo
						schedulingInfosLock.Unlock()
						atomic.AddInt32(&lmc.desiredLRPs, schedulingInfo.Instances)

						guidsLock.Lock()
						guids[schedulingInfo.ProcessGuid] = struct{}{}
						guidsLock.Unlock()
					}
				})
			}
		case DesiredLRPRunInfoSchemaRoot:
			for _, node := range componentRoot.Nodes {
				node := node
				works = append(works, func() {
					var runInfo models.DesiredLRPRunInfo
					err := db.deserializeModel(logger, node, &runInfo)
					if err != nil || runInfo.Validate() != nil {
						runInfosLock.Lock()
						malformedRunInfos = append(malformedRunInfos, node.Key)
						runInfosLock.Unlock()
					} else {
						runInfosLock.Lock()
						runInfos[runInfo.ProcessGuid] = &runInfo
						runInfosLock.Unlock()
					}
				})
			}
		default:
			err := fmt.Errorf("unrecognized node under desired LRPs root node: %s", componentRoot.Key)
			logger.Error("unrecognized-node", err)
			return nil, err
		}
	}

	throttler, err := workpool.NewThrottler(db.convergenceWorkersSize, works)
	if err != nil {
		logger.Error("failed-to-create-throttler", err)
	}

	throttler.Work()

	db.batchDeleteNodes(malformedSchedulingInfos, logger)
	db.batchDeleteNodes(malformedRunInfos, logger)

	malformedSchedulingInfosMetric.Add(uint64(len(malformedSchedulingInfos)))
	malformedRunInfosMetric.Add(uint64(len(malformedRunInfos)))

	logger.Debug("done-walking-desired-lrp-tree")

	desireds := make(map[string]*models.DesiredLRP)
	var schedInfosToDelete []string
	for guid, schedulingInfo := range schedulingInfos {
		runInfo, ok := runInfos[guid]
		if !ok {
			err := fmt.Errorf("Missing runInfo for GUID %s", guid)
			logger.Error("runInfo-not-found-error", err)
			schedInfosToDelete = append(schedInfosToDelete, DesiredLRPSchedulingInfoSchemaPath(guid))
		} else {
			desiredLRP := models.NewDesiredLRP(*schedulingInfo, *runInfo)
			desireds[guid] = &desiredLRP
		}
	}
	db.batchDeleteNodes(schedInfosToDelete, logger)

	// Check to see if we have orphaned RunInfos
	if len(runInfos) != len(schedulingInfos) {
		var runInfosToDelete []string
		for guid, runInfo := range runInfos {
			// If there is no corresponding SchedulingInfo and the RunInfo has
			// existed for longer than desiredLRPCreationTimeout, consider it orphaned
			// and delete it.
			_, ok := schedulingInfos[guid]
			if !ok && db.clock.Since(time.Unix(0, runInfo.CreatedAt)) > db.desiredLRPCreationTimeout {
				orphanedRunInfosMetric.Add(1)
				runInfosToDelete = append(runInfosToDelete, DesiredLRPRunInfoSchemaPath(guid))
			}
		}

		db.batchDeleteNodes(runInfosToDelete, logger)
	}

	return desireds, nil
}