func (db *ETCDDB) UpdateDesiredLRP(logger lager.Logger, processGuid string, update *models.DesiredLRPUpdate) error { logger = logger.Session("update-desired-lrp", lager.Data{"process-guid": processGuid}) logger.Info("starting") defer logger.Info("complete") var schedulingInfo *models.DesiredLRPSchedulingInfo var existingInstances int32 var err error for i := 0; i < 2; i++ { var index uint64 schedulingInfo, index, err = db.rawDesiredLRPSchedulingInfo(logger, processGuid) if err != nil { logger.Error("failed-to-fetch-scheduling-info", err) break } existingInstances = schedulingInfo.Instances schedulingInfo.ApplyUpdate(update) err = db.updateDesiredLRPSchedulingInfo(logger, schedulingInfo, index) if err != nil { logger.Error("update-scheduling-info-failed", err) modelErr := models.ConvertError(err) if modelErr != models.ErrResourceConflict { break } // Retry on CAS fail continue } break } if err != nil { return err } switch diff := schedulingInfo.Instances - existingInstances; { case diff > 0: db.startInstanceRange(logger, existingInstances, schedulingInfo.Instances, schedulingInfo) case diff < 0: db.stopInstanceRange(logger, schedulingInfo.Instances, existingInstances, schedulingInfo) case diff == 0: // this space intentionally left blank } return nil }
func prettyPrint(encrypted []byte, key string, label string) { var decrypted, err = decrypt(encrypted, key, label) if err != nil { return } var model1 models.DesiredLRPRunInfo err = model1.Unmarshal(decrypted) if err != nil { // NOP } else { pretty.Println(model1) return } var model2 models.DesiredLRPSchedulingInfo err = model2.Unmarshal(decrypted) if err != nil { // NOP } else { pretty.Println(model2) return } var model3 models.ActualLRP err = model3.Unmarshal(decrypted) if err != nil { // NOP } else { pretty.Println(model3) return } var model4 models.Task err = model4.Unmarshal(decrypted) if err != nil { // NOP } else { pretty.Println(model4) return } var model5 models.DesiredLRP err = model5.Unmarshal(decrypted) if err != nil { log.Println("Unknown data type: ", string(decrypted)) } else { pretty.Println(model5) return } }
func (db *ETCDDB) UpdateDesiredLRP(logger lager.Logger, processGuid string, update *models.DesiredLRPUpdate) (*models.DesiredLRP, error) { logger.Info("starting") defer logger.Info("complete") var schedulingInfo *models.DesiredLRPSchedulingInfo var err error var beforeDesiredLRP *models.DesiredLRP for i := 0; i < 2; i++ { var index uint64 beforeDesiredLRP, index, err = db.rawDesiredLRPByProcessGuid(logger, processGuid) if err != nil { logger.Error("failed-to-fetch-desired-lrp", err) break } schedulingInfoValue := beforeDesiredLRP.DesiredLRPSchedulingInfo() schedulingInfo = &schedulingInfoValue schedulingInfo.ApplyUpdate(update) err = db.updateDesiredLRPSchedulingInfo(logger, schedulingInfo, index) if err != nil { logger.Error("update-scheduling-info-failed", err) modelErr := models.ConvertError(err) if modelErr != models.ErrResourceConflict { break } // Retry on CAS fail continue } break } if err != nil { return nil, err } return beforeDesiredLRP, nil }
func (m *SplitDesiredLRP) WriteSchedulingInfo(logger lager.Logger, desiredLRP models.DesiredLRP) { schedulingInfo := models.DesiredLRPSchedulingInfo{ DesiredLRPKey: desiredLRP.DesiredLRPKey(), Annotation: desiredLRP.Annotation, Instances: desiredLRP.Instances, DesiredLRPResource: desiredLRP.DesiredLRPResource(), } if desiredLRP.Routes != nil { schedulingInfo.Routes = *desiredLRP.Routes } if desiredLRP.ModificationTag != nil { schedulingInfo.ModificationTag = *desiredLRP.ModificationTag } schedulingInfoPayload, marshalErr := m.serializer.Marshal(logger, format.ENCRYPTED_PROTO, &schedulingInfo) if marshalErr != nil { logger.Error("failed-marshaling-scheduling-info", marshalErr, lager.Data{"process_guid": schedulingInfo.ProcessGuid}) } _, setErr := m.storeClient.Set(etcd.DesiredLRPSchedulingInfoSchemaPath(desiredLRP.ProcessGuid), schedulingInfoPayload, etcd.NO_TTL) if setErr != nil { logger.Error("failed-set-of-scheduling-info", marshalErr, lager.Data{"process_guid": schedulingInfo.ProcessGuid}) } }
func (db *ETCDDB) createDesiredLRPSchedulingInfo(logger lager.Logger, schedulingInfo *models.DesiredLRPSchedulingInfo) error { epochGuid, err := uuid.NewV4() if err != nil { logger.Error("failed-to-generate-epoch", err) return models.ErrUnknownError } schedulingInfo.ModificationTag = models.NewModificationTag(epochGuid.String(), 0) serializedSchedInfo, err := db.serializeModel(logger, schedulingInfo) if err != nil { logger.Error("failed-to-serialize", err) return err } logger.Debug("persisting-scheduling-info") _, err = db.client.Create(DesiredLRPSchedulingInfoSchemaPath(schedulingInfo.ProcessGuid), serializedSchedInfo, NO_TTL) if err != nil { return ErrorFromEtcdError(logger, err) } logger.Debug("succeeded-persisting-scheduling-info") return nil }
func (db *ETCDDB) GatherAndPruneDesiredLRPs(logger lager.Logger, guids map[string]struct{}, lmc *LRPMetricCounter) (map[string]*models.DesiredLRP, error) { desiredLRPsRoot, modelErr := db.fetchRecursiveRaw(logger, DesiredLRPComponentsSchemaRoot) if modelErr == models.ErrResourceNotFound { logger.Info("actual-lrp-schema-root-not-found") return map[string]*models.DesiredLRP{}, nil } if modelErr != nil { return nil, modelErr } schedulingInfos := map[string]*models.DesiredLRPSchedulingInfo{} runInfos := map[string]*models.DesiredLRPRunInfo{} var malformedSchedulingInfos, malformedRunInfos []string var guidsLock, schedulingInfosLock, runInfosLock sync.Mutex works := []func(){} logger.Debug("walking-desired-lrp-components-tree") for _, componentRoot := range desiredLRPsRoot.Nodes { switch componentRoot.Key { case DesiredLRPSchedulingInfoSchemaRoot: for _, node := range componentRoot.Nodes { node := node works = append(works, func() { var schedulingInfo models.DesiredLRPSchedulingInfo err := db.deserializeModel(logger, node, &schedulingInfo) if err != nil || schedulingInfo.Validate() != nil { logger.Error("failed-to-deserialize-scheduling-info", err) schedulingInfosLock.Lock() malformedSchedulingInfos = append(malformedSchedulingInfos, node.Key) schedulingInfosLock.Unlock() } else { schedulingInfosLock.Lock() schedulingInfos[schedulingInfo.ProcessGuid] = &schedulingInfo schedulingInfosLock.Unlock() atomic.AddInt32(&lmc.desiredLRPs, schedulingInfo.Instances) guidsLock.Lock() guids[schedulingInfo.ProcessGuid] = struct{}{} guidsLock.Unlock() } }) } case DesiredLRPRunInfoSchemaRoot: for _, node := range componentRoot.Nodes { node := node works = append(works, func() { var runInfo models.DesiredLRPRunInfo err := db.deserializeModel(logger, node, &runInfo) if err != nil || runInfo.Validate() != nil { runInfosLock.Lock() malformedRunInfos = append(malformedRunInfos, node.Key) runInfosLock.Unlock() } else { runInfosLock.Lock() runInfos[runInfo.ProcessGuid] = &runInfo runInfosLock.Unlock() } }) } default: err := fmt.Errorf("unrecognized node under desired LRPs root node: %s", componentRoot.Key) logger.Error("unrecognized-node", err) return nil, err } } throttler, err := workpool.NewThrottler(db.convergenceWorkersSize, works) if err != nil { logger.Error("failed-to-create-throttler", err) } throttler.Work() db.batchDeleteNodes(malformedSchedulingInfos, logger) db.batchDeleteNodes(malformedRunInfos, logger) malformedSchedulingInfosMetric.Add(uint64(len(malformedSchedulingInfos))) malformedRunInfosMetric.Add(uint64(len(malformedRunInfos))) logger.Debug("done-walking-desired-lrp-tree") desireds := make(map[string]*models.DesiredLRP) var schedInfosToDelete []string for guid, schedulingInfo := range schedulingInfos { runInfo, ok := runInfos[guid] if !ok { err := fmt.Errorf("Missing runInfo for GUID %s", guid) logger.Error("runInfo-not-found-error", err) schedInfosToDelete = append(schedInfosToDelete, DesiredLRPSchedulingInfoSchemaPath(guid)) } else { desiredLRP := models.NewDesiredLRP(*schedulingInfo, *runInfo) desireds[guid] = &desiredLRP } } db.batchDeleteNodes(schedInfosToDelete, logger) // Check to see if we have orphaned RunInfos if len(runInfos) != len(schedulingInfos) { var runInfosToDelete []string for guid, runInfo := range runInfos { // If there is no corresponding SchedulingInfo and the RunInfo has // existed for longer than desiredLRPCreationTimeout, consider it orphaned // and delete it. _, ok := schedulingInfos[guid] if !ok && db.clock.Since(time.Unix(0, runInfo.CreatedAt)) > db.desiredLRPCreationTimeout { orphanedRunInfosMetric.Add(1) runInfosToDelete = append(runInfosToDelete, DesiredLRPRunInfoSchemaPath(guid)) } } db.batchDeleteNodes(runInfosToDelete, logger) } return desireds, nil }