Example #1
0
func (h *DesiredLRPHandler) RemoveDesiredLRP(logger lager.Logger, w http.ResponseWriter, req *http.Request) {
	logger = logger.Session("remove-desired-lrp")

	request := &models.RemoveDesiredLRPRequest{}
	response := &models.DesiredLRPLifecycleResponse{}
	defer func() { exitIfUnrecoverable(logger, h.exitChan, response.Error) }()
	defer writeResponse(w, response)

	err := parseRequest(logger, req, request)
	if err != nil {
		response.Error = models.ConvertError(err)
		return
	}
	logger = logger.WithData(lager.Data{"process_guid": request.ProcessGuid})

	desiredLRP, err := h.desiredLRPDB.DesiredLRPByProcessGuid(logger.Session("fetch-desired"), request.ProcessGuid)
	if err != nil {
		response.Error = models.ConvertError(err)
		return
	}

	err = h.desiredLRPDB.RemoveDesiredLRP(logger.Session("remove-desired"), request.ProcessGuid)
	if err != nil {
		response.Error = models.ConvertError(err)
		return
	}

	go h.desiredHub.Emit(models.NewDesiredLRPRemovedEvent(desiredLRP))

	h.stopInstancesFrom(logger, request.ProcessGuid, 0)
}
Example #2
0
// The stager calls this when it wants to claim a completed task.  This ensures that only one
// stager ever attempts to handle a completed task
func (db *ETCDDB) ResolvingTask(logger lager.Logger, taskGuid string) error {
	logger = logger.WithData(lager.Data{"task_guid": taskGuid})

	logger.Info("starting")
	defer logger.Info("finished")

	task, index, err := db.taskByGuidWithIndex(logger, taskGuid)
	if err != nil {
		logger.Error("failed-getting-task", err)
		return err
	}

	err = task.ValidateTransitionTo(models.Task_Resolving)
	if err != nil {
		logger.Error("invalid-state-transition", err)
		return err
	}

	task.UpdatedAt = db.clock.Now().UnixNano()
	task.State = models.Task_Resolving

	value, err := db.serializeModel(logger, task)
	if err != nil {
		return err
	}

	_, err = db.client.CompareAndSwap(TaskSchemaPathByGuid(taskGuid), value, NO_TTL, index)
	if err != nil {
		return ErrorFromEtcdError(logger, err)
	}
	return nil
}
Example #3
0
// The cell calls this when it has finished running the task (be it success or failure)
// stagerTaskBBS will retry this repeatedly if it gets a StoreTimeout error (up to N seconds?)
// This really really shouldn't fail.  If it does, blog about it and walk away. If it failed in a
// consistent way (i.e. key already exists), there's probably a flaw in our design.
func (db *ETCDDB) CompleteTask(logger lager.Logger, taskGuid, cellId string, failed bool, failureReason, result string) (*models.Task, error) {
	logger = logger.WithData(lager.Data{"task_guid": taskGuid, "cell_id": cellId})

	logger.Info("starting")
	defer logger.Info("finished")

	task, index, err := db.taskByGuidWithIndex(logger, taskGuid)
	if err != nil {
		logger.Error("failed-getting-task", err)
		return nil, err
	}

	if task.State == models.Task_Running && task.CellId != cellId {
		err = models.NewRunningOnDifferentCellError(cellId, task.CellId)
		logger.Error("invalid-cell-id", err)
		return nil, err
	}

	if err = task.ValidateTransitionTo(models.Task_Completed); err != nil {
		logger.Error("invalid-state-transition", err)
		return nil, err
	}

	return task, db.completeTask(logger, task, index, failed, failureReason, result)
}
Example #4
0
func (db *ETCDDB) FailActualLRP(logger lager.Logger, key *models.ActualLRPKey, errorMessage string) (*models.ActualLRPGroup, *models.ActualLRPGroup, error) {
	logger = logger.WithData(lager.Data{"actual_lrp_key": key, "error_message": errorMessage})
	logger.Info("starting")
	lrp, prevIndex, err := db.rawActualLRPByProcessGuidAndIndex(logger, key.ProcessGuid, key.Index)
	if err != nil {
		logger.Error("failed-to-get-actual-lrp", err)
		return nil, nil, err
	}
	beforeActualLRP := *lrp

	if lrp.State != models.ActualLRPStateUnclaimed {
		return nil, nil, models.ErrActualLRPCannotBeFailed
	}

	lrp.ModificationTag.Increment()
	lrp.PlacementError = errorMessage
	lrp.Since = db.clock.Now().UnixNano()

	lrpData, serialErr := db.serializeModel(logger, lrp)
	if serialErr != nil {
		return nil, nil, serialErr
	}

	_, err = db.client.CompareAndSwap(ActualLRPSchemaPath(key.ProcessGuid, key.Index), lrpData, 0, prevIndex)
	if err != nil {
		logger.Error("failed", err)
		return nil, nil, models.ErrActualLRPCannotBeFailed
	}

	logger.Info("succeeded")
	return &models.ActualLRPGroup{Instance: &beforeActualLRP}, &models.ActualLRPGroup{Instance: lrp}, nil
}
Example #5
0
func (db *ETCDDB) DesireTask(logger lager.Logger, taskDef *models.TaskDefinition, taskGuid, domain string) error {
	logger = logger.WithData(lager.Data{"task_guid": taskGuid})
	logger.Info("starting")
	defer logger.Info("finished")

	now := db.clock.Now().UnixNano()
	task := &models.Task{
		TaskDefinition: taskDef,
		TaskGuid:       taskGuid,
		Domain:         domain,
		State:          models.Task_Pending,
		CreatedAt:      now,
		UpdatedAt:      now,
	}

	value, err := db.serializeModel(logger, task)
	if err != nil {
		return err
	}

	logger.Debug("persisting-task")
	_, err = db.client.Create(TaskSchemaPathByGuid(task.TaskGuid), value, NO_TTL)
	if err != nil {
		return ErrorFromEtcdError(logger, err)
	}
	logger.Debug("succeeded-persisting-task")

	return nil
}
Example #6
0
func HandleCompletedTask(logger lager.Logger, httpClient *http.Client, taskDB db.TaskDB, task *models.Task) {
	logger = logger.Session("handle-completed-task", lager.Data{"task_guid": task.TaskGuid})

	if task.CompletionCallbackUrl != "" {
		modelErr := taskDB.ResolvingTask(logger, task.TaskGuid)
		if modelErr != nil {
			logger.Error("marking-task-as-resolving-failed", modelErr)
			return
		}

		logger = logger.WithData(lager.Data{"callback_url": task.CompletionCallbackUrl})

		json, err := json.Marshal(&models.TaskCallbackResponse{
			TaskGuid:      task.TaskGuid,
			Failed:        task.Failed,
			FailureReason: task.FailureReason,
			Result:        task.Result,
			Annotation:    task.Annotation,
			CreatedAt:     task.CreatedAt,
		})
		if err != nil {
			logger.Error("marshalling-task-failed", err)
			return
		}

		var statusCode int

		for i := 0; i < MAX_CB_RETRIES; i++ {
			request, err := http.NewRequest("POST", task.CompletionCallbackUrl, bytes.NewReader(json))
			if err != nil {
				logger.Error("building-request-failed", err)
				return
			}

			request.Header.Set("Content-Type", "application/json")
			response, err := httpClient.Do(request)
			if err != nil {
				matched, _ := regexp.MatchString("Client.Timeout|use of closed network connection", err.Error())
				if matched {
					continue
				}
				logger.Error("doing-request-failed", err)
				return
			}
			defer response.Body.Close()

			statusCode = response.StatusCode
			if shouldResolve(statusCode) {
				modelErr := taskDB.DeleteTask(logger, task.TaskGuid)
				if modelErr != nil {
					logger.Error("delete-task-failed", modelErr)
				}
				return
			}
		}

		logger.Info("callback-failed", lager.Data{"status_code": statusCode})
	}
	return
}
Example #7
0
// The stager calls this when it wants to claim a completed task.  This ensures that only one
// stager ever attempts to handle a completed task
func (db *SQLDB) ResolvingTask(logger lager.Logger, taskGuid string) error {
	logger = logger.WithData(lager.Data{"task_guid": taskGuid})
	logger.Info("starting")
	defer logger.Info("complete")

	return db.transact(logger, func(logger lager.Logger, tx *sql.Tx) error {
		task, err := db.fetchTaskForUpdate(logger, taskGuid, tx)
		if err != nil {
			logger.Error("failed-locking-task", err)
			return err
		}

		if err = task.ValidateTransitionTo(models.Task_Resolving); err != nil {
			logger.Error("invalid-state-transition", err)
			return err
		}

		now := db.clock.Now().UnixNano()
		_, err = db.update(logger, tx, tasksTable,
			SQLAttributes{
				"state":      models.Task_Resolving,
				"updated_at": now,
			},
			"guid = ?", taskGuid,
		)
		if err != nil {
			logger.Error("failed-updating-tasks", err)
			return db.convertSQLError(err)
		}

		return nil
	})
}
Example #8
0
// RemoveDesiredLRP deletes the DesiredLRPSchedulingInfo and the DesiredLRPRunInfo
// from the database. We delete DesiredLRPSchedulingInfo first because the system
// uses it to determine wheter the lrp is present. In the event that only the
// RunInfo fails to delete, the orphaned DesiredLRPRunInfo will be garbage
// collected later by convergence.
func (db *ETCDDB) RemoveDesiredLRP(logger lager.Logger, processGuid string) error {
	logger = logger.WithData(lager.Data{"process_guid": processGuid})
	logger.Info("starting")
	defer logger.Info("complete")

	_, schedulingInfoErr := db.client.Delete(DesiredLRPSchedulingInfoSchemaPath(processGuid), true)
	schedulingInfoErr = ErrorFromEtcdError(logger, schedulingInfoErr)
	if schedulingInfoErr != nil && schedulingInfoErr != models.ErrResourceNotFound {
		logger.Error("failed-deleting-scheduling-info", schedulingInfoErr)
		return schedulingInfoErr
	}

	_, runInfoErr := db.client.Delete(DesiredLRPRunInfoSchemaPath(processGuid), true)
	runInfoErr = ErrorFromEtcdError(logger, runInfoErr)
	if runInfoErr != nil && runInfoErr != models.ErrResourceNotFound {
		logger.Error("failed-deleting-run-info", runInfoErr)
		return runInfoErr
	}

	if schedulingInfoErr == models.ErrResourceNotFound && runInfoErr == models.ErrResourceNotFound {
		// If neither component of the desired LRP exists, don't bother trying to delete running instances
		return models.ErrResourceNotFound
	}

	return nil
}
Example #9
0
// The cell calls this when the user requested to cancel the task
// stagerTaskBBS will retry this repeatedly if it gets a StoreTimeout error (up to N seconds?)
// Will fail if the task has already been cancelled or completed normally
func (db *ETCDDB) CancelTask(logger lager.Logger, taskGuid string) (*models.Task, string, error) {
	logger = logger.WithData(lager.Data{"task_guid": taskGuid})

	logger.Info("starting")
	defer logger.Info("finished")

	task, index, err := db.taskByGuidWithIndex(logger, taskGuid)
	if err != nil {
		logger.Error("failed-to-fetch-task", err)
		return nil, "", err
	}

	if err = task.ValidateTransitionTo(models.Task_Completed); err != nil {
		if task.State != models.Task_Pending {
			logger.Error("invalid-state-transition", err)
			return nil, "", err
		}
	}

	logger.Info("completing-task")
	cellID := task.CellId
	err = db.completeTask(logger, task, index, true, "task was cancelled", "")
	if err != nil {
		logger.Error("failed-completing-task", err)
		return nil, "", err
	}

	logger.Info("succeeded-completing-task")
	return task, cellID, nil
}
Example #10
0
func (db *SQLDB) ActualLRPGroupsByProcessGuid(logger lager.Logger, processGuid string) ([]*models.ActualLRPGroup, error) {
	logger = logger.WithData(lager.Data{"process_guid": processGuid})
	logger.Debug("starting")
	defer logger.Debug("complete")

	return db.getActualLRPS(logger, "process_guid = ?", processGuid)
}
Example #11
0
func (db *SQLDB) ClaimActualLRP(logger lager.Logger, processGuid string, index int32, instanceKey *models.ActualLRPInstanceKey) (*models.ActualLRPGroup, *models.ActualLRPGroup, error) {
	logger = logger.WithData(lager.Data{"process_guid": processGuid, "index": index, "instance_key": instanceKey})
	logger.Info("starting")
	defer logger.Info("complete")

	var beforeActualLRP models.ActualLRP
	var actualLRP *models.ActualLRP
	err := db.transact(logger, func(logger lager.Logger, tx *sql.Tx) error {
		var err error
		actualLRP, err = db.fetchActualLRPForUpdate(logger, processGuid, index, false, tx)
		if err != nil {
			logger.Error("failed-fetching-actual-lrp-for-share", err)
			return err
		}
		beforeActualLRP = *actualLRP

		if !actualLRP.AllowsTransitionTo(&actualLRP.ActualLRPKey, instanceKey, models.ActualLRPStateClaimed) {
			logger.Error("cannot-transition-to-claimed", nil, lager.Data{"from_state": actualLRP.State, "same_instance_key": actualLRP.ActualLRPInstanceKey.Equal(instanceKey)})
			return models.ErrActualLRPCannotBeClaimed
		}

		if actualLRP.State == models.ActualLRPStateClaimed && actualLRP.ActualLRPInstanceKey.Equal(instanceKey) {
			return nil
		}

		actualLRP.ModificationTag.Increment()
		actualLRP.State = models.ActualLRPStateClaimed
		actualLRP.ActualLRPInstanceKey = *instanceKey
		actualLRP.PlacementError = ""
		actualLRP.ActualLRPNetInfo = models.ActualLRPNetInfo{}
		actualLRP.Since = db.clock.Now().UnixNano()

		_, err = db.update(logger, tx, actualLRPsTable,
			SQLAttributes{
				"state":                  actualLRP.State,
				"cell_id":                actualLRP.CellId,
				"instance_guid":          actualLRP.InstanceGuid,
				"modification_tag_index": actualLRP.ModificationTag.Index,
				"placement_error":        actualLRP.PlacementError,
				"since":                  actualLRP.Since,
				"net_info":               []byte{},
			},
			"process_guid = ? AND instance_index = ? AND evacuating = ?",
			processGuid, index, false,
		)
		if err != nil {
			logger.Error("failed-claiming-actual-lrp", err)
			return db.convertSQLError(err)
		}

		return nil
	})

	return &models.ActualLRPGroup{Instance: &beforeActualLRP}, &models.ActualLRPGroup{Instance: actualLRP}, err
}
Example #12
0
func (db *ETCDDB) StartActualLRP(logger lager.Logger, key *models.ActualLRPKey, instanceKey *models.ActualLRPInstanceKey, netInfo *models.ActualLRPNetInfo) (*models.ActualLRPGroup, *models.ActualLRPGroup, error) {
	logger = logger.WithData(lager.Data{
		"actual_lrp_key":          key,
		"actual_lrp_instance_key": instanceKey,
		"net_info":                netInfo,
	})

	lrp, prevIndex, err := db.rawActualLRPByProcessGuidAndIndex(logger, key.ProcessGuid, key.Index)
	bbsErr := models.ConvertError(err)
	if bbsErr != nil {
		if bbsErr.Type == models.Error_ResourceNotFound {
			lrp, err := db.createRunningActualLRP(logger, key, instanceKey, netInfo)
			return nil, &models.ActualLRPGroup{Instance: lrp}, err
		}
		logger.Error("failed-to-get-actual-lrp", err)
		return nil, nil, err
	}
	beforeActualLRP := *lrp

	if lrp.ActualLRPKey.Equal(key) &&
		lrp.ActualLRPInstanceKey.Equal(instanceKey) &&
		lrp.ActualLRPNetInfo.Equal(netInfo) &&
		lrp.State == models.ActualLRPStateRunning {
		lrpGroup := &models.ActualLRPGroup{Instance: lrp}
		return lrpGroup, lrpGroup, nil
	}

	if !lrp.AllowsTransitionTo(key, instanceKey, models.ActualLRPStateRunning) {
		logger.Error("failed-to-transition-actual-lrp-to-started", nil)
		return nil, nil, models.ErrActualLRPCannotBeStarted
	}

	logger.Info("starting")
	defer logger.Info("completed")

	lrp.ModificationTag.Increment()
	lrp.State = models.ActualLRPStateRunning
	lrp.Since = db.clock.Now().UnixNano()
	lrp.ActualLRPInstanceKey = *instanceKey
	lrp.ActualLRPNetInfo = *netInfo
	lrp.PlacementError = ""

	lrpData, serializeErr := db.serializeModel(logger, lrp)
	if serializeErr != nil {
		return nil, nil, serializeErr
	}

	_, err = db.client.CompareAndSwap(ActualLRPSchemaPath(key.ProcessGuid, key.Index), lrpData, 0, prevIndex)
	if err != nil {
		logger.Error("failed", err)
		return nil, nil, models.ErrActualLRPCannotBeStarted
	}

	return &models.ActualLRPGroup{Instance: &beforeActualLRP}, &models.ActualLRPGroup{Instance: lrp}, nil
}
Example #13
0
func (db *SQLDB) UnclaimActualLRP(logger lager.Logger, key *models.ActualLRPKey) (*models.ActualLRPGroup, *models.ActualLRPGroup, error) {
	logger = logger.WithData(lager.Data{"key": key})

	var beforeActualLRP models.ActualLRP
	var actualLRP *models.ActualLRP
	processGuid := key.ProcessGuid
	index := key.Index

	err := db.transact(logger, func(logger lager.Logger, tx *sql.Tx) error {
		var err error
		actualLRP, err = db.fetchActualLRPForUpdate(logger, processGuid, index, false, tx)
		if err != nil {
			logger.Error("failed-fetching-actual-lrp-for-share", err)
			return err
		}
		beforeActualLRP = *actualLRP

		if actualLRP.State == models.ActualLRPStateUnclaimed {
			logger.Debug("already-unclaimed")
			return models.ErrActualLRPCannotBeUnclaimed
		}
		logger.Info("starting")
		defer logger.Info("complete")

		now := db.clock.Now().UnixNano()
		actualLRP.ModificationTag.Increment()
		actualLRP.State = models.ActualLRPStateUnclaimed
		actualLRP.ActualLRPInstanceKey.CellId = ""
		actualLRP.ActualLRPInstanceKey.InstanceGuid = ""
		actualLRP.Since = now
		actualLRP.ActualLRPNetInfo = models.ActualLRPNetInfo{}

		_, err = db.update(logger, tx, actualLRPsTable,
			SQLAttributes{
				"state":                  actualLRP.State,
				"cell_id":                actualLRP.CellId,
				"instance_guid":          actualLRP.InstanceGuid,
				"modification_tag_index": actualLRP.ModificationTag.Index,
				"since":                  actualLRP.Since,
				"net_info":               []byte{},
			},
			"process_guid = ? AND instance_index = ? AND evacuating = ?",
			processGuid, index, false,
		)
		if err != nil {
			logger.Error("failed-to-unclaim-actual-lrp", err)
			return db.convertSQLError(err)
		}

		return nil
	})

	return &models.ActualLRPGroup{Instance: &beforeActualLRP}, &models.ActualLRPGroup{Instance: actualLRP}, err
}
Example #14
0
func (db *ETCDDB) CrashActualLRP(logger lager.Logger, key *models.ActualLRPKey, instanceKey *models.ActualLRPInstanceKey, errorMessage string) (*models.ActualLRPGroup, *models.ActualLRPGroup, bool, error) {
	logger = logger.WithData(lager.Data{"actual_lrp_key": key, "actual_lrp_instance_key": instanceKey})
	logger.Info("starting")

	lrp, prevIndex, err := db.rawActualLRPByProcessGuidAndIndex(logger, key.ProcessGuid, key.Index)
	if err != nil {
		logger.Error("failed-to-get-actual-lrp", err)
		return nil, nil, false, err
	}
	beforeActualLRP := *lrp

	latestChangeTime := time.Duration(db.clock.Now().UnixNano() - lrp.Since)

	var newCrashCount int32
	if latestChangeTime > models.CrashResetTimeout && lrp.State == models.ActualLRPStateRunning {
		newCrashCount = 1
	} else {
		newCrashCount = lrp.CrashCount + 1
	}

	logger.Debug("retrieved-lrp")
	if !lrp.AllowsTransitionTo(key, instanceKey, models.ActualLRPStateCrashed) {
		logger.Error("failed-to-transition-to-crashed", nil, lager.Data{"from_state": lrp.State, "same_instance_key": lrp.ActualLRPInstanceKey.Equal(instanceKey)})
		return nil, nil, false, models.ErrActualLRPCannotBeCrashed
	}

	lrp.State = models.ActualLRPStateCrashed
	lrp.Since = db.clock.Now().UnixNano()
	lrp.CrashCount = newCrashCount
	lrp.ActualLRPInstanceKey = models.ActualLRPInstanceKey{}
	lrp.ActualLRPNetInfo = models.EmptyActualLRPNetInfo()
	lrp.ModificationTag.Increment()
	lrp.CrashReason = errorMessage

	var immediateRestart bool
	if lrp.ShouldRestartImmediately(models.NewDefaultRestartCalculator()) {
		lrp.State = models.ActualLRPStateUnclaimed
		immediateRestart = true
	}

	lrpData, serializeErr := db.serializeModel(logger, lrp)
	if serializeErr != nil {
		return nil, nil, false, serializeErr
	}

	_, err = db.client.CompareAndSwap(ActualLRPSchemaPath(key.ProcessGuid, key.Index), lrpData, 0, prevIndex)
	if err != nil {
		logger.Error("failed", err)
		return nil, nil, false, models.ErrActualLRPCannotBeCrashed
	}

	logger.Info("succeeded")
	return &models.ActualLRPGroup{Instance: &beforeActualLRP}, &models.ActualLRPGroup{Instance: lrp}, immediateRestart, nil
}
Example #15
0
func (db *ETCDDB) DesiredLRPs(logger lager.Logger, filter models.DesiredLRPFilter) ([]*models.DesiredLRP, error) {
	logger = logger.WithData(lager.Data{"filter": filter})
	logger.Info("start")
	defer logger.Info("complete")

	desireds, _, err := db.desiredLRPs(logger, filter)
	if err != nil {
		logger.Error("failed", err)
	}
	return desireds, err
}
Example #16
0
func (db *SQLDB) DesiredLRPByProcessGuid(logger lager.Logger, processGuid string) (*models.DesiredLRP, error) {
	logger = logger.WithData(lager.Data{"process_guid": processGuid})
	logger.Debug("starting")
	defer logger.Debug("complete")

	row := db.one(logger, db.db, desiredLRPsTable,
		desiredLRPColumns, NoLockRow,
		"process_guid = ?", processGuid,
	)
	return db.fetchDesiredLRP(logger, row)
}
Example #17
0
func (h *DesiredLRPHandler) UpdateDesiredLRP(logger lager.Logger, w http.ResponseWriter, req *http.Request) {
	logger = logger.Session("update-desired-lrp")

	request := &models.UpdateDesiredLRPRequest{}
	response := &models.DesiredLRPLifecycleResponse{}
	defer func() { exitIfUnrecoverable(logger, h.exitChan, response.Error) }()
	defer writeResponse(w, response)

	err := parseRequest(logger, req, request)
	if err != nil {
		logger.Error("failed-parsing-request", err)
		response.Error = models.ConvertError(err)
		return
	}

	logger = logger.WithData(lager.Data{"guid": request.ProcessGuid})

	logger.Debug("updating-desired-lrp")
	beforeDesiredLRP, err := h.desiredLRPDB.UpdateDesiredLRP(logger, request.ProcessGuid, request.Update)
	if err != nil {
		logger.Debug("failed-updating-desired-lrp")
		response.Error = models.ConvertError(err)
		return
	}
	logger.Debug("completed-updating-desired-lrp")

	desiredLRP, err := h.desiredLRPDB.DesiredLRPByProcessGuid(logger, request.ProcessGuid)
	if err != nil {
		logger.Error("failed-fetching-desired-lrp", err)
		return
	}

	if request.Update.Instances != nil {
		logger.Debug("updating-lrp-instances")
		previousInstanceCount := beforeDesiredLRP.Instances

		requestedInstances := *request.Update.Instances - previousInstanceCount

		logger = logger.WithData(lager.Data{"instances_delta": requestedInstances})
		if requestedInstances > 0 {
			logger.Debug("increasing-the-instances")
			schedulingInfo := desiredLRP.DesiredLRPSchedulingInfo()
			h.startInstanceRange(logger, previousInstanceCount, *request.Update.Instances, &schedulingInfo)
		}

		if requestedInstances < 0 {
			logger.Debug("decreasing-the-instances")
			numExtraActualLRP := previousInstanceCount + requestedInstances
			h.stopInstancesFrom(logger, request.ProcessGuid, int(numExtraActualLRP))
		}
	}

	go h.desiredHub.Emit(models.NewDesiredLRPChangedEvent(beforeDesiredLRP, desiredLRP))
}
Example #18
0
func (db *ETCDDB) RemoveActualLRP(logger lager.Logger, processGuid string, index int32, instanceKey *models.ActualLRPInstanceKey) error {
	logger = logger.WithData(lager.Data{"process_guid": processGuid, "index": index, "instance_key": instanceKey})

	lrp, prevIndex, err := db.rawActualLRPByProcessGuidAndIndex(logger, processGuid, index)
	if err != nil {
		return err
	}

	if instanceKey != nil && !lrp.ActualLRPInstanceKey.Equal(instanceKey) {
		logger.Debug("not-found", lager.Data{"actual_lrp_instance_key": lrp.ActualLRPInstanceKey, "instance_key": instanceKey})
		return models.ErrResourceNotFound
	}

	return db.removeActualLRP(logger, lrp, prevIndex)
}
Example #19
0
func (db *SQLDB) UpdateDesiredLRP(logger lager.Logger, processGuid string, update *models.DesiredLRPUpdate) (*models.DesiredLRP, error) {
	logger = logger.WithData(lager.Data{"process_guid": processGuid})
	logger.Info("starting")
	defer logger.Info("complete")

	var beforeDesiredLRP *models.DesiredLRP
	err := db.transact(logger, func(logger lager.Logger, tx *sql.Tx) error {
		var err error
		row := db.one(logger, tx, desiredLRPsTable,
			desiredLRPColumns, LockRow,
			"process_guid = ?", processGuid,
		)
		beforeDesiredLRP, err = db.fetchDesiredLRP(logger, row)
		if err != nil {
			logger.Error("failed-lock-desired", err)
			return err
		}

		updateAttributes := SQLAttributes{"modification_tag_index": beforeDesiredLRP.ModificationTag.Index + 1}

		if update.Annotation != nil {
			updateAttributes["annotation"] = *update.Annotation
		}

		if update.Instances != nil {
			updateAttributes["instances"] = *update.Instances
		}

		if update.Routes != nil {
			encodedData, err := db.encodeRouteData(logger, update.Routes)
			if err != nil {
				return err
			}
			updateAttributes["routes"] = encodedData
		}

		_, err = db.update(logger, tx, desiredLRPsTable, updateAttributes, `process_guid = ?`, processGuid)
		if err != nil {
			logger.Error("failed-executing-query", err)
			return db.convertSQLError(err)
		}

		return nil
	})

	return beforeDesiredLRP, err
}
Example #20
0
func (db *SQLDB) FailActualLRP(logger lager.Logger, key *models.ActualLRPKey, placementError string) (*models.ActualLRPGroup, *models.ActualLRPGroup, error) {
	logger = logger.WithData(lager.Data{"actual_lrp_key": key, "placement_error": placementError})
	logger.Info("starting")
	defer logger.Info("complete")

	var beforeActualLRP models.ActualLRP
	var actualLRP *models.ActualLRP

	err := db.transact(logger, func(logger lager.Logger, tx *sql.Tx) error {
		var err error
		actualLRP, err = db.fetchActualLRPForUpdate(logger, key.ProcessGuid, key.Index, false, tx)
		if err != nil {
			logger.Error("failed-to-get-actual-lrp", err)
			return err
		}
		beforeActualLRP = *actualLRP

		if actualLRP.State != models.ActualLRPStateUnclaimed {
			logger.Error("cannot-fail-actual-lrp", nil, lager.Data{"from_state": actualLRP.State})
			return models.ErrActualLRPCannotBeFailed
		}

		now := db.clock.Now().UnixNano()
		actualLRP.ModificationTag.Increment()
		actualLRP.PlacementError = placementError
		actualLRP.Since = now
		evacuating := false

		_, err = db.update(logger, tx, actualLRPsTable,
			SQLAttributes{
				"modification_tag_index": actualLRP.ModificationTag.Index,
				"placement_error":        truncateString(actualLRP.PlacementError, 1024),
				"since":                  actualLRP.Since,
			},
			"process_guid = ? AND instance_index = ? AND evacuating = ?",
			key.ProcessGuid, key.Index, evacuating,
		)
		if err != nil {
			logger.Error("failed-failing-actual-lrp", err)
			return db.convertSQLError(err)
		}

		return nil
	})

	return &models.ActualLRPGroup{Instance: &beforeActualLRP}, &models.ActualLRPGroup{Instance: actualLRP}, err
}
Example #21
0
func (db *ETCDDB) ClaimActualLRP(logger lager.Logger, processGuid string, index int32, instanceKey *models.ActualLRPInstanceKey) (*models.ActualLRPGroup, *models.ActualLRPGroup, error) {
	logger = logger.WithData(lager.Data{"process_guid": processGuid, "index": index, "actual_lrp_instance_key": instanceKey})
	logger.Info("starting")

	lrp, prevIndex, err := db.rawActualLRPByProcessGuidAndIndex(logger, processGuid, index)
	if err != nil {
		logger.Error("failed", err)
		return nil, nil, err
	}
	beforeActualLRP := *lrp

	if !lrp.AllowsTransitionTo(&lrp.ActualLRPKey, instanceKey, models.ActualLRPStateClaimed) {
		return nil, nil, models.ErrActualLRPCannotBeClaimed
	}

	if lrp.State == models.ActualLRPStateClaimed && lrp.ActualLRPInstanceKey.Equal(instanceKey) {
		return &models.ActualLRPGroup{Instance: &beforeActualLRP}, &models.ActualLRPGroup{Instance: lrp}, nil
	}

	lrp.PlacementError = ""
	lrp.State = models.ActualLRPStateClaimed
	lrp.ActualLRPInstanceKey = *instanceKey
	lrp.ActualLRPNetInfo = models.ActualLRPNetInfo{}
	lrp.ModificationTag.Increment()
	lrp.Since = db.clock.Now().UnixNano()

	err = lrp.Validate()
	if err != nil {
		logger.Error("failed", err)
		return nil, nil, models.NewError(models.Error_InvalidRecord, err.Error())
	}

	lrpData, serializeErr := db.serializeModel(logger, lrp)
	if serializeErr != nil {
		return nil, nil, serializeErr
	}

	_, err = db.client.CompareAndSwap(ActualLRPSchemaPath(processGuid, index), lrpData, 0, prevIndex)
	if err != nil {
		logger.Error("compare-and-swap-failed", err)
		return nil, nil, models.ErrActualLRPCannotBeClaimed
	}
	logger.Info("succeeded")

	return &models.ActualLRPGroup{Instance: &beforeActualLRP}, &models.ActualLRPGroup{Instance: lrp}, nil
}
Example #22
0
func (db *SQLDB) ActualLRPGroupByProcessGuidAndIndex(logger lager.Logger, processGuid string, index int32) (*models.ActualLRPGroup, error) {
	logger = logger.WithData(lager.Data{"process_guid": processGuid, "index": index})
	logger.Debug("starting")
	defer logger.Debug("complete")

	groups, err := db.getActualLRPS(logger, "process_guid = ? AND instance_index = ?", processGuid, index)
	if err != nil {
		return nil, err
	}

	if len(groups) == 0 {
		logger.Error("failed-to-find-actual-lrp-group", models.ErrResourceNotFound)
		return nil, models.ErrResourceNotFound
	}

	return groups[0], nil
}
Example #23
0
func (db *SQLDB) DesiredLRPSchedulingInfos(logger lager.Logger, filter models.DesiredLRPFilter) ([]*models.DesiredLRPSchedulingInfo, error) {
	logger = logger.WithData(lager.Data{"filter": filter})
	logger.Debug("start")
	defer logger.Debug("complete")

	var wheres []string
	var values []interface{}

	if filter.Domain != "" {
		wheres = append(wheres, "domain = ?")
		values = append(values, filter.Domain)
	}

	results := []*models.DesiredLRPSchedulingInfo{}

	err := db.transact(logger, func(logger lager.Logger, tx *sql.Tx) error {
		rows, err := db.all(logger, tx, desiredLRPsTable,
			schedulingInfoColumns, NoLockRow,
			strings.Join(wheres, " AND "), values...,
		)
		if err != nil {
			logger.Error("failed-query", err)
			return db.convertSQLError(err)
		}
		defer rows.Close()

		for rows.Next() {
			desiredLRPSchedulingInfo, err := db.fetchDesiredLRPSchedulingInfo(logger, rows)
			if err != nil {
				logger.Error("failed-reading-row", err)
				continue
			}
			results = append(results, desiredLRPSchedulingInfo)
		}

		if rows.Err() != nil {
			logger.Error("failed-fetching-row", rows.Err())
			return db.convertSQLError(rows.Err())
		}

		return nil
	})

	return results, err
}
Example #24
0
func (db *SQLDB) ActualLRPGroups(logger lager.Logger, filter models.ActualLRPFilter) ([]*models.ActualLRPGroup, error) {
	logger = logger.WithData(lager.Data{"filter": filter})
	logger.Debug("starting")
	defer logger.Debug("complete")

	var wheres []string
	var values []interface{}

	if filter.Domain != "" {
		wheres = append(wheres, "domain = ?")
		values = append(values, filter.Domain)
	}

	if filter.CellID != "" {
		wheres = append(wheres, "cell_id = ?")
		values = append(values, filter.CellID)
	}
	return db.getActualLRPS(logger, strings.Join(wheres, " AND "), values...)
}
Example #25
0
func (db *SQLDB) CreateUnclaimedActualLRP(logger lager.Logger, key *models.ActualLRPKey) (*models.ActualLRPGroup, error) {
	logger = logger.WithData(lager.Data{"key": key})
	logger.Info("starting")
	defer logger.Info("complete")

	guid, err := db.guidProvider.NextGUID()
	if err != nil {
		logger.Error("failed-to-generate-guid", err)
		return nil, models.ErrGUIDGeneration
	}

	now := db.clock.Now().UnixNano()
	err = db.transact(logger, func(logger lager.Logger, tx *sql.Tx) error {
		_, err := db.insert(logger, db.db, actualLRPsTable,
			SQLAttributes{
				"process_guid":           key.ProcessGuid,
				"instance_index":         key.Index,
				"domain":                 key.Domain,
				"state":                  models.ActualLRPStateUnclaimed,
				"since":                  now,
				"net_info":               []byte{},
				"modification_tag_epoch": guid,
				"modification_tag_index": 0,
			},
		)

		return err
	})

	if err != nil {
		logger.Error("failed-to-create-unclaimed-actual-lrp", err)
		return nil, db.convertSQLError(err)
	}
	return &models.ActualLRPGroup{
		Instance: &models.ActualLRP{
			ActualLRPKey:    *key,
			State:           models.ActualLRPStateUnclaimed,
			Since:           now,
			ModificationTag: models.ModificationTag{Epoch: guid, Index: 0},
		},
	}, nil
}
Example #26
0
func (db *SQLDB) DesiredLRPByProcessGuid(logger lager.Logger, processGuid string) (*models.DesiredLRP, error) {
	logger = logger.WithData(lager.Data{"process_guid": processGuid})
	logger.Debug("starting")
	defer logger.Debug("complete")

	var desiredLRP *models.DesiredLRP

	err := db.transact(logger, func(logger lager.Logger, tx *sql.Tx) error {
		var err error
		row := db.one(logger, tx, desiredLRPsTable,
			desiredLRPColumns, NoLockRow,
			"process_guid = ?", processGuid,
		)

		desiredLRP, err = db.fetchDesiredLRP(logger, row)
		return err
	})

	return desiredLRP, err
}
Example #27
0
func (db *SQLDB) RemoveDesiredLRP(logger lager.Logger, processGuid string) error {
	logger = logger.WithData(lager.Data{"process_guid": processGuid})
	logger.Info("starting")
	defer logger.Info("complete")

	return db.transact(logger, func(logger lager.Logger, tx *sql.Tx) error {
		err := db.lockDesiredLRPByGuidForUpdate(logger, processGuid, tx)
		if err != nil {
			logger.Error("failed-lock-desired", err)
			return err
		}

		_, err = db.delete(logger, tx, desiredLRPsTable, "process_guid = ?", processGuid)
		if err != nil {
			logger.Error("failed-deleting-from-db", err)
			return db.convertSQLError(err)
		}

		return nil
	})
}
Example #28
0
// The stager calls this when it wants to signal that it has received a completion and is handling it
// stagerTaskBBS will retry this repeatedly if it gets a StoreTimeout error (up to N seconds?)
// If this fails, the stager should assume that someone else is handling the completion and should bail
func (db *ETCDDB) DeleteTask(logger lager.Logger, taskGuid string) error {
	logger = logger.WithData(lager.Data{"task_guid": taskGuid})

	logger.Info("starting")
	defer logger.Info("finished")

	task, _, err := db.taskByGuidWithIndex(logger, taskGuid)
	if err != nil {
		logger.Error("failed-getting-task", err)
		return err
	}

	if task.State != models.Task_Resolving {
		err = models.NewTaskTransitionError(task.State, models.Task_Resolving)
		logger.Error("invalid-state-transition", err)
		return err
	}

	_, err = db.client.Delete(TaskSchemaPathByGuid(taskGuid), false)
	return ErrorFromEtcdError(logger, err)
}
Example #29
0
func (db *ETCDDB) DesiredLRPSchedulingInfos(logger lager.Logger, filter models.DesiredLRPFilter) ([]*models.DesiredLRPSchedulingInfo, error) {
	logger = logger.WithData(lager.Data{"filter": filter})
	logger.Info("start")
	defer logger.Info("complete")

	root, err := db.fetchRecursiveRaw(logger, DesiredLRPSchedulingInfoSchemaRoot)
	bbsErr := models.ConvertError(err)
	if bbsErr != nil {
		if bbsErr.Type == models.Error_ResourceNotFound {
			return []*models.DesiredLRPSchedulingInfo{}, nil
		}
		return nil, err
	}

	schedulingInfoMap, _ := db.deserializeScheduleInfos(logger, root.Nodes, filter)

	schedulingInfos := make([]*models.DesiredLRPSchedulingInfo, 0, len(schedulingInfoMap))
	for _, schedulingInfo := range schedulingInfoMap {
		schedulingInfos = append(schedulingInfos, schedulingInfo)
	}
	return schedulingInfos, nil
}
Example #30
0
func (db *ETCDDB) StartTask(logger lager.Logger, taskGuid, cellID string) (bool, error) {
	logger.Debug("starting")
	defer logger.Debug("finished")

	task, index, err := db.taskByGuidWithIndex(logger, taskGuid)
	if err != nil {
		logger.Error("failed-to-fetch-task", err)
		return false, err
	}

	logger = logger.WithData(lager.Data{"task": task.LagerData()})

	if task.State == models.Task_Running && task.CellId == cellID {
		logger.Info("task-already-running")
		return false, nil
	}

	if err = task.ValidateTransitionTo(models.Task_Running); err != nil {
		return false, err
	}

	task.UpdatedAt = db.clock.Now().UnixNano()
	task.State = models.Task_Running
	task.CellId = cellID

	value, err := db.serializeModel(logger, task)
	if err != nil {
		return false, err
	}

	_, err = db.client.CompareAndSwap(TaskSchemaPathByGuid(taskGuid), value, NO_TTL, index)
	if err != nil {
		logger.Error("failed-persisting-task", err)
		return false, ErrorFromEtcdError(logger, err)
	}

	return true, nil
}