Example #1
0
// OnRPC should be called on each RPC that we want to track.
func (tracker *FleetTracker) OnRPC(rpcEvent *RPCEvent) error {
	tracker.lock.RLock()
	servingTracker, ok := tracker.services[rpcEvent.ServingID]
	tracker.lock.RUnlock()
	if !ok {
		// Handling new service.
		tracker.lock.Lock()
		// Make sure nothing changed when we switched to writing.
		_, ok := tracker.services[rpcEvent.ServingID]
		if ok {
			tracker.lock.Unlock()
			// Something changed while we switched locks. Try again.
			return tracker.OnRPC(rpcEvent)
		}

		codeDir := dockerutil.CodeDirPath(
			rpcEvent.Environment, rpcEvent.Service, rpcEvent.CodeVersion)
		leverConfig, err := core.ReadLeverConfig(codeDir)
		if err != nil {
			tracker.lock.Unlock()
			return err
		}
		servingTracker = NewLoadTracker(
			rpcEvent.ServingID, leverConfig.MaxInstanceLoad,
			leverConfig.MinInstances, rpcEvent.SessionID)
		tracker.services[rpcEvent.ServingID] = servingTracker
		go tracker.monitorServiceResource(rpcEvent.SessionID)
		tracker.lock.Unlock()
	}

	delta := servingTracker.OnRPC(rpcEvent.RpcNanos)
	if delta == 0 {
		return nil
	} else if delta > 0 {
		return tracker.scaleUp(delta, rpcEvent)
	} else {
		return tracker.scaleDown(-delta, rpcEvent)
	}
}
Example #2
0
// scaleUp starts delta new instances of the service identified by
// rpcEvent. Each instance gets a fresh random ID, a Docker container,
// and a remote initialization via the host manager. Failures for
// individual instances are logged and skipped so the remaining
// instances still get a chance to start; if any instance failed, a
// single summary error is returned at the end.
func (tracker *FleetTracker) scaleUp(delta int, rpcEvent *RPCEvent) error {
	logger.WithFields(
		"leverEnv", rpcEvent.Environment,
		"leverService", rpcEvent.Service,
		"codeVersion", rpcEvent.CodeVersion,
		"servingID", rpcEvent.ServingID,
		"deltaInstances", delta,
	).Info("Scaling up")

	// Read the entry point from the config.
	codeDir := dockerutil.CodeDirPath(
		rpcEvent.Environment, rpcEvent.Service, rpcEvent.CodeVersion)
	leverConfig, err := core.ReadLeverConfig(codeDir)
	if err != nil {
		return err
	}

	// Spin up.
	hadErrors := false
	for i := 0; i < delta; i++ {
		instanceID := leverutil.RandomID()
		containerID, node, err := dockerutil.StartDockerContainer(
			tracker.docker, rpcEvent.Environment, rpcEvent.Service,
			instanceID, rpcEvent.CodeVersion, rpcEvent.IsAdmin, leverConfig)
		if err != nil {
			logger.WithFields(
				"err", err,
				"leverEnv", rpcEvent.Environment,
				"leverService", rpcEvent.Service,
				"codeVersion", rpcEvent.CodeVersion,
				"servingID", rpcEvent.ServingID,
			).Error("Error starting docker container")
			hadErrors = true
			continue
		}

		// Resource / session IDs are intentionally left empty here:
		// these fields are only populated for the first instance of a
		// service (constructed via the resource-lock path).
		err = hostman.InitializeInstance(
			tracker.grpcPool,
			&hostman.InstanceInfo{
				Environment:       rpcEvent.Environment,
				Service:           rpcEvent.Service,
				InstanceID:        instanceID,
				ContainerID:       containerID,
				ServingID:         rpcEvent.ServingID,
				LevInstResourceID: "",
				LevInstSessionID:  "",
			}, node)
		if err != nil {
			logger.WithFields(
				"err", err,
				"leverEnv", rpcEvent.Environment,
				"leverService", rpcEvent.Service,
				"codeVersion", rpcEvent.CodeVersion,
				"servingID", rpcEvent.ServingID,
				"node", node,
				"leverInstanceID", instanceID,
			).Error("Failed to initialize instance remotely")
			hadErrors = true
			continue
		}
	}
	if hadErrors {
		// Fixed: previous message incorrectly said "scale down" in the
		// scale-up path; also lowercased per Go error-string convention.
		return fmt.Errorf("there were errors during scale up")
	}
	return nil
}
Example #3
0
// newInstance constructs the very first instance of a service, guarded
// by a distributed resource lock so that exactly one caller performs
// the construction. On success it returns the target info, the node the
// container was started on, the resource handle, success=true and a nil
// error. If another caller won the construction race, it returns that
// caller's already-registered target with success=false and a nil
// error. On failure it returns zeroed results and the error.
func (finder *Finder) newInstance(
	env, service string, version int64) (
	levInstTarget *LevInstTarget, targetNode string,
	levInstResource *scale.Resource, success bool, err error) {
	// Note: This process only takes place for the very first instance within
	//       a service. The non-first instances do not have a resource entry
	//       and they are found via service lookup through DNS.
	levInstResourceID := makeLevInstResourceID(env, service, version)
	levInstResource, success, err = scale.ConstructResource(
		leverutil.ServiceFlag.Get(), levInstResourceID,
		InstanceConstructTimeoutFlag.Get())
	if err != nil {
		finder.logger.WithFields("err", err).Error(
			"Failed to construct new instance")
		return nil, "", nil, false, err
	}
	if !success {
		// Failed to get lock. Someone else constructed instance.
		// Decode the winner's target from the resource entry instead of
		// starting a duplicate container.
		levInstTarget = new(LevInstTarget)
		err = json.Unmarshal([]byte(levInstResource.GetTarget()), levInstTarget)
		if err != nil {
			// Corrupt resource data is a programmer/state bug, not a
			// recoverable condition — hence Panic rather than return.
			finder.logger.WithFields("err", err).Panic("Failed to decode json")
		}
		finder.logger.WithFields(
			"leverEnv", env,
			"leverService", service,
			"leverInstanceID", levInstTarget.InstanceID,
		).Debug("Reusing instance")
		return levInstTarget, levInstResource.GetTargetNode(), levInstResource,
			false, nil
	}

	// Read the entry point from the config.
	codeDir := dockerutil.CodeDirPath(env, service, version)
	leverConfig, err := core.ReadLeverConfig(codeDir)
	if err != nil {
		finder.logger.WithFields("err", err).Error(
			"Failed to read lever.json")
		return nil, "", nil, false, err
	}

	// TODO: If somehow first RPC fails and service no longer
	//       contacted afterwards, then the container remains hanging
	//       forever. Need a way to kill it / make it expire after a
	//       while for this situation.
	//       Idea: Poll docker every ~30s for instances that we are not
	//       handling and register those.
	instanceID := leverutil.RandomID() // TODO: Collisions possible.
	leverURL := &core.LeverURL{
		Environment: env,
		Service:     service,
	}
	isAdmin := core.IsAdmin(leverURL)
	containerID, node, err := dockerutil.StartDockerContainer(
		finder.docker, env, service, instanceID, version, isAdmin, leverConfig)
	if err != nil {
		finder.logger.WithFields(
			"err", err,
			"leverEnv", env,
			"leverService", service,
			"leverInstanceID", instanceID,
		).Error("Unable to start container")
		return nil, "", nil, false, err
	}

	// Resolve the host-manager address on the node that got the
	// container, so callers can reach the new instance.
	hostAddr, err := GetHostAddrOnNode(node)
	if err != nil {
		finder.logger.WithFields("err", err).Error(
			"Failed to get Host addr on node")
		return nil, "", nil, false, err
	}

	finder.logger.WithFields(
		"leverEnv", env,
		"leverService", service,
		"leverInstanceID", instanceID,
		"containerID", containerID,
		"node", node,
	).Debug("Creating new instance")

	// Persist the target info into the resource entry so losers of the
	// construction race (the !success branch above) can reuse it.
	levInstTarget = &LevInstTarget{
		HostAddr:    hostAddr,
		InstanceID:  instanceID,
		ContainerID: containerID,
	}
	target, err := json.Marshal(levInstTarget)
	if err != nil {
		finder.logger.WithFields("err", err).Panic("Failed to encode json")
	}

	// NOTE(review): expiry is 2x the instance expiry flag — presumably
	// to outlive one full instance-expiry cycle; confirm intent.
	err = levInstResource.DoneConstructing(
		string(target), node, 2*hostman.InstanceExpiryTimeFlag.Get())
	if err != nil {
		finder.logger.WithFields("err", err).Error(
			"Failed to register new instance")
		return nil, "", nil, false, err
	}

	return levInstTarget, node, levInstResource, true, nil
}