func (tracker *FleetTracker) scaleUp(delta int, rpcEvent *RPCEvent) error { logger.WithFields( "leverEnv", rpcEvent.Environment, "leverService", rpcEvent.Service, "codeVersion", rpcEvent.CodeVersion, "servingID", rpcEvent.ServingID, "deltaInstances", delta, ).Info("Scaling up") // Read the entry point from the config. codeDir := dockerutil.CodeDirPath( rpcEvent.Environment, rpcEvent.Service, rpcEvent.CodeVersion) leverConfig, err := core.ReadLeverConfig(codeDir) if err != nil { return err } // Spin up. hadErrors := false for i := 0; i < delta; i++ { instanceID := leverutil.RandomID() containerID, node, err := dockerutil.StartDockerContainer( tracker.docker, rpcEvent.Environment, rpcEvent.Service, instanceID, rpcEvent.CodeVersion, rpcEvent.IsAdmin, leverConfig) if err != nil { logger.WithFields( "err", err, "leverEnv", rpcEvent.Environment, "leverService", rpcEvent.Service, "codeVersion", rpcEvent.CodeVersion, "servingID", rpcEvent.ServingID, ).Error("Error starting docker container") hadErrors = true continue } err = hostman.InitializeInstance( tracker.grpcPool, &hostman.InstanceInfo{ Environment: rpcEvent.Environment, Service: rpcEvent.Service, InstanceID: instanceID, ContainerID: containerID, ServingID: rpcEvent.ServingID, LevInstResourceID: "", LevInstSessionID: "", }, node) if err != nil { logger.WithFields( "err", err, "leverEnv", rpcEvent.Environment, "leverService", rpcEvent.Service, "codeVersion", rpcEvent.CodeVersion, "servingID", rpcEvent.ServingID, "node", node, "leverInstanceID", instanceID, ).Error("Failed to initialize instance remotely") hadErrors = true continue } } if hadErrors { return fmt.Errorf("There were errors during scale down") } return nil }
func (finder *Finder) newInstance( env, service string, version int64) ( levInstTarget *LevInstTarget, targetNode string, levInstResource *scale.Resource, success bool, err error) { // Note: This process only takes place for the very first instance within // a service. The non-first instances do not have a resource entry // and they are found via service lookup through DNS. levInstResourceID := makeLevInstResourceID(env, service, version) levInstResource, success, err = scale.ConstructResource( leverutil.ServiceFlag.Get(), levInstResourceID, InstanceConstructTimeoutFlag.Get()) if err != nil { finder.logger.WithFields("err", err).Error( "Failed to construct new instance") return nil, "", nil, false, err } if !success { // Failed to get lock. Someone else constructed instance. levInstTarget = new(LevInstTarget) err = json.Unmarshal([]byte(levInstResource.GetTarget()), levInstTarget) if err != nil { finder.logger.WithFields("err", err).Panic("Failed to decode json") } finder.logger.WithFields( "leverEnv", env, "leverService", service, "leverInstanceID", levInstTarget.InstanceID, ).Debug("Reusing instance") return levInstTarget, levInstResource.GetTargetNode(), levInstResource, false, nil } // Read the entry point from the config. codeDir := dockerutil.CodeDirPath(env, service, version) leverConfig, err := core.ReadLeverConfig(codeDir) if err != nil { finder.logger.WithFields("err", err).Error( "Failed to read lever.json") return nil, "", nil, false, err } // TODO: If somehow first RPC fails and service no longer // contacted afterwards, then the container remains hanging // forever. Need a way to kill it / make it expire after a // while for this situation. // Idea: Poll docker every ~30s for instances that we are not // handling and register those. instanceID := leverutil.RandomID() // TODO: Collisions possible. leverURL := &core.LeverURL{ Environment: env, Service: service, } isAdmin := core.IsAdmin(leverURL) containerID, node, err := dockerutil.StartDockerContainer( finder.docker, env, service, instanceID, version, isAdmin, leverConfig) if err != nil { finder.logger.WithFields( "err", err, "leverEnv", env, "leverService", service, "leverInstanceID", instanceID, ).Error("Unable to start container") return nil, "", nil, false, err } hostAddr, err := GetHostAddrOnNode(node) if err != nil { finder.logger.WithFields("err", err).Error( "Failed to get Host addr on node") return nil, "", nil, false, err } finder.logger.WithFields( "leverEnv", env, "leverService", service, "leverInstanceID", instanceID, "containerID", containerID, "node", node, ).Debug("Creating new instance") levInstTarget = &LevInstTarget{ HostAddr: hostAddr, InstanceID: instanceID, ContainerID: containerID, } target, err := json.Marshal(levInstTarget) if err != nil { finder.logger.WithFields("err", err).Panic("Failed to encode json") } err = levInstResource.DoneConstructing( string(target), node, 2*hostman.InstanceExpiryTimeFlag.Get()) if err != nil { finder.logger.WithFields("err", err).Error( "Failed to register new instance") return nil, "", nil, false, err } return levInstTarget, node, levInstResource, true, nil }