// OnRPC should be called on each RPC that we want to track. It looks up (or
// lazily creates) the per-service load tracker for the event's ServingID,
// records the RPC's duration and, depending on the resulting load delta,
// triggers a scale-up or scale-down of the service's fleet.
func (tracker *FleetTracker) OnRPC(rpcEvent *RPCEvent) error {
	// Fast path: look up existing tracker under the read lock only.
	tracker.lock.RLock()
	servingTracker, ok := tracker.services[rpcEvent.ServingID]
	tracker.lock.RUnlock()
	if !ok {
		// Handling new service.
		tracker.lock.Lock()
		// Make sure nothing changed when we switched to writing.
		_, ok := tracker.services[rpcEvent.ServingID]
		if ok {
			tracker.lock.Unlock()
			// Something changed while we switched locks. Try again.
			return tracker.OnRPC(rpcEvent)
		}
		// Load scaling parameters (max load per instance, min instances)
		// from the service's lever config on disk.
		codeDir := dockerutil.CodeDirPath(
			rpcEvent.Environment, rpcEvent.Service, rpcEvent.CodeVersion)
		leverConfig, err := core.ReadLeverConfig(codeDir)
		if err != nil {
			tracker.lock.Unlock()
			return err
		}
		servingTracker = NewLoadTracker(
			rpcEvent.ServingID, leverConfig.MaxInstanceLoad,
			leverConfig.MinInstances, rpcEvent.SessionID)
		tracker.services[rpcEvent.ServingID] = servingTracker
		// Watch the service's session resource in the background.
		// NOTE(review): this goroutine's lifetime is managed elsewhere —
		// presumably it exits when the session expires; confirm.
		go tracker.monitorServiceResource(rpcEvent.SessionID)
		tracker.lock.Unlock()
	}
	// delta is the number of instances to add (positive) or remove
	// (negative) based on the observed load.
	delta := servingTracker.OnRPC(rpcEvent.RpcNanos)
	if delta == 0 {
		return nil
	} else if delta > 0 {
		return tracker.scaleUp(delta, rpcEvent)
	} else {
		return tracker.scaleDown(-delta, rpcEvent)
	}
}
// DeployServiceDir deploys provided dir onto Lever, as a Lever service. func DeployServiceDir( adminEnv string, host string, env string, srcDir string) error { client, err := api.NewClient() if err != nil { return err } if host != "" { client.ForceHost = host } info, err := os.Lstat(srcDir) if err != nil { return fmt.Errorf("Error getting info about source dir: %v", err) } if !info.IsDir() { return fmt.Errorf("Source is not a directory") } _, err = core.ReadLeverConfig(srcDir) if err != nil { return fmt.Errorf("Error reading lever.json: %v", err) } pipeReader, pipeWriter := io.Pipe() bufReader := bufio.NewReader(pipeReader) errCh := make(chan error) go func() { tarErr := leverutil.Tar(pipeWriter, srcDir) if tarErr != nil { errCh <- fmt.Errorf("Error trying to tar dir: %v", tarErr) return } tarErr = pipeWriter.Close() if tarErr != nil { errCh <- tarErr } close(errCh) }() endpoint := client.Service(adminEnv, "admin") stream, err := endpoint.InvokeChan("DeployServiceChan", env) if err != nil { return fmt.Errorf("Error invoking DeployServiceChan: %v", err) } chunk := make([]byte, 32*1024) for { var size int size, err = bufReader.Read(chunk) if err == io.EOF { break } if err != nil { stream.Close() return err } err = stream.Send(chunk[:size]) if err != nil { stream.Close() return err } } err = stream.Close() if err != nil { return err } err, hasErr := <-errCh if hasErr && err != nil { return err } // Wait for remote to close stream. var msg interface{} err = stream.Receive(&msg) if err == nil { return fmt.Errorf("Unexpected message received") } if err != nil && err != io.EOF { return err } return nil }
// newInstance constructs the very first instance of a service version,
// guarded by a distributed resource lock so that only one node performs the
// construction. It returns the instance target info, the node it runs on,
// the resource handle, whether this call did the construction (success),
// and any error.
func (finder *Finder) newInstance(
	env, service string, version int64) (
	levInstTarget *LevInstTarget, targetNode string,
	levInstResource *scale.Resource, success bool, err error) {
	// Note: This process only takes place for the very first instance within
	// a service. The non-first instances do not have a resource entry
	// and they are found via service lookup through DNS.
	levInstResourceID := makeLevInstResourceID(env, service, version)
	// Attempt to acquire the construction lock for this resource; success
	// is false when another node won the race.
	levInstResource, success, err = scale.ConstructResource(
		leverutil.ServiceFlag.Get(), levInstResourceID,
		InstanceConstructTimeoutFlag.Get())
	if err != nil {
		finder.logger.WithFields("err", err).Error(
			"Failed to construct new instance")
		return nil, "", nil, false, err
	}
	if !success {
		// Failed to get lock. Someone else constructed instance.
		// Decode the target the other node published and reuse it.
		levInstTarget = new(LevInstTarget)
		err = json.Unmarshal([]byte(levInstResource.GetTarget()), levInstTarget)
		if err != nil {
			finder.logger.WithFields("err", err).Panic("Failed to decode json")
		}
		finder.logger.WithFields(
			"leverEnv", env,
			"leverService", service,
			"leverInstanceID", levInstTarget.InstanceID,
		).Debug("Reusing instance")
		return levInstTarget, levInstResource.GetTargetNode(),
			levInstResource, false, nil
	}
	// Read the entry point from the config.
	codeDir := dockerutil.CodeDirPath(env, service, version)
	leverConfig, err := core.ReadLeverConfig(codeDir)
	if err != nil {
		finder.logger.WithFields("err", err).Error(
			"Failed to read lever.json")
		return nil, "", nil, false, err
	}
	// TODO: If somehow first RPC fails and service no longer
	// contacted afterwards, then the container remains hanging
	// forever. Need a way to kill it / make it expire after a
	// while for this situation.
	// Idea: Poll docker every ~30s for instances that we are not
	// handling and register those.
	instanceID := leverutil.RandomID() // TODO: Collisions possible.
	leverURL := &core.LeverURL{
		Environment: env,
		Service:     service,
	}
	isAdmin := core.IsAdmin(leverURL)
	// Start the actual docker container for the new instance.
	containerID, node, err := dockerutil.StartDockerContainer(
		finder.docker, env, service, instanceID, version, isAdmin,
		leverConfig)
	if err != nil {
		finder.logger.WithFields(
			"err", err,
			"leverEnv", env,
			"leverService", service,
			"leverInstanceID", instanceID,
		).Error("Unable to start container")
		return nil, "", nil, false, err
	}
	hostAddr, err := GetHostAddrOnNode(node)
	if err != nil {
		finder.logger.WithFields("err", err).Error(
			"Failed to get Host addr on node")
		return nil, "", nil, false, err
	}
	finder.logger.WithFields(
		"leverEnv", env,
		"leverService", service,
		"leverInstanceID", instanceID,
		"containerID", containerID,
		"node", node,
	).Debug("Creating new instance")
	levInstTarget = &LevInstTarget{
		HostAddr:    hostAddr,
		InstanceID:  instanceID,
		ContainerID: containerID,
	}
	target, err := json.Marshal(levInstTarget)
	if err != nil {
		finder.logger.WithFields("err", err).Panic("Failed to encode json")
	}
	// Publish the target on the resource so other nodes can reuse this
	// instance. The expiry is twice the instance expiry time — presumably
	// refreshed while the instance stays alive; TODO confirm.
	err = levInstResource.DoneConstructing(
		string(target), node, 2*hostman.InstanceExpiryTimeFlag.Get())
	if err != nil {
		finder.logger.WithFields("err", err).Error(
			"Failed to register new instance")
		return nil, "", nil, false, err
	}
	return levInstTarget, node, levInstResource, true, nil
}
func (tracker *FleetTracker) scaleUp(delta int, rpcEvent *RPCEvent) error { logger.WithFields( "leverEnv", rpcEvent.Environment, "leverService", rpcEvent.Service, "codeVersion", rpcEvent.CodeVersion, "servingID", rpcEvent.ServingID, "deltaInstances", delta, ).Info("Scaling up") // Read the entry point from the config. codeDir := dockerutil.CodeDirPath( rpcEvent.Environment, rpcEvent.Service, rpcEvent.CodeVersion) leverConfig, err := core.ReadLeverConfig(codeDir) if err != nil { return err } // Spin up. hadErrors := false for i := 0; i < delta; i++ { instanceID := leverutil.RandomID() containerID, node, err := dockerutil.StartDockerContainer( tracker.docker, rpcEvent.Environment, rpcEvent.Service, instanceID, rpcEvent.CodeVersion, rpcEvent.IsAdmin, leverConfig) if err != nil { logger.WithFields( "err", err, "leverEnv", rpcEvent.Environment, "leverService", rpcEvent.Service, "codeVersion", rpcEvent.CodeVersion, "servingID", rpcEvent.ServingID, ).Error("Error starting docker container") hadErrors = true continue } err = hostman.InitializeInstance( tracker.grpcPool, &hostman.InstanceInfo{ Environment: rpcEvent.Environment, Service: rpcEvent.Service, InstanceID: instanceID, ContainerID: containerID, ServingID: rpcEvent.ServingID, LevInstResourceID: "", LevInstSessionID: "", }, node) if err != nil { logger.WithFields( "err", err, "leverEnv", rpcEvent.Environment, "leverService", rpcEvent.Service, "codeVersion", rpcEvent.CodeVersion, "servingID", rpcEvent.ServingID, "node", node, "leverInstanceID", instanceID, ).Error("Failed to initialize instance remotely") hadErrors = true continue } } if hadErrors { return fmt.Errorf("There were errors during scale down") } return nil }
// DeployServiceChan deploys the service with provided name onto Lever. If the // service already exists, it is replaced. The method expects a code package // in the form of chunks over stream messages to be sent over. func (admin *Admin) DeployServiceChan( stream leverapi.Stream, envName string) error { // TODO: Auth layer. // Untar to a temp dir. tmpDir := tmpDir(envName) err := os.MkdirAll(tmpDir, 0777) if err != nil { logger.WithFields("err", err).Error( "Cannot create tmp dir for untar") return errInternal } success := false defer func() { if !success { remErr := os.RemoveAll(tmpDir) if remErr != nil { logger.WithFields("err", err).Error( "Error trying to remove tmp dir after failure to untar") } } }() pipeReader, pipeWriter := io.Pipe() bufReader := bufio.NewReader(pipeReader) doneCh := make(chan error, 1) go func() { untarErr := leverutil.Untar(bufReader, tmpDir) if untarErr != nil { doneCh <- untarErr } close(doneCh) }() totalSize := 0 for { var chunk []byte err = stream.Receive(&chunk) if err == io.EOF { break } if err != nil { logger.WithFields("err", err).Error("Error receiving chunks") return errInternal } var chunkSize int chunkSize, err = pipeWriter.Write(chunk) if err != nil { logger.WithFields("err", err).Error( "Error forwarding chunk to untar") return errInternal } // TODO: Also limit total size of decompressed files. totalSize += chunkSize if totalSize > maxUploadSize { logger.Error("File being uploaded is too large") return errTooLarge } // Check if there's been an untar error. select { case untarErr, hasErr := <-doneCh: if !hasErr { continue } logger.WithFields("err", untarErr).Error("Error trying to untar") return errUntar default: } } err = pipeWriter.Close() if err != nil { logger.WithFields("err", err).Error("Error closing pipe") return errInternal } // Wait for untar to finish. untarErr, hasErr := <-doneCh if hasErr && untarErr != nil { logger.WithFields("err", untarErr).Error("Error trying to untar") return errUntar } // Read lever.json. 
leverConfig, err := core.ReadLeverConfig(tmpDir) if err != nil { logger.WithFields("err", err).Error("Error trying to read lever.json") return errLeverJSON } // Init service table entry. createErr := store.NewService( admin.as, envName, leverConfig.Service, leverConfig.Description, !leverConfig.IsPrivate) // Ignore createErr here in case it already exists. codeVersion, err := store.NewServiceCodeVersion( admin.as, envName, leverConfig.Service) if err != nil { if createErr != nil { logger.WithFields("err", createErr).Error("Error creating service") return errInternal } logger.WithFields("err", err).Error("Error getting new service version") return errInternal } // Everything worked so far. Move the code to the right place. targetDir := codeDir(envName, leverConfig.Service, codeVersion) parentDir := filepath.Dir(targetDir) err = os.MkdirAll(parentDir, 0777) if err != nil { logger.WithFields("err", err).Error( "Cannot create target dir before move") return errInternal } err = os.Rename(tmpDir, targetDir) if err != nil { logger.WithFields("err", err).Error( "Error trying to move new service to its final destination") return errInternal } // Update entry in service table, making it point to the newly uploaded // version. err = store.UpdateService( admin.as, envName, leverConfig.Service, leverConfig.Description, !leverConfig.IsPrivate, codeVersion) if err != nil { logger.WithFields("err", err).Error("Error trying to update service") return errInternal } // TODO: Remove older versions of code to avoid having them pile up forever. success = true stream.Close() return nil }