Example #1
// OnRPC should be called on each RPC that we want to track.
func (tracker *FleetTracker) OnRPC(rpcEvent *RPCEvent) error {
	tracker.lock.RLock()
	servingTracker, ok := tracker.services[rpcEvent.ServingID]
	tracker.lock.RUnlock()
	if !ok {
		// Handling new service.
		tracker.lock.Lock()
		// Make sure nothing changed when we switched to writing.
		_, ok := tracker.services[rpcEvent.ServingID]
		if ok {
			tracker.lock.Unlock()
			// Something changed while we switched locks. Try again.
			return tracker.OnRPC(rpcEvent)
		}

		codeDir := dockerutil.CodeDirPath(
			rpcEvent.Environment, rpcEvent.Service, rpcEvent.CodeVersion)
		leverConfig, err := core.ReadLeverConfig(codeDir)
		if err != nil {
			tracker.lock.Unlock()
			return err
		}
		servingTracker = NewLoadTracker(
			rpcEvent.ServingID, leverConfig.MaxInstanceLoad,
			leverConfig.MinInstances, rpcEvent.SessionID)
		tracker.services[rpcEvent.ServingID] = servingTracker
		go tracker.monitorServiceResource(rpcEvent.SessionID)
		tracker.lock.Unlock()
	}

	delta := servingTracker.OnRPC(rpcEvent.RpcNanos)
	if delta == 0 {
		return nil
	} else if delta > 0 {
		return tracker.scaleUp(delta, rpcEvent)
	} else {
		return tracker.scaleDown(-delta, rpcEvent)
	}
}
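The locking here follows a common upgrade pattern: take the cheap read lock for the fast path, and on a miss re-check the map under the write lock, since another goroutine may have created the entry in between. A minimal self-contained sketch of that pattern (registry, loadTracker, and getOrCreate are illustrative names, not part of the Lever codebase):

import "sync"

type loadTracker struct{ totalNanos int64 }

type registry struct {
	mu       sync.RWMutex
	trackers map[string]*loadTracker
}

func newRegistry() *registry {
	return &registry{trackers: make(map[string]*loadTracker)}
}

// getOrCreate returns the tracker for id, creating it on first use. The
// map must be re-checked after the lock upgrade because another goroutine
// may have inserted the entry while no lock was held.
func (r *registry) getOrCreate(id string) *loadTracker {
	r.mu.RLock()
	t, ok := r.trackers[id]
	r.mu.RUnlock()
	if ok {
		return t
	}
	r.mu.Lock()
	defer r.mu.Unlock()
	if existing, ok := r.trackers[id]; ok {
		return existing
	}
	t = &loadTracker{}
	r.trackers[id] = t
	return t
}

OnRPC retries itself instead of reusing the entry found under the write lock; either approach works, as long as the re-check happens.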
Example #2
// DeployServiceDir deploys the provided directory onto Lever, as a Lever
// service.
func DeployServiceDir(
	adminEnv string, host string, env string, srcDir string) error {
	client, err := api.NewClient()
	if err != nil {
		return err
	}
	if host != "" {
		client.ForceHost = host
	}

	info, err := os.Lstat(srcDir)
	if err != nil {
		return fmt.Errorf("Error getting info about source dir: %v", err)
	}
	if !info.IsDir() {
		return fmt.Errorf("Source is not a directory")
	}

	_, err = core.ReadLeverConfig(srcDir)
	if err != nil {
		return fmt.Errorf("Error reading lever.json: %v", err)
	}

	pipeReader, pipeWriter := io.Pipe()
	bufReader := bufio.NewReader(pipeReader)
	errCh := make(chan error, 1) // buffered so the goroutine never blocks on send
	go func() {
		tarErr := leverutil.Tar(pipeWriter, srcDir)
		if tarErr != nil {
			// Fail the pipe so the read loop below unblocks instead of
			// waiting forever for data that will never come.
			pipeWriter.CloseWithError(tarErr)
			errCh <- fmt.Errorf("Error trying to tar dir: %v", tarErr)
			return
		}
		tarErr = pipeWriter.Close()
		if tarErr != nil {
			errCh <- tarErr
		}
		close(errCh)
	}()

	endpoint := client.Service(adminEnv, "admin")
	stream, err := endpoint.InvokeChan("DeployServiceChan", env)
	if err != nil {
		return fmt.Errorf("Error invoking DeployServiceChan: %v", err)
	}

	chunk := make([]byte, 32*1024)
	for {
		var size int
		size, err = bufReader.Read(chunk)
		if err == io.EOF {
			break
		}
		if err != nil {
			stream.Close()
			return err
		}

		err = stream.Send(chunk[:size])
		if err != nil {
			stream.Close()
			return err
		}
	}
	err = stream.Close()
	if err != nil {
		return err
	}
	err = <-errCh // nil if the channel was closed without an error
	if err != nil {
		return err
	}
	// Wait for remote to close stream.
	var msg interface{}
	err = stream.Receive(&msg)
	if err == nil {
		return fmt.Errorf("Unexpected message received")
	}
	if err != io.EOF {
		return err
	}
	return nil
}
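The heart of this function is the io.Pipe pairing: a producer goroutine tars the directory straight into the write end while the caller reads fixed-size chunks off the read end and ships them upstream, so the tarball is never materialized in memory or on disk. A stripped-down sketch of the pattern using only the standard library (streamChunks, produce, and send are illustrative names):

import "io"

// streamChunks runs produce (e.g. a tar writer) in a goroutine and feeds
// its output to send in 32 KiB chunks.
func streamChunks(produce func(io.Writer) error, send func([]byte) error) error {
	pr, pw := io.Pipe()
	errCh := make(chan error, 1) // buffered: producer never blocks on send
	go func() {
		err := produce(pw)
		pw.CloseWithError(err) // nil closes cleanly; reader then sees io.EOF
		errCh <- err
	}()
	buf := make([]byte, 32*1024)
	for {
		n, err := pr.Read(buf)
		if n > 0 {
			if sendErr := send(buf[:n]); sendErr != nil {
				return sendErr
			}
		}
		if err == io.EOF {
			break
		}
		if err != nil {
			return err
		}
	}
	return <-errCh
}

Closing the write end with CloseWithError is what makes the loop safe: a producer failure surfaces as the reader's error rather than a permanent block.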
Example #3
func (finder *Finder) newInstance(
	env, service string, version int64) (
	levInstTarget *LevInstTarget, targetNode string,
	levInstResource *scale.Resource, success bool, err error) {
	// Note: This process only takes place for the very first instance within
	//       a service. The non-first instances do not have a resource entry
	//       and they are found via service lookup through DNS.
	levInstResourceID := makeLevInstResourceID(env, service, version)
	levInstResource, success, err = scale.ConstructResource(
		leverutil.ServiceFlag.Get(), levInstResourceID,
		InstanceConstructTimeoutFlag.Get())
	if err != nil {
		finder.logger.WithFields("err", err).Error(
			"Failed to construct new instance")
		return nil, "", nil, false, err
	}
	if !success {
		// Failed to get lock. Someone else constructed instance.
		levInstTarget = new(LevInstTarget)
		err = json.Unmarshal([]byte(levInstResource.GetTarget()), levInstTarget)
		if err != nil {
			finder.logger.WithFields("err", err).Panic("Failed to decode json")
		}
		finder.logger.WithFields(
			"leverEnv", env,
			"leverService", service,
			"leverInstanceID", levInstTarget.InstanceID,
		).Debug("Reusing instance")
		return levInstTarget, levInstResource.GetTargetNode(), levInstResource,
			false, nil
	}

	// Read the entry point from the config.
	codeDir := dockerutil.CodeDirPath(env, service, version)
	leverConfig, err := core.ReadLeverConfig(codeDir)
	if err != nil {
		finder.logger.WithFields("err", err).Error(
			"Failed to read lever.json")
		return nil, "", nil, false, err
	}

	// TODO: If somehow first RPC fails and service no longer
	//       contacted afterwards, then the container remains hanging
	//       forever. Need a way to kill it / make it expire after a
	//       while for this situation.
	//       Idea: Poll docker every ~30s for instances that we are not
	//       handling and register those.
	instanceID := leverutil.RandomID() // TODO: Collisions possible.
	leverURL := &core.LeverURL{
		Environment: env,
		Service:     service,
	}
	isAdmin := core.IsAdmin(leverURL)
	containerID, node, err := dockerutil.StartDockerContainer(
		finder.docker, env, service, instanceID, version, isAdmin, leverConfig)
	if err != nil {
		finder.logger.WithFields(
			"err", err,
			"leverEnv", env,
			"leverService", service,
			"leverInstanceID", instanceID,
		).Error("Unable to start container")
		return nil, "", nil, false, err
	}

	hostAddr, err := GetHostAddrOnNode(node)
	if err != nil {
		finder.logger.WithFields("err", err).Error(
			"Failed to get Host addr on node")
		return nil, "", nil, false, err
	}

	finder.logger.WithFields(
		"leverEnv", env,
		"leverService", service,
		"leverInstanceID", instanceID,
		"containerID", containerID,
		"node", node,
	).Debug("Creating new instance")

	levInstTarget = &LevInstTarget{
		HostAddr:    hostAddr,
		InstanceID:  instanceID,
		ContainerID: containerID,
	}
	target, err := json.Marshal(levInstTarget)
	if err != nil {
		finder.logger.WithFields("err", err).Panic("Failed to encode json")
	}

	err = levInstResource.DoneConstructing(
		string(target), node, 2*hostman.InstanceExpiryTimeFlag.Get())
	if err != nil {
		finder.logger.WithFields("err", err).Error(
			"Failed to register new instance")
		return nil, "", nil, false, err
	}

	return levInstTarget, node, levInstResource, true, nil
}
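What makes newInstance safe under concurrency is the election performed by scale.ConstructResource: exactly one caller wins the right to construct (success == true) and publishes the serialized target; everyone else takes the success == false branch and reuses it. A local, in-memory analogue of that winner/loser split, as a sketch only (instanceStore, instanceTarget, and acquire are illustrative, not Lever APIs):

import "sync"

type instanceTarget struct {
	HostAddr   string
	InstanceID string
}

type instanceStore struct {
	mu      sync.Mutex
	targets map[string]*instanceTarget
}

// acquire returns the target for resourceID along with whether this caller
// won the construction race. Losers reuse the winner's target, mirroring
// the success == false branch above.
func (s *instanceStore) acquire(
	resourceID string, construct func() (*instanceTarget, error)) (
	*instanceTarget, bool, error) {
	s.mu.Lock()
	defer s.mu.Unlock()
	if t, ok := s.targets[resourceID]; ok {
		return t, false, nil // someone else constructed the instance
	}
	t, err := construct()
	if err != nil {
		return nil, false, err
	}
	s.targets[resourceID] = t
	return t, true, nil
}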
Example #4
func (tracker *FleetTracker) scaleUp(delta int, rpcEvent *RPCEvent) error {
	logger.WithFields(
		"leverEnv", rpcEvent.Environment,
		"leverService", rpcEvent.Service,
		"codeVersion", rpcEvent.CodeVersion,
		"servingID", rpcEvent.ServingID,
		"deltaInstances", delta,
	).Info("Scaling up")

	// Read the entry point from the config.
	codeDir := dockerutil.CodeDirPath(
		rpcEvent.Environment, rpcEvent.Service, rpcEvent.CodeVersion)
	leverConfig, err := core.ReadLeverConfig(codeDir)
	if err != nil {
		return err
	}

	// Spin up.
	hadErrors := false
	for i := 0; i < delta; i++ {
		instanceID := leverutil.RandomID()
		containerID, node, err := dockerutil.StartDockerContainer(
			tracker.docker, rpcEvent.Environment, rpcEvent.Service,
			instanceID, rpcEvent.CodeVersion, rpcEvent.IsAdmin, leverConfig)
		if err != nil {
			logger.WithFields(
				"err", err,
				"leverEnv", rpcEvent.Environment,
				"leverService", rpcEvent.Service,
				"codeVersion", rpcEvent.CodeVersion,
				"servingID", rpcEvent.ServingID,
			).Error("Error starting docker container")
			hadErrors = true
			continue
		}

		err = hostman.InitializeInstance(
			tracker.grpcPool,
			&hostman.InstanceInfo{
				Environment:       rpcEvent.Environment,
				Service:           rpcEvent.Service,
				InstanceID:        instanceID,
				ContainerID:       containerID,
				ServingID:         rpcEvent.ServingID,
				LevInstResourceID: "",
				LevInstSessionID:  "",
			}, node)
		if err != nil {
			logger.WithFields(
				"err", err,
				"leverEnv", rpcEvent.Environment,
				"leverService", rpcEvent.Service,
				"codeVersion", rpcEvent.CodeVersion,
				"servingID", rpcEvent.ServingID,
				"node", node,
				"leverInstanceID", instanceID,
			).Error("Failed to initialize instance remotely")
			hadErrors = true
			continue
		}
	}
	if hadErrors {
		return fmt.Errorf("There were errors during scale down")
	}
	return nil
}
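Note that the loop is deliberately best-effort: a failed instance start does not abort the remaining iterations, and failures are reported only once, at the end. The same shape can be expressed compactly with errors.Join from Go 1.20 (scaleUpN and startInstance are illustrative stand-ins for the container start plus remote init sequence):

import "errors"

// scaleUpN starts delta instances, continuing past individual failures
// and reporting them together. errors.Join returns nil when errs is empty.
func scaleUpN(delta int, startInstance func(i int) error) error {
	var errs []error
	for i := 0; i < delta; i++ {
		if err := startInstance(i); err != nil {
			errs = append(errs, err)
		}
	}
	return errors.Join(errs...)
}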
Example #5
// DeployServiceChan deploys the service with the provided name onto Lever. If
// the service already exists, it is replaced. The method expects the code
// package to be sent over as chunks within stream messages.
func (admin *Admin) DeployServiceChan(
	stream leverapi.Stream, envName string) error {
	// TODO: Auth layer.

	// Untar to a temp dir.
	tmpDir := tmpDir(envName)
	err := os.MkdirAll(tmpDir, 0777)
	if err != nil {
		logger.WithFields("err", err).Error(
			"Cannot create tmp dir for untar")
		return errInternal
	}

	success := false
	defer func() {
		if !success {
			remErr := os.RemoveAll(tmpDir)
			if remErr != nil {
				logger.WithFields("err", err).Error(
					"Error trying to remove tmp dir after failure to untar")
			}
		}
	}()

	pipeReader, pipeWriter := io.Pipe()
	bufReader := bufio.NewReader(pipeReader)
	doneCh := make(chan error, 1)
	go func() {
		untarErr := leverutil.Untar(bufReader, tmpDir)
		if untarErr != nil {
			// Fail the read end so a blocked pipeWriter.Write in the
			// receive loop returns instead of hanging.
			pipeReader.CloseWithError(untarErr)
			doneCh <- untarErr
		}
		close(doneCh)
	}()

	totalSize := 0
	for {
		var chunk []byte
		err = stream.Receive(&chunk)
		if err == io.EOF {
			break
		}
		if err != nil {
			logger.WithFields("err", err).Error("Error receiving chunks")
			return errInternal
		}

		var chunkSize int
		chunkSize, err = pipeWriter.Write(chunk)
		if err != nil {
			logger.WithFields("err", err).Error(
				"Error forwarding chunk to untar")
			return errInternal
		}

		// TODO: Also limit total size of decompressed files.
		totalSize += chunkSize
		if totalSize > maxUploadSize {
			logger.Error("File being uploaded is too large")
			return errTooLarge
		}

		// Check if there's been an untar error.
		select {
		case untarErr, hasErr := <-doneCh:
			if !hasErr {
				continue
			}
			logger.WithFields("err", untarErr).Error("Error trying to untar")
			return errUntar
		default:
		}
	}
	err = pipeWriter.Close()
	if err != nil {
		logger.WithFields("err", err).Error("Error closing pipe")
		return errInternal
	}
	// Wait for untar to finish.
	untarErr := <-doneCh // nil if the channel was closed without an error
	if untarErr != nil {
		logger.WithFields("err", untarErr).Error("Error trying to untar")
		return errUntar
	}

	// Read lever.json.
	leverConfig, err := core.ReadLeverConfig(tmpDir)
	if err != nil {
		logger.WithFields("err", err).Error("Error trying to read lever.json")
		return errLeverJSON
	}

	// Init service table entry.
	createErr := store.NewService(
		admin.as, envName, leverConfig.Service, leverConfig.Description,
		!leverConfig.IsPrivate)
	// createErr is deliberately not checked yet; the service may
	// legitimately already exist. It is reported below only if allocating
	// a new code version also fails.
	codeVersion, err := store.NewServiceCodeVersion(
		admin.as, envName, leverConfig.Service)
	if err != nil {
		if createErr != nil {
			logger.WithFields("err", createErr).Error("Error creating service")
			return errInternal
		}
		logger.WithFields("err", err).Error("Error getting new service version")
		return errInternal
	}

	// Everything worked so far. Move the code to the right place.
	targetDir := codeDir(envName, leverConfig.Service, codeVersion)
	parentDir := filepath.Dir(targetDir)
	err = os.MkdirAll(parentDir, 0777)
	if err != nil {
		logger.WithFields("err", err).Error(
			"Cannot create target dir before move")
		return errInternal
	}
	err = os.Rename(tmpDir, targetDir)
	if err != nil {
		logger.WithFields("err", err).Error(
			"Error trying to move new service to its final destination")
		return errInternal
	}

	// Update entry in service table, making it point to the newly uploaded
	// version.
	err = store.UpdateService(
		admin.as, envName, leverConfig.Service, leverConfig.Description,
		!leverConfig.IsPrivate, codeVersion)
	if err != nil {
		logger.WithFields("err", err).Error("Error trying to update service")
		return errInternal
	}

	// TODO: Remove older versions of code to avoid having them pile up forever.
	success = true
	stream.Close()
	return nil
}
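The receive side mirrors the send side of Example #2, with two extra concerns: a running cap on the uploaded size and a non-blocking check for an early untar failure. A reduced sketch of just the capped forwarding loop (copyCapped, recvChunk, and maxUpload are illustrative names):

import (
	"fmt"
	"io"
)

// copyCapped forwards chunks from recvChunk into pw until EOF, rejecting
// uploads whose cumulative size exceeds maxUpload bytes.
func copyCapped(
	pw *io.PipeWriter, recvChunk func() ([]byte, error), maxUpload int) error {
	total := 0
	for {
		chunk, err := recvChunk()
		if err == io.EOF {
			return pw.Close()
		}
		if err != nil {
			return err
		}
		total += len(chunk)
		if total > maxUpload {
			return fmt.Errorf("upload exceeds %d bytes", maxUpload)
		}
		if _, err := pw.Write(chunk); err != nil {
			return err
		}
	}
}

The total is counted on the compressed stream; as the TODO in the loop above notes, the decompressed size would need its own limit.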