Example #1
0
// EnsureInfrastructureInitialized makes sure the instance described by info
// is tracked by this manager and attached to its environment's network.
//
// If the instance ID is already present in the environment entry, the
// address of the existing instance is returned. Otherwise the container named
// by info.ContainerID is connected to the environment network (and, for admin
// services, to the regional network as well), the instance ID is recorded
// under its serving ID and a new LeverInstance is created and stored in the
// environment entry. When connecting to a network fails, removal of the
// container is attempted and the connect error is returned.
//
// On success it returns the environment entry's network IP and own IP, the
// address of the instance and a function that forwards keep-alive
// notifications to that instance.
func (manager *Manager) EnsureInfrastructureInitialized(
	info *InstanceInfo) (
	envNetworkIP string, ownIP string, instanceAddr string,
	keepAlive KeepAliveFun, err error) {
	entry, err := manager.getEnvironment(info.Environment)
	if err != nil {
		return "", "", "", nil, err
	}
	// NOTE(review): getEnvironment appears to return the entry with envLock
	// already held - confirm against its implementation.
	defer entry.envLock.Unlock()

	if existing, found := entry.leverInstances[info.InstanceID]; found {
		// Nothing to set up: the instance is already being managed.
		manager.logger.WithFields("leverInstanceID", info.InstanceID).Debug(
			"Instance already managed")
		keepAlive = func(
			resourceName, levResResourceID, levResSessionID string) {
			existing.KeepAlive(resourceName, levResResourceID, levResSessionID)
		}
		return entry.networkIP, entry.ownIP, existing.InstanceAddr(),
			keepAlive, nil
	}

	// First time we see this instance.
	manager.logger.WithFields("leverInstanceID", info.InstanceID).Debug(
		"Instance is new")
	if info.ContainerID == "" {
		// Without a container ID there is nothing we can attach to.
		return "", "", "", nil, fmt.Errorf("Need container ID")
	}

	// discardContainer tries to remove the container after a network
	// connection failure. A removal failure is only logged.
	discardContainer := func() {
		removeErr := dockerutil.RemoveDockerContainer(
			manager.docker, info.ContainerID)
		if removeErr == nil {
			return
		}
		manager.logger.WithFields(
			"containerID", info.ContainerID,
			"err", removeErr,
		).Error("Error trying to remove container after previous error")
	}

	// Attach the container to the env network bridge so that RPCs can be
	// forwarded to it.
	containerIP, err := dockerutil.ConnectToDockerEnvNetwork(
		manager.docker, info.ContainerID, entry.networkID)
	if err != nil {
		manager.logger.WithFields(
			"err", err,
			"leverEnv", info.Environment,
			"containerID", info.ContainerID,
		).Error("Error connecting instance to env network")
		discardContainer()
		return "", "", "", nil, err
	}
	instanceAddr = containerIP + ":" + core.InstanceListenPortFlag.Get()

	adminURL := &core.LeverURL{
		Environment: info.Environment,
		Service:     info.Service,
	}
	if core.IsAdmin(adminURL) {
		// Admin services additionally live on the regional network.
		_, regionalErr := dockerutil.ConnectToDockerEnvNetwork(
			manager.docker, info.ContainerID, RegionalNetworkFlag.Get())
		if regionalErr != nil {
			manager.logger.WithFields(
				"err", regionalErr,
				"leverEnv", info.Environment,
				"containerID", info.ContainerID,
			).Error("Error connecting admin instance to regional network")
			discardContainer()
			return "", "", "", nil, regionalErr
		}
	}

	// Record the instance ID under its serving ID.
	manager.servingIDsLock.Lock()
	instanceSet, known := manager.servingIDs[info.ServingID]
	if !known {
		instanceSet = make(map[string]struct{})
		manager.servingIDs[info.ServingID] = instanceSet
	}
	instanceSet[info.InstanceID] = struct{}{}
	manager.servingIDsLock.Unlock()

	// Create the managed instance; the callback is handed to
	// NewLeverInstance and relays the close event to the manager.
	onClose := func(instanceID string, err error) {
		manager.logger.WithFields("leverInstanceID", instanceID).Debug(
			"Instance closed")
		manager.onInstanceClose(
			entry, info.Environment, instanceID, info.ServingID, err)
	}
	managed := NewLeverInstance(
		info, instanceAddr, manager.proxyInAddr, manager.grpcPool,
		manager.docker, onClose)
	entry.leverInstances[info.InstanceID] = managed

	keepAlive = func(resourceName, levResResourceID, levResSessionID string) {
		managed.KeepAlive(resourceName, levResResourceID, levResSessionID)
	}
	return entry.networkIP, entry.ownIP, instanceAddr, keepAlive, nil
}
Example #2
0
// handleInStream proxies a single incoming stream to the Lever instance it is
// addressed to.
//
// The routing information is taken from the stream's headers, all of which
// must be present (some may be empty strings). The function:
//
//  1. validates the headers and parses the Lever URL and code version,
//  2. rejects destinations that are not internal environments,
//  3. picks an instance for the serving ID when no instance ID was given,
//  4. makes sure the instance infrastructure is initialized and that the
//     proxy serves on the environment network,
//  5. opens a client stream to the instance (retrying with backoff) and
//     proxies messages in both directions, refreshing keep-alives on every
//     message,
//  6. once the destination stream closes, reports the RPC duration to
//     fleettracker.
//
// Every failure before the proxying starts is logged and reported to the
// caller as a MsgError on the stream. A failure to report stats is only
// logged.
func (proxy *LeverProxy) handleInStream(stream *http2stream.HTTP2Stream) {
	headers := stream.GetHeaders()
	err := expectHeaders(
		headers,
		"lever-url",
		"x-lever-src-env",
		"x-lever-dest-instance-id",
		"x-lever-dest-container-id",
		"x-lever-serving-id",
		"x-lever-code-version",
		"x-lever-inst-resource-id",
		"x-lever-inst-session-id",
		"x-lever-res-resource-id",
		"x-lever-res-session-id",
	)
	if err != nil {
		proxy.inLogger.WithFields("err", err).Error("")
		stream.Write(&http2stream.MsgError{Err: err})
		return
	}

	leverURL, err := core.ParseLeverURL(headers["lever-url"][0])
	if err != nil {
		proxy.inLogger.WithFields(
			"err", err, "leverURL", headers["lever-url"][0]).Error(
			"Unable to parse Lever URL")
		// Without a parsed URL there is no destination to route to, and
		// leverURL must not be dereferenced below. Report and bail out like
		// every other failure path.
		stream.Write(&http2stream.MsgError{Err: err})
		return
	}

	// Only internal environments can be reached through this proxy.
	if !core.IsInternalEnvironment(leverURL.Environment) {
		err = fmt.Errorf("Cannot route to dest env")
		proxy.inLogger.WithFields(
			"err", err,
			"leverEnv", leverURL.Environment,
		).Error("")
		stream.Write(&http2stream.MsgError{Err: err})
		return
	}

	srcEnv := headers["x-lever-src-env"][0]
	instanceID := headers["x-lever-dest-instance-id"][0]
	containerID := headers["x-lever-dest-container-id"][0]
	servingID := headers["x-lever-serving-id"][0]
	codeVersionInt, err := strconv.Atoi(headers["x-lever-code-version"][0])
	if err != nil {
		proxy.inLogger.WithFields("err", err).Error("Cannot parse code version")
		stream.Write(&http2stream.MsgError{Err: err})
		return
	}
	codeVersion := int64(codeVersionInt)
	levInstResourceID := headers["x-lever-inst-resource-id"][0]
	levInstSessionID := headers["x-lever-inst-session-id"][0]
	levResResourceID := headers["x-lever-res-resource-id"][0]
	levResSessionID := headers["x-lever-res-session-id"][0]

	if instanceID == "" {
		// No specific instance requested: let the manager choose one that
		// belongs to the serving ID.
		instanceID, err = proxy.manager.RandomInstaceID(servingID)
		if err != nil {
			proxy.inLogger.WithFields(
				"err", err,
				"leverEnv", leverURL.Environment,
			).Error("Could not find an instanceID for provided servingID")
			stream.Write(&http2stream.MsgError{Err: err})
			return
		}
	}

	// Tag all further log lines of this stream with a fresh ID.
	streamID := leverutil.RandomID()
	proxy.inLogger.WithFields(
		"leverURL", leverURL.String(),
		"srcEnv", srcEnv,
		"leverInstanceID", instanceID,
		"containerID", containerID,
		"servingID", servingID,
		"levInstSessionID", levInstSessionID,
		"levInstResourceID", levInstResourceID,
		"streamID", streamID,
	).Debug("Receiving stream")
	streamLogger := proxy.inLogger.WithFields("streamID", streamID)

	_, ownIP, instanceAddr, keepAliveFun, err :=
		proxy.manager.EnsureInfrastructureInitialized(&hostman.InstanceInfo{
			Environment:       leverURL.Environment,
			Service:           leverURL.Service,
			InstanceID:        instanceID,
			ContainerID:       containerID,
			ServingID:         servingID,
			LevInstResourceID: levInstResourceID,
			LevInstSessionID:  levInstSessionID,
		})
	if err != nil {
		streamLogger.WithFields("err", err).Error(
			"Error initializing instance")
		stream.Write(&http2stream.MsgError{Err: err})
		return
	}

	err = proxy.serveOut(leverURL.Environment, ownIP)
	if err != nil {
		streamLogger.WithFields("err", err).Error(
			"Error listening on env network")
		stream.Write(&http2stream.MsgError{Err: err})
		return
	}

	// Open the stream to the instance. The callback's second result marks a
	// retryable failure and the third a final one.
	// NOTE(review): the two durations are presumably the initial backoff and
	// the overall retry budget - confirm against leverutil.ExpBackoff.
	destStreamI, err := leverutil.ExpBackoff(
		func() (clientStream interface{}, err error, finalErr error) {
			clientStream, err = proxy.client.NewStream(instanceAddr)
			if err != nil {
				if strings.Contains(err.Error(), "connection refused") &&
					proxy.manager.IsInstanceAlive(servingID, instanceID) {
					// The instance is alive but refuses connections:
					// retry.
					return nil, err, nil
				}
				if err == leverutil.ErrNotYetConstructed ||
					err == leverutil.ErrWasDestructed {
					// Retry.
					return nil, err, nil
				}
				return nil, nil, err
			}
			return clientStream, nil, nil
		}, 10*time.Millisecond, 15*time.Second)
	if err != nil {
		streamLogger.WithFields(
			"err", err,
			"instanceAddr", instanceAddr,
		).Error("Error trying to create client stream to dest")
		stream.Write(&http2stream.MsgError{Err: err})
		return
	}
	destStream := destStreamI.(*http2stream.HTTP2Stream)

	// Headers passed to filterTo for the messages going to the instance.
	addHeaders := make(map[string][]string)
	if srcEnv != "" {
		addHeaders["x-lever-src-env"] = []string{srcEnv}
	}
	addHeaders["x-lever-internal-rpc-gateway"] = []string{
		ownIP + ":" + EnvOutListenPortFlag.Get()}

	startTime := time.Now()
	firstHeaders := true
	stream.ProxyTo(
		destStream,
		func(msg http2stream.MsgItem) []http2stream.MsgItem {
			// Any traffic keeps both the connection and the instance
			// resource alive.
			proxy.client.KeepAlive(instanceAddr)
			keepAliveFun(
				leverURL.Resource, levResResourceID, levResSessionID)
			return proxy.filterTo(&firstHeaders, addHeaders, msg)
		},
		func(msg http2stream.MsgItem) []http2stream.MsgItem {
			proxy.client.KeepAlive(instanceAddr)
			keepAliveFun(
				leverURL.Resource, levResResourceID, levResSessionID)
			return noFilter(msg)
		})
	// Wait for RPC to finish.
	<-destStream.Closed()
	rpcNanos := uint64(time.Since(startTime).Nanoseconds())
	streamLogger.WithFields("rpcNanos", rpcNanos).Debug("RPC nanos")

	// Send RPC stats to fleettracker.
	// TODO: For services with high load, we should not send info on every
	//       RPC. They should be batched and sent say... every ~50ms
	//       (well below tracker tick interval).
	err = fleettracker.OnRPC(proxy.grpcPool, &fleettracker.RPCEvent{
		Environment: leverURL.Environment,
		Service:     leverURL.Service,
		ServingID:   servingID,
		CodeVersion: codeVersion,
		IsAdmin:     core.IsAdmin(leverURL),
		RpcNanos:    rpcNanos,
	})
	if err != nil {
		streamLogger.WithFields("err", err).Error(
			"Failed to send RPC stats to fleettracker")
	}
}
Example #3
0
// newInstance tries to construct the instance resource for the given
// environment, service and code version and to start a container for it.
//
// When the resource was already constructed by someone else (success is
// false), the existing target is decoded from the resource and returned for
// reuse. Otherwise a container is started, its target (host address, instance
// ID and container ID) is registered on the resource and returned with
// success set to true. On any error all other results are zero values.
func (finder *Finder) newInstance(
	env, service string, version int64) (
	levInstTarget *LevInstTarget, targetNode string,
	levInstResource *scale.Resource, success bool, err error) {
	// Note: Only the very first instance of a service goes through this
	//       process. Later instances have no resource entry; they are found
	//       via service lookup through DNS.
	resourceID := makeLevInstResourceID(env, service, version)
	levInstResource, success, err = scale.ConstructResource(
		leverutil.ServiceFlag.Get(), resourceID,
		InstanceConstructTimeoutFlag.Get())
	if err != nil {
		finder.logger.WithFields("err", err).Error(
			"Failed to construct new instance")
		return nil, "", nil, false, err
	}
	if !success {
		// The lock was not obtained, so the instance was constructed
		// elsewhere. Decode and reuse its target.
		levInstTarget = &LevInstTarget{}
		rawTarget := []byte(levInstResource.GetTarget())
		if err = json.Unmarshal(rawTarget, levInstTarget); err != nil {
			finder.logger.WithFields("err", err).Panic("Failed to decode json")
		}
		finder.logger.WithFields(
			"leverEnv", env,
			"leverService", service,
			"leverInstanceID", levInstTarget.InstanceID,
		).Debug("Reusing instance")
		return levInstTarget, levInstResource.GetTargetNode(), levInstResource,
			false, nil
	}

	// The config in the code directory is needed to start the container.
	leverConfig, err := core.ReadLeverConfig(
		dockerutil.CodeDirPath(env, service, version))
	if err != nil {
		finder.logger.WithFields("err", err).Error(
			"Failed to read lever.json")
		return nil, "", nil, false, err
	}

	// TODO: If somehow first RPC fails and service no longer
	//       contacted afterwards, then the container remains hanging
	//       forever. Need a way to kill it / make it expire after a
	//       while for this situation.
	//       Idea: Poll docker every ~30s for instances that we are not
	//       handling and register those.
	newInstanceID := leverutil.RandomID() // TODO: Collisions possible.
	isAdmin := core.IsAdmin(&core.LeverURL{
		Environment: env,
		Service:     service,
	})
	containerID, node, err := dockerutil.StartDockerContainer(
		finder.docker, env, service, newInstanceID, version, isAdmin,
		leverConfig)
	if err != nil {
		finder.logger.WithFields(
			"err", err,
			"leverEnv", env,
			"leverService", service,
			"leverInstanceID", newInstanceID,
		).Error("Unable to start container")
		return nil, "", nil, false, err
	}

	hostAddr, err := GetHostAddrOnNode(node)
	if err != nil {
		finder.logger.WithFields("err", err).Error(
			"Failed to get Host addr on node")
		return nil, "", nil, false, err
	}

	finder.logger.WithFields(
		"leverEnv", env,
		"leverService", service,
		"leverInstanceID", newInstanceID,
		"containerID", containerID,
		"node", node,
	).Debug("Creating new instance")

	// Publish the target on the resource as JSON.
	levInstTarget = &LevInstTarget{
		HostAddr:    hostAddr,
		InstanceID:  newInstanceID,
		ContainerID: containerID,
	}
	encodedTarget, err := json.Marshal(levInstTarget)
	if err != nil {
		finder.logger.WithFields("err", err).Panic("Failed to encode json")
	}

	expiry := 2 * hostman.InstanceExpiryTimeFlag.Get()
	if err = levInstResource.DoneConstructing(
		string(encodedTarget), node, expiry); err != nil {
		finder.logger.WithFields("err", err).Error(
			"Failed to register new instance")
		return nil, "", nil, false, err
	}

	return levInstTarget, node, levInstResource, true, nil
}