Example #1
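// heartbeatJobs periodically refreshes the Registry heartbeat of every
// launched unit on this machine until the stop channel is closed. The
// heartbeat interval is derived from the TTL: four fifths of the TTL when it
// exceeds ten seconds, half of it otherwise.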
func (a *Agent) heartbeatJobs(ttl time.Duration, stop <-chan struct{}) {
	heartbeat := func() {
		machID := a.Machine.State().ID
		launched := a.cache.launchedJobs()
		for _, j := range launched {
			go a.registry.UnitHeartbeat(j, machID, ttl)
		}
	}

	var interval time.Duration
	if ttl > 10*time.Second {
		interval = ttl * 4 / 5
	} else {
		interval = ttl / 2
	}

	ticker := time.Tick(interval)
	for {
		select {
		case <-stop:
			log.Debug("HeartbeatJobs exiting due to stop signal")
			return
		case <-ticker:
			log.Debug("HeartbeatJobs tick")
			heartbeat()
		}
	}
}
Example #2
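// getDefaultGatewayIface returns the network interface used by the default
// route, or nil if it cannot be determined.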
func getDefaultGatewayIface() *net.Interface {
	log.Debug("Attempting to retrieve IP route info from netlink")

	routes, err := netlink.RouteList(nil, 0)
	if err != nil {
		log.Debugf("Unable to detect default interface: %v", err)
		return nil
	}

	if len(routes) == 0 {
		log.Debugf("Netlink returned zero routes")
		return nil
	}

	for _, route := range routes {
		// a nil Dst means that this is the default route.
		if route.Dst == nil {
			i, err := net.InterfaceByIndex(route.LinkIndex)
			if err != nil {
				log.Debugf("Found default route but could not determine interface")
				continue
			}
			log.Debugf("Found default route with interface %v", i)
			return i
		}
	}

	log.Debugf("Unable to find default route")
	return nil
}
Example #3
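// getDefaultGatewayIface returns the network interface used by the default
// route, or nil if it cannot be determined.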
func getDefaultGatewayIface() *net.Interface {
	log.Debug("Attempting to retrieve IP route info from netlink")

	routes, err := netlink.NetworkGetRoutes()
	if err != nil {
		log.Debugf("Unable to detect default interface: %v", err)
		return nil
	}

	if len(routes) == 0 {
		log.Debugf("Netlink returned zero routes")
		return nil
	}

	for _, route := range routes {
		if route.Default {
			if route.Iface == nil {
				log.Debugf("Found default route but could not determine interface")
				continue
			}
			log.Debugf("Found default route with interface %v", route.Iface.Name)
			return route.Iface
		}
	}

	log.Debugf("Unable to find default route")
	return nil
}
Example #4
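// GetUnitStates returns the unit states known to the local registry, merged
// with any etcd-registered states for units that are unknown locally or
// reported by a different machine, when non-gRPC agents are present.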
func (s *rpcserver) GetUnitStates(ctx context.Context, filter *pb.UnitStateFilter) (*pb.UnitStates, error) {
	if debugRPCServer {
		defer debug.Exit_(debug.Enter_())
	}
	states := make([]*pb.UnitState, 0)
	states = append(states, s.localRegistry.UnitStates()...)

	if s.hasNonGRPCAgents {
		log.Debug("Merging etcd with inmemory unit states in GetUnitStates()")
		etcdUnitStates, err := s.etcdRegistry.UnitStates()
		if err != nil {
			return nil, err
		}

		unitStateNames := make(map[string]string, len(states))
		for _, state := range states {
			unitStateNames[state.Name] = state.MachineID
		}
		for _, state := range etcdUnitStates {
			machId, ok := unitStateNames[state.UnitName]
			if !ok || machId != state.MachineID {
				states = append(states, state.ToPB())
			}
		}
	}

	return &pb.UnitStates{states}, nil
}
Example #5
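// GetUnits returns the units known to the local registry, merged with any
// etcd-registered units not already known locally, when non-gRPC agents are
// present in the cluster.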
func (s *rpcserver) GetUnits(ctx context.Context, filter *pb.UnitFilter) (*pb.Units, error) {
	if debugRPCServer {
		defer debug.Exit_(debug.Enter_())
	}
	units := make([]pb.Unit, 0)
	units = append(units, s.localRegistry.Units()...)

	// Check if there are etcd fleet-based agents in the cluster to share the state
	if s.hasNonGRPCAgents {
		log.Debug("Merging etcd with inmemory units in GetUnits()")
		etcdUnits, err := s.etcdRegistry.Units()
		if err != nil {
			return nil, err
		}

		unitNames := make(map[string]struct{}, len(units))
		for _, unit := range units {
			unitNames[unit.Name] = struct{}{}
		}
		for _, unit := range etcdUnits {
			if _, ok := unitNames[unit.Name]; !ok {
				units = append(units, unit.ToPB())
			}
		}
	}

	return &pb.Units{Units: units}, nil
}
Example #6
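// Run performs an initial reconciliation and then reconciles again on every
// interval tick or event-stream notification until the stop channel is closed.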
func (r *reconciler) Run(stop <-chan struct{}) {
	trigger := make(chan struct{})
	go func() {
		abort := make(chan struct{})
		for {
			if r.eStream == nil {
				return
			}
			select {
			case <-stop:
				close(abort)
				return
			case <-r.eStream.Next(abort):
				trigger <- struct{}{}
			}
		}
	}()

	ticker := r.clock.After(r.ival)

	// When starting up, reconcile once immediately
	log.Debug("Initial reconciliation commencing")
	r.rFunc()

	for {
		select {
		case <-stop:
			log.Debug("Reconciler exiting due to stop signal")
			return
		case <-ticker:
			ticker = r.clock.After(r.ival)
			log.Debug("Reconciler tick")
			r.rFunc()
		case <-trigger:
			ticker = r.clock.After(r.ival)
			log.Debug("Reconciler triggered")
			r.rFunc()
		}
	}
}
Example #7
// PeriodicRefresh updates the current state of the CoreOSMachine at the
// interval indicated. Operation ceases when the provided channel is closed.
func (m *CoreOSMachine) PeriodicRefresh(interval time.Duration, stop <-chan struct{}) {
	ticker := time.NewTicker(interval)
	for {
		select {
		case <-stop:
			log.Debug("Halting CoreOSMachine.PeriodicRefresh")
			ticker.Stop()
			return
		case <-ticker.C:
			m.Refresh()
		}
	}
}
Example #8
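// heartbeatJobs periodically refreshes the Registry heartbeat of every
// launched unit on this machine, at half the supplied TTL, until the stop
// channel signals shutdown.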
func (a *Agent) heartbeatJobs(ttl time.Duration, stop chan bool) {
	heartbeat := func() {
		machID := a.Machine.State().ID
		launched := a.cache.launchedJobs()
		for _, j := range launched {
			go a.registry.UnitHeartbeat(j, machID, ttl)
		}
	}

	interval := ttl / 2
	ticker := time.Tick(interval)
	for {
		select {
		case <-stop:
			log.Debug("HeartbeatJobs exiting due to stop signal")
			return
		case <-ticker:
			log.Debug("HeartbeatJobs tick")
			heartbeat()
		}
	}
}
Example #9
// Monitor ensures a Heart is still beating until a channel is closed, returning
// an error if the heartbeats fail.
func (m *Monitor) Monitor(hrt Heart, stop chan bool) error {
	ticker := time.Tick(m.ival)
	for {
		select {
		case <-stop:
			log.Debug("Monitor exiting due to stop signal")
			return nil
		case <-ticker:
			if _, err := m.check(hrt); err != nil {
				return err
			}
		}
	}
}
Example #10
// Run periodically attempts to reconcile the provided Agent until the stop
// channel is closed. Run will also reconcile in reaction to events on the
// AgentReconciler's rStream.
func (ar *AgentReconciler) Run(a *Agent, stop <-chan struct{}) {
	reconcile := func() {
		start := time.Now()
		ar.Reconcile(a)
		elapsed := time.Now().Sub(start)

		msg := fmt.Sprintf("AgentReconciler completed reconciliation in %s", elapsed)
		if elapsed > reconcileInterval {
			log.Warning(msg)
		} else {
			log.Debug(msg)
		}
	}
	reconciler := pkg.NewPeriodicReconciler(reconcileInterval, reconcile, ar.rStream)
	reconciler.Run(stop)
}
Example #11
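// Run drives the Engine's periodic reconciliation: each cycle renews or
// acquires cluster leadership and, when this machine is the leader, performs
// a single reconciliation bounded by the lease TTL.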
func (e *Engine) Run(ival time.Duration, stop chan bool) {
	leaseTTL := ival * 5
	machID := e.machine.State().ID

	reconcile := func() {
		if !ensureEngineVersionMatch(e.cRegistry, engineVersion) {
			return
		}

		var l lease.Lease
		if isLeader(e.lease, machID) {
			l = renewLeadership(e.lease, leaseTTL)
		} else {
			l = acquireLeadership(e.lManager, machID, engineVersion, leaseTTL)
		}

		// log all leadership changes
		if l != nil && e.lease == nil && l.MachineID() != machID {
			log.Infof("Engine leader is %s", l.MachineID())
		} else if l != nil && e.lease != nil && l.MachineID() != e.lease.MachineID() {
			log.Infof("Engine leadership changed from %s to %s", e.lease.MachineID(), l.MachineID())
		}

		e.lease = l

		if !isLeader(e.lease, machID) {
			return
		}

		// abort is closed when reconciliation must stop prematurely, either
		// by a local timeout or the fleet server shutting down
		abort := make(chan struct{})

		// monitor is used to shut down the following goroutine
		monitor := make(chan struct{})

		go func() {
			select {
			case <-monitor:
				return
			case <-time.After(leaseTTL):
				close(abort)
			case <-stop:
				close(abort)
			}
		}()

		start := time.Now()
		e.rec.Reconcile(e, abort)
		close(monitor)
		elapsed := time.Now().Sub(start)

		msg := fmt.Sprintf("Engine completed reconciliation in %s", elapsed)
		if elapsed > ival {
			log.Warning(msg)
		} else {
			log.Debug(msg)
		}
	}

	rec := pkg.NewPeriodicReconciler(ival, reconcile, e.rStream)
	rec.Run(stop)
}
Example #12
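// Run drives the Engine's periodic reconciliation. Leadership is obtained via
// gRPC when the machine advertises the CapGRPC capability, otherwise through
// the etcd lease manager; when this machine is the leader, each cycle performs
// a single reconciliation bounded by the lease TTL.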
func (e *Engine) Run(ival time.Duration, stop <-chan struct{}) {
	leaseTTL := ival * 5
	if e.machine.State().Capabilities.Has(machine.CapGRPC) {
		// With gRPC it doesn't make sense to set the TTL of the etcd key to 5 seconds.
		// This matters especially under high workload in the cluster, because it would
		// provoke constant leader re-elections.
		// TODO: this should be configurable via a flag to disable the TTL.
		leaseTTL = ival * 500000
	}
	machID := e.machine.State().ID

	reconcile := func() {
		if !ensureEngineVersionMatch(e.cRegistry, engineVersion) {
			return
		}

		if e.machine.State().Capabilities.Has(machine.CapGRPC) {
			// rpcLeadership obtains the lease (leader) and applies changes to the engine state if needed.
			e.lease = e.rpcLeadership(leaseTTL, machID)
		} else {
			var l lease.Lease
			if isLeader(e.lease, machID) {
				l = renewLeadership(e.lease, leaseTTL)
			} else {
				l = acquireLeadership(e.lManager, machID, engineVersion, leaseTTL)
			}

			// log all leadership changes
			if l != nil && e.lease == nil && l.MachineID() != machID {
				log.Infof("Engine leader is %s", l.MachineID())
			} else if l != nil && e.lease != nil && l.MachineID() != e.lease.MachineID() {
				log.Infof("Engine leadership changed from %s to %s", e.lease.MachineID(), l.MachineID())
			}

			e.lease = l
		}

		if !isLeader(e.lease, machID) {
			return
		}

		// abort is closed when reconciliation must stop prematurely, either
		// by a local timeout or the fleet server shutting down
		abort := make(chan struct{})

		// monitor is used to shut down the following goroutine
		monitor := make(chan struct{})

		go func() {
			select {
			case <-monitor:
				return
			case <-time.After(leaseTTL):
				close(abort)
			case <-stop:
				close(abort)
			}
		}()

		start := time.Now()
		e.rec.Reconcile(e, abort)
		close(monitor)
		elapsed := time.Now().Sub(start)
		metrics.ReportEngineReconcileSuccess(start)

		msg := fmt.Sprintf("Engine completed reconciliation in %s", elapsed)
		if elapsed > ival {
			log.Warning(msg)
		} else {
			log.Debug(msg)
		}
	}

	rec := pkg.NewPeriodicReconciler(ival, reconcile, e.rStream)
	rec.Run(stop)
}