func (a *Agent) heartbeatJobs(ttl time.Duration, stop <-chan struct{}) { heartbeat := func() { machID := a.Machine.State().ID launched := a.cache.launchedJobs() for _, j := range launched { go a.registry.UnitHeartbeat(j, machID, ttl) } } var interval time.Duration if ttl > 10*time.Second { interval = ttl * 4 / 5 } else { interval = ttl / 2 } ticker := time.Tick(interval) for { select { case <-stop: log.Debug("HeartbeatJobs exiting due to stop signal") return case <-ticker: log.Debug("HeartbeatJobs tick") heartbeat() } } }
func getDefaultGatewayIface() *net.Interface { log.Debug("Attempting to retrieve IP route info from netlink") routes, err := netlink.RouteList(nil, 0) if err != nil { log.Debugf("Unable to detect default interface: %v", err) return nil } if len(routes) == 0 { log.Debugf("Netlink returned zero routes") return nil } for _, route := range routes { // a nil Dst means that this is the default route. if route.Dst == nil { i, err := net.InterfaceByIndex(route.LinkIndex) if err != nil { log.Debugf("Found default route but could not determine interface") continue } log.Debugf("Found default route with interface %v", i) return i } } log.Debugf("Unable to find default route") return nil }
func getDefaultGatewayIface() *net.Interface { log.Debug("Attempting to retrieve IP route info from netlink") routes, err := netlink.NetworkGetRoutes() if err != nil { log.Debugf("Unable to detect default interface: %v", err) return nil } if len(routes) == 0 { log.Debugf("Netlink returned zero routes") return nil } for _, route := range routes { if route.Default { if route.Iface == nil { log.Debugf("Found default route but could not determine interface") } log.Debugf("Found default route with interface %v", route.Iface.Name) return route.Iface } } log.Debugf("Unable to find default route") return nil }
func (s *rpcserver) GetUnitStates(ctx context.Context, filter *pb.UnitStateFilter) (*pb.UnitStates, error) { if debugRPCServer { defer debug.Exit_(debug.Enter_()) } states := make([]*pb.UnitState, 0) states = append(states, s.localRegistry.UnitStates()...) if s.hasNonGRPCAgents { log.Debug("Merging etcd with inmemory unit states in GetUnitStates()") etcdUnitStates, err := s.etcdRegistry.UnitStates() if err != nil { return nil, err } unitStateNames := make(map[string]string, len(states)) for _, state := range states { unitStateNames[state.Name] = state.MachineID } for _, state := range etcdUnitStates { machId, ok := unitStateNames[state.UnitName] if !ok || (ok && machId != state.MachineID) { states = append(states, state.ToPB()) } } } return &pb.UnitStates{states}, nil }
func (s *rpcserver) GetUnits(ctx context.Context, filter *pb.UnitFilter) (*pb.Units, error) { if debugRPCServer { defer debug.Exit_(debug.Enter_()) } units := make([]pb.Unit, 0) units = append(units, s.localRegistry.Units()...) // Check if there are etcd fleet-based agents in the cluster to share the state if s.hasNonGRPCAgents { log.Debug("Merging etcd with inmemory units in GetUnits()") etcdUnits, err := s.etcdRegistry.Units() if err != nil { return nil, err } unitNames := make(map[string]struct{}, len(units)) for _, unit := range units { unitNames[unit.Name] = struct{}{} } for _, unit := range etcdUnits { if _, ok := unitNames[unit.Name]; !ok { units = append(units, unit.ToPB()) } } } return &pb.Units{Units: units}, nil }
func (r *reconciler) Run(stop <-chan struct{}) { trigger := make(chan struct{}) go func() { abort := make(chan struct{}) for { if r.eStream == nil { return } select { case <-stop: close(abort) return case <-r.eStream.Next(abort): trigger <- struct{}{} } } }() ticker := r.clock.After(r.ival) // When starting up, reconcile once immediately log.Debug("Initial reconciliation commencing") r.rFunc() for { select { case <-stop: log.Debug("Reconciler exiting due to stop signal") return case <-ticker: ticker = r.clock.After(r.ival) log.Debug("Reconciler tick") r.rFunc() case <-trigger: ticker = r.clock.After(r.ival) log.Debug("Reconciler triggered") r.rFunc() } } }
// PeriodicRefresh updates the current state of the CoreOSMachine at the // interval indicated. Operation ceases when the provided channel is closed. func (m *CoreOSMachine) PeriodicRefresh(interval time.Duration, stop <-chan struct{}) { ticker := time.NewTicker(interval) for { select { case <-stop: log.Debug("Halting CoreOSMachine.PeriodicRefresh") ticker.Stop() return case <-ticker.C: m.Refresh() } } }
func (a *Agent) heartbeatJobs(ttl time.Duration, stop chan bool) { heartbeat := func() { machID := a.Machine.State().ID launched := a.cache.launchedJobs() for _, j := range launched { go a.registry.UnitHeartbeat(j, machID, ttl) } } interval := ttl / 2 ticker := time.Tick(interval) for { select { case <-stop: log.Debug("HeartbeatJobs exiting due to stop signal") return case <-ticker: log.Debug("HeartbeatJobs tick") heartbeat() } } }
// Monitor ensures a Heart is still beating until a channel is closed, returning // an error if the heartbeats fail. func (m *Monitor) Monitor(hrt Heart, stop chan bool) error { ticker := time.Tick(m.ival) for { select { case <-stop: log.Debug("Monitor exiting due to stop signal") return nil case <-ticker: if _, err := m.check(hrt); err != nil { return err } } } }
// Run periodically attempts to reconcile the provided Agent until the stop // channel is closed. Run will also reconcile in reaction to events on the // AgentReconciler's rStream. func (ar *AgentReconciler) Run(a *Agent, stop <-chan struct{}) { reconcile := func() { start := time.Now() ar.Reconcile(a) elapsed := time.Now().Sub(start) msg := fmt.Sprintf("AgentReconciler completed reconciliation in %s", elapsed) if elapsed > reconcileInterval { log.Warning(msg) } else { log.Debug(msg) } } reconciler := pkg.NewPeriodicReconciler(reconcileInterval, reconcile, ar.rStream) reconciler.Run(stop) }
func (e *Engine) Run(ival time.Duration, stop chan bool) { leaseTTL := ival * 5 machID := e.machine.State().ID reconcile := func() { if !ensureEngineVersionMatch(e.cRegistry, engineVersion) { return } var l lease.Lease if isLeader(e.lease, machID) { l = renewLeadership(e.lease, leaseTTL) } else { l = acquireLeadership(e.lManager, machID, engineVersion, leaseTTL) } // log all leadership changes if l != nil && e.lease == nil && l.MachineID() != machID { log.Infof("Engine leader is %s", l.MachineID()) } else if l != nil && e.lease != nil && l.MachineID() != e.lease.MachineID() { log.Infof("Engine leadership changed from %s to %s", e.lease.MachineID(), l.MachineID()) } e.lease = l if !isLeader(e.lease, machID) { return } // abort is closed when reconciliation must stop prematurely, either // by a local timeout or the fleet server shutting down abort := make(chan struct{}) // monitor is used to shut down the following goroutine monitor := make(chan struct{}) go func() { select { case <-monitor: return case <-time.After(leaseTTL): close(abort) case <-stop: close(abort) } }() start := time.Now() e.rec.Reconcile(e, abort) close(monitor) elapsed := time.Now().Sub(start) msg := fmt.Sprintf("Engine completed reconciliation in %s", elapsed) if elapsed > ival { log.Warning(msg) } else { log.Debug(msg) } } rec := pkg.NewPeriodicReconciler(ival, reconcile, e.rStream) rec.Run(stop) }
func (e *Engine) Run(ival time.Duration, stop <-chan struct{}) { leaseTTL := ival * 5 if e.machine.State().Capabilities.Has(machine.CapGRPC) { // With grpc it doesn't make sense to set to 5secs the TTL of the etcd key. // This has a special impact whenever we have high worload in the cluster, cause // it'd provoke constant leader re-elections. // TODO: IMHO, this should be configurable via a flag to disable the TTL. leaseTTL = ival * 500000 } machID := e.machine.State().ID reconcile := func() { if !ensureEngineVersionMatch(e.cRegistry, engineVersion) { return } if e.machine.State().Capabilities.Has(machine.CapGRPC) { // rpcLeadership gets the lease (leader), and apply changes to the engine state if need it. e.lease = e.rpcLeadership(leaseTTL, machID) } else { var l lease.Lease if isLeader(e.lease, machID) { l = renewLeadership(e.lease, leaseTTL) } else { l = acquireLeadership(e.lManager, machID, engineVersion, leaseTTL) } // log all leadership changes if l != nil && e.lease == nil && l.MachineID() != machID { log.Infof("Engine leader is %s", l.MachineID()) } else if l != nil && e.lease != nil && l.MachineID() != e.lease.MachineID() { log.Infof("Engine leadership changed from %s to %s", e.lease.MachineID(), l.MachineID()) } e.lease = l } if !isLeader(e.lease, machID) { return } // abort is closed when reconciliation must stop prematurely, either // by a local timeout or the fleet server shutting down abort := make(chan struct{}) // monitor is used to shut down the following goroutine monitor := make(chan struct{}) go func() { select { case <-monitor: return case <-time.After(leaseTTL): close(abort) case <-stop: close(abort) } }() start := time.Now() e.rec.Reconcile(e, abort) close(monitor) elapsed := time.Now().Sub(start) metrics.ReportEngineReconcileSuccess(start) msg := fmt.Sprintf("Engine completed reconciliation in %s", elapsed) if elapsed > ival { log.Warning(msg) } else { log.Debug(msg) } } rec := pkg.NewPeriodicReconciler(ival, reconcile, e.rStream) rec.Run(stop) }