Beispiel #1
0
// getUnitByHash retrieves from the Registry the Unit associated with the given Hash.
// It returns nil when the key does not exist, when the etcd request fails, or
// when the stored value cannot be unmarshaled or parsed. Every failure other
// than a missing key is logged.
func (r *EtcdRegistry) getUnitByHash(hash unit.Hash) *unit.UnitFile {
	key := r.hashedUnitPath(hash)
	opts := &etcd.GetOptions{
		Recursive: true,
	}
	resp, err := r.kAPI.Get(r.ctx(), key, opts)
	if err != nil {
		// A missing key is an expected outcome; any other failure must not be
		// dropped silently because the caller only ever sees a nil result.
		if !isEtcdError(err, etcd.ErrorCodeKeyNotFound) {
			log.Errorf("error fetching Unit(%s): %v", hash, err)
		}
		return nil
	}

	var um unitModel
	if err := unmarshal(resp.Node.Value, &um); err != nil {
		log.Errorf("error unmarshaling Unit(%s): %v", hash, err)
		return nil
	}

	u, err := unit.NewUnitFile(um.Raw)
	if err != nil {
		log.Errorf("error parsing Unit(%s): %v", hash, err)
		return nil
	}

	return u
}
Beispiel #2
0
// getAllUnitsHashMap retrieves from the Registry all Units and returns a map of hash to UnitFile.
// The map key is the last path segment of each etcd node key, which must be a
// valid hex hash; nodes whose key does not parse as a hash, or whose value
// cannot be turned into a UnitFile, are skipped. An error is returned only
// when the etcd request itself fails.
func (r *EtcdRegistry) getAllUnitsHashMap() (map[string]*unit.UnitFile, error) {
	key := r.prefixed(unitPrefix)
	opts := &etcd.GetOptions{
		Recursive: true,
		Quorum:    true,
	}
	resp, err := r.kAPI.Get(r.ctx(), key, opts)
	if err != nil {
		return nil, err
	}

	hashToUnit := make(map[string]*unit.UnitFile, len(resp.Node.Nodes))
	for _, node := range resp.Node.Nodes {
		// Everything after the final "/" is the hash; when the key holds no
		// "/" at all, LastIndex yields -1 and the whole key is used.
		stringHash := node.Key[strings.LastIndex(node.Key, "/")+1:]
		hash, err := unit.HashFromHexString(stringHash)
		if err != nil {
			log.Errorf("failed to get Hash for key '%v' with stringHash '%v': %v", node.Key, stringHash, err)
			continue
		}
		// Named uf so the unit package is not shadowed inside the loop.
		uf := r.unitFromEtcdNode(hash, node)
		if uf == nil {
			continue
		}
		hashToUnit[stringHash] = uf
	}

	return hashToUnit, nil
}
Beispiel #3
0
// getUnitByHash retrieves from the Registry the Unit associated with the given Hash.
// It returns nil when the key does not exist, when the etcd request fails, or
// when the stored value cannot be unmarshaled or parsed. Every failure other
// than a missing key is logged.
func (r *EtcdRegistry) getUnitByHash(hash unit.Hash) *unit.UnitFile {
	req := etcd.Get{
		Key:       r.hashedUnitPath(hash),
		Recursive: true,
	}
	resp, err := r.etcd.Do(&req)
	if err != nil {
		// A missing key is an expected outcome; any other failure must not be
		// dropped silently because the caller only ever sees a nil result.
		if !isKeyNotFound(err) {
			log.Errorf("error fetching Unit(%s): %v", hash, err)
		}
		return nil
	}

	var um unitModel
	if err := unmarshal(resp.Node.Value, &um); err != nil {
		log.Errorf("error unmarshaling Unit(%s): %v", hash, err)
		return nil
	}

	u, err := unit.NewUnitFile(um.Raw)
	if err != nil {
		log.Errorf("error parsing Unit(%s): %v", hash, err)
		return nil
	}

	return u
}
Beispiel #4
0
// Supervise blocks until the Server's monitor returns and then drives the
// shutdown sequence. The monitor's boolean result distinguishes a requested
// shutdown (true) from a monitor failure (false, with the error logged).
// After closing the stop channel, Supervise waits on the Server's WaitGroup;
// if that takes longer than shutdownTimeout the process panics without any
// cleanup. When the monitor returned without a shutdown request, the restart
// flag is set, Run is called again, and the flag is cleared afterwards.
func (s *Server) Supervise() {
	shutdownRequested, monErr := s.mon.Monitor(s.hrt, s.killc)
	if !shutdownRequested {
		log.Errorf("Server monitor triggered: %v", monErr)
	} else {
		log.Infof("Server monitor triggered: told to shut down")
	}

	// Close the stop channel, then wait on the WaitGroup in the background so
	// the wait can be raced against the shutdown timeout.
	close(s.stopc)
	stopped := make(chan struct{})
	go func() {
		s.wg.Wait()
		close(stopped)
	}()

	select {
	case <-time.After(shutdownTimeout):
		log.Errorf("Timed out waiting for server to shut down. Panicking the server without cleanup.")
		panic("Failed server shutdown. Panic")
	case <-stopped:
	}

	if shutdownRequested {
		return
	}

	log.Infof("Restarting server")
	s.SetRestartServer(true)
	s.Run()
	s.SetRestartServer(false)
}
Beispiel #5
0
// newPublisher builds a publishFunc bound to the given Registry and TTL.
// The returned function removes the stored UnitState for name when it is
// handed a nil state, and otherwise saves the state with the TTL, unless the
// state carries no MachineID, in which case it is rejected and logged.
func newPublisher(reg registry.Registry, ttl time.Duration) publishFunc {
	return func(name string, us *unit.UnitState) {
		// A nil UnitState is treated as a request to remove the stored state.
		if us == nil {
			log.V(1).Infof("Destroying UnitState(%s) in Registry", name)
			if err := reg.RemoveUnitState(name); err != nil {
				log.Errorf("Failed to destroy UnitState(%s) in Registry: %v", name, err)
			}
			return
		}

		// Sanity check - incomplete UnitStates must not be published.
		// TODO(jonboulle): consider teasing apart a separate UnitState-like struct
		// so we can rely on a UnitState always being fully hydrated?
		//
		// The equivalent check on UnitHash is disabled, see
		// https://github.com/coreos/fleet/issues/720
		if len(us.MachineID) == 0 {
			log.Errorf("Refusing to push UnitState(%s), no MachineID: %#v", name, us)
			return
		}

		log.V(1).Infof("Pushing UnitState(%s) to Registry: %#v", name, us)
		reg.SaveUnitState(name, us, ttl)
	}
}
Beispiel #6
0
// SaveUnitState persists the given UnitState to the Registry.
// The marshaled state is written twice with the given TTL: once under the
// legacy per-unit key and once under the key derived from the state's
// MachineID and the job name. The method has no return value, so failures
// are reported through the log only; a failed legacy write does not prevent
// the second write from being attempted.
func (r *EtcdRegistry) SaveUnitState(jobName string, unitState *unit.UnitState, ttl time.Duration) {
	usm := unitStateToModel(unitState)
	if usm == nil {
		log.Errorf("Unable to save nil UnitState model")
		return
	}

	payload, err := marshal(usm)
	if err != nil {
		log.Errorf("Error marshalling UnitState: %v", err)
		return
	}

	legacyKey := r.legacyUnitStatePath(jobName)
	req := etcd.Set{
		Key:   legacyKey,
		Value: payload,
		TTL:   ttl,
	}
	if _, err := r.etcd.Do(&req); err != nil {
		log.Errorf("Error saving UnitState(%s) to key %s: %v", jobName, legacyKey, err)
	}

	newKey := r.unitStatePath(unitState.MachineID, jobName)
	req = etcd.Set{
		Key:   newKey,
		Value: payload,
		TTL:   ttl,
	}
	if _, err := r.etcd.Do(&req); err != nil {
		log.Errorf("Error saving UnitState(%s) to key %s: %v", jobName, newKey, err)
	}
}
Beispiel #7
0
// IsGrpcLeader reports whether the machine currently holding the engine lease
// advertises the gRPC capability.
//
// It returns an error when the lease cannot be read, when no leader has been
// elected yet, or when the leader's machine state cannot be obtained. It
// returns (false, nil) when the leader is known but has no gRPC capability.
func (e *Engine) IsGrpcLeader() (bool, error) {
	leader, err := e.lManager.GetLease(engineLeaseName)
	if err != nil {
		log.Errorf("Unable to determine current lease: %v", err)
		return false, err
	}
	// GetLease can return a nil lease together with a nil error, which is
	// handled here as "no leader elected yet".
	if leader == nil {
		return false, errors.New("Unable to get the current leader")
	}

	leaderState, err := e.getMachineState(leader.MachineID())
	if err != nil {
		log.Errorf("Unable to get machine state of leader %s: %v", leader.MachineID(), err)
		return false, err
	}
	// getMachineState may yield a nil state without an error; guard against
	// dereferencing it below.
	if leaderState == nil {
		return false, errors.New("Unable to get the machine state of the current leader")
	}

	if leaderState.Capabilities != nil && leaderState.Capabilities.Has(machine.CapGRPC) {
		return true, nil
	}

	log.Info("Engine leader has no gRPC capabilities enabled!")

	return false, nil
}
Beispiel #8
0
// desiredAgentState computes the AgentState the given Agent is expected to
// converge to, based on the Units, the schedule and the machine state read
// from the Registry.
//
// A global unit is included when the machine carries the unit's required
// target metadata; a non-global unit is included when the schedule targets
// it at this machine. Units that conflict with an already selected unit are
// left out. Any Registry read failure is logged and returned.
func desiredAgentState(a *Agent, reg registry.Registry) (*AgentState, error) {
	units, err := reg.Units()
	if err != nil {
		log.Errorf("Failed fetching Units from Registry: %v", err)
		return nil, err
	}

	schedule, err := reg.Schedule()
	if err != nil {
		log.Errorf("Failed fetching schedule from Registry: %v", err)
		return nil, err
	}

	// The machine state comes from the registry rather than from the local
	// copy so that dynamic metadata is taken into account.
	ms, err := reg.MachineState(a.Machine.State().ID)
	if err != nil {
		log.Errorf("Failed fetching machine state from Registry: %v", err)
		return nil, err
	}

	// Index the schedule by unit name; every entry gets its own copy so the
	// stored pointers do not alias each other.
	scheduled := make(map[string]*job.ScheduledUnit)
	for i := range schedule {
		entry := schedule[i]
		scheduled[entry.Name] = &entry
	}

	state := AgentState{
		MState: &ms,
		Units:  make(map[string]*job.Unit),
	}

	for i := range units {
		u := units[i]
		md := u.RequiredTargetMetadata()

		if u.IsGlobal() {
			if !machine.HasMetadata(&ms, md) {
				log.Debugf("Agent unable to run global unit %s: missing required metadata", u.Name)
				continue
			}
		} else {
			target, found := scheduled[u.Name]
			if !found || target.TargetMachineID == "" || target.TargetMachineID != ms.ID {
				continue
			}
		}

		if conflict, _ := state.HasConflict(u.Name, u.Conflicts()); conflict {
			continue
		}

		state.Units[u.Name] = &u
	}

	return &state, nil
}
Beispiel #9
0
// statesByMUSKey collects every UnitState found in the registry into a map
// keyed by MUSKey. Two etcd directories are read: first the one under
// statePrefix (kept for backwards compatibility with the old format), then
// the one under statesPrefix, whose entries overwrite earlier entries that
// share the same key. A key-not-found error from either read is tolerated;
// any other error aborts the call. Nodes that fail to unmarshal are logged
// and skipped.
func (r *EtcdRegistry) statesByMUSKey() (map[MUSKey]*unit.UnitState, error) {
	states := make(map[MUSKey]*unit.UnitState)

	// Old format: one node per unit name; the machine ID is taken from the
	// decoded state itself.
	legacyReq := etcd.Get{
		Key:       path.Join(r.keyPrefix, statePrefix),
		Recursive: true,
	}
	legacyRes, err := r.etcd.Do(&legacyReq)
	if err != nil && !isKeyNotFound(err) {
		return nil, err
	}
	if legacyRes != nil {
		for _, node := range legacyRes.Node.Nodes {
			_, name := path.Split(node.Key)
			var model unitStateModel
			if err := unmarshal(node.Value, &model); err != nil {
				log.Errorf("Error unmarshalling UnitState(%s): %v", name, err)
				continue
			}
			if us := modelToUnitState(&model, name); us != nil {
				states[MUSKey{name, us.MachineID}] = us
			}
		}
	}

	// New format: one directory per unit name holding one node per machine;
	// the machine ID is the last element of the node key.
	currentReq := etcd.Get{
		Key:       path.Join(r.keyPrefix, statesPrefix),
		Recursive: true,
	}
	currentRes, err := r.etcd.Do(&currentReq)
	if err != nil && !isKeyNotFound(err) {
		return nil, err
	}
	if currentRes != nil {
		for _, unitDir := range currentRes.Node.Nodes {
			_, name := path.Split(unitDir.Key)
			for _, node := range unitDir.Nodes {
				_, machID := path.Split(node.Key)
				var model unitStateModel
				if err := unmarshal(node.Value, &model); err != nil {
					log.Errorf("Error unmarshalling UnitState(%s) from Machine(%s): %v", name, machID, err)
					continue
				}
				if us := modelToUnitState(&model, name); us != nil {
					states[MUSKey{name, machID}] = us
				}
			}
		}
	}

	return states, nil
}
Beispiel #10
0
// Units returns every Unit stored in the Registry, global and non-global
// alike, sorted by name. When the job directory does not exist the result is
// (nil, nil). Directories that cannot be turned into a Unit are logged and
// skipped.
func (r *EtcdRegistry) Units() ([]job.Unit, error) {
	key := r.prefixed(jobPrefix)
	opts := &etcd.GetOptions{
		Sort:      true,
		Recursive: true,
	}
	res, err := r.kAPI.Get(r.ctx(), key, opts)
	if err != nil {
		if isEtcdError(err, etcd.ErrorCodeKeyNotFound) {
			return nil, nil
		}
		return nil, err
	}

	// Load all unit files with one recursive read instead of issuing one
	// etcd request per unit.
	hashToUnit, err := r.getAllUnitsHashMap()
	if err != nil {
		log.Errorf("failed fetching all Units from etcd: %v", err)
		return nil, err
	}
	lookup := func(hash unit.Hash) *unit.UnitFile {
		stringHash := hash.String()
		if uf, ok := hashToUnit[stringHash]; ok {
			return uf
		}
		log.Errorf("did not find Unit %v in list of all units", stringHash)
		return nil
	}

	byName := make(map[string]*job.Unit)
	for _, dir := range res.Node.Nodes {
		u, err := r.dirToUnit(dir, lookup)
		if err != nil {
			log.Errorf("Failed to parse Unit from etcd: %v", err)
			continue
		}
		if u != nil {
			byName[u.Name] = u
		}
	}

	// Map iteration order is random, so order the names explicitly.
	names := make([]string, 0, len(byName))
	for name := range byName {
		names = append(names, name)
	}
	sort.Strings(names)

	units := make([]job.Unit, 0, len(names))
	for _, name := range names {
		units = append(units, *byName[name])
	}

	return units, nil
}
Beispiel #11
0
// rpcAcquireLeadership tries to obtain the engine lease for machID.
//
// With no lease present it acquires one, and with a lease held at a version
// lower than ver it steals it, but both only when the registry reports
// UseEtcdRegistry() or reports that it is not ready. A lease already held at
// version ver or higher is returned untouched. In every other case the
// existing lease (possibly nil) is returned after logging that the leader is
// busy. nil is returned whenever reading, acquiring or stealing the lease
// fails. After a successful steal the call blocks for the time that was
// remaining on the previous lease.
func rpcAcquireLeadership(reg registry.Registry, lManager lease.Manager, machID string, ver int, ttl time.Duration) lease.Lease {
	existing, err := lManager.GetLease(engineLeaseName)
	if err != nil {
		log.Errorf("Unable to determine current lease: %v", err)
		return nil
	}

	var l lease.Lease

	// Nobody holds the lease.
	if existing == nil {
		if reg.UseEtcdRegistry() || !reg.IsRegistryReady() {
			l, err = lManager.AcquireLease(engineLeaseName, machID, ver, ttl)
			if err != nil {
				log.Errorf("Engine leadership acquisition failed: %v", err)
				return nil
			}
			if l == nil {
				log.Infof("Unable to acquire engine leadership")
				return nil
			}
			log.Infof("Engine leadership acquired")
			return l
		}

		log.Infof("Engine leader is BUSY!")
		return existing
	}

	// The current holder runs an acceptable version: leave it alone.
	if existing.Version() >= ver {
		log.Debugf("Lease already held by Machine(%s) operating at acceptable version %d", existing.MachineID(), existing.Version())
		return existing
	}

	// TODO(hector): Here we could add a possible SLA to determine when the leader
	// is too busy. In such a case, we can trigger a new leader election
	if !reg.UseEtcdRegistry() && reg.IsRegistryReady() {
		log.Infof("Engine leader is BUSY!")
		return existing
	}

	rem := existing.TimeRemaining()
	l, err = lManager.StealLease(engineLeaseName, machID, ver, ttl+rem, existing.Index())
	if err != nil {
		log.Errorf("Engine leadership steal failed: %v", err)
		return nil
	}
	if l == nil {
		log.Infof("Unable to steal engine leadership")
		return nil
	}

	log.Infof("Stole engine leadership from Machine(%s)", existing.MachineID())

	if rem > 0 {
		log.Infof("Waiting %v for previous lease to expire before continuing reconciliation", rem)
		time.Sleep(rem)
	}

	return l
}
Beispiel #12
0
// rpcDialerNoEngine keeps dialing the current engine over TCP on
// rpcServerPort, once per dialRegistryReconnectTimeout, until a connection
// is established. In parallel a check timer periodically looks up the engine
// lease and, when the lease holder is found among the registered machines,
// switches currentEngine to that machine.
//
// The timeout argument is ignored: it is overwritten with the interval used
// for the leader check. An error is returned when the machine list cannot be
// fetched or when the new leader does not advertise the gRPC capability.
func (r *RegistryMux) rpcDialerNoEngine(_ string, timeout time.Duration) (net.Conn, error) {
	// NewTicker instead of time.Tick so the ticker can be released on return.
	ticker := time.NewTicker(dialRegistryReconnectTimeout)
	defer ticker.Stop()

	// Timeout re-defined to call etcd every 5secs to get the leader
	timeout = 5 * time.Second
	check := time.After(timeout)

	for {
		select {
		case <-check:
			log.Errorf("Unable to connect to engine %s\n", r.currentEngine.PublicIP)
			// Get the new engine leader of the cluster out of etcd
			lease, err := r.leaseManager.GetLease(engineLeaderKeyPath)
			if err != nil || lease == nil {
				timeout = 2 * time.Second
				log.Errorf("Unable to get the leader engine, retrying in %v...", timeout)
				check = time.After(timeout)
				continue
			}

			// Key found
			machines, err := r.etcdRegistry.Machines()
			if err != nil {
				log.Errorf("Unable to get the machines of the cluster %v\n", err)
				return nil, errors.New("Unable to get the machines of the cluster")
			}
			for _, s := range machines {
				// Update the currentEngine with the new one... otherwise wait until
				// there is one
				if s.ID != lease.MachineID() {
					continue
				}
				// New leader has not gRPC capabilities enabled.
				if !s.Capabilities.Has(machine.CapGRPC) {
					log.Error("New leader engine has not gRPC enabled!")
					return nil, errors.New("New leader engine has not gRPC enabled!")
				}
				r.currentEngine = s
				log.Infof("Found a new engine to connect to: %s\n", r.currentEngine.PublicIP)
			}

			// The check channel fires only once, so it has to be re-armed even
			// when the lease holder was not among the machines; otherwise the
			// leader would never be looked up again.
			timeout = 5 * time.Second
			check = time.After(timeout)
		case <-ticker.C:
			addr := fmt.Sprintf("%s:%d", r.currentEngine.PublicIP, rpcServerPort)
			conn, err := net.Dial("tcp", addr)
			if err == nil {
				log.Infof("Connected to engine on %s\n", r.currentEngine.PublicIP)
				return conn, nil
			}
			log.Errorf("Retry to connect to new engine: %+v", err)
		}
	}
}
Beispiel #13
0
Datei: unit.go Projekt: pulcy/j2
// unitFromEtcdNode decodes the value of etcdNode into a unitModel and parses
// the model's raw contents into a UnitFile. hash is used only to identify the
// unit in log messages. nil is returned, after logging, when either step
// fails.
func (r *EtcdRegistry) unitFromEtcdNode(hash unit.Hash, etcdNode *etcd.Node) *unit.UnitFile {
	model := unitModel{}
	if decodeErr := unmarshal(etcdNode.Value, &model); decodeErr != nil {
		log.Errorf("error unmarshaling Unit(%s): %v", hash, decodeErr)
		return nil
	}

	parsed, parseErr := unit.NewUnitFile(model.Raw)
	if parseErr == nil {
		return parsed
	}

	log.Errorf("error parsing Unit(%s): %v", hash, parseErr)
	return nil
}
Beispiel #14
0
// statesByMUSKey reads the whole states directory from the registry and
// returns the UnitStates it contains, keyed by MUSKey (unit name plus machine
// ID, both taken from the last element of the respective etcd keys). A
// missing directory yields an empty map; any other etcd error is returned.
// Nodes that fail to unmarshal are logged and skipped.
func (r *EtcdRegistry) statesByMUSKey() (map[MUSKey]*unit.UnitState, error) {
	states := make(map[MUSKey]*unit.UnitState)

	dirKey := r.prefixed(statesPrefix)
	opts := &etcd.GetOptions{
		Recursive: true,
	}
	res, err := r.kAPI.Get(r.ctx(), dirKey, opts)
	if err != nil && !isEtcdError(err, etcd.ErrorCodeKeyNotFound) {
		return nil, err
	}
	if res == nil {
		return states, nil
	}

	for _, unitDir := range res.Node.Nodes {
		_, name := path.Split(unitDir.Key)
		for _, stateNode := range unitDir.Nodes {
			_, machID := path.Split(stateNode.Key)

			var model unitStateModel
			if err := unmarshal(stateNode.Value, &model); err != nil {
				log.Errorf("Error unmarshalling UnitState(%s) from Machine(%s): %v", name, machID, err)
				continue
			}

			if us := modelToUnitState(&model, name); us != nil {
				states[MUSKey{name, machID}] = us
			}
		}
	}

	return states, nil
}
Beispiel #15
0
// ServeHTTP answers a GET request with one page of Machines. Any other HTTP
// method, or a page token that cannot be extracted from the URL, results in
// a 400 response. When the URL carries no token the default page token is
// used. A failure to fetch the page is logged and reported as a 500 response
// without error details.
func (mr *machinesResource) ServeHTTP(rw http.ResponseWriter, req *http.Request) {
	if req.Method != "GET" {
		sendError(rw, http.StatusBadRequest, fmt.Errorf("only HTTP GET supported against this resource"))
		return
	}

	pageToken, tokenErr := findNextPageToken(req.URL)
	if tokenErr != nil {
		sendError(rw, http.StatusBadRequest, tokenErr)
		return
	}
	if pageToken == nil {
		fallback := DefaultPageToken()
		pageToken = &fallback
	}

	machines, pageErr := getMachinePage(mr.cAPI, *pageToken)
	if pageErr != nil {
		log.Errorf("Failed fetching page of Machines: %v", pageErr)
		sendError(rw, http.StatusInternalServerError, nil)
		return
	}

	sendResponse(rw, http.StatusOK, machines)
}
Beispiel #16
0
// statesByMUSKey fetches the states directory recursively and builds a map
// from MUSKey to UnitState. The unit name is the last path element of each
// directory key and the machine ID is the last path element of each node key
// inside it. A key-not-found error produces an empty map; other errors are
// returned. Undecodable nodes are logged and left out.
func (r *EtcdRegistry) statesByMUSKey() (map[MUSKey]*unit.UnitState, error) {
	result := make(map[MUSKey]*unit.UnitState)

	get := etcd.Get{
		Key:       path.Join(r.keyPrefix, statesPrefix),
		Recursive: true,
	}
	res, err := r.etcd.Do(&get)
	if err != nil && !etcd.IsKeyNotFound(err) {
		return nil, err
	}
	if res == nil {
		return result, nil
	}

	for _, unitDir := range res.Node.Nodes {
		_, name := path.Split(unitDir.Key)

		for _, machineNode := range unitDir.Nodes {
			_, machID := path.Split(machineNode.Key)

			var model unitStateModel
			if decodeErr := unmarshal(machineNode.Value, &model); decodeErr != nil {
				log.Errorf("Error unmarshalling UnitState(%s) from Machine(%s): %v", name, machID, decodeErr)
				continue
			}

			us := modelToUnitState(&model, name)
			if us == nil {
				continue
			}
			result[MUSKey{name, machID}] = us
		}
	}

	return result, nil
}
Beispiel #17
0
// watch blocks until the recursive etcd watcher on key delivers a response,
// or until stop is closed, in which case it returns nil. Watcher errors are
// logged and the watch is re-created after a one second pause.
//
// The blocking watcher.Next call runs under a context that is cancelled as
// soon as stop is closed, so a shutdown request interrupts a pending watch
// instead of waiting for the next etcd event.
func watch(kAPI etcd.KeysAPI, key string, stop chan struct{}) (res *etcd.Response) {
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	// Bridge the stop channel to the context. The goroutine ends either when
	// stop is closed or when watch returns (the deferred cancel fires).
	go func() {
		select {
		case <-stop:
			cancel()
		case <-ctx.Done():
		}
	}()

	for res == nil {
		select {
		case <-stop:
			log.Debugf("Gracefully closing etcd watch loop: key=%s", key)
			return
		default:
			opts := &etcd.WatcherOptions{
				AfterIndex: 0,
				Recursive:  true,
			}
			watcher := kAPI.Watcher(key, opts)
			log.Debugf("Creating etcd watcher: %s", key)

			var err error
			res, err = watcher.Next(ctx)
			if err != nil {
				// A cancelled context means stop was closed while waiting;
				// that is a regular shutdown, not a watcher failure.
				if ctx.Err() != nil {
					log.Debugf("Gracefully closing etcd watch loop: key=%s", key)
					return nil
				}
				log.Errorf("etcd watcher %v returned error: %v", key, err)
			}
		}

		// Let's not slam the etcd server in the event that we know
		// an unexpected error occurred.
		time.Sleep(time.Second)
	}

	return
}
Beispiel #18
0
// watch waits for a change below key and returns the etcd result describing
// it. It returns nil when stop is found closed before a wait is started.
// A failed wait is logged and retried; every attempt, successful or not, is
// followed by a one second pause.
func watch(client etcd.Client, key string, stop chan struct{}) (res *etcd.Result) {
	for {
		// Give up before issuing another request if a stop was requested.
		select {
		case <-stop:
			log.V(1).Infof("Gracefully closing etcd watch loop: key=%s", key)
			return
		default:
		}

		req := &etcd.Watch{
			Key:       key,
			WaitIndex: 0,
			Recursive: true,
		}

		log.V(1).Infof("Creating etcd watcher: %v", req)

		var err error
		res, err = client.Wait(req, stop)
		if err != nil {
			log.Errorf("etcd watcher %v returned error: %v", req, err)
		}

		// Let's not slam the etcd server in the event that we know
		// an unexpected error occurred.
		time.Sleep(time.Second)

		if res != nil {
			return
		}
	}
}
Beispiel #19
0
// stateByMUSKey looks up the UnitState of the unit called uName. It reads the
// whole states directory recursively and returns the first state, among the
// nodes stored under the directory named uName, that decodes successfully.
// (nil, nil) is returned when the directory is missing or no usable state is
// found; etcd errors other than key-not-found are returned as-is.
func (r *EtcdRegistry) stateByMUSKey(uName string) (*unit.UnitState, error) {
	dirKey := r.prefixed(statesPrefix)
	opts := &etcd.GetOptions{
		Recursive: true,
	}
	res, err := r.kAPI.Get(context.Background(), dirKey, opts)
	if err != nil && !isEtcdError(err, etcd.ErrorCodeKeyNotFound) {
		return nil, err
	}
	if res == nil {
		return nil, nil
	}

	for _, unitDir := range res.Node.Nodes {
		_, name := path.Split(unitDir.Key)
		if name == uName {
			for _, stateNode := range unitDir.Nodes {
				_, machID := path.Split(stateNode.Key)

				var model unitStateModel
				if decodeErr := unmarshal(stateNode.Value, &model); decodeErr != nil {
					log.Errorf("Error unmarshalling UnitState(%s) from Machine(%s): %v", name, machID, decodeErr)
					continue
				}

				if us := modelToUnitState(&model, name); us != nil {
					return us, nil
				}
			}
		}
	}

	return nil, nil
}
Beispiel #20
0
// sendResponse JSON-encodes resp and writes it to rw with the given status
// code and a Content-Type of application/json. If encoding fails, the
// failure is logged and a bare 500 status is written instead. A failure to
// write the body is logged.
func sendResponse(rw http.ResponseWriter, code int, resp interface{}) {
	body, marshalErr := json.Marshal(resp)
	if marshalErr != nil {
		log.Errorf("Failed JSON-encoding HTTP response: %v", marshalErr)
		rw.WriteHeader(http.StatusInternalServerError)
		return
	}

	// The header must be set before WriteHeader sends the status line.
	rw.Header().Set("Content-Type", "application/json")
	rw.WriteHeader(code)

	if _, writeErr := rw.Write(body); writeErr != nil {
		log.Errorf("Failed sending HTTP response body: %v", writeErr)
	}
}
Beispiel #21
0
// Purge repeatedly launches an unload task chain for every Unit the Agent
// reports, pausing one second between rounds, until the Agent reports no
// Units at all. It gives up, after logging, if the Agent's units cannot be
// determined.
func (ar *AgentReconciler) Purge(a *Agent) {
	for {
		loaded, err := a.units()
		if err != nil {
			log.Errorf("Unable to determine agent's current state: %v", err)
			return
		}
		if len(loaded) == 0 {
			return
		}

		for unitName := range loaded {
			unload := task{
				typ:    taskTypeUnloadUnit,
				reason: taskReasonPurgingAgent,
			}
			// Only the name is set on the Unit handed to the task chain.
			target := &job.Unit{Name: unitName}
			ar.launchTaskChain(newTaskChain(target, unload), a)
		}

		time.Sleep(time.Second)
	}
}
Beispiel #22
0
// rpcLeadership renews the engine lease when isLeader reports that machID
// holds it, and otherwise attempts to acquire it. The outcome (possibly nil)
// is stored in e.lease and returned. Leadership changes are logged. When the
// lease holder differs from the previous one, the holder's machine state is
// fetched and, if available, passed to updateEngineState in a new goroutine.
func (e *Engine) rpcLeadership(leaseTTL time.Duration, machID string) lease.Lease {
	// Remember who held the lease before this round.
	previousEngine := ""
	if e.lease != nil {
		previousEngine = e.lease.MachineID()
	}

	var l lease.Lease
	if !isLeader(e.lease, machID) {
		l = rpcAcquireLeadership(e.registry, e.lManager, machID, engineVersion, leaseTTL)
	} else {
		l = rpcRenewLeadership(e.lManager, e.lease, engineVersion, leaseTTL)
	}

	// log all leadership changes
	if l != nil {
		if e.lease == nil {
			if l.MachineID() != machID {
				log.Infof("Engine leader is %s", l.MachineID())
			}
		} else if l.MachineID() != e.lease.MachineID() {
			log.Infof("Engine leadership changed from %s to %s", e.lease.MachineID(), l.MachineID())
		}
	}

	e.lease = l
	if e.lease == nil || previousEngine == e.lease.MachineID() {
		return e.lease
	}

	engineState, err := e.getMachineState(e.lease.MachineID())
	if err != nil {
		log.Errorf("Failed to get machine state for machine %s %v", e.lease.MachineID(), err)
	}
	if engineState != nil {
		log.Infof("Updating engine state... engineState: %v previous: %s lease: %v", engineState, previousEngine, e.lease)
		go e.updateEngineState(*engineState)
	}

	return e.lease
}
Beispiel #23
0
// Purge repeatedly launches an unload task chain for every Job found in the
// Agent's current state, pausing one second between rounds, until the state
// holds no Jobs. It gives up, after logging, if the current state cannot be
// determined.
func (ar *AgentReconciler) Purge(a *Agent) {
	for {
		current, err := currentAgentState(a)
		if err != nil {
			log.Errorf("Unable to determine agent's current state: %v", err)
			return
		}

		if len(current.Jobs) == 0 {
			return
		}

		for _, j := range current.Jobs {
			// Per-iteration copy so the task chain does not share the loop
			// variable.
			loaded := j
			unload := task{
				typ:    taskTypeUnloadJob,
				reason: taskReasonPurgingAgent,
			}
			ar.launchTaskChain(newTaskChain(loaded, unload), a)
		}

		time.Sleep(time.Second)
	}
}
Beispiel #24
0
// stopUnit asks systemd to stop the named unit using the "replace" mode and
// logs either the failure or the status returned by systemd.
func (m *systemdUnitManager) stopUnit(name string) {
	stat, err := m.systemd.StopUnit(name, "replace")
	if err != nil {
		log.Errorf("Failed to stop systemd unit %s: %v", name, err)
		return
	}
	log.Infof("Stopped systemd unit %s(%s)", name, stat)
}
Beispiel #25
0
// Reconcile fetches the current cluster state from the Engine and executes
// every task produced by calculateClusterTasks for it. A failure to obtain
// the cluster state ends the round; a failing task is logged and the
// remaining tasks are still processed.
func (r *Reconciler) Reconcile(e *Engine, stop chan struct{}) {
	log.V(1).Infof("Polling Registry for actionable work")

	clust, err := e.clusterState()
	if err != nil {
		log.Errorf("Failed getting current cluster state: %v", err)
		return
	}

	for t := range r.calculateClusterTasks(clust, stop) {
		if taskErr := doTask(t, e); taskErr != nil {
			log.Errorf("Failed resolving task: task=%s err=%v", t, taskErr)
		}
	}
}
Beispiel #26
0
// Purge repeatedly builds one unload task per Unit the Agent reports and
// launches them as a batch, pausing one second between rounds, until the
// Agent reports no Units. It gives up, after logging, if the Agent's units
// cannot be determined.
func (ar *AgentReconciler) Purge(a *Agent) {
	for {
		loaded, err := a.units()
		if err != nil {
			log.Errorf("Unable to determine agent's current state: %v", err)
			return
		}
		if len(loaded) == 0 {
			return
		}

		unloads := make([]task, 0, len(loaded))
		for unitName := range loaded {
			// Only the name is set on the Unit attached to the task.
			target := &job.Unit{Name: unitName}
			unloads = append(unloads, task{
				typ:    taskTypeUnloadUnit,
				reason: taskReasonPurgingAgent,
				unit:   target,
			})
		}

		ar.launchTasks(unloads, a)
		time.Sleep(time.Second)
	}
}
Beispiel #27
0
// Reconcile compares the Agent's desired state, derived from the Registry,
// with the units the Agent currently reports, and launches the tasks
// calculated from the difference. If either state cannot be determined the
// failure is logged and nothing is launched.
func (ar *AgentReconciler) Reconcile(a *Agent) {
	desired, err := desiredAgentState(a, ar.reg)
	if err != nil {
		log.Errorf("Unable to determine agent's desired state: %v", err)
		return
	}

	current, err := a.units()
	if err != nil {
		log.Errorf("Unable to determine agent's current state: %v", err)
		return
	}

	ar.launchTasks(ar.calculateTasksForUnits(desired, current), a)
}
Beispiel #28
0
// list writes one page of UnitStates as the HTTP response. The page token is
// taken from the request URL, falling back to the default token built from
// sr.tokenLimit. The optional "machineID" and "unitName" query parameters
// are passed on to getUnitStatePage; when a parameter is repeated only its
// first value is used. An invalid token yields a 400 response; a failure to
// fetch the page is logged and reported as a 500 response without details.
func (sr *stateResource) list(rw http.ResponseWriter, req *http.Request) {
	token, err := findNextPageToken(req.URL, sr.tokenLimit)
	if err != nil {
		sendError(rw, http.StatusBadRequest, err)
		return
	}

	if token == nil {
		def := DefaultPageToken(sr.tokenLimit)
		token = &def
	}

	// Parse the query string once. Get returns the first value stored for a
	// key, or "" when the key is absent.
	query := req.URL.Query()
	machineID := query.Get("machineID")
	unitName := query.Get("unitName")

	page, err := getUnitStatePage(sr.cAPI, machineID, unitName, *token)
	if err != nil {
		log.Errorf("Failed fetching page of UnitStates: %v", err)
		sendError(rw, http.StatusInternalServerError, nil)
		return
	}

	sendResponse(rw, http.StatusOK, &page)
}
Beispiel #29
0
// Units returns every Unit stored in the Registry, global and non-global
// alike. The ordering is the one etcd delivers for a sorted read of the job
// directory. When that directory does not exist the result is (nil, nil).
// Directories that cannot be turned into a Unit are logged and skipped.
func (r *EtcdRegistry) Units() ([]job.Unit, error) {
	key := r.prefixed(jobPrefix)
	opts := &etcd.GetOptions{
		// We need Job Units to be sorted
		Sort:      true,
		Recursive: true,
	}
	res, err := r.kAPI.Get(context.Background(), key, opts)
	if err != nil {
		if isEtcdError(err, etcd.ErrorCodeKeyNotFound) {
			return nil, nil
		}
		return nil, err
	}

	// Load all unit files with one recursive read instead of issuing one
	// etcd request per unit.
	hashToUnit, err := r.getAllUnitsHashMap()
	if err != nil {
		log.Errorf("failed fetching all Units from etcd: %v", err)
		return nil, err
	}
	lookup := func(hash unit.Hash) *unit.UnitFile {
		stringHash := hash.String()
		if uf, ok := hashToUnit[stringHash]; ok {
			return uf
		}
		log.Errorf("did not find Unit %v in list of all units", stringHash)
		return nil
	}

	units := make([]job.Unit, 0, len(res.Node.Nodes))
	for _, dir := range res.Node.Nodes {
		u, err := r.dirToUnit(dir, lookup)
		if err != nil {
			log.Errorf("Failed to parse Unit from etcd: %v", err)
			continue
		}
		if u != nil {
			units = append(units, *u)
		}
	}

	return units, nil
}
Beispiel #30
0
// acquireLeadership tries to obtain the engine lease for machID.
//
// With no lease present it acquires one. A lease already held at version ver
// or higher is returned untouched. A lease held at a lower version is stolen
// with a TTL extended by the time remaining on it, after which the call
// blocks for that remaining time. nil is returned whenever reading,
// acquiring or stealing the lease fails. Each successful acquisition or
// steal is reported through metrics.ReportEngineLeader.
func acquireLeadership(lManager lease.Manager, machID string, ver int, ttl time.Duration) lease.Lease {
	existing, err := lManager.GetLease(engineLeaseName)
	if err != nil {
		log.Errorf("Unable to determine current lease: %v", err)
		return nil
	}

	var l lease.Lease

	switch {
	case existing == nil:
		// Nobody holds the lease.
		l, err = lManager.AcquireLease(engineLeaseName, machID, ver, ttl)
		if err != nil {
			log.Errorf("Engine leadership acquisition failed: %v", err)
			return nil
		}
		if l == nil {
			log.Debugf("Unable to acquire engine leadership")
			return nil
		}
		log.Infof("Engine leadership acquired")
		metrics.ReportEngineLeader()
		return l

	case existing.Version() >= ver:
		// The current holder runs an acceptable version: leave it alone.
		log.Debugf("Lease already held by Machine(%s) operating at acceptable version %d", existing.MachineID(), existing.Version())
		return existing
	}

	// The holder runs an older version: take the lease over.
	rem := existing.TimeRemaining()
	l, err = lManager.StealLease(engineLeaseName, machID, ver, ttl+rem, existing.Index())
	if err != nil {
		log.Errorf("Engine leadership steal failed: %v", err)
		return nil
	}
	if l == nil {
		log.Debugf("Unable to steal engine leadership")
		return nil
	}

	log.Infof("Stole engine leadership from Machine(%s)", existing.MachineID())
	metrics.ReportEngineLeader()

	if rem > 0 {
		log.Infof("Waiting %v for previous lease to expire before continuing reconciliation", rem)
		time.Sleep(rem)
	}

	return l
}