Example #1
0
// these parts of Create may require a retry
func (s *consulStore) innerCreate(manifest pods.Manifest, nodeSelector klabels.Selector, podLabels klabels.Set) (fields.RC, error) {
	id := fields.ID(uuid.New())
	rcp := kp.RCPath(id.String())
	rc := fields.RC{
		ID:              id,
		Manifest:        manifest,
		NodeSelector:    nodeSelector,
		PodLabels:       podLabels,
		ReplicasDesired: 0,
		Disabled:        false,
	}

	jsonRC, err := json.Marshal(rc)
	if err != nil {
		return fields.RC{}, err
	}
	success, _, err := s.kv.CAS(&api.KVPair{
		Key:   rcp,
		Value: jsonRC,
		// the chance of the UUID already existing is vanishingly small, but
		// technically not impossible, so we should use the CAS index to guard
		// against duplicate UUIDs
		ModifyIndex: 0,
	}, nil)

	if err != nil {
		return fields.RC{}, consulutil.NewKVError("cas", rcp, err)
	}
	if !success {
		return fields.RC{}, CASError(rcp)
	}
	return rc, nil
}
Example #2
0
// performs a safe (ie check-and-set) mutation of the rc with the given id,
// using the given function
// if the mutator returns an error, it will be propagated out
// if the returned RC has id="", then it will be deleted
func (s *consulStore) mutateRc(id fields.ID, mutator func(fields.RC) (fields.RC, error)) error {
	rcp := kp.RCPath(id.String())
	kvp, meta, err := s.kv.Get(rcp, nil)
	if err != nil {
		return err
	}
	if kvp == nil {
		return fmt.Errorf("replication controller %s does not exist", id)
	}

	rc, err := s.kvpToRC(kvp)
	if err != nil {
		return err
	}
	newKVP := &api.KVPair{
		Key:         rcp,
		ModifyIndex: meta.LastIndex,
	}

	var success bool
	newRC, err := mutator(rc)
	if err != nil {
		return err
	}
	if newRC.ID.String() == "" {
		// TODO: If this fails, then we have some dangling labels.
		// Perhaps they can be cleaned up later.
		// note that if the CAS fails afterwards, we will have still deleted
		// the labels, and then we will retry, which will involve deleting them
		// again
		// really the only way to solve this is a transaction
		err = s.applicator.RemoveAllLabels(labels.RC, id.String())
		if err != nil {
			return err
		}

		success, _, err = s.kv.DeleteCAS(newKVP, nil)
	} else {
		b, err := json.Marshal(newRC)
		if err != nil {
			return err
		}
		newKVP.Value = b
		success, _, err = s.kv.CAS(newKVP, nil)
	}

	if err != nil {
		return err
	}
	if !success {
		return CASError(rcp)
	}
	return nil
}
Example #3
0
func (s *consulStore) Get(id fields.ID) (fields.RC, error) {
	kvp, _, err := s.kv.Get(kp.RCPath(id.String()), nil)
	if err != nil {
		return fields.RC{}, err
	}
	if kvp == nil {
		// ID didn't exist
		return fields.RC{}, nil
	}
	return s.kvpToRC(kvp)
}
Example #4
0
File: farm.go Project: tomzhang/p2
// close one child
func (rcf *Farm) releaseChild(id fields.ID) {
	rcf.logger.WithField("rc", id).Infoln("Releasing replication controller")
	close(rcf.children[id].quit)
	delete(rcf.children, id)

	// if our lock is active, attempt to gracefully release it on this rc
	if rcf.lock != nil {
		err := rcf.lock.Unlock(kp.LockPath(kp.RCPath(id.String())))
		if err != nil {
			rcf.logger.WithField("rc", id).Warnln("Could not release replication controller lock")
		}
	}
}
Example #5
0
func (s *consulStore) Watch(rc *fields.RC, quit <-chan struct{}) (<-chan struct{}, <-chan error) {
	updated := make(chan struct{})
	errors := make(chan error)
	input := make(chan *api.KVPair)

	go consulutil.WatchSingle(kp.RCPath(rc.ID.String()), s.kv, input, quit, errors)

	go func() {
		defer close(updated)
		defer close(errors)

		for kvp := range input {
			if kvp == nil {
				// seems this RC got deleted from under us. quitting
				// would be unexpected, so we'll just wait for it to
				// reappear in consul
				continue
			}
			newRC, err := s.kvpToRC(kvp)
			if err != nil {
				select {
				case errors <- err:
				case <-quit:
				}
			} else {
				*rc = newRC
				select {
				case updated <- struct{}{}:
				case <-quit:
				}
			}
		}
	}()

	return updated, errors
}
Example #6
0
func (u update) lockPath(id rcf.ID) string {
	// RUs want to lock the RCs they're mutating, but this lock is separate
	// from the RC lock (which is held by the rc.WatchDesires goroutine), so the
	// key being locked is different
	return kp.LockPath(kp.RCPath(id.String(), "update"))
}
Example #7
0
File: farm.go Project: tomzhang/p2
// Start is a blocking function that monitors Consul for replication controllers.
// The Farm will attempt to claim replication controllers as they appear and,
// if successful, will start goroutines for those replication controllers to do
// their job. Closing the quit channel will cause this function to return,
// releasing all locks it holds.
//
// Start is not safe for concurrent execution. Do not execute multiple
// concurrent instances of Start.
func (rcf *Farm) Start(quit <-chan struct{}) {
	subQuit := make(chan struct{})
	defer close(subQuit)
	rcWatch, rcErr := rcf.rcStore.WatchNew(subQuit)

START_LOOP:
	for {
		select {
		case <-quit:
			rcf.logger.NoFields().Infoln("Halt requested, releasing replication controllers")
			rcf.releaseChildren()
			return
		case session := <-rcf.sessions:
			if session == "" {
				// our session has expired, we must assume our locked children
				// have all been released and that someone else may have
				// claimed them by now
				rcf.logger.NoFields().Errorln("Session expired, releasing replication controllers")
				rcf.lock = nil
				rcf.releaseChildren()
			} else {
				// a new session has been acquired - only happens after an
				// expiration message, so len(children)==0
				rcf.logger.WithField("session", session).Infoln("Acquired new session")
				lock := rcf.kpStore.NewUnmanagedLock(session, "")
				rcf.lock = &lock
				// TODO: restart the watch so that you get updates right away?
			}
		case err := <-rcErr:
			rcf.logger.WithError(err).Errorln("Could not read consul replication controllers")
		case rcFields := <-rcWatch:
			rcf.logger.WithField("n", len(rcFields)).Debugln("Received replication controller update")
			if rcf.lock == nil {
				// we can't claim new nodes because our session is invalidated.
				// raise an error and ignore this update
				rcf.logger.NoFields().Warnln("Received replication controller update, but do not have session to acquire locks")
				continue
			}

			// track which children were found in the returned set
			foundChildren := make(map[fields.ID]struct{})
			for _, rcField := range rcFields {
				rcLogger := rcf.logger.SubLogger(logrus.Fields{
					"rc":  rcField.ID,
					"pod": rcField.Manifest.ID(),
				})
				if _, ok := rcf.children[rcField.ID]; ok {
					// this one is already ours, skip
					rcLogger.NoFields().Debugln("Got replication controller already owned by self")
					foundChildren[rcField.ID] = struct{}{}
					continue
				}

				err := rcf.lock.Lock(kp.LockPath(kp.RCPath(rcField.ID.String())))
				if _, ok := err.(kp.AlreadyLockedError); ok {
					// someone else must have gotten it first - log and move to
					// the next one
					rcLogger.NoFields().Debugln("Lock on replication controller was denied")
					continue
				} else if err != nil {
					rcLogger.NoFields().Errorln("Got error while locking replication controller - session may be expired")
					// stop processing this update and go back to the select
					// chances are this error is a network problem or session
					// expiry, and all the others in this update would also fail
					continue START_LOOP
				}

				// at this point the rc is ours, time to spin it up
				rcLogger.NoFields().Infoln("Acquired lock on new replication controller, spawning")

				newChild := New(
					rcField,
					rcf.kpStore,
					rcf.rcStore,
					rcf.scheduler,
					rcf.labeler,
					rcLogger,
				)
				childQuit := make(chan struct{})
				rcf.children[rcField.ID] = childRC{rc: newChild, quit: childQuit}
				foundChildren[rcField.ID] = struct{}{}

				go func() {
					// disabled-ness is handled in watchdesires
					for err := range newChild.WatchDesires(childQuit) {
						rcLogger.WithError(err).Errorln("Got error in replication controller loop")
					}
				}()
			}

			// now remove any children that were not found in the result set
			rcf.logger.NoFields().Debugln("Pruning replication controllers that have disappeared")
			for id := range rcf.children {
				if _, ok := foundChildren[id]; !ok {
					rcf.releaseChild(id)
				}
			}
		}
	}
}