Пример #1
0
func (s *pingerSuite) calculatePingTimeout(c *gc.C) time.Duration {
	// Try opening an API connection a few times and take the max
	// delay among the attempts.
	attempt := utils.AttemptStrategy{
		Delay: coretesting.ShortWait,
		Min:   3,
	}
	var maxTimeout time.Duration
	for a := attempt.Start(); a.Next(); {
		openStart := time.Now()
		st, _ := s.OpenAPIAsNewMachine(c)
		err := st.Ping()
		if c.Check(err, jc.ErrorIsNil) {
			openDelay := time.Since(openStart)
			c.Logf("API open and initial ping took %v", openDelay)
			if maxTimeout < openDelay {
				maxTimeout = openDelay
			}
		}
		if st != nil {
			c.Check(st.Close(), jc.ErrorIsNil)
		}
	}
	if !c.Failed() && maxTimeout > 0 {
		return maxTimeout
	}
	c.Fatalf("cannot calculate ping timeout")
	return 0
}
Пример #2
0
// waitForAgentInitialisation polls the bootstrapped state server with a read-only
// command which will fail until the state server is fully initialised.
// TODO(wallyworld) - add a bespoke command to maybe the admin facade for this purpose.
func (c *BootstrapCommand) waitForAgentInitialisation(ctx *cmd.Context) (err error) {
	attempts := utils.AttemptStrategy{
		Min:   bootstrapReadyPollCount,
		Delay: bootstrapReadyPollDelay,
	}
	var client block.BlockListAPI
	for attempt := attempts.Start(); attempt.Next(); {
		client, err = blockAPI(&c.EnvCommandBase)
		if err != nil {
			return err
		}
		_, err = client.List()
		client.Close()
		if err == nil {
			ctx.Infof("Bootstrap complete")
			return nil
		}
		if strings.Contains(err.Error(), apiserver.UpgradeInProgressError.Error()) {
			ctx.Infof("Waiting for API to become available")
			continue
		}
		return err
	}
	return err
}
Пример #3
0
func createWebsocketDialer(cfg *websocket.Config, opts DialOpts) func(<-chan struct{}) (io.Closer, error) {
	openAttempt := utils.AttemptStrategy{
		Total: opts.Timeout,
		Delay: opts.RetryDelay,
	}
	return func(stop <-chan struct{}) (io.Closer, error) {
		for a := openAttempt.Start(); a.Next(); {
			select {
			case <-stop:
				return nil, parallel.ErrStopped
			default:
			}
			logger.Infof("dialing %q", cfg.Location)
			conn, err := websocket.DialConfig(cfg)
			if err == nil {
				return conn, nil
			}
			if a.HasNext() {
				logger.Debugf("error dialing %q, will retry: %v", cfg.Location, err)
			} else {
				logger.Infof("error dialing %q: %v", cfg.Location, err)
				return nil, errors.Errorf("unable to connect to %q", cfg.Location)
			}
		}
		panic("unreachable")
	}
}
Пример #4
0
// newWebsocketDialer0 returns a function that dials the websocket represented
// by the given configuration with the given dial options, suitable for passing
// to utils/parallel.Try.Start.
func newWebsocketDialer(cfg *websocket.Config, opts DialOpts) func(<-chan struct{}) (io.Closer, error) {
	// TODO(katco): 2016-08-09: lp:1611427
	openAttempt := utils.AttemptStrategy{
		Total: opts.Timeout,
		Delay: opts.RetryDelay,
	}
	return func(stop <-chan struct{}) (io.Closer, error) {
		for a := openAttempt.Start(); a.Next(); {
			select {
			case <-stop:
				return nil, parallel.ErrStopped
			default:
			}
			logger.Infof("dialing %q", cfg.Location)
			conn, err := opts.DialWebsocket(cfg)
			if err == nil {
				return conn, nil
			}
			if !a.HasNext() || isX509Error(err) {
				// We won't reconnect when there's an X509 error
				// because we're not going to succeed if we retry
				// in that case.
				logger.Infof("error dialing %q: %v", cfg.Location, err)
				return nil, errors.Annotatef(err, "unable to connect to API")
			}
		}
		panic("unreachable")
	}
}
Пример #5
0
// waitOperation waits for the provided operation to reach the "done"
// status. It follows the given attempt strategy (e.g. wait time between
// attempts) and may time out.
func (rc *rawConn) waitOperation(projectID string, op *compute.Operation, attempts utils.AttemptStrategy) error {
	started := time.Now()
	logger.Infof("GCE operation %q, waiting...", op.Name)
	for a := attempts.Start(); a.Next(); {
		if op.Status == StatusDone {
			break
		}

		var err error
		op, err = rc.checkOperation(projectID, op)
		if err != nil {
			return errors.Trace(err)
		}
	}
	if op.Status != StatusDone {
		err := errors.Errorf("timed out after %d seconds", time.Now().Sub(started)/time.Second)
		return waitError{op, err}
	}
	if op.Error != nil {
		for _, err := range op.Error.Errors {
			logger.Errorf("GCE operation error: (%s) %s", err.Code, err.Message)
		}
		return waitError{op, nil}
	}

	logger.Infof("GCE operation %q finished", op.Name)
	return nil
}
Пример #6
0
func checkFileHasContents(c *gc.C, stor storage.StorageReader, name string, contents []byte, attempt utils.AttemptStrategy) {
	r, err := storage.GetWithRetry(stor, name, attempt)
	c.Assert(err, gc.IsNil)
	c.Check(r, gc.NotNil)
	defer r.Close()

	data, err := ioutil.ReadAll(r)
	c.Check(err, gc.IsNil)
	c.Check(data, gc.DeepEquals, contents)

	url, err := stor.URL(name)
	c.Assert(err, gc.IsNil)

	var resp *http.Response
	for a := attempt.Start(); a.Next(); {
		resp, err = utils.GetValidatingHTTPClient().Get(url)
		c.Assert(err, gc.IsNil)
		if resp.StatusCode != 404 {
			break
		}
		c.Logf("get retrying after earlier get succeeded. *sigh*.")
	}
	c.Assert(err, gc.IsNil)
	data, err = ioutil.ReadAll(resp.Body)
	c.Assert(err, gc.IsNil)
	defer resp.Body.Close()
	c.Assert(resp.StatusCode, gc.Equals, 200, gc.Commentf("error response: %s", data))
	c.Check(data, gc.DeepEquals, contents)
}
Пример #7
0
func (c *restoreCommand) Run(ctx *cmd.Context) error {
	if c.showDescription {
		fmt.Fprintf(ctx.Stdout, "%s\n", c.Info().Purpose)
		return nil
	}
	if err := c.Log.Start(ctx); err != nil {
		return err
	}
	agentConf, err := extractConfig(c.backupFile)
	if err != nil {
		return errors.Annotate(err, "cannot extract configuration from backup file")
	}
	progress("extracted credentials from backup file")
	store, err := configstore.Default()
	if err != nil {
		return err
	}
	cfg, err := c.Config(store)
	if err != nil {
		return err
	}
	env, err := rebootstrap(cfg, ctx, c.Constraints)
	if err != nil {
		return errors.Annotate(err, "cannot re-bootstrap environment")
	}
	progress("connecting to newly bootstrapped instance")
	var apiState *api.State
	// The state server backend may not be ready to accept logins so we retry.
	// We'll do up to 8 retries over 2 minutes to give the server time to come up.
	// Typically we expect only 1 retry will be needed.
	attempt := utils.AttemptStrategy{Delay: 15 * time.Second, Min: 8}
	for a := attempt.Start(); a.Next(); {
		apiState, err = juju.NewAPIState(env, api.DefaultDialOpts())
		if err == nil || errors.Cause(err).Error() != "EOF" {
			break
		}
		progress("bootstrapped instance not ready - attempting to redial")
	}
	if err != nil {
		return errors.Annotate(err, "cannot connect to bootstrap instance")
	}
	progress("restoring bootstrap machine")
	machine0Addr, err := restoreBootstrapMachine(apiState, c.backupFile, agentConf)
	if err != nil {
		return errors.Annotate(err, "cannot restore bootstrap machine")
	}
	progress("restored bootstrap machine")

	apiState, err = juju.NewAPIState(env, api.DefaultDialOpts())
	progress("opening state")
	if err != nil {
		return errors.Annotate(err, "cannot connect to api server")
	}
	progress("updating all machines")
	if err := updateAllMachines(apiState, machine0Addr); err != nil {
		return errors.Annotate(err, "cannot update machines")
	}
	return nil
}
Пример #8
0
// GetWithRetry gets the named file from stor using the specified attempt strategy.
func GetWithRetry(stor StorageReader, name string, attempt utils.AttemptStrategy) (r io.ReadCloser, err error) {
	for a := attempt.Start(); a.Next(); {
		r, err = stor.Get(name)
		if err == nil || !stor.ShouldRetry(err) {
			break
		}
	}
	return r, err
}
Пример #9
0
// ListWithRetry lists the files matching prefix from stor using the specified attempt strategy.
func ListWithRetry(stor StorageReader, prefix string, attempt utils.AttemptStrategy) (list []string, err error) {
	for a := attempt.Start(); a.Next(); {
		list, err = stor.List(prefix)
		if err == nil || !stor.ShouldRetry(err) {
			break
		}
	}
	return list, err
}
Пример #10
0
// WaitForAgentInitialisation polls the bootstrapped controller with a read-only
// command which will fail until the controller is fully initialised.
// TODO(wallyworld) - add a bespoke command to maybe the admin facade for this purpose.
func WaitForAgentInitialisation(ctx *cmd.Context, c *modelcmd.ModelCommandBase, controllerName, hostedModelName string) error {
	// TODO(katco): 2016-08-09: lp:1611427
	attempts := utils.AttemptStrategy{
		Min:   bootstrapReadyPollCount,
		Delay: bootstrapReadyPollDelay,
	}
	var (
		apiAttempts int
		err         error
	)

	// Make a best effort to find the new controller address so we can print it.
	addressInfo := ""
	controller, err := c.ClientStore().ControllerByName(controllerName)
	if err == nil && len(controller.APIEndpoints) > 0 {
		addr, err := network.ParseHostPort(controller.APIEndpoints[0])
		if err == nil {
			addressInfo = fmt.Sprintf(" at %s", addr.Address.Value)
		}
	}

	ctx.Infof("Contacting Juju controller%s to verify accessibility...", addressInfo)
	apiAttempts = 1
	for attempt := attempts.Start(); attempt.Next(); apiAttempts++ {
		err = tryAPI(c)
		if err == nil {
			ctx.Infof("Bootstrap complete, %q controller now available.", controllerName)
			ctx.Infof("Controller machines are in the %q model.", bootstrap.ControllerModelName)
			ctx.Infof("Initial model %q added.", hostedModelName)
			break
		}
		// As the API server is coming up, it goes through a number of steps.
		// Initially the upgrade steps run, but the api server allows some
		// calls to be processed during the upgrade, but not the list blocks.
		// Logins are also blocked during space discovery.
		// It is also possible that the underlying database causes connections
		// to be dropped as it is initialising, or reconfiguring. These can
		// lead to EOF or "connection is shut down" error messages. We skip
		// these too, hoping that things come back up before the end of the
		// retry poll count.
		errorMessage := errors.Cause(err).Error()
		switch {
		case errors.Cause(err) == io.EOF,
			strings.HasSuffix(errorMessage, "connection is shut down"),
			strings.HasSuffix(errorMessage, "no api connection available"),
			strings.Contains(errorMessage, "spaces are still being discovered"):
			ctx.Verbosef("Still waiting for API to become available")
			continue
		case params.ErrCode(err) == params.CodeUpgradeInProgress:
			ctx.Verbosef("Still waiting for API to become available: %v", err)
			continue
		}
		break
	}
	return errors.Annotatef(err, "unable to contact api server after %d attempts", apiAttempts)
}
Пример #11
0
func (s *pingerSuite) TestAgentConnectionDelaysShutdownWithPing(c *gc.C) {
	// To negate the effects of an underpowered or heavily loaded
	// machine running this test, tune the shortTimeout based on the
	// maximum duration it takes to open an API connection.
	shortTimeout := s.calculatePingTimeout(c)
	attemptDelay := shortTimeout / 4

	s.PatchValue(apiserver.MaxClientPingInterval, time.Duration(shortTimeout))

	st, _ := s.OpenAPIAsNewMachine(c)
	err := st.Ping()
	c.Assert(err, jc.ErrorIsNil)
	defer st.Close()

	// As long as we don't wait too long, the connection stays open
	attempt := utils.AttemptStrategy{
		Min:   10,
		Delay: attemptDelay,
	}
	testStart := time.Now()
	c.Logf(
		"pinging %d times with %v delay, ping timeout %v, starting at %v",
		attempt.Min, attempt.Delay, shortTimeout, testStart,
	)
	var lastLoop time.Time
	for a := attempt.Start(); a.Next(); {
		testNow := time.Now()
		loopDelta := testNow.Sub(lastLoop)
		if lastLoop.IsZero() {
			loopDelta = 0
		}
		c.Logf("duration since last ping: %v", loopDelta)
		err = st.Ping()
		if !c.Check(
			err, jc.ErrorIsNil,
			gc.Commentf(
				"ping timeout exceeded at %v (%v since the test start)",
				testNow, testNow.Sub(testStart),
			),
		) {
			c.Check(err, gc.ErrorMatches, "connection is shut down")
			return
		}
		lastLoop = time.Now()
	}

	// However, once we stop pinging for too long, the connection dies
	time.Sleep(2 * shortTimeout) // Exceed the timeout.
	err = st.Ping()
	c.Assert(err, gc.ErrorMatches, "connection is shut down")
}
Пример #12
0
func checkConnectionDies(c *gc.C, conn api.Connection) {
	attempt := utils.AttemptStrategy{
		Total: coretesting.LongWait,
		Delay: coretesting.ShortWait,
	}
	for a := attempt.Start(); a.Next(); {
		err := pingConn(conn)
		if err != nil {
			c.Assert(err, gc.ErrorMatches, "connection is shut down")
			return
		}
	}
	c.Fatal("connection didn't get shut down")
}
Пример #13
0
func attemptLoop(c *gc.C, strategy utils.AttemptStrategy, desc string, f func() error) {
	var err error
	start := time.Now()
	attemptCount := 0
	for attempt := strategy.Start(); attempt.Next(); {
		attemptCount += 1
		if err = f(); err == nil || !attempt.HasNext() {
			break
		}
		c.Logf("%s failed: %v", desc, err)
	}
	c.Logf("%s: %d attempts in %s", desc, attemptCount, time.Since(start))
	c.Assert(err, gc.IsNil)
}
Пример #14
0
// networkOperationWithRetries calls the supplied function and if it returns a
// network error which is temporary, will retry a number of times before giving up.
func networkOperationWithRetries(strategy utils.AttemptStrategy, networkOp func() error, description string) func() error {
	return func() error {
		for a := strategy.Start(); ; {
			a.Next()
			err := networkOp()
			if !a.HasNext() || err == nil {
				return errors.Trace(err)
			}
			if networkErr, ok := errors.Cause(err).(net.Error); !ok || !networkErr.Temporary() {
				return errors.Trace(err)
			}
			logger.Debugf("%q error, will retry: %v", description, err)
		}
	}
}
Пример #15
0
func (v *ebsVolumeSource) waitVolumeCreated(volumeId string) (*ec2.Volume, error) {
	var attempt = utils.AttemptStrategy{
		Total: 5 * time.Second,
		Delay: 200 * time.Millisecond,
	}
	for a := attempt.Start(); a.Next(); {
		volume, err := v.describeVolume(volumeId)
		if err != nil {
			return nil, errors.Trace(err)
		}
		if volume.Status != volumeStatusCreating {
			return volume, nil
		}
	}
	return nil, errors.Errorf("timed out waiting for volume %v to become available", volumeId)
}
Пример #16
0
func assertStateBecomesClosed(c *gc.C, st *state.State) {
	// This is gross but I can't see any other way to check for
	// closedness outside the state package.
	checkModel := func() {
		attempt := utils.AttemptStrategy{
			Total: coretesting.LongWait,
			Delay: coretesting.ShortWait,
		}
		for a := attempt.Start(); a.Next(); {
			// This will panic once the state is closed.
			_, _ = st.Model()
		}
		// If we got here then st is still open.
		st.Close()
	}
	c.Assert(checkModel, gc.PanicMatches, "Session already closed")
}
Пример #17
0
// It appears that sometimes the lock is not cleared when we expect it to be.
// Capture and log any errors from the Unlock method and retry a few times.
func unlockEnvironmentLock(lock *fslock.Lock) {
	attempts := utils.AttemptStrategy{
		Delay: 50 * time.Millisecond,
		Min:   10,
	}
	var err error
	for a := attempts.Start(); a.Next(); {
		err = lock.Unlock()
		if err == nil {
			return
		}
		if a.HasNext() {
			logger.Debugf("failed to unlock configstore lock: %s, retrying", err)
		}
	}
	logger.Errorf("unable to unlock configstore lock: %s", err)
}
Пример #18
0
func (s *mongoPingerSuite) TestAgentConnectionsShutDownWhenStateDies(c *gc.C) {
	st, _ := s.OpenAPIAsNewMachine(c)
	err := st.Ping()
	c.Assert(err, jc.ErrorIsNil)
	gitjujutesting.MgoServer.Destroy()

	attempt := utils.AttemptStrategy{
		Total: coretesting.LongWait,
		Delay: coretesting.ShortWait,
	}
	for a := attempt.Start(); a.Next(); {
		if err := st.Ping(); err != nil {
			c.Assert(err, gc.ErrorMatches, "connection is shut down")
			return
		}
	}
	c.Fatalf("timed out waiting for API server to die")
}
Пример #19
0
// GetInstall runs 'apt-get install packages' for the packages listed
// here. apt-get install calls are retried for 30 times with a 10
// second sleep between attempts.
func GetInstall(packages ...string) error {
	cmdArgs := append([]string(nil), getCommand...)
	cmdArgs = append(cmdArgs, "install")
	cmdArgs = append(cmdArgs, packages...)
	logger.Infof("Running: %s", cmdArgs)
	cmd := exec.Command(cmdArgs[0], cmdArgs[1:]...)
	cmd.Env = append(os.Environ(), getEnvOptions...)

	var err error
	var out []byte
	// Retry APT operations for 30 times, sleeping 10 seconds
	// between attempts. This avoids failure in the case of
	// something else having the dpkg lock (e.g. a charm on the
	// machine we're deploying containers to).
	attempt := utils.AttemptStrategy{Delay: 10 * time.Second, Min: 30}
	for a := attempt.Start(); a.Next(); {
		out, err = CommandOutput(cmd)
		if err == nil {
			return nil
		}
		exitError, ok := err.(*exec.ExitError)
		if !ok {
			err = fmt.Errorf("unexpected error type %T", err)
			break
		}
		waitStatus, ok := exitError.ProcessState.Sys().(syscall.WaitStatus)
		if !ok {
			err = fmt.Errorf("unexpected process state type %T", exitError.ProcessState.Sys())
			break
		}
		// From apt-get(8) "apt-get returns zero on normal
		// operation, decimal 100 on error."
		if waitStatus.ExitStatus() != 100 {
			break
		}
	}
	if err != nil {
		logger.Errorf("apt-get command failed: %v\nargs: %#v\n%s",
			err, cmdArgs, string(out))
		return fmt.Errorf("apt-get failed: %v", err)
	}
	return nil
}
Пример #20
0
func (v *ebsVolumeSource) waitVolume(
	volumeId string,
	attempt utils.AttemptStrategy,
	pred func(v *ec2.Volume) (bool, error),
) (*ec2.Volume, error) {
	for a := attempt.Start(); a.Next(); {
		volume, err := v.describeVolume(volumeId)
		if err != nil {
			return nil, errors.Trace(err)
		}
		ok, err := pred(volume)
		if err != nil {
			return nil, errors.Trace(err)
		}
		if ok {
			return volume, nil
		}
	}
	return nil, errWaitVolumeTimeout
}
Пример #21
0
// waitForAgentInitialisation polls the bootstrapped controller with a read-only
// command which will fail until the controller is fully initialised.
// TODO(wallyworld) - add a bespoke command to maybe the admin facade for this purpose.
func (c *bootstrapCommand) waitForAgentInitialisation(ctx *cmd.Context) (err error) {
	attempts := utils.AttemptStrategy{
		Min:   bootstrapReadyPollCount,
		Delay: bootstrapReadyPollDelay,
	}
	var client block.BlockListAPI
	for attempt := attempts.Start(); attempt.Next(); {
		client, err = blockAPI(&c.ModelCommandBase)
		if err != nil {
			// Logins are prevented whilst space discovery is ongoing.
			errorMessage := err.Error()
			if strings.Contains(errorMessage, "space discovery still in progress") {
				continue
			}
			return err
		}
		_, err = client.List()
		client.Close()
		if err == nil {
			ctx.Infof("Bootstrap complete")
			return nil
		}
		// As the API server is coming up, it goes through a number of steps.
		// Initially the upgrade steps run, but the api server allows some
		// calls to be processed during the upgrade, but not the list blocks.
		// It is also possible that the underlying database causes connections
		// to be dropped as it is initialising, or reconfiguring. These can
		// lead to EOF or "connection is shut down" error messages. We skip
		// these too, hoping that things come back up before the end of the
		// retry poll count.
		errorMessage := err.Error()
		if strings.Contains(errorMessage, apiserver.UpgradeInProgressError.Error()) ||
			strings.HasSuffix(errorMessage, "EOF") ||
			strings.HasSuffix(errorMessage, "connection is shut down") {
			ctx.Infof("Waiting for API to become available")
			continue
		}
		return err
	}
	return err
}
Пример #22
0
// waitForAgentInitialisation polls the bootstrapped controller with a read-only
// command which will fail until the controller is fully initialised.
// TODO(wallyworld) - add a bespoke command to maybe the admin facade for this purpose.
func (c *bootstrapCommand) waitForAgentInitialisation(ctx *cmd.Context) error {
	attempts := utils.AttemptStrategy{
		Min:   bootstrapReadyPollCount,
		Delay: bootstrapReadyPollDelay,
	}
	var (
		apiAttempts int
		err         error
	)

	apiAttempts = 1
	for attempt := attempts.Start(); attempt.Next(); apiAttempts++ {
		err = c.tryAPI()
		if err == nil {
			ctx.Infof("Bootstrap complete, %s now available.", c.controllerName)
			break
		}
		// As the API server is coming up, it goes through a number of steps.
		// Initially the upgrade steps run, but the api server allows some
		// calls to be processed during the upgrade, but not the list blocks.
		// Logins are also blocked during space discovery.
		// It is also possible that the underlying database causes connections
		// to be dropped as it is initialising, or reconfiguring. These can
		// lead to EOF or "connection is shut down" error messages. We skip
		// these too, hoping that things come back up before the end of the
		// retry poll count.
		errorMessage := errors.Cause(err).Error()
		switch {
		case errors.Cause(err) == io.EOF,
			strings.HasSuffix(errorMessage, "connection is shut down"),
			strings.Contains(errorMessage, "spaces are still being discovered"):
			ctx.Infof("Waiting for API to become available")
			continue
		case params.ErrCode(err) == params.CodeUpgradeInProgress:
			ctx.Infof("Waiting for API to become available: %v", err)
			continue
		}
		break
	}
	return errors.Annotatef(err, "unable to contact api server after %d attempts", apiAttempts)
}
Пример #23
0
func (*utilsSuite) TestAttemptTiming(c *gc.C) {
	testAttempt := utils.AttemptStrategy{
		Total: 0.25e9,
		Delay: 0.1e9,
	}
	want := []time.Duration{0, 0.1e9, 0.2e9, 0.2e9}
	got := make([]time.Duration, 0, len(want)) // avoid allocation when testing timing
	t0 := time.Now()
	for a := testAttempt.Start(); a.Next(); {
		got = append(got, time.Now().Sub(t0))
	}
	got = append(got, time.Now().Sub(t0))
	c.Assert(got, gc.HasLen, len(want))
	const margin = 0.01e9
	for i, got := range want {
		lo := want[i] - margin
		hi := want[i] + margin
		if got < lo || got > hi {
			c.Errorf("attempt %d want %g got %g", i, want[i].Seconds(), got.Seconds())
		}
	}
}
Пример #24
0
// newStateConnection tries to connect to the newly restored state server.
func newStateConnection(environTag names.EnvironTag, info *mongo.MongoInfo) (*state.State, error) {
	// We need to retry here to allow mongo to come up on the restored state server.
	// The connection might succeed due to the mongo dial retries but there may still
	// be a problem issuing database commands.
	var (
		st  *state.State
		err error
	)
	const (
		newStateConnDelay       = 15 * time.Second
		newStateConnMinAttempts = 8
	)
	attempt := utils.AttemptStrategy{Delay: newStateConnDelay, Min: newStateConnMinAttempts}
	for a := attempt.Start(); a.Next(); {
		st, err = state.Open(environTag, info, mongoDefaultDialOpts(), environsNewStatePolicy())
		if err == nil {
			return st, nil
		}
		logger.Errorf("cannot open state, retrying: %v", err)
	}
	return st, errors.Annotate(err, "cannot open state")
}
Пример #25
0
func ExampleAttempt_HasNext() {
	// This example shows how Attempt.HasNext can be used to help
	// structure an attempt loop. If the godoc example code allowed
	// us to make the example return an error, we would uncomment
	// the commented return statements.
	attempts := utils.AttemptStrategy{
		Total: 1 * time.Second,
		Delay: 250 * time.Millisecond,
	}
	for attempt := attempts.Start(); attempt.Next(); {
		x, err := doSomething()
		if shouldRetry(err) && attempt.HasNext() {
			continue
		}
		if err != nil {
			// return err
			return
		}
		doSomethingWith(x)
	}
	// return ErrTimedOut
	return
}
Пример #26
0
// newStateConnection tries to connect to the newly restored controller.
func newStateConnection(controllerTag names.ControllerTag, modelTag names.ModelTag, info *mongo.MongoInfo) (*state.State, error) {
	// We need to retry here to allow mongo to come up on the restored controller.
	// The connection might succeed due to the mongo dial retries but there may still
	// be a problem issuing database commands.
	var (
		st  *state.State
		err error
	)
	const (
		newStateConnDelay       = 15 * time.Second
		newStateConnMinAttempts = 8
	)
	// TODO(katco): 2016-08-09: lp:1611427
	attempt := utils.AttemptStrategy{Delay: newStateConnDelay, Min: newStateConnMinAttempts}
	getEnviron := stateenvirons.GetNewEnvironFunc(environs.New)
	for a := attempt.Start(); a.Next(); {
		st, err = state.Open(modelTag, controllerTag, info, mongoDefaultDialOpts(), environsGetNewPolicyFunc(getEnviron))
		if err == nil {
			return st, nil
		}
		logger.Errorf("cannot open state, retrying: %v", err)
	}
	return st, errors.Annotate(err, "cannot open state")
}
Пример #27
0
func (s *MongoSuite) TestCurrentStatus(c *gc.C) {
	session := s.root.MustDial()
	defer session.Close()

	inst1 := newServer(c)
	defer inst1.Destroy()
	defer Remove(session, inst1.Addr())

	inst2 := newServer(c)
	defer inst2.Destroy()
	defer Remove(session, inst2.Addr())

	var err error
	strategy := utils.AttemptStrategy{Total: time.Minute * 2, Delay: time.Millisecond * 500}
	attempt := strategy.Start()
	for attempt.Next() {
		err = Add(session, Member{Address: inst1.Addr()}, Member{Address: inst2.Addr()})
		if err == nil || !attempt.HasNext() {
			break
		}
	}
	c.Assert(err, gc.IsNil)

	expected := &Status{
		Name: rsName,
		Members: []MemberStatus{{
			Id:      1,
			Address: s.root.Addr(),
			Self:    true,
			ErrMsg:  "",
			Healthy: true,
			State:   PrimaryState,
		}, {
			Id:      2,
			Address: inst1.Addr(),
			Self:    false,
			ErrMsg:  "",
			Healthy: true,
			State:   SecondaryState,
		}, {
			Id:      3,
			Address: inst2.Addr(),
			Self:    false,
			ErrMsg:  "",
			Healthy: true,
			State:   SecondaryState,
		}},
	}

	strategy.Total = time.Second * 90
	attempt = strategy.Start()
	var res *Status
	for attempt.Next() {
		var err error
		res, err = CurrentStatus(session)
		if err != nil {
			if !attempt.HasNext() {
				c.Errorf("Couldn't get status before timeout, got err: %v", err)
				return
			} else {
				// try again
				continue
			}
		}

		if res.Members[0].State == PrimaryState &&
			res.Members[1].State == SecondaryState &&
			res.Members[2].State == SecondaryState {
			break
		}
		if !attempt.HasNext() {
			c.Errorf("Servers did not get into final state before timeout.  Status: %#v", res)
			return
		}
	}

	for x, _ := range res.Members {
		// non-empty uptime and ping
		c.Check(res.Members[x].Uptime, gc.Not(gc.Equals), 0)

		// ping is always going to be zero since we're on localhost
		// so we can't really test it right now

		// now overwrite Uptime so it won't throw off DeepEquals
		res.Members[x].Uptime = 0
	}
	c.Check(res, jc.DeepEquals, expected)
}
Пример #28
0
	"fmt"
	"os"
	"os/exec"
	"strings"
	"time"

	"github.com/juju/errors"
	"github.com/juju/loggo"

	"github.com/juju/utils"
)

var (
	logger = loggo.GetLogger("juju.utils.packaging.manager")

	AttemptStrategy = utils.AttemptStrategy{
		Delay: 10 * time.Second,
		Min:   30,
	}
)

// CommandOutput is cmd.Output. It was aliased for testing purposes.
var CommandOutput = (*exec.Cmd).CombinedOutput

// processStateSys is ps.Sys. It was aliased for testing purposes.
var ProcessStateSys = (*os.ProcessState).Sys

// RunCommand is utils.RunCommand. It was aliased for testing purposes.
var RunCommand = utils.RunCommand

// exitStatuser is a mini-interface for the ExitStatus() method.
type exitStatuser interface {
Пример #29
0
func (c *restoreCommand) Run(ctx *cmd.Context) error {
	if c.showDescription {
		fmt.Fprintf(ctx.Stdout, "%s\n", c.Info().Purpose)
		return nil
	}
	if err := c.Log.Start(ctx); err != nil {
		return err
	}
	agentConf, err := extractConfig(c.backupFile)
	if err != nil {
		return errors.Annotate(err, "cannot extract configuration from backup file")
	}
	progress("extracted credentials from backup file")
	store, err := configstore.Default()
	if err != nil {
		return err
	}
	cfg, err := c.Config(store, nil)
	if err != nil {
		return err
	}
	env, err := rebootstrap(cfg, ctx, c.Constraints)
	if err != nil {
		return errors.Annotate(err, "cannot re-bootstrap environment")
	}
	progress("connecting to newly bootstrapped instance")
	var apiState api.Connection
	// The state server backend may not be ready to accept logins so we retry.
	// We'll do up to 8 retries over 2 minutes to give the server time to come up.
	// Typically we expect only 1 retry will be needed.
	attempt := utils.AttemptStrategy{Delay: 15 * time.Second, Min: 8}
	// While specifying the admin user will work for now, as soon as we allow
	// the users to have a different initial user name, or they have changed
	// the password for the admin user, this will fail.
	owner := names.NewUserTag("admin")
	for a := attempt.Start(); a.Next(); {
		apiState, err = juju.NewAPIState(owner, env, api.DefaultDialOpts())
		if err == nil || errors.Cause(err).Error() != "EOF" {
			break
		}
		progress("bootstrapped instance not ready - attempting to redial")
	}
	if err != nil {
		return errors.Annotate(err, "cannot connect to bootstrap instance")
	}
	progress("restoring bootstrap machine")
	machine0Addr, err := restoreBootstrapMachine(apiState, c.backupFile, agentConf)
	if err != nil {
		return errors.Annotate(err, "cannot restore bootstrap machine")
	}
	progress("restored bootstrap machine")

	apiState, err = juju.NewAPIState(owner, env, api.DefaultDialOpts())
	progress("opening state")
	if err != nil {
		return errors.Annotate(err, "cannot connect to api server")
	}
	progress("updating all machines")
	results, err := updateAllMachines(apiState, machine0Addr)
	if err != nil {
		return errors.Annotate(err, "cannot update machines")
	}
	var message string
	for _, result := range results {
		if result.err != nil {
			message = fmt.Sprintf("Update of machine %q failed: %v", result.machineName, result.err)
		} else {
			message = fmt.Sprintf("Succesful update of machine %q", result.machineName)
		}
		progress(message)
	}
	return nil
}
Пример #30
0
func (c *restoreCommand) Run(ctx *cmd.Context) error {
	if c.showDescription {
		fmt.Fprintf(ctx.Stdout, "%s\n", c.Info().Purpose)
		return nil
	}
	if err := c.Log.Start(ctx); err != nil {
		return err
	}
	agentConf, err := extractConfig(c.backupFile)
	if err != nil {
		return fmt.Errorf("cannot extract configuration from backup file: %v", err)
	}
	progress("extracted credentials from backup file")
	store, err := configstore.Default()
	if err != nil {
		return err
	}
	cfg, _, err := environs.ConfigForName(c.EnvName, store)
	if err != nil {
		return err
	}
	env, err := rebootstrap(cfg, ctx, c.Constraints)
	if err != nil {
		return fmt.Errorf("cannot re-bootstrap environment: %v", err)
	}
	progress("connecting to newly bootstrapped instance")
	var conn *juju.APIConn
	// The state server backend may not be ready to accept logins so we retry.
	// We'll do up to 8 retries over 2 minutes to give the server time to come up.
	// Typically we expect only 1 retry will be needed.
	attempt := utils.AttemptStrategy{Delay: 15 * time.Second, Min: 8}
	for a := attempt.Start(); a.Next(); {
		conn, err = juju.NewAPIConn(env, api.DefaultDialOpts())
		if err == nil || errors.Cause(err).Error() != "EOF" {
			break
		}
		progress("bootstrapped instance not ready - attempting to redial")
	}
	if err != nil {
		return fmt.Errorf("cannot connect to bootstrap instance: %v", err)
	}
	progress("restoring bootstrap machine")
	newInstId, machine0Addr, err := restoreBootstrapMachine(conn, c.backupFile, agentConf)
	if err != nil {
		return fmt.Errorf("cannot restore bootstrap machine: %v", err)
	}
	progress("restored bootstrap machine")
	// Update the environ state to point to the new instance.
	if err := bootstrap.SaveState(env.Storage(), &bootstrap.BootstrapState{
		StateInstances: []instance.Id{newInstId},
	}); err != nil {
		return fmt.Errorf("cannot update environ bootstrap state storage: %v", err)
	}
	// Construct our own state info rather than using juju.NewConn so
	// that we can avoid storage eventual-consistency issues
	// (and it's faster too).
	caCert, ok := cfg.CACert()
	if !ok {
		return fmt.Errorf("configuration has no CA certificate")
	}
	progress("opening state")
	// We need to retry here to allow mongo to come up on the restored state server.
	// The connection might succeed due to the mongo dial retries but there may still
	// be a problem issuing database commands.
	var st *state.State
	for a := attempt.Start(); a.Next(); {
		st, err = state.Open(&state.Info{
			Info: mongo.Info{
				Addrs:  []string{fmt.Sprintf("%s:%d", machine0Addr, cfg.StatePort())},
				CACert: caCert,
			},
			Tag:      agentConf.Credentials.Tag,
			Password: agentConf.Credentials.Password,
		}, mongo.DefaultDialOpts(), environs.NewStatePolicy())
		if err == nil {
			break
		}
		progress("state server not ready - attempting to re-connect")
	}
	if err != nil {
		return fmt.Errorf("cannot open state: %v", err)
	}
	progress("updating all machines")
	if err := updateAllMachines(st, machine0Addr); err != nil {
		return fmt.Errorf("cannot update machines: %v", err)
	}
	return nil
}