// mongoRestoreCall runs mongorestore against the local mongo instance,
// retrying each invocation according to callArgs. If dbs is empty the whole
// dump is restored; otherwise each named database is restored individually.
func mongoRestoreCall(
	runCommand utilsRun,
	tmpDir, mongoPath, adminPassword, migrationName string,
	dbs []string,
	statePort int,
	invalidSSL bool,
	batchSize int,
	callArgs retry.CallArgs,
) error {
	mongorestore := filepath.Join(mongoPath, "mongorestore")
	restoreParams := []string{
		"--ssl",
		"--port", strconv.Itoa(statePort),
		"--host", "localhost",
	}
	if invalidSSL {
		restoreParams = append(restoreParams, "--sslAllowInvalidCertificates")
	}
	if batchSize > 0 {
		restoreParams = append(restoreParams, "--batchSize", strconv.Itoa(batchSize))
	}
	if adminPassword != "" {
		restoreParams = append(restoreParams, "-u", "admin", "-p", adminPassword)
	}
	var out string
	if len(dbs) == 0 {
		restoreParams = append(restoreParams, filepath.Join(tmpDir, fmt.Sprintf("migrateTo%sdump", migrationName)))
		restoreCallArgs := callArgs
		restoreCallArgs.Func = func() error {
			var err error
			out, err = runCommand(mongorestore, restoreParams...)
			if err == nil {
				return nil
			}
			logger.Errorf("cannot restore %v: %s", err, out)
			return err
		}
		if err := retry.Call(restoreCallArgs); err != nil {
			err := errors.Annotatef(err, "cannot restore dbs got: %s", out)
			logger.Errorf("%#v", err)
			return err
		}
	}
	for i := range dbs {
		restoreDbParams := append(restoreParams,
			fmt.Sprintf("--db=%s", dbs[i]),
			filepath.Join(tmpDir, fmt.Sprintf("migrateTo%sdump", migrationName), dbs[i]))
		restoreCallArgs := callArgs
		restoreCallArgs.Func = func() error {
			var err error
			out, err = runCommand(mongorestore, restoreDbParams...)
			if err == nil {
				return nil
			}
			logger.Errorf("cannot restore db %q: %v: got %s", dbs[i], err, out)
			return err
		}
		if err := retry.Call(restoreCallArgs); err != nil {
			return errors.Annotatef(err, "cannot restore db %q got: %s", dbs[i], out)
		}
		logger.Infof("Successfully restored db %q", dbs[i])
	}
	return nil
}
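// The function above and mongoDumpCall below take a runCommand of type
// utilsRun, which is not defined in this collection. Based on how it is
// called (a binary path plus arguments, returning combined output and an
// error), a minimal sketch of such a command runner could look like the
// following. The function type and the os/exec-based implementation are
// assumptions for illustration, not the actual juju definitions.
type utilsRun func(command string, args ...string) (output string, err error)

// runCommandViaExec is a hypothetical utilsRun built on os/exec: it runs the
// command and returns its combined stdout/stderr along with any error.
func runCommandViaExec(command string, args ...string) (string, error) {
	out, err := exec.Command(command, args...).CombinedOutput()
	return string(out), err
}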
// deleteSecurityGroup attempts to delete the security group. Should it fail,
// the deletion is retried due to timing issues in openstack. A security group
// cannot be deleted while it is in use. Theoretically we terminate all the
// instances before we attempt to delete the associated security groups, but
// in practice nova hasn't always finished with the instance before it
// returns, so there is a race condition where we think the instance is
// terminated and hence attempt to delete the security groups but nova still
// has it around internally. To attempt to catch this timing issue, deletion
// of the groups is tried multiple times.
func deleteSecurityGroup(novaclient *nova.Client, name, id string) {
	logger.Debugf("deleting security group %q", name)
	err := retry.Call(retry.CallArgs{
		Func: func() error { return novaclient.DeleteSecurityGroup(id) },
		NotifyFunc: func(err error, attempt int) {
			if attempt%4 == 0 {
				message := fmt.Sprintf("waiting to delete security group %q", name)
				if attempt != 4 {
					message = "still " + message
				}
				logger.Debugf(message)
			}
		},
		Attempts: 30,
		Delay:    time.Second,
		// TODO(dimitern): This should be fixed to take a clock.Clock arg, not
		// hard-coded WallClock, like in provider/ec2/securitygroups_test.go!
		// See PR juju:#5197, especially the code around autoAdvancingClock.
		// LP Bug: http://pad.lv/1580626.
		Clock: clock.WallClock,
	})
	if err != nil {
		logger.Warningf("cannot delete security group %q. Used by another model?", name)
	}
}
// GetResource returns a reader for the resource's data.
func (client CSRetryClient) GetResource(req charmstore.ResourceRequest) (charmstore.ResourceData, error) {
	args := client.retryArgs // a copy

	var data charmstore.ResourceData
	args.Func = func() error {
		var err error
		data, err = client.Client.GetResource(req)
		if err != nil {
			return errors.Trace(err)
		}
		return nil
	}

	var lastErr error
	args.NotifyFunc = func(err error, i int) {
		// Remember the error we're hiding and then retry!
		logger.Debugf("(attempt %d) retrying resource download from charm store due to error: %v", i, err)
		lastErr = err
	}

	err := retry.Call(args)
	if retry.IsAttemptsExceeded(err) {
		return data, errors.Annotate(lastErr, "failed after retrying")
	}
	if err != nil {
		return data, errors.Trace(err)
	}

	return data, nil
}
// dialAndLogin returns a mongo session logged in as a user with
// administrative privileges.
func dialAndLogin(mongoInfo *mongo.MongoInfo, callArgs retry.CallArgs) (mgoSession, mgoDb, error) {
	var session *mgo.Session
	opts := mongo.DefaultDialOpts()
	callArgs.Func = func() error {
		// Try to connect, retry a few times until the db comes up.
		var err error
		session, err = mongo.DialWithInfo(mongoInfo.Info, opts)
		if err == nil {
			return nil
		}
		logger.Errorf("cannot open mongo connection: %v", err)
		return err
	}
	if err := retry.Call(callArgs); err != nil {
		return nil, nil, errors.Annotate(err, "error dialing mongo to resume HA")
	}

	admin := session.DB("admin")
	if mongoInfo.Tag != nil {
		if err := admin.Login(mongoInfo.Tag.String(), mongoInfo.Password); err != nil {
			return nil, nil, errors.Annotatef(err, "cannot log in to admin database as %q", mongoInfo.Tag)
		}
	} else if mongoInfo.Password != "" {
		if err := admin.Login(mongo.AdminUser, mongoInfo.Password); err != nil {
			return nil, nil, errors.Annotate(err, "cannot log in to admin database")
		}
	}
	return session, admin, nil
}
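// mgoSession and mgoDb are referenced above but not defined in this
// collection. Judging by the values returned (*mgo.Session and
// *mgo.Database), they are narrow interfaces satisfied by those mgo types.
// The method sets below are a sketch inferred from how the results are used
// here; the actual juju definitions may differ.
type mgoSession interface {
	DB(name string) *mgo.Database
	Close()
}

type mgoDb interface {
	Login(user, pass string) error
	Run(cmd interface{}, result interface{}) error
}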
// mongoDumpCall runs mongodump against the local mongo instance, retrying
// according to callArgs, and returns the combined command output.
func mongoDumpCall(
	runCommand utilsRun,
	tmpDir, mongoPath, adminPassword, migrationName string,
	statePort int,
	callArgs retry.CallArgs,
) (string, error) {
	mongodump := filepath.Join(mongoPath, "mongodump")
	dumpParams := []string{
		"--ssl",
		"-u", "admin",
		"-p", adminPassword,
		"--port", strconv.Itoa(statePort),
		"--host", "localhost",
		"--out", filepath.Join(tmpDir, fmt.Sprintf("migrateTo%sdump", migrationName)),
	}
	var out string
	callArgs.Func = func() error {
		var err error
		out, err = runCommand(mongodump, dumpParams...)
		if err == nil {
			return nil
		}
		logger.Errorf("cannot dump db %v: %s", err, out)
		return err
	}
	if err := retry.Call(callArgs); err != nil {
		logger.Errorf(out)
		return out, errors.Annotate(err, "cannot dump mongo db")
	}
	return out, nil
}
// APICall places a call to the remote machine.
//
// This fills out the rpc.Request on the given facade, version for a given
// object id, and the specific RPC method. It marshals the Arguments, and will
// unmarshal the result into the supplied response object.
func (s *state) APICall(facade string, version int, id, method string, args, response interface{}) error {
	retrySpec := retry.CallArgs{
		Func: func() error {
			return s.client.Call(rpc.Request{
				Type:    facade,
				Version: version,
				Id:      id,
				Action:  method,
			}, args, response)
		},
		IsFatalError: func(err error) bool {
			err = errors.Cause(err)
			ec, ok := err.(hasErrorCode)
			if !ok {
				return true
			}
			return ec.ErrorCode() != params.CodeRetry
		},
		Delay:       100 * time.Millisecond,
		MaxDelay:    1500 * time.Millisecond,
		MaxDuration: 10 * time.Second,
		BackoffFunc: retry.DoubleDelay,
		Clock:       s.clock,
	}
	err := retry.Call(retrySpec)
	return errors.Trace(err)
}
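// IsFatalError above type-asserts the error's cause to a hasErrorCode
// interface that is not defined in this collection. A minimal sketch
// consistent with that usage (the error code is compared against the string
// constant params.CodeRetry) is below; the exact definition in juju may
// carry additional methods.
type hasErrorCode interface {
	ErrorCode() string
}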
func (*retrySuite) TestMissingFuncNotValid(c *gc.C) {
	err := retry.Call(retry.CallArgs{
		Attempts: 5,
		Delay:    time.Minute,
	})
	c.Check(err, jc.Satisfies, errors.IsNotValid)
	c.Check(err, gc.ErrorMatches, `missing Func not valid`)
}
func (*retrySuite) TestMissingDelayNotValid(c *gc.C) {
	err := retry.Call(retry.CallArgs{
		Func:     func() error { return errors.New("bah") },
		Attempts: 5,
	})
	c.Check(err, jc.Satisfies, errors.IsNotValid)
	c.Check(err, gc.ErrorMatches, `missing Delay not valid`)
}
func (*retrySuite) TestMissingAttemptsNotValid(c *gc.C) {
	err := retry.Call(retry.CallArgs{
		Func:  func() error { return errors.New("bah") },
		Delay: time.Minute,
		Clock: clock.WallClock,
	})
	c.Check(err, jc.Satisfies, errors.IsNotValid)
	c.Check(err, gc.ErrorMatches, `missing Attempts or MaxDuration not valid`)
}
func (*retrySuite) TestSuccessHasNoDelay(c *gc.C) {
	clock := &mockClock{}
	err := retry.Call(retry.CallArgs{
		Func:     func() error { return nil },
		Attempts: 5,
		Delay:    time.Minute,
		Clock:    clock,
	})
	c.Assert(err, jc.ErrorIsNil)
	c.Assert(clock.delays, gc.HasLen, 0)
}
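// The tests in this suite pass a &mockClock{} and assert on clock.delays,
// but the mockClock type itself is not included in this collection. The
// sketch below shows one plausible shape for such a test double: it records
// every requested delay and returns an already-fired timer channel so the
// tests never actually sleep. It is an illustration, not the suite's real
// helper, and it only implements the clock methods the retry code is
// assumed to need here (Now and After).
type mockClock struct {
	now    time.Time
	delays []time.Duration
}

// Now returns the mock's current time, advanced by every delay passed to After.
func (c *mockClock) Now() time.Time {
	return c.now
}

// After records the requested delay, advances the mock time by that amount,
// and returns a channel that is ready immediately.
func (c *mockClock) After(d time.Duration) <-chan time.Time {
	c.delays = append(c.delays, d)
	c.now = c.now.Add(d)
	ch := make(chan time.Time, 1)
	ch <- c.now
	return ch
}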
func (*retrySuite) TestAttemptsExceededError(c *gc.C) {
	clock := &mockClock{}
	funcErr := errors.New("bah")
	err := retry.Call(retry.CallArgs{
		Func:     func() error { return funcErr },
		Attempts: 5,
		Delay:    time.Minute,
		Clock:    clock,
	})
	c.Assert(err, gc.ErrorMatches, `attempt count exceeded: bah`)
	c.Assert(err, jc.Satisfies, retry.IsAttemptsExceeded)
	c.Assert(retry.LastError(err), gc.Equals, funcErr)
}
func (*retrySuite) TestFatalErrorsNotRetried(c *gc.C) {
	clock := &mockClock{}
	funcErr := errors.New("bah")
	err := retry.Call(retry.CallArgs{
		Func:         func() error { return funcErr },
		IsFatalError: func(error) bool { return true },
		Attempts:     5,
		Delay:        time.Minute,
		Clock:        clock,
	})
	c.Assert(errors.Cause(err), gc.Equals, funcErr)
	c.Assert(clock.delays, gc.HasLen, 0)
}
func (*retrySuite) TestBackoffErrors(c *gc.C) {
	// Backoff values of less than one are a validation error.
	for _, factor := range []float64{-2, 0.5} {
		err := retry.Call(retry.CallArgs{
			Func:          func() error { return errors.New("bah") },
			Attempts:      5,
			Delay:         time.Minute,
			BackoffFactor: factor,
		})
		c.Check(err, jc.Satisfies, errors.IsNotValid)
		c.Check(err, gc.ErrorMatches, `BackoffFactor of .* not valid`)
	}
}
func (*retrySuite) TestWithWallClock(c *gc.C) {
	var attempts []int

	err := retry.Call(retry.CallArgs{
		Func: func() error { return errors.New("bah") },
		NotifyFunc: func(lastError error, attempt int) {
			attempts = append(attempts, attempt)
		},
		Attempts: 5,
		Delay:    time.Microsecond,
	})
	c.Assert(errors.Cause(err), jc.Satisfies, retry.IsAttemptsExceeded)
	c.Assert(attempts, jc.DeepEquals, []int{1, 2, 3, 4, 5})
}
// It appears that sometimes the lock is not cleared when we expect it to be.
// Capture and log any errors from the Unlock method and retry a few times.
func (s *store) unlock(lock *fslock.Lock) {
	err := retry.Call(retry.CallArgs{
		Func: lock.Unlock,
		NotifyFunc: func(err error, attempt int) {
			logger.Debugf("failed to unlock jujuclient lock: %s", err)
		},
		Attempts: 10,
		Delay:    50 * time.Millisecond,
		Clock:    clock.WallClock,
	})
	if err != nil {
		logger.Errorf("unable to unlock jujuclient lock: %s", err)
	}
}
func (*retrySuite) TestAttempts(c *gc.C) {
	clock := &mockClock{}
	funcErr := errors.New("bah")
	err := retry.Call(retry.CallArgs{
		Func:     func() error { return funcErr },
		Attempts: 4,
		Delay:    time.Minute,
		Clock:    clock,
	})
	c.Assert(err, jc.Satisfies, retry.IsAttemptsExceeded)
	// We delay between attempts, and don't delay after the last one.
	c.Assert(clock.delays, jc.DeepEquals, []time.Duration{
		time.Minute,
		time.Minute,
		time.Minute,
	})
}
func (*retrySuite) TestMaxDuration(c *gc.C) {
	clock := &mockClock{}
	err := retry.Call(retry.CallArgs{
		Func:        func() error { return errors.New("bah") },
		Delay:       time.Minute,
		MaxDuration: 5 * time.Minute,
		Clock:       clock,
	})
	c.Assert(err, jc.Satisfies, retry.IsDurationExceeded)
	c.Assert(clock.delays, jc.DeepEquals, []time.Duration{
		time.Minute,
		time.Minute,
		time.Minute,
		time.Minute,
		time.Minute,
	})
}
func (*retrySuite) TestBackoffFactor(c *gc.C) {
	clock := &mockClock{}
	err := retry.Call(retry.CallArgs{
		Func:        func() error { return errors.New("bah") },
		Clock:       clock,
		Attempts:    5,
		Delay:       time.Minute,
		BackoffFunc: retry.DoubleDelay,
	})
	c.Assert(err, jc.Satisfies, retry.IsAttemptsExceeded)
	c.Assert(clock.delays, jc.DeepEquals, []time.Duration{
		time.Minute,
		time.Minute * 2,
		time.Minute * 4,
		time.Minute * 8,
	})
}
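// retry.DoubleDelay is the backoff function used in several of these
// examples. The assertions above imply its behaviour: the first delay is
// left unchanged and every subsequent delay is doubled, producing the
// 1m, 2m, 4m, 8m sequence. A functionally equivalent sketch (an
// illustration, not the library's source) is:
func doubleDelaySketch(delay time.Duration, attempt int) time.Duration {
	// Keep the configured delay for the first attempt, then double it
	// on every later attempt.
	if attempt == 1 {
		return delay
	}
	return delay * 2
}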
func (*retrySuite) TestMaxDurationDoubling(c *gc.C) {
	clock := &mockClock{}
	err := retry.Call(retry.CallArgs{
		Func:        func() error { return errors.New("bah") },
		Delay:       time.Minute,
		MaxDuration: 10 * time.Minute,
		BackoffFunc: retry.DoubleDelay,
		Clock:       clock,
	})
	c.Assert(err, jc.Satisfies, retry.IsDurationExceeded)
	// Stops after seven minutes, because the next wait time
	// would take it to 15 minutes.
	c.Assert(clock.delays, jc.DeepEquals, []time.Duration{
		time.Minute,
		2 * time.Minute,
		4 * time.Minute,
	})
}
// Dial returns a new connection to the MongoDB server.
func (inst *MgoInstance) Dial() (*mgo.Session, error) {
	var session *mgo.Session
	err := retry.Call(retry.CallArgs{
		Func: func() error {
			var err error
			session, err = mgo.DialWithInfo(inst.DialInfo())
			return err
		},
		// Only interested in retrying the intermittent
		// 'unexpected message'.
		IsFatalError: func(err error) bool {
			return !strings.HasSuffix(err.Error(), "unexpected message")
		},
		Delay:    time.Millisecond,
		Clock:    clock.WallClock,
		Attempts: 5,
	})
	return session, err
}
func (*retrySuite) TestCalledOnceEvenIfStopped(c *gc.C) {
	stop := make(chan struct{})
	clock := &mockClock{}
	called := false
	close(stop)
	err := retry.Call(retry.CallArgs{
		Func: func() error {
			called = true
			return nil
		},
		Attempts: 5,
		Delay:    time.Minute,
		Clock:    clock,
		Stop:     stop,
	})
	c.Assert(called, jc.IsTrue)
	c.Assert(err, jc.ErrorIsNil)
	c.Assert(clock.delays, gc.HasLen, 0)
}
func (*retrySuite) TestStopChannel(c *gc.C) {
	clock := &mockClock{}
	stop := make(chan struct{})
	count := 0
	err := retry.Call(retry.CallArgs{
		Func: func() error {
			if count == 2 {
				close(stop)
			}
			count++
			return errors.New("bah")
		},
		Attempts: 5,
		Delay:    time.Minute,
		Clock:    clock,
		Stop:     stop,
	})
	c.Assert(err, jc.Satisfies, retry.IsRetryStopped)
	c.Assert(clock.delays, gc.HasLen, 3)
}
func (*retrySuite) TestMaxDelay(c *gc.C) {
	clock := &mockClock{}
	err := retry.Call(retry.CallArgs{
		Func:        func() error { return errors.New("bah") },
		Attempts:    7,
		Delay:       time.Minute,
		MaxDelay:    10 * time.Minute,
		BackoffFunc: retry.DoubleDelay,
		Clock:       clock,
	})
	c.Assert(err, jc.Satisfies, retry.IsAttemptsExceeded)
	c.Assert(clock.delays, jc.DeepEquals, []time.Duration{
		time.Minute,
		2 * time.Minute,
		4 * time.Minute,
		8 * time.Minute,
		10 * time.Minute,
		10 * time.Minute,
	})
}
func (*retrySuite) TestInfiniteRetries(c *gc.C) {
	// OK, we can't test infinite, but we'll go for lots.
	clock := &mockClock{}
	stop := make(chan struct{})
	count := 0
	err := retry.Call(retry.CallArgs{
		Func: func() error {
			if count == 111 {
				close(stop)
			}
			count++
			return errors.New("bah")
		},
		Attempts: retry.UnlimitedAttempts,
		Delay:    time.Minute,
		Clock:    clock,
		Stop:     stop,
	})
	c.Assert(err, jc.Satisfies, retry.IsRetryStopped)
	c.Assert(clock.delays, gc.HasLen, count)
}
// Reset resets the entire dummy environment and forgets any registered
// operation listener. All opened environments after Reset will share
// the same underlying state.
func Reset(c *gc.C) {
	logger.Infof("reset model")
	dummy.mu.Lock()
	dummy.ops = discardOperations
	oldState := dummy.state
	dummy.controllerState = nil
	dummy.state = make(map[string]*environState)
	dummy.newStatePolicy = stateenvirons.GetNewPolicyFunc(
		stateenvirons.GetNewEnvironFunc(environs.New),
	)
	dummy.supportsSpaces = true
	dummy.supportsSpaceDiscovery = false
	dummy.mu.Unlock()

	// NOTE(axw) we must destroy the old states without holding
	// the provider lock, or we risk deadlocking. Destroying
	// state involves closing the embedded API server, which
	// may require waiting on RPC calls that interact with the
	// EnvironProvider (e.g. EnvironProvider.Open).
	for _, s := range oldState {
		if s.apiListener != nil {
			s.apiListener.Close()
		}
		s.destroy()
	}
	if mongoAlive() {
		err := retry.Call(retry.CallArgs{
			Func: gitjujutesting.MgoServer.Reset,
			// Only interested in retrying the intermittent
			// 'unexpected message'.
			IsFatalError: func(err error) bool {
				return !strings.HasSuffix(err.Error(), "unexpected message")
			},
			Delay:    time.Millisecond,
			Clock:    clock.WallClock,
			Attempts: 5,
		})
		c.Assert(err, jc.ErrorIsNil)
	}
}
// call will call the supplied function, with exponential backoff
// as long as the request returns an http.StatusTooManyRequests
// status.
func (c backoffAPIRequestCaller) call(f func() (autorest.Response, error)) error {
	var resp *http.Response
	return retry.Call(retry.CallArgs{
		Func: func() error {
			autorestResp, err := f()
			resp = autorestResp.Response
			return err
		},
		IsFatalError: func(err error) bool {
			return resp == nil || !autorest.ResponseHasStatusCode(resp, http.StatusTooManyRequests)
		},
		NotifyFunc: func(err error, attempt int) {
			logger.Debugf("attempt %d: %v", attempt, err)
		},
		Attempts:    -1,
		Delay:       retryDelay,
		MaxDelay:    maxRetryDelay,
		MaxDuration: maxRetryDuration,
		BackoffFunc: retry.DoubleDelay,
		Clock:       c.clock,
	})
}
// deleteSecurityGroup attempts to delete the security group. Should it fail,
// the deletion is retried due to timing issues in openstack. A security group
// cannot be deleted while it is in use. Theoretically we terminate all the
// instances before we attempt to delete the associated security groups, but
// in practice nova hasn't always finished with the instance before it
// returns, so there is a race condition where we think the instance is
// terminated and hence attempt to delete the security groups but nova still
// has it around internally. To attempt to catch this timing issue, deletion
// of the groups is tried multiple times.
func deleteSecurityGroup(novaclient *nova.Client, name, id string) {
	logger.Debugf("deleting security group %q", name)
	err := retry.Call(retry.CallArgs{
		Func: func() error { return novaclient.DeleteSecurityGroup(id) },
		NotifyFunc: func(err error, attempt int) {
			if attempt%4 == 0 {
				message := fmt.Sprintf("waiting to delete security group %q", name)
				if attempt != 4 {
					message = "still " + message
				}
				logger.Debugf(message)
			}
		},
		Attempts: 30,
		Delay:    time.Second,
		Clock:    clock.WallClock,
	})
	if err != nil {
		logger.Warningf("cannot delete security group %q. Used by another environment?", name)
	}
}
func (*retrySuite) TestNotifyFunc(c *gc.C) {
	var (
		clock      = &mockClock{}
		funcErr    = errors.New("bah")
		attempts   []int
		funcErrors []error
	)
	err := retry.Call(retry.CallArgs{
		Func: func() error { return funcErr },
		NotifyFunc: func(lastError error, attempt int) {
			funcErrors = append(funcErrors, lastError)
			attempts = append(attempts, attempt)
		},
		Attempts: 3,
		Delay:    time.Minute,
		Clock:    clock,
	})
	c.Assert(err, jc.Satisfies, retry.IsAttemptsExceeded)
	c.Assert(clock.delays, gc.HasLen, 2)
	c.Assert(funcErrors, jc.DeepEquals, []error{funcErr, funcErr, funcErr})
	c.Assert(attempts, jc.DeepEquals, []int{1, 2, 3})
}