/
retry.go
195 lines (172 loc) · 6.31 KB
/
retry.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
// Copyright 2015 Canonical Ltd.
// Licensed under the LGPLv3, see LICENCE file for details.
// The retry package encapsulates the mechanism around retrying commands.
//
// The simple use is to call retry.Call with a function closure.
//
// ```go
// err := retry.Call(retry.CallArgs{
// Func: func() error { ... },
// Attempts: 5,
// Delay: time.Minute,
// })
// ```
//
// The bare minimum arguments that need to be specified are:
// * Func - the function to call
// * Attempts - the number of times to try Func before giving up
// * Delay - how long to wait between each try that returns an error
//
// Any error that is returned from the `Func` is considered transient.
// In order to identify some errors as fatal, pass in a function for the
// `IsFatalError` CallArgs value.
//
// Exponential backoff is supported by passing a value > 1 for the
// `BackoffFactor` in the CallArgs. This is treated as a multiplier for
// the specified `Delay` each time through the loop. To cap the `Delay`,
// pass a value for the `MaxDelay`.
package retry
import (
"fmt"
"math"
"time"
"github.com/juju/errors"
"github.com/juju/utils/clock"
)
const (
// UnlimitedAttempts can be used as a value for `Attempts` to clearly
// show to the reader that there is no limit to the number of attempts.
UnlimitedAttempts = -1
)
var (
// RetryStopped is the error that is returned from the retry functions
// when the stop channel has been closed.
RetryStopped = errors.New("retry stopped")
)
// AttemptsExceeded is the error that is returned when the retry count has
// been hit without the function returning a nil error result. The last error
// returned from the function being retried is available as the LastError
// attribute.
type AttemptsExceeded struct {
LastError error
}
// Error provides the implementation for the error interface method.
func (e *AttemptsExceeded) Error() string {
return fmt.Sprintf("attempt count exceeded: %s", e.LastError)
}
// IsAttemptsExceeded returns true if the error is a AttemptsExceeded
// error.
func IsAttemptsExceeded(err error) bool {
_, ok := err.(*AttemptsExceeded)
return ok
}
// IsRetryStopped returns true if the error is RetryStopped.
func IsRetryStopped(err error) bool {
return errors.Cause(err) == RetryStopped
}
// CallArgs is a simple structure used to define the behaviour of the Call
// function.
type CallArgs struct {
// Func is the function that will be retried if it returns an error result.
Func func() error
// IsFatalError is a function that, if set, will be called for every non-
// nil error result from `Func`. If `IsFatalError` returns true, the error
// is immediately returned breaking out from any further retries.
IsFatalError func(error) bool
// NotifyFunc is a function that is called if Func fails, and the attempt
// number. The first time this function is called attempt is 1, the second
// time, attempt is 2 and so on.
NotifyFunc func(lastError error, attempt int)
// Attempts specifies the number of times Func should be retried before
// giving up and returning the `AttemptsExceeded` error. If a negative
// value is specified, the `Call` will retry forever.
Attempts int
// Delay specifies how long to wait between retries.
Delay time.Duration
// MaxDelay specifies how longest time to wait between retries. If no
// value is specified there is no maximum delay.
MaxDelay time.Duration
// BackoffFactor is a multiplier used on the Delay each time the function waits.
// If not specified, a factor of 1 is used, which means the delay does not increase
// each time through the loop. A factor of 2 would indicate that the second delay
// would be twice the first, and the third twice the second, and so on.
BackoffFactor float64
// Clock defaults to clock.Wall, but allows the caller to pass one in.
// Primarily used for testing purposes.
Clock clock.Clock
// Stop is a channel that can be used to indicate that the waiting should
// be interrupted. If Stop is nil, then the Call function cannot be interrupted.
// If the channel is closed prior to the Call function being executed, the
// Func is still attempted once.
Stop <-chan struct{}
}
// Validate the values are valid. The ensures that the Func, Delay and Attempts
// have been specified, and that the BackoffFactor makes sense (i.e. one or greater).
// If BackoffFactor is not explicitly set, it is set here to be one.
func (args *CallArgs) Validate() error {
if args.BackoffFactor == 0 {
args.BackoffFactor = 1
}
if args.Clock == nil {
args.Clock = clock.WallClock
}
if args.Func == nil {
return errors.NotValidf("missing Func")
}
if args.Delay == 0 {
return errors.NotValidf("missing Delay")
}
if args.Attempts == 0 {
return errors.NotValidf("missing Attempts")
}
if args.BackoffFactor < 1 {
return errors.NotValidf("BackoffFactor of %s", args.BackoffFactor)
}
return nil
}
// Call will repeatedly execute the Func until either the function returns no
// error, the retry count is exceeded or the stop channel is closed.
func Call(args CallArgs) error {
err := args.Validate()
if err != nil {
return errors.Trace(err)
}
for i := 1; args.Attempts < 0 || i <= args.Attempts; i++ {
err = args.Func()
if err == nil {
return nil
}
if args.IsFatalError != nil && args.IsFatalError(err) {
return errors.Trace(err)
}
if args.NotifyFunc != nil {
args.NotifyFunc(err, i)
}
if i == args.Attempts && args.Attempts > 0 {
break // don't wait before returning the error
}
// Wait for the delay, and retry
select {
case <-args.Clock.After(args.Delay):
case <-args.Stop:
return RetryStopped
}
args.Delay = ScaleDuration(args.Delay, args.MaxDelay, args.BackoffFactor)
}
return errors.Wrap(err, &AttemptsExceeded{err})
}
// ScaleDuration scale up the `current` duration by a factor of `scale`, with
// a capped value of `max`. If `max` is zero, it means there is no maximum
// duration.
func ScaleDuration(current, max time.Duration, scale float64) time.Duration {
// Any overhead that we may possibly incur by multiplying something by one,
// is more than overcome by the sleeping that will occur.
// Since scale may be something like 1.5, or 2 and time.Duration is an
// int64, we need a little casting here. Also, a negative scale is treated
// as positive.
duration := (time.Duration)((float64)(current) * math.Abs(scale))
if duration > max && max > 0 {
return max
}
return duration
}