forked from facebookarchive/grace
/
grace.go
319 lines (278 loc) · 8.06 KB
/
grace.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
// Package grace allows for gracefully waiting for a listener to
// finish serving its active requests.
package grace
import (
"errors"
"fmt"
"net"
"os"
"os/exec"
"os/signal"
"strconv"
"strings"
"sync"
"syscall"
"time"
)
var (
// ErrNotInheriting is returned by Inherit() when we're not inheriting any fds.
ErrNotInheriting = errors.New("grace: no inherited listeners")
// ErrAlreadyClosed is returned by Listener.Accept() once Close has been
// called on the listener.
ErrAlreadyClosed = errors.New("grace: already closed")
// errRestartListeners is returned by Restart when it is given no listeners.
errRestartListeners = errors.New("grace: restart must be given listeners")
// errTermTimeout is returned by term when TermTimeout elapses before all
// listeners have finished closing.
errTermTimeout = errors.New("grace: TERM timeout in closing listeners")
// Time in the past to trigger immediate deadline. It is captured when the
// package is initialised, so it has already passed by the time Close hands
// it to SetDeadline.
timeInPast = time.Now()
// Test if init activated by checking ppid on startup since we will get
// re-parented once the old parent is killed and we will end up looking like
// we're init started.
initStarted = os.Getppid() == 1
)
const (
// Used to indicate a graceful restart in the new process: Restart sets this
// environment variable to the number of listeners handed over and Inherit
// reads it back.
envCountKey = "LISTEN_FDS"
// envCountKeyPrefix is the "KEY=" form used to find and rewrite the entry
// in the environment passed to the new process.
envCountKeyPrefix = envCountKey + "="
// The error returned by the standard library when the socket is closed.
// Accept matches it as a suffix of the error message.
errClosed = "use of closed network connection"
)
// Listener is a net.Listener that provides a graceful Close process and can
// be sent across processes using the underlying File descriptor.
type Listener interface {
net.Listener
// File returns the underlying file representing this Listener. Restart
// uses it to obtain the descriptors passed to the new process.
File() (f *os.File, err error)
}
// listener wraps a Listener and tracks outstanding work so that Close can
// wait for it to finish.
type listener struct {
Listener
// closed is set by Close and checked by Accept; it is guarded by
// closedMutex.
closed bool
closedMutex sync.RWMutex
// wg counts pending Accept calls and accepted connections that have not
// been closed yet; Close waits on it.
wg sync.WaitGroup
}
// deadliner is implemented by listeners that support setting a deadline.
// Close uses it on init provided sockets to make a blocked Accept return.
type deadliner interface {
SetDeadline(t time.Time) error
}
// conn wraps an accepted net.Conn so that the listener which produced it can
// notice when the connection is closed.
type conn struct {
	net.Conn
	wg   *sync.WaitGroup // counter to release once the connection is closed
	once sync.Once       // ensures wg is released a single time
}

// Close closes the wrapped connection and returns its error. The wait group
// is released exactly once, no matter how many times Close is called.
func (c *conn) Close() error {
	release := func() { c.once.Do(c.wg.Done) }
	defer release()
	return c.Conn.Close()
}
// NewListener returns l wrapped in a Listener whose Close() waits for
// outstanding work before returning. The wrapper starts out open with
// nothing pending.
func NewListener(l Listener) Listener {
	wrapped := new(listener)
	wrapped.Listener = l
	return wrapped
}
// Close marks the listener as closed, stops the underlying listener from
// accepting and then blocks until every pending Accept call and every
// accepted connection has finished.
//
// The error from closing the underlying listener (or from setting its
// deadline, see below) is returned once the wait is over.
func (l *listener) Close() error {
	l.closedMutex.Lock()
	l.closed = true
	l.closedMutex.Unlock()

	var result error
	if !initStarted {
		result = l.Listener.Close()
	} else if d, canDeadline := l.Listener.(deadliner); canDeadline {
		// Init provided sockets dont actually close so we trigger Accept to
		// return by setting the deadline.
		result = d.SetDeadline(timeInPast)
	} else {
		fmt.Fprintln(os.Stderr, "init activated server did not have SetDeadline")
	}

	l.wg.Wait()
	return result
}
// Accept waits for and returns the next connection, wrapped so that the
// listener is notified when the connection is closed.
//
// It returns ErrAlreadyClosed if Close has been called, whether that is
// noticed before accepting, through the underlying listener reporting a
// closed socket, or through the deadline Close sets on init provided
// sockets. Any other error from the underlying listener is returned
// unchanged.
func (l *listener) Accept() (net.Conn, error) {
	// Register the pending accept while holding the read lock and only after
	// confirming the listener is still open. Close sets l.closed under the
	// write lock before it calls l.wg.Wait, so this Add is always ordered
	// before that Wait, which sync.WaitGroup requires for an Add that may
	// start from a zero counter.
	//
	// We still count before calling the underlying Accept rather than after a
	// successful one: otherwise Close could finish waiting in the window
	// between a connection being accepted and it being counted.
	l.closedMutex.RLock()
	if l.closed {
		l.closedMutex.RUnlock()
		return nil, ErrAlreadyClosed
	}
	l.wg.Add(1)
	l.closedMutex.RUnlock()

	// Give the count back on every path that does not hand a connection to
	// the caller. On success the returned conn releases it when closed.
	accepted := false
	defer func() {
		if !accepted {
			l.wg.Done()
		}
	}()

	c, err := l.Listener.Accept()
	if err != nil {
		if strings.HasSuffix(err.Error(), errClosed) {
			return nil, ErrAlreadyClosed
		}
		// Close uses SetDeadline to trigger Accept to return when we're
		// trying to handoff to a child as part of our restart process. In
		// this scenario we want to treat the timeout the same as a Close.
		if nerr, ok := err.(net.Error); ok && nerr.Timeout() {
			l.closedMutex.RLock()
			closed := l.closed
			l.closedMutex.RUnlock()
			if closed {
				return nil, ErrAlreadyClosed
			}
		}
		return nil, err
	}

	accepted = true
	return &conn{Conn: c, wg: &l.wg}, nil
}
// Process configures the restart process.
type Process struct {
// TermTimeout if set will determine how long we'll wait for listeners when
// we're sent the TERM signal. Once it elapses termination gives up with an
// error; a zero value means wait indefinitely.
TermTimeout time.Duration
}
// term closes all the given listeners concurrently and waits for them to
// finish. With a zero TermTimeout it waits indefinitely; otherwise it gives
// up with errTermTimeout once the timeout elapses. When every listener has
// finished, the first reported Close error (if any) is returned.
func (p *Process) term(listeners []Listener) error {
	n := len(listeners)

	// One buffer slot per listener so that no closer ever blocks on
	// reporting its error.
	errs := make(chan error, n)

	var pending sync.WaitGroup
	pending.Add(n)
	for _, l := range listeners {
		go func(l Listener) {
			defer pending.Done()
			if err := l.Close(); err != nil {
				errs <- err
			}
		}(l)
	}

	if p.TermTimeout == 0 {
		// No timeout configured: block until everything has closed.
		pending.Wait()
	} else {
		// Wait in the background so the wait can race against the timer.
		finished := make(chan struct{})
		go func() {
			defer close(finished)
			pending.Wait()
		}()

		select {
		case <-time.After(p.TermTimeout):
			return errTermTimeout
		case <-finished:
		}
	}

	// All closers are done, so any error is already buffered.
	select {
	case err := <-errs:
		return err
	default:
		return nil
	}
}
// Wait blocks handling signals: SIGTERM gracefully terminates by closing the
// given listeners and returns the outcome, SIGUSR2 restarts the process
// passing the listeners on. It only returns on SIGTERM or when a restart
// fails.
func (p *Process) Wait(listeners []Listener) error {
	sigs := make(chan os.Signal, 2)
	signal.Notify(sigs, syscall.SIGTERM, syscall.SIGUSR2)

	for {
		received := <-sigs

		if received == syscall.SIGUSR2 {
			// A successful restart keeps us waiting: the new process sends
			// us a TERM once it is ready, which triggers the real shutdown.
			if err := p.Restart(listeners); err != nil {
				return err
			}
			continue
		}

		if received == syscall.SIGTERM {
			// Stop intercepting signals so that a subsequent TERM triggers
			// the standard go behaviour of terminating.
			signal.Stop(sigs)
			return p.term(listeners)
		}
	}
}
// Inherit returns the listeners handed over by the parent process.
//
// The number of listeners is read from the LISTEN_FDS environment variable.
// ErrNotInheriting is returned when it is unset or empty, and the parse error
// is returned when it is not an integer. Each inherited descriptor is turned
// into a net.Listener and wrapped with NewListener. If any descriptor cannot
// be used, the listeners inherited so far are closed and the error is
// returned with a nil slice.
func (p *Process) Inherit() (listeners []Listener, err error) {
	countStr := os.Getenv(envCountKey)
	if countStr == "" {
		return nil, ErrNotInheriting
	}
	count, err := strconv.Atoi(countStr)
	if err != nil {
		return nil, err
	}

	// inherited holds the unwrapped listeners so that a failure part way
	// through can release them instead of leaking their descriptors.
	var inherited []net.Listener
	abort := func(cause error) ([]Listener, error) {
		for _, nl := range inherited {
			nl.Close() // best effort; cause is the error worth reporting
		}
		return nil, cause
	}

	// If we are inheriting, the listeners will begin at fd 3
	for fd := 3; fd < 3+count; fd++ {
		file := os.NewFile(uintptr(fd), "listener")
		nl, ferr := net.FileListener(file)
		// Per the net package documentation the returned listener is
		// independent of file, so the file is closed regardless of the
		// outcome.
		file.Close()
		if ferr != nil {
			return abort(ferr)
		}
		inherited = append(inherited, nl)

		// Not every net.Listener is guaranteed to expose File(); report that
		// as an error rather than panicking on the type assertion.
		l, ok := nl.(Listener)
		if !ok {
			return abort(fmt.Errorf("grace: inherited fd %d (%T) does not provide File()", fd, nl))
		}
		listeners = append(listeners, NewListener(l))
	}
	return listeners, nil
}
// CloseParent asks the parent process to begin closing by sending it the
// TERM signal. It does not wait for the parent to finish. When the parent is
// pid 1 (init provided sockets, for example systemd) nothing is sent and nil
// is returned.
func (p *Process) CloseParent() error {
	if parent := os.Getppid(); parent != 1 {
		return syscall.Kill(parent, syscall.SIGTERM)
	}
	return nil
}
// Restart starts a new copy of the current program, passing it the given
// listeners as open files after stdin, stdout and stderr, and recording how
// many there are in the LISTEN_FDS environment variable. It returns
// errRestartListeners when no listeners are given, or any error from
// obtaining the files, locating the binary or starting the process.
func (p *Process) Restart(listeners []Listener) (err error) {
	if len(listeners) == 0 {
		return errRestartListeners
	}

	// The new process gets our standard streams followed by one file per
	// listener, in the order given.
	procFiles := make([]*os.File, 0, 3+len(listeners))
	procFiles = append(procFiles, os.Stdin, os.Stdout, os.Stderr)
	for _, l := range listeners {
		f, ferr := l.File()
		if ferr != nil {
			return ferr
		}
		// Our copies must stay open until the new process has been started,
		// hence the deferred Close rather than closing per iteration.
		defer f.Close()
		syscall.CloseOnExec(int(f.Fd()))
		procFiles = append(procFiles, f)
	}

	// Use the original binary location. This works with symlinks such that if
	// the file it points to has been changed we will use the updated symlink.
	binary, err := exec.LookPath(os.Args[0])
	if err != nil {
		return err
	}

	// Pass on the environment and replace the old count key with the new one.
	inheritedEnv := os.Environ()
	childEnv := make([]string, 0, len(inheritedEnv)+1)
	for _, kv := range inheritedEnv {
		if strings.HasPrefix(kv, envCountKeyPrefix) {
			continue
		}
		childEnv = append(childEnv, kv)
	}
	childEnv = append(childEnv, fmt.Sprintf("%s%d", envCountKeyPrefix, len(listeners)))

	attr := &os.ProcAttr{
		// Use PWD instead of os.Getwd(), we don't want to follow symbolic
		// link because Capistrano
		Dir:   os.Getenv("PWD"),
		Env:   childEnv,
		Files: procFiles,
	}
	_, err = os.StartProcess(binary, os.Args, attr)
	return err
}
// defaultProcess backs the package-level Wait, Inherit, CloseParent and
// Restart functions; it waits up to 60 seconds for listeners on TERM.
var defaultProcess = &Process{TermTimeout: 60 * time.Second}
// Wait waits for signals to gracefully terminate or restart the process,
// using the default Process.
func Wait(listeners []Listener) (err error) {
return defaultProcess.Wait(listeners)
}
// Inherit returns the listeners inherited from the parent process, using the
// default Process.
func Inherit() (listeners []Listener, err error) {
return defaultProcess.Inherit()
}
// CloseParent starts the close process in the parent, using the default
// Process. This does not wait for the parent to close and simply sends it the
// TERM signal.
func CloseParent() error {
return defaultProcess.CloseParent()
}
// Restart restarts the process passing the given listeners to the new
// process, using the default Process.
func Restart(listeners []Listener) (err error) {
return defaultProcess.Restart(listeners)
}