/
kinesis_worker.go
250 lines (200 loc) · 5.68 KB
/
kinesis_worker.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
package kinesis_worker
import (
"time"
log "github.com/Sirupsen/logrus"
"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/service/kinesis"
"github.com/aws/aws-sdk-go/service/kinesis/kinesisiface"
)
// Shard iterator types accepted by the Kinesis GetShardIterator API.
// These mirror the string constants defined by the AWS Kinesis service.
const (
	// Start reading exactly at the given sequence number.
	ShardIteratorTypeAtSequenceNumber = "AT_SEQUENCE_NUMBER"
	// Start reading just after the given sequence number.
	ShardIteratorTypeAfterSequenceNumber = "AFTER_SEQUENCE_NUMBER"
	// Start at the oldest record still retained in the shard.
	ShardIteratorTypeTrimHorizon = "TRIM_HORIZON"
	// Start just after the most recent record (only new data).
	ShardIteratorTypeLatest = "LATEST"
)
// Defaults applied by StreamWorker.initialize for zero-valued fields.
const (
	// DefaultBatchSize is the GetRecords Limit used when BatchSize is 0.
	DefaultBatchSize = 10
	// DefaultSleepTime is the minimum delay between GetRecords calls
	// when SleepTime is 0.
	DefaultSleepTime = time.Second
	// DefaultRegion is not referenced in this file.
	// NOTE(review): presumably consumed by callers building an aws.Config — confirm.
	DefaultRegion = "us-west-1"
)
// Logger is the package-wide logrus logger used for all worker logging.
// Callers may reconfigure or replace it before starting workers.
var Logger *log.Logger = log.New()
// Use custom types to allow wrapping later if required

// Client is the Kinesis API surface this package depends on; any
// kinesisiface.KinesisAPI implementation (including mocks) satisfies it.
type Client kinesisiface.KinesisAPI

// Record is a single Kinesis record pointer as returned by GetRecords.
type Record *kinesis.Record

// Worker is anything that can be started and stopped; implemented by both
// StreamWorker and ShardWorker in this file.
type Worker interface {
	// Start begins the worker's processing; returns an error on setup failure.
	Start() error
	// Stop requests that the worker shut down.
	Stop()
}
// Manages a single Kinesis stream and a pool of workers, one for each shard.
type StreamWorker struct {
	// AwsConfig is used to build a Kinesis client when Client is nil.
	AwsConfig *aws.Config
	// StreamName is the Kinesis stream to consume.
	StreamName string
	// IteratorType is one of the ShardIteratorType* constants;
	// defaults to ShardIteratorTypeLatest when empty.
	IteratorType string
	// StartingSequenceNumber is forwarded to GetShardIterator; may be nil.
	// Only meaningful for the AT/AFTER_SEQUENCE_NUMBER iterator types.
	StartingSequenceNumber *string
	// BatchSize is the GetRecords Limit; defaults to DefaultBatchSize when 0.
	BatchSize int64
	// SleepTime is the minimum delay between GetRecords calls per shard;
	// defaults to DefaultSleepTime when 0.
	SleepTime time.Duration
	// Client is the Kinesis API client; created from AwsConfig when nil.
	Client Client
	// workers holds one ShardWorker per shard, populated by Start.
	workers []Worker
	// Blocking channel of kinesis Records
	Output chan Record
	// Non-blocking channel of state updates
	State chan WorkerState
}
// initialize fills in defaults for any zero-valued configuration fields and
// lazily constructs the output/state channels and the Kinesis client when
// the caller did not supply them. Always returns nil; the error return is
// kept for forward compatibility.
func (stream *StreamWorker) initialize() error {
	if len(stream.IteratorType) == 0 {
		stream.IteratorType = ShardIteratorTypeLatest
	}
	if stream.BatchSize == 0 {
		stream.BatchSize = DefaultBatchSize
	}
	if stream.SleepTime == 0 {
		stream.SleepTime = DefaultSleepTime
	}
	// Channels are created unbuffered: Output intentionally blocks producers
	// until a consumer reads; State is drained non-blockingly by updateState.
	if stream.Output == nil {
		stream.Output = make(chan Record)
	}
	if stream.State == nil {
		stream.State = make(chan WorkerState)
	}
	// Only build a real client when the caller has not injected one (tests
	// typically inject a mock satisfying the Client interface).
	if stream.Client == nil {
		stream.Client = kinesis.New(stream.AwsConfig)
	}
	Logger.WithFields(log.Fields{
		"StreamWorker": stream,
		"AWS":          stream.AwsConfig,
	}).Debug("StreamWorker initialized")
	return nil
}
// Start initializes the stream worker, discovers the stream's shards via
// DescribeStream, creates one ShardWorker per shard, and starts them all.
// Returns an error if initialization, shard discovery, worker creation, or
// worker startup fails.
func (stream *StreamWorker) Start() error {
	if err := stream.initialize(); err != nil {
		return err
	}
	// Get list of shards for the stream.
	// NOTE(review): only the first page of DescribeStream results is read;
	// streams with many shards (HasMoreShards set) would need pagination —
	// confirm expected shard counts.
	stream_res, err := stream.Client.DescribeStream(&kinesis.DescribeStreamInput{
		StreamName: &stream.StreamName,
	})
	if err != nil {
		return err
	}
	// Create one worker for each shard in the stream
	numWorkers := len(stream_res.StreamDescription.Shards)
	stream.workers = make([]Worker, numWorkers)
	for i, shard := range stream_res.StreamDescription.Shards {
		worker, err := NewShardWorker(stream, shard)
		if err != nil {
			return err
		}
		stream.workers[i] = worker
	}
	Logger.WithFields(log.Fields{
		"StreamName": stream.StreamName,
		"Shards":     numWorkers,
		"Workers":    numWorkers,
	}).Debug("StreamWorker starting")
	// Worker setup was successful, now start them all.
	// BUG FIX: worker.Start()'s error return was previously discarded;
	// propagate it so callers learn about startup failures.
	for _, worker := range stream.workers {
		if err := worker.Start(); err != nil {
			return err
		}
	}
	return nil
}
// Stop asks every shard worker to shut down. Each Stop call blocks until
// that worker's run loop acknowledges the signal.
func (stream *StreamWorker) Stop() {
	for i := range stream.workers {
		stream.workers[i].Stop()
	}
}
// Retrieves records from a single shard and sends them on a channel
type ShardWorker struct {
	// Stream is the owning StreamWorker (shared client, config, channels).
	Stream *StreamWorker
	// Shard is the shard description returned by DescribeStream.
	Shard *kinesis.Shard
	// ShardID is the dereferenced shard identifier, kept for logging.
	ShardID string
	// ShardIterator is the current iterator; replaced after each GetRecords.
	ShardIterator string
	// done carries the shutdown signal from Stop to the run loop (unbuffered).
	done chan bool
}

// WorkerState is a point-in-time progress report for one shard, published
// non-blockingly on StreamWorker.State.
type WorkerState struct {
	// ShardID identifies which shard this state refers to.
	ShardID string
	// Lag is GetRecords' MillisBehindLatest: how far behind the tip of the
	// shard this worker is, in milliseconds.
	Lag int64
}
// NewShardWorker obtains an initial shard iterator for the given shard and
// returns a worker bound to it. Returns an error when the GetShardIterator
// call fails.
func NewShardWorker(stream *StreamWorker, shard *kinesis.Shard) (*ShardWorker, error) {
	input := &kinesis.GetShardIteratorInput{
		StreamName:             &stream.StreamName,
		ShardID:                shard.ShardID,
		ShardIteratorType:      &stream.IteratorType,
		StartingSequenceNumber: stream.StartingSequenceNumber,
	}
	out, err := stream.Client.GetShardIterator(input)
	if err != nil {
		return nil, err
	}
	return &ShardWorker{
		Stream:        stream,
		Shard:         shard,
		ShardID:       *shard.ShardID,
		ShardIterator: *out.ShardIterator,
		done:          make(chan bool),
	}, nil
}
// Start launches the worker's polling loop in its own goroutine. It always
// returns nil; the error return exists to satisfy the Worker interface.
func (w *ShardWorker) Start() error {
	go w.run()
	return nil
}
// Stop signals the polling goroutine to exit. The send on the unbuffered
// done channel blocks until run() receives it, so Stop must be called at
// most once per started worker.
// NOTE(review): a second Stop call, or a call on a never-started worker,
// blocks forever — confirm callers only stop each worker once.
func (w *ShardWorker) Stop() {
	w.done <- true
}
// run is the worker's main loop: fetch one batch of records, then wait for
// either the minimum delay tick or a shutdown signal before the next fetch.
func (w *ShardWorker) run() {
	delayTimer := time.NewTicker(w.Stream.SleepTime)
	// BUG FIX: the ticker was never stopped, leaking its timer for the
	// lifetime of the process after the worker exited.
	defer delayTimer.Stop()
	Logger.WithFields(log.Fields{
		"ShardID":       w.ShardID,
		"ShardIterator": w.ShardIterator,
	}).Debug("ShardWorker starting")
	for {
		w.step()
		select {
		case <-delayTimer.C:
			// Minimum delay has elapsed, proceed with next iteration
			continue
		case <-w.done:
			// Received shutdown message from StreamWorker, finish
			Logger.WithFields(log.Fields{
				"ShardID": w.ShardID,
			}).Debug("ShardWorker finishing")
			return
		}
	}
}
// Fetch one batch of records and send them to the output channel.
// On API error it logs and returns, leaving the iterator unchanged so the
// next tick retries.
func (w *ShardWorker) step() {
	records_res, err := w.Stream.Client.GetRecords(&kinesis.GetRecordsInput{
		ShardIterator: &w.ShardIterator,
		Limit:         &w.Stream.BatchSize,
	})
	if err != nil {
		Logger.WithFields(log.Fields{
			"Error":         err,
			"ShardIterator": w.ShardIterator,
		}).Error("GetRecords API call failed")
		// Wait until next iteration or exit
		return
	}
	lag := *records_res.MillisBehindLatest
	w.updateState(WorkerState{ShardID: w.ShardID, Lag: lag})
	// BUG FIX: NextShardIterator is nil once a shard has been closed
	// (e.g. after a split or merge); the old code dereferenced it
	// unconditionally and panicked. Build the log fields defensively and
	// only advance the iterator when one was returned.
	fields := log.Fields{
		"MillisBehindLatest": lag,
		"NumRecords":         len(records_res.Records),
		"ShardIterator":      w.ShardIterator,
	}
	if records_res.NextShardIterator != nil {
		fields["NextShardIterator"] = *records_res.NextShardIterator
	}
	Logger.WithFields(fields).Debug("Successfully fetched records")
	// Add each record in the result to the StreamWorker's output channel
	for _, record := range records_res.Records {
		w.Stream.Output <- record
	}
	if records_res.NextShardIterator == nil {
		// Shard is closed and fully consumed; keep the old iterator so
		// subsequent calls fail loudly instead of panicking here.
		Logger.WithFields(log.Fields{
			"ShardID": w.ShardID,
		}).Warn("Shard closed: no NextShardIterator returned")
		return
	}
	// Use the new ShardIterator returned in the response for the next request
	w.ShardIterator = *records_res.NextShardIterator
}
// updateState publishes the worker's latest state without blocking: when no
// receiver is waiting on the State channel, the update is simply dropped so
// the fetch loop is never stalled by a slow (or absent) state consumer.
func (w *ShardWorker) updateState(state WorkerState) {
	select {
	case w.Stream.State <- state:
		// A listener consumed the update.
	default:
		// No listener ready; discard this update.
	}
}