forked from hanguofeng/taiji
/
callback_manager.go
289 lines (231 loc) · 7.75 KB
/
callback_manager.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
package main
import (
"time"
"github.com/Shopify/sarama"
"github.com/golang/glog"
"github.com/wvanbergen/kazoo-go"
)
// Retry and delay intervals used by CallbackManager.Run.
const (
// REGISTER_CONSUMER_GROUP_RETRY_TIME is how long Run sleeps after a failed
// consumer group registration before trying again.
REGISTER_CONSUMER_GROUP_RETRY_TIME = 10 * time.Second
// GET_CONSUMER_LIST_RETRY_TIME is how long Run sleeps after failing to watch
// the list of registered consumer instances before trying again.
GET_CONSUMER_LIST_RETRY_TIME = 10 * time.Second
// RUN_PARTITION_MANAGER_RETRY_TIME is how long Run sleeps after partitionRun
// returns an error before trying again.
RUN_PARTITION_MANAGER_RETRY_TIME = 10 * time.Second
// CONSUMER_LIST_CHANGE_RELOAD_TIME is how long Run sleeps after closing the
// partition managers on a consumer list change, before rebalancing.
CONSUMER_LIST_CHANGE_RELOAD_TIME = 5 * time.Second
// WATCH_INSTANCE_CHANGE_DELAY_TIME is how long Run sleeps after registering
// this instance, before it starts watching the consumer instance list.
WATCH_INSTANCE_CHANGE_DELAY_TIME = 5 * time.Second
)
// CallbackManager holds the state needed to consume a set of Kafka topics on
// behalf of one callback URL: its configuration, the ZooKeeper handles used for
// consumer group membership, the sarama consumer, the per-partition managers
// and the offset manager.
type CallbackManager struct {
// Embedded start/stop control (defined elsewhere in the package).
*StartStopControl
// Topics is copied from the callback config in Init.
Topics []string
// GroupName is the consumer group name, computed by getGroupName(Url) in Init.
GroupName string
// Url is the callback URL, copied from the callback config in Init.
Url string
// Configuration: the callback item config passed to Init, plus the Kafka and
// ZooKeeper client configs that Init builds.
config *CallbackItemConfig
kafkaConfig *sarama.Config
zookeeperConfig *kazoo.Config
// ZooKeeper handles, created by connectZookeeper.
kazoo *kazoo.Kazoo // ZooKeeper client
kazooGroup *kazoo.Consumergroup // consumer group object, ZK path /consumers/<cgname>/
kazooGroupInstance *kazoo.ConsumergroupInstance // this instance's object, ZK path /consumers/<cgname>/ids/<cginstance>
// Kafka sarama consumer, created by connectKafka.
kafkaConsumer sarama.Consumer
// Partition managers: the slice is rebuilt on every partitionRun call and
// started through partitionManagerRunner, which Init creates.
partitionManagers []*PartitionManager
partitionManagerRunner *ServiceRunner
// Offset manager, created and initialised in Init and started by Run.
offsetManager *OffsetManager
}
// NewCallbackManager allocates a CallbackManager whose only populated field is
// a fresh StartStopControl. Call Init before Run to fill in everything else.
func NewCallbackManager() *CallbackManager {
	manager := new(CallbackManager)
	manager.StartStopControl = NewStartStopControl()
	return manager
}
// Init configures the manager from config. It stores the config, copies its
// topics and URL, derives the consumer group name from the URL, builds the
// Kafka and ZooKeeper client configs, and creates and initialises the offset
// manager. The partition manager runner is created last, only after the offset
// manager initialised successfully.
//
// It returns the error reported by the offset manager's Init, or nil.
func (cm *CallbackManager) Init(config *CallbackItemConfig) error {
	cm.config = config
	cm.Url = config.Url
	cm.Topics = config.Topics

	// The group name is a function of the callback URL (the original comment
	// describes it as MD5(Url); getGroupName is defined elsewhere).
	cm.GroupName = getGroupName(config.Url)

	// Kafka client config: the group name doubles as the client ID.
	kafkaCfg := sarama.NewConfig()
	kafkaCfg.ClientID = cm.GroupName
	cm.kafkaConfig = kafkaCfg

	// ZooKeeper client config: chroot taken from the callback config.
	zkCfg := kazoo.NewConfig()
	zkCfg.Chroot = config.ZkPath
	cm.zookeeperConfig = zkCfg

	// The offset manager receives cm, so every field above must already be set
	// and cm.offsetManager must be assigned before Init is called on it.
	offsets := NewOffsetManager()
	cm.offsetManager = offsets
	if err := offsets.Init(config.OffsetConfig, cm); err != nil {
		return err
	}

	cm.partitionManagerRunner = NewServiceRunner()
	return nil
}
// Run connects to ZooKeeper and Kafka, starts the offset manager, and then
// loops until the manager is closed.
//
// Each loop iteration:
//  1. registers this instance in the consumer group (retrying after
//     REGISTER_CONSUMER_GROUP_RETRY_TIME on failure),
//  2. sleeps WATCH_INSTANCE_CHANGE_DELAY_TIME,
//  3. watches the list of registered instances (retrying after
//     GET_CONSUMER_LIST_RETRY_TIME on failure),
//  4. starts the partition managers through partitionRun (retrying after
//     RUN_PARTITION_MANAGER_RETRY_TIME on failure),
//  5. blocks until a close is requested, the consumer list changes, or the
//     partition manager runner exits.
//
// After the loop it deregisters the instance and closes the offset manager,
// the sarama consumer and the ZooKeeper connection; errors from those steps
// are logged but not returned.
//
// It returns a non-nil error only when ensureStart fails or when the initial
// ZooKeeper or Kafka connection cannot be established. If connecting to Kafka
// fails, the ZooKeeper connection that was just opened is closed before
// returning so it is not leaked.
func (cm *CallbackManager) Run() error {
	// mark service as started
	// (ensureStart/markStop are not defined in this file; presumably they come
	// from the embedded StartStopControl.)
	if err := cm.ensureStart(); err != nil {
		return err
	}
	defer cm.markStop()

	// init zookeeper
	if err := cm.connectZookeeper(); err != nil {
		return err
	}

	// init kafka sarama consumer
	if err := cm.connectKafka(); err != nil {
		// connectZookeeper succeeded, so cm.kazoo holds an open connection that
		// nothing else will close on this early-return path.
		if closeErr := cm.kazoo.Close(); closeErr != nil {
			glog.Errorf("Failed closing the Zookeeper connection [err:%s]", closeErr)
		}
		return err
	}

	go cm.offsetManager.Run()

callbackManagerFailoverLoop:
	for {
		if !cm.Running() {
			break callbackManagerFailoverLoop
		}

		// registerConsumergroup logs its own failures, so only back off here.
		if err := cm.registerConsumergroup(); err != nil {
			time.Sleep(REGISTER_CONSUMER_GROUP_RETRY_TIME)
			continue
		}

		glog.Infof("Waiting for %v to avoid consumer register rebalance herd",
			WATCH_INSTANCE_CHANGE_DELAY_TIME)
		time.Sleep(WATCH_INSTANCE_CHANGE_DELAY_TIME)

		consumers, consumerChanges, err := cm.kazooGroup.WatchInstances()
		if err != nil {
			glog.Errorf("Failed to get list of registered consumer instances [err:%s]", err)
			time.Sleep(GET_CONSUMER_LIST_RETRY_TIME)
			continue
		}
		glog.Infof("Currently registered consumers [totalConsumers:%d]", len(consumers))

		// get partitionConsuming assignments
		// start ServiceRunner of PartitionManager
		// TODO refactor this
		if err := cm.partitionRun(consumers); err != nil {
			glog.Errorf("Failed to init partition consumer [err:%s]", err)
			time.Sleep(RUN_PARTITION_MANAGER_RETRY_TIME)
			continue
		}

		select {
		case <-cm.WaitForCloseChannel():
			// Shutdown requested: stop the partition managers and leave the loop.
			cm.partitionManagerRunner.Close()
			break callbackManagerFailoverLoop
		case <-consumerChanges:
			// Group membership changed: stop the partition managers, wait, then
			// loop around to recompute the partition assignment.
			glog.Infof("Triggering rebalance due to consumer list change")
			cm.partitionManagerRunner.Close()
			glog.Infof("Waiting for %v to avoid consumer inflight rebalance herd",
				CONSUMER_LIST_CHANGE_RELOAD_TIME)
			time.Sleep(CONSUMER_LIST_CHANGE_RELOAD_TIME)
		case <-cm.partitionManagerRunner.WaitForExitChannel():
			// The runner stopped on its own; loop around and start it again.
			glog.Warning("PartitionManager unexpectedly stopped")
		}
	}

	// deregister Consumergroup instance from zookeeper
	if err := cm.kazooGroupInstance.Deregister(); err != nil {
		glog.Errorf("Failed deregistering consumer instance [err:%s]", err)
	} else {
		glog.Infof("Deregistered consumer instance [instanceId:%s]", cm.kazooGroupInstance.ID)
	}

	// sync close offsetManager
	if err := cm.offsetManager.Close(); err != nil {
		glog.Errorf("Failed closing the offset manager [err:%s]", err)
	}

	// close sarama Consumer
	if err := cm.kafkaConsumer.Close(); err != nil {
		glog.Errorf("Failed closing the Sarama client [err:%s]", err)
	}

	// close zookeeper connection
	if err := cm.kazoo.Close(); err != nil {
		glog.Errorf("Failed closing the Zookeeper connection [err:%s]", err)
	}

	return nil
}
// GetPartitionManagers returns the manager's current partition manager slice.
// The slice is returned as-is, not copied; partitionRun replaces it on every
// call.
func (cm *CallbackManager) GetPartitionManagers() []*PartitionManager {
return cm.partitionManagers
}
// GetOffsetManager returns the offset manager created in Init; it is nil
// before Init has been called.
func (cm *CallbackManager) GetOffsetManager() *OffsetManager {
return cm.offsetManager
}
// GetKazooGroup returns the ZooKeeper consumer group handle set by
// connectZookeeper; it is nil until that has run.
func (cm *CallbackManager) GetKazooGroup() *kazoo.Consumergroup {
return cm.kazooGroup
}
// GetKazooGroupInstance returns the ZooKeeper consumer group instance handle
// set by connectZookeeper; it is nil until that has run.
func (cm *CallbackManager) GetKazooGroupInstance() *kazoo.ConsumergroupInstance {
return cm.kazooGroupInstance
}
// GetKafkaConsumer returns the sarama consumer set by connectKafka; it is nil
// until that has run.
func (cm *CallbackManager) GetKafkaConsumer() sarama.Consumer {
return cm.kafkaConsumer
}
// GetConfig returns the callback item config that was passed to Init.
func (cm *CallbackManager) GetConfig() *CallbackItemConfig {
return cm.config
}
// connectZookeeper opens a ZooKeeper client for the configured servers and,
// on success, derives the consumer group handle for cm.GroupName and a new
// instance handle within that group.
//
// cm.kazoo is overwritten with whatever kazoo.NewKazoo returned even when it
// fails; in that case the error is returned and the group handles are left
// untouched.
func (cm *CallbackManager) connectZookeeper() error {
	client, err := kazoo.NewKazoo(cm.config.Zookeepers, cm.zookeeperConfig)
	cm.kazoo = client
	if err != nil {
		return err
	}

	// zookeeper ConsumerGroup and instance handles
	group := client.Consumergroup(cm.GroupName)
	cm.kazooGroup = group
	cm.kazooGroupInstance = group.NewInstance()
	return nil
}
// connectKafka looks up the Kafka broker list through the ZooKeeper client and
// creates a sarama consumer for those brokers using cm.kafkaConfig.
//
// It returns the broker lookup error or the consumer creation error, or nil.
// cm.kazoo must already be connected (see connectZookeeper).
func (cm *CallbackManager) connectKafka() error {
	brokerAddrs, err := cm.kazoo.BrokerList()
	if err != nil {
		return err
	}

	// cm.kafkaConsumer takes whatever sarama.NewConsumer returned, including on
	// failure, in which case the error is passed on to the caller.
	cm.kafkaConsumer, err = sarama.NewConsumer(brokerAddrs, cm.kafkaConfig)
	return err
}
// registerConsumergroup makes sure the consumer group node exists in
// ZooKeeper, creating it if necessary, and then registers this instance for
// cm.Topics.
//
// An instance that is already registered counts as success. Every failure is
// logged here before the error is returned.
func (cm *CallbackManager) registerConsumergroup() error {
	// Ensure the consumer group node exists.
	exists, err := cm.kazooGroup.Exists()
	if err != nil {
		glog.Errorf("Failed to check for existence of consumergroup [err:%s]", err)
		return err
	}
	if !exists {
		glog.Infof("Consumergroup does not yet exists, creating [consumergroup:%s] ", cm.GroupName)
		if createErr := cm.kazooGroup.Create(); createErr != nil {
			glog.Errorf("Failed to create consumergroup in zookeeper [err:%s]", createErr)
			return createErr
		}
	}

	// Register this instance; "already registered" is treated like success.
	switch regErr := cm.kazooGroupInstance.Register(cm.Topics); regErr {
	case nil, kazoo.ErrInstanceAlreadyRegistered:
		glog.Infof("Consumer instance registered [instanceId:%s]", cm.kazooGroupInstance.ID)
		return nil
	default:
		glog.Errorf("Failed to register consumer instance [err:%s]", regErr)
		return regErr
	}
}
// partitionRun rebuilds cm.partitionManagers for the given set of registered
// consumer instances and starts them asynchronously through
// cm.partitionManagerRunner.
//
// For every topic in cm.Topics it fetches the partition list from ZooKeeper,
// resolves the partition leaders, divides the partitions between the
// consumers, and creates one PartitionManager for each partition assigned to
// this instance (looked up by cm.kazooGroupInstance.ID).
//
// It returns the first error from fetching partitions, resolving leaders or
// starting the runner. If an error occurs part-way, cm.partitionManagers is
// left holding the managers created so far.
func (cm *CallbackManager) partitionRun(consumers kazoo.ConsumergroupInstanceList) error {
	cm.partitionManagers = make([]*PartitionManager, 0)

	for _, topic := range cm.Topics {
		// Fetch a list of partition IDs
		partitions, err := cm.kazoo.Topic(topic).Partitions()
		if err != nil {
			// Report the topic being processed, not always the first one.
			glog.Errorf("Failed to get list of partitions [topic:%s][err:%s]", topic, err)
			return err
		}

		partitionLeaders, err := retrievePartitionLeaders(partitions)
		if err != nil {
			glog.Errorf("Failed to get leaders of partitions [topic:%s][err:%s]", topic, err)
			return err
		}

		// divide partition for each callback manager instance
		dividedPartitions := dividePartitionsBetweenConsumers(consumers, partitionLeaders)
		myPartitions := dividedPartitions[cm.kazooGroupInstance.ID]

		for i := range myPartitions {
			partitionManager := NewPartitionManager()
			if err := partitionManager.Init(cm.config, topic, myPartitions[i].ID, cm); err != nil {
				// NOTE(review): glog.Fatalf terminates the process after logging,
				// so the return below is never reached and the caller's retry path
				// is bypassed. Left unchanged; confirm whether a hard exit is
				// intended here.
				glog.Fatalf("Init partition manager failed [url:%s][err:%s]", cm.Url, err)
				return err
			}
			cm.partitionManagers = append(cm.partitionManagers, partitionManager)
		}
	}

	// Retry budget for the runner: three per partition manager.
	cm.partitionManagerRunner.RetryTimes = len(cm.partitionManagers) * 3
	cm.partitionManagerRunner.Prepare()
	if _, err := cm.partitionManagerRunner.RunAsync(cm.partitionManagers); err != nil {
		return err
	}
	return nil
}