forked from heroku/log-shuttle
/
batcher.go
101 lines (86 loc) · 2.83 KB
/
batcher.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
package main
import (
"sync"
"time"
)
// StartBatchers launches config.NumBatchers goroutines, each of which builds
// its own Batcher from the given configuration and channels and runs its
// Batch loop. The returned WaitGroup is released once per goroutine when that
// goroutine's Batch call returns, so callers can Wait on it for all batchers
// to finish.
func StartBatchers(config ShuttleConfig, drops *Counter, stats chan<- NamedValue, inLogs <-chan LogLine, outBatches chan<- Batch) *sync.WaitGroup {
	var wg sync.WaitGroup

	// One worker body shared by every goroutine; each invocation creates a
	// fresh Batcher so no state is shared beyond the channels and counter.
	worker := func() {
		defer wg.Done()
		NewBatcher(config.BatchSize, config.WaitDuration, drops, stats, inLogs, outBatches).Batch()
	}

	for remaining := config.NumBatchers; remaining > 0; remaining-- {
		wg.Add(1)
		go worker()
	}

	return &wg
}
// Batcher groups individual log lines read from inLogs into batches and
// hands the completed batches to outBatches.
type Batcher struct {
inLogs <-chan LogLine // Source of the log lines to batch
outBatches chan<- Batch // Destination for completed batches
stats chan<- NamedValue // Where measurements are sent
drops *Counter // Counter increased by the message count of every batch that could not be delivered
timeout time.Duration // How long to wait after the first log line of a batch before flushing it
batchSize int // Number of log lines after which a batch is flushed
}
// NewBatcher returns a Batcher that reads log lines from inLogs, groups them
// into batches of up to batchSize lines (or until timeout has passed since
// the first line of the batch), and sends them to outBatches. Measurements go
// to stats and undeliverable messages are counted in drops.
func NewBatcher(batchSize int, timeout time.Duration, drops *Counter, stats chan<- NamedValue, inLogs <-chan LogLine, outBatches chan<- Batch) Batcher {
	var b Batcher

	// Limits.
	b.batchSize = batchSize
	b.timeout = timeout

	// Channels and counters.
	b.inLogs = inLogs
	b.outBatches = outBatches
	b.stats = stats
	b.drops = drops

	return b
}
// Batch repeatedly fills a batch with fillBatch and offers every non-empty
// batch to outBatches without blocking. A batch that cannot be handed over
// immediately is discarded and its message count is added to the drops
// counter. The loop ends after the batch for which fillBatch reported that
// the input channel is closed has been processed.
func (batcher Batcher) Batch() {
	for finished := false; !finished; {
		var filled Batch
		finished, filled = batcher.fillBatch()

		n := filled.MsgCount()
		if n <= 0 {
			// Nothing collected; go around again (or stop if finished).
			continue
		}

		select {
		case batcher.outBatches <- filled:
			// Accepted by the delivery channel: record how many messages went out.
			batcher.stats <- NewNamedValue("batch.msg.count", float64(n))
		default:
			// Delivery channel not ready: record and count the lost messages.
			batcher.stats <- NewNamedValue("batch.msg.dropped", float64(n))
			batcher.drops.Add(n)
		}
	}
}
// fillBatch coalesces individual log lines from inLogs into one batch.
//
// The batch is returned when any of the following happens:
//   - batcher.timeout has elapsed since the first line of this batch arrived,
//   - the batch has received batcher.batchSize lines, or
//   - inLogs has been closed.
//
// The first return value is true only when inLogs was found closed, telling
// the caller to shut down after handling the returned (possibly empty) batch.
// If at least one line was received, a "batch.fill.time" measurement covering
// the time since that first line is sent to stats as the function returns.
func (batcher Batcher) fillBatch() (bool, Batch) {
	batch := NewBatch(batcher.batchSize)

	// A receive from a nil channel blocks forever, so the timeout case of the
	// select below stays disabled until the first line arms the timer.
	var expired <-chan time.Time
	count := 0

	for {
		select {
		case <-expired:
			// Timer fired; the input channel is still open.
			return false, batch

		case line, open := <-batcher.inLogs:
			if !open {
				// Input closed: hand back whatever was collected and signal shutdown.
				return true, batch
			}

			// First line of this batch: start timing and arm the flush timer.
			// Deferred calls run in reverse order, so the timer is stopped
			// before the fill-time measurement is sent.
			if expired == nil {
				defer func(t time.Time) { batcher.stats <- NewNamedValue("batch.fill.time", time.Since(t).Seconds()) }(time.Now())
				timer := time.NewTimer(batcher.timeout)
				defer timer.Stop() // ensure timer is stopped when done
				expired = timer.C
			}

			batch.Add(line)
			count++
			if count >= batcher.batchSize {
				// Batch is full; the input channel is still open.
				return false, batch
			}
		}
	}
}