forked from heroku/log-shuttle
/
batcher.go
99 lines (86 loc) · 2.92 KB
/
batcher.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
package main
import (
"sync"
"time"
)
// StartBatchers launches config.NumBatchers goroutines. Each goroutine builds
// its own Batcher over the supplied channels and drop counter and then runs
// that Batcher's Batch loop. The returned WaitGroup is released once every
// one of those goroutines has returned.
func StartBatchers(config ShuttleConfig, drops *Counter, stats chan<- NamedValue, inLogs <-chan LogLine, inBatches <-chan *Batch, outBatches chan<- *Batch) *sync.WaitGroup {
	wg := &sync.WaitGroup{}

	// run is the body of a single batcher goroutine.
	run := func() {
		defer wg.Done()
		NewBatcher(config, drops, stats, inLogs, inBatches, outBatches).Batch()
	}

	for remaining := config.NumBatchers; remaining > 0; remaining-- {
		// Register the goroutine before starting it so a caller's Wait
		// cannot observe a zero counter while workers are still launching.
		wg.Add(1)
		go run()
	}

	return wg
}
// Batcher holds the channels, drop counter and flush timeout that the Batch
// and fillBatch methods operate on.
type Batcher struct {
inLogs <-chan LogLine // Receive-only source of the log lines that get written into batches
inBatches <-chan *Batch // Receive-only source of the batches to fill
outBatches chan<- *Batch // Send-only destination for batches that contain at least one message
stats chan<- NamedValue // Send-only destination for measurements
drops *Counter // Increased by a batch's message count when that batch cannot be handed to outBatches
timeout time.Duration // Duration of the flush timer armed when the first line of a batch arrives
}
// NewBatcher returns a Batcher wired to the given channels and drop counter.
// The flush timeout is taken from config.WaitDuration; no other field of
// config is read here.
func NewBatcher(config ShuttleConfig, drops *Counter, stats chan<- NamedValue, inLogs <-chan LogLine, inBatches <-chan *Batch, outBatches chan<- *Batch) *Batcher {
	b := new(Batcher)

	// Channel plumbing.
	b.inLogs = inLogs
	b.inBatches = inBatches
	b.outBatches = outBatches
	b.stats = stats

	// Accounting and timing.
	b.drops = drops
	b.timeout = config.WaitDuration

	return b
}
// Batch receives batches from inBatches one at a time, fills each via
// fillBatch and tries to hand it to outBatches. It returns when inBatches is
// closed and drained, or as soon as fillBatch reports that it is time to
// close down.
func (batcher *Batcher) Batch() {
	for batch := range batcher.inBatches {
		// Report how much work is queued up behind this batch.
		batcher.stats <- NewNamedValue("batcher.inBatches.length", float64(len(batcher.inBatches)))
		batcher.stats <- NewNamedValue("batcher.inLogs.length", float64(len(batcher.inLogs)))

		shutdown := batcher.fillBatch(batch)

		// Only batches that actually hold messages are worth delivering.
		if batch.MsgCount > 0 {
			batcher.stats <- NewNamedValue("batch.msg.count", float64(batch.MsgCount))

			select {
			case batcher.outBatches <- batch:
				// Accepted by the delivery channel; nothing more to do.
			default:
				// The delivery channel would block, so the batch is
				// counted as dropped instead of waiting.
				batcher.stats <- NewNamedValue("batch.msg.dropped", float64(batch.MsgCount))
				batcher.drops.Add(batch.MsgCount)
			}
		}

		if shutdown {
			return
		}
	}
}
// fillBatch coalesces individual log lines from inLogs into batch. It returns
// when one of three things happens:
//
//   - the flush timer, armed when the first line arrives, fires;
//   - batch.Full() reports true after a write;
//   - inLogs is closed.
//
// closeDown is true only in the last case, telling the caller that no more
// log lines will arrive. Lines already written to batch are left in place in
// every case, so the caller still has to deliver a non-empty batch.
func (batcher *Batcher) fillBatch(batch *Batch) (closeDown bool) {
	// A zero Timer has a nil channel, and receiving from a nil channel blocks
	// forever, so the timeout case below cannot fire until a real timer has
	// been installed.
	timeout := new(time.Timer)

	for {
		select {
		case <-timeout.C:
			// Waited long enough since the first line; flush what we have.
			return false

		case line, open := <-batcher.inLogs:
			if !open {
				return true
			}

			// First line of this batch: start the flush timer and arrange
			// for the fill time to be reported when this function returns.
			// The nil check ensures these defers are registered only once.
			if timeout.C == nil {
				defer func(t time.Time) { batcher.stats <- NewNamedValue("batch.fill.time", time.Since(t).Seconds()) }(time.Now())
				timeout = time.NewTimer(batcher.timeout)
				defer timeout.Stop() // ensure timer is stopped when done
			}

			batch.Write(line)
			if batch.Full() {
				return false
			}
		}
	}
}