/
pllcount.go
95 lines (80 loc) · 1.87 KB
/
pllcount.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
package main
import (
"strings"
"loglog"
"os"
"bufio"
"fmt"
"time"
"strconv"
)
// MBITS is the bit-count parameter handed to loglog.NewCounter in main.
// NOTE(review): presumably the number of hash bits used to pick a register
// in the counter — confirm against the loglog package.
const MBITS = 12
// streamWords opens filename and returns a channel that yields every
// whitespace-separated word of the file, in file order. The channel is
// closed once the file has been read to the end or a read error occurs.
//
// If the file cannot be opened, the error is reported on standard error and
// the returned channel is closed without yielding any words.
//
// The channel is unbuffered, so the reading goroutine blocks until each word
// is received; callers are expected to drain the channel.
func streamWords(filename string) chan string {
	ch := make(chan string)
	go func() {
		// Close the channel on every exit path so receivers terminate.
		defer close(ch)

		f, err := os.Open(filename)
		if err != nil {
			fmt.Fprintln(os.Stderr, "streamWords:", err)
			return
		}
		defer f.Close()

		reader := bufio.NewReader(f)
		for {
			line, err := reader.ReadString('\n')
			// ReadString returns the data read so far together with the
			// error, so emit the words before checking err; otherwise a
			// final line without a trailing newline would be dropped.
			for _, word := range strings.Fields(line) {
				ch <- word
			}
			if err != nil {
				return
			}
		}
	}()
	return ch
}
// scatter distributes the words received on in across k worker goroutines
// and gathers their results on the returned channel.
//
// Each worker announces itself on standard output, then for every word it
// receives computes loglog.HashValue, fills a fresh loglog.Entry through
// loglog.SetEntry using counter.MBits, and sends a pointer to that entry on
// the output channel. Once in is closed and all k workers have finished, the
// output channel is closed.
//
// When k is zero or negative no workers are started, nothing is read from
// in, and the output channel is closed right away.
func scatter(in chan string, k int, counter *loglog.Counter) chan *loglog.Entry {
	results := make(chan *loglog.Entry)
	finished := make(chan int)

	worker := func(id int) {
		fmt.Println("Worker:", id)
		for word := range in {
			// A new Entry per word: the pointer is handed to the receiver.
			entry := new(loglog.Entry)
			hash := loglog.HashValue(word)
			loglog.SetEntry(hash, counter.MBits, entry)
			results <- entry
		}
		finished <- 1
	}
	for id := 0; id < k; id++ {
		go worker(id)
	}

	// Close the output only after every worker has reported completion.
	go func() {
		for remaining := k; remaining > 0; remaining-- {
			<-finished
		}
		close(results)
	}()

	return results
}
// main streams the words of pg100.txt through a pool of workers, digests
// every resulting entry into a loglog counter created with MBITS, and prints
// the counter's Estimate followed by the elapsed time.
//
// The optional first command-line argument is the number of workers; it
// defaults to 8 when omitted. An argument that is not a positive integer is
// rejected with an error on standard error and exit status 1, because a
// worker count below one would make scatter close its output immediately
// and the program would print an estimate computed from no input.
func main() {
	workers := 8
	if len(os.Args) < 2 {
		fmt.Println("Usage default workers=8")
	} else {
		n, err := strconv.ParseInt(os.Args[1], 10, 32)
		if err != nil || n < 1 {
			fmt.Fprintf(os.Stderr, "invalid worker count %q: must be a positive integer\n", os.Args[1])
			os.Exit(1)
		}
		workers = int(n)
	}

	c := loglog.NewCounter(MBITS)
	filename := "pg100.txt"
	words := streamWords(filename)
	hashvalues := scatter(words, workers, c)

	// Timing covers draining the pipeline and computing the estimate.
	start := time.Now()
	for entry := range hashvalues {
		c.DigestEntry(entry)
	}
	fmt.Println(c.Estimate())
	fmt.Printf("In %.2f seconds.\n", time.Since(start).Seconds())
}