/
db_client.go
161 lines (149 loc) · 3.07 KB
/
db_client.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
package flister
import (
"bufio"
"fmt"
"github.com/matryer/filedb"
"github.com/nteissler/stringfixer"
"log"
"os"
)
// Dbpath is the directory that holds the file database. It is a relative
// path, so it resolves against the process's working directory. checkDB
// creates the directory when it is missing and hands the path to filedb.Dial.
const Dbpath = "./database"
// Client bundles the channels through which Find and FindProgress talk to
// their caller: results and progress flow out, a stop signal flows in.
type Client struct {
// Matches carries one "collection/entry" message per hit. Find and
// FindProgress close it when they return.
Matches chan []byte
// Progress carries completion percentages sent by FindProgress, which also
// closes it when it returns. Find does not touch this channel.
Progress chan int
// Done is the stop signal: a running search ends early once a value has been
// received from this channel.
Done chan struct{}
}
// ParseFileToDB loads the text file at filename into the database under
// Dbpath, creating the database first when it does not exist. Every line of
// the file becomes one entry in a collection whose name is obtained from
// stringfixer.DeleteExtension(filename).
//
// The function has no error return: any failure (missing or unreadable file,
// database or collection cannot be opened, a line cannot be inserted, or the
// scanner reports a read error) terminates the process through log.Fatalln.
func ParseFileToDB(filename string) {
	var fatalErr error
	// Registered first so that it runs last: log.Fatalln exits the process,
	// and the database and file handles deferred below must be closed before
	// that happens.
	defer func() {
		if fatalErr != nil {
			log.Fatalln(fatalErr)
		}
	}()

	// Fail early, before the database directory is created, when the input
	// cannot be stat'ed for any reason (not only when it is missing).
	if _, err := os.Stat(filename); err != nil {
		fatalErr = err
		return
	}

	// create the database if it doesn't exist
	db, err := checkDB()
	if err != nil {
		fatalErr = err
		return
	}
	defer db.Close()

	collectionName, err := stringfixer.DeleteExtension(filename)
	if err != nil {
		fatalErr = err
		return
	}
	col, err := db.C(collectionName)
	if err != nil {
		fatalErr = err
		return
	}

	// read lines in from the file and make one db entry per line
	file, err := os.Open(filename)
	if err != nil {
		fatalErr = err
		return
	}
	defer file.Close()

	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		// A failed insert must not be ignored, otherwise lines are lost
		// without any indication to the user.
		if err := col.Insert(scanner.Bytes()); err != nil {
			fatalErr = err
			return
		}
	}
	// Scan also stops on read errors and over-long lines; surface those too.
	if err := scanner.Err(); err != nil {
		fatalErr = err
	}
}
// checkDB is a wrapper around filedb.Dial that will also create the database
// directory at Dbpath if it doesn't already exist. It returns the opened
// database, or a nil database and the error from creating the directory or
// from dialing.
func checkDB() (*filedb.DB, error) {
	// MkdirAll is a no-op when the directory is already present, so there is
	// no separate existence check that could race with another caller
	// creating the directory at the same moment.
	if err := os.MkdirAll(Dbpath, 0777); err != nil {
		return nil, err
	}
	db, err := filedb.Dial(Dbpath)
	if err != nil {
		return nil, err
	}
	return db, nil
}
// Find searches every collection in the database for entries that r matches
// against query. Each hit is sent on c.Matches as "collection/entry" as soon
// as it is found, so results can be reported to the user incrementally rather
// than as one big dump of data.
//
// The search ends early once a value is received from c.Done (a closed Done
// channel counts as well). c.Matches is closed before Find returns, whether
// the search completed or was stopped.
//
// Failing to open the database, list its collections, or open a collection
// terminates the process through log.Fatalln. An error while iterating a
// single collection is logged and the search continues with the next one.
func (c *Client) Find(query string, r Retriever) {
	db, err := checkDB()
	if err != nil {
		log.Fatalln(err)
	}
	defer db.Close()

	// stop is closed once the caller signals Done. A closed channel can be
	// observed from the search loop without a data race and can take part in
	// a select, which a shared bool cannot.
	// NOTE: the watcher stays parked until Done is signalled, even after Find
	// has returned, so that a late send on Done does not block the caller.
	stop := make(chan struct{})
	go func() {
		<-c.Done
		close(stop)
	}()

	collections, err := db.ColNames()
	if err != nil {
		log.Fatalln(err)
	}

	cancelled := false
	for _, colString := range collections {
		col, err := db.C(colString)
		if err != nil {
			log.Fatalln(err)
		}
		// The callback returns true to make ForEach stop iterating.
		err = col.ForEach(func(_ int, data []byte) bool {
			select {
			case <-stop:
				cancelled = true
				return true
			default:
			}
			if !r.Match(query, string(data)) {
				return false
			}
			// Give up on the send if the caller cancels while nobody is
			// reading Matches any more; a bare send would block forever.
			select {
			case c.Matches <- []byte(fmt.Sprintf("%v/%v", colString, string(data))):
				return false
			case <-stop:
				cancelled = true
				return true
			}
		})
		if err != nil {
			log.Println(err)
		}
		if cancelled {
			break
		}
	}
	close(c.Matches)
}
// FindProgress does the same search as Find and additionally reports progress
// on c.Progress: after each collection has been scanned it sends the
// percentage of collections processed so far, and after the last one it sends
// a final 100.
//
// The search ends early once a value is received from c.Done (a closed Done
// channel counts as well). After a stop no further progress values are sent,
// in particular not the final 100. c.Progress and c.Matches are both closed
// before FindProgress returns; c.Done is left open because it belongs to the
// caller.
//
// Failing to open the database, list its collections, or open a collection
// terminates the process through log.Fatalln. An error while iterating a
// single collection is logged and the search continues with the next one.
func (c *Client) FindProgress(query string, r Retriever) {
	db, err := checkDB()
	if err != nil {
		log.Fatalln(err)
	}
	defer db.Close()

	collections, err := db.ColNames()
	if err != nil {
		log.Fatalln(err)
	}
	total := float64(len(collections))

	// stop is closed once the caller signals Done. A closed channel can be
	// observed from the search loop without a data race and can take part in
	// a select, which a shared bool cannot.
	// NOTE: the watcher stays parked until Done is signalled, even after
	// FindProgress has returned, so that a late send on Done does not block
	// the caller.
	stop := make(chan struct{})
	go func() {
		<-c.Done
		close(stop)
	}()

	cancelled := false
	for i, colString := range collections {
		col, err := db.C(colString)
		if err != nil {
			log.Fatalln(err)
		}
		// The callback returns true to make ForEach stop iterating.
		err = col.ForEach(func(_ int, data []byte) bool {
			select {
			case <-stop:
				cancelled = true
				return true
			default:
			}
			if !r.Match(query, string(data)) {
				return false
			}
			// Give up on the send if the caller cancels while nobody is
			// reading Matches any more; a bare send would block forever.
			select {
			case c.Matches <- []byte(fmt.Sprintf("%v/%v", colString, string(data))):
				return false
			case <-stop:
				cancelled = true
				return true
			}
		})
		if err != nil {
			log.Println(err)
		}
		if cancelled {
			break
		}
		select {
		case c.Progress <- int(float64(i+1) / total * 100):
		case <-stop:
			cancelled = true
		}
		if cancelled {
			break
		}
	}

	// The closing 100 also covers a database without any collections, where
	// the loop above never reports anything.
	if !cancelled {
		select {
		case c.Progress <- 100:
		case <-stop:
		}
	}
	close(c.Progress)
	close(c.Matches)
}