/
strings.go
217 lines (179 loc) · 4.68 KB
/
strings.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
package main
import (
e "github.com/cinnabardk/allancorfix2/internal/errors"
"github.com/Lazin/go-ngram"
"github.com/argusdusty/Ferret"
"github.com/hashicorp/go-immutable-radix"
"github.com/cosn/collections/tst"
"github.com/derekparker/trie"
"github.com/tchap/go-patricia/patricia"
"github.com/sauerbraten/radix"
"strings"
"github.com/Mitranim/codex"
// "github.com/m4rw3r/uuid"
)
// Shared fixtures for the string-index demos in this file. The two slices are
// used in parallel: the demos pair ExampleWords[i] with ExampleUint32[i].
var (
	// ExampleWords holds the sample phrases that the demos insert as keys.
	ExampleWords = []string{
		"coca cola", "coce med is",
		"koala", "koalant afregning",
		"regning med rykker", "det regner",
		"regn og blest", "borte med blesten",
		"blost og uvejr", "blost og storm",
	}

	// ExampleUint32 holds the values stored alongside the sample phrases.
	ExampleUint32 = []uint32{
		66, 77, 88, 11, 22,
		33, 44, 55, 99, 111,
	}
)
// Ferret builds a Ferret index from the example fixtures, inserts one extra
// entry, and logs the results of an error-correcting query and a plain query
// for "blest".
//
// Original author's note: "No delete" (presumably the library offers no way
// to remove an entry; confirm against the Ferret documentation).
func (_ Test) Ferret() {
	e.InfoLog.Println("\nFerret:")

	// Callbacks handed to the library: how to turn a word into bytes, and how
	// to produce candidate corrections for a query.
	toBytes := func(word string) []byte { return []byte(word) }
	correct := func(query []byte) [][]byte {
		return ferret.ErrorCorrect(query, ferret.LowercaseLetters)
	}

	// Box every uint32 fixture value so it can travel as the per-word payload.
	var payload []interface{}
	for _, value := range ExampleUint32 {
		payload = append(payload, value)
	}

	index := ferret.New(ExampleWords, ExampleWords, payload, toBytes)
	index.Insert("blest i most", "blest i most", 7)

	e.InfoLog.Println(index.ErrorCorrectingQuery("blest", 4, correct))
	e.InfoLog.Println(index.Query("blest", 10))
}
// RTrie exercises the derekparker/trie package with the example fixtures: it
// adds every word together with its paired uint32 value, removes "coca cola",
// looks up "blost og storm", and logs the results of a prefix search and a
// fuzzy search.
//
// Background reading kept from the original comment:
// http://hackthology.com/ternary-search-tries-for-fast-flexible-string-search-part-1.html
func (_ Test) RTrie() {
	e.InfoLog.Println("\nRTrie:")

	t := trie.New()
	for i, word := range ExampleWords {
		t.Add(word, ExampleUint32[i])
	}
	t.Remove("coca cola")

	// Find also reports whether the key exists. Only dereference the node when
	// it does, so a missing key is logged instead of crashing on a nil node.
	if node, ok := t.Find("blost og storm"); ok {
		e.InfoLog.Println(node.Meta().(uint32))
	} else {
		e.InfoLog.Println("blost og storm: not found")
	}

	e.InfoLog.Println(t.PrefixSearch("blost"))
	e.InfoLog.Println(t.FuzzySearch("blest"))
}
// Ngram exercises the go-ngram index with the example fixtures: it builds a
// trigram index (N = 3), adds every example word, logs the string stored for
// the most recently added token, and logs the token ID of every search hit
// for "blest".
//
// Errors returned by the library are logged instead of being discarded. A
// failure to create the index or to search aborts the demo, because there is
// nothing meaningful left to show.
//
// Original author's note: "No delete" (presumably the library offers no way
// to remove an entry; confirm against the go-ngram documentation).
func (_ Test) Ngram() {
	e.InfoLog.Println("\nN-Gram:")

	index, err := ngram.NewNGramIndex(ngram.SetN(3))
	if err != nil {
		// Without an index every later call would operate on a nil pointer.
		e.InfoLog.Println("ngram: creating index:", err)
		return
	}

	// token remembers the ID of the last word that was added successfully.
	var token ngram.TokenID
	for _, word := range ExampleWords {
		id, err := index.Add(word)
		if err != nil {
			// Best effort: report the word that failed and keep indexing.
			e.InfoLog.Println("ngram: adding", word, ":", err)
			continue
		}
		token = id
	}

	// Round-trip the last token back to its string, i.e. the last word added.
	if str, err := index.GetString(token); err != nil {
		e.InfoLog.Println("ngram: resolving token:", err)
	} else {
		e.InfoLog.Println(str)
	}

	resultsList, err := index.Search("blest")
	if err != nil {
		e.InfoLog.Println("ngram: searching:", err)
		return
	}
	for _, v := range resultsList {
		e.InfoLog.Println(v.TokenID)
	}
}
// TST exercises the ternary search tree from cosn/collections: it inserts
// every example word with a nil value, deletes "coca cola", and logs each
// result of a StartsWith lookup for "blost".
func (_ Test) TST() {
	e.InfoLog.Println("\nTST:")

	var tree tst.T
	for _, word := range ExampleWords {
		tree.Insert(word, nil)
	}
	tree.Delete("coca cola")

	// Original author's note: StartsWith is buggy, try searching for "blest".
	for _, match := range tree.StartsWith("blost") {
		e.InfoLog.Println(match)
	}
}
// radix exercises the sauerbraten/radix tree: it stores every example word
// with its paired uint32 value and logs each value returned by
// GetAllWithPrefix for "blost".
func (_ Test) radix() {
	e.InfoLog.Println("\nRadix:")

	tree := radix.New()
	for idx, word := range ExampleWords {
		tree.Set(word, ExampleUint32[idx])
	}

	// Values come back untyped; every value stored above is a uint32.
	for _, value := range tree.GetAllWithPrefix("blost") {
		e.InfoLog.Println(value.(uint32))
	}
}
// Patricia exercises the go-patricia trie with the example fixtures. It
// inserts every word with its paired uint32 value, overwrites "coca cola",
// logs the items visited under the "blost" subtree and along the prefixes of
// "borte med blesten mega", then deletes "coca cola" and the "blost" subtree
// and logs whatever remains.
func (_ Test) Patricia() {
	e.InfoLog.Println("\nPatricia:")

	// printItem is the visitor shared by all traversals below. It asserts
	// uint32, so every item stored in the trie must be a uint32.
	printItem := func(prefix patricia.Prefix, item patricia.Item) error {
		e.InfoLog.Println(string(prefix), item.(uint32))
		return nil
	}

	// Named tree rather than trie so the imported trie package is not shadowed.
	tree := patricia.NewTrie()
	for i, word := range ExampleWords {
		tree.Insert(patricia.Prefix(word), ExampleUint32[i])
	}

	// Store a uint32 explicitly: an untyped 188 would be boxed as int and make
	// printItem's type assertion panic if this entry were ever visited.
	tree.Set(patricia.Prefix("coca cola"), uint32(188))

	// The visitor never returns an error, so the traversal results are not checked.
	e.InfoLog.Println("SubTree:")
	tree.VisitSubtree(patricia.Prefix("blost"), printItem)

	e.InfoLog.Println("Prefixes:")
	tree.VisitPrefixes(patricia.Prefix("borte med blesten mega"), printItem)

	tree.Delete(patricia.Prefix("coca cola"))
	tree.DeleteSubtree(patricia.Prefix("blost"))

	e.InfoLog.Println("What is left:")
	tree.Visit(printItem)
}
// IRadix exercises hashicorp's immutable radix tree: it inserts every example
// word with its paired uint32 value, logs the longest stored prefix of
// "borte med blesten mega", and then logs every key found by walking the
// "blost" prefix.
func (_ Test) IRadix() {
	e.InfoLog.Println("\nIradix:")

	// Each Insert hands back a new tree, so keep rebinding the variable.
	tree := iradix.New()
	for idx, word := range ExampleWords {
		tree, _, _ = tree.Insert([]byte(word), ExampleUint32[idx])
	}
	root := tree.Root()

	longest, _, _ := root.LongestPrefix([]byte("borte med blesten mega"))
	e.InfoLog.Println(string(longest))

	// Collect the keys first and log them once the walk is done.
	var keys []string
	root.WalkPrefix([]byte("blost"), func(key []byte, _ interface{}) bool {
		keys = append(keys, string(key))
		// NOTE(review): presumably false means "keep walking"; confirm against the iradix docs.
		return false
	})
	for _, key := range keys {
		e.InfoLog.Println(key)
	}
}
// Deaccent4 logs the result of running StripÆØÅ over a sample Danish phrase.
func (_ Test) Deaccent4() {
	e.InfoLog.Println("\nDeaccent:")

	const sample = "æblekage med ål og øl"
	stripped := StripÆØÅ(sample)
	e.InfoLog.Println(string(stripped))
}
// StripÆØÅ lower-cases str and replaces the Danish letters å, æ and ø with
// the single ASCII bytes 'a', 'a' and 'o' respectively.
//
// All other characters are passed through unchanged: ASCII as a single byte,
// any other rune as its UTF-8 encoding. The previous implementation emitted a
// stray NUL byte for every unmapped non-ASCII rune. Bytes that are not valid
// UTF-8 are decoded by range as U+FFFD and encoded as such.
//
// The returned slice is never nil; it is empty for empty input.
func StripÆØÅ(str string) []byte {
	str = strings.ToLower(str)

	// Mapped letters shrink from two bytes to one and everything else keeps
	// its size, so len(str) is the right capacity for valid input. append
	// still grows the slice safely in the invalid-UTF-8 case.
	out := make([]byte, 0, len(str))
	for _, char := range str {
		switch {
		case char == 'å' || char == 'æ':
			out = append(out, 'a')
		case char == 'ø':
			out = append(out, 'o')
		case char < 0x80:
			// Plain ASCII: copy the byte as is.
			out = append(out, byte(char))
		default:
			// Unmapped non-ASCII rune: keep it instead of writing a zero byte.
			out = append(out, string(char)...)
		}
	}
	return out
}
// Codex exercises the codex word generator: it derives traits from a small
// set of sample words, logs a fixed-size batch of generated words, and then
// counts how many words a fresh generator yields before it returns "".
//
// It panics if the traits cannot be built from the sample words.
func (_ Test) Codex() {
	// Number of words drawn for the logged batch.
	const sampleSize = 200

	traits, err := codex.NewTraits([]string{"ral", "mag", "dyn", "kunda", "sim"})
	if err != nil {
		panic(err)
	}

	// Fill the batch from one generator and log it in a single line.
	var str [sampleSize]string
	next := traits.Generator()
	for i := range str {
		str[i] = next()
	}
	e.InfoLog.Println("Codex, Words generated: ", str)

	// Start over with a fresh generator and count words until it yields "".
	total := 0
	for next = traits.Generator(); next() != ""; {
		total++
	}
	e.InfoLog.Println("total:", total)
}