/
test.go
107 lines (92 loc) · 2.1 KB
/
test.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
package main
import (
"encoding/gob"
"fmt"
"io/ioutil"
"math"
"os"
"strconv"
"strings"
"github.com/yosssi/gohtml"
)
// check panics with e when e is non-nil; a nil error is a no-op.
func check(e error) {
	if e == nil {
		return
	}
	panic(e)
}
// savePats gob-encodes pats and writes the result to file, creating the file
// or truncating it if it already exists.
//
// It panics with a string message on failure. The message starts with
// "cant open file" when the file cannot be created, "cant encode" when the
// gob encoder fails, and "cant close file" when the final close reports an
// error. Each message also carries the path and the underlying error.
func savePats(file string, pats map[string]int) {
	f, err := os.Create(file)
	if err != nil {
		panic(fmt.Sprintf("cant open file %q: %v", file, err))
	}
	// Safety net for the panic path below. On the success path the file is
	// closed explicitly so that a close error is not lost.
	defer f.Close()

	if err := gob.NewEncoder(f).Encode(pats); err != nil {
		panic(fmt.Sprintf("cant encode %q: %v", file, err))
	}
	// A failed close on a written file can mean the data never reached disk,
	// so it must not be ignored.
	if err := f.Close(); err != nil {
		panic(fmt.Sprintf("cant close file %q: %v", file, err))
	}
}
// loadPats reads file and gob-decodes its contents into a map[string]int,
// which it returns.
//
// It panics with a string message on failure. The message starts with
// "cant open file" when the file cannot be opened and "cant decode" when the
// contents cannot be gob-decoded. Each message also carries the path and the
// underlying error.
func loadPats(file string) (pats map[string]int) {
	f, err := os.Open(file)
	if err != nil {
		panic(fmt.Sprintf("cant open file %q: %v", file, err))
	}
	defer f.Close()

	if err := gob.NewDecoder(f).Decode(&pats); err != nil {
		panic(fmt.Sprintf("cant decode %q: %v", file, err))
	}
	return pats
}
// main encodes doc<N>.txt into the binary file dat<N>, using and extending
// the word table saved by the previous run.
//
// Steps, for the hard-coded file number N:
//  1. Create (or truncate) the output file dat<N>.
//  2. Read doc<N>.txt, pass it through gohtml.Format, trim every resulting
//     line and join the lines with no separator.
//  3. Load the word table from dat<N-1>.encoding. The table maps a lowercased
//     word to its index; the key "tally123" holds the next free index.
//  4. Scan the text rune by rune. Runs of non-delimiter runes form a word.
//     Each word is emitted as the four bytes {62, index/254, index%254, 60};
//     a delimiter that is not terminating a word is emitted as a single byte.
//  5. Save the updated table to dat<N>.encoding.
//
// Any I/O or conversion failure panics (via check, loadPats or savePats).
func main() {
	nextFileNum := "2"

	f, err := os.Create("dat" + nextFileNum)
	check(err)
	// Safety net for panic paths; on the success path the file is closed
	// explicitly below so that a close error is reported.
	defer f.Close()

	dat, err := ioutil.ReadFile("doc" + nextFileNum + ".txt")
	check(err)

	// TrimSpace removes the line's surrounding whitespace, including any
	// line terminator, so the lines end up joined with nothing between them.
	var flat strings.Builder
	for _, line := range strings.Split(gohtml.Format(string(dat)), "\n") {
		flat.WriteString(strings.TrimSpace(line))
	}

	// m := make(map[string]int)
	// m["tally123"] = 0
	fileNum, err := strconv.Atoi(nextFileNum)
	check(err)
	m := loadPats("dat" + strconv.Itoa(fileNum-1) + ".encoding")
	if m == nil {
		// Writing to a nil map panics; start from an empty table instead.
		m = make(map[string]int)
	}

	// Runes that terminate a word, in addition to anything TrimSpace treats
	// as whitespace.
	const delimiters = "!#$%&'()*+,-.:;=?@[/\\]^_`{|}~><' \n\t\b\""

	// The encoded output is collected in memory and written once, so a
	// single error check covers all of it.
	out := make([]byte, 0, flat.Len())
	var cur strings.Builder
	for _, c := range strings.TrimSpace(flat.String()) {
		isDelim := len(strings.TrimSpace(string(c))) == 0 || strings.ContainsRune(delimiters, c)
		if !isDelim {
			cur.WriteRune(c)
			continue
		}
		if cur.Len() == 0 {
			// Delimiter with no pending word: emitted as one byte.
			// NOTE(review): byte(c) keeps only the low 8 bits of a
			// non-ASCII rune.
			out = append(out, byte(c))
			continue
		}

		// Delimiter that ends a word: the word token is emitted and the
		// delimiter itself is not written.
		curWord := cur.String()
		cur.Reset()
		word := strings.ToLower(curWord)
		fmt.Println("|" + strings.TrimSpace(curWord) + "|")

		// NOTE(review): a document word that lowercases to "tally123"
		// shares its key with the counter.
		if _, ok := m[word]; !ok {
			m[word] = m["tally123"]
			m["tally123"]++
		}
		idx := m[word]

		// Capitalisation class of the word as written: 0 = neither,
		// 1 = title case, 2 = upper case. It is only printed; it is not
		// part of the encoded output.
		// NOTE(review): strings.Title is deprecated in the standard library.
		caseFlag := byte(0)
		switch curWord {
		case strings.Title(word):
			caseFlag = 1
		case strings.ToUpper(word):
			caseFlag = 2
		}
		fmt.Println(caseFlag)

		// Index split into two base-254 digits.
		// NOTE(review): the high digit wraps once idx reaches 254*256.
		hi := byte(idx / 254)
		lo := byte(math.Mod(float64(idx), 254))
		out = append(out, 62, hi, lo, 60)
	}

	_, err = f.Write(out)
	check(err)
	check(f.Close())

	// A word still pending here was never terminated by a delimiter; it is
	// printed but not written to the output file.
	fmt.Println(cur.String())
	savePats("dat"+nextFileNum+".encoding", m)
	fmt.Println(m)
	fmt.Println(m["tally123"])
}