/
court.go
152 lines (128 loc) · 3.37 KB
/
court.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
// Unifies the inconsistent court names stored in the database into their
// standard names and prints the result in CSV format.
package main
import (
"fmt"
"github.com/kevindragon/lexiscnexec/court"
"io/ioutil"
"os"
"strings"
)
// main is the program entry point. It runs the court-name conversion.
func main() {
	// To debug a handful of names by hand, call test() here instead.
	convert()
}
// convert resolves every court name listed in court/courts_in_db.txt to a
// standard name and prints the result to stdout as CSV.
//
// Each non-blank input line is resolved in this order:
//  1. the manual mapping (analyzer.GetFromMapping);
//  2. the name itself, when analyzer.IsStandard accepts it;
//  3. analyzer.GetAncestor.
//
// A candidate produced by step 2 or 3 is accepted only when underSameTop
// agrees that it belongs with the original name; otherwise the row is
// reported as unresolved with an empty third column. Resolved rows are
// printed first, then a row of empty fields, then the unresolved rows.
//
// If the input file cannot be read, the failure is written to stderr (so it
// cannot end up inside the CSV on stdout) and the process exits with
// status 1.
func convert() {
	const inputPath = "court/courts_in_db.txt"

	analyzer := court.NewAnalyzer()
	// NOTE(review): any result returned by the Load* calls is not checked
	// here; confirm whether they can report a failure.
	analyzer.LoadDict("court/dict.txt")
	analyzer.LoadStandard("court/standard.txt")
	analyzer.LoadMapping("court/manual_mapping.txt")

	lines, err := readFile(inputPath)
	if err != nil {
		fmt.Fprintf(os.Stderr, "read file %s failed: %v\n", inputPath, err)
		os.Exit(1)
	}

	var standards []string
	var unStandards []string
	for index, l := range lines {
		// Lookups use the name with every space removed; the CSV keeps the
		// raw text from the file.
		line := strings.Replace(l, " ", "", -1)
		if line == "" {
			continue
		}

		standardName := analyzer.GetFromMapping(line)
		if standardName == "" {
			if analyzer.IsStandard(line) {
				standardName = line
			} else {
				standardName = analyzer.GetAncestor(line)
			}
			// A manual mapping is trusted as is; a derived candidate must
			// additionally pass the same-top check.
			if !underSameTop(analyzer, standardName, line) {
				unStandards = append(unStandards, fmt.Sprintf(`"%d","%s",`, index+1, l))
				continue
			}
		}
		standards = append(standards, fmt.Sprintf(`"%d","%s","%s"`, index+1, l, standardName))
	}

	fmt.Printf("%s,%s,%s\n", "#", "原始名称", "转换后的名称")
	for _, row := range standards {
		fmt.Printf("%s\n", row)
	}
	// A row of empty fields separates resolved names from unresolved ones.
	fmt.Println(`"","",""`)
	for _, row := range unStandards {
		fmt.Printf("%s\n", row)
	}
}
// test is a manual debugging aid. For each sample name it prints the terms
// produced by analyzer.ToTerms, then the result of analyzer.GetAncestor
// together with the underSameTop verdict for that result.
func test() {
	samples := []string{
		"鼎城区人民法院",
	}

	a := court.NewAnalyzer()
	a.LoadDict("court/dict.txt")
	for i := range samples {
		fmt.Println(samples[i], "terms:", a.ToTerms(samples[i]))
	}

	// The standard list is loaded only after the terms have been printed.
	a.LoadStandard("court/standard.txt")
	fmt.Println("")
	for i := range samples {
		name := samples[i]
		ancestor := a.GetAncestor(name)
		matched := underSameTop(a, ancestor, name)

		// "-->" marks an accepted result, "<-?->" a doubtful one.
		arrow := "<-?->"
		if matched {
			arrow = "-->"
		}
		fmt.Println(name, arrow, ancestor, matched)
	}
}
// underSameTop reports whether src and dist resolve to the same top term
// according to analyzer.
//
// Both names are trimmed of surrounding spaces and split into terms with
// analyzer.ToTerms. They match when their first terms are equal, or when
// the top term of src equals any term of dist or the non-empty
// analyzer.GetTop result of any term of dist that analyzer.IsTop rejects.
// The top term of src is the first non-empty GetTop result among its terms
// that IsTop rejects, falling back to the first term of src.
//
// It returns false when either name is empty or yields no terms.
func underSameTop(analyzer *court.Analyzer, src, dist string) bool {
	src, dist = strings.Trim(src, " "), strings.Trim(dist, " ")
	if src == "" || dist == "" {
		return false
	}

	srcTerms := analyzer.ToTerms(src)
	distTerms := analyzer.ToTerms(dist)
	// Indexing [0] below would panic on an empty term list; a name without
	// terms cannot be matched to anything.
	if len(srcTerms) == 0 || len(distTerms) == 0 {
		return false
	}
	if srcTerms[0] == distTerms[0] {
		return true
	}

	srcTop := srcTerms[0]
	for _, srcTerm := range srcTerms {
		if analyzer.IsTop(srcTerm) {
			continue
		}
		if top := analyzer.GetTop(srcTerm); top != "" {
			srcTop = top
			break
		}
	}

	// Compare against each term of dist and against its top directly rather
	// than appending the tops to distTerms, so the slice returned by the
	// analyzer is never written to.
	// NOTE(review): this assumes IsTop and GetTop are read-only lookups, so
	// the order in which they are called does not matter. Confirm.
	for _, distTerm := range distTerms {
		if distTerm == srcTop {
			return true
		}
		if analyzer.IsTop(distTerm) {
			continue
		}
		if distTop := analyzer.GetTop(distTerm); distTop != "" && distTop == srcTop {
			return true
		}
	}
	return false
}
// readFile loads the text file at filepath and returns its non-empty lines.
//
// Lines may end in "\n", "\r\n" or "\r". Any run of these characters counts
// as a single separator, so empty lines, including the one after a trailing
// newline, never appear in the result. Lines that contain only spaces are
// returned unchanged.
//
// On failure it returns a nil slice and the error from ioutil.ReadFile.
func readFile(filepath string) ([]string, error) {
	// ioutil.ReadFile already fails on a missing or unreadable file, so no
	// separate os.Stat probe is needed.
	data, err := ioutil.ReadFile(filepath)
	if err != nil {
		return nil, err
	}

	isLineBreak := func(r rune) bool { return r == '\n' || r == '\r' }
	return strings.FieldsFunc(string(data), isLineBreak), nil
}