This repository has been archived by the owner on Feb 16, 2019. It is now read-only.
/
gopinyin.go
112 lines (102 loc) · 2.24 KB
/
gopinyin.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
// Package gopinyin provides small utilities for working with Chinese Pinyin.
package gopinyin
import (
"database/sql/driver"
"fmt"
"regexp"
"strings"
)
// Pinyins is an ordered list of pinyin strings.
type Pinyins []string
// Abbreviate returns the first letter (first byte) of every pinyin in pys,
// in order.
//
// Empty entries have no first letter and are skipped; slicing them would
// panic. A nil or empty receiver yields a nil result.
func (pys Pinyins) Abbreviate() (abbreviated Pinyins) {
	if len(pys) == 0 {
		return nil
	}
	abbreviated = make(Pinyins, 0, len(pys))
	for _, py := range pys {
		if py == "" {
			// Nothing to abbreviate; py[:1] would be out of range.
			continue
		}
		abbreviated = append(abbreviated, py[:1])
	}
	return abbreviated
}
// Expand looks up every entry of pys in _MAP and returns the mapped values
// in the same order, i.e. the possible matches for each abbreviated pinyin.
//
// An entry for which _MAP holds no value contributes an empty string, so the
// result always has the same length as pys. An empty receiver yields nil.
func (pys Pinyins) Expand() (out Pinyins) {
	for idx := 0; idx < len(pys); idx++ {
		candidates := _MAP[pys[idx]]
		out = append(out, candidates)
	}
	return out
}
// Join concatenates all pinyins into a single string.
//
// The optional bytes form the separator placed between consecutive entries;
// with no bytes given the entries are joined without any separator.
func (pys Pinyins) Join(bytes ...byte) (out string) {
	separator := string(bytes)
	return strings.Join([]string(pys), separator)
}
// Regexp compiles the pattern produced by RegexpString and returns the
// resulting regular expression.
//
// Compilation uses regexp.MustCompile, so this method panics if the pattern
// built from pys is not a valid regular expression.
func (pys Pinyins) Regexp() (out *regexp.Regexp) {
	pattern := pys.RegexpString()
	return regexp.MustCompile(pattern)
}
// RegexpString builds the source text of a regular expression from pys.
//
// For every entry the pattern gains an escaped caret (`\^`, which matches a
// literal '^' character), the entry itself, and `[a-z]*` so that any run of
// lower-case letters may follow. Entries are inserted verbatim, without
// regexp quoting. An empty receiver yields an empty string.
func (pys Pinyins) RegexpString() (out string) {
	var pattern []byte
	for idx := range pys {
		pattern = append(pattern, `\^`...)
		pattern = append(pattern, pys[idx]...)
		pattern = append(pattern, "[a-z]*"...)
	}
	return string(pattern)
}
// SQL renders the (expanded) pinyins as a WHERE-clause fragment for
// PostgreSQL of the form
//
//	SEQUENCED_ARRAY_CONTAINS(column, '{p1}', '{p2}', ...)
//
// where each entry of pys becomes one single-quoted array literal.
// SEQUENCED_ARRAY_CONTAINS is presumably a user-defined database function;
// it is not defined in this file.
//
// Single quotes inside an entry are doubled so that an entry can never
// terminate the surrounding SQL string literal. column is inserted verbatim
// and must therefore be a trusted identifier, never user input.
//
// An empty receiver yields an empty string.
func (pys Pinyins) SQL(column string) (out string) {
	if len(pys) == 0 {
		return ""
	}
	args := make([]string, 0, len(pys))
	for _, py := range pys {
		// Standard SQL escaping: a quote inside a '...' literal is written ''.
		escaped := strings.Replace(py, "'", "''", -1)
		args = append(args, fmt.Sprintf("'{%s}'", escaped))
	}
	return fmt.Sprintf("SEQUENCED_ARRAY_CONTAINS(%s, %s)", column, strings.Join(args, ", "))
}
// Value converts pys into a SQL driver value: a string of the form
// "{a,b,c}".
//
// Each entry is reduced to its ASCII letters, with upper-case letters folded
// to lower case and every other byte dropped. Entries that contain no
// letters at all are omitted. An empty receiver yields "{}". The returned
// error is always nil.
func (pys Pinyins) Value() (value driver.Value, err error) {
	encoded := []byte{'{'}
	for _, py := range pys {
		start := len(encoded)
		for i := 0; i < len(py); i++ {
			c := py[i]
			if 'A' <= c && c <= 'Z' {
				c += 'a' - 'A'
			}
			if 'a' <= c && c <= 'z' {
				encoded = append(encoded, c)
			}
		}
		// Only terminate the element if this entry contributed any letters.
		if len(encoded) > start {
			encoded = append(encoded, ',')
		}
	}
	// A trailing separator is turned into the closing brace; otherwise
	// (nothing was written) the brace is appended.
	if last := len(encoded) - 1; encoded[last] == ',' {
		encoded[last] = '}'
	} else {
		encoded = append(encoded, '}')
	}
	return driver.Value(string(encoded)), nil
}
// Split breaks a run-together pinyin string into its individual pinyins.
//
// The input is scanned byte by byte. ASCII letters are lower-cased and
// collected into a buffer; all other bytes are ignored. After each byte the
// buffer is extended greedily: as long as the buffer plus the next input
// byte is still a key with a non-empty value in _MAP, scanning continues.
// Otherwise the buffer is flushed: it is appended to the result if it is
// itself such a key, and discarded if not.
//
// The look-ahead byte is case-normalized exactly like the current byte, so
// "ZHONGGUO" splits the same way as "zhongguo".
func Split(in string) (out Pinyins) {
	var buf []byte
	for i := 0; i < len(in); i++ {
		b := in[i]
		if b >= 'A' && b <= 'Z' {
			b += 'a' - 'A'
		}
		if b >= 'a' && b <= 'z' {
			buf = append(buf, b)
		}
		if i+1 < len(in) {
			// Peek at the next byte, folded to lower case so that the
			// look-ahead key matches what would actually be buffered.
			next := in[i+1]
			if next >= 'A' && next <= 'Z' {
				next += 'a' - 'A'
			}
			if _MAP[string(append(buf, next))] != "" {
				continue
			}
		}
		if buf != nil {
			if _MAP[string(buf)] != "" {
				out = append(out, string(buf))
			}
			buf = nil
		}
	}
	return out
}