// urlEncode converts name to ISO-8859-1 (as xfingers requires) and escapes it
// for use in a URL. It fails if the name contains characters outside that charset.
func urlEncode(name string) (string, error) {
	n, ok := mahonia.NewEncoder("ISO-8859-1").ConvertStringOK(name)
	if !ok {
		return "", errutil.NewNoPosf("name contains characters outside the ISO-8859-1 charset: %s", n)
	}
	return url.QueryEscape(n), nil
}
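// A hypothetical caller of urlEncode above; assumes it sits in the same
// package and adds a "fmt" import. The sample strings are placeholders:
// Latin-1 text encodes cleanly, while CJK text falls outside ISO-8859-1
// and is rejected.
func demoURLEncode() {
	if enc, err := urlEncode("café au lait"); err == nil {
		fmt.Println(enc) // e.g. "caf%E9+au+lait"
	}
	if _, err := urlEncode("你好"); err != nil {
		fmt.Println("rejected:", err)
	}
}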
// Utf82gbk converts a UTF-8 string to GBK, returning "" if the conversion fails.
func Utf82gbk(utfStr string) string {
	enc := mahonia.NewEncoder("gbk")
	if ret, ok := enc.ConvertStringOK(utfStr); ok {
		return ret
	}
	return ""
}
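// A hypothetical counterpart for the reverse direction, sketched with the same
// mahonia calls the snippets above already use. The import path shown is the
// commonly used fork; adjust it to whatever the project actually vendors.
package main

import (
	"fmt"

	"github.com/axgle/mahonia"
)

// Gbk2Utf8 decodes a GBK-encoded string back into UTF-8.
func Gbk2Utf8(gbkStr string) string {
	dec := mahonia.NewDecoder("gbk")
	return dec.ConvertString(gbkStr)
}

func main() {
	gbk := mahonia.NewEncoder("gbk").ConvertString("你好, mahonia")
	fmt.Println(Gbk2Utf8(gbk)) // round-trips back to the original UTF-8 text
}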
// SetFieldValue sets the value of the field at fieldIndex in the given row.
func (dt *DbfTable) SetFieldValue(row int, fieldIndex int, value string) (err error) {
	e := mahonia.NewEncoder(dt.fileEncoding)
	b := []byte(e.ConvertString(value))

	fieldLength := int(dt.fields[fieldIndex].fieldLength)

	// locate the offset of the field in the DbfTable dataStore
	offset := int(dt.numberOfBytesInHeader)
	lengthOfRecord := int(dt.lengthOfEachRecord)
	offset = offset + (row * lengthOfRecord)

	recordOffset := 1
	for i := 0; i < len(dt.fields); i++ {
		if i == fieldIndex {
			break
		} else {
			recordOffset += int(dt.fields[i].fieldLength)
		}
	}

	// first fill the field with space values
	for i := 0; i < fieldLength; i++ {
		dt.dataStore[offset+recordOffset+i] = 0x20
	}

	// write the new value
	switch dt.fields[fieldIndex].fieldType {
	case "C", "L", "D":
		// character-like fields are written left-aligned
		for i := 0; i < len(b) && i < fieldLength; i++ {
			dt.dataStore[offset+recordOffset+i] = b[i]
		}
	case "N":
		// numeric fields are written right-aligned
		for i := 0; i < fieldLength; i++ {
			if i < len(b) {
				dt.dataStore[offset+recordOffset+(fieldLength-i-1)] = b[(len(b)-1)-i]
			} else {
				break
			}
		}
	}

	return
}
// convertToByteSlice converts value to a byte slice in the table's file
// encoding and returns at most numberOfBytes bytes (fewer if the encoded
// string is shorter than numberOfBytes).
func (dt *DbfTable) convertToByteSlice(value string, numberOfBytes int) (s []byte) {
	e := mahonia.NewEncoder(dt.fileEncoding)
	b := []byte(e.ConvertString(value))

	if len(b) <= numberOfBytes {
		s = b
	} else {
		s = b[0:numberOfBytes]
	}
	return
}
// Info is an enhanced fmt.Printf: it detects the operating system so terminal
// output is not garbled (transcoding strings to GBK on Windows) and appends a
// newline automatically.
func Info(infos ...interface{}) {
	if runtime.GOOS == "windows" {
		encoder := mahonia.NewEncoder("gbk")
		for i, item := range infos {
			item, ok := item.(string)
			if ok {
				infos[i] = encoder.ConvertString(item)
			}
		}
	}
	fmt.Printf(fmt.Sprintf("%s\n", infos[0]), infos[1:]...)
}
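// A hypothetical call site, assuming it sits in the same package as Info;
// the format string and arguments are examples only.
func reportProgress(done, total int) {
	// Info treats its first argument as a Printf format string. On Windows the
	// string arguments are transcoded to GBK first, so the Chinese text below
	// displays correctly in the console; elsewhere it passes through untouched.
	Info("已处理 %d/%d 个文件", done, total)
}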
// getNormalizedFieldName encodes name in the table's file encoding, truncates
// it to the 10-byte dBASE field-name limit, and decodes it back to UTF-8.
func (dt *DbfTable) getNormalizedFieldName(name string) (s string) {
	e := mahonia.NewEncoder(dt.fileEncoding)
	b := []byte(e.ConvertString(name))

	if len(b) > 10 {
		b = b[0:10]
	}

	d := mahonia.NewDecoder(dt.fileEncoding)
	s = d.ConvertString(string(b))

	return
}
func main() {
	flag.Parse()

	var r io.Reader = os.Stdin
	var w io.Writer = os.Stdout

	if *from != "utf-8" {
		decode := mahonia.NewDecoder(*from)
		if decode == nil {
			log.Fatalf("Could not create decoder for %s", *from)
		}
		r = decode.NewReader(r)
	}

	if *to != "utf-8" {
		encode := mahonia.NewEncoder(*to)
		if encode == nil {
			log.Fatalf("Could not create encoder for %s", *to)
		}
		w = encode.NewWriter(w)
	}

	io.Copy(w, r)
}
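// The snippet above dereferences *from and *to, flags declared elsewhere in
// the file. A minimal sketch of those declarations (the names come from the
// code; the defaults and usage strings are assumptions):
var (
	from = flag.String("from", "utf-8", "charset to read from stdin")
	to   = flag.String("to", "utf-8", "charset to write to stdout")
)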
func NewFromFile(fileName string, fileEncoding string) (table *DbfTable, err error) {
	s, err := readFile(fileName)
	if err != nil {
		return nil, err
	}

	// Create and populate the DbfTable struct
	dt := new(DbfTable)

	dt.fileEncoding = fileEncoding
	dt.encoder = mahonia.NewEncoder(fileEncoding)
	dt.decoder = mahonia.NewDecoder(fileEncoding)

	// read dbase table header information
	dt.fileSignature = s[0]
	dt.updateYear = s[1]
	dt.updateMonth = s[2]
	dt.updateDay = s[3]
	dt.numberOfRecords = uint32(s[4]) | (uint32(s[5]) << 8) | (uint32(s[6]) << 16) | (uint32(s[7]) << 24)
	dt.numberOfBytesInHeader = uint16(s[8]) | (uint16(s[9]) << 8)
	dt.lengthOfEachRecord = uint16(s[10]) | (uint16(s[11]) << 8)

	// create fieldMap to translate field names to indexes
	dt.fieldMap = make(map[string]int)

	// Number of fields in the dbase table
	dt.numberOfFields = int((dt.numberOfBytesInHeader - 1 - 32) / 32)

	// populate dbf fields
	for i := 0; i < int(dt.numberOfFields); i++ {
		offset := (i * 32) + 32

		fieldName := strings.Trim(dt.encoder.ConvertString(string(s[offset:offset+10])), string([]byte{0}))
		dt.fieldMap[fieldName] = i

		var err error

		switch s[offset+11] {
		case 'C':
			err = dt.AddTextField(fieldName, s[offset+16])
		case 'N':
			err = dt.AddNumberField(fieldName, s[offset+16], s[offset+17])
		case 'F':
			err = dt.AddFloatField(fieldName, s[offset+16], s[offset+17])
		case 'L':
			err = dt.AddBooleanField(fieldName)
		case 'D':
			err = dt.AddDateField(fieldName)
		}

		// Check the return value for errors
		if err != nil {
			return nil, err
		}
	}

	// Since we are reading the dbase file from disk, changing the schema is
	// not allowed at this point.
	dt.dataEntryStarted = true

	// set the DbfTable dataStore slice that holds the complete file in memory
	dt.dataStore = s

	return dt, nil
}
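// A hypothetical caller of NewFromFile; the file name and encoding are
// placeholder values, and it assumes a "log" import alongside the package
// that defines DbfTable.
func loadTable() *DbfTable {
	table, err := NewFromFile("customers.dbf", "gbk")
	if err != nil {
		log.Fatalf("open dbf: %v", err)
	}
	// The field schema has already been read from the header, so the table is
	// ready for row access; further schema changes are locked.
	return table
}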
// New creates a new dbase table from scratch for the given encoding.
func New(encoding string) (table *DbfTable) {
	// Create and populate the DbfTable struct
	dt := new(DbfTable)

	dt.fileEncoding = encoding
	dt.encoder = mahonia.NewEncoder(encoding)
	dt.decoder = mahonia.NewDecoder(encoding)

	// mark this table as created from scratch
	dt.createdFromScratch = true

	// initialize dbase table header information
	dt.fileSignature = 0x03
	dt.updateYear = byte(time.Now().Year() % 100)
	dt.updateMonth = byte(time.Now().Month())
	dt.updateDay = byte(time.Now().Day()) // day of month (YearDay would overflow a byte)
	dt.numberOfRecords = 0
	dt.numberOfBytesInHeader = 32
	dt.lengthOfEachRecord = 0

	// create fieldMap to translate field names to indexes
	dt.fieldMap = make(map[string]int)

	// Number of fields in the dbase table
	dt.numberOfFields = int((dt.numberOfBytesInHeader - 1 - 32) / 32)

	s := make([]byte, dt.numberOfBytesInHeader)

	// No data has been entered yet, so the schema can still be changed.
	dt.dataEntryStarted = false

	// set the DbfTable dataStore slice that will hold the complete file in memory
	dt.dataStore = s

	dt.dataStore[0] = dt.fileSignature
	dt.dataStore[1] = dt.updateYear
	dt.dataStore[2] = dt.updateMonth
	dt.dataStore[3] = dt.updateDay

	// no MDX file (index upon demand)
	dt.dataStore[28] = 0x00

	// Set the dbase language driver.
	//
	// Houston, we have a problem: there is no easy way to deal with encoding
	// issues here. For now, try to find the archaic encoding code defined by
	// the dbase standard (if there is one) for the given encoding; if none
	// matches, fall back to the default ANSI code.
	//
	// Even though this flag is set in the dbase file, the provided encoding is
	// still used for everything except this one flag, so that anyone who knows
	// the real encoding can process the text accordingly.
	if code, ok := encodingTable[lookup[encoding]]; ok {
		dt.dataStore[28] = code
	} else {
		dt.dataStore[28] = 0x57 // ANSI
	}

	return dt
}
// EncodeString converts src from UTF-8 to the given charset.
func EncodeString(src, charset string) string {
	return mahonia.NewEncoder(charset).ConvertString(src)
}
func Show(s string) string {
	enc := mahonia.NewEncoder("gbk")
	return enc.ConvertString(s)
}
func ToGBK(src string) (dst string) {
	//encoder := mahonia.NewEncoder("gbk")
	encoder := mahonia.NewEncoder("gb2312")
	return encoder.ConvertString(src)
}
	}
	port = ":" + string(*p)
}

type GoogleIMESKK struct{}

func (s *GoogleIMESKK) Request(text string) ([]string, error) {
	words, err := Transliterate(text)
	if err != nil {
		return nil, err
	}
	return words, nil
}

// You can specify euc-jp as well.
var enc = mahonia.NewEncoder("utf8")

func Transliterate(text string) (words []string, err error) {
	text = enc.ConvertString(text)
	v := url.Values{"langpair": {"ja-Hira|ja"}, "text": {text + ","}}
	resp, err := http.Get("http://www.google.com/transliterate?" + v.Encode())
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	dec := json.NewDecoder(resp.Body)
	var w [][]interface{}
	if err := dec.Decode(&w); err != nil {
		return nil, err
	}

	for _, v := range w[0][1].([]interface{}) {
// ConvertToPY converts (and only converts) Chinese characters in utf8 encoding into an initial Pinyin string.
// E.g.
// "啊薄车单饿飞干很见空冷吗嫩哦盘群润三图玩先烟总" -> "abcdefghjklmnopqrstwxyz"
// "世界你好!hello world!" -> "sjnh!hello world!"
// "把UTF8字符转换成gbk" -> "bUTF8zfzhcgbk"
// "all english" -> "all english"
// "1234890" -> "1234890"
// "!@#$%^&*()~,.<>/?';:[]{}" -> "!@#$%^&*()~,.<>/?';:[]{}"
func ConvertToPY(s string) string {
	e := mahonia.NewEncoder("gbk")
	str := e.ConvertString(s)

	// mapfunc maps the GBK byte-pair value (minus 65536) of a Chinese character
	// to its Pinyin initial; values outside the table are returned unchanged.
	mapfunc := func(r int32) rune {
		switch {
		case r >= -20319 && r <= -20284:
			return 'a'
		case r >= -20283 && r <= -19776:
			return 'b'
		case r >= -19775 && r <= -19219:
			return 'c'
		case r >= -19218 && r <= -18711:
			return 'd'
		case r >= -18710 && r <= -18527:
			return 'e'
		case r >= -18526 && r <= -18240:
			return 'f'
		case r >= -18239 && r <= -17923:
			return 'g'
		case r >= -17922 && r <= -17418:
			return 'h'
		case r >= -17417 && r <= -16475:
			return 'j'
		case r >= -16474 && r <= -16213:
			return 'k'
		case r >= -16212 && r <= -15641:
			return 'l'
		case r >= -15640 && r <= -15166:
			return 'm'
		case r >= -15165 && r <= -14923:
			return 'n'
		case r >= -14922 && r <= -14915:
			return 'o'
		case r >= -14914 && r <= -14631:
			return 'p'
		case r >= -14630 && r <= -14150:
			return 'q'
		case r >= -14149 && r <= -14091:
			return 'r'
		case r >= -14090 && r <= -13319:
			return 's'
		case r >= -13318 && r <= -12839:
			return 't'
		case r >= -12838 && r <= -12557:
			return 'w'
		case r >= -12556 && r <= -11848:
			return 'x'
		case r >= -11847 && r <= -11056:
			return 'y'
		case r >= -11055 && r <= -10247:
			return 'z'
		}
		return r
	}

	l := len(str)
	buf := make([]rune, 0)
	for i := 0; i < l; {
		switch {
		case str[i] <= '~':
			// single-byte ASCII character: copy as-is
			buf = append(buf, rune(str[i]))
			i++
		case (i + 1) < l:
			a := int32(str[i])*256 + int32(str[i+1]) - 65536
			if r := mapfunc(a); a == r {
				// a is Chinese punctuation: decode the byte pair back to UTF-8 unchanged
				decoder := mahonia.NewDecoder("gbk")
				decodedStr := decoder.ConvertString(str[i : i+2])
				sReader := strings.NewReader(decodedStr)
				ch, _, _ := sReader.ReadRune()
				buf = append(buf, ch)
			} else {
				buf = append(buf, r)
			}
			i += 2
		default:
			buf = append(buf, rune(str[i]))
			i++
		}
	}
	return string(buf)
}
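// A hypothetical driver for ConvertToPY, assuming it is compiled into the same
// main package; the expected outputs are taken from the doc comment above.
package main

import "fmt"

func main() {
	fmt.Println(ConvertToPY("世界你好!hello world!")) // "sjnh!hello world!"
	fmt.Println(ConvertToPY("把UTF8字符转换成gbk"))   // "bUTF8zfzhcgbk"
	fmt.Println(ConvertToPY("all english"))          // "all english"
}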