Example #1
// urlEncode is used for xfingers: it ISO-8859-1-encodes special characters in name and URL-escapes the result.
func urlEncode(name string) (string, error) {
	n, ok := mahonia.NewEncoder("ISO-8859-1").ConvertStringOK(name)
	if !ok {
		return "", errutil.NewNoPosf("name contains non illegal charset characters: %s.", n)
	}
	return url.QueryEscape(n), nil
}
Example #2
// Utf82gbk converts a UTF-8 string to GBK; it returns an empty string if the conversion fails.
func Utf82gbk(utfStr string) string {
	enc := mahonia.NewEncoder("gbk")
	if ret, ok := enc.ConvertStringOK(utfStr); ok {
		return ret
	}
	return ""
}
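For the reverse direction, mahonia's Decoder offers the same ConvertString call used in the other examples on this page; a minimal companion sketch (the name Gbk2utf8 is hypothetical, not part of the snippet above):

func Gbk2utf8(gbkStr string) string {
	// decode GBK bytes back into a UTF-8 string
	dec := mahonia.NewDecoder("gbk")
	return dec.ConvertString(gbkStr)
}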
Example #3
// SetFieldValue sets the value of the field at the given row and field index.
func (dt *DbfTable) SetFieldValue(row int, fieldIndex int, value string) (err error) {

	e := mahonia.NewEncoder(dt.fileEncoding)
	b := []byte(e.ConvertString(value))

	fieldLength := int(dt.fields[fieldIndex].fieldLength)

	// locate the offset of the field in DbfTable dataStore
	offset := int(dt.numberOfBytesInHeader)
	lengthOfRecord := int(dt.lengthOfEachRecord)

	offset = offset + (row * lengthOfRecord)

	recordOffset := 1 // the first byte of each record is the deletion flag

	for i := 0; i < len(dt.fields); i++ {
		if i == fieldIndex {
			break
		} else {
			recordOffset += int(dt.fields[i].fieldLength)
		}
	}

	// first fill the field with space values
	for i := 0; i < fieldLength; i++ {
		dt.dataStore[offset+recordOffset+i] = 0x20
	}

	// write new value
	switch dt.fields[fieldIndex].fieldType {
	case "C", "L", "D":
		for i := 0; i < len(b) && i < fieldLength; i++ {
			dt.dataStore[offset+recordOffset+i] = b[i]
		}
	case "N":
		for i := 0; i < fieldLength; i++ {
			// fmt.Printf("i:%v\n", i)
			if i < len(b) {
				dt.dataStore[offset+recordOffset+(fieldLength-i-1)] = b[(len(b)-1)-i]
			} else {
				break
			}
		}
	}

	return
}
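SetFieldValue locates a cell by adding the header size, the row offset, and the in-record offset of the field, where the first byte of every record is the deletion flag. A standalone sketch of that arithmetic, with illustrative names that are not part of the library:

func cellOffset(headerBytes, recordLength, row, fieldIndex int, fieldLengths []int) int {
	// records start right after the header
	offset := headerBytes + row*recordLength
	// skip the one-byte deletion flag, then every field before fieldIndex
	recordOffset := 1
	for i := 0; i < fieldIndex; i++ {
		recordOffset += fieldLengths[i]
	}
	return offset + recordOffset
}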
Example #4
/*
	convertToByteSlice converts value to a byte slice according to the given
	encoding and returns a slice whose length equals numberOfBytes, or less
	if the encoded string is shorter than numberOfBytes.
*/
func (dt *DbfTable) convertToByteSlice(value string, numberOfBytes int) (s []byte) {
	e := mahonia.NewEncoder(dt.fileEncoding)
	b := []byte(e.ConvertString(value))

	if len(b) <= numberOfBytes {
		s = b
	} else {
		s = b[0:numberOfBytes]
	}
	return
}
Example #5
// An enhanced version of `fmt.Printf()`: it detects the operating system to avoid garbled terminal output and appends a newline automatically.
func Info(infos ...interface{}) {
	if runtime.GOOS == "windows" {
		encoder := mahonia.NewEncoder("gbk")
		for i, item := range infos {
			if s, ok := item.(string); ok {
				infos[i] = encoder.ConvertString(s)
			}
		}
	}
	fmt.Printf(fmt.Sprintf("%s\n", infos[0]), infos[1:]...)
}
Example #6
// getNormalizedFieldName truncates the field name to the 10-byte dBASE
// field-name limit in the table's file encoding and converts it back to UTF-8.
func (dt *DbfTable) getNormalizedFieldName(name string) (s string) {
	e := mahonia.NewEncoder(dt.fileEncoding)
	b := []byte(e.ConvertString(name))

	if len(b) > 10 {
		b = b[0:10]
	}

	d := mahonia.NewDecoder(dt.fileEncoding)
	s = d.ConvertString(string(b))

	return
}
Example #7
func main() {
	flag.Parse()

	var r io.Reader = os.Stdin
	var w io.Writer = os.Stdout

	if *from != "utf-8" {
		decode := mahonia.NewDecoder(*from)
		if decode == nil {
			log.Fatalf("Could not create decoder for %s", *from)
		}
		r = decode.NewReader(r)
	}

	if *to != "utf-8" {
		encode := mahonia.NewEncoder(*to)
		if encode == nil {
			log.Fatalf("Could not create decoder for %s", *to)
		}
		w = encode.NewWriter(w)
	}

	if _, err := io.Copy(w, r); err != nil {
		log.Fatal(err)
	}
}
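The converter relies on two flags that the snippet does not define; a minimal sketch of how they are presumably declared, with names inferred from the usage above and assumed defaults:

var (
	from = flag.String("from", "utf-8", "charset to convert from")
	to   = flag.String("to", "utf-8", "charset to convert to")
)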
Example #8
func NewFromFile(fileName string, fileEncoding string) (table *DbfTable, err error) {
	s, err := readFile(fileName)

	if err != nil {
		return nil, err
	}

	// Create and populate the DbfTable struct
	dt := new(DbfTable)

	dt.fileEncoding = fileEncoding
	dt.encoder = mahonia.NewEncoder(fileEncoding)
	dt.decoder = mahonia.NewDecoder(fileEncoding)

	// read dbase table header information
	dt.fileSignature = s[0]
	dt.updateYear = s[1]
	dt.updateMonth = s[2]
	dt.updateDay = s[3]
	dt.numberOfRecords = uint32(s[4]) | (uint32(s[5]) << 8) | (uint32(s[6]) << 16) | (uint32(s[7]) << 24)
	dt.numberOfBytesInHeader = uint16(s[8]) | (uint16(s[9]) << 8)
	dt.lengthOfEachRecord = uint16(s[10]) | (uint16(s[11]) << 8)

	// create fieldMap to translate field name to index
	dt.fieldMap = make(map[string]int)

	// Number of fields in dbase table
	dt.numberOfFields = int((dt.numberOfBytesInHeader - 1 - 32) / 32)

	// populate dbf fields
	for i := 0; i < int(dt.numberOfFields); i++ {
		offset := (i * 32) + 32

		fieldName := strings.Trim(dt.encoder.ConvertString(string(s[offset:offset+10])), string([]byte{0}))
		dt.fieldMap[fieldName] = i

		var err error

		switch s[offset+11] {
		case 'C':
			err = dt.AddTextField(fieldName, s[offset+16])
		case 'N':
			err = dt.AddNumberField(fieldName, s[offset+16], s[offset+17])
		case 'F':
			err = dt.AddFloatField(fieldName, s[offset+16], s[offset+17])
		case 'L':
			err = dt.AddBooleanField(fieldName)
		case 'D':
			err = dt.AddDateField(fieldName)
		}

		// Check return value for errors
		if err != nil {
			return nil, err
		}

		//fmt.Printf("Field name:%v\n", fieldName)
		//fmt.Printf("Field data type:%v\n", string(s[offset+11]))
		//fmt.Printf("Field length:%v\n", s[offset+16])
		//fmt.Println("-----------------------------------------------")
	}

	//fmt.Printf("DbfReader:\n%#v\n", dt)
	//fmt.Printf("DbfReader:\n%#v\n", int(dt.Fields[2].fieldLength))

	//fmt.Printf("num records in table:%v\n", (dt.numberOfRecords))
	//fmt.Printf("lenght of each record:%v\n", (dt.lengthOfEachRecord))

	// Since the dbase file has been read from disk, changing its schema
	// is not allowed at this point.
	dt.dataEntryStarted = true

	// set DbfTable dataStore slice that will store the complete file in memory
	dt.dataStore = s

	return dt, nil
}
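NewFromFile assembles the little-endian header counters by hand. The same reads can be expressed with encoding/binary; this is only an equivalent sketch (the helper name is hypothetical and the package import is assumed):

func parseHeaderCounts(s []byte) (records uint32, headerBytes, recordLength uint16) {
	records = binary.LittleEndian.Uint32(s[4:8])        // bytes 4-7: number of records
	headerBytes = binary.LittleEndian.Uint16(s[8:10])   // bytes 8-9: header size
	recordLength = binary.LittleEndian.Uint16(s[10:12]) // bytes 10-11: record length
	return
}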
Example #9
// Create a new dbase table from scratch
func New(encoding string) (table *DbfTable) {

	// Create and populate DbaseTable struct
	dt := new(DbfTable)

	dt.fileEncoding = encoding
	dt.encoder = mahonia.NewEncoder(encoding)
	dt.decoder = mahonia.NewDecoder(encoding)

	// set whether or not this table has been created from scratch
	dt.createdFromScratch = true

	// initialize dbase table header information
	dt.fileSignature = 0x03
	dt.updateYear = byte(time.Now().Year() % 100)
	dt.updateMonth = byte(time.Now().Month())
	dt.updateDay = byte(time.Now().Day()) // day of month, not day of year
	dt.numberOfRecords = 0
	dt.numberOfBytesInHeader = 32
	dt.lengthOfEachRecord = 0

	// create fieldMap to translate field name to index
	dt.fieldMap = make(map[string]int)

	// Number of fields in dbase table (use signed arithmetic so a bare
	// 32-byte header does not underflow the uint16 subtraction)
	dt.numberOfFields = (int(dt.numberOfBytesInHeader) - 1 - 32) / 32

	s := make([]byte, dt.numberOfBytesInHeader)

	//fmt.Printf("number of fields:\n%#v\n", numberOfFields)
	//fmt.Printf("DbfReader:\n%#v\n", int(dt.Fields[2].fieldLength))

	//fmt.Printf("num records in table:%v\n", (dt.numberOfRecords))
	//fmt.Printf("lenght of each record:%v\n", (dt.lengthOfEachRecord))

	// The table is being created from scratch and no data has been entered yet,
	// so schema changes are still allowed.
	dt.dataEntryStarted = false

	// set DbfTable dataStore slice that will store the complete file in memory
	dt.dataStore = s

	dt.dataStore[0] = dt.fileSignature
	dt.dataStore[1] = dt.updateYear
	dt.dataStore[2] = dt.updateMonth
	dt.dataStore[3] = dt.updateDay

	// no MDX file (index upon demand)
	dt.dataStore[28] = 0x00

	// set dbase language driver
	// Houston, we have a problem!
	// There is no easy way to deal with encoding issues. At least for the moment
	// I will try to find the archaic encoding code defined by the dbase standard
	// (if there is any) for the given encoding. If none matches, I will go with
	// the default ANSI code.
	//
	// Despite this flag being set in the dbase file, I will continue to use the
	// provided encoding for everything except this file-encoding flag.
	//
	// Why? To make sure that, if you at least know the real encoding, you can
	// still process the text accordingly.

	if code, ok := encodingTable[lookup[encoding]]; ok {
		dt.dataStore[28] = code
	} else {
		dt.dataStore[28] = 0x57 // ANSI
	}

	return dt
}
Example #10
// EncodeString converts the UTF-8 string src to the given charset.
func EncodeString(src, charset string) string {
	return mahonia.NewEncoder(charset).ConvertString(src)
}
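A short usage sketch: the charset argument is any name mahonia recognizes, such as the "gbk" and "ISO-8859-1" strings used elsewhere on this page (the literal below is only an example):

func main() {
	// convert a UTF-8 literal to a GBK-encoded string
	fmt.Println(EncodeString("你好, world", "gbk"))
}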
Example #11
// Show converts a UTF-8 string to GBK.
func Show(s string) string {
	enc := mahonia.NewEncoder("gbk")
	return enc.ConvertString(s)
}
Example #12
// ToGBK converts a UTF-8 string to GB2312 (a subset of GBK).
func ToGBK(src string) (dst string) {
	//encoder := mahonia.NewEncoder("gbk")
	encoder := mahonia.NewEncoder("gb2312")
	return encoder.ConvertString(src)
}
Example #13
	}
	port = ":" + string(*p)
}

type GoogleIMESKK struct{}

func (s *GoogleIMESKK) Request(text string) ([]string, error) {
	words, err := Transliterate(text)
	if err != nil {
		return nil, err
	}
	return words, nil
}

// You can specify euc-jp as well.
var enc = mahonia.NewEncoder("utf8")

func Transliterate(text string) (words []string, err error) {
	text = enc.ConvertString(text)
	v := url.Values{"langpair": {"ja-Hira|ja"}, "text": {text + ","}}
	resp, err := http.Get("http://www.google.com/transliterate?" + v.Encode())
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()
	dec := json.NewDecoder(resp.Body)
	var w [][]interface{}
	if err := dec.Decode(&w); err != nil {
		return nil, err
	}
	for _, v := range w[0][1].([]interface{}) {
Example #14
// ConvertToPY converts (and only converts) Chinese characters in a UTF-8 string into their Pinyin initials.
// E.g.
// "啊薄车单饿飞干很见空冷吗嫩哦盘群润三图玩先烟总" -> "abcdefghjklmnopqrstwxyz"
// "世界你好!hello world!" -> "sjnh!hello world!"
// "把UTF8字符转换成gbk" -> "bUTF8zfzhcgbk"
// "all english" -> "all english"
// "1234890" -> "1234890"
// "!@#$%^&*()~,.<>/?';:[]{}" -> "!@#$%^&*()~,.<>/?';:[]{}"
func ConvertToPY(s string) string {
	e := mahonia.NewEncoder("gbk")
	str := e.ConvertString(s)

	// mapfunc maps a two-byte GBK code (its big-endian value minus 65536,
	// which is always negative) to the Pinyin initial of the character.
	mapfunc := func(r int32) rune {
		switch {
		case r >= -20319 && r <= -20284:
			return 'a'
		case r >= -20283 && r <= -19776:
			return 'b'
		case r >= -19775 && r <= -19219:
			return 'c'
		case r >= -19218 && r <= -18711:
			return 'd'
		case r >= -18710 && r <= -18527:
			return 'e'
		case r >= -18526 && r <= -18240:
			return 'f'
		case r >= -18239 && r <= -17923:
			return 'g'
		case r >= -17922 && r <= -17418:
			return 'h'
		case r >= -17417 && r <= -16475:
			return 'j'
		case r >= -16474 && r <= -16213:
			return 'k'
		case r >= -16212 && r <= -15641:
			return 'l'
		case r >= -15640 && r <= -15166:
			return 'm'
		case r >= -15165 && r <= -14923:
			return 'n'
		case r >= -14922 && r <= -14915:
			return 'o'
		case r >= -14914 && r <= -14631:
			return 'p'
		case r >= -14630 && r <= -14150:
			return 'q'
		case r >= -14149 && r <= -14091:
			return 'r'
		case r >= -14090 && r <= -13319:
			return 's'
		case r >= -13318 && r <= -12839:
			return 't'
		case r >= -12838 && r <= -12557:
			return 'w'
		case r >= -12556 && r <= -11848:
			return 'x'
		case r >= -11847 && r <= -11056:
			return 'y'
		case r >= -11055 && r <= -10247:
			return 'z'
		}

		return r
	}

	l := len(str)
	buf := make([]rune, 0)

	for i := 0; i < l; {
		switch {
		case str[i] <= '~':
			buf = append(buf, rune(str[i]))
			i += 1
		case (i + 1) < l:
			a := int32(str[i])*256 + int32(str[i+1]) - 65536

			if r := mapfunc(a); a == r { // a is chinese punctuation
				decoder := mahonia.NewDecoder("gbk")
				decodedStr := decoder.ConvertString(str[i : i+2])
				sReader := strings.NewReader(decodedStr)
				ch, _, _ := sReader.ReadRune()
				buf = append(buf, ch)
			} else {
				buf = append(buf, r)
			}
			i += 2
		default:
			buf = append(buf, rune(str[i]))
			i += 1
		}
	}

	return string(buf)
}
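A usage sketch based on the expected outputs listed in ConvertToPY's doc comment:

func main() {
	fmt.Println(ConvertToPY("世界你好!hello world!")) // sjnh!hello world!
	fmt.Println(ConvertToPY("all english"))          // all english
}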