Beispiel #1
0
// Open creates a codec which converts a string encoded in fromcode into a
// string encoded in tocode.
//
// If you add //TRANSLIT at the end of tocode, any character which doesn't
// exist in the destination charset will be replaced by its closest
// equivalent (for example, € will be represented by EUR in ASCII). Else,
// such a character will trigger an error.
//
// When iconv_open reports an error through errno, that error is returned and
// the *Iconv is nil.
func Open(tocode string, fromcode string) (*Iconv, error) {
	// C.CString allocates with malloc and the Go garbage collector never
	// reclaims it, so both names must be freed once iconv_open has returned.
	ctocode := C.CString(tocode)
	defer C.free(unsafe.Pointer(ctocode))
	cfromcode := C.CString(fromcode)
	defer C.free(unsafe.Pointer(cfromcode))

	ret, err := C.iconv_open(ctocode, cfromcode)
	if err != nil {
		return nil, err
	}
	return &Iconv{ret}, nil
}
Beispiel #2
0
// OpenWithFallback opens an iconv descriptor converting from fromCode to
// toCode and wraps it in an Iconv configured with the given fallbackPolicy.
//
// fallbackPolicy must be one of DISCARD_UNRECOGNIZED, KEEP_UNRECOGNIZED or
// NEXT_ENC_UNRECOGNIZED; any other value yields InvalidFallbackPolicy.
// NilIconvPointer is returned when iconv_open reports no error but hands back
// a nil descriptor. On any error the returned *Iconv is nil.
func OpenWithFallback(fromCode string, toCode string, fallbackPolicy int) (ic *Iconv, err error) {
	var pIconv C.iconv_t

	toCodeCharPtr := C.CString(toCode)
	defer C.free(unsafe.Pointer(toCodeCharPtr))
	fromCodeCharPtr := C.CString(fromCode)
	defer C.free(unsafe.Pointer(fromCodeCharPtr))

	pIconv, err = C.iconv_open(toCodeCharPtr, fromCodeCharPtr)
	if err != nil {
		return nil, err
	}
	if pIconv == nil {
		return nil, NilIconvPointer
	}

	switch fallbackPolicy {
	case DISCARD_UNRECOGNIZED:
		return &Iconv{pIconv: pIconv, fallbackPolicy: fallbackPolicy, fallback: fallbackDiscardUnrecognized}, nil
	case KEEP_UNRECOGNIZED:
		return &Iconv{pIconv: pIconv, fallbackPolicy: fallbackPolicy, fallback: fallbackKeepIntactUnrecognized}, nil
	case NEXT_ENC_UNRECOGNIZED:
		// This policy carries no per-character fallback function.
		return &Iconv{pIconv: pIconv, fallbackPolicy: fallbackPolicy}, nil
	default:
		// The descriptor is already open but no Iconv will own it, so it has
		// to be closed here or it leaks. The close result is ignored because
		// the policy error is what the caller needs to see.
		C.iconv_close(pIconv)
		return nil, InvalidFallbackPolicy
	}
}
Beispiel #3
0
// convertWcharToGoRune converts a single wide character into a Go rune.
//
// Internal helper function, wrapped by several other functions. The Wchar is
// serialised to 4 bytes, run through iconv (iconvCharsetWchar ->
// iconvCharsetUtf8) and the resulting UTF-8 sequence is decoded into a rune.
// A zero Wchar maps to the zero rune without invoking iconv.
//
// An error is returned when the iconv descriptor cannot be opened, when the
// conversion itself fails, or when iconv produces no output.
func convertWcharToGoRune(w Wchar) (output rune, err error) {
	// size of one serialised Wchar and the longest UTF-8 encoding of one rune
	const bufLen = 4

	if w == 0 {
		return '\000', nil
	}

	// open iconv; errno may be nil even when the descriptor is nil, so the two
	// conditions are reported separately instead of dereferencing a nil error.
	iconv, errno := C.iconv_open(iconvCharsetUtf8, iconvCharsetWchar)
	if errno != nil {
		return '\000', fmt.Errorf("Could not open iconv instance: %s", errno.Error())
	}
	if iconv == nil {
		return '\000', fmt.Errorf("Could not open iconv instance: nil descriptor")
	}
	defer C.iconv_close(iconv)

	// split Wchar into bytes
	// NOTE(review): assumes a 4-byte little-endian wchar_t — confirm for the target platforms.
	wcharAsBytes := make([]byte, bufLen)
	binary.LittleEndian.PutUint32(wcharAsBytes, uint32(w))

	// iconv receives pointers to the buffer pointers. The cgo rules forbid
	// passing a Go pointer to memory that itself holds a Go pointer, so both
	// buffers live in C memory.
	inBuf := C.CBytes(wcharAsBytes)
	defer C.free(inBuf)
	outBuf := C.malloc(C.size_t(bufLen))
	defer C.free(outBuf)

	// iconv advances these cursors and decrements the counters as it works.
	inPtr := (*C.char)(inBuf)
	outPtr := (*C.char)(outBuf)
	bytesLeftIn := C.size_t(bufLen)
	bytesLeftOut := C.size_t(bufLen)

	// call iconv for conversion of charsets
	_, errno = C.iconv(iconv, &inPtr, &bytesLeftIn, &outPtr, &bytesLeftOut)
	if errno != nil {
		return '\000', errno
	}

	// The output is a UTF-8 byte sequence, not a UTF-32 value: decode only the
	// bytes iconv actually wrote and take the first rune.
	written := bufLen - int(bytesLeftOut)
	decoded := []rune(string(C.GoBytes(outBuf, C.int(written))))
	if len(decoded) == 0 {
		return '\000', fmt.Errorf("iconv produced no output for wchar %#x", uint32(w))
	}
	return decoded[0], nil
}
Beispiel #4
0
// Open returns an Iconv that converts text encoded in fromcode into tocode.
//
// When iconv_open reports an error through errno, that error is returned
// together with the zero Iconv.
func Open(tocode string, fromcode string) (cd Iconv, err error) {
	// C.CString allocates with malloc and the Go garbage collector never
	// reclaims it, so both names must be freed once iconv_open has returned.
	ctocode := C.CString(tocode)
	defer C.free(unsafe.Pointer(ctocode))
	cfromcode := C.CString(fromcode)
	defer C.free(unsafe.Pointer(cfromcode))

	ret, err := C.iconv_open(ctocode, cfromcode)
	if err != nil {
		return cd, err
	}
	return Iconv{ret}, nil
}
Beispiel #5
0
// NewIConv opens an iconv descriptor that converts from the "from" charset to
// the "to" charset and wraps it in an IConv. The error reported by iconv_open
// is returned as-is, with a nil *IConv.
func NewIConv(from, to string) (*IConv, error) {
	srcName := C.CString(from)
	defer C.free(unsafe.Pointer(srcName))
	dstName := C.CString(to)
	defer C.free(unsafe.Pointer(dstName))

	// iconv_open takes the destination charset first.
	handle, err := C.iconv_open(dstName, srcName)
	if err == nil {
		return &IConv{handle}, nil
	}
	return nil, err
}
Beispiel #6
0
// convertGoRuneToWchar converts a single Go rune into a wide character.
//
// Internal helper function, wrapped by other functions. The rune is encoded
// as UTF-8, run through iconv (iconvCharsetUtf8 -> iconvCharsetWchar) and the
// 4 output bytes are assembled into a Wchar. The zero rune maps to Wchar(0)
// without invoking iconv.
//
// An error is returned when the iconv descriptor cannot be opened, when the
// conversion fails, or when iconv does not produce exactly one 4-byte unit.
func convertGoRuneToWchar(r rune) (output Wchar, err error) {
	// size in bytes of the single wide character expected back from iconv
	const wcharLen = 4

	// quick return when input is the zero rune
	if r == '\000' {
		return Wchar(0), nil
	}

	// open iconv
	iconv, errno := C.iconv_open(iconvCharsetWchar, iconvCharsetUtf8)
	if iconv == nil || errno != nil {
		return Wchar(0), fmt.Errorf("Could not open iconv instance: %s", errno)
	}
	defer C.iconv_close(iconv)

	// input for C. makes a copy using C malloc and therefore should be free'd.
	runeUTF8 := string(r)
	runeCString := C.CString(runeUTF8)
	defer C.free(unsafe.Pointer(runeCString))

	// The output buffer lives in C memory: iconv is handed a pointer to the
	// buffer pointer, and the cgo rules forbid passing a Go pointer to memory
	// that itself holds a Go pointer.
	outBuf := C.malloc(C.size_t(wcharLen))
	defer C.free(outBuf)

	// Both counters describe the real buffers: the input is exactly the UTF-8
	// encoding of r (1-4 bytes, terminator excluded) and the output holds one
	// wide character. Overstating either makes iconv run past the allocation.
	inPtr := runeCString
	outPtr := (*C.char)(outBuf)
	bytesLeftInCSize := C.size_t(len(runeUTF8))
	bytesLeftOutCSize := C.size_t(wcharLen)

	// call iconv for conversion of charsets
	_, errno = C.iconv(iconv, &inPtr, &bytesLeftInCSize, &outPtr, &bytesLeftOutCSize)
	if errno != nil {
		return Wchar(0), errno
	}
	if bytesLeftOutCSize != 0 {
		return Wchar(0), fmt.Errorf("iconv wrote %d bytes, expected %d", wcharLen-int(bytesLeftOutCSize), wcharLen)
	}

	// combine the 4 output bytes into a uint32 and convert to Wchar.
	// NOTE(review): assumes a 4-byte little-endian wchar_t — confirm for the target platforms.
	wcharAsByteAry := C.GoBytes(outBuf, C.int(wcharLen))
	return Wchar(binary.LittleEndian.Uint32(wcharAsByteAry)), nil
}
Beispiel #7
0
// Open returns an Iconv converting text from fromcode to tocode. If
// iconv_open reports an error, the zero Iconv is returned with that error.
func Open(tocode string, fromcode string) (cd Iconv, err error) {
	// The C copies of the charset names are released when Open returns.
	dst := C.CString(tocode)
	defer C.free(unsafe.Pointer(dst))
	src := C.CString(fromcode)
	defer C.free(unsafe.Pointer(src))

	handle, err := C.iconv_open(dst, src)
	if err == nil {
		cd = Iconv{handle}
	}
	return cd, err
}
Beispiel #8
0
// Open returns an Iconv converting text from fromcode to tocode. If
// iconv_open reports an error, the zero Iconv is returned with that error.
func Open(tocode string, fromcode string) (cd Iconv, err error) {
	cTo, cFrom := C.CString(tocode), C.CString(fromcode)
	// Release both C strings together once the descriptor has been opened.
	defer func() {
		C.free(unsafe.Pointer(cFrom))
		C.free(unsafe.Pointer(cTo))
	}()

	descriptor, openErr := C.iconv_open(cTo, cFrom)
	if openErr != nil {
		return Iconv{}, openErr
	}
	return Iconv{descriptor}, nil
}
Beispiel #9
0
// Translator returns a charset.Translator that converts text from
// fromCharset to toCharset using iconv. The invalid rune is stored on the
// translator as the substitute for invalid multibyte input.
//
// If iconv_open fails with EINVAL the error "iconv: conversion not supported"
// is returned; any other failure returns the errno error unchanged.
func Translator(toCharset, fromCharset string, invalid rune) (charset.Translator, error) {
	cto := C.CString(toCharset)
	cfrom := C.CString(fromCharset)
	cd, err := C.iconv_open(cto, cfrom)

	// The charset names are only needed for the open call itself.
	C.free(unsafe.Pointer(cfrom))
	C.free(unsafe.Pointer(cto))

	if cd != C.iconv_open_error {
		t := &iconvTranslator{cd: cd, invalid: invalid}
		// Close the descriptor when the translator is garbage collected.
		runtime.SetFinalizer(t, func(*iconvTranslator) {
			C.iconv_close(cd)
		})
		return t, nil
	}

	if err == syscall.EINVAL {
		return nil, errors.New("iconv: conversion not supported")
	}
	return nil, err
}
Beispiel #10
0
// NewConverter initializes a new Converter from fromEncoding to toEncoding.
// If either encoding is not supported by iconv an EINVAL error will be
// returned. An ENOMEM error may be returned if there is not enough memory to
// initialize an iconv descriptor.
//
// A non-nil Converter is returned even on error; its open flag is set only
// when iconv_open reported no error.
func NewConverter(fromEncoding string, toEncoding string) (converter *Converter, err error) {
	// C copies of the encoding names, released when the function returns.
	cTo := C.CString(toEncoding)
	defer C.free(unsafe.Pointer(cTo))
	cFrom := C.CString(fromEncoding)
	defer C.free(unsafe.Pointer(cFrom))

	converter = &Converter{}

	// open an iconv descriptor and record whether it is usable
	converter.context, err = C.iconv_open(cTo, cFrom)
	converter.open = err == nil

	return converter, err
}
Beispiel #11
0
// convertWcharStringToGoString converts a wide-character string into a Go
// (UTF-8) string.
//
// Internal helper function, wrapped by several other functions. Characters
// are taken from ws up to, but not including, the first zero Wchar (or the end
// of ws), serialised to 4 bytes each and run through iconv
// (iconvCharsetWchar -> iconvCharsetUtf8). An empty ws, or one that starts
// with a zero Wchar, yields "".
//
// An error is returned when the iconv descriptor cannot be opened or when the
// conversion fails.
func convertWcharStringToGoString(ws WcharString) (output string, err error) {
	// return empty string if len(input) == 0
	if len(ws) == 0 {
		return "", nil
	}

	// open iconv; errno may be nil even when the descriptor is nil, so the two
	// conditions are reported separately instead of dereferencing a nil error.
	iconv, errno := C.iconv_open(iconvCharsetUtf8, iconvCharsetWchar)
	if errno != nil {
		return "", fmt.Errorf("Could not open iconv instance: %s", errno.Error())
	}
	if iconv == nil {
		return "", fmt.Errorf("Could not open iconv instance: nil descriptor")
	}
	defer C.iconv_close(iconv)

	// Serialise every Wchar before the null terminator.
	// NOTE(review): assumes a 4-byte little-endian wchar_t — confirm for the target platforms.
	inputBytes := make([]byte, 0, len(ws)*4)
	var scratch [4]byte
	for _, nextWchar := range ws {
		if nextWchar == 0 {
			break
		}
		binary.LittleEndian.PutUint32(scratch[:], uint32(nextWchar))
		inputBytes = append(inputBytes, scratch[:]...)
	}
	// Nothing before the terminator: the result is the empty string.
	if len(inputBytes) == 0 {
		return "", nil
	}

	// iconv receives pointers to the buffer pointers. The cgo rules forbid
	// passing a Go pointer to memory that itself holds a Go pointer, so both
	// buffers live in C memory. The output gets the same size as the input
	// (4 bytes per character).
	bufLen := len(inputBytes)
	inBuf := C.CBytes(inputBytes)
	defer C.free(inBuf)
	outBuf := C.malloc(C.size_t(bufLen))
	defer C.free(outBuf)

	// iconv advances these cursors and decrements the counters as it works.
	inPtr := (*C.char)(inBuf)
	outPtr := (*C.char)(outBuf)
	bytesLeftInCSize := C.size_t(bufLen)
	bytesLeftOutCSize := C.size_t(bufLen)

	// call iconv for conversion of charsets, return on error
	_, errno = C.iconv(iconv, &inPtr, &bytesLeftInCSize, &outPtr, &bytesLeftOutCSize)
	if errno != nil {
		return "", errno
	}

	// The output is not NUL-terminated when it fills the buffer, so copy
	// exactly the number of bytes iconv wrote instead of scanning for a NUL.
	written := bufLen - int(bytesLeftOutCSize)
	return C.GoStringN((*C.char)(outBuf), C.int(written)), nil
}
Beispiel #12
0
// convertGoStringToWcharString converts a Go (UTF-8) string into a
// null-terminated wide-character string.
//
// Internal helper function, wrapped by several other functions. The input is
// run through iconv (iconvCharsetUtf8 -> iconvCharsetWchar) and the output is
// split into 4-byte units, each becoming one Wchar. Decoding stops at the
// first zero unit, and a zero Wchar terminator is always appended. An empty
// input returns NewWcharString(0) without invoking iconv.
//
// An error is returned when the iconv descriptor cannot be opened or when the
// conversion fails.
func convertGoStringToWcharString(input string) (output WcharString, err error) {
	// quick return when input is an empty string
	if input == "" {
		return NewWcharString(0), nil
	}

	// open iconv
	iconv, errno := C.iconv_open(iconvCharsetWchar, iconvCharsetUtf8)
	if iconv == nil || errno != nil {
		return nil, fmt.Errorf("Could not open iconv instance: %s", errno)
	}
	defer C.iconv_close(iconv)

	// Buffer sizes in bytes: the input is the exact UTF-8 length, and the
	// output reserves 4 bytes per input byte, an upper bound because every
	// character occupies at least one input byte.
	inLen := len(input)
	outLen := inLen * 4

	// input for C. makes a copy using C malloc and therefore should be free'd.
	inputCString := C.CString(input)
	defer C.free(unsafe.Pointer(inputCString))

	// The output buffer lives in C memory: iconv is handed a pointer to the
	// buffer pointer, and the cgo rules forbid passing a Go pointer to memory
	// that itself holds a Go pointer.
	outBuf := C.malloc(C.size_t(outLen))
	defer C.free(outBuf)

	// iconv advances these cursors and decrements the counters as it works.
	inPtr := inputCString
	outPtr := (*C.char)(outBuf)
	bytesLeftInCSize := C.size_t(inLen)
	bytesLeftOutCSize := C.size_t(outLen)

	// call iconv for conversion of charsets, return on error
	_, errno = C.iconv(iconv, &inPtr, &bytesLeftInCSize, &outPtr, &bytesLeftOutCSize)
	if errno != nil {
		return nil, errno
	}

	// Copy back only the bytes iconv actually wrote; the rest of the malloc'd
	// buffer is uninitialised.
	written := outLen - int(bytesLeftOutCSize)
	outputBytes := C.GoBytes(outBuf, C.int(written))

	// create WcharString with room for one Wchar per input byte plus the
	// null terminator.
	// NOTE(review): assumes a 4-byte little-endian wchar_t — confirm for the target platforms.
	output = make(WcharString, 0, inLen+1)
	for len(outputBytes) >= 4 {
		wcharAsUint32 := binary.LittleEndian.Uint32(outputBytes)
		// an embedded zero unit ends the string early
		if wcharAsUint32 == 0x0 {
			break
		}
		output = append(output, Wchar(wcharAsUint32))
		outputBytes = outputBytes[4:]
	}
	// Add null terminator
	output = append(output, Wchar(0x0))

	return output, nil
}
Beispiel #13
0
// NewIconv creates an Iconv instance that converts text from fromcode to
// tocode. The error reported by iconv_open (via errno), if any, is returned
// alongside the Iconv holding whatever descriptor iconv_open produced.
func NewIconv(tocode, fromcode string) (i Iconv, err error) {
	// C.CString allocates with malloc and the Go garbage collector never
	// reclaims it, so both names must be freed once iconv_open has returned.
	ctocode := C.CString(tocode)
	defer C.free(unsafe.Pointer(ctocode))
	cfromcode := C.CString(fromcode)
	defer C.free(unsafe.Pointer(cfromcode))

	i.p, err = C.iconv_open(ctocode, cfromcode)
	return i, err
}