Exemple #1
0
func (ic *Iconv) convert(input []byte, out io.Writer, outBuf []byte) (bytesConverted int, err error) {
	inputLen := len(input)
	if inputLen == 0 {
		return
	}

	outputLen := len(outBuf)
	if outputLen == 0 {
		outputLen = inputLen
		outBuf = make([]byte, outputLen)
	}

	outputPtr := &outBuf[0]
	outputPtrPtr := (**C.char)(unsafe.Pointer(&outputPtr))
	outputBytesLeft := C.size_t(outputLen)

	inputPtr := &input[0]
	inputPtrPtr := (**C.char)(unsafe.Pointer(&inputPtr))
	inputBytesLeft := C.size_t(inputLen)

	_, err = C.iconv(ic.pIconv, inputPtrPtr, &inputBytesLeft, outputPtrPtr, &outputBytesLeft)
	bytesConverted = inputLen - int(inputBytesLeft)
	if int(outputBytesLeft) < outputLen {
		out.Write(outBuf[:outputLen-int(outputBytesLeft)])
	}
	return
}
Exemple #2
0
func (iconv *IConv) ConvertBytes(inbuf []byte) ([]byte, error) {
	inbufp := (*C.char)(unsafe.Pointer(&inbuf[0]))
	inbytesleft := C.size_t(len(inbuf))
	outbufCap := len(inbuf) * 2
	if outbufCap < 10 {
		// too small outbuf makes C.iconv returning always E2BIG.
		outbufCap += 10
	}
	outbuf := make([]byte, outbufCap)
	var buf bytes.Buffer
	var err error

	for inbytesleft > 0 {
		outbufp := (*C.char)(unsafe.Pointer(&outbuf[0]))
		outbytesleft := C.size_t(cap(outbuf))

		_, err = C.iconv(iconv.h, &inbufp, &inbytesleft, &outbufp, &outbytesleft)
		buf.Write(outbuf[:outbufCap-int(outbytesleft)])
		if err != syscall.E2BIG {
			break
		}
	}

	return buf.Bytes(), err
}
Exemple #3
0
// Use the codec to convert a string
func (cd *Iconv) Conv(input string) (result string, err error) {
	var buf bytes.Buffer

	if len(input) == 0 {
		return "", nil
	}

	inbuf := []byte(input)
	outbuf := make([]byte, bufSize)
	inbytes := C.size_t(len(inbuf))
	inptr := &inbuf[0]

	for inbytes > 0 {
		outbytes := C.size_t(len(outbuf))
		outptr := &outbuf[0]
		_, err = C.iconv(cd.pointer,
			(**C.char)(unsafe.Pointer(&inptr)), &inbytes,
			(**C.char)(unsafe.Pointer(&outptr)), &outbytes)
		buf.Write(outbuf[:len(outbuf)-int(outbytes)])
		if err != nil && err != syscall.E2BIG {
			return buf.String(), err
		}
	}

	return buf.String(), nil
}
Exemple #4
0
func (cd *Iconv) Conv(input string) (result string, err os.Error) {
	var buf bytes.Buffer

	if len(input) == 0 {
		return "", nil
	}

	inbuf := []byte(input)
	outbuf := make([]byte, len(inbuf))
	inbytes := C.size_t(len(inbuf))
	inptr := &inbuf[0]

	for inbytes > 0 {
		prev_inbytes := inbytes
		outbytes := C.size_t(len(outbuf))
		outptr := &outbuf[0]
		_, err = C.iconv(cd.pointer,
			(**C.char)(unsafe.Pointer(&inptr)), &inbytes,
			(**C.char)(unsafe.Pointer(&outptr)), &outbytes)
		buf.Write(outbuf[:len(outbuf)-int(outbytes)])
		if err != nil {
			if err == E2BIG {
				if prev_inbytes == inbytes {
					// Couldn't progress because the output doesn't fit in the buffer, should grow the buffer
					outbuf = make([]byte, len(outbuf)*2)
				}
			} else {
				return buf.String(), err
			}
		}
	}

	return buf.String(), nil
}
Exemple #5
0
// Internal helper function, wrapped by several other functions
func convertWcharToGoRune(w Wchar) (output rune, err error) {
	// return  if len(input) == 0
	if w == 0 {
		return '\000', nil
	}

	// open iconv
	iconv, errno := C.iconv_open(iconvCharsetUtf8, iconvCharsetWchar)
	if iconv == nil || errno != nil {
		return '\000', fmt.Errorf("Could not open iconv instance: %s", errno.Error())
	}
	defer C.iconv_close(iconv)

	// split Wchar into bytes
	wcharAsBytes := make([]byte, 4)
	binary.LittleEndian.PutUint32(wcharAsBytes, uint32(w))

	// place the wcharAsBytes into wcharAsCChars
	// TODO: use unsafe.Pointer here to do the conversion?
	wcharAsCChars := make([]C.char, 0, 4)
	for i := 0; i < 4; i++ {
		wcharAsCChars = append(wcharAsCChars, C.char(wcharAsBytes[i]))
	}

	// pointer to the first wcharAsCChars
	wcharAsCCharsPtr := &wcharAsCChars[0]

	// calculate buffer size for input
	bytesLeftInCSize := C.size_t(4)

	// calculate buffer size for output
	bytesLeftOutCSize := C.size_t(4)

	// create output buffer
	outputChars := make([]C.char, 4)

	// output buffer pointer for C
	outputCharsPtr := &outputChars[0]

	// call iconv for conversion of charsets
	_, errno = C.iconv(iconv, &wcharAsCCharsPtr, &bytesLeftInCSize, &outputCharsPtr, &bytesLeftOutCSize)
	if errno != nil {
		return '\000', errno
	}

	// convert outputChars ([]int8, len 4) to Wchar
	// TODO: can this conversion be done easier by using this: ?
	// output = *((*rune)(unsafe.Pointer(&outputChars[0])))
	runeAsByteAry := make([]byte, 4)
	runeAsByteAry[0] = byte(outputChars[0])
	runeAsByteAry[1] = byte(outputChars[1])
	runeAsByteAry[2] = byte(outputChars[2])
	runeAsByteAry[3] = byte(outputChars[3])

	// combine 4 position byte slice into uint32 and convert to rune.
	runeAsUint32 := binary.LittleEndian.Uint32(runeAsByteAry)
	output = rune(runeAsUint32)

	return output, nil
}
Exemple #6
0
func (iconv *IConv) IConv(inbuf, outbuf []byte) (inleftbytes, outleftbytes []byte, err error) {
	inbufp, outbufp := (*C.char)(unsafe.Pointer(&inbuf[0])), (*C.char)(unsafe.Pointer(&outbuf[0]))
	inbufLen, outbufCap := len(inbuf), cap(outbuf)
	inbytesleft, outbytesleft := C.size_t(inbufLen), C.size_t(outbufCap)

	_, err = C.iconv(iconv.h, &inbufp, &inbytesleft, &outbufp, &outbytesleft)
	return inbuf[inbufLen-int(inbytesleft):], outbuf[:outbufCap-int(outbytesleft)], err
}
Exemple #7
0
// Do convert from in to out.
func (i Iconv) Conv(in, out []byte) (inlen int, outlen int, err error) {
	insize, outsize := C.size_t(len(in)), C.size_t(len(out))
	inptr, outptr := &in[0], &out[0]
	_, err = C.iconv(i.p,
		(**C.char)(unsafe.Pointer(&inptr)), &insize,
		(**C.char)(unsafe.Pointer(&outptr)), &outsize)
	inlen, outlen = len(in)-int(insize), len(out)-int(outsize)
	return
}
Exemple #8
0
// Internal helper function, wrapped by other functions
func convertGoRuneToWchar(r rune) (output Wchar, err error) {
	// quick return when input is an empty string
	if r == '\000' {
		return Wchar(0), nil
	}

	// open iconv
	iconv, errno := C.iconv_open(iconvCharsetWchar, iconvCharsetUtf8)
	if iconv == nil || errno != nil {
		return Wchar(0), fmt.Errorf("Could not open iconv instance: %s", errno)
	}
	defer C.iconv_close(iconv)

	// bufferSizes for C
	bytesLeftInCSize := C.size_t(4)
	bytesLeftOutCSize := C.size_t(4 * 4)
	// TODO/FIXME: the last 4 bytes as indicated by bytesLeftOutCSize wont be used...
	// iconv assumes each given char to be one wchar.
	// in this case we know that the given 4 chars will actually be one unicode-point and therefore will result in one wchar.
	// hence, we give the iconv library a buffer of 4 char's size, and tell the library that it has a buffer of 32 char's size.
	// if the rune would actually contain 2 unicode-point's this will result in massive failure (and probably the end of a process' life)

	// input for C. makes a copy using C malloc and therefore should be free'd.
	runeCString := C.CString(string(r))
	defer C.free(unsafe.Pointer(runeCString))

	// create output buffer
	outputChars := make([]C.char, 4)

	// output buffer pointer for C
	outputCharsPtr := &outputChars[0]

	// call iconv for conversion of charsets
	_, errno = C.iconv(iconv, &runeCString, &bytesLeftInCSize, &outputCharsPtr, &bytesLeftOutCSize)
	if errno != nil {
		return '\000', errno
	}

	// convert C.char's to Wchar
	wcharAsByteAry := make([]byte, 4)
	wcharAsByteAry[0] = byte(outputChars[0])
	wcharAsByteAry[1] = byte(outputChars[1])
	wcharAsByteAry[2] = byte(outputChars[2])
	wcharAsByteAry[3] = byte(outputChars[3])

	// combine 4 position byte slice into uint32 and convert to Wchar.
	wcharAsUint32 := binary.LittleEndian.Uint32(wcharAsByteAry)
	output = Wchar(wcharAsUint32)

	return output, nil
}
Exemple #9
0
// Convert bytes from an input byte slice into a give output byte slice
//
// As many bytes that can converted and fit into the size of output will be
// processed and the number of bytes read for input as well as the number of
// bytes written to output will be returned. If not all converted bytes can fit
// into output and E2BIG error will also be returned. If input contains an invalid
// sequence of bytes for the Converter's fromEncoding an EILSEQ error will be returned
//
// For shift based output encodings, any end shift byte sequences can be generated by
// passing a 0 length byte slice as input. Also passing a 0 length byte slice for output
// will simply reset the iconv descriptor shift state without writing any bytes.
func (this *Converter) Convert(input []byte, output []byte) (bytesRead int, bytesWritten int, err error) {
	// make sure we are still open
	if this.open {
		inputLeft := C.size_t(len(input))
		outputLeft := C.size_t(len(output))

		if inputLeft > 0 && outputLeft > 0 {
			// we have to give iconv a pointer to a pointer of the underlying
			// storage of each byte slice - so far this is the simplest
			// way i've found to do that in Go, but it seems ugly
			inputPointer := (*C.char)(unsafe.Pointer(&input[0]))
			outputPointer := (*C.char)(unsafe.Pointer(&output[0]))

			_, err = C.iconv(this.context, &inputPointer, &inputLeft, &outputPointer, &outputLeft)

			// update byte counters
			bytesRead = len(input) - int(inputLeft)
			bytesWritten = len(output) - int(outputLeft)
		} else if inputLeft == 0 && outputLeft > 0 {
			// inputPointer will be nil, outputPointer is generated as above
			outputPointer := (*C.char)(unsafe.Pointer(&output[0]))

			_, err = C.iconv(this.context, nil, &inputLeft, &outputPointer, &outputLeft)

			// update write byte counter
			bytesWritten = len(output) - int(outputLeft)
		} else {
			// both input and output are zero length, do a shift state reset
			_, err = C.iconv(this.context, nil, &inputLeft, nil, &outputLeft)
		}
	} else {
		err = syscall.EBADF
	}

	return bytesRead, bytesWritten, err
}
Exemple #10
0
func (cd Iconv) Do(inbuf []byte, in int, outbuf []byte) (out, inleft int, err error) {

	if in == 0 {
		return
	}

	inbytes := C.size_t(in)
	inptr := &inbuf[0]

	outbytes := C.size_t(len(outbuf))
	outptr := &outbuf[0]
	_, err = C.iconv(cd.Handle,
		(**C.char)(unsafe.Pointer(&inptr)), &inbytes,
		(**C.char)(unsafe.Pointer(&outptr)), &outbytes)

	out = len(outbuf) - int(outbytes)
	inleft = int(inbytes)
	return
}
Exemple #11
0
func (p *iconvTranslator) Translate(data []byte, eof bool) (rn int, rd []byte, rerr error) {
	n := 0
	p.scratch = p.scratch[:0]
	for len(data) > 0 {
		p.scratch = ensureCap(p.scratch, len(p.scratch)+len(data)*utf8.UTFMax)
		cData := (*C.char)(unsafe.Pointer(&data[:1][0]))
		nData := C.size_t(len(data))

		ns := len(p.scratch)
		cScratch := (*C.char)(unsafe.Pointer(&p.scratch[ns : ns+1][0]))
		nScratch := C.size_t(cap(p.scratch) - ns)
		r, err := C.iconv(p.cd, &cData, &nData, &cScratch, &nScratch)

		p.scratch = p.scratch[0 : cap(p.scratch)-int(nScratch)]
		n += len(data) - int(nData)
		data = data[len(data)-int(nData):]

		if r != C.iconv_error || err == nil {
			return n, p.scratch, nil
		}
		switch err := err.(syscall.Errno); err {
		case C.EILSEQ:
			// invalid multibyte sequence - skip one byte and continue
			p.scratch = appendRune(p.scratch, p.invalid)
			n++
			data = data[1:]
		case C.EINVAL:
			// incomplete multibyte sequence
			return n, p.scratch, nil
		case C.E2BIG:
			// output buffer not large enough; try again with larger buffer.
			p.scratch = ensureCap(p.scratch, cap(p.scratch)+utf8.UTFMax)
		default:
			panic(fmt.Sprintf("unexpected error code: %v", err))
		}
	}
	return n, p.scratch, nil
}
Exemple #12
0
func (cd Iconv) DoWrite(w io.Writer, inbuf []byte, in int, outbuf []byte) (inleft int, err error) {

	if in == 0 {
		return
	}

	inbytes := C.size_t(in)
	inptr := &inbuf[0]

	for inbytes > 0 {
		outbytes := C.size_t(len(outbuf))
		outptr := &outbuf[0]
		_, err = C.iconv(cd.Handle,
			(**C.char)(unsafe.Pointer(&inptr)), &inbytes,
			(**C.char)(unsafe.Pointer(&outptr)), &outbytes)
		w.Write(outbuf[:len(outbuf)-int(outbytes)])
		if err != nil && err != E2BIG {
			return int(inbytes), err
		}
	}

	return 0, nil
}
Exemple #13
0
// Internal helper function, wrapped by several other functions
func convertWcharStringToGoString(ws WcharString) (output string, err error) {
	// return empty string if len(input) == 0
	if len(ws) == 0 {
		return "", nil
	}

	// open iconv
	iconv, errno := C.iconv_open(iconvCharsetUtf8, iconvCharsetWchar)
	if iconv == nil || errno != nil {
		return "", fmt.Errorf("Could not open iconv instance: %s", errno.Error())
	}
	defer C.iconv_close(iconv)

	inputAsCChars := make([]C.char, 0, len(ws)*4)
	wcharAsBytes := make([]byte, 4)
	for _, nextWchar := range ws {
		// find null terminator
		if nextWchar == 0 {
			// Return empty string if there are no chars in buffer
			//++ FIXME: this should NEVER be the case because input is checked at the begin of this function.
			if len(inputAsCChars) == 0 {
				return "", nil
			}
			break
		}

		// split Wchar into bytes
		binary.LittleEndian.PutUint32(wcharAsBytes, uint32(nextWchar))

		// append the bytes as C.char to inputAsCChars
		for i := 0; i < 4; i++ {
			inputAsCChars = append(inputAsCChars, C.char(wcharAsBytes[i]))
		}
	}

	// input for C
	inputAsCCharsPtr := &inputAsCChars[0]

	// calculate buffer size for input
	bytesLeftInCSize := C.size_t(len(inputAsCChars))

	// calculate buffer size for output
	bytesLeftOutCSize := C.size_t(len(inputAsCChars))

	// create output buffer
	outputChars := make([]C.char, bytesLeftOutCSize)

	// output buffer pointer for C
	outputCharsPtr := &outputChars[0]

	// call iconv for conversion of charsets, return on error
	_, errno = C.iconv(iconv, &inputAsCCharsPtr, &bytesLeftInCSize, &outputCharsPtr, &bytesLeftOutCSize)
	if errno != nil {
		return "", errno
	}

	// conver output buffer to go string
	output = C.GoString((*C.char)(&outputChars[0]))

	return output, nil
}
Exemple #14
0
// Internal helper function, wrapped by several other functions
func convertGoStringToWcharString(input string) (output WcharString, err error) {
	// quick return when input is an empty string
	if input == "" {
		return NewWcharString(0), nil
	}

	// open iconv
	iconv, errno := C.iconv_open(iconvCharsetWchar, iconvCharsetUtf8)
	if iconv == nil || errno != nil {
		return nil, fmt.Errorf("Could not open iconv instance: %s", errno)
	}
	defer C.iconv_close(iconv)

	// calculate bufferSizes in bytes for C
	bytesLeftInCSize := C.size_t(len([]byte(input))) // count exact amount of bytes from input
	bytesLeftOutCSize := C.size_t(len(input) * 4)    // wide char seems to be 4 bytes for every single- or multi-byte character. Not very sure though.

	// input for C. makes a copy using C malloc and therefore should be free'd.
	inputCString := C.CString(input)
	defer C.free(unsafe.Pointer(inputCString))

	// create output buffer
	outputChars := make([]int8, len(input)*4)

	// output for C
	outputCString := (*C.char)(unsafe.Pointer(&outputChars[0]))

	// call iconv for conversion of charsets, return on error
	_, errno = C.iconv(iconv, &inputCString, &bytesLeftInCSize, &outputCString, &bytesLeftOutCSize)
	if errno != nil {
		return nil, errno
	}

	// convert []int8 to WcharString
	// create WcharString with same length as input, and one extra position for the null terminator.
	output = make(WcharString, 0, len(input)+1)
	// create buff to convert each outputChar
	wcharAsByteAry := make([]byte, 4)
	// loop for as long as there are output chars
	for len(outputChars) >= 4 {
		// create 4 position byte slice
		wcharAsByteAry[0] = byte(outputChars[0])
		wcharAsByteAry[1] = byte(outputChars[1])
		wcharAsByteAry[2] = byte(outputChars[2])
		wcharAsByteAry[3] = byte(outputChars[3])
		// combine 4 position byte slice into uint32
		wcharAsUint32 := binary.LittleEndian.Uint32(wcharAsByteAry)
		// find null terminator (doing this right?)
		if wcharAsUint32 == 0x0 {
			break
		}
		// append uint32 to outputUint32
		output = append(output, Wchar(wcharAsUint32))
		// reslice the outputChars
		outputChars = outputChars[4:]
	}
	// Add null terminator
	output = append(output, Wchar(0x0))

	return output, nil
}