func (ic *Iconv) convert(input []byte, out io.Writer, outBuf []byte) (bytesConverted int, err error) { inputLen := len(input) if inputLen == 0 { return } outputLen := len(outBuf) if outputLen == 0 { outputLen = inputLen outBuf = make([]byte, outputLen) } outputPtr := &outBuf[0] outputPtrPtr := (**C.char)(unsafe.Pointer(&outputPtr)) outputBytesLeft := C.size_t(outputLen) inputPtr := &input[0] inputPtrPtr := (**C.char)(unsafe.Pointer(&inputPtr)) inputBytesLeft := C.size_t(inputLen) _, err = C.iconv(ic.pIconv, inputPtrPtr, &inputBytesLeft, outputPtrPtr, &outputBytesLeft) bytesConverted = inputLen - int(inputBytesLeft) if int(outputBytesLeft) < outputLen { out.Write(outBuf[:outputLen-int(outputBytesLeft)]) } return }
func (iconv *IConv) ConvertBytes(inbuf []byte) ([]byte, error) { inbufp := (*C.char)(unsafe.Pointer(&inbuf[0])) inbytesleft := C.size_t(len(inbuf)) outbufCap := len(inbuf) * 2 if outbufCap < 10 { // too small outbuf makes C.iconv returning always E2BIG. outbufCap += 10 } outbuf := make([]byte, outbufCap) var buf bytes.Buffer var err error for inbytesleft > 0 { outbufp := (*C.char)(unsafe.Pointer(&outbuf[0])) outbytesleft := C.size_t(cap(outbuf)) _, err = C.iconv(iconv.h, &inbufp, &inbytesleft, &outbufp, &outbytesleft) buf.Write(outbuf[:outbufCap-int(outbytesleft)]) if err != syscall.E2BIG { break } } return buf.Bytes(), err }
// Use the codec to convert a string func (cd *Iconv) Conv(input string) (result string, err error) { var buf bytes.Buffer if len(input) == 0 { return "", nil } inbuf := []byte(input) outbuf := make([]byte, bufSize) inbytes := C.size_t(len(inbuf)) inptr := &inbuf[0] for inbytes > 0 { outbytes := C.size_t(len(outbuf)) outptr := &outbuf[0] _, err = C.iconv(cd.pointer, (**C.char)(unsafe.Pointer(&inptr)), &inbytes, (**C.char)(unsafe.Pointer(&outptr)), &outbytes) buf.Write(outbuf[:len(outbuf)-int(outbytes)]) if err != nil && err != syscall.E2BIG { return buf.String(), err } } return buf.String(), nil }
func (cd *Iconv) Conv(input string) (result string, err os.Error) { var buf bytes.Buffer if len(input) == 0 { return "", nil } inbuf := []byte(input) outbuf := make([]byte, len(inbuf)) inbytes := C.size_t(len(inbuf)) inptr := &inbuf[0] for inbytes > 0 { prev_inbytes := inbytes outbytes := C.size_t(len(outbuf)) outptr := &outbuf[0] _, err = C.iconv(cd.pointer, (**C.char)(unsafe.Pointer(&inptr)), &inbytes, (**C.char)(unsafe.Pointer(&outptr)), &outbytes) buf.Write(outbuf[:len(outbuf)-int(outbytes)]) if err != nil { if err == E2BIG { if prev_inbytes == inbytes { // Couldn't progress because the output doesn't fit in the buffer, should grow the buffer outbuf = make([]byte, len(outbuf)*2) } } else { return buf.String(), err } } } return buf.String(), nil }
// Internal helper function, wrapped by several other functions func convertWcharToGoRune(w Wchar) (output rune, err error) { // return if len(input) == 0 if w == 0 { return '\000', nil } // open iconv iconv, errno := C.iconv_open(iconvCharsetUtf8, iconvCharsetWchar) if iconv == nil || errno != nil { return '\000', fmt.Errorf("Could not open iconv instance: %s", errno.Error()) } defer C.iconv_close(iconv) // split Wchar into bytes wcharAsBytes := make([]byte, 4) binary.LittleEndian.PutUint32(wcharAsBytes, uint32(w)) // place the wcharAsBytes into wcharAsCChars // TODO: use unsafe.Pointer here to do the conversion? wcharAsCChars := make([]C.char, 0, 4) for i := 0; i < 4; i++ { wcharAsCChars = append(wcharAsCChars, C.char(wcharAsBytes[i])) } // pointer to the first wcharAsCChars wcharAsCCharsPtr := &wcharAsCChars[0] // calculate buffer size for input bytesLeftInCSize := C.size_t(4) // calculate buffer size for output bytesLeftOutCSize := C.size_t(4) // create output buffer outputChars := make([]C.char, 4) // output buffer pointer for C outputCharsPtr := &outputChars[0] // call iconv for conversion of charsets _, errno = C.iconv(iconv, &wcharAsCCharsPtr, &bytesLeftInCSize, &outputCharsPtr, &bytesLeftOutCSize) if errno != nil { return '\000', errno } // convert outputChars ([]int8, len 4) to Wchar // TODO: can this conversion be done easier by using this: ? // output = *((*rune)(unsafe.Pointer(&outputChars[0]))) runeAsByteAry := make([]byte, 4) runeAsByteAry[0] = byte(outputChars[0]) runeAsByteAry[1] = byte(outputChars[1]) runeAsByteAry[2] = byte(outputChars[2]) runeAsByteAry[3] = byte(outputChars[3]) // combine 4 position byte slice into uint32 and convert to rune. runeAsUint32 := binary.LittleEndian.Uint32(runeAsByteAry) output = rune(runeAsUint32) return output, nil }
func (iconv *IConv) IConv(inbuf, outbuf []byte) (inleftbytes, outleftbytes []byte, err error) { inbufp, outbufp := (*C.char)(unsafe.Pointer(&inbuf[0])), (*C.char)(unsafe.Pointer(&outbuf[0])) inbufLen, outbufCap := len(inbuf), cap(outbuf) inbytesleft, outbytesleft := C.size_t(inbufLen), C.size_t(outbufCap) _, err = C.iconv(iconv.h, &inbufp, &inbytesleft, &outbufp, &outbytesleft) return inbuf[inbufLen-int(inbytesleft):], outbuf[:outbufCap-int(outbytesleft)], err }
// Do convert from in to out. func (i Iconv) Conv(in, out []byte) (inlen int, outlen int, err error) { insize, outsize := C.size_t(len(in)), C.size_t(len(out)) inptr, outptr := &in[0], &out[0] _, err = C.iconv(i.p, (**C.char)(unsafe.Pointer(&inptr)), &insize, (**C.char)(unsafe.Pointer(&outptr)), &outsize) inlen, outlen = len(in)-int(insize), len(out)-int(outsize) return }
// Internal helper function, wrapped by other functions func convertGoRuneToWchar(r rune) (output Wchar, err error) { // quick return when input is an empty string if r == '\000' { return Wchar(0), nil } // open iconv iconv, errno := C.iconv_open(iconvCharsetWchar, iconvCharsetUtf8) if iconv == nil || errno != nil { return Wchar(0), fmt.Errorf("Could not open iconv instance: %s", errno) } defer C.iconv_close(iconv) // bufferSizes for C bytesLeftInCSize := C.size_t(4) bytesLeftOutCSize := C.size_t(4 * 4) // TODO/FIXME: the last 4 bytes as indicated by bytesLeftOutCSize wont be used... // iconv assumes each given char to be one wchar. // in this case we know that the given 4 chars will actually be one unicode-point and therefore will result in one wchar. // hence, we give the iconv library a buffer of 4 char's size, and tell the library that it has a buffer of 32 char's size. // if the rune would actually contain 2 unicode-point's this will result in massive failure (and probably the end of a process' life) // input for C. makes a copy using C malloc and therefore should be free'd. runeCString := C.CString(string(r)) defer C.free(unsafe.Pointer(runeCString)) // create output buffer outputChars := make([]C.char, 4) // output buffer pointer for C outputCharsPtr := &outputChars[0] // call iconv for conversion of charsets _, errno = C.iconv(iconv, &runeCString, &bytesLeftInCSize, &outputCharsPtr, &bytesLeftOutCSize) if errno != nil { return '\000', errno } // convert C.char's to Wchar wcharAsByteAry := make([]byte, 4) wcharAsByteAry[0] = byte(outputChars[0]) wcharAsByteAry[1] = byte(outputChars[1]) wcharAsByteAry[2] = byte(outputChars[2]) wcharAsByteAry[3] = byte(outputChars[3]) // combine 4 position byte slice into uint32 and convert to Wchar. wcharAsUint32 := binary.LittleEndian.Uint32(wcharAsByteAry) output = Wchar(wcharAsUint32) return output, nil }
// Convert bytes from an input byte slice into a give output byte slice // // As many bytes that can converted and fit into the size of output will be // processed and the number of bytes read for input as well as the number of // bytes written to output will be returned. If not all converted bytes can fit // into output and E2BIG error will also be returned. If input contains an invalid // sequence of bytes for the Converter's fromEncoding an EILSEQ error will be returned // // For shift based output encodings, any end shift byte sequences can be generated by // passing a 0 length byte slice as input. Also passing a 0 length byte slice for output // will simply reset the iconv descriptor shift state without writing any bytes. func (this *Converter) Convert(input []byte, output []byte) (bytesRead int, bytesWritten int, err error) { // make sure we are still open if this.open { inputLeft := C.size_t(len(input)) outputLeft := C.size_t(len(output)) if inputLeft > 0 && outputLeft > 0 { // we have to give iconv a pointer to a pointer of the underlying // storage of each byte slice - so far this is the simplest // way i've found to do that in Go, but it seems ugly inputPointer := (*C.char)(unsafe.Pointer(&input[0])) outputPointer := (*C.char)(unsafe.Pointer(&output[0])) _, err = C.iconv(this.context, &inputPointer, &inputLeft, &outputPointer, &outputLeft) // update byte counters bytesRead = len(input) - int(inputLeft) bytesWritten = len(output) - int(outputLeft) } else if inputLeft == 0 && outputLeft > 0 { // inputPointer will be nil, outputPointer is generated as above outputPointer := (*C.char)(unsafe.Pointer(&output[0])) _, err = C.iconv(this.context, nil, &inputLeft, &outputPointer, &outputLeft) // update write byte counter bytesWritten = len(output) - int(outputLeft) } else { // both input and output are zero length, do a shift state reset _, err = C.iconv(this.context, nil, &inputLeft, nil, &outputLeft) } } else { err = syscall.EBADF } return bytesRead, bytesWritten, err }
func (cd Iconv) Do(inbuf []byte, in int, outbuf []byte) (out, inleft int, err error) { if in == 0 { return } inbytes := C.size_t(in) inptr := &inbuf[0] outbytes := C.size_t(len(outbuf)) outptr := &outbuf[0] _, err = C.iconv(cd.Handle, (**C.char)(unsafe.Pointer(&inptr)), &inbytes, (**C.char)(unsafe.Pointer(&outptr)), &outbytes) out = len(outbuf) - int(outbytes) inleft = int(inbytes) return }
func (p *iconvTranslator) Translate(data []byte, eof bool) (rn int, rd []byte, rerr error) { n := 0 p.scratch = p.scratch[:0] for len(data) > 0 { p.scratch = ensureCap(p.scratch, len(p.scratch)+len(data)*utf8.UTFMax) cData := (*C.char)(unsafe.Pointer(&data[:1][0])) nData := C.size_t(len(data)) ns := len(p.scratch) cScratch := (*C.char)(unsafe.Pointer(&p.scratch[ns : ns+1][0])) nScratch := C.size_t(cap(p.scratch) - ns) r, err := C.iconv(p.cd, &cData, &nData, &cScratch, &nScratch) p.scratch = p.scratch[0 : cap(p.scratch)-int(nScratch)] n += len(data) - int(nData) data = data[len(data)-int(nData):] if r != C.iconv_error || err == nil { return n, p.scratch, nil } switch err := err.(syscall.Errno); err { case C.EILSEQ: // invalid multibyte sequence - skip one byte and continue p.scratch = appendRune(p.scratch, p.invalid) n++ data = data[1:] case C.EINVAL: // incomplete multibyte sequence return n, p.scratch, nil case C.E2BIG: // output buffer not large enough; try again with larger buffer. p.scratch = ensureCap(p.scratch, cap(p.scratch)+utf8.UTFMax) default: panic(fmt.Sprintf("unexpected error code: %v", err)) } } return n, p.scratch, nil }
func (cd Iconv) DoWrite(w io.Writer, inbuf []byte, in int, outbuf []byte) (inleft int, err error) { if in == 0 { return } inbytes := C.size_t(in) inptr := &inbuf[0] for inbytes > 0 { outbytes := C.size_t(len(outbuf)) outptr := &outbuf[0] _, err = C.iconv(cd.Handle, (**C.char)(unsafe.Pointer(&inptr)), &inbytes, (**C.char)(unsafe.Pointer(&outptr)), &outbytes) w.Write(outbuf[:len(outbuf)-int(outbytes)]) if err != nil && err != E2BIG { return int(inbytes), err } } return 0, nil }
// Internal helper function, wrapped by several other functions func convertWcharStringToGoString(ws WcharString) (output string, err error) { // return empty string if len(input) == 0 if len(ws) == 0 { return "", nil } // open iconv iconv, errno := C.iconv_open(iconvCharsetUtf8, iconvCharsetWchar) if iconv == nil || errno != nil { return "", fmt.Errorf("Could not open iconv instance: %s", errno.Error()) } defer C.iconv_close(iconv) inputAsCChars := make([]C.char, 0, len(ws)*4) wcharAsBytes := make([]byte, 4) for _, nextWchar := range ws { // find null terminator if nextWchar == 0 { // Return empty string if there are no chars in buffer //++ FIXME: this should NEVER be the case because input is checked at the begin of this function. if len(inputAsCChars) == 0 { return "", nil } break } // split Wchar into bytes binary.LittleEndian.PutUint32(wcharAsBytes, uint32(nextWchar)) // append the bytes as C.char to inputAsCChars for i := 0; i < 4; i++ { inputAsCChars = append(inputAsCChars, C.char(wcharAsBytes[i])) } } // input for C inputAsCCharsPtr := &inputAsCChars[0] // calculate buffer size for input bytesLeftInCSize := C.size_t(len(inputAsCChars)) // calculate buffer size for output bytesLeftOutCSize := C.size_t(len(inputAsCChars)) // create output buffer outputChars := make([]C.char, bytesLeftOutCSize) // output buffer pointer for C outputCharsPtr := &outputChars[0] // call iconv for conversion of charsets, return on error _, errno = C.iconv(iconv, &inputAsCCharsPtr, &bytesLeftInCSize, &outputCharsPtr, &bytesLeftOutCSize) if errno != nil { return "", errno } // conver output buffer to go string output = C.GoString((*C.char)(&outputChars[0])) return output, nil }
// Internal helper function, wrapped by several other functions func convertGoStringToWcharString(input string) (output WcharString, err error) { // quick return when input is an empty string if input == "" { return NewWcharString(0), nil } // open iconv iconv, errno := C.iconv_open(iconvCharsetWchar, iconvCharsetUtf8) if iconv == nil || errno != nil { return nil, fmt.Errorf("Could not open iconv instance: %s", errno) } defer C.iconv_close(iconv) // calculate bufferSizes in bytes for C bytesLeftInCSize := C.size_t(len([]byte(input))) // count exact amount of bytes from input bytesLeftOutCSize := C.size_t(len(input) * 4) // wide char seems to be 4 bytes for every single- or multi-byte character. Not very sure though. // input for C. makes a copy using C malloc and therefore should be free'd. inputCString := C.CString(input) defer C.free(unsafe.Pointer(inputCString)) // create output buffer outputChars := make([]int8, len(input)*4) // output for C outputCString := (*C.char)(unsafe.Pointer(&outputChars[0])) // call iconv for conversion of charsets, return on error _, errno = C.iconv(iconv, &inputCString, &bytesLeftInCSize, &outputCString, &bytesLeftOutCSize) if errno != nil { return nil, errno } // convert []int8 to WcharString // create WcharString with same length as input, and one extra position for the null terminator. output = make(WcharString, 0, len(input)+1) // create buff to convert each outputChar wcharAsByteAry := make([]byte, 4) // loop for as long as there are output chars for len(outputChars) >= 4 { // create 4 position byte slice wcharAsByteAry[0] = byte(outputChars[0]) wcharAsByteAry[1] = byte(outputChars[1]) wcharAsByteAry[2] = byte(outputChars[2]) wcharAsByteAry[3] = byte(outputChars[3]) // combine 4 position byte slice into uint32 wcharAsUint32 := binary.LittleEndian.Uint32(wcharAsByteAry) // find null terminator (doing this right?) if wcharAsUint32 == 0x0 { break } // append uint32 to outputUint32 output = append(output, Wchar(wcharAsUint32)) // reslice the outputChars outputChars = outputChars[4:] } // Add null terminator output = append(output, Wchar(0x0)) return output, nil }