func Convert(input []byte, from string, to string) ([]byte, error) { cfrom := C.CString(from) defer C.free(unsafe.Pointer(cfrom)) cto := C.CString(to) defer C.free(unsafe.Pointer(cto)) src := (*C.char)(unsafe.Pointer(&input[0])) srcLen := C.int32_t(len(input)) uErr := C.UErrorCode(C.U_ZERO_ERROR) // get dstLen // ignore ENOENT dstLen, _ := C.ucnv_convert(cto, cfrom, nil, 0, src, srcLen, &uErr) if uErr != C.U_BUFFER_OVERFLOW_ERROR { return nil, uErrorToGoError(uErr) } uErr = C.UErrorCode(C.U_ZERO_ERROR) output := make([]byte, int(dstLen)) dst := (*C.char)(unsafe.Pointer(&output[0])) dstLen, err := C.ucnv_convert(cto, cfrom, dst, dstLen, src, srcLen, &uErr) if err != nil { return nil, err } if err = uErrorToGoError(uErr); err != nil { return nil, err } return output, nil }
func (detector *Detector) Run(input []byte) (*Charset, error) { if len(input) == 0 { return nil, ErrEmptyInput } cinput := (*C.char)(unsafe.Pointer(&input[0])) cinputLen := C.size_t(len(input)) mime, err := detector.detectMime(cinput, cinputLen) if err != nil { return nil, err } if !strings.HasPrefix(mime, "text") { return makeCharset(nil, mime) } uErr := C.UErrorCode(C.U_ZERO_ERROR) uCharsetMatch, err := C.ucsd_run(detector.ucsd, (*C.char)(cinput), cinputLen, &uErr) if err != nil { return nil, err } if err = uErrorToGoError(uErr); err != nil { return nil, err } return makeCharset(uCharsetMatch, mime) }
func doICU(tag, caser, input string) string { err := C.UErrorCode(0) loc := C.CString(tag) cm := C.ucasemap_open(loc, C.uint32_t(0), &err) buf := make([]byte, len(input)*4) dst := (*C.char)(unsafe.Pointer(&buf[0])) src := C.CString(input) cn := C.int32_t(0) switch caser { case "fold": cn = C.ucasemap_utf8FoldCase(cm, dst, C.int32_t(len(buf)), src, C.int32_t(len(input)), &err) case "lower": cn = C.ucasemap_utf8ToLower(cm, dst, C.int32_t(len(buf)), src, C.int32_t(len(input)), &err) case "upper": cn = C.ucasemap_utf8ToUpper(cm, dst, C.int32_t(len(buf)), src, C.int32_t(len(input)), &err) case "title": cn = C.ucasemap_utf8ToTitle(cm, dst, C.int32_t(len(buf)), src, C.int32_t(len(input)), &err) } return string(buf[:cn]) }
func ucsdOpen() (*C.UCharsetDetector, error) { uErr := C.UErrorCode(C.U_ZERO_ERROR) ucsd, err := C.ucsdet_open(&uErr) if err != nil { return nil, err } if err = uErrorToGoError(uErr); err != nil { return nil, err } return ucsd, nil }
func (detector *Detector) RunAll(input []byte) ([]*Charset, error) { if len(input) == 0 { return nil, ErrEmptyInput } cinput := (*C.char)(unsafe.Pointer(&input[0])) cinputLen := C.size_t(len(input)) mime, err := detector.detectMime(cinput, cinputLen) if err != nil { return nil, err } if !strings.HasPrefix(mime, "text") { charset, err := makeCharset(nil, mime) if err != nil { return nil, err } return []*Charset{charset}, nil } uErr := C.UErrorCode(C.U_ZERO_ERROR) var matchesFound C.int32_t uCharsetMatches, err := C.ucsd_runAll(detector.ucsd, &matchesFound, (*C.char)(cinput), cinputLen, &uErr) if err != nil { return nil, err } if err = uErrorToGoError(uErr); err != nil { return nil, err } length := int(matchesFound) var umatches []*C.UCharsetMatch var matches []*Charset sliceHeader := (*reflect.SliceHeader)((unsafe.Pointer(&umatches))) sliceHeader.Cap = length sliceHeader.Len = length sliceHeader.Data = uintptr(unsafe.Pointer(uCharsetMatches)) for _, uCharsetMatch := range umatches { charset, err := makeCharset(uCharsetMatch, mime) if err != nil { return nil, err } matches = append(matches, charset) } return matches, nil }
func ucnvOpen(encoding string) (*C.UConverter, error) { cencoding := C.CString(encoding) defer C.free(unsafe.Pointer(cencoding)) uErr := C.UErrorCode(C.U_ZERO_ERROR) ucnv, err := C.ucnv_open(cencoding, &uErr) if err != nil { return nil, err } if err = uErrorToGoError(uErr); err != nil { return nil, err } return ucnv, nil }
func (c *icuUTF8iter) Key(s Input) []byte { err := C.UErrorCode(0) state := [2]C.uint32_t{} C.uiter_setUTF8(&c.a, icuCharP(s.UTF8), icuSLen(s.UTF8)) bp, bn := c.buf() n := C.ucol_nextSortKeyPart(c.col, &c.a, &(state[0]), bp, bn, &err) if n >= bn { // Force failure. if c.extendBuf(n+1) != nil { log.Fatal("expected extension to fail") } return c.Key(s) } return c.extendBuf(n) }
func (detector *Detector) SetDeclaredEncoding(encoding string) error { cencoding := C.CString(encoding) defer C.free(unsafe.Pointer(cencoding)) uErr := C.UErrorCode(C.U_ZERO_ERROR) _, err := C.ucsdet_setDeclaredEncoding(detector.ucsd, cencoding, C.int32_t(len(encoding)), &uErr) if err != nil { return err } if err = uErrorToGoError(uErr); err != nil { return err } return nil }
func (c *icuCollator) init(locale string) error { err := C.UErrorCode(0) c.loc = C.CString(locale) c.col = C.ucol_open(c.loc, &err) if err > 0 { return fmt.Errorf("failed opening collator for %q", locale) } else if err < 0 { loc := C.ucol_getLocaleByType(c.col, 0, &err) fmt, ok := map[int]string{ -127: "warning: using default collator: %s", -128: "warning: using fallback collator: %s", }[int(err)] if ok { log.Printf(fmt, C.GoString(loc)) } } c.keyBuf = make([]byte, 0, growBufSize) return nil }
func makeCharset(uCharsetMatch *C.UCharsetMatch, mime string) (*Charset, error) { if uCharsetMatch == nil { charset := &Charset{ Confidence: 100, Mime: mime, } return charset, nil } uErr := C.UErrorCode(C.U_ZERO_ERROR) cname, err := C.ucsdet_getName(uCharsetMatch, &uErr) if err != nil { return nil, err } if err = uErrorToGoError(uErr); err != nil { return nil, err } cconfidence, err := C.ucsdet_getConfidence(uCharsetMatch, &uErr) if err != nil { return nil, err } if err = uErrorToGoError(uErr); err != nil { return nil, err } clang, err := C.ucsdet_getLanguage(uCharsetMatch, &uErr) if err != nil { return nil, err } if err = uErrorToGoError(uErr); err != nil { return nil, err } charset := &Charset{ Name: C.GoString(cname), Confidence: int(cconfidence), Language: C.GoString(clang), Mime: mime, } return charset, nil }
func (w *WriteCloser) Write(p []byte) (int, error) { plen := len(p) src := (*C.char)(unsafe.Pointer(&p[0])) srcLimit := (*C.char)(unsafe.Pointer(&p[plen])) uErr := C.UErrorCode(C.U_ZERO_ERROR) oMaxLen, err := ucnvMaxLen(plen, w.from, w.to) if err != nil { return 0, err } // reallocate write buffer if oMaxLen > cap(w.obuf) { w.obuf = make([]byte, oMaxLen*2) } dst := (*C.char)(unsafe.Pointer(&w.obuf[0])) dstLimit := (*C.char)(unsafe.Pointer(&w.obuf[oMaxLen])) dstStart := uintptr(unsafe.Pointer(dst)) // fill write buffer by C.ucnv_convertEx _, err = C.ucnv_convertEx(w.to, w.from, &dst, dstLimit, &src, srcLimit, nil, nil, nil, nil, C.UBool(1), C.UBool(1), &uErr) if err != nil { return 0, err } if err = uErrorToGoError(uErr); err != nil { return 0, err } w.olen = int(uintptr(unsafe.Pointer(dst)) - dstStart) n, err := w.w.Write(w.obuf[:w.olen]) if err != nil { return n, err } return n, nil }
func (detector *Detector) SupportedEncodings() ([]string, error) { uErr := C.UErrorCode(C.U_ZERO_ERROR) uenum, err := C.ucsdet_getAllDetectableCharsets(detector.ucsd, &uErr) if err != nil { return nil, err } if err = uErrorToGoError(uErr); err != nil { return nil, err } defer C.uenum_close(uenum) ccount, err := C.uenum_count(uenum, &uErr) if err != nil { return nil, err } if err = uErrorToGoError(uErr); err != nil { return nil, err } encodings := make([]string, 0) var length C.int32_t for i := int(ccount); i > 0; i-- { cencoding, err := C.uenum_next(uenum, &length, &uErr) if err != nil { return nil, err } if err = uErrorToGoError(uErr); err != nil { return nil, err } encodings = append(encodings, C.GoString(cencoding)) } return encodings, nil }
func (r *ReadCloser) Read(p []byte) (int, error) { plen := len(p) if plen == 0 { return 0, nil } n := 0 olen := r.oend - r.ostart // flush write buffer if exist if olen > 0 { if olen > plen { copy(p, r.obuf[r.ostart:plen]) r.ostart += plen return plen, nil } copy(p, r.obuf[r.ostart:r.oend]) r.ostart = 0 r.oend = 0 if olen == plen { return plen, nil } n = olen } // reallocate read buffer or set Len if plen != len(r.ibuf) { if plen > cap(r.ibuf) { r.ibuf = make([]byte, plen, plen*2) } else { r.ibuf = r.ibuf[:plen] } } // fill read buffer ilen, err := r.r.Read(r.ibuf) r.ilen = ilen if err != nil { return n, err } if ilen == 0 { return n, io.EOF } src := (*C.char)(unsafe.Pointer(&r.ibuf[0])) srcLimit := (*C.char)(unsafe.Pointer(&r.ibuf[ilen])) uErr := C.UErrorCode(C.U_ZERO_ERROR) oMaxLen, err := ucnvMaxLen(ilen, r.from, r.to) if err != nil { return n, err } // reallocate write buffer if oMaxLen > cap(r.obuf) { r.obuf = make([]byte, oMaxLen*2) } dst := (*C.char)(unsafe.Pointer(&r.obuf[0])) dstLimit := (*C.char)(unsafe.Pointer(&r.obuf[oMaxLen])) dstStart := uintptr(unsafe.Pointer(dst)) // fill write buffer by C.ucnv_convertEx _, err = C.ucnv_convertEx(r.to, r.from, &dst, dstLimit, &src, srcLimit, nil, nil, nil, nil, C.UBool(1), C.UBool(1), &uErr) if err != nil { return n, err } if err = uErrorToGoError(uErr); err != nil { return n, err } r.ilen = 0 olen = int(uintptr(unsafe.Pointer(dst)) - dstStart) // flush write buffer if olen > plen-n { copy(p[n:], r.obuf[:plen-n]) r.ostart = plen - n r.oend = olen return plen, nil } copy(p[n:], r.obuf[:olen]) return olen + n, nil }
func (c *icuUTF8iter) Compare(a, b Input) int { err := C.UErrorCode(0) C.uiter_setUTF8(&c.a, icuCharP(a.UTF8), icuSLen(a.UTF8)) C.uiter_setUTF8(&c.b, icuCharP(b.UTF8), icuSLen(b.UTF8)) return int(C.ucol_strcollIter(c.col, &c.a, &c.b, &err)) }