Exemple #1
0
// Convert transcodes input from the ICU converter named by from into the
// ICU converter named by to, using ucnv_convert.
//
// The conversion is done in two passes: a preflight call with a nil
// destination to learn the required output size, followed by the real
// conversion into a buffer of exactly that size. The returned slice is
// trimmed to the number of bytes ICU reports it produced.
//
// An empty input yields an empty, non-nil slice without calling into ICU
// (so the converter names are not validated in that case).
func Convert(input []byte, from string, to string) ([]byte, error) {
	// &input[0] below would panic on an empty slice, and there is nothing
	// to convert anyway.
	if len(input) == 0 {
		return []byte{}, nil
	}

	cfrom := C.CString(from)
	defer C.free(unsafe.Pointer(cfrom))

	cto := C.CString(to)
	defer C.free(unsafe.Pointer(cto))

	src := (*C.char)(unsafe.Pointer(&input[0]))
	srcLen := C.int32_t(len(input))

	uErr := C.UErrorCode(C.U_ZERO_ERROR)

	// Preflight: ask ICU how many bytes the output needs. The errno result
	// is deliberately dropped here (the original code notes it can be
	// ENOENT); only the ICU status is meaningful.
	dstLen, _ := C.ucnv_convert(cto, cfrom, nil, 0, src, srcLen, &uErr)
	if uErr != C.U_BUFFER_OVERFLOW_ERROR {
		// Anything other than "buffer too small" means either a real error
		// or that no output is required at all.
		return nil, uErrorToGoError(uErr)
	}
	if dstLen <= 0 {
		// Defensive: never index into a zero-length buffer below.
		return []byte{}, nil
	}
	uErr = C.UErrorCode(C.U_ZERO_ERROR)

	output := make([]byte, int(dstLen))
	dst := (*C.char)(unsafe.Pointer(&output[0]))

	written, err := C.ucnv_convert(cto, cfrom, dst, dstLen, src, srcLen, &uErr)
	if err != nil {
		return nil, err
	}
	if err = uErrorToGoError(uErr); err != nil {
		return nil, err
	}

	// Trust the length from the real conversion rather than the preflight.
	if written >= 0 && int(written) < len(output) {
		output = output[:int(written)]
	}
	return output, nil
}
Exemple #2
0
// Run inspects input and returns a single Charset describing it.
//
// It returns ErrEmptyInput for an empty slice. The MIME type is obtained from
// detector.detectMime; when that type does not start with "text" the charset
// detector is skipped and the result is built from the MIME type alone.
// Otherwise the input is passed to the C helper ucsd_run and the resulting
// match is converted with makeCharset.
func (detector *Detector) Run(input []byte) (*Charset, error) {
	size := len(input)
	if size == 0 {
		return nil, ErrEmptyInput
	}

	data := (*C.char)(unsafe.Pointer(&input[0]))
	dataLen := C.size_t(size)

	mime, err := detector.detectMime(data, dataLen)
	if err != nil {
		return nil, err
	}

	if strings.HasPrefix(mime, "text") {
		status := C.UErrorCode(C.U_ZERO_ERROR)

		match, err := C.ucsd_run(detector.ucsd, data, dataLen, &status)
		if err != nil {
			return nil, err
		}
		if err = uErrorToGoError(status); err != nil {
			return nil, err
		}
		return makeCharset(match, mime)
	}

	// Non-text content: report the MIME type only.
	return makeCharset(nil, mime)
}
Exemple #3
0
// doICU applies the ICU case mapping selected by caser ("fold", "lower",
// "upper" or "title") to the UTF-8 string input, using a UCaseMap opened for
// the locale tag. Any other caser value yields the empty string.
//
// ICU status codes are not inspected; on failure ICU reports a length of 0
// and the result is the empty string.
func doICU(tag, caser, input string) string {
	// An empty input would make the output buffer zero-length and &buf[0]
	// panic; no case mapping can produce output from nothing.
	if input == "" {
		return ""
	}

	err := C.UErrorCode(0)

	loc := C.CString(tag)
	defer C.free(unsafe.Pointer(loc))

	cm := C.ucasemap_open(loc, C.uint32_t(0), &err)
	if cm != nil {
		// Release the case map; it was previously leaked on every call.
		defer C.ucasemap_close(cm)
	}

	// Output buffer is sized at four bytes per input byte.
	buf := make([]byte, len(input)*4)
	dst := (*C.char)(unsafe.Pointer(&buf[0]))

	src := C.CString(input)
	defer C.free(unsafe.Pointer(src))

	cn := C.int32_t(0)

	switch caser {
	case "fold":
		cn = C.ucasemap_utf8FoldCase(cm,
			dst, C.int32_t(len(buf)),
			src, C.int32_t(len(input)),
			&err)
	case "lower":
		cn = C.ucasemap_utf8ToLower(cm,
			dst, C.int32_t(len(buf)),
			src, C.int32_t(len(input)),
			&err)
	case "upper":
		cn = C.ucasemap_utf8ToUpper(cm,
			dst, C.int32_t(len(buf)),
			src, C.int32_t(len(input)),
			&err)
	case "title":
		cn = C.ucasemap_utf8ToTitle(cm,
			dst, C.int32_t(len(buf)),
			src, C.int32_t(len(input)),
			&err)
	}
	return string(buf[:cn])
}
Exemple #4
0
// ucsdOpen creates a new ICU charset detector via ucsdet_open.
//
// It fails if the cgo call reports an errno or if the ICU status, translated
// by uErrorToGoError, is an error.
func ucsdOpen() (*C.UCharsetDetector, error) {
	status := C.UErrorCode(C.U_ZERO_ERROR)

	handle, err := C.ucsdet_open(&status)
	if err == nil {
		err = uErrorToGoError(status)
	}
	if err != nil {
		return nil, err
	}
	return handle, nil
}
Exemple #5
0
// RunAll inspects input and returns every Charset candidate reported for it.
//
// It returns ErrEmptyInput for an empty slice. The MIME type is obtained from
// detector.detectMime; when that type does not start with "text" the result
// is a single Charset built from the MIME type alone. Otherwise the input is
// passed to the C helper ucsd_runAll and each returned match is converted
// with makeCharset, in the order the C array lists them.
func (detector *Detector) RunAll(input []byte) ([]*Charset, error) {
	if len(input) == 0 {
		return nil, ErrEmptyInput
	}

	data := (*C.char)(unsafe.Pointer(&input[0]))
	dataLen := C.size_t(len(input))

	mime, err := detector.detectMime(data, dataLen)
	if err != nil {
		return nil, err
	}

	if !strings.HasPrefix(mime, "text") {
		only, err := makeCharset(nil, mime)
		if err != nil {
			return nil, err
		}
		return []*Charset{only}, nil
	}

	var (
		status = C.UErrorCode(C.U_ZERO_ERROR)
		found  C.int32_t
	)

	rawMatches, err := C.ucsd_runAll(detector.ucsd, &found, data, dataLen, &status)
	if err != nil {
		return nil, err
	}
	if err = uErrorToGoError(status); err != nil {
		return nil, err
	}

	count := int(found)

	// Overlay a Go slice on the C array of match pointers without copying.
	var view []*C.UCharsetMatch
	header := (*reflect.SliceHeader)(unsafe.Pointer(&view))
	header.Data = uintptr(unsafe.Pointer(rawMatches))
	header.Len = count
	header.Cap = count

	var matches []*Charset
	for i := 0; i < count; i++ {
		charset, err := makeCharset(view[i], mime)
		if err != nil {
			return nil, err
		}
		matches = append(matches, charset)
	}
	return matches, nil
}
Exemple #6
0
// ucnvOpen opens an ICU converter for the named encoding via ucnv_open.
//
// It fails if the cgo call reports an errno or if the ICU status, translated
// by uErrorToGoError, is an error.
func ucnvOpen(encoding string) (*C.UConverter, error) {
	name := C.CString(encoding)
	defer C.free(unsafe.Pointer(name))

	status := C.UErrorCode(C.U_ZERO_ERROR)

	converter, err := C.ucnv_open(name, &status)
	if err == nil {
		err = uErrorToGoError(status)
	}
	if err != nil {
		return nil, err
	}
	return converter, nil
}
Exemple #7
0
// Key returns the collation sort key for s.UTF8, computed by ICU's
// ucol_nextSortKeyPart over the iterator c.a.
//
// The key is written into the buffer handed out by c.buf(). If ICU reports
// that it produced at least as many bytes as the buffer holds, the buffer is
// considered too small: c.extendBuf is asked for n+1 bytes and the whole
// computation is retried recursively. The ICU status code is never inspected.
func (c *icuUTF8iter) Key(s Input) []byte {
	err := C.UErrorCode(0)
	// Iteration state for ucol_nextSortKeyPart; zeroed so each call starts
	// from the beginning of the key.
	state := [2]C.uint32_t{}
	C.uiter_setUTF8(&c.a, icuCharP(s.UTF8), icuSLen(s.UTF8))
	bp, bn := c.buf()
	n := C.ucol_nextSortKeyPart(c.col, &c.a, &(state[0]), bp, bn, &err)
	if n >= bn {
		// Buffer filled completely, so the key may have been truncated.
		// NOTE(review): presumably extendBuf returns nil when the request
		// exceeds the current capacity and grows the buffer as a side
		// effect, which is why a non-nil result is treated as fatal —
		// confirm against extendBuf.
		if c.extendBuf(n+1) != nil {
			log.Fatal("expected extension to fail")
		}
		// Retry from scratch with the (presumably larger) buffer.
		return c.Key(s)
	}
	return c.extendBuf(n)
}
Exemple #8
0
// SetDeclaredEncoding passes encoding to ucsdet_setDeclaredEncoding for this
// detector's underlying ICU handle.
//
// It fails if the cgo call reports an errno or if the ICU status, translated
// by uErrorToGoError, is an error.
func (detector *Detector) SetDeclaredEncoding(encoding string) error {
	name := C.CString(encoding)
	defer C.free(unsafe.Pointer(name))

	nameLen := C.int32_t(len(encoding))
	status := C.UErrorCode(C.U_ZERO_ERROR)

	_, err := C.ucsdet_setDeclaredEncoding(detector.ucsd, name, nameLen, &status)
	if err == nil {
		err = uErrorToGoError(status)
	}
	if err != nil {
		return err
	}
	return nil
}
Exemple #9
0
// init opens an ICU collator for locale and prepares the key buffer.
//
// It stores the C copy of the locale name in c.loc and the collator handle in
// c.col. A positive ICU status (failure) is returned as an error. A negative
// status (warning) is not an error; for the two "using default" / "using
// fallback" warnings a message naming the locale actually selected is logged.
func (c *icuCollator) init(locale string) error {
	// ICU UErrorCode values for the warnings reported below.
	const (
		usingFallbackWarning = -128 // U_USING_FALLBACK_WARNING
		usingDefaultWarning  = -127 // U_USING_DEFAULT_WARNING
	)

	err := C.UErrorCode(0)
	c.loc = C.CString(locale)
	c.col = C.ucol_open(c.loc, &err)
	if err > 0 {
		return fmt.Errorf("failed opening collator for %q", locale)
	}
	if err < 0 {
		// Remember the warning before making another ICU call, and give that
		// call its own status so it cannot overwrite the code being reported.
		warning := int(err)
		status := C.UErrorCode(0)
		loc := C.ucol_getLocaleByType(c.col, 0, &status)
		switch warning {
		case usingDefaultWarning:
			log.Printf("warning: using default collator: %s", C.GoString(loc))
		case usingFallbackWarning:
			log.Printf("warning: using fallback collator: %s", C.GoString(loc))
		}
	}
	c.keyBuf = make([]byte, 0, growBufSize)
	return nil
}
Exemple #10
0
// makeCharset builds a Charset from an ICU charset match and a MIME type.
//
// With a nil match the result carries only the MIME type and a Confidence of
// 100. Otherwise the name, confidence and language are read from the match
// with ucsdet_getName, ucsdet_getConfidence and ucsdet_getLanguage; the first
// errno or ICU error encountered aborts the construction.
func makeCharset(uCharsetMatch *C.UCharsetMatch, mime string) (*Charset, error) {
	result := &Charset{Mime: mime}
	if uCharsetMatch == nil {
		result.Confidence = 100
		return result, nil
	}

	status := C.UErrorCode(C.U_ZERO_ERROR)

	name, err := C.ucsdet_getName(uCharsetMatch, &status)
	if err == nil {
		err = uErrorToGoError(status)
	}
	if err != nil {
		return nil, err
	}

	confidence, err := C.ucsdet_getConfidence(uCharsetMatch, &status)
	if err == nil {
		err = uErrorToGoError(status)
	}
	if err != nil {
		return nil, err
	}

	language, err := C.ucsdet_getLanguage(uCharsetMatch, &status)
	if err == nil {
		err = uErrorToGoError(status)
	}
	if err != nil {
		return nil, err
	}

	result.Name = C.GoString(name)
	result.Confidence = int(confidence)
	result.Language = C.GoString(language)
	return result, nil
}
Exemple #11
0
// Write converts p with ucnv_convertEx (from w.from to w.to) and writes the
// converted bytes to the underlying writer w.w.
//
// Following the io.Writer contract, the returned count refers to bytes of p:
// len(p) when the converted data was written successfully, 0 otherwise. An
// empty p is a no-op.
//
// The conversion runs on C-allocated staging buffers. ucnv_convertEx takes
// pointers to the source and target cursors, and the cgo pointer-passing
// rules forbid handing C a Go pointer to memory that itself holds Go
// pointers; C memory also makes the one-past-the-end limit pointers valid.
//
// NOTE(review): each call passes reset=1 and flush=1, so every p is converted
// as an independent chunk — confirm callers never split a multi-byte
// sequence across Write calls.
func (w *WriteCloser) Write(p []byte) (int, error) {
	plen := len(p)
	if plen == 0 {
		return 0, nil
	}

	oMaxLen, err := ucnvMaxLen(plen, w.from, w.to)
	if err != nil {
		return 0, err
	}

	// reallocate write buffer
	if oMaxLen > cap(w.obuf) {
		w.obuf = make([]byte, oMaxLen*2)
	}
	w.obuf = w.obuf[:cap(w.obuf)]

	// Stage input and output in C memory for the duration of the call.
	cin := C.CBytes(p)
	defer C.free(cin)
	cout := C.malloc(C.size_t(oMaxLen))
	defer C.free(cout)

	src := (*C.char)(cin)
	srcLimit := (*C.char)(unsafe.Pointer(uintptr(cin) + uintptr(plen)))
	dst := (*C.char)(cout)
	dstLimit := (*C.char)(unsafe.Pointer(uintptr(cout) + uintptr(oMaxLen)))

	uErr := C.UErrorCode(C.U_ZERO_ERROR)

	// fill the staging buffer by C.ucnv_convertEx; dst is advanced past the
	// last byte written.
	_, err = C.ucnv_convertEx(w.to, w.from, &dst, dstLimit, &src, srcLimit, nil, nil, nil, nil, C.UBool(1), C.UBool(1), &uErr)
	if err != nil {
		return 0, err
	}
	if err = uErrorToGoError(uErr); err != nil {
		return 0, err
	}

	// Copy the converted bytes out of C memory into the reusable Go buffer.
	w.olen = int(uintptr(unsafe.Pointer(dst)) - uintptr(cout))
	copy(w.obuf, (*[1 << 30]byte)(cout)[:w.olen:w.olen])

	if _, err := w.w.Write(w.obuf[:w.olen]); err != nil {
		// The number of input bytes that reached the destination is unknown
		// after a failed write of converted data; report none.
		return 0, err
	}

	return plen, nil
}
Exemple #12
0
// SupportedEncodings lists the charset names enumerated by
// ucsdet_getAllDetectableCharsets for this detector.
//
// The enumeration is closed before returning. Any errno or ICU error raised
// while opening, counting or stepping through the enumeration aborts the
// call. The returned slice is non-nil even when it is empty.
func (detector *Detector) SupportedEncodings() ([]string, error) {
	status := C.UErrorCode(C.U_ZERO_ERROR)

	enum, err := C.ucsdet_getAllDetectableCharsets(detector.ucsd, &status)
	if err == nil {
		err = uErrorToGoError(status)
	}
	if err != nil {
		return nil, err
	}
	defer C.uenum_close(enum)

	total, err := C.uenum_count(enum, &status)
	if err == nil {
		err = uErrorToGoError(status)
	}
	if err != nil {
		return nil, err
	}

	names := []string{}

	var nameLen C.int32_t
	for remaining := int(total); remaining > 0; remaining-- {
		cname, err := C.uenum_next(enum, &nameLen, &status)
		if err == nil {
			err = uErrorToGoError(status)
		}
		if err != nil {
			return nil, err
		}
		names = append(names, C.GoString(cname))
	}

	return names, nil
}
Exemple #13
0
// Read fills p with data read from the underlying reader r.r and converted
// with ucnv_convertEx (from r.from to r.to).
//
// Converted bytes that did not fit into p on a previous call are kept in
// r.obuf[r.ostart:r.oend] and are served first. If p still has room, one
// chunk of at most len(p) bytes is read from r.r, converted, and as much of
// the result as fits is copied into p; the remainder stays pending.
//
// Bytes returned by r.r together with an error (for example a final chunk
// with io.EOF) are converted rather than dropped. The read error is returned
// alongside the data only when all converted output was delivered in this
// call; otherwise it is withheld and the next call is expected to observe it
// again from r.r. A read of zero bytes with a nil error is reported as
// io.EOF.
//
// The conversion runs on C-allocated staging buffers. ucnv_convertEx takes
// pointers to the source and target cursors, and the cgo pointer-passing
// rules forbid handing C a Go pointer to memory that itself holds Go
// pointers; C memory also makes the one-past-the-end limit pointers valid.
//
// NOTE(review): each chunk is converted with reset=1 and flush=1, so a
// multi-byte sequence split across two reads of r.r is not reassembled —
// confirm whether callers depend on that.
func (r *ReadCloser) Read(p []byte) (int, error) {
	plen := len(p)

	if plen == 0 {
		return 0, nil
	}

	n := 0

	// Serve output left over from a previous conversion first.
	if pending := r.oend - r.ostart; pending > 0 {
		n = copy(p, r.obuf[r.ostart:r.oend])
		if n < pending {
			// p is full and converted data is still waiting.
			r.ostart += n
			return n, nil
		}
		r.ostart = 0
		r.oend = 0
		if n == plen {
			return n, nil
		}
	}

	// reallocate read buffer or set Len
	if plen > cap(r.ibuf) {
		r.ibuf = make([]byte, plen, plen*2)
	} else {
		r.ibuf = r.ibuf[:plen]
	}

	// fill read buffer
	ilen, readErr := r.r.Read(r.ibuf)
	r.ilen = ilen
	if ilen == 0 {
		if readErr == nil {
			readErr = io.EOF
		}
		return n, readErr
	}

	oMaxLen, err := ucnvMaxLen(ilen, r.from, r.to)
	if err != nil {
		return n, err
	}

	// reallocate write buffer
	if oMaxLen > cap(r.obuf) {
		r.obuf = make([]byte, oMaxLen*2)
	}
	r.obuf = r.obuf[:cap(r.obuf)]

	// Stage input and output in C memory for the duration of the call.
	cin := C.CBytes(r.ibuf[:ilen])
	defer C.free(cin)
	cout := C.malloc(C.size_t(oMaxLen))
	defer C.free(cout)

	src := (*C.char)(cin)
	srcLimit := (*C.char)(unsafe.Pointer(uintptr(cin) + uintptr(ilen)))
	dst := (*C.char)(cout)
	dstLimit := (*C.char)(unsafe.Pointer(uintptr(cout) + uintptr(oMaxLen)))

	uErr := C.UErrorCode(C.U_ZERO_ERROR)

	// fill the staging buffer by C.ucnv_convertEx; dst is advanced past the
	// last byte written.
	_, err = C.ucnv_convertEx(r.to, r.from, &dst, dstLimit, &src, srcLimit, nil, nil, nil, nil, C.UBool(1), C.UBool(1), &uErr)
	if err != nil {
		return n, err
	}
	if err = uErrorToGoError(uErr); err != nil {
		return n, err
	}

	r.ilen = 0

	// Copy the converted bytes out of C memory into the reusable Go buffer.
	olen := int(uintptr(unsafe.Pointer(dst)) - uintptr(cout))
	copy(r.obuf, (*[1 << 30]byte)(cout)[:olen:olen])

	// flush write buffer
	copied := copy(p[n:], r.obuf[:olen])
	if copied < olen {
		// p is full; keep the remainder for the next call and hold back any
		// read error until that data has been delivered.
		r.ostart = copied
		r.oend = olen
		return plen, nil
	}
	return n + copied, readErr
}
Exemple #14
0
// Compare collates a.UTF8 against b.UTF8 with ucol_strcollIter, using the
// iterators c.a and c.b, and returns ICU's result converted to int. The ICU
// status code is not inspected.
func (c *icuUTF8iter) Compare(a, b Input) int {
	status := C.UErrorCode(0)

	// Point each iterator at its operand before collating.
	C.uiter_setUTF8(&c.a, icuCharP(a.UTF8), icuSLen(a.UTF8))
	C.uiter_setUTF8(&c.b, icuCharP(b.UTF8), icuSLen(b.UTF8))

	result := C.ucol_strcollIter(c.col, &c.a, &c.b, &status)
	return int(result)
}