Example #1
0
// ExampleFullRune prints whether a complete three-byte encoding, and the
// same encoding with its last byte cut off, begin with a full rune.
func ExampleFullRune() {
	whole := []byte{228, 184, 150} // 世
	truncated := whole[:2]
	fmt.Println(utf8.FullRune(whole))
	fmt.Println(utf8.FullRune(truncated))
	// Output:
	// true
	// false
}
Example #2
0
// Span returns the number of leading bytes of src that are well-formed
// UTF-8. It stops with transform.ErrShortSrc when src ends in what may be an
// incomplete rune and atEOF is false, and with transform.ErrEndOfSpan at the
// first ill-formed byte. err is nil when all of src was accepted.
func (t replaceIllFormed) Span(src []byte, atEOF bool) (n int, err error) {
	for n < len(src) {
		// Single-byte runes need no decoding.
		if src[n] < utf8.RuneSelf {
			n++
			continue
		}

		rest := src[n:]
		if r, width := utf8.DecodeRune(rest); r != utf8.RuneError || width != 1 {
			// Well-formed multi-byte rune.
			n += width
			continue
		}

		// Decoding failed: either more input is needed or the byte is bad.
		if !atEOF && !utf8.FullRune(rest) {
			return n, transform.ErrShortSrc
		}
		return n, transform.ErrEndOfSpan
	}
	return n, nil
}
Example #3
0
File: buf.go Project: bgrundmann/e
// readRuneBackward decodes the rune that ends immediately before the
// reader's current position and moves the reader back over it.
//
// It returns io.EOF when the reader is already at offset 0. Bytes that do
// not form a valid UTF-8 encoding (including an encoding truncated by the
// start of the buffer) are reported the way utf8.DecodeLastRune reports
// them: utf8.RuneError with size 1, stepping back over a single byte only.
func (rd *Reader) readRuneBackward() (r rune, size int, err error) {
	if rd.off == 0 {
		return 0, 0, io.EOF
	}

	// Bytes are met last-to-first, so the window is filled from its end;
	// window[len(window)-n:] therefore always holds them in encoding order.
	var window [utf8.UTFMax]byte
	n := 0

	// Reader position after stepping back exactly one byte. It is restored
	// when the collected bytes turn out not to be one valid encoding, so the
	// position and the returned size always agree.
	oneOff, oneOffInPiece, onePiece := rd.off, rd.offInPiece, rd.piece

	for n < len(window) && rd.off != 0 {
		// Hop to the previous piece once the current one is used up.
		if rd.offInPiece <= 0 {
			rd.piece = rd.piece.prev
			rd.offInPiece = rd.piece.off2
		}
		b := rd.buf.sliceOfPiece(rd.piece)[rd.offInPiece-1]
		rd.offInPiece--
		rd.off--
		if rd.offInPiece <= 0 {
			rd.piece = rd.piece.prev
			rd.offInPiece = rd.piece.off2
		}

		n++
		window[len(window)-n] = b
		if n == 1 {
			oneOff, oneOffInPiece, onePiece = rd.off, rd.offInPiece, rd.piece
		}
		// Anything that is not a continuation byte starts an encoding, so
		// there is no point in looking further back.
		if utf8.RuneStart(b) {
			break
		}
	}

	r, size = utf8.DecodeRune(window[len(window)-n:])
	if size != n {
		// The bytes do not form a single valid encoding: consume only the
		// last byte and report it as an error rune.
		rd.off, rd.offInPiece, rd.piece = oneOff, oneOffInPiece, onePiece
		return utf8.RuneError, 1, nil
	}
	return r, size, nil
}
Example #4
0
// Translate copies data into the translator's scratch buffer, replacing
// every byte that is not part of a valid UTF-8 encoding with errorBytes.
//
// It returns the number of bytes of data consumed, the translated bytes
// (which alias p.scratch) and a nil error. When eof is false and data ends
// in the middle of a rune encoding, the incomplete tail is left unconsumed
// so that it can be completed by a subsequent call.
func (p *translateToUTF8) Translate(data []byte, eof bool) (int, []byte, error) {
	// Worst case: every input byte is replaced by an error rune.
	p.scratch = ensureCap(p.scratch, (len(data))*errorRuneLen)
	buf := p.scratch[:0]
	for i := 0; i < len(data); {
		// fast path for ASCII
		if b := data[i]; b < utf8.RuneSelf {
			buf = append(buf, b)
			i++
			continue
		}
		_, size := utf8.DecodeRune(data[i:])
		if size == 1 {
			// When DecodeRune has converted only a single byte, we know
			// there must be some kind of error because we know the byte's
			// not ASCII. The check must look at the remaining bytes
			// (data[i:]), not at data as a whole: data usually starts with
			// complete runes, which would hide an incomplete tail.
			if !eof && !utf8.FullRune(data[i:]) {
				// Not at EOF and the encoding is merely incomplete:
				// return so the final bytes are processed in a
				// subsequent call.
				return i, buf, nil
			}
			buf = append(buf, errorBytes...)
		} else {
			buf = append(buf, data[i:i+size]...)
		}
		i += size
	}
	return len(data), buf, nil
}
// truncateUntilToken trims the front of data so that it starts on a clean
// boundary. If the first line break lies within dataLossLimit bytes,
// everything up to and including it is dropped and the remainder is passed
// through f.dropCR. Otherwise leading bytes are discarded one at a time
// until data is empty or begins with an ASCII byte or a complete multi-byte
// rune.
func (f FileLoggingCmdRunner) truncateUntilToken(data []byte, dataLossLimit int64) []byte {
	if nl := int64(bytes.IndexByte(data, '\n')); nl >= 0 && nl <= dataLossLimit {
		return f.dropCR(data[nl+1:])
	}

	// Skip bytes that sit in the middle of a rune encoding.
	for len(data) > 0 && data[0] >= utf8.RuneSelf {
		if _, width := utf8.DecodeRune(data); width > 1 && utf8.FullRune(data) {
			break
		}
		data = data[1:]
	}
	return data
}
Example #6
0
// ReadRune decodes and returns the next UTF-8 encoded code point from the
// io.Reader wrapped by r, together with its size in bytes. Bytes read
// beyond the decoded rune are handed back through r.unread. Hitting io.EOF
// in the middle of an encoding is not an error: whatever was read so far is
// decoded.
func (r *readRune) ReadRune() (rr rune, size int, err error) {
	r.buf[0], err = r.readByte()
	if err != nil {
		return 0, 0, err
	}
	// Quick exit for the common single-byte case.
	if b := r.buf[0]; b < utf8.RuneSelf {
		return rune(b), 1, nil
	}

	// Pull in bytes until the buffer holds a full (or invalid) encoding.
	n := 1
	for !utf8.FullRune(r.buf[:n]) {
		r.buf[n], err = r.readByte()
		if err == io.EOF {
			err = nil
			break
		}
		if err != nil {
			return 0, 0, err
		}
		n++
	}

	rr, size = utf8.DecodeRune(r.buf[:n])
	if size < n {
		// Decoding failed part-way; return the surplus bytes.
		r.unread(r.buf[size:n])
	}
	return rr, size, err
}
Example #7
0
// Translate converts the UTF-8 text in data to single-byte code page
// output, one byte per rune. Runes below p.same map to themselves, other
// runes are looked up in p.rune2byte, and runes with no mapping become '?'.
//
// It returns the number of input bytes consumed, the converted bytes (which
// alias p.scratch) and a nil error. When eof is false and data ends in an
// incomplete rune, that tail is left unconsumed.
func (p *translateToCodePage) Translate(data []byte, eof bool) (int, []byte, error) {
	p.scratch = ensureCap(p.scratch, len(data))
	out := p.scratch[:0]

	pos := 0
	for pos < len(data) {
		r, width := rune(data[pos]), 1
		if r >= utf8.RuneSelf {
			tail := data[pos:]
			r, width = utf8.DecodeRune(tail)
			if width == 1 && !eof && !utf8.FullRune(tail) {
				return pos, out, nil
			}
		}

		b := byte('?')
		if r < p.same {
			b = byte(r)
		} else if mapped, ok := p.rune2byte[r]; ok {
			b = mapped
		}
		out = append(out, b)
		pos += width
	}
	return len(data), out, nil
}
Example #8
0
// Transform copies src to dst as long as src is valid UTF-8. It returns
// ErrInvalidUTF8 at the first invalid byte, transform.ErrShortSrc when src
// ends in an incomplete rune and atEOF is false, and transform.ErrShortDst
// when dst cannot hold the next rune or all of src. nDst and nSrc are
// always equal.
func (utf8Validator) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	limit := len(src)
	if len(dst) < limit {
		limit = len(dst)
	}

	pos := 0
	for pos < limit {
		if b := src[pos]; b < utf8.RuneSelf {
			dst[pos] = b
			pos++
			continue
		}

		_, width := utf8.DecodeRune(src[pos:])
		if width == 1 {
			// A non-ASCII byte decoded with width 1 is either invalid or
			// the start of a rune that has not fully arrived yet.
			if !atEOF && !utf8.FullRune(src[pos:]) {
				return pos, pos, transform.ErrShortSrc
			}
			return pos, pos, ErrInvalidUTF8
		}
		if pos+width > len(dst) {
			return pos, pos, transform.ErrShortDst
		}
		copy(dst[pos:pos+width], src[pos:pos+width])
		pos += width
	}

	if len(src) > len(dst) {
		return limit, limit, transform.ErrShortDst
	}
	return limit, limit, nil
}
Example #9
0
// Transform copies src to dst rune by rune, writing U+FFFD in place of every
// byte that is not part of a valid UTF-8 encoding. It stops with
// transform.ErrShortSrc when src ends in an incomplete rune and atEOF is
// false, and with transform.ErrShortDst when dst cannot hold the next rune.
func (replacementEncoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	for nSrc < len(src) {
		r, width := rune(src[nSrc]), 1
		if r >= utf8.RuneSelf {
			// Multi-byte rune.
			r, width = utf8.DecodeRune(src[nSrc:])
			if width == 1 {
				// Either invalid UTF-8 or a rune that has not fully
				// arrived yet.
				if !atEOF && !utf8.FullRune(src[nSrc:]) {
					return nDst, nSrc, transform.ErrShortSrc
				}
				r = '\ufffd'
			}
		}

		if nDst+utf8.RuneLen(r) > len(dst) {
			return nDst, nSrc, transform.ErrShortDst
		}
		nDst += utf8.EncodeRune(dst[nDst:], r)
		nSrc += width
	}
	return nDst, nSrc, nil
}
Example #10
0
File: scan.go Project: Greentor/go
// ScanRunes is a split function for a Scanner that yields one UTF-8-encoded
// rune per token. The runes produced match those of a range loop over the
// input as a string, so an erroneous encoding becomes U+FFFD =
// "\xef\xbf\xbd". As a consequence the client cannot tell a correctly
// encoded replacement rune apart from an encoding error.
func ScanRunes(data []byte, atEOF bool) (advance int, token []byte, err error) {
	if atEOF && len(data) == 0 {
		return 0, nil, nil
	}

	// ASCII: the byte is the token.
	if first := data[0]; first < utf8.RuneSelf {
		return 1, data[:1], nil
	}

	// A width above one can only come from a correctly encoded rune.
	if _, width := utf8.DecodeRune(data); width > 1 {
		return width, data[:width], nil
	}

	// Width one for a non-ASCII byte means decoding failed. FullRune tells
	// an erroneous encoding apart from one that is merely incomplete.
	if atEOF || utf8.FullRune(data) {
		// A real encoding error: emit a properly encoded error rune but
		// advance a single byte, like a range loop over a string does.
		return 1, errorRune, nil
	}

	// Incomplete encoding: ask for more bytes.
	return 0, nil, nil
}
Example #11
0
// main connects to the broker at localhost:9092, consumes the "received"
// topic and prints, for every event, whether its value begins with a full
// UTF-8 rune. Any connection, consumer or event error panics.
func main() {
	client, err := sarama.NewClient("a_logger_for_mhub", []string{"localhost:9092"}, nil)
	if err != nil {
		panic(err)
	}
	os.Stderr.WriteString("> connected\n")
	defer client.Close()

	consumer, err := sarama.NewConsumer(client, "received", 0, "", nil)
	if err != nil {
		panic(err)
	}
	os.Stderr.WriteString("> consumer ready\n")
	defer consumer.Close()

	for {
		event := <-consumer.Events()
		if event.Err != nil {
			panic(event.Err)
		}
		fmt.Println(utf8.FullRune(event.Value))
	}
}
Example #12
0
// Transform encodes the UTF-8 text in src into dst, one output byte per
// rune. Runes found in d.bytes are written as their mapped byte, all others
// as d.replace. It stops with transform.ErrShortDst when dst is full and
// with transform.ErrShortSrc when src ends in an incomplete rune and atEOF
// is false. It returns the bytes written, the bytes consumed and the error.
func (d *cmapEncoder) Transform(dst, src []byte, atEOF bool) (int, int, error) {
	ndst, nsrc := 0, 0
	for nsrc < len(src) {
		if ndst >= len(dst) {
			return ndst, nsrc, transform.ErrShortDst
		}

		rest := src[nsrc:]
		r, width := utf8.DecodeRune(rest)
		// A failed decode is inconclusive while more source data may
		// still arrive.
		if r == utf8.RuneError && width == 1 && !atEOF && !utf8.FullRune(rest) {
			return ndst, nsrc, transform.ErrShortSrc
		}

		enc, ok := d.bytes[r]
		if !ok {
			enc = d.replace
		}
		dst[ndst] = enc
		ndst++
		nsrc += width
	}
	return ndst, nsrc, nil
}
Example #13
0
// Transform applies t to every rune decoded from src and writes the UTF-8
// encoding of the result to dst. It stops with transform.ErrShortSrc when
// src ends in an incomplete rune and atEOF is false, and with
// transform.ErrShortDst when dst cannot hold the next encoded rune.
func (t replaceTransformer) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	var scratch [utf8.UTFMax]byte
	for len(src) > 0 {
		r, width := rune(src[0]), 1
		if r >= utf8.RuneSelf {
			r, width = utf8.DecodeRune(src)
			// Width one here means the decode failed; wait for more input
			// if the encoding might just be incomplete.
			if width == 1 && !atEOF && !utf8.FullRune(src) {
				return nDst, nSrc, transform.ErrShortSrc
			}
		}

		encoded := utf8.EncodeRune(scratch[:], t(r))
		if nDst+encoded > len(dst) {
			return nDst, nSrc, transform.ErrShortDst
		}

		nDst += copy(dst[nDst:], scratch[:encoded])
		nSrc += width
		src = src[width:]
	}
	return nDst, nSrc, nil
}
Example #14
0
// Transform encodes the UTF-8 text in src into the charmap's single-byte
// encoding, writing one byte to dst per rune.
//
// It stops with transform.ErrShortDst when dst is full, with
// transform.ErrShortSrc when src ends in an incomplete rune and atEOF is
// false, and with internal.RepertoireError(m.charmap.replacement) on
// invalid UTF-8 or on a rune that is absent from the encode table.
func (m charmapEncoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	for nSrc < len(src) {
		if nDst >= len(dst) {
			return nDst, nSrc, transform.ErrShortDst
		}

		r, width := rune(src[nSrc]), 1
		switch {
		case r >= utf8.RuneSelf:
			// Multi-byte rune.
			r, width = utf8.DecodeRune(src[nSrc:])
			if width == 1 {
				// Either invalid UTF-8 or a rune that has not fully
				// arrived yet.
				if !atEOF && !utf8.FullRune(src[nSrc:]) {
					return nDst, nSrc, transform.ErrShortSrc
				}
				return nDst, nSrc, internal.RepertoireError(m.charmap.replacement)
			}
		case m.charmap.asciiSuperset:
			// ASCII maps to itself; no table lookup needed.
			dst[nDst] = uint8(r)
			nDst++
			nSrc++
			continue
		}

		// Binary search in [lo, hi) of m.charmap.encode. Each entry keeps
		// the rune in its low 24 bits and the encoded byte above them.
		lo, hi := int(m.charmap.low), 0x100
		for {
			if lo >= hi {
				return nDst, nSrc, internal.RepertoireError(m.charmap.replacement)
			}
			mid := (lo + hi) / 2
			entry := m.charmap.encode[mid]
			key := rune(entry & (1<<24 - 1))
			if key == r {
				dst[nDst] = byte(entry >> 24)
				nDst++
				break
			}
			if key < r {
				lo = mid + 1
			} else {
				hi = mid
			}
		}
		nSrc += width
	}
	return nDst, nSrc, nil
}
Example #15
0
// fullRuneBuffered reports whether the bytes currently buffered in br begin
// with a full UTF-8 encoding of a rune. Nothing is consumed from br. It
// reports false when peeking at the buffered bytes fails.
func fullRuneBuffered(br *bufio.Reader) bool {
	pending, err := br.Peek(br.Buffered())
	return err == nil && utf8.FullRune(pending)
}
Example #16
0
// next reads and returns the next Unicode character. It is designed such
// that only a minimal amount of work needs to be done in the common ASCII
// case (one test to check for both ASCII and end-of-buffer, and one test
// to check for newlines).
//
// It returns EOF when a read fails and no unread bytes remain. Invalid
// UTF-8 and NUL characters are reported through s.error but are still
// returned to the caller (as utf8.RuneError and 0 respectively). The
// line and column counters are updated for the character returned.
func (s *Scanner) next() rune {
	ch := rune(s.srcBuf[s.srcPos])

	if ch >= utf8.RuneSelf {
		// uncommon case: not ASCII or not enough bytes
		// (the sentinel stored past the last valid byte is utf8.RuneSelf,
		// so running off the end of the data also lands here)
		for s.srcPos+utf8.UTFMax > s.srcEnd && !utf8.FullRune(s.srcBuf[s.srcPos:s.srcEnd]) {
			// not enough bytes: read some more, but first
			// save away token text if any
			if s.tokPos >= 0 {
				s.tokBuf.Write(s.srcBuf[s.tokPos:s.srcPos])
				s.tokPos = 0
			}
			// move unread bytes to beginning of buffer
			copy(s.srcBuf[0:], s.srcBuf[s.srcPos:s.srcEnd])
			s.srcBufOffset += s.srcPos
			// read more bytes
			i := s.srcEnd - s.srcPos // number of unread bytes carried over
			n, err := s.src.Read(s.srcBuf[i:bufLen])
			s.srcEnd = i + n
			s.srcPos = 0
			s.srcBuf[s.srcEnd] = utf8.RuneSelf // sentinel
			if err != nil {
				if s.srcEnd == 0 {
					return EOF
				}
				if err != io.EOF {
					s.error(err.Error())
					break
				}
			}
		}
		// at least one byte
		ch = rune(s.srcBuf[s.srcPos])
		if ch >= utf8.RuneSelf {
			// uncommon case: not ASCII
			var width int
			ch, width = utf8.DecodeRune(s.srcBuf[s.srcPos:s.srcEnd])
			if ch == utf8.RuneError && width == 1 {
				s.error("illegal UTF-8 encoding")
			}
			// skip all but one byte of the rune; the increment below
			// accounts for the remaining one
			s.srcPos += width - 1
		}
	}

	s.srcPos++
	s.column++
	switch ch {
	case 0:
		// implementation restriction for compatibility with other tools
		s.error("illegal character NUL")
	case '\n':
		s.line++
		s.column = 0
	}

	return ch
}
Example #17
0
// ReadRune implements the io.RuneReader interface. It decodes the first
// rune of *s and advances *s past it.
//
// It returns io.EOF once the slice is exhausted, so callers that read until
// io.EOF terminate cleanly. io.ErrUnexpectedEOF (with utf8.RuneError and
// size 0, leaving *s untouched) is reserved for a slice that ends in the
// middle of a multi-byte encoding. Invalid bytes decode as utf8.RuneError
// with size 1, as utf8.DecodeRune reports them.
func (s *ByteSlice) ReadRune() (r rune, size int, err error) {
	if len(*s) == 0 {
		return 0, 0, io.EOF
	}
	if !utf8.FullRune(*s) {
		return utf8.RuneError, 0, io.ErrUnexpectedEOF
	}
	r, size = utf8.DecodeRune(*s)
	*s = (*s)[size:]

	return r, size, nil
}
Example #18
0
// getr returns the next rune from the source buffer, refilling the buffer
// as needed. It returns -1 at the end of the buffered data once no more can
// be read, reporting s.err through s.error unless it is io.EOF.
//
// NUL bytes and invalid UTF-8 encodings are reported through s.error and
// skipped. A byte order mark is skipped as well, and reported as an error
// unless it is the first character read.
func (s *source) getr() rune {
redo:
	// remember where this rune starts
	s.r0, s.line0 = s.r, s.line

	// We could avoid at least one test that is always taken in the
	// for loop below by duplicating the common case code (ASCII)
	// here since we always have at least the sentinel (utf8.RuneSelf)
	// in the buffer. Measure and optimize if necessary.

	// make sure we have at least one rune in buffer, or we are at EOF
	for s.r+utf8.UTFMax > s.w && !utf8.FullRune(s.buf[s.r:s.w]) && s.err == nil && s.w-s.r < len(s.buf) {
		s.fill() // s.w-s.r < len(s.buf) => buffer is not full
	}

	// common case: ASCII and enough bytes
	// (invariant: s.buf[s.w] == utf8.RuneSelf)
	if b := s.buf[s.r]; b < utf8.RuneSelf {
		s.r++
		if b == 0 {
			s.error("invalid NUL character")
			goto redo
		}
		if b == '\n' {
			s.line++
		}
		return rune(b)
	}

	// EOF
	if s.r == s.w {
		if s.err != io.EOF {
			s.error(s.err.Error())
		}
		return -1
	}

	// uncommon case: not ASCII
	r, w := utf8.DecodeRune(s.buf[s.r:s.w])
	s.r += w

	if r == utf8.RuneError && w == 1 {
		s.error("invalid UTF-8 encoding")
		goto redo
	}

	// BOM's are only allowed as the first character in a file
	const BOM = 0xfeff
	if r == BOM {
		if s.r0 > 0 { // s.r0 is always > 0 after 1st character (fill will set it to 1)
			s.error("invalid BOM in the middle of the file")
		}
		goto redo
	}

	return r
}
Example #19
0
// ReadRune returns the next rune, taking bytes first from buf.b and then,
// one byte at a time, from buf.f.
//
// Only the bytes actually read are examined: the rune is decoded once they
// form a full (or invalid) encoding. Bytes read beyond the decoded rune are
// handed to buf.PutBack. Bytes obtained from buf.f for this rune are written
// to buf.output, and the rune is appended to buf.collection when buf.collect
// is set. A read error is returned as is, with a zero rune and size.
//
// NOTE(review): this assumes buf.PutBack makes the given bytes available to
// later reads from buf.b — confirm against its definition.
func (buf *buffer) ReadRune() (r rune, size int, err error) {
	chunk := make([]byte, utf8.UTFMax)
	n := 0 // number of valid bytes in chunk

	if buf.b.Len() > 0 {
		n, err = buf.b.Read(chunk)
		if err != nil {
			return 0, 0, err
		}
		if utf8.FullRune(chunk[:n]) {
			r, size = utf8.DecodeRune(chunk[:n])
			if n > size {
				buf.PutBack(chunk[size:n])
			}
			if buf.collect {
				buf.collection.WriteRune(r)
			}
			return r, size, nil
		}
	}

	// chunk[:n] holds at most the beginning of an encoding; complete it with
	// bytes from the file.
	fromFile := n // chunk[fromFile:n] are the bytes read from buf.f
	for n < utf8.UTFMax {
		var fn int
		fn, err = buf.f.Read(chunk[n : n+1])
		if err != nil {
			return 0, 0, err
		}
		n += fn

		if !utf8.FullRune(chunk[:n]) {
			continue
		}
		r, size = utf8.DecodeRune(chunk[:n])
		if n > size {
			// Invalid encoding: only its first byte is consumed, keep the
			// rest for the next call.
			buf.PutBack(chunk[size:n])
		}
		if buf.collect {
			buf.collection.WriteRune(r)
		}
		if n > fromFile {
			if _, err := buf.output.Write(chunk[fromFile:n]); err != nil {
				return r, size, err
			}
		}
		return r, size, nil
	}
	return 0, 0, errors.New("File is not a valid UTF=8 encoding")
}
Example #20
0
File: buffer.go Project: h12w/gombi
// ReadRune decodes the rune at the buffer's read position and advances past
// it. While the unread bytes do not yet hold a full rune it calls b.fill,
// returning that call's error if it fails.
func (b *runeBuffer) ReadRune() (ru rune, size int, err error) {
	for {
		pending := b.buf[b.r:]
		if utf8.FullRune(pending) {
			ru, size = utf8.DecodeRune(pending)
			b.r += size
			return ru, size, nil
		}
		if err := b.fill(); err != nil {
			return 0, 0, err
		}
	}
}
Example #21
0
// Utf8 converts a byte slice to a string after checking that it is valid
// UTF-8.
//
// It returns the converted string and a nil error when bs consists entirely
// of well-formed UTF-8 sequences (an empty slice yields ""). Otherwise it
// returns "" and a non-nil error. The whole slice is validated, not just its
// first rune.
func Utf8(bs []byte) (str string, err error) {
	if !utf8.Valid(bs) {
		// Decoding failed.
		return "", errors.New("fail to decode to UTF8")
	}
	return string(bs), nil
}
Example #22
0
// main prints every rune of a sample string together with its encoded size,
// then shows utf8.FullRune on a complete and on a truncated encoding.
func main() {
	text := []byte("Helloł, 世界")

	for off := 0; off < len(text); {
		r, size := utf8.DecodeRune(text[off:])
		fmt.Printf("%c %v\n", r, size)
		off += size
	}

	// FullRune demonstration.
	fmt.Println()
	fmt.Println()
	complete := []byte{228, 184, 150} // 世
	leadOnly := []byte{228}           // first byte of 世 only

	fmt.Println(utf8.FullRune(complete))
	fmt.Println(utf8.FullRune(leadOnly))
}
Example #23
0
// Write encodes the UTF-8 data in p and writes the result to the underlying
// writer. An incomplete rune at the end of the input is kept in w.inbuf and
// prepended to the data of the next call.
//
// n is len(p), unless the underlying write fails having written fewer bytes
// than that, in which case n is the number of bytes it wrote. err is the
// error of the underlying write.
func (w *Writer) Write(p []byte) (n int, err error) {
	n = len(p)

	// Continue from the bytes left over by the previous call, if any.
	if len(w.inbuf) > 0 {
		w.inbuf = append(w.inbuf, p...)
		p = w.inbuf
	}

	if len(w.outbuf) < len(p) {
		w.outbuf = make([]byte, len(p)+10)
	}

	used := 0 // bytes of w.outbuf filled so far

	for len(p) > 0 {
		r, width := utf8.DecodeRune(p)
		if r == 0xfffd && !utf8.FullRune(p) {
			// Incomplete rune: wait for the rest.
			break
		}
		p = p[width:]

		// Encode r, growing the output buffer and repeating as often as
		// the encoder asks for.
		for {
			produced, status := w.encode(w.outbuf[used:], r)
			if status == NO_ROOM {
				grown := make([]byte, len(w.outbuf)*2)
				copy(grown, w.outbuf)
				w.outbuf = grown
				continue
			}
			used += produced
			if status != STATE_ONLY {
				break
			}
		}
	}

	// Save whatever input is left for the next call.
	w.inbuf = w.inbuf[:0]
	if len(p) > 0 {
		w.inbuf = append(w.inbuf, p...)
	}

	n1, err := w.wr.Write(w.outbuf[0:used])

	if err != nil && n1 < n {
		n = n1
	}

	return
}
Example #24
0
// DataToString converts data bytes to a readable string.
//
// When b is non-empty, valid UTF-8 and made up of printable runes only, it
// is returned as a string. In every other case (empty input, invalid UTF-8
// anywhere in b, or a non-printable rune) the Go-syntax representation of
// the byte slice is returned instead.
func DataToString(b []byte) string {
	// The whole slice has to be validated: a check of the first rune alone
	// lets invalid bytes further in slip through as U+FFFD, which counts as
	// printable.
	if len(b) == 0 || !utf8.Valid(b) {
		return fmt.Sprintf("%#v", b)
	}
	s := string(b)
	for _, r := range s {
		if !unicode.IsPrint(r) {
			return fmt.Sprintf("%#v", b)
		}
	}
	return s
}
Example #25
0
// nextChar reads the next UTF-8 character from the lexer's reader into
// lexer.curr. At end of input lexer.curr is set to TokenEOF and nil is
// returned. Any other read error is returned unchanged, and an invalid
// encoding yields the error built by lexer.newerror.
func (lexer *Lexer) nextChar() error {
	first, err := lexer.reader.ReadByte()
	if err == io.EOF {
		lexer.curr = rune(TokenEOF)
		return nil
	}
	if err != nil {
		return err
	}

	lexer.offset++

	// ASCII: the byte is the character.
	if first < utf8.RuneSelf {
		lexer.curr = rune(first)
		lexer.position.Column++
		return nil
	}

	// Multi-byte character: gather bytes until the encoding is full.
	lexer.buff[0] = first
	lexer.buffPos = 1
	for !utf8.FullRune(lexer.buff[:lexer.buffPos]) {
		next, err := lexer.reader.ReadByte()
		if err == io.EOF {
			lexer.curr = rune(TokenEOF)
			return nil
		}
		if err != nil {
			return err
		}

		lexer.buff[lexer.buffPos] = next
		lexer.buffPos++

		gserrors.Assert(
			lexer.buffPos < len(lexer.buff),
			"utf8.UTFMax must << len(lexer.buff)",
		)
	}

	decoded, width := utf8.DecodeRune(lexer.buff[:lexer.buffPos])
	if decoded == utf8.RuneError && width == 1 {
		return lexer.newerror("illegal utf8 character")
	}

	lexer.curr = decoded
	lexer.position.Column++

	return nil
}
Example #26
0
// parseRune tries to turn the bytes at the front of buf into a KeyRune
// event, decoding them according to t.charset. On success it posts the
// event, consumes the bytes and returns (true, true). It returns
// (false, false) when buf starts with a byte below ' ', and (true, false)
// when the bytes could not be decoded, which is treated as a potential
// escape sequence.
func (t *tScreen) parseRune(buf *bytes.Buffer) (bool, bool) {
	b := buf.Bytes()

	switch first := b[0]; {
	case first >= ' ' && first <= 0x7F:
		// Printable ASCII needs no decoding.
		t.PostEvent(NewEventKey(KeyRune, rune(first), ModNone))
		buf.ReadByte()
		return true, true
	case first < 0x80:
		// No encoding starts with a low-numbered value.
		return false, false
	}

	switch t.charset {
	case "UTF-8":
		if !utf8.FullRune(b) {
			break
		}
		if r, _, e := buf.ReadRune(); e == nil {
			t.PostEvent(NewEventKey(KeyRune, r, ModNone))
			return true, true
		}
	case "US-ASCII":
		// ASCII cannot produce this byte, so it was most likely entered as
		// an Alt sequence.
		t.PostEvent(NewEventKey(KeyRune, rune(b[0]-128), ModAlt))
		buf.ReadByte()
		return true, true

	default:
		// Feed ever longer prefixes to the decoder until it emits output.
		utfb := make([]byte, 12)
		for l := 1; l <= len(b); l++ {
			t.decoder.Reset()
			nout, nin, _ := t.decoder.Transform(utfb, b[:l], true)
			if nout == 0 {
				continue
			}
			if r, _ := utf8.DecodeRune(utfb[:nout]); r != utf8.RuneError {
				t.PostEvent(NewEventKey(KeyRune, r, ModNone))
			}
			for ; nin > 0; nin-- {
				buf.ReadByte()
			}
			return true, true
		}
	}
	// Looks like a potential escape sequence.
	return true, false
}
Example #27
0
// Transform encodes the UTF-8 text in src as UTF-32 into dst. If the
// encoder's current BOM policy asks for it, a byte order mark is written
// first and the policy is switched to IgnoreBOM. Code units are produced in
// big-endian order and byte-swapped at the end when u.endianness is
// LittleEndian.
//
// It stops with transform.ErrShortDst when dst cannot hold the BOM or the
// next code unit, and with transform.ErrShortSrc when src ends in an
// incomplete rune and atEOF is false.
func (u *utf32Encoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	if u.currentBOMPolicy&writeBOM != 0 {
		if len(dst) < 4 {
			return 0, 0, transform.ErrShortDst
		}
		dst[0], dst[1], dst[2], dst[3] = 0x00, 0x00, 0xfe, 0xff
		u.currentBOMPolicy = IgnoreBOM
		nDst = 4
	}

	for nSrc < len(src) {
		r, width := rune(src[nSrc]), 1
		if r >= utf8.RuneSelf {
			// Multi-byte rune. Width one means the decode failed: invalid
			// UTF-8, or a rune that has not fully arrived yet.
			r, width = utf8.DecodeRune(src[nSrc:])
			if width == 1 && !atEOF && !utf8.FullRune(src[nSrc:]) {
				err = transform.ErrShortSrc
				break
			}
		}

		if nDst+4 > len(dst) {
			err = transform.ErrShortDst
			break
		}

		unit := dst[nDst : nDst+4]
		unit[0] = uint8(r >> 24)
		unit[1] = uint8(r >> 16)
		unit[2] = uint8(r >> 8)
		unit[3] = uint8(r)
		nDst += 4
		nSrc += width
	}

	if u.endianness == LittleEndian {
		// Reverse every four-byte unit written so far, BOM included.
		for i := 0; i < nDst; i += 4 {
			unit := dst[i : i+4]
			unit[0], unit[3] = unit[3], unit[0]
			unit[1], unit[2] = unit[2], unit[1]
		}
	}
	return nDst, nSrc, err
}
// main feeds a string to utf8.FullRune one byte at a time and prints each
// rune as soon as its encoding is complete, followed by a newline.
func main() {
	text := "Étoilé"
	pending := make([]byte, 0, 4)
	for i := 0; i < len(text); i++ {
		pending = append(pending, text[i])
		if !utf8.FullRune(pending) {
			continue
		}
		char, _ := utf8.DecodeRune(pending)
		fmt.Printf("%c", char)
		pending = pending[:0]
	}
	fmt.Printf("\n")
}
Example #29
0
// ReadRune reads bytes from the port until they form a full UTF-8 encoding
// and returns the first rune they decode to, together with the number of
// bytes read. If reading a byte fails, that error is returned with a zero
// rune and size.
func (o SFilePort) ReadRune() (r rune, size int, err error) {
	var pending []byte
	for !utf8.FullRune(pending) {
		var c byte
		c, err = o.ReadByte()
		if err != nil {
			return 0, 0, err
		}
		pending = append(pending, c)
	}
	r, _ = utf8.DecodeRune(pending)
	return r, len(pending), nil
}
Example #30
0
File: lex.go Project: vsayer/go
// getr returns the next rune of the input. A rune pushed back into
// l.peekr1/l.peekr2 is returned first; otherwise bytes are read from l.bin
// with obj.Bgetc and decoded as UTF-8.
//
// lexlineno is incremented for every '\n' returned while importpkg is nil.
// A NUL byte is reported and returned as 0. An illegal UTF-8 sequence is
// reported and the decoded error rune is returned. A byte order mark is
// reported and skipped.
func (l *lexer) getr() rune {
	// unread rune != 0 available
	if r := l.peekr1; r != 0 {
		// shift the second pushback slot into the first
		l.peekr1 = l.peekr2
		l.peekr2 = 0
		if r == '\n' && importpkg == nil {
			lexlineno++
		}
		return r
	}

redo:
	// common case: 7bit ASCII
	c := obj.Bgetc(l.bin)
	if c < utf8.RuneSelf {
		if c == 0 {
			yyerrorl(int(lexlineno), "illegal NUL byte")
			return 0
		}
		if c == '\n' && importpkg == nil {
			lexlineno++
		}
		return rune(c)
	}
	// c >= utf8.RuneSelf

	// uncommon case: non-ASCII
	// (a second byte is always read; further bytes only until the buffer
	// holds a full encoding or is full)
	var buf [utf8.UTFMax]byte
	buf[0] = byte(c)
	buf[1] = byte(obj.Bgetc(l.bin))
	i := 2
	for ; i < len(buf) && !utf8.FullRune(buf[:i]); i++ {
		buf[i] = byte(obj.Bgetc(l.bin))
	}

	r, w := utf8.DecodeRune(buf[:i])
	if r == utf8.RuneError && w == 1 {
		// The string conversion here makes a copy for passing
		// to fmt.Printf, so that buf itself does not escape and
		// can be allocated on the stack.
		yyerrorl(int(lexlineno), "illegal UTF-8 sequence % x", string(buf[:i]))
	}

	if r == BOM {
		yyerrorl(int(lexlineno), "Unicode (UTF-8) BOM in middle of file")
		goto redo
	}

	return r
}