Example #1
0
// _peek_char returns the next character available on a textual input port
// as a character object built with Make_char.
//
// Bytes are pulled from the port's reader one at a time and accumulated in
// the port's lookahead buffer until that buffer holds a full UTF-8 sequence.
// This function does not reset lookahead_valid, so the decoded bytes stay
// buffered after the call.
//
// It returns Eof when the reader reports os.EOF, and panics when port is an
// immediate value, is not an *InputPort, is a binary port, or when the
// reader fails with any other error.
func _peek_char(port Obj) Obj {
	if is_immediate(port) {
		panic("bad type")
	}
	in, ok := (*port).(*InputPort)
	if !ok {
		panic("bad type")
	}
	if in.is_binary {
		panic("bad port type")
	}
	for {
		// Stop as soon as the buffered bytes form one complete rune.
		if utf8.FullRune(in.lookahead[0:in.lookahead_valid]) {
			break
		}
		// Read exactly one more byte into the next free lookahead slot.
		got, err := io.ReadFull(in.r,
			in.lookahead[in.lookahead_valid:in.lookahead_valid+1])
		in.lookahead_valid += got
		if err == os.EOF {
			return Eof
		}
		if err != nil {
			panic("I/O read error")
		}
	}
	cp, _ := utf8.DecodeRune(in.lookahead[0:in.lookahead_valid])
	return Make_char(cp)
}
Example #2
0
// ReadRune returns the next UTF-8 encoded code point from the
// io.Reader inside r, together with the number of bytes it occupies.
//
// Bytes are fetched one at a time through r.readByte until r.buf holds a
// full rune. If os.EOF arrives in the middle of a multi-byte sequence, the
// bytes collected so far are decoded as-is and no error is returned for
// this call. When the decoder consumes fewer bytes than were collected
// (an encoding error), the surplus bytes are handed back through r.unread.
//
// Any other read error is returned with a zero rune and zero size.
func (r *readRune) ReadRune() (rune int, size int, err os.Error) {
	r.buf[0], err = r.readByte()
	if err != nil {
		return 0, 0, err
	}
	if r.buf[0] < utf8.RuneSelf { // fast check for common ASCII case
		// An ASCII byte is a complete rune of exactly one byte, so the
		// reported size must be 1, not the zero value.
		return int(r.buf[0]), 1, nil
	}
	var n int
	for n = 1; !utf8.FullRune(r.buf[0:n]); n++ {
		r.buf[n], err = r.readByte()
		if err != nil {
			if err == os.EOF {
				// Input ended mid-sequence: decode what we have.
				err = nil
				break
			}
			return 0, 0, err
		}
	}
	rune, size = utf8.DecodeRune(r.buf[0:n])
	if size < n { // an error
		// Only size bytes were consumed; return the rest for the next read.
		r.unread(r.buf[size:n])
	}
	return rune, size, nil
}
// next reads and returns the next Unicode character. It is designed such
// that only a minimal amount of work needs to be done in the common ASCII
// case (one test to check for both ASCII and end-of-buffer, and one test
// to check for newlines).
//
// It returns EOF when a refill reports an error and no unread bytes are
// left. Read errors other than os.EOF, invalid UTF-8 sequences and NUL
// characters are reported through s.error. The srcPos, column and line
// fields are advanced to reflect the character that was consumed.
func (s *Scanner) next() int {
	ch := int(s.srcBuf[s.srcPos])

	if ch >= utf8.RuneSelf {
		// uncommon case: not ASCII or not enough bytes
		// (the sentinel stored at srcBuf[srcEnd] is utf8.RuneSelf, so
		// running off the end of the buffer also lands here)
		for s.srcPos+utf8.UTFMax > s.srcEnd && !utf8.FullRune(s.srcBuf[s.srcPos:s.srcEnd]) {
			// not enough bytes: read some more, but first
			// save away token text if any
			if s.tokPos >= 0 {
				s.tokBuf.Write(s.srcBuf[s.tokPos:s.srcPos])
				s.tokPos = 0
			}
			// move unread bytes to beginning of buffer
			copy(s.srcBuf[0:], s.srcBuf[s.srcPos:s.srcEnd])
			s.srcBufOffset += s.srcPos
			// read more bytes
			i := s.srcEnd - s.srcPos
			n, err := s.src.Read(s.srcBuf[i:bufLen])
			s.srcEnd = i + n
			s.srcPos = 0
			s.srcBuf[s.srcEnd] = utf8.RuneSelf // sentinel
			if err != nil {
				if s.srcEnd == 0 {
					return EOF
				}
				if err != os.EOF {
					s.error(err.String())
				}
				// Always leave the loop after a read error. On os.EOF no
				// further bytes will arrive, so retrying with a still
				// incomplete sequence in the buffer would spin forever;
				// on any other error we cannot rely on getting more bytes.
				break
			}
		}
		// at least one byte
		ch = int(s.srcBuf[s.srcPos])
		if ch >= utf8.RuneSelf {
			// uncommon case: not ASCII
			var width int
			ch, width = utf8.DecodeRune(s.srcBuf[s.srcPos:s.srcEnd])
			if ch == utf8.RuneError && width == 1 {
				s.error("illegal UTF-8 encoding")
			}
			// the final byte is accounted for by s.srcPos++ below
			s.srcPos += width - 1
		}
	}

	s.srcPos++
	s.column++
	switch ch {
	case 0:
		// implementation restriction for compatibility with other tools
		s.error("illegal character NUL")
	case '\n':
		s.line++
		s.column = 0
	}

	return ch
}
Example #4
0
// ReadRune decodes one UTF-8 encoded Unicode character from the buffer and
// reports the rune together with the number of bytes it occupied.
//
// The buffer is refilled until it holds either utf8.UTFMax unread bytes or
// a full rune, or until b.err becomes non-nil. If nothing is left to read
// afterwards, b.err is returned with a zero rune and size. On success the
// read index moves past the rune and b.lastbyte records its final byte.
func (b *Reader) ReadRune() (rune int, size int, err os.Error) {
	for {
		// Enough data once UTFMax bytes are buffered or a full rune is present.
		if b.r+utf8.UTFMax <= b.w || utf8.FullRune(b.buf[b.r:b.w]) {
			break
		}
		if b.err != nil {
			break
		}
		b.fill()
	}
	if b.r == b.w {
		return 0, 0, b.err
	}
	if first := b.buf[b.r]; first < 0x80 {
		// Single-byte (ASCII) rune: no decoding needed.
		rune, size = int(first), 1
	} else {
		rune, size = utf8.DecodeRune(b.buf[b.r:b.w])
	}
	b.r += size
	b.lastbyte = int(b.buf[b.r-1])
	return rune, size, nil
}
Example #5
0
// next reads and returns the next Unicode character. It is designed such
// that only a minimal amount of work needs to be done in the common ASCII
// case (one test to check for both ASCII and end-of-buffer, and one test
// to check for newlines).
//
// It returns EOF when a refill reports an error and no unread bytes are
// left. The srcPos, column and line fields are advanced to reflect the
// character that was consumed.
func (s *Scanner) next() int {
	ch := int(s.srcBuf[s.srcPos])

	if ch >= utf8.RuneSelf {
		// uncommon case: not ASCII or not enough bytes
		// (the sentinel stored at srcBuf[srcEnd] is utf8.RuneSelf, so
		// running off the end of the buffer also lands here)
		for s.srcPos+utf8.UTFMax > s.srcEnd && !utf8.FullRune(s.srcBuf[s.srcPos:s.srcEnd]) {
			// not enough bytes: read some more, but first
			// save away token text if any
			if s.tokPos >= 0 {
				s.tokBuf.Write(s.srcBuf[s.tokPos:s.srcPos])
				s.tokPos = 0
			}
			// move unread bytes to beginning of buffer
			copy(s.srcBuf[0:], s.srcBuf[s.srcPos:s.srcEnd])
			s.srcBufOffset += s.srcPos
			// read more bytes
			i := s.srcEnd - s.srcPos
			n, err := s.src.Read(s.srcBuf[i:bufLen])
			s.srcEnd = i + n
			s.srcPos = 0
			s.srcBuf[s.srcEnd] = utf8.RuneSelf // sentinel
			if err != nil {
				// nothing buffered at all: the input is exhausted
				if s.srcEnd == 0 {
					return EOF
				}
				// Bytes remain, so report the error and decode what is
				// left. NOTE(review): this branch is also taken for
				// os.EOF, so input ending in an incomplete UTF-8
				// sequence reports the EOF error text through s.error -
				// confirm that this is intended.
				s.error(err.String())
				break
			}
		}
		// at least one byte
		ch = int(s.srcBuf[s.srcPos])
		if ch >= utf8.RuneSelf {
			// uncommon case: not ASCII
			var width int
			ch, width = utf8.DecodeRune(s.srcBuf[s.srcPos:s.srcEnd])
			// the final byte is accounted for by s.srcPos++ below
			s.srcPos += width - 1
		}
	}

	s.srcPos++
	s.column++
	if ch == '\n' {
		// a newline starts a new line at column 0
		s.line++
		s.column = 0
	}

	return ch
}
Example #6
0
// ReadRune decodes one UTF-8 encoded Unicode character from the buffer and
// reports the rune together with the number of bytes it occupied.
//
// The buffer is refilled until it holds either utf8.UTFMax unread bytes or
// a full rune, or until b.err becomes non-nil. If nothing is left to read
// afterwards, the result of b.readErr() is returned with a zero rune and
// size, and b.lastRuneSize is left at -1. On success the read index moves
// past the rune, and b.lastByte and b.lastRuneSize record its final byte
// and its size.
func (b *Reader) ReadRune() (r rune, size int, err error) {
	for {
		// Enough data once UTFMax bytes are buffered or a full rune is present.
		if b.r+utf8.UTFMax <= b.w || utf8.FullRune(b.buf[b.r:b.w]) {
			break
		}
		if b.err != nil {
			break
		}
		b.fill()
	}
	// Invalidate the last-rune record until a rune is actually produced.
	b.lastRuneSize = -1
	if b.r == b.w {
		return 0, 0, b.readErr()
	}
	if first := b.buf[b.r]; first < 0x80 {
		// Single-byte (ASCII) rune: no decoding needed.
		r, size = rune(first), 1
	} else {
		r, size = utf8.DecodeRune(b.buf[b.r:b.w])
	}
	b.r += size
	b.lastByte = int(b.buf[b.r-1])
	b.lastRuneSize = size
	return r, size, nil
}
Example #7
0
// isText reports whether b looks like text: every complete rune in b must
// be valid UTF-8 and must not be an unprintable control character.
//
// Rejected are decoding errors, DEL and the C1 controls (0x7F through 0x9F),
// and every character below space except newline, carriage return and tab.
// Scanning stops at a trailing incomplete UTF-8 sequence, which is not
// treated as an error. An empty slice is considered text.
func isText(b []byte) bool {
	for len(b) > 0 && utf8.FullRune(b) {
		r, size := utf8.DecodeRune(b)
		if size == 1 && r == utf8.RuneError {
			// decoding error
			return false
		}
		// DEL (0x7F) is a control character just like the C1 range.
		if 0x7F <= r && r <= 0x9F {
			return false
		}
		if r < ' ' {
			switch r {
			case '\n', '\r', '\t':
				// ordinary text whitespace
			default:
				return false
			}
		}
		b = b[size:]
	}
	return true
}
Example #8
0
// isText is a heuristic: b counts as text when every complete rune in it
// is valid UTF-8 and none is an unprintable ASCII or Unicode control
// character. A trailing incomplete UTF-8 sequence ends the scan without
// being treated as an error, and an empty slice counts as text.
func isText(b []byte) bool {
	for len(b) > 0 {
		if !utf8.FullRune(b) {
			// trailing partial sequence: stop scanning
			break
		}
		r, n := utf8.DecodeRune(b)
		switch {
		case r == utf8.RuneError && n == 1:
			// decoding error
			return false
		case r >= 0x7F && r <= 0x9F:
			// DEL and the C1 control range
			return false
		case r < ' ' && r != '\n' && r != '\r' && r != '\t':
			// binary garbage
			return false
		}
		b = b[n:]
	}
	return true
}
Example #9
0
// next reads and returns the next Unicode character. It is designed such
// that only a minimal amount of work needs to be done in the common ASCII
// case (one test to check for both ASCII and end-of-buffer, and one test
// to check for newlines).
//
// It returns EOF when a refill reports an error and no unread bytes are
// left. Read errors other than io.EOF, invalid UTF-8 sequences and NUL
// characters are reported through s.error. The srcPos, lastCharLen, column,
// line and lastLineLen fields are updated to reflect the character that
// was consumed.
func (s *Scanner) next() rune {
	ch, width := rune(s.srcBuf[s.srcPos]), 1

	if ch >= utf8.RuneSelf {
		// uncommon case: not ASCII or not enough bytes
		// (the sentinel stored at srcBuf[srcEnd] is utf8.RuneSelf, so
		// running off the end of the buffer also lands here)
		for s.srcPos+utf8.UTFMax > s.srcEnd && !utf8.FullRune(s.srcBuf[s.srcPos:s.srcEnd]) {
			// not enough bytes: read some more, but first
			// save away token text if any
			if s.tokPos >= 0 {
				s.tokBuf.Write(s.srcBuf[s.tokPos:s.srcPos])
				s.tokPos = 0
				// s.tokEnd is set by Scan()
			}
			// move unread bytes to beginning of buffer
			copy(s.srcBuf[0:], s.srcBuf[s.srcPos:s.srcEnd])
			s.srcBufOffset += s.srcPos
			// read more bytes
			// (an io.Reader must return os.EOF when it reaches
			// the end of what it is reading - simply returning
			// n == 0 will make this loop retry forever; but the
			// error is in the reader implementation in that case)
			i := s.srcEnd - s.srcPos
			n, err := s.src.Read(s.srcBuf[i:bufLen])
			s.srcPos = 0
			s.srcEnd = i + n
			s.srcBuf[s.srcEnd] = utf8.RuneSelf // sentinel
			if err != nil {
				// nothing buffered at all: the input is exhausted
				if s.srcEnd == 0 {
					if s.lastCharLen > 0 {
						// previous character was not EOF
						s.column++
					}
					// a zero lastCharLen marks the last character as EOF
					s.lastCharLen = 0
					return EOF
				}
				if err != io.EOF {
					s.error(err.Error())
				}
				// If err == EOF, we won't be getting more
				// bytes; break to avoid infinite loop. If
				// err is something else, we don't know if
				// we can get more bytes; thus also break.
				break
			}
		}
		// at least one byte
		ch = rune(s.srcBuf[s.srcPos])
		if ch >= utf8.RuneSelf {
			// uncommon case: not ASCII
			ch, width = utf8.DecodeRune(s.srcBuf[s.srcPos:s.srcEnd])
			if ch == utf8.RuneError && width == 1 {
				// advance for correct error position
				s.srcPos += width
				s.lastCharLen = width
				s.column++
				s.error("illegal UTF-8 encoding")
				// position already advanced above, so return directly
				return ch
			}
		}
	}

	// advance
	s.srcPos += width
	s.lastCharLen = width
	s.column++

	// special situations
	switch ch {
	case 0:
		// implementation restriction for compatibility with other tools
		s.error("illegal character NUL")
	case '\n':
		s.line++
		// remember the length of the line just finished before resetting
		s.lastLineLen = s.column
		s.column = 0
	}

	return ch
}