Esempio n. 1
0
// TestReadWriteRune writes the first NRune code points through a Writer with
// WriteRune and checks that a Reader returns the same runes, with the same
// encoded widths, from ReadRune.
func TestReadWriteRune(t *testing.T) {
	const NRune = 1000
	byteBuf := new(bytes.Buffer)
	w := NewWriter(byteBuf)
	// Write the runes out using WriteRune.
	buf := make([]byte, utf8.UTFMax)
	for r := rune(0); r < NRune; r++ {
		size := utf8.EncodeRune(buf, r)
		nbytes, err := w.WriteRune(r)
		if err != nil {
			t.Fatalf("WriteRune(0x%x) error: %s", r, err)
		}
		if nbytes != size {
			t.Fatalf("WriteRune(0x%x) expected %d, got %d", r, size, nbytes)
		}
	}
	w.Flush()

	r := NewReader(byteBuf)
	// Read them back with ReadRune.
	for r1 := rune(0); r1 < NRune; r1++ {
		size := utf8.EncodeRune(buf, r1)
		nr, nbytes, err := r.ReadRune()
		if nr != r1 || nbytes != size || err != nil {
			// Report the expected rune r1; r is the Reader, not a rune.
			t.Fatalf("ReadRune(0x%x) got 0x%x,%d not 0x%x,%d (err=%s)", r1, nr, nbytes, r1, size, err)
		}
	}
}
Esempio n. 2
0
// TestRuneIO writes the first NRune code points into a Buffer with WriteRune,
// checks the accumulated bytes against a reference encoding, and then reads
// the runes back with ReadRune.
func TestRuneIO(t *testing.T) {
	const NRune = 1000
	// Build the reference encoding while we write the data.
	b := make([]byte, utf8.UTFMax*NRune)
	var buf Buffer
	n := 0
	for r := rune(0); r < NRune; r++ {
		size := utf8.EncodeRune(b[n:], r)
		nbytes, err := buf.WriteRune(r)
		if err != nil {
			t.Fatalf("WriteRune(0x%x) error: %s", r, err)
		}
		if nbytes != size {
			t.Fatalf("WriteRune(0x%x) expected %d, got %d", r, size, nbytes)
		}
		n += size
	}
	b = b[0:n]

	// Check the resulting bytes.
	if !Equal(buf.Bytes(), b) {
		t.Fatalf("incorrect result from WriteRune: %q not %q", buf.Bytes(), b)
	}

	// Encode into a scratch buffer so the reference bytes in b stay intact.
	p := make([]byte, utf8.UTFMax)
	// Read it back with ReadRune.
	for r := rune(0); r < NRune; r++ {
		size := utf8.EncodeRune(p, r)
		nr, nbytes, err := buf.ReadRune()
		if nr != r || nbytes != size || err != nil {
			t.Fatalf("ReadRune(0x%x) got 0x%x,%d not 0x%x,%d (err=%s)", r, nr, nbytes, r, size, err)
		}
	}
}
Esempio n. 3
0
// outputDot appends Graphviz "dot" statements for every child of p to vec and
// then recurses into each child.
//
// rune is the character that labels p and serial is the identifier that was
// generated for p by its parent; a serial of -1 marks the root, which is
// labelled "root" instead of a character. rgen supplies the serial numbers for
// the children.
func (p *Trie) outputDot(vec *vector.StringVector, rune int, serial int64, rgen *rand.Rand) {
	this := make([]byte, utf8.UTFMax)
	child := make([]byte, utf8.UTFMax)

	// Use the complete UTF-8 encoding; taking only the first byte would
	// produce a wrong label for every non-ASCII character.
	thisLen := utf8.EncodeRune(this, rune)
	thisChar := string(this[:thisLen])

	if serial == -1 {
		thisChar = "root"
	}

	for childRune, childNode := range p.children {
		childLen := utf8.EncodeRune(child, childRune)
		childChar := string(child[:childLen])
		childSerial := rgen.Int63()
		childNodeStr := fmt.Sprintf("\"%s(%d)\"", childChar, childSerial)
		var notation string

		// A "/" child is drawn as a red box; every other child gets a plain label.
		if childChar == "/" {
			notation = fmt.Sprintf("[label=\"%s\" shape=box color=red]", childChar)
		} else {
			notation = fmt.Sprintf("[label=\"%s\"]", childChar)
		}
		vec.Push(fmt.Sprintf("\t%s %s\n\t\"%s(%d)\" -> \"%s(%d)\"", childNodeStr, notation, thisChar, serial, childChar, childSerial))
		childNode.outputDot(vec, childRune, childSerial, rgen)
	}
}
Esempio n. 4
0
// TestRuneIO exercises WriteRune, ReadRune and UnreadRune on a Buffer using
// the first NRune code points.
func TestRuneIO(t *testing.T) {
	const NRune = 1000

	// Accumulate the expected encoding alongside the WriteRune calls.
	want := make([]byte, utf8.UTFMax*NRune)
	var buf Buffer
	used := 0
	for r := rune(0); r < NRune; r++ {
		width := utf8.EncodeRune(want[used:], r)
		wrote, err := buf.WriteRune(r)
		if err != nil {
			t.Fatalf("WriteRune(%U) error: %s", r, err)
		}
		if wrote != width {
			t.Fatalf("WriteRune(%U) expected %d, got %d", r, width, wrote)
		}
		used += width
	}
	want = want[:used]

	// The buffer must now hold exactly the reference encoding.
	if !Equal(buf.Bytes(), want) {
		t.Fatalf("incorrect result from WriteRune: %q not %q", buf.Bytes(), want)
	}

	// Every rune must come back from ReadRune with its encoded width.
	scratch := make([]byte, utf8.UTFMax)
	for r := rune(0); r < NRune; r++ {
		width := utf8.EncodeRune(scratch, r)
		got, n, err := buf.ReadRune()
		if got != r || n != width || err != nil {
			t.Fatalf("ReadRune(%U) got %U,%d not %U,%d (err=%s)", r, got, n, r, width, err)
		}
	}

	// Refill the buffer and verify that UnreadRune makes the next ReadRune
	// return the same rune again.
	buf.Reset()
	buf.Write(want)
	for r := rune(0); r < NRune; r++ {
		first, width, _ := buf.ReadRune()
		if err := buf.UnreadRune(); err != nil {
			t.Fatalf("UnreadRune(%U) got error %q", r, err)
		}
		second, n, err := buf.ReadRune()
		if first != second || first != r || n != width || err != nil {
			t.Fatalf("ReadRune(%U) after UnreadRune got %U,%d not %U,%d (err=%s)", r, second, n, r, width, err)
		}
	}
}
Esempio n. 5
0
// WriteRune writes a single Unicode code point, returning
// the number of bytes written and any error.
//
// Runes below utf8.RuneSelf are written as a single byte. Anything else,
// including negative values, goes through utf8.EncodeRune.
func (b *Writer) WriteRune(r rune) (size int, err error) {
	// Compare as uint32 so that a negative rune does not take the
	// single-byte path and get written as one garbage byte.
	if uint32(r) < utf8.RuneSelf {
		err = b.WriteByte(byte(r))
		if err != nil {
			return 0, err
		}
		return 1, nil
	}
	if b.err != nil {
		return 0, b.err
	}
	n := b.Available()
	if n < utf8.UTFMax {
		// Not enough room for a worst-case encoding: flush and look again.
		if b.Flush(); b.err != nil {
			return 0, b.err
		}
		n = b.Available()
		if n < utf8.UTFMax {
			// Can only happen if buffer is silly small.
			return b.WriteString(string(r))
		}
	}
	size = utf8.EncodeRune(b.buf[b.n:], r)
	b.n += size
	return size, nil
}
Esempio n. 6
0
// EntityToUtf8 converts a single HTML entity to its UTF-8 text.
//
// Entities matched by reg_entnamed are resolved through namedEntityToUtf8.
// Entities matched by reg_entnumeric are parsed as a decimal code point and
// encoded as UTF-8. Anything else, and any numeric entity that does not denote
// a valid code point, is returned in an "&amp;"-escaped form.
func EntityToUtf8(entity string) string {
	if reg_entnamed.MatchString(entity) {
		return namedEntityToUtf8(entity[1 : len(entity)-1])
	}

	if !reg_entnumeric.MatchString(entity) {
		// NOTE(review): entity[2:] drops the character right after '&' and
		// panics for inputs shorter than three bytes — confirm that callers
		// only pass "&#...;"-shaped strings here.
		return "&amp;" + entity[2:len(entity)-1] + ";"
	}

	// Strip the leading "&#" and the trailing ";".
	entity = entity[2 : len(entity)-1]
	num, err := strconv.Atoi(entity)
	if err != nil || num < 0 || num > utf8.MaxRune || !utf8.ValidRune(rune(num)) {
		return "&amp;#" + entity + ";"
	}

	var arr [utf8.UTFMax]byte
	size := utf8.EncodeRune(arr[:], rune(num))
	// Return only the bytes that were written; the rest of arr is NUL padding.
	return string(arr[:size])
}
Esempio n. 7
0
// unescapeEntity reads an entity like "&lt;" from b[src:] and writes the
// corresponding "<" to b[dst:], returning the incremented dst and src cursors.
// If the name is not found in the entity table, the scanned bytes are copied
// from src to dst unchanged.
// Precondition: b[src] == '&' && dst <= src.
func unescapeEntity(b []byte, dst, src int) (dst1, src1 int) {
	// TODO(nigeltao): Check that this entity substitution algorithm matches the spec:
	// http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#consume-a-character-reference
	// TODO(nigeltao): Handle things like "&#20013;" or "&#x4e2d;".

	s := b[src:]

	// Skip the '&' at s[0], then consume the run of ASCII letters that forms
	// the entity name. Lower-case letters are tested first because they are
	// more common in entities.
	i := 1
	for i < len(s) && ('a' <= s[i] && s[i] <= 'z' || 'A' <= s[i] && s[i] <= 'Z') {
		i++
	}

	// A lookup is only attempted when a non-letter terminated the name; a
	// terminating ';' is part of the consumed text.
	if i < len(s) {
		if s[i] == ';' {
			i++
		}
		if x := entity[string(s[1:i])]; x != 0 {
			return dst + utf8.EncodeRune(x, b[dst:]), src + i
		}
	}

	// No substitution: move the scanned bytes down to dst as they are.
	dst1, src1 = dst+i, src+i
	copy(b[dst:dst1], b[src:src1])
	return dst1, src1
}
// setPrefix extracts the literal text at the beginning of the pattern,
// possibly after a leading iBOT, and stores it in re.prefixBytes and
// re.prefix. doExecute can use that text to speed up matching.
// re.prefixStart is set to the first instruction after the prefix.
func (re *Regexp) setPrefix() {
	var prefix []byte
	encoded := make([]byte, utf8.UTFMax)

	// The first instruction is start; skip it together with any initial iBOT.
	var inst *instr
	inst = re.inst[0].next
	for inst.kind == iBOT {
		inst = inst.next
	}

	// Collect characters until something other than a plain char shows up.
	for inst.kind != iEnd && inst.kind == iChar {
		// Stop if this char can be followed by a match for an empty string,
		// which includes closures, ^, and $.
		following := inst.next.kind
		if following == iBOT || following == iEOT || following == iAlt {
			break
		}
		n := utf8.EncodeRune(encoded, inst.char)
		prefix = append(prefix, encoded[0:n]...)
		inst = inst.next
	}

	// Point prefixStart at the first instruction that is not part of the prefix.
	re.prefixStart = inst
	re.prefixBytes = prefix
	re.prefix = string(prefix)
}
Esempio n. 9
0
// Map returns a copy of the byte array s with all its characters modified
// according to the mapping function. If mapping returns a negative value, the character is
// dropped from the string with no replacement.  The characters in s and the
// output are interpreted as UTF-8-encoded Unicode code points.
//
// A non-negative result that is not a valid code point (a surrogate half or a
// value above utf8.MaxRune) is encoded as utf8.RuneError.
func Map(mapping func(rune int) int, s []byte) []byte {
	// In the worst case, the array can grow when mapped, making
	// things unpleasant.  But it's so rare we barge in assuming it's
	// fine.  It could also shrink but that falls out naturally.
	maxbytes := len(s) // length of b
	nbytes := 0        // number of bytes encoded in b
	b := make([]byte, maxbytes)
	for i := 0; i < len(s); {
		// Fast path for ASCII; decode only multi-byte sequences.
		wid := 1
		c := int(s[i])
		if c >= utf8.RuneSelf {
			var decoded rune
			decoded, wid = utf8.DecodeRune(s[i:])
			c = int(decoded)
		}
		i += wid

		c = mapping(c)
		if c < 0 {
			continue
		}

		// Values above MaxRune would be truncated by the conversion to rune,
		// so replace them explicitly.
		r := rune(utf8.RuneError)
		if c <= utf8.MaxRune {
			r = rune(c)
		}
		// RuneLen reports -1 for invalid runes, but EncodeRune writes the
		// three bytes of RuneError for them; size the buffer accordingly.
		rl := utf8.RuneLen(r)
		if rl < 0 {
			rl = utf8.RuneLen(utf8.RuneError)
		}
		if nbytes+rl > maxbytes {
			// Grow the buffer.
			maxbytes = maxbytes*2 + utf8.UTFMax
			nb := make([]byte, maxbytes)
			copy(nb, b[:nbytes])
			b = nb
		}
		nbytes += utf8.EncodeRune(b[nbytes:maxbytes], r)
	}
	return b[:nbytes]
}
Esempio n. 10
0
// Map returns a copy of the string s with all its characters modified
// according to the mapping function.
//
// A result that is not a valid code point (negative, a surrogate half, or
// above utf8.MaxRune) is encoded as utf8.RuneError.
func Map(mapping func(rune int) int, s string) string {
	// In the worst case, the string can grow when mapped, making
	// things unpleasant.  But it's so rare we barge in assuming it's
	// fine.  It could also shrink but that falls out naturally.
	maxbytes := len(s) // length of b
	nbytes := 0        // number of bytes encoded in b
	b := make([]byte, maxbytes)
	for _, c := range s {
		m := mapping(int(c))

		// Out-of-range values would be truncated by the conversion to rune,
		// so replace them explicitly.
		r := rune(utf8.RuneError)
		if 0 <= m && m <= utf8.MaxRune {
			r = rune(m)
		}
		// RuneLen reports -1 for invalid runes, but EncodeRune writes the
		// three bytes of RuneError for them; size the buffer accordingly.
		wid := utf8.RuneLen(r)
		if wid < 0 {
			wid = utf8.RuneLen(utf8.RuneError)
		}
		if nbytes+wid > maxbytes {
			// Grow the buffer.
			maxbytes = maxbytes*2 + utf8.UTFMax
			nb := make([]byte, maxbytes)
			copy(nb, b[:nbytes])
			b = nb
		}
		nbytes += utf8.EncodeRune(b[nbytes:maxbytes], r)
	}
	return string(b[:nbytes])
}
Esempio n. 11
0
// appendRune encodes rune as UTF-8 at the current end of the byte buffer and
// records a runeInfo entry for it. It is used for Hangul.
func (rb *reorderBuffer) appendRune(rune uint32) {
	start := rb.nbyte
	width := uint8(utf8.EncodeRune(rb.byte[start:], int(rune)))
	rb.nbyte += width
	// The entry stores the start offset and encoded width; the remaining two
	// fields are left at zero.
	rb.rune[rb.nrune] = runeInfo{start, width, 0, 0}
	rb.nrune++
}
Esempio n. 12
0
// insertRune inserts a character at the cursor position.
//
// When the cursor is at the end of the line the character is echoed directly
// to the output; otherwise the tail of the line is shifted right by one and
// the whole line is redrawn through b.refresh. An error is returned when the
// write to the output fails or when the refresh reports one.
func (b *buffer) insertRune(rune int) os.Error {
	var useRefresh bool

	b.grow(b.size + 1) // Check if there is free space for one more character

	// Avoid a full update of the line.
	if b.pos == b.size {
		char := make([]byte, utf8.UTFMax)
		n := utf8.EncodeRune(char, rune)

		// Write only the encoded bytes; the rest of char is NUL padding that
		// must not reach the output.
		if _, err := output.Write(char[:n]); err != nil {
			return outputError(err.String())
		}
	} else {
		useRefresh = true
		// Open a gap at the cursor by shifting the tail one slot to the right.
		copy(b.data[b.pos+1:b.size+1], b.data[b.pos:b.size])
	}

	b.data[b.pos] = rune
	b.pos++
	b.size++

	if useRefresh {
		return b.refresh()
	}
	return nil
}
Esempio n. 13
0
// toBytes returns the first b.size characters of the buffer encoded as UTF-8.
func (b *buffer) toBytes() []byte {
	// Reserve the worst case of utf8.UTFMax bytes per character.
	encoded := make([]byte, b.size*utf8.UTFMax)

	// Encode each character right after the previous one.
	n := 0
	for i := 0; i < b.size; i++ {
		n += utf8.EncodeRune(encoded[n:], b.data[i])
	}
	return encoded[:n]
}
Esempio n. 14
0
// setPrefix extracts the literal text at the beginning of the pattern and
// stores it in re.prefixBytes and re.prefix. doExecute can use that text to
// speed up matching. re.prefixStart is set to the first instruction after the
// prefix.
func (re *Regexp) setPrefix() {
	var prefix []byte
	scratch := make([]byte, utf8.UTFMax)

	// The first instruction is start; begin with the one that follows it.
	pc := re.inst.At(0).(instr).next().index()
	for pc < re.inst.Len() {
		cur := re.inst.At(pc).(instr)
		// Stop if this is not a char.
		if cur.kind() != _CHAR {
			break
		}
		// Stop if this char can be followed by a match for an empty string,
		// which includes closures, ^, and $.
		following := re.inst.At(cur.next().index()).(instr).kind()
		if following == _BOT || following == _EOT || following == _ALT {
			break
		}
		n := utf8.EncodeRune(cur.(*_Char).char, scratch)
		prefix = bytes.Add(prefix, scratch[0:n])
		pc = cur.next().index()
	}

	// Point prefixStart at the first instruction that is not part of the prefix.
	re.prefixStart = re.inst.At(pc).(instr)
	re.prefixBytes = prefix
	re.prefix = string(prefix)
}
Esempio n. 15
0
// fmtC formats a rune for the 'c' format: c is encoded as UTF-8 into
// p.runeBuf and the encoded bytes are handed to p.fmt.pad.
func (p *pp) fmtC(c int64) {
	rune := int(c)
	if c != int64(rune) {
		// c does not survive the round trip through int: use the error rune.
		rune = utf8.RuneError
	}
	n := utf8.EncodeRune(p.runeBuf[0:utf8.UTFMax], rune)
	p.fmt.pad(p.runeBuf[0:n])
}
Esempio n. 16
0
// add appends the character c to p.buf: values below utf8.RuneSelf are written
// as a single byte, everything else as its UTF-8 encoding.
func (p *pp) add(c int) {
	if c >= utf8.RuneSelf {
		// Multi-byte case: encode into the scratch buffer first.
		n := utf8.EncodeRune(c, &p.runeBuf)
		p.buf.Write(p.runeBuf[0:n])
		return
	}
	p.buf.WriteByte(byte(c))
}
Esempio n. 17
0
// AppendStr encodes every rune of s into S.bytes starting at S.index,
// advances S.index past the written bytes, and returns S so that calls can be
// chained.
func (S *StringBuffer) AppendStr(s string) *StringBuffer {
	for _, ch := range s {
		S.index += utf8.EncodeRune(S.bytes[S.index:], ch)
	}
	return S
}
Esempio n. 18
0
// WriteRune appends the UTF-8 encoding of Unicode
// code point r to the buffer, returning its length and
// an error, which is always nil but is included
// to match bufio.Writer's WriteRune.
//
// Runes below utf8.RuneSelf are written as a single byte. Anything else,
// including negative values, goes through utf8.EncodeRune.
func (b *Buffer) WriteRune(r rune) (n int, err error) {
	// Compare as uint32 so that a negative rune does not take the
	// single-byte path and get written as one garbage byte.
	if uint32(r) < utf8.RuneSelf {
		b.WriteByte(byte(r))
		return 1, nil
	}
	n = utf8.EncodeRune(b.runeBytes[0:], r)
	b.Write(b.runeBytes[0:n])
	return n, nil
}
Esempio n. 19
0
func urlquoter(c int, safe string) []byte {
	safe_bytes := strings.Bytes(safe);
	c_bytes := make([]byte, utf8.RuneLen(c));
	utf8.EncodeRune(c, c_bytes);
	if bytes.Index(safe_bytes, c_bytes) != -1 || bytes.Index(always_safe, c_bytes) != -1 {
		return c_bytes;
	}
	else {
		return strings.Bytes(fmt.Sprintf("%%%02X", c));
	}
	panic("unreachable");
}
Esempio n. 20
0
// Read fills p with UTF-8 text taken rune by rune from r.reader, replacing
// every rune found in r.delimiters with r.used_delimiter.
//
// When an encoded rune does not fit into the rest of p, the bytes that do fit
// are written and the others are kept in r.remainder, to be delivered first by
// the next call. Read returns the number of bytes written to p together with
// any error reported by the underlying ReadRune.
func (r *DelimReader) Read(p []byte) (n int, err os.Error) {
	bytes_written := 0

	// Deliver bytes left over from a rune that was split by the previous call.
	if r.remainder != nil {
		bytes_written = copy(p, r.remainder)
		if bytes_written < len(r.remainder) {
			// p is too small even for the leftover; keep what is still pending.
			r.remainder = r.remainder[bytes_written:]
			return bytes_written, nil
		}
		r.remainder = nil
	}

	var scratch [utf8.UTFMax]byte
	for bytes_written < len(p) {
		c, _, err := r.reader.ReadRune()
		if err != nil {
			return bytes_written, err
		}
		for _, value := range r.delimiters {
			if value == c {
				c = r.used_delimiter
			}
		}

		// Take the width from the encoding that is actually written. The size
		// reported by ReadRune is 1 for an invalid input byte, while the
		// replacement rune it returns needs three bytes.
		size := utf8.EncodeRune(scratch[:], c)
		copied := copy(p[bytes_written:], scratch[:size])
		bytes_written += copied
		if copied < size {
			// The rune was split; hold on to the bytes that did not fit.
			rest := make([]byte, size-copied)
			copy(rest, scratch[copied:size])
			r.remainder = rest
		}
	}
	return bytes_written, nil
}
Esempio n. 21
0
// CapFirstFormatter capitalizes the first character of the value and writes
// the result to w.
//
// Example:
//
//	{value|capfirst}
//
// If value is "neste", the output will be "Neste".
//
// A value that starts with a byte sequence that is not valid UTF-8 is written
// unchanged. Nothing is written for an empty value. Write errors cannot be
// reported through this signature; after a failed write of the first
// character the remainder is not written.
func CapFirstFormatter(w io.Writer, formatter string, data ...interface{}) {
	b := getBytes(data...)
	if len(b) == 0 {
		return
	}

	first, size := utf8.DecodeRune(b)
	if first == utf8.RuneError && size == 1 {
		// Invalid leading byte: pass the data through instead of replacing
		// that byte with the encoding of U+FFFD.
		w.Write(b)
		return
	}

	var capb [utf8.UTFMax]byte
	n := utf8.EncodeRune(capb[:], unicode.ToUpper(first))
	if _, err := w.Write(capb[:n]); err != nil {
		return
	}
	w.Write(b[size:])
}
Esempio n. 22
0
// pRange decodes the rune at src[*i] and checks whether it is in one of the
// given ranges.
//
// On a match the rune is stored in *result as a string, *i is advanced past
// it, and true is returned. Otherwise, including when *i is outside src,
// nothing is modified and false is returned.
func pRange(ranges []unicode.Range, result *string, src []byte, i *int) bool {
	// Nothing left to decode.
	if *i < 0 || *i >= len(src) {
		return false
	}
	// DecodeRune reads only as many bytes as the rune needs, so the rest of
	// src can be passed without slicing past its end.
	rune, size := utf8.DecodeRune(src[*i:len(src)])
	if unicode.Is(ranges, rune) {
		// Encode into a full-size buffer: an invalid input byte decodes with
		// size 1 but re-encodes as the three bytes of U+FFFD.
		buf := make([]byte, utf8.UTFMax)
		n := utf8.EncodeRune(rune, buf)
		*result = string(buf[0:n]) // return resulting rune
		*i += size                 // Update index
		return true
	}
	// No match
	return false
}
Esempio n. 23
0
// runesToString returns the UTF-8 string made of the given code points.
// Values that are not valid code points (negative, surrogate halves, or above
// utf8.MaxRune) are encoded as utf8.RuneError.
func runesToString(runes []int) string {
	data := make([]byte, 0, len(runes))
	var enc [utf8.UTFMax]byte
	for _, v := range runes {
		// Out-of-range values would be truncated by the conversion to rune,
		// so replace them explicitly.
		r := rune(utf8.RuneError)
		if 0 <= v && v <= utf8.MaxRune {
			r = rune(v)
		}
		// Use the width EncodeRune reports; RuneLen is -1 for invalid runes.
		n := utf8.EncodeRune(enc[:], r)
		data = append(data, enc[:n]...)
	}
	return string(data)
}
Esempio n. 24
0
// buildMembers is the internal output-building function used by Members().
// It returns prefix itself when p is a leaf, followed by the members of every
// child subtree, each built with prefix extended by that child's character.
func (p *Trie) buildMembers(prefix string) *vector.StringVector {
	members := new(vector.StringVector)

	if p.leaf {
		members.Push(prefix)
	}

	// One scratch buffer serves all children: the encoded bytes are copied
	// into a new string before the recursive call.
	encoded := make([]byte, 4)
	for r, sub := range p.children {
		n := utf8.EncodeRune(encoded, r)
		members.AppendVector(sub.buildMembers(prefix + string(encoded[0:n])))
	}

	return members
}
Esempio n. 25
0
// hyphenateWord returns s with hyphen inserted at the break points selected
// by h.patterns.
//
// The word is wrapped in '.' markers and, for every position in the wrapped
// word, the values returned by h.patterns.AllSubstringsAndValues are merged
// into a per-rune score slice by keeping the maximum. An odd score after a
// character means that hyphen is inserted there.
func (h *Hyphenator) hyphenateWord(s, hyphen string) string {
	testStr := `.` + s + `.`
	// v holds one score per rune of the dot-wrapped word.
	v := make([]int, utf8.RuneCountInString(testStr))
	vIndex := 0
	// pos is the byte offset at which each rune starts; vIndex counts runes.
	for pos, _ := range testStr {
		t := testStr[pos:]
		// NOTE(review): presumably strs holds the patterns that match at the
		// start of t and values the score vector of each — confirm against
		// AllSubstringsAndValues.
		strs, values := h.patterns.AllSubstringsAndValues(t)
		for i := 0; i < values.Len(); i++ {
			str := strs.At(i)
			val := values.At(i).(*vector.IntVector)

			// NOTE(review): len(str) counts bytes while v is indexed by rune;
			// verify the offset for patterns containing non-ASCII characters.
			diff := val.Len() - len(str)
			vs := v[vIndex-diff:]

			// Keep the highest score seen so far for each position.
			for i := 0; i < val.Len(); i++ {
				if val.At(i) > vs[i] {
					vs[i] = val.At(i)
				}
			}
		}
		vIndex++
	}

	var outstr string

	// trim the values for the beginning and ending dots
	markers := v[1 : len(v)-1]
	mIndex := 0
	// Scratch buffer for the encoding of one rune.
	u := make([]byte, 4)
	for _, ch := range s {
		l := utf8.EncodeRune(ch, u)
		outstr += string(u[0:l])
		// don't hyphenate between (or after) the last two characters of a string
		if mIndex < len(markers)-2 {
			// hyphens are inserted on odd values, skipped on even ones
			if markers[mIndex]%2 != 0 {
				outstr += hyphen
			}
		}
		mIndex++
	}

	return outstr
}
Esempio n. 26
0
// decodeCSS decodes CSS3 escapes given a sequence of stringchars.
// If there is no change, it returns the input, otherwise it returns a slice
// backed by a new array.
// http://www.w3.org/TR/css3-syntax/#SUBTOK-stringchar defines stringchar.
func decodeCSS(s []byte) []byte {
	// Without a backslash there is nothing to decode.
	if bytes.IndexByte(s, '\\') == -1 {
		return s
	}
	// The UTF-8 sequence for a codepoint is never longer than 1 + the
	// number hex digits need to represent that codepoint, so len(s) is an
	// upper bound on the output length.
	out := make([]byte, 0, len(s))
	for len(s) != 0 {
		// Copy the literal run in front of the next backslash.
		slash := bytes.IndexByte(s, '\\')
		if slash == -1 {
			slash = len(s)
		}
		out = append(out, s[:slash]...)
		s = s[slash:]
		if len(s) < 2 {
			break
		}
		// http://www.w3.org/TR/css3-syntax/#SUBTOK-escape
		// escape ::= unicode | '\' [#x20-#x7E#x80-#xD7FF#xE000-#xFFFD#x10000-#x10FFFF]
		if !isHex(s[1]) {
			// `\\` decodes to `\` and `\"` to `"`.
			_, width := utf8.DecodeRune(s[1:])
			out = append(out, s[1:1+width]...)
			s = s[1+width:]
			continue
		}
		// http://www.w3.org/TR/css3-syntax/#SUBTOK-unicode
		//   unicode ::= '\' [0-9a-fA-F]{1,6} wc?
		end := 2
		for end < len(s) && end < 7 && isHex(s[end]) {
			end++
		}
		r := hexDecode(s[1:end])
		if r > unicode.MaxRune {
			// Too large for a code point: give the last hex digit back.
			r, end = r/16, end-1
		}
		n := utf8.EncodeRune(out[len(out):cap(out)], r)
		out = out[:len(out)+n]
		// The optional space at the end allows a hex
		// sequence to be followed by a literal hex.
		// string(decodeCSS([]byte(`\A B`))) == "\nB"
		s = skipCSSSpace(s[end:])
	}
	return out
}
Esempio n. 27
0
// insert stores value for the rune r. It walks, and creates where needed, one
// child node per byte of r's UTF-8 encoding and marks the final node as a
// leaf. The program is terminated through log.Fatalf if the walk would have
// to descend through a node that is already a leaf.
func (n *trieNode) insert(r rune, value uint16) {
	var enc [utf8.UTFMax]byte
	for _, c := range enc[:utf8.EncodeRune(enc[:], r)] {
		if n.leaf {
			log.Fatalf("triegen: insert: node (%#v) should not be a leaf", n)
		}
		next := n.table[c]
		if next == nil {
			// First time this byte is seen at this level.
			next = newNode()
			next.b = c
			n.table[c] = next
		}
		n = next
	}
	n.value = int(value)
	n.leaf = true
}
Esempio n. 28
0
// HtmlToUTF8 converts a single numerical HTML entity to its UTF-8 text.
//
//	ex: "&#9827;" -> "♣"
//
// It returns "" when entity is not matched by reg_entity, when the number
// cannot be parsed, or when it does not denote a valid code point.
func HtmlToUTF8(entity string) string {
	// Make sure we have a valid entity: &#123;
	if !reg_entity.MatchString(entity) {
		return ""
	}

	// Convert the digits between "&#" and ";" to a number.
	num, err := strconv.Atoi(entity[2 : len(entity)-1])
	if err != nil || num < 0 || num > utf8.MaxRune || !utf8.ValidRune(rune(num)) {
		return ""
	}

	// A code point may need up to utf8.UTFMax bytes.
	var arr [utf8.UTFMax]byte
	size := utf8.EncodeRune(arr[:], rune(num))
	// Return only the bytes that were written, without NUL padding.
	return string(arr[:size])
}
Esempio n. 29
0
// Map returns a copy of the string s with all its characters modified
// according to the mapping function. If mapping returns a negative value, the character is
// dropped from the string with no replacement.
//
// A non-negative result that is not a valid code point (a surrogate half or a
// value above utf8.MaxRune) is encoded as utf8.RuneError. If mapping leaves
// every character unchanged, s itself is returned.
func Map(mapping func(rune) rune, s string) string {
	// In the worst case, the string can grow when mapped, making
	// things unpleasant.  But it's so rare we barge in assuming it's
	// fine.  It could also shrink but that falls out naturally.
	maxbytes := len(s) // length of b
	nbytes := 0        // number of bytes encoded in b
	// The output buffer b is initialized on demand, the first
	// time a character differs.
	var b []byte

	for i, c := range s {
		r := mapping(c)
		if b == nil {
			if r == c {
				continue
			}
			b = make([]byte, maxbytes)
			nbytes = copy(b, s[:i])
		}
		if r < 0 {
			continue
		}
		// RuneLen reports -1 for invalid runes, but EncodeRune writes the
		// three bytes of RuneError for them; size the buffer accordingly.
		wid := utf8.RuneLen(r)
		if wid < 0 {
			wid = utf8.RuneLen(utf8.RuneError)
		}
		if nbytes+wid > maxbytes {
			// Grow the buffer.
			maxbytes = maxbytes*2 + utf8.UTFMax
			nb := make([]byte, maxbytes)
			copy(nb, b[:nbytes])
			b = nb
		}
		nbytes += utf8.EncodeRune(b[nbytes:maxbytes], r)
	}
	if b == nil {
		return s
	}
	return string(b[:nbytes])
}
Esempio n. 30
0
// Hyphenate returns s with hyphen inserted at the permitted break points of
// every word it contains.
//
// s is split into identifier tokens and single characters. A word listed in
// h.exceptions is replaced by its stored form, with '-' swapped for hyphen;
// any other word goes through h.hyphenateWord. All remaining characters,
// including whitespace, are copied through unchanged. The boolean result is
// always true.
func (h *Hyphenator) Hyphenate(s, hyphen string) (string, bool) {
	var sc scanner.Scanner
	sc.Init(strings.NewReader(s))
	sc.Mode = scanner.ScanIdents
	// Report whitespace as ordinary characters so that it reaches the output.
	sc.Whitespace = 0

	var outstr string
	// Scratch buffer for re-encoding single-character tokens.
	p := make([]byte, utf8.UTFMax)

	for tok := sc.Scan(); tok != scanner.EOF; tok = sc.Scan() {
		if tok != scanner.Ident {
			// A Unicode rune to append to the output.
			l := utf8.EncodeRune(p, tok)
			outstr += string(p[0:l])
			continue
		}

		// A word (or part thereof) to hyphenate.
		t := sc.TokenText()

		// Try the exceptions first.
		if exc := h.exceptions[t]; len(exc) != 0 {
			if hyphen != `-` {
				// strings.Replace returns the new string; keep it.
				exc = strings.Replace(exc, `-`, hyphen, -1)
			}
			// Append and carry on so the text around the word is kept.
			outstr += exc
			continue
		}

		// Not an exception, hyphenate normally.
		outstr += h.hyphenateWord(t, hyphen)
	}

	return outstr, true
}