예제 #1
0
파일: neologd.go 프로젝트: ikawaha/x
// EliminateSpace returns s with every run of white space removed, except
// that a run sitting between two runes that both belong to unicode.Latin or
// latinSymbols is collapsed to a single ASCII space. Leading and trailing
// white space is always dropped.
func (n NeologdNormalizer) EliminateSpace(s string) string {
	var (
		out     bytes.Buffer
		last    rune // most recent non-space rune written to out
		skipped bool // true while a run of white space is pending
	)
	for rest := s; len(rest) > 0; {
		r, size := utf8.DecodeRuneInString(rest)
		rest = rest[size:]
		if unicode.IsSpace(r) {
			skipped = true
			continue
		}
		if skipped {
			skipped = false
			// Keep one separator only between two Latin-ish runes.
			if unicode.In(last, unicode.Latin, latinSymbols) &&
				unicode.In(r, unicode.Latin, latinSymbols) {
				out.WriteRune(' ')
			}
		}
		out.WriteRune(r)
		last = r
	}
	return out.String()
}
예제 #2
0
파일: ndb.go 프로젝트: pombredanne/ndb
// scanStrings is a bufio.Scanner split function that yields white-space
// separated words. A double quote toggles "quoted" mode, inside which white
// space does not end the token; the quote characters stay in the token.
// Leading white space is skipped. At EOF a final unterminated word is
// returned as is; otherwise more data is requested.
func scanStrings(data []byte, atEOF bool) (advance int, token []byte, err error) {
	// Find the first non-space rune.
	begin := 0
	for begin < len(data) {
		r, size := utf8.DecodeRune(data[begin:])
		if !unicode.IsSpace(r) {
			break
		}
		begin += size
	}

	if atEOF && len(data) == 0 {
		return 0, nil, nil
	}

	// Walk forward until an unquoted space ends the word.
	quoted := false
	for pos := begin; pos < len(data); {
		r, size := utf8.DecodeRune(data[pos:])
		switch {
		case r == '"':
			quoted = !quoted
		case !quoted && unicode.IsSpace(r):
			return pos + size, data[begin:pos], nil
		}
		pos += size
	}

	// Input exhausted: hand back whatever is left as the last word.
	if atEOF && begin < len(data) {
		return len(data), data[begin:], nil
	}
	// Otherwise ask the scanner for more data.
	return 0, nil, nil
}
예제 #3
0
파일: scan_test.go 프로젝트: kostyll/gccpy
// TestSpace checks that IsSpace agrees with unicode.IsSpace for every code
// point from 0 through utf8.MaxRune, aborting at the first disagreement.
func TestSpace(t *testing.T) {
	for r := rune(0); r <= utf8.MaxRune; r++ {
		want := unicode.IsSpace(r)
		if got := IsSpace(r); got != want {
			t.Fatalf("white space property disagrees: %#U should be %t", r, want)
		}
	}
}
예제 #4
0
파일: ctx.go 프로젝트: koron/nvcheck
// top walks backwards through c.content, starting just before byte offset
// tail, and matches it rune by rune against w (also read from its end).
//
// A white-space rune in w must line up with a white-space rune in the
// content; any other white space in the content is skipped. When all of w
// has been consumed the current offset, i.e. the start of the match, is
// returned. It returns -1 when the content runs out first or when a rune
// does not match.
func (c *ctx) top(tail int, w string) int {
	for w != "" {
		if tail <= 0 {
			debug.Printf("over backtrack: w=%q", w)
			return -1
		}
		wr, wn := utf8.DecodeLastRuneInString(w)
		cr, cn := utf8.DecodeLastRuneInString(c.content[:tail])
		tail -= cn
		contentSpace := unicode.IsSpace(cr)
		switch {
		case unicode.IsSpace(wr):
			if !contentSpace {
				// The word requires white space here but the content has none.
				debug.Printf("not space: tail=%d w=%q cr=%q", tail, w, cr)
				return -1
			}
			w = w[:len(w)-wn]
		case contentSpace:
			// Extra white space in the content: consume it, keep w as is.
		default:
			w = w[:len(w)-wn]
			if cr != wr {
				// The runes differ.
				debug.Printf("not match: tail=%d w=%q cr=%q wr=%q",
					tail, w, cr, wr)
				return -1
			}
		}
	}
	return tail
}
예제 #5
0
파일: rom.go 프로젝트: petrdel/scraper
// scanWords is a split function for a Scanner that returns each
// space-separated word of text, with surrounding spaces deleted. A word that
// begins with a double quote extends to the next double quote and is returned
// without the quotes, so it may contain spaces. The definition of space is
// set by unicode.IsSpace.
//
// When no complete token is available and atEOF is false, it advances past
// the leading spaces and requests more data. At EOF the remaining data after
// the leading spaces is returned as the final token (an unterminated quoted
// word keeps its opening quote).
func scanWords(data []byte, atEOF bool) (advance int, token []byte, err error) {
	// Skip leading spaces.
	start := 0
	for start < len(data) {
		r, width := utf8.DecodeRune(data[start:])
		if !unicode.IsSpace(r) {
			break
		}
		start += width
	}
	quote := false
	// Scan until space (or closing quote), marking end of word.
	for width, i := 0, start; i < len(data); i += width {
		var r rune
		r, width = utf8.DecodeRune(data[i:])
		switch {
		case i == start && r == '"':
			// The word starts at start, not necessarily at offset 0: leading
			// spaces may have been skipped above.
			quote = true
		case !quote && unicode.IsSpace(r):
			return i + width, data[start:i], nil
		case quote && r == '"':
			// Drop the one-byte opening quote and the closing quote.
			return i + width, data[start+1 : i], nil
		}
	}
	// If we're at EOF, we have a final, non-empty, non-terminated word. Return it.
	if atEOF && len(data) > start {
		return len(data), data[start:], nil
	}
	// Request more data, discarding the leading spaces already examined.
	return start, nil, nil
}
예제 #6
0
// scanTokensKeepPrefix is a split function for a Scanner that returns each
// space-separated word of text together with the white space that precedes
// it. The definition of space is set by unicode.IsSpace.
//
// Input consisting only of white space (or empty input) is returned whole as
// one token. When the first non-space byte is '#', the work is delegated to
// scanLinesKeepPrefix.
//
// Adapted from bufio.ScanWords().
func scanTokensKeepPrefix(data []byte, atEOF bool) (advance int, token []byte, err error) {
	// Locate the first non-space rune; the prefix before it is kept.
	start := 0
	for start < len(data) {
		r, size := utf8.DecodeRune(data[start:])
		if !unicode.IsSpace(r) {
			break
		}
		start += size
	}
	if (atEOF && len(data) == 0) || start == len(data) {
		return len(data), data, nil
	}
	// From here on start < len(data), so data[start] is always valid.
	if data[start] == '#' {
		return scanLinesKeepPrefix(data, atEOF)
	}
	// The token ends right before the next space, which is left in the input.
	for i := start; i < len(data); {
		r, size := utf8.DecodeRune(data[i:])
		if unicode.IsSpace(r) {
			return i, data[:i], nil
		}
		i += size
	}
	// At EOF the remainder is a final, non-terminated word.
	if atEOF {
		return len(data), data, nil
	}
	// Request more data.
	return 0, nil, nil
}
예제 #7
0
// getTokens splits value into tokens. Each run of letters and numbers becomes
// one token, every other non-space rune (a symbol) becomes a token of its
// own, and white space only separates tokens and is never returned.
func getTokens(value string) []string {
	var (
		tokens []string
		word   []rune // letters/numbers collected for the current token
	)
	// flush appends the pending word, if any, and starts a new one.
	flush := func() {
		if len(word) > 0 {
			tokens = append(tokens, string(word))
		}
		word = nil
	}
	for _, r := range value {
		switch {
		case unicode.IsSpace(r):
			flush()
		case unicode.IsLetter(r) || unicode.IsNumber(r) || unicode.IsDigit(r):
			word = append(word, r)
		default:
			flush()
			tokens = append(tokens, string(r))
		}
	}
	flush()
	return tokens
}
예제 #8
0
파일: cursor.go 프로젝트: pombredanne/vigo
// PrevWord moves the cursor using PrevRuneFunc towards the previous word.
//
// It first searches for a non-space rune, repeating the search while the
// cursor reports BOL; if that search fails it returns false. It then looks at
// the rune before the cursor and, if that rune is not white space, searches
// on until the word class (utils.IsWord or not) changes or white space is
// found. The result is true when the cursor does not end up at BOL.
func (c *Cursor) PrevWord() bool {
	nonSpace := func(r rune) bool { return !unicode.IsSpace(r) }

	// Find a non-space rune; retry whenever we land on beginning-of-line.
	for {
		if !c.PrevRuneFunc(nonSpace) {
			return false
		}
		if !c.BOL() {
			break
		}
	}

	r, _ := c.RuneBefore()
	if nonSpace(r) {
		// Word motion distinguishes runes accepted by utils.IsWord from any
		// other non-space rune: stop as soon as the class flips or white
		// space shows up.
		inWord := utils.IsWord(r)
		c.PrevRuneFunc(func(r rune) bool {
			return utils.IsWord(r) != inWord || unicode.IsSpace(r)
		})
	}
	return !c.BOL()
}
예제 #9
0
파일: repl.go 프로젝트: mytchel/pass
// splitSections splits s into sections separated by runs of white space (as
// defined by unicode.IsSpace). Single quotes toggle quoting: white space
// inside quotes does not end a section, and the quote characters themselves
// are removed from the result.
//
// The input is decoded as UTF-8 rune by rune, so multi-byte characters are
// copied through unchanged and their continuation bytes are never mistaken
// for white space. As before, input that starts with white space yields an
// empty first section.
func splitSections(s string) (sections []string) {
	quote := false
	i := 0
	for i < len(s) {
		var buf []byte
		segStart := i // start of the current quote-free stretch
		end := len(s) // offset where this section stops
		for off, r := range s[i:] {
			pos := i + off
			if r == '\'' {
				// Copy what precedes the quote and skip the quote itself.
				buf = append(buf, s[segStart:pos]...)
				segStart = pos + 1
				quote = !quote
			} else if !quote && unicode.IsSpace(r) {
				end = pos
				break
			}
		}
		buf = append(buf, s[segStart:end]...)
		sections = append(sections, string(buf))

		// Skip the white space that separates this section from the next.
		i = len(s)
		for off, r := range s[end:] {
			if !unicode.IsSpace(r) {
				i = end + off
				break
			}
		}
	}
	return sections
}
예제 #10
0
// Render writes a preview of PlainText to writer.
//
// Text shorter than MaxLength is written unchanged. Otherwise the text is cut
// at ShortLength (MaxLength when ShortLength is zero), moved back to the end
// of the preceding word, followed by "... " and, when MoreLink is set, an
// <a> element pointing at it. The returned error is the one produced by
// rendering the link content, if any.
func (self *TextPreview) Render(context *Context, writer *utils.XMLWriter) (err error) {
	if len(self.PlainText) < self.MaxLength {
		writer.Content(self.PlainText)
		return nil
	}

	shortLength := self.ShortLength
	if shortLength == 0 {
		shortLength = self.MaxLength
	}
	// Keep the cut position inside the text; an out-of-range ShortLength
	// would otherwise panic on the index expressions below.
	if shortLength > len(self.PlainText) {
		shortLength = len(self.PlainText)
	}
	if shortLength < 0 {
		shortLength = 0
	}

	// Only ASCII bytes are tested for white space. Bytes >= 0x80 belong to
	// multi-byte UTF-8 sequences, and treating them as Latin-1 runes would
	// classify 0x85 and 0xA0 as spaces and cut a character in half.
	isSpaceByte := func(b byte) bool {
		return b < 0x80 && unicode.IsSpace(rune(b))
	}

	// If in the middle of a word, go back to space before it
	for shortLength > 0 && !isSpaceByte(self.PlainText[shortLength-1]) {
		shortLength--
	}

	// If in the middle of space, go back to word before it
	for shortLength > 0 && isSpaceByte(self.PlainText[shortLength-1]) {
		shortLength--
	}

	writer.Content(self.PlainText[:shortLength])
	writer.Content("... ")
	if self.MoreLink != nil {
		writer.OpenTag("a")
		writer.Attrib("href", self.MoreLink.URL(context.PathArgs...))
		writer.AttribIfNotDefault("title", self.MoreLink.LinkTitle(context))
		content := self.MoreLink.LinkContent(context)
		if content != nil {
			err = content.Render(context, writer)
		}
		writer.ForceCloseTag() // a
	}
	return err
}
예제 #11
0
// parseHealthConfig parses the arguments of a HEALTHCHECK instruction.
// The HEALTHCHECK command is like parseMaybeJSON, but has an extra type
// argument: the first white-space delimited word of rest becomes the value
// of the returned node, and the remainder is handed to parseMaybeJSON to
// build the node's Next and Attributes. An input with no first word yields
// all-nil results.
func parseHealthConfig(rest string) (*Node, map[string]bool, error) {
	// sep is the end of the first argument.
	sep := 0
	for sep < len(rest) && !unicode.IsSpace(rune(rest[sep])) {
		sep++
	}
	// next is the start of whatever follows the separating white space.
	next := sep
	for next < len(rest) && unicode.IsSpace(rune(rest[next])) {
		next++
	}

	if sep == 0 {
		return nil, nil, nil
	}

	cmd, attrs, err := parseMaybeJSON(rest[next:])
	if err != nil {
		return nil, nil, err
	}

	node := &Node{Value: rest[:sep], Next: cmd, Attributes: attrs}
	return node, nil, err
}
예제 #12
0
// anagram reports whether word1 and word2 are anagrams of each other.
// The comparison ignores case (via strings.ToLower) and white space, so it
// works on whole phrases as well as on single words.
func anagram(word1 string, word2 string) bool {
	// tally counts how often each non-space rune occurs in the lower-cased
	// phrase.
	tally := func(phrase string) map[rune]int {
		counts := make(map[rune]int)
		for _, r := range strings.ToLower(phrase) {
			if unicode.IsSpace(r) {
				continue
			}
			// A missing key reads as 0, so incrementing is always safe.
			counts[r]++
		}
		return counts
	}

	// Two phrases are anagrams exactly when their rune tallies are equal.
	return reflect.DeepEqual(tally(word1), tally(word2))
}
예제 #13
0
// Render writes a preview of PlainText to ctx.Response.XML.
//
// Text shorter than MaxLength is written unchanged. Otherwise the text is cut
// at ShortLength (MaxLength when ShortLength is zero), moved back to the end
// of the preceding word, followed by "... " and, when MoreLink is set, an
// <a> element pointing at it. The returned error is the one produced by
// rendering the link content, if any.
func (self *TextPreview) Render(ctx *Context) (err error) {
	if len(self.PlainText) < self.MaxLength {
		ctx.Response.XML.Content(self.PlainText)
		return nil
	}

	shortLength := self.ShortLength
	if shortLength == 0 {
		shortLength = self.MaxLength
	}
	// Keep the cut position inside the text; an out-of-range ShortLength
	// would otherwise panic on the index expressions below.
	if shortLength > len(self.PlainText) {
		shortLength = len(self.PlainText)
	}
	if shortLength < 0 {
		shortLength = 0
	}

	// Only ASCII bytes are tested for white space. Bytes >= 0x80 belong to
	// multi-byte UTF-8 sequences, and treating them as Latin-1 runes would
	// classify 0x85 and 0xA0 as spaces and cut a character in half.
	isSpaceByte := func(b byte) bool {
		return b < 0x80 && unicode.IsSpace(rune(b))
	}

	// If in the middle of a word, go back to space before it
	for shortLength > 0 && !isSpaceByte(self.PlainText[shortLength-1]) {
		shortLength--
	}

	// If in the middle of space, go back to word before it
	for shortLength > 0 && isSpaceByte(self.PlainText[shortLength-1]) {
		shortLength--
	}

	ctx.Response.XML.Content(self.PlainText[:shortLength])
	ctx.Response.XML.Content("... ")
	if self.MoreLink != nil {
		ctx.Response.XML.OpenTag("a")
		ctx.Response.XML.Attrib("href", self.MoreLink.URL(ctx))
		ctx.Response.XML.AttribIfNotDefault("title", self.MoreLink.LinkTitle(ctx))
		content := self.MoreLink.LinkContent(ctx)
		if content != nil {
			err = content.Render(ctx)
		}
		ctx.Response.XML.CloseTagAlways() // a
	}
	return err
}
예제 #14
0
// count reads in until EOF and returns the number of newlines (nl), words
// (nw), runes (nr) and bytes (nc) it saw. A word is a maximal run of runes
// for which unicode.IsSpace is false; a word that ends at EOF without
// trailing white space is counted too.
//
// If reading fails with an error other than io.EOF, the counts gathered so
// far are returned together with that error.
func count(in *bufio.Reader) (nl, nw, nr, nc int, err error) {
	inword := false
	for {
		r, sz, rerr := in.ReadRune()
		if rerr == io.EOF {
			break
		}
		if rerr != nil {
			return nl, nw, nr, nc, rerr
		}

		nr++
		nc += sz
		if r == '\n' {
			nl++
		}
		if unicode.IsSpace(r) {
			// Leaving a word: count it once.
			if inword {
				nw++
			}
			inword = false
		} else {
			inword = true
		}
	}
	// The input ended in the middle of a word; it still counts.
	if inword {
		nw++
	}
	return nl, nw, nr, nc, nil
}
예제 #15
0
// lexPrivmsg repeatedly skips white space other than '\n', accepts a run of
// non-space runes and emits it as a markov.TokWord. On a newline it emits
// markov.TokEOL, consumes the newline, calls l.newline and hands over to
// lexDate. When the next rune is utf8.RuneError it calls l.errorfEOFValid
// and keeps looping.
func lexPrivmsg(l *LogLexer) stateFn {
	inlineSpace := func(r rune) bool {
		return unicode.IsSpace(r) && r != '\n'
	}
	wordRune := func(r rune) bool {
		return r != utf8.RuneError && !unicode.IsSpace(r)
	}

	for {
		l.buf.ignoreWhile(inlineSpace)

		if l.buf.acceptWhile(wordRune) > 0 {
			l.emit(markov.TokWord)
		}

		switch l.buf.peek() {
		case '\n':
			l.emit(markov.TokEOL)
			l.buf.next()
			l.buf.ignoreToken()
			l.newline()
			return lexDate
		case utf8.RuneError:
			l.errorfEOFValid(nil)
		}
	}
}
예제 #16
0
// Fields splits the string s around each instance of one or more consecutive
// white space characters (as defined by unicode.IsSpace), returning the
// substrings of s found between them. The result is an empty, non-nil slice
// if s contains only white space.
func Fields(s string) []string {
	// First pass: count the fields so the result can be sized exactly.
	count := 0
	prevSpace := true
	for _, r := range s {
		space := unicode.IsSpace(r)
		if prevSpace && !space {
			count++
		}
		prevSpace = space
	}

	// Second pass: slice each field out of s.
	fields := make([]string, 0, count)
	begin := -1 // byte offset of the field in progress, or -1 between fields
	for pos, r := range s {
		switch space := unicode.IsSpace(r); {
		case space && begin >= 0:
			fields = append(fields, s[begin:pos])
			begin = -1
		case !space && begin < 0:
			begin = pos
		}
	}
	if begin >= 0 {
		// The last field runs to the end of s.
		fields = append(fields, s[begin:])
	}
	return fields
}
예제 #17
0
// Fields splits the byte slice s around each instance of one or more
// consecutive white space characters (as defined by unicode.IsSpace),
// returning subslices of s. The result is an empty, non-nil slice if s
// contains only white space.
func Fields(s []byte) [][]byte {
	// First pass: count the fields so the result can be sized exactly.
	count := 0
	prevSpace := true
	for pos := 0; pos < len(s); {
		r, size := utf8.DecodeRune(s[pos:])
		space := unicode.IsSpace(r)
		if prevSpace && !space {
			count++
		}
		prevSpace = space
		pos += size
	}

	// Second pass: slice each field out of s.
	fields := make([][]byte, 0, count)
	begin := -1 // byte offset of the field in progress, or -1 between fields
	for pos := 0; pos < len(s); {
		r, size := utf8.DecodeRune(s[pos:])
		switch space := unicode.IsSpace(r); {
		case space && begin >= 0:
			fields = append(fields, s[begin:pos])
			begin = -1
		case !space && begin < 0:
			begin = pos
		}
		pos += size
	}
	if begin >= 0 {
		// The last field runs to the end of s.
		fields = append(fields, s[begin:])
	}
	return fields
}
예제 #18
0
파일: binres.go 프로젝트: Luci4r/mobile
// poolTrim trims all but immediately surrounding space.
// \n\t\tfoobar\n\t\t becomes \tfoobar\n
//
// At most one white-space rune is kept on each side of the non-space content.
// A string with no leading or trailing white space is returned unchanged on
// that side, and a string made up only of white space yields "".
func poolTrim(s string) string {
	first, last := -1, -1 // byte offsets of the first and last non-space runes
	before := 0           // byte offset of the rune just before first
	prev := 0             // byte offset of the previously visited rune
	for i, r := range s {
		if !unicode.IsSpace(r) {
			if first < 0 {
				first = i
				before = prev // preserve preceding space, if there is one
			}
			last = i
		}
		prev = i
	}
	if first < 0 {
		return "" // every char was a space
	}

	// Walk the rune starts from last on: the rune at last, then one trailing
	// space to preserve; the start of the rune after that is where we cut.
	end := len(s)
	seen := 0
	for off := range s[last:] {
		if seen == 2 {
			end = last + off
			break
		}
		seen++
	}

	return s[before:end]
}
예제 #19
0
파일: kilt.go 프로젝트: robertkrimen/dogbag
// GraveTrim trims the white space around a (raw string) block of text while
// respecting line structure.
//
// Leading ASCII white space is removed up to the first newline or non-space
// byte; if that byte is a newline it is dropped as well. Trailing ASCII
// white space is then removed back to the last newline or non-space byte,
// and that newline is kept. If nothing but white space remains, the result
// is "".
func GraveTrim(target string) string {
	// Discard \r? Go already does this for raw string literals.

	// trimmable reports whether b is an ASCII white-space byte other than
	// '\n'. Bytes >= 0x80 are parts of multi-byte UTF-8 sequences and must
	// not be interpreted as Latin-1 white space (0x85, 0xA0).
	trimmable := func(b byte) bool {
		return b != '\n' && b < 0x80 && unicode.IsSpace(rune(b))
	}

	start, end := 0, len(target)
	for start < end && trimmable(target[start]) {
		start++
	}
	if start == end {
		return ""
	}
	if target[start] == '\n' {
		// Skip the leading newline
		start++
	}

	for end > start && trimmable(target[end-1]) {
		end--
	}
	return target[start:end]
}
예제 #20
0
파일: util.go 프로젝트: zond/hackyhack
// SplitVerb splits s into its first white-space delimited word (verb) and
// everything after the white space that follows it (rest). White space
// inside rest is kept as is; white space between verb and rest is dropped.
func SplitVerb(s string) (verb, rest string) {
	var verbBuf, restBuf bytes.Buffer
	state := splitStateVerb
	for _, r := range s {
		switch state {
		case splitStateVerb:
			if unicode.IsSpace(r) {
				state = splitStateWhite
				continue
			}
			// The write error is not checked, as in the original code; the
			// destination is an in-memory bytes.Buffer.
			io.WriteString(&verbBuf, string(r))
		case splitStateWhite:
			if unicode.IsSpace(r) {
				continue
			}
			// First rune of the remainder: switch state and store it below.
			state = splitStateRest
			fallthrough
		case splitStateRest:
			io.WriteString(&restBuf, string(r))
		}
	}
	return verbBuf.String(), restBuf.String()
}
예제 #21
0
// upperWordLetterPairs returns the upper-cased pairs of adjacent runes in
// runes, together with the number of pairs. Pairs that contain a white-space
// rune are left out, so pairs never span a word boundary. Fewer than two
// runes produce an empty result.
func upperWordLetterPairs(runes []rune) ([]runeBigram, int) {
	limit := len(runes) - 1
	if limit < 1 {
		return make([]runeBigram, 0), 0
	}
	pairs := make([]runeBigram, 0, limit)
	for i := 0; i < limit; i++ {
		first, second := runes[i], runes[i+1]
		switch {
		case unicode.IsSpace(second):
			// The next pair would start with that same space, so step over
			// it as well.
			i++
		case unicode.IsSpace(first):
			// A pair starting with white space is not a pair of letters.
		default:
			pairs = append(pairs, runeBigram{
				rA: unicode.ToUpper(first),
				rB: unicode.ToUpper(second),
			})
		}
	}
	return pairs, len(pairs)
}
예제 #22
0
// emit appends data to ctx.Buf after passing it through ctx.breakLongLines.
//
// For every resulting line a single space is inserted first when neither the
// previously emitted text ended with white space nor the line starts with
// it. ctx.lineLength is advanced for every rune written and reset to 0 after
// each '\n', after which ctx.prefix (if not empty) is written. Empty lines
// are skipped. The first write error is returned.
func (ctx *textifyTraverseCtx) emit(data string) error {
	if len(data) == 0 {
		return nil
	}
	for _, line := range ctx.breakLongLines(data) {
		runes := []rune(line)
		if len(runes) == 0 {
			// Nothing to write; also avoids indexing an empty slice below.
			continue
		}
		startsWithSpace := unicode.IsSpace(runes[0])
		if !startsWithSpace && !ctx.endsWithSpace {
			ctx.Buf.WriteByte(' ')
			ctx.lineLength++
		}
		ctx.endsWithSpace = unicode.IsSpace(runes[len(runes)-1])
		for _, c := range line {
			if _, err := ctx.Buf.WriteString(string(c)); err != nil {
				return err
			}
			ctx.lineLength++
			if c != '\n' {
				continue
			}
			// A new output line starts: reset the length and add the prefix.
			ctx.lineLength = 0
			if ctx.prefix != "" {
				if _, err := ctx.Buf.WriteString(ctx.prefix); err != nil {
					return err
				}
			}
		}
	}
	return nil
}
예제 #23
0
// TrimSpace returns a slice of the string s, with all leading and trailing
// white space removed, as defined by Unicode.
//
// Invalid UTF-8 is never treated as white space, so trimming stops at the
// first undecodable byte on either side.
func TrimSpace(s string) string {
	start, end := 0, len(s)

	// Trim from the front.
	for start < end {
		// Fast path for ASCII; decode only multi-byte sequences.
		r, wid := rune(s[start]), 1
		if r >= utf8.RuneSelf {
			r, wid = utf8.DecodeRuneInString(s[start:end])
		}
		if !unicode.IsSpace(r) {
			break
		}
		start += wid
	}

	// Trim from the back, never moving past start.
	for start < end {
		r, wid := rune(s[end-1]), 1
		if r >= utf8.RuneSelf {
			r, wid = utf8.DecodeLastRuneInString(s[start:end])
		}
		if !unicode.IsSpace(r) {
			break
		}
		end -= wid
	}
	return s[start:end]
}
예제 #24
0
// beautify returns line with insertionChar inserted at the boundaries
// between runes classified by isEnglish and runes classified by isChinese.
// Nothing is inserted when the "English" rune is white space or when the
// "Chinese" rune is punctuation according to isPunctuation.
func beautify(line string) string {
	var buf bytes.Buffer
	runes := []rune(line)

	for i, cur := range runes {
		if i > 0 {
			prev := runes[i-1]
			switch {
			case isEnglish(prev) && isChinese(cur):
				// English -> Chinese transition.
				if !unicode.IsSpace(prev) && !isPunctuation(string(cur)) {
					buf.WriteString(insertionChar)
				}
			case isChinese(prev) && isEnglish(cur):
				// Chinese -> English transition.
				if !isPunctuation(string(prev)) && !unicode.IsSpace(cur) {
					buf.WriteString(insertionChar)
				}
			}
		}
		buf.WriteString(string(cur))
	}

	return buf.String()
}
예제 #25
0
파일: schema.go 프로젝트: antifuchs/saypi
// scanStmts is a bufio.Scanner split function that yields statements
// separated by semicolons. Leading white space (as defined by
// unicode.IsSpace) is skipped and the semicolon is not part of the token.
// At EOF a final statement without a semicolon is returned with its trailing
// white space removed; if only white space is left, no token is produced.
func scanStmts(data []byte, atEOF bool) (advance int, token []byte, err error) {
	// Skip leading spaces.
	start := 0
	for start < len(data) {
		r, width := utf8.DecodeRune(data[start:])
		if !unicode.IsSpace(r) {
			break
		}
		start += width
	}
	if atEOF && len(data) == 0 {
		return 0, nil, nil
	}

	// end is the offset just past the last non-space rune seen so far.
	end := start
	// Scan until semicolon, marking end of statement.
	for width, i := 0, start; i < len(data); i += width {
		var r rune
		r, width = utf8.DecodeRune(data[i:])
		if r == ';' {
			return i + width, data[start:i], nil
		}
		if !unicode.IsSpace(r) {
			// Use the full rune width so a multi-byte rune is not cut in half.
			end = i + width
		}
	}
	// If we're at EOF, we have a final, non-empty, non-terminated statement. Return it.
	if atEOF && len(data) > start {
		return len(data), data[start:end], nil
	}
	// Request more data.
	return 0, nil, nil
}
예제 #26
0
파일: keymap.go 프로젝트: r7kamura/peco
// handleForwardWord moves the caret to the first non-space character that
// follows the next white space in the query, or to the end of the query
// when there is no such character, and then redraws the matches. It does
// nothing when the caret is already at or past the end of the query.
func handleForwardWord(i *Input, _ termbox.Event) {
	if i.caretPos >= len(i.query) {
		return
	}

	// Default target: the end of the buffer.
	target := len(i.query)
	seenSpace := false
	for pos := i.caretPos; pos < len(i.query); pos++ {
		isSpace := unicode.IsSpace(i.query[pos])
		if seenSpace && !isSpace {
			target = pos
			break
		}
		seenSpace = seenSpace || isSpace
	}

	i.caretPos = target
	i.DrawMatches(nil)
}
예제 #27
0
파일: fmt_test.go 프로젝트: vsayer/go
// TestIsSpace compares IsSpace with unicode.IsSpace for every code point up
// to unicode.MaxRune and reports each disagreement.
func TestIsSpace(t *testing.T) {
	// This tests the internal isSpace function.
	// IsSpace = isSpace is defined in export_test.go.
	for r := rune(0); r <= unicode.MaxRune; r++ {
		got, want := IsSpace(r), unicode.IsSpace(r)
		if got != want {
			t.Errorf("isSpace(%U) = %v, want %v", r, got, want)
		}
	}
}
예제 #28
0
파일: lex.go 프로젝트: goods/tmpl
// lexInsideDelims is the lexer state used between an opening and a closing
// delimiter. Depending on what the remaining input starts with it either
// emits a token and returns the next state function, or returns the error
// state produced by l.errorf.
func lexInsideDelims(l *lexer) lexerState {
	for {
		rest := l.data[l.pos:]
		// Lex the inside tokens that don't change state.
		for _, delim := range insideDelims {
			if bytes.HasPrefix(rest, delim.value) {
				l.pos += len(delim.value)

				// If we have a keyword, check that the next rune is a space
				// or that a close delimiter follows it.
				if !unicode.IsSpace(l.peek()) &&
					!bytes.HasPrefix(l.data[l.pos:], closeDelim.value) {
					// There is more than just a keyword, so back up and try
					// the next entry of insideDelims.
					l.pos -= len(delim.value)
					continue
				}

				l.emit(delim.typ)
				return lexInsideDelims
			}
		}

		// Check for things that start selectors.
		for _, delim := range selDelims {
			if bytes.HasPrefix(rest, delim.value) {
				l.emit(tokenStartSel)
				return lexInsideSel
			}
		}

		// Check for a close delimiter.
		if bytes.HasPrefix(rest, closeDelim.value) {
			return lexCloseDelim
		}

		// Nothing matched as a prefix: classify the next rune.
		switch r := l.next(); {
		case r == eof || r == '\n' || r == '\r':
			return l.errorf("unclosed action")
		case unicode.IsSpace(r):
			// Other white space is passed over and the loop continues.
			l.advance()
		// Lexing of number and string literals is disabled:
		/*
			case r == '+' || r == '-' || '0' <= r && r <= '9':
				l.backup()
				return lexNumber
			case r == '"':
				l.advance()
				return lexValue
		*/
		case unicode.IsLetter(r) || r == '_': // identifier start, as in the Go spec
			return lexIdentifier
		default:
			return l.errorf("invalid character: %q", r)
		}
	}
	// Not reached: the loop above only exits through a return.
	return nil
}
예제 #29
0
// nextWord returns the next token of self.content, starting at self.index,
// and advances self.index past it. It returns an "EOF" error when the index
// is already at or past the end of the content.
//
// White space other than '\n' is skipped first. The token type is chosen
// from the first rune of the token; a newline is returned as a token of type
// newline with an empty string.
func (self *scanner) nextWord() (word tok, err os.Error) {
	if self.index >= len(self.content) {
		err = os.NewError("EOF")
		return
	}

	// Skip leading white space, but stop at a newline: it is a token itself.
	for self.index < len(self.content) {
		r, l := utf8.DecodeRune(self.content[self.index:])
		if !unicode.IsSpace(r) || r == '\n' {
			break
		}
		self.index += l
	}
	// j is the start of the token, inchar is true inside single quotes and
	// incode is the nesting depth of '{' ... '}'.
	j, ttype, inchar, incode := self.index, other, false, 0
	for self.index < len(self.content) {
		r, l := utf8.DecodeRune(self.content[self.index:])
		if r == '\'' {
			inchar = !inchar
		}
		if self.index == j {
			// The first rune of the token decides its type.
			switch {
			case unicode.IsUpper(r):
				ttype = nonterm
			case r == '\n':
				self.index++
				ttype = newline
				// This break only leaves the switch; the loop is left by the
				// white-space check below, because '\n' is white space.
				break
			case r == ':':
				ttype = begindef
			case r == ';':
				ttype = enddef
			case r == '|':
				ttype = alternate
			case r == '{' && memorizeTerms:
				incode++
				ttype = code
			default:
				ttype = term
			}
		} else if incode > 0 && r == '{' {
			incode++
		} else if incode > 0 && r == '}' {
			incode--
		}
		// White space ends the token unless it is inside a code block or
		// between single quotes.
		if incode == 0 && !inchar && unicode.IsSpace(r) {
			break
		}
		self.index += l
	}
	token := string(self.content[j:self.index])
	if ttype == newline {
		// A newline token carries no text.
		token = ""
	}
	word = tok{token, ttype}
	return
}
예제 #30
0
// addChar feeds one rune to the word collector. A non-space rune is passed
// on to addRawChar. A white-space rune ends the current word: if w.inWord is
// set and w.word is not empty, the word is appended to w.words and the
// word state is reset; otherwise the rune is ignored.
func (w *wordsStruct) addChar(ch rune) {
	if !unicode.IsSpace(ch) {
		w.addRawChar(ch)
		return
	}
	if w.inWord && len(w.word) != 0 {
		w.words = append(w.words, w.word)
		w.word = ""
		w.inWord = false
	}
}