Exemplo n.º 1
Arquivo: neologd.go Projeto: ikawaha/x
func (n NeologdNormalizer) EliminateSpace(s string) string {
	var (
		b    bytes.Buffer
		prev rune
	for p := 0; p < len(s); {
		c, w := utf8.DecodeRuneInString(s[p:])
		p += w
		if !unicode.IsSpace(c) {
			prev = c
		for p < len(s) {
			c0, w0 := utf8.DecodeRuneInString(s[p:])
			p += w0
			if !unicode.IsSpace(c0) {
				if unicode.In(prev, unicode.Latin, latinSymbols) &&
					unicode.In(c0, unicode.Latin, latinSymbols) {
					b.WriteRune(' ')
				prev = c0

	return b.String()
Exemplo n.º 2
// scanStrings is a split function for a bufio.Scanner that returns each
// space-separated word of data, treating white space between a pair of
// double quotes as part of the word. The quote characters themselves are
// kept in the returned token. The definition of space is set by
// unicode.IsSpace.
//
// It returns (0, nil, nil) to request more data when no complete token is
// available yet.
func scanStrings(data []byte, atEOF bool) (advance int, token []byte, err error) {
	// Skip leading spaces.
	start := 0
	for width := 0; start < len(data); start += width {
		var r rune
		r, width = utf8.DecodeRune(data[start:])
		if !unicode.IsSpace(r) {
			break
		}
	}

	if atEOF && len(data) == 0 {
		return 0, nil, nil
	}

	// Scan until an unquoted space, marking end of word.
	inquote := false
	for width, i := 0, start; i < len(data); i += width {
		var r rune
		r, width = utf8.DecodeRune(data[i:])
		if r == '"' {
			inquote = !inquote
			continue
		}
		if unicode.IsSpace(r) && !inquote {
			return i + width, data[start:i], nil
		}
	}
	// If we're at EOF, we have a final, non-empty, non-terminated word. Return it.
	if atEOF && len(data) > start {
		return len(data), data[start:], nil
	}
	// Request more data.
	return 0, nil, nil
}
Exemplo n.º 3
// Test white space table matches the Unicode definition.
func TestSpace(t *testing.T) {
	for r := rune(0); r <= utf8.MaxRune; r++ {
		if IsSpace(r) != unicode.IsSpace(r) {
			t.Fatalf("white space property disagrees: %#U should be %t", r, unicode.IsSpace(r))
Exemplo n.º 4
Arquivo: ctx.go Projeto: koron/nvcheck
// top returns offset to start of an match.
func (c *ctx) top(tail int, w string) int {
	for len(w) > 0 {
		if tail <= 0 {
			debug.Printf("over backtrack: w=%q", w)
			return -1
		wr, wn := utf8.DecodeLastRuneInString(w)
		cr, cn := utf8.DecodeLastRuneInString(c.content[:tail])
		tail -= cn
		if unicode.IsSpace(wr) {
			if !unicode.IsSpace(cr) {
				// no spaces which required.
				debug.Printf("not space: tail=%d w=%q cr=%q", tail, w, cr)
				return -1
			w = w[:len(w)-wn]
		if unicode.IsSpace(cr) {
		w = w[:len(w)-wn]
		if cr != wr {
			// didn't match runes.
			debug.Printf("not match: tail=%d w=%q cr=%q wr=%q",
				tail, w, cr, wr)
			return -1
	return tail
Exemplo n.º 5
// scanWords is a split function for a Scanner that returns each
// space-separated word of text, with surrounding spaces deleted. A word that
// begins with a double quote extends to the next double quote and is
// returned without the quotes, so it may contain spaces (and may be empty
// for `""`). The definition of space is set by unicode.IsSpace.
//
// When no complete word is available yet it consumes the leading spaces and
// requests more data. At EOF an unterminated quoted word is returned as-is,
// including its opening quote.
func scanWords(data []byte, atEOF bool) (advance int, token []byte, err error) {
	// Skip leading spaces.
	start := 0
	for width := 0; start < len(data); start += width {
		var r rune
		r, width = utf8.DecodeRune(data[start:])
		if !unicode.IsSpace(r) {
			break
		}
	}
	quote := false
	// Scan until space (or closing quote), marking end of word.
	for width, i := 0, start; i < len(data); i += width {
		var r rune
		r, width = utf8.DecodeRune(data[i:])
		switch {
		case i == start && r == '"':
			// The opening quote is the first rune of the word, which sits at
			// start (not necessarily at offset 0) once leading spaces are skipped.
			quote = true
		case !quote && unicode.IsSpace(r):
			return i + width, data[start:i], nil
		case quote && r == '"':
			// Drop the one-byte opening quote and the closing quote.
			return i + width, data[start+1 : i], nil
		}
	}
	// If we're at EOF, we have a final, non-empty, non-terminated word. Return it.
	if atEOF && len(data) > start {
		return len(data), data[start:], nil
	}
	// Request more data.
	return start, nil, nil
}
Exemplo n.º 6
// scanWordsKeepPrefix is a split function for a Scanner that returns each
// space-separated word of text, with prefixing spaces included. It will never
// return an empty string. The definition of space is set by unicode.IsSpace.
// Adapted from bufio.ScanWords().
func scanTokensKeepPrefix(data []byte, atEOF bool) (advance int, token []byte, err error) {
	// Skip leading spaces.
	start := 0
	for width := 0; start < len(data); start += width {
		var r rune
		r, width = utf8.DecodeRune(data[start:])
		if !unicode.IsSpace(r) {
	if atEOF && len(data) == 0 || start == len(data) {
		return len(data), data, nil
	if len(data) > start && data[start] == '#' {
		return scanLinesKeepPrefix(data, atEOF)
	// Scan until space, marking end of word.
	for width, i := 0, start; i < len(data); i += width {
		var r rune
		r, width = utf8.DecodeRune(data[i:])
		if unicode.IsSpace(r) {
			return i, data[:i], nil
	// If we're at EOF, we have a final, non-empty, non-terminated word. Return it.
	if atEOF && len(data) > start {
		return len(data), data, nil
	// Request more data.
	return 0, nil, nil
Exemplo n.º 7
// getTokens converts value into a slice of tokens. Each maximal run of
// letters and numbers (unicode.IsLetter / unicode.IsNumber) is one token,
// every other non-space rune is a token of its own, and white space
// (unicode.IsSpace) only separates tokens and is never returned.
// It returns nil when value holds no tokens.
func getTokens(value string) []string {
	var (
		word   []rune   // letters/numbers of the token being accumulated
		tokens []string // completed tokens, in input order
	)
	// flush emits the pending word, if any.
	flush := func() {
		if len(word) > 0 {
			tokens = append(tokens, string(word))
			word = nil
		}
	}
	for _, r := range value {
		switch {
		case unicode.IsSpace(r):
			flush()
		case unicode.IsLetter(r) || unicode.IsNumber(r): // IsNumber already covers IsDigit
			word = append(word, r)
		default:
			// A symbol ends the current word and stands alone.
			flush()
			tokens = append(tokens, string(r))
		}
	}
	flush()
	return tokens
}
Exemplo n.º 8
// Move cursor forward to beginning of the previous word.
// Skips the rest of the current word, if any, unless is located at its
// first character. Returns true if the move was successful, false if EOF reached.
func (c *Cursor) PrevWord() bool {
	isNotSpace := func(r rune) bool {
		return !unicode.IsSpace(r)
	for {
		// Skip space until we find a word character.
		// Re-try if we reached beginning-of-line.
		if !c.PrevRuneFunc(isNotSpace) {
			return false
		if !c.BOL() {
	r, _ := c.RuneBefore()
	if isNotSpace(r) {
		// Lowercase word motion differentiates words consisting of
		// (A-Z0-9_) and any other non-whitespace character. Skip until
		// we find either the other word type or whitespace.
		if utils.IsWord(r) {
			c.PrevRuneFunc(func(r rune) bool {
				return !utils.IsWord(r) || unicode.IsSpace(r)
		} else {
			c.PrevRuneFunc(func(r rune) bool {
				return utils.IsWord(r) || unicode.IsSpace(r)
	return !c.BOL()
Exemplo n.º 9
Arquivo: repl.go Projeto: mytchel/pass
// splitSections splits s into white-space-separated sections. Single quotes
// toggle quoting: white space inside a quoted span is kept in the section,
// and the quote characters themselves are dropped.
//
// The input is processed rune by rune, so multi-byte UTF-8 characters are
// preserved and never mistaken for white space. If s starts with white
// space, the first section is the empty string. Trailing white space does
// not produce a section. An empty s yields nil.
func splitSections(s string) (sections []string) {
	runes := []rune(s)
	quote := false
	for i := 0; i < len(runes); {
		var section []rune

		// Collect one section, stopping at the first unquoted space.
		j := i
	scan:
		for ; j < len(runes); j++ {
			switch r := runes[j]; {
			case r == '\'':
				quote = !quote
			case unicode.IsSpace(r) && !quote:
				break scan
			default:
				section = append(section, r)
			}
		}
		sections = append(sections, string(section))

		// Skip the white space separating this section from the next.
		for i = j; i < len(runes) && unicode.IsSpace(runes[i]); i++ {
		}
	}
	return sections
}
Exemplo n.º 10
func (self *TextPreview) Render(context *Context, writer *utils.XMLWriter) (err error) {
	if len(self.PlainText) < self.MaxLength {
	} else {
		shortLength := self.ShortLength
		if shortLength == 0 {
			shortLength = self.MaxLength

		// If in the middle of a word, go back to space before it
		for shortLength > 0 && !unicode.IsSpace(rune(self.PlainText[shortLength-1])) {

		// If in the middle of space, go back to word before it
		for shortLength > 0 && unicode.IsSpace(rune(self.PlainText[shortLength-1])) {

		writer.Content("... ")
		if self.MoreLink != nil {
			writer.Attrib("href", self.MoreLink.URL(context.PathArgs...))
			writer.AttribIfNotDefault("title", self.MoreLink.LinkTitle(context))
			content := self.MoreLink.LinkContent(context)
			if content != nil {
				err = content.Render(context, writer)
			writer.ForceCloseTag() // a
	return err
Exemplo n.º 11
// The HEALTHCHECK command is like parseMaybeJSON, but has an extra type argument.
func parseHealthConfig(rest string) (*Node, map[string]bool, error) {
	// Find end of first argument
	var sep int
	for ; sep < len(rest); sep++ {
		if unicode.IsSpace(rune(rest[sep])) {
	next := sep
	for ; next < len(rest); next++ {
		if !unicode.IsSpace(rune(rest[next])) {

	if sep == 0 {
		return nil, nil, nil

	typ := rest[:sep]
	cmd, attrs, err := parseMaybeJSON(rest[next:])
	if err != nil {
		return nil, nil, err

	return &Node{Value: typ, Next: cmd, Attributes: attrs}, nil, err
Exemplo n.º 12
// anagram reports whether word1 and word2 are anagrams of each other.
//
// Both inputs are lower-cased and white space (unicode.IsSpace) is ignored,
// so whole phrases can be compared, not just single words. The comparison
// counts how often each rune occurs in each input and checks that the two
// tallies are equal.
func anagram(word1 string, word2 string) bool {
	// Rune -> number of occurrences, one tally per input.
	chars1 := make(map[rune]int)
	chars2 := make(map[rune]int)

	// Ranging over a string yields (byte index, rune) pairs.
	for _, c := range strings.ToLower(word1) {
		if !unicode.IsSpace(c) {
			// A missing key reads as 0, so incrementing is safe.
			chars1[c]++
		}
	}

	for _, c := range strings.ToLower(word2) {
		if !unicode.IsSpace(c) {
			chars2[c]++
		}
	}

	return reflect.DeepEqual(chars1, chars2)
}
Exemplo n.º 13
func (self *TextPreview) Render(ctx *Context) (err error) {
	if len(self.PlainText) < self.MaxLength {
	} else {
		shortLength := self.ShortLength
		if shortLength == 0 {
			shortLength = self.MaxLength

		// If in the middle of a word, go back to space before it
		for shortLength > 0 && !unicode.IsSpace(rune(self.PlainText[shortLength-1])) {

		// If in the middle of space, go back to word before it
		for shortLength > 0 && unicode.IsSpace(rune(self.PlainText[shortLength-1])) {

		ctx.Response.XML.Content("... ")
		if self.MoreLink != nil {
			ctx.Response.XML.Attrib("href", self.MoreLink.URL(ctx))
			ctx.Response.XML.AttribIfNotDefault("title", self.MoreLink.LinkTitle(ctx))
			content := self.MoreLink.LinkContent(ctx)
			if content != nil {
				err = content.Render(ctx)
			ctx.Response.XML.CloseTagAlways() // a
	return err
Exemplo n.º 14
func count(in *bufio.Reader) (nl, nw, nr, nc int, err error) {
	inword := false
	for {
		var r rune
		var sz int
		r, sz, err = in.ReadRune()
		if err == io.EOF {
			err = nil
		if err != nil {

		nc += sz
		if r == '\n' {
		if unicode.IsSpace(r) && inword {
			inword = false
		} else if !unicode.IsSpace(r) {
			inword = true
Exemplo n.º 15
func lexPrivmsg(l *LogLexer) stateFn {

	for i := 0; ; i++ {
		l.buf.ignoreWhile(func(r rune) bool {
			return unicode.IsSpace(r) && r != '\n'

		n := l.buf.acceptWhile(func(r rune) bool {
			return r != utf8.RuneError && !unicode.IsSpace(r)
		if n > 0 {

		r := l.buf.peek()
		switch {
		case r == '\n':
			return lexDate
		case r == utf8.RuneError:
	panic("not reached")
Exemplo n.º 16
// Fields splits the string s around each instance of one or more consecutive white space
// characters, returning an array of substrings of s or an empty list if s contains only white space.
// White space is defined by unicode.IsSpace.
func Fields(s string) []string {
	// First pass: count the fields so the result is allocated exactly once.
	n := 0
	inField := false
	for _, r := range s {
		wasInField := inField
		inField = !unicode.IsSpace(r)
		if inField && !wasInField {
			n++
		}
	}

	// Second pass: slice out each field.
	a := make([]string, n)
	na := 0
	fieldStart := -1 // byte offset of the current field; -1 while between fields
	for i, r := range s {
		if unicode.IsSpace(r) {
			if fieldStart >= 0 {
				a[na] = s[fieldStart:i]
				na++
				fieldStart = -1
			}
		} else if fieldStart == -1 {
			fieldStart = i
		}
	}
	if fieldStart != -1 { // the last field runs to the end of s
		a[na] = s[fieldStart:]
		na++
	}
	return a[0:na]
}
Exemplo n.º 17
// Fields splits the array s around each instance of one or more consecutive white space
// characters, returning a slice of subarrays of s or an empty list if s contains only white space.
// White space is defined by unicode.IsSpace. The returned subslices alias s.
func Fields(s []byte) [][]byte {
	// First pass: count the fields so the result is allocated exactly once.
	n := 0
	inField := false
	for i := 0; i < len(s); {
		r, size := utf8.DecodeRune(s[i:])
		wasInField := inField
		inField = !unicode.IsSpace(r)
		if inField && !wasInField {
			n++
		}
		i += size
	}

	// Second pass: slice out each field.
	a := make([][]byte, 0, n)
	fieldStart := -1 // byte offset of the current field; -1 while between fields
	for i := 0; i < len(s); {
		r, size := utf8.DecodeRune(s[i:])
		switch {
		case unicode.IsSpace(r):
			if fieldStart >= 0 {
				a = append(a, s[fieldStart:i])
				fieldStart = -1
			}
		case fieldStart < 0:
			fieldStart = i
		}
		i += size
	}
	if fieldStart >= 0 { // the last field runs to the end of s
		a = append(a, s[fieldStart:])
	}
	return a
}
Exemplo n.º 18
// poolTrim trims all but immediately surrounding space.
// \n\t\tfoobar\n\t\t becomes \tfoobar\n
//
// At most one white-space rune (unicode.IsSpace) is kept directly before the
// first non-space rune and at most one directly after the last. A side with
// no surrounding space is left as it is. It returns "" when s is empty or
// consists only of white space. Offsets are tracked per rune, so multi-byte
// spaces and text are never cut in half.
func poolTrim(s string) string {
	start, end := -1, len(s) // byte range to keep; start < 0 until text is seen
	prev := 0                // byte offset of the rune before the current one
	afterText := false       // the previous rune was non-space
	cutNext := false         // the single trailing space to keep was just seen
	for i, r := range s {
		switch {
		case !unicode.IsSpace(r):
			if start < 0 {
				start = prev // preserve preceding space (prev is 0 when i is 0)
			}
			// More text: any earlier cut point is no longer the end.
			end = len(s)
			afterText, cutNext = true, false
		case afterText:
			// First space after text: keep it, cut before whatever follows.
			afterText, cutNext = false, true
		case cutNext:
			end = i
			cutNext = false
		}
		prev = i
	}

	if start < 0 {
		return "" // every char was a space
	}

	return s[start:end]
}
Exemplo n.º 19
// GraveTrim
func GraveTrim(target string) string {
	// Discard \r? Go already does this for raw string literals.
	end := len(target)

	last := 0
	index := 0
	for index = 0; index < end; index++ {
		chr := rune(target[index])
		if chr == '\n' || !unicode.IsSpace(chr) {
			last = index
	if index >= end {
		return ""
	start := last
	if rune(target[start]) == '\n' {
		// Skip the leading newline

	last = end - 1
	for index = last; index > start; index-- {
		chr := rune(target[index])
		if chr == '\n' || !unicode.IsSpace(chr) {
			last = index
	stop := last
	result := target[start : stop+1]
	return result
Exemplo n.º 20
func SplitVerb(s string) (verb, rest string) {
	state := splitStateVerb
	verbBuf := &bytes.Buffer{}
	restBuf := &bytes.Buffer{}
	for _, r := range s {
		switch state {
		case splitStateVerb:
			if unicode.IsSpace(r) {
				state = splitStateWhite
			} else {
				io.WriteString(verbBuf, string([]rune{r}))
		case splitStateWhite:
			if !unicode.IsSpace(r) {
				state = splitStateRest
				io.WriteString(restBuf, string([]rune{r}))
		case splitStateRest:
			io.WriteString(restBuf, string([]rune{r}))
	verb = verbBuf.String()
	rest = restBuf.String()
Exemplo n.º 21
func upperWordLetterPairs(runes []rune) ([]runeBigram, int) {
	limit := len(runes) - 1
	if limit < 1 {
		return make([]runeBigram, 0), 0
	bigrams := make([]runeBigram, limit)
	var a rune
	var b rune
	numPairs := 0
	for i := 0; i < limit; i++ {
		a = runes[i]
		b = runes[i+1]
		if unicode.IsSpace(b) {
		if unicode.IsSpace(a) {
		bigrams[numPairs] = runeBigram{rA: unicode.ToUpper(a), rB: unicode.ToUpper(b)}
	bigrams = bigrams[0:numPairs]
	return bigrams, numPairs
Exemplo n.º 22
func (ctx *textifyTraverseCtx) emit(data string) error {
	if len(data) == 0 {
		return nil
	lines := ctx.breakLongLines(data)
	var err error
	for _, line := range lines {
		runes := []rune(line)
		startsWithSpace := unicode.IsSpace(runes[0])
		if !startsWithSpace && !ctx.endsWithSpace {
			ctx.Buf.WriteByte(' ')
		ctx.endsWithSpace = unicode.IsSpace(runes[len(runes)-1])
		for _, c := range line {
			_, err = ctx.Buf.WriteString(string(c))
			if err != nil {
				return err
			if c == '\n' {
				ctx.lineLength = 0
				if ctx.prefix != "" {
					_, err = ctx.Buf.WriteString(ctx.prefix)
					if err != nil {
						return err
	return nil
Exemplo n.º 23
// TrimSpace returns a slice of the string s, with all leading and trailing white space
// removed, as defined by Unicode.
//
// Invalid UTF-8 at either end decodes to utf8.RuneError, which is not white
// space, so trimming stops there and the bytes are returned untouched.
func TrimSpace(s string) string {
	start, end := 0, len(s)
	// Advance start past leading white space.
	for start < end {
		r, wid := utf8.DecodeRuneInString(s[start:end])
		if !unicode.IsSpace(r) {
			break
		}
		start += wid
	}
	// Pull end back over trailing white space; never passes start.
	for start < end {
		r, wid := utf8.DecodeLastRuneInString(s[start:end])
		if !unicode.IsSpace(r) {
			break
		}
		end -= wid
	}
	return s[start:end]
}
Exemplo n.º 24
func beautify(line string) string {

	buf := new(bytes.Buffer)
	lineRune := []rune(line)

	for i, current := range lineRune {
		if i == 0 {
		previous := lineRune[i-1]

		// chinese english char appears alternatively, when english char is not a space and
		// chinese char is not a punctuation, insert a whitespace.
		if isEnglish(previous) && isChinese(current) {
			if !unicode.IsSpace(previous) && !isPunctuation(string(current)) {
		} else if isChinese(previous) && isEnglish(current) {
			if !isPunctuation(string(previous)) && !unicode.IsSpace(current) {


	return buf.String()
Exemplo n.º 25
// scanStmts is a split function for a bufio.Scanner that returns each
// semicolon-terminated statement of data. Leading white space
// (unicode.IsSpace) is skipped and the semicolon is consumed but not
// returned. White space before the semicolon is kept in the token.
//
// At EOF a final statement without a semicolon is returned with its
// trailing white space removed. It returns (0, nil, nil) to request more
// data when no terminator has been seen yet.
func scanStmts(data []byte, atEOF bool) (advance int, token []byte, err error) {
	// Skip leading spaces.
	start := 0
	for width := 0; start < len(data); start += width {
		var r rune
		r, width = utf8.DecodeRune(data[start:])
		if !unicode.IsSpace(r) {
			break
		}
	}
	if atEOF && len(data) == 0 {
		return 0, nil, nil
	}

	end := start // offset just past the last non-space rune seen
	// Scan until semicolon, marking end of statement.
	for width, i := 0, start; i < len(data); i += width {
		var r rune
		r, width = utf8.DecodeRune(data[i:])
		if r == ';' {
			return i + width, data[start:i], nil
		} else if !unicode.IsSpace(r) {
			// Step by the rune's full width so a multi-byte rune is not cut.
			end = i + width
		}
	}
	// If we're at EOF, we have a final, non-empty, non-terminated statement. Return it.
	if atEOF && len(data) > start {
		return len(data), data[start:end], nil
	}
	// Request more data.
	return 0, nil, nil
}
Exemplo n.º 26
func handleForwardWord(i *Input, _ termbox.Event) {
	if i.caretPos >= len(i.query) {

	foundSpace := false
	for pos := i.caretPos; pos < len(i.query); pos++ {
		r := i.query[pos]
		if foundSpace {
			if !unicode.IsSpace(r) {
				i.caretPos = pos
		} else {
			if unicode.IsSpace(r) {
				foundSpace = true

	// not found. just move to the end of the buffer
	i.caretPos = len(i.query)

Exemplo n.º 27
func TestIsSpace(t *testing.T) {
	// This tests the internal isSpace function.
	// IsSpace = isSpace is defined in export_test.go.
	for i := rune(0); i <= unicode.MaxRune; i++ {
		if IsSpace(i) != unicode.IsSpace(i) {
			t.Errorf("isSpace(%U) = %v, want %v", i, IsSpace(i), unicode.IsSpace(i))
Exemplo n.º 28
Arquivo: lex.go Projeto: goods/tmpl
func lexInsideDelims(l *lexer) lexerState {
	for {
		rest := l.data[l.pos:]
		//lex the inside tokens that dont change state
		for _, delim := range insideDelims {
			if bytes.HasPrefix(rest, delim.value) {
				l.pos += len(delim.value)

				//if we have a keyword, check that the next letter
				//either is a space or a close delim follows it
				if !unicode.IsSpace(l.peek()) &&
					!bytes.HasPrefix(l.data[l.pos:], closeDelim.value) {
					//theres more than just a keyword so back up
					l.pos -= len(delim.value)

				return lexInsideDelims

		//check for things that start selectors
		for _, delim := range selDelims {
			if bytes.HasPrefix(rest, delim.value) {
				return lexInsideSel

		//check for a close delim
		if bytes.HasPrefix(rest, closeDelim.value) {
			return lexCloseDelim

		switch r := l.next(); {
		case r == eof || r == '\n' || r == '\r':
			return l.errorf("unclosed action")
		case unicode.IsSpace(r):
		//remove letter/number literals
			case r == '+' || r == '-' || '0' <= r && r <= '9':
				return lexNumber
			case r == '"':
				return lexValue
		case unicode.IsLetter(r) || r == '_': //go spec
			return lexIdentifier
			return l.errorf("invalid character: %q", r)
	return nil
Exemplo n.º 29
func (self *scanner) nextWord() (word tok, err os.Error) {
	if self.index >= len(self.content) {
		err = os.NewError("EOF")

	for self.index < len(self.content) {
		r, l := utf8.DecodeRune(self.content[self.index:])
		if !unicode.IsSpace(r) || r == '\n' {
		self.index += l
	j, ttype, inchar, incode := self.index, other, false, 0
	for self.index < len(self.content) {
		r, l := utf8.DecodeRune(self.content[self.index:])
		if r == '\'' {
			inchar = !inchar
		if self.index == j {
			switch {
			case unicode.IsUpper(r):
				ttype = nonterm
			case r == '\n':
				ttype = newline
			case r == ':':
				ttype = begindef
			case r == ';':
				ttype = enddef
			case r == '|':
				ttype = alternate
			case r == '{' && memorizeTerms:
				ttype = code
				ttype = term
		} else if incode > 0 && r == '{' {
		} else if incode > 0 && r == '}' {
		if incode == 0 && !inchar && unicode.IsSpace(r) {
		self.index += l
	token := string(self.content[j:self.index])
	if ttype == newline {
		token = ""
	word = tok{token, ttype}
Exemplo n.º 30
func (w *wordsStruct) addChar(ch rune) {
	if unicode.IsSpace(ch) && w.inWord {
		if len(w.word) != 0 {
			w.words = append(w.words, w.word)
			w.word = ""
			w.inWord = false
	} else if !unicode.IsSpace(ch) {