Golang Regexp.Split 예제들

프로그래밍 언어: Golang

네임스페이스/패키지 이름: regexp

클래스/타입: Regexp

메소드/함수: Split

hotexamples.com에서의 예제들: 7

Golang Regexp.Split - 7개의 예제가 발견되었습니다. 아래는 오픈소스 프로젝트에서 추출한, Golang regexp.Regexp.Split의 평가가 가장 높은 실제 사용 예제들입니다. 예제를 평가해 주시면 예제 품질 향상에 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

SubexpNames(30)

String(30)

MatchString(30)

Match(30)

FindAllStringSubmatch(30)

FindStringSubmatch(30)

ReplaceAllString(22)

FindString(20)

FindSubmatch(20)

FindAllString(19)

FindIndex(15)

FindStringIndex(14)

FindAllStringIndex(14)

FindSubmatchIndex(9)

ReplaceAllStringFunc(8)

FindAllIndex(8)

Split(7)

FindAllStringSubmatchIndex(6)

Find(6)

LiteralPrefix(5)

ReplaceAllFunc(5)

NumSubexp(4)

ReplaceAll(3)

FindAllSubmatch(3)

ReplaceAllLiteralString(2)

FindStringSubmatchIndex(2)

FindAllSubmatchIndex(2)

ExpandString(2)

MatchReader(1)

Longest(1)

ExecuteString(1)

FindAll(1)

AllMatchesStringIter(1)

예제 #1

파일 보기

파일: tokenizer.go 프로젝트: ruianderson/shield

// PerformTokenization cuts text apart at every match of splitToken and counts
// how often each resulting piece occurs. Pieces that are two bytes long or
// shorter are skipped; the remaining pieces are lower-cased before counting.
// The returned map is never nil.
func PerformTokenization(text string, splitToken *regexp.Regexp) (words map[string]int64) {
	counts := map[string]int64{}
	pieces := splitToken.Split(text, -1)
	for i := 0; i < len(pieces); i++ {
		piece := pieces[i]
		if len(piece) <= 2 {
			continue
		}
		key := strings.ToLower(piece)
		counts[key] = counts[key] + 1
	}
	return counts
}

예제 #2

파일 보기

파일: jieba.go 프로젝트: kennylixi/jiebago

func Cut(sentence string, cut_all bool, HMM bool) []string {
	result := make([]string, 0)
	var re_han, re_skip *regexp.Regexp
	if cut_all {
		re_han = regexp.MustCompile(`\p{Han}+`)
		re_skip = regexp.MustCompile(`[^[:alnum:]+#\n]`)
	} else {
		re_han = regexp.MustCompile(`([\p{Han}+[:alnum:]+#&\._]+)`)
		re_skip = regexp.MustCompile(`(\r\n|\s)`)
	}
	blocks := RegexpSplit(re_han, sentence)
	var cut_block cutAction
	if HMM {
		cut_block = cut_DAG
	} else {
		cut_block = cut_DAG_NO_HMM
	}
	if cut_all {
		cut_block = cut_All
	}
	for _, blk := range blocks {
		if len(blk) == 0 {
			continue
		}
		if re_han.MatchString(blk) {
			for _, word := range cut_block(blk) {
				result = append(result, word)
			}
		} else {
			type skipSplitFunc func(sentence string) []string
			var ssf skipSplitFunc
			if cut_all {
				ssf = func(sentence string) []string {
					return re_skip.Split(sentence, -1)
				}
			} else {
				ssf = func(sentence string) []string {
					return RegexpSplit(re_skip, sentence)
				}
			}

			for _, x := range ssf(blk) {
				if re_skip.MatchString(x) {
					result = append(result, x)
				} else if !cut_all {
					for _, xx := range x {
						result = append(result, string(xx))
					}
				} else {
					result = append(result, x)
				}
			}
		}
	}
	return result
}

예제 #3

파일 보기

파일: parse.go 프로젝트: toma63/parse

// SplitTokenizer receives lines until the lines channel is closed, breaks
// each line apart at the matches of split_re, and sends every non-empty
// piece to tokens. After the input is exhausted it closes tokens.
func SplitTokenizer(split_re *regexp.Regexp, lines <-chan string, tokens chan<- string) {
	for {
		line, open := <-lines
		if !open {
			break
		}
		pieces := split_re.Split(line, -1)
		for i := range pieces {
			if len(pieces[i]) > 0 {
				tokens <- pieces[i]
			}
		}
	}
	close(tokens)
}

예제 #4

파일 보기

파일: tabular.go 프로젝트: allenbhuiyan/distributive

// SeparateString turns str into a table: rowSep divides the text into lines
// and colSep divides each line into cells. Every cell has its surrounding
// whitespace trimmed. A row is kept only when HasNonEmpty reports true for
// it; all other rows are dropped from the output.
func SeparateString(rowSep *regexp.Regexp, colSep *regexp.Regexp, str string) (output Table) {
	for _, line := range rowSep.Split(str, -1) {
		cells := colSep.Split(line, -1)
		trimmed := make([]string, len(cells))
		for i := range cells {
			trimmed[i] = strings.TrimSpace(cells[i])
		}
		if len(trimmed) == 0 || !HasNonEmpty(trimmed) {
			continue
		}
		output = append(output, trimmed)
	}
	return output
}

예제 #5

파일 보기

파일: probabalisticSplit.go 프로젝트: allenbhuiyan/distributive

// getColumnRegex picks, from a fixed list of candidate column separators,
// the regexp that splits str into the most evenly sized rows.
//
// str is the raw table text and rowSep is the regexp that separates its rows.
// Candidates are first filtered by how many rows they match, then tested for
// near-zero variance in row length (optionally after discarding outliers via
// chauvenet). If no candidate passes, the one with the lowest variance is
// returned.
func getColumnRegex(str string, rowSep *regexp.Regexp) *regexp.Regexp {
	// matchesMost is used to ensure that our regexp actually is splitting the
	// lines of a table, instead of just returning them whole. It reports
	// whether re matches at least len(rows)/2 (integer division) of the rows.
	matchesMost := func(re *regexp.Regexp, rows []string) bool {
		count := 0
		for _, row := range rows {
			if re.MatchString(row) {
				count++
			}
		}
		return count >= (len(rows) / 2)
	}
	// getRowLengths returns the number of cells in each row of the table.
	getRowLengths := func(table Table) (lengths []int) {
		for _, row := range table {
			lengths = append(lengths, len(row))
		}
		return lengths
	}
	// getVariance returns the variance of the row lengths produced by
	// splitting str with colSep, after applying chauvenet to the lengths
	// `outliers` times.
	getVariance := func(colSep *regexp.Regexp, outliers int) float64 {
		table := SeparateString(rowSep, colSep, str)
		rowLengths := getRowLengths(table)
		for i := 0; i < outliers; i++ {
			rowLengths = chauvenet(rowLengths)
		}
		return variance(rowLengths)
	}
	// testRegexp reports whether colSep yields (almost) perfectly even row
	// lengths, i.e. a variance of at most .1, for any outlier count in
	// [0, outliers).
	// NOTE(review): the upper bound is exclusive, so a call with outliers == 0
	// performs no test and returns false — confirm whether `<=` was intended.
	testRegexp := func(colSep *regexp.Regexp, outliers int) bool {
		for i := 0; i < outliers; i++ {
			// this local shadows the package-level variance function
			variance := getVariance(colSep, i)
			if variance <= .1 {
				return true
			}
		}
		return false
	}
	// different column separators to try out
	initialColSeps := []*regexp.Regexp{
		regexp.MustCompile(`\t+`),    // tabs
		regexp.MustCompile(`\s{4}`),  // exactly four whitespaces
		regexp.MustCompile(`\s{2,}`), // two+ whitespace (spaces in cols)
		regexp.MustCompile(`\s+`),    // any whitespace
	}
	// keep only the regexps that match at least half of the rows (see
	// matchesMost) - a regexp that never matches always yields rows of even
	// length (length 1) and would trivially win.
	colSeps := []*regexp.Regexp{}
	rows := rowSep.Split(str, -1)
	for _, re := range initialColSeps {
		if matchesMost(re, rows) {
			colSeps = append(colSeps, re)
		}
	}
	// nothing survived the filter: warn and fall back to the full list
	if len(colSeps) < 1 {
		log.WithFields(log.Fields{
			"attempted": initialColSeps,
			"table":     str,
		}).Warn("ProbabalisticSplit couldn't find a column separator.")
		colSeps = initialColSeps
	}
	// test each regexp for row length consistency with an increasing outlier
	// budget; candidates are tried in list order, so earlier (stricter)
	// separators win ties.
	// NOTE(review): because testRegexp's bound is exclusive, the i == 0 pass
	// tests nothing and the largest outlier count actually tried is passes-2.
	passes := 3
	for i := 0; i < passes; i++ {
		for _, re := range colSeps {
			if testRegexp(re, i) {
				return re
			}
		}
	}
	// if still not done, just pick the one with the lowest variance
	log.WithFields(log.Fields{
		"attempted": initialColSeps,
		"outliers":  passes,
	}).Debug("ProbabalisticSplit couldn't find a consistent regexp")
	var variances []float64
	for _, colSep := range colSeps {
		variances = append(variances, getVariance(colSep, passes))
	}
	// ensure that the returned index is within colSeps before indexing
	// (presumably extremaIndex returns the position of the minimum — confirm
	// against its definition)
	minVarianceIndex := extremaIndex(minFunc, variances)
	if len(colSeps) <= minVarianceIndex {
		msg := "Internal error: minVarianceIndex couldn't be found in colSeps"
		log.WithFields(log.Fields{
			"index":   minVarianceIndex,
			"colSeps": colSeps,
		}).Fatal(msg)
	}
	return colSeps[minVarianceIndex]
}

예제 #6

파일 보기

파일: glin.go 프로젝트: raff/glin

// main reads lines from standard input, splits each line into fields, and
// prints the fields selected by the positional arguments.
//
// Field extraction uses the first of these that is configured:
//  1. -grep: submatches of the pattern; lines that do not match are skipped
//  2. -re: submatches of the pattern; non-matching lines keep only $0
//  3. -ifs-re: split on a regular expression
//  4. -ifs " " (the default): split on SPACES after trimming the line
//  5. -ifs <sep>: split on the literal separator
//
// Element 0 of the field list is always the (possibly -after trimmed) line.
// The process exits with status_code, which starts as OK, becomes
// MATCH_NOT_FOUND when -matches is given, and MATCH_FOUND once any processed
// line matches that pattern. A read error on stdin is fatal.
func main() {
	version := flag.Bool("version", false, "print version and exit")
	quote := flag.Bool("quote", false, "quote returned fields")
	// The help text used to be a copy of the -quote description.
	unquote := flag.Bool("unquote", false, "unquote returned fields")
	ifs := flag.String("ifs", " ", "input field separator")
	ire := flag.String("ifs-re", "", "input field separator (as regular expression)")
	ofs := flag.String("ofs", " ", "output field separator")
	re := flag.String("re", "", "regular expression for parsing input")
	grep := flag.String("grep", "", "output only lines that match the regular expression")
	format := flag.String("printf", "", "output is formatted according to specified format")
	matches := flag.String("matches", "", "return status code 100 if any line matches the specified pattern, 101 otherwise")
	after := flag.String("after", "", "process fields in line after specified tag")
	afterline := flag.String("after-line", "", "process lines after lines that matches")
	afterlinen := flag.Int("after-linen", 0, "process lines after n lines")
	printline := flag.Bool("line", false, "print line numbers")
	debug := flag.Bool("debug", false, "print debug info")

	flag.Parse()

	if *version {
		extra := ""
		if gitCommit != "" {
			extra = fmt.Sprintf(" (%.4v %v)", gitCommit, buildDate)
		}

		fmt.Printf("%s version %s%v\n", path.Base(os.Args[0]), VERSION, extra)
		return
	}

	// Each positional argument describes one output field selection.
	pos := make([]Pos, len(flag.Args()))

	for i, arg := range flag.Args() {
		pos[i].Set(arg)
	}

	// A printf format always ends its output line.
	if len(*format) > 0 && !strings.HasSuffix(*format, "\n") {
		*format += "\n"
	}

	var split_re *regexp.Regexp
	var split_pattern *regexp.Regexp
	var match_pattern *regexp.Regexp
	var grep_pattern *regexp.Regexp
	status_code := OK

	if len(*matches) > 0 {
		match_pattern = regexp.MustCompile(*matches)
		status_code = MATCH_NOT_FOUND
	}

	if len(*grep) > 0 {
		grep_pattern = regexp.MustCompile(*grep)
	}

	if len(*re) > 0 {
		split_pattern = regexp.MustCompile(*re)
	}

	if len(*ire) > 0 {
		split_re = regexp.MustCompile(*ire)
	}

	scanner := bufio.NewScanner(os.Stdin)
	len_after := len(*after)
	len_afterline := len(*afterline)
	lineno := 0

	for scanner.Scan() {
		line := scanner.Text()

		lineno++

		// -after-linen: skip the first n lines.
		if *afterlinen >= lineno {
			continue
		}

		// -after-line: skip everything up to and including the first line
		// containing the marker; zeroing the length disables the check
		// for the remaining lines.
		if len_afterline > 0 {
			if strings.Contains(line, *afterline) {
				len_afterline = 0
			}

			continue
		}

		// -after: keep only the text following the tag.
		if len_after > 0 {
			i := strings.Index(line, *after)
			if i < 0 {
				continue // no match
			}

			line = line[i+len_after:]
		}

		fields := []string{line} // $0 is the full line

		if grep_pattern != nil {
			sub := grep_pattern.FindStringSubmatch(line)
			if sub == nil {
				continue
			}
			fields = sub
		} else if split_pattern != nil {
			if sub := split_pattern.FindStringSubmatch(line); sub != nil {
				fields = sub
			}
		} else if split_re != nil {
			// split line according to input regular expression
			fields = append(fields, split_re.Split(line, -1)...)
		} else if *ifs == " " {
			// split line on spaces (compact multiple spaces)
			fields = append(fields, SPACES.Split(strings.TrimSpace(line), -1)...)
		} else {
			// split line according to input field separator
			fields = append(fields, strings.Split(line, *ifs)...)
		}

		if *debug {
			log.Printf("input fields: %q\n", fields)
			if len(pos) > 0 {
				log.Printf("output fields: %q\n", pos)
			}
		}

		var result []string

		// Select the requested fields, or everything but $0 by default.
		if len(pos) > 0 {
			result = make([]string, 0)

			for _, p := range pos {
				result = append(result, Slice(fields, p)...)
			}
		} else {
			result = fields[1:]
		}

		if *unquote {
			result = Unquote(result)
		}

		if *quote {
			result = Quote(result)
		}

		if *printline {
			fmt.Printf("%d: ", lineno)
		}

		if len(*format) > 0 {
			Print(*format, result)
		} else {
			// join the result according to output field separator
			fmt.Println(strings.Join(result, *ofs))
		}

		if match_pattern != nil && match_pattern.MatchString(line) {
			status_code = MATCH_FOUND
		}
	}

	// Scan returns false on both end of input and on a read error, and Err
	// stays nil while Scan returns true, so the error must be checked here
	// rather than inside the loop.
	if err := scanner.Err(); err != nil {
		log.Fatal(err)
	}

	os.Exit(status_code)
}

예제 #7

파일 보기

파일: parents.go 프로젝트: feiquanbifeng/go-projects

// Parents builds a list of directory paths derived from a starting path by
// splitting it on the path separator and re-joining the components
// cumulatively.
//
// Arguments are optional and positional:
//   - args[0]: the starting path; when absent, os.Getwd is used.
//   - args[1]: a platform name; a value starting with "win" selects Windows
//     handling (both `\` and `/` split, `\` joins), anything else selects
//     "/" handling. When absent, runtime.GOOS decides.
//
// The result is produced by the array package's Reduce/Slice/Reverse
// helpers, so its exact ordering depends on their semantics.
// NOTE(review): presumably the list runs from the deepest directory up to
// the root — confirm against the array package.
func Parents(args ...string) []string {
	var (
		isWindows = runtime.GOOS == "windows"
		reg       *regexp.Regexp
		init      array.Array
		cwd       string
		sep       string
		c         string
	)
	if n := len(args); n == 0 {
		// A Getwd failure leaves cwd empty, which path.Clean below turns
		// into "."; there is no error return to report it through.
		cwd, _ = os.Getwd()
	} else if n == 1 {
		cwd = args[0]
	} else {
		cwd = args[0]
		isWindows = strings.HasPrefix(args[1], "win")
	}

	// Pick the split pattern, the join separator and the reduce seed once.
	if isWindows {
		c = `[\\\/]`
		sep = "\\"
		init = array.Array{""}
	} else {
		c = `/`
		sep = "/"
		init = array.Array{"/"}
	}

	reg = regexp.MustCompile(c)

	// join glues two path components with sep, ignoring non-string and
	// empty values, and returns the cleaned result as a one-element Array.
	var join = func(x, y interface{}) array.Array {
		tmpArray := array.Array{x, y}
		var ps = tmpArray.Filter(func(p interface{}, args ...interface{}) bool {
			s, ok := p.(string)
			return ok && s != ""
		})
		return array.Array{path.Clean(ps.Join(sep))}
	}

	var res = path.Clean(cwd)
	arr := array.Array{}
	for _, v := range reg.Split(res, -1) {
		arr.Push(v)
	}

	// Each step appends join(acc[index], dir) to the accumulator.
	arrReduce := arr.Reduce(func(acc, dir interface{}, ix ...interface{}) interface{} {
		tmpAcc := acc.(array.Array)
		index := ix[0].(int)
		tmpAcc = tmpAcc.Concat(join(tmpAcc[index], dir))
		return tmpAcc
	}, init)

	// Not like javascript support method links
	// everytime you should assign
	arrSlice := arrReduce.(array.Array)
	arrReverse := arrSlice.Slice(1, 0)
	arrReverse.Reverse()
	// Two identical leading entries collapse into a single-element result.
	if len(arrReverse) >= 2 && arrReverse[0] == arrReverse[1] {
		return []string{arrReverse[0].(string)}
	}

	if isWindows && strings.HasPrefix(cwd, "\\") {
		cut := arrReverse.Slice(0, -1)
		// NOTE(review): the value returned by Map is discarded, so this only
		// has an effect if Map rewrites cut in place — confirm. Indexing
		// s[0] also panics if an element is the empty string.
		cut.Map(func(d interface{}, args ...interface{}) interface{} {
			s := d.(string)
			switch s[0] {
			case '\\':
				return d
			case '.':
				return "\\" + s[1:]
			default:
				return "\\" + s
			}
		})
		return cut.ToString()
	}
	return arrReverse.ToString()
}