Exemple #1
0
// main reads all of standard input, removes the ">" comment lines and every
// newline, and prints: the number of matches for each pattern in variants, a
// blank line, the original input length, the cleaned length, and the length
// that remains after applying every entry of substs in order.
//
// Every variant is counted in its own goroutine and the substitutions run in
// one more goroutine next to them; output is printed in variants order.
func main() {
	runtime.GOMAXPROCS(4)

	input, err := ioutil.ReadFile("/dev/stdin")
	if err != nil {
		fmt.Fprintf(os.Stderr, "can't read input: %s\n", err)
		os.Exit(2)
	}
	rawLen := len(input)

	// Strip the comment lines together with all newlines.
	stripper := regexp.MustCompile("(>[^\n]+)?\n")
	input = stripper.ReplaceAll(input, []byte{})
	cleanLen := len(input)

	// One unbuffered channel per variant keeps the results addressable by index.
	counts := make([]chan int, len(variants))
	for i, v := range variants {
		out := make(chan int)
		counts[i] = out
		go func(pattern string, result chan<- int) {
			result <- countMatches(pattern, input)
		}(v, out)
	}

	finalLen := make(chan int)
	go func() {
		// ReplaceAll returns a copy, so the input shared with the counting
		// goroutines is never modified here.
		replaced := input
		for _, sub := range substs {
			replaced = regexp.MustCompile(sub.pat).ReplaceAll(replaced, []byte(sub.repl))
		}
		finalLen <- len(replaced)
	}()

	for i, v := range variants {
		fmt.Printf("%s %d\n", v, <-counts[i])
	}
	fmt.Printf("\n%d\n%d\n%d\n", rawLen, cleanLen, <-finalLen)
}
Exemple #2
0
// getBezLish lower-cases str, deletes every character that the negated
// character class below does not list, squeezes runs of two or more spaces
// into a single space and finally trims spaces from both ends.
func getBezLish(str string) string {
	lowered := strings.ToLower(str)

	// Remove everything that is not named inside the bracket expression.
	disallowed := rubex.MustCompile(`[^a-zA-ZА-Яа-я0-9- ']`)
	cleaned := disallowed.ReplaceAllString(lowered, "")

	// Squeeze consecutive spaces.
	spaceRuns := rubex.MustCompile(`[ ]{2,}`)
	squeezed := spaceRuns.ReplaceAllString(cleaned, " ")

	return strings.Trim(squeezed, " ")
}
Exemple #3
0
// getExp builds a map from the first space-separated field of every line of
// the WordNet "exp" file to the second field (underscores in both fields are
// turned into spaces), prints the number of entries followed by every
// "key - value" pair, and stores the gob-encoded map in /home/serega/log/exp.
//
// Lines with fewer than two fields contribute nothing. Any read, encode or
// write error is printed and stops the function, so a failed read no longer
// overwrites the output file with an empty map.
func getExp() {
	st, err := ioutil.ReadFile("/home/serega/Загрузки/WordNet-3.0/dict/exp")
	if err != nil {
		fmt.Println(err)
		return
	}

	exp := make(map[string]string)
	for _, line := range strings.Split(string(st), "\n") {
		fields := strings.Split(line, " ")
		if len(fields) < 2 {
			continue
		}
		// Only the first two fields are used; the rest of the line is ignored.
		key := strings.Replace(fields[0], "_", " ", -1)
		exp[key] = strings.Replace(fields[1], "_", " ", -1)
	}

	fmt.Println(len(exp))
	for k, v := range exp {
		fmt.Print(k)
		fmt.Print(" - ")
		fmt.Println(v)
	}

	var network bytes.Buffer
	if err := gob.NewEncoder(&network).Encode(exp); err != nil {
		fmt.Println(err)
		return
	}
	if err := ioutil.WriteFile("/home/serega/log/exp", network.Bytes(), 0644); err != nil {
		fmt.Println(err)
	}
}
Exemple #4
0
// removStopWord deletes from str every occurrence of the stop words listed in
// /home/serega/log/stopWord200 and then squeezes runs of two or more spaces
// into one.
//
// The stop word is the first space-separated field of each line of the file.
// An occurrence is only removed when it is both preceded and followed by a
// space, comma, period, carriage return or newline. The word is inserted into
// the pattern verbatim, so regexp metacharacters in the file are interpreted
// as such.
func removStopWord(str string) string {
	// Best effort: when the list cannot be read there is nothing to remove and
	// only the space squeezing below takes place.
	st, _ := ioutil.ReadFile("/home/serega/log/stopWord200")

	for _, line := range strings.Split(string(st), "\n") {
		word := strings.Split(line, " ")[0]
		if word == "" {
			// A blank entry would only match empty strings and replace them
			// with nothing, so skip the compile and the scan.
			continue
		}
		reg := rubex.MustCompile(`(?<=\ |\,|\.|\r|\n)` + word + `(?=\ |\,|\.|\r|\n)`)
		str = reg.ReplaceAllString(str, "")
	}

	spaces := rubex.MustCompile(`[ ]{2,}`)
	return spaces.ReplaceAllString(str, " ")
}
Exemple #5
0
// allocate compiles every entry of pattern, prefixed with `\A`, and returns
// the compiled expressions in iteration order. The result is created with
// room for NUM_LEXEMES entries.
func allocate() []*rubex.Regexp {
	// The patterns overlap somewhat, but that keeps the parsing functions clearer.
	compiled := make([]*rubex.Regexp, 0, NUM_LEXEMES)
	for _, src := range pattern {
		anchored := `\A` + src
		compiled = append(compiled, rubex.MustCompile(anchored))
	}
	return compiled
}
Exemple #6
0
// removeRN reads /home/serega/log/otvY, replaces every pair of consecutive
// newlines with a single newline and writes the result to
// /home/serega/log/otvY1.
//
// It panics when the input cannot be read or the output cannot be written; a
// failed read therefore no longer produces an empty output file.
func removeRN() {
	st, err := ioutil.ReadFile("/home/serega/log/otvY")
	if err != nil {
		panic(err)
	}
	reg := rubex.MustCompile(`\n\n`)
	lin := reg.ReplaceAllString(string(st), "\n")
	if err := ioutil.WriteFile("/home/serega/log/otvY1", []byte(lin), 0644); err != nil {
		panic(err)
	}
}
Exemple #7
0
// RegSplit cuts text at every match of the regular expression delimeter and
// returns the pieces between the matches, in order. The matched text itself
// is not part of any piece. The result always holds one more element than
// there are matches, so a text without matches comes back as a single piece.
func RegSplit(text string, delimeter string) []string {
	matches := rubex.MustCompile(delimeter).FindAllStringIndex(text, -1)

	pieces := make([]string, 0, len(matches)+1)
	cursor := 0
	for _, span := range matches {
		pieces = append(pieces, text[cursor:span[0]])
		cursor = span[1]
	}
	// Whatever follows the last match forms the final piece.
	return append(pieces, text[cursor:])
}
Exemple #8
0
// ispravText reads /home/serega/log/newRus1, deletes every run of digits that
// is directly followed by "\r\n" and writes the result to
// /home/serega/log/newRus12.
//
// It panics when the input cannot be read or the output cannot be written; a
// failed read therefore no longer produces an empty output file.
func ispravText() {
	st, err := ioutil.ReadFile("/home/serega/log/newRus1")
	if err != nil {
		panic(err)
	}
	reg := rubex.MustCompile(`[0-9]+(?=\r\n)`)
	otv := reg.ReplaceAllString(string(st), "")
	if err := ioutil.WriteFile("/home/serega/log/newRus12", []byte(otv), 0644); err != nil {
		panic(err)
	}
}
Exemple #9
0
// prepText rewrites str in two passes and returns the result:
//
//  1. every (possibly empty) run of digits and periods that sits between a
//     "[" or "," on the left and a "]" or "," on the right becomes `""`;
//  2. every occurrence of "true" or "false" becomes `""`.
func prepText(str string) string {
	steps := [...]struct{ pattern, replacement string }{
		{`(?<=[\[\,])[0-9\.]*(?=[\]\,])`, `""`},
		{`(true|false)`, `""`},
	}
	for _, step := range steps {
		str = rubex.MustCompile(step.pattern).ReplaceAllString(str, step.replacement)
	}
	return str
}
Exemple #10
0
// countMatches returns how many times the regular expression pat matches in
// bytes. It panics if pat does not compile.
//
// Matches are found one after another: after each match the search restarts
// on the remainder of the input that follows it, so anchors are evaluated
// against that remainder. When a match is empty and sits at the very start of
// the remainder, the search steps forward by one byte so that patterns which
// can match the empty string terminate instead of looping forever.
func countMatches(pat string, bytes []byte) int {
	re := regexp.MustCompile(pat)
	n := 0
	for {
		e := re.FindIndex(bytes)
		if e == nil {
			break
		}
		n++
		if e[1] == 0 {
			// Empty match at offset 0: slicing by e[1] would make no progress.
			if len(bytes) == 0 {
				break
			}
			bytes = bytes[1:]
			continue
		}
		bytes = bytes[e[1]:]
	}
	return n
}
Exemple #11
0
// init prepares the package-level state: it fills re1 and re2 with NUM
// compiled copies of "[a-c]*$" (re1 through the regexp package, re2 through
// the re package), creates TaskChann with a buffer of 100 tasks, doubles STR
// ten times (making it 1024 times its initial length) and prints the
// resulting length.
func init() {
	re1 = make([]Matcher, NUM)
	re2 = make([]Matcher, NUM)
	for idx := 0; idx < NUM; idx++ {
		re1[idx] = regexp.MustCompile("[a-c]*$")
		re2[idx] = re.MustCompile("[a-c]*$")
	}

	TaskChann = make(chan *Task, 100)

	for doublings := 10; doublings > 0; doublings-- {
		STR = STR + STR
	}
	fmt.Println("len:", len(STR))
}
Exemple #12
0
// main12 decodes jsonDataStream as a three-level nested JSON array of strings
// and returns selected strings from its first top-level element, each with
// all "\r" and "\n" removed and terminated by "\r\n".
//
// Selection rules, applied to the rows of the first top-level element:
//   - empty strings are skipped entirely;
//   - of the remaining strings every other one is emitted, starting with the
//     first; the alternation carries over from one row to the next;
//   - the last row is not visited when there is more than one row.
//
// A decode error other than io.EOF is printed; whatever was decoded up to
// that point is still processed. An input without any top-level element
// yields the empty string.
func main12(jsonDataStream string) string {
	type mytype [][][]string

	var worker mytype
	decoder := json.NewDecoder(strings.NewReader(jsonDataStream))
	if err := decoder.Decode(&worker); err != nil && err != io.EOF {
		fmt.Println(err)
	}
	if len(worker) == 0 {
		return ""
	}

	// Built once instead of compiling a regexp for every emitted string.
	stripNewlines := strings.NewReplacer("\r", "", "\n", "")

	var per bytes.Buffer
	rows := worker[0]
	lenn := len(rows)
	j := 1
	pr := true // true when the next non-empty string has to be emitted
	for _, v1 := range rows {
		for _, v3 := range v1 {
			if len(v3) == 0 {
				continue
			}
			if pr {
				per.WriteString(stripNewlines.Replace(v3) + "\r\n")
			}
			pr = !pr
		}
		j++
		if j >= lenn {
			break
		}
	}

	return per.String()
}
Exemple #13
0
// isTru1 reports whether str matches the regular expression pro wrapped in
// `.*` on both sides. pro is inserted into the pattern verbatim.
func isTru1(str string, pro string) bool {
	return rubex.MustCompile(`.*` + pro + `.*`).MatchString(str)
}
Exemple #14
0
// prepereText cleans up str and returns it as a sequence of pieces, each
// terminated by "\r\n".
//
// The work happens in three stages:
//  1. a fixed list of regexp substitutions is applied to the whole text, in
//     order, followed by strings.TrimSpace;
//  2. the text is cut into pieces with RegSplit;
//  3. a second fixed list of substitutions is applied to every piece before
//     it is written to the result.
//
// The patterns and their order are significant; later passes rely on the
// output of earlier ones. The per-piece expressions are compiled once, not
// once per piece.
func prepereText(str string) string {
	type pass struct{ pat, repl string }

	// Stage 1: whole-text passes, applied strictly in this order.
	textPasses := []pass{
		// Fold "Ё"/"ё" into "Е"/"е".
		{`[Ё]`, "Е"},
		{`[ё]`, "е"},
		// Remove a hyphen plus line break between two lower-case letters.
		{`(?<=[а-яa-z])-\r\n{1}(?=[а-яa-z])`, ""},
		// Drop every character outside the listed set.
		{`[^-a-zA-Z0-9_,:;'". !?а-яА-Я\r\n]`, ""},
		// Insert a space after ",", "." or "!" when no whitespace follows.
		{`(?<=[,.!])(?![ \r\n]+)`, " "},
		// Squeeze runs of spaces, then turn runs of hyphens into a space.
		{`[ ]{2,}`, " "},
		{`[-]{2,}`, " "},
		// Whitespace handling after a period.
		{`(?<=\.)[ \r\n]+(?=\.)`, ""},
		{`(?<=\.)[ \r\n]+(?=[^a-zA-Z0-9_,а-яА-Я])`, " "},
		// Put a space in front of a hyphen that follows punctuation or a quote.
		{`(?<=[.?!\"\'])-`, " -"},
		// Quote-mark clean-up.
		{`(?![a-zа-я])['"]+(?![a-zа-я])(?=[-a-zA-Z0-9_а-яА-Я, \r\n]+[.?!:]+)`, " "},
		{`(?=[-a-zA-Z0-9_а-яА-Я, ]*)(?<=[.!?:-])[ ]*["']+`, " "},
		{`(?![a-zа-я])'(?![a-zа-я])`, " "},
		// Separate digits from letters in both directions.
		{`(?<=[0-9])(?=[a-zA-Zа-яА-Я]+)`, " "},
		{`(?<=[a-zA-Zа-яА-Я])(?=[0-9])`, " "},
		{`[ ]{2,}`, " "},
		// Remove whitespace after a single letter followed by a period.
		{`(?<=([\r\n ])[A-ZА-Яа-яa-z]\.)\s*(?=[A-ZА-Я\-0-9])`, ""},
	}
	for _, p := range textPasses {
		str = rubex.MustCompile(p.pat).ReplaceAllString(str, p.repl)
	}
	str = strings.TrimSpace(str)

	// Stage 2: cut the text into pieces.
	lin := RegSplit(str, `(?=[А-Я \-\.,0-9а-яA-Za-z]{5,})((?<=[\.\?\!:])\s+|(?<=[\r\n:])\s*)(?=[А-ЯA-Z0-9]|-|\.)`)

	// Stage 3: per-piece passes, compiled once and applied in this order.
	piecePasses := []pass{
		// Line-break handling: drop, turn into a space, then drop the rest.
		{`[\r\n](?=[ \r\n])`, ""},
		{`[\r\n](?=[a-zA-Z0-9_,:;.!?а-яА-Я])`, " "},
		{`[\r\n]`, ""},
		// Remove quote marks that do not follow a Latin letter.
		{`(?<![a-zA-Z])['"]+`, ""},
		// Add a space after a comma squeezed between two characters.
		{`(?<=[a-zA-Z0-9_,;:.!?а-яА-Я]),(?=[a-zA-Z0-9_,:;.!?а-яА-Я])`, ", "},
		// Split a lower-case letter from a following upper-case letter.
		{`(?<=[a-zа-я])(?=[A-ZА-Я])`, " "},
		// Exactly one space after an upper-case letter plus period.
		{`(?<=[A-ZА-Я]\.)\s*(?=[A-ZА-Я-])`, " "},
	}
	pieceRx := make([]*rubex.Regexp, len(piecePasses))
	for i, p := range piecePasses {
		pieceRx[i] = rubex.MustCompile(p.pat)
	}

	var s bytes.Buffer
	for _, word := range lin {
		for i, rx := range pieceRx {
			word = rx.ReplaceAllString(word, piecePasses[i].repl)
		}
		s.WriteString(word + "\r\n")
	}
	return s.String()
}
Exemple #15
0
// getIshEng tries to reduce the word st to a form that is known to the
// package-level tables enggl and exp, and returns that form. It returns the
// empty string when no rule leads to a known form.
//
// The rules are tried in this order; the first one that applies decides:
//  1. st is a key of enggl: st is returned unchanged.
//  2. st consists only of characters from the class `[0-9,.-:;+-=/*]`:
//     st is returned unchanged.
//  3. st contains "n't": everything from there on is cut off and the rest is
//     looked up recursively.
//  4. st contains "'": same, cutting from the apostrophe.
//  5. st is a key of exp: the mapped value is returned.
//  6. st ends in "ed", "ing" or "s": several suffix rewrites are tried
//     recursively, see the comments in each branch.
func getIshEng(st string) string {
	if _, found := enggl[st]; found {
		return st
	}
	if isTru1(st, `^[0-9,.-:;+-=/*]+$`) {
		return st
	}
	if isTru1(st, `n't.*`) {
		reg := rubex.MustCompile(`n't.*`)
		return getIshEng(reg.ReplaceAllString(st, ""))
	}
	if isTru1(st, `'.*`) {
		reg := rubex.MustCompile(`'.*`)
		return getIshEng(reg.ReplaceAllString(st, ""))
	}
	if sear, found := exp[st]; found {
		return sear
	}

	if isTru1(st, `ed+$`) {
		// First try dropping "ed", then only the "d".
		reg := rubex.MustCompile(`ed+$`)
		wot := ""
		if r := getIshEng(reg.ReplaceAllString(st, wot)); r != "" {
			return r
		}
		reg = rubex.MustCompile(`d+$`)
		if r := getIshEng(reg.ReplaceAllString(st, wot)); r != "" {
			return r
		}
		// Then "ied" -> "y"; a consonant from the listed set before "ed"
		// takes precedence and is dropped together with the suffix.
		if isTru1(st, `ied+$`) {
			reg = rubex.MustCompile(`ied+$`)
			wot = "y"
		}
		if isTru1(st, `(d|p|t|l|r)ed+$`) {
			reg = rubex.MustCompile(`(d|p|t|l|r)ed+$`)
			wot = ""
		}
		return getIshEng(reg.ReplaceAllString(st, wot))
	}

	if isTru1(st, `ing+$`) {
		// First try dropping "ing", then replacing it with "e".
		reg := rubex.MustCompile(`ing+$`)
		wot := ""
		if r := getIshEng(reg.ReplaceAllString(st, wot)); r != "" {
			return r
		}
		wot = "e"
		if r := getIshEng(reg.ReplaceAllString(st, wot)); r != "" {
			return r
		}
		// Then "ying" -> "ie"; a consonant from the listed set before "ing"
		// takes precedence and is dropped together with the suffix.
		if isTru1(st, `ying+$`) {
			reg = rubex.MustCompile(`ying+$`)
			wot = "ie"
		}
		if isTru1(st, `(n|p|t|l|r)ing+$`) {
			reg = rubex.MustCompile(`(n|p|t|l|r)ing+$`)
			wot = ""
		}
		return getIshEng(reg.ReplaceAllString(st, wot))
	}

	if isTru1(st, `s+$`) {
		// First try dropping the trailing "s".
		reg := rubex.MustCompile(`s+$`)
		wot := ""
		if r := getIshEng(reg.ReplaceAllString(st, wot)); r != "" {
			return r
		}
		// "es" after sh/ch/x/ss/o is dropped; "ies" -> "y".
		if isTru1(st, `(?<=sh|ch|x|ss|o)es+$`) {
			reg = rubex.MustCompile(`(?<=sh|ch|x|ss|o)es+$`)
			wot = ""
		}
		if isTru1(st, `ies+$`) {
			reg = rubex.MustCompile(`ies+$`)
			wot = "y"
		}
		if isTru1(st, `ves+$`) {
			// "ves" -> "f" first; only when that form is unknown fall back
			// to "fe". (The fallback used to be taken when "f" had
			// succeeded, which threw the successful result away.)
			reg = rubex.MustCompile(`ves+$`)
			wot = "f"
			if r := getIshEng(reg.ReplaceAllString(st, wot)); r != "" {
				return r
			}
			wot = "fe"
		}
		return getIshEng(reg.ReplaceAllString(st, wot))
	}

	return ""
}