func main() { runtime.GOMAXPROCS(4) bytes, err := ioutil.ReadFile("/dev/stdin") if err != nil { fmt.Fprintf(os.Stderr, "can't read input: %s\n", err) os.Exit(2) } ilen := len(bytes) // Delete the comment lines and newlines bytes = regexp.MustCompile("(>[^\n]+)?\n").ReplaceAll(bytes, []byte{}) clen := len(bytes) mresults := make([]chan int, len(variants)) for i, s := range variants { ch := make(chan int) mresults[i] = ch go func(ss string) { ch <- countMatches(ss, bytes) }(s) } lenresult := make(chan int) bb := bytes go func() { for _, sub := range substs { bb = regexp.MustCompile(sub.pat).ReplaceAll(bb, []byte(sub.repl)) } lenresult <- len(bb) }() for i, s := range variants { fmt.Printf("%s %d\n", s, <-mresults[i]) } fmt.Printf("\n%d\n%d\n%d\n", ilen, clen, <-lenresult) }
func getBezLish(str string) string { str = strings.ToLower(str) reg := rubex.MustCompile(`[^a-zA-ZА-Яа-я0-9- ']`) str = reg.ReplaceAllString(str, "") reg = rubex.MustCompile(`[ ]{2,}`) str = reg.ReplaceAllString(str, " ") str = strings.Trim(str, " ") return str }
// getExp reads /home/serega/Загрузки/WordNet-3.0/dict/exp, builds a map from
// the first space-separated field of each line to the second one (with every
// '_' replaced by a space in both), prints the map size and its entries, and
// writes the gob-encoded map to /home/serega/log/exp.
//
// Lines with fewer than two fields are skipped. Any I/O or encoding error is
// printed and aborts the function, so a failed read no longer overwrites the
// output file with an empty table.
func getExp() {
	raw, err := ioutil.ReadFile("/home/serega/Загрузки/WordNet-3.0/dict/exp")
	if err != nil {
		fmt.Println("getExp: reading input:", err)
		return
	}

	lines := strings.Split(string(raw), "\n")
	exp := make(map[string]string, len(lines))
	for _, line := range lines {
		fields := strings.Split(line, " ")
		if len(fields) < 2 {
			continue
		}
		// Only the first two fields are used; anything after is ignored.
		key := strings.Replace(fields[0], "_", " ", -1)
		exp[key] = strings.Replace(fields[1], "_", " ", -1)
	}

	fmt.Println(len(exp))
	for k, v := range exp {
		fmt.Print(k)
		fmt.Print(" - ")
		fmt.Println(v)
	}

	var network bytes.Buffer
	if err := gob.NewEncoder(&network).Encode(exp); err != nil {
		fmt.Println("getExp: encoding table:", err)
		return
	}
	if err := ioutil.WriteFile("/home/serega/log/exp", network.Bytes(), 0644); err != nil {
		fmt.Println("getExp: writing table:", err)
	}
}
func removStopWord(str string) string { st, _ := ioutil.ReadFile("/home/serega/log/stopWord200") lin0 := strings.Split(string(st), "\n") reg := rubex.MustCompile(`(?<=\ |\,|\.|\r|\n)the(?=\ |\,|\.|\r|\n)`) for _, word := range lin0 { lin1 := strings.Split(string(word), " ") reg := rubex.MustCompile(`(?<=\ |\,|\.|\r|\n)` + lin1[0] + `(?=\ |\,|\.|\r|\n)`) str = reg.ReplaceAllString(str, "") //str = strings.Split(string(str), " ") } reg = rubex.MustCompile(`[ ]{2,}`) str = reg.ReplaceAllString(str, " ") //str = reg.ReplaceAllString(str, "Е") //reg = rubex.MustCompile(`[ё]`) return str }
func allocate() []*rubex.Regexp { // some overlap in here, but it'll make the parsing functions clearer matchers := make([]*rubex.Regexp, 0, NUM_LEXEMES) for _, p := range pattern { matchers = append(matchers, rubex.MustCompile(`\A`+p)) } return matchers }
func removeRN() { st, _ := ioutil.ReadFile("/home/serega/log/otvY") lin := string(st) reg := rubex.MustCompile(`\n\n`) lin = reg.ReplaceAllString(lin, "\n") err := ioutil.WriteFile("/home/serega/log/otvY1", []byte(lin), 0644) if err != nil { panic(err) } }
func RegSplit(text string, delimeter string) []string { reg := rubex.MustCompile(delimeter) indexes := reg.FindAllStringIndex(text, -1) laststart := 0 result := make([]string, len(indexes)+1) for i, element := range indexes { result[i] = text[laststart:element[0]] laststart = element[1] } result[len(indexes)] = text[laststart:len(text)] return result }
func ispravText() { st, _ := ioutil.ReadFile("/home/serega/log/newRus1") lin := string(st) //fmt.Println(lin) //otv := prepereText(lin) reg := rubex.MustCompile(`[0-9]+(?=\r\n)`) otv := reg.ReplaceAllString(lin, "") err := ioutil.WriteFile("/home/serega/log/newRus12", []byte(otv), 0644) if err != nil { panic(err) } }
func prepText(str string) string { reg := rubex.MustCompile(`(?<=[\[\,])[0-9\.]*(?=[\]\,])`) str = reg.ReplaceAllString(str, `""`) reg = rubex.MustCompile(`(true|false)`) str = reg.ReplaceAllString(str, `""`) // reg = regexp.MustCompile(`\[[0-9\,\.]*,`) // str = reg.ReplaceAllString(str, "") // reg = regexp.MustCompile(`,[0-9\,\.]*,`) // str = reg.ReplaceAllString(str, "") //fmt.Println(str) //reg := rubex.MustCompile() // indexes := RegSplit(str, `(?<=[\[\,]\")[a-zA-ZА-Яа-я 0-9\.\,\\\:\']{3,}(?=\")`) // for _, v1 := range indexes { // //reg := rubex.MustCompile(v1) // //str = reg.ReplaceAllString(str, ``) // str = strings.Replace(str, v1, "\r\n", -1) // //fmt.Println(v1) // } return str }
// countMatches compiles pat and returns how many times it matches in bytes,
// scanning left to right and restarting the search right after each match.
//
// It panics if pat is not a valid regular expression (regexp.MustCompile).
//
// A pattern that can match the empty string used to loop forever because the
// search never advanced; an empty match now consumes one UTF-8 sequence so
// the scan always makes progress. Counts for non-empty matches are unchanged.
func countMatches(pat string, bytes []byte) int {
	re := regexp.MustCompile(pat)
	n := 0
	for {
		loc := re.FindIndex(bytes)
		if loc == nil {
			return n
		}
		n++

		next := loc[1]
		if loc[0] == loc[1] {
			// Empty match: step over one character so the next search cannot
			// report the same position again.
			if next >= len(bytes) {
				return n
			}
			next++
			// Skip UTF-8 continuation bytes (10xxxxxx) so the remainder
			// starts on a character boundary.
			for next < len(bytes) && bytes[next]&0xC0 == 0x80 {
				next++
			}
		}
		bytes = bytes[next:]
	}
}
func init() { re1 = make([]Matcher, NUM) re2 = make([]Matcher, NUM) for i := 0; i < NUM; i++ { re1[i] = regexp.MustCompile("[a-c]*$") re2[i] = re.MustCompile("[a-c]*$") } TaskChann = make(chan *Task, 100) for i := 0; i < 10; i++ { STR += STR } fmt.Println("len:", len(STR)) }
// main12 decodes jsonDataStream as a JSON array of arrays of arrays of
// strings and extracts text from its first element.
//
// Within that first element the non-empty strings are visited in order and
// every other one (the 1st, 3rd, 5th, ...) is emitted with all CR and LF
// characters removed and "\r\n" appended. Empty strings are skipped and do
// not take part in the alternation. The last row of the first element is not
// visited unless it is the only row.
//
// A decode error other than io.EOF is printed; processing then continues
// with whatever was decoded. The result is "" when nothing was decoded.
func main12(jsonDataStream string) string {
	var worker [][][]string
	err := json.NewDecoder(strings.NewReader(jsonDataStream)).Decode(&worker)
	if err != nil && err != io.EOF {
		fmt.Println(err)
	}
	if len(worker) == 0 {
		return ""
	}

	// Built once instead of compiling a regular expression for every string.
	stripEOL := strings.NewReplacer("\r", "", "\n", "")

	var per strings.Builder
	rows := worker[0]
	emit := true // toggles on every non-empty string
	for i, row := range rows {
		for _, cell := range row {
			if len(cell) == 0 {
				continue
			}
			if emit {
				per.WriteString(stripEOL.Replace(cell))
				per.WriteString("\r\n")
			}
			emit = !emit
		}
		// Same stop condition as the original 1-based counter (j >= len):
		// stop after the second-to-last row.
		if i+2 >= len(rows) {
			break
		}
	}
	return per.String()
}
func isTru1(str string, pro string) bool { wordRx := rubex.MustCompile(`.*` + pro + `.*`) return wordRx.MatchString(str) }
func prepereText(str string) string { //reg := rubex.MustCompile(`\t`) //str = reg.ReplaceAllString(str, "") reg := rubex.MustCompile(`[Ё]`) str = reg.ReplaceAllString(str, "Е") reg = rubex.MustCompile(`[ё]`) str = reg.ReplaceAllString(str, "е") reg = rubex.MustCompile(`(?<=[а-яa-z])-\r\n{1}(?=[а-яa-z])`) str = reg.ReplaceAllString(str, "") reg = rubex.MustCompile(`[^-a-zA-Z0-9_,:;'". !?а-яА-Я\r\n]`) str = reg.ReplaceAllString(str, "") reg = rubex.MustCompile(`(?<=[,.!])(?![ \r\n]+)`) str = reg.ReplaceAllString(str, " ") reg = rubex.MustCompile(`[ ]{2,}`) str = reg.ReplaceAllString(str, " ") reg = rubex.MustCompile(`[-]{2,}`) str = reg.ReplaceAllString(str, " ") reg = rubex.MustCompile(`(?<=\.)[ \r\n]+(?=\.)`) str = reg.ReplaceAllString(str, "") reg = rubex.MustCompile(`(?<=\.)[ \r\n]+(?=[^a-zA-Z0-9_,а-яА-Я])`) str = reg.ReplaceAllString(str, " ") reg = rubex.MustCompile(`(?<=[.?!\"\'])-`) str = reg.ReplaceAllString(str, " -") reg = rubex.MustCompile(`(?![a-zа-я])['"]+(?![a-zа-я])(?=[-a-zA-Z0-9_а-яА-Я, \r\n]+[.?!:]+)`) str = reg.ReplaceAllString(str, " ") reg = rubex.MustCompile(`(?=[-a-zA-Z0-9_а-яА-Я, ]*)(?<=[.!?:-])[ ]*["']+`) str = reg.ReplaceAllString(str, " ") reg = rubex.MustCompile(`(?![a-zа-я])'(?![a-zа-я])`) str = reg.ReplaceAllString(str, " ") reg = rubex.MustCompile(`(?<=[0-9])(?=[a-zA-Zа-яА-Я]+)`) str = reg.ReplaceAllString(str, " ") reg = rubex.MustCompile(`(?<=[a-zA-Zа-яА-Я])(?=[0-9])`) str = reg.ReplaceAllString(str, " ") reg = rubex.MustCompile(`[ ]{2,}`) str = reg.ReplaceAllString(str, " ") reg = rubex.MustCompile(`(?<=([\r\n ])[A-ZА-Яа-яa-z]\.)\s*(?=[A-ZА-Я\-0-9])`) str = reg.ReplaceAllString(str, "") str = strings.TrimSpace(str) // err := ioutil.WriteFile("/home/serega/log/delRus", []byte(str), 0644) // if err != nil { // panic(err) // } //(?=[^a-zA-Z0-9_,;:.!?а-яА-Я]+)["']{1}(?=[^a-zA-Z0-9_,;:.!?а-яА-Я]+)(?=[-a-zA-Z0-9_а-яА-Я, \r\n]*[.?!:]+['"]+) var s bytes.Buffer lin := RegSplit(str, `(?=[А-Я 
\-\.,0-9а-яA-Za-z]{5,})((?<=[\.\?\!:])\s+|(?<=[\r\n:])\s*)(?=[А-ЯA-Z0-9]|-|\.)`) for _, word := range lin { reg = rubex.MustCompile(`[\r\n](?=[ \r\n])`) word = reg.ReplaceAllString(word, "") reg = rubex.MustCompile(`[\r\n](?=[a-zA-Z0-9_,:;.!?а-яА-Я])`) word = reg.ReplaceAllString(word, " ") reg = rubex.MustCompile(`[\r\n]`) word = reg.ReplaceAllString(word, "") reg = rubex.MustCompile(`(?<![a-zA-Z])['"]+`) word = reg.ReplaceAllString(word, "") reg = rubex.MustCompile(`(?<=[a-zA-Z0-9_,;:.!?а-яА-Я]),(?=[a-zA-Z0-9_,:;.!?а-яА-Я])`) word = reg.ReplaceAllString(word, ", ") reg = rubex.MustCompile(`(?<=[a-zа-я])(?=[A-ZА-Я])`) word = reg.ReplaceAllString(word, " ") reg = rubex.MustCompile(`(?<=[A-ZА-Я]\.)\s*(?=[A-ZА-Я-])`) word = reg.ReplaceAllString(word, " ") s.WriteString(word + "\r\n") } return s.String() }
func getIshEng(st string) string { if _, found := enggl[st]; found { return st } if isTru1(st, `^[0-9,.-:;+-=/*]+$`) { return st } if isTru1(st, `n't.*`) { reg := rubex.MustCompile(`n't.*`) wot := "" word := reg.ReplaceAllString(st, wot) return getIshEng(word) } if isTru1(st, `'.*`) { reg := rubex.MustCompile(`'.*`) wot := "" word := reg.ReplaceAllString(st, wot) return getIshEng(word) } if sear, found := exp[st]; found { return sear //return getIshEng(sear) } if isTru1(st, `ed+$`) { reg := rubex.MustCompile(`ed+$`) wot := "" word := reg.ReplaceAllString(st, wot) r := getIshEng(word) if r != "" { return r } else { reg = rubex.MustCompile(`d+$`) wot = "" word = reg.ReplaceAllString(st, wot) r := getIshEng(word) if r != "" { return r } if isTru1(st, `ied+$`) { reg = rubex.MustCompile(`ied+$`) wot = "y" } if isTru1(st, `(d|p|t|l|r)ed+$`) { reg = rubex.MustCompile(`(d|p|t|l|r)ed+$`) wot = "" } word = reg.ReplaceAllString(st, wot) return getIshEng(word) } } if isTru1(st, `ing+$`) { reg := rubex.MustCompile(`ing+$`) wot := "" word := reg.ReplaceAllString(st, wot) r := getIshEng(word) if r != "" { return r } else { reg = rubex.MustCompile(`ing+$`) wot = "e" word = reg.ReplaceAllString(st, wot) r := getIshEng(word) if r != "" { return r } if isTru1(st, `ying+$`) { reg = rubex.MustCompile(`ying+$`) wot = "ie" } if isTru1(st, `(n|p|t|l|r)ing+$`) { reg = rubex.MustCompile(`(n|p|t|l|r)ing+$`) wot = "" } word = reg.ReplaceAllString(st, wot) return getIshEng(word) } } if isTru1(st, `s+$`) { reg := rubex.MustCompile(`s+$`) wot := "" word := reg.ReplaceAllString(st, wot) r := getIshEng(word) if r != "" { return r } else { //reg = rubex.MustCompile(`ing+$`) //wot = "e" if isTru1(st, `(?<=sh|ch|x|ss|o)es+$`) { reg = rubex.MustCompile(`(?<=sh|ch|x|ss|o)es+$`) wot = "" } if isTru1(st, `ies+$`) { reg = rubex.MustCompile(`ies+$`) wot = "y" } if isTru1(st, `ves+$`) { reg = rubex.MustCompile(`ves+$`) wot = "f" word = reg.ReplaceAllString(st, wot) r = getIshEng(word) if r != "" { wot = 
"fe" } } word = reg.ReplaceAllString(st, wot) return getIshEng(word) } } return "" }