예제 #1
0
파일: createEnlish.go 프로젝트: sereg/morf
// createExceptionNoun builds a word-to-word lookup table from the file
// "/home/serega/Copy/database/dict/<name>.exc" and persists it under
// file/englishLemm/.
//
// The file content is lower-cased and split into lines. Every line made of
// exactly two space-separated fields adds an entry that maps the first field
// to the second; all other lines are ignored. The number of entries is
// printed, then the map is handed to frub.CreateByZn and frub.Encode.
//
// A failure to read the input or to create the output file is reported on
// stderr and aborts the function instead of being silently ignored.
//
// NOTE(review): the function is named "...Noun" but name is "verb" — confirm
// which dictionary is intended.
func createExceptionNoun() {
	name := "verb"

	// Re-create the package-level helper first so this side effect still
	// happens when the input cannot be read.
	rubbx = frub.NewRubAr()

	f, err := os.Open("/home/serega/Copy/database/dict/" + name + ".exc")
	if err != nil {
		fmt.Fprintln(os.Stderr, "createExceptionNoun: open input:", err)
		return
	}
	buf := bytes.NewBuffer(nil)
	_, err = io.Copy(buf, f)
	f.Close()
	if err != nil {
		fmt.Fprintln(os.Stderr, "createExceptionNoun: read input:", err)
		return
	}
	sentense := strings.ToLower(buf.String())

	// Compile the separators once instead of once per input line.
	lineSep := regexp.MustCompile(`\n`)
	fieldSep := regexp.MustCompile(` `)

	findIsh := make(map[string]string)
	for _, s := range lineSep.Split(sentense, -1) {
		words := fieldSep.Split(s, -1)
		if len(words) == 2 {
			findIsh[words[0]] = words[1]
		}
	}
	fmt.Println(len(findIsh))

	frub.CreateByZn(findIsh, "file/englishLemm/find"+name+".gob")
	wodfFile, err := os.Create("file/englishLemm/find" + name)
	if err != nil {
		fmt.Fprintln(os.Stderr, "createExceptionNoun: create output:", err)
		return
	}
	// Closing again is harmless should frub.Encode already close the file.
	defer wodfFile.Close()
	frub.Encode(findIsh, wodfFile)
}
예제 #2
0
파일: createEnlish.go 프로젝트: sereg/morf
// createFrequency counts token-context keys over the sequences loaded from
// "file/englishLemm/baseFrequence" and stores the counts.
//
// For every token of every sequence the key is the concatenation
// previous token + token + next token, where "^" stands in for the missing
// predecessor of the first token and "$" for the missing successor of the
// last one. Progress is printed every 10000 sequences.
//
// The resulting map is handed to frub.CreateByZn and frub.Encode and finally
// dumped to stdout as "key  -  count" lines (in map iteration order, which is
// unspecified). A failure to create the output file is reported on stderr;
// the dump is still printed in that case.
func createFrequency() {
	frequence := make(map[string]int)
	baseFrequence := [][]string{}
	frub.Decode(&baseFrequence, "file/englishLemm/baseFrequence")

	for n, tokens := range baseFrequence {
		last := len(tokens) - 1
		for k, tok := range tokens {
			prev, next := "^", "$"
			if k > 0 {
				prev = tokens[k-1]
			}
			if k < last {
				next = tokens[k+1]
			}
			// Incrementing a missing key starts from the zero value, so no
			// separate existence check is needed.
			frequence[prev+tok+next]++
		}
		// n is zero-based; the progress counter is the number of sequences
		// processed so far.
		if done := n + 1; done%10000 == 0 {
			fmt.Println(done)
		}
	}

	frub.CreateByZn(frequence, "file/englishLemm/frequence1.gob")
	if wodfFile, err := os.Create("file/englishLemm/frequence1"); err != nil {
		fmt.Fprintln(os.Stderr, "createFrequency: create output:", err)
	} else {
		frub.Encode(frequence, wodfFile)
		// Closing again is harmless should frub.Encode already close the file.
		wodfFile.Close()
	}

	for k, v := range frequence {
		fmt.Println(k, " - ", v)
	}
}
예제 #3
0
파일: createEnlish.go 프로젝트: sereg/morf
// createEngWord extracts words from the file
// "/home/serega/Copy/database/dict/data.<name>" and stores them as a map
// from word to a tag under file/englishLemm/.
//
// The content is lower-cased and split into lines; the first 29 lines are
// skipped (presumably a file header — confirm against the data file). Each
// remaining line is split on single spaces and scanned from field 4 in steps
// of two:
//   - a field matching `^[a-zA-Z]+$` is stored with the tag name;
//   - otherwise a field matching `^[a-zA-Z_]+$` is passed through
//     rubbx.ReplaseRub and the result is stored with the tag "fraz".
//
// Scanning of a line stops when fewer than three fields remain after the
// current one, or when the field right after the current one is not "0".
//
// A failure to read the input or to create the output file is reported on
// stderr and aborts the function instead of being silently ignored.
func createEngWord() {
	name := "verb"

	// Re-create the package-level helper first so this side effect still
	// happens when the input cannot be read.
	rubbx = frub.NewRubAr()

	f, err := os.Open("/home/serega/Copy/database/dict/data." + name)
	if err != nil {
		fmt.Fprintln(os.Stderr, "createEngWord: open input:", err)
		return
	}
	buf := bytes.NewBuffer(nil)
	_, err = io.Copy(buf, f)
	f.Close()
	if err != nil {
		fmt.Fprintln(os.Stderr, "createEngWord: read input:", err)
		return
	}
	sentense := strings.ToLower(buf.String())

	// Compile the separators once instead of once per input line.
	lineSep := regexp.MustCompile(`\n`)
	fieldSep := regexp.MustCompile(` `)

	word := make(map[string]string, 20000)
	for k, s := range lineSep.Split(sentense, -1) {
		if k < 29 {
			continue
		}
		words := fieldSep.Split(s, -1)
		for key := 4; len(words) > key; key += 2 {
			if rubbx.IsTru1(words[key], `^[a-zA-Z]+$`) {
				word[words[key]] = name
			} else if rubbx.IsTru1(words[key], `^[a-zA-Z_]+$`) {
				words[key] = rubbx.ReplaseRub(words[key], ` `, `_`)
				word[words[key]] = "fraz"
			}
			// Continue with the next candidate only when it exists and the
			// field following the current one is "0".
			if len(words) <= key+2 || words[key+1] != "0" {
				break
			}
		}
	}

	frub.CreateByZn(word, "file/englishLemm/"+name+".gob")
	wodfFile, err := os.Create("file/englishLemm/" + name)
	if err != nil {
		fmt.Fprintln(os.Stderr, "createEngWord: create output:", err)
		return
	}
	// Closing again is harmless should frub.Encode already close the file.
	defer wodfFile.Close()
	frub.Encode(word, wodfFile)
	fmt.Println(len(word))
}
예제 #4
0
파일: create.go 프로젝트: sereg/morf
// createPhraseVErbs reads "file/material/phraseVerbs", groups the words of
// every line under the line's lower-cased first word, prints the result and
// stores it in "file/englishLemm/fraz" through f.Encode.
//
// Each line is split on single spaces. The first field becomes the map key
// and opens a new entry in that key's list; the remaining lower-cased fields
// are appended to that entry. While an entry is being filled, a field equal
// to the first word of the entry stored immediately before it under the same
// key causes the entry being built to be dropped and the rest of the line to
// be skipped.
//
// A failure to read the input or to create the output file is reported on
// stderr and aborts the function instead of being silently ignored.
func createPhraseVErbs() {
	src, err := os.Open("file/material/phraseVerbs")
	if err != nil {
		fmt.Fprintln(os.Stderr, "createPhraseVErbs: open input:", err)
		return
	}
	buf := bytes.NewBuffer(nil)
	_, err = io.Copy(buf, src)
	src.Close()
	if err != nil {
		fmt.Fprintln(os.Stderr, "createPhraseVErbs: read input:", err)
		return
	}

	// Compile the separators once instead of once per input line.
	lineSep := regexp.MustCompile(`\n`)
	fieldSep := regexp.MustCompile(` `)

	fraz := make(map[string][][]string)
	for _, line := range lineSep.Split(buf.String(), -1) {
		key := ""
		index := 0
		for k1, v1 := range fieldSep.Split(line, -1) {
			v1 = strings.ToLower(v1)
			if k1 == 0 {
				// Appending to a missing key starts from a nil list, so new
				// and existing keys are handled the same way.
				key = v1
				fraz[key] = append(fraz[key], []string{})
				index = len(fraz[key]) - 1
				continue
			}
			if index > 0 {
				// The previous entry is empty when its line held only the
				// key; guard the [0] access so such input does not panic.
				prev := fraz[key][index-1]
				if len(prev) > 0 && prev[0] == v1 {
					fraz[key] = fraz[key][:index]
					break
				}
			}
			fraz[key][index] = append(fraz[key][index], v1)
		}
	}

	fmt.Println(fraz)
	for k, v := range fraz {
		fmt.Printf("%s - %s\n", k, v)
	}

	wodfFile, err := os.Create("file/englishLemm/fraz")
	if err != nil {
		fmt.Fprintln(os.Stderr, "createPhraseVErbs: create output:", err)
		return
	}
	// Closing again is harmless should f.Encode already close the file.
	defer wodfFile.Close()
	f.Encode(fraz, wodfFile)
}
예제 #5
0
파일: createEnlish.go 프로젝트: sereg/morf
// createException builds a lookup table from the file
// "/home/serega/Copy/database/dict/<name>.exc" and persists it under
// file/englishLemm/.
//
// The content is lower-cased and split into lines, and each line into
// space-separated fields. Field 0 is the base word; fields 1 and 2 are each
// split on "/" into lists of forms. Every form is stored as
// form -> {base word, tag}, with the tag computed by exceptionFormType:
// forms of field 1 use the tags "v2"/"u1", forms of field 2 use "v3"/"u3",
// and a form present in both lists ends up as "u2" or "u". Field 2 is
// processed last, so its entry wins for a form that occurs in both fields.
//
// Lines with fewer than three fields are skipped; the previous guard let
// two-field lines through and then panicked on the missing third field.
//
// A failure to read the input or to create the output file is reported on
// stderr and aborts the function instead of being silently ignored.
func createException() {
	name := "verb1"

	// Re-create the package-level helper first so this side effect still
	// happens when the input cannot be read.
	rubbx = frub.NewRubAr()

	f, err := os.Open("/home/serega/Copy/database/dict/" + name + ".exc")
	if err != nil {
		os.Stderr.WriteString("createException: open input: " + err.Error() + "\n")
		return
	}
	buf := bytes.NewBuffer(nil)
	_, err = io.Copy(buf, f)
	f.Close()
	if err != nil {
		os.Stderr.WriteString("createException: read input: " + err.Error() + "\n")
		return
	}
	sentense := strings.ToLower(buf.String())

	// Compile the separators once instead of once per input line.
	lineSep := regexp.MustCompile(`\n`)
	fieldSep := regexp.MustCompile(` `)
	slashSep := regexp.MustCompile(`\/`)

	findIsh := make(map[string][]string)
	for _, s := range lineSep.Split(sentense, -1) {
		words := fieldSep.Split(s, -1)
		if len(words) < 3 {
			continue
		}
		base := words[0]
		words1 := slashSep.Split(words[1], -1)
		words2 := slashSep.Split(words[2], -1)

		for _, s1 := range words1 {
			findIsh[s1] = []string{base, exceptionFormType(s1, base, words2, "v2", "u1")}
		}
		for _, s1 := range words2 {
			findIsh[s1] = []string{base, exceptionFormType(s1, base, words1, "v3", "u3")}
		}
	}

	frub.CreateByZn(findIsh, "file/englishLemm/find"+name+".gob")
	wodfFile, err := os.Create("file/englishLemm/find" + name)
	if err != nil {
		os.Stderr.WriteString("createException: create output: " + err.Error() + "\n")
		return
	}
	// Closing again is harmless should frub.Encode already close the file.
	defer wodfFile.Close()
	frub.Encode(findIsh, wodfFile)
}

// exceptionFormType returns the tag createException stores for form.
//
// The tag starts as plain, or as sameAsBase when form equals base. Every
// element of others that equals form then turns a still-plain tag into "u2"
// and any other tag into "u".
func exceptionFormType(form, base string, others []string, plain, sameAsBase string) string {
	tag := plain
	if form == base {
		tag = sameAsBase
	}
	for _, other := range others {
		if form != other {
			continue
		}
		if tag == plain {
			tag = "u2"
		} else {
			tag = "u"
		}
	}
	return tag
}