Esempio n. 1
0
// createExceptionNoun builds the two-column exception table for the
// dictionary selected by name (currently hard-coded to "verb").
//
// It reads /home/serega/Copy/database/dict/<name>.exc, lower-cases the whole
// text and, for every line that consists of exactly two space-separated
// fields, stores field[0] -> field[1]. The resulting map is written through
// frub.CreateByZn to file/englishLemm/find<name>.gob and through frub.Encode
// to file/englishLemm/find<name>.
//
// A failure to read the dictionary or to create the output file is reported
// on stderr and aborts the run, so an unreadable input does not overwrite the
// existing outputs with an empty table.
func createExceptionNoun() {
	name := "verb"
	rubbx = frub.NewRubAr()

	src, err := os.Open("/home/serega/Copy/database/dict/" + name + ".exc")
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		return
	}
	buf := bytes.NewBuffer(nil)
	_, err = io.Copy(buf, src)
	src.Close()
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		return
	}
	sentense := strings.ToLower(buf.String())
	buf.Reset()

	// Compile the separators once instead of once per input line.
	lineSep := regexp.MustCompile(`\n`)
	fieldSep := regexp.MustCompile(` `)

	findIsh := make(map[string]string)
	for _, s := range lineSep.Split(sentense, -1) {
		words := fieldSep.Split(s, -1)
		if len(words) == 2 {
			findIsh[words[0]] = words[1]
		}
	}
	fmt.Println(len(findIsh))

	frub.CreateByZn(findIsh, "file/englishLemm/find"+name+".gob")
	wodfFile, err := os.Create("file/englishLemm/find" + name)
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		return
	}
	// NOTE(review): frub.Encode is not visible here; if it already closes the
	// file this second Close only returns an error, which is ignored.
	defer wodfFile.Close()
	frub.Encode(findIsh, wodfFile)
}
Esempio n. 2
0
// InitEngl prepares nn for English processing. It initialises nn.Graf,
// allocates every dictionary map and fills each one from its file under
// file/englishLemm/ (frub.GetByZn for the .gob files, frub.Decode for the
// others), and finally creates the nn.rubbx helper.
func (nn *MorfE) InitEngl() {
	nn.Graf.Init()

	nn.Unions = make(map[string]map[string]string, 0)
	frub.Decode(&nn.Unions, "file/englishLemm/noFounf")

	nn.frequence = make(map[string]int, 0)
	frub.GetByZn(&nn.frequence, "file/englishLemm/frequence1.gob")

	nn.fraz = make(map[string][][]string)
	frub.Decode(&nn.fraz, "file/englishLemm/fraz")

	nn.expVerb = make(map[string][]string, 0)
	frub.GetByZn(&nn.expVerb, "file/englishLemm/findverb1.gob")
	// findverb.gob belongs in expVerb1. The load used to target expVerb a
	// second time, which left expVerb1 allocated but always empty.
	nn.expVerb1 = make(map[string]string, 0)
	frub.GetByZn(&nn.expVerb1, "file/englishLemm/findverb.gob")
	nn.expAdj = make(map[string][]string, 0)
	frub.GetByZn(&nn.expAdj, "file/englishLemm/findadj1.gob")
	nn.expNoun = make(map[string]string, 0)
	frub.Decode(&nn.expNoun, "file/englishLemm/findnoun")

	nn.Verb = make(map[string]string, 0)
	frub.Decode(&nn.Verb, "file/englishLemm/verb")
	nn.Noun = make(map[string]string, 0)
	frub.Decode(&nn.Noun, "file/englishLemm/noun")
	nn.Adj = make(map[string]string, 0)
	frub.Decode(&nn.Adj, "file/englishLemm/adj")
	nn.Adv = make(map[string]string, 0)
	frub.Decode(&nn.Adv, "file/englishLemm/adv")
	nn.rubbx = frub.NewRubAr()
}
Esempio n. 3
0
File: create.go Progetto: sereg/morf
// translateSentense fills the trans column of sentenses1 for every row whose
// translation is still empty.
//
// Rows are read in ascending count order. The accumulated text is sent to
// getTranslation("en", "ru", ...), the response is passed through
// rubbx.ReplaseRub and spltOnDimiter, and every id -> translation pair of the
// result is written back with an UPDATE. Any database error terminates the
// program through log.Fatal.
func translateSentense() {
	// Batching and throttling knobs. With the current values every sentence
	// is sent on its own and no pause is made between requests.
	const (
		batchLimit = 0               // extra sentences to accumulate before one request
		pauseEvery = 0               // requests to make between two pauses
		pause      = 0 * time.Second // length of one pause
	)
	// Placeholders keep the translated text out of the SQL statement itself.
	const updateQuery = `UPDATE sentenses1 SET trans = $1 WHERE id = $2`

	dbinfo := fmt.Sprintf("user=%s password=%s dbname=%s sslmode=disable",
		DB_USER, DB_PASSWORD, DB_NAME)
	db, err := sql.Open("postgres", dbinfo)
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()
	rows, err := db.Query(`SELECT id, text, count FROM sentenses1 WHERE trans = '' ORDER BY count ASC `)
	if err != nil {
		log.Fatal(err)
	}
	defer rows.Close()

	var text string
	var countElement, id, i, i1, j int
	buf := bytes.NewBuffer(nil)
	rubbx = f.NewRubAr()

	for rows.Next() {
		if err = rows.Scan(&id, &text, &countElement); err != nil {
			log.Fatal(err)
		}
		buf.WriteString(text)
		if i >= batchLimit {
			// NOTE(review): the second result of getTranslation is still
			// discarded because its type is not visible from here.
			respons, _ := getTranslation("en", "ru", buf.String())
			// The quote replacement is kept so the stored text stays the
			// same as before the switch to placeholders.
			respons = rubbx.ReplaseRub(respons, `"`, `'`)
			for k, v := range spltOnDimiter(respons) {
				if _, err = db.Exec(updateQuery, v, k); err != nil {
					fmt.Println(updateQuery, v, k)
					log.Fatal(err)
				}
			}
			fmt.Println(buf.String())
			fmt.Println(j)
			buf.Reset()
			i = 0
			if i1 >= pauseEvery {
				time.Sleep(pause)
				i1 = 0
			} else {
				i1++
			}
		} else {
			// Separate this sentence from the next one with its id.
			buf.WriteString(` || ` + strconv.Itoa(id) + ` || `)
			i++
		}
		j++
	}
	if err = rows.Err(); err != nil {
		log.Fatal(err)
	}
}
Esempio n. 4
0
// createEngWord builds the word list for the dictionary selected by name
// (currently hard-coded to "verb").
//
// It reads /home/serega/Copy/database/dict/data.<name>, lower-cases it and
// skips the first 29 lines (presumably the file header — confirm). On every
// remaining line it looks at field 4 and then at every second field after it
// for as long as the field in between is "0":
//
//   - a field made only of letters is stored as word -> name;
//   - a field made of letters and underscores is passed through
//     rubbx.ReplaseRub and stored as word -> "fraz".
//
// The map is written through frub.CreateByZn to file/englishLemm/<name>.gob
// and through frub.Encode to file/englishLemm/<name>; its size is printed at
// the end. Read and create failures are reported on stderr and abort the run
// instead of producing an empty dictionary.
func createEngWord() {
	name := "verb"
	rubbx = frub.NewRubAr()

	src, err := os.Open("/home/serega/Copy/database/dict/data." + name)
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		return
	}
	buf := bytes.NewBuffer(nil)
	_, err = io.Copy(buf, src)
	src.Close()
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		return
	}
	sentense := strings.ToLower(buf.String())
	buf.Reset()

	// Compile the separators once instead of once per input line.
	lineSep := regexp.MustCompile(`\n`)
	fieldSep := regexp.MustCompile(` `)

	word := make(map[string]string, 20000)
	for k, s := range lineSep.Split(sentense, -1) {
		if k < 29 {
			continue
		}
		words := fieldSep.Split(s, -1)
		for key := 4; ; key += 2 {
			if len(words) > key {
				switch {
				case rubbx.IsTru1(words[key], `^[a-zA-Z]+$`):
					word[words[key]] = name
				case rubbx.IsTru1(words[key], `^[a-zA-Z_]+$`):
					words[key] = rubbx.ReplaseRub(words[key], ` `, `_`)
					word[words[key]] = "fraz"
				}
			}
			// Stop at the end of the line or when the field between two
			// candidates is not "0".
			if len(words) <= key+2 || words[key+1] != "0" {
				break
			}
		}
	}

	frub.CreateByZn(word, "file/englishLemm/"+name+".gob")
	wodfFile, err := os.Create("file/englishLemm/" + name)
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		return
	}
	// NOTE(review): frub.Encode is not visible here; if it already closes the
	// file this second Close only returns an error, which is ignored.
	defer wodfFile.Close()
	frub.Encode(word, wodfFile)
	fmt.Println(len(word))
}
Esempio n. 5
0
File: create.go Progetto: sereg/morf
// removeExcess deletes from the sentenses table every row whose text is
// rejected by morfEng.getWordWithoutOmonemii (second result false).
//
// Rows are read in ascending count order. Rejected ids are collected into
// "id = N or id = M ..." lists and removed with one DELETE per 301 ids; the
// ids still pending when the result set ends are deleted as well (they used
// to be dropped silently). Progress "rejected - read - pending" is printed
// every 10000 rows and the total number of rejected rows at the end. Any
// database error terminates the program through log.Fatal.
func removeExcess() {
	dbinfo := fmt.Sprintf("user=%s password=%s dbname=%s sslmode=disable",
		DB_USER, DB_PASSWORD, DB_NAME)
	db, err := sql.Open("postgres", dbinfo)
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()
	rows, err := db.Query(`SELECT id, text, count FROM sentenses ORDER BY count ASC `)
	if err != nil {
		log.Fatal(err)
	}
	defer rows.Close()

	var text string
	var countElement, id, i, j, chet int
	rubbx = f.NewRubAr()
	var gref Graf.Graf
	gref.Init()
	morfEng.InitEngl()
	sqlStart := `DELETE FROM sentenses WHERE 1=1 AND (`
	buf := bytes.NewBuffer(nil)

	// flush deletes the ids collected in buf and starts a new batch.
	flush := func() {
		query := sqlStart + buf.String() + ")"
		if _, err := db.Exec(query); err != nil {
			fmt.Println(query)
			log.Fatal(err)
		}
		j = 0
		buf.Reset()
	}

	for rows.Next() {
		if err = rows.Scan(&id, &text, &countElement); err != nil {
			log.Fatal(err)
		}
		if _, prov := morfEng.getWordWithoutOmonemii(text); !prov {
			i++
			if j > 0 {
				buf.WriteString(` or `)
			}
			buf.WriteString(`id = ` + strconv.Itoa(id))
			j++
			if j > 300 {
				flush()
			}
		}
		chet++
		if chet%10000 == 0 {
			fmt.Printf("%d - %d - %d\r\n", i, chet, j)
		}
	}
	if err = rows.Err(); err != nil {
		log.Fatal(err)
	}
	// Delete whatever is left over from the last, incomplete batch.
	if j > 0 {
		flush()
	}

	fmt.Println(i)
}
Esempio n. 6
0
// Init initialises the dictionaries.
//
// ItogOk and Search are pre-sized and then filled concurrently by f.GetByZn
// from "file/itogOkI" and "file/search1". Init waits for both loads to finish
// before it creates the rubbx helper and returns.
func (nn *Morf) Init() {
	nn.ItogOk = make([]map[string][]map[string]string, 20503)
	nn.Search = make(map[string]map[string][]uint16, 234457)

	loaders := []func(){
		func() { f.GetByZn(&nn.ItogOk, "file/itogOkI") },
		func() { f.GetByZn(&nn.Search, "file/search1") },
	}

	var pending sync.WaitGroup
	pending.Add(len(loaders))
	for _, load := range loaders {
		// The loader is passed as an argument so each goroutine runs its own.
		go func(run func()) {
			defer pending.Done()
			run()
		}(load)
	}
	pending.Wait()

	nn.rubbx = f.NewRubAr()
}
Esempio n. 7
0
File: create.go Progetto: sereg/morf
// extractFromQz lists the "txt" entries that dirHod finds under the
// OpenSubtitles2016 xml/en/ directory and hands each of them to ungzip with
// the xml/en1/ directory as second argument. It prints the number of entries
// first and then the running index once every 100 entries.
func extractFromQz() {
	const (
		inputDir  = "/media/serega/c48f1bd8-a939-4630-ab12-9787df1f1fa0/home/paralel/OpenSubtitles2016/xml/en/"
		outputDir = "/media/serega/c48f1bd8-a939-4630-ab12-9787df1f1fa0/home/paralel/OpenSubtitles2016/xml/en1/"
	)

	archives := dirHod(inputDir, "txt", []string{})
	rubbx = f.NewRubAr()
	fmt.Println(len(archives))

	for idx := range archives {
		ungzip(archives[idx], outputDir)
		if idx%100 != 0 {
			continue
		}
		fmt.Println(idx)
	}
}
Esempio n. 8
0
File: create.go Progetto: sereg/morf
// markerOnEnglish flags the rows of sentenses1 whose translation contains
// something other than Cyrillic letters, digits, spaces and the punctuation
// listed in the pattern below: for each such row engword is set to 'e'.
//
// Rows are read in ascending count order and a progress counter is printed
// every 1000 rows. Any database error, including a failed Scan, terminates
// the program through log.Fatal; a failed Scan used to be ignored, which let
// the previous row's text decide the fate of the current id.
func markerOnEnglish() {
	dbinfo := fmt.Sprintf("user=%s password=%s dbname=%s sslmode=disable",
		DB_USER, DB_PASSWORD, DB_NAME)
	db, err := sql.Open("postgres", dbinfo)
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()
	rows, err := db.Query(`SELECT id, trans, count FROM sentenses1 ORDER BY count ASC `)
	if err != nil {
		log.Fatal(err)
	}
	defer rows.Close()

	var text string
	var countElement, id, i int
	rubbx = f.NewRubAr()

	for rows.Next() {
		if err = rows.Scan(&id, &text, &countElement); err != nil {
			log.Fatal(err)
		}
		if !rubbx.IsTru1(text, `^[а-яА-Я ,.!?&\-0-9\:\;]*$`) {
			// id is an integer read from the database, so concatenating it
			// cannot alter the statement.
			query := `UPDATE sentenses1 SET engword = 'e' WHERE id = ` + strconv.Itoa(id) + ` `
			if _, err = db.Exec(query); err != nil {
				fmt.Println(query)
				log.Fatal(err)
			}
		}
		if i%1000 == 0 {
			fmt.Println(i)
		}
		i++
	}
	if err = rows.Err(); err != nil {
		log.Fatal(err)
	}
}
Esempio n. 9
0
// createException builds the three-column exception table for the dictionary
// selected by name (currently hard-coded to "verb1").
//
// It reads /home/serega/Copy/database/dict/<name>.exc and lower-cases it.
// Every line is split on spaces into a base word (field 0) and two columns of
// "/"-separated forms (fields 1 and 2). Each form is stored as
// form -> [base, label], where label is:
//
//   - "v2" / "v3" for a form found only in the second / third column;
//   - "u1" / "u3" when that form also equals the base word;
//   - "u2" when a form occurs in both columns, "u" when it additionally
//     equals the base word or occurs more than once in the other column.
//
// Entries produced by the third column overwrite those of the second column
// for the same form. Lines with fewer than three fields are skipped; a
// two-field line used to cause an index-out-of-range panic.
//
// The map is written through frub.CreateByZn to
// file/englishLemm/find<name>.gob and through frub.Encode to
// file/englishLemm/find<name>. Read and create failures are reported on
// stderr and abort the run.
func createException() {
	name := "verb1"
	rubbx = frub.NewRubAr()

	src, err := os.Open("/home/serega/Copy/database/dict/" + name + ".exc")
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		return
	}
	buf := bytes.NewBuffer(nil)
	_, err = io.Copy(buf, src)
	src.Close()
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		return
	}
	sentense := strings.ToLower(buf.String())
	buf.Reset()

	// Compile the separators once instead of once per input line.
	lineSep := regexp.MustCompile(`\n`)
	fieldSep := regexp.MustCompile(` `)
	formSep := regexp.MustCompile(`\/`)

	// formType labels one form. The label starts as plain, becomes sameAsBase
	// when the form equals the base word, and is then adjusted once for every
	// occurrence of the form in the other column: a label that is still plain
	// turns into "u2", any other label into "u".
	formType := func(form, base string, other []string, plain, sameAsBase string) string {
		label := plain
		if form == base {
			label = sameAsBase
		}
		for _, o := range other {
			if form != o {
				continue
			}
			if label == plain {
				label = "u2"
			} else {
				label = "u"
			}
		}
		return label
	}

	findIsh := make(map[string][]string)
	for _, s := range lineSep.Split(sentense, -1) {
		words := fieldSep.Split(s, -1)
		if len(words) < 3 {
			continue
		}
		base := words[0]
		words1 := formSep.Split(words[1], -1)
		words2 := formSep.Split(words[2], -1)
		for _, s1 := range words1 {
			findIsh[s1] = []string{base, formType(s1, base, words2, "v2", "u1")}
		}
		for _, s1 := range words2 {
			findIsh[s1] = []string{base, formType(s1, base, words1, "v3", "u3")}
		}
	}

	frub.CreateByZn(findIsh, "file/englishLemm/find"+name+".gob")
	wodfFile, err := os.Create("file/englishLemm/find" + name)
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		return
	}
	// NOTE(review): frub.Encode is not visible here; if it already closes the
	// file this second Close only returns an error, which is ignored.
	defer wodfFile.Close()
	frub.Encode(findIsh, wodfFile)
}
Esempio n. 10
0
File: create.go Progetto: sereg/morf
// spliteonPredl imports sentences from the OpenSubtitles2016 xml/txt/ files
// into the sentenses table.
//
// Files are listed with dirHod; the first 30002 entries are skipped. Each
// remaining file is read, split into sentences by gref.PrepereText, and a
// sentence is inserted (with an empty trans and its item count) when all of
// the following hold:
//
//   - it contains neither '-' nor '"';
//   - morfEng.getWordWithoutOmonemii accepts it and returns more than 4 and
//     fewer than 20 items;
//   - no row with the same text exists in sentenses yet.
//
// Inserts are sent in batches of at most 301 rows and flushed at the end of
// every file, after which the file name and its index are printed.
//
// Sentence text is passed to the database through placeholders.
// NOTE(review): this replaces the former rubbx.ReplaseRub(s, `''`, `'`) call,
// which presumably doubled single quotes for embedding in SQL literals —
// confirm that it did nothing else.
//
// A file that cannot be read is reported and skipped; any database error
// terminates the program through log.Fatal.
func spliteonPredl() {
	dbinfo := fmt.Sprintf("user=%s password=%s dbname=%s sslmode=disable",
		DB_USER, DB_PASSWORD, DB_NAME)
	db, err := sql.Open("postgres", dbinfo)
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	allName := dirHod("/media/serega/c48f1bd8-a939-4630-ab12-9787df1f1fa0/home/paralel/OpenSubtitles2016/xml/txt/", "", []string{})
	var gref Graf.Graf
	gref.Init()
	rubbx = f.NewRubAr()
	morfEng.InitEngl()

	const (
		sqlStart  = `INSERT INTO sentenses(trans, text, count) VALUES `
		batchRows = 301 // rows per INSERT statement
	)
	buf := bytes.NewBuffer(nil)    // content of the current file
	values := bytes.NewBuffer(nil) // "('',$1,$2),('',$3,$4),..." for the pending rows
	var args []interface{}         // text and count of every pending row

	// flush inserts the pending rows, if any, and starts a new batch.
	flush := func() {
		if len(args) == 0 {
			return
		}
		query := sqlStart + values.String()
		if _, err := db.Exec(query, args...); err != nil {
			fmt.Println(query)
			log.Fatal(err)
		}
		values.Reset()
		args = args[:0]
	}

	for k, f1 := range allName {
		if k < 30002 {
			continue
		}

		buf.Reset()
		src, err := os.Open(f1)
		if err != nil {
			fmt.Println(err)
			continue
		}
		_, err = io.Copy(buf, src)
		src.Close()
		if err != nil {
			fmt.Println(err)
			continue
		}

		for _, s := range gref.PrepereText(buf.String()) {
			if rubbx.IsTru1(s, `.*[-"].*`) {
				continue
			}
			sentenseItem, prov := morfEng.getWordWithoutOmonemii(s)
			if !prov || len(sentenseItem) <= 4 || len(sentenseItem) >= 20 {
				continue
			}

			var cou int
			err := db.QueryRow(`SELECT count(*) as cou FROM sentenses WHERE text = $1`, s).Scan(&cou)
			if err != nil {
				log.Fatal(err)
			}
			if cou != 0 {
				continue
			}

			if len(args) > 0 {
				values.WriteString(`,`)
			}
			args = append(args, s, len(sentenseItem))
			values.WriteString(`('',$` + strconv.Itoa(len(args)-1) + `,$` + strconv.Itoa(len(args)) + `)`)
			if len(args) >= 2*batchRows {
				flush()
			}
		}
		flush()

		fmt.Println(f1)
		fmt.Println(k)
	}
}
Esempio n. 11
0
File: create.go Progetto: sereg/morf
// fillTableWord rebuilds the words and word_use tables from the sentenses
// table.
//
// Phase 1 reads every (id, text, count) row of sentenses into memory.
//
// Phase 2 runs morfEng.getWordWithoutOmonemii on each text. For every item
// that is neither "number" nor "punctuation" it tries to extend the item into
// a phrase using morfEng.fraz (items consumed by a phrase are skipped
// afterwards), then records under the key result+types+forma how often the
// word occurs and in which sentence ids.
//
// Phase 3 inserts one row per key into words; phase 4 looks up the id of each
// key in words and inserts one row per (word, sentence) pair into word_use.
// Both use INSERT statements of at most 301 rows.
//
// Progress counters are printed along the way; any database error terminates
// the program through log.Fatal.
func fillTableWord() {
	dbinfo := fmt.Sprintf("user=%s password=%s dbname=%s sslmode=disable",
		DB_USER, DB_PASSWORD, DB_NAME)
	db, err := sql.Open("postgres", dbinfo)
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()
	rows, err := db.Query(`SELECT id, text, count FROM sentenses ORDER BY count ASC `)
	if err != nil {
		log.Fatal(err)
	}

	rubbx = f.NewRubAr()
	var gref Graf.Graf
	gref.Init()
	type words struct {
		result string
		types  string
		forma  string
		count  int
	}
	type read struct {
		text         string
		countElement int
		id           int
	}
	word := make(map[string]words)
	wordUse := make(map[string][]int)
	morfEng.InitEngl()

	// The slice grows with the result set. It used to be a fixed array of
	// 6762636 entries, which panicked on a larger table and fed empty rows to
	// the analysis on a smaller one.
	var readderArray []read
	for rows.Next() {
		var row read
		if err = rows.Scan(&row.id, &row.text, &row.countElement); err != nil {
			log.Fatal(err)
		}
		readderArray = append(readderArray, row)
	}
	if err = rows.Err(); err != nil {
		log.Fatal(err)
	}
	rows.Close()

	i := 0
	for _, rowFrom := range readderArray {
		i++
		id := rowFrom.id
		sentenseItem, prov := morfEng.getWordWithoutOmonemii(rowFrom.text)
		if prov {
			// fraz holds the indexes of the items already consumed by a phrase.
			fraz := []int{}
			for k, s := range sentenseItem {
				if len(fraz) > 0 {
					if fraz[0] == k {
						fraz = fraz[1:]
						continue
					}
				}
				if s.types != "number" && s.types != "punctuation" {
					if zn, found := morfEng.fraz[s.result]; found {
						k1 := 0
						shift := 1
						for {
							fraz2 := false
							for _, v1 := range zn {
								if k+shift+k1 < len(sentenseItem) {
									val := sentenseItem[k+shift+k1].result
									if len(v1) > k1 && val == v1[k1] {
										fraz = append(fraz, k+shift+k1)
										if len(v1)-1 == k1 {
											for jc := 0; jc < len(v1); jc++ {
												s.result += " " + sentenseItem[k+jc+shift].result
											}
											s.types = "phras"
											if shift == 1 {
												s.forma = ""
											} else {
												s.forma = "stm"
											}
											shift = 2
											break
										} else {
											fraz2 = true
										}
									}
								}
							}
							if !fraz2 && shift == 2 {
								break
							}
							if !fraz2 {
								shift++
							}
							k1++
						}
					}
					// NOTE(review): presumably doubles single quotes so the
					// value can be embedded in the SQL literals built below —
					// confirm against ReplaseRub.
					s.result = rubbx.ReplaseRub((s.result), `''`, `'`)
					keyWord := s.result + s.types + s.forma
					wordUse[keyWord] = append(wordUse[keyWord], id)
					word[keyWord] = words{s.result, s.types, s.forma, word[keyWord].count + 1}
				}
			}
		}
		if i%1000 == 0 {
			fmt.Println(i)
		}
	}

	buf := bytes.NewBuffer(nil)
	pending := 0

	// flush executes sqlStart followed by the rows pending in buf, if any.
	flush := func(sqlStart string) {
		if pending == 0 {
			return
		}
		query := sqlStart + buf.String()
		if _, err := db.Exec(query); err != nil {
			fmt.Println(query)
			log.Fatal(err)
		}
		pending = 0
		buf.Reset()
	}
	// addRow queues one "(...)" tuple and flushes once 301 rows are pending.
	addRow := func(sqlStart, row string) {
		if pending > 0 {
			buf.WriteString(`,`)
		}
		buf.WriteString(row)
		pending++
		if pending > 300 {
			flush(sqlStart)
		}
	}

	jj := 0
	sqlStart := `INSERT INTO words (word_key, word, forma, type, frequensy) VALUES `
	for k, s := range word {
		addRow(sqlStart, `('`+k+`','`+s.result+`','`+s.forma+`','`+s.types+`',`+strconv.Itoa(s.count)+`)`)
		jj++
		if jj%10000 == 0 {
			fmt.Println(jj)
		}
	}
	flush(sqlStart)

	jj = 0
	sqlStart = `INSERT INTO word_use (id_word, id_sentense) VALUES `
	for k, s := range wordUse {
		// A failed lookup is fatal: carrying on would attach these sentences
		// to the id of the previously processed word.
		var wordID int
		err = db.QueryRow(`SELECT id FROM words WHERE word_key = '` + k + `'`).Scan(&wordID)
		if err != nil {
			log.Fatalf("looking up word %q: %v", k, err)
		}
		for _, s1 := range s {
			addRow(sqlStart, `('`+strconv.Itoa(wordID)+`',`+strconv.Itoa(s1)+`)`)
		}
		jj++
		if jj%10000 == 0 {
			fmt.Println(jj)
		}
	}
	flush(sqlStart)
}