示例#1
0
文件: create.go 项目: sereg/morf
// createFraz reads the phrase list from a fixed source file, builds a
// two-level set from it (first field of a line -> second field -> true),
// prints the result and stores it gob-encoded in "file/fraz".
//
// The function has no way to report failures to its caller, so any I/O or
// encoding error is logged and the function returns without writing output.
func createFraz() {
	const (
		srcPath = "/home/serega/Copy/code/sourse/fraz"
		dstPath = "file/fraz"
	)

	data, err := ioutil.ReadFile(srcPath)
	if err != nil {
		log.Printf("createFraz: reading %s: %v", srcPath, err)
		return
	}

	fraz := parseFrazLines(string(data))
	fmt.Println(fraz)

	var network bytes.Buffer
	if err := gob.NewEncoder(&network).Encode(fraz); err != nil {
		log.Printf("createFraz: encoding phrase map: %v", err)
		return
	}
	if err := ioutil.WriteFile(dstPath, network.Bytes(), 0644); err != nil {
		log.Printf("createFraz: writing %s: %v", dstPath, err)
	}
}

// parseFrazLines splits text into lines on "\n" and every line on single
// whitespace characters, then records fields[0] -> fields[1] -> true.
//
// Lines that yield fewer than two fields (for example the empty string that
// follows a trailing newline) are skipped; indexing them unconditionally
// would panic with an out-of-range error.
func parseFrazLines(text string) map[string]map[string]bool {
	// Compile each pattern once instead of once per line.
	lineSep := regexp.MustCompile(`\n`)
	fieldSep := regexp.MustCompile(`\s`)

	fraz := make(map[string]map[string]bool)
	for _, line := range lineSep.Split(text, -1) {
		fields := fieldSep.Split(line, -1)
		if len(fields) < 2 {
			continue
		}
		head, tail := fields[0], fields[1]
		if fraz[head] == nil {
			fraz[head] = make(map[string]bool)
		}
		fraz[head][tail] = true
	}
	return fraz
}
示例#2
0
文件: create.go 项目: sereg/morf
// spliteonPredl walks the files returned by dirHod for the subtitle corpus
// directory, passes each file's text to gref.PrepereText and stores the
// resulting sentences in the `sentenses` table.
//
// A sentence is inserted only when all of the following hold:
//   - rubbx.IsTru1 does not match it against `.*[-"].*`;
//   - morfEng.getWordWithoutOmonemii reports success for it and returns
//     more than 4 and fewer than 20 items;
//   - a SELECT count(*) finds no row with the same text.
//
// Rows are written with multi-row INSERT statements that are flushed when a
// batch fills up and at the end of every file. Database errors terminate the
// program through log.Fatal; files that cannot be read are logged and skipped.
func spliteonPredl() {
	const (
		corpusDir = "/media/serega/c48f1bd8-a939-4630-ab12-9787df1f1fa0/home/paralel/OpenSubtitles2016/xml/txt/"
		// resumeFrom is the index of the first file to process; earlier
		// entries of the corpus listing are skipped.
		resumeFrom = 30002
		// maxPending is the number of value tuples that triggers a flush.
		maxPending = 301
		insertHead = `INSERT INTO sentenses(trans, text, count) VALUES `
	)

	dbinfo := fmt.Sprintf("user=%s password=%s dbname=%s sslmode=disable",
		DB_USER, DB_PASSWORD, DB_NAME)
	db, err := sql.Open("postgres", dbinfo)
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	allName := dirHod(corpusDir, "", []string{})

	var gref Graf.Graf
	gref.Init()
	rubbx = f.NewRubAr()
	morfEng.InitEngl()

	content := bytes.NewBuffer(nil)
	values := bytes.NewBuffer(nil)
	pending := 0 // number of value tuples currently held in values

	// flush executes the accumulated multi-row INSERT, if there is one.
	flush := func() {
		if pending == 0 {
			return
		}
		stmt := insertHead + values.String()
		if _, err := db.Exec(stmt); err != nil {
			// Print exactly the statement that failed to ease debugging.
			fmt.Println(stmt)
			log.Fatal(err)
		}
		values.Reset()
		pending = 0
	}

	for k, name := range allName {
		if k < resumeFrom {
			continue
		}

		// The handle is deliberately not called f: that would shadow the f
		// package used above.
		file, err := os.Open(name)
		if err != nil {
			log.Printf("spliteonPredl: skipping %s: %v", name, err)
			continue
		}
		content.Reset()
		_, err = io.Copy(content, file)
		file.Close()
		if err != nil {
			log.Printf("spliteonPredl: skipping %s: %v", name, err)
			continue
		}

		for _, s := range gref.PrepereText(content.String()) {
			if rubbx.IsTru1(s, `.*[-"].*`) {
				continue
			}
			items, ok := morfEng.getWordWithoutOmonemii(s)
			// Only sentences with 5..19 items are ever inserted, so reject
			// everything else before spending a query on it.
			if !ok || len(items) <= 4 || len(items) >= 20 {
				continue
			}

			// NOTE(review): the statements below are assembled by string
			// concatenation and rely on ReplaseRub for quoting — presumably
			// it doubles single quotes; confirm, and prefer placeholders
			// ($1, $2, ...) once its behaviour is verified.
			s = rubbx.ReplaseRub(s, `''`, `'`)

			var cou int
			err := db.QueryRow(`SELECT count(*) as cou FROM sentenses WHERE text = '` + s + `' `).Scan(&cou)
			if err != nil {
				log.Fatal(err)
			}
			if cou != 0 {
				continue
			}

			if pending > 0 {
				values.WriteString(`,`)
			}
			values.WriteString(`('','` + s + `',` + strconv.Itoa(len(items)) + `)`)
			pending++
			if pending >= maxPending {
				flush()
			}
		}
		// Write whatever is left for this file before moving on.
		flush()

		// Progress output.
		fmt.Println(name)
		fmt.Println(k)
	}
}