// createFraz builds the phrase table from the text file
// /home/serega/Copy/code/sourse/fraz, prints it to standard output and stores
// its gob encoding in file/fraz.
//
// Any failure while reading, encoding or writing is logged and ends the
// function early; nothing is written when the input could not be read.
func createFraz() {
	src, err := os.Open("/home/serega/Copy/code/sourse/fraz")
	if err != nil {
		log.Println(err)
		return
	}
	buf := bytes.NewBuffer(nil)
	_, err = io.Copy(buf, src)
	src.Close()
	if err != nil {
		log.Println(err)
		return
	}

	fraz := parseFrazPairs(buf.String())
	fmt.Println(fraz)

	var network bytes.Buffer
	if err := gob.NewEncoder(&network).Encode(fraz); err != nil {
		log.Println(err)
		return
	}
	// Write the encoded bytes directly; no string round trip is needed.
	if err := ioutil.WriteFile("file/fraz", network.Bytes(), 0644); err != nil {
		log.Println(err)
	}
}

// parseFrazPairs turns newline-separated text into a two-level set: for every
// line, the first whitespace-separated field is the outer key and the second
// field is the inner key, whose value is set to true.
//
// Lines that yield fewer than two fields (for example the empty string that
// follows a trailing newline) are skipped instead of causing an
// index-out-of-range panic.
func parseFrazPairs(text string) map[string]map[string]bool {
	// Compile both separators once rather than once per line.
	lineSep := regexp.MustCompile(`\n`)
	fieldSep := regexp.MustCompile(`\s`)

	fraz := make(map[string]map[string]bool)
	for _, line := range lineSep.Split(text, -1) {
		fields := fieldSep.Split(line, -1)
		if len(fields) < 2 {
			continue
		}
		inner, ok := fraz[fields[0]]
		if !ok {
			inner = make(map[string]bool)
			fraz[fields[0]] = inner
		}
		inner[fields[1]] = true
	}
	return fraz
}
func spliteonPredl() { dbinfo := fmt.Sprintf("user=%s password=%s dbname=%s sslmode=disable", DB_USER, DB_PASSWORD, DB_NAME) db, err := sql.Open("postgres", dbinfo) if err != nil { log.Fatal(err) } defer db.Close() //allName = dirHod("/home/serega/Copy/code/text/book/",, allName) allName := dirHod("/media/serega/c48f1bd8-a939-4630-ab12-9787df1f1fa0/home/paralel/OpenSubtitles2016/xml/txt/", "", []string{}) //fmt.Println(allName) //echoArray(allName) var gref Graf.Graf gref.Init() rubbx = f.NewRubAr() morfEng.InitEngl() buf := bytes.NewBuffer(nil) for k, f1 := range allName { if k < 30002 { // for /media/serega/c48f1bd8-a939-4630-ab12-9787df1f1fa0/home/paralel/OpenSubtitles2016/xml/txt/ continue } buf.Reset() f, _ := os.Open(f1) // Error handling elided for brevity. io.Copy(buf, f) // Error handling elided for brevity. f.Close() //text, _ := html2text.FromString(buf.String()) sentense := gref.PrepereText(buf.String()) //echoArray(sentense) //fmt.Println(f1) //buf.Reset() i := 0 sqlStart := `INSERT INTO sentenses(trans, text, count) VALUES ` buf1 := bytes.NewBuffer(nil) for _, s := range sentense { _ = s if rubbx.IsTru1(s, `.*[-"].*`) { continue } sentenseItem, prov := morfEng.getWordWithoutOmonemii(s) if !prov || len(sentenseItem) < 4 || len(sentenseItem) > 20 { continue } s = rubbx.ReplaseRub(s, `''`, `'`) rows, err := db.Query(`SELECT count(*) as cou FROM sentenses WHERE text = '` + s + `' `) if err != nil { log.Fatal(err) } var cou string rows.Next() rows.Scan(&cou) rows.Close() // //fmt.Println(cou) if cou == `0` && len(sentenseItem) > 4 && len(sentenseItem) < 20 { //fmt.Println(s) buf1.WriteString(`('','` + s + `',` + strconv.Itoa(len(sentenseItem)) + `)`) if i >= 300 { _, err = db.Exec(sqlStart + buf1.String()) if err != nil { fmt.Println(sqlStart + buf1.String()) log.Fatal(err) } i = 0 buf1.Reset() } else { buf1.WriteString(`,`) i++ } } // // // // ////fmt.Println(" ---------- ") } if i != 0 { str := buf1.String() str = string([]byte(str)[:len(str)-1]) _, err 
= db.Exec(sqlStart + str) if err != nil { fmt.Println(sqlStart + buf1.String()) log.Fatal(err) } } // if k%1 == 0 { fmt.Println(f1) fmt.Println(k) } if k > 10 { //break } ////fmt.Println(f) //break } }