func main() { file, err := ioutil.ReadFile("brownv.txt") if err != nil { panic(err) } text := string(file) var totalTime float64 = 0.0 iterations := 5 for i := 0; i < iterations; i++ { start := time.Now() tokenizer, err := english.NewSentenceTokenizer(nil) if err != nil { panic(err) } tokenizer.Tokenize(text) elapsed := time.Since(start) totalTime += elapsed.Seconds() fmt.Println("Sentences took: ", elapsed) } fmt.Println("Sentences avg took: ", totalTime/float64(iterations)) }
func main() { if len(os.Args) < 2 { panic("Please supply data file to test sentence tokenizer") } file, _ := ioutil.ReadFile(os.Args[1]) text := string(file) expected_sentences := strings.Split(text, "\n") tokenizer, err := english.NewSentenceTokenizer(nil) if err != nil { panic(err) } actual_sentences := tokenizer.Tokenize(text) a_len := len(actual_sentences) e_len := len(expected_sentences) perc := (float64(a_len) / float64(e_len)) * 100 fmt.Printf("Actual Sentences: %d, Expected Sentences: %d, Percent: %f%%\n", a_len, e_len, perc) }
		// Check whether anything is being piped in on stdin.
		stat, err := os.Stdin.Stat()
		if err != nil {
			panic(err)
		}
		// Stdin is a terminal rather than a pipe, so there is nothing to read.
		if (stat.Mode() & os.ModeCharDevice) != 0 {
			return
		}
		reader := bufio.NewReader(os.Stdin)
		text, err = ioutil.ReadAll(reader)
		if err != nil {
			panic(err)
		}
	}

	tokenizer, err := english.NewSentenceTokenizer(nil)
	if err != nil {
		panic(err)
	}
	sentences := tokenizer.Tokenize(string(text))
	for _, s := range sentences {
		// Collapse runs of whitespace inside each sentence, then append
		// the user-supplied delimiter.
		text := strings.Join(strings.Fields(s.Text), " ")
		text = strings.Join([]string{text, delim}, "")
		fmt.Printf("%s", text)
	}
},
}

func main() {
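// The fragment above is the tail of a CLI command: the first stray brace
// closes the branch that falls back to reading stdin, and the trailing "},"
// and "}" close a Run function and its enclosing command struct. A minimal
// sketch of how those pieces might be wired together with spf13/cobra; the
// command name, flag name, and variable declarations here are assumptions,
// not taken from the original source.
package main

import (
	"fmt"
	"os"

	"github.com/spf13/cobra"
)

var (
	text  []byte // filled from a file argument, or from stdin as shown above
	delim string // separator printed after each sentence
)

var sentencesCmd = &cobra.Command{
	Use:   "sentences [file]",
	Short: "Split text into sentences",
	Run: func(cmd *cobra.Command, args []string) {
		// ... the tokenize-and-print body from the fragment above ...
	},
}

func main() {
	sentencesCmd.Flags().StringVarP(&delim, "delimiter", "d", "\n", "sentence delimiter")
	if err := sentencesCmd.Execute(); err != nil {
		fmt.Println(err)
		os.Exit(1)
	}
}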
package main

import (
	"bufio"
	"bytes"
	"encoding/json"
	"fmt"
	"io/ioutil"
	"net/http"
	"os"
	"strings"

	"github.com/neurosnap/sentences/english"
)

func main() {
	// Get some comments.
	commentId := "1"
	response, err := http.Get("http://local.docker:3000/comments/" + commentId + ".json?flat=true")
	if err != nil {
		panic(err)
	}
	defer response.Body.Close()
	body, err := ioutil.ReadAll(response.Body)
	if err != nil {
		panic(err)
	}
	var comments []Comment
	json.Unmarshal(body, &comments)

	// Collect all the text.
	allText := ""
	for _, v := range comments {
		allText += v.Body + "\n\n"
	}
	allText = formatText(allText)

	// Get the topics for the text from the topic-modeling service.
	payload := struct {
		Text         string `json:"text"`
		TopicCount   int    `json:"topic_count"`
		TopWordCount int    `json:"top_word_count"`
	}{allText, 5, 5}
	b, _ := json.Marshal(payload)
	reqBody := bytes.NewBuffer(b)
	response, err = http.Post("http://local.docker:4567", "text/json", reqBody)
	if err != nil {
		panic(err)
	}
	defer response.Body.Close()
	body, err = ioutil.ReadAll(response.Body)
	if err != nil {
		panic(err)
	}
	var result struct {
		Topics []string
		Groups [][]string
	}
	json.Unmarshal(body, &result)
	fmt.Println(result)

	// Extract the sentences from the text.
	tokenizer, err := english.NewSentenceTokenizer(nil)
	if err != nil {
		panic(err)
	}
	sentences := tokenizer.Tokenize(allText)

	// Semgrex-style dependency patterns to scan matching sentences with.
	patterns := []string{
		"{}=verb >nsubj {}=subj >dobj {}=obj",
		"{}=obj >nsubj {}=subj >cop {}=verb",
		"{}=verb >nsubj {}=subj >ccomp {}=verb2",
		"{}=subj >advcl {}=verb",
		"{}=verb >/nmod.*/ {}=subj >dobj {}=obj",
		"{}=verb >nsubj {}=subj >/nmod.*/ {}=obj",
	}

	var topics []string
	for _, s := range sentences {
		s.Text = strings.TrimSpace(s.Text)
		s.Text = strings.ToLower(s.Text)

		// Record which extracted topics appear in this sentence.
		for _, topic := range result.Topics {
			if strings.Contains(s.Text, topic) {
				topics = append(topics, topic)
			}
		}

		if contains(topics, "evolution") {
			fmt.Println(topics)
			fmt.Println(s.Text)
			// Run each pattern against the sentence via the parser service.
			for _, v := range patterns {
				fmt.Println(v)
				payload := struct {
					Text    string `json:"text"`
					Pattern string `json:"pattern"`
				}{s.Text, v}
				b, _ := json.Marshal(payload)
				reqBody := bytes.NewBuffer(b)
				response, err = http.Post("http://local.docker:4568", "text/json", reqBody)
				if err != nil {
					panic(err)
				}
				body, err = ioutil.ReadAll(response.Body)
				response.Body.Close()
				if err != nil || len(body) < 10 {
					continue
				}
				var out bytes.Buffer
				json.Indent(&out, body, "", " ")
				fmt.Println(out.String())
			}
			// Pause so the matches can be inspected before moving on.
			reader := bufio.NewReader(os.Stdin)
			fmt.Print(">")
			_, _ = reader.ReadString('\n')
			fmt.Println("-------\n\n")
		}
		// Reset the per-sentence topic matches.
		topics = topics[:0]
	}
}
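// The program above references a Comment type and two helpers, formatText
// and contains, that are defined elsewhere in the source. Minimal
// hypothetical stand-ins are sketched here so the snippet compiles; the JSON
// field tag and the cleanup formatText performs are assumptions, not the
// original code.

// Comment mirrors the shape of the comments endpoint's JSON; only the body
// text is used above, though the real type likely carries more fields.
type Comment struct {
	Body string `json:"body"`
}

// formatText is assumed to do light cleanup before tokenization; here it
// just trims surrounding whitespace as a stand-in.
func formatText(text string) string {
	return strings.TrimSpace(text)
}

// contains reports whether items includes target.
func contains(items []string, target string) bool {
	for _, item := range items {
		if item == target {
			return true
		}
	}
	return false
}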