Exemple #1
0
// main times sentence tokenization of the contents of brownv.txt.
//
// The file is read once. Each run then builds a fresh English sentence
// tokenizer and tokenizes the whole text; both steps are inside the timed
// region. The duration of every run is printed, followed by the mean
// duration in seconds. Any read or tokenizer-construction error panics.
func main() {
	const runs = 5

	raw, err := ioutil.ReadFile("brownv.txt")
	if err != nil {
		panic(err)
	}
	corpus := string(raw)

	// Accumulated as float seconds so the average can be printed directly.
	var totalSeconds float64
	for run := 0; run < runs; run++ {
		began := time.Now()

		tok, err := english.NewSentenceTokenizer(nil)
		if err != nil {
			panic(err)
		}
		tok.Tokenize(corpus)

		took := time.Since(began)
		totalSeconds += took.Seconds()
		fmt.Println("Sentences took: ", took)
	}

	fmt.Println("Sentences avg took: ", totalSeconds/float64(runs))
}
Exemple #2
0
// main compares the number of sentences found by the English sentence
// tokenizer against the number of lines in a data file.
//
// The data file path is taken from the first command-line argument. The file
// is split on "\n" and each resulting element counts as one expected
// sentence; the same text is then tokenized and the two counts are printed
// together with their ratio as a percentage.
//
// NOTE(review): strings.Split yields a trailing empty element when the file
// ends with a newline, which adds one to the expected count — confirm the
// data files are written without a final newline.
//
// A missing argument, an unreadable file, or a tokenizer-construction error
// panics.
func main() {
	if len(os.Args) < 2 {
		panic("Please supply data file to test sentence tokenizer")
	}

	// The read error used to be discarded, which made an unreadable path
	// look like an empty data file instead of a failure.
	file, err := ioutil.ReadFile(os.Args[1])
	if err != nil {
		panic(err)
	}
	text := string(file)

	expectedSentences := strings.Split(text, "\n")

	tokenizer, err := english.NewSentenceTokenizer(nil)
	if err != nil {
		panic(err)
	}

	actualSentences := tokenizer.Tokenize(text)

	actualLen := len(actualSentences)
	// strings.Split never returns an empty slice for a non-empty separator,
	// so expectedLen is at least 1 and the division below is safe.
	expectedLen := len(expectedSentences)
	perc := (float64(actualLen) / float64(expectedLen)) * 100

	fmt.Printf("Actual Sentences: %d, Expected Sentences: %d, Percent: %f%%\n", actualLen, expectedLen, perc)
}
Exemple #3
0
			if err != nil {
				panic(err)
			}

			if (stat.Mode() & os.ModeCharDevice) != 0 {
				return
			}

			reader := bufio.NewReader(os.Stdin)
			text, err = ioutil.ReadAll(reader)
			if err != nil {
				panic(err)
			}
		}

		tokenizer, err := english.NewSentenceTokenizer(nil)
		if err != nil {
			panic(err)
		}

		sentences := tokenizer.Tokenize(string(text))
		for _, s := range sentences {
			text := strings.Join(strings.Fields(s.Text), " ")

			text = strings.Join([]string{text, delim}, "")
			fmt.Printf("%s", text)
		}
	},
}

func main() {
// main walks a comment thread looking for sentences about "evolution" and
// shows how each of them matches a set of dependency patterns.
//
// Steps:
//  1. GET the comments for a fixed comment id and decode them as []Comment.
//  2. Concatenate every comment body (separated by blank lines) and pass the
//     result through formatText.
//  3. POST the text to the topic service and decode its Topics/Groups reply,
//     which is printed.
//  4. Tokenize the text into sentences. For each lower-cased, trimmed
//     sentence, collect the topics it contains; if "evolution" is among
//     them, print the topics and the sentence, POST the sentence with each
//     pattern to the pattern service, and print every reply of at least 10
//     bytes as indented JSON.
//  5. After each matching sentence, wait for a line on stdin before moving on.
//
// Any HTTP, read, or JSON encode/decode failure panics.
func main() {
	// get some comments
	commentID := "1"
	commentsResp, err := http.Get("http://local.docker:3000/comments/" + commentID + ".json?flat=true")
	if err != nil {
		panic(err)
	}
	defer commentsResp.Body.Close()
	body, err := ioutil.ReadAll(commentsResp.Body)
	if err != nil {
		panic(err)
	}
	var comments []Comment
	if err := json.Unmarshal(body, &comments); err != nil {
		panic(err)
	}

	// collect all the text
	var collected strings.Builder
	for _, c := range comments {
		collected.WriteString(c.Body)
		collected.WriteString("\n\n")
	}
	allText := formatText(collected.String())

	// get the topics for the text
	topicReq, err := json.Marshal(struct {
		Text         string `json:"text"`
		TopicCount   int    `json:"topic_count"`
		TopWordCount int    `json:"top_word_count"`
	}{
		allText, 5, 5,
	})
	if err != nil {
		panic(err)
	}

	topicsResp, err := http.Post("http://local.docker:4567", "text/json", bytes.NewBuffer(topicReq))
	if err != nil {
		panic(err)
	}
	defer topicsResp.Body.Close()
	body, err = ioutil.ReadAll(topicsResp.Body)
	if err != nil {
		panic(err)
	}
	var result struct {
		Topics []string
		Groups [][]string
	}
	if err := json.Unmarshal(body, &result); err != nil {
		panic(err)
	}
	fmt.Println(result)

	// extract the sentences from the text
	tokenizer, err := english.NewSentenceTokenizer(nil)
	if err != nil {
		panic(err)
	}

	sentences := tokenizer.Tokenize(allText)

	// scan for matching sentences
	patterns := []string{
		"{}=verb >nsubj {}=subj >dobj {}=obj",
		"{}=obj >nsubj {}=subj >cop {}=verb",
		"{}=verb >nsubj {}=subj >ccomp {}=verb2",
		"{}=subj >advcl {}=verb",
		"{}=verb >/nmod.*/ {}=subj >dobj {}=obj",
		"{}=verb >nsubj {}=subj >/nmod.*/ {}=obj",
	}

	// One reader for the whole run: a fresh bufio.Reader per sentence would
	// throw away whatever input the previous one had already buffered.
	stdin := bufio.NewReader(os.Stdin)

	var topics []string
	for _, s := range sentences {
		text := strings.ToLower(strings.TrimSpace(s.Text))

		// Reuse the backing array; only this sentence's topics matter.
		topics = topics[:0]
		for _, topic := range result.Topics {
			if strings.Contains(text, topic) {
				topics = append(topics, topic)
			}
		}
		if !contains(topics, "evolution") {
			continue
		}

		fmt.Println(topics)
		fmt.Println(text)

		for _, pattern := range patterns {
			fmt.Println(pattern)
			matchReq, err := json.Marshal(struct {
				Text    string `json:"text"`
				Pattern string `json:"pattern"`
			}{
				text, pattern,
			})
			if err != nil {
				panic(err)
			}

			matchResp, err := http.Post("http://local.docker:4568", "text/json", bytes.NewBuffer(matchReq))
			if err != nil {
				panic(err)
			}
			matchBody, err := ioutil.ReadAll(matchResp.Body)
			// Close immediately rather than defer: a defer inside the loop
			// would keep every response open until main returns.
			matchResp.Body.Close()
			if err != nil {
				panic(err)
			}
			// Replies shorter than 10 bytes are skipped without printing.
			if len(matchBody) < 10 {
				continue
			}

			var out bytes.Buffer
			if err := json.Indent(&out, matchBody, "", "    "); err != nil {
				// Not valid JSON: show the raw reply instead of a partial indent.
				fmt.Println(string(matchBody))
				continue
			}
			fmt.Println(out.String())
		}

		fmt.Print(">")
		// The read only serves as a pause; an error (e.g. EOF on a closed
		// stdin) simply means there is nothing to wait for.
		_, _ = stdin.ReadString('\n')
		fmt.Print("-------\n\n\n")
	}
}