// Test_MPSegment feeds the 125-1 sample text and the dictionary loaded from
// sogoudictionary.txt to segment.SegmentSentence_MP and prints the result.
// It performs no assertions; it only exercises the call.
func Test_MPSegment(t *testing.T) {
	const (
		dictPath   = "../data/dictionary/sogoudictionary.txt"
		samplePath = "../data/testdata/125-1.txt"
	)

	// Load the dictionary before the sample, as the original ordering did.
	lexicon := dict.NewDictionary(dictPath)
	sample := util.ReadFile(samplePath)

	fmt.Println(segment.SegmentSentence_MP(sample, lexicon))
}
// Test_SegmentSentenceMP converts the 125-1 sample text to runes, passes it to
// segment.SegmentSentenceMP with a start offset of 0, and prints how many
// segments came back. It performs no assertions.
func Test_SegmentSentenceMP(t *testing.T) {
	const (
		dictPath   = "../data/dictionary/sogoudictionary.txt"
		samplePath = "../data/testdata/125-1.txt"
	)

	lexicon := dict.NewDictionary(dictPath)
	sample := util.ReadFile(samplePath)

	runes := []rune(sample)
	result := segment.SegmentSentenceMP(runes, 0, lexicon)
	fmt.Println(len(result))
}
Esempio n. 3
0
// mainMerge segments the 125 sample article, writes the segment dump to
// test-125.log, filters the segments against the stop-word list, and writes
// the pair terms produced by occurrence.Merge to
// main-test-125-merge-merge.log.
func mainMerge() {
	// Thresholds handed to occurrence.Merge below.
	freqDoor := 6
	scoreDoor := float32(0.010)

	signs := dict.NewSign("../data/dictionary/sign.txt")
	stopWords := dict.NewSign("../data/dictionary/stopwords.txt")
	lexicon := dict.NewDictionary("../data/dictionary/sogoudictionary.txt")

	doc := util.ReadFile("../data/testdata/125.txt")
	segs := segment.SegmentDoc(doc, signs, lexicon)
	fmt.Println(len(segs))

	// Dump the unfiltered segments before the stop-word filter runs.
	segDump := segment.GetSegmentStr(segs)
	util.WriteFile("../data/test-125.log", segDump)

	segs = occurrence.FilterSegment(segs, stopWords)

	merged := occurrence.Merge(segs, freqDoor, scoreDoor)
	pairDump := term.GetPairTermStr(merged)
	util.WriteFile("../data/main-test-125-merge-merge.log", pairDump)
}
// Test_SplitDocument reads the 125-1 sample, runs it through
// segment.DeleteSpaceChar, splits it into sentences, segments each sentence
// with segment.SegmentSentenceMP, and prints the total number of segments
// collected. It performs no assertions.
func Test_SplitDocument(t *testing.T) {
	raw := util.ReadFile("../data/testdata/125-1.txt")
	cleaned := segment.DeleteSpaceChar(raw)

	signs := dict.NewSign("../data/dictionary/sign.txt")
	sentences := segment.SplitSentence([]rune(cleaned), signs)

	lexicon := dict.NewDictionary("../data/dictionary/sogoudictionary.txt")
	fmt.Println("Start====")

	// Accumulate the segments of every sentence into one slice.
	var collected []*segment.Segment
	for _, sent := range sentences {
		perSentence := segment.SegmentSentenceMP(sent.Buffer(), sent.Start(), lexicon)
		collected = append(collected, perSentence...)
	}

	fmt.Println(len(collected))
}
Esempio n. 5
0
// load reads the file at filename, passes its content through
// DeleteSpaceChar, and stores the file name and the resulting text (as runes)
// on the document.
func (d *Document) load(filename string) {
	cleaned := DeleteSpaceChar(util.ReadFile(filename))

	// Fields are only touched once reading and cleaning have returned.
	d.filename = filename
	d.buf = []rune(cleaned)
}
// Test_FMMSegement builds an FMM segmenter from the path to
// CoreNatureDictionary.mini.txt, segments the 125-1 sample text, and prints
// each returned word on its own line. It performs no assertions.
func Test_FMMSegement(t *testing.T) {
	const (
		dictPath   = "../data/dictionary/CoreNatureDictionary.mini.txt"
		samplePath = "../data/testdata/125-1.txt"
	)

	fmm := segment.NewFMMSegment(dictPath)
	sample := util.ReadFile(samplePath)

	for _, word := range fmm.Segment(sample) {
		fmt.Println(word)
	}
}
Esempio n. 7
0
// handlePath processes every file entry that getFilePath returns for root:
// it prints the file's full path, reads the file, extracts pair terms with
// getWords using a shared word setting, and hands them to writeOutput.
func handlePath(root string) {
	settings := NewWordSetting()

	for _, entry := range getFilePath(root) {
		path := filepath.Join(entry.folder, entry.filename)
		fmt.Println("Handle the file: ", path)

		text := util.ReadFile(path)
		writeOutput(entry, getWords(text, settings))
	}
}
// Test_SplitSentence reads the 125-1 sample, runs it through
// segment.DeleteSpaceChar, splits it with segment.SplitSentence using the
// signs loaded from sign.txt, and prints the number of sentences. It performs
// no assertions.
func Test_SplitSentence(t *testing.T) {
	raw := util.ReadFile("../data/testdata/125-1.txt")
	cleaned := segment.DeleteSpaceChar(raw)

	signs := dict.NewSign("../data/dictionary/sign.txt")
	parts := segment.SplitSentence([]rune(cleaned), signs)

	fmt.Println(len(parts))
}
// Test_DNASegment initialises a DNA segmenter with sogoudictionary.txt, runs
// MPSeg over the 125-1 sample text, and prints the length of the text, the
// number of segments, and the segments themselves. It performs no assertions.
func Test_DNASegment(t *testing.T) {
	dna := segment.NewDNASegment()
	// An alternative dictionary, ../data/dictionary/dictionary.txt, was used
	// here previously.
	dna.InitDict("../data/dictionary/sogoudictionary.txt")

	sample := util.ReadFile("../data/testdata/125-1.txt")
	fmt.Println(len(sample))

	result := dna.MPSeg(sample)
	fmt.Println(len(result))
	fmt.Println(result)

	// Prints the ratio of two literal values; their meaning is not evident
	// from this function.
	fmt.Println(306234192.0 / 301869396788.0)
}
Esempio n. 10
0
// Test_Merge segments the 125-2 sample article, filters the segments against
// the stop-word list, merges them with occurrence.Merge (arguments 4 and
// 15.0), and writes the resulting pair terms to test-125-2-merge-merge.log.
// It performs no assertions.
func Test_Merge(t *testing.T) {
	signs := dict.NewSign("../data/dictionary/sign.txt")
	stopWords := dict.NewSign("../data/dictionary/stopwords.txt")
	lexicon := dict.NewDictionary("../data/dictionary/sogoudictionary.txt")

	doc := util.ReadFile("../data/testdata/125-2.txt")
	segs := occurrence.FilterSegment(segment.SegmentDoc(doc, signs, lexicon), stopWords)

	merged := occurrence.Merge(segs, 4, 15.0)
	util.WriteFile("../data/test-125-2-merge-merge.log", term.GetPairTermStr(merged))
}
Esempio n. 11
0
// handlePath processes every file entry that getFilePath returns for root:
// it logs the file's full path, reads the file, converts the content with a
// mahonia decoder created for Encoding, extracts pair terms with getWords,
// and passes them to writeOutput together with the word setting's outputFreq.
func handlePath(root string) {
	settings := NewWordSetting()
	dec := mahonia.NewDecoder(Encoding)

	for _, entry := range getFilePath(root) {
		path := filepath.Join(entry.folder, entry.filename)
		log.Printf("正在处理文件: %s", path)

		// The conversion result is used unconditionally; an earlier variant
		// only replaced the content when ConvertStringOK reported success.
		decoded := dec.ConvertString(util.ReadFile(path))

		// Keep getWords as its own statement so outputFreq is read afterwards.
		pairs := getWords(decoded, settings)
		writeOutput(entry, pairs, settings.outputFreq)
	}
}
Esempio n. 12
0
// Test_Segment segments the 125-1 sample article with segment.SegmentDoc,
// prints the number of segments, and writes the concatenation of every
// segment's ToString output followed by ": <count>" to
// docsegment_test_125-1.log. It performs no assertions.
func Test_Segment(t *testing.T) {
	doc := util.ReadFile("../data/testdata/125-1.txt")
	signs := dict.NewSign("../data/dictionary/sign.txt")
	lexicon := dict.NewDictionary("../data/dictionary/sogoudictionary.txt")

	segs := segment.SegmentDoc(doc, signs, lexicon)
	count := len(segs)
	fmt.Println(count)

	// Concatenate the textual form of every segment, in order.
	report := ""
	for _, s := range segs {
		report = report + s.ToString()
	}
	report = report + fmt.Sprintf(": %d", count)

	util.WriteFile("../data/docsegment_test_125-1.log", report)
}
Esempio n. 13
0
// Test_SegmentMerger runs a two-pass occurrence pipeline over the 125-2
// sample article and writes each intermediate result to its own log file:
// the filtered segments, the first-pass pair terms, the segments after
// occurrence.MergeSegment, and the second-pass pair terms. It performs no
// assertions.
func Test_SegmentMerger(t *testing.T) {
	signs := dict.NewSign("../data/dictionary/sign.txt")
	stopWords := dict.NewSign("../data/dictionary/stopwords.txt")
	lexicon := dict.NewDictionary("../data/dictionary/sogoudictionary.txt")

	doc := util.ReadFile("../data/testdata/125-2.txt")
	segs := segment.SegmentDoc(doc, signs, lexicon)
	segs = occurrence.FilterSegment(segs, stopWords)
	fmt.Println(len(segs))
	util.WriteFile("../data/test-125-2.log", segment.GetSegmentStr(segs))

	// First pass: compute occurrences over the filtered segments.
	firstPass := occurrence.NewOccurrence()
	firstPass.AddSegments(segs, 3)
	firstPass.Compute()
	firstPass.Output()

	firstPairs := firstPass.GetPairTerms(10.0)
	util.WriteFile("../data/test-125-2-occur.log", term.GetPairTermStr(firstPairs))

	// Merge the segments using the first-pass pair terms.
	mergedSegs := occurrence.MergeSegment(segs, firstPairs)
	fmt.Println(len(mergedSegs))
	util.WriteFile("../data/test-125-2-merge.log", segment.GetSegmentStr(mergedSegs))

	// Second pass: compute occurrences again over the merged segments.
	secondPass := occurrence.NewOccurrence()
	secondPass.AddSegments(mergedSegs, 1)
	secondPass.Compute()
	secondPass.Output()

	secondPairs := secondPass.GetPairTerms(10.0)
	util.WriteFile("../data/test-125-2-second-merge.log", term.GetPairTermStr(secondPairs))
}