예제 #1
0
// mainMerge segments the 125.txt sample, logs the raw segmentation, filters
// the segments against the stop-word dictionary, and logs the pair terms
// returned by occurrence.Merge.
func mainMerge() {
	// Arguments forwarded to occurrence.Merge.
	freqDoor := 6
	scoreDoor := float32(0.010)

	signDict := dict.NewSign("../data/dictionary/sign.txt")
	stopDict := dict.NewSign("../data/dictionary/stopwords.txt")
	wordDict := dict.NewDictionary("../data/dictionary/sogoudictionary.txt")

	text := util.ReadFile("../data/testdata/125.txt")
	segs := segment.SegmentDoc(text, signDict, wordDict)

	// Dump the unfiltered segmentation before any stop-word filtering.
	fmt.Println(len(segs))
	util.WriteFile("../data/test-125.log", segment.GetSegmentStr(segs))

	segs = occurrence.FilterSegment(segs, stopDict)

	// An earlier occurrence-based pass (NewOccurrence / AddSegments /
	// Compute / Output) used to run here and is currently disabled.

	merged := occurrence.Merge(segs, freqDoor, scoreDoor)
	util.WriteFile("../data/main-test-125-merge-merge.log", term.GetPairTermStr(merged))
}
예제 #2
0
// getWords segments content using ws.signDict and ws.wordDict, filters the
// resulting segments against ws.stopDict, and returns the pair terms that
// occurrence.Merge produces when given ws.freqDoor and ws.scoreDoor.
func getWords(content string, ws *WordSetting) []*term.PairTerm {
	segs := segment.SegmentDoc(content, ws.signDict, ws.wordDict)
	segs = occurrence.FilterSegment(segs, ws.stopDict)
	return occurrence.Merge(segs, ws.freqDoor, ws.scoreDoor)
}
예제 #3
0
// Test_Merge runs segmentation, stop-word filtering and occurrence.Merge over
// the 125-2.txt sample and writes the resulting pair terms to a log file.
// It makes no assertions; the output is meant to be inspected by hand.
func Test_Merge(t *testing.T) {
	signDict := dict.NewSign("../data/dictionary/sign.txt")
	stopDict := dict.NewSign("../data/dictionary/stopwords.txt")
	wordDict := dict.NewDictionary("../data/dictionary/sogoudictionary.txt")

	text := util.ReadFile("../data/testdata/125-2.txt")
	segs := segment.SegmentDoc(text, signDict, wordDict)
	segs = occurrence.FilterSegment(segs, stopDict)

	merged := occurrence.Merge(segs, 4, 15.0)
	util.WriteFile("../data/test-125-2-merge-merge.log", term.GetPairTermStr(merged))
}
예제 #4
0
// Test_Segment segments the 125-1.txt sample and writes the concatenation of
// every segment's ToString() output, followed by ": <segment count>", to a
// log file. It makes no assertions; the output is meant to be inspected by
// hand.
func Test_Segment(t *testing.T) {
	article := util.ReadFile("../data/testdata/125-1.txt")
	sign := dict.NewSign("../data/dictionary/sign.txt")
	d := dict.NewDictionary("../data/dictionary/sogoudictionary.txt")
	allsegs := segment.SegmentDoc(article, sign, d)

	fmt.Println(len(allsegs))

	// Accumulate into a byte slice rather than using `string +=` in the loop:
	// repeated string concatenation copies the whole prefix on every
	// iteration, while append grows the buffer with amortized linear cost.
	var out []byte
	for _, seg := range allsegs {
		out = append(out, seg.ToString()...)
	}
	out = append(out, fmt.Sprintf(": %d", len(allsegs))...)

	util.WriteFile("../data/docsegment_test_125-1.log", string(out))
}
예제 #5
0
// Test_Occurrence_Compute reads the 125-2.txt sample, segments it, logs the
// segments via LogSegments, and then feeds them through an Occurrence
// (AddSegments, Compute, Output). It makes no assertions beyond requiring
// that the sample file can be read.
func Test_Occurrence_Compute(t *testing.T) {
	filename := "../data/testdata/125-2.txt"

	// NOTE(review): ioutil.ReadFile is deprecated since Go 1.16 in favour of
	// os.ReadFile; kept here to avoid touching the file's imports.
	buf, err := ioutil.ReadFile(filename)
	if err != nil {
		// Fail this test through the testing framework instead of panicking,
		// so the failure is attributed to this test and other tests in the
		// package still run.
		t.Fatalf("reading %s: %v", filename, err)
	}

	sign := dict.NewSign("../data/dictionary/sign.txt")
	d := dict.NewDictionary("../data/dictionary/sogoudictionary.txt")

	segments := segment.SegmentDoc(string(buf), sign, d)
	fmt.Println(len(segments))
	LogSegments(segments)

	occur := occurrence.NewOccurrence()
	occur.AddSegments(segments, 3)
	occur.Compute(1)
	occur.Output()
}
예제 #6
0
// Test_SegmentMerger exercises a two-pass occurrence/merge flow over the
// 125-2.txt sample: it computes pair terms from the filtered segments, merges
// the segments using those pair terms, and then computes pair terms again on
// the merged segments. Each intermediate result is written to its own log
// file. It makes no assertions; the output is meant to be inspected by hand.
func Test_SegmentMerger(t *testing.T) {
	signDict := dict.NewSign("../data/dictionary/sign.txt")
	stopDict := dict.NewSign("../data/dictionary/stopwords.txt")
	wordDict := dict.NewDictionary("../data/dictionary/sogoudictionary.txt")

	// Segment the sample and drop stop words before the first pass.
	text := util.ReadFile("../data/testdata/125-2.txt")
	segs := segment.SegmentDoc(text, signDict, wordDict)
	segs = occurrence.FilterSegment(segs, stopDict)
	fmt.Println(len(segs))
	util.WriteFile("../data/test-125-2.log", segment.GetSegmentStr(segs))

	// First pass: pair terms from the filtered segments.
	firstPass := occurrence.NewOccurrence()
	firstPass.AddSegments(segs, 3)
	firstPass.Compute()
	firstPass.Output()

	pairs := firstPass.GetPairTerms(10.0)
	util.WriteFile("../data/test-125-2-occur.log", term.GetPairTermStr(pairs))

	// Merge the segments using the first-pass pair terms.
	mergedSegs := occurrence.MergeSegment(segs, pairs)
	fmt.Println(len(mergedSegs))
	util.WriteFile("../data/test-125-2-merge.log", segment.GetSegmentStr(mergedSegs))

	// Second pass: pair terms from the merged segments.
	secondPass := occurrence.NewOccurrence()
	secondPass.AddSegments(mergedSegs, 1)
	secondPass.Compute()
	secondPass.Output()

	util.WriteFile("../data/test-125-2-second-merge.log", term.GetPairTermStr(secondPass.GetPairTerms(10.0)))
}