func mainMerge() { var freqDoor int var scoreDoor float32 freqDoor = 6 scoreDoor = 0.010 sign := dict.NewSign("../data/dictionary/sign.txt") stop := dict.NewSign("../data/dictionary/stopwords.txt") d := dict.NewDictionary("../data/dictionary/sogoudictionary.txt") article := util.ReadFile("../data/testdata/125.txt") allsegs := segment.SegmentDoc(article, sign, d) fmt.Println(len(allsegs)) str := segment.GetSegmentStr(allsegs) util.WriteFile("../data/test-125.log", str) allsegs = occurrence.FilterSegment(allsegs, stop) //occur := occurrence.NewOccurrence() //occur.AddSegments(allsegs, stop) //occur.Compute() //occur.Output() pairTerms := occurrence.Merge(allsegs, freqDoor, scoreDoor) str = term.GetPairTermStr(pairTerms) util.WriteFile("../data/main-test-125-merge-merge.log", str) }
func getWords(content string, ws *WordSetting) []*term.PairTerm { allsegs := segment.SegmentDoc(content, ws.signDict, ws.wordDict) //fmt.Println(len(allsegs)) //str := segment.GetSegmentStr(allsegs) //util.WriteFile("../data/test-segment-125.log", str) allsegs = occurrence.FilterSegment(allsegs, ws.stopDict) pairTerms := occurrence.Merge(allsegs, ws.freqDoor, ws.scoreDoor) //str = term.GetPairTermStr(pairTerms) return pairTerms }
func Test_Merge(t *testing.T) { sign := dict.NewSign("../data/dictionary/sign.txt") stop := dict.NewSign("../data/dictionary/stopwords.txt") d := dict.NewDictionary("../data/dictionary/sogoudictionary.txt") article := util.ReadFile("../data/testdata/125-2.txt") allsegs := segment.SegmentDoc(article, sign, d) allsegs = occurrence.FilterSegment(allsegs, stop) pairTerms := occurrence.Merge(allsegs, 4, 15.0) str := term.GetPairTermStr(pairTerms) util.WriteFile("../data/test-125-2-merge-merge.log", str) }
func Test_SegmentMerger(t *testing.T) { sign := dict.NewSign("../data/dictionary/sign.txt") stop := dict.NewSign("../data/dictionary/stopwords.txt") d := dict.NewDictionary("../data/dictionary/sogoudictionary.txt") article := util.ReadFile("../data/testdata/125-2.txt") allsegs := segment.SegmentDoc(article, sign, d) allsegs = occurrence.FilterSegment(allsegs, stop) fmt.Println(len(allsegs)) str := segment.GetSegmentStr(allsegs) util.WriteFile("../data/test-125-2.log", str) occur := occurrence.NewOccurrence() occur.AddSegments(allsegs, 3) occur.Compute() occur.Output() pairTerms := occur.GetPairTerms(10.0) str = term.GetPairTermStr(pairTerms) util.WriteFile("../data/test-125-2-occur.log", str) newSegments := occurrence.MergeSegment(allsegs, pairTerms) fmt.Println(len(newSegments)) str = segment.GetSegmentStr(newSegments) //fmt.Println(str) util.WriteFile("../data/test-125-2-merge.log", str) occur1 := occurrence.NewOccurrence() occur1.AddSegments(newSegments, 1) occur1.Compute() occur1.Output() pairTerms = occur1.GetPairTerms(10.0) str = term.GetPairTermStr(pairTerms) util.WriteFile("../data/test-125-2-second-merge.log", str) }