func mainMerge() { var freqDoor int var scoreDoor float32 freqDoor = 6 scoreDoor = 0.010 sign := dict.NewSign("../data/dictionary/sign.txt") stop := dict.NewSign("../data/dictionary/stopwords.txt") d := dict.NewDictionary("../data/dictionary/sogoudictionary.txt") article := util.ReadFile("../data/testdata/125.txt") allsegs := segment.SegmentDoc(article, sign, d) fmt.Println(len(allsegs)) str := segment.GetSegmentStr(allsegs) util.WriteFile("../data/test-125.log", str) allsegs = occurrence.FilterSegment(allsegs, stop) //occur := occurrence.NewOccurrence() //occur.AddSegments(allsegs, stop) //occur.Compute() //occur.Output() pairTerms := occurrence.Merge(allsegs, freqDoor, scoreDoor) str = term.GetPairTermStr(pairTerms) util.WriteFile("../data/main-test-125-merge-merge.log", str) }
func getWords(content string, ws *WordSetting) []*term.PairTerm { allsegs := segment.SegmentDoc(content, ws.signDict, ws.wordDict) //fmt.Println(len(allsegs)) //str := segment.GetSegmentStr(allsegs) //util.WriteFile("../data/test-segment-125.log", str) allsegs = occurrence.FilterSegment(allsegs, ws.stopDict) pairTerms := occurrence.Merge(allsegs, ws.freqDoor, ws.scoreDoor) //str = term.GetPairTermStr(pairTerms) return pairTerms }
func Test_Merge(t *testing.T) { sign := dict.NewSign("../data/dictionary/sign.txt") stop := dict.NewSign("../data/dictionary/stopwords.txt") d := dict.NewDictionary("../data/dictionary/sogoudictionary.txt") article := util.ReadFile("../data/testdata/125-2.txt") allsegs := segment.SegmentDoc(article, sign, d) allsegs = occurrence.FilterSegment(allsegs, stop) pairTerms := occurrence.Merge(allsegs, 4, 15.0) str := term.GetPairTermStr(pairTerms) util.WriteFile("../data/test-125-2-merge-merge.log", str) }