func mainMerge() { var freqDoor int var scoreDoor float32 freqDoor = 6 scoreDoor = 0.010 sign := dict.NewSign("../data/dictionary/sign.txt") stop := dict.NewSign("../data/dictionary/stopwords.txt") d := dict.NewDictionary("../data/dictionary/sogoudictionary.txt") article := util.ReadFile("../data/testdata/125.txt") allsegs := segment.SegmentDoc(article, sign, d) fmt.Println(len(allsegs)) str := segment.GetSegmentStr(allsegs) util.WriteFile("../data/test-125.log", str) allsegs = occurrence.FilterSegment(allsegs, stop) //occur := occurrence.NewOccurrence() //occur.AddSegments(allsegs, stop) //occur.Compute() //occur.Output() pairTerms := occurrence.Merge(allsegs, freqDoor, scoreDoor) str = term.GetPairTermStr(pairTerms) util.WriteFile("../data/main-test-125-merge-merge.log", str) }
func getWords(content string, ws *WordSetting) []*term.PairTerm { allsegs := segment.SegmentDoc(content, ws.signDict, ws.wordDict) //fmt.Println(len(allsegs)) //str := segment.GetSegmentStr(allsegs) //util.WriteFile("../data/test-segment-125.log", str) allsegs = occurrence.FilterSegment(allsegs, ws.stopDict) pairTerms := occurrence.Merge(allsegs, ws.freqDoor, ws.scoreDoor) //str = term.GetPairTermStr(pairTerms) return pairTerms }
func Test_Merge(t *testing.T) { sign := dict.NewSign("../data/dictionary/sign.txt") stop := dict.NewSign("../data/dictionary/stopwords.txt") d := dict.NewDictionary("../data/dictionary/sogoudictionary.txt") article := util.ReadFile("../data/testdata/125-2.txt") allsegs := segment.SegmentDoc(article, sign, d) allsegs = occurrence.FilterSegment(allsegs, stop) pairTerms := occurrence.Merge(allsegs, 4, 15.0) str := term.GetPairTermStr(pairTerms) util.WriteFile("../data/test-125-2-merge-merge.log", str) }
func Test_Segment(t *testing.T) { article := util.ReadFile("../data/testdata/125-1.txt") sign := dict.NewSign("../data/dictionary/sign.txt") //sentences := SplitSentence([]rune(text), sign) d := dict.NewDictionary("../data/dictionary/sogoudictionary.txt") allsegs := segment.SegmentDoc(article, sign, d) fmt.Println(len(allsegs)) final := "" for _, seg := range allsegs { final += seg.ToString() } final += fmt.Sprintf(": %d", len(allsegs)) util.WriteFile("../data/docsegment_test_125-1.log", final) }
func Test_Occurrence_Compute(t *testing.T) { //var segmenter sego.Segmenter //segmenter.LoadDictionary("C:/Go/thirdpartlib/src/github.com/huichen/sego/data/dictionary.txt") filename := "../data/testdata/125-2.txt" buf, err := ioutil.ReadFile(filename) if err != nil { fmt.Println(err) panic(err) } sign := dict.NewSign("../data/dictionary/sign.txt") d := dict.NewDictionary("../data/dictionary/sogoudictionary.txt") //stop := dict.NewSign("../data/dictionary/stopwords.txt.txt") //segments := segmenter.Segment(buf) segments := segment.SegmentDoc(string(buf), sign, d) fmt.Println(len(segments)) LogSegments(segments) occur := occurrence.NewOccurrence() occur.AddSegments(segments, 3) occur.Compute(1) occur.Output() }
func Test_SegmentMerger(t *testing.T) { sign := dict.NewSign("../data/dictionary/sign.txt") stop := dict.NewSign("../data/dictionary/stopwords.txt") d := dict.NewDictionary("../data/dictionary/sogoudictionary.txt") article := util.ReadFile("../data/testdata/125-2.txt") allsegs := segment.SegmentDoc(article, sign, d) allsegs = occurrence.FilterSegment(allsegs, stop) fmt.Println(len(allsegs)) str := segment.GetSegmentStr(allsegs) util.WriteFile("../data/test-125-2.log", str) occur := occurrence.NewOccurrence() occur.AddSegments(allsegs, 3) occur.Compute() occur.Output() pairTerms := occur.GetPairTerms(10.0) str = term.GetPairTermStr(pairTerms) util.WriteFile("../data/test-125-2-occur.log", str) newSegments := occurrence.MergeSegment(allsegs, pairTerms) fmt.Println(len(newSegments)) str = segment.GetSegmentStr(newSegments) //fmt.Println(str) util.WriteFile("../data/test-125-2-merge.log", str) occur1 := occurrence.NewOccurrence() occur1.AddSegments(newSegments, 1) occur1.Compute() occur1.Output() pairTerms = occur1.GetPairTerms(10.0) str = term.GetPairTermStr(pairTerms) util.WriteFile("../data/test-125-2-second-merge.log", str) }