コード例 #1
0
ファイル: chinese_segmenter.go プロジェクト: postfix/LanModel
// evaluateSegmenter runs the n-gram segmenter over a pre-segmented corpus and
// prints every line whose segmentation differs from the one in the corpus.
//
// Each corpus line is trimmed, converted with the configured charset
// converter and split on single spaces to obtain the expected tokens; the
// same line with all spaces removed is fed to the segmenter. All problems are
// reported on stdout and the function returns nothing.
func evaluateSegmenter() {
	//cedict, err := LoadCEDict(cedict_path, cedict_key_type)
	//if err != nil {
	//	t.Fatalf("Failed to load CEDict[%s]: %s", cedict_path, err)
	//}
	model, err := ngram_model.LoadNGramModel(*unigramModel, *bigramModel)
	if err != nil {
		fmt.Printf("Failed to load model[%s,%s]: %s", *unigramModel, *bigramModel, err)
		return
	}
	// The first argument is nil because the CEDict loading above is disabled.
	segmenter := seg.NewSegmenter(nil, model)
	converter := util.NewUtf8Converter(*corpusCharSet)
	err = util.ForEachLineInFile(*corpus, func(line string) (bool, error) {
		line = converter.ConvertString(strings.Trim(line, " \t\n\r\f"))
		if line == "" {
			// A blank line has no expected tokens; strings.Split would yield
			// [""] and produce a spurious mismatch report.
			return true, nil
		}
		sample := strings.Replace(line, " ", "", -1)
		expected := strings.Split(line, " ")
		result, segErr := segmenter.Segment(sample)
		if segErr != nil {
			// Report the failure instead of comparing a possibly invalid result.
			fmt.Printf("Segment(%s) failed: %v\n\n", sample, segErr)
			return true, nil
		}

		// Only compare element-wise when the lengths agree; indexing
		// expected with an index taken from a longer result would panic.
		matched := len(result) == len(expected)
		for i := 0; matched && i < len(result); i++ {
			matched = result[i] == expected[i]
		}
		if !matched {
			fmt.Printf("Segment(%s) expect result to be:\n%v\nbut got:\n%v\n\n",
				sample, expected, result)
		}
		// Always continue with the next line; mismatches are not fatal.
		return true, nil
	})
	if err != nil {
		fmt.Printf("Error encountered when attempting to evaluate segmenter: %s", err)
	}
}
コード例 #2
0
ファイル: language_model.go プロジェクト: postfix/LanModel
// evaluateNGramModel loads the unigram/bigram model and the corpus named by
// the command-line flags, then prints the perplexity of the simple unigram
// and bigram predictors on that corpus.
//
// Load failures are logged and abort the evaluation; nothing is returned.
func evaluateNGramModel() {
	model, err := ngram_model.LoadNGramModel(*unigramModel, *bigramModel)
	if err != nil {
		log.Printf("Failed to load NGram model: %s", err)
		// Without a model the predictors below cannot be built meaningfully.
		return
	}

	// NOTE(review): the corpus is created with a hard-coded "Big5" argument
	// (presumably its character set) — confirm this matches the corpus file.
	corpusSupplier := ngram_model.NewSegCNCorpus("Big5")
	if err = (&corpusSupplier).Load(*corpus); err != nil {
		log.Printf("Failed to load Corpus [%s]: %s", *corpus, err)
		return
	}

	unigramPredictor := ngram_model.NewSimpleUnigramPredictor(model)
	unigramPerplexity := ngram_model.Perplexity(unigramPredictor, corpusSupplier)
	fmt.Printf("Unigram Model Perplexity: %f\n", unigramPerplexity)

	bigramPredictor := ngram_model.NewSimpleBigramPredictor(model)
	bigramPerplexity := ngram_model.Perplexity(bigramPredictor, corpusSupplier)
	fmt.Printf("Bigram Model Perplexity: %f\n", bigramPerplexity)
}
コード例 #3
0
ファイル: chinese_segmenter.go プロジェクト: postfix/LanModel
// doSegment reads text from standard input line by line, segments every
// non-blank line with the n-gram model and prints the resulting tokens, one
// input line per output line.
//
// Reading continues until end of input. Segmentation and read errors are
// reported on stderr so they do not mix with the segmentation output.
func doSegment() {
	model, err := ngram_model.LoadNGramModel(*unigramModel, *bigramModel)
	if err != nil {
		fmt.Printf("Failed to load model[%s,%s]: %s", *unigramModel, *bigramModel, err)
		return
	}
	segmenter := seg.NewSegmenter(nil, model)

	// bufio.Reader.ReadLine's second result is isPrefix ("line did not fit in
	// the buffer"), not "more input follows", so it cannot drive the loop.
	// A Scanner yields whole lines and stops cleanly at end of input.
	scanner := bufio.NewScanner(os.Stdin)
	// Raise the per-line limit above the Scanner's 64 KiB default.
	scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024)
	for scanner.Scan() {
		cleanLine := strings.Trim(scanner.Text(), " \t\n\r\f")
		if cleanLine == "" {
			// Nothing to segment on blank or whitespace-only lines.
			continue
		}
		result, segErr := segmenter.Segment(cleanLine)
		if segErr != nil {
			fmt.Fprintf(os.Stderr, "Failed to segment %q: %v\n", cleanLine, segErr)
			continue
		}
		fmt.Printf("%v\n", result)
	}
	if readErr := scanner.Err(); readErr != nil {
		fmt.Fprintf(os.Stderr, "Failed to read from stdin: %s\n", readErr)
	}
}