Esempio n. 1
0
// main trains a word-classification model from a lexicon and saves it.
//
// Usage: <program> lexicon modelname
//
// The lexicon named by the first argument is read, filtered and turned into a
// training problem, on which a golinear model is trained with the default
// parameters. Two files are written: "<modelname>.model" holding the model and
// "<modelname>.metadata" holding the feature-extraction metadata as JSON.
// Any failure is reported on standard error and ends the program with a
// non-zero exit status.
func main() {
	flag.Parse()

	if flag.NArg() != 2 {
		fmt.Fprintf(os.Stderr, "Usage: %s lexicon modelname\n", os.Args[0])
		os.Exit(1)
	}

	lexiconName, modelName := flag.Arg(0), flag.Arg(1)

	f, err := os.Open(lexiconName)
	if err != nil {
		log.Fatalf("Could not open file: %s: %v", lexiconName, err)
	}

	dict := word_classification.ReadDictionary(bufio.NewReader(f))

	// The file handle is not referenced again below, so release it before
	// the training step instead of keeping it open until exit. A close
	// failure on a read-only handle is only worth a warning.
	if err := f.Close(); err != nil {
		log.Printf("closing %s: %v", lexiconName, err)
	}

	word_classification.FilterDictionary(dict, 6)

	problem, metadata := word_classification.ExtractFeatures(dict)

	param := golinear.DefaultParameters()

	model, err := golinear.TrainModel(param, problem)
	if err != nil {
		log.Fatal(err)
	}

	if err := model.Save(fmt.Sprintf("%s.model", modelName)); err != nil {
		log.Fatal(err)
	}

	bMetadata, err := json.Marshal(metadata)
	if err != nil {
		log.Fatal(err)
	}

	metadataFile, err := os.OpenFile(fmt.Sprintf("%s.metadata", modelName),
		os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
	if err != nil {
		log.Fatal(err)
	}

	// log.Fatal exits without running deferred calls, so the file is closed
	// explicitly. Both Write and Close are checked: a short write or a
	// failed flush would otherwise leave a truncated metadata file behind
	// without any diagnostic.
	if _, err := metadataFile.Write(bMetadata); err != nil {
		metadataFile.Close()
		log.Fatal(err)
	}
	if err := metadataFile.Close(); err != nil {
		log.Fatal(err)
	}

	// Leftover manual prediction check, kept disabled for reference.
	//testPrefix := prefixes("Microsoft", 3)
	//features := stringFeatureToFeature(testPrefix, featureMapping, norm)

	//class := model.Predict(features)

	//numberTagMapping := reverseMapping(tagMapping)

	//fmt.Printf("Predicted class: %s\n", numberTagMapping[int(class)])
}
Esempio n. 2
0
// main evaluates a previously trained word-classification model on a test
// lexicon.
//
// Usage: <program> modelname lexicon
//
// It loads "<modelname>.model" and the matching metadata, predicts a tag for
// every word of the filtered test lexicon, and prints per-class error figures
// followed by the overall accuracy and the share of instances tagged "NN"
// (reported as the baseline). Any failure ends the program with a non-zero
// exit status.
func main() {
	if len(os.Args) != 3 {
		fmt.Fprintf(os.Stderr, "Usage: %s modelname lexicon\n", os.Args[0])
		os.Exit(1)
	}

	modelBasename := os.Args[1]
	model, err := golinear.LoadModel(fmt.Sprintf("%s.model", modelBasename))
	if err != nil {
		log.Fatal(err)
	}

	metadata, err := loadMetadata(modelBasename)
	if err != nil {
		log.Fatal(err)
	}

	testDict := readDictionary(os.Args[2])
	word_classification.FilterDictionary(testDict, 6)

	indexToClass := reverseMapping(metadata.ClassMapping)

	total, correct, baseline := 0, 0, 0

	// Both maps are keyed by the *predicted* tag.
	errorsPerClass := make(map[string]int)
	totalPerClass := make(map[string]int)

	for word, tagFreq := range testDict {
		sfs := word_classification.ApplyTemplates(word_classification.DefaultTemplates, word)
		fs := word_classification.StringFeatureToFeature(sfs, metadata.FeatureMapping, metadata.Normalizer)

		// The features depend only on the word, so a single prediction
		// covers every tag and every occurrence of it; the counters are
		// advanced by the frequency instead of re-predicting per occurrence.
		class := model.Predict(fs)
		predictedTag := indexToClass[int(class)]

		for tag, freq := range tagFreq {
			if freq == 0 {
				// Zero occurrences contribute nothing and must not
				// create empty per-class entries.
				continue
			}
			n := int(freq)

			// Update counts
			if predictedTag == tag {
				correct += n
			} else {
				errorsPerClass[predictedTag] += n
			}

			if tag == "NN" {
				baseline += n
			}

			total += n
			totalPerClass[predictedTag] += n
		}
	}

	if total == 0 {
		// Without instances every ratio below would be 0/0 and print NaN.
		log.Fatal("no test instances to evaluate")
	}

	// Per predicted class: its share of all errors, then the fraction of
	// the predictions of that class that were wrong.
	errors := total - correct
	for class, classErrors := range errorsPerClass {
		fmt.Printf("%s:\t\t%.2f%%\t%.2f%%\n", class,
			float64(classErrors)/float64(errors)*100,
			float64(classErrors)/float64(totalPerClass[class])*100)
	}

	//fmt.Printf("Correct classifications: %d, total: %d\n", correct, total)
	fmt.Printf("\nAccuracy: %.2f%%\n", float64(correct)/float64(total)*100)
	fmt.Printf("Baseline: %.2f%%\n", float64(baseline)/float64(total)*100)
}