func main() { flag.Parse() if len(flag.Args()) != 2 { fmt.Printf("Usage: %s lexicon modelname\n", os.Args[0]) os.Exit(1) } f, err := os.Open(flag.Arg(0)) if err != nil { fmt.Printf("Could not open file: %s\n", flag.Arg(0)) os.Exit(1) } r := bufio.NewReader(f) dict := word_classification.ReadDictionary(r) word_classification.FilterDictionary(dict, 6) problem, metadata := word_classification.ExtractFeatures(dict) param := golinear.DefaultParameters() model, err := golinear.TrainModel(param, problem) if err != nil { panic(err) } modelName := flag.Arg(1) err = model.Save(fmt.Sprintf("%s.model", modelName)) if err != nil { panic(err) } bMetadata, err := json.Marshal(metadata) if err != nil { panic(err) } metadataFile, err := os.OpenFile(fmt.Sprintf("%s.metadata", modelName), os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644) if err != nil { log.Fatal(err) } defer metadataFile.Close() metadataFile.Write(bMetadata) //testPrefix := prefixes("Microsoft", 3) //features := stringFeatureToFeature(testPrefix, featureMapping, norm) //class := model.Predict(features) //numberTagMapping := reverseMapping(tagMapping) //fmt.Printf("Predicted class: %s\n", numberTagMapping[int(class)]) }
func main() { if len(os.Args) != 3 { os.Exit(1) } modelBasename := os.Args[1] model, err := golinear.LoadModel(fmt.Sprintf("%s.model", modelBasename)) if err != nil { log.Fatal(err) } metadata, err := loadMetadata(modelBasename) if err != nil { log.Fatal(err) } testDict := readDictionary(os.Args[2]) word_classification.FilterDictionary(testDict, 6) indexToClass := reverseMapping(metadata.ClassMapping) total, correct, baseline := 0, 0, 0 errorsPerClass := make(map[string]int) totalPerClass := make(map[string]int) for word, tagFreq := range testDict { sfs := word_classification.ApplyTemplates(word_classification.DefaultTemplates, word) fs := word_classification.StringFeatureToFeature(sfs, metadata.FeatureMapping, metadata.Normalizer) for tag, freq := range tagFreq { var i uint64 for i = 0; i < freq; i++ { class := model.Predict(fs) predictedTag := indexToClass[int(class)] // Update counts if predictedTag == tag { correct++ } else { errorsPerClass[predictedTag]++ } if tag == "NN" { baseline++ } total++ totalPerClass[predictedTag]++ } } } errors := total - correct for class, classErrors := range errorsPerClass { fmt.Printf("%s:\t\t%.2f%%\t%.2f%%\n", class, float64(classErrors)/float64(errors)*100, float64(classErrors)/float64(totalPerClass[class])*100) } //fmt.Printf("Correct classifications: %d, total: %d\n", correct, total) fmt.Printf("\nAccuracy: %.2f%%\n", float64(correct)/float64(total)*100) fmt.Printf("Baseline: %.2f%%\n", float64(baseline)/float64(total)*100) }