func (c *SvmClassifier) TrainClassifier(trainDataSetFile1 string, trainDataSetFile2 string) error { // Perform training fmt.Println("Start Training") start := time.Now() // train the model trainingData1, err := c.loadTrainDataSet(trainDataSetFile1, 2, 3) if err != nil { return err } trainingData2, err := c.loadTrainDataSet(trainDataSetFile2, 2, 3) if err != nil { return err } err = c.createBagOfWords(5, 800, trainingData1, trainingData2) if err != nil { return err } problem := gosvm.NewProblem() // We use all features from the bagofWords for _, val := range trainingData1 { problem.Add(gosvm.TrainingInstance{float64(val.sentimentLabel), gosvm.FromDenseVector(c.createFeatureVector(val.text))}) } for _, val := range trainingData2 { problem.Add(gosvm.TrainingInstance{float64(val.sentimentLabel), gosvm.FromDenseVector(c.createFeatureVector(val.text))}) } param := gosvm.DefaultParameters() param.Kernel = gosvm.NewLinearKernel() param.SVMType = gosvm.NewCSVC(0.1) c.model, err = gosvm.TrainModel(param, problem) if err != nil { return err } elapsed := time.Now().Sub(start) fmt.Println(elapsed) fmt.Println("Training finished!") err = c.model.Save("svm_model") fmt.Println("Svm model saved!") return err }
func (c *SvmClassifier) TestClassifier(testDataSetFile string) error { fmt.Println("Start Testing") start := time.Now() // test the model testData, err := c.loadTestDataSet(testDataSetFile, 2, 3) if err != nil { return err } flailCounter := 0 counter := 0 for _, val := range testData { label := c.model.Predict(gosvm.FromDenseVector(c.createFeatureVector(val.text))) if int(label) != val.sentimentLabel { flailCounter++ } counter++ } // print error rate fmt.Println("Error:", float64(flailCounter)/float64(counter)) elapsed := time.Now().Sub(start) fmt.Println(elapsed) fmt.Println("Test finished!") return nil }
func (c *SvmClassifier) ClassifyTweet(tweet string) float64 { label := c.model.Predict(gosvm.FromDenseVector(c.createFeatureVector(tweet))) return label }