func (s *SentimentSuite) TestNewAnalyzer(c *C) {
    analyzer := NewAnalyzer()
    learned := analyzer.classifier.Learned()
    c.Assert(analyzer.classifier, FitsTypeOf, bayesian.NewClassifier(Positive, Negative, Neutral))
    c.Assert(learned, Not(Equals), 0)
}
// trainClassifiers trains classifiers for each plugin.
func trainClassifiers() error {
    for _, pconf := range PluginsGo {
        ss, err := fetchTrainingSentences(pconf.ID, pconf.Name)
        if err != nil {
            return err
        }

        // Assemble list of Bayesian classes from all trained intents
        // for this plugin. m is used to keep track of the classes
        // already taught to each classifier.
        m := map[string]struct{}{}
        for _, s := range ss {
            if _, ok := m[s.Intent]; ok {
                continue
            }
            log.Debug("learning intent", s.Intent)
            m[s.Intent] = struct{}{}
            pluginIntents[s.PluginID] = append(pluginIntents[s.PluginID],
                bayesian.Class(s.Intent))
        }

        // Build classifier from complete sets of intents
        for _, s := range ss {
            intents := pluginIntents[s.PluginID]

            // Calling bayesian.NewClassifier() with 0 or 1
            // classes causes a panic.
            if len(intents) == 0 {
                break
            }
            if len(intents) == 1 {
                intents = append(intents, bayesian.Class("__no_intent"))
            }
            c := bayesian.NewClassifier(intents...)
            bClassifiers[s.PluginID] = c
        }

        // With classifiers initialized, train each of them on a
        // sentence's stems.
        for _, s := range ss {
            tokens := TokenizeSentence(s.Sentence)
            stems := StemTokens(tokens)
            c, exists := bClassifiers[s.PluginID]
            if exists {
                c.Learn(stems, bayesian.Class(s.Intent))
            }
        }
    }
    return nil
}
func main() {
    const (
        Positive bayesian.Class = "Positive"
        Negative bayesian.Class = "Negative"
    )

    // Load stop words and the labelled training word lists, stripping
    // stop words before learning.
    stopWords := createMapOfStopWord("stopwords.txt")
    classifier := bayesian.NewClassifier(Positive, Negative)
    goodStuff := removeStopWords(stopWords, convFileToListWords("training-1.txt"))
    badStuff := removeStopWords(stopWords, convFileToListWords("training-0.txt"))

    classifier.Learn(goodStuff, Positive)
    classifier.Learn(badStuff, Negative)

    // Persist the trained classifier for later reuse.
    classifier.WriteToFile("classifier.gob")
}
func (scanner *Scanner) LoadOrCreate() {
    classifier, err := bayesian.NewClassifierFromFile(scanner.BayesianFile())
    if err != nil {
        if os.IsNotExist(err) {
            log.Printf("%s does not exist. Creating db\n", scanner.BayesianFile())
            classifier = bayesian.NewClassifier(BAYESIAN_CLASSES...)
        }
    }

    cat, err := catalog.NewCatalogFromFile(scanner.CatalogFile())
    if err != nil {
        if os.IsNotExist(err) {
            cat = &catalog.Catalog{
                Filename: scanner.CatalogFile(),
                Files:    make([]uint32, 0),
            }
        }
    }

    scanner.classifier = classifier
    scanner.catalog = cat
}
// Sets up and trains a new analyzer to classify sentiment
func NewAnalyzer() Analyzer {
    a := Analyzer{}

    // Get the training data if not present
    _, err := os.Stat(DATA_FILE)
    if err != nil {
        if os.IsNotExist(err) {
            a.downloadDataSet()
        }
    }

    c, err := bayesian.NewClassifierFromFile(DATA_FILE)
    if err == nil {
        a.classifier = c
    } else {
        // Note: Nothing will be trained at this point, but we'll still
        // have a classifier that can be trained
        a.classifier = bayesian.NewClassifier(Positive, Negative, Neutral)
    }
    return a
}
func main() {
    const (
        Good bayesian.Class = "Good"
        Bad  bayesian.Class = "Bad"
    )

    // Train a two-class classifier on a handful of example words.
    classifier := bayesian.NewClassifier(Good, Bad)
    goodStuff := []string{"tall", "rich", "handsome"}
    badStuff := []string{"poor", "smelly", "ugly"}
    classifier.Learn(goodStuff, Good)
    classifier.Learn(badStuff, Bad)

    // LogScores returns raw log-space scores; ProbScores returns
    // normalized probabilities. Both also report the index of the most
    // likely class.
    scores, likely, _ := classifier.LogScores(
        []string{"tall", "girl"},
    )
    fmt.Printf("Score: %v, likely: %v\n", scores, likely)

    probs, likely, _ := classifier.ProbScores(
        []string{"tall", "girl"},
    )
    fmt.Printf("Probs: %v, likely: %v\n", probs, likely)
}
func main() {
    var ledgerFileName string
    var accountSubstring, csvFileName, csvDateFormat string
    var negateAmount bool
    var fieldDelimiter string

    flag.BoolVar(&negateAmount, "neg", false, "Negate amount column value.")
    flag.StringVar(&ledgerFileName, "f", "", "Ledger file name (*Required).")
    flag.StringVar(&csvDateFormat, "date-format", "01/02/2006", "Date format.")
    flag.StringVar(&fieldDelimiter, "delimiter", ",", "Field delimiter.")
    flag.Parse()

    args := flag.Args()
    if len(args) != 2 {
        usage()
    } else {
        accountSubstring = args[0]
        csvFileName = args[1]
    }

    csvFileReader, err := os.Open(csvFileName)
    if err != nil {
        fmt.Println("CSV: ", err)
        return
    }
    defer csvFileReader.Close()

    ledgerFileReader, err := os.Open(ledgerFileName)
    if err != nil {
        fmt.Println("Ledger: ", err)
        return
    }
    defer ledgerFileReader.Close()

    generalLedger, parseError := ledger.ParseLedger(ledgerFileReader)
    if parseError != nil {
        fmt.Println(parseError)
        return
    }

    var matchingAccount string
    matchingAccounts := ledger.GetBalances(generalLedger, []string{accountSubstring})
    if len(matchingAccounts) < 1 {
        fmt.Println("Unable to find matching account.")
        return
    }
    matchingAccount = matchingAccounts[len(matchingAccounts)-1].Name

    allAccounts := ledger.GetBalances(generalLedger, []string{})

    csvReader := csv.NewReader(csvFileReader)
    csvReader.Comma, _ = utf8.DecodeRuneInString(fieldDelimiter)
    csvRecords, _ := csvReader.ReadAll()

    // One Bayesian class per ledger account; train the classifier on the
    // payee words of existing expense transactions.
    classes := make([]bayesian.Class, len(allAccounts))
    for i, bal := range allAccounts {
        classes[i] = bayesian.Class(bal.Name)
    }
    classifier := bayesian.NewClassifier(classes...)
    for _, tran := range generalLedger {
        payeeWords := strings.Split(tran.Payee, " ")
        for _, accChange := range tran.AccountChanges {
            if strings.Contains(accChange.Name, "Expense") {
                classifier.Learn(payeeWords, bayesian.Class(accChange.Name))
            }
        }
    }

    // Find columns from header
    dateColumn, payeeColumn, amountColumn := -1, -1, -1
    for fieldIndex, fieldName := range csvRecords[0] {
        fieldName = strings.ToLower(fieldName)
        if strings.Contains(fieldName, "date") {
            dateColumn = fieldIndex
        } else if strings.Contains(fieldName, "description") {
            payeeColumn = fieldIndex
        } else if strings.Contains(fieldName, "payee") {
            payeeColumn = fieldIndex
        } else if strings.Contains(fieldName, "amount") {
            amountColumn = fieldIndex
        } else if strings.Contains(fieldName, "expense") {
            amountColumn = fieldIndex
        }
    }
    if dateColumn < 0 || payeeColumn < 0 || amountColumn < 0 {
        fmt.Println("Unable to find columns required from header field names.")
        return
    }

    expenseAccount := ledger.Account{Name: "unknown:unknown", Balance: new(big.Rat)}
    csvAccount := ledger.Account{Name: matchingAccount, Balance: new(big.Rat)}
    for _, record := range csvRecords[1:] {
        inputPayeeWords := strings.Split(record[payeeColumn], " ")
        csvDate, _ := time.Parse(csvDateFormat, record[dateColumn])
        if !existingTransaction(generalLedger, csvDate, inputPayeeWords[0]) {
            // Classify into expense account
            _, likely, _ := classifier.LogScores(inputPayeeWords)
            if likely >= 0 {
                expenseAccount.Name = string(classifier.Classes[likely])
            }

            // Negate amount if required
            expenseAccount.Balance.SetString(record[amountColumn])
            if negateAmount {
                expenseAccount.Balance.Neg(expenseAccount.Balance)
            }

            // CSV amount is the negative of the expense amount
            csvAccount.Balance.Neg(expenseAccount.Balance)

            // Create valid transaction for print in ledger format
            trans := &ledger.Transaction{Date: csvDate, Payee: record[payeeColumn]}
            trans.AccountChanges = []ledger.Account{csvAccount, expenseAccount}
            PrintTransaction(trans, 80)
        }
    }
}
// NewClassifier returns a classifier over the Boy, Girl, and None classes.
func NewClassifier() *bayesian.Classifier {
    return bayesian.NewClassifier(Boy, Girl, None)
}
func main() {
    anaconda.SetConsumerKey("")
    anaconda.SetConsumerSecret("")
    api := anaconda.NewTwitterApi("", "")

    t := time.NewTicker(time.Second * 10)
    l := fmt.Sprintf("%d", time.Now().UnixNano())

    // Train a two-class classifier from a newline-separated word list.
    te := bayesian.Class("tech")
    ot := bayesian.Class("other")
    fparts := strings.Split(*files, ",")
    b, err := ioutil.ReadFile(fparts[0])
    if err != nil {
        panic(err.Error())
    }
    words := strings.Split(string(b), "\n")
    c := bayesian.NewClassifier(te, ot)
    c.Learn(words, te)

    posts := make(chan string, 10)
    post := time.NewTicker(time.Minute * 30)
    var cur []string
    var max float64
    var idx int

    // Every 30 minutes, tweet the buffered post that scores highest for
    // the "tech" class, then clear the buffer.
    go func() {
        for {
            select {
            case <-post.C:
                idx = -1
                max = -1000.0
                for i, p := range cur {
                    parts := strings.Split(strings.ToLower(p), " ")
                    scores, class, _ := c.LogScores(parts)
                    if class != 0 {
                        continue
                    }
                    if len(scores) == 0 {
                        continue
                    }
                    if scores[0] > max {
                        max = scores[0]
                        idx = i
                    }
                }
                if idx >= 0 {
                    api.PostTweet(cur[idx], url.Values{})
                }
                cur = []string{}
            case p := <-posts:
                cur = append(cur, p)
            }
        }
    }()

    // Poll the object stream every 10 seconds and queue new posts.
    for range t.C {
        r, err := http.Get("http://127.0.0.1:8889/objects?stream=tech&last=" + l)
        if err != nil {
            fmt.Println(err)
            continue
        }
        b, err := ioutil.ReadAll(r.Body)
        r.Body.Close()
        if err != nil {
            fmt.Println(err)
            continue
        }

        var objects []Object
        err = json.Unmarshal(b, &objects)
        if err != nil {
            fmt.Println(err)
            continue
        }

        for _, object := range objects {
            text := object.Text
            parts := strings.Split(object.Text, " ")
            for _, part := range parts {
                m := getMetadata(part)
                if m != nil && len(m.Title) > 0 && len(m.Url) > 0 {
                    text = m.Title + " " + m.Url
                }
            }
            posts <- text
        }
        if len(objects) > 0 {
            l = fmt.Sprintf("%d", objects[len(objects)-1].Created)
        }
    }
}