Beispiel #1
0
// TestNewAnalyzer checks that the analyzer returned by NewAnalyzer carries a
// classifier of the same type as one built by bayesian.NewClassifier, and that
// the value reported by the classifier's Learned() is not 0.
func (s *SentimentSuite) TestNewAnalyzer(c *C) {
	subject := NewAnalyzer()
	learnedCount := subject.classifier.Learned()

	// Reference value used only for its type in the FitsTypeOf comparison.
	reference := bayesian.NewClassifier(Positive, Negative, Neutral)

	c.Assert(subject.classifier, FitsTypeOf, reference)
	c.Assert(learnedCount, Not(Equals), 0)
}
Beispiel #2
0
// trainClassifiers walks every plugin in PluginsGo, fetches its training
// sentences, and for each plugin ID seen in those sentences registers the
// intents in pluginIntents, stores a Bayesian classifier in bClassifiers and
// teaches that classifier the stems of every sentence. It returns the first
// error reported by fetchTrainingSentences, or nil.
func trainClassifiers() error {
	for _, plugin := range PluginsGo {
		sentences, err := fetchTrainingSentences(plugin.ID, plugin.Name)
		if err != nil {
			return err
		}

		// Pass 1: record each distinct intent once as a Bayesian class.
		// seen is keyed by intent only and is reset for every plugin
		// configuration.
		seen := make(map[string]struct{})
		for _, sent := range sentences {
			if _, dup := seen[sent.Intent]; dup {
				continue
			}
			log.Debug("learning intent", sent.Intent)
			seen[sent.Intent] = struct{}{}
			class := bayesian.Class(sent.Intent)
			pluginIntents[sent.PluginID] = append(pluginIntents[sent.PluginID], class)
		}

		// Pass 2: create a classifier over the full class list of each
		// sentence's plugin. bayesian.NewClassifier() panics when given
		// 0 or 1 classes, so an empty list stops this pass and a single
		// class is padded with a placeholder.
		for _, sent := range sentences {
			classes := pluginIntents[sent.PluginID]
			if len(classes) == 0 {
				break
			}
			if len(classes) == 1 {
				classes = append(classes, bayesian.Class("__no_intent"))
			}
			bClassifiers[sent.PluginID] = bayesian.NewClassifier(classes...)
		}

		// Pass 3: feed the stems of every sentence to the classifier of
		// its plugin, when one was created.
		for _, sent := range sentences {
			stems := StemTokens(TokenizeSentence(sent.Sentence))
			if classifier, ok := bClassifiers[sent.PluginID]; ok {
				classifier.Learn(stems, bayesian.Class(sent.Intent))
			}
		}
	}
	return nil
}
Beispiel #3
0
// main builds a two-class (Positive/Negative) Bayesian classifier, teaches it
// the word lists derived from "training-1.txt" (Positive) and "training-0.txt"
// (Negative) after passing them through removeStopWords with the stop words
// from "stopwords.txt", and writes the classifier to "classifier.gob".
//
// It panics if the classifier cannot be written, since the output file is the
// only product of this program.
func main() {

	const (
		Positive bayesian.Class = "Positive"
		Negative bayesian.Class = "Negative"
	)

	stopWords := createMapOfStopWord("stopwords.txt")
	classifier := bayesian.NewClassifier(Positive, Negative)
	goodStuff := removeStopWords(stopWords, convFileToListWords("training-1.txt"))
	badStuff := removeStopWords(stopWords, convFileToListWords("training-0.txt"))
	classifier.Learn(goodStuff, Positive)
	classifier.Learn(badStuff, Negative)

	// WriteToFile reports failure through its error return; ignoring it
	// would let the program exit successfully without a usable output.
	if err := classifier.WriteToFile("classifier.gob"); err != nil {
		panic(err)
	}
}
Beispiel #4
0
// LoadOrCreate populates scanner.classifier and scanner.catalog from the
// files named by scanner.BayesianFile() and scanner.CatalogFile().
//
// When a file does not exist, a fresh value is created in its place: a
// classifier over BAYESIAN_CLASSES, or an empty catalog bound to the catalog
// file name. Any other load error is logged and the corresponding field is
// set to whatever the loader returned alongside the error.
func (scanner *Scanner) LoadOrCreate() {
	classifier, err := bayesian.NewClassifierFromFile(scanner.BayesianFile())
	switch {
	case err == nil:
		// Loaded successfully.
	case os.IsNotExist(err):
		log.Printf("%s does not exist. Creating db\n", scanner.BayesianFile())
		classifier = bayesian.NewClassifier(BAYESIAN_CLASSES...)
	default:
		// Previously swallowed: make unreadable/corrupt files visible.
		log.Printf("loading %s: %v\n", scanner.BayesianFile(), err)
	}

	cat, err := catalog.NewCatalogFromFile(scanner.CatalogFile())
	switch {
	case err == nil:
		// Loaded successfully.
	case os.IsNotExist(err):
		cat = &catalog.Catalog{Filename: scanner.CatalogFile(), Files: make([]uint32, 0)}
	default:
		// Previously swallowed: make unreadable/corrupt files visible.
		log.Printf("loading %s: %v\n", scanner.CatalogFile(), err)
	}

	scanner.classifier = classifier
	scanner.catalog = cat
}
Beispiel #5
0
// NewAnalyzer returns an Analyzer whose classifier is read from DATA_FILE.
// If os.Stat reports that DATA_FILE does not exist, downloadDataSet is called
// first. If the classifier cannot be read from the file, the analyzer gets a
// new classifier over Positive, Negative and Neutral instead.
func NewAnalyzer() Analyzer {
	var a Analyzer

	// Fetch the training data only when the file is reported missing; any
	// other Stat error is ignored here.
	if _, statErr := os.Stat(DATA_FILE); statErr != nil && os.IsNotExist(statErr) {
		a.downloadDataSet()
	}

	loaded, loadErr := bayesian.NewClassifierFromFile(DATA_FILE)
	if loadErr != nil {
		// Note: nothing has been trained at this point, but the caller
		// still receives a classifier that can be trained.
		a.classifier = bayesian.NewClassifier(Positive, Negative, Neutral)
		return a
	}

	a.classifier = loaded
	return a
}
Beispiel #6
0
// main teaches a two-class (Good/Bad) Bayesian classifier three words per
// class, then prints the log scores and the probability scores, together with
// the index of the likeliest class, for the words "tall" and "girl".
func main() {
	const (
		Good bayesian.Class = "Good"
		Bad  bayesian.Class = "Bad"
	)

	classifier := bayesian.NewClassifier(Good, Bad)
	classifier.Learn([]string{"tall", "rich", "handsome"}, Good)
	classifier.Learn([]string{"poor", "smelly", "ugly"}, Bad)

	// The same document is scored twice, once per scoring method.
	query := []string{"tall", "girl"}

	logScores, best, _ := classifier.LogScores(query)
	fmt.Printf("Score: %v, likely: %v\n", logScores, best)

	probScores, best, _ := classifier.ProbScores(query)
	fmt.Printf("Probs: %v, likely: %v\n", probScores, best)

}
Beispiel #7
0
// main imports transactions from a CSV file into ledger format.
//
// Usage: [flags] <account-substring> <csv-file>
//
// It parses the ledger named by -f, picks the last account whose name matches
// <account-substring>, trains a Bayesian classifier that maps payee words to
// account names (learning only from account changes whose name contains
// "Expense"), and then, for every CSV record that existingTransaction does not
// report as already present, prints a two-posting transaction between the
// matched account and the classified expense account.
//
// Fatal problems (unreadable files, unparsable ledger or CSV, missing header
// columns) are printed to stdout and end the program. Individual records with
// an unparsable date or amount are reported on stderr and skipped so that the
// ledger output on stdout stays valid.
func main() {
	var ledgerFileName string
	var accountSubstring, csvFileName, csvDateFormat string
	var negateAmount bool
	var fieldDelimiter string

	flag.BoolVar(&negateAmount, "neg", false, "Negate amount column value.")
	flag.StringVar(&ledgerFileName, "f", "", "Ledger file name (*Required).")
	flag.StringVar(&csvDateFormat, "date-format", "01/02/2006", "Date format.")
	flag.StringVar(&fieldDelimiter, "delimiter", ",", "Field delimiter.")
	flag.Parse()

	// NOTE(review): usage() presumably terminates the program; if it
	// returns, execution continues with empty positional arguments and
	// fails at os.Open below.
	args := flag.Args()
	if len(args) != 2 {
		usage()
	} else {
		accountSubstring = args[0]
		csvFileName = args[1]
	}

	csvFileReader, err := os.Open(csvFileName)
	if err != nil {
		fmt.Println("CSV: ", err)
		return
	}
	defer csvFileReader.Close()

	ledgerFileReader, err := os.Open(ledgerFileName)
	if err != nil {
		fmt.Println("Ledger: ", err)
		return
	}
	defer ledgerFileReader.Close()

	generalLedger, parseError := ledger.ParseLedger(ledgerFileReader)
	if parseError != nil {
		fmt.Println(parseError)
		return
	}

	matchingAccounts := ledger.GetBalances(generalLedger, []string{accountSubstring})
	if len(matchingAccounts) < 1 {
		fmt.Println("Unable to find matching account.")
		return
	}
	// The last match is the one used as the CSV-side account.
	matchingAccount := matchingAccounts[len(matchingAccounts)-1].Name

	allAccounts := ledger.GetBalances(generalLedger, []string{})

	csvReader := csv.NewReader(csvFileReader)
	csvReader.Comma, _ = utf8.DecodeRuneInString(fieldDelimiter)
	csvRecords, err := csvReader.ReadAll()
	if err != nil {
		fmt.Println("CSV: ", err)
		return
	}
	// The first record is required as the header row; without it the
	// column lookup below would index out of range.
	if len(csvRecords) == 0 {
		fmt.Println("CSV: no records found.")
		return
	}

	classes := make([]bayesian.Class, len(allAccounts))
	for i, bal := range allAccounts {
		classes[i] = bayesian.Class(bal.Name)
	}
	// bayesian.NewClassifier panics when given fewer than two classes.
	if len(classes) < 2 {
		fmt.Println("Unable to classify: ledger must contain at least two accounts.")
		return
	}
	classifier := bayesian.NewClassifier(classes...)
	for _, tran := range generalLedger {
		payeeWords := strings.Split(tran.Payee, " ")
		for _, accChange := range tran.AccountChanges {
			if strings.Contains(accChange.Name, "Expense") {
				classifier.Learn(payeeWords, bayesian.Class(accChange.Name))
			}
		}
	}

	// Find columns from header. Matching is by case-insensitive substring,
	// and a later matching header field overrides an earlier one.
	dateColumn, payeeColumn, amountColumn := -1, -1, -1
	for fieldIndex, fieldName := range csvRecords[0] {
		fieldName = strings.ToLower(fieldName)
		switch {
		case strings.Contains(fieldName, "date"):
			dateColumn = fieldIndex
		case strings.Contains(fieldName, "description"),
			strings.Contains(fieldName, "payee"):
			payeeColumn = fieldIndex
		case strings.Contains(fieldName, "amount"),
			strings.Contains(fieldName, "expense"):
			amountColumn = fieldIndex
		}
	}

	if dateColumn < 0 || payeeColumn < 0 || amountColumn < 0 {
		fmt.Println("Unable to find columns required from header field names.")
		return
	}

	expenseAccount := ledger.Account{Name: "unknown:unknown", Balance: new(big.Rat)}
	csvAccount := ledger.Account{Name: matchingAccount, Balance: new(big.Rat)}
	for _, record := range csvRecords[1:] {
		inputPayeeWords := strings.Split(record[payeeColumn], " ")

		csvDate, dateErr := time.Parse(csvDateFormat, record[dateColumn])
		if dateErr != nil {
			// A zero date would produce a bogus transaction; skip it.
			fmt.Fprintln(os.Stderr, "CSV: skipping record with invalid date:", dateErr)
			continue
		}

		if existingTransaction(generalLedger, csvDate, inputPayeeWords[0]) {
			continue
		}

		// Classify into expense account
		_, likely, _ := classifier.LogScores(inputPayeeWords)
		if likely >= 0 {
			expenseAccount.Name = string(classifier.Classes[likely])
		}

		// SetString leaves the value undefined on failure, so a record
		// whose amount cannot be parsed is skipped instead of printed.
		if _, ok := expenseAccount.Balance.SetString(record[amountColumn]); !ok {
			fmt.Fprintln(os.Stderr, "CSV: skipping record with invalid amount:", record[amountColumn])
			continue
		}

		// Negate amount if required
		if negateAmount {
			expenseAccount.Balance.Neg(expenseAccount.Balance)
		}

		// Csv amount is the negative of the expense amount
		csvAccount.Balance.Neg(expenseAccount.Balance)

		// Create valid transaction for print in ledger format
		trans := &ledger.Transaction{Date: csvDate, Payee: record[payeeColumn]}
		trans.AccountChanges = []ledger.Account{csvAccount, expenseAccount}
		PrintTransaction(trans, 80)
	}
}
Beispiel #8
0
// NewClassifier returns a Bayesian classifier constructed over the Boy, Girl
// and None classes.
func NewClassifier() *bayesian.Classifier {
	classifier := bayesian.NewClassifier(Boy, Girl, None)
	return classifier
}
Beispiel #9
0
// main polls a local HTTP endpoint every 10 seconds for new objects, queues
// their text, and every 30 minutes posts to Twitter the queued text that the
// Bayesian classifier scores highest for the "tech" class.
//
// The classifier is taught the "tech" class from the newline-separated words
// in the first file named by the comma-separated *files value; the "other"
// class is declared but never taught here. main panics if that file cannot be
// read. All later errors are printed and the loop carries on.
func main() {
	anaconda.SetConsumerKey("")
	anaconda.SetConsumerSecret("")
	api := anaconda.NewTwitterApi("", "")

	t := time.NewTicker(time.Second * 10)
	// l is the "last seen" marker sent to the endpoint; it starts at the
	// current time in nanoseconds and then follows the newest object.
	l := fmt.Sprintf("%d", time.Now().UnixNano())

	te := bayesian.Class("tech")
	ot := bayesian.Class("other")

	fparts := strings.Split(*files, ",")
	b, err := ioutil.ReadFile(fparts[0])
	if err != nil {
		panic(err.Error())
	}
	words := strings.Split(string(b), "\n")
	c := bayesian.NewClassifier(te, ot)
	c.Learn(words, te)

	posts := make(chan string, 10)
	post := time.NewTicker(time.Minute * 30)

	// Collector/poster goroutine: it owns the pending list, so no
	// synchronization with the polling loop is needed beyond the channel.
	go func() {
		var cur []string
		for {
			select {
			case <-post.C:
				idx := -1
				// Candidates scoring at or below this floor are
				// never posted.
				best := -1000.0

				for i, p := range cur {
					parts := strings.Split(strings.ToLower(p), " ")
					scores, class, _ := c.LogScores(parts)

					// Only texts whose likeliest class is the
					// first one ("tech") are candidates.
					if class != 0 || len(scores) == 0 {
						continue
					}

					if scores[0] > best {
						best = scores[0]
						idx = i
					}
				}

				if idx >= 0 {
					if _, err := api.PostTweet(cur[idx], url.Values{}); err != nil {
						fmt.Println(err)
					}
				}

				// Start a fresh batch, reusing the backing array.
				cur = cur[:0]
			case p := <-posts:
				cur = append(cur, p)
			}
		}
	}()

	for range t.C {
		r, err := http.Get("http://127.0.0.1:8889/objects?stream=tech&last=" + l)
		if err != nil {
			fmt.Println(err)
			continue
		}

		body, err := ioutil.ReadAll(r.Body)
		r.Body.Close()
		if err != nil {
			fmt.Println(err)
			continue
		}

		var objects []Object
		if err := json.Unmarshal(body, &objects); err != nil {
			fmt.Println(err)
			continue
		}

		for _, object := range objects {
			text := object.Text
			// If any whitespace-separated part yields metadata with
			// both a title and a URL, post that instead of the raw
			// text; the last such part wins.
			for _, part := range strings.Split(object.Text, " ") {
				m := getMetadata(part)
				if m != nil && len(m.Title) > 0 && len(m.Url) > 0 {
					text = m.Title + " " + m.Url
				}
			}
			posts <- text
		}

		if len(objects) > 0 {
			l = fmt.Sprintf("%d", objects[len(objects)-1].Created)
		}
	}
}