Example No. 1
File: boot.go Project: itsabot/abot
// trainClassifiers trains classifiers for each plugin.
func trainClassifiers() error {
	for _, pconf := range PluginsGo {
		ss, err := fetchTrainingSentences(pconf.ID, pconf.Name)
		if err != nil {
			return err
		}

		// Assemble list of Bayesian classes from all trained intents
		// for this plugin. m is used to keep track of the classes
		// already taught to each classifier.
		m := map[string]struct{}{}
		for _, s := range ss {
			_, ok := m[s.Intent]
			if ok {
				continue
			}
			log.Debug("learning intent", s.Intent)
			m[s.Intent] = struct{}{}
			pluginIntents[s.PluginID] = append(pluginIntents[s.PluginID],
				bayesian.Class(s.Intent))
		}

		// Build classifier from complete sets of intents
		for _, s := range ss {
			intents := pluginIntents[s.PluginID]
			// Calling bayesian.NewClassifier() with 0 or 1
			// classes causes a panic.
			if len(intents) == 0 {
				break
			}
			if len(intents) == 1 {
				intents = append(intents, bayesian.Class("__no_intent"))
			}
			c := bayesian.NewClassifier(intents...)
			bClassifiers[s.PluginID] = c
		}

		// With classifiers initialized, train each of them on a
		// sentence's stems.
		for _, s := range ss {
			tokens := TokenizeSentence(s.Sentence)
			stems := StemTokens(tokens)
			c, exists := bClassifiers[s.PluginID]
			if exists {
				c.Learn(stems, bayesian.Class(s.Intent))
			}
		}
	}
	return nil
}
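As the comment in trainClassifiers notes, bayesian.NewClassifier panics when given fewer than two classes, which is why a placeholder "__no_intent" class is appended. A minimal sketch of the same train-then-classify cycle, assuming the classifier is github.com/jbrukh/bayesian (the class names and sample tokens below are made up for illustration):

package main

import (
	"fmt"

	"github.com/jbrukh/bayesian"
)

func main() {
	// NewClassifier needs at least two classes; fewer causes a panic.
	weather := bayesian.Class("weather")
	other := bayesian.Class("other")
	c := bayesian.NewClassifier(weather, other)

	// Learn takes a tokenized document plus the class it belongs to.
	c.Learn([]string{"will", "it", "rain", "tomorrow"}, weather)
	c.Learn([]string{"book", "a", "table", "for", "two"}, other)

	// LogScores returns the scores, the index of the best-scoring class,
	// and whether that maximum was unique.
	_, best, _ := c.LogScores([]string{"rain", "today"})
	fmt.Println(c.Classes[best]) // weather
}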
Example No. 2
func main() {
	var ledgerFileName string
	var accountSubstring, csvFileName, csvDateFormat string
	var negateAmount bool
	var fieldDelimiter string

	flag.BoolVar(&negateAmount, "neg", false, "Negate amount column value.")
	flag.StringVar(&ledgerFileName, "f", "", "Ledger file name (*Required).")
	flag.StringVar(&csvDateFormat, "date-format", "01/02/2006", "Date format.")
	flag.StringVar(&fieldDelimiter, "delimiter", ",", "Field delimiter.")
	flag.Parse()

	args := flag.Args()
	if len(args) != 2 {
		usage()
	} else {
		accountSubstring = args[0]
		csvFileName = args[1]
	}

	csvFileReader, err := os.Open(csvFileName)
	if err != nil {
		fmt.Println("CSV: ", err)
		return
	}
	defer csvFileReader.Close()

	ledgerFileReader, err := os.Open(ledgerFileName)
	if err != nil {
		fmt.Println("Ledger: ", err)
		return
	}
	defer ledgerFileReader.Close()

	generalLedger, parseError := ledger.ParseLedger(ledgerFileReader)
	if parseError != nil {
		fmt.Println(parseError)
		return
	}

	var matchingAccount string
	matchingAccounts := ledger.GetBalances(generalLedger, []string{accountSubstring})
	if len(matchingAccounts) < 1 {
		fmt.Println("Unable to find matching account.")
		return
	} else {
		matchingAccount = matchingAccounts[len(matchingAccounts)-1].Name
	}

	allAccounts := ledger.GetBalances(generalLedger, []string{})

	csvReader := csv.NewReader(csvFileReader)
	csvReader.Comma, _ = utf8.DecodeRuneInString(fieldDelimiter)
	csvRecords, _ := csvReader.ReadAll()

	classes := make([]bayesian.Class, len(allAccounts))
	for i, bal := range allAccounts {
		classes[i] = bayesian.Class(bal.Name)
	}
	classifier := bayesian.NewClassifier(classes...)
	for _, tran := range generalLedger {
		payeeWords := strings.Split(tran.Payee, " ")
		for _, accChange := range tran.AccountChanges {
			if strings.Contains(accChange.Name, "Expense") {
				classifier.Learn(payeeWords, bayesian.Class(accChange.Name))
			}
		}
	}

	// Find columns from header
	var dateColumn, payeeColumn, amountColumn int
	dateColumn, payeeColumn, amountColumn = -1, -1, -1
	for fieldIndex, fieldName := range csvRecords[0] {
		fieldName = strings.ToLower(fieldName)
		if strings.Contains(fieldName, "date") {
			dateColumn = fieldIndex
		} else if strings.Contains(fieldName, "description") {
			payeeColumn = fieldIndex
		} else if strings.Contains(fieldName, "payee") {
			payeeColumn = fieldIndex
		} else if strings.Contains(fieldName, "amount") {
			amountColumn = fieldIndex
		} else if strings.Contains(fieldName, "expense") {
			amountColumn = fieldIndex
		}
	}

	if dateColumn < 0 || payeeColumn < 0 || amountColumn < 0 {
		fmt.Println("Unable to find columns required from header field names.")
		return
	}

	expenseAccount := ledger.Account{Name: "unknown:unknown", Balance: new(big.Rat)}
	csvAccount := ledger.Account{Name: matchingAccount, Balance: new(big.Rat)}
	for _, record := range csvRecords[1:] {
		inputPayeeWords := strings.Split(record[payeeColumn], " ")
		csvDate, _ := time.Parse(csvDateFormat, record[dateColumn])
		if !existingTransaction(generalLedger, csvDate, inputPayeeWords[0]) {
			// Classify into expense account
			_, likely, _ := classifier.LogScores(inputPayeeWords)
			if likely >= 0 {
				expenseAccount.Name = string(classifier.Classes[likely])
			}

			// Negate amount if required
			expenseAccount.Balance.SetString(record[amountColumn])
			if negateAmount {
				expenseAccount.Balance.Neg(expenseAccount.Balance)
			}

			// CSV amount is the negative of the expense amount
			csvAccount.Balance.Neg(expenseAccount.Balance)

			// Create valid transaction for print in ledger format
			trans := &ledger.Transaction{Date: csvDate, Payee: record[payeeColumn]}
			trans.AccountChanges = []ledger.Account{csvAccount, expenseAccount}
			PrintTransaction(trans, 80)
		}
	}
}
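One small detail in this example: csv.Reader.Comma is a rune, while the -delimiter flag arrives as a string, so utf8.DecodeRuneInString is used to take its first rune. A minimal sketch of that conversion, using a made-up tab-separated input:

package main

import (
	"encoding/csv"
	"fmt"
	"strings"
	"unicode/utf8"
)

func main() {
	// The delimiter arrives as a string (e.g. from a flag); csv.Reader
	// wants a rune, so take the first rune of that string.
	fieldDelimiter := "\t"
	r := csv.NewReader(strings.NewReader("date\tpayee\tamount\n01/02/2006\tCoffee\t3.50\n"))
	r.Comma, _ = utf8.DecodeRuneInString(fieldDelimiter)

	records, err := r.ReadAll()
	if err != nil {
		fmt.Println(err)
		return
	}
	fmt.Println(records[1]) // [01/02/2006 Coffee 3.50]
}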
Example No. 3
type Syntax struct {
	name       string
	extensions *regexp.Regexp
	class      bayesian.Class
}

type Scanner struct {
	classifier *bayesian.Classifier
	catalog    *catalog.Catalog
	base_fname string
}

var (
	VALID_LANGUAGES = map[string]*Syntax{
		"go":     &Syntax{"go", regexp.MustCompile("\\.go"), bayesian.Class("go")},
		"python": &Syntax{"python", regexp.MustCompile("\\.py$"), bayesian.Class("python")},
		"perl":   &Syntax{"perl", regexp.MustCompile("\\.pl$"), bayesian.Class("perl")},
		"ruby":   &Syntax{"ruby", regexp.MustCompile("\\.rb"), bayesian.Class("ruby")},
		"shell":  &Syntax{"shell", regexp.MustCompile("\\.sh"), bayesian.Class("shell")},
		"obj-c":  &Syntax{"obj-c", regexp.MustCompile("\\.m$"), bayesian.Class("obj-c")}}

	BAYESIAN_CLASSES     []bayesian.Class
	EXTENSIONS_TO_SYNTAX map[*regexp.Regexp]*Syntax
	CLASS_TO_SYNTAX      map[bayesian.Class]*Syntax
)

// Prepare the other data structures used by the scanner
func init() {
	EXTENSIONS_TO_SYNTAX = make(map[*regexp.Regexp]*Syntax)
	CLASS_TO_SYNTAX = make(map[bayesian.Class]*Syntax)
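The excerpt ends inside init. Judging from the declared variables, the rest of the function presumably walks VALID_LANGUAGES to fill the two lookup maps and collect the Bayesian classes; the loop below is an assumed reconstruction, not the project's original code:

	// Assumed completion: index each Syntax by its extension regexp and by
	// its Bayesian class, and collect the classes for the classifier.
	for _, syntax := range VALID_LANGUAGES {
		BAYESIAN_CLASSES = append(BAYESIAN_CLASSES, syntax.class)
		EXTENSIONS_TO_SYNTAX[syntax.extensions] = syntax
		CLASS_TO_SYNTAX[syntax.class] = syntax
	}
}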
Example No. 4
func main() {
	anaconda.SetConsumerKey("")
	anaconda.SetConsumerSecret("")
	api := anaconda.NewTwitterApi("", "")

	t := time.NewTicker(time.Second * 10)
	l := fmt.Sprintf("%d", time.Now().UnixNano())

	te := bayesian.Class("tech")
	ot := bayesian.Class("other")

	fparts := strings.Split(*files, ",")
	b, err := ioutil.ReadFile(fparts[0])
	if err != nil {
		panic(err.Error())
	}
	words := strings.Split(string(b), "\n")
	c := bayesian.NewClassifier(te, ot)
	c.Learn(words, te)

	posts := make(chan string, 10)
	post := time.NewTicker(time.Minute * 30)
	var cur []string
	var max float64
	var idx int

	go func() {
		for {
			select {
			case <-post.C:
				idx = -1
				max = -1000.0

				for i, p := range cur {
					parts := strings.Split(strings.ToLower(p), " ")
					scores, class, _ := c.LogScores(parts)

					if class != 0 {
						continue
					}

					if len(scores) == 0 {
						continue
					}

					if scores[0] > max {
						max = scores[0]
						idx = i
					}
				}

				if idx >= 0 {
					api.PostTweet(cur[idx], url.Values{})
				}

				cur = []string{}
			case p := <-posts:
				cur = append(cur, p)
			}
		}
	}()

	for range t.C {
		r, err := http.Get("http://127.0.0.1:8889/objects?stream=tech&last=" + l)
		if err != nil {
			fmt.Println(err)
			continue
		}

		b, err := ioutil.ReadAll(r.Body)
		r.Body.Close()
		if err != nil {
			fmt.Println(err)
			continue
		}
		var objects []Object
		err = json.Unmarshal(b, &objects)
		if err != nil {
			fmt.Println(err)
			continue
		}
		for _, object := range objects {
			text := object.Text
			parts := strings.Split(object.Text, " ")
			for _, part := range parts {
				m := getMetadata(part)
				if m != nil && len(m.Title) > 0 && len(m.Url) > 0 {
					text = m.Title + " " + m.Url
				}
			}
			posts <- text
		}

		if len(objects) > 0 {
			l = fmt.Sprintf("%d", objects[len(objects)-1].Created)
		}
	}
}
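The posting goroutine above keeps only candidates the classifier assigns to class index 0 ("tech") and then tweets the one with the highest log-score. A stripped-down sketch of that selection step, assuming github.com/jbrukh/bayesian; the bestTechPost helper and the hand-written training words stand in for the word-list file read at startup:

package main

import (
	"fmt"
	"strings"

	"github.com/jbrukh/bayesian"
)

// bestTechPost returns the index of the candidate most confidently
// classified as the first ("tech") class, or -1 if none qualifies.
func bestTechPost(c *bayesian.Classifier, candidates []string) int {
	best, max := -1, -1000.0
	for i, p := range candidates {
		scores, class, _ := c.LogScores(strings.Split(strings.ToLower(p), " "))
		if class != 0 || len(scores) == 0 {
			continue // not classified as "tech"
		}
		if scores[0] > max {
			max, best = scores[0], i
		}
	}
	return best
}

func main() {
	te := bayesian.Class("tech")
	ot := bayesian.Class("other")
	c := bayesian.NewClassifier(te, ot)
	c.Learn([]string{"golang", "compiler", "kernel"}, te)
	c.Learn([]string{"recipe", "football", "weather"}, ot)

	posts := []string{"new golang compiler release", "today's football scores"}
	if i := bestTechPost(c, posts); i >= 0 {
		fmt.Println("would post:", posts[i])
	}
}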