Exemple #1
0
// ImportIndex creates a new corpus, calls UpdateIndex on it with s.Path and
// s.Tokenizer, and returns the result of s.Run applied to that corpus.
//
// It panics if UpdateIndex reports an error.
func ImportIndex(s ImportSpec) *pipeline.Corpus {
	corpus := pipeline.NewCorpus()
	if err := corpus.UpdateIndex(s.Path, s.Tokenizer); err != nil {
		// The signature has no error return, so a failed index update is fatal.
		panic(err)
	}
	return s.Run(corpus)
}
Exemple #2
0
// NewLabelCorpusWordFeature builds a LabeledCorpus whose tokens are
// word-feature pairs. For every document d of c (0 <= d < c.M) and every
// token position n (0 <= n < c.N[d]), the vocabulary token of c.W[d][n] is
// joined with the assignment z[d][n] as "<token>-<feature>". Each document
// keeps its title from c.Titles.
//
// The rewritten corpus is handed to NewLabeledCorpusWord; per the original
// comment, document titles serve as filepaths for labels there.
func NewLabelCorpusWordFeature(c *pipeline.Corpus, z [][]int) *LabeledCorpus {
	paired := pipeline.NewCorpus()
	for doc := 0; doc < c.M; doc++ {
		size := c.N[doc]
		pairs := make([]string, 0, size)
		for pos := 0; pos < size; pos++ {
			word := c.Vocab.Tokens[c.W[doc][pos]]
			pairs = append(pairs, fmt.Sprintf("%s-%d", word, z[doc][pos]))
		}
		paired.AddDocument(c.Titles[doc], pairs)
	}
	return NewLabeledCorpusWord(paired)
}
Exemple #3
0
// NewLabeledCorpusFeature builds a LabeledCorpus in which every token is
// replaced by its feature assignment: document d of the new corpus consists
// of the decimal string form of each entry of z[d], in order. Each document
// keeps its title from c.Titles[d].
//
// The rewritten corpus is handed to NewLabeledCorpusWord; per the original
// comment, document titles serve as filepaths for labels there.
func NewLabeledCorpusFeature(c *pipeline.Corpus, z [][]int) *LabeledCorpus {
	assigned := pipeline.NewCorpus()
	for doc := range z {
		labels := make([]string, 0, len(z[doc]))
		for _, feature := range z[doc] {
			labels = append(labels, fmt.Sprintf("%d", feature))
		}
		assigned.AddDocument(c.Titles[doc], labels)
	}
	return NewLabeledCorpusWord(assigned)
}