func ImportIndex(s ImportSpec) *pipeline.Corpus { c := pipeline.NewCorpus() err := c.UpdateIndex(s.Path, s.Tokenizer) if err != nil { panic(err) } return s.Run(c) }
// NewWordLabelCorpusWordFeature creates a LabeledCorpus with word-feature pairs // as features and document tiles as filepaths for labels. func NewLabelCorpusWordFeature(c *pipeline.Corpus, z [][]int) *LabeledCorpus { featured := pipeline.NewCorpus() for d := 0; d < c.M; d++ { tokens := make([]string, c.N[d]) for n := 0; n < c.N[d]; n++ { tokens[n] = fmt.Sprintf("%s-%d", c.Vocab.Tokens[c.W[d][n]], z[d][n]) } featured.AddDocument(c.Titles[d], tokens) } return NewLabeledCorpusWord(featured) }
// NewLabelCorpusFeature creates a LabeledCorcpus with feature assignments in // place of word feature, with using document titles as filepaths for labels. func NewLabeledCorpusFeature(c *pipeline.Corpus, z [][]int) *LabeledCorpus { featured := pipeline.NewCorpus() for d, zd := range z { tokens := make([]string, len(zd)) for n, z := range zd { tokens[n] = fmt.Sprintf("%d", z) } featured.AddDocument(c.Titles[d], tokens) } return NewLabeledCorpusWord(featured) }