func getConceptModule() (pipeline.Module, error) { weight := 1.0 scoreFunc := getCountSqLambdaWithWeight(weight) neoFunc := pipeline.NeoAnalyzer{MetadataType: "Concept", ScoreFunc: scoreFunc} module := new(pipeline.StandardModule) module.SetFuncs(&neoFunc) return module, nil }
func getTaxonomyModule() (pipeline.Module, error) { weight := 1.0 scoreFunc := getCountSqLambdaWithWeight(weight) taxFunc := pipeline.NeoAnalyzer{MetadataType: "Taxonomy", ScoreFunc: scoreFunc} taxModule := new(pipeline.StandardModule) taxModule.SetFuncs(&taxFunc) // is safe to ref local value return taxModule, nil }
func TestError(t *testing.T) { errFunc := errorAnalyzer{when: 2} funcModule := pipeline.StandardModule{} funcModule.SetFuncs(&errFunc) pipe := pipeline.NewPipeline() pipe.AddStage(&funcModule) story := storyFromSet(simpleSet) data, err := storyDriver(pipe, story) assert.NotNil(t, err) assert.EqualError(t, err, "Error(s) closing pipeline:\n\tok bump!") assert.Len(t, data, 0) }
func TestBump(t *testing.T) { add := addAnalyzer{howmuch: 1} addModule := pipeline.StandardModule{} addModule.SetFuncs(add) bump := bumpAnalyzer{when: 1, count: 0} bumpModule := pipeline.StandardModule{} bumpModule.SetFuncs(&bump) pipe := pipeline.NewPipeline() pipe.AddStage(&addModule) pipe.AddStage(&bumpModule) story := storyFromSet(simpleSet) data, err := storyDriver(pipe, story) assert.Nil(t, err) assert.Len(t, data, 2) for i := range data { scorei, err := data[i].GetScore("add") assert.Nil(t, err) score := scorei.(TestStandardScore) assert.EqualValues(t, 1.0, score.score) } }
func TestStandardAdd(t *testing.T) { add := addAnalyzer{howmuch: 1} funcModule := pipeline.StandardModule{} funcModule.SetFuncs(add) pipe := pipeline.NewPipeline() pipe.AddStage(&funcModule) story := storyFromSet(simpleSet) data, err := storyDriver(pipe, story) assert.Nil(t, err) assert.Len(t, data, 3) for i := range data { scorei, err := data[i].GetScore("add") assert.Nil(t, err) score := scorei.(TestStandardScore) assert.EqualValues(t, 1.0, score.score) } }
func TestFull(t *testing.T) { taxFunc := pipeline.NeoAnalyzer{MetadataType: "Taxonomy"} taxModule := pipeline.StandardModule{} taxModule.SetFuncs(taxFunc) conceptsFunc := pipeline.NeoAnalyzer{MetadataType: "Concept"} conceptsModule := pipeline.StandardModule{} conceptsModule.SetFuncs(conceptsFunc) keyFunc := pipeline.NeoAnalyzer{MetadataType: "Keyword"} keyModule := pipeline.StandardModule{} keyModule.SetFuncs(&keyFunc) entityFunc := pipeline.NeoAnalyzer{MetadataType: "Entity"} entityModule := pipeline.StandardModule{} entityModule.SetFuncs(&entityFunc) // idf funcs keyIDFFunc := pipeline.IDFAnalyzer{MetadataType: "Keyword"} keyIDFModule := pipeline.StandardModule{} keyIDFModule.SetFuncs(&keyIDFFunc) entityIDFFunc := pipeline.IDFAnalyzer{MetadataType: "Entity"} entityIDFModule := pipeline.StandardModule{} entityIDFModule.SetFuncs(&entityIDFFunc) conceptIDFFunc := pipeline.IDFAnalyzer{MetadataType: "Concept"} conceptIDFModule := pipeline.StandardModule{} conceptIDFModule.SetFuncs(&conceptIDFFunc) // word2vec entityWVFunc := pipeline.WordVecAnalyzer{MetadataType: "Entity"} entityWVModule := pipeline.StandardModule{} entityWVModule.SetFuncs(&entityWVFunc) conceptWVFunc := pipeline.WordVecAnalyzer{MetadataType: "Concept"} conceptWVModule := pipeline.StandardModule{} conceptWVModule.SetFuncs(&conceptWVFunc) keyWVFunc := pipeline.WordVecAnalyzer{MetadataType: "Keyword"} keyWVModule := pipeline.StandardModule{} keyWVModule.SetFuncs(&keyWVFunc) scoreFuncs := make(map[string]func(pipeline.Score) float32) scoreFuncs["neo_Taxonomy"] = SquareCount //SquareFlow scoreFuncs["neo_Concept"] = SquareCount scoreFuncs["neo_Keyword"] = ScoreAverage scoreFuncs["neo_Entity"] = ScoreAverage scoreFuncs["idf_Keyword"] = IDFAverage scoreFuncs["idf_Entity"] = IDFAverage scoreFuncs["idf_Concept"] = IDFAverage scoreFuncs["wordvec_Concept"] = SquareFlow scoreFuncs["wordvec_Keyword"] = SquareFlow scoreFuncs["wordvec_Entity"] = SquareFlow weightMap := make(map[string]float32) weightMap["neo_Taxonomy"] = 3.0 weightMap["neo_Concept"] = 3.0 weightMap["neo_Keyword"] = 3.0 weightMap["neo_Entity"] = 3.0 weightMap["idf_Keyword"] = 10.0 weightMap["idf_Entity"] = 10.0 weightMap["idf_Concept"] = 10.0 weightMap["wordvec_Taxonomy"] = 10.0 weightMap["wordvec_Concept"] = 15.0 weightMap["wordvec_Keyword"] = 10.0 weightMap["wordvec_Entity"] = 10.0 threshFunc := threshAnalyzer{0.0, scoreFuncs, weightMap} threshModule := pipeline.StandardModule{} threshModule.SetFuncs(threshFunc) lastThreshFunc := threshAnalyzer{0.0, scoreFuncs, weightMap} lastThreshModule := pipeline.StandardModule{} lastThreshModule.SetFuncs(lastThreshFunc) // build the pipe pipe := pipeline.NewPipeline() // 1.1 seems to do it for words // do coarse methods // pipe.AddStage(&taxModule) //pipe.AddStage(&conceptsModule) //pipe.AddStage(&keyIDFModule) //pipe.AddStage(&entityIDFModule) //pipe.AddStage(&conceptIDFModule) //pipe.AddStage(&threshModule) pipe.AddStage(&entityWVModule) pipe.AddStage(&conceptWVModule) //pipe.AddStage(&lastThreshModule) pipe.AddStage(&keyWVModule) // thresh then do finer methods //pipe.AddStage(&keyModule) //pipe.AddStage(&entityModule) // build the story assert.Nil(t, relationDB.Open("http://localhost:7474")) articles, err := relationDB.GetAll() assert.Nil(t, err) //assert.True(t, len(articles) > 150) set := testSet{ //mainArticle: "The Horror in San Bernardino", mainArticle: "Fear Ignorance, Not Muslims", //mainArticle: "Ted ‘Carpet-Bomb’ Cruz", //mainArticle: "Deregulating Corporate America", //mainArticle: "Course Correction for School Testing", //mainArticle: "If New York Really Wants to Help the Homeless", //mainArticle: "Social Security in an Election Year", //mainArticle: "The Reproductive Rights Rollback of 2015", //mainArticle: "Strong Unions, Strong Democracy", //mainArticle: "Voter Fatigue in New York", //mainArticle: "Depraved Indifference Toward Flint", relatedArticles: articles, } story := storyFromSet(set) fmt.Println(story.MainArticle.Name()) raw, err := storyDriver(pipe, story) fmt.Println("len of data comming out:", len(raw)) data := heapFilter(raw, scoreFuncs, weightMap, 10) // only get the top couple of articles assert.Nil(t, err) fmt.Println("main:", story.MainArticle.Name()) for i := range data { fmt.Println(i, data[i].Name()) printArticle(data[i]) fmt.Println("total score:", scoreArticle(&data[i], scoreFuncs, weightMap)) fmt.Println() } }