Example #1
func getConceptModule() (pipeline.Module, error) {
	weight := 1.0
	scoreFunc := getCountSqLambdaWithWeight(weight)
	neoFunc := pipeline.NeoAnalyzer{
		MetadataType: "Concept",
		ScoreFunc:    scoreFunc,
	}

	module := new(pipeline.StandardModule)
	module.SetFuncs(&neoFunc)

	return module, nil
}
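getCountSqLambdaWithWeight is not part of this listing. A minimal sketch of the idea, assuming the score function simply squares a metadata count and scales it by the weight; the signature below is a guess, not the package's actual ScoreFunc type:

// Hypothetical sketch: returns a closure that squares a metadata count and
// scales it by weight. The real ScoreFunc signature may differ.
func getCountSqLambdaWithWeight(weight float64) func(count int) float64 {
	return func(count int) float64 {
		return weight * float64(count*count)
	}
}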
Example #2
func getTaxonomyModule() (pipeline.Module, error) {
	weight := 1.0
	scoreFunc := getCountSqLambdaWithWeight(weight)
	taxFunc := pipeline.NeoAnalyzer{
		MetadataType: "Taxonomy",
		ScoreFunc:    scoreFunc,
	}

	taxModule := new(pipeline.StandardModule)
	taxModule.SetFuncs(&taxFunc) // safe to reference a local value here

	return taxModule, nil
}
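For context, the two constructors above could be wired into a pipeline with the same calls the tests below use (pipeline.NewPipeline and AddStage). buildCoarsePipeline is a hypothetical name, and it assumes AddStage accepts the pipeline.Module interface the getters return:

// Hypothetical wiring of the two module constructors above.
func buildCoarsePipeline() error {
	conceptModule, err := getConceptModule()
	if err != nil {
		return err
	}
	taxModule, err := getTaxonomyModule()
	if err != nil {
		return err
	}
	pipe := pipeline.NewPipeline()
	pipe.AddStage(conceptModule) // assumes AddStage takes a pipeline.Module
	pipe.AddStage(taxModule)
	// a test would then drive pipe through storyDriver, as below
	return nil
}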
Example #3
func TestError(t *testing.T) {

	errFunc := errorAnalyzer{when: 2}
	funcModule := pipeline.StandardModule{}
	funcModule.SetFuncs(&errFunc)

	pipe := pipeline.NewPipeline()
	pipe.AddStage(&funcModule)

	story := storyFromSet(simpleSet)
	data, err := storyDriver(pipe, story)

	assert.NotNil(t, err)
	assert.EqualError(t, err, "Error(s) closing pipeline:\n\tok bump!")
	assert.Len(t, data, 0)
}
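errorAnalyzer is a test double defined elsewhere. A hedged sketch of the idea, assuming an analyzer sees articles one at a time and is closed when the pipeline shuts down; the method names and signatures are guesses, and the standard errors package would need to be imported:

// Hypothetical test double: counts the articles it is handed, then reports
// "ok bump!" at close time, which TestError expects wrapped in the
// "Error(s) closing pipeline" message.
type errorAnalyzer struct {
	when int
	seen int
}

// Analyze counts the articles handed to this stage.
func (e *errorAnalyzer) Analyze(name string) error {
	e.seen++
	return nil
}

// Close reports the failure once the stage has seen at least `when` articles.
func (e *errorAnalyzer) Close() error {
	if e.seen >= e.when {
		return errors.New("ok bump!")
	}
	return nil
}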
Example #4
func TestBump(t *testing.T) {

	add := addAnalyzer{howmuch: 1}
	addModule := pipeline.StandardModule{}
	addModule.SetFuncs(add)

	bump := bumpAnalyzer{when: 1, count: 0}
	bumpModule := pipeline.StandardModule{}
	bumpModule.SetFuncs(&bump)

	pipe := pipeline.NewPipeline()
	pipe.AddStage(&addModule)
	pipe.AddStage(&bumpModule)

	story := storyFromSet(simpleSet)
	data, err := storyDriver(pipe, story)

	assert.Nil(t, err)
	assert.Len(t, data, 2)

	for i := range data {
		scorei, err := data[i].GetScore("add")
		assert.Nil(t, err)
		score := scorei.(TestStandardScore)

		assert.EqualValues(t, 1.0, score.score)
	}
}
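addAnalyzer, bumpAnalyzer, and TestStandardScore are also defined elsewhere. The shapes below are assumptions inferred from the assertions, not the real types:

// Hypothetical score type read back by the tests via GetScore("add").
type TestStandardScore struct {
	score float32
}

// Hypothetical: attaches an "add" score of howmuch to every article it sees.
type addAnalyzer struct {
	howmuch float32
}

// Hypothetical: drops the article being processed once its internal counter
// reaches when, which is why TestBump ends with one article fewer than
// TestStandardAdd.
type bumpAnalyzer struct {
	when  int
	count int
}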
Example #5
func TestStandardAdd(t *testing.T) {

	add := addAnalyzer{howmuch: 1}
	funcModule := pipeline.StandardModule{}
	funcModule.SetFuncs(add)

	pipe := pipeline.NewPipeline()
	pipe.AddStage(&funcModule)

	story := storyFromSet(simpleSet)
	data, err := storyDriver(pipe, story)

	assert.Nil(t, err)
	assert.Len(t, data, 3)

	for i := range data {
		scorei, err := data[i].GetScore("add")
		assert.Nil(t, err)
		score := scorei.(TestStandardScore)

		assert.EqualValues(t, 1.0, score.score)
	}
}
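storyFromSet, storyDriver, and simpleSet are helpers outside this listing. The hypothetical driveStages below only illustrates the pattern the driver appears to follow; the real storyDriver works on pipeline stages and article types not shown here:

// Hypothetical illustration of the driver pattern: each related article is
// pushed through every stage in order, and articles a stage drops (or errors
// on) do not come out the other end.
func driveStages(articles []string, stages []func(string) (bool, error)) ([]string, error) {
	var out []string
	for _, a := range articles {
		keep := true
		for _, stage := range stages {
			ok, err := stage(a)
			if err != nil {
				return nil, err
			}
			if !ok {
				keep = false
				break
			}
		}
		if keep {
			out = append(out, a)
		}
	}
	return out, nil
}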
Example #6
func TestFull(t *testing.T) {

	// neo funcs
	taxFunc := pipeline.NeoAnalyzer{MetadataType: "Taxonomy"}
	taxModule := pipeline.StandardModule{}
	taxModule.SetFuncs(taxFunc)

	conceptsFunc := pipeline.NeoAnalyzer{MetadataType: "Concept"}
	conceptsModule := pipeline.StandardModule{}
	conceptsModule.SetFuncs(conceptsFunc)

	keyFunc := pipeline.NeoAnalyzer{MetadataType: "Keyword"}
	keyModule := pipeline.StandardModule{}
	keyModule.SetFuncs(&keyFunc)

	entityFunc := pipeline.NeoAnalyzer{MetadataType: "Entity"}
	entityModule := pipeline.StandardModule{}
	entityModule.SetFuncs(&entityFunc)

	// idf funcs
	keyIDFFunc := pipeline.IDFAnalyzer{MetadataType: "Keyword"}
	keyIDFModule := pipeline.StandardModule{}
	keyIDFModule.SetFuncs(&keyIDFFunc)

	entityIDFFunc := pipeline.IDFAnalyzer{MetadataType: "Entity"}
	entityIDFModule := pipeline.StandardModule{}
	entityIDFModule.SetFuncs(&entityIDFFunc)

	conceptIDFFunc := pipeline.IDFAnalyzer{MetadataType: "Concept"}
	conceptIDFModule := pipeline.StandardModule{}
	conceptIDFModule.SetFuncs(&conceptIDFFunc)

	// word2vec
	entityWVFunc := pipeline.WordVecAnalyzer{MetadataType: "Entity"}
	entityWVModule := pipeline.StandardModule{}
	entityWVModule.SetFuncs(&entityWVFunc)

	conceptWVFunc := pipeline.WordVecAnalyzer{MetadataType: "Concept"}
	conceptWVModule := pipeline.StandardModule{}
	conceptWVModule.SetFuncs(&conceptWVFunc)

	keyWVFunc := pipeline.WordVecAnalyzer{MetadataType: "Keyword"}
	keyWVModule := pipeline.StandardModule{}
	keyWVModule.SetFuncs(&keyWVFunc)

	scoreFuncs := make(map[string]func(pipeline.Score) float32)
	scoreFuncs["neo_Taxonomy"] = SquareCount //SquareFlow
	scoreFuncs["neo_Concept"] = SquareCount
	scoreFuncs["neo_Keyword"] = ScoreAverage
	scoreFuncs["neo_Entity"] = ScoreAverage
	scoreFuncs["idf_Keyword"] = IDFAverage
	scoreFuncs["idf_Entity"] = IDFAverage
	scoreFuncs["idf_Concept"] = IDFAverage
	scoreFuncs["wordvec_Concept"] = SquareFlow
	scoreFuncs["wordvec_Keyword"] = SquareFlow
	scoreFuncs["wordvec_Entity"] = SquareFlow

	weightMap := make(map[string]float32)
	weightMap["neo_Taxonomy"] = 3.0
	weightMap["neo_Concept"] = 3.0
	weightMap["neo_Keyword"] = 3.0
	weightMap["neo_Entity"] = 3.0
	weightMap["idf_Keyword"] = 10.0
	weightMap["idf_Entity"] = 10.0
	weightMap["idf_Concept"] = 10.0
	weightMap["wordvec_Taxonomy"] = 10.0
	weightMap["wordvec_Concept"] = 15.0
	weightMap["wordvec_Keyword"] = 10.0
	weightMap["wordvec_Entity"] = 10.0

	threshFunc := threshAnalyzer{0.0, scoreFuncs, weightMap}
	threshModule := pipeline.StandardModule{}
	threshModule.SetFuncs(threshFunc)

	lastThreshFunc := threshAnalyzer{0.0, scoreFuncs, weightMap}
	lastThreshModule := pipeline.StandardModule{}
	lastThreshModule.SetFuncs(lastThreshFunc)

	// build the pipe
	pipe := pipeline.NewPipeline()

	// 1.1 seems to do it for words

	// do coarse methods
	//	pipe.AddStage(&taxModule)
	//pipe.AddStage(&conceptsModule)
	//pipe.AddStage(&keyIDFModule)
	//pipe.AddStage(&entityIDFModule)
	//pipe.AddStage(&conceptIDFModule)
	//pipe.AddStage(&threshModule)
	pipe.AddStage(&entityWVModule)
	pipe.AddStage(&conceptWVModule)
	//pipe.AddStage(&lastThreshModule)
	pipe.AddStage(&keyWVModule)

	// thresh then do finer methods
	//pipe.AddStage(&keyModule)
	//pipe.AddStage(&entityModule)

	// build the story
	assert.Nil(t, relationDB.Open("http://localhost:7474"))
	articles, err := relationDB.GetAll()

	assert.Nil(t, err)
	//assert.True(t, len(articles) > 150)

	set := testSet{
		//mainArticle: "The Horror in San Bernardino",
		mainArticle: "Fear Ignorance, Not Muslims",
		//mainArticle:     "Ted ‘Carpet-Bomb’ Cruz",
		//mainArticle: "Deregulating Corporate America",
		//mainArticle: "Course Correction for School Testing",
		//mainArticle: "If New York Really Wants to Help the Homeless",
		//mainArticle: "Social Security in an Election Year",
		//mainArticle: "The Reproductive Rights Rollback of 2015",
		//mainArticle: "Strong Unions, Strong Democracy",
		//mainArticle: "Voter Fatigue in New York",
		//mainArticle: "Depraved Indifference Toward Flint",

		relatedArticles: articles,
	}

	story := storyFromSet(set)
	fmt.Println(story.MainArticle.Name())

	raw, err := storyDriver(pipe, story)
	fmt.Println("len of data comming out:", len(raw))
	data := heapFilter(raw, scoreFuncs, weightMap, 10)

	// only get the top couple of articles

	assert.Nil(t, err)
	fmt.Println("main:", story.MainArticle.Name())
	for i := range data {
		fmt.Println(i, data[i].Name())
		printArticle(data[i])
		fmt.Println("total score:", scoreArticle(&data[i], scoreFuncs, weightMap))
		fmt.Println()
	}
}
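heapFilter and scoreArticle are not included in this listing either. Judging by the call sites, scoreArticle combines each named score through its score func and weight, and heapFilter keeps the top n articles by that total. A rough sketch of that combination step, using stand-in types, pre-reduced float32 scores, and the standard sort package in place of a heap:

// Hypothetical stand-in for an article carrying named scores.
type scoredArticle struct {
	name   string
	scores map[string]float32 // assumed already reduced to one number per name
}

// totalScore mirrors what scoreArticle seems to do: weight each named score
// and sum the results.
func totalScore(a scoredArticle, weights map[string]float32) float32 {
	var total float32
	for name, s := range a.scores {
		total += weights[name] * s
	}
	return total
}

// topN mirrors heapFilter's apparent job: keep only the n highest-scoring
// articles (a sort is used here instead of a heap for brevity).
func topN(articles []scoredArticle, weights map[string]float32, n int) []scoredArticle {
	sort.Slice(articles, func(i, j int) bool {
		return totalScore(articles[i], weights) > totalScore(articles[j], weights)
	})
	if len(articles) > n {
		return articles[:n]
	}
	return articles
}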