Ejemplo n.º 1
0
func TestAddKeywords(t *testing.T) {
	var indexer Indexer
	indexer.Init(types.IndexerInitOptions{IndexType: types.LocationsIndex})
	indexer.AddDocumentToCache(&types.DocumentIndex{
		DocId:    1,
		Keywords: []types.KeywordIndex{{"token1", 0, []int{}}},
	}, false)
	indexer.AddDocumentToCache(&types.DocumentIndex{
		DocId:    2,
		Keywords: []types.KeywordIndex{{"token2", 0, []int{}}},
	}, false)
	indexer.AddDocumentToCache(&types.DocumentIndex{
		DocId:    3,
		Keywords: []types.KeywordIndex{{"token3", 0, []int{}}},
	}, false)
	indexer.AddDocumentToCache(&types.DocumentIndex{
		DocId:    7,
		Keywords: []types.KeywordIndex{{"token7", 0, []int{}}},
	}, false)
	indexer.AddDocumentToCache(&types.DocumentIndex{
		DocId:    1,
		Keywords: []types.KeywordIndex{{"token2", 0, []int{}}},
	}, false)
	indexer.AddDocumentToCache(&types.DocumentIndex{
		DocId:    7,
		Keywords: []types.KeywordIndex{{"token77", 0, []int{}}},
	}, false)
	indexer.AddDocumentToCache(nil, true)

	utils.Expect(t, "", indicesToString(&indexer, "token1"))
	utils.Expect(t, "1 2 ", indicesToString(&indexer, "token2"))
	utils.Expect(t, "3 ", indicesToString(&indexer, "token3"))
	utils.Expect(t, "7 ", indicesToString(&indexer, "token77"))
}
Ejemplo n.º 2
0
func TestRemoveDocument(t *testing.T) {
	var engine Engine
	engine.Init(types.EngineInitOptions{
		SegmenterDictionaries: "../testdata/test_dict.txt",
		DefaultRankOptions: &types.RankOptions{
			ScoringCriteria: TestScoringCriteria{},
		},
	})

	AddDocs(&engine)
	engine.RemoveDocument(5, false)
	engine.RemoveDocument(6, false)
	engine.FlushIndex()
	engine.IndexDocument(6, types.DocumentIndexData{
		Content: "中国人口有十三亿",
		Fields:  ScoringFields{0, 9, 1},
	}, false)
	engine.FlushIndex()

	outputs := engine.Search(types.SearchRequest{Text: "中国人口"})
	utils.Expect(t, "2", len(outputs.Docs))

	utils.Expect(t, "6", outputs.Docs[0].DocId)
	utils.Expect(t, "9000", int(outputs.Docs[0].Scores[0]*1000))
	utils.Expect(t, "1", outputs.Docs[1].DocId)
	utils.Expect(t, "6000", int(outputs.Docs[1].Scores[0]*1000))
}
Ejemplo n.º 3
0
func TestLookupWithPartialLocations(t *testing.T) {
	var indexer Indexer
	indexer.Init(types.IndexerInitOptions{IndexType: types.LocationsIndex})
	// doc0 = "token2 token4 token4 token2 token3 token4" + "label1"(不在文本中)
	indexer.AddDocument(&types.DocumentIndex{
		DocId: 0,
		Keywords: []types.KeywordIndex{
			{"token2", 0, []int{0, 21}},
			{"token3", 0, []int{28}},
			{"label1", 0, []int{}},
			{"token4", 0, []int{7, 14, 35}},
		},
	})
	// doc1 = "token2 token4 token4 token2 token3 token4"
	indexer.AddDocument(&types.DocumentIndex{
		DocId: 1,
		Keywords: []types.KeywordIndex{
			{"token2", 0, []int{0, 21}},
			{"token3", 0, []int{28}},
			{"token4", 0, []int{7, 14, 35}},
		},
	})

	utils.Expect(t, "0 ", indicesToString(&indexer, "label1"))

	utils.Expect(t, "[0 1 [21 28]] ",
		indexedDocsToString(indexer.Lookup([]string{"token2", "token3"}, []string{"label1"}, nil)))
}
Ejemplo n.º 4
0
func TestEngineIndexDocumentWithPersistentStorage(t *testing.T) {
	gob.Register(ScoringFields{})
	var engine Engine
	engine.Init(types.EngineInitOptions{
		SegmenterDictionaries: "../testdata/test_dict.txt",
		DefaultRankOptions: &types.RankOptions{
			OutputOffset:    0,
			MaxOutputs:      10,
			ScoringCriteria: &RankByTokenProximity{},
		},
		IndexerInitOptions: &types.IndexerInitOptions{
			IndexType: types.LocationsIndex,
		},
		UsePersistentStorage:    true,
		PersistentStorageFolder: "wukong.persistent",
		PersistentStorageShards: 2,
	})
	AddDocs(&engine)
	engine.RemoveDocument(5, true)
	engine.Close()

	var engine1 Engine
	engine1.Init(types.EngineInitOptions{
		SegmenterDictionaries: "../testdata/test_dict.txt",
		DefaultRankOptions: &types.RankOptions{
			OutputOffset:    0,
			MaxOutputs:      10,
			ScoringCriteria: &RankByTokenProximity{},
		},
		IndexerInitOptions: &types.IndexerInitOptions{
			IndexType: types.LocationsIndex,
		},
		UsePersistentStorage:    true,
		PersistentStorageFolder: "wukong.persistent",
		PersistentStorageShards: 2,
	})
	engine1.FlushIndex()

	outputs := engine1.Search(types.SearchRequest{Text: "中国人口"})
	utils.Expect(t, "2", len(outputs.Tokens))
	utils.Expect(t, "中国", outputs.Tokens[0])
	utils.Expect(t, "人口", outputs.Tokens[1])
	utils.Expect(t, "2", len(outputs.Docs))

	utils.Expect(t, "2", outputs.Docs[0].DocId)
	utils.Expect(t, "1000", int(outputs.Docs[0].Scores[0]*1000))
	utils.Expect(t, "[0 6]", outputs.Docs[0].TokenSnippetLocations)

	utils.Expect(t, "1", outputs.Docs[1].DocId)
	utils.Expect(t, "76", int(outputs.Docs[1].Scores[0]*1000))
	utils.Expect(t, "[0 18]", outputs.Docs[1].TokenSnippetLocations)

	engine1.Close()
	os.RemoveAll("wukong.persistent")
}
Ejemplo n.º 5
0
func TestLookupWithBM25(t *testing.T) {
	var indexer Indexer
	indexer.Init(types.IndexerInitOptions{
		IndexType: types.FrequenciesIndex,
		BM25Parameters: &types.BM25Parameters{
			K1: 1,
			B:  1,
		},
	})
	// doc0 = "token2 token4 token4 token2 token3 token4"
	indexer.AddDocument(&types.DocumentIndex{
		DocId:       0,
		TokenLength: 6,
		Keywords: []types.KeywordIndex{
			{"token2", 3, []int{0, 21}},
			{"token3", 7, []int{28}},
			{"token4", 15, []int{7, 14, 35}},
		},
	})
	// doc0 = "token6 token7"
	indexer.AddDocument(&types.DocumentIndex{
		DocId:       1,
		TokenLength: 2,
		Keywords: []types.KeywordIndex{
			{"token6", 3, []int{0}},
			{"token7", 15, []int{7}},
		},
	})

	outputs := indexer.Lookup([]string{"token2", "token3", "token4"}, []string{}, nil)

	// BM25 = log2(3) * (12/9 + 28/17 + 60/33) = 6.3433
	utils.Expect(t, "76055", int(outputs[0].BM25*10000))
}
Ejemplo n.º 6
0
func TestRemoveDocument(t *testing.T) {
	var ranker Ranker
	ranker.Init()
	ranker.AddDoc(1, DummyScoringFields{
		label:   "label3",
		counter: 3,
		amount:  22.3,
	})
	ranker.AddDoc(2, DummyScoringFields{
		label:   "label4",
		counter: 1,
		amount:  2,
	})
	ranker.AddDoc(3, DummyScoringFields{
		label:   "label1",
		counter: 7,
		amount:  10.3,
	})
	ranker.RemoveDoc(3)

	criteria := DummyScoringCriteria{}
	scoredDocs, _ := ranker.Rank([]types.IndexedDocument{
		types.IndexedDocument{DocId: 1, TokenProximity: 6},
		types.IndexedDocument{DocId: 2, TokenProximity: -1},
		types.IndexedDocument{DocId: 3, TokenProximity: 24},
		types.IndexedDocument{DocId: 4, TokenProximity: 18},
	}, types.RankOptions{ScoringCriteria: criteria}, false)
	utils.Expect(t, "[1 [25300 ]] [2 [3000 ]] ", scoredDocsToString(scoredDocs))
}
Ejemplo n.º 7
0
func TestLookupWithLocations(t *testing.T) {
	var indexer Indexer
	indexer.Init(types.IndexerInitOptions{IndexType: types.LocationsIndex})
	// doc1 = "token2 token4 token4 token2 token3 token4"
	indexer.AddDocumentToCache(&types.DocumentIndex{
		DocId: 1,
		Keywords: []types.KeywordIndex{
			{"token2", 0, []int{0, 21}},
			{"token3", 0, []int{28}},
			{"token4", 0, []int{7, 14, 35}},
		},
	}, true)

	// doc2 = "token2 token4 token4 token2 token3 token4"
	indexer.AddDocumentToCache(&types.DocumentIndex{
		DocId: 2,
		Keywords: []types.KeywordIndex{
			{"token3", 0, []int{0, 21}},
			{"token5", 0, []int{28}},
			{"token2", 0, []int{7, 14, 35}},
		},
	}, true)

	indexer.RemoveDocumentToCache(2, true)
	docs, _ := indexer.Lookup([]string{"token2", "token3"}, []string{}, nil, false)
	utils.Expect(t, "[[0 21] [28]]", docs[0].TokenLocations)
}
Ejemplo n.º 8
0
func TestRankDocument(t *testing.T) {
	var ranker Ranker
	ranker.Init()
	scoredDocs := ranker.Rank([]types.IndexedDocument{
		types.IndexedDocument{DocId: 1, BM25: 6},
		types.IndexedDocument{DocId: 3, BM25: 24},
		types.IndexedDocument{DocId: 4, BM25: 18},
	}, types.RankOptions{ScoringCriteria: types.RankByBM25{}})
	utils.Expect(t, "[3 [24000 ]] [4 [18000 ]] [1 [6000 ]] ", scoredDocsToString(scoredDocs))

	scoredDocs = ranker.Rank([]types.IndexedDocument{
		types.IndexedDocument{DocId: 1, BM25: 6},
		types.IndexedDocument{DocId: 3, BM25: 24},
		types.IndexedDocument{DocId: 2, BM25: 0},
		types.IndexedDocument{DocId: 4, BM25: 18},
	}, types.RankOptions{ScoringCriteria: types.RankByBM25{}, ReverseOrder: true})
	utils.Expect(t, "[2 [0 ]] [1 [6000 ]] [4 [18000 ]] [3 [24000 ]] ", scoredDocsToString(scoredDocs))
}
Ejemplo n.º 9
0
func TestRemoveDocument(t *testing.T) {
	var engine Engine
	engine.Init(types.EngineInitOptions{
		SegmenterDictionaries: "../testdata/test_dict.txt",
		DefaultRankOptions: &types.RankOptions{
			ScoringCriteria: TestScoringCriteria{},
		},
	})

	AddDocs(&engine)
	engine.RemoveDocument(4)

	outputs := engine.Search(types.SearchRequest{Text: "中国人口"})
	utils.Expect(t, "1", len(outputs.Docs))

	utils.Expect(t, "0", outputs.Docs[0].DocId)
	utils.Expect(t, "6000", int(outputs.Docs[0].Scores[0]*1000))
}
Ejemplo n.º 10
0
func TestRankWithCriteria(t *testing.T) {
	var ranker Ranker
	ranker.Init()
	ranker.AddScoringFields(1, DummyScoringFields{
		label:   "label3",
		counter: 3,
		amount:  22.3,
	})
	ranker.AddScoringFields(2, DummyScoringFields{
		label:   "label4",
		counter: 1,
		amount:  2,
	})
	ranker.AddScoringFields(3, DummyScoringFields{
		label:   "label1",
		counter: 7,
		amount:  10.3,
	})
	ranker.AddScoringFields(4, DummyScoringFields{
		label:   "label1",
		counter: -1,
		amount:  2.3,
	})

	criteria := DummyScoringCriteria{}
	scoredDocs := ranker.Rank([]types.IndexedDocument{
		types.IndexedDocument{DocId: 1, TokenProximity: 6},
		types.IndexedDocument{DocId: 2, TokenProximity: -1},
		types.IndexedDocument{DocId: 3, TokenProximity: 24},
		types.IndexedDocument{DocId: 4, TokenProximity: 18},
	}, types.RankOptions{ScoringCriteria: criteria})
	utils.Expect(t, "[1 [25300 ]] [3 [17300 ]] [2 [3000 ]] [4 [1300 ]] ", scoredDocsToString(scoredDocs))

	criteria.Threshold = 4
	scoredDocs = ranker.Rank([]types.IndexedDocument{
		types.IndexedDocument{DocId: 1, TokenProximity: 6},
		types.IndexedDocument{DocId: 2, TokenProximity: -1},
		types.IndexedDocument{DocId: 3, TokenProximity: 24},
		types.IndexedDocument{DocId: 4, TokenProximity: 18},
	}, types.RankOptions{ScoringCriteria: criteria})
	utils.Expect(t, "[1 [25300 ]] [3 [17300 ]] ", scoredDocsToString(scoredDocs))
}
Ejemplo n.º 11
0
func TestOpenOrCreateBolt(t *testing.T) {
	db, err := openBoltStorage("bolt_test")
	utils.Expect(t, "<nil>", err)
	db.Close()

	db, err = openBoltStorage("bolt_test")
	utils.Expect(t, "<nil>", err)
	err = db.Set([]byte("key1"), []byte("value1"))
	utils.Expect(t, "<nil>", err)

	buffer := make([]byte, 100)
	buffer, err = db.Get([]byte("key1"))
	utils.Expect(t, "<nil>", err)
	utils.Expect(t, "value1", string(buffer))

	walFile := db.WALName()
	db.Close()
	os.Remove(walFile)
	os.Remove("bolt_test")
}
Ejemplo n.º 12
0
func TestLookupWithProximity(t *testing.T) {
	var indexer Indexer
	indexer.Init(types.IndexerInitOptions{IndexType: types.LocationsIndex})

	// doc0 = "token2 token4 token4 token2 token3 token4"
	indexer.AddDocument(&types.DocumentIndex{
		DocId: 0,
		Keywords: []types.KeywordIndex{
			{"token2", 0, []int{0, 21}},
			{"token3", 0, []int{28}},
			{"token4", 0, []int{7, 14, 35}},
		},
	})
	utils.Expect(t, "[0 1 [21 28]] ",
		indexedDocsToString(indexer.Lookup([]string{"token2", "token3"}, []string{}, nil)))

	// doc0 = "t2 t1 . . . t2 t3"
	indexer.AddDocument(&types.DocumentIndex{
		DocId: 0,
		Keywords: []types.KeywordIndex{
			{"t1", 0, []int{3}},
			{"t2", 0, []int{0, 12}},
			{"t3", 0, []int{15}},
		},
	})
	utils.Expect(t, "[0 8 [3 12 15]] ",
		indexedDocsToString(indexer.Lookup([]string{"t1", "t2", "t3"}, []string{}, nil)))

	// doc0 = "t3 t2 t1 . . . . . t2 t3"
	indexer.AddDocument(&types.DocumentIndex{
		DocId: 0,
		Keywords: []types.KeywordIndex{
			{"t1", 0, []int{6}},
			{"t2", 0, []int{3, 19}},
			{"t3", 0, []int{0, 22}},
		},
	})
	utils.Expect(t, "[0 10 [6 3 0]] ",
		indexedDocsToString(indexer.Lookup([]string{"t1", "t2", "t3"}, []string{}, nil)))
}
Ejemplo n.º 13
0
func TestEngineIndexDocumentWithContentAndLabels(t *testing.T) {
	var engine1, engine2 Engine
	engine1.Init(types.EngineInitOptions{
		SegmenterDictionaries: "../data/dictionary.txt",
		IndexerInitOptions: &types.IndexerInitOptions{
			IndexType: types.LocationsIndex,
		},
	})
	engine2.Init(types.EngineInitOptions{
		SegmenterDictionaries: "../data/dictionary.txt",
		IndexerInitOptions: &types.IndexerInitOptions{
			IndexType: types.DocIdsIndex,
		},
	})

	addDocsWithLabels(&engine1)
	addDocsWithLabels(&engine2)

	outputs1 := engine1.Search(types.SearchRequest{Text: "百度"})
	outputs2 := engine2.Search(types.SearchRequest{Text: "百度"})
	utils.Expect(t, "1", len(outputs1.Tokens))
	utils.Expect(t, "1", len(outputs2.Tokens))
	utils.Expect(t, "百度", outputs1.Tokens[0])
	utils.Expect(t, "百度", outputs2.Tokens[0])
	utils.Expect(t, "5", len(outputs1.Docs))
	utils.Expect(t, "5", len(outputs2.Docs))
}
Ejemplo n.º 14
0
func TestAddKeywords(t *testing.T) {
	var indexer Indexer
	indexer.Init(types.IndexerInitOptions{IndexType: types.LocationsIndex})
	indexer.AddDocument(&types.DocumentIndex{
		DocId:    1,
		Keywords: []types.KeywordIndex{{"token1", 0, []int{}}},
	})
	indexer.AddDocument(&types.DocumentIndex{
		DocId:    7,
		Keywords: []types.KeywordIndex{{"token1", 0, []int{}}},
	})
	indexer.AddDocument(&types.DocumentIndex{
		DocId:    2,
		Keywords: []types.KeywordIndex{{"token1", 0, []int{}}},
	})
	indexer.AddDocument(&types.DocumentIndex{
		DocId:    3,
		Keywords: []types.KeywordIndex{{"token2", 0, []int{}}},
	})
	indexer.AddDocument(&types.DocumentIndex{
		DocId:    1,
		Keywords: []types.KeywordIndex{{"token1", 0, []int{}}},
	})
	indexer.AddDocument(&types.DocumentIndex{
		DocId:    1,
		Keywords: []types.KeywordIndex{{"token2", 0, []int{}}},
	})
	indexer.AddDocument(&types.DocumentIndex{
		DocId:    2,
		Keywords: []types.KeywordIndex{{"token2", 0, []int{}}},
	})
	indexer.AddDocument(&types.DocumentIndex{
		DocId:    0,
		Keywords: []types.KeywordIndex{{"token2", 0, []int{}}},
	})

	utils.Expect(t, "1 2 7 ", indicesToString(&indexer, "token1"))
	utils.Expect(t, "0 1 2 3 ", indicesToString(&indexer, "token2"))
}
Ejemplo n.º 15
0
func TestCountDocsOnly(t *testing.T) {
	var engine Engine
	engine.Init(types.EngineInitOptions{
		SegmenterDictionaries: "../testdata/test_dict.txt",
		DefaultRankOptions: &types.RankOptions{
			ReverseOrder:    true,
			OutputOffset:    0,
			MaxOutputs:      1,
			ScoringCriteria: &RankByTokenProximity{},
		},
		IndexerInitOptions: &types.IndexerInitOptions{
			IndexType: types.LocationsIndex,
		},
	})

	AddDocs(&engine)
	engine.RemoveDocument(4)

	outputs := engine.Search(types.SearchRequest{Text: "中国人口", CountDocsOnly: true})
	utils.Expect(t, "0", len(outputs.Docs))
	utils.Expect(t, "2", len(outputs.Tokens))
	utils.Expect(t, "2", outputs.NumDocs)
}
Ejemplo n.º 16
0
func TestFrequenciesIndex(t *testing.T) {
	var engine Engine
	engine.Init(types.EngineInitOptions{
		SegmenterDictionaries: "../testdata/test_dict.txt",
		DefaultRankOptions: &types.RankOptions{
			ScoringCriteria: BM25ScoringCriteria{},
		},
		IndexerInitOptions: &types.IndexerInitOptions{
			IndexType: types.FrequenciesIndex,
		},
	})

	AddDocs(&engine)

	outputs := engine.Search(types.SearchRequest{Text: "中国人口"})
	utils.Expect(t, "2", len(outputs.Docs))

	utils.Expect(t, "4", outputs.Docs[0].DocId)
	utils.Expect(t, "2311", int(outputs.Docs[0].Scores[0]*1000))

	utils.Expect(t, "0", outputs.Docs[1].DocId)
	utils.Expect(t, "2211", int(outputs.Docs[1].Scores[0]*1000))
}
Ejemplo n.º 17
0
func TestRankDocument(t *testing.T) {
	var ranker Ranker
	ranker.Init()
	ranker.AddDoc(1, DummyScoringFields{})
	ranker.AddDoc(3, DummyScoringFields{})
	ranker.AddDoc(4, DummyScoringFields{})

	scoredDocs, _ := ranker.Rank([]types.IndexedDocument{
		types.IndexedDocument{DocId: 1, BM25: 6},
		types.IndexedDocument{DocId: 3, BM25: 24},
		types.IndexedDocument{DocId: 4, BM25: 18},
	}, types.RankOptions{ScoringCriteria: types.RankByBM25{}}, false)
	utils.Expect(t, "[3 [24000 ]] [4 [18000 ]] [1 [6000 ]] ", scoredDocsToString(scoredDocs))

	scoredDocs, _ = ranker.Rank([]types.IndexedDocument{
		types.IndexedDocument{DocId: 1, BM25: 6},
		types.IndexedDocument{DocId: 3, BM25: 24},
		types.IndexedDocument{DocId: 2, BM25: 0},
		types.IndexedDocument{DocId: 4, BM25: 18},
	}, types.RankOptions{ScoringCriteria: types.RankByBM25{}, ReverseOrder: true}, false)
	// doc0因为没有AddDoc所以没有添加进来
	utils.Expect(t, "[1 [6000 ]] [4 [18000 ]] [3 [24000 ]] ", scoredDocsToString(scoredDocs))
}
Ejemplo n.º 18
0
func TestOffsetAndMaxOutputs(t *testing.T) {
	var engine Engine
	engine.Init(types.EngineInitOptions{
		SegmenterDictionaries: "../testdata/test_dict.txt",
		DefaultRankOptions: &types.RankOptions{
			ReverseOrder:    true,
			OutputOffset:    1,
			MaxOutputs:      3,
			ScoringCriteria: &RankByTokenProximity{},
		},
		IndexerInitOptions: &types.IndexerInitOptions{
			IndexType: types.LocationsIndex,
		},
	})

	AddDocs(&engine)

	outputs := engine.Search(types.SearchRequest{Text: "中国人口"})
	utils.Expect(t, "2", len(outputs.Docs))

	utils.Expect(t, "4", outputs.Docs[0].DocId)
	utils.Expect(t, "1", outputs.Docs[1].DocId)
}
Ejemplo n.º 19
0
func TestSearchWithin(t *testing.T) {
	var engine Engine
	engine.Init(types.EngineInitOptions{
		SegmenterDictionaries: "../testdata/test_dict.txt",
		DefaultRankOptions: &types.RankOptions{
			ReverseOrder:    true,
			OutputOffset:    0,
			MaxOutputs:      10,
			ScoringCriteria: &RankByTokenProximity{},
		},
		IndexerInitOptions: &types.IndexerInitOptions{
			IndexType: types.LocationsIndex,
		},
	})

	AddDocs(&engine)

	docIds := make(map[uint64]bool)
	docIds[5] = true
	docIds[1] = true
	outputs := engine.Search(types.SearchRequest{
		Text:   "中国人口",
		DocIds: docIds,
	})
	utils.Expect(t, "2", len(outputs.Tokens))
	utils.Expect(t, "中国", outputs.Tokens[0])
	utils.Expect(t, "人口", outputs.Tokens[1])
	utils.Expect(t, "2", len(outputs.Docs))

	utils.Expect(t, "1", outputs.Docs[0].DocId)
	utils.Expect(t, "76", int(outputs.Docs[0].Scores[0]*1000))
	utils.Expect(t, "[0 18]", outputs.Docs[0].TokenSnippetLocations)

	utils.Expect(t, "5", outputs.Docs[1].DocId)
	utils.Expect(t, "100", int(outputs.Docs[1].Scores[0]*1000))
	utils.Expect(t, "[0 15]", outputs.Docs[1].TokenSnippetLocations)
}
Ejemplo n.º 20
0
func TestLookupWithLocations(t *testing.T) {
	var indexer Indexer
	indexer.Init(types.IndexerInitOptions{IndexType: types.LocationsIndex})
	// doc0 = "token2 token4 token4 token2 token3 token4"
	indexer.AddDocument(&types.DocumentIndex{
		DocId: 0,
		Keywords: []types.KeywordIndex{
			{"token2", 0, []int{0, 21}},
			{"token3", 0, []int{28}},
			{"token4", 0, []int{7, 14, 35}},
		},
	})

	utils.Expect(t, "[[0 21] [28]]",
		indexer.Lookup([]string{"token2", "token3"}, []string{}, nil)[0].TokenLocations)
}
Ejemplo n.º 21
0
func TestLookupWithinDocIds(t *testing.T) {
	var indexer Indexer
	indexer.Init(types.IndexerInitOptions{IndexType: types.LocationsIndex})
	// doc0 = "token2 token3"
	indexer.AddDocument(&types.DocumentIndex{
		DocId: 0,
		Keywords: []types.KeywordIndex{
			{"token2", 0, []int{0}},
			{"token3", 0, []int{7}},
		},
	})
	// doc1 = "token1 token2 token3"
	indexer.AddDocument(&types.DocumentIndex{
		DocId: 1,
		Keywords: []types.KeywordIndex{
			{"token1", 0, []int{0}},
			{"token2", 0, []int{7}},
			{"token3", 0, []int{14}},
		},
	})
	// doc2 = "token1 token2"
	indexer.AddDocument(&types.DocumentIndex{
		DocId: 2,
		Keywords: []types.KeywordIndex{
			{"token1", 0, []int{0}},
			{"token2", 0, []int{7}},
		},
	})
	// doc3 = "token2"
	indexer.AddDocument(&types.DocumentIndex{
		DocId: 3,
		Keywords: []types.KeywordIndex{
			{"token2", 0, []int{0}},
		},
	})

	docIds := make(map[uint64]bool)
	docIds[0] = true
	docIds[2] = true
	utils.Expect(t, "[2 0 [7]] [0 0 [0]] ",
		indexedDocsToString(indexer.Lookup([]string{"token2"}, []string{}, &docIds)))
}
Ejemplo n.º 22
0
func TestRemoveDocument(t *testing.T) {
	var indexer Indexer
	indexer.Init(types.IndexerInitOptions{IndexType: types.LocationsIndex})

	// doc1 = "token2 token3"
	indexer.AddDocumentToCache(&types.DocumentIndex{
		DocId: 1,
		Keywords: []types.KeywordIndex{
			{"token2", 0, []int{0}},
			{"token3", 0, []int{7}},
		},
	}, false)
	// doc2 = "token1 token2 token3"
	indexer.AddDocumentToCache(&types.DocumentIndex{
		DocId: 2,
		Keywords: []types.KeywordIndex{
			{"token1", 0, []int{0}},
			{"token2", 0, []int{7}},
		},
	}, true)
	utils.Expect(t, "2 ", indicesToString(&indexer, "token1"))
	utils.Expect(t, "1 2 ", indicesToString(&indexer, "token2"))
	utils.Expect(t, "1 ", indicesToString(&indexer, "token3"))

	indexer.RemoveDocumentToCache(2, false)
	// doc1 = "token1 token3"
	indexer.AddDocumentToCache(&types.DocumentIndex{
		DocId: 1,
		Keywords: []types.KeywordIndex{
			{"token1", 0, []int{0}},
			{"token3", 0, []int{7}},
		},
	}, true)
	utils.Expect(t, "1 ", indicesToString(&indexer, "token1"))
	utils.Expect(t, "", indicesToString(&indexer, "token2"))
	utils.Expect(t, "1 ", indicesToString(&indexer, "token3"))

	// doc2 = "token1 token2 token3"
	indexer.AddDocumentToCache(&types.DocumentIndex{
		DocId: 2,
		Keywords: []types.KeywordIndex{
			{"token1", 0, []int{0}},
			{"token2", 0, []int{7}},
			{"token3", 0, []int{14}},
		},
	}, true)
	utils.Expect(t, "1 2 ", indicesToString(&indexer, "token1"))
	utils.Expect(t, "2 ", indicesToString(&indexer, "token2"))
	utils.Expect(t, "1 2 ", indicesToString(&indexer, "token3"))

	// doc3 = "token1 token3"
	indexer.AddDocumentToCache(&types.DocumentIndex{
		DocId: 3,
		Keywords: []types.KeywordIndex{
			{"token1", 0, []int{0}},
			{"token2", 0, []int{7}},
		},
	}, true)
	indexer.RemoveDocumentToCache(3, true)
	utils.Expect(t, "1 2 ", indicesToString(&indexer, "token1"))
	utils.Expect(t, "2 ", indicesToString(&indexer, "token2"))
	utils.Expect(t, "1 2 ", indicesToString(&indexer, "token3"))

	// doc2 = "token1 token2 token3"
	indexer.AddDocumentToCache(&types.DocumentIndex{
		DocId: 2,
		Keywords: []types.KeywordIndex{
			{"token2", 0, []int{0}},
			{"token3", 0, []int{7}},
		},
	}, true)
	// doc3 = "token1 token3"
	indexer.AddDocumentToCache(&types.DocumentIndex{
		DocId: 3,
		Keywords: []types.KeywordIndex{
			{"token1", 0, []int{0}},
			{"token2", 0, []int{7}},
		},
	}, true)
	utils.Expect(t, "1 3 ", indicesToString(&indexer, "token1"))
	utils.Expect(t, "2 3 ", indicesToString(&indexer, "token2"))
	utils.Expect(t, "1 2 ", indicesToString(&indexer, "token3"))
}
Ejemplo n.º 23
0
func TestEngineIndexDocumentWithTokens(t *testing.T) {
	var engine Engine
	engine.Init(types.EngineInitOptions{
		SegmenterDictionaries: "../testdata/test_dict.txt",
		DefaultRankOptions: &types.RankOptions{
			OutputOffset:    0,
			MaxOutputs:      10,
			ScoringCriteria: &RankByTokenProximity{},
		},
		IndexerInitOptions: &types.IndexerInitOptions{
			IndexType: types.LocationsIndex,
		},
	})

	docId := uint64(0)
	engine.IndexDocument(docId, types.DocumentIndexData{
		Content: "",
		Tokens: []types.TokenData{
			{"中国", []int{0}},
			{"人口", []int{18, 24}},
		},
		Fields: ScoringFields{1, 2, 3},
	})
	docId++
	engine.IndexDocument(docId, types.DocumentIndexData{
		Content: "",
		Tokens: []types.TokenData{
			{"中国", []int{0}},
			{"人口", []int{6}},
		},
		Fields: ScoringFields{1, 2, 3},
	})
	docId++
	engine.IndexDocument(docId, types.DocumentIndexData{
		Content: "中国十三亿人口",
		Fields:  ScoringFields{0, 9, 1},
	})

	engine.FlushIndex()

	outputs := engine.Search(types.SearchRequest{Text: "中国人口"})
	utils.Expect(t, "2", len(outputs.Tokens))
	utils.Expect(t, "中国", outputs.Tokens[0])
	utils.Expect(t, "人口", outputs.Tokens[1])
	utils.Expect(t, "3", len(outputs.Docs))

	utils.Expect(t, "1", outputs.Docs[0].DocId)
	utils.Expect(t, "1000", int(outputs.Docs[0].Scores[0]*1000))
	utils.Expect(t, "[0 6]", outputs.Docs[0].TokenSnippetLocations)

	utils.Expect(t, "2", outputs.Docs[1].DocId)
	utils.Expect(t, "100", int(outputs.Docs[1].Scores[0]*1000))
	utils.Expect(t, "[0 15]", outputs.Docs[1].TokenSnippetLocations)

	utils.Expect(t, "0", outputs.Docs[2].DocId)
	utils.Expect(t, "76", int(outputs.Docs[2].Scores[0]*1000))
	utils.Expect(t, "[0 18]", outputs.Docs[2].TokenSnippetLocations)
}
Ejemplo n.º 24
0
func TestLookup(t *testing.T) {
	var indexer Indexer
	indexer.Init(types.IndexerInitOptions{IndexType: types.LocationsIndex})
	// doc0 = "token2 token3"
	indexer.AddDocument(&types.DocumentIndex{
		DocId: 0,
		Keywords: []types.KeywordIndex{
			{"token2", 0, []int{0}},
			{"token3", 0, []int{7}},
		},
	})
	// doc1 = "token1 token2 token3"
	indexer.AddDocument(&types.DocumentIndex{
		DocId: 1,
		Keywords: []types.KeywordIndex{
			{"token1", 0, []int{0}},
			{"token2", 0, []int{7}},
			{"token3", 0, []int{14}},
		},
	})
	// doc2 = "token1 token2"
	indexer.AddDocument(&types.DocumentIndex{
		DocId: 2,
		Keywords: []types.KeywordIndex{
			{"token1", 0, []int{0}},
			{"token2", 0, []int{7}},
		},
	})
	// doc3 = "token2"
	indexer.AddDocument(&types.DocumentIndex{
		DocId: 3,
		Keywords: []types.KeywordIndex{
			{"token2", 0, []int{0}},
		},
	})
	// doc7 = "token1 token3"
	indexer.AddDocument(&types.DocumentIndex{
		DocId: 7,
		Keywords: []types.KeywordIndex{
			{"token1", 0, []int{0}},
			{"token3", 0, []int{7}},
		},
	})
	// doc9 = "token3"
	indexer.AddDocument(&types.DocumentIndex{
		DocId: 9,
		Keywords: []types.KeywordIndex{
			{"token3", 0, []int{0}},
		},
	})

	utils.Expect(t, "1 2 7 ", indicesToString(&indexer, "token1"))
	utils.Expect(t, "0 1 2 3 ", indicesToString(&indexer, "token2"))
	utils.Expect(t, "0 1 7 9 ", indicesToString(&indexer, "token3"))

	utils.Expect(t, "", indexedDocsToString(indexer.Lookup([]string{"token4"}, []string{}, nil)))

	utils.Expect(t, "[7 0 [0]] [2 0 [0]] [1 0 [0]] ",
		indexedDocsToString(indexer.Lookup([]string{"token1"}, []string{}, nil)))
	utils.Expect(t, "", indexedDocsToString(indexer.Lookup([]string{"token1", "token4"}, []string{}, nil)))

	utils.Expect(t, "[2 1 [0 7]] [1 1 [0 7]] ",
		indexedDocsToString(indexer.Lookup([]string{"token1", "token2"}, []string{}, nil)))
	utils.Expect(t, "[2 13 [7 0]] [1 13 [7 0]] ",
		indexedDocsToString(indexer.Lookup([]string{"token2", "token1"}, []string{}, nil)))
	utils.Expect(t, "[7 1 [0 7]] [1 8 [0 14]] ",
		indexedDocsToString(indexer.Lookup([]string{"token1", "token3"}, []string{}, nil)))
	utils.Expect(t, "[7 13 [7 0]] [1 20 [14 0]] ",
		indexedDocsToString(indexer.Lookup([]string{"token3", "token1"}, []string{}, nil)))
	utils.Expect(t, "[1 1 [7 14]] [0 1 [0 7]] ",
		indexedDocsToString(indexer.Lookup([]string{"token2", "token3"}, []string{}, nil)))
	utils.Expect(t, "[1 13 [14 7]] [0 13 [7 0]] ",
		indexedDocsToString(indexer.Lookup([]string{"token3", "token2"}, []string{}, nil)))

	utils.Expect(t, "[1 2 [0 7 14]] ",
		indexedDocsToString(indexer.Lookup([]string{"token1", "token2", "token3"}, []string{}, nil)))
	utils.Expect(t, "[1 26 [14 7 0]] ",
		indexedDocsToString(indexer.Lookup([]string{"token3", "token2", "token1"}, []string{}, nil)))
}
Ejemplo n.º 25
0
func TestEngineIndexDocument(t *testing.T) {
	var engine Engine
	engine.Init(types.EngineInitOptions{
		SegmenterDictionaries: "../testdata/test_dict.txt",
		DefaultRankOptions: &types.RankOptions{
			OutputOffset:    0,
			MaxOutputs:      10,
			ScoringCriteria: &RankByTokenProximity{},
		},
		IndexerInitOptions: &types.IndexerInitOptions{
			IndexType: types.LocationsIndex,
		},
	})

	AddDocs(&engine)

	outputs := engine.Search(types.SearchRequest{Text: "中国人口"})
	utils.Expect(t, "2", len(outputs.Tokens))
	utils.Expect(t, "中国", outputs.Tokens[0])
	utils.Expect(t, "人口", outputs.Tokens[1])
	utils.Expect(t, "3", len(outputs.Docs))

	utils.Expect(t, "1", outputs.Docs[0].DocId)
	utils.Expect(t, "1000", int(outputs.Docs[0].Scores[0]*1000))
	utils.Expect(t, "[0 6]", outputs.Docs[0].TokenSnippetLocations)

	utils.Expect(t, "4", outputs.Docs[1].DocId)
	utils.Expect(t, "100", int(outputs.Docs[1].Scores[0]*1000))
	utils.Expect(t, "[0 15]", outputs.Docs[1].TokenSnippetLocations)

	utils.Expect(t, "0", outputs.Docs[2].DocId)
	utils.Expect(t, "76", int(outputs.Docs[2].Scores[0]*1000))
	utils.Expect(t, "[0 18]", outputs.Docs[2].TokenSnippetLocations)
}
Ejemplo n.º 26
0
func TestLookupDocIdsIndex(t *testing.T) {
	var indexer Indexer
	indexer.Init(types.IndexerInitOptions{IndexType: types.DocIdsIndex})
	// doc1 = "token2 token3"
	indexer.AddDocumentToCache(&types.DocumentIndex{
		DocId: 1,
		Keywords: []types.KeywordIndex{
			{"token2", 0, []int{0}},
			{"token3", 0, []int{7}},
		},
	}, false)
	// doc2 = "token1 token2 token3"
	indexer.AddDocumentToCache(&types.DocumentIndex{
		DocId: 2,
		Keywords: []types.KeywordIndex{
			{"token1", 0, []int{0}},
			{"token2", 0, []int{7}},
			{"token3", 0, []int{14}},
		},
	}, false)
	// doc3 = "token1 token2"
	indexer.AddDocumentToCache(&types.DocumentIndex{
		DocId: 3,
		Keywords: []types.KeywordIndex{
			{"token1", 0, []int{0}},
			{"token2", 0, []int{7}},
		},
	}, false)
	// doc4 = "token2"
	indexer.AddDocumentToCache(&types.DocumentIndex{
		DocId: 4,
		Keywords: []types.KeywordIndex{
			{"token2", 0, []int{0}},
		},
	}, false)
	// doc7 = "token1 token3"
	indexer.AddDocumentToCache(&types.DocumentIndex{
		DocId: 7,
		Keywords: []types.KeywordIndex{
			{"token1", 0, []int{0}},
			{"token3", 0, []int{7}},
		},
	}, false)
	// doc9 = "token3"
	indexer.AddDocumentToCache(&types.DocumentIndex{
		DocId: 9,
		Keywords: []types.KeywordIndex{
			{"token3", 0, []int{0}},
		},
	}, true)

	utils.Expect(t, "2 3 7 ", indicesToString(&indexer, "token1"))
	utils.Expect(t, "1 2 3 4 ", indicesToString(&indexer, "token2"))
	utils.Expect(t, "1 2 7 9 ", indicesToString(&indexer, "token3"))

	utils.Expect(t, "", indexedDocsToString(indexer.Lookup([]string{"token4"}, []string{}, nil, false)))

	utils.Expect(t, "[7 0 []] [3 0 []] [2 0 []] ",
		indexedDocsToString(indexer.Lookup([]string{"token1"}, []string{}, nil, false)))
	utils.Expect(t, "", indexedDocsToString(indexer.Lookup([]string{"token1", "token4"}, []string{}, nil, false)))

	utils.Expect(t, "[3 0 []] [2 0 []] ",
		indexedDocsToString(indexer.Lookup([]string{"token1", "token2"}, []string{}, nil, false)))
	utils.Expect(t, "[3 0 []] [2 0 []] ",
		indexedDocsToString(indexer.Lookup([]string{"token2", "token1"}, []string{}, nil, false)))
	utils.Expect(t, "[7 0 []] [2 0 []] ",
		indexedDocsToString(indexer.Lookup([]string{"token1", "token3"}, []string{}, nil, false)))
	utils.Expect(t, "[7 0 []] [2 0 []] ",
		indexedDocsToString(indexer.Lookup([]string{"token3", "token1"}, []string{}, nil, false)))
	utils.Expect(t, "[2 0 []] [1 0 []] ",
		indexedDocsToString(indexer.Lookup([]string{"token2", "token3"}, []string{}, nil, false)))
	utils.Expect(t, "[2 0 []] [1 0 []] ",
		indexedDocsToString(indexer.Lookup([]string{"token3", "token2"}, []string{}, nil, false)))

	utils.Expect(t, "[2 0 []] ",
		indexedDocsToString(indexer.Lookup([]string{"token1", "token2", "token3"}, []string{}, nil, false)))
	utils.Expect(t, "[2 0 []] ",
		indexedDocsToString(indexer.Lookup([]string{"token3", "token2", "token1"}, []string{}, nil, false)))
}