Пример #1
1
// Search searches the index for the given query, and returns documents,
// the total number of results, or an error if something went wrong.
//
// Only q.Term is sent to Solr; paging is currently not applied (see the
// disabled Start/Rows calls below). Every returned document gets a fixed
// score of 1.0. Multi-valued fields are flattened to their first value, and
// empty multi-valued fields are omitted from the document.
func (i *Index) Search(q query.Query) (docs []index.Document, total int, err error) {
	// Named sq so the local does not shadow the imported query package.
	sq := solr.NewQuery()
	sq.Q(q.Term)
	sq.AddParam("cache", "false")
	//sq.Start(int(q.Paging.Offset))
	//sq.Rows(int(q.Paging.Num))

	r, err := i.si.Search(sq).Result(nil)
	if err != nil {
		return nil, 0, err
	}

	ret := make([]index.Document, 0, len(r.Results.Docs))
	for _, d := range r.Results.Docs {
		// A missing or non-string id must surface as an error, not a panic.
		id, ok := d.Get("id").(string)
		if !ok {
			return nil, 0, fmt.Errorf("solr: result document has a missing or non-string id (%T)", d.Get("id"))
		}

		doc := index.NewDocument(id, 1.0)
		for k, v := range d {
			if k == "id" {
				continue
			}
			// reflect.ValueOf(nil) yields the zero Value whose Kind is
			// Invalid, so nil field values pass through without panicking.
			if rv := reflect.ValueOf(v); rv.Kind() == reflect.Slice {
				if rv.Len() == 0 {
					// Nothing to flatten; skip the empty field.
					continue
				}
				v = rv.Index(0).Interface()
			}
			doc.Set(k, v)
		}
		ret = append(ret, doc)
	}

	return ret, r.Results.NumFound, nil
}
// TestIndex indexes two documents into a local index at localhost:6379 and
// checks that searching returns the expected documents in the expected order.
func TestIndex(t *testing.T) {
	// todo: run redisearch automatically
	//t.SkipNow()
	md := index.NewMetadata().AddField(index.NewTextField("title", 1.0)).
		AddField(index.NewNumericField("score"))

	idx := NewIndex("localhost:6379", "testung", md)

	docs := []index.Document{
		index.NewDocument("doc1", 0.1).Set("title", "hello world").Set("score", 1),
		index.NewDocument("doc2", 1.0).Set("title", "foo bar hello").Set("score", 2),
	}

	assert.NoError(t, idx.Drop())
	assert.NoError(t, idx.Create())

	assert.NoError(t, idx.Index(docs, nil))

	q := query.NewQuery(idx.name, "hello world")
	docs, total, err := idx.Search(*q)
	assert.NoError(t, err)
	assert.True(t, total > 0)
	// assert.Len is non-fatal, so only index into docs when the length
	// matched; otherwise the test would panic instead of failing.
	if assert.Len(t, docs, 1) {
		assert.Equal(t, "doc1", docs[0].Id)
		assert.Equal(t, "hello world", docs[0].Properties["title"])
	}

	q = query.NewQuery(idx.name, "hello")
	docs, total, err = idx.Search(*q)
	assert.NoError(t, err)
	assert.Equal(t, 2, total)
	if assert.Len(t, docs, 2) {
		assert.Equal(t, "doc2", docs[0].Id)
		assert.Equal(t, "doc1", docs[1].Id)
	}
}
// TestDistributedIndex runs the basic index/search scenario against a
// distributed index on localhost:6379, then adds 100 suggestion terms and
// checks that a prefix lookup returns the requested number of suggestions.
func TestDistributedIndex(t *testing.T) {
	// todo: run redisearch automatically
	//t.SkipNow()
	md := index.NewMetadata().AddField(index.NewTextField("title", 1.0)).
		AddField(index.NewNumericField("score"))

	idx := NewDistributedIndex("dtest", []string{"localhost:6379"}, 2, md)

	docs := []index.Document{
		index.NewDocument("doc1", 0.1).Set("title", "hello world").Set("score", 1),
		index.NewDocument("doc2", 1.0).Set("title", "foo bar hello").Set("score", 2),
	}

	assert.NoError(t, idx.Drop())
	assert.NoError(t, idx.Create())

	assert.NoError(t, idx.Index(docs, nil))

	q := query.NewQuery("dtest", "hello world")
	docs, total, err := idx.Search(*q)
	assert.NoError(t, err)
	assert.True(t, total > 0)
	// assert.Len is non-fatal, so only index into docs when the length
	// matched; otherwise the test would panic instead of failing.
	if assert.Len(t, docs, 1) {
		assert.Equal(t, "doc1", docs[0].Id)
		assert.Equal(t, "hello world", docs[0].Properties["title"])
	}

	q = query.NewQuery("dtest", "hello")
	docs, total, err = idx.Search(*q)
	t.Log(docs, total, err)
	assert.NoError(t, err)
	assert.Equal(t, 2, total)
	if assert.Len(t, docs, 2) {
		assert.Equal(t, "doc2", docs[0].Id)
		assert.Equal(t, "doc1", docs[1].Id)
	}

	const numSuggestions = 100
	suggs := make([]index.Suggestion, 0, numSuggestions)
	for i := 0; i < numSuggestions; i++ {
		suggs = append(suggs, index.Suggestion{fmt.Sprintf("suggestion %d", i), float64(i)})
	}

	assert.NoError(t, idx.AddTerms(suggs...))

	suggs, err = idx.Suggest("sugg", 10, false)
	assert.NoError(t, err)
	// Log through the test runner so output is tied to this test and only
	// shown on failure or with -v.
	t.Log(suggs)
	assert.Len(t, suggs, 10)
}
Пример #4
0
// TestIndex indexes 100 identical-title documents into a local Solr core at
// http://localhost:8983/solr and checks the total and the first page returned
// by a search.
func TestIndex(t *testing.T) {
	// todo: run Solr automatically
	md := index.NewMetadata().AddField(index.NewTextField("title", 1.0)).
		AddField(index.NewNumericField("score"))

	idx, err := NewIndex("http://localhost:8983/solr", "testung", md)
	// Nothing below can work without an index, so stop here rather than
	// calling methods on an unusable value.
	if !assert.NoError(t, err) {
		return
	}

	const numDocs = 100
	docs := make([]index.Document, 0, numDocs)
	for i := 0; i < numDocs; i++ {
		docs = append(docs, index.NewDocument(fmt.Sprintf("doc%d", i), 0.1).Set("title", "hello world").Set("body", "lorem ipsum foo bar"))
	}

	assert.NoError(t, idx.Drop())

	//	assert.NoError(t, idx.Create())

	assert.NoError(t, idx.Index(docs, nil))

	q := query.NewQuery("testung", "hello world")
	docs, total, err := idx.Search(*q)
	assert.NoError(t, err)
	// Equal (rather than True) reports the actual total on failure.
	assert.Equal(t, numDocs, total)
	// assert.Len is non-fatal, so only index into docs when it is non-empty.
	if assert.Len(t, docs, int(q.Paging.Num)) && len(docs) > 0 {
		assert.Equal(t, "doc0", docs[0].Id)
		assert.Equal(t, "hello world", docs[0].Properties["title"])
	}
}
Пример #5
0
// Search searches the index for the given query, and returns documents,
// the total number of results, or an error if something went wrong.
//
// q.Term is run as a query-string query against documents of type "doc",
// with q.Paging.Offset and q.Paging.Num applied as from/size. Hits whose
// source cannot be decoded as JSON are left out of the returned slice, so the
// slice can be shorter than the page size while the total still counts them.
func (i *Index) Search(q query.Query) ([]index.Document, int, error) {
	eq := elastic.NewQueryStringQuery(q.Term)
	res, err := i.conn.Search(i.name).Type("doc").
		Query(eq).
		From(q.Paging.Offset).
		Size(q.Paging.Num).
		Do()
	if err != nil {
		return nil, 0, err
	}

	// A response without a hits section carries no documents; guard against
	// dereferencing it.
	if res.Hits == nil {
		return []index.Document{}, int(res.TotalHits()), nil
	}

	// Size the result by what actually came back, not by the caller-supplied
	// page size, which may be far larger (or negative).
	ret := make([]index.Document, 0, len(res.Hits.Hits))
	for _, h := range res.Hits.Hits {
		if h == nil {
			continue
		}

		// Score and Source are optional pointers; treat a missing score as 0
		// and a missing source as a document without properties.
		var score float32
		if h.Score != nil {
			score = float32(*h.Score)
		}

		d := index.NewDocument(h.Id, score)
		if h.Source != nil {
			if err := json.Unmarshal(*h.Source, &d.Properties); err != nil {
				// Best effort: skip hits with an undecodable source.
				continue
			}
		}
		ret = append(ret, d)
	}

	return ret, int(res.TotalHits()), nil
}
// Read parses an XML stream of <doc> elements from r in a background
// goroutine and sends one index.Document per accepted <doc> on the returned
// channel. The channel is closed when the input ends or the decoder reports
// an error; decoder errors other than io.EOF are printed to stdout. The
// returned error is always nil.
//
// For each <doc>, the text of its <title>, <url> and <abstract> children is
// collected. The document id is the last path element of the trimmed URL, a
// leading "Wikipedia: " is stripped from the title, and the document is only
// emitted if filter(title, body) accepts it. Documents without a URL are
// skipped.
//
// The channel is unbuffered, so the caller must drain it until it is closed
// or the goroutine stays blocked on the send.
func (wr *WikipediaAbstractsReader) Read(r io.Reader) (<-chan index.Document, error) {

	dec := xml.NewDecoder(r)
	ch := make(chan index.Document)
	go func() {
		// Close on every exit path so the consumer's range loop terminates.
		defer close(ch)

		props := map[string]string{}
		var currentText string
		for {
			tok, err := dec.RawToken()
			if err != nil {
				// Stop on any error, not just io.EOF: a malformed stream would
				// otherwise make the decoder return the same error forever.
				if err != io.EOF {
					fmt.Println("error: ", err)
				}
				return
			}

			switch t := tok.(type) {

			case xml.CharData:
				// Single-byte character data is dropped, as in the original
				// logic (presumably to skip lone newlines between elements).
				if len(t) > 1 {
					currentText += string(t)
				}

			case xml.EndElement:
				switch name := t.Name.Local; name {
				case "title", "url", "abstract":
					props[name] = currentText
				case "doc":
					// path.Base("") returns ".", so test the URL itself for
					// emptiness instead of the derived id.
					docURL := strings.TrimSpace(props["url"])
					if docURL != "" {
						id := path.Base(docURL)
						title := strings.TrimPrefix(strings.TrimSpace(props["title"]), "Wikipedia: ")
						body := strings.TrimSpace(props["abstract"])
						if filter(title, body) {
							doc := index.NewDocument(id, wr.score(id)).
								Set("title", title).
								Set("body", body).
								Set("url", docURL)
							ch <- doc
						}
					}
					props = map[string]string{}
				}
				// Text never carries over from one element to the next.
				currentText = ""
			}
		}
	}()
	return ch, nil
}
Пример #7
0
// loadDocument converts the raw pieces of a redis search reply into a proper
// Document object.
//
// id and sc must be []byte values holding the document id and its score as a
// decimal string. fields must be nil or a flat []interface{} of alternating
// property name ([]byte) and value entries; []byte values are converted to
// strings and any other value is stored unchanged.
//
// An error is returned, instead of panicking, when any piece has an
// unexpected type, the score is not a valid float, or the field list has an
// odd number of entries.
func loadDocument(id, sc, fields interface{}) (index.Document, error) {

	rawScore, ok := sc.([]byte)
	if !ok {
		return index.Document{}, fmt.Errorf("unexpected score type %T", sc)
	}
	score, err := strconv.ParseFloat(string(rawScore), 64)
	if err != nil {
		return index.Document{}, fmt.Errorf("Could not parse score: %s", err)
	}

	rawID, ok := id.([]byte)
	if !ok {
		return index.Document{}, fmt.Errorf("unexpected document id type %T", id)
	}

	var lst []interface{}
	if fields != nil {
		if lst, ok = fields.([]interface{}); !ok {
			return index.Document{}, fmt.Errorf("unexpected fields type %T", fields)
		}
	}
	// Fields come as name/value pairs; a dangling name would index past the
	// end of the list.
	if len(lst)%2 != 0 {
		return index.Document{}, fmt.Errorf("odd number of field entries: %d", len(lst))
	}

	doc := index.NewDocument(string(rawID), float32(score))
	for i := 0; i < len(lst); i += 2 {
		name, ok := lst[i].([]byte)
		if !ok {
			return index.Document{}, fmt.Errorf("unexpected property name type %T at position %d", lst[i], i)
		}

		var val interface{}
		switch v := lst[i+1].(type) {
		case []byte:
			val = string(v)
		default:
			val = v
		}
		doc = doc.Set(string(name), val)
	}
	return doc, nil
}
// TestPaging indexes N documents with increasing scores into a distributed
// index on localhost:6379 and checks that Limit(offset, num) returns the
// expected page, total, and an empty page when the offset is past the end.
func TestPaging(t *testing.T) {

	md := index.NewMetadata().AddField(index.NewTextField("title", 1.0)).
		AddField(index.NewNumericField("score"))

	idx := NewDistributedIndex("td", []string{"localhost:6379"}, 4, md)

	assert.NoError(t, idx.Drop())
	assert.NoError(t, idx.Create())

	N := 100
	docs := make([]index.Document, 0, N)
	for i := 0; i < N; i++ {
		docs = append(docs, index.NewDocument(fmt.Sprintf("doc%d", i), float32(i)/100).Set("title", fmt.Sprintf("hello world title%d", i)).Set("score", i))
	}
	assert.NoError(t, idx.Index(docs, nil))

	q := query.NewQuery("td", "hello").Limit(10, 10)
	docs, total, err := idx.Search(*q)
	assert.NoError(t, err)
	// assert.Len is non-fatal, so only index into docs when the length
	// matched; otherwise the test would panic instead of failing.
	if assert.Len(t, docs, 10) {
		assert.Equal(t, "doc89", docs[0].Id)
	}
	assert.Equal(t, N, total)

	q = query.NewQuery("td", "title80").Limit(0, 1)
	docs, total, err = idx.Search(*q)
	// Check the error here too, so a failed search is reported as such.
	assert.NoError(t, err)
	if assert.Len(t, docs, 1) {
		assert.Equal(t, "doc80", docs[0].Id)
	}
	assert.Equal(t, 1, total)

	q = query.NewQuery("td", "title80").Limit(5, 1)
	docs, total, err = idx.Search(*q)
	assert.NoError(t, err)
	assert.Len(t, docs, 0)
}