Beispiel #1
0
// Search runs a match query for queryString against the wiki index and
// returns one SearchResult per hit. Each result carries the hit ID as its
// path, the name of the article (or, when there is no article, the section)
// that the ID resolves to, and the hit's highlight fragments concatenated
// into a single text. An error is returned when the index search fails or
// when a hit ID resolves to neither a section nor an article.
func (self *Wiki) Search(queryString string) ([]*SearchResult, error) {
	request := bleve.NewSearchRequest(bleve.NewMatchQuery(queryString))
	request.Highlight = bleve.NewHighlight()

	found, err := self.Index.Search(request)
	if err != nil {
		return nil, err
	}

	// Start from an empty, non-nil slice so that "no hits" is not nil.
	results := make([]*SearchResult, 0)
	for _, hit := range found.Hits {
		section, article := self.Find(hit.ID)
		if section == nil && article == nil {
			return nil, fmt.Errorf("%v section or article not found", hit.ID)
		}

		// An article name takes precedence over a section name.
		var name string
		switch {
		case article != nil:
			name = article.Name
		case section != nil:
			name = section.Name
		}

		// Glue together every fragment of every entry in hit.Fragments.
		var text string
		for _, fragments := range hit.Fragments {
			for _, fragment := range fragments {
				text += fragment
			}
		}

		results = append(results, &SearchResult{Path: hit.ID, Name: name, Text: text})
	}
	return results, nil
}
Beispiel #2
0
// Search looks up the records matching the query string q. Hits whose record
// fails to load are skipped. When a hit has highlight fragments for the
// "Body" field, the first fragment is set as the record's body. A failed
// search yields no records.
func (i *bleveIndexer) Search(q string) (records []indexer.Record) {
	request := bleve.NewSearchRequest(bleve.NewQueryStringQuery(q))
	request.Highlight = bleve.NewHighlight()

	result, err := i.bleve.Search(request)
	if err != nil { // an empty query would cause this
		return nil
	}

	for _, hit := range result.Hits {
		rec := i.Record(hit.ID)
		if !rec.Load() {
			continue
		}

		if body := hit.Fragments["Body"]; len(body) > 0 {
			rec.SetBody([]byte(body[0]))
		}

		records = append(records, rec)
	}

	return records
}
Beispiel #3
0
// BlevePIndexQuerySamples returns documentation entries that each pair a
// description with a sample bleve query POST body. Every body embeds an
// optional *cbgt.QueryCtlParams alongside a *bleve.SearchRequest.
func BlevePIndexQuerySamples() []cbgt.Documentation {
	// First sample: a bare query with explicit paging and no ctl section
	// (the embedded QueryCtlParams pointer is left nil).
	simpleBody := &struct {
		*cbgt.QueryCtlParams
		*bleve.SearchRequest
	}{
		SearchRequest: &bleve.SearchRequest{
			From:  0,
			Size:  10,
			Query: bleve.NewQueryStringQuery("a sample query"),
		},
	}

	// Second sample: paging plus a ctl section carrying a timeout and
	// "at_plus" consistency vectors.
	pagedBody := &struct {
		*cbgt.QueryCtlParams
		*bleve.SearchRequest
	}{
		QueryCtlParams: &cbgt.QueryCtlParams{
			Ctl: cbgt.QueryCtl{
				Timeout: cbgt.QUERY_CTL_DEFAULT_TIMEOUT_MS,
				Consistency: &cbgt.ConsistencyParams{
					Level: "at_plus",
					Vectors: map[string]cbgt.ConsistencyVector{
						"customerIndex": {
							"0":        123,
							"1/a0b1c2": 234,
						},
					},
				},
			},
		},
		SearchRequest: &bleve.SearchRequest{
			From:      20,
			Size:      10,
			Fields:    []string{"*"},
			Query:     bleve.NewQueryStringQuery("alice smith"),
			Highlight: bleve.NewHighlight(),
			Explain:   true,
		},
	}

	return []cbgt.Documentation{
		{
			Text: "A simple bleve query POST body:",
			JSON: simpleBody,
		},
		{
			Text: `An example POST body using from/size for results paging,
using ctl for a timeout and for "at_plus" consistency level.
On consistency, the index must have incorporated at least mutation
sequence-number 123 for partition (vbucket) 0 and mutation
sequence-number 234 for partition (vbucket) 1 (where vbucket 1
should have a vbucketUUID of a0b1c2):`,
			JSON: pagedBody,
		},
	}
}
Beispiel #4
0
// Example builds a bleve index whose default analyzer uses the custom
// "gojieba" tokenizer, indexes three short Chinese messages, and prints the
// prettified result of three query-string searches. The on-disk index
// directory is removed both before the run and when the example finishes.
func Example() {
	indexDir := "gojieba.bleve"
	messages := []struct {
		Id   string
		Body string
	}{
		{
			Id:   "1",
			Body: "你好",
		},
		{
			Id:   "2",
			Body: "交代",
		},
		{
			Id:   "3",
			Body: "长江大桥",
		},
	}

	indexMapping := bleve.NewIndexMapping()
	// Start from a clean slate in case a previous run left the directory behind.
	os.RemoveAll(indexDir)
	// clean index when example finished
	defer os.RemoveAll(indexDir)

	err := indexMapping.AddCustomTokenizer("gojieba",
		map[string]interface{}{
			"dictpath":     gojieba.DICT_PATH,
			"hmmpath":      gojieba.HMM_PATH,
			"userdictpath": gojieba.USER_DICT_PATH,
			"idf":          gojieba.IDF_PATH,
			"stop_words":   gojieba.STOP_WORDS_PATH,
			"type":         "gojieba",
		},
	)
	if err != nil {
		panic(err)
	}
	err = indexMapping.AddCustomAnalyzer("gojieba",
		map[string]interface{}{
			"type":      "gojieba",
			"tokenizer": "gojieba",
		},
	)
	if err != nil {
		panic(err)
	}
	indexMapping.DefaultAnalyzer = "gojieba"

	index, err := bleve.New(indexDir, indexMapping)
	if err != nil {
		panic(err)
	}
	// Release the index before its directory is deleted: deferred calls run
	// last-in, first-out, so this runs ahead of the RemoveAll deferred above.
	defer func() {
		if err := index.Close(); err != nil {
			panic(err)
		}
	}()

	for _, msg := range messages {
		if err := index.Index(msg.Id, msg); err != nil {
			panic(err)
		}
	}

	queries := []string{
		"你好世界",
		"亲口交代",
		"长江",
	}

	for _, q := range queries {
		req := bleve.NewSearchRequest(bleve.NewQueryStringQuery(q))
		req.Highlight = bleve.NewHighlight()
		res, err := index.Search(req)
		if err != nil {
			panic(err)
		}
		fmt.Println(prettify(res))
	}

	// Output:
	// [{"id":"1","score":0.27650412875470115}]
	// [{"id":"2","score":0.27650412875470115}]
	// [{"id":"3","score":0.7027325540540822}]
}
Beispiel #5
0
// collectSearchSubmission copies the recognised search parameters out of
// values into a submission map, converting each to the type it is stored as:
// "all_ids" as bool, "from"/"size"/"total" as int, and the "q", "q_exact",
// "q_excluded" and "q_required" fields as strings. Multiple values for one
// key are concatenated before conversion. Values that fail to parse and
// unrecognised keys are dropped.
func collectSearchSubmission(values map[string][]string) map[string]interface{} {
	submission := make(map[string]interface{})
	for k, v := range values {
		joined := strings.Join(v, "")
		switch k {
		case "all_ids":
			if b, err := strconv.ParseBool(joined); err == nil {
				submission[k] = b
			}
		case "from", "size", "total":
			if i, err := strconv.Atoi(joined); err == nil {
				submission[k] = i
			}
		case "q", "q_exact", "q_excluded", "q_required":
			submission[k] = joined
		}
	}
	return submission
}

// resultsHandler serves the search results page. It gathers the search
// parameters (from the URL query for GET, from the parsed form for POST),
// turns them into a bleve conjunction query with highlighting and subject
// facets, runs the search, and renders the results through the
// results-search templates.
//
// Failures to parse the form or build the query are answered with 400;
// search and template failures are answered with 500. Each failure is also
// reported through responseLogger.
func resultsHandler(w http.ResponseWriter, r *http.Request) {
	const (
		pageHTML    = "results-search.html"
		pageInclude = "results-search.include"
	)

	urlQuery := r.URL.Query()
	if err := r.ParseForm(); err != nil {
		responseLogger(r, http.StatusBadRequest, err)
		w.Header().Set("Content-Type", "text/plain")
		w.WriteHeader(http.StatusBadRequest)
		fmt.Fprintf(w, "error in POST: %s", err)
		return
	}

	// Collect the submission fields. Basic search arrives as GET, advanced
	// search as POST; any other method yields an empty submission.
	var submission map[string]interface{}
	switch r.Method {
	case "GET":
		submission = collectSearchSubmission(urlQuery)
	case "POST":
		submission = collectSearchSubmission(r.Form)
	default:
		submission = make(map[string]interface{})
	}

	q, err := mapToSearchQuery(submission)
	if err != nil {
		responseLogger(r, http.StatusBadRequest, err)
		w.WriteHeader(http.StatusBadRequest)
		fmt.Fprintf(w, "%s", err)
		return
	}

	//
	// Note: Add logic to handle basic and advanced search...
	//
	// q           NewQueryStringQuery
	// q_required  NewQueryStringQuery with a + prefix for each strings.Fields(q_required) value
	// q_exact     NewMatchPhraseQuery
	// q_excluded  NewQueryStringQuery with a - prefix for each strings.Fields(q_excluded) value
	//
	var conQry []bleve.Query

	if q.Q != "" {
		conQry = append(conQry, bleve.NewQueryStringQuery(q.Q))
	}
	if q.QExact != "" {
		conQry = append(conQry, bleve.NewMatchPhraseQuery(q.QExact))
	}
	// strings.Fields already strips surrounding whitespace from each term.
	var terms []string
	for _, s := range strings.Fields(q.QRequired) {
		terms = append(terms, "+"+s)
	}
	for _, s := range strings.Fields(q.QExcluded) {
		terms = append(terms, "-"+s)
	}
	if len(terms) > 0 {
		conQry = append(conQry, bleve.NewQueryStringQuery(strings.Join(terms, " ")))
	}

	qry := bleve.NewConjunctionQuery(conQry)
	if q.Size == 0 {
		// Default page size when none was requested.
		q.Size = 10
	}
	searchRequest := bleve.NewSearchRequestOptions(qry, q.Size, q.From, q.Explain)
	if searchRequest == nil {
		// No error value exists at this point (the previous err was checked
		// and is nil), so build one instead of reporting "<nil>".
		err = fmt.Errorf("Can't build new search request options %+v", qry)
		responseLogger(r, http.StatusBadRequest, err)
		w.WriteHeader(http.StatusBadRequest)
		fmt.Fprintf(w, "%s", err)
		return
	}

	searchRequest.Highlight = bleve.NewHighlight()
	for _, field := range []string{
		"title",
		"content_description",
		"subjects",
		"subjects_function",
		"subjects_topical",
		"extents",
	} {
		searchRequest.Highlight.AddField(field)
	}

	// Request the top three terms for each subject facet.
	searchRequest.AddFacet("subjects", bleve.NewFacetRequest("subjects", 3))
	searchRequest.AddFacet("subjects_topical", bleve.NewFacetRequest("subjects_topical", 3))
	searchRequest.AddFacet("subjects_function", bleve.NewFacetRequest("subjects_function", 3))

	// Return all fields
	// NOTE(review): "use_restrictins", "use_restrictons_note" and
	// "link_agents_sources" look misspelled; confirm against the index
	// mapping before changing them, since they must match the stored names.
	searchRequest.Fields = []string{
		"title",
		"identifier",
		"content_description",
		"content_condition",
		"resource_type",
		"access_restrictions",
		"access_restrictions_note",
		"use_restrictins",
		"use_restrictons_note",
		"dates",
		"date_expression",
		"extents",
		"subjects",
		"subjects_function",
		"subjects_topical",
		"linked_agents_creators",
		"linked_agents_subjects",
		"link_agents_sources",
		"digital_objects.title",
		"digital_objects.file_uris",
		"related_resources",
		"deaccessions",
		"accession_date",
		"created",
	}

	searchResults, err := index.Search(searchRequest)
	if err != nil {
		responseLogger(r, http.StatusInternalServerError, fmt.Errorf("Bleve results error %v, %s", qry, err))
		w.WriteHeader(http.StatusInternalServerError)
		fmt.Fprintf(w, "%s", err)
		return
	}

	// q performs double duty: it is the structure for query submission and it
	// also carries the results, to support paging and other navigation
	// through the result set. The bleve search results are merged into it.
	q.AttachSearchResults(searchResults)

	// Load the templates and set up to execute them.
	tmpl, err := tmplfn.Assemble(tmplFuncs, path.Join(templatesDir, pageHTML), path.Join(templatesDir, pageInclude))
	if err != nil {
		responseLogger(r, http.StatusInternalServerError, fmt.Errorf("Template Errors: %s, %s, %s\n", pageHTML, pageInclude, err))
		w.WriteHeader(http.StatusInternalServerError)
		fmt.Fprintf(w, "Template errors: %s", err)
		return
	}

	// Render into a buffer first so the output can be post-processed below.
	w.Header().Set("Content-Type", "text/html")
	var buf bytes.Buffer
	if err := tmpl.Execute(&buf, q); err != nil {
		responseLogger(r, http.StatusInternalServerError, fmt.Errorf("Can't render %s, %s/%s, %s", templatesDir, pageHTML, pageInclude, err))
		w.WriteHeader(http.StatusInternalServerError)
		w.Write([]byte("Template error"))
		return
	}

	// NOTE: the rendered page has its escaped <mark> tags restored and its
	// ellipsis characters turned into entities, so that <mark> elements and
	// ellipses in the result fragments display as intended.
	page := buf.Bytes()
	page = bytes.Replace(page, []byte("&lt;mark&gt;"), []byte("<mark>"), -1)
	page = bytes.Replace(page, []byte("&lt;/mark&gt;"), []byte("</mark>"), -1)
	page = bytes.Replace(page, []byte(`…`), []byte(`&hellip;`), -1)
	w.Write(page)
}
Beispiel #6
0
// Example_beleveSearch builds a bleve index whose default analyzer combines
// the custom "jieba" tokenizer with English token filters, indexes five
// mixed English/Chinese documents, and prints the highlighted hits for a
// series of keyword searches. Any setup, indexing or search failure aborts
// the example via log.Fatal.
func Example_beleveSearch() {
	// open a new index
	indexMapping := bleve.NewIndexMapping()

	err := indexMapping.AddCustomTokenizer("jieba",
		map[string]interface{}{
			"file": "../dict.txt",
			"type": "jieba",
		})
	if err != nil {
		log.Fatal(err)
	}

	// create a custom analyzer
	err = indexMapping.AddCustomAnalyzer("jieba",
		map[string]interface{}{
			"type":      "custom",
			"tokenizer": "jieba",
			"token_filters": []string{
				"possessive_en",
				"to_lower",
				"stop_en",
			},
		})
	if err != nil {
		log.Fatal(err)
	}

	indexMapping.DefaultAnalyzer = "jieba"
	cacheDir := "jieba.beleve"
	// Remove any index left behind by a previous run.
	os.RemoveAll(cacheDir)
	index, err := bleve.New(cacheDir, indexMapping)
	if err != nil {
		log.Fatal(err)
	}
	// Release the index once the example has finished with it.
	defer func() {
		if err := index.Close(); err != nil {
			log.Fatal(err)
		}
	}()

	docs := []struct {
		Title string
		Name  string
	}{
		{
			Title: "Doc 1",
			Name:  "This is the first document we’ve added",
		},
		{
			Title: "Doc 2",
			Name:  "The second one 你 中文测试中文 is even more interesting! 吃水果",
		},
		{
			Title: "Doc 3",
			Name:  "买水果然后来世博园。",
		},
		{
			Title: "Doc 4",
			Name:  "工信处女干事每月经过下属科室都要亲口交代24口交换机等技术性器件的安装工作",
		},
		{
			Title: "Doc 5",
			Name:  "咱俩交换一下吧。",
		},
	}
	// index docs, using each title as the document ID; a document that fails
	// to index would make the searches below silently incomplete
	for _, doc := range docs {
		if err := index.Index(doc.Title, doc); err != nil {
			log.Fatal(err)
		}
	}

	// search for some text
	for _, keyword := range []string{"水果世博园", "你", "first", "中文", "交换机", "交换"} {
		query := bleve.NewQueryStringQuery(keyword)
		search := bleve.NewSearchRequest(query)
		search.Highlight = bleve.NewHighlight()
		searchResults, err := index.Search(search)
		if err != nil {
			log.Fatal(err)
		}
		fmt.Printf("Result of \"%s\": %d matches:\n", keyword, searchResults.Total)
		for i, hit := range searchResults.Hits {
			// Rank line first, then each highlighted field with its fragments.
			rv := fmt.Sprintf("%d. %s, (%f)\n", i+searchResults.Request.From+1, hit.ID, hit.Score)
			for fragmentField, fragments := range hit.Fragments {
				rv += fmt.Sprintf("%s: ", fragmentField)
				for _, fragment := range fragments {
					rv += fragment
				}
			}
			fmt.Printf("%s\n", rv)
		}
	}
	// Output:
	// Result of "水果世博园": 2 matches:
	// 1. Doc 3, (1.099550)
	// Name: 买<span class="highlight">水果</span>然后来<span class="highlight">世博</span>园。
	// 2. Doc 2, (0.031941)
	// Name: The second one 你 中文测试中文 is even more interesting! 吃<span class="highlight">水果</span>
	// Result of "你": 1 matches:
	// 1. Doc 2, (0.391161)
	// Name: The second one <span class="highlight">你</span> 中文测试中文 is even more interesting! 吃水果
	// Result of "first": 1 matches:
	// 1. Doc 1, (0.512150)
	// Name: This is the <span class="highlight">first</span> document we’ve added
	// Result of "中文": 1 matches:
	// 1. Doc 2, (0.553186)
	// Name: The second one 你 <span class="highlight">中文</span>测试<span class="highlight">中文</span> is even more interesting! 吃水果
	// Result of "交换机": 2 matches:
	// 1. Doc 4, (0.608495)
	// Name: 工信处女干事每月经过下属科室都要亲口交代24口<span class="highlight">交换机</span>等技术性器件的安装工作
	// 2. Doc 5, (0.086700)
	// Name: 咱俩<span class="highlight">交换</span>一下吧。
	// Result of "交换": 2 matches:
	// 1. Doc 5, (0.534158)
	// Name: 咱俩<span class="highlight">交换</span>一下吧。
	// 2. Doc 4, (0.296297)
	// Name: 工信处女干事每月经过下属科室都要亲口交代24口<span class="highlight">交换</span>机等技术性器件的安装工作
}