Beispiel #1
0
// BuildArticleMapping returns a mapping for indexing wikipedia articles
// in a manner similar to that done by lucene nightly benchmarks
func BuildArticleMapping() *bleve.IndexMapping {

	// a generic reusable mapping for english text
	standardJustIndexed := bleve.NewTextFieldMapping()
	standardJustIndexed.Store = false
	standardJustIndexed.IncludeInAll = false
	standardJustIndexed.IncludeTermVectors = false
	standardJustIndexed.Analyzer = "standard"

	keywordJustIndexed := bleve.NewTextFieldMapping()
	keywordJustIndexed.Store = false
	keywordJustIndexed.IncludeInAll = false
	keywordJustIndexed.IncludeTermVectors = false
	keywordJustIndexed.Analyzer = "keyword"

	articleMapping := bleve.NewDocumentMapping()

	// title
	articleMapping.AddFieldMappingsAt("title",
		keywordJustIndexed)

	// text
	articleMapping.AddFieldMappingsAt("text",
		standardJustIndexed)

	// _all (disabled)
	disabledSection := bleve.NewDocumentDisabledMapping()
	articleMapping.AddSubDocumentMapping("_all", disabledSection)

	indexMapping := bleve.NewIndexMapping()
	indexMapping.DefaultMapping = articleMapping
	indexMapping.DefaultAnalyzer = "standard"

	return indexMapping
}
func buildIndexMapping() *bleve.IndexMapping {

	enTextFieldMapping := bleve.NewTextFieldMapping()
	enTextFieldMapping.Analyzer = textFieldAnalyzer

	storeFieldOnlyMapping := bleve.NewTextFieldMapping()
	storeFieldOnlyMapping.Index = false
	storeFieldOnlyMapping.IncludeTermVectors = false
	storeFieldOnlyMapping.IncludeInAll = false

	dateTimeMapping := bleve.NewDateTimeFieldMapping()

	wikiMapping := bleve.NewDocumentMapping()
	wikiMapping.AddFieldMappingsAt("name", enTextFieldMapping)
	wikiMapping.AddFieldMappingsAt("body", enTextFieldMapping)
	wikiMapping.AddFieldMappingsAt("modified_by", enTextFieldMapping)
	wikiMapping.AddFieldMappingsAt("modified_by_name", enTextFieldMapping)
	wikiMapping.AddFieldMappingsAt("modified_by_email", enTextFieldMapping)
	wikiMapping.AddFieldMappingsAt("modified_by_avatar", storeFieldOnlyMapping)
	wikiMapping.AddFieldMappingsAt("modified", dateTimeMapping)

	indexMapping := bleve.NewIndexMapping()
	indexMapping.AddDocumentMapping("wiki", wikiMapping)

	indexMapping.DefaultAnalyzer = textFieldAnalyzer

	return indexMapping
}
Beispiel #3
0
func Init(allDocs GetAllDocsFn) {
	var err error

	idx_path := path.Join(config.Current.DataDir, index_name)

	index, err = bleve.Open(idx_path)
	if err == bleve.ErrorIndexPathDoesNotExist {
		log.Info("Creating new Index")
		indexMapping := bleve.NewIndexMapping()
		indexMapping.DefaultAnalyzer = config.Current.DefaultAnalyzer

		entryMapping := bleve.NewDocumentMapping()

		textField := bleve.NewTextFieldMapping()

		entryMapping.AddFieldMappingsAt("Body", textField)
		entryMapping.AddFieldMappingsAt("Title", textField)

		tagField := bleve.NewTextFieldMapping()
		tagField.Analyzer = tag_analyzer

		entryMapping.AddFieldMappingsAt("Tags", tagField)

		indexMapping.AddDocumentMapping("entry", entryMapping)

		index, err = bleve.New(idx_path, indexMapping)
		if err != nil {
			log.Fatal(err)
		}

		// reindex existing documents
		indexRebuildingLogged := false
		for itm := range allDocs() {
			if !indexRebuildingLogged {
				indexRebuildingLogged = true
				log.Info("Start rebuilding Search-Index")
			}

			index.Index(itm.Id(), itm.Content())
		}
		if indexRebuildingLogged {
			log.Info("Finished rebuilding Search-Index")
		}

	} else if err == nil {
		log.Info("Opening existing Index")
	} else {
		log.Fatal(err)
	}
}
Beispiel #4
0
func buildIndexMapping() (*bleve.IndexMapping, error) {

	// a generic reusable mapping for english text
	englishTextFieldMapping := bleve.NewTextFieldMapping()
	englishTextFieldMapping.Analyzer = "en"

	// a generic reusable mapping for keyword text
	keywordFieldMapping := bleve.NewTextFieldMapping()
	keywordFieldMapping.Analyzer = "keyword"

	// a specific mapping to index the description fields
	// detected language
	descriptionLangFieldMapping := bleve.NewTextFieldMapping()
	descriptionLangFieldMapping.Name = "descriptionLang"
	descriptionLangFieldMapping.Analyzer = "detect_lang"
	descriptionLangFieldMapping.Store = false
	descriptionLangFieldMapping.IncludeTermVectors = false
	descriptionLangFieldMapping.IncludeInAll = false

	beerMapping := bleve.NewDocumentMapping()

	// name
	beerMapping.AddFieldMappingsAt("name", englishTextFieldMapping)

	// description
	beerMapping.AddFieldMappingsAt("description",
		englishTextFieldMapping,
		descriptionLangFieldMapping)

	beerMapping.AddFieldMappingsAt("type", keywordFieldMapping)
	beerMapping.AddFieldMappingsAt("style", keywordFieldMapping)
	beerMapping.AddFieldMappingsAt("category", keywordFieldMapping)

	breweryMapping := bleve.NewDocumentMapping()
	breweryMapping.AddFieldMappingsAt("name", englishTextFieldMapping)
	breweryMapping.AddFieldMappingsAt("description", englishTextFieldMapping)

	indexMapping := bleve.NewIndexMapping()
	indexMapping.AddDocumentMapping("beer", beerMapping)
	indexMapping.AddDocumentMapping("brewery", breweryMapping)

	indexMapping.TypeField = "type"
	indexMapping.DefaultAnalyzer = "en"

	return indexMapping, nil
}
Beispiel #5
0
func buildIndexMapping(language string) *bleve.IndexMapping {
	txtMapping := bleve.NewTextFieldMapping()
	txtMapping.Analyzer = language

	storeFieldOnlyMapping := bleve.NewTextFieldMapping()
	storeFieldOnlyMapping.Index = false
	storeFieldOnlyMapping.IncludeTermVectors = false
	storeFieldOnlyMapping.IncludeInAll = false

	docMapping := bleve.NewDocumentMapping()
	docMapping.AddSubDocumentMapping("id", bleve.NewDocumentDisabledMapping())
	docMapping.AddFieldMappingsAt("content", txtMapping)
	docMapping.AddFieldMappingsAt("title", txtMapping)
	docMapping.AddFieldMappingsAt("data", storeFieldOnlyMapping)

	mapping := bleve.NewIndexMapping()
	mapping.AddDocumentMapping("doc", docMapping)
	mapping.DefaultAnalyzer = language
	return mapping
}
Beispiel #6
0
func buildIndexMapping() (*bleve.IndexMapping, error) {
	var err error

	// Create the index mapping, configure the analyzer, and set as default.
	indexMapping := bleve.NewIndexMapping()
	err = indexMapping.AddCustomTokenizer("ekanite_tk",
		map[string]interface{}{
			"regexp": `[^\W_]+`,
			"type":   regexp_tokenizer.Name,
		})
	if err != nil {
		return nil, err
	}

	err = indexMapping.AddCustomAnalyzer("ekanite",
		map[string]interface{}{
			"type":          custom_analyzer.Name,
			"char_filters":  []interface{}{},
			"tokenizer":     `ekanite_tk`,
			"token_filters": []interface{}{`to_lower`},
		})
	if err != nil {
		return nil, err
	}
	indexMapping.DefaultAnalyzer = "ekanite"

	// Create field-specific mappings.

	simpleJustIndexed := bleve.NewTextFieldMapping()
	simpleJustIndexed.Store = false
	simpleJustIndexed.IncludeInAll = true // XXX Move to false when using AST
	simpleJustIndexed.IncludeTermVectors = false

	timeJustIndexed := bleve.NewDateTimeFieldMapping()
	timeJustIndexed.Store = false
	timeJustIndexed.IncludeInAll = false
	timeJustIndexed.IncludeTermVectors = false

	articleMapping := bleve.NewDocumentMapping()

	// Connect field mappings to fields.
	articleMapping.AddFieldMappingsAt("Message", simpleJustIndexed)
	articleMapping.AddFieldMappingsAt("ReferenceTime", timeJustIndexed)
	articleMapping.AddFieldMappingsAt("ReceptionTime", timeJustIndexed)

	// Tell the index about field mappings.
	indexMapping.DefaultMapping = articleMapping

	return indexMapping, nil
}
Beispiel #7
0
func buildIndexMapping() (*bleve.IndexMapping, error) {

	// a generic reusable mapping for english text
	englishTextFieldMapping := bleve.NewTextFieldMapping()
	englishTextFieldMapping.Analyzer = en.AnalyzerName

	// a generic reusable mapping for keyword text
	keywordFieldMapping := bleve.NewTextFieldMapping()
	keywordFieldMapping.Analyzer = keyword_analyzer.Name

	beerMapping := bleve.NewDocumentMapping()

	// name
	beerMapping.AddFieldMappingsAt("name", englishTextFieldMapping)

	// description
	beerMapping.AddFieldMappingsAt("description",
		englishTextFieldMapping)

	beerMapping.AddFieldMappingsAt("type", keywordFieldMapping)
	beerMapping.AddFieldMappingsAt("style", keywordFieldMapping)
	beerMapping.AddFieldMappingsAt("category", keywordFieldMapping)

	breweryMapping := bleve.NewDocumentMapping()
	breweryMapping.AddFieldMappingsAt("name", englishTextFieldMapping)
	breweryMapping.AddFieldMappingsAt("description", englishTextFieldMapping)

	indexMapping := bleve.NewIndexMapping()
	indexMapping.AddDocumentMapping("beer", beerMapping)
	indexMapping.AddDocumentMapping("brewery", breweryMapping)

	indexMapping.TypeField = "type"
	indexMapping.DefaultAnalyzer = "en"

	return indexMapping, nil
}
Beispiel #8
0
func NewSearchIndex(repo content.Repo, config Config, logger webfw.Logger) (SearchIndex, error) {
	var err error
	var index bleve.Index

	si := SearchIndex{}

	_, err = os.Stat(config.SearchIndex.BlevePath)
	if err == nil {
		logger.Infoln("Opening search index " + config.SearchIndex.BlevePath)
		index, err = bleve.Open(config.SearchIndex.BlevePath)

		if err != nil {
			return EmptySearchIndex, errors.New(fmt.Sprintf("Error opening search index: %v\n", err))
		}
	} else if os.IsNotExist(err) {
		mapping := bleve.NewIndexMapping()
		docMapping := bleve.NewDocumentMapping()

		idfieldmapping := bleve.NewTextFieldMapping()
		idfieldmapping.IncludeInAll = false
		docMapping.AddFieldMappingsAt("FeedId", idfieldmapping)
		docMapping.AddFieldMappingsAt("ArticleId", idfieldmapping)

		mapping.AddDocumentMapping(mapping.DefaultType, docMapping)

		logger.Infoln("Creating search index " + config.SearchIndex.BlevePath)
		index, err = bleve.New(config.SearchIndex.BlevePath, mapping)

		if err != nil {
			return EmptySearchIndex, errors.New(fmt.Sprintf("Error creating search index: %v\n", err))
		}

		si.newIndex = true
	} else {
		return EmptySearchIndex, errors.New(
			fmt.Sprintf("Error getting stat of '%s': %v\n", config.SearchIndex.BlevePath, err))
	}

	si.logger = logger
	si.repo = repo
	si.Index = index
	si.batchSize = config.SearchIndex.BatchSize

	return si, nil
}
Beispiel #9
0
func buildIndexMapping() *bleve.IndexMapping {
	// a generic reusable mapping for keyword text
	keywordFieldMapping := bleve.NewTextFieldMapping()
	keywordFieldMapping.Analyzer = "keyword"

	torrentMapping := bleve.NewDocumentMapping()
	torrentMapping.AddFieldMappingsAt("category", keywordFieldMapping)

	im := bleve.NewIndexMapping()

	im.AddDocumentMapping("torrent", torrentMapping)
	im.TypeField = "type"
	im.DefaultAnalyzer = "standard"
	im.DefaultType = "torrent"
	im.DefaultField = "name"

	return im
}
Beispiel #10
0
func buildIndexMapping() (*bleve.IndexMapping, error) {
	zlmapping := bleve.NewDocumentMapping()
	fulltext := bleve.NewTextFieldMapping()
	fulltext.Name = "fulltext"
	fulltext.Analyzer = "gojieba"

	zlmapping.AddFieldMappingsAt("F_ApplicationNo", fulltext)
	zlmapping.AddFieldMappingsAt("F_PatentName", fulltext)
	zlmapping.AddFieldMappingsAt("F_Inventor", fulltext)
	zlmapping.AddFieldMappingsAt("F_Applicant", fulltext)
	zlmapping.AddFieldMappingsAt("F_AreaCode", fulltext)
	zlmapping.AddFieldMappingsAt("F_ApplicantAddress", fulltext)
	zlmapping.AddFieldMappingsAt("W_WoAppCode", fulltext)
	zlmapping.AddFieldMappingsAt("F_Abstract", fulltext)
	zlmapping.AddFieldMappingsAt("F_Cite", fulltext)
	zlmapping.AddFieldMappingsAt("W_ReprintPatent", fulltext)
	zlmapping.AddFieldMappingsAt("F_SignoryItem", fulltext)
	zlmapping.AddFieldMappingsAt("F_PatentName", fulltext)
	indexMapping.AddDocumentMapping("zl", zlmapping)
	return indexMapping, nil
}
Beispiel #11
0
func NewBleve(path string, size int64, logger webfw.Logger) (content.SearchProvider, error) {
	var err error
	var exists bool
	var index bleve.Index

	_, err = os.Stat(path)
	if err == nil {
		logger.Infoln("Opening search index " + path)
		index, err = bleve.Open(path)

		if err != nil {
			return nil, errors.New(fmt.Sprintf("Error opening search index: %v\n", err))
		}

		exists = true
	} else if os.IsNotExist(err) {
		mapping := bleve.NewIndexMapping()
		docMapping := bleve.NewDocumentMapping()

		idfieldmapping := bleve.NewTextFieldMapping()
		idfieldmapping.IncludeInAll = false
		docMapping.AddFieldMappingsAt("FeedId", idfieldmapping)
		docMapping.AddFieldMappingsAt("ArticleId", idfieldmapping)

		mapping.AddDocumentMapping(mapping.DefaultType, docMapping)

		logger.Infoln("Creating search index " + path)
		index, err = bleve.NewUsing(path, mapping, upside_down.Name, goleveldb.Name, nil)

		if err != nil {
			return nil, errors.New(fmt.Sprintf("Error creating search index: %v\n", err))
		}
	} else {
		return nil, errors.New(
			fmt.Sprintf("Error getting stat of '%s': %v\n", path, err))
	}

	return &Bleve{logger: logger, index: index, batchSize: size, newIndex: !exists}, nil
}
Beispiel #12
0
func openIndex(name string) (bleve.Index, error) {
	textFieldMapping := bleve.NewTextFieldMapping()

	doc := bleve.NewDocumentMapping()
	doc.AddFieldMappingsAt("name", textFieldMapping)
	doc.AddFieldMappingsAt("body", textFieldMapping)
	doc.AddFieldMappingsAt("modied", textFieldMapping)

	indexMap := bleve.NewIndexMapping()
	indexMap.AddDocumentMapping("document", doc)

	blv, err := bleve.New(name, indexMap)

	if err != nil {
		blv, err = bleve.Open(name)
		if err != nil {
			return nil, err
		}
	}

	return blv, nil
}
Beispiel #13
0
func buildIndexMapping() *bleve.IndexMapping {
	// a generic reusable mapping for english text
	englishTextFieldMapping := bleve.NewTextFieldMapping()
	englishTextFieldMapping.Analyzer = en.AnalyzerName

	// RecipeLink
	linkMapping := bleve.NewDocumentMapping()
	linkMapping.AddFieldMappingsAt("title", englishTextFieldMapping)
	linkMapping.AddFieldMappingsAt("description", englishTextFieldMapping)
	linkMapping.AddFieldMappingsAt("url", englishTextFieldMapping)

	// Recipe
	recipeMapping := bleve.NewDocumentMapping()
	recipeMapping.AddFieldMappingsAt("name", englishTextFieldMapping)
	recipeMapping.AddFieldMappingsAt("description", englishTextFieldMapping)
	recipeMapping.AddFieldMappingsAt("author", englishTextFieldMapping)

	indexMapping := bleve.NewIndexMapping()
	indexMapping.DefaultAnalyzer = "en"
	indexMapping.AddDocumentMapping("title", linkMapping)
	//indexMapping.AddDocumentMapping("recipe", recipeMapping)

	return indexMapping
}
Beispiel #14
0
func NewOfferIndex(dir string) (bleve.Index, error) {
	err := os.RemoveAll(dir)
	if err != nil && !os.IsNotExist(err) {
		return nil, err
	}

	parts := []string{}
	for _, exc := range indexExceptions {
		parts = append(parts, regexp.QuoteMeta(exc))
	}
	pattern := strings.Join(parts, "|")
	pattern = "(?i)(?:" + pattern + ")"

	m := bleve.NewIndexMapping()
	apecTokenizer := "apec"
	err = m.AddCustomTokenizer(apecTokenizer, map[string]interface{}{
		"type":       exception.Name,
		"exceptions": []string{pattern},
		"tokenizer":  bleveuni.Name,
	})
	if err != nil {
		return nil, err
	}

	apecTokens := "apec_tokens"
	err = m.AddCustomTokenMap(apecTokens, map[string]interface{}{
		"type":   tokenmap.Name,
		"tokens": stopWords,
	})
	if err != nil {
		return nil, err
	}

	apecStop := "apec_stop"
	err = m.AddCustomTokenFilter(apecStop, map[string]interface{}{
		"type":           stop.Name,
		"stop_token_map": apecTokens,
	})
	if err != nil {
		return nil, err
	}

	frTokens := []string{
		lowercase.Name,
		fr.ElisionName,
		fr.StopName,
		fr.LightStemmerName,
		apecStop,
	}
	fr := map[string]interface{}{
		"type":          custom.Name,
		"tokenizer":     apecTokenizer,
		"token_filters": frTokens,
	}
	frHtml := map[string]interface{}{
		"type": custom.Name,
		"char_filters": []string{
			html.Name,
		},
		"tokenizer":     apecTokenizer,
		"token_filters": frTokens,
	}
	err = m.AddCustomAnalyzer("fr", fr)
	if err != nil {
		return nil, fmt.Errorf("failed to register analyzer fr: %s", err)
	}
	err = m.AddCustomAnalyzer("fr_html", frHtml)
	if err != nil {
		return nil, fmt.Errorf("failed to register analyzer fr_html: %s", err)
	}

	htmlFr := bleve.NewTextFieldMapping()
	htmlFr.Store = false
	htmlFr.IncludeInAll = false
	htmlFr.IncludeTermVectors = false
	htmlFr.Analyzer = "fr_html"

	textFr := bleve.NewTextFieldMapping()
	textFr.Store = false
	textFr.IncludeInAll = false
	textFr.IncludeTermVectors = false
	textFr.Analyzer = "fr"

	textAll := bleve.NewTextFieldMapping()
	textAll.Store = false
	textAll.IncludeInAll = true
	textAll.IncludeTermVectors = false

	date := bleve.NewDateTimeFieldMapping()
	date.Index = false
	date.Store = true
	date.IncludeInAll = false
	date.IncludeTermVectors = false

	offer := bleve.NewDocumentStaticMapping()
	offer.Dynamic = false
	offer.AddFieldMappingsAt("html", htmlFr)
	offer.AddFieldMappingsAt("title", textFr)
	offer.AddFieldMappingsAt("date", date)

	m.AddDocumentMapping("offer", offer)
	m.DefaultMapping = offer

	index, err := bleve.NewUsing(dir, m, upsidedown.Name, boltdb.Name,
		map[string]interface{}{
			"nosync": true,
		})
	if err != nil {
		return nil, err
	}
	return index, nil
}
Beispiel #15
0
func main() {

	flag.Parse()

	if *path == "" {
		log.Fatal("provide a zim file path")
	}

	z, err := zim.NewReader(*path, false)
	if err != nil {
		log.Fatal(err)
	}

	if *indexPath == "" {
		log.Fatal("Please provide a path for the index")
	}

	switch *lang {
	case "en":
		//TODO: create a simple language support for stop word
	default:
		log.Fatal("unsupported language")
	}

	mapping := bleve.NewIndexMapping()
	mapping.DefaultType = "Article"

	articleMapping := bleve.NewDocumentMapping()
	mapping.AddDocumentMapping("Article", articleMapping)

	titleMapping := bleve.NewTextFieldMapping()
	titleMapping.Store = false
	titleMapping.Index = true
	titleMapping.Analyzer = "standard"
	articleMapping.AddFieldMappingsAt("Title", titleMapping)

	fmt.Println(registry.AnalyzerTypesAndInstances())

	index, err := bleve.New(*indexPath, mapping)
	if err != nil {
		log.Fatal(err)
	}

	i := 0

	batch := index.NewBatch()
	batchCount := 0
	idoc := ArticleIndex{}

	divisor := float64(z.ArticleCount) / 100

	z.ListTitlesPtrIterator(func(idx uint32) {

		if i%*batchSize == 0 {
			fmt.Printf("%.2f%% done\n", float64(i)/divisor)
		}
		a, err := z.ArticleAtURLIdx(idx)
		if err != nil || a.EntryType == zim.DeletedEntry {
			i++
			return
		}

		if a.Namespace == 'A' {
			idoc.Title = a.Title
			// index the idoc with the idx as key
			batch.Index(fmt.Sprint(idx), idoc)
			batchCount++
		}

		i++

		// send a batch to bleve
		if batchCount >= *batchSize {
			err = index.Batch(batch)
			if err != nil {
				log.Fatal(err.Error())
			}
			batch = index.NewBatch()
			batchCount = 0
		}
	})

	// batch the rest
	if batchCount > 0 {
		err = index.Batch(batch)
		if err != nil {
			log.Fatal(err.Error())
		}
	}

	index.Close()

}
Beispiel #16
0
// Run index++ to generate a (new) swish++ index file.
func GenerateIndex() error {
	// TODO handle multiple fields, i.e. the main text, @source, @type, tags, etc.
	newIndex := false
	// Try to open an existing index or create a new one if none exists.
	index, err := openIndex()
	if err != nil {
		enTextMapping := bleve.NewTextFieldMapping()
		enTextMapping.Analyzer = "en"

		simpleMapping := bleve.NewTextFieldMapping()
		simpleMapping.Analyzer = simple_analyzer.Name

		typeMapping := bleve.NewTextFieldMapping()
		typeMapping.Analyzer = keyword_analyzer.Name

		scrollMapping := bleve.NewDocumentMapping()
		scrollMapping.AddFieldMappingsAt("id", simpleMapping)
		scrollMapping.AddFieldMappingsAt("content", enTextMapping)
		scrollMapping.AddFieldMappingsAt("type", typeMapping)
		scrollMapping.AddFieldMappingsAt("source", simpleMapping)
		scrollMapping.AddFieldMappingsAt("tag", simpleMapping)
		scrollMapping.AddFieldMappingsAt("hidden", simpleMapping)
		scrollMapping.AddFieldMappingsAt("other", simpleMapping)

		mapping := bleve.NewIndexMapping()
		mapping.DefaultAnalyzer = "en"
		mapping.DefaultMapping = scrollMapping

		index, err = bleve.New(Config.AlexandriaDirectory+"bleve", mapping)
		if err != nil {
			panic(err)
		}
		newIndex = true
	}
	defer index.Close()

	files, err := ioutil.ReadDir(Config.KnowledgeDirectory)
	if err != nil {
		return err
	}

	indexUpdateFile := Config.AlexandriaDirectory + "index_updated"
	indexUpdateTime, err := getModTime(indexUpdateFile)
	if err != nil {
		LogError(err)
		return nil
	}
	// Save the time of this indexing operation
	_ = touch(Config.AlexandriaDirectory + "index_updated")

	batch := index.NewBatch()
	for _, file := range files {
		// Check whether the scroll is newer than the index.
		modTime, err := getModTime(Config.KnowledgeDirectory + file.Name())
		if err != nil {
			LogError(err)
			continue
		}
		id := strings.TrimSuffix(file.Name(), ".tex")
		if modTime < indexUpdateTime && !newIndex {
			continue
		}

		// Load and parse the scroll content
		contentBytes, err := ioutil.ReadFile(Config.KnowledgeDirectory + file.Name())
		TryLogError(err)
		content := string(contentBytes)
		scroll := Parse(id, content)

		batch.Index(id, scroll)
	}
	index.Batch(batch)

	return nil
}
Beispiel #17
0
func buildIndexMapping() (*bleve.IndexMapping, error) {

	// a custom field definition that uses our custom analyzer
	edgeNgram325FieldMapping := bleve.NewTextFieldMapping()
	edgeNgram325FieldMapping.Analyzer = "enWithEdgeNgram325"

	// a generic reusable mapping for english text
	englishTextFieldMapping := bleve.NewTextFieldMapping()
	englishTextFieldMapping.Analyzer = "en"

	// a generic reusable mapping for keyword text
	keywordFieldMapping := bleve.NewTextFieldMapping()
	keywordFieldMapping.Analyzer = "keyword"

	// a specific mapping to index the description fields
	// detected language
	descriptionLangFieldMapping := bleve.NewTextFieldMapping()
	descriptionLangFieldMapping.Name = "descriptionLang"
	descriptionLangFieldMapping.Analyzer = "detect_lang"
	descriptionLangFieldMapping.Store = false
	descriptionLangFieldMapping.IncludeTermVectors = false
	descriptionLangFieldMapping.IncludeInAll = false

	beerMapping := bleve.NewDocumentMapping()

	// name
	beerMapping.AddFieldMappingsAt("name", englishTextFieldMapping)

	// description
	beerMapping.AddFieldMappingsAt("description",
		edgeNgram325FieldMapping,
		descriptionLangFieldMapping)

	beerMapping.AddFieldMappingsAt("type", keywordFieldMapping)
	beerMapping.AddFieldMappingsAt("style", keywordFieldMapping)
	beerMapping.AddFieldMappingsAt("category", keywordFieldMapping)

	breweryMapping := bleve.NewDocumentMapping()
	breweryMapping.AddFieldMappingsAt("name", englishTextFieldMapping)
	breweryMapping.AddFieldMappingsAt("description", englishTextFieldMapping)

	indexMapping := bleve.NewIndexMapping()
	indexMapping.AddDocumentMapping("beer", beerMapping)
	indexMapping.AddDocumentMapping("brewery", breweryMapping)

	indexMapping.TypeField = "type"
	indexMapping.DefaultAnalyzer = textFieldAnalyzer

	err := indexMapping.AddCustomTokenFilter("edgeNgram325",
		map[string]interface{}{
			"type": "edge_ngram",
			"min":  3.0,
			"max":  25.0,
		})
	if err != nil {
		return nil, err
	}

	err = indexMapping.AddCustomAnalyzer("enWithEdgeNgram325",
		map[string]interface{}{
			"type":      "custom",
			"tokenizer": "unicode",
			"token_filters": []string{
				"possessive_en",
				"to_lower",
				"stop_en",
				"edgeNgram325",
			},
		})
	if err != nil {
		return nil, err
	}

	return indexMapping, nil
}
Beispiel #18
0
func getIndex(indexName string) (bleve.Index, error) {
	if _, err := os.Stat(indexName); os.IsNotExist(err) {
		log.Printf("Creating Bleve index at %s\n", indexName)

		log.Println("Setting up index...")
		indexMapping := bleve.NewIndexMapping()
		// Add Accession as a specific document map
		accessionMapping := bleve.NewDocumentMapping()

		// Now add specific accession fields
		titleMapping := bleve.NewTextFieldMapping()
		titleMapping.Analyzer = "en"
		titleMapping.Store = true
		titleMapping.Index = true
		accessionMapping.AddFieldMappingsAt("title", titleMapping)

		descriptionMapping := bleve.NewTextFieldMapping()
		descriptionMapping.Analyzer = "en"
		descriptionMapping.Store = true
		descriptionMapping.Index = true
		accessionMapping.AddFieldMappingsAt("content_description", descriptionMapping)

		subjectsMapping := bleve.NewTextFieldMapping()
		subjectsMapping.Analyzer = "en"
		subjectsMapping.Store = true
		subjectsMapping.Index = true
		subjectsMapping.IncludeTermVectors = true
		accessionMapping.AddFieldMappingsAt("subjects", subjectsMapping)

		subjectsFunctionMapping := bleve.NewTextFieldMapping()
		subjectsFunctionMapping.Analyzer = "en"
		subjectsFunctionMapping.Store = true
		subjectsFunctionMapping.Index = true
		subjectsFunctionMapping.IncludeTermVectors = true
		accessionMapping.AddFieldMappingsAt("subjects_function", subjectsFunctionMapping)

		subjectsTopicalMapping := bleve.NewTextFieldMapping()
		subjectsTopicalMapping.Analyzer = "en"
		subjectsTopicalMapping.Store = true
		subjectsTopicalMapping.Index = true
		subjectsTopicalMapping.IncludeTermVectors = true
		accessionMapping.AddFieldMappingsAt("subjects_topical", subjectsTopicalMapping)

		objectTitleMapping := bleve.NewTextFieldMapping()
		objectTitleMapping.Analyzer = "en"
		objectTitleMapping.Store = true
		objectTitleMapping.Index = false
		accessionMapping.AddFieldMappingsAt("digital_objects.title", objectTitleMapping)

		objectFileURIMapping := bleve.NewTextFieldMapping()
		objectFileURIMapping.Analyzer = ""
		objectFileURIMapping.Store = true
		objectFileURIMapping.Index = false
		accessionMapping.AddFieldMappingsAt("digital_objects.file_uris", objectFileURIMapping)

		extentsMapping := bleve.NewTextFieldMapping()
		extentsMapping.Analyzer = "en"
		extentsMapping.Store = true
		extentsMapping.Index = true
		accessionMapping.AddFieldMappingsAt("extents", extentsMapping)

		accessionDateMapping := bleve.NewTextFieldMapping()
		accessionDateMapping.Analyzer = "en"
		accessionDateMapping.Store = true
		accessionDateMapping.Index = false
		accessionMapping.AddFieldMappingsAt("accession_date", accessionDateMapping)

		datesMapping := bleve.NewTextFieldMapping()
		datesMapping.Store = true
		datesMapping.Index = false
		accessionMapping.AddFieldMappingsAt("date_expression", datesMapping)

		createdMapping := bleve.NewDateTimeFieldMapping()
		createdMapping.Store = true
		createdMapping.Index = false
		accessionMapping.AddFieldMappingsAt("created", createdMapping)

		// Finally add this mapping to the main index mapping
		indexMapping.AddDocumentMapping("accession", accessionMapping)

		index, err := bleve.New(indexName, indexMapping)
		if err != nil {
			return nil, fmt.Errorf("Can't create new bleve index %s, %s", indexName, err)
		}
		return index, nil
	}
	log.Printf("Opening Bleve index at %s", indexName)
	index, err := bleve.OpenUsing(indexName, map[string]interface{}{
		"read_only": false,
	})
	if err != nil {
		return nil, fmt.Errorf("Can't create new bleve index %s, %s", indexName, err)
	}
	return index, nil
}
Beispiel #19
0
func buildIndexMapping() (*bleve.IndexMapping, error) {

	// a custom field definition that uses our custom analyzer
	notTooLongFieldMapping := bleve.NewTextFieldMapping()
	notTooLongFieldMapping.Analyzer = "enNotTooLong"

	// a generic reusable mapping for english text
	englishTextFieldMapping := bleve.NewTextFieldMapping()
	englishTextFieldMapping.Analyzer = en.AnalyzerName

	// a generic reusable mapping for keyword text
	keywordFieldMapping := bleve.NewTextFieldMapping()
	keywordFieldMapping.Analyzer = keyword_analyzer.Name

	// a specific mapping to index the description fields
	// detected language
	descriptionLangFieldMapping := bleve.NewTextFieldMapping()
	descriptionLangFieldMapping.Name = "descriptionLang"
	descriptionLangFieldMapping.Analyzer = detect_lang.AnalyzerName
	descriptionLangFieldMapping.Store = false
	descriptionLangFieldMapping.IncludeTermVectors = false
	descriptionLangFieldMapping.IncludeInAll = false

	beerMapping := bleve.NewDocumentMapping()

	// name
	beerMapping.AddFieldMappingsAt("name", englishTextFieldMapping)

	// description
	beerMapping.AddFieldMappingsAt("description",
		notTooLongFieldMapping,
		descriptionLangFieldMapping)

	beerMapping.AddFieldMappingsAt("type", keywordFieldMapping)
	beerMapping.AddFieldMappingsAt("style", keywordFieldMapping)
	beerMapping.AddFieldMappingsAt("category", keywordFieldMapping)

	breweryMapping := bleve.NewDocumentMapping()
	breweryMapping.AddFieldMappingsAt("name", englishTextFieldMapping)
	breweryMapping.AddFieldMappingsAt("description", englishTextFieldMapping)

	indexMapping := bleve.NewIndexMapping()
	indexMapping.AddDocumentMapping("beer", beerMapping)
	indexMapping.AddDocumentMapping("brewery", breweryMapping)

	indexMapping.TypeField = "type"
	indexMapping.DefaultAnalyzer = textFieldAnalyzer

	err := indexMapping.AddCustomTokenFilter("notTooLong",
		map[string]interface{}{
			"type":   truncate_token_filter.Name,
			"length": 5.0,
		})
	if err != nil {
		return nil, err
	}

	err = indexMapping.AddCustomAnalyzer("enNotTooLong",
		map[string]interface{}{
			"type":      custom_analyzer.Name,
			"tokenizer": unicode.Name,
			"token_filters": []string{
				"notTooLong",
				en.PossessiveName,
				lower_case_filter.Name,
				en.StopName,
				porter.Name,
			},
		})
	if err != nil {
		return nil, err
	}

	return indexMapping, nil
}