Пример #1
0
// BleveMetaExtra stores a description of the bleve registry in m under the
// key "regBleve". The stored value maps each registry category name (for
// example "Analyzer" or "Tokenizer") to a map with two entries: "types" holds
// the first list returned by that category's bleveRegistry
// *TypesAndInstances function and "instances" holds the second.
func BleveMetaExtra(m map[string]interface{}) {
	// One row per registry category: the key used in the result and the
	// registry function that lists the category's types and instances.
	categories := []struct {
		key  string
		list func() ([]string, []string)
	}{
		{"Analyzer", bleveRegistry.AnalyzerTypesAndInstances},
		{"ByteArrayConverter", bleveRegistry.ByteArrayConverterTypesAndInstances},
		{"CharFilter", bleveRegistry.CharFilterTypesAndInstances},
		{"DateTimeParser", bleveRegistry.DateTimeParserTypesAndInstances},
		{"FragmentFormatter", bleveRegistry.FragmentFormatterTypesAndInstances},
		{"Fragmenter", bleveRegistry.FragmenterTypesAndInstances},
		{"Highlighter", bleveRegistry.HighlighterTypesAndInstances},
		{"KVStore", bleveRegistry.KVStoreTypesAndInstances},
		{"TokenFilter", bleveRegistry.TokenFilterTypesAndInstances},
		{"TokenMap", bleveRegistry.TokenMapTypesAndInstances},
		{"Tokenizer", bleveRegistry.TokenizerTypesAndInstances},
	}

	report := make(map[string]map[string][]string)
	for _, c := range categories {
		types, instances := c.list()
		report[c.key] = map[string][]string{"types": types, "instances": instances}
	}

	m["regBleve"] = report
}
Пример #2
0
// printRegistry walks the registry categories in a fixed order. For each
// category it fetches the type and instance name lists from the registry
// package and hands them, together with a human-readable label, to printType.
func printRegistry() {
	// The slice order is the output order.
	sections := []struct {
		label string
		list  func() ([]string, []string)
	}{
		{"Char Filter", registry.CharFilterTypesAndInstances},
		{"Tokenizer", registry.TokenizerTypesAndInstances},
		{"Token Map", registry.TokenMapTypesAndInstances},
		{"Token Filter", registry.TokenFilterTypesAndInstances},
		{"Analyzer", registry.AnalyzerTypesAndInstances},
		{"Date Time Parser", registry.DateTimeParserTypesAndInstances},
		{"KV Store", registry.KVStoreTypesAndInstances},
		{"ByteArrayConverter", registry.ByteArrayConverterTypesAndInstances},
		{"Fragment Formatter", registry.FragmentFormatterTypesAndInstances},
		{"Fragmenter", registry.FragmenterTypesAndInstances},
		{"Highlighter", registry.HighlighterTypesAndInstances},
	}

	for _, s := range sections {
		types, instances := s.list()
		printType(s.label, types, instances)
	}
}
Пример #3
0
// ListAnalyzerNames is an HTTP handler that responds with a sorted list of
// analyzer names.
//
// The request body is optional. When it is non-empty it is passed through
// CleanseJSON and decoded as an index mapping; the names of that mapping's
// custom analyzers are then added to the analyzer instance names reported by
// the registry. A failure to read, cleanse or parse the body is reported
// through showError with status 400 and ends the request.
//
// On success the handler passes a value with "status" set to "ok" and
// "analyzers" set to the sorted names to mustEncode.
func ListAnalyzerNames(w http.ResponseWriter, req *http.Request) {
	indexMapping := bleve.NewIndexMapping()

	body, err := ioutil.ReadAll(req.Body)
	if err != nil {
		showError(w, req, fmt.Sprintf("error reading request body: %v", err), http.StatusBadRequest)
		return
	}

	// An empty body leaves the freshly created mapping untouched.
	if len(body) > 0 {
		if body, err = CleanseJSON(body); err != nil {
			showError(w, req, fmt.Sprintf("error preparing index mapping: %v", err), http.StatusBadRequest)
			return
		}
		if err = json.Unmarshal(body, &indexMapping); err != nil {
			showError(w, req, fmt.Sprintf("error parsing index mapping: %v", err), http.StatusBadRequest)
			return
		}
	}

	// Start from the registry's analyzer instance names, then add the custom
	// analyzers declared by the supplied mapping.
	_, names := registry.AnalyzerTypesAndInstances()
	for custom := range indexMapping.CustomAnalysis.Analyzers {
		names = append(names, custom)
	}
	sort.Strings(names)

	mustEncode(w, struct {
		Status    string   `json:"status"`
		Analyzers []string `json:"analyzers"`
	}{
		Status:    "ok",
		Analyzers: names,
	})
}
Пример #4
0
		return nil
	},
	Run: func(cmd *cobra.Command, args []string) {
		types, instances := registry.CharFilterTypesAndInstances()
		printType("Char Filter", types, instances)

		types, instances = registry.TokenizerTypesAndInstances()
		printType("Tokenizer", types, instances)

		types, instances = registry.TokenMapTypesAndInstances()
		printType("Token Map", types, instances)

		types, instances = registry.TokenFilterTypesAndInstances()
		printType("Token Filter", types, instances)

		types, instances = registry.AnalyzerTypesAndInstances()
		printType("Analyzer", types, instances)

		types, instances = registry.DateTimeParserTypesAndInstances()
		printType("Date Time Parser", types, instances)

		types, instances = registry.KVStoreTypesAndInstances()
		printType("KV Store", types, instances)

		types, instances = registry.FragmentFormatterTypesAndInstances()
		printType("Fragment Formatter", types, instances)

		types, instances = registry.FragmenterTypesAndInstances()
		printType("Fragmenter", types, instances)

		types, instances = registry.HighlighterTypesAndInstances()
Пример #5
0
// main builds a bleve index of article titles from a zim file.
//
// It reads the package-level flags path, indexPath, lang and batchSize and
// validates all of them before doing any I/O. It then opens the zim file and
// creates a new index at indexPath whose default "Article" document type has
// a "Title" text field that is indexed, not stored, and analyzed with the
// "standard" analyzer.
//
// Every entry visited by ListTitlesPtrIterator that loads successfully, is
// not deleted and lives in namespace 'A' has its title indexed under the
// decimal string of its idx. Documents are sent to the index in batches of
// batchSize. A document that cannot be added to the batch is logged and
// skipped; a failure to open the inputs, write a batch or close the index
// terminates the process through log.Fatal.
func main() {
	flag.Parse()

	// Validate every flag up front so a bad invocation fails before the zim
	// file is opened or an index directory is created.
	if *path == "" {
		log.Fatal("provide a zim file path")
	}
	if *indexPath == "" {
		log.Fatal("Please provide a path for the index")
	}
	// batchSize is used as a modulus for progress reporting below; zero
	// would panic with an integer divide by zero.
	if *batchSize <= 0 {
		log.Fatal("batch size must be greater than zero")
	}
	switch *lang {
	case "en":
		//TODO: create a simple language support for stop word
	default:
		log.Fatal("unsupported language")
	}

	z, err := zim.NewReader(*path, false)
	if err != nil {
		log.Fatal(err)
	}

	mapping := bleve.NewIndexMapping()
	mapping.DefaultType = "Article"

	articleMapping := bleve.NewDocumentMapping()
	mapping.AddDocumentMapping("Article", articleMapping)

	titleMapping := bleve.NewTextFieldMapping()
	titleMapping.Store = false
	titleMapping.Index = true
	titleMapping.Analyzer = "standard"
	articleMapping.AddFieldMappingsAt("Title", titleMapping)

	// Prints the analyzer types and instances known to the registry.
	fmt.Println(registry.AnalyzerTypesAndInstances())

	index, err := bleve.New(*indexPath, mapping)
	if err != nil {
		log.Fatal(err)
	}

	// i counts every visited entry and only drives the progress output;
	// batchCount counts documents queued in the current batch.
	i := 0
	batch := index.NewBatch()
	batchCount := 0
	idoc := ArticleIndex{}

	divisor := float64(z.ArticleCount) / 100

	z.ListTitlesPtrIterator(func(idx uint32) {
		if i%*batchSize == 0 {
			fmt.Printf("%.2f%% done\n", float64(i)/divisor)
		}
		i++

		a, err := z.ArticleAtURLIdx(idx)
		if err != nil || a.EntryType == zim.DeletedEntry {
			return
		}
		if a.Namespace != 'A' {
			return
		}

		idoc.Title = a.Title
		// index the idoc with the idx as key
		if err := batch.Index(fmt.Sprint(idx), idoc); err != nil {
			// Report the failure instead of dropping it; one bad document
			// should not abort the whole run.
			log.Printf("skipping article %d: %v", idx, err)
			return
		}
		batchCount++

		// send a batch to bleve
		if batchCount >= *batchSize {
			if err := index.Batch(batch); err != nil {
				log.Fatal(err)
			}
			batch = index.NewBatch()
			batchCount = 0
		}
	})

	// batch the rest
	if batchCount > 0 {
		if err := index.Batch(batch); err != nil {
			log.Fatal(err)
		}
	}

	// A failed close should not be reported as a successful run.
	if err := index.Close(); err != nil {
		log.Fatal(err)
	}
}