Ejemplo n.º 1
0
func (t *TextField) Analyze() (int, analysis.TokenFrequencies) {
	var tokens analysis.TokenStream
	if t.analyzer != nil {
		bytesToAnalyze := t.Value()
		if t.options.IsStored() {
			// need to copy
			bytesCopied := make([]byte, len(bytesToAnalyze))
			copy(bytesCopied, bytesToAnalyze)
			bytesToAnalyze = bytesCopied
		}
		tokens = t.analyzer.Analyze(bytesToAnalyze)
	} else {
		tokens = analysis.TokenStream{
			&analysis.Token{
				Start:    0,
				End:      len(t.value),
				Term:     t.value,
				Position: 1,
				Type:     analysis.AlphaNumeric,
			},
		}
	}
	fieldLength := len(tokens) // number of tokens in this doc field
	tokenFreqs := analysis.TokenFrequency(tokens, t.arrayPositions, t.options.IncludeTermVectors())
	return fieldLength, tokenFreqs
}
Ejemplo n.º 2
0
func (n *NumericField) Analyze() (int, analysis.TokenFrequencies) {
	tokens := make(analysis.TokenStream, 0)
	tokens = append(tokens, &analysis.Token{
		Start:    0,
		End:      len(n.value),
		Term:     n.value,
		Position: 1,
		Type:     analysis.Numeric,
	})

	original, err := n.value.Int64()
	if err == nil {

		shift := DefaultPrecisionStep
		for shift < 64 {
			shiftEncoded, err := numeric_util.NewPrefixCodedInt64(original, shift)
			if err != nil {
				break
			}
			token := analysis.Token{
				Start:    0,
				End:      len(shiftEncoded),
				Term:     shiftEncoded,
				Position: 1,
				Type:     analysis.Numeric,
			}
			tokens = append(tokens, &token)
			shift += DefaultPrecisionStep
		}
	}

	fieldLength := len(tokens)
	tokenFreqs := analysis.TokenFrequency(tokens, n.arrayPositions, n.options.IncludeTermVectors())
	return fieldLength, tokenFreqs
}
Ejemplo n.º 3
0
func (b *BooleanField) Analyze() (int, analysis.TokenFrequencies) {
	tokens := make(analysis.TokenStream, 0)
	tokens = append(tokens, &analysis.Token{
		Start:    0,
		End:      len(b.value),
		Term:     b.value,
		Position: 1,
		Type:     analysis.Boolean,
	})

	fieldLength := len(tokens)
	tokenFreqs := analysis.TokenFrequency(tokens, b.arrayPositions, b.options.IncludeTermVectors())
	return fieldLength, tokenFreqs
}
Ejemplo n.º 4
0
func BenchmarkAnalysis(b *testing.B) {
	for i := 0; i < b.N; i++ {

		cache := registry.NewCache()
		analyzer, err := cache.AnalyzerNamed(standard_analyzer.Name)
		if err != nil {
			b.Fatal(err)
		}

		ts := analyzer.Analyze(bleveWikiArticle)
		freqs := analysis.TokenFrequency(ts, nil, true)
		if len(freqs) != 511 {
			b.Errorf("expected %d freqs, got %d", 511, len(freqs))
		}
	}
}