func (t *TextField) Analyze() (int, analysis.TokenFrequencies) { var tokens analysis.TokenStream if t.analyzer != nil { bytesToAnalyze := t.Value() if t.options.IsStored() { // need to copy bytesCopied := make([]byte, len(bytesToAnalyze)) copy(bytesCopied, bytesToAnalyze) bytesToAnalyze = bytesCopied } tokens = t.analyzer.Analyze(bytesToAnalyze) } else { tokens = analysis.TokenStream{ &analysis.Token{ Start: 0, End: len(t.value), Term: t.value, Position: 1, Type: analysis.AlphaNumeric, }, } } fieldLength := len(tokens) // number of tokens in this doc field tokenFreqs := analysis.TokenFrequency(tokens, t.arrayPositions, t.options.IncludeTermVectors()) return fieldLength, tokenFreqs }
func (n *NumericField) Analyze() (int, analysis.TokenFrequencies) { tokens := make(analysis.TokenStream, 0) tokens = append(tokens, &analysis.Token{ Start: 0, End: len(n.value), Term: n.value, Position: 1, Type: analysis.Numeric, }) original, err := n.value.Int64() if err == nil { shift := DefaultPrecisionStep for shift < 64 { shiftEncoded, err := numeric_util.NewPrefixCodedInt64(original, shift) if err != nil { break } token := analysis.Token{ Start: 0, End: len(shiftEncoded), Term: shiftEncoded, Position: 1, Type: analysis.Numeric, } tokens = append(tokens, &token) shift += DefaultPrecisionStep } } fieldLength := len(tokens) tokenFreqs := analysis.TokenFrequency(tokens, n.arrayPositions, n.options.IncludeTermVectors()) return fieldLength, tokenFreqs }
func (b *BooleanField) Analyze() (int, analysis.TokenFrequencies) { tokens := make(analysis.TokenStream, 0) tokens = append(tokens, &analysis.Token{ Start: 0, End: len(b.value), Term: b.value, Position: 1, Type: analysis.Boolean, }) fieldLength := len(tokens) tokenFreqs := analysis.TokenFrequency(tokens, b.arrayPositions, b.options.IncludeTermVectors()) return fieldLength, tokenFreqs }
func BenchmarkAnalysis(b *testing.B) { for i := 0; i < b.N; i++ { cache := registry.NewCache() analyzer, err := cache.AnalyzerNamed(standard_analyzer.Name) if err != nil { b.Fatal(err) } ts := analyzer.Analyze(bleveWikiArticle) freqs := analysis.TokenFrequency(ts, nil, true) if len(freqs) != 511 { b.Errorf("expected %d freqs, got %d", 511, len(freqs)) } } }