func (fm *FieldMapping) processString(propertyValueString string, pathString string, path []string, indexes []uint64, context *walkContext) { fieldName := getFieldName(pathString, path, fm) options := fm.Options() if fm.Type == "text" { analyzer := fm.analyzerForField(path, context) field := document.NewTextFieldCustom(fieldName, indexes, []byte(propertyValueString), options, analyzer) context.doc.AddField(field) if !fm.IncludeInAll { context.excludedFromAll = append(context.excludedFromAll, fieldName) } } else if fm.Type == "datetime" { dateTimeFormat := context.im.DefaultDateTimeParser if fm.DateFormat != "" { dateTimeFormat = fm.DateFormat } dateTimeParser := context.im.dateTimeParserNamed(dateTimeFormat) if dateTimeParser != nil { parsedDateTime, err := dateTimeParser.ParseDateTime(propertyValueString) if err != nil { fm.processTime(parsedDateTime, pathString, path, indexes, context) } } } }
func TestAnalysisBug328(t *testing.T) { cache := registry.NewCache() analyzer, err := cache.AnalyzerNamed(standard_analyzer.Name) if err != nil { t.Fatal(err) } analysisQueue := index.NewAnalysisQueue(1) idx, err := NewFirestorm(gtreap.Name, nil, analysisQueue) if err != nil { t.Fatal(err) } d := document.NewDocument("1") f := document.NewTextFieldCustom("title", nil, []byte("bleve"), document.IndexField|document.IncludeTermVectors, analyzer) d.AddField(f) f = document.NewTextFieldCustom("body", nil, []byte("bleve"), document.IndexField|document.IncludeTermVectors, analyzer) d.AddField(f) cf := document.NewCompositeFieldWithIndexingOptions("_all", true, []string{}, []string{}, document.IndexField|document.IncludeTermVectors) d.AddField(cf) rv := idx.Analyze(d) fieldIndexes := make(map[uint16]string) for _, row := range rv.Rows { if row, ok := row.(*FieldRow); ok { fieldIndexes[row.index] = row.Name() } if row, ok := row.(*TermFreqRow); ok && string(row.term) == "bleve" { for _, vec := range row.Vectors() { if vec.GetField() != uint32(row.field) { if fieldIndexes[row.field] != "_all" { t.Errorf("row named %s field %d - vector field %d", fieldIndexes[row.field], row.field, vec.GetField()) } } } } } }
func TestIndexFieldDict(t *testing.T) { defer func() { err := os.RemoveAll("test") if err != nil { t.Fatal(err) } }() s := boltdb.New("test", "bleve") s.SetMergeOperator(&mergeOperator) analysisQueue := NewAnalysisQueue(1) idx := NewUpsideDownCouch(s, analysisQueue) err := idx.Open() if err != nil { t.Errorf("error opening index: %v", err) } defer func() { err := idx.Close() if err != nil { t.Fatal(err) } }() var expectedCount uint64 doc := document.NewDocument("1") doc.AddField(document.NewTextField("name", []uint64{}, []byte("test"))) err = idx.Update(doc) if err != nil { t.Errorf("Error updating index: %v", err) } expectedCount++ doc = document.NewDocument("2") doc.AddField(document.NewTextFieldWithAnalyzer("name", []uint64{}, []byte("test test test"), testAnalyzer)) doc.AddField(document.NewTextFieldCustom("desc", []uint64{}, []byte("eat more rice"), document.IndexField|document.IncludeTermVectors, testAnalyzer)) doc.AddField(document.NewTextFieldCustom("prefix", []uint64{}, []byte("bob cat cats catting dog doggy zoo"), document.IndexField|document.IncludeTermVectors, testAnalyzer)) err = idx.Update(doc) if err != nil { t.Errorf("Error updating index: %v", err) } expectedCount++ indexReader, err := idx.Reader() if err != nil { t.Error(err) } defer func() { err := indexReader.Close() if err != nil { t.Fatal(err) } }() dict, err := indexReader.FieldDict("name") if err != nil { t.Errorf("error creating reader: %v", err) } defer func() { err := dict.Close() if err != nil { t.Fatal(err) } }() termCount := 0 curr, err := dict.Next() for err == nil && curr != nil { termCount++ if curr.Term != "test" { t.Errorf("expected term to be 'test', got '%s'", curr.Term) } curr, err = dict.Next() } if termCount != 1 { t.Errorf("expected 1 term for this field, got %d", termCount) } dict2, err := indexReader.FieldDict("desc") if err != nil { t.Errorf("error creating reader: %v", err) } defer func() { err := dict2.Close() if err != nil { t.Fatal(err) } }() termCount = 0 terms := make([]string, 0) curr, err = dict2.Next() for err == nil && curr != nil { termCount++ terms = append(terms, curr.Term) curr, err = dict2.Next() } if termCount != 3 { t.Errorf("expected 3 term for this field, got %d", termCount) } expectedTerms := []string{"eat", "more", "rice"} if !reflect.DeepEqual(expectedTerms, terms) { t.Errorf("expected %#v, got %#v", expectedTerms, terms) } // test start and end range dict3, err := indexReader.FieldDictRange("desc", []byte("fun"), []byte("nice")) if err != nil { t.Errorf("error creating reader: %v", err) } defer func() { err := dict3.Close() if err != nil { t.Fatal(err) } }() termCount = 0 terms = make([]string, 0) curr, err = dict3.Next() for err == nil && curr != nil { termCount++ terms = append(terms, curr.Term) curr, err = dict3.Next() } if termCount != 1 { t.Errorf("expected 1 term for this field, got %d", termCount) } expectedTerms = []string{"more"} if !reflect.DeepEqual(expectedTerms, terms) { t.Errorf("expected %#v, got %#v", expectedTerms, terms) } // test use case for prefix dict4, err := indexReader.FieldDictPrefix("prefix", []byte("cat")) if err != nil { t.Errorf("error creating reader: %v", err) } defer func() { err := dict4.Close() if err != nil { t.Fatal(err) } }() termCount = 0 terms = make([]string, 0) curr, err = dict4.Next() for err == nil && curr != nil { termCount++ terms = append(terms, curr.Term) curr, err = dict4.Next() } if termCount != 3 { t.Errorf("expected 3 term for this field, got %d", termCount) } expectedTerms = []string{"cat", "cats", "catting"} if !reflect.DeepEqual(expectedTerms, terms) { t.Errorf("expected %#v, got %#v", expectedTerms, terms) } }
func TestIndexFieldReader(t *testing.T) { defer os.RemoveAll("test") store, err := boltdb.Open("test", "bleve") idx := NewUpsideDownCouch(store) err = idx.Open() if err != nil { t.Errorf("error opening index: %v", err) } defer idx.Close() var expectedCount uint64 doc := document.NewDocument("1") doc.AddField(document.NewTextField("name", []uint64{}, []byte("test"))) err = idx.Update(doc) if err != nil { t.Errorf("Error updating index: %v", err) } expectedCount++ doc = document.NewDocument("2") doc.AddField(document.NewTextFieldWithAnalyzer("name", []uint64{}, []byte("test test test"), testAnalyzer)) doc.AddField(document.NewTextFieldCustom("desc", []uint64{}, []byte("eat more rice"), document.IndexField|document.IncludeTermVectors, testAnalyzer)) doc.AddField(document.NewTextFieldCustom("prefix", []uint64{}, []byte("bob cat cats catting dog doggy zoo"), document.IndexField|document.IncludeTermVectors, testAnalyzer)) err = idx.Update(doc) if err != nil { t.Errorf("Error updating index: %v", err) } expectedCount++ reader, err := idx.FieldReader("name", nil, nil) if err != nil { t.Errorf("error creating reader: %v", err) } defer reader.Close() termCount := 0 curr, err := reader.Next() for err == nil && curr != nil { termCount++ if curr.Term != "test" { t.Errorf("expected term to be 'test', got '%s'", curr.Term) } curr, err = reader.Next() } if termCount != 1 { t.Errorf("expected 1 term for this field, got %d", termCount) } reader, err = idx.FieldReader("desc", nil, nil) if err != nil { t.Errorf("error creating reader: %v", err) } defer reader.Close() termCount = 0 terms := make([]string, 0) curr, err = reader.Next() for err == nil && curr != nil { termCount++ terms = append(terms, curr.Term) curr, err = reader.Next() } if termCount != 3 { t.Errorf("expected 3 term for this field, got %d", termCount) } expectedTerms := []string{"eat", "more", "rice"} if !reflect.DeepEqual(expectedTerms, terms) { t.Errorf("expected %#v, got %#v", expectedTerms, terms) } // test use case for prefix reader, err = idx.FieldReader("prefix", []byte("cat"), []byte("cat")) if err != nil { t.Errorf("error creating reader: %v", err) } defer reader.Close() termCount = 0 terms = make([]string, 0) curr, err = reader.Next() for err == nil && curr != nil { termCount++ terms = append(terms, curr.Term) curr, err = reader.Next() } if termCount != 3 { t.Errorf("expected 3 term for this field, got %d", termCount) } expectedTerms = []string{"cats", "catting", "cat"} if !reflect.DeepEqual(expectedTerms, terms) { t.Errorf("expected %#v, got %#v", expectedTerms, terms) } }
} } // create a simpler analyzer which will support these tests var testAnalyzer = &analysis.Analyzer{ Tokenizer: regexp_tokenizer.NewRegexpTokenizer(regexp.MustCompile(`\w+`)), } // sets up some mock data used in many tests in this package var twoDocIndexDescIndexingOptions = document.DefaultTextIndexingOptions | document.IncludeTermVectors var twoDocIndexDocs = []*document.Document{ // must have 4/4 beer document.NewDocument("1"). AddField(document.NewTextField("name", []uint64{}, []byte("marty"))). AddField(document.NewTextFieldCustom("desc", []uint64{}, []byte("beer beer beer beer"), twoDocIndexDescIndexingOptions, testAnalyzer)). AddField(document.NewTextFieldWithAnalyzer("street", []uint64{}, []byte("couchbase way"), testAnalyzer)), // must have 1/4 beer document.NewDocument("2"). AddField(document.NewTextField("name", []uint64{}, []byte("steve"))). AddField(document.NewTextFieldCustom("desc", []uint64{}, []byte("angst beer couch database"), twoDocIndexDescIndexingOptions, testAnalyzer)). AddField(document.NewTextFieldWithAnalyzer("street", []uint64{}, []byte("couchbase way"), testAnalyzer)). AddField(document.NewTextFieldWithAnalyzer("title", []uint64{}, []byte("mister"), testAnalyzer)), // must have 1/4 beer document.NewDocument("3"). AddField(document.NewTextField("name", []uint64{}, []byte("dustin"))). AddField(document.NewTextFieldCustom("desc", []uint64{}, []byte("apple beer column dank"), twoDocIndexDescIndexingOptions, testAnalyzer)). AddField(document.NewTextFieldWithAnalyzer("title", []uint64{}, []byte("mister"), testAnalyzer)), // must have 65/65 beer document.NewDocument("4"). AddField(document.NewTextField("name", []uint64{}, []byte("ravi"))).
func TestIndexReader(t *testing.T) { defer func() { err := DestroyTest() if err != nil { t.Fatal(err) } }() analysisQueue := index.NewAnalysisQueue(1) idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue) if err != nil { t.Fatal(err) } err = idx.Open() if err != nil { t.Errorf("error opening index: %v", err) } defer func() { err := idx.Close() if err != nil { t.Fatal(err) } }() var expectedCount uint64 doc := document.NewDocument("1") doc.AddField(document.NewTextField("name", []uint64{}, []byte("test"))) err = idx.Update(doc) if err != nil { t.Errorf("Error updating index: %v", err) } expectedCount++ doc = document.NewDocument("2") doc.AddField(document.NewTextFieldWithAnalyzer("name", []uint64{}, []byte("test test test"), testAnalyzer)) doc.AddField(document.NewTextFieldCustom("desc", []uint64{}, []byte("eat more rice"), document.IndexField|document.IncludeTermVectors, testAnalyzer)) err = idx.Update(doc) if err != nil { t.Errorf("Error updating index: %v", err) } expectedCount++ indexReader, err := idx.Reader() if err != nil { t.Error(err) } defer func() { err := indexReader.Close() if err != nil { t.Fatal(err) } }() // first look for a term that doesn't exist reader, err := indexReader.TermFieldReader([]byte("nope"), "name") if err != nil { t.Errorf("Error accessing term field reader: %v", err) } count := reader.Count() if count != 0 { t.Errorf("Expected doc count to be: %d got: %d", 0, count) } err = reader.Close() if err != nil { t.Fatal(err) } reader, err = indexReader.TermFieldReader([]byte("test"), "name") if err != nil { t.Errorf("Error accessing term field reader: %v", err) } expectedCount = 2 count = reader.Count() if count != expectedCount { t.Errorf("Exptected doc count to be: %d got: %d", expectedCount, count) } var match *index.TermFieldDoc var actualCount uint64 match, err = reader.Next() for err == nil && match != nil { match, err = reader.Next() if err != nil { t.Errorf("unexpected error reading next") } actualCount++ } if actualCount != count { t.Errorf("count was 2, but only saw %d", actualCount) } expectedMatch := &index.TermFieldDoc{ ID: "2", Freq: 1, Norm: 0.5773502588272095, Vectors: []*index.TermFieldVector{ &index.TermFieldVector{ Field: "desc", Pos: 3, Start: 9, End: 13, }, }, } tfr, err := indexReader.TermFieldReader([]byte("rice"), "desc") if err != nil { t.Errorf("unexpected error: %v", err) } match, err = tfr.Next() if err != nil { t.Errorf("unexpected error: %v", err) } if !reflect.DeepEqual(expectedMatch, match) { t.Errorf("got %#v, expected %#v", match, expectedMatch) } err = reader.Close() if err != nil { t.Fatal(err) } // now test usage of advance reader, err = indexReader.TermFieldReader([]byte("test"), "name") if err != nil { t.Errorf("Error accessing term field reader: %v", err) } match, err = reader.Advance("2") if err != nil { t.Errorf("unexpected error: %v", err) } if match == nil { t.Fatalf("Expected match, got nil") } if match.ID != "2" { t.Errorf("Expected ID '2', got '%s'", match.ID) } match, err = reader.Advance("3") if err != nil { t.Errorf("unexpected error: %v", err) } if match != nil { t.Errorf("expected nil, got %v", match) } err = reader.Close() if err != nil { t.Fatal(err) } // now test creating a reader for a field that doesn't exist reader, err = indexReader.TermFieldReader([]byte("water"), "doesnotexist") if err != nil { t.Errorf("Error accessing term field reader: %v", err) } count = reader.Count() if count != 0 { t.Errorf("expected count 0 for reader of non-existant field") } match, err = reader.Next() if err != nil { t.Errorf("unexpected error: %v", err) } if match != nil { t.Errorf("expected nil, got %v", match) } match, err = reader.Advance("anywhere") if err != nil { t.Errorf("unexpected error: %v", err) } if match != nil { t.Errorf("expected nil, got %v", match) } }