func (udc *UpsideDownCouch) storeField(docID string, field document.Field, fieldIndex uint16) ([]UpsideDownCouchRow, []*BackIndexStoreEntry) { rows := make([]UpsideDownCouchRow, 0, 100) backIndexStoredEntries := make([]*BackIndexStoreEntry, 0) fieldType := encodeFieldType(field) storedRow := NewStoredRow(docID, fieldIndex, field.ArrayPositions(), fieldType, field.Value()) // record the back index entry backIndexStoredEntry := BackIndexStoreEntry{Field: proto.Uint32(uint32(fieldIndex)), ArrayPositions: field.ArrayPositions()} backIndexStoredEntries = append(backIndexStoredEntries, &backIndexStoredEntry) rows = append(rows, storedRow) return rows, backIndexStoredEntries }
func TestRows(t *testing.T) { tests := []struct { input UpsideDownCouchRow outKey []byte outVal []byte }{ { NewVersionRow(1), []byte{'v'}, []byte{0x1}, }, { NewFieldRow(0, "name"), []byte{'f', 0, 0}, []byte{'n', 'a', 'm', 'e', ByteSeparator}, }, { NewFieldRow(1, "desc"), []byte{'f', 1, 0}, []byte{'d', 'e', 's', 'c', ByteSeparator}, }, { NewFieldRow(513, "style"), []byte{'f', 1, 2}, []byte{'s', 't', 'y', 'l', 'e', ByteSeparator}, }, { NewDictionaryRow([]byte{'b', 'e', 'e', 'r'}, 0, 27), []byte{'d', 0, 0, 'b', 'e', 'e', 'r'}, []byte{27}, }, { NewTermFrequencyRow([]byte{'b', 'e', 'e', 'r'}, 0, "catz", 3, 3.14), []byte{'t', 0, 0, 'b', 'e', 'e', 'r', ByteSeparator, 'c', 'a', 't', 'z'}, []byte{3, 195, 235, 163, 130, 4}, }, { NewTermFrequencyRow([]byte{'b', 'e', 'e', 'r'}, 0, "budweiser", 3, 3.14), []byte{'t', 0, 0, 'b', 'e', 'e', 'r', ByteSeparator, 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r'}, []byte{3, 195, 235, 163, 130, 4}, }, { NewTermFrequencyRowWithTermVectors([]byte{'b', 'e', 'e', 'r'}, 0, "budweiser", 3, 3.14, []*TermVector{&TermVector{field: 0, pos: 1, start: 3, end: 11}, &TermVector{field: 0, pos: 2, start: 23, end: 31}, &TermVector{field: 0, pos: 3, start: 43, end: 51}}), []byte{'t', 0, 0, 'b', 'e', 'e', 'r', ByteSeparator, 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r'}, []byte{3, 195, 235, 163, 130, 4, 0, 1, 3, 11, 0, 2, 23, 31, 0, 3, 43, 51}, }, // test larger varints { NewTermFrequencyRowWithTermVectors([]byte{'b', 'e', 'e', 'r'}, 0, "budweiser", 25896, 3.14, []*TermVector{&TermVector{field: 255, pos: 1, start: 3, end: 11}, &TermVector{field: 0, pos: 2198, start: 23, end: 31}, &TermVector{field: 0, pos: 3, start: 43, end: 51}}), []byte{'t', 0, 0, 'b', 'e', 'e', 'r', ByteSeparator, 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r'}, []byte{168, 202, 1, 195, 235, 163, 130, 4, 255, 1, 1, 3, 11, 0, 150, 17, 23, 31, 0, 3, 43, 51}, }, { NewBackIndexRow("budweiser", []*BackIndexTermEntry{&BackIndexTermEntry{Term: proto.String("beer"), Field: proto.Uint32(0)}}, nil), []byte{'b', 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r'}, []byte{10, 8, 10, 4, 'b', 'e', 'e', 'r', 16, 0}, }, { NewBackIndexRow("budweiser", []*BackIndexTermEntry{&BackIndexTermEntry{Term: proto.String("beer"), Field: proto.Uint32(0)}, &BackIndexTermEntry{Term: proto.String("beat"), Field: proto.Uint32(1)}}, nil), []byte{'b', 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r'}, []byte{10, 8, 10, 4, 'b', 'e', 'e', 'r', 16, 0, 10, 8, 10, 4, 'b', 'e', 'a', 't', 16, 1}, }, { NewBackIndexRow("budweiser", []*BackIndexTermEntry{&BackIndexTermEntry{Term: proto.String("beer"), Field: proto.Uint32(0)}, &BackIndexTermEntry{Term: proto.String("beat"), Field: proto.Uint32(1)}}, []*BackIndexStoreEntry{&BackIndexStoreEntry{Field: proto.Uint32(3)}, &BackIndexStoreEntry{Field: proto.Uint32(4)}, &BackIndexStoreEntry{Field: proto.Uint32(5)}}), []byte{'b', 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r'}, []byte{10, 8, 10, 4, 'b', 'e', 'e', 'r', 16, 0, 10, 8, 10, 4, 'b', 'e', 'a', 't', 16, 1, 18, 2, 8, 3, 18, 2, 8, 4, 18, 2, 8, 5}, }, { NewStoredRow("budweiser", 0, []uint64{}, byte('t'), []byte("an american beer")), []byte{'s', 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r', ByteSeparator, 0, 0}, []byte{'t', 'a', 'n', ' ', 'a', 'm', 'e', 'r', 'i', 'c', 'a', 'n', ' ', 'b', 'e', 'e', 'r'}, }, { NewStoredRow("budweiser", 0, []uint64{2, 294, 3078}, byte('t'), []byte("an american beer")), []byte{'s', 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r', ByteSeparator, 0, 0, 2, 166, 2, 134, 24}, []byte{'t', 'a', 'n', ' ', 'a', 'm', 'e', 'r', 'i', 'c', 'a', 'n', ' ', 'b', 'e', 'e', 'r'}, }, { NewInternalRow([]byte("mapping"), []byte(`{"mapping":"json content"}`)), []byte{'i', 'm', 'a', 'p', 'p', 'i', 'n', 'g'}, []byte{'{', '"', 'm', 'a', 'p', 'p', 'i', 'n', 'g', '"', ':', '"', 'j', 's', 'o', 'n', ' ', 'c', 'o', 'n', 't', 'e', 'n', 't', '"', '}'}, }, } // test going from struct to k/v bytes for i, test := range tests { rk := test.input.Key() if !reflect.DeepEqual(rk, test.outKey) { t.Errorf("Expected key to be %v got: %v", test.outKey, rk) } rv := test.input.Value() if !reflect.DeepEqual(rv, test.outVal) { t.Errorf("Expected value to be %v got: %v for %d", test.outVal, rv, i) } } // now test going back from k/v bytes to struct for i, test := range tests { row, err := ParseFromKeyValue(test.outKey, test.outVal) if err != nil { t.Errorf("error parsking key/value: %v", err) } if !reflect.DeepEqual(row, test.input) { t.Errorf("Expected: %#v got: %#v for %d", test.input, row, i) } } }
func (udc *UpsideDownCouch) indexField(docID string, field document.Field, fieldIndex uint16, fieldLength int, tokenFreqs analysis.TokenFrequencies) ([]UpsideDownCouchRow, []*BackIndexTermEntry) { rows := make([]UpsideDownCouchRow, 0, 100) backIndexTermEntries := make([]*BackIndexTermEntry, 0) fieldNorm := float32(1.0 / math.Sqrt(float64(fieldLength))) for _, tf := range tokenFreqs { var termFreqRow *TermFrequencyRow if field.Options().IncludeTermVectors() { tv, newFieldRows := udc.termVectorsFromTokenFreq(fieldIndex, tf) rows = append(rows, newFieldRows...) termFreqRow = NewTermFrequencyRowWithTermVectors(tf.Term, fieldIndex, docID, uint64(frequencyFromTokenFreq(tf)), fieldNorm, tv) } else { termFreqRow = NewTermFrequencyRow(tf.Term, fieldIndex, docID, uint64(frequencyFromTokenFreq(tf)), fieldNorm) } // record the back index entry backIndexTermEntry := BackIndexTermEntry{Term: proto.String(string(tf.Term)), Field: proto.Uint32(uint32(fieldIndex))} backIndexTermEntries = append(backIndexTermEntries, &backIndexTermEntry) rows = append(rows, termFreqRow) } return rows, backIndexTermEntries }