Example #1
0
File: col.go Project: jbenet/tiedot
// Index creates a new secondary index on indexPath and puts every document
// currently in the collection on it.
//
// The index is keyed in col.SecIndexes by the path elements joined with
// INDEX_PATH_SEP. An error is returned when that key is already present.
func (col *Col) Index(indexPath []string) error {
	jointPath := strings.Join(indexPath, INDEX_PATH_SEP)
	// Check whether the index already exists
	if _, alreadyExists := col.SecIndexes[jointPath]; alreadyExists {
		return errors.New(fmt.Sprintf("Path %v is already indexed in collection %s", indexPath, col.BaseDir))
	}
	// Make the new index
	// NOTE(review): openIndex is presumably what registers the index under
	// jointPath in col.SecIndexes; its result (if any) is not visible here - confirm.
	indexBaseDir := path.Join(col.BaseDir, HASHTABLE_DIRNAME_MAGIC+jointPath)
	col.openIndex(indexPath, indexBaseDir)
	// Put all documents on the new index
	newIndex := col.SecIndexes[jointPath]
	col.ForAll(func(id uint64, doc map[string]interface{}) bool {
		for _, toBeIndexed := range GetIn(doc, indexPath) {
			if toBeIndexed == nil {
				// Nil values are not indexed.
				continue
			}
			// The value's hash selects which partition of the index receives the entry.
			hash := chunk.StrHash(toBeIndexed)
			dest := newIndex[hash%col.NumChunksI64]
			// Lock through the hash table itself rather than through a local copy
			// of its Mutex field, so the lock taken is the one other code paths
			// contend on.
			dest.Mutex.Lock()
			dest.Put(hash, id)
			dest.Mutex.Unlock()
		}
		return true
	})
	return nil
}
Example #2
0
// Lookup executes a value equality check ("attribute == value") using the
// primary key or a secondary index.
//
// expr must contain "in", a JSON array naming the attribute path, and may
// contain "limit", a number passed to the hash table lookup as the result limit.
// Matching document IDs are added as keys to *result.
//
// An error is returned when "in" is missing or not an array, when "limit" is
// not a number, when a primary key lookup value is not an unsigned integer, or
// when the path is neither the primary key nor an indexed path.
func Lookup(lookupValue interface{}, expr map[string]interface{}, src *Col, result *map[uint64]struct{}) (err error) {
	// Figure out lookup path - JSON array "in"
	path, hasPath := expr["in"]
	if !hasPath {
		return errors.New("Missing lookup path `in`")
	}
	vecPathInterface, ok := path.([]interface{})
	if !ok {
		return fmt.Errorf("Expecting vector lookup path `in`, but %v given", path)
	}
	vecPath := make([]string, 0, len(vecPathInterface))
	for _, v := range vecPathInterface {
		vecPath = append(vecPath, fmt.Sprint(v))
	}
	// Figure out result number limit
	intLimit := uint64(0)
	if limit, hasLimit := expr["limit"]; hasLimit {
		floatLimit, isNumber := limit.(float64)
		if !isNumber {
			return fmt.Errorf("Expecting `limit` as a number, but %v given", limit)
		}
		intLimit = uint64(floatLimit)
	}
	lookupStrValue := fmt.Sprint(lookupValue) // the value to match

	// Is it PK index?
	// The decoded path is compared here: the raw `path` value is known to hold a
	// []interface{} at this point, so comparing it with a string is always false.
	if len(vecPath) == 1 && vecPath[0] == uid.PK_NAME {
		// Convert lookup string value (which is the Persistent ID) to integer and put it into result
		pk, parseErr := strconv.ParseUint(lookupStrValue, 10, 64)
		if parseErr != nil {
			return parseErr
		}
		(*result)[pk] = struct{}{}
		return nil
	}

	// It might be a secondary index
	scanPath := strings.Join(vecPath, INDEX_PATH_SEP)
	secIndex, indexed := src.SecIndexes[scanPath]
	if !indexed {
		// Neither PK or secondary index...
		return fmt.Errorf("Please index %v and retry query %v", scanPath, expr)
	}
	// The value's hash selects the index partition to read from.
	lookupValueHash := chunk.StrHash(lookupStrValue)
	ht := secIndex[lookupValueHash%src.NumChunksI64]
	ht.Mutex.RLock()
	_, vals := ht.Get(lookupValueHash, intLimit)
	ht.Mutex.RUnlock()
	for _, v := range vals {
		(*result)[v] = struct{}{}
	}
	return nil
}
Example #3
0
File: col.go Project: jbenet/tiedot
// unindexDoc removes the (hash, id) entries for doc from every secondary index
// of the collection.
func (col *Col) unindexDoc(id uint64, doc interface{}) {
	for _, partitions := range col.SecIndexes {
		// Each index records the document path it covers on its first partition.
		values := GetIn(doc, partitions[0].Path)
		for _, value := range values {
			if value == nil {
				// Nil values are skipped.
				continue
			}
			// The value's hash selects the partition that holds the entry.
			key := chunk.StrHash(value)
			target := partitions[key%col.NumChunksI64]
			target.Mutex.Lock()
			target.Remove(key, id)
			target.Mutex.Unlock()
		}
	}
}
Example #4
0
// SecIndexContainsAll reports whether the secondary index at path holds, for
// every hash key in expectedKV, exactly as many values as expected and each of
// the expected document IDs among them.
//
// Progress and mismatch details are printed to standard output. The function
// panics if one of the expected IDs cannot be read from the collection.
func SecIndexContainsAll(path string, col *Col, expectedKV map[uint64][]uint64) bool {
	// expectedKV is a mapping between expected Hash Value VS PK values
	for k, ids := range expectedKV {
		fmt.Printf("Looking for key %v, id %v\n", k, ids)
		keys, vals := col.HashScan(path, k, 0)
		if len(keys) == 0 || len(vals) == 0 {
			fmt.Printf("Hash table does not have the key\n")
			return false
		}
		if len(vals) != len(ids) {
			fmt.Printf("Number not matched: %v %v\n", vals, ids)
			return false
		}
		for _, id := range ids {
			// IDs are uint64, so they are printed with %v (a %s verb would emit "%!s(uint64=...)").
			fmt.Printf("Checking for ID %v match among physical IDs %v\n", id, vals)
			// The expected ID must refer to a readable document.
			var doc interface{}
			if _, err := col.Read(id, &doc); err != nil {
				fmt.Printf("ID given by function parameter does not exist %v\n", id)
				panic(err)
			}
			match := false
			for _, v := range vals {
				if id == v {
					match = true
					break
				}
			}
			if !match {
				fmt.Printf("Hash table value does not match with ID hash %v %v\n", chunk.StrHash(id), vals[0])
				return false
			}
		}
	}
	return true
}
Example #5
0
// TestIndex checks that secondary indexes are populated on creation and stay
// in step with inserts, updates and deletes, including after the collection is
// closed and reopened.
func TestIndex(t *testing.T) {
	fmt.Println("Running index test")
	tmp := "/tmp/tiedot_col_test"
	os.RemoveAll(tmp)
	defer os.RemoveAll(tmp)
	col, err := OpenCol(tmp, 4)
	if err != nil {
		t.Fatalf("Failed to open: %v", err)
	}
	// Only the first four documents are used by this test; the fifth is not referenced.
	docs := []string{
		`{"a": {"b": {"c": 1}}, "d": 0}`,
		`{"a": {"b": [{"c": 2}]}, "d": 0}`,
		`{"a": [{"b": {"c": 3}}], "d": 0}`,
		`{"a": [{"b": {"c": [4]}}, {"b": {"c": [5, 6]}}], "d": [0, 9]}`,
		`{"a": {"b": {"c": null}}, "d": null}`}
	var jsonDoc [4]map[string]interface{}
	for i := range jsonDoc {
		if err = json.Unmarshal([]byte(docs[i]), &jsonDoc[i]); err != nil {
			t.Fatalf("Failed to parse document %d: %v", i, err)
		}
	}
	var ids [4]uint64
	// Insert a document, create two indexes and verify them
	if ids[0], err = col.Insert(jsonDoc[0]); err != nil {
		t.Fatalf("Failed to insert first document: %v", err)
	}
	if err = col.Index([]string{"a", "b", "c"}); err != nil {
		t.Fatalf("Failed to index a,b,c: %v", err)
	}
	if err = col.Index([]string{"d"}); err != nil {
		t.Fatalf("Failed to index d: %v", err)
	}
	if !SecIndexContainsAll("a,b,c", col, map[uint64][]uint64{chunk.StrHash("1"): []uint64{ids[0]}}) {
		t.Fatal("new index a,b,c does not contain the first document")
	}
	// The second index must be verified as well, not the first one twice.
	if !SecIndexContainsAll("d", col, map[uint64][]uint64{chunk.StrHash("0"): []uint64{ids[0]}}) {
		t.Fatal("new index d does not contain the first document")
	}
	// Do the following:
	// 1. Insert second and third document
	// 2. Replace the third document by the fourth document
	// 3. Remove the second document
	if ids[1], err = col.Insert(jsonDoc[1]); err != nil {
		t.Fatalf("Failed to insert second document: %v", err)
	}
	if ids[2], err = col.Insert(jsonDoc[2]); err != nil {
		t.Fatalf("Failed to insert third document: %v", err)
	}
	col.Update(ids[2], jsonDoc[3])
	col.Delete(ids[1])
	// Now the first and fourth documents are left, scrub and reopen the collection and verify index
	//	col.Scrub()
	col.Close()
	col, err = OpenCol(tmp, 4)
	if err != nil {
		t.Fatalf("Failed to reopen: %v", err)
	}
	if !SecIndexContainsAll("d", col, map[uint64][]uint64{chunk.StrHash("0"): []uint64{ids[0], ids[2]}}) {
		t.Fatal("after reopen: index d mismatch for value 0")
	}
	if !SecIndexContainsAll("a,b,c", col, map[uint64][]uint64{chunk.StrHash("1"): []uint64{ids[0]}}) {
		t.Fatal("after reopen: index a,b,c mismatch for value 1")
	}
	if !SecIndexContainsAll("a,b,c", col, map[uint64][]uint64{chunk.StrHash("4"): []uint64{ids[2]}}) {
		t.Fatal("after reopen: index a,b,c mismatch for value 4")
	}
	// Insert one more document and verify indexes
	newID, err := col.Insert(jsonDoc[0])
	if err != nil {
		t.Fatalf("Failed to insert additional document: %v", err)
	}
	if !SecIndexContainsAll("d", col, map[uint64][]uint64{chunk.StrHash("0"): []uint64{ids[0], ids[2], newID}}) {
		t.Fatal("after extra insert: index d mismatch for value 0")
	}
	if !SecIndexContainsAll("a,b,c", col, map[uint64][]uint64{chunk.StrHash("1"): []uint64{ids[0], newID}}) {
		t.Fatal("after extra insert: index a,b,c mismatch for value 1")
	}
	if !SecIndexContainsAll("a,b,c", col, map[uint64][]uint64{chunk.StrHash("4"): []uint64{ids[2]}}) {
		t.Fatal("after extra insert: index a,b,c mismatch for value 4")
	}
	if err = col.Flush(); err != nil {
		t.Fatal(err)
	}
	col.Close()
}
Example #6
0
// IntRange looks up every integer between intFrom and expr["int-to"]
// (inclusive, in either direction) in the secondary index on path expr["in"]
// and adds the matching document IDs as keys to *result.
//
// expr keys read by this function:
//   - "in": JSON array naming the indexed path (required, non-empty, not the primary key)
//   - "int-to" (or the alternative spelling "int to"): end of the range (required, number)
//   - "limit": maximum number of results to collect; 0 or absent means no limit
//
// Values are visited from intFrom towards the range end, so the range is
// scanned downwards when intFrom is not less than the end. An error is
// returned for malformed parameters or when the path has no secondary index.
func IntRange(intFrom interface{}, expr map[string]interface{}, src *Col, result *map[uint64]struct{}) (err error) {
	path, hasPath := expr["in"]
	if !hasPath {
		return errors.New("Missing path `in`")
	}
	// Figure out the path
	vecPathInterface, ok := path.([]interface{})
	if !ok {
		return fmt.Errorf("Expecting vector path `in`, but %v given", path)
	}
	if len(vecPathInterface) == 0 {
		// An empty path would otherwise cause an out-of-range access below.
		return fmt.Errorf("Expecting non-empty vector path `in`, but %v given", path)
	}
	vecPath := make([]string, 0, len(vecPathInterface))
	for _, v := range vecPathInterface {
		vecPath = append(vecPath, fmt.Sprint(v))
	}
	if vecPath[0] == uid.PK_NAME {
		return errors.New("_pk is the primary index, integer range scan on _pk is meaningless")
	}
	// Figure out result number limit
	intLimit := 0
	if limit, hasLimit := expr["limit"]; hasLimit {
		floatLimit, isNumber := limit.(float64)
		if !isNumber {
			return fmt.Errorf("Expecting `limit` as a number, but %v given", limit)
		}
		intLimit = int(floatLimit)
	}
	// Figure out the range ("from" value & "to" value)
	floatFrom, ok := intFrom.(float64)
	if !ok {
		// Report the offending input, not the zero-valued placeholder.
		return fmt.Errorf("Expecting `int-from` as an integer, but %v given", intFrom)
	}
	from := int(floatFrom)
	intTo, hasTo := expr["int-to"]
	if !hasTo {
		intTo, hasTo = expr["int to"]
	}
	if !hasTo {
		return errors.New("Missing `int-to`")
	}
	floatTo, ok := intTo.(float64)
	if !ok {
		return fmt.Errorf("Expecting `int-to` as an integer, but %v given", intTo)
	}
	to := int(floatTo)
	if to > from && to-from > 1000 || from > to && from-to > 1000 {
		tdlog.Printf("Query %v is an index lookup of more than 1000 values, which may be inefficient", expr)
	}
	htPath := strings.Join(vecPath, ",")
	if _, indexScan := src.SecIndexes[htPath]; !indexScan {
		return fmt.Errorf("Please index %v and retry query %v", vecPath, expr)
	}
	// Walk from `from` towards `to`: upwards when from < to, downwards otherwise
	// (a single lookup when both are equal).
	step := 1
	if from >= to {
		step = -1
	}
	counter := 0 // Number of results already collected
	for lookupValue := from; ; lookupValue += step {
		if intLimit > 0 && counter >= intLimit {
			// Limit reached - no need to probe the remaining values.
			return nil
		}
		hashValue := chunk.StrHash(fmt.Sprint(lookupValue))
		_, vals := src.HashScan(htPath, hashValue, uint64(intLimit))
		for _, docID := range vals {
			if intLimit > 0 && counter == intLimit {
				return nil
			}
			counter++
			(*result)[docID] = struct{}{}
		}
		// Terminating on equality (rather than <= / >=) also ends the scan
		// correctly when `to` is the largest or smallest representable int.
		if lookupValue == to {
			return nil
		}
	}
}