Example #1
// Pause all activities and dump the entire database to another file system location.
func Dump(w http.ResponseWriter, r *http.Request) {
	w.Header().Set("Cache-Control", "must-revalidate")
	w.Header().Set("Content-Type", "application/json")
	var dest string
	if !Require(w, r, "dest", &dest) {
		return
	}
	// Note that symbolic links are skipped!
	walkFun := func(currPath string, info os.FileInfo, err error) error {
		if err != nil {
			return err
		}
		if info.IsDir() {
			// Calculate directory path at destination and create it
			relPath, err := filepath.Rel(V3DB.Dir, currPath)
			if err != nil {
				return err
			}
			destDir := path.Join(dest, relPath)
			if err := os.MkdirAll(destDir, 0700); err != nil {
				return err
			}
			tdlog.Printf("Dump created directory %s with permission 0700", destDir)
		} else {
			// Open the file to be copied (collection data/index)
			src, err := os.Open(currPath)
			if err != nil {
				return err
			}
			defer src.Close()
			// Calculate file path at destination and create it
			relPath, err := filepath.Rel(V3DB.Dir, currPath)
			if err != nil {
				return err
			}
			destPath := path.Join(dest, relPath)
			destFile, err := os.Create(destPath)
			if err != nil {
				return err
			}
			defer destFile.Close()
			// Copy from source to destination
			written, err := io.Copy(destFile, src)
			if err != nil {
				return err
			}
			tdlog.Printf("Dump created file %s with permission 0666 (before umask), size is %d", destPath, written)
		}
		}
		return nil
	}
	V3Sync.Lock()
	defer V3Sync.Unlock()
	V3DB.Flush()
	err := filepath.Walk(V3DB.Dir, walkFun)
	if err != nil {
		http.Error(w, fmt.Sprint(err), 500)
	}
}
Example #2
// Resolve the attribute(s) in the document structure along the given path.
func GetIn(doc interface{}, path []string) (ret []interface{}) {
	docMap, ok := doc.(map[string]interface{})
	if !ok {
		tdlog.Printf("%v cannot be indexed because type conversation to map[string]interface{} failed", doc)
		return
	}
	var thing interface{} = docMap
	// Get into each path segment
	for i, seg := range path {
		if aMap, ok := thing.(map[string]interface{}); ok {
			thing = aMap[seg]
		} else if anArray, ok := thing.([]interface{}); ok {
			// Recurse into each array element with the remaining path
			for _, element := range anArray {
				ret = append(ret, GetIn(element, path[i:])...)
			}
			return ret
		} else {
			return nil
		}
	}
	switch thing.(type) {
	case []interface{}:
		return append(ret, thing.([]interface{})...)
	default:
		return append(ret, thing)
	}
}
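To illustrate the traversal, including the recursive descent through arrays, here is a minimal usage sketch; the document shape and the exampleGetIn wrapper are made up for the example:

func exampleGetIn() {
	// Hypothetical document: "a" holds an array of sub-documents.
	doc := map[string]interface{}{
		"a": []interface{}{
			map[string]interface{}{"b": 1.0},
			map[string]interface{}{"b": 2.0},
		},
	}
	// Descends into "a", then resolves "b" inside each array element:
	// values == []interface{}{1.0, 2.0}
	values := GetIn(doc, []string{"a", "b"})
	fmt.Println(values)
}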
Example #3
func Start(db *db.DB, port int) {
	V3DB = db

	// collection management (synchronized)
	http.HandleFunc("/create", Create)
	http.HandleFunc("/rename", Rename)
	http.HandleFunc("/drop", Drop)
	http.HandleFunc("/all", All)
	http.HandleFunc("/scrub", Scrub)
	http.HandleFunc("/flush", Flush)
	// query (unsynchronized)
	http.HandleFunc("/query", Query)
	http.HandleFunc("/queryID", QueryID)
	http.HandleFunc("/count", Count)
	// document management (unsynchronized)
	http.HandleFunc("/insert", Insert)
	http.HandleFunc("/get", Get)
	http.HandleFunc("/update", Update)
	http.HandleFunc("/delete", Delete)
	// document management (with UID, unsynchronized)
	http.HandleFunc("/insertWithUID", InsertWithUID)
	http.HandleFunc("/getByUID", GetByUID)
	http.HandleFunc("/updateByUID", UpdateByUID)
	http.HandleFunc("/reassignUID", ReassignUID)
	http.HandleFunc("/deleteByUID", DeleteByUID)
	// index management (synchronized)
	http.HandleFunc("/index", Index)
	http.HandleFunc("/indexes", Indexes)
	http.HandleFunc("/unindex", Unindex)
	// misc (synchronized)
	http.HandleFunc("/shutdown", Shutdown)
	http.HandleFunc("/dump", Dump)
	// misc (unsynchronized)
	http.HandleFunc("/version", Version)
	http.HandleFunc("/memstats", MemStats)
	// flush all buffers every minute
	go func() {
		ticker := time.Tick(time.Minute)
		for range ticker {
			V3DB.Flush()
			tdlog.Printf("Buffers flushed at %s", time.Now())
		}
	}()
	tdlog.Printf("Listening on all interfaces, port %d", port)
	if err := http.ListenAndServe(fmt.Sprintf(":%d", port), nil); err != nil {
		tdlog.Fatal(err)
	}
}
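For reference, a client drives the service with plain GET requests. A minimal sketch follows: the parameter names "col" and "q" match the Require calls in the handlers shown in these examples, while the port and the {"eq": ..., "in": [...]} query shape are assumptions (the shape is suggested by the V2Lookup evaluator in Example #19):

package main

import (
	"fmt"
	"io/ioutil"
	"net/http"
	"net/url"
)

func main() {
	base := "http://localhost:8080"
	// Create a collection; "col" is the parameter checked by Require in the handlers.
	http.Get(base + "/create?col=os")
	// Run an equality lookup; the query shape is assumed, not confirmed by these examples.
	q := url.QueryEscape(`{"eq": "linux", "in": ["name"]}`)
	resp, err := http.Get(base + "/query?col=os&q=" + q)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	body, _ := ioutil.ReadAll(resp.Body)
	fmt.Println(string(body)) // the Query handler writes one JSON document per line
}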
Example #4
// Open a chunk.
func OpenChunk(number int, baseDir string) (chunk *ChunkCol, err error) {
	// Create the directory if it does not yet exist
	if err = os.MkdirAll(baseDir, 0700); err != nil {
		return
	}
	tdlog.Printf("Opening chunk %s", baseDir)
	chunk = &ChunkCol{Number: number, BaseDir: baseDir}
	// Open collection document data file
	tdlog.Printf("Opening collection data file %s", DAT_FILENAME_MAGIC)
	if chunk.Data, err = chunkfile.OpenCol(path.Join(baseDir, DAT_FILENAME_MAGIC)); err != nil {
		return
	}
	// Open PK hash table
	tdlog.Printf("Opening PK hash table file %s", PK_FILENAME_MAGIC)
	if chunk.PK, err = chunkfile.OpenHash(path.Join(baseDir, PK_FILENAME_MAGIC), []string{uid.PK_NAME}); err != nil {
		return
	}
	return
}
Example #5
File: col.go Project: jbenet/tiedot
// Close the collection.
func (col *Col) Close() {
	// Close chunks
	for _, dest := range col.Chunks {
		dest.Close()
	}
	// Close secondary indexes
	for _, index := range col.SecIndexes {
		for _, part := range index {
			part.File.Close()
		}
	}
	tdlog.Printf("Collection %s is closed", col.BaseDir)
}
Example #6
// Open the file, or create it if non-existing.
func Open(name string, growth uint64) (file *File, err error) {
	if growth < 1 {
		return nil, fmt.Errorf("Growth size (%d) is too small (opening %s)", growth, name)
	}
	file = &File{Name: name, Growth: growth}
	// Open file (get a handle) and determine its size
	if file.Fh, err = os.OpenFile(name, os.O_CREATE|os.O_RDWR, 0600); err != nil {
		return
	}
	fsize, err := file.Fh.Seek(0, os.SEEK_END)
	if err != nil {
		return
	}
	file.Size = uint64(fsize)
	if file.Size == 0 {
		// Grow the file if it appears too small
		file.CheckSizeAndEnsure(file.Growth)
		return
	}
	// Map the file into memory buffer
	if file.Buf, err = gommap.Map(file.Fh, gommap.RDWR, 0); err != nil {
		return
	}
	// Bisect the file buffer to find out how much of the file is actively in-use
	for low, mid, high := uint64(0), file.Size/2, file.Size; ; {
		switch {
		case high-mid == 1:
			if ConsecutiveTwenty0s(file.Buf[mid:]) {
				if ConsecutiveTwenty0s(file.Buf[mid-1:]) {
					file.UsedSize = mid - 1
				} else {
					file.UsedSize = mid
				}
			} else {
				file.UsedSize = high
			}
			tdlog.Printf("%s has %d bytes out of %d bytes in-use", name, file.UsedSize, file.Size)
			return
		case ConsecutiveTwenty0s(file.Buf[mid:]):
			high = mid
			mid = low + (mid-low)/2
		default:
			low = mid
			mid = mid + (high-mid)/2
		}
	}
}
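The bisection above hinges on ConsecutiveTwenty0s, whose implementation is not shown in these examples. A minimal sketch consistent with its name and with how it is called here; this is an assumption, not the project's actual code:

// Sketch (assumed semantics): report whether the slice begins with at
// least twenty consecutive zero bytes, i.e. the position appears to lie
// beyond the actively used region of the file.
func ConsecutiveTwenty0s(buf []byte) bool {
	if len(buf) < 20 {
		return false
	}
	for _, b := range buf[:20] {
		if b != 0 {
			return false
		}
	}
	return true
}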
Example #7
func Query(w http.ResponseWriter, r *http.Request) {
	w.Header().Set("Cache-Control", "must-revalidate")
	w.Header().Set("Content-Type", "text/plain")
	var col, q string
	if !Require(w, r, "col", &col) {
		return
	}
	if !Require(w, r, "q", &q) {
		return
	}
	var qJson interface{}
	if err := json.Unmarshal([]byte(q), &qJson); err != nil {
		http.Error(w, fmt.Sprintf("'%v' is not valid JSON.", q), 400)
		return
	}
	V3Sync.RLock()
	defer V3Sync.RUnlock()
	dbcol := V3DB.Use(col)
	if dbcol == nil {
		http.Error(w, fmt.Sprintf("Collection '%s' does not exist.", col), 400)
		return
	}
	// evaluate the query
	queryResult := make(map[uint64]struct{})
	if err := db.EvalQueryV2(qJson, dbcol, &queryResult); err != nil {
		http.Error(w, fmt.Sprint(err), 400)
		return
	}
	// write each document on a new line
	for k := range queryResult {
		var doc interface{}
		dbcol.Read(k, &doc)
		if doc == nil {
			continue
		}
		resp, err := json.Marshal(doc)
		if err != nil {
			tdlog.Printf("Query returned invalid JSON '%v'", doc)
			continue
		}
		w.Write([]byte(string(resp) + "\r\n"))
	}
}
Example #8
// Open a hash table file.
func OpenHash(name string, hashBits, perBucket uint64) (ht *HashTable, err error) {
	if hashBits < 2 || perBucket < 2 {
		return nil, fmt.Errorf("ERROR: Hash table is too small (%d hash bits, %d per bucket)", hashBits, perBucket)
	}
	file, err := Open(name, HASH_TABLE_GROWTH)
	if err != nil {
		return
	}
	// Divide the hash file into regions (each bucket belongs to exactly one region), and make one RW mutex per region.
	rwMutexes := make([]*sync.RWMutex, file.Size/HASH_TABLE_REGION_SIZE+1)
	for i := range rwMutexes {
		rwMutexes[i] = new(sync.RWMutex)
	}
	ht = &HashTable{File: file, HashBits: hashBits, PerBucket: perBucket,
		tableGrowMutex: sync.Mutex{},
		regionRWMutex:  rwMutexes}
	ht.BucketSize = BUCKET_HEADER_SIZE + ENTRY_SIZE*perBucket
	// Find out how many buckets there are in table - hence the amount of used space
	// .. assume the entire file is full of buckets
	ht.File.UsedSize = ht.File.Size
	ht.NumBuckets = ht.File.Size / ht.BucketSize
	// .. starting from every head bucket, find the longest chain
	ht.InitialBuckets = uint64(math.Pow(2, float64(hashBits)))
	longestBucketChain := ht.InitialBuckets
	for i := uint64(0); i < ht.InitialBuckets; i++ {
		lastBucket := ht.lastBucket(i)
		if lastBucket+1 > longestBucketChain && lastBucket+1 <= ht.NumBuckets {
			longestBucketChain = lastBucket + 1
		}
	}
	// .. the longest chain tells the amount of used space
	ht.NumBuckets = longestBucketChain
	usedSize := ht.NumBuckets * ht.BucketSize
	// Grow the file, if it is not yet large enough for all the buckets used
	if usedSize > ht.File.Size {
		ht.File.UsedSize = ht.File.Size
		ht.File.CheckSizeAndEnsure(((usedSize-ht.File.Size)/ht.BucketSize + 1) * ht.BucketSize)
	}
	ht.File.UsedSize = usedSize
	tdlog.Printf("%s has %d initial buckets, %d buckets, and %d bytes out of %d bytes in-use", name, ht.InitialBuckets, ht.NumBuckets, ht.File.UsedSize, ht.File.Size)
	return ht, nil
}
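To make the size arithmetic concrete, here is a worked example with purely illustrative figures; none of these constants are taken from the source:

// hashBits = 11                      -> InitialBuckets = 2^11 = 2048
// BUCKET_HEADER_SIZE = 10, ENTRY_SIZE = 21, perBucket = 16
//                                     -> BucketSize = 10 + 21*16 = 346 bytes
// Longest chain ends at bucket 2499   -> NumBuckets = 2500
//                                     -> UsedSize = 2500 * 346 = 865000 bytes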
Example #9
// Ensure that the file has enough room for more data. Grow the file if necessary.
func (file *File) CheckSizeAndEnsure(more uint64) {
	if file.UsedSize+more <= file.Size {
		return
	}
	// Grow the file - unmap the file, truncate and then re-map
	var err error
	if file.Buf != nil {
		if err = file.Buf.Unmap(); err != nil {
			panic(err)
		}
	}
	if err = os.Truncate(file.Name, int64(file.Size+file.Growth)); err != nil {
		panic(err)
	}
	if file.Buf, err = gommap.Map(file.Fh, gommap.RDWR, 0); err != nil {
		panic(err)
	}
	file.Size += file.Growth
	tdlog.Printf("File %s has grown %d bytes\n", file.Name, file.Growth)
	file.CheckSizeAndEnsure(more)
}
Example #10
// Open a database.
func OpenDB(dir string) (db *DB, err error) {
	if err = os.MkdirAll(dir, 0700); err != nil {
		return
	}
	db = &DB{Dir: dir, StrCol: make(map[string]*Col)}
	files, err := ioutil.ReadDir(dir)
	if err != nil {
		return
	}
	// Try to open sub-directory as document collection
	for _, f := range files {
		if f.IsDir() {
			if db.StrCol[f.Name()], err = OpenCol(path.Join(dir, f.Name())); err != nil {
				tdlog.Errorf("ERROR: Failed to open collection %s, reason: %v", f.Name(), err)
			} else {
				tdlog.Printf("Successfully opened collection %s", f.Name())
			}
		}
	}
	return
}
Example #11
File: col.go Project: jbenet/tiedot
// Open a collection (made of chunks).
func OpenCol(baseDir string, numChunks int) (col *Col, err error) {
	// Create the directory if it does not yet exist
	if err = os.MkdirAll(baseDir, 0700); err != nil {
		return
	}
	col = &Col{BaseDir: baseDir, NumChunks: numChunks, NumChunksI64: uint64(numChunks),
		SecIndexes: make(map[string][]*chunkfile.HashTable),
		Chunks:     make([]*chunk.ChunkCol, numChunks), ChunkMutexes: make([]*sync.RWMutex, numChunks)}
	// Open each chunk
	for i := 0; i < numChunks; i++ {
		col.Chunks[i], err = chunk.OpenChunk(i, path.Join(baseDir, CHUNK_DIRNAME_MAGIC+strconv.Itoa(i)))
		if err != nil {
			panic(err)
		}
		col.ChunkMutexes[i] = &sync.RWMutex{}
	}
	// Look for hash table directories
	walker := func(currPath string, info os.FileInfo, err2 error) error {
		if err2 != nil {
			// Log the error and skip the entry
			tdlog.Error(err2)
			return nil
		}
		if info.IsDir() {
			switch {
			case strings.HasPrefix(info.Name(), HASHTABLE_DIRNAME_MAGIC):
				// Found a hashtable index
				tdlog.Printf("Opening collection index hashtable %s", info.Name())
				// Figure out indexed path
				indexPath := strings.Split(info.Name()[len(HASHTABLE_DIRNAME_MAGIC):], INDEX_PATH_SEP)
				// Open a hash table index and put it into collection structure
				col.openIndex(indexPath, path.Join(baseDir, info.Name()))
			}
		}
		return nil
	}
	err = filepath.Walk(baseDir, walker)
	return
}
Example #12
// Ensure that the file has enough room for more data. Grow the file if necessary.
func (file *File) CheckSizeAndEnsure(more uint64) {
	if file.UsedSize+more <= file.Size {
		return
	}
	// Unmap file buffer
	var err error
	if file.Buf != nil {
		if err = file.Buf.Unmap(); err != nil {
			panic(err)
		}
	}
	if _, err = file.Fh.Seek(0, os.SEEK_END); err != nil {
		panic(err)
	}
	// Grow file size (incrementally)
	zeroBuf := make([]byte, FILE_GROWTH_INCREMENTAL)
	for i := uint64(0); i < file.Growth; i += FILE_GROWTH_INCREMENTAL {
		var slice []byte
		if i+FILE_GROWTH_INCREMENTAL > file.Growth {
			// Final (partial) increment - write only the remaining bytes
			slice = zeroBuf[0 : file.Growth-i]
		} else {
			slice = zeroBuf
		}
		if _, err = file.Fh.Write(slice); err != nil {
			panic(err)
		}
	}
	if err = file.Fh.Sync(); err != nil {
		panic(err)
	}
	// Re-map the (now larger) file buffer
	if file.Buf, err = gommap.Map(file.Fh, gommap.RDWR, 0); err != nil {
		panic(err)
	}
	file.Size += file.Growth
	tdlog.Printf("File %s has grown %d bytes\n", file.Name, file.Growth)
	file.CheckSizeAndEnsure(more)
}
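Callers presumably use CheckSizeAndEnsure as a guard before writing at the end of the mapped buffer. A sketch under that assumption; appendData is a hypothetical helper, not part of the source:

// Hypothetical helper: append data at the end of the in-use region,
// growing the underlying file first if the mapped buffer is too small.
func appendData(file *File, data []byte) {
	file.CheckSizeAndEnsure(uint64(len(data)))
	copy(file.Buf[file.UsedSize:], data)
	file.UsedSize += uint64(len(data))
}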
Example #13
File: db.go Project: jbenet/tiedot
func OpenDB(baseDir string) (db *DB, err error) {
	if err = os.MkdirAll(baseDir, 0700); err != nil {
		return
	}
	db = &DB{BaseDir: baseDir, StrCol: make(map[string]*Col)}
	files, err := ioutil.ReadDir(baseDir)
	if err != nil {
		return
	}
	// Try to open sub-directory as document collection
	for _, f := range files {
		if f.IsDir() {
			// Figure out how many chunks there are in the collection
			var numchunksFH *os.File
			numchunksFH, err = os.OpenFile(path.Join(baseDir, f.Name(), NUMCHUNKS_FILENAME), os.O_CREATE|os.O_RDWR, 0600)
			if err != nil {
				return
			}
			defer numchunksFH.Close()
			numchunksContent, err := ioutil.ReadAll(numchunksFH)
			if err != nil {
				panic(err)
			}
			numchunks, err := strconv.Atoi(string(numchunksContent))
			if err != nil || numchunks < 1 {
				panic(fmt.Sprintf("Cannot figure out number of chunks for collection %s, manually repair it maybe? %v", baseDir, err))
			}

			// Open the directory as a collection
			if db.StrCol[f.Name()], err = OpenCol(path.Join(baseDir, f.Name()), numchunks); err != nil {
				tdlog.Errorf("ERROR: Failed to open collection %s, error: %v", f.Name(), err)
			} else {
				tdlog.Printf("Successfully opened collection %s", f.Name())
			}
		}
	}
	return
}
Example #14
// Overwrite the file with 0s and return to its initial size.
func (file *File) Clear() {
	var err error
	if err = file.Close(); err != nil {
		panic(err)
	}
	// Shrink to 0 size, then enlarge
	if err = os.Truncate(file.Name, int64(0)); err != nil {
		panic(err)
	}
	if err = os.Truncate(file.Name, int64(file.Growth)); err != nil {
		panic(err)
	}
	// Re-open and reset current size
	if file.Fh, err = os.OpenFile(file.Name, os.O_CREATE|os.O_RDWR, 0600); err != nil {
		panic(err)
	}
	if file.Buf, err = gommap.Map(file.Fh, gommap.RDWR, 0); err != nil {
		panic(err)
	}
	file.UsedSize = 0
	file.Size = file.Growth
	tdlog.Printf("File %s has been cleared, and the size is now %d", file.Name, file.Size)
}
Example #15
// Calculate used size, total size, total number of buckets.
func (ht *HashTable) calculateSizeInfo() {
	// Find out how many buckets there are in table - hence the amount of used space
	// .. assume the entire file is full of buckets
	ht.File.UsedSize = ht.File.Size
	ht.NumBuckets = ht.File.Size / BUCKET_SIZE
	// .. starting from every head bucket, find the longest chain
	longestBucketChain := INITIAL_BUCKETS
	for i := uint64(0); i < INITIAL_BUCKETS; i++ {
		lastBucket := ht.lastBucket(i)
		if lastBucket+1 > longestBucketChain && lastBucket+1 <= ht.NumBuckets {
			longestBucketChain = lastBucket + 1
		}
	}
	// .. the longest chain tells the amount of used space
	ht.NumBuckets = longestBucketChain
	usedSize := ht.NumBuckets * BUCKET_SIZE
	// Grow the file, if it is not yet large enough for all the buckets used
	if usedSize > ht.File.Size {
		ht.File.UsedSize = ht.File.Size
		ht.File.CheckSizeAndEnsure(((usedSize-ht.File.Size)/BUCKET_SIZE + 1) * BUCKET_SIZE)
	}
	ht.File.UsedSize = usedSize
	tdlog.Printf("%s has %d buckets, and %d bytes out of %d bytes in-use", ht.File.Name, ht.NumBuckets, ht.File.UsedSize, ht.File.Size)
}
Example #16
func main() {
	rand.Seed(time.Now().UTC().UnixNano())

	var err error
	var defaultMaxprocs int
	if defaultMaxprocs, err = strconv.Atoi(os.Getenv("GOMAXPROCS")); err != nil {
		defaultMaxprocs = runtime.NumCPU() * 2
	}

	// Parse CLI parameters
	var mode, dir string
	var port, maxprocs, benchSize int
	var profile bool
	flag.StringVar(&mode, "mode", "", "[http|bench|bench2|bench3|example]")
	flag.StringVar(&dir, "dir", "", "database directory")
	flag.IntVar(&port, "port", 0, "listening port number")
	flag.IntVar(&maxprocs, "gomaxprocs", defaultMaxprocs, "GOMAXPROCS")
	flag.IntVar(&benchSize, "benchsize", 400000, "Benchmark sample size")
	flag.BoolVar(&profile, "profile", false, "write profiler results to perf.out")
	flag.BoolVar(&tdlog.VerboseLog, "verbose", true, "verbose logging true/false (default is true)")
	flag.Parse()

	// User must specify a mode to run
	if mode == "" {
		flag.PrintDefaults()
		return
	}

	// Setup appropriate GOMAXPROCS parameter
	runtime.GOMAXPROCS(maxprocs)
	tdlog.Printf("GOMAXPROCS is set to %d", maxprocs)
	if maxprocs < runtime.NumCPU() {
		tdlog.Printf("GOMAXPROCS (%d) is less than number of CPUs (%d), this may affect performance. You can change it via environment variable GOMAXPROCS or by passing CLI parameter -gomaxprocs", maxprocs, runtime.NumCPU())
	}

	// Start profiler if enabled
	if profile {
		resultFile, err := os.Create("perf.out")
		if err != nil {
			tdlog.Panicf("Cannot create profiler result file %s", resultFile)
		}
		pprof.StartCPUProfile(resultFile)
		defer pprof.StopCPUProfile()
	}

	switch mode {
	case "http": // Run HTTP service (API V3)
		if dir == "" {
			tdlog.Fatal("Please specify database directory, for example -dir=/tmp/db")
		}
		if port == 0 {
			tdlog.Fatal("Please specify port number, for example -port=8080")
		}
		db, err := db.OpenDB(dir)
		if err != nil {
			tdlog.Fatal(err)
		}
		v3.Start(db, port)

	case "bench": // Benchmark scenarios
		benchmark(benchSize)
	case "bench2":
		benchmark2(benchSize)
	case "bench3":
		benchmark3(benchSize)

	case "example": // Embedded usage example
		embeddedExample()
	default:
		flag.PrintDefaults()
		return
	}
}
Example #17
// Close all collections.
func (db *DB) Close() {
	for _, col := range db.StrCol {
		col.Close()
	}
	tdlog.Printf("Database closed (%s)", db.Dir)
}
Example #18
// Scan hash table or collection documents using an integer range.
func IntRange(intFrom interface{}, expr map[string]interface{}, src *Col, result *map[uint64]struct{}) (err error) {
	path, hasPath := expr["in"]
	if !hasPath {
		return errors.New("Missing path `in`")
	}
	// Figure out the path
	vecPath := make([]string, 0)
	if vecPathInterface, ok := path.([]interface{}); ok {
		for _, v := range vecPathInterface {
			vecPath = append(vecPath, fmt.Sprint(v))
		}
	} else {
		return errors.New(fmt.Sprintf("Expecting vector path `in`, but %v given", path))
	}
	if vecPath[0] == uid.PK_NAME {
		return errors.New("_pk is the primary index, integer range scan on _pk is meaningless")
	}
	// Figure out result number limit
	intLimit := int(0)
	if limit, hasLimit := expr["limit"]; hasLimit {
		if floatLimit, ok := limit.(float64); ok {
			intLimit = int(floatLimit)
		} else {
			return errors.New(fmt.Sprintf("Expecting `limit` as a number, but %v given", limit))
		}
	}
	// Figure out the range ("from" value & "to" value)
	from, to := int(0), int(0)
	if floatFrom, ok := intFrom.(float64); ok {
		from = int(floatFrom)
	} else {
		return errors.New(fmt.Sprintf("Expecting `int-from` as an integer, but %v given", from))
	}
	if intTo, ok := expr["int-to"]; ok {
		if floatTo, ok := intTo.(float64); ok {
			to = int(floatTo)
		} else {
			return errors.New(fmt.Sprintf("Expecting `int-to` as an integer, but %v given", to))
		}
	} else if intTo, ok := expr["int to"]; ok {
		if floatTo, ok := intTo.(float64); ok {
			to = int(floatTo)
		} else {
			return errors.New(fmt.Sprintf("Expecting `int-to` as an integer, but %v given", to))
		}
	} else {
		return errors.New(fmt.Sprintf("Missing `int-to`"))
	}
	if to > from && to-from > 1000 || from > to && from-to > 1000 {
		tdlog.Printf("Query %v is an index lookup of more than 1000 values, which may be inefficient", expr)
	}
	counter := int(0) // Number of results already collected
	htPath := strings.Join(vecPath, ",")
	if _, indexScan := src.SecIndexes[htPath]; indexScan {
		// Use index scan if it is available
		if from < to {
			// Forward scan - from low value to high value
			for lookupValue := from; lookupValue <= to; lookupValue++ {
				lookupStrValue := fmt.Sprint(lookupValue)
				hashValue := chunk.StrHash(lookupStrValue)
				_, vals := src.HashScan(htPath, hashValue, uint64(intLimit))
				for _, docID := range vals {
					if intLimit > 0 && counter == intLimit {
						break
					}
					counter += 1
					(*result)[docID] = struct{}{}
				}
			}
		} else {
			// Backward scan - from high value to low value
			for lookupValue := from; lookupValue >= to; lookupValue-- {
				lookupStrValue := fmt.Sprint(lookupValue)
				hashValue := chunk.StrHash(lookupStrValue)
				_, vals := src.HashScan(htPath, hashValue, uint64(intLimit))
				for _, docID := range vals {
					if intLimit > 0 && counter == intLimit {
						break
					}
					counter += 1
					(*result)[docID] = struct{}{}
				}
			}
		}
	} else {
		return errors.New(fmt.Sprintf("Please index %v and retry query %v", vecPath, expr))
	}
	return
}
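For reference, an expression that reaches this evaluator could look as follows; the "int-from" key is implied by the error messages, while "in", "int-to" and "limit" appear in the parsing code above:

// A hypothetical range query handled by IntRange - scan ages 18
// through 30 on the indexed path ["age"], collect at most 10 IDs:
//   {"int-from": 18, "int-to": 30, "in": ["age"], "limit": 10}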
Example #19
// Execute a value equality check ("attribute == value") using hash lookup or collection scan.
func V2Lookup(lookupValue interface{}, expr map[string]interface{}, src *Col, result *map[uint64]struct{}) (err error) {
	// Figure out lookup path - JSON array "in"
	path, hasPath := expr["in"]
	if !hasPath {
		return errors.New("Missing lookup path `in`")
	}
	vecPath := make([]string, 0)
	if vecPathInterface, ok := path.([]interface{}); ok {
		for _, v := range vecPathInterface {
			vecPath = append(vecPath, fmt.Sprint(v))
		}
	} else {
		return errors.New(fmt.Sprintf("Expecting vector lookup path `in`, but %v given", path))
	}
	// Figure out result number limit
	intLimit := uint64(0)
	if limit, hasLimit := expr["limit"]; hasLimit {
		if floatLimit, ok := limit.(float64); ok {
			intLimit = uint64(floatLimit)
		} else {
			return errors.New(fmt.Sprintf("Expecting `limit` as a number, but %v given", limit))
		}
	}
	lookupStrValue := fmt.Sprint(lookupValue) // the value to match
	if ht, indexScan := src.StrHT[strings.Join(vecPath, ",")]; indexScan {
		// If index is available, do index scan
		// Hash collision detection function
		collisionDetection := func(k, v uint64) bool {
			var doc interface{}
			if src.Read(v, &doc) != nil {
				return false
			}
			// Actually get inside the document and match the value
			for _, v := range GetIn(doc, vecPath) {
				if fmt.Sprint(v) == lookupStrValue {
					return true
				}
			}
			return false
		}
		hashValue := StrHash(lookupStrValue)
		// Do hash scan
		_, scanResult := ht.Get(hashValue, intLimit, collisionDetection)
		for _, docID := range scanResult {
			(*result)[docID] = struct{}{}
		}
	} else {
		// Do collection scan, when index is not available
		tdlog.Printf("Query %v is a collection scan, which may be inefficient", expr)
		counter := uint64(0)
		docMatcher := func(id uint64, doc interface{}) bool {
			// Get inside each document and find match
			for _, v := range GetIn(doc, vecPath) {
				if fmt.Sprint(v) == lookupStrValue {
					(*result)[id] = struct{}{}
					counter += 1
					return counter != intLimit
				}
			}
			return true
		}
		src.ForAll(docMatcher)
	}
	return
}
Example #20
// Scan hash table or collection documents using an integer range.
func V2IntRange(intFrom interface{}, expr map[string]interface{}, src *Col, result *map[uint64]struct{}) (err error) {
	path, hasPath := expr["in"]
	if !hasPath {
		return errors.New("Missing path `in`")
	}
	// Figure out the path
	vecPath := make([]string, 0)
	if vecPathInterface, ok := path.([]interface{}); ok {
		for _, v := range vecPathInterface {
			vecPath = append(vecPath, fmt.Sprint(v))
		}
	} else {
		return errors.New(fmt.Sprintf("Expecting vector path `in`, but %v given", path))
	}
	// Figure out result number limit
	intLimit := int(0)
	if limit, hasLimit := expr["limit"]; hasLimit {
		if floatLimit, ok := limit.(float64); ok {
			intLimit = int(floatLimit)
		} else {
			return errors.New(fmt.Sprintf("Expecting `limit` as a number, but %v given", limit))
		}
	}
	// Figure out the range ("from" value & "to" value)
	from, to := int(0), int(0)
	if floatFrom, ok := intFrom.(float64); ok {
		from = int(floatFrom)
	} else {
		return errors.New(fmt.Sprintf("Expecting `int-from` as an integer, but %v given", from))
	}
	if intTo, ok := expr["int-to"]; ok {
		if floatTo, ok := intTo.(float64); ok {
			to = int(floatTo)
		} else {
			return errors.New(fmt.Sprintf("Expecting `int-to` as an integer, but %v given", to))
		}
	} else if intTo, ok := expr["int to"]; ok {
		if floatTo, ok := intTo.(float64); ok {
			to = int(floatTo)
		} else {
			return errors.New(fmt.Sprintf("Expecting `int-to` as an integer, but %v given", to))
		}
	} else {
		return errors.New(fmt.Sprintf("Missing `int-to`"))
	}
	if to > from && to-from > 1000 || from > to && from-to > 1000 {
		tdlog.Printf("Query %v is an index lookup of more than 1000 values, which may be inefficient", expr)
	}
	counter := int(0) // Number of results already collected
	if ht, indexScan := src.StrHT[strings.Join(vecPath, ",")]; indexScan {
		// Use index scan if it is available
		if from < to {
			// Forward scan - from low value to high value
			for lookupValue := from; lookupValue <= to; lookupValue++ {
				lookupStrValue := fmt.Sprint(lookupValue)
				hashValue := StrHash(lookupStrValue)
				// Hash collision detection function
				collisionDetection := func(k, v uint64) bool {
					var doc interface{}
					if src.Read(v, &doc) != nil {
						return false
					}
					for _, v := range GetIn(doc, vecPath) {
						if fmt.Sprint(v) == lookupStrValue {
							return true
						}
					}
					return false
				}
				_, vals := ht.Get(hashValue, uint64(intLimit), collisionDetection)
				for _, docID := range vals {
					if intLimit != 0 && counter == intLimit {
						break
					}
					counter += 1
					(*result)[docID] = struct{}{}
				}
			}
		} else {
			// Backward scan - from high value to low value
			for lookupValue := from; lookupValue >= to; lookupValue-- {
				lookupStrValue := fmt.Sprint(lookupValue)
				hashValue := StrHash(lookupStrValue)
				collisionDetection := func(k, v uint64) bool {
					var doc interface{}
					if src.Read(v, &doc) != nil {
						return false
					}
					for _, v := range GetIn(doc, vecPath) {
						if fmt.Sprint(v) == lookupStrValue {
							return true
						}
					}
					return false
				}
				_, vals := ht.Get(hashValue, uint64(intLimit), collisionDetection)
				for _, docID := range vals {
					if intLimit != 0 && counter == intLimit {
						break
					}
					counter += 1
					(*result)[docID] = struct{}{}
				}
			}
		}
	} else {
		// Fall back to collection scan, when index is not available
		tdlog.Printf("Query %v is a collection scan which can be *very* inefficient, also query \"limit\" and reverse range support is unavailable!", expr)
		// Reversed ranges are not supported by a collection scan - normalize the range first
		if to < from {
			from, to = to, from
		}
		counter := int(0)
		docMatcher := func(id uint64, doc interface{}) bool {
			for _, v := range GetIn(doc, vecPath) {
				if floatV, ok := v.(float64); ok {
					if intV := int(floatV); intV <= to && intV >= from {
						(*result)[id] = struct{}{}
						counter += 1
						return counter != intLimit
					}
				}
			}
			return true
		}
		src.ForAll(docMatcher)
	}
	return
}
Example #21
// Flush all collection data and index files.
func (db *DB) Flush() {
	for _, col := range db.StrCol {
		col.Flush()
	}
	tdlog.Printf("All buffers flushed (database %s)", db.Dir)
}
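Example #16 refers to an embedded usage mode (embeddedExample). A minimal sketch of embedded use built only from the functions shown in these examples; the import path is assumed from the "Project: jbenet/tiedot" annotations, and the document ID is illustrative:

package main

import "github.com/jbenet/tiedot/db" // import path assumed

func main() {
	myDB, err := db.OpenDB("/tmp/mydb")
	if err != nil {
		panic(err)
	}
	defer myDB.Close()
	// Use(...) returns nil when the collection does not exist,
	// as seen in the Query handler above.
	if col := myDB.Use("os"); col != nil {
		var doc interface{}
		col.Read(123, &doc) // Read(id, &doc), as seen in the Query handler
	}
	myDB.Flush()
}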