// Pause all activities and dump the entire database to another file system location.
func Dump(w http.ResponseWriter, r *http.Request) {
	w.Header().Set("Cache-Control", "must-revalidate")
	w.Header().Set("Content-Type", "application/json")
	var dest string
	if !Require(w, r, "dest", &dest) {
		return
	}
	// Note that symbolic links are skipped!
	walkFun := func(currPath string, info os.FileInfo, err error) error {
		if err != nil {
			return err
		}
		if info.IsDir() {
			// Calculate the directory path at the destination and create it
			relPath, err := filepath.Rel(V3DB.Dir, currPath)
			if err != nil {
				return err
			}
			destDir := path.Join(dest, relPath)
			if err := os.MkdirAll(destDir, 0700); err != nil {
				return err
			}
			tdlog.Printf("Dump created directory %s with permission 0700", destDir)
		} else {
			// Open the file to be copied (collection data/index)
			src, err := os.Open(currPath)
			if err != nil {
				return err
			}
			defer src.Close()
			// Calculate the file path at the destination and create it
			relPath, err := filepath.Rel(V3DB.Dir, currPath)
			if err != nil {
				return err
			}
			destPath := path.Join(dest, relPath)
			destFile, err := os.Create(destPath)
			if err != nil {
				return err
			}
			defer destFile.Close()
			// Copy from source to destination
			written, err := io.Copy(destFile, src)
			if err != nil {
				return err
			}
			tdlog.Printf("Dump created file %s with permission 666 (before umask), size is %d", destPath, written)
		}
		return nil
	}
	V3Sync.Lock()
	defer V3Sync.Unlock()
	V3DB.Flush()
	if err := filepath.Walk(V3DB.Dir, walkFun); err != nil {
		http.Error(w, fmt.Sprint(err), 500)
	}
}
// Resolve the attribute(s) in the document structure along the given path.
func GetIn(doc interface{}, path []string) (ret []interface{}) {
	docMap, ok := doc.(map[string]interface{})
	if !ok {
		tdlog.Printf("%v cannot be indexed because type conversion to map[string]interface{} failed", doc)
		return
	}
	var thing interface{} = docMap
	// Get into each path segment
	for i, seg := range path {
		if aMap, ok := thing.(map[string]interface{}); ok {
			thing = aMap[seg]
		} else if anArray, ok := thing.([]interface{}); ok {
			// Fan out over the array elements and resolve the remaining path in each one
			for _, element := range anArray {
				ret = append(ret, GetIn(element, path[i:])...)
			}
			return ret
		} else {
			return nil
		}
	}
	switch thing.(type) {
	case []interface{}:
		return append(ret, thing.([]interface{})...)
	default:
		return append(ret, thing)
	}
}
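// Illustrative example, not part of the original source: how GetIn behaves on a
// document decoded by encoding/json. A map segment descends one level; an array
// encountered mid-path fans out and resolves the rest of the path in every
// element. Assumes encoding/json and fmt are imported in this file.
func exampleGetIn() {
	var doc interface{}
	json.Unmarshal([]byte(`{"a": [{"b": 1}, {"b": 2}]}`), &doc)
	for _, v := range GetIn(doc, []string{"a", "b"}) {
		fmt.Println(v) // prints 1 then 2 (as float64, per encoding/json)
	}
}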
// Start the HTTP service on the given port and register all API V3 endpoints.
func Start(db *db.DB, port int) {
	V3DB = db
	// collection management (synchronized)
	http.HandleFunc("/create", Create)
	http.HandleFunc("/rename", Rename)
	http.HandleFunc("/drop", Drop)
	http.HandleFunc("/all", All)
	http.HandleFunc("/scrub", Scrub)
	http.HandleFunc("/flush", Flush)
	// query (asynchronized)
	http.HandleFunc("/query", Query)
	http.HandleFunc("/queryID", QueryID)
	http.HandleFunc("/count", Count)
	// document management (asynchronized)
	http.HandleFunc("/insert", Insert)
	http.HandleFunc("/get", Get)
	http.HandleFunc("/update", Update)
	http.HandleFunc("/delete", Delete)
	// document management (with UID, asynchronized)
	http.HandleFunc("/insertWithUID", InsertWithUID)
	http.HandleFunc("/getByUID", GetByUID)
	http.HandleFunc("/updateByUID", UpdateByUID)
	http.HandleFunc("/reassignUID", ReassignUID)
	http.HandleFunc("/deleteByUID", DeleteByUID)
	// index management (synchronized)
	http.HandleFunc("/index", Index)
	http.HandleFunc("/indexes", Indexes)
	http.HandleFunc("/unindex", Unindex)
	// misc (synchronized)
	http.HandleFunc("/shutdown", Shutdown)
	http.HandleFunc("/dump", Dump)
	// misc (asynchronized)
	http.HandleFunc("/version", Version)
	http.HandleFunc("/memstats", MemStats)
	// flush all buffers every minute
	go func() {
		ticker := time.Tick(time.Minute)
		for range ticker {
			V3DB.Flush()
			tdlog.Printf("Buffers flushed at %s", time.Now())
		}
	}()
	tdlog.Printf("Listening on all interfaces, port %d", port)
	if err := http.ListenAndServe(fmt.Sprintf(":%d", port), nil); err != nil {
		tdlog.Fatal(err)
	}
}
// Open a chunk.
func OpenChunk(number int, baseDir string) (chunk *ChunkCol, err error) {
	// Create the directory if it does not yet exist
	if err = os.MkdirAll(baseDir, 0700); err != nil {
		return
	}
	tdlog.Printf("Opening chunk %s", baseDir)
	chunk = &ChunkCol{Number: number, BaseDir: baseDir}
	// Open collection document data file
	tdlog.Printf("Opening collection data file %s", DAT_FILENAME_MAGIC)
	if chunk.Data, err = chunkfile.OpenCol(path.Join(baseDir, DAT_FILENAME_MAGIC)); err != nil {
		return
	}
	// Open PK hash table
	tdlog.Printf("Opening PK hash table file %s", PK_FILENAME_MAGIC)
	if chunk.PK, err = chunkfile.OpenHash(path.Join(baseDir, PK_FILENAME_MAGIC), []string{uid.PK_NAME}); err != nil {
		return
	}
	return
}
// Close the collection.
func (col *Col) Close() {
	// Close chunks
	for _, chunk := range col.Chunks {
		chunk.Close()
	}
	// Close secondary indexes
	for _, index := range col.SecIndexes {
		for _, part := range index {
			part.File.Close()
		}
	}
	tdlog.Printf("Collection %s is closed", col.BaseDir)
}
// Open the file, or create it if it does not exist.
func Open(name string, growth uint64) (file *File, err error) {
	if growth < 1 {
		return nil, errors.New(fmt.Sprintf("Growth size (%d) is too small (opening %s)", growth, name))
	}
	file = &File{Name: name, Growth: growth}
	// Open the file (get a handle) and determine its size
	if file.Fh, err = os.OpenFile(name, os.O_CREATE|os.O_RDWR, 0600); err != nil {
		return
	}
	fsize, err := file.Fh.Seek(0, os.SEEK_END)
	if err != nil {
		return
	}
	file.Size = uint64(fsize)
	if file.Size == 0 {
		// Grow the file if it appears too small
		file.CheckSizeAndEnsure(file.Growth)
		return
	}
	// Map the file into a memory buffer
	if file.Buf, err = gommap.Map(file.Fh, gommap.RDWR, 0); err != nil {
		return
	}
	// Bi-sect the file buffer to find out how much space in the file is actively in-use
bisect:
	for low, mid, high := uint64(0), file.Size/2, file.Size; ; {
		switch {
		case high-mid == 1:
			if ConsecutiveTwenty0s(file.Buf[mid:]) {
				if ConsecutiveTwenty0s(file.Buf[mid-1:]) {
					file.UsedSize = mid - 1
				} else {
					file.UsedSize = mid
				}
			} else {
				file.UsedSize = high
			}
			break bisect
		case ConsecutiveTwenty0s(file.Buf[mid:]):
			high = mid
			mid = low + (mid-low)/2
		default:
			low = mid
			mid = mid + (high-mid)/2
		}
	}
	tdlog.Printf("%s has %d bytes out of %d bytes in-use", name, file.UsedSize, file.Size)
	return
}
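// Illustrative sketch, not part of the original source: what the in-use
// boundary is for. The file format treats a run of twenty consecutive zero
// bytes as never-written space, so the bisection above lands UsedSize on the
// append point. Under that assumption, a caller could append roughly like this
// (the file name and growth value are arbitrary):
func exampleAppend(data []byte) {
	file, err := Open("/tmp/example.dat", 1048576)
	if err != nil {
		panic(err)
	}
	// Make room, then write at the in-use boundary through the mapped buffer
	file.CheckSizeAndEnsure(uint64(len(data)))
	copy(file.Buf[file.UsedSize:], data)
	file.UsedSize += uint64(len(data))
}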
// Execute a query and write each result document as JSON on its own line.
func Query(w http.ResponseWriter, r *http.Request) {
	w.Header().Set("Cache-Control", "must-revalidate")
	w.Header().Set("Content-Type", "text/plain")
	var col, q string
	if !Require(w, r, "col", &col) {
		return
	}
	if !Require(w, r, "q", &q) {
		return
	}
	var qJson interface{}
	if err := json.Unmarshal([]byte(q), &qJson); err != nil {
		http.Error(w, fmt.Sprintf("'%v' is not valid JSON.", q), 400)
		return
	}
	V3Sync.RLock()
	defer V3Sync.RUnlock()
	dbcol := V3DB.Use(col)
	if dbcol == nil {
		http.Error(w, fmt.Sprintf("Collection '%s' does not exist.", col), 400)
		return
	}
	// Evaluate the query
	queryResult := make(map[uint64]struct{})
	if err := db.EvalQueryV2(qJson, dbcol, &queryResult); err != nil {
		http.Error(w, fmt.Sprint(err), 400)
		return
	}
	// Write each document on a new line
	for k := range queryResult {
		var doc interface{}
		dbcol.Read(k, &doc)
		if doc == nil {
			continue
		}
		resp, err := json.Marshal(doc)
		if err != nil {
			tdlog.Printf("Query returned invalid JSON '%v'", doc)
			continue
		}
		w.Write([]byte(string(resp) + "\r\n"))
	}
}
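// Illustrative client sketch, not part of the original source: calling the
// /query endpoint over HTTP. The query shape (an equality lookup with "eq" and
// an "in" path) is an assumption inferred from the V2 query evaluators further
// down; the endpoint answers with one JSON document per line. Assumes net/url,
// io/ioutil, and fmt are imported, the server runs on port 8080, and a
// hypothetical collection "People" exists.
func exampleQueryClient() {
	params := url.Values{}
	params.Set("col", "People")
	params.Set("q", `{"eq": "Alice", "in": ["name"]}`)
	resp, err := http.Get("http://localhost:8080/query?" + params.Encode())
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	body, _ := ioutil.ReadAll(resp.Body)
	fmt.Print(string(body)) // one matching document per line
}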
// Open a hash table file.
func OpenHash(name string, hashBits, perBucket uint64) (ht *HashTable, err error) {
	if hashBits < 2 || perBucket < 2 {
		return nil, errors.New(fmt.Sprintf("ERROR: Hash table is too small (%d hash bits, %d per bucket)", hashBits, perBucket))
	}
	file, err := Open(name, HASH_TABLE_GROWTH)
	if err != nil {
		return
	}
	// Divide the hash file into regions (each bucket belongs to exactly one region), and make one RW mutex per region.
	rwMutexes := make([]*sync.RWMutex, file.Size/HASH_TABLE_REGION_SIZE+1)
	for i := range rwMutexes {
		rwMutexes[i] = new(sync.RWMutex)
	}
	ht = &HashTable{
		File:           file,
		HashBits:       hashBits,
		PerBucket:      perBucket,
		tableGrowMutex: sync.Mutex{},
		regionRWMutex:  rwMutexes,
	}
	ht.BucketSize = BUCKET_HEADER_SIZE + ENTRY_SIZE*perBucket
	// Find out how many buckets there are in the table - hence the amount of used space
	// .. assume the entire file is full of buckets
	ht.File.UsedSize = ht.File.Size
	ht.NumBuckets = ht.File.Size / ht.BucketSize
	// .. starting from every head bucket, find the longest chain
	ht.InitialBuckets = uint64(math.Pow(2, float64(hashBits)))
	longestBucketChain := ht.InitialBuckets
	for i := uint64(0); i < ht.InitialBuckets; i++ {
		lastBucket := ht.lastBucket(i)
		if lastBucket+1 > longestBucketChain && lastBucket+1 <= ht.NumBuckets {
			longestBucketChain = lastBucket + 1
		}
	}
	// .. the longest chain tells the amount of used space
	ht.NumBuckets = longestBucketChain
	usedSize := ht.NumBuckets * ht.BucketSize
	// Grow the file if it is not yet large enough for all the buckets in use
	if usedSize > ht.File.Size {
		ht.File.UsedSize = ht.File.Size
		ht.File.CheckSizeAndEnsure(((usedSize-ht.File.Size)/ht.BucketSize + 1) * ht.BucketSize)
	}
	ht.File.UsedSize = usedSize
	tdlog.Printf("%s has %d initial buckets, %d buckets, and %d bytes out of %d bytes in-use", name, ht.InitialBuckets, ht.NumBuckets, ht.File.UsedSize, ht.File.Size)
	return ht, nil
}
// Ensure that the file has enough room for more data. Grow the file if necessary.
func (file *File) CheckSizeAndEnsure(more uint64) {
	if file.UsedSize+more <= file.Size {
		return
	}
	// Grow the file - unmap the buffer, truncate, then re-map
	var err error
	if file.Buf != nil {
		if err = file.Buf.Unmap(); err != nil {
			panic(err)
		}
	}
	if err = os.Truncate(file.Name, int64(file.Size+file.Growth)); err != nil {
		panic(err)
	}
	if file.Buf, err = gommap.Map(file.Fh, gommap.RDWR, 0); err != nil {
		panic(err)
	}
	file.Size += file.Growth
	tdlog.Printf("File %s has grown %d bytes\n", file.Name, file.Growth)
	file.CheckSizeAndEnsure(more)
}
// Open a database.
func OpenDB(dir string) (db *DB, err error) {
	if err = os.MkdirAll(dir, 0700); err != nil {
		return
	}
	db = &DB{Dir: dir, StrCol: make(map[string]*Col)}
	files, err := ioutil.ReadDir(dir)
	if err != nil {
		return
	}
	// Try to open each sub-directory as a document collection
	for _, f := range files {
		if f.IsDir() {
			if db.StrCol[f.Name()], err = OpenCol(path.Join(dir, f.Name())); err != nil {
				tdlog.Errorf("ERROR: Failed to open collection %s, reason: %v", f.Name(), err)
			} else {
				tdlog.Printf("Successfully opened collection %s", f.Name())
			}
		}
	}
	return
}
// Open a collection (made of chunks).
func OpenCol(baseDir string, numChunks int) (col *Col, err error) {
	// Create the directory if it does not yet exist
	if err = os.MkdirAll(baseDir, 0700); err != nil {
		return
	}
	col = &Col{
		BaseDir: baseDir, NumChunks: numChunks, NumChunksI64: uint64(numChunks),
		SecIndexes:   make(map[string][]*chunkfile.HashTable),
		Chunks:       make([]*chunk.ChunkCol, numChunks),
		ChunkMutexes: make([]*sync.RWMutex, numChunks),
	}
	// Open each chunk
	for i := 0; i < numChunks; i++ {
		col.Chunks[i], err = chunk.OpenChunk(i, path.Join(baseDir, CHUNK_DIRNAME_MAGIC+strconv.Itoa(i)))
		if err != nil {
			panic(err)
		}
		col.ChunkMutexes[i] = &sync.RWMutex{}
	}
	// Look for hash table directories
	walker := func(currPath string, info os.FileInfo, err2 error) error {
		if err2 != nil {
			// Log the error and skip the entry
			tdlog.Error(err2)
			return nil
		}
		if info.IsDir() && strings.HasPrefix(info.Name(), HASHTABLE_DIRNAME_MAGIC) {
			// Found a hash table index
			tdlog.Printf("Opening collection index hash table %s", info.Name())
			// Figure out the indexed path
			indexPath := strings.Split(info.Name()[len(HASHTABLE_DIRNAME_MAGIC):], INDEX_PATH_SEP)
			// Open the hash table index and put it into the collection structure
			col.openIndex(indexPath, path.Join(baseDir, info.Name()))
		}
		return nil
	}
	err = filepath.Walk(baseDir, walker)
	return
}
// Ensure that the file has enough room for more data. Grow the file if necessary.
func (file *File) CheckSizeAndEnsure(more uint64) {
	if file.UsedSize+more <= file.Size {
		return
	}
	// Unmap the file buffer
	var err error
	if file.Buf != nil {
		if err = file.Buf.Unmap(); err != nil {
			panic(err)
		}
	}
	if _, err = file.Fh.Seek(0, os.SEEK_END); err != nil {
		panic(err)
	}
	// Grow the file size (incrementally, by appending zeros)
	zeroBuf := make([]byte, FILE_GROWTH_INCREMENTAL)
	for i := uint64(0); i < file.Growth; i += FILE_GROWTH_INCREMENTAL {
		var slice []byte
		if i+FILE_GROWTH_INCREMENTAL > file.Growth {
			// Final, partial increment: write only the remaining bytes
			slice = zeroBuf[0 : file.Growth-i]
		} else {
			slice = zeroBuf
		}
		if _, err = file.Fh.Write(slice); err != nil {
			panic(err)
		}
	}
	if err = file.Fh.Sync(); err != nil {
		panic(err)
	}
	// Re-map the (now larger) file buffer
	if file.Buf, err = gommap.Map(file.Fh, gommap.RDWR, 0); err != nil {
		panic(err)
	}
	file.Size += file.Growth
	tdlog.Printf("File %s has grown %d bytes\n", file.Name, file.Growth)
	file.CheckSizeAndEnsure(more)
}
// Open a database.
func OpenDB(baseDir string) (db *DB, err error) {
	if err = os.MkdirAll(baseDir, 0700); err != nil {
		return
	}
	db = &DB{BaseDir: baseDir, StrCol: make(map[string]*Col)}
	files, err := ioutil.ReadDir(baseDir)
	if err != nil {
		return
	}
	// Try to open each sub-directory as a document collection
	for _, f := range files {
		if f.IsDir() {
			// Figure out how many chunks there are in the collection
			var numchunksFH *os.File
			numchunksFH, err = os.OpenFile(path.Join(baseDir, f.Name(), NUMCHUNKS_FILENAME), os.O_CREATE|os.O_RDWR, 0600)
			if err != nil {
				return
			}
			defer numchunksFH.Close()
			numchunksContent, err := ioutil.ReadAll(numchunksFH)
			if err != nil {
				panic(err)
			}
			numchunks, err := strconv.Atoi(string(numchunksContent))
			if err != nil || numchunks < 1 {
				panic(fmt.Sprintf("Cannot figure out the number of chunks for collection %s, manually repair it maybe? %v", baseDir, err))
			}
			// Open the directory as a collection
			if db.StrCol[f.Name()], err = OpenCol(path.Join(baseDir, f.Name()), numchunks); err != nil {
				tdlog.Errorf("ERROR: Failed to open collection %s, error: %v", f.Name(), err)
			} else {
				tdlog.Printf("Successfully opened collection %s", f.Name())
			}
		}
	}
	return
}
// Overwrite the file with 0s and return to its initial size.
func (file *File) Clear() {
	var err error
	if err = file.Close(); err != nil {
		panic(err)
	}
	// Shrink to 0 size, then enlarge
	if err = os.Truncate(file.Name, int64(0)); err != nil {
		panic(err)
	}
	if err = os.Truncate(file.Name, int64(file.Growth)); err != nil {
		panic(err)
	}
	// Re-open and reset the current size
	if file.Fh, err = os.OpenFile(file.Name, os.O_CREATE|os.O_RDWR, 0600); err != nil {
		panic(err)
	}
	if file.Buf, err = gommap.Map(file.Fh, gommap.RDWR, 0); err != nil {
		panic(err)
	}
	file.UsedSize = 0
	file.Size = file.Growth
	tdlog.Printf("File %s has been cleared, and the size is now %d", file.Name, file.Size)
}
// Calculate used size, total size, and total number of buckets.
func (ht *HashTable) calculateSizeInfo() {
	// Find out how many buckets there are in the table - hence the amount of used space
	// .. assume the entire file is full of buckets
	ht.File.UsedSize = ht.File.Size
	ht.NumBuckets = ht.File.Size / BUCKET_SIZE
	// .. starting from every head bucket, find the longest chain
	longestBucketChain := INITIAL_BUCKETS
	for i := uint64(0); i < INITIAL_BUCKETS; i++ {
		lastBucket := ht.lastBucket(i)
		if lastBucket+1 > longestBucketChain && lastBucket+1 <= ht.NumBuckets {
			longestBucketChain = lastBucket + 1
		}
	}
	// .. the longest chain tells the amount of used space
	ht.NumBuckets = longestBucketChain
	usedSize := ht.NumBuckets * BUCKET_SIZE
	// Grow the file if it is not yet large enough for all the buckets in use
	if usedSize > ht.File.Size {
		ht.File.UsedSize = ht.File.Size
		ht.File.CheckSizeAndEnsure(((usedSize-ht.File.Size)/BUCKET_SIZE + 1) * BUCKET_SIZE)
	}
	ht.File.UsedSize = usedSize
	tdlog.Printf("%s has %d buckets, and %d bytes out of %d bytes in-use", ht.File.Name, ht.NumBuckets, ht.File.UsedSize, ht.File.Size)
}
func main() {
	rand.Seed(time.Now().UTC().UnixNano())
	var err error
	var defaultMaxprocs int
	if defaultMaxprocs, err = strconv.Atoi(os.Getenv("GOMAXPROCS")); err != nil {
		defaultMaxprocs = runtime.NumCPU() * 2
	}
	// Parse CLI parameters
	var mode, dir string
	var port, maxprocs, benchSize int
	var profile bool
	flag.StringVar(&mode, "mode", "", "http|bench|bench2|bench3|example")
	flag.StringVar(&dir, "dir", "", "database directory")
	flag.IntVar(&port, "port", 0, "listening port number")
	flag.IntVar(&maxprocs, "gomaxprocs", defaultMaxprocs, "GOMAXPROCS")
	flag.IntVar(&benchSize, "benchsize", 400000, "Benchmark sample size")
	flag.BoolVar(&profile, "profile", false, "write profiler results to perf.out")
	flag.BoolVar(&tdlog.VerboseLog, "verbose", true, "verbose logging true/false (default is true)")
	flag.Parse()
	// User must specify a mode to run
	if mode == "" {
		flag.PrintDefaults()
		return
	}
	// Set the GOMAXPROCS parameter
	runtime.GOMAXPROCS(maxprocs)
	tdlog.Printf("GOMAXPROCS is set to %d", maxprocs)
	if maxprocs < runtime.NumCPU() {
		tdlog.Printf("GOMAXPROCS (%d) is less than the number of CPUs (%d), which may affect performance. You can change it via the environment variable GOMAXPROCS or the CLI parameter -gomaxprocs", maxprocs, runtime.NumCPU())
	}
	// Start the profiler if enabled
	if profile {
		resultFile, err := os.Create("perf.out")
		if err != nil {
			tdlog.Panicf("Cannot create profiler result file %s", "perf.out")
		}
		pprof.StartCPUProfile(resultFile)
		defer pprof.StopCPUProfile()
	}
	switch mode {
	case "http":
		// Run HTTP service (API V3)
		if dir == "" {
			tdlog.Fatal("Please specify the database directory, for example -dir=/tmp/db")
		}
		if port == 0 {
			tdlog.Fatal("Please specify the port number, for example -port=8080")
		}
		db, err := db.OpenDB(dir)
		if err != nil {
			tdlog.Fatal(err)
		}
		v3.Start(db, port)
	case "bench":
		// Benchmark scenarios
		benchmark(benchSize)
	case "bench2":
		benchmark2(benchSize)
	case "bench3":
		benchmark3(benchSize)
	case "example":
		// Embedded usage example
		embeddedExample()
	default:
		flag.PrintDefaults()
		return
	}
}
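// Illustrative invocations, not part of the original source (the binary name
// "tiedot" is an assumption):
//
//	tiedot -mode=http -dir=/tmp/db -port=8080
//	tiedot -mode=bench -benchsize=100000 -profile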
// Close all collections.
func (db *DB) Close() {
	for _, col := range db.StrCol {
		col.Close()
	}
	tdlog.Printf("Database closed (%s)", db.Dir)
}
// Scan a hash table or collection documents using an integer range.
func IntRange(intFrom interface{}, expr map[string]interface{}, src *Col, result *map[uint64]struct{}) (err error) {
	path, hasPath := expr["in"]
	if !hasPath {
		return errors.New("Missing path `in`")
	}
	// Figure out the path
	vecPath := make([]string, 0)
	if vecPathInterface, ok := path.([]interface{}); ok {
		for _, v := range vecPathInterface {
			vecPath = append(vecPath, fmt.Sprint(v))
		}
	} else {
		return errors.New(fmt.Sprintf("Expecting vector path `in`, but %v given", path))
	}
	if len(vecPath) == 0 {
		return errors.New("Vector path `in` must not be empty")
	}
	if vecPath[0] == uid.PK_NAME {
		return errors.New("_pk is the primary index; an integer range scan on _pk is meaningless")
	}
	// Figure out the result number limit
	intLimit := int(0)
	if limit, hasLimit := expr["limit"]; hasLimit {
		if floatLimit, ok := limit.(float64); ok {
			intLimit = int(floatLimit)
		} else {
			return errors.New(fmt.Sprintf("Expecting `limit` as a number, but %v given", limit))
		}
	}
	// Figure out the range ("from" value & "to" value)
	from, to := int(0), int(0)
	if floatFrom, ok := intFrom.(float64); ok {
		from = int(floatFrom)
	} else {
		return errors.New(fmt.Sprintf("Expecting `int-from` as an integer, but %v given", intFrom))
	}
	if intTo, ok := expr["int-to"]; ok {
		if floatTo, ok := intTo.(float64); ok {
			to = int(floatTo)
		} else {
			return errors.New(fmt.Sprintf("Expecting `int-to` as an integer, but %v given", intTo))
		}
	} else if intTo, ok := expr["int to"]; ok {
		if floatTo, ok := intTo.(float64); ok {
			to = int(floatTo)
		} else {
			return errors.New(fmt.Sprintf("Expecting `int-to` as an integer, but %v given", intTo))
		}
	} else {
		return errors.New("Missing `int-to`")
	}
	if to > from && to-from > 1000 || from > to && from-to > 1000 {
		tdlog.Printf("Query %v is an index lookup of more than 1000 values, which may be inefficient", expr)
	}
	counter := int(0) // Number of results already collected
	htPath := strings.Join(vecPath, ",")
	if _, indexScan := src.SecIndexes[htPath]; !indexScan {
		return errors.New(fmt.Sprintf("Please index %v and retry query %v", vecPath, expr))
	}
	if from < to {
		// Forward scan - from low value to high value
		for lookupValue := from; lookupValue <= to; lookupValue++ {
			lookupStrValue := fmt.Sprint(lookupValue)
			hashValue := chunk.StrHash(lookupStrValue)
			_, vals := src.HashScan(htPath, hashValue, uint64(intLimit))
			for _, docID := range vals {
				if intLimit > 0 && counter == intLimit {
					break
				}
				counter++
				(*result)[docID] = struct{}{}
			}
		}
	} else {
		// Backward scan - from high value to low value
		for lookupValue := from; lookupValue >= to; lookupValue-- {
			lookupStrValue := fmt.Sprint(lookupValue)
			hashValue := chunk.StrHash(lookupStrValue)
			_, vals := src.HashScan(htPath, hashValue, uint64(intLimit))
			for _, docID := range vals {
				if intLimit > 0 && counter == intLimit {
					break
				}
				counter++
				(*result)[docID] = struct{}{}
			}
		}
	}
	return
}
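// Illustrative sketch, not part of the original source: the query expression
// shape IntRange expects, reconstructed from the checks above. The evaluator is
// assumed to pass the "int-from" value separately as intFrom; JSON numbers
// decode as float64, hence the float64 literals, and "age" is a hypothetical
// indexed attribute.
var exampleIntRangeExpr = map[string]interface{}{
	"int-from": float64(18),
	"int-to":   float64(65),
	"in":       []interface{}{"age"},
	"limit":    float64(10),
}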
// Execute a value equality check ("attribute == value") using a hash lookup or a collection scan.
func V2Lookup(lookupValue interface{}, expr map[string]interface{}, src *Col, result *map[uint64]struct{}) (err error) {
	// Figure out the lookup path - JSON array "in"
	path, hasPath := expr["in"]
	if !hasPath {
		return errors.New("Missing lookup path `in`")
	}
	vecPath := make([]string, 0)
	if vecPathInterface, ok := path.([]interface{}); ok {
		for _, v := range vecPathInterface {
			vecPath = append(vecPath, fmt.Sprint(v))
		}
	} else {
		return errors.New(fmt.Sprintf("Expecting vector lookup path `in`, but %v given", path))
	}
	// Figure out the result number limit
	intLimit := uint64(0)
	if limit, hasLimit := expr["limit"]; hasLimit {
		if floatLimit, ok := limit.(float64); ok {
			intLimit = uint64(floatLimit)
		} else {
			return errors.New(fmt.Sprintf("Expecting `limit` as a number, but %v given", limit))
		}
	}
	lookupStrValue := fmt.Sprint(lookupValue) // the value to match
	if ht, indexScan := src.StrHT[strings.Join(vecPath, ",")]; indexScan {
		// If an index is available, do an index scan
		// Hash collision detection function
		collisionDetection := func(k, v uint64) bool {
			var doc interface{}
			if src.Read(v, &doc) != nil {
				return false
			}
			// Actually get inside the document and match the value
			for _, v := range GetIn(doc, vecPath) {
				if fmt.Sprint(v) == lookupStrValue {
					return true
				}
			}
			return false
		}
		hashValue := StrHash(lookupStrValue)
		// Do the hash scan
		_, scanResult := ht.Get(hashValue, intLimit, collisionDetection)
		for _, docID := range scanResult {
			(*result)[docID] = struct{}{}
		}
	} else {
		// Do a collection scan when no index is available
		tdlog.Printf("Query %v is a collection scan, which may be inefficient", expr)
		counter := uint64(0)
		docMatcher := func(id uint64, doc interface{}) bool {
			// Get inside each document and look for a match
			for _, v := range GetIn(doc, vecPath) {
				if fmt.Sprint(v) == lookupStrValue {
					(*result)[id] = struct{}{}
					counter++
					return counter != intLimit
				}
			}
			return true
		}
		src.ForAll(docMatcher)
	}
	return
}
// Scan a hash table or collection documents using an integer range.
func V2IntRange(intFrom interface{}, expr map[string]interface{}, src *Col, result *map[uint64]struct{}) (err error) {
	path, hasPath := expr["in"]
	if !hasPath {
		return errors.New("Missing path `in`")
	}
	// Figure out the path
	vecPath := make([]string, 0)
	if vecPathInterface, ok := path.([]interface{}); ok {
		for _, v := range vecPathInterface {
			vecPath = append(vecPath, fmt.Sprint(v))
		}
	} else {
		return errors.New(fmt.Sprintf("Expecting vector path `in`, but %v given", path))
	}
	// Figure out the result number limit
	intLimit := int(0)
	if limit, hasLimit := expr["limit"]; hasLimit {
		if floatLimit, ok := limit.(float64); ok {
			intLimit = int(floatLimit)
		} else {
			return errors.New(fmt.Sprintf("Expecting `limit` as a number, but %v given", limit))
		}
	}
	// Figure out the range ("from" value & "to" value)
	from, to := int(0), int(0)
	if floatFrom, ok := intFrom.(float64); ok {
		from = int(floatFrom)
	} else {
		return errors.New(fmt.Sprintf("Expecting `int-from` as an integer, but %v given", intFrom))
	}
	if intTo, ok := expr["int-to"]; ok {
		if floatTo, ok := intTo.(float64); ok {
			to = int(floatTo)
		} else {
			return errors.New(fmt.Sprintf("Expecting `int-to` as an integer, but %v given", intTo))
		}
	} else if intTo, ok := expr["int to"]; ok {
		if floatTo, ok := intTo.(float64); ok {
			to = int(floatTo)
		} else {
			return errors.New(fmt.Sprintf("Expecting `int-to` as an integer, but %v given", intTo))
		}
	} else {
		return errors.New("Missing `int-to`")
	}
	if to > from && to-from > 1000 || from > to && from-to > 1000 {
		tdlog.Printf("Query %v is an index lookup of more than 1000 values, which may be inefficient", expr)
	}
	counter := int(0) // Number of results already collected
	if ht, indexScan := src.StrHT[strings.Join(vecPath, ",")]; indexScan {
		// Use an index scan when one is available
		if from < to {
			// Forward scan - from low value to high value
			for lookupValue := from; lookupValue <= to; lookupValue++ {
				lookupStrValue := fmt.Sprint(lookupValue)
				hashValue := StrHash(lookupStrValue)
				// Hash collision detection function
				collisionDetection := func(k, v uint64) bool {
					var doc interface{}
					if src.Read(v, &doc) != nil {
						return false
					}
					for _, v := range GetIn(doc, vecPath) {
						if fmt.Sprint(v) == lookupStrValue {
							return true
						}
					}
					return false
				}
				_, vals := ht.Get(hashValue, uint64(intLimit), collisionDetection)
				for _, docID := range vals {
					if intLimit != 0 && counter == intLimit {
						break
					}
					counter++
					(*result)[docID] = struct{}{}
				}
			}
		} else {
			// Backward scan - from high value to low value
			for lookupValue := from; lookupValue >= to; lookupValue-- {
				lookupStrValue := fmt.Sprint(lookupValue)
				hashValue := StrHash(lookupStrValue)
				collisionDetection := func(k, v uint64) bool {
					var doc interface{}
					if src.Read(v, &doc) != nil {
						return false
					}
					for _, v := range GetIn(doc, vecPath) {
						if fmt.Sprint(v) == lookupStrValue {
							return true
						}
					}
					return false
				}
				_, vals := ht.Get(hashValue, uint64(intLimit), collisionDetection)
				for _, docID := range vals {
					if intLimit != 0 && counter == intLimit {
						break
					}
					counter++
					(*result)[docID] = struct{}{}
				}
			}
		}
	} else {
		// Fall back to a collection scan when no index is available
		tdlog.Printf("Query %v is a collection scan, which can be *very* inefficient; reverse range order is not supported in this mode!", expr)
		// A reversed range cannot be preserved in a collection scan, so normalize it
		if to < from {
			from, to = to, from
		}
		docMatcher := func(id uint64, doc interface{}) bool {
			for _, v := range GetIn(doc, vecPath) {
				if floatV, ok := v.(float64); ok {
					if intV := int(floatV); intV <= to && intV >= from {
						(*result)[id] = struct{}{}
						counter++
						return counter != intLimit
					}
				}
			}
			return true
		}
		src.ForAll(docMatcher)
	}
	return
}
// Flush all collection data and index files.
func (db *DB) Flush() {
	for _, col := range db.StrCol {
		col.Flush()
	}
	tdlog.Printf("All buffers flushed (database %s)", db.Dir)
}