// Scan the entire data file, look for documents and invoke the function on each. func (col *ColFile) ForAll(fun func(id uint64, doc []byte) bool) { addr := uint64(0) for { if col.File.UsedSize < DOC_HEADER_SIZE || addr >= col.File.UsedSize-DOC_HEADER_SIZE { break } // Read document header - validity and room validity := col.File.Buf[addr] room, _ := binary.Uvarint(col.File.Buf[addr+1 : addr+11]) if validity != DOC_VALID && validity != DOC_INVALID || room > DOC_MAX_ROOM { // If the document does not contain valid header, skip it tdlog.Errorf("ERROR: The document at %d in %s is corrupted", addr, col.File.Name) // Move forward until we meet a valid document header for addr++; col.File.Buf[addr] != DOC_VALID && col.File.Buf[addr] != DOC_INVALID && addr < col.File.UsedSize-DOC_HEADER_SIZE; addr++ { } tdlog.Errorf("ERROR: Corrupted document skipped, now at %d", addr) continue } // If the function returns false, do not continue scanning if validity == DOC_VALID && !fun(addr, col.File.Buf[addr+DOC_HEADER_SIZE:addr+DOC_HEADER_SIZE+room]) { break } addr += DOC_HEADER_SIZE + room } }
// Close the collection. func (col *ChunkCol) Close() { if err := col.Data.File.Close(); err != nil { tdlog.Errorf("Failed to close %s, reason: %v", col.Data.File.Name, err) } if err := col.PK.File.Close(); err != nil { tdlog.Errorf("Failed to close %s, reason: %v", col.PK.File.Name, err) } }
// Close the collection. func (col *Col) Close() { if err := col.Data.File.Close(); err != nil { tdlog.Errorf("ERROR: Failed to close %s, reason: %v", col.Data.File.Name, err) } for _, ht := range col.StrHT { if err := ht.File.Close(); err != nil { tdlog.Errorf("ERROR: Failed to close %s, reason: %v", ht.File.Name, err) } } }
// Flush collection data and index files. func (col *ChunkCol) Flush() (err error) { if err = col.Data.File.Flush(); err != nil { tdlog.Errorf("Failed to flush %s, reason: %v", col.Data.File.Name, err) return } if err = col.PK.File.Flush(); err != nil { tdlog.Errorf("Failed to flush %s, reason: %v", col.PK.File.Name, err) return } return }
// Flush collection data and index files. func (col *Col) Flush() error { if err := col.Data.File.Flush(); err != nil { tdlog.Errorf("ERROR: Failed to flush %s, reason: %v", col.Data.File.Name, err) return err } for _, ht := range col.StrHT { if err := ht.File.Flush(); err != nil { tdlog.Errorf("ERROR: Failed to flush %s, reason: %v", ht.File.Name, err) return err } } return nil }
// Update a document, return its new ID. func (col *Col) Update(id uint64, doc interface{}) (newID uint64, err error) { data, err := json.Marshal(doc) if err != nil { return } // Read the original document oldData := col.Data.Read(id) if oldData == nil { return id, errors.New(fmt.Sprintf("Document %d does not exist in %s", id, col.Dir)) } // Remove the original document from indexes var oldDoc interface{} if err = json.Unmarshal(oldData, &oldDoc); err == nil { col.UnindexDoc(id, oldDoc) } else { tdlog.Errorf("ERROR: The original document %d in %s is corrupted, this update will attempt to overwrite it", id, col.Dir) } // Update document data if newID, err = col.Data.Update(id, data); err != nil { return } // Index updated document col.IndexDoc(newID, doc) return }
// Scan the entire data file, look for documents and invoke the function on each. func (col *ColFile) ForAll(fun func(id uint64, doc []byte) bool) { addr := uint64(0) for { if col.File.UsedSize < DOC_HEADER || addr >= col.File.UsedSize-DOC_HEADER { break } // Lock down document region region := addr / COL_FILE_REGION_SIZE mutex := col.regionRWMutex[region] mutex.RLock() // Read document header - validity and room validity := col.File.Buf[addr] room, _ := binary.Uvarint(col.File.Buf[addr+1 : addr+11]) if validity != DOC_VALID && validity != DOC_INVALID || room > DOC_MAX_ROOM { // If the document does not contain valid header, skip it mutex.RUnlock() tdlog.Errorf("ERROR: The document at %d in %s is corrupted", addr, col.File.Name) // Move forward until we meet a valid document header for addr++; col.File.Buf[addr] != DOC_VALID && col.File.Buf[addr] != DOC_INVALID && addr < col.File.UsedSize-DOC_HEADER; addr++ { } continue } // If the function returns false, do not continue scanning if validity == DOC_VALID && !fun(addr, col.File.Buf[addr+DOC_HEADER:addr+DOC_HEADER+room]) { mutex.RUnlock() break } mutex.RUnlock() addr += DOC_HEADER + room } }
// Flush all collection data and index files. func (db *DB) Flush() { for _, col := range db.StrCol { if err := col.Flush(); err != nil { tdlog.Errorf("Error during database flush: %v", err) } } }
// Update a document by physical ID, return its new physical ID. func (col *ChunkCol) Update(id uint64, doc map[string]interface{}) (newID uint64, err error) { data, err := json.Marshal(doc) if err != nil { return } // Read the original document oldData := col.Data.Read(id) if oldData == nil { err = errors.New(fmt.Sprintf("Document %d does not exist in %s", id, col.BaseDir)) return } // Remove the original document from indexes var oldDoc map[string]interface{} if err = json.Unmarshal(oldData, &oldDoc); err == nil { col.PK.Remove(uint64(uid.PKOfDoc(oldDoc, false)), id) } else { tdlog.Errorf("ERROR: The original document %d in %s is corrupted, this update will attempt to overwrite it", id, col.BaseDir) } // Update document data if newID, err = col.Data.Update(id, data); err != nil { return } // Index updated document col.PK.Put(uint64(uid.PKOfDoc(doc, true)), newID) return }
// Return the number (not address) of next chained bucket, 0 if there is not any. func (ht *HashTable) NextBucket(bucket uint64) uint64 { if bucket >= ht.NumBuckets { return 0 } bucketAddr := bucket * BUCKET_SIZE if next, _ := binary.Uvarint(ht.File.Buf[bucketAddr : bucketAddr+BUCKET_HEADER_SIZE]); next == 0 { return 0 } else if next <= bucket { tdlog.Errorf("ERROR: Bucket loop in hash table %s at bucket no.%d, address %d", ht.File.Name, bucket, bucketAddr) return 0 } else if next >= ht.NumBuckets || next < INITIAL_BUCKETS { tdlog.Errorf("ERROR: Bad bucket refernece (%d is out of range %d - %d) in %s", next, INITIAL_BUCKETS, ht.NumBuckets, ht.File.Name) return 0 } else { return next } }
// Deserialize each document and invoke the function on the deserialized docuemnt (Collection Scsn). func (col *Col) ForAll(fun func(id uint64, doc interface{}) bool) { col.Data.ForAll(func(id uint64, data []byte) bool { var parsed interface{} if err := json.Unmarshal(data, &parsed); err != nil { tdlog.Errorf("ERROR: Cannot parse document %d in %s to JSON", id, col.Dir) return true } else { return fun(id, parsed) } }) }
// Change the number of partitions in collection func (db *DB) Repartition(name string, newNumber int) (counter uint64, err error) { counterMutex := &sync.Mutex{} target := db.Use(name) if target == nil { return 0, errors.New(fmt.Sprintf("Collection %s does not exist in %s", name, db.BaseDir)) } if newNumber < 1 { return 0, errors.New(fmt.Sprintf("New number of partitions must be above 0, %d given", newNumber)) } // Create a temporary collection tempName := fmt.Sprintf("temp-%s-%v", name, time.Now().Unix()) db.Create(tempName, newNumber) temp := db.Use(tempName) // Recreate secondary indexes for _, index := range target.SecIndexes { temp.Index(index[0].Path) } // Reinsert documents target.ForAll(func(id uint64, doc map[string]interface{}) bool { if err := temp.InsertRecovery(id, doc); err == nil { counterMutex.Lock() counter += 1 counterMutex.Unlock() } else { tdlog.Errorf("Failed to recover document %v", doc) } return true }) // Drop the old collection and rename the recovery collection if err = db.Drop(name); err != nil { tdlog.Errorf("Scrub operation failed to drop original collection %s: %v", name, err) return } if err = db.Rename(tempName, name); err != nil { tdlog.Errorf("Scrub operation failed to rename recovery collection %s: %v", tempName, err) } return }
// Deserialize each document and invoke the function on the deserialized document (Collection Scan).
func (col *ChunkCol) ForAll(fun func(id uint64, doc map[string]interface{}) bool) {
	col.Data.ForAll(func(id uint64, data []byte) bool {
		var parsed map[string]interface{}
		if err := json.Unmarshal(data, &parsed); err != nil || parsed == nil {
			// Unparseable documents are logged and skipped; returning true keeps scanning.
			tdlog.Errorf("Cannot parse document %d in %s to JSON", id, col.BaseDir)
			return true
		} else {
			// The document is reported to the callback under its persistent PK, not
			// its physical address.
			persistID := uid.PKOfDoc(parsed, false)
			// Skip documents without valid PK
			// NOTE(review): this assumes uid.PKOfDoc signals a missing PK with a
			// negative value - confirm its return type is signed, otherwise this
			// check can never trigger.
			if persistID < 0 {
				return true
			}
			return fun(persistID, parsed)
		}
	})
}
// Open a database. func OpenDB(dir string) (db *DB, err error) { if err = os.MkdirAll(dir, 0700); err != nil { return } db = &DB{Dir: dir, StrCol: make(map[string]*Col)} files, err := ioutil.ReadDir(dir) if err != nil { return } // Try to open sub-directory as document collection for _, f := range files { if f.IsDir() { if db.StrCol[f.Name()], err = OpenCol(path.Join(dir, f.Name())); err != nil { tdlog.Errorf("ERROR: Failed to open collection %s, reason: %v", f.Name(), err) } else { tdlog.Printf("Successfully opened collection %s", f.Name()) } } } return }
func OpenDB(baseDir string) (db *DB, err error) { if err = os.MkdirAll(baseDir, 0700); err != nil { return } db = &DB{BaseDir: baseDir, StrCol: make(map[string]*Col)} files, err := ioutil.ReadDir(baseDir) if err != nil { return } // Try to open sub-directory as document collection for _, f := range files { if f.IsDir() { // Figure out how many chunks there are in the collection var numchunksFH *os.File numchunksFH, err = os.OpenFile(path.Join(baseDir, f.Name(), NUMCHUNKS_FILENAME), os.O_CREATE|os.O_RDWR, 0600) defer numchunksFH.Close() if err != nil { return } numchunksContent, err := ioutil.ReadAll(numchunksFH) if err != nil { panic(err) } numchunks, err := strconv.Atoi(string(numchunksContent)) if err != nil || numchunks < 1 { panic(fmt.Sprintf("Cannot figure out number of chunks for collection %s, manually repair it maybe? %v", baseDir, err)) } // Open the directory as a collection if db.StrCol[f.Name()], err = OpenCol(path.Join(baseDir, f.Name()), numchunks); err != nil { tdlog.Errorf("ERROR: Failed to open collection %s, error: %v", f.Name(), err) } else { tdlog.Printf("Successfully opened collection %s", f.Name()) } } } return }
// Repair damaged documents/indexes, collect unused space along the way. func (db *DB) Scrub(name string) (err error) { if col, ok := db.StrCol[name]; ok { db.Drop("scrub-" + name) // Create a temporary collection if err = db.Create("scrub-" + name); err != nil { return } scrub := db.Use("scrub-" + name) if scrub == nil { return errors.New(fmt.Sprint("Scrub temporary collection has disappeared, please try again.")) } // Recreate indexes for path := range col.StrIC { if path[0] != '_' { // Skip _uid index if err = scrub.Index(strings.Split(path, ",")); err != nil { return } } } // Recover as many documents as possible, insert them into the temporary collection col.ForAll(func(id uint64, doc interface{}) bool { if _, err = scrub.Insert(doc); err != nil { tdlog.Errorf("ERROR: Scrubing %s, I could not insert '%v' back", name, doc) } return true }) // Replace original collection by the "temporary collection" if err = db.Drop(name); err != nil { return } return db.Rename("scrub-"+name, name) } else { return errors.New(fmt.Sprintf("Collection %s does not exists in %s", name, db.Dir)) } return nil }