Esempio n. 1
File: col.go Progetto: jbenet/tiedot
// ForAll scans the data file starting at address 0 and invokes fun on each
// document whose header is marked DOC_VALID. fun receives the document's
// address as id and the room bytes that follow the header as doc. Headers that
// look corrupted are logged and skipped.
//
// NOTE(review): this excerpt appears to have lost its closing braces and the
// bodies of some branches; the comments describe only the visible statements.
func (col *ColFile) ForAll(fun func(id uint64, doc []byte) bool) {
	addr := uint64(0)
	for {
		// The first comparison short-circuits the second, so the subtraction is
		// only evaluated when UsedSize is at least one header long.
		if col.File.UsedSize < DOC_HEADER_SIZE || addr >= col.File.UsedSize-DOC_HEADER_SIZE {
		// Read document header - validity and room
		validity := col.File.Buf[addr]
		// room is decoded as a uvarint from the 10 bytes after the validity byte;
		// the byte count returned by Uvarint is discarded.
		room, _ := binary.Uvarint(col.File.Buf[addr+1 : addr+11])
		// Corrupted means: validity is neither DOC_VALID nor DOC_INVALID, or room
		// exceeds DOC_MAX_ROOM (&& binds tighter than ||).
		if validity != DOC_VALID && validity != DOC_INVALID || room > DOC_MAX_ROOM {
			// If the document does not contain valid header, skip it
			tdlog.Errorf("ERROR: The document at %d in %s is corrupted", addr, col.File.Name)
			// Move forward until we meet a valid document header
			// NOTE(review): Buf[addr] is read before the addr bound is tested in
			// this condition - confirm Buf is always long enough for that index.
			for addr++; col.File.Buf[addr] != DOC_VALID && col.File.Buf[addr] != DOC_INVALID && addr < col.File.UsedSize-DOC_HEADER_SIZE; addr++ {
			tdlog.Errorf("ERROR: Corrupted document skipped, now at %d", addr)
		// If the function returns false, do not continue scanning
		// fun is only called for DOC_VALID documents; doc is the slice
		// [addr+DOC_HEADER_SIZE, addr+DOC_HEADER_SIZE+room) of the file buffer.
		if validity == DOC_VALID && !fun(addr, col.File.Buf[addr+DOC_HEADER_SIZE:addr+DOC_HEADER_SIZE+room]) {
		// Advance past this document's header and its room.
		addr += DOC_HEADER_SIZE + room
Esempio n. 2
// Close closes the collection's data file (col.Data.File) and then its PK file
// (col.PK.File). A failure to close either file is logged through
// tdlog.Errorf; the method itself returns nothing.
func (col *ChunkCol) Close() {
	if err := col.Data.File.Close(); err != nil {
		tdlog.Errorf("Failed to close %s, reason: %v", col.Data.File.Name, err)
	if err := col.PK.File.Close(); err != nil {
		tdlog.Errorf("Failed to close %s, reason: %v", col.PK.File.Name, err)
Esempio n. 3
// Close closes the collection's data file (col.Data.File) and then the File of
// every entry in col.StrHT. Each close failure is logged through tdlog.Errorf;
// the method itself returns nothing.
func (col *Col) Close() {
	if err := col.Data.File.Close(); err != nil {
		tdlog.Errorf("ERROR: Failed to close %s, reason: %v", col.Data.File.Name, err)
	// Close the file behind each col.StrHT entry.
	for _, ht := range col.StrHT {
		if err := ht.File.Close(); err != nil {
			tdlog.Errorf("ERROR: Failed to close %s, reason: %v", ht.File.Name, err)
Esempio n. 4
// Flush flushes the collection's data file (col.Data.File) and then its PK
// file (col.PK.File). The result of each flush is stored in the named result
// err, and a failure is logged through tdlog.Errorf.
//
// NOTE(review): no return statement is visible in this excerpt - confirm
// whether the first failure returns early or the second flush always runs.
func (col *ChunkCol) Flush() (err error) {
	if err = col.Data.File.Flush(); err != nil {
		tdlog.Errorf("Failed to flush %s, reason: %v", col.Data.File.Name, err)
	if err = col.PK.File.Flush(); err != nil {
		tdlog.Errorf("Failed to flush %s, reason: %v", col.PK.File.Name, err)
Esempio n. 5
// Flush flushes the collection's data file (col.Data.File) and then the File
// of every entry in col.StrHT. The first flush error is logged and returned;
// nil is returned when every flush succeeds.
func (col *Col) Flush() error {
	if err := col.Data.File.Flush(); err != nil {
		tdlog.Errorf("ERROR: Failed to flush %s, reason: %v", col.Data.File.Name, err)
		return err
	// Flush the file behind each col.StrHT entry, stopping at the first failure.
	for _, ht := range col.StrHT {
		if err := ht.File.Flush(); err != nil {
			tdlog.Errorf("ERROR: Failed to flush %s, reason: %v", ht.File.Name, err)
			return err
	return nil
Esempio n. 6
// Update replaces the document stored at id with doc and reports the ID under
// which the new data was stored.
//
// doc is serialized with json.Marshal. The existing document is read first so
// it can be removed from the indexes, then the data is updated and the new
// document is indexed under newID. If no document exists at id, id and an
// error are returned.
//
// NOTE(review): the bodies of the error branches at the Marshal and
// Data.Update checks are not visible in this excerpt.
func (col *Col) Update(id uint64, doc interface{}) (newID uint64, err error) {
	data, err := json.Marshal(doc)
	if err != nil {
	// Read the original document
	oldData := col.Data.Read(id)
	// A nil read result is treated as "document does not exist".
	if oldData == nil {
		return id, errors.New(fmt.Sprintf("Document %d does not exist in %s", id, col.Dir))
	// Remove the original document from indexes
	var oldDoc interface{}
	if err = json.Unmarshal(oldData, &oldDoc); err == nil {
		col.UnindexDoc(id, oldDoc)
	} else {
		// The old document cannot be parsed, so it is not unindexed; only a
		// log entry is written on this branch.
		tdlog.Errorf("ERROR: The original document %d in %s is corrupted, this update will attempt to overwrite it", id, col.Dir)
	// Update document data
	if newID, err = col.Data.Update(id, data); err != nil {
	// Index updated document
	col.IndexDoc(newID, doc)
Esempio n. 7
// ForAll scans the data file starting at address 0 and invokes fun on each
// document whose header is marked DOC_VALID. fun receives the document's
// address as id and the room bytes that follow the header as doc. Headers that
// look corrupted are logged and skipped.
//
// NOTE(review): this excerpt appears to have lost its closing braces and the
// bodies of some branches; the comments describe only the visible statements.
func (col *ColFile) ForAll(fun func(id uint64, doc []byte) bool) {
	addr := uint64(0)
	for {
		// The first comparison short-circuits the second, so the subtraction is
		// only evaluated when UsedSize is at least one header long.
		if col.File.UsedSize < DOC_HEADER || addr >= col.File.UsedSize-DOC_HEADER {
		// Lock down document region
		// The region index is the address divided by COL_FILE_REGION_SIZE.
		region := addr / COL_FILE_REGION_SIZE
		// NOTE(review): the mutex is fetched here, but no Lock/Unlock call is
		// visible in this excerpt - confirm where it is acquired and released.
		mutex := col.regionRWMutex[region]
		// Read document header - validity and room
		validity := col.File.Buf[addr]
		// room is decoded as a uvarint from the 10 bytes after the validity byte;
		// the byte count returned by Uvarint is discarded.
		room, _ := binary.Uvarint(col.File.Buf[addr+1 : addr+11])
		// Corrupted means: validity is neither DOC_VALID nor DOC_INVALID, or room
		// exceeds DOC_MAX_ROOM (&& binds tighter than ||).
		if validity != DOC_VALID && validity != DOC_INVALID || room > DOC_MAX_ROOM {
			// If the document does not contain valid header, skip it
			tdlog.Errorf("ERROR: The document at %d in %s is corrupted", addr, col.File.Name)
			// Move forward until we meet a valid document header
			// NOTE(review): Buf[addr] is read before the addr bound is tested in
			// this condition - confirm Buf is always long enough for that index.
			for addr++; col.File.Buf[addr] != DOC_VALID && col.File.Buf[addr] != DOC_INVALID && addr < col.File.UsedSize-DOC_HEADER; addr++ {
		// If the function returns false, do not continue scanning
		// fun is only called for DOC_VALID documents; doc is the slice
		// [addr+DOC_HEADER, addr+DOC_HEADER+room) of the file buffer.
		if validity == DOC_VALID && !fun(addr, col.File.Buf[addr+DOC_HEADER:addr+DOC_HEADER+room]) {
		// Advance past this document's header and its room.
		addr += DOC_HEADER + room
Esempio n. 8
File: db.go Progetto: jbenet/tiedot
// Flush calls Flush on every collection in db.StrCol. A failing collection is
// reported through tdlog.Errorf; the method itself returns nothing, so callers
// cannot observe flush errors from it.
func (db *DB) Flush() {
	for _, col := range db.StrCol {
		if err := col.Flush(); err != nil {
			tdlog.Errorf("Error during database flush: %v", err)
Esempio n. 9
// Update replaces the document stored at physical ID id with doc and reports
// the physical ID under which the new data was stored.
//
// doc is serialized with json.Marshal. The existing document is read first so
// its entry can be removed from the PK index, then the data is updated and a
// PK entry pointing at newID is added.
//
// NOTE(review): the bodies of several error branches, and any return after the
// "does not exist" error is built, are not visible in this excerpt.
func (col *ChunkCol) Update(id uint64, doc map[string]interface{}) (newID uint64, err error) {
	data, err := json.Marshal(doc)
	if err != nil {
	// Read the original document
	oldData := col.Data.Read(id)
	// A nil read result is treated as "document does not exist".
	if oldData == nil {
		err = errors.New(fmt.Sprintf("Document %d does not exist in %s", id, col.BaseDir))
	// Remove the original document from indexes
	var oldDoc map[string]interface{}
	if err = json.Unmarshal(oldData, &oldDoc); err == nil {
		// The PK entry is keyed by uid.PKOfDoc of the old document.
		// NOTE(review): the meaning of the boolean argument to PKOfDoc is not
		// shown here (false for the old document, true for the new one below).
		col.PK.Remove(uint64(uid.PKOfDoc(oldDoc, false)), id)
	} else {
		// The old document cannot be parsed, so its PK entry is not removed;
		// only a log entry is written on this branch.
		tdlog.Errorf("ERROR: The original document %d in %s is corrupted, this update will attempt to overwrite it", id, col.BaseDir)
	// Update document data
	if newID, err = col.Data.Update(id, data); err != nil {
	// Index updated document
	col.PK.Put(uint64(uid.PKOfDoc(doc, true)), newID)
Esempio n. 10
// NextBucket returns the number (not the address) of the bucket chained after
// bucket, or 0 if there is none.
//
// 0 is also returned when bucket is outside the table (>= ht.NumBuckets), when
// the stored link does not point forward (next <= bucket, logged as a loop),
// and when the link is out of range (next >= ht.NumBuckets or
// next < INITIAL_BUCKETS, logged as a bad reference).
func (ht *HashTable) NextBucket(bucket uint64) uint64 {
	if bucket >= ht.NumBuckets {
		return 0
	// The link is a uvarint stored in the first BUCKET_HEADER_SIZE bytes of the
	// bucket; the byte count returned by Uvarint is discarded.
	bucketAddr := bucket * BUCKET_SIZE
	if next, _ := binary.Uvarint(ht.File.Buf[bucketAddr : bucketAddr+BUCKET_HEADER_SIZE]); next == 0 {
		return 0
	} else if next <= bucket {
		// A link that does not move forward is reported as a loop and dropped.
		tdlog.Errorf("ERROR: Bucket loop in hash table %s at bucket no.%d, address %d", ht.File.Name, bucket, bucketAddr)
		return 0
	} else if next >= ht.NumBuckets || next < INITIAL_BUCKETS {
		// NOTE(review): the log message below misspells "reference".
		tdlog.Errorf("ERROR: Bad bucket refernece (%d is out of range %d - %d) in %s", next, INITIAL_BUCKETS, ht.NumBuckets, ht.File.Name)
		return 0
	} else {
		return next
Esempio n. 11
// ForAll deserializes each document and invokes fun on the deserialized
// document (collection scan).
//
// It delegates the iteration to col.Data.ForAll. A document that cannot be
// parsed as JSON is logged and the callback returns true for it without
// calling fun; otherwise the callback returns whatever fun returns.
func (col *Col) ForAll(fun func(id uint64, doc interface{}) bool) {
	col.Data.ForAll(func(id uint64, data []byte) bool {
		var parsed interface{}
		if err := json.Unmarshal(data, &parsed); err != nil {
			tdlog.Errorf("ERROR: Cannot parse document %d in %s to JSON", id, col.Dir)
			// Unparseable documents are reported and fun is not called for them.
			return true
		} else {
			return fun(id, parsed)
Esempio n. 12
File: db.go Progetto: jbenet/tiedot
// Repartition changes the number of partitions of the collection called name
// to newNumber. counter is incremented once per document that was re-inserted
// successfully.
//
// It creates a temporary collection named "temp-<name>-<unix time>" with
// newNumber partitions, re-inserts every document of the original collection
// into it, drops the original and renames the temporary collection to name.
// An error is returned when the collection does not exist or newNumber < 1.
//
// NOTE(review): the body of the secondary-index loop and the function's final
// return are not visible in this excerpt.
func (db *DB) Repartition(name string, newNumber int) (counter uint64, err error) {
	// NOTE(review): counterMutex is created here but is not used in any line
	// visible in this excerpt - confirm whether counter updates are guarded.
	counterMutex := &sync.Mutex{}
	target := db.Use(name)
	if target == nil {
		return 0, errors.New(fmt.Sprintf("Collection %s does not exist in %s", name, db.BaseDir))
	if newNumber < 1 {
		return 0, errors.New(fmt.Sprintf("New number of partitions must be above 0, %d given", newNumber))
	// Create a temporary collection
	tempName := fmt.Sprintf("temp-%s-%v", name, time.Now().Unix())
	// NOTE(review): any result of db.Create is not checked here, and temp is
	// used below without a nil check.
	db.Create(tempName, newNumber)
	temp := db.Use(tempName)
	// Recreate secondary indexes
	for _, index := range target.SecIndexes {
	// Reinsert documents
	target.ForAll(func(id uint64, doc map[string]interface{}) bool {
		// This err is local to the closure (declared with :=); it does not set
		// the function's named result.
		if err := temp.InsertRecovery(id, doc); err == nil {
			counter += 1
		} else {
			tdlog.Errorf("Failed to recover document %v", doc)
		// Returning true regardless of the insert outcome.
		return true
	// Drop the old collection and rename the recovery collection
	// NOTE(review): both log messages below say "Scrub operation" although this
	// is Repartition.
	if err = db.Drop(name); err != nil {
		tdlog.Errorf("Scrub operation failed to drop original collection %s: %v", name, err)
	if err = db.Rename(tempName, name); err != nil {
		tdlog.Errorf("Scrub operation failed to rename recovery collection %s: %v", tempName, err)
Esempio n. 13
// ForAll deserializes each document and invokes fun on the deserialized
// document (collection scan).
//
// It delegates the iteration to col.Data.ForAll. The id handed to fun is the
// value of uid.PKOfDoc for the parsed document, not the id supplied by the
// underlying scan. Documents that fail to parse (or parse to a nil map) are
// logged, and documents whose PK is negative are skipped; in both cases the
// callback returns true without calling fun.
func (col *ChunkCol) ForAll(fun func(id uint64, doc map[string]interface{}) bool) {
	col.Data.ForAll(func(id uint64, data []byte) bool {
		var parsed map[string]interface{}
		// A nil map after a successful Unmarshal is treated like a parse failure.
		if err := json.Unmarshal(data, &parsed); err != nil || parsed == nil {
			tdlog.Errorf("Cannot parse document %d in %s to JSON", id, col.BaseDir)
			return true
		} else {
			// NOTE(review): the meaning of the false argument to PKOfDoc is not
			// shown in this excerpt.
			persistID := uid.PKOfDoc(parsed, false)
			// Skip documents without valid PK
			if persistID < 0 {
				return true
			return fun(persistID, parsed)
Esempio n. 14
// OpenDB opens the database stored in directory dir.
//
// The directory is created with mode 0700 if needed, then every sub-directory
// found in it is opened as a collection via OpenCol and stored in db.StrCol
// under the sub-directory's name. Each open attempt is logged as a success or
// a failure.
//
// NOTE(review): the bodies of the MkdirAll and ReadDir error branches and the
// final return are not visible in this excerpt.
func OpenDB(dir string) (db *DB, err error) {
	if err = os.MkdirAll(dir, 0700); err != nil {
	db = &DB{Dir: dir, StrCol: make(map[string]*Col)}
	files, err := ioutil.ReadDir(dir)
	if err != nil {
	// Try to open sub-directory as document collection
	for _, f := range files {
		if f.IsDir() {
			// The map entry is assigned whatever OpenCol returned, including when
			// err is non-nil.
			if db.StrCol[f.Name()], err = OpenCol(path.Join(dir, f.Name())); err != nil {
				tdlog.Errorf("ERROR: Failed to open collection %s, reason: %v", f.Name(), err)
			} else {
				tdlog.Printf("Successfully opened collection %s", f.Name())
Esempio n. 15
File: db.go Progetto: jbenet/tiedot
// OpenDB opens the database stored in directory baseDir.
//
// The directory is created with mode 0700 if needed. For every sub-directory
// found in it, the file NUMCHUNKS_FILENAME is opened (and created if missing)
// and its content is parsed as an integer; that number is passed to OpenCol
// together with the sub-directory path, and the result is stored in db.StrCol
// under the sub-directory's name. The function panics when the chunk count
// cannot be parsed or is below 1.
//
// NOTE(review): the bodies of several error branches and the final return are
// not visible in this excerpt.
func OpenDB(baseDir string) (db *DB, err error) {
	if err = os.MkdirAll(baseDir, 0700); err != nil {
	db = &DB{BaseDir: baseDir, StrCol: make(map[string]*Col)}
	files, err := ioutil.ReadDir(baseDir)
	if err != nil {
	// Try to open sub-directory as document collection
	for _, f := range files {
		if f.IsDir() {
			// Figure out how many chunks there are in the collection
			var numchunksFH *os.File
			numchunksFH, err = os.OpenFile(path.Join(baseDir, f.Name(), NUMCHUNKS_FILENAME), os.O_CREATE|os.O_RDWR, 0600)
			// NOTE(review): this defer is registered before err is checked and sits
			// inside the loop, so the deferred Close calls only run when OpenDB
			// returns - every handle stays open until then.
			defer numchunksFH.Close()
			if err != nil {
			// NOTE(review): the := here appears to declare a new err inside the loop
			// body, shadowing the named result - confirm against the full source.
			numchunksContent, err := ioutil.ReadAll(numchunksFH)
			if err != nil {
			// Atoi is applied to the raw file content, with no trimming visible.
			numchunks, err := strconv.Atoi(string(numchunksContent))
			if err != nil || numchunks < 1 {
				// NOTE(review): the message is formatted with baseDir, not with the
				// collection's own directory name.
				panic(fmt.Sprintf("Cannot figure out number of chunks for collection %s, manually repair it maybe? %v", baseDir, err))

			// Open the directory as a collection
			// The map entry is assigned whatever OpenCol returned, including when
			// err is non-nil.
			if db.StrCol[f.Name()], err = OpenCol(path.Join(baseDir, f.Name()), numchunks); err != nil {
				tdlog.Errorf("ERROR: Failed to open collection %s, error: %v", f.Name(), err)
			} else {
				tdlog.Printf("Successfully opened collection %s", f.Name())
Esempio n. 16
// Scrub rebuilds the collection called name: it copies every document that can
// still be read into a fresh collection named "scrub-<name>", then drops the
// original and renames the fresh collection to name.
//
// An error is returned when the collection is not present in db.StrCol or when
// the temporary collection cannot be obtained after it was created.
//
// NOTE(review): the bodies of several error branches are not visible in this
// excerpt.
func (db *DB) Scrub(name string) (err error) {
	if col, ok := db.StrCol[name]; ok {
		// Remove any left-over temporary collection; the result of this Drop is
		// not checked.
		db.Drop("scrub-" + name)
		// Create a temporary collection
		if err = db.Create("scrub-" + name); err != nil {
		scrub := db.Use("scrub-" + name)
		if scrub == nil {
			return errors.New(fmt.Sprint("Scrub temporary collection has disappeared, please try again."))
		// Recreate indexes
		// Each key of col.StrIC is split on "," and passed to scrub.Index.
		for path := range col.StrIC {
			if path[0] != '_' { // Skip _uid index
				if err = scrub.Index(strings.Split(path, ",")); err != nil {
		// Recover as many documents as possible, insert them into the temporary collection
		col.ForAll(func(id uint64, doc interface{}) bool {
			// This assigns the function's named result err from inside the closure;
			// it is overwritten by the Drop call after the scan.
			if _, err = scrub.Insert(doc); err != nil {
				tdlog.Errorf("ERROR: Scrubing %s, I could not insert '%v' back", name, doc)
			// Returning true regardless of the insert outcome.
			return true
		// Replace original collection by the "temporary collection"
		if err = db.Drop(name); err != nil {
		return db.Rename("scrub-"+name, name)
	} else {
		return errors.New(fmt.Sprintf("Collection %s does not exists in %s", name, db.Dir))
	return nil