Example #1
func Reduce(source *sublevel.DB, target *sublevel.DB, name string, reduceFunc ReduceFunc, rereduceFunc RereduceFunc, valueFactory ValueFactory, level int) *ReduceTask {
	task := &ReduceTask{source: source, target: target, taskDb: sublevel.Sublevel(target.LevelDB(), name+string([]byte{0, 65})), reduceFunc: reduceFunc, rereduceFunc: rereduceFunc, valueFactory: valueFactory, level: level}
	task.ro = levigo.NewReadOptions()
	task.wo = levigo.NewWriteOptions()

	filter := func(key, value []byte) []byte {
		// Every change currently maps to the same task key, so all changes are
		// queued under a single trigger. The commented-out variant below would
		// instead group changes by the first `level` space-separated key
		// components.
		return []byte{32}
		/*
			if task.level == 0 {
				return []byte{0}
			}
			s := bytes.Split(key[:len(key)-17], []byte{32})
			if len(s) < task.level {
				return nil
			}
			return bytes.Join(s[:task.level], []byte{32})
		*/
	}

	f := func(key, value []byte) bool {
		//		println("Working on", string(key), string(value))
		// Split the key (minus its 4-byte prefix and 17-byte suffix) into its
		// space-separated components and recompute the reduced value for every
		// prefix level, from the most specific one down to task.level.
		s := bytes.Split(key[4:len(key)-17], []byte{32})
		off := 16
		for i := len(s); i >= task.level; i-- {
			val := task.valueFactory()
			if i > 0 {
				k := append(joinReduceKey(s[:i], false), 32)
				// Iterate over all source rows that share this group-key prefix and reduce them
				it := task.source.NewIterator(task.ro)
				for it.Seek(k); it.Valid(); it.Next() {
					if !bytes.HasPrefix(it.Key(), k) {
						break
					}
					val = task.reduceFunc(val, it.Value())
				}
				it.Close()
			}
			// Iterate over the already reduced rows in the target DB that are more specific
			it := task.target.NewIterator(task.ro)
			k := joinReduceKey(s[:i], true)
			for it.Seek(k); it.Valid(); it.Next() {
				if !bytes.HasPrefix(it.Key(), k) {
					break
				}
				val = task.rereduceFunc(val, it.Value())
			}
			it.Close()
			task.target.Put(task.wo, joinReduceKey(s[:i], false), serializeValue(val))
			if i > 0 {
				off += len(s[i-1]) + 1
			}
		}
		return true
	}

	task.task = runlevel.Trigger(source, sublevel.Sublevel(target.LevelDB(), name+string([]byte{0, 66})), filter, f)
	return task
}
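
Example #1 wires up a ReduceTask: whenever a source row changes, the trigger recomputes the reduced value for every prefix level of that row's group key, folding in raw source rows with reduceFunc and already-reduced, more specific target rows with rereduceFunc. The following self-contained sketch is not part of the original package; it only illustrates how the per-level group keys are derived, and the assumed key layout (4-byte prefix, space-separated components, 17-byte suffix) is read off the slicing in the code above.

package main

import (
	"bytes"
	"fmt"
)

// groupKeys derives the group key for every level of a row key, from the most
// specific level down to minLevel, mirroring the loop in ReduceTask's f above.
// The key layout (4-byte prefix, space-separated components, 17-byte suffix)
// is an assumption based on the slicing done in the example.
func groupKeys(rowKey []byte, minLevel int) [][]byte {
	components := bytes.Split(rowKey[4:len(rowKey)-17], []byte{32})
	keys := make([][]byte, 0, len(components)+1)
	for i := len(components); i >= minLevel; i-- {
		keys = append(keys, bytes.Join(components[:i], []byte{32}))
	}
	return keys
}

func main() {
	// Hypothetical row key: "doc:" prefix, components "2014 04 17",
	// followed by a 17-byte clock suffix.
	rowKey := []byte("doc:2014 04 17 0123456789abcdef")
	for _, k := range groupKeys(rowKey, 0) {
		fmt.Printf("reduce level key: %q\n", k)
	}
}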
Example #2
func Map(source *sublevel.DB, target *sublevel.DB, name string, mapFunc MappingFunc) *MappingTask {
	task := &MappingTask{source: source, target: target, taskDb: sublevel.Sublevel(target.LevelDB(), name+string([]byte{0, 65})), mapFunc: mapFunc}
	task.ro = levigo.NewReadOptions()
	task.wo = levigo.NewWriteOptions()

	filter := func(key, value []byte) []byte {
		return key
	}

	f := func(key, value []byte) bool {
		mapValues := new(bytes.Buffer)
		if value != nil {
			// Write each emitted row to the target DB and remember its key
			// (length-prefixed) so it can be removed again when this source
			// key is updated or deleted.
			emit := func(key interface{}, value interface{}) {
				k := serializeKey(key, uniclock.Next())
				v := serializeValue(value)
				task.target.Put(task.wo, k, v)
				binary.Write(mapValues, binary.BigEndian, int32(len(k)))
				mapValues.Write(k)
			}
			mapFunc(key, value, emit)
		}
		val, err := task.taskDb.Get(task.ro, key)
		if err != nil {
			return false
		}
		if val != nil {
			// Decode the length-prefixed keys recorded by the previous run of
			// this source key and delete those stale rows from the target DB.
			valbuf := bytes.NewBuffer(val)
			var l int32
			off := 0
			for off < len(val) {
				err := binary.Read(valbuf, binary.BigEndian, &l)
				if err != nil {
					break
				}
				off += 4
				if l < 0 || int(l) > len(val) {
					panic("Something is very wrong with this data")
				}
				k := valbuf.Next(int(l))
				off += int(l)
				task.target.Delete(task.wo, k)
			}
		}
		task.taskDb.Put(task.wo, key, mapValues.Bytes())
		return true
	}

	task.task = runlevel.Trigger(source, sublevel.Sublevel(target.LevelDB(), name+string([]byte{0, 66})), filter, f)
	return task
}
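
Example #2 builds a map task: for every changed source row, mapFunc may emit any number of derived rows into the target DB, and the task records each emitted key, length-prefixed with a big-endian int32, in its task sublevel so the rows from the previous run can be deleted before the new ones take effect. The sketch below is not part of the original source; it only demonstrates that length-prefixed encoding and the matching decode loop using the standard library.

package main

import (
	"bytes"
	"encoding/binary"
	"fmt"
)

// encodeKeys length-prefixes each key with a big-endian int32, exactly as the
// emit closure above records the emitted keys in mapValues.
func encodeKeys(keys [][]byte) []byte {
	buf := new(bytes.Buffer)
	for _, k := range keys {
		binary.Write(buf, binary.BigEndian, int32(len(k)))
		buf.Write(k)
	}
	return buf.Bytes()
}

// decodeKeys reverses the encoding, mirroring the cleanup loop that deletes
// the rows emitted by the previous run.
func decodeKeys(val []byte) [][]byte {
	buf := bytes.NewBuffer(val)
	var keys [][]byte
	var l int32
	for buf.Len() > 0 {
		if err := binary.Read(buf, binary.BigEndian, &l); err != nil {
			break
		}
		// Copy the key, since the slice returned by Next is only valid until
		// the next read from the buffer.
		keys = append(keys, append([]byte(nil), buf.Next(int(l))...))
	}
	return keys
}

func main() {
	encoded := encodeKeys([][]byte{[]byte("title:alpha"), []byte("title:beta")})
	for _, k := range decodeKeys(encoded) {
		fmt.Printf("previously emitted key: %q\n", k)
	}
}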
Example #3
func Trigger(db *sublevel.DB, taskDb *sublevel.DB, filter FilterFunc, taskfunc ProcessFunc) *Task {
	wo := levigo.NewWriteOptions()
	ro := levigo.NewReadOptions()
	task := &Task{db: db, filter: filter, processFunc: taskfunc, taskDb: taskDb, wo: wo, ro: ro, running: make(map[string]bool)}

	var run func(taskKey []byte)
	run = func(taskKey []byte) {
		taskKeyStr := string(taskKey)
		var ok bool
		//		println("RUN", string(key), string(value), string(taskKey))
		hookfunc := func(key, value []byte, hook *sublevel.Hook) {
			ok = taskfunc(key, value)
		}

		for {
			// Scan all queued rows for this task key. A queued key is the task
			// key followed by a 16-byte clock suffix; its value is the key of
			// the source row that triggered the task.
			it := task.taskDb.NewIterator(task.ro)
			for it.Seek(taskKey); it.Valid(); it.Next() {
				if !bytes.HasPrefix(it.Key(), taskKey) || len(taskKey)+16 != len(it.Key()) {
					break
				}
				key := it.Value()
				val, err := task.db.Get(task.ro, it.Value())
				if err != nil {
					continue
				}
				// A missing or empty source row is treated as a deletion.
				if len(val) == 0 {
					val = nil
				}
				//				println("RUN", string(key), string(val), string(taskKey))
				// This lock prevents the processing from being interrupted by a call to Close().
				task.closeMutex.Lock()
				// Execute taskfunc in the context of a new hook and commit to disk;
				// remove the task row from the queue afterwards if it succeeded.
				ok = true
				db.RunHook(wo, hookfunc, nil, key, val)
				if ok {
					taskDb.Delete(wo, it.Key())
				}
				task.closeMutex.Unlock()
			}
			it.Close()

			task.runningMutex.Lock()
			state := task.running[taskKeyStr]
			if !state {
				// No new changes arrived while processing: unregister and stop.
				delete(task.running, taskKeyStr)
				task.runningMutex.Unlock()
				break
			}
			// The task key was marked dirty in the meantime: clear the flag
			// and run another pass.
			task.running[taskKeyStr] = false
			task.runningMutex.Unlock()
		}
	}

	// Hook into the db to watch for changes
	task.pre = func(key, value []byte, hook *sublevel.Hook) {
		//		println("PRE", string(key), string(value))
		// Is this change relevant?
		taskKey := filter(key, value)
		if taskKey == nil {
			return
		}

		// Persist a task row (task key + 16-hex-digit clock value) so the task
		// is not lost if the process terminates before it has run.
		now := uniclock.Next()
		nowBytes := []byte(fmt.Sprintf("%016x", now))
		hook.Put(append(taskKey, nowBytes...), key, taskDb)
	}

	task.post = func(key, value []byte) {
		//		println("POST", string(key), string(value))
		// Is this change relevant?
		taskKey := filter(key, value)
		if taskKey == nil {
			return
		}
		taskKeyStr := string(taskKey)

		//		println("POSTtask", "'" + taskKeyStr + "'")
		task.runningMutex.Lock()
		defer task.runningMutex.Unlock()
		if _, ok := task.running[taskKeyStr]; ok {
			// A worker goroutine is already processing this task key:
			// mark it dirty so the worker runs another pass.
			task.running[taskKeyStr] = true
		} else {
			// No worker yet: register the task key and start one.
			task.running[taskKeyStr] = false
			go run(taskKey)
		}
	}

	db.Pre(task.pre)
	db.Post(task.post)

	return task
}
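
Example #3 is the trigger machinery used by the first two examples: the pre hook persists a pending task row, and the post hook ensures at most one worker goroutine per task key, with further changes merely flipping a dirty flag so the worker performs one more pass before it exits. The sketch below reproduces only that coalescing pattern with the standard library; the coalescer type and its method names are made up for illustration and do not exist in the package.

package main

import (
	"fmt"
	"sync"
	"time"
)

// coalescer demonstrates the running-map pattern of Trigger's post hook: per
// task key there is at most one worker goroutine, and additional notifications
// only mark the key dirty so the worker runs one more pass.
type coalescer struct {
	mu      sync.Mutex
	running map[string]bool
	work    func(taskKey string)
}

func (c *coalescer) notify(taskKey string) {
	c.mu.Lock()
	defer c.mu.Unlock()
	if _, ok := c.running[taskKey]; ok {
		// A worker already exists: mark the key dirty.
		c.running[taskKey] = true
		return
	}
	// Register the key and start a worker.
	c.running[taskKey] = false
	go c.run(taskKey)
}

func (c *coalescer) run(taskKey string) {
	for {
		c.work(taskKey)
		c.mu.Lock()
		if !c.running[taskKey] {
			// No new changes arrived while working: unregister and stop.
			delete(c.running, taskKey)
			c.mu.Unlock()
			return
		}
		// Clear the dirty flag and run another pass.
		c.running[taskKey] = false
		c.mu.Unlock()
	}
}

func main() {
	c := &coalescer{
		running: make(map[string]bool),
		work: func(taskKey string) {
			fmt.Println("processing queued rows for", taskKey)
			time.Sleep(10 * time.Millisecond)
		},
	}
	c.notify("index")
	c.notify("index") // coalesced into the running worker's next pass
	time.Sleep(100 * time.Millisecond)
}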