func Reduce(source *sublevel.DB, target *sublevel.DB, name string, reduceFunc ReduceFunc, rereduceFunc RereduceFunc, valueFactory ValueFactory, level int) *ReduceTask { task := &ReduceTask{source: source, target: target, taskDb: sublevel.Sublevel(target.LevelDB(), name+string([]byte{0, 65})), reduceFunc: reduceFunc, rereduceFunc: rereduceFunc, valueFactory: valueFactory, level: level} task.ro = levigo.NewReadOptions() task.wo = levigo.NewWriteOptions() filter := func(key, value []byte) []byte { return []byte{32} /* if task.level == 0 { return []byte{0} } s := bytes.Split(key[:len(key)-17], []byte{32}) if len(s) < task.level { return nil } return bytes.Join(s[:task.level], []byte{32}) */ } f := func(key, value []byte) bool { // println("Working on", string(key), string(value)) s := bytes.Split(key[4:len(key)-17], []byte{32}) off := 16 for i := len(s); i >= task.level; i-- { val := task.valueFactory() if i > 0 { k := append(joinReduceKey(s[:i], false), 32) // Iterate over all similar rows in the source DB it := task.source.NewIterator(task.ro) for it.Seek(k); it.Valid(); it.Next() { if !bytes.HasPrefix(it.Key(), k) { break } val = task.reduceFunc(val, it.Value()) } it.Close() } // Iterate over all rows in the target DB which are more specific it := task.target.NewIterator(task.ro) k := joinReduceKey(s[:i], true) for it.Seek(k); it.Valid(); it.Next() { if !bytes.HasPrefix(it.Key(), k) { break } val = task.rereduceFunc(val, it.Value()) } it.Close() task.target.Put(task.wo, joinReduceKey(s[:i], false), serializeValue(val)) if i > 0 { off += len(s[i-1]) + 1 } } return true } task.task = runlevel.Trigger(source, sublevel.Sublevel(target.LevelDB(), name+string([]byte{0, 66})), filter, f) return task }
func Map(source *sublevel.DB, target *sublevel.DB, name string, mapFunc MappingFunc) *MappingTask { task := &MappingTask{source: source, target: target, taskDb: sublevel.Sublevel(target.LevelDB(), name+string([]byte{0, 65})), mapFunc: mapFunc} task.ro = levigo.NewReadOptions() task.wo = levigo.NewWriteOptions() filter := func(key, value []byte) []byte { return key } f := func(key, value []byte) bool { mapValues := new(bytes.Buffer) if value != nil { emit := func(key interface{}, value interface{}) { k := serializeKey(key, uniclock.Next()) v := serializeValue(value) task.target.Put(task.wo, k, v) binary.Write(mapValues, binary.BigEndian, int32(len(k))) mapValues.Write(k) } mapFunc(key, value, emit) } val, err := task.taskDb.Get(task.ro, key) if err != nil { return false } if val != nil { valbuf := bytes.NewBuffer(val) var l int32 off := 0 for off < len(val) { err := binary.Read(valbuf, binary.BigEndian, &l) if err != nil { break } off += 4 if l < 0 || int(l) > len(val) { panic("Something is very wrong with this data") } k := valbuf.Next(int(l)) off += int(l) task.target.Delete(task.wo, k) } } task.taskDb.Put(task.wo, key, mapValues.Bytes()) return true } task.task = runlevel.Trigger(source, sublevel.Sublevel(target.LevelDB(), name+string([]byte{0, 66})), filter, f) return task }
func Trigger(db *sublevel.DB, taskDb *sublevel.DB, filter FilterFunc, taskfunc ProcessFunc) *Task { wo := levigo.NewWriteOptions() ro := levigo.NewReadOptions() task := &Task{db: db, filter: filter, processFunc: taskfunc, taskDb: taskDb, wo: wo, ro: ro, running: make(map[string]bool)} var run func(taskKey []byte) run = func(taskKey []byte) { taskKeyStr := string(taskKey) var ok bool // println("RUN", string(key), string(value), string(taskKey)) hookfunc := func(key, value []byte, hook *sublevel.Hook) { ok = taskfunc(key, value) } for { it := task.taskDb.NewIterator(task.ro) for it.Seek(taskKey); it.Valid(); it.Next() { if !bytes.HasPrefix(it.Key(), taskKey) || len(taskKey)+16 != len(it.Key()) { break } key := it.Value() val, err := task.db.Get(task.ro, it.Value()) if err != nil { continue } if len(val) == 0 { val = nil } // println("RUN", string(key), string(val), string(taskKey)) // This lock avoids that the processing is interrupted by a call to Close() task.closeMutex.Lock() // Execute taskfunc in the context of a new hook, commit to disk, then call after ok = true db.RunHook(wo, hookfunc, nil, key, val) if ok { taskDb.Delete(wo, it.Key()) } task.closeMutex.Unlock() } it.Close() task.runningMutex.Lock() state := task.running[taskKeyStr] if !state { delete(task.running, taskKeyStr) task.runningMutex.Unlock() break } task.running[taskKeyStr] = false task.runningMutex.Unlock() } } // Hook into the db to watch for changes task.pre = func(key, value []byte, hook *sublevel.Hook) { // println("PRE", string(key), string(value)) // Is this change relevant? taskKey := filter(key, value) if taskKey == nil { return } // Write a DB row so the task is not forgotten if the system is terminated now now := uniclock.Next() nowBytes := []byte(fmt.Sprintf("%016x", now)) hook.Put(append(taskKey, nowBytes...), key, taskDb) } task.post = func(key, value []byte) { // println("POST", string(key), string(value)) // Is this change relevant? taskKey := filter(key, value) if taskKey == nil { return } taskKeyStr := string(taskKey) // println("POSTtask", "'" + taskKeyStr + "'") task.runningMutex.Lock() defer task.runningMutex.Unlock() if _, ok := task.running[taskKeyStr]; ok { task.running[taskKeyStr] = true } else { task.running[taskKeyStr] = false go run(taskKey) } } db.Pre(task.pre) db.Post(task.post) return task }