コード例 #1
0
ファイル: mapreduce.go プロジェクト: weistn/mrlevel
func Reduce(source *sublevel.DB, target *sublevel.DB, name string, reduceFunc ReduceFunc, rereduceFunc RereduceFunc, valueFactory ValueFactory, level int) *ReduceTask {
	task := &ReduceTask{source: source, target: target, taskDb: sublevel.Sublevel(target.LevelDB(), name+string([]byte{0, 65})), reduceFunc: reduceFunc, rereduceFunc: rereduceFunc, valueFactory: valueFactory, level: level}
	task.ro = levigo.NewReadOptions()
	task.wo = levigo.NewWriteOptions()

	filter := func(key, value []byte) []byte {
		return []byte{32}
		/*
			if task.level == 0 {
				return []byte{0}
			}
			s := bytes.Split(key[:len(key)-17], []byte{32})
			if len(s) < task.level {
				return nil
			}
			return bytes.Join(s[:task.level], []byte{32})
		*/
	}

	f := func(key, value []byte) bool {
		//		println("Working on", string(key), string(value))
		s := bytes.Split(key[4:len(key)-17], []byte{32})
		off := 16
		for i := len(s); i >= task.level; i-- {
			val := task.valueFactory()
			if i > 0 {
				k := append(joinReduceKey(s[:i], false), 32)
				// Iterate over all similar rows in the source DB
				it := task.source.NewIterator(task.ro)
				for it.Seek(k); it.Valid(); it.Next() {
					if !bytes.HasPrefix(it.Key(), k) {
						break
					}
					val = task.reduceFunc(val, it.Value())
				}
				it.Close()
			}
			// Iterate over all rows in the target DB which are more specific
			it := task.target.NewIterator(task.ro)
			k := joinReduceKey(s[:i], true)
			for it.Seek(k); it.Valid(); it.Next() {
				if !bytes.HasPrefix(it.Key(), k) {
					break
				}
				val = task.rereduceFunc(val, it.Value())
			}
			it.Close()
			task.target.Put(task.wo, joinReduceKey(s[:i], false), serializeValue(val))
			if i > 0 {
				off += len(s[i-1]) + 1
			}
		}
		return true
	}

	task.task = runlevel.Trigger(source, sublevel.Sublevel(target.LevelDB(), name+string([]byte{0, 66})), filter, f)
	return task
}
コード例 #2
0
ファイル: mapreduce.go プロジェクト: weistn/mrlevel
func Map(source *sublevel.DB, target *sublevel.DB, name string, mapFunc MappingFunc) *MappingTask {
	task := &MappingTask{source: source, target: target, taskDb: sublevel.Sublevel(target.LevelDB(), name+string([]byte{0, 65})), mapFunc: mapFunc}
	task.ro = levigo.NewReadOptions()
	task.wo = levigo.NewWriteOptions()

	filter := func(key, value []byte) []byte {
		return key
	}

	f := func(key, value []byte) bool {
		mapValues := new(bytes.Buffer)
		if value != nil {
			emit := func(key interface{}, value interface{}) {
				k := serializeKey(key, uniclock.Next())
				v := serializeValue(value)
				task.target.Put(task.wo, k, v)
				binary.Write(mapValues, binary.BigEndian, int32(len(k)))
				mapValues.Write(k)
			}
			mapFunc(key, value, emit)
		}
		val, err := task.taskDb.Get(task.ro, key)
		if err != nil {
			return false
		}
		if val != nil {
			valbuf := bytes.NewBuffer(val)
			var l int32
			off := 0
			for off < len(val) {
				err := binary.Read(valbuf, binary.BigEndian, &l)
				if err != nil {
					break
				}
				off += 4
				if l < 0 || int(l) > len(val) {
					panic("Something is very wrong with this data")
				}
				k := valbuf.Next(int(l))
				off += int(l)
				task.target.Delete(task.wo, k)
			}
		}
		task.taskDb.Put(task.wo, key, mapValues.Bytes())
		return true
	}

	task.task = runlevel.Trigger(source, sublevel.Sublevel(target.LevelDB(), name+string([]byte{0, 66})), filter, f)
	return task
}