func TestTrigger(t *testing.T) {
	opts := levigo.NewOptions()
	levigo.DestroyDatabase("test.ldb", opts)
	// opts.SetCache(levigo.NewLRUCache(3<<30))
	opts.SetCreateIfMissing(true)
	db, err := levigo.Open("test.ldb", opts)
	if err != nil {
		t.Fatal(err)
	}
	ro := levigo.NewReadOptions()
	wo := levigo.NewWriteOptions()

	sub1 := sublevel.Sublevel(db, "input")
	index := sublevel.Sublevel(db, "index")
	job := sublevel.Sublevel(db, "job")

	task := Trigger(sub1, job, func(key, value []byte) []byte {
		if strings.HasPrefix(string(key), "Doc_") {
			return key
		}
		return nil
	}, func(key, value []byte) bool {
		doc := make(map[string]string)
		err := json.Unmarshal(value, &doc)
		if err != nil {
			t.Fatal(err)
		}
		index.Put(wo, []byte(doc["id"]), []byte(doc["number"]))
		// Make sure that the next task invocation comes in concurrently to this one
		time.Sleep(300 * time.Millisecond)
		return true
	})

	sub1.Put(wo, []byte("foobar"), []byte("do-not-process"))
	// Two put operations which will both trigger the task for the same taskKey.
	sub1.Put(wo, []byte("Doc_1"), []byte("{\"id\":\"01234\", \"number\": \"42\"}"))
	sub1.Put(wo, []byte("Doc_1"), []byte("{\"id\":\"01234\", \"number\": \"43\"}"))

	val, err := sub1.Get(ro, []byte("Doc_1"))
	if err != nil || string(val) != "{\"id\":\"01234\", \"number\": \"43\"}" {
		t.Fatal(err, string(val))
	}

	// Give both triggered task runs time to complete before checking the index.
	time.Sleep(800 * time.Millisecond)

	val, err = index.Get(ro, []byte("01234"))
	if err != nil || string(val) != "43" {
		t.Fatal(err, string(val))
	}

	task.Close()
	ro.Close()
	wo.Close()
	db.Close()
}
func Reduce(source *sublevel.DB, target *sublevel.DB, name string, reduceFunc ReduceFunc, rereduceFunc RereduceFunc, valueFactory ValueFactory, level int) *ReduceTask {
	task := &ReduceTask{
		source:       source,
		target:       target,
		taskDb:       sublevel.Sublevel(target.LevelDB(), name+string([]byte{0, 65})),
		reduceFunc:   reduceFunc,
		rereduceFunc: rereduceFunc,
		valueFactory: valueFactory,
		level:        level,
	}
	task.ro = levigo.NewReadOptions()
	task.wo = levigo.NewWriteOptions()

	filter := func(key, value []byte) []byte {
		// For now every change is mapped to the same task key; the commented-out
		// code below sketches per-prefix task keys instead.
		return []byte{32}
		/*
			if task.level == 0 {
				return []byte{0}
			}
			s := bytes.Split(key[:len(key)-17], []byte{32})
			if len(s) < task.level {
				return nil
			}
			return bytes.Join(s[:task.level], []byte{32})
		*/
	}

	f := func(key, value []byte) bool {
		// println("Working on", string(key), string(value))
		s := bytes.Split(key[4:len(key)-17], []byte{32})
		off := 16
		// Recompute one aggregate per key prefix, from the most specific grouping
		// down to the configured level.
		for i := len(s); i >= task.level; i-- {
			val := task.valueFactory()
			if i > 0 {
				k := append(joinReduceKey(s[:i], false), 32)
				// Iterate over all similar rows in the source DB
				it := task.source.NewIterator(task.ro)
				for it.Seek(k); it.Valid(); it.Next() {
					if !bytes.HasPrefix(it.Key(), k) {
						break
					}
					val = task.reduceFunc(val, it.Value())
				}
				it.Close()
			}
			// Iterate over all rows in the target DB which are more specific
			it := task.target.NewIterator(task.ro)
			k := joinReduceKey(s[:i], true)
			for it.Seek(k); it.Valid(); it.Next() {
				if !bytes.HasPrefix(it.Key(), k) {
					break
				}
				val = task.rereduceFunc(val, it.Value())
			}
			it.Close()
			task.target.Put(task.wo, joinReduceKey(s[:i], false), serializeValue(val))
			if i > 0 {
				off += len(s[i-1]) + 1
			}
		}
		return true
	}

	task.task = runlevel.Trigger(source, sublevel.Sublevel(target.LevelDB(), name+string([]byte{0, 66})), filter, f)
	return task
}
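// To make the prefix loop in f above concrete, this standalone sketch (not part of
// the package; the key components are illustrative, borrowed from the TestReduce
// documents below, and only the "bytes" and "fmt" imports are needed) shows which
// groupings the reduce task recomputes for a single mapped key when level is 0:
// every prefix of the space-separated components, down to the empty prefix that
// holds the grand total. The real keys carry a few leading and trailing framing
// bytes, which f strips via key[4 : len(key)-17] before splitting.
func ExampleReduce_groupings() {
	key := []byte("Germany NRW Duisburg")
	level := 0
	s := bytes.Split(key, []byte{32})
	for i := len(s); i >= level; i-- {
		fmt.Printf("recompute aggregate for %q\n", string(bytes.Join(s[:i], []byte{32})))
	}
	// Output:
	// recompute aggregate for "Germany NRW Duisburg"
	// recompute aggregate for "Germany NRW"
	// recompute aggregate for "Germany"
	// recompute aggregate for ""
}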
func Map(source *sublevel.DB, target *sublevel.DB, name string, mapFunc MappingFunc) *MappingTask {
	task := &MappingTask{source: source, target: target, taskDb: sublevel.Sublevel(target.LevelDB(), name+string([]byte{0, 65})), mapFunc: mapFunc}
	task.ro = levigo.NewReadOptions()
	task.wo = levigo.NewWriteOptions()

	filter := func(key, value []byte) []byte {
		return key
	}

	f := func(key, value []byte) bool {
		mapValues := new(bytes.Buffer)
		if value != nil {
			// Run the mapping function for the new value and record every key it emits.
			emit := func(key interface{}, value interface{}) {
				k := serializeKey(key, uniclock.Next())
				v := serializeValue(value)
				task.target.Put(task.wo, k, v)
				binary.Write(mapValues, binary.BigEndian, int32(len(k)))
				mapValues.Write(k)
			}
			mapFunc(key, value, emit)
		}
		val, err := task.taskDb.Get(task.ro, key)
		if err != nil {
			return false
		}
		if val != nil {
			// Delete the target rows emitted by the previous run for this source key.
			valbuf := bytes.NewBuffer(val)
			var l int32
			off := 0
			for off < len(val) {
				err := binary.Read(valbuf, binary.BigEndian, &l)
				if err != nil {
					break
				}
				off += 4
				if l < 0 || int(l) > len(val) {
					panic("Something is very wrong with this data")
				}
				k := valbuf.Next(int(l))
				off += int(l)
				task.target.Delete(task.wo, k)
			}
		}
		// Remember the newly emitted keys so a later update can clean them up.
		task.taskDb.Put(task.wo, key, mapValues.Bytes())
		return true
	}

	task.task = runlevel.Trigger(source, sublevel.Sublevel(target.LevelDB(), name+string([]byte{0, 66})), filter, f)
	return task
}
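// The bookkeeping record that Map stores in taskDb is a concatenation of big-endian
// int32 length prefixes, each followed by one serialized emitted key. The two helpers
// below are hypothetical (not part of the package; they need only the "bytes" and
// "encoding/binary" imports) and merely isolate that encode/decode round trip so the
// cleanup loop above is easier to follow.
func encodeEmittedKeys(keys [][]byte) []byte {
	buf := new(bytes.Buffer)
	for _, k := range keys {
		binary.Write(buf, binary.BigEndian, int32(len(k)))
		buf.Write(k)
	}
	return buf.Bytes()
}

func decodeEmittedKeys(record []byte) [][]byte {
	buf := bytes.NewBuffer(record)
	keys := make([][]byte, 0)
	var l int32
	for buf.Len() >= 4 {
		if err := binary.Read(buf, binary.BigEndian, &l); err != nil {
			break
		}
		// Copy each key out of the buffer so the returned slices stay valid.
		keys = append(keys, append([]byte(nil), buf.Next(int(l))...))
	}
	return keys
}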
func TestMap(t *testing.T) {
	opts := levigo.NewOptions()
	levigo.DestroyDatabase("test.ldb", opts)
	// opts.SetCache(levigo.NewLRUCache(3<<30))
	opts.SetCreateIfMissing(true)
	db, err := levigo.Open("test.ldb", opts)
	if err != nil {
		t.Fatal(err)
	}
	ro := levigo.NewReadOptions()
	wo := levigo.NewWriteOptions()

	sub1 := sublevel.Sublevel(db, "input")
	index := sublevel.Sublevel(db, "index")

	mapcount := 0
	task := Map(sub1, index, "job", func(key, value []byte, emit EmitFunc) {
		mapcount++
		ingredients := strings.Split(string(value), ",")
		for _, ingredient := range ingredients {
			// println("emit", ingredient, string(key))
			emit([]byte(ingredient), key)
		}
	})

	sub1.Put(wo, []byte("Sushi"), []byte("Fish,Rice"))
	sub1.Put(wo, []byte("Forelle Blau"), []byte("Fish,Potatoes"))
	sub1.Put(wo, []byte("Wiener Schnitzel"), []byte("Fish,Potatoes"))
	sub1.Put(wo, []byte("Pulled Pork"), []byte("Pig,ColeSlaw"))

	// Wait until mapping has completed
	time.Sleep(500 * time.Millisecond)

	if mapcount != 4 {
		t.Fatal(mapcount)
	}

	correct1 := []string{"Sushi", "Forelle Blau", "Wiener Schnitzel"}
	it := task.NewIterator("Fish")
	i := 0
	for it.SeekToFirst(); it.Valid(); it.Next() {
		if string(it.Value()) != correct1[i] {
			t.Fatal(it.Value())
		}
		i++
	}
	if i != len(correct1) {
		t.Fatal(i)
	}

	sub1.Put(wo, []byte("Wiener Schnitzel"), []byte("Pig,Potatoes"))

	// Wait until mapping has completed
	time.Sleep(500 * time.Millisecond)

	if mapcount != 5 {
		t.Fatal(mapcount)
	}

	correct2 := []string{"Sushi", "Forelle Blau"}
	i = 0
	it = task.NewIterator("Fish")
	for it.SeekToFirst(); it.Valid(); it.Next() {
		if string(it.Value()) != correct2[i] {
			t.Fatal(it.Value())
		}
		i++
	}
	if i != len(correct2) {
		t.Fatal(i)
	}

	correct3 := []string{"Sushi"}
	sub1.Delete(wo, []byte("Forelle Blau"))

	// Wait until mapping has completed
	time.Sleep(500 * time.Millisecond)

	if mapcount != 5 {
		t.Fatal(mapcount)
	}

	it = task.NewIterator("Fish")
	i = 0
	for it.SeekToFirst(); it.Valid(); it.Next() {
		if string(it.Value()) != correct3[i] {
			t.Fatal(it.Value())
		}
		i++
	}
	if i != len(correct3) {
		t.Fatal(i)
	}

	/*
		it2 := db.NewIterator(ro)
		for it2.SeekToFirst(); it2.Valid(); it2.Next() {
			println("DB", len(it2.Key()), string(it2.Key()), string(it2.Value()))
		}
	*/

	task.Close()
	ro.Close()
	wo.Close()
	db.Close()
}
func TestReduce(t *testing.T) {
	opts := levigo.NewOptions()
	levigo.DestroyDatabase("test.ldb", opts)
	// opts.SetCache(levigo.NewLRUCache(3<<30))
	opts.SetCreateIfMissing(true)
	db, err := levigo.Open("test.ldb", opts)
	if err != nil {
		t.Fatal(err)
	}
	ro := levigo.NewReadOptions()
	wo := levigo.NewWriteOptions()

	sub1 := sublevel.Sublevel(db, "input")
	index := sublevel.Sublevel(db, "index")
	reduce := sublevel.Sublevel(db, "reduce")

	mapcount := 0
	task := Map(sub1, index, "mapjob", func(key, value []byte, emit EmitFunc) {
		mapcount++
		doc := make(map[string]string)
		err := json.Unmarshal(value, &doc)
		if err != nil {
			t.Fatal(err)
		}
		emit([]string{doc["Country"], doc["State"], doc["City"]}, doc["Kind"])
	})

	reducecount := 0
	rereducecount := 0
	task2 := Reduce(index, reduce, "mapjob2", func(acc interface{}, value []byte) interface{} {
		reducecount++
		var kind string
		err := json.Unmarshal(value, &kind)
		if err != nil {
			t.Fatal(err)
		}
		val := acc.(map[string]int)
		if n, ok := val[kind]; ok {
			val[kind] = n + 1
		} else {
			val[kind] = 1
		}
		return val
	}, func(acc interface{}, value []byte) interface{} {
		rereducecount++
		var acc2 map[string]int
		err := json.Unmarshal(value, &acc2)
		if err != nil {
			t.Fatal(err)
		}
		val := acc.(map[string]int)
		for k, v := range acc2 {
			if n, ok := val[k]; ok {
				val[k] = n + v
			} else {
				val[k] = v
			}
		}
		return val
	}, func() interface{} {
		return make(map[string]int)
	}, 0)

	sub1.Put(wo, []byte("Bella Vista"), []byte("{\"Country\":\"Germany\",\"State\":\"NRW\",\"City\":\"Duisburg\",\"Kind\":\"Pizza\"}"))
	sub1.Put(wo, []byte("Tokio"), []byte("{\"Country\":\"Germany\",\"State\":\"NRW\",\"City\":\"Düsseldorf\",\"Kind\":\"Sushi\"}"))
	sub1.Put(wo, []byte("Maria"), []byte("{\"Country\":\"Germany\",\"State\":\"NRW\",\"City\":\"Duisburg\",\"Kind\":\"Pizza\"}"))
	sub1.Put(wo, []byte("Formagio"), []byte("{\"Country\":\"Germany\",\"State\":\"NRW\",\"City\":\"Essen\",\"Kind\":\"Pizza\"}"))
	sub1.Put(wo, []byte("Fungi"), []byte("{\"Country\":\"Germany\",\"State\":\"Hessen\",\"City\":\"Frankfurt\",\"Kind\":\"Pizza\"}"))
	sub1.Put(wo, []byte("New Tokio"), []byte("{\"Country\":\"Germany\",\"State\":\"Bayern\",\"City\":\"Munich\",\"Kind\":\"Sushi\"}"))
	sub1.Put(wo, []byte("San Torino"), []byte("{\"Country\":\"USA\",\"State\":\"CA\",\"City\":\"San Jose\",\"Kind\":\"Pizza\"}"))
	sub1.Delete(wo, []byte("Formagio"))

	// Wait until map reduce has completed
	time.Sleep(1000 * time.Millisecond)

	it2 := db.NewIterator(ro)
	for it2.SeekToFirst(); it2.Valid(); it2.Next() {
		println("DB", len(it2.Key()), string(it2.Key()), string(it2.Value()))
	}

	task.Close()
	task2.Close()
	ro.Close()
	wo.Close()
	db.Close()
}
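// TestReduce only dumps the raw database at the end, so for orientation this sketch
// (not an assertion against the reduce sublevel; it needs only the "fmt", "sort" and
// "strings" imports) recomputes in plain Go the per-grouping counts that reduceFunc
// and rereduceFunc should converge to for the documents above, once Formagio has
// been deleted.
func ExampleReduce_counts() {
	docs := []struct {
		groups []string
		kind   string
	}{
		{[]string{"Germany", "NRW", "Duisburg"}, "Pizza"},     // Bella Vista
		{[]string{"Germany", "NRW", "Düsseldorf"}, "Sushi"},   // Tokio
		{[]string{"Germany", "NRW", "Duisburg"}, "Pizza"},     // Maria
		{[]string{"Germany", "Hessen", "Frankfurt"}, "Pizza"}, // Fungi
		{[]string{"Germany", "Bayern", "Munich"}, "Sushi"},    // New Tokio
		{[]string{"USA", "CA", "San Jose"}, "Pizza"},          // San Torino
	}
	// Count kinds for every prefix of [Country, State, City], mirroring the
	// level-0 grouping hierarchy of the Reduce task.
	counts := make(map[string]map[string]int)
	for _, d := range docs {
		for i := 0; i <= len(d.groups); i++ {
			g := strings.Join(d.groups[:i], " ")
			if counts[g] == nil {
				counts[g] = make(map[string]int)
			}
			counts[g][d.kind]++
		}
	}
	groups := make([]string, 0, len(counts))
	for g := range counts {
		groups = append(groups, g)
	}
	sort.Strings(groups)
	for _, g := range groups {
		fmt.Printf("%q %v\n", g, counts[g])
	}
	// Output:
	// "" map[Pizza:4 Sushi:2]
	// "Germany" map[Pizza:3 Sushi:2]
	// "Germany Bayern" map[Sushi:1]
	// "Germany Bayern Munich" map[Sushi:1]
	// "Germany Hessen" map[Pizza:1]
	// "Germany Hessen Frankfurt" map[Pizza:1]
	// "Germany NRW" map[Pizza:2 Sushi:1]
	// "Germany NRW Duisburg" map[Pizza:2]
	// "Germany NRW Düsseldorf" map[Sushi:1]
	// "USA" map[Pizza:1]
	// "USA CA" map[Pizza:1]
	// "USA CA San Jose" map[Pizza:1]
}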