Пример #1
0
// TestTrigger writes the same document key twice into a sublevel watched by
// Trigger and checks that the index ends up holding the value of the most
// recent write.
func TestTrigger(t *testing.T) {
	opts := levigo.NewOptions()
	defer opts.Close()
	levigo.DestroyDatabase("test.ldb", opts)
	// opts.SetCache(levigo.NewLRUCache(3<<30))
	opts.SetCreateIfMissing(true)
	db, err := levigo.Open("test.ldb", opts)
	if err != nil {
		t.Fatal(err)
	}
	// Cleanup is deferred so the handles are released even when an assertion
	// aborts the test. Defers run last-in first-out: the task is closed first
	// and the database last.
	defer db.Close()

	ro := levigo.NewReadOptions()
	defer ro.Close()
	wo := levigo.NewWriteOptions()
	defer wo.Close()

	sub1 := sublevel.Sublevel(db, "input")
	index := sublevel.Sublevel(db, "index")
	job := sublevel.Sublevel(db, "job")

	task := Trigger(sub1, job, func(key, value []byte) []byte {
		// Only keys starting with "Doc_" produce a task; the task key is the
		// document key itself.
		if strings.HasPrefix(string(key), "Doc_") {
			return key
		}
		return nil
	}, func(key, value []byte) bool {
		doc := make(map[string]string)
		if err := json.Unmarshal(value, &doc); err != nil {
			// This callback appears to run outside the test goroutine (the
			// test has to sleep to wait for it), where t.Fatal must not be
			// called. Record the failure with t.Error instead.
			t.Error(err)
			// NOTE(review): true presumably marks the task as handled so it
			// is not retried — confirm against Trigger.
			return true
		}
		index.Put(wo, []byte(doc["id"]), []byte(doc["number"]))
		// Make sure that the next task invocation comes in concurrently to this one
		time.Sleep(300 * time.Millisecond)
		return true
	})
	defer task.Close()

	sub1.Put(wo, []byte("foobar"), []byte("do-not-process"))

	// Two put operations which will both trigger the task for the same taskKey.
	sub1.Put(wo, []byte("Doc_1"), []byte("{\"id\":\"01234\", \"number\": \"42\"}"))
	sub1.Put(wo, []byte("Doc_1"), []byte("{\"id\":\"01234\", \"number\": \"43\"}"))

	val, err := sub1.Get(ro, []byte("Doc_1"))
	if err != nil || string(val) != "{\"id\":\"01234\", \"number\": \"43\"}" {
		t.Fatal(err, string(val))
	}

	// Give the asynchronous task invocations time to finish.
	time.Sleep(800 * time.Millisecond)

	val, err = index.Get(ro, []byte("01234"))
	if err != nil || string(val) != "43" {
		t.Fatal(err, string(val))
	}
}
Пример #2
0
// Reduce creates a ReduceTask that recomputes aggregate rows in target each
// time runlevel.Trigger invokes it for a row of source.
//
// For a triggering key the callback drops the first 4 and the last 17 bytes
// and splits the remainder on the space byte into key components.
// NOTE(review): the 4-byte prefix and 17-byte suffix are presumably framing
// added by serializeKey — confirm.
//
// For every component count i from len(components) down to level it then:
//   - starts from a fresh accumulator obtained from valueFactory,
//   - if i > 0, folds in (via reduceFunc) every source row whose key starts
//     with joinReduceKey(components[:i], false) followed by a space,
//   - folds in (via rereduceFunc) every target row whose key starts with
//     joinReduceKey(components[:i], true),
//   - stores the serialized accumulator in target under
//     joinReduceKey(components[:i], false).
//
// Two sublevels of target's underlying LevelDB are derived from name: one
// with suffix {0, 65} kept in the task's taskDb field and one with suffix
// {0, 66} handed to runlevel.Trigger.
func Reduce(source *sublevel.DB, target *sublevel.DB, name string, reduceFunc ReduceFunc, rereduceFunc RereduceFunc, valueFactory ValueFactory, level int) *ReduceTask {
	task := &ReduceTask{
		source:       source,
		target:       target,
		taskDb:       sublevel.Sublevel(target.LevelDB(), name+string([]byte{0, 65})),
		reduceFunc:   reduceFunc,
		rereduceFunc: rereduceFunc,
		valueFactory: valueFactory,
		level:        level,
	}
	task.ro = levigo.NewReadOptions()
	task.wo = levigo.NewWriteOptions()

	// Every source key currently maps to the same task key (a single space).
	// The commented-out code is an earlier, level-dependent task key scheme.
	filter := func(key, value []byte) []byte {
		return []byte{32}
		/*
			if task.level == 0 {
				return []byte{0}
			}
			s := bytes.Split(key[:len(key)-17], []byte{32})
			if len(s) < task.level {
				return nil
			}
			return bytes.Join(s[:task.level], []byte{32})
		*/
	}

	f := func(key, value []byte) bool {
		//		println("Working on", string(key), string(value))
		components := bytes.Split(key[4:len(key)-17], []byte{32})
		// Walk from the most specific key prefix up to the configured level.
		for i := len(components); i >= task.level; i-- {
			val := task.valueFactory()
			if i > 0 {
				prefix := append(joinReduceKey(components[:i], false), 32)
				// Iterate over all similar rows in the source DB
				it := task.source.NewIterator(task.ro)
				for it.Seek(prefix); it.Valid(); it.Next() {
					if !bytes.HasPrefix(it.Key(), prefix) {
						break
					}
					val = task.reduceFunc(val, it.Value())
				}
				it.Close()
			}
			// Iterate over all rows in the target DB which are more specific
			it := task.target.NewIterator(task.ro)
			prefix := joinReduceKey(components[:i], true)
			for it.Seek(prefix); it.Valid(); it.Next() {
				if !bytes.HasPrefix(it.Key(), prefix) {
					break
				}
				val = task.rereduceFunc(val, it.Value())
			}
			it.Close()
			task.target.Put(task.wo, joinReduceKey(components[:i], false), serializeValue(val))
		}
		return true
	}

	task.task = runlevel.Trigger(source, sublevel.Sublevel(target.LevelDB(), name+string([]byte{0, 66})), filter, f)
	return task
}
Пример #3
0
// Map creates a MappingTask that keeps target in sync with the rows mapFunc
// emits for each row of source.
//
// Each source key is its own task key. When the task runs for a key it:
//   - loads the list of target keys emitted the last time this source key was
//     mapped (stored in taskDb as repeated big-endian int32 length + key bytes),
//   - if value is non-nil, calls mapFunc; every emit writes a row to target
//     under serializeKey(emittedKey, uniclock.Next()) and records that key,
//   - deletes the previously emitted target keys,
//   - stores the new key list in taskDb under the source key.
//
// The callback returns false when the bookkeeping record cannot be read.
// The record is read before mapFunc runs so that such a failure leaves
// target untouched instead of leaving behind rows nobody tracks.
//
// It panics if a stored length is negative or larger than the bytes that
// remain in the bookkeeping record.
func Map(source *sublevel.DB, target *sublevel.DB, name string, mapFunc MappingFunc) *MappingTask {
	task := &MappingTask{
		source:  source,
		target:  target,
		taskDb:  sublevel.Sublevel(target.LevelDB(), name+string([]byte{0, 65})),
		mapFunc: mapFunc,
	}
	task.ro = levigo.NewReadOptions()
	task.wo = levigo.NewWriteOptions()

	filter := func(key, value []byte) []byte {
		return key
	}

	f := func(key, value []byte) bool {
		previous, err := task.taskDb.Get(task.ro, key)
		if err != nil {
			return false
		}

		emitted := new(bytes.Buffer)
		if value != nil {
			emit := func(key interface{}, value interface{}) {
				k := serializeKey(key, uniclock.Next())
				v := serializeValue(value)
				task.target.Put(task.wo, k, v)
				// Writing a fixed-size integer to a bytes.Buffer cannot fail.
				binary.Write(emitted, binary.BigEndian, int32(len(k)))
				emitted.Write(k)
			}
			mapFunc(key, value, emit)
		}

		// Remove the rows produced by the previous mapping of this key. The
		// new rows were written above under fresh keys, so these deletes
		// cannot hit them.
		old := bytes.NewBuffer(previous)
		for old.Len() > 0 {
			var l int32
			if err := binary.Read(old, binary.BigEndian, &l); err != nil {
				break
			}
			// Compare against the bytes still unread: a length that overruns
			// the record would otherwise delete a truncated, wrong key.
			if l < 0 || int(l) > old.Len() {
				panic("Something is very wrong with this data")
			}
			task.target.Delete(task.wo, old.Next(int(l)))
		}

		task.taskDb.Put(task.wo, key, emitted.Bytes())
		return true
	}

	task.task = runlevel.Trigger(source, sublevel.Sublevel(target.LevelDB(), name+string([]byte{0, 66})), filter, f)
	return task
}
Пример #4
0
// TestMap maps dishes to their ingredients and checks that the "Fish" index
// follows inserts, updates and deletes of the source rows.
func TestMap(t *testing.T) {
	opts := levigo.NewOptions()
	levigo.DestroyDatabase("test.ldb", opts)
	// opts.SetCache(levigo.NewLRUCache(3<<30))
	opts.SetCreateIfMissing(true)
	db, err := levigo.Open("test.ldb", opts)
	if err != nil {
		t.Fatal(err)
	}
	// Cleanup is deferred so the handles are released even when an assertion
	// aborts the test. Defers run last-in first-out: the task is closed first
	// and the database last.
	defer db.Close()

	ro := levigo.NewReadOptions()
	defer ro.Close()
	wo := levigo.NewWriteOptions()
	defer wo.Close()

	sub1 := sublevel.Sublevel(db, "input")
	index := sublevel.Sublevel(db, "index")

	// NOTE(review): mapcount is written by the mapping callback and read by
	// the test; if the callback runs on another goroutine this is only
	// ordered by the sleeps below — confirm.
	mapcount := 0
	task := Map(sub1, index, "job", func(key, value []byte, emit EmitFunc) {
		mapcount++
		ingredients := strings.Split(string(value), ",")
		for _, ingredient := range ingredients {
			//			println("emit", ingredient, string(key))
			emit([]byte(ingredient), key)
		}
	})
	defer task.Close()

	// expectFish fails the test unless the "Fish" index lists exactly want,
	// in order. The bounds check turns surplus index entries into a test
	// failure instead of an index-out-of-range panic.
	expectFish := func(want []string) {
		n := 0
		it := task.NewIterator("Fish")
		for it.SeekToFirst(); it.Valid(); it.Next() {
			if n >= len(want) {
				t.Fatalf("unexpected extra entry %q after %d expected entries", it.Value(), len(want))
			}
			if got := string(it.Value()); got != want[n] {
				t.Fatalf("entry %d: got %q, want %q", n, got, want[n])
			}
			n++
		}
		if n != len(want) {
			t.Fatalf("got %d entries, want %d", n, len(want))
		}
	}

	sub1.Put(wo, []byte("Sushi"), []byte("Fish,Rice"))
	sub1.Put(wo, []byte("Forelle Blau"), []byte("Fish,Potatoes"))
	sub1.Put(wo, []byte("Wiener Schnitzel"), []byte("Fish,Potatoes"))
	sub1.Put(wo, []byte("Pulled Pork"), []byte("Pig,ColeSlaw"))

	// Wait until mapping has completed
	time.Sleep(500 * time.Millisecond)

	if mapcount != 4 {
		t.Fatal(mapcount)
	}
	expectFish([]string{"Sushi", "Forelle Blau", "Wiener Schnitzel"})

	// Updating a row must replace its old index entries.
	sub1.Put(wo, []byte("Wiener Schnitzel"), []byte("Pig,Potatoes"))

	// Wait until mapping has completed
	time.Sleep(500 * time.Millisecond)

	if mapcount != 5 {
		t.Fatal(mapcount)
	}
	expectFish([]string{"Sushi", "Forelle Blau"})

	// Deleting a row must remove its index entries without calling the
	// mapping function again, so mapcount stays at 5.
	sub1.Delete(wo, []byte("Forelle Blau"))

	// Wait until mapping has completed
	time.Sleep(500 * time.Millisecond)

	if mapcount != 5 {
		t.Fatal(mapcount)
	}
	expectFish([]string{"Sushi"})

	/*
		it2 := db.NewIterator(ro)
		for it2.SeekToFirst(); it2.Valid(); it2.Next() {
			println("DB", len(it2.Key()), string(it2.Key()), string(it2.Value()))
		}
	*/
}
Пример #5
0
// TestReduce chains a Map task (restaurant -> [Country, State, City] => Kind)
// with a Reduce task that counts kinds per key prefix, then dumps the whole
// database.
//
// NOTE(review): the test only prints the resulting rows; it asserts nothing
// about the reduced values.
func TestReduce(t *testing.T) {
	opts := levigo.NewOptions()
	levigo.DestroyDatabase("test.ldb", opts)
	// opts.SetCache(levigo.NewLRUCache(3<<30))
	opts.SetCreateIfMissing(true)
	db, err := levigo.Open("test.ldb", opts)
	if err != nil {
		t.Fatal(err)
	}
	// Deferred cleanup runs last-in first-out: tasks first, database last.
	defer db.Close()

	ro := levigo.NewReadOptions()
	defer ro.Close()
	wo := levigo.NewWriteOptions()
	defer wo.Close()

	sub1 := sublevel.Sublevel(db, "input")
	index := sublevel.Sublevel(db, "index")
	reduce := sublevel.Sublevel(db, "reduce")

	// The callbacks below appear to run outside the test goroutine (the test
	// has to sleep to wait for them), where t.Fatal must not be called. They
	// report failures with t.Error and skip the offending row instead.
	mapcount := 0
	task := Map(sub1, index, "mapjob", func(key, value []byte, emit EmitFunc) {
		mapcount++
		doc := make(map[string]string)
		if err := json.Unmarshal(value, &doc); err != nil {
			t.Error(err)
			return
		}
		emit([]string{doc["Country"], doc["State"], doc["City"]}, doc["Kind"])
	})

	reducecount := 0
	rereducecount := 0
	task2 := Reduce(index, reduce, "mapjob2", func(acc interface{}, value []byte) interface{} {
		// Count one occurrence of the row's kind.
		reducecount++
		var kind string
		if err := json.Unmarshal(value, &kind); err != nil {
			t.Error(err)
			return acc
		}
		val := acc.(map[string]int)
		// A missing key reads as zero, so this also covers the first occurrence.
		val[kind]++
		return val
	}, func(acc interface{}, value []byte) interface{} {
		// Merge an already reduced per-kind count into the accumulator.
		rereducecount++
		var acc2 map[string]int
		if err := json.Unmarshal(value, &acc2); err != nil {
			t.Error(err)
			return acc
		}
		val := acc.(map[string]int)
		for k, v := range acc2 {
			val[k] += v
		}
		return val
	}, func() interface{} {
		return make(map[string]int)
	}, 0)
	// Close the tasks in the same order as before: map task, then reduce task.
	defer func() {
		task.Close()
		task2.Close()
	}()

	sub1.Put(wo, []byte("Bella Vista"), []byte("{\"Country\":\"Germany\",\"State\":\"NRW\",\"City\":\"Duisburg\",\"Kind\":\"Pizza\"}"))
	sub1.Put(wo, []byte("Tokio"), []byte("{\"Country\":\"Germany\",\"State\":\"NRW\",\"City\":\"Düsseldorf\",\"Kind\":\"Sushi\"}"))
	sub1.Put(wo, []byte("Maria"), []byte("{\"Country\":\"Germany\",\"State\":\"NRW\",\"City\":\"Duisburg\",\"Kind\":\"Pizza\"}"))
	sub1.Put(wo, []byte("Formagio"), []byte("{\"Country\":\"Germany\",\"State\":\"NRW\",\"City\":\"Essen\",\"Kind\":\"Pizza\"}"))
	sub1.Put(wo, []byte("Fungi"), []byte("{\"Country\":\"Germany\",\"State\":\"Hessen\",\"City\":\"Frankfurt\",\"Kind\":\"Pizza\"}"))
	sub1.Put(wo, []byte("New Tokio"), []byte("{\"Country\":\"Germany\",\"State\":\"Bayern\",\"City\":\"Munich\",\"Kind\":\"Sushi\"}"))
	sub1.Put(wo, []byte("San Torino"), []byte("{\"Country\":\"USA\",\"State\":\"CA\",\"City\":\"San Jose\",\"Kind\":\"Pizza\"}"))
	sub1.Delete(wo, []byte("Formagio"))

	// Wait until map reduce has completed
	time.Sleep(1000 * time.Millisecond)

	// Dump every row of the underlying database for manual inspection.
	it2 := db.NewIterator(ro)
	for it2.SeekToFirst(); it2.Valid(); it2.Next() {
		println("DB", len(it2.Key()), string(it2.Key()), string(it2.Value()))
	}
	// The iterator must be released before the database is closed.
	it2.Close()
}