Пример #1
0
// benchmarkDeduplication measures the cost of walking a deduplicated view of
// the commit log.
//
// Storage is populated once, outside the timed region, by randStorageWRepeats
// using numTrn, factsPerTrn, eLen, aLen and vLen. Each benchmark iteration then
// takes a fresh log.Now() iterator, wraps it with view.Deduplicate and drains
// the result through testNext.
func benchmarkDeduplication(b *testing.B, numTrn, factsPerTrn, eLen, aLen, vLen int) {
	domain := "test"
	storage := randStorageWRepeats(domain, numTrn, factsPerTrn, eLen, aLen, vLen)

	commitLog, openErr := view.OpenLog(storage, domain, "commit")
	if openErr != nil {
		b.Fatal(openErr)
	}

	// Everything above is setup; exclude it from the measurement.
	b.ResetTimer()

	for run := 0; run < b.N; run++ {
		deduped := view.Deduplicate(commitLog.Now())
		if iterErr := deduped.Err(); iterErr != nil {
			b.Fatal(iterErr)
		}

		// Deduplicate() is lazy: the bulk of the work is done while stepping
		// with Next(). Draining the iterator is therefore what actually
		// exercises (and times) the deduplication.
		if _, walkErr := testNext(deduped); walkErr != nil {
			b.Fatal(walkErr)
		}
	}
}
Пример #2
0
// TestLogExcludeDuplicates checks that view.Deduplicate collapses repeated
// facts in the commit log.
//
// It fills storage via randStorageWRepeats, confirms that the raw log yields
// one fact per generated fact, and then confirms that the deduplicated stream
// yields exactly eLen*aLen*vLen facts.
func TestLogExcludeDuplicates(t *testing.T) {
	domain := "test"

	// Number of transactions and number of facts in each of them.
	txCount, factsPerTx := 100, 100

	eLen, aLen, vLen := 2, 3, 4

	wantTotal := txCount * factsPerTx
	wantUnique := eLen * aLen * vLen

	storage := randStorageWRepeats(domain, txCount, factsPerTx, eLen, aLen, vLen)

	// Open the commit log.
	commitLog, err := view.OpenLog(storage, domain, "commit")
	if err != nil {
		t.Fatal(err)
	}

	// Step 1: the raw log must contain every generated fact.
	total, err := testNext(commitLog.Now())
	if err != nil {
		t.Fatal(err)
	}
	if total != wantTotal {
		t.Errorf("expected %d total facts, got %d", wantTotal, total)
	}

	// Step 2: Next() must work on the deduplicated stream, and the number of
	// facts it yields must equal the number of unique facts.
	deduped := view.Deduplicate(commitLog.Now())
	if iterErr := deduped.Err(); iterErr != nil {
		t.Fatal(iterErr)
	}

	unique, err := testNext(deduped)
	if err != nil {
		t.Fatal(err)
	}

	// The dictionary is tiny (2*3*4 = 24 possible facts) relative to the
	// number of generated facts (100*100 = 10,000), so the chance that some
	// possible fact was never generated is negligible.
	if unique != wantUnique {
		t.Errorf("expected %d unique facts, got %d", wantUnique, unique)
	}
}