// This doesn't actually test anything, but prints probabilities to log func TestBirthdayProblem(t *testing.T) { t.Log("Hash size is", dedup.HashSize*8, "bits") t.Log("1GiB, 1KiB blocks:") t.Log(dedup.BirthdayProblem((1 << 30) / (1 << 10))) w, _ := dedup.NewWriter(ioutil.Discard, ioutil.Discard, dedup.ModeFixed, 1<<10, 0) e, _ := w.MemUse(1 << 30) t.Logf("It will use %d MiB for encoder.", e>>20) t.Log("1TiB, 4KiB blocks:") t.Log(dedup.BirthdayProblem((1 << 40) / (4 << 10))) w, _ = dedup.NewWriter(ioutil.Discard, ioutil.Discard, dedup.ModeFixed, 4<<10, 0) e, _ = w.MemUse(1 << 40) t.Logf("It will use %d MiB for encoder.", e>>20) t.Log("1PiB, 4KiB blocks:") t.Log(dedup.BirthdayProblem((1 << 50) / (4 << 10))) e, _ = w.MemUse(1 << 50) t.Logf("It will use %d MiB for encoder.", e>>20) t.Log("1EiB, 64KiB blocks:") t.Log(dedup.BirthdayProblem((1 << 60) / (64 << 10))) w, _ = dedup.NewWriter(ioutil.Discard, ioutil.Discard, dedup.ModeFixed, 64<<10, 0) e, _ = w.MemUse(1 << 60) t.Logf("It will use %d MiB for encoder.", e>>20) t.Log("1EiB, 1KiB blocks:") t.Log(dedup.BirthdayProblem((1 << 60) / (1 << 10))) w, _ = dedup.NewWriter(ioutil.Discard, ioutil.Discard, dedup.ModeFixed, 1<<10, 0) e, _ = w.MemUse(1 << 60) t.Logf("It will use %d MiB for encoder.", e>>20) }
// This shows an example of a birthday problem calculation. // We calculate the probability of a collision of SHA1 hashes // on 1 Terabyte data, using 1 Kilobyte blocks. // With SHA-1, that gives a 1 in 2535301202817642046627252275200 chance // of a collision occurring. func ExampleBirthdayProblem() { fmt.Println("Hash size is", dedup.HashSize*8, "bits") fmt.Println("1TiB, 1KiB blocks:") fmt.Println(dedup.BirthdayProblem((1 << 40) / (1 << 10))) // Output: Hash size is 160 bits // 1TiB, 1KiB blocks: // Collision probability is ~ 1/2535301202817642046627252275200 ~ 3.944304522431639e-31 }
func TestFixedWriter(t *testing.T) { idx := bytes.Buffer{} data := bytes.Buffer{} const totalinput = 10 << 20 input := getBufferSize(totalinput) const size = 64 << 10 b := input.Bytes() // Create some duplicates for i := 0; i < 50; i++ { // Read from 10 first blocks src := b[(i%10)*size : (i%10)*size+size] // Write into the following ones dst := b[(10+i)*size : (i+10)*size+size] copy(dst, src) } input = bytes.NewBuffer(b) w, err := dedup.NewWriter(&idx, &data, dedup.ModeFixed, size, size*10) if err != nil { t.Fatal(err) } io.Copy(w, input) err = w.Close() if err != nil { t.Fatal(err) } removed := ((totalinput) - data.Len()) / size t.Log(dedup.BirthdayProblem(totalinput / size)) t.Log("Index size:", idx.Len()) t.Log("Data size:", data.Len()) t.Log("Removed", removed, "blocks") // We should get at least 50 blocks if removed < 50 { t.Fatal("didn't remove at least 50 blocks") } if removed > 60 { t.Fatal("removed unreasonable high amount of blocks") } }