// This doesn't actually test anything, but prints probabilities to log func TestBirthdayProblem(t *testing.T) { t.Log("Hash size is", dedup.HashSize*8, "bits") t.Log("1GiB, 1KiB blocks:") t.Log(dedup.BirthdayProblem((1 << 30) / (1 << 10))) w, _ := dedup.NewWriter(ioutil.Discard, ioutil.Discard, dedup.ModeFixed, 1<<10, 0) e, _ := w.MemUse(1 << 30) t.Logf("It will use %d MiB for encoder.", e>>20) t.Log("1TiB, 4KiB blocks:") t.Log(dedup.BirthdayProblem((1 << 40) / (4 << 10))) w, _ = dedup.NewWriter(ioutil.Discard, ioutil.Discard, dedup.ModeFixed, 4<<10, 0) e, _ = w.MemUse(1 << 40) t.Logf("It will use %d MiB for encoder.", e>>20) t.Log("1PiB, 4KiB blocks:") t.Log(dedup.BirthdayProblem((1 << 50) / (4 << 10))) e, _ = w.MemUse(1 << 50) t.Logf("It will use %d MiB for encoder.", e>>20) t.Log("1EiB, 64KiB blocks:") t.Log(dedup.BirthdayProblem((1 << 60) / (64 << 10))) w, _ = dedup.NewWriter(ioutil.Discard, ioutil.Discard, dedup.ModeFixed, 64<<10, 0) e, _ = w.MemUse(1 << 60) t.Logf("It will use %d MiB for encoder.", e>>20) t.Log("1EiB, 1KiB blocks:") t.Log(dedup.BirthdayProblem((1 << 60) / (1 << 10))) w, _ = dedup.NewWriter(ioutil.Discard, ioutil.Discard, dedup.ModeFixed, 1<<10, 0) e, _ = w.MemUse(1 << 60) t.Logf("It will use %d MiB for encoder.", e>>20) }
// This will deduplicate a buffer of zeros to an indexed stream func ExampleNewWriter() { // We will write to these idx := bytes.Buffer{} data := bytes.Buffer{} // This is our input: input := bytes.NewBuffer(make([]byte, 50000)) // Create a new writer, with each block being 1000 bytes w, err := dedup.NewWriter(&idx, &data, dedup.ModeFixed, 1000, 0) if err != nil { panic(err) } // Copy our input to the writer. io.Copy(w, input) // Close the writer err = w.Close() if err != nil { panic(err) } // Let us inspect what was written: fmt.Println("Blocks:", w.Blocks()) fmt.Println("Index size:", idx.Len()) fmt.Println("Data size:", data.Len()) // OUTPUT: Blocks: 50 // Index size: 67 // Data size: 1000 }
// Maximum block size:4k func BenchmarkDynamicWriter4K(t *testing.B) { const totalinput = 10 << 20 input := getBufferSize(totalinput) const size = 4 << 10 b := input.Bytes() // Create some duplicates for i := 0; i < 50; i++ { // Read from 10 first blocks src := b[(i%10)*size : (i%10)*size+size] // Write into the following ones dst := b[(10+i)*size : (i+10)*size+size] copy(dst, src) } t.ResetTimer() t.SetBytes(totalinput) for i := 0; i < t.N; i++ { input = bytes.NewBuffer(b) w, _ := dedup.NewWriter(ioutil.Discard, ioutil.Discard, dedup.ModeDynamic, size, 0) io.Copy(w, input) err := w.Close() if err != nil { t.Fatal(err) } } }
// This will deduplicate a buffer of zeros to an indexed stream func ExampleNewReader() { // Create data we can read. var idx, data bytes.Buffer input := bytes.NewBuffer(make([]byte, 50000)) w, _ := dedup.NewWriter(&idx, &data, dedup.ModeFixed, 1000, 0) _, _ = io.Copy(w, input) _ = w.Close() // Create a new reader. r, err := dedup.NewReader(&idx, &data) if err != nil { panic(err) } // Inspect how much memory it will use. fmt.Println("Memory use:", r.MaxMem()) var dst bytes.Buffer // Read everything _, err = io.Copy(&dst, r) if err != nil && err != io.EOF { panic(err) } // Let us inspect what was written: fmt.Println("Returned data length:", dst.Len()) fmt.Println("Everything zero:", 0 == bytes.Compare(dst.Bytes(), make([]byte, 50000))) // OUTPUT: Memory use: 1000 // Returned data length: 50000 // Everything zero: true }
func TestReader(t *testing.T) { idx := bytes.Buffer{} data := bytes.Buffer{} const totalinput = 10<<20 + 65 input := getBufferSize(totalinput) const size = 64 << 10 b := input.Bytes() // Create some duplicates for i := 0; i < 50; i++ { // Read from 10 first blocks src := b[(i%10)*size : (i%10)*size+size] // Write into the following ones dst := b[(10+i)*size : (i+10)*size+size] copy(dst, src) } input = bytes.NewBuffer(b) w, err := dedup.NewWriter(&idx, &data, dedup.ModeFixed, size, 0) if err != nil { t.Fatal(err) } io.Copy(w, input) err = w.Close() if err != nil { t.Fatal(err) } t.Log("Fixed Index size:", idx.Len()) t.Log("Fixed Data size:", data.Len(), "-", data.Len()*100/totalinput, "%") r, err := dedup.NewReader(&idx, &data) if err != nil { t.Fatal(err) } t.Log("Maximum estimated memory:", r.MaxMem(), "bytes") out, err := ioutil.ReadAll(r) if err != io.EOF && err != nil { t.Fatal(err) } if len(b) != len(out) { t.Fatalf("Expected len %d, got %d", len(b), len(out)) } if bytes.Compare(b, out) != 0 { t.Fatal("Output mismatch") } err = r.Close() if err != nil { t.Fatal(err) } blocks := r.BlockSizes() for _, s := range blocks[:len(blocks)-1] { if s != size { t.Fatal("wrong size, expected", size, "got", s) } } }
// Indexed stream, 10MB input, 1K blocks func BenchmarkReader1K(t *testing.B) { idx := &bytes.Buffer{} data := &bytes.Buffer{} const totalinput = 10 << 20 input := getBufferSize(totalinput) const size = 1 << 10 b := input.Bytes() // Create some duplicates for i := 0; i < 500; i++ { // Read from 10 first blocks src := b[(i%10)*size : (i%10)*size+size] // Write into the following ones dst := b[(10+i)*size : (i+10)*size+size] copy(dst, src) } input = bytes.NewBuffer(b) w, err := dedup.NewWriter(idx, data, dedup.ModeFixed, size, 0) if err != nil { t.Fatal(err) } _, err = io.Copy(w, input) if err != nil { t.Fatal(err) } err = w.Close() if err != nil { t.Fatal(err) } index := idx.Bytes() alldata := data.Bytes() t.ResetTimer() t.SetBytes(totalinput) for i := 0; i < t.N; i++ { idx := bytes.NewBuffer(index) data := bytes.NewBuffer(alldata) r, err := dedup.NewReader(idx, data) if err != nil { t.Fatal(err) } n, err := io.Copy(ioutil.Discard, r) if err != nil && err != io.EOF { t.Fatal(err) } if n != int64(len(b)) { t.Fatal("read was short, expected", len(b), "was", n) } err = r.Close() if err != nil { t.Fatal(err) } } }
func TestFixedWriterLimit(t *testing.T) { idx := bytes.Buffer{} data := bytes.Buffer{} const totalinput = 10 << 20 const limit = 9 input := getBufferSize(totalinput) const size = 64 << 10 b := input.Bytes() // Create some duplicates for i := 0; i < 50; i++ { // Read from 10 first blocks src := b[(i%10)*size : (i%10)*size+size] // Write into the following ones dst := b[(10+50-i)*size : (10+50-i)*size+size] copy(dst, src) } input = bytes.NewBuffer(b) w, err := dedup.NewWriter(&idx, &data, dedup.ModeFixed, size, limit*size) if err != nil { t.Fatal(err) } io.Copy(w, input) err = w.Close() if err != nil { t.Fatal(err) } removed := ((totalinput) - data.Len()) / size t.Log("Index size:", idx.Len()) t.Log("Data size:", data.Len()) t.Log("Removed", removed, "blocks") // We should get at least 50 blocks if removed > 10 { t.Fatal("it did not appear to respect the limit") } if removed < 8 { t.Fatal("removed too many blocks") } r, err := dedup.NewReader(&idx, &data) if err != nil { t.Fatal(err) } useBlocks := r.MaxMem() / size if useBlocks > 9 { t.Fatal("Uses too much memory, expected", limit, "got", useBlocks) } t.Log("Maximum estimated use:", r.MaxMem(), "bytes,", useBlocks, "blocks") r.Close() }
// This example will show how to write data to two files. // Running this example will deduplicate an empty byte slice // of 500000 bytes into an 'output.data' and 'output.idx' file. // // In the real world, you would likely want to add a bufio.NewWriter // to the output, but to keep it simple, we don't do that here. func ExampleNewWriter_file() { data, err := os.Create("output.data") if err != nil { panic(err) } // Close, print stats and remove it defer func() { data.Close() stat, _ := os.Stat("output.data") fmt.Println("Data size:", stat.Size()) os.Remove("output.data") }() idx, err := os.Create("output.idx") if err != nil { panic(err) } // Close, print stats and remove it defer func() { idx.Close() stat, _ := os.Stat("output.idx") fmt.Println("Index size:", stat.Size()) os.Remove("output.idx") }() // This is our input: input := bytes.NewBuffer(make([]byte, 500000)) // Create a new writer, with each block being 1000 bytes fixed size. w, err := dedup.NewWriter(idx, data, dedup.ModeFixed, 1000, 0) if err != nil { panic(err) } defer w.Close() // Copy our input to the writer. io.Copy(w, input) // Print the number of blocks written fmt.Println("Blocks:", w.Blocks()) // OUTPUT: Blocks: 500 // Index size: 517 // Data size: 1000 }
func TestFixedWriter(t *testing.T) { idx := bytes.Buffer{} data := bytes.Buffer{} const totalinput = 10 << 20 input := getBufferSize(totalinput) const size = 64 << 10 b := input.Bytes() // Create some duplicates for i := 0; i < 50; i++ { // Read from 10 first blocks src := b[(i%10)*size : (i%10)*size+size] // Write into the following ones dst := b[(10+i)*size : (i+10)*size+size] copy(dst, src) } input = bytes.NewBuffer(b) w, err := dedup.NewWriter(&idx, &data, dedup.ModeFixed, size, size*10) if err != nil { t.Fatal(err) } io.Copy(w, input) err = w.Close() if err != nil { t.Fatal(err) } removed := ((totalinput) - data.Len()) / size t.Log(dedup.BirthdayProblem(totalinput / size)) t.Log("Index size:", idx.Len()) t.Log("Data size:", data.Len()) t.Log("Removed", removed, "blocks") // We should get at least 50 blocks if removed < 50 { t.Fatal("didn't remove at least 50 blocks") } if removed > 60 { t.Fatal("removed unreasonable high amount of blocks") } }
func TestDynamicWriter(t *testing.T) { idx := bytes.Buffer{} data := bytes.Buffer{} const totalinput = 10 << 20 input := getBufferSize(totalinput) const size = 64 << 10 b := input.Bytes() // Create some duplicates for i := 0; i < 50; i++ { // Read from 10 first blocks src := b[(i%10)*size : (i%10)*size+size] // Write into the following ones dst := b[(10+i)*size : (i+10)*size+size] copy(dst, src) } input = bytes.NewBuffer(b) w, err := dedup.NewWriter(&idx, &data, dedup.ModeDynamic, size, 10*8*size) if err != nil { t.Fatal(err) } io.Copy(w, input) err = w.Close() if err != nil { t.Fatal(err) } removed := ((totalinput) - data.Len()) / size t.Log("Dynamic Index size:", idx.Len()) t.Log("Dynamic Data size:", data.Len()) t.Log("Removed", removed, "blocks") // We don't know how many, but it should remove some blocks if removed < 40 { t.Fatal("didn't remove at least 40 blocks") } }
func TestReaderWriteTo(t *testing.T) { idx := bytes.Buffer{} data := bytes.Buffer{} const totalinput = 10<<20 + 65 input := getBufferSize(totalinput) const size = 64 << 10 b := input.Bytes() // Create some duplicates for i := 0; i < 50; i++ { // Read from 10 first blocks src := b[(i%10)*size : (i%10)*size+size] // Write into the following ones dst := b[(10+i)*size : (i+10)*size+size] copy(dst, src) } input = bytes.NewBuffer(b) w, err := dedup.NewWriter(&idx, &data, dedup.ModeFixed, size, 0) if err != nil { t.Fatal(err) } io.Copy(w, input) err = w.Close() if err != nil { t.Fatal(err) } r, err := dedup.NewReader(&idx, &data) if err != nil { t.Fatal(err) } dst := &bytes.Buffer{} n, err := r.WriteTo(dst) if err != io.EOF && err != nil { t.Fatal(err) } if len(b) != int(n) { t.Errorf("Write count, expected n %d, got %d", len(b), n) } out := dst.Bytes() if len(b) != len(out) { t.Fatalf("Expected len %d, got %d", len(b), len(out)) } if len(b) != len(out) { t.Fatalf("Expected len %d, got %d", len(b), len(out)) } if bytes.Compare(b, out) != 0 { t.Fatal("Output mismatch") } err = r.Close() if err != nil { t.Fatal(err) } blocks := r.BlockSizes() for _, s := range blocks[:len(blocks)-1] { if s != size { t.Fatal("wrong size, expected", size, "got", s) } } }
func TestDynamicRoundtrip(t *testing.T) { idx := bytes.Buffer{} data := bytes.Buffer{} const totalinput = 10<<20 + 65 input := getBufferSize(totalinput) const size = 64 << 10 b := input.Bytes() // Create some duplicates for i := 0; i < 50; i++ { // Read from 10 first blocks src := b[(i%10)*size : (i%10)*size+size] // Write into the following ones dst := b[(10+i)*size : (i+10)*size+size] copy(dst, src) } input = bytes.NewBuffer(b) w, err := dedup.NewWriter(&idx, &data, dedup.ModeDynamic, size, 0) if err != nil { t.Fatal(err) } io.Copy(w, input) err = w.Close() if err != nil { t.Fatal(err) } t.Log("Dynamic Index size:", idx.Len()) t.Log("Dynamic Data size:", data.Len()) r, err := dedup.NewReader(&idx, &data) if err != nil { t.Fatal(err) } t.Log("Maximum estimated memory:", r.MaxMem(), "bytes") blocks := r.BlockSizes() avg := 0 for _, v := range blocks { if v > size { t.Fatal("too big block returned, should not be >", size, "was", v) } avg += v } t.Log("Average block size:", avg/len(blocks), "bytes") out, err := ioutil.ReadAll(r) if err != io.EOF && err != nil { t.Fatal(err) } if len(b) != len(out) { t.Fatalf("Expected len %d, got %d", len(b), len(out)) } if bytes.Compare(b, out) != 0 { t.Fatal("Output mismatch") } err = r.Close() if err != nil { t.Fatal(err) } }