// This will deduplicate a buffer of zeros to an non-indexed stream // written to a file. // It is not recommended to use a single stream when you are writing to // a stream. func ExampleNewStreamWriter_file() { // We will write to this data, err := os.Create("outputstream.data") if err != nil { panic(err) } // Close, print stats and remove it defer func() { data.Close() stat, _ := os.Stat("outputstream.data") fmt.Println("Stream size:", stat.Size()) os.Remove("outputstream.data") }() // This is our input: input := bytes.NewBuffer(make([]byte, 500000)) // Create a new writer, with each block being 1000 bytes, // And allow it to use 10000 bytes of memory w, err := dedup.NewStreamWriter(data, dedup.ModeFixed, 1000, 10000) if err != nil { panic(err) } defer w.Close() // Copy our input to the writer. io.Copy(w, input) // Print the number of blocks written fmt.Println("Blocks:", w.Blocks()) // OUTPUT: Blocks: 500 // Stream size: 1518 }
func BenchmarkFixedStreamWriter4K(t *testing.B) { const totalinput = 10 << 20 input := getBufferSize(totalinput) const size = 4 << 10 b := input.Bytes() // Create some duplicates for i := 0; i < 500; i++ { // Read from 10 first blocks src := b[(i%10)*size : (i%10)*size+size] // Write into the following ones dst := b[(10+i)*size : (i+10)*size+size] copy(dst, src) } t.ResetTimer() t.SetBytes(totalinput) for i := 0; i < t.N; i++ { input = bytes.NewBuffer(b) w, _ := dedup.NewStreamWriter(ioutil.Discard, dedup.ModeFixed, size, 10*size) io.Copy(w, input) err := w.Close() if err != nil { t.Fatal(err) } } }
// This will deduplicate a buffer of zeros to an non-indexed stream func ExampleNewStreamWriter() { // We will write to this data := bytes.Buffer{} // This is our input: input := bytes.NewBuffer(make([]byte, 50000)) // Create a new writer, with each block being 1000 bytes, // And allow it to use 10000 bytes of memory w, err := dedup.NewStreamWriter(&data, dedup.ModeFixed, 1000, 10000) if err != nil { panic(err) } // Copy our input to the writer. io.Copy(w, input) // Close the writer err = w.Close() if err != nil { panic(err) } // Let us inspect what was written: fmt.Println("Blocks:", w.Blocks()) fmt.Println("Data size:", data.Len()) // OUTPUT: Blocks: 50 // Data size: 1068 }
func TestDynamicStreamWriter(t *testing.T) { data := bytes.Buffer{} const totalinput = 10 << 20 input := getBufferSize(totalinput) const size = 64 << 10 b := input.Bytes() // Create some duplicates for i := 0; i < 50; i++ { // Read from 10 first blocks src := b[(i%10)*size : (i%10)*size+size] // Write into the following ones dst := b[(10+i)*size : (i+10)*size+size] copy(dst, src) } input = bytes.NewBuffer(b) w, err := dedup.NewStreamWriter(&data, dedup.ModeDynamic, size, 10*8*size) if err != nil { t.Fatal(err) } io.Copy(w, input) err = w.Close() if err != nil { t.Fatal(err) } removed := ((totalinput) - data.Len()) / size t.Log("Dynamic Data size:", data.Len()) t.Log("Removed", removed, "blocks") // We don't know how many, but it should remove some blocks if removed < 40 { t.Fatal("didn't remove at least 40 blocks") } }
// This will deduplicate a buffer of zeros to an indexed stream func ExampleNewStreamReader() { // Create data we can read. var data bytes.Buffer input := bytes.NewBuffer(make([]byte, 50000)) // Set the memory limit to 10000 bytes w, _ := dedup.NewStreamWriter(&data, dedup.ModeFixed, 1000, 10000) _, _ = io.Copy(w, input) _ = w.Close() // Create a new stream reader: r, err := dedup.NewStreamReader(&data) if err != nil { panic(err) } // Inspect how much memory it will use. // Since this is a stream, it will print the worst possible scenario fmt.Println("Memory use:", r.MaxMem()) var dst bytes.Buffer // Read everything _, err = io.Copy(&dst, r) if err != nil && err != io.EOF { panic(err) } // Let us inspect what was written: fmt.Println("Returned data length:", dst.Len()) fmt.Println("Everything zero:", 0 == bytes.Compare(dst.Bytes(), make([]byte, 50000))) // OUTPUT: Memory use: 10000 // Returned data length: 50000 // Everything zero: true }
func TestReaderStream(t *testing.T) { data := bytes.Buffer{} const totalinput = 10<<20 + 65 input := getBufferSize(totalinput) const size = 64 << 10 b := input.Bytes() // Create some duplicates for i := 0; i < 50; i++ { // Read from 10 first blocks src := b[(i%10)*size : (i%10)*size+size] // Write into the following ones dst := b[(10+i)*size : (i+10)*size+size] copy(dst, src) } input = bytes.NewBuffer(b) w, err := dedup.NewStreamWriter(&data, dedup.ModeFixed, size, 10*size) if err != nil { t.Fatal(err) } io.Copy(w, input) err = w.Close() if err != nil { t.Fatal(err) } t.Log("Fixed Data size:", data.Len(), "-", data.Len()*100/totalinput, "%") r, err := dedup.NewStreamReader(&data) if err != nil { t.Fatal(err) } t.Log("Maximum estimated memory:", r.MaxMem(), "bytes") out, err := ioutil.ReadAll(r) if err != io.EOF && err != nil { t.Fatal(err) } if len(b) != len(out) { t.Fatalf("Expected len %d, got %d", len(b), len(out)) } if bytes.Compare(b, out) != 0 { t.Fatal("Output mismatch") } err = r.Close() if err != nil { t.Fatal(err) } }
// Stream, 1K blocks on 10MB data. func BenchmarkReaderStream1K(t *testing.B) { data := &bytes.Buffer{} const totalinput = 10 << 20 input := getBufferSize(totalinput) const size = 1 << 10 b := input.Bytes() // Create some duplicates for i := 0; i < 500; i++ { // Read from 10 first blocks src := b[(i%10)*size : (i%10)*size+size] // Write into the following ones dst := b[(10+i)*size : (i+10)*size+size] copy(dst, src) } input = bytes.NewBuffer(b) w, err := dedup.NewStreamWriter(data, dedup.ModeFixed, size, 100*size) if err != nil { t.Fatal(err) } io.Copy(w, input) err = w.Close() if err != nil { t.Fatal(err) } alldata := data.Bytes() t.ResetTimer() t.SetBytes(totalinput) for i := 0; i < t.N; i++ { input := bytes.NewBuffer(alldata) r, err := dedup.NewStreamReader(input) if err != nil { t.Fatal(err) } n, err := io.Copy(ioutil.Discard, r) if err != io.EOF && err != nil { t.Fatal(err) } if len(b) != int(n) { t.Fatalf("Expected len %d, got %d", len(b), n) } err = r.Close() if err != nil { t.Fatal(err) } } }
func TestFixedStreamWriter(t *testing.T) { data := bytes.Buffer{} const totalinput = 10 << 20 input := getBufferSize(totalinput) const size = 64 << 10 b := input.Bytes() // Create some duplicates for i := 0; i < 50; i++ { // Read from 10 first blocks src := b[(i%10)*size : (i%10)*size+size] // Write into the following ones dst := b[(10+i)*size : (i+10)*size+size] copy(dst, src) } input = bytes.NewBuffer(b) w, err := dedup.NewStreamWriter(&data, dedup.ModeFixed, size, 10*size) if err != nil { t.Fatal(err) } io.Copy(w, input) err = w.Close() if err != nil { t.Fatal(err) } removed := ((totalinput) - data.Len()) / size t.Log("Data size:", data.Len()) t.Log("Removed", removed, "blocks") // We should get at least 50 blocks, but there is a little overhead if removed < 49 { t.Fatal("didn't remove at least 49 blocks") } if removed > 60 { t.Fatal("removed unreasonable high amount of blocks") } }