// This will deduplicate a buffer of zeros to an indexed stream func ExampleNewStreamReader() { // Create data we can read. var data bytes.Buffer input := bytes.NewBuffer(make([]byte, 50000)) // Set the memory limit to 10000 bytes w, _ := dedup.NewStreamWriter(&data, dedup.ModeFixed, 1000, 10000) _, _ = io.Copy(w, input) _ = w.Close() // Create a new stream reader: r, err := dedup.NewStreamReader(&data) if err != nil { panic(err) } // Inspect how much memory it will use. // Since this is a stream, it will print the worst possible scenario fmt.Println("Memory use:", r.MaxMem()) var dst bytes.Buffer // Read everything _, err = io.Copy(&dst, r) if err != nil && err != io.EOF { panic(err) } // Let us inspect what was written: fmt.Println("Returned data length:", dst.Len()) fmt.Println("Everything zero:", 0 == bytes.Compare(dst.Bytes(), make([]byte, 50000))) // OUTPUT: Memory use: 10000 // Returned data length: 50000 // Everything zero: true }
func TestReaderStream(t *testing.T) { data := bytes.Buffer{} const totalinput = 10<<20 + 65 input := getBufferSize(totalinput) const size = 64 << 10 b := input.Bytes() // Create some duplicates for i := 0; i < 50; i++ { // Read from 10 first blocks src := b[(i%10)*size : (i%10)*size+size] // Write into the following ones dst := b[(10+i)*size : (i+10)*size+size] copy(dst, src) } input = bytes.NewBuffer(b) w, err := dedup.NewStreamWriter(&data, dedup.ModeFixed, size, 10*size) if err != nil { t.Fatal(err) } io.Copy(w, input) err = w.Close() if err != nil { t.Fatal(err) } t.Log("Fixed Data size:", data.Len(), "-", data.Len()*100/totalinput, "%") r, err := dedup.NewStreamReader(&data) if err != nil { t.Fatal(err) } t.Log("Maximum estimated memory:", r.MaxMem(), "bytes") out, err := ioutil.ReadAll(r) if err != io.EOF && err != nil { t.Fatal(err) } if len(b) != len(out) { t.Fatalf("Expected len %d, got %d", len(b), len(out)) } if bytes.Compare(b, out) != 0 { t.Fatal("Output mismatch") } err = r.Close() if err != nil { t.Fatal(err) } }
// Stream, 1K blocks on 10MB data. func BenchmarkReaderStream1K(t *testing.B) { data := &bytes.Buffer{} const totalinput = 10 << 20 input := getBufferSize(totalinput) const size = 1 << 10 b := input.Bytes() // Create some duplicates for i := 0; i < 500; i++ { // Read from 10 first blocks src := b[(i%10)*size : (i%10)*size+size] // Write into the following ones dst := b[(10+i)*size : (i+10)*size+size] copy(dst, src) } input = bytes.NewBuffer(b) w, err := dedup.NewStreamWriter(data, dedup.ModeFixed, size, 100*size) if err != nil { t.Fatal(err) } io.Copy(w, input) err = w.Close() if err != nil { t.Fatal(err) } alldata := data.Bytes() t.ResetTimer() t.SetBytes(totalinput) for i := 0; i < t.N; i++ { input := bytes.NewBuffer(alldata) r, err := dedup.NewStreamReader(input) if err != nil { t.Fatal(err) } n, err := io.Copy(ioutil.Discard, r) if err != io.EOF && err != nil { t.Fatal(err) } if len(b) != int(n) { t.Fatalf("Expected len %d, got %d", len(b), n) } err = r.Close() if err != nil { t.Fatal(err) } } }