Example #1
// This will deduplicate a buffer of zeros to a non-indexed stream
// written to a file.
// It is not recommended to use a single stream when you are writing to
// a file.
func ExampleNewStreamWriter_file() {
	// We will write to this
	data, err := os.Create("outputstream.data")
	if err != nil {
		panic(err)
	}
	// Close, print stats and remove it
	defer func() {
		data.Close()
		stat, _ := os.Stat("outputstream.data")
		fmt.Println("Stream size:", stat.Size())
		os.Remove("outputstream.data")
	}()

	// This is our input:
	input := bytes.NewBuffer(make([]byte, 500000))

	// Create a new writer, with each block being 1000 bytes,
	// and allow it to use 10000 bytes of memory
	w, err := dedup.NewStreamWriter(data, dedup.ModeFixed, 1000, 10000)
	if err != nil {
		panic(err)
	}
	defer w.Close()

	// Copy our input to the writer.
	io.Copy(w, input)

	// Print the number of blocks written
	fmt.Println("Blocks:", w.Blocks())

	// OUTPUT: Blocks: 500
	// Stream size: 1518
}
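The stream written above is self-contained, so it can be decoded again with dedup.NewStreamReader (demonstrated later in this listing). Below is a minimal sketch of the round trip; the function name and output file are illustrative, the input file name is taken from the example above (which removes its output, so this is for illustration only), and imports (os, io, dedup) are omitted as in the other examples.

// restoreStreamFile is an illustrative sketch, not part of the package.
// It decodes a non-indexed stream file back into its original bytes.
func restoreStreamFile() error {
	// Open the stream written by the example above.
	f, err := os.Open("outputstream.data")
	if err != nil {
		return err
	}
	defer f.Close()

	// NewStreamReader decodes a non-indexed stream created by NewStreamWriter.
	r, err := dedup.NewStreamReader(f)
	if err != nil {
		return err
	}
	defer r.Close()

	// Write the restored (re-duplicated) data to a new file.
	out, err := os.Create("restored.data")
	if err != nil {
		return err
	}
	defer out.Close()

	if _, err := io.Copy(out, r); err != nil && err != io.EOF {
		return err
	}
	return nil
}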
Example #2
func BenchmarkFixedStreamWriter4K(t *testing.B) {
	const totalinput = 10 << 20
	input := getBufferSize(totalinput)

	const size = 4 << 10
	b := input.Bytes()
	// Create some duplicates
	for i := 0; i < 500; i++ {
		// Read from 10 first blocks
		src := b[(i%10)*size : (i%10)*size+size]
		// Write into the following ones
		dst := b[(10+i)*size : (i+10)*size+size]
		copy(dst, src)
	}
	t.ResetTimer()
	t.SetBytes(totalinput)
	for i := 0; i < t.N; i++ {
		input = bytes.NewBuffer(b)
		w, _ := dedup.NewStreamWriter(ioutil.Discard, dedup.ModeFixed, size, 10*size)
		io.Copy(w, input)
		err := w.Close()
		if err != nil {
			t.Fatal(err)
		}
	}
}
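The benchmarks and tests in this listing call a getBufferSize helper that is not shown here. A plausible sketch follows, assuming the helper only needs to return a bytes.Buffer of the requested size filled with deterministic pseudo-random data, so the only duplicate blocks are the ones the tests copy in explicitly (imports bytes and math/rand).

// getBufferSize is a hypothetical reconstruction of the test helper used above;
// it is not part of this listing. It returns a buffer of exactly 'size' bytes
// of deterministic pseudo-random data.
func getBufferSize(size int) *bytes.Buffer {
	rng := rand.New(rand.NewSource(1)) // fixed seed for reproducible runs
	b := make([]byte, size)
	_, _ = rng.Read(b)
	return bytes.NewBuffer(b)
}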
Example #3
// This will deduplicate a buffer of zeros to a non-indexed stream
func ExampleNewStreamWriter() {
	// We will write to this
	data := bytes.Buffer{}

	// This is our input:
	input := bytes.NewBuffer(make([]byte, 50000))

	// Create a new writer, with each block being 1000 bytes,
	// and allow it to use 10000 bytes of memory
	w, err := dedup.NewStreamWriter(&data, dedup.ModeFixed, 1000, 10000)
	if err != nil {
		panic(err)
	}
	// Copy our input to the writer.
	io.Copy(w, input)

	// Close the writer
	err = w.Close()
	if err != nil {
		panic(err)
	}

	// Let us inspect what was written:
	fmt.Println("Blocks:", w.Blocks())
	fmt.Println("Data size:", data.Len())

	// OUTPUT: Blocks: 50
	// Data size: 1068
}
Example #4
func TestDynamicStreamWriter(t *testing.T) {
	data := bytes.Buffer{}

	const totalinput = 10 << 20
	input := getBufferSize(totalinput)

	const size = 64 << 10
	b := input.Bytes()
	// Create some duplicates
	for i := 0; i < 50; i++ {
		// Read from 10 first blocks
		src := b[(i%10)*size : (i%10)*size+size]
		// Write into the following ones
		dst := b[(10+i)*size : (i+10)*size+size]
		copy(dst, src)
	}
	input = bytes.NewBuffer(b)
	w, err := dedup.NewStreamWriter(&data, dedup.ModeDynamic, size, 10*8*size)
	if err != nil {
		t.Fatal(err)
	}
	io.Copy(w, input)
	err = w.Close()
	if err != nil {
		t.Fatal(err)
	}
	removed := ((totalinput) - data.Len()) / size

	t.Log("Dynamic Data size:", data.Len())
	t.Log("Removed", removed, "blocks")
	// We don't know how many, but it should remove some blocks
	if removed < 40 {
		t.Fatal("didn't remove at least 40 blocks")
	}
}
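Note that this dynamic test gives the writer a larger memory budget (10*8*size) than the fixed-size tests use (10*size). The sketch below, which is not part of the package, runs the same input through both modes using only the NewStreamWriter API shown in this listing, so block counts and output sizes can be compared on real data; the function name is illustrative and the size constants mirror the tests above.

// compareModes writes the same input through a fixed-size and a dynamic
// stream writer and prints the resulting block counts and stream sizes.
// A sketch; assumes the whole input fits in memory.
func compareModes(input []byte) {
	const size = 64 << 10

	var fixed, dynamic bytes.Buffer

	// Fixed-size blocks, with the memory limit used by the fixed-size tests.
	wf, err := dedup.NewStreamWriter(&fixed, dedup.ModeFixed, size, 10*size)
	if err != nil {
		panic(err)
	}
	io.Copy(wf, bytes.NewReader(input))
	if err := wf.Close(); err != nil {
		panic(err)
	}

	// Dynamic blocks; note the larger memory limit, as in TestDynamicStreamWriter.
	wd, err := dedup.NewStreamWriter(&dynamic, dedup.ModeDynamic, size, 10*8*size)
	if err != nil {
		panic(err)
	}
	io.Copy(wd, bytes.NewReader(input))
	if err := wd.Close(); err != nil {
		panic(err)
	}

	fmt.Println("Fixed:  ", wf.Blocks(), "blocks,", fixed.Len(), "bytes")
	fmt.Println("Dynamic:", wd.Blocks(), "blocks,", dynamic.Len(), "bytes")
}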
Example #5
// This will deduplicate a buffer of zeros to a non-indexed stream and read it back
func ExampleNewStreamReader() {
	// Create data we can read.
	var data bytes.Buffer
	input := bytes.NewBuffer(make([]byte, 50000))
	// Set the memory limit to 10000 bytes
	w, _ := dedup.NewStreamWriter(&data, dedup.ModeFixed, 1000, 10000)
	_, _ = io.Copy(w, input)
	_ = w.Close()

	// Create a new stream reader:
	r, err := dedup.NewStreamReader(&data)
	if err != nil {
		panic(err)
	}

	// Inspect how much memory the reader may use.
	// Since this is a stream, this is the worst-case estimate.
	fmt.Println("Memory use:", r.MaxMem())

	var dst bytes.Buffer

	// Read everything
	_, err = io.Copy(&dst, r)
	if err != nil && err != io.EOF {
		panic(err)
	}

	// Let us inspect what was written:
	fmt.Println("Returned data length:", dst.Len())
	fmt.Println("Everything zero:", 0 == bytes.Compare(dst.Bytes(), make([]byte, 50000)))

	// OUTPUT: Memory use: 10000
	// Returned data length: 50000
	// Everything zero: true
}
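Because MaxMem reports the worst case before any data is decoded, it can act as a guard when reading streams from untrusted sources. The following is an illustrative sketch, assuming MaxMem returns an int (it prints as a plain number in the example above); the helper name and the limit parameter are not part of the package.

// decodeWithLimit refuses to decode a stream whose worst-case memory use,
// as reported by MaxMem, exceeds the caller's limit. Illustrative only.
func decodeWithLimit(in io.Reader, out io.Writer, memLimit int) error {
	r, err := dedup.NewStreamReader(in)
	if err != nil {
		return err
	}
	defer r.Close()

	// MaxMem is assumed to return an int here, matching the example output above.
	if r.MaxMem() > memLimit {
		return fmt.Errorf("stream may need %d bytes of memory, limit is %d", r.MaxMem(), memLimit)
	}

	if _, err := io.Copy(out, r); err != nil && err != io.EOF {
		return err
	}
	return nil
}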
Example #6
func TestReaderStream(t *testing.T) {
	data := bytes.Buffer{}

	const totalinput = 10<<20 + 65
	input := getBufferSize(totalinput)

	const size = 64 << 10
	b := input.Bytes()
	// Create some duplicates
	for i := 0; i < 50; i++ {
		// Read from 10 first blocks
		src := b[(i%10)*size : (i%10)*size+size]
		// Write into the following ones
		dst := b[(10+i)*size : (i+10)*size+size]
		copy(dst, src)
	}
	input = bytes.NewBuffer(b)
	w, err := dedup.NewStreamWriter(&data, dedup.ModeFixed, size, 10*size)
	if err != nil {
		t.Fatal(err)
	}
	io.Copy(w, input)
	err = w.Close()
	if err != nil {
		t.Fatal(err)
	}

	t.Log("Fixed Data size:", data.Len(), "-", data.Len()*100/totalinput, "%")

	r, err := dedup.NewStreamReader(&data)
	if err != nil {
		t.Fatal(err)
	}

	t.Log("Maximum estimated memory:", r.MaxMem(), "bytes")

	out, err := ioutil.ReadAll(r)
	if err != io.EOF && err != nil {
		t.Fatal(err)
	}
	if len(b) != len(out) {
		t.Fatalf("Expected len %d, got %d", len(b), len(out))
	}
	if !bytes.Equal(b, out) {
		t.Fatal("Output mismatch")
	}
	err = r.Close()
	if err != nil {
		t.Fatal(err)
	}
}
Example #7
// Stream, 1K blocks on 10MB data.
func BenchmarkReaderStream1K(t *testing.B) {
	data := &bytes.Buffer{}

	const totalinput = 10 << 20
	input := getBufferSize(totalinput)

	const size = 1 << 10
	b := input.Bytes()
	// Create some duplicates
	for i := 0; i < 500; i++ {
		// Read from 10 first blocks
		src := b[(i%10)*size : (i%10)*size+size]
		// Write into the following ones
		dst := b[(10+i)*size : (i+10)*size+size]
		copy(dst, src)
	}
	input = bytes.NewBuffer(b)
	w, err := dedup.NewStreamWriter(data, dedup.ModeFixed, size, 100*size)
	if err != nil {
		t.Fatal(err)
	}
	io.Copy(w, input)
	err = w.Close()
	if err != nil {
		t.Fatal(err)
	}

	alldata := data.Bytes()

	t.ResetTimer()
	t.SetBytes(totalinput)
	for i := 0; i < t.N; i++ {
		input := bytes.NewBuffer(alldata)
		r, err := dedup.NewStreamReader(input)
		if err != nil {
			t.Fatal(err)
		}

		n, err := io.Copy(ioutil.Discard, r)
		if err != io.EOF && err != nil {
			t.Fatal(err)
		}
		if len(b) != int(n) {
			t.Fatalf("Expected len %d, got %d", len(b), n)
		}
		err = r.Close()
		if err != nil {
			t.Fatal(err)
		}
	}
}
Example #8
func TestFixedStreamWriter(t *testing.T) {
	data := bytes.Buffer{}

	const totalinput = 10 << 20
	input := getBufferSize(totalinput)

	const size = 64 << 10
	b := input.Bytes()
	// Create some duplicates
	for i := 0; i < 50; i++ {
		// Read from 10 first blocks
		src := b[(i%10)*size : (i%10)*size+size]
		// Write into the following ones
		dst := b[(10+i)*size : (i+10)*size+size]
		copy(dst, src)
	}
	input = bytes.NewBuffer(b)
	w, err := dedup.NewStreamWriter(&data, dedup.ModeFixed, size, 10*size)
	if err != nil {
		t.Fatal(err)
	}
	io.Copy(w, input)
	err = w.Close()
	if err != nil {
		t.Fatal(err)
	}
	removed := ((totalinput) - data.Len()) / size

	t.Log("Data size:", data.Len())
	t.Log("Removed", removed, "blocks")
	// We duplicated 50 blocks, but stream overhead means slightly fewer may count as removed
	if removed < 49 {
		t.Fatal("didn't remove at least 49 blocks")
	}
	if removed > 60 {
		t.Fatal("removed unreasonable high amount of blocks")
	}
}