Example #1
0
// TestBirthdayProblem doesn't assert anything about the reported values; it
// logs the output of dedup.BirthdayProblem and the encoder figure returned by
// Writer.MemUse for several input-size / block-size combinations.
//
// The only failure condition is a writer that cannot be constructed.
func TestBirthdayProblem(t *testing.T) {
	t.Log("Hash size is", dedup.HashSize*8, "bits")

	t.Log("1GiB, 1KiB blocks:")
	t.Log(dedup.BirthdayProblem((1 << 30) / (1 << 10)))
	w, err := dedup.NewWriter(ioutil.Discard, ioutil.Discard, dedup.ModeFixed, 1<<10, 0)
	if err != nil {
		t.Fatal(err)
	}
	// Only the first value returned by MemUse is reported here; it is
	// shifted by 20 bits to be logged as MiB.
	e, _ := w.MemUse(1 << 30)
	t.Logf("It will use %d MiB for encoder.", e>>20)

	t.Log("1TiB, 4KiB blocks:")
	t.Log(dedup.BirthdayProblem((1 << 40) / (4 << 10)))
	w, err = dedup.NewWriter(ioutil.Discard, ioutil.Discard, dedup.ModeFixed, 4<<10, 0)
	if err != nil {
		t.Fatal(err)
	}
	e, _ = w.MemUse(1 << 40)
	t.Logf("It will use %d MiB for encoder.", e>>20)

	// Same 4KiB block size as above, so the previous writer is reused.
	t.Log("1PiB, 4KiB blocks:")
	t.Log(dedup.BirthdayProblem((1 << 50) / (4 << 10)))
	e, _ = w.MemUse(1 << 50)
	t.Logf("It will use %d MiB for encoder.", e>>20)

	t.Log("1EiB, 64KiB blocks:")
	t.Log(dedup.BirthdayProblem((1 << 60) / (64 << 10)))
	w, err = dedup.NewWriter(ioutil.Discard, ioutil.Discard, dedup.ModeFixed, 64<<10, 0)
	if err != nil {
		t.Fatal(err)
	}
	e, _ = w.MemUse(1 << 60)
	t.Logf("It will use %d MiB for encoder.", e>>20)

	t.Log("1EiB, 1KiB blocks:")
	t.Log(dedup.BirthdayProblem((1 << 60) / (1 << 10)))
	w, err = dedup.NewWriter(ioutil.Discard, ioutil.Discard, dedup.ModeFixed, 1<<10, 0)
	if err != nil {
		t.Fatal(err)
	}
	e, _ = w.MemUse(1 << 60)
	t.Logf("It will use %d MiB for encoder.", e>>20)
}
Example #2
0
// This will deduplicate a buffer of zeros to an indexed stream
func ExampleNewWriter() {
	// We will write to these
	idx := bytes.Buffer{}
	data := bytes.Buffer{}

	// This is our input:
	input := bytes.NewBuffer(make([]byte, 50000))

	// Create a new writer, with each block being 1000 bytes
	w, err := dedup.NewWriter(&idx, &data, dedup.ModeFixed, 1000, 0)
	if err != nil {
		panic(err)
	}

	// Copy our input to the writer.
	// A failed copy would make the numbers printed below meaningless,
	// so stop right away instead of ignoring it.
	if _, err = io.Copy(w, input); err != nil {
		panic(err)
	}

	// Close the writer
	err = w.Close()
	if err != nil {
		panic(err)
	}

	// Let us inspect what was written:
	fmt.Println("Blocks:", w.Blocks())
	fmt.Println("Index size:", idx.Len())
	fmt.Println("Data size:", data.Len())

	// OUTPUT: Blocks: 50
	// Index size: 67
	// Data size: 1000
}
Example #3
0
// BenchmarkDynamicWriter4K measures writing 10MiB of input through a
// dedup.ModeDynamic writer with a maximum block size of 4k. All output is
// discarded.
func BenchmarkDynamicWriter4K(t *testing.B) {
	const totalinput = 10 << 20
	input := getBufferSize(totalinput)

	const size = 4 << 10
	b := input.Bytes()
	// Create some duplicates
	for i := 0; i < 50; i++ {
		// Read from 10 first blocks
		src := b[(i%10)*size : (i%10)*size+size]
		// Write into the following ones
		dst := b[(10+i)*size : (i+10)*size+size]
		copy(dst, src)
	}
	t.ResetTimer()
	t.SetBytes(totalinput)
	for i := 0; i < t.N; i++ {
		// A fresh buffer over the same bytes, since the previous one was
		// handed to io.Copy.
		input = bytes.NewBuffer(b)
		w, err := dedup.NewWriter(ioutil.Discard, ioutil.Discard, dedup.ModeDynamic, size, 0)
		if err != nil {
			t.Fatal(err)
		}
		// A failing copy would make the measured throughput bogus.
		if _, err = io.Copy(w, input); err != nil {
			t.Fatal(err)
		}
		if err = w.Close(); err != nil {
			t.Fatal(err)
		}
	}
}
Example #4
0
// This will write a buffer of zeros to an indexed stream and
// read it back using a Reader.
func ExampleNewReader() {
	// Create data we can read.
	var idx, data bytes.Buffer
	input := bytes.NewBuffer(make([]byte, 50000))
	w, err := dedup.NewWriter(&idx, &data, dedup.ModeFixed, 1000, 0)
	if err != nil {
		panic(err)
	}
	if _, err = io.Copy(w, input); err != nil {
		panic(err)
	}
	if err = w.Close(); err != nil {
		panic(err)
	}

	// Create a new reader.
	r, err := dedup.NewReader(&idx, &data)
	if err != nil {
		panic(err)
	}

	// Inspect how much memory it will use.
	fmt.Println("Memory use:", r.MaxMem())

	var dst bytes.Buffer

	// Read everything
	_, err = io.Copy(&dst, r)
	if err != nil && err != io.EOF {
		panic(err)
	}

	// Let us inspect what was written:
	fmt.Println("Returned data length:", dst.Len())
	fmt.Println("Everything zero:", bytes.Equal(dst.Bytes(), make([]byte, 50000)))

	// OUTPUT: Memory use: 1000
	// Returned data length: 50000
	// Everything zero: true
}
Example #5
0
// TestReader writes an input containing duplicated 64KiB blocks through a
// dedup.ModeFixed writer, reads it back with ioutil.ReadAll and checks that
// the output equals the input and that every block except the last one is
// exactly `size` bytes.
func TestReader(t *testing.T) {
	idx := bytes.Buffer{}
	data := bytes.Buffer{}

	// The extra 65 bytes make the input not a multiple of the block size.
	const totalinput = 10<<20 + 65
	input := getBufferSize(totalinput)

	const size = 64 << 10
	b := input.Bytes()
	// Create some duplicates
	for i := 0; i < 50; i++ {
		// Read from 10 first blocks
		src := b[(i%10)*size : (i%10)*size+size]
		// Write into the following ones
		dst := b[(10+i)*size : (i+10)*size+size]
		copy(dst, src)
	}
	input = bytes.NewBuffer(b)
	w, err := dedup.NewWriter(&idx, &data, dedup.ModeFixed, size, 0)
	if err != nil {
		t.Fatal(err)
	}
	if _, err = io.Copy(w, input); err != nil {
		t.Fatal(err)
	}
	err = w.Close()
	if err != nil {
		t.Fatal(err)
	}

	t.Log("Fixed Index size:", idx.Len())
	t.Log("Fixed Data size:", data.Len(), "-", data.Len()*100/totalinput, "%")

	r, err := dedup.NewReader(&idx, &data)
	if err != nil {
		t.Fatal(err)
	}

	t.Log("Maximum estimated memory:", r.MaxMem(), "bytes")

	out, err := ioutil.ReadAll(r)
	if err != io.EOF && err != nil {
		t.Fatal(err)
	}
	if len(b) != len(out) {
		t.Fatalf("Expected len %d, got %d", len(b), len(out))
	}
	if !bytes.Equal(b, out) {
		t.Fatal("Output mismatch")
	}
	err = r.Close()
	if err != nil {
		t.Fatal(err)
	}
	blocks := r.BlockSizes()
	// Guard the slice expression below, which would panic on an empty slice.
	if len(blocks) == 0 {
		t.Fatal("no block sizes returned")
	}
	// The last block is excluded from the size check.
	for _, s := range blocks[:len(blocks)-1] {
		if s != size {
			t.Fatal("wrong size, expected", size, "got", s)
		}
	}
}
Example #6
0
// BenchmarkReader1K measures reading back an indexed stream that was
// written from 10MB of input using fixed 1K blocks.
func BenchmarkReader1K(t *testing.B) {
	const (
		totalinput = 10 << 20
		size       = 1 << 10
	)

	var idxBuf, dataBuf bytes.Buffer

	// Overwrite the 500 blocks following the first ten with copies of the
	// first ten, so the stream contains duplicates.
	raw := getBufferSize(totalinput).Bytes()
	for i := 0; i < 500; i++ {
		from := (i % 10) * size
		to := (i + 10) * size
		copy(raw[to:to+size], raw[from:from+size])
	}

	// Encode once, outside the timed section.
	w, err := dedup.NewWriter(&idxBuf, &dataBuf, dedup.ModeFixed, size, 0)
	if err != nil {
		t.Fatal(err)
	}
	if _, err = io.Copy(w, bytes.NewBuffer(raw)); err != nil {
		t.Fatal(err)
	}
	if err = w.Close(); err != nil {
		t.Fatal(err)
	}

	index, alldata := idxBuf.Bytes(), dataBuf.Bytes()
	want := int64(len(raw))

	t.ResetTimer()
	t.SetBytes(totalinput)
	for i := 0; i < t.N; i++ {
		// Each iteration gets fresh buffers over the same encoded bytes.
		r, err := dedup.NewReader(bytes.NewBuffer(index), bytes.NewBuffer(alldata))
		if err != nil {
			t.Fatal(err)
		}
		n, err := io.Copy(ioutil.Discard, r)
		if err != nil && err != io.EOF {
			t.Fatal(err)
		}
		if n != want {
			t.Fatal("read was short, expected", len(raw), "was", n)
		}
		if err = r.Close(); err != nil {
			t.Fatal(err)
		}
	}
}
Example #7
0
// TestFixedWriterLimit writes an input containing duplicated 64KiB blocks
// through a dedup.ModeFixed writer whose last argument is limit*size, and
// checks that the number of removed blocks stays within 8..10 and that the
// Reader's MaxMem does not exceed `limit` blocks.
func TestFixedWriterLimit(t *testing.T) {
	idx := bytes.Buffer{}
	data := bytes.Buffer{}

	const totalinput = 10 << 20
	const limit = 9
	input := getBufferSize(totalinput)

	const size = 64 << 10
	b := input.Bytes()
	// Create some duplicates
	for i := 0; i < 50; i++ {
		// Read from 10 first blocks
		src := b[(i%10)*size : (i%10)*size+size]
		// Write into the following ones, starting at block 60 and going down.
		dst := b[(10+50-i)*size : (10+50-i)*size+size]
		copy(dst, src)
	}
	input = bytes.NewBuffer(b)
	w, err := dedup.NewWriter(&idx, &data, dedup.ModeFixed, size, limit*size)
	if err != nil {
		t.Fatal(err)
	}
	if _, err = io.Copy(w, input); err != nil {
		t.Fatal(err)
	}
	err = w.Close()
	if err != nil {
		t.Fatal(err)
	}
	// Number of whole blocks missing from the data stream.
	removed := ((totalinput) - data.Len()) / size

	t.Log("Index size:", idx.Len())
	t.Log("Data size:", data.Len())
	t.Log("Removed", removed, "blocks")
	// With the limit in place only between 8 and 10 blocks are expected
	// to be removed.
	if removed > 10 {
		t.Fatal("it did not appear to respect the limit")
	}
	if removed < 8 {
		t.Fatal("removed too few blocks")
	}
	r, err := dedup.NewReader(&idx, &data)
	if err != nil {
		t.Fatal(err)
	}

	useBlocks := r.MaxMem() / size
	if useBlocks > limit {
		t.Fatal("Uses too much memory, expected", limit, "got", useBlocks)
	}
	t.Log("Maximum estimated use:", r.MaxMem(), "bytes,", useBlocks, "blocks")
	if err = r.Close(); err != nil {
		t.Fatal(err)
	}
}
Example #8
0
// This example will show how to write data to two files.
// Running this example will deduplicate an empty byte slice
// of 500000 bytes into an 'output.data' and 'output.idx' file.
//
// In the real world, you would likely want to add a bufio.NewWriter
// to the output, but to keep it simple, we don't do that here.
func ExampleNewWriter_file() {
	// finish closes f, prints the on-disk size of name prefixed by label,
	// and removes the file. The file is removed even if Stat fails.
	finish := func(label, name string, f *os.File) {
		defer os.Remove(name)
		// Best-effort close, as this is only example cleanup.
		f.Close()
		stat, err := os.Stat(name)
		if err != nil {
			panic(err)
		}
		fmt.Println(label, stat.Size())
	}

	data, err := os.Create("output.data")
	if err != nil {
		panic(err)
	}
	// Close, print stats and remove it
	defer finish("Data size:", "output.data", data)

	idx, err := os.Create("output.idx")
	if err != nil {
		panic(err)
	}
	// Close, print stats and remove it
	defer finish("Index size:", "output.idx", idx)

	// This is our input:
	input := bytes.NewBuffer(make([]byte, 500000))

	// Create a new writer, with each block being 1000 bytes fixed size.
	w, err := dedup.NewWriter(idx, data, dedup.ModeFixed, 1000, 0)
	if err != nil {
		panic(err)
	}
	// Deferred last, so it runs first: the writer is closed before the
	// files are closed and their sizes are printed.
	defer func() {
		if err := w.Close(); err != nil {
			panic(err)
		}
	}()

	// Copy our input to the writer.
	if _, err = io.Copy(w, input); err != nil {
		panic(err)
	}

	// Print the number of blocks written
	fmt.Println("Blocks:", w.Blocks())

	// OUTPUT: Blocks: 500
	// Index size: 517
	// Data size: 1000
}
Example #9
0
// TestFixedWriter writes an input in which 50 blocks of 64KiB are copies of
// the first ten blocks through a dedup.ModeFixed writer, and checks that
// between 50 and 60 blocks are missing from the resulting data stream.
func TestFixedWriter(t *testing.T) {
	idx := bytes.Buffer{}
	data := bytes.Buffer{}

	const totalinput = 10 << 20
	input := getBufferSize(totalinput)

	const size = 64 << 10
	b := input.Bytes()
	// Create some duplicates
	for i := 0; i < 50; i++ {
		// Read from 10 first blocks
		src := b[(i%10)*size : (i%10)*size+size]
		// Write into the following ones
		dst := b[(10+i)*size : (i+10)*size+size]
		copy(dst, src)
	}
	input = bytes.NewBuffer(b)
	w, err := dedup.NewWriter(&idx, &data, dedup.ModeFixed, size, size*10)
	if err != nil {
		t.Fatal(err)
	}
	// A failed copy would otherwise show up as a misleading block count.
	if _, err = io.Copy(w, input); err != nil {
		t.Fatal(err)
	}
	err = w.Close()
	if err != nil {
		t.Fatal(err)
	}
	// Number of whole blocks missing from the data stream.
	removed := ((totalinput) - data.Len()) / size

	t.Log(dedup.BirthdayProblem(totalinput / size))
	t.Log("Index size:", idx.Len())
	t.Log("Data size:", data.Len())
	t.Log("Removed", removed, "blocks")
	// We should get at least 50 blocks
	if removed < 50 {
		t.Fatal("didn't remove at least 50 blocks")
	}
	if removed > 60 {
		t.Fatal("removed unreasonable high amount of blocks")
	}
}
Example #10
0
// TestDynamicWriter writes an input in which 50 blocks of 64KiB are copies
// of the first ten blocks through a dedup.ModeDynamic writer, and checks
// that the data stream shrinks by at least 40 blocks' worth of bytes.
func TestDynamicWriter(t *testing.T) {
	idx := bytes.Buffer{}
	data := bytes.Buffer{}

	const totalinput = 10 << 20
	input := getBufferSize(totalinput)

	const size = 64 << 10
	b := input.Bytes()
	// Create some duplicates
	for i := 0; i < 50; i++ {
		// Read from 10 first blocks
		src := b[(i%10)*size : (i%10)*size+size]
		// Write into the following ones
		dst := b[(10+i)*size : (i+10)*size+size]
		copy(dst, src)
	}
	input = bytes.NewBuffer(b)
	w, err := dedup.NewWriter(&idx, &data, dedup.ModeDynamic, size, 10*8*size)
	if err != nil {
		t.Fatal(err)
	}
	// A failed copy would otherwise show up as a misleading block count.
	if _, err = io.Copy(w, input); err != nil {
		t.Fatal(err)
	}
	err = w.Close()
	if err != nil {
		t.Fatal(err)
	}
	// Saved bytes expressed in units of `size`.
	removed := ((totalinput) - data.Len()) / size

	t.Log("Dynamic Index size:", idx.Len())
	t.Log("Dynamic Data size:", data.Len())
	t.Log("Removed", removed, "blocks")
	// We don't know how many, but it should remove some blocks
	if removed < 40 {
		t.Fatal("didn't remove at least 40 blocks")
	}
}
Example #11
0
// TestReaderWriteTo writes an input containing duplicated 64KiB blocks
// through a dedup.ModeFixed writer, reads it back with Reader.WriteTo and
// checks the reported byte count, the output content, and that every block
// except the last one is exactly `size` bytes.
func TestReaderWriteTo(t *testing.T) {
	idx := bytes.Buffer{}
	data := bytes.Buffer{}

	// The extra 65 bytes make the input not a multiple of the block size.
	const totalinput = 10<<20 + 65
	input := getBufferSize(totalinput)

	const size = 64 << 10
	b := input.Bytes()
	// Create some duplicates
	for i := 0; i < 50; i++ {
		// Read from 10 first blocks
		src := b[(i%10)*size : (i%10)*size+size]
		// Write into the following ones
		dst := b[(10+i)*size : (i+10)*size+size]
		copy(dst, src)
	}
	input = bytes.NewBuffer(b)
	w, err := dedup.NewWriter(&idx, &data, dedup.ModeFixed, size, 0)
	if err != nil {
		t.Fatal(err)
	}
	if _, err = io.Copy(w, input); err != nil {
		t.Fatal(err)
	}
	err = w.Close()
	if err != nil {
		t.Fatal(err)
	}

	r, err := dedup.NewReader(&idx, &data)
	if err != nil {
		t.Fatal(err)
	}

	dst := &bytes.Buffer{}
	n, err := r.WriteTo(dst)
	if err != io.EOF && err != nil {
		t.Fatal(err)
	}
	// Errorf rather than Fatalf: keep going to also compare the content.
	if len(b) != int(n) {
		t.Errorf("Write count, expected n %d, got %d", len(b), n)
	}

	out := dst.Bytes()
	if len(b) != len(out) {
		t.Fatalf("Expected len %d, got %d", len(b), len(out))
	}
	if !bytes.Equal(b, out) {
		t.Fatal("Output mismatch")
	}
	err = r.Close()
	if err != nil {
		t.Fatal(err)
	}
	blocks := r.BlockSizes()
	// Guard the slice expression below, which would panic on an empty slice.
	if len(blocks) == 0 {
		t.Fatal("no block sizes returned")
	}
	// The last block is excluded from the size check.
	for _, s := range blocks[:len(blocks)-1] {
		if s != size {
			t.Fatal("wrong size, expected", size, "got", s)
		}
	}
}
Example #12
0
// TestDynamicRoundtrip writes an input containing duplicated 64KiB blocks
// through a dedup.ModeDynamic writer, checks that no block reported by the
// Reader is larger than `size`, and verifies that reading the stream back
// yields the original input.
func TestDynamicRoundtrip(t *testing.T) {
	idx := bytes.Buffer{}
	data := bytes.Buffer{}

	const totalinput = 10<<20 + 65
	input := getBufferSize(totalinput)

	const size = 64 << 10
	b := input.Bytes()
	// Create some duplicates
	for i := 0; i < 50; i++ {
		// Read from 10 first blocks
		src := b[(i%10)*size : (i%10)*size+size]
		// Write into the following ones
		dst := b[(10+i)*size : (i+10)*size+size]
		copy(dst, src)
	}
	input = bytes.NewBuffer(b)
	w, err := dedup.NewWriter(&idx, &data, dedup.ModeDynamic, size, 0)
	if err != nil {
		t.Fatal(err)
	}
	if _, err = io.Copy(w, input); err != nil {
		t.Fatal(err)
	}
	err = w.Close()
	if err != nil {
		t.Fatal(err)
	}

	t.Log("Dynamic Index size:", idx.Len())
	t.Log("Dynamic Data size:", data.Len())

	r, err := dedup.NewReader(&idx, &data)
	if err != nil {
		t.Fatal(err)
	}

	t.Log("Maximum estimated memory:", r.MaxMem(), "bytes")
	blocks := r.BlockSizes()
	// Guard the average below against a division by zero.
	if len(blocks) == 0 {
		t.Fatal("no block sizes returned")
	}
	total := 0
	for _, v := range blocks {
		if v > size {
			t.Fatal("too big block returned, should not be >", size, "was", v)
		}
		total += v
	}
	t.Log("Average block size:", total/len(blocks), "bytes")

	out, err := ioutil.ReadAll(r)
	if err != io.EOF && err != nil {
		t.Fatal(err)
	}
	if len(b) != len(out) {
		t.Fatalf("Expected len %d, got %d", len(b), len(out))
	}
	if !bytes.Equal(b, out) {
		t.Fatal("Output mismatch")
	}
	err = r.Close()
	if err != nil {
		t.Fatal(err)
	}
}