// Test erasureReadFile with random offsets and lengths.
// This test is t.Skip()ed as it takes a long time to run, hence it should
// be run explicitly after commenting out the t.SkipNow() call below.
func TestErasureReadFileRandomOffsetLength(t *testing.T) {
	// Comment the following line to run this test.
	t.SkipNow()
	// Initialize environment needed for the test.
	dataBlocks := 7
	parityBlocks := 7
	blockSize := int64(1 * 1024 * 1024)
	setup, err := newErasureTestSetup(dataBlocks, parityBlocks, blockSize)
	if err != nil {
		t.Error(err)
		return
	}
	defer setup.Remove()

	disks := setup.disks

	// Prepare a slice of 5MB with random data.
	data := make([]byte, 5*1024*1024)
	length := int64(len(data))
	_, err = rand.Read(data)
	if err != nil {
		t.Fatal(err)
	}

	// 10000 iterations with random offsets and lengths.
	iterations := 10000

	// Create a test file to read from.
	size, checkSums, err := erasureCreateFile(disks, "testbucket", "testobject", bytes.NewReader(data), blockSize, dataBlocks, parityBlocks, bitRotAlgo, dataBlocks+1)
	if err != nil {
		t.Fatal(err)
	}
	if size != length {
		t.Errorf("erasureCreateFile returned %d, expected %d", size, length)
	}

	// To generate random offset/length.
	r := rand.New(rand.NewSource(time.Now().UnixNano()))

	// Create a byte pool which will be used by erasureReadFile for
	// reading from disks and erasure decoding.
	chunkSize := getChunkSize(blockSize, dataBlocks)
	pool := bpool.NewBytePool(chunkSize, len(disks))

	buf := &bytes.Buffer{}

	// Verify erasureReadFile() for random offsets and lengths.
	for i := 0; i < iterations; i++ {
		offset := r.Int63n(length)
		readLen := r.Int63n(length - offset)

		expected := data[offset : offset+readLen]

		_, err = erasureReadFile(buf, disks, "testbucket", "testobject", offset, readLen, length, blockSize, dataBlocks, parityBlocks, checkSums, bitRotAlgo, pool)
		if err != nil {
			t.Fatal(err, offset, readLen)
		}
		got := buf.Bytes()
		if !bytes.Equal(expected, got) {
			t.Fatalf("read data is different from what was expected, offset=%d length=%d", offset, readLen)
		}
		buf.Reset()
	}
}
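
// getChunkSize is defined elsewhere in this package; the tests here only
// depend on it returning the size of the per-disk chunk that one erasure
// block is split into. A minimal sketch of that mapping, assuming ceiling
// division so that dataBlocks chunks always cover a full block (the real
// implementation may differ):
func getChunkSizeSketch(blockSize int64, dataBlocks int) int64 {
	// Each data disk stores ceil(blockSize/dataBlocks) bytes per block.
	return (blockSize + int64(dataBlocks) - 1) / int64(dataBlocks)
}
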
func TestErasureReadFileDiskFail(t *testing.T) {
	// Initialize environment needed for the test.
	dataBlocks := 7
	parityBlocks := 7
	blockSize := int64(blockSizeV1)
	setup, err := newErasureTestSetup(dataBlocks, parityBlocks, blockSize)
	if err != nil {
		t.Error(err)
		return
	}
	defer setup.Remove()

	disks := setup.disks

	// Prepare a slice of 1MB with random data.
	data := make([]byte, 1*1024*1024)
	length := int64(len(data))
	_, err = rand.Read(data)
	if err != nil {
		t.Fatal(err)
	}

	// Create a test file to read from.
	size, checkSums, err := erasureCreateFile(disks, "testbucket", "testobject", bytes.NewReader(data), blockSize, dataBlocks, parityBlocks, bitRotAlgo, dataBlocks+1)
	if err != nil {
		t.Fatal(err)
	}
	if size != length {
		t.Errorf("erasureCreateFile returned %d, expected %d", size, length)
	}

	// Create a byte pool which will be used by erasureReadFile for
	// reading from disks and erasure decoding.
	chunkSize := getChunkSize(blockSize, dataBlocks)
	pool := bpool.NewBytePool(chunkSize, len(disks))

	buf := &bytes.Buffer{}
	_, err = erasureReadFile(buf, disks, "testbucket", "testobject", 0, length, length, blockSize, dataBlocks, parityBlocks, checkSums, bitRotAlgo, pool)
	if err != nil {
		t.Error(err)
	}
	if !bytes.Equal(buf.Bytes(), data) {
		t.Error("Contents of the erasure coded file differs")
	}

	// 2 disks down. Read should succeed.
	disks[4] = ReadDiskDown{disks[4].(*posix)}
	disks[5] = ReadDiskDown{disks[5].(*posix)}

	buf.Reset()
	_, err = erasureReadFile(buf, disks, "testbucket", "testobject", 0, length, length, blockSize, dataBlocks, parityBlocks, checkSums, bitRotAlgo, pool)
	if err != nil {
		t.Error(err)
	}
	if !bytes.Equal(buf.Bytes(), data) {
		t.Error("Contents of the erasure coded file differs")
	}

	// 4 more disks down. 6 disks down in total. Read should succeed.
	disks[6] = ReadDiskDown{disks[6].(*posix)}
	disks[8] = ReadDiskDown{disks[8].(*posix)}
	disks[9] = ReadDiskDown{disks[9].(*posix)}
	disks[11] = ReadDiskDown{disks[11].(*posix)}

	buf.Reset()
	_, err = erasureReadFile(buf, disks, "testbucket", "testobject", 0, length, length, blockSize, dataBlocks, parityBlocks, checkSums, bitRotAlgo, pool)
	if err != nil {
		t.Error(err)
	}
	if !bytes.Equal(buf.Bytes(), data) {
		t.Error("Contents of the erasure coded file differs")
	}

	// 2 more disks down. 8 disks down in total. Read should fail.
	disks[12] = ReadDiskDown{disks[12].(*posix)}
	disks[13] = ReadDiskDown{disks[13].(*posix)}
	buf.Reset()
	_, err = erasureReadFile(buf, disks, "testbucket", "testobject", 0, length, length, blockSize, dataBlocks, parityBlocks, checkSums, bitRotAlgo, pool)
	if errorCause(err) != errXLReadQuorum {
		t.Fatal("expected errXLReadQuorum error")
	}
}
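
// ReadDiskDown, used above to take disks down, is defined in the
// surrounding test code: it wraps a healthy *posix disk and fails every
// read. A minimal sketch of the idea (the concrete error value is an
// assumption; any non-nil error exercises the same failure path):
type readDiskDownSketch struct {
	*posix
}

func (r readDiskDownSketch) ReadFile(volume string, path string, offset int64, buf []byte) (int64, error) {
	return 0, errFaultyDisk
}
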
func TestErasureReadFileOffsetLength(t *testing.T) {
	// Initialize environment needed for the test.
	dataBlocks := 7
	parityBlocks := 7
	blockSize := int64(1 * 1024 * 1024)
	setup, err := newErasureTestSetup(dataBlocks, parityBlocks, blockSize)
	if err != nil {
		t.Error(err)
		return
	}
	defer setup.Remove()

	disks := setup.disks

	// Prepare a slice of 5MB with random data.
	data := make([]byte, 5*1024*1024)
	length := int64(len(data))
	_, err = rand.Read(data)
	if err != nil {
		t.Fatal(err)
	}

	// Create a test file to read from.
	size, checkSums, err := erasureCreateFile(disks, "testbucket", "testobject", bytes.NewReader(data), blockSize, dataBlocks, parityBlocks, bitRotAlgo, dataBlocks+1)
	if err != nil {
		t.Fatal(err)
	}
	if size != length {
		t.Errorf("erasureCreateFile returned %d, expected %d", size, length)
	}

	testCases := []struct {
		offset, length int64
	}{
		// Full file.
		{0, length},
		// Read nothing.
		{length, 0},
		// 2nd block.
		{blockSize, blockSize},
		// Test cases around erasure block boundaries.
		{blockSize - 1, 2},
		{blockSize - 1, blockSize + 1},
		{blockSize + 1, blockSize - 1},
		{blockSize + 1, blockSize},
		{blockSize + 1, blockSize + 1},
		{blockSize*2 - 1, blockSize + 1},
		{length - 1, 1},
		{length - blockSize, blockSize},
		{length - blockSize - 1, blockSize},
		{length - blockSize - 1, blockSize + 1},
	}
	chunkSize := getChunkSize(blockSize, dataBlocks)
	pool := bpool.NewBytePool(chunkSize, len(disks))

	// Compare the data read from the file with the "data" byte slice.
	for i, testCase := range testCases {
		expected := data[testCase.offset:(testCase.offset + testCase.length)]
		buf := &bytes.Buffer{}
		_, err = erasureReadFile(buf, disks, "testbucket", "testobject", testCase.offset, testCase.length, length, blockSize, dataBlocks, parityBlocks, checkSums, bitRotAlgo, pool)
		if err != nil {
			t.Error(err)
			continue
		}
		got := buf.Bytes()
		if !bytes.Equal(expected, got) {
			t.Errorf("Test %d : read data is different from what was expected", i+1)
		}
	}
}
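
// The offset/length pairs above deliberately straddle erasure block
// boundaries. For a given object offset, the enclosing block and the
// offset within that block follow directly from the block size; a worked
// sketch (not package code) of that arithmetic:
func blockForOffsetSketch(offset, blockSize int64) (blockIndex, blockOffset int64) {
	return offset / blockSize, offset % blockSize
}
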
// GetObject - reads an object erasure coded across multiple
// disks. Supports additional offset and length parameters,
// which are synonymous with HTTP Range requests.
//
// startOffset indicates the offset at which the client requested
// the object to be read. length indicates the total number of
// bytes requested by the client.
func (xl xlObjects) GetObject(bucket, object string, startOffset int64, length int64, writer io.Writer) error {
	if err := checkGetObjArgs(bucket, object); err != nil {
		return err
	}

	// Start offset cannot be negative.
	if startOffset < 0 {
		return traceError(errUnexpected)
	}

	// Writer cannot be nil.
	if writer == nil {
		return traceError(errUnexpected)
	}

	// Read metadata associated with the object from all disks.
	metaArr, errs := readAllXLMetadata(xl.storageDisks, bucket, object)
	// Do we have read quorum?
	if !isDiskQuorum(errs, xl.readQuorum) {
		return traceError(InsufficientReadQuorum{}, errs...)
	}

	if reducedErr := reduceReadQuorumErrs(errs, objectOpIgnoredErrs, xl.readQuorum); reducedErr != nil {
		return toObjectErr(reducedErr, bucket, object)
	}

	// List all online disks.
	onlineDisks, modTime := listOnlineDisks(xl.storageDisks, metaArr, errs)

	// Pick latest valid metadata.
	xlMeta, err := pickValidXLMeta(metaArr, modTime)
	if err != nil {
		return err
	}

	// Reorder online disks based on erasure distribution order.
	onlineDisks = getOrderedDisks(xlMeta.Erasure.Distribution, onlineDisks)

	// Reorder parts metadata based on erasure distribution order.
	metaArr = getOrderedPartsMetadata(xlMeta.Erasure.Distribution, metaArr)

	// For negative length read everything.
	if length < 0 {
		length = xlMeta.Stat.Size - startOffset
	}

	// Return InvalidRange if the requested offset and length fall out of range.
	if startOffset > xlMeta.Stat.Size || startOffset+length > xlMeta.Stat.Size {
		return traceError(InvalidRange{startOffset, length, xlMeta.Stat.Size})
	}

	// Get start part index and offset.
	partIndex, partOffset, err := xlMeta.ObjectToPartOffset(startOffset)
	if err != nil {
		return traceError(InvalidRange{startOffset, length, xlMeta.Stat.Size})
	}

	// Get last part index to read given length.
	lastPartIndex, _, err := xlMeta.ObjectToPartOffset(startOffset + length - 1)
	if err != nil {
		return traceError(InvalidRange{startOffset, length, xlMeta.Stat.Size})
	}

	// Save the writer.
	mw := writer

	// Object cache enabled block.
	if xlMeta.Stat.Size > 0 && xl.objCacheEnabled {
		// Validate if we have previous cache.
		var cachedBuffer io.ReadSeeker
		cachedBuffer, err = xl.objCache.Open(path.Join(bucket, object), modTime)
		if err == nil { // Cache hit.
			// Advance the buffer to offset as if it was read.
			if _, err = cachedBuffer.Seek(startOffset, io.SeekStart); err != nil { // Seek to the offset.
				return traceError(err)
			}
			// Write the requested length.
			if _, err = io.CopyN(writer, cachedBuffer, length); err != nil {
				return traceError(err)
			}
			return nil
		} // Cache miss.
		// For any error other than a missing cache entry, error out.
		if err != objcache.ErrKeyNotFoundInCache {
			return traceError(err)
		} // Cache entry not found, proceed to fill the cache below.

		// The cache is only populated when the whole object is being read.
		if startOffset == 0 && length == xlMeta.Stat.Size {
			// Proceed to set the cache.
			var newBuffer io.WriteCloser
			// Create a new entry in memory of length.
			newBuffer, err = xl.objCache.Create(path.Join(bucket, object), length)
			if err == nil {
				// Create a multi writer to write to both memory and client response.
				mw = io.MultiWriter(newBuffer, writer)
				defer newBuffer.Close()
			}
			// Ignore error if cache is full, proceed to write the object.
			if err != nil && err != objcache.ErrCacheFull {
				// For any other error return here.
				return toObjectErr(traceError(err), bucket, object)
			}
		}
	}

	totalBytesRead := int64(0)

	chunkSize := getChunkSize(xlMeta.Erasure.BlockSize, xlMeta.Erasure.DataBlocks)
	pool := bpool.NewBytePool(chunkSize, len(onlineDisks))

	// Read from all parts.
	for ; partIndex <= lastPartIndex; partIndex++ {
		if length == totalBytesRead {
			break
		}
		// Save the current part name and size.
		partName := xlMeta.Parts[partIndex].Name
		partSize := xlMeta.Parts[partIndex].Size

		readSize := partSize - partOffset
		// readSize should be adjusted so that we don't write more data than what was requested.
		if readSize > (length - totalBytesRead) {
			readSize = length - totalBytesRead
		}

		// Get the checksums of the current part.
		checkSums := make([]string, len(onlineDisks))
		var ckSumAlgo string
		for index, disk := range onlineDisks {
			// Disk is not found, skip its checksum.
			if disk == nil {
				checkSums[index] = ""
				continue
			}
			ckSumInfo := metaArr[index].Erasure.GetCheckSumInfo(partName)
			checkSums[index] = ckSumInfo.Hash
			// Set the checksum algorithm only once. Our `xl.json` allows
			// different algorithms per block, but that is not a requirement,
			// so the first algorithm found is used for all the disks.
			if ckSumAlgo == "" {
				ckSumAlgo = ckSumInfo.Algorithm
			}
		}

		// Start erasure decoding and writing to the client.
		n, err := erasureReadFile(mw, onlineDisks, bucket, pathJoin(object, partName), partOffset, readSize, partSize, xlMeta.Erasure.BlockSize, xlMeta.Erasure.DataBlocks, xlMeta.Erasure.ParityBlocks, checkSums, ckSumAlgo, pool)
		if err != nil {
			errorIf(err, "Unable to read %s of the object `%s/%s`.", partName, bucket, object)
			return toObjectErr(err, bucket, object)
		}

		// Track total bytes read from disk and written to the client.
		totalBytesRead += n

		// partOffset will be valid only for the first part, hence reset it to 0 for
		// the remaining parts.
		partOffset = 0
	} // End of read all parts loop.

	// Return success.
	return nil
}
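
// ObjectToPartOffset, used by GetObject above to translate an object
// offset into a part index and an offset within that part, is defined on
// xlMetaV1 elsewhere. A minimal sketch of the traversal it performs,
// assuming the part metadata exposes the per-part sizes in order:
func objectToPartOffsetSketch(partSizes []int64, offset int64) (partIndex int, partOffset int64, err error) {
	partOffset = offset
	for i, size := range partSizes {
		// The remaining offset falls inside this part.
		if partOffset < size {
			return i, partOffset, nil
		}
		partOffset -= size
	}
	// Offset lies beyond the end of the object.
	return 0, 0, InvalidRange{}
}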