// Test erasureReadFile with random offsets and lengths.
// This test is t.Skip()ed as it takes a long time to run, hence it should be
// run explicitly after commenting out t.SkipNow().
func TestErasureReadFileRandomOffsetLength(t *testing.T) {
	// Comment the following line to run this test.
	t.SkipNow()

	// Initialize environment needed for the test.
	dataBlocks := 7
	parityBlocks := 7
	blockSize := int64(1 * 1024 * 1024)
	setup, err := newErasureTestSetup(dataBlocks, parityBlocks, blockSize)
	if err != nil {
		t.Error(err)
		return
	}
	defer setup.Remove()
	disks := setup.disks

	// Prepare a slice of 5MB with random data.
	data := make([]byte, 5*1024*1024)
	length := int64(len(data))
	_, err = rand.Read(data)
	if err != nil {
		t.Fatal(err)
	}

	// 10000 iterations with random offsets and lengths.
	iterations := 10000

	// Create a test file to read from.
	size, checkSums, err := erasureCreateFile(disks, "testbucket", "testobject", bytes.NewReader(data), blockSize, dataBlocks, parityBlocks, bitRotAlgo, dataBlocks+1)
	if err != nil {
		t.Fatal(err)
	}
	if size != length {
		t.Errorf("erasureCreateFile returned %d, expected %d", size, length)
	}

	// To generate random offset/length.
	r := rand.New(rand.NewSource(time.Now().UnixNano()))

	// Create a pool buffer which will be used by erasureReadFile for
	// reading from disks and erasure decoding.
	chunkSize := getChunkSize(blockSize, dataBlocks)
	pool := bpool.NewBytePool(chunkSize, len(disks))

	buf := &bytes.Buffer{}

	// Verify erasureReadFile() for random offsets and lengths.
	for i := 0; i < iterations; i++ {
		offset := r.Int63n(length)
		readLen := r.Int63n(length - offset)

		expected := data[offset : offset+readLen]

		_, err = erasureReadFile(buf, disks, "testbucket", "testobject", offset, readLen, length, blockSize, dataBlocks, parityBlocks, checkSums, bitRotAlgo, pool)
		if err != nil {
			t.Fatal(err, offset, readLen)
		}
		got := buf.Bytes()
		if !bytes.Equal(expected, got) {
			t.Fatalf("read data is different from what was expected, offset=%d length=%d", offset, readLen)
		}
		buf.Reset()
	}
}
func TestErasureReadFileDiskFail(t *testing.T) {
	// Initialize environment needed for the test.
	dataBlocks := 7
	parityBlocks := 7
	blockSize := int64(blockSizeV1)
	setup, err := newErasureTestSetup(dataBlocks, parityBlocks, blockSize)
	if err != nil {
		t.Error(err)
		return
	}
	defer setup.Remove()
	disks := setup.disks

	// Prepare a slice of 1MB with random data.
	data := make([]byte, 1*1024*1024)
	length := int64(len(data))
	_, err = rand.Read(data)
	if err != nil {
		t.Fatal(err)
	}

	// Create a test file to read from.
	size, checkSums, err := erasureCreateFile(disks, "testbucket", "testobject", bytes.NewReader(data), blockSize, dataBlocks, parityBlocks, bitRotAlgo, dataBlocks+1)
	if err != nil {
		t.Fatal(err)
	}
	if size != length {
		t.Errorf("erasureCreateFile returned %d, expected %d", size, length)
	}

	// Create a byte pool which will be used by erasureReadFile for
	// reading from disks and erasure decoding.
	chunkSize := getChunkSize(blockSize, dataBlocks)
	pool := bpool.NewBytePool(chunkSize, len(disks))

	buf := &bytes.Buffer{}
	_, err = erasureReadFile(buf, disks, "testbucket", "testobject", 0, length, length, blockSize, dataBlocks, parityBlocks, checkSums, bitRotAlgo, pool)
	if err != nil {
		t.Error(err)
	}
	if !bytes.Equal(buf.Bytes(), data) {
		t.Error("Contents of the erasure coded file differs")
	}

	// 2 disks down. Read should succeed.
	disks[4] = ReadDiskDown{disks[4].(*posix)}
	disks[5] = ReadDiskDown{disks[5].(*posix)}

	buf.Reset()
	_, err = erasureReadFile(buf, disks, "testbucket", "testobject", 0, length, length, blockSize, dataBlocks, parityBlocks, checkSums, bitRotAlgo, pool)
	if err != nil {
		t.Error(err)
	}
	if !bytes.Equal(buf.Bytes(), data) {
		t.Error("Contents of the erasure coded file differs")
	}

	// 4 more disks down. 6 disks down in total. Read should succeed.
	disks[6] = ReadDiskDown{disks[6].(*posix)}
	disks[8] = ReadDiskDown{disks[8].(*posix)}
	disks[9] = ReadDiskDown{disks[9].(*posix)}
	disks[11] = ReadDiskDown{disks[11].(*posix)}

	buf.Reset()
	_, err = erasureReadFile(buf, disks, "testbucket", "testobject", 0, length, length, blockSize, dataBlocks, parityBlocks, checkSums, bitRotAlgo, pool)
	if err != nil {
		t.Error(err)
	}
	if !bytes.Equal(buf.Bytes(), data) {
		t.Error("Contents of the erasure coded file differs")
	}

	// 2 more disks down. 8 disks down in total. Read should fail.
	disks[12] = ReadDiskDown{disks[12].(*posix)}
	disks[13] = ReadDiskDown{disks[13].(*posix)}

	buf.Reset()
	_, err = erasureReadFile(buf, disks, "testbucket", "testobject", 0, length, length, blockSize, dataBlocks, parityBlocks, checkSums, bitRotAlgo, pool)
	if errorCause(err) != errXLReadQuorum {
		t.Fatal("expected errXLReadQuorum error")
	}
}
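// Note: ReadDiskDown, used above to simulate failed disks, is assumed to be a
// small test helper defined alongside these tests. A minimal sketch of such a
// wrapper (illustrative only, not the canonical definition) embeds the healthy
// *posix disk and fails every read with the package's errFaultyDisk sentinel:
//
//	// ReadDiskDown - wraps a healthy disk but fails ReadFile(), simulating a
//	// disk that has gone down for reads.
//	type ReadDiskDown struct {
//		*posix
//	}
//
//	func (r ReadDiskDown) ReadFile(volume string, path string, offset int64, buf []byte) (n int64, err error) {
//		return 0, errFaultyDisk
//	}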
func TestErasureReadFileOffsetLength(t *testing.T) {
	// Initialize environment needed for the test.
	dataBlocks := 7
	parityBlocks := 7
	blockSize := int64(1 * 1024 * 1024)
	setup, err := newErasureTestSetup(dataBlocks, parityBlocks, blockSize)
	if err != nil {
		t.Error(err)
		return
	}
	defer setup.Remove()
	disks := setup.disks

	// Prepare a slice of 5MB with random data.
	data := make([]byte, 5*1024*1024)
	length := int64(len(data))
	_, err = rand.Read(data)
	if err != nil {
		t.Fatal(err)
	}

	// Create a test file to read from.
	size, checkSums, err := erasureCreateFile(disks, "testbucket", "testobject", bytes.NewReader(data), blockSize, dataBlocks, parityBlocks, bitRotAlgo, dataBlocks+1)
	if err != nil {
		t.Fatal(err)
	}
	if size != length {
		t.Errorf("erasureCreateFile returned %d, expected %d", size, length)
	}

	testCases := []struct {
		offset, length int64
	}{
		// Full file.
		{0, length},
		// Read nothing.
		{length, 0},
		// 2nd block.
		{blockSize, blockSize},
		// Test cases for offsets and lengths around block boundaries.
		{blockSize - 1, 2},
		{blockSize - 1, blockSize + 1},
		{blockSize + 1, blockSize - 1},
		{blockSize + 1, blockSize},
		{blockSize + 1, blockSize + 1},
		{blockSize*2 - 1, blockSize + 1},
		{length - 1, 1},
		{length - blockSize, blockSize},
		{length - blockSize - 1, blockSize},
		{length - blockSize - 1, blockSize + 1},
	}

	chunkSize := getChunkSize(blockSize, dataBlocks)
	pool := bpool.NewBytePool(chunkSize, len(disks))

	// Compare the data read from file with "data" byte array.
	for i, testCase := range testCases {
		expected := data[testCase.offset:(testCase.offset + testCase.length)]

		buf := &bytes.Buffer{}
		_, err = erasureReadFile(buf, disks, "testbucket", "testobject", testCase.offset, testCase.length, length, blockSize, dataBlocks, parityBlocks, checkSums, bitRotAlgo, pool)
		if err != nil {
			t.Error(err)
			continue
		}
		got := buf.Bytes()
		if !bytes.Equal(expected, got) {
			t.Errorf("Test %d: read data is different from what was expected", i+1)
		}
	}
}
// GetObject - reads an object erasure coded across multiple
// disks. Supports additional parameters like offset and length
// which are synonymous with an HTTP Range request.
//
// startOffset indicates the location from which the client requested
// the object to be read. length indicates the total length of the
// object requested by the client.
func (xl xlObjects) GetObject(bucket, object string, startOffset int64, length int64, writer io.Writer) error {
	if err := checkGetObjArgs(bucket, object); err != nil {
		return err
	}

	// Start offset cannot be negative.
	if startOffset < 0 {
		return traceError(errUnexpected)
	}

	// Writer cannot be nil.
	if writer == nil {
		return traceError(errUnexpected)
	}

	// Read metadata associated with the object from all disks.
	metaArr, errs := readAllXLMetadata(xl.storageDisks, bucket, object)

	// Do we have read quorum?
	if !isDiskQuorum(errs, xl.readQuorum) {
		return traceError(InsufficientReadQuorum{}, errs...)
	}

	if reducedErr := reduceReadQuorumErrs(errs, objectOpIgnoredErrs, xl.readQuorum); reducedErr != nil {
		return toObjectErr(reducedErr, bucket, object)
	}

	// List all online disks.
	onlineDisks, modTime := listOnlineDisks(xl.storageDisks, metaArr, errs)

	// Pick latest valid metadata.
	xlMeta, err := pickValidXLMeta(metaArr, modTime)
	if err != nil {
		return err
	}

	// Reorder online disks based on erasure distribution order.
	onlineDisks = getOrderedDisks(xlMeta.Erasure.Distribution, onlineDisks)

	// Reorder parts metadata based on erasure distribution order.
	metaArr = getOrderedPartsMetadata(xlMeta.Erasure.Distribution, metaArr)

	// For negative length read everything.
	if length < 0 {
		length = xlMeta.Stat.Size - startOffset
	}

	// Reply back invalid range if the input offset and length fall out of range.
	if startOffset > xlMeta.Stat.Size || startOffset+length > xlMeta.Stat.Size {
		return traceError(InvalidRange{startOffset, length, xlMeta.Stat.Size})
	}

	// Get start part index and offset.
	partIndex, partOffset, err := xlMeta.ObjectToPartOffset(startOffset)
	if err != nil {
		return traceError(InvalidRange{startOffset, length, xlMeta.Stat.Size})
	}

	// Get last part index to read given length.
	lastPartIndex, _, err := xlMeta.ObjectToPartOffset(startOffset + length - 1)
	if err != nil {
		return traceError(InvalidRange{startOffset, length, xlMeta.Stat.Size})
	}

	// Save the writer.
	mw := writer

	// Object cache enabled block.
	if xlMeta.Stat.Size > 0 && xl.objCacheEnabled {
		// Validate if we have previous cache.
		var cachedBuffer io.ReadSeeker
		cachedBuffer, err = xl.objCache.Open(path.Join(bucket, object), modTime)
		if err == nil { // Cache hit.
			// Advance the buffer to offset as if it was read.
			if _, err = cachedBuffer.Seek(startOffset, 0); err != nil { // Seek to the offset.
				return traceError(err)
			}
			// Write the requested length.
			if _, err = io.CopyN(writer, cachedBuffer, length); err != nil {
				return traceError(err)
			}
			return nil
		} // Cache miss.

		// For unknown error, return and error out.
		if err != objcache.ErrKeyNotFoundInCache {
			return traceError(err)
		} // Cache has not been found, fill the cache.

		// Cache is only set if whole object is being read.
		if startOffset == 0 && length == xlMeta.Stat.Size {
			// Proceed to set the cache.
			var newBuffer io.WriteCloser
			// Create a new entry in memory of length.
			newBuffer, err = xl.objCache.Create(path.Join(bucket, object), length)
			if err == nil {
				// Create a multi writer to write to both memory and client response.
				mw = io.MultiWriter(newBuffer, writer)
				defer newBuffer.Close()
			}
			// Ignore error if cache is full, proceed to write the object.
			if err != nil && err != objcache.ErrCacheFull {
				// For any other error return here.
				return toObjectErr(traceError(err), bucket, object)
			}
		}
	}

	totalBytesRead := int64(0)

	chunkSize := getChunkSize(xlMeta.Erasure.BlockSize, xlMeta.Erasure.DataBlocks)
	pool := bpool.NewBytePool(chunkSize, len(onlineDisks))

	// Read from all parts.
	for ; partIndex <= lastPartIndex; partIndex++ {
		if length == totalBytesRead {
			break
		}
		// Save the current part name and size.
		partName := xlMeta.Parts[partIndex].Name
		partSize := xlMeta.Parts[partIndex].Size

		readSize := partSize - partOffset
		// readSize should be adjusted so that we don't write more data than what was requested.
		if readSize > (length - totalBytesRead) {
			readSize = length - totalBytesRead
		}

		// Get the checksums of the current part.
		checkSums := make([]string, len(onlineDisks))
		var ckSumAlgo string
		for index, disk := range onlineDisks {
			// Disk is not found, skip the checksum.
			if disk == nil {
				checkSums[index] = ""
				continue
			}
			ckSumInfo := metaArr[index].Erasure.GetCheckSumInfo(partName)
			checkSums[index] = ckSumInfo.Hash
			// Set checksum algo only once, while it is possible to have
			// different algos per block because of our `xl.json`.
			// It is not a requirement, set this only once for all the disks.
			if ckSumAlgo == "" {
				ckSumAlgo = ckSumInfo.Algorithm
			}
		}

		// Start erasure decoding and writing to the client.
		n, err := erasureReadFile(mw, onlineDisks, bucket, pathJoin(object, partName), partOffset, readSize, partSize, xlMeta.Erasure.BlockSize, xlMeta.Erasure.DataBlocks, xlMeta.Erasure.ParityBlocks, checkSums, ckSumAlgo, pool)
		if err != nil {
			errorIf(err, "Unable to read %s of the object `%s/%s`.", partName, bucket, object)
			return toObjectErr(err, bucket, object)
		}

		// Track total bytes read from disk and written to the client.
		totalBytesRead += n

		// partOffset will be valid only for the first part, hence reset it to 0 for
		// the remaining parts.
		partOffset = 0
	} // End of read all parts loop.

	// Return success.
	return nil
}
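// Illustrative sketch (not part of the package API): how a caller that has
// already parsed an HTTP Range header "bytes=first-last" could map the range
// onto GetObject's startOffset/length parameters. The function name and its
// arguments below are hypothetical.
func getObjectRangeSketch(xl xlObjects, bucket, object string, first, last, objectSize int64, writer io.Writer) error {
	// Clamp the end of the range to the object size; GetObject rejects
	// ranges that extend beyond the object.
	if last >= objectSize {
		last = objectSize - 1
	}
	// Convert the inclusive byte range into an offset and a length.
	startOffset := first
	length := last - first + 1
	// Stream exactly the requested bytes to the writer.
	return xl.GetObject(bucket, object, startOffset, length, writer)
}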