Esempio n. 1
0
// TestTFIFind checks that TarFileIterator.Find returns a reader for entries
// that exist in the sample bag, and returns an error with a nil reader for a
// path that is not in the archive.
func TestTFIFind(t *testing.T) {
	_, filename, _, _ := runtime.Caller(0)
	tarFilePath, _ := filepath.Abs(path.Join(filepath.Dir(filename),
		"..", "..", "testdata", "unit_test_bags", "example.edu.tagsample_good.tar"))
	tfi, err := fileutil.NewTarFileIterator(tarFilePath)
	if tfi != nil {
		defer tfi.Close()
	}
	// Everything below dereferences tfi, so stop here if setup failed.
	require.Nil(t, err)
	require.NotNil(t, tfi)

	// Use require before each Close: a nil reader should fail the test
	// rather than panic on the Close call.
	readCloser, err := tfi.Find("example.edu.tagsample_good/junk_file.txt")
	require.Nil(t, err)
	require.NotNil(t, readCloser)
	readCloser.Close()

	readCloser, err = tfi.Find("example.edu.tagsample_good/tagmanifest-sha256.txt")
	require.Nil(t, err)
	require.NotNil(t, readCloser)
	readCloser.Close()

	// A missing entry must produce an error and no reader.
	readCloser, err = tfi.Find("this-file-does-not-exist")
	assert.NotNil(t, err)
	assert.Nil(t, readCloser)
}
Esempio n. 2
0
// TestNewTarFileIterator checks that a TarFileIterator can be constructed
// for the sample bag without error.
func TestNewTarFileIterator(t *testing.T) {
	_, filename, _, _ := runtime.Caller(0)
	tarFilePath, _ := filepath.Abs(path.Join(filepath.Dir(filename),
		"..", "..", "testdata", "unit_test_bags", "example.edu.tagsample_good.tar"))
	tfi, err := fileutil.NewTarFileIterator(tarFilePath)
	// Release the iterator when the test ends, as the other iterator tests do.
	if tfi != nil {
		defer tfi.Close()
	}
	assert.NotNil(t, tfi)
	assert.Nil(t, err)
}
Esempio n. 3
0
// Read scans the bag at vbag.pathToBag and returns the IntellectualObject
// built from it, together with a WorkSummary that records any errors
// encountered along the way.
//
// IntellectualObject.GenericFiles will list ALL files found in the bag,
// including ones we may not want to keep (for example, names beginning with
// dots or dashes). Callers who don't want to preserve those files can remove
// them from the IntellectualObject afterwards.
func (vbag *VirtualBag) Read() (*IntellectualObject, *WorkSummary) {
	vbag.summary = NewWorkSummary()
	vbag.summary.Start()
	vbag.obj = NewIntellectualObject()
	vbag.obj.Identifier = util.CleanBagName(path.Base(vbag.pathToBag))
	if strings.HasSuffix(vbag.pathToBag, ".tar") {
		vbag.obj.IngestTarFilePath = vbag.pathToBag
	} else {
		vbag.obj.IngestUntarredPath = vbag.pathToBag
	}

	// openIterator points vbag.readIterator at a fresh iterator over the
	// bag: a tar iterator when a tar path is set, otherwise a file system
	// iterator over the untarred directory.
	openIterator := func() error {
		var openErr error
		if vbag.obj.IngestTarFilePath == "" {
			vbag.readIterator, openErr = fileutil.NewFileSystemIterator(vbag.obj.IngestUntarredPath)
		} else {
			vbag.readIterator, openErr = fileutil.NewTarFileIterator(vbag.obj.IngestTarFilePath)
		}
		return openErr
	}

	// First pass: build the list of the bag's contents (GenericFiles)
	// and calculate checksums for everything in the bag.
	if err := openIterator(); err != nil {
		vbag.summary.AddError("Could not read bag: %v", err)
		vbag.summary.Finish()
		return vbag.obj, vbag.summary
	}
	vbag.addGenericFiles()
	vbag.obj.IngestTopLevelDirNames = vbag.readIterator.GetTopLevelDirNames()

	// Second pass: Go's tar reader only moves forward, so a brand-new
	// iterator is needed to read the tag files, manifests and tag manifests.
	vbag.readIterator = nil
	if err := openIterator(); err == nil {
		vbag.parseManifestsTagFilesAndMimeTypes()
	} else {
		vbag.summary.AddError("Could not read bag: %v", err)
	}
	vbag.summary.Finish()
	return vbag.obj, vbag.summary
}
Esempio n. 4
0
// TestTarFileIteratorClose checks that TarFileIterator.Close can be called
// repeatedly without panicking.
func TestTarFileIteratorClose(t *testing.T) {
	_, filename, _, _ := runtime.Caller(0)
	tarFilePath, _ := filepath.Abs(path.Join(filepath.Dir(filename),
		"..", "..", "testdata", "unit_test_bags", "example.edu.tagsample_good.tar"))
	tfi, err := fileutil.NewTarFileIterator(tarFilePath)
	// Stop immediately if setup failed; the assertions below are
	// meaningless without an iterator.
	if err != nil || tfi == nil {
		t.Fatalf("Could not get TarFileIterator: %v", err)
	}
	assert.NotPanics(t, tfi.Close, "TarFileIterator.Close() freaked out")
	assert.NotPanics(t, tfi.Close, "TarFileIterator.Close() freaked out")
}
Esempio n. 5
0
// TestTFINext walks every entry in the sample tar file with
// TarFileIterator.Next and checks that each non-directory entry carries a
// sane FileSummary and a readable reader.
func TestTFINext(t *testing.T) {
	_, filename, _, _ := runtime.Caller(0)
	tarFilePath, _ := filepath.Abs(path.Join(filepath.Dir(filename),
		"..", "..", "testdata", "unit_test_bags", "example.edu.tagsample_good.tar"))
	tfi, err := fileutil.NewTarFileIterator(tarFilePath)
	if tfi != nil {
		defer tfi.Close()
	}
	// The loop below dereferences tfi, so setup failures must be fatal.
	if err != nil || tfi == nil {
		t.Fatalf("Could not get TarFileIterator: %v", err)
	}

	for {
		reader, fileSummary, err := tfi.Next()
		if err == io.EOF {
			break
		}
		// Any other error ends the test; otherwise the nil results would
		// be dereferenced below.
		if err != nil {
			t.Fatalf("Unexpected error from Next(): %v", err)
		}
		if reader == nil {
			t.Fatal("Reader is nil")
		}
		if fileSummary == nil {
			t.Fatal("FileSummary is nil")
		}
		if fileSummary.IsDir {
			continue
		}

		assert.NotEmpty(t, fileSummary.RelPath)
		assert.False(t, strings.HasPrefix(fileSummary.RelPath, string(os.PathSeparator)))
		// On Windows, where separator is '\', tar files may still use '/'
		assert.False(t, strings.HasPrefix(fileSummary.RelPath, "/"))
		assert.Empty(t, fileSummary.AbsPath)
		assert.NotNil(t, fileSummary.Mode)
		if fileSummary.IsRegularFile {
			assert.True(t, fileSummary.Size > int64(0))
		}
		assert.False(t, fileSummary.ModTime.IsZero())

		// Reading may legitimately hit end-of-entry; any other error is a failure.
		buf := make([]byte, 1024)
		_, err = reader.Read(buf)
		if err != nil {
			assert.Equal(t, io.EOF, err)
		}
	}
}
Esempio n. 6
0
// TestTFIGetTopLevelDirNames checks that, after the whole sample tar file
// has been read, the iterator reports exactly one top-level directory named
// after the bag.
func TestTFIGetTopLevelDirNames(t *testing.T) {
	_, filename, _, _ := runtime.Caller(0)
	tarFilePath, _ := filepath.Abs(path.Join(filepath.Dir(filename),
		"..", "..", "testdata", "unit_test_bags", "example.edu.tagsample_good.tar"))
	tfi, err := fileutil.NewTarFileIterator(tarFilePath)
	if tfi != nil {
		defer tfi.Close()
	}
	// Stop here on setup failure instead of calling Next on a nil iterator.
	require.Nil(t, err)
	require.NotNil(t, tfi)

	// Read the entire tar file, so we know the reader
	// has looked at all directories.
	for {
		if _, _, err := tfi.Next(); err != nil {
			break
		}
	}
	topLevelDirs := tfi.GetTopLevelDirNames()
	require.NotEmpty(t, topLevelDirs)
	assert.Equal(t, 1, len(topLevelDirs))
	assert.Equal(t, "example.edu.tagsample_good", topLevelDirs[0])
}
Esempio n. 7
0
// calculateTagManifestDigest calculates the sha256 digest of the bag's
// tagmanifest-sha256.txt file and stores it in
// manifest.ReplicationTransfer.FixityValue. If the transfer carries a
// non-empty FixityNonce, the nonce is passed to the digest calculation.
// Any failure is recorded in manifest.CopySummary and FixityValue is left
// untouched.
func (copier *DPNCopier) calculateTagManifestDigest(manifest *models.ReplicationManifest) {
	tarFileIterator, err := fileutil.NewTarFileIterator(manifest.LocalPath)
	if err != nil {
		manifest.CopySummary.AddError("Can't get TarFileIterator for %s: %v",
			manifest.LocalPath, err)
		return
	}
	// Release the iterator when done. Deferred first, so it runs after the
	// readCloser deferred below has been closed.
	defer tarFileIterator.Close()

	// DPN BagIt spec says that the top-level dir inside the bag should
	// have the same name as the bag itself (a UUID).
	// https://wiki.duraspace.org/display/DPN/BagIt+Specification#BagItSpecification-DPNBagitStructure
	// NOTE(review): filepath.Join uses the OS separator, while tar entry
	// names normally use '/'. Confirm this lookup works on Windows.
	tagManifestPath := filepath.Join(manifest.ReplicationTransfer.Bag, "tagmanifest-sha256.txt")
	readCloser, err := tarFileIterator.Find(tagManifestPath)
	if readCloser != nil {
		defer readCloser.Close()
	}
	if err != nil {
		manifest.CopySummary.AddError("Can't get tagmanifest from bag: %v", err)
		return
	}
	nonce := ""
	if manifest.ReplicationTransfer.FixityNonce != nil && *manifest.ReplicationTransfer.FixityNonce != "" {
		nonce = *manifest.ReplicationTransfer.FixityNonce
		copier.Context.MessageLog.Info("FixityNonce for replication %s is %s",
			manifest.ReplicationTransfer.ReplicationId, nonce)
	} else {
		copier.Context.MessageLog.Info("No FixityNonce for replication %s",
			manifest.ReplicationTransfer.ReplicationId)
	}
	digest, err := copier.calculateSha256(readCloser, nonce)
	if err != nil {
		manifest.CopySummary.AddError("Error calculating tagmanifest digest: %v", err)
		return
	}
	manifest.ReplicationTransfer.FixityValue = digest
	copier.Context.MessageLog.Info("Xfer %s has digest %s",
		manifest.ReplicationTransfer.ReplicationId,
		*manifest.ReplicationTransfer.FixityValue)
}
Esempio n. 8
0
// getReadCloser returns a reader that can read the file gf from within the
// tar archive named in ingestState, along with the TarFileIterator that
// backs it. The S3 uploader uses this reader to stream data to S3 and
// Glacier.
//
// On success neither return value is closed here, so the caller must close
// both when done. On failure the error is recorded in
// ingestState.IngestManifest.StoreResult, anything opened here is closed,
// and (nil, nil) is returned.
func (storer *APTStorer) getReadCloser(ingestState *models.IngestState, gf *models.GenericFile) (*fileutil.TarFileIterator, io.ReadCloser) {
	tarFilePath := ingestState.IngestManifest.Object.IngestTarFilePath
	tfi, err := fileutil.NewTarFileIterator(tarFilePath)
	if err != nil {
		msg := fmt.Sprintf("Can't get TarFileIterator for %s: %v", tarFilePath, err)
		ingestState.IngestManifest.StoreResult.AddError(msg)
		return nil, nil
	}
	origPathWithBagName, err := gf.OriginalPathWithBagName()
	if err != nil {
		ingestState.IngestManifest.StoreResult.AddError(err.Error())
		// The iterator is not handed back on failure, so close it here.
		tfi.Close()
		return nil, nil
	}
	readCloser, err := tfi.Find(origPathWithBagName)
	if err != nil {
		msg := fmt.Sprintf("Can't get reader for %s: %v", gf.Identifier, err)
		ingestState.IngestManifest.StoreResult.AddError(msg)
		if readCloser != nil {
			readCloser.Close()
		}
		// Same as above: nobody else can close the iterator after this return.
		tfi.Close()
		return nil, nil
	}
	return tfi, readCloser
}