func TestTFIFind(t *testing.T) { _, filename, _, _ := runtime.Caller(0) tarFilePath, _ := filepath.Abs(path.Join(filepath.Dir(filename), "..", "..", "testdata", "unit_test_bags", "example.edu.tagsample_good.tar")) tfi, err := fileutil.NewTarFileIterator(tarFilePath) if tfi != nil { defer tfi.Close() } assert.NotNil(t, tfi) require.Nil(t, err) readCloser, err := tfi.Find("example.edu.tagsample_good/junk_file.txt") assert.Nil(t, err) assert.NotNil(t, readCloser) readCloser.Close() readCloser, err = tfi.Find("example.edu.tagsample_good/tagmanifest-sha256.txt") assert.Nil(t, err) assert.NotNil(t, readCloser) readCloser.Close() readCloser, err = tfi.Find("this-file-does-not-exist") assert.NotNil(t, err) assert.Nil(t, readCloser) }
func TestNewTarFileIterator(t *testing.T) { _, filename, _, _ := runtime.Caller(0) tarFilePath, _ := filepath.Abs(path.Join(filepath.Dir(filename), "..", "..", "testdata", "unit_test_bags", "example.edu.tagsample_good.tar")) tfi, err := fileutil.NewTarFileIterator(tarFilePath) assert.NotNil(t, tfi) assert.Nil(t, err) }
// Read() reads the bag and returns an IntellectualObject and a WorkSummary. // The WorkSummary will include a list of errors, if there were any. // The list of files contained in IntellectualObject.GenericFiles will include // ALL files found in the bag, even some we may not want to save, such as // those beginning with dots and dashes. If you don't want to preserve those // files you can delete them from the IntellectualObject manually later. func (vbag *VirtualBag) Read() (*IntellectualObject, *WorkSummary) { vbag.summary = NewWorkSummary() vbag.summary.Start() vbag.obj = NewIntellectualObject() vbag.obj.Identifier = util.CleanBagName(path.Base(vbag.pathToBag)) if strings.HasSuffix(vbag.pathToBag, ".tar") { vbag.obj.IngestTarFilePath = vbag.pathToBag } else { vbag.obj.IngestUntarredPath = vbag.pathToBag } // Compile a list of the bag's contents (GenericFiles), // and calculate checksums for everything in the bag. var err error if vbag.obj.IngestTarFilePath != "" { vbag.readIterator, err = fileutil.NewTarFileIterator(vbag.obj.IngestTarFilePath) } else { vbag.readIterator, err = fileutil.NewFileSystemIterator(vbag.obj.IngestUntarredPath) } if err != nil { vbag.summary.AddError("Could not read bag: %v", err) vbag.summary.Finish() return vbag.obj, vbag.summary } else { vbag.addGenericFiles() } vbag.obj.IngestTopLevelDirNames = vbag.readIterator.GetTopLevelDirNames() // Golang's tar file reader is forward-only, so we need to // open a new iterator to read through a handful of tag files, // manifests and tag manifests. vbag.readIterator = nil if vbag.obj.IngestTarFilePath != "" { vbag.readIterator, err = fileutil.NewTarFileIterator(vbag.obj.IngestTarFilePath) } else { vbag.readIterator, err = fileutil.NewFileSystemIterator(vbag.obj.IngestUntarredPath) } if err != nil { vbag.summary.AddError("Could not read bag: %v", err) } else { vbag.parseManifestsTagFilesAndMimeTypes() } vbag.summary.Finish() return vbag.obj, vbag.summary }
// Should be able to close repeatedly without panic. func TestTarFileIteratorClose(t *testing.T) { _, filename, _, _ := runtime.Caller(0) tarFilePath, _ := filepath.Abs(path.Join(filepath.Dir(filename), "..", "..", "testdata", "unit_test_bags", "example.edu.tagsample_good.tar")) tfi, _ := fileutil.NewTarFileIterator(tarFilePath) if tfi == nil { assert.Fail(t, "Could not get TarFileIterator") } assert.NotPanics(t, tfi.Close, "TarFileIterator.Close() freaked out") assert.NotPanics(t, tfi.Close, "TarFileIterator.Close() freaked out") }
func TestTFINext(t *testing.T) { _, filename, _, _ := runtime.Caller(0) tarFilePath, _ := filepath.Abs(path.Join(filepath.Dir(filename), "..", "..", "testdata", "unit_test_bags", "example.edu.tagsample_good.tar")) tfi, err := fileutil.NewTarFileIterator(tarFilePath) if tfi != nil { defer tfi.Close() } assert.NotNil(t, tfi) assert.Nil(t, err) for { reader, fileSummary, err := tfi.Next() if err == io.EOF { break } if reader == nil { assert.Fail(t, "Reader is nil") } if fileSummary == nil { assert.Fail(t, "FileSummary is nil") } if fileSummary.IsDir { continue } assert.NotEmpty(t, fileSummary.RelPath) assert.False(t, strings.HasPrefix(fileSummary.RelPath, string(os.PathSeparator))) // On Windows, where separator is '\', tar files may still use '/' assert.False(t, strings.HasPrefix(fileSummary.RelPath, "/")) assert.Empty(t, fileSummary.AbsPath) assert.NotNil(t, fileSummary.Mode) if fileSummary.IsRegularFile { assert.True(t, fileSummary.Size > int64(0)) } assert.False(t, fileSummary.ModTime.IsZero()) buf := make([]byte, 1024) _, err = reader.Read(buf) if err != nil { assert.Equal(t, io.EOF, err) } } }
func TestTFIGetTopLevelDirNames(t *testing.T) { _, filename, _, _ := runtime.Caller(0) tarFilePath, _ := filepath.Abs(path.Join(filepath.Dir(filename), "..", "..", "testdata", "unit_test_bags", "example.edu.tagsample_good.tar")) tfi, _ := fileutil.NewTarFileIterator(tarFilePath) if tfi == nil { assert.Fail(t, "Could not get TarFileIterator") } // Read the entire tar file, so we know the reader // has looked at all directories. for { _, _, err := tfi.Next() if err != nil { break } } topLevelDirs := tfi.GetTopLevelDirNames() require.NotEmpty(t, topLevelDirs) assert.Equal(t, 1, len(topLevelDirs)) assert.Equal(t, "example.edu.tagsample_good", topLevelDirs[0]) }
// calculateTagManifestDigest calculates the sha256 digest of the bag's // tagmanifest-sha256.txt file. func (copier *DPNCopier) calculateTagManifestDigest(manifest *models.ReplicationManifest) { tarFileIterator, err := fileutil.NewTarFileIterator(manifest.LocalPath) if err != nil { manifest.CopySummary.AddError("Can't get TarFileIterator for %s: %v", manifest.LocalPath, err.Error) return } // DPN BagIt spec says that the top-level dir inside the bag should // have the same name as the bag itself (a UUID). // https://wiki.duraspace.org/display/DPN/BagIt+Specification#BagItSpecification-DPNBagitStructure tagManifestPath := filepath.Join(manifest.ReplicationTransfer.Bag, "tagmanifest-sha256.txt") readCloser, err := tarFileIterator.Find(tagManifestPath) if readCloser != nil { defer readCloser.Close() } if err != nil { manifest.CopySummary.AddError("Can't get tagmanifest from bag: %v", err.Error) return } nonce := "" if manifest.ReplicationTransfer.FixityNonce != nil && *manifest.ReplicationTransfer.FixityNonce != "" { nonce = *manifest.ReplicationTransfer.FixityNonce copier.Context.MessageLog.Info("FixityNonce for replication %s is %s", manifest.ReplicationTransfer.ReplicationId, nonce) } else { copier.Context.MessageLog.Info("No FixityNonce for replication %s", manifest.ReplicationTransfer.ReplicationId) } digest, err := copier.calculateSha256(readCloser, nonce) if err != nil { manifest.CopySummary.AddError("Error calculating tagmanifest digest: %v", err.Error) return } manifest.ReplicationTransfer.FixityValue = digest copier.Context.MessageLog.Info("Xfer %s has digest %s", manifest.ReplicationTransfer.ReplicationId, *manifest.ReplicationTransfer.FixityValue) }
// Returns a reader that can read the file from within the tar archive. // The S3 uploader uses this reader to stream data to S3 and Glacier. func (storer *APTStorer) getReadCloser(ingestState *models.IngestState, gf *models.GenericFile) (*fileutil.TarFileIterator, io.ReadCloser) { tarFilePath := ingestState.IngestManifest.Object.IngestTarFilePath tfi, err := fileutil.NewTarFileIterator(tarFilePath) if err != nil { msg := fmt.Sprintf("Can't get TarFileIterator for %s: %v", tarFilePath, err) ingestState.IngestManifest.StoreResult.AddError(msg) return nil, nil } origPathWithBagName, err := gf.OriginalPathWithBagName() if err != nil { ingestState.IngestManifest.StoreResult.AddError(err.Error()) return nil, nil } readCloser, err := tfi.Find(origPathWithBagName) if err != nil { msg := fmt.Sprintf("Can't get reader for %s: %v", gf.Identifier, err) ingestState.IngestManifest.StoreResult.AddError(msg) if readCloser != nil { readCloser.Close() } return nil, nil } return tfi, readCloser }