Beispiel #1
0
// TestCleanBagName checks that util.CleanBagName returns "some.file" for
// tar file names that carry a multipart suffix, both with zero-padded part
// numbers (".b001.of200") and with unpadded ones (".b1.of2").
func TestCleanBagName(t *testing.T) {
	expected := "some.file"
	inputs := []string{
		"some.file.b001.of200.tar",
		"some.file.b1.of2.tar",
	}
	for _, input := range inputs {
		actual := util.CleanBagName(input)
		if actual != expected {
			// Errorf rather than Error: the message contains %s verbs that
			// must be formatted with the arguments that follow.
			t.Errorf("CleanBagName should have returned '%s', but returned '%s'",
				expected, actual)
		}
	}
}
Beispiel #2
0
// SetBasicObjectInfo fills in the initial essential properties of the
// IntellectualObject attached to ingestState
// (ingestState.IngestManifest.Object): bag name, institution, institution
// id, the S3 bucket and key it came from, and the local tar file path.
// According to the original author's note, this is only used by
// apt_fetcher and is only called during the fetch stage.
func SetBasicObjectInfo(ingestState *models.IngestState, _context *context.Context) {
	obj := ingestState.IngestManifest.Object
	bucket := ingestState.IngestManifest.S3Bucket
	key := ingestState.IngestManifest.S3Key

	// owner is the institution identifier, e.g. virginia.edu or ncsu.edu.
	owner := util.OwnerOf(bucket)

	obj.BagName = util.CleanBagName(key)
	obj.Institution = owner
	obj.InstitutionId = ingestState.WorkItem.InstitutionId
	obj.IngestS3Bucket = bucket
	obj.IngestS3Key = key

	// The tar file is downloaded from the receiving bucket to a path of
	// the form <TarDirectory>/<institution>/<S3 key>, for example
	// /mnt/apt/data/virginia.edu/name_of_bag.tar.
	obj.IngestTarFilePath = filepath.Join(_context.Config.TarDirectory, owner, key)

	// An object built by the validator and VirtualBag is identified by the
	// bare bag name (without the .tar extension). That works outside of
	// APTrust's repository environment, but APTrust requires identifiers
	// that start with the institution name and a slash. Nothing to do if
	// the prefix is already present.
	if strings.HasPrefix(obj.Identifier, obj.Institution+"/") {
		return
	}

	// Add the prefix to the object and push the change down to every
	// GenericFile that does not already start with the new identifier.
	obj.Identifier = fmt.Sprintf("%s/%s", obj.Institution, obj.Identifier)
	for _, file := range obj.GenericFiles {
		if strings.HasPrefix(file.Identifier, obj.Identifier) {
			continue
		}
		file.IntellectualObjectIdentifier = obj.Identifier
		file.Identifier = fmt.Sprintf("%s/%s", obj.Institution, file.Identifier)
	}
}
Beispiel #3
0
// Read reads the bag and returns an IntellectualObject together with a
// WorkSummary. Any errors encountered are recorded in the WorkSummary.
//
// IntellectualObject.GenericFiles will list ALL files found in the bag,
// including ones you may not want to keep, such as names that begin with
// dots or dashes. Remove those from the IntellectualObject yourself
// afterwards if they should not be preserved.
func (vbag *VirtualBag) Read() (*IntellectualObject, *WorkSummary) {
	vbag.summary = NewWorkSummary()
	vbag.summary.Start()
	vbag.obj = NewIntellectualObject()
	vbag.obj.Identifier = util.CleanBagName(path.Base(vbag.pathToBag))

	// A path ending in ".tar" is treated as a tar file; anything else is
	// treated as an already-untarred bag.
	if strings.HasSuffix(vbag.pathToBag, ".tar") {
		vbag.obj.IngestTarFilePath = vbag.pathToBag
	} else {
		vbag.obj.IngestUntarredPath = vbag.pathToBag
	}

	// openIterator points vbag.readIterator at a fresh iterator over the
	// tar file when a tar path is set, or over the untarred path otherwise.
	openIterator := func() error {
		var openErr error
		if vbag.obj.IngestTarFilePath != "" {
			vbag.readIterator, openErr = fileutil.NewTarFileIterator(vbag.obj.IngestTarFilePath)
		} else {
			vbag.readIterator, openErr = fileutil.NewFileSystemIterator(vbag.obj.IngestUntarredPath)
		}
		return openErr
	}

	// First pass: compile the list of the bag's contents (GenericFiles)
	// and calculate checksums for everything in the bag.
	if err := openIterator(); err != nil {
		vbag.summary.AddError("Could not read bag: %v", err)
		vbag.summary.Finish()
		return vbag.obj, vbag.summary
	}
	vbag.addGenericFiles()
	vbag.obj.IngestTopLevelDirNames = vbag.readIterator.GetTopLevelDirNames()

	// Second pass: Golang's tar file reader is forward-only, so a new
	// iterator is needed to read through the handful of tag files,
	// manifests and tag manifests.
	vbag.readIterator = nil
	if err := openIterator(); err != nil {
		vbag.summary.AddError("Could not read bag: %v", err)
	} else {
		vbag.parseManifestsTagFilesAndMimeTypes()
	}

	vbag.summary.Finish()
	return vbag.obj, vbag.summary
}
Beispiel #4
0
// ObjectName returns the object identifier that will identify this bag
// in fedora: the institution identifier, a slash, and the tar file name
// with the .tar extension and any multipart suffix removed. For example,
// BucketName "aptrust.receiving.unc.edu" with Key.Key
// "nc_bag.b001.of030.tar" yields "unc.edu/nc_bag".
// The returned error is always nil.
func (s3File *S3File) ObjectName() (string, error) {
	owner, bagName := util.OwnerOf(s3File.BucketName), util.CleanBagName(s3File.Key.Key)
	identifier := fmt.Sprintf("%s/%s", owner, bagName)
	return identifier, nil
}