func TestCleanBagName(t *testing.T) { expected := "some.file" actual := util.CleanBagName("some.file.b001.of200.tar") if actual != expected { t.Error("CleanBagName should have returned '%s', but returned '%s'", expected, actual) } actual = util.CleanBagName("some.file.b1.of2.tar") if actual != expected { t.Error("CleanBagName should have returned '%s', but returned '%s'", expected, actual) } }
// SetBasicObjectInfo sets initial essential properties on the // IntellectualObject associated with an ingestState // (ingestState.IngestManifest.Object). This is only used by // apt_fetcher and is only ever called during the fetch stage. func SetBasicObjectInfo(ingestState *models.IngestState, _context *context.Context) { // instIdentifier is, e.g., virginia.edu, ncsu.edu, etc. // We'll download the tar file from the receiving bucket to // something like /mnt/apt/data/virginia.edu/name_of_bag.tar // See IngestTarFilePath below. obj := ingestState.IngestManifest.Object instIdentifier := util.OwnerOf(ingestState.IngestManifest.S3Bucket) obj.BagName = util.CleanBagName(ingestState.IngestManifest.S3Key) obj.Institution = instIdentifier obj.InstitutionId = ingestState.WorkItem.InstitutionId obj.IngestS3Bucket = ingestState.IngestManifest.S3Bucket obj.IngestS3Key = ingestState.IngestManifest.S3Key obj.IngestTarFilePath = filepath.Join( _context.Config.TarDirectory, instIdentifier, ingestState.IngestManifest.S3Key) // If this IntellectualObject was created by our validator and VirtualBag, // the identifier will be the bag name (minus the .tar extension). // That's fine for cases where depositors or other organizations are // using the validator outside of APTrust's repository environment, but // APTrust requires that we add the Institution name and a slash to // the beginning of the identifier. So make sure it's there, and propagate // the change all the way down to the GenericFiles. if !strings.HasPrefix(obj.Identifier, obj.Institution+"/") { obj.Identifier = fmt.Sprintf("%s/%s", obj.Institution, obj.Identifier) for _, gf := range obj.GenericFiles { if !strings.HasPrefix(gf.Identifier, obj.Identifier) { gf.IntellectualObjectIdentifier = obj.Identifier gf.Identifier = fmt.Sprintf("%s/%s", obj.Institution, gf.Identifier) } } } }
// Read() reads the bag and returns an IntellectualObject and a WorkSummary. // The WorkSummary will include a list of errors, if there were any. // The list of files contained in IntellectualObject.GenericFiles will include // ALL files found in the bag, even some we may not want to save, such as // those beginning with dots and dashes. If you don't want to preserve those // files you can delete them from the IntellectualObject manually later. func (vbag *VirtualBag) Read() (*IntellectualObject, *WorkSummary) { vbag.summary = NewWorkSummary() vbag.summary.Start() vbag.obj = NewIntellectualObject() vbag.obj.Identifier = util.CleanBagName(path.Base(vbag.pathToBag)) if strings.HasSuffix(vbag.pathToBag, ".tar") { vbag.obj.IngestTarFilePath = vbag.pathToBag } else { vbag.obj.IngestUntarredPath = vbag.pathToBag } // Compile a list of the bag's contents (GenericFiles), // and calculate checksums for everything in the bag. var err error if vbag.obj.IngestTarFilePath != "" { vbag.readIterator, err = fileutil.NewTarFileIterator(vbag.obj.IngestTarFilePath) } else { vbag.readIterator, err = fileutil.NewFileSystemIterator(vbag.obj.IngestUntarredPath) } if err != nil { vbag.summary.AddError("Could not read bag: %v", err) vbag.summary.Finish() return vbag.obj, vbag.summary } else { vbag.addGenericFiles() } vbag.obj.IngestTopLevelDirNames = vbag.readIterator.GetTopLevelDirNames() // Golang's tar file reader is forward-only, so we need to // open a new iterator to read through a handful of tag files, // manifests and tag manifests. vbag.readIterator = nil if vbag.obj.IngestTarFilePath != "" { vbag.readIterator, err = fileutil.NewTarFileIterator(vbag.obj.IngestTarFilePath) } else { vbag.readIterator, err = fileutil.NewFileSystemIterator(vbag.obj.IngestUntarredPath) } if err != nil { vbag.summary.AddError("Could not read bag: %v", err) } else { vbag.parseManifestsTagFilesAndMimeTypes() } vbag.summary.Finish() return vbag.obj, vbag.summary }
// Returns the object identifier that will identify this bag // in fedora. That's the institution identifier, followed by // a slash and the tar file name, minus the .tar extension // and the ".bag1of12" multipart extension. So for BucketName // "aptrust.receiving.unc.edu" and Key.Key "nc_bag.b001.of030.tar", // this would return "unc.edu/nc_bag" func (s3File *S3File) ObjectName() (string, error) { institution := util.OwnerOf(s3File.BucketName) cleanBagName := util.CleanBagName(s3File.Key.Key) return fmt.Sprintf("%s/%s", institution, cleanBagName), nil }