// NewBagValidator creates a new BagValidator. Param pathToBag // should be an absolute path to either the tarred bag (.tar file) // or to the untarred bag (a directory). Param bagValidationConfig // defines what we need to validate, in addition to the checksums in the // manifests. func NewBagValidator(pathToBag string, bagValidationConfig *BagValidationConfig) (*BagValidator, error) { if !fileutil.FileExists(pathToBag) { return nil, fmt.Errorf("Bag does not exist at %s", pathToBag) } if bagValidationConfig == nil { return nil, fmt.Errorf("Param bagValidationConfig cannot be nil") } configErrors := bagValidationConfig.ValidateConfig() if len(configErrors) > 0 { errString := "BagValidationConfig has the following errors:" for _, e := range configErrors { errString += fmt.Sprintf("\n%s", e.Error()) } return nil, fmt.Errorf(errString) } err := bagValidationConfig.CompileFileNameRegex() if err != nil { return nil, fmt.Errorf("Error in BagValidationConfig: %v", err) } calculateMd5 := util.StringListContains(bagValidationConfig.FixityAlgorithms, constants.AlgMd5) calculateSha256 := util.StringListContains(bagValidationConfig.FixityAlgorithms, constants.AlgSha256) tagFilesToParse := make([]string, 0) for pathToFile, filespec := range bagValidationConfig.FileSpecs { if filespec.ParseAsTagFile { tagFilesToParse = append(tagFilesToParse, pathToFile) } } bagValidator := &BagValidator{ PathToBag: pathToBag, BagValidationConfig: bagValidationConfig, virtualBag: models.NewVirtualBag(pathToBag, tagFilesToParse, calculateMd5, calculateSha256), } return bagValidator, nil }
func TestStringListContains(t *testing.T) { list := []string{"apple", "orange", "banana"} assert.True(t, util.StringListContains(list, "orange")) assert.False(t, util.StringListContains(list, "wedgie")) // Don't crash on nil list assert.False(t, util.StringListContains(nil, "mars")) }
// We generated a sha256 checksum. func NewEventGenericFileDigestCalculation(checksumGeneratedAt time.Time, fixityAlg, digest string) (*PremisEvent, error) { if checksumGeneratedAt.IsZero() { return nil, fmt.Errorf("Param checksumVerifiedAt cannot be empty.") } if !util.StringListContains(constants.ChecksumAlgorithms, fixityAlg) { return nil, fmt.Errorf("Param fixityAlg '%s' is not valid.", fixityAlg) } if len(digest) != 32 && len(digest) != 64 { return nil, fmt.Errorf("Param digest must have 32 or 64 characters. '%s' doesn't.", digest) } eventId := uuid.NewV4() object := "Go language crypto/md5" agent := "http://golang.org/pkg/crypto/md5/" if fixityAlg == constants.AlgSha256 { object = "Go language crypto/sha256" agent = "http://golang.org/pkg/crypto/sha256/" } return &PremisEvent{ Identifier: eventId.String(), EventType: constants.EventDigestCalculation, DateTime: checksumGeneratedAt, Detail: "Calculated fixity value", Outcome: string(constants.StatusSuccess), OutcomeDetail: fmt.Sprintf("%s:%s", fixityAlg, digest), Object: object, Agent: agent, OutcomeInformation: "Calculated fixity value", }, nil }
func (vbag *VirtualBag) parseManifestsTagFilesAndMimeTypes() { for { reader, fileSummary, err := vbag.readIterator.Next() if reader != nil { defer reader.Close() } if err == io.EOF { return } if err != nil { vbag.summary.AddError(err.Error()) continue } // genericFile will sometimes be nil because the iterator // returns directory names as well as file names genericFile := vbag.obj.FindGenericFile(fileSummary.RelPath) if util.StringListContains(vbag.tagFilesToParse, fileSummary.RelPath) { vbag.parseTags(reader, fileSummary.RelPath) if genericFile != nil { // Our vbag library can only parse text files, so this // should be a plain text file. if strings.HasSuffix(genericFile.Identifier, ".txt") { genericFile.FileFormat = "text/plain" } else { genericFile.FileFormat = "application/binary" } } } else if util.StringListContains(vbag.obj.IngestManifests, fileSummary.RelPath) || util.StringListContains(vbag.obj.IngestTagManifests, fileSummary.RelPath) { vbag.parseManifest(reader, fileSummary.RelPath) } else { if genericFile != nil { vbag.setMimeType(reader, genericFile) } } } }
func NewEventObjectRights(accessSetting string) (*PremisEvent, error) { if !util.StringListContains(constants.AccessRights, strings.ToLower(accessSetting)) { return nil, fmt.Errorf("Param accessSetting '%s' is not valid.", accessSetting) } eventId := uuid.NewV4() return &PremisEvent{ Identifier: eventId.String(), EventType: constants.EventAccessAssignment, DateTime: time.Now().UTC(), Detail: "Assigned bag access rights", Outcome: string(constants.StatusSuccess), OutcomeDetail: accessSetting, Object: "APTrust exchange", Agent: "https://github.com/APTrust/exchange", OutcomeInformation: "Set access to " + accessSetting, }, nil }
// Make sure we catch all errors in an invalid bag. // This is a more thorough version of TestValidate_FromTarFile_BagInvalid func TestValidate_InvalidBag(t *testing.T) { bagValidationConfig, err := getValidationConfig() if err != nil { assert.Fail(t, "Could not load BagValidationConfig: %s", err.Error()) } _, filename, _, _ := runtime.Caller(0) dir := filepath.Dir(filename) pathToBag, err := filepath.Abs(path.Join(dir, "..", "testdata", "unit_test_bags", "example.edu.tagsample_bad.tar")) validator, err := validation.NewBagValidator(pathToBag, bagValidationConfig) if err != nil { assert.Fail(t, "NewBagValidator returned unexpected error: %s", err.Error()) } result := validator.Validate() assert.NotNil(t, result.IntellectualObject) assert.Equal(t, 16, len(result.IntellectualObject.GenericFiles)) assert.NotEmpty(t, result.IntellectualObject.IngestErrorMessage) assert.True(t, result.ParseSummary.HasErrors()) assert.True(t, result.ValidationSummary.HasErrors()) assert.True(t, result.HasErrors()) err_0 := "File 'data/file-not-in-bag' in manifest 'manifest-sha256.txt' is missing from bag" err_1 := "File 'custom_tags/tag_file_xyz.pdf' in manifest 'tagmanifest-md5.txt' is missing from bag" err_2 := "File 'custom_tags/tag_file_xyz.pdf' in manifest 'tagmanifest-sha256.txt' is missing from bag" err_3 := "Value for tag 'Title' is missing." err_4 := "Tag 'Access' has illegal value 'acksess'." err_5 := "Bad sha256 digest for 'data/datastream-descMetadata': manifest says 'This-checksum-is-bad-on-purpose.-The-validator-should-catch-it!!', file digest is 'cf9cbce80062932e10ee9cd70ec05ebc24019deddfea4e54b8788decd28b4bc7'" err_6 := "Bad md5 digest for 'custom_tags/tracked_tag_file.txt': manifest says '00000000000000000000000000000000', file digest is 'dafbffffc3ed28ef18363394935a2651'" err_7 := "Bad sha256 digest for 'custom_tags/tracked_tag_file.txt': manifest says '0000000000000000000000000000000000000000000000000000000000000000', file digest is '3f2f50c5bde87b58d6132faee14d1a295d115338643c658df7fa147e2296ccdd'" assert.Equal(t, 8, len(result.ValidationSummary.Errors)) assert.True(t, util.StringListContains(result.ValidationSummary.Errors, err_0)) assert.True(t, util.StringListContains(result.ValidationSummary.Errors, err_1)) assert.True(t, util.StringListContains(result.ValidationSummary.Errors, err_2)) assert.True(t, util.StringListContains(result.ValidationSummary.Errors, err_3)) assert.True(t, util.StringListContains(result.ValidationSummary.Errors, err_4)) assert.True(t, util.StringListContains(result.ValidationSummary.Errors, err_5)) assert.True(t, util.StringListContains(result.ValidationSummary.Errors, err_6)) assert.True(t, util.StringListContains(result.ValidationSummary.Errors, err_7)) }
// We checked fixity against the manifest. // If fixity didn't match, we wouldn't be ingesting this. func NewEventGenericFileFixityCheck(checksumVerifiedAt time.Time, fixityAlg, digest string, fixityMatched bool) (*PremisEvent, error) { if checksumVerifiedAt.IsZero() { return nil, fmt.Errorf("Param checksumVerifiedAt cannot be empty.") } if !util.StringListContains(constants.ChecksumAlgorithms, fixityAlg) { return nil, fmt.Errorf("Param fixityAlg '%s' is not valid.", fixityAlg) } if len(digest) != 32 && len(digest) != 64 { return nil, fmt.Errorf("Param digest must have 32 or 64 characters. '%s' doesn't.", digest) } eventId := uuid.NewV4() object := "Go language crypto/md5" agent := "http://golang.org/pkg/crypto/md5/" outcomeInformation := "Fixity matches" outcome := string(constants.StatusSuccess) if fixityAlg == constants.AlgSha256 { object = "Go language crypto/sha256" agent = "http://golang.org/pkg/crypto/sha256/" } if fixityMatched == false { outcome = string(constants.StatusFailed) outcomeInformation = "Fixity did not match" } return &PremisEvent{ Identifier: eventId.String(), EventType: constants.EventFixityCheck, DateTime: checksumVerifiedAt, Detail: "Fixity check against registered hash", Outcome: outcome, OutcomeDetail: fmt.Sprintf("%s:%s", fixityAlg, digest), Object: object, Agent: agent, OutcomeInformation: outcomeInformation, }, nil }
// Returns true if the specified bucketAndKey was found in S3 func (stats *APTBucketReaderStats) S3ItemWasFound(bucketAndKey string) bool { return util.StringListContains(stats.S3Items, bucketAndKey) }
// Returns true if value is a valid presence value. func ValidPresenceValue(value string) bool { return util.StringListContains(presenceValues, value) }
func runAssertions(t *testing.T, obj *models.IntellectualObject, summary *models.WorkSummary, caller string) { // WorkSummary assert.False(t, summary.StartedAt.IsZero(), caller) assert.False(t, summary.FinishedAt.IsZero(), caller) assert.Empty(t, summary.Errors, caller) // IntelObj properties assert.Equal(t, 0, obj.Id, caller) assert.Equal(t, "example.edu.tagsample_good", obj.Identifier, caller) // TODO: Is BagName necessary? It should be the same as obj.Identifier assert.Equal(t, "", obj.BagName, caller) assert.Equal(t, "virginia.edu", obj.Institution, caller) assert.Equal(t, 0, obj.InstitutionId, caller) assert.Equal(t, "Thirteen Ways of Looking at a Blackbird", obj.Title, caller) assert.Equal(t, "so much depends upon a red wheel barrow glazed with rain water beside the white chickens", obj.Description, caller) assert.Equal(t, "Institution", obj.Access, caller) assert.Equal(t, "uva-internal-id-0001", obj.AltIdentifier, caller) assert.Empty(t, obj.IngestErrorMessage, caller) if caller == "TestVirtualBagRead_FromTarFile" { assert.NotEmpty(t, obj.IngestTarFilePath, caller) } else if caller == "TestVirtualBagRead_FromDirectory" { assert.NotEmpty(t, obj.IngestUntarredPath, caller) } assert.Equal(t, 2, len(obj.IngestManifests), caller) assert.True(t, util.StringListContains(obj.IngestManifests, "manifest-md5.txt"), caller) assert.True(t, util.StringListContains(obj.IngestManifests, "manifest-sha256.txt"), caller) assert.Equal(t, 2, len(obj.IngestTagManifests), caller) assert.True(t, util.StringListContains(obj.IngestTagManifests, "tagmanifest-md5.txt"), caller) assert.True(t, util.StringListContains(obj.IngestTagManifests, "tagmanifest-sha256.txt"), caller) assert.Empty(t, obj.IngestFilesIgnored, caller) assert.NotNil(t, obj.IngestTopLevelDirNames, caller) if obj.IngestTopLevelDirNames != nil { assert.Equal(t, 1, len(obj.IngestTopLevelDirNames), caller) assert.NotEmpty(t, obj.IngestTopLevelDirNames[0], caller) } // Tags assert.Equal(t, 10, len(obj.IngestTags)) for _, tag := range obj.IngestTags { assert.NotEmpty(t, tag.SourceFile, caller) assert.NotEmpty(t, tag.Label, caller) assert.NotEmpty(t, tag.Value, caller) } // Spot check one tag tag := obj.IngestTags[4] assert.Equal(t, "bag-info.txt", tag.SourceFile, caller) assert.Equal(t, "Bag-Count", tag.Label, caller) assert.Equal(t, "1 of 1", tag.Value, caller) // Generic Files tagFileCount := 0 payloadFileCount := 0 manifestCount := 0 tagManifestCount := 0 assert.Equal(t, 16, len(obj.GenericFiles), caller) for _, gf := range obj.GenericFiles { assert.NotEmpty(t, gf.Identifier, caller) assert.NotEmpty(t, gf.IntellectualObjectIdentifier, caller) assert.NotEmpty(t, gf.FileFormat, caller, gf.Identifier) assert.NotEmpty(t, gf.IngestFileType, caller) assert.NotEmpty(t, gf.IngestMd5, caller) assert.NotEmpty(t, gf.IngestSha256, caller) if caller == "TestVirtualBagRead_FromTarFile" { assert.Empty(t, gf.IngestLocalPath, caller) } else if caller == "TestVirtualBagRead_FromDirectory" { assert.NotEmpty(t, gf.IngestLocalPath, caller) } assert.Empty(t, gf.IngestStorageURL, caller) assert.Empty(t, gf.IngestReplicationURL, caller) assert.True(t, gf.Size > 0, caller) switch gf.IngestFileType { case constants.PAYLOAD_FILE: payloadFileCount++ case constants.PAYLOAD_MANIFEST: manifestCount++ case constants.TAG_MANIFEST: tagManifestCount++ case constants.TAG_FILE: tagFileCount++ } } // Make sure file types were all tagged correctly assert.Equal(t, 4, payloadFileCount, caller) assert.Equal(t, 2, manifestCount, caller) assert.Equal(t, 2, tagManifestCount, caller) assert.Equal(t, 8, tagFileCount, caller) // Spot check generic file aptrust-info.txt gf := obj.FindGenericFile("aptrust-info.txt") if gf == nil { assert.Fail(t, "Could not find aptrust-info.txt", caller) } assert.Equal(t, "example.edu.tagsample_good/aptrust-info.txt", gf.Identifier, caller) assert.Equal(t, 0, gf.IntellectualObjectId, caller) assert.Equal(t, "example.edu.tagsample_good", gf.IntellectualObjectIdentifier, caller) assert.Equal(t, "text/plain", gf.FileFormat, caller) assert.Empty(t, gf.URI, caller) assert.EqualValues(t, 67, gf.Size, caller) assert.False(t, gf.FileModified.IsZero(), caller) assert.Equal(t, constants.TAG_FILE, gf.IngestFileType, caller) assert.Equal(t, "300e936e622605f9f7a846d261d53093", gf.IngestManifestMd5, caller) assert.Equal(t, "300e936e622605f9f7a846d261d53093", gf.IngestMd5, caller) assert.False(t, gf.IngestMd5GeneratedAt.IsZero(), caller) assert.True(t, gf.IngestMd5VerifiedAt.IsZero(), caller) assert.Equal(t, "a2b6c5a713af771c5e4edde8d5be25fbcad86e45ea338f43a5bb769347e7c8bb", gf.IngestManifestSha256, caller) assert.Equal(t, "a2b6c5a713af771c5e4edde8d5be25fbcad86e45ea338f43a5bb769347e7c8bb", gf.IngestSha256, caller) assert.False(t, gf.IngestSha256GeneratedAt.IsZero(), caller) assert.True(t, gf.IngestSha256VerifiedAt.IsZero(), caller) assert.NotEmpty(t, gf.IngestUUID, caller) assert.False(t, gf.IngestUUIDGeneratedAt.IsZero(), caller) assert.Empty(t, gf.IngestStorageURL, caller) assert.True(t, gf.IngestStoredAt.IsZero(), caller) assert.Empty(t, gf.IngestReplicationURL, caller) assert.True(t, gf.IngestReplicatedAt.IsZero(), caller) assert.False(t, gf.IngestPreviousVersionExists, caller) assert.True(t, gf.IngestNeedsSave, caller) assert.Empty(t, gf.IngestErrorMessage, caller) }