Пример #1
0
// NewBagValidator creates a new BagValidator.
//
// Param pathToBag should be an absolute path to either the tarred bag
// (.tar file) or to the untarred bag (a directory). Param
// bagValidationConfig defines what we need to validate, in addition to
// the checksums in the manifests.
//
// An error is returned if nothing exists at pathToBag, if
// bagValidationConfig is nil, if bagValidationConfig.ValidateConfig
// reports any errors, or if bagValidationConfig.CompileFileNameRegex
// fails.
func NewBagValidator(pathToBag string, bagValidationConfig *BagValidationConfig) (*BagValidator, error) {
	if !fileutil.FileExists(pathToBag) {
		return nil, fmt.Errorf("Bag does not exist at %s", pathToBag)
	}
	if bagValidationConfig == nil {
		return nil, fmt.Errorf("Param bagValidationConfig cannot be nil")
	}
	configErrors := bagValidationConfig.ValidateConfig()
	if len(configErrors) > 0 {
		errString := "BagValidationConfig has the following errors:"
		for _, e := range configErrors {
			errString += "\n" + e.Error()
		}
		// The message is passed as an argument, not as the format string,
		// so a '%' inside any config error is reported verbatim instead of
		// being interpreted as a formatting verb.
		return nil, fmt.Errorf("%s", errString)
	}
	err := bagValidationConfig.CompileFileNameRegex()
	if err != nil {
		return nil, fmt.Errorf("Error in BagValidationConfig: %v", err)
	}
	// Only calculate the digests that the config asks for.
	calculateMd5 := util.StringListContains(bagValidationConfig.FixityAlgorithms, constants.AlgMd5)
	calculateSha256 := util.StringListContains(bagValidationConfig.FixityAlgorithms, constants.AlgSha256)
	// Collect the paths of all files the config marks as parsable tag files.
	tagFilesToParse := make([]string, 0)
	for pathToFile, filespec := range bagValidationConfig.FileSpecs {
		if filespec.ParseAsTagFile {
			tagFilesToParse = append(tagFilesToParse, pathToFile)
		}
	}
	bagValidator := &BagValidator{
		PathToBag:           pathToBag,
		BagValidationConfig: bagValidationConfig,
		virtualBag:          models.NewVirtualBag(pathToBag, tagFilesToParse, calculateMd5, calculateSha256),
	}
	return bagValidator, nil
}
Пример #2
0
func TestStringListContains(t *testing.T) {
	list := []string{"apple", "orange", "banana"}
	assert.True(t, util.StringListContains(list, "orange"))
	assert.False(t, util.StringListContains(list, "wedgie"))
	// Don't crash on nil list
	assert.False(t, util.StringListContains(nil, "mars"))
}
Пример #3
0
// NewEventGenericFileDigestCalculation returns a PremisEvent recording
// that we calculated a fixity digest. The event's Object and Agent
// describe Go's crypto/sha256 package when fixityAlg is
// constants.AlgSha256, and Go's crypto/md5 package otherwise.
//
// Param checksumGeneratedAt becomes the event's DateTime and must not be
// the zero time. Param fixityAlg must be one of
// constants.ChecksumAlgorithms. Param digest must be 32 or 64 characters
// long. An error is returned if any of these checks fails.
func NewEventGenericFileDigestCalculation(checksumGeneratedAt time.Time, fixityAlg, digest string) (*PremisEvent, error) {
	if checksumGeneratedAt.IsZero() {
		// Name the parameter this function actually takes.
		return nil, fmt.Errorf("Param checksumGeneratedAt cannot be empty.")
	}
	if !util.StringListContains(constants.ChecksumAlgorithms, fixityAlg) {
		return nil, fmt.Errorf("Param fixityAlg '%s' is not valid.", fixityAlg)
	}
	if len(digest) != 32 && len(digest) != 64 {
		return nil, fmt.Errorf("Param digest must have 32 or 64 characters. '%s' doesn't.",
			digest)
	}
	eventId := uuid.NewV4()
	// Default to md5; switch to sha256 only when that algorithm was used.
	object := "Go language crypto/md5"
	agent := "http://golang.org/pkg/crypto/md5/"
	if fixityAlg == constants.AlgSha256 {
		object = "Go language crypto/sha256"
		agent = "http://golang.org/pkg/crypto/sha256/"
	}
	return &PremisEvent{
		Identifier:         eventId.String(),
		EventType:          constants.EventDigestCalculation,
		DateTime:           checksumGeneratedAt,
		Detail:             "Calculated fixity value",
		Outcome:            string(constants.StatusSuccess),
		OutcomeDetail:      fmt.Sprintf("%s:%s", fixityAlg, digest),
		Object:             object,
		Agent:              agent,
		OutcomeInformation: "Calculated fixity value",
	}, nil
}
Пример #4
0
// parseManifestsTagFilesAndMimeTypes walks every entry returned by
// vbag.readIterator until it reports io.EOF. For each entry it does one
// of the following:
//
//   - if the entry is listed in vbag.tagFilesToParse, parses its tags and
//     sets the FileFormat of the matching generic file;
//   - if the entry is listed in the object's IngestManifests or
//     IngestTagManifests, parses it as a manifest;
//   - otherwise, sets the mime type of the matching generic file, if any.
//
// Iterator errors other than io.EOF are recorded in vbag.summary and the
// walk continues with the next entry.
func (vbag *VirtualBag) parseManifestsTagFilesAndMimeTypes() {
	// Each entry is handled inside its own function call so that the
	// deferred Close runs as soon as that entry is done, rather than
	// keeping every reader open until the whole bag has been walked.
	processNext := func() bool {
		reader, fileSummary, err := vbag.readIterator.Next()
		if reader != nil {
			defer reader.Close()
		}
		if err == io.EOF {
			return true
		}
		if err != nil {
			vbag.summary.AddError(err.Error())
			return false
		}
		// genericFile will sometimes be nil because the iterator
		// returns directory names as well as file names
		genericFile := vbag.obj.FindGenericFile(fileSummary.RelPath)
		switch {
		case util.StringListContains(vbag.tagFilesToParse, fileSummary.RelPath):
			vbag.parseTags(reader, fileSummary.RelPath)
			if genericFile != nil {
				// Our vbag library can only parse text files, so this
				// should be a plain text file.
				if strings.HasSuffix(genericFile.Identifier, ".txt") {
					genericFile.FileFormat = "text/plain"
				} else {
					genericFile.FileFormat = "application/binary"
				}
			}
		case util.StringListContains(vbag.obj.IngestManifests, fileSummary.RelPath) ||
			util.StringListContains(vbag.obj.IngestTagManifests, fileSummary.RelPath):
			vbag.parseManifest(reader, fileSummary.RelPath)
		case genericFile != nil:
			vbag.setMimeType(reader, genericFile)
		}
		return false
	}
	for {
		if done := processNext(); done {
			return
		}
	}
}
Пример #5
0
// NewEventObjectRights returns a PremisEvent of type
// constants.EventAccessAssignment, timestamped with the current UTC
// time, recording that accessSetting was assigned.
//
// Param accessSetting is compared case-insensitively against
// constants.AccessRights; an error is returned if it is not in that
// list. The event records accessSetting exactly as it was passed in.
func NewEventObjectRights(accessSetting string) (*PremisEvent, error) {
	normalized := strings.ToLower(accessSetting)
	if isValid := util.StringListContains(constants.AccessRights, normalized); !isValid {
		return nil, fmt.Errorf("Param accessSetting '%s' is not valid.", accessSetting)
	}
	event := &PremisEvent{
		Identifier:         uuid.NewV4().String(),
		EventType:          constants.EventAccessAssignment,
		DateTime:           time.Now().UTC(),
		Detail:             "Assigned bag access rights",
		Outcome:            string(constants.StatusSuccess),
		OutcomeDetail:      accessSetting,
		Object:             "APTrust exchange",
		Agent:              "https://github.com/APTrust/exchange",
		OutcomeInformation: "Set access to " + accessSetting,
	}
	return event, nil
}
Пример #6
0
// Make sure we catch all errors in an invalid bag.
// This is a more thorough version of TestValidate_FromTarFile_BagInvalid
//
// Setup failures stop the test immediately with t.Fatalf, because the
// assertions that follow would otherwise dereference a nil validator or
// a nil IntellectualObject and panic.
func TestValidate_InvalidBag(t *testing.T) {
	bagValidationConfig, err := getValidationConfig()
	if err != nil {
		t.Fatalf("Could not load BagValidationConfig: %s", err.Error())
	}
	// Locate the test bag relative to this source file.
	_, filename, _, _ := runtime.Caller(0)
	dir := filepath.Dir(filename)
	pathToBag, err := filepath.Abs(path.Join(dir, "..", "testdata", "unit_test_bags", "example.edu.tagsample_bad.tar"))
	if err != nil {
		t.Fatalf("Could not get absolute path to test bag: %s", err.Error())
	}
	validator, err := validation.NewBagValidator(pathToBag, bagValidationConfig)
	if err != nil {
		t.Fatalf("NewBagValidator returned unexpected error: %s", err.Error())
	}

	result := validator.Validate()
	if !assert.NotNil(t, result.IntellectualObject) {
		// The next assertions read fields of IntellectualObject.
		t.FailNow()
	}
	assert.Equal(t, 16, len(result.IntellectualObject.GenericFiles))
	assert.NotEmpty(t, result.IntellectualObject.IngestErrorMessage)
	assert.True(t, result.ParseSummary.HasErrors())
	assert.True(t, result.ValidationSummary.HasErrors())
	assert.True(t, result.HasErrors())

	// Every one of these messages must appear in the validation summary,
	// and the summary must contain no others.
	expectedErrors := []string{
		"File 'data/file-not-in-bag' in manifest 'manifest-sha256.txt' is missing from bag",
		"File 'custom_tags/tag_file_xyz.pdf' in manifest 'tagmanifest-md5.txt' is missing from bag",
		"File 'custom_tags/tag_file_xyz.pdf' in manifest 'tagmanifest-sha256.txt' is missing from bag",
		"Value for tag 'Title' is missing.",
		"Tag 'Access' has illegal value 'acksess'.",
		"Bad sha256 digest for 'data/datastream-descMetadata': manifest says 'This-checksum-is-bad-on-purpose.-The-validator-should-catch-it!!', file digest is 'cf9cbce80062932e10ee9cd70ec05ebc24019deddfea4e54b8788decd28b4bc7'",
		"Bad md5 digest for 'custom_tags/tracked_tag_file.txt': manifest says '00000000000000000000000000000000', file digest is 'dafbffffc3ed28ef18363394935a2651'",
		"Bad sha256 digest for 'custom_tags/tracked_tag_file.txt': manifest says '0000000000000000000000000000000000000000000000000000000000000000', file digest is '3f2f50c5bde87b58d6132faee14d1a295d115338643c658df7fa147e2296ccdd'",
	}
	assert.Equal(t, len(expectedErrors), len(result.ValidationSummary.Errors))

	for _, expected := range expectedErrors {
		assert.True(t, util.StringListContains(result.ValidationSummary.Errors, expected),
			"Missing expected validation error: %s", expected)
	}
}
Пример #7
0
// NewEventGenericFileFixityCheck returns a PremisEvent of type
// constants.EventFixityCheck describing a fixity check against the
// registered hash.
//
// Param checksumVerifiedAt becomes the event's DateTime and must not be
// the zero time. Param fixityAlg must be one of
// constants.ChecksumAlgorithms. Param digest must be 32 or 64 characters
// long. Param fixityMatched selects a success or failed outcome. An
// error is returned if any of the parameter checks fails.
func NewEventGenericFileFixityCheck(checksumVerifiedAt time.Time, fixityAlg, digest string, fixityMatched bool) (*PremisEvent, error) {
	if checksumVerifiedAt.IsZero() {
		return nil, fmt.Errorf("Param checksumVerifiedAt cannot be empty.")
	}
	if algIsKnown := util.StringListContains(constants.ChecksumAlgorithms, fixityAlg); !algIsKnown {
		return nil, fmt.Errorf("Param fixityAlg '%s' is not valid.", fixityAlg)
	}
	if digestLen := len(digest); !(digestLen == 32 || digestLen == 64) {
		return nil, fmt.Errorf("Param digest must have 32 or 64 characters. '%s' doesn't.",
			digest)
	}
	eventId := uuid.NewV4()

	// Describe the hashing package that matches the algorithm; anything
	// other than sha256 is described as md5.
	var object, agent string
	if fixityAlg == constants.AlgSha256 {
		object, agent = "Go language crypto/sha256", "http://golang.org/pkg/crypto/sha256/"
	} else {
		object, agent = "Go language crypto/md5", "http://golang.org/pkg/crypto/md5/"
	}

	outcome, outcomeInformation := string(constants.StatusSuccess), "Fixity matches"
	if !fixityMatched {
		outcome, outcomeInformation = string(constants.StatusFailed), "Fixity did not match"
	}

	event := &PremisEvent{
		Identifier:         eventId.String(),
		EventType:          constants.EventFixityCheck,
		DateTime:           checksumVerifiedAt,
		Detail:             "Fixity check against registered hash",
		Outcome:            outcome,
		OutcomeDetail:      fmt.Sprintf("%s:%s", fixityAlg, digest),
		Object:             object,
		Agent:              agent,
		OutcomeInformation: outcomeInformation,
	}
	return event, nil
}
Пример #8
0
// S3ItemWasFound reports whether bucketAndKey is present in the
// stats.S3Items list.
func (stats *APTBucketReaderStats) S3ItemWasFound(bucketAndKey string) bool {
	found := util.StringListContains(stats.S3Items, bucketAndKey)
	return found
}
Пример #9
0
// ValidPresenceValue reports whether value is one of the entries in
// presenceValues.
func ValidPresenceValue(value string) bool {
	isValid := util.StringListContains(presenceValues, value)
	return isValid
}
Пример #10
0
// runAssertions checks that obj and summary hold the values expected
// after reading the example.edu.tagsample_good test bag.
//
// Param caller is the name of the calling test. It is attached to each
// assertion as the failure message, and it selects the path-related
// assertions that differ between "TestVirtualBagRead_FromTarFile" and
// "TestVirtualBagRead_FromDirectory".
func runAssertions(t *testing.T, obj *models.IntellectualObject, summary *models.WorkSummary, caller string) {
	// WorkSummary
	assert.False(t, summary.StartedAt.IsZero(), caller)
	assert.False(t, summary.FinishedAt.IsZero(), caller)
	assert.Empty(t, summary.Errors, caller)

	// IntelObj properties
	assert.Equal(t, 0, obj.Id, caller)
	assert.Equal(t, "example.edu.tagsample_good", obj.Identifier, caller)

	// TODO: Is BagName necessary? It should be the same as obj.Identifier
	assert.Equal(t, "", obj.BagName, caller)

	assert.Equal(t, "virginia.edu", obj.Institution, caller)
	assert.Equal(t, 0, obj.InstitutionId, caller)
	assert.Equal(t, "Thirteen Ways of Looking at a Blackbird", obj.Title, caller)
	assert.Equal(t, "so much depends upon a red wheel barrow glazed with rain water beside the white chickens", obj.Description, caller)
	assert.Equal(t, "Institution", obj.Access, caller)
	assert.Equal(t, "uva-internal-id-0001", obj.AltIdentifier, caller)
	assert.Empty(t, obj.IngestErrorMessage, caller)
	if caller == "TestVirtualBagRead_FromTarFile" {
		assert.NotEmpty(t, obj.IngestTarFilePath, caller)
	} else if caller == "TestVirtualBagRead_FromDirectory" {
		assert.NotEmpty(t, obj.IngestUntarredPath, caller)
	}
	assert.Equal(t, 2, len(obj.IngestManifests), caller)
	assert.True(t, util.StringListContains(obj.IngestManifests, "manifest-md5.txt"), caller)
	assert.True(t, util.StringListContains(obj.IngestManifests, "manifest-sha256.txt"), caller)
	assert.Equal(t, 2, len(obj.IngestTagManifests), caller)
	assert.True(t, util.StringListContains(obj.IngestTagManifests, "tagmanifest-md5.txt"), caller)
	assert.True(t, util.StringListContains(obj.IngestTagManifests, "tagmanifest-sha256.txt"), caller)
	assert.Empty(t, obj.IngestFilesIgnored, caller)

	assert.NotNil(t, obj.IngestTopLevelDirNames, caller)
	if obj.IngestTopLevelDirNames != nil {
		assert.Equal(t, 1, len(obj.IngestTopLevelDirNames), caller)
		assert.NotEmpty(t, obj.IngestTopLevelDirNames[0], caller)
	}

	// Tags
	assert.Equal(t, 10, len(obj.IngestTags), caller)
	for _, tag := range obj.IngestTags {
		assert.NotEmpty(t, tag.SourceFile, caller)
		assert.NotEmpty(t, tag.Label, caller)
		assert.NotEmpty(t, tag.Value, caller)
	}

	// Spot check one tag. The length guard keeps a short tag list from
	// causing an index-out-of-range panic; the count assertion above has
	// already recorded the failure in that case.
	if len(obj.IngestTags) > 4 {
		tag := obj.IngestTags[4]
		assert.Equal(t, "bag-info.txt", tag.SourceFile, caller)
		assert.Equal(t, "Bag-Count", tag.Label, caller)
		assert.Equal(t, "1 of 1", tag.Value, caller)
	}

	// Generic Files
	tagFileCount := 0
	payloadFileCount := 0
	manifestCount := 0
	tagManifestCount := 0
	assert.Equal(t, 16, len(obj.GenericFiles), caller)
	for _, gf := range obj.GenericFiles {
		assert.NotEmpty(t, gf.Identifier, caller)
		assert.NotEmpty(t, gf.IntellectualObjectIdentifier, caller)
		assert.NotEmpty(t, gf.FileFormat, caller, gf.Identifier)
		assert.NotEmpty(t, gf.IngestFileType, caller)
		assert.NotEmpty(t, gf.IngestMd5, caller)
		assert.NotEmpty(t, gf.IngestSha256, caller)

		if caller == "TestVirtualBagRead_FromTarFile" {
			assert.Empty(t, gf.IngestLocalPath, caller)
		} else if caller == "TestVirtualBagRead_FromDirectory" {
			assert.NotEmpty(t, gf.IngestLocalPath, caller)
		}

		assert.Empty(t, gf.IngestStorageURL, caller)
		assert.Empty(t, gf.IngestReplicationURL, caller)
		assert.True(t, gf.Size > 0, caller)
		switch gf.IngestFileType {
		case constants.PAYLOAD_FILE:
			payloadFileCount++
		case constants.PAYLOAD_MANIFEST:
			manifestCount++
		case constants.TAG_MANIFEST:
			tagManifestCount++
		case constants.TAG_FILE:
			tagFileCount++
		}
	}

	// Make sure file types were all tagged correctly
	assert.Equal(t, 4, payloadFileCount, caller)
	assert.Equal(t, 2, manifestCount, caller)
	assert.Equal(t, 2, tagManifestCount, caller)
	assert.Equal(t, 8, tagFileCount, caller)

	// Spot check generic file aptrust-info.txt
	gf := obj.FindGenericFile("aptrust-info.txt")
	if gf == nil {
		assert.Fail(t, "Could not find aptrust-info.txt", caller)
		// The remaining assertions all read fields of gf, so stop here
		// instead of dereferencing a nil pointer.
		return
	}
	assert.Equal(t, "example.edu.tagsample_good/aptrust-info.txt", gf.Identifier, caller)
	assert.Equal(t, 0, gf.IntellectualObjectId, caller)
	assert.Equal(t, "example.edu.tagsample_good", gf.IntellectualObjectIdentifier, caller)
	assert.Equal(t, "text/plain", gf.FileFormat, caller)
	assert.Empty(t, gf.URI, caller)
	assert.EqualValues(t, 67, gf.Size, caller)
	assert.False(t, gf.FileModified.IsZero(), caller)
	assert.Equal(t, constants.TAG_FILE, gf.IngestFileType, caller)
	assert.Equal(t, "300e936e622605f9f7a846d261d53093", gf.IngestManifestMd5, caller)
	assert.Equal(t, "300e936e622605f9f7a846d261d53093", gf.IngestMd5, caller)
	assert.False(t, gf.IngestMd5GeneratedAt.IsZero(), caller)
	assert.True(t, gf.IngestMd5VerifiedAt.IsZero(), caller)
	assert.Equal(t, "a2b6c5a713af771c5e4edde8d5be25fbcad86e45ea338f43a5bb769347e7c8bb", gf.IngestManifestSha256, caller)
	assert.Equal(t, "a2b6c5a713af771c5e4edde8d5be25fbcad86e45ea338f43a5bb769347e7c8bb", gf.IngestSha256, caller)
	assert.False(t, gf.IngestSha256GeneratedAt.IsZero(), caller)
	assert.True(t, gf.IngestSha256VerifiedAt.IsZero(), caller)
	assert.NotEmpty(t, gf.IngestUUID, caller)
	assert.False(t, gf.IngestUUIDGeneratedAt.IsZero(), caller)
	assert.Empty(t, gf.IngestStorageURL, caller)
	assert.True(t, gf.IngestStoredAt.IsZero(), caller)
	assert.Empty(t, gf.IngestReplicationURL, caller)
	assert.True(t, gf.IngestReplicatedAt.IsZero(), caller)
	assert.False(t, gf.IngestPreviousVersionExists, caller)
	assert.True(t, gf.IngestNeedsSave, caller)
	assert.Empty(t, gf.IngestErrorMessage, caller)
}